diff --git a/.clangd b/.clangd new file mode 100644 index 0000000000000..534bd8fa45fb9 --- /dev/null +++ b/.clangd @@ -0,0 +1,2 @@ +CompileFlags: + Add: [-I., -I.., -Iflisp, -Isupport, -I../support, -I../usr/include, -I../../usr/include, -Wall,] diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 3c555f006d082..3af8ba86153a1 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -5,3 +5,5 @@ e66bfa5dd32f93e76068c00ad882c1fc839c5af8 # whitespace: replace non-breaking space => space 100a741e7ab38c91d48cc929bb001afc8e09261f +# whitespace: replace tabs => space +b03e8ab9c7bd3e001add519571858fa04d6a249b diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d2da8839ddb39..bf1380f5a07bc 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,5 +2,5 @@ CODEOWNERS @JuliaLang/github-actions /.github/ @JuliaLang/github-actions /.buildkite/ @JuliaLang/github-actions -/.github/workflows/retry.yml @DilumAluthge +/.github/workflows/rerun_failed.yml @DilumAluthge /.github/workflows/statuses.yml @DilumAluthge diff --git a/.github/workflows/LabelCheck.yml b/.github/workflows/LabelCheck.yml new file mode 100644 index 0000000000000..194b0c92065c9 --- /dev/null +++ b/.github/workflows/LabelCheck.yml @@ -0,0 +1,19 @@ +name: Labels + +permissions: + contents: read +on: + pull_request: + types: [labeled, unlabeled, opened, reopened, edited, synchronize] +jobs: + enforce-labels: + name: Check for blocking labels + runs-on: ubuntu-latest + timeout-minutes: 2 + steps: + - uses: yogevbd/enforce-label-action@2.2.2 + with: + # REQUIRED_LABELS_ANY: "bug,enhancement,skip-changelog" + # REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['bug','enhancement','skip-changelog']" + BANNED_LABELS: "needs docs,needs compat annotation,needs more info,needs nanosoldier run,needs news,needs pkgeval,needs tests,DO NOT MERGE" + BANNED_LABELS_DESCRIPTION: "A PR should not be merged with `needs *` or `DO NOT MERGE` labels" diff --git a/.github/workflows/Typos.yml b/.github/workflows/Typos.yml new file mode 100644 index 0000000000000..f9fa20fff5d12 --- /dev/null +++ b/.github/workflows/Typos.yml @@ -0,0 +1,56 @@ +name: Typos + +permissions: {} + +on: [pull_request] + +jobs: + typos-check: + name: Check for new typos + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout the JuliaLang/julia repository + uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Check spelling with typos + #uses: crate-ci/typos@master + env: + GH_TOKEN: "${{ github.token }}" + run: | + git fetch --depth=1 origin ${{ github.base_ref }} + OLD_FILES=$(git diff-index --name-only --diff-filter=ad FETCH_HEAD) + NEW_FILES=$(git diff-index --name-only --diff-filter=d FETCH_HEAD) + + mkdir -p "${{ runner.temp }}/typos" + RELEASE_ASSET_URL="$( + gh api /repos/crate-ci/typos/releases/latest \ + --jq '."assets"[] | select(."name" | test("^typos-.+-x86_64-unknown-linux-musl\\.tar\\.gz$")) | ."browser_download_url"' + )" + wget --secure-protocol=TLSv1_3 --max-redirect=1 --retry-on-host-error --retry-connrefused --tries=3 \ + --quiet --output-document=- "${RELEASE_ASSET_URL}" \ + | tar -xz -C "${{ runner.temp }}/typos" ./typos + "${{ runner.temp }}/typos/typos" --version + + echo -n $NEW_FILES | xargs "${{ runner.temp }}/typos/typos" --format json >> ${{ runner.temp }}/new_typos.jsonl || true + git checkout FETCH_HEAD -- $OLD_FILES + echo -n $OLD_FILES | xargs "${{ runner.temp }}/typos/typos" --format json >> ${{ runner.temp }}/old_typos.jsonl || true + + python -c ' + import sys, json + 
old = set() + with open(sys.argv[1]) as old_file: + for line in old_file: + old.add(json.loads(line)["typo"]) + clean = True + with open(sys.argv[2]) as new_file: + for line in new_file: + new = json.loads(line) + if new["typo"] not in old: + if len(new["typo"]) > 6: # Short typos might be false positives. Long are probably real. + clean = False + print("::warning file={},line={},col={}::perhaps \"{}\" should be \"{}\".".format( + new["path"], new["line_num"], new["byte_offset"], + new["typo"], " or ".join(new["corrections"]))) + sys.exit(1 if not clean else 0)' "${{ runner.temp }}/old_typos.jsonl" "${{ runner.temp }}/new_typos.jsonl" diff --git a/.github/workflows/rerun_failed.yml b/.github/workflows/rerun_failed.yml deleted file mode 100644 index 7d022920658a9..0000000000000 --- a/.github/workflows/rerun_failed.yml +++ /dev/null @@ -1,92 +0,0 @@ -# Please ping @DilumAluthge when making any changes to this file. - -# Here are some steps that we take in this workflow file for security reasons: -# 1. We do not checkout any code. -# 2. We only run actions that are defined in a repository in the `JuliaLang` GitHub organization. -# 3. We do not give the `GITHUB_TOKEN` any permissions. -# 4. We only give the Buildkite API token (`BUILDKITE_API_TOKEN_RETRY`) the minimum necessary -# set of permissions. - -# Important note to Buildkite maintainers: -# In order to make this work, you need to tell Buildkite that it should NOT create a brand-new -# build when someone closes and reopens a pull request. To do so: -# 1. Go to the relevant pipeline (e.g. https://buildkite.com/julialang/julia-master). -# 2. Click on the "Pipeline Settings" button. -# 3. In the left sidebar, under "Pipeline Settings", click on "GitHub". -# 4. In the "GitHub Settings", under "Build Pull Requests", make sure that the "Skip pull -# request builds for existing commits" checkbox is checked. This is the setting that tells -# Buildkite that it should NOT create a brand-new build when someone closes and reopens a -# pull request. -# 5. At the bottom of the page, click the "Save GitHub Settings" button. - -name: Rerun Failed Buildkite Jobs - -# There are two ways that a user can rerun the failed Buildkite jobs: -# 1. Close and reopen the pull request. -# In order to use this approach, the user must be in one of the following three categories: -# (i) Author of the pull request -# (ii) Commit permissions -# (iii) Triage permissions -# 2. Post a comment on the pull request with exactly the following contents: /buildkite rerun failed -# In order to use this approach, the user must be in the following category: -# - A member of the JuliaLang GitHub organization (the membership must be publicized) - -on: - # When using the `pull_request_target` event, all PRs will get access to secret environment - # variables (such as the `BUILDKITE_API_TOKEN_RETRY` secret environment variable), even if - # the PR is from a fork. Therefore, for security reasons, we do not checkout any code in - # this workflow. - pull_request_target: - types: [ reopened ] - issue_comment: - types: [ created ] - -# We do not give the `GITHUB_TOKEN` any permissions. -# Therefore, the `GITHUB_TOKEN` only has the same access as any member of the public. 
-permissions: - contents: none - -jobs: - rerun-failed-buildkite-jobs: - name: Rerun Failed Buildkite Jobs - runs-on: ubuntu-latest - if: (github.repository == 'JuliaLang/julia') && ((github.event_name == 'pull_request_target' && github.event.action == 'reopened') || (github.event_name == 'issue_comment' && github.event.issue.pull_request && github.event.comment.body == '/buildkite rerun failed')) - steps: - # For security reasons, we do not checkout any code in this workflow. - - name: Check organization membership - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then - if [[ "${{ github.event.action }}" == "reopened" ]]; then - echo "This is a \"reopened\" event, so we do not need to check the user's organization membership." - echo "GOOD_TO_PROCEED=yes" >> ${GITHUB_ENV:?} - echo "PULL_REQUEST_NUMBER=${{ github.event.number }}" >> ${GITHUB_ENV:?} - else - echo "ERROR: The github.event_name is \"pull_request_target\", but the github.event.action is not \"reopened\"." - exit 1 - fi - else - curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}" - curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}/orgs" - export USER_IS_ORGANIZATION_MEMBER=`curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}/orgs" | jq '[.[] | .login] | index("JuliaLang") != null' | tr -s ' '` - if [[ "${USER_IS_ORGANIZATION_MEMBER:?}" == "true" ]]; then - echo "The \"${{ github.event.sender.login }}\" user is a public member of the JuliaLang organization." - echo "GOOD_TO_PROCEED=yes" >> ${GITHUB_ENV:?} - echo "PULL_REQUEST_NUMBER=${{ github.event.issue.number }}" >> ${GITHUB_ENV:?} - else - echo "ERROR: the \"${{ github.event.sender.login }}\" user is NOT a public member of the JuliaLang organization." - echo "If you are a member, please make sure that you have publicized your membership." - exit 1 - fi - fi - - run: | - echo "GOOD_TO_PROCEED: ${{ env.GOOD_TO_PROCEED }}" - echo "PULL_REQUEST_NUMBER: ${{ env.PULL_REQUEST_NUMBER }}" - - uses: JuliaLang/buildkite-rerun-failed@057f6f2d37aa29a57b7679fd2af0df1d9f9188b4 - if: env.GOOD_TO_PROCEED == 'yes' - with: - buildkite_api_token: ${{ secrets.BUILDKITE_API_TOKEN_RETRY }} - buildkite_organization_slug: 'julialang' - buildkite_pipeline_slug: 'julia-master' - pr_number: ${{ env.PULL_REQUEST_NUMBER }} diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml deleted file mode 100644 index 36a694a7c6d20..0000000000000 --- a/.github/workflows/statuses.yml +++ /dev/null @@ -1,65 +0,0 @@ -# Please ping @DilumAluthge when making any changes to this file. - -# This is just a short-term solution until we have migrated all of CI to Buildkite. -# -# 1. TODO: delete this file once we have migrated all of CI to Buildkite. - -# Here are some steps that we take in this workflow file for security reasons: -# 1. We do not checkout any code. -# 2. We do not run any external actions. -# 3. We only give the `GITHUB_TOKEN` the minimum necessary set of permissions. - -name: Create Buildbot Statuses - -on: - push: - branches: - - 'master' - - 'release-*' - # When using the `pull_request_target` event, all PRs will get a `GITHUB_TOKEN` that has - # write permissions, even if the PR is from a fork. - # Therefore, for security reasons, we do not checkout any code in this workflow. 
- pull_request_target: - types: [opened, synchronize] - branches: - - 'master' - - 'release-*' - -# These are the permissions for the `GITHUB_TOKEN`. -# We should only give the token the minimum necessary set of permissions. -permissions: - statuses: write - -jobs: - create-buildbot-statuses: - name: Create Buildbot Statuses - runs-on: ubuntu-latest - if: github.repository == 'JuliaLang/julia' - steps: - # For security reasons, we do not checkout any code in this workflow. - - run: echo "SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV - if: github.event_name == 'pull_request_target' - - run: echo "SHA=${{ github.sha }}" >> $GITHUB_ENV - if: github.event_name != 'pull_request_target' - - run: echo "The SHA is ${{ env.SHA }}" - - # As we incrementally migrate individual jobs from Buildbot to Buildkite, we should - # remove them from the `context_list`. - - run: | - declare -a CONTEXT_LIST=( - "buildbot/tester_freebsd64" - "buildbot/tester_win32" - "buildbot/tester_win64" - ) - for CONTEXT in "${CONTEXT_LIST[@]}" - do - curl \ - -X POST \ - -H "Authorization: token $GITHUB_TOKEN" \ - -H "Accept: application/vnd.github.v3+json" \ - -d "{\"context\": \"$CONTEXT\", \"state\": \"$STATE\"}" \ - https://api.github.com/repos/JuliaLang/julia/statuses/${{ env.SHA }} - done - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - STATE: "pending" diff --git a/.gitignore b/.gitignore index 2780210c41a9b..f0072fec9c91e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ /source-dist.tmp /source-dist.tmp1 +*.expmap *.exe *.dll *.dwo @@ -33,7 +34,8 @@ .DS_Store .idea/* .vscode/* - +*.heapsnapshot +.cache # Buildkite: Ignore the entire .buildkite directory /.buildkite diff --git a/.mailmap b/.mailmap index 5335c88a63d7d..e91501651d065 100644 --- a/.mailmap +++ b/.mailmap @@ -283,3 +283,15 @@ Daniel Karrasch Roger Luo Roger Luo + +Frames Catherine White +Frames Catherine White +Frames Catherine White + +Claire Foster + +Jishnu Bhattacharya +Jishnu Bhattacharya + +Shuhei Kadowaki +Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e409381da3754..0131dcbc4a278 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -58,9 +58,9 @@ A useful bug report filed as a GitHub issue provides information about how to re ### Writing tests -There are never enough tests. Track [code coverage at Coveralls](https://coveralls.io/r/JuliaLang/julia), and help improve it. +There are never enough tests. Track [code coverage at Codecov](https://codecov.io/github/JuliaLang/julia), and help improve it. -1. Go visit https://coveralls.io/r/JuliaLang/julia. +1. Go visit https://codecov.io/github/JuliaLang/julia. 2. Browse through the source files and find some untested functionality (highlighted in red) that you think you might be able to write a test for. @@ -74,7 +74,7 @@ There are never enough tests. Track [code coverage at Coveralls](https://coveral * You can see the current buildbot setup at: https://build.julialang.org/builders * [Issue 9493](https://github.com/JuliaLang/julia/issues/9493) and [issue 11885](https://github.com/JuliaLang/julia/issues/11885) have more detailed discussion on code coverage. -Coveralls shows functionality that still needs "proof of concept" tests. These are important, as are tests for tricky edge cases, such as converting between integer types when the number to convert is near the maximum of the range of one of the integer types. 
Even if a function already has some coverage on Coveralls, it may still benefit from tests for edge cases. +Code coverage shows functionality that still needs "proof of concept" tests. These are important, as are tests for tricky edge cases, such as converting between integer types when the number to convert is near the maximum of the range of one of the integer types. Even if a function already has some coverage on Codecov, it may still benefit from tests for edge cases. ### Improving documentation @@ -185,7 +185,14 @@ At the moment, this should always be done with the following `compat` admonition *By contributing code to Julia, you are agreeing to release it under the [MIT License](https://github.com/JuliaLang/julia/tree/master/LICENSE.md).* -The Julia community uses [GitHub issues](https://github.com/JuliaLang/julia/issues) to track and discuss problems, feature requests, and pull requests (PR). You can make pull requests for incomplete features to get code review. The convention is to prefix the pull request title with "WIP:" for Work In Progress, or "RFC:" for Request for Comments when work is completed and ready for merging. This will prevent accidental merging of work that is in progress. +The Julia community uses [GitHub issues](https://github.com/JuliaLang/julia/issues) to track and discuss problems, feature requests, and pull requests (PR). + +Issues and pull requests should have self explanatory titles such that they can be understood from the list of PRs and Issues. +i.e. `Add {feature}` and `Fix {bug}` are good, `Fix #12345. Corrects the bug.` is bad. + +You can make pull requests for incomplete features to get code review. The convention is to open these a draft PRs and prefix +the pull request title with "WIP:" for Work In Progress, or "RFC:" for Request for Comments when work is completed and ready +for merging. This will prevent accidental merging of work that is in progress. Note: These instructions are for adding to or improving functionality in the base library. Before getting started, it can be helpful to discuss the proposed changes or additions on the [Julia Discourse forum](https://discourse.julialang.org) or in a GitHub issue---it's possible your proposed change belongs in a package rather than the core language. Also, keep in mind that changing stuff in the base can potentially break a lot of things. Finally, because of the time required to build Julia, note that it's usually faster to develop your code in stand-alone files, get it working, and then migrate it into the base libraries. @@ -269,7 +276,7 @@ Be sure to change the UUID value back before making the pull request. ### Contributing to patch releases -The process of creating a patch release is roughly as follows: +The process of [creating a patch release](https://docs.julialang.org/en/v1/devdocs/build/distributing/#Point-releasing-101) is roughly as follows: 1. Create a new branch (e.g. `backports-release-1.6`) against the relevant minor release branch (e.g. `release-1.6`). Usually a corresponding pull request is created as well. @@ -293,7 +300,7 @@ The process of creating a patch release is roughly as follows: 6. Ping `@JuliaLang/releases` to tag the patch release and update the website. 7. Open a pull request that bumps the version of the relevant minor release to the - next prerelase patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37724). + next prerelease patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37724). 
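As an aside to the "Writing tests" guidance above, here is a minimal sketch of the kind of edge-case test it describes — converting between integer types near the top of a type's range. The testset name and the specific values are illustrative only:

```julia
using Test

@testset "integer conversion near typemax" begin
    # typemax(Int16) == 32767 still fits in an Int32, so conversion is exact
    @test Int32(typemax(Int16)) == 32767
    # one past typemax(Int16) cannot be represented as an Int16
    @test_throws InexactError Int16(Int32(typemax(Int16)) + 1)
    # converting back at the boundary round-trips exactly
    @test Int16(Int32(typemax(Int16))) === typemax(Int16)
end
```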
Step 2 above, i.e. backporting commits to the `backports-release-X.Y` branch, has largely been automated via [`Backporter`](https://github.com/KristofferC/Backporter): Backporter @@ -325,7 +332,7 @@ please remove the `backport-X.Y` tag from the originating pull request for the c - use lower case with underscores for method names - it is generally preferred to use ASCII operators and identifiers over Unicode equivalents whenever possible - - in docstring refer to the language as "Julia" and the executable as "`julia`" + - in docstrings refer to the language as "Julia" and the executable as "`julia`" #### General Formatting Guidelines For C code contributions @@ -340,12 +347,10 @@ please remove the `backport-X.Y` tag from the originating pull request for the c ### Git Recommendations For Pull Requests - Avoid working from the `master` branch of your fork, creating a new branch will make it easier if Julia's `master` changes and you need to update your pull request. - - Try to [squash](http://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html) together small commits that make repeated changes to the same section of code so your pull request is easier to review, and Julia's history won't have any broken intermediate commits. A reasonable number of separate well-factored commits is fine, especially for larger changes. + - Try to [squash](http://gitready.com/advanced/2009/02/10/squashing-commits-with-rebase.html) together small commits that make repeated changes to the same section of code so your pull request is easier to review. A reasonable number of separate well-factored commits is fine, especially for larger changes. - If any conflicts arise due to changes in Julia's `master`, prefer updating your pull request branch with `git rebase` versus `git merge` or `git pull`, since the latter will introduce merge commits that clutter the git history with noise that makes your changes more difficult to review. - - If you see any unrelated changes to submodules like `deps/libuv`, `deps/openlibm`, etc., try running `git submodule update` first. - Descriptive commit messages are good. - Using `git add -p` or `git add -i` can be useful to avoid accidentally committing unrelated changes. - - GitHub does not send notifications when you push a new commit to a pull request, so please add a comment to the pull request thread to let reviewers know when you've made changes. - When linking to specific lines of code in discussion of an issue or pull request, hit the `y` key while viewing code on GitHub to reload the page with a URL that includes the specific version that you're viewing. That way any lines of code that you refer to will still make sense in the future, even if the content of the file changes. - Whitespace can be automatically removed from existing commits with `git rebase`. 
- To remove whitespace for the previous commit, run @@ -365,7 +370,7 @@ please remove the `backport-X.Y` tag from the originating pull request for the c - **Community:** - **Source code:** - **Documentation:** - - **Code coverage:** + - **Code coverage:** * Design of Julia - [Julia: A Fresh Approach to Numerical Computing](https://julialang.org/assets/research/julia-fresh-approach-BEKS.pdf) diff --git a/HISTORY.md b/HISTORY.md index eb661d5e53a18..1d46189c74c51 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,424 @@ +Julia v1.10 Release Notes +========================= + +New language features +--------------------- + +* JuliaSyntax.jl is now used as the default parser, providing better diagnostics and faster + parsing. Set environment variable `JULIA_USE_FLISP_PARSER` to `1` to switch back to the old + parser if necessary (and if you find this necessary, please file an issue) ([#46372]). +* `⥺` (U+297A, `\leftarrowsubset`) and `⥷` (U+2977, `\leftarrowless`) may now be used as + binary operators with arrow precedence ([#45962]). + +Language changes +---------------- + +* When a task forks a child, the parent task's task-local RNG (random number generator) is no longer affected. The seeding of child based on the parent task also takes a more disciplined approach to collision resistance, using a design based on the SplitMix and DotMix splittable RNG schemes ([#49110]). +* A new more-specific rule for methods resolves ambiguities containing Union{} in favor of + the method defined explicitly to handle the Union{} argument. This makes it possible to + define methods to explicitly handle Union{} without the ambiguities that commonly would + result previously. This also lets the runtime optimize certain method lookups in a way + that significantly improves load and inference times for heavily overloaded methods that + dispatch on Types (such as traits and constructors). +* The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127). +* The `@simd` macro now has more limited and clearer semantics: it only enables reordering and contraction + of floating-point operations, instead of turning on all "fastmath" optimizations. + If you observe performance regressions due to this change, you can recover previous behavior with `@fastmath @simd`, + if you are OK with all the optimizations enabled by the `@fastmath` macro ([#49405]). +* When a method with keyword arguments is displayed in the stack trace view, the textual + representation of the keyword arguments' type is simplified using the new + `@Kwargs{key1::Type1, ...}` macro syntax ([#49959]). + +Compiler/Runtime improvements +----------------------------- + +* Updated GC heuristics to count allocated pages instead of object sizes ([#50144]). This should help + some programs that consumed excessive memory before. +* The mark phase of the garbage collector is now multi-threaded ([#48600]). +* [JITLink](https://llvm.org/docs/JITLink.html) is enabled by default on Linux aarch64 when Julia is linked to LLVM 15 or later versions ([#49745]). + This should resolve many segmentation faults previously observed on this platform. +* The precompilation process now uses pidfile locks and orchestrates multiple julia processes to only have one process + spend effort precompiling while the others wait. Previously all would do the work and race to overwrite the cache files. 
+ ([#49052]) + +Command-line option changes +--------------------------- + +* New option `--gcthreads` to set how many threads will be used by the garbage collector ([#48600]). + The default is `N/2` where `N` is the number of worker threads (`--threads`) used by Julia. + +Build system changes +-------------------- + +* SparseArrays and SuiteSparse are no longer included in the default system image, so the core + language no longer contains GPL libraries. However, these libraries are still included + alongside the language in the standard binary distribution ([#44247], [#48979], [#49266]). + +New library functions +--------------------- + +* `tanpi` is now defined. It computes tan(π*x) more accurately than `tan(pi*x)` ([#48575]). +* `fourthroot(x)` is now defined in `Base.Math` and can be used to compute the fourth root of `x`. + It can also be accessed using the unicode character `∜`, which can be typed by `\fourthroot` ([#48899]). +* `Libc.memmove`, `Libc.memset`, and `Libc.memcpy` are now defined, whose functionality matches that of their respective C calls. +* `Base.isprecompiled(pkg::PkgId)` has been added, to identify whether a package has already been precompiled ([#50218]). + +New library features +-------------------- + +* `binomial(x, k)` now supports non-integer `x` ([#48124]). +* A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]). +* `printstyled` now supports italic output ([#45164]). +* `parent` and `parentindices` support `SubString`s. +* `replace(string, pattern...)` now supports an optional `IO` argument to + write the output to a stream rather than returning a string ([#48625]). +* `startswith` now supports seekable `IO` streams ([#43055]). + +Standard library changes +------------------------ + +* The `initialized=true` keyword assignment for `sortperm!` and `partialsortperm!` + is now a no-op ([#47979]). It previously exposed unsafe behavior ([#47977]). +* Printing integral `Rational`s will skip the denominator in `Rational`-typed IO context (e.g. in arrays) ([#45396]). + +#### Package Manager + +* `Pkg.precompile` now accepts `timing` as a keyword argument which displays per package timing information for precompilation (e.g. `Pkg.precompile(timing=true)`). + +#### LinearAlgebra + +* `AbstractQ` no longer subtypes `AbstractMatrix`. Moreover, `adjoint(Q::AbstractQ)` + no longer wraps `Q` in an `Adjoint` type, but instead in an `AdjointQ`, that itself + subtypes `AbstractQ`. This change accounts for the fact that typically `AbstractQ` + instances behave like function-based, matrix-backed linear operators, and hence don't + allow for efficient indexing. Also, many `AbstractQ` types can act on vectors/matrices + of different size, acting like a matrix with context-dependent size. With this change, + `AbstractQ` has a well-defined API that is described in detail in the + [Julia documentation](https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/#man-linalg-abstractq) + ([#46196]). +* Adjoints and transposes of `Factorization` objects are no longer wrapped in `Adjoint` + and `Transpose` wrappers, respectively. Instead, they are wrapped in + `AdjointFactorization` and `TransposeFactorization` types, which themselves subtype + `Factorization` ([#46874]). +* New functions `hermitianpart` and `hermitianpart!` for extracting the Hermitian + (real symmetric) part of a matrix ([#31836]). 
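To make the `hermitianpart` addition above concrete, a small sketch (assumes a Julia 1.10+ session; the example matrix is arbitrary):

```julia
using LinearAlgebra

A = [1.0 2.0;
     4.0 3.0]
H = hermitianpart(A)          # Hermitian wrapper around (A + A') / 2
@assert H == [1.0 3.0; 3.0 3.0]
@assert ishermitian(H)
```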
+* The `norm` of the adjoint or transpose of an `AbstractMatrix` now returns the norm of the + parent matrix by default, matching the current behaviour for `AbstractVector`s ([#49020]). +* `eigen(A, B)` and `eigvals(A, B)`, where one of `A` or `B` is symmetric or Hermitian, + are now fully supported ([#49533]). +* `eigvals/eigen(A, cholesky(B))` now computes the generalized eigenvalues (`eigen`: and eigenvectors) + of `A` and `B` via Cholesky decomposition for positive definite `B`. Note: The second argument is + the output of `cholesky`. + +#### Printf + +* Format specifiers now support dynamic width and precision, e.g. `%*s` and `%*.*g` ([#40105]). + +#### REPL + +* When stack traces are printed, the printed depth of types in function signatures will be limited + to avoid overly verbose output ([#49795]). + +#### Test + +* The `@test_broken` macro (or `@test` with `broken=true`) now complains if the test expression returns a + non-boolean value in the same way as a non-broken test ([#47804]). +* When a call to `@test` fails or errors inside a function, a larger stacktrace is now printed such that the location of the test within a `@testset` can be retrieved ([#49451]). + +#### InteractiveUtils + +* `code_native` and `@code_native` now default to intel syntax instead of AT&T. +* `@time_imports` now shows the timing of any module `__init__()`s that are run ([#49529]). + +Deprecated or removed +--------------------- + +* The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]). + + +[#31836]: https://github.com/JuliaLang/julia/issues/31836 +[#40105]: https://github.com/JuliaLang/julia/issues/40105 +[#43055]: https://github.com/JuliaLang/julia/issues/43055 +[#44247]: https://github.com/JuliaLang/julia/issues/44247 +[#45164]: https://github.com/JuliaLang/julia/issues/45164 +[#45396]: https://github.com/JuliaLang/julia/issues/45396 +[#45962]: https://github.com/JuliaLang/julia/issues/45962 +[#46196]: https://github.com/JuliaLang/julia/issues/46196 +[#46372]: https://github.com/JuliaLang/julia/issues/46372 +[#46874]: https://github.com/JuliaLang/julia/issues/46874 +[#47044]: https://github.com/JuliaLang/julia/issues/47044 +[#47804]: https://github.com/JuliaLang/julia/issues/47804 +[#47977]: https://github.com/JuliaLang/julia/issues/47977 +[#47979]: https://github.com/JuliaLang/julia/issues/47979 +[#48124]: https://github.com/JuliaLang/julia/issues/48124 +[#48575]: https://github.com/JuliaLang/julia/issues/48575 +[#48600]: https://github.com/JuliaLang/julia/issues/48600 +[#48625]: https://github.com/JuliaLang/julia/issues/48625 +[#48682]: https://github.com/JuliaLang/julia/issues/48682 +[#48899]: https://github.com/JuliaLang/julia/issues/48899 +[#48979]: https://github.com/JuliaLang/julia/issues/48979 +[#49020]: https://github.com/JuliaLang/julia/issues/49020 +[#49110]: https://github.com/JuliaLang/julia/issues/49110 +[#49266]: https://github.com/JuliaLang/julia/issues/49266 +[#49405]: https://github.com/JuliaLang/julia/issues/49405 +[#49451]: https://github.com/JuliaLang/julia/issues/49451 +[#49529]: https://github.com/JuliaLang/julia/issues/49529 +[#49533]: https://github.com/JuliaLang/julia/issues/49533 +[#49745]: https://github.com/JuliaLang/julia/issues/49745 +[#49795]: https://github.com/JuliaLang/julia/issues/49795 +[#49959]: https://github.com/JuliaLang/julia/issues/49959 +[#50144]: https://github.com/JuliaLang/julia/issues/50144 +[#50218]: https://github.com/JuliaLang/julia/issues/50218 + +Julia v1.9 Release Notes +======================== + +New 
language features +--------------------- + +* It is now possible to assign to bindings in another module using `setproperty!(::Module, ::Symbol, x)` ([#44137]). +* Slurping in assignments is now also allowed in non-final position. This is handled via `Base.split_rest` ([#42902]). +* Character literals now support the same syntax allowed in string literals; i.e. the syntax can + represent invalid UTF-8 sequences as allowed by the `Char` type ([#44989]). +* Support for Unicode 15 ([#47392]). +* Nested combinations of tuples and named tuples of symbols are now allowed as type parameters ([#46300]). +* New builtins `getglobal(::Module, ::Symbol[, order])` and `setglobal!(::Module, ::Symbol, x[, order])` + for reading from and writing to globals. `getglobal` should now be preferred for accessing globals over + `getfield` ([#44137]). + +Language changes +---------------- + +* The `@invoke` macro introduced in 1.7 is now exported. Additionally, it now uses `Core.Typeof(x)` + rather than `Any` when a type annotation is omitted for an argument `x` so that types passed + as arguments are handled correctly ([#45807]). +* The `invokelatest` function and `@invokelatest` macro introduced in 1.7 are now exported ([#45831]). + +Compiler/Runtime improvements +----------------------------- + +* Time to first execution (TTFX, sometimes called time to first plot) is greatly reduced. Package precompilation now + saves native code into a "pkgimage", meaning that code generated during the precompilation process will not + require compilation after package load. Use of pkgimages can be disabled via `--pkgimages=no` ([#44527]) ([#47184]). +* The known quadratic behavior of type inference is now fixed and inference uses less memory in general. + Certain edge cases with auto-generated long functions (e.g. ModelingToolkit.jl with partial + differential equations and large causal models) should see significant compile-time improvements ([#45276], [#45404]). +* Non-concrete call sites can now be union-split to be inlined or statically resolved even + if there are multiple dispatch candidates. This may improve runtime performance in certain + situations where object types are not fully known statically, by statically resolving + `@nospecialize`-d call sites and avoiding excessive compilation ([#44512]). +* All uses of the `@pure` macro in `Base` have been replaced with the now-preferred `Base.@assume_effects` ([#44776]). +* `invoke(f, invokesig, args...)` calls to a less-specific method than would normally be chosen + for `f(args...)` are no longer spuriously invalidated when loading package precompile files ([#46010]). + +Command-line option changes +--------------------------- + +* In Linux and Windows, `--threads=auto` now tries to infer the usable number of CPUs from the + process affinity which is set typically in HPC and cloud environments ([#42340]). +* `--math-mode=fast` is now a no-op ([#41638]). Users are encouraged to use the @fastmath macro instead, which has more well-defined semantics. +* The `--threads` command-line option now accepts `auto|N[,auto|M]` where `M` specifies the + number of interactive threads to create (`auto` currently means 1) ([#42302]). +* New option `--heap-size-hint=` suggests a size limit to invoke garbage collection more eagerly. + The size may be specified in bytes, kilobytes (1000k), megabytes (300M), or gigabytes (1.5G) ([#45369]). + +Multi-threading changes +----------------------- + +* `Threads.@spawn` now accepts an optional first argument: `:default` or `:interactive`. 
+ An interactive task desires low latency and implicitly agrees to be short duration or to yield frequently. + Interactive tasks will run on interactive threads, if any are specified when Julia is started ([#42302]). +* Threads started outside the Julia runtime (e.g. from C or Java) can now become able to call into Julia code + by calling `jl_adopt_thread`. This is done automatically when entering Julia code via `cfunction` or a + `@ccallable` entry point. As a consequence, the number of threads can now change during execution ([#46609]). + +Build system changes +-------------------- + + +New library functions +--------------------- + +* New function `Iterators.flatmap` ([#44792]). +* New `pkgversion(m::Module)` function to get the version of the package that loaded + a given module, similar to `pkgdir(m::Module)` ([#45607]). +* New function `stack(x)` which generalises `reduce(hcat, x::Vector{<:Vector})` to any dimensionality, + and allows any iterator of iterators. Method `stack(f, x)` generalises `mapreduce(f, hcat, x)` and + is more efficient ([#43334]). +* New macro `@allocations` which is similar to `@allocated` except reporting the total number of allocations + rather than the total size of memory allocated ([#47367]). + +New library features +-------------------- + +* `RoundFromZero` now works for non-`BigFloat` types ([#41246]). +* `Dict` can be now shrunk manually by `sizehint!` ([#45004]). +* `@time` now separates out % time spent recompiling invalidated methods ([#45015]). + +Standard library changes +------------------------ + +* A known concurrency issue in `iterate` methods on `Dict` and other derived objects such + as `keys(::Dict)`, `values(::Dict)`, and `Set` is fixed. These methods of `iterate` can + now be called on a dictionary or set shared by arbitrary tasks provided that there are no + tasks mutating the dictionary or set ([#44534]). +* Predicate function negation `!f` now returns a composed function `(!) ∘ f` instead of an anonymous function ([#44752]). +* `eachslice` now works over multiple dimensions; `eachslice`, `eachrow` and `eachcol` return + a `Slices` object, which allows dispatching to provide more efficient methods ([#32310]). +* `@kwdef` is now exported and added to the public API ([#46273]). +* An issue with order of operations in `fld1` is now fixed ([#28973]). +* Sorting is now always stable by default, as `QuickSort` was stabilized ([#45222]). +* `Base.splat` is now exported. The return value is now a `Base.Splat` instead + of an anonymous function, which allows for pretty printing ([#42717]). + +#### Package Manager + +#### LinearAlgebra + +* The methods `a / b` and `b \ a` with `a` a scalar and `b` a vector, which were equivalent to `a * pinv(b)`, + have been removed due to the risk of confusion with elementwise division ([#44358]). +* We are now wholly reliant on libblastrampoline (LBT) for calling BLAS and LAPACK. OpenBLAS is shipped by default, + but building the system image with other BLAS/LAPACK libraries is not supported. Instead, it is recommended that + the LBT mechanism be used for swapping BLAS/LAPACK with vendor provided ones ([#44360]). +* `lu` supports a new pivoting strategy `RowNonZero()` that chooses the first non-zero pivot element, for use with + new arithmetic types and for pedagogy ([#44571]). +* `normalize(x, p=2)` now supports any normed vector space `x`, including scalars ([#44925]). 
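A short sketch of the `normalize` generalization noted above (assumes Julia 1.9+; the values are illustrative):

```julia
using LinearAlgebra

@assert normalize(-3.0) == -1.0               # scalar: x / norm(x)
@assert normalize(2.0 + 0.0im) == 1.0 + 0.0im
@assert normalize([3.0, 4.0]) ≈ [0.6, 0.8]    # familiar vector case, 2-norm
```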
+* The default number of BLAS threads is now set to the number of CPU threads on ARM CPUs, and half the number + of CPU threads on other architectures ([#45412], [#46085]). + +#### Printf + +* Error messages for bad format strings have been improved, to make it clearer what and where in the + format string is wrong ([#45366]). + +#### Profile + +* New function `Profile.take_heap_snapshot(file)` that writes a file in Chrome's JSON-based `.heapsnapshot` + format ([#46862]). + +#### Random + +* `randn` and `randexp` now work for any `AbstractFloat` type defining `rand` ([#44714]). + +#### REPL + +* `Alt-e` now opens the current input in an editor ([#33759]). +* The contextual module which is active in the REPL can be changed (it is `Main` by default), + via the `REPL.activate(::Module)` function or via typing the module in the REPL and pressing + the keybinding Alt-m ([#33872]). +* A "numbered prompt" mode which prints numbers for each input and output and stores evaluated results in `Out` can be + activated with `REPL.numbered_prompt!()`. See the manual for how to enable this at startup ([#46474]). +* Tab completion displays available keyword arguments ([#43536]) + +#### SuiteSparse + +* Code for the SuiteSparse solver wrappers has been moved to SparseArrays.jl. Solvers are now re-exported by + SuiteSparse.jl. + +#### SparseArrays + +* SuiteSparse solvers are now available as submodules of SparseArrays (). +* UMFPACK () and CHOLMOD () thread safety are improved by + avoiding globals and using locks. Multithreaded `ldiv!` of UMFPACK objects may now be performed safely. +* An experimental function `SparseArrays.allowscalar(::Bool)` allows scalar indexing of sparse arrays to be + disabled or enabled. This function is intended to help find accidental scalar indexing of `SparseMatrixCSC` + objects, which is a common source of performance issues (). + +#### Test + +* New fail-fast mode for testsets that will terminate the test run early if a failure or error occurs. + Set either via the `@testset` kwarg `failfast=true` or by setting env var `JULIA_TEST_FAILFAST` + to `"true"` i.e. in CI runs to request the job failure be posted eagerly when issues occur ([#45317]) + +#### Dates + +* Empty strings are no longer incorrectly parsed as valid `DateTime`s, `Date`s or `Time`s and instead throw an + `ArgumentError` in constructors and `parse`, while `nothing` is returned by `tryparse` ([#47117]). + +#### Distributed + +* The package environment (active project, `LOAD_PATH`, `DEPOT_PATH`) is now propagated when adding *local* workers + (e.g. with `addprocs(N::Int)` or through the `--procs=N` command line flag) ([#43270]). +* `addprocs` for local workers now accepts the `env` keyword argument for passing environment variables to worker + processes. This was already supported for remote workers ([#43270]). + +#### Unicode + +* `graphemes(s, m:n)` returns a substring of the `m`-th to `n`-th graphemes in `s` ([#44266]). + +#### DelimitedFiles + +* DelimitedFiles has been moved out as a separate package. + +Deprecated or removed +--------------------- + + +External dependencies +--------------------- + +* On Linux, now autodetects the system libstdc++ version, and automatically loads the system library if it is newer. + The old behavior of loading the bundled libstdc++ regardless of the system version can be restored by setting the + environment variable `JULIA_PROBE_LIBSTDCXX=0` ([#46976]). +* Removed `RPATH` from the julia binary. On Linux this may break libraries that have failed to set `RUNPATH`. 
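Among the v1.9 standard-library changes above, the Dates parsing change is easy to demonstrate; a minimal sketch (assumes a Julia 1.9+ session):

```julia
using Dates, Test

@test tryparse(DateTime, "") === nothing   # tryparse now returns nothing for ""
@test tryparse(Date, "") === nothing
@test_throws ArgumentError Date("")        # constructors and parse now throw
@test_throws ArgumentError DateTime("")
```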
+ +Tooling Improvements +-------------------- + +* Printing of `MethodError` and methods (such as from `methods(my_func)`) is now prettified and colored consistently + with printing of methods in stacktraces ([#45069]). + + +[#28973]: https://github.com/JuliaLang/julia/issues/28973 +[#32310]: https://github.com/JuliaLang/julia/issues/32310 +[#33759]: https://github.com/JuliaLang/julia/issues/33759 +[#33872]: https://github.com/JuliaLang/julia/issues/33872 +[#41246]: https://github.com/JuliaLang/julia/issues/41246 +[#41638]: https://github.com/JuliaLang/julia/issues/41638 +[#42302]: https://github.com/JuliaLang/julia/issues/42302 +[#42340]: https://github.com/JuliaLang/julia/issues/42340 +[#42717]: https://github.com/JuliaLang/julia/issues/42717 +[#42902]: https://github.com/JuliaLang/julia/issues/42902 +[#43270]: https://github.com/JuliaLang/julia/issues/43270 +[#43334]: https://github.com/JuliaLang/julia/issues/43334 +[#44137]: https://github.com/JuliaLang/julia/issues/44137 +[#44266]: https://github.com/JuliaLang/julia/issues/44266 +[#44358]: https://github.com/JuliaLang/julia/issues/44358 +[#44360]: https://github.com/JuliaLang/julia/issues/44360 +[#44512]: https://github.com/JuliaLang/julia/issues/44512 +[#44534]: https://github.com/JuliaLang/julia/issues/44534 +[#44571]: https://github.com/JuliaLang/julia/issues/44571 +[#44714]: https://github.com/JuliaLang/julia/issues/44714 +[#44752]: https://github.com/JuliaLang/julia/issues/44752 +[#44776]: https://github.com/JuliaLang/julia/issues/44776 +[#44792]: https://github.com/JuliaLang/julia/issues/44792 +[#44925]: https://github.com/JuliaLang/julia/issues/44925 +[#44989]: https://github.com/JuliaLang/julia/issues/44989 +[#45004]: https://github.com/JuliaLang/julia/issues/45004 +[#45015]: https://github.com/JuliaLang/julia/issues/45015 +[#45069]: https://github.com/JuliaLang/julia/issues/45069 +[#45222]: https://github.com/JuliaLang/julia/issues/45222 +[#45276]: https://github.com/JuliaLang/julia/issues/45276 +[#45317]: https://github.com/JuliaLang/julia/issues/45317 +[#45366]: https://github.com/JuliaLang/julia/issues/45366 +[#45369]: https://github.com/JuliaLang/julia/issues/45369 +[#45404]: https://github.com/JuliaLang/julia/issues/45404 +[#45412]: https://github.com/JuliaLang/julia/issues/45412 +[#45607]: https://github.com/JuliaLang/julia/issues/45607 +[#45807]: https://github.com/JuliaLang/julia/issues/45807 +[#45831]: https://github.com/JuliaLang/julia/issues/45831 +[#46010]: https://github.com/JuliaLang/julia/issues/46010 +[#46085]: https://github.com/JuliaLang/julia/issues/46085 +[#46273]: https://github.com/JuliaLang/julia/issues/46273 +[#46300]: https://github.com/JuliaLang/julia/issues/46300 +[#46474]: https://github.com/JuliaLang/julia/issues/46474 +[#46609]: https://github.com/JuliaLang/julia/issues/46609 +[#46862]: https://github.com/JuliaLang/julia/issues/46862 +[#46976]: https://github.com/JuliaLang/julia/issues/46976 +[#47367]: https://github.com/JuliaLang/julia/issues/47367 +[#47392]: https://github.com/JuliaLang/julia/issues/47392 + + Julia v1.8 Release Notes ======================== @@ -115,7 +536,8 @@ Standard library changes #### InteractiveUtils -* New macro `@time_imports` for reporting any time spent importing packages and their dependencies ([#41612]). +* New macro `@time_imports` for reporting any time spent importing packages and their dependencies, highlighting + compilation and recompilation time as percentages per import ([#41612],[#45064]). 
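A hypothetical usage sketch for the `@time_imports` macro described above — `Example` stands in for any installed package:

```julia
import InteractiveUtils

# Prints per-dependency load times, with compilation and recompilation
# shown as percentages per import (as described in the note above).
InteractiveUtils.@time_imports using Example
```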
#### LinearAlgebra @@ -200,6 +622,11 @@ Standard library changes definitions, including to other function calls, while recording all intermediate test results ([#42518]). * `TestLogger` and `LogRecord` are now exported from the Test stdlib ([#44080]). +#### Distributed + +* SSHManager now supports workers with csh/tcsh login shell, via `addprocs()` option `shell=:csh` ([#41485]). + + Deprecated or removed --------------------- @@ -231,6 +658,7 @@ Tooling Improvements [#41312]: https://github.com/JuliaLang/julia/issues/41312 [#41328]: https://github.com/JuliaLang/julia/issues/41328 [#41449]: https://github.com/JuliaLang/julia/issues/41449 +[#41485]: https://github.com/JuliaLang/julia/issues/41485 [#41551]: https://github.com/JuliaLang/julia/issues/41551 [#41576]: https://github.com/JuliaLang/julia/issues/41576 [#41612]: https://github.com/JuliaLang/julia/issues/41612 @@ -322,7 +750,7 @@ Language changes same seed) unless an explicit RNG object is used. See the section on the `Random` standard library below ([#40546]). * `Iterators.peel(itr)` now returns `nothing` when `itr` is empty instead of throwing a `BoundsError` ([#39607]). -* Multiple successive semicolons in an array expresion were previously ignored (e.g., `[1 ;; 2] == [1 ; 2]`). +* Multiple successive semicolons in an array expression were previously ignored (e.g., `[1 ;; 2] == [1 ; 2]`). This syntax is now used to separate dimensions (see **New language features**). Compiler/Runtime improvements @@ -544,7 +972,7 @@ Standard library changes target; other functions — `Tar.extract`, `Tar.rewrite`, `Tar.tree_hash` — treat a hard link as a copy of the target file (). * The standard format generated by `Tar.create` and `Tar.rewrite` now includes entries for non-empty - directories; this shouldn't be neccessary, but some tools that consume tarballs (including docker) + directories; this shouldn't be necessary, but some tools that consume tarballs (including docker) are confused by the absence of these directory entries (). * `Tar` now accepts tarballs with leading spaces in octal integer header fields: this is technically not a valid format according to the POSIX spec, but old Solaris `tar` commands produced tarballs like @@ -2094,7 +2522,7 @@ Language changes * Juxtaposing binary, octal, and hexadecimal literals is deprecated, since it can lead to confusing code such as `0xapi == 0xa * pi` ([#16356]). - * Numeric literal juxtaposition now has slighty lower precedence than unary operators, + * Numeric literal juxtaposition now has slightly lower precedence than unary operators, so for example `√2x` parses as `(√2) * x` ([#27641]). * Declaring arguments as `x::ANY` to avoid specialization has been replaced @@ -5161,7 +5589,7 @@ Library improvements for scalar indices to support indexing; all other indexing behaviors (including logical indexing, ranges of indices, vectors, colons, etc.) are implemented in default fallbacks. Similarly, they only need to implement - scalar `setindex!` to support all forms of indexed assingment ([#10525]). + scalar `setindex!` to support all forms of indexed assignment ([#10525]). * AbstractArrays that do not extend `similar` now return an `Array` by default ([#10525]). @@ -5190,7 +5618,7 @@ Library improvements * New types * Enums are now supported through the `@enum EnumName EnumValue1 - EnumValue2` syntax. Enum member values also support abitrary + EnumValue2` syntax. 
Enum member values also support arbitrary value assignment by the `@enum EnumName EnumValue1=1 EnumValue2=10 EnumValue3=20` syntax ([#10168]). @@ -5307,18 +5735,18 @@ Deprecated or removed * several syntax whitespace insensitivities have been deprecated ([#11891]). ```julia - # function call - f (x) - - # getindex - x [17] - rand(2) [1] - - # function definition - f (x) = x^2 - function foo (x) - x^2 - end + # function call + f (x) + + # getindex + x [17] + rand(2) [1] + + # function definition + f (x) = x^2 + function foo (x) + x^2 + end ``` * indexing with `Real`s that are not subtypes of `Integer` (`Rational`, `AbstractFloat`, etc.) has been deprecated ([#10458]). diff --git a/LICENSE.md b/LICENSE.md index fdf24e7603d73..d4125f4fba221 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2009-2022: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors +Copyright (c) 2009-2023: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/Make.inc b/Make.inc index c9692fa00c7e7..910fcdc276e3b 100644 --- a/Make.inc +++ b/Make.inc @@ -66,6 +66,7 @@ USE_SYSTEM_PATCHELF:=0 USE_SYSTEM_LIBWHICH:=0 USE_SYSTEM_ZLIB:=0 USE_SYSTEM_P7ZIP:=0 +USE_SYSTEM_LLD:=0 # Link to the LLVM shared library USE_LLVM_SHLIB := 1 @@ -76,7 +77,7 @@ JULIA_THREADS := 1 # Set to 1 to enable profiling with OProfile USE_OPROFILE_JITEVENTS ?= 0 -# USE_PERF_JITEVENTS defined below since default is OS specific +# USE_PERF_JITEVENTS, and USE_INTEL_JITEVENTS defined below since default is OS specific # assume we don't have LIBSSP support in our compiler, will enable later if likely true HAVE_SSP := 0 @@ -88,6 +89,16 @@ WITH_GC_DEBUG_ENV := 0 # Enable DTrace support WITH_DTRACE := 0 +# Enable ITTAPI integration +WITH_ITTAPI := 0 + +# Enable Tracy support +WITH_TRACY := 0 +WITH_TRACY_CALLSTACKS := 0 + +# Enable Timing Counts support +WITH_TIMING_COUNTS := 0 + # Prevent picking up $ARCH from the environment variables ARCH:= @@ -134,23 +145,6 @@ endif export BUILDROOT unexport O -# Make sure the user didn't try to specify a path that will confuse the shell / make -METACHARACTERS := ][?*{}() $$%:;&|!\#,\\`\": -ifneq (,$(findstring ',$(value BUILDROOT))) -$(error cowardly refusing to build into directory with a single-quote in the path) -endif -ifneq (,$(findstring ',$(value JULIAHOME))) -$(error cowardly refusing to build from source directory with a single-quote in the path) -endif -ifneq (,$(shell echo '$(value BUILDROOT)' | grep '[$(METACHARACTERS)]')) -$(error cowardly refusing to build into directory with a shell-metacharacter in the path\ - (got: $(value BUILDROOT))) -endif -ifneq (,$(shell echo '$(value JULIAHOME)' | grep '[$(METACHARACTERS)]')) -$(error cowardly refusing to build from source directory with a shell-metacharacter in the path\ - (got: $(value JULIAHOME))) -endif - # we include twice to pickup user definitions better # include from JULIAHOME first so that BUILDROOT can override MAYBE_HOST := @@ -191,18 +185,20 @@ endif JULIA_VERSION := $(shell cat $(JULIAHOME)/VERSION) JULIA_MAJOR_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'.' -f 1) JULIA_MINOR_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'.' 
-f 2) -JULIA_PATCH_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'.' -f 3) +JULIA_PATCH_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'+' -f 1 | cut -d'.' -f 3) # libjulia's SONAME will follow the format libjulia.so.$(SOMAJOR). Before v1.0.0, -# SOMAJOR will be a two-decimal value, e.g. libjulia.so.0.5, whereas at and beyond -# v1.0.0, SOMAJOR will be simply the major version number, e.g. libjulia.so.1 +# somajor was a two-decimal value (e.g. libjulia.so.0.5). During v1.0.x - v1.9.x, +# somajor was simply the major version number (e.g. libjulia.so.1). Starting in +# v1.10.0, somajor is major.minor again (e.g. libjulia.so.1.10) # The file itself will ultimately symlink to libjulia.so.$(SOMAJOR).$(SOMINOR) -ifeq ($(JULIA_MAJOR_VERSION),0) SOMAJOR := $(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION) SOMINOR := $(JULIA_PATCH_VERSION) -else -SOMAJOR := $(JULIA_MAJOR_VERSION) -SOMINOR := $(JULIA_MINOR_VERSION) + +# This suffix affects libjulia's SONAME and the symbol version associated with +# all of its exported symbols. +ifdef SYMBOL_VERSION_SUFFIX +SOMAJOR := $(SOMAJOR)_$(SYMBOL_VERSION_SUFFIX) endif ifneq ($(NO_GIT), 1) @@ -302,6 +298,9 @@ private_libdir := $(libdir)/julia endif build_private_libdir := $(build_libdir)/julia +private_libexecdir := $(libexecdir)/julia +build_private_libexecdir := $(build_libexecdir)/julia + # A helper functions for dealing with lazily-evaluated, expensive operations.. Spinning # up a python process to, for exaxmple, parse a TOML file is expensive, and we must wait # until the TOML files are on-disk before we can parse them. This means that we cannot @@ -326,7 +325,7 @@ define cache_rel_path $(1)_rel_eval = $(call rel_path,$(2),$($(1))) $(1)_rel = $$(call hit_cache,$(1)_rel_eval) endef -$(foreach D,libdir private_libdir datarootdir libexecdir docdir sysconfdir includedir,$(eval $(call cache_rel_path,$(D),$(bindir)))) +$(foreach D,libdir private_libdir datarootdir libexecdir private_libexecdir docdir sysconfdir includedir,$(eval $(call cache_rel_path,$(D),$(bindir)))) $(foreach D,build_libdir build_private_libdir,$(eval $(call cache_rel_path,$(D),$(build_bindir)))) # Save a special one: reverse_private_libdir_rel: usually just `../`, but good to be general: @@ -347,6 +346,7 @@ BUILD_LLVM_CLANG := 0 # see http://lldb.llvm.org/build.html for dependencies BUILD_LLDB := 0 BUILD_LIBCXX := 0 +BUILD_LLD := 1 # Options to enable Polly and its code-generation options USE_POLLY := 0 @@ -359,6 +359,10 @@ USE_MLIR := 0 # Options to use RegionVectorizer USE_RV := 0 +# Use `ccache` for speeding up recompilation of the C/C++ part of Julia. +# Requires the `ccache` executable to be in the `PATH` environment variable. 
+USECCACHE := 0 + # Cross-compile #XC_HOST := i686-w64-mingw32 #XC_HOST := x86_64-w64-mingw32 @@ -440,8 +444,10 @@ endif # Set to 1 to enable profiling with perf ifeq ("$(OS)", "Linux") USE_PERF_JITEVENTS ?= 1 +USE_INTEL_JITEVENTS ?= 1 else USE_PERF_JITEVENTS ?= 0 +USE_INTEL_JITEVENTS ?= 0 endif JULIACODEGEN := LLVM @@ -450,8 +456,8 @@ JULIACODEGEN := LLVM ifeq ($(FORCE_ASSERTIONS), 1) # C++ code needs to include LLVM header with the same assertion flag as LLVM # Use this flag to re-enable assertion in our code after all the LLVM headers are included -CXX_DISABLE_ASSERTION := -DISABLE_ASSERTIONS := +CXX_DISABLE_ASSERTION := -DJL_VERIFY_PASSES +DISABLE_ASSERTIONS := -DJL_VERIFY_PASSES else CXX_DISABLE_ASSERTION := -DJL_NDEBUG DISABLE_ASSERTIONS := -DNDEBUG -DJL_NDEBUG @@ -459,7 +465,13 @@ endif # Compiler specific stuff -CC_VERSION_STRING = $(shell $(CC) --version) +ifeq (default,$(origin CC)) +CC := $(CROSS_COMPILE)$(CC) # attempt to add cross-compiler prefix, if the user + # is not overriding the default, to form target-triple-cc (which + # may not exist), and use that to decide what compiler the user + # is using for the target build (or default to gcc) +endif +CC_VERSION_STRING = $(shell $(CC) --version 2>/dev/null) ifneq (,$(findstring clang,$(CC_VERSION_STRING))) USECLANG := 1 USEGCC := 0 @@ -474,43 +486,67 @@ FC := $(CROSS_COMPILE)gfortran ifeq ($(OS), Darwin) APPLE_ARCH := $(shell uname -m) ifneq ($(APPLE_ARCH),arm64) -MACOSX_VERSION_MIN := 10.10 +MACOSX_VERSION_MIN := 10.14 else MACOSX_VERSION_MIN := 11.0 endif endif -ifeq ($(USEGCC),1) -CC := $(CROSS_COMPILE)gcc -CXX := $(CROSS_COMPILE)g++ -JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -# AArch64 needs this flag to generate the .eh_frame used by libunwind -JCPPFLAGS := -fasynchronous-unwind-tables -JCXXFLAGS := -pipe $(fPIC) -fno-rtti -std=c++14 +JCFLAGS_COMMON := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 +JCFLAGS_CLANG := $(JCFLAGS_COMMON) +JCFLAGS_GCC := $(JCFLAGS_COMMON) -fno-gnu-unique + +# These flags are needed to generate decent debug info +JCPPFLAGS_COMMON := -fasynchronous-unwind-tables +JCPPFLAGS_CLANG := $(JCPPFLAGS_COMMON) -mllvm -enable-tail-merge=0 +JCPPFLAGS_GCC := $(JCPPFLAGS_COMMON) -fno-tree-tail-merge + +JCXXFLAGS_COMMON := -pipe $(fPIC) -fno-rtti -std=c++14 +JCXXFLAGS_CLANG := $(JCXXFLAGS_COMMON) -pedantic +JCXXFLAGS_GCC := $(JCXXFLAGS_COMMON) -fno-gnu-unique + +DEBUGFLAGS_COMMON := -O0 -DJL_DEBUG_BUILD -fstack-protector +DEBUGFLAGS_CLANG := $(DEBUGFLAGS_COMMON) -g +DEBUGFLAGS_GCC := $(DEBUGFLAGS_COMMON) -ggdb2 + +SHIPFLAGS_COMMON := -O3 +SHIPFLAGS_CLANG := $(SHIPFLAGS_COMMON) -g +SHIPFLAGS_GCC := $(SHIPFLAGS_COMMON) -ggdb2 -falign-functions + +ifeq ($(OS), Darwin) +JCPPFLAGS_CLANG += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1 +endif + ifneq ($(OS), WINNT) # Do not enable on windows to avoid warnings from libuv. 
-JCXXFLAGS += -pedantic +JCXXFLAGS_GCC += -pedantic endif -DEBUGFLAGS := -O0 -ggdb2 -DJL_DEBUG_BUILD -fstack-protector -SHIPFLAGS := -O3 -ggdb2 -falign-functions + +ifeq ($(USEGCC),1) +CC := $(CROSS_COMPILE)gcc +CXX := $(CROSS_COMPILE)g++ +JCFLAGS := $(JCFLAGS_GCC) +JCPPFLAGS := $(JCPPFLAGS_GCC) +JCXXFLAGS := $(JCXXFLAGS_GCC) +DEBUGFLAGS := $(DEBUGFLAGS_GCC) +SHIPFLAGS := $(SHIPFLAGS_GCC) endif ifeq ($(USECLANG),1) -CC := $(CROSS_COMPILE)clang -CXX := $(CROSS_COMPILE)clang++ -JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -# AArch64 needs this flag to generate the .eh_frame used by libunwind -JCPPFLAGS := -fasynchronous-unwind-tables -JCXXFLAGS := -pipe $(fPIC) -fno-rtti -pedantic -std=c++14 -DEBUGFLAGS := -O0 -g -DJL_DEBUG_BUILD -fstack-protector -SHIPFLAGS := -O3 -g +CC := $(CROSS_COMPILE)clang +CXX := $(CROSS_COMPILE)clang++ +JCFLAGS := $(JCFLAGS_CLANG) +JCPPFLAGS := $(JCPPFLAGS_CLANG) +JCXXFLAGS := $(JCXXFLAGS_CLANG) +DEBUGFLAGS := $(DEBUGFLAGS_CLANG) +SHIPFLAGS := $(SHIPFLAGS_CLANG) + ifeq ($(OS), Darwin) CC += -mmacosx-version-min=$(MACOSX_VERSION_MIN) CXX += -mmacosx-version-min=$(MACOSX_VERSION_MIN) FC += -mmacosx-version-min=$(MACOSX_VERSION_MIN) # export MACOSX_DEPLOYMENT_TARGET so that ld picks it up, especially for deps export MACOSX_DEPLOYMENT_TARGET=$(MACOSX_VERSION_MIN) -JCPPFLAGS += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1 endif endif @@ -546,8 +582,8 @@ CC_BASE := $(shell echo $(CC) | cut -d' ' -f1) CC_ARG := $(shell echo $(CC) | cut -s -d' ' -f2-) CXX_BASE := $(shell echo $(CXX) | cut -d' ' -f1) CXX_ARG := $(shell echo $(CXX) | cut -s -d' ' -f2-) -FC_BASE := $(shell echo $(FC) | cut -d' ' -f1) -FC_ARG := $(shell echo $(FC) | cut -s -d' ' -f2-) +FC_BASE := $(shell echo $(FC) 2>/dev/null | cut -d' ' -f1) +FC_ARG := $(shell echo $(FC) 2>/dev/null | cut -s -d' ' -f2-) endif JFFLAGS := -O2 $(fPIC) @@ -563,23 +599,26 @@ CPP_STDOUT := $(CPP) -P # file extensions ifeq ($(OS), WINNT) SHLIB_EXT := dll + PATHSEP := ; else ifeq ($(OS), Darwin) SHLIB_EXT := dylib + PATHSEP := : else SHLIB_EXT := so + PATHSEP := : endif ifeq ($(OS),WINNT) define versioned_libname -$$(if $(2),$(1)-$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT)) +$(if $(2),$(1)-$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT)) endef else ifeq ($(OS),Darwin) define versioned_libname -$$(if $(2),$(1).$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT)) +$(if $(2),$(1).$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT)) endef else define versioned_libname -$$(if $(2),$(1).$(SHLIB_EXT).$(2),$(1).$(SHLIB_EXT)) +$(if $(2),$(1).$(SHLIB_EXT).$(2),$(1).$(SHLIB_EXT)) endef endif @@ -687,10 +726,13 @@ SANITIZE_LDFLAGS := ifeq ($(SANITIZE_MEMORY),1) SANITIZE_OPTS += -fsanitize=memory -fsanitize-memory-track-origins -fno-omit-frame-pointer SANITIZE_LDFLAGS += $(SANITIZE_OPTS) -endif +ifneq ($(findstring $(OS),Linux FreeBSD),) +SANITIZE_LDFLAGS += -Wl,--warn-unresolved-symbols +endif # OS Linux or FreeBSD +endif # SANITIZE_MEMORY=1 ifeq ($(SANITIZE_ADDRESS),1) -SANITIZE_OPTS += -fsanitize=address -mllvm -asan-stack=0 -SANITIZE_LDFLAGS += -fsanitize=address +SANITIZE_OPTS += -fsanitize=address +SANITIZE_LDFLAGS += -fsanitize=address -shared-libasan endif ifeq ($(SANITIZE_THREAD),1) SANITIZE_OPTS += -fsanitize=thread @@ -705,7 +747,7 @@ JLDFLAGS += $(SANITIZE_LDFLAGS) endif # SANITIZE TAR := $(shell which gtar 2>/dev/null || which tar 2>/dev/null) -TAR_TEST := $(shell $(TAR) --help 2>&1 | egrep 'bsdtar|strip-components') +TAR_TEST := $(shell $(TAR) --help 2>&1 | grep -E 'bsdtar|strip-components') ifeq (,$(findstring components,$(TAR_TEST))) ifneq 
(bsdtar,$(findstring bsdtar,$(TAR_TEST))) $(error "please install either GNU tar or bsdtar") @@ -726,7 +768,28 @@ ifeq ($(WITH_DTRACE), 1) JCXXFLAGS += -DUSE_DTRACE JCFLAGS += -DUSE_DTRACE DTRACE := dtrace -else +endif + +ifeq ($(WITH_ITTAPI), 1) +JCXXFLAGS += -DUSE_ITTAPI +JCFLAGS += -DUSE_ITTAPI +LIBITTAPI:=-littnotify +endif + +ifeq ($(WITH_TRACY), 1) +JCXXFLAGS += -DUSE_TRACY -DTRACY_ENABLE -DTRACY_FIBERS +JCFLAGS += -DUSE_TRACY -DTRACY_ENABLE -DTRACY_FIBERS +LIBTRACYCLIENT:=-lTracyClient +endif +ifeq ($(WITH_TRACY_CALLSTACKS), 1) +JCXXFLAGS += -DTRACY_CALLSTACK=32 +JCFLAGS += -DTRACY_CALLSTACK=32 +LIBTRACYCLIENT:=-lTracyClient +endif + +ifeq ($(WITH_TIMING_COUNTS), 1) +JCXXFLAGS += -DUSE_TIMING_COUNTS +JCFLAGS += -DUSE_TIMING_COUNTS endif # =========================================================================== @@ -772,6 +835,8 @@ else ifeq (cygwin, $(shell $(CC) -dumpmachine | cut -d\- -f3)) $(error "cannot build julia with cygwin-target compilers. set XC_HOST to i686-w64-mingw32 or x86_64-w64-mingw32 for mingw cross-compile") else ifeq (msys, $(shell $(CC) -dumpmachine | cut -d\- -f3)) $(error "cannot build julia with msys-target compilers. please see the README.windows document for instructions on setting up mingw-w64 compilers") +else ifneq (,$(findstring MSYS,$(shell uname))) +$(error "cannot build julia from a msys shell. please launch a mingw shell instead by setting MSYSTEM=MINGW64") endif ifeq ($(BUILD_OS),Darwin) @@ -839,10 +904,20 @@ else ISX86:=0 endif + +#If nothing is set default to native unless we are cross-compiling +ifeq ($(MARCH)$(MCPU)$(MTUNE)$(JULIA_CPU_TARGET)$(XC_HOST),) +ifeq ($(ARCH),aarch64) #ARM recommends only setting MCPU for AArch64 +MCPU=native +else +MARCH=native +MTUNE=native +endif +endif + # If we are running on powerpc64le or ppc64le, set certain options automatically ifneq (,$(filter $(ARCH), powerpc64le ppc64le)) JCFLAGS += -fsigned-char -OPENBLAS_DYNAMIC_ARCH:=0 OPENBLAS_TARGET_ARCH:=POWER8 BINARY:=64 # GCC doesn't do -march= on ppc64le @@ -916,6 +991,14 @@ FC += -mcpu=$(MCPU) JULIA_CPU_TARGET ?= $(MCPU) endif +# Set MTUNE-specific flags +ifneq ($(MTUNE),) +CC += -mtune=$(MTUNE) +CXX += -mtune=$(MTUNE) +FC += -mtune=$(MTUNE) +JULIA_CPU_TARGET ?= $(MTUNE) +endif + ifneq ($(MARCH)$(MCPU),) ifeq ($(OS),Darwin) # on Darwin, the standalone `as` program doesn't know @@ -1030,6 +1113,10 @@ PATCHELF := patchelf else PATCHELF := $(build_depsbindir)/patchelf endif +# In the standard build system we want to patch files with `--set-rpath`, but downstream +# packagers like Spack may want to use `--add-rpath` instead, leave them the possibility to +# choose the command. +PATCHELF_SET_RPATH_ARG := --set-rpath ifeq ($(USE_SYSTEM_LIBWHICH), 1) LIBWHICH := libwhich @@ -1110,7 +1197,7 @@ endif # We need python for things like BB triplet recognition. 
We don't really care # about version, generally, so just find something that works: -PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo not found)" +PYTHON := $(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo not found) PYTHON_SYSTEM := $(shell $(PYTHON) -c 'from __future__ import print_function; import platform; print(platform.system())') # If we're running on Cygwin, but using a native-windows Python, we need to use cygpath -w @@ -1136,8 +1223,11 @@ USE_BINARYBUILDER ?= 0 endif # Auto-detect triplet once, create different versions that we use as defaults below for each BB install target -FC_VERSION := $(shell $(FC) --version 2>/dev/null | head -1) -FC_OR_CC_VERSION := $(or $(FC_VERSION),$(shell $(CC) --version 2>/dev/null | head -1)) +FC_VERSION := $(shell $(FC) -dM -E - < /dev/null 2>/dev/null | grep __GNUC__ | cut -d' ' -f3) +ifeq ($(USEGCC)$(FC_VERSION),1) +FC_OR_CC_VERSION := $(shell $(CC) -dumpfullversion -dumpversion 2>/dev/null | cut -d'.' -f1) +# n.b. clang's __GNUC__ macro pretends to be gcc 4.2.1, so leave it as the empty string here if the compiler is not certain to be GCC +endif BB_TRIPLET_LIBGFORTRAN_CXXABI := $(shell $(call invoke_python,$(JULIAHOME)/contrib/normalize_triplet.py) $(or $(XC_HOST),$(XC_HOST),$(BUILD_MACHINE)) "$(FC_OR_CC_VERSION)" "$(or $(shell echo '\#include ' | $(CXX) $(CXXFLAGS) -x c++ -dM -E - | grep _GLIBCXX_USE_CXX11_ABI | awk '{ print $$3 }' ),1)") BB_TRIPLET_LIBGFORTRAN := $(subst $(SPACE),-,$(filter-out cxx%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI)))) BB_TRIPLET_CXXABI := $(subst $(SPACE),-,$(filter-out libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI)))) @@ -1145,11 +1235,34 @@ BB_TRIPLET := $(subst $(SPACE),-,$(filter-out cxx%,$(filter-out libgfortran%,$(s LIBGFORTRAN_VERSION := $(subst libgfortran,,$(filter libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN)))) +# CSL_NEXT_GLIBCXX_VERSION is a triple of the symbols representing support for whatever +# the next libstdc++ version would be. This is used for two things. +# 1. Whether the system libraries are new enough, if we need to use the libs bundled with CSL +# 2. To know which libstdc++ to load at runtime +# We want whichever libstdc++ library is newer, because if we don't it can cause problems. +# While what CSL bundles is quite bleeding-edge compared to what most distros ship, if someone +# tries to build an older branch of Julia, the version of CSL that ships with it may be +# relatively old. This is not a problem for code that is built in BB, but when we build Julia +# with the system compiler, that compiler uses the version of `libstdc++` that it is bundled +# with, and we can get linker errors when trying to run that `julia` executable with the +# `libstdc++` that comes from the (now old) BB-built CSL. +# To fix this, we take note when the system `libstdc++.so` is newer than whatever we +# would get from CSL (by searching for a `GLIBCXX_X.Y.Z` symbol that does not exist +# in our CSL, but would in a newer one), and default to `USE_BINARYBUILDER_CSL=0` in +# this case. This ensures that we link against a version with the symbols required. +# We also check the system libstdc++ at runtime in the cli loader library, and +# load it if it contains the version symbol that indicates that it is newer than the one +# shipped with CSL. 
Although we do not depend on any of the symbols, it is entirely +# possible that a user might choose to install a library which depends on symbols provided +# by a newer libstdc++. Without runtime detection, those libraries would break. +CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.33|GLIBCXX_3\.5\.|GLIBCXX_4\. + + # This is the set of projects that BinaryBuilder dependencies are hooked up for. # Note: we explicitly _do not_ define `CSL` here, since it requires some more # advanced techniques to decide whether it should be installed from a BB source # or not. See `deps/csl.mk` for more detail. -BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP +BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT define SET_BB_DEFAULT # First, check to see if BB is disabled on a global setting ifeq ($$(USE_BINARYBUILDER),0) @@ -1201,18 +1314,16 @@ ifneq (,$(filter $(OS),WINNT emscripten)) RPATH := RPATH_ORIGIN := RPATH_ESCAPED_ORIGIN := - RPATH_LIB := else ifeq ($(OS), Darwin) RPATH := -Wl,-rpath,'@executable_path/$(build_libdir_rel)' RPATH_ORIGIN := -Wl,-rpath,'@loader_path/' RPATH_ESCAPED_ORIGIN := $(RPATH_ORIGIN) - RPATH_LIB := -Wl,-rpath,'@loader_path/' else - RPATH := -Wl,-rpath,'$$ORIGIN/$(build_libdir_rel)' -Wl,-rpath,'$$ORIGIN/$(build_private_libdir_rel)' -Wl,-rpath-link,$(build_shlibdir) -Wl,-z,origin - RPATH_ORIGIN := -Wl,-rpath,'$$ORIGIN' -Wl,-z,origin - RPATH_ESCAPED_ORIGIN := -Wl,-rpath,'\$$\$$ORIGIN' -Wl,-z,origin -Wl,-rpath-link,$(build_shlibdir) - RPATH_LIB := -Wl,-rpath,'$$ORIGIN/' -Wl,-z,origin + RPATH := -Wl,-rpath,'$$ORIGIN/$(build_libdir_rel)' -Wl,-rpath,'$$ORIGIN/$(build_private_libdir_rel)' -Wl,-rpath-link,$(build_shlibdir) -Wl,-z,origin -Wl,--enable-new-dtags + RPATH_ORIGIN := -Wl,-rpath,'$$ORIGIN' -Wl,-z,origin -Wl,--enable-new-dtags + RPATH_ESCAPED_ORIGIN := -Wl,-rpath,'\$$\$$ORIGIN' -Wl,-z,origin -Wl,-rpath-link,$(build_shlibdir) -Wl,--enable-new-dtags endif +RPATH_LIB := $(RPATH_ORIGIN) # --whole-archive ifeq ($(OS), Darwin) @@ -1223,6 +1334,9 @@ else NO_WHOLE_ARCHIVE := -Wl,--no-whole-archive endif +# Initialize these once, then add to them in OS-specific blocks +JLIBLDFLAGS := + ifeq ($(OS), Linux) OSLIBS += -Wl,--no-as-needed -ldl -lrt -lpthread -latomic -Wl,--export-dynamic,--as-needed,--no-whole-archive # Detect if ifunc is supported @@ -1231,19 +1345,19 @@ ifeq (supported, $(shell echo $(IFUNC_DETECT_SRC) | $(CC) -Werror -x c - -S -o / JCPPFLAGS += -DJULIA_HAS_IFUNC_SUPPORT=1 endif JLDFLAGS += -Wl,-Bdynamic -OSLIBS += -Wl,--version-script=$(JULIAHOME)/src/julia.expmap +OSLIBS += -Wl,--version-script=$(BUILDROOT)/src/julia.expmap ifneq ($(SANITIZE),1) JLDFLAGS += -Wl,-no-undefined endif ifeq (-Bsymbolic-functions, $(shell $(LD) --help | grep -o -e "-Bsymbolic-functions")) -JLIBLDFLAGS := -Wl,-Bsymbolic-functions -else -JLIBLDFLAGS := +JLIBLDFLAGS += -Wl,-Bsymbolic-functions +endif +ifeq (--enable-new-dtags, $(shell $(LD) --help | grep -o -e "--enable-new-dtags")) +JLIBLDFLAGS += -Wl,--enable-new-dtags endif + # Linker doesn't detect automatically that Julia doesn't need executable stack JLIBLDFLAGS += -Wl,-z,noexecstack -else ifneq ($(OS), Darwin) -JLIBLDFLAGS := endif ifeq ($(OS), FreeBSD) @@ -1256,7 +1370,7 @@ OSLIBS += -lelf -lkvm -lrt -lpthread -latomic # See #21788 OSLIBS += -lgcc_s -OSLIBS += -Wl,--export-dynamic 
-Wl,--version-script=$(JULIAHOME)/src/julia.expmap \ +OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \ $(NO_WHOLE_ARCHIVE) endif @@ -1266,12 +1380,12 @@ OSLIBS += -framework CoreFoundation WHOLE_ARCHIVE := -Xlinker -all_load NO_WHOLE_ARCHIVE := HAVE_SSP := 1 -JLIBLDFLAGS := -Wl,-compatibility_version,$(SOMAJOR) -Wl,-current_version,$(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION).$(JULIA_PATCH_VERSION) +JLIBLDFLAGS += -Wl,-compatibility_version,$(SOMAJOR) -Wl,-current_version,$(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION).$(JULIA_PATCH_VERSION) endif ifeq ($(OS), WINNT) HAVE_SSP := 1 -OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(JULIAHOME)/src/julia.expmap \ +OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \ $(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic JLDFLAGS += -Wl,--stack,8388608 ifeq ($(ARCH),i686) @@ -1279,7 +1393,13 @@ JLDFLAGS += -Wl,--large-address-aware endif JCPPFLAGS += -D_WIN32_WINNT=0x0502 UNTRUSTED_SYSTEM_LIBM := 1 -endif +# Use hard links for files on windows, rather than soft links +# https://stackoverflow.com/questions/3648819/how-to-make-a-symbolic-link-with-cygwin-in-windows-7 +# Usage: $(WIN_MAKE_HARD_LINK) +WIN_MAKE_HARD_LINK := cp --dereference --link --force +else +WIN_MAKE_HARD_LINK := true -ignore +endif # $(OS) == WINNT # Threads ifneq ($(JULIA_THREADS), 0) @@ -1362,7 +1482,6 @@ CLANGSA_FLAGS := CLANGSA_CXXFLAGS := ifeq ($(OS), Darwin) # on new XCode, the files are hidden CLANGSA_FLAGS += -isysroot $(shell xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk -CLANGSA_CXXFLAGS += -isystem $(shell xcode-select -p)/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1 endif ifeq ($(USEGCC),1) # try to help clang find the c++ files for CC by guessing the value for --prefix @@ -1388,13 +1507,13 @@ define symlink_target # (from, to-dir, to-name) CLEAN_TARGETS += clean-$$(abspath $(2)/$(3)) clean-$$(abspath $(2)/$(3)): ifeq ($(BUILD_OS), WINNT) - -cmd //C rmdir $$(call mingw_to_dos,$(2)/$(3),cd $(2) &&) + -cmd //C rmdir $$(call cygpath_w,$(2)/$(3)) else rm -rf $$(abspath $(2)/$(3)) endif $$(abspath $(2)/$(3)): | $$(abspath $(2)) ifeq ($$(BUILD_OS), WINNT) - @cmd //C mklink //J $$(call mingw_to_dos,$(2)/$(3),cd $(2) &&) $$(call mingw_to_dos,$(1),) + @cmd //C mklink //J $$(call cygpath_w,$(2)/$(3)) $$(call cygpath_w,$(1)) else ifneq (,$$(findstring CYGWIN,$$(BUILD_OS))) @cmd /C mklink /J $$(call cygpath_w,$(2)/$(3)) $$(call cygpath_w,$(1)) else ifdef JULIA_VAGRANT_BUILD @@ -1409,10 +1528,16 @@ endef # Overridable in Make.user WINE ?= wine +ifeq ($(BINARY),32) +HEAPLIM := --heap-size-hint=1000M +else +HEAPLIM := +endif + # many of the following targets must be = not := because the expansion of the makefile functions (and $1) shouldn't happen until later ifeq ($(BUILD_OS), WINNT) # MSYS spawn = $(1) -cygpath_w = $(1) +cygpath_w = `cygpath -w $(1)` else ifneq (,$(findstring CYGWIN,$(BUILD_OS))) # Cygwin spawn = $(1) cygpath_w = `cygpath -w $(1)` @@ -1444,7 +1569,7 @@ JULIA_SYSIMG_release := $(build_private_libdir)/sys.$(SHLIB_EXT) JULIA_SYSIMG := $(JULIA_SYSIMG_$(JULIA_BUILD_MODE)) define dep_lib_path -$$($(PYTHON) $(call python_cygpath,$(JULIAHOME)/contrib/relative_path.py) $(1) $(2)) +$(shell $(PYTHON) $(call python_cygpath,$(JULIAHOME)/contrib/relative_path.py) $(1) $(2)) endef LIBJULIAINTERNAL_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT)) @@ -1466,13 
+1591,19 @@ else LIBGCC_NAME := libgcc_s_seh-1.$(SHLIB_EXT) endif endif +# On macOS, libgcc_s has soversion 1.1 always on aarch64 and only for GCC 12+ +# (-> libgfortran 5) on x86_64 ifeq ($(OS),Darwin) ifeq ($(ARCH),aarch64) LIBGCC_NAME := libgcc_s.1.1.$(SHLIB_EXT) else +ifeq ($(LIBGFORTRAN_VERSION),5) +LIBGCC_NAME := libgcc_s.1.1.$(SHLIB_EXT) +else LIBGCC_NAME := libgcc_s.1.$(SHLIB_EXT) endif endif +endif ifneq ($(findstring $(OS),Linux FreeBSD),) LIBGCC_NAME := libgcc_s.$(SHLIB_EXT).1 endif @@ -1485,6 +1616,19 @@ LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/$(L endif LIBGCC_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBGCC_NAME)) +# We only bother to define this on Linux, as that's the only platform that does libstdc++ probing +# On all other platforms, the LIBSTDCXX_*_DEPLIB variables will be empty. +ifeq ($(OS),Linux) +LIBSTDCXX_NAME := libstdc++.so.6 +ifeq ($(USE_SYSTEM_CSL),1) +LIBSTDCXX_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBSTDCXX_NAME)) +else +LIBSTDCXX_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/$(LIBSTDCXX_NAME)) +endif +LIBSTDCXX_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBSTDCXX_NAME)) +endif + + # USE_SYSTEM_LIBM and USE_SYSTEM_OPENLIBM causes it to get symlinked into build_private_shlibdir ifeq ($(USE_SYSTEM_LIBM),1) LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT)) @@ -1498,6 +1642,8 @@ LIBM_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBMN # We list: # * libgcc_s, because FreeBSD needs to load ours, not the system one. # * libopenlibm, because Windows has an untrustworthy libm, and we want to use ours more than theirs +# * libstdc++, because while performing `libstdc++` probing we need to +# know the path to the bundled `libstdc++` library. # * libjulia-internal, which must always come second-to-last. # * libjulia-codegen, which must always come last # @@ -1506,11 +1652,45 @@ LIBM_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBMN # * install time relative paths are not equal to build time relative paths (../lib vs. ../lib/julia) # That second point will no longer be true for most deps once they are placed within Artifacts directories. # Note that we prefix `libjulia-codegen` and `libjulia-internal` with `@` to signify to the loader that it -# should not automatically dlopen() it in its loading loop. -LOADER_BUILD_DEP_LIBS = $(LIBGCC_BUILD_DEPLIB):$(LIBM_BUILD_DEPLIB):@$(LIBJULIAINTERNAL_BUILD_DEPLIB):@$(LIBJULIACODEGEN_BUILD_DEPLIB): -LOADER_DEBUG_BUILD_DEP_LIBS = $(LIBGCC_BUILD_DEPLIB):$(LIBM_BUILD_DEPLIB):@$(LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB):@$(LIBJULIACODEGEN_DEBUG_BUILD_DEPLIB): -LOADER_INSTALL_DEP_LIBS = $(LIBGCC_INSTALL_DEPLIB):$(LIBM_INSTALL_DEPLIB):@$(LIBJULIAINTERNAL_INSTALL_DEPLIB):@$(LIBJULIACODEGEN_INSTALL_DEPLIB): -LOADER_DEBUG_INSTALL_DEP_LIBS = $(LIBGCC_INSTALL_DEPLIB):$(LIBM_INSTALL_DEPLIB):@$(LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB):@$(LIBJULIACODEGEN_DEBUG_INSTALL_DEPLIB): +# should not automatically dlopen() it in its loading loop, it is "special" and should happen later. +# We do the same for `libstdc++`, and explicitly place it _after_ `libgcc_s`, and `libm` since `libstdc++` +# may depend on those libraries (e.g. when USE_SYSTEM_LIBM=1) + +# Helper function to join a list with colons, then place an extra at the end. 
+define build_deplibs +$(subst $(SPACE),:,$(strip $(1))): +endef + +LOADER_BUILD_DEP_LIBS = $(call build_deplibs, \ + $(LIBGCC_BUILD_DEPLIB) \ + $(LIBM_BUILD_DEPLIB) \ + @$(LIBSTDCXX_BUILD_DEPLIB) \ + @$(LIBJULIAINTERNAL_BUILD_DEPLIB) \ + @$(LIBJULIACODEGEN_BUILD_DEPLIB) \ +) + +LOADER_DEBUG_BUILD_DEP_LIBS = $(call build_deplibs, \ + $(LIBGCC_BUILD_DEPLIB) \ + $(LIBM_BUILD_DEPLIB) \ + @$(LIBSTDCXX_BUILD_DEPLIB) \ + @$(LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB) \ + @$(LIBJULIACODEGEN_DEBUG_BUILD_DEPLIB) \ +) + +LOADER_INSTALL_DEP_LIBS = $(call build_deplibs, \ + $(LIBGCC_INSTALL_DEPLIB) \ + $(LIBM_INSTALL_DEPLIB) \ + @$(LIBSTDCXX_INSTALL_DEPLIB) \ + @$(LIBJULIAINTERNAL_INSTALL_DEPLIB) \ + @$(LIBJULIACODEGEN_INSTALL_DEPLIB) \ +) +LOADER_DEBUG_INSTALL_DEP_LIBS = $(call build_deplibs, \ + $(LIBGCC_INSTALL_DEPLIB) \ + $(LIBM_INSTALL_DEPLIB) \ + @$(LIBSTDCXX_INSTALL_DEPLIB) \ + @$(LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB) \ + @$(LIBJULIACODEGEN_DEBUG_INSTALL_DEPLIB) \ +) # Colors for make ifndef VERBOSE diff --git a/Makefile b/Makefile index 958024c9942d3..68a316b0a32ae 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,24 @@ JULIAHOME := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) include $(JULIAHOME)/Make.inc +# import LLVM_SHARED_LIB_NAME +include $(JULIAHOME)/deps/llvm-ver.make + +# Make sure the user didn't try to build in a path that will confuse the shell or make +METACHARACTERS := [][?*{}() $$%:;&|!\#,\\`\":]\|/\./\|/\.\./ +ifneq (,$(findstring ',$(value BUILDROOT))) +$(error cowardly refusing to build into directory with a single-quote in the path) +endif +ifneq (,$(findstring ',$(value JULIAHOME))) +$(error cowardly refusing to build from source directory with a single-quote in the path) +endif +ifneq (,$(shell echo '$(value BUILDROOT)/' | grep '$(METACHARACTERS)')) +$(error cowardly refusing to build into directory with a shell-metacharacter in the path\ + (got: $(value BUILDROOT))) +endif +ifneq (,$(shell echo '$(value JULIAHOME)/' | grep '$(METACHARACTERS)')) +$(error cowardly refusing to build from source directory with a shell-metacharacter in the path\ + (got: $(value JULIAHOME))) +endif VERSDIR := v`cut -d. 
-f1-2 < $(JULIAHOME)/VERSION` @@ -7,11 +26,11 @@ default: $(JULIA_BUILD_MODE) # contains either "debug" or "release" all: debug release # sort is used to remove potential duplicates -DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_private_libdir) $(build_libexecdir) $(build_includedir) $(build_includedir)/julia $(build_sysconfdir)/julia $(build_datarootdir)/julia $(build_datarootdir)/julia/src $(build_datarootdir)/julia/stdlib $(build_man1dir)) +DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_private_libdir) $(build_libexecdir) $(build_includedir) $(build_includedir)/julia $(build_sysconfdir)/julia $(build_datarootdir)/julia $(build_datarootdir)/julia/stdlib $(build_man1dir)) ifneq ($(BUILDROOT),$(JULIAHOME)) -BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/clangsa test/embedding test/llvmpasses) -BUILDDIRMAKE := $(addsuffix /Makefile,$(BUILDDIRS)) $(BUILDROOT)/sysimage.mk -DIRS := $(DIRS) $(BUILDDIRS) +BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/clangsa test/embedding test/gcext test/llvmpasses) +BUILDDIRMAKE := $(addsuffix /Makefile,$(BUILDDIRS)) $(BUILDROOT)/sysimage.mk $(BUILDROOT)/pkgimage.mk +DIRS += $(BUILDDIRS) $(BUILDDIRMAKE): | $(BUILDDIRS) @# add Makefiles to the build directories for convenience (pointing back to the source location of each) @echo '# -- This file is automatically generated in julia/Makefile -- #' > $@ @@ -22,12 +41,12 @@ configure-y: | $(BUILDDIRMAKE) configure: ifeq ("$(origin O)", "command line") @if [ "$$(ls '$(BUILDROOT)' 2> /dev/null)" ]; then \ - echo 'WARNING: configure called on non-empty directory $(BUILDROOT)'; \ + printf $(WARNCOLOR)'WARNING: configure called on non-empty directory'$(ENDCOLOR)' %s\n' '$(BUILDROOT)'; \ read -p "Proceed [y/n]? " answer; \ else \ answer=y;\ fi; \ - [ $$answer = 'y' ] && $(MAKE) configure-$$answer + [ "y$$answer" = yy ] && $(MAKE) configure-$$answer else $(error "cannot rerun configure from within a build directory") endif @@ -37,15 +56,11 @@ configure: endif $(foreach dir,$(DIRS),$(eval $(call dir_target,$(dir)))) -$(eval $(call symlink_target,$(JULIAHOME)/test,$$(build_datarootdir)/julia,test)) +$(foreach link,base $(JULIAHOME)/test,$(eval $(call symlink_target,$(link),$$(build_datarootdir)/julia,$(notdir $(link))))) julia_flisp.boot.inc.phony: julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src julia_flisp.boot.inc.phony -# Build the HTML docs (skipped if already exists, notably in tarballs) -$(BUILDROOT)/doc/_build/html/en/index.html: $(shell find $(BUILDROOT)/base $(BUILDROOT)/doc \( -path $(BUILDROOT)/doc/_build -o -path $(BUILDROOT)/doc/deps \) -prune -o -type f -print) - @$(MAKE) docs - julia-symlink: julia-cli-$(JULIA_BUILD_MODE) ifeq ($(OS),WINNT) echo '@"%~dp0/'"$$(echo '$(call rel_path,$(BUILDROOT),$(JULIA_EXECUTABLE))')"'" %*' | tr / '\\' > $(BUILDROOT)/julia.bat @@ -56,7 +71,7 @@ ifndef JULIA_VAGRANT_BUILD endif endif -julia-deps: | $(DIRS) $(build_datarootdir)/julia/test +julia-deps: | $(DIRS) $(build_datarootdir)/julia/base $(build_datarootdir)/julia/test @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/deps # `julia-stdlib` depends on `julia-deps` so that the fake JLL stdlibs can copy in their Artifacts.toml files. 
@@ -69,6 +84,12 @@ julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/jul julia-libccalltest: julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalltest +julia-libccalllazyfoo: julia-deps + @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalllazyfoo + +julia-libccalllazybar: julia-deps julia-libccalllazyfoo + @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libccalllazybar + julia-libllvmcalltest: julia-deps @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libllvmcalltest @@ -84,19 +105,18 @@ julia-sysimg-ji : julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-sr julia-sysimg-bc : julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir) @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-bc JULIA_EXECUTABLE='$(JULIA_EXECUTABLE)' -$(JULIA_SYSIMG_release): julia-sysimg-ji julia-src-release - @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-release -$(JULIA_SYSIMG_debug) : julia-sysimg-ji julia-src-debug - @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-debug +julia-sysimg-release julia-sysimg-debug : julia-sysimg-% : julia-sysimg-ji julia-src-% + @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-$* -julia-sysimg-release : $(JULIA_SYSIMG_release) -julia-sysimg-debug : $(JULIA_SYSIMG_debug) -julia-base-cache: $(build_datarootdir)/julia/base.cache -julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink julia-libccalltest julia-libllvmcalltest julia-base-cache +julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink julia-libccalltest \ + julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest julia-base-cache -debug release : % : julia-% +stdlibs-cache-release stdlibs-cache-debug : stdlibs-cache-% : julia-% + @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f pkgimage.mk all-$* -docs: julia-sysimg-$(JULIA_BUILD_MODE) +debug release : % : julia-% stdlibs-cache-% + +docs: julia-sysimg-$(JULIA_BUILD_MODE) stdlibs-cache-$(JULIA_BUILD_MODE) @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/doc JULIA_EXECUTABLE='$(call spawn,$(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE))) --startup-file=no' docs-revise: @@ -106,7 +126,7 @@ check-whitespace: ifneq ($(NO_GIT), 1) @# Append the directory containing the julia we just built to the end of `PATH`, @# to give us the best chance of being able to run this check. - @PATH=$(PATH):$(dirname $(JULIA_EXECUTABLE)) $(JULIAHOME)/contrib/check-whitespace.jl + @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) else $(warn "Skipping whitespace check because git is unavailable") endif @@ -139,11 +159,13 @@ release-candidate: release testall @echo 7. Clean out old .tar.gz files living in deps/, "\`git clean -fdx\`" seems to work #"` @echo 8. Replace github release tarball with tarballs created from make light-source-dist and make full-source-dist with USE_BINARYBUILDER=0 @echo 9. Check that 'make && make install && make test' succeed with unpacked tarballs even without Internet access. - @echo 10. Follow packaging instructions in doc/build/distributing.md to create binary packages for all platforms + @echo 10. Follow packaging instructions in doc/src/devdocs/build/distributing.md to create binary packages for all platforms @echo 11. Upload to AWS, update https://julialang.org/downloads and http://status.julialang.org/stable links @echo 12. Update checksums on AWS for tarball and packaged binaries - @echo 13. Announce on mailing lists - @echo 14. 
Change master to release-0.X in base/version.jl and base/version_git.sh as in 4cb1e20 + @echo 13. Update versions.json. Wait at least 60 minutes before proceeding to step 14. + @echo 14. Push to Juliaup (https://github.com/JuliaLang/juliaup/wiki/Adding-a-Julia-version) + @echo 15. Announce on mailing lists + @echo 16. Change master to release-0.X in base/version.jl and base/version_git.sh as in 4cb1e20 @echo $(build_man1dir)/julia.1: $(JULIAHOME)/doc/man/julia.1 | $(build_man1dir) @@ -161,24 +183,27 @@ $(build_datarootdir)/julia/julia-config.jl: $(JULIAHOME)/contrib/julia-config.jl $(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(build_depsbindir) @$(call PRINT_CC, $(HOSTCC) -o $(build_depsbindir)/stringreplace $(JULIAHOME)/contrib/stringreplace.c) -$(build_datarootdir)/julia/base.cache: $(JULIA_SYSIMG) | $(DIRS) $(build_datarootdir)/julia - @JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \ +julia-base-cache: julia-sysimg-$(JULIA_BUILD_MODE) | $(DIRS) $(build_datarootdir)/julia + @JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) JULIA_FALLBACK_REPL=1 WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \ $(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/etc/write_base_cache.jl) \ - $(call cygpath_w,$@)) + $(call cygpath_w,$(build_datarootdir)/julia/base.cache)) # public libraries, that are installed in $(prefix)/lib +ifeq ($(JULIA_BUILD_MODE),release) JL_TARGETS := julia -ifeq ($(BUNDLE_DEBUG_LIBS),1) -JL_TARGETS += julia-debug +else ifeq ($(JULIA_BUILD_MODE),debug) +JL_TARGETS := julia-debug endif # private libraries, that are installed in $(prefix)/lib/julia -JL_PRIVATE_LIBS-0 := libccalltest libllvmcalltest libjulia-internal libjulia-codegen -ifeq ($(BUNDLE_DEBUG_LIBS),1) +JL_PRIVATE_LIBS-0 := libccalltest libccalllazyfoo libccalllazybar libllvmcalltest +ifeq ($(JULIA_BUILD_MODE),release) +JL_PRIVATE_LIBS-0 += libjulia-internal libjulia-codegen +else ifeq ($(JULIA_BUILD_MODE),debug) JL_PRIVATE_LIBS-0 += libjulia-internal-debug libjulia-codegen-debug endif ifeq ($(USE_GPL_LIBS), 1) -JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcolamd libklu libldl librbio libspqr libsuitesparseconfig libumfpack +JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcholmod_cuda libcolamd libklu libldl librbio libspqr libspqr_cuda libsuitesparseconfig libumfpack endif JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBBLASTRAMPOLINE) += libblastrampoline JL_PRIVATE_LIBS-$(USE_SYSTEM_PCRE) += libpcre2-8 @@ -197,7 +222,7 @@ else JL_PRIVATE_LIBS-$(USE_SYSTEM_ZLIB) += libz endif ifeq ($(USE_LLVM_SHLIB),1) -JL_PRIVATE_LIBS-$(USE_SYSTEM_LLVM) += libLLVM libLLVM-14jl +JL_PRIVATE_LIBS-$(USE_SYSTEM_LLVM) += libLLVM $(LLVM_SHARED_LIB_NAME) endif JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBUNWIND) += libunwind @@ -219,6 +244,17 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libwinpthread else JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libpthread endif +ifeq ($(SANITIZE),1) +ifeq ($(USECLANG),1) +JL_PRIVATE_LIBS-1 += libclang_rt.asan +else +JL_PRIVATE_LIBS-1 += libasan +endif +endif + +ifeq ($(WITH_TRACY),1) +JL_PRIVATE_LIBS-0 += libTracyClient +endif ifeq ($(OS),Darwin) @@ -229,49 +265,55 @@ endif endif endif +# Note that we disable MSYS2's path munging here, as otherwise +# it replaces our `:`-separated list as a `;`-separated one. 
define stringreplace - $(build_depsbindir)/stringreplace $$(strings -t x - $1 | grep $2 | awk '{print $$1;}') $3 255 "$(call cygpath_w,$1)" + MSYS2_ARG_CONV_EXCL='*' $(build_depsbindir)/stringreplace $$(strings -t x - '$1' | grep "$2" | awk '{print $$1;}') "$3" 255 "$(call cygpath_w,$1)" endef -install: $(build_depsbindir)/stringreplace $(BUILDROOT)/doc/_build/html/en/index.html -ifeq ($(BUNDLE_DEBUG_LIBS),1) - @$(MAKE) $(QUIET_MAKE) all -else - @$(MAKE) $(QUIET_MAKE) release -endif - @for subdir in $(bindir) $(datarootdir)/julia/stdlib/$(VERSDIR) $(docdir) $(man1dir) $(includedir)/julia $(libdir) $(private_libdir) $(sysconfdir) $(libexecdir); do \ +install: $(build_depsbindir)/stringreplace docs + @$(MAKE) $(QUIET_MAKE) $(JULIA_BUILD_MODE) + @for subdir in $(bindir) $(datarootdir)/julia/stdlib/$(VERSDIR) $(docdir) $(man1dir) $(includedir)/julia $(libdir) $(private_libdir) $(sysconfdir) $(private_libexecdir); do \ mkdir -p $(DESTDIR)$$subdir; \ done - $(INSTALL_M) $(build_bindir)/julia $(DESTDIR)$(bindir)/ -ifeq ($(BUNDLE_DEBUG_LIBS),1) - $(INSTALL_M) $(build_bindir)/julia-debug $(DESTDIR)$(bindir)/ -endif + $(INSTALL_M) $(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE)) $(DESTDIR)$(bindir)/ ifeq ($(OS),WINNT) - -$(INSTALL_M) $(filter-out $(build_bindir)/libjulia-debug.dll,$(wildcard $(build_bindir)/*.dll)) $(DESTDIR)$(bindir)/ + -$(INSTALL_M) $(wildcard $(build_bindir)/*.dll) $(DESTDIR)$(bindir)/ +ifeq ($(JULIA_BUILD_MODE),release) -$(INSTALL_M) $(build_libdir)/libjulia.dll.a $(DESTDIR)$(libdir)/ - - # We have a single exception; we want 7z.dll to live in libexec, not bin, so that 7z.exe can find it. - -mv $(DESTDIR)$(bindir)/7z.dll $(DESTDIR)$(libexecdir)/ -ifeq ($(BUNDLE_DEBUG_LIBS),1) - -$(INSTALL_M) $(build_bindir)/libjulia-debug.dll $(DESTDIR)$(bindir)/ + -$(INSTALL_M) $(build_libdir)/libjulia-internal.dll.a $(DESTDIR)$(libdir)/ +else ifeq ($(JULIA_BUILD_MODE),debug) -$(INSTALL_M) $(build_libdir)/libjulia-debug.dll.a $(DESTDIR)$(libdir)/ + -$(INSTALL_M) $(build_libdir)/libjulia-internal-debug.dll.a $(DESTDIR)$(libdir)/ endif + -$(INSTALL_M) $(wildcard $(build_private_libdir)/*.a) $(DESTDIR)$(private_libdir)/ + + # We have a single exception; we want 7z.dll to live in private_libexecdir, + # not bindir, so that 7z.exe can find it. 
+ -mv $(DESTDIR)$(bindir)/7z.dll $(DESTDIR)$(private_libexecdir)/ -$(INSTALL_M) $(build_bindir)/libopenlibm.dll.a $(DESTDIR)$(libdir)/ + -$(INSTALL_M) $(build_libdir)/libssp.dll.a $(DESTDIR)$(libdir)/ else # Copy over .dSYM directories directly for Darwin ifneq ($(DARWIN_FRAMEWORK),1) ifeq ($(OS),Darwin) +ifeq ($(JULIA_BUILD_MODE),release) -cp -a $(build_libdir)/libjulia.*.dSYM $(DESTDIR)$(libdir) + -cp -a $(build_libdir)/libjulia-internal.*.dSYM $(DESTDIR)$(private_libdir) + -cp -a $(build_libdir)/libjulia-codegen.*.dSYM $(DESTDIR)$(private_libdir) -cp -a $(build_private_libdir)/sys.dylib.dSYM $(DESTDIR)$(private_libdir) -ifeq ($(BUNDLE_DEBUG_LIBS),1) +else ifeq ($(JULIA_BUILD_MODE),debug) -cp -a $(build_libdir)/libjulia-debug.*.dSYM $(DESTDIR)$(libdir) + -cp -a $(build_libdir)/libjulia-internal-debug.*.dSYM $(DESTDIR)$(private_libdir) + -cp -a $(build_libdir)/libjulia-codegen-debug.*.dSYM $(DESTDIR)$(private_libdir) -cp -a $(build_private_libdir)/sys-debug.dylib.dSYM $(DESTDIR)$(private_libdir) endif endif +# Copy over shared library file for libjulia.* for suffix in $(JL_TARGETS) ; do \ for lib in $(build_libdir)/lib$${suffix}.*$(SHLIB_EXT)*; do \ if [ "$${lib##*.}" != "dSYM" ]; then \ @@ -280,11 +322,12 @@ endif done \ done else - # libjulia in Darwin framework has special location and name +# libjulia in Darwin framework has special location and name +ifeq ($(JULIA_BUILD_MODE),release) $(INSTALL_M) $(build_libdir)/libjulia.$(SOMAJOR).$(SOMINOR).dylib $(DESTDIR)$(prefix)/$(framework_dylib) @$(DSYMUTIL) -o $(DESTDIR)$(prefix)/$(framework_resources)/$(FRAMEWORK_NAME).dSYM $(DESTDIR)$(prefix)/$(framework_dylib) @$(DSYMUTIL) -o $(DESTDIR)$(prefix)/$(framework_resources)/sys.dylib.dSYM $(build_private_libdir)/sys.dylib -ifeq ($(BUNDLE_DEBUG_LIBS),1) +else ifeq ($(JULIA_BUILD_MODE),debug) $(INSTALL_M) $(build_libdir)/libjulia-debug.$(SOMAJOR).$(SOMINOR).dylib $(DESTDIR)$(prefix)/$(framework_dylib)_debug @$(DSYMUTIL) -o $(DESTDIR)$(prefix)/$(framework_resources)/$(FRAMEWORK_NAME)_debug.dSYM $(DESTDIR)$(prefix)/$(framework_dylib)_debug @$(DSYMUTIL) -o $(DESTDIR)$(prefix)/$(framework_resources)/sys-debug.dylib.dSYM $(build_private_libdir)/sys-debug.dylib @@ -306,26 +349,38 @@ endif done \ done endif - # Install `7z` into libexec/ - $(INSTALL_M) $(build_bindir)/7z$(EXE) $(DESTDIR)$(libexecdir)/ + # Install `7z` into private_libexecdir + $(INSTALL_M) $(build_bindir)/7z$(EXE) $(DESTDIR)$(private_libexecdir)/ + + # Install `lld` into private_libexecdir + $(INSTALL_M) $(build_depsbindir)/lld$(EXE) $(DESTDIR)$(private_libexecdir)/ + + # Install `dsymutil` into private_libexecdir/ + $(INSTALL_M) $(build_depsbindir)/dsymutil$(EXE) $(DESTDIR)$(private_libexecdir)/ # Copy public headers cp -R -L $(build_includedir)/julia/* $(DESTDIR)$(includedir)/julia # Copy system image +ifeq ($(JULIA_BUILD_MODE),release) $(INSTALL_M) $(build_private_libdir)/sys.$(SHLIB_EXT) $(DESTDIR)$(private_libdir) -ifeq ($(BUNDLE_DEBUG_LIBS),1) +else ifeq ($(JULIA_BUILD_MODE),debug) $(INSTALL_M) $(build_private_libdir)/sys-debug.$(SHLIB_EXT) $(DESTDIR)$(private_libdir) endif # Copy in all .jl sources as well - mkdir -p $(DESTDIR)$(datarootdir)/julia/src $(DESTDIR)$(datarootdir)/julia/test - cp -R -L $(build_datarootdir)/julia/* $(DESTDIR)$(datarootdir)/julia + mkdir -p $(DESTDIR)$(datarootdir)/julia/base $(DESTDIR)$(datarootdir)/julia/test + cp -R -L $(JULIAHOME)/base/* $(DESTDIR)$(datarootdir)/julia/base cp -R -L $(JULIAHOME)/test/* $(DESTDIR)$(datarootdir)/julia/test + cp -R -L $(build_datarootdir)/julia/* 
$(DESTDIR)$(datarootdir)/julia # Copy documentation cp -R -L $(BUILDROOT)/doc/_build/html $(DESTDIR)$(docdir)/ # Remove various files which should not be installed -rm -f $(DESTDIR)$(datarootdir)/julia/base/version_git.sh -rm -f $(DESTDIR)$(datarootdir)/julia/test/Makefile + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/source-extracted + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/build-configured + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/build-compiled + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/build-checked -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/source-extracted -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/build-configured -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/build-compiled @@ -350,7 +405,7 @@ ifneq ($(DARWIN_FRAMEWORK),1) endif else ifneq (,$(findstring $(OS),Linux FreeBSD)) for j in $(JL_TARGETS) ; do \ - $(PATCHELF) --set-rpath '$$ORIGIN/$(private_libdir_rel):$$ORIGIN/$(libdir_rel)' $(DESTDIR)$(bindir)/$$j; \ + $(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN/$(private_libdir_rel):$$ORIGIN/$(libdir_rel)' $(DESTDIR)$(bindir)/$$j; \ done endif @@ -362,35 +417,45 @@ endif RELEASE_TARGET=$(DESTDIR)$(prefix)/$(framework_dylib); \ DEBUG_TARGET=$(DESTDIR)$(prefix)/$(framework_dylib)_debug; \ fi; \ - $(call stringreplace,$${RELEASE_TARGET},sys.$(SHLIB_EXT)$$,$(private_libdir_rel)/sys.$(SHLIB_EXT)); \ - if [ "$(BUNDLE_DEBUG_LIBS)" = "1" ]; then \ + if [ "$(JULIA_BUILD_MODE)" = "release" ]; then \ + $(call stringreplace,$${RELEASE_TARGET},sys.$(SHLIB_EXT)$$,$(private_libdir_rel)/sys.$(SHLIB_EXT)); \ + elif [ "$(JULIA_BUILD_MODE)" = "debug" ]; then \ $(call stringreplace,$${DEBUG_TARGET},sys-debug.$(SHLIB_EXT)$$,$(private_libdir_rel)/sys-debug.$(SHLIB_EXT)); \ fi; endif - # Set rpath for libjulia-internal, which is moving from `../lib` to `../lib/julia`. We only need to do this for Linux/FreeBSD -ifneq (,$(findstring $(OS),Linux FreeBSD)) - $(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT) -ifeq ($(BUNDLE_DEBUG_LIBS),1) - $(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT) + # Set rpath for libjulia-internal, which is moving from `../lib` to `../lib/julia`. 
+ifeq ($(OS), Darwin) +ifneq ($(DARWIN_FRAMEWORK),1) +ifeq ($(JULIA_BUILD_MODE),release) + install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT) + install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-codegen.$(SHLIB_EXT) +else ifeq ($(JULIA_BUILD_MODE),debug) + install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT) + install_name_tool -add_rpath @loader_path/$(reverse_private_libdir_rel)/ $(DESTDIR)$(private_libdir)/libjulia-codegen-debug.$(SHLIB_EXT) +endif +endif +else ifneq (,$(findstring $(OS),Linux FreeBSD)) +ifeq ($(JULIA_BUILD_MODE),release) + $(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT) + $(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-codegen.$(SHLIB_EXT) +else ifeq ($(JULIA_BUILD_MODE),debug) + $(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT) + $(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-codegen-debug.$(SHLIB_EXT) endif endif + # Fix rpaths for dependencies. This should be fixed in BinaryBuilder later. +ifeq ($(OS), Linux) + -$(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $(DESTDIR)$(private_shlibdir)/libLLVM.$(SHLIB_EXT) +endif ifneq ($(LOADER_BUILD_DEP_LIBS),$(LOADER_INSTALL_DEP_LIBS)) # Next, overwrite relative path to libjulia-internal in our loader if $$(LOADER_BUILD_DEP_LIBS) != $$(LOADER_INSTALL_DEP_LIBS) +ifeq ($(JULIA_BUILD_MODE),release) $(call stringreplace,$(DESTDIR)$(shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT),$(LOADER_BUILD_DEP_LIBS)$$,$(LOADER_INSTALL_DEP_LIBS)) -ifeq ($(OS),Darwin) - # Codesign the libjulia we just modified - $(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(DESTDIR)$(shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT)" -endif - -ifeq ($(BUNDLE_DEBUG_LIBS),1) +else ifeq ($(JULIA_BUILD_MODE),debug) $(call stringreplace,$(DESTDIR)$(shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT),$(LOADER_DEBUG_BUILD_DEP_LIBS)$$,$(LOADER_DEBUG_INSTALL_DEP_LIBS)) -ifeq ($(OS),Darwin) - # Codesign the libjulia we just modified - $(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(DESTDIR)$(shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)" -endif endif endif @@ -404,7 +469,7 @@ ifeq ($(OS),FreeBSD) # don't set libgfortran's RPATH, it won't be able to find its friends on systems # that don't have the exact GCC port installed used for the build. 
for lib in $(DESTDIR)$(private_libdir)/libgfortran*$(SHLIB_EXT)*; do \ - $(PATCHELF) --set-rpath '$$ORIGIN' $$lib; \ + $(PATCHELF) $(PATCHELF_SET_RPATH_ARG) '$$ORIGIN' $$lib; \ done endif @@ -427,6 +492,12 @@ ifneq ($(OPENBLAS_DYNAMIC_ARCH),1) endif endif endif + +ifeq ($(USE_BINARYBUILDER_OPENBLAS),0) + # https://github.com/JuliaLang/julia/issues/46579 + USE_BINARYBUILDER_OBJCONV=0 +endif + ifneq ($(prefix),$(abspath julia-$(JULIA_COMMIT))) $(error prefix must not be set for make binary-dist) endif @@ -441,17 +512,14 @@ ifeq ($(OS), Linux) endif ifeq ($(OS), WINNT) cd $(BUILDROOT)/julia-$(JULIA_COMMIT)/bin && rm -f llvm* llc.exe lli.exe opt.exe LTO.dll bugpoint.exe macho-dump.exe -endif -ifeq ($(OS),Darwin) - # If we're on macOS, and we have a codesigning identity, then codesign the binary-dist tarball! - $(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(BUILDROOT)/julia-$(JULIA_COMMIT)" endif cd $(BUILDROOT) && $(TAR) zcvf $(JULIA_BINARYDIST_FILENAME).tar.gz julia-$(JULIA_COMMIT) exe: - # run Inno Setup to compile installer - $(call spawn,$(JULIAHOME)/dist-extras/inno/iscc.exe /DAppVersion=$(JULIA_VERSION) /DSourceDir="$(call cygpath_w,$(BUILDROOT)/julia-$(JULIA_COMMIT))" /DRepoDir="$(call cygpath_w,$(JULIAHOME))" /F"$(JULIA_BINARYDIST_FILENAME)" /O"$(call cygpath_w,$(BUILDROOT))" $(INNO_ARGS) $(call cygpath_w,$(JULIAHOME)/contrib/windows/build-installer.iss)) + # run Inno Setup to compile installer. + # Note that we disable MSYS2 path munging, as it interferes with the `/` options: + MSYS2_ARG_CONV_EXCL='*' $(call spawn,$(JULIAHOME)/dist-extras/inno/iscc.exe /DAppVersion=$(JULIA_VERSION) /DSourceDir="$(call cygpath_w,$(BUILDROOT)/julia-$(JULIA_COMMIT))" /DRepoDir="$(call cygpath_w,$(JULIAHOME))" /F"$(JULIA_BINARYDIST_FILENAME)" /O"$(call cygpath_w,$(BUILDROOT))" $(INNO_ARGS) $(call cygpath_w,$(JULIAHOME)/contrib/windows/build-installer.iss)) chmod a+x "$(BUILDROOT)/$(JULIA_BINARYDIST_FILENAME).exe" app: @@ -461,12 +529,12 @@ app: darwinframework: $(MAKE) -C $(JULIAHOME)/contrib/mac/framework -light-source-dist.tmp: $(BUILDROOT)/doc/_build/html/en/index.html +light-source-dist.tmp: docs ifneq ($(BUILDROOT),$(JULIAHOME)) $(error make light-source-dist does not work in out-of-tree builds) endif # Save git information - -@$(MAKE) -C $(JULIAHOME)/base version_git.jl + -@$(MAKE) -C $(JULIAHOME)/base version_git.jl.phony # Create file light-source-dist.tmp to hold all the filenames that go into the tarball echo "base/version_git.jl" > light-source-dist.tmp @@ -538,12 +606,13 @@ distcleanall: cleanall @-$(MAKE) -C $(BUILDROOT)/deps distcleanall @-$(MAKE) -C $(BUILDROOT)/doc cleanall -.PHONY: default debug release check-whitespace release-candidate \ +.FORCE: +.PHONY: .FORCE default debug release check-whitespace release-candidate \ julia-debug julia-release julia-stdlib julia-deps julia-deps-libs \ julia-cli-release julia-cli-debug julia-src-release julia-src-debug \ julia-symlink julia-base julia-sysimg julia-sysimg-ji julia-sysimg-release julia-sysimg-debug \ - test testall testall1 test test-* test-revise-* \ - clean distcleanall cleanall clean-* \ + test testall testall1 test \ + clean distcleanall cleanall $(CLEAN_TARGETS) \ run-julia run-julia-debug run-julia-release run \ install binary-dist light-source-dist.tmp light-source-dist \ dist full-source-dist source-dist @@ -560,12 +629,12 @@ testall: check-whitespace $(JULIA_BUILD_MODE) testall1: check-whitespace $(JULIA_BUILD_MODE) @env JULIA_CPU_THREADS=1 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all 
JULIA_BUILD_MODE=$(JULIA_BUILD_MODE) -test-%: check-whitespace $(JULIA_BUILD_MODE) +test-%: check-whitespace $(JULIA_BUILD_MODE) .FORCE @([ $$(( $$(date +%s) - $$(date -r $(build_private_libdir)/sys.$(SHLIB_EXT) +%s) )) -le 100 ] && \ printf '\033[93m HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. \033[0m\n') || true @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test $* JULIA_BUILD_MODE=$(JULIA_BUILD_MODE) -test-revise-%: +test-revise-%: .FORCE @$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test revise-$* JULIA_BUILD_MODE=$(JULIA_BUILD_MODE) # download target for some hardcoded windows dependencies @@ -576,7 +645,7 @@ win-extras: cd $(JULIAHOME)/dist-extras && \ $(JLDOWNLOAD) https://www.jrsoftware.org/download.php/is.exe && \ chmod a+x is.exe && \ - $(call spawn, $(JULIAHOME)/dist-extras/is.exe /DIR="$(call cygpath_w,$(JULIAHOME)/dist-extras/inno)" /PORTABLE=1 /CURRENTUSER /VERYSILENT) + MSYS2_ARG_CONV_EXCL='*' $(call spawn, $(JULIAHOME)/dist-extras/is.exe /DIR="$(call cygpath_w,$(JULIAHOME)/dist-extras/inno)" /PORTABLE=1 /CURRENTUSER /VERYSILENT) # various statistics about the build that may interest the user ifeq ($(USE_SYSTEM_LLVM), 1) diff --git a/NEWS.md b/NEWS.md index 434e38078b01c..0ec643882a4af 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,179 +1,172 @@ -Julia v1.9 Release Notes +Julia v1.11 Release Notes ======================== New language features --------------------- - -* It is now possible to assign to bindings in another module using `setproperty!(::Module, ::Symbol, x)`. ([#44137]) -* Slurping in assignments is now also allowed in non-final position. This is - handled via `Base.split_rest`. ([#42902]) -* Character literals now support the same syntax allowed in string literals; i.e. the syntax can - represent invalid UTF-8 sequences as allowed by the `Char` type ([#44989]). +* `public` is a new keyword. Symbols marked with `public` are considered public + API. Symbols marked with `export` are now also treated as public API. The + difference between `public` and `export` is that `public` names do not become + available when `using` a package/module ([#50105]). +* `ScopedValue` implement dynamic scope with inheritance across tasks ([#50958]). +* The new macro `Base.Cartesian.@ncallkw` is analogous to `Base.Cartesian.@ncall`, + but allows to add keyword arguments to the function call ([#51501]). +* Support for Unicode 15.1 ([#51799]). +* A new `AbstractString` type, `AnnotatedString`, is introduced that allows for + regional annotations to be attached to an underlying string. This type is + particularly useful for holding styling information, and is used extensively + in the new `StyledStrings` standard library. There is also a new `AnnotatedChar` + type, that is the equivalent new `AbstractChar` type. +* `Manifest.toml` files can now be renamed in the format `Manifest-v{major}.{minor}.toml` + to be preferentially picked up by the given julia version. i.e. in the same folder, + a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by every other julia + version. This makes managing environments for multiple julia versions at the same time + easier ([#43845]). Language changes ---------------- - -* New builtins `getglobal(::Module, ::Symbol[, order])` and `setglobal!(::Module, ::Symbol, x[, order])` - for reading from and writing to globals. `getglobal` should now be preferred for accessing globals over - `getfield`. 
([#44137]) -* A few basic operators have been generalized to more naturally support vector space structures: - unary minus falls back to scalar multiplication with -1, `-(x) = Int8(-1)*x`, - binary minus falls back to addition `-(x, y) = x + (-y)`, and, at the most generic level, - left- and right-division fall back to multiplication with the inverse from left and right, - respectively, as stated in the docstring. ([#44564]) -* The `@invoke` macro introduced in 1.7 is now exported. Additionally, it now uses `Core.Typeof(x)` - rather than `Any` when a type annotation is omitted for an argument `x` so that types passed - as arguments are handled correctly. ([#45807]) +* During precompilation, the `atexit` hooks now run before saving the output file. This + allows users to safely tear down background state (such as closing Timers and sending + disconnect notifications to heartbeat tasks) and cleanup other resources when the program + wants to begin exiting. +* Code coverage and malloc tracking is no longer generated during the package precompilation stage. + Further, during these modes pkgimage caches are now used for packages that are not being tracked. + Meaning that coverage testing (the default for `julia-actions/julia-runtest`) will by default use + pkgimage caches for all other packages than the package being tested, likely meaning faster test + execution. ([#52123]) Compiler/Runtime improvements ----------------------------- - -* The known quadratic behavior of type inference is now fixed and inference uses less memory in general. - Certain edge cases with auto-generated long functions (e.g. ModelingToolkit.jl with partial - differential equations and large causal models) should see significant compile-time improvements. - ([#45276], [#45404]) -* Non-concrete call sites can now be union-split to be inlined or statically-resolved even - if there are multiple dispatch candidates. This may improve runtime performance in certain - situations where object types are not fully known statically but mostly available at runtime - (as like Julia-level type inference implementation itself) by statically resolving - `@nospecialize`-d call sites and avoiding excessive compilation. ([#44512]) -* All the previous usages of `@pure`-macro in `Base` has been replaced with the preferred - `Base.@assume_effects`-based annotations. ([#44776]) +* Updated GC heuristics to count allocated pages instead of individual objects ([#50144]). +* A new `LazyLibrary` type is exported from `Libdl` for use in building chained lazy library + loads, primarily to be used within JLLs ([#50074]). +* Added a support for annotating `Base.@assume_effects` on code block ([#52400]). Command-line option changes --------------------------- -* In Linux and Windows, `--threads=auto` now tries to infer usable number of CPUs from the - process affinity which is set typically in HPC and cloud environments ([#42340]). -* `--math-mode=fast` is now a no-op ([#41638]). Users are encouraged to use the @fastmath macro instead, which has more well-defined semantics. -* The `--threads` command-line option now accepts `auto|N[,auto|M]` where `M` specifies the - number of interactive threads to create (`auto` currently means 1) ([#42302]). -* New option `--heap-size-hint=` gives a memory hint for triggering greedy garbage - collection. The size might be specified in bytes, kilobytes(1000k), megabytes(300M), - gigabytes(1.5G) +* The entry point for Julia has been standardized to `Main.main(ARGS)`. 
This must be explicitly opted into using the `@main` macro +(see the docstring for further details). When opted-in, and julia is invoked to run a script or expression +(i.e. using `julia script.jl` or `julia -e expr`), julia will subsequently run the `Main.main` function automatically. +This is intended to unify script and compilation workflows, where code loading may happen +in the compiler and execution of `Main.main` may happen in the resulting executable. For interactive use, there is no semantic +difference between defining a `main` function and executing the code directly at the end of the script ([50974]). +* The `--compiled-modules` and `--pkgimages` flags can now be set to `existing`, which will + cause Julia to consider loading existing cache files, but not to create new ones ([#50586] + and [#52573]). Multi-threading changes ----------------------- -* `Threads.@spawn` now accepts an optional first argument: `:default` or `:interactive`. - An interactive task desires low latency and implicitly agrees to be short duration or to - yield frequently. Interactive tasks will run on interactive threads, if any are specified - when Julia is started ([#42302]). - Build system changes -------------------- - New library functions --------------------- -* `Iterators.flatmap` was added ([#44792]). -* New helper `Splat(f)` which acts like `x -> f(x...)`, with pretty printing for - inspecting which function `f` was originally wrapped. ([#42717]) -* New `pkgversion(m::Module)` function to get the version of the package that loaded - a given module, similar to `pkgdir(m::Module)`. ([#45607]) - -Library changes ---------------- - -* A known concurrency issue of `iterate` methods on `Dict` and other derived objects such - as `keys(::Dict)`, `values(::Dict)`, and `Set` is fixed. These methods of `iterate` can - now be called on a dictionary or set shared by arbitrary tasks provided that there are no - tasks mutating the dictionary or set ([#44534]). -* Predicate function negation `!f` now returns a composed function `(!) ∘ f` instead of an anonymous function ([#44752]). -* `RoundFromZero` now works for non-`BigFloat` types ([#41246]). -* `Dict` can be now shrunk manually by `sizehint!` ([#45004]). -* `@time` now separates out % time spent recompiling invalidated methods ([#45015]). -* `@time_imports` now shows any compilation and recompilation time percentages per import ([#45064]). -* `eachslice` now works over multiple dimensions; `eachslice`, `eachrow` and `eachcol` return - a `Slices` object, which allows dispatching to provide more efficient methods ([#32310]). +* `in!(x, s::AbstractSet)` will return whether `x` is in `s`, and insert `x` in `s` if not. +* The new `Libc.mkfifo` function wraps the `mkfifo` C function on Unix platforms ([#34587]). +* `hardlink(src, dst)` can be used to create hard links ([#41639]). +* `diskstat(path=pwd())` can be used to return statistics about the disk ([#42248]). +* `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]). +* `eachrsplit(string, pattern)` iterates split substrings right to left. +* `Sys.username()` can be used to return the current user's username ([#51897]). +* `wrap(Array, m::Union{MemoryRef{T}, Memory{T}}, dims)` which is the safe counterpart to `unsafe_wrap` ([#52049]). + +New library features +-------------------- + +* `invmod(n, T)` where `T` is a native integer type now computes the modular inverse of `n` in the modular integer ring that `T` defines ([#52180]). 
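As a quick, hypothetical illustration of the `invmod(n, T)` addition noted in the bullet above (and its one-argument abbreviation in the next bullet), the sketch below assumes only the semantics stated in these release notes — a modular inverse computed in the ring that the native integer type defines:

```julia
# Sketch of the new invmod methods (assumes Julia >= 1.11 semantics as described above).
x = invmod(3, UInt8)                  # inverse of 3 in Z/2^8, the ring that UInt8 defines
@assert UInt8(3) * x % UInt8 == 0x01  # 3 * 171 == 513 ≡ 1 (mod 256)

# invmod(n) (next bullet) abbreviates invmod(n, typeof(n)) for native integer types:
@assert invmod(UInt8(3)) == x
```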
+* `invmod(n)` is an abbreviation for `invmod(n, typeof(n))` for native integer types ([#52180]). +* `replace(string, pattern...)` now supports an optional `IO` argument to + write the output to a stream rather than returning a string ([#48625]). +* `sizehint!(s, n)` now supports an optional `shrink` argument to disable shrinking ([#51929]). +* New function `Docs.hasdoc(module, symbol)` tells whether a name has a docstring ([#52139]). +* Passing an IOBuffer as a stdout argument for Process spawn now works as + expected, synchronized with `wait` or `success`, so a `Base.BufferStream` is + no longer required there for correctness to avoid data-races ([#TBD]). +* After a process exits, `closewrite` will no longer be automatically called on + the stream passed to it. Call `wait` on the process instead to ensure the + content is fully written, then call `closewrite` manually to avoid + data-races. Or use the callback form of `open` to have all that handled + automatically. Standard library changes ------------------------ -#### Package Manager +#### StyledStrings -#### LinearAlgebra +* A new standard library for handling styling in a more comprehensive and structured way ([#49586]). +* The new `Faces` struct serves as a container for text styling information + (think typeface, as well as color and decoration), and comes with a framework + to provide a convenient, extensible (via `addface!`), and customisable (with a + user's `Faces.toml` and `loadfaces!`) approach to + styled content ([#49586]). +* The new `@styled_str` string macro provides a convenient way of creating a + `AnnotatedString` with various faces or other attributes applied ([#49586]). -* The methods `a / b` and `b \ a` with `a` a scalar and `b` a vector, - which were equivalent to `a * pinv(b)`, have been removed due to the - risk of confusion with elementwise division ([#44358]). -* We are now wholly reliant on libblastrampoline (LBT) for calling - BLAS and LAPACK. OpenBLAS is shipped by default, but building the - system image with other BLAS/LAPACK libraries is not - supported. Instead, it is recommended that the LBT mechanism be used - for swapping BLAS/LAPACK with vendor provided ones. ([#44360]) -* `lu` now supports a new pivoting strategy `RowNonZero()` that chooses - the first non-zero pivot element, for use with new arithmetic types and for pedagogy ([#44571]). -* `normalize(x, p=2)` now supports any normed vector space `x`, including scalars ([#44925]). +#### Package Manager -#### Markdown +#### LinearAlgebra +* `cbrt(::AbstractMatrix{<:Real})` is now defined and returns real-valued matrix cube roots of real-valued matrices ([#50661]). +* `eigvals/eigen(A, bunchkaufman(B))` and `eigvals/eigen(A, lu(B))`, which utilize the Bunchkaufman (LDL) and LU decomposition of `B`, + respectively, now efficiently compute the generalized eigenvalues (`eigen`: and eigenvectors) of `A` and `B`. Note: The second + argument is the output of `bunchkaufman` or `lu` ([#50471]). #### Printf -#### Random +#### Profile -* `randn` and `randexp` now work for any `AbstractFloat` type defining `rand` ([#44714]). +#### Random +* `rand` now supports sampling over `Tuple` types ([#35856], [#50251]). +* `rand` now supports sampling over `Pair` types ([#28705]). +* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]). +* Seedable random number generators from `Random` can now be seeded by a string, e.g. + `seed!(rng, "a random seed")` ([#51527]). #### REPL -* `Meta-e` now opens the current input in an editor. 
The content (if modified) will be - executed upon existing the editor. +* Tab complete hints now show in lighter text while typing in the repl. To disable + set `Base.active_repl.options.hint_tab_completes = false` ([#51229]). +* Meta-M with an empty prompt now returns the contextual module of the REPL to `Main`. + +#### SuiteSparse -* The contextual module which is active at the REPL can be changed (it is `Main` by default), - via the `REPL.activate(::Module)` function or via typing the module in the REPL and pressing - the keybinding Alt-m ([#33872]). #### SparseArrays #### Test -* New fail-fast mode for testsets that will terminate the test run early if a failure or error occurs. - Set either via the `@testset` kwarg `failfast=true` or by setting env var `JULIA_TEST_FAILFAST` - to `"true"` i.e. in CI runs to request the job failure be posted eagerly when issues occur ([#45317]) #### Dates -#### Downloads - #### Statistics -#### Sockets - -#### Tar +* Statistics is now an upgradeable standard library ([#46501]). #### Distributed -* The package environment (active project, `LOAD_PATH`, `DEPOT_PATH`) are now propagated - when adding *local* workers (e.g. with `addprocs(N::Int)` or through the `--procs=N` - command line flag) ([#43270]). -* `addprocs` for local workers now accept the `env` keyword argument for passing - environment variables to the workers processes. This was already supported for - remote workers ([#43270]). - -#### UUIDs +* `pmap` now defaults to using a `CachingPool` ([#33892]). #### Unicode -* `graphemes(s, m:n)` returns a substring of the `m`-th to `n`-th graphemes in `s` ([#44266]). - -#### Mmap #### DelimitedFiles +#### InteractiveUtils + Deprecated or removed --------------------- -* Unexported `splat` is deprecated in favor of exported `Splat`, which has pretty printing of the wrapped function. ([#42717]) External dependencies --------------------- - +* `tput` is no longer called to check terminal capabilities, it has been replaced with a pure-Julia terminfo parser ([#50797]). Tooling Improvements ---------------------- +-------------------- -* Printing of `MethodError` and methods (such as from `methods(my_func)`) are now prettified and color consistent with printing of methods - in stacktraces. ([#45069]) +* CI now performs limited automatic typo detection on all PRs. If you merge a PR with a + failing typo CI check, then the reported typos will be automatically ignored in future CI + runs on PRs that edit those same files ([#51704]). diff --git a/README.md b/README.md index 007704a3e67b6..6fa264c57ac21 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,8 @@ Code coverage - + Coverage Status + @@ -47,6 +48,7 @@ and installing Julia, below. - **Documentation:** - **Packages:** - **Discussion forum:** +- **Zulip:** - **Slack:** (get an invite from ) - **YouTube:** - **Code coverage:** @@ -67,7 +69,7 @@ If you would rather not compile the latest Julia from source, platform-specific tarballs with pre-compiled binaries are also [available for download](https://julialang.org/downloads/). The downloads page also provides details on the -[different tiers of support](https://julialang.org/downloads/#support-tiers) +[different tiers of support](https://julialang.org/downloads/#supported_platforms) for OS and platform combinations. 
If everything works correctly, you will see a Julia banner and an @@ -88,26 +90,25 @@ Then, acquire the source code by cloning the git repository: git clone https://github.com/JuliaLang/julia.git -By default you will be building the latest unstable version of +and then use the command prompt to change into the resulting julia directory. By default you will be building the latest unstable version of Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases) -of Julia. You can get this version by changing to the Julia directory -and running: +of Julia. You can get this version by running: - git checkout v1.7.3 + git checkout v1.9.4 -Now run `make` to build the `julia` executable. +To build the `julia` executable, run `make` from within the julia directory. Building Julia requires 2GiB of disk space and approximately 4GiB of virtual memory. **Note:** The build process will fail badly if any of the build directory's parent directories have spaces or other shell meta-characters such as `$` or `:` in their names (this is due to a limitation in GNU make). -Once it is built, you can run the `julia` executable after you enter your julia directory and run +Once it is built, you can run the `julia` executable. From within the julia directory, run ./julia Your first test of Julia determines whether your build is working -properly. From the UNIX/Windows command prompt inside the `julia` -source directory, type `make testall`. You should see output that +properly. From the julia +directory, type `make testall`. You should see output that lists a series of running tests; if they complete without error, you should be in good shape to start using Julia. @@ -120,10 +121,9 @@ are included in the [build documentation](https://github.com/JuliaLang/julia/blo ### Uninstalling Julia -Julia does not install anything outside the directory it was cloned -into. Julia can be completely uninstalled by deleting this -directory. Julia packages are installed in `~/.julia` by default, and -can be uninstalled by deleting `~/.julia`. +By default, Julia does not install anything outside the directory it was cloned +into and `~/.julia`. Julia and the vast majority of Julia packages can be +completely uninstalled by deleting these two directories. 
## Source Code Organization diff --git a/THIRDPARTY.md b/THIRDPARTY.md index 4a35bbdb1b7ce..51950d9e2c6a1 100644 --- a/THIRDPARTY.md +++ b/THIRDPARTY.md @@ -24,6 +24,10 @@ own licenses: - [LLVM](https://releases.llvm.org/12.0.1/LICENSE.TXT) [APACHE 2.0 with LLVM Exception] - [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT] +and optionally: + +- [ITTAPI](https://github.com/intel/ittapi/blob/master/LICENSES/BSD-3-Clause.txt) [BSD-3] + Julia's `stdlib` uses the following external libraries, which have their own licenses: - [DSFMT](https://github.com/MersenneTwister-Lab/dSFMT/blob/master/LICENSE.txt) [BSD-3] diff --git a/VERSION b/VERSION index e889581dd8a30..0bc25cfcab2c1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.9.0-DEV +1.11.0-DEV diff --git a/base/.gitignore b/base/.gitignore index f7460230f217b..0fab5b41fda08 100644 --- a/base/.gitignore +++ b/base/.gitignore @@ -1,4 +1,11 @@ +/features_h.jl +/pcre_h.jl +/errno_h.jl +/build_h.jl /build_h.jl.phony +/file_constants.jl +/uv_constants.jl /version_git.jl /version_git.jl.phony /userimg.jl +/JuliaSyntax diff --git a/base/Base.jl b/base/Base.jl index 6fb7a7b897317..0ca13265adc4f 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -6,10 +6,19 @@ using Core.Intrinsics, Core.IR # to start, we're going to use a very simple definition of `include` # that doesn't require any function (except what we can get from the `Core` top-module) -const _included_files = Array{Tuple{Module,String},1}() +# start this big so that we don't have to resize before we have defined how to grow an array +const _included_files = Array{Tuple{Module,String},1}(Core.undef, 400) +setfield!(_included_files, :size, (1,)) function include(mod::Module, path::String) - ccall(:jl_array_grow_end, Cvoid, (Any, UInt), _included_files, UInt(1)) - Core.arrayset(true, _included_files, (mod, ccall(:jl_prepend_cwd, Any, (Any,), path)), arraylen(_included_files)) + len = getfield(_included_files.size, 1) + memlen = _included_files.ref.mem.length + lenp1 = Core.add_int(len, 1) + if len === memlen # by the time this is true we hopefully will have defined _growend! + _growend!(_included_files, UInt(1)) + else + setfield!(_included_files, :size, (lenp1,)) + end + Core.memoryrefset!(Core.memoryref(_included_files.ref, lenp1), (mod, ccall(:jl_prepend_cwd, Any, (Any,), path)), :not_atomic, true) Core.println(path) ccall(:jl_uv_flush, Nothing, (Ptr{Nothing},), Core.io_pointer(Core.stdout)) Core.include(mod, path) @@ -25,24 +34,32 @@ ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module) macro inline() Expr(:meta, :inline) end macro noinline() Expr(:meta, :noinline) end +macro _boundscheck() Expr(:boundscheck) end + # Try to help prevent users from shooting them-selves in the foot # with ambiguities by defining a few common and critical operations # (and these don't need the extra convert code) getproperty(x::Module, f::Symbol) = (@inline; getglobal(x, f)) getproperty(x::Type, f::Symbol) = (@inline; getfield(x, f)) setproperty!(x::Type, f::Symbol, v) = error("setfield! fields of Types should not be changed") +setproperty!(x::Array, f::Symbol, v) = error("setfield! fields of Array should not be changed") getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f)) setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error getproperty(x, f::Symbol) = (@inline; getfield(x, f)) -setproperty!(x, f::Symbol, v) = setfield!(x, f, convert(fieldtype(typeof(x), f), v)) +function setproperty!(x, f::Symbol, v) + ty = fieldtype(typeof(x), f) + val = v isa ty ? 
v : convert(ty, v) + return setfield!(x, f, val) +end dotgetproperty(x, f) = getproperty(x, f) getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order)) function setproperty!(x::Module, f::Symbol, v, order::Symbol=:monotonic) @inline - val::Core.get_binding_type(x, f) = v + ty = Core.get_binding_type(x, f) + val = v isa ty ? v : convert(ty, v) return setglobal!(x, f, val, order) end getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order)) @@ -51,14 +68,29 @@ getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order)) setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order)) -setproperty!(x, f::Symbol, v, order::Symbol) = (@inline; setfield!(x, f, convert(fieldtype(typeof(x), f), v), order)) +function setproperty!(x, f::Symbol, v, order::Symbol) + @inline + ty = fieldtype(typeof(x), f) + val = v isa ty ? v : convert(ty, v) + return setfield!(x, f, val, order) +end -swapproperty!(x, f::Symbol, v, order::Symbol=:notatomic) = - (@inline; Core.swapfield!(x, f, convert(fieldtype(typeof(x), f), v), order)) -modifyproperty!(x, f::Symbol, op, v, order::Symbol=:notatomic) = - (@inline; Core.modifyfield!(x, f, op, v, order)) -replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:notatomic, fail_order::Symbol=success_order) = - (@inline; Core.replacefield!(x, f, expected, convert(fieldtype(typeof(x), f), desired), success_order, fail_order)) +function swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic) + @inline + ty = fieldtype(typeof(x), f) + val = v isa ty ? v : convert(ty, v) + return Core.swapfield!(x, f, val, order) +end +function modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic) + @inline + return Core.modifyfield!(x, f, op, v, order) +end +function replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + @inline + ty = fieldtype(typeof(x), f) + val = desired isa ty ? desired : convert(ty, desired) + return Core.replacefield!(x, f, expected, val, success_order, fail_order) +end convert(::Type{Any}, Core.@nospecialize x) = x convert(::Type{T}, x::T) where {T} = x @@ -87,7 +119,7 @@ if false end """ - time_ns() + time_ns() -> UInt64 Get the time in nanoseconds. The time corresponding to 0 is undefined, and wraps every 5.8 years. """ @@ -95,6 +127,12 @@ time_ns() = ccall(:jl_hrtime, UInt64, ()) start_base_include = time_ns() +# A warning to be interpolated in the docstring of every dangerous mutating function in Base, see PR #50824 +const _DOCS_ALIASING_WARNING = """ +!!! warning + Behavior can be unexpected when any mutated argument shares memory with any other argument. +""" + ## Load essential files and libraries include("essentials.jl") include("ctypes.jl") @@ -103,13 +141,33 @@ include("generator.jl") include("reflection.jl") include("options.jl") +# define invoke(f, T, args...; kwargs...), without kwargs wrapping +# to forward to invoke +function Core.kwcall(kwargs::NamedTuple, ::typeof(invoke), f, T, args...) + @inline + # prepend kwargs and f to the invoked from the user + T = rewrap_unionall(Tuple{Core.Typeof(kwargs), Core.Typeof(f), (unwrap_unionall(T)::DataType).parameters...}, T) + return invoke(Core.kwcall, T, kwargs, f, args...) +end +# invoke does not have its own call cache, but kwcall for invoke does +setfield!(typeof(invoke).name.mt, :max_args, 3, :monotonic) # invoke, f, T, args... 
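A hypothetical usage sketch of the `Core.kwcall` forwarding for `invoke` defined above (the function `g` and its `scale` keyword are invented for illustration; the behaviour shown assumes the forwarding works as its comment describes):

```julia
g(x::Real; scale=1)    = scale * x
g(x::Integer; scale=1) = scale * x + 1

g(2; scale=10)                       # 21: ordinary dispatch selects the Integer method
invoke(g, Tuple{Real}, 2; scale=10)  # 20: forced to the Real method, keywords forwarded
```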
+ +# define applicable(f, T, args...; kwargs...), without kwargs wrapping +# to forward to applicable +function Core.kwcall(kwargs::NamedTuple, ::typeof(applicable), @nospecialize(args...)) + @inline + return applicable(Core.kwcall, kwargs, args...) +end +function Core._hasmethod(@nospecialize(f), @nospecialize(t)) # this function has a special tfunc (TODO: make this a Builtin instead like applicable) + tt = rewrap_unionall(Tuple{Core.Typeof(f), (unwrap_unionall(t)::DataType).parameters...}, t) + return Core._hasmethod(tt) +end + + # core operations & types include("promotion.jl") include("tuple.jl") include("expr.jl") -Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B} = (@inline; Pair{A, B}(convert(A, a)::A, convert(B, b)::B)) -#Pair{Any, B}(@nospecialize(a::Any), b) where {B} = (@inline; Pair{Any, B}(a, Base.convert(B, b)::B)) -#Pair{A, Any}(a, @nospecialize(b::Any)) where {A} = (@inline; Pair{A, Any}(Base.convert(A, a)::A, b)) include("pair.jl") include("traits.jl") include("range.jl") @@ -123,46 +181,77 @@ include("int.jl") include("operators.jl") include("pointer.jl") include("refvalue.jl") +include("cmem.jl") include("refpointer.jl") + +# now replace the Pair constructor (relevant for NamedTuples) with one that calls our Base.convert +delete_method(which(Pair{Any,Any}, (Any, Any))) +@eval function (P::Type{Pair{A, B}})(@nospecialize(a), @nospecialize(b)) where {A, B} + @inline + return $(Expr(:new, :P, :(a isa A ? a : convert(A, a)), :(b isa B ? b : convert(B, b)))) +end + +# The REPL stdlib hooks into Base using this Ref +const REPL_MODULE_REF = Ref{Module}() + include("checked.jl") using .Checked +function cld end +function fld end # Lazy strings include("strings/lazy.jl") # array structures include("indices.jl") +include("genericmemory.jl") include("array.jl") include("abstractarray.jl") include("subarray.jl") include("views.jl") include("baseext.jl") +include("c.jl") include("ntuple.jl") - include("abstractdict.jl") include("iddict.jl") include("idset.jl") - include("iterators.jl") using .Iterators: zip, enumerate, only using .Iterators: Flatten, Filter, product # for generators - +using .Iterators: Stateful # compat (was formerly used in reinterpretarray.jl) include("namedtuple.jl") # For OS specific stuff -include("../build_h.jl") -include("../version_git.jl") - -# These used to be in build_h.jl and are retained for backwards compatibility -const libblas_name = "libblastrampoline" -const liblapack_name = "libblastrampoline" +# We need to strcat things here, before strings are really defined +function strcat(x::String, y::String) + out = ccall(:jl_alloc_string, Ref{String}, (Csize_t,), Core.sizeof(x) + Core.sizeof(y)) + GC.@preserve x y out begin + out_ptr = unsafe_convert(Ptr{UInt8}, out) + unsafe_copyto!(out_ptr, unsafe_convert(Ptr{UInt8}, x), Core.sizeof(x)) + unsafe_copyto!(out_ptr + Core.sizeof(x), unsafe_convert(Ptr{UInt8}, y), Core.sizeof(y)) + end + return out +end +include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl")) # include($BUILDROOT/base/build_h.jl) +include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl) +# Initialize DL_LOAD_PATH as early as possible. We are defining things here in +# a slightly more verbose fashion than usual, because we're running so early. 
+const DL_LOAD_PATH = String[] +let os = ccall(:jl_get_UNAME, Any, ()) + if os === :Darwin || os === :Apple + if Base.DARWIN_FRAMEWORK + push!(DL_LOAD_PATH, "@loader_path/Frameworks") + end + push!(DL_LOAD_PATH, "@loader_path") + end +end # numeric operations include("hashing.jl") include("rounding.jl") -using .Rounding include("div.jl") +include("rawbigints.jl") include("float.jl") include("twiceprecision.jl") include("complex.jl") @@ -204,24 +293,24 @@ include("set.jl") # Strings include("char.jl") +function array_new_memory(mem::Memory{UInt8}, newlen::Int) + # add an optimization to array_new_memory for StringVector + if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) isa String + # If data is in a String, keep it that way. + # When implemented, this could use jl_gc_expand_string(oldstr, newlen) as an optimization + str = _string_n(newlen) + return (@assume_effects :total !:consistent @ccall jl_string_to_genericmemory(str::Any,)::Memory{UInt8}) + else + # TODO: when implemented, this should use a memory growing call + return typeof(mem)(undef, newlen) + end +end include("strings/basic.jl") include("strings/string.jl") include("strings/substring.jl") - -# Initialize DL_LOAD_PATH as early as possible. We are defining things here in -# a slightly more verbose fashion than usual, because we're running so early. -const DL_LOAD_PATH = String[] -let os = ccall(:jl_get_UNAME, Any, ()) - if os === :Darwin || os === :Apple - if Base.DARWIN_FRAMEWORK - push!(DL_LOAD_PATH, "@loader_path/Frameworks") - end - push!(DL_LOAD_PATH, "@loader_path") - end -end +include("strings/cstring.jl") include("osutils.jl") -include("c.jl") # Core I/O include("io.jl") @@ -253,26 +342,39 @@ include("missing.jl") # version include("version.jl") +# Concurrency (part 1) +include("linked_list.jl") +include("condition.jl") +include("threads.jl") +include("lock.jl") + # system & environment include("sysinfo.jl") include("libc.jl") -using .Libc: getpid, gethostname, time +using .Libc: getpid, gethostname, time, memcpy, memset, memmove, memcmp -# Logging -include("logging.jl") -using .CoreLogging +# These used to be in build_h.jl and are retained for backwards compatibility. +# NOTE: keep in sync with `libblastrampoline_jll.libblastrampoline`. +const libblas_name = "libblastrampoline" * (Sys.iswindows() ? 
"-5" : "") +const liblapack_name = libblas_name -# Concurrency -include("linked_list.jl") -include("condition.jl") -include("threads.jl") -include("lock.jl") +# Concurrency (part 2) +# Note that `atomics.jl` here should be deprecated +Core.eval(Threads, :(include("atomics.jl"))) include("channels.jl") include("partr.jl") include("task.jl") include("threads_overloads.jl") include("weakkeydict.jl") +# ScopedValues +include("scopedvalues.jl") +using .ScopedValues + +# Logging +include("logging.jl") +using .CoreLogging + include("env.jl") # functions defined in Random @@ -288,7 +390,7 @@ include("filesystem.jl") using .Filesystem include("cmd.jl") include("process.jl") -include("ttyhascolor.jl") +include("terminfo.jl") include("secretbuffer.jl") # core math functions @@ -297,6 +399,7 @@ include("math.jl") using .Math const (√)=sqrt const (∛)=cbrt +const (∜)=fourthroot # now switch to a simple, race-y TLS, relative include for the rest of Base delete_method(which(include, (Module, String))) @@ -378,6 +481,7 @@ include("summarysize.jl") include("errorshow.jl") include("initdefs.jl") +Filesystem.__postinit__() # worker threads include("threadcall.jl") @@ -386,22 +490,21 @@ include("threadcall.jl") include("uuid.jl") include("pkgid.jl") include("toml_parser.jl") +include("linking.jl") include("loading.jl") # misc useful functions & macros include("timing.jl") include("util.jl") - +include("client.jl") include("asyncmap.jl") # deprecated functions include("deprecated.jl") - -# Some basic documentation +# +# Some additional basic documentation include("docs/basedocs.jl") -include("client.jl") - # Documentation -- should always be included last in sysimg. include("docs/Docs.jl") using .Docs @@ -413,11 +516,20 @@ end for m in methods(include) delete_method(m) end + +# This method is here only to be overwritten during the test suite to test +# various sysimg related invalidation scenarios. +a_method_to_overwrite_in_test() = inferencebarrier(1) + # These functions are duplicated in client.jl/include(::String) for # nicer stacktraces. Modifications here have to be backported there include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +# External libraries vendored into Base +Core.println("JuliaSyntax/src/JuliaSyntax.jl") +include(@__MODULE__, string((length(Core.ARGS)>=2 ? 
Core.ARGS[2] : ""), "JuliaSyntax/src/JuliaSyntax.jl")) # include($BUILDROOT/base/JuliaSyntax/JuliaSyntax.jl) + end_base_include = time_ns() const _sysimage_modules = PkgId[] @@ -429,7 +541,7 @@ in_sysimage(pkgid::PkgId) = pkgid in _sysimage_modules for match = _methods(+, (Int, Int), -1, get_world_counter()) m = match.method delete!(push!(Set{Method}(), m), m) - copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match))) + copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match), typemax(UInt))) empty!(Set()) push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two)) @@ -479,8 +591,35 @@ for match = _methods(+, (Int, Int), -1, get_world_counter()) end if is_primary_base_module + +# Profiling helper +# triggers printing the report and (optionally) saving a heap snapshot after a SIGINFO/SIGUSR1 profile request +# Needs to be in Base because Profile is no longer loaded on boot +function profile_printing_listener(cond::Base.AsyncCondition) + profile = nothing + try + while _trywait(cond) + # this call to require is mostly legal, only because Profile has no dependencies and is usually in LOAD_PATH + profile = @something(profile, require(PkgId(UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile")))::Module + invokelatest(profile.peek_report[]) + if Base.get_bool_env("JULIA_PROFILE_PEEK_HEAP_SNAPSHOT", false) === true + println(stderr, "Saving heap snapshot...") + fname = invokelatest(profile.take_heap_snapshot) + println(stderr, "Heap snapshot saved to `$(fname)`") + end + end + catch ex + if !isa(ex, InterruptException) + @error "Profile printing listener crashed" exception=ex,catch_backtrace() + end + end + nothing +end + function __init__() # Base library init + global _atexit_hooks_finished = false + Filesystem.__postinit__() reinit_stdio() Multimedia.reinit_displays() # since Multimedia.displays uses stdout as fallback # initialize loading @@ -491,6 +630,32 @@ function __init__() if haskey(ENV, "JULIA_MAX_NUM_PRECOMPILE_FILES") MAX_NUM_PRECOMPILE_FILES[] = parse(Int, ENV["JULIA_MAX_NUM_PRECOMPILE_FILES"]) end + # Profiling helper + @static if !Sys.iswindows() + # triggering a profile via signals is not implemented on windows + cond = Base.AsyncCondition() + Base.uv_unref(cond.handle) + t = errormonitor(Threads.@spawn(profile_printing_listener(cond))) + atexit() do + # destroy this callback when exiting + ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL) + # this will prompt any ongoing or pending event to flush also + close(cond) + # error-propagation is not needed, since the errormonitor will handle printing that better + _wait(t) + end + finalizer(cond) do c + # if something goes south, still make sure we aren't keeping a reference in C to this + ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL) + end + ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), cond.handle) + end + _require_world_age[] = get_world_counter() + # Prevent spawned Julia process from getting stuck waiting on Tracy to connect. 
+ delete!(ENV, "JULIA_WAIT_FOR_TRACY") + if get_bool_env("JULIA_USE_FLISP_PARSER", false) === false + JuliaSyntax.enable_in_core!() + end nothing end @@ -499,5 +664,8 @@ end end +# Ensure this file is also tracked +@assert !isassigned(_included_files, 1) +_included_files[1] = (parentmodule(Base), abspath(@__FILE__)) end # baremodule Base diff --git a/base/Enums.jl b/base/Enums.jl index 413c880fcd3f2..45a1b66753484 100644 --- a/base/Enums.jl +++ b/base/Enums.jl @@ -17,10 +17,18 @@ abstract type Enum{T<:Integer} end basetype(::Type{<:Enum{T}}) where {T<:Integer} = T (::Type{T})(x::Enum{T2}) where {T<:Integer,T2<:Integer} = T(bitcast(T2, x))::T -Base.cconvert(::Type{T}, x::Enum{T2}) where {T<:Integer,T2<:Integer} = T(x) +Base.cconvert(::Type{T}, x::Enum{T2}) where {T<:Integer,T2<:Integer} = T(x)::T Base.write(io::IO, x::Enum{T}) where {T<:Integer} = write(io, T(x)) Base.read(io::IO, ::Type{T}) where {T<:Enum} = T(read(io, basetype(T))) +""" + _enum_hash(x::Enum, h::UInt) + +Compute hash for an enum value `x`. This internal method will be specialized +for every enum type created through [`@enum`](@ref). +""" +_enum_hash(x::Enum, h::UInt) = invoke(hash, Tuple{Any, UInt}, x, h) +Base.hash(x::Enum, h::UInt) = _enum_hash(x, h) Base.isless(x::T, y::T) where {T<:Enum} = isless(basetype(T)(x), basetype(T)(y)) Base.Symbol(x::Enum) = namemap(typeof(x))[Integer(x)]::Symbol @@ -37,7 +45,7 @@ function Base.show(io::IO, x::Enum) sym = _symbol(x) if !(get(io, :compact, false)::Bool) from = get(io, :module, Base.active_module()) - def = typeof(x).name.module + def = parentmodule(typeof(x)) if from === nothing || !Base.isvisible(sym, def, from) show(io, def) print(io, ".") @@ -206,8 +214,12 @@ macro enum(T::Union{Symbol,Expr}, syms...) Enums.namemap(::Type{$(esc(typename))}) = $(esc(namemap)) Base.typemin(x::Type{$(esc(typename))}) = $(esc(typename))($lo) Base.typemax(x::Type{$(esc(typename))}) = $(esc(typename))($hi) - let enum_hash = hash($(esc(typename))) - Base.hash(x::$(esc(typename)), h::UInt) = hash(enum_hash, hash(Integer(x), h)) + let type_hash = hash($(esc(typename))) + # Use internal `_enum_hash` to allow users to specialize + # `Base.hash` for their own enum types without overwriting the + # method we would define here. This avoids a warning for + # precompilation. + Enums._enum_hash(x::$(esc(typename)), h::UInt) = hash(type_hash, hash(Integer(x), h)) end let insts = (Any[ $(esc(typename))(v) for v in $values ]...,) Base.instances(::Type{$(esc(typename))}) = insts diff --git a/base/Makefile b/base/Makefile index 72b3ed145605e..ad2bb6a63ccc0 100644 --- a/base/Makefile +++ b/base/Makefile @@ -1,27 +1,14 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) BUILDDIR := . JULIAHOME := $(abspath $(SRCDIR)/..) -include $(JULIAHOME)/deps/Versions.make include $(JULIAHOME)/Make.inc -TAGGED_RELEASE_BANNER := "" - -all: - -BASE_SRCS := $(patsubst ./%,%,$(shell cd $(SRCDIR) && find . 
-name \*.jl -and -not -name version_git.jl -and -not -name '*.phony')) -GENERATED_SRCS := pcre_h.jl errno_h.jl build_h.jl.phony features_h.jl file_constants.jl uv_constants.jl version_git.jl.phony +# import LLVM_SHARED_LIB_NAME +include $(JULIAHOME)/deps/llvm-ver.make -GENERATED_DSTS := $(addprefix $(build_datarootdir)/julia/src/,$(GENERATED_SRCS)) -BASE_DSTS := $(addprefix $(build_datarootdir)/julia/src/base/,$(BASE_SRCS)) $(GENERATED_DSTS) -BASE_DIRS := $(sort $(dir $(BASE_DSTS))) -$(foreach dir,$(BASE_DIRS),$(eval $(call dir_target,$(dir)))) - -# we might like to add "| $(BASE_DIRS)" here, but that causes many version of 'make' to get confused and fail to build consistently -$(build_datarootdir)/julia/src/base/%.jl: $(SRCDIR)/%.jl - @mkdir -p $(dir $@) - cp $< $@ +TAGGED_RELEASE_BANNER := "" -all: $(BASE_DSTS) +all: $(addprefix $(BUILDDIR)/,pcre_h.jl errno_h.jl build_h.jl.phony features_h.jl file_constants.jl uv_constants.jl version_git.jl.phony) PCRE_CONST := 0x[0-9a-fA-F]+|[0-9]+|\([\-0-9]+\) ifeq ($(USE_SYSTEM_PCRE), 1) @@ -32,37 +19,29 @@ endif define parse_features @echo "# $(2) features" >> $@ -@$(call PRINT_PERL, cat $(JULIAHOME)/src/features_$(1).h | \ - perl -lne 'print "const JL_$(2)_$$1 = UInt32($$2)" if /^\s*JL_FEATURE_DEF(?:_NAME)?\(\s*(\w+)\s*,\s*([^,]+)\s*,.*\)\s*(?:\/\/.*)?$$/' >> $@) +@$(call PRINT_PERL, cat $(SRCDIR)/../src/features_$(1).h | perl -lne 'print "const JL_$(2)_$$1 = UInt32($$2)" if /^\s*JL_FEATURE_DEF(?:_NAME)?\(\s*(\w+)\s*,\s*([^,]+)\s*,.*\)\s*(?:\/\/.*)?$$/' >> $@) @echo >> $@ endef -$(build_datarootdir)/julia/src/features_h.jl: $(JULIAHOME)/src/features_x86.h $(JULIAHOME)/src/features_aarch32.h $(JULIAHOME)/src/features_aarch64.h - @mkdir -p $(dir $@) +$(BUILDDIR)/features_h.jl: $(SRCDIR)/../src/features_x86.h $(SRCDIR)/../src/features_aarch32.h $(SRCDIR)/../src/features_aarch64.h @-rm -f $@ @$(call parse_features,x86,X86) @$(call parse_features,aarch32,AArch32) @$(call parse_features,aarch64,AArch64) -$(build_datarootdir)/julia/src/pcre_h.jl: $(PCRE_INCL_PATH) - @mkdir -p $(dir $@) +$(BUILDDIR)/pcre_h.jl: $(PCRE_INCL_PATH) @$(call PRINT_PERL, $(CPP) -D PCRE2_CODE_UNIT_WIDTH=8 -dM $< | perl -nle '/^\s*#define\s+PCRE2_(\w*)\s*\(?($(PCRE_CONST))\)?u?\s*$$/ and print index($$1, "ERROR_") == 0 ? 
"const $$1 = Cint($$2)" : "const $$1 = UInt32($$2)"' | LC_ALL=C sort > $@) -$(build_datarootdir)/julia/src/errno_h.jl: - @mkdir -p $(dir $@) +$(BUILDDIR)/errno_h.jl: @$(call PRINT_PERL, echo '#include ' | $(CPP) -dM - | perl -nle 'print "const $$1 = Int32($$2)" if /^#define\s+(E\w+)\s+(\d+)\s*$$/' | LC_ALL=C sort > $@) -$(build_datarootdir)/julia/src/file_constants.jl: $(JULIAHOME)/src/file_constants.h - @mkdir -p $(dir $@) +$(BUILDDIR)/file_constants.jl: $(SRCDIR)/../src/file_constants.h @$(call PRINT_PERL, $(CPP_STDOUT) -DJULIA $< | perl -nle 'print "$$1 0o$$2" if /^(\s*const\s+[A-z_]+\s+=)\s+(0[0-9]*)\s*$$/; print "$$1" if /^\s*(const\s+[A-z_]+\s+=\s+([1-9]|0x)[0-9A-z]*)\s*$$/' > $@) -$(build_datarootdir)/julia/src/uv_constants.jl: $(JULIAHOME)/src/uv_constants.h $(LIBUV_INC)/uv/errno.h - @mkdir -p $(dir $@) +$(BUILDDIR)/uv_constants.jl: $(SRCDIR)/../src/uv_constants.h $(LIBUV_INC)/uv/errno.h @$(call PRINT_PERL, $(CPP_STDOUT) "-I$(LIBUV_INC)" -DJULIA $< | tail -n 16 > $@) -$(build_datarootdir)/julia/src/build_h.jl.phony: $(BUILDDIR)/build_h.jl.phony $(BUILDDIR)/build_h.jl.phony: - @mkdir -p $(build_datarootdir)/julia/src @echo "# This file is automatically generated in base/Makefile" > $@ ifeq ($(XC_HOST),) @echo "const MACHINE = \"$(BUILD_MACHINE)\"" >> $@ @@ -81,6 +60,7 @@ else @echo "const USE_GPL_LIBS = false" >> $@ endif @echo "const libllvm_version_string = \"$$($(LLVM_CONFIG_HOST) --version)\"" >> $@ + @echo "const libllvm_name = \"$(LLVM_SHARED_LIB_NAME)\"" >> $@ @echo "const VERSION_STRING = \"$(JULIA_VERSION)\"" >> $@ @echo "const TAGGED_RELEASE_BANNER = \"$(TAGGED_RELEASE_BANNER)\"" >> $@ ifeq ($(OS),WINNT) @@ -90,6 +70,7 @@ ifeq ($(OS),WINNT) @printf 'const LIBDIR = "%s"\n' '$(subst /,\\,$(libdir_rel))' >> $@ @printf 'const LIBEXECDIR = "%s"\n' '$(subst /,\\,$(libexecdir_rel))' >> $@ @printf 'const PRIVATE_LIBDIR = "%s"\n' '$(subst /,\\,$(private_libdir_rel))' >> $@ + @printf 'const PRIVATE_LIBEXECDIR = "%s"\n' '$(subst /,\\,$(private_libexecdir_rel))' >> $@ @printf 'const INCLUDEDIR = "%s"\n' '$(subst /,\\,$(includedir_rel))' >> $@ else @echo "const SYSCONFDIR = \"$(sysconfdir_rel)\"" >> $@ @@ -98,6 +79,7 @@ else @echo "const LIBDIR = \"$(libdir_rel)\"" >> $@ @echo "const LIBEXECDIR = \"$(libexecdir_rel)\"" >> $@ @echo "const PRIVATE_LIBDIR = \"$(private_libdir_rel)\"" >> $@ + @echo "const PRIVATE_LIBEXECDIR = \"$(private_libexecdir_rel)\"" >> $@ @echo "const INCLUDEDIR = \"$(includedir_rel)\"" >> $@ endif ifeq ($(DARWIN_FRAMEWORK), 1) @@ -105,49 +87,44 @@ ifeq ($(DARWIN_FRAMEWORK), 1) @echo "const DARWIN_FRAMEWORK_NAME = \"$(FRAMEWORK_NAME)\"" >> $@ else @echo "const DARWIN_FRAMEWORK = false" >> $@ +endif +ifeq ($(OS), Darwin) + @echo "const MACOS_PRODUCT_VERSION = \"$(shell sw_vers -productVersion)\"" >> $@ + @echo "const MACOS_PLATFORM_VERSION = \"$(shell xcrun --show-sdk-version)\"" >> $@ endif @echo "const BUILD_TRIPLET = \"$(BB_TRIPLET_LIBGFORTRAN_CXXABI)\"" >> $@ @# This to ensure that we always rebuild this file, but only when it is modified do we touch build_h.jl, @# ensuring we rebuild the system image as infrequently as possible - @if ! cmp -s $@ $(build_datarootdir)/julia/src/build_h.jl; then \ + @if ! 
cmp -s $@ build_h.jl; then \ $(call PRINT_PERL,) \ - mv $@ $(build_datarootdir)/julia/src/build_h.jl; \ + mv $@ build_h.jl; \ else \ rm -f $@; \ fi -$(build_datarootdir)/julia/src/version_git.jl.phony: $(BUILDDIR)/version_git.jl.phony $(BUILDDIR)/version_git.jl.phony: $(SRCDIR)/version_git.sh - @mkdir -p $(build_datarootdir)/julia/src -ifneq ($(NO_GIT),1) - @sh $< $(SRCDIR) > $@ -else ifeq ($(shell [ -f $(BUILDDIR)/version_git.jl ] && echo "true"), true) - @# Give warning if boilerplate git is found here - @if grep -q "Default output if git is not available" $(BUILDDIR)/version_git.jl; then \ - echo "WARNING: Using boilerplate git version info" >&2; \ - fi - @cp $(BUILDDIR)/version_git.jl $@ -else ifeq ($(shell [ -f $(SRCDIR)/version_git.jl ] && echo "true"), true) - @# Give warning if boilerplate git is found here - @if grep -q "Default output if git is not available" $(SRCDIR)/version_git.jl; then \ - echo "WARNING: Using boilerplate git version info" >&2; \ - fi - @cp $(SRCDIR)/version_git.jl $@ -else - $(warning "WARNING: Generating boilerplate git version info") - @sh $< $(SRCDIR) NO_GIT > $@ -endif +ifneq ($(NO_GIT), 1) + sh $< $(SRCDIR) > $@ @# This to avoid touching version_git.jl when it is not modified, @# so that the system image does not need to be rebuilt. - @if ! cmp -s $@ $(build_datarootdir)/julia/src/version_git.jl; then \ + @if ! cmp -s $@ version_git.jl; then \ $(call PRINT_PERL,) \ - mv $@ $(build_datarootdir)/julia/src/version_git.jl; \ + mv $@ version_git.jl; \ else \ rm -f $@; \ fi -$(BUILDDIR)/version_git.jl: $(SRCDIR)/version_git.sh - sh $< $(SRCDIR) > $@ +else +ifeq ($(shell [ -f $(BUILDDIR)/version_git.jl ] && echo "true"), true) + @# Give warning if boilerplate git is used + @if grep -q "Default output if git is not available" $(BUILDDIR)/version_git.jl; then \ + echo "WARNING: Using boilerplate git version info" >&2; \ + fi +else + $(warning "WARNING: Generating boilerplate git version info") + @sh $(SRCDIR)/version_git.sh $(SRCDIR) NO_GIT > $(BUILDDIR)/version_git.jl +endif +endif ifeq (,$(filter $(OS), WINNT emscripten)) # For any USE_SYSTEM_* libraries that will be dynamically loaded by libjulia, @@ -195,7 +172,7 @@ endif define symlink_system_library libname_$2 := $$(notdir $(call versioned_libname,$2,$3)) libpath_$2 := $$(shell $$(call spawn,$$(LIBWHICH)) -p $$(libname_$2) 2>/dev/null) -symlink_$2: $$(build_private_libdir)/$$(libname_$2) .FORCE +symlink_$2: $$(build_private_libdir)/$$(libname_$2) $$(build_private_libdir)/$$(libname_$2): @if [ -e "$$(libpath_$2)" ]; then \ REALPATH=$$(libpath_$2); \ @@ -228,6 +205,18 @@ $(build_bindir)/7z$(EXE): rm -f "$@" && \ ln -svf "$(7Z_PATH)" "$@" +symlink_lld: $(build_bindir)/lld$(EXE) + +ifneq ($(USE_SYSTEM_LLD),0) +SYMLINK_SYSTEM_LIBRARIES += symlink_lld +LLD_PATH := $(shell which lld$(EXE)) +endif + +$(build_bindir)/lld$(EXE): + [ -e "$(LLD_PATH)" ] && \ + rm -f "$@" && \ + ln -svf "$(LLD_PATH)" "$@" + # the following excludes: libuv.a, libutf8proc.a ifneq ($(USE_SYSTEM_LIBM),0) @@ -236,11 +225,21 @@ else ifneq ($(USE_SYSTEM_OPENLIBM),0) $(eval $(call symlink_system_library,OPENLIBM,$(LIBMNAME))) endif -ifeq ($(APPLE_ARCH),arm64) +# On macOS, libgcc_s has soversion 1.1 always on aarch64 and only for GCC 12+ +# (-> libgfortran 5) on x86_64 +ifeq ($(OS),Darwin) +ifeq ($(ARCH),aarch64) +$(eval $(call symlink_system_library,CSL,libgcc_s,1.1)) +else +ifeq ($(LIBGFORTRAN_VERSION),5) $(eval $(call symlink_system_library,CSL,libgcc_s,1.1)) else $(eval $(call symlink_system_library,CSL,libgcc_s,1)) endif +endif +else 
+$(eval $(call symlink_system_library,CSL,libgcc_s,1)) +endif ifneq (,$(LIBGFORTRAN_VERSION)) $(eval $(call symlink_system_library,CSL,libgfortran,$(LIBGFORTRAN_VERSION))) endif @@ -270,9 +269,11 @@ $(eval $(call symlink_system_library,LIBSUITESPARSE,libamd)) $(eval $(call symlink_system_library,LIBSUITESPARSE,libcamd)) $(eval $(call symlink_system_library,LIBSUITESPARSE,libccolamd)) $(eval $(call symlink_system_library,LIBSUITESPARSE,libcholmod)) +$(eval $(call symlink_system_library,LIBSUITESPARSE,libcholmod_cuda)) $(eval $(call symlink_system_library,LIBSUITESPARSE,libcolamd)) $(eval $(call symlink_system_library,LIBSUITESPARSE,libumfpack)) $(eval $(call symlink_system_library,LIBSUITESPARSE,libspqr)) +$(eval $(call symlink_system_library,LIBSUITESPARSE,libspqr_cuda)) $(eval $(call symlink_system_library,LIBSUITESPARSE,libsuitesparseconfig)) # EXCLUDED LIBRARIES (installed/used, but not vendored for use with dlopen): # libunwind @@ -301,10 +302,7 @@ endif symlink_system_libraries: $(SYMLINK_SYSTEM_LIBRARIES) -.FORCE: -.PHONY: $(BUILDDIR)/version_git.jl $(BUILDDIR)/version_git.jl.phony $(build_datarootdir)/julia/src/version_git.jl.phony \ - $(BUILDDIR)/build_h.jl.phony $(build_datarootdir)/julia/src/build_h.jl.phony \ - clean all .FORCE +.PHONY: $(BUILDDIR)/build_h.jl.phony $(BUILDDIR)/version_git.jl.phony clean all symlink_* clean: -rm -f $(BUILDDIR)/pcre_h.jl @@ -316,5 +314,4 @@ clean: -rm -f $(BUILDDIR)/file_constants.jl -rm -f $(BUILDDIR)/version_git.jl -rm -f $(BUILDDIR)/version_git.jl.phony - -rm -rf $(build_datarootdir)/julia/src/* -rm -f $(build_private_libdir)/lib*.$(SHLIB_EXT)* diff --git a/base/abstractarray.jl b/base/abstractarray.jl index b710c35a0876b..200586213cc90 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -14,8 +14,8 @@ See also: [`AbstractVector`](@ref), [`AbstractMatrix`](@ref), [`eltype`](@ref), AbstractArray convert(::Type{T}, a::T) where {T<:AbstractArray} = a -convert(::Type{AbstractArray{T}}, a::AbstractArray) where {T} = AbstractArray{T}(a) -convert(::Type{AbstractArray{T,N}}, a::AbstractArray{<:Any,N}) where {T,N} = AbstractArray{T,N}(a) +convert(::Type{AbstractArray{T}}, a::AbstractArray) where {T} = AbstractArray{T}(a)::AbstractArray{T} +convert(::Type{AbstractArray{T,N}}, a::AbstractArray{<:Any,N}) where {T,N} = AbstractArray{T,N}(a)::AbstractArray{T,N} """ size(A::AbstractArray, [dim]) @@ -55,6 +55,9 @@ julia> A = fill(1, (5,6,7)); julia> axes(A, 2) Base.OneTo(6) + +julia> axes(A, 4) == 1:1 # all dimensions d > ndims(A) have size 1 +true ``` # Usage note @@ -92,7 +95,7 @@ julia> axes(A) """ function axes(A) @inline - map(oneto, size(A)) + map(unchecked_oneto, size(A)) end """ @@ -104,10 +107,13 @@ If multiple arguments are passed, equivalent to `has_offset_axes(A) | has_offset See also [`require_one_based_indexing`](@ref). """ -has_offset_axes(A) = _tuple_any(x->Int(first(x))::Int != 1, axes(A)) +has_offset_axes(A) = _any_tuple(x->Int(first(x))::Int != 1, false, axes(A)...) has_offset_axes(A::AbstractVector) = Int(firstindex(A))::Int != 1 # improve performance of a common case (ranges) -has_offset_axes(A...) = _tuple_any(has_offset_axes, A) +# Use `_any_tuple` to avoid unneeded invoke. +# note: this could call `any` directly if the compiler can infer it +has_offset_axes(As...) = _any_tuple(has_offset_axes, false, As...) 
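For orientation, a small sketch of how the reworked `has_offset_axes` methods above behave (illustration only; `OffsetArrays` is an external package assumed to be installed, and is not part of this patch):

```julia
using OffsetArrays

Base.has_offset_axes(zeros(3))                   # false: ordinary 1-based Array
Base.has_offset_axes(OffsetArray(zeros(3), -1))  # true: the axes of this array are 0:2
Base.has_offset_axes(zeros(3), 1:4)              # false: the vararg method checks each argument
```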
has_offset_axes(::Colon) = false +has_offset_axes(::Array) = false """ require_one_based_indexing(A::AbstractArray) @@ -133,13 +139,25 @@ axes1(iter) = oneto(length(iter)) Return an efficient array describing all valid indices for `a` arranged in the shape of `a` itself. -They keys of 1-dimensional arrays (vectors) are integers, whereas all other N-dimensional +The keys of 1-dimensional arrays (vectors) are integers, whereas all other N-dimensional arrays use [`CartesianIndex`](@ref) to describe their locations. Often the special array types [`LinearIndices`](@ref) and [`CartesianIndices`](@ref) are used to efficiently represent these arrays of integers and `CartesianIndex`es, respectively. Note that the `keys` of an array might not be the most efficient index type; for maximum performance use [`eachindex`](@ref) instead. + +# Examples +```jldoctest +julia> keys([4, 5, 6]) +3-element LinearIndices{1, Tuple{Base.OneTo{Int64}}}: + 1 + 2 + 3 + +julia> keys([4 5; 6 7]) +CartesianIndices((2, 2)) +``` """ keys(a::AbstractArray) = CartesianIndices(axes(a)) keys(a::AbstractVector) = LinearIndices(a) @@ -149,7 +167,7 @@ keys(a::AbstractVector) = LinearIndices(a) keytype(A::AbstractArray) Return the key type of an array. This is equal to the -`eltype` of the result of `keys(...)`, and is provided +[`eltype`](@ref) of the result of `keys(...)`, and is provided mainly for compatibility with the dictionary interface. # Examples @@ -165,17 +183,19 @@ CartesianIndex{2} For arrays, this function requires at least Julia 1.2. """ keytype(a::AbstractArray) = keytype(typeof(a)) +keytype(::Type{Union{}}, slurp...) = eltype(Union{}) keytype(A::Type{<:AbstractArray}) = CartesianIndex{ndims(A)} keytype(A::Type{<:AbstractVector}) = Int valtype(a::AbstractArray) = valtype(typeof(a)) +valtype(::Type{Union{}}, slurp...) = eltype(Union{}) """ valtype(T::Type{<:AbstractArray}) valtype(A::AbstractArray) -Return the value type of an array. This is identical to `eltype` and is +Return the value type of an array. This is identical to [`eltype`](@ref) and is provided mainly for compatibility with the dictionary interface. # Examples @@ -214,14 +234,14 @@ UInt8 ``` """ eltype(::Type) = Any -eltype(::Type{Bottom}) = throw(ArgumentError("Union{} does not have elements")) +eltype(::Type{Bottom}, slurp...) = throw(ArgumentError("Union{} does not have elements")) eltype(x) = eltype(typeof(x)) eltype(::Type{<:AbstractArray{E}}) where {E} = @isdefined(E) ? E : Any """ elsize(type) -Compute the memory stride in bytes between consecutive elements of `eltype` +Compute the memory stride in bytes between consecutive elements of [`eltype`](@ref) stored inside the given `type`, if the array elements are stored densely with a uniform linear stride. @@ -248,8 +268,9 @@ julia> ndims(A) 3 ``` """ -ndims(::AbstractArray{T,N}) where {T,N} = N -ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N +ndims(::AbstractArray{T,N}) where {T,N} = N::Int +ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N::Int +ndims(::Type{Union{}}, slurp...) = throw(ArgumentError("Union{} does not have elements")) """ length(collection) -> Integer @@ -306,31 +327,43 @@ end """ eachindex(A...) + eachindex(::IndexStyle, A::AbstractArray...) Create an iterable object for visiting each index of an `AbstractArray` `A` in an efficient manner. For array types that have opted into fast linear indexing (like `Array`), this is -simply the range `1:length(A)`. 
For other array types, return a specialized Cartesian -range to efficiently index into the array with indices specified for every dimension. For -other iterables, including strings and dictionaries, return an iterator object -supporting arbitrary index types (e.g. unevenly spaced or non-integer indices). +simply the range `1:length(A)` if they use 1-based indexing. +For array types that have not opted into fast linear indexing, a specialized Cartesian +range is typically returned to efficiently index into the array with indices specified +for every dimension. + +In general `eachindex` accepts arbitrary iterables, including strings and dictionaries, and returns +an iterator object supporting arbitrary index types (e.g. unevenly spaced or non-integer indices). + +If `A` is `AbstractArray` it is possible to explicitly specify the style of the indices that +should be returned by `eachindex` by passing a value having `IndexStyle` type as its first argument +(typically `IndexLinear()` if linear indices are required or `IndexCartesian()` if Cartesian +range is wanted). If you supply more than one `AbstractArray` argument, `eachindex` will create an -iterable object that is fast for all arguments (a [`UnitRange`](@ref) -if all inputs have fast linear indexing, a [`CartesianIndices`](@ref) -otherwise). +iterable object that is fast for all arguments (typically a [`UnitRange`](@ref) +if all inputs have fast linear indexing, a [`CartesianIndices`](@ref) otherwise). If the arrays have different sizes and/or dimensionalities, a `DimensionMismatch` exception will be thrown. + +See also [`pairs`](@ref)`(A)` to iterate over indices and values together, +and [`axes`](@ref)`(A, 2)` for valid indices along one dimension. + # Examples ```jldoctest -julia> A = [1 2; 3 4]; +julia> A = [10 20; 30 40]; julia> for i in eachindex(A) # linear indexing - println(i) + println("A[", i, "] == ", A[i]) end -1 -2 -3 -4 +A[1] == 10 +A[2] == 30 +A[3] == 20 +A[4] == 40 julia> for i in eachindex(view(A, 1:2, 1:1)) # Cartesian indexing println(i) @@ -413,7 +446,7 @@ julia> firstindex(rand(3,4,5), 2) firstindex(a::AbstractArray) = (@inline; first(eachindex(IndexLinear(), a))) firstindex(a, d) = (@inline; first(axes(a, d))) -first(a::AbstractArray) = a[first(eachindex(a))] +@propagate_inbounds first(a::AbstractArray) = a[first(eachindex(a))] """ first(coll) @@ -466,8 +499,8 @@ Bool[] first(itr, n::Integer) = collect(Iterators.take(itr, n)) # Faster method for vectors function first(v::AbstractVector, n::Integer) - n < 0 && throw(ArgumentError("Number of elements must be nonnegative")) - @inbounds v[begin:min(begin + n - 1, end)] + n < 0 && throw(ArgumentError("Number of elements must be non-negative")) + v[range(begin, length=min(n, checked_length(v)))] end """ @@ -516,8 +549,8 @@ Float64[] last(itr, n::Integer) = reverse!(collect(Iterators.take(Iterators.reverse(itr), n))) # Faster method for arrays function last(v::AbstractVector, n::Integer) - n < 0 && throw(ArgumentError("Number of elements must be nonnegative")) - @inbounds v[max(begin, end - n + 1):end] + n < 0 && throw(ArgumentError("Number of elements must be non-negative")) + v[range(stop=lastindex(v), length=min(n, checked_length(v)))] end """ @@ -571,20 +604,6 @@ end size_to_strides(s, d) = (s,) size_to_strides(s) = () - -function isassigned(a::AbstractArray, i::Integer...) - try - a[i...] 
- true - catch e - if isa(e, BoundsError) || isa(e, UndefRefError) - return false - else - rethrow() - end - end -end - function isstored(A::AbstractArray{<:Any,N}, I::Vararg{Integer,N}) where {N} @boundscheck checkbounds(A, I...) return true @@ -712,8 +731,6 @@ end checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@inline; all(x->length(x)==1, IA)) checkbounds_indices(::Type{Bool}, ::Tuple{}, ::Tuple{}) = true -throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I))) - # check along a single dimension """ checkindex(Bool, inds::AbstractUnitRange, index) @@ -737,6 +754,8 @@ false checkindex(::Type{Bool}, inds::AbstractUnitRange, i) = throw(ArgumentError("unable to check bounds for indices of type $(typeof(i))")) checkindex(::Type{Bool}, inds::AbstractUnitRange, i::Real) = (first(inds) <= i) & (i <= last(inds)) +checkindex(::Type{Bool}, inds::IdentityUnitRange, i::Real) = checkindex(Bool, inds.indices, i) +checkindex(::Type{Bool}, inds::OneTo{T}, i::T) where {T<:BitInteger} = unsigned(i - one(i)) < unsigned(last(inds)) checkindex(::Type{Bool}, inds::AbstractUnitRange, ::Colon) = true checkindex(::Type{Bool}, inds::AbstractUnitRange, ::Slice) = true function checkindex(::Type{Bool}, inds::AbstractUnitRange, r::AbstractRange) @@ -884,6 +903,8 @@ If `dst` and `src` are of the same type, `dst == src` should hold after the call. If `dst` and `src` are multidimensional arrays, they must have equal [`axes`](@ref). +$(_DOCS_ALIASING_WARNING) + See also [`copyto!`](@ref). !!! compat "Julia 1.1" @@ -891,13 +912,12 @@ See also [`copyto!`](@ref). is available from the `Future` standard library as `Future.copy!`. """ function copy!(dst::AbstractVector, src::AbstractVector) + firstindex(dst) == firstindex(src) || throw(ArgumentError( + "vectors must have the same offset for copy! (consider using `copyto!`)")) if length(dst) != length(src) resize!(dst, length(src)) end - for i in eachindex(dst, src) - @inbounds dst[i] = src[i] - end - dst + copyto!(dst, src) end function copy!(dst::AbstractArray, src::AbstractArray) @@ -908,7 +928,7 @@ end ## from general iterable to any array -# This is `@Experimental.max_methods 1 function copyto! end`, which is not +# This is `Experimental.@max_methods 1 function copyto! end`, which is not # defined at this point in bootstrap. typeof(function copyto! end).name.max_methods = UInt8(1) @@ -973,7 +993,7 @@ end # this method must be separate from the above since src might not have a length function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer, n::Integer) n < 0 && throw(ArgumentError(LazyString("tried to copy n=",n, - ", elements, but n should be nonnegative"))) + ", elements, but n should be non-negative"))) n == 0 && return dest dmax = dstart + n - 1 inds = LinearIndices(dest) @@ -1014,6 +1034,8 @@ the other elements are left untouched. See also [`copy!`](@ref Base.copy!), [`copy`](@ref). 
+$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> x = [1., 0., 3., 0., 5.]; @@ -1035,6 +1057,10 @@ julia> y """ function copyto!(dest::AbstractArray, src::AbstractArray) isempty(src) && return dest + if dest isa BitArray + # avoid ambiguities with other copyto!(::AbstractArray, ::SourceArray) methods + return _copyto_bitarray!(dest, src) + end src′ = unalias(dest, src) copyto_unaliased!(IndexStyle(dest), dest, IndexStyle(src′), src′) end @@ -1056,13 +1082,22 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle: if srcstyle isa IndexLinear # Single-index implementation @inbounds for i in srcinds - dest[i + Δi] = src[i] + if isassigned(src, i) + dest[i + Δi] = src[i] + else + _unsetindex!(dest, i + Δi) + end end else # Dual-index implementation i = idf - 1 - @inbounds for a in src - dest[i+=1] = a + @inbounds for a in eachindex(src) + i += 1 + if isassigned(src, a) + dest[i] = src[a] + else + _unsetindex!(dest, i) + end end end else @@ -1070,14 +1105,22 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle: if iterdest == itersrc # Shared-iterator implementation for I in iterdest - @inbounds dest[I] = src[I] + if isassigned(src, I) + @inbounds dest[I] = src[I] + else + _unsetindex!(dest, I) + end end else # Dual-iterator implementation ret = iterate(iterdest) - @inbounds for a in src - idx, state = ret - dest[idx] = a + @inbounds for a in itersrc + idx, state = ret::NTuple{2,Any} + if isassigned(src, a) + dest[idx] = src[a] + else + _unsetindex!(dest, idx) + end ret = iterate(iterdest, state) end end @@ -1096,16 +1139,21 @@ function copyto!(dest::AbstractArray, dstart::Integer, src::AbstractArray, sstar end function copyto!(dest::AbstractArray, dstart::Integer, - src::AbstractArray, sstart::Integer, - n::Integer) + src::AbstractArray, sstart::Integer, + n::Integer) n == 0 && return dest n < 0 && throw(ArgumentError(LazyString("tried to copy n=", - n," elements, but n should be nonnegative"))) + n," elements, but n should be non-negative"))) destinds, srcinds = LinearIndices(dest), LinearIndices(src) (checkbounds(Bool, destinds, dstart) && checkbounds(Bool, destinds, dstart+n-1)) || throw(BoundsError(dest, dstart:dstart+n-1)) (checkbounds(Bool, srcinds, sstart) && checkbounds(Bool, srcinds, sstart+n-1)) || throw(BoundsError(src, sstart:sstart+n-1)) - @inbounds for i = 0:(n-1) - dest[dstart+i] = src[sstart+i] + src′ = unalias(dest, src) + @inbounds for i = 0:n-1 + if isassigned(src′, sstart+i) + dest[dstart+i] = src′[sstart+i] + else + _unsetindex!(dest, dstart+i) + end end return dest end @@ -1116,7 +1164,7 @@ function copy(a::AbstractArray) end function copyto!(B::AbstractVecOrMat{R}, ir_dest::AbstractRange{Int}, jr_dest::AbstractRange{Int}, - A::AbstractVecOrMat{S}, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int}) where {R,S} + A::AbstractVecOrMat{S}, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int}) where {R,S} if length(ir_dest) != length(ir_src) throw(ArgumentError(LazyString("source and destination must have same size (got ", length(ir_src)," and ",length(ir_dest),")"))) @@ -1127,11 +1175,12 @@ function copyto!(B::AbstractVecOrMat{R}, ir_dest::AbstractRange{Int}, jr_dest::A end @boundscheck checkbounds(B, ir_dest, jr_dest) @boundscheck checkbounds(A, ir_src, jr_src) + A′ = unalias(B, A) jdest = first(jr_dest) for jsrc in jr_src idest = first(ir_dest) for isrc in ir_src - @inbounds B[idest,jdest] = A[isrc,jsrc] + @inbounds B[idest,jdest] = A′[isrc,jsrc] idest += step(ir_dest) end jdest += step(jr_dest) @@ 
-1139,10 +1188,10 @@ function copyto!(B::AbstractVecOrMat{R}, ir_dest::AbstractRange{Int}, jr_dest::A return B end -function copyto_axcheck!(dest, src) - @noinline checkaxs(axd, axs) = axd == axs || throw(DimensionMismatch("axes must agree, got $axd and $axs")) +@noinline _checkaxs(axd, axs) = axd == axs || throw(DimensionMismatch("axes must agree, got $axd and $axs")) - checkaxs(axes(dest), axes(src)) +function copyto_axcheck!(dest, src) + _checkaxs(axes(dest), axes(src)) copyto!(dest, src) end @@ -1203,10 +1252,10 @@ end # note: the following type definitions don't mean any AbstractArray is convertible to # a data Ref. they just map the array element type to the pointer type for # convenience in cases that work. -pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, x) +pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, cconvert(Ptr{T}, x)) function pointer(x::AbstractArray{T}, i::Integer) where T @inline - unsafe_convert(Ptr{T}, x) + Int(_memory_offset(x, i))::Int + pointer(x) + Int(_memory_offset(x, i))::Int end # The distance from pointer(x) to the element at x[I...] in bytes @@ -1230,6 +1279,11 @@ end Return a subset of array `A` as specified by `inds`, where each `ind` may be, for example, an `Int`, an [`AbstractRange`](@ref), or a [`Vector`](@ref). + +When `inds` selects multiple elements, this function returns a newly +allocated array. To index multiple elements without making a copy, +use [`view`](@ref) instead. + See the manual section on [array indexing](@ref man-array-indexing) for details. # Examples @@ -1262,13 +1316,9 @@ end # To avoid invalidations from multidimensional.jl: getindex(A::Array, i1::Union{Integer, CartesianIndex}, I::Union{Integer, CartesianIndex}...) @propagate_inbounds getindex(A::Array, i1::Integer, I::Integer...) = A[to_indices(A, (i1, I...))...] -function unsafe_getindex(A::AbstractArray, I...) - @inline - @inbounds r = getindex(A, I...) - r -end +@inline unsafe_getindex(A::AbstractArray, I...) = @inbounds getindex(A, I...) -struct CanonicalIndexError +struct CanonicalIndexError <: Exception func::String type::Any CanonicalIndexError(func::String, @nospecialize(type)) = new(func, type) @@ -1343,6 +1393,8 @@ _unsafe_ind2sub(sz, i) = (@inline; _ind2sub(sz, i)) Store values from array `X` within some subset of `A` as specified by `inds`. The syntax `A[inds...] = X` is equivalent to `(setindex!(A, X, inds...); X)`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = zeros(2,2); @@ -1399,10 +1451,12 @@ function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) whe r end +_unsetindex!(A::AbstractArray, i::Integer) = _unsetindex!(A, to_index(i)) + """ parent(A) -Return the underlying "parent array”. This parent array of objects of types `SubArray`, `ReshapedArray` +Return the underlying parent object of the view. This parent of objects of types `SubArray`, `SubString`, `ReshapedArray` or `LinearAlgebra.Transpose` is what was passed as an argument to `view`, `reshape`, `transpose`, etc. during object creation. If the input is not a wrapped object, return the input itself. If the input is wrapped multiple times, only the outermost wrapper will be removed. 
@@ -1425,6 +1479,8 @@ julia> parent(V) 3 4 ``` """ +function parent end + parent(a::AbstractArray) = a ## rudimentary aliasing detection ## @@ -1478,7 +1534,7 @@ Perform a conservative test to check if arrays `A` and `B` might share the same By default, this simply checks if either of the arrays reference the same memory regions, as identified by their [`Base.dataids`](@ref). """ -mightalias(A::AbstractArray, B::AbstractArray) = !isbits(A) && !isbits(B) && !_isdisjoint(dataids(A), dataids(B)) +mightalias(A::AbstractArray, B::AbstractArray) = !isbits(A) && !isbits(B) && !isempty(A) && !isempty(B) && !_isdisjoint(dataids(A), dataids(B)) mightalias(x, y) = false _isdisjoint(as::Tuple{}, bs::Tuple{}) = true @@ -1502,7 +1558,8 @@ their component parts. A typical definition for an array that wraps a parent is `Base.dataids(C::CustomArray) = dataids(C.parent)`. """ dataids(A::AbstractArray) = (UInt(objectid(A)),) -dataids(A::Array) = (UInt(pointer(A)),) +dataids(A::Memory) = (B = ccall(:jl_genericmemory_owner, Any, (Any,), A); (UInt(pointer(B isa typeof(A) ? B : A)),)) +dataids(A::Array) = dataids(A.ref.mem) dataids(::AbstractRange) = () dataids(x) = () @@ -1556,11 +1613,17 @@ replace_in_print_matrix(A::AbstractVector,i::Integer,j::Integer,s::AbstractStrin eltypeof(x) = typeof(x) eltypeof(x::AbstractArray) = eltype(x) -promote_eltypeof() = Bottom +promote_eltypeof() = error() +promote_eltypeof(v1) = eltypeof(v1) promote_eltypeof(v1, vs...) = promote_type(eltypeof(v1), promote_eltypeof(vs...)) +promote_eltypeof(v1::T, vs::T...) where {T} = eltypeof(v1) +promote_eltypeof(v1::AbstractArray{T}, vs::AbstractArray{T}...) where {T} = T -promote_eltype() = Bottom +promote_eltype() = error() +promote_eltype(v1) = eltype(v1) promote_eltype(v1, vs...) = promote_type(eltype(v1), promote_eltype(vs...)) +promote_eltype(v1::T, vs::T...) where {T} = eltype(T) +promote_eltype(v1::AbstractArray{T}, vs::AbstractArray{T}...) where {T} = T #TODO: ERROR CHECK _cat(catdim::Int) = Vector{Any}() @@ -1604,6 +1667,14 @@ end typed_hcat(::Type{T}, A::AbstractVecOrMat...) where {T} = _typed_hcat(T, A) +# Catch indexing errors like v[i +1] (instead of v[i+1] or v[i + 1]), where indexing is +# interpreted as a typed concatenation. (issue #49676) +typed_hcat(::AbstractArray, other...) = throw(ArgumentError("It is unclear whether you \ + intend to perform an indexing operation or typed concatenation. If you intend to \ + perform indexing (v[1 + 2]), adjust spacing or insert missing operator to clarify. \ + If you intend to perform typed concatenation (T[1 2]), ensure that T is a type.")) + + hcat(A::AbstractVecOrMat...) = typed_hcat(promote_eltype(A...), A...) hcat(A::AbstractVecOrMat{T}...) where {T} = typed_hcat(T, A...) 
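For illustration, a short sketch of the spacing mistake that the new `typed_hcat(::AbstractArray, other...)` guard above (issue #49676) is aimed at; the comments paraphrase the method's error message rather than quoting it:

```julia
v = [10, 20, 30]
i = 1

v[i + 1]     # ordinary indexing: returns 20
# v[i +1]    # parsed as typed_hcat(v, i, +1), i.e. typed concatenation with an
#            # array (not a type) in first position; the new method replaces a
#            # confusing failure with a descriptive ArgumentError pointing at
#            # the indexing-vs-concatenation ambiguity
```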
@@ -1615,7 +1686,7 @@ function _typed_hcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T for j = 1:nargs Aj = A[j] if size(Aj, 1) != nrows - throw(ArgumentError("number of rows of each array must match (got $(map(x->size(x,1), A)))")) + throw(DimensionMismatch("number of rows of each array must match (got $(map(x->size(x,1), A)))")) end dense &= isa(Aj,Array) nd = ndims(Aj) @@ -1650,7 +1721,7 @@ function _typed_vcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T ncols = size(A[1], 2) for j = 2:nargs if size(A[j], 2) != ncols - throw(ArgumentError("number of columns of each array must match (got $(map(x->size(x,2), A)))")) + throw(DimensionMismatch("number of columns of each array must match (got $(map(x->size(x,2), A)))")) end end B = similar(A[1], T, nrows, ncols) @@ -1705,7 +1776,7 @@ function cat_shape(dims, shapes::Tuple) end return out_shape end -# The new way to compute the shape (more inferrable than combining cat_size & cat_shape, due to Varargs + issue#36454) +# The new way to compute the shape (more inferable than combining cat_size & cat_shape, due to Varargs + issue#36454) cat_size_shape(dims) = ntuple(zero, Val(length(dims))) @inline cat_size_shape(dims, X, tail...) = _cat_size_shape(dims, _cshp(1, dims, (), cat_size(X)), tail...) _cat_size_shape(dims, shape) = shape @@ -1773,55 +1844,63 @@ function __cat_offset1!(A, shape, catdims, offsets, x) inds = ntuple(length(offsets)) do i (i <= length(catdims) && catdims[i]) ? offsets[i] .+ cat_indices(x, i) : 1:shape[i] end - if x isa AbstractArray - A[inds...] = x - else - fill!(view(A, inds...), x) - end + _copy_or_fill!(A, inds, x) newoffsets = ntuple(length(offsets)) do i (i <= length(catdims) && catdims[i]) ? offsets[i] + cat_size(x, i) : offsets[i] end return newoffsets end +_copy_or_fill!(A, inds, x) = fill!(view(A, inds...), x) +_copy_or_fill!(A, inds, x::AbstractArray) = (A[inds...] = x) + """ vcat(A...) -Concatenate along dimension 1. To efficiently concatenate a large vector of arrays, -use `reduce(vcat, x)`. +Concatenate arrays or numbers vertically. Equivalent to [`cat`](@ref)`(A...; dims=1)`, +and to the syntax `[a; b; c]`. + +To concatenate a large vector of arrays, `reduce(vcat, A)` calls an efficient method +when `A isa AbstractVector{<:AbstractVecOrMat}`, rather than working pairwise. + +See also [`hcat`](@ref), [`Iterators.flatten`](@ref), [`stack`](@ref). # Examples ```jldoctest -julia> a = [1 2 3 4 5] -1×5 Matrix{Int64}: - 1 2 3 4 5 +julia> v = vcat([1,2], [3,4]) +4-element Vector{Int64}: + 1 + 2 + 3 + 4 -julia> b = [6 7 8 9 10; 11 12 13 14 15] -2×5 Matrix{Int64}: - 6 7 8 9 10 - 11 12 13 14 15 +julia> v == vcat(1, 2, [3,4]) # accepts numbers +true -julia> vcat(a,b) -3×5 Matrix{Int64}: - 1 2 3 4 5 - 6 7 8 9 10 - 11 12 13 14 15 +julia> v == [1; 2; [3,4]] # syntax for the same operation +true -julia> c = ([1 2 3], [4 5 6]) -([1 2 3], [4 5 6]) +julia> summary(ComplexF64[1; 2; [3,4]]) # syntax for supplying the element type +"4-element Vector{ComplexF64}" -julia> vcat(c...) -2×3 Matrix{Int64}: - 1 2 3 - 4 5 6 +julia> vcat(range(1, 2, length=3)) # collects lazy ranges +3-element Vector{Float64}: + 1.0 + 1.5 + 2.0 -julia> vs = [[1, 2], [3, 4], [5, 6]] -3-element Vector{Vector{Int64}}: - [1, 2] - [3, 4] - [5, 6] +julia> two = ([10, 20, 30]', Float64[4 5 6; 7 8 9]) # row vector and a matrix +([10 20 30], [4.0 5.0 6.0; 7.0 8.0 9.0]) -julia> reduce(vcat, vs) +julia> vcat(two...) 
+3×3 Matrix{Float64}: + 10.0 20.0 30.0 + 4.0 5.0 6.0 + 7.0 8.0 9.0 + +julia> vs = [[1, 2], [3, 4], [5, 6]]; + +julia> reduce(vcat, vs) # more efficient than vcat(vs...) 6-element Vector{Int64}: 1 2 @@ -1829,69 +1908,59 @@ julia> reduce(vcat, vs) 4 5 6 + +julia> ans == collect(Iterators.flatten(vs)) +true ``` """ vcat(X...) = cat(X...; dims=Val(1)) """ hcat(A...) -Concatenate along dimension 2. To efficiently concatenate a large vector of arrays, -use `reduce(hcat, x)`. +Concatenate arrays or numbers horizontally. Equivalent to [`cat`](@ref)`(A...; dims=2)`, +and to the syntax `[a b c]` or `[a;; b;; c]`. + +For a large vector of arrays, `reduce(hcat, A)` calls an efficient method +when `A isa AbstractVector{<:AbstractVecOrMat}`. +For a vector of vectors, this can also be written [`stack`](@ref)`(A)`. + +See also [`vcat`](@ref), [`hvcat`](@ref). # Examples ```jldoctest -julia> a = [1; 2; 3; 4; 5] -5-element Vector{Int64}: - 1 - 2 - 3 - 4 - 5 +julia> hcat([1,2], [3,4], [5,6]) +2×3 Matrix{Int64}: + 1 3 5 + 2 4 6 -julia> b = [6 7; 8 9; 10 11; 12 13; 14 15] -5×2 Matrix{Int64}: - 6 7 - 8 9 - 10 11 - 12 13 - 14 15 - -julia> hcat(a,b) -5×3 Matrix{Int64}: - 1 6 7 - 2 8 9 - 3 10 11 - 4 12 13 - 5 14 15 - -julia> c = ([1; 2; 3], [4; 5; 6]) -([1, 2, 3], [4, 5, 6]) - -julia> hcat(c...) -3×2 Matrix{Int64}: - 1 4 - 2 5 - 3 6 +julia> hcat(1, 2, [30 40], [5, 6, 7]') # accepts numbers +1×7 Matrix{Int64}: + 1 2 30 40 5 6 7 -julia> x = Matrix(undef, 3, 0) # x = [] would have created an Array{Any, 1}, but need an Array{Any, 2} -3×0 Matrix{Any} +julia> ans == [1 2 [30 40] [5, 6, 7]'] # syntax for the same operation +true -julia> hcat(x, [1; 2; 3]) -3×1 Matrix{Any}: - 1 - 2 - 3 +julia> Float32[1 2 [30 40] [5, 6, 7]'] # syntax for supplying the eltype +1×7 Matrix{Float32}: + 1.0 2.0 30.0 40.0 5.0 6.0 7.0 -julia> vs = [[1, 2], [3, 4], [5, 6]] -3-element Vector{Vector{Int64}}: - [1, 2] - [3, 4] - [5, 6] +julia> ms = [zeros(2,2), [1 2; 3 4], [50 60; 70 80]]; -julia> reduce(hcat, vs) -2×3 Matrix{Int64}: - 1 3 5 - 2 4 6 +julia> reduce(hcat, ms) # more efficient than hcat(ms...) +2×6 Matrix{Float64}: + 0.0 0.0 1.0 2.0 50.0 60.0 + 0.0 0.0 3.0 4.0 70.0 80.0 + +julia> stack(ms) |> summary # disagrees on a vector of matrices +"2×2×3 Array{Float64, 3}" + +julia> hcat(Int[], Int[], Int[]) # empty vectors, each of size (0,) +0×3 Matrix{Int64} + +julia> hcat([1.1, 9.9], Matrix(undef, 2, 0)) # hcat with empty 2×0 Matrix +2×1 Matrix{Any}: + 1.1 + 9.9 ``` """ hcat(X...) = cat(X...; dims=Val(2)) @@ -1902,34 +1971,45 @@ typed_hcat(::Type{T}, X...) where T = _cat_t(Val(2), T, X...) """ cat(A...; dims) -Concatenate the input arrays along the specified dimensions in the iterable `dims`. For -dimensions not in `dims`, all input arrays should have the same size, which will also be the -size of the output array along that dimension. For dimensions in `dims`, the size of the -output array is the sum of the sizes of the input arrays along that dimension. If `dims` is -a single number, the different arrays are tightly stacked along that dimension. If `dims` is -an iterable containing several dimensions, this allows one to construct block diagonal -matrices and their higher-dimensional analogues by simultaneously increasing several -dimensions for every new input array and putting zero blocks elsewhere. For example, -`cat(matrices...; dims=(1,2))` builds a block diagonal matrix, i.e. a block matrix with -`matrices[1]`, `matrices[2]`, ... as diagonal blocks and matching zero blocks away from the -diagonal. 
+Concatenate the input arrays along the dimensions specified in `dims`. + +Along a dimension `d in dims`, the size of the output array is `sum(size(a,d) for +a in A)`. +Along other dimensions, all input arrays should have the same size, +which will also be the size of the output array along those dimensions. -See also [`hcat`](@ref), [`vcat`](@ref), [`hvcat`](@ref), [`repeat`](@ref). +If `dims` is a single number, the different arrays are tightly packed along that dimension. +If `dims` is an iterable containing several dimensions, the positions along these dimensions +are increased simultaneously for each input array, filling with zero elsewhere. +This allows one to construct block-diagonal matrices as `cat(matrices...; dims=(1,2))`, +and their higher-dimensional analogues. + +The special case `dims=1` is [`vcat`](@ref), and `dims=2` is [`hcat`](@ref). +See also [`hvcat`](@ref), [`hvncat`](@ref), [`stack`](@ref), [`repeat`](@ref). + +The keyword also accepts `Val(dims)`. + +!!! compat "Julia 1.8" + For multiple dimensions `dims = Val(::Tuple)` was added in Julia 1.8. # Examples ```jldoctest -julia> cat([1 2; 3 4], [pi, pi], fill(10, 2,3,1); dims=2) +julia> cat([1 2; 3 4], [pi, pi], fill(10, 2,3,1); dims=2) # same as hcat 2×6×1 Array{Float64, 3}: [:, :, 1] = 1.0 2.0 3.14159 10.0 10.0 10.0 3.0 4.0 3.14159 10.0 10.0 10.0 -julia> cat(true, trues(2,2), trues(4)', dims=(1,2)) +julia> cat(true, trues(2,2), trues(4)', dims=(1,2)) # block-diagonal 4×7 Matrix{Bool}: 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 + +julia> cat(1, [2], [3;;]; dims=Val(2)) +1×3 Matrix{Int64}: + 1 2 3 ``` """ @inline cat(A...; dims) = _cat(dims, A...) @@ -1941,9 +2021,11 @@ julia> cat(true, trues(2,2), trues(4)', dims=(1,2)) vcat(A::AbstractArray) = cat(A; dims=Val(1)) vcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(1)) vcat(A::AbstractArray...) = cat(A...; dims=Val(1)) +vcat(A::Union{AbstractArray,Number}...) = cat(A...; dims=Val(1)) hcat(A::AbstractArray) = cat(A; dims=Val(2)) hcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(2)) hcat(A::AbstractArray...) = cat(A...; dims=Val(2)) +hcat(A::Union{AbstractArray,Number}...) = cat(A...; dims=Val(2)) typed_vcat(T::Type, A::AbstractArray) = _cat_t(Val(1), T, A) typed_vcat(T::Type, A::AbstractArray, B::AbstractArray) = _cat_t(Val(1), T, A, B) @@ -1958,7 +2040,7 @@ typed_hcat(T::Type, A::AbstractArray...) = _cat_t(Val(2), T, A...) hvcat_rows(rows::Tuple...) = hvcat(map(length, rows), (rows...)...) typed_hvcat_rows(T::Type, rows::Tuple...) = typed_hvcat(T, map(length, rows), (rows...)...) -function hvcat(nbc::Integer, as...) +function hvcat(nbc::Int, as...) # nbc = # of block columns n = length(as) mod(n,nbc) != 0 && @@ -1968,11 +2050,12 @@ function hvcat(nbc::Integer, as...) end """ - hvcat(rows::Tuple{Vararg{Int}}, values...) + hvcat(blocks_per_row::Union{Tuple{Vararg{Int}}, Int}, values...) Horizontal and vertical concatenation in one call. This function is called for block matrix syntax. The first argument specifies the number of arguments to concatenate in each block -row. +row. If the first argument is a single integer `n`, then all block rows are assumed to have `n` +block columns. # Examples ```jldoctest @@ -2000,13 +2083,12 @@ julia> hvcat((2,2,2), a,b,c,d,e,f) 1 2 3 4 5 6 +julia> hvcat((2,2,2), a,b,c,d,e,f) == hvcat(2, a,b,c,d,e,f) +true ``` - -If the first argument is a single integer `n`, then all block rows are assumed to have `n` -block columns. """ -hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractVecOrMat...) 
= typed_hvcat(promote_eltype(xs...), rows, xs...) -hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractVecOrMat{T}...) where {T} = typed_hvcat(T, rows, xs...) +hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractArray...) = typed_hvcat(promote_eltype(xs...), rows, xs...) +hvcat(rows::Tuple{Vararg{Int}}, xs::AbstractArray{T}...) where {T} = typed_hvcat(T, rows, xs...) function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as::AbstractVecOrMat...) where T nbr = length(rows) # number of block rows @@ -2034,16 +2116,16 @@ function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as::AbstractVecOrMat.. Aj = as[a+j-1] szj = size(Aj,2) if size(Aj,1) != szi - throw(ArgumentError("mismatched height in block row $(i) (expected $szi, got $(size(Aj,1)))")) + throw(DimensionMismatch("mismatched height in block row $(i) (expected $szi, got $(size(Aj,1)))")) end if c-1+szj > nc - throw(ArgumentError("block row $(i) has mismatched number of columns (expected $nc, got $(c-1+szj))")) + throw(DimensionMismatch("block row $(i) has mismatched number of columns (expected $nc, got $(c-1+szj))")) end out[r:r-1+szi, c:c-1+szj] = Aj c += szj end if c != nc+1 - throw(ArgumentError("block row $(i) has mismatched number of columns (expected $nc, got $(c-1))")) + throw(DimensionMismatch("block row $(i) has mismatched number of columns (expected $nc, got $(c-1))")) end r += szi a += rows[i] @@ -2065,7 +2147,7 @@ function hvcat(rows::Tuple{Vararg{Int}}, xs::T...) where T<:Number k = 1 @inbounds for i=1:nr if nc != rows[i] - throw(ArgumentError("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))")) + throw(DimensionMismatch("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))")) end for j=1:nc a[i,j] = xs[k] @@ -2093,13 +2175,15 @@ end hvcat(rows::Tuple{Vararg{Int}}, xs::Number...) = typed_hvcat(promote_typeof(xs...), rows, xs...) hvcat(rows::Tuple{Vararg{Int}}, xs...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...) +# the following method is needed to provide a more specific one compared to LinearAlgebra/uniformscaling.jl +hvcat(rows::Tuple{Vararg{Int}}, xs::Union{AbstractArray,Number}...) = typed_hvcat(promote_eltypeof(xs...), rows, xs...) function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, xs::Number...) where T nr = length(rows) nc = rows[1] for i = 2:nr if nc != rows[i] - throw(ArgumentError("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))")) + throw(DimensionMismatch("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))")) end end hvcat_fill!(Matrix{T}(undef, nr, nc), xs) @@ -2183,14 +2267,13 @@ julia> hvncat(((3, 3), (3, 3), (6,)), true, a, b, c, d, e, f) 4 5 6 ``` - -# Examples for construction of the arguments: -```julia +# Examples for construction of the arguments +``` [a b c ; d e f ;;; g h i ; j k l ;;; m n o ; p q r ;;; s t u ; v w x] -=> dims = (2, 3, 4) +⇒ dims = (2, 3, 4) [a b ; c ;;; d ;;;;] ___ _ _ @@ -2201,7 +2284,7 @@ julia> hvncat(((3, 3), (3, 3), (6,)), true, a, b, c, d, e, f) 4 = elements in each 3d slice (4,) _____________ 4 = elements in each 4d slice (4,) - => shape = ((2, 1, 1), (3, 1), (4,), (4,)) with `rowfirst` = true +⇒ shape = ((2, 1, 1), (3, 1), (4,), (4,)) with `row_first` = true ``` """ hvncat(dimsshape::Tuple, row_first::Bool, xs...) = _hvncat(dimsshape, row_first, xs...) @@ -2231,17 +2314,18 @@ _typed_hvncat(::Type, ::Val{0}, ::AbstractArray...) 
= _typed_hvncat_0d_only_one( _typed_hvncat_0d_only_one() = throw(ArgumentError("a 0-dimensional array may only contain exactly one element")) -_typed_hvncat(T::Type, dim::Int, ::Bool, xs...) = _typed_hvncat(T, Val(dim), xs...) # catches from _hvncat type promoters +# `@constprop :aggressive` here to form constant `Val(dim)` type to get type stability +@constprop :aggressive _typed_hvncat(T::Type, dim::Int, ::Bool, xs...) = _typed_hvncat(T, Val(dim), xs...) # catches from _hvncat type promoters function _typed_hvncat(::Type{T}, ::Val{N}) where {T, N} N < 0 && - throw(ArgumentError("concatenation dimension must be nonnegative")) + throw(ArgumentError("concatenation dimension must be non-negative")) return Array{T, N}(undef, ntuple(x -> 0, Val(N))) end function _typed_hvncat(T::Type, ::Val{N}, xs::Number...) where N N < 0 && - throw(ArgumentError("concatenation dimension must be nonnegative")) + throw(ArgumentError("concatenation dimension must be non-negative")) A = cat_similar(xs[1], T, (ntuple(x -> 1, Val(N - 1))..., length(xs))) hvncat_fill!(A, false, xs) return A @@ -2253,7 +2337,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N} length(as) > 0 || throw(ArgumentError("must have at least one element")) N < 0 && - throw(ArgumentError("concatenation dimension must be nonnegative")) + throw(ArgumentError("concatenation dimension must be non-negative")) for a ∈ as ndims(a) <= N || all(x -> size(a, x) == 1, (N + 1):ndims(a)) || return _typed_hvncat(T, (ntuple(x -> 1, Val(N - 1))..., length(as), 1), false, as...) @@ -2267,7 +2351,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N} Ndim += cat_size(as[i], N) nd = max(nd, cat_ndims(as[i])) for d ∈ 1:N - 1 - cat_size(as[1], d) == cat_size(as[i], d) || throw(ArgumentError("mismatched size along axis $d in element $i")) + cat_size(as[1], d) == cat_size(as[i], d) || throw(DimensionMismatch("mismatched size along axis $d in element $i")) end end @@ -2286,7 +2370,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as...) where {T, N} length(as) > 0 || throw(ArgumentError("must have at least one element")) N < 0 && - throw(ArgumentError("concatenation dimension must be nonnegative")) + throw(ArgumentError("concatenation dimension must be non-negative")) nd = N Ndim = 0 for i ∈ eachindex(as) @@ -2294,7 +2378,7 @@ function _typed_hvncat(::Type{T}, ::Val{N}, as...) 
where {T, N} nd = max(nd, cat_ndims(as[i])) for d ∈ 1:N-1 cat_size(as[i], d) == 1 || - throw(ArgumentError("all dimensions of element $i other than $N must be of length 1")) + throw(DimensionMismatch("all dimensions of element $i other than $N must be of length 1")) end end @@ -2352,18 +2436,22 @@ function _typed_hvncat(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, xs::Num end function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple) + nr, nc = size(A, 1), size(A, 2) + na = prod(size(A)[3:end]) + len = length(xs) + nrc = nr * nc + if nrc * na != len + throw(ArgumentError("argument count $(len) does not match specified shape $(size(A))")) + end # putting these in separate functions leads to unnecessary allocations if row_first - nr, nc = size(A, 1), size(A, 2) - nrc = nr * nc - na = prod(size(A)[3:end]) k = 1 for d ∈ 1:na dd = nrc * (d - 1) for i ∈ 1:nr Ai = dd + i for j ∈ 1:nc - A[Ai] = xs[k] + @inbounds A[Ai] = xs[k] k += 1 Ai += nr end @@ -2371,7 +2459,7 @@ function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple) end else for k ∈ eachindex(xs) - A[k] = xs[k] + @inbounds A[k] = xs[k] end end end @@ -2407,7 +2495,7 @@ function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as for dd ∈ 1:N dd == d && continue if cat_size(as[startelementi], dd) != cat_size(as[i], dd) - throw(ArgumentError("incompatible shape in element $i")) + throw(DimensionMismatch("incompatible shape in element $i")) end end end @@ -2444,18 +2532,18 @@ function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as elseif currentdims[d] < outdims[d] # dimension in progress break else # exceeded dimension - throw(ArgumentError("argument $i has too many elements along axis $d")) + throw(DimensionMismatch("argument $i has too many elements along axis $d")) end end end elseif currentdims[d1] > outdims[d1] # exceeded dimension - throw(ArgumentError("argument $i has too many elements along axis $d1")) + throw(DimensionMismatch("argument $i has too many elements along axis $d1")) end end outlen = prod(outdims) elementcount == outlen || - throw(ArgumentError("mismatched number of elements; expected $(outlen), got $(elementcount)")) + throw(DimensionMismatch("mismatched number of elements; expected $(outlen), got $(elementcount)")) # copy into final array A = cat_similar(as[1], T, outdims) @@ -2516,8 +2604,8 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: if d == 1 || i == 1 || wasstartblock currentdims[d] += dsize elseif dsize != cat_size(as[i - 1], ad) - throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \ - expected $(cat_size(as[i - 1], ad)), got $dsize")) + throw(DimensionMismatch("argument $i has a mismatched number of elements along axis $ad; \ + expected $(cat_size(as[i - 1], ad)), got $dsize")) end wasstartblock = blockcounts[d] == 1 # remember for next dimension @@ -2527,15 +2615,15 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: if outdims[d] == -1 outdims[d] = currentdims[d] elseif outdims[d] != currentdims[d] - throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \ - expected $(abs(outdims[d] - (currentdims[d] - dsize))), got $dsize")) + throw(DimensionMismatch("argument $i has a mismatched number of elements along axis $ad; \ + expected $(abs(outdims[d] - (currentdims[d] - dsize))), got $dsize")) end currentdims[d] = 0 blockcounts[d] = 0 shapepos[d] += 1 d > 1 && (blockcounts[d - 1] == 0 || - throw(ArgumentError("shape in level $d is 
inconsistent; level counts must nest \ - evenly into each other"))) + throw(DimensionMismatch("shape in level $d is inconsistent; level counts must nest \ + evenly into each other"))) end end end @@ -2557,28 +2645,36 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: return A end -function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, d1::Int, d2::Int, as::Tuple{Vararg}) where {T, N} +function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, + d1::Int, d2::Int, as::Tuple) where {T, N} + N > 1 || throw(ArgumentError("dimensions of the destination array must be at least 2")) + length(scratch1) == length(scratch2) == N || + throw(ArgumentError("scratch vectors must have as many elements as the destination array has dimensions")) + 0 < d1 < 3 && + 0 < d2 < 3 && + d1 != d2 || + throw(ArgumentError("d1 and d2 must be either 1 or 2, exclusive.")) outdims = size(A) offsets = scratch1 inneroffsets = scratch2 for a ∈ as if isa(a, AbstractArray) for ai ∈ a - Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) A[Ai] = ai - for j ∈ 1:N + @inbounds for j ∈ 1:N inneroffsets[j] += 1 inneroffsets[j] < cat_size(a, j) && break inneroffsets[j] = 0 end end else - Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) A[Ai] = a end - for j ∈ (d1, d2, 3:N...) + @inbounds for j ∈ (d1, d2, 3:N...) offsets[j] += cat_size(a, j) offsets[j] < outdims[j] && break offsets[j] = 0 @@ -2599,6 +2695,236 @@ end Ai end +""" + stack(iter; [dims]) + +Combine a collection of arrays (or other iterable objects) of equal size +into one larger array, by arranging them along one or more new dimensions. + +By default the axes of the elements are placed first, +giving `size(result) = (size(first(iter))..., size(iter)...)`. +This has the same order of elements as [`Iterators.flatten`](@ref)`(iter)`. + +With keyword `dims::Integer`, instead the `i`th element of `iter` becomes the slice +[`selectdim`](@ref)`(result, dims, i)`, so that `size(result, dims) == length(iter)`. +In this case `stack` reverses the action of [`eachslice`](@ref) with the same `dims`. + +The various [`cat`](@ref) functions also combine arrays. However, these all +extend the arrays' existing (possibly trivial) dimensions, rather than placing +the arrays along new dimensions. +They also accept arrays as separate arguments, rather than a single collection. + +!!! compat "Julia 1.9" + This function requires at least Julia 1.9. + +# Examples +```jldoctest +julia> vecs = (1:2, [30, 40], Float32[500, 600]); + +julia> mat = stack(vecs) +2×3 Matrix{Float32}: + 1.0 30.0 500.0 + 2.0 40.0 600.0 + +julia> mat == hcat(vecs...) == reduce(hcat, collect(vecs)) +true + +julia> vec(mat) == vcat(vecs...) 
== reduce(vcat, collect(vecs)) +true + +julia> stack(zip(1:4, 10:99)) # accepts any iterators of iterators +2×4 Matrix{Int64}: + 1 2 3 4 + 10 11 12 13 + +julia> vec(ans) == collect(Iterators.flatten(zip(1:4, 10:99))) +true + +julia> stack(vecs; dims=1) # unlike any cat function, 1st axis of vecs[1] is 2nd axis of result +3×2 Matrix{Float32}: + 1.0 2.0 + 30.0 40.0 + 500.0 600.0 + +julia> x = rand(3,4); + +julia> x == stack(eachcol(x)) == stack(eachrow(x), dims=1) # inverse of eachslice +true +``` + +Higher-dimensional examples: + +```jldoctest +julia> A = rand(5, 7, 11); + +julia> E = eachslice(A, dims=2); # a vector of matrices + +julia> (element = size(first(E)), container = size(E)) +(element = (5, 11), container = (7,)) + +julia> stack(E) |> size +(5, 11, 7) + +julia> stack(E) == stack(E; dims=3) == cat(E...; dims=3) +true + +julia> A == stack(E; dims=2) +true + +julia> M = (fill(10i+j, 2, 3) for i in 1:5, j in 1:7); + +julia> (element = size(first(M)), container = size(M)) +(element = (2, 3), container = (5, 7)) + +julia> stack(M) |> size # keeps all dimensions +(2, 3, 5, 7) + +julia> stack(M; dims=1) |> size # vec(container) along dims=1 +(35, 2, 3) + +julia> hvcat(5, M...) |> size # hvcat puts matrices next to each other +(14, 15) +``` +""" +stack(iter; dims=:) = _stack(dims, iter) + +""" + stack(f, args...; [dims]) + +Apply a function to each element of a collection, and `stack` the result. +Or to several collections, [`zip`](@ref)ped together. + +The function should return arrays (or tuples, or other iterators) all of the same size. +These become slices of the result, each separated along `dims` (if given) or by default +along the last dimensions. + +See also [`mapslices`](@ref), [`eachcol`](@ref). + +# Examples +```jldoctest +julia> stack(c -> (c, c-32), "julia") +2×5 Matrix{Char}: + 'j' 'u' 'l' 'i' 'a' + 'J' 'U' 'L' 'I' 'A' + +julia> stack(eachrow([1 2 3; 4 5 6]), (10, 100); dims=1) do row, n + vcat(row, row .* n, row ./ n) + end +2×9 Matrix{Float64}: + 1.0 2.0 3.0 10.0 20.0 30.0 0.1 0.2 0.3 + 4.0 5.0 6.0 400.0 500.0 600.0 0.04 0.05 0.06 +``` +""" +stack(f, iter; dims=:) = _stack(dims, f(x) for x in iter) +stack(f, xs, yzs...; dims=:) = _stack(dims, f(xy...) for xy in zip(xs, yzs...)) + +_stack(dims::Union{Integer, Colon}, iter) = _stack(dims, IteratorSize(iter), iter) + +_stack(dims, ::IteratorSize, iter) = _stack(dims, collect(iter)) + +function _stack(dims, ::Union{HasShape, HasLength}, iter) + S = @default_eltype iter + T = S != Union{} ? eltype(S) : Any # Union{} occurs for e.g. stack(1,2), postpone the error + if isconcretetype(T) + _typed_stack(dims, T, S, iter) + else # Need to look inside, but shouldn't run an expensive iterator twice: + array = iter isa Union{Tuple, AbstractArray} ? iter : collect(iter) + isempty(array) && return _empty_stack(dims, T, S, iter) + T2 = mapreduce(eltype, promote_type, array) + _typed_stack(dims, T2, eltype(array), array) + end +end + +function _typed_stack(::Colon, ::Type{T}, ::Type{S}, A, Aax=_iterator_axes(A)) where {T, S} + xit = iterate(A) + nothing === xit && return _empty_stack(:, T, S, A) + x1, _ = xit + ax1 = _iterator_axes(x1) + B = similar(_ensure_array(x1), T, ax1..., Aax...) 
+ off = firstindex(B) + len = length(x1) + while xit !== nothing + x, state = xit + _stack_size_check(x, ax1) + copyto!(B, off, x) + off += len + xit = iterate(A, state) + end + B +end + +_iterator_axes(x) = _iterator_axes(x, IteratorSize(x)) +_iterator_axes(x, ::HasLength) = (OneTo(length(x)),) +_iterator_axes(x, ::IteratorSize) = axes(x) + +# For some dims values, stack(A; dims) == stack(vec(A)), and the : path will be faster +_typed_stack(dims::Integer, ::Type{T}, ::Type{S}, A) where {T,S} = + _typed_stack(dims, T, S, IteratorSize(S), A) +_typed_stack(dims::Integer, ::Type{T}, ::Type{S}, ::HasLength, A) where {T,S} = + _typed_stack(dims, T, S, HasShape{1}(), A) +function _typed_stack(dims::Integer, ::Type{T}, ::Type{S}, ::HasShape{N}, A) where {T,S,N} + if dims == N+1 + _typed_stack(:, T, S, A, (_vec_axis(A),)) + else + _dim_stack(dims, T, S, A) + end +end +_typed_stack(dims::Integer, ::Type{T}, ::Type{S}, ::IteratorSize, A) where {T,S} = + _dim_stack(dims, T, S, A) + +_vec_axis(A, ax=_iterator_axes(A)) = length(ax) == 1 ? only(ax) : OneTo(prod(length, ax; init=1)) + +@constprop :aggressive function _dim_stack(dims::Integer, ::Type{T}, ::Type{S}, A) where {T,S} + xit = Iterators.peel(A) + nothing === xit && return _empty_stack(dims, T, S, A) + x1, xrest = xit + ax1 = _iterator_axes(x1) + N1 = length(ax1)+1 + dims in 1:N1 || throw(ArgumentError(LazyString("cannot stack slices ndims(x) = ", N1-1, " along dims = ", dims))) + + newaxis = _vec_axis(A) + outax = ntuple(d -> d==dims ? newaxis : ax1[d - (d>dims)], N1) + B = similar(_ensure_array(x1), T, outax...) + + if dims == 1 + _dim_stack!(Val(1), B, x1, xrest) + elseif dims == 2 + _dim_stack!(Val(2), B, x1, xrest) + else + _dim_stack!(Val(dims), B, x1, xrest) + end + B +end + +function _dim_stack!(::Val{dims}, B::AbstractArray, x1, xrest) where {dims} + before = ntuple(d -> Colon(), dims - 1) + after = ntuple(d -> Colon(), ndims(B) - dims) + + i = firstindex(B, dims) + copyto!(view(B, before..., i, after...), x1) + + for x in xrest + _stack_size_check(x, _iterator_axes(x1)) + i += 1 + @inbounds copyto!(view(B, before..., i, after...), x) + end +end + +@inline function _stack_size_check(x, ax1::Tuple) + if _iterator_axes(x) != ax1 + uax1 = map(UnitRange, ax1) + uaxN = map(UnitRange, axes(x)) + throw(DimensionMismatch( + LazyString("stack expects uniform slices, got axes(x) == ", uaxN, " while first had ", uax1))) + end +end + +_ensure_array(x::AbstractArray) = x +_ensure_array(x) = 1:0 # passed to similar, makes stack's output an Array + +_empty_stack(_...) = throw(ArgumentError("`stack` on an empty collection is not allowed")) + + ## Reductions and accumulates ## function isequal(A::AbstractArray, B::AbstractArray) @@ -2626,7 +2952,7 @@ end """ isless(A::AbstractVector, B::AbstractVector) -Returns true when `A` is less than `B` in lexicographic order. +Return `true` when `A` is less than `B` in lexicographic order. """ isless(A::AbstractVector, B::AbstractVector) = cmp(A, B) < 0 @@ -2808,7 +3134,7 @@ concatenated along the remaining dimensions. For example, if `dims = [1,2]` and `A` is 4-dimensional, then `f` is called on `x = A[:,:,i,j]` for all `i` and `j`, and `f(x)` becomes `R[:,:,i,j]` in the result `R`. -See also [`eachcol`](@ref), [`eachslice`](@ref), [`mapreduce`](@ref). +See also [`eachcol`](@ref) or [`eachslice`](@ref), used with [`map`](@ref) or [`stack`](@ref). 
# Examples ```jldoctest @@ -2828,7 +3154,7 @@ julia> A = reshape(1:30,(2,5,3)) julia> f(x::Matrix) = fill(x[1,1], 1,4); # returns a 1×4 matrix -julia> mapslices(f, A, dims=(1,2)) +julia> B = mapslices(f, A, dims=(1,2)) 1×4×3 Array{$Int, 3}: [:, :, 1] = 1 1 1 1 @@ -2839,6 +3165,11 @@ julia> mapslices(f, A, dims=(1,2)) [:, :, 3] = 21 21 21 21 +julia> f2(x::AbstractMatrix) = fill(x[1,1], 1,4); + +julia> B == stack(f2, eachslice(A, dims=3)) +true + julia> g(x) = x[begin] // x[end-1]; # returns a number julia> mapslices(g, A, dims=[1,3]) @@ -2982,7 +3313,7 @@ mapany(f, itr) = Any[f(x) for x in itr] map(f, c...) -> collection Transform collection `c` by applying `f` to each element. For multiple collection arguments, -apply `f` elementwise, and stop when when any of them is exhausted. +apply `f` elementwise, and stop when any of them is exhausted. See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref). @@ -3041,6 +3372,8 @@ end Like [`map`](@ref), but stores the result in `destination` rather than a new collection. `destination` must be at least as large as the smallest collection. +$(_DOCS_ALIASING_WARNING) + See also: [`map`](@ref), [`foreach`](@ref), [`zip`](@ref), [`copyto!`](@ref). # Examples @@ -3230,8 +3563,9 @@ function circshift!(a::AbstractVector, shift::Integer) n == 0 && return shift = mod(shift, n) shift == 0 && return - reverse!(a, 1, shift) - reverse!(a, shift+1, length(a)) + l = lastindex(a) + reverse!(a, firstindex(a), l-shift) + reverse!(a, l-shift+1, lastindex(a)) reverse!(a) return a end diff --git a/base/abstractdict.jl b/base/abstractdict.jl index 7f1d8b4a1c504..39898ae71970f 100644 --- a/base/abstractdict.jl +++ b/base/abstractdict.jl @@ -12,6 +12,8 @@ struct KeyError <: Exception key end +KeyTypeError(K, key) = TypeError(:var"dict key", K, key) + const secret_table_token = :__c782dbf1cf4d6a2e5e3865d7e95634f2e09b5902__ haskey(d::AbstractDict, k) = in(k, keys(d)) @@ -218,7 +220,7 @@ Dict{Int64, Int64} with 3 entries: function merge!(d::AbstractDict, others::AbstractDict...) for other in others if haslength(d) && haslength(other) - sizehint!(d, length(d) + length(other)) + sizehint!(d, length(d) + length(other); shrink = false) end for (k,v) in other d[k] = v @@ -536,12 +538,12 @@ function hash(a::AbstractDict, h::UInt) hash(hv, h) end -function getindex(t::AbstractDict, key) +function getindex(t::AbstractDict{<:Any,V}, key) where V v = get(t, key, secret_table_token) if v === secret_table_token throw(KeyError(key)) end - return v + return v::V end # t[k1,k2,ks...] is syntactic sugar for t[(k1,k2,ks...)]. (Note @@ -551,21 +553,21 @@ setindex!(t::AbstractDict, v, k1, k2, ks...) = setindex!(t, v, tuple(k1,k2,ks... get!(t::AbstractDict, key, default) = get!(() -> default, t, key) function get!(default::Callable, t::AbstractDict{K,V}, key) where K where V - haskey(t, key) && return t[key] - val = default() - t[key] = val - return val + key = convert(K, key) + if haskey(t, key) + return t[key] + else + return t[key] = convert(V, default()) + end end push!(t::AbstractDict, p::Pair) = setindex!(t, p.second, p.first) -push!(t::AbstractDict, p::Pair, q::Pair) = push!(push!(t, p), q) -push!(t::AbstractDict, p::Pair, q::Pair, r::Pair...) = push!(push!(push!(t, p), q), r...) 
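A small sketch of the `get!` behavior introduced above, where the key is converted to the dictionary's key type `K` before lookup or insertion (the dictionary and values here are arbitrary examples):

```julia
d = Dict{Int, Vector{Int}}()

# The UInt8 key 0x02 is converted to the key type Int up front, so the
# stored key is the Int 2, and the default is converted to Vector{Int}.
get!(d, 0x02) do
    Int[]
end

haskey(d, 2)        # true; a single entry keyed by the Int 2
```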
# AbstractDicts are convertible convert(::Type{T}, x::T) where {T<:AbstractDict} = x function convert(::Type{T}, x::AbstractDict) where T<:AbstractDict - h = T(x) + h = T(x)::T if length(h) != length(x) error("key collision during dictionary conversion") end @@ -573,7 +575,7 @@ function convert(::Type{T}, x::AbstractDict) where T<:AbstractDict end # hashing objects by identity -_tablesz(x::T) where T <: Integer = x < 16 ? T(16) : one(T)<<((sizeof(T)<<3)-leading_zeros(x-one(T))) +_tablesz(x::T) where T <: Integer = x < 16 ? T(16) : one(T)<<(top_set_bit(x-one(T))) TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}} diff --git a/base/abstractset.jl b/base/abstractset.jl index 85d81480ab990..b38cb2799740b 100644 --- a/base/abstractset.jl +++ b/base/abstractset.jl @@ -65,6 +65,8 @@ const ∪ = union Construct the [`union`](@ref) of passed in sets and overwrite `s` with the result. Maintain order with arrays. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> a = Set([3, 4, 5]); @@ -99,7 +101,7 @@ max_values(::Type{Bool}) = 2 max_values(::Type{Nothing}) = 1 function union!(s::AbstractSet{T}, itr) where T - haslength(itr) && sizehint!(s, length(s) + Int(length(itr))::Int) + haslength(itr) && sizehint!(s, length(s) + Int(length(itr))::Int; shrink = false) for x in itr push!(s, x) length(s) == max_values(T) && break @@ -182,6 +184,8 @@ const ∩ = intersect Intersect all passed in sets and overwrite `s` with the result. Maintain order with arrays. + +$(_DOCS_ALIASING_WARNING) """ function intersect!(s::AbstractSet, itrs...) for x in itrs @@ -218,6 +222,8 @@ setdiff(s) = union(s) Remove from set `s` (in-place) each element of each iterable from `itrs`. Maintain order with arrays. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> a = Set([1, 3, 4, 5]); @@ -248,7 +254,6 @@ end Construct the symmetric difference of elements in the passed in sets. When `s` is not an `AbstractSet`, the order is maintained. -Note that in this case the multiplicity of elements matters. See also [`symdiff!`](@ref), [`setdiff`](@ref), [`union`](@ref) and [`intersect`](@ref). @@ -261,11 +266,6 @@ julia> symdiff([1,2,3], [3,4,5], [4,5,6]) 6 julia> symdiff([1,2,1], [2, 1, 2]) -2-element Vector{Int64}: - 1 - 2 - -julia> symdiff(unique([1,2,1]), unique([2, 1, 2])) Int64[] ``` """ @@ -278,6 +278,8 @@ symdiff(s) = symdiff!(copy(s)) Construct the symmetric difference of the passed in sets, and overwrite `s` with the result. When `s` is an array, the order is maintained. Note that in this case the multiplicity of elements matters. + +$(_DOCS_ALIASING_WARNING) """ function symdiff!(s::AbstractSet, itrs...) for x in itrs @@ -286,7 +288,9 @@ function symdiff!(s::AbstractSet, itrs...) return s end -function symdiff!(s::AbstractSet, itr) +symdiff!(s::AbstractSet, itr) = symdiff!(s::AbstractSet, Set(itr)) + +function symdiff!(s::AbstractSet, itr::AbstractSet) for x in itr x in s ? delete!(s, x) : push!(s, x) end @@ -342,13 +346,17 @@ function issubset(a, b) end """ - hasfastin(T) + Base.hasfastin(T) Determine whether the computation `x ∈ collection` where `collection::T` can be considered as a "fast" operation (typically constant or logarithmic complexity). The definition `hasfastin(x) = hasfastin(typeof(x))` is provided for convenience so that instances can be passed instead of types. However the form that accepts a type argument should be defined for new types. + +The default for `hasfastin(T)` is `true` for subtypes of +[`AbstractSet`](@ref), [`AbstractDict`](@ref) and [`AbstractRange`](@ref) +and `false` otherwise. 
""" hasfastin(::Type) = false hasfastin(::Union{Type{<:AbstractSet},Type{<:AbstractDict},Type{<:AbstractRange}}) = true @@ -356,6 +364,31 @@ hasfastin(x) = hasfastin(typeof(x)) ⊇(a, b) = b ⊆ a +""" + issubset(x) + +Create a function that compares its argument to `x` using [`issubset`](@ref), i.e. +a function equivalent to `y -> issubset(y, x)`. +The returned function is of type `Base.Fix2{typeof(issubset)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +issubset(a) = Fix2(issubset, a) + +""" + ⊇(x) + +Create a function that compares its argument to `x` using [`⊇`](@ref), i.e. +a function equivalent to `y -> y ⊇ x`. +The returned function is of type `Base.Fix2{typeof(⊇)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊇(a) = Fix2(⊇, a) ## strict subset comparison function ⊊ end @@ -385,6 +418,31 @@ false ⊊(a, b) = Set(a) ⊊ Set(b) ⊋(a, b) = b ⊊ a +""" + ⊋(x) + +Create a function that compares its argument to `x` using [`⊋`](@ref), i.e. +a function equivalent to `y -> y ⊋ x`. +The returned function is of type `Base.Fix2{typeof(⊋)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊋(a) = Fix2(⊋, a) +""" + ⊊(x) + +Create a function that compares its argument to `x` using [`⊊`](@ref), i.e. +a function equivalent to `y -> y ⊊ x`. +The returned function is of type `Base.Fix2{typeof(⊊)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊊(a) = Fix2(⊊, a) + function ⊈ end function ⊉ end """ @@ -409,6 +467,32 @@ false ⊈(a, b) = !⊆(a, b) ⊉(a, b) = b ⊈ a +""" + ⊉(x) + +Create a function that compares its argument to `x` using [`⊉`](@ref), i.e. +a function equivalent to `y -> y ⊉ x`. +The returned function is of type `Base.Fix2{typeof(⊉)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊉(a) = Fix2(⊉, a) + +""" + ⊈(x) + +Create a function that compares its argument to `x` using [`⊈`](@ref), i.e. +a function equivalent to `y -> y ⊈ x`. +The returned function is of type `Base.Fix2{typeof(⊈)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +⊈(a) = Fix2(⊈, a) + ## set equality comparison """ @@ -445,6 +529,19 @@ function issetequal(a, b) return issetequal(Set(a), Set(b)) end +""" + issetequal(x) + +Create a function that compares its argument to `x` using [`issetequal`](@ref), i.e. +a function equivalent to `y -> issetequal(y, x)`. +The returned function is of type `Base.Fix2{typeof(issetequal)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +issetequal(a) = Fix2(issetequal, a) + ## set disjoint comparison """ isdisjoint(a, b) -> Bool @@ -480,6 +577,40 @@ function isdisjoint(a, b) _isdisjoint(a, b) end +function isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T + (isempty(a) || isempty(b)) && return true + fa, la = extrema(a) + fb, lb = extrema(b) + if (la < fb) | (lb < fa) + return true + else + return _overlapping_range_isdisjoint(a, b) + end +end + +""" + isdisjoint(x) + +Create a function that compares its argument to `x` using [`isdisjoint`](@ref), i.e. 
+a function equivalent to `y -> isdisjoint(y, x)`. +The returned function is of type `Base.Fix2{typeof(isdisjoint)}`, which can be +used to implement specialized methods. + +!!! compat "Julia 1.11" + This functionality requires at least Julia 1.11. +""" +isdisjoint(a) = Fix2(isdisjoint, a) + +_overlapping_range_isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T = invoke(isdisjoint, Tuple{Any,Any}, a, b) + +function _overlapping_range_isdisjoint(a::AbstractRange{T}, b::AbstractRange{T}) where T<:Integer + if abs(step(a)) == abs(step(b)) + return mod(minimum(a), step(a)) != mod(minimum(b), step(a)) + else + return invoke(isdisjoint, Tuple{Any,Any}, a, b) + end +end + ## partial ordering of sets by containment ==(a::AbstractSet, b::AbstractSet) = length(a) == length(b) && a ⊆ b diff --git a/base/accumulate.jl b/base/accumulate.jl index 663bd850695a8..a2d8a1d368d86 100644 --- a/base/accumulate.jl +++ b/base/accumulate.jl @@ -42,6 +42,8 @@ end cumsum!(B, A; dims::Integer) Cumulative sum of `A` along the dimension `dims`, storing the result in `B`. See also [`cumsum`](@ref). + +$(_DOCS_ALIASING_WARNING) """ cumsum!(B::AbstractArray{T}, A; dims::Integer) where {T} = accumulate!(add_sum, B, A, dims=dims) @@ -150,6 +152,8 @@ cumsum(itr) = accumulate(add_sum, itr) Cumulative product of `A` along the dimension `dims`, storing the result in `B`. See also [`cumprod`](@ref). + +$(_DOCS_ALIASING_WARNING) """ cumprod!(B::AbstractArray{T}, A; dims::Integer) where {T} = accumulate!(mul_prod, B, A, dims=dims) @@ -159,6 +163,8 @@ cumprod!(B::AbstractArray{T}, A; dims::Integer) where {T} = Cumulative product of a vector `x`, storing the result in `y`. See also [`cumprod`](@ref). + +$(_DOCS_ALIASING_WARNING) """ cumprod!(y::AbstractVector, x::AbstractVector) = cumprod!(y, x, dims=1) @@ -280,7 +286,7 @@ function accumulate(op, A; dims::Union{Nothing,Integer}=nothing, kw...) elseif keys(nt) === (:init,) out = similar(A, promote_op(op, typeof(nt.init), eltype(A))) else - throw(ArgumentError("acccumulate does not support the keyword arguments $(setdiff(keys(nt), (:init,)))")) + throw(ArgumentError("accumulate does not support the keyword arguments $(setdiff(keys(nt), (:init,)))")) end accumulate!(op, out, A; dims=dims, kw...) end @@ -301,6 +307,8 @@ Cumulative operation `op` on `A` along the dimension `dims`, storing the result Providing `dims` is optional for vectors. If the keyword argument `init` is given, its value is used to instantiate the accumulation. +$(_DOCS_ALIASING_WARNING) + See also [`accumulate`](@ref), [`cumsum!`](@ref), [`cumprod!`](@ref). # Examples @@ -341,7 +349,7 @@ function accumulate!(op, B, A; dims::Union{Integer, Nothing} = nothing, kw...) elseif keys(kw) === (:init,) _accumulate!(op, B, A, dims, Some(nt.init)) else - throw(ArgumentError("acccumulate! does not support the keyword arguments $(setdiff(keys(nt), (:init,)))")) + throw(ArgumentError("accumulate! does not support the keyword arguments $(setdiff(keys(nt), (:init,)))")) end end diff --git a/base/array.jl b/base/array.jl index a572ee5c305e7..beb5246a0cbda 100644 --- a/base/array.jl +++ b/base/array.jl @@ -9,7 +9,7 @@ The objects called do not have matching dimensionality. Optional argument `msg` descriptive error string. """ struct DimensionMismatch <: Exception - msg::String + msg::AbstractString end DimensionMismatch() = DimensionMismatch("") @@ -36,7 +36,7 @@ const AbstractMatrix{T} = AbstractArray{T,2} Union type of [`AbstractVector{T}`](@ref) and [`AbstractMatrix{T}`](@ref). 
""" const AbstractVecOrMat{T} = Union{AbstractVector{T}, AbstractMatrix{T}} -const RangeIndex = Union{Int, AbstractRange{Int}, AbstractUnitRange{Int}} +const RangeIndex = Union{<:BitInteger, AbstractRange{<:BitInteger}} const DimOrInd = Union{Integer, AbstractUnitRange} const IntOrInd = Union{Int, AbstractUnitRange} const DimsOrInds{N} = NTuple{N,DimOrInd} @@ -120,10 +120,50 @@ const DenseVecOrMat{T} = Union{DenseVector{T}, DenseMatrix{T}} ## Basic functions ## -using Core: arraysize, arrayset, const_arrayref +""" + @_safeindex + +This internal macro converts: +- `getindex(xs::Tuple, i::Int)` -> `__safe_getindex(xs, i)` +- `setindex!(xs::Vector{T}, x, i::Int)` -> `__safe_setindex!(xs, x, i)` +to tell the compiler that indexing operations within the applied expression are always +inbounds and do not need to taint `:consistent` and `:nothrow`. +""" +macro _safeindex(ex) + return esc(_safeindex(ex)) +end +function _safeindex(ex) + isa(ex, Expr) || return ex + if ex.head === :(=) + lhs = ex.args[1] + if isa(lhs, Expr) && lhs.head === :ref # xs[i] = x + rhs = ex.args[2] + xs = lhs.args[1] + args = Vector{Any}(undef, length(lhs.args)-1) + for i = 2:length(lhs.args) + args[i-1] = _safeindex(lhs.args[i]) + end + return Expr(:call, GlobalRef(@__MODULE__, :__safe_setindex!), xs, _safeindex(rhs), args...) + end + elseif ex.head === :ref # xs[i] + return Expr(:call, GlobalRef(@__MODULE__, :__safe_getindex), ex.args...) + end + args = Vector{Any}(undef, length(ex.args)) + for i = 1:length(ex.args) + args[i] = _safeindex(ex.args[i]) + end + return Expr(ex.head, args...) +end vect() = Vector{Any}() -vect(X::T...) where {T} = T[ X[i] for i = 1:length(X) ] +function vect(X::T...) where T + @_terminates_locally_meta + vec = Vector{T}(undef, length(X)) + @_safeindex for i = 1:length(X) + vec[i] = X[i] + end + return vec +end """ vect(X...) @@ -145,14 +185,17 @@ function vect(X...) return T[X...] end -size(a::Array, d::Integer) = arraysize(a, convert(Int, d)) -size(a::Vector) = (arraysize(a,1),) -size(a::Matrix) = (arraysize(a,1), arraysize(a,2)) -size(a::Array{<:Any,N}) where {N} = (@inline; ntuple(M -> size(a, M), Val(N))::Dims) +size(a::Array, d::Integer) = size(a, Int(d)::Int) +function size(a::Array, d::Int) + d < 1 && error("arraysize: dimension out of range") + sz = getfield(a, :size) + return d > length(sz) ? 1 : getfield(sz, d, false) # @inbounds +end +size(a::Array) = getfield(a, :size) -asize_from(a::Array, n) = n > ndims(a) ? () : (arraysize(a,n), asize_from(a, n+1)...) +asize_from(a::Array, n) = n > ndims(a) ? () : (size(a,n), asize_from(a, n+1)...) 
-allocatedinline(T::Type) = (@_total_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0)) +allocatedinline(@nospecialize T::Type) = (@_total_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0)) """ Base.isbitsunion(::Type{T}) @@ -168,58 +211,45 @@ julia> Base.isbitsunion(Union{Float64, String}) false ``` """ -isbitsunion(u::Union) = allocatedinline(u) -isbitsunion(x) = false +isbitsunion(u::Type) = u isa Union && allocatedinline(u) -function _unsetindex!(A::Array{T}, i::Int) where {T} +function _unsetindex!(A::Array, i::Int) @inline @boundscheck checkbounds(A, i) - t = @_gc_preserve_begin A - p = Ptr{Ptr{Cvoid}}(pointer(A, i)) - if !allocatedinline(T) - unsafe_store!(p, C_NULL) - elseif T isa DataType - if !datatype_pointerfree(T) - for j = 1:(Core.sizeof(T) ÷ Core.sizeof(Ptr{Cvoid})) - unsafe_store!(p, C_NULL, j) - end - end - end - @_gc_preserve_end t + @inbounds _unsetindex!(GenericMemoryRef(A.ref, i)) return A end -""" - Base.bitsunionsize(U::Union) -> Int - -For a `Union` of [`isbitstype`](@ref) types, return the size of the largest type; assumes `Base.isbitsunion(U) == true`. - -# Examples -```jldoctest -julia> Base.bitsunionsize(Union{Float64, UInt8}) -8 - -julia> Base.bitsunionsize(Union{Float64, UInt8, Int128}) -16 -``` -""" -function bitsunionsize(u::Union) - isinline, sz, _ = uniontype_layout(u) - @assert isinline - return sz +# TODO: deprecate this (aligned_sizeof and/or elsize and/or sizeof(Some{T}) are more correct) +elsize(::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T) +function elsize(::Type{Ptr{T}}) where T + # this only must return something valid for values which satisfy is_valid_intrinsic_elptr(T), + # which includes Any and most concrete datatypes + T === Any && return sizeof(Ptr{Any}) + T isa DataType || sizeof(Any) # throws + return LLT_ALIGN(Core.sizeof(T), datatype_alignment(T)) end +elsize(::Type{Union{}}, slurp...) = 0 -elsize(@nospecialize _::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T) -sizeof(a::Array) = Core.sizeof(a) +sizeof(a::Array) = length(a) * elsize(typeof(a)) # n.b. this ignores bitsunion bytes, as a historical fact function isassigned(a::Array, i::Int...) @inline - ii = (_sub2ind(size(a), i...) % UInt) - 1 - @boundscheck ii < length(a) % UInt || return false - ccall(:jl_array_isassigned, Cint, (Any, UInt), a, ii) == 1 + @_noub_if_noinbounds_meta + @boundscheck checkbounds(Bool, a, i...) || return false + ii = _sub2ind(size(a), i...) + return @inbounds isassigned(memoryref(a.ref, ii, false)) +end + +function isassigned(a::Vector, i::Int) # slight compiler simplification for the most common case + @inline + @_noub_if_noinbounds_meta + @boundscheck checkbounds(Bool, a, i) || return false + return @inbounds isassigned(memoryref(a.ref, i, false)) end + ## copy ## """ @@ -235,107 +265,56 @@ segfault your program, in the same manner as C. function unsafe_copyto!(dest::Ptr{T}, src::Ptr{T}, n) where T # Do not use this to copy data between pointer arrays. # It can't be made safe no matter how carefully you checked. 
- ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - dest, src, n * aligned_sizeof(T)) - return dest -end - - -function _unsafe_copyto!(dest, doffs, src, soffs, n) - destp = pointer(dest, doffs) - srcp = pointer(src, soffs) - @inbounds if destp < srcp || destp > srcp + n - for i = 1:n - if isassigned(src, soffs + i - 1) - dest[doffs + i - 1] = src[soffs + i - 1] - else - _unsetindex!(dest, doffs + i - 1) - end - end - else - for i = n:-1:1 - if isassigned(src, soffs + i - 1) - dest[doffs + i - 1] = src[soffs + i - 1] - else - _unsetindex!(dest, doffs + i - 1) - end - end - end + memmove(dest, src, n * aligned_sizeof(T)) return dest end """ unsafe_copyto!(dest::Array, do, src::Array, so, N) -Copy `N` elements from a source array to a destination, starting at offset `so` in the +Copy `N` elements from a source array to a destination, starting at the linear index `so` in the source and `do` in the destination (1-indexed). The `unsafe` prefix on this function indicates that no validation is performed to ensure that N is inbounds on either array. Incorrect usage may corrupt or segfault your program, in the same manner as C. """ -function unsafe_copyto!(dest::Array{T}, doffs, src::Array{T}, soffs, n) where T - t1 = @_gc_preserve_begin dest - t2 = @_gc_preserve_begin src - destp = pointer(dest, doffs) - srcp = pointer(src, soffs) - if !allocatedinline(T) - ccall(:jl_array_ptr_copy, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), - dest, destp, src, srcp, n) - elseif isbitstype(T) - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - destp, srcp, n * aligned_sizeof(T)) - elseif isbitsunion(T) - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - destp, srcp, n * aligned_sizeof(T)) - # copy selector bytes - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), dest) + doffs - 1, - ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), src) + soffs - 1, - n) - else - _unsafe_copyto!(dest, doffs, src, soffs, n) - end - @_gc_preserve_end t2 - @_gc_preserve_end t1 +function unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n) + n == 0 && return dest + unsafe_copyto!(GenericMemoryRef(dest.ref, doffs), GenericMemoryRef(src.ref, soffs), n) return dest end -unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n) = - _unsafe_copyto!(dest, doffs, src, soffs, n) - """ copyto!(dest, do, src, so, N) -Copy `N` elements from collection `src` starting at offset `so`, to array `dest` starting at -offset `do`. Return `dest`. +Copy `N` elements from collection `src` starting at the linear index `so`, to array `dest` starting at +the index `do`. Return `dest`. 
""" -function copyto!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer) - return _copyto_impl!(dest, doffs, src, soffs, n) -end +copyto!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n) +copyto!(dest::Array, doffs::Integer, src::Memory, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n) +copyto!(dest::Memory, doffs::Integer, src::Array, soffs::Integer, n::Integer) = _copyto_impl!(dest, doffs, src, soffs, n) # this is only needed to avoid possible ambiguities with methods added in some packages -function copyto!(dest::Array{T}, doffs::Integer, src::Array{T}, soffs::Integer, n::Integer) where T - return _copyto_impl!(dest, doffs, src, soffs, n) -end +copyto!(dest::Array{T}, doffs::Integer, src::Array{T}, soffs::Integer, n::Integer) where {T} = _copyto_impl!(dest, doffs, src, soffs, n) -function _copyto_impl!(dest::Array, doffs::Integer, src::Array, soffs::Integer, n::Integer) +function _copyto_impl!(dest::Union{Array,Memory}, doffs::Integer, src::Union{Array,Memory}, soffs::Integer, n::Integer) n == 0 && return dest - n > 0 || _throw_argerror() - if soffs < 1 || doffs < 1 || soffs+n-1 > length(src) || doffs+n-1 > length(dest) - throw(BoundsError()) + n > 0 || _throw_argerror("Number of elements to copy must be non-negative.") + @boundscheck checkbounds(dest, doffs:doffs+n-1) + @boundscheck checkbounds(src, soffs:soffs+n-1) + @inbounds let dest = GenericMemoryRef(dest isa Array ? getfield(dest, :ref) : dest, doffs) + src = GenericMemoryRef(src isa Array ? getfield(src, :ref) : src, soffs) + unsafe_copyto!(dest, src, n) end - unsafe_copyto!(dest, doffs, src, soffs, n) return dest end + # Outlining this because otherwise a catastrophic inference slowdown # occurs, see discussion in #27874. # It is also mitigated by using a constant string. -function _throw_argerror() - @noinline - throw(ArgumentError("Number of elements to copy must be nonnegative.")) -end +_throw_argerror(s) = (@noinline; throw(ArgumentError(s))) copyto!(dest::Array, src::Array) = copyto!(dest, 1, src, 1, length(src)) @@ -345,7 +324,7 @@ copyto!(dest::Array{T}, src::Array{T}) where {T} = copyto!(dest, 1, src, 1, leng # N.B: The generic definition in multidimensional.jl covers, this, this is just here # for bootstrapping purposes. function fill!(dest::Array{T}, x) where T - xT = convert(T, x) + xT = x isa T ? x : convert(T, x)::T for i in eachindex(dest) @inbounds dest[i] = xT end @@ -359,11 +338,15 @@ Create a shallow copy of `x`: the outer structure is copied, but not all interna For example, copying an array produces a new array with identically-same elements as the original. -See also [`copy!`](@ref Base.copy!), [`copyto!`](@ref). +See also [`copy!`](@ref Base.copy!), [`copyto!`](@ref), [`deepcopy`](@ref). """ copy -copy(a::T) where {T<:Array} = ccall(:jl_array_copy, Ref{T}, (Any,), a) +@eval function copy(a::Array{T}) where {T} + ref = a.ref + newmem = ccall(:jl_genericmemory_copy_slice, Ref{Memory{T}}, (Any, Ptr{Cvoid}, Int), ref.mem, ref.ptr_or_offset, length(a)) + return $(Expr(:new, :(typeof(a)), :(Core.memoryref(newmem)), :(a.size))) +end ## Constructors ## @@ -398,9 +381,11 @@ julia> getindex(Int8, 1, 2, 3) ``` """ function getindex(::Type{T}, vals...) where T + @inline + @_effect_free_terminates_locally_meta a = Vector{T}(undef, length(vals)) if vals isa NTuple - @inbounds for i in 1:length(vals) + @_safeindex for i in 1:length(vals) a[i] = vals[i] end else @@ -414,8 +399,9 @@ function getindex(::Type{T}, vals...) 
where T end function getindex(::Type{Any}, @nospecialize vals...) + @_effect_free_terminates_locally_meta a = Vector{Any}(undef, length(vals)) - @inbounds for i = 1:length(vals) + @_safeindex for i = 1:length(vals) a[i] = vals[i] end return a @@ -423,7 +409,11 @@ end getindex(::Type{Any}) = Vector{Any}() function fill!(a::Union{Array{UInt8}, Array{Int8}}, x::Integer) - ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), a, convert(eltype(a), x), length(a)) + ref = a.ref + t = @_gc_preserve_begin ref + p = unsafe_convert(Ptr{Cvoid}, ref) + memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a)) + @_gc_preserve_end t return a end @@ -611,8 +601,7 @@ oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x) ## Conversions ## -convert(::Type{T}, a::AbstractArray) where {T<:Array} = a isa T ? a : T(a) -convert(::Type{Union{}}, a::AbstractArray) = throw(MethodError(convert, (Union{}, a))) +convert(::Type{T}, a::AbstractArray) where {T<:Array} = a isa T ? a : T(a)::T promote_rule(a::Type{Array{T,n}}, b::Type{Array{S,n}}) where {T,n,S} = el_same(promote_type(T,S), a, b) @@ -680,7 +669,7 @@ _array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(it collect(collection) Return an `Array` of all items in a collection or iterator. For dictionaries, returns -`Pair{KeyType, ValType}`. If the argument is array-like or is an iterator with the +`Vector{Pair{KeyType, ValType}}`. If the argument is array-like or is an iterator with the [`HasShape`](@ref IteratorSize) trait, the result will have the same shape and number of dimensions as the argument. @@ -892,7 +881,7 @@ end ## Iteration ## -iterate(A::Array, i=1) = (@inline; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing) +iterate(A::Array, i=1) = (@inline; (i - 1)%UInt < length(A)%UInt ? (@inbounds A[i], i + 1) : nothing) ## Indexing: getindex ## @@ -917,6 +906,12 @@ julia> getindex(A, "a") """ function getindex end +function getindex(A::Array, i1::Int, i2::Int, I::Int...) + @inline + @boundscheck checkbounds(A, i1, i2, I...) # generally _to_linear_index requires bounds checking + return @inbounds A[_to_linear_index(A, i1, i2, I...)] +end + # Faster contiguous indexing using copyto! for AbstractUnitRange and Colon function getindex(A::Array, I::AbstractUnitRange{<:Integer}) @inline @@ -953,12 +948,41 @@ end Store the given value at the given key or index within a collection. The syntax `a[i,j,...] = x` is converted by the compiler to `(setindex!(a, x, i, j, ...); x)`. + +# Examples +```jldoctest +julia> a = Dict("a"=>1) +Dict{String, Int64} with 1 entry: + "a" => 1 + +julia> setindex!(a, 2, "b") +Dict{String, Int64} with 2 entries: + "b" => 2 + "a" => 1 +``` """ function setindex! end -@eval setindex!(A::Array{T}, x, i1::Int) where {T} = arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1) -@eval setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} = - (@inline; arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1, i2, I...)) +function setindex!(A::Array{T}, x, i::Int) where {T} + @_noub_if_noinbounds_meta + @boundscheck (i - 1)%UInt < length(A)%UInt || throw_boundserror(A, (i,)) + memoryrefset!(memoryref(A.ref, i, false), x isa T ? x : convert(T,x)::T, :not_atomic, false) + return A +end +function setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} + @inline + @_noub_if_noinbounds_meta + @boundscheck checkbounds(A, i1, i2, I...) # generally _to_linear_index requires bounds checking + memoryrefset!(memoryref(A.ref, _to_linear_index(A, i1, i2, I...), false), x isa T ? 
x : convert(T,x)::T, :not_atomic, false) + return A +end + +__safe_setindex!(A::Vector{Any}, @nospecialize(x), i::Int) = (@inline; @_nothrow_noub_meta; + memoryrefset!(memoryref(A.ref, i, false), x, :not_atomic, false); return A) +__safe_setindex!(A::Vector{T}, x::T, i::Int) where {T} = (@inline; @_nothrow_noub_meta; + memoryrefset!(memoryref(A.ref, i, false), x, :not_atomic, false); return A) +__safe_setindex!(A::Vector{T}, x, i::Int) where {T} = (@inline; + __safe_setindex!(A, convert(T, x)::T, i)) # This is redundant with the abstract fallbacks but needed and helpful for bootstrap function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int}) @@ -997,24 +1021,184 @@ function setindex!(A::Array{T}, X::Array{T}, c::Colon) where T return A end -# efficiently grow an array - -_growbeg!(a::Vector, delta::Integer) = - ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), a, delta) -_growend!(a::Vector, delta::Integer) = - ccall(:jl_array_grow_end, Cvoid, (Any, UInt), a, delta) -_growat!(a::Vector, i::Integer, delta::Integer) = - ccall(:jl_array_grow_at, Cvoid, (Any, Int, UInt), a, i - 1, delta) +# Pick new memory size for efficiently growing an array +# TODO: This should know about the size of our GC pools +# Specifically we are wasting ~10% of memory for small arrays +# by not picking memory sizes that max out a GC pool +function overallocation(maxsize) + maxsize < 8 && return 8; + # compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8 + # for small n, we grow faster than O(n) + # for large n, we grow at O(n/8) + # and as we reach O(memory) for memory>>1MB, + # this means we end by adding about 10% of memory each time + exp2 = sizeof(maxsize) * 8 - Core.Intrinsics.ctlz_int(maxsize) + maxsize += (1 << div(exp2 * 7, 8)) * 4 + div(maxsize, 8) + return maxsize +end + +array_new_memory(mem::Memory, newlen::Int) = typeof(mem)(undef, newlen) # when implemented, this should attempt to first expand mem + +function _growbeg!(a::Vector, delta::Integer) + delta = Int(delta) + delta == 0 && return # avoid attempting to index off the end + delta >= 0 || throw(ArgumentError("grow requires delta >= 0")) + ref = a.ref + mem = ref.mem + len = length(a) + offset = memoryrefoffset(ref) + newlen = len + delta + setfield!(a, :size, (newlen,)) + # if offset is far enough advanced to fit data in existing memory without copying + if delta <= offset - 1 + setfield!(a, :ref, @inbounds GenericMemoryRef(ref, 1 - delta)) + else + @noinline (function() + memlen = length(mem) + # since we will allocate the array in the middle of the memory we need at least 2*delta extra space + # the +1 is because I didn't want to have an off by 1 error. + newmemlen = max(overallocation(memlen), len + 2 * delta + 1) + newoffset = div(newmemlen - newlen, 2) + 1 + # If there is extra data after the end of the array we can use that space so long as there is enough + # space at the end that there won't be quadratic behavior with a mix of growth from both ends. + # Specifically, we want to ensure that we will only do this operation once before + # increasing the size of the array, and that we leave enough space at both the beginning and the end.
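# --- Editorial aside (illustrative only, not part of the patch): a rough feel for the
# growth policy implemented by `overallocation` above, assuming a 64-bit Int. The
# `equivalent` helper is a plain-Julia restatement used only for this illustration.
#     equivalent(m) = m < 8 ? 8 : m + (1 << div((8*sizeof(m) - leading_zeros(m)) * 7, 8)) * 4 + div(m, 8)
#     equivalent(8)          # == 41        -- tiny arrays over-allocate aggressively
#     equivalent(1_000)      # == 2_149     -- roughly 2x
#     equivalent(1_000_000)  # == 1_649_288 -- roughly 1.65x; the relative slack keeps shrinking
# --- end aside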
+ if newoffset + newlen < memlen + newoffset = div(memlen - newlen, 2) + 1 + newmem = mem + else + newmem = array_new_memory(mem, newmemlen) + end + unsafe_copyto!(newmem, newoffset + delta, mem, offset, len) + setfield!(a, :ref, @inbounds GenericMemoryRef(newmem, newoffset)) + end)() + end + return +end -# efficiently delete part of an array +function _growend!(a::Vector, delta::Integer) + delta = Int(delta) + delta >= 0 || throw(ArgumentError("grow requires delta >= 0")) + ref = a.ref + mem = ref.mem + memlen = length(mem) + len = length(a) + newlen = len + delta + offset = memoryrefoffset(ref) + setfield!(a, :size, (newlen,)) + newmemlen = offset + newlen - 1 + if memlen < newmemlen + @noinline (function() + if offset - 1 > div(5 * newlen, 4) + # If the offset is far enough that we can copy without resizing + # while maintaining proportional spacing on both ends of the array + # note that this branch prevents infinite growth when doing combinations + # of push! and popfirst! (i.e. when using a Vector as a queue) + newmem = mem + newoffset = div(newlen, 8) + 1 + else + # grow either by our computed overallocation factor + # or exactly the requested size, whichever is larger + # TODO we should possibly increase the offset if the current offset is nonzero. + newmemlen2 = max(overallocation(memlen), newmemlen) + newmem = array_new_memory(mem, newmemlen2) + newoffset = offset + end + newref = @inbounds GenericMemoryRef(newmem, newoffset) + unsafe_copyto!(newref, ref, len) + setfield!(a, :ref, newref) + end)() + end + return +end -_deletebeg!(a::Vector, delta::Integer) = - ccall(:jl_array_del_beg, Cvoid, (Any, UInt), a, delta) -_deleteend!(a::Vector, delta::Integer) = - ccall(:jl_array_del_end, Cvoid, (Any, UInt), a, delta) -_deleteat!(a::Vector, i::Integer, delta::Integer) = - ccall(:jl_array_del_at, Cvoid, (Any, Int, UInt), a, i - 1, delta) +function _growat!(a::Vector, i::Integer, delta::Integer) + delta = Int(delta) + i = Int(i) + i == 1 && return _growbeg!(a, delta) + len = length(a) + i == len + 1 && return _growend!(a, delta) + delta >= 0 || throw(ArgumentError("grow requires delta >= 0")) + 1 < i <= len || throw(BoundsError(a, i)) + ref = a.ref + mem = ref.mem + memlen = length(mem) + newlen = len + delta + offset = memoryrefoffset(ref) + setfield!(a, :size, (newlen,)) + newmemlen = offset + newlen - 1 + + # which side would we rather grow into? + prefer_start = i <= div(len, 2) + # if offset is far enough advanced to fit data in beginning of the memory + if prefer_start && delta <= offset - 1 + newref = @inbounds GenericMemoryRef(mem, offset - delta) + unsafe_copyto!(newref, ref, i) + setfield!(a, :ref, newref) + for j in i:i+delta-1 + @inbounds _unsetindex!(a, j) + end + elseif !prefer_start && memlen >= newmemlen + unsafe_copyto!(mem, offset - 1 + delta + i, mem, offset - 1 + i, len - i + 1) + for j in i:i+delta-1 + @inbounds _unsetindex!(a, j) + end + else + # since we will allocate the array in the middle of the memory we need at least 2*delta extra space + # the +1 is because I didn't want to have an off by 1 error. 
+ newmemlen = max(overallocation(memlen), len+2*delta+1) + newoffset = (newmemlen - newlen) ÷ 2 + 1 + newmem = array_new_memory(mem, newmemlen) + newref = @inbounds GenericMemoryRef(newmem, newoffset) + unsafe_copyto!(newref, ref, i-1) + unsafe_copyto!(newmem, newoffset + delta + i - 1, mem, offset + i - 1, len - i + 1) + setfield!(a, :ref, newref) + end +end +# efficiently delete part of an array +function _deletebeg!(a::Vector, delta::Integer) + delta = Int(delta) + len = length(a) + 0 <= delta <= len || throw(ArgumentError("_deletebeg! requires delta in 0:length(a)")) + for i in 1:delta + @inbounds _unsetindex!(a, i) + end + newlen = len - delta + if newlen != 0 # if newlen==0 we could accidentally index past the memory + newref = @inbounds GenericMemoryRef(a.ref, delta + 1) + setfield!(a, :ref, newref) + end + setfield!(a, :size, (newlen,)) + return +end +function _deleteend!(a::Vector, delta::Integer) + delta = Int(delta) + len = length(a) + 0 <= delta <= len || throw(ArgumentError("_deleteend! requires delta in 0:length(a)")) + newlen = len - delta + for i in newlen+1:len + @inbounds _unsetindex!(a, i) + end + setfield!(a, :size, (newlen,)) + return +end +function _deleteat!(a::Vector, i::Integer, delta::Integer) + i = Int(i) + len = length(a) + 0 <= delta || throw(ArgumentError("_deleteat! requires delta >= 0")) + 1 <= i <= len || throw(BoundsError(a, i)) + i + delta <= len + 1 || throw(BoundsError(a, i + delta - 1)) + newa = a + if 2*i + delta <= len + unsafe_copyto!(newa, 1 + delta, a, 1, i - 1) + _deletebeg!(a, delta) + else + unsafe_copyto!(newa, i, a, i + delta, len + 1 - delta - i) + _deleteend!(a, delta) + end + return +end ## Dequeue functionality ## """ @@ -1045,26 +1229,27 @@ See also [`pushfirst!`](@ref). """ function push! end -function push!(a::Array{T,1}, item) where T +function push!(a::Vector{T}, item) where T # convert first so we don't grow the array if the assignment won't work - itemT = convert(T, item) + itemT = item isa T ? item : convert(T, item)::T _growend!(a, 1) - @inbounds a[end] = itemT + @_safeindex a[length(a)] = itemT return a end # specialize and optimize the single argument case function push!(a::Vector{Any}, @nospecialize x) _growend!(a, 1) - arrayset(true, a, x, length(a)) + @_safeindex a[length(a)] = x return a end function push!(a::Vector{Any}, @nospecialize x...) + @_terminates_locally_meta na = length(a) nx = length(x) _growend!(a, nx) - for i = 1:nx - arrayset(true, a, x[i], na+i) + @_safeindex for i = 1:nx + a[na+i] = x[i] end return a end @@ -1105,11 +1290,13 @@ See [`sizehint!`](@ref) for notes about the performance model. See also [`vcat`](@ref) for vectors, [`union!`](@ref) for sets, and [`prepend!`](@ref) and [`pushfirst!`](@ref) for the opposite order. """ -function append!(a::Vector, items::AbstractVector) - itemindices = eachindex(items) - n = length(itemindices) +function append! end + +function append!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T + items isa Tuple && (items = map(x -> convert(T, x), items)) + n = length(items) _growend!(a, n) - copyto!(a, length(a)-n+1, items, first(itemindices), n) + copyto!(a, length(a)-n+1, items, firstindex(items), n) return a end @@ -1118,17 +1305,16 @@ push!(a::AbstractVector, iter...) = append!(a, iter) append!(a::AbstractVector, iter...) 
= foldl(append!, iter, init=a) -function _append!(a, ::Union{HasLength,HasShape}, iter) - n = length(a) +function _append!(a::AbstractVector, ::Union{HasLength,HasShape}, iter) + n = Int(length(iter))::Int i = lastindex(a) - resize!(a, n+Int(length(iter))::Int) - @inbounds for (i, item) in zip(i+1:lastindex(a), iter) - a[i] = item + sizehint!(a, length(a) + n; shrink=false) + for item in iter + push!(a, item) end a end - -function _append!(a, ::IteratorSize, iter) +function _append!(a::AbstractVector, ::IteratorSize, iter) for item in iter push!(a, item) end @@ -1166,15 +1352,13 @@ julia> prepend!([6], [1, 2], [3, 4, 5]) """ function prepend! end -function prepend!(a::Vector, items::AbstractVector) - itemindices = eachindex(items) - n = length(itemindices) +function prepend!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T + items isa Tuple && (items = map(x -> convert(T, x), items)) + n = length(items) _growbeg!(a, n) - if a === items - copyto!(a, 1, items, n+1, n) - else - copyto!(a, 1, items, first(itemindices), n) - end + # in case of aliasing, the _growbeg might have shifted our data, so copy + # just the last n elements instead of all of them from the first + copyto!(a, 1, items, lastindex(items)-n+1, n) return a end @@ -1183,17 +1367,20 @@ pushfirst!(a::Vector, iter...) = prepend!(a, iter) prepend!(a::AbstractVector, iter...) = foldr((v, a) -> prepend!(a, v), iter, init=a) -function _prepend!(a, ::Union{HasLength,HasShape}, iter) +function _prepend!(a::Vector, ::Union{HasLength,HasShape}, iter) + @_terminates_locally_meta require_one_based_indexing(a) - n = length(iter) - _growbeg!(a, n) - i = 0 + n = Int(length(iter))::Int + sizehint!(a, length(a) + n; first=true, shrink=false) + n = 0 for item in iter - @inbounds a[i += 1] = item + n += 1 + pushfirst!(a, item) end + reverse!(a, 1, n) a end -function _prepend!(a, ::IteratorSize, iter) +function _prepend!(a::Vector, ::IteratorSize, iter) n = 0 for item in iter n += 1 @@ -1239,7 +1426,7 @@ function resize!(a::Vector, nl::Integer) _growend!(a, nl-l) elseif nl != l if nl < 0 - throw(ArgumentError("new length must be ≥ 0")) + _throw_argerror("new length must be ≥ 0") end _deleteend!(a, l-nl) end @@ -1247,9 +1434,22 @@ function resize!(a::Vector, nl::Integer) end """ - sizehint!(s, n) -> s + sizehint!(s, n; first::Bool=false, shrink::Bool=true) -> s + +Suggest that collection `s` reserve capacity for at least `n` elements. That is, if +you expect that you're going to have to push a lot of values onto `s`, you can avoid +the cost of incremental reallocation by doing it once up front; this can improve +performance. + +If `first` is `true`, then any additional space is reserved before the start of the collection. +This way, subsequent calls to `pushfirst!` (instead of `push!`) may become faster. +Supplying this keyword may result in an error if the collection is not ordered +or if `pushfirst!` is not supported for this collection. + +If `shrink=true` (the default), the collection's capacity may be reduced if its current +capacity is greater than `n`. -Suggest that collection `s` reserve capacity for at least `n` elements. This can improve performance. +See also [`resize!`](@ref). # Notes on the performance model @@ -1263,14 +1463,54 @@ For types that support `sizehint!`, `Base`. 3. `empty!` is nearly costless (and O(1)) for types that support this kind of preallocation. + +!!! compat "Julia 1.11" + The `shrink` and `first` arguments were added in Julia 1.11. """ function sizehint! 
end -function sizehint!(a::Vector, sz::Integer) - ccall(:jl_array_sizehint, Cvoid, (Any, UInt), a, sz) +function sizehint!(a::Vector, sz::Integer; first::Bool=false, shrink::Bool=true) + len = length(a) + ref = a.ref + mem = ref.mem + memlen = length(mem) + sz = max(Int(sz), len) + inc = sz - len + if sz <= memlen + # if we don't save at least 1/8th memlen then its not worth it to shrink + if !shrink || memlen - sz <= div(memlen, 8) + return a + end + newmem = array_new_memory(mem, sz) + if first + newref = GenericMemoryRef(newmem, inc + 1) + else + newref = GenericMemoryRef(newmem) + end + unsafe_copyto!(newref, ref, len) + setfield!(a, :ref, newref) + elseif first + _growbeg!(a, inc) + newref = getfield(a, :ref) + newref = GenericMemoryRef(newref, inc + 1) + setfield!(a, :size, (len,)) # undo the size change from _growbeg! + setfield!(a, :ref, newref) # undo the offset change from _growbeg! + else # last + _growend!(a, inc) + setfield!(a, :size, (len,)) # undo the size change from _growend! + end a end +# Fall-back implementation for non-shrinkable collections +# avoid defining this the normal way to avoid avoid infinite recursion +function Core.kwcall(kwargs::NamedTuple{names}, ::typeof(sizehint!), a, sz) where names + get(kwargs, :first, false)::Bool + get(kwargs, :shrink, true)::Bool + isempty(diff_names(names, (:first, :shrink))) || kwerr(kwargs, sizehint!, a, sz) + sizehint!(a, sz) +end + """ pop!(collection) -> item @@ -1314,7 +1554,7 @@ julia> pop!(Dict(1=>2)) """ function pop!(a::Vector) if isempty(a) - throw(ArgumentError("array must be non-empty")) + _throw_argerror("array must be non-empty") end item = a[end] _deleteend!(a, 1) @@ -1388,24 +1628,25 @@ julia> pushfirst!([1, 2, 3, 4], 5, 6) 4 ``` """ -function pushfirst!(a::Array{T,1}, item) where T - item = convert(T, item) +function pushfirst!(a::Vector{T}, item) where T + item = item isa T ? item : convert(T, item)::T _growbeg!(a, 1) - a[1] = item + @_safeindex a[1] = item return a end # specialize and optimize the single argument case function pushfirst!(a::Vector{Any}, @nospecialize x) _growbeg!(a, 1) - a[1] = x + @_safeindex a[1] = x return a end function pushfirst!(a::Vector{Any}, @nospecialize x...) + @_terminates_locally_meta na = length(a) nx = length(x) _growbeg!(a, nx) - for i = 1:nx + @_safeindex for i = 1:nx a[i] = x[i] end return a @@ -1445,7 +1686,7 @@ julia> A """ function popfirst!(a::Vector) if isempty(a) - throw(ArgumentError("array must be non-empty")) + _throw_argerror("array must be non-empty") end item = a[1] _deletebeg!(a, 1) @@ -1475,7 +1716,7 @@ julia> insert!(Any[1:6;], 3, "here") """ function insert!(a::Array{T,1}, i::Integer, item) where T # Throw convert error before changing the shape of the array - _item = convert(T, item) + _item = item isa T ? item : convert(T, item)::T _growat!(a, i, 1) # _growat! already did bound check @inbounds a[i] = _item @@ -1488,7 +1729,7 @@ end Remove the item at the given `i` and return the modified `a`. Subsequent items are shifted to fill the resulting gap. -See also: [`delete!`](@ref), [`popat!`](@ref), [`splice!`](@ref). +See also: [`keepat!`](@ref), [`delete!`](@ref), [`popat!`](@ref), [`splice!`](@ref). 
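# --- Editorial aside (illustrative sketch, not part of the patch): typical use of the
# `first`/`shrink` keywords introduced by the `sizehint!` methods above, on a build that
# contains this change. Reserving the slack in front lets a Vector be used as a queue via
# `pushfirst!` without repeated copying.
q = Int[]
sizehint!(q, 10_000; first=true)   # reserve the capacity before index 1
for i in 1:10_000
    pushfirst!(q, i)               # should not need to reallocate
end
sizehint!(q, 100; shrink=false)    # a smaller hint, but the existing capacity is kept
# --- end aside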
# Examples ```jldoctest @@ -1555,17 +1796,19 @@ struct Nowhere; end push!(::Nowhere, _) = nothing _growend!(::Nowhere, _) = nothing -@inline function _push_deleted!(dltd, a::Vector, ind) - if @inbounds isassigned(a, ind) - push!(dltd, @inbounds a[ind]) +function _push_deleted!(dltd, a::Vector, ind) + @_propagate_inbounds_meta + if isassigned(a, ind) + push!(dltd, a[ind]) else _growend!(dltd, 1) end end -@inline function _copy_item!(a::Vector, p, q) - if @inbounds isassigned(a, q) - @inbounds a[p] = a[q] +function _copy_item!(a::Vector, p, q) + @_propagate_inbounds_meta + if isassigned(a, q) + a[p] = a[q] else _unsetindex!(a, p) end @@ -1577,7 +1820,7 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere()) y === nothing && return a (p, s) = y checkbounds(a, p) - _push_deleted!(dltd, a, p) + @inbounds _push_deleted!(dltd, a, p) q = p+1 while true y = iterate(inds, s) @@ -1585,20 +1828,20 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere()) (i,s) = y if !(q <= i <= n) if i < q - throw(ArgumentError("indices must be unique and sorted")) + _throw_argerror("indices must be unique and sorted") else throw(BoundsError()) end end while q < i - _copy_item!(a, p, q) + @inbounds _copy_item!(a, p, q) p += 1; q += 1 end - _push_deleted!(dltd, a, i) + @inbounds _push_deleted!(dltd, a, i) q = i+1 end while q <= n - _copy_item!(a, p, q) + @inbounds _copy_item!(a, p, q) p += 1; q += 1 end _deleteend!(a, n-p+1) @@ -1611,7 +1854,7 @@ function deleteat!(a::Vector, inds::AbstractVector{Bool}) length(inds) == n || throw(BoundsError(a, inds)) p = 1 for (q, i) in enumerate(inds) - _copy_item!(a, p, q) + @inbounds _copy_item!(a, p, q) p += !i end _deleteend!(a, n-p+1) @@ -1677,7 +1920,7 @@ function splice!(a::Vector, i::Integer, ins=_default_splice) if m == 0 _deleteat!(a, i, 1) elseif m == 1 - a[i] = ins[1] + a[i] = only(ins) else _growat!(a, i, m-1) k = 1 @@ -1701,6 +1944,8 @@ place of the removed items; in this case, `indices` must be a `AbstractUnitRange To insert `replacement` before an index `n` without removing any items, use `splice!(collection, n:n-1, replacement)`. +$(_DOCS_ALIASING_WARNING) + !!! compat "Julia 1.5" Prior to Julia 1.5, `indices` must always be a `UnitRange`. @@ -1759,23 +2004,56 @@ function empty!(a::Vector) return a end -_memcmp(a, b, len) = ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), a, b, len % Csize_t) % Int - # use memcmp for cmp on byte arrays function cmp(a::Array{UInt8,1}, b::Array{UInt8,1}) - c = _memcmp(a, b, min(length(a),length(b))) + aref = a.ref + bref = b.ref + ta = @_gc_preserve_begin aref + tb = @_gc_preserve_begin bref + pa = unsafe_convert(Ptr{Cvoid}, aref) + pb = unsafe_convert(Ptr{Cvoid}, bref) + c = memcmp(pa, pb, min(length(a),length(b))) + @_gc_preserve_end ta + @_gc_preserve_end tb return c < 0 ? -1 : c > 0 ? 
+1 : cmp(length(a),length(b)) end const BitIntegerArray{N} = Union{map(T->Array{T,N}, BitInteger_types)...} where N # use memcmp for == on bit integer types -==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} = - size(a) == size(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * length(a)) +function ==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} + if size(a) == size(b) + aref = a.ref + bref = b.ref + ta = @_gc_preserve_begin aref + tb = @_gc_preserve_begin bref + pa = unsafe_convert(Ptr{Cvoid}, aref) + pb = unsafe_convert(Ptr{Cvoid}, bref) + c = memcmp(pa, pb, sizeof(eltype(Arr)) * length(a)) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c == 0 + else + return false + end +end -# this is ~20% faster than the generic implementation above for very small arrays function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray{1} len = length(a) - len == length(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * len) + if len == length(b) + aref = a.ref + bref = b.ref + ta = @_gc_preserve_begin aref + tb = @_gc_preserve_begin bref + T = eltype(Arr) + pa = unsafe_convert(Ptr{T}, aref) + pb = unsafe_convert(Ptr{T}, bref) + c = memcmp(pa, pb, sizeof(T) * len) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c == 0 + else + return false + end end """ @@ -1841,7 +2119,7 @@ for (f,_f) in ((:reverse,:_reverse), (:reverse!,:_reverse!)) $_f(A::AbstractVector, ::Colon) = $f(A, firstindex(A), lastindex(A)) $_f(A::AbstractVector, dim::Tuple{Integer}) = $_f(A, first(dim)) function $_f(A::AbstractVector, dim::Integer) - dim == 1 || throw(ArgumentError("invalid dimension $dim ≠ 1")) + dim == 1 || _throw_argerror(LazyString("invalid dimension ", dim, " ≠ 1")) return $_f(A, :) end end @@ -1852,6 +2130,11 @@ function reverseind(a::AbstractVector, i::Integer) first(li) + last(li) - i end +# This implementation of `midpoint` is performance-optimized but safe +# only if `lo <= hi`. +midpoint(lo::T, hi::T) where T<:Integer = lo + ((hi - lo) >>> 0x01) +midpoint(lo::Integer, hi::Integer) = midpoint(promote(lo, hi)...) + """ reverse!(v [, start=firstindex(v) [, stop=lastindex(v) ]]) -> v @@ -1880,22 +2163,23 @@ julia> A """ function reverse!(v::AbstractVector, start::Integer, stop::Integer=lastindex(v)) s, n = Int(start), Int(stop) - liv = LinearIndices(v) - if n <= s # empty case; ok - elseif !(first(liv) ≤ s ≤ last(liv)) - throw(BoundsError(v, s)) - elseif !(first(liv) ≤ n ≤ last(liv)) - throw(BoundsError(v, n)) - end - r = n - @inbounds for i in s:div(s+n-1, 2) - v[i], v[r] = v[r], v[i] - r -= 1 + if n > s # non-empty and non-trivial + liv = LinearIndices(v) + if !(first(liv) ≤ s ≤ last(liv)) + throw(BoundsError(v, s)) + elseif !(first(liv) ≤ n ≤ last(liv)) + throw(BoundsError(v, n)) + end + r = n + @inbounds for i in s:midpoint(s, n-1) + v[i], v[r] = v[r], v[i] + r -= 1 + end end return v end -# concatenations of homogeneous combinations of vectors, horizontal and vertical +# concatenations of (in)homogeneous combinations of vectors, horizontal and vertical vcat() = Vector{Any}() hcat() = Vector{Any}() @@ -1909,6 +2193,7 @@ function hcat(V::Vector{T}...) where T end return [ V[j][i]::T for i=1:length(V[1]), j=1:length(V) ] end +hcat(A::Vector...) = cat(A...; dims=Val(2)) # more special than SparseArrays's hcat function vcat(arrays::Vector{T}...) where T n = 0 @@ -1925,6 +2210,7 @@ function vcat(arrays::Vector{T}...) where T end return arr end +vcat(A::Vector...) = cat(A...; dims=Val(1)) # more special than SparseArrays's vcat _cat(n::Integer, x::Integer...) 
= reshape([x...], (ntuple(Returns(1), n-1)..., length(x))) @@ -2096,7 +2382,7 @@ findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::AbstractUnitR function findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::StepRange{T,S}) where {T,S} isempty(r) && return nothing minimum(r) <= p.x <= maximum(r) || return nothing - d = convert(S, p.x - first(r)) + d = convert(S, p.x - first(r))::S iszero(d % step(r)) || return nothing return d ÷ step(r) + 1 end @@ -2317,11 +2603,14 @@ julia> findall(x -> x >= 0, d) ``` """ -findall(testf::Function, A) = collect(first(p) for p in pairs(A) if testf(last(p))) +function findall(testf::Function, A) + gen = (first(p) for p in pairs(A) if testf(last(p))) + @default_eltype(gen) === Union{} ? collect(@default_eltype(keys(A)), gen) : collect(gen) +end # Broadcasting is much faster for small testf, and computing # integer indices from logical index using findall has a negligible cost -findall(testf::Function, A::AbstractArray) = findall(testf.(A)) +findall(testf::F, A::AbstractArray) where {F<:Function} = findall(testf.(A)) """ findall(A) @@ -2368,42 +2657,19 @@ function findall(A) end # Allocating result upfront is faster (possible only when collection can be iterated twice) -function _findall(f::Function, A::AbstractArray{Bool}) - n = count(f, A) +function findall(A::AbstractArray{Bool}) + n = count(A) I = Vector{eltype(keys(A))}(undef, n) - isempty(I) && return I - _findall(f, I, A) -end - -function _findall(f::Function, I::Vector, A::AbstractArray{Bool}) - cnt = 1 - len = length(I) - for (k, v) in pairs(A) - @inbounds I[cnt] = k - cnt += f(v) - cnt > len && return I - end - # In case of impure f, this line could potentially be hit. In that case, - # we can't assume I is the correct length. - resize!(I, cnt - 1) -end - -function _findall(f::Function, I::Vector, A::AbstractVector{Bool}) - i = firstindex(A) cnt = 1 - len = length(I) - while cnt ≤ len - @inbounds I[cnt] = i - cnt += f(@inbounds A[i]) - i = nextind(A, i) + for (i,a) in pairs(A) + if a + I[cnt] = i + cnt += 1 + end end - cnt - 1 == len ? I : resize!(I, cnt - 1) + I end -findall(f::Function, A::AbstractArray{Bool}) = _findall(f, A) -findall(f::Fix2{typeof(in)}, A::AbstractArray{Bool}) = _findall(f, A) -findall(A::AbstractArray{Bool}) = _findall(identity, A) - findall(x::Bool) = x ? [1] : Vector{Int}() findall(testf::Function, x::Number) = testf(x) ? [1] : Vector{Int}() findall(p::Fix2{typeof(in)}, x::Number) = x in p.x ? [1] : Vector{Int}() @@ -2629,6 +2895,33 @@ function filter!(f, a::AbstractVector) return a end +""" + filter(f) + +Create a function that filters its arguments with function `f` using [`filter`](@ref), i.e. +a function equivalent to `x -> filter(f, x)`. + +The returned function is of type `Base.Fix1{typeof(filter)}`, which can be +used to implement specialized methods. + +# Examples +```jldoctest +julia> (1, 2, Inf, 4, NaN, 6) |> filter(isfinite) +(1, 2, 4, 6) + +julia> map(filter(iseven), [1:3, 2:4, 3:5]) +3-element Vector{Vector{Int64}}: + [2] + [2, 4] + [4] +``` +!!! compat "Julia 1.9" + This method requires at least Julia 1.9. +""" +function filter(f) + Fix1(filter, f) +end + """ keepat!(a::Vector, inds) keepat!(a::BitVector, inds) @@ -2637,6 +2930,8 @@ Remove the items at all the indices which are not given by `inds`, and return the modified `a`. Items which are kept are shifted to fill the resulting gaps. +$(_DOCS_ALIASING_WARNING) + `inds` must be an iterator of sorted and unique integer indices. See also [`deleteat!`](@ref). 
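# --- Editorial aside (illustrative sketch, not part of the patch): because the curried
# `filter(f)` added above returns a `Base.Fix1{typeof(filter)}`, other code can dispatch
# on it. `describe` is a hypothetical helper used only for illustration; the stored
# predicate is available in the `.x` field.
describe(f::Function) = "plain function"
describe(f::Base.Fix1{typeof(filter)}) = "curried filter carrying predicate $(f.x)"
describe(isodd)          # "plain function"
describe(filter(isodd))  # "curried filter carrying predicate isodd"
# --- end aside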
@@ -2682,7 +2977,8 @@ keepat!(a::Vector, m::AbstractVector{Bool}) = _keepat!(a, m) # set-like operators for vectors # These are moderately efficient, preserve order, and remove dupes. -_unique_filter!(pred, update!, state) = function (x) +_unique_filter!(pred::P, update!::U, state) where {P,U} = function (x) + # P, U force specialization if pred(x, state) update!(state, x) true @@ -2708,7 +3004,7 @@ union!(v::AbstractVector{T}, itrs...) where {T} = symdiff!(v::AbstractVector{T}, itrs...) where {T} = _grow!(_shrink_filter!(symdiff!(Set{T}(), v, itrs...)), v, itrs) -function _shrink!(shrinker!, v::AbstractVector, itrs) +function _shrink!(shrinker!::F, v::AbstractVector, itrs) where F seen = Set{eltype(v)}() filter!(_grow_filter!(seen), v) shrinker!(seen, itrs...) @@ -2720,7 +3016,7 @@ setdiff!( v::AbstractVector, itrs...) = _shrink!(setdiff!, v, itrs) vectorfilter(T::Type, f, v) = T[x for x in v if f(x)] -function _shrink(shrinker!, itr, itrs) +function _shrink(shrinker!::F, itr, itrs) where F T = promote_eltype(itr, itrs...) keep = shrinker!(Set{T}(itr), itrs...) vectorfilter(T, _shrink_filter!(keep), itr) @@ -2736,3 +3032,45 @@ function intersect(v::AbstractVector, r::AbstractRange) return vectorfilter(T, _shrink_filter!(seen), common) end intersect(r::AbstractRange, v::AbstractVector) = intersect(v, r) + +# Here instead of range.jl for bootstrapping because `@propagate_inbounds` depends on Vectors. +@propagate_inbounds function getindex(v::AbstractRange, i::Integer) + if i isa Bool # Not via dispatch to avoid ambiguities + throw(ArgumentError("invalid index: $i of type Bool")) + else + _getindex(v, i) + end +end + +""" + wrap(Array, m::Union{Memory{T}, MemoryRef{T}}, dims) + +Create an array of size `dims` using `m` as the underlying memory. This can be thought of as a safe version +of [`unsafe_wrap`](@ref) utilizing `Memory` or `MemoryRef` instead of raw pointers. +""" +function wrap end + +@eval @propagate_inbounds function wrap(::Type{Array}, ref::MemoryRef{T}, dims::NTuple{N, Integer}) where {T, N} + mem = ref.mem + mem_len = length(mem) + 1 - memoryrefoffset(ref) + len = Core.checked_dims(dims...) + @boundscheck mem_len >= len || invalid_wrap_err(mem_len, dims, len) + if N != 1 && !(ref === GenericMemoryRef(mem) && len === mem_len) + mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len) + ref = MemoryRef(mem) + end + $(Expr(:new, :(Array{T, N}), :ref, :dims)) +end + +@noinline invalid_wrap_err(len, dims, proddims) = throw(DimensionMismatch( + "Attempted to wrap a MemoryRef of length $len with an Array of size dims=$dims, which is invalid because prod(dims) = $proddims > $len, so that the array would have more elements than the underlying memory can store.")) + +function wrap(::Type{Array}, m::Memory{T}, dims::NTuple{N, Integer}) where {T, N} + wrap(Array, MemoryRef(m), dims) +end +function wrap(::Type{Array}, m::MemoryRef{T}, l::Integer) where {T} + wrap(Array, m, (l,)) +end +function wrap(::Type{Array}, m::Memory{T}, l::Integer) where {T} + wrap(Array, MemoryRef(m), (l,)) +end diff --git a/base/arrayshow.jl b/base/arrayshow.jl index 0d480b64bb32d..db639a88876e8 100644 --- a/base/arrayshow.jl +++ b/base/arrayshow.jl @@ -40,7 +40,7 @@ centered cdot, used in printing of structural zeros of structured matrices. Accept keyword args `c` for alternate single character marker. 
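# --- Editorial aside (illustrative sketch, not part of the patch): the `wrap` API added
# to base/array.jl above can be exercised roughly as follows on a build containing this
# change; the wrapped array and the memory alias the same storage.
mem = Memory{Float64}(undef, 12)
A = Base.wrap(Array, mem, (3, 4))   # 3x4 Matrix{Float64} backed by `mem`
A[1, 1] = 42.0
mem[1] == 42.0                      # true: writes through `A` are visible in `mem`
# --- end aside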
""" function replace_with_centered_mark(s::AbstractString;c::AbstractChar = '⋅') - N = length(s) + N = textwidth(ANSIIterator(s)) return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2))) end @@ -202,7 +202,7 @@ function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, h if n > maxpossiblecols colsA = [colsA[(0:maxpossiblecols-1) .+ firstindex(colsA)]; colsA[(end-maxpossiblecols+1):end]] else - colsA = [colsA;] + colsA = [colsA;] end A = alignment(io, X, rowsA, colsA, screenwidth, screenwidth, sepsize, ncols) # Nine-slicing is accomplished using print_matrix_row repeatedly @@ -278,7 +278,7 @@ show_nd(io::IO, a::AbstractArray, print_matrix::Function, show_full::Bool) = _show_nd(io, inferencebarrier(a), print_matrix, show_full, map(unitrange, axes(a))) function _show_nd(io::IO, @nospecialize(a::AbstractArray), print_matrix::Function, show_full::Bool, axs::Tuple{Vararg{AbstractUnitRange}}) - limit::Bool = get(io, :limit, false) + limit = get(io, :limit, false)::Bool if isempty(a) return end @@ -361,31 +361,31 @@ print_array(io::IO, X::AbstractArray) = show_nd(io, X, print_matrix, true) # typeinfo aware # implements: show(io::IO, ::MIME"text/plain", X::AbstractArray) function show(io::IO, ::MIME"text/plain", X::AbstractArray) - if isempty(X) && (get(io, :compact, false) || X isa Vector) + if isempty(X) && (get(io, :compact, false)::Bool || X isa Vector) return show(io, X) end - # 0) show summary before setting :compact + # 1) show summary before setting :compact summary(io, X) isempty(X) && return print(io, ":") show_circular(io, X) && return - # 1) compute new IOContext + # 2) compute new IOContext if !haskey(io, :compact) && length(axes(X, 2)) > 1 io = IOContext(io, :compact => true) end - if get(io, :limit, false) && eltype(X) === Method + if get(io, :limit, false)::Bool && eltype(X) === Method # override usual show method for Vector{Method}: don't abbreviate long lists io = IOContext(io, :limit => false) end - if get(io, :limit, false) && displaysize(io)[1]-4 <= 0 + if get(io, :limit, false)::Bool && displaysize(io)[1]-4 <= 0 return print(io, " …") else println(io) end - # 2) update typeinfo + # 3) update typeinfo # # it must come after printing the summary, which can exploit :typeinfo itself # (e.g. views) @@ -394,7 +394,7 @@ function show(io::IO, ::MIME"text/plain", X::AbstractArray) # checking for current :typeinfo (this could be changed in the future) io = IOContext(io, :typeinfo => eltype(X)) - # 2) show actual content + # 4) show actual content recur_io = IOContext(io, :SHOWN_SET => X) print_array(recur_io, X) end @@ -462,8 +462,10 @@ function _show_nonempty(io::IO, @nospecialize(X::AbstractMatrix), prefix::String end -_show_nonempty(io::IO, X::AbstractArray, prefix::String) = +function _show_nonempty(io::IO, X::AbstractArray, prefix::String) + print(io, prefix) show_nd(io, X, (io, slice) -> _show_nonempty(io, inferencebarrier(slice), prefix, true, axes(slice)), false) +end # a specific call path is used to show vectors (show_vector) _show_nonempty(::IO, ::AbstractVector, ::String) = @@ -516,7 +518,7 @@ function show_vector(io::IO, v, opn='[', cls=']') if !implicit io = IOContext(io, :typeinfo => eltype(v)) end - limited = get(io, :limit, false) + limited = get(io, :limit, false)::Bool if limited && length(v) > 20 axs1 = axes1(v) @@ -538,10 +540,12 @@ end # returning Any, as this would cause incorrect printing in e.g. 
`Vector[Any[1]]`, # because eltype(Vector) == Any so `Any` wouldn't be printed in `Any[1]`) typeinfo_eltype(typeinfo) = nothing # element type not precisely known +typeinfo_eltype(typeinfo::Type{Union{}}, slurp...) = nothing typeinfo_eltype(typeinfo::Type{<:AbstractArray{T}}) where {T} = eltype(typeinfo) typeinfo_eltype(typeinfo::Type{<:AbstractDict{K,V}}) where {K,V} = eltype(typeinfo) typeinfo_eltype(typeinfo::Type{<:AbstractSet{T}}) where {T} = eltype(typeinfo) + # types that can be parsed back accurately from their un-decorated representations function typeinfo_implicit(@nospecialize(T)) if T === Float64 || T === Int || T === Char || T === String || T === Symbol || @@ -568,11 +572,11 @@ function typeinfo_prefix(io::IO, X) if X isa AbstractDict if eltype_X == eltype_ctx - sprint(show_type_name, typeof(X).name), false + sprint(show_type_name, typeof(X).name; context=io), false elseif !isempty(X) && typeinfo_implicit(keytype(X)) && typeinfo_implicit(valtype(X)) - sprint(show_type_name, typeof(X).name), true + sprint(show_type_name, typeof(X).name; context=io), true else - string(typeof(X)), false + sprint(print, typeof(X); context=io), false end else # Types hard-coded here are those which are created by default for a given syntax @@ -581,9 +585,9 @@ function typeinfo_prefix(io::IO, X) elseif !isempty(X) && typeinfo_implicit(eltype_X) "", true elseif print_without_params(eltype_X) - sprint(show_type_name, unwrap_unionall(eltype_X).name), false # Print "Array" rather than "Array{T,N}" + sprint(show_type_name, unwrap_unionall(eltype_X).name; context=io), false # Print "Array" rather than "Array{T,N}" else - string(eltype_X), false + sprint(print, eltype_X; context=io), false end end end diff --git a/base/asyncevent.jl b/base/asyncevent.jl index d3938bd66c842..3c782be10e194 100644 --- a/base/asyncevent.jl +++ b/base/asyncevent.jl @@ -118,14 +118,18 @@ end unsafe_convert(::Type{Ptr{Cvoid}}, t::Timer) = t.handle unsafe_convert(::Type{Ptr{Cvoid}}, async::AsyncCondition) = async.handle +# if this returns true, the object has been signaled +# if this returns false, the object is closed function _trywait(t::Union{Timer, AsyncCondition}) set = t.set if set # full barrier now for AsyncCondition t isa Timer || Core.Intrinsics.atomic_fence(:acquire_release) else - t.isopen || return false - t.handle == C_NULL && return false + if !isopen(t) + close(t) # wait for the close to complete + return false + end iolock_begin() set = t.set if !set @@ -133,7 +137,7 @@ function _trywait(t::Union{Timer, AsyncCondition}) lock(t.cond) try set = t.set - if !set && t.isopen && t.handle != C_NULL + if !set && t.handle != C_NULL # wait for set or handle, but not the isopen flag iolock_end() set = wait(t.cond) unlock(t.cond) @@ -160,10 +164,28 @@ end isopen(t::Union{Timer, AsyncCondition}) = t.isopen && t.handle != C_NULL function close(t::Union{Timer, AsyncCondition}) + t.handle == C_NULL && return # short-circuit path iolock_begin() - if isopen(t) - @atomic :monotonic t.isopen = false - ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t) + if t.handle != C_NULL + if t.isopen + @atomic :monotonic t.isopen = false + ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t) + end + # implement _trywait here without the auto-reset function, just waiting for the final close signal + preserve_handle(t) + lock(t.cond) + try + while t.handle != C_NULL + iolock_end() + wait(t.cond) + unlock(t.cond) + iolock_begin() + lock(t.cond) + end + finally + unlock(t.cond) + unpreserve_handle(t) + end end iolock_end() nothing @@ -220,7 +242,10 @@ function 
uv_timercb(handle::Ptr{Cvoid}) @atomic :monotonic t.set = true if ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 0 # timer is stopped now - close(t) + if t.isopen + @atomic :monotonic t.isopen = false + ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t) + end end notify(t.cond, true) finally @@ -250,8 +275,8 @@ Create a timer that runs the function `callback` at each timer expiration. Waiting tasks are woken and the function `callback` is called after an initial delay of `delay` seconds, and then repeating with the given `interval` in seconds. If `interval` is equal to `0`, the callback is only run once. The function `callback` is called with a single argument, the timer -itself. Stop a timer by calling `close`. The `cb` may still be run one final time, if the timer has -already expired. +itself. Stop a timer by calling `close`. The `callback` may still be run one final time, if the timer +has already expired. # Examples @@ -272,8 +297,8 @@ julia> begin 3 ``` """ -function Timer(cb::Function, timeout::Real; interval::Real=0.0) - timer = Timer(timeout, interval=interval) +function Timer(cb::Function, timeout; kwargs...) + timer = Timer(timeout; kwargs...) t = @task begin unpreserve_handle(timer) while _trywait(timer) @@ -302,11 +327,24 @@ end """ timedwait(testcb, timeout::Real; pollint::Real=0.1) -Waits until `testcb()` returns `true` or `timeout` seconds have passed, whichever is earlier. +Wait until `testcb()` returns `true` or `timeout` seconds have passed, whichever is earlier. The test function is polled every `pollint` seconds. The minimum value for `pollint` is 0.001 seconds, that is, 1 millisecond. -Returns :ok or :timed_out +Return `:ok` or `:timed_out`. + +# Examples +```jldoctest +julia> cb() = (sleep(5); return); + +julia> t = @async cb(); + +julia> timedwait(()->istaskdone(t), 1) +:timed_out + +julia> timedwait(()->istaskdone(t), 6.5) +:ok +``` """ function timedwait(testcb, timeout::Real; pollint::Real=0.1) pollint >= 1e-3 || throw(ArgumentError("pollint must be ≥ 1 millisecond")) diff --git a/base/asyncmap.jl b/base/asyncmap.jl index 0b3678f6b4b9b..c81afbb7e9115 100644 --- a/base/asyncmap.jl +++ b/base/asyncmap.jl @@ -70,12 +70,6 @@ julia> asyncmap(batch_func, 1:5; ntasks=2, batch_size=2) "args_tuple: (4,), element_val: 4, task: 4904288162898683522" "args_tuple: (5,), element_val: 5, task: 9118321258196414413" ``` - -!!! note - Currently, all tasks in Julia are executed in a single OS thread co-operatively. Consequently, - `asyncmap` is beneficial only when the mapping function involves any I/O - disk, network, remote - worker invocation, etc. - """ function asyncmap(f, c...; ntasks=0, batch_size=nothing) return async_usemap(f, c...; ntasks=ntasks, batch_size=batch_size) @@ -400,6 +394,8 @@ length(itr::AsyncGenerator) = length(itr.collector.enumerator) Like [`asyncmap`](@ref), but stores output in `results` rather than returning a collection. + +$(_DOCS_ALIASING_WARNING) """ function asyncmap!(f, r, c1, c...; ntasks=0, batch_size=nothing) foreach(identity, AsyncCollector(f, r, c1, c...; ntasks=ntasks, batch_size=batch_size)) diff --git a/base/atomics.jl b/base/atomics.jl index e6d62c3fc807b..7312206c19896 100644 --- a/base/atomics.jl +++ b/base/atomics.jl @@ -20,7 +20,7 @@ export # - LLVM doesn't currently support atomics on floats for ppc64 # C++20 is adding limited support for atomics on float, but as of # now Clang does not support that yet. 
-if Sys.ARCH == :i686 || startswith(string(Sys.ARCH), "arm") || +if Sys.ARCH === :i686 || startswith(string(Sys.ARCH), "arm") || Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le const inttypes = (Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64) diff --git a/base/baseext.jl b/base/baseext.jl index 8ebd599312453..625a82ff29234 100644 --- a/base/baseext.jl +++ b/base/baseext.jl @@ -16,7 +16,7 @@ VecElement # hook up VecElement constructor to Base.convert VecElement{T}(arg) where {T} = VecElement{T}(convert(T, arg)) convert(::Type{T}, arg::T) where {T<:VecElement} = arg -convert(::Type{T}, arg) where {T<:VecElement} = T(arg) +convert(::Type{T}, arg) where {T<:VecElement} = T(arg)::T # ## dims-type-converting Array constructors for convenience # type and dimensionality specified, accepting dims as series of Integers diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl index 6eeaca1be84e3..b374d57ce9731 100644 --- a/base/binaryplatforms.jl +++ b/base/binaryplatforms.jl @@ -170,23 +170,21 @@ end # Allow us to easily serialize Platform objects -function Base.repr(p::Platform; context=nothing) - str = string( - "Platform(", - repr(arch(p)), - ", ", - repr(os(p)), - "; ", - join(("$(k) = $(repr(v))" for (k, v) in tags(p) if k ∉ ("arch", "os")), ", "), - ")", - ) +function Base.show(io::IO, p::Platform) + print(io, "Platform(") + show(io, arch(p)) + print(io, ", ") + show(io, os(p)) + print(io, "; ") + join(io, ("$(k) = $(repr(v))" for (k, v) in tags(p) if k ∉ ("arch", "os")), ", ") + print(io, ")") end # Make showing the platform a bit more palatable -function Base.show(io::IO, p::Platform) +function Base.show(io::IO, ::MIME"text/plain", p::Platform) str = string(platform_name(p), " ", arch(p)) # Add on all the other tags not covered by os/arch: - other_tags = sort(collect(filter(kv -> kv[1] ∉ ("os", "arch"), tags(p)))) + other_tags = sort!(filter!(kv -> kv[1] ∉ ("os", "arch"), collect(tags(p)))) if !isempty(other_tags) str = string(str, " {", join([string(k, "=", v) for (k, v) in other_tags], ", "), "}") end @@ -259,14 +257,14 @@ end function set_compare_strategy!(p::Platform, key::String, f::Function) if !haskey(p.tags, key) - throw(ArgumentError("Cannot set comparison strategy for nonexistant tag $(key)!")) + throw(ArgumentError("Cannot set comparison strategy for nonexistent tag $(key)!")) end p.compare_strategies[key] = f end function get_compare_strategy(p::Platform, key::String, default = compare_default) if !haskey(p.tags, key) - throw(ArgumentError("Cannot get comparison strategy for nonexistant tag $(key)!")) + throw(ArgumentError("Cannot get comparison strategy for nonexistent tag $(key)!")) end return get(p.compare_strategies, key, default) end @@ -278,7 +276,7 @@ get_compare_strategy(p::AbstractPlatform, key::String, default = compare_default compare_default(a::String, b::String, a_requested::Bool, b_requested::Bool) Default comparison strategy that falls back to `a == b`. This only ever happens if both -`a` and `b` request this strategy, as any other strategy is preferrable to this one. +`a` and `b` request this strategy, as any other strategy is preferable to this one. """ function compare_default(a::String, b::String, a_requested::Bool, b_requested::Bool) return a == b @@ -494,7 +492,7 @@ julia> wordsize(Platform("x86_64", "macos")) wordsize(p::AbstractPlatform) = (arch(p) ∈ ("i686", "armv6l", "armv7l")) ? 
32 : 64 """ - triplet(p::AbstractPlatform; exclude_tags::Vector{String}) + triplet(p::AbstractPlatform) Get the target triplet for the given `Platform` object as a `String`. @@ -584,6 +582,7 @@ Sys.islinux(p::AbstractPlatform) = os(p) == "linux" Sys.iswindows(p::AbstractPlatform) = os(p) == "windows" Sys.isfreebsd(p::AbstractPlatform) = os(p) == "freebsd" Sys.isbsd(p::AbstractPlatform) = os(p) ∈ ("freebsd", "macos") +Sys.isunix(p::AbstractPlatform) = Sys.isbsd(p) || Sys.islinux(p) const arch_mapping = Dict( "x86_64" => "(x86_|amd)64", @@ -740,10 +739,10 @@ function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = f end os_version = nothing if os == "macos" - os_version = extract_os_version("macos", r".*darwin([\d\.]+)") + os_version = extract_os_version("macos", r".*darwin([\d\.]+)"sa) end if os == "freebsd" - os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)") + os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)"sa) end tags["os_version"] = os_version @@ -797,13 +796,13 @@ function parse_dl_name_version(path::String, os::String) local dlregex if os == "windows" # On Windows, libraries look like `libnettle-6.dll` - dlregex = r"^(.*?)(?:-((?:[\.\d]+)*))?\.dll$" + dlregex = r"^(.*?)(?:-((?:[\.\d]+)*))?\.dll$"sa elseif os == "macos" # On OSX, libraries look like `libnettle.6.3.dylib` - dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$" + dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"sa else # On Linux and FreeBSD, libraries look like `libnettle.so.6.3.0` - dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$" + dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"sa end m = match(dlregex, basename(path)) @@ -834,7 +833,7 @@ Inspects the current Julia process to determine the libgfortran version this Jul linked against (if any). """ function detect_libgfortran_version() - libgfortran_paths = filter(x -> occursin("libgfortran", x), Libdl.dllist()) + libgfortran_paths = filter!(x -> occursin("libgfortran", x), Libdl.dllist()) if isempty(libgfortran_paths) # One day, I hope to not be linking against libgfortran in base Julia return nothing @@ -864,7 +863,7 @@ it is linked against (if any). `max_minor_version` is the latest version in the 3.4 series of GLIBCXX where the search is performed. """ function detect_libstdcxx_version(max_minor_version::Int=30) - libstdcxx_paths = filter(x -> occursin("libstdc++", x), Libdl.dllist()) + libstdcxx_paths = filter!(x -> occursin("libstdc++", x), Libdl.dllist()) if isempty(libstdcxx_paths) # This can happen if we were built by clang, so we don't link against # libstdc++ at all. @@ -872,7 +871,7 @@ function detect_libstdcxx_version(max_minor_version::Int=30) end # Brute-force our way through GLIBCXX_* symbols to discover which version we're linked against - hdl = Libdl.dlopen(first(libstdcxx_paths)) + hdl = Libdl.dlopen(first(libstdcxx_paths))::Ptr{Cvoid} # Try all GLIBCXX versions down to GCC v4.8: # https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html for minor_version in max_minor_version:-1:18 @@ -896,14 +895,14 @@ between Julia and LLVM; they must match. """ function detect_cxxstring_abi() # First, if we're not linked against libstdc++, then early-exit because this doesn't matter. - libstdcxx_paths = filter(x -> occursin("libstdc++", x), Libdl.dllist()) + libstdcxx_paths = filter!(x -> occursin("libstdc++", x), Libdl.dllist()) if isempty(libstdcxx_paths) # We were probably built by `clang`; we don't link against `libstdc++`` at all. 
return nothing end function open_libllvm(f::Function) - for lib_name in ("libLLVM-14jl", "libLLVM", "LLVM", "libLLVMSupport") + for lib_name in (Base.libllvm_name, "libLLVM", "LLVM", "libLLVMSupport") hdl = Libdl.dlopen_e(lib_name) if hdl != C_NULL try @@ -1015,19 +1014,19 @@ function platforms_match(a::AbstractPlatform, b::AbstractPlatform) # Throw an error if `a` and `b` have both set non-default comparison strategies for `k` # and they're not the same strategy. - if a_comp != compare_default && b_comp != compare_default && a_comp != b_comp + if a_comp !== compare_default && b_comp !== compare_default && a_comp !== b_comp throw(ArgumentError("Cannot compare Platform objects with two different non-default comparison strategies for the same key \"$(k)\"")) end # Select the custom comparator, if we have one. comparator = a_comp - if b_comp != compare_default + if b_comp !== compare_default comparator = b_comp end # Call the comparator, passing in which objects requested this comparison (one, the other, or both) # For some comparators this doesn't matter, but for non-symmetrical comparisons, it does. - if !comparator(ak, bk, a_comp == comparator, b_comp == comparator) + if !(comparator(ak, bk, a_comp === comparator, b_comp === comparator)::Bool) return false end end @@ -1066,14 +1065,30 @@ function select_platform(download_info::Dict, platform::AbstractPlatform = HostP return nothing end - # At this point, we may have multiple possibilities. E.g. if, in the future, - # Julia can be built without a direct dependency on libgfortran, we may match - # multiple tarballs that vary only within their libgfortran ABI. To narrow it - # down, we just sort by triplet, then pick the last one. This has the effect - # of generally choosing the latest release (e.g. a `libgfortran5` tarball - # rather than a `libgfortran3` tarball) - p = last(sort(ps, by = p -> triplet(p))) - return download_info[p] + # At this point, we may have multiple possibilities. We now engage a multi- + # stage selection algorithm, where we first sort the matches by how complete + # the match is, e.g. preferring matches where the intersection of tags is + # equal to the union of the tags: + function match_loss(a, b) + a_tags = Set(keys(tags(a))) + b_tags = Set(keys(tags(b))) + return length(union(a_tags, b_tags)) - length(intersect(a_tags, b_tags)) + end + + # We prefer these better matches, and secondarily reverse-sort by triplet so + # as to generally choose the latest release (e.g. a `libgfortran5` tarball + # over a `libgfortran3` tarball). + sort!(ps, lt = (a, b) -> begin + loss_a = match_loss(a, platform) + loss_b = match_loss(b, platform) + if loss_a != loss_b + return loss_a < loss_b + end + return triplet(a) > triplet(b) + end) + + # @invokelatest here to not get invalidated by new defs of `==(::Function, ::Function)` + return @invokelatest getindex(download_info, first(ps)) end # precompiles to reduce latency (see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1025692379) diff --git a/base/bitarray.jl b/base/bitarray.jl index 73f274df44a85..4411fc9323826 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -18,7 +18,7 @@ the functions [`trues`](@ref) and [`falses`](@ref). !!! note Due to its packed storage format, concurrent access to the elements of a `BitArray` - where at least one of them is a write is not thread safe. + where at least one of them is a write is not thread-safe. 
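# --- Editorial aside (illustrative note, not part of the patch): in the `select_platform`
# change to base/binaryplatforms.jl above, `match_loss` counts tag keys present in one
# platform but not the other. Against a host tagged {arch, os, libgfortran_version}, a
# candidate carrying all three keys scores loss 0 and is preferred over one tagged only
# {arch, os} (loss 1); remaining ties are broken by picking the greatest triplet string.
# --- end aside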
""" mutable struct BitArray{N} <: AbstractArray{Bool, N} @@ -458,10 +458,11 @@ function unsafe_copyto!(dest::BitArray, doffs::Integer, src::Union{BitArray,Arra return dest end -copyto!(dest::BitArray, doffs::Integer, src::Array, soffs::Integer, n::Integer) = +copyto!(dest::BitArray, doffs::Integer, src::Union{BitArray,Array}, soffs::Integer, n::Integer) = _copyto_int!(dest, Int(doffs), src, Int(soffs), Int(n)) -function _copyto_int!(dest::BitArray, doffs::Int, src::Array, soffs::Int, n::Int) +function _copyto_int!(dest::BitArray, doffs::Int, src::Union{BitArray,Array}, soffs::Int, n::Int) n == 0 && return dest + n < 0 && throw(ArgumentError("Number of elements to copy must be non-negative.")) soffs < 1 && throw(BoundsError(src, soffs)) doffs < 1 && throw(BoundsError(dest, doffs)) soffs+n-1 > length(src) && throw(BoundsError(src, length(src)+1)) @@ -501,40 +502,42 @@ function Array{T,N}(B::BitArray{N}) where {T,N} end BitArray(A::AbstractArray{<:Any,N}) where {N} = BitArray{N}(A) + function BitArray{N}(A::AbstractArray{T,N}) where N where T B = BitArray(undef, convert(Dims{N}, size(A)::Dims{N})) - Bc = B.chunks - l = length(B) + _checkaxs(axes(B), axes(A)) + _copyto_bitarray!(B, A) + return B::BitArray{N} +end + +function _copyto_bitarray!(B::BitArray, A::AbstractArray) + l = length(A) l == 0 && return B - ind = 1 + l > length(B) && throw(BoundsError(B, length(B)+1)) + Bc = B.chunks + nc = num_bit_chunks(l) + Ai = first(eachindex(A)) @inbounds begin - for i = 1:length(Bc)-1 + for i = 1:nc-1 c = UInt64(0) for j = 0:63 - c |= (UInt64(convert(Bool, A[ind])::Bool) << j) - ind += 1 + c |= (UInt64(convert(Bool, A[Ai])::Bool) << j) + Ai = nextind(A, Ai) end Bc[i] = c end c = UInt64(0) - for j = 0:_mod64(l-1) - c |= (UInt64(convert(Bool, A[ind])::Bool) << j) - ind += 1 + tail = _mod64(l - 1) + 1 + for j = 0:tail-1 + c |= (UInt64(convert(Bool, A[Ai])::Bool) << j) + Ai = nextind(A, Ai) end - Bc[end] = c + msk = _msk_end(tail) + Bc[nc] = (c & msk) | (Bc[nc] & ~msk) end return B end -function BitArray{N}(A::Array{Bool,N}) where N - B = BitArray(undef, size(A)) - Bc = B.chunks - l = length(B) - l == 0 && return B - copy_to_bitarray_chunks!(Bc, 1, A, 1, l) - return B::BitArray{N} -end - reinterpret(::Type{Bool}, B::BitArray, dims::NTuple{N,Int}) where {N} = reinterpret(B, dims) reinterpret(B::BitArray, dims::NTuple{N,Int}) where {N} = reshape(B, dims) @@ -574,7 +577,7 @@ julia> BitArray(x+y == 3 for x = 1:2 for y = 1:3) BitArray(itr) = gen_bitarray(IteratorSize(itr), itr) BitArray{N}(itr) where N = gen_bitarrayN(BitArray{N}, IteratorSize(itr), itr) -convert(T::Type{<:BitArray}, a::AbstractArray) = a isa T ? a : T(a) +convert(::Type{T}, a::AbstractArray) where {T<:BitArray} = a isa T ? a : T(a)::T # generic constructor from an iterable without compile-time info # (we pass start(itr) explicitly to avoid a type-instability with filters) @@ -721,24 +724,25 @@ function _unsafe_setindex!(B::BitArray, X::AbstractArray, I::BitArray) lx = length(X) last_chunk_len = _mod64(length(B)-1)+1 - c = 1 + Xi = first(eachindex(X)) + lastXi = last(eachindex(X)) for i = 1:lc @inbounds Imsk = Ic[i] @inbounds C = Bc[i] u = UInt64(1) for j = 1:(i < lc ? 
64 : last_chunk_len) if Imsk & u != 0 - lx < c && throw_setindex_mismatch(X, c) - @inbounds x = convert(Bool, X[c]) + Xi > lastXi && throw_setindex_mismatch(X, count(I)) + @inbounds x = convert(Bool, X[Xi]) C = ifelse(x, C | u, C & ~u) - c += 1 + Xi = nextind(X, Xi) end u <<= 1 end @inbounds Bc[i] = C end - if length(X) != c-1 - throw_setindex_mismatch(X, c-1) + if Xi != nextind(X, lastXi) + throw_setindex_mismatch(X, count(I)) end return B end @@ -803,7 +807,7 @@ prepend!(B::BitVector, items) = prepend!(B, BitArray(items)) prepend!(A::Vector{Bool}, items::BitVector) = prepend!(A, Array(items)) function sizehint!(B::BitVector, sz::Integer) - ccall(:jl_array_sizehint, Cvoid, (Any, UInt), B.chunks, num_bit_chunks(sz)) + sizehint!(B.chunks, num_bit_chunks(sz)) return B end @@ -1541,12 +1545,12 @@ function unsafe_bitfindprev(Bc::Vector{UInt64}, start::Int) @inbounds begin if Bc[chunk_start] & mask != 0 - return (chunk_start-1) << 6 + (64 - leading_zeros(Bc[chunk_start] & mask)) + return (chunk_start-1) << 6 + (top_set_bit(Bc[chunk_start] & mask)) end for i = (chunk_start-1):-1:1 if Bc[i] != 0 - return (i-1) << 6 + (64 - leading_zeros(Bc[i])) + return (i-1) << 6 + (top_set_bit(Bc[i])) end end end @@ -1775,26 +1779,44 @@ end # map across the chunks. Otherwise, fall-back to the AbstractArray method that # iterates bit-by-bit. function bit_map!(f::F, dest::BitArray, A::BitArray) where F - size(A) == size(dest) || throw(DimensionMismatch("sizes of dest and A must match")) + length(A) <= length(dest) || throw(DimensionMismatch("length of destination must be >= length of collection")) isempty(A) && return dest destc = dest.chunks Ac = A.chunks - for i = 1:(length(Ac)-1) + len_Ac = length(Ac) + for i = 1:(len_Ac-1) destc[i] = f(Ac[i]) end - destc[end] = f(Ac[end]) & _msk_end(A) + # the last effected UInt64's original content + dest_last = destc[len_Ac] + _msk = _msk_end(A) + # first zero out the bits mask is going to change + # then update bits by `or`ing with a masked RHS + # DO NOT SEPARATE ONTO TO LINES. + # Otherwise there will be bugs when Ac aliases destc + destc[len_Ac] = (dest_last & (~_msk)) | f(Ac[len_Ac]) & _msk dest end function bit_map!(f::F, dest::BitArray, A::BitArray, B::BitArray) where F - size(A) == size(B) == size(dest) || throw(DimensionMismatch("sizes of dest, A, and B must all match")) + min_bitlen = min(length(A), length(B)) + min_bitlen <= length(dest) || throw(DimensionMismatch("length of destination must be >= length of smallest input collection")) isempty(A) && return dest + isempty(B) && return dest destc = dest.chunks Ac = A.chunks Bc = B.chunks - for i = 1:(length(Ac)-1) + len_Ac = min(length(Ac), length(Bc)) + for i = 1:len_Ac-1 destc[i] = f(Ac[i], Bc[i]) end - destc[end] = f(Ac[end], Bc[end]) & _msk_end(A) + # the last effected UInt64's original content + dest_last = destc[len_Ac] + _msk = _msk_end(min_bitlen) + # first zero out the bits mask is going to change + # then update bits by `or`ing with a masked RHS + # DO NOT SEPARATE ONTO TO LINES. 
+ # Otherwise there will be bugs when Ac or Bc aliases destc + destc[len_Ac] = (dest_last & ~(_msk)) | f(Ac[end], Bc[end]) & _msk dest end diff --git a/base/bitset.jl b/base/bitset.jl index 0abd9d4b782d2..78d8fc8769de1 100644 --- a/base/bitset.jl +++ b/base/bitset.jl @@ -15,7 +15,11 @@ mutable struct BitSet <: AbstractSet{Int} # 1st stored Int equals 64*offset offset::Int - BitSet() = new(sizehint!(zeros(UInt64, 0), 4), NO_OFFSET) + function BitSet() + a = Vector{UInt64}(undef, 4) # start with some initial space for holding 0:255 without additional allocations later + setfield!(a, :size, (0,)) # aka `empty!(a)` inlined + return new(a, NO_OFFSET) + end end """ @@ -38,8 +42,6 @@ end @inline intoffset(s::BitSet) = s.offset << 6 -eltype(::Type{BitSet}) = Int - empty(s::BitSet, ::Type{Int}=Int) = BitSet() emptymutable(s::BitSet, ::Type{Int}=Int) = BitSet() @@ -53,7 +55,10 @@ function copy!(dest::BitSet, src::BitSet) dest end -sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s) +function sizehint!(s::BitSet, n::Integer; first::Bool=false, shrink::Bool=true) + sizehint!(s.bits, (n+63) >> 6; first, shrink) + s +end function _bits_getindex(b::Bits, n::Int, offset::Int) ci = _div64(n) - offset + 1 @@ -125,7 +130,7 @@ end function union!(s::BitSet, r::AbstractUnitRange{<:Integer}) isempty(r) && return s - a, b = _check_bitset_bounds(first(r)), _check_bitset_bounds(last(r)) + a, b = Int(first(r)), Int(last(r)) cidxa = _div64(a) cidxb = _div64(b) if s.offset == NO_OFFSET @@ -137,20 +142,10 @@ function union!(s::BitSet, r::AbstractUnitRange{<:Integer}) # grow s.bits as necessary if diffb >= len - _growend!(s.bits, diffb - len + 1) - # we set only some values to CHK0, those which will not be - # fully overwritten (i.e. only or'ed with `|`) - s.bits[end] = CHK0 # end == diffb + 1 - if diffa >= len - s.bits[diffa + 1] = CHK0 - end + _growend0!(s.bits, diffb - len + 1) end if diffa < 0 - _growbeg!(s.bits, -diffa) - s.bits[1] = CHK0 - if diffb < 0 - s.bits[diffb - diffa + 1] = CHK0 - end + _growbeg0!(s.bits, -diffa) s.offset = cidxa # s.offset += diffa diffb -= diffa diffa = 0 @@ -257,20 +252,7 @@ function _matched_map!(f, a1::Bits, b1::Int, a2::Bits, b2::Int, b1 # the new offset end - -@noinline _throw_bitset_bounds_err() = - throw(ArgumentError("elements of BitSet must be between typemin(Int) and typemax(Int)")) - -@inline _is_convertible_Int(n) = typemin(Int) <= n <= typemax(Int) - -@inline _check_bitset_bounds(n) = - _is_convertible_Int(n) ? Int(n) : _throw_bitset_bounds_err() - -@inline _check_bitset_bounds(n::Int) = n - -@noinline _throw_keyerror(n) = throw(KeyError(n)) - -@inline push!(s::BitSet, n::Integer) = _setint!(s, _check_bitset_bounds(n), true) +@inline push!(s::BitSet, n::Integer) = _setint!(s, Int(n), true) push!(s::BitSet, ns::Integer...) = (for n in ns; push!(s, n); end; s) @@ -281,7 +263,7 @@ push!(s::BitSet, ns::Integer...) = (for n in ns; push!(s, n); end; s) delete!(s, n) n else - _throw_keyerror(n) + throw(KeyError(n)) end end @@ -294,6 +276,7 @@ end end end +@inline _is_convertible_Int(n) = typemin(Int) <= n <= typemax(Int) @inline delete!(s::BitSet, n::Int) = _setint!(s, n, false) @inline delete!(s::BitSet, n::Integer) = _is_convertible_Int(n) ? 
delete!(s, Int(n)) : s @@ -326,8 +309,15 @@ function symdiff!(s::BitSet, ns) return s end +function symdiff!(s::BitSet, ns::AbstractSet) + for x in ns + int_symdiff!(s, x) + end + return s +end + function int_symdiff!(s::BitSet, n::Integer) - n0 = _check_bitset_bounds(n) + n0 = Int(n) val = !(n0 in s) _setint!(s, n0, val) s @@ -408,7 +398,7 @@ function ==(s1::BitSet, s2::BitSet) if overlap > 0 t1 = @_gc_preserve_begin a1 t2 = @_gc_preserve_begin a2 - _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false + memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false @_gc_preserve_end t2 @_gc_preserve_end t1 end diff --git a/base/bool.jl b/base/bool.jl index 7648df3e0250e..d7dcf76caa91b 100644 --- a/base/bool.jl +++ b/base/bool.jl @@ -112,7 +112,8 @@ nand(x...) = ~(&)(x...) Bitwise nor (not or) of `x` and `y`. Implements [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic), -returning [`missing`](@ref) if one of the arguments is `missing`. +returning [`missing`](@ref) if one of the arguments is `missing` and the +other is not `true`. The infix operation `a ⊽ b` is a synonym for `nor(a,b)`, and `⊽` can be typed by tab-completing `\\nor` or `\\barvee` in the Julia REPL. @@ -131,6 +132,9 @@ false julia> false ⊽ false true +julia> false ⊽ missing +missing + julia> [true; true; false] .⊽ [true; false; false] 3-element BitVector: 0 diff --git a/base/boot.jl b/base/boot.jl index d152f6b62acaf..13bb6bcd7cd4b 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -52,8 +52,26 @@ #abstract type AbstractArray{T,N} end #abstract type DenseArray{T,N} <: AbstractArray{T,N} end +#primitive type AddrSpace{Backend::Module} 8 end +#const CPU = bitcast(AddrSpace{Core}, 0x00) + +#struct GenericMemory{kind::Symbol, T, AS::AddrSpace} +# length::Int +# const data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid} +# Union{ # hidden data +# elements :: NTuple{length, T} +# owner :: Any +# } +#end + +# struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace} +# mem::Memory{kind, T, AS} +# data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid} +#end + #mutable struct Array{T,N} <: DenseArray{T,N} -## opaque +# ref::MemoryRef{T} +# size::NTuple{N,Int} #end #mutable struct Module @@ -109,7 +127,7 @@ #struct LineInfoNode # module::Module -# method::Symbol +# method::Any (Union{Symbol, Method, MethodInstance}) # file::Symbol # line::Int32 # inlined_at::Int32 @@ -163,7 +181,7 @@ # result::Any # exception::Any # backtrace::Any -# logstate::Any +# scope::Any # code::Any #end @@ -173,8 +191,8 @@ export Tuple, Type, UnionAll, TypeVar, Union, Nothing, Cvoid, AbstractArray, DenseArray, NamedTuple, Pair, # special objects - Function, Method, - Module, Symbol, Task, Array, UndefInitializer, undef, WeakRef, VecElement, + Function, Method, Array, Memory, MemoryRef, GenericMemory, GenericMemoryRef, + Module, Symbol, Task, UndefInitializer, undef, WeakRef, VecElement, # numeric types Number, Real, Integer, Bool, Ref, Ptr, AbstractFloat, Float16, Float32, Float64, @@ -217,6 +235,8 @@ primitive type Float16 <: AbstractFloat 16 end primitive type Float32 <: AbstractFloat 32 end primitive type Float64 <: AbstractFloat 64 end +primitive type BFloat16 <: AbstractFloat 16 end + #primitive type Bool <: Integer 8 end abstract type AbstractChar end primitive type Char <: AbstractChar 32 end @@ -245,33 +265,62 @@ ccall(:jl_toplevel_eval_in, Any, (Any, Any), (f::typeof(Typeof))(x) = ($(_expr(:meta,:nospecialize,:x)); isa(x,Type) ? 
Type{x} : typeof(x)) end) - macro nospecialize(x) _expr(:meta, :nospecialize, x) end +Expr(@nospecialize args...) = _expr(args...) -TypeVar(n::Symbol) = _typevar(n, Union{}, Any) -TypeVar(n::Symbol, @nospecialize(ub)) = _typevar(n, Union{}, ub) -TypeVar(n::Symbol, @nospecialize(lb), @nospecialize(ub)) = _typevar(n, lb, ub) +_is_internal(__module__) = __module__ === Core +# can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping) +macro _foldable_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#true, + #=:effect_free=#true, + #=:nothrow=#false, + #=:terminates_globally=#true, + #=:terminates_locally=#false, + #=:notaskstate=#true, + #=:inaccessiblememonly=#true, + #=:noub=#true, + #=:noub_if_noinbounds=#false)) +end -UnionAll(v::TypeVar, @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v, t) +macro inline() Expr(:meta, :inline) end +macro noinline() Expr(:meta, :noinline) end -const Vararg = ccall(:jl_toplevel_eval_in, Any, (Any, Any), Core, _expr(:new, TypeofVararg)) +macro _boundscheck() Expr(:boundscheck) end -# let the compiler assume that calling Union{} as a constructor does not need -# to be considered ever (which comes up often as Type{<:T}) -Union{}(a...) = throw(MethodError(Union{}, a)) +# n.b. the effects and model of these is refined in inference abstractinterpretation.jl +TypeVar(@nospecialize(n)) = _typevar(n::Symbol, Union{}, Any) +TypeVar(@nospecialize(n), @nospecialize(ub)) = _typevar(n::Symbol, Union{}, ub) +TypeVar(@nospecialize(n), @nospecialize(lb), @nospecialize(ub)) = _typevar(n::Symbol, lb, ub) +UnionAll(@nospecialize(v), @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v::TypeVar, t) -Expr(@nospecialize args...) = _expr(args...) +# simple convert for use by constructors of types in Core +# note that there is no actual conversion defined here, +# so the methods and ccall's in Core aren't permitted to use convert +convert(::Type{Any}, @nospecialize(x)) = x +convert(::Type{T}, x::T) where {T} = x +cconvert(::Type{T}, x) where {T} = convert(T, x) +unsafe_convert(::Type{T}, x::T) where {T} = x + +# dispatch token indicating a kwarg (keyword sorter) call +function kwcall end +# deprecated internal functions: +kwfunc(@nospecialize(f)) = kwcall +kwftype(@nospecialize(t)) = typeof(kwcall) + +# Let the compiler assume that calling Union{} as a constructor does not need +# to be considered ever (which comes up often as Type{<:T} inference, and +# occasionally in user code from eltype). +Union{}(a...) = throw(ArgumentError("cannot construct a value of type Union{} for return result")) +kwcall(kwargs, ::Type{Union{}}, a...) = Union{}(a...) abstract type Exception end struct ErrorException <: Exception msg::AbstractString end -macro inline() Expr(:meta, :inline) end -macro noinline() Expr(:meta, :noinline) end - struct BoundsError <: Exception a::Any i::Any @@ -287,10 +336,16 @@ struct StackOverflowError <: Exception end struct UndefRefError <: Exception end struct UndefVarError <: Exception var::Symbol + scope # a Module or Symbol or other object describing the context where this variable was looked for (e.g. 
Main or :local or :static_parameter) + UndefVarError(var::Symbol) = new(var) + UndefVarError(var::Symbol, @nospecialize scope) = new(var, scope) end struct ConcurrencyViolationError <: Exception msg::AbstractString end +struct MissingCodeError <: Exception + mi::MethodInstance +end struct InterruptException <: Exception end struct DomainError <: Exception val @@ -315,9 +370,8 @@ TypeError(where, @nospecialize(expected::Type), @nospecialize(got)) = TypeError(Symbol(where), "", expected, got) struct InexactError <: Exception func::Symbol - T # Type - val - InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@noinline; new(f, T, val)) + args + InexactError(f::Symbol, @nospecialize(args...)) = (@noinline; new(f, args)) end struct OverflowError <: Exception msg::AbstractString @@ -330,13 +384,15 @@ struct UndefKeywordError <: Exception var::Symbol end +const typemax_UInt = Intrinsics.sext_int(UInt, 0xFF) +const typemax_Int = Core.Intrinsics.udiv_int(Core.Intrinsics.sext_int(Int, 0xFF), 2) + struct MethodError <: Exception f args world::UInt MethodError(@nospecialize(f), @nospecialize(args), world::UInt) = new(f, args, world) end -const typemax_UInt = ccall(:jl_typemax_uint, Any, (Any,), UInt) MethodError(@nospecialize(f), @nospecialize(args)) = MethodError(f, args, typemax_UInt) struct AssertionError <: Exception @@ -357,6 +413,8 @@ struct InitError <: WrappedException error end +struct PrecompilableError <: Exception end + String(s::String) = s # no constructor yet const Cvoid = Nothing @@ -369,10 +427,6 @@ include(m::Module, fname::String) = ccall(:jl_load_, Any, (Any, Any), m, fname) eval(m::Module, @nospecialize(e)) = ccall(:jl_toplevel_eval_in, Any, (Any, Any), m, e) -kwfunc(@nospecialize(f)) = ccall(:jl_get_keyword_sorter, Any, (Any,), f) - -kwftype(@nospecialize(t)) = typeof(ccall(:jl_get_kwsorter, Any, (Any,), t)) - mutable struct Box contents::Any Box(@nospecialize(x)) = new(x) @@ -405,6 +459,8 @@ eval(Core, quote ReturnNode(@nospecialize val) = $(Expr(:new, :ReturnNode, :val)) ReturnNode() = $(Expr(:new, :ReturnNode)) # unassigned val indicates unreachable GotoIfNot(@nospecialize(cond), dest::Int) = $(Expr(:new, :GotoIfNot, :cond, :dest)) + EnterNode(dest::Int) = $(Expr(:new, :EnterNode, :dest)) + EnterNode(dest::Int, @nospecialize(scope)) = $(Expr(:new, :EnterNode, :dest, :scope)) LineNumberNode(l::Int) = $(Expr(:new, :LineNumberNode, :l, nothing)) function LineNumberNode(l::Int, @nospecialize(f)) isa(f, String) && (f = Symbol(f)) @@ -412,115 +468,180 @@ eval(Core, quote end LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) = $(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at)) - GlobalRef(m::Module, s::Symbol) = $(Expr(:new, :GlobalRef, :m, :s)) SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n)) - TypedSlot(n::Int, @nospecialize(t)) = $(Expr(:new, :TypedSlot, :n, :t)) PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values)) PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ)) PhiCNode(values::Array{Any, 1}) = $(Expr(:new, :PhiCNode, :values)) UpsilonNode(@nospecialize(val)) = $(Expr(:new, :UpsilonNode, :val)) UpsilonNode() = $(Expr(:new, :UpsilonNode)) - function CodeInstance( - mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const), - @nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt, - ipo_effects::UInt32, effects::UInt32, 
@nospecialize(argescapes#=::Union{Nothing,Vector{ArgEscapeInfo}}=#), - relocatability::UInt8) - return ccall(:jl_new_codeinst, Ref{CodeInstance}, - (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8), - mi, rettype, inferred_const, inferred, const_flags, min_world, max_world, - ipo_effects, effects, argescapes, - relocatability) - end Const(@nospecialize(v)) = $(Expr(:new, :Const, :v)) # NOTE the main constructor is defined within `Core.Compiler` - _PartialStruct(typ::DataType, fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields)) - PartialOpaque(@nospecialize(typ), @nospecialize(env), parent::MethodInstance, source::Method) = $(Expr(:new, :PartialOpaque, :typ, :env, :parent, :source)) + _PartialStruct(@nospecialize(typ), fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields)) + PartialOpaque(@nospecialize(typ), @nospecialize(env), parent::MethodInstance, source) = $(Expr(:new, :PartialOpaque, :typ, :env, :parent, :source)) + InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) = $(Expr(:new, :InterConditional, :slot, :thentype, :elsetype)) MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) = $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers)) end) +function CodeInstance( + mi::MethodInstance, @nospecialize(rettype), @nospecialize(exctype), @nospecialize(inferred_const), + @nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt, + ipo_effects::UInt32, effects::UInt32, @nospecialize(analysis_results), + relocatability::UInt8) + return ccall(:jl_new_codeinst, Ref{CodeInstance}, + (Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8), + mi, rettype, exctype, inferred_const, inferred, const_flags, min_world, max_world, + ipo_effects, effects, analysis_results, + relocatability) +end +GlobalRef(m::Module, s::Symbol) = ccall(:jl_module_globalref, Ref{GlobalRef}, (Any, Any), m, s) Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names) function _Task(@nospecialize(f), reserved_stack::Int, completion_future) return ccall(:jl_new_task, Ref{Task}, (Any, Any, Int), f, completion_future, reserved_stack) end -# simple convert for use by constructors of types in Core -# note that there is no actual conversion defined here, -# so the methods and ccall's in Core aren't permitted to use convert -convert(::Type{Any}, @nospecialize(x)) = x -convert(::Type{T}, x::T) where {T} = x -cconvert(::Type{T}, x) where {T} = convert(T, x) -unsafe_convert(::Type{T}, x::T) where {T} = x - const NTuple{N,T} = Tuple{Vararg{T,N}} ## primitive Array constructors struct UndefInitializer end const undef = UndefInitializer() + +# type and dimensionality specified +(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} = + if isdefined(self, :instance) && m === 0 + self.instance + else + ccall(:jl_alloc_genericmemory, Ref{GenericMemory{kind,T,addrspace}}, (Any, Int), self, m) + end +(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, d::NTuple{1,Int}) where {T,kind,addrspace} = self(undef, getfield(d,1)) +# empty vector constructor +(self::Type{GenericMemory{kind,T,addrspace}})() where {T,kind,addrspace} = self(undef, 0) +# copy constructors + +const Memory{T} = GenericMemory{:not_atomic, T, CPU} +const MemoryRef{T} = GenericMemoryRef{:not_atomic, T, CPU} 
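# ---------------------------------------------------------------------------------
# Editorial sketch (not part of this patch): minimal usage of the `Memory{T}` /
# `MemoryRef{T}` aliases defined above, assuming the `GenericMemory` constructors
# from this hunk and the indexing support that Base layers on top of them.
m = Memory{Int}(undef, 4)   # flat, fixed-length, non-atomic CPU buffer
m[1] = 42                   # element access works like a Vector
m[1] + 1                    # -> 43
length(m)                   # -> 4; the length is fixed at construction time
# A `MemoryRef{Int}` is a pointer-like view anchored at one element of such a
# buffer; the reworked Array type in this patch stores a ref plus a size tuple.
# ---------------------------------------------------------------------------------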
+GenericMemoryRef(mem::GenericMemory) = memoryref(mem) +GenericMemoryRef(ref::GenericMemoryRef, i::Integer) = memoryref(ref, Int(i), @_boundscheck) +GenericMemoryRef(mem::GenericMemory, i::Integer) = memoryref(memoryref(mem), Int(i), @_boundscheck) +MemoryRef(mem::Memory) = memoryref(mem) +MemoryRef(ref::MemoryRef, i::Integer) = memoryref(ref, Int(i), @_boundscheck) +MemoryRef(mem::Memory, i::Integer) = memoryref(memoryref(mem), Int(i), @_boundscheck) +MemoryRef{T}(mem::Memory{T}) where {T} = memoryref(mem) +MemoryRef{T}(ref::MemoryRef{T}, i::Integer) where {T} = memoryref(ref, Int(i), @_boundscheck) +MemoryRef{T}(mem::Memory{T}, i::Integer) where {T} = memoryref(memoryref(mem), Int(i), @_boundscheck) + +# construction helpers for Array +new_as_memoryref(self::Type{GenericMemoryRef{isatomic,T,addrspace}}, m::Int) where {T,isatomic,addrspace} = memoryref(fieldtype(self, :mem)(undef, m)) + +# checked-multiply intrinsic function for dimensions +_checked_mul_dims() = 1, false +_checked_mul_dims(m::Int) = m, Intrinsics.ule_int(typemax_Int, m) # equivalently: (m + 1) < 1 +function _checked_mul_dims(m::Int, n::Int) + b = Intrinsics.checked_smul_int(m, n) + a = getfield(b, 1) + ovflw = getfield(b, 2) + ovflw = Intrinsics.or_int(ovflw, Intrinsics.ule_int(typemax_Int, m)) + ovflw = Intrinsics.or_int(ovflw, Intrinsics.ule_int(typemax_Int, n)) + return a, ovflw +end +function _checked_mul_dims(m::Int, d::Int...) + @_foldable_meta # the compiler needs to know this loop terminates + a = m + i = 1 + ovflw = false + while Intrinsics.sle_int(i, nfields(d)) + di = getfield(d, i) + b = Intrinsics.checked_smul_int(a, di) + ovflw = Intrinsics.or_int(ovflw, getfield(b, 2)) + ovflw = Intrinsics.or_int(ovflw, Intrinsics.ule_int(typemax_Int, di)) + a = getfield(b, 1) + i = Intrinsics.add_int(i, 1) + end + return a, ovflw +end + +# convert a set of dims to a length, with overflow checking +checked_dims() = 1 +checked_dims(m::Int) = m # defer this check to Memory constructor instead +function checked_dims(d::Int...) + b = _checked_mul_dims(d...) 
+ getfield(b, 2) && throw(ArgumentError("invalid Array dimensions")) + return getfield(b, 1) +end + # type and dimensionality specified, accepting dims as series of Ints -Array{T,1}(::UndefInitializer, m::Int) where {T} = - ccall(:jl_alloc_array_1d, Array{T,1}, (Any, Int), Array{T,1}, m) -Array{T,2}(::UndefInitializer, m::Int, n::Int) where {T} = - ccall(:jl_alloc_array_2d, Array{T,2}, (Any, Int, Int), Array{T,2}, m, n) -Array{T,3}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} = - ccall(:jl_alloc_array_3d, Array{T,3}, (Any, Int, Int, Int), Array{T,3}, m, n, o) -Array{T,N}(::UndefInitializer, d::Vararg{Int,N}) where {T,N} = - ccall(:jl_new_array, Array{T,N}, (Any, Any), Array{T,N}, d) +eval(Core, :(function (self::Type{Array{T,1}})(::UndefInitializer, m::Int) where {T} + mem = fieldtype(fieldtype(self, :ref), :mem)(undef, m) + return $(Expr(:new, :self, :(memoryref(mem)), :((m,)))) +end)) +eval(Core, :(function (self::Type{Array{T,2}})(::UndefInitializer, m::Int, n::Int) where {T} + return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(m, n))), :((m, n)))) +end)) +eval(Core, :(function (self::Type{Array{T,3}})(::UndefInitializer, m::Int, n::Int, o::Int) where {T} + return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(m, n, o))), :((m, n, o)))) +end)) +eval(Core, :(function (self::Type{Array{T, N}})(::UndefInitializer, d::Vararg{Int, N}) where {T, N} + return $(Expr(:new, :self, :(new_as_memoryref(fieldtype(self, :ref), checked_dims(d...))), :d)) +end)) # type and dimensionality specified, accepting dims as tuples of Ints -Array{T,1}(::UndefInitializer, d::NTuple{1,Int}) where {T} = Array{T,1}(undef, getfield(d,1)) -Array{T,2}(::UndefInitializer, d::NTuple{2,Int}) where {T} = Array{T,2}(undef, getfield(d,1), getfield(d,2)) -Array{T,3}(::UndefInitializer, d::NTuple{3,Int}) where {T} = Array{T,3}(undef, getfield(d,1), getfield(d,2), getfield(d,3)) -Array{T,N}(::UndefInitializer, d::NTuple{N,Int}) where {T,N} = ccall(:jl_new_array, Array{T,N}, (Any, Any), Array{T,N}, d) +(self::Type{Array{T,1}})(::UndefInitializer, d::NTuple{1, Int}) where {T} = self(undef, getfield(d, 1)) +(self::Type{Array{T,2}})(::UndefInitializer, d::NTuple{2, Int}) where {T} = self(undef, getfield(d, 1), getfield(d, 2)) +(self::Type{Array{T,3}})(::UndefInitializer, d::NTuple{3, Int}) where {T} = self(undef, getfield(d, 1), getfield(d, 2), getfield(d, 3)) +(self::Type{Array{T,N}})(::UndefInitializer, d::NTuple{N, Int}) where {T, N} = self(undef, d...) 
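# ---------------------------------------------------------------------------------
# Editorial sketch (not part of this patch): worked example of the overflow-checked
# dimension handling above, assuming a session where this patch is in place (the
# helpers live in `Core`, so they are qualified here). `checked_dims` multiplies the
# dimensions with a checked multiply and flags overflow of the product as well as
# out-of-range individual dimensions.
Core._checked_mul_dims(3, 4)             # -> (12, false): product fits in an Int
Core._checked_mul_dims(typemax(Int), 2)  # -> (-2, true): wraps around, overflow flagged
Core.checked_dims(3, 4)                  # -> 12, the element count for Array{T,2}(undef, 3, 4)
# Core.checked_dims(typemax(Int), 2)     # would throw ArgumentError("invalid Array dimensions")
# ---------------------------------------------------------------------------------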
# type but not dimensionality specified -Array{T}(::UndefInitializer, m::Int) where {T} = Array{T,1}(undef, m) -Array{T}(::UndefInitializer, m::Int, n::Int) where {T} = Array{T,2}(undef, m, n) -Array{T}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} = Array{T,3}(undef, m, n, o) -Array{T}(::UndefInitializer, d::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, d) +Array{T}(::UndefInitializer, m::Int) where {T} = Array{T, 1}(undef, m) +Array{T}(::UndefInitializer, m::Int, n::Int) where {T} = Array{T, 2}(undef, m, n) +Array{T}(::UndefInitializer, m::Int, n::Int, o::Int) where {T} = Array{T, 3}(undef, m, n, o) +Array{T}(::UndefInitializer, d::NTuple{N, Int}) where {T, N} = Array{T, N}(undef, d) # empty vector constructor -Array{T,1}() where {T} = Array{T,1}(undef, 0) - +(self::Type{Array{T, 1}})() where {T} = self(undef, 0) -(Array{T,N} where T)(x::AbstractArray{S,N}) where {S,N} = Array{S,N}(x) +(Array{T, N} where T)(x::AbstractArray{S, N}) where {S, N} = Array{S, N}(x) -Array(A::AbstractArray{T,N}) where {T,N} = Array{T,N}(A) -Array{T}(A::AbstractArray{S,N}) where {T,N,S} = Array{T,N}(A) +Array(A::AbstractArray{T, N}) where {T, N} = Array{T, N}(A) +Array{T}(A::AbstractArray{S, N}) where {T, N, S} = Array{T, N}(A) -AbstractArray{T}(A::AbstractArray{S,N}) where {T,S,N} = AbstractArray{T,N}(A) +AbstractArray{T}(A::AbstractArray{S, N}) where {T, S, N} = AbstractArray{T, N}(A) # primitive Symbol constructors -eval(Core, :(function Symbol(s::String) - $(Expr(:meta, :pure)) - return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), - ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s), - sizeof(s)) -end)) -function Symbol(a::Array{UInt8,1}) - return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), - ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a), - Intrinsics.arraylen(a)) + +## Helper for proper GC rooting without unsafe_convert +eval(Core, quote + _Symbol(ptr::Ptr{UInt8}, sz::Int, root::Any) = $(Expr(:foreigncall, QuoteNode(:jl_symbol_n), + Ref{Symbol}, svec(Ptr{UInt8}, Int), 0, QuoteNode(:ccall), :ptr, :sz, :root)) +end) + +function Symbol(s::String) + @_foldable_meta + @noinline + return _Symbol(ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s), sizeof(s), s) +end +function Symbol(a::Array{UInt8, 1}) + @noinline + return _Symbol(bitcast(Ptr{UInt8}, a.ref.ptr_or_offset), getfield(a.size, 1), a.ref.mem) end Symbol(s::Symbol) = s # module providing the IR object model module IR + export CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode, - NewvarNode, SSAValue, Slot, SlotNumber, TypedSlot, Argument, + NewvarNode, SSAValue, SlotNumber, Argument, PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode, - Const, PartialStruct + Const, PartialStruct, InterConditional, EnterNode -import Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode, - NewvarNode, SSAValue, Slot, SlotNumber, TypedSlot, Argument, +using Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode, + NewvarNode, SSAValue, SlotNumber, Argument, PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode, - Const, PartialStruct + Const, PartialStruct, InterConditional, EnterNode -end +end # module IR # docsystem basics -const unescape = Symbol("hygienic-scope") macro doc(x...) docex = atdoc(__source__, __module__, x...) 
isa(docex, Expr) && docex.head === :escape && return docex - return Expr(:escape, Expr(unescape, docex, typeof(atdoc).name.module)) + return Expr(:escape, Expr(:var"hygienic-scope", docex, typeof(atdoc).name.module, __source__)) end macro __doc__(x) return Expr(:escape, Expr(:block, Expr(:meta, :doc), x)) @@ -572,28 +693,25 @@ println(@nospecialize a...) = println(stdout, a...) struct GeneratedFunctionStub gen - argnames::Array{Any,1} - spnames::Union{Nothing, Array{Any,1}} - line::Int - file::Symbol - expand_early::Bool + argnames::SimpleVector + spnames::SimpleVector end -# invoke and wrap the results of @generated -function (g::GeneratedFunctionStub)(@nospecialize args...) +# invoke and wrap the results of @generated expression +function (g::GeneratedFunctionStub)(world::UInt, source::LineNumberNode, @nospecialize args...) + # args is (spvals..., argtypes...) body = g.gen(args...) - if body isa CodeInfo - return body - end - lam = Expr(:lambda, g.argnames, - Expr(Symbol("scope-block"), + file = source.file + file isa Symbol || (file = :none) + lam = Expr(:lambda, Expr(:argnames, g.argnames...).args, + Expr(:var"scope-block", Expr(:block, - LineNumberNode(g.line, g.file), - Expr(:meta, :push_loc, g.file, Symbol("@generated body")), + source, + Expr(:meta, :push_loc, file, :var"@generated body"), Expr(:return, body), Expr(:meta, :pop_loc)))) spnames = g.spnames - if spnames === nothing + if spnames === svec() return lam else return Expr(Symbol("with-static-parameters"), lam, spnames...) @@ -602,7 +720,8 @@ end NamedTuple() = NamedTuple{(),Tuple{}}(()) -NamedTuple{names}(args::Tuple) where {names} = NamedTuple{names,typeof(args)}(args) +eval(Core, :(NamedTuple{names}(args::Tuple) where {names} = + $(Expr(:splatnew, :(NamedTuple{names,typeof(args)}), :args)))) using .Intrinsics: sle_int, add_int @@ -613,8 +732,6 @@ eval(Core, :(NamedTuple{names,T}(args::T) where {names, T <: Tuple} = import .Intrinsics: eq_int, trunc_int, lshr_int, sub_int, shl_int, bitcast, sext_int, zext_int, and_int -throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@noinline; throw(InexactError(f, T, val))) - function is_top_bit_set(x) @inline eq_int(trunc_int(UInt8, lshr_int(x, sub_int(shl_int(sizeof(x), 3), 1))), trunc_int(UInt8, 1)) @@ -625,6 +742,9 @@ function is_top_bit_set(x::Union{Int8,UInt8}) eq_int(lshr_int(x, 7), trunc_int(typeof(x), 1)) end +#TODO delete this function (but see #48097): +throw_inexacterror(args...) = throw(InexactError(args...)) + function check_top_bit(::Type{To}, x) where {To} @inline is_top_bit_set(x) && throw_inexacterror(:check_top_bit, To, x) @@ -779,8 +899,8 @@ if Int === Int32 Int64(x::Ptr) = Int64(UInt32(x)) UInt64(x::Ptr) = UInt64(UInt32(x)) end -Ptr{T}(x::Union{Int,UInt,Ptr}) where {T} = bitcast(Ptr{T}, x) -Ptr{T}() where {T} = Ptr{T}(0) +(PT::Type{Ptr{T}} where T)(x::Union{Int,UInt,Ptr}=0) = bitcast(PT, x) +(AS::Type{AddrSpace{Backend}} where Backend)(x::UInt8) = bitcast(AS, x) Signed(x::UInt8) = Int8(x) Unsigned(x::Int8) = UInt8(x) @@ -811,9 +931,11 @@ Integer(x::Union{Float16, Float32, Float64}) = Int(x) # `_parse` must return an `svec` containing an `Expr` and the new offset as an # `Int`. # -# The internal jl_parse which will call into Core._parse if not `nothing`. +# The internal jl_parse will call into Core._parse if not `nothing`. _parse = nothing +_setparser!(parser) = setglobal!(Core, :_parse, parser) + # support for deprecated uses of internal _apply function _apply(x...) = Core._apply_iterate(Main.Base.iterate, x...) 
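# ---------------------------------------------------------------------------------
# Editorial sketch (not part of this patch): the `_apply` shim above keeps the
# deprecated `Core._apply(f, iters...)` convention working by forwarding to
# `Core._apply_iterate`, which splats each iterable into the call. Assuming Base
# is loaded, both lines below are equivalent to `+(1, 2, 3)`:
Core._apply_iterate(Base.iterate, +, (1, 2), (3,))  # -> 6
Core._apply(+, (1, 2), (3,))                        # -> 6, via the compat shim
# ---------------------------------------------------------------------------------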
@@ -825,8 +947,29 @@ struct Pair{A, B} # but also mark the whole function with `@inline` to ensure we will inline it whenever possible # (even if `convert(::Type{A}, a::A)` for some reason was expensive) Pair(a, b) = new{typeof(a), typeof(b)}(a, b) - Pair{A, B}(a::A, b::B) where {A, B} = new(a, b) - Pair{Any, Any}(@nospecialize(a::Any), @nospecialize(b::Any)) = new(a, b) + function Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B} + @inline + return new(a::A, b::B) + end +end + +function _hasmethod(@nospecialize(tt)) # this function has a special tfunc + world = ccall(:jl_get_tls_world_age, UInt, ()) + return Intrinsics.not_int(ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world) === nothing) end +# for backward compat +arrayref(inbounds::Bool, A::Array, i::Int...) = Main.Base.getindex(A, i...) +const_arrayref(inbounds::Bool, A::Array, i::Int...) = Main.Base.getindex(A, i...) +arrayset(inbounds::Bool, A::Array{T}, x::Any, i::Int...) where {T} = Main.Base.setindex!(A, x::T, i...) +arraysize(a::Array) = a.size +arraysize(a::Array, i::Int) = sle_int(i, nfields(a.size)) ? getfield(a.size, i) : 1 +export arrayref, arrayset, arraysize, const_arrayref + +# For convenience +EnterNode(old::EnterNode, new_dest::Int) = isdefined(old, :scope) ? + EnterNode(new_dest, old.scope) : EnterNode(new_dest) + +include(Core, "optimized_generics.jl") + ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true) diff --git a/base/broadcast.jl b/base/broadcast.jl index 1896e5edad105..d6e5513889cee 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -34,6 +34,9 @@ that you may be able to leverage; see the """ abstract type BroadcastStyle end +struct Unknown <: BroadcastStyle end +BroadcastStyle(::Type{Union{}}, slurp...) = Unknown() # ambiguity resolution + """ `Broadcast.Style{C}()` defines a [`BroadcastStyle`](@ref) signaling through the type parameter `C`. You can use this as an alternative to creating custom subtypes of `BroadcastStyle`, @@ -45,9 +48,6 @@ struct Style{T} <: BroadcastStyle end BroadcastStyle(::Type{<:Tuple}) = Style{Tuple}() -struct Unknown <: BroadcastStyle end -BroadcastStyle(::Type{Union{}}) = Unknown() # ambiguity resolution - """ `Broadcast.AbstractArrayStyle{N} <: BroadcastStyle` is the abstract supertype for any style associated with an `AbstractArray` type. 
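# ---------------------------------------------------------------------------------
# Editorial sketch (not part of this patch): the usual way user code hooks into the
# `BroadcastStyle` machinery shown in the hunk above. `MyArray` is a hypothetical
# wrapper type used only for illustration; the one-line style declaration is the
# documented customization entry point.
struct MyArray{T,N} <: AbstractArray{T,N}
    data::Array{T,N}
end
Base.size(A::MyArray) = size(A.data)
Base.getindex(A::MyArray, i::Int...) = A.data[i...]
Base.BroadcastStyle(::Type{<:MyArray}) = Broadcast.ArrayStyle{MyArray}()
# With that method defined, broadcast expressions involving a MyArray dispatch on
# ArrayStyle{MyArray}, so `similar`/`copy` overloads can route results back into MyArray.
# ---------------------------------------------------------------------------------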
@@ -167,16 +167,28 @@ BroadcastStyle(a::AbstractArrayStyle{M}, ::DefaultArrayStyle{N}) where {M,N} = # copyto!(dest::AbstractArray, bc::Broadcasted{MyStyle}) struct Broadcasted{Style<:Union{Nothing,BroadcastStyle}, Axes, F, Args<:Tuple} <: Base.AbstractBroadcasted + style::Style f::F args::Args axes::Axes # the axes of the resulting object (may be bigger than implied by `args` if this is nested inside a larger `Broadcasted`) -end -Broadcasted(f::F, args::Args, axes=nothing) where {F, Args<:Tuple} = - Broadcasted{typeof(combine_styles(args...))}(f, args, axes) -function Broadcasted{Style}(f::F, args::Args, axes=nothing) where {Style, F, Args<:Tuple} - # using Core.Typeof rather than F preserves inferrability when f is a type - Broadcasted{Style, typeof(axes), Core.Typeof(f), Args}(f, args, axes) + Broadcasted(style::Union{Nothing,BroadcastStyle}, f::Tuple, args::Tuple) = error() # disambiguation: tuple is not callable + function Broadcasted(style::Union{Nothing,BroadcastStyle}, f::F, args::Tuple, axes=nothing) where {F} + # using Core.Typeof rather than F preserves inferrability when f is a type + return new{typeof(style), typeof(axes), Core.Typeof(f), typeof(args)}(style, f, args, axes) + end + + function Broadcasted(f::F, args::Tuple, axes=nothing) where {F} + Broadcasted(combine_styles(args...)::BroadcastStyle, f, args, axes) + end + + function Broadcasted{Style}(f::F, args, axes=nothing) where {Style, F} + return new{Style, typeof(axes), Core.Typeof(f), typeof(args)}(Style()::Style, f, args, axes) + end + + function Broadcasted{Style,Axes,F,Args}(f, args, axes) where {Style,Axes,F,Args} + return new{Style, Axes, F, Args}(Style()::Style, f, args, axes) + end end struct AndAnd end @@ -194,16 +206,16 @@ function broadcasted(::OrOr, a, bc::Broadcasted) broadcasted((a, args...) -> a || bcf.f(args...), a, bcf.args...) end -Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{Style,Axes,F,Args}) where {NewStyle,Style,Axes,F,Args} = - Broadcasted{NewStyle,Axes,F,Args}(bc.f, bc.args, bc.axes) +Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{<:Any,Axes,F,Args}) where {NewStyle,Axes,F,Args} = + Broadcasted{NewStyle,Axes,F,Args}(bc.f, bc.args, bc.axes)::Broadcasted{NewStyle,Axes,F,Args} function Base.show(io::IO, bc::Broadcasted{Style}) where {Style} print(io, Broadcasted) # Only show the style parameter if we have a set of axes — representing an instantiated # "outermost" Broadcasted. The styles of nested Broadcasteds represent an intermediate # computation that is not relevant for dispatch, confusing, and just extra line noise. 
- bc.axes isa Tuple && print(io, '{', Style, '}') - print(io, '(', bc.f, ", ", bc.args, ')') + bc.axes isa Tuple && print(io, "{", Style, "}") + print(io, "(", bc.f, ", ", bc.args, ")") nothing end @@ -231,7 +243,7 @@ BroadcastStyle(::Type{<:Broadcasted{Style}}) where {Style} = Style() BroadcastStyle(::Type{<:Broadcasted{S}}) where {S<:Union{Nothing,Unknown}} = throw(ArgumentError("Broadcasted{Unknown} wrappers do not have a style assigned")) -argtype(::Type{Broadcasted{Style,Axes,F,Args}}) where {Style,Axes,F,Args} = Args +argtype(::Type{BC}) where {BC<:Broadcasted} = fieldtype(BC, :args) argtype(bc::Broadcasted) = argtype(typeof(bc)) @inline Base.eachindex(bc::Broadcasted) = _eachindex(axes(bc)) @@ -262,19 +274,15 @@ Base.@propagate_inbounds function Base.iterate(bc::Broadcasted, s) end Base.IteratorSize(::Type{T}) where {T<:Broadcasted} = Base.HasShape{ndims(T)}() -Base.ndims(BC::Type{<:Broadcasted{<:Any,Nothing}}) = _maxndims(fieldtype(BC, 2)) +Base.ndims(BC::Type{<:Broadcasted{<:Any,Nothing}}) = _maxndims(fieldtype(BC, :args)) Base.ndims(::Type{<:Broadcasted{<:AbstractArrayStyle{N},Nothing}}) where {N<:Integer} = N -_maxndims(T::Type{<:Tuple}) = reduce(max, (ntuple(n -> _ndims(fieldtype(T, n)), Base._counttuple(T)))) -_maxndims(::Type{<:Tuple{T}}) where {T} = ndims(T) -_maxndims(::Type{<:Tuple{T}}) where {T<:Tuple} = _ndims(T) +_maxndims(::Type{T}) where {T<:Tuple} = reduce(max, ntuple(n -> (F = fieldtype(T, n); F <: Tuple ? 1 : ndims(F)), Base._counttuple(T))) +_maxndims(::Type{<:Tuple{T}}) where {T} = T <: Tuple ? 1 : ndims(T) function _maxndims(::Type{<:Tuple{T, S}}) where {T, S} - return T<:Tuple || S<:Tuple ? max(_ndims(T), _ndims(S)) : max(ndims(T), ndims(S)) + return max(T <: Tuple ? 1 : ndims(T), S <: Tuple ? 1 : ndims(S)) end -_ndims(x) = ndims(x) -_ndims(::Type{<:Tuple}) = 1 - Base.IteratorEltype(::Type{<:Broadcasted}) = Base.EltypeUnknown() ## Instantiation fills in the "missing" fields in Broadcasted. @@ -289,14 +297,14 @@ Custom [`BroadcastStyle`](@ref)s may override this default in cases where it is to compute and verify the resulting `axes` on-demand, leaving the `axis` field of the `Broadcasted` object empty (populated with [`nothing`](@ref)). """ -@inline function instantiate(bc::Broadcasted{Style}) where {Style} +@inline function instantiate(bc::Broadcasted) if bc.axes isa Nothing # Not done via dispatch to make it easier to extend instantiate(::Broadcasted{Style}) axes = combine_axes(bc.args...) else axes = bc.axes check_broadcast_axes(axes, bc.args...) end - return Broadcasted{Style}(bc.f, bc.args, axes) + return Broadcasted(bc.style, bc.f, bc.args, axes) end instantiate(bc::Broadcasted{<:AbstractArrayStyle{0}}) = bc # Tuples don't need axes, but when they have axes (for .= assignment), we need to check them (#33020) @@ -325,24 +333,20 @@ becomes This is an optional operation that may make custom implementation of broadcasting easier in some cases. """ -function flatten(bc::Broadcasted{Style}) where {Style} +function flatten(bc::Broadcasted) isflat(bc) && return bc # concatenate the nested arguments into {a, b, c, d} args = cat_nested(bc) - # build a function `makeargs` that takes a "flat" argument list and - # and creates the appropriate input arguments for `f`, e.g., - # makeargs = (w, x, y, z) -> (w, g(x, y), z) - # - # `makeargs` is built recursively and looks a bit like this: - # makeargs(w, x, y, z) = (w, makeargs1(x, y, z)...) - # = (w, g(x, y), makeargs2(z)...) 
- # = (w, g(x, y), z) - let makeargs = make_makeargs(()->(), bc.args), f = bc.f - newf = @inline function(args::Vararg{Any,N}) where N - f(makeargs(args...)...) - end - return Broadcasted{Style}(newf, args, bc.axes) - end + # build a tuple of functions `makeargs`. Its elements take + # the whole "flat" argument list and generate the appropriate + # input arguments for the broadcasted function `f`, e.g., + # makeargs[1] = ((w, x, y, z)) -> w + # makeargs[2] = ((w, x, y, z)) -> g(x, y) + # makeargs[3] = ((w, x, y, z)) -> z + makeargs = make_makeargs(bc.args) + f = Base.maybeconstructor(bc.f) + newf = (args...) -> (@inline; f(prepare_args(makeargs, args)...)) + return Broadcasted(bc.style, newf, args, bc.axes) end const NestedTuple = Tuple{<:Broadcasted,Vararg{Any}} @@ -351,78 +355,47 @@ _isflat(args::NestedTuple) = false _isflat(args::Tuple) = _isflat(tail(args)) _isflat(args::Tuple{}) = true -cat_nested(t::Broadcasted, rest...) = (cat_nested(t.args...)..., cat_nested(rest...)...) -cat_nested(t::Any, rest...) = (t, cat_nested(rest...)...) -cat_nested() = () +cat_nested(bc::Broadcasted) = cat_nested_args(bc.args) +cat_nested_args(::Tuple{}) = () +cat_nested_args(t::Tuple{Any}) = cat_nested(t[1]) +cat_nested_args(t::Tuple) = (cat_nested(t[1])..., cat_nested_args(tail(t))...) +cat_nested(a) = (a,) """ - make_makeargs(makeargs_tail::Function, t::Tuple) -> Function + make_makeargs(t::Tuple) -> Tuple{Vararg{Function}} Each element of `t` is one (consecutive) node in a broadcast tree. -Ignoring `makeargs_tail` for the moment, the job of `make_makeargs` is -to return a function that takes in flattened argument list and returns a -tuple (each entry corresponding to an entry in `t`, having evaluated -the corresponding element in the broadcast tree). As an additional -complication, the passed in tuple may be longer than the number of leaves -in the subtree described by `t`. The `makeargs_tail` function should -be called on such additional arguments (but not the arguments consumed -by `t`). +The returned `Tuple` contains functions which take in the (whole) flattened +list and generate the inputs for the corresponding broadcasted function. """ -@inline make_makeargs(makeargs_tail, t::Tuple{}) = makeargs_tail -@inline function make_makeargs(makeargs_tail, t::Tuple) - makeargs = make_makeargs(makeargs_tail, tail(t)) - (head, tail...)->(head, makeargs(tail...)...) -end -function make_makeargs(makeargs_tail, t::Tuple{<:Broadcasted, Vararg{Any}}) - bc = t[1] - # c.f. the same expression in the function on leaf nodes above. Here - # we recurse into siblings in the broadcast tree. - let makeargs_tail = make_makeargs(makeargs_tail, tail(t)), - # Here we recurse into children. It would be valid to pass in makeargs_tail - # here, and not use it below. However, in that case, our recursion is no - # longer purely structural because we're building up one argument (the closure) - # while destructuing another. - makeargs_head = make_makeargs((args...)->args, bc.args), - f = bc.f - # Create two functions, one that splits of the first length(bc.args) - # elements from the tuple and one that yields the remaining arguments. - # N.B. We can't call headargs on `args...` directly because - # args is flattened (i.e. our children have not been evaluated - # yet). - headargs, tailargs = make_headargs(bc.args), make_tailargs(bc.args) - return @inline function(args::Vararg{Any,N}) where N - args1 = makeargs_head(args...) - a, b = headargs(args1...), makeargs_tail(tailargs(args1...)...) - (f(a...), b...) 
- end - end -end - -@inline function make_headargs(t::Tuple) - let headargs = make_headargs(tail(t)) - return @inline function(head, tail::Vararg{Any,N}) where N - (head, headargs(tail...)...) - end - end +make_makeargs(args::Tuple) = _make_makeargs(args, 1)[1] + +# We build `makeargs` by traversing the broadcast nodes recursively. +# note: `n` indicates the flattened index of the next unused argument. +@inline function _make_makeargs(args::Tuple, n::Int) + head, n = _make_makeargs1(args[1], n) + rest, n = _make_makeargs(tail(args), n) + (head, rest...), n end -@inline function make_headargs(::Tuple{}) - return @inline function(tail::Vararg{Any,N}) where N - () - end +_make_makeargs(::Tuple{}, n::Int) = (), n + +# A help struct to store the flattened index statically +struct Pick{N} <: Function end +(::Pick{N})(@nospecialize(args::Tuple)) where {N} = args[N] + +# For flat nodes, we just consume one argument (n += 1), and return the "Pick" function +@inline _make_makeargs1(_, n::Int) = Pick{n}(), n + 1 +# For nested nodes, we form the `makeargs1` based on the child `makeargs` (n += length(cat_nested(bc))) +@inline function _make_makeargs1(bc::Broadcasted, n::Int) + makeargs, n = _make_makeargs(bc.args, n) + f = Base.maybeconstructor(bc.f) + makeargs1 = (args::Tuple) -> (@inline; f(prepare_args(makeargs, args)...)) + makeargs1, n end -@inline function make_tailargs(t::Tuple) - let tailargs = make_tailargs(tail(t)) - return @inline function(head, tail::Vararg{Any,N}) where N - tailargs(tail...) - end - end -end -@inline function make_tailargs(::Tuple{}) - return @inline function(tail::Vararg{Any,N}) where N - tail - end -end +@inline prepare_args(makeargs::Tuple, @nospecialize(x::Tuple)) = (makeargs[1](x), prepare_args(tail(makeargs), x)...) +@inline prepare_args(makeargs::Tuple{Any}, @nospecialize(x::Tuple)) = (makeargs[1](x),) +prepare_args(::Tuple{}, ::Tuple) = () ## Broadcasting utilities ## @@ -446,6 +419,10 @@ function combine_styles end combine_styles() = DefaultArrayStyle{0}() combine_styles(c) = result_style(BroadcastStyle(typeof(c))) +function combine_styles(bc::Broadcasted) + bc.style isa Union{Nothing,Unknown} || return bc.style + throw(ArgumentError("Broadcasted{Unknown} wrappers do not have a style assigned")) +end combine_styles(c1, c2) = result_style(combine_styles(c1), combine_styles(c2)) @inline combine_styles(c1, c2, cs...) = result_style(combine_styles(c1), combine_styles(c2, cs...)) @@ -468,7 +445,9 @@ Base.Broadcast.DefaultArrayStyle{1}() function result_style end result_style(s::BroadcastStyle) = s -result_style(s1::S, s2::S) where S<:BroadcastStyle = S() +function result_style(s1::S, s2::S) where S<:BroadcastStyle + s1 ≡ s2 ? 
s1 : error("inconsistent broadcast styles, custom rule needed") +end # Test both orders so users typically only have to declare one order result_style(s1, s2) = result_join(s1, s2, BroadcastStyle(s1, s2), BroadcastStyle(s2, s1)) @@ -484,7 +463,8 @@ result_join(::Any, ::Any, s::BroadcastStyle, ::Unknown) = s result_join(::AbstractArrayStyle, ::AbstractArrayStyle, ::Unknown, ::Unknown) = ArrayConflict() # Fallbacks in case users define `rule` for both argument-orders (not recommended) -result_join(::Any, ::Any, ::S, ::S) where S<:BroadcastStyle = S() +result_join(::Any, ::Any, s1::S, s2::S) where S<:BroadcastStyle = result_style(s1, s2) + @noinline function result_join(::S, ::T, ::U, ::V) where {S,T,U,V} error(""" conflicting broadcast rules defined @@ -512,6 +492,20 @@ julia> Broadcast.combine_axes(1, 1, 1) @inline combine_axes(A, B) = broadcast_shape(axes(A), axes(B)) combine_axes(A) = axes(A) +""" + broadcast_shape(As...) -> Tuple + +Determine the result axes for broadcasting across all axes (size Tuples) in `As`. + +```jldoctest +julia> Broadcast.broadcast_shape((1,2), (2,1)) +(2, 2) + +julia> Broadcast.broadcast_shape((1,), (1,5), (4,5,3)) +(4, 5, 3) +``` +""" +function broadcast_shape end # shape (i.e., tuple-of-indices) inputs broadcast_shape(shape::Tuple) = shape broadcast_shape(shape::Tuple, shape1::Tuple, shapes::Tuple...) = broadcast_shape(_bcs(shape, shape1), shapes...) @@ -710,7 +704,7 @@ julia> Broadcast.broadcastable("hello") # Strings break convention of matching i Base.RefValue{String}("hello") ``` """ -broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair,IO}) = Ref(x) +broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair,IO,CartesianIndex}) = Ref(x) broadcastable(::Type{T}) where {T} = Ref{Type{T}}(T) broadcastable(x::Union{AbstractArray,Number,AbstractChar,Ref,Tuple,Broadcasted}) = x # Default to collecting iterables — which will error for non-iterables @@ -718,17 +712,21 @@ broadcastable(x) = collect(x) broadcastable(::Union{AbstractDict, NamedTuple}) = throw(ArgumentError("broadcasting over dictionaries and `NamedTuple`s is reserved")) ## Computation of inferred result type, for empty and concretely inferred cases only -_broadcast_getindex_eltype(bc::Broadcasted) = Base._return_type(bc.f, eltypes(bc.args)) +_broadcast_getindex_eltype(bc::Broadcasted) = combine_eltypes(bc.f, bc.args) _broadcast_getindex_eltype(A) = eltype(A) # Tuple, Array, etc. eltypes(::Tuple{}) = Tuple{} -eltypes(t::Tuple{Any}) = Tuple{_broadcast_getindex_eltype(t[1])} -eltypes(t::Tuple{Any,Any}) = Tuple{_broadcast_getindex_eltype(t[1]), _broadcast_getindex_eltype(t[2])} -eltypes(t::Tuple) = Tuple{_broadcast_getindex_eltype(t[1]), eltypes(tail(t)).types...} +eltypes(t::Tuple{Any}) = Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1])) +eltypes(t::Tuple{Any,Any}) = Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), _broadcast_getindex_eltype(t[2])) +eltypes(t::Tuple) = (TT = eltypes(tail(t)); TT === Union{} ? Union{} : Iterators.TupleOrBottom(_broadcast_getindex_eltype(t[1]), TT.parameters...)) +# eltypes(t::Tuple) = Iterators.TupleOrBottom(ntuple(i -> _broadcast_getindex_eltype(t[i]), Val(length(t)))...) # Inferred eltype of result of broadcast(f, args...) 
-combine_eltypes(f, args::Tuple) = - promote_typejoin_union(Base._return_type(f, eltypes(args))) +function combine_eltypes(f, args::Tuple) + argT = eltypes(args) + argT === Union{} && return Union{} + return promote_typejoin_union(Base._return_type(f, argT)) +end ## Broadcasting core @@ -877,11 +875,11 @@ materialize(x) = x return materialize!(dest, instantiate(Broadcasted(identity, (x,), axes(dest)))) end -@inline function materialize!(dest, bc::Broadcasted{Style}) where {Style} +@inline function materialize!(dest, bc::Broadcasted{<:Any}) return materialize!(combine_styles(dest, bc), dest, bc) end -@inline function materialize!(::BroadcastStyle, dest, bc::Broadcasted{Style}) where {Style} - return copyto!(dest, instantiate(Broadcasted{Style}(bc.f, bc.args, axes(dest)))) +@inline function materialize!(::BroadcastStyle, dest, bc::Broadcasted{<:Any}) + return copyto!(dest, instantiate(Broadcasted(bc.style, bc.f, bc.args, axes(dest)))) end ## general `copy` methods @@ -891,7 +889,7 @@ copy(bc::Broadcasted{<:Union{Nothing,Unknown}}) = const NonleafHandlingStyles = Union{DefaultArrayStyle,ArrayConflict} -@inline function copy(bc::Broadcasted{Style}) where {Style} +@inline function copy(bc::Broadcasted) ElType = combine_eltypes(bc.f, bc.args) if Base.isconcretetype(ElType) # We can trust it and defer to the simpler `copyto!` @@ -950,7 +948,7 @@ broadcast_unalias(::Nothing, src) = src # Preprocessing a `Broadcasted` does two things: # * unaliases any arguments from `dest` # * "extrudes" the arguments where it is advantageous to pre-compute the broadcasted indices -@inline preprocess(dest, bc::Broadcasted{Style}) where {Style} = Broadcasted{Style}(bc.f, preprocess_args(dest, bc.args), bc.axes) +@inline preprocess(dest, bc::Broadcasted) = Broadcasted(bc.style, bc.f, preprocess_args(dest, bc.args), bc.axes) preprocess(dest, x) = extrude(broadcast_unalias(dest, x)) @inline preprocess_args(dest, args::Tuple) = (preprocess(dest, args[1]), preprocess_args(dest, tail(args))...) @@ -976,6 +974,32 @@ preprocess(dest, x) = extrude(broadcast_unalias(dest, x)) return dest end +# Performance optimization: for BitVector outputs, we cache the result +# in a 64-bit register before writing into memory (to bypass LSQ) +@inline function copyto!(dest::BitVector, bc::Broadcasted{Nothing}) + axes(dest) == axes(bc) || throwdm(axes(dest), axes(bc)) + ischunkedbroadcast(dest, bc) && return chunkedcopyto!(dest, bc) + destc = dest.chunks + bcp = preprocess(dest, bc) + length(bcp) <= 0 && return dest + len = Base.num_bit_chunks(Int(length(bcp))) + @inbounds for i = 0:(len - 2) + z = UInt64(0) + for j = 0:63 + z |= UInt64(bcp[i*64 + j + 1]::Bool) << (j & 63) + end + destc[i + 1] = z + end + @inbounds let i = len - 1 + z = UInt64(0) + for j = 0:((length(bcp) - 1) & 63) + z |= UInt64(bcp[i*64 + j + 1]::Bool) << (j & 63) + end + destc[i + 1] = z + end + return dest +end + # Performance optimization: for BitArray outputs, we cache the result # in a "small" Vector{Bool}, and then copy in chunks into the output @inline function copyto!(dest::BitArray, bc::Broadcasted{Nothing}) @@ -1020,11 +1044,11 @@ ischunkedbroadcast(R, args::Tuple{<:BroadcastedChunkableOp,Vararg{Any}}) = ischu ischunkedbroadcast(R, args::Tuple{}) = true # Convert compatible functions to chunkable ones. 
They must also be green-lighted as ChunkableOps -liftfuncs(bc::Broadcasted{Style}) where {Style} = Broadcasted{Style}(bc.f, map(liftfuncs, bc.args), bc.axes) -liftfuncs(bc::Broadcasted{Style,<:Any,typeof(sign)}) where {Style} = Broadcasted{Style}(identity, map(liftfuncs, bc.args), bc.axes) -liftfuncs(bc::Broadcasted{Style,<:Any,typeof(!)}) where {Style} = Broadcasted{Style}(~, map(liftfuncs, bc.args), bc.axes) -liftfuncs(bc::Broadcasted{Style,<:Any,typeof(*)}) where {Style} = Broadcasted{Style}(&, map(liftfuncs, bc.args), bc.axes) -liftfuncs(bc::Broadcasted{Style,<:Any,typeof(==)}) where {Style} = Broadcasted{Style}((~)∘(xor), map(liftfuncs, bc.args), bc.axes) +liftfuncs(bc::Broadcasted{<:Any,<:Any,<:Any}) = Broadcasted(bc.style, bc.f, map(liftfuncs, bc.args), bc.axes) +liftfuncs(bc::Broadcasted{<:Any,<:Any,typeof(sign)}) = Broadcasted(bc.style, identity, map(liftfuncs, bc.args), bc.axes) +liftfuncs(bc::Broadcasted{<:Any,<:Any,typeof(!)}) = Broadcasted(bc.style, ~, map(liftfuncs, bc.args), bc.axes) +liftfuncs(bc::Broadcasted{<:Any,<:Any,typeof(*)}) = Broadcasted(bc.style, &, map(liftfuncs, bc.args), bc.axes) +liftfuncs(bc::Broadcasted{<:Any,<:Any,typeof(==)}) = Broadcasted(bc.style, (~)∘(xor), map(liftfuncs, bc.args), bc.axes) liftfuncs(x) = x liftchunks(::Tuple{}) = () @@ -1183,9 +1207,9 @@ end end Base.@propagate_inbounds dotview(B::BitArray, i::BitArray) = BitMaskedBitArray(B, i) Base.show(io::IO, B::BitMaskedBitArray) = foreach(arg->show(io, arg), (typeof(B), (B.parent, B.mask))) -# Override materialize! to prevent the BitMaskedBitArray from escaping to an overrideable method +# Override materialize! to prevent the BitMaskedBitArray from escaping to an overridable method @inline materialize!(B::BitMaskedBitArray, bc::Broadcasted{<:Any,<:Any,typeof(identity),Tuple{Bool}}) = fill!(B, bc.args[1]) -@inline materialize!(B::BitMaskedBitArray, bc::Broadcasted{<:Any}) = materialize!(SubArray(B.parent, to_indices(B.parent, (B.mask,))), bc) +@inline materialize!(B::BitMaskedBitArray, bc::Broadcasted{<:Any}) = materialize!(@inbounds(view(B.parent, B.mask)), bc) function Base.fill!(B::BitMaskedBitArray, b::Bool) Bc = B.parent.chunks Ic = B.mask.chunks @@ -1297,7 +1321,7 @@ end return broadcasted((args...) -> f(args...; kwargs...), args...) end end -@inline function broadcasted(f, args...) +@inline function broadcasted(f::F, args...) where {F} args′ = map(broadcastable, args) broadcasted(combine_styles(args′...), f, args′...) end @@ -1305,18 +1329,18 @@ end # the totally generic varargs broadcasted(f, args...) method above loses Type{T}s in # mapping broadcastable across the args. These additional methods with explicit # arguments ensure we preserve Type{T}s in the first or second argument position. -@inline function broadcasted(f, arg1, args...) +@inline function broadcasted(f::F, arg1, args...) where {F} arg1′ = broadcastable(arg1) args′ = map(broadcastable, args) broadcasted(combine_styles(arg1′, args′...), f, arg1′, args′...) end -@inline function broadcasted(f, arg1, arg2, args...) +@inline function broadcasted(f::F, arg1, arg2, args...) where {F} arg1′ = broadcastable(arg1) arg2′ = broadcastable(arg2) args′ = map(broadcastable, args) broadcasted(combine_styles(arg1′, arg2′, args′...), f, arg1′, arg2′, args′...) end -@inline broadcasted(::S, f, args...) where S<:BroadcastStyle = Broadcasted{S}(f, args) +@inline broadcasted(style::BroadcastStyle, f::F, args...) 
where {F} = Broadcasted(style, f, args) """ BroadcastFunction{F} <: Function diff --git a/base/c.jl b/base/c.jl index 7d168f2293c9c..eb552d3507662 100644 --- a/base/c.jl +++ b/base/c.jl @@ -2,7 +2,7 @@ # definitions related to C interface -import Core.Intrinsics: cglobal, bitcast +import Core.Intrinsics: cglobal """ cglobal((symbol, library) [, type=Cvoid]) @@ -91,7 +91,7 @@ Equivalent to the native `char` c-type. Cchar # The ccall here is equivalent to Sys.iswindows(), but that's not defined yet -@static if ccall(:jl_get_UNAME, Any, ()) === :NT +if ccall(:jl_get_UNAME, Any, ()) === :NT const Clong = Int32 const Culong = UInt32 const Cwchar_t = UInt16 @@ -122,32 +122,7 @@ Equivalent to the native `wchar_t` c-type ([`Int32`](@ref)). """ Cwchar_t -""" - Cwstring - -A C-style string composed of the native wide character type -[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For -C-style strings composed of the native character -type, see [`Cstring`](@ref). For more information -about string interopability with C, see the -[manual](@ref man-bits-types). - -""" -Cwstring - -""" - Cstring - -A C-style string composed of the native character type -[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. For -C-style strings composed of the native wide character -type, see [`Cwstring`](@ref). For more information -about string interopability with C, see the -[manual](@ref man-bits-types). -""" -Cstring - -@static if ccall(:jl_get_UNAME, Any, ()) !== :NT +if ccall(:jl_get_UNAME, Any, ()) !== :NT const sizeof_mode_t = ccall(:jl_sizeof_mode_t, Cint, ()) if sizeof_mode_t == 2 const Cmode_t = Int16 @@ -155,292 +130,11 @@ Cstring const Cmode_t = Int32 elseif sizeof_mode_t == 8 const Cmode_t = Int64 + else + error("invalid sizeof mode_t") end end -# construction from pointers -Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p) -Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = bitcast(Cwstring, p) -Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p) -Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = bitcast(Ptr{Cwchar_t}, p) - -convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = Cstring(p) -convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = Cwstring(p) -convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = Ptr{T}(p) -convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = Ptr{T}(p) - -""" - pointer(array [, index]) - -Get the native address of an array or string, optionally at a given location `index`. - -This function is "unsafe". Be careful to ensure that a Julia reference to -`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref) -macro should be used to protect the `array` argument from garbage collection -within a given block of code. - -Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity. -""" -function pointer end - -pointer(p::Cstring) = convert(Ptr{Cchar}, p) -pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p) - -# comparisons against pointers (mainly to support `cstr==C_NULL`) -==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y -==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y) - -unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s)) - -# convert strings to String etc. 
to pass as pointers -cconvert(::Type{Cstring}, s::String) = s -cconvert(::Type{Cstring}, s::AbstractString) = - cconvert(Cstring, String(s)::String) - -function cconvert(::Type{Cwstring}, s::AbstractString) - v = transcode(Cwchar_t, String(s)) - !isempty(v) && v[end] == 0 || push!(v, 0) - return v -end - -eltype(::Type{Cstring}) = Cchar -eltype(::Type{Cwstring}) = Cwchar_t - -containsnul(p::Ptr, len) = - C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len) -containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s)) -containsnul(s::AbstractString) = '\0' in s - -function unsafe_convert(::Type{Cstring}, s::Union{String,AbstractVector{UInt8}}) - p = unsafe_convert(Ptr{Cchar}, s) - containsnul(p, sizeof(s)) && - throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) - return Cstring(p) -end - -function unsafe_convert(::Type{Cwstring}, v::Vector{Cwchar_t}) - for i = 1:length(v)-1 - v[i] == 0 && - throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))")) - end - v[end] == 0 || - throw(ArgumentError("C string data must be NUL terminated: $(repr(v))")) - p = unsafe_convert(Ptr{Cwchar_t}, v) - return Cwstring(p) -end - -# symbols are guaranteed not to contain embedded NUL -cconvert(::Type{Cstring}, s::Symbol) = s -unsafe_convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s)) - -@static if ccall(:jl_get_UNAME, Any, ()) === :NT -""" - Base.cwstring(s) - -Converts a string `s` to a NUL-terminated `Vector{Cwchar_t}`, suitable for passing to C -functions expecting a `Ptr{Cwchar_t}`. The main advantage of using this over the implicit -conversion provided by [`Cwstring`](@ref) is if the function is called multiple times with the -same argument. - -This is only available on Windows. -""" -function cwstring(s::AbstractString) - bytes = codeunits(String(s)) - 0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) - return push!(transcode(UInt16, bytes), 0) -end -end - -# transcoding between data in UTF-8 and UTF-16 for Windows APIs, -# and also UTF-32 for APIs using Cwchar_t on other platforms. - -""" - transcode(T, src) - -Convert string data between Unicode encodings. `src` is either a -`String` or a `Vector{UIntXX}` of UTF-XX code units, where -`XX` is 8, 16, or 32. `T` indicates the encoding of the return value: -`String` to return a (UTF-8 encoded) `String` or `UIntXX` -to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref) -can also be used as the integer type, for converting `wchar_t*` strings -used by external C libraries.) - -The `transcode` function succeeds as long as the input data can be -reasonably represented in the target encoding; it always succeeds for -conversions between UTF-XX encodings, even for invalid Unicode data. - -Only conversion to/from UTF-8 is currently supported. 
- -# Examples -```jldoctest -julia> str = "αβγ" -"αβγ" - -julia> transcode(UInt16, str) -3-element Vector{UInt16}: - 0x03b1 - 0x03b2 - 0x03b3 - -julia> transcode(String, transcode(UInt16, str)) -"αβγ" -``` -""" -function transcode end - -transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src -transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src] -transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} = - transcode(T, String(Vector(src))) -transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}} = - transcode(T, String(src)) - -function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}}) - buf = IOBuffer() - for c in src - print(buf, Char(c)) - end - take!(buf) -end -transcode(::Type{String}, src::String) = src -transcode(T, src::String) = transcode(T, codeunits(src)) -transcode(::Type{String}, src) = String(transcode(UInt8, src)) - -function transcode(::Type{UInt16}, src::AbstractVector{UInt8}) - require_one_based_indexing(src) - dst = UInt16[] - i, n = 1, length(src) - n > 0 || return dst - sizehint!(dst, 2n) - a = src[1] - while true - if i < n && -64 <= a % Int8 <= -12 # multi-byte character - b = src[i += 1] - if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b - # invalid UTF-8 (non-continuation or too-high code point) - push!(dst, a) - a = b; continue - elseif a < 0xe0 # 2-byte UTF-8 - push!(dst, xor(0x3080, UInt16(a) << 6, b)) - elseif i < n # 3/4-byte character - c = src[i += 1] - if -64 <= (c % Int8) # invalid UTF-8 (non-continuation) - push!(dst, a, b) - a = c; continue - elseif a < 0xf0 # 3-byte UTF-8 - push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c)) - elseif i < n - d = src[i += 1] - if -64 <= (d % Int8) # invalid UTF-8 (non-continuation) - push!(dst, a, b, c) - a = d; continue - elseif a == 0xf0 && b < 0x90 # overlong encoding - push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d)) - else # 4-byte UTF-8 - push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4), - xor(0xdc80, UInt16(c & 0xf) << 6, d)) - end - else # too short - push!(dst, a, b, c) - break - end - else # too short - push!(dst, a, b) - break - end - else # ASCII or invalid UTF-8 (continuation byte or too-high code point) - push!(dst, a) - end - i < n || break - a = src[i += 1] - end - return dst -end - -function transcode(::Type{UInt8}, src::AbstractVector{UInt16}) - require_one_based_indexing(src) - n = length(src) - n == 0 && return UInt8[] - - # Precompute m = sizeof(dst). This involves annoying duplication - # of the loop over the src array. However, this is not just an - # optimization: it is problematic for security reasons to grow - # dst dynamically, because Base.winprompt uses this function to - # convert passwords to UTF-8 and we don't want to make unintentional - # copies of the password data. 
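# Illustrative sketch (not part of this change): the UTF-8 <-> UTF-16 conversions
# implemented here also round-trip non-BMP characters, which occupy a surrogate pair
# in UTF-16. For example, U+1F604 ("😄") maps to the pair 0xd83d, 0xde04:
@assert transcode(UInt16, "x😄") == UInt16[0x0078, 0xd83d, 0xde04]
@assert transcode(String, UInt16[0x0078, 0xd83d, 0xde04]) == "x😄"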
- a = src[1] - i, m = 1, 0 - while true - if a < 0x80 - m += 1 - elseif a < 0x800 # 2-byte UTF-8 - m += 2 - elseif a & 0xfc00 == 0xd800 && i < length(src) - b = src[i += 1] - if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8 - m += 4 - else - m += 3 - a = b; continue - end - else - # 1-unit high UTF-16 or unpaired high surrogate - # either way, encode as 3-byte UTF-8 code point - m += 3 - end - i < n || break - a = src[i += 1] - end - - dst = StringVector(m) - a = src[1] - i, j = 1, 0 - while true - if a < 0x80 # ASCII - dst[j += 1] = a % UInt8 - elseif a < 0x800 # 2-byte UTF-8 - dst[j += 1] = 0xc0 | ((a >> 6) % UInt8) - dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) - elseif a & 0xfc00 == 0xd800 && i < n - b = src[i += 1] - if (b & 0xfc00) == 0xdc00 - # 2-unit UTF-16 sequence => 4-byte UTF-8 - a += 0x2840 - dst[j += 1] = 0xf0 | ((a >> 8) % UInt8) - dst[j += 1] = 0x80 | ((a % UInt8) >> 2) - dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8) - dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f) - else - dst[j += 1] = 0xe0 | ((a >> 12) % UInt8) - dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f) - dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) - a = b; continue - end - else - # 1-unit high UTF-16 or unpaired high surrogate - # either way, encode as 3-byte UTF-8 code point - dst[j += 1] = 0xe0 | ((a >> 12) % UInt8) - dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f) - dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) - end - i < n || break - a = src[i += 1] - end - return dst -end - -function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}} - transcode(String, unsafe_wrap(Array, p, length; own=false)) -end -function unsafe_string(cw::Cwstring) - p = convert(Ptr{Cwchar_t}, cw) - n = 1 - while unsafe_load(p, n) != 0 - n += 1 - end - return unsafe_string(p, n - 1) -end - # deferring (or un-deferring) ctrl-c handler for external C code that # is not interrupt safe (see also issue #2622). The sigatomic_begin/end # functions should always be called in matched pairs, ideally via: @@ -540,7 +234,7 @@ function expand_ccallable(rt, def) end end return quote - $(esc(def)) + @__doc__ $(esc(def)) _ccallable($(esc(rt)), $(Expr(:curly, :Tuple, esc(f), map(esc, at)...))) end end @@ -565,9 +259,9 @@ end """ ccall_macro_parse(expression) -`ccall_macro_parse` is an implementation detail of `@ccall +`ccall_macro_parse` is an implementation detail of `@ccall`. -it takes an expression like `:(printf("%d"::Cstring, value::Cuint)::Cvoid)` +It takes an expression like `:(printf("%d"::Cstring, value::Cuint)::Cvoid)` returns: a tuple of `(function_name, return_type, arg_types, args)` The above input outputs this: @@ -576,21 +270,21 @@ The above input outputs this: """ function ccall_macro_parse(expr::Expr) # setup and check for errors - if !Meta.isexpr(expr, :(::)) + if !isexpr(expr, :(::)) throw(ArgumentError("@ccall needs a function signature with a return type")) end rettype = expr.args[2] call = expr.args[1] - if !Meta.isexpr(call, :call) + if !isexpr(call, :call) throw(ArgumentError("@ccall has to take a function call")) end # get the function symbols func = let f = call.args[1] - if Meta.isexpr(f, :.) + if isexpr(f, :.) 
:(($(f.args[2]), $(f.args[1]))) - elseif Meta.isexpr(f, :$) + elseif isexpr(f, :$) f elseif f isa Symbol QuoteNode(f) @@ -603,7 +297,7 @@ function ccall_macro_parse(expr::Expr) varargs = nothing argstart = 2 callargs = call.args - if length(callargs) >= 2 && Meta.isexpr(callargs[2], :parameters) + if length(callargs) >= 2 && isexpr(callargs[2], :parameters) argstart = 3 varargs = callargs[2].args end @@ -613,7 +307,7 @@ function ccall_macro_parse(expr::Expr) types = [] function pusharg!(arg) - if !Meta.isexpr(arg, :(::)) + if !isexpr(arg, :(::)) throw(ArgumentError("args in @ccall need type annotations. '$arg' doesn't have one.")) end push!(args, arg.args[1]) @@ -640,13 +334,11 @@ end function ccall_macro_lower(convention, func, rettype, types, args, nreq) - lowering = [] - realargs = [] - gcroots = [] + statements = [] - # if interpolation was used, ensure variable is a function pointer at runtime. - if Meta.isexpr(func, :$) - push!(lowering, Expr(:(=), :func, esc(func.args[1]))) + # if interpolation was used, ensure the value is a function pointer at runtime. + if isexpr(func, :$) + push!(statements, Expr(:(=), :func, esc(func.args[1]))) name = QuoteNode(func.args[1]) func = :func check = quote @@ -655,31 +347,14 @@ function ccall_macro_lower(convention, func, rettype, types, args, nreq) throw(ArgumentError("interpolated function `$name` was not a Ptr{Cvoid}, but $(typeof(func))")) end end - push!(lowering, check) + push!(statements, check) else func = esc(func) end - for (i, (arg, type)) in enumerate(zip(args, types)) - sym = Symbol(string("arg", i, "root")) - sym2 = Symbol(string("arg", i, )) - earg, etype = esc(arg), esc(type) - push!(lowering, :(local $sym = $(GlobalRef(Base, :cconvert))($etype, $earg))) - push!(lowering, :(local $sym2 = $(GlobalRef(Base, :unsafe_convert))($etype, $sym))) - push!(realargs, sym2) - push!(gcroots, sym) - end - etypes = Expr(:call, Expr(:core, :svec), types...) - exp = Expr(:foreigncall, - func, - esc(rettype), - esc(etypes), - nreq, - QuoteNode(convention), - realargs..., gcroots...) - push!(lowering, exp) - - return Expr(:block, lowering...) + return Expr(:block, statements..., + Expr(:call, :ccall, func, Expr(:cconv, convention, nreq), esc(rettype), + Expr(:tuple, map(esc, types)...), map(esc, args)...)) end """ @@ -734,6 +409,6 @@ macro ccall(expr) return ccall_macro_lower(:ccall, ccall_macro_parse(expr)...) end -macro ccall_effects(effects::UInt8, expr) +macro ccall_effects(effects::UInt16, expr) return ccall_macro_lower((:ccall, effects), ccall_macro_parse(expr)...) end diff --git a/base/cartesian.jl b/base/cartesian.jl index 5f96a2061880f..decc01cbc3e5f 100644 --- a/base/cartesian.jl +++ b/base/cartesian.jl @@ -2,7 +2,7 @@ module Cartesian -export @nloops, @nref, @ncall, @nexprs, @nextract, @nall, @nany, @ntuple, @nif +export @nloops, @nref, @ncall, @ncallkw, @nexprs, @nextract, @nall, @nany, @ntuple, @nif ### Cartesian-specific macros @@ -104,10 +104,38 @@ while `@ncall 2 func a b i->c[i]` yields macro ncall(N::Int, f, args...) pre = args[1:end-1] ex = args[end] - vars = Any[ inlineanonymous(ex,i) for i = 1:N ] + vars = (inlineanonymous(ex, i) for i = 1:N) Expr(:escape, Expr(:call, f, pre..., vars...)) end +""" + @ncallkw N f kw sym... + +Generate a function call expression with keyword arguments `kw...`. As +in the case of [`@ncall`](@ref), `sym` represents any number of function arguments, the +last of which may be an anonymous-function expression and is expanded into `N` arguments. 
+ +# Example +```jldoctest +julia> using Base.Cartesian + +julia> f(x...; a, b = 1, c = 2, d = 3) = +(x..., a, b, c, d); + +julia> x_1, x_2 = (-1, -2); b = 0; kw = (c = 0, d = 0); + +julia> @ncallkw 2 f (; a = 0, b, kw...) x +-3 + +``` +""" +macro ncallkw(N::Int, f, kw, args...) + pre = args[1:end-1] + ex = args[end] + vars = (inlineanonymous(ex, i) for i = 1:N) + param = Expr(:parameters, Expr(:(...), kw)) + Expr(:escape, Expr(:call, f, param, pre..., vars...)) +end + """ @nexprs N expr @@ -374,6 +402,8 @@ function exprresolve_conditional(ex::Expr) return true, exprresolve_cond_dict[callee](ex.args[2], ex.args[3]) end end + elseif Meta.isexpr(ex, :block, 2) && ex.args[1] isa LineNumberNode + return exprresolve_conditional(ex.args[2]) end false, false end @@ -402,10 +432,16 @@ function exprresolve(ex::Expr) return ex.args[1][ex.args[2:end]...] end # Resolve conditionals - if ex.head === :if + if ex.head === :if || ex.head === :elseif can_eval, tf = exprresolve_conditional(ex.args[1]) if can_eval - ex = tf ? ex.args[2] : ex.args[3] + if tf + return ex.args[2] + elseif length(ex.args) == 3 + return ex.args[3] + else + return nothing + end end end ex diff --git a/base/channels.jl b/base/channels.jl index da7b1d24583ca..75207e9fac76b 100644 --- a/base/channels.jl +++ b/base/channels.jl @@ -59,7 +59,7 @@ Channel(sz=0) = Channel{Any}(sz) # special constructors """ - Channel{T=Any}(func::Function, size=0; taskref=nothing, spawn=false) + Channel{T=Any}(func::Function, size=0; taskref=nothing, spawn=false, threadpool=nothing) Create a new task from `func`, bind it to a new channel of type `T` and size `size`, and schedule the task, all in a single call. @@ -70,9 +70,14 @@ The channel is automatically closed when the task terminates. If you need a reference to the created task, pass a `Ref{Task}` object via the keyword argument `taskref`. -If `spawn = true`, the Task created for `func` may be scheduled on another thread +If `spawn=true`, the `Task` created for `func` may be scheduled on another thread in parallel, equivalent to creating a task via [`Threads.@spawn`](@ref). +If `spawn=true` and the `threadpool` argument is not set, it defaults to `:default`. + +If the `threadpool` argument is set (to `:default` or `:interactive`), this implies +that `spawn=true` and the new Task is spawned to the specified threadpool. + Return a `Channel`. # Examples @@ -117,6 +122,9 @@ true In earlier versions of Julia, Channel used keyword arguments to set `size` and `T`, but those constructors are deprecated. +!!! compat "Julia 1.9" + The `threadpool=` argument was added in Julia 1.9. + ```jldoctest julia> chnl = Channel{Char}(1, spawn=true) do ch for c in "hello world" @@ -129,12 +137,18 @@ julia> String(collect(chnl)) "hello world" ``` """ -function Channel{T}(func::Function, size=0; taskref=nothing, spawn=false) where T +function Channel{T}(func::Function, size=0; taskref=nothing, spawn=false, threadpool=nothing) where T chnl = Channel{T}(size) task = Task(() -> func(chnl)) + if threadpool === nothing + threadpool = :default + else + spawn = true + end task.sticky = !spawn bind(chnl, task) if spawn + Threads._spawn_set_thrpool(task, threadpool) schedule(task) # start it on (potentially) another thread else yield(task) # immediately start it, yielding the current thread @@ -149,17 +163,17 @@ Channel(func::Function, args...; kwargs...) = Channel{Any}(func, args...; kwargs # of course not deprecated.) 
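# Illustrative sketch (not part of this change; requires a Julia build with the
# `threadpool` keyword added above): passing `threadpool` implies `spawn=true` and
# pins the producer task to the named pool (here `:default`; `:interactive` also works).
chnl = Channel{Int}(32; threadpool=:default) do ch
    foreach(i -> put!(ch, i), 1:10)
end
@assert collect(chnl) == 1:10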
# We use `nothing` default values to check which arguments were set in order to throw the # deprecation warning if users try to use `spawn=` with `ctype=` or `csize=`. -function Channel(func::Function; ctype=nothing, csize=nothing, taskref=nothing, spawn=nothing) +function Channel(func::Function; ctype=nothing, csize=nothing, taskref=nothing, spawn=nothing, threadpool=nothing) # The spawn= keyword argument was added in Julia v1.3, and cannot be used with the # deprecated keyword arguments `ctype=` or `csize=`. - if (ctype !== nothing || csize !== nothing) && spawn !== nothing - throw(ArgumentError("Cannot set `spawn=` in the deprecated constructor `Channel(f; ctype=Any, csize=0)`. Please use `Channel{T=Any}(f, size=0; taskref=nothing, spawn=false)` instead!")) + if (ctype !== nothing || csize !== nothing) && (spawn !== nothing || threadpool !== nothing) + throw(ArgumentError("Cannot set `spawn=` or `threadpool=` in the deprecated constructor `Channel(f; ctype=Any, csize=0)`. Please use `Channel{T=Any}(f, size=0; taskref=nothing, spawn=false, threadpool=nothing)` instead!")) end # Set the actual default values for the arguments. ctype === nothing && (ctype = Any) csize === nothing && (csize = 0) spawn === nothing && (spawn = false) - return Channel{ctype}(func, csize; taskref=taskref, spawn=spawn) + return Channel{ctype}(func, csize; taskref=taskref, spawn=spawn, threadpool=threadpool) end closed_exception() = InvalidStateException("Channel is closed.", :closed) @@ -183,7 +197,8 @@ Close a channel. An exception (optionally given by `excp`), is thrown by: * [`put!`](@ref) on a closed channel. * [`take!`](@ref) and [`fetch`](@ref) on an empty, closed channel. """ -function close(c::Channel, excp::Exception=closed_exception()) +close(c::Channel) = close(c, closed_exception()) # nospecialize on default arg seems to confuse makedocs +function close(c::Channel, @nospecialize(excp::Exception)) lock(c) try c.excp = excp @@ -196,7 +211,12 @@ function close(c::Channel, excp::Exception=closed_exception()) end nothing end -isopen(c::Channel) = ((@atomic :monotonic c.state) === :open) + +# Use acquire here to pair with release store in `close`, so that subsequent `isready` calls +# are forced to see `isready == true` if they see `isopen == false`. This means users must +# call `isopen` before `isready` if you are using the race-y APIs (or call `iterate`, which +# does this right for you). +isopen(c::Channel) = ((@atomic :acquire c.state) === :open) """ bind(chnl::Channel, task::Task) @@ -252,6 +272,7 @@ Stacktrace: """ function bind(c::Channel, task::Task) T = Task(() -> close_chnl_on_taskdone(task, c)) + T.sticky = false _wait2(task, T) return c end @@ -380,8 +401,26 @@ end """ fetch(c::Channel) -Wait for and get the first available item from the channel. Does not -remove the item. `fetch` is unsupported on an unbuffered (0-size) channel. +Waits for and returns (without removing) the first available item from the `Channel`. +Note: `fetch` is unsupported on an unbuffered (0-size) `Channel`. + +# Examples + +Buffered channel: +```jldoctest +julia> c = Channel(3) do ch + foreach(i -> put!(ch, i), 1:3) + end; + +julia> fetch(c) +1 + +julia> collect(c) # item is not removed +3-element Vector{Any}: + 1 + 2 + 3 +``` """ fetch(c::Channel) = isbuffered(c) ? fetch_buffered(c) : fetch_unbuffered(c) function fetch_buffered(c::Channel) @@ -402,10 +441,32 @@ fetch_unbuffered(c::Channel) = throw(ErrorException("`fetch` is not supported on """ take!(c::Channel) -Remove and return a value from a [`Channel`](@ref). 
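Illustrative aside (not part of this change): the acquire/release pairing on `c.state`
described above matters for lock-free consumers, which should check `isopen` before
`isready`; observing `isopen(c) == false` then guarantees that any value `put!` before
the close is still visible. A minimal sketch, with `still_has_data` as a hypothetical
helper name:

```julia
# Safe check order for race-y use: read isopen first, then isready.
still_has_data(c::Channel) = isopen(c) || isready(c)
```

The revised `iterate(::Channel)` further down in this file performs the same check
before calling `take!`.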
Blocks until data is available. +Removes and returns a value from a [`Channel`](@ref) in order. Blocks until data is available. +For unbuffered channels, blocks until a [`put!`](@ref) is performed by a different task. -For unbuffered channels, blocks until a [`put!`](@ref) is performed by a different -task. +# Examples + +Buffered channel: +```jldoctest +julia> c = Channel(1); + +julia> put!(c, 1); + +julia> take!(c) +1 +``` + +Unbuffered channel: +```jldoctest +julia> c = Channel(0); + +julia> task = Task(() -> put!(c, 1)); + +julia> schedule(task); + +julia> take!(c) +1 +``` """ take!(c::Channel) = isbuffered(c) ? take_buffered(c) : take_unbuffered(c) function take_buffered(c::Channel) @@ -439,11 +500,41 @@ end """ isready(c::Channel) -Determine whether a [`Channel`](@ref) has a value stored to it. Returns -immediately, does not block. +Determines whether a [`Channel`](@ref) has a value stored in it. +Returns immediately, does not block. + +For unbuffered channels returns `true` if there are tasks waiting on a [`put!`](@ref). + +# Examples + +Buffered channel: +```jldoctest +julia> c = Channel(1); + +julia> isready(c) +false + +julia> put!(c, 1); + +julia> isready(c) +true +``` + +Unbuffered channel: +```jldoctest +julia> c = Channel(); + +julia> isready(c) # no tasks waiting to put! +false + +julia> task = Task(() -> put!(c, 1)); + +julia> schedule(task); # schedule a put! task + +julia> isready(c) +true +``` -For unbuffered channels returns `true` if there are tasks waiting -on a [`put!`](@ref). """ isready(c::Channel) = n_avail(c) > 0 isempty(c::Channel) = n_avail(c) == 0 @@ -457,6 +548,30 @@ lock(f, c::Channel) = lock(f, c.cond_take) unlock(c::Channel) = unlock(c.cond_take) trylock(c::Channel) = trylock(c.cond_take) +""" + wait(c::Channel) + +Blocks until the `Channel` [`isready`](@ref). 
+ +```jldoctest +julia> c = Channel(1); + +julia> isready(c) +false + +julia> task = Task(() -> wait(c)); + +julia> schedule(task); + +julia> istaskdone(task) # task is blocked because channel is not ready +false + +julia> put!(c, 1); + +julia> istaskdone(task) # task is now unblocked +true +``` +""" function wait(c::Channel) isready(c) && return lock(c) @@ -493,14 +608,18 @@ function show(io::IO, ::MIME"text/plain", c::Channel) end function iterate(c::Channel, state=nothing) - try - return (take!(c), nothing) - catch e - if isa(e, InvalidStateException) && e.state === :closed - return nothing - else - rethrow() + if isopen(c) || isready(c) + try + return (take!(c), nothing) + catch e + if isa(e, InvalidStateException) && e.state === :closed + return nothing + else + rethrow() + end end + else + return nothing end end diff --git a/base/char.jl b/base/char.jl index c8b1c28166bbf..08d661c41de56 100644 --- a/base/char.jl +++ b/base/char.jl @@ -181,9 +181,9 @@ end end convert(::Type{AbstractChar}, x::Number) = Char(x) # default to Char -convert(::Type{T}, x::Number) where {T<:AbstractChar} = T(x) -convert(::Type{T}, x::AbstractChar) where {T<:Number} = T(x) -convert(::Type{T}, c::AbstractChar) where {T<:AbstractChar} = T(c) +convert(::Type{T}, x::Number) where {T<:AbstractChar} = T(x)::T +convert(::Type{T}, x::AbstractChar) where {T<:Number} = T(x)::T +convert(::Type{T}, c::AbstractChar) where {T<:AbstractChar} = T(c)::T convert(::Type{T}, c::T) where {T<:AbstractChar} = c rem(x::AbstractChar, ::Type{T}) where {T<:Number} = rem(codepoint(x), T) @@ -318,7 +318,7 @@ end function show(io::IO, ::MIME"text/plain", c::T) where {T<:AbstractChar} show(io, c) - get(io, :compact, false) && return + get(io, :compact, false)::Bool && return if !ismalformed(c) print(io, ": ") if isoverlong(c) diff --git a/base/checked.jl b/base/checked.jl index c3c8a888dcd1c..d5b4112397e84 100644 --- a/base/checked.jl +++ b/base/checked.jl @@ -42,12 +42,12 @@ const UnsignedInt = Union{UInt8,UInt16,UInt32,UInt64,UInt128} # LLVM has several code generation bugs for checked integer arithmetic (see e.g. # #4905). We thus distinguish between operations that can be implemented via -# intrinsics, and operations for which we have to provide work-arounds. +# intrinsics, and operations for which we have to provide workarounds. # Note: As far as this code has been tested, most checked_* functions are # working fine in LLVM. (Note that division is still handled via `base/int.jl`, # which always checks for overflow, and which provides its own sets of -# work-arounds for LLVM codegen bugs.) However, the comments in `base/int.jl` +# workarounds for LLVM codegen bugs.) However, the comments in `base/int.jl` # and in issue #4905 are more pessimistic. For the time being, we thus retain # the ability to handle codegen bugs in LLVM, until the code here has been # tested on more systems and architectures. 
It also seems that things depend on diff --git a/base/client.jl b/base/client.jl index 66d7ffc3d2135..201792c786b51 100644 --- a/base/client.jl +++ b/base/client.jl @@ -4,6 +4,7 @@ ## and REPL have_color = nothing +have_truecolor = nothing const default_color_warn = :yellow const default_color_error = :light_red const default_color_info = :cyan @@ -66,7 +67,15 @@ function repl_cmd(cmd, out) end cmd = `$shell -c $shell_escape_cmd` end - run(ignorestatus(cmd)) + try + run(ignorestatus(cmd)) + catch + # Windows doesn't shell out right now (complex issue), so Julia tries to run the program itself + # Julia throws an exception if it can't find the program, but the stack trace isn't useful + lasterr = current_exceptions() + lasterr = ExceptionStack([(exception = e[1], backtrace = [] ) for e in lasterr]) + invokelatest(display_error, lasterr) + end end nothing end @@ -95,8 +104,8 @@ scrub_repl_backtrace(stack::ExceptionStack) = ExceptionStack(Any[(;x.exception, backtrace = scrub_repl_backtrace(x.backtrace)) for x in stack]) istrivialerror(stack::ExceptionStack) = - length(stack) == 1 && length(stack[1].backtrace) ≤ 1 - # frame 1 = top level; assumes already went through scrub_repl_backtrace + length(stack) == 1 && length(stack[1].backtrace) ≤ 1 && !isa(stack[1].exception, MethodError) + # frame 1 = top level; assumes already went through scrub_repl_backtrace; MethodError see #50803 function display_error(io::IO, stack::ExceptionStack) printstyled(io, "ERROR: "; bold=true, color=Base.error_color()) @@ -124,14 +133,14 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool) end if lasterr !== nothing lasterr = scrub_repl_backtrace(lasterr) - istrivialerror(lasterr) || setglobal!(Main, :err, lasterr) + istrivialerror(lasterr) || setglobal!(Base.MainInclude, :err, lasterr) invokelatest(display_error, errio, lasterr) errcount = 0 lasterr = nothing else ast = Meta.lower(Main, ast) value = Core.eval(Main, ast) - setglobal!(Main, :ans, value) + setglobal!(Base.MainInclude, :ans, value) if !(value === nothing) && show_value if have_color print(answer_color()) @@ -151,7 +160,7 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool) end errcount += 1 lasterr = scrub_repl_backtrace(current_exceptions()) - setglobal!(Main, :err, lasterr) + setglobal!(Base.MainInclude, :err, lasterr) if errcount > 2 @error "It is likely that something important is broken, and Julia will not be able to continue normally" errcount break @@ -194,10 +203,7 @@ parse_input_line(s::AbstractString) = parse_input_line(String(s)) # detect the reason which caused an :incomplete expression # from the error message # NOTE: the error messages are defined in src/julia-parser.scm -incomplete_tag(ex) = :none -function incomplete_tag(ex::Expr) - Meta.isexpr(ex, :incomplete) || return :none - msg = ex.args[1] +function fl_incomplete_tag(msg::AbstractString) occursin("string", msg) && return :string occursin("comment", msg) && return :comment occursin("requires end", msg) && return :block @@ -206,12 +212,24 @@ function incomplete_tag(ex::Expr) return :other end +incomplete_tag(ex) = :none +function incomplete_tag(ex::Expr) + if ex.head !== :incomplete + return :none + elseif isempty(ex.args) + return :other + elseif ex.args[1] isa String + return fl_incomplete_tag(ex.args[1]) + else + return incomplete_tag(ex.args[1]) + end +end +incomplete_tag(exc::Meta.ParseError) = incomplete_tag(exc.detail) + +cmd_suppresses_program(cmd) = cmd in ('e', 'E') function exec_options(opts) - quiet = (opts.quiet != 0) startup = 
(opts.startupfile != 2) - history_file = (opts.historyfile != 0) - color_set = (opts.color != 0) # --color!=auto - global have_color = color_set ? (opts.color == 1) : nothing # --color=on + global have_color = (opts.color != 0) ? (opts.color == 1) : nothing # --color=on global is_interactive = (opts.isinteractive != 0) # pre-process command line argument list @@ -219,10 +237,7 @@ function exec_options(opts) repl = !arg_is_program cmds = unsafe_load_commands(opts.commands) for (cmd, arg) in cmds - if cmd == 'e' - arg_is_program = false - repl = false - elseif cmd == 'E' + if cmd_suppresses_program(cmd) arg_is_program = false repl = false elseif cmd == 'L' @@ -231,9 +246,9 @@ function exec_options(opts) # If we're doing a bug report, don't load anything else. We will # spawn a child in which to execute these options. let InteractiveUtils = load_InteractiveUtils() - InteractiveUtils.report_bug(arg) + invokelatest(InteractiveUtils.report_bug, arg) end - return nothing + return false else @warn "Unexpected command -$cmd'$arg'" end @@ -246,8 +261,8 @@ function exec_options(opts) distributed_mode = (opts.worker == 1) || (opts.nprocs > 0) || (opts.machine_file != C_NULL) if distributed_mode let Distributed = require(PkgId(UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed")) - Core.eval(Main, :(const Distributed = $Distributed)) - Core.eval(Main, :(using .Distributed)) + Core.eval(MainInclude, :(const Distributed = $Distributed)) + Core.eval(Main, :(using Base.MainInclude.Distributed)) end invokelatest(Main.Distributed.process_opts, opts) @@ -256,6 +271,10 @@ function exec_options(opts) interactiveinput = (repl || is_interactive::Bool) && isa(stdin, TTY) is_interactive::Bool |= interactiveinput + # load terminfo in for styled printing + term_env = get(ENV, "TERM", @static Sys.iswindows() ? 
"" : "dumb") + global current_terminfo = load_terminfo(term_env) + # load ~/.julia/config/startup.jl file if startup try @@ -305,15 +324,8 @@ function exec_options(opts) end end end - if repl || is_interactive::Bool - if interactiveinput - banner = (opts.banner != 0) # --banner!=no - else - banner = (opts.banner == 1) # --banner=yes - end - run_main_repl(interactiveinput, quiet, banner, history_file, color_set) - end - nothing + + return repl end function _global_julia_startup_file() @@ -370,56 +382,68 @@ function __atreplinit(repl) end _atreplinit(repl) = invokelatest(__atreplinit, repl) -# The REPL stdlib hooks into Base using this Ref -const REPL_MODULE_REF = Ref{Module}() - function load_InteractiveUtils(mod::Module=Main) # load interactive-only libraries - if !isdefined(mod, :InteractiveUtils) + if !isdefined(MainInclude, :InteractiveUtils) try let InteractiveUtils = require(PkgId(UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils")) - Core.eval(mod, :(const InteractiveUtils = $InteractiveUtils)) - Core.eval(mod, :(using .InteractiveUtils)) - return InteractiveUtils + Core.eval(MainInclude, :(const InteractiveUtils = $InteractiveUtils)) end catch ex @warn "Failed to import InteractiveUtils into module $mod" exception=(ex, catch_backtrace()) + return nothing end - return nothing end - return getfield(mod, :InteractiveUtils) + Core.eval(mod, :(using Base.MainInclude.InteractiveUtils)) + return MainInclude.InteractiveUtils end -# run the requested sort of evaluation loop on stdio -function run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_file::Bool, color_set::Bool) - global active_repl +function load_REPL() + # load interactive-only libraries + try + return Base.require(PkgId(UUID(0x3fa0cd96_eef1_5676_8a61_b3b8758bbffb), "REPL")) + catch ex + @warn "Failed to import REPL" exception=(ex, catch_backtrace()) + end + return nothing +end - load_InteractiveUtils() +global active_repl - if interactive && isassigned(REPL_MODULE_REF) +# run the requested sort of evaluation loop on stdio +function run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_file::Bool, color_set::Bool) + fallback_repl = parse(Bool, get(ENV, "JULIA_FALLBACK_REPL", "false")) + if !fallback_repl && interactive + load_InteractiveUtils() + if !isassigned(REPL_MODULE_REF) + load_REPL() + end + end + # TODO cleanup REPL_MODULE_REF + if !fallback_repl && interactive && isassigned(REPL_MODULE_REF) invokelatest(REPL_MODULE_REF[]) do REPL term_env = get(ENV, "TERM", @static Sys.iswindows() ? 
"" : "dumb") term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr) - banner && Base.banner(term) + banner == :no || REPL.banner(term, short=banner==:short) if term.term_type == "dumb" - active_repl = REPL.BasicREPL(term) + repl = REPL.BasicREPL(term) quiet || @warn "Terminal not fully functional" else - active_repl = REPL.LineEditREPL(term, get(stdout, :color, false), true) - active_repl.history_file = history_file + repl = REPL.LineEditREPL(term, get(stdout, :color, false), true) + repl.history_file = history_file end + global active_repl = repl # Make sure any displays pushed in .julia/config/startup.jl ends up above the # REPLDisplay - pushdisplay(REPL.REPLDisplay(active_repl)) - _atreplinit(active_repl) - REPL.run_repl(active_repl, backend->(global active_repl_backend = backend)) + pushdisplay(REPL.REPLDisplay(repl)) + _atreplinit(repl) + REPL.run_repl(repl, backend->(global active_repl_backend = backend)) end else # otherwise provide a simple fallback - if interactive && !quiet - @warn "REPL provider not available: using basic fallback" + if !fallback_repl && interactive && !quiet + @warn "REPL provider not available: using basic fallback" LOAD_PATH=join(Base.LOAD_PATH, Sys.iswindows() ? ';' : ':') end - banner && Base.banner() let input = stdin if isa(input, File) || isa(input, IOStream) # for files, we can slurp in the whole thing at once @@ -435,7 +459,7 @@ function run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_fil eval_user_input(stderr, ex, true) end else - while isopen(input) || !eof(input) + while !eof(input) if interactive print("julia> ") flush(stdout) @@ -461,64 +485,146 @@ function run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_fil nothing end -# MainInclude exists to hide Main.include and eval from `names(Main)`. +# MainInclude exists to weakly add certain identifiers to Main baremodule MainInclude using ..Base -# These definitions calls Base._include rather than Base.include to get -# one-frame stacktraces for the common case of using include(fname) in Main. -include(mapexpr::Function, fname::AbstractString) = Base._include(mapexpr, Main, fname) -function include(fname::AbstractString) - isa(fname, String) || (fname = Base.convert(String, fname)::String) - Base._include(identity, Main, fname) -end -eval(x) = Core.eval(Main, x) -end """ - eval(expr) + ans -Evaluate an expression in the global scope of the containing module. -Every `Module` (except those defined with `baremodule`) has its own 1-argument -definition of `eval`, which evaluates expressions in that module. +A variable referring to the last computed value, automatically imported to the interactive prompt. """ -MainInclude.eval +global ans = nothing """ - include([mapexpr::Function,] path::AbstractString) - -Evaluate the contents of the input source file in the global scope of the containing module. -Every module (except those defined with `baremodule`) has its own -definition of `include`, which evaluates the file in that module. -Returns the result of the last evaluated expression of the input file. During including, -a task-local include path is set to the directory containing the file. Nested calls to -`include` will search relative to that path. This function is typically used to load source -interactively, or to combine files in packages that are broken into multiple source files. -The argument `path` is normalized using [`normpath`](@ref) which will resolve -relative path tokens such as `..` and convert `/` to the appropriate path separator. 
- -The optional first argument `mapexpr` can be used to transform the included code before -it is evaluated: for each parsed expression `expr` in `path`, the `include` function -actually evaluates `mapexpr(expr)`. If it is omitted, `mapexpr` defaults to [`identity`](@ref). - -Use [`Base.include`](@ref) to evaluate a file into another module. - -!!! compat "Julia 1.5" - Julia 1.5 is required for passing the `mapexpr` argument. + err + +A variable referring to the last thrown errors, automatically imported to the interactive prompt. +The thrown errors are collected in a stack of exceptions. """ -MainInclude.include +global err = nothing + +# weakly exposes ans and err variables to Main +export ans, err +end + +function should_use_main_entrypoint() + isdefined(Main, :main) || return false + M_binding_owner = Base.binding_module(Main, :main) + (isdefined(M_binding_owner, Symbol("#__main_is_entrypoint__#")) && M_binding_owner.var"#__main_is_entrypoint__#") || return false + return true +end function _start() empty!(ARGS) append!(ARGS, Core.ARGS) # clear any postoutput hooks that were saved in the sysimage empty!(Base.postoutput_hooks) + local ret = 0 try - exec_options(JLOptions()) + repl_was_requested = exec_options(JLOptions()) + if should_use_main_entrypoint() && !is_interactive + if Core.Compiler.generating_output() + precompile(Main.main, (typeof(ARGS),)) + else + ret = invokelatest(Main.main, ARGS) + end + elseif (repl_was_requested || is_interactive) + # Run the Base `main`, which will either load the REPL stdlib + # or run the fallback REPL + ret = repl_main(ARGS) + end + ret === nothing && (ret = 0) + ret = Cint(ret) catch + ret = Cint(1) invokelatest(display_error, scrub_repl_backtrace(current_exceptions())) - exit(1) end if is_interactive && get(stdout, :color, false) print(color_normal) end + return ret +end + +function repl_main(_) + opts = Base.JLOptions() + interactiveinput = isa(stdin, Base.TTY) + b = opts.banner + auto = b == -1 + banner = b == 0 || (auto && !interactiveinput) ? :no : + b == 1 || (auto && interactiveinput) ? :yes : + :short # b == 2 + + quiet = (opts.quiet != 0) + history_file = (opts.historyfile != 0) + color_set = (opts.color != 0) # --color!=auto + return run_main_repl(interactiveinput, quiet, banner, history_file, color_set) +end + +""" + @main + +This macro is used to mark that the binding `main` in the current module is considered an +entrypoint. The precise semantics of the entrypoint depend on the CLI driver. + +In the `julia` driver, if `Main.main` is marked as an entrypoint, it will be automatically called upon +the completion of script execution. + +The `@main` macro may be used standalone or as part of the function definition, though in the latter +case, parentheses are required. In particular, the following are equivalent: + +``` +function (@main)(ARGS) + println("Hello World") +end +``` + +``` +function main(ARGS) +end +@main +``` + +## Detailed semantics + +The entrypoint semantics attach to the owner of the binding owner. 
In particular, if a marked entrypoint is +imported into `Main`, it will be treated as an entrypoint in `Main`: + +``` +module MyApp + export main + (@main)(ARGS) = println("Hello World") +end +using .MyApp +# `julia` Will execute MyApp.main at the conclusion of script execution +``` + +Note that in particular, the semantics do not attach to the method +or the name: +``` +module MyApp + (@main)(ARGS) = println("Hello World") +end +const main = MyApp.main +# `julia` Will *NOT* execute MyApp.main unless there is a separate `@main` annotation in `Main` + +!!! compat "Julia 1.11" + This macro is new in Julia 1.11. At present, the precise semantics of `@main` are still subject to change. +``` +""" +macro main(args...) + if !isempty(args) + error("USAGE: `@main` is expected to be used as `(@main)` without macro arguments.") + end + if isdefined(__module__, :main) + if Base.binding_module(__module__, :main) !== __module__ + error("USAGE: Symbol `main` is already a resolved import in module $(__module__). `@main` must be used in the defining module.") + end + end + Core.eval(__module__, quote + # Force the binding to resolve to this module + global main + global var"#__main_is_entrypoint__#"::Bool = true + end) + esc(:main) end diff --git a/base/cmd.jl b/base/cmd.jl index ecabb5c32b1d0..da29c732c7f26 100644 --- a/base/cmd.jl +++ b/base/cmd.jl @@ -41,6 +41,7 @@ has_nondefault_cmd_flags(c::Cmd) = """ Cmd(cmd::Cmd; ignorestatus, detach, windows_verbatim, windows_hide, env, dir) + Cmd(exec::Vector{String}) Construct a new `Cmd` object, representing an external program and arguments, from `cmd`, while changing the settings of the optional keyword arguments: @@ -70,8 +71,15 @@ while changing the settings of the optional keyword arguments: * `dir::AbstractString`: Specify a working directory for the command (instead of the current directory). -For any keywords that are not specified, the current settings from `cmd` are used. Normally, -to create a `Cmd` object in the first place, one uses backticks, e.g. +For any keywords that are not specified, the current settings from `cmd` are used. + +Note that the `Cmd(exec)` constructor does not create a copy of `exec`. Any subsequent changes to `exec` will be reflected in the `Cmd` object. + +The most common way to construct a `Cmd` object is with command literals (backticks), e.g. + + `ls -l` + +This can then be passed to the `Cmd` constructor to modify its settings, e.g. 
Cmd(`echo "Hello world"`, ignorestatus=true, detach=false) """ @@ -230,7 +238,7 @@ function cstr(s) if Base.containsnul(s) throw(ArgumentError("strings containing NUL cannot be passed to spawned processes")) end - return String(s) + return String(s)::String end # convert various env representations into an array of "key=val" strings @@ -462,7 +470,7 @@ function cmd_gen(parsed) (ignorestatus, flags, env, dir) = (cmd.ignorestatus, cmd.flags, cmd.env, cmd.dir) append!(args, cmd.exec) for arg in tail(parsed) - append!(args, arg_gen(arg...)::Vector{String}) + append!(args, Base.invokelatest(arg_gen, arg...)::Vector{String}) end return Cmd(Cmd(args), ignorestatus, flags, env, dir) else @@ -473,6 +481,12 @@ function cmd_gen(parsed) end end +@assume_effects :effect_free :terminates_globally :noub function cmd_gen( + parsed::Tuple{Vararg{Tuple{Vararg{Union{String, SubString{String}}}}}} +) + return @invoke cmd_gen(parsed::Any) +end + """ @cmd str diff --git a/base/cmem.jl b/base/cmem.jl new file mode 100644 index 0000000000000..dd4cbc30585f2 --- /dev/null +++ b/base/cmem.jl @@ -0,0 +1,57 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + memcpy(dst::Ptr, src::Ptr, n::Integer) -> Ptr{Cvoid} + +Call `memcpy` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memcpy` requires at least Julia 1.10. + +""" +function memcpy(dst::Ptr, src::Ptr, n::Integer) + @_terminates_globally_meta + ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n) +end + +""" + memmove(dst::Ptr, src::Ptr, n::Integer) -> Ptr{Cvoid} + +Call `memmove` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memmove` requires at least Julia 1.10. + +""" +function memmove(dst::Ptr, src::Ptr, n::Integer) + @_terminates_globally_meta + ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n) +end + +""" + memset(dst::Ptr, val, n::Integer) -> Ptr{Cvoid} + +Call `memset` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memset` requires at least Julia 1.10. + +""" +function memset(p::Ptr, val, n::Integer) + @_terminates_globally_meta + ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), p, val, n) +end + +""" + memcmp(a::Ptr, b::Ptr, n::Integer) -> Int + +Call `memcmp` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memcmp` requires at least Julia 1.9. + +""" +function memcmp(a::Ptr, b::Ptr, n::Integer) + @_terminates_globally_meta + ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), a, b, n % Csize_t) % Int +end diff --git a/base/combinatorics.jl b/base/combinatorics.jl index 9c753560e3f82..a169cfb9ecd77 100644 --- a/base/combinatorics.jl +++ b/base/combinatorics.jl @@ -136,27 +136,43 @@ function permutecols!!(a::AbstractMatrix, p::AbstractVector{<:Integer}) a end -function permute!!(a, p::AbstractVector{<:Integer}) +# Row and column permutations for AbstractMatrix +permutecols!(a::AbstractMatrix, p::AbstractVector{<:Integer}) = + _permute!(a, p, Base.swapcols!) +permuterows!(a::AbstractMatrix, p::AbstractVector{<:Integer}) = + _permute!(a, p, Base.swaprows!) 
+@inline function _permute!(a::AbstractMatrix, p::AbstractVector{<:Integer}, swapfun!::F) where {F} require_one_based_indexing(a, p) - count = 0 - start = 0 - while count < length(a) - ptr = start = findnext(!iszero, p, start+1)::Int - temp = a[start] - next = p[start] - count += 1 - while next != start - a[ptr] = a[next] - p[ptr] = 0 - ptr = next - next = p[next] - count += 1 + p .= .-p + for i in 1:length(p) + p[i] > 0 && continue + j = i + in = p[j] = -p[j] + while p[in] < 0 + swapfun!(a, in, j) + j = in + in = p[in] = -p[in] end - a[ptr] = temp - p[ptr] = 0 end a end +invpermutecols!(a::AbstractMatrix, p::AbstractVector{<:Integer}) = + _invpermute!(a, p, Base.swapcols!) +invpermuterows!(a::AbstractMatrix, p::AbstractVector{<:Integer}) = + _invpermute!(a, p, Base.swaprows!) +@inline function _invpermute!(a::AbstractMatrix, p::AbstractVector{<:Integer}, swapfun!::F) where {F} + require_one_based_indexing(a, p) + p .= .-p + for i in 1:length(p) + p[i] > 0 && continue + j = p[i] = -p[i] + while j != i + swapfun!(a, j, i) + j = p[j] = -p[j] + end + end + a +end """ permute!(v, p) @@ -164,7 +180,12 @@ end Permute vector `v` in-place, according to permutation `p`. No checking is done to verify that `p` is a permutation. -To return a new permutation, use `v[p]`. Note that this is faster than `permute!(v, p)`. +To return a new permutation, use `v[p]`. This is generally faster than `permute!(v, p)`; +it is even faster to write into a pre-allocated output array with `u .= @view v[p]`. +(Even though `permute!` overwrites `v` in-place, it internally requires some allocation +to keep track of which elements have been moved.) + +$(_DOCS_ALIASING_WARNING) See also [`invpermute!`](@ref). @@ -186,35 +207,17 @@ julia> A """ permute!(v, p::AbstractVector) = (v .= v[p]) -function invpermute!!(a, p::AbstractVector{<:Integer}) - require_one_based_indexing(a, p) - count = 0 - start = 0 - while count < length(a) - start = findnext(!iszero, p, start+1)::Int - temp = a[start] - next = p[start] - count += 1 - while next != start - temp_next = a[next] - a[next] = temp - temp = temp_next - ptr = p[next] - p[next] = 0 - next = ptr - count += 1 - end - a[next] = temp - p[next] = 0 - end - a -end - """ invpermute!(v, p) Like [`permute!`](@ref), but the inverse of the given permutation is applied. +Note that if you have a pre-allocated output array (e.g. `u = similar(v)`), +it is quicker to instead employ `u[p] = v`. (`invpermute!` internally +allocates a copy of the data.) + +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1, 1, 3, 4]; @@ -276,7 +279,7 @@ julia> B[invperm(v)] """ function invperm(a::AbstractVector) require_one_based_indexing(a) - b = zero(a) # similar vector of zeros + b = fill!(similar(a), zero(eltype(a))) # mutable vector of zeros n = length(a) @inbounds for (i, j) in enumerate(a) ((1 <= j <= n) && b[j] == 0) || diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 5aa7669c3a3a9..1f5b4ab445330 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -1,147 +1,97 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -############# -# constants # -############# - -const _REF_NAME = Ref.body.name - -######### -# logic # -######### - # See if the inference result of the current statement's result value might affect # the final answer for the method (aside from optimization potential and exceptions). # To do that, we need to check both for slot assignment and SSA usage. 
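# Illustrative sketch (plain Julia, not compiler API; `g` is a hypothetical function):
# in `g` the value of `sin(x)` is neither assigned to a slot nor consumed by any SSA
# statement, so its inferred type cannot affect `g`'s return type -- the situation the
# `ssavalue_uses` check just below detects.
g(x) = (sin(x); nothing)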
-call_result_unused(frame::InferenceState) = - isexpr(frame.src.code[frame.currpc], :call) && isempty(frame.ssavalue_uses[frame.currpc]) - -function get_max_methods(mod::Module, interp::AbstractInterpreter) - max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int - max_methods < 0 ? InferenceParams(interp).MAX_METHODS : max_methods -end - -function get_max_methods(@nospecialize(f), mod::Module, interp::AbstractInterpreter) - if f !== nothing - fmm = typeof(f).name.max_methods - fmm !== UInt8(0) && return Int(fmm) - end - return get_max_methods(mod, interp) -end - -const empty_bitset = BitSet() - -function should_infer_this_call(sv::InferenceState) - if sv.params.unoptimize_throw_blocks - # Disable inference of calls in throw blocks, since we're unlikely to - # need their types. There is one exception however: If up until now, the - # function has not seen any side effects, we would like to make sure there - # aren't any in the throw block either to enable other optimizations. - if is_stmt_throw_block(get_curr_ssaflag(sv)) - should_infer_for_effects(sv) || return false - end - end - return true -end - -function should_infer_for_effects(sv::InferenceState) - effects = Effects(sv) - return effects.terminates === ALWAYS_TRUE && - effects.effect_free === ALWAYS_TRUE -end +call_result_unused(sv::InferenceState, currpc::Int) = + isexpr(sv.src.code[currpc], :call) && isempty(sv.ssavalue_uses[currpc]) +call_result_unused(si::StmtInfo) = !si.used function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), - arginfo::ArgInfo, @nospecialize(atype), - sv::InferenceState, max_methods::Int) - if !should_infer_this_call(sv) + arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype), + sv::AbsIntState, max_methods::Int) + 𝕃ₚ, 𝕃ᵢ = ipo_lattice(interp), typeinf_lattice(interp) + ⊑ₚ = ⊑(𝕃ₚ) + if !should_infer_this_call(interp, sv) add_remark!(interp, sv, "Skipped call in throw block") - nonoverlayed = false - if isoverlayed(method_table(interp)) && is_nonoverlayed(sv.ipo_effects) - # as we may want to concrete-evaluate this frame in cases when there are - # no overlayed calls, try an additional effort now to check if this call - # isn't overlayed rather than just handling it conservatively - matches = find_matching_methods(arginfo.argtypes, atype, method_table(interp), - InferenceParams(interp).MAX_UNION_SPLITTING, max_methods) - if !isa(matches, FailedMethodMatch) - nonoverlayed = matches.nonoverlayed - end - else - nonoverlayed = true - end # At this point we are guaranteed to end up throwing on this path, # which is all that's required for :consistent-cy. Of course, we don't # know anything else about this statement. 
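# Illustrative sketch (plain Julia, not compiler API; `checked_sqrt` is a hypothetical
# function): the `string` call below is reachable only on a throwing path, which is the
# kind of statement the "Skipped call in throw block" branch above applies to.
checked_sqrt(x) = x < 0 ? throw(DomainError(x, string("negative input: ", x))) : sqrt(x)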
- effects = Effects(; consistent=ALWAYS_TRUE, nonoverlayed) - return CallMeta(Any, effects, false) + effects = Effects(; consistent=ALWAYS_TRUE) + return CallMeta(Any, Any, effects, NoCallInfo()) end argtypes = arginfo.argtypes - matches = find_matching_methods(argtypes, atype, method_table(interp), - InferenceParams(interp).MAX_UNION_SPLITTING, max_methods) + matches = find_matching_methods(𝕃ᵢ, argtypes, atype, method_table(interp), + InferenceParams(interp).max_union_splitting, max_methods) if isa(matches, FailedMethodMatch) add_remark!(interp, sv, matches.reason) - return CallMeta(Any, Effects(), false) + return CallMeta(Any, Any, Effects(), NoCallInfo()) end (; valid_worlds, applicable, info) = matches update_valid_age!(sv, valid_worlds) napplicable = length(applicable) - rettype = Bottom + rettype = excttype = Bottom edges = MethodInstance[] conditionals = nothing # keeps refinement information of call argument types when the return type is boolean seen = 0 # number of signatures actually inferred - any_const_result = false - const_results = Union{Nothing,ConstResult}[] + const_results = nothing # or const_results::Vector{Union{Nothing,ConstResult}} if any const results are available multiple_matches = napplicable > 1 fargs = arginfo.fargs all_effects = EFFECTS_TOTAL - if !matches.nonoverlayed - # currently we don't have a good way to execute the overlayed method definition, - # so we should give up pure/concrete eval when any of the matched methods is overlayed - f = nothing - all_effects = Effects(all_effects; nonoverlayed=false) - end - - # try pure-evaluation - val = pure_eval_call(interp, f, applicable, arginfo, sv) - val !== nothing && return CallMeta(val, all_effects, MethodResultPure(info)) # TODO: add some sort of edge(s) for i in 1:napplicable match = applicable[i]::MethodMatch method = match.method sig = match.spec_types - if bail_out_toplevel_call(interp, sig, sv) + if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv) # only infer concrete call sites in top-level expressions add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression") - rettype = Any break end this_rt = Bottom + this_exct = Bottom splitunions = false # TODO: this used to trigger a bug in inference recursion detection, and is unmaintained now # sigtuple = unwrap_unionall(sig)::DataType - # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).MAX_UNION_SPLITTING + # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting if splitunions splitsigs = switchtupleunion(sig) for sig_n in splitsigs - result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, sv) - (; rt, edge, effects) = result - edge !== nothing && push!(edges, edge) + result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv) + (; rt, exct, edge, effects, volatile_inf_result) = result this_argtypes = isa(matches, MethodMatches) ? 
argtypes : matches.applicable_argtypes[i] this_arginfo = ArgInfo(fargs, this_argtypes) - const_call_result = abstract_call_method_with_const_args(interp, result, - f, this_arginfo, match, sv) - const_result = nothing + const_call_result = abstract_call_method_with_const_args(interp, + result, f, this_arginfo, si, match, sv) + const_result = volatile_inf_result if const_call_result !== nothing - if const_call_result.rt ⊑ rt + if const_call_result.rt ⊑ₚ rt rt = const_call_result.rt - (; effects, const_result) = const_call_result + (; effects, const_result, edge) = const_call_result + elseif is_better_effects(const_call_result.effects, effects) + (; effects, const_result, edge) = const_call_result + else + add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference") + end + if !(exct ⊑ₚ const_call_result.exct) + exct = const_call_result.exct + (; const_result, edge) = const_call_result + else + add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference") end end - all_effects = tristate_merge(all_effects, effects) - push!(const_results, const_result) - any_const_result |= const_result !== nothing + all_effects = merge_effects(all_effects, effects) + if const_result !== nothing + if const_results === nothing + const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, #=TODO=#napplicable), nothing) + end + const_results[i] = const_result + end + edge === nothing || push!(edges, edge) this_rt = tmerge(this_rt, rt) + this_exct = tmerge(this_exct, exct) if bail_out_call(interp, this_rt, sv) break end @@ -149,84 +99,112 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), this_conditional = ignorelimited(this_rt) this_rt = widenwrappedconditional(this_rt) else - if infer_compilation_signature(interp) - # Also infer the compilation signature for this method, so it's available - # to the compiler in case it ends up needing it (which is likely). - csig = get_compileable_sig(method, sig, match.sparams) - if csig !== nothing && csig !== sig - # The result of this inference is not directly used, so temporarily empty - # the use set for the current SSA value. - saved_uses = sv.ssavalue_uses[sv.currpc] - sv.ssavalue_uses[sv.currpc] = empty_bitset - abstract_call_method(interp, method, csig, match.sparams, multiple_matches, sv) - sv.ssavalue_uses[sv.currpc] = saved_uses - end - end - - result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, sv) - (; rt, edge, effects) = result + result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv) + (; rt, exct, edge, effects, volatile_inf_result) = result this_conditional = ignorelimited(rt) this_rt = widenwrappedconditional(rt) - edge !== nothing && push!(edges, edge) + this_exct = exct # try constant propagation with argtypes for this match # this is in preparation for inlining, or improving the return result this_argtypes = isa(matches, MethodMatches) ? 
argtypes : matches.applicable_argtypes[i] this_arginfo = ArgInfo(fargs, this_argtypes) - const_call_result = abstract_call_method_with_const_args(interp, result, - f, this_arginfo, match, sv) - const_result = nothing + const_call_result = abstract_call_method_with_const_args(interp, + result, f, this_arginfo, si, match, sv) + const_result = volatile_inf_result if const_call_result !== nothing this_const_conditional = ignorelimited(const_call_result.rt) this_const_rt = widenwrappedconditional(const_call_result.rt) - # return type of const-prop' inference can be wider than that of non const-prop' inference - # e.g. in cases when there are cycles but cached result is still accurate - if this_const_rt ⊑ this_rt + if this_const_rt ⊑ₚ this_rt + # As long as the const-prop result we have is not *worse* than + # what we found out on types, we'd like to use it. Even if the + # end result is exactly equivalent, it is likely that the IR + # we produced while constproping is better than that with + # generic types. + # Return type of const-prop' inference can be wider than that of non const-prop' inference + # e.g. in cases when there are cycles but cached result is still accurate this_conditional = this_const_conditional this_rt = this_const_rt - (; effects, const_result) = const_call_result + (; effects, const_result, edge) = const_call_result + elseif is_better_effects(const_call_result.effects, effects) + (; effects, const_result, edge) = const_call_result + else + add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference") + end + # Treat the exception type separately. Currently, constprop often cannot determine the exception type + # because consistent-cy does not apply to exceptions. + if !(this_exct ⊑ₚ const_call_result.exct) + this_exct = const_call_result.exct + (; const_result, edge) = const_call_result + else + add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference") end end - all_effects = tristate_merge(all_effects, effects) - push!(const_results, const_result) - any_const_result |= const_result !== nothing + all_effects = merge_effects(all_effects, effects) + if const_result !== nothing + if const_results === nothing + const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing) + end + const_results[i] = const_result + end + edge === nothing || push!(edges, edge) end - @assert !(this_conditional isa Conditional) "invalid lattice element returned from inter-procedural context" + @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context" seen += 1 - rettype = tmerge(rettype, this_rt) - if this_conditional !== Bottom && is_lattice_bool(rettype) && fargs !== nothing + rettype = tmerge(𝕃ₚ, rettype, this_rt) + excttype = tmerge(𝕃ₚ, excttype, this_exct) + if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing if conditionals === nothing conditionals = Any[Bottom for _ in 1:length(argtypes)], Any[Bottom for _ in 1:length(argtypes)] end for i = 1:length(argtypes) - cnd = conditional_argtype(this_conditional, sig, argtypes, i) - conditionals[1][i] = tmerge(conditionals[1][i], cnd.thentype) - conditionals[2][i] = tmerge(conditionals[2][i], cnd.elsetype) + cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i) + conditionals[1][i] = tmerge(𝕃ᵢ, conditionals[1][i], cnd.thentype) + conditionals[2][i] = tmerge(𝕃ᵢ, conditionals[2][i], cnd.elsetype) end end - 
if bail_out_call(interp, rettype, sv) + if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv) + add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.") break end end - if any_const_result && seen == napplicable + if const_results !== nothing @assert napplicable == nmatches(info) == length(const_results) info = ConstCallInfo(info, const_results) end - if seen != napplicable - # there may be unanalyzed effects within unseen dispatch candidate, - # but we can still ignore nonoverlayed effect here since we already accounted for it - all_effects = tristate_merge(all_effects, EFFECTS_UNKNOWN) + if seen ≠ napplicable + # there is unanalyzed candidate, widen type and effects to the top + rettype = excttype = Any + all_effects = Effects() elseif isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) : (!all(matches.fullmatches) || any_ambig(matches)) # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. - all_effects = Effects(all_effects; nothrow=ALWAYS_FALSE) + all_effects = Effects(all_effects; nothrow=false) + excttype = tmerge(𝕃ₚ, excttype, MethodError) end - rettype = from_interprocedural!(rettype, sv, arginfo, conditionals) + rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals) + + # Also considering inferring the compilation signature for this method, so + # it is available to the compiler in case it ends up needing it. + if (isa(sv, InferenceState) && infer_compilation_signature(interp) && + (1 == seen == napplicable) && rettype !== Any && rettype !== Bottom && + !is_removable_if_unused(all_effects)) + match = applicable[1]::MethodMatch + method = match.method + sig = match.spec_types + mi = specialize_method(match; preexisting=true) + if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv) + csig = get_compileable_sig(method, sig, match.sparams) + if csig !== nothing && csig !== sig + abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv) + end + end + end - if call_result_unused(sv) && !(rettype === Bottom) + if call_result_unused(si) && !(rettype === Bottom) add_remark!(interp, sv, "Call result type was widened because the return value is unused") # We're mainly only here because the optimizer might want this code, # but we ourselves locally don't typically care about it locally @@ -237,13 +215,20 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), rettype = Any end add_call_backedges!(interp, rettype, all_effects, edges, matches, atype, sv) - if !isempty(sv.pclimitations) # remove self, if present - delete!(sv.pclimitations, sv) - for caller in sv.callers_in_cycle - delete!(sv.pclimitations, caller) + if isa(sv, InferenceState) + # TODO (#48913) implement a proper recursion handling for irinterp: + # This works just because currently the `:terminate` condition guarantees that + # irinterp doesn't fail into unresolved cycles, but it's not a good solution. + # We should revisit this once we have a better story for handling cycles in irinterp. 
+ if !isempty(sv.pclimitations) # remove self, if present + delete!(sv.pclimitations, sv) + for caller in callers_in_cycle(sv) + delete!(sv.pclimitations, caller) + end end end - return CallMeta(rettype, all_effects, info) + + return CallMeta(rettype, excttype, all_effects, info) end struct FailedMethodMatch @@ -254,9 +239,8 @@ struct MethodMatches applicable::Vector{Any} info::MethodMatchInfo valid_worlds::WorldRange - mt::Core.MethodTable + mt::MethodTable fullmatch::Bool - nonoverlayed::Bool end any_ambig(info::MethodMatchInfo) = info.results.ambig any_ambig(m::MethodMatches) = any_ambig(m.info) @@ -266,43 +250,40 @@ struct UnionSplitMethodMatches applicable_argtypes::Vector{Vector{Any}} info::UnionSplitInfo valid_worlds::WorldRange - mts::Vector{Core.MethodTable} + mts::Vector{MethodTable} fullmatches::Vector{Bool} - nonoverlayed::Bool end -any_ambig(m::UnionSplitMethodMatches) = _any(any_ambig, m.info.matches) +any_ambig(m::UnionSplitMethodMatches) = any(any_ambig, m.info.matches) -function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView, - union_split::Int, max_methods::Int) +function find_matching_methods(𝕃::AbstractLattice, + argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView, + max_union_splitting::Int, max_methods::Int) # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type - if 1 < unionsplitcost(argtypes) <= union_split - split_argtypes = switchtupleunion(argtypes) + if 1 < unionsplitcost(𝕃, argtypes) <= max_union_splitting + split_argtypes = switchtupleunion(𝕃, argtypes) infos = MethodMatchInfo[] applicable = Any[] applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match valid_worlds = WorldRange() - mts = Core.MethodTable[] + mts = MethodTable[] fullmatches = Bool[] - nonoverlayed = true for i in 1:length(split_argtypes) arg_n = split_argtypes[i]::Vector{Any} sig_n = argtypes_to_type(arg_n) mt = ccall(:jl_method_table_for, Any, (Any,), sig_n) mt === nothing && return FailedMethodMatch("Could not identify method table for call") - mt = mt::Core.MethodTable - result = findall(sig_n, method_table; limit = max_methods) - if result === missing + mt = mt::MethodTable + matches = findall(sig_n, method_table; limit = max_methods) + if matches === nothing return FailedMethodMatch("For one of the union split cases, too many methods matched") end - matches, overlayed = result - nonoverlayed &= !overlayed push!(infos, MethodMatchInfo(matches)) for m in matches push!(applicable, m) push!(applicable_argtypes, arg_n) end valid_worlds = intersect(valid_worlds, matches.valid_worlds) - thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches) + thisfullmatch = any(match::MethodMatch->match.fully_covers, matches) found = false for (i, mt′) in enumerate(mts) if mt′ === mt @@ -321,33 +302,31 @@ function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), meth UnionSplitInfo(infos), valid_worlds, mts, - fullmatches, - nonoverlayed) + fullmatches) else mt = ccall(:jl_method_table_for, Any, (Any,), atype) if mt === nothing return FailedMethodMatch("Could not identify method table for call") end - mt = mt::Core.MethodTable - result = findall(atype, method_table; limit = max_methods) - if result === missing + mt = mt::MethodTable + matches = findall(atype, method_table; limit = max_methods) + if matches === nothing # this means too many methods matched # (assume this will always be true, so we don't compute / update valid 
age in this case) return FailedMethodMatch("Too many methods matched") end - matches, overlayed = result - fullmatch = _any(match->(match::MethodMatch).fully_covers, matches) + fullmatch = any(match::MethodMatch->match.fully_covers, matches) return MethodMatches(matches.matches, MethodMatchInfo(matches), matches.valid_worlds, mt, - fullmatch, - !overlayed) + fullmatch) end end """ - from_interprocedural!(rt, sv::InferenceState, arginfo::ArgInfo, maybecondinfo) -> newrt + from_interprocedural!(interp::AbstractInterpreter, rt, sv::AbsIntState, + arginfo::ArgInfo, maybecondinfo) -> newrt Converts inter-procedural return type `rt` into a local lattice element `newrt`, that is appropriate in the context of current local analysis frame `sv`, especially: @@ -366,16 +345,19 @@ In such cases `maybecondinfo` should be either of: When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by `tmerge`ing argument signature type of each method call. """ -function from_interprocedural!(@nospecialize(rt), sv::InferenceState, arginfo::ArgInfo, @nospecialize(maybecondinfo)) +function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), sv::AbsIntState, + arginfo::ArgInfo, @nospecialize(maybecondinfo)) rt = collect_limitations!(rt, sv) - if is_lattice_bool(rt) + if isa(rt, InterMustAlias) + rt = from_intermustalias(rt, arginfo, sv) + elseif is_lattice_bool(ipo_lattice(interp), rt) if maybecondinfo === nothing rt = widenconditional(rt) else - rt = from_interconditional(rt, sv, arginfo, maybecondinfo) + rt = from_interconditional(typeinf_lattice(interp), rt, sv, arginfo, maybecondinfo) end end - @assert !(rt isa InterConditional) "invalid lattice element returned from inter-procedural context" + @assert !(rt isa InterConditional || rt isa InterMustAlias) "invalid lattice element returned from inter-procedural context" return rt end @@ -387,19 +369,53 @@ function collect_limitations!(@nospecialize(typ), sv::InferenceState) return typ end -function from_interconditional(@nospecialize(typ), sv::InferenceState, (; fargs, argtypes)::ArgInfo, @nospecialize(maybecondinfo)) - fargs === nothing && return widenconditional(typ) +function from_intermustalias(rt::InterMustAlias, arginfo::ArgInfo, sv::AbsIntState) + fargs = arginfo.fargs + if fargs !== nothing && 1 ≤ rt.slot ≤ length(fargs) + arg = ssa_def_slot(fargs[rt.slot], sv) + if isa(arg, SlotNumber) + argtyp = widenslotwrapper(arginfo.argtypes[rt.slot]) + if rt.vartyp ⊑ argtyp + return MustAlias(arg, rt.vartyp, rt.fldidx, rt.fldtyp) + else + # TODO optimize this case? 
+ end + end + end + return widenmustalias(rt) +end + +function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::AbsIntState, + arginfo::ArgInfo, @nospecialize(maybecondinfo)) + has_conditional(𝕃ᵢ, sv) || return widenconditional(rt) + (; fargs, argtypes) = arginfo + fargs === nothing && return widenconditional(rt) slot = 0 + alias = nothing thentype = elsetype = Any - condval = maybe_extract_const_bool(typ) + condval = maybe_extract_const_bool(rt) for i in 1:length(fargs) # find the first argument which supports refinement, # and intersect all equivalent arguments with it - arg = ssa_def_slot(fargs[i], sv) - arg isa SlotNumber || continue # can't refine - old = argtypes[i] - old isa Type || continue # unlikely to refine - id = slot_id(arg) + argtyp = argtypes[i] + if alias === nothing + arg = ssa_def_slot(fargs[i], sv) + if isa(arg, SlotNumber) && widenslotwrapper(argtyp) isa Type + old = argtyp + id = slot_id(arg) + elseif argtyp isa MustAlias + old = argtyp.fldtyp + id = argtyp.slot + else + continue # unlikely to refine + end + elseif argtyp isa MustAlias && issubalias(argtyp, alias) + arg = nothing + old = alias.fldtyp + id = alias.slot + else + continue + end if slot == 0 || id == slot if isa(maybecondinfo, Tuple{Vector{Any},Vector{Any}}) # if we have already computed argument refinement information, apply that now to get the result @@ -407,49 +423,58 @@ function from_interconditional(@nospecialize(typ), sv::InferenceState, (; fargs, new_elsetype = maybecondinfo[2][i] else # otherwise compute it on the fly - cnd = conditional_argtype(typ, maybecondinfo, argtypes, i) + cnd = conditional_argtype(𝕃ᵢ, rt, maybecondinfo, argtypes, i) new_thentype = cnd.thentype new_elsetype = cnd.elsetype end if condval === false thentype = Bottom - elseif new_thentype ⊑ thentype + elseif ⊑(𝕃ᵢ, new_thentype, thentype) thentype = new_thentype else - thentype = tmeet(thentype, widenconst(new_thentype)) + thentype = tmeet(𝕃ᵢ, thentype, widenconst(new_thentype)) end if condval === true elsetype = Bottom - elseif new_elsetype ⊑ elsetype + elseif ⊑(𝕃ᵢ, new_elsetype, elsetype) elsetype = new_elsetype else - elsetype = tmeet(elsetype, widenconst(new_elsetype)) + elsetype = tmeet(𝕃ᵢ, elsetype, widenconst(new_elsetype)) end - if (slot > 0 || condval !== false) && thentype ⋤ old + if (slot > 0 || condval !== false) && ⋤(𝕃ᵢ, thentype, old) slot = id - elseif (slot > 0 || condval !== true) && elsetype ⋤ old + if !(arg isa SlotNumber) && argtyp isa MustAlias + alias = argtyp + end + elseif (slot > 0 || condval !== true) && ⋤(𝕃ᵢ, elsetype, old) slot = id + if !(arg isa SlotNumber) && argtyp isa MustAlias + alias = argtyp + end else # reset: no new useful information for this slot + slot = 0 + alias = nothing thentype = elsetype = Any - if slot > 0 - slot = 0 - end end end end if thentype === Bottom && elsetype === Bottom return Bottom # accidentally proved this call to be dead / throw ! 
elseif slot > 0 + if alias !== nothing + return form_mustalias_conditional(alias, thentype, elsetype) + end return Conditional(slot, thentype, elsetype) # record a Conditional improvement to this slot end - return widenconditional(typ) + return widenconditional(rt) end -function conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Vector{Any}, i::Int) +function conditional_argtype(𝕃ᵢ::AbstractLattice, @nospecialize(rt), @nospecialize(sig), + argtypes::Vector{Any}, i::Int) if isa(rt, InterConditional) && rt.slot == i return rt else - thentype = elsetype = tmeet(widenconditional(argtypes[i]), fieldtype(sig, i)) + thentype = elsetype = tmeet(𝕃ᵢ, widenslotwrapper(argtypes[i]), fieldtype(sig, i)) condval = maybe_extract_const_bool(rt) condval === true && (elsetype = Bottom) condval === false && (thentype = Bottom) @@ -457,119 +482,143 @@ function conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Ve end end -function add_call_backedges!(interp::AbstractInterpreter, - @nospecialize(rettype), all_effects::Effects, +function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), all_effects::Effects, edges::Vector{MethodInstance}, matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype), - sv::InferenceState) - # we don't need to add backedges when: - # - a new method couldn't refine (widen) this type and - # - the effects are known to not provide any useful IPO information + sv::AbsIntState) + # don't bother to add backedges when both type and effects information are already + # maximized to the top since a new method couldn't refine or widen them anyway if rettype === Any + # ignore the `:nonoverlayed` property if `interp` doesn't use overlayed method table + # since it will never be tainted anyway if !isoverlayed(method_table(interp)) - # we can ignore the `nonoverlayed` property if `interp` doesn't use - # overlayed method table at all since it will never be tainted anyway all_effects = Effects(all_effects; nonoverlayed=false) end - if all_effects === Effects() - return - end + all_effects === Effects() && return nothing end for edge in edges - add_backedge!(edge, sv) + add_backedge!(sv, edge) end # also need an edge to the method table in case something gets # added that did not intersect with any existing method if isa(matches, MethodMatches) - matches.fullmatch || add_mt_backedge!(matches.mt, atype, sv) + matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype) else for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts) - thisfullmatch || add_mt_backedge!(mt, atype, sv) + thisfullmatch || add_mt_backedge!(sv, mt, atype) end end + return nothing end const RECURSION_UNUSED_MSG = "Bounded recursion detected with unused result. Annotated return type may be wider than true result." const RECURSION_MSG = "Bounded recursion detected. Call was widened to force convergence." +const RECURSION_MSG_HARDLIMIT = "Bounded recursion detected under hardlimit. Call was widened to force convergence." 
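A rough user-level sketch of the bounded-recursion handling these messages refer to (illustrative only; `deepen` is an invented name and this snippet is not part of the patch): when a self-call keeps growing its own argument type, inference widens the signature via `limit_type_size` so that it still converges, recording one of the recursion remarks above.

    # Every call wraps its argument in one more tuple, so the inferred call
    # signature would grow without bound; inference widens it to force convergence.
    deepen(x) = deepen((x,))
    # Inference still terminates; the call provably never returns:
    Base.return_types(deepen, (Int,))  # => Any[Union{}]
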
-function abstract_call_method(interp::AbstractInterpreter, method::Method, @nospecialize(sig), sparams::SimpleVector, hardlimit::Bool, sv::InferenceState) +function abstract_call_method(interp::AbstractInterpreter, + method::Method, @nospecialize(sig), sparams::SimpleVector, + hardlimit::Bool, si::StmtInfo, sv::AbsIntState) if method.name === :depwarn && isdefined(Main, :Base) && method.module === Main.Base add_remark!(interp, sv, "Refusing to infer into `depwarn`") - return MethodCallResult(Any, false, false, nothing, Effects()) + return MethodCallResult(Any, Any, false, false, nothing, Effects()) end - topmost = nothing + sigtuple = unwrap_unionall(sig) + sigtuple isa DataType || + return MethodCallResult(Any, Any, false, false, nothing, Effects()) + all(@nospecialize(x) -> valid_as_lattice(unwrapva(x), true), sigtuple.parameters) || + return MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS) # catch bad type intersections early + + if is_nospecializeinfer(method) + sig = get_nospecializeinfer_sig(method, sig, sparams) + end + # Limit argument type tuple growth of functions: # look through the parents list to see if there's a call to the same method # and from the same method. # Returns the topmost occurrence of that repeated edge. - edgecycle = false - edgelimited = false + edgecycle = edgelimited = false + topmost = nothing - for infstate in InfStackUnwind(sv) - if method === infstate.linfo.def - if infstate.linfo.specTypes::Type == sig::Type + for sv′ in AbsIntStackUnwind(sv) + infmi = frame_instance(sv′) + if method === infmi.def + if infmi.specTypes::Type == sig::Type # avoid widening when detecting self-recursion # TODO: merge call cycle and return right away - if call_result_unused(sv) + if call_result_unused(si) add_remark!(interp, sv, RECURSION_UNUSED_MSG) # since we don't use the result (typically), # we have a self-cycle in the call-graph, but not in the inference graph (typically): # break this edge now (before we record it) by returning early # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases) - return MethodCallResult(Any, true, true, nothing, Effects()) + return MethodCallResult(Any, Any, true, true, nothing, Effects()) end topmost = nothing edgecycle = true break end topmost === nothing || continue - if edge_matches_sv(infstate, method, sig, sparams, hardlimit, sv) - topmost = infstate + if edge_matches_sv(interp, sv′, method, sig, sparams, hardlimit, sv) + topmost = sv′ edgecycle = true end end end + washardlimit = hardlimit if topmost !== nothing - sigtuple = unwrap_unionall(sig)::DataType msig = unwrap_unionall(method.sig)::DataType spec_len = length(msig.parameters) + 1 - ls = length(sigtuple.parameters) + mi = frame_instance(sv) + + if isdefined(method, :recursion_relation) + # We don't require the recursion_relation to be transitive, so + # apply a hard limit + hardlimit = true + end - if method === sv.linfo.def + if method === mi.def # Under direct self-recursion, permit much greater use of reducers. # here we assume that complexity(specTypes) :>= complexity(sig) - comparison = sv.linfo.specTypes + comparison = mi.specTypes l_comparison = length((unwrap_unionall(comparison)::DataType).parameters) spec_len = max(spec_len, l_comparison) + elseif !hardlimit && isa(topmost, InferenceState) + # Without a hardlimit, permit use of reducers too. + comparison = frame_instance(topmost).specTypes + # n.b. 
currently don't allow vararg reducers + #l_comparison = length((unwrap_unionall(comparison)::DataType).parameters) + #spec_len = max(spec_len, l_comparison) else comparison = method.sig end - if isdefined(method, :recursion_relation) - # We don't recquire the recursion_relation to be transitive, so - # apply a hard limit - hardlimit = true - end - # see if the type is actually too big (relative to the caller), and limit it if required - newsig = limit_type_size(sig, comparison, hardlimit ? comparison : sv.linfo.specTypes, InferenceParams(interp).TUPLE_COMPLEXITY_LIMIT_DEPTH, spec_len) + newsig = limit_type_size(sig, comparison, hardlimit ? comparison : mi.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, spec_len) if newsig !== sig # continue inference, but note that we've limited parameter complexity # on this call (to ensure convergence), so that we don't cache this result - if call_result_unused(sv) + if call_result_unused(si) add_remark!(interp, sv, RECURSION_UNUSED_MSG) # if we don't (typically) actually care about this result, # don't bother trying to examine some complex abstract signature # since it's very unlikely that we'll try to inline this, # or want make an invoke edge to its calling convention return type. # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases) - return MethodCallResult(Any, true, true, nothing, Effects()) + return MethodCallResult(Any, Any, true, true, nothing, Effects()) + end + add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG) + # TODO (#48913) implement a proper recursion handling for irinterp: + # This works just because currently the `:terminate` condition guarantees that + # irinterp doesn't fail into unresolved cycles, but it's not a good solution. + # We should revisit this once we have a better story for handling cycles in irinterp. + if isa(topmost, InferenceState) + parentframe = frame_parent(topmost) + if isa(sv, InferenceState) && isa(parentframe, InferenceState) + poison_callstack!(sv, parentframe === nothing ? topmost : parentframe) + end end - add_remark!(interp, sv, RECURSION_MSG) - topmost = topmost::InferenceState - parentframe = topmost.parent - poison_callstack(sv, parentframe === nothing ? topmost : parentframe) + # n.b. 
this heuristic depends on the non-local state, so we must record the limit later sig = newsig sparams = svec() edgelimited = true @@ -594,7 +643,7 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp # while !(newsig in seen) # push!(seen, newsig) # lsig = length((unwrap_unionall(sig)::DataType).parameters) - # newsig = limit_type_size(newsig, sig, sv.linfo.specTypes, InferenceParams(interp).TUPLE_COMPLEXITY_LIMIT_DEPTH, lsig) + # newsig = limit_type_size(newsig, sig, sv.linfo.specTypes, InferenceParams(interp).tuple_complexity_limit_depth, lsig) # recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), newsig, method.sig)::SimpleVector # newsig = recomputed[2] # end @@ -602,7 +651,7 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp sparams = recomputed[2]::SimpleVector end - (; rt, edge, effects) = typeinf_edge(interp, method, sig, sparams, sv) + (; rt, exct, edge, effects, volatile_inf_result) = typeinf_edge(interp, method, sig, sparams, sv) if edge === nothing edgecycle = edgelimited = true @@ -612,32 +661,39 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp # may have been tainted due to recursion at this point even if it's overridden if is_effect_overridden(sv, :terminates_globally) # this frame is known to terminate - effects = Effects(effects, terminates=ALWAYS_TRUE) + effects = Effects(effects, terminates=true) elseif is_effect_overridden(method, :terminates_globally) # this edge is known to terminate - effects = Effects(effects; terminates=ALWAYS_TRUE) + effects = Effects(effects; terminates=true) elseif edgecycle - # Some sort of recursion was detected. Even if we did not limit types, - # we cannot guarantee that the call will terminate - effects = Effects(effects; terminates=ALWAYS_FALSE) + # Some sort of recursion was detected. + if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv) + # no `MethodInstance` cycles -- don't taint :terminate + else + # we cannot guarantee that the call will terminate + effects = Effects(effects; terminates=false) + end end - return MethodCallResult(rt, edgecycle, edgelimited, edge, effects) + return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result) end -function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(sig), sparams::SimpleVector, hardlimit::Bool, sv::InferenceState) +function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState, + method::Method, @nospecialize(sig), sparams::SimpleVector, + hardlimit::Bool, sv::AbsIntState) # The `method_for_inference_heuristics` will expand the given method's generator if # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists. # The other `CodeInfo`s we inspect will already have this field inflated, so we just # access it directly instead (to avoid regeneration). 
- callee_method2 = method_for_inference_heuristics(method, sig, sparams) # Union{Method, Nothing} + world = get_world_counter(interp) + callee_method2 = method_for_inference_heuristics(method, sig, sparams, world) # Union{Method, Nothing} - inf_method2 = frame.src.method_for_inference_limit_heuristics # limit only if user token match + inf_method2 = method_for_inference_limit_heuristics(frame) # limit only if user token match inf_method2 isa Method || (inf_method2 = nothing) if callee_method2 !== inf_method2 return false end - if !hardlimit + if !hardlimit || InferenceParams(interp).ignore_recursion_hardlimit # if this is a soft limit, # also inspect the parent of this edge, # to see if they are the same Method as sv @@ -646,11 +702,10 @@ function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(si # check in the cycle list first # all items in here are mutual parents of all others - if !_any(p::InferenceState->matches_sv(p, sv), frame.callers_in_cycle) - let parent = frame.parent + if !any(p::AbsIntState->matches_sv(p, sv), callers_in_cycle(frame)) + let parent = frame_parent(frame) parent !== nothing || return false - parent = parent::InferenceState - (parent.cached || parent.parent !== nothing) || return false + (is_cached(parent) || frame_parent(parent) !== nothing) || return false matches_sv(parent, sv) || return false end end @@ -658,7 +713,7 @@ function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(si # If the method defines a recursion relation, give it a chance # to tell us that this recursion is actually ok. if isdefined(method, :recursion_relation) - if Core._apply_pure(method.recursion_relation, Any[method, callee_method2, sig, frame.linfo.specTypes]) + if Core._apply_pure(method.recursion_relation, Any[method, callee_method2, sig, frame_instance(frame).specTypes]) return false end end @@ -667,11 +722,11 @@ function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(si end # This function is used for computing alternate limit heuristics -function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector) - if isdefined(method, :generator) && method.generator.expand_early && may_invoke_generator(method, sig, sparams) +function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector, world::UInt) + if isdefined(method, :generator) && !(method.generator isa Core.GeneratedFunctionStub) && may_invoke_generator(method, sig, sparams) method_instance = specialize_method(method, sig, sparams) if isa(method_instance, MethodInstance) - cinfo = get_staged(method_instance) + cinfo = get_staged(method_instance, world) if isa(cinfo, CodeInfo) method2 = cinfo.method_for_inference_limit_heuristics if method2 isa Method @@ -683,196 +738,243 @@ function method_for_inference_heuristics(method::Method, @nospecialize(sig), spa return nothing end -function matches_sv(parent::InferenceState, sv::InferenceState) - sv_method2 = sv.src.method_for_inference_limit_heuristics # limit only if user token match +function matches_sv(parent::AbsIntState, sv::AbsIntState) + sv_method2 = method_for_inference_limit_heuristics(sv) # limit only if user token match sv_method2 isa Method || (sv_method2 = nothing) - parent_method2 = parent.src.method_for_inference_limit_heuristics # limit only if user token match + parent_method2 = method_for_inference_limit_heuristics(parent) # limit only if user token match parent_method2 isa Method || (parent_method2 = nothing) - return parent.linfo.def === 
sv.linfo.def && sv_method2 === parent_method2 + return frame_instance(parent).def === frame_instance(sv).def && sv_method2 === parent_method2 +end + +function is_edge_recursed(edge::MethodInstance, caller::AbsIntState) + return any(AbsIntStackUnwind(caller)) do sv::AbsIntState + return edge === frame_instance(sv) + end +end + +function is_method_recursed(method::Method, caller::AbsIntState) + return any(AbsIntStackUnwind(caller)) do sv::AbsIntState + return method === frame_instance(sv).def + end +end + +function is_constprop_edge_recursed(edge::MethodInstance, caller::AbsIntState) + return any(AbsIntStackUnwind(caller)) do sv::AbsIntState + return edge === frame_instance(sv) && is_constproped(sv) + end +end + +function is_constprop_method_recursed(method::Method, caller::AbsIntState) + return any(AbsIntStackUnwind(caller)) do sv::AbsIntState + return method === frame_instance(sv).def && is_constproped(sv) + end end # keeps result and context information of abstract_method_call, which will later be used for # backedge computation, and concrete evaluation or constant-propagation struct MethodCallResult rt + exct edgecycle::Bool edgelimited::Bool edge::Union{Nothing,MethodInstance} effects::Effects - function MethodCallResult(@nospecialize(rt), + volatile_inf_result::Union{Nothing,VolatileInferenceResult} + function MethodCallResult(@nospecialize(rt), @nospecialize(exct), edgecycle::Bool, edgelimited::Bool, edge::Union{Nothing,MethodInstance}, - effects::Effects) - return new(rt, edgecycle, edgelimited, edge, effects) + effects::Effects, + volatile_inf_result::Union{Nothing,VolatileInferenceResult}=nothing) + return new(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result) end end -function pure_eval_eligible(interp::AbstractInterpreter, - @nospecialize(f), applicable::Vector{Any}, arginfo::ArgInfo, sv::InferenceState) - # XXX we need to check that this pure function doesn't call any overlayed method - return f !== nothing && - length(applicable) == 1 && - is_method_pure(applicable[1]::MethodMatch) && - is_all_const_arg(arginfo) +struct InvokeCall + types # ::Type + lookupsig # ::Type + InvokeCall(@nospecialize(types), @nospecialize(lookupsig)) = new(types, lookupsig) end -function is_method_pure(method::Method, @nospecialize(sig), sparams::SimpleVector) - if isdefined(method, :generator) - method.generator.expand_early || return false - mi = specialize_method(method, sig, sparams) - isa(mi, MethodInstance) || return false - staged = get_staged(mi) - (staged isa CodeInfo && (staged::CodeInfo).pure) || return false - return true +struct ConstCallResults + rt::Any + exct::Any + const_result::ConstResult + effects::Effects + edge::MethodInstance + function ConstCallResults( + @nospecialize(rt), @nospecialize(exct), + const_result::ConstResult, + effects::Effects, + edge::MethodInstance) + return new(rt, exct, const_result, effects, edge) end - return method.pure end -is_method_pure(match::MethodMatch) = is_method_pure(match.method, match.spec_types, match.sparams) -function pure_eval_call(interp::AbstractInterpreter, - @nospecialize(f), applicable::Vector{Any}, arginfo::ArgInfo, sv::InferenceState) - pure_eval_eligible(interp, f, applicable, arginfo, sv) || return nothing - return _pure_eval_call(f, arginfo) -end -function _pure_eval_call(@nospecialize(f), arginfo::ArgInfo) - args = collect_const_args(arginfo) - value = try - Core._apply_pure(f, args) - catch +function abstract_call_method_with_const_args(interp::AbstractInterpreter, + result::MethodCallResult, @nospecialize(f), 
arginfo::ArgInfo, si::StmtInfo, + match::MethodMatch, sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing) + + if !const_prop_enabled(interp, sv, match) return nothing end - return Const(value) -end - -function concrete_eval_eligible(interp::AbstractInterpreter, - @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::InferenceState) - # disable concrete-evaluation if this function call is tainted by some overlayed - # method since currently there is no direct way to execute overlayed methods - isoverlayed(method_table(interp)) && !is_nonoverlayed(result.effects) && return false - return f !== nothing && - result.edge !== nothing && - is_foldable(result.effects) && - is_all_const_arg(arginfo) -end - -is_all_const_arg(arginfo::ArgInfo) = is_all_const_arg(arginfo.argtypes) -function is_all_const_arg(argtypes::Vector{Any}) - for i = 2:length(argtypes) - a = widenconditional(argtypes[i]) - isa(a, Const) || isconstType(a) || issingletontype(a) || return false + if bail_out_const_call(interp, result, si) + add_remark!(interp, sv, "[constprop] No more information to be gained") + return nothing end - return true -end - -collect_const_args(arginfo::ArgInfo) = collect_const_args(arginfo.argtypes) -function collect_const_args(argtypes::Vector{Any}) - return Any[ let a = widenconditional(argtypes[i]) - isa(a, Const) ? a.val : - isconstType(a) ? (a::DataType).parameters[1] : - (a::DataType).instance - end for i = 2:length(argtypes) ] -end - -function concrete_eval_call(interp::AbstractInterpreter, - @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::InferenceState) - concrete_eval_eligible(interp, f, result, arginfo, sv) || return nothing - args = collect_const_args(arginfo) - world = get_world_counter(interp) - value = try - Core._call_in_world_total(world, f, args...) - catch - # The evaulation threw. By :consistent-cy, we're guaranteed this would have happened at runtime - return ConstCallResults(Union{}, ConcreteResult(result.edge::MethodInstance, result.effects), result.effects) + eligibility = concrete_eval_eligible(interp, f, result, arginfo, sv) + concrete_eval_result = nothing + if eligibility === :concrete_eval + concrete_eval_result = concrete_eval_call(interp, f, result, arginfo, sv, invokecall) + # if we don't inline the result of this concrete evaluation, + # give const-prop' a chance to inline a better method body + if !may_optimize(interp) || ( + may_inline_concrete_result(concrete_eval_result.const_result::ConcreteResult) || + concrete_eval_result.rt === Bottom) # unless this call deterministically throws and thus is non-inlineable + return concrete_eval_result + end + # TODO allow semi-concrete interp for this call? + end + mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv) + mi === nothing && return concrete_eval_result + if is_constprop_recursed(result, mi, sv) + add_remark!(interp, sv, "[constprop] Edge cycle encountered") + return nothing end - if is_inlineable_constant(value) || call_result_unused(sv) - # If the constant is not inlineable, still do the const-prop, since the - # code that led to the creation of the Const may be inlineable in the same - # circumstance and may be optimizable. 
- return ConstCallResults(Const(value), ConcreteResult(result.edge::MethodInstance, EFFECTS_TOTAL, value), EFFECTS_TOTAL) + # try semi-concrete evaluation + if eligibility === :semi_concrete_eval + irinterp_result = semi_concrete_eval_call(interp, mi, result, arginfo, sv) + if irinterp_result !== nothing + return irinterp_result + end end - return nothing + # try constant prop' + return const_prop_call(interp, mi, result, arginfo, sv, concrete_eval_result) end -function const_prop_enabled(interp::AbstractInterpreter, sv::InferenceState, match::MethodMatch) +function const_prop_enabled(interp::AbstractInterpreter, sv::AbsIntState, match::MethodMatch) if !InferenceParams(interp).ipo_constant_propagation add_remark!(interp, sv, "[constprop] Disabled by parameter") return false end - method = match.method - if method.constprop == 0x02 + if is_no_constprop(match.method) add_remark!(interp, sv, "[constprop] Disabled by method parameter") return false end return true end -struct ConstCallResults - rt::Any - const_result::ConstResult - effects::Effects - ConstCallResults(@nospecialize(rt), - const_result::ConstResult, - effects::Effects) = - new(rt, const_result, effects) +function bail_out_const_call(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo) + if is_removable_if_unused(result.effects) + if isa(result.rt, Const) || call_result_unused(si) + return true + end + elseif result.rt === Bottom + if is_terminates(result.effects) && is_effect_free(result.effects) + # In the future, we may want to add `&& isa(result.exct, Const)` to + # the list of conditions here, but currently, our effect system isn't + # precise enough to let us determine :consistency of `exct`, so we + # would have to force constprop just to determine this, which is too + # expensive. + return true + end + end + return false end -function abstract_call_method_with_const_args(interp::AbstractInterpreter, result::MethodCallResult, - @nospecialize(f), arginfo::ArgInfo, match::MethodMatch, - sv::InferenceState) - if !const_prop_enabled(interp, sv, match) - return nothing - end - val = concrete_eval_call(interp, f, result, arginfo, sv) - if val !== nothing - add_backedge!(val.const_result.mi, sv) - return val +function concrete_eval_eligible(interp::AbstractInterpreter, + @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + (;effects) = result + if inbounds_option() === :off + if !is_nothrow(effects) + # Disable concrete evaluation in `--check-bounds=no` mode, + # unless it is known to not throw. + return :none + end end - mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, match, sv) - mi === nothing && return nothing - # try constant prop' - inf_cache = get_inference_cache(interp) - inf_result = cache_lookup(mi, arginfo.argtypes, inf_cache) - if inf_result === nothing - # if there might be a cycle, check to make sure we don't end up - # calling ourselves here. - let result = result # prevent capturing - if result.edgecycle && _any(InfStackUnwind(sv)) do infstate - # if the type complexity limiting didn't decide to limit the call signature (`result.edgelimited = false`) - # we can relax the cycle detection by comparing `MethodInstance`s and allow inference to - # propagate different constant elements if the recursion is finite over the lattice - return (result.edgelimited ? 
match.method === infstate.linfo.def : mi === infstate.linfo) && - any(infstate.result.overridden_by_const) - end - add_remark!(interp, sv, "[constprop] Edge cycle encountered") - return nothing + mi = result.edge + if mi !== nothing && is_foldable(effects) + if f !== nothing && is_all_const_arg(arginfo, #=start=#2) + if is_nonoverlayed(interp) || is_nonoverlayed(effects) + return :concrete_eval end + # disable concrete-evaluation if this function call is tainted by some overlayed + # method since currently there is no easy way to execute overlayed methods + add_remark!(interp, sv, "[constprop] Concrete eval disabled for overlayed methods") end - inf_result = InferenceResult(mi, (arginfo, sv)) - if !any(inf_result.overridden_by_const) - add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes") - return nothing + if !any_conditional(arginfo) + return :semi_concrete_eval end - frame = InferenceState(inf_result, #=cache=#:local, interp) - frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it - frame.parent = sv - typeinf(interp, frame) || return nothing end - result = inf_result.result - # if constant inference hits a cycle, just bail out - isa(result, InferenceState) && return nothing - add_backedge!(mi, sv) - return ConstCallResults(result, ConstPropResult(inf_result), inf_result.ipo_effects) + return :none +end + +is_all_const_arg(arginfo::ArgInfo, start::Int) = is_all_const_arg(arginfo.argtypes, start::Int) +function is_all_const_arg(argtypes::Vector{Any}, start::Int) + for i = start:length(argtypes) + argtype = widenslotwrapper(argtypes[i]) + is_const_argtype(argtype) || return false + end + return true +end + +is_const_argtype(@nospecialize argtype) = isa(argtype, Const) || isconstType(argtype) || issingletontype(argtype) + +any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes) +any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes) + +collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.argtypes, start) +function collect_const_args(argtypes::Vector{Any}, start::Int) + return Any[ let a = widenslotwrapper(argtypes[i]) + isa(a, Const) ? a.val : + isconstType(a) ? (a::DataType).parameters[1] : + (a::DataType).instance + end for i = start:length(argtypes) ] +end + +function concrete_eval_call(interp::AbstractInterpreter, + @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState, + invokecall::Union{InvokeCall,Nothing}=nothing) + args = collect_const_args(arginfo, #=start=#2) + if invokecall !== nothing + # this call should be `invoke`d, rewrite `args` back now + pushfirst!(args, f, invokecall.types) + f = invoke + end + world = get_world_counter(interp) + edge = result.edge::MethodInstance + value = try + Core._call_in_world_total(world, f, args...) + catch e + # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime. 
+ # However, at present, :consistency does not mandate the type of the exception + return ConstCallResults(Bottom, Any, ConcreteResult(edge, result.effects), result.effects, edge) + end + return ConstCallResults(Const(value), Union{}, ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge) +end + +# check if there is a cycle and duplicated inference of `mi` +function is_constprop_recursed(result::MethodCallResult, mi::MethodInstance, sv::AbsIntState) + result.edgecycle || return false + if result.edgelimited + return is_constprop_method_recursed(mi.def::Method, sv) + else + # if the type complexity limiting didn't decide to limit the call signature (as + # indicated by `result.edgelimited === false`), we can relax the cycle detection + # by comparing `MethodInstance`s and allow inference to propagate different + # constant elements if the recursion is finite over the lattice + return is_constprop_edge_recursed(mi, sv) + end end # if there's a possibility we could get a better result with these constant arguments # (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise -function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::MethodCallResult, - @nospecialize(f), arginfo::ArgInfo, match::MethodMatch, - sv::InferenceState) +function maybe_get_const_prop_profitable(interp::AbstractInterpreter, + result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, + match::MethodMatch, sv::AbsIntState) method = match.method force = force_const_prop(interp, f, method) - force || const_prop_entry_heuristic(interp, result, sv) || return nothing + force || const_prop_entry_heuristic(interp, result, si, sv) || return nothing nargs::Int = method.nargs method.isva && (nargs -= 1) length(arginfo.argtypes) < nargs && return nothing @@ -880,9 +982,8 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me add_remark!(interp, sv, "[constprop] Disabled by argument and rettype heuristics") return nothing end - all_overridden = is_all_overridden(arginfo, sv) - if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden, - sv.ipo_effects.nothrow === ALWAYS_TRUE, sv) + all_overridden = is_all_overridden(interp, arginfo, sv) + if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden, sv) add_remark!(interp, sv, "[constprop] Disabled by function heuristic") return nothing end @@ -893,15 +994,15 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me return nothing end mi = mi::MethodInstance - if !force && !const_prop_methodinstance_heuristic(interp, match, mi, arginfo, sv) + if !force && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv) add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic") return nothing end return mi end -function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, sv::InferenceState) - if call_result_unused(sv) && result.edgecycle +function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo, sv::AbsIntState) + if call_result_unused(si) && result.edgecycle add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)") return false end @@ -917,7 +1018,7 @@ function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodC else return true end - elseif isa(rt, PartialStruct) || isa(rt, InterConditional) + elseif isa(rt, PartialStruct) || isa(rt, 
InterConditional) || isa(rt, InterMustAlias) # could be improved to `Const` or a more precise wrapper return true elseif isa(rt, LimitedAccuracy) @@ -927,7 +1028,7 @@ function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodC return false else if isa(rt, Const) - if result.effects.nothrow !== ALWAYS_TRUE + if !is_nothrow(result.effects) # Could still be improved to Bottom (or at least could see the effects improved) return true end @@ -939,41 +1040,28 @@ end # determines heuristically whether if constant propagation can be worthwhile # by checking if any of given `argtypes` is "interesting" enough to be propagated -function const_prop_argument_heuristic(_::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::InferenceState) +function const_prop_argument_heuristic(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState) + 𝕃ᵢ = typeinf_lattice(interp) + argtypes = arginfo.argtypes for i in 1:length(argtypes) a = argtypes[i] - if isa(a, Conditional) && fargs !== nothing - is_const_prop_profitable_conditional(a, fargs, sv) && return true + if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && arginfo.fargs !== nothing + is_const_prop_profitable_conditional(a, arginfo.fargs, sv) && return true else - a = widenconditional(a) - has_nontrivial_const_info(a) && is_const_prop_profitable_arg(a) && return true + a = widenslotwrapper(a) + has_nontrivial_extended_info(𝕃ᵢ, a) && is_const_prop_profitable_arg(𝕃ᵢ, a) && return true end end return false end -function is_const_prop_profitable_arg(@nospecialize(arg)) - # have new information from argtypes that wasn't available from the signature - if isa(arg, PartialStruct) - for b in arg.fields - isconstType(b) && return true - is_const_prop_profitable_arg(b) && return true - end - end - isa(arg, PartialOpaque) && return true - isa(arg, Const) || return true - val = arg.val - # don't consider mutable values or Strings useful constants - return isa(val, Symbol) || isa(val, Type) || (!isa(val, String) && !ismutable(val)) -end - function is_const_prop_profitable_conditional(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState) slotid = find_constrained_arg(cnd, fargs, sv) if slotid !== nothing return true end # as a minor optimization, we just check the result is a constant or not, - # since both `has_nontrivial_const_info`/`is_const_prop_profitable_arg` return `true` + # since both `has_nontrivial_extended_info`/`is_const_prop_profitable_arg` return `true` # for `Const(::Bool)` return isa(widenconditional(cnd), Const) end @@ -990,44 +1078,47 @@ function find_constrained_arg(cnd::Conditional, fargs::Vector{Any}, sv::Inferenc end # checks if all argtypes has additional information other than what `Type` can provide -function is_all_overridden((; fargs, argtypes)::ArgInfo, sv::InferenceState) - for a in argtypes - if isa(a, Conditional) && fargs !== nothing +function is_all_overridden(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::AbsIntState) + 𝕃ᵢ = typeinf_lattice(interp) + for i in 1:length(argtypes) + a = argtypes[i] + if has_conditional(𝕃ᵢ, sv) && isa(a, Conditional) && fargs !== nothing is_const_prop_profitable_conditional(a, fargs, sv) || return false else - a = widenconditional(a) - is_forwardable_argtype(a) || return false + is_forwardable_argtype(𝕃ᵢ, widenslotwrapper(a)) || return false end end return true end function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method) - return method.constprop == 0x01 || + return is_aggressive_constprop(method) || 
InferenceParams(interp).aggressive_constant_propagation || istopfunction(f, :getproperty) || istopfunction(f, :setproperty!) end -function const_prop_function_heuristic( - _::AbstractInterpreter, @nospecialize(f), (; argtypes)::ArgInfo, - nargs::Int, all_overridden::Bool, still_nothrow::Bool, _::InferenceState) +function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f), + arginfo::ArgInfo, nargs::Int, all_overridden::Bool, sv::AbsIntState) + argtypes = arginfo.argtypes if nargs > 1 + 𝕃ᵢ = typeinf_lattice(interp) if istopfunction(f, :getindex) || istopfunction(f, :setindex!) arrty = argtypes[2] # don't propagate constant index into indexing of non-constant array if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty) # For static arrays, allow the constprop if we could possibly # deduce nothrow as a result. + still_nothrow = isa(sv, InferenceState) ? is_nothrow(sv.ipo_effects) : false if !still_nothrow || ismutabletype(arrty) return false end - elseif arrty ⊑ Array + elseif ⊑(𝕃ᵢ, arrty, Array) return false end elseif istopfunction(f, :iterate) itrty = argtypes[2] - if itrty ⊑ Array + if ⊑(𝕃ᵢ, itrty, Array) return false end end @@ -1055,10 +1146,9 @@ end # This is a heuristic to avoid trying to const prop through complicated functions # where we would spend a lot of time, but are probably unlikely to get an improved # result anyway. -function const_prop_methodinstance_heuristic( - interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance, - (; argtypes)::ArgInfo, sv::InferenceState) - method = match.method +function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, + mi::MethodInstance, arginfo::ArgInfo, sv::AbsIntState) + method = mi.def::Method if method.is_for_opaque_closure # Not inlining an opaque closure can be very expensive, so be generous # with the const-prop-ability. It is quite possible that we can't infer @@ -1066,26 +1156,32 @@ function const_prop_methodinstance_heuristic( # isn't particularly helpful here. return true end - # Peek at the inferred result for the function to determine if the optimizer - # was able to cut it down to something simple (inlineable in particular). - # If so, there's a good chance we might be able to const prop all the way - # through and learn something new. 
- if isdefined(method, :source) && ccall(:jl_ir_flag_inlineable, Bool, (Any,), method.source) + # now check if the source of this method instance is inlineable, since the extended type + # information we have here would be discarded if it is not inlined into a callee context + # (modulo the inferred return type that can be potentially refined) + if is_declared_inline(method) + # this method is declared as `@inline` and will be inlined + return true + end + flag = get_curr_ssaflag(sv) + if is_stmt_inline(flag) + # force constant propagation for a call that is going to be inlined + # since the inliner will try to find this constant result + # if these constant arguments arrive there return true + elseif is_stmt_noinline(flag) + # this call won't be inlined, thus this constant-prop' will most likely be unfruitful + return false else - flag = get_curr_ssaflag(sv) - if is_stmt_inline(flag) - # force constant propagation for a call that is going to be inlined - # since the inliner will try to find this constant result - # if these constant arguments arrive there - return true - elseif is_stmt_noinline(flag) - # this call won't be inlined, thus this constant-prop' will most likely be unfruitful - return false - else - code = get(code_cache(interp), mi, nothing) - if isdefined(code, :inferred) && inlining_policy( - interp, code.inferred, IR_FLAG_NULL, mi, argtypes) !== nothing + # Peek at the inferred result for the method to determine if the optimizer + # was able to cut it down to something simple (inlineable in particular). + # If so, there will be a good chance we might be able to const prop + # all the way through and learn something new. + code = get(code_cache(interp), mi, nothing) + if isa(code, CodeInstance) + inferred = @atomic :monotonic code.inferred + # TODO propagate a specific `CallInfo` that conveys information about this call + if inlining_policy(interp, inferred, NoCallInfo(), IR_FLAG_NULL) !== nothing return true end end @@ -1093,6 +1189,146 @@ function const_prop_methodinstance_heuristic( return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful end +function semi_concrete_eval_call(interp::AbstractInterpreter, + mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + world = frame_world(sv) + mi_cache = WorldView(code_cache(interp), world) + code = get(mi_cache, mi, nothing) + if code !== nothing + irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world) + if irsv !== nothing + irsv.parent = sv + rt, (nothrow, noub) = ir_abstract_constant_propagation(interp, irsv) + @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp" + if !(isa(rt, Type) && hasintersect(rt, Bool)) + ir = irsv.ir + # TODO (#48913) enable double inlining pass when there are any calls + # that are newly resolved by irinterp + # state = InliningState(interp) + # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv)) + effects = result.effects + if nothrow + effects = Effects(effects; nothrow=true) + end + if noub + effects = Effects(effects; noub=ALWAYS_TRUE) + end + exct = refine_exception_type(result.exct, effects) + return ConstCallResults(rt, exct, SemiConcreteResult(mi, ir, effects), effects, mi) + end + end + end + return nothing +end + +function const_prop_call(interp::AbstractInterpreter, + mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState, + concrete_eval_result::Union{Nothing, ConstCallResults}=nothing) + inf_cache = 
get_inference_cache(interp) + 𝕃ᵢ = typeinf_lattice(interp) + inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache) + if inf_result === nothing + # fresh constant prop' + argtypes = has_conditional(𝕃ᵢ, sv) ? ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes) + inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp)) + if !any(inf_result.overridden_by_const) + add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes") + return nothing + end + frame = InferenceState(inf_result, #=cache_mode=#:local, interp) + if frame === nothing + add_remark!(interp, sv, "[constprop] Could not retrieve the source") + return nothing # this is probably a bad generated function (unsound), but just ignore it + end + frame.parent = sv + if !typeinf(interp, frame) + add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle") + return nothing + end + @assert inf_result.result !== nothing + if concrete_eval_result !== nothing + # override return type and effects with concrete evaluation result if available + inf_result.result = concrete_eval_result.rt + inf_result.ipo_effects = concrete_eval_result.effects + end + else + # found the cache for this constant prop' + if inf_result.result === nothing + add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle") + return nothing + end + end + return ConstCallResults(inf_result.result, inf_result.exc_result, + ConstPropResult(inf_result), inf_result.ipo_effects, mi) +end + +# TODO implement MustAlias forwarding + +struct ConditionalArgtypes <: ForwardableArgtypes + arginfo::ArgInfo + sv::InferenceState +end + +""" + matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, + conditional_argtypes::ConditionalArgtypes) + +The implementation is able to forward `Conditional` of `conditional_argtypes`, +as well as the other general extended lattice information. +""" +function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, + conditional_argtypes::ConditionalArgtypes) + (; arginfo, sv) = conditional_argtypes + (; fargs, argtypes) = arginfo + given_argtypes = Vector{Any}(undef, length(argtypes)) + def = linfo.def::Method + nargs = Int(def.nargs) + cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo) + local condargs = nothing + for i in 1:length(argtypes) + argtype = argtypes[i] + # forward `Conditional` if it conveys a constraint on any other argument + if isa(argtype, Conditional) && fargs !== nothing + cnd = argtype + slotid = find_constrained_arg(cnd, fargs, sv) + if slotid !== nothing + # using union-split signature, we may be able to narrow down `Conditional` + sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid]) + thentype = tmeet(cnd.thentype, sigt) + elsetype = tmeet(cnd.elsetype, sigt) + if thentype === Bottom && elsetype === Bottom + # we accidentally proved this method match is impossible + # TODO bail out here immediately rather than just propagating Bottom ? 
+ given_argtypes[i] = Bottom + else + if condargs === nothing + condargs = Tuple{Int,Int}[] + end + push!(condargs, (slotid, i)) + given_argtypes[i] = Conditional(slotid, thentype, elsetype) + end + continue + end + end + given_argtypes[i] = widenslotwrapper(argtype) + end + if condargs !== nothing + given_argtypes = let condargs=condargs + va_process_argtypes(𝕃, given_argtypes, linfo) do isva_given_argtypes::Vector{Any}, last::Int + # invalidate `Conditional` imposed on varargs + for (slotid, i) in condargs + if slotid ≥ last && (1 ≤ i ≤ length(isva_given_argtypes)) # `Conditional` is already widened to vararg-tuple otherwise + isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i]) + end + end + end + end + else + given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo) + end + return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes) +end + # This is only for use with `Conditional`. # In general, usage of this is wrong. function ssa_def_slot(@nospecialize(arg), sv::InferenceState) @@ -1144,26 +1380,45 @@ function ssa_def_slot(@nospecialize(arg), sv::InferenceState) return arg end +# No slots in irinterp +ssa_def_slot(@nospecialize(arg), sv::IRInterpretationState) = nothing + +struct AbstractIterationResult + cti::Vector{Any} + info::MaybeAbstractIterationInfo + ai_effects::Effects +end +AbstractIterationResult(cti::Vector{Any}, info::MaybeAbstractIterationInfo) = + AbstractIterationResult(cti, info, EFFECTS_TOTAL) + # `typ` is the inferred type for expression `arg`. # if the expression constructs a container (e.g. `svec(x,y,z)`), # refine its type to an array of element types. # Union of Tuples of the same length is converted to Tuple of Unions. # returns an array of types -function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(typ), sv::InferenceState) - if isa(typ, PartialStruct) && typ.typ.name === Tuple.name - return typ.fields, nothing +function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(typ), + sv::AbsIntState) + if isa(typ, PartialStruct) + widet = typ.typ + if isa(widet, DataType) + if widet.name === Tuple.name + return AbstractIterationResult(typ.fields, nothing) + elseif widet.name === _NAMEDTUPLE_NAME + return AbstractIterationResult(typ.fields, nothing) + end + end end if isa(typ, Const) val = typ.val - if isa(val, SimpleVector) || isa(val, Tuple) - return Any[ Const(val[i]) for i in 1:length(val) ], nothing # avoid making a tuple Generator here! + if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple) + return AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing) # avoid making a tuple Generator here! 
end end tti0 = widenconst(typ) tti = unwrap_unionall(tti0) - if isa(tti, DataType) && tti.name === NamedTuple_typename + if isa(tti, DataType) && tti.name === _NAMEDTUPLE_NAME # A NamedTuple iteration is the same as the iteration of its Tuple parameter: # compute a new `tti == unwrap_unionall(tti0)` based on that Tuple type tti = unwraptv(tti.parameters[2]) @@ -1171,91 +1426,111 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft) end if isa(tti, Union) utis = uniontypes(tti) - if _any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis) - return Any[Vararg{Any}], nothing - end - ltp = length((utis[1]::DataType).parameters) - for t in utis - if length((t::DataType).parameters) != ltp - return Any[Vararg{Any}], nothing + # refine the Union to remove elements that are not valid tags for objects + filter!(@nospecialize(x) -> valid_as_lattice(x, true), utis) + if length(utis) == 0 + return AbstractIterationResult(Any[], nothing) # oops, this statement was actually unreachable + elseif length(utis) == 1 + tti = utis[1] + tti0 = rewrap_unionall(tti, tti0) + else + if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis) + return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) end - end - result = Any[ Union{} for _ in 1:ltp ] - for t in utis - tps = (t::DataType).parameters - _all(valid_as_lattice, tps) || continue - for j in 1:ltp - result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0)) + ltp = length((utis[1]::DataType).parameters) + for t in utis + if length((t::DataType).parameters) != ltp + return AbstractIterationResult(Any[Vararg{Any}], nothing) + end + end + result = Any[ Union{} for _ in 1:ltp ] + for t in utis + tps = (t::DataType).parameters + for j in 1:ltp + @assert valid_as_lattice(tps[j], true) + result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0)) + end end + return AbstractIterationResult(result, nothing) end - return result, nothing - elseif tti0 <: Tuple + end + if tti0 <: Tuple if isa(tti0, DataType) - return Any[ p for p in tti0.parameters ], nothing + return AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing) elseif !isa(tti, DataType) - return Any[Vararg{Any}], nothing + return AbstractIterationResult(Any[Vararg{Any}], nothing) else len = length(tti.parameters) last = tti.parameters[len] va = isvarargtype(last) elts = Any[ fieldtype(tti0, i) for i = 1:len ] if va - elts[len] = Vararg{elts[len]} + if elts[len] === Union{} + pop!(elts) + else + elts[len] = Vararg{elts[len]} + end end - return elts, nothing + return AbstractIterationResult(elts, nothing) end - elseif tti0 === SimpleVector || tti0 === Any - return Any[Vararg{Any}], nothing - elseif tti0 <: Array - return Any[Vararg{eltype(tti0)}], nothing + elseif tti0 === SimpleVector + return AbstractIterationResult(Any[Vararg{Any}], nothing) + elseif tti0 === Any + return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) + elseif tti0 <: Array || tti0 <: GenericMemory + if eltype(tti0) === Union{} + return AbstractIterationResult(Any[], nothing) + end + return AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing) else return abstract_iteration(interp, itft, typ, sv) end end # simulate iteration protocol on container type up to fixpoint -function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), sv::InferenceState) +function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), 
sv::AbsIntState) if isa(itft, Const) iteratef = itft.val else - return Any[Vararg{Any}], nothing + return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()) end @assert !isvarargtype(itertype) - call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), sv) + call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv) stateordonet = call.rt info = call.info # Return Bottom if this is not an iterator. # WARNING: Changes to the iteration protocol must be reflected here, # this is not just an optimization. # TODO: this doesn't realize that Array, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol - stateordonet === Bottom && return Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, call.effects, info)]) + stateordonet === Bottom && return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call.effects, info)], true)) valtype = statetype = Bottom ret = Any[] calls = CallMeta[call] stateordonet_widened = widenconst(stateordonet) + 𝕃ᵢ = typeinf_lattice(interp) - # Try to unroll the iteration up to MAX_TUPLE_SPLAT, which covers any finite + # Try to unroll the iteration up to max_tuple_splat, which covers any finite # length iterators, or interesting prefix while true if stateordonet_widened === Nothing - return ret, AbstractIterationInfo(calls) + return AbstractIterationResult(ret, AbstractIterationInfo(calls, true)) end - if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).MAX_TUPLE_SPLAT + if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat break end if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2 break end - nstatetype = getfield_tfunc(stateordonet, Const(2)) + nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2)) # If there's no new information in this statetype, don't bother continuing, # the iterator won't be finite. - if nstatetype ⊑ statetype - return Any[Bottom], nothing + if ⊑(𝕃ᵢ, nstatetype, statetype) + return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS) end - valtype = getfield_tfunc(stateordonet, Const(1)) + valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1)) push!(ret, valtype) statetype = nstatetype - call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), sv) + call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv) stateordonet = call.rt stateordonet_widened = widenconst(stateordonet) push!(calls, call) @@ -1264,8 +1539,7 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n # the precise (potentially const) state type # statetype and valtype are reinitialized in the first iteration below from the # (widened) stateordonet, which has not yet been fully analyzed in the loop above - statetype = Bottom - valtype = Bottom + valtype = statetype = Bottom may_have_terminated = Nothing <: stateordonet_widened while valtype !== Any nounion = typeintersect(stateordonet_widened, Tuple{Any,Any}) @@ -1280,7 +1554,7 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n # ... but cannot terminate if !may_have_terminated # ... 
and cannot have terminated prior to this loop - return Any[Bottom], nothing + return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects()) else # iterator may have terminated prior to this loop, but not during it valtype = Bottom @@ -1290,21 +1564,23 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n end valtype = tmerge(valtype, nounion.parameters[1]) statetype = tmerge(statetype, nounion.parameters[2]) - stateordonet = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), sv).rt + call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv) + push!(calls, call) + stateordonet = call.rt stateordonet_widened = widenconst(stateordonet) end if valtype !== Union{} push!(ret, Vararg{valtype}) end - return ret, nothing + return AbstractIterationResult(ret, AbstractIterationInfo(calls, false)) end # do apply(af, fargs...), where af is a function value -function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState, - max_methods::Int = get_max_methods(sv.mod, interp)) +function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, + sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv)) itft = argtype_by_index(argtypes, 2) aft = argtype_by_index(argtypes, 3) - (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, EFFECTS_THROWS, false) + (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) aargtypes = argtype_tail(argtypes, 4) aftw = widenconst(aft) if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw)) @@ -1312,12 +1588,12 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:: add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type") # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin - return CallMeta(Any, Effects(), false) + return CallMeta(Any, Any, Effects(), NoCallInfo()) end end res = Union{} nargs = length(aargtypes) - splitunions = 1 < unionsplitcost(aargtypes) <= InferenceParams(interp).MAX_APPLY_UNION_ENUM + splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum ctypes = [Any[aft]] infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]] effects = EFFECTS_TOTAL @@ -1326,13 +1602,9 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:: infos′ = Vector{MaybeAbstractIterationInfo}[] for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]]) if !isvarargtype(ti) - cti_info = precise_container_type(interp, itft, ti, sv) - cti = cti_info[1]::Vector{Any} - info = cti_info[2]::MaybeAbstractIterationInfo + (;cti, info, ai_effects) = precise_container_type(interp, itft, ti, sv) else - cti_info = precise_container_type(interp, itft, unwrapva(ti), sv) - cti = cti_info[1]::Vector{Any} - info = cti_info[2]::MaybeAbstractIterationInfo + (;cti, info, ai_effects) = precise_container_type(interp, itft, unwrapva(ti), sv) # We can't represent a repeating sequence of the same types, # so tmerge everything together to get one type that represents # everything. 
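# -- Illustrative aside (editor's sketch, hypothetical name; not part of the patch hunks above) --
# The Union handling in `precise_container_type` ("Union of Tuples of the same length is
# converted to Tuple of Unions") combines with the union splitting in `abstract_apply`, so
# splatting a union of same-length tuples still yields a precise result. On a recent Julia
# this is expected to infer as `Union{Float64, Int}`:
splat_union(t::Union{Tuple{Int,Int}, Tuple{Float64,Float64}}) = +(t...)
@show Base.return_types(splat_union, Tuple{Union{Tuple{Int,Int}, Tuple{Float64,Float64}}})
# ------------------------------------------------------------------------------------------------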
@@ -1345,16 +1617,22 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:: end cti = Any[Vararg{argt}] end - if _any(t -> t === Bottom, cti) + effects = merge_effects(effects, ai_effects) + if info !== nothing + for call in info.each + effects = merge_effects(effects, call.effects) + end + end + if any(@nospecialize(t) -> t === Bottom, cti) continue end for j = 1:length(ctypes) ct = ctypes[j]::Vector{Any} if isvarargtype(ct[end]) - # This is vararg, we're not gonna be able to do any inling, + # This is vararg, we're not gonna be able to do any inlining, # drop the info info = nothing - tail = tuple_tail_elem(unwrapva(ct[end]), cti) + tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti) push!(ctypes´, push!(ct[1:(end - 1)], tail)) else push!(ctypes´, append!(ct[:], cti)) @@ -1367,7 +1645,10 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:: end retinfos = ApplyCallInfo[] retinfo = UnionSplitApplyCallInfo(retinfos) - for i = 1:length(ctypes) + napplicable = length(ctypes) + seen = 0 + exct = effects.nothrow ? Union{} : Any + for i = 1:napplicable ct = ctypes[i] arginfo = infos[i] lct = length(ct) @@ -1375,26 +1656,32 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:: for i = 1:lct-1 cti = ct[i] if isvarargtype(cti) - ct[i] = tuple_tail_elem(unwrapva(cti), ct[(i+1):lct]) + ct[i] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(i+1):lct]) resize!(ct, i) break end end - call = abstract_call(interp, ArgInfo(nothing, ct), sv, max_methods) + call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods) + seen += 1 push!(retinfos, ApplyCallInfo(call.info, arginfo)) - res = tmerge(res, call.rt) - effects = tristate_merge(effects, call.effects) - if bail_out_apply(interp, res, sv) - if i != length(ctypes) - # No point carrying forward the info, we're not gonna inline it anyway - retinfo = false - end + res = tmerge(typeinf_lattice(interp), res, call.rt) + exct = tmerge(typeinf_lattice(interp), exct, call.exct) + effects = merge_effects(effects, call.effects) + if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv) + add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.") break end end + if seen ≠ napplicable + # there is unanalyzed candidate, widen type and effects to the top + res = Any + exct = Any + effects = Effects() + retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing + end # TODO: Add a special info type to capture all the iteration info. 
# For now, only propagate info if we don't also union-split the iteration - return CallMeta(res, effects, retinfo) + return CallMeta(res, exct, effects, retinfo) end function argtype_by_index(argtypes::Vector{Any}, i::Int) @@ -1415,11 +1702,72 @@ function argtype_tail(argtypes::Vector{Any}, i::Int) return argtypes[i:n] end -function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo, - sv::InferenceState, max_methods::Int) +struct ConditionalTypes + thentype + elsetype + ConditionalTypes(thentype, elsetype) = (@nospecialize; new(thentype, elsetype)) +end + +@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int, + @nospecialize(rt)) + if isa(rt, Const) + xt = widenslotwrapper(xt) + if rt.val === false + return ConditionalTypes(Bottom, xt) + elseif rt.val === true + return ConditionalTypes(xt, Bottom) + end + end + return isa_condition(xt, ty, max_union_splitting) +end +@inline function isa_condition(@nospecialize(xt), @nospecialize(ty), max_union_splitting::Int) + tty_ub, isexact_tty = instanceof_tfunc(ty, true) + tty = widenconst(xt) + if isexact_tty && !isa(tty_ub, TypeVar) + tty_lb = tty_ub # TODO: this would be wrong if !isexact_tty, but instanceof_tfunc doesn't preserve this info + if !has_free_typevars(tty_lb) && !has_free_typevars(tty_ub) + thentype = typeintersect(tty, tty_ub) + if iskindtype(tty_ub) && thentype !== Bottom + # `typeintersect` may be unable narrow down `Type`-type + thentype = tty_ub + end + valid_as_lattice(thentype, true) || (thentype = Bottom) + elsetype = typesubtract(tty, tty_lb, max_union_splitting) + return ConditionalTypes(thentype, elsetype) + end + end + return nothing +end + +@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int, + @nospecialize(rt)) + thentype = c + elsetype = widenslotwrapper(xt) + if rt === Const(false) + thentype = Bottom + elseif rt === Const(true) + elsetype = Bottom + elseif elsetype isa Type && issingletontype(typeof(c.val)) # can only widen a if it is a singleton + elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting) + end + return ConditionalTypes(thentype, elsetype) +end +@inline function egal_condition(c::Const, @nospecialize(xt), max_union_splitting::Int) + thentype = c + elsetype = widenslotwrapper(xt) + if elsetype isa Type && issingletontype(typeof(c.val)) # can only widen a if it is a singleton + elsetype = typesubtract(elsetype, typeof(c.val), max_union_splitting) + end + return ConditionalTypes(thentype, elsetype) +end + +function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo, + sv::AbsIntState) @nospecialize f la = length(argtypes) - if f === Core.ifelse && fargs isa Vector{Any} && la == 4 + 𝕃ᵢ = typeinf_lattice(interp) + ⊑ᵢ = ⊑(𝕃ᵢ) + if has_conditional(𝕃ᵢ, sv) && f === Core.ifelse && fargs isa Vector{Any} && la == 4 cnd = argtypes[2] if isa(cnd, Conditional) newcnd = widenconditional(cnd) @@ -1433,82 +1781,124 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs a = ssa_def_slot(fargs[3], sv) b = ssa_def_slot(fargs[4], sv) if isa(a, SlotNumber) && cnd.slot == slot_id(a) - tx = (cnd.thentype ⊑ tx ? cnd.thentype : tmeet(tx, widenconst(cnd.thentype))) + tx = (cnd.thentype ⊑ᵢ tx ? cnd.thentype : tmeet(𝕃ᵢ, tx, widenconst(cnd.thentype))) end if isa(b, SlotNumber) && cnd.slot == slot_id(b) - ty = (cnd.elsetype ⊑ ty ? cnd.elsetype : tmeet(ty, widenconst(cnd.elsetype))) + ty = (cnd.elsetype ⊑ᵢ ty ? 
cnd.elsetype : tmeet(𝕃ᵢ, ty, widenconst(cnd.elsetype))) end - return tmerge(tx, ty) + return tmerge(𝕃ᵢ, tx, ty) end end end - rt = builtin_tfunction(interp, f, argtypes[2:end], sv) - if (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any}) + ft = popfirst!(argtypes) + rt = builtin_tfunction(interp, f, argtypes, sv) + pushfirst!(argtypes, ft) + if has_mustalias(𝕃ᵢ) && f === getfield && isa(fargs, Vector{Any}) && la ≥ 3 + a3 = argtypes[3] + if isa(a3, Const) + if rt !== Bottom && !isalreadyconst(rt) + var = ssa_def_slot(fargs[2], sv) + if isa(var, SlotNumber) + vartyp = widenslotwrapper(argtypes[2]) + fldidx = maybe_const_fldidx(vartyp, a3.val) + if fldidx !== nothing + # wrap this aliasable field into `MustAlias` for possible constraint propagations + return MustAlias(var, vartyp, fldidx, rt) + end + end + end + end + elseif has_conditional(𝕃ᵢ, sv) && (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any}) # perform very limited back-propagation of type information for `is` and `isa` if f === isa + # try splitting value argument, based on types a = ssa_def_slot(fargs[2], sv) + a2 = argtypes[2] + a3 = argtypes[3] if isa(a, SlotNumber) - aty = widenconst(argtypes[2]) - if rt === Const(false) - return Conditional(a, Union{}, aty) - elseif rt === Const(true) - return Conditional(a, aty, Union{}) + cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting, rt) + if cndt !== nothing + return Conditional(a, cndt.thentype, cndt.elsetype) end - tty_ub, isexact_tty = instanceof_tfunc(argtypes[3]) - if isexact_tty && !isa(tty_ub, TypeVar) - tty_lb = tty_ub # TODO: this would be wrong if !isexact_tty, but instanceof_tfunc doesn't preserve this info - if !has_free_typevars(tty_lb) && !has_free_typevars(tty_ub) - ifty = typeintersect(aty, tty_ub) - valid_as_lattice(ifty) || (ifty = Union{}) - elty = typesubtract(aty, tty_lb, InferenceParams(interp).MAX_UNION_SPLITTING) - return Conditional(a, ifty, elty) + end + if isa(a2, MustAlias) + if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) + cndt = isa_condition(a2, a3, InferenceParams(interp).max_union_splitting) + if cndt !== nothing + return form_mustalias_conditional(a2, cndt.thentype, cndt.elsetype) end end end + # try splitting type argument, based on value + if isdispatchelem(widenconst(a2)) && a3 isa Union && !has_free_typevars(a3) && !isa(rt, Const) + b = ssa_def_slot(fargs[3], sv) + if isa(b, SlotNumber) + # !(x isa T) implies !(Type{a2} <: T) + # TODO: complete splitting, based on which portions of the Union a3 for which isa_tfunc returns Const(true) or Const(false) instead of Bool + elsetype = typesubtract(a3, Type{widenconst(a2)}, InferenceParams(interp).max_union_splitting) + return Conditional(b, a3, elsetype) + end + end elseif f === (===) a = ssa_def_slot(fargs[2], sv) b = ssa_def_slot(fargs[3], sv) aty = argtypes[2] bty = argtypes[3] # if doing a comparison to a singleton, consider returning a `Conditional` instead - if isa(aty, Const) && isa(b, SlotNumber) - if rt === Const(false) - aty = Union{} - elseif rt === Const(true) - bty = Union{} - elseif bty isa Type && isdefined(typeof(aty.val), :instance) # can only widen a if it is a singleton - bty = typesubtract(bty, typeof(aty.val), InferenceParams(interp).MAX_UNION_SPLITTING) + if isa(aty, Const) + if isa(b, SlotNumber) + cndt = egal_condition(aty, bty, InferenceParams(interp).max_union_splitting, rt) + return Conditional(b, cndt.thentype, cndt.elsetype) + elseif isa(bty, 
MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) + cndt = egal_condition(aty, bty.fldtyp, InferenceParams(interp).max_union_splitting) + return form_mustalias_conditional(bty, cndt.thentype, cndt.elsetype) end - return Conditional(b, aty, bty) - end - if isa(bty, Const) && isa(a, SlotNumber) - if rt === Const(false) - bty = Union{} - elseif rt === Const(true) - aty = Union{} - elseif aty isa Type && isdefined(typeof(bty.val), :instance) # same for b - aty = typesubtract(aty, typeof(bty.val), InferenceParams(interp).MAX_UNION_SPLITTING) + elseif isa(bty, Const) + if isa(a, SlotNumber) + cndt = egal_condition(bty, aty, InferenceParams(interp).max_union_splitting, rt) + return Conditional(a, cndt.thentype, cndt.elsetype) + elseif isa(aty, MustAlias) && !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) + cndt = egal_condition(bty, aty.fldtyp, InferenceParams(interp).max_union_splitting) + return form_mustalias_conditional(aty, cndt.thentype, cndt.elsetype) + end + end + # TODO enable multiple constraints propagation here, there are two possible improvements: + # 1. propagate constraints for both lhs and rhs + # 2. we can propagate both constraints on aliased fields and slots + # As for 2, for now, we prioritize constraints on aliased fields, since currently + # different slots that represent the same object can't share same field constraint, + # and thus binding `MustAlias` to the other slot is less likely useful + if !isa(rt, Const) # skip refinement when the field is known precisely (just optimization) + if isa(bty, MustAlias) + thentype = widenslotwrapper(aty) + elsetype = bty.fldtyp + if thentype ⊏ elsetype + return form_mustalias_conditional(bty, thentype, elsetype) + end + elseif isa(aty, MustAlias) + thentype = widenslotwrapper(bty) + elsetype = aty.fldtyp + if thentype ⊏ elsetype + return form_mustalias_conditional(aty, thentype, elsetype) + end end - return Conditional(a, bty, aty) end # narrow the lattice slightly (noting the dependency on one of the slots), to promote more effective smerge if isa(b, SlotNumber) - return Conditional(b, rt === Const(false) ? Union{} : bty, rt === Const(true) ? Union{} : bty) - end - if isa(a, SlotNumber) - return Conditional(a, rt === Const(false) ? Union{} : aty, rt === Const(true) ? Union{} : aty) + thentype = rt === Const(false) ? Bottom : widenslotwrapper(bty) + elsetype = rt === Const(true) ? Bottom : widenslotwrapper(bty) + return Conditional(b, thentype, elsetype) + elseif isa(a, SlotNumber) + thentype = rt === Const(false) ? Bottom : widenslotwrapper(aty) + elsetype = rt === Const(true) ? Bottom : widenslotwrapper(aty) + return Conditional(a, thentype, elsetype) end elseif f === Core.Compiler.not_int aty = argtypes[2] if isa(aty, Conditional) - ifty = aty.elsetype - elty = aty.thentype - if rt === Const(false) - ifty = Union{} - elseif rt === Const(true) - elty = Union{} - end - return Conditional(aty.slot, ifty, elty) + thentype = rt === Const(false) ? Bottom : aty.elsetype + elsetype = rt === Const(true) ? 
Bottom : aty.thentype + return Conditional(aty.slot, thentype, elsetype) end elseif f === isdefined uty = argtypes[2] @@ -1518,7 +1908,7 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs thentype = Bottom elsetype = Bottom for ty in uniontypes(uty) - cnd = isdefined_tfunc(ty, fld) + cnd = isdefined_tfunc(𝕃ᵢ, ty, fld) if isa(cnd, Const) if cnd.val::Bool thentype = tmerge(thentype, ty) @@ -1538,65 +1928,80 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs return rt end -function abstract_call_unionall(argtypes::Vector{Any}) - if length(argtypes) == 3 - canconst = true +function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{Any}, call::CallMeta) + na = length(argtypes) + if isvarargtype(argtypes[end]) + if na ≤ 2 + return CallMeta(Any, Any, EFFECTS_THROWS, call.info) + elseif na > 4 + return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + end + a2 = argtypes[2] + a3 = unwrapva(argtypes[3]) + nothrow = false + elseif na == 3 + a2 = argtypes[2] a3 = argtypes[3] - if isa(a3, Const) - body = a3.val - elseif isType(a3) - body = a3.parameters[1] + ⊑ᵢ = ⊑(typeinf_lattice(interp)) + nothrow = a2 ⊑ᵢ TypeVar && (a3 ⊑ᵢ Type || a3 ⊑ᵢ TypeVar) + else + return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + end + canconst = true + if isa(a3, Const) + body = a3.val + elseif isType(a3) + body = a3.parameters[1] + canconst = false + else + return CallMeta(Any, Any, Effects(EFFECTS_TOTAL; nothrow), call.info) + end + if !(isa(body, Type) || isa(body, TypeVar)) + return CallMeta(Any, Any, EFFECTS_THROWS, call.info) + end + if has_free_typevars(body) + if isa(a2, Const) + tv = a2.val + elseif isa(a2, PartialTypeVar) + tv = a2.tv canconst = false else - return Any - end - if !isa(body, Type) && !isa(body, TypeVar) - return Any + return CallMeta(Any, Any, EFFECTS_THROWS, call.info) end - if has_free_typevars(body) - a2 = argtypes[2] - if isa(a2, Const) - tv = a2.val - elseif isa(a2, PartialTypeVar) - tv = a2.tv - canconst = false - else - return Any - end - !isa(tv, TypeVar) && return Any - body = UnionAll(tv, body) - end - ret = canconst ? Const(body) : Type{body} - return ret + isa(tv, TypeVar) || return CallMeta(Any, Any, EFFECTS_THROWS, call.info) + body = UnionAll(tv, body) end - return Any + ret = canconst ? 
Const(body) : Type{body} + return CallMeta(ret, Any, Effects(EFFECTS_TOTAL; nothrow), call.info) end -function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::InferenceState) +function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::AbsIntState) ft′ = argtype_by_index(argtypes, 2) ft = widenconst(ft′) - ft === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, false) - (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3)) - types === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, false) - isexact || return CallMeta(Any, Effects(), false) + ft === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false) + isexact || return CallMeta(Any, Any, Effects(), NoCallInfo()) + unwrapped = unwrap_unionall(types) + if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name + return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + end argtype = argtypes_to_type(argtype_tail(argtypes, 4)) nargtype = typeintersect(types, argtype) - nargtype === Bottom && return CallMeta(Bottom, EFFECTS_THROWS, false) - nargtype isa DataType || return CallMeta(Any, Effects(), false) # other cases are not implemented below - isdispatchelem(ft) || return CallMeta(Any, Effects(), false) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below + nargtype === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + nargtype isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo()) # other cases are not implemented below + isdispatchelem(ft) || return CallMeta(Any, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below ft = ft::DataType - types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)::Type + lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type nargtype = Tuple{ft, nargtype.parameters...} argtype = Tuple{ft, argtype.parameters...} - match, valid_worlds, overlayed = findsup(types, method_table(interp)) - match === nothing && return CallMeta(Any, Effects(), false) + match, valid_worlds = findsup(lookupsig, method_table(interp)) + match === nothing && return CallMeta(Any, Any, Effects(), NoCallInfo()) update_valid_age!(sv, valid_worlds) method = match.method tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector ti = tienv[1]; env = tienv[2]::SimpleVector - result = abstract_call_method(interp, method, ti, env, false, sv) - (; rt, edge, effects) = result - edge !== nothing && add_backedge!(edge::MethodInstance, sv) + result = abstract_call_method(interp, method, ti, env, false, si, sv) + (; rt, edge, effects, volatile_inf_result) = result match = MethodMatch(ti, env, method, argtype <: method.sig) res = nothing sig = match.spec_types @@ -1608,16 +2013,21 @@ function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgIn # t, a = ti.parameters[i], argtypes′[i] # argtypes′[i] = t ⊑ a ? t : a # end - const_call_result = abstract_call_method_with_const_args(interp, result, - overlayed ? 
nothing : singleton_type(ft′), arginfo, match, sv) - const_result = nothing + 𝕃ₚ = ipo_lattice(interp) + f = singleton_type(ft′) + invokecall = InvokeCall(types, lookupsig) + const_call_result = abstract_call_method_with_const_args(interp, + result, f, arginfo, si, match, sv, invokecall) + const_result = volatile_inf_result if const_call_result !== nothing - if const_call_result.rt ⊑ rt - (; rt, effects, const_result) = const_call_result + if ⊑(𝕃ₚ, const_call_result.rt, rt) + (; rt, effects, const_result, edge) = const_call_result end end - effects = Effects(effects; nonoverlayed=!overlayed) - return CallMeta(from_interprocedural!(rt, sv, arginfo, sig), effects, InvokeCallInfo(match, const_result)) + rt = from_interprocedural!(interp, rt, sv, arginfo, sig) + info = InvokeCallInfo(match, const_result) + edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge) + return CallMeta(rt, Any, effects, info) end function invoke_rewrite(xs::Vector{Any}) @@ -1627,52 +2037,66 @@ function invoke_rewrite(xs::Vector{Any}) return newxs end -function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState) +function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState) if length(argtypes) == 3 finalizer_argvec = Any[argtypes[2], argtypes[3]] - call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), sv, 1) - return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects)) + call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1) + return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects)) end - return CallMeta(Nothing, Effects(), false) + return CallMeta(Nothing, Any, Effects(), NoCallInfo()) +end + +function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState) + na = length(argtypes) + 𝕃ᵢ = typeinf_lattice(interp) + if na == 2 + argtype2 = argtypes[2] + if isvarargtype(argtype2) + exct = tmerge(𝕃ᵢ, unwrapva(argtype2), ArgumentError) + else + exct = argtype2 + end + elseif na == 3 && isvarargtype(argtypes[3]) + exct = tmerge(𝕃ᵢ, argtypes[2], ArgumentError) + else + exct = ArgumentError + end + return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()) end # call where the function is known exactly function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), - arginfo::ArgInfo, sv::InferenceState, - max_methods::Int = get_max_methods(f, sv.mod, interp)) + arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, + max_methods::Int = get_max_methods(interp, f, sv)) (; fargs, argtypes) = arginfo la = length(argtypes) - + 𝕃ᵢ = typeinf_lattice(interp) if isa(f, Builtin) if f === _apply_iterate - return abstract_apply(interp, argtypes, sv, max_methods) + return abstract_apply(interp, argtypes, si, sv, max_methods) elseif f === invoke - return abstract_invoke(interp, arginfo, sv) + return abstract_invoke(interp, arginfo, si, sv) elseif f === modifyfield! 
- return abstract_modifyfield!(interp, argtypes, sv) + return abstract_modifyfield!(interp, argtypes, si, sv) elseif f === Core.finalizer return abstract_finalizer(interp, argtypes, sv) - end - rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods) - return CallMeta(rt, builtin_effects(f, argtypes, rt), false) + elseif f === applicable + return abstract_applicable(interp, argtypes, sv, max_methods) + elseif f === throw + return abstract_throw(interp, argtypes, sv) + end + rt = abstract_call_builtin(interp, f, arginfo, sv) + ft = popfirst!(argtypes) + effects = builtin_effects(𝕃ᵢ, f, argtypes, rt) + pushfirst!(argtypes, ft) + return CallMeta(rt, effects.nothrow ? Union{} : Any, effects, NoCallInfo()) elseif isa(f, Core.OpaqueClosure) # calling an OpaqueClosure about which we have no information returns no information - return CallMeta(Any, Effects(), false) - elseif f === Core.kwfunc - if la == 2 - aty = argtypes[2] - if !isvarargtype(aty) - ft = widenconst(aty) - if isa(ft, DataType) && isdefined(ft.name, :mt) && isdefined(ft.name.mt, :kwsorter) - return CallMeta(Const(ft.name.mt.kwsorter), EFFECTS_TOTAL, MethodResultPure()) - end - end - end - return CallMeta(Any, EFFECTS_UNKNOWN, false) + return CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo()) elseif f === TypeVar # Manually look through the definition of TypeVar to # make sure to be able to get `PartialTypeVar`s out. - (la < 2 || la > 4) && return CallMeta(Union{}, EFFECTS_UNKNOWN, false) + (la < 2 || la > 4) && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) n = argtypes[2] ub_var = Const(Any) lb_var = Const(Union{}) @@ -1682,33 +2106,44 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), elseif la == 3 ub_var = argtypes[3] end - return CallMeta(typevar_tfunc(n, lb_var, ub_var), EFFECTS_UNKNOWN, false) + # make sure generic code is prepared for inlining if needed later + call = let T = Any[Type{TypeVar}, Any, Any, Any] + resize!(T, la) + atype = Tuple{T...} + T[1] = Const(TypeVar) + abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods) + end + pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var) + effects = builtin_effects(𝕃ᵢ, Core._typevar, Any[n, lb_var, ub_var], pT) + return CallMeta(pT, Any, effects, call.info) elseif f === UnionAll - return CallMeta(abstract_call_unionall(argtypes), EFFECTS_UNKNOWN, false) + call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods) + return abstract_call_unionall(interp, argtypes, call) elseif f === Tuple && la == 2 aty = argtypes[2] ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty) if !isconcretetype(ty) - return CallMeta(Tuple, EFFECTS_UNKNOWN, false) + return CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo()) end elseif is_return_type(f) - return return_type_tfunc(interp, argtypes, sv) + return return_type_tfunc(interp, argtypes, si, sv) elseif la == 2 && istopfunction(f, :!) 
# handle Conditional propagation through !Bool aty = argtypes[2] if isa(aty, Conditional) - call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Bool]), Tuple{typeof(f), Bool}, sv, max_methods) # make sure we've inferred `!(::Bool)` - return CallMeta(Conditional(aty.slot, aty.elsetype, aty.thentype), call.effects, call.info) + call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Bool]), si, Tuple{typeof(f), Bool}, sv, max_methods) # make sure we've inferred `!(::Bool)` + return CallMeta(Conditional(aty.slot, aty.elsetype, aty.thentype), Any, call.effects, call.info) end elseif la == 3 && istopfunction(f, :!==) # mark !== as exactly a negated call to === - rty = abstract_call_known(interp, (===), arginfo, sv, max_methods).rt + call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods) + rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt if isa(rty, Conditional) - return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), EFFECTS_TOTAL, false) # swap if-else + return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else elseif isa(rty, Const) - return CallMeta(Const(rty.val === false), EFFECTS_TOTAL, MethodResultPure()) + return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure()) end - return CallMeta(rty, EFFECTS_TOTAL, false) + return call elseif la == 3 && istopfunction(f, :(>:)) # mark issupertype as a exact alias for issubtype # swap T1 and T2 arguments and call <: @@ -1718,62 +2153,48 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), fargs = nothing end argtypes = Any[typeof(<:), argtypes[3], argtypes[2]] - return CallMeta(abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), sv, max_methods).rt, EFFECTS_TOTAL, false) - elseif la == 2 && - (a2 = argtypes[2]; isa(a2, Const)) && (svecval = a2.val; isa(svecval, SimpleVector)) && - istopfunction(f, :length) - # mark length(::SimpleVector) as @pure - return CallMeta(Const(length(svecval)), EFFECTS_TOTAL, MethodResultPure()) - elseif la == 3 && - (a2 = argtypes[2]; isa(a2, Const)) && (svecval = a2.val; isa(svecval, SimpleVector)) && - (a3 = argtypes[3]; isa(a3, Const)) && (idx = a3.val; isa(idx, Int)) && - istopfunction(f, :getindex) - # mark getindex(::SimpleVector, i::Int) as @pure - if 1 <= idx <= length(svecval) && isassigned(svecval, idx) - return CallMeta(Const(getindex(svecval, idx)), EFFECTS_TOTAL, MethodResultPure()) - end + return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods) elseif la == 2 && istopfunction(f, :typename) - return CallMeta(typename_static(argtypes[2]), EFFECTS_TOTAL, MethodResultPure()) - elseif la == 3 && istopfunction(f, :typejoin) - if is_all_const_arg(arginfo) - val = _pure_eval_call(f, arginfo) - return CallMeta(val === nothing ? 
Type : val, EFFECTS_TOTAL, MethodResultPure()) - end + return CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure()) + elseif f === Core._hasmethod + return _hasmethod_tfunc(interp, argtypes, sv) end atype = argtypes_to_type(argtypes) - return abstract_call_gf_by_type(interp, f, arginfo, atype, sv, max_methods) + return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods) end function abstract_call_opaque_closure(interp::AbstractInterpreter, - closure::PartialOpaque, arginfo::ArgInfo, sv::InferenceState, check::Bool=true) + closure::PartialOpaque, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, check::Bool=true) sig = argtypes_to_type(arginfo.argtypes) - result = abstract_call_method(interp, closure.source, sig, Core.svec(), false, sv) - (; rt, edge, effects) = result - edge !== nothing && add_backedge!(edge, sv) + result = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, si, sv) + (; rt, edge, effects, volatile_inf_result) = result tt = closure.typ sigT = (unwrap_unionall(tt)::DataType).parameters[1] match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt)) - const_result = nothing + 𝕃ₚ = ipo_lattice(interp) + ⊑ₚ = ⊑(𝕃ₚ) + const_result = volatile_inf_result if !result.edgecycle const_call_result = abstract_call_method_with_const_args(interp, result, - nothing, arginfo, match, sv) + nothing, arginfo, si, match, sv) if const_call_result !== nothing - if const_call_result.rt ⊑ rt - (; rt, effects, const_result) = const_call_result + if const_call_result.rt ⊑ₚ rt + (; rt, effects, const_result, edge) = const_call_result end end end - info = OpaqueClosureCallInfo(match, const_result) if check # analyze implicit type asserts on argument and return type ftt = closure.typ (aty, rty) = (unwrap_unionall(ftt)::DataType).parameters rty = rewrap_unionall(rty isa TypeVar ? rty.lb : rty, ftt) - if !(rt ⊑ rty && tuple_tfunc(arginfo.argtypes[2:end]) ⊑ rewrap_unionall(aty, ftt)) - effects = Effects(effects; nothrow=ALWAYS_FALSE) + if !(rt ⊑ₚ rty && tuple_tfunc(𝕃ₚ, arginfo.argtypes[2:end]) ⊑ₚ rewrap_unionall(aty, ftt)) + effects = Effects(effects; nothrow=false) end end - rt = from_interprocedural!(rt, sv, arginfo, match.spec_types) - return CallMeta(rt, effects, info) + rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types) + info = OpaqueClosureCallInfo(match, const_result) + edge !== nothing && add_backedge!(sv, edge) + return CallMeta(rt, Any, effects, info) end function most_general_argtypes(closure::PartialOpaque) @@ -1783,42 +2204,53 @@ function most_general_argtypes(closure::PartialOpaque) if !isa(argt, DataType) || argt.name !== typename(Tuple) argt = Tuple end - return most_general_argtypes(closure.source, argt, false) + return Any[argt.parameters...] 
end -# call where the function is any lattice element -function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, - sv::InferenceState, max_methods::Union{Int, Nothing} = nothing) - argtypes = arginfo.argtypes - ft = argtypes[1] - f = singleton_type(ft) +function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft), + arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, + max_methods::Int) if isa(ft, PartialOpaque) - newargtypes = copy(argtypes) + newargtypes = copy(arginfo.argtypes) newargtypes[1] = ft.env return abstract_call_opaque_closure(interp, - ft, ArgInfo(arginfo.fargs, newargtypes), sv, #=check=#true) - elseif (uft = unwrap_unionall(widenconst(ft)); isa(uft, DataType) && uft.name === typename(Core.OpaqueClosure)) - return CallMeta(rewrap_unionall((uft::DataType).parameters[2], widenconst(ft)), Effects(), false) - elseif f === nothing - # non-constant function, but the number of arguments is known - # and the ft is not a Builtin or IntrinsicFunction - if hasintersect(widenconst(ft), Union{Builtin, Core.OpaqueClosure}) - add_remark!(interp, sv, "Could not identify method table for call") - return CallMeta(Any, Effects(), false) + ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true) + end + wft = widenconst(ft) + if hasintersect(wft, Builtin) + add_remark!(interp, sv, "Could not identify method table for call") + return CallMeta(Any, Any, Effects(), NoCallInfo()) + elseif hasintersect(wft, Core.OpaqueClosure) + uft = unwrap_unionall(wft) + if isa(uft, DataType) + return CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo()) end - max_methods = max_methods === nothing ? get_max_methods(sv.mod, interp) : max_methods - return abstract_call_gf_by_type(interp, nothing, arginfo, argtypes_to_type(argtypes), sv, max_methods) + return CallMeta(Any, Any, Effects(), NoCallInfo()) + end + # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic + atype = argtypes_to_type(arginfo.argtypes) + return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods) +end + +# call where the function is any lattice element +function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, + sv::AbsIntState, max_methods::Int=typemin(Int)) + ft = widenslotwrapper(arginfo.argtypes[1]) + f = singleton_type(ft) + if f === nothing + max_methods = max_methods == typemin(Int) ? get_max_methods(interp, sv) : max_methods + return abstract_call_unknown(interp, ft, arginfo, si, sv, max_methods) end - max_methods = max_methods === nothing ? get_max_methods(f, sv.mod, interp) : max_methods - return abstract_call_known(interp, f, arginfo, sv, max_methods) + max_methods = max_methods == typemin(Int) ? 
get_max_methods(interp, f, sv) : max_methods + return abstract_call_known(interp, f, arginfo, si, sv, max_methods) end function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool) isref = false - if T === Bottom + if unwrapva(T) === Bottom return Bottom elseif isa(T, Type) - if isa(T, DataType) && (T::DataType).name === _REF_NAME + if isa(T, DataType) && (T::DataType).name === Ref.body.name isref = true T = T.parameters[1] if isreturn && T === Any @@ -1841,6 +2273,12 @@ function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool) T = UnionAll(v, T) end end + if has_free_typevars(T) + fv = ccall(:jl_find_free_typevars, Vector{Any}, (Any,), T) + for v in fv + T = UnionAll(v, T) + end + end else T = rewrap_unionall(T, spsig) end @@ -1849,57 +2287,91 @@ function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool) return unwraptv(T) end -function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::VarTable, sv::InferenceState) +function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) f = abstract_eval_value(interp, e.args[2], vtypes, sv) # rt = sp_type_rewrap(e.args[3], sv.linfo, true) - at = Any[ sp_type_rewrap(argt, sv.linfo, false) for argt in e.args[4]::SimpleVector ] - pushfirst!(at, f) + atv = e.args[4]::SimpleVector + at = Vector{Any}(undef, length(atv) + 1) + at[1] = f + for i = 1:length(atv) + at[i + 1] = sp_type_rewrap(at[i], frame_instance(sv), false) + at[i + 1] === Bottom && return + end # this may be the wrong world for the call, # but some of the result is likely to be valid anyways # and that may help generate better codegen - abstract_call(interp, ArgInfo(nothing, at), sv) + abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv) nothing end -function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::VarTable, sv::InferenceState) - if e.head === :static_parameter - n = e.args[1]::Int - t = Any - if 1 <= n <= length(sv.sptypes) - t = sv.sptypes[n] - end - return t - elseif e.head === :boundscheck - return Bool - else - return Any - end -end - -function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) +function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState) if isa(e, QuoteNode) - return Const(e.value) + effects = Effects(EFFECTS_TOTAL; + inaccessiblememonly = is_mutation_free_argtype(typeof(e.value)) ? 
ALWAYS_TRUE : ALWAYS_FALSE) + return RTEffects(Const(e.value), Union{}, effects) elseif isa(e, SSAValue) - return abstract_eval_ssavalue(e, sv) - elseif isa(e, SlotNumber) || isa(e, Argument) - return vtypes[slot_id(e)].typ + return RTEffects(abstract_eval_ssavalue(e, sv), Union{}, EFFECTS_TOTAL) + elseif isa(e, SlotNumber) + if vtypes !== nothing + vtyp = vtypes[slot_id(e)] + if !vtyp.undef + return RTEffects(vtyp.typ, Union{}, EFFECTS_TOTAL) + end + return RTEffects(vtyp.typ, UndefVarError, EFFECTS_THROWS) + end + return RTEffects(Any, UndefVarError, EFFECTS_THROWS) + elseif isa(e, Argument) + if vtypes !== nothing + return RTEffects(vtypes[slot_id(e)].typ, Union{}, EFFECTS_TOTAL) + else + @assert isa(sv, IRInterpretationState) + return RTEffects(sv.ir.argtypes[e.n], Union{}, EFFECTS_TOTAL) # TODO frame_argtypes(sv)[e.n] and remove the assertion + end elseif isa(e, GlobalRef) - return abstract_eval_global(e.mod, e.name, sv) + return abstract_eval_globalref(interp, e, sv) end - return Const(e) + return RTEffects(Const(e), Union{}, EFFECTS_TOTAL) +end + +function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) + head = e.head + if head === :static_parameter + n = e.args[1]::Int + nothrow = false + if 1 <= n <= length(sv.sptypes) + sp = sv.sptypes[n] + rt = sp.typ + nothrow = !sp.undef + else + rt = Any + end + merge_effects!(interp, sv, Effects(EFFECTS_TOTAL; nothrow)) + return rt + elseif head === :call + # TODO: We still have non-linearized cglobal + @assert e.args[1] === Core.tuple || + e.args[1] === GlobalRef(Core, :tuple) + else + # Some of our tests expect us to handle invalid IR here and error later + # - permit that for now. + # @assert false "Unexpected EXPR head in value position" + merge_effects!(interp, sv, EFFECTS_UNKNOWN) + end + return Any end -function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) +function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::Union{VarTable,Nothing}, sv::AbsIntState) if isa(e, Expr) return abstract_eval_value_expr(interp, e, vtypes, sv) else - typ = abstract_eval_special_value(interp, e, vtypes, sv) - return collect_limitations!(typ, sv) + (;rt, effects) = abstract_eval_special_value(interp, e, vtypes, sv) + merge_effects!(interp, sv, effects) + return collect_limitations!(rt, sv) end end -function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::VarTable, sv::InferenceState) +function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) n = length(ea) argtypes = Vector{Any}(undef, n) @inbounds for i = 1:n @@ -1912,152 +2384,178 @@ function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes:: return argtypes end -function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) - if !isa(e, Expr) - if isa(e, PhiNode) - rt = Union{} - for val in e.values - rt = tmerge(rt, abstract_eval_special_value(interp, val, vtypes, sv)) - end - return rt - end - return abstract_eval_special_value(interp, e, vtypes, sv) +struct RTEffects + rt + exct + effects::Effects + RTEffects(@nospecialize(rt), @nospecialize(exct), effects::Effects) = new(rt, exct, effects) +end + +function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState) + si = StmtInfo(!call_result_unused(sv, sv.currpc)) + (; rt, exct, effects, info) = abstract_call(interp, 
arginfo, si, sv) + sv.stmt_info[sv.currpc] = info + # mark this call statement as DCE-eligible + # TODO better to do this in a single pass based on the `info` object at the end of abstractinterpret? + return RTEffects(rt, exct, effects) +end + +function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, + sv::AbsIntState) + ea = e.args + argtypes = collect_argtypes(interp, ea, vtypes, sv) + if argtypes === nothing + return RTEffects(Bottom, Any, Effects()) end - e = e::Expr + arginfo = ArgInfo(ea, argtypes) + return abstract_call(interp, arginfo, sv) +end + +function abstract_eval_the_exception(interp::AbstractInterpreter, sv::InferenceState) + return sv.handlers[sv.handler_at[sv.currpc][2]].exct +end +abstract_eval_the_exception(::AbstractInterpreter, ::IRInterpretationState) = Any + +function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, + sv::AbsIntState) + effects = Effects() ehead = e.head + 𝕃ᵢ = typeinf_lattice(interp) + ⊑ᵢ = ⊑(𝕃ᵢ) + exct = Any if ehead === :call - ea = e.args - argtypes = collect_argtypes(interp, ea, vtypes, sv) - if argtypes === nothing - t = Bottom - else - callinfo = abstract_call(interp, ArgInfo(ea, argtypes), sv) - tristate_merge!(sv, callinfo.effects) - sv.stmt_info[sv.currpc] = callinfo.info - t = callinfo.rt - end + (; rt, exct, effects) = abstract_eval_call(interp, e, vtypes, sv) + t = rt elseif ehead === :new - t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv)) - is_nothrow = true - if isconcretedispatch(t) - fcount = fieldcount(t) + t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv), true) + ut = unwrap_unionall(t) + exct = Union{ErrorException, TypeError} + if isa(ut, DataType) && !isabstracttype(ut) + ismutable = ismutabletype(ut) + fcount = datatype_fieldcount(ut) nargs = length(e.args) - 1 - is_nothrow && (is_nothrow = fcount ≥ nargs) - ats = Vector{Any}(undef, nargs) - local anyrefine = false - local allconst = true - for i = 2:length(e.args) - at = widenconditional(abstract_eval_value(interp, e.args[i], vtypes, sv)) - ft = fieldtype(t, i-1) - is_nothrow && (is_nothrow = at ⊑ ft) - at = tmeet(at, ft) - if at === Bottom - t = Bottom - tristate_merge!(sv, EFFECTS_THROWS) - @goto t_computed - elseif !isa(at, Const) - allconst = false - end - if !anyrefine - anyrefine = has_nontrivial_const_info(at) || # constant information - at ⋤ ft # just a type-level information, but more precise than the declared type + has_any_uninitialized = (fcount === nothing || (fcount > nargs && (let t = t + any(i::Int -> !is_undefref_fieldtype(fieldtype(t, i)), (nargs+1):fcount) + end))) + if has_any_uninitialized + # allocation with undefined field is inconsistent always + consistent = ALWAYS_FALSE + elseif ismutable + # mutable allocation isn't `:consistent`, but we still have a chance that + # return type information later refines the `:consistent`-cy of the method + consistent = CONSISTENT_IF_NOTRETURNED + else + consistent = ALWAYS_TRUE # immutable allocation is consistent + end + if isconcretedispatch(t) + nothrow = true + @assert fcount !== nothing && fcount ≥ nargs "malformed :new expression" # syntactically enforced by the front-end + ats = Vector{Any}(undef, nargs) + local anyrefine = false + local allconst = true + for i = 1:nargs + at = widenslotwrapper(abstract_eval_value(interp, e.args[i+1], vtypes, sv)) + ft = fieldtype(t, i) + nothrow && (nothrow = at ⊑ᵢ ft) + at = tmeet(𝕃ᵢ, at, ft) + at === Bottom && @goto 
always_throw + if ismutable && !isconst(t, i) + ats[i] = ft # can't constrain this field (as it may be modified later) + continue + end + allconst &= isa(at, Const) + if !anyrefine + anyrefine = has_nontrivial_extended_info(𝕃ᵢ, at) || # extended lattice information + ⋤(𝕃ᵢ, at, ft) # just a type-level information, but more precise than the declared type + end + ats[i] = at end - ats[i-1] = at - end - # For now, don't allow: - # - Const/PartialStruct of mutables - # - partially initialized Const/PartialStruct - if !ismutabletype(t) && fcount == nargs - if allconst - argvals = Vector{Any}(undef, nargs) - for j in 1:nargs - argvals[j] = (ats[j]::Const).val + # For now, don't allow: + # - Const/PartialStruct of mutables (but still allow PartialStruct of mutables + # with `const` fields if anything refined) + # - partially initialized Const/PartialStruct + if fcount == nargs + if consistent === ALWAYS_TRUE && allconst + argvals = Vector{Any}(undef, nargs) + for j in 1:nargs + argvals[j] = (ats[j]::Const).val + end + t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, argvals, nargs)) + elseif anyrefine + t = PartialStruct(t, ats) end - t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, argvals, nargs)) - elseif anyrefine - t = PartialStruct(t, ats) end + else + t = refine_partial_type(t) + nothrow = false end else - is_nothrow = false + consistent = ALWAYS_FALSE + nothrow = false end - tristate_merge!(sv, Effects(EFFECTS_TOTAL; - consistent = !ismutabletype(t) ? ALWAYS_TRUE : TRISTATE_UNKNOWN, - nothrow = is_nothrow ? ALWAYS_TRUE : ALWAYS_FALSE)) + effects = Effects(EFFECTS_TOTAL; consistent, nothrow) + nothrow && (exct = Union{}) elseif ehead === :splatnew - t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv)) - is_nothrow = false # TODO: More precision - if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t) + t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv), true) + nothrow = false # TODO: More precision + if length(e.args) == 2 && isconcretedispatch(t) && !ismutabletype(t) at = abstract_eval_value(interp, e.args[2], vtypes, sv) n = fieldcount(t) - if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) && - let t = t, at = at; _all(i->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end - is_nothrow = isexact && isconcretedispatch(t) + if (isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) && + (let t = t, at = at + all(i::Int->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n) + end)) + nothrow = isexact t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val)) - elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields::Vector{Any}) && - let t = t, at = at; _all(i->(at.fields::Vector{Any})[i] ⊑ fieldtype(t, i), 1:n); end - is_nothrow = isexact && isconcretedispatch(t) + elseif (isa(at, PartialStruct) && at ⊑ᵢ Tuple && n > 0 && n == length(at.fields::Vector{Any}) && !isvarargtype(at.fields[end]) && + (let t = t, at = at, ⊑ᵢ = ⊑ᵢ + all(i::Int->(at.fields::Vector{Any})[i] ⊑ᵢ fieldtype(t, i), 1:n) + end)) + nothrow = isexact t = PartialStruct(t, at.fields::Vector{Any}) end + else + t = refine_partial_type(t) end - tristate_merge!(sv, Effects(EFFECTS_TOTAL; - consistent = !ismutabletype(t) ? ALWAYS_TRUE : TRISTATE_UNKNOWN, - nothrow = is_nothrow ? ALWAYS_TRUE : ALWAYS_FALSE)) + consistent = !ismutabletype(t) ? 
ALWAYS_TRUE : CONSISTENT_IF_NOTRETURNED + effects = Effects(EFFECTS_TOTAL; consistent, nothrow) elseif ehead === :new_opaque_closure - tristate_merge!(sv, Effects()) # TODO t = Union{} + effects = Effects() # TODO + merge_effects!(interp, sv, effects) if length(e.args) >= 4 ea = e.args argtypes = collect_argtypes(interp, ea, vtypes, sv) if argtypes === nothing t = Bottom else - t = _opaque_closure_tfunc(argtypes[1], argtypes[2], argtypes[3], - argtypes[4], argtypes[5:end], sv.linfo) - if isa(t, PartialOpaque) + mi = frame_instance(sv) + t = opaque_closure_tfunc(𝕃ᵢ, argtypes[1], argtypes[2], argtypes[3], + argtypes[4], argtypes[5:end], mi) + if isa(t, PartialOpaque) && isa(sv, InferenceState) && !call_result_unused(sv, sv.currpc) # Infer this now so that the specialization is available to # optimization. argtypes = most_general_argtypes(t) pushfirst!(argtypes, t.env) callinfo = abstract_call_opaque_closure(interp, t, - ArgInfo(nothing, argtypes), sv, #=check=#false) + ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false) sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo) end end end elseif ehead === :foreigncall - abstract_eval_value(interp, e.args[1], vtypes, sv) - t = sp_type_rewrap(e.args[2], sv.linfo, true) - for i = 3:length(e.args) - if abstract_eval_value(interp, e.args[i], vtypes, sv) === Bottom - t = Bottom - end - end - cconv = e.args[5] - if isa(cconv, QuoteNode) && (v = cconv.value; isa(v, Tuple{Symbol, UInt8})) - effects = v[2] - effects = decode_effects_override(effects) - tristate_merge!(sv, Effects( - effects.consistent ? ALWAYS_TRUE : ALWAYS_FALSE, - effects.effect_free ? ALWAYS_TRUE : ALWAYS_FALSE, - effects.nothrow ? ALWAYS_TRUE : ALWAYS_FALSE, - effects.terminates_globally ? ALWAYS_TRUE : ALWAYS_FALSE, - #=nonoverlayed=#true, - effects.notaskstate ? ALWAYS_TRUE : ALWAYS_FALSE - )) - else - tristate_merge!(sv, EFFECTS_UNKNOWN) - end + (; rt, exct, effects) = abstract_eval_foreigncall(interp, e, vtypes, sv) + t = rt elseif ehead === :cfunction - tristate_merge!(sv, EFFECTS_UNKNOWN) + effects = EFFECTS_UNKNOWN t = e.args[1] isa(t, Type) || (t = Any) abstract_eval_cfunction(interp, e, vtypes, sv) elseif ehead === :method - tristate_merge!(sv, EFFECTS_UNKNOWN) t = (length(e.args) == 1) ? 
Any : Nothing + effects = EFFECTS_UNKNOWN elseif ehead === :copyast - tristate_merge!(sv, EFFECTS_UNKNOWN) + effects = EFFECTS_UNKNOWN t = abstract_eval_value(interp, e.args[1], vtypes, sv) if t isa Const && t.val isa Expr # `copyast` makes copies of Exprs @@ -2068,7 +2566,9 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), elseif ehead === :isdefined sym = e.args[1] t = Bool - if isa(sym, SlotNumber) + effects = EFFECTS_TOTAL + exct = Union{} + if isa(sym, SlotNumber) && vtypes !== nothing vtyp = vtypes[slot_id(sym)] if vtyp.typ === Bottom t = Const(false) # never assigned previously @@ -2076,73 +2576,250 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), t = Const(true) # definitely assigned previously end elseif isa(sym, Symbol) - if isdefined(sv.mod, sym) + if isdefined(frame_module(sv), sym) t = Const(true) + elseif InferenceParams(interp).assume_bindings_static + t = Const(false) + else + effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE) end elseif isa(sym, GlobalRef) if isdefined(sym.mod, sym.name) t = Const(true) + elseif InferenceParams(interp).assume_bindings_static + t = Const(false) + else + effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE) end elseif isexpr(sym, :static_parameter) n = sym.args[1]::Int if 1 <= n <= length(sv.sptypes) - spty = sv.sptypes[n] - if isa(spty, Const) + sp = sv.sptypes[n] + if !sp.undef t = Const(true) + elseif sp.typ === Bottom + t = Const(false) end end + else + effects = EFFECTS_UNKNOWN + end + elseif ehead === :throw_undef_if_not + condt = argextype(stmt.args[2], ir) + condval = maybe_extract_const_bool(condt) + t = Nothing + exct = UndefVarError + effects = EFFECTS_THROWS + if condval isa Bool + if condval + effects = EFFECTS_TOTAL + exct = Union{} + else + t = Union{} + end + elseif !hasintersect(widenconst(condt), Bool) + t = Union{} end + elseif ehead === :boundscheck + t = Bool + exct = Union{} + effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE) + elseif ehead === :the_exception + t = abstract_eval_the_exception(interp, sv) + exct = Union{} + effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE) + elseif ehead === :static_parameter + n = e.args[1]::Int + nothrow = false + exct = UndefVarError + if 1 <= n <= length(sv.sptypes) + sp = sv.sptypes[n] + t = sp.typ + nothrow = !sp.undef + else + t = Any + end + if nothrow + exct = Union{} + end + effects = Effects(EFFECTS_TOTAL; nothrow) + elseif ehead === :gc_preserve_begin || ehead === :aliasscope + t = Any + exct = Union{} + effects = Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE, effect_free=EFFECT_FREE_GLOBALLY) + elseif ehead === :gc_preserve_end || ehead === :leave || ehead === :pop_exception || ehead === :global || ehead === :popaliasscope + t = Nothing + exct = Union{} + effects = Effects(EFFECTS_TOTAL; effect_free=EFFECT_FREE_GLOBALLY) + elseif ehead === :method + t = Method + exct = Union{} + effects = Effects(EFFECTS_TOTAL; effect_free=EFFECT_FREE_GLOBALLY) + elseif ehead === :thunk + t = Any + effects = EFFECTS_UNKNOWN + elseif false + @label always_throw + t = Bottom + effects = EFFECTS_THROWS else t = abstract_eval_value_expr(interp, e, vtypes, sv) + # N.B.: abstract_eval_value_expr can modify the global effects, but + # we move out any arguments with effects during SSA construction later + # and recompute the effects.
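The `:isdefined` handling above is what lets `@isdefined` checks on always-assigned locals fold to constants; a minimal sketch (exact folding can vary by Julia version):

```julia
# When the local is assigned on every path before the check, the
# Expr(:isdefined, ...) above is evaluated to Const(true) and the branch folds.
function always_assigned()
    x = 1
    return @isdefined(x)
end

# code_typed is expected to show a plain `return true` body here.
code_typed(always_assigned, ())
```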
+ effects = EFFECTS_TOTAL end - @label t_computed - @assert !isa(t, TypeVar) "unhandled TypeVar" - if isa(t, DataType) && isdefined(t, :instance) - # replace singleton types with their equivalent Const object - t = Const(t.instance) + return RTEffects(t, exct, effects) +end + +# refine the result of instantiation of partially-known type `t` if some invariant can be assumed +function refine_partial_type(@nospecialize t) + t′ = unwrap_unionall(t) + if isa(t′, DataType) && t′.name === _NAMEDTUPLE_NAME && length(t′.parameters) == 2 && + (t′.parameters[1] === () || t′.parameters[2] === Tuple{}) + # if the first/second parameter of `NamedTuple` is known to be empty, + # the second/first argument should also be empty tuple type, + # so refine it here + return Const(NamedTuple()) end - if !isempty(sv.pclimitations) - if t isa Const || t === Bottom - empty!(sv.pclimitations) - else - t = LimitedAccuracy(t, sv.pclimitations) - sv.pclimitations = IdSet{InferenceState}() + return t +end + +function abstract_eval_foreigncall(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) + mi = frame_instance(sv) + t = sp_type_rewrap(e.args[2], mi, true) + for i = 3:length(e.args) + if abstract_eval_value(interp, e.args[i], vtypes, sv) === Bottom + return RTEffects(Bottom, Any, EFFECTS_THROWS) end end - return t + effects = foreigncall_effects(e) do @nospecialize x + abstract_eval_value(interp, x, vtypes, sv) + end + cconv = e.args[5] + if isa(cconv, QuoteNode) && (v = cconv.value; isa(v, Tuple{Symbol, UInt16})) + override = decode_effects_override(v[2]) + effects = override_effects(effects, override) + end + return RTEffects(t, Any, effects) end -function abstract_eval_global(M::Module, s::Symbol) - if isdefined(M, s) && isconst(M, s) - return Const(getglobal(M, s)) +function abstract_eval_phi(interp::AbstractInterpreter, phi::PhiNode, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) + rt = Union{} + for i in 1:length(phi.values) + isassigned(phi.values, i) || continue + val = phi.values[i] + # N.B.: Phi arguments are restricted to not have effects, so we can drop + # them here safely. 
+ rt = tmerge(typeinf_lattice(interp), rt, abstract_eval_special_value(interp, val, vtypes, sv).rt) end - ty = ccall(:jl_binding_type, Any, (Any, Any), M, s) - ty === nothing && return Any - return ty + return rt end -function abstract_eval_global(M::Module, s::Symbol, frame::InferenceState) - ty = abstract_eval_global(M, s) - isa(ty, Const) && return ty - if isdefined(M,s) - tristate_merge!(frame, Effects(EFFECTS_TOTAL; consistent=ALWAYS_FALSE)) +function stmt_taints_inbounds_consistency(sv::AbsIntState) + propagate_inbounds(sv) && return true + return has_curr_ssaflag(sv, IR_FLAG_INBOUNDS) +end + +function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) + if !isa(e, Expr) + if isa(e, PhiNode) + add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) + return RTEffects(abstract_eval_phi(interp, e, vtypes, sv), Union{}, EFFECTS_TOTAL) + end + (; rt, exct, effects) = abstract_eval_special_value(interp, e, vtypes, sv) else - tristate_merge!(frame, Effects(EFFECTS_TOTAL; - consistent=ALWAYS_FALSE, - nothrow=ALWAYS_FALSE)) + (; rt, exct, effects) = abstract_eval_statement_expr(interp, e, vtypes, sv) + if effects.noub === NOUB_IF_NOINBOUNDS + if has_curr_ssaflag(sv, IR_FLAG_INBOUNDS) + effects = Effects(effects; noub=ALWAYS_FALSE) + elseif !propagate_inbounds(sv) + # The callee read our inbounds flag, but unless we propagate inbounds, + # we ourselves don't read our parent's inbounds. + effects = Effects(effects; noub=ALWAYS_TRUE) + end + end + e = e::Expr + @assert !isa(rt, TypeVar) "unhandled TypeVar" + rt = maybe_singleton_const(rt) + if !isempty(sv.pclimitations) + if rt isa Const || rt === Union{} + empty!(sv.pclimitations) + else + rt = LimitedAccuracy(rt, sv.pclimitations) + sv.pclimitations = IdSet{InferenceState}() + end + end + end + # N.B.: This only applies to the effects of the statement itself. + # It is possible for arguments (GlobalRef/:static_parameter) to throw, + # but these will be recomputed during SSA construction later. + override = decode_statement_effects_override(sv) + effects = override_effects(effects, override) + set_curr_ssaflag!(sv, flags_for_effects(effects), IR_FLAGS_EFFECTS) + merge_effects!(interp, sv, effects) + + return RTEffects(rt, exct, effects) +end + +function override_effects(effects::Effects, override::EffectsOverride) + return Effects(effects; + consistent = override.consistent ? ALWAYS_TRUE : effects.consistent, + effect_free = override.effect_free ? ALWAYS_TRUE : effects.effect_free, + nothrow = override.nothrow ? true : effects.nothrow, + terminates = override.terminates_globally ? true : effects.terminates, + notaskstate = override.notaskstate ? true : effects.notaskstate, + inaccessiblememonly = override.inaccessiblememonly ? ALWAYS_TRUE : effects.inaccessiblememonly, + noub = override.noub ? ALWAYS_TRUE : + override.noub_if_noinbounds && effects.noub !== ALWAYS_TRUE ? 
NOUB_IF_NOINBOUNDS : + effects.noub) +end + +isdefined_globalref(g::GlobalRef) = !iszero(ccall(:jl_globalref_boundp, Cint, (Any,), g)) + +function abstract_eval_globalref_type(g::GlobalRef) + if isdefined_globalref(g) && isconst(g) + return Const(ccall(:jl_get_globalref_value, Any, (Any,), g)) end + ty = ccall(:jl_get_binding_type, Any, (Any, Any), g.mod, g.name) + ty === nothing && return Any return ty end +abstract_eval_global(M::Module, s::Symbol) = abstract_eval_globalref_type(GlobalRef(M, s)) + +function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, sv::AbsIntState) + rt = abstract_eval_globalref_type(g) + consistent = inaccessiblememonly = ALWAYS_FALSE + nothrow = false + if isa(rt, Const) + consistent = ALWAYS_TRUE + nothrow = true + if is_mutation_free_argtype(rt) + inaccessiblememonly = ALWAYS_TRUE + end + elseif isdefined_globalref(g) + nothrow = true + elseif InferenceParams(interp).assume_bindings_static + consistent = inaccessiblememonly = ALWAYS_TRUE + rt = Union{} + end + return RTEffects(rt, nothrow ? Union{} : UndefVarError, Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly)) +end function handle_global_assignment!(interp::AbstractInterpreter, frame::InferenceState, lhs::GlobalRef, @nospecialize(newty)) effect_free = ALWAYS_FALSE - nothrow = global_assignment_nothrow(lhs.mod, lhs.name, newty) ? - ALWAYS_TRUE : ALWAYS_FALSE - tristate_merge!(frame, Effects(EFFECTS_TOTAL; effect_free, nothrow)) + nothrow = global_assignment_nothrow(lhs.mod, lhs.name, newty) + inaccessiblememonly = ALWAYS_FALSE + if !nothrow + sub_curr_ssaflag!(frame, IR_FLAG_NOTHROW) + end + sub_curr_ssaflag!(frame, IR_FLAG_EFFECT_FREE) + merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; effect_free, nothrow, inaccessiblememonly)) + return nothing end abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.ssavaluetypes) -abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) = abstract_eval_ssavalue(s, src.ssavaluetypes::Vector{Any}) + function abstract_eval_ssavalue(s::SSAValue, ssavaluetypes::Vector{Any}) typ = ssavaluetypes[s.id] if typ === NOT_FOUND @@ -2151,18 +2828,54 @@ function abstract_eval_ssavalue(s::SSAValue, ssavaluetypes::Vector{Any}) return typ end -function widenreturn(@nospecialize(rt), @nospecialize(bestguess), nargs::Int, slottypes::Vector{Any}, changes::VarTable) - if !(bestguess ⊑ Bool) || bestguess === Bool +struct BestguessInfo{Interp<:AbstractInterpreter} + interp::Interp + bestguess + nargs::Int + slottypes::Vector{Any} + changes::VarTable + function BestguessInfo(interp::Interp, @nospecialize(bestguess), nargs::Int, + slottypes::Vector{Any}, changes::VarTable) where Interp<:AbstractInterpreter + new{Interp}(interp, bestguess, nargs, slottypes, changes) + end +end + +@nospecializeinfer function widenreturn(@nospecialize(rt), info::BestguessInfo) + return widenreturn(typeinf_lattice(info.interp), rt, info) +end + +@nospecializeinfer function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) + return widenreturn(widenlattice(𝕃ᵢ), rt, info) +end +@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) + return widenreturn_noslotwrapper(widenlattice(𝕃ᵢ), rt, info) +end + +@nospecializeinfer function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo) + if isa(rt, MustAlias) + if 1 ≤ rt.slot ≤ info.nargs + rt = InterMustAlias(rt) + else + rt = widenmustalias(rt) + end + end + isa(rt, InterMustAlias) && return rt + 
return widenreturn(widenlattice(𝕃ᵢ), rt, info) +end + +@nospecializeinfer function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::BestguessInfo) + ⊑ᵢ = ⊑(𝕃ᵢ) + if !(⊑(ipo_lattice(info.interp), info.bestguess, Bool)) || info.bestguess === Bool # give up inter-procedural constraint back-propagation # when tmerge would widen the result anyways (as an optimization) rt = widenconditional(rt) else if isa(rt, Conditional) id = rt.slot - if 1 ≤ id ≤ nargs - old_id_type = widenconditional(slottypes[id]) # same as `(states[1]::VarTable)[id].typ` - if (!(rt.thentype ⊑ old_id_type) || old_id_type ⊑ rt.thentype) && - (!(rt.elsetype ⊑ old_id_type) || old_id_type ⊑ rt.elsetype) + if 1 ≤ id ≤ info.nargs + old_id_type = widenconditional(info.slottypes[id]) # same as `(states[1]::VarTable)[id].typ` + if (!(rt.thentype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.thentype) && + (!(rt.elsetype ⊑ᵢ old_id_type) || old_id_type ⊑ᵢ rt.elsetype) # discard this `Conditional` since it imposes # no new constraint on the argument type # (the caller will recreate it if needed) @@ -2177,44 +2890,71 @@ function widenreturn(@nospecialize(rt), @nospecialize(bestguess), nargs::Int, sl end if isa(rt, Conditional) rt = InterConditional(rt.slot, rt.thentype, rt.elsetype) - elseif is_lattice_bool(rt) - if isa(bestguess, InterConditional) - # if the bestguess so far is already `Conditional`, try to convert - # this `rt` into `Conditional` on the slot to avoid overapproximation - # due to conflict of different slots - rt = bool_rt_to_conditional(rt, slottypes, changes, bestguess.slot) - else - # pick up the first "interesting" slot, convert `rt` to its `Conditional` - # TODO: ideally we want `Conditional` and `InterConditional` to convey - # constraints on multiple slots - for slot_id in 1:nargs - rt = bool_rt_to_conditional(rt, slottypes, changes, slot_id) - rt isa InterConditional && break - end - end + elseif is_lattice_bool(𝕃ᵢ, rt) + rt = bool_rt_to_conditional(rt, info) end end - - # only propagate information we know we can store - # and is valid and good inter-procedurally - isa(rt, Conditional) && return InterConditional(rt) + if isa(rt, Conditional) + rt = InterConditional(rt) + end isa(rt, InterConditional) && return rt - return widenreturn_noconditional(rt) + return widenreturn(widenlattice(𝕃ᵢ), rt, info) +end +@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) + bestguess = info.bestguess + if isa(bestguess, InterConditional) + # if the bestguess so far is already `Conditional`, try to convert + # this `rt` into `Conditional` on the slot to avoid overapproximation + # due to conflict of different slots + rt = bool_rt_to_conditional(rt, bestguess.slot, info) + else + # pick up the first "interesting" slot, convert `rt` to its `Conditional` + # TODO: ideally we want `Conditional` and `InterConditional` to convey + # constraints on multiple slots + for slot_id = 1:info.nargs + rt = bool_rt_to_conditional(rt, slot_id, info) + rt isa InterConditional && break + end + end + return rt +end +@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::BestguessInfo) + ⊑ᵢ = ⊑(typeinf_lattice(info.interp)) + old = info.slottypes[slot_id] + new = widenslotwrapper(info.changes[slot_id].typ) # avoid nested conditional + if new ⊑ᵢ old && !(old ⊑ᵢ new) + if isa(rt, Const) + val = rt.val + if val === true + return InterConditional(slot_id, new, Bottom) + elseif val === false + return InterConditional(slot_id, Bottom, new) + end + elseif rt === Bool + return 
InterConditional(slot_id, new, new) + end + end + return rt end -function widenreturn_noconditional(@nospecialize(rt)) - isa(rt, Const) && return rt - isa(rt, Type) && return rt +@nospecializeinfer function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) + return widenreturn_partials(𝕃ᵢ, rt, info) +end +@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) + return widenreturn_partials(𝕃ᵢ, rt, info) +end +@nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) if isa(rt, PartialStruct) fields = copy(rt.fields) local anyrefine = false + 𝕃 = typeinf_lattice(info.interp) for i in 1:length(fields) a = fields[i] - a = isvarargtype(a) ? a : widenreturn_noconditional(a) + a = isvarargtype(a) ? a : widenreturn_noslotwrapper(𝕃, a, info) if !anyrefine # TODO: consider adding && const_prop_profitable(a) here? - anyrefine = has_const_info(a) || - a ⊏ fieldtype(rt.typ, i) + anyrefine = has_extended_info(a) || + ⊏(𝕃, a, fieldtype(rt.typ, i)) end fields[i] = a end @@ -2223,15 +2963,33 @@ function widenreturn_noconditional(@nospecialize(rt)) if isa(rt, PartialOpaque) return rt # XXX: this case was missed in #39512 end + return widenreturn(widenlattice(𝕃ᵢ), rt, info) +end + +@nospecializeinfer function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) + return widenreturn_consts(rt) +end +@nospecializeinfer function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) + return widenreturn_consts(rt) +end +@nospecializeinfer function widenreturn_consts(@nospecialize(rt)) + isa(rt, Const) && return rt + return widenconst(rt) +end + +@nospecializeinfer function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) + return widenconst(rt) +end +@nospecializeinfer function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) return widenconst(rt) end -function handle_control_backedge!(frame::InferenceState, from::Int, to::Int) +function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceState, from::Int, to::Int) if from > to if is_effect_overridden(frame, :terminates_locally) # this backedge is known to terminate else - tristate_merge!(frame, Effects(EFFECTS_TOTAL; terminates=ALWAYS_FALSE)) + merge_effects!(interp, frame, Effects(EFFECTS_TOTAL; terminates=false)) end end return nothing @@ -2239,54 +2997,55 @@ end struct BasicStmtChange changes::Union{Nothing,StateUpdate} - type::Any # ::Union{Type, Nothing} - `nothing` if this statement may not be used as an SSA Value + rt::Any # extended lattice element or `nothing` - `nothing` if this statement may not be used as an SSA Value + exct::Any # TODO effects::Effects - BasicStmtChange(changes::Union{Nothing,StateUpdate}, @nospecialize type) = new(changes, type) + BasicStmtChange(changes::Union{Nothing,StateUpdate}, @nospecialize(rt), @nospecialize(exct)) = new(changes, rt, exct) end @inline function abstract_eval_basic_statement(interp::AbstractInterpreter, @nospecialize(stmt), pc_vartable::VarTable, frame::InferenceState) if isa(stmt, NewvarNode) changes = StateUpdate(stmt.slot, VarState(Bottom, true), pc_vartable, false) - return BasicStmtChange(changes, nothing) + return BasicStmtChange(changes, nothing, Union{}) elseif !isa(stmt, Expr) - t = abstract_eval_statement(interp, stmt, pc_vartable, frame) - return BasicStmtChange(nothing, t) + (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame) + return 
BasicStmtChange(nothing, rt, exct) end changes = nothing stmt = stmt::Expr hd = stmt.head if hd === :(=) - t = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame) - if t === Bottom - return BasicStmtChange(nothing, Bottom) + (; rt, exct) = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame) + if rt === Bottom + return BasicStmtChange(nothing, Bottom, exct) end lhs = stmt.args[1] if isa(lhs, SlotNumber) - changes = StateUpdate(lhs, VarState(t, false), pc_vartable, false) + changes = StateUpdate(lhs, VarState(rt, false), pc_vartable, false) elseif isa(lhs, GlobalRef) - handle_global_assignment!(interp, frame, lhs, t) + handle_global_assignment!(interp, frame, lhs, rt) elseif !isa(lhs, SSAValue) - tristate_merge!(frame, EFFECTS_UNKNOWN) + merge_effects!(interp, frame, EFFECTS_UNKNOWN) end - return BasicStmtChange(changes, t) + return BasicStmtChange(changes, rt, exct) elseif hd === :method fname = stmt.args[1] if isa(fname, SlotNumber) changes = StateUpdate(fname, VarState(Any, false), pc_vartable, false) end - return BasicStmtChange(changes, nothing) + return BasicStmtChange(changes, nothing, Union{}) elseif (hd === :code_coverage_effect || ( hd !== :boundscheck && # :boundscheck can be narrowed to Bool is_meta_expr(stmt))) - return BasicStmtChange(nothing, Nothing) + return BasicStmtChange(nothing, Nothing, Bottom) else - t = abstract_eval_statement(interp, stmt, pc_vartable, frame) - return BasicStmtChange(nothing, t) + (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame) + return BasicStmtChange(nothing, rt, exct) end end -function update_bbstate!(frame::InferenceState, bb::Int, vartable::VarTable) +function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable) bbtable = frame.bb_vartables[bb] if bbtable === nothing # if a basic block hasn't been analyzed yet, @@ -2294,7 +3053,7 @@ function update_bbstate!(frame::InferenceState, bb::Int, vartable::VarTable) frame.bb_vartables[bb] = copy(vartable) return true else - return stupdate!(bbtable, vartable) + return stupdate!(𝕃ᵢ, bbtable, vartable) end end @@ -2306,18 +3065,94 @@ function init_vartable!(vartable::VarTable, frame::InferenceState) return vartable end +function update_bestguess!(interp::AbstractInterpreter, frame::InferenceState, + currstate::VarTable, @nospecialize(rt)) + bestguess = frame.bestguess + nargs = narguments(frame, #=include_va=#false) + slottypes = frame.slottypes + rt = widenreturn(rt, BestguessInfo(interp, bestguess, nargs, slottypes, currstate)) + # narrow representation of bestguess slightly to prepare for tmerge with rt + if rt isa InterConditional && bestguess isa Const + slot_id = rt.slot + old_id_type = slottypes[slot_id] + if bestguess.val === true && rt.elsetype !== Bottom + bestguess = InterConditional(slot_id, old_id_type, Bottom) + elseif bestguess.val === false && rt.thentype !== Bottom + bestguess = InterConditional(slot_id, Bottom, old_id_type) + end + end + # copy limitations to return value + if !isempty(frame.pclimitations) + union!(frame.limitations, frame.pclimitations) + empty!(frame.pclimitations) + end + if !isempty(frame.limitations) + rt = LimitedAccuracy(rt, copy(frame.limitations)) + end + 𝕃ₚ = ipo_lattice(interp) + if !⊑(𝕃ₚ, rt, bestguess) + # TODO: if bestguess isa InterConditional && !interesting(bestguess); bestguess = widenconditional(bestguess); end + frame.bestguess = tmerge(𝕃ₚ, bestguess, rt) # new (wider) return type for frame + return true + else + return false + end +end + +function 
update_exc_bestguess!(interp::AbstractInterpreter, @nospecialize(exct), frame::InferenceState) + 𝕃ₚ = ipo_lattice(interp) + cur_hand = frame.handler_at[frame.currpc][1] + if cur_hand == 0 + if !⊑(𝕃ₚ, exct, frame.exc_bestguess) + frame.exc_bestguess = tmerge(𝕃ₚ, frame.exc_bestguess, exct) + update_cycle_worklists!(frame) do caller::InferenceState, caller_pc::Int + caller_handler = caller.handler_at[caller_pc][1] + caller_exct = caller_handler == 0 ? + caller.exc_bestguess : caller.handlers[caller_handler].exct + return caller_exct !== Any + end + end + else + handler_frame = frame.handlers[cur_hand] + if !⊑(𝕃ₚ, exct, handler_frame.exct) + handler_frame.exct = tmerge(𝕃ₚ, handler_frame.exct, exct) + enter = frame.src.code[handler_frame.enter_idx]::EnterNode + exceptbb = block_for_inst(frame.cfg, enter.catch_dest) + push!(frame.ip, exceptbb) + end + end +end + +function propagate_to_error_handler!(currstate::VarTable, frame::InferenceState, 𝕃ᵢ::AbstractLattice) + # If this statement potentially threw, propagate the currstate to the + # exception handler, BEFORE applying any state changes. + cur_hand = frame.handler_at[frame.currpc][1] + if cur_hand != 0 + enter = frame.src.code[frame.handlers[cur_hand].enter_idx]::EnterNode + exceptbb = block_for_inst(frame.cfg, enter.catch_dest) + if update_bbstate!(𝕃ᵢ, frame, exceptbb, currstate) + push!(frame.ip, exceptbb) + end + end +end + +function update_cycle_worklists!(callback, frame::InferenceState) + for (caller, caller_pc) in frame.cycle_backedges + if callback(caller, caller_pc) + push!(caller.ip, block_for_inst(caller.cfg, caller_pc)) + end + end +end + # make as much progress on `frame` as possible (without handling cycles) function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) - @assert !frame.inferred + @assert !is_inferred(frame) frame.dont_work_on_me = true # mark that this function is currently on the stack W = frame.ip - def = frame.linfo.def - isva = isa(def, Method) && def.isva - nargs = length(frame.result.argtypes) - isva - slottypes = frame.slottypes ssavaluetypes = frame.ssavaluetypes bbs = frame.cfg.blocks nbbs = length(bbs) + 𝕃ᵢ = typeinf_lattice(interp) currbb = frame.currbb if currbb != 1 @@ -2343,22 +3178,34 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) @assert length(succs) == 1 nextbb = succs[1] ssavaluetypes[currpc] = Any - handle_control_backedge!(frame, currpc, stmt.label) + handle_control_backedge!(interp, frame, currpc, stmt.label) + add_curr_ssaflag!(frame, IR_FLAG_NOTHROW) @goto branch elseif isa(stmt, GotoIfNot) condx = stmt.cond + condxslot = ssa_def_slot(condx, frame) condt = abstract_eval_value(interp, condx, currstate, frame) if condt === Bottom ssavaluetypes[currpc] = Bottom empty!(frame.pclimitations) @goto find_next_bb end - if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condx, SlotNumber) + orig_condt = condt + if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condxslot, SlotNumber) # if this non-`Conditional` object is a slot, we form and propagate # the conditional constraint on it - condt = Conditional(condx, Const(true), Const(false)) + condt = Conditional(condxslot, Const(true), Const(false)) end condval = maybe_extract_const_bool(condt) + nothrow = (condval !== nothing) || ⊑(𝕃ᵢ, orig_condt, Bool) + if nothrow + add_curr_ssaflag!(frame, IR_FLAG_NOTHROW) + else + update_exc_bestguess!(interp, TypeError, frame) + propagate_to_error_handler!(currstate, frame, 𝕃ᵢ) + merge_effects!(interp, frame, EFFECTS_THROWS) + end + if 
!isempty(frame.pclimitations) # we can't model the possible effect of control # dependencies on the return @@ -2370,6 +3217,11 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) if condval === true @goto fallthrough else + if !nothrow && !hasintersect(widenconst(orig_condt), Bool) + ssavaluetypes[currpc] = Bottom + @goto find_next_bb + end + succs = bbs[currbb].succs if length(succs) == 1 @assert condval === false || (stmt.dest === currpc + 1) @@ -2381,115 +3233,87 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) falsebb = succs[1] == truebb ? succs[2] : succs[1] if condval === false nextbb = falsebb - handle_control_backedge!(frame, currpc, stmt.dest) + handle_control_backedge!(interp, frame, currpc, stmt.dest) @goto branch - else - # We continue with the true branch, but process the false - # branch here. - if isa(condt, Conditional) - else_change = conditional_change(currstate, condt.elsetype, condt.slot) - if else_change !== nothing - false_vartable = stoverwrite1!(copy(currstate), else_change) - else - false_vartable = currstate - end - changed = update_bbstate!(frame, falsebb, false_vartable) - then_change = conditional_change(currstate, condt.thentype, condt.slot) - if then_change !== nothing - stoverwrite1!(currstate, then_change) - end + end + + # We continue with the true branch, but process the false + # branch here. + if isa(condt, Conditional) + else_change = conditional_change(𝕃ᵢ, currstate, condt.elsetype, condt.slot) + if else_change !== nothing + false_vartable = stoverwrite1!(copy(currstate), else_change) else - changed = update_bbstate!(frame, falsebb, currstate) + false_vartable = currstate end - if changed - handle_control_backedge!(frame, currpc, stmt.dest) - push!(W, falsebb) + changed = update_bbstate!(𝕃ᵢ, frame, falsebb, false_vartable) + then_change = conditional_change(𝕃ᵢ, currstate, condt.thentype, condt.slot) + if then_change !== nothing + stoverwrite1!(currstate, then_change) end - @goto fallthrough + else + changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate) + end + if changed + handle_control_backedge!(interp, frame, currpc, stmt.dest) + push!(W, falsebb) end + @goto fallthrough end elseif isa(stmt, ReturnNode) - bestguess = frame.bestguess rt = abstract_eval_value(interp, stmt.val, currstate, frame) - rt = widenreturn(rt, bestguess, nargs, slottypes, currstate) - # narrow representation of bestguess slightly to prepare for tmerge with rt - if rt isa InterConditional && bestguess isa Const - let slot_id = rt.slot - old_id_type = slottypes[slot_id] - if bestguess.val === true && rt.elsetype !== Bottom - bestguess = InterConditional(slot_id, old_id_type, Bottom) - elseif bestguess.val === false && rt.thentype !== Bottom - bestguess = InterConditional(slot_id, Bottom, old_id_type) - end - end - end - # copy limitations to return value - if !isempty(frame.pclimitations) - union!(frame.limitations, frame.pclimitations) - empty!(frame.pclimitations) - end - if !isempty(frame.limitations) - rt = LimitedAccuracy(rt, copy(frame.limitations)) - end - if tchanged(rt, bestguess) - # new (wider) return type for frame - bestguess = tmerge(bestguess, rt) - # TODO: if bestguess isa InterConditional && !interesting(bestguess); bestguess = widenconditional(bestguess); end - frame.bestguess = bestguess - for (caller, caller_pc) in frame.cycle_backedges - if !(caller.ssavaluetypes[caller_pc] === Any) - # no reason to revisit if that call-site doesn't affect the final result - push!(caller.ip, 
block_for_inst(caller.cfg, caller_pc)) - end + if update_bestguess!(interp, frame, currstate, rt) + update_cycle_worklists!(frame) do caller::InferenceState, caller_pc::Int + # no reason to revisit if that call-site doesn't affect the final result + return caller.ssavaluetypes[caller_pc] !== Any end end ssavaluetypes[frame.currpc] = Any @goto find_next_bb - elseif isexpr(stmt, :enter) - # Propagate entry info to exception handler - l = stmt.args[1]::Int - catchbb = block_for_inst(frame.cfg, l) - if update_bbstate!(frame, catchbb, currstate) - push!(W, catchbb) + elseif isa(stmt, EnterNode) + ssavaluetypes[currpc] = Any + add_curr_ssaflag!(frame, IR_FLAG_NOTHROW) + if isdefined(stmt, :scope) + scopet = abstract_eval_value(interp, stmt.scope, currstate, frame) + handler = frame.handlers[frame.handler_at[frame.currpc+1][1]] + @assert handler.scopet !== nothing + if !⊑(𝕃ᵢ, scopet, handler.scopet) + handler.scopet = tmerge(𝕃ᵢ, scopet, handler.scopet) + if isdefined(handler, :scope_uses) + for bb in handler.scope_uses + push!(W, bb) + end + end + end end + @goto fallthrough + elseif isexpr(stmt, :leave) ssavaluetypes[currpc] = Any @goto fallthrough end # Fall through terminator - treat as regular stmt end # Process non control-flow statements - (; changes, type) = abstract_eval_basic_statement(interp, + (; changes, rt, exct) = abstract_eval_basic_statement(interp, stmt, currstate, frame) - if type === Bottom + if exct !== Union{} + update_exc_bestguess!(interp, exct, frame) + end + if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW) + propagate_to_error_handler!(currstate, frame, 𝕃ᵢ) + end + if rt === Bottom ssavaluetypes[currpc] = Bottom @goto find_next_bb end if changes !== nothing stoverwrite1!(currstate, changes) - let cur_hand = frame.handler_at[currpc], l, enter - while cur_hand != 0 - enter = frame.src.code[cur_hand]::Expr - l = enter.args[1]::Int - exceptbb = block_for_inst(frame.cfg, l) - # propagate new type info to exception handler - # the handling for Expr(:enter) propagates all changes from before the try/catch - # so this only needs to propagate any changes - if stupdate1!(states[exceptbb]::VarTable, changes) - push!(W, exceptbb) - end - cur_hand = frame.handler_at[cur_hand] - end - end end - if type === nothing + if rt === nothing ssavaluetypes[currpc] = Any continue end - if !isempty(frame.ssavalue_uses[currpc]) - record_ssa_assign!(currpc, type, frame) - else - ssavaluetypes[currpc] = type - end + record_ssa_assign!(𝕃ᵢ, currpc, rt, frame) end # for currpc in bbstart:bbend # Case 1: Fallthrough termination @@ -2499,7 +3323,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) # Case 2: Directly branch to a different BB begin @label branch - if update_bbstate!(frame, nextbb, currstate) + if update_bbstate!(𝕃ᵢ, frame, nextbb, currstate) push!(W, nextbb) end end @@ -2523,35 +3347,25 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) nothing end -function conditional_change(state::VarTable, @nospecialize(typ), slot::Int) +function conditional_change(𝕃ᵢ::AbstractLattice, state::VarTable, @nospecialize(typ), slot::Int) vtype = state[slot] oldtyp = vtype.typ - # approximate test for `typ ∩ oldtyp` being better than `oldtyp` - # since we probably formed these types with `typesubstract`, the comparison is likely simple - if ignorelimited(typ) ⊑ ignorelimited(oldtyp) - # typ is better unlimited, but we may still need to compute the tmeet with the limit "causes" since we ignored those in the comparison - oldtyp isa LimitedAccuracy && (typ = 
tmerge(typ, LimitedAccuracy(Bottom, oldtyp.causes))) - return StateUpdate(SlotNumber(slot), VarState(typ, vtype.undef), state, true) + if iskindtype(typ) + # this code path corresponds to the special handling for `isa(x, iskindtype)` check + # implemented within `abstract_call_builtin` + elseif ⊑(𝕃ᵢ, ignorelimited(typ), ignorelimited(oldtyp)) + # approximate test for `typ ∩ oldtyp` being better than `oldtyp` + # since we probably formed these types with `typesubstract`, + # the comparison is likely simple + else + return nothing end - return nothing -end - -function bool_rt_to_conditional(@nospecialize(rt), slottypes::Vector{Any}, state::VarTable, slot_id::Int) - old = slottypes[slot_id] - new = widenconditional(state[slot_id].typ) # avoid nested conditional - if new ⊑ old && !(old ⊑ new) - if isa(rt, Const) - val = rt.val - if val === true - return InterConditional(slot_id, new, Bottom) - elseif val === false - return InterConditional(slot_id, Bottom, new) - end - elseif rt === Bool - return InterConditional(slot_id, new, new) - end + if oldtyp isa LimitedAccuracy + # typ is better unlimited, but we may still need to compute the tmeet with the limit + # "causes" since we ignored those in the comparison + typ = tmerge(𝕃ᵢ, typ, LimitedAccuracy(Bottom, oldtyp.causes)) end - return rt + return StateUpdate(SlotNumber(slot), VarState(typ, vtype.undef), state, true) end # make as much progress on `frame` as possible (by handling cycles) @@ -2571,7 +3385,7 @@ function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState) typeinf_local(interp, caller) no_active_ips_in_callers = false end - caller.valid_worlds = intersect(caller.valid_worlds, frame.valid_worlds) + update_valid_age!(caller, frame.valid_worlds) end end return true diff --git a/base/compiler/abstractlattice.jl b/base/compiler/abstractlattice.jl new file mode 100644 index 0000000000000..18b6a5473abd0 --- /dev/null +++ b/base/compiler/abstractlattice.jl @@ -0,0 +1,308 @@ +# TODO add more documentations + +function widenlattice end +function is_valid_lattice_norec end + +""" + struct JLTypeLattice <: AbstractLattice + +A singleton type representing the lattice of Julia types, without any inference extensions. +""" +struct JLTypeLattice <: AbstractLattice; end +widenlattice(::JLTypeLattice) = error("Type lattice is the least-precise lattice available") +is_valid_lattice_norec(::JLTypeLattice, @nospecialize(elem)) = isa(elem, Type) + +""" + struct ConstsLattice <: AbstractLattice + +A lattice extending `JLTypeLattice` and adjoining `Const` and `PartialTypeVar`. +""" +struct ConstsLattice <: AbstractLattice; end +widenlattice(::ConstsLattice) = JLTypeLattice() +is_valid_lattice_norec(::ConstsLattice, @nospecialize(elem)) = isa(elem, Const) || isa(elem, PartialTypeVar) + +""" + struct PartialsLattice{𝕃<:AbstractLattice} <: AbstractLattice + +A lattice extending a base lattice `𝕃` and adjoining `PartialStruct` and `PartialOpaque`. +""" +struct PartialsLattice{𝕃<:AbstractLattice} <: AbstractLattice + parent::𝕃 +end +widenlattice(𝕃::PartialsLattice) = 𝕃.parent +is_valid_lattice_norec(::PartialsLattice, @nospecialize(elem)) = isa(elem, PartialStruct) || isa(elem, PartialOpaque) + +""" + struct ConditionalsLattice{𝕃<:AbstractLattice} <: AbstractLattice + +A lattice extending a base lattice `𝕃` and adjoining `Conditional`. 
+""" +struct ConditionalsLattice{𝕃<:AbstractLattice} <: AbstractLattice + parent::𝕃 +end +widenlattice(𝕃::ConditionalsLattice) = 𝕃.parent +is_valid_lattice_norec(::ConditionalsLattice, @nospecialize(elem)) = isa(elem, Conditional) + +""" + struct InterConditionalsLattice{𝕃<:AbstractLattice} <: AbstractLattice + +A lattice extending a base lattice `𝕃` and adjoining `InterConditional`. +""" +struct InterConditionalsLattice{𝕃<:AbstractLattice} <: AbstractLattice + parent::𝕃 +end +widenlattice(𝕃::InterConditionalsLattice) = 𝕃.parent +is_valid_lattice_norec(::InterConditionalsLattice, @nospecialize(elem)) = isa(elem, InterConditional) + +""" + struct MustAliasesLattice{𝕃<:AbstractLattice} + +A lattice extending lattice `𝕃` and adjoining `MustAlias`. +""" +struct MustAliasesLattice{𝕃<:AbstractLattice} <: AbstractLattice + parent::𝕃 +end +widenlattice(𝕃::MustAliasesLattice) = 𝕃.parent +is_valid_lattice_norec(::MustAliasesLattice, @nospecialize(elem)) = isa(elem, MustAlias) + +""" + struct InterMustAliasesLattice{𝕃<:AbstractLattice} + +A lattice extending lattice `𝕃` and adjoining `InterMustAlias`. +""" +struct InterMustAliasesLattice{𝕃<:AbstractLattice} <: AbstractLattice + parent::𝕃 +end +widenlattice(𝕃::InterMustAliasesLattice) = 𝕃.parent +is_valid_lattice_norec(::InterMustAliasesLattice, @nospecialize(elem)) = isa(elem, InterMustAlias) + +const AnyConditionalsLattice{𝕃<:AbstractLattice} = Union{ConditionalsLattice{𝕃}, InterConditionalsLattice{𝕃}} +const AnyMustAliasesLattice{𝕃<:AbstractLattice} = Union{MustAliasesLattice{𝕃}, InterMustAliasesLattice{𝕃}} + +const SimpleInferenceLattice = typeof(PartialsLattice(ConstsLattice())) +const BaseInferenceLattice = typeof(ConditionalsLattice(SimpleInferenceLattice.instance)) +const IPOResultLattice = typeof(InterConditionalsLattice(SimpleInferenceLattice.instance)) + +""" + struct InferenceLattice{𝕃<:AbstractLattice} <: AbstractLattice + +The full lattice used for abstract interpretation during inference. +Extends a base lattice `𝕃` and adjoins `LimitedAccuracy`. +""" +struct InferenceLattice{𝕃<:AbstractLattice} <: AbstractLattice + parent::𝕃 +end +widenlattice(𝕃::InferenceLattice) = 𝕃.parent +is_valid_lattice_norec(::InferenceLattice, @nospecialize(elem)) = isa(elem, LimitedAccuracy) + +""" + tmeet(𝕃::AbstractLattice, a, b::Type) + +Compute the lattice meet of lattice elements `a` and `b` over the lattice `𝕃`, +dropping any results that will not be inhabited at runtime. +If `𝕃` is `JLTypeLattice`, this is equivalent to type intersection plus the +elimination of results that have no concrete subtypes. +Note that currently `b` is restricted to being a type +(interpreted as a lattice element in the `JLTypeLattice` sub-lattice of `𝕃`). +""" +function tmeet end + +function tmeet(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type)) + ti = typeintersect(a, b) + valid_as_lattice(ti, true) || return Bottom + return ti +end + +""" + tmerge(𝕃::AbstractLattice, a, b) + +Compute a lattice join of elements `a` and `b` over the lattice `𝕃`. +Note that the computed element need not be the least upper bound of `a` and +`b`, but rather, we impose additional limitations on the complexity of the +joined element, ideally without losing too much precision in common cases and +remaining mostly associative and commutative. +""" +function tmerge end + +""" + tmerge_field(𝕃::AbstractLattice, a, b) -> nothing or lattice element + +Compute a lattice join of elements `a` and `b` over the lattice `𝕃`, +where `a` and `b` are fields of `PartialStruct` or `Const`. 
+This is an opt-in interface to allow external lattice implementation to provide its own +field-merge strategy. If it returns `nothing`, `tmerge(::PartialsLattice, ...)` +will use the default aggressive type merge implementation that does not use `tmerge` +recursively to reach convergence. +""" +function tmerge_field end + +function tmerge_field(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) + return tmerge_field(widenlattice(𝕃), a, b) +end +tmerge_field(::JLTypeLattice, @nospecialize(a), @nospecialize(b)) = nothing + +""" + ⊑(𝕃::AbstractLattice, a, b) + +Compute the lattice ordering (i.e. less-than-or-equal) relationship between +lattice elements `a` and `b` over the lattice `𝕃`. +If `𝕃` is `JLTypeLattice`, this is equivalent to subtyping. +""" +function ⊑ end + +@nospecializeinfer ⊑(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type)) = a <: b + +""" + ⊏(𝕃::AbstractLattice, a, b) -> Bool + +The strict partial order over the type inference lattice. +This is defined as the irreflexive kernel of `⊑`. +""" +@nospecializeinfer ⊏(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = ⊑(𝕃, a, b) && !⊑(𝕃, b, a) + +""" + ⋤(𝕃::AbstractLattice, a, b) -> Bool + +This order could be used as a slightly more efficient version of the strict order `⊏`, +where we can safely assume `a ⊑ b` holds. +""" +@nospecializeinfer ⋤(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = !⊑(𝕃, b, a) + +""" + is_lattice_equal(𝕃::AbstractLattice, a, b) -> Bool + +Check if two lattice elements are partial order equivalent. +This is basically `a ⊑ b && b ⊑ a` in the lattice of `𝕃` +but (optionally) with extra performance optimizations. +""" +@nospecializeinfer function is_lattice_equal(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) + a === b && return true + return ⊑(𝕃, a, b) && ⊑(𝕃, b, a) +end + +""" + has_nontrivial_extended_info(𝕃::AbstractLattice, t) -> Bool + +Determines whether the given lattice element `t` of `𝕃` has non-trivial extended lattice +information that would not be available from the type itself. +""" +@nospecializeinfer has_nontrivial_extended_info(𝕃::AbstractLattice, @nospecialize t) = + has_nontrivial_extended_info(widenlattice(𝕃), t) +@nospecializeinfer function has_nontrivial_extended_info(𝕃::PartialsLattice, @nospecialize t) + isa(t, PartialStruct) && return true + isa(t, PartialOpaque) && return true + return has_nontrivial_extended_info(widenlattice(𝕃), t) +end +@nospecializeinfer function has_nontrivial_extended_info(𝕃::ConstsLattice, @nospecialize t) + isa(t, PartialTypeVar) && return true + if isa(t, Const) + val = t.val + return !issingletontype(typeof(val)) && !(isa(val, Type) && hasuniquerep(val)) + end + return has_nontrivial_extended_info(widenlattice(𝕃), t) +end +@nospecializeinfer has_nontrivial_extended_info(::JLTypeLattice, @nospecialize(t)) = false + +""" + is_const_prop_profitable_arg(𝕃::AbstractLattice, t) -> Bool + +Determines whether the given lattice element `t` of `𝕃` has new extended lattice information +that should be forwarded along with constant propagation. 
+""" +@nospecializeinfer is_const_prop_profitable_arg(𝕃::AbstractLattice, @nospecialize t) = + is_const_prop_profitable_arg(widenlattice(𝕃), t) +@nospecializeinfer function is_const_prop_profitable_arg(𝕃::PartialsLattice, @nospecialize t) + if isa(t, PartialStruct) + return true # might be a bit aggressive, may want to enable some check like follows: + # for i = 1:length(t.fields) + # fld = t.fields[i] + # isconstType(fld) && return true + # is_const_prop_profitable_arg(fld) && return true + # fld ⊏ fieldtype(t.typ, i) && return true + # end + # return false + end + isa(t, PartialOpaque) && return true + return is_const_prop_profitable_arg(widenlattice(𝕃), t) +end +@nospecializeinfer function is_const_prop_profitable_arg(𝕃::ConstsLattice, @nospecialize t) + if isa(t, Const) + # don't consider mutable values useful constants + val = t.val + return isa(val, Symbol) || isa(val, Type) || !ismutable(val) + end + isa(t, PartialTypeVar) && return false # this isn't forwardable + return is_const_prop_profitable_arg(widenlattice(𝕃), t) +end +@nospecializeinfer is_const_prop_profitable_arg(::JLTypeLattice, @nospecialize t) = false + +@nospecializeinfer is_forwardable_argtype(𝕃::AbstractLattice, @nospecialize(x)) = + is_forwardable_argtype(widenlattice(𝕃), x) +@nospecializeinfer function is_forwardable_argtype(𝕃::ConditionalsLattice, @nospecialize x) + isa(x, Conditional) && return true + return is_forwardable_argtype(widenlattice(𝕃), x) +end +@nospecializeinfer function is_forwardable_argtype(𝕃::PartialsLattice, @nospecialize x) + isa(x, PartialStruct) && return true + isa(x, PartialOpaque) && return true + return is_forwardable_argtype(widenlattice(𝕃), x) +end +@nospecializeinfer function is_forwardable_argtype(𝕃::ConstsLattice, @nospecialize x) + isa(x, Const) && return true + return is_forwardable_argtype(widenlattice(𝕃), x) +end +@nospecializeinfer is_forwardable_argtype(::JLTypeLattice, @nospecialize x) = false + +""" + widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) -> new_bestguess + widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) -> new_bestguess + +Appropriately converts inferred type of a return value `rt` to such a type +that we know we can store in the cache and is valid and good inter-procedurally, +E.g. if `rt isa Conditional` then `rt` should be converted to `InterConditional` +or the other cacheable lattice element. 
+ +External lattice `𝕃ᵢ::ExternalLattice` may overload: +- `widenreturn(𝕃ᵢ::ExternalLattice, @nospecialize(rt), info::BestguessInfo)` +- `widenreturn_noslotwrapper(𝕃ᵢ::ExternalLattice, @nospecialize(rt), info::BestguessInfo)` +""" +function widenreturn end, function widenreturn_noslotwrapper end + +@nospecializeinfer is_valid_lattice(𝕃::AbstractLattice, @nospecialize(elem)) = + is_valid_lattice_norec(𝕃, elem) && is_valid_lattice(widenlattice(𝕃), elem) +@nospecializeinfer is_valid_lattice(𝕃::JLTypeLattice, @nospecialize(elem)) = is_valid_lattice_norec(𝕃, elem) + +has_conditional(𝕃::AbstractLattice) = has_conditional(widenlattice(𝕃)) +has_conditional(::AnyConditionalsLattice) = true +has_conditional(::JLTypeLattice) = false + +has_mustalias(𝕃::AbstractLattice) = has_mustalias(widenlattice(𝕃)) +has_mustalias(::AnyMustAliasesLattice) = true +has_mustalias(::JLTypeLattice) = false + +has_extended_unionsplit(𝕃::AbstractLattice) = has_extended_unionsplit(widenlattice(𝕃)) +has_extended_unionsplit(::AnyMustAliasesLattice) = true +has_extended_unionsplit(::JLTypeLattice) = false + +# Curried versions +⊑(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊑(lattice, a, b) +⊏(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊏(lattice, a, b) +⋤(lattice::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⋤(lattice, a, b) + +# Fallbacks for external packages using these methods +const fallback_lattice = InferenceLattice(BaseInferenceLattice.instance) +const fallback_ipo_lattice = InferenceLattice(IPOResultLattice.instance) + +@nospecializeinfer @nospecialize(a) ⊑ @nospecialize(b) = ⊑(fallback_lattice, a, b) +@nospecializeinfer @nospecialize(a) ⊏ @nospecialize(b) = ⊏(fallback_lattice, a, b) +@nospecializeinfer @nospecialize(a) ⋤ @nospecialize(b) = ⋤(fallback_lattice, a, b) +@nospecializeinfer tmeet(@nospecialize(a), @nospecialize(b)) = tmeet(fallback_lattice, a, b) +@nospecializeinfer tmerge(@nospecialize(a), @nospecialize(b)) = tmerge(fallback_lattice, a, b) +@nospecializeinfer is_lattice_equal(@nospecialize(a), @nospecialize(b)) = is_lattice_equal(fallback_lattice, a, b) + +# Widenlattice with argument +widenlattice(::JLTypeLattice, @nospecialize(t)) = widenconst(t) +function widenlattice(𝕃::AbstractLattice, @nospecialize(t)) + is_valid_lattice_norec(𝕃, t) && return t + widenlattice(widenlattice(𝕃), t) +end diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl index f335cf31a8467..12c83df74fe50 100644 --- a/base/compiler/bootstrap.jl +++ b/base/compiler/bootstrap.jl @@ -5,17 +5,16 @@ # especially try to make sure any recursive and leaf functions have concrete signatures, # since we won't be able to specialize & infer them at runtime -time() = ccall(:jl_clock_now, Float64, ()) +let time() = ccall(:jl_clock_now, Float64, ()) -let - world = get_world_counter() - interp = NativeInterpreter(world) + interp = NativeInterpreter() - analyze_escapes_tt = Tuple{typeof(analyze_escapes), IRCode, Int, Bool, typeof(null_escape_cache)} + # analyze_escapes_tt = Tuple{typeof(analyze_escapes), IRCode, Int, TODO} + optimize_tt = Tuple{typeof(optimize), NativeInterpreter, OptimizationState{NativeInterpreter}, InferenceResult} fs = Any[ # we first create caches for the optimizer, because they contain many loop constructions # and they're better to not run in interpreter even during bootstrapping - #=analyze_escapes_tt,=# run_passes, + #=analyze_escapes_tt,=# optimize_tt, # then we create caches for inference entries typeinf_ext, typeinf, typeinf_edge, ] @@ -38,8 
+37,9 @@ let else tt = Tuple{typeof(f), Vararg{Any}} end - for m in _methods_by_ftype(tt, 10, typemax(UInt)) + for m in _methods_by_ftype(tt, 10, get_world_counter())::Vector # remove any TypeVars from the intersection + m = m::MethodMatch typ = Any[m.spec_types.parameters...] for i = 1:length(typ) typ[i] = unwraptv(typ[i]) diff --git a/base/compiler/cicache.jl b/base/compiler/cicache.jl index 294b1f0055f79..8332777e6d5bc 100644 --- a/base/compiler/cicache.jl +++ b/base/compiler/cicache.jl @@ -7,11 +7,11 @@ Internally, each `MethodInstance` keep a unique global cache of code instances that have been created for the given method instance, stratified by world age ranges. This struct abstracts over access to this cache. """ -struct InternalCodeCache -end +struct InternalCodeCache end function setindex!(cache::InternalCodeCache, ci::CodeInstance, mi::MethodInstance) ccall(:jl_mi_cache_insert, Cvoid, (Any, Any), mi, ci) + return cache end const GLOBAL_CI_CACHE = InternalCodeCache() @@ -49,11 +49,11 @@ WorldView(wvc::WorldView, wr::WorldRange) = WorldView(wvc.cache, wr) WorldView(wvc::WorldView, args...) = WorldView(wvc.cache, args...) function haskey(wvc::WorldView{InternalCodeCache}, mi::MethodInstance) - ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds))::Union{Nothing, CodeInstance} !== nothing + return ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds)) !== nothing end function get(wvc::WorldView{InternalCodeCache}, mi::MethodInstance, default) - r = ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds))::Union{Nothing, CodeInstance} + r = ccall(:jl_rettype_inferred, Any, (Any, UInt, UInt), mi, first(wvc.worlds), last(wvc.worlds)) if r === nothing return default end @@ -66,5 +66,7 @@ function getindex(wvc::WorldView{InternalCodeCache}, mi::MethodInstance) return r::CodeInstance end -setindex!(wvc::WorldView{InternalCodeCache}, ci::CodeInstance, mi::MethodInstance) = +function setindex!(wvc::WorldView{InternalCodeCache}, ci::CodeInstance, mi::MethodInstance) setindex!(wvc.cache, ci, mi) + return wvc +end diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl index 9d8de51729341..6868f3ea03477 100644 --- a/base/compiler/compiler.jl +++ b/base/compiler/compiler.jl @@ -6,7 +6,7 @@ using Core.Intrinsics, Core.IR import Core: print, println, show, write, unsafe_write, stdout, stderr, _apply_iterate, svec, apply_type, Builtin, IntrinsicFunction, - MethodInstance, CodeInstance, MethodMatch, PartialOpaque, + MethodInstance, CodeInstance, MethodTable, MethodMatch, PartialOpaque, TypeofVararg const getproperty = Core.getfield @@ -28,9 +28,51 @@ include(mod, x) = Core.include(mod, x) macro inline() Expr(:meta, :inline) end macro noinline() Expr(:meta, :noinline) end +macro _boundscheck() Expr(:boundscheck) end + convert(::Type{Any}, Core.@nospecialize x) = x convert(::Type{T}, x::T) where {T} = x +# These types are used by reflection.jl and expr.jl too, so declare them here. +# Note that `@assume_effects` is available only after loading namedtuple.jl. 
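The `EffectsOverride` bits declared just below are what `Base.@assume_effects` ultimately turns into and what `decode_effects_override` reads back during inference; a minimal usage sketch, assuming a recent Julia version (the set of recognized settings varies between releases):

```julia
# Assert local termination of the loop so the compiler can keep the
# `:terminates` bit it could not prove on its own.
Base.@assume_effects :terminates_locally function factorial_loop(n::Int)
    res = 1
    while n > 1
        res *= n
        n -= 1
    end
    return res
end
```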
+abstract type MethodTableView end +abstract type AbstractInterpreter end +struct EffectsOverride + consistent::Bool + effect_free::Bool + nothrow::Bool + terminates_globally::Bool + terminates_locally::Bool + notaskstate::Bool + inaccessiblememonly::Bool + noub::Bool + noub_if_noinbounds::Bool +end +function EffectsOverride( + override::EffectsOverride = + EffectsOverride(false, false, false, false, false, false, false, false, false); + consistent::Bool = override.consistent, + effect_free::Bool = override.effect_free, + nothrow::Bool = override.nothrow, + terminates_globally::Bool = override.terminates_globally, + terminates_locally::Bool = override.terminates_locally, + notaskstate::Bool = override.notaskstate, + inaccessiblememonly::Bool = override.inaccessiblememonly, + noub::Bool = override.noub, + noub_if_noinbounds::Bool = override.noub_if_noinbounds) + return EffectsOverride( + consistent, + effect_free, + nothrow, + terminates_globally, + terminates_locally, + notaskstate, + inaccessiblememonly, + noub, + noub_if_noinbounds) +end +const NUM_EFFECTS_OVERRIDES = 9 # sync with julia.h + # essential files and libraries include("essentials.jl") include("ctypes.jl") @@ -38,9 +80,16 @@ include("generator.jl") include("reflection.jl") include("options.jl") +ntuple(f, ::Val{0}) = () +ntuple(f, ::Val{1}) = (@inline; (f(1),)) +ntuple(f, ::Val{2}) = (@inline; (f(1), f(2))) +ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3))) +ntuple(f, ::Val{n}) where {n} = ntuple(f, n::Int) +ntuple(f, n) = (Any[f(i) for i = 1:n]...,) + # core operations & types function return_type end # promotion.jl expects this to exist -is_return_type(@Core.nospecialize(f)) = f === return_type +is_return_type(Core.@nospecialize(f)) = f === return_type include("promotion.jl") include("tuple.jl") include("pair.jl") @@ -60,6 +109,21 @@ include("refvalue.jl") # the same constructor as defined in float.jl, but with a different name to avoid redefinition _Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x)) +# fld(x,y) == div(x,y) - ((x>=0) != (y>=0) && rem(x,y) != 0 ? 1 : 0) +fld(x::T, y::T) where {T<:Unsigned} = div(x, y) +function fld(x::T, y::T) where T<:Integer + d = div(x, y) + return d - (signbit(x ⊻ y) & (d * y != x)) +end +# cld(x,y) = div(x,y) + ((x>0) == (y>0) && rem(x,y) != 0 ? 1 : 0) +function cld(x::T, y::T) where T<:Unsigned + d = div(x, y) + return d + (d * y != x) +end +function cld(x::T, y::T) where T<:Integer + d = div(x, y) + return d + (((x > 0) == (y > 0)) & (d * y != x)) +end # checked arithmetic const checked_add = + @@ -73,10 +137,12 @@ add_with_overflow(x::T, y::T) where {T<:SignedInt} = checked_sadd_int(x, y) add_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_uadd_int(x, y) add_with_overflow(x::Bool, y::Bool) = (x+y, false) +include("cmem.jl") include("strings/lazy.jl") # core array operations include("indices.jl") +include("genericmemory.jl") include("array.jl") include("abstractarray.jl") @@ -92,27 +158,15 @@ using .Iterators: zip, enumerate using .Iterators: Flatten, Filter, product # for generators include("namedtuple.jl") -ntuple(f, ::Val{0}) = () -ntuple(f, ::Val{1}) = (@inline; (f(1),)) -ntuple(f, ::Val{2}) = (@inline; (f(1), f(2))) -ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3))) -ntuple(f, ::Val{n}) where {n} = ntuple(f, n::Int) -ntuple(f, n) = (Any[f(i) for i = 1:n]...,) - # core docsystem include("docs/core.jl") import Core.Compiler.CoreDocs Core.atdoc!(CoreDocs.docm) # sorting -function sort end -function sort! 
end -function issorted end -function sortperm end include("ordering.jl") using .Order -include("sort.jl") -using .Sort +include("compiler/sort.jl") # We don't include some.jl, but this definition is still useful. something(x::Nothing, y...) = something(y...) @@ -122,21 +176,32 @@ something(x::Any, y...) = x # compiler # ############ +if false + import Base: Base, @show +else + macro show(ex...) + blk = Expr(:block) + for s in ex + push!(blk.args, :(println(stdout, $(QuoteNode(s)), " = ", + begin local value = $(esc(s)) end))) + end + isempty(ex) || push!(blk.args, :value) + blk + end +end + include("compiler/cicache.jl") +include("compiler/methodtable.jl") include("compiler/effects.jl") include("compiler/types.jl") include("compiler/utilities.jl") include("compiler/validation.jl") -include("compiler/methodtable.jl") -function argextype end # imported by EscapeAnalysis -function stmt_effect_free end # imported by EscapeAnalysis -function alloc_array_ndims end # imported by EscapeAnalysis -function try_compute_field end # imported by EscapeAnalysis include("compiler/ssair/basicblock.jl") include("compiler/ssair/domtree.jl") include("compiler/ssair/ir.jl") +include("compiler/abstractlattice.jl") include("compiler/inferenceresult.jl") include("compiler/inferencestate.jl") @@ -148,28 +213,13 @@ include("compiler/stmtinfo.jl") include("compiler/abstractinterpretation.jl") include("compiler/typeinfer.jl") -include("compiler/optimize.jl") # TODO: break this up further + extract utilities - -# required for bootstrap because sort.jl uses extrema -# to decide whether to dispatch to counting sort. -# -# TODO: remove it. -function extrema(x::Array) - isempty(x) && throw(ArgumentError("collection must be non-empty")) - vmin = vmax = x[1] - for i in 2:length(x) - xi = x[i] - vmax = max(vmax, xi) - vmin = min(vmin, xi) - end - return vmin, vmax -end +include("compiler/optimize.jl") include("compiler/bootstrap.jl") ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) include("compiler/parsing.jl") -Core.eval(Core, :(_parse = Compiler.fl_parse)) +Core._setparser!(fl_parse) end # baremodule Compiler )) diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl index 9e041dca3a733..a864925b23eb1 100644 --- a/base/compiler/effects.jl +++ b/base/compiler/effects.jl @@ -1,186 +1,350 @@ -struct TriState; state::UInt8; end -const ALWAYS_FALSE = TriState(0x00) -const ALWAYS_TRUE = TriState(0x01) -const TRISTATE_UNKNOWN = TriState(0x02) - -function tristate_merge(old::TriState, new::TriState) - (old === ALWAYS_FALSE || new === ALWAYS_FALSE) && return ALWAYS_FALSE - old === TRISTATE_UNKNOWN && return old - return new -end - """ effects::Effects Represents computational effects of a method call. -The effects are composed of the following set of different properties: -- `effects.consistent::TriState`: this method is guaranteed to return or terminate consistently -- `effect_free::TriState`: this method is free from externally semantically visible side effects -- `nothrow::TriState`: this method is guaranteed to not throw an exception -- `terminates::TriState`: this method is guaranteed to terminate -- `nonoverlayed::Bool`: indicates that any methods that may be called within this method - are not defined in an [overlayed method table](@ref OverlayMethodTable) -- `notaskstate::TriState`: this method does not access any state bound to the current +The effects are a composition of different effect bits that represent some program property +of the method being analyzed. 
They are represented as `Bool` or `UInt8` bits with the
+following meanings:
+- `consistent::UInt8`:
+  * `ALWAYS_TRUE`: this method is guaranteed to return or terminate consistently.
+  * `ALWAYS_FALSE`: this method may not return or terminate consistently, and there is
+    no need for further analysis with respect to this effect property as this conclusion
+    will not be refined anyway.
+  * `CONSISTENT_IF_NOTRETURNED`: the `:consistent`-cy of this method can later be refined to
+    `ALWAYS_TRUE` in a case when the return value of this method never involves newly
+    allocated mutable objects.
+  * `CONSISTENT_IF_INACCESSIBLEMEMONLY`: the `:consistent`-cy of this method can later be
+    refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven.
+- `effect_free::UInt8`:
+  * `ALWAYS_TRUE`: this method is free from externally semantically visible side effects.
+  * `ALWAYS_FALSE`: this method may not be free from externally semantically visible side effects, and there is
+    no need for further analysis with respect to this effect property as this conclusion
+    will not be refined anyway.
+  * `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`: the `:effect-free`-ness of this method can later be
+    refined to `ALWAYS_TRUE` in a case when `:inaccessiblememonly` is proven.
+- `nothrow::Bool`: this method is guaranteed to not throw an exception.
+  If the execution of this method may raise `MethodError`s and similar exceptions, then
+  the method is not considered as `:nothrow`.
+  However, note that environment-dependent errors like `StackOverflowError` or `InterruptException`
+  are not modeled by this effect and thus a method that may result in `StackOverflowError`
+  does not necessarily need to taint `:nothrow` (although it should usually taint `:terminates` too).
+- `terminates::Bool`: this method is guaranteed to terminate.
+- `notaskstate::Bool`: this method does not access any state bound to the current
   task and may thus be moved to a different task without changing observable behavior.
   Note that this currently implies `noyield` as well, since yielding modifies the state
   of the current task, though this may be split in the future.
-See [`Base.@assume_effects`](@ref) for more detailed explanation on the definitions of these properties.
-
-Along the abstract interpretation, `Effects` at each statement are analyzed locally and
-they are merged into the single global `Effects` that represents the entire effects of
-the analyzed method (see `tristate_merge!`).
-Each effect property is represented as tri-state and managed separately.
-The tri-state consists of `ALWAYS_TRUE`, `TRISTATE_UNKNOWN` and `ALWAYS_FALSE`, where they
-have the following meanings:
-- `ALWAYS_TRUE`: this method is guaranteed to not have this effect.
-- `ALWAYS_FALSE`: this method may have this effect, and there is no need to do any further
-  analysis w.r.t. this effect property as this conclusion will not be refined anyway.
-- `TRISTATE_UNKNOWN`: this effect property may still be refined to `ALWAYS_TRUE` or
-  `ALWAYS_FALSE`, e.g. using return type information.
-
-An effect property is initialized with `ALWAYS_TRUE` and then transitioned towards
-`ALWAYS_FALSE`. When we find a statement that has some effect, either of `TRISTATE_UNKNOWN`
-or `ALWAYS_FALSE` is propagated. Note that however, within the current flow-insensitive
-analysis design, it is usually difficult to derive a global conclusion accurately from local
-analysis on each statement, and therefore, the effect analysis usually propagates the
-`ALWAYS_FALSE` state conservatively.
+- `inaccessiblememonly::UInt8`:
+  * `ALWAYS_TRUE`: this method does not access or modify externally accessible mutable memory.
+    This state corresponds to LLVM's `inaccessiblememonly` function attribute.
+  * `ALWAYS_FALSE`: this method may access or modify externally accessible mutable memory.
+  * `INACCESSIBLEMEM_OR_ARGMEMONLY`: this method does not access or modify externally accessible mutable memory,
+    except that it may access or modify mutable memory pointed to by its call arguments.
+    This may later be refined to `ALWAYS_TRUE` in a case when call arguments are known to be immutable.
+    This state corresponds to LLVM's `inaccessiblemem_or_argmemonly` function attribute.
+- `noub::Bool`: indicates that the method will not execute any undefined behavior (for any input).
+  Note that undefined behavior may technically cause the method to violate any other effect
+  assertions (such as `:consistent` or `:effect_free`) as well, but we do not model this,
+  and those assertions assume the absence of undefined behavior.
+- `nonoverlayed::Bool`: indicates that any methods that may be called within this method
+  are not defined in an [overlayed method table](@ref OverlayMethodTable).
+- `noinbounds::Bool`: If set, indicates that this method does not read the parent's `:inbounds`
+  state. In particular, it does not have any reached `:boundscheck` exprs, nor does it propagate
+  inbounds to any children that do.
+
+Note that the representations above are just internal implementation details and thus likely
+to change in the future. See [`Base.@assume_effects`](@ref) for more detailed explanation
+on the definitions of these properties.
+
+Along the abstract interpretation, `Effects` at each statement are analyzed locally and they
+are merged into the single global `Effects` that represents the entire effects of the
+analyzed method (see the implementation of `merge_effects!`). Each effect property is
+initialized with `ALWAYS_TRUE`/`true` and then transitioned towards `ALWAYS_FALSE`/`false`.
+Note that within the current flow-insensitive analysis design, effects detected by local
+analysis on each statement usually taint the global conclusion conservatively.
+
+## Key for `show` output of Effects:
+
+The output represents the state of different effect properties in the following order:
+
+1. `consistent` (`c`):
+   - `+c` (green): `ALWAYS_TRUE`
+   - `-c` (red): `ALWAYS_FALSE`
+   - `?c` (yellow): `CONSISTENT_IF_NOTRETURNED` and/or `CONSISTENT_IF_INACCESSIBLEMEMONLY`
+2. `effect_free` (`e`):
+   - `+e` (green): `ALWAYS_TRUE`
+   - `-e` (red): `ALWAYS_FALSE`
+   - `?e` (yellow): `EFFECT_FREE_IF_INACCESSIBLEMEMONLY`
+3. `nothrow` (`n`):
+   - `+n` (green): `true`
+   - `-n` (red): `false`
+4. `terminates` (`t`):
+   - `+t` (green): `true`
+   - `-t` (red): `false`
+5. `notaskstate` (`s`):
+   - `+s` (green): `true`
+   - `-s` (red): `false`
+6. `inaccessiblememonly` (`m`):
+   - `+m` (green): `ALWAYS_TRUE`
+   - `-m` (red): `ALWAYS_FALSE`
+   - `?m` (yellow): `INACCESSIBLEMEM_OR_ARGMEMONLY`
+7. `noub` (`u`):
+   - `+u` (green): `true`
+   - `-u` (red): `false`
+8. `noinbounds` (`i`):
+   - `+i` (green): `true`
+   - `-i` (red): `false`
+
+Additionally, if the `nonoverlayed` property is false, a red prime symbol (′) is displayed after the tuple.
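To make the key above concrete: one hedged way to see these annotations is the internal `Base.infer_effects` helper, assuming a Julia build that already carries this `Effects` layout; the helper is unexported and the exact glyphs it prints may differ between versions.

```julia
# Illustrative sketch only; `Base.infer_effects` is internal and its output format is not stable.
add1(x::Int) = x + 1    # plain integer arithmetic: expected to come out fully "green"
boxit(x) = Ref(x)       # allocates and returns a new mutable object

Base.infer_effects(add1, (Int,))   # e.g. (+c,+e,+n,+t,+s,+m,+u): every tracked bit holds
Base.infer_effects(boxit, (Int,))  # `:consistent` stays tainted because the fresh `Ref`
                                   # escapes through the return value, so the
                                   # CONSISTENT_IF_NOTRETURNED refinement cannot fire
```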
""" struct Effects - consistent::TriState - effect_free::TriState - nothrow::TriState - terminates::TriState + consistent::UInt8 + effect_free::UInt8 + nothrow::Bool + terminates::Bool + notaskstate::Bool + inaccessiblememonly::UInt8 + noub::UInt8 nonoverlayed::Bool - notaskstate::TriState - # This effect is currently only tracked in inference and modified - # :consistent before caching. We may want to track it in the future. - inbounds_taints_consistency::Bool + function Effects( + consistent::UInt8, + effect_free::UInt8, + nothrow::Bool, + terminates::Bool, + notaskstate::Bool, + inaccessiblememonly::UInt8, + noub::UInt8, + nonoverlayed::Bool) + return new( + consistent, + effect_free, + nothrow, + terminates, + notaskstate, + inaccessiblememonly, + noub, + nonoverlayed) + end end -function Effects( - consistent::TriState, - effect_free::TriState, - nothrow::TriState, - terminates::TriState, - nonoverlayed::Bool, - notaskstate::TriState) + +const ALWAYS_TRUE = 0x00 +const ALWAYS_FALSE = 0x01 + +# :consistent-cy bits +const CONSISTENT_IF_NOTRETURNED = 0x01 << 1 +const CONSISTENT_IF_INACCESSIBLEMEMONLY = 0x01 << 2 + +# :effect_free-ness bits +const EFFECT_FREE_IF_INACCESSIBLEMEMONLY = 0x02 + +""" +`EFFECT_FREE_GLOBALLY` means that the statement is `:effect-free` and does not have a +caller-visible effect, but may not be removed from the function itself. This may e.g. +be used for effects that last only for the scope of the current function. +""" +const EFFECT_FREE_GLOBALLY = 0x03 + +# :inaccessiblememonly bits +const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1 + +# :noub bits +const NOUB_IF_NOINBOUNDS = 0x01 << 1 + +const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, true, true, true, ALWAYS_TRUE, ALWAYS_TRUE, true) +const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, false, true, true, ALWAYS_TRUE, ALWAYS_TRUE, true) +const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, true) # unknown mostly, but it's not overlayed at least (e.g. it's not a call) +const _EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, false) # unknown really + +function Effects(effects::Effects = _EFFECTS_UNKNOWN; + consistent::UInt8 = effects.consistent, + effect_free::UInt8 = effects.effect_free, + nothrow::Bool = effects.nothrow, + terminates::Bool = effects.terminates, + notaskstate::Bool = effects.notaskstate, + inaccessiblememonly::UInt8 = effects.inaccessiblememonly, + noub::UInt8 = effects.noub, + nonoverlayed::Bool = effects.nonoverlayed) return Effects( consistent, effect_free, nothrow, terminates, - nonoverlayed, notaskstate, - false) + inaccessiblememonly, + noub, + nonoverlayed) end -const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, true, ALWAYS_TRUE) -const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_FALSE, ALWAYS_TRUE, true, ALWAYS_TRUE) -const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE, true, ALWAYS_FALSE) # mostly unknown, but it's not overlayed at least (e.g. 
it's not a call) -const EFFECTS_UNKNOWN′ = Effects(ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE, false, ALWAYS_FALSE) # unknown, really - -function Effects(e::Effects = EFFECTS_UNKNOWN′; - consistent::TriState = e.consistent, - effect_free::TriState = e.effect_free, - nothrow::TriState = e.nothrow, - terminates::TriState = e.terminates, - nonoverlayed::Bool = e.nonoverlayed, - notaskstate::TriState = e.notaskstate, - inbounds_taints_consistency::Bool = e.inbounds_taints_consistency) +function is_better_effects(new::Effects, old::Effects) + any_improved = false + if new.consistent == ALWAYS_TRUE + any_improved |= old.consistent != ALWAYS_TRUE + else + if !iszero(new.consistent & CONSISTENT_IF_NOTRETURNED) + old.consistent == ALWAYS_TRUE && return false + any_improved |= iszero(old.consistent & CONSISTENT_IF_NOTRETURNED) + elseif !iszero(new.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY) + old.consistent == ALWAYS_TRUE && return false + any_improved |= iszero(old.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY) + else + return false + end + end + if new.effect_free == ALWAYS_TRUE + any_improved |= old.consistent != ALWAYS_TRUE + elseif new.effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY + old.effect_free == ALWAYS_TRUE && return false + any_improved |= old.effect_free != EFFECT_FREE_IF_INACCESSIBLEMEMONLY + elseif new.effect_free != old.effect_free + return false + end + if new.nothrow + any_improved |= !old.nothrow + elseif new.nothrow != old.nothrow + return false + end + if new.terminates + any_improved |= !old.terminates + elseif new.terminates != old.terminates + return false + end + if new.notaskstate + any_improved |= !old.notaskstate + elseif new.notaskstate != old.notaskstate + return false + end + if new.inaccessiblememonly == ALWAYS_TRUE + any_improved |= old.inaccessiblememonly != ALWAYS_TRUE + elseif new.inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY + old.inaccessiblememonly == ALWAYS_TRUE && return false + any_improved |= old.inaccessiblememonly != INACCESSIBLEMEM_OR_ARGMEMONLY + elseif new.inaccessiblememonly != old.inaccessiblememonly + return false + end + if new.noub == ALWAYS_TRUE + any_improved |= old.noub != ALWAYS_TRUE + elseif new.noub == NOUB_IF_NOINBOUNDS + old.noub == ALWAYS_TRUE && return false + any_improved |= old.noub != NOUB_IF_NOINBOUNDS + elseif new.noub != old.noub + return false + end + if new.nonoverlayed + any_improved |= !old.nonoverlayed + elseif new.nonoverlayed != old.nonoverlayed + return false + end + return any_improved +end + +function merge_effects(old::Effects, new::Effects) return Effects( - consistent, - effect_free, - nothrow, - terminates, - nonoverlayed, - notaskstate, - inbounds_taints_consistency) + merge_effectbits(old.consistent, new.consistent), + merge_effectbits(old.effect_free, new.effect_free), + merge_effectbits(old.nothrow, new.nothrow), + merge_effectbits(old.terminates, new.terminates), + merge_effectbits(old.notaskstate, new.notaskstate), + merge_effectbits(old.inaccessiblememonly, new.inaccessiblememonly), + merge_effectbits(old.noub, new.noub), + merge_effectbits(old.nonoverlayed, new.nonoverlayed)) +end + +function merge_effectbits(old::UInt8, new::UInt8) + if old === ALWAYS_FALSE || new === ALWAYS_FALSE + return ALWAYS_FALSE + end + return old | new end +merge_effectbits(old::Bool, new::Bool) = old & new -is_consistent(effects::Effects) = effects.consistent === ALWAYS_TRUE -is_effect_free(effects::Effects) = effects.effect_free === ALWAYS_TRUE -is_nothrow(effects::Effects) = effects.nothrow === 
ALWAYS_TRUE -is_terminates(effects::Effects) = effects.terminates === ALWAYS_TRUE -is_notaskstate(effects::Effects) = effects.notaskstate === ALWAYS_TRUE -is_nonoverlayed(effects::Effects) = effects.nonoverlayed +is_consistent(effects::Effects) = effects.consistent === ALWAYS_TRUE +is_effect_free(effects::Effects) = effects.effect_free === ALWAYS_TRUE +is_nothrow(effects::Effects) = effects.nothrow +is_terminates(effects::Effects) = effects.terminates +is_notaskstate(effects::Effects) = effects.notaskstate +is_inaccessiblememonly(effects::Effects) = effects.inaccessiblememonly === ALWAYS_TRUE +is_noub(effects::Effects) = effects.noub === ALWAYS_TRUE +is_noub_if_noinbounds(effects::Effects) = effects.noub === NOUB_IF_NOINBOUNDS +is_nonoverlayed(effects::Effects) = effects.nonoverlayed -# implies :notaskstate, but not explicitly checked here +# implies `is_notaskstate` & `is_inaccessiblememonly`, but not explicitly checked here is_foldable(effects::Effects) = is_consistent(effects) && + (is_noub(effects) || is_noub_if_noinbounds(effects)) && is_effect_free(effects) && is_terminates(effects) -is_total(effects::Effects) = +is_foldable_nothrow(effects::Effects) = is_foldable(effects) && is_nothrow(effects) +# TODO add `is_noub` here? is_removable_if_unused(effects::Effects) = is_effect_free(effects) && is_terminates(effects) && is_nothrow(effects) +is_finalizer_inlineable(effects::Effects) = + is_nothrow(effects) && + is_notaskstate(effects) + +is_consistent_if_notreturned(effects::Effects) = !iszero(effects.consistent & CONSISTENT_IF_NOTRETURNED) +is_consistent_if_inaccessiblememonly(effects::Effects) = !iszero(effects.consistent & CONSISTENT_IF_INACCESSIBLEMEMONLY) + +is_effect_free_if_inaccessiblememonly(effects::Effects) = !iszero(effects.effect_free & EFFECT_FREE_IF_INACCESSIBLEMEMONLY) + +is_inaccessiblemem_or_argmemonly(effects::Effects) = effects.inaccessiblememonly === INACCESSIBLEMEM_OR_ARGMEMONLY + function encode_effects(e::Effects) - return (e.consistent.state << 0) | - (e.effect_free.state << 2) | - (e.nothrow.state << 4) | - (e.terminates.state << 6) | - (UInt32(e.nonoverlayed) << 8) | - (UInt32(e.notaskstate.state) << 9) -end -function decode_effects(e::UInt32) - return Effects( - TriState((e >> 0) & 0x03), - TriState((e >> 2) & 0x03), - TriState((e >> 4) & 0x03), - TriState((e >> 6) & 0x03), - _Bool( (e >> 8) & 0x01), - TriState((e >> 9) & 0x03), - false) + return ((e.consistent % UInt32) << 0) | + ((e.effect_free % UInt32) << 3) | + ((e.nothrow % UInt32) << 5) | + ((e.terminates % UInt32) << 6) | + ((e.notaskstate % UInt32) << 7) | + ((e.inaccessiblememonly % UInt32) << 8) | + ((e.noub % UInt32) << 10) | + ((e.nonoverlayed % UInt32) << 12) end -function tristate_merge(old::Effects, new::Effects) +function decode_effects(e::UInt32) return Effects( - tristate_merge( - old.consistent, new.consistent), - tristate_merge( - old.effect_free, new.effect_free), - tristate_merge( - old.nothrow, new.nothrow), - tristate_merge( - old.terminates, new.terminates), - old.nonoverlayed & new.nonoverlayed, - tristate_merge( - old.notaskstate, new.notaskstate), - old.inbounds_taints_consistency | new.inbounds_taints_consistency) -end - -struct EffectsOverride - consistent::Bool - effect_free::Bool - nothrow::Bool - terminates_globally::Bool - terminates_locally::Bool - notaskstate::Bool + UInt8((e >> 0) & 0x07), + UInt8((e >> 3) & 0x03), + _Bool((e >> 5) & 0x01), + _Bool((e >> 6) & 0x01), + _Bool((e >> 7) & 0x01), + UInt8((e >> 8) & 0x03), + UInt8((e >> 10) & 0x03), + _Bool((e >> 12) & 0x01)) 
end function encode_effects_override(eo::EffectsOverride) - e = 0x00 - eo.consistent && (e |= 0x01) - eo.effect_free && (e |= 0x02) - eo.nothrow && (e |= 0x04) - eo.terminates_globally && (e |= 0x08) - eo.terminates_locally && (e |= 0x10) - eo.notaskstate && (e |= 0x20) + e = 0x0000 + eo.consistent && (e |= (0x0001 << 0)) + eo.effect_free && (e |= (0x0001 << 1)) + eo.nothrow && (e |= (0x0001 << 2)) + eo.terminates_globally && (e |= (0x0001 << 3)) + eo.terminates_locally && (e |= (0x0001 << 4)) + eo.notaskstate && (e |= (0x0001 << 5)) + eo.inaccessiblememonly && (e |= (0x0001 << 6)) + eo.noub && (e |= (0x0001 << 7)) + eo.noub_if_noinbounds && (e |= (0x0001 << 8)) return e end -function decode_effects_override(e::UInt8) +function decode_effects_override(e::UInt16) return EffectsOverride( - (e & 0x01) != 0x00, - (e & 0x02) != 0x00, - (e & 0x04) != 0x00, - (e & 0x08) != 0x00, - (e & 0x10) != 0x00, - (e & 0x20) != 0x00) + !iszero(e & (0x0001 << 0)), + !iszero(e & (0x0001 << 1)), + !iszero(e & (0x0001 << 2)), + !iszero(e & (0x0001 << 3)), + !iszero(e & (0x0001 << 4)), + !iszero(e & (0x0001 << 5)), + !iszero(e & (0x0001 << 6)), + !iszero(e & (0x0001 << 7)), + !iszero(e & (0x0001 << 8))) end + +decode_statement_effects_override(ssaflag::UInt32) = + decode_effects_override(UInt16((ssaflag >> NUM_IR_FLAGS) & (1 << NUM_EFFECTS_OVERRIDES - 1))) diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl index 1e570b943d968..86eed13686ae9 100644 --- a/base/compiler/inferenceresult.jl +++ b/base/compiler/inferenceresult.jl @@ -1,65 +1,98 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -function is_argtype_match(@nospecialize(given_argtype), - @nospecialize(cache_argtype), - overridden_by_const::Bool) - if is_forwardable_argtype(given_argtype) - return is_lattice_equal(given_argtype, cache_argtype) - end - return !overridden_by_const +""" + matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance) -> + (cache_argtypes::Vector{Any}, overridden_by_const::BitVector) + +Returns argument types `cache_argtypes::Vector{Any}` for `linfo` that are in the native +Julia type domain. `overridden_by_const::BitVector` is all `false` meaning that +there is no additional extended lattice information there. + + matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::ForwardableArgtypes) -> + (cache_argtypes::Vector{Any}, overridden_by_const::BitVector) + +Returns cache-correct extended lattice argument types `cache_argtypes::Vector{Any}` +for `linfo` given some `argtypes` accompanied by `overridden_by_const::BitVector` +that marks which argument contains additional extended lattice information. + +In theory, there could be a `cache` containing a matching `InferenceResult` +for the provided `linfo` and `given_argtypes`. The purpose of this function is +to return a valid value for `cache_lookup(𝕃, linfo, argtypes, cache).argtypes`, +so that we can construct cache-correct `InferenceResult`s in the first place. +""" +function matching_cache_argtypes end + +function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance) + mthd = isa(linfo.def, Method) ? 
linfo.def::Method : nothing + cache_argtypes = most_general_argtypes(mthd, linfo.specTypes) + return cache_argtypes, falses(length(cache_argtypes)) end -function is_forwardable_argtype(@nospecialize x) - return isa(x, Const) || - isa(x, Conditional) || - isa(x, PartialStruct) || - isa(x, PartialOpaque) +struct SimpleArgtypes <: ForwardableArgtypes + argtypes::Vector{Any} end -# In theory, there could be a `cache` containing a matching `InferenceResult` -# for the provided `linfo` and `given_argtypes`. The purpose of this function is -# to return a valid value for `cache_lookup(linfo, argtypes, cache).argtypes`, -# so that we can construct cache-correct `InferenceResult`s in the first place. -function matching_cache_argtypes( - linfo::MethodInstance, (arginfo, sv)#=::Tuple{ArgInfo,InferenceState}=#) - (; fargs, argtypes) = arginfo - def = linfo.def - @assert isa(def, Method) # ensure the next line works - nargs::Int = def.nargs - cache_argtypes, overridden_by_const = matching_cache_argtypes(linfo, nothing) +""" + matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::SimpleArgtypes) + +The implementation for `argtypes` with general extended lattice information. +This is supposed to be used for debugging and testing or external `AbstractInterpreter` +usages and in general `matching_cache_argtypes(::MethodInstance, ::ConditionalArgtypes)` +is more preferred it can forward `Conditional` information. +""" +function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, simple_argtypes::SimpleArgtypes) + (; argtypes) = simple_argtypes given_argtypes = Vector{Any}(undef, length(argtypes)) - local condargs = nothing - for i in 1:length(argtypes) - argtype = argtypes[i] - # forward `Conditional` if it conveys a constraint on any other argument - if isa(argtype, Conditional) && fargs !== nothing - cnd = argtype - slotid = find_constrained_arg(cnd, fargs, sv) - if slotid !== nothing - # using union-split signature, we may be able to narrow down `Conditional` - sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid]) - thentype = tmeet(cnd.thentype, sigt) - elsetype = tmeet(cnd.elsetype, sigt) - if thentype === Bottom && elsetype === Bottom - # we accidentally proved this method match is impossible - # TODO bail out here immediately rather than just propagating Bottom ? 
- given_argtypes[i] = Bottom - else - if condargs === nothing - condargs = Tuple{Int,Int}[] - end - push!(condargs, (slotid, i)) - given_argtypes[i] = Conditional(slotid, thentype, elsetype) - end - continue + for i = 1:length(argtypes) + given_argtypes[i] = widenslotwrapper(argtypes[i]) + end + given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo) + return pick_const_args(𝕃, linfo, given_argtypes) +end + +function pick_const_args(𝕃::AbstractLattice, linfo::MethodInstance, given_argtypes::Vector{Any}) + cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo) + return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes) +end + +function pick_const_args!(𝕃::AbstractLattice, cache_argtypes::Vector{Any}, overridden_by_const::BitVector, given_argtypes::Vector{Any}) + for i = 1:length(given_argtypes) + given_argtype = given_argtypes[i] + cache_argtype = cache_argtypes[i] + if !is_argtype_match(𝕃, given_argtype, cache_argtype, false) + # prefer the argtype we were given over the one computed from `linfo` + if (isa(given_argtype, PartialStruct) && isa(cache_argtype, Type) && + !⊏(𝕃, given_argtype, cache_argtype)) + # if the type information of this `PartialStruct` is less strict than + # declared method signature, narrow it down using `tmeet` + given_argtype = tmeet(𝕃, given_argtype, cache_argtype) end + cache_argtypes[i] = given_argtype + overridden_by_const[i] = true end - given_argtypes[i] = widenconditional(argtype) end - isva = def.isva + return cache_argtypes, overridden_by_const +end + +function is_argtype_match(𝕃::AbstractLattice, + @nospecialize(given_argtype), + @nospecialize(cache_argtype), + overridden_by_const::Bool) + if is_forwardable_argtype(𝕃, given_argtype) + return is_lattice_equal(𝕃, given_argtype, cache_argtype) + end + return !overridden_by_const +end + +va_process_argtypes(𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance) = + va_process_argtypes(Returns(nothing), 𝕃, given_argtypes, mi) +function va_process_argtypes(@specialize(va_handler!), 𝕃::AbstractLattice, given_argtypes::Vector{Any}, mi::MethodInstance) + def = mi.def + isva = isa(def, Method) ? def.isva : false + nargs = isa(def, Method) ? 
Int(def.nargs) : length(mi.specTypes.parameters) if isva || isvarargtype(given_argtypes[end]) isva_given_argtypes = Vector{Any}(undef, nargs) - for i = 1:(nargs - isva) + for i = 1:(nargs-isva) isva_given_argtypes[i] = argtype_by_index(given_argtypes, i) end if isva @@ -68,29 +101,13 @@ function matching_cache_argtypes( else last = nargs end - isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[last:end]) - # invalidate `Conditional` imposed on varargs - if condargs !== nothing - for (slotid, i) in condargs - if slotid ≥ last - isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i]) - end - end - end + isva_given_argtypes[nargs] = tuple_tfunc(𝕃, given_argtypes[last:end]) + va_handler!(isva_given_argtypes, last) end - given_argtypes = isva_given_argtypes + return isva_given_argtypes end - @assert length(given_argtypes) == nargs - for i in 1:nargs - given_argtype = given_argtypes[i] - cache_argtype = cache_argtypes[i] - if !is_argtype_match(given_argtype, cache_argtype, false) - # prefer the argtype we were given over the one computed from `linfo` - cache_argtypes[i] = given_argtype - overridden_by_const[i] = true - end - end - return cache_argtypes, overridden_by_const + @assert length(given_argtypes) == nargs "invalid `given_argtypes` for `mi`" + return given_argtypes end function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(specTypes), @@ -99,18 +116,16 @@ function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(spe isva = !toplevel && method.isva linfo_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...] nargs::Int = toplevel ? 0 : method.nargs - if !withfirst - # For opaque closure, the closure environment is processed elsewhere - nargs -= 1 - end + # For opaque closure, the closure environment is processed elsewhere + withfirst || (nargs -= 1) cache_argtypes = Vector{Any}(undef, nargs) # First, if we're dealing with a varargs method, then we set the last element of `args` # to the appropriate `Tuple` type or `PartialStruct` instance. if !toplevel && isva if specTypes::Type == Tuple + linfo_argtypes = Any[Any for i = 1:nargs] if nargs > 1 - linfo_argtypes = Any[Any for i = 1:nargs] - linfo_argtypes[end] = Vararg{Any} + linfo_argtypes[end] = Tuple end vargtype = Tuple else @@ -132,14 +147,14 @@ function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(spe end for i in 1:length(vargtype_elements) atyp = vargtype_elements[i] - if isa(atyp, DataType) && isdefined(atyp, :instance) + if issingletontype(atyp) # replace singleton types with their equivalent Const object vargtype_elements[i] = Const(atyp.instance) elseif isconstType(atyp) vargtype_elements[i] = Const(atyp.parameters[1]) end end - vargtype = tuple_tfunc(vargtype_elements) + vargtype = tuple_tfunc(fallback_lattice, vargtype_elements) end end cache_argtypes[nargs] = vargtype @@ -161,7 +176,7 @@ function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(spe tail_index -= 1 end atyp = unwraptv(atyp) - if isa(atyp, DataType) && isdefined(atyp, :instance) + if issingletontype(atyp) # replace singleton types with their equivalent Const object atyp = Const(atyp.instance) elseif isconstType(atyp) @@ -194,37 +209,29 @@ function elim_free_typevars(@nospecialize t) end end -function matching_cache_argtypes(linfo::MethodInstance, ::Nothing) - mthd = isa(linfo.def, Method) ? 
linfo.def::Method : nothing - cache_argtypes = most_general_argtypes(mthd, linfo.specTypes) - return cache_argtypes, falses(length(cache_argtypes)) -end - -function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult}) +function cache_lookup(𝕃::AbstractLattice, linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult}) method = linfo.def::Method - nargs::Int = method.nargs + nargs = Int(method.nargs) method.isva && (nargs -= 1) - length(given_argtypes) >= nargs || return nothing + length(given_argtypes) ≥ nargs || return nothing for cached_result in cache cached_result.linfo === linfo || continue - cache_match = true cache_argtypes = cached_result.argtypes cache_overridden_by_const = cached_result.overridden_by_const for i in 1:nargs - if !is_argtype_match(given_argtypes[i], - cache_argtypes[i], - cache_overridden_by_const[i]) - cache_match = false - break + if !is_argtype_match(𝕃, widenmustalias(given_argtypes[i]), + cache_argtypes[i], cache_overridden_by_const[i]) + @goto next_cache end end - if method.isva && cache_match - cache_match = is_argtype_match(tuple_tfunc(given_argtypes[(nargs + 1):end]), - cache_argtypes[end], - cache_overridden_by_const[end]) + if method.isva + if !is_argtype_match(𝕃, tuple_tfunc(𝕃, given_argtypes[(nargs + 1):end]), + cache_argtypes[end], cache_overridden_by_const[end]) + @goto next_cache + end end - cache_match || continue return cached_result + @label next_cache end return nothing end diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl index 45e75ed05573a..1961099e1d02f 100644 --- a/base/compiler/inferencestate.jl +++ b/base/compiler/inferencestate.jl @@ -1,22 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# The type of a variable load is either a value or an UndefVarError -# (only used in abstractinterpret, doesn't appear in optimize) -struct VarState - typ - undef::Bool - VarState(@nospecialize(typ), undef::Bool) = new(typ, undef) -end - -""" - const VarTable = Vector{VarState} - -The extended lattice that maps local variables to inferred type represented as `AbstractLattice`. -Each index corresponds to the `id` of `SlotNumber` which identifies each local variable. -Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement -to enable flow-sensitive analysis. -""" -const VarTable = Vector{VarState} +# data structures +# =============== mutable struct BitSetBoundedMinPrioritySet <: AbstractSet{Int} elems::BitSet @@ -80,27 +65,175 @@ function in(idx::Int, bsbmp::BitSetBoundedMinPrioritySet) return idx in bsbmp.elems end +iterate(bsbmp::BitSetBoundedMinPrioritySet, s...) = iterate(bsbmp.elems, s...) + +function append!(bsbmp::BitSetBoundedMinPrioritySet, itr) + for val in itr + push!(bsbmp, val) + end +end + +mutable struct TwoPhaseVectorView <: AbstractVector{Int} + const data::Vector{Int} + count::Int + const range::UnitRange{Int} +end +size(tpvv::TwoPhaseVectorView) = (tpvv.count,) +function getindex(tpvv::TwoPhaseVectorView, i::Int) + checkbounds(tpvv, i) + @inbounds tpvv.data[first(tpvv.range) + i - 1] +end +function push!(tpvv::TwoPhaseVectorView, v::Int) + tpvv.count += 1 + tpvv.data[first(tpvv.range) + tpvv.count - 1] = v + return nothing +end + +""" + mutable struct TwoPhaseDefUseMap + +This struct is intended as a memory- and GC-pressure-efficient mechanism +for incrementally computing def-use maps. The idea is that the def-use map +is constructed into two passes over the IR. 
In the first, we simply count the +the number of uses, computing the number of uses for each def as well as the +total number of uses. In the second pass, we actually fill in the def-use +information. + +The idea is that either of these two phases can be combined with other useful +work that needs to scan the instruction stream anyway, while avoiding the +significant allocation pressure of e.g. allocating an array for every SSA value +or attempting to dynamically move things around as new uses are discovered. + +The def-use map is presented as a vector of vectors. For every def, indexing +into the map will return a vector of uses. +""" +mutable struct TwoPhaseDefUseMap <: AbstractVector{TwoPhaseVectorView} + ssa_uses::Vector{Int} + data::Vector{Int} + complete::Bool +end + +function complete!(tpdum::TwoPhaseDefUseMap) + cumsum = 0 + for i = 1:length(tpdum.ssa_uses) + this_val = cumsum + 1 + cumsum += tpdum.ssa_uses[i] + tpdum.ssa_uses[i] = this_val + end + resize!(tpdum.data, cumsum) + fill!(tpdum.data, 0) + tpdum.complete = true +end + +function TwoPhaseDefUseMap(nssas::Int) + ssa_uses = zeros(Int, nssas) + data = Int[] + complete = false + return TwoPhaseDefUseMap(ssa_uses, data, complete) +end + +function count!(tpdum::TwoPhaseDefUseMap, arg::SSAValue) + @assert !tpdum.complete + tpdum.ssa_uses[arg.id] += 1 +end + +function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int) + if !tpdum.complete + tpdum.ssa_uses[def] -= 1 + else + range = tpdum.ssa_uses[def]:(def == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[def + 1] - 1)) + # TODO: Sorted + useidx = findfirst(idx->tpdum.data[idx] == use, range) + @assert useidx !== nothing + idx = range[useidx] + while idx < lastindex(range) + ndata = tpdum.data[idx+1] + ndata == 0 && break + tpdum.data[idx] = ndata + idx += 1 + end + tpdum.data[idx] = 0 + end +end +kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) = + kill_def_use!(tpdum, def.id, use) + +function getindex(tpdum::TwoPhaseDefUseMap, idx::Int) + @assert tpdum.complete + range = tpdum.ssa_uses[idx]:(idx == length(tpdum.ssa_uses) ? length(tpdum.data) : (tpdum.ssa_uses[idx + 1] - 1)) + # TODO: Make logarithmic + nelems = 0 + for i in range + tpdum.data[i] == 0 && break + nelems += 1 + end + return TwoPhaseVectorView(tpdum.data, nelems, range) +end + +mutable struct LazyGenericDomtree{IsPostDom} + ir::IRCode + domtree::GenericDomTree{IsPostDom} + LazyGenericDomtree{IsPostDom}(ir::IRCode) where {IsPostDom} = new{IsPostDom}(ir) +end +function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom} + isdefined(x, :domtree) && return x.domtree + return @timeit "domtree 2" x.domtree = IsPostDom ? + construct_postdomtree(x.ir.cfg.blocks) : + construct_domtree(x.ir.cfg.blocks) +end + +const LazyDomtree = LazyGenericDomtree{false} +const LazyPostDomtree = LazyGenericDomtree{true} + +# InferenceState +# ============== + +""" + const VarTable = Vector{VarState} + +The extended lattice that maps local variables to inferred type represented as `AbstractLattice`. +Each index corresponds to the `id` of `SlotNumber` which identifies each local variable. +Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement +to enable flow-sensitive analysis. 
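The two-phase def-use construction described above can be pictured with a small, self-contained sketch of the same count-then-fill idea; this is a simplified standalone model with hypothetical names, not the actual `TwoPhaseDefUseMap` implementation.

```julia
# Count-then-fill def-use map over one flat buffer (toy model mirroring the two passes
# of `TwoPhaseDefUseMap`/`complete!` only in spirit).
function defuse_two_pass(uses_per_stmt::Vector{Vector{Int}})
    n = length(uses_per_stmt)
    counts = zeros(Int, n)
    # pass 1: count how many uses each def (SSA id) has
    for used in uses_per_stmt, def in used
        counts[def] += 1
    end
    # convert the counts into start offsets into a single shared buffer
    offsets = similar(counts)
    cursor = 1
    for i in 1:n
        offsets[i] = cursor
        cursor += counts[i]
    end
    data = zeros(Int, cursor - 1)
    fill_pos = copy(offsets)
    # pass 2: record the actual use sites
    for (use_site, used) in enumerate(uses_per_stmt), def in used
        data[fill_pos[def]] = use_site
        fill_pos[def] += 1
    end
    return offsets, counts, data
end

# statement 3 uses %1 and %2, statement 4 uses %3:
offsets, counts, data = defuse_two_pass([Int[], Int[], [1, 2], [3]])
# the uses of %1 are data[offsets[1]:offsets[1]+counts[1]-1] == [3]
```

Only a single `data` buffer is allocated no matter how uneven the per-def use counts are, which is the memory- and GC-pressure argument made in the docstring above.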
+""" +const VarTable = Vector{VarState} + +const CACHE_MODE_NULL = 0x00 # not cached, without optimization +const CACHE_MODE_GLOBAL = 0x01 << 0 # cached globally, optimization allowed +const CACHE_MODE_LOCAL = 0x01 << 1 # cached locally, optimization allowed +const CACHE_MODE_VOLATILE = 0x01 << 2 # not cached, optimization allowed + +mutable struct TryCatchFrame + exct + scopet + const enter_idx::Int + scope_uses::Vector{Int} + TryCatchFrame(@nospecialize(exct), @nospecialize(scopet), enter_idx::Int) = new(exct, scopet, enter_idx) +end + mutable struct InferenceState #= information about this method instance =# linfo::MethodInstance world::UInt mod::Module - sptypes::Vector{Any} + sptypes::Vector{VarState} slottypes::Vector{Any} src::CodeInfo cfg::CFG + method_info::MethodInfo #= intermediate states for local abstract interpretation =# currbb::Int currpc::Int ip::BitSet#=TODO BoundedMinPrioritySet=# # current active instruction pointers - handler_at::Vector{Int} # current exception handler info + handlers::Vector{TryCatchFrame} + handler_at::Vector{Tuple{Int, Int}} # tuple of current (handler, exception stack) value at the pc ssavalue_uses::Vector{BitSet} # ssavalue sparsity and restart info # TODO: Could keep this sparsely by doing structural liveness analysis ahead of time. bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet ssavaluetypes::Vector{Any} stmt_edges::Vector{Union{Nothing,Vector{Any}}} - stmt_info::Vector{Any} + stmt_info::Vector{CallInfo} #= intermediate states for interprocedural abstract interpretation =# pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue @@ -108,29 +241,30 @@ mutable struct InferenceState cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller callers_in_cycle::Vector{InferenceState} dont_work_on_me::Bool - parent::Union{Nothing, InferenceState} - inferred::Bool # TODO move this to InferenceResult? + parent # ::Union{Nothing,AbsIntState} #= results =# result::InferenceResult # remember where to put the result + unreachable::BitSet # statements that were found to be statically unreachable valid_worlds::WorldRange bestguess #::Type + exc_bestguess ipo_effects::Effects #= flags =# - params::InferenceParams # Whether to restrict inference of abstract call sites to avoid excessive work # Set by default for toplevel frame. restrict_abstract_call_sites::Bool - cached::Bool # TODO move this to InferenceResult? + cache_mode::UInt8 # TODO move this to InferenceResult? + insert_coverage::Bool # The interpreter that created this inference state. Not looked at by # NativeInterpreter. 
But other interpreters may use this to detect cycles interp::AbstractInterpreter # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results - function InferenceState(result::InferenceResult, src::CodeInfo, cache::Symbol, - interp::AbstractInterpreter) + function InferenceState(result::InferenceResult, src::CodeInfo, cache_mode::UInt8, + interp::AbstractInterpreter) linfo = result.linfo world = get_world_counter(interp) def = linfo.def @@ -138,15 +272,16 @@ mutable struct InferenceState sptypes = sptypes_from_meth_instance(linfo) code = src.code::Vector{Any} cfg = compute_basic_blocks(code) + method_info = MethodInfo(src) currbb = currpc = 1 ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1) - handler_at = compute_trycatch(code, BitSet()) + handler_at, handlers = compute_trycatch(code, BitSet()) nssavalues = src.ssavaluetypes::Int ssavalue_uses = find_ssavalue_uses(code, nssavalues) nstmts = length(code) stmt_edges = Union{Nothing, Vector{Any}}[ nothing for i = 1:nstmts ] - stmt_info = Any[ nothing for i = 1:nstmts ] + stmt_info = CallInfo[ NoCallInfo() for i = 1:nstmts ] nslots = length(src.slotflags) slottypes = Vector{Any}(undef, nslots) @@ -161,92 +296,53 @@ mutable struct InferenceState end src.ssavaluetypes = ssavaluetypes = Any[ NOT_FOUND for i = 1:nssavalues ] + unreachable = BitSet() pclimitations = IdSet{InferenceState}() limitations = IdSet{InferenceState}() cycle_backedges = Vector{Tuple{InferenceState,Int}}() callers_in_cycle = Vector{InferenceState}() dont_work_on_me = false parent = nothing - inferred = false valid_worlds = WorldRange(src.min_world, src.max_world == typemax(UInt) ? get_world_counter() : src.max_world) bestguess = Bottom - # TODO: Currently, any :inbounds declaration taints consistency, - # because we cannot be guaranteed whether or not boundschecks - # will be eliminated and if they are, we cannot be guaranteed - # that no undefined behavior will occur (the effects assumptions - # are stronger than the inbounds assumptions, since the latter - # requires dynamic reachability, while the former is global). - inbounds = inbounds_option() - inbounds_taints_consistency = !(inbounds === :on || (inbounds === :default && !any_inbounds(code))) - consistent = inbounds_taints_consistency ? 
ALWAYS_FALSE : ALWAYS_TRUE - ipo_effects = Effects(EFFECTS_TOTAL; consistent, inbounds_taints_consistency) - - params = InferenceParams(interp) - restrict_abstract_call_sites = isa(linfo.def, Module) - @assert cache === :no || cache === :local || cache === :global - cached = cache === :global - - frame = new( - linfo, world, mod, sptypes, slottypes, src, cfg, - currbb, currpc, ip, handler_at, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info, - pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent, inferred, - result, valid_worlds, bestguess, ipo_effects, - params, restrict_abstract_call_sites, cached, - interp) + exc_bestguess = Bottom + ipo_effects = EFFECTS_TOTAL - # some more setups - params.unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at) - result.result = frame - cache !== :no && push!(get_inference_cache(interp), result) - - return frame - end -end + insert_coverage = should_insert_coverage(mod, src) + if insert_coverage + ipo_effects = Effects(ipo_effects; effect_free = ALWAYS_FALSE) + end -Effects(state::InferenceState) = state.ipo_effects + if def isa Method + ipo_effects = Effects(ipo_effects; nonoverlayed=is_nonoverlayed(def)) + end -function tristate_merge!(caller::InferenceState, effects::Effects) - caller.ipo_effects = tristate_merge(caller.ipo_effects, effects) -end -tristate_merge!(caller::InferenceState, callee::InferenceState) = - tristate_merge!(caller, Effects(callee)) + restrict_abstract_call_sites = isa(def, Module) -is_effect_overridden(sv::InferenceState, effect::Symbol) = is_effect_overridden(sv.linfo, effect) -function is_effect_overridden(linfo::MethodInstance, effect::Symbol) - def = linfo.def - return isa(def, Method) && is_effect_overridden(def, effect) -end -is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect) -is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect) - -function InferenceResult( - linfo::MethodInstance, - arginfo::Union{Nothing,Tuple{ArgInfo,InferenceState}} = nothing) - return _InferenceResult(linfo, arginfo) + # some more setups + InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at) + !iszero(cache_mode & CACHE_MODE_LOCAL) && push!(get_inference_cache(interp), result) + + return new( + linfo, world, mod, sptypes, slottypes, src, cfg, method_info, + currbb, currpc, ip, handlers, handler_at, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info, + pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent, + result, unreachable, valid_worlds, bestguess, exc_bestguess, ipo_effects, + restrict_abstract_call_sites, cache_mode, insert_coverage, + interp) + end end -add_remark!(::AbstractInterpreter, sv::InferenceState, remark) = return - -function bail_out_toplevel_call(::AbstractInterpreter, @nospecialize(callsig), sv::InferenceState) - return sv.restrict_abstract_call_sites && !isdispatchtuple(callsig) -end -function bail_out_call(::AbstractInterpreter, @nospecialize(rt), sv::InferenceState) - return rt === Any -end -function bail_out_apply(::AbstractInterpreter, @nospecialize(rt), sv::InferenceState) - return rt === Any -end +is_nonoverlayed(m::Method) = !isdefined(m, :external_mt) +is_nonoverlayed(interp::AbstractInterpreter) = !isoverlayed(method_table(interp)) +isoverlayed(::MethodTableView) = error("unsatisfied MethodTableView interface") +isoverlayed(::InternalMethodTable) = false 
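The `is_nonoverlayed`/`isoverlayed` definitions around here form a small trait-style query over the method-table types; the same dispatch pattern, in a standalone and purely hypothetical form (none of these names are the real `MethodTableView` hierarchy), looks like this.

```julia
# Hypothetical standalone sketch of the trait-style dispatch used for `isoverlayed`.
abstract type TableView end
struct PlainTable   <: TableView end
struct OverlayTable <: TableView end
struct CachingTable{T<:TableView} <: TableView
    inner::T
end

# the fallback errors loudly so every new table kind must answer the query explicitly
overlayed(::TableView)    = error("unsatisfied TableView interface")
overlayed(::PlainTable)   = false
overlayed(::OverlayTable) = true
overlayed(t::CachingTable) = overlayed(t.inner)  # caching wrappers just forward

overlayed(CachingTable(PlainTable()))    # false
overlayed(CachingTable(OverlayTable()))  # true
```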
+isoverlayed(::OverlayMethodTable) = true +isoverlayed(mt::CachedMethodTable) = isoverlayed(mt.table) -function any_inbounds(code::Vector{Any}) - for i=1:length(code) - stmt = code[i] - if isa(stmt, Expr) && stmt.head === :inbounds - return true - end - end - return false -end +is_inferred(sv::InferenceState) = is_inferred(sv.result) +is_inferred(result::InferenceResult) = result.result !== nothing was_reached(sv::InferenceState, pc::Int) = sv.ssavaluetypes[pc] !== NOT_FOUND @@ -254,75 +350,96 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet) # The goal initially is to record the frame like this for the state at exit: # 1: (enter 3) # == 0 # 3: (expr) # == 1 - # 3: (leave 1) # == 1 + # 3: (leave %1) # == 1 # 4: (expr) # == 0 - # then we can find all trys by walking backwards from :enter statements, - # and all catches by looking at the statement after the :enter + # then we can find all `try`s by walking backwards from :enter statements, + # and all `catch`es by looking at the statement after the :enter n = length(code) empty!(ip) ip.offset = 0 # for _bits_findnext push!(ip, n + 1) - handler_at = fill(0, n) + handler_at = fill((0, 0), n) + handlers = TryCatchFrame[] # start from all :enter statements and record the location of the try for pc = 1:n stmt = code[pc] - if isexpr(stmt, :enter) - l = stmt.args[1]::Int - handler_at[pc + 1] = pc + if isa(stmt, EnterNode) + l = stmt.catch_dest + push!(handlers, TryCatchFrame(Bottom, isdefined(stmt, :scope) ? Bottom : nothing, pc)) + handler_id = length(handlers) + handler_at[pc + 1] = (handler_id, 0) push!(ip, pc + 1) - handler_at[l] = pc - push!(ip, l) + if l != 0 + handler_at[l] = (0, handler_id) + push!(ip, l) + end end end # now forward those marks to all :leave statements - pc´´ = 0 while true # make progress on the active ip set - pc = _bits_findnext(ip.bits, pc´´)::Int + pc = _bits_findnext(ip.bits, 0)::Int pc > n && break while true # inner loop optimizes the common case where it can run straight from pc to pc + 1 pc´ = pc + 1 # next program-counter (after executing instruction) - if pc == pc´´ - pc´´ = pc´ - end delete!(ip, pc) - cur_hand = handler_at[pc] - @assert cur_hand != 0 "unbalanced try/catch" + cur_stacks = handler_at[pc] + @assert cur_stacks != (0, 0) "unbalanced try/catch" stmt = code[pc] if isa(stmt, GotoNode) pc´ = stmt.label elseif isa(stmt, GotoIfNot) l = stmt.dest::Int - if handler_at[l] != cur_hand - @assert handler_at[l] == 0 "unbalanced try/catch" - handler_at[l] = cur_hand - if l < pc´´ - pc´´ = l - end + if handler_at[l] != cur_stacks + @assert handler_at[l][1] == 0 || handler_at[l][1] == cur_stacks[1] "unbalanced try/catch" + handler_at[l] = cur_stacks push!(ip, l) end elseif isa(stmt, ReturnNode) - @assert !isdefined(stmt, :val) "unbalanced try/catch" + @assert !isdefined(stmt, :val) || cur_stacks[1] == 0 "unbalanced try/catch" break + elseif isa(stmt, EnterNode) + l = stmt.catch_dest + # We assigned a handler number above. Here we just merge that + # with out current handler information. 
+ if l != 0 + handler_at[l] = (cur_stacks[1], handler_at[l][2]) + end + cur_stacks = (handler_at[pc´][1], cur_stacks[2]) elseif isa(stmt, Expr) head = stmt.head - if head === :enter - cur_hand = pc - elseif head === :leave - l = stmt.args[1]::Int + if head === :leave + l = 0 + for j = 1:length(stmt.args) + arg = stmt.args[j] + if arg === nothing + continue + else + enter_stmt = code[(arg::SSAValue).id] + if enter_stmt === nothing + continue + end + @assert isa(enter_stmt, EnterNode) "malformed :leave" + end + l += 1 + end + cur_hand = cur_stacks[1] for i = 1:l - cur_hand = handler_at[cur_hand] + cur_hand = handler_at[handlers[cur_hand].enter_idx][1] end - cur_hand == 0 && break + cur_stacks = (cur_hand, cur_stacks[2]) + cur_stacks == (0, 0) && break + elseif head === :pop_exception + cur_stacks = (cur_stacks[1], handler_at[(stmt.args[1]::SSAValue).id][2]) + cur_stacks == (0, 0) && break end end pc´ > n && break # can't proceed with the fast-path fall-through - if handler_at[pc´] != cur_hand - @assert handler_at[pc´] == 0 "unbalanced try/catch" - handler_at[pc´] = cur_hand + if handler_at[pc´] != cur_stacks + handler_at[pc´] = cur_stacks elseif !in(pc´, ip) break # already visited end @@ -331,129 +448,227 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet) end @assert first(ip) == n + 1 - return handler_at + return handler_at, handlers end -""" - Iterate through all callers of the given InferenceState in the abstract - interpretation stack (including the given InferenceState itself), vising - children before their parents (i.e. ascending the tree from the given - InferenceState). Note that cycles may be visited in any order. -""" -struct InfStackUnwind - inf::InferenceState -end -iterate(unw::InfStackUnwind) = (unw.inf, (unw.inf, 0)) -function iterate(unw::InfStackUnwind, (infstate, cyclei)::Tuple{InferenceState, Int}) - # iterate through the cycle before walking to the parent - if cyclei < length(infstate.callers_in_cycle) - cyclei += 1 - infstate = infstate.callers_in_cycle[cyclei] - else - cyclei = 0 - infstate = infstate.parent +# check if coverage mode is enabled +function should_insert_coverage(mod::Module, src::CodeInfo) + coverage_enabled(mod) && return true + JLOptions().code_coverage == 3 || return false + # path-specific coverage mode: if any line falls in a tracked file enable coverage for all + linetable = src.linetable + if isa(linetable, Vector{Any}) + for line in linetable + line = line::LineInfoNode + if is_file_tracked(line.file) + return true + end + end + elseif isa(linetable, Vector{LineInfoNode}) + for line in linetable + if is_file_tracked(line.file) + return true + end + end end - infstate === nothing && return nothing - (infstate::InferenceState, (infstate, cyclei)) + return false end -function InferenceState(result::InferenceResult, cache::Symbol, interp::AbstractInterpreter) +function InferenceState(result::InferenceResult, cache_mode::UInt8, interp::AbstractInterpreter) # prepare an InferenceState object for inferring lambda - src = retrieve_code_info(result.linfo) + world = get_world_counter(interp) + src = retrieve_code_info(result.linfo, world) src === nothing && return nothing validate_code_in_debug_mode(result.linfo, src, "lowered") - return InferenceState(result, src, cache, interp) + return InferenceState(result, src, cache_mode, interp) end +InferenceState(result::InferenceResult, cache_mode::Symbol, interp::AbstractInterpreter) = + InferenceState(result, convert_cache_mode(cache_mode), interp) +InferenceState(result::InferenceResult, 
src::CodeInfo, cache_mode::Symbol, interp::AbstractInterpreter) = + InferenceState(result, src, convert_cache_mode(cache_mode), interp) + +function convert_cache_mode(cache_mode::Symbol) + if cache_mode === :global + return CACHE_MODE_GLOBAL + elseif cache_mode === :local + return CACHE_MODE_LOCAL + elseif cache_mode === :volatile + return CACHE_MODE_VOLATILE + elseif cache_mode === :no + return CACHE_MODE_NULL + end + error("unexpected `cache_mode` is given") +end + +""" + constrains_param(var::TypeVar, sig, covariant::Bool, type_constrains::Bool) + +Check if `var` will be constrained to have a definite value +in any concrete leaftype subtype of `sig`. + +It is used as a helper to determine whether type intersection is guaranteed to be able to +find a value for a particular type parameter. +A necessary condition for type intersection to not assign a parameter is that it only +appears in a `Union[All]` and during subtyping some other union component (that does not +constrain the type parameter) is selected. + +The `type_constrains` flag determines whether Type{T} is considered to be constraining +`T`. This is not true in general, because of the existence of types with free type +parameters, however, some callers would like to ignore this corner case. +""" +function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool, type_constrains::Bool=false) + typ === var && return true + while typ isa UnionAll + covariant && constrains_param(var, typ.var.ub, covariant, type_constrains) && return true + # typ.var.lb doesn't constrain var + typ = typ.body + end + if typ isa Union + # for unions, verify that both options would constrain var + ba = constrains_param(var, typ.a, covariant, type_constrains) + bb = constrains_param(var, typ.b, covariant, type_constrains) + (ba && bb) && return true + elseif typ isa DataType + # return true if any param constrains var + fc = length(typ.parameters) + if fc > 0 + if typ.name === Tuple.name + # vararg tuple needs special handling + for i in 1:(fc - 1) + p = typ.parameters[i] + constrains_param(var, p, covariant, type_constrains) && return true + end + lastp = typ.parameters[fc] + vararg = unwrap_unionall(lastp) + if vararg isa Core.TypeofVararg && isdefined(vararg, :N) + constrains_param(var, vararg.N, covariant, type_constrains) && return true + # T = vararg.parameters[1] doesn't constrain var + else + constrains_param(var, lastp, covariant, type_constrains) && return true + end + else + if typ.name === typename(Type) && typ.parameters[1] === var && var.ub === Any + # Types with free type parameters are <: Type cause the typevar + # to be unconstrained because Type{T} with free typevars is illegal + return type_constrains + end + for i in 1:fc + p = typ.parameters[i] + constrains_param(var, p, false, type_constrains) && return true + end + end + end + end + return false +end + +const EMPTY_SPTYPES = VarState[] function sptypes_from_meth_instance(linfo::MethodInstance) - toplevel = !isa(linfo.def, Method) - if !toplevel && isempty(linfo.sparam_vals) && isa(linfo.def.sig, UnionAll) + def = linfo.def + isa(def, Method) || return EMPTY_SPTYPES # toplevel + sig = def.sig + if isempty(linfo.sparam_vals) + isa(sig, UnionAll) || return EMPTY_SPTYPES # linfo is unspecialized - sp = Any[] - sig = linfo.def.sig - while isa(sig, UnionAll) - push!(sp, sig.var) - sig = sig.body + spvals = Any[] + sig′ = sig + while isa(sig′, UnionAll) + push!(spvals, sig′.var) + sig′ = sig′.body end else - sp = collect(Any, linfo.sparam_vals) + spvals = linfo.sparam_vals end - 
for i = 1:length(sp) - v = sp[i] + nvals = length(spvals) + sptypes = Vector{VarState}(undef, nvals) + for i = 1:nvals + v = spvals[i] if v isa TypeVar - fromArg = 0 - # if this parameter came from arg::Type{T}, then `arg` is more precise than - # Type{T} where lb<:T<:ub - sig = linfo.def.sig temp = sig for j = 1:i-1 temp = temp.body end - Pi = temp.var - while temp isa UnionAll - temp = temp.body - end - sigtypes = (temp::DataType).parameters + vᵢ = (temp::UnionAll).var + sigtypes = (unwrap_unionall(temp)::DataType).parameters for j = 1:length(sigtypes) - tj = sigtypes[j] - if isType(tj) && tj.parameters[1] === Pi - fromArg = j - break + sⱼ = sigtypes[j] + if isType(sⱼ) && sⱼ.parameters[1] === vᵢ + # if this parameter came from `arg::Type{T}`, + # then `arg` is more precise than `Type{T} where lb<:T<:ub` + ty = fieldtype(linfo.specTypes, j) + @goto ty_computed + elseif (va = va_from_vatuple(sⱼ)) !== nothing + # if this parameter came from `::Tuple{.., Vararg{T,vᵢ}}`, + # then `vᵢ` is known to be `Int` + if isdefined(va, :N) && va.N === vᵢ + ty = Int + @goto ty_computed + end end end - if fromArg > 0 - ty = fieldtype(linfo.specTypes, fromArg) + ub = unwraptv_ub(v) + if has_free_typevars(ub) + ub = Any + end + lb = unwraptv_lb(v) + if has_free_typevars(lb) + lb = Bottom + end + if Any === ub && lb === Bottom + ty = Any else - ub = v.ub - while ub isa TypeVar - ub = ub.ub - end - if has_free_typevars(ub) - ub = Any - end - lb = v.lb - while lb isa TypeVar - lb = lb.lb - end - if has_free_typevars(lb) - lb = Bottom - end - if Any <: ub && lb <: Bottom - ty = Any - else - tv = TypeVar(v.name, lb, ub) - ty = UnionAll(tv, Type{tv}) - end + tv = TypeVar(v.name, lb, ub) + ty = UnionAll(tv, Type{tv}) end + @label ty_computed + undef = !(let sig=sig + # if the specialized signature `linfo.specTypes` doesn't contain any free + # type variables, we can use it for a more accurate analysis of whether `v` + # is constrained or not, otherwise we should use `def.sig` which always + # doesn't contain any free type variables + if !has_free_typevars(linfo.specTypes) + sig = linfo.specTypes + end + @assert !has_free_typevars(sig) + constrains_param(v, sig, #=covariant=#true) + end) elseif isvarargtype(v) + # if this parameter came from `func(..., ::Vararg{T,v})`, + # so the type is known to be `Int` ty = Int + undef = false else ty = Const(v) + undef = false end - sp[i] = ty + sptypes[i] = VarState(ty, undef) end - return sp + return sptypes end -_topmod(sv::InferenceState) = _topmod(sv.mod) - -# work towards converging the valid age range for sv -function update_valid_age!(sv::InferenceState, worlds::WorldRange) - sv.valid_worlds = intersect(worlds, sv.valid_worlds) - @assert(sv.world in sv.valid_worlds, "invalid age range update") - nothing +function va_from_vatuple(@nospecialize(t)) + @_foldable_meta + t = unwrap_unionall(t) + if isa(t, DataType) + n = length(t.parameters) + if n > 0 + va = t.parameters[n] + if isvarargtype(va) + return va + end + end + end + return nothing end -update_valid_age!(edge::InferenceState, sv::InferenceState) = update_valid_age!(sv, edge.valid_worlds) +_topmod(sv::InferenceState) = _topmod(frame_module(sv)) -function record_ssa_assign!(ssa_id::Int, @nospecialize(new), frame::InferenceState) +function record_ssa_assign!(𝕃ᵢ::AbstractLattice, ssa_id::Int, @nospecialize(new), frame::InferenceState) ssavaluetypes = frame.ssavaluetypes old = ssavaluetypes[ssa_id] - if old === NOT_FOUND || !(new ⊑ old) - # typically, we expect that old ⊑ new (that output information only - # gets less 
precise with worse input information), but to actually - # guarantee convergence we need to use tmerge here to ensure that is true - ssavaluetypes[ssa_id] = old === NOT_FOUND ? new : tmerge(old, new) + if old === NOT_FOUND || !is_lattice_equal(𝕃ᵢ, new, old) + ssavaluetypes[ssa_id] = new W = frame.ip for r in frame.ssavalue_uses[ssa_id] if was_reached(frame, r) @@ -470,38 +685,24 @@ function record_ssa_assign!(ssa_id::Int, @nospecialize(new), frame::InferenceSta return nothing end -function add_cycle_backedge!(frame::InferenceState, caller::InferenceState, currpc::Int) - update_valid_age!(frame, caller) +function add_cycle_backedge!(caller::InferenceState, frame::InferenceState, currpc::Int) + update_valid_age!(caller, frame.valid_worlds) backedge = (caller, currpc) contains_is(frame.cycle_backedges, backedge) || push!(frame.cycle_backedges, backedge) - add_backedge!(frame.linfo, caller) + add_backedge!(caller, frame.linfo) return frame end -# temporarily accumulate our edges to later add as backedges in the callee -function add_backedge!(li::MethodInstance, caller::InferenceState) - isa(caller.linfo.def, Method) || return # don't add backedges to toplevel exprs - edges = caller.stmt_edges[caller.currpc] +function get_stmt_edges!(caller::InferenceState, currpc::Int=caller.currpc) + stmt_edges = caller.stmt_edges + edges = stmt_edges[currpc] if edges === nothing - edges = caller.stmt_edges[caller.currpc] = [] + edges = stmt_edges[currpc] = [] end - push!(edges, li) - return nothing + return edges end -# used to temporarily accumulate our no method errors to later add as backedges in the callee method table -function add_mt_backedge!(mt::Core.MethodTable, @nospecialize(typ), caller::InferenceState) - isa(caller.linfo.def, Method) || return # don't add backedges to toplevel exprs - edges = caller.stmt_edges[caller.currpc] - if edges === nothing - edges = caller.stmt_edges[caller.currpc] = [] - end - push!(edges, mt) - push!(edges, typ) - return nothing -end - -function empty_backedges!(frame::InferenceState, currpc::Int = frame.currpc) +function empty_backedges!(frame::InferenceState, currpc::Int=frame.currpc) edges = frame.stmt_edges[currpc] edges === nothing || empty!(edges) return nothing @@ -510,7 +711,7 @@ end function print_callstack(sv::InferenceState) while sv !== nothing print(sv.linfo) - !sv.cached && print(" [uncached]") + is_cached(sv) || print(" [uncached]") println() for cycle in sv.callers_in_cycle print(' ', cycle.linfo) @@ -520,4 +721,296 @@ function print_callstack(sv::InferenceState) end end +function narguments(sv::InferenceState, include_va::Bool=true) + def = sv.linfo.def + nargs = length(sv.result.argtypes) + if !include_va + nargs -= isa(def, Method) && def.isva + end + return nargs +end + +# IRInterpretationState +# ===================== + +# TODO add `result::InferenceResult` and put the irinterp result into the inference cache? 
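# -- Editor's note (not part of this patch): `IRInterpretationState` is the frame type
# used when inference is re-run directly over already-optimized `IRCode` ("irinterp",
# whose include is added further down in this diff). Judging from its fields, it tracks
# the current statement index, which argument and SSA-value types have been refined
# beyond what the IR records, a two-phase def-use map for scheduling re-visits, plus the
# usual world-range and backedge bookkeeping it shares with `InferenceState` via the
# `AbsIntState` union defined below.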
+mutable struct IRInterpretationState + const method_info::MethodInfo + const ir::IRCode + const mi::MethodInstance + const world::UInt + curridx::Int + const argtypes_refined::Vector{Bool} + const sptypes::Vector{VarState} + const tpdum::TwoPhaseDefUseMap + const ssa_refined::BitSet + const lazydomtree::LazyDomtree + valid_worlds::WorldRange + const edges::Vector{Any} + parent # ::Union{Nothing,AbsIntState} + + function IRInterpretationState(interp::AbstractInterpreter, + method_info::MethodInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any}, + world::UInt, min_world::UInt, max_world::UInt) + curridx = 1 + given_argtypes = Vector{Any}(undef, length(argtypes)) + for i = 1:length(given_argtypes) + given_argtypes[i] = widenslotwrapper(argtypes[i]) + end + given_argtypes = va_process_argtypes(optimizer_lattice(interp), given_argtypes, mi) + argtypes_refined = Bool[!⊑(optimizer_lattice(interp), ir.argtypes[i], given_argtypes[i]) + for i = 1:length(given_argtypes)] + empty!(ir.argtypes) + append!(ir.argtypes, given_argtypes) + tpdum = TwoPhaseDefUseMap(length(ir.stmts)) + ssa_refined = BitSet() + lazydomtree = LazyDomtree(ir) + valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world) + edges = Any[] + parent = nothing + return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum, + ssa_refined, lazydomtree, valid_worlds, edges, parent) + end +end + +function IRInterpretationState(interp::AbstractInterpreter, + code::CodeInstance, mi::MethodInstance, argtypes::Vector{Any}, world::UInt) + @assert code.def === mi + src = @atomic :monotonic code.inferred + if isa(src, String) + src = _uncompressed_ir(mi.def, src) + else + isa(src, CodeInfo) || return nothing + end + method_info = MethodInfo(src) + ir = inflate_ir(src, mi) + return IRInterpretationState(interp, method_info, ir, mi, argtypes, world, + src.min_world, src.max_world) +end + +# AbsIntState +# =========== + +const AbsIntState = Union{InferenceState,IRInterpretationState} + +frame_instance(sv::InferenceState) = sv.linfo +frame_instance(sv::IRInterpretationState) = sv.mi + +function frame_module(sv::AbsIntState) + mi = frame_instance(sv) + def = mi.def + isa(def, Module) && return def + return def.module +end + +frame_parent(sv::InferenceState) = sv.parent::Union{Nothing,AbsIntState} +frame_parent(sv::IRInterpretationState) = sv.parent::Union{Nothing,AbsIntState} + +is_constproped(sv::InferenceState) = any(sv.result.overridden_by_const) +is_constproped(::IRInterpretationState) = true + +is_cached(sv::InferenceState) = !iszero(sv.cache_mode & CACHE_MODE_GLOBAL) +is_cached(::IRInterpretationState) = false + +method_info(sv::InferenceState) = sv.method_info +method_info(sv::IRInterpretationState) = sv.method_info + +propagate_inbounds(sv::AbsIntState) = method_info(sv).propagate_inbounds +method_for_inference_limit_heuristics(sv::AbsIntState) = method_info(sv).method_for_inference_limit_heuristics + +frame_world(sv::InferenceState) = sv.world +frame_world(sv::IRInterpretationState) = sv.world + +callers_in_cycle(sv::InferenceState) = sv.callers_in_cycle +callers_in_cycle(sv::IRInterpretationState) = () + +function is_effect_overridden(sv::AbsIntState, effect::Symbol) + if is_effect_overridden(frame_instance(sv), effect) + return true + elseif is_effect_overridden(decode_statement_effects_override(sv), effect) + return true + end + return false +end +function is_effect_overridden(linfo::MethodInstance, effect::Symbol) + def = linfo.def + return isa(def, Method) && 
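           # (editor's note) toplevel thunks have `def::Module`; only a `Method` carries
           # the `purity` override bits consulted below, so anything else answers `false`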
is_effect_overridden(def, effect) +end +is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect) +is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect) + +has_conditional(𝕃::AbstractLattice, ::InferenceState) = has_conditional(𝕃) +has_conditional(::AbstractLattice, ::IRInterpretationState) = false + +# work towards converging the valid age range for sv +function update_valid_age!(sv::AbsIntState, valid_worlds::WorldRange) + valid_worlds = sv.valid_worlds = intersect(valid_worlds, sv.valid_worlds) + @assert sv.world in valid_worlds "invalid age range update" + return valid_worlds +end + +""" + AbsIntStackUnwind(sv::AbsIntState) + +Iterate through all callers of the given `AbsIntState` in the abstract interpretation stack +(including the given `AbsIntState` itself), visiting children before their parents (i.e. +ascending the tree from the given `AbsIntState`). +Note that cycles may be visited in any order. +""" +struct AbsIntStackUnwind + sv::AbsIntState +end +iterate(unw::AbsIntStackUnwind) = (unw.sv, (unw.sv, 0)) +function iterate(unw::AbsIntStackUnwind, (sv, cyclei)::Tuple{AbsIntState, Int}) + # iterate through the cycle before walking to the parent + callers = callers_in_cycle(sv) + if callers !== () && cyclei < length(callers) + cyclei += 1 + parent = callers[cyclei] + else + cyclei = 0 + parent = frame_parent(sv) + end + parent === nothing && return nothing + return (parent, (parent, cyclei)) +end + +# temporarily accumulate our edges to later add as backedges in the callee +function add_backedge!(caller::InferenceState, mi::MethodInstance) + isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance + return push!(get_stmt_edges!(caller), mi) +end +function add_backedge!(irsv::IRInterpretationState, mi::MethodInstance) + return push!(irsv.edges, mi) +end + +function add_invoke_backedge!(caller::InferenceState, @nospecialize(invokesig::Type), mi::MethodInstance) + isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance + return push!(get_stmt_edges!(caller), invokesig, mi) +end +function add_invoke_backedge!(irsv::IRInterpretationState, @nospecialize(invokesig::Type), mi::MethodInstance) + return push!(irsv.edges, invokesig, mi) +end + +# used to temporarily accumulate our no method errors to later add as backedges in the callee method table +function add_mt_backedge!(caller::InferenceState, mt::MethodTable, @nospecialize(typ)) + isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance + return push!(get_stmt_edges!(caller), mt, typ) +end +function add_mt_backedge!(irsv::IRInterpretationState, mt::MethodTable, @nospecialize(typ)) + return push!(irsv.edges, mt, typ) +end + get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc] +get_curr_ssaflag(sv::IRInterpretationState) = sv.ir.stmts[sv.curridx][:flag] + +has_curr_ssaflag(sv::InferenceState, flag::UInt32) = has_flag(sv.src.ssaflags[sv.currpc], flag) +has_curr_ssaflag(sv::IRInterpretationState, flag::UInt32) = has_flag(sv.ir.stmts[sv.curridx][:flag], flag) + +function set_curr_ssaflag!(sv::InferenceState, flag::UInt32, mask::UInt32=typemax(UInt32)) + curr_flag = sv.src.ssaflags[sv.currpc] + sv.src.ssaflags[sv.currpc] = (curr_flag & ~mask) | flag +end +function set_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32, mask::UInt32=typemax(UInt32)) + curr_flag = sv.ir.stmts[sv.curridx][:flag] + 
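    # (editor's note) clear the bits selected by `mask`, then install `flag` in their place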
sv.ir.stmts[sv.curridx][:flag] = (curr_flag & ~mask) | flag +end + +add_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.src.ssaflags[sv.currpc] |= flag +add_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = add_flag!(sv.ir.stmts[sv.curridx], flag) + +sub_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.src.ssaflags[sv.currpc] &= ~flag +sub_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = sub_flag!(sv.ir.stmts[sv.curridx], flag) + +function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects) + if effects.effect_free === EFFECT_FREE_GLOBALLY + # This tracks the global effects + effects = Effects(effects; effect_free=ALWAYS_TRUE) + end + caller.ipo_effects = merge_effects(caller.ipo_effects, effects) +end +merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return + +decode_statement_effects_override(sv::AbsIntState) = + decode_statement_effects_override(get_curr_ssaflag(sv)) + +struct InferenceLoopState + sig + rt + effects::Effects + function InferenceLoopState(@nospecialize(sig), @nospecialize(rt), effects::Effects) + new(sig, rt, effects) + end +end + +bail_out_toplevel_call(::AbstractInterpreter, state::InferenceLoopState, sv::InferenceState) = + sv.restrict_abstract_call_sites && !isdispatchtuple(state.sig) +bail_out_toplevel_call(::AbstractInterpreter, ::InferenceLoopState, ::IRInterpretationState) = false + +bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) = + state.rt === Any && !is_foldable(state.effects) +bail_out_call(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) = + state.rt === Any && !is_foldable(state.effects) + +bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceState) = + state.rt === Any +bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) = + state.rt === Any + +function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState) + if InferenceParams(interp).unoptimize_throw_blocks + # Disable inference of calls in throw blocks, since we're unlikely to + # need their types. There is one exception however: If up until now, the + # function has not seen any side effects, we would like to make sure there + # aren't any in the throw block either to enable other optimizations. 
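        # (editor's note) `is_stmt_throw_block` (defined in optimize.jl in this same patch)
        # simply tests the IR_FLAG_THROW_BLOCK bit that inference attaches to statements
        # it considers part of a throw block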
+ if is_stmt_throw_block(get_curr_ssaflag(sv)) + should_infer_for_effects(sv) || return false + end + end + return true +end +function should_infer_for_effects(sv::InferenceState) + def = sv.linfo.def + def isa Method || return false # toplevel frame will not be [semi-]concrete-evaluated + effects = sv.ipo_effects + override = decode_effects_override(def.purity) + effects.consistent === ALWAYS_FALSE && !is_effect_overridden(override, :consistent) && return false + effects.effect_free === ALWAYS_FALSE && !is_effect_overridden(override, :effect_free) && return false + !effects.terminates && !is_effect_overridden(override, :terminates_globally) && return false + return true +end +should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true + +add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return +add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return + +function get_max_methods(interp::AbstractInterpreter, @nospecialize(f), sv::AbsIntState) + fmax = get_max_methods_for_func(f) + fmax !== nothing && return fmax + return get_max_methods(interp, sv) +end +function get_max_methods(interp::AbstractInterpreter, @nospecialize(f)) + fmax = get_max_methods_for_func(f) + fmax !== nothing && return fmax + return get_max_methods(interp) +end +function get_max_methods(interp::AbstractInterpreter, sv::AbsIntState) + mmax = get_max_methods_for_module(sv) + mmax !== nothing && return mmax + return get_max_methods(interp) +end +get_max_methods(interp::AbstractInterpreter) = InferenceParams(interp).max_methods + +function get_max_methods_for_func(@nospecialize(f)) + if f !== nothing + fmm = typeof(f).name.max_methods + fmm !== UInt8(0) && return Int(fmm) + end + return nothing +end +get_max_methods_for_module(sv::AbsIntState) = get_max_methods_for_module(frame_module(sv)) +function get_max_methods_for_module(mod::Module) + max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int + max_methods < 0 && return nothing + return max_methods +end diff --git a/base/compiler/methodtable.jl b/base/compiler/methodtable.jl index 7aa686009c1af..ce04ff48d805e 100644 --- a/base/compiler/methodtable.jl +++ b/base/compiler/methodtable.jl @@ -1,6 +1,20 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -abstract type MethodTableView; end +struct MethodLookupResult + # Really Vector{Core.MethodMatch}, but it's easier to represent this as + # and work with Vector{Any} on the C side. + matches::Vector{Any} + valid_worlds::WorldRange + ambig::Bool +end +length(result::MethodLookupResult) = length(result.matches) +function iterate(result::MethodLookupResult, args...) + r = iterate(result.matches, args...) + r === nothing && return nothing + match, state = r + return (match::MethodMatch, state) +end +getindex(result::MethodLookupResult, idx::Int) = getindex(result.matches, idx)::MethodMatch """ struct InternalMethodTable <: MethodTableView @@ -20,74 +34,85 @@ external table, e.g., to override existing method. """ struct OverlayMethodTable <: MethodTableView world::UInt - mt::Core.MethodTable + mt::MethodTable end -struct MethodLookupResult - # Really Vector{Core.MethodMatch}, but it's easier to represent this as - # and work with Vector{Any} on the C side. 
- matches::Vector{Any} - valid_worlds::WorldRange - ambig::Bool +struct MethodMatchKey + sig # ::Type + limit::Int + MethodMatchKey(@nospecialize(sig), limit::Int) = new(sig, limit) end -length(result::MethodLookupResult) = length(result.matches) -function iterate(result::MethodLookupResult, args...) - r = iterate(result.matches, args...) - r === nothing && return nothing - match, state = r - return (match::MethodMatch, state) + +""" + struct CachedMethodTable <: MethodTableView + +Overlays another method table view with an additional local fast path cache that +can respond to repeated, identical queries faster than the original method table. +""" +struct CachedMethodTable{T<:MethodTableView} <: MethodTableView + cache::IdDict{MethodMatchKey, Union{Nothing,MethodLookupResult}} + table::T end -getindex(result::MethodLookupResult, idx::Int) = getindex(result.matches, idx)::MethodMatch +CachedMethodTable(table::T) where T = CachedMethodTable{T}(IdDict{MethodMatchKey, Union{Nothing,MethodLookupResult}}(), table) """ - findall(sig::Type, view::MethodTableView; limit::Int=typemax(Int)) -> - (matches::MethodLookupResult, overlayed::Bool) or missing + findall(sig::Type, view::MethodTableView; limit::Int=-1) -> + matches::MethodLookupResult or nothing Find all methods in the given method table `view` that are applicable to the given signature `sig`. If no applicable methods are found, an empty result is returned. -If the number of applicable methods exceeded the specified limit, `missing` is returned. +If the number of applicable methods exceeded the specified `limit`, `nothing` is returned. +Note that the default setting `limit=-1` does not limit the number of applicable methods. `overlayed` indicates if any of the matching methods comes from an overlayed method table. 
""" -function findall(@nospecialize(sig::Type), table::InternalMethodTable; limit::Int=Int(typemax(Int32))) - result = _findall(sig, nothing, table.world, limit) - result === missing && return missing - return result, false -end +findall(@nospecialize(sig::Type), table::InternalMethodTable; limit::Int=-1) = + _findall(sig, nothing, table.world, limit) -function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int=Int(typemax(Int32))) +function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int=-1) result = _findall(sig, table.mt, table.world, limit) - result === missing && return missing + result === nothing && return nothing nr = length(result) if nr ≥ 1 && result[nr].fully_covers # no need to fall back to the internal method table - return result, true + return result end # fall back to the internal method table fallback_result = _findall(sig, nothing, table.world, limit) - fallback_result === missing && return missing + fallback_result === nothing && return nothing # merge the fallback match results with the internal method table return MethodLookupResult( vcat(result.matches, fallback_result.matches), WorldRange( max(result.valid_worlds.min_world, fallback_result.valid_worlds.min_world), min(result.valid_worlds.max_world, fallback_result.valid_worlds.max_world)), - result.ambig | fallback_result.ambig), !isempty(result) + result.ambig | fallback_result.ambig) end -function _findall(@nospecialize(sig::Type), mt::Union{Nothing,Core.MethodTable}, world::UInt, limit::Int) +function _findall(@nospecialize(sig::Type), mt::Union{Nothing,MethodTable}, world::UInt, limit::Int) _min_val = RefValue{UInt}(typemin(UInt)) _max_val = RefValue{UInt}(typemax(UInt)) _ambig = RefValue{Int32}(0) ms = _methods_by_ftype(sig, mt, limit, world, false, _min_val, _max_val, _ambig) - if ms === false - return missing + isa(ms, Vector) || return nothing + return MethodLookupResult(ms, WorldRange(_min_val[], _max_val[]), _ambig[] != 0) +end + +function findall(@nospecialize(sig::Type), table::CachedMethodTable; limit::Int=-1) + if isconcretetype(sig) + # as for concrete types, we cache result at on the next level + return findall(sig, table.table; limit) + end + key = MethodMatchKey(sig, limit) + if haskey(table.cache, key) + return table.cache[key] + else + return table.cache[key] = findall(sig, table.table; limit) end - return MethodLookupResult(ms::Vector{Any}, WorldRange(_min_val[], _max_val[]), _ambig[] != 0) end """ findsup(sig::Type, view::MethodTableView) -> - (match::MethodMatch, valid_worlds::WorldRange, overlayed::Bool) or nothing + (match::Union{MethodMatch,Nothing}, valid_worlds::WorldRange, overlayed::Bool) Find the (unique) method such that `sig <: match.method.sig`, while being more specific than any other method with the same property. In other words, find the method @@ -103,24 +128,22 @@ In both cases `nothing` is returned. `overlayed` indicates if any of the matching methods comes from an overlayed method table. 
""" -function findsup(@nospecialize(sig::Type), table::InternalMethodTable) - return (_findsup(sig, nothing, table.world)..., false) -end +findsup(@nospecialize(sig::Type), table::InternalMethodTable) = + _findsup(sig, nothing, table.world) function findsup(@nospecialize(sig::Type), table::OverlayMethodTable) match, valid_worlds = _findsup(sig, table.mt, table.world) - match !== nothing && return match, valid_worlds, true + match !== nothing && return match, valid_worlds # fall back to the internal method table fallback_match, fallback_valid_worlds = _findsup(sig, nothing, table.world) return ( fallback_match, WorldRange( max(valid_worlds.min_world, fallback_valid_worlds.min_world), - min(valid_worlds.max_world, fallback_valid_worlds.max_world)), - false) + min(valid_worlds.max_world, fallback_valid_worlds.max_world))) end -function _findsup(@nospecialize(sig::Type), mt::Union{Nothing,Core.MethodTable}, world::UInt) +function _findsup(@nospecialize(sig::Type), mt::Union{Nothing,MethodTable}, world::UInt) min_valid = RefValue{UInt}(typemin(UInt)) max_valid = RefValue{UInt}(typemax(UInt)) match = ccall(:jl_gf_invoke_lookup_worlds, Any, (Any, Any, UInt, Ptr{Csize_t}, Ptr{Csize_t}), @@ -129,6 +152,5 @@ function _findsup(@nospecialize(sig::Type), mt::Union{Nothing,Core.MethodTable}, return match, valid_worlds end -isoverlayed(::MethodTableView) = error("unsatisfied MethodTableView interface") -isoverlayed(::InternalMethodTable) = false -isoverlayed(::OverlayMethodTable) = true +# This query is not cached +findsup(@nospecialize(sig::Type), table::CachedMethodTable) = findsup(sig, table.table) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 1e812db13c9eb..57fb1082add5e 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -15,148 +15,204 @@ const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError # NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c -const IR_FLAG_NULL = 0x00 +const IR_FLAG_NULL = zero(UInt32) # This statement is marked as @inbounds by user. # Ff replaced by inlining, any contained boundschecks may be removed. -const IR_FLAG_INBOUNDS = 0x01 << 0 +const IR_FLAG_INBOUNDS = one(UInt32) << 0 # This statement is marked as @inline by user -const IR_FLAG_INLINE = 0x01 << 1 +const IR_FLAG_INLINE = one(UInt32) << 1 # This statement is marked as @noinline by user -const IR_FLAG_NOINLINE = 0x01 << 2 -const IR_FLAG_THROW_BLOCK = 0x01 << 3 -# This statement may be removed if its result is unused. In particular it must -# thus be both pure and effect free. -const IR_FLAG_EFFECT_FREE = 0x01 << 4 +const IR_FLAG_NOINLINE = one(UInt32) << 2 +const IR_FLAG_THROW_BLOCK = one(UInt32) << 3 +# This statement was proven :effect_free +const IR_FLAG_EFFECT_FREE = one(UInt32) << 4 # This statement was proven not to throw -const IR_FLAG_NOTHROW = 0x01 << 5 +const IR_FLAG_NOTHROW = one(UInt32) << 5 +# This is :consistent +const IR_FLAG_CONSISTENT = one(UInt32) << 6 +# An optimization pass has updated this statement in a way that may +# have exposed information that inference did not see. Re-running +# inference on this statement may be profitable. 
+const IR_FLAG_REFINED = one(UInt32) << 7 +# This is :noub == ALWAYS_TRUE +const IR_FLAG_NOUB = one(UInt32) << 8 +# TODO: Both of these should eventually go away once +# This is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY +const IR_FLAG_EFIIMO = one(UInt32) << 9 +# This is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY +const IR_FLAG_INACCESSIBLE_OR_ARGMEM = one(UInt32) << 10 -const TOP_TUPLE = GlobalRef(Core, :tuple) +const NUM_IR_FLAGS = 11 # sync with julia.h -##################### -# OptimizationState # -##################### +const IR_FLAGS_EFFECTS = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_CONSISTENT | IR_FLAG_NOUB -struct EdgeTracker - edges::Vector{Any} - valid_worlds::RefValue{WorldRange} - EdgeTracker(edges::Vector{Any}, range::WorldRange) = - new(edges, RefValue{WorldRange}(range)) -end -EdgeTracker() = EdgeTracker(Any[], 0:typemax(UInt)) +has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag -intersect!(et::EdgeTracker, range::WorldRange) = - et.valid_worlds[] = intersect(et.valid_worlds[], range) +const TOP_TUPLE = GlobalRef(Core, :tuple) -push!(et::EdgeTracker, mi::MethodInstance) = push!(et.edges, mi) -function push!(et::EdgeTracker, ci::CodeInstance) - intersect!(et, WorldRange(min_world(li), max_world(li))) - push!(et, ci.def) +# This corresponds to the type of `CodeInfo`'s `inlining_cost` field +const InlineCostType = UInt16 +const MAX_INLINE_COST = typemax(InlineCostType) +const MIN_INLINE_COST = InlineCostType(10) +const MaybeCompressed = Union{CodeInfo, String} + +is_inlineable(@nospecialize src::MaybeCompressed) = + ccall(:jl_ir_inlining_cost, InlineCostType, (Any,), src) != MAX_INLINE_COST +set_inlineable!(src::CodeInfo, val::Bool) = + src.inlining_cost = (val ? MIN_INLINE_COST : MAX_INLINE_COST) + +function inline_cost_clamp(x::Int) + x > MAX_INLINE_COST && return MAX_INLINE_COST + x < MIN_INLINE_COST && return MIN_INLINE_COST + return convert(InlineCostType, x) end -struct InliningState{S <: Union{EdgeTracker, Nothing}, MICache, I<:AbstractInterpreter} - params::OptimizationParams - et::S - mi_cache::MICache # TODO move this to `OptimizationState` (as used by EscapeAnalysis as well) - interp::I -end +is_declared_inline(@nospecialize src::MaybeCompressed) = + ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 1 + +is_declared_noinline(@nospecialize src::MaybeCompressed) = + ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 2 -is_source_inferred(@nospecialize(src::Union{CodeInfo, Vector{UInt8}})) = +##################### +# OptimizationState # +##################### + +is_source_inferred(@nospecialize src::MaybeCompressed) = ccall(:jl_ir_flag_inferred, Bool, (Any,), src) -function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8, - mi::MethodInstance, argtypes::Vector{Any}) - if isa(src, CodeInfo) || isa(src, Vector{UInt8}) - src_inferred = is_source_inferred(src) - src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src) - return src_inferred && src_inlineable ? 
src : nothing - elseif src === nothing && is_stmt_inline(stmt_flag) - # if this statement is forced to be inlined, make an additional effort to find the - # inferred source in the local cache - # we still won't find a source for recursive call because the "single-level" inlining - # seems to be more trouble and complex than it's worth - inf_result = cache_lookup(mi, argtypes, get_inference_cache(interp)) - inf_result === nothing && return nothing - src = inf_result.src - if isa(src, CodeInfo) - src_inferred = is_source_inferred(src) - return src_inferred ? src : nothing - else - return nothing - end +function inlining_policy(interp::AbstractInterpreter, + @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32) + if isa(src, MaybeCompressed) + is_source_inferred(src) || return nothing + src_inlineable = is_stmt_inline(stmt_flag) || is_inlineable(src) + return src_inlineable ? src : nothing + elseif isa(src, IRCode) + return src + elseif isa(src, SemiConcreteResult) + return src end return nothing end -include("compiler/ssair/driver.jl") +struct InliningState{Interp<:AbstractInterpreter} + edges::Vector{Any} + world::UInt + interp::Interp +end +function InliningState(sv::InferenceState, interp::AbstractInterpreter) + edges = sv.stmt_edges[1]::Vector{Any} + return InliningState(edges, sv.world, interp) +end +function InliningState(interp::AbstractInterpreter) + return InliningState(Any[], get_world_counter(interp), interp) +end + +# get `code_cache(::AbstractInterpreter)` from `state::InliningState` +code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.world) -mutable struct OptimizationState +mutable struct OptimizationState{Interp<:AbstractInterpreter} linfo::MethodInstance src::CodeInfo ir::Union{Nothing, IRCode} - stmt_info::Vector{Any} + stmt_info::Vector{CallInfo} mod::Module - sptypes::Vector{Any} # static parameters + sptypes::Vector{VarState} slottypes::Vector{Any} - inlining::InliningState - cfg::Union{Nothing,CFG} - function OptimizationState(frame::InferenceState, params::OptimizationParams, - interp::AbstractInterpreter, recompute_cfg::Bool=true) - s_edges = frame.stmt_edges[1]::Vector{Any} - inlining = InliningState(params, - EdgeTracker(s_edges, frame.valid_worlds), - WorldView(code_cache(interp), frame.world), - interp) - cfg = recompute_cfg ? nothing : frame.cfg - return new(frame.linfo, frame.src, nothing, frame.stmt_info, frame.mod, - frame.sptypes, frame.slottypes, inlining, cfg) - end - function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams, - interp::AbstractInterpreter) - # prepare src for running optimization passes - # if it isn't already - nssavalues = src.ssavaluetypes - if nssavalues isa Int - src.ssavaluetypes = Any[ Any for i = 1:nssavalues ] - else - nssavalues = length(src.ssavaluetypes::Vector{Any}) - end - sptypes = sptypes_from_meth_instance(linfo) - nslots = length(src.slotflags) - slottypes = src.slottypes - if slottypes === nothing - slottypes = Any[ Any for i = 1:nslots ] - end - stmt_info = Any[nothing for i = 1:nssavalues] - # cache some useful state computations - def = linfo.def - mod = isa(def, Method) ? def.module : def - # Allow using the global MI cache, but don't track edges. 
- # This method is mostly used for unit testing the optimizer - inlining = InliningState(params, - nothing, - WorldView(code_cache(interp), get_world_counter()), - interp) - return new(linfo, src, nothing, stmt_info, mod, - sptypes, slottypes, inlining, nothing) - end -end - -function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter) - src = retrieve_code_info(linfo) + inlining::InliningState{Interp} + cfg::CFG + unreachable::BitSet + bb_vartables::Vector{Union{Nothing,VarTable}} + insert_coverage::Bool +end +function OptimizationState(sv::InferenceState, interp::AbstractInterpreter) + inlining = InliningState(sv, interp) + return OptimizationState(sv.linfo, sv.src, nothing, sv.stmt_info, sv.mod, + sv.sptypes, sv.slottypes, inlining, sv.cfg, + sv.unreachable, sv.bb_vartables, sv.insert_coverage) +end +function OptimizationState(linfo::MethodInstance, src::CodeInfo, interp::AbstractInterpreter) + # prepare src for running optimization passes if it isn't already + nssavalues = src.ssavaluetypes + if nssavalues isa Int + src.ssavaluetypes = Any[ Any for i = 1:nssavalues ] + else + nssavalues = length(src.ssavaluetypes::Vector{Any}) + end + sptypes = sptypes_from_meth_instance(linfo) + nslots = length(src.slotflags) + slottypes = src.slottypes + if slottypes === nothing + slottypes = Any[ Any for i = 1:nslots ] + end + stmt_info = CallInfo[ NoCallInfo() for i = 1:nssavalues ] + # cache some useful state computations + def = linfo.def + mod = isa(def, Method) ? def.module : def + # Allow using the global MI cache, but don't track edges. + # This method is mostly used for unit testing the optimizer + inlining = InliningState(interp) + cfg = compute_basic_blocks(src.code) + unreachable = BitSet() + bb_vartables = Union{VarTable,Nothing}[] + for block = 1:length(cfg.blocks) + push!(bb_vartables, VarState[ + VarState(slottypes[slot], src.slotflags[slot] & SLOT_USEDUNDEF != 0) + for slot = 1:nslots + ]) + end + return OptimizationState(linfo, src, nothing, stmt_info, mod, sptypes, slottypes, inlining, cfg, unreachable, bb_vartables, false) +end +function OptimizationState(linfo::MethodInstance, interp::AbstractInterpreter) + world = get_world_counter(interp) + src = retrieve_code_info(linfo, world) src === nothing && return nothing - return OptimizationState(linfo, src, params, interp) + return OptimizationState(linfo, src, interp) end +function argextype end # imported by EscapeAnalysis +function try_compute_field end # imported by EscapeAnalysis + +include("compiler/ssair/heap.jl") +include("compiler/ssair/slot2ssa.jl") +include("compiler/ssair/inlining.jl") +include("compiler/ssair/verify.jl") +include("compiler/ssair/legacy.jl") +include("compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl") +include("compiler/ssair/passes.jl") +include("compiler/ssair/irinterp.jl") + function ir_to_codeinf!(opt::OptimizationState) (; linfo, src) = opt - optdef = linfo.def - replace_code_newstyle!(src, opt.ir::IRCode, isa(optdef, Method) ? 
Int(optdef.nargs) : 0) + src = ir_to_codeinf!(src, opt.ir::IRCode) opt.ir = nothing + validate_code_in_debug_mode(linfo, src, "optimized") + return src +end + +function ir_to_codeinf!(src::CodeInfo, ir::IRCode) + replace_code_newstyle!(src, ir) widen_all_consts!(src) src.inferred = true - # finish updating the result struct - validate_code_in_debug_mode(linfo, src, "optimized") + return src +end + +# widen all Const elements in type annotations +function widen_all_consts!(src::CodeInfo) + ssavaluetypes = src.ssavaluetypes::Vector{Any} + for i = 1:length(ssavaluetypes) + ssavaluetypes[i] = widenconst(ssavaluetypes[i]) + end + + for i = 1:length(src.code) + x = src.code[i] + if isa(x, PiNode) + src.code[i] = PiNode(x.val, widenconst(x.typ)) + end + end + + src.rettype = widenconst(src.rettype) + return src end @@ -166,213 +222,161 @@ end _topmod(sv::OptimizationState) = _topmod(sv.mod) -is_stmt_inline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_INLINE ≠ 0 -is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE ≠ 0 -is_stmt_throw_block(stmt_flag::UInt8) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0 - -# These affect control flow within the function (so may not be removed -# if there is no usage within the function), but don't affect the purity -# of the function as a whole. -function stmt_affects_purity(@nospecialize(stmt), ir) - if isa(stmt, GotoNode) || isa(stmt, ReturnNode) - return false - end - if isa(stmt, GotoIfNot) - t = argextype(stmt.cond, ir) - return !(t ⊑ Bool) +is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE) +is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE) +is_stmt_throw_block(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_THROW_BLOCK) + +function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing) + Targ = args[1] + atyp = argextype(Targ, src) + # `Expr(:new)` of unknown type could raise arbitrary TypeError. + typ, isexact = instanceof_tfunc(atyp, true) + if !isexact + atyp = unwrap_unionall(widenconst(atyp)) + if isType(atyp) && isTypeDataType(atyp.parameters[1]) + typ = atyp.parameters[1] + else + return (false, false, false) + end + isabstracttype(typ) && return (false, false, false) + else + isconcretedispatch(typ) || return (false, false, false) end - if isa(stmt, Expr) - return stmt.head !== :loopinfo && stmt.head !== :enter + typ = typ::DataType + fcount = datatype_fieldcount(typ) + fcount === nothing && return (false, false, false) + fcount >= length(args) - 1 || return (false, false, false) + for fidx in 1:(length(args) - 1) + farg = args[fidx + 1] + eT = argextype(farg, src) + fT = fieldtype(typ, fidx) + if !isexact && has_free_typevars(fT) + if pattern_match !== nothing && pattern_match(src, typ, fidx, Targ, farg) + continue + end + return (false, false, false) + end + ⊑(𝕃ₒ, eT, fT) || return (false, false, false) end - return true + return (false, true, true) end """ - stmt_effect_flags(stmt, rt, src::Union{IRCode,IncrementalCompact}) + stmt_effect_flags(stmt, rt, src::Union{IRCode,IncrementalCompact}) -> + (consistent::Bool, effect_free_and_nothrow::Bool, nothrow::Bool) -Returns a tuple of (effect_free_and_nothrow, nothrow) for a given statement. +Returns a tuple of `(:consistent, :effect_free_and_nothrow, :nothrow)` flags for a given statement. 
""" -function stmt_effect_flags(@nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact}) +function stmt_effect_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact}) # TODO: We're duplicating analysis from inference here. - isa(stmt, PiNode) && return (true, true) - isa(stmt, PhiNode) && return (true, true) - isa(stmt, ReturnNode) && return (false, true) - isa(stmt, GotoNode) && return (false, true) - isa(stmt, GotoIfNot) && return (false, argextype(stmt.cond, src) ⊑ Bool) - isa(stmt, Slot) && return (false, false) # Slots shouldn't occur in the IR at this point, but let's be defensive here + isa(stmt, PiNode) && return (true, true, true) + isa(stmt, PhiNode) && return (true, true, true) + isa(stmt, ReturnNode) && return (true, false, true) + isa(stmt, EnterNode) && return (true, false, true) + isa(stmt, GotoNode) && return (true, false, true) + isa(stmt, GotoIfNot) && return (true, false, ⊑(𝕃ₒ, argextype(stmt.cond, src), Bool)) if isa(stmt, GlobalRef) nothrow = isdefined(stmt.mod, stmt.name) - return (nothrow, nothrow) - end - if isa(stmt, Expr) + consistent = nothrow && isconst(stmt.mod, stmt.name) + return (consistent, nothrow, nothrow) + elseif isa(stmt, Expr) (; head, args) = stmt if head === :static_parameter - etyp = (isa(src, IRCode) ? src.sptypes : src.ir.sptypes)[args[1]::Int] # if we aren't certain enough about the type, it might be an UndefVarError at runtime - nothrow = isa(etyp, Const) - return (nothrow, nothrow) + sptypes = isa(src, IRCode) ? src.sptypes : src.ir.sptypes + nothrow = !sptypes[args[1]::Int].undef + return (true, nothrow, nothrow) end if head === :call f = argextype(args[1], src) f = singleton_type(f) - f === nothing && return (false, false) - if isa(f, IntrinsicFunction) - nothrow = intrinsic_nothrow(f, - Any[argextype(args[i], src) for i = 2:length(args)]) - nothrow || return (false, false) - return (intrinsic_effect_free_if_nothrow(f), nothrow) - end - contains_is(_PURE_BUILTINS, f) && return (true, true) - # `get_binding_type` sets the type to Any if the binding doesn't exist yet - if f === Core.get_binding_type - length(args) == 3 || return false - M, s = argextype(args[2], src), argextype(args[3], src) - total = get_binding_type_effect_free(M, s) - return (total, total) - end - rt === Bottom && return (false, false) - nothrow = _builtin_nothrow(f, Any[argextype(args[i], src) for i = 2:length(args)], rt) - nothrow || return (false, false) - return (contains_is(_EFFECT_FREE_BUILTINS, f), nothrow) + f === nothing && return (false, false, false) + if f === UnionAll + # TODO: This is a weird special case - should be determined in inference + argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)] + nothrow = _builtin_nothrow(𝕃ₒ, f, argtypes, rt) + return (true, nothrow, nothrow) + end + if f === Intrinsics.cglobal || f === Intrinsics.llvmcall + # TODO: these are not yet linearized + return (false, false, false) + end + isa(f, Builtin) || return (false, false, false) + # Needs to be handled in inlining to look at the callee effects + f === Core._apply_iterate && return (false, false, false) + argtypes = Any[argextype(args[arg], src) for arg in 2:length(args)] + effects = builtin_effects(𝕃ₒ, f, argtypes, rt) + consistent = is_consistent(effects) + effect_free = is_effect_free(effects) + nothrow = is_nothrow(effects) + return (consistent, effect_free & nothrow, nothrow) elseif head === :new - typ = argextype(args[1], src) - # `Expr(:new)` of unknown type could raise arbitrary 
TypeError. - typ, isexact = instanceof_tfunc(typ) - isexact || return (false, false) - isconcretedispatch(typ) || return (false, false) - typ = typ::DataType - fieldcount(typ) >= length(args) - 1 || return (false, false) - for fld_idx in 1:(length(args) - 1) - eT = argextype(args[fld_idx + 1], src) - fT = fieldtype(typ, fld_idx) - eT ⊑ fT || return (false, false) - end - return (true, true) + return new_expr_effect_flags(𝕃ₒ, args, src) elseif head === :foreigncall - total = foreigncall_effect_free(stmt, src) - return (total, total) + effects = foreigncall_effects(stmt) do @nospecialize x + argextype(x, src) + end + consistent = is_consistent(effects) + effect_free = is_effect_free(effects) + nothrow = is_nothrow(effects) + return (consistent, effect_free & nothrow, nothrow) elseif head === :new_opaque_closure - length(args) < 4 && return (false, false) + length(args) < 4 && return (false, false, false) typ = argextype(args[1], src) - typ, isexact = instanceof_tfunc(typ) - isexact || return (false, false) - typ ⊑ Tuple || return (false, false) + typ, isexact = instanceof_tfunc(typ, true) + isexact || return (false, false, false) + ⊑(𝕃ₒ, typ, Tuple) || return (false, false, false) rt_lb = argextype(args[2], src) rt_ub = argextype(args[3], src) - src = argextype(args[4], src) - if !(rt_lb ⊑ Type && rt_ub ⊑ Type && src ⊑ Method) - return (false, false) + source = argextype(args[4], src) + if !(⊑(𝕃ₒ, rt_lb, Type) && ⊑(𝕃ₒ, rt_ub, Type) && ⊑(𝕃ₒ, source, Method)) + return (false, false, false) end - return (true, true) - elseif head === :isdefined || head === :the_exception || head === :copyast || head === :inbounds || head === :boundscheck - return (true, true) + return (false, true, true) + elseif head === :inbounds + return (true, true, true) + elseif head === :boundscheck || head === :isdefined || head === :the_exception || head === :copyast + return (false, true, true) else # e.g. 
:loopinfo - return (false, false) + return (false, false, false) end end - return (true, true) -end - -function foreigncall_effect_free(stmt::Expr, src::Union{IRCode,IncrementalCompact}) - args = stmt.args - name = args[1] - isa(name, QuoteNode) && (name = name.value) - isa(name, Symbol) || return false - ndims = alloc_array_ndims(name) - if ndims !== nothing - if ndims == 0 - return new_array_no_throw(args, src) - else - return alloc_array_no_throw(args, ndims, src) - end - end - return false -end - -function alloc_array_ndims(name::Symbol) - if name === :jl_alloc_array_1d - return 1 - elseif name === :jl_alloc_array_2d - return 2 - elseif name === :jl_alloc_array_3d - return 3 - elseif name === :jl_new_array - return 0 - end - return nothing -end - -const FOREIGNCALL_ARG_START = 6 - -function alloc_array_no_throw(args::Vector{Any}, ndims::Int, src::Union{IRCode,IncrementalCompact}) - length(args) ≥ ndims+FOREIGNCALL_ARG_START || return false - atype = instanceof_tfunc(argextype(args[FOREIGNCALL_ARG_START], src))[1] - dims = Csize_t[] - for i in 1:ndims - dim = argextype(args[i+FOREIGNCALL_ARG_START], src) - isa(dim, Const) || return false - dimval = dim.val - isa(dimval, Int) || return false - push!(dims, reinterpret(Csize_t, dimval)) - end - return _new_array_no_throw(atype, ndims, dims) -end - -function new_array_no_throw(args::Vector{Any}, src::Union{IRCode,IncrementalCompact}) - length(args) ≥ FOREIGNCALL_ARG_START+1 || return false - atype = instanceof_tfunc(argextype(args[FOREIGNCALL_ARG_START], src))[1] - dims = argextype(args[FOREIGNCALL_ARG_START+1], src) - isa(dims, Const) || return dims === Tuple{} - dimsval = dims.val - isa(dimsval, Tuple{Vararg{Int}}) || return false - ndims = nfields(dimsval) - isa(ndims, Int) || return false - dims = Csize_t[reinterpret(Csize_t, dimval) for dimval in dimsval] - return _new_array_no_throw(atype, ndims, dims) -end - -function _new_array_no_throw(@nospecialize(atype), ndims::Int, dims::Vector{Csize_t}) - isa(atype, DataType) || return false - eltype = atype.parameters[1] - iskindtype(typeof(eltype)) || return false - elsz = aligned_sizeof(eltype) - return ccall(:jl_array_validate_dims, Cint, - (Ptr{Csize_t}, Ptr{Csize_t}, UInt32, Ptr{Csize_t}, Csize_t), - #=nel=#RefValue{Csize_t}(), #=tot=#RefValue{Csize_t}(), ndims, dims, elsz) == 0 + isa(stmt, SlotNumber) && error("unexpected IR elements") + return (true, true, true) end """ argextype(x, src::Union{IRCode,IncrementalCompact}) -> t - argextype(x, src::CodeInfo, sptypes::Vector{Any}) -> t + argextype(x, src::CodeInfo, sptypes::Vector{VarState}) -> t Return the type of value `x` in the context of inferred source `src`. Note that `t` might be an extended lattice element. Use `widenconst(t)` to get the native Julia type of `x`. 
""" -argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{Any} = ir.sptypes) = +argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{VarState} = ir.sptypes) = argextype(x, ir, sptypes, ir.argtypes) -function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{Any} = compact.ir.sptypes) +function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{VarState} = compact.ir.sptypes) isa(x, AnySSAValue) && return types(compact)[x] return argextype(x, compact, sptypes, compact.ir.argtypes) end -argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{Any}) = argextype(x, src, sptypes, src.slottypes::Vector{Any}) +argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{VarState}) = argextype(x, src, sptypes, src.slottypes::Vector{Any}) function argextype( @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo}, - sptypes::Vector{Any}, slottypes::Vector{Any}) + sptypes::Vector{VarState}, slottypes::Vector{Any}) if isa(x, Expr) if x.head === :static_parameter - return sptypes[x.args[1]::Int] + return sptypes[x.args[1]::Int].typ elseif x.head === :boundscheck return Bool elseif x.head === :copyast return argextype(x.args[1], src, sptypes, slottypes) end - @assert false "argextype only works on argument-position values" + Core.println("argextype called on Expr with head ", x.head, + " which is not valid for IR in argument-position.") + @assert false elseif isa(x, SlotNumber) return slottypes[x.id] - elseif isa(x, TypedSlot) - return x.typ elseif isa(x, SSAValue) return abstract_eval_ssavalue(x, src) elseif isa(x, Argument) @@ -380,7 +384,7 @@ function argextype( elseif isa(x, QuoteNode) return Const(x.value) elseif isa(x, GlobalRef) - return abstract_eval_global(x.mod, x.name) + return abstract_eval_globalref_type(x) elseif isa(x, PhiNode) return Any elseif isa(x, PiNode) @@ -389,117 +393,55 @@ function argextype( return Const(x) end end +abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) = abstract_eval_ssavalue(s, src.ssavaluetypes::Vector{Any}) abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s] -struct ConstAPI - val - ConstAPI(@nospecialize val) = new(val) -end - """ finish(interp::AbstractInterpreter, opt::OptimizationState, - params::OptimizationParams, ir::IRCode, caller::InferenceResult) -> analyzed::Union{Nothing,ConstAPI} + ir::IRCode, caller::InferenceResult) -Post process information derived by Julia-level optimizations for later uses: -- computes "purity", i.e. side-effect-freeness -- computes inlining cost - -In a case when the purity is proven, `finish` can return `ConstAPI` object wrapping the constant -value so that the runtime system will use the constant calling convention for the method calls. +Post-process information derived by Julia-level optimizations for later use. +In particular, this function determines the inlineability of the optimized code. """ function finish(interp::AbstractInterpreter, opt::OptimizationState, - params::OptimizationParams, ir::IRCode, caller::InferenceResult) + ir::IRCode, caller::InferenceResult) (; src, linfo) = opt (; def, specTypes) = linfo - analyzed = nothing # `ConstAPI` if this call can use constant calling convention - force_noinline = _any(x::Expr -> x.head === :meta && x.args[1] === :noinline, ir.meta) + force_noinline = is_declared_noinline(src) # compute inlining and other related optimizations result = caller.result @assert !(result isa LimitedAccuracy) - result = isa(result, InterConditional) ? 
widenconditional(result) : result - if (isa(result, Const) || isconstType(result)) - proven_pure = false - # must be proven pure to use constant calling convention; - # otherwise we might skip throwing errors (issue #20704) - # TODO: Improve this analysis; if a function is marked @pure we should really - # only care about certain errors (e.g. method errors and type errors). - if length(ir.stmts) < 15 - proven_pure = true - for i in 1:length(ir.stmts) - node = ir.stmts[i] - stmt = node[:inst] - if stmt_affects_purity(stmt, ir) && !stmt_effect_flags(stmt, node[:type], ir)[1] - proven_pure = false - break - end - end - if proven_pure - for fl in src.slotflags - if (fl & SLOT_USEDUNDEF) != 0 - proven_pure = false - break - end - end - end - end - - if proven_pure - # use constant calling convention - # Do not emit `jl_fptr_const_return` if coverage is enabled - # so that we don't need to add coverage support - # to the `jl_call_method_internal` fast path - # Still set pure flag to make sure `inference` tests pass - # and to possibly enable more optimization in the future - src.pure = true - if isa(result, Const) - val = result.val - if is_inlineable_constant(val) - analyzed = ConstAPI(val) - end - else - @assert isconstType(result) - analyzed = ConstAPI(result.parameters[1]) - end - force_noinline || (src.inlineable = true) - end - end + result = widenslotwrapper(result) opt.ir = ir # determine and cache inlineability - union_penalties = false if !force_noinline sig = unwrap_unionall(specTypes) - if isa(sig, DataType) && sig.name === Tuple.name - for P in sig.parameters - P = unwrap_unionall(P) - if isa(P, Union) - union_penalties = true - break - end - end - else + if !(isa(sig, DataType) && sig.name === Tuple.name) force_noinline = true end - if !src.inlineable && result === Bottom + if !is_declared_inline(src) && result === Bottom force_noinline = true end end if force_noinline - src.inlineable = false + set_inlineable!(src, false) elseif isa(def, Method) - if src.inlineable && isdispatchtuple(specTypes) + if is_declared_inline(src) && isdispatchtuple(specTypes) # obey @inline declaration if a dispatch barrier would not help + set_inlineable!(src, true) else # compute the cost (size) of inlining this code + params = OptimizationParams(interp) cost_threshold = default = params.inline_cost_threshold - if result ⊑ Tuple && !isconcretetype(widenconst(result)) + if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result)) cost_threshold += params.inline_tupleret_bonus end # if the method is declared as `@inline`, increase the cost threshold 20x - if src.inlineable + if is_declared_inline(src) cost_threshold += 19*default end # a few functions get special treatment @@ -509,45 +451,453 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, cost_threshold += 4*default end end - src.inlineable = inline_worthy(ir, params, union_penalties, cost_threshold) + src.inlining_cost = inline_cost(ir, params, cost_threshold) end end + return nothing +end - return analyzed +function visit_bb_phis!(callback, ir::IRCode, bb::Int) + stmts = ir.cfg.blocks[bb].stmts + for idx in stmts + stmt = ir[SSAValue(idx)][:stmt] + if !isa(stmt, PhiNode) + if !is_valid_phiblock_stmt(stmt) + return + end + else + callback(idx) + end + end end -# run the optimization work -function optimize(interp::AbstractInterpreter, opt::OptimizationState, - params::OptimizationParams, caller::InferenceResult) - @timeit "optimizer" ir = run_passes(opt.src, opt, caller) - return finish(interp, opt, 
params, ir, caller) +function any_stmt_may_throw(ir::IRCode, bb::Int) + for stmt in ir.cfg.blocks[bb].stmts + if has_flag(ir[SSAValue(stmt)], IR_FLAG_NOTHROW) + return true + end + end + return false end -using .EscapeAnalysis -import .EscapeAnalysis: EscapeState, ArgEscapeCache, is_ipo_profitable +function conditional_successors_may_throw(lazypostdomtree::LazyPostDomtree, ir::IRCode, bb::Int) + visited = BitSet((bb,)) + worklist = Int[bb] + while !isempty(worklist) + thisbb = pop!(worklist) + for succ in ir.cfg.blocks[thisbb].succs + succ in visited && continue + push!(visited, succ) + postdominates(get!(lazypostdomtree), succ, thisbb) && continue + any_stmt_may_throw(ir, succ) && return true + push!(worklist, succ) + end + end + return false +end -""" - cache_escapes!(caller::InferenceResult, estate::EscapeState) +struct AugmentedDomtree + cfg::CFG + domtree::DomTree +end -Transforms escape information of call arguments of `caller`, -and then caches it into a global cache for later interprocedural propagation. -""" -cache_escapes!(caller::InferenceResult, estate::EscapeState) = - caller.argescapes = ArgEscapeCache(estate) +mutable struct LazyAugmentedDomtree + const ir::IRCode + agdomtree::AugmentedDomtree + LazyAugmentedDomtree(ir::IRCode) = new(ir) +end + +function get!(lazyagdomtree::LazyAugmentedDomtree) + isdefined(lazyagdomtree, :agdomtree) && return lazyagdomtree.agdomtree + ir = lazyagdomtree.ir + cfg = copy(ir.cfg) + # Add a virtual basic block to represent the exit + push!(cfg.blocks, BasicBlock(StmtRange(0:-1))) + for bb = 1:(length(cfg.blocks)-1) + terminator = ir[SSAValue(last(cfg.blocks[bb].stmts))][:stmt] + if isa(terminator, ReturnNode) && isdefined(terminator, :val) + cfg_insert_edge!(cfg, bb, length(cfg.blocks)) + end + end + domtree = construct_domtree(cfg.blocks) + return lazyagdomtree.agdomtree = AugmentedDomtree(cfg, domtree) +end + +mutable struct PostOptAnalysisState + const result::InferenceResult + const ir::IRCode + const inconsistent::BitSetBoundedMinPrioritySet + const tpdum::TwoPhaseDefUseMap + const lazypostdomtree::LazyPostDomtree + const lazyagdomtree::LazyAugmentedDomtree + const ea_analysis_pending::Vector{Int} + all_retpaths_consistent::Bool + all_effect_free::Bool + effect_free_if_argmem_only::Union{Nothing,Bool} + all_nothrow::Bool + all_noub::Bool + any_conditional_ub::Bool + function PostOptAnalysisState(result::InferenceResult, ir::IRCode) + inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts)) + tpdum = TwoPhaseDefUseMap(length(ir.stmts)) + lazypostdomtree = LazyPostDomtree(ir) + lazyagdomtree = LazyAugmentedDomtree(ir) + return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[], + true, true, nothing, true, true, false) + end +end + +give_up_refinements!(sv::PostOptAnalysisState) = + sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only = + sv.all_nothrow = sv.all_noub = false + +function any_refinable(sv::PostOptAnalysisState) + effects = sv.result.ipo_effects + return ((!is_consistent(effects) & sv.all_retpaths_consistent) | + (!is_effect_free(effects) & sv.all_effect_free) | + (!is_nothrow(effects) & sv.all_nothrow) | + (!is_noub(effects) & sv.all_noub)) +end + +struct GetNativeEscapeCache{CodeCache} + code_cache::CodeCache + GetNativeEscapeCache(code_cache::CodeCache) where CodeCache = new{CodeCache}(code_cache) +end +GetNativeEscapeCache(interp::AbstractInterpreter) = GetNativeEscapeCache(code_cache(interp)) +function ((; code_cache)::GetNativeEscapeCache)(mi::MethodInstance) + 
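    # (editor's note) summary of the lookup below: return the callee's cached
    # `ArgEscapeCache` when escape-analysis results were stored alongside its
    # `CodeInstance`; return `true` when the cached effects (:effect_free together with
    # :inaccessiblememonly) already imply nothing escapes; otherwise return `false`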
codeinst = get(code_cache, mi, nothing) + codeinst isa CodeInstance || return false + argescapes = traverse_analysis_results(codeinst) do @nospecialize result + return result isa EscapeAnalysis.ArgEscapeCache ? result : nothing + end + if argescapes !== nothing + return argescapes + end + effects = decode_effects(codeinst.ipo_purity_bits) + if is_effect_free(effects) && is_inaccessiblememonly(effects) + # We might not have run EA on simple frames without any escapes (e.g. when optimization + # is skipped when result is constant-folded by abstract interpretation). If those + # frames aren't inlined, the accuracy of EA for caller context takes a big hit. + # This is a HACK to avoid that, but obviously, a more comprehensive fix would be ideal. + return true + end + return false +end + +function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState) + if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending) + ir = sv.ir + nargs = length(ir.argtypes) + estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp)) + argescapes = EscapeAnalysis.ArgEscapeCache(estate) + stack_analysis_result!(sv.result, argescapes) + validate_mutable_arg_escapes!(estate, sv) + end + + any_refinable(sv) || return false + effects = sv.result.ipo_effects + sv.result.ipo_effects = Effects(effects; + consistent = sv.all_retpaths_consistent ? ALWAYS_TRUE : effects.consistent, + effect_free = sv.all_effect_free ? ALWAYS_TRUE : + sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free, + nothrow = sv.all_nothrow ? true : effects.nothrow, + noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub) + return true +end + +function is_ipo_dataflow_analysis_profitable(effects::Effects) + return !(is_consistent(effects) && is_effect_free(effects) && + is_nothrow(effects) && is_noub(effects)) +end -function ipo_escape_cache(mi_cache::MICache) where MICache - return function (linfo::Union{InferenceResult,MethodInstance}) - if isa(linfo, InferenceResult) - argescapes = linfo.argescapes +function iscall_with_boundscheck(@nospecialize(stmt), sv::PostOptAnalysisState) + isexpr(stmt, :call) || return false + ft = argextype(stmt.args[1], sv.ir) + f = singleton_type(ft) + f === nothing && return false + if f === getfield + nargs = 4 + elseif f === memoryref || f === memoryrefget || f === memoryref_isassigned + nargs = 4 + elseif f === memoryrefset! 
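        # (editor's note) presumably 5 because memoryrefset! additionally takes the value
        # being stored, so the trailing boundscheck argument comes one position later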
+ nargs = 5 + else + return false + end + length(stmt.args) < nargs && return false + boundscheck = stmt.args[end] + argextype(boundscheck, sv.ir) === Bool || return false + isa(boundscheck, SSAValue) || return false + return true +end + +function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospecialize(stmt), + estate::EscapeAnalysis.EscapeState) + stmt isa Expr || return false + if isexpr(stmt, :invoke) + startidx = 2 + elseif isexpr(stmt, :new) + startidx = 1 + else + return false + end + for i = startidx:length(stmt.args) + arg = stmt.args[i] + argt = argextype(arg, ir) + if is_mutation_free_argtype(argt) + continue + end + # See if we can find the allocation + if isa(arg, Argument) + if EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(estate[arg])) + # Even if we prove everything else effect_free, the best we can + # say is :effect_free_if_argmem_only + if sv.effect_free_if_argmem_only === nothing + sv.effect_free_if_argmem_only = true + end + else + sv.effect_free_if_argmem_only = false + end + return false + elseif isa(arg, SSAValue) + EscapeAnalysis.has_no_escape(estate[arg]) || return false + check_all_args_noescape!(sv, ir, ir[arg][:stmt], estate) || return false else - codeinst = get(mi_cache, linfo, nothing) - isa(codeinst, CodeInstance) || return nothing - argescapes = codeinst.argescapes + return false end - return argescapes !== nothing ? argescapes::ArgEscapeCache : nothing end + return true +end + +function validate_mutable_arg_escapes!(estate::EscapeAnalysis.EscapeState, sv::PostOptAnalysisState) + ir = sv.ir + for idx in sv.ea_analysis_pending + # See if any mutable memory was allocated in this function and determined + # not to escape. + inst = ir[SSAValue(idx)] + stmt = inst[:stmt] + if !check_all_args_noescape!(sv, ir, stmt, estate) + return sv.all_effect_free = false + end + end + return true +end + +function is_conditional_noub(inst::Instruction, sv::PostOptAnalysisState) + stmt = inst[:stmt] + iscall_with_boundscheck(stmt, sv) || return false + barg = stmt.args[end]::SSAValue + bstmt = sv.ir[barg][:stmt] + isexpr(bstmt, :boundscheck) || return false + # If IR_FLAG_INBOUNDS is already set, no more conditional ub + (!isempty(bstmt.args) && bstmt.args[1] === false) && return false + return true +end + +const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLE_OR_ARGMEM + +function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState) + flag = inst[:flag] + # If we can prove that the argmem does not escape the current function, we can + # refine this to :effect_free. + needs_ea_validation = has_flag(flag, IR_FLAGS_NEEDS_EA) + stmt = inst[:stmt] + if !needs_ea_validation + if !isterminator(stmt) && stmt !== nothing + # ignore control flow node – they are not removable on their own and thus not + # have `IR_FLAG_EFFECT_FREE` but still do not taint `:effect_free`-ness of + # the whole method invocation + sv.all_effect_free &= has_flag(flag, IR_FLAG_EFFECT_FREE) + end + elseif sv.all_effect_free + if (isexpr(stmt, :invoke) || isexpr(stmt, :new) || + # HACK for performance: limit the scope of EA to code with object field access only, + # since its abilities to reason about e.g. arrays are currently very limited anyways. 
+ is_known_call(stmt, setfield!, sv.ir)) + push!(sv.ea_analysis_pending, inst.idx) + else + sv.all_effect_free = false + end + end + sv.all_nothrow &= has_flag(flag, IR_FLAG_NOTHROW) + if !has_flag(flag, IR_FLAG_NOUB) + # Special case: `:boundscheck` into `getfield` or memory operations is `:noub_if_noinbounds` + if is_conditional_noub(inst, sv) + sv.any_conditional_ub = true + else + sv.all_noub = false + end + end +end + +function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState) + flag = inst[:flag] + stmt_inconsistent = !has_flag(flag, IR_FLAG_CONSISTENT) + stmt = inst[:stmt] + # Special case: For `getfield` and memory operations, we allow inconsistency of the :boundscheck argument + (; inconsistent, tpdum) = sv + if iscall_with_boundscheck(stmt, sv) + for i = 1:(length(stmt.args)-1) + val = stmt.args[i] + if isa(val, SSAValue) + stmt_inconsistent |= val.id in inconsistent + count!(tpdum, val) + end + end + else + for ur in userefs(stmt) + val = ur[] + if isa(val, SSAValue) + stmt_inconsistent |= val.id in inconsistent + count!(tpdum, val) + end + end + end + stmt_inconsistent && push!(inconsistent, inst.idx) + return stmt_inconsistent +end + +struct ScanStmt + sv::PostOptAnalysisState +end + +function ((; sv)::ScanStmt)(inst::Instruction, lstmt::Int, bb::Int) + stmt = inst[:stmt] + + if isa(stmt, EnterNode) + # try/catch not yet modeled + give_up_refinements!(sv) + return nothing + end + + scan_non_dataflow_flags!(inst, sv) + + stmt_inconsistent = scan_inconsistency!(inst, sv) + + if stmt_inconsistent && inst.idx == lstmt + if isa(stmt, ReturnNode) && isdefined(stmt, :val) + sv.all_retpaths_consistent = false + elseif isa(stmt, GotoIfNot) + # Conditional Branch with inconsistent condition. + # If we do not know this function terminates, taint consistency, now, + # :consistent requires consistent termination. TODO: Just look at the + # inconsistent region. + if !sv.result.ipo_effects.terminates + sv.all_retpaths_consistent = false + elseif conditional_successors_may_throw(sv.lazypostdomtree, sv.ir, bb) + # Check if there are potential throws that require + sv.all_retpaths_consistent = false + else + (; cfg, domtree) = get!(sv.lazyagdomtree) + for succ in iterated_dominance_frontier(cfg, BlockLiveness(sv.ir.cfg.blocks[bb].succs, nothing), domtree) + if succ == length(cfg.blocks) + # Phi node in the virtual exit -> We have a conditional + # return. TODO: Check if all the retvals are egal. 
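# Illustration (not part of the patch): a conditional return reached through an inconsistent
# branch is exactly the pattern that breaks `:consistent`. A minimal, hypothetical example:
#
#     cond_ret(x) = rand() < 0.5 ? x : x + 1
#     Base.infer_effects(cond_ret, (Int,))   # reported as not :consistent
#
# Two calls with egal arguments may take different return paths and so return different
# values, which is why the scan gives up on `all_retpaths_consistent` here.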
+                        sv.all_retpaths_consistent = false
+                    else
+                        visit_bb_phis!(sv.ir, succ) do phiidx::Int
+                            push!(sv.inconsistent, phiidx)
+                        end
+                    end
+                end
+            end
+        end
+    end
+
+    # bail out early if there are no possibilities to refine the effects
+    if !any_refinable(sv)
+        return nothing
+    end
+
+    return true
+end
+
+function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner)
+    scan!(ScanStmt(sv), scanner, false)
+    complete!(sv.tpdum); push!(scanner.bb_ip, 1)
+    populate_def_use_map!(sv.tpdum, scanner)
+
+    (; ir, inconsistent, tpdum) = sv
+    stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts))
+    for def in sv.inconsistent
+        for use in tpdum[def]
+            if !(use in inconsistent)
+                push!(inconsistent, use)
+                append!(stmt_ip, tpdum[use])
+            end
+        end
+    end
+    while !isempty(stmt_ip)
+        idx = popfirst!(stmt_ip)
+        inst = ir[SSAValue(idx)]
+        stmt = inst[:stmt]
+        if iscall_with_boundscheck(stmt, sv)
+            any_non_boundscheck_inconsistent = false
+            for i = 1:(length(stmt.args)-1)
+                val = stmt.args[i]
+                if isa(val, SSAValue)
+                    any_non_boundscheck_inconsistent |= val.id in inconsistent
+                    any_non_boundscheck_inconsistent && break
+                end
+            end
+            any_non_boundscheck_inconsistent || continue
+        elseif isa(stmt, ReturnNode)
+            sv.all_retpaths_consistent = false
+        elseif isa(stmt, GotoIfNot)
+            bb = block_for_inst(ir, idx)
+            cfg = ir.cfg
+            blockliveness = BlockLiveness(cfg.blocks[bb].succs, nothing)
+            domtree = construct_domtree(cfg.blocks)
+            for succ in iterated_dominance_frontier(cfg, blockliveness, domtree)
+                visit_bb_phis!(ir, succ) do phiidx::Int
+                    push!(inconsistent, phiidx)
+                    push!(stmt_ip, phiidx)
+                end
+            end
+        end
+        sv.all_retpaths_consistent || break
+        append!(inconsistent, tpdum[idx])
+        append!(stmt_ip, tpdum[idx])
+    end
+end
+
+function ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, result::InferenceResult)
+    if !is_ipo_dataflow_analysis_profitable(result.ipo_effects)
+        return false
+    end
+
+    @assert isempty(ir.new_nodes) "IRCode should be compacted before post-opt analysis"
+
+    sv = PostOptAnalysisState(result, ir)
+    scanner = BBScanner(ir)
+
+    completed_scan = scan!(ScanStmt(sv), scanner, true)
+
+    if !completed_scan
+        if sv.all_retpaths_consistent
+            check_inconsistentcy!(sv, scanner)
+        else
+            # No longer any dataflow concerns, just scan the flags
+            scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int
+                scan_non_dataflow_flags!(inst, sv)
+                # bail out early if there are no possibilities to refine the effects
+                if !any_refinable(sv)
+                    return nothing
+                end
+                return true
+            end
+        end
+    end
+
+    return refine_effects!(interp, sv)
+end
+
+# run the optimization work
+function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
+    @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt, caller)
+    ipo_dataflow_analysis!(interp, ir, caller)
+    return finish(interp, opt, ir, caller)
 end
-null_escape_cache(linfo::Union{InferenceResult,MethodInstance}) = nothing
 macro pass(name, expr)
     optimize_until = esc(:optimize_until)
@@ -560,31 +910,32 @@ macro pass(name, expr)
     end
 end
-matchpass(optimize_until::Int, stage, _name) = optimize_until < stage
-matchpass(optimize_until::String, _stage, name) = optimize_until == name
+matchpass(optimize_until::Int, stage, _) = optimize_until == stage
+matchpass(optimize_until::String, _, name) = optimize_until == name
 matchpass(::Nothing, _, _) = false
-function run_passes(
+function run_passes_ipo_safe(
     ci::CodeInfo,
     sv::OptimizationState,
    caller::InferenceResult,
    optimize_until = nothing, # run all passes by default
 )
-
__stage__ = 1 # used by @pass + __stage__ = 0 # used by @pass # NOTE: The pass name MUST be unique for `optimize_until::AbstractString` to work @pass "convert" ir = convert_to_ircode(ci, sv) @pass "slot2reg" ir = slot2reg(ir, ci, sv) # TODO: Domsorting can produce an updated domtree - no need to recompute here @pass "compact 1" ir = compact!(ir) - @pass "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds) + @pass "Inlining" ir = ssa_inlining_pass!(ir, sv.inlining, ci.propagate_inbounds) # @timeit "verify 2" verify_ir(ir) @pass "compact 2" ir = compact!(ir) @pass "SROA" ir = sroa_pass!(ir, sv.inlining) - @pass "ADCE" ir = adce_pass!(ir) - @pass "type lift" ir = type_lift_pass!(ir) - @pass "compact 3" ir = compact!(ir) + @pass "ADCE" (ir, made_changes) = adce_pass!(ir, sv.inlining) + if made_changes + @pass "compact 3" ir = compact!(ir, true) + end if JLOptions().debug_level == 2 - @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable)) + @timeit "verify 3" (verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp)); verify_linetable(ir.linetable)) end @label __done__ # used by @pass return ir @@ -596,94 +947,169 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState) linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode} end - # check if coverage mode is enabled - coverage = coverage_enabled(sv.mod) - if !coverage && JLOptions().code_coverage == 3 # path-specific coverage mode - for line in linetable - if is_file_tracked(line.file) - # if any line falls in a tracked file enable coverage for all - coverage = true - break + # Update control-flow to reflect any unreachable branches. + ssavaluetypes = ci.ssavaluetypes::Vector{Any} + code = copy_exprargs(ci.code) + for i = 1:length(code) + expr = code[i] + if !(i in sv.unreachable) + if isa(expr, GotoIfNot) + # Replace this live GotoIfNot with: + # - no-op if :nothrow and the branch target is unreachable + # - cond if :nothrow and both targets are unreachable + # - typeassert if must-throw + block = block_for_inst(sv.cfg, i) + if ssavaluetypes[i] === Bottom + destblock = block_for_inst(sv.cfg, expr.dest) + cfg_delete_edge!(sv.cfg, block, block + 1) + ((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock) + expr = Expr(:call, Core.typeassert, expr.cond, Bool) + elseif i + 1 in sv.unreachable + @assert has_flag(ci.ssaflags[i], IR_FLAG_NOTHROW) + cfg_delete_edge!(sv.cfg, block, block + 1) + expr = GotoNode(expr.dest) + elseif expr.dest in sv.unreachable + @assert has_flag(ci.ssaflags[i], IR_FLAG_NOTHROW) + cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest)) + expr = nothing + end + code[i] = expr + elseif isa(expr, EnterNode) + catchdest = expr.catch_dest + if catchdest in sv.unreachable + cfg_delete_edge!(sv.cfg, block_for_inst(sv.cfg, i), block_for_inst(sv.cfg, catchdest)) + if isdefined(expr, :scope) + # We've proven that nothing inside the enter region throws, + # but we don't yet know whether something might read the scope, + # so we need to retain this enter for the time being. However, + # we use the special marker `0` to indicate that setting up + # the try/catch frame is not required. + code[i] = EnterNode(expr, 0) + else + code[i] = nothing + end + end end end end # Go through and add an unreachable node after every # Union{} call. Then reindex labels. 
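# Illustration (not part of the patch): the reindexing step mentioned above boils down to a
# prefix sum over per-statement insertion counts. The helper names below are hypothetical
# stand-ins for `cumsum_ssamap!`/`renumber_ir_elements!`, kept minimal and self-contained.
function cumulative_shift!(changemap::Vector{Int})
    accum = 0
    for i in eachindex(changemap)
        accum += changemap[i]
        changemap[i] = accum
    end
    return changemap
end
renumber(oldidx::Int, changemap::Vector{Int}) = oldidx + changemap[oldidx]

# One statement inserted in front of original statement 2 shifts the indices of statements 2..4 by one:
changemap = cumulative_shift!([0, 1, 0, 0])   # -> [0, 1, 1, 1]
@assert renumber(1, changemap) == 1
@assert renumber(3, changemap) == 4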
- code = copy_exprargs(ci.code) stmtinfo = sv.stmt_info codelocs = ci.codelocs - ssavaluetypes = ci.ssavaluetypes::Vector{Any} ssaflags = ci.ssaflags meta = Expr[] idx = 1 oldidx = 1 nstmts = length(code) - ssachangemap = labelchangemap = nothing + ssachangemap = labelchangemap = blockchangemap = nothing prevloc = zero(eltype(ci.codelocs)) while idx <= length(code) codeloc = codelocs[idx] - if coverage && codeloc != prevloc && codeloc != 0 + if sv.insert_coverage && codeloc != prevloc && codeloc != 0 # insert a side-effect instruction before the current instruction in the same basic block insert!(code, idx, Expr(:code_coverage_effect)) insert!(codelocs, idx, codeloc) insert!(ssavaluetypes, idx, Nothing) - insert!(stmtinfo, idx, nothing) + insert!(stmtinfo, idx, NoCallInfo()) insert!(ssaflags, idx, IR_FLAG_NULL) if ssachangemap === nothing ssachangemap = fill(0, nstmts) end if labelchangemap === nothing - labelchangemap = coverage ? fill(0, nstmts) : ssachangemap + labelchangemap = fill(0, nstmts) end ssachangemap[oldidx] += 1 if oldidx < length(labelchangemap) labelchangemap[oldidx + 1] += 1 end + if blockchangemap === nothing + blockchangemap = fill(0, length(sv.cfg.blocks)) + end + blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1 idx += 1 prevloc = codeloc end - if code[idx] isa Expr && ssavaluetypes[idx] === Union{} + if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) + # We should have converted any must-throw terminators to an equivalent w/o control-flow edges + @assert !isterminator(code[idx]) + + block = block_for_inst(sv.cfg, oldidx) + block_end = last(sv.cfg.blocks[block].stmts) + (idx - oldidx) + + # Delete all successors to this basic block + for succ in sv.cfg.blocks[block].succs + preds = sv.cfg.blocks[succ].preds + deleteat!(preds, findfirst(x::Int->x==block, preds)::Int) + end + empty!(sv.cfg.blocks[block].succs) + if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val)) - # insert unreachable in the same basic block after the current instruction (splitting it) - insert!(code, idx + 1, ReturnNode()) - insert!(codelocs, idx + 1, codelocs[idx]) - insert!(ssavaluetypes, idx + 1, Union{}) - insert!(stmtinfo, idx + 1, nothing) - insert!(ssaflags, idx + 1, ssaflags[idx]) - if ssachangemap === nothing - ssachangemap = fill(0, nstmts) - end - if labelchangemap === nothing - labelchangemap = coverage ? fill(0, nstmts) : ssachangemap - end - if oldidx < length(ssachangemap) - ssachangemap[oldidx + 1] += 1 - coverage && (labelchangemap[oldidx + 1] += 1) + # Any statements from here to the end of the block have been wrapped in Core.Const(...) + # by type inference (effectively deleting them). Only task left is to replace the block + # terminator with an explicit `unreachable` marker. 
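# Illustration (not part of the patch): the "unreachable" marker used below is simply a
# `ReturnNode` with no `val` field defined, which is how the rest of the compiler spells
# unreachable code:
#
#     node = Core.ReturnNode()       # the zero-argument constructor leaves `val` undefined
#     @assert !isdefined(node, :val)
#
# `isdefined(node, :val)` is the test used throughout this file to tell a real return apart
# from an unreachable marker.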
+ if block_end > idx + code[block_end] = ReturnNode() + codelocs[block_end] = codelocs[idx] + ssavaluetypes[block_end] = Union{} + stmtinfo[block_end] = NoCallInfo() + ssaflags[block_end] = IR_FLAG_NOTHROW + + # Verify that type-inference did its job + if JLOptions().debug_level == 2 + for i = (oldidx + 1):last(sv.cfg.blocks[block].stmts) + @assert i in sv.unreachable + end + end + + idx += block_end - idx + else + insert!(code, idx + 1, ReturnNode()) + insert!(codelocs, idx + 1, codelocs[idx]) + insert!(ssavaluetypes, idx + 1, Union{}) + insert!(stmtinfo, idx + 1, NoCallInfo()) + insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW) + if ssachangemap === nothing + ssachangemap = fill(0, nstmts) + end + if labelchangemap === nothing + labelchangemap = sv.insert_coverage ? fill(0, nstmts) : ssachangemap + end + if oldidx < length(ssachangemap) + ssachangemap[oldidx + 1] += 1 + sv.insert_coverage && (labelchangemap[oldidx + 1] += 1) + end + if blockchangemap === nothing + blockchangemap = fill(0, length(sv.cfg.blocks)) + end + blockchangemap[block] += 1 + idx += 1 end - idx += 1 + oldidx = last(sv.cfg.blocks[block].stmts) end end idx += 1 oldidx += 1 end - cfg = sv.cfg if ssachangemap !== nothing && labelchangemap !== nothing renumber_ir_elements!(code, ssachangemap, labelchangemap) - cfg = nothing # recompute CFG + end + if blockchangemap !== nothing + renumber_cfg_stmts!(sv.cfg, blockchangemap) end for i = 1:length(code) code[i] = process_meta!(meta, code[i]) end - strip_trailing_junk!(ci, code, stmtinfo) + strip_trailing_junk!(ci, sv.cfg, code, stmtinfo) types = Any[] stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags) - if cfg === nothing - cfg = compute_basic_blocks(code) - end - return IRCode(stmts, cfg, linetable, sv.slottypes, meta, sv.sptypes) + # NOTE this `argtypes` contains types of slots yet: it will be modified to contain the + # types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form + # and eliminates slots (see below) + argtypes = sv.slottypes + return IRCode(stmts, sv.cfg, linetable, argtypes, meta, sv.sptypes) end function process_meta!(meta::Vector{Expr}, @nospecialize stmt) @@ -699,21 +1125,26 @@ function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState) svdef = sv.linfo.def nargs = isa(svdef, Method) ? 
Int(svdef.nargs) : 0 @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks) - defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst) - @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, sv.slottypes) # consumes `ir` + defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.stmt) + 𝕃ₒ = optimizer_lattice(sv.inlining.interp) + @timeit "construct_ssa" ir = construct_ssa!(ci, ir, sv, domtree, defuse_insts, 𝕃ₒ) # consumes `ir` + # NOTE now we have converted `ir` to the SSA form and eliminated slots + # let's resize `argtypes` now and remove unnecessary types for the eliminated slots + resize!(ir.argtypes, nargs) return ir end ## Computing the cost of a function body -# saturating sum (inputs are nonnegative), prevents overflow with typemax(Int) below +# saturating sum (inputs are non-negative), prevents overflow with typemax(Int) below plus_saturate(x::Int, y::Int) = max(x, y, x+y) # known return type isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T)) -function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any}, - union_penalties::Bool, params::OptimizationParams, error_path::Bool = false) +function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, + params::OptimizationParams, error_path::Bool = false) + #=const=# UNKNOWN_CALL_COST = 20 head = ex.head if is_meta_expr_head(head) return 0 @@ -723,7 +1154,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp if ftyp === IntrinsicFunction && farg isa SSAValue # if this comes from code that was already inlined into another function, # Consts have been widened. try to recover in simple cases. - farg = isa(src, CodeInfo) ? src.code[farg.id] : src.stmts[farg.id][:inst] + farg = isa(src, CodeInfo) ? src.code[farg.id] : src[farg][:stmt] if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter) ftyp = argextype(farg, src, sptypes) end @@ -731,13 +1162,38 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp f = singleton_type(ftyp) if isa(f, IntrinsicFunction) iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 - if !isassigned(T_IFUNC_COST, iidx) - # unknown/unhandled intrinsic - return params.inline_nonleaf_penalty + if isassigned(T_IFUNC, iidx) + minarg, maxarg, = T_IFUNC[iidx] + nargs = length(ex.args) + if minarg + 1 <= nargs <= maxarg + 1 + # With mostly constant arguments, all Intrinsics tend to become very cheap + # and are likely to combine with the operations around them, + # so reduce their cost by half. 
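# Illustration (not part of the patch): the constant-argument discount applied below, pulled
# out into a self-contained toy. `base_cost` and the per-argument constness flags are stand-ins
# for what the real code reads from `T_IFUNC_COST` and inference results, and the same-type
# requirement the real code also imposes is omitted here.
function discounted_intrinsic_cost(base_cost::Int, args_are_const::Vector{Bool})
    nargs = length(args_are_const) + 1        # mirror `length(ex.args)`, which includes the callee
    nconst = count(args_are_const)
    if base_cost > 0 && nargs >= 3 && nconst + 2 >= nargs
        return (base_cost - 1) ÷ 2            # "reduce their cost by half"
    end
    return base_cost
end

@assert discounted_intrinsic_cost(4, [true, false]) == 1          # one of two operands constant
@assert discounted_intrinsic_cost(4, [false, false, false]) == 4  # nothing constant: full cost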
+ cost = T_IFUNC_COST[iidx] + if cost == 0 || nargs < 3 || + (f === Intrinsics.cglobal || f === Intrinsics.llvmcall) # these hold malformed IR, so argextype will crash on them + return cost + end + aty2 = widenconditional(argextype(ex.args[2], src, sptypes)) + nconst = Int(aty2 isa Const) + for i = 3:nargs + aty = widenconditional(argextype(ex.args[i], src, sptypes)) + if widenconst(aty) != widenconst(aty2) + nconst = 0 + break + end + nconst += aty isa Const + end + if nconst + 2 >= nargs + cost = (cost - 1) ÷ 2 + end + return cost + end end - return T_IFUNC_COST[iidx] + # unknown/unhandled intrinsic: hopefully the caller gets a slightly better answer after the inlining + return UNKNOWN_CALL_COST end - if isa(f, Builtin) + if isa(f, Builtin) && f !== invoke # The efficiency of operations like a[i] and s.b # depend strongly on whether the result can be # inferred, so check the type of ex @@ -746,24 +1202,20 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp # tuple iteration/destructuring makes that impossible # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty) return 0 - elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3 - atyp = argextype(ex.args[3], src, sptypes) - return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty + elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3 + atyp = argextype(ex.args[2], src, sptypes) + return isknowntype(atyp) ? 1 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty + elseif f === Core.memoryrefset! && length(ex.args) >= 3 + atyp = argextype(ex.args[2], src, sptypes) + return isknowntype(atyp) ? 5 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes))) return 1 - elseif f === Core.isa - # If we're in a union context, we penalize type computations - # on union types. In such cases, it is usually better to perform - # union splitting on the outside. - if union_penalties && isa(argextype(ex.args[2], src, sptypes), Union) - return params.inline_nonleaf_penalty - end end fidx = find_tfunc(f) if fidx === nothing # unknown/unhandled builtin # Use the generic cost of a direct function call - return 20 + return UNKNOWN_CALL_COST end return T_FFUNC_COST[fidx] end @@ -772,43 +1224,43 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp return 0 end return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty - elseif head === :foreigncall || head === :invoke || head == :invoke_modify + elseif head === :foreigncall + foreigncall = ex.args[1] + if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr + return 1 + end + return 20 + elseif head === :invoke || head === :invoke_modify # Calls whose "return type" is Union{} do not actually return: # they are errors. Since these are not part of the typical # run-time of the function, we omit them from # consideration. This way, non-inlined error branches do not # prevent inlining. extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes) - return extyp === Union{} ? 0 : 20 + return extyp === Union{} ? 
0 : UNKNOWN_CALL_COST elseif head === :(=) if ex.args[1] isa GlobalRef - cost = 20 + cost = UNKNOWN_CALL_COST else cost = 0 end a = ex.args[2] if a isa Expr - cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, union_penalties, params, error_path)) + cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params, error_path)) end return cost elseif head === :copyast return 100 - elseif head === :enter - # try/catch is a couple function calls, - # but don't inline functions with try/catch - # since these aren't usually performance-sensitive functions, - # and llvm is more likely to miscompile them when these functions get large - return typemax(Int) end return 0 end -function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any}, - union_penalties::Bool, params::OptimizationParams) +function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, + params::OptimizationParams) thiscost = 0 dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt if stmt isa Expr - thiscost = statement_cost(stmt, line, src, sptypes, union_penalties, params, + thiscost = statement_cost(stmt, line, src, sptypes, params, is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int elseif stmt isa GotoNode # loops are generally always expensive @@ -817,28 +1269,35 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod thiscost = dst(stmt.label) < line ? 40 : 0 elseif stmt isa GotoIfNot thiscost = dst(stmt.dest) < line ? 40 : 0 + elseif stmt isa EnterNode + # try/catch is a couple function calls, + # but don't inline functions with try/catch + # since these aren't usually performance-sensitive functions, + # and llvm is more likely to miscompile them when these functions get large + thiscost = typemax(Int) end return thiscost end -function inline_worthy(ir::IRCode, - params::OptimizationParams, union_penalties::Bool=false, cost_threshold::Integer=params.inline_cost_threshold) - bodycost::Int = 0 - for line = 1:length(ir.stmts) - stmt = ir.stmts[line][:inst] - thiscost = statement_or_branch_cost(stmt, line, ir, ir.sptypes, union_penalties, params) +function inline_cost(ir::IRCode, params::OptimizationParams, cost_threshold::Int) + bodycost = 0 + for i = 1:length(ir.stmts) + stmt = ir[SSAValue(i)][:stmt] + thiscost = statement_or_branch_cost(stmt, i, ir, ir.sptypes, params) bodycost = plus_saturate(bodycost, thiscost) - bodycost > cost_threshold && return false + if bodycost > cost_threshold + return MAX_INLINE_COST + end end - return true + return inline_cost_clamp(bodycost) end -function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any}, unionpenalties::Bool, params::OptimizationParams) +function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState}, params::OptimizationParams) maxcost = 0 for line = 1:length(body) stmt = body[line] thiscost = statement_or_branch_cost(stmt, line, src, sptypes, - unionpenalties, params) + params) cost[line] = thiscost if thiscost > maxcost maxcost = thiscost @@ -847,8 +1306,8 @@ function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeI return maxcost end -function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}) - return renumber_ir_elements!(body, ssachangemap, ssachangemap) +function renumber_ir_elements!(body::Vector{Any}, 
cfg::Union{CFG,Nothing}, ssachangemap::Vector{Int}) + return renumber_ir_elements!(body, cfg, ssachangemap, ssachangemap) end function cumsum_ssamap!(ssachangemap::Vector{Int}) @@ -912,14 +1371,22 @@ function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, lab i += 1 end end + elseif isa(el, EnterNode) + tgt = el.catch_dest + if tgt != 0 + was_deleted = labelchangemap[tgt] == typemin(Int) + if was_deleted + @assert !isdefined(el, :scope) + body[i] = nothing + else + body[i] = EnterNode(el, tgt + labelchangemap[tgt]) + end + end elseif isa(el, Expr) if el.head === :(=) && el.args[2] isa Expr el = el.args[2]::Expr end - if el.head === :enter - tgt = el.args[1]::Int - el.args[1] = tgt + labelchangemap[tgt] - elseif !is_meta_expr_head(el.head) + if !is_meta_expr_head(el.head) args = el.args for i = 1:length(args) el = args[i] @@ -931,3 +1398,16 @@ function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, lab end end end + +function renumber_cfg_stmts!(cfg::CFG, blockchangemap::Vector{Int}) + cumsum_ssamap!(blockchangemap) || return + for i = 1:length(cfg.blocks) + old_range = cfg.blocks[i].stmts + new_range = StmtRange(first(old_range) + ((i > 1) ? blockchangemap[i - 1] : 0), + last(old_range) + blockchangemap[i]) + cfg.blocks[i] = BasicBlock(cfg.blocks[i], new_range) + if i <= length(cfg.index) + cfg.index[i] = cfg.index[i] + blockchangemap[i] + end + end +end diff --git a/base/compiler/sort.jl b/base/compiler/sort.jl new file mode 100644 index 0000000000000..71d2f8a51cd59 --- /dev/null +++ b/base/compiler/sort.jl @@ -0,0 +1,100 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# reference on sorted binary search: +# http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary + +# index of the first value of vector a that is greater than or equal to x; +# returns lastindex(v)+1 if x is greater than all values in v. +function searchsortedfirst(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer + hi = hi + T(1) + len = hi - lo + @inbounds while len != 0 + half_len = len >>> 0x01 + m = lo + half_len + if lt(o, v[m], x) + lo = m + 1 + len -= half_len + 1 + else + hi = m + len = half_len + end + end + return lo +end + +# index of the last value of vector a that is less than or equal to x; +# returns firstindex(v)-1 if x is less than all values of v. 
+function searchsortedlast(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer + u = T(1) + lo = lo - u + hi = hi + u + @inbounds while lo < hi - u + m = midpoint(lo, hi) + if lt(o, x, v[m]) + hi = m + else + lo = m + end + end + return lo +end + +# returns the range of indices of v equal to x +# if v does not contain x, returns a 0-length range +# indicating the insertion point of x +function searchsorted(v::AbstractVector, x, ilo::T, ihi::T, o::Ordering)::UnitRange{keytype(v)} where T<:Integer + u = T(1) + lo = ilo - u + hi = ihi + u + @inbounds while lo < hi - u + m = midpoint(lo, hi) + if lt(o, v[m], x) + lo = m + elseif lt(o, x, v[m]) + hi = m + else + a = searchsortedfirst(v, x, max(lo,ilo), m, o) + b = searchsortedlast(v, x, m, min(hi,ihi), o) + return a : b + end + end + return (lo + 1) : (hi - 1) +end + +for s in [:searchsortedfirst, :searchsortedlast, :searchsorted] + @eval begin + $s(v::AbstractVector, x, o::Ordering) = $s(v,x,firstindex(v),lastindex(v),o) + $s(v::AbstractVector, x; + lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) = + $s(v,x,ord(lt,by,rev,order)) + end +end + +# An unstable sorting algorithm for internal use +function sort!(v::Vector; by::Function=identity, (<)::Function=<) + isempty(v) && return v # This branch is hit 95% of the time + + # Of the remaining 5%, this branch is hit less than 1% of the time + if length(v) > 200 # Heap sort prevents quadratic runtime + o = ord(<, by, true) + heapify!(v, o) + for i in lastindex(v):-1:2 + y = v[i] + v[i] = v[1] + percolate_down!(v, 1, y, o, i-1) + end + return v + end + + @inbounds for i in 2:length(v) # Insertion sort + x = v[i] + y = by(x) + while i > 1 && y < by(v[i-1]) + v[i] = v[i-1] + i -= 1 + end + v[i] = x + end + + v +end diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl index 272ea0e8edbbc..e4ea159ac8824 100644 --- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl +++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl @@ -15,23 +15,19 @@ const _TOP_MOD = ccall(:jl_base_relative_to, Any, (Any,), EscapeAnalysis)::Modul # imports import ._TOP_MOD: ==, getindex, setindex! # usings -import Core: - MethodInstance, Const, Argument, SSAValue, PiNode, PhiNode, UpsilonNode, PhiCNode, - ReturnNode, GotoNode, GotoIfNot, SimpleVector, MethodMatch, CodeInstance, - sizeof, ifelse, arrayset, arrayref, arraysize -import ._TOP_MOD: # Base definitions - @__MODULE__, @eval, @assert, @specialize, @nospecialize, @inbounds, @inline, @noinline, - @label, @goto, !, !==, !=, ≠, +, -, *, ≤, <, ≥, >, &, |, <<, error, missing, copy, - Vector, BitSet, IdDict, IdSet, UnitRange, Csize_t, Callable, ∪, ⊆, ∩, :, ∈, ∉, =>, - in, length, get, first, last, haskey, keys, get!, isempty, isassigned, - pop!, push!, pushfirst!, empty!, delete!, max, min, enumerate, unwrap_unionall, - ismutabletype -import Core.Compiler: # Core.Compiler specific definitions - Bottom, InferenceResult, IRCode, IR_FLAG_EFFECT_FREE, - isbitstype, isexpr, is_meta_expr_head, println, widenconst, argextype, singleton_type, - fieldcount_noerror, try_compute_field, try_compute_fieldidx, hasintersect, ⊑, - intrinsic_nothrow, array_builtin_common_typecheck, arrayset_typecheck, - setfield!_nothrow, alloc_array_ndims, check_effect_free! 
+using Core: MethodMatch, SimpleVector, ifelse, sizeof +using Core.IR +using ._TOP_MOD: # Base definitions + @__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline, + @nospecialize, @specialize, BitSet, Callable, Csize_t, IdDict, IdSet, UnitRange, Vector, + copy, delete!, empty!, enumerate, error, first, get, get!, haskey, in, isassigned, + isempty, ismutabletype, keys, last, length, max, min, missing, pop!, push!, pushfirst!, + unwrap_unionall, !, !=, !==, &, *, +, -, :, <, <<, =>, >, |, ∈, ∉, ∩, ∪, ≠, ≤, ≥, ⊆ +using Core.Compiler: # Core.Compiler specific definitions + Bottom, IRCode, IR_FLAG_NOTHROW, InferenceResult, SimpleInferenceLattice, + argextype, check_effect_free!, fieldcount_noerror, hasintersect, has_flag, + intrinsic_nothrow, is_meta_expr_head, isbitstype, isexpr, println, setfield!_nothrow, + singleton_type, try_compute_field, try_compute_fieldidx, widenconst, ⊑, AbstractLattice include(x) = _TOP_MOD.include(@__MODULE__, x) if _TOP_MOD === Core.Compiler @@ -41,7 +37,6 @@ else end const AInfo = IdSet{Any} -const LivenessSet = BitSet """ x::EscapeInfo @@ -87,25 +82,23 @@ An abstract state will be initialized with the bottom(-like) elements: struct EscapeInfo Analyzed::Bool ReturnEscape::Bool - ThrownEscape::LivenessSet + ThrownEscape::BitSet AliasInfo #::Union{IndexableFields,IndexableElements,Unindexable,Bool} - Liveness::LivenessSet + Liveness::BitSet function EscapeInfo( Analyzed::Bool, ReturnEscape::Bool, - ThrownEscape::LivenessSet, + ThrownEscape::BitSet, AliasInfo#=::Union{IndexableFields,IndexableElements,Unindexable,Bool}=#, - Liveness::LivenessSet, - ) + Liveness::BitSet) @nospecialize AliasInfo return new( Analyzed, ReturnEscape, ThrownEscape, AliasInfo, - Liveness, - ) + Liveness) end function EscapeInfo( x::EscapeInfo, @@ -114,40 +107,38 @@ struct EscapeInfo AliasInfo#=::Union{IndexableFields,IndexableElements,Unindexable,Bool}=# = x.AliasInfo; Analyzed::Bool = x.Analyzed, ReturnEscape::Bool = x.ReturnEscape, - ThrownEscape::LivenessSet = x.ThrownEscape, - Liveness::LivenessSet = x.Liveness, - ) + ThrownEscape::BitSet = x.ThrownEscape, + Liveness::BitSet = x.Liveness) @nospecialize AliasInfo return new( Analyzed, ReturnEscape, ThrownEscape, AliasInfo, - Liveness, - ) + Liveness) end end # precomputed default values in order to eliminate computations at each callsite -const BOT_THROWN_ESCAPE = LivenessSet() +const BOT_THROWN_ESCAPE = BitSet() # NOTE the lattice operations should try to avoid actual set computations on this top value, -# and e.g. LivenessSet(0:1000000) should also work without incurring excessive computations -const TOP_THROWN_ESCAPE = LivenessSet(-1) +# and e.g. BitSet(0:1000000) should also work without incurring excessive computations +const TOP_THROWN_ESCAPE = BitSet(-1) -const BOT_LIVENESS = LivenessSet() +const BOT_LIVENESS = BitSet() # NOTE the lattice operations should try to avoid actual set computations on this top value, -# and e.g. LivenessSet(0:1000000) should also work without incurring excessive computations -const TOP_LIVENESS = LivenessSet(-1:0) -const ARG_LIVENESS = LivenessSet(0) +# and e.g. 
BitSet(0:1000000) should also work without incurring excessive computations +const TOP_LIVENESS = BitSet(-1:0) +const ARG_LIVENESS = BitSet(0) # the constructors NotAnalyzed() = EscapeInfo(false, false, BOT_THROWN_ESCAPE, false, BOT_LIVENESS) # not formally part of the lattice NoEscape() = EscapeInfo(true, false, BOT_THROWN_ESCAPE, false, BOT_LIVENESS) ArgEscape() = EscapeInfo(true, false, BOT_THROWN_ESCAPE, true, ARG_LIVENESS) -ReturnEscape(pc::Int) = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, LivenessSet(pc)) +ReturnEscape(pc::Int) = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, BitSet(pc)) AllReturnEscape() = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, TOP_LIVENESS) -ThrownEscape(pc::Int) = EscapeInfo(true, false, LivenessSet(pc), false, BOT_LIVENESS) +ThrownEscape(pc::Int) = EscapeInfo(true, false, BitSet(pc), false, BOT_LIVENESS) AllEscape() = EscapeInfo(true, true, TOP_THROWN_ESCAPE, true, TOP_LIVENESS) const ⊥, ⊤ = NotAnalyzed(), AllEscape() @@ -542,14 +533,14 @@ isaliased(xidx::Int, yidx::Int, estate::EscapeState) = in_same_set(estate.aliasset, xidx, yidx) struct ArgEscapeInfo - EscapeBits::UInt8 + escape_bits::UInt8 end function ArgEscapeInfo(x::EscapeInfo) x === ⊤ && return ArgEscapeInfo(ARG_ALL_ESCAPE) - EscapeBits = 0x00 - has_return_escape(x) && (EscapeBits |= ARG_RETURN_ESCAPE) - has_thrown_escape(x) && (EscapeBits |= ARG_THROWN_ESCAPE) - return ArgEscapeInfo(EscapeBits) + escape_bits = 0x00 + has_return_escape(x) && (escape_bits |= ARG_RETURN_ESCAPE) + has_thrown_escape(x) && (escape_bits |= ARG_THROWN_ESCAPE) + return ArgEscapeInfo(escape_bits) end const ARG_ALL_ESCAPE = 0x01 << 0 @@ -557,9 +548,9 @@ const ARG_RETURN_ESCAPE = 0x01 << 1 const ARG_THROWN_ESCAPE = 0x01 << 2 has_no_escape(x::ArgEscapeInfo) = !has_all_escape(x) && !has_return_escape(x) && !has_thrown_escape(x) -has_all_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_ALL_ESCAPE ≠ 0 -has_return_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_RETURN_ESCAPE ≠ 0 -has_thrown_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_THROWN_ESCAPE ≠ 0 +has_all_escape(x::ArgEscapeInfo) = x.escape_bits & ARG_ALL_ESCAPE ≠ 0 +has_return_escape(x::ArgEscapeInfo) = x.escape_bits & ARG_RETURN_ESCAPE ≠ 0 +has_thrown_escape(x::ArgEscapeInfo) = x.escape_bits & ARG_THROWN_ESCAPE ≠ 0 struct ArgAliasing aidx::Int @@ -569,46 +560,22 @@ end struct ArgEscapeCache argescapes::Vector{ArgEscapeInfo} argaliases::Vector{ArgAliasing} -end - -function ArgEscapeCache(estate::EscapeState) - nargs = estate.nargs - argescapes = Vector{ArgEscapeInfo}(undef, nargs) - argaliases = ArgAliasing[] - for i = 1:nargs - info = estate.escapes[i] - @assert info.AliasInfo === true - argescapes[i] = ArgEscapeInfo(info) - for j = (i+1):nargs - if isaliased(i, j, estate) - push!(argaliases, ArgAliasing(i, j)) + function ArgEscapeCache(estate::EscapeState) + nargs = estate.nargs + argescapes = Vector{ArgEscapeInfo}(undef, nargs) + argaliases = ArgAliasing[] + for i = 1:nargs + info = estate.escapes[i] + @assert info.AliasInfo === true + argescapes[i] = ArgEscapeInfo(info) + for j = (i+1):nargs + if isaliased(i, j, estate) + push!(argaliases, ArgAliasing(i, j)) + end end end + return new(argescapes, argaliases) end - return ArgEscapeCache(argescapes, argaliases) -end - -""" - is_ipo_profitable(ir::IRCode, nargs::Int) -> Bool - -Heuristically checks if there is any profitability to run the escape analysis on `ir` -and generate IPO escape information cache. Specifically, this function examines -if any call argument is "interesting" in terms of their escapability. 
-""" -function is_ipo_profitable(ir::IRCode, nargs::Int) - for i = 1:nargs - t = unwrap_unionall(widenconst(ir.argtypes[i])) - t <: IO && return false # bail out IO-related functions - is_ipo_profitable_type(t) && return true - end - return false -end -function is_ipo_profitable_type(@nospecialize t) - if isa(t, Union) - return is_ipo_profitable_type(t.a) && is_ipo_profitable_type(t.b) - end - (t === String || t === Symbol || t === Module || t === SimpleVector) && return false - return ismutabletype(t) end abstract type Change end @@ -630,57 +597,43 @@ struct LivenessChange <: Change end const Changes = Vector{Change} -struct AnalysisState{T<:Callable} +struct AnalysisState{T, L <: AbstractLattice} ir::IRCode estate::EscapeState changes::Changes + 𝕃ₒ::L get_escape_cache::T end -function getinst(ir::IRCode, idx::Int) - nstmts = length(ir.stmts) - if idx ≤ nstmts - return ir.stmts[idx] - else - return ir.new_nodes.stmts[idx - nstmts] - end -end - """ - analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape_cache::Callable) - -> estate::EscapeState + analyze_escapes(ir::IRCode, nargs::Int, get_escape_cache) -> estate::EscapeState Analyzes escape information in `ir`: - `nargs`: the number of actual arguments of the analyzed call -- `call_resolved`: if interprocedural calls are already resolved by `ssa_inlining_pass!` -- `get_escape_cache(::Union{InferenceResult,MethodInstance}) -> Union{Nothing,ArgEscapeCache}`: +- `get_escape_cache(::MethodInstance) -> Union{Bool,ArgEscapeCache}`: retrieves cached argument escape information """ -function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape_cache::T) where T<:Callable +function analyze_escapes(ir::IRCode, nargs::Int, 𝕃ₒ::AbstractLattice, get_escape_cache) stmts = ir.stmts nstmts = length(stmts) + length(ir.new_nodes.stmts) - tryregions, arrayinfo, callinfo = compute_frameinfo(ir, call_resolved) + tryregions, arrayinfo = compute_frameinfo(ir) estate = EscapeState(nargs, nstmts, arrayinfo) changes = Changes() # keeps changes that happen at current statement - astate = AnalysisState(ir, estate, changes, get_escape_cache) + astate = AnalysisState(ir, estate, changes, 𝕃ₒ, get_escape_cache) local debug_itr_counter = 0 while true local anyupdate = false for pc in nstmts:-1:1 - stmt = getinst(ir, pc)[:inst] + stmt = ir[SSAValue(pc)][:stmt] # collect escape information if isa(stmt, Expr) head = stmt.head if head === :call - if callinfo !== nothing - escape_call!(astate, pc, stmt.args, callinfo) - else - escape_call!(astate, pc, stmt.args) - end + escape_call!(astate, pc, stmt.args) elseif head === :invoke escape_invoke!(astate, pc, stmt.args) elseif head === :new || head === :splatnew @@ -699,14 +652,13 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape elseif is_meta_expr_head(head) # meta expressions doesn't account for any usages continue - elseif head === :enter || head === :leave || head === :the_exception || head === :pop_exception + elseif head === :leave || head === :the_exception || head === :pop_exception # ignore these expressions since escapes via exceptions are handled by `escape_exception!` # `escape_exception!` conservatively propagates `AllEscape` anyway, # and so escape information imposed on `:the_exception` isn't computed continue elseif head === :static_parameter || # this exists statically, not interested in its escape head === :copyast || # XXX can this account for some escapes? - head === :undefcheck || # XXX can this account for some escapes? 
head === :isdefined || # just returns `Bool`, nothing accounts for any escapes head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere head === :gc_preserve_end # `GC.@preserve` expressions themselves won't be used anywhere @@ -714,6 +666,9 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape else add_conservative_changes!(astate, pc, stmt.args) end + elseif isa(stmt, EnterNode) + # Handled via escape_exception! + continue elseif isa(stmt, ReturnNode) if isdefined(stmt, :val) add_escape_change!(astate, stmt.val, ReturnEscape(pc)) @@ -758,78 +713,30 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape end """ - compute_frameinfo(ir::IRCode, call_resolved::Bool) -> (tryregions, arrayinfo, callinfo) + compute_frameinfo(ir::IRCode) -> (tryregions, arrayinfo) A preparatory linear scan before the escape analysis on `ir` to find: - `tryregions::Union{Nothing,Vector{UnitRange{Int}}}`: regions in which potential `throw`s can be caught (used by `escape_exception!`) - `arrayinfo::Union{Nothing,IdDict{Int,Vector{Int}}}`: array allocations whose dimensions are known precisely (with some very simple local analysis) -- `callinfo::`: when `!call_resolved`, `compute_frameinfo` additionally returns `callinfo::Vector{Union{MethodInstance,InferenceResult}}`, - which contains information about statically resolved callsites. - The inliner will use essentially equivalent interprocedural information to inline callees as well as resolve static callsites, - this additional information won't be required when analyzing post-inlining IR. !!! note This array dimension analysis to compute `arrayinfo` is very local and doesn't account for flow-sensitivity nor complex aliasing. Ideally this dimension analysis should be done as a part of type inference that - propagates array dimenstions in a flow sensitive way. + propagates array dimensions in a flow sensitive way. 
""" -function compute_frameinfo(ir::IRCode, call_resolved::Bool) +function compute_frameinfo(ir::IRCode) nstmts, nnewnodes = length(ir.stmts), length(ir.new_nodes.stmts) tryregions, arrayinfo = nothing, nothing - if !call_resolved - callinfo = Vector{Any}(undef, nstmts+nnewnodes) - else - callinfo = nothing - end for idx in 1:nstmts+nnewnodes - inst = getinst(ir, idx) - stmt = inst[:inst] - if !call_resolved - # TODO don't call `check_effect_free!` in the inlinear - check_effect_free!(ir, idx, stmt, inst[:type]) - end - if callinfo !== nothing && isexpr(stmt, :call) - callinfo[idx] = resolve_call(ir, stmt, inst[:info]) - elseif isexpr(stmt, :enter) + inst = ir[SSAValue(idx)] + stmt = inst[:stmt] + if isa(stmt, EnterNode) @assert idx ≤ nstmts "try/catch inside new_nodes unsupported" tryregions === nothing && (tryregions = UnitRange{Int}[]) - leave_block = stmt.args[1]::Int + leave_block = stmt.catch_dest leave_pc = first(ir.cfg.blocks[leave_block].stmts) push!(tryregions, idx:leave_pc) - elseif isexpr(stmt, :foreigncall) - args = stmt.args - name = args[1] - nn = normalize(name) - isa(nn, Symbol) || @goto next_stmt - ndims = alloc_array_ndims(nn) - ndims === nothing && @goto next_stmt - if ndims ≠ 0 - length(args) ≥ ndims+6 || @goto next_stmt - dims = Int[] - for i in 1:ndims - dim = argextype(args[i+6], ir) - isa(dim, Const) || @goto next_stmt - dim = dim.val - isa(dim, Int) || @goto next_stmt - push!(dims, dim) - end - else - length(args) ≥ 7 || @goto next_stmt - dims = argextype(args[7], ir) - if isa(dims, Const) - dims = dims.val - isa(dims, Tuple{Vararg{Int}}) || @goto next_stmt - dims = collect(Int, dims) - else - dims === Tuple{} || @goto next_stmt - dims = Int[] - end - end - if arrayinfo === nothing - arrayinfo = ArrayInfo() - end - arrayinfo[idx] = dims elseif arrayinfo !== nothing # TODO this super limited alias analysis is able to handle only very simple cases # this should be replaced with a proper forward dimension analysis @@ -864,14 +771,7 @@ function compute_frameinfo(ir::IRCode, call_resolved::Bool) end @label next_stmt end - return tryregions, arrayinfo, callinfo -end - -# define resolve_call -if _TOP_MOD === Core.Compiler - include("compiler/ssair/EscapeAnalysis/interprocedural.jl") -else - include("interprocedural.jl") + return tryregions, arrayinfo end # propagate changes, and check convergence @@ -919,7 +819,7 @@ end return false end -# propagate Liveness changes separately in order to avoid constructing too many LivenessSet +# propagate Liveness changes separately in order to avoid constructing too many BitSet @inline function propagate_liveness_change!(estate::EscapeState, change::LivenessChange) (; xidx, livepc) = change info = estate.escapes[xidx] @@ -1078,7 +978,7 @@ end error("unexpected assignment found: inspect `Main.pc` and `Main.pc`") end -is_effect_free(ir::IRCode, pc::Int) = getinst(ir, pc)[:flag] & IR_FLAG_EFFECT_FREE ≠ 0 +is_nothrow(ir::IRCode, pc::Int) = has_flag(ir[SSAValue(pc)], IR_FLAG_NOTHROW) # NOTE if we don't maintain the alias set that is separated from the lattice state, we can do # something like below: it essentially incorporates forward escape propagation in our default @@ -1139,6 +1039,7 @@ function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{I # NOTE if `:the_exception` is the only way to access the exception, we can do: # exc = SSAValue(pc) # excinfo = estate[exc] + # TODO? 
set up a special effect bit that checks the existence of `rethrow` and `current_exceptions` and use it here excinfo = ⊤ escapes = estate.escapes for i in 1:length(escapes) @@ -1158,25 +1059,22 @@ function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{I end # escape statically-resolved call, i.e. `Expr(:invoke, ::MethodInstance, ...)` -escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}) = - escape_invoke!(astate, pc, args, first(args)::MethodInstance, 2) - -function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}, - linfo::Linfo, first_idx::Int, last_idx::Int = length(args)) - if isa(linfo, InferenceResult) - cache = astate.get_escape_cache(linfo) - linfo = linfo.linfo - else - cache = astate.get_escape_cache(linfo) - end - if cache === nothing - return add_conservative_changes!(astate, pc, args, 2) - else - cache = cache::ArgEscapeCache +function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}) + mi = first(args)::MethodInstance + first_idx, last_idx = 2, length(args) + # TODO inspect `astate.ir.stmts[pc][:info]` and use const-prop'ed `InferenceResult` if available + cache = astate.get_escape_cache(mi) + if cache isa Bool + if cache + return nothing # guaranteed to have no escape + else + return add_conservative_changes!(astate, pc, args, 2) + end end + cache = cache::ArgEscapeCache ret = SSAValue(pc) retinfo = astate.estate[ret] # escape information imposed on the call statement - method = linfo.def::Method + method = mi.def::Method nargs = Int(method.nargs) for (i, argidx) in enumerate(first_idx:last_idx) arg = args[argidx] @@ -1185,46 +1083,40 @@ function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}, # COMBAK will this be invalid once we take alias information into account? 
i = nargs end - arginfo = cache.argescapes[i] - info = from_interprocedural(arginfo, pc) - if has_return_escape(arginfo) - # if this argument can be "returned", in addition to propagating - # the escape information imposed on this call argument within the callee, - # we should also account for possible aliasing of this argument and the returned value - add_escape_change!(astate, arg, info) + argescape = cache.argescapes[i] + info = from_interprocedural(argescape, pc) + # propagate the escape information imposed on this call argument by the callee + add_escape_change!(astate, arg, info) + if has_return_escape(argescape) + # if this argument can be "returned", we should also account for possible + # aliasing between this argument and the returned value add_alias_change!(astate, ret, arg) - else - # if this is simply passed as the call argument, we can just propagate - # the escape information imposed on this call argument within the callee - add_escape_change!(astate, arg, info) end end for (; aidx, bidx) in cache.argaliases - add_alias_change!(astate, args[aidx-(first_idx-1)], args[bidx-(first_idx-1)]) + add_alias_change!(astate, args[aidx+(first_idx-1)], args[bidx+(first_idx-1)]) end # we should disable the alias analysis on this newly introduced object add_escape_change!(astate, ret, EscapeInfo(retinfo, true)) end """ - from_interprocedural(arginfo::ArgEscapeInfo, pc::Int) -> x::EscapeInfo + from_interprocedural(argescape::ArgEscapeInfo, pc::Int) -> x::EscapeInfo -Reinterprets the escape information imposed on the call argument which is cached as `arginfo` +Reinterprets the escape information imposed on the call argument which is cached as `argescape` in the context of the caller frame, where `pc` is the SSA statement number of the return value. """ -function from_interprocedural(arginfo::ArgEscapeInfo, pc::Int) - has_all_escape(arginfo) && return ⊤ - - ThrownEscape = has_thrown_escape(arginfo) ? LivenessSet(pc) : BOT_THROWN_ESCAPE - - return EscapeInfo( - #=Analyzed=#true, #=ReturnEscape=#false, ThrownEscape, - # FIXME implement interprocedural memory effect-analysis - # currently, this essentially disables the entire field analysis - # it might be okay from the SROA point of view, since we can't remove the allocation - # as far as it's passed to a callee anyway, but still we may want some field analysis - # for e.g. stack allocation or some other IPO optimizations - #=AliasInfo=#true, #=Liveness=#LivenessSet(pc)) +function from_interprocedural(argescape::ArgEscapeInfo, pc::Int) + has_all_escape(argescape) && return ⊤ + ThrownEscape = has_thrown_escape(argescape) ? BitSet(pc) : BOT_THROWN_ESCAPE + # TODO implement interprocedural memory effect-analysis: + # currently, this essentially disables the entire field analysis–it might be okay from + # the SROA point of view, since we can't remove the allocation as far as it's passed to + # a callee anyway, but still we may want some field analysis for e.g. 
stack allocation + # or some other IPO optimizations + AliasInfo = true + Liveness = BitSet(pc) + return EscapeInfo(#=Analyzed=#true, #=ReturnEscape=#false, ThrownEscape, AliasInfo, Liveness) end # escape every argument `(args[6:length(args[3])])` and the name `args[1]` @@ -1241,25 +1133,12 @@ function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any}) name = args[1] nn = normalize(name) if isa(nn, Symbol) - boundserror_ninds = array_resize_info(nn) - if boundserror_ninds !== nothing - boundserror, ninds = boundserror_ninds - escape_array_resize!(boundserror, ninds, astate, pc, args) - return - end - if is_array_copy(nn) - escape_array_copy!(astate, pc, args) - return - elseif is_array_isassigned(nn) - escape_array_isassigned!(astate, pc, args) - return - end # if nn === :jl_gc_add_finalizer_th # # TODO add `FinalizerEscape` ? # end end # NOTE array allocations might have been proven as nothrow (https://github.com/JuliaLang/julia/pull/43565) - nothrow = is_effect_free(astate.ir, pc) + nothrow = is_nothrow(astate.ir, pc) name_info = nothrow ? ⊥ : ThrownEscape(pc) add_escape_change!(astate, name, name_info) add_liveness_change!(astate, name, pc) @@ -1283,27 +1162,6 @@ end normalize(@nospecialize x) = isa(x, QuoteNode) ? x.value : x -function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}, callinfo::Vector{Any}) - info = callinfo[pc] - if isa(info, Bool) - info && return # known to be no escape - # now cascade to the builtin handling - escape_call!(astate, pc, args) - return - elseif isa(info, CallInfo) - for linfo in info.linfos - escape_invoke!(astate, pc, args, linfo, 1) - end - # accounts for a potential escape via MethodError - info.nothrow || add_thrown_escapes!(astate, pc, args) - return - else - @assert info === missing - # if this call couldn't be analyzed, escape it conservatively - add_conservative_changes!(astate, pc, args) - end -end - function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}) ir = astate.ir ft = argextype(first(args), ir, ir.sptypes, ir.argtypes) @@ -1335,7 +1193,7 @@ function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}) # we escape statements with the `ThrownEscape` property using the effect-freeness # computed by `stmt_effect_flags` invoked within inlining # TODO throwness ≠ "effect-free-ness" - if is_effect_free(astate.ir, pc) + if is_nothrow(astate.ir, pc) add_liveness_changes!(astate, pc, args, 2) else add_fallback_changes!(astate, pc, args, 2) @@ -1344,16 +1202,17 @@ function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}) end end -escape_builtin!(@nospecialize(f), _...) = return missing +escape_builtin!(@nospecialize(f), _...) = missing # safe builtins -escape_builtin!(::typeof(isa), _...) = return false -escape_builtin!(::typeof(typeof), _...) = return false -escape_builtin!(::typeof(sizeof), _...) = return false -escape_builtin!(::typeof(===), _...) = return false +escape_builtin!(::typeof(isa), _...) = false +escape_builtin!(::typeof(typeof), _...) = false +escape_builtin!(::typeof(sizeof), _...) = false +escape_builtin!(::typeof(===), _...) = false +escape_builtin!(::typeof(Core.donotdelete), _...) = false # not really safe, but `ThrownEscape` will be imposed later -escape_builtin!(::typeof(isdefined), _...) = return false -escape_builtin!(::typeof(throw), _...) = return false +escape_builtin!(::typeof(isdefined), _...) = false +escape_builtin!(::typeof(throw), _...) 
= false function escape_builtin!(::typeof(ifelse), astate::AnalysisState, pc::Int, args::Vector{Any}) length(args) == 4 || return false @@ -1441,7 +1300,7 @@ function escape_new!(astate::AnalysisState, pc::Int, args::Vector{Any}) add_liveness_change!(astate, arg, pc) end end - if !is_effect_free(astate.ir, pc) + if !is_nothrow(astate.ir, pc) add_thrown_escapes!(astate, pc, args) end end @@ -1503,6 +1362,8 @@ function escape_builtin!(::typeof(getfield), astate::AnalysisState, pc::Int, arg if isa(obj, SSAValue) || isa(obj, Argument) objinfo = estate[obj] else + # unanalyzable object, so the return value is also unanalyzable + add_escape_change!(astate, SSAValue(pc), ⊤) return false end AliasInfo = objinfo.AliasInfo @@ -1594,143 +1455,16 @@ function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, ar add_escape_change!(astate, val, ssainfo) # compute the throwness of this setfield! call here since builtin_nothrow doesn't account for that @label add_thrown_escapes - argtypes = Any[] - for i = 2:length(args) - push!(argtypes, argextype(args[i], ir)) - end - setfield!_nothrow(argtypes) || add_thrown_escapes!(astate, pc, args, 2) - return true -end - -function escape_builtin!(::typeof(arrayref), astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) ≥ 4 || return false - # check potential thrown escapes from this arrayref call - argtypes = Any[argextype(args[i], astate.ir) for i in 2:length(args)] - boundcheckt = argtypes[1] - aryt = argtypes[2] - if !array_builtin_common_typecheck(boundcheckt, aryt, argtypes, 3) - add_thrown_escapes!(astate, pc, args, 2) - end - ary = args[3] - inbounds = isa(boundcheckt, Const) && !boundcheckt.val::Bool - inbounds || add_escape_change!(astate, ary, ThrownEscape(pc)) - # we don't track precise index information about this array and thus don't know what values - # can be referenced here: directly propagate the escape information imposed on the return - # value of this `arrayref` call to the array itself as the most conservative propagation - # but also with updated index information - estate = astate.estate - if isa(ary, SSAValue) || isa(ary, Argument) - aryinfo = estate[ary] - else + if length(args) == 4 && setfield!_nothrow(astate.𝕃ₒ, + argextype(args[2], ir), argextype(args[3], ir), argextype(args[4], ir)) + return true + elseif length(args) == 3 && setfield!_nothrow(astate.𝕃ₒ, + argextype(args[2], ir), argextype(args[3], ir)) return true - end - AliasInfo = aryinfo.AliasInfo - if isa(AliasInfo, Bool) - AliasInfo && @goto conservative_propagation - # AliasInfo of this array hasn't been analyzed yet: set AliasInfo now - idx = array_nd_index(astate, ary, args[4:end]) - if isa(idx, Int) - AliasInfo = IndexableElements(IdDict{Int,AInfo}()) - @goto record_indexable_use - end - AliasInfo = Unindexable() - @goto record_unindexable_use - elseif isa(AliasInfo, IndexableElements) - idx = array_nd_index(astate, ary, args[4:end]) - if !isa(idx, Int) - AliasInfo = merge_to_unindexable(AliasInfo) - @goto record_unindexable_use - end - @label record_indexable_use - info = get!(()->AInfo(), AliasInfo.infos, idx) - push!(info, LocalUse(pc)) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo - elseif isa(AliasInfo, Unindexable) - @label record_unindexable_use - push!(AliasInfo.info, LocalUse(pc)) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo else - # this object has been used as struct, but it is used as array here (thus should throw) - # update ary's 
element information and just handle this case conservatively - aryinfo = escape_unanalyzable_obj!(astate, ary, aryinfo) - @label conservative_propagation - # at the extreme case, an element of `ary` may point to `ary` itself - # so add the alias change here as the most conservative propagation - add_alias_change!(astate, ary, SSAValue(pc)) - end - return true -end - -function escape_builtin!(::typeof(arrayset), astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) ≥ 5 || return false - # check potential escapes from this arrayset call - # NOTE here we essentially only need to account for TypeError, assuming that - # UndefRefError or BoundsError don't capture any of the arguments here - argtypes = Any[argextype(args[i], astate.ir) for i in 2:length(args)] - boundcheckt = argtypes[1] - aryt = argtypes[2] - valt = argtypes[3] - if !(array_builtin_common_typecheck(boundcheckt, aryt, argtypes, 4) && - arrayset_typecheck(aryt, valt)) add_thrown_escapes!(astate, pc, args, 2) - end - ary = args[3] - val = args[4] - inbounds = isa(boundcheckt, Const) && !boundcheckt.val::Bool - inbounds || add_escape_change!(astate, ary, ThrownEscape(pc)) - # we don't track precise index information about this array and won't record what value - # is being assigned here: directly propagate the escape information of this array to - # the value being assigned as the most conservative propagation - estate = astate.estate - if isa(ary, SSAValue) || isa(ary, Argument) - aryinfo = estate[ary] - else - # unanalyzable object (e.g. obj::GlobalRef): escape field value conservatively - add_escape_change!(astate, val, ⊤) return true end - AliasInfo = aryinfo.AliasInfo - if isa(AliasInfo, Bool) - AliasInfo && @goto conservative_propagation - # AliasInfo of this array hasn't been analyzed yet: set AliasInfo now - idx = array_nd_index(astate, ary, args[5:end]) - if isa(idx, Int) - AliasInfo = IndexableElements(IdDict{Int,AInfo}()) - @goto escape_indexable_def - end - AliasInfo = Unindexable() - @goto escape_unindexable_def - elseif isa(AliasInfo, IndexableElements) - idx = array_nd_index(astate, ary, args[5:end]) - if !isa(idx, Int) - AliasInfo = merge_to_unindexable(AliasInfo) - @goto escape_unindexable_def - end - @label escape_indexable_def - info = get!(()->AInfo(), AliasInfo.infos, idx) - add_alias_escapes!(astate, val, info) - push!(info, LocalDef(pc)) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo - # propagate the escape information of this array ignoring elements information - add_escape_change!(astate, val, ignore_aliasinfo(aryinfo)) - elseif isa(AliasInfo, Unindexable) - @label escape_unindexable_def - add_alias_escapes!(astate, val, AliasInfo.info) - push!(AliasInfo.info, LocalDef(pc)) - add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo - # propagate the escape information of this array ignoring elements information - add_escape_change!(astate, val, ignore_aliasinfo(aryinfo)) - else - # this object has been used as struct, but it is used as array here (thus should throw) - # update ary's element information and just handle this case conservatively - aryinfo = escape_unanalyzable_obj!(astate, ary, aryinfo) - @label conservative_propagation - add_alias_change!(astate, val, ary) - end - # also propagate escape information imposed on the return value of this `arrayset` - ssainfo = estate[SSAValue(pc)] - add_escape_change!(astate, ary, ssainfo) - return true end # NOTE this function models and thus should be synced with the 
implementation of: @@ -1766,63 +1500,6 @@ function array_nd_index(astate::AnalysisState, @nospecialize(ary), args::Vector{ return i end -function escape_builtin!(::typeof(arraysize), astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) == 3 || return false - ary = args[2] - dim = args[3] - if !arraysize_typecheck(ary, dim, astate.ir) - add_escape_change!(astate, ary, ThrownEscape(pc)) - add_escape_change!(astate, dim, ThrownEscape(pc)) - end - # NOTE we may still see "arraysize: dimension out of range", but it doesn't capture anything - return true -end - -function arraysize_typecheck(@nospecialize(ary), @nospecialize(dim), ir::IRCode) - aryt = argextype(ary, ir) - aryt ⊑ Array || return false - dimt = argextype(dim, ir) - dimt ⊑ Int || return false - return true -end - -# returns nothing if this isn't array resizing operation, -# otherwise returns true if it can throw BoundsError and false if not -function array_resize_info(name::Symbol) - if name === :jl_array_grow_beg || name === :jl_array_grow_end - return false, 1 - elseif name === :jl_array_del_beg || name === :jl_array_del_end - return true, 1 - elseif name === :jl_array_grow_at || name === :jl_array_del_at - return true, 2 - else - return nothing - end -end - -# NOTE may potentially throw "cannot resize array with shared data" error, -# but just ignore it since it doesn't capture anything -function escape_array_resize!(boundserror::Bool, ninds::Int, - astate::AnalysisState, pc::Int, args::Vector{Any}) - length(args) ≥ 6+ninds || return add_fallback_changes!(astate, pc, args) - ary = args[6] - aryt = argextype(ary, astate.ir) - aryt ⊑ Array || return add_fallback_changes!(astate, pc, args) - for i in 1:ninds - ind = args[i+6] - indt = argextype(ind, astate.ir) - indt ⊑ Integer || return add_fallback_changes!(astate, pc, args) - end - if boundserror - # this array resizing can potentially throw `BoundsError`, impose it now - add_escape_change!(astate, ary, ThrownEscape(pc)) - end - # give up indexing analysis whenever we see array resizing - # (since we track array dimensions only globally) - mark_unindexable!(astate, ary) - add_liveness_changes!(astate, pc, args, 6) -end - function mark_unindexable!(astate::AnalysisState, @nospecialize(ary)) isa(ary, SSAValue) || return aryinfo = astate.estate[ary] @@ -1832,8 +1509,6 @@ function mark_unindexable!(astate::AnalysisState, @nospecialize(ary)) add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) end -is_array_copy(name::Symbol) = name === :jl_array_copy - # FIXME this implementation is very conservative, improve the accuracy and solve broken test cases function escape_array_copy!(astate::AnalysisState, pc::Int, args::Vector{Any}) length(args) ≥ 6 || return add_fallback_changes!(astate, pc, args) @@ -1850,64 +1525,4 @@ function escape_array_copy!(astate::AnalysisState, pc::Int, args::Vector{Any}) add_liveness_changes!(astate, pc, args, 6) end -is_array_isassigned(name::Symbol) = name === :jl_array_isassigned - -function escape_array_isassigned!(astate::AnalysisState, pc::Int, args::Vector{Any}) - if !array_isassigned_nothrow(args, astate.ir) - add_thrown_escapes!(astate, pc, args) - end - add_liveness_changes!(astate, pc, args, 6) -end - -function array_isassigned_nothrow(args::Vector{Any}, src::IRCode) - # if !validate_foreigncall_args(args, - # :jl_array_isassigned, Cint, svec(Any,Csize_t), 0, :ccall) - # return false - # end - length(args) ≥ 7 || return false - arytype = argextype(args[6], src) - arytype ⊑ Array || return false - idxtype = argextype(args[7], 
src) - idxtype ⊑ Csize_t || return false - return true -end - -# # COMBAK do we want to enable this (and also backport this to Base for array allocations?) -# import Core.Compiler: Cint, svec -# function validate_foreigncall_args(args::Vector{Any}, -# name::Symbol, @nospecialize(rt), argtypes::SimpleVector, nreq::Int, convension::Symbol) -# length(args) ≥ 5 || return false -# normalize(args[1]) === name || return false -# args[2] === rt || return false -# args[3] === argtypes || return false -# args[4] === vararg || return false -# normalize(args[5]) === convension || return false -# return true -# end - -if isdefined(Core, :ImmutableArray) - -import Core: ImmutableArray, arrayfreeze, mutating_arrayfreeze, arraythaw - -escape_builtin!(::typeof(arrayfreeze), astate::AnalysisState, pc::Int, args::Vector{Any}) = - is_safe_immutable_array_op(Array, astate, args) -escape_builtin!(::typeof(mutating_arrayfreeze), astate::AnalysisState, pc::Int, args::Vector{Any}) = - is_safe_immutable_array_op(Array, astate, args) -escape_builtin!(::typeof(arraythaw), astate::AnalysisState, pc::Int, args::Vector{Any}) = - is_safe_immutable_array_op(ImmutableArray, astate, args) -function is_safe_immutable_array_op(@nospecialize(arytype), astate::AnalysisState, args::Vector{Any}) - length(args) == 2 || return false - argextype(args[2], astate.ir) ⊑ arytype || return false - return true -end - -end # if isdefined(Core, :ImmutableArray) - -if _TOP_MOD !== Core.Compiler - # NOTE define fancy package utilities when developing EA as an external package - include("EAUtils.jl") - using .EAUtils - export code_escapes, @code_escapes, __clear_cache! -end - end # baremodule EscapeAnalysis diff --git a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl deleted file mode 100644 index 5d75db990e6f4..0000000000000 --- a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl +++ /dev/null @@ -1,151 +0,0 @@ -# TODO this file contains many duplications with the inlining analysis code, factor them out - -import Core.Compiler: - MethodInstance, InferenceResult, Signature, ConstPropResult, ConcreteResult, - MethodResultPure, MethodMatchInfo, UnionSplitInfo, ConstCallInfo, InvokeCallInfo, - call_sig, argtypes_to_type, is_builtin, is_return_type, istopfunction, validate_sparams, - specialize_method, invoke_rewrite - -const Linfo = Union{MethodInstance,InferenceResult} -struct CallInfo - linfos::Vector{Linfo} - nothrow::Bool -end - -function resolve_call(ir::IRCode, stmt::Expr, @nospecialize(info)) - sig = call_sig(ir, stmt) - if sig === nothing - return missing - end - # TODO handle _apply_iterate - if is_builtin(sig) && sig.f !== invoke - return false - end - # handling corresponding to late_inline_special_case! 
- (; f, argtypes) = sig - if length(argtypes) == 3 && istopfunction(f, :!==) - return true - elseif length(argtypes) == 3 && istopfunction(f, :(>:)) - return true - elseif f === TypeVar && 2 ≤ length(argtypes) ≤ 4 && (argtypes[2] ⊑ Symbol) - return true - elseif f === UnionAll && length(argtypes) == 3 && (argtypes[2] ⊑ TypeVar) - return true - elseif is_return_type(f) - return true - end - if info isa MethodResultPure - return true - elseif info === false - return missing - end - # TODO handle OpaqueClosureCallInfo - if sig.f === invoke - isa(info, InvokeCallInfo) || return missing - return analyze_invoke_call(sig, info) - elseif isa(info, ConstCallInfo) - return analyze_const_call(sig, info) - elseif isa(info, MethodMatchInfo) - infos = MethodMatchInfo[info] - elseif isa(info, UnionSplitInfo) - infos = info.matches - else # isa(info, ReturnTypeCallInfo), etc. - return missing - end - return analyze_call(sig, infos) -end - -function analyze_invoke_call(sig::Signature, info::InvokeCallInfo) - match = info.match - if !match.fully_covers - # TODO: We could union split out the signature check and continue on - return missing - end - result = info.result - if isa(result, ConstPropResult) - return CallInfo(Linfo[result.result], true) - else - argtypes = invoke_rewrite(sig.argtypes) - mi = analyze_match(match, length(argtypes)) - mi === nothing && return missing - return CallInfo(Linfo[mi], true) - end -end - -function analyze_const_call(sig::Signature, cinfo::ConstCallInfo) - linfos = Linfo[] - (; call, results) = cinfo - infos = isa(call, MethodMatchInfo) ? MethodMatchInfo[call] : call.matches - local nothrow = true # required to account for potential escape via MethodError - local j = 0 - for i in 1:length(infos) - meth = infos[i].results - nothrow &= !meth.ambig - nmatch = Core.Compiler.length(meth) - if nmatch == 0 # No applicable methods - # mark this call may potentially throw, and the try next union split - nothrow = false - continue - end - for i = 1:nmatch - j += 1 - result = results[j] - match = Core.Compiler.getindex(meth, i) - if result === nothing - mi = analyze_match(match, length(sig.argtypes)) - mi === nothing && return missing - push!(linfos, mi) - elseif isa(result, ConcreteResult) - # TODO we may want to feedback information that this call always throws if !isdefined(result, :result) - push!(linfos, result.mi) - elseif isa(result, ConstPropResult) - push!(linfos, result.result) - end - nothrow &= match.fully_covers - end - end - return CallInfo(linfos, nothrow) -end - -function analyze_call(sig::Signature, infos::Vector{MethodMatchInfo}) - linfos = Linfo[] - local nothrow = true # required to account for potential escape via MethodError - for i in 1:length(infos) - meth = infos[i].results - nothrow &= !meth.ambig - nmatch = Core.Compiler.length(meth) - if nmatch == 0 # No applicable methods - # mark this call may potentially throw, and the try next union split - nothrow = false - continue - end - for i = 1:nmatch - match = Core.Compiler.getindex(meth, i) - mi = analyze_match(match, length(sig.argtypes)) - mi === nothing && return missing - push!(linfos, mi) - nothrow &= match.fully_covers - end - end - return CallInfo(linfos, nothrow) -end - -function analyze_match(match::MethodMatch, npassedargs::Int) - method = match.method - na = Int(method.nargs) - if na != npassedargs && !(na > 0 && method.isva) - # we have a method match only because an earlier - # inference step shortened our call args list, even - # though we have too many arguments to actually - # call this function - 
return nothing - end - - # Bail out if any static parameters are left as TypeVar - # COMBAK is this needed for escape analysis? - validate_sparams(match.sparams) || return nothing - - # See if there exists a specialization for this method signature - mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance} - return mi -end diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index fd49a7e118eb7..934cd456dd945 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -109,10 +109,16 @@ end length(D::DFSTree) = length(D.from_pre) -function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) +function DFS!(D::DFSTree, blocks::Vector{BasicBlock}, is_post_dominator::Bool) copy!(D, DFSTree(length(blocks))) - to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)] - pre_num = 1 + if is_post_dominator + # TODO: We're using -1 as the virtual exit node here. Would it make + # sense to actually have a real BB for the exit always? + to_visit = Tuple{BBNumber, PreNumber, Bool}[(-1, 0, false)] + else + to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)] + end + pre_num = is_post_dominator ? 0 : 1 post_num = 1 while !isempty(to_visit) # Because we want the postorder number as well as the preorder number, @@ -123,12 +129,14 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) if pushed_children # Going up the DFS tree, so all we need to do is record the # postorder number, then move on - D.to_post[current_node_bb] = post_num - D.from_post[post_num] = current_node_bb + if current_node_bb != -1 + D.to_post[current_node_bb] = post_num + D.from_post[post_num] = current_node_bb + end post_num += 1 pop!(to_visit) - elseif D.to_pre[current_node_bb] != 0 + elseif current_node_bb != -1 && D.to_pre[current_node_bb] != 0 # Node has already been visited, move on pop!(to_visit) continue @@ -136,15 +144,30 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) # Going down the DFS tree # Record preorder number - D.to_pre[current_node_bb] = pre_num - D.from_pre[pre_num] = current_node_bb - D.to_parent_pre[pre_num] = parent_pre + if current_node_bb != -1 + D.to_pre[current_node_bb] = pre_num + D.from_pre[pre_num] = current_node_bb + D.to_parent_pre[pre_num] = parent_pre + end # Record that children (will) have been pushed to_visit[end] = (current_node_bb, parent_pre, true) + if is_post_dominator && current_node_bb == -1 + edges = Int[bb for bb in 1:length(blocks) if isempty(blocks[bb].succs)] + else + edges = is_post_dominator ? blocks[current_node_bb].preds : + blocks[current_node_bb].succs + end + # Push children to the stack - for succ_bb in blocks[current_node_bb].succs + for succ_bb in edges + if succ_bb == 0 + # Edge 0 indicates an error entry, but shouldn't affect + # the post-dominator tree. + @assert is_post_dominator + continue + end push!(to_visit, (succ_bb, pre_num, false)) end @@ -161,7 +184,7 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) return D end -DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks) +DFS(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) = DFS!(DFSTree(0), blocks, is_post_dominator) """ Keeps the per-BB state of the Semi NCA algorithm. In the original formulation, @@ -184,7 +207,7 @@ end DomTreeNode() = DomTreeNode(1, Vector{BBNumber}()) "Data structure that encodes which basic block dominates which." 
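For intuition about the post-dominator variant introduced above: when `is_post_dominator` is set, the DFS starts from a virtual exit node (numbered -1) whose "successors" are all blocks with no real successors, and it then walks predecessor edges instead of successor edges. A minimal standalone sketch of that traversal, using a hypothetical `ToyBlock` type rather than the compiler's `BasicBlock` (illustration only, not part of the patch):

    struct ToyBlock
        preds::Vector{Int}
        succs::Vector{Int}
    end

    # Walk "backwards" from every exit block, which is exactly what the virtual exit
    # node achieves: a block gets visited iff it can reach some exit.
    function reachable_from_exits(blocks::Vector{ToyBlock})
        seen = falses(length(blocks))
        worklist = Int[bb for bb in 1:length(blocks) if isempty(blocks[bb].succs)]
        while !isempty(worklist)
            bb = pop!(worklist)
            seen[bb] && continue
            seen[bb] = true
            append!(worklist, blocks[bb].preds)
        end
        return seen
    end

    # CFG: 1 -> 2 -> 4 (exit), 1 -> 3 -> 3 (self-loop that never reaches an exit)
    cfg = [ToyBlock(Int[], [2, 3]), ToyBlock([1], [4]), ToyBlock([1, 3], [3]), ToyBlock([2], Int[])]
    reachable_from_exits(cfg)  # BitVector equivalent to [true, true, false, true]; block 3 never reaches an exit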
-struct DomTree +struct GenericDomTree{IsPostDom} # These can be reused when updating domtree dynamically dfs_tree::DFSTree snca_state::Vector{SNCAData} @@ -195,19 +218,25 @@ struct DomTree # The nodes in the tree (ordered by BB indices) nodes::Vector{DomTreeNode} end +const DomTree = GenericDomTree{false} +const PostDomTree = GenericDomTree{true} -function DomTree() - return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) +function (T::Type{<:GenericDomTree})() + return T(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) end function construct_domtree(blocks::Vector{BasicBlock}) return update_domtree!(blocks, DomTree(), true, 0) end -function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree, - recompute_dfs::Bool, max_pre::PreNumber) +function construct_postdomtree(blocks::Vector{BasicBlock}) + return update_domtree!(blocks, PostDomTree(), true, 0) +end + +function update_domtree!(blocks::Vector{BasicBlock}, domtree::GenericDomTree{IsPostDom}, + recompute_dfs::Bool, max_pre::PreNumber) where {IsPostDom} if recompute_dfs - DFS!(domtree.dfs_tree, blocks) + DFS!(domtree.dfs_tree, blocks, IsPostDom) end if max_pre == 0 @@ -219,16 +248,24 @@ function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree, return domtree end -function compute_domtree_nodes!(domtree::DomTree) +function compute_domtree_nodes!(domtree::GenericDomTree{IsPostDom}) where {IsPostDom} # Compute children copy!(domtree.nodes, DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)]) for (idx, idom) in Iterators.enumerate(domtree.idoms_bb) - (idx == 1 || idom == 0) && continue + ((!IsPostDom && idx == 1) || idom == 0) && continue push!(domtree.nodes[idom].children, idx) end + # n.b. now issorted(domtree.nodes[*].children) since idx is sorted above # Recursively set level - update_level!(domtree.nodes, 1, 1) + if IsPostDom + for (node, idom) in enumerate(domtree.idoms_bb) + idom == 0 || continue + update_level!(domtree.nodes, node, 1) + end + else + update_level!(domtree.nodes, 1, 1) + end return domtree.nodes end @@ -243,13 +280,18 @@ function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int) end end +dom_edges(domtree::DomTree, blocks::Vector{BasicBlock}, idx::BBNumber) = + blocks[idx].preds +dom_edges(domtree::PostDomTree, blocks::Vector{BasicBlock}, idx::BBNumber) = + blocks[idx].succs + """ The main Semi-NCA algorithm. Matches Figure 2.8 in [LG05]. Note that the pseudocode in [LG05] is not entirely accurate. The best way to understand what's happening is to read [LT79], then the description of SLT in [LG05] (warning: inconsistent notation), then the description of Semi-NCA. """ -function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) +function SNCA!(domtree::GenericDomTree{IsPostDom}, blocks::Vector{BasicBlock}, max_pre::PreNumber) where {IsPostDom} D = domtree.dfs_tree state = domtree.snca_state # There may be more blocks than are reachable in the DFS / dominator tree @@ -288,13 +330,14 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) # Calculate semidominators, but only for blocks with preorder number up to # max_pre ancestors = copy(D.to_parent_pre) - for w::PreNumber in reverse(2:max_pre) + relevant_blocks = IsPostDom ? (1:max_pre) : (2:max_pre) + for w::PreNumber in reverse(relevant_blocks) # LLVM initializes this to the parent, the paper initializes this to # `w`, but it doesn't really matter (the parent is a predecessor, so at # worst we'll discover it below). Save a memory reference here. 
semi_w = typemax(PreNumber) last_linked = PreNumber(w + 1) - for v ∈ blocks[D.from_pre[w]].preds + for v ∈ dom_edges(domtree, blocks, D.from_pre[w]) # For the purpose of the domtree, ignore virtual predecessors into # catch blocks. v == 0 && continue @@ -330,7 +373,7 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) # ancestor in the (immediate) dominator tree between its semidominator and # its parent (see Lemma 2.6 in [LG05]). idoms_pre = copy(D.to_parent_pre) - for v in 2:n_nodes + for v in (IsPostDom ? (1:n_nodes) : (2:n_nodes)) idom = idoms_pre[v] vsemi = state[v].semi while idom > vsemi @@ -342,10 +385,11 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) # Express idoms in BB indexing resize!(domtree.idoms_bb, n_blocks) for i::BBNumber in 1:n_blocks - if i == 1 || D.to_pre[i] == 0 + if (!IsPostDom && i == 1) || D.to_pre[i] == 0 domtree.idoms_bb[i] = 0 else - domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]] + ip = idoms_pre[D.to_pre[i]] + domtree.idoms_bb[i] = ip == 0 ? 0 : D.from_pre[ip] end end end @@ -541,12 +585,28 @@ function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber}) end """ -Checks if bb1 dominates bb2. -bb1 and bb2 are indexes into the CFG blocks. -bb1 dominates bb2 if the only way to enter bb2 is via bb1. -(Other blocks may be in between, e.g bb1->bbX->bb2). + dominates(domtree::DomTree, bb1::Int, bb2::Int) -> Bool + +Checks if `bb1` dominates `bb2`. +`bb1` and `bb2` are indexes into the `CFG` blocks. +`bb1` dominates `bb2` if the only way to enter `bb2` is via `bb1`. +(Other blocks may be in between, e.g. `bb1->bbx->bb2`). """ -function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) +dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) = + _dominates(domtree, bb1, bb2) + +""" + postdominates(domtree::PostDomTree, bb1::Int, bb2::Int) -> Bool + +Checks if `bb1` post-dominates `bb2`. +`bb1` and `bb2` are indexes into the `CFG` blocks. +`bb1` post-dominates `bb2` if every path from `bb2` to the exit is via `bb1`. +(Other blocks may be in between, e.g. `bb2->bbx->bb1->exit`). +""" +postdominates(domtree::PostDomTree, bb1::BBNumber, bb2::BBNumber) = + _dominates(domtree, bb1, bb2) + +function _dominates(domtree::GenericDomTree, bb1::BBNumber, bb2::BBNumber) bb1 == bb2 && return true target_level = domtree.nodes[bb1].level source_level = domtree.nodes[bb2].level @@ -581,19 +641,48 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing) return (bb, nothing) end -function naive_idoms(blocks::Vector{BasicBlock}) +""" + nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber) + +Compute the nearest common (post-)dominator of `a` and `b`. +""" +function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber) + alevel = domtree.nodes[a].level + blevel = domtree.nodes[b].level + # W.l.o.g. assume blevel <= alevel + if alevel < blevel + a, b = b, a + alevel, blevel = blevel, alevel + end + while alevel > blevel + a = domtree.idoms_bb[a] + alevel -= 1 + end + while a != b && a != 0 + a = domtree.idoms_bb[a] + b = domtree.idoms_bb[b] + end + @assert a == b + return a +end + +function naive_idoms(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) nblocks = length(blocks) # The extra +1 helps us detect unreachable blocks below dom_all = BitSet(1:nblocks+1) - dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks] + dominators = is_post_dominator ? + BitSet[isempty(blocks[n].succs) ?
BitSet(n) : copy(dom_all) for n = 1:nblocks] : + BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks] changed = true + relevant_blocks = (is_post_dominator ? (1:nblocks) : (2:nblocks)) while changed changed = false - for n = 2:nblocks - if isempty(blocks[n].preds) + for n in relevant_blocks + edges = is_post_dominator ? blocks[n].succs : blocks[n].preds + if isempty(edges) continue end - firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds))::NTuple{2,Any} + firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, edges))::NTuple{2,Any} new_doms = copy(dominators[firstp]) for p in rest intersect!(new_doms, dominators[p]) @@ -605,7 +694,7 @@ function naive_idoms(blocks::Vector{BasicBlock}) end # Compute idoms idoms = fill(0, nblocks) - for i = 2:nblocks + for i in relevant_blocks if dominators[i] == dom_all idoms[i] = 0 continue diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl deleted file mode 100644 index 6c17bbc7868f2..0000000000000 --- a/base/compiler/ssair/driver.jl +++ /dev/null @@ -1,16 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -if false - import Base: Base, @show -else - macro show(s) - return :(println(stdout, $(QuoteNode(s)), " = ", $(esc(s)))) - end -end - -include("compiler/ssair/slot2ssa.jl") -include("compiler/ssair/inlining.jl") -include("compiler/ssair/verify.jl") -include("compiler/ssair/legacy.jl") -include("compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl") -include("compiler/ssair/passes.jl") diff --git a/base/compiler/ssair/heap.jl b/base/compiler/ssair/heap.jl new file mode 100644 index 0000000000000..6e9883bc4ec60 --- /dev/null +++ b/base/compiler/ssair/heap.jl @@ -0,0 +1,74 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# Heap operations on flat vectors +# ------------------------------- + + +# Binary heap indexing +heapleft(i::Integer) = 2i +heapright(i::Integer) = 2i + 1 +heapparent(i::Integer) = div(i, 2) + + +# Binary min-heap percolate down. +function percolate_down!(xs::Vector, i::Integer, x, o::Ordering, len::Integer=length(xs)) + @inbounds while (l = heapleft(i)) <= len + r = heapright(i) + j = r > len || lt(o, xs[l], xs[r]) ? l : r + lt(o, xs[j], x) || break + xs[i] = xs[j] + i = j + end + xs[i] = x +end + +# Binary min-heap percolate up. +function percolate_up!(xs::Vector, i::Integer, x, o::Ordering) + @inbounds while (j = heapparent(i)) >= 1 + lt(o, x, xs[j]) || break + xs[i] = xs[j] + i = j + end + xs[i] = x +end + +""" + heappop!(v, ord) + +Given a binary heap-ordered array, remove and return the lowest ordered element. +For efficiency, this function does not check that the array is indeed heap-ordered. +""" +function heappop!(xs::Vector, o::Ordering) + x = xs[1] + y = pop!(xs) + if !isempty(xs) + percolate_down!(xs, 1, y, o) + end + return x +end + +""" + heappush!(v, x, ord) + +Given a binary heap-ordered array, push a new element `x`, preserving the heap property. +For efficiency, this function does not check that the array is indeed heap-ordered. +""" +function heappush!(xs::Vector, x, o::Ordering) + push!(xs, x) + i = lastindex(xs) + percolate_up!(xs, i, @inbounds(xs[i]), o) + return xs +end + + +""" + heapify!(v, ord::Ordering) + +Turn an arbitrary vector into a binary min-heap in linear time. 
+""" +function heapify!(xs::Vector, o::Ordering) + for i in heapparent(lastindex(xs)):-1:1 + percolate_down!(xs, i, @inbounds(xs[i]), o) + end + return xs +end diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index ca9eb818f0edb..650af8248883c 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -1,7 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -@nospecialize - struct Signature f::Any ft::Any @@ -9,8 +7,10 @@ struct Signature Signature(@nospecialize(f), @nospecialize(ft), argtypes::Vector{Any}) = new(f, ft, argtypes) end -struct ResolvedInliningSpec - # The LineTable and IR of the inlinee +struct InliningTodo + # The MethodInstance to be inlined + mi::MethodInstance + # The IR of the inlinee ir::IRCode # If the function being inlined is a single basic block we can use a # simpler inlining algorithm. This flag determines whether that's allowed @@ -18,49 +18,29 @@ struct ResolvedInliningSpec # Effects of the call statement effects::Effects end -ResolvedInliningSpec(ir::IRCode, effects::Effects) = - ResolvedInliningSpec(ir, linear_inline_eligible(ir), effects) - -""" -Represents a callsite that our analysis has determined is legal to inline, -but did not resolve during the analysis step to allow the outer inlining -pass to apply its own inlining policy decisions. -""" -struct DelayedInliningSpec - match::Union{MethodMatch, InferenceResult} - argtypes::Vector{Any} -end - -struct InliningTodo - # The MethodInstance to be inlined - mi::MethodInstance - spec::Union{ResolvedInliningSpec, DelayedInliningSpec} +function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) + return InliningTodo(mi, ir, linear_inline_eligible(ir), effects) end -InliningTodo(mi::MethodInstance, match::MethodMatch, argtypes::Vector{Any}) = - InliningTodo(mi, DelayedInliningSpec(match, argtypes)) - -InliningTodo(result::InferenceResult, argtypes::Vector{Any}) = - InliningTodo(result.linfo, DelayedInliningSpec(result, argtypes)) - struct ConstantCase val::Any - ConstantCase(val) = new(val) + ConstantCase(@nospecialize val) = new(val) end struct SomeCase val::Any - SomeCase(val) = new(val) + SomeCase(@nospecialize val) = new(val) end struct InvokeCase invoke::MethodInstance effects::Effects + info::CallInfo end struct InliningCase sig # Type - item # Union{InliningTodo, MethodInstance, ConstantCase} + item # Union{InliningTodo, InvokeCase, ConstantCase} function InliningCase(@nospecialize(sig), @nospecialize(item)) @assert isa(item, Union{InliningTodo, InvokeCase, ConstantCase}) "invalid inlining item" return new(sig, item) @@ -76,15 +56,29 @@ struct UnionSplit new(fully_covered, atype, cases, Int[]) end -@specialize +struct InliningEdgeTracker + edges::Vector{Any} + invokesig::Union{Nothing,Vector{Any}} + InliningEdgeTracker(state::InliningState, invokesig::Union{Nothing,Vector{Any}}=nothing) = + new(state.edges, invokesig) +end -function ssa_inlining_pass!(ir::IRCode, linetable::Vector{LineInfoNode}, state::InliningState, propagate_inbounds::Bool) - # Go through the function, performing simple ininlingin (e.g. 
replacing call by constants +function add_inlining_backedge!((; edges, invokesig)::InliningEdgeTracker, mi::MethodInstance) + if invokesig === nothing + push!(edges, mi) + else # invoke backedge + push!(edges, invoke_signature(invokesig), mi) + end + return nothing +end + +function ssa_inlining_pass!(ir::IRCode, state::InliningState, propagate_inbounds::Bool) + # Go through the function, performing simple inlining (e.g. replacing call by constants # and analyzing legality of inlining). @timeit "analysis" todo = assemble_inline_todo!(ir, state) isempty(todo) && return ir # Do the actual inlining for every call we identified - @timeit "execution" ir = batch_inline!(todo, ir, linetable, propagate_inbounds, state.params) + @timeit "execution" ir = batch_inline!(ir, todo, propagate_inbounds, OptimizationParams(state.interp)) return ir end @@ -126,8 +120,8 @@ function inline_into_block!(state::CFGInliningState, block::Int) return end -function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, state::CFGInliningState, from_unionsplit::Bool=false) - inlinee_cfg = spec.ir.cfg +function cfg_inline_item!(ir::IRCode, idx::Int, todo::InliningTodo, state::CFGInliningState, from_unionsplit::Bool=false) + inlinee_cfg = todo.ir.cfg # Figure out if we need to split the BB need_split_before = false need_split = true @@ -141,7 +135,7 @@ function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, stat last_block_idx = last(state.cfg.blocks[block].stmts) if false # TODO: ((idx+1) == last_block_idx && isa(ir[SSAValue(last_block_idx)], GotoNode)) need_split = false - post_bb_id = -ir[SSAValue(last_block_idx)][:inst].label + post_bb_id = -ir[SSAValue(last_block_idx)][:stmt].label else post_bb_id = length(state.new_cfg_blocks) + length(inlinee_cfg.blocks) + (need_split_before ? 
1 : 0) need_split = true #!(idx == last_block_idx) @@ -202,7 +196,7 @@ function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, stat for (old_block, new_block) in enumerate(bb_rename_range) if (length(state.new_cfg_blocks[new_block].succs) == 0) terminator_idx = last(inlinee_cfg.blocks[old_block].stmts) - terminator = spec.ir[SSAValue(terminator_idx)][:inst] + terminator = todo.ir[SSAValue(terminator_idx)][:stmt] if isa(terminator, ReturnNode) && isdefined(terminator, :val) any_edges = true push!(state.new_cfg_blocks[new_block].succs, post_bb_id) @@ -217,10 +211,9 @@ function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, stat return nothing end -function cfg_inline_unionsplit!(ir::IRCode, idx::Int, - (; fully_covered, #=atype,=# cases, bbs)::UnionSplit, - state::CFGInliningState, - params::OptimizationParams) +function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit, + state::CFGInliningState, params::OptimizationParams) + (; fully_covered, #=atype,=# cases, bbs) = union_split inline_into_block!(state, block_for_inst(ir, idx)) from_bbs = Int[] delete!(state.split_targets, length(state.new_cfg_blocks)) @@ -235,15 +228,12 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int, push!(state.new_cfg_blocks[cond_bb].succs, cond_bb+1) case = cases[i].item if isa(case, InliningTodo) - spec = case.spec::ResolvedInliningSpec - if !spec.linear_inline_eligible - cfg_inline_item!(ir, idx, spec, state, true) + if !case.linear_inline_eligible + cfg_inline_item!(ir, idx, case, state, true) end end push!(from_bbs, length(state.new_cfg_blocks)) - # TODO: Right now we unconditionally generate a fallback block - # in case of subtyping errors - This is probably unnecessary. - if i != length(cases) || (!fully_covered || (!params.trust_inference)) + if !(i == length(cases) && fully_covered) # This block will have the next condition or the final else case push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx))) push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks)) @@ -308,18 +298,38 @@ function finish_cfg_inline!(state::CFGInliningState) end end +# duplicated from IRShow +function normalize_method_name(m) + if m isa Method + return m.name + elseif m isa MethodInstance + return (m.def::Method).name + elseif m isa Symbol + return m + else + return Symbol("") + end +end +@noinline method_name(m::LineInfoNode) = normalize_method_name(m.method) + +inline_node_is_duplicate(topline::LineInfoNode, line::LineInfoNode) = + topline.module === line.module && + method_name(topline) === method_name(line) && + topline.file === line.file && + topline.line === line.line + function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode, - inlinee::Method, - inlined_at::Int32) - coverage = coverage_enabled(inlinee.module) + inlinee::MethodInstance, inlined_at::Int32) + inlinee_def = inlinee.def::Method + coverage = coverage_enabled(inlinee_def.module) linetable_offset::Int32 = length(linetable) # Append the linetable of the inlined function to our line table topline::Int32 = linetable_offset + Int32(1) coverage_by_path = JLOptions().code_coverage == 3 - push!(linetable, LineInfoNode(inlinee.module, inlinee.name, inlinee.file, inlinee.line, inlined_at)) + push!(linetable, LineInfoNode(inlinee_def.module, inlinee_def.name, inlinee_def.file, inlinee_def.line, inlined_at)) oldlinetable = inlinee_ir.linetable - extra_coverage_line = 0 - for oldline in 1:length(oldlinetable) + extra_coverage_line = zero(Int32) + for oldline in 
eachindex(oldlinetable) entry = oldlinetable[oldline] if !coverage && coverage_by_path && is_file_tracked(entry.file) # include topline coverage entry if in path-specific coverage mode, and any file falls under path @@ -329,7 +339,7 @@ function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCod (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset + (oldline == 1) : inlined_at)) if oldline == 1 # check for a duplicate on the first iteration (likely true) - if newentry === linetable[topline] + if inline_node_is_duplicate(linetable[topline], newentry) continue else linetable_offset += 1 @@ -343,51 +353,71 @@ function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCod return linetable_offset, extra_coverage_line end -function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, - linetable::Vector{LineInfoNode}, item::InliningTodo, - boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) - # Ok, do the inlining here - spec = item.spec::ResolvedInliningSpec - sparam_vals = item.mi.sparam_vals - def = item.mi.def::Method - inlined_at = compact.result[idx][:line] - linetable_offset::Int32 = length(linetable) - topline::Int32 = linetable_offset + Int32(1) - linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, item.spec.ir, def, inlined_at) +function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact}, + ir::IRCode, mi::MethodInstance, inlined_at::Int32, argexprs::Vector{Any}) + def = mi.def::Method + linetable = inline_target isa IRCode ? inline_target.linetable : inline_target.ir.linetable + topline::Int32 = length(linetable) + Int32(1) + linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, ir, mi, inlined_at) if extra_coverage_line != 0 - insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) + insert_node!(NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) + end + spvals_ssa = nothing + if !validate_sparams(mi.sparam_vals) + # N.B. This works on the caller-side argexprs, (i.e. 
before the va fixup below) + spvals_ssa = insert_node!( + effect_free_and_nothrow(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline))) end if def.isva nargs_def = Int(def.nargs::Int32) if nargs_def > 0 - argexprs = fix_va_argexprs!(compact, argexprs, nargs_def, topline) + argexprs = fix_va_argexprs!(insert_node!, inline_target, argexprs, nargs_def, topline) end end if def.is_for_opaque_closure # Replace the first argument by a load of the capture environment - argexprs[1] = insert_node_here!(compact, + argexprs[1] = insert_node!( NewInstruction(Expr(:call, GlobalRef(Core, :getfield), argexprs[1], QuoteNode(:captures)), - spec.ir.argtypes[1], topline)) + ir.argtypes[1], topline)) end - if boundscheck === :default || boundscheck === :propagate - if (compact.result[idx][:flag] & IR_FLAG_INBOUNDS) != 0 - boundscheck = :off - end + return SSASubstitute(mi, argexprs, spvals_ssa, linetable_offset) +end + +function adjust_boundscheck!(inline_compact::IncrementalCompact, idx′::Int, stmt::Expr, boundscheck::Symbol) + if boundscheck === :off + isempty(stmt.args) && push!(stmt.args, false) + elseif boundscheck !== :propagate + isempty(stmt.args) && push!(stmt.args, true) end + return nothing +end + +function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, + item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) + # Ok, do the inlining here + inlined_at = compact.result[idx][:line] + + ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.mi, inlined_at, argexprs) + + boundscheck = has_flag(compact.result[idx], IR_FLAG_INBOUNDS) ? :off : boundscheck + # If the iterator already moved on to the next basic block, # temporarily re-open in again. local return_value - sig = def.sig # Special case inlining that maintains the current basic block if there's only one BB in the target - if spec.linear_inline_eligible + new_new_offset = length(compact.new_new_nodes) + late_fixup_offset = length(compact.late_fixup) + if item.linear_inline_eligible #compact[idx] = nothing - inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx) + inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) for ((_, idx′), stmt′) in inline_compact # This dance is done to maintain accurate usage counts in the # face of rename_arguments! mutating in place - should figure out # something better eventually. inline_compact[idx′] = nothing - stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck, compact) + insert_node! = InsertBefore(inline_compact, SSAValue(idx′)) + stmt′ = ssa_substitute!(insert_node!, inline_compact[SSAValue(idx′)], stmt′, + ssa_substitute) if isa(stmt′, ReturnNode) val = stmt′.val return_value = SSAValue(idx′) @@ -395,26 +425,30 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector inline_compact.result[idx′][:type] = argextype(val, isa(val, Argument) || isa(val, Expr) ? 
compact : inline_compact) # Everything legal in value position is guaranteed to be effect free in stmt position - inline_compact.result[idx′][:flag] = IR_FLAG_EFFECT_FREE + inline_compact.result[idx′][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW break + elseif isexpr(stmt′, :boundscheck) + adjust_boundscheck!(inline_compact, idx′, stmt′, boundscheck) end inline_compact[idx′] = stmt′ end - just_fixup!(inline_compact) + just_fixup!(inline_compact, new_new_offset, late_fixup_offset) compact.result_idx = inline_compact.result_idx else bb_offset, post_bb_id = popfirst!(todo_bbs) # This implements the need_split_before flag above - need_split_before = !isempty(spec.ir.cfg.blocks[1].preds) + need_split_before = !isempty(item.ir.cfg.blocks[1].preds) if need_split_before finish_current_bb!(compact, 0) end pn = PhiNode() #compact[idx] = nothing - inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx) + inline_compact = IncrementalCompact(compact, item.ir, compact.result_idx) for ((_, idx′), stmt′) in inline_compact inline_compact[idx′] = nothing - stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck, compact) + insert_node! = InsertBefore(inline_compact, SSAValue(idx′)) + stmt′ = ssa_substitute!(insert_node!, inline_compact[SSAValue(idx′)], stmt′, + ssa_substitute) if isa(stmt′, ReturnNode) if isdefined(stmt′, :val) val = stmt′.val @@ -425,16 +459,18 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector end elseif isa(stmt′, GotoNode) stmt′ = GotoNode(stmt′.label + bb_offset) - elseif isa(stmt′, Expr) && stmt′.head === :enter - stmt′ = Expr(:enter, stmt′.args[1]::Int + bb_offset) + elseif isa(stmt′, EnterNode) + stmt′ = EnterNode(stmt′, stmt′.catch_dest == 0 ? 0 : stmt′.catch_dest + bb_offset) elseif isa(stmt′, GotoIfNot) stmt′ = GotoIfNot(stmt′.cond, stmt′.dest + bb_offset) elseif isa(stmt′, PhiNode) stmt′ = PhiNode(Int32[edge+bb_offset for edge in stmt′.edges], stmt′.values) + elseif isexpr(stmt′, :boundscheck) + adjust_boundscheck!(inline_compact, idx′, stmt′, boundscheck) end inline_compact[idx′] = stmt′ end - just_fixup!(inline_compact) + just_fixup!(inline_compact, new_new_offset, late_fixup_offset) compact.result_idx = inline_compact.result_idx compact.active_result_bb = inline_compact.active_result_bb if length(pn.edges) == 1 @@ -447,7 +483,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector return_value end -function fix_va_argexprs!(compact::IncrementalCompact, +function fix_va_argexprs!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact}, argexprs::Vector{Any}, nargs_def::Int, line_idx::Int32) newargexprs = argexprs[1:(nargs_def-1)] tuple_call = Expr(:call, TOP_TUPLE) @@ -455,62 +491,56 @@ function fix_va_argexprs!(compact::IncrementalCompact, for i in nargs_def:length(argexprs) arg = argexprs[i] push!(tuple_call.args, arg) - push!(tuple_typs, argextype(arg, compact)) + push!(tuple_typs, argextype(arg, inline_target)) end - tuple_typ = tuple_tfunc(tuple_typs) - push!(newargexprs, insert_node_here!(compact, NewInstruction(tuple_call, tuple_typ, line_idx))) + tuple_typ = tuple_tfunc(SimpleInferenceLattice.instance, tuple_typs) + tuple_inst = NewInstruction(tuple_call, tuple_typ, line_idx) + push!(newargexprs, insert_node!(tuple_inst)) return newargexprs end -const FATAL_TYPE_BOUND_ERROR = ErrorException("fatal error in type inference (type bound)") - """ ir_inline_unionsplit! 
-The core idea of this function is to simulate the dispatch semantics by generating -(flat) `isa`-checks corresponding to the signatures of union-split dispatch candidates, -and then inline their bodies into each `isa`-conditional block. -This `isa`-based virtual dispatch requires few pre-conditions to hold in order to simulate -the actual semantics correctly. +The primary purpose of this function is to emulate the dispatch behavior by generating flat +`isa`-checks that correspond to the signatures of union-split dispatch candidates. +These checks allow us to inline the method bodies into respective `isa`-conditional blocks. -The first one is that these dispatch candidates need to be processed in order of their specificity, -and the corresponding `isa`-checks should reflect the method specificities, since now their -signatures are not necessarily concrete. -For example, given the following definitions: +Note that two pre-conditions are required for this emulation to work correctly: + +1. Ordered Dispatch Candidates + +The dispatch candidates must be processed in order of their specificity. +The generated `isa`-checks should reflect this order, +especially since the method signatures may not be concrete. +For instance, with the methods: f(x::Int) = ... f(x::Number) = ... f(x::Any) = ... -and a callsite: - - f(x::Any) - -then a correct `isa`-based virtual dispatch would be: +A correct `isa`-based dispatch emulation for the call site `f(x::Any)` would look like: if isa(x, Int) [inlined/resolved f(x::Int)] elseif isa(x, Number) [inlined/resolved f(x::Number)] - else # implies `isa(x, Any)`, which fully covers this call signature, - # otherwise we need to insert a fallback dynamic dispatch case also + else [inlined/resolved f(x::Any)] end -Fortunately, `ml_matches` should already sorted them in that way, except cases when there is -any ambiguity, from which we already bail out at this point. - -Another consideration is type equality constraint from type variables: the `isa`-checks are -not enough to simulate the dispatch semantics in cases like: -Given a definition: +`ml_matches` should already sort the matched method candidates correctly, +except in ambiguous cases, which we've already excluded at this stage. - g(x::T, y::T) where T<:Integer = ... +2. Type Equality Constraints -transform a callsite: +Another factor is the type equality constraint imposed by type variables. +Simple `isa`-checks are insufficient to capture the semantics in some cases. +For example, given the following method definition: - g(x::Any, y::Any) + g(x::T, y::T) where T<:Integer = ... -into the optimized form: +it is _invalid_ to optimize a call site like `g(x::Any, y::Any)` into: if isa(x, Integer) && isa(y, Integer) [inlined/resolved g(x::Integer, y::Integer)] @@ -518,18 +548,20 @@ into the optimized form: g(x, y) # fallback dynamic dispatch end -But again, we should already bail out from such cases at this point, essentially by -excluding cases where `case.sig::UnionAll`. +since we also need to check that `x` and `y` are equal types. -In short, here we can process the dispatch candidates in order, assuming we haven't changed -their order somehow somewhere up to this point. +But, we've already excluded such cases at this point, +mainly by filtering out `case.sig::UnionAll`, +so there is no need to worry about type equality at this point. + +In essence, we can process the dispatch candidates sequentially, +assuming their order stays the same post-discovery in `ml_matches`.
""" -function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, - argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, - (; fully_covered, atype, cases, bbs)::UnionSplit, - boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}, - params::OptimizationParams) - stmt, typ, line = compact.result[idx][:inst], compact.result[idx][:type], compact.result[idx][:line] +function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, + union_split::UnionSplit, boundscheck::Symbol, + todo_bbs::Vector{Tuple{Int,Int}}, params::OptimizationParams) + (; fully_covered, atype, cases, bbs) = union_split + stmt, typ, line = compact.result[idx][:stmt], compact.result[idx][:type], compact.result[idx][:line] join_bb = bbs[end] pn = PhiNode() local bb = compact.active_result_bb @@ -543,7 +575,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, cond = true nparams = fieldcount(atype) @assert nparams == fieldcount(mtype) - if i != ncases || !fully_covered || !params.trust_inference + if !(i == ncases && fully_covered) for i = 1:nparams a, m = fieldtype(atype, i), fieldtype(mtype, i) # If this is always true, we don't need to check for it @@ -576,17 +608,16 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, end end if isa(case, InliningTodo) - val = ir_inline_item!(compact, idx, argexprs′, linetable, case, boundscheck, todo_bbs) + val = ir_inline_item!(compact, idx, argexprs′, case, boundscheck, todo_bbs) elseif isa(case, InvokeCase) - effect_free = is_removable_if_unused(case.effects) - val = insert_node_here!(compact, - NewInstruction(Expr(:invoke, case.invoke, argexprs′...), typ, nothing, - line, effect_free ? IR_FLAG_EFFECT_FREE : IR_FLAG_NULL, effect_free)) + invoke_stmt = Expr(:invoke, case.invoke, argexprs′...) 
+ flag = flags_for_effects(case.effects) + val = insert_node_here!(compact, NewInstruction(invoke_stmt, typ, case.info, line, flag)) else case = case::ConstantCase val = case.val end - if !isempty(compact.result_bbs[bb].preds) + if !isempty(compact.cfg_transform.result_bbs[bb].preds) push!(pn.edges, bb) push!(pn.values, val) insert_node_here!(compact, @@ -599,14 +630,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, end bb += 1 # We're now in the fall through block, decide what to do - if fully_covered - if !params.trust_inference - e = Expr(:call, GlobalRef(Core, :throw), FATAL_TYPE_BOUND_ERROR) - insert_node_here!(compact, NewInstruction(e, Union{}, line)) - insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line)) - finish_current_bb!(compact, 0) - end - else + if !fully_covered ssa = insert_node_here!(compact, NewInstruction(stmt, typ, line)) push!(pn.edges, bb) push!(pn.values, ssa) @@ -618,7 +642,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, return insert_node_here!(compact, NewInstruction(pn, typ, line)) end -function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool, params::OptimizationParams) +function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inbounds::Bool, params::OptimizationParams) # Compute the new CFG first (modulo statement ranges, which will be computed below) state = CFGInliningState(ir) for (idx, item) in todo @@ -626,27 +650,21 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect cfg_inline_unionsplit!(ir, idx, item, state, params) else item = item::InliningTodo - spec = item.spec::ResolvedInliningSpec # A linear inline does not modify the CFG - spec.linear_inline_eligible && continue - cfg_inline_item!(ir, idx, spec, state, false) + item.linear_inline_eligible && continue + cfg_inline_item!(ir, idx, item, state, false) end end finish_cfg_inline!(state) - boundscheck = inbounds_option() - if boundscheck === :default && propagate_inbounds - boundscheck = :propagate - end + boundscheck = propagate_inbounds ? 
:propagate : :default - let compact = IncrementalCompact(ir, false) - compact.result_bbs = state.new_cfg_blocks + let compact = IncrementalCompact(ir, CFGTransformState!(state.new_cfg_blocks, false)) # This needs to be a minimum and is more of a size hint nn = 0 for (_, item) in todo if isa(item, InliningTodo) - spec = item.spec::ResolvedInliningSpec - nn += (length(spec.ir.stmts) + length(spec.ir.new_nodes)) + nn += (length(item.ir.stmts) + length(item.ir.new_nodes)) end end nnewnodes = length(compact.result) + nn @@ -655,9 +673,14 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect for ((old_idx, idx), stmt) in compact if old_idx == inline_idx stmt = stmt::Expr - argexprs = copy(stmt.args) + if stmt.head === :invoke + argexprs = stmt.args[2:end] + else + @assert stmt.head === :call + argexprs = copy(stmt.args) + end refinish = false - if compact.result_idx == first(compact.result_bbs[compact.active_result_bb].stmts) + if compact.result_idx == first(compact.cfg_transform.result_bbs[compact.active_result_bb].stmts) compact.active_result_bb -= 1 refinish = true end @@ -669,14 +692,14 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect for aidx in 1:length(argexprs) aexpr = argexprs[aidx] if isa(aexpr, Expr) || isa(aexpr, GlobalRef) - ninst = effect_free(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line])) + ninst = effect_free_and_nothrow(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line])) argexprs[aidx] = insert_node_here!(compact, ninst) end end if isa(item, InliningTodo) - compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs) + compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, item, boundscheck, state.todo_bbs) elseif isa(item, UnionSplit) - compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs, params) + compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, item, boundscheck, state.todo_bbs, params) end compact[idx] = nothing refinish && finish_current_bb!(compact, 0) @@ -687,8 +710,8 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect end elseif isa(stmt, GotoNode) compact[idx] = GotoNode(state.bb_rename[stmt.label]) - elseif isa(stmt, Expr) && stmt.head === :enter - compact[idx] = Expr(:enter, state.bb_rename[stmt.args[1]::Int]) + elseif isa(stmt, EnterNode) + compact[idx] = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : state.bb_rename[stmt.catch_dest]) elseif isa(stmt, GotoIfNot) compact[idx] = GotoIfNot(stmt.cond, state.bb_rename[stmt.dest]) elseif isa(stmt, PhiNode) @@ -702,9 +725,9 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect end # This assumes the caller has verified that all arguments to the _apply_iterate call are Tuples. 
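The hunk that follows reworks `rewrite_apply_exprargs!`, which lowers `Core._apply_iterate` call sites whose argument shapes are statically known: each tuple argument is split into per-element `getfield` extractions so the splat becomes an ordinary call. A rough expression-level sketch of that flattening, using a hypothetical `flatten_splat` helper rather than the compiler's IR-based implementation:

    # Flatten `f(t...)`, where `t` is known to be an N-tuple, into
    # `f(getfield(t, 1), ..., getfield(t, N))`.
    function flatten_splat(f::Symbol, t::Symbol, N::Int)
        elems = Any[:(getfield($t, $i)) for i in 1:N]
        return Expr(:call, f, elems...)
    end

    flatten_splat(:g, :tup, 2)  # :(g(getfield(tup, 1), getfield(tup, 2)))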
-function rewrite_apply_exprargs!( +function rewrite_apply_exprargs!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, argtypes::Vector{Any}, - arginfos::Vector{MaybeAbstractIterationInfo}, arg_start::Int, istate::InliningState, todo::Vector{Pair{Int, Any}}) + arginfos::Vector{MaybeAbstractIterationInfo}, arg_start::Int, istate::InliningState) flag = ir.stmts[idx][:flag] argexprs = stmt.args new_argexprs = Any[argexprs[arg_start]] @@ -714,7 +737,7 @@ function rewrite_apply_exprargs!( def = argexprs[i] def_type = argtypes[i] thisarginfo = arginfos[i-arg_start] - if thisarginfo === nothing + if thisarginfo === nothing || !thisarginfo.complete if def_type isa PartialStruct # def_type.typ <: Tuple is assumed def_argtypes = def_type.fields @@ -726,11 +749,11 @@ function rewrite_apply_exprargs!( end else ti = widenconst(def_type)::DataType # checked by `is_valid_type_for_apply_rewrite` - if ti.name === NamedTuple_typename + if ti.name === _NAMEDTUPLE_NAME ti = ti.parameters[2]::DataType # checked by `is_valid_type_for_apply_rewrite` end for p in ti.parameters - if isa(p, DataType) && isdefined(p, :instance) + if issingletontype(p) # replace singleton types with their equivalent Const object p = Const(p.instance) elseif isconstType(p) @@ -760,19 +783,10 @@ function rewrite_apply_exprargs!( state1 = insert_node!(ir, idx, NewInstruction(new_stmt, call.rt)) new_sig = call_sig(ir, new_stmt)::Signature new_info = call.info - if isa(new_info, ConstCallInfo) - handle_const_call!( - ir, state1.id, new_stmt, new_info, flag, - new_sig, istate, todo) - elseif isa(new_info, MethodMatchInfo) || isa(new_info, UnionSplitInfo) - new_infos = isa(new_info, MethodMatchInfo) ? MethodMatchInfo[new_info] : new_info.matches - # See if we can inline this call to `iterate` - analyze_single_call!( - ir, state1.id, new_stmt, new_infos, flag, - new_sig, istate, todo) - end + # See if we can inline this call to `iterate` + handle_call!(todo, ir, state1.id, new_stmt, new_info, flag, new_sig, istate) if i != length(thisarginfo.each) - valT = getfield_tfunc(call.rt, Const(1)) + valT = getfield_tfunc(optimizer_lattice(istate.interp), call.rt, Const(1)) val_extracted = insert_node!(ir, idx, NewInstruction( Expr(:call, GlobalRef(Core, :getfield), state1, 1), valT)) @@ -780,7 +794,7 @@ function rewrite_apply_exprargs!( push!(new_argtypes, valT) state_extracted = insert_node!(ir, idx, NewInstruction( Expr(:call, GlobalRef(Core, :getfield), state1, 2), - getfield_tfunc(call.rt, Const(2)))) + getfield_tfunc(optimizer_lattice(istate.interp), call.rt, Const(2)))) state = Core.svec(state_extracted) end end @@ -790,92 +804,150 @@ function rewrite_apply_exprargs!( return new_argtypes end -function compileable_specialization(et::Union{EdgeTracker, Nothing}, match::MethodMatch, effects::Effects) - mi = specialize_method(match; compilesig=true) - mi === nothing && return nothing - et !== nothing && push!(et, mi) - return InvokeCase(mi, effects) +function compileable_specialization(mi::MethodInstance, effects::Effects, + et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true) + mi_invoke = mi + method, atype, sparams = mi.def::Method, mi.specTypes, mi.sparam_vals + if compilesig_invokes + new_atype = get_compileable_sig(method, atype, sparams) + new_atype === nothing && return nothing + if atype !== new_atype + sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), new_atype, method.sig)::SimpleVector + if sparams === sp_[2]::SimpleVector + mi_invoke = specialize_method(method, new_atype, 
sparams) + mi_invoke === nothing && return nothing + end + end + else + # If this caller does not want us to optimize calls to use their + # declared compilesig, then it is also likely they would handle sparams + # incorrectly if there were any unknown typevars, so we conservatively return nothing + if any(@nospecialize(t)->isa(t, TypeVar), mi.sparam_vals) + return nothing + end + end + add_inlining_backedge!(et, mi) # to the dispatch lookup + push!(et.edges, method.sig, mi_invoke) # add_inlining_backedge to the invoke call + return InvokeCase(mi_invoke, effects, info) end -function compileable_specialization(et::Union{EdgeTracker, Nothing}, linfo::MethodInstance, effects::Effects) - mi = specialize_method(linfo.def::Method, linfo.specTypes, linfo.sparam_vals; compilesig=true) - mi === nothing && return nothing - et !== nothing && push!(et, mi) - return InvokeCase(mi, effects) +function compileable_specialization(match::MethodMatch, effects::Effects, + et::InliningEdgeTracker, @nospecialize(info::CallInfo); compilesig_invokes::Bool=true) + mi = specialize_method(match) + return compileable_specialization(mi, effects, et, info; compilesig_invokes) end -function compileable_specialization(et::Union{EdgeTracker, Nothing}, result::InferenceResult, effects::Effects) - return compileable_specialization(et, result.linfo, effects) +struct InferredResult + src::Any + effects::Effects + InferredResult(@nospecialize(src), effects::Effects) = new(src, effects) end - -function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) - mi = todo.mi - (; match, argtypes) = todo.spec::DelayedInliningSpec - et = state.et - - #XXX: update_valid_age!(min_valid[1], max_valid[1], sv) - if isa(match, InferenceResult) - inferred_src = match.src - if isa(inferred_src, ConstAPI) +@inline function get_cached_result(state::InliningState, mi::MethodInstance) + code = get(code_cache(state), mi, nothing) + if code isa CodeInstance + if use_const_api(code) + # in this case function can be inlined to a constant + return ConstantCase(quoted(code.rettype_const)) + end + src = @atomic :monotonic code.inferred + effects = decode_effects(code.ipo_purity_bits) + return InferredResult(src, effects) + end + return InferredResult(nothing, Effects()) +end +@inline function get_local_result(inf_result::InferenceResult) + effects = inf_result.ipo_effects + if is_foldable_nothrow(effects) + res = inf_result.result + if isa(res, Const) && is_inlineable_constant(res.val) # use constant calling convention - et !== nothing && push!(et, mi) - return ConstantCase(quoted(inferred_src.val)) - else - src = inferred_src # ::Union{Nothing,CodeInfo} for NativeInterpreter + return ConstantCase(quoted(res.val)) end - effects = match.ipo_effects + end + return InferredResult(inf_result.src, effects) +end + +# the general resolver for usual and const-prop'ed calls +function resolve_todo(mi::MethodInstance, result::Union{Nothing,InferenceResult,VolatileInferenceResult}, + @nospecialize(info::CallInfo), flag::UInt32, state::InliningState; + invokesig::Union{Nothing,Vector{Any}}=nothing) + et = InliningEdgeTracker(state, invokesig) + + preserve_local_sources = true + if isa(result, InferenceResult) + inferred_result = get_local_result(result) + elseif isa(result, VolatileInferenceResult) + inferred_result = get_local_result(result.inf_result) + # volatile inference result can be inlined destructively + preserve_local_sources = !result.inf_result.is_src_volatile | OptimizationParams(state.interp).preserve_local_sources else - code = 
get(state.mi_cache, mi, nothing) - if code isa CodeInstance - if use_const_api(code) - # in this case function can be inlined to a constant - et !== nothing && push!(et, mi) - return ConstantCase(quoted(code.rettype_const)) - else - src = code.inferred - end - effects = decode_effects(code.ipo_purity_bits) - else # fallback pass for external AbstractInterpreter cache - effects = Effects() - src = code - end + inferred_result = get_cached_result(state, mi) + end + if inferred_result isa ConstantCase + add_inlining_backedge!(et, mi) + return inferred_result end + (; src, effects) = inferred_result # the duplicated check might have been done already within `analyze_method!`, but still # we need it here too since we may come here directly using a constant-prop' result - if !state.params.inlining || is_stmt_noinline(flag) - return compileable_specialization(et, match, effects) + if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) + return compileable_specialization(mi, effects, et, info; + compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) end - src = inlining_policy(state.interp, src, flag, mi, argtypes) + src = inlining_policy(state.interp, src, info, flag) + src === nothing && return compileable_specialization(mi, effects, et, info; + compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) - src === nothing && return compileable_specialization(et, match, effects) - - et !== nothing && push!(et, mi) - return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) + add_inlining_backedge!(et, mi) + ir = retrieve_ir_for_inlining(mi, src, preserve_local_sources) + return InliningTodo(mi, ir, effects) end -function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8) - ncases = length(cases) - newcases = Vector{InliningCase}(undef, ncases) - for i in 1:ncases - (; sig, item) = cases[i] - newitem = resolve_todo(item, state, flag) - push!(newcases, InliningCase(sig, newitem)) +# the special resolver for :invoke-d call +function resolve_todo(mi::MethodInstance, @nospecialize(info::CallInfo), flag::UInt32, + state::InliningState) + if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) + return nothing end - return UnionSplit(fully_covered, atype, newcases) + + et = InliningEdgeTracker(state) + + cached_result = get_cached_result(state, mi) + if cached_result isa ConstantCase + add_inlining_backedge!(et, mi) + return cached_result + end + (; src, effects) = cached_result + + src = inlining_policy(state.interp, src, info, flag) + + src === nothing && return nothing + + add_inlining_backedge!(et, mi) + return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) end function validate_sparams(sparams::SimpleVector) for i = 1:length(sparams) - (isa(sparams[i], TypeVar) || isvarargtype(sparams[i])) && return false + spᵢ = sparams[i] + (isa(spᵢ, TypeVar) || isvarargtype(spᵢ)) && return false end return true end +function may_have_fcalls(m::Method) + isdefined(m, :source) || return true + src = m.source + isa(src, MaybeCompressed) || return true + return ccall(:jl_ir_flag_has_fcall, Bool, (Any,), src) +end + function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, - flag::UInt8, state::InliningState, - do_resolve::Bool = true) + @nospecialize(info::CallInfo), flag::UInt32, state::InliningState; + allow_typevars::Bool, invokesig::Union{Nothing,Vector{Any}}=nothing, + volatile_inf_result::Union{Nothing,VolatileInferenceResult}=nothing) method = match.method 
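# Illustrative aside (a sketch of the failure mode `validate_sparams` and `allow_typevars`
# deal with; `elemtype` is a hypothetical example, not from this patch): when a static
# parameter cannot be recovered from the call signature, type intersection leaves it as a
# `TypeVar`, so the inliner must either bail out or materialize the parameter at run time.
elemtype(x::Vector{T}) where {T} = T
m = first(methods(elemtype))
env = ccall(:jl_type_intersection_with_env, Any, (Any, Any),
            Tuple{typeof(elemtype), Vector}, m.sig)::Core.SimpleVector
@show env[2][1] isa TypeVar    # expected: true, i.e. `T` is left unresolved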
spec_types = match.spec_types @@ -897,94 +969,117 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, end end - # Bail out if any static parameters are left as TypeVar - validate_sparams(match.sparams) || return nothing - - et = state.et - - # See if there exists a specialization for this method signature - mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance} - isa(mi, MethodInstance) || return compileable_specialization(et, match, Effects()) + if !validate_sparams(match.sparams) + (allow_typevars && !may_have_fcalls(match.method)) || return nothing + end - todo = InliningTodo(mi, match, argtypes) - # If we don't have caches here, delay resolving this MethodInstance - # until the batch inlining step (or an external post-processing pass) - do_resolve && state.mi_cache === nothing && return todo - return resolve_todo(todo, state, flag) + # Get the specialization for this method signature + # (later we will decide what to do with it) + mi = specialize_method(match) + return resolve_todo(mi, volatile_inf_result, info, flag, state; invokesig) end -function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) - return InliningTodo(mi, ResolvedInliningSpec(ir, effects)) +function retrieve_ir_for_inlining(mi::MethodInstance, src::String, ::Bool=true) + src = _uncompressed_ir(mi.def, src) + return inflate_ir!(src, mi) end - -function retrieve_ir_for_inlining(mi::MethodInstance, src::Array{UInt8, 1}) - src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo +function retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo, preserve_local_sources::Bool=true) + if preserve_local_sources + src = copy(src) + end return inflate_ir!(src, mi) end -retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = inflate_ir(src, mi)::IRCode -retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode) = copy(ir) +function retrieve_ir_for_inlining(::MethodInstance, ir::IRCode, preserve_local_sources::Bool=true) + if preserve_local_sources + ir = copy(ir) + end + return ir +end + +function flags_for_effects(effects::Effects) + flags::UInt32 = 0 + if is_consistent(effects) + flags |= IR_FLAG_CONSISTENT + end + if is_effect_free(effects) + flags |= IR_FLAG_EFFECT_FREE + elseif is_effect_free_if_inaccessiblememonly(effects) + flags |= IR_FLAG_EFIIMO + end + if is_inaccessiblemem_or_argmemonly(effects) + flags |= IR_FLAG_INACCESSIBLE_OR_ARGMEM + end + if is_nothrow(effects) + flags |= IR_FLAG_NOTHROW + end + if is_noub(effects) + flags |= IR_FLAG_NOUB + end + return flags +end -function handle_single_case!( - ir::IRCode, idx::Int, stmt::Expr, - @nospecialize(case), todo::Vector{Pair{Int, Any}}, params::OptimizationParams, isinvoke::Bool = false) +function handle_single_case!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, @nospecialize(case), + isinvoke::Bool = false) if isa(case, ConstantCase) - ir[SSAValue(idx)][:inst] = case.val + ir[SSAValue(idx)][:stmt] = case.val elseif isa(case, InvokeCase) - is_total(case.effects) && inline_const_if_inlineable!(ir[SSAValue(idx)]) && return nothing + is_foldable_nothrow(case.effects) && inline_const_if_inlineable!(ir[SSAValue(idx)]) && return nothing isinvoke && rewrite_invoke_exprargs!(stmt) - stmt.head = :invoke - pushfirst!(stmt.args, case.invoke) - if is_removable_if_unused(case.effects) - ir[SSAValue(idx)][:flag] |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - elseif is_nothrow(case.effects) - ir[SSAValue(idx)][:flag] |= IR_FLAG_NOTHROW + if stmt.head === 
:invoke + stmt.args[1] = case.invoke + else + stmt.head = :invoke + pushfirst!(stmt.args, case.invoke) end + add_flag!(ir[SSAValue(idx)], flags_for_effects(case.effects)) elseif case === nothing # Do, well, nothing else isinvoke && rewrite_invoke_exprargs!(stmt) push!(todo, idx=>(case::InliningTodo)) end - nothing + return nothing end rewrite_invoke_exprargs!(expr::Expr) = (expr.args = invoke_rewrite(expr.args); expr) function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::OptimizationParams) if isa(typ, Const) && (v = typ.val; isa(v, SimpleVector)) - length(v) > params.MAX_TUPLE_SPLAT && return false + length(v) > params.max_tuple_splat && return false for p in v is_inlineable_constant(p) || return false end return true end typ = widenconst(typ) - if isa(typ, DataType) && typ.name === NamedTuple_typename + if isa(typ, DataType) && typ.name === _NAMEDTUPLE_NAME typ = typ.parameters[2] typ = unwraptv(typ) end isa(typ, DataType) || return false if typ.name === Tuple.name - return !isvatuple(typ) && length(typ.parameters) <= params.MAX_TUPLE_SPLAT + return !isvatuple(typ) && length(typ.parameters) <= params.max_tuple_splat else return false end end -function inline_splatnew!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(rt)) - nf = nfields_tfunc(rt) +function inline_splatnew!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(rt), state::InliningState) + 𝕃ₒ = optimizer_lattice(state.interp) + nf = nfields_tfunc(𝕃ₒ, rt) if nf isa Const eargs = stmt.args tup = eargs[2] tt = argextype(tup, ir) - tnf = nfields_tfunc(tt) + tnf = nfields_tfunc(𝕃ₒ, tt) # TODO: hoisting this tnf.val === nf.val check into codegen # would enable us to almost always do this transform if tnf isa Const && tnf.val === nf.val n = tnf.val::Int new_argexprs = Any[eargs[1]] for j = 1:n - atype = getfield_tfunc(tt, Const(j)) + atype = getfield_tfunc(𝕃ₒ, tt, Const(j)) new_call = Expr(:call, Core.getfield, tup, j) new_argexpr = insert_node!(ir, idx, NewInstruction(new_call, atype)) push!(new_argexprs, new_argexpr) @@ -993,19 +1088,26 @@ function inline_splatnew!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(rt)) stmt.args = new_argexprs end end - nothing + return nothing end function call_sig(ir::IRCode, stmt::Expr) isempty(stmt.args) && return nothing - ft = argextype(stmt.args[1], ir) + if stmt.head === :call + offset = 1 + elseif stmt.head === :invoke + offset = 2 + else + return nothing + end + ft = argextype(stmt.args[offset], ir) has_free_typevars(ft) && return nothing f = singleton_type(ft) f === Core.Intrinsics.llvmcall && return nothing f === Core.Intrinsics.cglobal && return nothing argtypes = Vector{Any}(undef, length(stmt.args)) argtypes[1] = ft - for i = 2:length(stmt.args) + for i = (offset+1):length(stmt.args) a = argextype(stmt.args[i], ir) (a === Bottom || isvarargtype(a)) && return nothing argtypes[i] = a @@ -1013,22 +1115,21 @@ function call_sig(ir::IRCode, stmt::Expr) return Signature(f, ft, argtypes) end -function inline_apply!( - ir::IRCode, idx::Int, stmt::Expr, sig::Signature, - state::InliningState, todo::Vector{Pair{Int, Any}}) +function inline_apply!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, sig::Signature, state::InliningState) while sig.f === Core._apply_iterate info = ir.stmts[idx][:info] if isa(info, UnionSplitApplyCallInfo) if length(info.infos) != 1 # TODO: Handle union split applies? 
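# Illustrative aside (a user-level sketch of the identity rewrite described just below):
# when the only splatted argument is already a tuple and every other splatted argument is
# empty, the `_apply_iterate` call can collapse to that tuple.
tuple_roundtrip(t::Tuple) = (t...,)        # can be rewritten to just `t` by inline_apply!
@assert tuple_roundtrip((1, 2, 3)) === (1, 2, 3)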
- new_info = info = false + new_info = info = NoCallInfo() else info = info.infos[1] new_info = info.call end else - @assert info === nothing || info === false - new_info = info = false + @assert info === NoCallInfo() + new_info = info = NoCallInfo() end arg_start = 3 argtypes = sig.argtypes @@ -1040,10 +1141,11 @@ function inline_apply!( # if one argument is a tuple already, and the rest are empty, we can just return it # e.g. rewrite `((t::Tuple)...,)` to `t` nonempty_idx = 0 + 𝕃ₒ = optimizer_lattice(state.interp) for i = (arg_start + 1):length(argtypes) ti = argtypes[i] - ti ⊑ Tuple{} && continue - if ti ⊑ Tuple && nonempty_idx == 0 + ⊑(𝕃ₒ, ti, Tuple{}) && continue + if ⊑(𝕃ₒ, ti, Tuple) && nonempty_idx == 0 nonempty_idx = i continue end @@ -1051,7 +1153,7 @@ function inline_apply!( break end if nonempty_idx != 0 - ir.stmts[idx][:inst] = stmt.args[nonempty_idx] + ir[SSAValue(idx)][:stmt] = stmt.args[nonempty_idx] return nothing end end @@ -1060,10 +1162,10 @@ function inline_apply!( arginfos = MaybeAbstractIterationInfo[] for i = (arg_start + 1):length(argtypes) thisarginfo = nothing - if !is_valid_type_for_apply_rewrite(argtypes[i], state.params) - if isa(info, ApplyCallInfo) && info.arginfo[i-arg_start] !== nothing - thisarginfo = info.arginfo[i-arg_start] - else + if !is_valid_type_for_apply_rewrite(argtypes[i], OptimizationParams(state.interp)) + isa(info, ApplyCallInfo) || return nothing + thisarginfo = info.arginfo[i-arg_start] + if thisarginfo === nothing || !thisarginfo.complete return nothing end end @@ -1071,9 +1173,8 @@ function inline_apply!( end # Independent of whether we can inline, the above analysis allows us to rewrite # this apply call to a regular call - argtypes = rewrite_apply_exprargs!( - ir, idx, stmt, argtypes, - arginfos, arg_start, state, todo) + argtypes = rewrite_apply_exprargs!(todo, + ir, idx, stmt, argtypes, arginfos, arg_start, state) ir.stmts[idx][:info] = new_info has_free_typevars(ft) && return nothing f = singleton_type(ft) @@ -1083,41 +1184,52 @@ function inline_apply!( end # TODO: this test is wrong if we start to handle Unions of function types later -is_builtin(s::Signature) = - isa(s.f, IntrinsicFunction) || - s.ft ⊑ IntrinsicFunction || - isa(s.f, Builtin) || - s.ft ⊑ Builtin - -function inline_invoke!( - ir::IRCode, idx::Int, stmt::Expr, info::InvokeCallInfo, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) +function is_builtin(𝕃ₒ::AbstractLattice, s::Signature) + isa(s.f, IntrinsicFunction) && return true + ⊑(𝕃ₒ, s.ft, IntrinsicFunction) && return true + isa(s.f, Builtin) && return true + ⊑(𝕃ₒ, s.ft, Builtin) && return true + return false +end + +function handle_invoke_call!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, info::InvokeCallInfo, flag::UInt32, + sig::Signature, state::InliningState) match = info.match if !match.fully_covers # TODO: We could union split out the signature check and continue on return nothing end result = info.result + invokesig = sig.argtypes if isa(result, ConcreteResult) - item = concrete_result_item(result, state) + item = concrete_result_item(result, info, state; invokesig) + elseif isa(result, SemiConcreteResult) + item = semiconcrete_result_item(result, info, flag, state) else argtypes = invoke_rewrite(sig.argtypes) if isa(result, ConstPropResult) - (; mi) = item = InliningTodo(result.result, argtypes) + mi = result.result.linfo validate_sparams(mi.sparam_vals) || return nothing - if argtypes_to_type(argtypes) <: mi.def.sig - state.mi_cache !== nothing && 
(item = resolve_todo(item, state, flag)) - handle_single_case!(ir, idx, stmt, item, todo, state.params, true) + if Union{} !== argtypes_to_type(argtypes) <: mi.def.sig + item = resolve_todo(mi, result.result, info, flag, state; invokesig) + handle_single_case!(todo, ir, idx, stmt, item, true) return nothing end end - item = analyze_method!(match, argtypes, flag, state) + volatile_inf_result = result isa VolatileInferenceResult ? result : nothing + item = analyze_method!(match, argtypes, info, flag, state; allow_typevars=false, invokesig, volatile_inf_result) end - handle_single_case!(ir, idx, stmt, item, todo, state.params, true) + handle_single_case!(todo, ir, idx, stmt, item, true) return nothing end -function narrow_opaque_closure!(ir::IRCode, stmt::Expr, @nospecialize(info), state::InliningState) +function invoke_signature(argtypes::Vector{Any}) + ft, argtyps = widenconst(argtypes[2]), instanceof_tfunc(widenconst(argtypes[3]), false)[1] + return rewrap_unionall(Tuple{ft, unwrap_unionall(argtyps).parameters...}, argtyps) +end + +function narrow_opaque_closure!(ir::IRCode, stmt::Expr, @nospecialize(info::CallInfo), state::InliningState) if isa(info, OpaqueClosureCreateInfo) lbt = argextype(stmt.args[2], ir) lb, exact = instanceof_tfunc(lbt) @@ -1126,9 +1238,10 @@ function narrow_opaque_closure!(ir::IRCode, stmt::Expr, @nospecialize(info), sta ub, exact = instanceof_tfunc(ubt) exact || return # Narrow opaque closure type - newT = widenconst(tmeet(tmerge(lb, info.unspec.rt), ub)) + 𝕃ₒ = optimizer_lattice(state.interp) + newT = widenconst(tmeet(𝕃ₒ, tmerge(𝕃ₒ, lb, info.unspec.rt), ub)) if newT != ub - # N.B.: Narrowing the ub requires a backdge on the mi whose type + # N.B.: Narrowing the ub requires a backedge on the mi whose type # information we're using, since a change in that function may # invalidate ub result. stmt.args[3] = newT @@ -1139,34 +1252,54 @@ end # As a matter of convenience, this pass also computes effect-freenes. # For primitives, we do that right here. For proper calls, we will # discover this when we consult the caches. -function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt)) - (total, nothrow) = stmt_effect_flags(stmt, rt, ir) - if total - ir.stmts[idx][:flag] |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW +function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt), state::InliningState) + return check_effect_free!(ir, idx, stmt, rt, optimizer_lattice(state.interp)) +end +function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt), 𝕃ₒ::AbstractLattice) + (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags(𝕃ₒ, stmt, rt, ir) + inst = ir.stmts[idx] + if consistent + add_flag!(inst, IR_FLAG_CONSISTENT) + end + if effect_free_and_nothrow + add_flag!(inst, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) elseif nothrow - ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW + add_flag!(inst, IR_FLAG_NOTHROW) end - return total + if !(isexpr(stmt, :call) || isexpr(stmt, :invoke)) + # There is a bit of a subtle point here, which is that some non-call + # statements (e.g. PiNode) can be UB:, however, we consider it + # illegal to introduce such statements that actually cause UB (for any + # input). Ideally that'd be handled at insertion time (TODO), but for + # the time being just do that here. + add_flag!(inst, IR_FLAG_NOUB) + end + return effect_free_and_nothrow end # Handles all analysis and inlining of intrinsics and builtins. 
In particular, # this method does not access the method table or otherwise process generic # functions. -function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vector{Pair{Int, Any}}) - stmt = ir.stmts[idx][:inst] - rt = ir.stmts[idx][:type] +function process_simple!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, state::InliningState) + inst = ir[SSAValue(idx)] + stmt = inst[:stmt] + rt = inst[:type] if !(stmt isa Expr) - check_effect_free!(ir, idx, stmt, rt) + check_effect_free!(ir, idx, stmt, rt, state) return nothing end head = stmt.head if head !== :call if head === :splatnew - inline_splatnew!(ir, idx, stmt, rt) + inline_splatnew!(ir, idx, stmt, rt, state) elseif head === :new_opaque_closure - narrow_opaque_closure!(ir, stmt, ir.stmts[idx][:info], state) + narrow_opaque_closure!(ir, stmt, inst[:info], state) + elseif head === :invoke + sig = call_sig(ir, stmt) + sig === nothing && return nothing + return stmt, sig end - check_effect_free!(ir, idx, stmt, rt) + check_effect_free!(ir, idx, stmt, rt, state) return nothing end @@ -1174,114 +1307,90 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto sig === nothing && return nothing # Handle _apply_iterate - sig = inline_apply!(ir, idx, stmt, sig, state, todo) + sig = inline_apply!(todo, ir, idx, stmt, sig, state) sig === nothing && return nothing # Check if we match any of the early inliners - earlyres = early_inline_special_case(ir, stmt, rt, sig, state.params) + earlyres = early_inline_special_case(ir, stmt, rt, sig, state) if isa(earlyres, SomeCase) - ir.stmts[idx][:inst] = earlyres.val - return nothing - end - if (sig.f === modifyfield! || sig.ft ⊑ typeof(modifyfield!)) && 5 <= length(stmt.args) <= 6 - let info = ir.stmts[idx][:info] - info isa MethodResultPure && (info = info.info) - info isa ConstCallInfo && (info = info.call) - info isa MethodMatchInfo || return nothing - length(info.results) == 1 || return nothing - match = info.results[1]::MethodMatch - match.fully_covers || return nothing - case = compileable_specialization(state.et, match, Effects()) - case === nothing && return nothing - stmt.head = :invoke_modify - pushfirst!(stmt.args, case.invoke) - ir.stmts[idx][:inst] = stmt - end + inst[:stmt] = earlyres.val return nothing end - if check_effect_free!(ir, idx, stmt, rt) - if sig.f === typeassert || sig.ft ⊑ typeof(typeassert) + if check_effect_free!(ir, idx, stmt, rt, state) + if sig.f === typeassert || ⊑(optimizer_lattice(state.interp), sig.ft, typeof(typeassert)) # typeassert is a no-op if effect free - ir.stmts[idx][:inst] = stmt.args[2] + inst[:stmt] = stmt.args[2] return nothing end end - if sig.f !== Core.invoke && sig.f !== Core.finalizer && is_builtin(sig) - # No inlining for builtins (other invoke/apply/typeassert) + if (sig.f !== Core.invoke && sig.f !== Core.finalizer && sig.f !== modifyfield!) && + is_builtin(optimizer_lattice(state.interp), sig) + # No inlining for builtins (other invoke/apply/typeassert/finalizer) return nothing end # Special case inliners for regular functions - lateres = late_inline_special_case!(ir, idx, stmt, rt, sig, state.params) + lateres = late_inline_special_case!(ir, idx, stmt, rt, sig, state) if isa(lateres, SomeCase) - ir[SSAValue(idx)][:inst] = lateres.val - check_effect_free!(ir, idx, lateres.val, rt) + inst[:stmt] = lateres.val + check_effect_free!(ir, idx, lateres.val, rt, state) return nothing end return stmt, sig end -# TODO inline non-`isdispatchtuple`, union-split callsites? 
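# Illustrative aside (a user-level sketch, not taken from this patch) of the union split
# that `compute_inlining_cases` models: a call whose argument type is a small `Union` is
# split into one `InliningCase` per matching method, and when not every candidate was seen
# only cases with concrete "dispatch tuple" signatures are kept.
split_me(x::Int) = 1
split_me(x::Float64) = 2
callsite(x::Union{Int,Float64}) = split_me(x)     # conceptually: x isa Int ? 1 : 2
@assert callsite(3) == 1 && callsite(3.0) == 2
@assert Base.isdispatchtuple(Tuple{typeof(split_me), Int})    # concrete, usable as a split case
@assert !Base.isdispatchtuple(Tuple{typeof(split_me), Real})  # abstract, needs full coverage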
-function compute_inlining_cases( - infos::Vector{MethodMatchInfo}, flag::UInt8, - sig::Signature, state::InliningState, - do_resolve::Bool = true) - argtypes = sig.argtypes - cases = InliningCase[] - local any_fully_covered = false - local handled_all_cases = true - for i in 1:length(infos) - meth = infos[i].results - if meth.ambig - # Too many applicable methods - # Or there is a (partial?) ambiguity - return nothing - elseif length(meth) == 0 - # No applicable methods; try next union split - handled_all_cases = false - continue - end - for match in meth - handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true, do_resolve) - any_fully_covered |= match.fully_covers - end - end - - if !handled_all_cases - # if we've not seen all candidates, union split is valid only for dispatch tuples - filter!(case::InliningCase->isdispatchtuple(case.sig), cases) +function handle_any_const_result!(cases::Vector{InliningCase}, + @nospecialize(result), match::MethodMatch, argtypes::Vector{Any}, + @nospecialize(info::CallInfo), flag::UInt32, state::InliningState; + allow_abstract::Bool, allow_typevars::Bool) + if isa(result, ConcreteResult) + return handle_concrete_result!(cases, result, info, state) + elseif isa(result, SemiConcreteResult) + return handle_semi_concrete_result!(cases, result, info, flag, state; allow_abstract) + elseif isa(result, ConstPropResult) + return handle_const_prop_result!(cases, result, info, flag, state; allow_abstract, allow_typevars) + else + @assert result === nothing || result isa VolatileInferenceResult + return handle_match!(cases, match, argtypes, info, flag, state; allow_abstract, allow_typevars, volatile_inf_result = result) end - - return cases, handled_all_cases & any_fully_covered end -function analyze_single_call!( - ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) - - r = compute_inlining_cases(infos, flag, sig, state) - r === nothing && return nothing - cases, all_covered = r - handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, - all_covered, todo, state.params) +function info_effects(@nospecialize(result), match::MethodMatch, state::InliningState) + if isa(result, ConcreteResult) + return result.effects + elseif isa(result, SemiConcreteResult) + return result.effects + elseif isa(result, ConstPropResult) + return result.result.ipo_effects + else + mi = specialize_method(match; preexisting=true) + if isa(mi, MethodInstance) + code = get(code_cache(state), mi, nothing) + if code isa CodeInstance + return decode_effects(code.ipo_purity_bits) + end + end + return Effects() + end end -# similar to `analyze_single_call!`, but with constant results -function handle_const_call!( - ir::IRCode, idx::Int, stmt::Expr, cinfo::ConstCallInfo, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) - argtypes = sig.argtypes - (; call, results) = cinfo - infos = isa(call, MethodMatchInfo) ? 
MethodMatchInfo[call] : call.matches +function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig::Signature, + state::InliningState) + nunion = nsplit(info) + nunion === nothing && return nothing cases = InliningCase[] - local any_fully_covered = false - local handled_all_cases = true - local j = 0 - for i in 1:length(infos) - meth = infos[i].results + argtypes = sig.argtypes + local handled_all_cases::Bool = true + local revisit_idx = local only_method = nothing + local meth::MethodLookupResult + local all_result_count = 0 + local joint_effects::Effects = EFFECTS_TOTAL + local fully_covered::Bool = true + for i = 1:nunion + meth = getsplit(info, i) if meth.ambig # Too many applicable methods # Or there is a (partial?) ambiguity @@ -1290,107 +1399,291 @@ function handle_const_call!( # No applicable methods; try next union split handled_all_cases = false continue + else + if length(meth) == 1 && only_method !== missing + if only_method === nothing + only_method = meth[1].method + elseif only_method !== meth[1].method + only_method = missing + end + else + only_method = missing + end end - for match in meth - j += 1 - result = results[j] - any_fully_covered |= match.fully_covers - if isa(result, ConcreteResult) - case = concrete_result_item(result, state) - push!(cases, InliningCase(result.mi.specTypes, case)) - elseif isa(result, ConstPropResult) - handled_all_cases &= handle_const_prop_result!(result, argtypes, flag, state, cases, true) + local split_fully_covered::Bool = false + for (j, match) in enumerate(meth) + all_result_count += 1 + result = getresult(info, all_result_count) + joint_effects = merge_effects(joint_effects, info_effects(result, match, state)) + split_fully_covered |= match.fully_covers + if !validate_sparams(match.sparams) + if !match.fully_covers + handled_all_cases = false + continue + end + if revisit_idx === nothing + revisit_idx = (i, j, all_result_count) + else + handled_all_cases = false + revisit_idx = nothing + end else - @assert result === nothing - handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true) + handled_all_cases &= handle_any_const_result!(cases, + result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=false) end end - end - - if !handled_all_cases + fully_covered &= split_fully_covered + end + + (handled_all_cases && fully_covered) || (joint_effects = Effects(joint_effects; nothrow=false)) + + if handled_all_cases && revisit_idx !== nothing + # we handled everything except one match with unmatched sparams, + # so try to handle it by bypassing validate_sparams + (i, j, k) = revisit_idx + match = getsplit(info, i)[j] + result = getresult(info, k) + handled_all_cases &= handle_any_const_result!(cases, + result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true) + elseif length(cases) == 0 && only_method isa Method + # if the signature is fully covered and there is only one applicable method, + # we can try to inline it even in the presence of unmatched sparams + # -- But don't try it if we already tried to handle the match in the revisit_idx + # case, because that'll (necessarily) be the same method. 
+ if nsplit(info)::Int > 1 + atype = argtypes_to_type(argtypes) + (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), atype, only_method.sig)::SimpleVector + match = MethodMatch(metharg, methsp::SimpleVector, only_method, true) + result = nothing + else + @assert length(meth) == 1 + match = meth[1] + result = getresult(info, 1) + end + handle_any_const_result!(cases, + result, match, argtypes, info, flag, state; allow_abstract=true, allow_typevars=true) + fully_covered = handled_all_cases = match.fully_covers + elseif !handled_all_cases # if we've not seen all candidates, union split is valid only for dispatch tuples filter!(case::InliningCase->isdispatchtuple(case.sig), cases) end - handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases, - handled_all_cases & any_fully_covered, todo, state.params) + return cases, (handled_all_cases & fully_covered), joint_effects end -function handle_match!( - match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState, - cases::Vector{InliningCase}, allow_abstract::Bool = false, - do_resolve::Bool = true) +function handle_call!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt32, sig::Signature, + state::InliningState) + cases = compute_inlining_cases(info, flag, sig, state) + cases === nothing && return nothing + cases, all_covered, joint_effects = cases + handle_cases!(todo, ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, + all_covered, joint_effects) +end + +function handle_match!(cases::Vector{InliningCase}, + match::MethodMatch, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt32, + state::InliningState; + allow_abstract::Bool, allow_typevars::Bool, volatile_inf_result::Union{Nothing,VolatileInferenceResult}) spec_types = match.spec_types allow_abstract || isdispatchtuple(spec_types) || return false - # we may see duplicated dispatch signatures here when a signature gets widened + # We may see duplicated dispatch signatures here when a signature gets widened # during abstract interpretation: for the purpose of inlining, we can just skip - # processing this dispatch candidate - _any(case->case.sig === spec_types, cases) && return true - item = analyze_method!(match, argtypes, flag, state, do_resolve) + # processing this dispatch candidate (unless unmatched type parameters are present) + !allow_typevars && any(case::InliningCase->case.sig === spec_types, cases) && return true + item = analyze_method!(match, argtypes, info, flag, state; allow_typevars, volatile_inf_result) + item === nothing && return false + push!(cases, InliningCase(spec_types, item)) + return true +end + +function handle_const_prop_result!(cases::Vector{InliningCase}, + result::ConstPropResult, @nospecialize(info::CallInfo), flag::UInt32, state::InliningState; + allow_abstract::Bool, allow_typevars::Bool) + mi = result.result.linfo + spec_types = mi.specTypes + allow_abstract || isdispatchtuple(spec_types) || return false + if !validate_sparams(mi.sparam_vals) + (allow_typevars && !may_have_fcalls(mi.def::Method)) || return false + end + item = resolve_todo(mi, result.result, info, flag, state) item === nothing && return false push!(cases, InliningCase(spec_types, item)) return true end -function handle_const_prop_result!( - result::ConstPropResult, argtypes::Vector{Any}, flag::UInt8, state::InliningState, - cases::Vector{InliningCase}, allow_abstract::Bool = false) - (; mi) = item = InliningTodo(result.result, argtypes) +function 
semiconcrete_result_item(result::SemiConcreteResult, + @nospecialize(info::CallInfo), flag::UInt32, state::InliningState) + mi = result.mi + et = InliningEdgeTracker(state) + + if (!OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) || + # For `NativeInterpreter`, `SemiConcreteResult` may be produced for + # a `@noinline`-declared method when it's marked as `@constprop :aggressive`. + # Suppress the inlining here (unless inlining is requested at the callsite). + (is_declared_noinline(mi.def::Method) && !is_stmt_inline(flag))) + return compileable_specialization(mi, result.effects, et, info; + compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) + end + ir = inlining_policy(state.interp, result.ir, info, flag) + ir === nothing && return compileable_specialization(mi, result.effects, et, info; + compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) + + add_inlining_backedge!(et, mi) + preserve_local_sources = OptimizationParams(state.interp).preserve_local_sources + ir = retrieve_ir_for_inlining(mi, ir, preserve_local_sources) + return InliningTodo(mi, ir, result.effects) +end + +function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiConcreteResult, + @nospecialize(info::CallInfo), flag::UInt32, state::InliningState; + allow_abstract::Bool) + mi = result.mi spec_types = mi.specTypes allow_abstract || isdispatchtuple(spec_types) || return false validate_sparams(mi.sparam_vals) || return false - state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) + item = semiconcrete_result_item(result, info, flag, state) item === nothing && return false push!(cases, InliningCase(spec_types, item)) return true end -function concrete_result_item(result::ConcreteResult, state::InliningState) - if !isdefined(result, :result) || !is_inlineable_constant(result.result) - case = compileable_specialization(state.et, result.mi, result.effects) - @assert case !== nothing "concrete evaluation should never happen for uncompileable callsite" - return case +function handle_concrete_result!(cases::Vector{InliningCase}, result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState) + case = concrete_result_item(result, info, state) + case === nothing && return false + push!(cases, InliningCase(result.mi.specTypes, case)) + return true +end + +may_inline_concrete_result(result::ConcreteResult) = + isdefined(result, :result) && is_inlineable_constant(result.result) + +function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState; + invokesig::Union{Nothing,Vector{Any}}=nothing) + if !may_inline_concrete_result(result) + et = InliningEdgeTracker(state, invokesig) + return compileable_specialization(result.mi, result.effects, et, info; + compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) end @assert result.effects === EFFECTS_TOTAL return ConstantCase(quoted(result.result)) end -function handle_cases!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(atype), - cases::Vector{InliningCase}, fully_covered::Bool, todo::Vector{Pair{Int, Any}}, - params::OptimizationParams) +function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, + @nospecialize(atype), cases::Vector{InliningCase}, fully_covered::Bool, + joint_effects::Effects) # If we only have one case and that case is fully covered, we may either # be able to do the inlining now (for constant cases), or push it directly # onto the todo list if fully_covered && length(cases) == 1 - 
handle_single_case!(ir, idx, stmt, cases[1].item, todo, params) + handle_single_case!(todo, ir, idx, stmt, cases[1].item) elseif length(cases) > 0 isa(atype, DataType) || return nothing for case in cases isa(case.sig, DataType) || return nothing end push!(todo, idx=>UnionSplit(fully_covered, atype, cases)) + else + add_flag!(ir[SSAValue(idx)], flags_for_effects(joint_effects)) + end + return nothing +end + +function handle_opaque_closure_call!(todo::Vector{Pair{Int,Any}}, + ir::IRCode, idx::Int, stmt::Expr, info::OpaqueClosureCallInfo, + flag::UInt32, sig::Signature, state::InliningState) + result = info.result + if isa(result, ConstPropResult) + mi = result.result.linfo + validate_sparams(mi.sparam_vals) || return nothing + item = resolve_todo(mi, result.result, info, flag, state) + elseif isa(result, ConcreteResult) + item = concrete_result_item(result, info, state) + elseif isa(result, SemiConcreteResult) + item = item = semiconcrete_result_item(result, info, flag, state) + else + @assert result === nothing || result isa VolatileInferenceResult + volatile_inf_result = result + item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false, volatile_inf_result) end + handle_single_case!(todo, ir, idx, stmt, item) return nothing end -function handle_const_opaque_closure_call!( - ir::IRCode, idx::Int, stmt::Expr, result::ConstPropResult, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) - item = InliningTodo(result.result, sig.argtypes) - isdispatchtuple(item.mi.specTypes) || return - validate_sparams(item.mi.sparam_vals) || return - state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) - handle_single_case!(ir, idx, stmt, item, todo, state.params) +function handle_modifyfield!_call!(ir::IRCode, idx::Int, stmt::Expr, info::ModifyFieldInfo, state::InliningState) + info = info.info + info isa MethodResultPure && (info = info.info) + info isa ConstCallInfo && (info = info.call) + info isa MethodMatchInfo || return nothing + length(info.results) == 1 || return nothing + match = info.results[1]::MethodMatch + match.fully_covers || return nothing + case = compileable_specialization(match, Effects(), InliningEdgeTracker(state), info; + compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) + case === nothing && return nothing + stmt.head = :invoke_modify + pushfirst!(stmt.args, case.invoke) + ir[SSAValue(idx)][:stmt] = stmt + return nothing +end + +function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::FinalizerInfo, + state::InliningState) + # Finalizers don't return values, so if their execution is not observable, + # we can just not register them + if is_removable_if_unused(info.effects) + ir[SSAValue(idx)] = nothing + return nothing + end + + # Only inline finalizers that are known nothrow and notls. 
+ # This avoids having to set up state for finalizer isolation + is_finalizer_inlineable(info.effects) || return nothing + + ft = argextype(stmt.args[2], ir) + has_free_typevars(ft) && return nothing + f = singleton_type(ft) + argtypes = Vector{Any}(undef, 2) + argtypes[1] = ft + argtypes[2] = argextype(stmt.args[3], ir) + sig = Signature(f, ft, argtypes) + + cases = compute_inlining_cases(info.info, #=flag=#UInt32(0), sig, state) + cases === nothing && return nothing + cases, all_covered, _ = cases + if all_covered && length(cases) == 1 + # NOTE we don't append `item1` to `stmt` here so that we don't serialize + # `Core.Compiler` data structure into the global cache + item1 = cases[1].item + if isa(item1, InliningTodo) + push!(stmt.args, true) + push!(stmt.args, item1.mi) + elseif isa(item1, InvokeCase) + push!(stmt.args, false) + push!(stmt.args, item1.invoke) + elseif isa(item1, ConstantCase) + push!(stmt.args, nothing) + push!(stmt.args, item1.val) + end + end + return nothing +end + +function handle_invoke_expr!(todo::Vector{Pair{Int,Any}}, ir::IRCode, + idx::Int, stmt::Expr, @nospecialize(info::CallInfo), flag::UInt32, sig::Signature, state::InliningState) + mi = stmt.args[1]::MethodInstance + case = resolve_todo(mi, info, flag, state) + handle_single_case!(todo, ir, idx, stmt, case, false) return nothing end function inline_const_if_inlineable!(inst::Instruction) rt = inst[:type] if rt isa Const && is_inlineable_constant(rt.val) - inst[:inst] = quoted(rt.val) + inst[:stmt] = quoted(rt.val) return true end - inst[:flag] |= IR_FLAG_EFFECT_FREE + add_flag!(inst, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) return false end @@ -1398,110 +1691,43 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) todo = Pair{Int, Any}[] for idx in 1:length(ir.stmts) - simpleres = process_simple!(ir, idx, state, todo) + simpleres = process_simple!(todo, ir, idx, state) simpleres === nothing && continue stmt, sig = simpleres + flag = ir.stmts[idx][:flag] info = ir.stmts[idx][:info] + # `NativeInterpreter` won't need this, but provide a support for `:invoke` exprs here + # for external `AbstractInterpreter`s that may run the inlining pass multiple times + if isexpr(stmt, :invoke) + handle_invoke_expr!(todo, ir, idx, stmt, info, flag, sig, state) + continue + end + # Check whether this call was @pure and evaluates to a constant if info isa MethodResultPure inline_const_if_inlineable!(ir[SSAValue(idx)]) && continue info = info.info end - if info === false + if info === NoCallInfo() # Inference determined this couldn't be analyzed. Don't question it. continue end - flag = ir.stmts[idx][:flag] - + # handle special cased builtins if isa(info, OpaqueClosureCallInfo) - result = info.result - if isa(result, ConstPropResult) - handle_const_opaque_closure_call!( - ir, idx, stmt, result, flag, - sig, state, todo) - else - if isa(result, ConcreteResult) - item = concrete_result_item(result, state) - else - item = analyze_method!(info.match, sig.argtypes, flag, state) - end - handle_single_case!(ir, idx, stmt, item, todo, state.params) - end - continue - end - - # Handle invoke - if sig.f === Core.invoke - if isa(info, InvokeCallInfo) - inline_invoke!(ir, idx, stmt, info, flag, sig, state, todo) - end - continue - end - - # Handle finalizer - if sig.f === Core.finalizer - if isa(info, FinalizerInfo) - # Only inline finalizers that are known nothrow and notls. 
- # This avoids having to set up state for finalizer isolation - (is_nothrow(info.effects) && is_notaskstate(info.effects)) || continue - - info = info.info - if isa(info, MethodMatchInfo) - infos = MethodMatchInfo[info] - elseif isa(info, UnionSplitInfo) - infos = info.matches - else - continue - end - - ft = argextype(stmt.args[2], ir) - has_free_typevars(ft) && return nothing - f = singleton_type(ft) - argtypes = Vector{Any}(undef, 2) - argtypes[1] = ft - argtypes[2] = argextype(stmt.args[3], ir) - sig = Signature(f, ft, argtypes) - - cases, all_covered = compute_inlining_cases(infos, UInt8(0), sig, state, false) - length(cases) == 0 && continue - if all_covered && length(cases) == 1 - if isa(cases[1], InliningCase) - case1 = cases[1].item - if isa(case1, InliningTodo) - push!(stmt.args, true) - push!(stmt.args, case1.mi) - elseif isa(case1, InvokeCase) - push!(stmt.args, false) - push!(stmt.args, case1.invoke) - end - end - end - continue - end - end - - # if inference arrived here with constant-prop'ed result(s), - # we can perform a specialized analysis for just this case - if isa(info, ConstCallInfo) - handle_const_call!( - ir, idx, stmt, info, flag, - sig, state, todo) - continue - end - - # Ok, now figure out what method to call - if isa(info, MethodMatchInfo) - infos = MethodMatchInfo[info] - elseif isa(info, UnionSplitInfo) - infos = info.matches + handle_opaque_closure_call!(todo, ir, idx, stmt, info, flag, sig, state) + elseif isa(info, ModifyFieldInfo) + handle_modifyfield!_call!(ir, idx, stmt, info, state) + elseif isa(info, InvokeCallInfo) + handle_invoke_call!(todo, ir, idx, stmt, info, flag, sig, state) + elseif isa(info, FinalizerInfo) + handle_finalizer_call!(ir, idx, stmt, info, state) else - continue # isa(info, ReturnTypeCallInfo), etc. 
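# Illustrative aside (a sketch under assumptions; `Handle` is a hypothetical type, not from
# this patch) of the pattern `handle_finalizer_call!` targets: when the registered callback
# resolves to a single known, effect-analyzable method, the resolved MethodInstance (or a
# constant) is appended to the `:finalizer` call so later passes can use a direct invoke
# instead of a dynamic dispatch at finalization time.
mutable struct Handle
    ptr::Ptr{Cvoid}
    function Handle(p::Ptr{Cvoid})
        h = new(p)
        finalizer(x -> (x.ptr = C_NULL; nothing), h)   # simple, nothrow callback
        return h
    end
end
h = Handle(C_NULL)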
+ # cascade to the generic (and extendable) handler + handle_call!(todo, ir, idx, stmt, info, flag, sig, state) end - - analyze_single_call!(ir, idx, stmt, infos, flag, sig, state, todo) end return todo @@ -1509,24 +1735,16 @@ end function linear_inline_eligible(ir::IRCode) length(ir.cfg.blocks) == 1 || return false - terminator = ir[SSAValue(last(ir.cfg.blocks[1].stmts))][:inst] + terminator = ir[SSAValue(last(ir.cfg.blocks[1].stmts))][:stmt] isa(terminator, ReturnNode) || return false isdefined(terminator, :val) || return false return true end -# Check for a number of functions known to be pure -function ispuretopfunction(@nospecialize(f)) - return istopfunction(f, :typejoin) || - istopfunction(f, :isbits) || - istopfunction(f, :isbitstype) || - istopfunction(f, :promote_type) -end - function early_inline_special_case( ir::IRCode, stmt::Expr, @nospecialize(type), sig::Signature, - params::OptimizationParams) - params.inlining || return nothing + state::InliningState) + OptimizationParams(state.interp).inlining || return nothing (; f, ft, argtypes) = sig if isa(type, Const) # || isconstType(type) @@ -1536,10 +1754,10 @@ function early_inline_special_case( if is_pure_intrinsic_infer(f) && intrinsic_nothrow(f, argtypes[2:end]) return SomeCase(quoted(val)) end - elseif ispuretopfunction(f) || contains_is(_PURE_BUILTINS, f) + elseif contains_is(_PURE_BUILTINS, f) return SomeCase(quoted(val)) elseif contains_is(_EFFECT_FREE_BUILTINS, f) - if _builtin_nothrow(f, argtypes[2:end], type) + if _builtin_nothrow(optimizer_lattice(state.interp), f, argtypes[2:end], type) return SomeCase(quoted(val)) end elseif f === Core.get_binding_type @@ -1549,6 +1767,29 @@ function early_inline_special_case( end end end + if f === compilerbarrier + # check if this `compilerbarrier` has already imposed a barrier on abstract interpretation + # so that it can be eliminated here + length(argtypes) == 3 || return nothing + setting = argtypes[2] + isa(setting, Const) || return nothing + setting = setting.val + isa(setting, Symbol) || return nothing + # setting === :const || setting === :type barrier const evaluation, + # so they can't be eliminated at IPO time + setting === :conditional || return nothing + # barriered successfully already, eliminate it + return SomeCase(stmt.args[3]) + elseif f === Core.ifelse && length(argtypes) == 4 + cond = argtypes[2] + if isa(cond, Const) + if cond.val === true + return SomeCase(stmt.args[3]) + elseif cond.val === false + return SomeCase(stmt.args[4]) + end + end + end return nothing end @@ -1557,8 +1798,8 @@ end # NOTE we manually inline the method bodies, and so the logic here needs to precisely sync with their definitions function late_inline_special_case!( ir::IRCode, idx::Int, stmt::Expr, @nospecialize(type), sig::Signature, - params::OptimizationParams) - params.inlining || return nothing + state::InliningState) + OptimizationParams(state.interp).inlining || return nothing (; f, ft, argtypes) = sig if length(argtypes) == 3 && istopfunction(f, :!==) # special-case inliner for !== that precedes _methods_by_ftype union splitting @@ -1567,23 +1808,23 @@ function late_inline_special_case!( return SomeCase(quoted(type.val)) end cmp_call = Expr(:call, GlobalRef(Core, :(===)), stmt.args[2], stmt.args[3]) - cmp_call_ssa = insert_node!(ir, idx, effect_free(NewInstruction(cmp_call, Bool))) + cmp_call_ssa = insert_node!(ir, idx, effect_free_and_nothrow(NewInstruction(cmp_call, Bool))) not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa) return SomeCase(not_call) 
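# Illustrative aside (sketch, not from this patch) of the `!==` special case above: instead
# of union-splitting the generic `!==` method, the inliner emits the primitive sequence
# `not_int(a === b)` directly, which is what the two inserted nodes encode.
neq_demo(a, b) = a !== b
neq_demo_rewritten(a, b) = Core.Intrinsics.not_int(a === b)
@assert neq_demo(1, 2) === neq_demo_rewritten(1, 2) === true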
elseif length(argtypes) == 3 && istopfunction(f, :(>:)) # special-case inliner for issupertype # that works, even though inference generally avoids inferring the `>:` Method - if isa(type, Const) && _builtin_nothrow(<:, Any[argtypes[3], argtypes[2]], type) + if isa(type, Const) && _builtin_nothrow(optimizer_lattice(state.interp), <:, Any[argtypes[3], argtypes[2]], type) return SomeCase(quoted(type.val)) end subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2]) return SomeCase(subtype_call) - elseif f === TypeVar && 2 <= length(argtypes) <= 4 && (argtypes[2] ⊑ Symbol) + elseif f === TypeVar && 2 <= length(argtypes) <= 4 && ⊑(optimizer_lattice(state.interp), argtypes[2], Symbol) typevar_call = Expr(:call, GlobalRef(Core, :_typevar), stmt.args[2], length(stmt.args) < 4 ? Bottom : stmt.args[3], length(stmt.args) == 2 ? Any : stmt.args[end]) return SomeCase(typevar_call) - elseif f === UnionAll && length(argtypes) == 3 && (argtypes[2] ⊑ TypeVar) + elseif f === UnionAll && length(argtypes) == 3 && ⊑(optimizer_lattice(state.interp), argtypes[2], TypeVar) unionall_call = Expr(:foreigncall, QuoteNode(:jl_type_unionall), Any, svec(Any, Any), 0, QuoteNode(:ccall), stmt.args[2], stmt.args[3]) return SomeCase(unionall_call) @@ -1597,55 +1838,90 @@ function late_inline_special_case!( return nothing end -function ssa_substitute!(idx::Int, @nospecialize(val), arg_replacements::Vector{Any}, - @nospecialize(spsig), spvals::SimpleVector, - linetable_offset::Int32, boundscheck::Symbol, compact::IncrementalCompact) - compact.result[idx][:flag] &= ~IR_FLAG_INBOUNDS - compact.result[idx][:line] += linetable_offset - return ssa_substitute_op!(val, arg_replacements, spsig, spvals, boundscheck) +struct SSASubstitute + mi::MethodInstance + arg_replacements::Vector{Any} + spvals_ssa::Union{Nothing,SSAValue} + linetable_offset::Int32 +end + +function ssa_substitute!(insert_node!::Inserter, subst_inst::Instruction, @nospecialize(val), + ssa_substitute::SSASubstitute) + subst_inst[:line] += ssa_substitute.linetable_offset + return ssa_substitute_op!(insert_node!, subst_inst, val, ssa_substitute) +end + +function insert_spval!(insert_node!::Inserter, spvals_ssa::SSAValue, spidx::Int, do_isdefined::Bool) + ret = insert_node!( + effect_free_and_nothrow(NewInstruction(Expr(:call, Core._svec_ref, spvals_ssa, spidx), Any))) + tcheck_not = nothing + if do_isdefined + tcheck = insert_node!( + effect_free_and_nothrow(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool))) + tcheck_not = insert_node!( + effect_free_and_nothrow(NewInstruction(Expr(:call, not_int, tcheck), Bool))) + end + return (ret, tcheck_not) end -function ssa_substitute_op!(@nospecialize(val), arg_replacements::Vector{Any}, - @nospecialize(spsig), spvals::SimpleVector, boundscheck::Symbol) +function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, @nospecialize(val), + ssa_substitute::SSASubstitute) if isa(val, Argument) - return arg_replacements[val.n] + return ssa_substitute.arg_replacements[val.n] end if isa(val, Expr) e = val::Expr head = e.head + sparam_vals = ssa_substitute.mi.sparam_vals if head === :static_parameter - return quoted(spvals[e.args[1]::Int]) - elseif head === :cfunction - @assert !isa(spsig, UnionAll) || !isempty(spvals) - e.args[3] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[3], spsig, spvals) + spidx = e.args[1]::Int + val = sparam_vals[spidx] + if !isa(val, TypeVar) && val !== Vararg + return quoted(val) + else + flag = subst_inst[:flag] + maybe_undef = 
!has_flag(flag, IR_FLAG_NOTHROW) && isa(val, TypeVar) + (ret, tcheck_not) = insert_spval!(insert_node!, ssa_substitute.spvals_ssa::SSAValue, spidx, maybe_undef) + if maybe_undef + insert_node!( + NewInstruction(Expr(:throw_undef_if_not, val.name, tcheck_not), Nothing)) + end + return ret + end + elseif head === :isdefined && isa(e.args[1], Expr) && e.args[1].head === :static_parameter + spidx = (e.args[1]::Expr).args[1]::Int + val = sparam_vals[spidx] + if !isa(val, TypeVar) + return true + else + (_, tcheck_not) = insert_spval!(insert_node!, ssa_substitute.spvals_ssa::SSAValue, spidx, true) + return tcheck_not + end + elseif head === :cfunction && ssa_substitute.spvals_ssa === nothing + msig = (ssa_substitute.mi.def::Method).sig + @assert !isa(msig, UnionAll) || !isempty(sparam_vals) + e.args[3] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[3], msig, sparam_vals) e.args[4] = svec(Any[ - ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals) + ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, msig, sparam_vals) for argt in e.args[4]::SimpleVector ]...) - elseif head === :foreigncall - @assert !isa(spsig, UnionAll) || !isempty(spvals) + elseif head === :foreigncall && ssa_substitute.spvals_ssa === nothing + msig = (ssa_substitute.mi.def::Method).sig + @assert !isa(msig, UnionAll) || !isempty(sparam_vals) for i = 1:length(e.args) if i == 2 - e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], spsig, spvals) + e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], msig, sparam_vals) elseif i == 3 e.args[3] = svec(Any[ - ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals) + ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, msig, sparam_vals) for argt in e.args[3]::SimpleVector ]...) 
end end - elseif head === :boundscheck - if boundscheck === :off # inbounds == true - return false - elseif boundscheck === :propagate - return e - else # on or default - return true - end end end - isa(val, Union{SSAValue, NewSSAValue}) && return val # avoid infinite loop + isa(val, AnySSAValue) && return val # avoid infinite loop urs = userefs(val) for op in urs - op[] = ssa_substitute_op!(op[], arg_replacements, spsig, spvals, boundscheck) + op[] = ssa_substitute_op!(insert_node!, subst_inst, op[], ssa_substitute) end return urs[] end diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 93ca66cb7c931..289f643a84f3a 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -2,7 +2,7 @@ Core.PhiNode() = Core.PhiNode(Int32[], Any[]) -isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) +isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) || isa(stmt, EnterNode) struct CFG blocks::Vector{BasicBlock} @@ -23,17 +23,23 @@ function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) preds = cfg.blocks[to].preds succs = cfg.blocks[from].succs # Assumes that blocks appear at most once in preds and succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + deleteat!(preds, findfirst(x::Int->x==from, preds)::Int) + deleteat!(succs, findfirst(x::Int->x==to, succs)::Int) nothing end +function bb_ordering() + lt = (<=) + by = x::BasicBlock -> first(x.stmts) + ord(lt, by, nothing, Forward) +end + function block_for_inst(index::Vector{Int}, inst::Int) return searchsortedfirst(index, inst, lt=(<=)) end function block_for_inst(index::Vector{BasicBlock}, inst::Int) - return searchsortedfirst(index, BasicBlock(StmtRange(inst, inst)), by=x->first(x.stmts), lt=(<=))-1 + return searchsortedfirst(index, BasicBlock(StmtRange(inst, inst)), bb_ordering())-1 end block_for_inst(cfg::CFG, inst::Int) = block_for_inst(cfg.index, inst) @@ -54,16 +60,18 @@ block_for_inst(cfg::CFG, inst::Int) = block_for_inst(cfg.index, inst) # This is a fake dest to force the next stmt to start a bb idx < length(stmts) && push!(jump_dests, idx+1) push!(jump_dests, stmt.label) + elseif isa(stmt, EnterNode) + # :enter starts/ends a BB + push!(jump_dests, idx) + push!(jump_dests, idx+1) + # The catch block is a jump dest + if stmt.catch_dest != 0 + push!(jump_dests, stmt.catch_dest) + end elseif isa(stmt, Expr) if stmt.head === :leave # :leave terminates a BB push!(jump_dests, idx+1) - elseif stmt.head === :enter - # :enter starts/ends a BB - push!(jump_dests, idx) - push!(jump_dests, idx+1) - # The catch block is a jump dest - push!(jump_dests, stmt.args[1]::Int) end end if isa(stmt, PhiNode) @@ -119,14 +127,16 @@ function compute_basic_blocks(stmts::Vector{Any}) push!(blocks[block′].preds, num) push!(b.succs, block′) end - elseif isexpr(terminator, :enter) + elseif isa(terminator, EnterNode) # :enter gets a virtual edge to the exception handler and # the exception handler gets a virtual edge from outside # the function. 
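# Illustrative aside (user-level sketch): lowered `try`/`catch` is what ends up as an
# EnterNode in the IR, and its `catch_dest` names the catch block, which is why that block
# receives the extra CFG edges described in the comment above.
function guarded_sqrt(x)
    try
        return sqrt(x)
    catch
        return zero(x)
    end
end
@assert guarded_sqrt(4.0) == 2.0
@assert guarded_sqrt(-1.0) == 0.0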
- block′ = block_for_inst(basic_block_index, terminator.args[1]::Int) - push!(blocks[block′].preds, num) - push!(blocks[block′].preds, 0) - push!(b.succs, block′) + if terminator.catch_dest != 0 + block′ = block_for_inst(basic_block_index, terminator.catch_dest) + push!(blocks[block′].preds, num) + push!(blocks[block′].preds, 0) + push!(b.succs, block′) + end end # statement fall-through if num + 1 <= length(blocks) @@ -138,14 +148,30 @@ function compute_basic_blocks(stmts::Vector{Any}) end # this function assumes insert position exists -function first_insert_for_bb(code, cfg::CFG, block::Int) - for idx in cfg.blocks[block].stmts +function is_valid_phiblock_stmt(@nospecialize(stmt)) + isa(stmt, PhiNode) && return true + isa(stmt, Union{UpsilonNode, PhiCNode, SSAValue}) && return false + isa(stmt, Expr) && return is_value_pos_expr_head(stmt.head) + return true +end + +function first_insert_for_bb(code::Vector{Any}, cfg::CFG, block::Int) + stmts = cfg.blocks[block].stmts + lastnonphiidx = first(stmts) + for idx in stmts stmt = code[idx] if !isa(stmt, PhiNode) - return idx + if !is_valid_phiblock_stmt(stmt) + return lastnonphiidx + end + else + lastnonphiidx = idx + 1 end end - error("any insert position isn't found") + if lastnonphiidx > last(stmts) + error("any insert position isn't found") + end + return lastnonphiidx end # SSA values that need renaming @@ -180,32 +206,32 @@ const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue} # SSA-indexed nodes struct InstructionStream - inst::Vector{Any} + stmt::Vector{Any} type::Vector{Any} - info::Vector{Any} + info::Vector{CallInfo} line::Vector{Int32} - flag::Vector{UInt8} + flag::Vector{UInt32} end function InstructionStream(len::Int) - insts = Array{Any}(undef, len) - types = Array{Any}(undef, len) - info = Array{Any}(undef, len) - fill!(info, nothing) + stmts = Vector{Any}(undef, len) + types = Vector{Any}(undef, len) + info = Vector{CallInfo}(undef, len) + fill!(info, NoCallInfo()) lines = fill(Int32(0), len) flags = fill(IR_FLAG_NULL, len) - return InstructionStream(insts, types, info, lines, flags) + return InstructionStream(stmts, types, info, lines, flags) end InstructionStream() = InstructionStream(0) -length(is::InstructionStream) = length(is.inst) -isempty(is::InstructionStream) = isempty(is.inst) -function add!(is::InstructionStream) +length(is::InstructionStream) = length(is.stmt) +isempty(is::InstructionStream) = isempty(is.stmt) +function add_new_idx!(is::InstructionStream) ninst = length(is) + 1 resize!(is, ninst) return ninst end function copy(is::InstructionStream) return InstructionStream( - copy_exprargs(is.inst), + copy_exprargs(is.stmt), copy(is.type), copy(is.info), copy(is.line), @@ -213,7 +239,7 @@ function copy(is::InstructionStream) end function resize!(stmts::InstructionStream, len) old_length = length(stmts) - resize!(stmts.inst, len) + resize!(stmts.stmt, len) resize!(stmts.type, len) resize!(stmts.info, len) resize!(stmts.line, len) @@ -221,7 +247,7 @@ function resize!(stmts::InstructionStream, len) for i in (old_length + 1):len stmts.line[i] = 0 stmts.flag[i] = IR_FLAG_NULL - stmts.info[i] = nothing + stmts.info[i] = NoCallInfo() end return stmts end @@ -230,20 +256,22 @@ struct Instruction data::InstructionStream idx::Int end -Instruction(is::InstructionStream) = Instruction(is, add!(is)) +Instruction(is::InstructionStream) = Instruction(is, add_new_idx!(is)) @inline function getindex(node::Instruction, fld::Symbol) + (fld === :inst) && (fld = :stmt) # deprecated isdefined(node, fld) && return getfield(node, 
fld) return getfield(getfield(node, :data), fld)[getfield(node, :idx)] end @inline function setindex!(node::Instruction, @nospecialize(val), fld::Symbol) + (fld === :inst) && (fld = :stmt) # deprecated getfield(getfield(node, :data), fld)[getfield(node, :idx)] = val return node end @inline getindex(is::InstructionStream, idx::Int) = Instruction(is, idx) function setindex!(is::InstructionStream, newval::Instruction, idx::Int) - is.inst[idx] = newval[:inst] + is.stmt[idx] = newval[:stmt] is.type[idx] = newval[:type] is.info[idx] = newval[:info] is.line[idx] = newval[:line] @@ -251,7 +279,7 @@ function setindex!(is::InstructionStream, newval::Instruction, idx::Int) return is end function setindex!(is::InstructionStream, newval::Union{AnySSAValue, Nothing}, idx::Int) - is.inst[idx] = newval + is.stmt[idx] = newval return is end function setindex!(node::Instruction, newval::Instruction) @@ -259,6 +287,10 @@ function setindex!(node::Instruction, newval::Instruction) return node end +has_flag(inst::Instruction, flag::UInt32) = has_flag(inst[:flag], flag) +add_flag!(inst::Instruction, flag::UInt32) = inst[:flag] |= flag +sub_flag!(inst::Instruction, flag::UInt32) = inst[:flag] &= ~flag + struct NewNodeInfo # Insertion position (interpretation depends on which array this is in) pos::Int @@ -272,7 +304,7 @@ end NewNodeStream(len::Int=0) = NewNodeStream(InstructionStream(len), fill(NewNodeInfo(0, false), len)) length(new::NewNodeStream) = length(new.stmts) isempty(new::NewNodeStream) = isempty(new.stmts) -function add!(new::NewNodeStream, pos::Int, attach_after::Bool) +function add_inst!(new::NewNodeStream, pos::Int, attach_after::Bool) push!(new.info, NewNodeInfo(pos, attach_after)) return Instruction(new.stmts) end @@ -281,53 +313,90 @@ copy(nns::NewNodeStream) = NewNodeStream(copy(nns.stmts), copy(nns.info)) struct NewInstruction stmt::Any type::Any - info::Any - # If nothing, copy the line from previous statement - # in the insertion location - line::Union{Int32, Nothing} - flag::UInt8 - - ## Insertion options - - # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced). - # Don't bother redoing so on insertion. - effect_free_computed::Bool - NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info), - line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) = - new(stmt, type, info, line, flag, effect_free_computed) -end -NewInstruction(@nospecialize(stmt), @nospecialize(type)) = - NewInstruction(stmt, type, nothing) -NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) = - NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false) -NewInstruction(@nospecialize(stmt), meta::Instruction; line::Union{Int32, Nothing}=nothing) = - NewInstruction(stmt, meta[:type], meta[:info], line === nothing ? 
meta[:line] : line, meta[:flag], true) - -effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true) -non_effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true) + info::CallInfo + line::Union{Int32,Nothing} # if nothing, copy the line from previous statement in the insertion location + flag::Union{UInt32,Nothing} # if nothing, IR flags will be recomputed on insertion + function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), + line::Union{Int32,Nothing}, flag::Union{UInt32,Nothing}) + return new(stmt, type, info, line, flag) + end +end +function NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Int32,Nothing}=nothing) + return NewInstruction(stmt, type, NoCallInfo(), line, nothing) +end +@nospecialize +function NewInstruction(newinst::NewInstruction; + stmt::Any=newinst.stmt, + type::Any=newinst.type, + info::CallInfo=newinst.info, + line::Union{Int32,Nothing}=newinst.line, + flag::Union{UInt32,Nothing}=newinst.flag) + return NewInstruction(stmt, type, info, line, flag) +end +function NewInstruction(inst::Instruction; + stmt::Any=inst[:stmt], + type::Any=inst[:type], + info::CallInfo=inst[:info], + line::Union{Int32,Nothing}=inst[:line], + flag::Union{UInt32,Nothing}=inst[:flag]) + return NewInstruction(stmt, type, info, line, flag) +end +@specialize +effect_free_and_nothrow(newinst::NewInstruction) = add_flag(newinst, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) +function add_flag(newinst::NewInstruction, newflag::UInt32) + flag = newinst.flag + if flag === nothing + flag = newflag + else + flag |= newflag + end + return NewInstruction(newinst; flag) +end +function sub_flag(newinst::NewInstruction, newflag::UInt32) + flag = newinst.flag + if flag === nothing + flag = IR_FLAG_NULL + else + flag &= ~newflag + end + return NewInstruction(newinst; flag) +end struct IRCode stmts::InstructionStream argtypes::Vector{Any} - sptypes::Vector{Any} + sptypes::Vector{VarState} linetable::Vector{LineInfoNode} cfg::CFG new_nodes::NewNodeStream meta::Vector{Expr} - function IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{Any}) + function IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{VarState}) return new(stmts, argtypes, sptypes, linetable, cfg, NewNodeStream(), meta) end function IRCode(ir::IRCode, stmts::InstructionStream, cfg::CFG, new_nodes::NewNodeStream) return new(stmts, ir.argtypes, ir.sptypes, ir.linetable, cfg, new_nodes, ir.meta) end - global copy - copy(ir::IRCode) = new(copy(ir.stmts), copy(ir.argtypes), copy(ir.sptypes), + global copy(ir::IRCode) = new(copy(ir.stmts), copy(ir.argtypes), copy(ir.sptypes), copy(ir.linetable), copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta)) end +""" + IRCode() + +Create an empty IRCode object with a single `return nothing` statement. This method is mostly intended +for debugging and unit testing of IRCode APIs. The compiler itself should generally obtain an IRCode +from the frontend or one of the caches. 
+""" +function IRCode() + ir = IRCode(InstructionStream(1), CFG([BasicBlock(1:1, Int[], Int[])], Int[1]), LineInfoNode[], Any[], Expr[], VarState[]) + ir[SSAValue(1)][:stmt] = ReturnNode(nothing) + ir[SSAValue(1)][:type] = Nothing + ir[SSAValue(1)][:flag] = 0x00 + ir[SSAValue(1)][:line] = Int32(0) + return ir +end + function block_for_inst(ir::IRCode, inst::Int) if inst > length(ir.stmts) inst = ir.new_nodes.info[inst - length(ir.stmts)].pos @@ -335,11 +404,12 @@ function block_for_inst(ir::IRCode, inst::Int) block_for_inst(ir.cfg, inst) end -function getindex(x::IRCode, s::SSAValue) - if s.id <= length(x.stmts) - return x.stmts[s.id] +function getindex(ir::IRCode, s::SSAValue) + nstmts = length(ir.stmts) + if s.id <= nstmts + return ir.stmts[s.id] else - return x.new_nodes.stmts[s.id - length(x.stmts)] + return ir.new_nodes.stmts[s.id - nstmts] end end @@ -390,11 +460,15 @@ struct UndefToken end; const UNDEF_TOKEN = UndefToken() isdefined(stmt, :val) || return OOB_TOKEN op == 1 || return OOB_TOKEN return stmt.val + elseif isa(stmt, EnterNode) + isdefined(stmt, :scope) || return OOB_TOKEN + op == 1 || return OOB_TOKEN + return stmt.scope elseif isa(stmt, PiNode) isdefined(stmt, :val) || return OOB_TOKEN op == 1 || return OOB_TOKEN return stmt.val - elseif isa(stmt, Union{SSAValue, NewSSAValue}) + elseif isa(stmt, Union{AnySSAValue, GlobalRef}) op == 1 || return OOB_TOKEN return stmt elseif isa(stmt, UpsilonNode) @@ -420,8 +494,9 @@ function is_relevant_expr(e::Expr) :new, :splatnew, :(=), :(&), :gc_preserve_begin, :gc_preserve_end, :foreigncall, :isdefined, :copyast, - :undefcheck, :throw_undef_if_not, + :throw_undef_if_not, :cfunction, :method, :pop_exception, + :leave, :new_opaque_closure) end @@ -445,16 +520,19 @@ end stmt = GotoIfNot(v, stmt.dest) elseif isa(stmt, ReturnNode) op == 1 || throw(BoundsError()) - stmt = typeof(stmt)(v) - elseif isa(stmt, Union{SSAValue, NewSSAValue}) + stmt = ReturnNode(v) + elseif isa(stmt, EnterNode) + op == 1 || throw(BoundsError()) + stmt = EnterNode(stmt.catch_dest, v) + elseif isa(stmt, Union{AnySSAValue, GlobalRef}) op == 1 || throw(BoundsError()) stmt = v elseif isa(stmt, UpsilonNode) op == 1 || throw(BoundsError()) - stmt = typeof(stmt)(v) + stmt = UpsilonNode(v) elseif isa(stmt, PiNode) op == 1 || throw(BoundsError()) - stmt = typeof(stmt)(v, stmt.typ) + stmt = PiNode(v, stmt.typ) elseif isa(stmt, PhiNode) op > length(stmt.values) && throw(BoundsError()) isassigned(stmt.values, op) || throw(BoundsError()) @@ -476,8 +554,8 @@ end function userefs(@nospecialize(x)) relevant = (isa(x, Expr) && is_relevant_expr(x)) || - isa(x, GotoIfNot) || isa(x, ReturnNode) || isa(x, SSAValue) || isa(x, NewSSAValue) || - isa(x, PiNode) || isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode) + isa(x, GotoIfNot) || isa(x, ReturnNode) || isa(x, SSAValue) || isa(x, OldSSAValue) || isa(x, NewSSAValue) || + isa(x, PiNode) || isa(x, PhiNode) || isa(x, PhiCNode) || isa(x, UpsilonNode) || isa(x, EnterNode) return UseRefIterator(x, relevant) end @@ -499,45 +577,89 @@ end # This function is used from the show code, which may have a different # `push!`/`used` type since it's in Base. -scan_ssa_use!(push!, used, @nospecialize(stmt)) = foreachssa(ssa -> push!(used, ssa.id), stmt) +scan_ssa_use!(@specialize(push!), used, @nospecialize(stmt)) = foreachssa(ssa::SSAValue -> push!(used, ssa.id), stmt) # Manually specialized copy of the above with push! === Compiler.push! 
-scan_ssa_use!(used::IdSet, @nospecialize(stmt)) = foreachssa(ssa -> push!(used, ssa.id), stmt) - -function insert_node!(ir::IRCode, pos::Int, inst::NewInstruction, attach_after::Bool=false) - node = add!(ir.new_nodes, pos, attach_after) - node[:line] = something(inst.line, ir.stmts[pos][:line]) - flag = inst.flag - if !inst.effect_free_computed - (effect_free_and_nothrow, nothrow) = stmt_effect_flags(inst.stmt, inst.type, ir) - if effect_free_and_nothrow - flag |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - elseif nothrow - flag |= IR_FLAG_NOTHROW +scan_ssa_use!(used::IdSet, @nospecialize(stmt)) = foreachssa(ssa::SSAValue -> push!(used, ssa.id), stmt) + +function insert_node!(ir::IRCode, pos::SSAValue, newinst::NewInstruction, attach_after::Bool=false) + posid = pos.id + if pos.id > length(ir.stmts) + if attach_after + info = ir.new_nodes.info[pos.id-length(ir.stmts)]; + posid = info.pos + attach_after = info.attach_after + else + error("Cannot attach before a pending node.") end end - node[:inst], node[:type], node[:flag] = inst.stmt, inst.type, flag + node = add_inst!(ir.new_nodes, posid, attach_after) + newline = something(newinst.line, ir[pos][:line]) + newflag = recompute_inst_flag(newinst, ir) + node = inst_from_newinst!(node, newinst, newline, newflag) return SSAValue(length(ir.stmts) + node.idx) end +insert_node!(ir::IRCode, pos::Int, newinst::NewInstruction, attach_after::Bool=false) = + insert_node!(ir, SSAValue(pos), newinst, attach_after) -# For bootstrapping -function my_sortperm(v) - p = Vector{Int}(undef, length(v)) - for i = 1:length(v) - p[i] = i +struct CFGTransformState + cfg_transforms_enabled::Bool + fold_constant_branches::Bool + result_bbs::Vector{BasicBlock} + bb_rename_pred::Vector{Int} + bb_rename_succ::Vector{Int} + domtree::Union{Nothing, DomTree} +end + +# N.B.: Takes ownership of the CFG array +function CFGTransformState!(blocks::Vector{BasicBlock}, allow_cfg_transforms::Bool=false) + if allow_cfg_transforms + bb_rename = Vector{Int}(undef, length(blocks)) + cur_bb = 1 + domtree = construct_domtree(blocks) + for i = 1:length(bb_rename) + if bb_unreachable(domtree, i) + bb_rename[i] = -1 + else + bb_rename[i] = cur_bb + cur_bb += 1 + end + end + for i = 1:length(bb_rename) + bb_rename[i] == -1 && continue + preds, succs = blocks[i].preds, blocks[i].succs + # Rename preds + for j = 1:length(preds) + if preds[j] != 0 + preds[j] = bb_rename[preds[j]] + end + end + # Dead blocks get removed from the predecessor list + filter!(x::Int->x≠-1, preds) + # Rename succs + for j = 1:length(succs) + succs[j] = bb_rename[succs[j]] + end + end + let blocks = blocks, bb_rename = bb_rename + result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1] + end + # TODO: This could be done by just renaming the domtree + domtree = construct_domtree(result_bbs) + else + bb_rename = Vector{Int}() + result_bbs = blocks + domtree = nothing end - sort!(p, Sort.DEFAULT_UNSTABLE, Order.Perm(Sort.Forward,v)) - p + return CFGTransformState(allow_cfg_transforms, allow_cfg_transforms, result_bbs, bb_rename, bb_rename, domtree) end mutable struct IncrementalCompact ir::IRCode result::InstructionStream - result_bbs::Vector{BasicBlock} + cfg_transform::CFGTransformState ssa_rename::Vector{Any} - bb_rename_pred::Vector{Int} - bb_rename_succ::Vector{Int} used_ssas::Vector{Int} late_fixup::Vector{Int} @@ -546,92 +668,56 @@ mutable struct IncrementalCompact # This supports insertion while compacting new_new_nodes::NewNodeStream # New nodes that were before the compaction point at 
insertion time new_new_used_ssas::Vector{Int} - # TODO: Switch these two to a min-heap of some sort pending_nodes::NewNodeStream # New nodes that were after the compaction point at insertion time - pending_perm::Vector{Int} + pending_perm::Vector{Int} # pending_nodes.info[pending_perm] is in min-heap order by pos # State idx::Int result_idx::Int + active_bb::Int active_result_bb::Int renamed_new_nodes::Bool - cfg_transforms_enabled::Bool - fold_constant_branches::Bool - function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) + function IncrementalCompact(code::IRCode, cfg_transform::CFGTransformState) # Sort by position with attach after nodes after regular ones - perm = my_sortperm(Int[let new_node = code.new_nodes.info[i] - (new_node.pos * 2 + Int(new_node.attach_after)) - end for i in 1:length(code.new_nodes)]) - new_len = length(code.stmts) + length(code.new_nodes) + info = code.new_nodes.info + perm = sort!(collect(eachindex(info)); by=i::Int->(2info[i].pos+info[i].attach_after, i)) + new_len = length(code.stmts) + length(info) result = InstructionStream(new_len) used_ssas = fill(0, new_len) new_new_used_ssas = Vector{Int}() blocks = code.cfg.blocks - if allow_cfg_transforms - bb_rename = Vector{Int}(undef, length(blocks)) - cur_bb = 1 - domtree = construct_domtree(blocks) - for i = 1:length(bb_rename) - if bb_unreachable(domtree, i) - bb_rename[i] = -1 - else - bb_rename[i] = cur_bb - cur_bb += 1 - end - end - for i = 1:length(bb_rename) - bb_rename[i] == -1 && continue - preds, succs = blocks[i].preds, blocks[i].succs - # Rename preds - for j = 1:length(preds) - if preds[j] != 0 - preds[j] = bb_rename[preds[j]] - end - end - # Dead blocks get removed from the predecessor list - filter!(x->x !== -1, preds) - # Rename succs - for j = 1:length(succs) - succs[j] = bb_rename[succs[j]] - end - end - let blocks = blocks, bb_rename = bb_rename - result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1] - end - else - bb_rename = Vector{Int}() - result_bbs = code.cfg.blocks - end ssa_rename = Any[SSAValue(i) for i = 1:new_len] late_fixup = Vector{Int}() new_new_nodes = NewNodeStream() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, result, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, + return new(code, result, cfg_transform, ssa_rename, used_ssas, late_fixup, perm, 1, new_new_nodes, new_new_used_ssas, pending_nodes, pending_perm, - 1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms) + 1, 1, 1, 1, false) end # For inlining function IncrementalCompact(parent::IncrementalCompact, code::IRCode, result_offset) - perm = my_sortperm(Int[code.new_nodes.info[i].pos for i in 1:length(code.new_nodes)]) - new_len = length(code.stmts) + length(code.new_nodes) + info = code.new_nodes.info + perm = sort!(collect(eachindex(info)); by=i::Int->(info[i].pos, i)) + new_len = length(code.stmts) + length(info) ssa_rename = Any[SSAValue(i) for i = 1:new_len] - new_new_used_ssas = Vector{Int}() - late_fixup = Vector{Int}() bb_rename = Vector{Int}() - new_new_nodes = NewNodeStream() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, parent.result, - parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas, - late_fixup, perm, 1, - new_new_nodes, new_new_used_ssas, pending_nodes, pending_perm, - 1, result_offset, parent.active_result_bb, false, false, false) + return new(code, parent.result, CFGTransformState(false, false, parent.cfg_transform.result_bbs, bb_rename, bb_rename, 
nothing), + ssa_rename, parent.used_ssas, + parent.late_fixup, perm, 1, + parent.new_new_nodes, parent.new_new_used_ssas, pending_nodes, pending_perm, + 1, result_offset, 1, parent.active_result_bb, false) end end +function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) + return IncrementalCompact(code, CFGTransformState!(code.cfg.blocks, allow_cfg_transforms)) +end + struct TypesView{T} ir::T # ::Union{IRCode, IncrementalCompact} end @@ -645,7 +731,7 @@ end function getindex(compact::IncrementalCompact, ssa::OldSSAValue) id = ssa.id if id < compact.idx - new_idx = compact.ssa_rename[id] + new_idx = compact.ssa_rename[id]::Int return compact.result[new_idx] elseif id <= length(compact.ir.stmts) return compact.ir.stmts[id] @@ -669,7 +755,8 @@ end function block_for_inst(compact::IncrementalCompact, idx::SSAValue) id = idx.id if id < compact.result_idx # if ssa within result - return block_for_inst(compact.result_bbs, id) + return searchsortedfirst(compact.cfg_transform.result_bbs, BasicBlock(StmtRange(id, id)), + 1, compact.active_result_bb, bb_ordering())-1 else return block_for_inst(compact.ir.cfg, id) end @@ -678,7 +765,8 @@ end function block_for_inst(compact::IncrementalCompact, idx::OldSSAValue) id = idx.id if id < compact.idx # if ssa within result - return block_for_inst(compact.result_bbs, compact.ssa_rename[id]) + id = compact.ssa_rename[id]::Int + return block_for_inst(compact, SSAValue(id)) else return block_for_inst(compact.ir.cfg, id) end @@ -697,6 +785,16 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV xb = block_for_inst(compact, x) yb = block_for_inst(compact, y) if xb == yb + if isa(compact[x][:stmt], PhiNode) + if isa(compact[y][:stmt], PhiNode) + # A node dominates another only if it dominates all uses of that node. + # Usually that is not a distinction. However, for phi nodes, the use + # occurs on the edge to the predecessor block. Thus, by definition, for + # any other PhiNode in the same BB there must be (at least) one edge + # that this phi node does not dominate.
+ return false + end + end xinfo = yinfo = nothing if isa(x, OldSSAValue) x′ = compact.ssa_rename[x.id]::SSAValue @@ -722,7 +820,7 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV else y′ = y end - if x′.id == y′.id && (xinfo !== nothing || yinfo !== nothing) + if x′.id == y′.id if xinfo !== nothing && yinfo !== nothing if xinfo.attach_after == yinfo.attach_after return x.id < y.id @@ -730,8 +828,8 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV return yinfo.attach_after elseif xinfo !== nothing return !xinfo.attach_after - else - return (yinfo::NewNodeInfo).attach_after + elseif yinfo !== nothing + return yinfo.attach_after end end return x′.id < y′.id @@ -739,43 +837,77 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV return dominates(domtree, xb, yb) end +function _count_added_node!(compact::IncrementalCompact, @nospecialize(val)) + if isa(val, SSAValue) + compact.used_ssas[val.id] += 1 + return false + elseif isa(val, NewSSAValue) + @assert val.id < 0 # Newly added nodes should be canonicalized + compact.new_new_used_ssas[-val.id] += 1 + return true + end + return false +end + function count_added_node!(compact::IncrementalCompact, @nospecialize(v)) needs_late_fixup = false for ops in userefs(v) - val = ops[] - if isa(val, SSAValue) - compact.used_ssas[val.id] += 1 - elseif isa(val, NewSSAValue) - @assert val.id < 0 # Newly added nodes should be canonicalized - compact.new_new_used_ssas[-val.id] += 1 - needs_late_fixup = true - end + needs_late_fixup |= _count_added_node!(compact, ops[]) end return needs_late_fixup end function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool) - node = add!(compact.pending_nodes, pos, attach_after) - # TODO: switch this to `l = length(pending_nodes); splice!(pending_perm, searchsorted(pending_perm, l), l)` - push!(compact.pending_perm, length(compact.pending_nodes)) - sort!(compact.pending_perm, DEFAULT_STABLE, Order.By(x->compact.pending_nodes.info[x].pos, Order.Forward)) + node = add_inst!(compact.pending_nodes, pos, attach_after) + heappush!(compact.pending_perm, length(compact.pending_nodes), By(x::Int->compact.pending_nodes.info[x].pos)) + return node +end + +function inst_from_newinst!(node::Instruction, newinst::NewInstruction, + newline::Int32=newinst.line::Int32, newflag::UInt32=newinst.flag::UInt32) + node[:stmt] = newinst.stmt + node[:type] = newinst.type + node[:info] = newinst.info + node[:line] = newline + node[:flag] = newflag return node end -function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, attach_after::Bool=false) - @assert inst.effect_free_computed +function recompute_inst_flag(newinst::NewInstruction, src::Union{IRCode,IncrementalCompact}) + flag = newinst.flag + flag !== nothing && return flag + flag = IR_FLAG_NULL + (consistent, effect_free_and_nothrow, nothrow) = stmt_effect_flags( + fallback_lattice, newinst.stmt, newinst.type, src) + if consistent + flag |= IR_FLAG_CONSISTENT + end + if effect_free_and_nothrow + flag |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW + elseif nothrow + flag |= IR_FLAG_NOTHROW + end + if !isexpr(newinst.stmt, :call) && !isexpr(newinst.stmt, :invoke) + # See comment in check_effect_free! 
+ flag |= IR_FLAG_NOUB + end + return flag +end + +function insert_node!(compact::IncrementalCompact, @nospecialize(before), newinst::NewInstruction, attach_after::Bool=false) + newflag = recompute_inst_flag(newinst, compact) if isa(before, SSAValue) if before.id < compact.result_idx - count_added_node!(compact, inst.stmt) - line = something(inst.line, compact.result[before.id][:line]) - node = add!(compact.new_new_nodes, before.id, attach_after) + count_added_node!(compact, newinst.stmt) + newline = something(newinst.line, compact.result[before.id][:line]) + node = add_inst!(compact.new_new_nodes, before.id, attach_after) + node = inst_from_newinst!(node, newinst, newline, newflag) push!(compact.new_new_used_ssas, 0) - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag return NewSSAValue(-node.idx) else - line = something(inst.line, compact.ir.stmts[before.id][:line]) + newline = something(newinst.line, compact.ir.stmts[before.id][:line]) node = add_pending!(compact, before.id, attach_after) - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag + node = inst_from_newinst!(node, newinst, newline, newflag) os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes)) push!(compact.ssa_rename, os) push!(compact.used_ssas, 0) @@ -785,11 +917,11 @@ function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, pos = before.id if pos < compact.idx renamed = compact.ssa_rename[pos]::AnySSAValue - count_added_node!(compact, inst.stmt) - line = something(inst.line, compact.result[renamed.id][:line]) - node = add!(compact.new_new_nodes, renamed.id, attach_after) + count_added_node!(compact, newinst.stmt) + newline = something(newinst.line, compact.result[renamed.id][:line]) + node = add_inst!(compact.new_new_nodes, renamed.id, attach_after) + node = inst_from_newinst!(node, newinst, newline, newflag) push!(compact.new_new_used_ssas, 0) - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag return NewSSAValue(-node.idx) else if pos > length(compact.ir.stmts) @@ -797,20 +929,23 @@ function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, info = compact.pending_nodes.info[pos - length(compact.ir.stmts) - length(compact.ir.new_nodes)] pos, attach_after = info.pos, info.attach_after end - line = something(inst.line, compact.ir.stmts[pos][:line]) + newline = something(newinst.line, compact.ir.stmts[pos][:line]) node = add_pending!(compact, pos, attach_after) - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag + node = inst_from_newinst!(node, newinst, newline, newflag) os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes)) push!(compact.ssa_rename, os) push!(compact.used_ssas, 0) return os end elseif isa(before, NewSSAValue) + # As above, new_new_nodes must get counted. We don't visit them during our compact, + # so they're immediately considered reified. 
+ count_added_node!(compact, newinst.stmt) # TODO: This is incorrect and does not maintain ordering among the new nodes before_entry = compact.new_new_nodes.info[-before.id] - line = something(inst.line, compact.new_new_nodes.stmts[-before.id][:line]) - new_entry = add!(compact.new_new_nodes, before_entry.pos, attach_after) - new_entry[:inst], new_entry[:type], new_entry[:line], new_entry[:flag] = inst.stmt, inst.type, line, inst.flag + newline = something(newinst.line, compact.new_new_nodes.stmts[-before.id][:line]) + new_entry = add_inst!(compact.new_new_nodes, before_entry.pos, attach_after) + new_entry = inst_from_newinst!(new_entry, newinst, newline, newflag) push!(compact.new_new_used_ssas, 0) return NewSSAValue(-new_entry.idx) else @@ -818,38 +953,51 @@ function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, end end -function insert_node_here!(compact::IncrementalCompact, inst::NewInstruction, reverse_affinity::Bool=false) - @assert inst.line !== nothing - refinish = false +function maybe_reopen_bb!(compact) result_idx = compact.result_idx - if reverse_affinity && - ((compact.active_result_bb == length(compact.result_bbs) + 1) || - result_idx == first(compact.result_bbs[compact.active_result_bb].stmts)) + result_bbs = compact.cfg_transform.result_bbs + if (compact.active_result_bb == length(result_bbs) + 1) || + result_idx == first(result_bbs[compact.active_result_bb].stmts) compact.active_result_bb -= 1 - refinish = true + return true end + return false +end + +function insert_node_here!(compact::IncrementalCompact, newinst::NewInstruction, reverse_affinity::Bool=false) + newline = newinst.line::Int32 + refinish = false + result_idx = compact.result_idx + result_bbs = compact.cfg_transform.result_bbs + refinish = reverse_affinity && maybe_reopen_bb!(compact) if result_idx > length(compact.result) @assert result_idx == length(compact.result) + 1 resize!(compact, result_idx) end - flag = inst.flag - if !inst.effect_free_computed - (effect_free_and_nothrow, nothrow) = stmt_effect_flags(inst.stmt, inst.type, compact) - if effect_free_and_nothrow - flag |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - elseif nothrow - flag |= IR_FLAG_NOTHROW - end - end - node = compact.result[result_idx] - node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, inst.line, flag - count_added_node!(compact, inst.stmt) && push!(compact.late_fixup, result_idx) + newflag = recompute_inst_flag(newinst, compact) + node = inst_from_newinst!(compact.result[result_idx], newinst, newline, newflag) + count_added_node!(compact, newinst.stmt) && push!(compact.late_fixup, result_idx) compact.result_idx = result_idx + 1 inst = SSAValue(result_idx) refinish && finish_current_bb!(compact, 0) return inst end +function delete_inst_here!(compact::IncrementalCompact) + # If we already closed this bb, reopen it for our modification + refinish = maybe_reopen_bb!(compact) + + # Delete the statement, update refcounts etc + compact[SSAValue(compact.result_idx-1)] = nothing + + # Pretend that we never compacted this statement in the first place + compact.result_idx -= 1 + + refinish && finish_current_bb!(compact, 0) + + return nothing +end + function getindex(view::TypesView, v::OldSSAValue) id = v.id ir = view.ir.ir @@ -865,51 +1013,53 @@ function getindex(view::TypesView, v::OldSSAValue) return view.ir.pending_nodes.stmts[id][:type] end -function kill_current_uses(compact::IncrementalCompact, @nospecialize(stmt)) +function kill_current_use!(compact::IncrementalCompact, @nospecialize(val)) + 
if isa(val, SSAValue) + @assert compact.used_ssas[val.id] >= 1 + compact.used_ssas[val.id] -= 1 + elseif isa(val, NewSSAValue) + @assert val.id < 0 + @assert compact.new_new_used_ssas[-val.id] >= 1 + compact.new_new_used_ssas[-val.id] -= 1 + end +end + +function kill_current_uses!(compact::IncrementalCompact, @nospecialize(stmt)) for ops in userefs(stmt) - val = ops[] - if isa(val, SSAValue) - @assert compact.used_ssas[val.id] >= 1 - compact.used_ssas[val.id] -= 1 - elseif isa(val, NewSSAValue) - @assert val.id < 0 - @assert compact.new_new_used_ssas[-val.id] >= 1 - compact.new_new_used_ssas[-val.id] -= 1 - end + kill_current_use!(compact, ops[]) end end -function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::SSAValue) - @assert idx.id < compact.result_idx - (compact.result[idx.id][:inst] === v) && return +function setindex!(compact::IncrementalCompact, @nospecialize(v), ssa::Union{SSAValue, NewSSAValue}) + (compact[ssa][:stmt] === v) && return compact # Kill count for current uses - kill_current_uses(compact, compact.result[idx.id][:inst]) - compact.result[idx.id][:inst] = v + kill_current_uses!(compact, compact[ssa][:stmt]) + compact[ssa][:stmt] = v # Add count for new use - count_added_node!(compact, v) && push!(compact.late_fixup, idx.id) + count_added_node!(compact, v) && isa(ssa, SSAValue) && push!(compact.late_fixup, ssa.id) return compact end function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::OldSSAValue) id = idx.id if id < compact.idx - new_idx = compact.ssa_rename[id] - (compact.result[new_idx][:inst] === v) && return - kill_current_uses(compact, compact.result[new_idx][:inst]) - compact.result[new_idx][:inst] = v + new_idx = compact.ssa_rename[id]::Int + (compact.result[new_idx][:stmt] === v) && return compact + kill_current_uses!(compact, compact.result[new_idx][:stmt]) + compact.result[new_idx][:stmt] = v count_added_node!(compact, v) && push!(compact.late_fixup, new_idx) return compact elseif id <= length(compact.ir.stmts) # ir.stmts, new_nodes, and pending_nodes uses aren't counted yet, so no need to adjust - compact.ir.stmts[id][:inst] = v + compact.ir.stmts[id][:stmt] = v return compact end id -= length(compact.ir.stmts) if id <= length(compact.ir.new_nodes) - compact.ir.new_nodes.stmts[id][:inst] = v + compact.ir.new_nodes.stmts[id][:stmt] = v return compact end id -= length(compact.ir.new_nodes) - compact.pending_nodes.stmts[id][:inst] = v + compact.pending_nodes.stmts[id][:stmt] = v return compact end @@ -917,7 +1067,7 @@ function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::Int) if idx < compact.result_idx compact[SSAValue(idx)] = v else - compact.ir.stmts[idx][:inst] = v + compact.ir.stmts[idx][:stmt] = v end return compact end @@ -925,21 +1075,63 @@ end __set_check_ssa_counts(onoff::Bool) = __check_ssa_counts__[] = onoff const __check_ssa_counts__ = fill(false) +should_check_ssa_counts() = __check_ssa_counts__[] + +# specifically meant to be used with body1 = compact.result and body2 = compact.new_new_nodes, with nvals == length(compact.used_ssas) +function find_ssavalue_uses1(compact::IncrementalCompact) + body1, body2 = compact.result.stmt, compact.new_new_nodes.stmts.stmt + nvals = length(compact.used_ssas) + nvalsnew = length(compact.new_new_used_ssas) + nbody1 = compact.result_idx + nbody2 = length(body2) + + uses = zeros(Int, nvals) + usesnew = zeros(Int, nvalsnew) + function increment_uses(ssa::AnySSAValue) + if isa(ssa, NewSSAValue) + usesnew[-ssa.id] += 1 + elseif isa(ssa, SSAValue) + uses[ssa.id] += 1 + 
end + end + + for line in 1:(nbody1 + nbody2) + # index into the right body + if line <= nbody1 + isassigned(body1, line) || continue + e = body1[line] + else + line -= nbody1 + isassigned(body2, line) || continue + e = body2[line] + end + + foreach_anyssa(increment_uses, e) + end + + return (uses, usesnew) +end + function _oracle_check(compact::IncrementalCompact) - observed_used_ssas = Core.Compiler.find_ssavalue_uses1(compact) + (observed_used_ssas, observed_used_newssas) = Core.Compiler.find_ssavalue_uses1(compact) for i = 1:length(observed_used_ssas) if observed_used_ssas[i] != compact.used_ssas[i] - return observed_used_ssas + return (observed_used_ssas, observed_used_newssas, SSAValue(i)) end end - return nothing + for i = 1:length(observed_used_newssas) + if observed_used_newssas[i] != compact.new_new_used_ssas[i] + return (observed_used_ssas, observed_used_newssas, NewSSAValue(i)) + end + end + return (nothing, nothing, 0) end function oracle_check(compact::IncrementalCompact) - maybe_oracle_used_ssas = _oracle_check(compact) + (maybe_oracle_used_ssas, observed_used_newssas, oracle_error_ssa) = _oracle_check(compact) if maybe_oracle_used_ssas !== nothing - @eval Main (compact = $compact; oracle_used_ssas = $maybe_oracle_used_ssas) - error("Oracle check failed, inspect Main.compact and Main.oracle_used_ssas") + @eval Main (compact = $compact; oracle_used_ssas = $maybe_oracle_used_ssas; observed_used_newssas = $observed_used_newssas; oracle_error_ssa = $(QuoteNode(oracle_error_ssa))) + error("Oracle check failed, inspect Main.{compact, oracle_used_ssas, observed_used_newssas, oracle_error_ssa}") end end @@ -967,64 +1159,81 @@ function getindex(view::TypesView, idx::NewSSAValue) return view.ir[idx][:type] end +# N.B.: Don't make this <: Function to avoid ::Function deopt +struct Refiner + result_flags::Vector{UInt32} + result_idx::Int +end +(this::Refiner)() = (this.result_flags[this.result_idx] |= IR_FLAG_REFINED; nothing) + function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int}, - processed_idx::Int, result_idx::Int, + already_inserted, result_idx::Int, ssa_rename::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, - do_rename_ssa::Bool) + do_rename_ssa::Bool, + mark_refined!::Union{Refiner, Nothing}) values = Vector{Any}(undef, length(old_values)) for i = 1:length(old_values) isassigned(old_values, i) || continue val = old_values[i] if isa(val, SSAValue) if do_rename_ssa - if val.id > processed_idx + if !already_inserted(i, OldSSAValue(val.id)) push!(late_fixup, result_idx) val = OldSSAValue(val.id) else - val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa) + val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) end else used_ssas[val.id] += 1 end elseif isa(val, OldSSAValue) - if val.id > processed_idx + if !already_inserted(i, val) push!(late_fixup, result_idx) else # Always renumber these. do_rename_ssa applies only to actual SSAValues - val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true) + val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true, mark_refined!) 
end elseif isa(val, NewSSAValue) if val.id < 0 - push!(late_fixup, result_idx) new_new_used_ssas[-val.id] += 1 else @assert do_rename_ssa val = SSAValue(val.id) end end + if isa(val, NewSSAValue) + push!(late_fixup, result_idx) + end values[i] = val end return values end function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, - new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool) + new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) id = val.id - if id > length(ssanums) - return val - end if do_rename_ssa + if id > length(ssanums) + return val + end val = ssanums[id] end + if isa(val, Refined) + val = val.val + mark_refined! !== nothing && mark_refined!() + end if isa(val, SSAValue) used_ssas[val.id] += 1 + elseif isa(val, NewSSAValue) + @assert val.id < 0 + new_new_used_ssas[-val.id] += 1 end return val end function renumber_ssa2(val::NewSSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, - new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool) + new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) if val.id < 0 new_new_used_ssas[-val.id] += 1 return val @@ -1034,7 +1243,7 @@ function renumber_ssa2(val::NewSSAValue, ssanums::Vector{Any}, used_ssas::Vector end end -function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) +function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) urs = userefs(stmt) for op in urs val = op[] @@ -1042,7 +1251,7 @@ function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Ve push!(late_fixup, result_idx) end if isa(val, Union{SSAValue, NewSSAValue}) - val = renumber_ssa2(val, ssanums, used_ssas, new_new_used_ssas, do_rename_ssa) + val = renumber_ssa2(val, ssanums, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) end if isa(val, OldSSAValue) || isa(val, NewSSAValue) push!(late_fixup, result_idx) @@ -1053,51 +1262,71 @@ function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Ve end # Used in inlining before we start compacting - Only works at the CFG level -function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int) +function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int, callback=nothing) preds, succs = bbs[to].preds, bbs[from].succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + deleteat!(preds, findfirst(x::Int->x==from, preds)::Int) + deleteat!(succs, findfirst(x::Int->x==to, succs)::Int) if length(preds) == 0 for succ in copy(bbs[to].succs) - kill_edge!(bbs, to, succ) + kill_edge!(bbs, to, succ, callback) end end + if callback !== nothing + callback(from, to) + end +end + +function kill_edge!(ir::IRCode, from::Int, to::Int, callback=nothing) + kill_edge!(ir.cfg.blocks, from, to, callback) end # N.B.: from and to are non-renamed indices function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::Int) - # Note: We recursively kill as many edges as are obviously dead. However, this - # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or - # worstcase during codegen). 
- preds = compact.result_bbs[compact.bb_rename_succ[to]].preds - succs = compact.result_bbs[compact.bb_rename_pred[from]].succs - deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int) - deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int) + # Note: We recursively kill as many edges as are obviously dead. + (; bb_rename_pred, bb_rename_succ, result_bbs, domtree) = compact.cfg_transform + preds = result_bbs[bb_rename_succ[to]].preds + succs = result_bbs[bb_rename_pred[from]].succs + deleteat!(preds, findfirst(x::Int->x==bb_rename_pred[from], preds)::Int) + deleteat!(succs, findfirst(x::Int->x==bb_rename_succ[to], succs)::Int) + if domtree !== nothing + domtree_delete_edge!(domtree, result_bbs, bb_rename_pred[from], bb_rename_succ[to]) + end # Check if the block is now dead - if length(preds) == 0 - for succ in copy(compact.result_bbs[compact.bb_rename_succ[to]].succs) - kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred)::Int) + if length(preds) == 0 || (domtree !== nothing && bb_unreachable(domtree, bb_rename_succ[to])) + to_succs = result_bbs[bb_rename_succ[to]].succs + for succ in copy(to_succs) + new_succ = findfirst(x::Int->x==succ, bb_rename_pred) + new_succ === nothing && continue + kill_edge!(compact, active_bb, to, new_succ) end + empty!(preds) + empty!(to_succs) if to < active_bb # Kill all statements in the block - stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts + stmts = result_bbs[bb_rename_succ[to]].stmts for stmt in stmts - compact.result[stmt][:inst] = nothing + compact.result[stmt][:stmt] = nothing end - compact.result[last(stmts)][:inst] = ReturnNode() + compact.result[last(stmts)][:stmt] = ReturnNode() + else + # Tell compaction to not schedule this block. A value of -2 here + # indicates that the block is not to be scheduled, but there should + # still be an (unreachable) BB inserted into the final IR to avoid + # disturbing the BB numbering. 
+ bb_rename_succ[to] = -2 end else # Remove this edge from all phi nodes in `to` block # NOTE: It is possible for `to` to contain only `nothing` statements, # so we must be careful to stop at its last statement if to < active_bb - stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts + stmts = result_bbs[bb_rename_succ[to]].stmts idx = first(stmts) while idx <= last(stmts) - stmt = compact.result[idx][:inst] + stmt = compact.result[idx][:stmt] stmt === nothing && continue isa(stmt, PhiNode) || break - i = findfirst(x-> x == compact.bb_rename_pred[from], stmt.edges) + i = findfirst(x::Int32->x==bb_rename_pred[from], stmt.edges) if i !== nothing deleteat!(stmt.edges, i) deleteat!(stmt.values, i) @@ -1109,7 +1338,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) stmt === nothing && continue isa(stmt, PhiNode) || break - i = findfirst(x-> x == from, stmt.edges) + i = findfirst(x::Int32->x==from, stmt.edges) if i !== nothing deleteat!(stmt.edges, i) deleteat!(stmt.values, i) @@ -1120,61 +1349,124 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: nothing end +struct Refined + val::Any + Refined(@nospecialize(val)) = new(val) +end + function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instruction, idx::Int, processed_idx::Int, active_bb::Int, do_rename_ssa::Bool) - stmt = inst[:inst] - (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas, cfg_transforms_enabled, fold_constant_branches) = compact - ssa_rename[idx] = SSAValue(result_idx) + stmt = inst[:stmt] + (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact + (; cfg_transforms_enabled, fold_constant_branches, bb_rename_succ, bb_rename_pred, result_bbs) = compact.cfg_transform + mark_refined! 
= Refiner(result.flag, result_idx) + already_inserted = (::Int, ssa::OldSSAValue)->ssa.id <= processed_idx if stmt === nothing ssa_rename[idx] = stmt elseif isa(stmt, OldSSAValue) ssa_rename[idx] = ssa_rename[stmt.id] elseif isa(stmt, GotoNode) && cfg_transforms_enabled - result[result_idx][:inst] = GotoNode(compact.bb_rename_succ[stmt.label]) + label = bb_rename_succ[stmt.label] + @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:stmt] = GotoNode(label) result_idx += 1 elseif isa(stmt, GlobalRef) - result[result_idx][:inst] = stmt - result[result_idx][:type] = argextype(stmt, compact) - result_idx += 1 + total_flags = IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE + flag = result[result_idx][:flag] + if has_flag(flag, total_flags) + ssa_rename[idx] = stmt + else + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:stmt] = stmt + result_idx += 1 + end elseif isa(stmt, GotoNode) - result[result_idx][:inst] = stmt + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:stmt] = stmt result_idx += 1 elseif isa(stmt, GotoIfNot) && cfg_transforms_enabled - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::GotoIfNot - result[result_idx][:inst] = stmt + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::GotoIfNot + result[result_idx][:stmt] = stmt cond = stmt.cond if fold_constant_branches if !isa(cond, Bool) condT = widenconditional(argextype(cond, compact)) isa(condT, Const) || @goto bail + kill_current_use!(compact, cond) cond = condT.val isa(cond, Bool) || @goto bail end if cond - result[result_idx][:inst] = nothing + ssa_rename[idx] = nothing + result[result_idx][:stmt] = nothing kill_edge!(compact, active_bb, active_bb, stmt.dest) # Don't increment result_idx => Drop this statement else - result[result_idx][:inst] = GotoNode(compact.bb_rename_succ[stmt.dest]) + label = bb_rename_succ[stmt.dest] + @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:stmt] = GotoNode(label) kill_edge!(compact, active_bb, active_bb, active_bb+1) result_idx += 1 end else @label bail - result[result_idx][:inst] = GotoIfNot(cond, compact.bb_rename_succ[stmt.dest]) + label = bb_rename_succ[stmt.dest] + @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:stmt] = GotoIfNot(cond, label) result_idx += 1 end + elseif cfg_transforms_enabled && isa(stmt, EnterNode) + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::EnterNode + if stmt.catch_dest != 0 + label = bb_rename_succ[stmt.catch_dest] + @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:stmt] = EnterNode(stmt, label) + else + result[result_idx][:stmt] = stmt + end + result_idx += 1 elseif isa(stmt, Expr) - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::Expr - if cfg_transforms_enabled && isexpr(stmt, :enter) - stmt.args[1] = compact.bb_rename_succ[stmt.args[1]::Int] + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::Expr + if isexpr(stmt, :throw_undef_if_not) + cond = stmt.args[2] + if isa(cond, Bool) && cond === true + # cond was folded to true - this statement + # is dead. 
+ ssa_rename[idx] = nothing + return result_idx + end + elseif isexpr(stmt, :leave) + let i = 1 + while i <= length(stmt.args) + if stmt.args[i] === nothing + deleteat!(stmt.args, i) + else + i += 1 + end + end + end + if isempty(stmt.args) + # This :leave is dead + ssa_rename[idx] = nothing + return result_idx + end + end + typ = inst[:type] + if isa(typ, Const) && is_inlineable_constant(typ.val) + ssa_rename[idx] = quoted(typ.val) + else + ssa_rename[idx] = SSAValue(result_idx) end - result[result_idx][:inst] = stmt + result[result_idx][:stmt] = stmt result_idx += 1 elseif isa(stmt, PiNode) # As an optimization, we eliminate any trivial pinodes. For performance, we use === # type equality. We may want to consider using == in either a separate pass or if # performance turns out ok - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::PiNode + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::PiNode pi_val = stmt.val if isa(pi_val, SSAValue) if stmt.typ === result[pi_val.id][:type] @@ -1188,21 +1480,29 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr return result_idx end elseif !isa(pi_val, AnySSAValue) && !isa(pi_val, GlobalRef) - valtyp = isa(pi_val, QuoteNode) ? typeof(pi_val.value) : typeof(pi_val) - if valtyp === stmt.typ + pi_val′ = isa(pi_val, QuoteNode) ? pi_val.value : pi_val + stmttyp = stmt.typ + if isa(stmttyp, Const) ? pi_val′ === stmttyp.val : typeof(pi_val′) === stmttyp ssa_rename[idx] = pi_val return result_idx end end - result[result_idx][:inst] = stmt + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:stmt] = stmt result_idx += 1 - elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot) - result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa) + elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot) || isa(stmt, EnterNode) + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:stmt] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!) result_idx += 1 elseif isa(stmt, PhiNode) + # N.B.: For PhiNodes, this needs to be at the top, since PhiNodes + # can self-reference. + ssa_rename[idx] = SSAValue(result_idx) if cfg_transforms_enabled # Rename phi node edges - map!(i -> compact.bb_rename_pred[i], stmt.edges, stmt.edges) + let bb_rename_pred=bb_rename_pred + map!(i::Int32->i == 0 ? 0 : bb_rename_pred[i], stmt.edges, stmt.edges) + end # Remove edges and values associated with dead blocks. Entries in # `values` can be undefined when the phi node refers to something @@ -1215,11 +1515,12 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr # not a value we can copy), we copy only the edges and (defined) # values we want to keep to new arrays initialized with undefined # elements. 
+ edges = Vector{Int32}(undef, length(stmt.edges)) values = Vector{Any}(undef, length(stmt.values)) new_index = 1 for old_index in 1:length(stmt.edges) - if stmt.edges[old_index] != -1 + if stmt.edges[old_index] > 0 edges[new_index] = stmt.edges[old_index] if isassigned(stmt.values, old_index) values[new_index] = stmt.values[old_index] @@ -1234,7 +1535,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr values = stmt.values end - values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa) + values = process_phinode_values(values, late_fixup, already_inserted, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) # Don't remove the phi node if it is before the definition of its value # because doing so can create forward references. This should only # happen with dead loops, but can cause problems when optimization @@ -1244,7 +1545,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr before_def = isassigned(values, 1) && (v = values[1]; isa(v, OldSSAValue)) && idx < v.id if length(edges) == 1 && isassigned(values, 1) && !before_def && length(cfg_transforms_enabled ? - compact.result_bbs[compact.bb_rename_succ[active_bb]].preds : + result_bbs[bb_rename_succ[active_bb]].preds : compact.ir.cfg.blocks[active_bb].preds) == 1 # There's only one predecessor left - just replace it v = values[1] @@ -1254,28 +1555,49 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr end ssa_rename[idx] = v else - result[result_idx][:inst] = PhiNode(edges, values) + result[result_idx][:stmt] = PhiNode(edges, values) result_idx += 1 end elseif isa(stmt, PhiCNode) - result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa)) - result_idx += 1 - elseif isa(stmt, SSAValue) - # identity assign, replace uses of this ssa value with its result - if do_rename_ssa - stmt = ssa_rename[stmt.id] + ssa_rename[idx] = SSAValue(result_idx) + values = stmt.values + if cfg_transforms_enabled + # Filter arguments that come from dead blocks + values = Any[] + for value in stmt.values + if isa(value, SSAValue) + blk = block_for_inst(compact.ir.cfg, value.id) + if bb_rename_pred[blk] < 0 + continue + end + end + push!(values, value) + end end - ssa_rename[idx] = stmt - elseif isa(stmt, NewSSAValue) - ssa_rename[idx] = SSAValue(stmt.id) + result[result_idx][:stmt] = PhiCNode(process_phinode_values(values, late_fixup, already_inserted, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)) + result_idx += 1 else - # Constant assign, replace uses of this ssa value with its result + if isa(stmt, SSAValue) + # identity assign, replace uses of this ssa value with its result + if do_rename_ssa + stmt = ssa_rename[stmt.id] + end + elseif isa(stmt, NewSSAValue) + stmt = SSAValue(stmt.id) + else + # Constant assign, replace uses of this ssa value with its result + end + if has_flag(inst, IR_FLAG_REFINED) && !isa(stmt, Refined) + # If we're compacting away an instruction that was marked as refined, + # leave a marker in the ssa_rename, so we can taint any users. 
+ stmt = Refined(stmt) + end ssa_rename[idx] = stmt end return result_idx end -function resize!(compact::IncrementalCompact, nnewnodes) +function resize!(compact::IncrementalCompact, nnewnodes::Int) old_length = length(compact.result) resize!(compact.result, nnewnodes) resize!(compact.used_ssas, nnewnodes) @@ -1285,45 +1607,57 @@ function resize!(compact::IncrementalCompact, nnewnodes) return compact end -function finish_current_bb!(compact::IncrementalCompact, active_bb, old_result_idx=compact.result_idx, unreachable=false) - if compact.active_result_bb > length(compact.result_bbs) +function finish_current_bb!(compact::IncrementalCompact, active_bb::Int, + old_result_idx::Int=compact.result_idx, unreachable::Bool=false) + (;result_bbs, cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform + if compact.active_result_bb > length(result_bbs) #@assert compact.bb_rename[active_bb] == -1 return true end - bb = compact.result_bbs[compact.active_result_bb] + bb = result_bbs[compact.active_result_bb] # If this was the last statement in the BB and we decided to skip it, insert a # dummy `nothing` node, to prevent changing the structure of the CFG skipped = false - if !compact.cfg_transforms_enabled || active_bb == 0 || active_bb > length(compact.bb_rename_succ) || compact.bb_rename_succ[active_bb] != -1 + if !cfg_transforms_enabled || active_bb == 0 || active_bb > length(bb_rename_succ) || bb_rename_succ[active_bb] != -1 if compact.result_idx == first(bb.stmts) length(compact.result) < old_result_idx && resize!(compact, old_result_idx) node = compact.result[old_result_idx] if unreachable - node[:inst], node[:type], node[:line] = ReturnNode(), Union{}, 0 + node[:stmt], node[:type], node[:line] = ReturnNode(), Union{}, 0 else - node[:inst], node[:type], node[:line] = nothing, Nothing, 0 + node[:stmt], node[:type], node[:line], node[:flag] = nothing, Nothing, 0, IR_FLAGS_EFFECTS end compact.result_idx = old_result_idx + 1 - elseif compact.cfg_transforms_enabled && compact.result_idx - 1 == first(bb.stmts) + elseif cfg_transforms_enabled && compact.result_idx - 1 == first(bb.stmts) # Optimization: If this BB consists of only a branch, eliminate this bb end - compact.result_bbs[compact.active_result_bb] = BasicBlock(bb, StmtRange(first(bb.stmts), compact.result_idx-1)) + result_bbs[compact.active_result_bb] = BasicBlock(bb, StmtRange(first(bb.stmts), compact.result_idx-1)) compact.active_result_bb += 1 else skipped = true end - if compact.active_result_bb <= length(compact.result_bbs) - new_bb = compact.result_bbs[compact.active_result_bb] - compact.result_bbs[compact.active_result_bb] = BasicBlock(new_bb, + if compact.active_result_bb <= length(result_bbs) + new_bb = result_bbs[compact.active_result_bb] + result_bbs[compact.active_result_bb] = BasicBlock(new_bb, StmtRange(compact.result_idx, last(new_bb.stmts))) end return skipped end -function attach_after_stmt_after(compact::IncrementalCompact, idx::Int) - compact.new_nodes_idx > length(compact.perm) && return false - entry = compact.ir.new_nodes.info[compact.perm[compact.new_nodes_idx]] - return entry.pos == idx && entry.attach_after +""" + stmts_awaiting_insertion(compact::IncrementalCompact, idx::Int) + +Returns true if there are new/pending instructions enqueued for insertion into +`compact` on any instruction in the range `1:idx`. Otherwise, returns false. 
+""" +function stmts_awaiting_insertion(compact::IncrementalCompact, idx::Int) + + new_node_waiting = compact.new_nodes_idx <= length(compact.perm) && + compact.ir.new_nodes.info[compact.perm[compact.new_nodes_idx]].pos <= idx + pending_node_waiting = !isempty(compact.pending_perm) && + compact.pending_nodes.info[compact.pending_perm[1]].pos <= idx + + return new_node_waiting || pending_node_waiting end function process_newnode!(compact::IncrementalCompact, new_idx::Int, new_node_entry::Instruction, new_node_info::NewNodeInfo, idx::Int, active_bb::Int, do_rename_ssa::Bool) @@ -1335,14 +1669,11 @@ function process_newnode!(compact::IncrementalCompact, new_idx::Int, new_node_en compact.result_idx = result_idx # If this instruction has reverse affinity and we were at the end of a basic block, # finish it now. - if new_node_info.attach_after && idx == last(bb.stmts)+1 && !attach_after_stmt_after(compact, idx-1) + if new_node_info.attach_after && idx == last(bb.stmts)+1 && !stmts_awaiting_insertion(compact, idx-1) active_bb += 1 finish_current_bb!(compact, active_bb, old_result_idx) end - (old_result_idx == result_idx) && return iterate(compact, (idx, active_bb)) - return Pair{Pair{Int, Int}, Any}( - Pair{Int,Int}(new_idx,old_result_idx), - compact.result[old_result_idx][:inst]), (idx, active_bb) + return (old_result_idx, result_idx, active_bb) end struct CompactPeekIterator @@ -1369,7 +1700,7 @@ function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it. for eidx in aidx:length(compact.perm) if entry_at_idx(new_nodes.info[compact.perm[eidx]], idx) entry = new_nodes.stmts[compact.perm[eidx]] - return (entry[:inst], (idx, eidx+1, bidx)) + return (entry[:stmt], (idx, eidx+1, bidx)) end end end @@ -1377,18 +1708,30 @@ function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it. 
for eidx in bidx:length(compact.pending_perm) if entry_at_idx(compact.pending_nodes.info[compact.pending_perm[eidx]], idx) entry = compact.pending_nodes.stmts[compact.pending_perm[eidx]] - return (entry[:inst], (idx, aidx, eidx+1)) + return (entry[:stmt], (idx, aidx, eidx+1)) end end end idx > length(compact.ir.stmts) && return nothing - return (compact.ir.stmts[idx][:inst], (idx + 1, aidx, bidx)) + return (compact.ir.stmts[idx][:stmt], (idx + 1, aidx, bidx)) end -function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}=(compact.idx, 1)) +# the returned Union{Nothing, Pair{Pair{Int,Int},Any}} cannot be stack allocated, +# so we inline this function into the caller +@inline function iterate(compact::IncrementalCompact, state=nothing) + idxs = iterate_compact(compact) + idxs === nothing && return nothing + old_result_idx = idxs[2] + return Pair{Pair{Int,Int},Any}(idxs, compact.result[old_result_idx][:stmt]), nothing +end + +function iterate_compact(compact::IncrementalCompact) # Create label to dodge recursion so that we don't stack overflow @label restart + idx = compact.idx + active_bb = compact.active_bb + old_result_idx = compact.result_idx if idx > length(compact.ir.stmts) && (compact.new_nodes_idx > length(compact.perm)) return nothing @@ -1397,7 +1740,8 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= resize!(compact, old_result_idx) end bb = compact.ir.cfg.blocks[active_bb] - if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && compact.bb_rename_succ[active_bb] == -1 + (; cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform + if cfg_transforms_enabled && active_bb > 1 && active_bb <= length(bb_rename_succ) && bb_rename_succ[active_bb] <= -1 # Dead block, so kill the entire block. compact.idx = last(bb.stmts) # Pop any remaining insertion nodes @@ -1413,14 +1757,15 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= if !(info.attach_after ? info.pos <= compact.idx - 1 : info.pos <= compact.idx) break end - popfirst!(compact.pending_perm) + heappop!(compact.pending_perm, By(x::Int -> compact.pending_nodes.info[x].pos)) end # Move to next block compact.idx += 1 + compact.active_bb += 1 if finish_current_bb!(compact, active_bb, old_result_idx, true) - return iterate(compact, (compact.idx, active_bb + 1)) + return iterate_compact(compact) else - return Pair{Pair{Int, Int}, Any}(Pair{Int,Int}(compact.idx-1, old_result_idx), compact.result[old_result_idx][:inst]), (compact.idx, active_bb + 1) + return Pair{Int,Int}(compact.idx-1, old_result_idx) end end if compact.new_nodes_idx <= length(compact.perm) && @@ -1431,49 +1776,55 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= new_node_entry = compact.ir.new_nodes.stmts[new_idx] new_node_info = compact.ir.new_nodes.info[new_idx] new_idx += length(compact.ir.stmts) - return process_newnode!(compact, new_idx, new_node_entry, new_node_info, idx, active_bb, true) + (old_result_idx, result_idx, active_bb) = + process_newnode!(compact, new_idx, new_node_entry, new_node_info, idx, active_bb, true) + compact.active_bb = active_bb + old_result_idx == result_idx && @goto restart + return Pair{Int,Int}(new_idx, old_result_idx) elseif !isempty(compact.pending_perm) && (info = compact.pending_nodes.info[compact.pending_perm[1]]; info.attach_after ? 
info.pos == idx - 1 : info.pos == idx) - new_idx = popfirst!(compact.pending_perm) + new_idx = heappop!(compact.pending_perm, By(x::Int -> compact.pending_nodes.info[x].pos)) new_node_entry = compact.pending_nodes.stmts[new_idx] new_node_info = compact.pending_nodes.info[new_idx] new_idx += length(compact.ir.stmts) + length(compact.ir.new_nodes) - return process_newnode!(compact, new_idx, new_node_entry, new_node_info, idx, active_bb, false) + (old_result_idx, result_idx, active_bb) = + process_newnode!(compact, new_idx, new_node_entry, new_node_info, idx, active_bb, false) + compact.active_bb = active_bb + old_result_idx == result_idx && @goto restart + return Pair{Int,Int}(new_idx, old_result_idx) end # This will get overwritten in future iterations if # result_idx is not, incremented, but that's ok and expected compact.result[old_result_idx] = compact.ir.stmts[idx] result_idx = process_node!(compact, old_result_idx, compact.ir.stmts[idx], idx, idx, active_bb, true) compact.result_idx = result_idx - if idx == last(bb.stmts) && !attach_after_stmt_after(compact, idx) + if idx == last(bb.stmts) && !stmts_awaiting_insertion(compact, idx) finish_current_bb!(compact, active_bb, old_result_idx) active_bb += 1 end compact.idx = idx + 1 + compact.active_bb = active_bb if old_result_idx == compact.result_idx idx += 1 @goto restart end - @assert isassigned(compact.result.inst, old_result_idx) - return Pair{Pair{Int,Int}, Any}(Pair{Int,Int}(compact.idx-1, old_result_idx), - compact.result[old_result_idx][:inst]), (compact.idx, active_bb) + @assert isassigned(compact.result.stmt, old_result_idx) + return Pair{Int,Int}(compact.idx-1, old_result_idx) end -function maybe_erase_unused!( - extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, in_worklist::Bool, - callback = null_dce_callback) - - inst = idx <= length(compact.result) ? compact.result[idx] : - compact.new_new_nodes.stmts[idx - length(compact.result)] - stmt = inst[:inst] +maybe_erase_unused!(compact::IncrementalCompact, idx::Int, in_worklist::Bool, extra_worklist::Vector{Int}) = + maybe_erase_unused!(null_dce_callback, compact, idx, in_worklist, extra_worklist) +function maybe_erase_unused!(callback::Function, compact::IncrementalCompact, idx::Int, + in_worklist::Bool, extra_worklist::Vector{Int}) + nresult = length(compact.result) + inst = idx ≤ nresult ? 
compact.result[idx] : compact.new_new_nodes.stmts[idx-nresult] + stmt = inst[:stmt] stmt === nothing && return false - if inst[:type] === Bottom - effect_free = false - else - effect_free = inst[:flag] & IR_FLAG_EFFECT_FREE != 0 - end - function kill_ssa_value(val::SSAValue) + inst[:type] === Bottom && return false + effect_free = has_flag(inst, (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) + effect_free || return false + foreachssa(stmt) do val::SSAValue if compact.used_ssas[val.id] == 1 if val.id < idx || in_worklist push!(extra_worklist, val.id) @@ -1482,85 +1833,116 @@ function maybe_erase_unused!( compact.used_ssas[val.id] -= 1 callback(val) end - if effect_free - foreachssa(kill_ssa_value, stmt) - inst[:inst] = nothing - return true - end - return false + inst[:stmt] = nothing + return true end -function fixup_phinode_values!(compact::IncrementalCompact, old_values::Vector{Any}) +struct FixedNode + node::Any + needs_fixup::Bool + FixedNode(@nospecialize(node), needs_fixup::Bool) = new(node, needs_fixup) +end + +function fixup_phinode_values!(compact::IncrementalCompact, old_values::Vector{Any}, reify_new_nodes::Bool) values = Vector{Any}(undef, length(old_values)) + fixup = false for i = 1:length(old_values) isassigned(old_values, i) || continue - val = old_values[i] - if isa(val, Union{OldSSAValue, NewSSAValue}) - val = fixup_node(compact, val) - end - values[i] = val + (; node, needs_fixup) = fixup_node(compact, old_values[i], reify_new_nodes) + fixup |= needs_fixup + values[i] = node end - values + return (values, fixup) end -function fixup_node(compact::IncrementalCompact, @nospecialize(stmt)) +function fixup_node(compact::IncrementalCompact, @nospecialize(stmt), reify_new_nodes::Bool) if isa(stmt, PhiNode) - return PhiNode(stmt.edges, fixup_phinode_values!(compact, stmt.values)) + (node, needs_fixup) = fixup_phinode_values!(compact, stmt.values, reify_new_nodes) + return FixedNode(PhiNode(stmt.edges, node), needs_fixup) elseif isa(stmt, PhiCNode) - return PhiCNode(fixup_phinode_values!(compact, stmt.values)) + (node, needs_fixup) = fixup_phinode_values!(compact, stmt.values, reify_new_nodes) + return FixedNode(PhiCNode(node), needs_fixup) elseif isa(stmt, NewSSAValue) @assert stmt.id < 0 - return SSAValue(length(compact.result) - stmt.id) + if reify_new_nodes + val = SSAValue(length(compact.result) - stmt.id) + return FixedNode(val, false) + else + return FixedNode(stmt, true) + end elseif isa(stmt, OldSSAValue) - val = compact.ssa_rename[stmt.id] - if isa(val, SSAValue) - # If `val.id` is greater than the length of `compact.result` or - # `compact.used_ssas`, this SSA value is in `new_new_nodes`, so - # don't count the use - compact.used_ssas[val.id] += 1 + node = compact.ssa_rename[stmt.id] + if isa(node, Refined) + node = node.val end - return val + needs_fixup = false + if isa(node, NewSSAValue) + (;node, needs_fixup) = fixup_node(compact, node, reify_new_nodes) + end + if isa(node, SSAValue) + compact.used_ssas[node.id] += 1 + elseif isa(node, NewSSAValue) + compact.new_new_used_ssas[-node.id] += 1 + end + return FixedNode(node, needs_fixup) else urs = userefs(stmt) + fixup = false for ur in urs val = ur[] if isa(val, Union{NewSSAValue, OldSSAValue}) - ur[] = fixup_node(compact, val) + (;node, needs_fixup) = fixup_node(compact, val, reify_new_nodes) + fixup |= needs_fixup + ur[] = node end end - return urs[] + return FixedNode(urs[], fixup) + end +end + +function just_fixup!(compact::IncrementalCompact, new_new_nodes_offset::Union{Int, Nothing} = nothing, 
late_fixup_offset::Union{Int, Nothing} = nothing) + if new_new_nodes_offset === late_fixup_offset === nothing # only do this appending in non_dce_finish! + resize!(compact.used_ssas, length(compact.result)) + append!(compact.used_ssas, compact.new_new_used_ssas) + empty!(compact.new_new_used_ssas) + end + off = late_fixup_offset === nothing ? 1 : (late_fixup_offset+1) + set_off = off + for i in off:length(compact.late_fixup) + idx = compact.late_fixup[i] + stmt = compact.result[idx][:stmt] + (;node, needs_fixup) = fixup_node(compact, stmt, late_fixup_offset === nothing) + (stmt === node) || (compact.result[idx][:stmt] = node) + if needs_fixup + compact.late_fixup[set_off] = idx + set_off += 1 + end end -end - -function just_fixup!(compact::IncrementalCompact) - resize!(compact.used_ssas, length(compact.result)) - append!(compact.used_ssas, compact.new_new_used_ssas) - empty!(compact.new_new_used_ssas) - for idx in compact.late_fixup - stmt = compact.result[idx][:inst] - new_stmt = fixup_node(compact, stmt) - (stmt === new_stmt) || (compact.result[idx][:inst] = new_stmt) + if late_fixup_offset !== nothing + resize!(compact.late_fixup, set_off-1) end - for idx in 1:length(compact.new_new_nodes) - node = compact.new_new_nodes.stmts[idx] - stmt = node[:inst] - new_stmt = fixup_node(compact, stmt) - if new_stmt !== stmt - node[:inst] = new_stmt + off = new_new_nodes_offset === nothing ? 1 : (new_new_nodes_offset+1) + for idx in off:length(compact.new_new_nodes) + new_node = compact.new_new_nodes.stmts[idx] + stmt = new_node[:stmt] + (;node) = fixup_node(compact, stmt, late_fixup_offset === nothing) + if node !== stmt + new_node[:stmt] = node end end end -function simple_dce!(compact::IncrementalCompact, callback = null_dce_callback) +simple_dce!(compact::IncrementalCompact) = simple_dce!(null_dce_callback, compact) +function simple_dce!(callback::Function, compact::IncrementalCompact) # Perform simple DCE for unused values @assert isempty(compact.new_new_used_ssas) # just_fixup! wasn't run? 
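+ # Erasing a statement decrements the use counts of its SSA operands (see
+ # `maybe_erase_unused!` above); an operand whose last remaining use is removed is
+ # either pushed onto `extra_worklist` or caught by the sweep below once its count
+ # reaches zero, so entire dead chains are eliminated in a single call.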
extra_worklist = Int[] for (idx, nused) in Iterators.enumerate(compact.used_ssas) nused == 0 || continue - maybe_erase_unused!(extra_worklist, compact, idx, false, callback) + maybe_erase_unused!(callback, compact, idx, false, extra_worklist) end while !isempty(extra_worklist) - maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist), true, callback) + maybe_erase_unused!(callback, compact, pop!(extra_worklist), true, extra_worklist) end end @@ -1570,8 +1952,8 @@ function non_dce_finish!(compact::IncrementalCompact) result_idx = compact.result_idx resize!(compact.result, result_idx - 1) just_fixup!(compact) - bb = compact.result_bbs[end] - compact.result_bbs[end] = BasicBlock(bb, + bb = compact.cfg_transform.result_bbs[end] + compact.cfg_transform.result_bbs[end] = BasicBlock(bb, StmtRange(first(bb.stmts), result_idx-1)) compact.renamed_new_nodes = true nothing @@ -1584,11 +1966,24 @@ function finish(compact::IncrementalCompact) end function complete(compact::IncrementalCompact) - result_bbs = resize!(compact.result_bbs, compact.active_result_bb-1) + result_bbs = resize!(compact.cfg_transform.result_bbs, compact.active_result_bb-1) cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) - if __check_ssa_counts__[] + if should_check_ssa_counts() oracle_check(compact) end + + # trim trailing undefined statements due to copy propagation + nundef = 0 + for i in length(compact.result):-1:1 + if isassigned(compact.result.stmt, i) + break + end + nundef += 1 + end + if nundef > 0 + resize!(compact.result, length(compact.result) - nundef) + end + return IRCode(compact.ir, compact.result, cfg, compact.new_new_nodes) end @@ -1614,3 +2009,18 @@ function iterate(x::BBIdxIter, (idx, bb)::Tuple{Int, Int}=(1, 1)) end return (bb, idx), (idx + 1, next_bb) end + +# Inserters + +abstract type Inserter; end + +struct InsertHere <: Inserter + compact::IncrementalCompact +end +(i::InsertHere)(newinst::NewInstruction) = insert_node_here!(i.compact, newinst) + +struct InsertBefore{T<:Union{IRCode, IncrementalCompact}} <: Inserter + src::T + pos::SSAValue +end +(i::InsertBefore)(newinst::NewInstruction) = insert_node!(i.src, i.pos, newinst) diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl new file mode 100644 index 0000000000000..cc93757a60cbc --- /dev/null +++ b/base/compiler/ssair/irinterp.jl @@ -0,0 +1,456 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +function collect_limitations!(@nospecialize(typ), ::IRInterpretationState) + @assert !isa(typ, LimitedAccuracy) "irinterp is unable to handle heavy recursion" + return typ +end + +function concrete_eval_invoke(interp::AbstractInterpreter, + inst::Expr, mi::MethodInstance, irsv::IRInterpretationState) + world = frame_world(irsv) + mi_cache = WorldView(code_cache(interp), world) + code = get(mi_cache, mi, nothing) + code === nothing && return Pair{Any,Tuple{Bool,Bool}}(nothing, (false, false)) + argtypes = collect_argtypes(interp, inst.args[2:end], nothing, irsv) + argtypes === nothing && return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, false)) + effects = decode_effects(code.ipo_purity_bits) + if (is_foldable(effects) && is_all_const_arg(argtypes, #=start=#1) && + (is_nonoverlayed(interp) || is_nonoverlayed(effects))) + args = collect_const_args(argtypes, #=start=#1) + value = let world = get_world_counter(interp) + try + Core._call_in_world_total(world, args...) 
+ catch + return Pair{Any,Tuple{Bool,Bool}}(Bottom, (false, is_noub(effects))) + end + end + return Pair{Any,Tuple{Bool,Bool}}(Const(value), (true, true)) + else + if is_constprop_edge_recursed(mi, irsv) + return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects))) + end + newirsv = IRInterpretationState(interp, code, mi, argtypes, world) + if newirsv !== nothing + newirsv.parent = irsv + return ir_abstract_constant_propagation(interp, newirsv) + end + return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects))) + end +end + +abstract_eval_ssavalue(s::SSAValue, sv::IRInterpretationState) = abstract_eval_ssavalue(s, sv.ir) + +function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int, irsv::IRInterpretationState) + return abstract_eval_phi(interp, phi, nothing, irsv) +end + +function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState) + si = StmtInfo(true) # TODO better job here? + (; rt, exct, effects, info) = abstract_call(interp, arginfo, si, irsv) + irsv.ir.stmts[irsv.curridx][:info] = info + return RTEffects(rt, exct, effects) +end + +function kill_block!(ir::IRCode, bb::Int) + # Kill the entire block + stmts = ir.cfg.blocks[bb].stmts + for bidx = stmts + inst = ir[SSAValue(bidx)] + inst[:stmt] = nothing + inst[:type] = Bottom + inst[:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW + end + ir[SSAValue(last(stmts))][:stmt] = ReturnNode() + return +end + +function update_phi!(irsv::IRInterpretationState, from::Int, to::Int) + ir = irsv.ir + if length(ir.cfg.blocks[to].preds) == 0 + kill_block!(ir, to) + end + for sidx = ir.cfg.blocks[to].stmts + stmt = ir[SSAValue(sidx)][:stmt] + isa(stmt, Nothing) && continue # allowed between `PhiNode`s + isa(stmt, PhiNode) || break + for (eidx, edge) in enumerate(stmt.edges) + if edge == from + deleteat!(stmt.edges, eidx) + deleteat!(stmt.values, eidx) + push!(irsv.ssa_refined, sidx) + break + end + end + end +end +update_phi!(irsv::IRInterpretationState) = (from::Int, to::Int)->update_phi!(irsv, from, to) + +function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb::Int=block_for_inst(irsv.ir, term_idx)) + ir = irsv.ir + stmt = ir[SSAValue(term_idx)][:stmt] + if isa(stmt, GotoIfNot) + kill_edge!(irsv, bb, stmt.dest) + kill_edge!(irsv, bb, bb+1) + elseif isa(stmt, GotoNode) + kill_edge!(irsv, bb, stmt.label) + elseif isa(stmt, ReturnNode) + # Nothing to do + else + @assert !isa(stmt, EnterNode) + kill_edge!(irsv, bb, bb+1) + end +end + +function kill_edge!(irsv::IRInterpretationState, from::Int, to::Int) + ir = irsv.ir + kill_edge!(ir, from, to, update_phi!(irsv)) + + lazydomtree = irsv.lazydomtree + domtree = nothing + if isdefined(lazydomtree, :domtree) + domtree = get!(lazydomtree) + domtree_delete_edge!(domtree, ir.cfg.blocks, from, to) + elseif length(ir.cfg.blocks[to].preds) != 0 + # TODO: If we're not maintaining the domtree, computing it just for this + # is slightly overkill - just the dfs tree would be enough. 
+ domtree = get!(lazydomtree) + end + + if domtree !== nothing && bb_unreachable(domtree, to) + kill_block!(ir, to) + for edge in ir.cfg.blocks[to].succs + kill_edge!(irsv, to, edge) + end + end +end + +function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction, idx::Int, + bb::Union{Int,Nothing}, irsv::IRInterpretationState) + ir = irsv.ir + stmt = inst[:stmt] + if isa(stmt, GotoIfNot) + cond = stmt.cond + condval = maybe_extract_const_bool(argextype(cond, ir)) + if condval isa Bool + if isa(cond, SSAValue) + kill_def_use!(irsv.tpdum, cond, idx) + end + if bb === nothing + bb = block_for_inst(ir, idx) + end + add_flag!(inst, IR_FLAG_NOTHROW) + if condval + inst[:stmt] = nothing + inst[:type] = Any + kill_edge!(irsv, bb, stmt.dest) + else + inst[:stmt] = GotoNode(stmt.dest) + kill_edge!(irsv, bb, bb+1) + end + return true + end + return false + end + rt = nothing + if isa(stmt, Expr) + head = stmt.head + if head === :call || head === :foreigncall || head === :new || head === :splatnew || head === :static_parameter || head === :isdefined || head === :boundscheck + (; rt, effects) = abstract_eval_statement_expr(interp, stmt, nothing, irsv) + add_flag!(inst, flags_for_effects(effects)) + elseif head === :invoke + rt, (nothrow, noub) = concrete_eval_invoke(interp, stmt, stmt.args[1]::MethodInstance, irsv) + if nothrow + add_flag!(inst, IR_FLAG_NOTHROW) + end + if noub + add_flag!(inst, IR_FLAG_NOUB) + end + elseif head === :throw_undef_if_not + condval = maybe_extract_const_bool(argextype(stmt.args[2], ir)) + condval isa Bool || return false + if condval + inst[:stmt] = nothing + # We simplified the IR, but we did not update the type + return false + end + rt = Union{} + elseif head === :gc_preserve_begin || + head === :gc_preserve_end + return false + elseif head === :leave + return false + else + error("reprocess_instruction!: unhandled expression found") + end + elseif isa(stmt, PhiNode) + rt = abstract_eval_phi_stmt(interp, stmt, idx, irsv) + elseif isa(stmt, ReturnNode) + # Handled at the very end + return false + elseif isa(stmt, PiNode) + rt = tmeet(typeinf_lattice(interp), argextype(stmt.val, ir), widenconst(stmt.typ)) + elseif stmt === nothing + return false + elseif isa(stmt, GlobalRef) + # GlobalRef is not refinable + else + rt = argextype(stmt, irsv.ir) + end + if rt !== nothing + if isa(rt, Const) + inst[:type] = rt + if is_inlineable_constant(rt.val) && has_flag(inst, (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) + inst[:stmt] = quoted(rt.val) + end + return true + elseif !⊑(typeinf_lattice(interp), inst[:type], rt) + inst[:type] = rt + return true + end + end + return false +end + +# Process the terminator and add the successor to `bb_ip`. Returns whether a backedge was seen. 
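+# For example, a `GotoIfNot` that ends block 5 with `dest = 3` is a backedge: only the
+# fall-through block 6 is pushed onto `bb_ip` and `true` is returned. A forward
+# `GotoIfNot` pushes both its destination and the fall-through block and returns `false`.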
+function process_terminator!(@nospecialize(stmt), bb::Int, bb_ip::BitSetBoundedMinPrioritySet) + if isa(stmt, ReturnNode) + return false + elseif isa(stmt, GotoNode) + backedge = stmt.label <= bb + backedge || push!(bb_ip, stmt.label) + return backedge + elseif isa(stmt, GotoIfNot) + backedge = stmt.dest <= bb + backedge || push!(bb_ip, stmt.dest) + push!(bb_ip, bb+1) + return backedge + elseif isa(stmt, EnterNode) + dest = stmt.catch_dest + @assert dest > bb + push!(bb_ip, dest) + push!(bb_ip, bb+1) + return false + else + push!(bb_ip, bb+1) + return false + end +end + +struct BBScanner + ir::IRCode + bb_ip::BitSetBoundedMinPrioritySet +end + +function BBScanner(ir::IRCode) + bbs = ir.cfg.blocks + bb_ip = BitSetBoundedMinPrioritySet(length(bbs)) + push!(bb_ip, 1) + return BBScanner(ir, bb_ip) +end + +function scan!(callback, scanner::BBScanner, forwards_only::Bool) + (; bb_ip, ir) = scanner + bbs = ir.cfg.blocks + while !isempty(bb_ip) + bb = popfirst!(bb_ip) + stmts = bbs[bb].stmts + lstmt = last(stmts) + for idx = stmts + inst = ir[SSAValue(idx)] + ret = callback(inst, lstmt, bb) + ret === nothing && return true + ret::Bool || break + idx == lstmt && process_terminator!(inst[:stmt], bb, bb_ip) && forwards_only && return false + end + end + return true +end + +function populate_def_use_map!(tpdum::TwoPhaseDefUseMap, scanner::BBScanner) + scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int + for ur in userefs(inst) + val = ur[] + if isa(val, SSAValue) + push!(tpdum[val.id], inst.idx) + end + end + return true + end +end +populate_def_use_map!(tpdum::TwoPhaseDefUseMap, ir::IRCode) = + populate_def_use_map!(tpdum, BBScanner(ir)) + +function is_all_const_call(@nospecialize(stmt), interp::AbstractInterpreter, irsv::IRInterpretationState) + isexpr(stmt, :call) || return false + @inbounds for i = 2:length(stmt.args) + argtype = abstract_eval_value(interp, stmt.args[i], nothing, irsv) + is_const_argtype(argtype) || return false + end + return true +end + +function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState; + externally_refined::Union{Nothing,BitSet} = nothing) + (; ir, tpdum, ssa_refined) = irsv + + @assert isempty(ir.new_nodes) "IRCode should be compacted before irinterp" + + all_rets = Int[] + scanner = BBScanner(ir) + + check_ret!(@nospecialize(stmt), idx::Int) = isa(stmt, ReturnNode) && isdefined(stmt, :val) && push!(all_rets, idx) + + # Fast path: Scan both use counts and refinement in one single pass of + # of the instructions. In the absence of backedges, this will + # converge. 
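+ # A forwards-only `scan!` returns `false` as soon as `process_terminator!` reports a
+ # backedge; that is exactly the `completed_scan == false` case that sends us down the
+ # slow path further below.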
+ completed_scan = scan!(scanner, true) do inst::Instruction, lstmt::Int, bb::Int + idx = inst.idx + irsv.curridx = idx + stmt = inst[:stmt] + typ = inst[:type] + flag = inst[:flag] + any_refined = false + if has_flag(flag, IR_FLAG_REFINED) + any_refined = true + sub_flag!(inst, IR_FLAG_REFINED) + elseif is_all_const_call(stmt, interp, irsv) + # force reinference on calls with all constant arguments + any_refined = true + end + for ur in userefs(stmt) + val = ur[] + if isa(val, Argument) + any_refined |= irsv.argtypes_refined[val.n] + elseif isa(val, SSAValue) + any_refined |= val.id in ssa_refined + count!(tpdum, val) + end + end + if isa(stmt, PhiNode) && idx in ssa_refined + any_refined = true + delete!(ssa_refined, idx) + end + check_ret!(stmt, idx) + is_terminator_or_phi = (isa(stmt, PhiNode) || isterminator(stmt)) + if typ === Bottom && !(idx == lstmt && is_terminator_or_phi) + return true + end + if (any_refined && reprocess_instruction!(interp, inst, idx, bb, irsv)) || + (externally_refined !== nothing && idx in externally_refined) + push!(ssa_refined, idx) + stmt = inst[:stmt] + typ = inst[:type] + end + if typ === Bottom && !is_terminator_or_phi + kill_terminator_edges!(irsv, lstmt, bb) + if idx != lstmt + for idx2 in (idx+1:lstmt-1) + ir[SSAValue(idx2)] = nothing + end + ir[SSAValue(lstmt)][:stmt] = ReturnNode() + end + return false + end + return true + end + + if !completed_scan + # Slow path + stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts)) + + # Slow Path Phase 1.A: Complete use scanning + scan!(scanner, false) do inst::Instruction, lstmt::Int, bb::Int + idx = inst.idx + irsv.curridx = idx + stmt = inst[:stmt] + flag = inst[:flag] + if has_flag(flag, IR_FLAG_REFINED) + sub_flag!(inst, IR_FLAG_REFINED) + push!(stmt_ip, idx) + end + check_ret!(stmt, idx) + for ur in userefs(stmt) + val = ur[] + if isa(val, Argument) + if irsv.argtypes_refined[val.n] + push!(stmt_ip, idx) + end + elseif isa(val, SSAValue) + count!(tpdum, val) + end + end + return true + end + + # Slow Path Phase 1.B: Assemble def-use map + complete!(tpdum); push!(scanner.bb_ip, 1) + populate_def_use_map!(tpdum, scanner) + + # Slow Path Phase 2: Use def-use map to converge cycles. + # TODO: It would be possible to return to the fast path after converging + # each cycle, but that's somewhat complicated. + for val in ssa_refined + for use in tpdum[val] + if !(use in ssa_refined) + push!(stmt_ip, use) + end + end + end + while !isempty(stmt_ip) + idx = popfirst!(stmt_ip) + irsv.curridx = idx + inst = ir[SSAValue(idx)] + if reprocess_instruction!(interp, inst, idx, nothing, irsv) + append!(stmt_ip, tpdum[idx]) + end + end + end + + ultimate_rt = Bottom + for idx in all_rets + bb = block_for_inst(ir.cfg, idx) + if bb != 1 && length(ir.cfg.blocks[bb].preds) == 0 + # Could have discovered this block is dead after the initial scan + continue + end + inst = ir[SSAValue(idx)][:stmt]::ReturnNode + rt = argextype(inst.val, ir) + ultimate_rt = tmerge(typeinf_lattice(interp), ultimate_rt, rt) + end + + nothrow = noub = true + for idx = 1:length(ir.stmts) + if ir[SSAValue(idx)][:stmt] === nothing + # skip `nothing` statement, which might be inserted as a dummy node, + # e.g. 
by `finish_current_bb!` without explicitly marking it as `:nothrow` + continue + end + flag = ir[SSAValue(idx)][:flag] + nothrow &= has_flag(flag, IR_FLAG_NOTHROW) + noub &= has_flag(flag, IR_FLAG_NOUB) + (nothrow | noub) || break + end + + if last(irsv.valid_worlds) >= get_world_counter() + # if we aren't cached, we don't need this edge + # but our caller might, so let's just make it anyways + store_backedges(frame_instance(irsv), irsv.edges) + end + + return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub)) +end + +function ir_abstract_constant_propagation(interp::NativeInterpreter, irsv::IRInterpretationState) + if __measure_typeinf__[] + inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, VarState[], Any[], length(irsv.ir.argtypes)) + Timings.enter_new_timer(inf_frame) + ret = _ir_abstract_constant_propagation(interp, irsv) + append!(inf_frame.slottypes, irsv.ir.argtypes) + Timings.exit_current_timer(inf_frame) + return ret + else + return _ir_abstract_constant_propagation(interp, irsv) + end +end +ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState) = + _ir_abstract_constant_propagation(interp, irsv) diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl index b56a5fb4a6a00..3e9a4e2a746dc 100644 --- a/base/compiler/ssair/legacy.jl +++ b/base/compiler/ssair/legacy.jl @@ -2,7 +2,7 @@ """ inflate_ir!(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode - inflate_ir!(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) -> ir::IRCode + inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode Inflates `ci::CodeInfo`-IR to `ir::IRCode`-format. This should be used with caution as it is a in-place transformation where the fields of @@ -10,14 +10,10 @@ the original `ci::CodeInfo` are modified. """ function inflate_ir!(ci::CodeInfo, linfo::MethodInstance) sptypes = sptypes_from_meth_instance(linfo) - if ci.inferred - argtypes, _ = matching_cache_argtypes(linfo, nothing) - else - argtypes = Any[ Any for i = 1:length(ci.slotflags) ] - end + argtypes, _ = matching_cache_argtypes(fallback_lattice, linfo) return inflate_ir!(ci, sptypes, argtypes) end -function inflate_ir!(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) +function inflate_ir!(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) code = ci.code cfg = compute_basic_blocks(code) for i = 1:length(code) @@ -29,9 +25,8 @@ function inflate_ir!(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) code[i] = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest)) elseif isa(stmt, PhiNode) code[i] = PhiNode(Int32[block_for_inst(cfg, Int(edge)) for edge in stmt.edges], stmt.values) - elseif isexpr(stmt, :enter) - stmt.args[1] = block_for_inst(cfg, stmt.args[1]::Int) - code[i] = stmt + elseif isa(stmt, EnterNode) + code[i] = EnterNode(stmt, stmt.catch_dest == 0 ? 
0 : block_for_inst(cfg, stmt.catch_dest)) end end nstmts = length(code) @@ -39,7 +34,7 @@ function inflate_ir!(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) if !isa(ssavaluetypes, Vector{Any}) ssavaluetypes = Any[ Any for i = 1:ssavaluetypes::Int ] end - info = Any[nothing for i = 1:nstmts] + info = CallInfo[NoCallInfo() for i = 1:nstmts] stmts = InstructionStream(code, ssavaluetypes, info, ci.codelocs, ci.ssaflags) linetable = ci.linetable if !isa(linetable, Vector{LineInfoNode}) @@ -51,22 +46,32 @@ end """ inflate_ir(ci::CodeInfo, linfo::MethodInstance) -> ir::IRCode - inflate_ir(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) -> ir::IRCode + inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) -> ir::IRCode inflate_ir(ci::CodeInfo) -> ir::IRCode Non-destructive version of `inflate_ir!`. Mainly used for testing or interactive use. """ inflate_ir(ci::CodeInfo, linfo::MethodInstance) = inflate_ir!(copy(ci), linfo) -inflate_ir(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any}) = inflate_ir!(copy(ci), sptypes, argtypes) -inflate_ir(ci::CodeInfo) = inflate_ir(ci, Any[], Any[ Any for i = 1:length(ci.slotflags) ]) +inflate_ir(ci::CodeInfo, sptypes::Vector{VarState}, argtypes::Vector{Any}) = inflate_ir!(copy(ci), sptypes, argtypes) +function inflate_ir(ci::CodeInfo) + parent = ci.parent + isa(parent, MethodInstance) && return inflate_ir(ci, parent) + # XXX the length of `ci.slotflags` may be different from the actual number of call + # arguments, but we really don't know that information in this case + argtypes = Any[ Any for i = 1:length(ci.slotflags) ] + return inflate_ir(ci, VarState[], argtypes) +end -function replace_code_newstyle!(ci::CodeInfo, ir::IRCode, nargs::Int) +function replace_code_newstyle!(ci::CodeInfo, ir::IRCode) @assert isempty(ir.new_nodes) # All but the first `nargs` slots will now be unused + nargs = length(ir.argtypes) + resize!(ci.slotnames, nargs) resize!(ci.slotflags, nargs) + resize!(ci.slottypes, nargs) stmts = ir.stmts - code = ci.code = stmts.inst + code = ci.code = stmts.stmt ssavaluetypes = ci.ssavaluetypes = stmts.type codelocs = ci.codelocs = stmts.line ssaflags = ci.ssaflags = stmts.flag @@ -86,10 +91,9 @@ function replace_code_newstyle!(ci::CodeInfo, ir::IRCode, nargs::Int) elseif isa(stmt, GotoIfNot) code[i] = GotoIfNot(stmt.cond, first(ir.cfg.blocks[stmt.dest].stmts)) elseif isa(stmt, PhiNode) - code[i] = PhiNode(Int32[last(ir.cfg.blocks[edge].stmts) for edge in stmt.edges], stmt.values) - elseif isexpr(stmt, :enter) - stmt.args[1] = first(ir.cfg.blocks[stmt.args[1]::Int].stmts) - code[i] = stmt + code[i] = PhiNode(Int32[edge == 0 ? 0 : last(ir.cfg.blocks[edge].stmts) for edge in stmt.edges], stmt.values) + elseif isa(stmt, EnterNode) + code[i] = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : first(ir.cfg.blocks[stmt.catch_dest].stmts)) end end end diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 047d4577cc7bc..754d0a6cefd8b 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -6,6 +6,13 @@ function is_known_call(@nospecialize(x), @nospecialize(func), ir::Union{IRCode,I return singleton_type(ft) === func end +function is_known_invoke_or_call(@nospecialize(x), @nospecialize(func), ir::Union{IRCode,IncrementalCompact}) + isinvoke = isexpr(x, :invoke) + (isinvoke || isexpr(x, :call)) || return false + ft = argextype(x.args[isinvoke ? 
2 : 1], ir) + return singleton_type(ft) === func +end + struct SSAUse kind::Symbol idx::Int @@ -14,7 +21,7 @@ GetfieldUse(idx::Int) = SSAUse(:getfield, idx) PreserveUse(idx::Int) = SSAUse(:preserve, idx) NoPreserve() = SSAUse(:nopreserve, 0) IsdefinedUse(idx::Int) = SSAUse(:isdefined, idx) -AddFinalizerUse(idx::Int) = SSAUse(:add_finalizer, idx) +FinalizerUse(idx::Int) = SSAUse(:finalizer, idx) """ du::SSADefUse @@ -43,13 +50,9 @@ function compute_live_ins(cfg::CFG, du::SSADefUse) use.kind === :isdefined && continue # filter out `isdefined` usages push!(uses, use.idx) end - compute_live_ins(cfg, du.defs, uses) + compute_live_ins(cfg, sort!(du.defs), uses) end -# assume `stmt == getfield(obj, field, ...)` or `stmt == setfield!(obj, field, val, ...)` -try_compute_field_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr) = - try_compute_field(ir, stmt.args[3]) - function try_compute_field(ir::Union{IncrementalCompact,IRCode}, @nospecialize(field)) # fields are usually literals, handle them manually if isa(field, QuoteNode) @@ -67,32 +70,32 @@ function try_compute_field(ir::Union{IncrementalCompact,IRCode}, @nospecialize(f return isa(field, Union{Int, Symbol}) ? field : nothing end -function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr, typ::DataType) - field = try_compute_field_stmt(ir, stmt) +# assume `stmt` is a call of `getfield`/`setfield!`/`isdefined` +function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr, @nospecialize(typ)) + field = try_compute_field(ir, stmt.args[3]) return try_compute_fieldidx(typ, field) end -function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int) +function find_curblock(domtree::DomTree, allblocks::BitSet, curblock::Int) # TODO: This can be much faster by looking at current level and only # searching for those blocks in a sorted order - while !(curblock in allblocks) + while !(curblock in allblocks) && curblock !== 0 curblock = domtree.idoms_bb[curblock] end return curblock end function val_for_def_expr(ir::IRCode, def::Int, fidx::Int) - ex = ir[SSAValue(def)][:inst] + ex = ir[SSAValue(def)][:stmt] if isexpr(ex, :new) return ex.args[1+fidx] else - @assert isa(ex, Expr) - # The use is whatever the setfield was + @assert is_known_call(ex, setfield!, ir) return ex.args[4] end end -function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) +function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) curblock = find_curblock(domtree, allblocks, curblock) def = 0 for stmt in du.defs @@ -103,7 +106,7 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector def == 0 ? 
phinodes[curblock] : val_for_def_expr(ir, def, fidx) end -function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, +function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use::Int) def, useblock, curblock = find_def_for_use(ir, domtree, allblocks, du, use) if def == 0 @@ -122,7 +125,7 @@ end # even when the allocation contains an uninitialized field, we try an extra effort to check # if this load at `idx` have any "safe" `setfield!` calls that define the field function has_safe_def( - ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, + ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, newidx::Int, idx::Int) def, _, _ = find_def_for_use(ir, domtree, allblocks, du, idx) # will throw since we already checked this `:new` site doesn't define this field @@ -140,7 +143,7 @@ function has_safe_def( # if this block has already been examined, bail out to avoid infinite cycles pred in seen && return false idx = last(ir.cfg.blocks[pred].stmts) - # NOTE `idx` isn't a load, thus we can use inclusive coondition within the `find_def_for_use` + # NOTE `idx` isn't a load, thus we can use inclusive condition within the `find_def_for_use` def, _, _ = find_def_for_use(ir, domtree, allblocks, du, idx, true) # will throw since we already checked this `:new` site doesn't define this field def == newidx && return false @@ -157,7 +160,7 @@ end # find the first dominating def for the given use function find_def_for_use( - ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, use::Int, inclusive::Bool=false) + ir::IRCode, domtree::DomTree, allblocks::BitSet, du::SSADefUse, use::Int, inclusive::Bool=false) useblock = block_for_inst(ir.cfg, use) curblock = find_curblock(domtree, allblocks, useblock) local def = 0 @@ -179,11 +182,12 @@ function find_def_for_use( return def, useblock, curblock end -function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint)) +function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice, + predecessors = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing)) if isa(val, Union{OldSSAValue, SSAValue}) val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end - return walk_to_defs(compact, val, typeconstraint) + return walk_to_defs(compact, val, typeconstraint, predecessors, 𝕃ₒ) end function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), @@ -192,6 +196,9 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA if isa(defssa, OldSSAValue) if already_inserted(compact, defssa) rename = compact.ssa_rename[defssa.id] + if isa(rename, Refined) + rename = rename.val + end if isa(rename, AnySSAValue) defssa = rename continue @@ -199,7 +206,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA return rename end end - def = compact[defssa][:inst] + def = compact[defssa][:stmt] if isa(def, PiNode) if callback(def, defssa) return defssa @@ -238,16 +245,21 @@ function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defss end """ - walk_to_defs(compact, val, typeconstraint) + walk_to_defs(compact, val, typeconstraint, predecessors) Starting at `val` walk use-def chains to get all the leaves feeding into this `val` -(pruning those leaves rules out by path conditions). 
+(pruning those leaves ruled out by path conditions). + +`predecessors(def, compact)` is a callback which should return the set of possible +predecessors for a "phi-like" node (PhiNode or Core.ifelse) or `nothing` otherwise. """ -function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint)) - visited_phinodes = AnySSAValue[] - isa(defssa, AnySSAValue) || return Any[defssa], visited_phinodes - def = compact[defssa][:inst] - isa(def, PhiNode) || return Any[defssa], visited_phinodes +function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors, 𝕃ₒ::AbstractLattice) + visited_philikes = AnySSAValue[] + isa(defssa, AnySSAValue) || return Any[defssa], visited_philikes + def = compact[defssa][:stmt] + if predecessors(def, compact) === nothing + return Any[defssa], visited_philikes + end visited_constraints = IdDict{AnySSAValue, Any}() worklist_defs = AnySSAValue[] worklist_constraints = Any[] @@ -258,13 +270,17 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe defssa = pop!(worklist_defs) typeconstraint = pop!(worklist_constraints) visited_constraints[defssa] = typeconstraint - def = compact[defssa][:inst] - if isa(def, PhiNode) - push!(visited_phinodes, defssa) + def = compact[defssa][:stmt] + values = predecessors(def, compact) + if values !== nothing + if isa(def, PhiNode) || length(values) > 1 + push!(visited_philikes, defssa) + end possible_predecessors = Int[] - for n in 1:length(def.edges) - isassigned(def.values, n) || continue - val = def.values[n] + + for n in 1:length(values) + isassigned(values, n) || continue + val = values[n] if is_old(compact, defssa) && isa(val, SSAValue) val = OldSSAValue(val.id) end @@ -273,8 +289,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe push!(possible_predecessors, n) end for n in possible_predecessors - pred = def.edges[n] - val = def.values[n] + val = values[n] if is_old(compact, defssa) && isa(val, SSAValue) val = OldSSAValue(val.id) end @@ -289,7 +304,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe # path, with a different type constraint. 
We may have # to redo some work here with the wider typeconstraint push!(worklist_defs, new_def) - push!(worklist_constraints, tmerge(new_constraint, visited_constraints[new_def])) + push!(worklist_constraints, tmerge(𝕃ₒ, new_constraint, visited_constraints[new_def])) end continue end @@ -309,7 +324,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe push!(leaves, defssa) end end - return leaves, visited_phinodes + return leaves, visited_philikes end function record_immutable_preserve!(new_preserves::Vector{Any}, def::Expr, compact::IncrementalCompact) @@ -329,7 +344,7 @@ function already_inserted(compact::IncrementalCompact, old::OldSSAValue) end id -= length(compact.ir.stmts) if id < length(compact.ir.new_nodes) - error("") + return already_inserted(compact, OldSSAValue(compact.ir.new_nodes.info[id].pos)) end id -= length(compact.ir.new_nodes) @assert id <= length(compact.pending_nodes) @@ -340,7 +355,7 @@ function is_pending(compact::IncrementalCompact, old::OldSSAValue) return old.id > length(compact.ir.stmts) + length(compact.ir.new_nodes) end -function is_getfield_captures(@nospecialize(def), compact::IncrementalCompact) +function is_getfield_captures(@nospecialize(def), compact::IncrementalCompact, 𝕃ₒ::AbstractLattice) isa(def, Expr) || return false length(def.args) >= 3 || return false is_known_call(def, getfield, compact) || return false @@ -348,19 +363,20 @@ function is_getfield_captures(@nospecialize(def), compact::IncrementalCompact) isa(which, Const) || return false which.val === :captures || return false oc = argextype(def.args[2], compact) - return oc ⊑ Core.OpaqueClosure + return ⊑(𝕃ₒ, oc, Core.OpaqueClosure) end struct LiftedValue - x - LiftedValue(@nospecialize x) = new(x) + val + LiftedValue(@nospecialize val) = new(val) end const LiftedLeaves = IdDict{Any, Union{Nothing,LiftedValue}} +const LiftedDefs = IdDict{Any, Bool} # try to compute lifted values that can replace `getfield(x, field)` call # where `x` is an immutable struct that are defined at any of `leaves` -function lift_leaves(compact::IncrementalCompact, - @nospecialize(result_t), field::Int, leaves::Vector{Any}) +function lift_leaves(compact::IncrementalCompact, field::Int, + leaves::Vector{Any}, 𝕃ₒ::AbstractLattice) # For every leaf, the lifted value lifted_leaves = LiftedLeaves() maybe_undef = false @@ -375,7 +391,9 @@ function lift_leaves(compact::IncrementalCompact, elseif isexpr(def, :new) typ = unwrap_unionall(widenconst(types(compact)[leaf])) (isa(typ, DataType) && !isabstracttype(typ)) || return nothing - @assert !ismutabletype(typ) + if ismutabletype(typ) + isconst(typ, field) || return nothing + end if length(def.args) < 1+field if field > fieldcount(typ) return nothing @@ -389,19 +407,20 @@ function lift_leaves(compact::IncrementalCompact, continue end return nothing - # Expand the Expr(:new) to include it's element Expr(:new) nodes up until the one we want - compact[leaf] = nothing - for i = (length(def.args) + 1):(1+field) - ftyp = fieldtype(typ, i - 1) - isbitstype(ftyp) || return nothing - ninst = effect_free(NewInstruction(Expr(:new, ftyp), result_t)) - push!(def.args, insert_node!(compact, leaf, ninst)) - end - compact[leaf] = def end lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves) continue - elseif is_getfield_captures(def, compact) + # NOTE we can enable this, but most `:splatnew` expressions are transformed into + # `:new` expressions by the inlinear + # elseif isexpr(def, :splatnew) && length(def.args) == 2 && isa(def.args[2], AnySSAValue) 
+ # tplssa = def.args[2]::AnySSAValue + # tplexpr = compact[tplssa][:stmt] + # if is_known_call(tplexpr, tuple, compact) && 1 ≤ field < length(tplexpr.args) + # lift_arg!(compact, tplssa, cache_key, tplexpr, 1+field, lifted_leaves) + # continue + # end + # return nothing + elseif is_getfield_captures(def, compact, 𝕃ₒ) # Walk to new_opaque_closure ocleaf = def.args[2] if isa(ocleaf, AnySSAValue) @@ -456,45 +475,45 @@ function lift_arg!( if is_old(compact, leaf) && isa(lifted, SSAValue) lifted = OldSSAValue(lifted.id) if already_inserted(compact, lifted) - lifted = compact.ssa_rename[lifted.id] - end - end - if isa(lifted, GlobalRef) || isa(lifted, Expr) - lifted = insert_node!(compact, leaf, effect_free(NewInstruction(lifted, argextype(lifted, compact)))) - compact[leaf] = nothing - stmt.args[argidx] = lifted - compact[leaf] = stmt - if isa(leaf, SSAValue) && leaf.id < compact.result_idx - push!(compact.late_fixup, leaf.id) + new_lifted = compact.ssa_rename[lifted.id] + if isa(new_lifted, Refined) + new_lifted = new_lifted.val + end + # Special case: If lifted happens to be the statement we're currently processing, + # leave it as old SSAValue in case we decide to handle this in the renamer + if !isa(new_lifted, SSAValue) || new_lifted != SSAValue(compact.result_idx-1) + lifted = new_lifted + end end end lifted_leaves[cache_key] = LiftedValue(lifted) - nothing + return nothing end function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) if isa(leaf, OldSSAValue) && already_inserted(compact, leaf) leaf = compact.ssa_rename[leaf.id] + if isa(leaf, Refined) + leaf = leaf.val + end if isa(leaf, AnySSAValue) leaf = simple_walk(compact, leaf) end if isa(leaf, AnySSAValue) - def = compact[leaf][:inst] + def = compact[leaf][:stmt] else def = leaf end elseif isa(leaf, AnySSAValue) - def = compact[leaf][:inst] + def = compact[leaf][:stmt] else def = leaf end return Pair{Any, Any}(def, leaf) end -make_MaybeUndef(@nospecialize(typ)) = isa(typ, MaybeUndef) ? typ : MaybeUndef(typ) - """ - lift_comparison!(cmp, compact::IncrementalCompact, idx::Int, stmt::Expr) + lift_comparison!(cmp, compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) Replaces `cmp(φ(x, y)::Union{X,Y}, constant)` by `φ(cmp(x, constant), cmp(y, constant))`, where `cmp(x, constant)` and `cmp(y, constant)` can be replaced with constant `Bool`eans. @@ -509,7 +528,7 @@ end function lift_comparison! 
end function lift_comparison!(::typeof(===), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}) + idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) args = stmt.args length(args) == 3 || return lhs, rhs = args[2], args[3] @@ -525,44 +544,51 @@ function lift_comparison!(::typeof(===), compact::IncrementalCompact, else return end - lift_comparison_leaves!(egal_tfunc, compact, val, cmp, lifting_cache, idx) + lift_comparison_leaves!(egal_tfunc, compact, val, cmp, idx, 𝕃ₒ) end function lift_comparison!(::typeof(isa), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}) + idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) args = stmt.args length(args) == 3 || return cmp = argextype(args[3], compact) val = args[2] - lift_comparison_leaves!(isa_tfunc, compact, val, cmp, lifting_cache, idx) + lift_comparison_leaves!(isa_tfunc, compact, val, cmp, idx, 𝕃ₒ) end function lift_comparison!(::typeof(isdefined), compact::IncrementalCompact, - idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}) + idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) args = stmt.args length(args) == 3 || return cmp = argextype(args[3], compact) isa(cmp, Const) || return # `isdefined_tfunc` won't return Const val = args[2] - lift_comparison_leaves!(isdefined_tfunc, compact, val, cmp, lifting_cache, idx) + lift_comparison_leaves!(isdefined_tfunc, compact, val, cmp, idx, 𝕃ₒ) +end + +function phi_or_ifelse_predecessors(@nospecialize(def), compact::IncrementalCompact) + isa(def, PhiNode) && return def.values + is_known_call(def, Core.ifelse, compact) && return def.args[3:4] + return nothing end function lift_comparison_leaves!(@specialize(tfunc), compact::IncrementalCompact, @nospecialize(val), @nospecialize(cmp), - lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, idx::Int) + idx::Int, 𝕃ₒ::AbstractLattice) typeconstraint = widenconst(argextype(val, compact)) if isa(val, Union{OldSSAValue, SSAValue}) val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end isa(typeconstraint, Union) || return # bail out if there won't be a good chance for lifting - leaves, visited_phinodes = collect_leaves(compact, val, typeconstraint) + + leaves, visited_philikes = collect_leaves(compact, val, typeconstraint, 𝕃ₒ, phi_or_ifelse_predecessors) length(leaves) ≤ 1 && return # bail out if we don't have multiple leaves # check if we can evaluate the comparison for each one of the leaves lifted_leaves = nothing for i = 1:length(leaves) leaf = leaves[i] - result = tfunc(argextype(leaf, compact), cmp) + result = tfunc(𝕃ₒ, argextype(leaf, compact), cmp) if isa(result, Const) if lifted_leaves === nothing lifted_leaves = LiftedLeaves() @@ -574,41 +600,140 @@ function lift_comparison_leaves!(@specialize(tfunc), end # perform lifting - lifted_val = perform_lifting!(compact, - visited_phinodes, cmp, lifting_cache, Bool, - lifted_leaves::LiftedLeaves, val, nothing)::LiftedValue + (lifted_val, nest) = perform_lifting!(compact, + visited_philikes, cmp, Bool, lifted_leaves::LiftedLeaves, val, nothing) + + compact[idx] = (lifted_val::LiftedValue).val + + finish_phi_nest!(compact, nest) +end - compact[idx] = lifted_val.x +struct IfElseCall + call::Expr end -struct LiftedPhi +# An intermediate data structure used for lifting expressions through a +# "phi-like" instruction (either a PhiNode or a call to Core.ifelse) +struct LiftedPhilike ssa::AnySSAValue - node::PhiNode + node::Union{PhiNode,IfElseCall} 
need_argupdate::Bool end +struct SkipToken end; const SKIP_TOKEN = SkipToken() + +function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=::AnySSAValue=#), @nospecialize(old_value), + lifted_philikes::Vector{LiftedPhilike}, lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int}) + val = old_value + if is_old(compact, old_node_ssa) && isa(val, SSAValue) + val = OldSSAValue(val.id) + end + if isa(val, AnySSAValue) + val = simple_walk(compact, val) + end + if val in keys(lifted_leaves) + lifted_val = lifted_leaves[val] + if isa(lifted_leaves, LiftedDefs) + return lifted_val + end + lifted_val === nothing && return UNDEF_TOKEN + val = lifted_val.val + if isa(val, AnySSAValue) + callback = (@nospecialize(pi), @nospecialize(idx)) -> true + val = simple_walk(compact, val, callback) + end + return val + elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) + return lifted_philikes[reverse_mapping[val]].ssa + else + return SKIP_TOKEN # Probably ignored by path condition, skip this + end +end + function is_old(compact, @nospecialize(old_node_ssa)) - isa(old_node_ssa, OldSSAValue) && - !is_pending(compact, old_node_ssa) && - !already_inserted(compact, old_node_ssa) + isa(old_node_ssa, OldSSAValue) || return false + is_pending(compact, old_node_ssa) && return false + already_inserted(compact, old_node_ssa) && return false + return true end -mutable struct LazyDomtree - ir::IRCode - domtree::DomTree - LazyDomtree(ir::IRCode) = new(ir) +struct PhiNest + visited_philikes::Vector{AnySSAValue} + lifted_philikes::Vector{LiftedPhilike} + lifted_leaves::Union{LiftedLeaves, LiftedDefs} + reverse_mapping::IdDict{AnySSAValue, Int} end -function get(x::LazyDomtree) - isdefined(x, :domtree) && return x.domtree - return @timeit "domtree 2" x.domtree = construct_domtree(x.ir.cfg.blocks) + +function finish_phi_nest!(compact::IncrementalCompact, nest::PhiNest) + (;visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping) = nest + nphilikes = length(lifted_philikes) + # Fix up arguments + for i = 1:nphilikes + (old_node_ssa, lf) = visited_philikes[i], lifted_philikes[i] + lf.need_argupdate || continue + should_count = !isa(lf.ssa, OldSSAValue) || already_inserted(compact, lf.ssa) + + lfnode = lf.node + if isa(lfnode, PhiNode) + old_node = compact[old_node_ssa][:stmt]::PhiNode + new_node = lfnode + for i = 1:length(old_node.values) + isassigned(old_node.values, i) || continue + val = lifted_value(compact, old_node_ssa, old_node.values[i], + lifted_philikes, lifted_leaves, reverse_mapping) + val !== SKIP_TOKEN && push!(new_node.edges, old_node.edges[i]) + if val === UNDEF_TOKEN + resize!(new_node.values, length(new_node.values)+1) + elseif val !== SKIP_TOKEN + should_count && _count_added_node!(compact, val) + push!(new_node.values, val) + end + end + elseif isa(lfnode, IfElseCall) + old_node = compact[old_node_ssa][:stmt]::Expr + then_result, else_result = old_node.args[3], old_node.args[4] + + then_result = lifted_value(compact, old_node_ssa, then_result, + lifted_philikes, lifted_leaves, reverse_mapping) + else_result = lifted_value(compact, old_node_ssa, else_result, + lifted_philikes, lifted_leaves, reverse_mapping) + + # In cases where the Core.ifelse condition is statically-known, e.g., thanks + # to a PiNode from a guarding conditional, replace with the remaining branch. + if then_result === SKIP_TOKEN || else_result === SKIP_TOKEN + only_result = (then_result === SKIP_TOKEN) ? 
else_result : then_result + + # Replace Core.ifelse(%cond, %a, %b) with %a + compact[lf.ssa] = only_result + + # Note: Core.ifelse(%cond, %a, %b) has observable effects (!nothrow), but since + # we have not deleted the preceding statement that this was derived from, this + # replacement is safe, i.e. it will not affect the effects observed. + continue + end + + @assert then_result !== SKIP_TOKEN && then_result !== UNDEF_TOKEN + @assert else_result !== SKIP_TOKEN && else_result !== UNDEF_TOKEN + + if should_count + _count_added_node!(compact, then_result) + _count_added_node!(compact, else_result) + end + + push!(lfnode.call.args, then_result) + push!(lfnode.call.args, else_result) + end + end end function perform_lifting!(compact::IncrementalCompact, - visited_phinodes::Vector{AnySSAValue}, @nospecialize(cache_key), - lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, - @nospecialize(result_t), lifted_leaves::LiftedLeaves, @nospecialize(stmt_val), - lazydomtree::Union{LazyDomtree,Nothing}) - reverse_mapping = IdDict{AnySSAValue, Int}(ssa => id for (id, ssa) in enumerate(visited_phinodes)) + visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key), + @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val), + lazydomtree::Union{LazyDomtree,Nothing}) + reverse_mapping = IdDict{AnySSAValue, Int}() + for id in 1:length(visited_philikes) + reverse_mapping[visited_philikes[id]] = id + end # Check if all the lifted leaves are the same local the_leaf @@ -623,101 +748,400 @@ function perform_lifting!(compact::IncrementalCompact, end end - the_leaf_val = isa(the_leaf, LiftedValue) ? the_leaf.x : nothing - if !isa(the_leaf_val, SSAValue) - all_same = false - end - - if all_same + if all_same && isa(the_leaf, LiftedValue) dominates_all = true - if lazydomtree !== nothing - domtree = get(lazydomtree) - for item in visited_phinodes - if !dominates_ssa(compact, domtree, the_leaf_val, item) - dominates_all = false - break + the_leaf_val = the_leaf.val + if isa(the_leaf_val, AnySSAValue) + if lazydomtree === nothing + # Must conservatively assume this + dominates_all = false + else + # This code guards against the possibility of accidentally forwarding a value from a + # previous iteration. Consider for example: + # + # %p = phi(%arg, %t) + # %b = <...> + # %c = getfield(%p, 1) + # %t = tuple(%b) + # + # It would be incorrect to replace `%c` by `%b`, because that would read the value of + # `%b` in the *current* iteration, while the value of `%b` that comes in via `%p` is + # that of the previous iteration. 
+ domtree = get!(lazydomtree) + for item in visited_philikes + if !dominates_ssa(compact, domtree, the_leaf_val, item) + dominates_all = false + break + end end end - if dominates_all - return the_leaf + end + if dominates_all + if isa(the_leaf, OldSSAValue) + the_leaf = simple_walk(compact, the_leaf) end + return Pair{Any, PhiNest}(the_leaf, PhiNest(visited_philikes, Vector{LiftedPhilike}(undef, 0), lifted_leaves, reverse_mapping)) end end # Insert PhiNodes - lifted_phis = LiftedPhi[] - for item in visited_phinodes - # FIXME this cache is broken somehow - # ckey = Pair{AnySSAValue, Any}(item, cache_key) - # cached = ckey in keys(lifting_cache) - cached = false - if cached - ssa = lifting_cache[ckey] - push!(lifted_phis, LiftedPhi(ssa, compact[ssa][:inst]::PhiNode, false)) - continue + nphilikes = length(visited_philikes) + lifted_philikes = Vector{LiftedPhilike}(undef, nphilikes) + for i = 1:nphilikes + old_ssa = visited_philikes[i] + old_inst = compact[old_ssa] + old_node = old_inst[:stmt]::Union{PhiNode,Expr} + if isa(old_node, PhiNode) + new_node = PhiNode() + ssa = insert_node!(compact, old_ssa, effect_free_and_nothrow(NewInstruction(new_node, result_t))) + lifted_philikes[i] = LiftedPhilike(ssa, new_node, true) + else + @assert is_known_call(old_node, Core.ifelse, compact) + ifelse_func, condition = old_node.args + if is_old(compact, old_ssa) && isa(condition, SSAValue) + condition = OldSSAValue(condition.id) + end + + new_node = Expr(:call, ifelse_func, condition) # Renamed then_result, else_result added below + new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag]) + + ssa = insert_node!(compact, old_ssa, new_inst, #= attach_after =# true) + lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(new_node), true) end - n = PhiNode() - ssa = insert_node!(compact, item, effect_free(NewInstruction(n, result_t))) - # lifting_cache[ckey] = ssa - push!(lifted_phis, LiftedPhi(ssa, n, true)) end - # Fix up arguments - for (old_node_ssa, lf) in zip(visited_phinodes, lifted_phis) - old_node = compact[old_node_ssa][:inst]::PhiNode - new_node = lf.node - lf.need_argupdate || continue - for i = 1:length(old_node.edges) - edge = old_node.edges[i] - isassigned(old_node.values, i) || continue - val = old_node.values[i] - if is_old(compact, old_node_ssa) && isa(val, SSAValue) - val = OldSSAValue(val.id) - end - if isa(val, AnySSAValue) - val = simple_walk(compact, val) - end - if val in keys(lifted_leaves) - push!(new_node.edges, edge) - lifted_val = lifted_leaves[val] - if lifted_val === nothing - resize!(new_node.values, length(new_node.values)+1) - continue + # Fixup the stmt itself + if isa(stmt_val, Union{SSAValue, OldSSAValue}) + stmt_val = simple_walk(compact, stmt_val) + end + + if stmt_val in keys(lifted_leaves) + stmt_val = lifted_leaves[stmt_val] + elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping) + stmt_val = LiftedValue(lifted_philikes[reverse_mapping[stmt_val]].ssa) + end + + return Pair{Any, PhiNest}(stmt_val, PhiNest(visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping)) +end + +function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr) + length(stmt.args) != 3 && return + + vec = stmt.args[2] + val = stmt.args[3] + valT = argextype(val, compact) + (isa(valT, Const) && isa(valT.val, Int)) || return + valI = valT.val::Int + valI >= 1 || return + + if isa(vec, SimpleVector) + valI <= length(vec) || return + compact[idx] = quoted(vec[valI]) + elseif isa(vec, SSAValue) + def = compact[vec][:stmt] + if 
is_known_call(def, Core.svec, compact) + valI <= length(def.args) - 1 || return + compact[idx] = def.args[valI+1] + elseif is_known_call(def, Core._compute_sparams, compact) + valI != 1 && return # TODO generalize this for more values of valI + res = _lift_svec_ref(def, compact) + res === nothing && return + compact[idx] = res.val + end + end + return +end + +function lift_leaves_keyvalue(compact::IncrementalCompact, @nospecialize(key), + leaves::Vector{Any}, 𝕃ₒ::AbstractLattice) + # For every leaf, the lifted value + lifted_leaves = LiftedLeaves() + for i = 1:length(leaves) + leaf = leaves[i] + cache_key = leaf + if isa(leaf, AnySSAValue) + (def, leaf) = walk_to_def(compact, leaf) + if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, compact) + @assert isexpr(def, :invoke) + if length(def.args) in (5, 6) + set_key = def.args[end-1] + set_val_idx = length(def.args) + elseif length(def.args) == 4 + # Key is deleted + # TODO: Model this + return nothing + elseif length(def.args) == 3 + # The whole collection is deleted + # TODO: Model this + return nothing + else + return nothing end - val = lifted_val.x - if isa(val, AnySSAValue) - callback = (@nospecialize(pi), @nospecialize(idx)) -> true - val = simple_walk(compact, val, callback) + if set_key === key || (egal_tfunc(𝕃ₒ, argextype(key, compact), argextype(set_key, compact)) == Const(true)) + lift_arg!(compact, leaf, cache_key, def, set_val_idx, lifted_leaves) + break end - push!(new_node.values, val) - elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) - push!(new_node.edges, edge) - push!(new_node.values, lifted_phis[reverse_mapping[val]].ssa) + continue + end + end + return nothing + end + return lifted_leaves +end + +function lift_keyvalue_get!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) + collection = stmt.args[end-1] + key = stmt.args[end] + + function keyvalue_predecessors(@nospecialize(def), compact::IncrementalCompact) + if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, compact) + @assert isexpr(def, :invoke) + if length(def.args) in (5, 6) + collection = def.args[end-2] + set_key = def.args[end-1] + set_val_idx = length(def.args) + elseif length(def.args) == 4 + collection = def.args[end-1] + # Key is deleted + # TODO: Model this + return nothing + elseif length(def.args) == 3 + collection = def.args[end] + # The whole collection is deleted + # TODO: Model this + return nothing else - # Probably ignored by path condition, skip this + return nothing + end + if set_key === key || (egal_tfunc(𝕃ₒ, argextype(key, compact), argextype(set_key, compact)) == Const(true)) + # This is an actual def + return nothing end + return Any[collection] end - count_added_node!(compact, new_node) + return phi_or_ifelse_predecessors(def, compact) end - # Fixup the stmt itself - if isa(stmt_val, Union{SSAValue, OldSSAValue}) - stmt_val = simple_walk(compact, stmt_val) + leaves, visited_philikes = collect_leaves(compact, collection, Any, 𝕃ₒ, keyvalue_predecessors) + isempty(leaves) && return + + lifted_leaves = lift_leaves_keyvalue(compact, key, leaves, 𝕃ₒ) + lifted_leaves === nothing && return + + result_t = Union{} + for v in values(lifted_leaves) + v === nothing && return + result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact)) end - if stmt_val in keys(lifted_leaves) - return lifted_leaves[stmt_val] - elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping) - return LiftedValue(lifted_phis[reverse_mapping[stmt_val]].ssa) + (lifted_val, nest) = 
perform_lifting!(compact, + visited_philikes, key, result_t, lifted_leaves, collection, nothing) + + compact[idx] = lifted_val === nothing ? nothing : Expr(:call, GlobalRef(Core, :tuple), lifted_val.val) + finish_phi_nest!(compact, nest) + if lifted_val !== nothing + if !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], tuple_tfunc(𝕃ₒ, Any[result_t])) + add_flag!(compact[SSAValue(idx)], IR_FLAG_REFINED) + end end - return stmt_val # N.B. should never happen + return end +# TODO: We could do the whole lifing machinery here, but really all +# we want to do is clean this up when it got inserted by inlining, +# which always targets simple `svec` call or `_compute_sparams`, +# so this specialized lifting would be enough +@inline function _lift_svec_ref(def::Expr, compact::IncrementalCompact) + length(def.args) >= 3 || return nothing + m = argextype(def.args[2], compact) + isa(m, Const) || return nothing + m = m.val + isa(m, Method) || return nothing + + # TODO: More general structural analysis of the intersection + sig = m.sig + isa(sig, UnionAll) || return nothing + tvar = sig.var + sig = sig.body + isa(sig, DataType) || return nothing + sig.name === Tuple.name || return nothing + length(sig.parameters) >= 1 || return nothing + + i = let sig=sig + findfirst(j::Int->has_typevar(sig.parameters[j], tvar), 1:length(sig.parameters)) + end + i === nothing && return nothing + let sig=sig + any(j::Int->has_typevar(sig.parameters[j], tvar), i+1:length(sig.parameters)) + end && return nothing + + arg = sig.parameters[i] + + rarg = def.args[2 + i] + isa(rarg, SSAValue) || return nothing + argdef = compact[rarg][:stmt] + if isexpr(argdef, :new) + rarg = argdef.args[1] + isa(rarg, SSAValue) || return nothing + argdef = compact[rarg][:stmt] + else + isType(arg) || return nothing + arg = arg.parameters[1] + end + + is_known_call(argdef, Core.apply_type, compact) || return nothing + length(argdef.args) == 3 || return nothing + + applyT = argextype(argdef.args[2], compact) + isa(applyT, Const) || return nothing + applyT = applyT.val + + isa(applyT, UnionAll) || return nothing + # N.B.: At the moment we only lift the valI == 1 case, so we + # only need to look at the outermost tvar. + applyTvar = applyT.var + applyTbody = applyT.body + + arg = unwrap_unionall(arg) + applyTbody = unwrap_unionall(applyTbody) + + (isa(arg, DataType) && isa(applyTbody, DataType)) || return nothing + applyTbody.name === arg.name || return nothing + length(applyTbody.parameters) == length(arg.parameters) || return nothing + for i = 1:length(applyTbody.parameters) + if applyTbody.parameters[i] === applyTvar && arg.parameters[i] === tvar + return LiftedValue(argdef.args[3]) + end + end + return nothing +end + +struct IsEgal <: Function + x::Any + IsEgal(@nospecialize(x)) = new(x) +end +(x::IsEgal)(@nospecialize(y)) = x.x === y + +# This tries to match patterns of the form +# %ft = typeof(%farg) +# %Targ = apply_type(Foo, ft) +# %x = new(%Targ, %farg) +# +# and if possible refines the nothrowness of the new expr based on it. 
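At the source level, the typeof/apply_type/new shape matched here typically comes from a constructor that computes a type parameter directly from the value being stored, roughly as in this hypothetical sketch; because the field type is then exactly `typeof(x)`, the field store cannot fail to convert, which is the nothrow fact the following function tries to establish.

    struct Box{T}               # hypothetical example type
        val::T
    end

    # After inlining, this lowers to roughly:
    #   %ft = typeof(%x); %T = apply_type(Box, %ft); %box = new(%T, %x)
    make_box(x) = Box{typeof(x)}(x)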
+function pattern_match_typeof(compact::IncrementalCompact, typ::DataType, fidx::Int, + @nospecialize(Targ), @nospecialize(farg)) + isa(Targ, SSAValue) || return false + + Tdef = compact[Targ][:stmt] + is_known_call(Tdef, Core.apply_type, compact) || return false + length(Tdef.args) ≥ 2 || return false + + applyT = argextype(Tdef.args[2], compact) + isa(applyT, Const) || return false + + applyT = applyT.val + tvars = Any[] + while isa(applyT, UnionAll) + applyTvar = applyT.var + applyT = applyT.body + push!(tvars, applyTvar) + end + + @assert applyT.name === typ.name + fT = fieldtype(applyT, fidx) + idx = findfirst(IsEgal(fT), tvars) + idx === nothing && return false + checkbounds(Bool, Tdef.args, 2+idx) || return false + valarg = Tdef.args[2+idx] + isa(valarg, SSAValue) || return false + valdef = compact[valarg][:stmt] + is_known_call(valdef, typeof, compact) || return false + + return valdef.args[2] === farg +end + +function refine_new_effects!(𝕃ₒ::AbstractLattice, compact::IncrementalCompact, idx::Int, stmt::Expr) + inst = compact[SSAValue(idx)] + if has_flag(inst, (IR_FLAG_NOTHROW | IR_FLAG_EFFECT_FREE)) + return # already accurate + end + (consistent, effect_free_and_nothrow, nothrow) = new_expr_effect_flags(𝕃ₒ, stmt.args, compact, pattern_match_typeof) + if consistent + add_flag!(inst, IR_FLAG_CONSISTENT) + end + if effect_free_and_nothrow + add_flag!(inst, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) + elseif nothrow + add_flag!(inst, IR_FLAG_NOTHROW) + end + return nothing +end + +function fold_ifelse!(compact::IncrementalCompact, idx::Int, stmt::Expr) + length(stmt.args) == 4 || return false + condarg = stmt.args[2] + condtyp = argextype(condarg, compact) + if isa(condtyp, Const) + if condtyp.val === true + compact[idx] = stmt.args[3] + return true + elseif condtyp.val === false + compact[idx] = stmt.args[4] + return true + end + end + return false +end + +function fold_current_scope!(compact::IncrementalCompact, idx::Int, stmt::Expr, lazydomtree::LazyDomtree) + domtree = get!(lazydomtree) + + # The frontend enforces the invariant that any :enter dominates its active + # region, so all we have to do here is walk the domtree to find it. + dombb = block_for_inst(compact, SSAValue(idx)) + + local bbterminator + prevdombb = dombb + while true + dombb = domtree.idoms_bb[dombb] + + # Did not find any dominating :enter - scope is inherited from the outside + dombb == 0 && return nothing + + bbterminator = compact[SSAValue(last(compact.cfg_transform.result_bbs[dombb].stmts))][:stmt] + if !isa(bbterminator, EnterNode) || !isdefined(bbterminator, :scope) + prevdombb = dombb + continue + end + if bbterminator.catch_dest == 0 + # TODO: dominance alone is not enough here, we need to actually find the :leaves + return nothing + end + # Check that we are inside the :enter region, i.e. 
are dominated by the first block in the + # enter region - otherwise we've already left this :enter and should keep going + if prevdombb != dombb + 1 + prevdombb = dombb + continue + end + compact[idx] = bbterminator.scope + return nothing + end +end + + # NOTE we use `IdSet{Int}` instead of `BitSet` for in these passes since they work on IR after inlining, # which can be very large sometimes, and program counters in question are often very sparse const SPCSet = IdSet{Int} +struct IntermediaryCollector + intermediaries::SPCSet +end +function (this::IntermediaryCollector)(@nospecialize(pi), @nospecialize(ssa)) + push!(this.intermediaries, ssa.id) + return false +end + """ sroa_pass!(ir::IRCode) -> newir::IRCode @@ -736,16 +1160,16 @@ its argument). In a case when all usages are fully eliminated, `struct` allocation may also be erased as a result of succeeding dead code elimination. """ -function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothing) +function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) + 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations - lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() # initialization of domtree is delayed to avoid the expensive computation in many cases lazydomtree = LazyDomtree(ir) - for ((_, idx), stmt) in compact + for ((old_idx, idx), stmt) in compact # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement) isa(stmt, Expr) || continue - is_setfield = is_isdefined = is_finalizer = false + is_setfield = is_isdefined = is_finalizer = is_keyvalue_get = false field_ordering = :unspecified if is_known_call(stmt, setfield!, compact) 4 <= length(stmt.args) <= 5 || continue @@ -770,10 +1194,15 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin end elseif is_known_call(stmt, Core.finalizer, compact) 3 <= length(stmt.args) <= 5 || continue - # Inlining performs legality checks on the finalizer to determine - # whether or not we may inline it. If so, it appends extra arguments - # at the end of the intrinsic. Detect that here. - length(stmt.args) == 5 || continue + info = compact[SSAValue(idx)][:info] + if isa(info, FinalizerInfo) + is_finalizer_inlineable(info.effects) || continue + else + # Inlining performs legality checks on the finalizer to determine + # whether or not we may inline it. If so, it appends extra arguments + # at the end of the intrinsic. Detect that here. 
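For background, the user-facing pattern this branch handles looks roughly like the hypothetical sketch below; when inlining has vetted the finalizer, it appends extra arguments (a may-inline flag and the resolved MethodInstance) to the intrinsic, which is the expanded five-argument form the length check just below detects. If the later legality checks succeed, the finalizer body may then be spliced in at the point where the object provably dies.

    mutable struct FileHandle      # hypothetical resource type
        fd::Int
    end

    function open_and_use()
        h = FileHandle(3)
        finalizer(h) do obj
            obj.fd = -1            # cheap, allocation-free cleanup: a candidate for inlining
        end
        return h.fd
    end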
+ length(stmt.args) == 5 || continue + end is_finalizer = true elseif isexpr(stmt, :foreigncall) nccallargs = length(stmt.args[3]::SimpleVector) @@ -790,7 +1219,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin def = simple_walk(compact, preserved_arg, callback) isa(def, SSAValue) || continue defidx = def.id - def = compact[def][:inst] + def = compact[def][:stmt] if is_known_call(def, tuple, compact) record_immutable_preserve!(new_preserves, def, compact) push!(preserved, preserved_arg.id) @@ -822,48 +1251,52 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin else # TODO: This isn't the best place to put these if is_known_call(stmt, typeassert, compact) canonicalize_typeassert!(compact, idx, stmt) + elseif is_known_call(stmt, Core._svec_ref, compact) + lift_svec_ref!(compact, idx, stmt) elseif is_known_call(stmt, (===), compact) - lift_comparison!(===, compact, idx, stmt, lifting_cache) + lift_comparison!(===, compact, idx, stmt, 𝕃ₒ) elseif is_known_call(stmt, isa, compact) - lift_comparison!(isa, compact, idx, stmt, lifting_cache) + lift_comparison!(isa, compact, idx, stmt, 𝕃ₒ) + elseif is_known_call(stmt, Core.ifelse, compact) + fold_ifelse!(compact, idx, stmt) + elseif is_known_invoke_or_call(stmt, Core.OptimizedGenerics.KeyValue.get, compact) + 2 == (length(stmt.args) - (isexpr(stmt, :invoke) ? 2 : 1)) || continue + lift_keyvalue_get!(compact, idx, stmt, 𝕃ₒ) + elseif is_known_call(stmt, Core.current_scope, compact) + fold_current_scope!(compact, idx, stmt, lazydomtree) + elseif isexpr(stmt, :new) + refine_new_effects!(𝕃ₒ, compact, idx, stmt) end continue end - # analyze this `getfield` / `isdefined` / `setfield!` call - - if !is_finalizer - field = try_compute_field_stmt(compact, stmt) - field === nothing && continue - val = stmt.args[2] - else + if is_finalizer val = stmt.args[3] + else + # analyze `getfield` / `isdefined` / `setfield!` call + val = stmt.args[2] end - - struct_typ = unwrap_unionall(widenconst(argextype(val, compact))) - if isa(struct_typ, Union) && struct_typ <: Tuple - struct_typ = unswitchtupleunion(struct_typ) - end - if isa(struct_typ, Union) && is_isdefined - lift_comparison!(isdefined, compact, idx, stmt, lifting_cache) + struct_typ = widenconst(argextype(val, compact)) + struct_argtyp = argument_datatype(struct_typ) + if struct_argtyp === nothing + if isa(struct_typ, Union) && is_isdefined + lift_comparison!(isdefined, compact, idx, stmt, 𝕃ₒ) + end continue end - isa(struct_typ, DataType) || continue + struct_typ_name = struct_argtyp.name - struct_typ.name.atomicfields == C_NULL || continue # TODO: handle more + struct_typ_name.atomicfields == C_NULL || continue # TODO: handle more if !((field_ordering === :unspecified) || (field_ordering isa Const && field_ordering.val === :not_atomic)) continue end # analyze this mutable struct here for the later pass - if ismutabletype(struct_typ) + if ismutabletypename(struct_typ_name) isa(val, SSAValue) || continue let intermediaries = SPCSet() - callback = function (@nospecialize(pi), @nospecialize(ssa)) - push!(intermediaries, ssa.id) - return false - end + callback = IntermediaryCollector(intermediaries) def = simple_walk(compact, val, callback) # Mutable stuff here isa(def, SSAValue) || continue @@ -876,7 +1309,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin elseif is_isdefined push!(defuse.uses, IsdefinedUse(idx)) elseif is_finalizer - push!(defuse.uses, AddFinalizerUse(idx)) + push!(defuse.uses, 
FinalizerUse(idx)) else push!(defuse.uses, GetfieldUse(idx)) end @@ -890,38 +1323,68 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin end # perform SROA on immutable structs here on - - field = try_compute_fieldidx(struct_typ, field) + field = try_compute_fieldidx_stmt(compact, stmt, struct_typ) field === nothing && continue - leaves, visited_phinodes = collect_leaves(compact, val, struct_typ) + leaves, visited_philikes = collect_leaves(compact, val, struct_typ, 𝕃ₒ, phi_or_ifelse_predecessors) isempty(leaves) && continue - result_t = argextype(SSAValue(idx), compact) - lifted_result = lift_leaves(compact, result_t, field, leaves) + lifted_result = lift_leaves(compact, field, leaves, 𝕃ₒ) lifted_result === nothing && continue lifted_leaves, any_undef = lifted_result - if any_undef - result_t = make_MaybeUndef(result_t) + result_t = Union{} + for v in values(lifted_leaves) + v === nothing && continue + result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact)) + end + + (lifted_val, nest) = perform_lifting!(compact, + visited_philikes, field, result_t, lifted_leaves, val, lazydomtree) + + node_was_deleted = false + line = compact[SSAValue(idx)][:line] + if lifted_val !== nothing && !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], result_t) + compact[idx] = lifted_val === nothing ? nothing : lifted_val.val + add_flag!(compact[SSAValue(idx)], IR_FLAG_REFINED) + elseif lifted_val === nothing || isa(lifted_val.val, AnySSAValue) + # Save some work in a later compaction, by inserting this into the renamer now, + # but only do this if we didn't set the REFINED flag, to save work for irinterp + # in revisiting only the renamings that came through *this* idx. + delete_inst_here!(compact) + compact.ssa_rename[old_idx] = lifted_val === nothing ? nothing : lifted_val.val + node_was_deleted = true + else + compact[idx] = lifted_val === nothing ? nothing : lifted_val.val end - val = perform_lifting!(compact, - visited_phinodes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree) + finish_phi_nest!(compact, nest) # Insert the undef check if necessary if any_undef - if val === nothing - insert_node!(compact, SSAValue(idx), - non_effect_free(NewInstruction(Expr(:throw_undef_if_not, Symbol("##getfield##"), false), Nothing))) + if lifted_val === nothing + def_val = false else - # val must be defined + lifted_leaves_def = LiftedDefs() + for (k, v) in pairs(lifted_leaves) + lifted_leaves_def[k] = v === nothing ? false : true + end + (def_val, nest) = perform_lifting!(compact, + visited_philikes, field, Bool, lifted_leaves_def, val, lazydomtree) + def_val = (def_val::LiftedValue).val + finish_phi_nest!(compact, nest) + end + ni = NewInstruction( + Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val), Nothing, line) + if node_was_deleted + insert_node_here!(compact, ni, true) + else + insert_node!(compact, SSAValue(idx), ni) end else - @assert val !== nothing + # val must be defined + @assert lifted_val !== nothing end - - compact[idx] = val === nothing ? 
nothing : val.x end non_dce_finish!(compact) @@ -932,7 +1395,9 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin # but before the DCE) for our predicate within `sroa_mutables!`, but we also # try an extra effort using a callback so that reference counts are updated used_ssas = copy(compact.used_ssas) - simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1) + simple_dce!(compact) do x::SSAValue + used_ssas[x.id] -= 1 + end ir = complete(compact) sroa_mutables!(ir, defuses, used_ssas, lazydomtree, inlining) return ir @@ -942,20 +1407,25 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothin end end -function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState) - code = get(inlining.mi_cache, mi, nothing) +# NOTE we resolve the inlining source here as we don't want to serialize `Core.Compiler` +# data structure into the global cache (see the comment in `handle_finalizer_call!`) +function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, + mi::MethodInstance, @nospecialize(info::CallInfo), inlining::InliningState, + attach_after::Bool) + code = get(code_cache(inlining), mi, nothing) + et = InliningEdgeTracker(inlining) if code isa CodeInstance if use_const_api(code) # No code in the function - Nothing to do - inlining.et !== nothing && push!(inlining.et, mi) + add_inlining_backedge!(et, mi) return true end - src = code.inferred + src = @atomic :monotonic code.inferred else - src = code + src = nothing end - src = inlining_policy(inlining.interp, src, IR_FLAG_NULL, mi, Any[]) + src = inlining_policy(inlining.interp, src, info, IR_FLAG_NULL) src === nothing && return false src = retrieve_ir_for_inlining(mi, src) @@ -963,39 +1433,172 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi:: length(src.cfg.blocks) == 1 || return false # Ok, we're committed to inlining the finalizer - inlining.et !== nothing && push!(inlining.et, mi) + add_inlining_backedge!(et, mi) - linetable_offset, extra_coverage_line = ir_inline_linetable!(ir.linetable, src, mi.def, ir[SSAValue(idx)][:line]) - if extra_coverage_line != 0 - insert_node!(ir, idx, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) - end + # TODO: Should there be a special line number node for inlined finalizers? + inlined_at = ir[SSAValue(idx)][:line] + ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, mi, inlined_at, argexprs) - # TODO: Use the actual inliner here rather than open coding this special - # purpose inliner. - spvals = mi.sparam_vals + # TODO: Use the actual inliner here rather than open coding this special purpose inliner. 
ssa_rename = Vector{Any}(undef, length(src.stmts)) for idx′ = 1:length(src.stmts) - urs = userefs(src[SSAValue(idx′)][:inst]) - for ur in urs - if isa(ur[], SSAValue) - ur[] = ssa_rename[ur[].id] - elseif isa(ur[], Argument) - ur[] = argexprs[ur[].n] - elseif isexpr(ur[], :static_parameter) - ur[] = spvals[ur[].args[1]] - end - end - # TODO: Scan newly added statement into the sroa defuse struct - stmt = urs[] - isa(stmt, ReturnNode) && continue inst = src[SSAValue(idx′)] - ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt, inst; line = inst[:line] + linetable_offset), true) + stmt′ = inst[:stmt] + isa(stmt′, ReturnNode) && continue + stmt′ = ssamap(stmt′) do ssa::SSAValue + ssa_rename[ssa.id] + end + stmt′ = ssa_substitute_op!(InsertBefore(ir, SSAValue(idx)), inst, stmt′, ssa_substitute) + ssa_rename[idx′] = insert_node!(ir, idx, + NewInstruction(inst; stmt=stmt′, line=inst[:line]+ssa_substitute.linetable_offset), + attach_after) end + return true end -is_nothrow(ir::IRCode, pc::Int) = ir.stmts[pc][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) ≠ 0 +is_nothrow(ir::IRCode, ssa::SSAValue) = has_flag(ir[ssa], IR_FLAG_NOTHROW) + +function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = nothing) + worklist = Int[from_bb] + visited = BitSet(from_bb) + if to_bb !== nothing + push!(visited, to_bb) + end + function visit!(bb::Int) + if bb ∉ visited + push!(visited, bb) + push!(worklist, bb) + end + end + while !isempty(worklist) + foreach(visit!, cfg.blocks[pop!(worklist)].succs) + end + return visited +end + +function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse, + inlining::InliningState, lazydomtree::LazyDomtree, + lazypostdomtree::LazyPostDomtree, @nospecialize(info::CallInfo)) + # For now, require that: + # 1. The allocation dominates the finalizer registration + # 2. The finalizer registration dominates all uses reachable from the + # finalizer registration. + # 3. The insertion block for the finalizer is the post-dominator of all + # uses and the finalizer registration block. The insertion block must + # be dominated by the finalizer registration block. + # 4. The path from the finalizer registration to the finalizer inlining + # location is nothrow + # + # TODO: We could relax item 3, by inlining the finalizer multiple times. + + # Check #1: The allocation dominates the finalizer registration + domtree = get!(lazydomtree) + finalizer_bb = block_for_inst(ir, finalizer_idx) + alloc_bb = block_for_inst(ir, idx) + dominates(domtree, alloc_bb, finalizer_bb) || return nothing + + bb_insert_block::Int = finalizer_bb + bb_insert_idx::Union{Int,Nothing} = finalizer_idx + function note_block_use!(usebb::Int, useidx::Int) + new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree), + bb_insert_block, usebb) + if new_bb_insert_block == bb_insert_block && bb_insert_idx !== nothing + bb_insert_idx = max(bb_insert_idx::Int, useidx) + elseif new_bb_insert_block == usebb + bb_insert_idx = useidx + else + bb_insert_idx = nothing + end + bb_insert_block = new_bb_insert_block + nothing + end + + # Collect all reachable blocks between the finalizer registration and the + # insertion point + blocks = reachable_blocks(ir.cfg, finalizer_bb, alloc_bb) + + # Check #2 + function check_defuse(x::Union{Int,SSAUse}) + duidx = x isa SSAUse ? 
x.idx : x + duidx == finalizer_idx && return true + bb = block_for_inst(ir, duidx) + # Not reachable from finalizer registration - we're ok + bb ∉ blocks && return true + note_block_use!(bb, duidx) + if dominates(domtree, finalizer_bb, bb) + return true + else + return false + end + end + all(check_defuse, defuse.uses) || return nothing + all(check_defuse, defuse.defs) || return nothing + + # Check #3 + dominates(domtree, finalizer_bb, bb_insert_block) || return nothing + + if !OptimizationParams(inlining.interp).assume_fatal_throw + # Collect all reachable blocks between the finalizer registration and the + # insertion point + blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] : + reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block) + + # Check #4 + function check_range_nothrow(ir::IRCode, s::Int, e::Int) + return all(s:e) do sidx::Int + sidx == finalizer_idx && return true + sidx == idx && return true + return is_nothrow(ir, SSAValue(sidx)) + end + end + for bb in blocks + range = ir.cfg.blocks[bb].stmts + s, e = first(range), last(range) + if bb == bb_insert_block + bb_insert_idx === nothing && continue + e = bb_insert_idx + end + if bb == finalizer_bb + s = finalizer_idx + end + check_range_nothrow(ir, s, e) || return nothing + end + end + + # Ok, legality check complete. Figure out the exact statement where we're + # gonna inline the finalizer. + loc = bb_insert_idx === nothing ? first(ir.cfg.blocks[bb_insert_block].stmts) : bb_insert_idx::Int + attach_after = bb_insert_idx !== nothing + + finalizer_stmt = ir[SSAValue(finalizer_idx)][:stmt] + argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]] + flag = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL + if length(finalizer_stmt.args) >= 4 + inline = finalizer_stmt.args[4] + if inline === nothing + # No code in the function - Nothing to do + else + mi = finalizer_stmt.args[5]::MethodInstance + if inline::Bool && try_inline_finalizer!(ir, argexprs, loc, mi, info, inlining, attach_after) + # the finalizer body has been inlined + else + newinst = add_flag(NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), flag) + insert_node!(ir, loc, newinst, attach_after) + end + end + else + newinst = add_flag(NewInstruction(Expr(:call, argexprs...), Nothing), flag) + insert_node!(ir, loc, newinst, attach_after) + end + # Erase the call to `finalizer` + ir[SSAValue(finalizer_idx)][:stmt] = nothing + return nothing +end + function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) + 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) + lazypostdomtree = LazyPostDomtree(ir) for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) # Check if there are any uses we did not account for. If so, the variable @@ -1010,80 +1613,32 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse nuses_total = used_ssas[idx] + nuses - length(intermediaries) nleaves == nuses_total || continue # Find the type for this allocation - defexpr = ir[SSAValue(idx)][:inst] + defexpr = ir[SSAValue(idx)][:stmt] isexpr(defexpr, :new) || continue newidx = idx typ = unwrap_unionall(ir.stmts[newidx][:type]) # Could still end up here if we tried to setfield! on an immutable, which would # error at runtime, but is not illegal to have in the IR. 
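For orientation, a hypothetical sketch of the kind of mutable allocation this def/use accounting targets; once every load and store of the object is rewritten to plain SSA values, the allocation itself becomes dead and can be removed by later DCE. The `ismutabletype` guard just below also filters out the setfield!-on-immutable case mentioned in the comment above.

    mutable struct Accum           # hypothetical
        total::Float64
    end

    function sum3(a::Float64, b::Float64, c::Float64)
        acc = Accum(0.0)           # allocation: SROA candidate
        acc.total += a             # each getfield/setfield! pair becomes a plain SSA value
        acc.total += b
        acc.total += c
        return acc.total           # with all uses rewritten, DCE may drop the allocation
    end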
+ typ = widenconst(typ) ismutabletype(typ) || continue typ = typ::DataType - # First check for any add_finalizer calls - add_finalizer_idx = nothing + # First check for any finalizer calls + finalizer_idx = nothing for use in defuse.uses - if use.kind === :add_finalizer - # For now: Only allow one add_finalizer per allocation - add_finalizer_idx !== nothing && @goto skip - add_finalizer_idx = use.idx + if use.kind === :finalizer + # For now: Only allow one finalizer per allocation + finalizer_idx !== nothing && @goto skip + finalizer_idx = use.idx end end - if add_finalizer_idx !== nothing - # For now: Require that all uses and defs are in the same basic block, - # so that live range calculations are easy. - bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)] - minval::Int = typemax(Int) - maxval::Int = 0 - - check_in_range(defuse) = check_in_range(defuse.idx) - function check_in_range(didx::Int) - didx in bb.stmts || return false - if didx < minval - minval = didx - end - if didx > maxval - maxval = didx - end - return true - end - - check_in_range(idx) || continue - _all(check_in_range, defuse.uses) || continue - _all(check_in_range, defuse.defs) || continue - - # For now: Require all statements in the basic block range to be - # nothrow. - all_nothrow = _all(idx->is_nothrow(ir, idx) || idx == add_finalizer_idx, minval:maxval) - all_nothrow || continue - - # Ok, finalizer rewrite is legal. - add_finalizer_stmt = ir[SSAValue(add_finalizer_idx)][:inst] - argexprs = Any[add_finalizer_stmt.args[2], add_finalizer_stmt.args[3]] - may_inline = add_finalizer_stmt.args[4]::Bool - mi = add_finalizer_stmt.args[5]::Union{MethodInstance, Nothing} - if may_inline && mi !== nothing - if try_inline_finalizer!(ir, argexprs, maxval, add_finalizer_stmt.args[5], inlining) - @goto done_finalizer - end - mi = compileable_specialization(inlining.et, mi, Effects()).invoke - end - if mi !== nothing - insert_node!(ir, maxval, - NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), - true) - else - insert_node!(ir, maxval, - NewInstruction(Expr(:call, argexprs...), Nothing), - true) - end - @label done_finalizer - # Erase call to add_finalizer - ir[SSAValue(add_finalizer_idx)][:inst] = nothing + if finalizer_idx !== nothing && inlining !== nothing + try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining, + lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info]) continue end # Partition defuses by field fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)] all_eliminated = all_forwarded = true - has_finalizer = false for use in defuse.uses if use.kind === :preserve for du in fielddefuse @@ -1091,7 +1646,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end continue end - stmt = ir[SSAValue(use.idx)][:inst] # == `getfield`/`isdefined` call + stmt = ir[SSAValue(use.idx)][:stmt] # == `getfield`/`isdefined` call # We may have discovered above that this use is dead # after the getfield elim of immutables. In that case, # it would have been deleted. 
That's fine, just ignore @@ -1105,7 +1660,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse push!(fielddefuse[field].uses, use) end for def in defuse.defs - stmt = ir[SSAValue(def)][:inst]::Expr # == `setfield!` call + stmt = ir[SSAValue(def)][:stmt]::Expr # == `setfield!` call field = try_compute_fieldidx_stmt(ir, stmt, typ) field === nothing && @goto skip isconst(typ, field) && @goto skip # we discovered an attempt to mutate a const field, which must error @@ -1116,7 +1671,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # but we should come up with semantics for well defined semantics # for uninitialized fields first. ndefuse = length(fielddefuse) - blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# Vector{Int}}}(undef, ndefuse) + blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# BitSet}}(undef, ndefuse) for fidx in 1:ndefuse du = fielddefuse[fidx] isempty(du.uses) && continue @@ -1125,34 +1680,34 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse if isempty(ldu.live_in_bbs) phiblocks = Int[] else - phiblocks = iterated_dominance_frontier(ir.cfg, ldu, get(lazydomtree)) + phiblocks = iterated_dominance_frontier(ir.cfg, ldu, get!(lazydomtree)) end - allblocks = sort!(vcat(phiblocks, ldu.def_bbs); alg=QuickSort) + allblocks = union!(BitSet(phiblocks), ldu.def_bbs) blocks[fidx] = phiblocks, allblocks if fidx + 1 > length(defexpr.args) for i = 1:length(du.uses) use = du.uses[i] if use.kind === :isdefined - if has_safe_def(ir, get(lazydomtree), allblocks, du, newidx, use.idx) - ir[SSAValue(use.idx)][:inst] = true + if has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx) + ir[SSAValue(use.idx)][:stmt] = true else all_eliminated = false end continue elseif use.kind === :preserve - if length(du.defs) == 1 # allocation with this field unintialized + if length(du.defs) == 1 # allocation with this field uninitialized # there is nothing to preserve, just ignore this use du.uses[i] = NoPreserve() continue end end - has_safe_def(ir, get(lazydomtree), allblocks, du, newidx, use.idx) || @goto skip + has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx) || @goto skip end else # always have some definition at the allocation site for i = 1:length(du.uses) use = du.uses[i] if use.kind === :isdefined - ir[SSAValue(use.idx)][:inst] = true + ir[SSAValue(use.idx)][:stmt] = true end end end @@ -1161,7 +1716,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # Compute domtree now, needed below, now that we have finished compacting the IR. # This needs to be after we iterate through the IR with `IncrementalCompact` # because removing dead blocks can invalidate the domtree. 
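The rewrite that follows places PhiNodes at the iterated dominance frontier of each field's def blocks (computed above via `iterated_dominance_frontier`) and then uses the domtree computed just below to pick the reaching definition for every load. A hypothetical shape that forces such a join-point phi:

    mutable struct Cell            # hypothetical
        x::Int
    end

    function pick(cond::Bool)
        c = Cell(0)
        if cond
            c.x = 1                # def in the then-block
        else
            c.x = 2                # def in the else-block
        end
        return c.x                 # load at the join block: becomes a phi of 1 and 2
    end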
- domtree = get(lazydomtree) + domtree = get!(lazydomtree) local preserve_uses = nothing for fidx in 1:ndefuse du = fielddefuse[fidx] @@ -1176,12 +1731,14 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # Now go through all uses and rewrite them for use in du.uses if use.kind === :getfield - ir[SSAValue(use.idx)][:inst] = compute_value_for_use(ir, domtree, allblocks, + inst = ir[SSAValue(use.idx)] + inst[:stmt] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use.idx) + add_flag!(inst, IR_FLAG_REFINED) elseif use.kind === :isdefined continue # already rewritten if possible elseif use.kind === :nopreserve - continue # nothing to preserve (may happen when there are unintialized fields) + continue # nothing to preserve (may happen when there are uninitialized fields) elseif use.kind === :preserve newval = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use.idx) @@ -1189,28 +1746,42 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse if preserve_uses === nothing preserve_uses = IdDict{Int, Vector{Any}}() end - push!(get!(()->Any[], preserve_uses, use.idx), newval) + push!(get!(Vector{Any}, preserve_uses, use.idx), newval) end else @assert false "sroa_mutables!: unexpected use" end end for b in phiblocks - n = ir[phinodes[b]][:inst]::PhiNode + n = ir[phinodes[b]][:stmt]::PhiNode + result_t = Bottom for p in ir.cfg.blocks[b].preds push!(n.edges, p) - push!(n.values, compute_value_for_block(ir, domtree, - allblocks, du, phinodes, fidx, p)) + v = compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, p) + push!(n.values, v) + result_t = tmerge(𝕃ₒ, result_t, argextype(v, ir)) end + ir[phinodes[b]][:type] = result_t end end all_eliminated || continue # all "usages" (i.e. `getfield` and `isdefined` calls) are eliminated, - # now eliminate "definitions" (`setfield!`) calls + # now eliminate "definitions" (i.e. `setfield!`) calls # (NOTE the allocation itself will be eliminated by DCE pass later) - for stmt in du.defs - stmt == newidx && continue - ir[SSAValue(stmt)][:inst] = nothing + for idx in du.defs + idx == newidx && continue # this is allocation + # verify this statement won't throw, otherwise it can't be eliminated safely + ssa = SSAValue(idx) + if is_nothrow(ir, ssa) + ir[ssa][:stmt] = nothing + else + # We can't eliminate this statement, because it might still + # throw an error, but we can mark it as effect-free since we + # know we have removed all uses of the mutable allocation. + # As a result, if we ever do prove nothrow, we can delete + # this statement then. 
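A hypothetical sketch of the situation the comment above describes: every read of the allocation has been eliminated, but the remaining store can still throw on conversion, so the `add_flag!` call that follows only marks it effect-free rather than deleting it outright; if nothrow is proven later, the statement can then be dropped.

    function dead_store(v)
        r = Ref{Int}(0)     # mutable allocation that is never read again
        r[] = v             # may still throw, e.g. v = 1.5 does not convert to Int
        return 0
    end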
+ add_flag!(ir[ssa], IR_FLAG_EFFECT_FREE) + end end end preserve_uses === nothing && continue @@ -1222,7 +1793,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end # Insert the new preserves for (useidx, new_preserves) in preserve_uses - ir[SSAValue(useidx)][:inst] = form_new_preserves(ir[SSAValue(useidx)][:inst]::Expr, + ir[SSAValue(useidx)][:stmt] = form_new_preserves(ir[SSAValue(useidx)][:stmt]::Expr, intermediaries, new_preserves) end @@ -1267,16 +1838,18 @@ function canonicalize_typeassert!(compact::IncrementalCompact, idx::Int, stmt::E NewInstruction( PiNode(stmt.args[2], compact.result[idx][:type]), compact.result[idx][:type], - compact.result[idx][:line]), true) + compact.result[idx][:line]), #=reverse_affinity=#true) compact.ssa_rename[compact.idx-1] = pi end function adce_erase!(phi_uses::Vector{Int}, extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, in_worklist::Bool) # return whether this made a change - if isa(compact.result[idx][:inst], PhiNode) - return maybe_erase_unused!(extra_worklist, compact, idx, in_worklist, val::SSAValue -> phi_uses[val.id] -= 1) + if isa(compact.result[idx][:stmt], PhiNode) + return maybe_erase_unused!(compact, idx, in_worklist, extra_worklist) do val::SSAValue + phi_uses[val.id] -= 1 + end else - return maybe_erase_unused!(extra_worklist, compact, idx, in_worklist) + return maybe_erase_unused!(compact, idx, in_worklist, extra_worklist) end end @@ -1286,10 +1859,10 @@ function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::SPCSet, phi::I while !isempty(worklist) phi = pop!(worklist) push!(safe_phis, phi) - for ur in userefs(compact.result[phi][:inst]) + for ur in userefs(compact.result[phi][:stmt]) val = ur[] isa(val, SSAValue) || continue - isa(compact[val][:inst], PhiNode) || continue + isa(compact[val][:stmt], PhiNode) || continue (val.id in safe_phis) && continue push!(worklist, val.id) end @@ -1297,13 +1870,37 @@ function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::SPCSet, phi::I end function is_some_union(@nospecialize(t)) - isa(t, MaybeUndef) && (t = t.typ) return isa(t, Union) end function is_union_phi(compact::IncrementalCompact, idx::Int) inst = compact.result[idx] - return isa(inst[:inst], PhiNode) && is_some_union(inst[:type]) + isa(inst[:stmt], PhiNode) || return false + return is_some_union(inst[:type]) +end + +function kill_phi!(compact::IncrementalCompact, phi_uses::Vector{Int}, + to_drop::Union{Vector{Int}, UnitRange{Int}}, + ssa::SSAValue, phi::PhiNode, delete_inst::Bool = false) + for d in to_drop + if isassigned(phi.values, d) + val = phi.values[d] + if !delete_inst + # Deleting the inst will update compact's use count, so + # don't do it here. + kill_current_use!(compact, val) + end + if isa(val, SSAValue) + phi_uses[val.id] -= 1 + end + end + end + if delete_inst + compact[ssa] = nothing + elseif !isempty(to_drop) + deleteat!(phi.values, to_drop) + deleteat!(phi.edges, to_drop) + end end """ @@ -1325,11 +1922,13 @@ Also note that currently this pass _needs_ to run after `sroa_pass!`, because the `typeassert` elimination depends on the transformation by `canonicalize_typeassert!` done within `sroa_pass!` which redirects references of `typeassert`ed value to the corresponding `PiNode`. """ -function adce_pass!(ir::IRCode) +function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) + 𝕃ₒ = inlining === nothing ? 
SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes)) all_phis = Int[] unionphis = Pair{Int,Any}[] # sorted - compact = IncrementalCompact(ir) + compact = IncrementalCompact(ir, true) + made_changes = false for ((_, idx), stmt) in compact if isa(stmt, PhiNode) push!(all_phis, idx) @@ -1342,16 +1941,16 @@ function adce_pass!(ir::IRCode) r = searchsorted(unionphis, val.id; by = first) if !isempty(r) unionphi = unionphis[first(r)] - t = tmerge(unionphi[2], stmt.typ) + t = tmerge(𝕃ₒ, unionphi[2], stmt.typ) unionphis[first(r)] = Pair{Int,Any}(unionphi[1], t) end end else if is_known_call(stmt, typeassert, compact) && length(stmt.args) == 3 # nullify safe `typeassert` calls - ty, isexact = instanceof_tfunc(argextype(stmt.args[3], compact)) - if isexact && argextype(stmt.args[2], compact) ⊑ ty - compact[idx] = nothing + ty, isexact = instanceof_tfunc(argextype(stmt.args[3], compact), true) + if isexact && ⊑(𝕃ₒ, argextype(stmt.args[2], compact), ty) + delete_inst_here!(compact) continue end end @@ -1369,17 +1968,17 @@ function adce_pass!(ir::IRCode) non_dce_finish!(compact) for phi in all_phis inst = compact.result[phi] - for ur in userefs(inst[:inst]::PhiNode) + for ur in userefs(inst[:stmt]::PhiNode) use = ur[] if isa(use, SSAValue) phi_uses[use.id] += 1 - stmt = compact.result[use.id][:inst] + stmt = compact.result[use.id][:stmt] if isa(stmt, PhiNode) r = searchsorted(unionphis, use.id; by=first) if !isempty(r) unionphi = unionphis[first(r)] unionphis[first(r)] = Pair{Int,Any}(unionphi[1], - tmerge(unionphi[2], inst[:type])) + tmerge(𝕃ₒ, unionphi[2], inst[:type])) end end end @@ -1391,15 +1990,17 @@ function adce_pass!(ir::IRCode) phi = unionphi[1] t = unionphi[2] if t === Union{} - compact.result[phi][:inst] = nothing + stmt = compact[SSAValue(phi)][:stmt]::PhiNode + kill_phi!(compact, phi_uses, 1:length(stmt.values), SSAValue(phi), stmt, true) + made_changes = true continue elseif t === Any continue - elseif compact.result[phi][:type] ⊑ t + elseif ⊑(𝕃ₒ, compact.result[phi][:type], t) continue end to_drop = Int[] - stmt = compact[SSAValue(phi)][:inst] + stmt = compact[SSAValue(phi)][:stmt] stmt === nothing && continue stmt = stmt::PhiNode for i = 1:length(stmt.values) @@ -1412,19 +2013,18 @@ function adce_pass!(ir::IRCode) end end compact.result[phi][:type] = t - isempty(to_drop) && continue - deleteat!(stmt.values, to_drop) - deleteat!(stmt.edges, to_drop) + kill_phi!(compact, phi_uses, to_drop, SSAValue(phi), stmt, false) + made_changes = true end # Perform simple DCE for unused values extra_worklist = Int[] for (idx, nused) in Iterators.enumerate(compact.used_ssas) idx >= compact.result_idx && break nused == 0 || continue - adce_erase!(phi_uses, extra_worklist, compact, idx, false) + made_changes |= adce_erase!(phi_uses, extra_worklist, compact, idx, false) end while !isempty(extra_worklist) - adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true) + made_changes |= adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true) end # Go back and erase any phi cycles changed = true @@ -1445,150 +2045,17 @@ function adce_pass!(ir::IRCode) while !isempty(extra_worklist) if adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true) changed = true + made_changes = true end end end - return complete(compact) -end - -function type_lift_pass!(ir::IRCode) - lifted_undef = IdDict{Int, Any}() - insts = ir.stmts - for idx in 1:length(insts) - stmt = insts[idx][:inst] - stmt isa Expr 
|| continue - if (stmt.head === :isdefined || stmt.head === :undefcheck) - # after optimization, undef can only show up by being introduced in - # a phi node (or an UpsilonNode() argument to a PhiC node), so lift - # all these nodes that have maybe undef values - val = stmt.args[(stmt.head === :isdefined) ? 1 : 2] - if stmt.head === :isdefined && (val isa Slot || val isa GlobalRef || - isexpr(val, :static_parameter) || val isa Argument || val isa Symbol) - # this is a legal node, so assume it was not introduced by - # slot2ssa (at worst, we might leave in a runtime check that - # shouldn't have been there) - continue - end - # otherwise, we definitely have a corrupt node from slot2ssa, and - # must fix or delete that now - processed = IdDict{Int, Union{SSAValue, Bool}}() - def = val - while true - # peek through PiNodes - isa(val, SSAValue) || break - def = insts[val.id][:inst] - isa(def, PiNode) || break - val = def.val - end - if !isa(val, SSAValue) || (!isa(def, PhiNode) && !isa(def, PhiCNode)) - # in most cases, reaching this statement implies we had a value - if stmt.head === :undefcheck - insts[idx][:inst] = nothing - else - insts[idx][:inst] = true - end - continue - end - stmt_id = val.id - worklist = Tuple{Int, Int, SSAValue, Int}[(stmt_id, 0, SSAValue(0), 0)] - if !haskey(lifted_undef, stmt_id) - first = true - while !isempty(worklist) - item, w_up_id, which, use = pop!(worklist) - def = insts[item][:inst] - if isa(def, PhiNode) - edges = copy(def.edges) - values = Vector{Any}(undef, length(edges)) - new_phi = if length(values) == 0 - false - else - insert_node!(ir, item, NewInstruction(PhiNode(edges, values), Bool)) - end - else - def = def::PhiCNode - values = Vector{Any}(undef, length(def.values)) - new_phi = if length(values) == 0 - false - else - insert_node!(ir, item, NewInstruction(PhiCNode(values), Bool)) - end - end - processed[item] = new_phi - if first - lifted_undef[stmt_id] = new_phi - first = false - end - local id::Int = 0 - for i = 1:length(values) - if !isassigned(def.values, i) - val = false - elseif !isa(def.values[i], SSAValue) - val = true - else - up_id = id = (def.values[i]::SSAValue).id - @label restart - if !isa(ir.stmts[id][:type], MaybeUndef) - val = true - else - node = insts[id][:inst] - if isa(node, UpsilonNode) - if !isdefined(node, :val) - val = false - elseif !isa(node.val, SSAValue) - val = true - else - id = (node.val::SSAValue).id - @goto restart - end - else - while isa(node, PiNode) - id = (node.val::SSAValue).id - node = insts[id][:inst] - end - if isa(node, Union{PhiNode, PhiCNode}) - if haskey(processed, id) - val = processed[id] - else - push!(worklist, (id, up_id, new_phi::SSAValue, i)) - continue - end - else - val = true - end - end - end - end - if isa(def, PhiNode) - values[i] = val - else - values[i] = insert_node!(ir, up_id, NewInstruction(UpsilonNode(val), Bool)) - end - end - if which !== SSAValue(0) - phi = ir[which][:inst] - if isa(phi, PhiNode) - phi.values[use] = new_phi - else - phi = phi::PhiCNode - phi.values[use] = insert_node!(ir, w_up_id, NewInstruction(UpsilonNode(new_phi), Bool)) - end - end - end - end - inst = lifted_undef[stmt_id] - if stmt.head === :undefcheck - inst = Expr(:throw_undef_if_not, stmt.args[1], inst) - end - insts[idx][:inst] = inst - end - end - ir + return Pair{IRCode, Bool}(complete(compact), made_changes) end function is_bb_empty(ir::IRCode, bb::BasicBlock) isempty(bb.stmts) && return true if length(bb.stmts) == 1 - stmt = ir[SSAValue(first(bb.stmts))][:inst] + stmt = 
ir[SSAValue(first(bb.stmts))][:stmt] return stmt === nothing || isa(stmt, GotoNode) end return false @@ -1596,68 +2063,146 @@ end # TODO: This is terrible, we should change the IR for GotoIfNot to gain an else case function is_legal_bb_drop(ir::IRCode, bbidx::Int, bb::BasicBlock) + # For the time being, don't drop the first bb, because it has special predecessor semantics. + bbidx == 1 && return false # If the block we're going to is the same as the fallthrow, it's always legal to drop # the block. length(bb.stmts) == 0 && return true if length(bb.stmts) == 1 - stmt = ir[SSAValue(first(bb.stmts))][:inst] + stmt = ir[SSAValue(first(bb.stmts))][:stmt] stmt === nothing && return true ((stmt::GotoNode).label == bbidx + 1) && return true end - # Otherwise make sure we're not the fallthrough case of any predecessor - for pred in bb.preds - if pred == bbidx - 1 - terminator = ir[SSAValue(first(bb.stmts)-1)][:inst] - if isa(terminator, GotoIfNot) - if terminator.dest != bbidx - return false - end + return true +end + +function legalize_bb_drop_pred!(ir::IRCode, bb::BasicBlock, bbidx::Int, bbs::Vector{BasicBlock}, dropped_bbs::Vector{Int}) + (bbidx-1) in bb.preds || return true + last_fallthrough = bbidx-1 + dbi = length(dropped_bbs) + while dbi != 0 && dropped_bbs[dbi] == last_fallthrough && (last_fallthrough-1 in bbs[last_fallthrough].preds) + last_fallthrough -= 1 + dbi -= 1 + end + last_fallthrough_term_ssa = SSAValue(last(bbs[last_fallthrough].stmts)) + terminator = ir[last_fallthrough_term_ssa][:stmt] + if isa(terminator, GotoIfNot) + if terminator.dest != bbidx + # The previous terminator's destination matches our fallthrough. + # If we're also a fallthrough terminator, then we just have + # to delete the GotoIfNot. + our_terminator = ir[SSAValue(last(bb.stmts))][:stmt] + if terminator.dest != (isa(our_terminator, GotoNode) ? our_terminator.label : bbidx + 1) + return false end - break end + ir[last_fallthrough_term_ssa] = nothing + kill_edge!(bbs, last_fallthrough, terminator.dest) + elseif isa(terminator, EnterNode) + return false + elseif isa(terminator, GotoNode) + return true end + # Hack, but effective. If we have a predecessor with a fall-through terminator, change the + # instruction numbering to merge the blocks now such that below processing will properly + # update it. 
+ bbs[last_fallthrough] = BasicBlock(first(bbs[last_fallthrough].stmts):last(bb.stmts), bbs[last_fallthrough].preds, bbs[last_fallthrough].succs) return true end +function follow_map(map::Vector{Int}, idx::Int) + while map[idx] ≠ 0 + idx = map[idx] + end + return idx +end + +function ascend_eliminated_preds(bbs::Vector{BasicBlock}, pred::Int) + pred == 0 && return pred + while pred != 1 && length(bbs[pred].preds) == 1 && length(bbs[pred].succs) == 1 + pred = bbs[pred].preds[1] + end + return pred +end + +# Compute (renamed) successors and predecessors given (renamed) block +function compute_succs(merged_succ::Vector{Int}, bbs::Vector{BasicBlock}, result_bbs::Vector{Int}, bb_rename_succ::Vector{Int}, i::Int) + orig_bb = follow_map(merged_succ, result_bbs[i]) + return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs] +end + +function compute_preds(bbs::Vector{BasicBlock}, result_bbs::Vector{Int}, bb_rename_pred::Vector{Int}, i::Int) + orig_bb = result_bbs[i] + preds = copy(bbs[orig_bb].preds) + res = Int[] + while !isempty(preds) + pred = popfirst!(preds) + if pred == 0 + push!(res, 0) + continue + end + r = bb_rename_pred[pred] + (r == -2 || r == -1) && continue + if r == -3 + prepend!(preds, bbs[pred].preds) + else + push!(res, r) + end + end + return res +end + +function add_preds!(all_new_preds::Vector{Int32}, bbs::Vector{BasicBlock}, bb_rename_pred::Vector{Int}, old_edge::Int32) + preds = copy(bbs[old_edge].preds) + while !isempty(preds) + old_edge′ = popfirst!(preds) + if old_edge′ == 0 + push!(all_new_preds, old_edge′) + continue + end + new_edge = bb_rename_pred[old_edge′] + if new_edge > 0 && new_edge ∉ all_new_preds + push!(all_new_preds, Int32(new_edge)) + elseif new_edge == -3 + prepend!(preds, bbs[old_edge′].preds) + end + end +end + function cfg_simplify!(ir::IRCode) bbs = ir.cfg.blocks merge_into = zeros(Int, length(bbs)) merged_succ = zeros(Int, length(bbs)) dropped_bbs = Vector{Int}() # sorted - function follow_merge_into(idx::Int) - while merge_into[idx] != 0 - idx = merge_into[idx] - end - return idx - end - function follow_merged_succ(idx::Int) - while merged_succ[idx] != 0 - idx = merged_succ[idx] - end - return idx - end # Walk the CFG from the entry block and aggressively combine blocks for (idx, bb) in enumerate(bbs) if length(bb.succs) == 1 succ = bb.succs[1] - if length(bbs[succ].preds) == 1 + if length(bbs[succ].preds) == 1 && succ != 1 + # Can't merge blocks with :enter terminator even if they + # only have one successor. + if isa(ir[SSAValue(last(bb.stmts))][:stmt], EnterNode) + continue + end # Prevent cycles by making sure we don't end up back at `idx` # by following what is to be merged into `succ` - if follow_merged_succ(succ) != idx + if follow_map(merged_succ, succ) != idx merge_into[succ] = idx merged_succ[idx] = succ end - elseif is_bb_empty(ir, bb) && is_legal_bb_drop(ir, idx, bb) + elseif merge_into[idx] == 0 && is_bb_empty(ir, bb) && is_legal_bb_drop(ir, idx, bb) # If this BB is empty, we can still merge it as long as none of our successor's phi nodes # reference our predecessors. 
found_interference = false + preds = Int[ascend_eliminated_preds(bbs, pred) for pred in bb.preds] for idx in bbs[succ].stmts - stmt = ir[SSAValue(idx)][:inst] + stmt = ir[SSAValue(idx)][:stmt] stmt === nothing && continue isa(stmt, PhiNode) || break for edge in stmt.edges - for pred in bb.preds + edge = ascend_eliminated_preds(bbs, Int(edge)) + for pred in preds if pred == edge found_interference = true @goto done @@ -1666,54 +2211,75 @@ function cfg_simplify!(ir::IRCode) end end @label done - if !found_interference - push!(dropped_bbs, idx) - end + found_interference && continue + legalize_bb_drop_pred!(ir, bb, idx, bbs, dropped_bbs) || continue + push!(dropped_bbs, idx) end end end - # Assign new BB numbers + # Assign new BB numbers in DFS order, dropping unreachable blocks max_bb_num = 1 bb_rename_succ = zeros(Int, length(bbs)) - for i = 1:length(bbs) + worklist = BitSetBoundedMinPrioritySet(length(bbs)) + push!(worklist, 1) + while !isempty(worklist) + i = popfirst!(worklist) # Drop blocks that will be merged away if merge_into[i] != 0 - bb_rename_succ[i] = -1 - end - # Drop blocks with no predecessors - if i != 1 && length(ir.cfg.blocks[i].preds) == 0 - bb_rename_succ[i] = -1 + bb_rename_succ[i] = typemin(Int) end # Mark dropped blocks for fixup if !isempty(searchsorted(dropped_bbs, i)) - bb_rename_succ[i] = -bbs[i].succs[1] + succ = bbs[i].succs[1] + push!(worklist, succ) + bb_rename_succ[i] = -succ end - bb_rename_succ[i] != 0 && continue - - curr = i - while true - bb_rename_succ[curr] = max_bb_num - max_bb_num += 1 - # Now walk the chain of blocks we merged. - # If we end in something that may fall through, - # we have to schedule that block next - curr = follow_merged_succ(curr) - terminator = ir.stmts[ir.cfg.blocks[curr].stmts[end]][:inst] - if isa(terminator, GotoNode) || isa(terminator, ReturnNode) - break + if bb_rename_succ[i] == 0 + curr = i + while true + @assert bb_rename_succ[curr] == 0 + bb_rename_succ[curr] = max_bb_num + max_bb_num += 1 + # Now walk the chain of blocks we merged. 
+ # If we end in something that may fall through, + # we have to schedule that block next + while merged_succ[curr] != 0 + if bb_rename_succ[curr] == 0 + bb_rename_succ[curr] = typemin(Int) + end + curr = merged_succ[curr] + end + terminator = ir[SSAValue(ir.cfg.blocks[curr].stmts[end])][:stmt] + if isa(terminator, GotoNode) || isa(terminator, ReturnNode) + break + elseif isa(terminator, GotoIfNot) + if bb_rename_succ[terminator.dest] == 0 + push!(worklist, terminator.dest) + end + elseif isa(terminator, EnterNode) + catchbb = terminator.catch_dest + if bb_rename_succ[catchbb] == 0 + push!(worklist, catchbb) + end + end + ncurr = curr + 1 + while !isempty(searchsorted(dropped_bbs, ncurr)) + bb_rename_succ[ncurr] = -bbs[ncurr].succs[1] + ncurr += 1 + end + curr = ncurr end - curr += 1 - if !isempty(searchsorted(dropped_bbs, curr)) - break + + for succ in bbs[curr].succs + if bb_rename_succ[succ] == 0 + push!(worklist, succ) + end end end end - # Compute map from new to old blocks - result_bbs = Int[findfirst(j->i==j, bb_rename_succ) for i = 1:max_bb_num-1] - # Fixup dropped BBs resolved_all = false while !resolved_all @@ -1721,9 +2287,9 @@ function cfg_simplify!(ir::IRCode) resolved_all = true for bb in dropped_bbs obb = bb_rename_succ[bb] - if obb < -1 + if obb < 0 && obb != typemin(Int) nsucc = bb_rename_succ[-obb] - if nsucc == -1 + if nsucc == typemin(Int) nsucc = -merge_into[-obb] end bb_rename_succ[bb] = nsucc @@ -1732,8 +2298,26 @@ function cfg_simplify!(ir::IRCode) end end - # Figure out how predecessors should be renamed + # Drop remaining unvisited bbs bb_rename_pred = zeros(Int, length(bbs)) + for i = 1:length(bbs) + if bb_rename_succ[i] == 0 + bb_rename_succ[i] = -1 + bb_rename_pred[i] = -2 + elseif bb_rename_succ[i] == typemin(Int) + bb_rename_succ[i] = -1 + end + end + + # Compute map from new to old blocks + result_bbs = zeros(Int, max_bb_num-1) + for (o, bb) in enumerate(bb_rename_succ) + bb > 0 || continue + isempty(searchsorted(dropped_bbs, o)) || continue + result_bbs[bb] = o + end + + # Figure out how predecessors should be renamed for i = 1:length(bbs) if merged_succ[i] != 0 # Block `i` should no longer be a predecessor (before renaming) @@ -1742,11 +2326,32 @@ function cfg_simplify!(ir::IRCode) continue end pred = i + is_unreachable = false + is_multi = false while pred !== 1 && !isempty(searchsorted(dropped_bbs, pred)) - pred = bbs[pred].preds[1] + preds = bbs[pred].preds + if length(preds) == 0 + is_unreachable = true + break + elseif length(preds) > 1 + # This block has multiple predecessors - the only way this is + # legal is if we proved above that our successors don't have + # any phi nodes that would interfere with the renaming. Mark + # this specially. 
+ is_multi = true + break + end + @assert length(preds) == 1 + pred = preds[1] + end + if is_unreachable + @assert bb_rename_pred[i] == -2 + elseif is_multi + bb_rename_pred[i] = -3 + else + bbnum = follow_map(merge_into, pred) + bb_rename_pred[i] = bb_rename_succ[bbnum] end - bbnum = follow_merge_into(pred) - bb_rename_pred[i] = bb_rename_succ[bbnum] end # Compute new block lengths @@ -1766,40 +2371,23 @@ function cfg_simplify!(ir::IRCode) bb_starts[i+1] = bb_starts[i] + result_bbs_lengths[i] end - cresult_bbs = let result_bbs = result_bbs, - merged_succ = merged_succ, - merge_into = merge_into, - bbs = bbs, - bb_rename_succ = bb_rename_succ - - # Compute (renamed) successors and predecessors given (renamed) block - function compute_succs(i) - orig_bb = follow_merged_succ(result_bbs[i]) - return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs] - end - function compute_preds(i) - orig_bb = result_bbs[i] - preds = bbs[orig_bb].preds - return Int[bb_rename_pred[pred] for pred in preds] - end - - BasicBlock[ - BasicBlock(StmtRange(bb_starts[i], - i+1 > length(bb_starts) ? - length(compact.result) : bb_starts[i+1]-1), - compute_preds(i), - compute_succs(i)) - for i = 1:length(result_bbs)] - end + cresult_bbs = BasicBlock[ + BasicBlock(StmtRange(bb_starts[i], + i+1 > length(bb_starts) ? length(compact.result) : bb_starts[i+1]-1), + compute_preds(bbs, result_bbs, bb_rename_pred, i), + compute_succs(merged_succ, bbs, result_bbs, bb_rename_succ, i)) + for i = 1:length(result_bbs)] # Fixup terminators for any blocks that would have caused double edges for (bbidx, (new_bb, old_bb)) in enumerate(zip(cresult_bbs, result_bbs)) @assert length(new_bb.succs) <= 2 length(new_bb.succs) <= 1 && continue if new_bb.succs[1] == new_bb.succs[2] - terminator = ir[SSAValue(last(bbs[old_bb].stmts))] - @assert isa(terminator[:inst], GotoIfNot) - terminator[:inst] = GotoNode(terminator[:inst].dest) + old_bb2 = findfirst(x::Int->x==bbidx, bb_rename_pred)::Int + terminator = ir[SSAValue(last(bbs[old_bb2].stmts))] + @assert terminator[:stmt] isa GotoIfNot + # N.B.: The dest will be renamed in process_node! 
below + terminator[:stmt] = GotoNode(terminator[:stmt].dest::Int) pop!(new_bb.succs) new_succ = cresult_bbs[new_bb.succs[1]] for (i, nsp) in enumerate(new_succ.preds) @@ -1811,34 +2399,100 @@ function cfg_simplify!(ir::IRCode) end end - compact = IncrementalCompact(ir, true) # Run instruction compaction to produce the result, # but we're messing with the CFG # so we don't want compaction to do so independently - compact.fold_constant_branches = false - compact.bb_rename_succ = bb_rename_succ - compact.bb_rename_pred = bb_rename_pred - compact.result_bbs = cresult_bbs + compact = IncrementalCompact(ir, CFGTransformState(true, false, cresult_bbs, bb_rename_pred, bb_rename_succ, nothing)) result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb + bb_start = true while ms != 0 - for i in bbs[ms].stmts + old_bb_stmts = bbs[ms].stmts + for i in old_bb_stmts node = ir.stmts[i] compact.result[compact.result_idx] = node - if isa(node[:inst], GotoNode) && merged_succ[ms] != 0 + stmt = node[:stmt] + if isa(stmt, GotoNode) && merged_succ[ms] != 0 # If we merged a basic block, we need remove the trailing GotoNode (if any) - compact.result[compact.result_idx][:inst] = nothing + compact.result[compact.result_idx][:stmt] = nothing + elseif isa(stmt, PhiNode) + phi = stmt + values = phi.values + (; ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact + ssa_rename[i] = SSAValue(compact.result_idx) + already_inserted = function (i::Int, val::OldSSAValue) + if val.id in old_bb_stmts + return val.id <= i + end + return bb_rename_pred[phi.edges[i]] < idx + end + renamed_values = process_phinode_values(values, late_fixup, already_inserted, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing) + edges = Int32[] + values = Any[] + sizehint!(edges, length(phi.edges)); sizehint!(values, length(renamed_values)) + for old_index in 1:length(phi.edges) + old_edge = phi.edges[old_index] + new_edge = bb_rename_pred[old_edge] + if new_edge > 0 + push!(edges, new_edge) + if isassigned(renamed_values, old_index) + push!(values, renamed_values[old_index]) + else + resize!(values, length(values)+1) + end + elseif new_edge == -1 + @assert length(phi.edges) == 1 + if isassigned(renamed_values, old_index) + push!(edges, -1) + push!(values, renamed_values[old_index]) + end + elseif new_edge == -3 + # Multiple predecessors, we need to expand out this phi + all_new_preds = Int32[] + add_preds!(all_new_preds, bbs, bb_rename_pred, old_edge) + append!(edges, all_new_preds) + if isassigned(renamed_values, old_index) + val = renamed_values[old_index] + for _ in 1:length(all_new_preds) + push!(values, val) + end + length(all_new_preds) == 0 && kill_current_use!(compact, val) + for _ in 2:length(all_new_preds) + count_added_node!(compact, val) + end + else + resize!(values, length(values)+length(all_new_preds)) + end + else + isassigned(renamed_values, old_index) && kill_current_use!(compact, renamed_values[old_index]) + end + end + if length(edges) == 0 || (length(edges) == 1 && !isassigned(values, 1)) + compact.result[compact.result_idx][:stmt] = nothing + elseif length(edges) == 1 && !bb_start + compact.result[compact.result_idx][:stmt] = values[1] + else + @assert bb_start + compact.result[compact.result_idx][:stmt] = PhiNode(edges, values) + end else - process_node!(compact, compact.result_idx, node, i, i, ms, true) + ri = process_node!(compact, compact.result_idx, node, i, i, ms, true) + if ri == compact.result_idx + # process_node! wanted this statement dropped. 
We don't do this, + # but we still need to erase the node + compact.result[compact.result_idx][:stmt] = nothing + end end # We always increase the result index to ensure a predicatable # placement of the resulting nodes. compact.result_idx += 1 end ms = merged_succ[ms] + bb_start = false end end + compact.idx = length(ir.stmts) compact.active_result_bb = length(bb_starts) return finish(compact) end diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl index 51abf2a228de3..3936a82a6560e 100644 --- a/base/compiler/ssair/show.jl +++ b/base/compiler/ssair/show.jl @@ -1,5 +1,8 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# This file is not loaded into `Core.Compiler` but rather loaded into the context of +# `Base.IRShow` and thus does not participate in bootstrapping. + @nospecialize if Pair != Base.Pair @@ -13,10 +16,18 @@ import Base: show_unquoted using Base: printstyled, with_output_color, prec_decl, @invoke function Base.show(io::IO, cfg::CFG) + print(io, "CFG with $(length(cfg.blocks)) blocks:") for (idx, block) in enumerate(cfg.blocks) - print(io, idx, "\t=>\t") - join(io, block.succs, ", ") - println(io) + print(io, "\n bb ", idx) + if block.stmts.start == block.stmts.stop + print(io, " (stmt ", block.stmts.start, ")") + else + print(io, " (stmts ", block.stmts.start, ":", block.stmts.stop, ")") + end + if !isempty(block.succs) + print(io, " → bb ") + join(io, block.succs, ", ") + end end end @@ -37,7 +48,7 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng print(io, ", ") print(io, stmt.typ) print(io, ")") - elseif isexpr(stmt, :invoke) + elseif isexpr(stmt, :invoke) && length(stmt.args) >= 2 && isa(stmt.args[1], MethodInstance) stmt = stmt::Expr # TODO: why is this here, and not in Base.show_unquoted print(io, "invoke ") @@ -56,18 +67,18 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng join(io, (print_arg(i) for i = 3:length(stmt.args)), ", ") print(io, ")") # given control flow information, we prefer to print these with the basic block #, instead of the ssa % - elseif isexpr(stmt, :enter) && length((stmt::Expr).args) == 1 && (stmt::Expr).args[1] isa Int - print(io, "\$(Expr(:enter, #", (stmt::Expr).args[1]::Int, "))") + elseif isa(stmt, EnterNode) + print(io, "enter #", stmt.catch_dest, "") + if isdefined(stmt, :scope) + print(io, " with scope ") + show_unquoted(io, stmt.scope, indent) + end elseif stmt isa GotoNode print(io, "goto #", stmt.label) elseif stmt isa PhiNode show_unquoted_phinode(io, stmt, indent, "#") elseif stmt isa GotoIfNot show_unquoted_gotoifnot(io, stmt, indent, "#") - elseif stmt isa TypedSlot - # call `show` with the type set to Any so it will not be shown, since - # we will show the type ourselves. - show_unquoted(io, SlotNumber(stmt.id), indent, show_type ? prec_decl : 0) # everything else in the IR, defer to the generic AST printer else show_unquoted(io, stmt, indent, show_type ? 
prec_decl : 0) @@ -147,22 +158,30 @@ end function should_print_ssa_type(@nospecialize node) if isa(node, Expr) - return !(node.head in (:gc_preserve_begin, :gc_preserve_end, :meta, :enter, :leave)) + return !(node.head in (:gc_preserve_begin, :gc_preserve_end, :meta, :leave)) end return !isa(node, PiNode) && !isa(node, GotoIfNot) && !isa(node, GotoNode) && !isa(node, ReturnNode) && - !isa(node, QuoteNode) + !isa(node, QuoteNode) && !isa(node, EnterNode) end -function default_expr_type_printer(io::IO, @nospecialize(typ), used::Bool) - printstyled(io, "::", typ, color=(used ? :cyan : :light_black)) - nothing +function default_expr_type_printer(io::IO; @nospecialize(type), used::Bool, show_type::Bool=true, _...) + show_type || return nothing + printstyled(io, "::", type, color=(used ? :cyan : :light_black)) + return nothing end -normalize_method_name(m::Method) = m.name -normalize_method_name(m::MethodInstance) = (m.def::Method).name -normalize_method_name(m::Symbol) = m -normalize_method_name(m) = Symbol("") +function normalize_method_name(m) + if m isa Method + return m.name + elseif m isa MethodInstance + return (m.def::Method).name + elseif m isa Symbol + return m + else + return Symbol("") + end +end @noinline method_name(m::LineInfoNode) = normalize_method_name(m.method) # converts the linetable for line numbers @@ -235,7 +254,7 @@ We get: └── return %3 │ ``` -Even though we were in the `f` scope since the first statement, it tooks us two statements +Even though we were in the `f` scope since the first statement, it took us two statements to catch up and print the intermediate scopes. Which scope is printed is indicated both by the indentation of the method name and by an increased thickness of the appropriate line for the scope. @@ -329,8 +348,7 @@ function compute_ir_line_annotations(code::IRCode) return (loc_annotations, loc_methods, loc_lineno) end -Base.show(io::IO, code::IRCode) = show_ir(io, code) - +Base.show(io::IO, code::Union{IRCode, IncrementalCompact}) = show_ir(io, code) lineinfo_disabled(io::IO, linestart::String, idx::Int) = "" @@ -495,14 +513,18 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false) return emit_lineinfo_update end -# line_info_preprinter(io::IO, indent::String, idx::Int) may print relevant info -# at the beginning of the line, and should at least print `indent`. It returns a -# string that will be printed after the final basic-block annotation. -# line_info_postprinter(io::IO, typ, used::Bool) prints the type-annotation at the end -# of the statement -# should_print_stmt(idx::Int) -> Bool: whether the statement at index `idx` should be -# printed as part of the IR or not -# bb_color: color used for printing the basic block brackets on the left +""" + IRShowConfig + +- `line_info_preprinter(io::IO, indent::String, idx::Int)`` may print relevant info + at the beginning of the line, and should at least print `indent`. It returns a + string that will be printed after the final basic-block annotation. +- `line_info_postprinter(io::IO; type, used::Bool, show_type::Bool, idx::Int)` prints + relevant information like type-annotation at the end of the statement +- `should_print_stmt(idx::Int) -> Bool`: whether the statement at index `idx` should be + printed as part of the IR or not +- `bb_color`: color used for printing the basic block brackets on the left +""" struct IRShowConfig line_info_preprinter line_info_postprinter @@ -520,7 +542,11 @@ end function _stmt(code::IRCode, idx::Int) stmts = code.stmts - return isassigned(stmts.inst, idx) ? 
stmts[idx][:inst] : UNDEF + return isassigned(stmts.stmt, idx) ? stmts[idx][:stmt] : UNDEF +end +function _stmt(compact::IncrementalCompact, idx::Int) + stmts = compact.result + return isassigned(stmts.stmt, idx) ? stmts[idx][:stmt] : UNDEF end function _stmt(code::CodeInfo, idx::Int) code = code.code @@ -531,6 +557,10 @@ function _type(code::IRCode, idx::Int) stmts = code.stmts return isassigned(stmts.type, idx) ? stmts[idx][:type] : UNDEF end +function _type(compact::IncrementalCompact, idx::Int) + stmts = compact.result + return isassigned(stmts.type, idx) ? stmts[idx][:type] : UNDEF +end function _type(code::CodeInfo, idx::Int) types = code.ssavaluetypes types isa Vector{Any} || return nothing @@ -539,10 +569,8 @@ end function statement_indices_to_labels(stmt, cfg::CFG) # convert statement index to labels, as expected by print_stmt - if stmt isa Expr - if stmt.head === :enter && length(stmt.args) == 1 && stmt.args[1] isa Int - stmt = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int)) - end + if stmt isa EnterNode + stmt = EnterNode(stmt, stmt.catch_dest == 0 ? 0 : block_for_inst(cfg, stmt.catch_dest)) elseif isa(stmt, GotoIfNot) stmt = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest)) elseif stmt isa GotoNode @@ -556,17 +584,18 @@ end # Show a single statement, code.stmts[idx]/code.code[idx], in the context of the whole IRCode/CodeInfo. # Returns the updated value of bb_idx. -# pop_new_node!(idx::Int) -> (node_idx, new_node_inst, new_node_type) may return a new -# node at the current index `idx`, which is printed before the statement at index -# `idx`. This function is repeatedly called until it returns `nothing` -function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, config::IRShowConfig, - used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing)) +# pop_new_node!(idx::Int; attach_after=false) -> (node_idx, new_node_inst, new_node_type) +# may return a new node at the current index `idx`, which is printed before the statement +# at index `idx`. This function is repeatedly called until it returns `nothing`. +# to iterate nodes that are to be inserted after the statement, set `attach_after=true`. +function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, idx::Int, config::IRShowConfig, + used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), only_after::Bool=false) return show_ir_stmt(io, code, idx, config.line_info_preprinter, config.line_info_postprinter, - used, cfg, bb_idx; pop_new_node!, config.bb_color) + used, cfg, bb_idx; pop_new_node!, only_after, config.bb_color) end -function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info_preprinter, line_info_postprinter, - used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), bb_color=:light_black) +function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo, IncrementalCompact}, idx::Int, line_info_preprinter, line_info_postprinter, + used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! 
= Returns(nothing), only_after::Bool=false, bb_color=:light_black) stmt = _stmt(code, idx) type = _type(code, idx) max_bb_idx_size = length(string(length(cfg.blocks))) @@ -586,8 +615,7 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info end i = 1 - while true - next = pop_new_node!(idx) + function print_indentation(final::Bool=true) # Compute BB guard rail if bb_idx > length(cfg.blocks) # If invariants are violated, print a special leader @@ -596,7 +624,6 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info printstyled(io, "!!! ", "─"^max_bb_idx_size, color=bb_color) else bbrange = cfg.blocks[bb_idx].stmts - bbrange = bbrange.start:bbrange.stop # Print line info update linestart = idx == first(bbrange) ? " " : sprint(io -> printstyled(io, "│ ", color=bb_color), context=io) linestart *= " "^max_bb_idx_size @@ -609,24 +636,20 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info bb_pad = max_bb_idx_size - length(bb_idx_str) bb_type = length(cfg.blocks[bb_idx].preds) <= 1 ? "─" : "┄" printstyled(io, bb_idx_str, " ", bb_type, "─"^bb_pad, color=bb_color) - elseif next === nothing && idx == last(bbrange) # print separator + elseif final && idx == last(bbrange) # print separator printstyled(io, "└", "─"^(1 + max_bb_idx_size), color=bb_color) else printstyled(io, "│ ", " "^max_bb_idx_size, color=bb_color) end end print(io, inlining_indent, " ") + end - if next === nothing - if bb_idx <= length(cfg.blocks) && idx == last(bbrange) - bb_idx += 1 - end - break - end - - # print new nodes first in the right position - node_idx, new_node_inst, new_node_type = next + # first, print new nodes that are to be inserted before the current statement + function print_new_node(node; final::Bool=true) + print_indentation(final) + node_idx, new_node_inst, new_node_type = node @assert new_node_inst !== UNDEF # we filtered these out earlier show_type = should_print_ssa_type(new_node_inst) let maxlength_idx=maxlength_idx, show_type=show_type @@ -637,52 +660,110 @@ function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info if new_node_type === UNDEF # try to be robust against errors printstyled(io, "::#UNDEF", color=:red) - elseif show_type - line_info_postprinter(IOContext(io, :idx => node_idx), new_node_type, node_idx in used) + else + line_info_postprinter(io; type = new_node_type, used = node_idx in used, show_type, idx = node_idx) end println(io) + end + while (next = pop_new_node!(idx)) !== nothing + only_after || print_new_node(next; final=false) i += 1 end - if code isa CodeInfo - stmt = statement_indices_to_labels(stmt, cfg) + + # peek at the nodes to be inserted after the current statement + # (to determine of the statement itself is the final one) + next = pop_new_node!(idx; attach_after=true) + + # then, print the current statement + # FIXME: `only_after` is hack so that we can call this function to print uncompacted + # attach-after nodes when the current node has already been compated already + if !only_after + print_indentation(next===nothing) + if code isa CodeInfo + stmt = statement_indices_to_labels(stmt, cfg) + end + show_type = type !== nothing && should_print_ssa_type(stmt) + print_stmt(io, idx, stmt, used, maxlength_idx, true, show_type) + if type !== nothing # ignore types for pre-inference code + if type === UNDEF + # This is an error, but can happen if passes don't update their type information + printstyled(io, "::#UNDEF", color=:red) + else + line_info_postprinter(io; type, used 
= idx in used, show_type, idx) + end + end + println(io) + end + i += 1 + + # finally, print new nodes that are to be inserted after the current statement + while next !== nothing + print_new_node(next) + i += 1 + next = pop_new_node!(idx; attach_after=true) end - show_type = type !== nothing && should_print_ssa_type(stmt) - print_stmt(io, idx, stmt, used, maxlength_idx, true, show_type) - if type !== nothing # ignore types for pre-inference code - if type === UNDEF - # This is an error, but can happen if passes don't update their type information - printstyled(io, "::#UNDEF", color=:red) - elseif show_type - line_info_postprinter(IOContext(io, :idx => idx), type, idx in used) + + # increment the basic block counter + if bb_idx <= length(cfg.blocks) + bbrange = cfg.blocks[bb_idx].stmts + if bb_idx <= length(cfg.blocks) && idx == last(bbrange) + bb_idx += 1 end end - println(io) + return bb_idx end -function ircode_new_nodes_iter(code::IRCode) - stmts = code.stmts - new_nodes = code.new_nodes.stmts - new_nodes_info = code.new_nodes.info - new_nodes_perm = filter(i -> isassigned(new_nodes.inst, i), 1:length(new_nodes)) +function _new_nodes_iter(stmts, new_nodes, new_nodes_info, new_nodes_idx) + new_nodes_perm = filter(i -> isassigned(new_nodes.stmt, i), 1:length(new_nodes)) sort!(new_nodes_perm, by = x -> (x = new_nodes_info[x]; (x.pos, x.attach_after))) - perm_idx = Ref(1) - function (idx::Int) - perm_idx[] <= length(new_nodes_perm) || return nothing - node_idx = new_nodes_perm[perm_idx[]] - if new_nodes_info[node_idx].pos != idx + # separate iterators for the nodes that are inserted before resp. after each statement + before_iter = Ref(1) + after_iter = Ref(1) + + return function get_new_node(idx::Int; attach_after=false) + iter = attach_after ? after_iter : before_iter + iter[] <= length(new_nodes_perm) || return nothing + node_idx = new_nodes_perm[iter[]] + + # skip nodes + while node_idx < new_nodes_idx || # already compacted + idx > new_nodes_info[node_idx].pos || # not interested in + new_nodes_info[node_idx].attach_after != attach_after + iter[] += 1 + iter[] > length(new_nodes_perm) && return nothing + node_idx = new_nodes_perm[iter[]] + end + + if new_nodes_info[node_idx].pos != idx || + new_nodes_info[node_idx].attach_after != attach_after return nothing end - perm_idx[] += 1 + + iter[] += 1 new_node = new_nodes[node_idx] - new_node_inst = isassigned(new_nodes.inst, node_idx) ? new_node[:inst] : UNDEF + new_node_inst = isassigned(new_nodes.stmt, node_idx) ? new_node[:stmt] : UNDEF new_node_type = isassigned(new_nodes.type, node_idx) ? new_node[:type] : UNDEF node_idx += length(stmts) return node_idx, new_node_inst, new_node_type end end +function new_nodes_iter(ir::IRCode, new_nodes_idx=1) + stmts = ir.stmts + new_nodes = ir.new_nodes.stmts + new_nodes_info = ir.new_nodes.info + return _new_nodes_iter(stmts, new_nodes, new_nodes_info, new_nodes_idx) +end + +function new_nodes_iter(compact::IncrementalCompact) + stmts = compact.result + new_nodes = compact.new_new_nodes.stmts + new_nodes_info = compact.new_new_nodes.info + return _new_nodes_iter(stmts, new_nodes, new_nodes_info, 1) +end + # print only line numbers on the left, some of the method names and nesting depth on the right function inline_linfo_printer(code::IRCode) loc_annotations, loc_methods, loc_lineno = compute_ir_line_annotations(code) @@ -710,7 +791,7 @@ function inline_linfo_printer(code::IRCode) end # Print location information right aligned. 
If the line below is too long, it'll overwrite this, # but that's what we want. - if get(io, :color, false) + if get(io, :color, false)::Bool method_start_column = cols - max_method_width - max_loc_width - 2 filler = " "^(max_loc_width-length(annotation)) printstyled(io, "\e[$(method_start_column)G$(annotation)$(filler)$(loc_method)\e[1G", color = :light_black) @@ -720,7 +801,7 @@ function inline_linfo_printer(code::IRCode) end end -_strip_color(s::String) = replace(s, r"\e\[\d+m" => "") +_strip_color(s::String) = replace(s, r"\e\[\d+m"a => "") function statementidx_lineinfo_printer(f, code::IRCode) printer = f(code.linetable) @@ -737,15 +818,15 @@ end statementidx_lineinfo_printer(code) = statementidx_lineinfo_printer(DILineInfoPrinter, code) function stmts_used(io::IO, code::IRCode, warn_unset_entry=true) - stmts = code.stmts + insts = code.stmts used = BitSet() - for stmt in stmts - scan_ssa_use!(push!, used, stmt[:inst]) + for inst in insts + scan_ssa_use!(push!, used, inst[:stmt]) end new_nodes = code.new_nodes.stmts for nn in 1:length(new_nodes) - if isassigned(new_nodes.inst, nn) - scan_ssa_use!(push!, used, new_nodes[nn][:inst]) + if isassigned(new_nodes.stmt, nn) + scan_ssa_use!(push!, used, new_nodes[nn][:stmt]) elseif warn_unset_entry printstyled(io, "ERROR: New node array has unset entry\n", color=:red) warn_unset_entry = false @@ -770,55 +851,176 @@ function default_config(code::IRCode; verbose_linetable=false) end default_config(code::CodeInfo) = IRShowConfig(statementidx_lineinfo_printer(code)) -function show_ir(io::IO, code::Union{IRCode, CodeInfo}, config::IRShowConfig=default_config(code); - pop_new_node! = code isa IRCode ? ircode_new_nodes_iter(code) : Returns(nothing)) - stmts = code isa IRCode ? code.stmts : code.code - used = stmts_used(io, code) - cfg = code isa IRCode ? code.cfg : compute_basic_blocks(stmts) - bb_idx = 1 - - for idx in 1:length(stmts) - if config.should_print_stmt(code, idx, used) - bb_idx = show_ir_stmt(io, code, idx, config, used, cfg, bb_idx; pop_new_node!) +function show_ir_stmts(io::IO, ir::Union{IRCode, CodeInfo, IncrementalCompact}, inds, config::IRShowConfig, + used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing)) + for idx in inds + if config.should_print_stmt(ir, idx, used) + bb_idx = show_ir_stmt(io, ir, idx, config, used, cfg, bb_idx; pop_new_node!) elseif bb_idx <= length(cfg.blocks) && idx == cfg.blocks[bb_idx].stmts.stop bb_idx += 1 end end + return bb_idx +end +function finish_show_ir(io::IO, cfg::CFG, config::IRShowConfig) max_bb_idx_size = length(string(length(cfg.blocks))) config.line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0) - nothing + return nothing +end + +function show_ir(io::IO, ir::IRCode, config::IRShowConfig=default_config(ir); + pop_new_node! = new_nodes_iter(ir)) + used = stmts_used(io, ir) + cfg = ir.cfg + maxssaid = length(ir.stmts) + Core.Compiler.length(ir.new_nodes) + let io = IOContext(io, :maxssaid=>maxssaid) + show_ir_stmts(io, ir, 1:length(ir.stmts), config, used, cfg, 1; pop_new_node!) + end + finish_show_ir(io, cfg, config) +end + +function show_ir(io::IO, ci::CodeInfo, config::IRShowConfig=default_config(ci); + pop_new_node! = Returns(nothing)) + used = stmts_used(io, ci) + cfg = compute_basic_blocks(ci.code) + let io = IOContext(io, :maxssaid=>length(ci.code)) + show_ir_stmts(io, ci, 1:length(ci.code), config, used, cfg, 1; pop_new_node!) 
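# Editorial note: a hedged sketch of the new-node iterator protocol that `show_ir_stmt`
# relies on (`ir` and `idx` are placeholders, not values from this patch). The closure
# returned by `new_nodes_iter` is polled until it returns `nothing`, first for nodes
# inserted before statement `idx`, then with `attach_after=true` for nodes attached after it.
pop_new_node! = new_nodes_iter(ir)
while (next = pop_new_node!(idx)) !== nothing                      # printed before the statement
    node_idx, new_node_inst, new_node_type = next
end
while (next = pop_new_node!(idx; attach_after=true)) !== nothing   # printed after the statement
    node_idx, new_node_inst, new_node_type = next
end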
+ end + finish_show_ir(io, cfg, config) +end + +function show_ir(io::IO, compact::IncrementalCompact, config::IRShowConfig=default_config(compact.ir)) + cfg = compact.ir.cfg + + + # First print everything that has already been compacted + + # merge uses in uncompacted region into compacted uses + used_compacted = BitSet(i for (i, x) in pairs(compact.used_ssas) if x != 0) + used_uncompacted = stmts_used(io, compact.ir) + for (i, ssa) = enumerate(compact.ssa_rename) + if isa(ssa, SSAValue) && ssa.id in used_uncompacted + push!(used_compacted, i) + end + end + + # while compacting, the end of the active result bb will not have been determined + # (this is done post-hoc by `finish_current_bb!`), so determine it here from scratch. + result_bbs = copy(compact.cfg_transform.result_bbs) + if compact.active_result_bb <= length(result_bbs) + # count the total number of nodes we'll add to this block + input_bb_idx = block_for_inst(compact.ir.cfg, compact.idx) + input_bb = compact.ir.cfg.blocks[input_bb_idx] + count = 0 + for input_idx in input_bb.stmts.start:input_bb.stmts.stop + pop_new_node! = new_nodes_iter(compact.ir) + while pop_new_node!(input_idx) !== nothing + count += 1 + end + while pop_new_node!(input_idx; attach_after=true) !== nothing + count += 1 + end + end + + still_to_be_inserted = (last(input_bb.stmts) - compact.idx) + count + + result_bb = result_bbs[compact.active_result_bb] + result_bbs[compact.active_result_bb] = Core.Compiler.BasicBlock(result_bb, + Core.Compiler.StmtRange(first(result_bb.stmts), compact.result_idx+still_to_be_inserted)) + end + compact_cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) + + pop_new_node! = new_nodes_iter(compact) + maxssaid = length(compact.result) + Core.Compiler.length(compact.new_new_nodes) + bb_idx = let io = IOContext(io, :maxssaid=>maxssaid) + show_ir_stmts(io, compact, 1:compact.result_idx-1, config, used_compacted, + compact_cfg, 1; pop_new_node!) + end + + + # Print uncompacted nodes from the original IR + + # print a separator + (_, width) = displaysize(io) + stmts = compact.ir.stmts + indent = length(string(length(stmts))) + # config.line_info_preprinter(io, "", compact.idx) + printstyled(io, "─"^(width-indent-1), '\n', color=:red) + + # while compacting, the start of the active uncompacted bb will have been overwritten. + # this manifests as a stmt range end that is less than the start, so correct that. + inputs_bbs = copy(cfg.blocks) + for (i, bb) in enumerate(inputs_bbs) + if bb.stmts.stop < bb.stmts.start + inputs_bbs[i] = Core.Compiler.BasicBlock(bb, + Core.Compiler.StmtRange(last(bb.stmts), last(bb.stmts))) + # this is not entirely correct, and will result in the bb starting again, + # but is the best we can do without changing how `finish_current_bb!` works. + end + end + uncompacted_cfg = CFG(inputs_bbs, Int[first(inputs_bbs[i].stmts) for i in 2:length(inputs_bbs)]) + + pop_new_node! = new_nodes_iter(compact.ir, compact.new_nodes_idx) + maxssaid = length(compact.ir.stmts) + Core.Compiler.length(compact.ir.new_nodes) + let io = IOContext(io, :maxssaid=>maxssaid) + # first show any new nodes to be attached after the last compacted statement + if compact.idx > 1 + show_ir_stmt(io, compact.ir, compact.idx-1, config, used_uncompacted, + uncompacted_cfg, bb_idx; pop_new_node!, only_after=true) + end + + # then show the actual uncompacted IR + show_ir_stmts(io, compact.ir, compact.idx:length(stmts), config, used_uncompacted, + uncompacted_cfg, bb_idx; pop_new_node!) 
+ end + + finish_show_ir(io, uncompacted_cfg, config) end -tristate_letter(t::TriState) = t === ALWAYS_TRUE ? '+' : t === ALWAYS_FALSE ? '!' : '?' -tristate_color(t::TriState) = t === ALWAYS_TRUE ? :green : t === ALWAYS_FALSE ? :red : :yellow -tristate_repr(t::TriState) = - t === ALWAYS_TRUE ? "ALWAYS_TRUE" : - t === ALWAYS_FALSE ? "ALWAYS_FALSE" : - t === TRISTATE_UNKNOWN ? "TRISTATE_UNKNOWN" : nothing +function effectbits_letter(effects::Effects, name::Symbol, suffix::Char) + ft = fieldtype(Effects, name) + if ft === UInt8 + prefix = getfield(effects, name) === ALWAYS_TRUE ? '+' : + getfield(effects, name) === ALWAYS_FALSE ? '!' : '?' + elseif ft === Bool + prefix = getfield(effects, name) ? '+' : '!' + else + error("unsupported effectbits type given") + end + return string(prefix, suffix) +end + +function effectbits_color(effects::Effects, name::Symbol) + ft = fieldtype(Effects, name) + if ft === UInt8 + color = getfield(effects, name) === ALWAYS_TRUE ? :green : + getfield(effects, name) === ALWAYS_FALSE ? :red : :yellow + elseif ft === Bool + color = getfield(effects, name) ? :green : :red + else + error("unsupported effectbits type given") + end + return color +end -function Base.show(io::IO, e::Core.Compiler.Effects) +function Base.show(io::IO, e::Effects) print(io, "(") - printstyled(io, string(tristate_letter(e.consistent), 'c'); color=tristate_color(e.consistent)) + printstyled(io, effectbits_letter(e, :consistent, 'c'); color=effectbits_color(e, :consistent)) + print(io, ',') + printstyled(io, effectbits_letter(e, :effect_free, 'e'); color=effectbits_color(e, :effect_free)) + print(io, ',') + printstyled(io, effectbits_letter(e, :nothrow, 'n'); color=effectbits_color(e, :nothrow)) print(io, ',') - printstyled(io, string(tristate_letter(e.effect_free), 'e'); color=tristate_color(e.effect_free)) + printstyled(io, effectbits_letter(e, :terminates, 't'); color=effectbits_color(e, :terminates)) print(io, ',') - printstyled(io, string(tristate_letter(e.nothrow), 'n'); color=tristate_color(e.nothrow)) + printstyled(io, effectbits_letter(e, :notaskstate, 's'); color=effectbits_color(e, :notaskstate)) print(io, ',') - printstyled(io, string(tristate_letter(e.terminates), 't'); color=tristate_color(e.terminates)) + printstyled(io, effectbits_letter(e, :inaccessiblememonly, 'm'); color=effectbits_color(e, :inaccessiblememonly)) print(io, ',') - printstyled(io, string(tristate_letter(e.notaskstate), 's'); color=tristate_color(e.notaskstate)) + printstyled(io, effectbits_letter(e, :noub, 'u'); color=effectbits_color(e, :noub)) print(io, ')') e.nonoverlayed || printstyled(io, '′'; color=:red) end -function Base.show(io::IO, t::TriState) - s = tristate_repr(t) - if s !== nothing - printstyled(io, s; color = tristate_color(t)) - else # unknown state, redirect to the fallback printing - @invoke show(io::IO, t::Any) - end -end - @specialize diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 7d534e5bd647a..c2a769acbe9e9 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -15,24 +15,24 @@ function scan_entry!(result::Vector{SlotInfo}, idx::Int, @nospecialize(stmt)) push!(result[slot_id(stmt.slot)].defs, idx) return elseif isexpr(stmt, :(=)) - if isa(stmt.args[1], SlotNumber) - push!(result[slot_id(stmt.args[1])].defs, idx) + arg1 = stmt.args[1] + if isa(arg1, SlotNumber) + push!(result[slot_id(arg1)].defs, idx) end stmt = stmt.args[2] end - if isa(stmt, Union{SlotNumber, TypedSlot}) + if isa(stmt, SlotNumber) 
push!(result[slot_id(stmt)].uses, idx) return end for op in userefs(stmt) val = op[] - if isa(val, Union{SlotNumber, TypedSlot}) + if isa(val, SlotNumber) push!(result[slot_id(val)].uses, idx) end end end - function scan_slot_def_use(nargs::Int, ci::CodeInfo, code::Vector{Any}) nslots = length(ci.slotflags) result = SlotInfo[SlotInfo() for i = 1:nslots] @@ -62,13 +62,12 @@ function renumber_ssa!(@nospecialize(stmt), ssanums::Vector{SSAValue}, new_ssa:: return ssamap(val->renumber_ssa(val, ssanums, new_ssa), stmt) end -function make_ssa!(ci::CodeInfo, code::Vector{Any}, idx, slot, @nospecialize(typ)) - (idx == 0) && return Argument(slot) +function make_ssa!(ci::CodeInfo, code::Vector{Any}, idx::Int, @nospecialize(typ)) stmt = code[idx] @assert isexpr(stmt, :(=)) code[idx] = stmt.args[2] (ci.ssavaluetypes::Vector{Any})[idx] = typ - idx + return SSAValue(idx) end function new_to_regular(@nospecialize(stmt), new_offset::Int) @@ -82,81 +81,71 @@ function new_to_regular(@nospecialize(stmt), new_offset::Int) return urs[] end -function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecialize(stmt::Union{SlotNumber, TypedSlot}), @nospecialize(ssa)) +function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecialize(ssa), @nospecialize(def_ssa)) # We don't really have the information here to get rid of these. # We'll do so later if ssa === UNDEF_TOKEN insert_node!(ir, idx, NewInstruction( Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any)) return UNDEF_TOKEN - end - if !isa(ssa, Argument) && !(ssa === nothing) && ((ci.slotflags[slot] & SLOT_USEDUNDEF) != 0) - # insert a temporary node. type_lift_pass! will remove it + elseif def_ssa !== true insert_node!(ir, idx, NewInstruction( - Expr(:undefcheck, ci.slotnames[slot], ssa), Any)) - end - if isa(stmt, SlotNumber) - return ssa - elseif isa(stmt, TypedSlot) - return NewSSAValue(insert_node!(ir, idx, NewInstruction(PiNode(ssa, stmt.typ), stmt.typ)).id - length(ir.stmts)) + Expr(:throw_undef_if_not, ci.slotnames[slot], def_ssa), Any)) end - @assert false # unreachable + return ssa end -function fixemup!(cond, rename, ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt)) - if isa(stmt, Union{SlotNumber, TypedSlot}) && cond(stmt) - return fixup_slot!(ir, ci, idx, slot_id(stmt), stmt, rename(stmt)) +function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt)) + if isa(stmt, SlotNumber) && slot_filter(stmt) + return fixup_slot!(ir, ci, idx, slot_id(stmt), rename_slot(stmt)...) end if isexpr(stmt, :(=)) - stmt.args[2] = fixemup!(cond, rename, ir, ci, idx, stmt.args[2]) + stmt.args[2] = fixemup!(slot_filter, rename_slot, ir, ci, idx, stmt.args[2]) return stmt end if isa(stmt, PhiNode) for i = 1:length(stmt.edges) isassigned(stmt.values, i) || continue val = stmt.values[i] - isa(val, Union{SlotNumber, TypedSlot}) || continue - cond(val) || continue + isa(val, SlotNumber) || continue + slot_filter(val) || continue bb_idx = block_for_inst(ir.cfg, Int(stmt.edges[i])) from_bb_terminator = last(ir.cfg.blocks[bb_idx].stmts) - stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), val, rename(val)) + stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), rename_slot(val)...) 
end return stmt end if isexpr(stmt, :isdefined) val = stmt.args[1] - if isa(val, Union{SlotNumber, TypedSlot}) - slot = slot_id(val) - if (ci.slotflags[slot] & SLOT_USEDUNDEF) == 0 - return true - else - ssa = rename(val) - if ssa === UNDEF_TOKEN - return false - elseif !isa(ssa, SSAValue) && !isa(ssa, NewSSAValue) - return true - end - end - # temporarily corrupt the isdefined node. type_lift_pass! will fix it - stmt.args[1] = ssa + if isa(val, SlotNumber) + ssa, undef_ssa = rename_slot(val) + return undef_ssa end return stmt end urs = userefs(stmt) for op in urs val = op[] - if isa(val, Union{SlotNumber, TypedSlot}) && cond(val) - x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename(val)) + if isa(val, SlotNumber) && slot_filter(val) + x = fixup_slot!(ir, ci, idx, slot_id(val), rename_slot(val)...) # We inserted an undef error node. Delete subsequent statement # to avoid confusing the optimizer if x === UNDEF_TOKEN return nothing end op[] = x - elseif isa(val, GlobalRef) && !(isdefined(val.mod, val.name) && isconst(val.mod, val.name)) || - (isa(val, Expr) && val.head === :static_parameter) - op[] = NewSSAValue(insert_node!(ir, idx, - NewInstruction(val, typ_for_val(val, ci, ir.sptypes, idx, Any[]))).id - length(ir.stmts)) + elseif isa(val, GlobalRef) && !(isdefined(val.mod, val.name) && isconst(val.mod, val.name)) + typ = typ_for_val(val, ci, ir, idx, Any[]) + new_inst = NewInstruction(val, typ) + op[] = NewSSAValue(insert_node!(ir, idx, new_inst).id - length(ir.stmts)) + elseif isexpr(val, :static_parameter) + ty = typ_for_val(val, ci, ir, idx, Any[]) + if isa(ty, Const) + inst = NewInstruction(quoted(ty.val), ty) + else + inst = NewInstruction(val, ty) + end + op[] = NewSSAValue(insert_node!(ir, idx, inst).id - length(ir.stmts)) end end return urs[] @@ -164,15 +153,15 @@ end function fixup_uses!(ir::IRCode, ci::CodeInfo, code::Vector{Any}, uses::Vector{Int}, slot::Int, @nospecialize(ssa)) for use in uses - code[use] = fixemup!(stmt->slot_id(stmt)==slot, stmt->ssa, ir, ci, use, code[use]) + code[use] = fixemup!(x::SlotNumber->slot_id(x)==slot, stmt::SlotNumber->(ssa, true), ir, ci, use, code[use]) end end -function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Any}) - return fixemup!(stmt->true, stmt->renames[slot_id(stmt)], ir, ci, idx, stmt) +function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Pair{Any, Any}}) + return fixemup!(stmt::SlotNumber->true, stmt::SlotNumber->renames[slot_id(stmt)], ir, ci, idx, stmt) end -function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}) +function strip_trailing_junk!(ci::CodeInfo, cfg::CFG, code::Vector{Any}, info::Vector{CallInfo}) # Remove `nothing`s at the end, we don't handle them well # (we expect the last instruction to be a terminator) ssavaluetypes = ci.ssavaluetypes::Vector{Any} @@ -194,40 +183,42 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any} push!(code, ReturnNode()) push!(ssavaluetypes, Union{}) push!(codelocs, 0) - push!(info, nothing) - push!(ssaflags, IR_FLAG_NULL) + push!(info, NoCallInfo()) + push!(ssaflags, IR_FLAG_NOTHROW) + + # Update CFG to include appended terminator + old_range = cfg.blocks[end].stmts + new_range = StmtRange(first(old_range), last(old_range) + 1) + cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range) + (length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1) end nothing end -struct DelayedTyp - phi::NewSSAValue -end - # maybe use expr_type? 
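# Editorial note: a small, hedged illustration (toy values only) of the renaming convention
# introduced above. Each slot now renames to a pair of (SSA value, definedness witness);
# `fixemup!` splats it into `fixup_slot!`, and a witness of `true` means the slot is provably
# assigned, so no `:throw_undef_if_not` check needs to be inserted.
renames = Pair{Any, Any}[
    Pair{Any, Any}(Argument(2), true),      # renames to an argument: always defined
    Pair{Any, Any}(SSAValue(7), true),      # renames to %7: provably assigned here
    Pair{Any, Any}(UNDEF_TOKEN, false),     # never assigned on this path
]
ssa, def_ssa = renames[1]                   # what `rename_slot(val)...` forwards to `fixup_slot!`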
-function typ_for_val(@nospecialize(x), ci::CodeInfo, sptypes::Vector{Any}, idx::Int, slottypes::Vector{Any}) +function typ_for_val(@nospecialize(x), ci::CodeInfo, ir::IRCode, idx::Int, slottypes::Vector{Any}) if isa(x, Expr) if x.head === :static_parameter - return sptypes[x.args[1]::Int] + return ir.sptypes[x.args[1]::Int].typ elseif x.head === :boundscheck return Bool elseif x.head === :copyast - return typ_for_val(x.args[1], ci, sptypes, idx, slottypes) + return typ_for_val(x.args[1], ci, ir, idx, slottypes) end return (ci.ssavaluetypes::Vector{Any})[idx] end - isa(x, GlobalRef) && return abstract_eval_global(x.mod, x.name) + isa(x, GlobalRef) && return abstract_eval_globalref_type(x) isa(x, SSAValue) && return (ci.ssavaluetypes::Vector{Any})[x.id] isa(x, Argument) && return slottypes[x.n] - isa(x, NewSSAValue) && return DelayedTyp(x) + isa(x, NewSSAValue) && return types(ir)[new_to_regular(x, length(ir.stmts))] isa(x, QuoteNode) && return Const(x.value) - isa(x, Union{Symbol, PiNode, PhiNode, SlotNumber, TypedSlot}) && error("unexpected val type") + isa(x, Union{Symbol, PiNode, PhiNode, SlotNumber}) && error("unexpected val type") return Const(x) end struct BlockLiveness def_bbs::Vector{Int} - live_in_bbs::Vector{Int} + live_in_bbs::Union{Vector{Int}, Nothing} end """ @@ -273,24 +264,25 @@ needs to make sure that we always visit `B` before `A`. DOI: . """ function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) - # This should be a priority queue, but TODO - sorted array for now defs = liveness.def_bbs - pq = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)] - sort!(pq, by=x->x[2]) + heap = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)] + heap_order = By(x -> -x[2]) + heapify!(heap, heap_order) phiblocks = Int[] # This bitset makes sure we only add a phi node to a given block once. processed = BitSet() # This bitset implements the `key insight` mentioned above. In particular, it prevents # us from visiting a subtree that we have already visited before. visited = BitSet() - while !isempty(pq) + while !isempty(heap) # We pop from the end of the array - i.e. the element with the highest level. - node, level = pop!(pq) + node, level = heappop!(heap, heap_order) worklist = Int[] push!(worklist, node) while !isempty(worklist) active = pop!(worklist) - for succ in cfg.blocks[active].succs + succs = cfg.blocks[active].succs + for succ in succs # Check whether the current root (`node`) dominates succ. # We are guaranteed that `node` dominates `active`, since # we've arrived at `active` by following dominator tree edges. @@ -305,7 +297,7 @@ function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree: # unless liveness said otherwise. succ in processed && continue push!(processed, succ) - if !(succ in liveness.live_in_bbs) + if liveness.live_in_bbs !== nothing && !(succ in liveness.live_in_bbs) continue end push!(phiblocks, succ) @@ -315,8 +307,7 @@ function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree: # because succ_level <= level, which is the greatest level we have currently # processed. Thus, we have not yet processed any subtrees of level < succ_level. 
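# Editorial note: a minimal sketch (toy data) of the heap discipline used in
# `iterated_dominance_frontier` below, with the same `heapify!`/`heappop!`/`heappush!`
# helpers and `By` ordering the function invokes: keying on the negated domtree level
# makes each pop return the deepest unprocessed definition block first.
heap = Tuple{Int, Int}[(4, 1), (9, 3), (6, 2)]    # (block, domtree level), made-up numbers
heap_order = By(x -> -x[2])                       # max-heap on level
heapify!(heap, heap_order)
node, level = heappop!(heap, heap_order)          # (9, 3): the highest-level entry pops first
heappush!(heap, (11, 2), heap_order)              # a newly found block re-enters with its level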
if !(succ in defs) - push!(pq, (succ, succ_level)) - sort!(pq, by=x->x[2]) + heappush!(heap, (succ, succ_level), heap_order) end end # Recurse down the current subtree @@ -330,8 +321,10 @@ function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree: phiblocks end -function rename_incoming_edge(old_edge, old_to, result_order, bb_rename) +function rename_incoming_edge(old_edge::Int, old_to::Int, result_order::Vector{Int}, bb_rename::Vector{Int}) + old_edge == 0 && return 0 new_edge_from = bb_rename[old_edge] + new_edge_from < 0 && return new_edge_from if old_edge == old_to - 1 # Could have been a crit edge break if new_edge_from < length(result_order) && result_order[new_edge_from + 1] == 0 @@ -341,7 +334,7 @@ function rename_incoming_edge(old_edge, old_to, result_order, bb_rename) new_edge_from end -function rename_outgoing_edge(old_to, old_from, result_order, bb_rename) +function rename_outgoing_edge(old_to::Int, old_from::Int, result_order::Vector{Int}, bb_rename::Vector{Int}) new_edge_to = bb_rename[old_to] if old_from == old_to - 1 # Could have been a crit edge break @@ -352,12 +345,12 @@ function rename_outgoing_edge(old_to, old_from, result_order, bb_rename) new_edge_to end -function rename_phinode_edges(node, bb, result_order, bb_rename) +function rename_phinode_edges(node::PhiNode, bb::Int, result_order::Vector{Int}, bb_rename::Vector{Int}) new_values = Any[] new_edges = Int32[] for (idx, edge) in pairs(node.edges) edge = Int(edge) - (edge == 0 || haskey(bb_rename, edge)) || continue + (edge == 0 || bb_rename[edge] != -1) || continue new_edge_from = edge == 0 ? 0 : rename_incoming_edge(edge, bb, result_order, bb_rename) push!(new_edges, new_edge_from) if isassigned(node.values, idx) @@ -380,47 +373,42 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) # First compute the new order of basic blocks result_order = Int[] stack = Int[] + bb_rename = fill(-1, length(ir.cfg.blocks)) node = 1 ncritbreaks = 0 nnewfallthroughs = 0 while node !== -1 push!(result_order, node) + bb_rename[node] = length(result_order) cs = domtree.nodes[node].children - terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst] - iscondbr = isa(terminator, GotoIfNot) - let old_node = node + 1 - if length(cs) >= 1 - # Adding the nodes in reverse sorted order attempts to retain - # the original source order of the nodes as much as possible. - # This is not required for correctness, but is easier on the humans - if old_node in cs - # Schedule the fall through node first, - # so we can retain the fall through - append!(stack, reverse(sort(filter(x -> (x != old_node), cs)))) - node = node + 1 - else - append!(stack, reverse(sort(cs))) - node = pop!(stack) - end + terminator = ir[SSAValue(last(ir.cfg.blocks[node].stmts))][:stmt] + next_node = node + 1 + node = -1 + # Adding the nodes in reverse sorted order attempts to retain + # the original source order of the nodes as much as possible. 
+ # This is not required for correctness, but is easier on the humans + for child in Iterators.Reverse(cs) + if child == next_node + # Schedule the fall through node first, + # so we can retain the fall through + node = next_node else - if isempty(stack) - node = -1 - else - node = pop!(stack) - end + push!(stack, child) end - if node != old_node && !isa(terminator, Union{GotoNode, ReturnNode}) - if isa(terminator, GotoIfNot) - # Need to break the critical edge - ncritbreaks += 1 - push!(result_order, 0) - else - nnewfallthroughs += 1 - end + end + if node == -1 && !isempty(stack) + node = pop!(stack) + end + if node != next_node && !isa(terminator, Union{GotoNode, ReturnNode}) + if isa(terminator, GotoIfNot) + # Need to break the critical edge + ncritbreaks += 1 + push!(result_order, 0) + else + nnewfallthroughs += 1 end end end - bb_rename = IdDict{Int,Int}(i=>x for (x, i) in pairs(result_order) if i !== 0) new_bbs = Vector{BasicBlock}(undef, length(result_order)) nstmts = 0 for i in result_order @@ -441,10 +429,10 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) for (new_bb, bb) in pairs(result_order) if bb == 0 nidx = bb_start_off + 1 - inst = result[nidx][:inst] - @assert isa(inst, GotoNode) + stmt = result[nidx][:stmt] + @assert isa(stmt, GotoNode) # N.B.: The .label has already been renamed when it was created. - new_bbs[new_bb] = BasicBlock(nidx:nidx, [new_bb - 1], [inst.label]) + new_bbs[new_bb] = BasicBlock(nidx:nidx, [new_bb - 1], [stmt.label]) bb_start_off += 1 continue end @@ -452,22 +440,22 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) inst_range = (bb_start_off+1):(bb_start_off+length(old_inst_range)) for (nidx, idx) in zip(inst_range, old_inst_range) inst_rename[idx] = SSAValue(nidx) - @assert !isassigned(result.inst, nidx) + @assert !isassigned(result.stmt, nidx) node = result[nidx] node[] = ir.stmts[idx] - inst = node[:inst] - if isa(inst, PhiNode) - node[:inst] = rename_phinode_edges(inst, bb, result_order, bb_rename) + stmt = node[:stmt] + if isa(stmt, PhiNode) + node[:stmt] = rename_phinode_edges(stmt, bb, result_order, bb_rename) end end # Now fix up the terminator - terminator = result[inst_range[end]][:inst] + terminator = result[inst_range[end]][:stmt] if isa(terminator, GotoNode) # Convert to implicit fall through if bb_rename[terminator.label] == new_bb + 1 - result[inst_range[end]][:inst] = nothing + result[inst_range[end]][:stmt] = nothing else - result[inst_range[end]][:inst] = GotoNode(bb_rename[terminator.label]) + result[inst_range[end]][:stmt] = GotoNode(bb_rename[terminator.label]) end elseif isa(terminator, GotoIfNot) # Check if we need to break the critical edge @@ -476,33 +464,31 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) # Add an explicit goto node in the next basic block (we accounted for this above) nidx = inst_range[end] + 1 node = result[nidx] - node[:inst], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0 + node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0 end - result[inst_range[end]][:inst] = GotoIfNot(terminator.cond, bb_rename[terminator.dest]) + result[inst_range[end]][:stmt] = GotoIfNot(terminator.cond, bb_rename[terminator.dest]) elseif !isa(terminator, ReturnNode) - if isa(terminator, Expr) - if terminator.head == :enter - terminator.args[1] = bb_rename[terminator.args[1]] - end + if isa(terminator, EnterNode) + result[inst_range[end]][:stmt] = EnterNode(terminator, terminator.catch_dest == 0 ? 
0 : bb_rename[terminator.catch_dest]) end if bb_rename[bb + 1] != new_bb + 1 # Add an explicit goto node nidx = inst_range[end] + 1 node = result[nidx] - node[:inst], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0 + node[:stmt], node[:type], node[:line] = GotoNode(bb_rename[bb + 1]), Any, 0 inst_range = first(inst_range):(last(inst_range) + 1) end end bb_start_off += length(inst_range) local new_preds, new_succs let bb = bb, bb_rename = bb_rename, result_order = result_order - new_preds = Int[rename_incoming_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].preds if haskey(bb_rename, i)] - new_succs = Int[rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs if haskey(bb_rename, i)] + new_preds = Int[bb for bb in (rename_incoming_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].preds) if bb != -1] + new_succs = Int[ rename_outgoing_edge(i, bb, result_order, bb_rename) for i in ir.cfg.blocks[bb].succs] end new_bbs[new_bb] = BasicBlock(inst_range, new_preds, new_succs) end for i in 1:length(result) - result[i][:inst] = renumber_ssa!(result[i][:inst], inst_rename, true) + result[i][:stmt] = renumber_ssa!(result[i][:stmt], inst_rename, true) end cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]]) new_new_nodes = NewNodeStream(length(ir.new_nodes)) @@ -512,11 +498,11 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) new_new_nodes.info[i] = new_new_info new_node = new_new_nodes.stmts[i] new_node[] = ir.new_nodes.stmts[i] - new_node_inst = new_node[:inst] + new_node_inst = new_node[:stmt] if isa(new_node_inst, PhiNode) new_node_inst = rename_phinode_edges(new_node_inst, block_for_inst(ir.cfg, new_info.pos), result_order, bb_rename) end - new_node[:inst] = renumber_ssa!(new_node_inst, inst_rename, true) + new_node[:stmt] = renumber_ssa!(new_node_inst, inst_rename, true) end new_ir = IRCode(ir, result, cfg, new_new_nodes) return new_ir @@ -528,23 +514,23 @@ function compute_live_ins(cfg::CFG, defs::Vector{Int}, uses::Vector{Int}) # We remove from `uses` any block where all uses are dominated # by a def. This prevents insertion of dead phi nodes at the top # of such a block if that block happens to be in a loop - ordered = Tuple{Int, Int, Bool}[(x, block_for_inst(cfg, x), true) for x in uses] - for x in defs - push!(ordered, (x, block_for_inst(cfg, x), false)) - end - ordered = sort(ordered, by=x->x[1]) - bb_defs = Int[] - bb_uses = Int[] - last_bb = last_def_bb = 0 - for (_, bb, is_use) in ordered - if bb != last_bb && is_use - push!(bb_uses, bb) - end - last_bb = bb - if last_def_bb != bb && !is_use - push!(bb_defs, bb) - last_def_bb = bb - end + bb_defs = Int[] # blocks with a def + bb_uses = Int[] # blocks with a use that is not dominated by a def + + # We do a sorted joint iteration over the instructions listed + # in defs and uses following a pattern similar to mergesort + last_block, block_has_def = 0, false + defs_i = uses_i = 1 + while defs_i <= lastindex(defs) || uses_i <= lastindex(uses) + is_def = uses_i > lastindex(uses) || defs_i <= lastindex(defs) && defs[defs_i] < uses[uses_i] + block = block_for_inst(cfg, is_def ? defs[defs_i] : uses[uses_i]) + defs_i += is_def + uses_i += !is_def + if last_block != block || is_def && !block_has_def + push!(is_def ? 
bb_defs : bb_uses, block) + block_has_def = is_def + end + last_block = block end # To obtain live ins from bb_uses, recursively add predecessors extra_liveins = BitSet() @@ -562,58 +548,48 @@ function compute_live_ins(cfg::CFG, defs::Vector{Int}, uses::Vector{Int}) BlockLiveness(bb_defs, bb_uses) end -function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode, sptypes::Vector{Any}, slottypes::Vector{Any}, nstmts::Int) - new_typ = Union{} - for i = 1:length(node.values) - if isa(node, PhiNode) && !isassigned(node.values, i) - if !isa(new_typ, MaybeUndef) - new_typ = MaybeUndef(new_typ) - end - continue - end - typ = typ_for_val(node.values[i], ci, sptypes, -1, slottypes) - was_maybe_undef = false - if isa(typ, MaybeUndef) - typ = typ.typ - was_maybe_undef = true - end - @assert !isa(typ, MaybeUndef) - while isa(typ, DelayedTyp) - typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] - end - new_typ = tmerge(new_typ, was_maybe_undef ? MaybeUndef(typ) : typ) - end - return new_typ +struct TryCatchRegion + enter_block::Int + leave_block::Int +end +struct NewSlotPhi{Phi} + ssaval::NewSSAValue + node::Phi + undef_ssaval::Union{NewSSAValue, Bool} + undef_node::Union{Phi, Nothing} end -function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, - defuses::Vector{SlotInfo}, slottypes::Vector{Any}) - code = ir.stmts.inst +const NewPhiNode2 = NewSlotPhi{PhiNode} + +struct NewPhiCNode2 + slot::SlotNumber + insert::NewSlotPhi{PhiCNode} +end + +function construct_ssa!(ci::CodeInfo, ir::IRCode, sv::OptimizationState, + domtree::DomTree, defuses::Vector{SlotInfo}, + 𝕃ₒ::AbstractLattice) + code = ir.stmts.stmt cfg = ir.cfg - catch_entry_blocks = Tuple{Int, Int}[] + catch_entry_blocks = TryCatchRegion[] for idx in 1:length(code) stmt = code[idx] - if isexpr(stmt, :enter) - push!(catch_entry_blocks, (block_for_inst(cfg, idx), block_for_inst(cfg, stmt.args[1]::Int))) + if isa(stmt, EnterNode) + push!(catch_entry_blocks, TryCatchRegion( + block_for_inst(cfg, idx), + block_for_inst(cfg, stmt.catch_dest))) end end - exc_handlers = IdDict{Int, Tuple{Int, Int}}() - # Record the correct exception handler for all cricitcal sections - for (enter_block, exc) in catch_entry_blocks - exc_handlers[enter_block+1] = (enter_block, exc) - # TODO: Cut off here if the terminator is a leave corresponding to this enter - for block in dominated(domtree, enter_block+1) - exc_handlers[block] = (enter_block, exc) - end - end + # Record the correct exception handler for all critical sections + handler_at, handlers = compute_trycatch(code, BitSet()) - phi_slots = Vector{Int}[Vector{Int}() for _ = 1:length(ir.cfg.blocks)] - phi_nodes = Vector{Pair{NewSSAValue,PhiNode}}[Vector{Pair{NewSSAValue,PhiNode}}() for _ = 1:length(cfg.blocks)] - phi_ssas = SSAValue[] - phicnodes = IdDict{Int, Vector{Tuple{SlotNumber, NewSSAValue, PhiCNode}}}() - for (_, exc) in catch_entry_blocks - phicnodes[exc] = Vector{Tuple{SlotNumber, NewSSAValue, PhiCNode}}() + phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)] + live_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)] + new_phi_nodes = Vector{NewPhiNode2}[NewPhiNode2[] for _ = 1:length(cfg.blocks)] + new_phic_nodes = IdDict{Int, Vector{NewPhiCNode2}}() + for (; leave_block) in catch_entry_blocks + new_phic_nodes[leave_block] = NewPhiCNode2[] end @timeit "idf" for (idx, slot) in Iterators.enumerate(defuses) # No uses => no need for phi nodes @@ -621,11 +597,11 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, # TODO: Restore 
this optimization if false # length(slot.defs) == 1 && slot.any_newvar if slot.defs[] == 0 - typ = slottypes[idx] + typ = sv.slottypes[idx] ssaval = Argument(idx) fixup_uses!(ir, ci, code, slot.uses, idx, ssaval) elseif isa(code[slot.defs[]], NewvarNode) - typ = MaybeUndef(Union{}) + typ = Union{} ssaval = nothing for use in slot.uses[] insert_node!(ir, use, @@ -634,24 +610,43 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, fixup_uses!(ir, ci, code, slot.uses, idx, nothing) else val = code[slot.defs[]].args[2] - typ = typ_for_val(val, ci, ir.sptypes, slot.defs[], slottypes) - ssaval = SSAValue(make_ssa!(ci, code, slot.defs[], idx, typ)) + typ = typ_for_val(val, ci, ir, slot.defs[], sv.slottypes) + ssaval = make_ssa!(ci, code, slot.defs[], typ) fixup_uses!(ir, ci, code, slot.uses, idx, ssaval) end continue end + @timeit "liveness" (live = compute_live_ins(cfg, slot)) for li in live.live_in_bbs - cidx = findfirst(x->x[2] == li, catch_entry_blocks) + push!(live_slots[li], idx) + cidx = findfirst(x::TryCatchRegion->x.leave_block==li, catch_entry_blocks) if cidx !== nothing # The slot is live-in into this block. We need to # Create a PhiC node in the catch entry block and # an upsilon node in the corresponding enter block + varstate = sv.bb_vartables[li] + if varstate === nothing + continue + end node = PhiCNode(Any[]) + insertpoint = first_insert_for_bb(code, cfg, li) + vt = varstate[idx] phic_ssa = NewSSAValue( - insert_node!(ir, first_insert_for_bb(code, cfg, li), - NewInstruction(node, Union{})).id - length(ir.stmts)) - push!(phicnodes[li], (SlotNumber(idx), phic_ssa, node)) + insert_node!(ir, insertpoint, + NewInstruction(node, vt.typ)).id - length(ir.stmts)) + undef_node = undef_ssaval = nothing + if vt.typ === Union{} + undef_ssaval = false + elseif !vt.undef + undef_ssaval = true + else + undef_node = PhiCNode(Any[]) + undef_ssaval = NewSSAValue(insert_node!(ir, + insertpoint, NewInstruction(undef_node, Bool)).id - length(ir.stmts)) + end + push!(new_phic_nodes[li], NewPhiCNode2(SlotNumber(idx), + NewSlotPhi{PhiCNode}(phic_ssa, node, undef_ssaval, undef_node))) # Inform IDF that we now have a def in the catch block if !(li in live.def_bbs) push!(live.def_bbs, li) @@ -662,27 +657,42 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, for block in phiblocks push!(phi_slots[block], idx) node = PhiNode() - ssa = NewSSAValue(insert_node!(ir, - first_insert_for_bb(code, cfg, block), NewInstruction(node, Union{})).id - length(ir.stmts)) - push!(phi_nodes[block], ssa=>node) + varstate = sv.bb_vartables[block] + @assert varstate !== nothing + vt = varstate[idx] + ssaval = NewSSAValue(insert_node!(ir, + first_insert_for_bb(code, cfg, block), NewInstruction(node, vt.typ)).id - length(ir.stmts)) + undef_node = undef_ssaval = nothing + if vt.typ === Union{} + undef_ssaval = false + elseif !vt.undef + undef_ssaval = true + else + undef_node = PhiNode() + undef_ssaval = NewSSAValue(insert_node!(ir, + first_insert_for_bb(code, cfg, block), NewInstruction(undef_node, Bool)).id - length(ir.stmts)) + end + push!(new_phi_nodes[block], NewPhiNode2(ssaval, node, undef_ssaval, undef_node)) end end # Perform SSA renaming - initial_incoming_vals = Any[ + initial_incoming_vals = Pair{Any, Any}[ if 0 in defuses[x].defs - Argument(x) + Pair{Any, Any}(Argument(x), true) elseif !defuses[x].any_newvar - UNDEF_TOKEN + Pair{Any, Any}(UNDEF_TOKEN, false) else - SSAValue(-2) + Pair{Any, Any}(SSAValue(-2), false) end for x in 1:length(ci.slotflags) ] - worklist = Tuple{Int, Int, 
Vector{Any}}[(1, 0, initial_incoming_vals)] + worklist = Tuple{Int, Int, Vector{Pair{Any, Any}}}[(1, 0, initial_incoming_vals)] visited = BitSet() - type_refine_phi = BitSet() new_nodes = ir.new_nodes @timeit "SSA Rename" while !isempty(worklist) (item::Int, pred, incoming_vals) = pop!(worklist) + if sv.bb_vartables[item] === nothing + continue + end # Rename existing phi nodes first, because their uses occur on the edge # TODO: This isn't necessary if inlining stops replacing arguments by slots. for idx in cfg.blocks[item].stmts @@ -701,8 +711,8 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end # Insert phi nodes if necessary for (idx, slot) in Iterators.enumerate(phi_slots[item]) - ssaval, node = phi_nodes[item][idx] - incoming_val = incoming_vals[slot] + (; ssaval, node, undef_ssaval, undef_node) = new_phi_nodes[item][idx] + (incoming_val, incoming_def) = incoming_vals[slot] if incoming_val === SSAValue(-1) # Optimistically omit this path. # Liveness analysis would probably have prevented us from inserting this phi node @@ -714,39 +724,58 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, else push!(node.values, incoming_val) end - # TODO: Remove the next line, it shouldn't be necessary - push!(type_refine_phi, ssaval.id) - if isa(incoming_val, NewSSAValue) - push!(type_refine_phi, ssaval.id) - end - typ = incoming_val === UNDEF_TOKEN ? MaybeUndef(Union{}) : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes) - old_entry = new_nodes.stmts[ssaval.id] - if isa(typ, DelayedTyp) - push!(type_refine_phi, ssaval.id) + if undef_node !== nothing + push!(undef_node.edges, pred) + push!(undef_node.values, incoming_def) end - new_typ = isa(typ, DelayedTyp) ? Union{} : tmerge(old_entry[:type], typ) - old_entry[:type] = new_typ - old_entry[:inst] = node - incoming_vals[slot] = ssaval + + incoming_vals[slot] = Pair{Any, Any}(ssaval, undef_ssaval) end (item in visited) && continue # Record phi_C nodes if necessary - if haskey(phicnodes, item) - for (slot, ssa, _) in phicnodes[item] - incoming_vals[slot_id(slot)] = ssa + if haskey(new_phic_nodes, item) + for (; slot, insert) in new_phic_nodes[item] + (; ssaval, undef_ssaval) = insert + incoming_vals[slot_id(slot)] = Pair{Any, Any}(ssaval, undef_ssaval) + end + end + # Record Pi nodes if necessary + has_pinode = fill(false, length(sv.slottypes)) + for slot in live_slots[item] + (ival, idef) = incoming_vals[slot] + (ival === SSAValue(-1)) && continue + (ival === SSAValue(-2)) && continue + (ival === UNDEF_TOKEN) && continue + + varstate = sv.bb_vartables[item] + @assert varstate !== nothing + typ = varstate[slot].typ + if !⊑(𝕃ₒ, sv.slottypes[slot], typ) + node = PiNode(ival, typ) + ival = NewSSAValue(insert_node!(ir, + first_insert_for_bb(code, cfg, item), NewInstruction(node, typ)).id - length(ir.stmts)) + incoming_vals[slot] = Pair{Any, Any}(ival, idef) + has_pinode[slot] = true end end # Record initial upsilon nodes if necessary - eidx = findfirst(x->x[1] == item, catch_entry_blocks) + eidx = findfirst((; enter_block)::TryCatchRegion->enter_block==item, catch_entry_blocks) if eidx !== nothing - for (slot, _, node) in phicnodes[catch_entry_blocks[eidx][2]] - ival = incoming_vals[slot_id(slot)] + for (; slot, insert) in new_phic_nodes[catch_entry_blocks[eidx].leave_block] + (; node, undef_node) = insert + (ival, idef) = incoming_vals[slot_id(slot)] ivalundef = ival === UNDEF_TOKEN - unode = ivalundef ? UpsilonNode() : UpsilonNode(ival) - typ = ivalundef ? 
MaybeUndef(Union{}) : typ_for_val(ival, ci, ir.sptypes, -1, slottypes) - push!(node.values, - NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, item), - NewInstruction(unode, typ), true).id - length(ir.stmts))) + Υ = NewInstruction(ivalundef ? UpsilonNode() : UpsilonNode(ival), + ivalundef ? Union{} : typ_for_val(ival, ci, ir, -1, sv.slottypes)) + insertpos = first_insert_for_bb(code, cfg, item) + # insert `UpsilonNode` immediately before the `:enter` expression + Υssa = insert_node!(ir, insertpos, Υ) + push!(node.values, NewSSAValue(Υssa.id - length(ir.stmts))) + if undef_node !== nothing + Υundef = NewInstruction(UpsilonNode(idef), Bool) + Υssaundef = insert_node!(ir, insertpos, Υundef) + push!(undef_node.values, NewSSAValue(Υssaundef.id - length(ir.stmts))) + end end end push!(visited, item) @@ -754,7 +783,8 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, stmt = code[idx] (isa(stmt, PhiNode) || (isexpr(stmt, :(=)) && isa(stmt.args[2], PhiNode))) && continue if isa(stmt, NewvarNode) - incoming_vals[slot_id(stmt.slot)] = UNDEF_TOKEN + incoming_vals[slot_id(stmt.slot)] = Pair{Any, Any}(UNDEF_TOKEN, false) + has_pinode[slot_id(stmt.slot)] = false code[idx] = nothing else stmt = rename_uses!(ir, ci, idx, stmt, incoming_vals) @@ -764,35 +794,56 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end code[idx] = stmt # Record a store - if isexpr(stmt, :(=)) && isa(stmt.args[1], SlotNumber) - id = slot_id(stmt.args[1]) - val = stmt.args[2] - typ = typ_for_val(val, ci, ir.sptypes, idx, slottypes) - # Having UNDEF_TOKEN appear on the RHS is possible if we're on a dead branch. - # Do something reasonable here, by marking the LHS as undef as well. - if val !== UNDEF_TOKEN - incoming_vals[id] = SSAValue(make_ssa!(ci, code, idx, id, typ)::Int) - else - code[idx] = nothing - incoming_vals[id] = UNDEF_TOKEN - end - eidx = item - while haskey(exc_handlers, eidx) - (eidx, exc) = exc_handlers[eidx] - cidx = findfirst(x->slot_id(x[1]) == id, phicnodes[exc]) - if cidx !== nothing - node = UpsilonNode(incoming_vals[id]) - if incoming_vals[id] === UNDEF_TOKEN - node = UpsilonNode() - typ = MaybeUndef(Union{}) + if isexpr(stmt, :(=)) + arg1 = stmt.args[1] + if isa(arg1, SlotNumber) + id = slot_id(arg1) + val = stmt.args[2] + typ = typ_for_val(val, ci, ir, idx, sv.slottypes) + # Having UNDEF_TOKEN appear on the RHS is possible if we're on a dead branch. + # Do something reasonable here, by marking the LHS as undef as well. + if val !== UNDEF_TOKEN + thisdef = true + thisval = make_ssa!(ci, code, idx, typ) + else + code[idx] = nothing + thisval = UNDEF_TOKEN + thisdef = false + end + incoming_vals[id] = Pair{Any, Any}(thisval, thisdef) + has_pinode[id] = false + enter_idx = idx + while handler_at[enter_idx][1] != 0 + (; enter_idx) = handlers[handler_at[enter_idx][1]] + leave_block = block_for_inst(cfg, (code[enter_idx]::EnterNode).catch_dest) + cidx = findfirst((; slot)::NewPhiCNode2->slot_id(slot)==id, new_phic_nodes[leave_block]) + if cidx !== nothing + node = thisdef ? 
UpsilonNode(thisval) : UpsilonNode() + if incoming_vals[id] === UNDEF_TOKEN + node = UpsilonNode() + typ = Union{} + end + insert = new_phic_nodes[leave_block][cidx].insert + push!(insert.node.values, + NewSSAValue(insert_node!(ir, idx, NewInstruction(node, typ), true).id - length(ir.stmts))) + if insert.undef_node !== nothing + push!(insert.undef_node.values, + NewSSAValue(insert_node!(ir, idx, NewInstruction(UpsilonNode(thisdef), Bool), true).id - length(ir.stmts))) + end end - push!(phicnodes[exc][cidx][3].values, - NewSSAValue(insert_node!(ir, idx, NewInstruction(node, typ), true).id - length(ir.stmts))) end end end end end + # Unwrap any PiNodes before continuing, since they weren't considered during our + # dominance frontier calculation and so have to be used locally in each BB. + for (i, (ival, idef)) in enumerate(incoming_vals) + if has_pinode[i] + stmt = ir[new_to_regular(ival::NewSSAValue, length(ir.stmts))][:stmt] + incoming_vals[i] = Pair{Any, Any}(stmt.val, idef) + end + end for succ in cfg.blocks[item].succs push!(worklist, (succ, item, copy(incoming_vals))) end @@ -812,7 +863,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, nstmts = length(ir.stmts) new_code = Vector{Any}(undef, nstmts) ssavalmap = fill(SSAValue(-1), length(ssavaluetypes) + 1) - result_types = Any[Any for _ in 1:nstmts] # Detect statement positions for assignments and construct array for (bb, idx) in bbidxiter(ir) stmt = code[idx] @@ -827,15 +877,15 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, else new_code[idx] = GotoIfNot(stmt.cond, new_dest) end - elseif isexpr(stmt, :enter) - new_code[idx] = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int)) + elseif isa(stmt, EnterNode) + except_bb = stmt.catch_dest == 0 ? 0 : block_for_inst(cfg, stmt.catch_dest) + new_code[idx] = EnterNode(stmt, except_bb) ssavalmap[idx] = SSAValue(idx) # Slot to store token for pop_exception elseif isexpr(stmt, :leave) || isexpr(stmt, :(=)) || isa(stmt, ReturnNode) || isexpr(stmt, :meta) || isa(stmt, NewvarNode) new_code[idx] = stmt else ssavalmap[idx] = SSAValue(idx) - result_types[idx] = ssavaluetypes[idx] if isa(stmt, PhiNode) edges = Int32[edge == 0 ? 0 : block_for_inst(cfg, Int(edge)) for edge in stmt.edges] new_code[idx] = PhiNode(edges, stmt.values) @@ -844,64 +894,17 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end end end - for (_, nodes) in phicnodes - for (_, ssa, node) in nodes - new_typ = Union{} - # TODO: This could just be the ones that depend on other phis - push!(type_refine_phi, ssa.id) - new_idx = ssa.id - node = new_nodes.stmts[new_idx] - phic_values = (node[:inst]::PhiCNode).values - for i = 1:length(phic_values) - orig_typ = typ = typ_for_val(phic_values[i], ci, ir.sptypes, -1, slottypes) - @assert !isa(typ, MaybeUndef) - while isa(typ, DelayedTyp) - typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] - end - new_typ = tmerge(new_typ, typ) - end - node[:type] = new_typ - end - end - # This is a bit awkward, because it basically duplicates what type - # inference does. 
Ideally, we'd just use this representation earlier - # to make sure phi nodes have accurate types - changed = true - while changed - changed = false - for new_idx in type_refine_phi - node = new_nodes.stmts[new_idx] - new_typ = recompute_type(node[:inst]::Union{PhiNode,PhiCNode}, ci, ir, ir.sptypes, slottypes, nstmts) - if !(node[:type] ⊑ new_typ) || !(new_typ ⊑ node[:type]) - node[:type] = new_typ - changed = true - end - end - end - for i in 1:length(result_types) - rt_i = result_types[i] - if rt_i isa DelayedTyp - result_types[i] = types(ir)[new_to_regular(rt_i.phi::NewSSAValue, nstmts)] - end - end - for i = 1:length(new_nodes) - local node = new_nodes.stmts[i] - local typ = node[:type] - if isa(typ, DelayedTyp) - node[:type] = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] - end - end # Renumber SSA values @assert isempty(ir.stmts.type) resize!(ir.stmts.type, nstmts) for i in 1:nstmts local node = ir.stmts[i] - node[:inst] = new_to_regular(renumber_ssa!(new_code[i], ssavalmap), nstmts) - node[:type] = result_types[i] + node[:stmt] = new_to_regular(renumber_ssa!(new_code[i], ssavalmap), nstmts) + node[:type] = ssavaluetypes[i] end for i = 1:length(new_nodes) local node = new_nodes.stmts[i] - node[:inst] = new_to_regular(renumber_ssa!(node[:inst], ssavalmap), nstmts) + node[:stmt] = new_to_regular(renumber_ssa!(node[:stmt], ssavalmap), nstmts) end @timeit "domsort" ir = domsort_ssa!(ir, domtree) return ir diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index ec43a0e142699..9eded81d9d84b 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -1,17 +1,28 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +function maybe_show_ir(ir::IRCode) + if isdefined(Core, :Main) + invokelatest(Core.Main.Base.display, ir) + end +end + if !isdefined(@__MODULE__, Symbol("@verify_error")) macro verify_error(arg) arg isa String && return esc(:(print && println(stderr, $arg))) - (arg isa Expr && arg.head === :string) || error("verify_error macro expected a string expression") + isexpr(arg, :string) || error("verify_error macro expected a string expression") pushfirst!(arg.args, GlobalRef(Core, :stderr)) pushfirst!(arg.args, :println) arg.head = :call - return esc(arg) + return esc(quote + $arg + maybe_show_ir(ir) + end) end end -function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool) +is_toplevel_expr_head(head::Symbol) = head === :global || head === :method || head === :thunk +is_value_pos_expr_head(head::Symbol) = head === :static_parameter +function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool) if isa(op, SSAValue) if op.id > length(ir.stmts) def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos) @@ -30,11 +41,19 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, else if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb)) # At the moment, we allow GC preserve tokens outside the standard domination notion - #@Base.show ir - @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value $(op.id))" + @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value %$(op.id) at %$(printed_use_idx))" error("") 
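With `maybe_show_ir` added above, a failed check now prints its message and dumps the whole `IRCode` before the caller raises. A minimal sketch of that report-then-fail flow, written as a plain function instead of the `@verify_error` macro (the name and signature here are illustrative only):

# Print the diagnostic, dump the object being verified for context, then throw,
# mirroring the print-then-error sequence the verifier uses.
function report_invalid(msg::AbstractString, context)
    println(stderr, msg)
    show(stderr, MIME"text/plain"(), context)
    println(stderr)
    error("IR verification failed")
end

# e.g. report_invalid("Basic Block 2 does not dominate block 5", ir)   # assuming some `ir`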
end end + + use_inst = ir[op] + if isa(use_inst[:stmt], Union{GotoIfNot, GotoNode, ReturnNode}) && !(isa(use_inst[:stmt], ReturnNode) && !isdefined(use_inst[:stmt], :val)) + # Allow uses of `unreachable`, which may have been inserted when + # an earlier block got deleted, but for some reason we didn't figure + # out yet that this entire block is dead also. + @verify_error "At statement %$use_idx: Invalid use of value statement or terminator %$(op.id)" + error("") + end elseif isa(op, GlobalRef) if !isdefined(op.mod, op.name) || !isconst(op.mod, op.name) @verify_error "Unbound GlobalRef not allowed in value position" @@ -46,17 +65,16 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, # Allow a tuple in symbol position for foreigncall - this isn't actually # a real call - it's interpreted in global scope by codegen. However, # we do need to keep this a real use, because it could also be a pointer. - elseif op.head !== :boundscheck + elseif !is_value_pos_expr_head(op.head) if !allow_frontend_forms || op.head !== :opaque_closure_method @verify_error "Expr not allowed in value position" error("") end end elseif isa(op, Union{OldSSAValue, NewSSAValue}) - #@Base.show ir @verify_error "Left over SSA marker" error("") - elseif isa(op, Union{SlotNumber, TypedSlot}) + elseif isa(op, SlotNumber) @verify_error "Left over slot detected in converted IR" error("") end @@ -72,25 +90,46 @@ function count_int(val::Int, arr::Vector{Int}) n end -function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=false) +function verify_ir(ir::IRCode, print::Bool=true, + allow_frontend_forms::Bool=false, + 𝕃ₒ::AbstractLattice = SimpleInferenceLattice.instance) + # Verify CFG graph. Must be well formed to construct domtree + if !(length(ir.cfg.blocks) - 1 <= length(ir.cfg.index) <= length(ir.cfg.blocks)) + @verify_error "CFG index length ($(length(ir.cfg.index))) does not correspond to # of blocks $(length(ir.cfg.blocks))" + error("") + end + if length(ir.stmts.stmt) != length(ir.stmts) + @verify_error "IR stmt length is invalid $(length(ir.stmts.stmt)) / $(length(ir.stmts))" + error("") + end + if length(ir.stmts.type) != length(ir.stmts) + @verify_error "IR type length is invalid $(length(ir.stmts.type)) / $(length(ir.stmts))" + error("") + end + if length(ir.stmts.info) != length(ir.stmts) + @verify_error "IR info length is invalid $(length(ir.stmts.info)) / $(length(ir.stmts))" + error("") + end + if length(ir.stmts.line) != length(ir.stmts) + @verify_error "IR line length is invalid $(length(ir.stmts.line)) / $(length(ir.stmts))" + error("") + end + if length(ir.stmts.flag) != length(ir.stmts) + @verify_error "IR flag length is invalid $(length(ir.stmts.flag)) / $(length(ir.stmts))" + error("") + end # For now require compact IR # @assert isempty(ir.new_nodes) # Verify CFG last_end = 0 - # Verify statements - domtree = construct_domtree(ir.cfg.blocks) + # Verify CFG graph. 
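The new length checks treat `ir.stmts` as a set of parallel columns (`stmt`, `type`, `info`, `line`, `flag`) that must all agree before any deeper verification runs. A toy sketch of that invariant, with hypothetical names and only a few of the columns:

struct ToyInstStream
    stmt::Vector{Any}
    type::Vector{Any}
    line::Vector{Int32}
    flag::Vector{UInt8}
end

# Insist every column has the expected length, as the verifier now does up front.
function check_columns(is::ToyInstStream, n::Int)
    for (name, col) in ((:stmt, is.stmt), (:type, is.type), (:line, is.line), (:flag, is.flag))
        length(col) == n || error("IR $name length is invalid $(length(col)) / $n")
    end
    return nothing
end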
Must be well formed to construct domtree for (idx, block) in pairs(ir.cfg.blocks) - if first(block.stmts) != last_end + 1 - #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] - @verify_error "First statement of BB $idx ($(first(block.stmts))) does not match end of previous ($last_end)" - error("") - end - last_end = last(block.stmts) - terminator = ir.stmts[last_end][:inst] - - bb_unreachable(domtree, idx) && continue for p in block.preds p == 0 && continue + if !(1 <= p <= length(ir.cfg.blocks)) + @verify_error "Predecessor $p of block $idx out of bounds for IR" + error("") + end c = count_int(idx, ir.cfg.blocks[p].succs) if c == 0 @verify_error "Predecessor $p of block $idx not in successor list" @@ -102,6 +141,44 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals end end end + for s in block.succs + if !(1 <= s <= length(ir.cfg.blocks)) + @verify_error "Successor $s of block $idx out of bounds for IR" + error("") + end + if !(idx in ir.cfg.blocks[s].preds) + #Base.@show ir.cfg + #Base.@show ir + #Base.@show ir.argtypes + @verify_error "Successor $s of block $idx not in predecessor list" + error("") + end + end + if !(1 <= first(block.stmts) <= length(ir.stmts)) + @verify_error "First statement of BB $idx ($(first(block.stmts))) out of bounds for IR (length=$(length(ir.stmts)))" + error("") + end + if !(1 <= last(block.stmts) <= length(ir.stmts)) + @verify_error "Last statement of BB $idx ($(last(block.stmts))) out of bounds for IR (length=$(length(ir.stmts)))" + error("") + end + if idx <= length(ir.cfg.index) && last(block.stmts) + 1 != ir.cfg.index[idx] + @verify_error "End of BB $idx ($(last(block.stmts))) is not one less than CFG index ($(ir.cfg.index[idx]))" + error("") + end + end + # Verify statements + domtree = construct_domtree(ir.cfg.blocks) + for (idx, block) in pairs(ir.cfg.blocks) + if first(block.stmts) != last_end + 1 + #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] + @verify_error "First statement of BB $idx ($(first(block.stmts))) does not match end of previous ($last_end)" + error("") + end + last_end = last(block.stmts) + terminator = ir[SSAValue(last_end)][:stmt] + + bb_unreachable(domtree, idx) && continue if isa(terminator, ReturnNode) if !isempty(block.succs) @verify_error "Block $idx ends in return or unreachable, but has successors" @@ -109,7 +186,7 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals end elseif isa(terminator, GotoNode) if length(block.succs) != 1 || block.succs[1] != terminator.label - @verify_error "Block $idx successors ($(block.succs)), does not match GotoNode terminator" + @verify_error "Block $idx successors ($(block.succs)), does not match GotoNode terminator ($(terminator.label))" error("") end elseif isa(terminator, GotoIfNot) @@ -121,9 +198,9 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals @verify_error "Block $idx successors ($(block.succs)), does not match GotoIfNot terminator" error("") end - elseif isexpr(terminator, :enter) + elseif isa(terminator, EnterNode) @label enter_check - if length(block.succs) != 2 || (block.succs != Int[terminator.args[1], idx+1] && block.succs != Int[idx+1, terminator.args[1]]) + if length(block.succs) != 2 || (block.succs != Int[terminator.catch_dest, idx+1] && block.succs != Int[idx+1, terminator.catch_dest]) @verify_error "Block $idx successors ($(block.succs)), does not match :enter terminator" error("") end @@ -131,58 
+208,81 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals if length(block.succs) != 1 || block.succs[1] != idx + 1 # As a special case, we allow extra statements in the BB of an :enter # statement, until we can do proper CFG manipulations during compaction. - for idx in first(block.stmts):last(block.stmts) - stmt = ir.stmts[idx][:inst] - if isexpr(stmt, :enter) + for stmt_idx in first(block.stmts):last(block.stmts) + stmt = ir[SSAValue(stmt_idx)][:stmt] + if isa(stmt, EnterNode) terminator = stmt @goto enter_check end isa(stmt, PhiNode) || break end - @verify_error "Block $idx successors ($(block.succs)), does not match fall-through terminator ($terminator)" - error("") - end - end - for s in block.succs - if !(idx in ir.cfg.blocks[s].preds) - #@Base.show ir.cfg - #@Base.show ir - #@Base.show ir.argtypes - @verify_error "Successor $s of block $idx not in predecessor list" - error("") + termidx = last(block.stmts) + stmttyp = ir.stmts[termidx][:type] + if isempty(block.succs) && stmttyp == Union{} + # Allow fallthrough terminators that are known to error to + # be removed from the CFG. Ideally we'd add an unreachable + # here, but that isn't always possible. + else + @verify_error "Block $idx successors ($(block.succs)), does not match fall-through terminator %$termidx ($terminator)::$stmttyp" + error("") + end end end end + if length(ir.stmts) != last(ir.cfg.blocks[end].stmts) + @verify_error "End of last BB $(last(ir.cfg.blocks[end].stmts)) does not match last IR statement $(length(ir.stmts))" + error("") + end + lastbb = 0 + is_phinode_block = false + firstidx = 1 + lastphi = 1 for (bb, idx) in bbidxiter(ir) + if bb != lastbb + is_phinode_block = true + lastphi = firstidx = idx + lastbb = bb + end # We allow invalid IR in dead code to avoid passes having to detect when # they're generating dead code. bb_unreachable(domtree, bb) && continue - stmt = ir.stmts[idx][:inst] + stmt = ir[SSAValue(idx)][:stmt] stmt === nothing && continue if isa(stmt, PhiNode) + if !is_phinode_block + @verify_error "φ node $idx is not at the beginning of the basic block $bb" + error("") + end + lastphi = idx @assert length(stmt.edges) == length(stmt.values) for i = 1:length(stmt.edges) edge = stmt.edges[i] for j = (i+1):length(stmt.edges) edge′ = stmt.edges[j] if edge == edge′ - # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is + # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is always unique. @verify_error "Edge list φ node $idx in bb $bb not unique (double edge?)" error("") end end if !(edge == 0 && bb == 1) && !(edge in ir.cfg.blocks[bb].preds) - #@Base.show ir.argtypes - #@Base.show ir + #Base.@show ir.argtypes + #Base.@show ir @verify_error "Edge $edge of φ node $idx not in predecessor list" error("") end edge == 0 && continue + if bb_unreachable(domtree, Int(edge)) + # TODO: Disallow? + #@verify_error "Unreachable edge from #$edge should have been cleaned up at idx $idx" + #error("") + continue + end isassigned(stmt.values, i) || continue val = stmt.values[i] phiT = ir.stmts[idx][:type] if isa(val, SSAValue) - if !(types(ir)[val] ⊑ phiT) + if !⊑(𝕃ₒ, types(ir)[val], phiT) #@verify_error """ # PhiNode $idx, has operand $(val.id), whose type is not a sub lattice element. 
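The φ-node checks above boil down to two structural rules per node: its edge list may not contain duplicates, and every nonzero edge must appear in the enclosing block's predecessor list. A small self-contained sketch of just those two rules (hypothetical helper, plain `Int` edges):

# Returns an error string for the first violated rule, `nothing` if both hold.
function check_phi_edges(edges::Vector{Int}, preds::Vector{Int})
    seen = Set{Int}()
    for edge in edges
        edge in seen && return "duplicate edge $edge"
        push!(seen, edge)
        edge == 0 && continue                      # implicit entry edge, allowed in block 1
        edge in preds || return "edge $edge not in predecessor list"
    end
    return nothing
end

# check_phi_edges([1, 3, 1], [1, 3]) == "duplicate edge 1"
# check_phi_edges([1, 4], [1, 3])    == "edge 4 not in predecessor list"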
# PhiNode type was $phiT @@ -191,25 +291,49 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals #error("") end end - check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print, false, i, allow_frontend_forms) + check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i, allow_frontend_forms) end - elseif isa(stmt, PhiCNode) + continue + end + + if is_phinode_block && !is_valid_phiblock_stmt(stmt) + if !isa(stmt, Expr) || !is_value_pos_expr_head(stmt.head) + # Go back and check that all non-PhiNodes are valid value-position + for validate_idx in firstidx:(lastphi-1) + validate_stmt = ir[SSAValue(validate_idx)][:stmt] + isa(validate_stmt, PhiNode) && continue + check_op(ir, domtree, validate_stmt, bb, idx, idx, print, false, 0, allow_frontend_forms) + end + is_phinode_block = false + end + end + if isa(stmt, PhiCNode) for i = 1:length(stmt.values) val = stmt.values[i] if !isa(val, SSAValue) @verify_error "Operand $i of PhiC node $idx must be an SSA Value." error("") end - if !isa(ir[val][:inst], UpsilonNode) + if !isa(ir[val][:stmt], UpsilonNode) @verify_error "Operand $i of PhiC node $idx must reference an Upsilon node." error("") end end + elseif (isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, EnterNode)) && idx != last(ir.cfg.blocks[bb].stmts) + @verify_error "Terminator $idx in bb $bb is not the last statement in the block" + error("") else if isa(stmt, Expr) || isa(stmt, ReturnNode) # TODO: make sure everything has line info + if (stmt isa ReturnNode) + if isdefined(stmt, :val) + # TODO: Disallow unreachable returns? + # bb_unreachable(domtree, Int64(edge)) + else + #@verify_error "Missing line number information for statement $idx of $ir" + end + end if !(stmt isa ReturnNode && !isdefined(stmt, :val)) # not actually a return node, but an unreachable marker if ir.stmts[idx][:line] <= 0 - #@verify_error "Missing line number information for statement $idx of $ir" end end end @@ -232,15 +356,35 @@ function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=fals elseif stmt.head === :foreigncall isforeigncall = true elseif stmt.head === :isdefined && length(stmt.args) == 1 && - (stmt.args[1] isa GlobalRef || (stmt.args[1] isa Expr && stmt.args[1].head === :static_parameter)) + (stmt.args[1] isa GlobalRef || isexpr(stmt.args[1], :static_parameter)) # a GlobalRef or static_parameter isdefined check does not evaluate its argument continue + elseif stmt.head === :call + f = stmt.args[1] + if f isa GlobalRef && f.name === :cglobal + # TODO: these are not yet linearized + continue + end + elseif stmt.head === :leave + for i in 1:length(stmt.args) + arg = stmt.args[i] + if !isa(arg, Union{Nothing, SSAValue}) + @verify_error "Malformed :leave - Expected `Nothing` or SSAValue" + error() + elseif isa(arg, SSAValue) + enter_stmt = ir[arg::SSAValue][:stmt] + if !isa(enter_stmt, Nothing) && !isa(enter_stmt, EnterNode) + @verify_error "Malformed :leave - argument ssavalue should point to `nothing` or :enter" + error() + end + end + end end end n = 1 for op in userefs(stmt) op = op[] - check_op(ir, domtree, op, bb, idx, print, isforeigncall, n, allow_frontend_forms) + check_op(ir, domtree, op, bb, idx, idx, print, isforeigncall, n, allow_frontend_forms) n += 1 end end diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl index 72b4c8b829c06..e28858eea60aa 100644 --- a/base/compiler/stmtinfo.jl +++ b/base/compiler/stmtinfo.jl @@ -10,24 +10,30 @@ and any additional 
information (`call.info`) for a given generic call. """ struct CallMeta rt::Any + exct::Any effects::Effects - info::Any + info::CallInfo end +struct NoCallInfo <: CallInfo end + """ - info::MethodMatchInfo + info::MethodMatchInfo <: CallInfo Captures the result of a `:jl_matching_methods` lookup for the given call (`info.results`). This info may then be used by the optimizer to inline the matches, without having to re-consult the method table. This info is illegal on any statement that is not a call to a generic function. """ -struct MethodMatchInfo +struct MethodMatchInfo <: CallInfo results::MethodLookupResult end +nsplit_impl(info::MethodMatchInfo) = 1 +getsplit_impl(info::MethodMatchInfo, idx::Int) = (@assert idx == 1; info.results) +getresult_impl(::MethodMatchInfo, ::Int) = nothing """ - info::UnionSplitInfo + info::UnionSplitInfo <: CallInfo If inference decides to partition the method search space by splitting unions, it will issue a method lookup query for each such partition. This info indicates @@ -35,7 +41,7 @@ that such partitioning happened and wraps the corresponding `MethodMatchInfo` fo each partition (`info.matches::Vector{MethodMatchInfo}`). This info is illegal on any statement that is not a call to a generic function. """ -struct UnionSplitInfo +struct UnionSplitInfo <: CallInfo matches::Vector{MethodMatchInfo} end @@ -47,12 +53,17 @@ function nmatches(info::UnionSplitInfo) end return n end +nsplit_impl(info::UnionSplitInfo) = length(info.matches) +getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit_impl(info.matches[idx], 1) +getresult_impl(::UnionSplitInfo, ::Int) = nothing + +abstract type ConstResult end -struct ConstPropResult +struct ConstPropResult <: ConstResult result::InferenceResult end -struct ConcreteResult +struct ConcreteResult <: ConstResult mi::MethodInstance effects::Effects result @@ -60,49 +71,65 @@ struct ConcreteResult ConcreteResult(mi::MethodInstance, effects::Effects, @nospecialize val) = new(mi, effects, val) end -const ConstResult = Union{ConstPropResult,ConcreteResult} +struct SemiConcreteResult <: ConstResult + mi::MethodInstance + ir::IRCode + effects::Effects +end + +# XXX Technically this does not represent a result of constant inference, but rather that of +# regular edge inference. It might be more appropriate to rename `ConstResult` and +# `ConstCallInfo` to better reflect the fact that they represent either of local or +# volatile inference result. +struct VolatileInferenceResult <: ConstResult + inf_result::InferenceResult +end """ - info::ConstCallInfo + info::ConstCallInfo <: CallInfo The precision of this call was improved using constant information. In addition to the original call information `info.call`, this info also keeps the results of constant inference `info.results::Vector{Union{Nothing,ConstResult}}`. """ -struct ConstCallInfo +struct ConstCallInfo <: CallInfo call::Union{MethodMatchInfo,UnionSplitInfo} results::Vector{Union{Nothing,ConstResult}} end +nsplit_impl(info::ConstCallInfo) = nsplit(info.call) +getsplit_impl(info::ConstCallInfo, idx::Int) = getsplit(info.call, idx) +getresult_impl(info::ConstCallInfo, idx::Int) = info.results[idx] """ - info::MethodResultPure + info::MethodResultPure <: CallInfo This struct represents a method result constant was proven to be effect-free, including being no-throw (typically because the value was computed by calling an `@pure` function). 
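Every info struct now subtypes the `CallInfo` root, and the union-split capable ones expose their partitions through the `nsplit_impl`/`getsplit_impl`/`getresult_impl` accessors. A toy model of that shape (all `Toy*` names are stand-ins; `Vector{Symbol}` plays the role of `MethodLookupResult`):

abstract type ToyCallInfo end

struct ToyNoCallInfo <: ToyCallInfo end            # "no information" placeholder

struct ToyMatchInfo <: ToyCallInfo
    results::Vector{Symbol}                        # one lookup result
end

struct ToyUnionSplitInfo <: ToyCallInfo
    matches::Vector{ToyMatchInfo}                  # one lookup per union partition
end

nsplit(::ToyCallInfo) = nothing                    # default: not splittable
nsplit(::ToyMatchInfo) = 1
nsplit(info::ToyUnionSplitInfo) = length(info.matches)

getsplit(info::ToyMatchInfo, i::Int) = (@assert i == 1; info.results)
getsplit(info::ToyUnionSplitInfo, i::Int) = getsplit(info.matches[i], 1)

# nsplit(ToyUnionSplitInfo([ToyMatchInfo([:f]), ToyMatchInfo([:g])])) == 2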
""" -struct MethodResultPure - info::Any +struct MethodResultPure <: CallInfo + info::CallInfo end -let instance = MethodResultPure(false) +let instance = MethodResultPure(NoCallInfo()) global MethodResultPure MethodResultPure() = instance end """ - info::AbstractIterationInfo + ainfo::AbstractIterationInfo Captures all the information for abstract iteration analysis of a single value. -Each (abstract) call to `iterate`, corresponds to one entry in `info.each::Vector{CallMeta}`. +Each (abstract) call to `iterate`, corresponds to one entry in `ainfo.each::Vector{CallMeta}`. """ struct AbstractIterationInfo each::Vector{CallMeta} + complete::Bool end const MaybeAbstractIterationInfo = Union{Nothing, AbstractIterationInfo} """ - info::ApplyCallInfo + info::ApplyCallInfo <: CallInfo This info applies to any call of `_apply_iterate(...)` and captures both the info of the actual call being applied and the info for any implicit call @@ -111,7 +138,7 @@ to be yet another `_apply_iterate`, in which case the `info.call` field will be another `ApplyCallInfo`. This info is illegal on any statement that is not an `_apply_iterate` call. """ -struct ApplyCallInfo +struct ApplyCallInfo <: CallInfo # The info for the call itself call::Any # AbstractIterationInfo for each argument, if applicable @@ -119,12 +146,12 @@ struct ApplyCallInfo end """ - info::UnionSplitApplyCallInfo + info::UnionSplitApplyCallInfo <: CallInfo Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than `MethodMatchInfo`. This info is illegal on any statement that is not an `_apply_iterate` call. """ -struct UnionSplitApplyCallInfo +struct UnionSplitApplyCallInfo <: CallInfo infos::Vector{ApplyCallInfo} end @@ -135,7 +162,7 @@ Represents a resolved call to `Core.invoke`, carrying the `info.match::MethodMat the method that has been processed. Optionally keeps `info.result::InferenceResult` that keeps constant information. """ -struct InvokeCallInfo +struct InvokeCallInfo <: CallInfo match::MethodMatch result::Union{Nothing,ConstResult} end @@ -147,20 +174,20 @@ Represents a resolved call of opaque closure, carrying the `info.match::MethodMa the method that has been processed. Optionally keeps `info.result::InferenceResult` that keeps constant information. """ -struct OpaqueClosureCallInfo +struct OpaqueClosureCallInfo <: CallInfo match::MethodMatch result::Union{Nothing,ConstResult} end """ - info::OpaqueClosureCreateInfo + info::OpaqueClosureCreateInfo <: CallInfo This info may be constructed upon opaque closure construction, with `info.unspec::CallMeta` carrying out inference result of an unreal, partially specialized call (i.e. specialized on the closure environment, but not on the argument types of the opaque closure) in order to allow the optimizer to rewrite the return type parameter of the `OpaqueClosure` based on it. """ -struct OpaqueClosureCreateInfo +struct OpaqueClosureCreateInfo <: CallInfo unspec::CallMeta function OpaqueClosureCreateInfo(unspec::CallMeta) @assert isa(unspec.info, OpaqueClosureCallInfo) @@ -173,25 +200,35 @@ end # the AbstractInterpreter. """ - info::ReturnTypeCallInfo + info::ReturnTypeCallInfo <: CallInfo Represents a resolved call of `Core.Compiler.return_type`. `info.call` wraps the info corresponding to the call that `Core.Compiler.return_type` call was supposed to analyze. 
""" -struct ReturnTypeCallInfo - info::Any +struct ReturnTypeCallInfo <: CallInfo + info::CallInfo end """ - info::FinalizerInfo + info::FinalizerInfo <: CallInfo Represents the information of a potential (later) call to the finalizer on the given object type. """ -struct FinalizerInfo - info::Any - effects::Effects +struct FinalizerInfo <: CallInfo + info::CallInfo # the callinfo for the finalizer call + effects::Effects # the effects for the finalizer call +end + +""" + info::ModifyFieldInfo <: CallInfo + +Represents a resolved all of `modifyfield!(obj, name, op, x, [order])`. +`info.info` wraps the call information of `op(getfield(obj, name), x)`. +""" +struct ModifyFieldInfo <: CallInfo + info::CallInfo # the callinfo for the `op(getfield(obj, name), x)` call end @specialize diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 096e47c1e500d..bb8712458e0be 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -4,9 +4,51 @@ # constants # ############# -@nospecialize - -const _NAMEDTUPLE_NAME = NamedTuple.body.body.name +""" + @nospecs def + +Adds `@nospecialize` annotation to non-annotated arguments of `def`. +```julia +(Core.Compiler) julia> @macroexpand @nospecs function tfunc(𝕃::AbstractLattice, x, y::Bool, zs...) + x, ys + end +:(function tfunc(\$(Expr(:meta, :specialize, :(𝕃::AbstractLattice))), x, y::Bool, zs...) + #= REPL[3]:1 =# + \$(Expr(:meta, :nospecialize, :x, :zs)) + #= REPL[3]:2 =# + (x, ys) + end) +``` +""" +macro nospecs(ex) + is_function_def(ex) || throw(ArgumentError("expected function definition")) + args, body = ex.args + if isexpr(args, :call) + args = args.args[2:end] # skip marking `@nospecialize` on the function itself + else + @assert isexpr(args, :tuple) # anonymous function + args = args.args + end + names = Symbol[] + for arg in args + isexpr(arg, :macrocall) && continue + if isexpr(arg, :...) + arg = arg.args[1] + elseif isexpr(arg, :kw) + arg = arg.args[1] + end + isexpr(arg, :(::)) && continue + @assert arg isa Symbol + push!(names, arg) + end + @assert isexpr(body, :block) + if !isempty(names) + lin = first(body.args)::LineNumberNode + nospec = Expr(:macrocall, Symbol("@nospecialize"), lin, names...) + insert!(body.args, 2, nospec) + end + return esc(ex) +end const INT_INF = typemax(Int) # integer infinity @@ -25,6 +67,7 @@ function find_tfunc(@nospecialize f) end const DATATYPE_TYPES_FIELDINDEX = fieldindex(DataType, :types) +const DATATYPE_NAME_FIELDINDEX = fieldindex(DataType, :name) ########## # tfuncs # @@ -39,39 +82,44 @@ function add_tfunc(f::IntrinsicFunction, minarg::Int, maxarg::Int, @nospecialize T_IFUNC[idx] = (minarg, maxarg, tfunc) T_IFUNC_COST[idx] = cost end -# TODO: add @nospecialize on `f` and declare its type as `Builtin` when that's supported -function add_tfunc(f::Function, minarg::Int, maxarg::Int, @nospecialize(tfunc), cost::Int) +function add_tfunc(@nospecialize(f::Builtin), minarg::Int, maxarg::Int, @nospecialize(tfunc), cost::Int) push!(T_FFUNC_KEY, f) push!(T_FFUNC_VAL, (minarg, maxarg, tfunc)) push!(T_FFUNC_COST, cost) end -add_tfunc(throw, 1, 1, (@nospecialize(x)) -> Bottom, 0) +add_tfunc(throw, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0) # the inverse of typeof_tfunc # returns (type, isexact, isconcrete, istype) # if isexact is false, the actual runtime type may (will) be a subtype of t # if isconcrete is true, the actual runtime type is definitely concrete (unreachable if not valid as a typeof) # if istype is true, the actual runtime value will definitely be a type (e.g. 
this is false for Union{Type{Int}, Int}) -function instanceof_tfunc(@nospecialize(t)) +function instanceof_tfunc(@nospecialize(t), astag::Bool=false, @nospecialize(troot) = t) if isa(t, Const) - if isa(t.val, Type) && valid_as_lattice(t.val) + if isa(t.val, Type) && valid_as_lattice(t.val, astag) return t.val, true, isconcretetype(t.val), true end return Bottom, true, false, false # runtime throws on non-Type end t = widenconst(t) + troot = widenconst(troot) if t === Bottom return Bottom, true, true, false # runtime unreachable elseif t === typeof(Bottom) || !hasintersect(t, Type) return Bottom, true, false, false # literal Bottom or non-Type elseif isType(t) tp = t.parameters[1] - valid_as_lattice(tp) || return Bottom, true, false, false # runtime unreachable / throws on non-Type + valid_as_lattice(tp, astag) || return Bottom, true, false, false # runtime unreachable / throws on non-Type + if troot isa UnionAll + # Free `TypeVar`s inside `Type` has violated the "diagonal" rule. + # Widen them before `UnionAll` rewraping to relax concrete constraint. + tp = widen_diagonal(tp, troot) + end return tp, !has_free_typevars(tp), isconcretetype(tp), true elseif isa(t, UnionAll) t′ = unwrap_unionall(t) - t′′, isexact, isconcrete, istype = instanceof_tfunc(t′) + t′′, isexact, isconcrete, istype = instanceof_tfunc(t′, astag, rewrap_unionall(t, troot)) tr = rewrap_unionall(t′′, t) if t′′ isa DataType && t′′.name !== Tuple.name && !has_free_typevars(tr) # a real instance must be within the declared bounds of the type, @@ -86,8 +134,8 @@ function instanceof_tfunc(@nospecialize(t)) end return tr, isexact, isconcrete, istype elseif isa(t, Union) - ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(t.a) - tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(t.b) + ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(t.a, astag, troot) + tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(t.b, astag, troot) isconcrete = isconcrete_a && isconcrete_b istype = istype_a && istype_b # most users already handle the Union case, so here we assume that @@ -99,58 +147,95 @@ function instanceof_tfunc(@nospecialize(t)) end return Any, false, false, false end -bitcast_tfunc(@nospecialize(t), @nospecialize(x)) = instanceof_tfunc(t)[1] -math_tfunc(@nospecialize(x)) = widenconst(x) -math_tfunc(@nospecialize(x), @nospecialize(y)) = widenconst(x) -math_tfunc(@nospecialize(x), @nospecialize(y), @nospecialize(z)) = widenconst(x) -fptoui_tfunc(@nospecialize(t), @nospecialize(x)) = bitcast_tfunc(t, x) -fptosi_tfunc(@nospecialize(t), @nospecialize(x)) = bitcast_tfunc(t, x) - - ## conversion ## -add_tfunc(bitcast, 2, 2, bitcast_tfunc, 1) -add_tfunc(sext_int, 2, 2, bitcast_tfunc, 1) -add_tfunc(zext_int, 2, 2, bitcast_tfunc, 1) -add_tfunc(trunc_int, 2, 2, bitcast_tfunc, 1) -add_tfunc(fptoui, 2, 2, fptoui_tfunc, 1) -add_tfunc(fptosi, 2, 2, fptosi_tfunc, 1) -add_tfunc(uitofp, 2, 2, bitcast_tfunc, 1) -add_tfunc(sitofp, 2, 2, bitcast_tfunc, 1) -add_tfunc(fptrunc, 2, 2, bitcast_tfunc, 1) -add_tfunc(fpext, 2, 2, bitcast_tfunc, 1) - ## arithmetic ## -add_tfunc(neg_int, 1, 1, math_tfunc, 1) + +# IntrinsicFunction +# ================= + +# conversion +# ---------- + +@nospecs bitcast_tfunc(𝕃::AbstractLattice, t, x) = bitcast_tfunc(widenlattice(𝕃), t, x) +@nospecs bitcast_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t, true)[1] +@nospecs conversion_tfunc(𝕃::AbstractLattice, t, x) = conversion_tfunc(widenlattice(𝕃), t, x) +@nospecs conversion_tfunc(::JLTypeLattice, t, x) = instanceof_tfunc(t, true)[1] + 
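Each `add_tfunc` call here registers an arity range, a transfer function, and a cost that feeds the inlining heuristics, which is why the cost constants are retuned in the registrations that follow. A simplified stand-in for that registration table, using a `Dict` instead of the parallel `T_FFUNC_*` vectors the compiler keeps (names below are illustrative only):

# f => (minarg, maxarg, tfunc, cost)
const TOY_TFUNCS = Dict{Function, Tuple{Int, Int, Function, Int}}()

register_tfunc(f::Function, minarg::Int, maxarg::Int, tfunc::Function, cost::Int) =
    (TOY_TFUNCS[f] = (minarg, maxarg, tfunc, cost); nothing)

toy_widen(x, xs...) = typeof(x)        # crude stand-in for math_tfunc/widenconst

register_tfunc(+, 2, 2, toy_widen, 1)  # cheap: freely inlined
register_tfunc(*, 2, 2, toy_widen, 3)  # costlier: weighs against inlining

# TOY_TFUNCS[*][4] == 3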
+add_tfunc(bitcast, 2, 2, bitcast_tfunc, 0) +add_tfunc(sext_int, 2, 2, conversion_tfunc, 0) +add_tfunc(zext_int, 2, 2, conversion_tfunc, 0) +add_tfunc(trunc_int, 2, 2, conversion_tfunc, 0) +add_tfunc(fptoui, 2, 2, conversion_tfunc, 1) +add_tfunc(fptosi, 2, 2, conversion_tfunc, 1) +add_tfunc(uitofp, 2, 2, conversion_tfunc, 1) +add_tfunc(sitofp, 2, 2, conversion_tfunc, 1) +add_tfunc(fptrunc, 2, 2, conversion_tfunc, 1) +add_tfunc(fpext, 2, 2, conversion_tfunc, 1) + +# arithmetic +# ---------- + +@nospecs math_tfunc(𝕃::AbstractLattice, args...) = math_tfunc(widenlattice(𝕃), args...) +@nospecs math_tfunc(::JLTypeLattice, x, xs...) = widenconst(x) + +add_tfunc(neg_int, 1, 1, math_tfunc, 0) add_tfunc(add_int, 2, 2, math_tfunc, 1) add_tfunc(sub_int, 2, 2, math_tfunc, 1) -add_tfunc(mul_int, 2, 2, math_tfunc, 4) -add_tfunc(sdiv_int, 2, 2, math_tfunc, 30) -add_tfunc(udiv_int, 2, 2, math_tfunc, 30) -add_tfunc(srem_int, 2, 2, math_tfunc, 30) -add_tfunc(urem_int, 2, 2, math_tfunc, 30) +add_tfunc(mul_int, 2, 2, math_tfunc, 3) +add_tfunc(sdiv_int, 2, 2, math_tfunc, 20) +add_tfunc(udiv_int, 2, 2, math_tfunc, 20) +add_tfunc(srem_int, 2, 2, math_tfunc, 20) +add_tfunc(urem_int, 2, 2, math_tfunc, 20) add_tfunc(add_ptr, 2, 2, math_tfunc, 1) add_tfunc(sub_ptr, 2, 2, math_tfunc, 1) add_tfunc(neg_float, 1, 1, math_tfunc, 1) -add_tfunc(add_float, 2, 2, math_tfunc, 1) -add_tfunc(sub_float, 2, 2, math_tfunc, 1) -add_tfunc(mul_float, 2, 2, math_tfunc, 4) -add_tfunc(div_float, 2, 2, math_tfunc, 20) -add_tfunc(rem_float, 2, 2, math_tfunc, 20) -add_tfunc(fma_float, 3, 3, math_tfunc, 5) -add_tfunc(muladd_float, 3, 3, math_tfunc, 5) - ## fast arithmetic ## +add_tfunc(add_float, 2, 2, math_tfunc, 2) +add_tfunc(sub_float, 2, 2, math_tfunc, 2) +add_tfunc(mul_float, 2, 2, math_tfunc, 8) +add_tfunc(div_float, 2, 2, math_tfunc, 10) +add_tfunc(fma_float, 3, 3, math_tfunc, 8) +add_tfunc(muladd_float, 3, 3, math_tfunc, 8) + +# fast arithmetic add_tfunc(neg_float_fast, 1, 1, math_tfunc, 1) -add_tfunc(add_float_fast, 2, 2, math_tfunc, 1) -add_tfunc(sub_float_fast, 2, 2, math_tfunc, 1) -add_tfunc(mul_float_fast, 2, 2, math_tfunc, 2) +add_tfunc(add_float_fast, 2, 2, math_tfunc, 2) +add_tfunc(sub_float_fast, 2, 2, math_tfunc, 2) +add_tfunc(mul_float_fast, 2, 2, math_tfunc, 8) add_tfunc(div_float_fast, 2, 2, math_tfunc, 10) -add_tfunc(rem_float_fast, 2, 2, math_tfunc, 10) - ## bitwise operators ## -add_tfunc(and_int, 2, 2, math_tfunc, 1) -add_tfunc(or_int, 2, 2, math_tfunc, 1) + +# bitwise operators +# ----------------- + +@nospecs and_int_tfunc(𝕃::AbstractLattice, x, y) = and_int_tfunc(widenlattice(𝕃), x, y) +@nospecs function and_int_tfunc(𝕃::ConstsLattice, x, y) + if isa(x, Const) && x.val === false && widenconst(y) === Bool + return Const(false) + elseif isa(y, Const) && y.val === false && widenconst(x) === Bool + return Const(false) + end + return and_int_tfunc(widenlattice(𝕃), x, y) +end +@nospecs and_int_tfunc(::JLTypeLattice, x, y) = widenconst(x) + +@nospecs or_int_tfunc(𝕃::AbstractLattice, x, y) = or_int_tfunc(widenlattice(𝕃), x, y) +@nospecs function or_int_tfunc(𝕃::ConstsLattice, x, y) + if isa(x, Const) && x.val === true && widenconst(y) === Bool + return Const(true) + elseif isa(y, Const) && y.val === true && widenconst(x) === Bool + return Const(true) + end + return or_int_tfunc(widenlattice(𝕃), x, y) +end +@nospecs or_int_tfunc(::JLTypeLattice, x, y) = widenconst(x) + +@nospecs shift_tfunc(𝕃::AbstractLattice, x, y) = shift_tfunc(widenlattice(𝕃), x, y) +@nospecs shift_tfunc(::JLTypeLattice, x, y) = widenconst(x) + 
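The tfuncs above now take the inference lattice as their first argument and delegate with `widenlattice` until some layer can answer: the constant-aware layer folds what it can (as `and_int_tfunc`/`or_int_tfunc` just above do for literal `false`/`true`) and otherwise falls through to the plain type layer. A toy two-layer version of that dispatch; every name here is a stand-in, not a `Core.Compiler` type:

abstract type ToyLattice end
struct ToyTypeLattice <: ToyLattice end            # bottom layer: only knows Julia types
struct ToyConstLattice <: ToyLattice               # upper layer: also tracks constants
    inner::ToyTypeLattice
end
widenlattice(𝕃::ToyConstLattice) = 𝕃.inner         # toy counterpart of the compiler's widenlattice

struct ToyConst
    val::Any
end

# "tfunc" for logical and: the constant layer can fold a known `false`,
# the type layer can only report the result type.
and_type(::ToyTypeLattice, x, y) = Bool
function and_type(𝕃::ToyConstLattice, x, y)
    (x isa ToyConst && x.val === false) && return ToyConst(false)
    (y isa ToyConst && y.val === false) && return ToyConst(false)
    return and_type(widenlattice(𝕃), x, y)         # delegate to the wider layer
end

# and_type(ToyConstLattice(ToyTypeLattice()), ToyConst(false), Bool) === ToyConst(false)
# and_type(ToyConstLattice(ToyTypeLattice()), Bool, Bool) === Bool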
+add_tfunc(and_int, 2, 2, and_int_tfunc, 1) +add_tfunc(or_int, 2, 2, or_int_tfunc, 1) add_tfunc(xor_int, 2, 2, math_tfunc, 1) add_tfunc(not_int, 1, 1, math_tfunc, 0) # usually used as not_int(::Bool) to negate a condition -add_tfunc(shl_int, 2, 2, math_tfunc, 1) -add_tfunc(lshr_int, 2, 2, math_tfunc, 1) -add_tfunc(ashr_int, 2, 2, math_tfunc, 1) +add_tfunc(shl_int, 2, 2, shift_tfunc, 1) +add_tfunc(lshr_int, 2, 2, shift_tfunc, 1) +add_tfunc(ashr_int, 2, 2, shift_tfunc, 1) add_tfunc(bswap_int, 1, 1, math_tfunc, 1) add_tfunc(ctpop_int, 1, 1, math_tfunc, 1) add_tfunc(ctlz_int, 1, 1, math_tfunc, 1) @@ -159,7 +244,10 @@ add_tfunc(checked_sdiv_int, 2, 2, math_tfunc, 40) add_tfunc(checked_udiv_int, 2, 2, math_tfunc, 40) add_tfunc(checked_srem_int, 2, 2, math_tfunc, 40) add_tfunc(checked_urem_int, 2, 2, math_tfunc, 40) - ## functions ## + +# functions +# --------- + add_tfunc(abs_float, 1, 1, math_tfunc, 2) add_tfunc(copysign_float, 2, 2, math_tfunc, 2) add_tfunc(flipsign_int, 2, 2, math_tfunc, 1) @@ -169,8 +257,13 @@ add_tfunc(trunc_llvm, 1, 1, math_tfunc, 10) add_tfunc(rint_llvm, 1, 1, math_tfunc, 10) add_tfunc(sqrt_llvm, 1, 1, math_tfunc, 20) add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc, 20) - ## same-type comparisons ## -cmp_tfunc(@nospecialize(x), @nospecialize(y)) = Bool + +# comparisons +# ----------- + +@nospecs cmp_tfunc(𝕃::AbstractLattice, a, b) = cmp_tfunc(widenlattice(𝕃), a, b) +@nospecs cmp_tfunc(::JLTypeLattice, a, b) = Bool + add_tfunc(eq_int, 2, 2, cmp_tfunc, 1) add_tfunc(ne_int, 2, 2, cmp_tfunc, 1) add_tfunc(slt_int, 2, 2, cmp_tfunc, 1) @@ -187,24 +280,41 @@ add_tfunc(ne_float_fast, 2, 2, cmp_tfunc, 1) add_tfunc(lt_float_fast, 2, 2, cmp_tfunc, 1) add_tfunc(le_float_fast, 2, 2, cmp_tfunc, 1) - ## checked arithmetic ## -chk_tfunc(@nospecialize(x), @nospecialize(y)) = Tuple{widenconst(x), Bool} -add_tfunc(checked_sadd_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_uadd_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_ssub_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_usub_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 10) -add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 10) - ## other, misc intrinsics ## -add_tfunc(Core.Intrinsics.llvmcall, 3, INT_INF, - (@nospecialize(fptr), @nospecialize(rt), @nospecialize(at), a...) -> instanceof_tfunc(rt)[1], 10) -cglobal_tfunc(@nospecialize(fptr)) = Ptr{Cvoid} -cglobal_tfunc(@nospecialize(fptr), @nospecialize(t)) = (isType(t) ? Ptr{t.parameters[1]} : Ptr) -cglobal_tfunc(@nospecialize(fptr), t::Const) = (isa(t.val, Type) ? Ptr{t.val} : Ptr) +# checked arithmetic +# ------------------ + +@nospecs chk_tfunc(𝕃::AbstractLattice, x, y) = chk_tfunc(widenlattice(𝕃), x, y) +@nospecs chk_tfunc(::JLTypeLattice, x, y) = Tuple{widenconst(x), Bool} + +add_tfunc(checked_sadd_int, 2, 2, chk_tfunc, 2) +add_tfunc(checked_uadd_int, 2, 2, chk_tfunc, 2) +add_tfunc(checked_ssub_int, 2, 2, chk_tfunc, 2) +add_tfunc(checked_usub_int, 2, 2, chk_tfunc, 2) +add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 5) +add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 5) + +# other, misc +# ----------- + +@nospecs function llvmcall_tfunc(𝕃::AbstractLattice, fptr, rt, at, a...) + return instanceof_tfunc(rt)[1] +end +add_tfunc(Core.Intrinsics.llvmcall, 3, INT_INF, llvmcall_tfunc, 10) + +@nospecs cglobal_tfunc(𝕃::AbstractLattice, fptr) = Ptr{Cvoid} +@nospecs function cglobal_tfunc(𝕃::AbstractLattice, fptr, t) + isa(t, Const) && return isa(t.val, Type) ? Ptr{t.val} : Ptr + return isType(t) ? 
Ptr{t.parameters[1]} : Ptr +end add_tfunc(Core.Intrinsics.cglobal, 1, 2, cglobal_tfunc, 5) -add_tfunc(Core.Intrinsics.have_fma, 1, 1, @nospecialize(x)->Bool, 1) -function ifelse_tfunc(@nospecialize(cnd), @nospecialize(x), @nospecialize(y)) +add_tfunc(Core.Intrinsics.have_fma, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bool), 1) + +# builtin functions +# ================= + +@nospecs function ifelse_tfunc(𝕃::AbstractLattice, cnd, x, y) + cnd = widenslotwrapper(cnd) if isa(cnd, Const) if cnd.val === true return x @@ -213,58 +323,86 @@ function ifelse_tfunc(@nospecialize(cnd), @nospecialize(x), @nospecialize(y)) else return Bottom end - elseif isa(cnd, Conditional) - # optimized (if applicable) in abstract_call - elseif !(Bool ⊑ cnd) + elseif !hasintersect(widenconst(cnd), Bool) return Bottom end - return tmerge(x, y) + return tmerge(𝕃, x, y) end add_tfunc(Core.ifelse, 3, 3, ifelse_tfunc, 1) -function egal_tfunc(@nospecialize(x), @nospecialize(y)) - xx = widenconditional(x) - yy = widenconditional(y) - if isa(x, Conditional) && isa(yy, Const) - yy.val === false && return Conditional(x.slot, x.elsetype, x.thentype) - yy.val === true && return x - return Const(false) - elseif isa(y, Conditional) && isa(xx, Const) - xx.val === false && return Conditional(y.slot, y.elsetype, y.thentype) - xx.val === true && return y - return Const(false) - elseif isa(xx, Const) && isa(yy, Const) - return Const(xx.val === yy.val) - elseif !hasintersect(widenconst(xx), widenconst(yy)) - return Const(false) - elseif (isa(xx, Const) && y === typeof(xx.val) && isdefined(y, :instance)) || - (isa(yy, Const) && x === typeof(yy.val) && isdefined(x, :instance)) +@nospecs function ifelse_nothrow(𝕃::AbstractLattice, cond, x, y) + ⊑ = Core.Compiler.:⊑(𝕃) + return cond ⊑ Bool +end + +@nospecs egal_tfunc(𝕃::AbstractLattice, x, y) = egal_tfunc(widenlattice(𝕃), x, y) +@nospecs function egal_tfunc(𝕃::MustAliasesLattice, x, y) + return egal_tfunc(widenlattice(𝕃), widenmustalias(x), widenmustalias(y)) +end +@nospecs function egal_tfunc(𝕃::ConditionalsLattice, x, y) + if isa(x, Conditional) + y = widenconditional(y) + if isa(y, Const) + y.val === false && return Conditional(x.slot, x.elsetype, x.thentype) + y.val === true && return x + return Const(false) + end + elseif isa(y, Conditional) + x = widenconditional(x) + if isa(x, Const) + x.val === false && return Conditional(y.slot, y.elsetype, y.thentype) + x.val === true && return y + return Const(false) + end + end + return egal_tfunc(widenlattice(𝕃), x, y) +end +@nospecs function egal_tfunc(𝕃::ConstsLattice, x, y) + if isa(x, Const) && isa(y, Const) + return Const(x.val === y.val) + elseif (isa(x, Const) && y === typeof(x.val) && issingletontype(x)) || + (isa(y, Const) && x === typeof(y.val) && issingletontype(y)) return Const(true) end + return egal_tfunc(widenlattice(𝕃), x, y) +end +@nospecs function egal_tfunc(::JLTypeLattice, x, y) + hasintersect(widenconst(x), widenconst(y)) || return Const(false) return Bool end add_tfunc(===, 2, 2, egal_tfunc, 1) -function isdefined_nothrow(argtypes::Array{Any, 1}) - length(argtypes) == 2 || return false - a1, a2 = argtypes[1], argtypes[2] - if hasintersect(widenconst(a1), Module) - return a2 ⊑ Symbol +function isdefined_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}) + if length(argtypes) ≠ 2 + # TODO prove nothrow when ordering is specified + return false + end + return isdefined_nothrow(𝕃, argtypes[1], argtypes[2]) +end +@nospecs function isdefined_nothrow(𝕃::AbstractLattice, x, name) + ⊑ = Core.Compiler.:⊑(𝕃) + isvarargtype(x) && return 
false + isvarargtype(name) && return false + if hasintersect(widenconst(x), Module) + return name ⊑ Symbol else - return a2 ⊑ Symbol || a2 ⊑ Int + return name ⊑ Symbol || name ⊑ Int end end -isdefined_tfunc(arg1, sym, order) = (@nospecialize; isdefined_tfunc(arg1, sym)) -function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym)) + +@nospecs function isdefined_tfunc(𝕃::AbstractLattice, arg1, sym, order) + return isdefined_tfunc(𝕃, arg1, sym) +end +@nospecs function isdefined_tfunc(𝕃::AbstractLattice, arg1, sym) if isa(arg1, Const) - a1 = typeof(arg1.val) + arg1t = typeof(arg1.val) else - a1 = widenconst(arg1) + arg1t = widenconst(arg1) end - if isType(a1) + if isType(arg1t) return Bool end - a1 = unwrap_unionall(a1) + a1 = unwrap_unionall(arg1t) if isa(a1, DataType) && !isabstracttype(a1) if a1 === Module hasintersect(widenconst(sym), Symbol) || return Bottom @@ -307,11 +445,14 @@ function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym)) end end elseif isa(a1, Union) - return tmerge(isdefined_tfunc(a1.a, sym), - isdefined_tfunc(a1.b, sym)) + # Results can only be `Const` or `Bool` + return tmerge(𝕃, + isdefined_tfunc(𝕃, rewrap_unionall(a1.a, arg1t), sym), + isdefined_tfunc(𝕃, rewrap_unionall(a1.b, arg1t), sym)) end return Bool end + add_tfunc(isdefined, 2, 3, isdefined_tfunc, 1) function sizeof_nothrow(@nospecialize(x)) @@ -319,15 +460,13 @@ function sizeof_nothrow(@nospecialize(x)) if !isa(x.val, Type) || x.val === DataType return true end - elseif isa(x, Conditional) - return true end xu = unwrap_unionall(x) if isa(xu, Union) return sizeof_nothrow(rewrap_unionall(xu.a, x)) && sizeof_nothrow(rewrap_unionall(xu.b, x)) end - t, exact, isconcrete = instanceof_tfunc(x) + t, exact, isconcrete = instanceof_tfunc(x, false) if t === Bottom # x must be an instance (not a Type) or is the Bottom type object x = widenconst(x) @@ -354,8 +493,8 @@ function sizeof_nothrow(@nospecialize(x)) end function _const_sizeof(@nospecialize(x)) - # Constant Vector does not have constant size - isa(x, Vector) && return Int + # Constant GenericMemory does not have constant size + isa(x, GenericMemory) && return Int size = try Core.sizeof(x) catch ex @@ -367,18 +506,19 @@ function _const_sizeof(@nospecialize(x)) end return Const(size) end -function sizeof_tfunc(@nospecialize(x),) +@nospecs function sizeof_tfunc(𝕃::AbstractLattice, x) + x = widenmustalias(x) isa(x, Const) && return _const_sizeof(x.val) isa(x, Conditional) && return _const_sizeof(Bool) isconstType(x) && return _const_sizeof(x.parameters[1]) xu = unwrap_unionall(x) if isa(xu, Union) - return tmerge(sizeof_tfunc(rewrap_unionall(xu.a, x)), - sizeof_tfunc(rewrap_unionall(xu.b, x))) + return tmerge(sizeof_tfunc(𝕃, rewrap_unionall(xu.a, x)), + sizeof_tfunc(𝕃, rewrap_unionall(xu.b, x))) end # Core.sizeof operates on either a type or a value. First check which # case we're in. - t, exact = instanceof_tfunc(x) + t, exact = instanceof_tfunc(x, false) if t !== Bottom # The value corresponding to `x` at runtime could be a type. # Normalize the query to ask about that type. 
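Several of these tfuncs (`isdefined_tfunc` above, `sizeof_tfunc`, and `nfields_tfunc` below) handle a `Union` argument by answering each arm separately, rewrapping free type variables, and merging the two answers. A toy version of that recursion, where `Union` itself stands in for `tmerge` and `Val{n}` stands in for a `Const` field count, so this is a sketch of the shape only, not of the real lattice operations:

# Recurse into each Union arm; report an exact count for concrete types,
# otherwise only that the answer is some Int.
toy_nfields(@nospecialize t) =
    t isa Union ? Union{toy_nfields(t.a), toy_nfields(t.b)} :
    (isconcretetype(t) ? Val{fieldcount(t)} : Int)

# toy_nfields(Union{Tuple{Int}, Tuple{Int,Int}}) == Union{Val{1}, Val{2}}
# toy_nfields(Tuple) == Int    # non-concrete: only the result type is known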
@@ -397,28 +537,36 @@ function sizeof_tfunc(@nospecialize(x),) return Int end add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 1) -function nfields_tfunc(@nospecialize(x)) +@nospecs function nfields_tfunc(𝕃::AbstractLattice, x) isa(x, Const) && return Const(nfields(x.val)) isa(x, Conditional) && return Const(0) - x = unwrap_unionall(widenconst(x)) + xt = widenconst(x) + x = unwrap_unionall(xt) isconstType(x) && return Const(nfields(x.parameters[1])) if isa(x, DataType) && !isabstracttype(x) - if !(x.name === Tuple.name && isvatuple(x)) && - !(x.name === _NAMEDTUPLE_NAME && !isconcretetype(x)) + if x.name === Tuple.name + isvatuple(x) && return Int + return Const(length(x.types)) + elseif x.name === _NAMEDTUPLE_NAME + length(x.parameters) == 2 || return Int + names = x.parameters[1] + isa(names, Tuple{Vararg{Symbol}}) || return nfields_tfunc(𝕃, rewrap_unionall(x.parameters[2], xt)) + return Const(length(names)) + else return Const(isdefined(x, :types) ? length(x.types) : length(x.name.names)) end end if isa(x, Union) - na = nfields_tfunc(x.a) + na = nfields_tfunc(𝕃, x.a) na === Int && return Int - return tmerge(na, nfields_tfunc(x.b)) + return tmerge(na, nfields_tfunc(𝕃, x.b)) end return Int end add_tfunc(nfields, 1, 1, nfields_tfunc, 1) -add_tfunc(Core._expr, 1, INT_INF, (@nospecialize args...)->Expr, 100) -add_tfunc(svec, 0, INT_INF, (@nospecialize args...)->SimpleVector, 20) -function typevar_tfunc(@nospecialize(n), @nospecialize(lb_arg), @nospecialize(ub_arg)) +add_tfunc(Core._expr, 1, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->Expr), 100) +add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVector), 20) +@nospecs function typevar_tfunc(𝕃::AbstractLattice, n, lb_arg, ub_arg) lb = Union{} ub = Any ub_certain = lb_certain = true @@ -427,26 +575,32 @@ function typevar_tfunc(@nospecialize(n), @nospecialize(lb_arg), @nospecialize(ub isa(nval, Symbol) || return Union{} if isa(lb_arg, Const) lb = lb_arg.val - elseif isType(lb_arg) - lb = lb_arg.parameters[1] - lb_certain = false else - return TypeVar + lb_arg = widenslotwrapper(lb_arg) + if isType(lb_arg) + lb = lb_arg.parameters[1] + lb_certain = false + else + return TypeVar + end end if isa(ub_arg, Const) ub = ub_arg.val - elseif isType(ub_arg) - ub = ub_arg.parameters[1] - ub_certain = false else - return TypeVar + ub_arg = widenslotwrapper(ub_arg) + if isType(ub_arg) + ub = ub_arg.parameters[1] + ub_certain = false + else + return TypeVar + end end tv = TypeVar(nval, lb, ub) return PartialTypeVar(tv, lb_certain, ub_certain) end return TypeVar end -function typebound_nothrow(b) +@nospecs function typebound_nothrow(b) b = widenconst(b) (b ⊑ TypeVar) && return true if isType(b) @@ -454,34 +608,14 @@ function typebound_nothrow(b) end return false end -function typevar_nothrow(n, lb, ub) - (n ⊑ Symbol) || return false +@nospecs function typevar_nothrow(𝕃::AbstractLattice, n, lb, ub) + ⊑ = Core.Compiler.:⊑(𝕃) + n ⊑ Symbol || return false typebound_nothrow(lb) || return false typebound_nothrow(ub) || return false return true end add_tfunc(Core._typevar, 3, 3, typevar_tfunc, 100) -add_tfunc(applicable, 1, INT_INF, (@nospecialize(f), args...)->Bool, 100) -add_tfunc(Core.Intrinsics.arraylen, 1, 1, @nospecialize(x)->Int, 4) - -function arraysize_tfunc(@nospecialize(ary), @nospecialize(dim)) - hasintersect(widenconst(ary), Array) || return Bottom - hasintersect(widenconst(dim), Int) || return Bottom - return Int -end -add_tfunc(arraysize, 2, 2, arraysize_tfunc, 4) - -function arraysize_nothrow(argtypes::Vector{Any}) - 
length(argtypes) == 2 || return false - ary = argtypes[1] - dim = argtypes[2] - ary ⊑ Array || return false - if isa(dim, Const) - dimval = dim.val - return isa(dimval, Int) && dimval > 0 - end - return false -end struct MemoryOrder x::Cint end const MEMORY_ORDER_UNSPECIFIED = MemoryOrder(-2) @@ -520,28 +654,45 @@ function pointer_eltype(@nospecialize(ptr)) unw = unwrap_unionall(a) if isa(unw, DataType) && unw.name === Ptr.body.name T = unw.parameters[1] - valid_as_lattice(T) || return Bottom + valid_as_lattice(T, true) || return Bottom return rewrap_unionall(T, a) end end return Any end -function atomic_pointermodify_tfunc(ptr, op, v, order) - @nospecialize + +@nospecs function pointerref_tfunc(𝕃::AbstractLattice, a, i, align) + return pointer_eltype(a) +end +@nospecs function pointerset_tfunc(𝕃::AbstractLattice, a, v, i, align) + return a +end +@nospecs function atomic_fence_tfunc(𝕃::AbstractLattice, order) + return Nothing +end +@nospecs function atomic_pointerref_tfunc(𝕃::AbstractLattice, a, order) + return pointer_eltype(a) +end +@nospecs function atomic_pointerset_tfunc(𝕃::AbstractLattice, a, v, order) + return a +end +@nospecs function atomic_pointerswap_tfunc(𝕃::AbstractLattice, a, v, order) + return pointer_eltype(a) +end +@nospecs function atomic_pointermodify_tfunc(𝕃::AbstractLattice, ptr, op, v, order) a = widenconst(ptr) if !has_free_typevars(a) unw = unwrap_unionall(a) if isa(unw, DataType) && unw.name === Ptr.body.name T = unw.parameters[1] # note: we could sometimes refine this to a PartialStruct if we analyzed `op(T, T)::T` - valid_as_lattice(T) || return Bottom + valid_as_lattice(T, true) || return Bottom return rewrap_unionall(Pair{T, T}, a) end end return Pair end -function atomic_pointerreplace_tfunc(ptr, x, v, success_order, failure_order) - @nospecialize +@nospecs function atomic_pointerreplace_tfunc(𝕃::AbstractLattice, ptr, x, v, success_order, failure_order) a = widenconst(ptr) if !has_free_typevars(a) unw = unwrap_unionall(a) @@ -553,16 +704,37 @@ function atomic_pointerreplace_tfunc(ptr, x, v, success_order, failure_order) end return ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T end -add_tfunc(pointerref, 3, 3, (a, i, align) -> (@nospecialize; pointer_eltype(a)), 4) -add_tfunc(pointerset, 4, 4, (a, v, i, align) -> (@nospecialize; a), 5) -add_tfunc(atomic_fence, 1, 1, (order) -> (@nospecialize; Nothing), 4) -add_tfunc(atomic_pointerref, 2, 2, (a, order) -> (@nospecialize; pointer_eltype(a)), 4) -add_tfunc(atomic_pointerset, 3, 3, (a, v, order) -> (@nospecialize; a), 5) -add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_eltype(a)), 5) +add_tfunc(pointerref, 3, 3, pointerref_tfunc, 4) +add_tfunc(pointerset, 4, 4, pointerset_tfunc, 5) +add_tfunc(atomic_fence, 1, 1, atomic_fence_tfunc, 4) +add_tfunc(atomic_pointerref, 2, 2, atomic_pointerref_tfunc, 4) +add_tfunc(atomic_pointerset, 3, 3, atomic_pointerset_tfunc, 5) +add_tfunc(atomic_pointerswap, 3, 3, atomic_pointerswap_tfunc, 5) add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5) add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5) -add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0) -add_tfunc(Core.finalizer, 2, 4, (@nospecialize args...)->Nothing, 5) +add_tfunc(donotdelete, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->Nothing), 0) +@nospecs function compilerbarrier_tfunc(𝕃::AbstractLattice, setting, val) + # strongest barrier if a precise information isn't available at compiler time + # XXX we may want to have 
"compile-time" error instead for such case + isa(setting, Const) || return Any + setting = setting.val + isa(setting, Symbol) || return Any + if setting === :const + return widenconst(val) + elseif setting === :conditional + return widenconditional(val) + elseif setting === :type + return Any + else + return Bottom + end +end +add_tfunc(compilerbarrier, 2, 2, compilerbarrier_tfunc, 5) +add_tfunc(Core.finalizer, 2, 4, @nospecs((𝕃::AbstractLattice, args...)->Nothing), 5) + +@nospecs function compilerbarrier_nothrow(setting, val) + return isa(setting, Const) && contains_is((:type, :const, :conditional), setting.val) +end # more accurate typeof_tfunc for vararg tuples abstract only in length function typeof_concrete_vararg(t::DataType) @@ -570,8 +742,12 @@ function typeof_concrete_vararg(t::DataType) for i = 1:np p = t.parameters[i] if i == np && isvarargtype(p) - if isdefined(p, :T) && !isdefined(p, :N) && isconcretetype(p.T) - return Type{Tuple{t.parameters[1:np-1]..., Vararg{p.T, N}}} where N + if isdefined(p, :T) && isconcretetype(p.T) + t = Type{Tuple{t.parameters[1:np-1]..., Vararg{p.T, N}}} where N + if isdefined(p, :N) + return t{p.N} + end + return t end elseif !isconcretetype(p) break @@ -580,7 +756,7 @@ function typeof_concrete_vararg(t::DataType) return nothing end -function typeof_tfunc(@nospecialize(t)) +@nospecs function typeof_tfunc(𝕃::AbstractLattice, t) isa(t, Const) && return Const(typeof(t.val)) t = widenconst(t) if isType(t) @@ -601,8 +777,8 @@ function typeof_tfunc(@nospecialize(t)) return Type{<:t} end elseif isa(t, Union) - a = widenconst(_typeof_tfunc(t.a)) - b = widenconst(_typeof_tfunc(t.b)) + a = widenconst(_typeof_tfunc(𝕃, t.a)) + b = widenconst(_typeof_tfunc(𝕃, t.b)) return Union{a, b} elseif isa(t, UnionAll) u = unwrap_unionall(t) @@ -616,28 +792,39 @@ function typeof_tfunc(@nospecialize(t)) return rewrap_unionall(Type{u}, t) end end - return rewrap_unionall(widenconst(typeof_tfunc(u)), t) + return rewrap_unionall(widenconst(typeof_tfunc(𝕃, u)), t) end return DataType # typeof(anything)::DataType end # helper function of `typeof_tfunc`, which accepts `TypeVar` -function _typeof_tfunc(@nospecialize(t)) +@nospecs function _typeof_tfunc(𝕃::AbstractLattice, t) if isa(t, TypeVar) - return t.ub !== Any ? _typeof_tfunc(t.ub) : DataType + return t.ub !== Any ? 
_typeof_tfunc(𝕃, t.ub) : DataType end - return typeof_tfunc(t) + return typeof_tfunc(𝕃, t) end add_tfunc(typeof, 1, 1, typeof_tfunc, 1) -function typeassert_tfunc(@nospecialize(v), @nospecialize(t)) - t = instanceof_tfunc(t)[1] +@nospecs function typeassert_tfunc(𝕃::AbstractLattice, v, t) + t = instanceof_tfunc(t, true)[1] t === Any && return v - return tmeet(v, t) + return tmeet(𝕃, v, t) end add_tfunc(typeassert, 2, 2, typeassert_tfunc, 4) -function isa_tfunc(@nospecialize(v), @nospecialize(tt)) - t, isexact = instanceof_tfunc(tt) +@nospecs function typeassert_nothrow(𝕃::AbstractLattice, v, t) + ⊑ = Core.Compiler.:⊑(𝕃) + # ty, exact = instanceof_tfunc(t, true) + # return exact && v ⊑ ty + if (isType(t) && !has_free_typevars(t) && v ⊑ t.parameters[1]) || + (isa(t, Const) && isa(t.val, Type) && v ⊑ t.val) + return true + end + return false +end + +@nospecs function isa_tfunc(𝕃::AbstractLattice, v, tt) + t, isexact = instanceof_tfunc(tt, true) if t === Bottom # check if t could be equivalent to typeof(Bottom), since that's valid in `isa`, but the set of `v` is empty # if `t` cannot have instances, it's also invalid on the RHS of isa @@ -645,7 +832,7 @@ function isa_tfunc(@nospecialize(v), @nospecialize(tt)) return Const(false) end if !has_free_typevars(t) - if v ⊑ t + if ⊑(𝕃, v, t) if isexact && isnotbrokensubtype(v, t) return Const(true) end @@ -671,9 +858,14 @@ function isa_tfunc(@nospecialize(v), @nospecialize(tt)) end add_tfunc(isa, 2, 2, isa_tfunc, 1) -function subtype_tfunc(@nospecialize(a), @nospecialize(b)) - a, isexact_a = instanceof_tfunc(a) - b, isexact_b = instanceof_tfunc(b) +@nospecs function isa_nothrow(𝕃::AbstractLattice, obj, typ) + ⊑ = Core.Compiler.:⊑(𝕃) + return typ ⊑ Type +end + +@nospecs function subtype_tfunc(𝕃::AbstractLattice, a, b) + a, isexact_a = instanceof_tfunc(a, false) + b, isexact_b = instanceof_tfunc(b, false) if !has_free_typevars(a) && !has_free_typevars(b) if a <: b if isexact_b || a === Bottom @@ -689,19 +881,21 @@ function subtype_tfunc(@nospecialize(a), @nospecialize(b)) end add_tfunc(<:, 2, 2, subtype_tfunc, 10) +@nospecs function subtype_nothrow(𝕃::AbstractLattice, lty, rty) + ⊑ = Core.Compiler.:⊑(𝕃) + return lty ⊑ Type && rty ⊑ Type +end + function fieldcount_noerror(@nospecialize t) if t isa UnionAll || t isa Union t = argument_datatype(t) if t === nothing return nothing end - t = t::DataType elseif t === Union{} return 0 end - if !(t isa DataType) - return nothing - end + t isa DataType || return nothing if t.name === _NAMEDTUPLE_NAME names, types = t.parameters if names isa Tuple @@ -710,18 +904,16 @@ function fieldcount_noerror(@nospecialize t) if types isa DataType && types <: Tuple return fieldcount_noerror(types) end - abstr = true - else - abstr = isabstracttype(t) || (t.name === Tuple.name && isvatuple(t)) - end - if abstr + return nothing + elseif isabstracttype(t) || (t.name === Tuple.name && isvatuple(t)) return nothing end return isdefined(t, :types) ? 
length(t.types) : length(t.name.names) end - -function try_compute_fieldidx(typ::DataType, @nospecialize(field)) +function try_compute_fieldidx(@nospecialize(typ), @nospecialize(field)) + typ = argument_datatype(typ) + typ === nothing && return nothing if isa(field, Symbol) field = fieldindex(typ, field, false) field == 0 && return nothing @@ -736,39 +928,61 @@ function try_compute_fieldidx(typ::DataType, @nospecialize(field)) return field end -function getfield_boundscheck(argtypes::Vector{Any}) # ::Union{Bool, Nothing, Type{Bool}} +function getfield_boundscheck(argtypes::Vector{Any}) if length(argtypes) == 2 - boundscheck = Bool + isvarargtype(argtypes[2]) && return :unsafe + return :on elseif length(argtypes) == 3 boundscheck = argtypes[3] - if boundscheck === Const(:not_atomic) # TODO: this is assuming not atomic - boundscheck = Bool + isvarargtype(boundscheck) && return :unsafe + if widenconst(boundscheck) === Symbol + return :on end elseif length(argtypes) == 4 boundscheck = argtypes[4] + isvarargtype(boundscheck) && return :unsafe else - return nothing + return :unsafe end - widenconst(boundscheck) !== Bool && return nothing boundscheck = widenconditional(boundscheck) - if isa(boundscheck, Const) - return boundscheck.val - else - return Bool + if widenconst(boundscheck) === Bool + if isa(boundscheck, Const) + return boundscheck.val::Bool ? :on : :off + end + return :unknown # including a case when specified as `:boundscheck` end + return :unsafe end -function getfield_nothrow(argtypes::Vector{Any}) - boundscheck = getfield_boundscheck(argtypes) - boundscheck === nothing && return false - return getfield_nothrow(argtypes[1], argtypes[2], !(boundscheck === false)) +function getfield_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, boundscheck::Symbol=getfield_boundscheck(argtypes)) + boundscheck === :unsafe && return false + ordering = Const(:not_atomic) + if length(argtypes) == 3 + isvarargtype(argtypes[3]) && return false + if widenconst(argtypes[3]) !== Bool + ordering = argtypes[3] + end + elseif length(argtypes) == 4 + ordering = argtypes[3] + elseif length(argtypes) ≠ 2 + return false + end + isa(ordering, Const) || return false + ordering = ordering.val + isa(ordering, Symbol) || return false + if ordering !== :not_atomic # TODO: this is assuming not atomic + return false + end + return getfield_nothrow(𝕃, argtypes[1], argtypes[2], !(boundscheck === :off)) end -function getfield_nothrow(@nospecialize(s00), @nospecialize(name), boundscheck::Bool) - # If we don't have boundscheck and don't know the field, don't even bother +@nospecs function getfield_nothrow(𝕃::AbstractLattice, s00, name, boundscheck::Bool) + # If we don't have boundscheck off and don't know the field, don't even bother if boundscheck isa(name, Const) || return false end + ⊑ = Core.Compiler.:⊑(𝕃) + # If we have s00 being a const, we can potentially refine our type-based analysis above if isa(s00, Const) || isconstType(s00) if !isa(s00, Const) @@ -784,35 +998,39 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), boundscheck:: end return isdefined(sv, nval) end - if !boundscheck && !isa(sv, Module) - # If bounds checking is disabled and all fields are assigned, - # we may assume that we don't throw - for i = 1:fieldcount(typeof(sv)) - isdefined(sv, i) || return false - end - return true + boundscheck && return false + # If bounds checking is disabled and all fields are assigned, + # we may assume that we don't throw + isa(sv, Module) && return false + name ⊑ Int || name ⊑ Symbol || return false 
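# [Editor's note, not part of the patch] At this point the constant object is known not to be a
# Module and the field name is constrained to Int/Symbol; the loop below additionally requires
# every field of the constant to be defined before the getfield is deemed non-throwing.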
+ for i = 1:fieldcount(typeof(sv)) + isdefined(sv, i) || return false end - return false + return true end s0 = widenconst(s00) s = unwrap_unionall(s0) if isa(s, Union) - return getfield_nothrow(rewrap_unionall(s.a, s00), name, boundscheck) && - getfield_nothrow(rewrap_unionall(s.b, s00), name, boundscheck) - elseif isa(s, DataType) + return getfield_nothrow(𝕃, rewrap_unionall(s.a, s00), name, boundscheck) && + getfield_nothrow(𝕃, rewrap_unionall(s.b, s00), name, boundscheck) + elseif isType(s) && isTypeDataType(s.parameters[1]) + s = s0 = DataType + end + if isa(s, DataType) # Can't say anything about abstract types isabstracttype(s) && return false - s.name.atomicfields == C_NULL || return false # TODO: currently we're only testing for ordering == :not_atomic - # If all fields are always initialized, and bounds check is disabled, we can assume - # we don't throw + # If all fields are always initialized, and bounds check is disabled, + # we can assume we don't throw if !boundscheck && s.name.n_uninitialized == 0 + name ⊑ Int || name ⊑ Symbol || return false return true end # Else we need to know what the field is isa(name, Const) || return false field = try_compute_fieldidx(s, name.val) field === nothing && return false + isfieldatomic(s, field) && return false # TODO: currently we're only testing for ordering === :not_atomic field <= datatype_min_ninitialized(s) && return true # `try_compute_fieldidx` already check for field index bound. !isvatuple(s) && isbitstype(fieldtype(s0, field)) && return true @@ -821,15 +1039,13 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), boundscheck:: return false end -function getfield_tfunc(s00, name, boundscheck_or_order) - @nospecialize +@nospecs function getfield_tfunc(𝕃::AbstractLattice, s00, name, boundscheck_or_order) t = isvarargtype(boundscheck_or_order) ? 
unwrapva(boundscheck_or_order) : widenconst(boundscheck_or_order) hasintersect(t, Symbol) || hasintersect(t, Bool) || return Bottom - return getfield_tfunc(s00, name) + return getfield_tfunc(𝕃, s00, name) end -function getfield_tfunc(s00, name, order, boundscheck) - @nospecialize +@nospecs function getfield_tfunc(𝕃::AbstractLattice, s00, name, order, boundscheck) hasintersect(widenconst(order), Symbol) || return Bottom if isvarargtype(boundscheck) t = unwrapva(boundscheck) @@ -837,18 +1053,75 @@ function getfield_tfunc(s00, name, order, boundscheck) else hasintersect(widenconst(boundscheck), Bool) || return Bottom end - return getfield_tfunc(s00, name) + return getfield_tfunc(𝕃, s00, name) +end +@nospecs function getfield_tfunc(𝕃::AbstractLattice, s00, name) + _getfield_tfunc(𝕃, s00, name, false) +end + +function _getfield_fieldindex(s::DataType, name::Const) + nv = name.val + if isa(nv, Symbol) + nv = fieldindex(s, nv, false) + end + if isa(nv, Int) + return nv + end + return nothing +end + +function _getfield_tfunc_const(@nospecialize(sv), name::Const) + nv = _getfield_fieldindex(typeof(sv), name) + nv === nothing && return Bottom + if isa(sv, DataType) && nv == DATATYPE_TYPES_FIELDINDEX && isdefined(sv, nv) + return Const(getfield(sv, nv)) + end + if isconst(typeof(sv), nv) + if isdefined(sv, nv) + return Const(getfield(sv, nv)) + end + return Bottom + end + return nothing +end + +@nospecs function _getfield_tfunc(𝕃::InferenceLattice, s00, name, setfield::Bool) + if isa(s00, LimitedAccuracy) + # This will error, but it's better than duplicating the error here + s00 = widenconst(s00) + end + return _getfield_tfunc(widenlattice(𝕃), s00, name, setfield) end -getfield_tfunc(@nospecialize(s00), @nospecialize(name)) = _getfield_tfunc(s00, name, false) -function _getfield_tfunc(@nospecialize(s00), @nospecialize(name), setfield::Bool) - if isa(s00, Conditional) + +@nospecs function _getfield_tfunc(𝕃::AnyConditionalsLattice, s00, name, setfield::Bool) + if isa(s00, AnyConditional) return Bottom # Bool has no fields - elseif isa(s00, Const) || isconstType(s00) - if !isa(s00, Const) - sv = s00.parameters[1] - else - sv = s00.val + end + return _getfield_tfunc(widenlattice(𝕃), s00, name, setfield) +end + +@nospecs function _getfield_tfunc(𝕃::AnyMustAliasesLattice, s00, name, setfield::Bool) + return _getfield_tfunc(widenlattice(𝕃), widenmustalias(s00), name, setfield) +end + +@nospecs function _getfield_tfunc(𝕃::PartialsLattice, s00, name, setfield::Bool) + if isa(s00, PartialStruct) + s = widenconst(s00) + sty = unwrap_unionall(s)::DataType + if isa(name, Const) + nv = _getfield_fieldindex(sty, name) + if isa(nv, Int) && 1 <= nv <= length(s00.fields) + return unwrapva(s00.fields[nv]) + end end + s00 = s + end + return _getfield_tfunc(widenlattice(𝕃), s00, name, setfield) +end + +@nospecs function _getfield_tfunc(𝕃::ConstsLattice, s00, name, setfield::Bool) + if isa(s00, Const) + sv = s00.val if isa(name, Const) nv = name.val if isa(sv, Module) @@ -858,45 +1131,44 @@ function _getfield_tfunc(@nospecialize(s00), @nospecialize(name), setfield::Bool end return Bottom end - if isa(nv, Symbol) - nv = fieldindex(typeof(sv), nv, false) - end - if !isa(nv, Int) - return Bottom - end - if isa(sv, DataType) && nv == DATATYPE_TYPES_FIELDINDEX && isdefined(sv, nv) - return Const(getfield(sv, nv)) - end - if isconst(typeof(sv), nv) - if isdefined(sv, nv) - return Const(getfield(sv, nv)) - end - return Union{} - end + r = _getfield_tfunc_const(sv, name) + r !== nothing && return r end - s = typeof(sv) - elseif 
isa(s00, PartialStruct) - s = widenconst(s00) - sty = unwrap_unionall(s)::DataType - if isa(name, Const) - nv = name.val - if isa(nv, Symbol) - nv = fieldindex(sty, nv, false) + s00 = widenconst(s00) + end + return _getfield_tfunc(widenlattice(𝕃), s00, name, setfield) +end + +@nospecs function _getfield_tfunc(𝕃::JLTypeLattice, s00, name, setfield::Bool) + s = unwrap_unionall(s00) + if isa(s, Union) + return tmerge(_getfield_tfunc(𝕃, rewrap_unionall(s.a, s00), name, setfield), + _getfield_tfunc(𝕃, rewrap_unionall(s.b, s00), name, setfield)) + end + if isType(s) + if isconstType(s) + sv = (s00::DataType).parameters[1] + if isa(name, Const) + r = _getfield_tfunc_const(sv, name) + r !== nothing && return r end - if isa(nv, Int) && 1 <= nv <= length(s00.fields) - return unwrapva(s00.fields[nv]) + s = typeof(sv) + else + sv = s.parameters[1] + if isTypeDataType(sv) && isa(name, Const) + nv = _getfield_fieldindex(DataType, name)::Int + if nv == DATATYPE_NAME_FIELDINDEX + # N.B. This only works for fields that do not depend on type + # parameters (which we do not know here). + return Const(sv.name) + end + s = DataType end end - else - s = unwrap_unionall(s00) - end - if isa(s, Union) - return tmerge(_getfield_tfunc(rewrap_unionall(s.a, s00), name, setfield), - _getfield_tfunc(rewrap_unionall(s.b, s00), name, setfield)) end isa(s, DataType) || return Any isabstracttype(s) && return Any - if s <: Tuple && !(Int <: widenconst(name)) + if s <: Tuple && !hasintersect(widenconst(name), Int) return Bottom end if s <: Module @@ -919,7 +1191,7 @@ function _getfield_tfunc(@nospecialize(s00), @nospecialize(name), setfield::Bool if !(_ts <: Tuple) return Any end - return _getfield_tfunc(_ts, name, setfield) + return _getfield_tfunc(𝕃, _ts, name, setfield) end ftypes = datatype_fieldtypes(s) nf = length(ftypes) @@ -937,61 +1209,124 @@ function _getfield_tfunc(@nospecialize(s00), @nospecialize(name), setfield::Bool return Bottom end if nf == 1 - return rewrap_unionall(unwrapva(ftypes[1]), s00) - end - # union together types of all fields - t = Bottom - for i in 1:nf - _ft = ftypes[i] - setfield && isconst(s, i) && continue - t = tmerge(t, rewrap_unionall(unwrapva(_ft), s00)) - t === Any && break + fld = 1 + else + # union together types of all fields + t = Bottom + for i in 1:nf + _ft = unwrapva(ftypes[i]) + valid_as_lattice(_ft, true) || continue + setfield && isconst(s, i) && continue + t = tmerge(t, rewrap_unionall(_ft, s00)) + t === Any && break + end + return t end - return t - end - fld = name.val - if isa(fld, Symbol) - fld = fieldindex(s, fld, false) - end - if !isa(fld, Int) - return Bottom + else + fld = _getfield_fieldindex(s, name) + fld === nothing && return Bottom end if s <: Tuple && fld >= nf && isvarargtype(ftypes[nf]) - return rewrap_unionall(unwrapva(ftypes[nf]), s00) + R = unwrapva(ftypes[nf]) + else + if fld < 1 || fld > nf + return Bottom + elseif setfield && isconst(s, fld) + return Bottom + end + R = ftypes[fld] + valid_as_lattice(R, true) || return Bottom + if isempty(s.parameters) + return R + end end - if fld < 1 || fld > nf - return Bottom - elseif setfield && isconst(s, fld) - return Bottom + return rewrap_unionall(R, s00) +end + +@nospecs function getfield_notundefined(typ0, name) + if isa(typ0, Const) && isa(name, Const) + typv = typ0.val + namev = name.val + isa(typv, Module) && return true + if isa(namev, Symbol) || isa(namev, Int) + # Fields are not allowed to transition from defined to undefined, so + # even if the field is not const, all we need to check here is that + # it 
is defined here. + return isdefined(typv, namev) + end + end + typ0 = widenconst(typ0) + typ = unwrap_unionall(typ0) + if isa(typ, Union) + return getfield_notundefined(rewrap_unionall(typ.a, typ0), name) && + getfield_notundefined(rewrap_unionall(typ.b, typ0), name) + end + isa(typ, DataType) || return false + if typ.name === Tuple.name || typ.name === _NAMEDTUPLE_NAME + # tuples and named tuples can't be instantiated with undefined fields, + # so we don't need to be conservative here + return true end - R = ftypes[fld] - if isempty(s.parameters) - return R + if !isa(name, Const) + isvarargtype(name) && return false + if !hasintersect(widenconst(name), Union{Int,Symbol}) + return true # no undefined behavior if thrown + end + # field isn't known precisely, but let's check if all the fields can't be + # initialized with undefined value so to avoid being too conservative + fcnt = fieldcount_noerror(typ) + fcnt === nothing && return false + all(i::Int->is_undefref_fieldtype(fieldtype(typ,i)), (datatype_min_ninitialized(typ)+1):fcnt) && return true + return false end - return rewrap_unionall(R, s00) + name = name.val + if isa(name, Symbol) + fidx = fieldindex(typ, name, false) + fidx === nothing && return true # no undefined behavior if thrown + elseif isa(name, Int) + fidx = name + else + return true # no undefined behavior if thrown + end + fcnt = fieldcount_noerror(typ) + fcnt === nothing && return false + 0 < fidx ≤ fcnt || return true # no undefined behavior if thrown + fidx ≤ datatype_min_ninitialized(typ) && return true # always defined + ftyp = fieldtype(typ, fidx) + is_undefref_fieldtype(ftyp) && return true # always initialized + return false +end +# checks if a field of this type is guaranteed to be defined to a value +# and that access to an uninitialized field will cause an `UndefRefError` or return zero +# - is_undefref_fieldtype(String) === true +# - is_undefref_fieldtype(Integer) === true +# - is_undefref_fieldtype(Any) === true +# - is_undefref_fieldtype(Int) === false +# - is_undefref_fieldtype(Union{Int32,Int64}) === false +# - is_undefref_fieldtype(T) === false +function is_undefref_fieldtype(@nospecialize ftyp) + return !has_free_typevars(ftyp) && !allocatedinline(ftyp) end -function setfield!_tfunc(o, f, v, order) - @nospecialize +@nospecs function setfield!_tfunc(𝕃::AbstractLattice, o, f, v, order) if !isvarargtype(order) hasintersect(widenconst(order), Symbol) || return Bottom end - return setfield!_tfunc(o, f, v) + return setfield!_tfunc(𝕃, o, f, v) end -function setfield!_tfunc(o, f, v) - @nospecialize +@nospecs function setfield!_tfunc(𝕃::AbstractLattice, o, f, v) mutability_errorcheck(o) || return Bottom - ft = _getfield_tfunc(o, f, true) + ft = _getfield_tfunc(𝕃, o, f, true) ft === Bottom && return Bottom hasintersect(widenconst(v), widenconst(ft)) || return Bottom return v end -function mutability_errorcheck(@nospecialize obj) - objt0 = widenconst(obj) +mutability_errorcheck(@nospecialize obj) = _mutability_errorcheck(widenconst(obj)) +function _mutability_errorcheck(@nospecialize objt0) objt = unwrap_unionall(objt0) if isa(objt, Union) - return mutability_errorcheck(rewrap_unionall(objt.a, objt0)) || - mutability_errorcheck(rewrap_unionall(objt.b, objt0)) + return _mutability_errorcheck(rewrap_unionall(objt.a, objt0)) || + _mutability_errorcheck(rewrap_unionall(objt.b, objt0)) elseif isa(objt, DataType) # Can't say anything about abstract types isabstracttype(objt) && return true @@ -1000,87 +1335,89 @@ function mutability_errorcheck(@nospecialize obj) return 
true end -function setfield!_nothrow(argtypes::Vector{Any}) - if length(argtypes) == 4 - order = argtypes[4] - order === Const(:not_atomic) || return false # currently setfield!_nothrow is assuming not atomic - else - length(argtypes) == 3 || return false - end - return setfield!_nothrow(argtypes[1], argtypes[2], argtypes[3]) +@nospecs function setfield!_nothrow(𝕃::AbstractLattice, s00, name, v, order) + order === Const(:not_atomic) || return false # currently setfield!_nothrow is assuming not atomic + return setfield!_nothrow(𝕃, s00, name, v) end -function setfield!_nothrow(s00, name, v) - @nospecialize +@nospecs function setfield!_nothrow(𝕃::AbstractLattice, s00, name, v) + ⊑ = Core.Compiler.:⊑(𝕃) s0 = widenconst(s00) s = unwrap_unionall(s0) if isa(s, Union) - return setfield!_nothrow(rewrap_unionall(s.a, s00), name, v) && - setfield!_nothrow(rewrap_unionall(s.b, s00), name, v) + return setfield!_nothrow(𝕃, rewrap_unionall(s.a, s00), name, v) && + setfield!_nothrow(𝕃, rewrap_unionall(s.b, s00), name, v) elseif isa(s, DataType) # Can't say anything about abstract types isabstracttype(s) && return false ismutabletype(s) || return false - s.name.atomicfields == C_NULL || return false # TODO: currently we're only testing for ordering == :not_atomic isa(name, Const) || return false field = try_compute_fieldidx(s, name.val) field === nothing && return false # `try_compute_fieldidx` already check for field index bound. isconst(s, field) && return false + isfieldatomic(s, field) && return false # TODO: currently we're only testing for ordering === :not_atomic v_expected = fieldtype(s0, field) return v ⊑ v_expected end return false end -swapfield!_tfunc(o, f, v, order) = (@nospecialize; getfield_tfunc(o, f)) -swapfield!_tfunc(o, f, v) = (@nospecialize; getfield_tfunc(o, f)) -modifyfield!_tfunc(o, f, op, v, order) = (@nospecialize; modifyfield!_tfunc(o, f, op, v)) -function modifyfield!_tfunc(o, f, op, v) - @nospecialize - T = _fieldtype_tfunc(o, isconcretetype(o), f) +@nospecs function swapfield!_tfunc(𝕃::AbstractLattice, o, f, v, order) + return getfield_tfunc(𝕃, o, f) +end +@nospecs function swapfield!_tfunc(𝕃::AbstractLattice, o, f, v) + return getfield_tfunc(𝕃, o, f) +end +@nospecs function modifyfield!_tfunc(𝕃::AbstractLattice, o, f, op, v, order) + return modifyfield!_tfunc(𝕃, o, f, op, v) +end +@nospecs function modifyfield!_tfunc(𝕃::AbstractLattice, o, f, op, v) + T = _fieldtype_tfunc(𝕃, o, f, isconcretetype(o)) T === Bottom && return Bottom PT = Const(Pair) - return instanceof_tfunc(apply_type_tfunc(PT, T, T))[1] + return instanceof_tfunc(apply_type_tfunc(𝕃, PT, T, T), true)[1] end -function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState) +function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState) nargs = length(argtypes) if !isempty(argtypes) && isvarargtype(argtypes[nargs]) - nargs - 1 <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, false) - nargs > 3 || return CallMeta(Any, EFFECTS_UNKNOWN, false) + nargs - 1 <= 6 || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + nargs > 3 || return CallMeta(Any, Any, Effects(), NoCallInfo()) else - 5 <= nargs <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, false) + 5 <= nargs <= 6 || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) end + 𝕃ᵢ = typeinf_lattice(interp) o = unwrapva(argtypes[2]) f = unwrapva(argtypes[3]) - RT = modifyfield!_tfunc(o, f, Any, Any) - info = false + RT = modifyfield!_tfunc(𝕃ᵢ, o, f, Any, Any) + info = 
NoCallInfo() if nargs >= 5 && RT !== Bottom # we may be able to refine this to a PartialStruct by analyzing `op(o.f, v)::T` # as well as compute the info for the method matches op = unwrapva(argtypes[4]) v = unwrapva(argtypes[5]) - TF = getfield_tfunc(o, f) - push!(sv.ssavalue_uses[sv.currpc], sv.currpc) # temporarily disable `call_result_unused` check for this call - callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), sv, #=max_methods=# 1) - pop!(sv.ssavalue_uses[sv.currpc], sv.currpc) + TF = getfield_tfunc(𝕃ᵢ, o, f) + callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1) TF2 = tmeet(callinfo.rt, widenconst(TF)) if TF2 === Bottom RT = Bottom - elseif isconcretetype(RT) && has_nontrivial_const_info(TF2) # isconcrete condition required to form a PartialStruct + elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct RT = PartialStruct(RT, Any[TF, TF2]) end - info = callinfo.info + info = ModifyFieldInfo(callinfo.info) end - return CallMeta(RT, Effects(), info) + return CallMeta(RT, Any, Effects(), info) +end +@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v, success_order, failure_order) + return replacefield!_tfunc(𝕃, o, f, x, v) end -replacefield!_tfunc(o, f, x, v, success_order, failure_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v)) -replacefield!_tfunc(o, f, x, v, success_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v)) -function replacefield!_tfunc(o, f, x, v) - @nospecialize - T = _fieldtype_tfunc(o, isconcretetype(o), f) +@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v, success_order) + return replacefield!_tfunc(𝕃, o, f, x, v) +end +@nospecs function replacefield!_tfunc(𝕃::AbstractLattice, o, f, x, v) + T = _fieldtype_tfunc(𝕃, o, f, isconcretetype(o)) T === Bottom && return Bottom PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T) - return instanceof_tfunc(apply_type_tfunc(PT, T))[1] + return instanceof_tfunc(apply_type_tfunc(𝕃, PT, T), true)[1] end # we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial @@ -1092,8 +1429,9 @@ add_tfunc(swapfield!, 3, 4, swapfield!_tfunc, 3) add_tfunc(modifyfield!, 4, 5, modifyfield!_tfunc, 3) add_tfunc(replacefield!, 4, 6, replacefield!_tfunc, 3) -function fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) +@nospecs function fieldtype_nothrow(𝕃::AbstractLattice, s0, name) s0 === Bottom && return true # unreachable + ⊑ = Core.Compiler.:⊑(𝕃) if s0 === Any || s0 === Type || DataType ⊑ s0 || UnionAll ⊑ s0 # We have no idea return false @@ -1107,11 +1445,11 @@ function fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) su = unwrap_unionall(s0) if isa(su, Union) - return fieldtype_nothrow(rewrap_unionall(su.a, s0), name) && - fieldtype_nothrow(rewrap_unionall(su.b, s0), name) + return fieldtype_nothrow(𝕃, rewrap_unionall(su.a, s0), name) && + fieldtype_nothrow(𝕃, rewrap_unionall(su.b, s0), name) end - s, exact = instanceof_tfunc(s0) + s, exact = instanceof_tfunc(s0, false) s === Bottom && return false # always return _fieldtype_nothrow(s, exact, name) end @@ -1149,8 +1487,11 @@ function _fieldtype_nothrow(@nospecialize(s), exact::Bool, name::Const) return true end -fieldtype_tfunc(s0, name, boundscheck) = (@nospecialize; fieldtype_tfunc(s0, name)) -function fieldtype_tfunc(@nospecialize(s0), @nospecialize(name)) +@nospecs function fieldtype_tfunc(𝕃::AbstractLattice, s0, name, 
boundscheck) + return fieldtype_tfunc(𝕃, s0, name) +end +@nospecs function fieldtype_tfunc(𝕃::AbstractLattice, s0, name) + s0 = widenmustalias(s0) if s0 === Bottom return Bottom end @@ -1169,25 +1510,25 @@ function fieldtype_tfunc(@nospecialize(s0), @nospecialize(name)) su = unwrap_unionall(s0) if isa(su, Union) - return tmerge(fieldtype_tfunc(rewrap_unionall(su.a, s0), name), - fieldtype_tfunc(rewrap_unionall(su.b, s0), name)) + return tmerge(fieldtype_tfunc(𝕃, rewrap_unionall(su.a, s0), name), + fieldtype_tfunc(𝕃, rewrap_unionall(su.b, s0), name)) end - s, exact = instanceof_tfunc(s0) + s, exact = instanceof_tfunc(s0, false) s === Bottom && return Bottom - return _fieldtype_tfunc(s, exact, name) + return _fieldtype_tfunc(𝕃, s, name, exact) end -function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name)) +@nospecs function _fieldtype_tfunc(𝕃::AbstractLattice, s, name, exact::Bool) exact = exact && !has_free_typevars(s) u = unwrap_unionall(s) if isa(u, Union) - ta0 = _fieldtype_tfunc(rewrap_unionall(u.a, s), exact, name) - tb0 = _fieldtype_tfunc(rewrap_unionall(u.b, s), exact, name) + ta0 = _fieldtype_tfunc(𝕃, rewrap_unionall(u.a, s), name, exact) + tb0 = _fieldtype_tfunc(𝕃, rewrap_unionall(u.b, s), name, exact) ta0 ⊑ tb0 && return tb0 tb0 ⊑ ta0 && return ta0 - ta, exacta, _, istypea = instanceof_tfunc(ta0) - tb, exactb, _, istypeb = instanceof_tfunc(tb0) + ta, exacta, _, istypea = instanceof_tfunc(ta0, false) + tb, exactb, _, istypeb = instanceof_tfunc(tb0, false) if exact && exacta && exactb return Const(Union{ta, tb}) end @@ -1282,22 +1623,11 @@ end add_tfunc(fieldtype, 2, 3, fieldtype_tfunc, 0) # Like `valid_tparam`, but in the type domain. -function valid_tparam_type(T::DataType) - T === Symbol && return true - isbitstype(T) && return true - if T <: Tuple - isconcretetype(T) || return false - for P in T.parameters - (P === Symbol || isbitstype(P)) || return false - end - return true - end - return false -end +valid_tparam_type(T::DataType) = valid_typeof_tparam(T) valid_tparam_type(U::Union) = valid_tparam_type(U.a) && valid_tparam_type(U.b) valid_tparam_type(U::UnionAll) = valid_tparam_type(unwrap_unionall(U)) -function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt)) +function apply_type_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospecialize(rt)) rt === Type && return false length(argtypes) >= 1 || return false headtypetype = argtypes[1] @@ -1313,10 +1643,11 @@ function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt)) (headtype === Union) && return true isa(rt, Const) && return true u = headtype + # TODO: implement optimization for isvarargtype(u) and istuple occurrences (which are valid but are not UnionAll) for i = 2:length(argtypes) isa(u, UnionAll) || return false ai = widenconditional(argtypes[i]) - if ai ⊑ TypeVar || ai === DataType + if ⊑(𝕃, ai, TypeVar) || ai === DataType # We don't know anything about the bounds of this typevar, but as # long as the UnionAll is not constrained, that's ok. 
if !(u.var.lb === Union{} && u.var.ub === Any) @@ -1331,7 +1662,7 @@ function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt)) return false end else - T, exact, _, istype = instanceof_tfunc(ai) + T, exact, _, istype = instanceof_tfunc(ai, false) if T === Bottom if !(u.var.lb === Union{} && u.var.ub === Any) return false @@ -1341,7 +1672,7 @@ function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt)) end else istype || return false - if !(T <: u.var.ub) + if isa(u.var.ub, TypeVar) || !(T <: u.var.ub) return false end if exact ? !(u.var.lb <: T) : !(u.var.lb === Bottom) @@ -1358,7 +1689,8 @@ const _tvarnames = Symbol[:_A, :_B, :_C, :_D, :_E, :_F, :_G, :_H, :_I, :_J, :_K, :_N, :_O, :_P, :_Q, :_R, :_S, :_T, :_U, :_V, :_W, :_X, :_Y, :_Z] # TODO: handle e.g. apply_type(T, R::Union{Type{Int32},Type{Float64}}) -function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) +@nospecs function apply_type_tfunc(𝕃::AbstractLattice, headtypetype, args...) + headtypetype = widenslotwrapper(headtypetype) if isa(headtypetype, Const) headtype = headtypetype.val elseif isconstType(headtypetype) @@ -1394,9 +1726,7 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) end end if largs == 1 # Union{T} --> T - u1 = typeintersect(widenconst(args[1]), Type) - valid_as_lattice(u1) || return Bottom - return u1 + return tmeet(widenconst(args[1]), Union{Type,TypeVar}) end hasnonType && return Type ty = Union{} @@ -1422,10 +1752,24 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) canconst = true tparams = Any[] outervars = TypeVar[] + + # first push the tailing vars from headtype into outervars + outer_start, ua = 0, headtype + while isa(ua, UnionAll) + if (outer_start += 1) > largs + push!(outervars, ua.var) + end + ua = ua.body + end + if largs > outer_start && isa(headtype, UnionAll) # e.g. !isvarargtype(ua) && !istuple + return Bottom # too many arguments + end + outer_start = outer_start - largs + 1 + varnamectr = 1 ua = headtype for i = 1:largs - ai = widenconditional(args[i]) + ai = widenslotwrapper(args[i]) if isType(ai) aip1 = ai.parameters[1] canconst &= !has_free_typevars(aip1) @@ -1438,34 +1782,61 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) push!(tparams, ai.tv) else uncertain = true - # These blocks improve type info but make compilation a bit slower. - # XXX - #unw = unwrap_unionall(ai) - #isT = isType(unw) - #if isT && isa(ai,UnionAll) && contains_is(outervars, ai.var) - # ai = rename_unionall(ai) - # unw = unwrap_unionall(ai) - #end - ai_w = widenconst(ai) - ub = ai_w isa Type && ai_w <: Type ? instanceof_tfunc(ai)[1] : Any + unw = unwrap_unionall(ai) + isT = isType(unw) + # compute our desired upper bound value + if isT + ub = rewrap_unionall(unw.parameters[1], ai) + else + ub = Any + end + if !istuple && unionall_depth(ai) > 3 + # Heuristic: if we are adding more than N unknown parameters here to the + # outer type, use the wrapper type, instead of letting it nest more + # complexity here. This is not monotonic, but seems to work out pretty well. 
+ if isT + ub = unwrap_unionall(unw.parameters[1]) + if ub isa DataType + ub = ub.name.wrapper + unw = Type{unwrap_unionall(ub)} + ai = rewrap_unionall(unw, ub) + else + isT = false + ai = unw = ub = Any + end + else + isT = false + ai = unw = ub = Any + end + elseif !isT + # if we didn't have isType to compute ub directly, try to use instanceof_tfunc to refine this guess + ai_w = widenconst(ai) + ub = ai_w isa Type && ai_w <: Type ? instanceof_tfunc(ai, false)[1] : Any + end if istuple # in the last parameter of a Tuple type, if the upper bound is Any # then this could be a Vararg type. if i == largs && ub === Any - push!(tparams, Vararg) - # XXX - #elseif isT - # push!(tparams, rewrap_unionall(unw.parameters[1], ai)) - else - push!(tparams, Any) + ub = Vararg + end + push!(tparams, ub) + elseif isT + tai = ai + while isa(tai, UnionAll) + # make sure vars introduced here are unique + if contains_is(outervars, tai.var) + ai = rename_unionall(ai) + unw = unwrap_unionall(ai)::DataType + # ub = rewrap_unionall(unw, ai) + break + end + tai = tai.body + end + push!(tparams, unw.parameters[1]) + while isa(ai, UnionAll) + push!(outervars, ai.var) + ai = ai.body end - # XXX - #elseif isT - # push!(tparams, unw.parameters[1]) - # while isa(ai, UnionAll) - # push!(outervars, ai.var) - # ai = ai.body - # end else # Is this the second parameter to a NamedTuple? if isa(uw, DataType) && uw.name === _NAMEDTUPLE_NAME && isa(ua, UnionAll) && uw.parameters[2] === ua.var @@ -1485,19 +1856,41 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) push!(outervars, v) end end - if isa(ua, UnionAll) + if ua isa UnionAll ua = ua.body - else - ua = nothing + #otherwise, sometimes ua isa Vararg (Core.TypeofVararg) or Tuple (DataType) end end local appl try appl = apply_type(headtype, tparams...) catch ex - # type instantiation might fail if one of the type parameters - # doesn't match, which could happen if a type estimate is too coarse - return isvarargtype(headtype) ? TypeofVararg : Type{<:headtype} + ex isa InterruptException && rethrow() + # type instantiation might fail if one of the type parameters doesn't + # match, which could happen only if a type estimate is too coarse + # and might guess a concrete value while the actual type for it is Bottom + if !uncertain + return Union{} + end + canconst = false + uncertain = true + empty!(outervars) + outer_start = 1 + # FIXME: if these vars are substituted with TypeVar here, the result + # might be wider than the input, so should we use the `.name.wrapper` + # object here instead, to replace all of these outervars with + # unconstrained ones? Note that this code is nearly unreachable though, + # and possibly should simply return Union{} here also, since + # `apply_type` is already quite conservative about detecting and + # throwing errors. + appl = headtype + if isa(appl, UnionAll) + for _ = 1:largs + appl = appl::UnionAll + push!(outervars, appl.var) + appl = appl.body + end + end end !uncertain && canconst && return Const(appl) if isvarargtype(appl) @@ -1507,23 +1900,25 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...) 
return Type{<:appl} end ans = Type{appl} - for i = length(outervars):-1:1 + for i = length(outervars):-1:outer_start ans = UnionAll(outervars[i], ans) end return ans end add_tfunc(apply_type, 1, INT_INF, apply_type_tfunc, 10) -function has_struct_const_info(x) - isa(x, PartialTypeVar) && return true - isa(x, Conditional) && return true - return has_nontrivial_const_info(x) -end - # convert the dispatch tuple type argtype to the real (concrete) type of # the tuple of those values -function tuple_tfunc(argtypes::Vector{Any}) - argtypes = anymap(widenconditional, argtypes) +function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any}) + isempty(argtypes) && return Const(()) + argtypes = anymap(widenslotwrapper, argtypes) + if isvarargtype(argtypes[end]) && unwrapva(argtypes[end]) === Union{} + # Drop the Vararg in Tuple{...,Vararg{Union{}}} since it must be length 0. + # If there is a Vararg num also, it must be a TypeVar, and it must be + # zero, but that generally shouldn't show up here, since it implies a + # UnionAll context is missing around this. + pop!(argtypes) + end all_are_const = true for i in 1:length(argtypes) if !isa(argtypes[i], Const) @@ -1532,13 +1927,13 @@ function tuple_tfunc(argtypes::Vector{Any}) end end if all_are_const - return Const(ntuple(i -> argtypes[i].val, length(argtypes))) + return Const(ntuple(i::Int->argtypes[i].val, length(argtypes))) end params = Vector{Any}(undef, length(argtypes)) anyinfo = false for i in 1:length(argtypes) x = argtypes[i] - if has_struct_const_info(x) + if has_nontrivial_extended_info(𝕃, x) anyinfo = true else if !isvarargtype(x) @@ -1566,6 +1961,8 @@ function tuple_tfunc(argtypes::Vector{Any}) params[i] = x elseif !isvarargtype(x) && hasintersect(x, Type) params[i] = Union{x, Type} + elseif x === Union{} + return Bottom # argtypes is malformed, but try not to crash else params[i] = x end @@ -1573,59 +1970,91 @@ function tuple_tfunc(argtypes::Vector{Any}) end typ = Tuple{params...} # replace a singleton type with its equivalent Const object - isdefined(typ, :instance) && return Const(typ.instance) + issingletontype(typ) && return Const(typ.instance) return anyinfo ? PartialStruct(typ, argtypes) : typ end -arrayref_tfunc(@nospecialize(boundscheck), @nospecialize(ary), @nospecialize idxs...) = - _arrayref_tfunc(boundscheck, ary, idxs) -function _arrayref_tfunc(@nospecialize(boundscheck), @nospecialize(ary), - @nospecialize idxs::Tuple) - isempty(idxs) && return Bottom - array_builtin_common_errorcheck(boundscheck, ary, idxs) || return Bottom - return array_elmtype(ary) +@nospecs function memoryrefget_tfunc(𝕃::AbstractLattice, mem, order, boundscheck) + return _memoryrefget_tfunc(𝕃, mem, order, boundscheck) end -add_tfunc(arrayref, 3, INT_INF, arrayref_tfunc, 20) -add_tfunc(const_arrayref, 3, INT_INF, arrayref_tfunc, 20) +@nospecs function _memoryrefget_tfunc(𝕃::AbstractLattice, mem, order, boundscheck) + memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom + return memoryref_elemtype(mem) +end +add_tfunc(memoryrefget, 3, 3, memoryrefget_tfunc, 20) -function arrayset_tfunc(@nospecialize(boundscheck), @nospecialize(ary), @nospecialize(item), - @nospecialize idxs...) 
- hasintersect(widenconst(item), _arrayref_tfunc(boundscheck, ary, idxs)) || return Bottom - return ary +@nospecs function memoryrefset!_tfunc(𝕃::AbstractLattice, mem, item, order, boundscheck) + hasintersect(widenconst(item), _memoryrefget_tfunc(𝕃, mem, order, boundscheck)) || return Bottom + return mem +end +add_tfunc(memoryrefset!, 4, 4, memoryrefset!_tfunc, 20) + +@nospecs function memoryref_isassigned_tfunc(𝕃::AbstractLattice, mem, order, boundscheck) + return _memoryref_isassigned_tfunc(𝕃, mem, order, boundscheck) +end +@nospecs function _memoryref_isassigned_tfunc(𝕃::AbstractLattice, mem, order, boundscheck) + memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom + return Bool end -add_tfunc(arrayset, 4, INT_INF, arrayset_tfunc, 20) +add_tfunc(memoryref_isassigned, 3, 3, memoryref_isassigned_tfunc, 20) -function array_builtin_common_errorcheck(@nospecialize(boundscheck), @nospecialize(ary), - @nospecialize idxs::Tuple) - hasintersect(widenconst(boundscheck), Bool) || return false - hasintersect(widenconst(ary), Array) || return false - for i = 1:length(idxs) - idx = getfield(idxs, i) - idx = isvarargtype(idx) ? unwrapva(idx) : widenconst(idx) - hasintersect(idx, Int) || return false +@nospecs function memoryref_tfunc(𝕃::AbstractLattice, mem) + a = widenconst(unwrapva(mem)) + if !has_free_typevars(a) + unw = unwrap_unionall(a) + if isa(unw, DataType) && unw.name === GenericMemory.body.body.body.name + A = unw.parameters[1] + T = unw.parameters[2] + AS = unw.parameters[3] + T isa Type || T isa TypeVar || return Bottom + return rewrap_unionall(GenericMemoryRef{A, T, AS}, a) + end end + return GenericMemoryRef +end +@nospecs function memoryref_tfunc(𝕃::AbstractLattice, ref, idx) + if isvarargtype(idx) + idx = unwrapva(idx) + end + return memoryref_tfunc(𝕃, ref, idx, Const(true)) +end +@nospecs function memoryref_tfunc(𝕃::AbstractLattice, ref, idx, boundscheck) + memoryref_builtin_common_errorcheck(ref, Const(:not_atomic), boundscheck) || return Bottom + hasintersect(widenconst(idx), Int) || return Bottom + return ref +end +add_tfunc(memoryref, 1, 3, memoryref_tfunc, 1) + +@nospecs function memoryrefoffset_tfunc(𝕃::AbstractLattice, mem) + hasintersect(widenconst(mem), GenericMemoryRef) || return Bottom + return Int +end +add_tfunc(memoryrefoffset, 1, 1, memoryrefoffset_tfunc, 5) + +@nospecs function memoryref_builtin_common_errorcheck(mem, order, boundscheck) + hasintersect(widenconst(mem), GenericMemoryRef) || return false + hasintersect(widenconst(order), Symbol) || return false + hasintersect(widenconst(unwrapva(boundscheck)), Bool) || return false return true end -function array_elmtype(@nospecialize ary) - a = widenconst(ary) - if !has_free_typevars(a) && a <: Array - a0 = a - if isa(a, UnionAll) - a = unwrap_unionall(a0) +function memoryref_elemtype(@nospecialize mem) + m = widenconst(mem) + if !has_free_typevars(m) && m <: GenericMemoryRef + m0 = m + if isa(m, UnionAll) + m = unwrap_unionall(m0) end - if isa(a, DataType) - T = a.parameters[1] - valid_as_lattice(T) || return Bottom - return rewrap_unionall(T, a0) + if isa(m, DataType) + T = m.parameters[2] + valid_as_lattice(T, true) || return Bottom + return rewrap_unionall(T, m0) end end return Any end -function _opaque_closure_tfunc(@nospecialize(arg), @nospecialize(lb), @nospecialize(ub), - @nospecialize(source), env::Vector{Any}, linfo::MethodInstance) - +@nospecs function opaque_closure_tfunc(𝕃::AbstractLattice, arg, lb, ub, source, env::Vector{Any}, linfo::MethodInstance) argt, argt_exact = 
instanceof_tfunc(arg) lbt, lb_exact = instanceof_tfunc(lb) if !lb_exact @@ -1639,146 +2068,193 @@ function _opaque_closure_tfunc(@nospecialize(arg), @nospecialize(lb), @nospecial (isa(source, Const) && isa(source.val, Method)) || return t - return PartialOpaque(t, tuple_tfunc(env), linfo, source.val) + return PartialOpaque(t, tuple_tfunc(𝕃, env), linfo, source.val) end # whether getindex for the elements can potentially throw UndefRef function array_type_undefable(@nospecialize(arytype)) + arytype = unwrap_unionall(arytype) if isa(arytype, Union) return array_type_undefable(arytype.a) || array_type_undefable(arytype.b) - elseif isa(arytype, UnionAll) - return true + elseif arytype isa DataType + elmtype = memoryref_elemtype(arytype) + # TODO: use arraytype layout instead to derive this + return !((elmtype isa DataType && isbitstype(elmtype)) || (elmtype isa Union && isbitsunion(elmtype))) + end + return true +end + +@nospecs function memoryset_typecheck(memtype, elemtype) + # Check that we can determine the element type + isa(memtype, DataType) || return false + elemtype_expected = memoryref_elemtype(memtype) + elemtype_expected === Union{} && return false + # Check that the element type is compatible with the element we're assigning + elemtype ⊑ elemtype_expected || return false + return true +end + +function memoryref_builtin_common_nothrow(argtypes::Vector{Any}) + if length(argtypes) == 1 + memtype = widenconst(argtypes[1]) + return memtype ⊑ GenericMemory else - elmtype = (arytype::DataType).parameters[1] - return !(elmtype isa Type && (isbitstype(elmtype) || isbitsunion(elmtype))) + if length(argtypes) == 2 + boundscheck = Const(true) + elseif length(argtypes) == 3 + boundscheck = argtypes[3] + else + return false + end + memtype = widenconst(argtypes[1]) + idx = widenconst(argtypes[2]) + idx ⊑ Int || return false + boundscheck ⊑ Bool || return false + memtype ⊑ GenericMemoryRef || return false + # If we have @inbounds (last argument is false), we're allowed to assume + # we don't throw bounds errors. + if isa(boundscheck, Const) + boundscheck.val::Bool || return true + end + # Else we can't really say anything here + # TODO: In the future we may be able to track the minimum length though inference. + return false end end -function array_builtin_common_nothrow(argtypes::Vector{Any}, first_idx_idx::Int) - length(argtypes) >= 4 || return false - boundscheck = argtypes[1] - arytype = argtypes[2] - array_builtin_common_typecheck(boundscheck, arytype, argtypes, first_idx_idx) || return false - # If we could potentially throw undef ref errors, bail out now. - arytype = widenconst(arytype) - array_type_undefable(arytype) && return false - # If we have @inbounds (first argument is false), we're allowed to assume +function memoryrefop_builtin_common_nothrow(argtypes::Vector{Any}, @nospecialize f) + ismemoryset = f === memoryrefset! + nargs = ismemoryset ? 4 : 3 + length(argtypes) == nargs || return false + order = argtypes[2 + ismemoryset] + boundscheck = argtypes[3 + ismemoryset] + memtype = widenconst(argtypes[1]) + memoryref_builtin_common_typecheck(boundscheck, memtype, order) || return false + if ismemoryset + # Additionally check element type compatibility + memoryset_typecheck(memtype, argtypes[2]) || return false + elseif f === memoryrefget + # If we could potentially throw undef ref errors, bail out now. + array_type_undefable(memtype) && return false + end + # If we have @inbounds (last argument is false), we're allowed to assume # we don't throw bounds errors. 
if isa(boundscheck, Const) - !(boundscheck.val::Bool) && return true + boundscheck.val::Bool || return true end # Else we can't really say anything here - # TODO: In the future we may be able to track the shapes of arrays though - # inference. + # TODO: In the future we may be able to track the minimum length though inference. return false end -function array_builtin_common_typecheck( - @nospecialize(boundscheck), @nospecialize(arytype), - argtypes::Vector{Any}, first_idx_idx::Int) - (boundscheck ⊑ Bool && arytype ⊑ Array) || return false - for i = first_idx_idx:length(argtypes) - argtypes[i] ⊑ Int || return false - end - return true -end - -function arrayset_typecheck(@nospecialize(arytype), @nospecialize(elmtype)) - # Check that we can determine the element type - arytype = widenconst(arytype) - isa(arytype, DataType) || return false - elmtype_expected = arytype.parameters[1] - isa(elmtype_expected, Type) || return false - # Check that the element type is compatible with the element we're assigning - elmtype ⊑ elmtype_expected || return false - return true +@nospecs function memoryref_builtin_common_typecheck(boundscheck, memtype, order) + return boundscheck ⊑ Bool && memtype ⊑ GenericMemoryRef && order ⊑ Symbol end # Query whether the given builtin is guaranteed not to throw given the argtypes -function _builtin_nothrow(@nospecialize(f), argtypes::Array{Any,1}, @nospecialize(rt)) - if f === arrayset - array_builtin_common_nothrow(argtypes, 4) || return true - # Additionally check element type compatibility - return arrayset_typecheck(argtypes[2], argtypes[3]) - elseif f === arrayref || f === const_arrayref - return array_builtin_common_nothrow(argtypes, 3) - elseif f === arraysize - return arraysize_nothrow(argtypes) +@nospecs function _builtin_nothrow(𝕃::AbstractLattice, f, argtypes::Vector{Any}, rt) + ⊑ = Core.Compiler.:⊑(𝕃) + if f === memoryref + return memoryref_builtin_common_nothrow(argtypes) + elseif f === memoryrefoffset + length(argtypes) == 1 || return false + memtype = widenconst(argtypes[1]) + return memtype ⊑ GenericMemoryRef + elseif f === memoryrefset! + return memoryrefop_builtin_common_nothrow(argtypes, f) + elseif f === memoryrefget + return memoryrefop_builtin_common_nothrow(argtypes, f) + elseif f === memoryref_isassigned + return memoryrefop_builtin_common_nothrow(argtypes, f) elseif f === Core._expr length(argtypes) >= 1 || return false return argtypes[1] ⊑ Symbol - elseif f === Core._typevar - length(argtypes) == 3 || return false - return typevar_nothrow(argtypes[1], argtypes[2], argtypes[3]) + end + + # These builtins are not-vararg, so if we have varars, here, we can't guarantee + # the correct number of arguments. + na = length(argtypes) + (na ≠ 0 && isvarargtype(argtypes[end])) && return false + if f === Core._typevar + na == 3 || return false + return typevar_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3]) elseif f === invoke return false elseif f === getfield - return getfield_nothrow(argtypes) + return getfield_nothrow(𝕃, argtypes) elseif f === setfield! 
- return setfield!_nothrow(argtypes) + if na == 3 + return setfield!_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3]) + elseif na == 4 + return setfield!_nothrow(𝕃, argtypes[1], argtypes[2], argtypes[3], argtypes[4]) + end + return false elseif f === fieldtype - length(argtypes) == 2 || return false - return fieldtype_nothrow(argtypes[1], argtypes[2]) + na == 2 || return false + return fieldtype_nothrow(𝕃, argtypes[1], argtypes[2]) elseif f === apply_type - return apply_type_nothrow(argtypes, rt) + return apply_type_nothrow(𝕃, argtypes, rt) elseif f === isa - length(argtypes) == 2 || return false - return argtypes[2] ⊑ Type + na == 2 || return false + return isa_nothrow(𝕃, nothing, argtypes[2]) elseif f === (<:) - length(argtypes) == 2 || return false - return argtypes[1] ⊑ Type && argtypes[2] ⊑ Type + na == 2 || return false + return subtype_nothrow(𝕃, argtypes[1], argtypes[2]) elseif f === UnionAll - return length(argtypes) == 2 && - (argtypes[1] ⊑ TypeVar && argtypes[2] ⊑ Type) + return na == 2 && (argtypes[1] ⊑ TypeVar && argtypes[2] ⊑ Type) elseif f === isdefined - return isdefined_nothrow(argtypes) + return isdefined_nothrow(𝕃, argtypes) elseif f === Core.sizeof - length(argtypes) == 1 || return false + na == 1 || return false return sizeof_nothrow(argtypes[1]) - elseif f === Core.kwfunc - length(argtypes) == 1 || return false - return isa(rt, Const) elseif f === Core.ifelse - length(argtypes) == 3 || return false - return argtypes[1] ⊑ Bool + na == 3 || return false + return ifelse_nothrow(𝕃, argtypes[1], nothing, nothing) elseif f === typeassert - length(argtypes) == 2 || return false - a3 = argtypes[2] - if (isType(a3) && !has_free_typevars(a3) && argtypes[1] ⊑ a3.parameters[1]) || - (isa(a3, Const) && isa(a3.val, Type) && argtypes[1] ⊑ a3.val) - return true + na == 2 || return false + return typeassert_nothrow(𝕃, argtypes[1], argtypes[2]) + elseif f === getglobal + if na == 2 + return getglobal_nothrow(argtypes[1], argtypes[2]) + elseif na == 3 + return getglobal_nothrow(argtypes[1], argtypes[2], argtypes[3]) end return false - elseif f === getglobal - return getglobal_nothrow(argtypes) elseif f === setglobal! 
- return setglobal!_nothrow(argtypes) + if na == 3 + return setglobal!_nothrow(argtypes[1], argtypes[2], argtypes[3]) + elseif na == 4 + return setglobal!_nothrow(argtypes[1], argtypes[2], argtypes[3], argtypes[4]) + end + return false elseif f === Core.get_binding_type - length(argtypes) == 2 || return false - return argtypes[1] ⊑ Module && argtypes[2] ⊑ Symbol + na == 2 || return false + return get_binding_type_nothrow(𝕃, argtypes[1], argtypes[2]) elseif f === donotdelete return true elseif f === Core.finalizer - 2 <= length(argtypes) <= 4 || return false + 2 <= na <= 4 || return false # Core.finalizer does no error checking - that's done in Base.finalizer return true + elseif f === Core.compilerbarrier + na == 2 || return false + return compilerbarrier_nothrow(argtypes[1], nothing) end return false end # known to be always effect-free (in particular nothrow) -const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields] - -# known to be effect-free (but not necessarily nothrow) -const _EFFECT_FREE_BUILTINS = [ - fieldtype, apply_type, isa, UnionAll, - getfield, arrayref, const_arrayref, isdefined, Core.sizeof, - Core.kwfunc, Core.ifelse, Core._typevar, (<:), - typeassert, throw, arraysize, getglobal, +const _PURE_BUILTINS = Any[ + tuple, + svec, + ===, + typeof, + nfields, ] const _CONSISTENT_BUILTINS = Any[ - tuple, # tuple is immutable, thus tuples of egal arguments are egal + tuple, # Tuple is immutable, thus tuples of egal arguments are egal + svec, # SimpleVector is immutable, thus svecs of egal arguments are egal ===, typeof, nfields, @@ -1787,99 +2263,385 @@ const _CONSISTENT_BUILTINS = Any[ isa, UnionAll, Core.sizeof, - Core.kwfunc, Core.ifelse, (<:), typeassert, - throw + throw, + setfield!, + donotdelete +] + +# known to be effect-free (but not necessarily nothrow) +const _EFFECT_FREE_BUILTINS = [ + fieldtype, + apply_type, + isa, + UnionAll, + getfield, + memoryref, + memoryrefoffset, + memoryrefget, + memoryref_isassigned, + isdefined, + Core.sizeof, + Core.ifelse, + Core._typevar, + (<:), + typeassert, + throw, + getglobal, + compilerbarrier, +] + +const _INACCESSIBLEMEM_BUILTINS = Any[ + (<:), + (===), + apply_type, + Core.ifelse, + Core.sizeof, + svec, + fieldtype, + isa, + nfields, + throw, + tuple, + typeassert, + typeof, + compilerbarrier, + Core._typevar, + donotdelete +] + +const _ARGMEM_BUILTINS = Any[ + memoryref, + memoryrefoffset, + memoryrefget, + memoryref_isassigned, + memoryrefset!, + modifyfield!, + replacefield!, + setfield!, + swapfield!, +] + +const _INCONSISTENT_INTRINSICS = Any[ + Intrinsics.pointerref, # this one is volatile + Intrinsics.sqrt_llvm_fast, # this one may differ at runtime (by a few ulps) + Intrinsics.have_fma, # this one depends on the runtime environment + Intrinsics.cglobal, # cglobal lookup answer changes at runtime + # ... 
and list fastmath intrinsics:
+    # join(string.("Intrinsics.", sort(filter(endswith("_fast")∘string, names(Core.Intrinsics)))), ",\n")
+    Intrinsics.add_float_fast,
+    Intrinsics.div_float_fast,
+    Intrinsics.eq_float_fast,
+    Intrinsics.le_float_fast,
+    Intrinsics.lt_float_fast,
+    Intrinsics.mul_float_fast,
+    Intrinsics.ne_float_fast,
+    Intrinsics.neg_float_fast,
+    Intrinsics.sqrt_llvm_fast,
+    Intrinsics.sub_float_fast,
+    # TODO needs to revive #31193 to mark this as inconsistent to be accurate
+    # while preserving the current optimizations for many math operations
+    # Intrinsics.muladd_float,    # this is not interprocedurally consistent
 ]

 const _SPECIAL_BUILTINS = Any[
-    Core._apply_iterate
+    Core._apply_iterate,
 ]

-function builtin_effects(f::Builtin, argtypes::Vector{Any}, @nospecialize rt)
+function isdefined_effects(𝕃::AbstractLattice, argtypes::Vector{Any})
+    # consistent if the first arg is immutable
+    na = length(argtypes)
+    2 ≤ na ≤ 3 || return EFFECTS_THROWS
+    obj, sym = argtypes
+    wobj = unwrapva(obj)
+    consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
+    if is_immutable_argtype(wobj)
+        consistent = ALWAYS_TRUE
+    else
+        # Bindings/fields are not allowed to transition from defined to undefined, so even
+        # if the object is not immutable, we can prove `:consistent`-cy if it is defined:
+        if isa(wobj, Const) && isa(sym, Const)
+            objval = wobj.val
+            symval = sym.val
+            if isa(objval, Module)
+                if isa(symval, Symbol) && isdefined(objval, symval)
+                    consistent = ALWAYS_TRUE
+                end
+            elseif (isa(symval, Symbol) || isa(symval, Int)) && isdefined(objval, symval)
+                consistent = ALWAYS_TRUE
+            end
+        end
+    end
+    nothrow = isdefined_nothrow(𝕃, argtypes)
+    if hasintersect(widenconst(wobj), Module)
+        inaccessiblememonly = ALWAYS_FALSE
+    elseif is_mutation_free_argtype(wobj)
+        inaccessiblememonly = ALWAYS_TRUE
+    else
+        inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY
+    end
+    return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly)
+end
+
+function getfield_effects(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospecialize(rt))
+    length(argtypes) < 2 && return EFFECTS_THROWS
+    obj = argtypes[1]
+    if isvarargtype(obj)
+        return Effects(EFFECTS_TOTAL;
+            consistent=CONSISTENT_IF_INACCESSIBLEMEMONLY,
+            nothrow=false,
+            inaccessiblememonly=ALWAYS_FALSE,
+            noub=ALWAYS_FALSE)
+    end
+    # :consistent if the argtype is immutable
+    consistent = (is_immutable_argtype(obj) || is_mutation_free_argtype(obj)) ?
+        ALWAYS_TRUE : CONSISTENT_IF_INACCESSIBLEMEMONLY
+    # taint `:consistent` if accessing `isbitstype`-type object field that may be initialized
+    # with undefined value: note that we don't need to taint `:consistent` if accessing
+    # uninitialized non-`isbitstype` field since it will simply throw `UndefRefError`
+    # NOTE `getfield_notundefined` conservatively checks if this field is never initialized
+    # with undefined value to avoid tainting `:consistent` too aggressively
+    # TODO this should probably taint `:noub`, however, it would hinder concrete eval for
+    # `REPLInterpreter` that can ignore `:consistent-cy`, causing worse completions
+    if !(length(argtypes) ≥ 2 && getfield_notundefined(obj, argtypes[2]))
+        consistent = ALWAYS_FALSE
+    end
+    noub = ALWAYS_TRUE
+    bcheck = getfield_boundscheck(argtypes)
+    nothrow = getfield_nothrow(𝕃, argtypes, bcheck)
+    if !nothrow
+        if bcheck !== :on
+            # If we cannot independently prove inboundsness, taint `:noub`.
+            # The inbounds-ness assertion requires dynamic reachability,
+            # while `:noub` needs to be true for all input values.
+ # However, as a special exception, we do allow literal `:boundscheck`. + # `:noub` will be tainted in any caller using `@inbounds` + # based on the `:noinbounds` effect. + # N.B. We do not taint for `--check-bounds=no` here. + # That is handled in concrete evaluation. + noub = ALWAYS_FALSE + end + end + if hasintersect(widenconst(obj), Module) + inaccessiblememonly = getglobal_effects(argtypes, rt).inaccessiblememonly + elseif is_mutation_free_argtype(obj) + inaccessiblememonly = ALWAYS_TRUE + else + inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY + end + return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly, noub) +end + +function getglobal_effects(argtypes::Vector{Any}, @nospecialize(rt)) + 2 ≤ length(argtypes) ≤ 3 || return EFFECTS_THROWS + consistent = inaccessiblememonly = ALWAYS_FALSE + nothrow = false + M, s = argtypes[1], argtypes[2] + if (length(argtypes) == 3 ? getglobal_nothrow(M, s, argtypes[3]) : getglobal_nothrow(M, s)) + nothrow = true + # typeasserts below are already checked in `getglobal_nothrow` + Mval, sval = (M::Const).val::Module, (s::Const).val::Symbol + if isconst(Mval, sval) + consistent = ALWAYS_TRUE + if is_mutation_free_argtype(rt) + inaccessiblememonly = ALWAYS_TRUE + end + end + end + return Effects(EFFECTS_TOTAL; consistent, nothrow, inaccessiblememonly) +end + +""" + builtin_effects(𝕃::AbstractLattice, f::Builtin, argtypes::Vector{Any}, rt) -> Effects + +Compute the effects of a builtin function call. `argtypes` should not include `f` itself. +""" +function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any}, @nospecialize(rt)) if isa(f, IntrinsicFunction) return intrinsic_effects(f, argtypes) end @assert !contains_is(_SPECIAL_BUILTINS, f) - argtypes′ = argtypes[2:end] - if (f === Core.getfield || f === Core.isdefined) && length(argtypes) >= 3 - # consistent if the argtype is immutable - if isvarargtype(argtypes[2]) - return Effects(; effect_free=ALWAYS_TRUE, terminates=ALWAYS_TRUE, nonoverlayed=true) - end - s = widenconst(argtypes[2]) - if isType(s) || !isa(s, DataType) || isabstracttype(s) - return Effects(; effect_free=ALWAYS_TRUE, terminates=ALWAYS_TRUE, nonoverlayed=true) - end - s = s::DataType - consistent = !ismutabletype(s) ? ALWAYS_TRUE : ALWAYS_FALSE - if f === Core.getfield && !isvarargtype(argtypes[end]) && getfield_boundscheck(argtypes′) !== true - # If we cannot independently prove inboundsness, taint consistency. - # The inbounds-ness assertion requires dynamic reachability, while - # :consistent needs to be true for all input values. - # N.B. We do not taint for `--check-bounds=no` here -that happens in - # InferenceState. - if getfield_nothrow(argtypes[2], argtypes[3], true) - nothrow = ALWAYS_TRUE - else - consistent = nothrow = ALWAYS_FALSE + if f === getfield + return getfield_effects(𝕃, argtypes, rt) + end + + # if this builtin call deterministically throws, + # don't bother to taint the other effects other than :nothrow: + # note this is safe only if we accounted for :noub already + rt === Bottom && return EFFECTS_THROWS + + if f === isdefined + return isdefined_effects(𝕃, argtypes) + elseif f === getglobal + return getglobal_effects(argtypes, rt) + elseif f === Core.get_binding_type + length(argtypes) == 2 || return EFFECTS_THROWS + effect_free = get_binding_type_effect_free(argtypes[1], argtypes[2]) ? 
ALWAYS_TRUE : ALWAYS_FALSE + return Effects(EFFECTS_TOTAL; effect_free) + elseif f === compilerbarrier + length(argtypes) == 2 || return Effects(EFFECTS_THROWS; consistent=ALWAYS_FALSE) + setting = argtypes[1] + return Effects(EFFECTS_TOTAL; + consistent = (isa(setting, Const) && setting.val === :conditional) ? ALWAYS_TRUE : ALWAYS_FALSE, + nothrow = compilerbarrier_nothrow(setting, nothing)) + elseif f === Core.current_scope + nothrow = true + if length(argtypes) != 0 + if length(argtypes) != 1 || !isvarargtype(argtypes[1]) + return EFFECTS_THROWS end + nothrow = false + end + return Effects(EFFECTS_TOTAL; + consistent = ALWAYS_FALSE, + notaskstate = false, + nothrow + ) + else + if contains_is(_CONSISTENT_BUILTINS, f) + consistent = ALWAYS_TRUE + elseif f === memoryref || f === memoryrefoffset + consistent = ALWAYS_TRUE + elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned + consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY + elseif f === Core._typevar + consistent = CONSISTENT_IF_NOTRETURNED + else + consistent = ALWAYS_FALSE + end + if f === setfield! || f === memoryrefset! + effect_free = EFFECT_FREE_IF_INACCESSIBLEMEMONLY + elseif contains_is(_EFFECT_FREE_BUILTINS, f) || contains_is(_PURE_BUILTINS, f) + effect_free = ALWAYS_TRUE + else + effect_free = ALWAYS_FALSE + end + nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && builtin_nothrow(𝕃, f, argtypes, rt) + if contains_is(_INACCESSIBLEMEM_BUILTINS, f) + inaccessiblememonly = ALWAYS_TRUE + elseif contains_is(_ARGMEM_BUILTINS, f) + inaccessiblememonly = INACCESSIBLEMEM_OR_ARGMEMONLY else - nothrow = (!isvarargtype(argtypes[end]) && builtin_nothrow(f, argtypes′, rt)) ? - ALWAYS_TRUE : ALWAYS_FALSE - end - effect_free = ALWAYS_TRUE - elseif f === getglobal && length(argtypes) >= 3 - if getglobal_nothrow(argtypes′) - consistent = isconst( # types are already checked in `getglobal_nothrow` - (argtypes[2]::Const).val::Module, (argtypes[3]::Const).val::Symbol) ? - ALWAYS_TRUE : ALWAYS_FALSE - nothrow = ALWAYS_TRUE + inaccessiblememonly = ALWAYS_FALSE + end + if f === memoryref || f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned + noub = memoryop_noub(f, argtypes) ? ALWAYS_TRUE : ALWAYS_FALSE else - consistent = nothrow = ALWAYS_FALSE + noub = ALWAYS_TRUE end - effect_free = ALWAYS_TRUE + return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly, noub) + end +end + +function memoryop_noub(@nospecialize(f), argtypes::Vector{Any}) + nargs = length(argtypes) + nargs == 0 && return true # must throw and noub + lastargtype = argtypes[end] + isva = isvarargtype(lastargtype) + if f === memoryref + if nargs == 1 && !isva + return true + elseif nargs == 2 && !isva + return true + end + expected_nargs = 3 + elseif f === memoryrefget || f === memoryref_isassigned + expected_nargs = 3 else - consistent = contains_is(_CONSISTENT_BUILTINS, f) ? ALWAYS_TRUE : ALWAYS_FALSE - effect_free = (contains_is(_EFFECT_FREE_BUILTINS, f) || contains_is(_PURE_BUILTINS, f)) ? - ALWAYS_TRUE : ALWAYS_FALSE - nothrow = (!isvarargtype(argtypes[end]) && builtin_nothrow(f, argtypes′, rt)) ? - ALWAYS_TRUE : ALWAYS_FALSE + @assert f === memoryrefset! 
"unexpected memoryop is given" + expected_nargs = 4 + end + if nargs == expected_nargs && !isva + boundscheck = widenconditional(lastargtype) + hasintersect(widenconst(boundscheck), Bool) || return true # must throw and noub + boundscheck isa Const && boundscheck.val === true && return true + elseif nargs > expected_nargs + 1 + return true # must throw and noub + elseif !isva + return true # must throw and noub end + return false +end - return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow) +function current_scope_tfunc(interp::AbstractInterpreter, sv::InferenceState) + pc = sv.currpc + while true + handleridx = sv.handler_at[pc][1] + if handleridx == 0 + # No local scope available - inherited from the outside + return Any + end + pchandler = sv.handlers[handleridx] + # Remember that we looked at this handler, so we get re-scheduled + # if the scope information changes + isdefined(pchandler, :scope_uses) || (pchandler.scope_uses = Int[]) + pcbb = block_for_inst(sv.cfg, pc) + if findfirst(==(pcbb), pchandler.scope_uses) === nothing + push!(pchandler.scope_uses, pcbb) + end + scope = pchandler.scopet + if scope !== nothing + # Found the scope - forward it + return scope + end + pc = pchandler.enter_idx + end end +current_scope_tfunc(interp::AbstractInterpreter, sv) = Any + +""" + builtin_nothrow(𝕃::AbstractLattice, f::Builtin, argtypes::Vector{Any}, rt) -> Bool -function builtin_nothrow(@nospecialize(f), argtypes::Array{Any, 1}, @nospecialize(rt)) +Compute throw-ness of a builtin function call. `argtypes` should not include `f` itself. +""" +function builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f), argtypes::Vector{Any}, @nospecialize(rt)) rt === Bottom && return false contains_is(_PURE_BUILTINS, f) && return true - return _builtin_nothrow(f, argtypes, rt) + return _builtin_nothrow(𝕃, f, argtypes, rt) end -function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Array{Any,1}, - sv::Union{InferenceState,Nothing}) - if f === tuple - return tuple_tfunc(argtypes) - end +function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any}, + sv::Union{AbsIntState, Nothing}) + 𝕃ᵢ = typeinf_lattice(interp) if isa(f, IntrinsicFunction) - if is_pure_intrinsic_infer(f) && _all(@nospecialize(a) -> isa(a, Const), argtypes) + if is_pure_intrinsic_infer(f) && all(@nospecialize(a) -> isa(a, Const), argtypes) argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes) try + # unroll a few cases which have specialized codegen + if length(argvals) == 1 + return Const(f(argvals[1])) + elseif length(argvals) == 2 + return Const(f(argvals[1], argvals[2])) + elseif length(argvals) == 3 + return Const(f(argvals[1], argvals[2], argvals[3])) + end return Const(f(argvals...)) - catch + catch ex # expected ErrorException, TypeError, ConcurrencyViolationError, DivideError etc. 
+ ex isa InterruptException && rethrow() + return Bottom end end iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 if iidx < 0 || iidx > length(T_IFUNC) - # invalid intrinsic + # unknown intrinsic return Any end tf = T_IFUNC[iidx] else + if f === tuple + return tuple_tfunc(𝕃ᵢ, argtypes) + elseif f === Core.current_scope + if length(argtypes) != 0 + if length(argtypes) != 1 || !isvarargtype(argtypes[1]) + return Bottom + end + end + return current_scope_tfunc(interp, sv) + end fidx = find_tfunc(f) if fidx === nothing # unknown/unhandled builtin function @@ -1909,16 +2671,16 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp # wrong # of args return Bottom end - return tf[3](argtypes...) + return tf[3](𝕃ᵢ, argtypes...) end # Query whether the given intrinsic is nothrow -_iszero(x) = x === Intrinsics.xor_int(x, x) -_isneg1(x) = _iszero(Intrinsics.not_int(x)) -_istypemin(x) = !_iszero(x) && Intrinsics.neg_int(x) === x +_iszero(@nospecialize x) = x === Intrinsics.xor_int(x, x) +_isneg1(@nospecialize x) = _iszero(Intrinsics.not_int(x)) +_istypemin(@nospecialize x) = !_iszero(x) && Intrinsics.neg_int(x) === x -function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1}) +function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Vector{Any}) # First check that we have the correct number of arguments iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 if iidx < 1 || iidx > length(T_IFUNC) @@ -1938,16 +2700,17 @@ function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1}) f === Intrinsics.llvmcall && return false if f === Intrinsics.checked_udiv_int || f === Intrinsics.checked_urem_int || f === Intrinsics.checked_srem_int || f === Intrinsics.checked_sdiv_int # Nothrow as long as the second argument is guaranteed not to be zero - isa(argtypes[2], Const) || return false - if !isprimitivetype(widenconst(argtypes[1])) || - (widenconst(argtypes[1]) !== widenconst(argtypes[2])) - return false - end - den_val = argtypes[2].val + arg2 = argtypes[2] + isa(arg2, Const) || return false + arg1 = argtypes[1] + warg1 = widenconst(arg1) + warg2 = widenconst(arg2) + (warg1 === warg2 && isprimitivetype(warg1)) || return false + den_val = arg2.val _iszero(den_val) && return false f !== Intrinsics.checked_sdiv_int && return true # Nothrow as long as we additionally don't do typemin(T)/-1 - return !_isneg1(den_val) || (isa(argtypes[1], Const) && !_istypemin(argtypes[1].val)) + return !_isneg1(den_val) || (isa(arg1, Const) && !_istypemin(arg1.val)) end if f === Intrinsics.pointerref # Nothrow as long as the types are ok. N.B.: dereferencability is not @@ -1961,11 +2724,8 @@ function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1}) isprimitivetype(eT) || return false return argtypes[2] ⊑ eT && argtypes[3] ⊑ Int && argtypes[4] ⊑ Int end - if f === Intrinsics.arraylen - return argtypes[1] ⊑ Array - end if f === Intrinsics.bitcast - ty, isexact, isconcrete = instanceof_tfunc(argtypes[1]) + ty, isexact, isconcrete = instanceof_tfunc(argtypes[1], true) xty = widenconst(argtypes[2]) return isconcrete && isprimitivetype(ty) && isprimitivetype(xty) && Core.sizeof(ty) === Core.sizeof(xty) end @@ -1974,12 +2734,12 @@ function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1}) Intrinsics.sitofp, Intrinsics.fptrunc, Intrinsics.fpext) # If !isconcrete, `ty` may be Union{} at runtime even if we have # isprimitivetype(ty). 
- ty, isexact, isconcrete = instanceof_tfunc(argtypes[1]) + ty, isexact, isconcrete = instanceof_tfunc(argtypes[1], true) xty = widenconst(argtypes[2]) return isconcrete && isprimitivetype(ty) && isprimitivetype(xty) end if f === Intrinsics.have_fma - ty, isexact, isconcrete = instanceof_tfunc(argtypes[1]) + ty, isexact, isconcrete = instanceof_tfunc(argtypes[1], true) return isconcrete && isprimitivetype(ty) end # The remaining intrinsics are math/bits/comparison intrinsics. They work on all @@ -2001,15 +2761,17 @@ function is_pure_intrinsic_infer(f::IntrinsicFunction) return !(f === Intrinsics.pointerref || # this one is volatile f === Intrinsics.pointerset || # this one is never effect-free f === Intrinsics.llvmcall || # this one is never effect-free - f === Intrinsics.arraylen || # this one is volatile f === Intrinsics.sqrt_llvm_fast || # this one may differ at runtime (by a few ulps) f === Intrinsics.have_fma || # this one depends on the runtime environment f === Intrinsics.cglobal) # cglobal lookup answer changes at runtime end # whether `f` is effect free if nothrow -intrinsic_effect_free_if_nothrow(f) = f === Intrinsics.pointerref || - f === Intrinsics.have_fma || is_pure_intrinsic_infer(f) +function intrinsic_effect_free_if_nothrow(@nospecialize f) + return f === Intrinsics.pointerref || + f === Intrinsics.have_fma || + is_pure_intrinsic_infer(f) +end function intrinsic_effects(f::IntrinsicFunction, argtypes::Vector{Any}) if f === Intrinsics.llvmcall @@ -2017,73 +2779,174 @@ function intrinsic_effects(f::IntrinsicFunction, argtypes::Vector{Any}) return Effects() end - consistent = !( - f === Intrinsics.pointerref || # this one is volatile - f === Intrinsics.arraylen || # this one is volatile - f === Intrinsics.sqrt_llvm_fast || # this one may differ at runtime (by a few ulps) - f === Intrinsics.have_fma || # this one depends on the runtime environment - f === Intrinsics.cglobal # cglobal lookup answer changes at runtime - ) ? ALWAYS_TRUE : ALWAYS_FALSE + if contains_is(_INCONSISTENT_INTRINSICS, f) + consistent = ALWAYS_FALSE + else + consistent = ALWAYS_TRUE + end effect_free = !(f === Intrinsics.pointerset) ? ALWAYS_TRUE : ALWAYS_FALSE - nothrow = (!isvarargtype(argtypes[end]) && intrinsic_nothrow(f, argtypes[2:end])) ? - ALWAYS_TRUE : ALWAYS_FALSE - - return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow) + nothrow = (isempty(argtypes) || !isvarargtype(argtypes[end])) && intrinsic_nothrow(f, argtypes) + inaccessiblememonly = ALWAYS_TRUE + return Effects(EFFECTS_TOTAL; consistent, effect_free, nothrow, inaccessiblememonly) end # TODO: this function is a very buggy and poor model of the return_type function # since abstract_call_gf_by_type is a very inaccurate model of _method and of typeinf_type, # while this assumes that it is an absolutely precise and accurate and exact model of both -function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState) +function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState) + UNKNOWN = CallMeta(Type, Any, EFFECTS_THROWS, NoCallInfo()) + if !(2 <= length(argtypes) <= 3) + return UNKNOWN + end + + tt = widenslotwrapper(argtypes[end]) + if !isa(tt, Const) && !(isType(tt) && !has_free_typevars(tt)) + return UNKNOWN + end + + af_argtype = isa(tt, Const) ? 
tt.val : (tt::DataType).parameters[1] + if !isa(af_argtype, DataType) || !(af_argtype <: Tuple) + return UNKNOWN + end + if length(argtypes) == 3 - tt = argtypes[3] - if isa(tt, Const) || (isType(tt) && !has_free_typevars(tt)) - aft = argtypes[2] - if isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) || - (isconcretetype(aft) && !(aft <: Builtin)) - af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1] - if isa(af_argtype, DataType) && af_argtype <: Tuple - argtypes_vec = Any[aft, af_argtype.parameters...] - if contains_is(argtypes_vec, Union{}) - return CallMeta(Const(Union{}), EFFECTS_TOTAL, false) - end - # Run the abstract_call without restricting abstract call - # sites. Otherwise, our behavior model of abstract_call - # below will be wrong. - old_restrict = sv.restrict_abstract_call_sites - sv.restrict_abstract_call_sites = false - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), sv, -1) - sv.restrict_abstract_call_sites = old_restrict - info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure() - rt = widenconditional(call.rt) - if isa(rt, Const) - # output was computed to be constant - return CallMeta(Const(typeof(rt.val)), EFFECTS_TOTAL, info) - end - rt = widenconst(rt) - if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt)) - # output cannot be improved so it is known for certain - return CallMeta(Const(rt), EFFECTS_TOTAL, info) - elseif !isempty(sv.pclimitations) - # conservatively express uncertainty of this result - # in two ways: both as being a subtype of this, and - # because of LimitedAccuracy causes - return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info) - elseif (isa(tt, Const) || isconstType(tt)) && - (isa(aft, Const) || isconstType(aft)) - # input arguments were known for certain - # XXX: this doesn't imply we know anything about rt - return CallMeta(Const(rt), EFFECTS_TOTAL, info) - elseif isType(rt) - return CallMeta(Type{rt}, EFFECTS_TOTAL, info) - else - return CallMeta(Type{<:rt}, EFFECTS_TOTAL, info) - end - end + aft = widenslotwrapper(argtypes[2]) + argtypes_vec = Any[aft, af_argtype.parameters...] + else + argtypes_vec = Any[af_argtype.parameters...] + isempty(argtypes_vec) && push!(argtypes_vec, Union{}) + aft = argtypes_vec[1] + end + if !(isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) || + (isconcretetype(aft) && !(aft <: Builtin) && !iskindtype(aft))) + return UNKNOWN + end + + if contains_is(argtypes_vec, Union{}) + return CallMeta(Const(Union{}), Union{}, EFFECTS_TOTAL, NoCallInfo()) + end + + # Run the abstract_call without restricting abstract call + # sites. Otherwise, our behavior model of abstract_call + # below will be wrong. + if isa(sv, InferenceState) + old_restrict = sv.restrict_abstract_call_sites + sv.restrict_abstract_call_sites = false + call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) + sv.restrict_abstract_call_sites = old_restrict + else + call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) + end + info = verbose_stmt_info(interp) ? 
MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure() + rt = widenslotwrapper(call.rt) + if isa(rt, Const) + # output was computed to be constant + return CallMeta(Const(typeof(rt.val)), Union{}, EFFECTS_TOTAL, info) + end + rt = widenconst(rt) + if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt)) + # output cannot be improved so it is known for certain + return CallMeta(Const(rt), Union{}, EFFECTS_TOTAL, info) + elseif isa(sv, InferenceState) && !isempty(sv.pclimitations) + # conservatively express uncertainty of this result + # in two ways: both as being a subtype of this, and + # because of LimitedAccuracy causes + return CallMeta(Type{<:rt}, Union{}, EFFECTS_TOTAL, info) + elseif isa(tt, Const) || isconstType(tt) + # input arguments were known for certain + # XXX: this doesn't imply we know anything about rt + return CallMeta(Const(rt), Union{}, EFFECTS_TOTAL, info) + elseif isType(rt) + return CallMeta(Type{rt}, Union{}, EFFECTS_TOTAL, info) + else + return CallMeta(Type{<:rt}, Union{}, EFFECTS_TOTAL, info) + end +end + +# a simplified model of abstract_call_gf_by_type for applicable +function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any}, + sv::AbsIntState, max_methods::Int) + length(argtypes) < 2 && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()) + isvarargtype(argtypes[2]) && return CallMeta(Bool, Any, EFFECTS_UNKNOWN, NoCallInfo()) + argtypes = argtypes[2:end] + atype = argtypes_to_type(argtypes) + matches = find_matching_methods(typeinf_lattice(interp), argtypes, atype, method_table(interp), + InferenceParams(interp).max_union_splitting, max_methods) + if isa(matches, FailedMethodMatch) + rt = Bool # too many matches to analyze + else + (; valid_worlds, applicable) = matches + update_valid_age!(sv, valid_worlds) + + # also need an edge to the method table in case something gets + # added that did not intersect with any existing method + if isa(matches, MethodMatches) + matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype) + else + for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts) + thisfullmatch || add_mt_backedge!(sv, mt, atype) + end + end + + napplicable = length(applicable) + if napplicable == 0 + rt = Const(false) # never any matches + else + rt = Const(true) # has applicable matches + for i in 1:napplicable + match = applicable[i]::MethodMatch + edge = specialize_method(match)::MethodInstance + add_backedge!(sv, edge) + end + + if isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) : + (!all(matches.fullmatches) || any_ambig(matches)) + # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. 
+ rt = Bool end end end - return CallMeta(Type, EFFECTS_THROWS, false) + return CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo()) +end +add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 40) + +# a simplified model of abstract_invoke for Core._hasmethod +function _hasmethod_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState) + if length(argtypes) == 3 && !isvarargtype(argtypes[3]) + ft′ = argtype_by_index(argtypes, 2) + ft = widenconst(ft′) + ft === Bottom && return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()) + typeidx = 3 + elseif length(argtypes) == 2 && !isvarargtype(argtypes[2]) + typeidx = 2 + else + return CallMeta(Any, Any, Effects(), NoCallInfo()) + end + (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, typeidx), false) + isexact || return CallMeta(Bool, Any, Effects(), NoCallInfo()) + unwrapped = unwrap_unionall(types) + if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name + return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()) + end + if typeidx == 3 + isdispatchelem(ft) || return CallMeta(Bool, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below + types = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type + end + mt = ccall(:jl_method_table_for, Any, (Any,), types) + if !isa(mt, MethodTable) + return CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()) + end + match, valid_worlds = findsup(types, method_table(interp)) + update_valid_age!(sv, valid_worlds) + if match === nothing + rt = Const(false) + add_mt_backedge!(sv, mt, types) # this should actually be an invoke-type backedge + else + rt = Const(true) + edge = specialize_method(match)::MethodInstance + add_invoke_backedge!(sv, types, edge) + end + return CallMeta(rt, Any, EFFECTS_TOTAL, NoCallInfo()) end # N.B.: typename maps type equivalence classes to a single value @@ -2103,12 +2966,11 @@ function global_order_nothrow(@nospecialize(o), loading::Bool, storing::Bool) end return false end -function getglobal_nothrow(argtypes::Vector{Any}) - 2 ≤ length(argtypes) ≤ 3 || return false - if length(argtypes) == 3 - global_order_nothrow(argtypes[3], #=loading=#true, #=storing=#false) || return false - end - M, s = argtypes +@nospecs function getglobal_nothrow(M, s, o) + global_order_nothrow(o, #=loading=#true, #=storing=#false) || return false + return getglobal_nothrow(M, s) +end +@nospecs function getglobal_nothrow(M, s) if M isa Const && s isa Const M, s = M.val, s.val if M isa Module && s isa Symbol @@ -2117,7 +2979,7 @@ function getglobal_nothrow(argtypes::Vector{Any}) end return false end -function getglobal_tfunc(@nospecialize(M), @nospecialize(s), @nospecialize(_=Symbol)) +@nospecs function getglobal_tfunc(𝕃::AbstractLattice, M, s, order=Symbol) if M isa Const && s isa Const M, s = M.val, s.val if M isa Module && s isa Symbol @@ -2129,8 +2991,7 @@ function getglobal_tfunc(@nospecialize(M), @nospecialize(s), @nospecialize(_=Sym end return Any end -function setglobal!_tfunc(@nospecialize(M), @nospecialize(s), @nospecialize(v), - @nospecialize(_=Symbol)) +@nospecs function setglobal!_tfunc(𝕃::AbstractLattice, M, s, v, order=Symbol) if !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol)) return Bottom end @@ -2138,37 +2999,38 @@ function setglobal!_tfunc(@nospecialize(M), @nospecialize(s), @nospecialize(v), end add_tfunc(getglobal, 2, 3, getglobal_tfunc, 1) add_tfunc(setglobal!, 3, 4, 
setglobal!_tfunc, 3) -function setglobal!_nothrow(argtypes::Vector{Any}) - 3 ≤ length(argtypes) ≤ 4 || return false - if length(argtypes) == 4 - global_order_nothrow(argtypes[4], #=loading=#false, #=storing=#true) || return false - end - M, s, newty = argtypes +@nospecs function setglobal!_nothrow(M, s, newty, o) + global_order_nothrow(o, #=loading=#false, #=storing=#true) || return false + return setglobal!_nothrow(M, s, newty) +end +@nospecs function setglobal!_nothrow(M, s, newty) if M isa Const && s isa Const M, s = M.val, s.val - return global_assignment_nothrow(M, s, newty) + if isa(M, Module) && isa(s, Symbol) + return global_assignment_nothrow(M, s, newty) + end end return false end function global_assignment_nothrow(M::Module, s::Symbol, @nospecialize(newty)) if isdefined(M, s) && !isconst(M, s) - ty = ccall(:jl_binding_type, Any, (Any, Any), M, s) + ty = ccall(:jl_get_binding_type, Any, (Any, Any), M, s) return ty === nothing || newty ⊑ ty end return false end -function get_binding_type_effect_free(@nospecialize(M), @nospecialize(s)) +@nospecs function get_binding_type_effect_free(M, s) if M isa Const && s isa Const M, s = M.val, s.val if M isa Module && s isa Symbol - return ccall(:jl_binding_type, Any, (Any, Any), M, s) !== nothing + return ccall(:jl_get_binding_type, Any, (Any, Any), M, s) !== nothing end end return false end -function get_binding_type_tfunc(@nospecialize(M), @nospecialize(s)) +@nospecs function get_binding_type_tfunc(𝕃::AbstractLattice, M, s) if get_binding_type_effect_free(M, s) return Const(Core.get_binding_type((M::Const).val, (s::Const).val)) end @@ -2176,4 +3038,46 @@ function get_binding_type_tfunc(@nospecialize(M), @nospecialize(s)) end add_tfunc(Core.get_binding_type, 2, 2, get_binding_type_tfunc, 0) -@specialize +@nospecs function get_binding_type_nothrow(𝕃::AbstractLattice, M, s) + ⊑ = Core.Compiler.:⊑(𝕃) + return M ⊑ Module && s ⊑ Symbol +end + +# foreigncall +# =========== + +# N.B. the `abstract_eval` callback below allows us to use these queries +# both during abstract interpret and optimization + +const FOREIGNCALL_ARG_START = 6 + +function foreigncall_effects(@specialize(abstract_eval), e::Expr) + args = e.args + name = args[1] + isa(name, QuoteNode) && (name = name.value) + if name === :jl_alloc_genericmemory + nothrow = new_genericmemory_nothrow(abstract_eval, args) + return Effects(EFFECTS_TOTAL; consistent=CONSISTENT_IF_NOTRETURNED, nothrow) + end + return EFFECTS_UNKNOWN +end + +function new_genericmemory_nothrow(@nospecialize(abstract_eval), args::Vector{Any}) + length(args) ≥ 1+FOREIGNCALL_ARG_START || return false + mtype = instanceof_tfunc(abstract_eval(args[FOREIGNCALL_ARG_START]))[1] + isa(mtype, DataType) || return false + isdefined(mtype, :instance) || return false + elsz = Int(datatype_layoutsize(mtype)) + arrayelem = datatype_arrayelem(mtype) + dim = abstract_eval(args[1+FOREIGNCALL_ARG_START]) + isa(dim, Const) || return false + dimval = dim.val + isa(dimval, Int) || return false + 0 < dimval < typemax(Int) || return false + tot, ovflw = Intrinsics.checked_smul_int(dimval, elsz) + ovflw && return false + isboxed = 1; isunion = 2 + tot, ovflw = Intrinsics.checked_sadd_int(tot, arrayelem == isunion ? 1 + dimval : 1) + ovflw && return false + return true +end diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 86aa6e4affa46..958993847a48e 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -1,14 +1,14 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# Tracking of newly-inferred MethodInstances during precompilation +# Tracking of newly-inferred CodeInstances during precompilation const track_newly_inferred = RefValue{Bool}(false) -const newly_inferred = MethodInstance[] +const newly_inferred = CodeInstance[] # build (and start inferring) the inference frame for the top-level MethodInstance -function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol) - frame = InferenceState(result, cache, interp) +function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache_mode::Symbol) + frame = InferenceState(result, cache_mode, interp) frame === nothing && return false - cache === :global && lock_mi_inference(interp, result.linfo) + cache_mode === :global && lock_mi_inference(interp, result.linfo) return typeinf(interp, frame) end @@ -29,7 +29,7 @@ using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inlin struct InferenceFrameInfo mi::Core.MethodInstance world::UInt64 - sptypes::Vector{Any} + sptypes::Vector{Core.Compiler.VarState} slottypes::Vector{Any} nargs::Int end @@ -45,6 +45,8 @@ function _typeinf_identifier(frame::Core.Compiler.InferenceState) return mi_info end +_typeinf_identifier(frame::InferenceFrameInfo) = frame + """ Core.Compiler.Timing(mi_info, start_time, ...) @@ -87,7 +89,7 @@ function reset_timings() empty!(_timings) push!(_timings, Timing( # The MethodInstance for ROOT(), and default empty values for other fields. - InferenceFrameInfo(ROOTmi, 0x0, Any[], Any[Core.Const(ROOT)], 1), + InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1), _time_ns())) return nothing end @@ -202,8 +204,9 @@ If set to `true`, record per-method-instance timings within type inference in th __set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff const __measure_typeinf__ = fill(false) -# Wrapper around _typeinf that optionally records the exclusive time for each invocation. -function typeinf(interp::AbstractInterpreter, frame::InferenceState) +# Wrapper around `_typeinf` that optionally records the exclusive time for +# each inference performed by `NativeInterpreter`. +function typeinf(interp::NativeInterpreter, frame::InferenceState) if __measure_typeinf__[] Timings.enter_new_timer(frame) v = _typeinf(interp, frame) @@ -213,17 +216,30 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState) return _typeinf(interp, frame) end end +typeinf(interp::AbstractInterpreter, frame::InferenceState) = _typeinf(interp, frame) -function finish!(interp::AbstractInterpreter, caller::InferenceResult) - # If we didn't transform the src for caching, we may have to transform - # it anyway for users like typeinf_ext. Do that here. 
- opt = caller.src - if opt isa OptimizationState # implies `may_optimize(interp) === true` - if opt.ir !== nothing - caller.src = ir_to_codeinf!(opt) - end +function finish!(interp::AbstractInterpreter, caller::InferenceState) + result = caller.result + valid_worlds = result.valid_worlds + if last(valid_worlds) >= get_world_counter() + # if we aren't cached, we don't need this edge + # but our caller might, so let's just make it anyways + store_backedges(result, caller.stmt_edges[1]) + end + opt = result.src + if opt isa OptimizationState + result.src = opt = ir_to_codeinf!(opt) end - return caller.src + if opt isa CodeInfo + opt.min_world = first(valid_worlds) + opt.max_world = last(valid_worlds) + caller.src = opt + else + # In this case `caller.src` is invalid for clients (such as `typeinf_ext`) to use + # but that is what's permitted by `caller.cache_mode`. + # This is hopefully unreachable from such clients using `NativeInterpreter`. + end + return nothing end function _typeinf(interp::AbstractInterpreter, frame::InferenceState) @@ -240,57 +256,37 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState) end for caller in frames caller.valid_worlds = valid_worlds - finish(caller, interp) - # finalize and record the linfo result - caller.inferred = true - end - # collect results for the new expanded frame - results = Tuple{InferenceResult, Vector{Any}, Bool}[ - ( frames[i].result, - frames[i].stmt_edges[1]::Vector{Any}, - frames[i].cached ) - for i in 1:length(frames) ] - empty!(frames) - for (caller, _, _) in results - opt = caller.src - if opt isa OptimizationState # implies `may_optimize(interp) === true` - analyzed = optimize(interp, opt, OptimizationParams(interp), caller) - if isa(analyzed, ConstAPI) - # XXX: The work in ir_to_codeinf! is essentially wasted. The only reason - # we're doing it is so that code_llvm can return the code - # for the `return ...::Const` (which never runs anyway). We should do this - # as a post processing step instead. 
- ir_to_codeinf!(opt) - caller.src = analyzed - end - caller.valid_worlds = (opt.inlining.et::EdgeTracker).valid_worlds[] - end + finish(caller, caller.interp) end - for (caller, edges, cached) in results - valid_worlds = caller.valid_worlds - if last(valid_worlds) >= get_world_counter() - # if we aren't cached, we don't need this edge - # but our caller might, so let's just make it anyways - store_backedges(caller, edges) + for caller in frames + opt = caller.result.src + if opt isa OptimizationState # implies `may_optimize(caller.interp) === true` + optimize(caller.interp, opt, caller.result) end - if cached - cache_result!(interp, caller) + end + for caller in frames + finish!(caller.interp, caller) + if is_cached(caller) + cache_result!(caller.interp, caller.result) end - finish!(interp, caller) end + empty!(frames) return true end -function CodeInstance( - result::InferenceResult, @nospecialize(inferred_result), valid_worlds::WorldRange) +function is_result_constabi_eligible(result::InferenceResult) + result_type = result.result + return isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val) +end +function CodeInstance(interp::AbstractInterpreter, result::InferenceResult, + valid_worlds::WorldRange) local const_flags::Int32 result_type = result.result - @assert !(result_type isa LimitedAccuracy) - if inferred_result isa ConstAPI + @assert !(result_type === nothing || result_type isa LimitedAccuracy) + if is_result_constabi_eligible(result) # use constant calling convention - rettype_const = inferred_result.val + rettype_const = result_type.val const_flags = 0x3 - inferred_result = nothing else if isa(result_type, Const) rettype_const = result_type.val @@ -307,28 +303,42 @@ function CodeInstance( elseif isa(result_type, InterConditional) rettype_const = result_type const_flags = 0x2 + elseif isa(result_type, InterMustAlias) + rettype_const = result_type + const_flags = 0x2 else rettype_const = nothing const_flags = 0x00 end end - relocatability = isa(inferred_result, Vector{UInt8}) ? inferred_result[end] : UInt8(0) + relocatability = 0x0 + if const_flags == 0x3 && may_discard_trees(interp) + inferred_result = nothing + relocatability = 0x1 + else + inferred_result = transform_result_for_cache(interp, result.linfo, valid_worlds, result) + if isa(inferred_result, String) + t = @_gc_preserve_begin inferred_result + relocatability = unsafe_load(unsafe_convert(Ptr{UInt8}, inferred_result), Core.sizeof(inferred_result)) + @_gc_preserve_end t + elseif inferred_result === nothing + relocatability = 0x1 + end + end + # relocatability = isa(inferred_result, String) ? 
inferred_result[end] : UInt8(0) return CodeInstance(result.linfo, - widenconst(result_type), rettype_const, inferred_result, + widenconst(result_type), widenconst(result.exc_result), rettype_const, inferred_result, const_flags, first(valid_worlds), last(valid_worlds), # TODO: Actually do something with non-IPO effects - encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.argescapes, + encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.analysis_results, relocatability) end function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInstance, ci::CodeInfo) def = linfo.def - toplevel = !isa(def, Method) - if toplevel - return ci - end + isa(def, Method) || return ci # don't compress toplevel code if may_discard_trees(interp) - cache_the_tree = ci.inferred && (ci.inlineable || isa_compileable_sig(linfo.specTypes, def)) + cache_the_tree = ci.inferred && (is_inlineable(ci) || isa_compileable_sig(linfo.specTypes, linfo.sparam_vals, def)) else cache_the_tree = true end @@ -337,7 +347,7 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta nslots = length(ci.slotflags) resize!(ci.slottypes::Vector{Any}, nslots) resize!(ci.slotnames, nslots) - return ccall(:jl_compress_ir, Vector{UInt8}, (Any, Any), def, ci) + return ccall(:jl_compress_ir, String, (Any, Any), def, ci) else return ci end @@ -346,21 +356,16 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta end end -function transform_result_for_cache(interp::AbstractInterpreter, linfo::MethodInstance, - valid_worlds::WorldRange, @nospecialize(inferred_result), - ipo_effects::Effects) - # If we decided not to optimize, drop the OptimizationState now. - # External interpreters can override as necessary to cache additional information - if inferred_result isa OptimizationState - inferred_result = ir_to_codeinf!(inferred_result) - end +function transform_result_for_cache(interp::AbstractInterpreter, + linfo::MethodInstance, valid_worlds::WorldRange, result::InferenceResult) + inferred_result = result.src if inferred_result isa CodeInfo - inferred_result.min_world = first(valid_worlds) - inferred_result.max_world = last(valid_worlds) + uncompressed = inferred_result inferred_result = maybe_compress_codeinfo(interp, linfo, inferred_result) + result.is_src_volatile |= uncompressed !== inferred_result end # The global cache can only handle objects that codegen understands - if !isa(inferred_result, Union{CodeInfo, Vector{UInt8}, ConstAPI}) + if !isa(inferred_result, MaybeCompressed) inferred_result = nothing end return inferred_result @@ -375,24 +380,23 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult) end # check if the existing linfo metadata is also sufficient to describe the current inference result # to decide if it is worth caching this - linfo = result.linfo - already_inferred = already_inferred_quick_test(interp, linfo) - if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo) + mi = result.linfo + already_inferred = already_inferred_quick_test(interp, mi) + if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), mi) already_inferred = true end # TODO: also don't store inferred code if we've previously decided to interpret this function if !already_inferred - inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result.src, result.ipo_effects) - code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds) 
+ code_cache(interp)[mi] = ci = CodeInstance(interp, result, valid_worlds) if track_newly_inferred[] - m = linfo.def - if isa(m, Method) - m.module != Core && push!(newly_inferred, linfo) + m = mi.def + if isa(m, Method) && m.module != Core + ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci) end end end - unlock_mi_inference(interp, linfo) + unlock_mi_inference(interp, mi) nothing end @@ -418,54 +422,95 @@ function cycle_fix_limited(@nospecialize(typ), sv::InferenceState) return typ end +function adjust_effects(ipo_effects::Effects, def::Method) + # override the analyzed effects using manually annotated effect settings + override = decode_effects_override(def.purity) + if is_effect_overridden(override, :consistent) + ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE) + end + if is_effect_overridden(override, :effect_free) + ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_TRUE) + end + if is_effect_overridden(override, :nothrow) + ipo_effects = Effects(ipo_effects; nothrow=true) + end + if is_effect_overridden(override, :terminates_globally) + ipo_effects = Effects(ipo_effects; terminates=true) + end + if is_effect_overridden(override, :notaskstate) + ipo_effects = Effects(ipo_effects; notaskstate=true) + end + if is_effect_overridden(override, :inaccessiblememonly) + ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE) + end + if is_effect_overridden(override, :noub) + ipo_effects = Effects(ipo_effects; noub=ALWAYS_TRUE) + elseif is_effect_overridden(override, :noub_if_noinbounds) && ipo_effects.noub !== ALWAYS_TRUE + ipo_effects = Effects(ipo_effects; noub=NOUB_IF_NOINBOUNDS) + end + return ipo_effects +end + function adjust_effects(sv::InferenceState) - ipo_effects = Effects(sv) + ipo_effects = sv.ipo_effects # refine :consistent-cy effect using the return type information # TODO this adjustment tries to compromise imprecise :consistent-cy information, # that is currently modeled in a flow-insensitive way: ideally we want to model it # with a proper dataflow analysis instead rt = sv.bestguess - if !ipo_effects.inbounds_taints_consistency && rt === Bottom + if rt === Bottom # always throwing an error counts or never returning both count as consistent ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE) - elseif ipo_effects.consistent === TRISTATE_UNKNOWN && is_consistent_rt(rt) + end + if sv.exc_bestguess === Bottom + # if the exception type of this frame is known to be `Bottom`, + # this frame is known to be safe + ipo_effects = Effects(ipo_effects; nothrow=true) + end + if is_inaccessiblemem_or_argmemonly(ipo_effects) && all(1:narguments(sv, #=include_va=#true)) do i::Int + return is_mutation_free_argtype(sv.slottypes[i]) + end + ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE) + end + if is_consistent_if_notreturned(ipo_effects) && is_identity_free_argtype(rt) # in a case when the :consistent-cy here is only tainted by mutable allocations - # (indicated by `TRISTATE_UNKNOWN`), we may be able to refine it if the return + # (indicated by `CONSISTENT_IF_NOTRETURNED`), we may be able to refine it if the return # type guarantees that the allocations are never returned - ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE) + consistent = ipo_effects.consistent & ~CONSISTENT_IF_NOTRETURNED + ipo_effects = Effects(ipo_effects; consistent) + end + if is_consistent_if_inaccessiblememonly(ipo_effects) + if is_inaccessiblememonly(ipo_effects) + consistent = ipo_effects.consistent & ~CONSISTENT_IF_INACCESSIBLEMEMONLY + ipo_effects = 
Effects(ipo_effects; consistent) + elseif is_inaccessiblemem_or_argmemonly(ipo_effects) + else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this + ipo_effects = Effects(ipo_effects; consistent=ALWAYS_FALSE) + end + end + if is_effect_free_if_inaccessiblememonly(ipo_effects) + if is_inaccessiblememonly(ipo_effects) + effect_free = ipo_effects.effect_free & ~EFFECT_FREE_IF_INACCESSIBLEMEMONLY + ipo_effects = Effects(ipo_effects; effect_free) + elseif is_inaccessiblemem_or_argmemonly(ipo_effects) + else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this + ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_FALSE) + end end # override the analyzed effects using manually annotated effect settings def = sv.linfo.def if isa(def, Method) - override = decode_effects_override(def.purity) - if is_effect_overridden(override, :consistent) - ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE) - end - if is_effect_overridden(override, :effect_free) - ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_TRUE) - end - if is_effect_overridden(override, :nothrow) - ipo_effects = Effects(ipo_effects; nothrow=ALWAYS_TRUE) - end - if is_effect_overridden(override, :terminates_globally) - ipo_effects = Effects(ipo_effects; terminates=ALWAYS_TRUE) - end - if is_effect_overridden(override, :notaskstate) - ipo_effects = Effects(ipo_effects; notaskstate=ALWAYS_TRUE) - end + ipo_effects = adjust_effects(ipo_effects, def) end return ipo_effects end -is_consistent_rt(@nospecialize rt) = _is_consistent_rt(widenconst(ignorelimited(rt))) -function _is_consistent_rt(@nospecialize ty) - if isa(ty, Union) - return _is_consistent_rt(ty.a) && _is_consistent_rt(ty.b) - end - return ty === Symbol || isbitstype(ty) +function refine_exception_type(@nospecialize(exc_bestguess), ipo_effects::Effects) + ipo_effects.nothrow && return Bottom + return exc_bestguess end # inference completed on `me` @@ -488,8 +533,9 @@ function finish(me::InferenceState, interp::AbstractInterpreter) end # inspect whether our inference had a limited result accuracy, # else it may be suitable to cache - me.bestguess = cycle_fix_limited(me.bestguess, me) - limited_ret = me.bestguess isa LimitedAccuracy + bestguess = me.bestguess = cycle_fix_limited(me.bestguess, me) + exc_bestguess = me.exc_bestguess = cycle_fix_limited(me.exc_bestguess, me) + limited_ret = bestguess isa LimitedAccuracy || exc_bestguess isa LimitedAccuracy limited_src = false if !limited_ret gt = me.ssavaluetypes @@ -501,78 +547,61 @@ function finish(me::InferenceState, interp::AbstractInterpreter) end end end + me.result.valid_worlds = me.valid_worlds + me.result.result = bestguess + ipo_effects = me.result.ipo_effects = me.ipo_effects = adjust_effects(me) + me.result.exc_result = me.exc_bestguess = refine_exception_type(me.exc_bestguess, ipo_effects) + if limited_ret # a parent may be cached still, but not this intermediate work: # we can throw everything else away now me.result.src = nothing - me.cached = false - me.src.inlineable = false + me.cache_mode = CACHE_MODE_NULL + set_inlineable!(me.src, false) unlock_mi_inference(interp, me.linfo) elseif limited_src # a type result will be cached still, but not this intermediate work: # we can throw everything else away now me.result.src = nothing - me.src.inlineable = false + set_inlineable!(me.src, false) else # annotate fulltree with type information, # either because we are the outermost code, or we might use this later - doopt = (me.cached || me.parent !== 
nothing) - recompute_cfg = type_annotate!(me, doopt) + type_annotate!(interp, me) + doopt = (me.cache_mode != CACHE_MODE_NULL || me.parent !== nothing) + # Disable the optimizer if we've already determined that there's nothing for + # it to do. + if may_discard_trees(interp) && is_result_constabi_eligible(me.result) + doopt = false + end if doopt && may_optimize(interp) - me.result.src = OptimizationState(me, OptimizationParams(interp), interp, recompute_cfg) + me.result.src = OptimizationState(me, interp) else - me.result.src = me.src::CodeInfo # stash a convenience copy of the code (e.g. for reflection) + me.result.src = me.src # for reflection etc. end end - me.result.valid_worlds = me.valid_worlds - me.result.result = me.bestguess - me.ipo_effects = me.result.ipo_effects = adjust_effects(me) + validate_code_in_debug_mode(me.linfo, me.src, "inferred") nothing end # record the backedges -function store_backedges(frame::InferenceResult, edges::Vector{Any}) - toplevel = !isa(frame.linfo.def, Method) - if !toplevel - store_backedges(frame.linfo, edges) - end - nothing +function store_backedges(caller::InferenceResult, edges::Vector{Any}) + isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance + return store_backedges(caller.linfo, edges) end function store_backedges(caller::MethodInstance, edges::Vector{Any}) - i = 1 - while i <= length(edges) - to = edges[i] - if isa(to, MethodInstance) - ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any), to, caller) - i += 1 + for itr in BackedgeIterator(edges) + callee = itr.caller + if isa(callee, MethodInstance) + ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller) else - typeassert(to, Core.MethodTable) - typ = edges[i + 1] - ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any, Any), to, typ, caller) - i += 2 - end - end -end - -# widen all Const elements in type annotations -function widen_all_consts!(src::CodeInfo) - ssavaluetypes = src.ssavaluetypes::Vector{Any} - for i = 1:length(ssavaluetypes) - ssavaluetypes[i] = widenconst(ssavaluetypes[i]) - end - - for i = 1:length(src.code) - x = src.code[i] - if isa(x, PiNode) - src.code[i] = PiNode(x.val, widenconst(x.typ)) + typeassert(callee, MethodTable) + ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller) end end - - src.rettype = widenconst(src.rettype) - - return src + return nothing end function record_slot_assign!(sv::InferenceState) @@ -616,51 +645,11 @@ function record_bestguess!(sv::InferenceState) return nothing end -function annotate_slot_load!(undefs::Vector{Bool}, idx::Int, sv::InferenceState, @nospecialize x) - if isa(x, SlotNumber) - id = slot_id(x) - pc = find_dominating_assignment(id, idx, sv) - if pc === nothing - block = block_for_inst(sv.cfg, idx) - state = sv.bb_vartables[block]::VarTable - vt = state[id] - undefs[id] |= vt.undef - typ = widenconditional(ignorelimited(vt.typ)) - else - typ = sv.ssavaluetypes[pc] - @assert typ !== NOT_FOUND "active slot in unreached region" - end - # add type annotations where needed - if !(sv.slottypes[id] ⊑ typ) - return TypedSlot(id, typ) - end - return x - elseif isa(x, Expr) - head = x.head - i0 = 1 - if is_meta_expr_head(head) || head === :const - return x - end - if head === :(=) || head === :method - i0 = 2 - end - for i = i0:length(x.args) - x.args[i] = annotate_slot_load!(undefs, idx, sv, x.args[i]) - end - return x - elseif isa(x, ReturnNode) && isdefined(x, :val) - return ReturnNode(annotate_slot_load!(undefs, idx, 
sv, x.val)) - elseif isa(x, GotoIfNot) - return GotoIfNot(annotate_slot_load!(undefs, idx, sv, x.cond), x.dest) - end - return x -end - # find the dominating assignment to the slot `id` in the block containing statement `idx`, # returns `nothing` otherwise function find_dominating_assignment(id::Int, idx::Int, sv::InferenceState) block = block_for_inst(sv.cfg, idx) - for pc in reverse(sv.cfg.blocks[block].stmts) # N.B. reverse since the last assignement is dominating this block + for pc in reverse(sv.cfg.blocks[block].stmts) # N.B. reverse since the last assignment is dominating this block pc < idx || continue # N.B. needs pc ≠ idx as `id` can be assigned at `idx` stmt = sv.src.code[pc] isexpr(stmt, :(=)) || continue @@ -672,8 +661,14 @@ function find_dominating_assignment(id::Int, idx::Int, sv::InferenceState) return nothing end -# annotate types of all symbols in AST -function type_annotate!(sv::InferenceState, run_optimizer::Bool) +# annotate types of all symbols in AST, preparing for optimization +function type_annotate!(interp::AbstractInterpreter, sv::InferenceState) + # widen `Conditional`s from `slottypes` + slottypes = sv.slottypes + for i = 1:length(slottypes) + slottypes[i] = widenconditional(slottypes[i]) + end + # compute the required type for each slot # to hold all of the items assigned into it record_slot_assign!(sv) @@ -681,79 +676,43 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool) record_bestguess!(sv) # annotate variables load types - # remove dead code optimization - # and compute which variables may be used undef - stmt_info = sv.stmt_info src = sv.src - body = src.code - nexpr = length(body) - codelocs = src.codelocs + stmts = src.code + nstmt = length(stmts) ssavaluetypes = sv.ssavaluetypes - ssaflags = src.ssaflags - slotflags = src.slotflags - nslots = length(slotflags) - undefs = fill(false, nslots) - - # this statement traversal does five things: - # 1. introduce temporary `TypedSlot`s that are supposed to be replaced with π-nodes later - # 2. mark used-undef slots (required by the `slot2reg` conversion) - # 3. mark unreached statements for a bulk code deletion (see issue #7836) - # 4. widen `Conditional`s and remove `NOT_FOUND` from `ssavaluetypes` - # NOTE because of this, `was_reached` will no longer be available after this point - # 5. eliminate GotoIfNot if either branch target is unreachable - changemap = nothing # initialized if there is any dead region - for i = 1:nexpr - expr = body[i] + nslots = length(src.slotflags) + + # widen slot wrappers (`Conditional` and `MustAlias`) and remove `NOT_FOUND` from `ssavaluetypes` + # and mark any unreachable statements by wrapping them in Const(...), to distinguish them from + # must-throw statements which also have type Bottom + for i = 1:nstmt + expr = stmts[i] if was_reached(sv, i) - if run_optimizer - if isa(expr, GotoIfNot) && widenconst(argextype(expr.cond, src, sv.sptypes)) === Bool - # 5: replace this live GotoIfNot with: - # - GotoNode if the fallthrough target is unreachable - # - no-op if the branch target is unreachable - if !was_reached(sv, i+1) - expr = GotoNode(expr.dest) - elseif !was_reached(sv, expr.dest) - expr = nothing - end - end - end - body[i] = annotate_slot_load!(undefs, i, sv, expr) # 1&2 - ssavaluetypes[i] = widenconditional(ssavaluetypes[i]) # 4 + ssavaluetypes[i] = widenslotwrapper(ssavaluetypes[i]) # 3 else # i.e. 
any runtime execution will never reach this statement + push!(sv.unreachable, i) if is_meta_expr(expr) # keep any lexically scoped expressions - ssavaluetypes[i] = Any # 4 - elseif run_optimizer - if changemap === nothing - changemap = fill(0, nexpr) - end - changemap[i] = -1 # 3&4: mark for the bulk deletion + ssavaluetypes[i] = Any # 3 else - ssavaluetypes[i] = Bottom # 4 - body[i] = Const(expr) # annotate that this statement actually is dead + ssavaluetypes[i] = Bottom # 3 + # annotate that this statement actually is dead + stmts[i] = Const(expr) end end end - # finish marking used-undef variables - for j = 1:nslots - if undefs[j] - slotflags[j] |= SLOT_USEDUNDEF | SLOT_STATICUNDEF + # widen slot wrappers (`Conditional` and `MustAlias`) in `bb_vartables` + for varstate in sv.bb_vartables + if varstate !== nothing + for slot in 1:nslots + vt = varstate[slot] + widened_type = widenslotwrapper(ignorelimited(vt.typ)) + varstate[slot] = VarState(widened_type, vt.undef) + end end end - # do the bulk deletion of unreached statements - if changemap !== nothing - inds = Int[i for (i,v) in enumerate(changemap) if v == -1] - deleteat!(body, inds) - deleteat!(ssavaluetypes, inds) - deleteat!(codelocs, inds) - deleteat!(stmt_info, inds) - deleteat!(ssaflags, inds) - renumber_ir_elements!(body, changemap) - return true - else - return false - end + return nothing end # at the end, all items in b's cycle @@ -775,125 +734,121 @@ function union_caller_cycle!(a::InferenceState, b::InferenceState) return end -function merge_call_chain!(parent::InferenceState, ancestor::InferenceState, child::InferenceState) +function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, ancestor::InferenceState, child::InferenceState) # add backedge of parent <- child # then add all backedges of parent <- parent.parent # and merge all of the callers into ancestor.callers_in_cycle # and ensure that walking the parent list will get the same result (DAG) from everywhere - # Also taint the termination effect, because we can no longer guarantee the absence - # of recursion. - tristate_merge!(parent, Effects(EFFECTS_TOTAL; terminates=ALWAYS_FALSE)) while true - add_cycle_backedge!(child, parent, parent.currpc) + add_cycle_backedge!(parent, child, parent.currpc) union_caller_cycle!(ancestor, child) - tristate_merge!(child, Effects(EFFECTS_TOTAL; terminates=ALWAYS_FALSE)) child = parent child === ancestor && break - parent = child.parent::InferenceState + parent = frame_parent(child) + while !isa(parent, InferenceState) + # XXX we may miss some edges here? + parent = frame_parent(parent::IRInterpretationState) + end + parent = parent::InferenceState end end -function is_same_frame(interp::AbstractInterpreter, linfo::MethodInstance, frame::InferenceState) - return linfo === frame.linfo +function is_same_frame(interp::AbstractInterpreter, mi::MethodInstance, frame::InferenceState) + return mi === frame_instance(frame) end -function poison_callstack(infstate::InferenceState, topmost::InferenceState) +function poison_callstack!(infstate::InferenceState, topmost::InferenceState) push!(infstate.pclimitations, topmost) nothing end -# Walk through `linfo`'s upstream call chain, starting at `parent`. If a parent -# frame matching `linfo` is encountered, then there is a cycle in the call graph -# (i.e. `linfo` is a descendant callee of itself). Upon encountering this cycle, +# Walk through `mi`'s upstream call chain, starting at `parent`. 
If a parent +# frame matching `mi` is encountered, then there is a cycle in the call graph +# (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle, # we "resolve" it by merging the call chain, which entails unioning each intermediary # frame's `callers_in_cycle` field and adding the appropriate backedges. Finally, -# we return `linfo`'s pre-existing frame. If no cycles are found, `nothing` is +# we return `mi`'s pre-existing frame. If no cycles are found, `nothing` is # returned instead. -function resolve_call_cycle!(interp::AbstractInterpreter, linfo::MethodInstance, parent::InferenceState) +function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState) + # TODO (#48913) implement a proper recursion handling for irinterp: + # This works just because currently the `:terminate` condition guarantees that + # irinterp doesn't fail into unresolved cycles, but it's not a good solution. + # We should revisit this once we have a better story for handling cycles in irinterp. + isa(parent, InferenceState) || return false frame = parent uncached = false while isa(frame, InferenceState) - uncached |= !frame.cached # ensure we never add an uncached frame to a cycle - if is_same_frame(interp, linfo, frame) + uncached |= !is_cached(frame) # ensure we never add an uncached frame to a cycle + if is_same_frame(interp, mi, frame) if uncached # our attempt to speculate into a constant call lead to an undesired self-cycle # that cannot be converged: poison our call-stack (up to the discovered duplicate frame) # with the limited flag and abort (set return type to Any) now - poison_callstack(parent, frame) + poison_callstack!(parent, frame) return true end - merge_call_chain!(parent, frame, frame) + merge_call_chain!(interp, parent, frame, frame) return frame end - for caller in frame.callers_in_cycle - if is_same_frame(interp, linfo, caller) + for caller in callers_in_cycle(frame) + if is_same_frame(interp, mi, caller) if uncached - poison_callstack(parent, frame) + poison_callstack!(parent, frame) return true end - merge_call_chain!(parent, frame, caller) + merge_call_chain!(interp, parent, frame, caller) return caller end end - frame = frame.parent + frame = frame_parent(frame) end return false end -generating_sysimg() = ccall(:jl_generating_output, Cint, ()) != 0 && JLOptions().incremental == 0 - ipo_effects(code::CodeInstance) = decode_effects(code.ipo_purity_bits) struct EdgeCallResult - rt #::Type + rt + exct edge::Union{Nothing,MethodInstance} effects::Effects - function EdgeCallResult(@nospecialize(rt), + volatile_inf_result::Union{Nothing,VolatileInferenceResult} + function EdgeCallResult(@nospecialize(rt), @nospecialize(exct), edge::Union{Nothing,MethodInstance}, - effects::Effects) - return new(rt, edge, effects) + effects::Effects, + volatile_inf_result::Union{Nothing,VolatileInferenceResult} = nothing) + return new(rt, exct, edge, effects, volatile_inf_result) end end # compute (and cache) an inferred AST and return the current best estimate of the result type -function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::InferenceState) +function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState) mi = specialize_method(method, atype, sparams)::MethodInstance code = get(code_cache(interp), mi, nothing) + force_inline = is_stmt_inline(get_curr_ssaflag(caller)) if code isa CodeInstance # return existing rettype if 
the code is already inferred - if code.inferred === nothing && is_stmt_inline(get_curr_ssaflag(caller)) + inferred = @atomic :monotonic code.inferred + if inferred === nothing && force_inline # we already inferred this edge before and decided to discard the inferred code, - # nevertheless we re-infer it here again and keep it around in the local cache - # since the inliner will request to use it later - cache = :local + # nevertheless we re-infer it here again in order to propagate the re-inferred + # source to the inliner as a volatile result + cache_mode = CACHE_MODE_VOLATILE else + rt = cached_return_type(code) effects = ipo_effects(code) update_valid_age!(caller, WorldRange(min_world(code), max_world(code))) - rettype = code.rettype - if isdefined(code, :rettype_const) - rettype_const = code.rettype_const - # the second subtyping conditions are necessary to distinguish usual cases - # from rare cases when `Const` wrapped those extended lattice type objects - if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype) - rettype = PartialStruct(rettype, rettype_const) - elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure - rettype = rettype_const - elseif isa(rettype_const, InterConditional) && !(InterConditional <: rettype) - rettype = rettype_const - else - rettype = Const(rettype_const) - end - end - return EdgeCallResult(rettype, mi, effects) + return EdgeCallResult(rt, code.exctype, mi, effects) end else - cache = :global # cache edge targets by default + cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default end - if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg() - return EdgeCallResult(Any, nothing, Effects()) + if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_output(#=incremental=#false) + add_remark!(interp, caller, "Inference is disabled for the target module") + return EdgeCallResult(Any, Any, nothing, Effects()) end - if !caller.cached && caller.parent === nothing + if !is_cached(caller) && frame_parent(caller) === nothing # this caller exists to return to the user - # (if we asked resolve_call_cyle, it might instead detect that there is a cycle that it can't merge) + # (if we asked resolve_call_cycle!, it might instead detect that there is a cycle that it can't merge) frame = false else frame = resolve_call_cycle!(interp, mi, caller) @@ -901,142 +856,194 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize if frame === false # completely new lock_mi_inference(interp, mi) - result = InferenceResult(mi) - frame = InferenceState(result, cache, interp) # always use the cache for edge targets + result = InferenceResult(mi, typeinf_lattice(interp)) + frame = InferenceState(result, cache_mode, interp) # always use the cache for edge targets if frame === nothing + add_remark!(interp, caller, "Failed to retrieve source") # can't get the source for this, so we know nothing unlock_mi_inference(interp, mi) - return EdgeCallResult(Any, nothing, Effects()) + return EdgeCallResult(Any, Any, nothing, Effects()) end - if caller.cached || caller.parent !== nothing # don't involve uncached functions in cycle resolution + if is_cached(caller) || frame_parent(caller) !== nothing # don't involve uncached functions in cycle resolution frame.parent = caller end typeinf(interp, frame) - update_valid_age!(frame, caller) - edge = frame.inferred ? 
mi : nothing - return EdgeCallResult(frame.bestguess, edge, Effects(frame)) # effects are adjusted already within `finish` + update_valid_age!(caller, frame.valid_worlds) + isinferred = is_inferred(frame) + edge = isinferred ? mi : nothing + effects = isinferred ? frame.result.ipo_effects : adjust_effects(Effects(), method) # effects are adjusted already within `finish` for ipo_effects + exc_bestguess = refine_exception_type(frame.exc_bestguess, effects) + # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization: + # note that this result is cached globally exclusively, we can use this local result destructively + volatile_inf_result = isinferred && let inferred_src = result.src + isa(inferred_src, CodeInfo) && (is_inlineable(inferred_src) || force_inline) + end ? VolatileInferenceResult(result) : nothing + return EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result) elseif frame === true # unresolvable cycle - return EdgeCallResult(Any, nothing, Effects()) + return EdgeCallResult(Any, Any, nothing, Effects()) end # return the current knowledge about this cycle frame = frame::InferenceState - update_valid_age!(frame, caller) - return EdgeCallResult(frame.bestguess, nothing, adjust_effects(frame)) + update_valid_age!(caller, frame.valid_worlds) + effects = adjust_effects(Effects(), method) + exc_bestguess = refine_exception_type(frame.exc_bestguess, effects) + return EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects) +end + +function cached_return_type(code::CodeInstance) + rettype = code.rettype + isdefined(code, :rettype_const) || return rettype + rettype_const = code.rettype_const + # the second subtyping/egal conditions are necessary to distinguish usual cases + # from rare cases when `Const` wrapped those extended lattice type objects + if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype) + return PartialStruct(rettype, rettype_const) + elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure + return rettype_const + elseif isa(rettype_const, InterConditional) && rettype !== InterConditional + return rettype_const + elseif isa(rettype_const, InterMustAlias) && rettype !== InterMustAlias + return rettype_const + else + return Const(rettype_const) + end end #### entry points for inferring a MethodInstance given a type signature #### +function codeinfo_for_const(interp::AbstractInterpreter, mi::MethodInstance, worlds::WorldRange, @nospecialize(val)) + method = mi.def::Method + tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ()) + tree.code = Any[ ReturnNode(quoted(val)) ] + nargs = Int(method.nargs) + tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms) + tree.slotflags = fill(0x00, nargs) + tree.ssavaluetypes = 1 + tree.codelocs = Int32[1] + tree.linetable = LineInfoNode[LineInfoNode(method.module, method.name, method.file, method.line, Int32(0))] + tree.ssaflags = UInt32[0] + set_inlineable!(tree, true) + tree.parent = mi + tree.rettype = Core.Typeof(val) + tree.min_world = worlds.min_world + tree.max_world = worlds.max_world + tree.inferred = true + return tree +end + +result_is_constabi(interp::AbstractInterpreter, run_optimizer::Bool, result::InferenceResult) = + run_optimizer && may_discard_trees(interp) && is_result_constabi_eligible(result) + # compute an inferred AST and return type -function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool) - frame = 
typeinf_frame(interp, method, atype, sparams, run_optimizer) +typeinf_code(interp::AbstractInterpreter, match::MethodMatch, run_optimizer::Bool) = + typeinf_code(interp, specialize_method(match), run_optimizer) +typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, + run_optimizer::Bool) = + typeinf_code(interp, specialize_method(method, atype, sparams), run_optimizer) +function typeinf_code(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool) + frame = typeinf_frame(interp, mi, run_optimizer) frame === nothing && return nothing, Any - frame.inferred || return nothing, Any + is_inferred(frame) || return nothing, Any + if result_is_constabi(interp, run_optimizer, frame.result) + rt = frame.result.result::Const + return codeinfo_for_const(interp, frame.linfo, frame.result.valid_worlds, rt.val), widenconst(rt) + end code = frame.src rt = widenconst(ignorelimited(frame.result.result)) return code, rt end """ - typeinf_ircode( - interp::AbstractInterpreter, - method::Method, - atype, - sparams::SimpleVector, - optimize_until::Union{Integer,AbstractString,Nothing}, - ) -> (ir::Union{IRCode,Nothing}, returntype::Type) + typeinf_ircode(interp::AbstractInterpreter, match::MethodMatch, + optimize_until::Union{Integer,AbstractString,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type) + typeinf_ircode(interp::AbstractInterpreter, + method::Method, atype, sparams::SimpleVector, + optimize_until::Union{Integer,AbstractString,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type) + typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance, + optimize_until::Union{Integer,AbstractString,Nothing}) -> (ir::Union{IRCode,Nothing}, returntype::Type) Infer a `method` and return an `IRCode` with inferred `returntype` on success. 
""" -function typeinf_ircode( - interp::AbstractInterpreter, - method::Method, - @nospecialize(atype), - sparams::SimpleVector, - optimize_until::Union{Integer,AbstractString,Nothing}, -) - ccall(:jl_typeinf_begin, Cvoid, ()) - frame = typeinf_frame(interp, method, atype, sparams, false) +typeinf_ircode(interp::AbstractInterpreter, match::MethodMatch, + optimize_until::Union{Integer,AbstractString,Nothing}) = + typeinf_ircode(interp, specialize_method(match), optimize_until) +typeinf_ircode(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, + optimize_until::Union{Integer,AbstractString,Nothing}) = + typeinf_ircode(interp, specialize_method(method, atype, sparams), optimize_until) +function typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance, + optimize_until::Union{Integer,AbstractString,Nothing}) + start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) + frame = typeinf_frame(interp, mi, false) if frame === nothing - ccall(:jl_typeinf_end, Cvoid, ()) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) return nothing, Any end (; result) = frame - opt_params = OptimizationParams(interp) - opt = OptimizationState(frame, opt_params, interp) - ir = run_passes(opt.src, opt, result, optimize_until) + opt = OptimizationState(frame, interp) + ir = run_passes_ipo_safe(opt.src, opt, result, optimize_until) rt = widenconst(ignorelimited(result.result)) - ccall(:jl_typeinf_end, Cvoid, ()) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) return ir, rt end # compute an inferred frame -function typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool) - mi = specialize_method(method, atype, sparams)::MethodInstance - ccall(:jl_typeinf_begin, Cvoid, ()) - result = InferenceResult(mi) - frame = InferenceState(result, run_optimizer ? :global : :no, interp) +typeinf_frame(interp::AbstractInterpreter, match::MethodMatch, run_optimizer::Bool) = + typeinf_frame(interp, specialize_method(match), run_optimizer) +typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, + run_optimizer::Bool) = + typeinf_frame(interp, specialize_method(method, atype, sparams), run_optimizer) +function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool) + start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) + result = InferenceResult(mi, typeinf_lattice(interp)) + cache_mode = run_optimizer ? 
:global : :no + frame = InferenceState(result, cache_mode, interp) frame === nothing && return nothing typeinf(interp, frame) - ccall(:jl_typeinf_end, Cvoid, ()) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) return frame end # compute (and cache) an inferred AST and return type function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance) method = mi.def::Method - for i = 1:2 # test-and-lock-and-test - i == 2 && ccall(:jl_typeinf_begin, Cvoid, ()) - code = get(code_cache(interp), mi, nothing) - if code isa CodeInstance - # see if this code already exists in the cache - inf = code.inferred - if use_const_api(code) - i == 2 && ccall(:jl_typeinf_end, Cvoid, ()) - tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ()) - rettype_const = code.rettype_const - tree.code = Any[ ReturnNode(quoted(rettype_const)) ] - nargs = Int(method.nargs) - tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms) - tree.slotflags = fill(IR_FLAG_NULL, nargs) - tree.ssavaluetypes = 1 - tree.codelocs = Int32[1] - tree.linetable = [LineInfoNode(method.module, method.name, method.file, method.line, Int32(0))] - tree.inferred = true - tree.ssaflags = UInt8[0] - tree.pure = true - tree.inlineable = true - tree.parent = mi - tree.rettype = Core.Typeof(rettype_const) - tree.min_world = code.min_world - tree.max_world = code.max_world - return tree - elseif isa(inf, CodeInfo) - i == 2 && ccall(:jl_typeinf_end, Cvoid, ()) - if !(inf.min_world == code.min_world && - inf.max_world == code.max_world && - inf.rettype === code.rettype) - inf = copy(inf) - inf.min_world = code.min_world - inf.max_world = code.max_world - inf.rettype = code.rettype - end - return inf - elseif isa(inf, Vector{UInt8}) - i == 2 && ccall(:jl_typeinf_end, Cvoid, ()) - inf = _uncompressed_ir(code, inf) - return inf + start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) + code = get(code_cache(interp), mi, nothing) + if code isa CodeInstance + # see if this code already exists in the cache + inf = @atomic :monotonic code.inferred + if use_const_api(code) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + return codeinfo_for_const(interp, mi, WorldRange(code.min_world, code.max_world), code.rettype_const) + elseif isa(inf, CodeInfo) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + if !(inf.min_world == code.min_world && + inf.max_world == code.max_world && + inf.rettype === code.rettype) + inf = copy(inf) + inf.min_world = code.min_world + inf.max_world = code.max_world + inf.rettype = code.rettype end + return inf + elseif isa(inf, String) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + inf = _uncompressed_ir(code, inf) + return inf end end - if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg() - return retrieve_code_info(mi) + if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_output(#=incremental=#false) + return retrieve_code_info(mi, get_world_counter(interp)) end lock_mi_inference(interp, mi) - frame = InferenceState(InferenceResult(mi), #=cache=#:global, interp) + result = InferenceResult(mi, typeinf_lattice(interp)) + frame = InferenceState(result, #=cache_mode=#:global, interp) frame === nothing && return nothing typeinf(interp, frame) - ccall(:jl_typeinf_end, Cvoid, ()) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + if result_is_constabi(interp, true, frame.result) + return codeinfo_for_const(interp, frame.linfo, frame.result.valid_worlds, 
frame.result.result.val) + end frame.src.inferred || return nothing return frame.src end @@ -1046,42 +1053,44 @@ function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize if contains_is(unwrap_unionall(atype).parameters, Union{}) return Union{} # don't ask: it does weird and unnecessary things, if it occurs during bootstrap end - mi = specialize_method(method, atype, sparams)::MethodInstance - for i = 1:2 # test-and-lock-and-test - i == 2 && ccall(:jl_typeinf_begin, Cvoid, ()) - code = get(code_cache(interp), mi, nothing) - if code isa CodeInstance - # see if this rettype already exists in the cache - i == 2 && ccall(:jl_typeinf_end, Cvoid, ()) - return code.rettype - end + return typeinf_type(interp, specialize_method(method, atype, sparams)) +end +typeinf_type(interp::AbstractInterpreter, match::MethodMatch) = + typeinf_type(interp, specialize_method(match)) +function typeinf_type(interp::AbstractInterpreter, mi::MethodInstance) + start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) + code = get(code_cache(interp), mi, nothing) + if code isa CodeInstance + # see if this rettype already exists in the cache + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + return code.rettype end - result = InferenceResult(mi) + result = InferenceResult(mi, typeinf_lattice(interp)) typeinf(interp, result, :global) - ccall(:jl_typeinf_end, Cvoid, ()) - result.result isa InferenceState && return nothing + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) + is_inferred(result) || return nothing return widenconst(ignorelimited(result.result)) end # This is a bridge for the C code calling `jl_typeinf_func()` typeinf_ext_toplevel(mi::MethodInstance, world::UInt) = typeinf_ext_toplevel(NativeInterpreter(world), mi) -function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance) - if isa(linfo.def, Method) +function typeinf_ext_toplevel(interp::AbstractInterpreter, mi::MethodInstance) + if isa(mi.def, Method) # method lambda - infer this specialization via the method cache - src = typeinf_ext(interp, linfo) + src = typeinf_ext(interp, mi) else - src = linfo.uninferred::CodeInfo + src = mi.uninferred::CodeInfo if !src.inferred # toplevel lambda - infer directly - ccall(:jl_typeinf_begin, Cvoid, ()) + start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) if !src.inferred - result = InferenceResult(linfo) - frame = InferenceState(result, src, #=cache=#:global, interp) + result = InferenceResult(mi, typeinf_lattice(interp)) + frame = InferenceState(result, src, #=cache_mode=#:global, interp) typeinf(interp, frame) - @assert frame.inferred # TODO: deal with this better + @assert is_inferred(frame) # TODO: deal with this better src = frame.src end - ccall(:jl_typeinf_end, Cvoid, ()) + ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) end end return src @@ -1117,8 +1126,7 @@ function _return_type(interp::AbstractInterpreter, t::DataType) rt = widenconst(rt) else for match in _methods_by_ftype(t, -1, get_world_counter(interp))::Vector - match = match::MethodMatch - ty = typeinf_type(interp, match.method, match.spec_types, match.sparams) + ty = typeinf_type(interp, match::MethodMatch) ty === nothing && return Any rt = tmerge(rt, ty) rt === Any && break diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl index ace54c1316c45..5987c30be2b91 100644 --- a/base/compiler/typelattice.jl +++ b/base/compiler/typelattice.jl @@ -4,7 +4,7 @@ # structs/constants # ##################### -# N.B.: Const/PartialStruct are defined in Core, 
to allow them to be used
+# N.B.: Const/PartialStruct/InterConditional are defined in Core, to allow them to be used
 # inside the global code cache.
 #
 # # The type of a value might be constant
@@ -17,7 +17,7 @@
 #     fields::Vector{Any} # elements are other type lattice members
 # end
 import Core: Const, PartialStruct
-function PartialStruct(typ::DataType, fields::Vector{Any})
+function PartialStruct(@nospecialize(typ), fields::Vector{Any})
     for i = 1:length(fields)
         assert_nested_slotwrapper(fields[i])
     end
@@ -65,23 +65,88 @@ This is separate from `Conditional` to catch logic errors: the lattice element n
 while processing a call, then `Conditional` everywhere else. Thus `InterConditional` does not appear in
 `CompilerTypes`—these type's usages are disjoint—though we define the lattice for `InterConditional`.
 """
-struct InterConditional
-    slot::Int
-    thentype
-    elsetype
-    function InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype))
-        assert_nested_slotwrapper(thentype)
-        assert_nested_slotwrapper(elsetype)
-        return new(slot, thentype, elsetype)
-    end
-end
+:(InterConditional)
+import Core: InterConditional
+# struct InterConditional
+#     slot::Int
+#     thentype
+#     elsetype
+#     InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) =
+#         new(slot, thentype, elsetype)
+# end
 InterConditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) =
     InterConditional(slot_id(var), thentype, elsetype)
 const AnyConditional = Union{Conditional,InterConditional}
-Conditional(cnd::InterConditional) = Conditinal(cnd.slot, cnd.thentype, cnd.elsetype)
+Conditional(cnd::InterConditional) = Conditional(cnd.slot, cnd.thentype, cnd.elsetype)
 InterConditional(cnd::Conditional) = InterConditional(cnd.slot, cnd.thentype, cnd.elsetype)
+"""
+    alias::MustAlias
+
+This lattice element wraps a reference to an object field while recording the identity of the
+parent object. It allows certain constraints that can be imposed on the object field type
+by built-in functions like `isa` and `===` to be propagated to another reference to the
+same object field.
+One important note is that this lattice element assumes the invariant that the field of the
+wrapped slot object never changes until the slot object is re-assigned. This means the
+wrapped object field should be constant, as inference currently doesn't track any memory
+effects on a per-object basis. In particular, `maybe_const_fldidx` checks whether
+a given lattice element is eligible to be wrapped by `MustAlias`. Example:
+```julia
+let alias = getfield(x::Some{Union{Nothing,String}}, :value)::MustAlias(x, Some{Union{Nothing,String}}, 1, Union{Nothing,String})
+    if alias === nothing
+        # May assume `getfield(x, :value)` is `nothing` now
+    else
+        # May assume `getfield(x, :value)` is `::String` now
+    end
+end
+```
+N.B. currently this lattice element is only used in abstractinterpret, not in optimization
+"""
+struct MustAlias
+    slot::Int
+    vartyp::Any
+    fldidx::Int
+    fldtyp::Any
+    function MustAlias(slot::Int, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp))
+        assert_nested_slotwrapper(vartyp)
+        assert_nested_slotwrapper(fldtyp)
+        # @assert !isalreadyconst(vartyp) "vartyp is already const"
+        # @assert !isalreadyconst(fldtyp) "fldtyp is already const"
+        return new(slot, vartyp, fldidx, fldtyp)
+    end
+end
+MustAlias(var::SlotNumber, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp)) =
+    MustAlias(slot_id(var), vartyp, fldidx, fldtyp)
+
+_uniontypes(x::MustAlias, ts) = _uniontypes(widenconst(x), ts)
+
+"""
+    alias::InterMustAlias
+
+This lattice element is used in a very similar way to `InterConditional`, but corresponds to `MustAlias`.
+"""
+struct InterMustAlias
+    slot::Int
+    vartyp::Any
+    fldidx::Int
+    fldtyp::Any
+    function InterMustAlias(slot::Int, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp))
+        assert_nested_slotwrapper(vartyp)
+        assert_nested_slotwrapper(fldtyp)
+        # @assert !isalreadyconst(vartyp) "vartyp is already const"
+        # @assert !isalreadyconst(fldtyp) "fldtyp is already const"
+        return new(slot, vartyp, fldidx, fldtyp)
+    end
+end
+InterMustAlias(var::SlotNumber, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp)) =
+    InterMustAlias(slot_id(var), vartyp, fldidx, fldtyp)
+
+const AnyMustAlias = Union{MustAlias,InterMustAlias}
+MustAlias(alias::InterMustAlias) = MustAlias(alias.slot, alias.vartyp, alias.fldidx, alias.fldtyp)
+InterMustAlias(alias::MustAlias) = InterMustAlias(alias.slot, alias.vartyp, alias.fldidx, alias.fldtyp)
+
 struct PartialTypeVar
     tv::TypeVar
     # N.B.: Currently unused, but would allow turning something back
@@ -91,14 +156,6 @@ struct PartialTypeVar
     PartialTypeVar(tv::TypeVar, lb_certain::Bool, ub_certain::Bool) = new(tv, lb_certain, ub_certain)
 end
-# Wraps a type and represents that the value may also be undef at this point.
-# (only used in optimize, not abstractinterpret)
-# N.B. in the lattice, this is epsilon bigger than `typ` (even Any)
-struct MaybeUndef
-    typ
-    MaybeUndef(@nospecialize(typ)) = new(typ)
-end
-
 struct StateUpdate
     var::SlotNumber
     vtype::VarState
@@ -106,9 +163,44 @@
     conditional::Bool
 end
-# Represent that the type estimate has been approximated, due to "causes"
-# (only used in abstract interpretion, doesn't appear in optimization)
-# N.B. in the lattice, this is epsilon smaller than `typ` (except Union{})
+"""
+    struct LimitedAccuracy
+
+A `LimitedAccuracy` lattice element is used to indicate that the true inference
+result was approximate due to heuristic termination of a recursion. For example,
+consider two call stacks starting from `A` and `B` that look like:
+
+    A -> C -> A -> D
+    B -> C -> A -> D
+
+In the first case, inference may have decided that `A->C->A` constitutes a cycle,
+widening the result it obtained for `C`, even if it might otherwise have been
+able to obtain a result. In this case, the result inferred for `C` will be
+annotated with this lattice type to indicate that the obtained result is an
+upper bound for the non-limited inference. In particular, this means that the
+call stack originating at `B` will re-perform inference without being poisoned
+by the potentially inaccurate result obtained during the inference of `A`.
+
+N.B.: We do *not* make any effort to ensure the reverse. For example, if `B`
+is inferred first, then we may cache a precise result for `C` and re-use this
+result while inferring `A`, even if inference of `A` would not have been able
+to obtain this result due to limiting. This is undesirable, because it makes
+some inference results order dependent, but it is unclear how this situation
+could be avoided.
+
+A `LimitedAccuracy` element wraps another lattice element (let's call it `T`)
+and additionally tracks the `causes` due to which limitation occurred. As a
+lattice element, `LimitedAccuracy(T)` is considered ε smaller than the
+corresponding lattice element `T`, but in particular, all lattice elements that
+are `⊑ T` (but not equal `T`) are also `⊑ LimitedAccuracy(T)`.
+
+The `causes` list is used to determine whether a particular cause of limitation is
+inevitable and, if so, to widen `LimitedAccuracy(T)` back to `T`. For example,
+in the call stacks above, if any call to `A` always leads back to `A`, then
+it does not matter whether we start at `A` or reach it via `B`: Any inference
+that reaches `A` will always hit the same limitation and the result may thus
+be cached.
+"""
 struct LimitedAccuracy
     typ
     causes::IdSet{InferenceState}
@@ -117,12 +209,13 @@
         return new(typ, causes)
     end
 end
+LimitedAccuracy(@nospecialize(T), ::Nothing) = T
 """
     struct NotFound end
     const NOT_FOUND = NotFound()
-A special sigleton that represents a variable has not been analyzed yet.
+A special singleton that represents a variable has not been analyzed yet.
 Particularly, all SSA value types are initialized as `NOT_FOUND` when creating a new
 `InferenceState`. Note that this is only used for `smerge`, which updates abstract state
 `VarTable`, and thus we don't define the lattice for this.
@@ -131,7 +224,7 @@ struct NotFound end
 const NOT_FOUND = NotFound()
-const CompilerTypes = Union{MaybeUndef, Const, Conditional, NotFound, PartialStruct}
+const CompilerTypes = Union{Const, Conditional, MustAlias, NotFound, PartialStruct}
 ==(x::CompilerTypes, y::CompilerTypes) = x === y
 ==(x::Type, y::CompilerTypes) = false
 ==(x::CompilerTypes, y::Type) = false
@@ -143,21 +236,34 @@ const CompilerTypes = Union{MaybeUndef, Const, Conditional, NotFound, PartialStr
 # slot wrappers
 # =============
-function assert_nested_slotwrapper(@nospecialize t)
+@nospecializeinfer function assert_nested_slotwrapper(@nospecialize t)
     @assert !(t isa Conditional)      "found nested Conditional"
     @assert !(t isa InterConditional) "found nested InterConditional"
+    @assert !(t isa MustAlias)        "found nested MustAlias"
+    @assert !(t isa InterMustAlias)   "found nested InterMustAlias"
     return t
 end
-widenslotwrapper(@nospecialize typ) = typ
-widenslotwrapper(typ::AnyConditional) = widenconditional(typ)
-widenwrappedslotwrapper(@nospecialize typ) = widenslotwrapper(typ)
-widenwrappedslotwrapper(typ::LimitedAccuracy) = LimitedAccuracy(widenslotwrapper(typ.typ), typ.causes)
+@nospecializeinfer function widenslotwrapper(@nospecialize typ)
+    if isa(typ, AnyConditional)
+        return widenconditional(typ)
+    elseif isa(typ, AnyMustAlias)
+        return widenmustalias(typ)
+    end
+    return typ
+end
+
+@nospecializeinfer function widenwrappedslotwrapper(@nospecialize typ)
+    if isa(typ, LimitedAccuracy)
+        return LimitedAccuracy(widenslotwrapper(typ.typ), typ.causes)
+    end
+    return widenslotwrapper(typ)
+end
 # Conditional
 # ===========
-function widenconditional(@nospecialize typ)
+@nospecializeinfer function widenconditional(@nospecialize typ)
     if isa(typ, AnyConditional)
         if typ.thentype === Union{}
             return 
Const(false) @@ -166,19 +272,24 @@ function widenconditional(@nospecialize typ) else return Bool end + elseif isa(typ, LimitedAccuracy) + error("unhandled LimitedAccuracy") end return typ end -widenconditional(::LimitedAccuracy) = error("unhandled LimitedAccuracy") -widenwrappedconditional(@nospecialize typ) = widenconditional(typ) -widenwrappedconditional(typ::LimitedAccuracy) = LimitedAccuracy(widenconditional(typ.typ), typ.causes) +@nospecializeinfer function widenwrappedconditional(@nospecialize typ) + if isa(typ, LimitedAccuracy) + return LimitedAccuracy(widenconditional(typ.typ), typ.causes) + end + return widenconditional(typ) +end # `Conditional` and `InterConditional` are valid in opposite contexts # (i.e. local inference and inter-procedural call), as such they will never be compared -function issubconditional(a::C, b::C) where {C<:AnyConditional} +@nospecializeinfer function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional} if is_same_conditionals(a, b) - if a.thentype ⊑ b.thentype - if a.elsetype ⊑ b.elsetype + if ⊑(lattice, a.thentype, b.thentype) + if ⊑(lattice, a.elsetype, b.elsetype) return true end end @@ -188,7 +299,7 @@ end is_same_conditionals(a::C, b::C) where C<:AnyConditional = a.slot == b.slot -is_lattice_bool(@nospecialize(typ)) = typ !== Bottom && typ ⊑ Bool +@nospecializeinfer is_lattice_bool(lattice::AbstractLattice, @nospecialize(typ)) = typ !== Bottom && ⊑(lattice, typ, Bool) maybe_extract_const_bool(c::Const) = (val = c.val; isa(val, Bool)) ? val : nothing function maybe_extract_const_bool(c::AnyConditional) @@ -196,7 +307,82 @@ function maybe_extract_const_bool(c::AnyConditional) (c.elsetype === Bottom && !(c.thentype === Bottom)) && return true nothing end -maybe_extract_const_bool(@nospecialize c) = nothing +@nospecializeinfer maybe_extract_const_bool(@nospecialize c) = nothing + +# MustAlias +# ========= + +@nospecializeinfer function widenmustalias(@nospecialize typ) + if isa(typ, AnyMustAlias) + return typ.fldtyp + elseif isa(typ, LimitedAccuracy) + error("unhandled LimitedAccuracy") + end + return typ +end + +@nospecializeinfer function isalreadyconst(@nospecialize t) + isa(t, Const) && return true + issingletontype(t) && return true + return isconstType(t) +end + +@nospecializeinfer function maybe_const_fldidx(@nospecialize(objtyp), @nospecialize(fldval)) + t = widenconst(objtyp) + if isa(fldval, Int) + fldidx = fldval + elseif isa(fldval, Symbol) + isa(t, DataType) || isa(t, UnionAll) || return nothing + fldidx = fieldindex(t, fldval, false) + else + return nothing + end + fldidx == 0 && return nothing + isconst(t, fldidx) || return nothing + fldcnt = fieldcount_noerror(t) + (fldcnt === nothing || fldcnt == 0) && return nothing + return fldidx +end + +@nospecializeinfer function form_mustalias_conditional(alias::MustAlias, @nospecialize(thentype), @nospecialize(elsetype)) + (; slot, vartyp, fldidx) = alias + if isa(vartyp, PartialStruct) + fields = vartyp.fields + thenfields = thentype === Bottom ? nothing : copy(fields) + elsefields = elsetype === Bottom ? nothing : copy(fields) + for i in 1:length(fields) + if i == fldidx + thenfields === nothing || (thenfields[i] = thentype) + elsefields === nothing || (elsefields[i] = elsetype) + end + end + return Conditional(slot, + thenfields === nothing ? Bottom : PartialStruct(vartyp.typ, thenfields), + elsefields === nothing ? Bottom : PartialStruct(vartyp.typ, elsefields)) + else + vartyp_widened = widenconst(vartyp) + thenfields = thentype === Bottom ? 
nothing : Any[] + elsefields = elsetype === Bottom ? nothing : Any[] + for i in 1:fieldcount(vartyp_widened) + if i == fldidx + thenfields === nothing || push!(thenfields, thentype) + elsefields === nothing || push!(elsefields, elsetype) + else + t = fieldtype(vartyp_widened, i) + thenfields === nothing || push!(thenfields, t) + elsefields === nothing || push!(elsefields, t) + end + end + return Conditional(slot, + thenfields === nothing ? Bottom : PartialStruct(vartyp_widened, thenfields), + elsefields === nothing ? Bottom : PartialStruct(vartyp_widened, elsefields)) + end +end + +function issubalias(a::AnyMustAlias, b::AnyMustAlias) + return a.slot == b.slot && a.fldidx == b.fldidx && + a.vartyp ⊑ b.vartyp && a.fldtyp ⊑ b.fldtyp +end # LimitedAccuracy # =============== @@ -207,68 +393,103 @@ ignorelimited(typ::LimitedAccuracy) = typ.typ # lattice order # ============= -""" - a ⊑ b -> Bool +@nospecializeinfer function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) + r = ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b)) + r || return false + isa(b, LimitedAccuracy) || return true -The non-strict partial order over the type inference lattice. -""" -@nospecialize(a) ⊑ @nospecialize(b) = begin - if isa(b, LimitedAccuracy) - if !isa(a, LimitedAccuracy) - return false - end - if b.causes ⊈ a.causes - return false - end - b = b.typ - end - isa(a, LimitedAccuracy) && (a = a.typ) - if isa(a, MaybeUndef) && !isa(b, MaybeUndef) - return false + # We've found that ignorelimited(a) ⊑ ignorelimited(b). + # Now perform the reverse query to check for equality. + ab_eq = ⊑(widenlattice(lattice), b.typ, ignorelimited(a)) + + if !ab_eq + # a's unlimited type is strictly smaller than b's + return true end - isa(a, MaybeUndef) && (a = a.typ) - isa(b, MaybeUndef) && (b = b.typ) + + # a and b's unlimited types are equal. + isa(a, LimitedAccuracy) || return false # b is limited, so ε smaller + return b.causes ⊆ a.causes +end + +@nospecializeinfer function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) + # Fast paths for common cases b === Any && return true a === Any && return false a === Union{} && return true b === Union{} && return false - @assert !isa(a, TypeVar) "invalid lattice item" - @assert !isa(b, TypeVar) "invalid lattice item" - if isa(a, AnyConditional) - if isa(b, AnyConditional) - return issubconditional(a, b) + ConditionalT = isa(lattice, ConditionalsLattice) ? Conditional : InterConditional + if isa(a, ConditionalT) + if isa(b, ConditionalT) + return issubconditional(lattice, a, b) elseif isa(b, Const) && isa(b.val, Bool) return maybe_extract_const_bool(a) === b.val end a = Bool - elseif isa(b, AnyConditional) + elseif isa(b, ConditionalT) return false end + return ⊑(widenlattice(lattice), a, b) +end + +@nospecializeinfer function ⊑(𝕃::AnyMustAliasesLattice, @nospecialize(a), @nospecialize(b)) + MustAliasT = isa(𝕃, MustAliasesLattice) ? 
MustAlias : InterMustAlias + if isa(a, MustAliasT) + if isa(b, MustAliasT) + return issubalias(a, b) + end + a = widenmustalias(a) + elseif isa(b, MustAliasT) + return ⊏(widenlattice(𝕃), a, widenmustalias(b)) + end + return ⊑(widenlattice(𝕃), a, b) +end + +@nospecializeinfer function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, PartialStruct) if isa(b, PartialStruct) if !(length(a.fields) == length(b.fields) && a.typ <: b.typ) return false end for i in 1:length(b.fields) - # XXX: let's handle varargs later - ⊑(a.fields[i], b.fields[i]) || return false + af = a.fields[i] + bf = b.fields[i] + if i == length(b.fields) + if isvarargtype(af) + # If `af` is vararg, so must bf by the <: above + @assert isvarargtype(bf) + continue + elseif isvarargtype(bf) + # If `bf` is vararg, it must match the information + # in the type, so there's nothing to check here. + continue + end + end + ⊑(lattice, af, bf) || return false end return true end return isa(b, Type) && a.typ <: b elseif isa(b, PartialStruct) if isa(a, Const) - nfields(a.val) == length(b.fields) || return false - widenconst(b).name === widenconst(a).name || return false + nf = nfields(a.val) + nf == length(b.fields) || return false + widea = widenconst(a)::DataType + wideb = widenconst(b) + wideb′ = unwrap_unionall(wideb)::DataType + widea.name === wideb′.name || return false # We can skip the subtype check if b is a Tuple, since in that # case, the ⊑ of the elements is sufficient. - if b.typ.name !== Tuple.name && !(widenconst(a) <: widenconst(b)) + if wideb′.name !== Tuple.name && !(widea <: wideb) return false end - for i in 1:nfields(a.val) - # XXX: let's handle varargs later + for i in 1:nf isdefined(a.val, i) || continue # since ∀ T Union{} ⊑ T - ⊑(Const(getfield(a.val, i)), b.fields[i]) || return false + bfᵢ = b.fields[i] + if i == nf + bfᵢ = unwrapva(bfᵢ) + end + ⊑(lattice, Const(getfield(a.val, i)), bfᵢ) || return false end return true end @@ -278,10 +499,16 @@ The non-strict partial order over the type inference lattice. if isa(b, PartialOpaque) (a.parent === b.parent && a.source === b.source) || return false return (widenconst(a) <: widenconst(b)) && - ⊑(a.env, b.env) + ⊑(lattice, a.env, b.env) end - return widenconst(a) ⊑ b + return ⊑(widenlattice(lattice), widenconst(a), b) + elseif isa(b, PartialOpaque) + return false end + return ⊑(widenlattice(lattice), a, b) +end + +@nospecializeinfer function ⊑(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, Const) if isa(b, Const) return a.val === b.val @@ -291,112 +518,112 @@ The non-strict partial order over the type inference lattice. # most conservative option. return isa(b, Type) && isa(a.val, b) elseif isa(b, Const) - if isa(a, DataType) && isdefined(a, :instance) + if issingletontype(a) return a.instance === b.val end return false - elseif isa(a, PartialTypeVar) && b === TypeVar - return true - elseif isa(a, Type) && isa(b, Type) - return a <: b - else # handle this conservatively in the remaining cases - return a === b + elseif isa(a, PartialTypeVar) + return b === TypeVar || a === b + elseif isa(b, PartialTypeVar) + return false end + return ⊑(widenlattice(lattice), a, b) end -""" - a ⊏ b -> Bool - -The strict partial order over the type inference lattice. -This is defined as the irreflexive kernel of `⊑`. -""" -@nospecialize(a) ⊏ @nospecialize(b) = a ⊑ b && !⊑(b, a) - -""" - a ⋤ b -> Bool - -This order could be used as a slightly more efficient version of the strict order `⊏`, -where we can safely assume `a ⊑ b` holds. 
-""" -@nospecialize(a) ⋤ @nospecialize(b) = !⊑(b, a) +@nospecializeinfer function is_lattice_equal(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) + if isa(a, LimitedAccuracy) + isa(b, LimitedAccuracy) || return false + a.causes == b.causes || return false + a = a.typ + b = b.typ + elseif isa(b, LimitedAccuracy) + return false + end + return is_lattice_equal(widenlattice(lattice), a, b) +end -""" - is_lattice_equal(a, b) -> Bool +@nospecializeinfer function is_lattice_equal(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) + ConditionalT = isa(lattice, ConditionalsLattice) ? Conditional : InterConditional + if isa(a, ConditionalT) || isa(b, ConditionalT) + # TODO: Unwrap these and recurse to is_lattice_equal + return ⊑(lattice, a, b) && ⊑(lattice, b, a) + end + return is_lattice_equal(widenlattice(lattice), a, b) +end -Check if two lattice elements are partial order equivalent. -This is basically `a ⊑ b && b ⊑ a` but with extra performance optimizations. -""" -function is_lattice_equal(@nospecialize(a), @nospecialize(b)) - a === b && return true +@nospecializeinfer function is_lattice_equal(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, PartialStruct) isa(b, PartialStruct) || return false length(a.fields) == length(b.fields) || return false widenconst(a) == widenconst(b) || return false + a.fields === b.fields && return true # fast path for i in 1:length(a.fields) - is_lattice_equal(a.fields[i], b.fields[i]) || return false + is_lattice_equal(lattice, a.fields[i], b.fields[i]) || return false end return true end isa(b, PartialStruct) && return false + if isa(a, PartialOpaque) + isa(b, PartialOpaque) || return false + widenconst(a) == widenconst(b) || return false + a.source === b.source || return false + a.parent === b.parent || return false + return is_lattice_equal(lattice, a.env, b.env) + end + isa(b, PartialOpaque) && return false + return is_lattice_equal(widenlattice(lattice), a, b) +end + +@nospecializeinfer function is_lattice_equal(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) + a === b && return true if a isa Const if issingletontype(b) return a.val === b.instance end + # N.B. Assumes a === b checked above return false end if b isa Const if issingletontype(a) return a.instance === b.val end + # N.B. Assumes a === b checked above return false end - if isa(a, PartialOpaque) - isa(b, PartialOpaque) || return false - widenconst(a) == widenconst(b) || return false - a.source === b.source || return false - a.parent === b.parent || return false - return is_lattice_equal(a.env, b.env) + if isa(a, PartialTypeVar) || isa(b, PartialTypeVar) + return false end - return a ⊑ b && b ⊑ a + return is_lattice_equal(widenlattice(lattice), a, b) end # lattice operations # ================== -""" - tmeet(v, t::Type) -> x - -Computes typeintersect over the extended inference lattice, as precisely as we can, -where `v` is in the extended lattice, and `t` is a `Type`. 
-""" -function tmeet(@nospecialize(v), @nospecialize(t::Type)) - if isa(v, Const) - if !has_free_typevars(t) && !isa(v.val, t) - return Bottom - end - return v - elseif isa(v, PartialStruct) +@nospecializeinfer function tmeet(lattice::PartialsLattice, @nospecialize(v), @nospecialize(t::Type)) + if isa(v, PartialStruct) has_free_typevars(t) && return v widev = widenconst(v) - if widev <: t + ti = typeintersect(widev, t) + if ti === widev return v end - ti = typeintersect(widev, t) - valid_as_lattice(ti) || return Bottom - @assert widev <: Tuple - new_fields = Vector{Any}(undef, length(v.fields)) - for i = 1:length(new_fields) - vfi = v.fields[i] - if isvarargtype(vfi) - new_fields[i] = vfi - else - new_fields[i] = tmeet(vfi, widenconst(getfield_tfunc(t, Const(i)))) - if new_fields[i] === Bottom - return Bottom + valid_as_lattice(ti, true) || return Bottom + if widev <: Tuple + new_fields = Vector{Any}(undef, length(v.fields)) + for i = 1:length(new_fields) + vfi = v.fields[i] + if isvarargtype(vfi) + new_fields[i] = vfi + else + nfi = new_fields[i] = tmeet(lattice, vfi, widenconst(getfield_tfunc(lattice, t, Const(i)))) + if nfi === Bottom + return Bottom + end end end + return tuple_tfunc(lattice, new_fields) end - return tuple_tfunc(new_fields) + v = widev elseif isa(v, PartialOpaque) has_free_typevars(t) && return v widev = widenconst(v) @@ -404,17 +631,56 @@ function tmeet(@nospecialize(v), @nospecialize(t::Type)) return v end ti = typeintersect(widev, t) - valid_as_lattice(ti) || return Bottom + valid_as_lattice(ti, true) || return Bottom return PartialOpaque(ti, v.env, v.parent, v.source) - elseif isa(v, Conditional) + end + return tmeet(widenlattice(lattice), v, t) +end + +@nospecializeinfer function tmeet(lattice::ConstsLattice, @nospecialize(v), @nospecialize(t::Type)) + if isa(v, Const) + if !has_free_typevars(t) && !isa(v.val, t) + return Bottom + end + return v + end + tmeet(widenlattice(lattice), widenconst(v), t) +end + +@nospecializeinfer function tmeet(lattice::ConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) + if isa(v, Conditional) if !(Bool <: t) return Bottom end return v end - ti = typeintersect(widenconst(v), t) - valid_as_lattice(ti) || return Bottom - return ti + tmeet(widenlattice(lattice), v, t) +end + +@nospecializeinfer function tmeet(𝕃::MustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) + if isa(v, MustAlias) + v = widenmustalias(v) + end + return tmeet(widenlattice(𝕃), v, t) +end + +@nospecializeinfer function tmeet(lattice::InferenceLattice, @nospecialize(v), @nospecialize(t::Type)) + # TODO: This can probably happen and should be handled + @assert !isa(v, LimitedAccuracy) + tmeet(widenlattice(lattice), v, t) +end + +@nospecializeinfer function tmeet(lattice::InterConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) + # TODO: This can probably happen and should be handled + @assert !isa(v, AnyConditional) + tmeet(widenlattice(lattice), v, t) +end + +@nospecializeinfer function tmeet(𝕃::InterMustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) + if isa(v, InterMustAlias) + v = widenmustalias(v) + end + return tmeet(widenlattice(𝕃), v, t) end """ @@ -423,12 +689,12 @@ end Widens extended lattice element `x` to native `Type` representation. """ widenconst(::AnyConditional) = Bool +widenconst(a::AnyMustAlias) = widenconst(widenmustalias(a)) widenconst(c::Const) = (v = c.val; isa(v, Type) ? 
Type{v} : typeof(v)) -widenconst(m::MaybeUndef) = widenconst(m.typ) widenconst(::PartialTypeVar) = TypeVar widenconst(t::PartialStruct) = t.typ widenconst(t::PartialOpaque) = t.typ -widenconst(t::Type) = t +@nospecializeinfer widenconst(@nospecialize t::Type) = t widenconst(::TypeVar) = error("unhandled TypeVar") widenconst(::TypeofVararg) = error("unhandled Vararg") widenconst(::LimitedAccuracy) = error("unhandled LimitedAccuracy") @@ -437,31 +703,28 @@ widenconst(::LimitedAccuracy) = error("unhandled LimitedAccuracy") # state management # #################### -issubstate(a::VarState, b::VarState) = (a.typ ⊑ b.typ && a.undef <= b.undef) - -function smerge(sa::Union{NotFound,VarState}, sb::Union{NotFound,VarState}) +function smerge(lattice::AbstractLattice, sa::Union{NotFound,VarState}, sb::Union{NotFound,VarState}) sa === sb && return sa sa === NOT_FOUND && return sb sb === NOT_FOUND && return sa - issubstate(sa, sb) && return sb - issubstate(sb, sa) && return sa - return VarState(tmerge(sa.typ, sb.typ), sa.undef | sb.undef) + return VarState(tmerge(lattice, sa.typ, sb.typ), sa.undef | sb.undef) end -@inline tchanged(@nospecialize(n), @nospecialize(o)) = o === NOT_FOUND || (n !== NOT_FOUND && !(n ⊑ o)) -@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n::VarState, o::VarState))) +@nospecializeinfer @inline schanged(lattice::AbstractLattice, @nospecialize(n), @nospecialize(o)) = + (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !(n.undef <= o.undef && ⊑(lattice, n.typ, o.typ)))) # remove any lattice elements that wrap the reassigned slot object from the vartable function invalidate_slotwrapper(vt::VarState, changeid::Int, ignore_conditional::Bool) newtyp = ignorelimited(vt.typ) - if (!ignore_conditional && isa(newtyp, Conditional) && newtyp.slot == changeid) - newtyp = widenwrappedslotwrapper(vt.typ) + if (!ignore_conditional && isa(newtyp, Conditional) && newtyp.slot == changeid) || + (isa(newtyp, MustAlias) && newtyp.slot == changeid) + newtyp = @noinline widenwrappedslotwrapper(vt.typ) return VarState(newtyp, vt.undef) end return nothing end -function stupdate!(state::VarTable, changes::StateUpdate) +function stupdate!(lattice::AbstractLattice, state::VarTable, changes::StateUpdate) changed = false changeid = slot_id(changes.var) for i = 1:length(state) @@ -475,45 +738,27 @@ function stupdate!(state::VarTable, changes::StateUpdate) newtype = invalidated end oldtype = state[i] - if schanged(newtype, oldtype) - state[i] = smerge(oldtype, newtype) + if schanged(lattice, newtype, oldtype) + state[i] = smerge(lattice, oldtype, newtype) changed = true end end return changed end -function stupdate!(state::VarTable, changes::VarTable) +function stupdate!(lattice::AbstractLattice, state::VarTable, changes::VarTable) changed = false for i = 1:length(state) newtype = changes[i] oldtype = state[i] - if schanged(newtype, oldtype) - state[i] = smerge(oldtype, newtype) + if schanged(lattice, newtype, oldtype) + state[i] = smerge(lattice, oldtype, newtype) changed = true end end return changed end -function stupdate1!(state::VarTable, change::StateUpdate) - changeid = slot_id(change.var) - for i = 1:length(state) - invalidated = invalidate_slotwrapper(state[i], changeid, change.conditional) - if invalidated !== nothing - state[i] = invalidated - end - end - # and update the type of it - newtype = change.vtype - oldtype = state[changeid] - if schanged(newtype, oldtype) - state[changeid] = smerge(oldtype, newtype) - return true - 
end - return false -end - function stoverwrite!(state::VarTable, newstate::VarTable) for i = 1:length(state) state[i] = newstate[i] diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index d44619fa508df..55502bfb75d2b 100644 --- a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -6,7 +6,6 @@ const MAX_TYPEUNION_COMPLEXITY = 3 const MAX_TYPEUNION_LENGTH = 3 -const MAX_INLINE_CONST_SIZE = 256 ######################### # limitation heuristics # @@ -36,6 +35,12 @@ end # try to find `type` somewhere in `comparison` type # at a minimum nesting depth of `mindepth` function is_derived_type(@nospecialize(t), @nospecialize(c), mindepth::Int) + if has_free_typevars(t) || has_free_typevars(c) + # Don't allow finding types with free typevars. These strongly depend + # on identity and we do not make any effort to make sure this returns + # sensible results in that case. + return false + end if t === c return mindepth <= 1 end @@ -88,10 +93,7 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec return t # fast path: unparameterized are always simple else ut = unwrap_unionall(t) - if isa(ut, DataType) && isa(c, Type) && c !== Union{} && c <: t - # TODO: need to check that the UnionAll bounds on t are limited enough too - return t # t is already wider than the comparison in the type lattice - elseif is_derived_type_from_any(ut, sources, depth) + if is_derived_type_from_any(ut, sources, depth) return t # t isn't something new end end @@ -114,15 +116,31 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec return Union{a, b} end elseif isa(t, DataType) - if isType(t) # see equivalent case in type_more_complex - tt = unwrap_unionall(t.parameters[1]) - if isa(tt, Union) || isa(tt, TypeVar) || isType(tt) - is_derived_type_from_any(tt, sources, depth + 1) && return t + if isType(t) + # Type is fairly important, so do not widen it as fast as other types if avoidable + tt = t.parameters[1] + ttu = unwrap_unionall(tt) # TODO: use argument_datatype(tt) after #50692 fixed + # must forbid nesting through this if we detect that potentially occurring + # we already know !is_derived_type_from_any so refuse to recurse here + if !isa(ttu, DataType) + return Type + elseif isType(ttu) + return Type{<:Type} + end + # try to peek into c to get a comparison object, but if we can't perhaps t is already simple enough on its own + if isType(c) + ct = c.parameters[1] else - isType(c) && (c = unwrap_unionall(c.parameters[1])) - type_more_complex(tt, c, sources, depth, 0, 0) || return t + ct = Union{} end - return Type + Qt = __limit_type_size(tt, ct, sources, depth + 1, 0) + Qt === tt && return t + Qt === Any && return Type + # Can't form Type{<:Qt} just yet, without first make sure we limited the depth + # enough, since this moves Qt outside of Type for is_derived_type_from_any + Qt = __limit_type_size(tt, ct, sources, depth + 2, 0) + Qt === Any && return Type + return Type{<:Qt} elseif isa(c, DataType) tP = t.parameters cP = c.parameters @@ -136,7 +154,7 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec Q = Any[ tP[i] for i in 1:np ] if ltP > np # combine tp[np:end] into tP[np] using Vararg - Q[np] = tuple_tail_elem(Bottom, Any[ tP[i] for i in np:ltP ]) + Q[np] = tuple_tail_elem(fallback_lattice, Bottom, Any[ tP[i] for i in np:ltP ]) end for i = 1:np # now apply limit element-wise to Q @@ -155,10 +173,11 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec end end if 
allowed_tuplelen < 1 && t.name === Tuple.name + # forbid nesting Tuple{Tuple{Tuple...}} through this return Any end widert = t.name.wrapper - if !(t <: widert) + if !(t <: widert) # XXX: we should call has_free_typevars(t) here, but usually t does not have those wrappers by the time it got here # This can happen when a typevar has bounds too wide for its context, e.g. # `Complex{T} where T` is not a subtype of `Complex`. In that case widen even # faster to something safe to ensure the result is a supertype of the input. @@ -209,26 +228,25 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe return false # Bottom is as simple as they come elseif isa(t, DataType) && isempty(t.parameters) return false # fastpath: unparameterized types are always finite - elseif tupledepth > 0 && isa(unwrap_unionall(t), DataType) && isa(c, Type) && c !== Union{} && c <: t - # TODO: need to check that the UnionAll bounds on t are limited enough too - return false # t is already wider than the comparison in the type lattice - elseif tupledepth > 0 && is_derived_type_from_any(unwrap_unionall(t), sources, depth) + elseif is_derived_type_from_any(unwrap_unionall(t), sources, depth) return false # t isn't something new end # peel off wrappers isvarargtype(t) && (t = unwrapva(t)) isvarargtype(c) && (c = unwrapva(c)) if isa(c, UnionAll) - # allow wrapping type with fewer UnionAlls than comparison if in a covariant context + # allow wrapping type with fewer UnionAlls than comparison only if in a covariant context if !isa(t, UnionAll) && tupledepth == 0 return true end - t = unwrap_unionall(t) c = unwrap_unionall(c) end + if isa(t, UnionAll) + t = unwrap_unionall(t) + end # rules for various comparison types if isa(c, TypeVar) - tupledepth = 1 # allow replacing a TypeVar with a concrete value (since we know the UnionAll must be in covariant position) + tupledepth = 1 if isa(t, TypeVar) return !(t.lb === Union{} || t.lb === c.lb) || # simplify lb towards Union{} type_more_complex(t.ub, c.ub, sources, depth + 1, tupledepth, 0) @@ -249,20 +267,27 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe if isa(t, DataType) tP = t.parameters if isType(t) - # Treat Type{T} and T as equivalent to allow taking typeof any - # source type (DataType) anywhere as Type{...}, as long as it isn't - # nesting as Type{Type{...}} - tt = unwrap_unionall(t.parameters[1]) - if isa(tt, Union) || isa(tt, TypeVar) || isType(tt) - return !is_derived_type_from_any(tt, sources, depth + 1) + # Type is fairly important, so do not widen it as fast as other types if avoidable + tt = tP[1] + ttu = unwrap_unionall(tt) # TODO: use argument_datatype(tt) after #50692 fixed + if isType(c) + ct = c.parameters[1] + else + ct = Union{} + tupledepth == 0 && return true # cannot allow nesting + end + # allow creating variation within a nested Type, but not very deep + if tupledepth > 1 + tupledepth = 1 else - isType(c) && (c = unwrap_unionall(c.parameters[1])) - return type_more_complex(tt, c, sources, depth, 0, 0) + tupledepth = 0 end + return type_more_complex(tt, ct, sources, depth + 1, tupledepth, 0) elseif isa(c, DataType) && t.name === c.name cP = c.parameters length(cP) < length(tP) && return true - length(cP) > length(tP) && !isvarargtype(tP[end]) && depth == 1 && return false + isempty(tP) && return false + length(cP) > length(tP) && !isvarargtype(tP[end]) && depth == 1 && return false # is this line necessary? 
ntail = length(cP) - length(tP) # assume parameters were dropped from the tuple head # allow creating variation within a nested tuple, but only so deep if t.name === Tuple.name && tupledepth > 0 @@ -270,22 +295,9 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe else tupledepth = 0 end - isgenerator = (t.name.name === :Generator && t.name.module === _topmod(t.name.module)) for i = 1:length(tP) tPi = tP[i] cPi = cP[i + ntail] - if isgenerator - let tPi = unwrap_unionall(tPi), - cPi = unwrap_unionall(cPi) - if isa(tPi, DataType) && isa(cPi, DataType) && - !isabstracttype(tPi) && !isabstracttype(cPi) && - sym_isless(cPi.name.name, tPi.name.name) - # allow collect on (anonymous) Generators to nest, provided that their functions are appropriately ordered - # TODO: is there a better way? - continue - end - end - end type_more_complex(tPi, cPi, sources, depth + 1, tupledepth, 0) && return true end return false @@ -298,31 +310,35 @@ union_count_abstract(x::Union) = union_count_abstract(x.a) + union_count_abstrac union_count_abstract(@nospecialize(x)) = !isdispatchelem(x) function issimpleenoughtype(@nospecialize t) + ut = unwrap_unionall(t) + ut isa DataType && ut.name.wrapper == t && return true return unionlen(t) + union_count_abstract(t) <= MAX_TYPEUNION_LENGTH && unioncomplexity(t) <= MAX_TYPEUNION_COMPLEXITY end +# We may want to apply a stricter limit than issimpleenoughtype to +# tupleelements individually, to try to keep the whole tuple under the limit, +# even after complicated recursion and other operations on it elsewhere +const issimpleenoughtupleelem = issimpleenoughtype + # A simplified type_more_complex query over the extended lattice # (assumes typeb ⊑ typea) -function issimplertype(@nospecialize(typea), @nospecialize(typeb)) - typea = ignorelimited(typea) - typeb = ignorelimited(typeb) - typea isa MaybeUndef && (typea = typea.typ) # n.b. does not appear in inference - typeb isa MaybeUndef && (typeb = typeb.typ) # n.b. does not appear in inference +@nospecializeinfer function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb)) + @assert !isa(typea, LimitedAccuracy) && !isa(typeb, LimitedAccuracy) "LimitedAccuracy not supported by simplertype lattice" # n.b. the caller was supposed to handle these typea === typeb && return true if typea isa PartialStruct aty = widenconst(typea) for i = 1:length(typea.fields) ai = unwrapva(typea.fields[i]) bi = fieldtype(aty, i) - is_lattice_equal(ai, bi) && continue + is_lattice_equal(𝕃, ai, bi) && continue tni = _typename(widenconst(ai)) if tni isa Const bi = (tni.val::Core.TypeName).wrapper - is_lattice_equal(ai, bi) && continue + is_lattice_equal(𝕃, ai, bi) && continue end - bi = getfield_tfunc(typeb, Const(i)) - is_lattice_equal(ai, bi) && continue + bi = getfield_tfunc(𝕃, typeb, Const(i)) + is_lattice_equal(𝕃, ai, bi) && continue # It is not enough for ai to be simpler than bi: it must exactly equal # (for this, an invariant struct field, by contrast to # type_more_complex above which handles covariant tuples). 
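# Editorial sketch (not part of the patch): a simplified, standalone model of the
# `issimpleenoughtype` budget above, assuming MAX_TYPEUNION_LENGTH == 3 and
# MAX_TYPEUNION_COMPLEXITY == 3 as defined near the top of this file. It ignores the
# `union_count_abstract` term and the Vararg/TypeVar cases handled by the real
# `unionlen`/`unioncomplexity`; all `toy_*` names are illustrative only.
const TOY_MAX_LEN = 3
const TOY_MAX_COMPLEXITY = 3

toy_unionlen(t) = t isa Union ? toy_unionlen(t.a) + toy_unionlen(t.b) : 1

function toy_complexity(t)
    t isa Union && return toy_complexity(t.a) + toy_complexity(t.b) + 1
    t isa UnionAll && return max(toy_complexity(t.body), toy_complexity(t.var.ub))
    (t isa DataType && t.name === Tuple.name) || return 0
    return maximum(Int[toy_complexity(p) for p in t.parameters]; init=0)
end

toy_simpleenough(t) = toy_unionlen(t) <= TOY_MAX_LEN && toy_complexity(t) <= TOY_MAX_COMPLEXITY

# usage: unions within the budget are kept as-is, larger ones are candidates for widening
toy_simpleenough(Union{Int,Float64,String})          # true: three elements, no nesting
toy_simpleenough(Union{Int,Float64,String,Missing})  # false: tmerge would try to widen such a union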
@@ -330,66 +346,139 @@ function issimplertype(@nospecialize(typea), @nospecialize(typeb)) end elseif typea isa Type return issimpleenoughtype(typea) - # elseif typea isa Const # fall-through good + # elseif typea isa Const # fall-through to true is good elseif typea isa Conditional # follow issubconditional query typeb isa Const && return true typeb isa Conditional || return false is_same_conditionals(typea, typeb) || return false - issimplertype(typea.thentype, typeb.thentype) || return false - issimplertype(typea.elsetype, typeb.elsetype) || return false + issimplertype(𝕃, typea.thentype, typeb.thentype) || return false + issimplertype(𝕃, typea.elsetype, typeb.elsetype) || return false elseif typea isa InterConditional # ibid typeb isa Const && return true typeb isa InterConditional || return false is_same_conditionals(typea, typeb) || return false - issimplertype(typea.thentype, typeb.thentype) || return false - issimplertype(typea.elsetype, typeb.elsetype) || return false + issimplertype(𝕃, typea.thentype, typeb.thentype) || return false + issimplertype(𝕃, typea.elsetype, typeb.elsetype) || return false + elseif typea isa MustAlias + typeb isa MustAlias || return false + issubalias(typeb, typea) || return false + issimplertype(𝕃, typea.vartyp, typeb.vartyp) || return false + issimplertype(𝕃, typea.fldtyp, typeb.fldtyp) || return false + elseif typea isa InterMustAlias + typeb isa InterMustAlias || return false + issubalias(typeb, typea) || return false + issimplertype(𝕃, typea.vartyp, typeb.vartyp) || return false + issimplertype(𝕃, typea.fldtyp, typeb.fldtyp) || return false elseif typea isa PartialOpaque # TODO + typeb isa PartialOpaque || return false + aty = widenconst(typea) + bty = widenconst(typeb) + if typea.source === typeb.source && typea.parent === typeb.parent && aty == bty && typea.env == typeb.env + return false + end + return false end return true end -# pick a wider type that contains both typea and typeb, -# with some limits on how "large" it can get, -# but without losing too much precision in common cases -# and also trying to be mostly associative and commutative -function tmerge(@nospecialize(typea), @nospecialize(typeb)) +@inline function tmerge_fast_path(lattice::AbstractLattice, @nospecialize(typea), @nospecialize(typeb)) + # Fast paths typea === Union{} && return typeb typeb === Union{} && return typea typea === typeb && return typea - suba = typea ⊑ typeb - suba && issimplertype(typeb, typea) && return typeb - subb = typeb ⊑ typea + suba = ⊑(lattice, typea, typeb) + suba && issimplertype(lattice, typeb, typea) && return typeb + subb = ⊑(lattice, typeb, typea) suba && subb && return typea - subb && issimplertype(typea, typeb) && return typea + subb && issimplertype(lattice, typea, typeb) && return typea + return nothing +end - # type-lattice for LimitedAccuracy wrapper - # the merge create a slightly narrower type than needed, but we can't - # represent the precise intersection of causes and don't attempt to - # enumerate some of these cases where we could +function union_causes(causesa::IdSet{InferenceState}, causesb::IdSet{InferenceState}) + if causesa ⊆ causesb + return causesb + elseif causesb ⊆ causesa + return causesa + else + return union!(copy(causesa), causesb) + end +end + +function merge_causes(causesa::IdSet{InferenceState}, causesb::IdSet{InferenceState}) + # TODO: When lattice elements are equal, we're allowed to discard one or the + # other set, but we'll need to come up with a consistent rule. 
For now, we + # just check the length, but other heuristics may be applicable. + if length(causesa) < length(causesb) + return causesa + elseif length(causesb) < length(causesa) + return causesb + else + return union!(copy(causesa), causesb) + end +end + +@nospecializeinfer @noinline function tmerge_limited(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) + typea === Union{} && return typeb + typeb === Union{} && return typea + + # Like tmerge_fast_path, but tracking which causes need to be preserved at + # the same time. if isa(typea, LimitedAccuracy) && isa(typeb, LimitedAccuracy) - if typea.causes ⊆ typeb.causes - causes = typeb.causes - elseif typeb.causes ⊆ typea.causes - causes = typea.causes + causesa = typea.causes + causesb = typeb.causes + typea = typea.typ + typeb = typeb.typ + suba = ⊑(lattice, typea, typeb) + subb = ⊑(lattice, typeb, typea) + + # Approximated types are lattice equal. Merge causes. + if suba && subb + return LimitedAccuracy(typeb, merge_causes(causesa, causesb)) + elseif suba + issimplertype(lattice, typeb, typea) && return LimitedAccuracy(typeb, causesb) + causes = causesb + # `a`'s causes may be discarded + elseif subb + causes = causesa else - causes = union!(copy(typea.causes), typeb.causes) + causes = union_causes(causesa, causesb) end - return LimitedAccuracy(tmerge(typea.typ, typeb.typ), causes) - elseif isa(typea, LimitedAccuracy) - return LimitedAccuracy(tmerge(typea.typ, typeb), typea.causes) - elseif isa(typeb, LimitedAccuracy) - return LimitedAccuracy(tmerge(typea, typeb.typ), typeb.causes) + else + if isa(typeb, LimitedAccuracy) + (typea, typeb) = (typeb, typea) + end + typea = typea::LimitedAccuracy + + causes = typea.causes + typea = typea.typ + + suba = ⊑(lattice, typea, typeb) + if suba + issimplertype(lattice, typeb, typea) && return typeb + # `typea` was narrower than `typeb`. Whatever tmerge produces, + # we know it must be wider than `typeb`, so we may drop the + # causes. + causes = nothing + end + subb = ⊑(lattice, typeb, typea) end - # type-lattice for MaybeUndef wrapper - if isa(typea, MaybeUndef) || isa(typeb, MaybeUndef) - return MaybeUndef(tmerge( - isa(typea, MaybeUndef) ? typea.typ : typea, - isa(typeb, MaybeUndef) ? 
typeb.typ : typeb)) + suba && subb && return LimitedAccuracy(typea, causes) + subb && issimplertype(lattice, typea, typeb) && return LimitedAccuracy(typea, causes) + return LimitedAccuracy(tmerge(widenlattice(lattice), typea, typeb), causes) +end + +@nospecializeinfer function tmerge(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) + if isa(typea, LimitedAccuracy) || isa(typeb, LimitedAccuracy) + return tmerge_limited(lattice, typea, typeb) end + return tmerge(widenlattice(lattice), typea, typeb) +end + +@nospecializeinfer function tmerge(lattice::ConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) # type-lattice for Conditional wrapper (NOTE never be merged with InterConditional) if isa(typea, Conditional) && isa(typeb, Const) if typeb.val === true @@ -407,8 +496,8 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb)) end if isa(typea, Conditional) && isa(typeb, Conditional) if is_same_conditionals(typea, typeb) - thentype = tmerge(typea.thentype, typeb.thentype) - elsetype = tmerge(typea.elsetype, typeb.elsetype) + thentype = tmerge(widenlattice(lattice), typea.thentype, typeb.thentype) + elsetype = tmerge(widenlattice(lattice), typea.elsetype, typeb.elsetype) if thentype !== elsetype return Conditional(typea.slot, thentype, elsetype) end @@ -419,6 +508,17 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb)) end return Bool end + if isa(typea, Conditional) + typeb === Union{} && return typea + typea = widenconditional(typea) + elseif isa(typeb, Conditional) + typea === Union{} && return typeb + typeb = widenconditional(typeb) + end + return tmerge(widenlattice(lattice), typea, typeb) +end + +@nospecializeinfer function tmerge(lattice::InterConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) # type-lattice for InterConditional wrapper (NOTE never be merged with Conditional) if isa(typea, InterConditional) && isa(typeb, Const) if typeb.val === true @@ -436,8 +536,8 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb)) end if isa(typea, InterConditional) && isa(typeb, InterConditional) if is_same_conditionals(typea, typeb) - thentype = tmerge(typea.thentype, typeb.thentype) - elsetype = tmerge(typea.elsetype, typeb.elsetype) + thentype = tmerge(widenlattice(lattice), typea.thentype, typeb.thentype) + elsetype = tmerge(widenlattice(lattice), typea.elsetype, typeb.elsetype) if thentype !== elsetype return InterConditional(typea.slot, thentype, elsetype) end @@ -448,80 +548,190 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb)) end return Bool end + if isa(typea, InterConditional) + typeb === Union{} && return typea + typea = widenconditional(typea) + elseif isa(typeb, InterConditional) + typea === Union{} && return typeb + typeb = widenconditional(typeb) + end + return tmerge(widenlattice(lattice), typea, typeb) +end - # type-lattice for Const and PartialStruct wrappers - if ((isa(typea, PartialStruct) || isa(typea, Const)) && - (isa(typeb, PartialStruct) || isa(typeb, Const))) - aty = widenconst(typea) - bty = widenconst(typeb) - if aty === bty - # must have egal here, since we do not create PartialStruct for non-concrete types - typea_nfields = nfields_tfunc(typea) - typeb_nfields = nfields_tfunc(typeb) - isa(typea_nfields, Const) || return aty - isa(typeb_nfields, Const) || return aty - type_nfields = typea_nfields.val::Int - type_nfields === typeb_nfields.val::Int || return aty - type_nfields == 0 && return aty - fields = Vector{Any}(undef, type_nfields) - anyrefine = false - for i = 1:type_nfields - 
ai = getfield_tfunc(typea, Const(i)) - bi = getfield_tfunc(typeb, Const(i)) - ft = fieldtype(aty, i) - if is_lattice_equal(ai, bi) || is_lattice_equal(ai, ft) - # Since ai===bi, the given type has no restrictions on complexity. - # and can be used to refine ft - tyi = ai - elseif is_lattice_equal(bi, ft) - tyi = bi +@nospecializeinfer function tmerge(𝕃::AnyMustAliasesLattice, @nospecialize(typea), @nospecialize(typeb)) + if is_valid_lattice_norec(𝕃, typea) + typeb === Union{} && return typea + typea = widenmustalias(typea) + end + if is_valid_lattice_norec(𝕃, typeb) + typea === Union{} && return typeb + typeb = widenmustalias(typeb) + end + return tmerge(widenlattice(𝕃), typea, typeb) +end + +# N.B. This can also be called with both typea::Const and typeb::Const to +# to recover PartialStruct from `Const`s with overlapping fields. +@nospecializeinfer function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) + aty = widenconst(typea) + bty = widenconst(typeb) + if aty === bty + # must have egal here, since we do not create PartialStruct for non-concrete types + typea_nfields = nfields_tfunc(lattice, typea) + typeb_nfields = nfields_tfunc(lattice, typeb) + isa(typea_nfields, Const) || return nothing + isa(typeb_nfields, Const) || return nothing + type_nfields = typea_nfields.val::Int + type_nfields === typeb_nfields.val::Int || return nothing + type_nfields == 0 && return nothing + fields = Vector{Any}(undef, type_nfields) + anyrefine = false + for i = 1:type_nfields + ai = getfield_tfunc(lattice, typea, Const(i)) + bi = getfield_tfunc(lattice, typeb, Const(i)) + # N.B.: We're assuming here that !isType(aty), because that case + # only arises when typea === typeb, which should have been caught + # before calling this. + ft = fieldtype(aty, i) + if is_lattice_equal(lattice, ai, bi) || is_lattice_equal(lattice, ai, ft) + # Since ai===bi, the given type has no restrictions on complexity. + # and can be used to refine ft + tyi = ai + elseif is_lattice_equal(lattice, bi, ft) + tyi = bi + elseif (tyi′ = tmerge_field(lattice, ai, bi); tyi′ !== nothing) + # allow external lattice implementation to provide a custom field-merge strategy + tyi = tyi′ + else + # Otherwise use the default aggressive field-merge implementation, and + # choose between using the fieldtype or some other simple merged type. + # The wrapper type never has restrictions on complexity, + # so try to use that to refine the estimated type too. + tni = _typename(widenconst(ai)) + if tni isa Const && tni === _typename(widenconst(bi)) + # A tmeet call may cause tyi to become complex, but since the inputs were + # strictly limited to being egal, this has no restrictions on complexity. + # (Otherwise, we would need to use <: and take the narrower one without + # intersection. See the similar comment in abstract_call_method.) + tyi = typeintersect(ft, (tni.val::Core.TypeName).wrapper) else - # Otherwise choose between using the fieldtype or some other simple merged type. - # The wrapper type never has restrictions on complexity, - # so try to use that to refine the estimated type too. - tni = _typename(widenconst(ai)) - if tni isa Const && tni === _typename(widenconst(bi)) - # A tmeet call may cause tyi to become complex, but since the inputs were - # strictly limited to being egal, this has no restrictions on complexity. - # (Otherwise, we would need to use <: and take the narrower one without - # intersection. See the similar comment in abstract_call_method.) 
- tyi = typeintersect(ft, (tni.val::Core.TypeName).wrapper) - else - # Since aty===bty, the fieldtype has no restrictions on complexity. - tyi = ft - end - end - fields[i] = tyi - if !anyrefine - anyrefine = has_nontrivial_const_info(tyi) || # constant information - tyi ⋤ ft # just a type-level information, but more precise than the declared type + # Since aty===bty, the fieldtype has no restrictions on complexity. + tyi = ft end end - return anyrefine ? PartialStruct(aty, fields) : aty + fields[i] = tyi + if !anyrefine + anyrefine = has_nontrivial_extended_info(lattice, tyi) || # extended information + ⋤(lattice, tyi, ft) # just a type-level information, but more precise than the declared type + end end + anyrefine && return PartialStruct(aty, fields) end + return nothing +end + +@nospecializeinfer function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) + r = tmerge_fast_path(lattice, typea, typeb) + r !== nothing && return r + + # type-lattice for Const and PartialStruct wrappers + aps = isa(typea, PartialStruct) + bps = isa(typeb, PartialStruct) + acp = aps || isa(typea, Const) + bcp = bps || isa(typeb, Const) + if acp && bcp + typea === typeb && return typea + psrt = tmerge_partial_struct(lattice, typea, typeb) + psrt !== nothing && return psrt + end + + # Don't widen const here - external AbstractInterpreter might insert lattice + # layers between us and `ConstsLattice`. + wl = widenlattice(lattice) + aps && (typea = widenlattice(wl, typea)) + bps && (typeb = widenlattice(wl, typeb)) # type-lattice for PartialOpaque wrapper - if isa(typea, PartialOpaque) && isa(typeb, PartialOpaque) && widenconst(typea) == widenconst(typeb) - if !(typea.source === typeb.source && - typea.parent === typeb.parent) - return widenconst(typea) + apo = isa(typea, PartialOpaque) + bpo = isa(typeb, PartialOpaque) + if apo && bpo + aty = widenconst(typea) + bty = widenconst(typeb) + if aty == bty + if !(typea.source === typeb.source && + typea.parent === typeb.parent) + return widenconst(typea) + end + return PartialOpaque(typea.typ, tmerge(lattice, typea.env, typeb.env), + typea.parent, typea.source) end - return PartialOpaque(typea.typ, tmerge(typea.env, typeb.env), - typea.parent, typea.source) + typea = aty + typeb = bty + elseif apo + typea = widenlattice(wl, typea) + elseif bpo + typeb = widenlattice(wl, typeb) end - # no special type-inference lattice, join the types - typea, typeb = widenconst(typea), widenconst(typeb) - if !isa(typea, Type) || !isa(typeb, Type) - # XXX: this should never happen - return Any - end - typea == typeb && return typea + return tmerge(wl, typea, typeb) +end + +@nospecializeinfer function tmerge(lattice::ConstsLattice, @nospecialize(typea), @nospecialize(typeb)) + acp = isa(typea, Const) || isa(typea, PartialTypeVar) + bcp = isa(typeb, Const) || isa(typeb, PartialTypeVar) + if acp && bcp + typea === typeb && return typea + end + wl = widenlattice(lattice) + acp && (typea = widenlattice(wl, typea)) + bcp && (typeb = widenlattice(wl, typeb)) + return tmerge(wl, typea, typeb) +end + +@nospecializeinfer function tmerge(lattice::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type)) # it's always ok to form a Union of two concrete types - if (isconcretetype(typea) || isType(typea)) && (isconcretetype(typeb) || isType(typeb)) + act = isconcretetype(typea) + bct = isconcretetype(typeb) + if act && bct + # Extra fast path for pointer-egal concrete types + (pointer_from_objref(typea) === pointer_from_objref(typeb)) && return typea + end + 
if (act || isType(typea)) && (bct || isType(typeb)) return Union{typea, typeb} end + u = tmerge_fast_path(lattice, typea, typeb) + u === nothing || return u + return tmerge_types_slow(typea, typeb) +end + +@nospecializeinfer @noinline function tname_intersect(aname::Core.TypeName, bname::Core.TypeName) + aname === bname && return aname + if !isabstracttype(aname.wrapper) && !isabstracttype(bname.wrapper) + return nothing # fast path + end + Any.name === aname && return aname + a = unwrap_unionall(aname.wrapper) + heighta = 0 + while a !== Any + heighta += 1 + a = a.super + end + b = unwrap_unionall(bname.wrapper) + heightb = 0 + while b !== Any + b.name === aname && return aname + heightb += 1 + b = b.super + end + a = unwrap_unionall(aname.wrapper) + while heighta > heightb + a = a.super + heighta -= 1 + end + return a.name === bname ? bname : nothing +end + +@nospecializeinfer @noinline function tmerge_types_slow(@nospecialize(typea::Type), @nospecialize(typeb::Type)) # collect the list of types from past tmerge calls returning Union # and then reduce over that list types = Any[] @@ -544,73 +754,95 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb)) # see if any of the union elements have the same TypeName # in which case, simplify this tmerge by replacing it with # the widest possible version of itself (the wrapper) + simplify = falses(length(types)) for i in 1:length(types) + typenames[i] === Any.name && continue ti = types[i] for j in (i + 1):length(types) - if typenames[i] === typenames[j] + typenames[j] === Any.name && continue + ijname = tname_intersect(typenames[i], typenames[j]) + if !(ijname === nothing) tj = types[j] if ti <: tj types[i] = Union{} typenames[i] = Any.name + simplify[i] = false + simplify[j] = true break elseif tj <: ti types[j] = Union{} typenames[j] = Any.name + simplify[j] = false + simplify[i] = true else - if typenames[i] === Tuple.name + if ijname === Tuple.name # try to widen Tuple slower: make a single non-concrete Tuple containing both # converge the Tuple element-wise if they are the same length # see 4ee2b41552a6bc95465c12ca66146d69b354317b, be59686f7613a2ccfd63491c7b354d0b16a95c05, widen = tuplemerge(unwrap_unionall(ti)::DataType, unwrap_unionall(tj)::DataType) widen = rewrap_unionall(rewrap_unionall(widen, ti), tj) + simplify[j] = false else - wr = typenames[i].wrapper + wr = ijname.wrapper uw = unwrap_unionall(wr)::DataType ui = unwrap_unionall(ti)::DataType + while ui.name !== ijname + ui = ui.super + end uj = unwrap_unionall(tj)::DataType - merged = wr + while uj.name !== ijname + uj = uj.super + end + p = Vector{Any}(undef, length(uw.parameters)) + usep = true + widen = wr for k = 1:length(uw.parameters) ui_k = ui.parameters[k] if ui_k === uj.parameters[k] && !has_free_typevars(ui_k) - merged = merged{ui_k} + p[k] = ui_k + usep = true else - merged = merged{uw.parameters[k]} + p[k] = uw.parameters[k] end end - widen = rewrap_unionall(merged, wr) + if usep + widen = rewrap_unionall(wr{p...}, wr) + end + simplify[j] = !usep end types[i] = Union{} typenames[i] = Any.name + simplify[i] = false types[j] = widen break end end end end - u = Union{types...} - # don't let type unions get too big, if the above didn't reduce it enough - if issimpleenoughtype(u) - return u - end - # don't let the slow widening of Tuple cause the whole type to grow too fast + # don't let elements of the union get too big, if the above didn't reduce something enough + # Specifically widen Tuple{..., Union{lots of stuff}...} to Tuple{..., Any, ...} + # Don't let 
Val{<:Val{<:Val}} keep nesting abstract levels either for i in 1:length(types) + simplify[i] || continue + ti = types[i] + issimpleenoughtype(ti) && continue if typenames[i] === Tuple.name - widen = unwrap_unionall(types[i]) - if isa(widen, DataType) && !isvatuple(widen) - widen = NTuple{length(widen.parameters), Any} - else - widen = Tuple - end - types[i] = widen - u = Union{types...} - if issimpleenoughtype(u) - return u + # otherwise we need to do a simple version of tuplemerge for one element now + tip = (unwrap_unionall(ti)::DataType).parameters + lt = length(tip) + p = Vector{Any}(undef, lt) + for j = 1:lt + ui = tip[j] + p[j] = issimpleenoughtupleelem(unwrapva(ui)) ? ui : isvarargtype(ui) ? Vararg : Any end - break + types[i] = rewrap_unionall(Tuple{p...}, ti) + else + # this element is not simple enough yet, make it so now + types[i] = typenames[i].wrapper end end - # finally, just return the widest possible type - return Any + u = Union{types...} + return u end # the inverse of switchtupleunion, with limits on max element union size @@ -632,7 +864,7 @@ function tuplemerge(a::DataType, b::DataType) p = Vector{Any}(undef, lt + vt) for i = 1:lt ui = Union{ap[i], bp[i]} - p[i] = issimpleenoughtype(ui) ? ui : Any + p[i] = issimpleenoughtupleelem(ui) ? ui : Any end # merge the remaining tail into a single, simple Tuple{Vararg{T}} (#22120) if vt @@ -650,8 +882,10 @@ function tuplemerge(a::DataType, b::DataType) # or (equivalently?) iteratively took super-types until reaching a common wrapper # e.g. consider the results of `tuplemerge(Tuple{Complex}, Tuple{Number, Int})` and of # `tuplemerge(Tuple{Int}, Tuple{String}, Tuple{Int, String})` - if !(ti <: tail) - if tail <: ti + # c.f. tname_intersect in the algorithm above + hasfree = has_free_typevars(ti) + if hasfree || !(ti <: tail) + if !hasfree && tail <: ti tail = ti # widen to ti else uw = unwrap_unionall(tail) @@ -679,11 +913,16 @@ function tuplemerge(a::DataType, b::DataType) end end end - tail === Any && return Tuple # short-circuit loop + tail === Any && return Tuple # short-circuit loops end end @assert !(tail === Union{}) - p[lt + 1] = Vararg{tail} + if !issimpleenoughtupleelem(tail) || tail === Any + p[lt + 1] = Vararg + lt == 0 && return Tuple + else + p[lt + 1] = Vararg{tail} + end end return Tuple{p...} end diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 21075b3e87d16..b98cf09ff7cf1 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -16,187 +16,395 @@ the following methods to satisfy the `AbstractInterpreter` API requirement: - `get_inference_cache(interp::NewInterpreter)` - return the local inference cache - `code_cache(interp::NewInterpreter)` - return the global inference cache """ -abstract type AbstractInterpreter end +:(AbstractInterpreter) + +abstract type AbstractLattice end struct ArgInfo fargs::Union{Nothing,Vector{Any}} argtypes::Vector{Any} end +struct StmtInfo + """ + If `used` is false, we know that the return value is statically unused and + need thus not be computed. + """ + used::Bool +end + +struct MethodInfo + propagate_inbounds::Bool + method_for_inference_limit_heuristics::Union{Nothing,Method} +end +MethodInfo(src::CodeInfo) = MethodInfo( + src.propagate_inbounds, + src.method_for_inference_limit_heuristics::Union{Nothing,Method}) + """ - InferenceResult + v::VarState + +A special wrapper that represents a local variable of a method being analyzed. 
+This does not participate in the native type system nor the inference lattice, and it thus +should be always unwrapped to `v.typ` when performing any type or lattice operations on it. +`v.undef` represents undefined-ness of this static parameter. If `true`, it means that the +variable _may_ be undefined at runtime, otherwise it is guaranteed to be defined. +If `v.typ === Bottom` it means that the variable is strictly undefined. +""" +struct VarState + typ + undef::Bool + VarState(@nospecialize(typ), undef::Bool) = new(typ, undef) +end + +abstract type ForwardableArgtypes end + +struct AnalysisResults + result + next::AnalysisResults + AnalysisResults(@nospecialize(result), next::AnalysisResults) = new(result, next) + AnalysisResults(@nospecialize(result)) = new(result) + # NullAnalysisResults() = new(nothing) + # global const NULL_ANALYSIS_RESULTS = NullAnalysisResults() +end +const NULL_ANALYSIS_RESULTS = AnalysisResults(nothing) + +""" + InferenceResult(linfo::MethodInstance, [argtypes::ForwardableArgtypes, 𝕃::AbstractLattice]) A type that represents the result of running type inference on a chunk of code. + +See also [`matching_cache_argtypes`](@ref). """ mutable struct InferenceResult - linfo::MethodInstance - argtypes::Vector{Any} - overridden_by_const::BitVector - result # ::Type, or InferenceState if WIP - src # ::Union{CodeInfo, OptimizationState} if inferred copy is available, nothing otherwise + const linfo::MethodInstance + const argtypes::Vector{Any} + const overridden_by_const::BitVector + result # extended lattice element if inferred, nothing otherwise + exc_result # like `result`, but for the thrown value + src # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise valid_worlds::WorldRange # if inference and optimization is finished ipo_effects::Effects # if inference is finished effects::Effects # if optimization is finished - argescapes # ::ArgEscapeCache if optimized, nothing otherwise - # NOTE the main constructor is defined within inferencestate.jl - global function _InferenceResult( - linfo::MethodInstance, - arginfo#=::Union{Nothing,Tuple{ArgInfo,InferenceState}}=#) - argtypes, overridden_by_const = matching_cache_argtypes(linfo, arginfo) - return new(linfo, argtypes, overridden_by_const, Any, nothing, - WorldRange(), Effects(), Effects(), nothing) + analysis_results::AnalysisResults # AnalysisResults with e.g. result::ArgEscapeCache if optimized, otherwise NULL_ANALYSIS_RESULTS + is_src_volatile::Bool # `src` has been cached globally as the compressed format already, allowing `src` to be used destructively + function InferenceResult(linfo::MethodInstance, cache_argtypes::Vector{Any}, overridden_by_const::BitVector) + # def = linfo.def + # nargs = def isa Method ? Int(def.nargs) : 0 + # @assert length(cache_argtypes) == nargs + return new(linfo, cache_argtypes, overridden_by_const, nothing, nothing, nothing, + WorldRange(), Effects(), Effects(), NULL_ANALYSIS_RESULTS, false) end end +InferenceResult(linfo::MethodInstance, 𝕃::AbstractLattice=fallback_lattice) = + InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo)...) +InferenceResult(linfo::MethodInstance, argtypes::ForwardableArgtypes, 𝕃::AbstractLattice=fallback_lattice) = + InferenceResult(linfo, matching_cache_argtypes(𝕃, linfo, argtypes)...) 
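# Editorial sketch (not part of the patch): the `analysis_results` field above forms an
# immutable singly-linked list terminated by `NULL_ANALYSIS_RESULTS`; pushing wraps the old
# head and traversal walks nodes until the terminator (which leaves `next` undefined). The
# standalone mimic below uses hypothetical names (`ToyResults`, `toy_push`, `toy_traverse`)
# purely for illustration; the real helpers (`stack_analysis_result!`,
# `traverse_analysis_results`) follow just below in the patch.
struct ToyResults
    result
    next::ToyResults
    ToyResults(@nospecialize(result), next::ToyResults) = new(result, next)
    ToyResults(@nospecialize(result)) = new(result)  # terminator: `next` is left undefined
end
const TOY_NULL = ToyResults(nothing)

toy_push(list::ToyResults, @nospecialize(result)) = ToyResults(result, list)

function toy_traverse(callback, list::ToyResults)
    while isdefined(list, :next)        # stop at the terminator node
        r = callback(list.result)
        r === nothing || return r
        list = list.next
    end
    return nothing
end

# usage: newest result sits at the head; look one up by predicate
results = toy_push(toy_push(TOY_NULL, :escape_cache), 42)
toy_traverse(x -> x isa Symbol ? x : nothing, results)  # returns :escape_cache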
-""" - OptimizationParams +function stack_analysis_result!(inf_result::InferenceResult, @nospecialize(result)) + return inf_result.analysis_results = AnalysisResults(result, inf_result.analysis_results) +end -Parameters that control optimizer operation. -""" -struct OptimizationParams - inlining::Bool # whether inlining is enabled - inline_cost_threshold::Int # number of CPU cycles beyond which it's not worth inlining - inline_nonleaf_penalty::Int # penalty for dynamic dispatch - inline_tupleret_bonus::Int # extra inlining willingness for non-concrete tuple return types (in hopes of splitting it up) - inline_error_path_cost::Int # cost of (un-optimized) calls in blocks that throw - - trust_inference::Bool - - # Duplicating for now because optimizer inlining requires it. - # Keno assures me this will be removed in the near future - MAX_METHODS::Int - MAX_TUPLE_SPLAT::Int - MAX_UNION_SPLITTING::Int - - function OptimizationParams(; - inlining::Bool = inlining_enabled(), - inline_cost_threshold::Int = 100, - inline_nonleaf_penalty::Int = 1000, - inline_tupleret_bonus::Int = 250, - inline_error_path_cost::Int = 20, - max_methods::Int = 3, - tuple_splat::Int = 32, - union_splitting::Int = 4, - trust_inference::Bool = false - ) - return new( - inlining, - inline_cost_threshold, - inline_nonleaf_penalty, - inline_tupleret_bonus, - inline_error_path_cost, - trust_inference, - max_methods, - tuple_splat, - union_splitting - ) +function traverse_analysis_results(callback, (;analysis_results)::Union{InferenceResult,CodeInstance}) + analysis_results isa AnalysisResults || return nothing + while isdefined(analysis_results, :next) + if (result = callback(analysis_results.result)) !== nothing + return result + end + analysis_results = analysis_results.next end + return nothing end """ - InferenceParams + inf_params::InferenceParams -Parameters that control type inference operation. +Parameters that control abstract interpretation-based type inference operation. + +--- +- `inf_params.max_methods::Int = 3`\\ + Type inference gives up analysis on a call when there are more than `max_methods` matching + methods. This trades off between compiler latency and generated code performance. + Typically, considering many methods means spending _lots_ of time obtaining poor type + information, so this option should be kept low. [`Base.Experimental.@max_methods`](@ref) + can have a more fine-grained control on this configuration with per-module or per-method + annotation basis. +--- +- `inf_params.max_union_splitting::Int = 4`\\ + Specifies the maximum number of union-tuples to swap or expand before computing the set of + matching methods or conditional types. +--- +- `inf_params.max_apply_union_enum::Int = 8`\\ + Specifies the maximum number of union-tuples to swap or expand when inferring a call to + `Core._apply_iterate`. +--- +- `inf_params.max_tuple_splat::Int = 32`\\ + When attempting to infer a call to `Core._apply_iterate`, abort the analysis if the tuple + contains more than this many elements. +--- +- `inf_params.tuple_complexity_limit_depth::Int = 3`\\ + Specifies the maximum depth of large tuple type that can appear as specialized method + signature when inferring a recursive call graph. +--- +- `inf_params.ipo_constant_propagation::Bool = true`\\ + If `false`, disables analysis with extended lattice information, i.e. disables any of + the concrete evaluation, semi-concrete interpretation and constant propagation entirely. 
+ [`Base.@constprop :none`](@ref Base.@constprop) can have a more fine-grained control on + this configuration with per-method annotation basis. +--- +- `inf_params.aggressive_constant_propagation::Bool = false`\\ + If `true`, forces constant propagation on any methods when any extended lattice + information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a + more fine-grained control on this configuration with per-method annotation basis. +--- +- `inf_params.unoptimize_throw_blocks::Bool = true`\\ + If `true`, skips inferring calls that are in a block that is known to `throw`. + It may improve the compiler latency without sacrificing the runtime performance + in common situations. +--- +- `inf_params.assume_bindings_static::Bool = false`\\ + If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at + inference time can be assumed to always not exist at runtime (and thus e.g. any access to + it will `throw`). Defaults to `false` since this assumption does not hold in Julia's + semantics for native code execution. +--- """ struct InferenceParams + max_methods::Int + max_union_splitting::Int + max_apply_union_enum::Int + max_tuple_splat::Int + tuple_complexity_limit_depth::Int ipo_constant_propagation::Bool aggressive_constant_propagation::Bool unoptimize_throw_blocks::Bool - - # don't consider more than N methods. this trades off between - # compiler performance and generated code performance. - # typically, considering many methods means spending lots of time - # obtaining poor type information. - # It is important for N to be >= the number of methods in the error() - # function, so we can still know that error() is always Bottom. - MAX_METHODS::Int - # the maximum number of union-tuples to swap / expand - # before computing the set of matching methods - MAX_UNION_SPLITTING::Int - # the maximum number of union-tuples to swap / expand - # when inferring a call to _apply_iterate - MAX_APPLY_UNION_ENUM::Int - - # parameters limiting large (tuple) types - TUPLE_COMPLEXITY_LIMIT_DEPTH::Int - - # when attempting to inline _apply_iterate, abort the optimization if the - # tuple contains more than this many elements - MAX_TUPLE_SPLAT::Int - - function InferenceParams(; - ipo_constant_propagation::Bool = true, - aggressive_constant_propagation::Bool = false, - unoptimize_throw_blocks::Bool = true, - max_methods::Int = 3, - union_splitting::Int = 4, - apply_union_enum::Int = 8, - tupletype_depth::Int = 3, - tuple_splat::Int = 32, - ) + assume_bindings_static::Bool + ignore_recursion_hardlimit::Bool + + function InferenceParams( + max_methods::Int, + max_union_splitting::Int, + max_apply_union_enum::Int, + max_tuple_splat::Int, + tuple_complexity_limit_depth::Int, + ipo_constant_propagation::Bool, + aggressive_constant_propagation::Bool, + unoptimize_throw_blocks::Bool, + assume_bindings_static::Bool, + ignore_recursion_hardlimit::Bool) return new( + max_methods, + max_union_splitting, + max_apply_union_enum, + max_tuple_splat, + tuple_complexity_limit_depth, ipo_constant_propagation, aggressive_constant_propagation, unoptimize_throw_blocks, - max_methods, - union_splitting, - apply_union_enum, - tupletype_depth, - tuple_splat, - ) + assume_bindings_static, + ignore_recursion_hardlimit) + end +end +function InferenceParams( + params::InferenceParams = InferenceParams( # default constructor + #=max_methods::Int=# 3, + #=max_union_splitting::Int=# 4, + #=max_apply_union_enum::Int=# 8, + #=max_tuple_splat::Int=# 32, + 
#=tuple_complexity_limit_depth::Int=# 3, + #=ipo_constant_propagation::Bool=# true, + #=aggressive_constant_propagation::Bool=# false, + #=unoptimize_throw_blocks::Bool=# true, + #=assume_bindings_static::Bool=# false, + #=ignore_recursion_hardlimit::Bool=# false); + max_methods::Int = params.max_methods, + max_union_splitting::Int = params.max_union_splitting, + max_apply_union_enum::Int = params.max_apply_union_enum, + max_tuple_splat::Int = params.max_tuple_splat, + tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth, + ipo_constant_propagation::Bool = params.ipo_constant_propagation, + aggressive_constant_propagation::Bool = params.aggressive_constant_propagation, + unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks, + assume_bindings_static::Bool = params.assume_bindings_static, + ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit) + return InferenceParams( + max_methods, + max_union_splitting, + max_apply_union_enum, + max_tuple_splat, + tuple_complexity_limit_depth, + ipo_constant_propagation, + aggressive_constant_propagation, + unoptimize_throw_blocks, + assume_bindings_static, + ignore_recursion_hardlimit) +end + +""" + opt_params::OptimizationParams + +Parameters that control optimizer operation. + +--- +- `opt_params.inlining::Bool = inlining_enabled()`\\ + Controls whether or not inlining is enabled. +--- +- `opt_params.inline_cost_threshold::Int = 100`\\ + Specifies the number of CPU cycles beyond which it's not worth inlining. +--- +- `opt_params.inline_nonleaf_penalty::Int = 1000`\\ + Specifies the penalty cost for a dynamic dispatch. +--- +- `opt_params.inline_tupleret_bonus::Int = 250`\\ + Specifies the extra inlining willingness for a method specialization with non-concrete + tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will + be added to `opt_params.inline_cost_threshold` when making inlining decision. +--- +- `opt_params.inline_error_path_cost::Int = 20`\\ + Specifies the penalty cost for an un-optimized dynamic call in a block that is known to + `throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams). +--- +- `opt_params.max_tuple_splat::Int = 32`\\ + When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple + contains more than this many elements. +--- +- `opt_params.compilesig_invokes::Bool = true`\\ + If `true`, gives the inliner license to change which `MethodInstance` to invoke when + generating `:invoke` expression based on the [`@nospecialize`](@ref) annotation, + in order to avoid over-specialization. +--- +- `opt_params.assume_fatal_throw::Bool = false`\\ + If `true`, gives the optimizer license to assume that any `throw` is fatal and thus the + state after a `throw` is not externally observable. In particular, this gives the + optimizer license to move side effects (that are proven not observed within a particular + code path) across a throwing call. Defaults to `false`. +--- +- `opt_params.preserve_local_sources::Bool = false`\\ + If `true`, the inliner is restricted from modifying locally-cached sources that are + retained in `CallInfo` objects and always makes their copies before inlining them into + caller context. Defaults to `false`. 
+--- +""" +struct OptimizationParams + inlining::Bool + inline_cost_threshold::Int + inline_nonleaf_penalty::Int + inline_tupleret_bonus::Int + inline_error_path_cost::Int + max_tuple_splat::Int + compilesig_invokes::Bool + assume_fatal_throw::Bool + preserve_local_sources::Bool + + function OptimizationParams( + inlining::Bool, + inline_cost_threshold::Int, + inline_nonleaf_penalty::Int, + inline_tupleret_bonus::Int, + inline_error_path_cost::Int, + max_tuple_splat::Int, + compilesig_invokes::Bool, + assume_fatal_throw::Bool, + preserve_local_sources::Bool) + return new( + inlining, + inline_cost_threshold, + inline_nonleaf_penalty, + inline_tupleret_bonus, + inline_error_path_cost, + max_tuple_splat, + compilesig_invokes, + assume_fatal_throw, + preserve_local_sources) end end +function OptimizationParams( + params::OptimizationParams = OptimizationParams( + #=inlining::Bool=# inlining_enabled(), + #=inline_cost_threshold::Int=# 100, + #=inline_nonleaf_penalty::Int=# 1000, + #=inline_tupleret_bonus::Int=# 250, + #=inline_error_path_cost::Int=# 20, + #=max_tuple_splat::Int=# 32, + #=compilesig_invokes::Bool=# true, + #=assume_fatal_throw::Bool=# false, + #=preserve_local_sources::Bool=# false); + inlining::Bool = params.inlining, + inline_cost_threshold::Int = params.inline_cost_threshold, + inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty, + inline_tupleret_bonus::Int = params.inline_tupleret_bonus, + inline_error_path_cost::Int = params.inline_error_path_cost, + max_tuple_splat::Int = params.max_tuple_splat, + compilesig_invokes::Bool = params.compilesig_invokes, + assume_fatal_throw::Bool = params.assume_fatal_throw, + preserve_local_sources::Bool = params.preserve_local_sources) + return OptimizationParams( + inlining, + inline_cost_threshold, + inline_nonleaf_penalty, + inline_tupleret_bonus, + inline_error_path_cost, + max_tuple_splat, + compilesig_invokes, + assume_fatal_throw, + preserve_local_sources) +end """ - NativeInterpreter + NativeInterpreter <: AbstractInterpreter This represents Julia's native type inference algorithm and the Julia-LLVM codegen backend. -It contains many parameters used by the compilation pipeline. """ struct NativeInterpreter <: AbstractInterpreter - # Cache of inference results for this particular interpreter - cache::Vector{InferenceResult} # The world age we're working inside of world::UInt + # method table to lookup for during inference on this world age + method_table::CachedMethodTable{InternalMethodTable} + + # Cache of inference results for this particular interpreter + inf_cache::Vector{InferenceResult} + # Parameters for inference and optimization inf_params::InferenceParams opt_params::OptimizationParams +end - function NativeInterpreter(world::UInt = get_world_counter(); - inf_params = InferenceParams(), - opt_params = OptimizationParams(), - ) - # Sometimes the caller is lazy and passes typemax(UInt). - # we cap it to the current world age - if world == typemax(UInt) - world = get_world_counter() - end +function NativeInterpreter(world::UInt = get_world_counter(); + inf_params::InferenceParams = InferenceParams(), + opt_params::OptimizationParams = OptimizationParams()) + # Sometimes the caller is lazy and passes typemax(UInt). + # we cap it to the current world age for correctness + if world == typemax(UInt) + world = get_world_counter() + end - # If they didn't pass typemax(UInt) but passed something more subtly - # incorrect, fail out loudly. 
- @assert world <= get_world_counter() + # If they didn't pass typemax(UInt) but passed something more subtly + # incorrect, fail out loudly. + @assert world <= get_world_counter() - return new( - # Initially empty cache - Vector{InferenceResult}(), + method_table = CachedMethodTable(InternalMethodTable(world)) - # world age counter - world, + inf_cache = Vector{InferenceResult}() # Initially empty cache - # parameters for inference and optimization - inf_params, - opt_params, - ) - end + return NativeInterpreter(world, method_table, inf_cache, inf_params, opt_params) +end + +function NativeInterpreter(interp::NativeInterpreter; + world::UInt = interp.world, + method_table::CachedMethodTable{InternalMethodTable} = interp.method_table, + inf_cache::Vector{InferenceResult} = interp.inf_cache, + inf_params::InferenceParams = interp.inf_params, + opt_params::OptimizationParams = interp.opt_params) + return NativeInterpreter(world, method_table, inf_cache, inf_params, opt_params) end # Quickly and easily satisfy the AbstractInterpreter API contract -InferenceParams(ni::NativeInterpreter) = ni.inf_params -OptimizationParams(ni::NativeInterpreter) = ni.opt_params -get_world_counter(ni::NativeInterpreter) = ni.world -get_inference_cache(ni::NativeInterpreter) = ni.cache -code_cache(ni::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, get_world_counter(ni)) +InferenceParams(interp::NativeInterpreter) = interp.inf_params +OptimizationParams(interp::NativeInterpreter) = interp.opt_params +get_world_counter(interp::NativeInterpreter) = interp.world +get_inference_cache(interp::NativeInterpreter) = interp.inf_cache +code_cache(interp::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, get_world_counter(interp)) """ already_inferred_quick_test(::AbstractInterpreter, ::MethodInstance) @@ -251,6 +459,7 @@ External `AbstractInterpreter` can optionally return `OverlayMethodTable` here to incorporate customized dispatches for the overridden methods. """ method_table(interp::AbstractInterpreter) = InternalMethodTable(get_world_counter(interp)) +method_table(interp::NativeInterpreter) = interp.method_table """ By default `AbstractInterpreter` implements the following inference bail out logic: @@ -276,3 +485,21 @@ to the call site signature. """ infer_compilation_signature(::AbstractInterpreter) = false infer_compilation_signature(::NativeInterpreter) = true + +typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.instance) +ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance) +optimizer_lattice(::AbstractInterpreter) = SimpleInferenceLattice.instance + +abstract type CallInfo end + +@nospecialize + +nsplit(info::CallInfo) = nsplit_impl(info)::Union{Nothing,Int} +getsplit(info::CallInfo, idx::Int) = getsplit_impl(info, idx)::MethodLookupResult +getresult(info::CallInfo, idx::Int) = getresult_impl(info, idx) + +nsplit_impl(::CallInfo) = nothing +getsplit_impl(::CallInfo, ::Int) = error("unexpected call into `getsplit`") +getresult_impl(::CallInfo, ::Int) = nothing + +@specialize diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl index 75675e60e1ca4..a4499e003cf2c 100644 --- a/base/compiler/typeutils.jl +++ b/base/compiler/typeutils.jl @@ -4,8 +4,6 @@ # lattice utilities # ##################### -isType(@nospecialize t) = isa(t, DataType) && t.name === _TYPE_NAME - # true if Type{T} is inlineable as constant T # requires that T is a singleton, s.t. 
T == S implies T === S isconstType(@nospecialize t) = isType(t) && hasuniquerep(t.parameters[1]) @@ -25,15 +23,30 @@ function hasuniquerep(@nospecialize t) return false end -function has_nontrivial_const_info(@nospecialize t) - isa(t, PartialStruct) && return true - isa(t, PartialOpaque) && return true - isa(t, Const) || return false - val = t.val - return !isdefined(typeof(val), :instance) && !(isa(val, Type) && hasuniquerep(val)) +""" + isTypeDataType(@nospecialize t) -> Bool + +For a type `t` test whether ∀S s.t. `isa(S, rewrap_unionall(Type{t}, ...))`, +we have `isa(S, DataType)`. In particular, if a statement is typed as `Type{t}` +(potentially wrapped in some `UnionAll`), then we are guaranteed that this statement +will be a `DataType` at runtime (and not e.g. a `Union` or `UnionAll` typeequal to it). +""" +function isTypeDataType(@nospecialize t) + isa(t, DataType) || return false + isType(t) && return false + # Could be Union{} at runtime + t === Core.TypeofBottom && return false + if t.name === Tuple.name + # If we have a Union parameter, could have been redistributed at runtime, + # e.g. `Tuple{Union{Int, Float64}, Int}` is a DataType, but + # `Union{Tuple{Int, Int}, Tuple{Float64, Int}}` is typeequal to it and + # is not. + return all(isTypeDataType, t.parameters) + end + return true end -has_const_info(@nospecialize x) = (!isa(x, Type) && !isvarargtype(x)) || isType(x) +has_extended_info(@nospecialize x) = (!isa(x, Type) && !isvarargtype(x)) || isType(x) # Subtyping currently intentionally answers certain queries incorrectly for kind types. For # some of these queries, this check can be used to somewhat protect against making incorrect @@ -53,7 +66,7 @@ end # (therefore also a lower bound on the number of fields) function datatype_min_ninitialized(t::DataType) isabstracttype(t) && return 0 - if t.name === NamedTuple_typename + if t.name === _NAMEDTUPLE_NAME names, types = t.parameters[1], t.parameters[2] if names isa Tuple return length(names) @@ -80,14 +93,15 @@ function datatype_min_ninitialized(t::DataType) return length(t.name.names) - t.name.n_uninitialized end -has_concrete_subtype(d::DataType) = d.flags & 0x20 == 0x20 # n.b. often computed only after setting the type and layout fields +has_concrete_subtype(d::DataType) = d.flags & 0x0020 == 0x0020 # n.b. 
often computed only after setting the type and layout fields -# determine whether x is a valid lattice element tag +# determine whether x is a valid lattice element # For example, Type{v} is not valid if v is a value -# Accepts TypeVars also, since it assumes the user will rewrap it correctly -function valid_as_lattice(@nospecialize(x)) +# Accepts TypeVars and has_free_typevar also, since it assumes the user will rewrap it correctly +# If astag is true, then also requires that it be a possible type tag for a valid object +function valid_as_lattice(@nospecialize(x), astag::Bool=false) x === Bottom && false - x isa TypeVar && return valid_as_lattice(x.ub) + x isa TypeVar && return valid_as_lattice(x.ub, astag) x isa UnionAll && (x = unwrap_unionall(x)) if x isa Union # the Union constructor ensures this (and we'll recheck after @@ -98,52 +112,64 @@ function valid_as_lattice(@nospecialize(x)) if isType(x) p = x.parameters[1] p isa Type || p isa TypeVar || return false + elseif astag && isstructtype(x) + datatype_fieldtypes(x) # force computation of has_concrete_subtype to be updated now + return has_concrete_subtype(x) end return true end return false end -# test if non-Type, non-TypeVar `x` can be used to parameterize a type -function valid_tparam(@nospecialize(x)) - if isa(x, Tuple) - for t in x - isa(t, Symbol) || isbits(t) || return false +function valid_typeof_tparam(@nospecialize(t)) + if t === Symbol || t === Module || isbitstype(t) + return true + end + isconcretetype(t) || return false + if t <: NamedTuple + t = t.parameters[2]::DataType + end + if t <: Tuple + for p in t.parameters + valid_typeof_tparam(p) || return false end return true end - return isa(x, Symbol) || isbits(x) + return false end +# test if non-Type, non-TypeVar `x` can be used to parameterize a type +valid_tparam(@nospecialize(x)) = valid_typeof_tparam(typeof(x)) + function compatible_vatuple(a::DataType, b::DataType) vaa = a.parameters[end] - vab = a.parameters[end] + vab = b.parameters[end] if !(isvarargtype(vaa) && isvarargtype(vab)) return isvarargtype(vaa) == isvarargtype(vab) end - (isdefined(vaa, :N) == isdefined(vab, :N)) || return false - !isdefined(vaa, :N) && return true + isdefined(vaa, :N) || return !isdefined(vab, :N) + isdefined(vab, :N) || return false return vaa.N === vab.N end # return an upper-bound on type `a` with type `b` removed +# and also any contents that are not valid type tags on any objects # such that `return <: a` && `Union{return, b} == Union{a, b}` -function typesubtract(@nospecialize(a), @nospecialize(b), MAX_UNION_SPLITTING::Int) +function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::Int) if a <: b && isnotbrokensubtype(a, b) return Bottom end ua = unwrap_unionall(a) if isa(ua, Union) - uua = typesubtract(rewrap_unionall(ua.a, a), b, MAX_UNION_SPLITTING) - uub = typesubtract(rewrap_unionall(ua.b, a), b, MAX_UNION_SPLITTING) - return Union{valid_as_lattice(uua) ? uua : Union{}, - valid_as_lattice(uub) ? uub : Union{}} + uua = typesubtract(rewrap_unionall(ua.a, a), b, max_union_splitting) + uub = typesubtract(rewrap_unionall(ua.b, a), b, max_union_splitting) + return Union{valid_as_lattice(uua, true) ? uua : Union{}, + valid_as_lattice(uub, true) ? 
uub : Union{}} elseif a isa DataType ub = unwrap_unionall(b) if ub isa DataType - if a.name === ub.name === Tuple.name && - length(a.parameters) == length(ub.parameters) - if 1 < unionsplitcost(a.parameters) <= MAX_UNION_SPLITTING + if a.name === ub.name === Tuple.name && length(a.parameters) == length(ub.parameters) + if 1 < unionsplitcost(JLTypeLattice(), a.parameters) <= max_union_splitting ta = switchtupleunion(a) return typesubtract(Union{ta...}, b, 0) elseif b isa DataType @@ -164,7 +190,8 @@ function typesubtract(@nospecialize(a), @nospecialize(b), MAX_UNION_SPLITTING::I ap = a.parameters[i] bp = b.parameters[i] (isvarargtype(ap) || isvarargtype(bp)) && return a - ta[i] = typesubtract(ap, bp, min(2, MAX_UNION_SPLITTING)) + ta[i] = typesubtract(ap, bp, min(2, max_union_splitting)) + ta[i] === Union{} && return Union{} return Tuple{ta...} end end @@ -190,11 +217,11 @@ end _typename(union::UnionAll) = _typename(union.body) _typename(a::DataType) = Const(a.name) -function tuple_tail_elem(@nospecialize(init), ct::Vector{Any}) +function tuple_tail_elem(𝕃::AbstractLattice, @nospecialize(init), ct::Vector{Any}) t = init for x in ct # FIXME: this is broken: it violates subtyping relations and creates invalid types with free typevars - t = tmerge(t, unwraptv(unwrapva(x))) + t = tmerge(𝕃, t, unwraptv(unwrapva(x))) end return Vararg{widenconst(t)} end @@ -205,10 +232,13 @@ end # or outside of the Tuple/Union nesting, though somewhat more expensive to be # outside than inside because the representation is larger (because and it # informs the callee whether any splitting is possible). -function unionsplitcost(argtypes::Union{SimpleVector,Vector{Any}}) +function unionsplitcost(𝕃::AbstractLattice, argtypes::Union{SimpleVector,Vector{Any}}) nu = 1 max = 2 for ti in argtypes + if has_extended_unionsplit(𝕃) && !isvarargtype(ti) + ti = widenconst(ti) + end if isa(ti, Union) nti = unionlen(ti) if nti > max @@ -226,12 +256,12 @@ end # and `Union{return...} == ty` function switchtupleunion(@nospecialize(ty)) tparams = (unwrap_unionall(ty)::DataType).parameters - return _switchtupleunion(Any[tparams...], length(tparams), [], ty) + return _switchtupleunion(JLTypeLattice(), Any[tparams...], length(tparams), [], ty) end -switchtupleunion(argtypes::Vector{Any}) = _switchtupleunion(argtypes, length(argtypes), [], nothing) +switchtupleunion(𝕃::AbstractLattice, argtypes::Vector{Any}) = _switchtupleunion(𝕃, argtypes, length(argtypes), [], nothing) -function _switchtupleunion(t::Vector{Any}, i::Int, tunion::Vector{Any}, @nospecialize(origt)) +function _switchtupleunion(𝕃::AbstractLattice, t::Vector{Any}, i::Int, tunion::Vector{Any}, @nospecialize(origt)) if i == 0 if origt === nothing push!(tunion, copy(t)) @@ -240,15 +270,22 @@ function _switchtupleunion(t::Vector{Any}, i::Int, tunion::Vector{Any}, @nospeci push!(tunion, tpl) end else - ti = t[i] + origti = ti = t[i] + # TODO remove this to implement callsite refinement of MustAlias if isa(ti, Union) - for ty in uniontypes(ti::Union) + for ty in uniontypes(ti) t[i] = ty - _switchtupleunion(t, i - 1, tunion, origt) + _switchtupleunion(𝕃, t, i - 1, tunion, origt) end - t[i] = ti + t[i] = origti + elseif has_extended_unionsplit(𝕃) && !isa(ti, Const) && !isvarargtype(ti) && isa(widenconst(ti), Union) + for ty in uniontypes(ti) + t[i] = ty + _switchtupleunion(𝕃, t, i - 1, tunion, origt) + end + t[i] = origti else - _switchtupleunion(t, i - 1, tunion, origt) + _switchtupleunion(𝕃, t, i - 1, tunion, origt) end end return tunion @@ -259,7 +296,7 @@ end 
unioncomplexity(@nospecialize x) = _unioncomplexity(x)::Int function _unioncomplexity(@nospecialize x) if isa(x, DataType) - x.name === Tuple.name || isvarargtype(x) || return 0 + x.name === Tuple.name || return 0 c = 0 for ti in x.parameters c = max(c, unioncomplexity(ti)) @@ -270,33 +307,76 @@ function _unioncomplexity(@nospecialize x) elseif isa(x, UnionAll) return max(unioncomplexity(x.body), unioncomplexity(x.var.ub)) elseif isa(x, TypeofVararg) - return isdefined(x, :T) ? unioncomplexity(x.T) : 0 + return isdefined(x, :T) ? unioncomplexity(x.T) + 1 : 1 else return 0 end end -# convert a Union of Tuple types to a Tuple of Unions -function unswitchtupleunion(u::Union) - ts = uniontypes(u) - n = -1 - for t in ts - if t isa DataType && t.name === Tuple.name && length(t.parameters) != 0 && !isvarargtype(t.parameters[end]) - if n == -1 - n = length(t.parameters) - elseif n != length(t.parameters) - return u - end - else - return u - end +function unionall_depth(@nospecialize ua) # aka subtype_env_size + depth = 0 + while ua isa UnionAll + depth += 1 + ua = ua.body end - Tuple{Any[ Union{Any[(t::DataType).parameters[i] for t in ts]...} for i in 1:n ]...} + return depth end -function unwraptv(@nospecialize t) +function unwraptv_ub(@nospecialize t) while isa(t, TypeVar) t = t.ub end return t end +function unwraptv_lb(@nospecialize t) + while isa(t, TypeVar) + t = t.lb + end + return t +end +const unwraptv = unwraptv_ub + +""" + is_identity_free_argtype(argtype) -> Bool + +Return `true` if the `argtype` object is identity free in the sense that this type or any +reachable through its fields has non-content-based identity (see `Base.isidentityfree`). +This query is specifically designed for `adjust_effects`, enabling it to refine the +`:consistent` effect property tainted by mutable allocation(s) within the analyzed call +graph when the return value type is `is_identity_free_argtype`, ensuring that the allocated +mutable objects are never returned. +""" +is_identity_free_argtype(@nospecialize ty) = is_identity_free_type(widenconst(ignorelimited(ty))) +is_identity_free_type(@nospecialize ty) = isidentityfree(ty) + +""" + is_immutable_argtype(argtype) -> Bool + +Return `true` if the `argtype` object is known to be immutable. +This query is specifically designed for `getfield_effects` and `isdefined_effects`, allowing +them to prove `:consistent`-cy of `getfield` / `isdefined` calls when applied to immutable +objects. Otherwise, we need to additionally prove that the non-immutable object is not a +global object to prove the `:consistent`-cy. +""" +is_immutable_argtype(@nospecialize argtype) = is_immutable_type(widenconst(ignorelimited(argtype))) +is_immutable_type(@nospecialize ty) = _is_immutable_type(unwrap_unionall(ty)) +function _is_immutable_type(@nospecialize ty) + if isa(ty, Union) + return _is_immutable_type(ty.a) && _is_immutable_type(ty.b) + end + return !isabstracttype(ty) && !ismutabletype(ty) +end + +""" + is_mutation_free_argtype(argtype) -> Bool + +Return `true` if `argtype` object is mutation free in the sense that no mutable memory +is reachable from this type (either in the type itself) or through any fields +(see `Base.ismutationfree`). +This query is specifically written for analyzing the `:inaccessiblememonly` effect property +and is supposed to improve the analysis accuracy by not tainting the `:inaccessiblememonly` +property when there is access to mutation-free global object. 
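Stepping back to the `unionall_depth` helper added earlier in this hunk: it simply counts how many `UnionAll` wrappers (introduced type variables) enclose a type body. A minimal standalone sketch of the same idea, using a made-up name so as not to shadow the compiler-internal function:

```julia
# Count the `UnionAll` (type-variable) wrappers around a type body,
# mirroring what `unionall_depth` does inside the compiler.
count_unionall_depth(t) = t isa UnionAll ? 1 + count_unionall_depth(t.body) : 0

count_unionall_depth(Array{Int,1})  # 0 -- fully instantiated
count_unionall_depth(Vector)        # 1 -- Array{T, 1} where T
count_unionall_depth(Array)         # 2 -- Array{T, N} where {T, N}
```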
+""" +is_mutation_free_argtype(@nospecialize(argtype)) = + is_mutation_free_type(widenconst(ignorelimited(argtype))) +is_mutation_free_type(@nospecialize ty) = ismutationfree(ty) diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 7ef006f244aa6..d8ca4d9551656 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -66,8 +66,6 @@ end is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo is_meta_expr(@nospecialize x) = isa(x, Expr) && is_meta_expr_head(x.head) -sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0 - function is_self_quoting(@nospecialize(x)) return isa(x,Number) || isa(x,AbstractString) || isa(x,Tuple) || isa(x,Type) || isa(x,Char) || x === nothing || isa(x,Function) @@ -77,15 +75,21 @@ function quoted(@nospecialize(x)) return is_self_quoting(x) ? x : QuoteNode(x) end +############ +# inlining # +############ + +const MAX_INLINE_CONST_SIZE = 256 + function count_const_size(@nospecialize(x), count_self::Bool = true) - (x isa Type || x isa Symbol) && return 0 + (x isa Type || x isa Core.TypeName || x isa Symbol) && return 0 ismutable(x) && return MAX_INLINE_CONST_SIZE + 1 isbits(x) && return Core.sizeof(x) dt = typeof(x) sz = count_self ? sizeof(dt) : 0 sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1 dtfd = DataTypeFieldDesc(dt) - for i = 1:nfields(x) + for i = 1:Int(datatype_nfields(dt)) isdefined(x, i) || continue f = getfield(x, i) if !dtfd[i].isptr && datatype_pointerfree(typeof(f)) @@ -101,6 +105,10 @@ function is_inlineable_constant(@nospecialize(x)) return count_const_size(x) <= MAX_INLINE_CONST_SIZE end +is_nospecialized(method::Method) = method.nospecialize ≠ 0 + +is_nospecializeinfer(method::Method) = method.nospecializeinfer && is_nospecialized(method) + ########################### # MethodInstance/CodeInfo # ########################### @@ -108,28 +116,31 @@ end invoke_api(li::CodeInstance) = ccall(:jl_invoke_api, Cint, (Any,), li) use_const_api(li::CodeInstance) = invoke_api(li) == 2 -function get_staged(mi::MethodInstance) +function get_staged(mi::MethodInstance, world::UInt) may_invoke_generator(mi) || return nothing try # user code might throw errors – ignore them - ci = ccall(:jl_code_for_staged, Any, (Any,), mi)::CodeInfo + ci = ccall(:jl_code_for_staged, Any, (Any, UInt), mi, world)::CodeInfo return ci catch return nothing end end -function retrieve_code_info(linfo::MethodInstance) +function retrieve_code_info(linfo::MethodInstance, world::UInt) m = linfo.def::Method c = nothing if isdefined(m, :generator) # user code might throw errors – ignore them - c = get_staged(linfo) + c = get_staged(linfo, world) end if c === nothing && isdefined(m, :source) src = m.source - if isa(src, Array{UInt8,1}) - c = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, src) + if src === nothing + # can happen in images built with --strip-ir + return nothing + elseif isa(src, String) + c = _uncompressed_ir(m, src) else c = copy(src::CodeInfo) end @@ -143,14 +154,22 @@ end function get_compileable_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) isa(atype, DataType) || return nothing - mt = ccall(:jl_method_table_for, Any, (Any,), atype) + mt = ccall(:jl_method_get_table, Any, (Any,), method) mt === nothing && return nothing - return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any), - mt, atype, sparams, method) + return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), + mt, 
atype, sparams, method, #=int return_if_compileable=#1) +end + +function get_nospecializeinfer_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) + isa(atype, DataType) || return method.sig + mt = ccall(:jl_method_get_table, Any, (Any,), method) + mt === nothing && return method.sig + return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), + mt, atype, sparams, method, #=int return_if_compileable=#0) end -isa_compileable_sig(@nospecialize(atype), method::Method) = - !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any), atype, method)) +isa_compileable_sig(@nospecialize(atype), sparams::SimpleVector, method::Method) = + !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any, Any), atype, sparams, method)) # eliminate UnionAll vars that might be degenerate due to having identical bounds, # or a concrete upper bound and appearing covariantly. @@ -173,6 +192,8 @@ function subst_trivial_bounds(@nospecialize(atype)) return UnionAll(v, subst_trivial_bounds(atype.body)) end +has_typevar(@nospecialize(t), v::TypeVar) = ccall(:jl_has_typevar, Cint, (Any, Any), t, v) != 0 + # If removing trivial vars from atype results in an equivalent type, use that # instead. Otherwise we can get a case like issue #38888, where a signature like # f(x::S) where S<:Int @@ -188,14 +209,12 @@ function normalize_typevars(method::Method, @nospecialize(atype), sparams::Simpl end # get a handle to the unique specialization object representing a particular instantiation of a call -function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false, compilesig::Bool=false) +@inline function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false) if isa(atype, UnionAll) atype, sparams = normalize_typevars(method, atype, sparams) end - if compilesig - new_atype = get_compileable_sig(method, atype, sparams) - new_atype === nothing && return nothing - atype = new_atype + if is_nospecializeinfer(method) + atype = get_nospecializeinfer_sig(method, atype, sparams) end if preexisting # check cached specializations @@ -209,21 +228,145 @@ function specialize_method(match::MethodMatch; kwargs...) return specialize_method(match.method, match.spec_types, match.sparams; kwargs...) end +""" + is_declared_inline(method::Method) -> Bool + +Check if `method` is declared as `@inline`. +""" +is_declared_inline(method::Method) = _is_declared_inline(method, true) + +""" + is_declared_noinline(method::Method) -> Bool + +Check if `method` is declared as `@noinline`. +""" +is_declared_noinline(method::Method) = _is_declared_inline(method, false) + +function _is_declared_inline(method::Method, inline::Bool) + isdefined(method, :source) || return false + src = method.source + isa(src, MaybeCompressed) || return false + return (inline ? is_declared_inline : is_declared_noinline)(src) +end + +""" + is_aggressive_constprop(method::Union{Method,CodeInfo}) -> Bool + +Check if `method` is declared as `Base.@constprop :aggressive`. +""" +is_aggressive_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x01 + +""" + is_no_constprop(method::Union{Method,CodeInfo}) -> Bool + +Check if `method` is declared as `Base.@constprop :none`. +""" +is_no_constprop(method::Union{Method,CodeInfo}) = method.constprop == 0x02 + +############# +# backedges # +############# + +""" + BackedgeIterator(backedges::Vector{Any}) + +Return an iterator over a list of backedges. 
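Backing up to the `Base.@constprop` query helpers added just above, a hedged sketch of how the declared flag is reflected on the `Method` object (the `clamp01` function is made up for illustration):

```julia
Base.@constprop :aggressive function clamp01(x)
    x < 0 ? 0.0 : x > 1 ? 1.0 : Float64(x)
end

m = only(methods(clamp01))
Core.Compiler.is_aggressive_constprop(m)   # true: declared with :aggressive
Core.Compiler.is_no_constprop(m)           # false
```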
Iteration returns `(sig, caller)` elements, +which will be one of the following: + +- `BackedgePair(nothing, caller::MethodInstance)`: a call made by ordinary inferable dispatch +- `BackedgePair(invokesig::Type, caller::MethodInstance)`: a call made by `invoke(f, invokesig, args...)` +- `BackedgePair(specsig::Type, mt::MethodTable)`: an abstract call + +# Examples + +```julia +julia> callme(x) = x+1 +callme (generic function with 1 method) + +julia> callyou(x) = callme(x) +callyou (generic function with 1 method) + +julia> callyou(2.0) +3.0 + +julia> mi = which(callme, (Any,)).specializations +MethodInstance for callme(::Float64) + +julia> @eval Core.Compiler for (; sig, caller) in BackedgeIterator(Main.mi.backedges) + println(sig) + println(caller) + end +nothing +callyou(Float64) from callyou(Any) +``` +""" +struct BackedgeIterator + backedges::Vector{Any} +end + +const empty_backedge_iter = BackedgeIterator(Any[]) + +struct BackedgePair + sig # ::Union{Nothing,Type} + caller::Union{MethodInstance,MethodTable} + BackedgePair(@nospecialize(sig), caller::Union{MethodInstance,MethodTable}) = new(sig, caller) +end + +function iterate(iter::BackedgeIterator, i::Int=1) + backedges = iter.backedges + i > length(backedges) && return nothing + item = backedges[i] + isa(item, MethodInstance) && return BackedgePair(nothing, item), i+1 # regular dispatch + isa(item, MethodTable) && return BackedgePair(backedges[i+1], item), i+2 # abstract dispatch + return BackedgePair(item, backedges[i+1]::MethodInstance), i+2 # `invoke` calls +end + +""" + add_invalidation_callback!(callback, mi::MethodInstance) + +Register `callback` to be triggered upon the invalidation of `mi`. +`callback` should be a function taking two arguments, `callback(replaced::MethodInstance, max_world::UInt32)`, +and it will be recursively invoked on `MethodInstance`s within the invalidation graph.
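As a hedged sketch of how this hook might be exercised (it relies on compiler internals, and `Base.method_instances` is itself an internal helper, so the exact behaviour may vary):

```julia
f(x) = x + 1
f(1)                                        # force a specialization for f(::Int)
mi = Base.method_instances(f, (Int,))[1]    # its MethodInstance (internal API)

invalidated = Any[]
Core.Compiler.add_invalidation_callback!(mi) do replaced, max_world
    push!(invalidated, (replaced, max_world))  # record what was invalidated and when
end

f(x::Int) = x + 2   # replacing the method should invalidate `mi`, firing the callback
```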
+""" +function add_invalidation_callback!(@nospecialize(callback), mi::MethodInstance) + if !isdefined(mi, :callbacks) + callbacks = mi.callbacks = Any[callback] + else + callbacks = mi.callbacks::Vector{Any} + if !any(@nospecialize(cb)->cb===callback, callbacks) + push!(callbacks, callback) + end + end + return callbacks +end + ######### # types # ######### -function singleton_type(@nospecialize(ft)) +@nospecializeinfer function singleton_type(@nospecialize(ft)) + ft = widenslotwrapper(ft) if isa(ft, Const) return ft.val elseif isconstType(ft) return ft.parameters[1] - elseif ft isa DataType && isdefined(ft, :instance) + elseif issingletontype(ft) return ft.instance end return nothing end +@nospecializeinfer function maybe_singleton_const(@nospecialize(t)) + if isa(t, DataType) + if issingletontype(t) + return Const(t.instance) + elseif isconstType(t) + return Const(t.parameters[1]) + end + end + return t +end + ################### # SSAValues/Slots # ################### @@ -239,7 +382,7 @@ function ssamap(f, @nospecialize(stmt)) return urs[] end -function foreachssa(f, @nospecialize(stmt)) +function foreachssa(@specialize(f), @nospecialize(stmt)) urs = userefs(stmt) for op in urs val = op[] @@ -249,11 +392,22 @@ function foreachssa(f, @nospecialize(stmt)) end end +function foreach_anyssa(@specialize(f), @nospecialize(stmt)) + urs = userefs(stmt) + for op in urs + val = op[] + if isa(val, AnySSAValue) + f(val) + end + end +end + function find_ssavalue_uses(body::Vector{Any}, nvals::Int) uses = BitSet[ BitSet() for i = 1:nvals ] for line in 1:length(body) e = body[line] if isa(e, ReturnNode) + isdefined(e, :val) || continue e = e.val elseif isa(e, GotoIfNot) e = e.cond @@ -292,11 +446,14 @@ function find_ssavalue_uses(e::PhiNode, uses::Vector{BitSet}, line::Int) end end -function is_throw_call(e::Expr) +function is_throw_call(e::Expr, code::Vector{Any}) if e.head === :call f = e.args[1] + if isa(f, SSAValue) + f = code[f.id] + end if isa(f, GlobalRef) - ff = abstract_eval_global(f.mod, f.name) + ff = abstract_eval_globalref_type(f) if isa(ff, Const) && ff.val === Core.throw return true end @@ -305,14 +462,14 @@ function is_throw_call(e::Expr) return false end -function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Int}) +function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Tuple{Int, Int}}) for stmt in find_throw_blocks(src.code, handler_at) src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK end return nothing end -function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int}) +function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Tuple{Int, Int}}) stmts = BitSet() n = length(code) for i in n:-1:1 @@ -324,8 +481,8 @@ function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int}) end elseif s.head === :return # see `ReturnNode` handling - elseif is_throw_call(s) - if handler_at[i] == 0 + elseif is_throw_call(s, code) + if handler_at[i][1] == 0 push!(stmts, i) end elseif i+1 in stmts @@ -351,39 +508,9 @@ function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int}) end # using a function to ensure we can infer this -@inline slot_id(s) = isa(s, SlotNumber) ? (s::SlotNumber).id : - isa(s, Argument) ? 
(s::Argument).n : (s::TypedSlot).id - -###################### -# IncrementalCompact # -###################### - -# specifically meant to be used with body1 = compact.result and body2 = compact.new_new_nodes, with nvals == length(compact.used_ssas) -function find_ssavalue_uses1(compact) - body1, body2 = compact.result.inst, compact.new_new_nodes.stmts.inst - nvals = length(compact.used_ssas) - nbody1 = length(body1) - nbody2 = length(body2) - - uses = zeros(Int, nvals) - function increment_uses(ssa::SSAValue) - uses[ssa.id] += 1 - end - - for line in 1:(nbody1 + nbody2) - # index into the right body - if line <= nbody1 - isassigned(body1, line) || continue - e = body1[line] - else - line -= nbody1 - isassigned(body2, line) || continue - e = body2[line] - end - - foreachssa(increment_uses, e) - end - return uses +@inline function slot_id(s) + isa(s, SlotNumber) && return s.id + return (s::Argument).n end ########### @@ -394,7 +521,7 @@ is_root_module(m::Module) = false inlining_enabled() = (JLOptions().can_inline == 1) function coverage_enabled(m::Module) - ccall(:jl_generating_output, Cint, ()) == 0 || return false # don't alter caches + generating_output() && return false # don't alter caches cov = JLOptions().code_coverage if cov == 1 # user m = moduleroot(m) diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl index 0931686184a2e..2428ea8a38892 100644 --- a/base/compiler/validation.jl +++ b/base/compiler/validation.jl @@ -13,13 +13,13 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}( :new => 1:typemax(Int), :splatnew => 2:2, :the_exception => 0:0, - :enter => 1:1, - :leave => 1:1, + :enter => 1:2, + :leave => 1:typemax(Int), :pop_exception => 1:1, :inbounds => 1:1, :inline => 1:1, :noinline => 1:1, - :boundscheck => 0:0, + :boundscheck => 0:1, :copyast => 1:1, :meta => 0:typemax(Int), :global => 1:1, @@ -103,11 +103,11 @@ function _validate_val!(@nospecialize(x), errors, ssavals::BitSet) end """ - validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo) + validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo) Validate `c`, logging any violation by pushing an `InvalidCodeError` into `errors`. """ -function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_level::Bool = false) +function validate_code!(errors::Vector{InvalidCodeError}, c::CodeInfo, is_top_level::Bool = false) ssavals = BitSet() lhs_slotnums = BitSet() @@ -160,6 +160,13 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_ push!(errors, InvalidCodeError(INVALID_CALL_ARG, x.cond)) end validate_val!(x.cond) + elseif isa(x, EnterNode) + if isdefined(x, :scope) + if !is_valid_argument(x.scope) + push!(errors, InvalidCodeError(INVALID_CALL_ARG, x.scope)) + end + validate_val!(x.scope) + end elseif isa(x, ReturnNode) if isdefined(x, :val) if !is_valid_return(x.val) @@ -199,16 +206,15 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_ end """ - validate_code!(errors::Vector{>:InvalidCodeError}, mi::MethodInstance, - c::Union{Nothing,CodeInfo} = Core.Compiler.retrieve_code_info(mi)) + validate_code!(errors::Vector{InvalidCodeError}, mi::MethodInstance, + c::Union{Nothing,CodeInfo}) Validate `mi`, logging any violation by pushing an `InvalidCodeError` into `errors`. If `isa(c, CodeInfo)`, also call `validate_code!(errors, c)`. It is assumed that `c` is -the `CodeInfo` instance associated with `mi`. +a `CodeInfo` instance associated with `mi`. 
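For context, the validator can be run directly against lowered code; a small hedged sketch using the internal entry points shown above:

```julia
ci = only(code_lowered(sin, (Float64,)))   # a CodeInfo for sin(::Float64)
errors = Core.Compiler.validate_code(ci)   # collects any InvalidCodeError into a fresh vector
isempty(errors)                            # expected: true for well-formed lowered code
```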
""" -function validate_code!(errors::Vector{>:InvalidCodeError}, mi::Core.MethodInstance, - c::Union{Nothing,CodeInfo} = Core.Compiler.retrieve_code_info(mi)) +function validate_code!(errors::Vector{InvalidCodeError}, mi::Core.MethodInstance, c::Union{Nothing,CodeInfo}) is_top_level = mi.def isa Module if is_top_level mnargs = 0 @@ -231,13 +237,13 @@ end validate_code(args...) = validate_code!(Vector{InvalidCodeError}(), args...) -is_valid_lvalue(@nospecialize(x)) = isa(x, Slot) || isa(x, GlobalRef) +is_valid_lvalue(@nospecialize(x)) = isa(x, SlotNumber) || isa(x, GlobalRef) function is_valid_argument(@nospecialize(x)) - if isa(x, Slot) || isa(x, Argument) || isa(x, SSAValue) || isa(x, GlobalRef) || isa(x, QuoteNode) || - (isa(x,Expr) && (x.head in (:static_parameter, :boundscheck))) || - isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) || - isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing + if isa(x, SlotNumber) || isa(x, Argument) || isa(x, SSAValue) || + isa(x, GlobalRef) || isa(x, QuoteNode) || (isa(x, Expr) && is_value_pos_expr_head(x.head)) || + isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) || + isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing return true end # TODO: consider being stricter about what needs to be wrapped with QuoteNode @@ -254,5 +260,3 @@ function is_valid_rvalue(@nospecialize(x)) end is_valid_return(@nospecialize(x)) = is_valid_argument(x) || (isa(x, Expr) && x.head === :lambda) - -is_flag_set(byte::UInt8, flag::UInt8) = (byte & flag) == flag diff --git a/base/complex.jl b/base/complex.jl index f68e519386d93..148bbdcf11f5e 100644 --- a/base/complex.jl +++ b/base/complex.jl @@ -120,6 +120,7 @@ Float64 real(T::Type) = typeof(real(zero(T))) real(::Type{T}) where {T<:Real} = T real(C::Type{<:Complex}) = fieldtype(C, 1) +real(::Type{Union{}}, slurp...) = Union{}(im) """ isreal(x) -> Bool @@ -133,6 +134,9 @@ is true. julia> isreal(5.) true +julia> isreal(1 - 3im) +false + julia> isreal(Inf + 0im) true @@ -174,7 +178,7 @@ complex(x::Real, y::Real) = Complex(x, y) complex(T::Type) Return an appropriate type which can represent a value of type `T` as a complex number. -Equivalent to `typeof(complex(zero(T)))`. +Equivalent to `typeof(complex(zero(T)))` if `T` does not contain `Missing`. # Examples ```jldoctest @@ -183,6 +187,9 @@ Complex{Int64} julia> complex(Int) Complex{Int64} + +julia> complex(Union{Int, Missing}) +Union{Missing, Complex{Int64}} ``` """ complex(::Type{T}) where {T<:Real} = Complex{T} @@ -192,7 +199,7 @@ flipsign(x::Complex, y::Real) = ifelse(signbit(y), -x, x) function show(io::IO, z::Complex) r, i = reim(z) - compact = get(io, :compact, false) + compact = get(io, :compact, false)::Bool show(io, r) if signbit(i) && !isnan(i) print(io, compact ? 
"-" : " - ") @@ -241,7 +248,9 @@ bswap(z::Complex) = Complex(bswap(real(z)), bswap(imag(z))) ==(z::Complex, x::Real) = isreal(z) && real(z) == x ==(x::Real, z::Complex) = isreal(z) && real(z) == x -isequal(z::Complex, w::Complex) = isequal(real(z),real(w)) & isequal(imag(z),imag(w)) +isequal(z::Complex, w::Complex) = isequal(real(z),real(w))::Bool & isequal(imag(z),imag(w))::Bool +isequal(z::Complex, w::Real) = isequal(real(z),w)::Bool & isequal(imag(z),zero(w))::Bool +isequal(z::Real, w::Complex) = isequal(z,real(w))::Bool & isequal(zero(z),imag(w))::Bool in(x::Complex, r::AbstractRange{<:Real}) = isreal(x) && real(x) in r @@ -469,9 +478,13 @@ function inv(z::Complex{T}) where T<:Union{Float16,Float32} end function inv(w::ComplexF64) c, d = reim(w) - (isinf(c) | isinf(d)) && return complex(copysign(0.0, c), flipsign(-0.0, d)) absc, absd = abs(c), abs(d) - cd = ifelse(absc>absd, absc, absd) # cheap `max`: don't need sign- and nan-checks here + cd, dc = ifelse(absc>absd, (absc, absd), (absd, absc)) + # no overflow from abs2 + if sqrt(floatmin(Float64)/2) <= cd <= sqrt(floatmax(Float64)/2) + return conj(w) / muladd(cd, cd, dc*dc) + end + (isinf(c) | isinf(d)) && return complex(copysign(0.0, c), flipsign(-0.0, d)) ϵ = eps(Float64) bs = 2/(ϵ*ϵ) @@ -490,12 +503,13 @@ function inv(w::ComplexF64) else q, p = robust_cinv(-d, -c) end - return ComplexF64(p*s, q*s) # undo scaling + return ComplexF64(p*s, q*s) end function robust_cinv(c::Float64, d::Float64) r = d/c - p = inv(muladd(d, r, c)) - q = -r*p + z = muladd(d, r, c) + p = 1.0/z + q = -r/z return p, q end @@ -591,7 +605,7 @@ julia> cispi(10000) 1.0 + 0.0im julia> cispi(0.25 + 1im) -0.030556854645952924 + 0.030556854645952924im +0.030556854645954562 + 0.03055685464595456im ``` !!! compat "Julia 1.6" @@ -601,8 +615,9 @@ function cispi end cispi(theta::Real) = Complex(reverse(sincospi(theta))...) function cispi(z::Complex) - sipi, copi = sincospi(z) - return complex(real(copi) - imag(sipi), imag(copi) + real(sipi)) + v = exp(-(pi*imag(z))) + s, c = sincospi(real(z)) + Complex(v * c, v * s) end """ @@ -738,7 +753,7 @@ function log1p(z::Complex{T}) where T # allegedly due to Kahan, only modified to handle real(u) <= 0 # differently to avoid inaccuracy near z==-2 and for correct branch cut u = one(float(T)) + z - u == 1 ? convert(typeof(u), z) : real(u) <= 0 ? log(u) : log(u)*z/(u-1) + u == 1 ? convert(typeof(u), z) : real(u) <= 0 ? log(u) : log(u)*(z/(u-1)) elseif isnan(zr) Complex(zr, zr) elseif isfinite(zi) @@ -1063,18 +1078,32 @@ end #Requires two different RoundingModes for the real and imaginary components """ round(z::Complex[, RoundingModeReal, [RoundingModeImaginary]]) - round(z::Complex[, RoundingModeReal, [RoundingModeImaginary]]; digits=, base=10) - round(z::Complex[, RoundingModeReal, [RoundingModeImaginary]]; sigdigits=, base=10) + round(z::Complex[, RoundingModeReal, [RoundingModeImaginary]]; digits=0, base=10) + round(z::Complex[, RoundingModeReal, [RoundingModeImaginary]]; sigdigits, base=10) Return the nearest integral value of the same type as the complex-valued `z` to `z`, breaking ties using the specified [`RoundingMode`](@ref)s. The first [`RoundingMode`](@ref) is used for rounding the real components while the second is used for rounding the imaginary components. + +`RoundingModeReal` and `RoundingModeImaginary` default to [`RoundNearest`](@ref), +which rounds to the nearest integer, with ties (fractional values of 0.5) +being rounded to the nearest even integer. 
+ # Example ```jldoctest julia> round(3.14 + 4.5im) 3.0 + 4.0im + +julia> round(3.14 + 4.5im, RoundUp, RoundNearestTiesUp) +4.0 + 5.0im + +julia> round(3.14159 + 4.512im; digits = 1) +3.1 + 4.5im + +julia> round(3.14159 + 4.512im; sigdigits = 3) +3.14 + 4.51im ``` """ function round(z::Complex, rr::RoundingMode=RoundNearest, ri::RoundingMode=rr; kwargs...) diff --git a/base/condition.jl b/base/condition.jl index 4965b43a7019b..9f62593afaf77 100644 --- a/base/condition.jl +++ b/base/condition.jl @@ -78,12 +78,16 @@ islocked(c::GenericCondition) = islocked(c.lock) lock(f, c::GenericCondition) = lock(f, c.lock) # have waiter wait for c -function _wait2(c::GenericCondition, waiter::Task) +function _wait2(c::GenericCondition, waiter::Task, first::Bool=false) ct = current_task() assert_havelock(c) - push!(c.waitq, waiter) + if first + pushfirst!(c.waitq, waiter) + else + push!(c.waitq, waiter) + end # since _wait2 is similar to schedule, we should observe the sticky bit now - if waiter.sticky && Threads.threadid(waiter) == 0 + if waiter.sticky && Threads.threadid(waiter) == 0 && !GC.in_finalizer() # Issue #41324 # t.sticky && tid == 0 is a task that needs to be co-scheduled with # the parent task. If the parent (current_task) is not sticky we must @@ -103,7 +107,9 @@ Block the current task until some event occurs, depending on the type of the arg * [`Channel`](@ref): Wait for a value to be appended to the channel. * [`Condition`](@ref): Wait for [`notify`](@ref) on a condition and return the `val` - parameter passed to `notify`. + parameter passed to `notify`. Waiting on a condition additionally allows passing + `first=true` which results in the waiter being put _first_ in line to wake up on `notify` + instead of the usual first-in-first-out behavior. * `Process`: Wait for a process or process chain to exit. The `exitcode` field of a process can be used to determine success or failure. * [`Task`](@ref): Wait for a `Task` to finish. If the task fails with an exception, a @@ -116,14 +122,14 @@ restarted by an explicit call to [`schedule`](@ref) or [`yieldto`](@ref). Often `wait` is called within a `while` loop to ensure a waited-for condition is met before proceeding. """ -function wait(c::GenericCondition) +function wait(c::GenericCondition; first::Bool=false) ct = current_task() - _wait2(c, ct) + _wait2(c, ct, first) token = unlockall(c.lock) try return wait() catch - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct) rethrow() finally relockall(c.lock, token) @@ -154,8 +160,6 @@ end notify_error(c::GenericCondition, err) = notify(c, err, true, true) -n_waiters(c::GenericCondition) = length(c.waitq) - """ isempty(condition) @@ -171,8 +175,9 @@ isempty(c::GenericCondition) = isempty(c.waitq) Create an edge-triggered event source that tasks can wait for. Tasks that call [`wait`](@ref) on a `Condition` are suspended and queued. Tasks are woken up when [`notify`](@ref) is later called on -the `Condition`. Edge triggering means that only tasks waiting at the time [`notify`](@ref) is -called can be woken up. For level-triggered notifications, you must keep extra state to keep +the `Condition`. Waiting on a condition can return a value or raise an error if the optional arguments +of [`notify`](@ref) are used. Edge triggering means that only tasks waiting at the time [`notify`](@ref) +is called can be woken up. 
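A hedged sketch of the `first=true` keyword added to `wait` above; the wake-up order shown is the expected one rather than a hard guarantee, since it depends on task scheduling:

```julia
c = Condition()
order = Symbol[]
t1 = @async (wait(c); push!(order, :fifo))             # queued first, ordinary FIFO position
t2 = @async (wait(c; first=true); push!(order, :vip))  # jumps to the front of the wait queue
yield()          # let both tasks block on the condition
notify(c)        # wakes all waiters, front of the queue first
sleep(0.1)
order            # expected: [:vip, :fifo]
```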
For level-triggered notifications, you must keep extra state to keep track of whether a notification has happened. The [`Channel`](@ref) and [`Threads.Event`](@ref) types do this, and can be used for level-triggered events. diff --git a/base/coreio.jl b/base/coreio.jl index 3e508c64a0a64..7fc608111d5f2 100644 --- a/base/coreio.jl +++ b/base/coreio.jl @@ -11,6 +11,7 @@ struct DevNull <: IO end const devnull = DevNull() write(::DevNull, ::UInt8) = 1 unsafe_write(::DevNull, ::Ptr{UInt8}, n::UInt)::Int = n +closewrite(::DevNull) = nothing close(::DevNull) = nothing wait_close(::DevNull) = wait() bytesavailable(io::DevNull) = 0 diff --git a/base/cpuid.jl b/base/cpuid.jl index 476e4b49fa1dc..48930d8064ba9 100644 --- a/base/cpuid.jl +++ b/base/cpuid.jl @@ -21,7 +21,7 @@ Base.:<=(a::ISA, b::ISA) = a.features <= b.features Base.:<(a::ISA, b::ISA) = a.features < b.features Base.isless(a::ISA, b::ISA) = a < b -include("../features_h.jl") +include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "features_h.jl")) # include($BUILDROOT/base/features_h.jl) # Keep in sync with `arch_march_isa_mapping`. const ISAs_by_family = Dict( diff --git a/base/ctypes.jl b/base/ctypes.jl index 26640ed82bef5..45f01b684902f 100644 --- a/base/ctypes.jl +++ b/base/ctypes.jl @@ -113,3 +113,7 @@ const Cfloat = Float32 Equivalent to the native `double` c-type ([`Float64`](@ref)). """ const Cdouble = Float64 + + +# we have no `Float16` alias, because C does not define a standard fp16 type. Julia follows +# the _Float16 C ABI; if that becomes standard, we can add an appropriate alias here. diff --git a/base/deepcopy.jl b/base/deepcopy.jl index 317d999004c42..e0cb6c5e781fa 100644 --- a/base/deepcopy.jl +++ b/base/deepcopy.jl @@ -9,8 +9,11 @@ deepcopy(x) Create a deep copy of `x`: everything is copied recursively, resulting in a fully -independent object. For example, deep-copying an array produces a new array whose elements -are deep copies of the original elements. Calling `deepcopy` on an object should generally +independent object. For example, deep-copying an array creates deep copies of all +the objects it contains and produces a new array with the consistent relationship +structure (e.g., if the first two elements are the same object in the original array, +the first two elements of the new array will also be the same `deepcopy`ed object). +Calling `deepcopy` on an object should generally have the same effect as serializing and then deserializing it. While it isn't normally necessary, user-defined types can override the default `deepcopy` @@ -21,7 +24,7 @@ so far within the recursion. Within the definition, `deepcopy_internal` should b in place of `deepcopy`, and the `dict` variable should be updated as appropriate before returning. 
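A hedged sketch of such an override for a hypothetical type whose `cache` field should not be copied; note how the `IdDict` is consulted and updated so already-copied objects are reused:

```julia
mutable struct CachedVec
    data::Vector{Int}
    cache::Union{Nothing, Vector{Int}}
end

function Base.deepcopy_internal(x::CachedVec, dict::IdDict)
    haskey(dict, x) && return dict[x]::CachedVec                   # reuse an existing copy
    y = CachedVec(Base.deepcopy_internal(x.data, dict), nothing)   # deep-copy data, drop cache
    dict[x] = y                                                    # record the copy for sharing
    return y
end

v = [1, 2, 3]
a = CachedVec(v, v)
b = deepcopy(a)
(b.data == [1, 2, 3], b.cache === nothing)   # (true, true)
```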
""" -function deepcopy(x) +function deepcopy(@nospecialize x) isbitstype(typeof(x)) && return x return deepcopy_internal(x, IdDict())::typeof(x) end @@ -85,30 +88,51 @@ function deepcopy_internal(@nospecialize(x), stackdict::IdDict) return y::T end -function deepcopy_internal(x::Array, stackdict::IdDict) +function deepcopy_internal(x::Memory, stackdict::IdDict) if haskey(stackdict, x) return stackdict[x]::typeof(x) end - _deepcopy_array_t(x, eltype(x), stackdict) + _deepcopy_memory_t(x, eltype(x), stackdict) end -function _deepcopy_array_t(@nospecialize(x::Array), T, stackdict::IdDict) +function _deepcopy_memory_t(@nospecialize(x::Memory), T, stackdict::IdDict) if isbitstype(T) return (stackdict[x]=copy(x)) end - dest = similar(x) + dest = typeof(x)(undef, length(x)) stackdict[x] = dest + xr = Core.memoryref(x) + dr = Core.memoryref(dest) for i = 1:length(x) - if ccall(:jl_array_isassigned, Cint, (Any, Csize_t), x, i-1) != 0 - xi = ccall(:jl_arrayref, Any, (Any, Csize_t), x, i-1) + xi = Core.memoryref(xr, i, false) + if Core.memoryref_isassigned(xi, :not_atomic, false) + xi = Core.memoryrefget(xi, :not_atomic, false) if !isbits(xi) xi = deepcopy_internal(xi, stackdict)::typeof(xi) end - ccall(:jl_arrayset, Cvoid, (Any, Any, Csize_t), dest, xi, i-1) + di = Core.memoryref(dr, i, false) + di = Core.memoryrefset!(di, xi, :not_atomic, false) end end return dest end +@eval function deepcopy_internal(x::Array{T, N}, stackdict::IdDict) where {T, N} + if haskey(stackdict, x) + return stackdict[x]::typeof(x) + end + stackdict[x] = $(Expr(:new, :(Array{T, N}), :(deepcopy_internal(x.ref, stackdict)), :(x.size))) +end +function deepcopy_internal(x::GenericMemoryRef, stackdict::IdDict) + if haskey(stackdict, x) + return stackdict[x]::typeof(x) + end + mem = getfield(x, :mem) + dest = GenericMemoryRef(deepcopy_internal(mem, stackdict)::typeof(mem)) + i = memoryrefoffset(x) + i == 1 || (dest = Core.memoryref(dest, i, true)) + return dest +end + function deepcopy_internal(x::Union{Dict,IdDict}, stackdict::IdDict) if haskey(stackdict, x) @@ -140,7 +164,7 @@ function deepcopy_internal(x::GenericCondition, stackdict::IdDict) if haskey(stackdict, x) return stackdict[x] end - y = typeof(x)(deepcopy_internal(x.lock)) + y = typeof(x)(deepcopy_internal(x.lock, stackdict)) stackdict[x] = y return y end diff --git a/base/deprecated.jl b/base/deprecated.jl index f3f127e81b795..5a7ec69979728 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -36,10 +36,19 @@ julia> @deprecate old(x) new(x) false old (generic function with 1 method) ``` -Calls to `@deprecate` without explicit type-annotations will define deprecated methods -accepting arguments of type `Any`. To restrict deprecation to a specific signature, annotate -the arguments of `old`. For example, -```jldoctest; filter = r"@ .*" +Calls to `@deprecate` without explicit type-annotations will define +deprecated methods accepting any number of positional and keyword +arguments of type `Any`. + +!!! compat "Julia 1.9" + Keyword arguments are forwarded when there is no explicit type + annotation as of Julia 1.9. For older versions, you can manually + forward positional and keyword arguments by doing `@deprecate + old(args...; kwargs...) new(args...; kwargs...)`. + +To restrict deprecation to a specific signature, annotate the +arguments of `old`. For example, +```jldoctest; filter = r"@ .*"a julia> new(x::Int) = x; julia> new(x::Float64) = 2x; @@ -101,10 +110,10 @@ macro deprecate(old, new, export_old=true) end Expr(:toplevel, export_old ? 
Expr(:export, esc(old)) : nothing, - :(function $(esc(old))(args...) + :(function $(esc(old))(args...; kwargs...) $meta depwarn($"`$old` is deprecated, use `$new` instead.", Core.Typeof($(esc(old))).name.mt.name) - $(esc(new))(args...) + $(esc(new))(args...; kwargs...) end)) end end @@ -158,11 +167,8 @@ function firstcaller(bt::Vector, funcsyms) if !found li = lkup.linfo if li isa Core.MethodInstance - ft = ccall(:jl_first_argument_datatype, Any, (Any,), (li.def::Method).sig) - if isa(ft, DataType) && ft.name === Type.body.name - ft = unwrap_unionall(ft.parameters[1]) - found = (isa(ft, DataType) && ft.name.name in funcsyms) - end + def = li.def + found = def isa Method && def.name in funcsyms end end end @@ -265,14 +271,10 @@ getindex(match::Core.MethodMatch, field::Int) = # these were internal functions, but some packages seem to be relying on them tuple_type_head(T::Type) = fieldtype(T, 1) tuple_type_cons(::Type, ::Type{Union{}}) = Union{} -function tuple_type_cons(::Type{S}, ::Type{T}) where T<:Tuple where S - @_foldable_meta +@assume_effects :foldable tuple_type_cons(::Type{S}, ::Type{T}) where T<:Tuple where S = Tuple{S, T.parameters...} -end -function parameter_upper_bound(t::UnionAll, idx) - @_foldable_meta - return rewrap_unionall((unwrap_unionall(t)::DataType).parameters[idx], t) -end +@assume_effects :foldable parameter_upper_bound(t::UnionAll, idx) = + rewrap_unionall((unwrap_unionall(t)::DataType).parameters[idx], t) # these were internal functions, but some packages seem to be relying on them @deprecate cat_shape(dims, shape::Tuple{}, shapes::Tuple...) cat_shape(dims, shapes) false @@ -281,7 +283,7 @@ cat_shape(dims, shape::Tuple{}) = () # make sure `cat_shape(dims, ())` do not re @deprecate unsafe_indices(A) axes(A) false @deprecate unsafe_length(r) length(r) false -# these were internal type aliases, but some pacakges seem to be relying on them +# these were internal type aliases, but some packages seem to be relying on them const Any16{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any, Any,Any,Any,Any,Any,Any,Any,Any,Vararg{Any,N}} const All16{T,N} = Tuple{T,T,T,T,T,T,T,T, @@ -312,6 +314,78 @@ const var"@_noinline_meta" = var"@noinline" # BEGIN 1.9 deprecations -@deprecate splat(x) Splat(x) false +# We'd generally like to avoid direct external access to internal fields +# Core.Compiler.is_inlineable and Core.Compiler.set_inlineable! move towards this direction, +# but we need to keep these around for compat +function getproperty(ci::CodeInfo, s::Symbol) + s === :inlineable && return Core.Compiler.is_inlineable(ci) + return getfield(ci, s) +end + +function setproperty!(ci::CodeInfo, s::Symbol, v) + s === :inlineable && return Core.Compiler.set_inlineable!(ci, v) + return setfield!(ci, s, convert(fieldtype(CodeInfo, s), v)) +end + +@eval Threads nthreads() = threadpoolsize() + +@eval Threads begin + """ + resize_nthreads!(A, copyvalue=A[1]) + + Resize the array `A` to length [`nthreads()`](@ref). Any new + elements that are allocated are initialized to `deepcopy(copyvalue)`, + where `copyvalue` defaults to `A[1]`. + + This is typically used to allocate per-thread variables, and + should be called in `__init__` if `A` is a global constant. + + !!! warning + + This function is deprecated, since as of Julia v1.9 the number of + threads can change at run time. Instead, per-thread state should be + created as needed based on the thread id of the caller. 
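A hedged sketch of the replacement pattern this warning recommends, with made-up names (`PER_THREAD`, `thread_buffer`): allocate per-thread state lazily, keyed by the caller's thread id, rather than pre-sizing a vector to `nthreads()`:

```julia
const PER_THREAD = Dict{Int, Vector{Float64}}()
const PER_THREAD_LOCK = ReentrantLock()

function thread_buffer()
    lock(PER_THREAD_LOCK) do
        # create the calling thread's buffer on first use
        get!(() -> Float64[], PER_THREAD, Threads.threadid())
    end
end
```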
+ """ + function resize_nthreads!(A::AbstractVector, copyvalue=A[1]) + nthr = nthreads() + nold = length(A) + resize!(A, nthr) + for i = nold+1:nthr + A[i] = deepcopy(copyvalue) + end + return A + end +end # END 1.9 deprecations + +# BEGIN 1.10 deprecations + +""" + @pure ex + +`@pure` gives the compiler a hint for the definition of a pure function, +helping for type inference. + +!!! warning + This macro is intended for internal compiler use and may be subject to changes. + +!!! warning + In Julia 1.8 and higher, it is favorable to use [`@assume_effects`](@ref) instead of `@pure`. + This is because `@assume_effects` allows a finer grained control over Julia's purity + modeling and the effect system enables a wider range of optimizations. +""" +macro pure(ex) + return esc(:(Base.@assume_effects :foldable $ex)) +end + +# END 1.10 deprecations + +# BEGIN 1.11 deprecations + +# these were never a part of the public API and so they can be removed without deprecation +# in a minor release but we're being nice and trying to avoid transient breakage. +@deprecate permute!!(a, p::AbstractVector{<:Integer}) permute!(a, p) false +@deprecate invpermute!!(a, p::AbstractVector{<:Integer}) invpermute!(a, p) false + +# END 1.11 deprecations diff --git a/base/dict.jl b/base/dict.jl index 22fd8a3a9f844..89592d06d7bb6 100644 --- a/base/dict.jl +++ b/base/dict.jl @@ -53,12 +53,20 @@ Dict{String, Int64} with 2 entries: "B" => 2 "A" => 1 ``` + +!!! warning + + Keys are allowed to be mutable, but if you do mutate stored + keys, the hash table may become internally inconsistent, in which case + the `Dict` will not work properly. [`IdDict`](@ref) can be an + alternative if you need to mutate keys. + """ mutable struct Dict{K,V} <: AbstractDict{K,V} # Metadata: empty => 0x00, removed => 0x7f, full => 0b1[7 most significant hash bits] - slots::Vector{UInt8} - keys::Array{K,1} - vals::Array{V,1} + slots::Memory{UInt8} + keys::Memory{K} + vals::Memory{V} ndel::Int count::Int age::UInt @@ -67,13 +75,15 @@ mutable struct Dict{K,V} <: AbstractDict{K,V} function Dict{K,V}() where V where K n = 16 - new(zeros(UInt8,n), Vector{K}(undef, n), Vector{V}(undef, n), 0, 0, 0, n, 0) + slots = Memory{UInt8}(undef,n) + fill!(slots, 0x0) + new(slots, Memory{K}(undef, n), Memory{V}(undef, n), 0, 0, 0, n, 0) end function Dict{K,V}(d::Dict{K,V}) where V where K new(copy(d.slots), copy(d.keys), copy(d.vals), d.ndel, d.count, d.age, d.idxfloor, d.maxprobe) end - function Dict{K, V}(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) where {K, V} + function Dict{K, V}(slots::Memory{UInt8}, keys::Memory{K}, vals::Memory{V}, ndel::Int, count::Int, age::UInt, idxfloor::Int, maxprobe::Int) where {K, V} new(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) end end @@ -147,8 +157,7 @@ end empty(a::AbstractDict, ::Type{K}, ::Type{V}) where {K, V} = Dict{K, V}() # Gets 7 most significant bits from the hash (hsh), first bit is 1 -_shorthash7(hsh::UInt32) = (hsh >> UInt(25))%UInt8 | 0x80 -_shorthash7(hsh::UInt64) = (hsh >> UInt(57))%UInt8 | 0x80 +_shorthash7(hsh::UInt) = (hsh >> (8sizeof(UInt)-7))%UInt8 | 0x80 # hashindex (key, sz) - computes optimal position and shorthash7 # idx - optimal position in the hash table @@ -172,17 +181,20 @@ end h.age += 1 h.idxfloor = 1 if h.count == 0 - resize!(h.slots, newsz) + # TODO: tryresize + h.slots = Memory{UInt8}(undef, newsz) fill!(h.slots, 0x0) - resize!(h.keys, newsz) - resize!(h.vals, newsz) + h.keys = Memory{K}(undef, newsz) + h.vals = Memory{V}(undef, newsz) h.ndel = 0 + h.maxprobe = 0 
return h end - slots = zeros(UInt8,newsz) - keys = Vector{K}(undef, newsz) - vals = Vector{V}(undef, newsz) + slots = Memory{UInt8}(undef, newsz) + fill!(slots, 0x0) + keys = Memory{K}(undef, newsz) + vals = Memory{V}(undef, newsz) age0 = h.age count = 0 maxprobe = 0 @@ -205,7 +217,7 @@ end end end - @assert h.age == age0 "Muliple concurent writes to Dict detected!" + @assert h.age == age0 "Multiple concurrent writes to Dict detected!" h.age += 1 h.slots = slots h.keys = keys @@ -216,13 +228,13 @@ end return h end -function sizehint!(d::Dict{T}, newsz) where T +function sizehint!(d::Dict{T}, newsz; shrink::Bool=true) where T oldsz = length(d.slots) # limit new element count to max_values of the key type newsz = min(max(newsz, length(d)), max_values(T)::Int) # need at least 1.5n space to hold n elements newsz = _tablesz(cld(3 * newsz, 2)) - return newsz == oldsz ? d : rehash!(d, newsz) + return (shrink ? newsz == oldsz : newsz <= oldsz) ? d : rehash!(d, newsz) end """ @@ -246,12 +258,13 @@ Dict{String, Int64}() function empty!(h::Dict{K,V}) where V where K fill!(h.slots, 0x0) sz = length(h.slots) - empty!(h.keys) - empty!(h.vals) - resize!(h.keys, sz) - resize!(h.vals, sz) + for i in 1:sz + _unsetindex!(h.keys, i) + _unsetindex!(h.vals, i) + end h.ndel = 0 h.count = 0 + h.maxprobe = 0 h.age += 1 h.idxfloor = sz return h @@ -263,12 +276,13 @@ function ht_keyindex(h::Dict{K,V}, key) where V where K sz = length(h.keys) iter = 0 maxprobe = h.maxprobe + maxprobe < sz || throw(AssertionError()) # This error will never trigger, but is needed for terminates_locally to be valid index, sh = hashindex(key, sz) keys = h.keys - @inbounds while true + @assume_effects :terminates_locally :noub @inbounds while true isslotempty(h,index) && return -1 - if h.slots[index] == sh + if sh == h.slots[index] k = keys[index] if (key === k || isequal(key, k)) return index @@ -339,6 +353,7 @@ end ht_keyindex2!(h::Dict, key) = ht_keyindex2_shorthash!(h, key)[1] @propagate_inbounds function _setindex!(h::Dict, v, key, index, sh = _shorthash7(hash(key))) + h.ndel -= isslotmissing(h, index) h.slots[index] = sh h.keys[index] = key h.vals[index] = v @@ -350,23 +365,27 @@ ht_keyindex2!(h::Dict, key) = ht_keyindex2_shorthash!(h, key)[1] sz = length(h.keys) # Rehash now if necessary - if h.ndel >= ((3*sz)>>2) || h.count*3 > sz*2 - # > 3/4 deleted or > 2/3 full + if (h.count + h.ndel)*3 > sz*2 + # > 2/3 full (including tombstones) rehash!(h, h.count > 64000 ? h.count*2 : h.count*4) end nothing end function setindex!(h::Dict{K,V}, v0, key0) where V where K - key = convert(K, key0) - if !isequal(key, key0) - throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K")) + if key0 isa K + key = key0 + else + key = convert(K, key0)::K + if !(isequal(key, key0)::Bool) + throw(KeyTypeError(K, key0)) + end end setindex!(h, v0, key) end function setindex!(h::Dict{K,V}, v0, key::K) where V where K - v = convert(V, v0) + v = v0 isa V ? v0 : convert(V, v0)::V index, sh = ht_keyindex2_shorthash!(h, key) if index > 0 @@ -423,7 +442,7 @@ Dict{String, Int64} with 4 entries: get!(collection, key, default) """ - get!(f::Function, collection, key) + get!(f::Union{Function, Type}, collection, key) Return the value stored for the given key, or if no mapping for the key is present, store `key => f()`, and return `f()`. 
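Relatedly, the `setindex!` change earlier in this hunk range converts keys to the key type and requires the converted key to remain `isequal` to the original, otherwise throwing the new `KeyTypeError` (the removed lines show it used to be a generic `ArgumentError`). A hedged sketch:

```julia
d = Dict{Int, String}()
d[0x01] = "one"          # UInt8(1) converts to Int 1 and isequal(0x01, 1) holds: stored as key 1
try
    d[-0.0] = "zero"     # converts to Int 0, but isequal(-0.0, 0) is false
catch err
    err                  # with this change: a KeyTypeError rather than a generic ArgumentError
end
```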
@@ -449,12 +468,16 @@ Dict{Int64, Int64} with 1 entry: 2 => 4 ``` """ -get!(f::Function, collection, key) +get!(f::Callable, collection, key) function get!(default::Callable, h::Dict{K,V}, key0) where V where K - key = convert(K, key0) - if !isequal(key, key0) - throw(ArgumentError("$(limitrepr(key0)) is not a valid key for type $K")) + if key0 isa K + key = key0 + else + key = convert(K, key0)::K + if !isequal(key, key0) + throw(KeyTypeError(K, key0)) + end end return get!(default, h, key) end @@ -465,7 +488,10 @@ function get!(default::Callable, h::Dict{K,V}, key::K) where V where K index > 0 && return h.vals[index] age0 = h.age - v = convert(V, default()) + v = default() + if !isa(v, V) + v = convert(V, v)::V + end if h.age != age0 index, sh = ht_keyindex2_shorthash!(h, key) end @@ -481,7 +507,7 @@ end function getindex(h::Dict{K,V}, key) where V where K index = ht_keyindex(h, key) - @inbounds return (index < 0) ? throw(KeyError(key)) : h.vals[index]::V + return index < 0 ? throw(KeyError(key)) : @assume_effects :noub @inbounds h.vals[index]::V end """ @@ -512,7 +538,7 @@ function get(h::Dict{K,V}, key, default) where V where K end """ - get(f::Function, collection, key) + get(f::Union{Function, Type}, collection, key) Return the value stored for the given key, or if no mapping for the key is present, return `f()`. Use [`get!`](@ref) to also store the default value in the dictionary. @@ -526,7 +552,7 @@ get(dict, key) do end ``` """ -get(::Function, collection, key) +get(::Callable, collection, key) function get(default::Callable, h::Dict{K,V}, key) where V where K index = ht_keyindex(h, key) @@ -629,13 +655,30 @@ function pop!(h::Dict) end function _delete!(h::Dict{K,V}, index) where {K,V} - @inbounds h.slots[index] = 0x7f - @inbounds _unsetindex!(h.keys, index) - @inbounds _unsetindex!(h.vals, index) - h.ndel += 1 + @inbounds begin + slots = h.slots + sz = length(slots) + _unsetindex!(h.keys, index) + _unsetindex!(h.vals, index) + # if the next slot is empty we don't need a tombstone + # and can remove all tombstones that were required by the element we just deleted + ndel = 1 + nextind = (index & (sz-1)) + 1 + if isslotempty(h, nextind) + while true + ndel -= 1 + slots[index] = 0x00 + index = ((index - 2) & (sz-1)) + 1 + isslotmissing(h, index) || break + end + else + slots[index] = 0x7f + end + h.ndel += ndel h.count -= 1 h.age += 1 return h + end end """ @@ -695,6 +738,8 @@ end isempty(t::Dict) = (t.count == 0) length(t::Dict) = t.count +@propagate_inbounds Iterators.only(t::Dict) = Iterators._only(t, first) + @propagate_inbounds function Base.iterate(v::T, i::Int = v.dict.idxfloor) where T <: Union{KeySet{<:Any, <:Dict}, ValueIterator{<:Dict}} i == 0 && return nothing i = skip_deleted(v.dict, i) @@ -732,16 +777,23 @@ function map!(f, iter::ValueIterator{<:Dict}) end function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V} - haslength(d2) && sizehint!(d1, length(d1) + length(d2)) + haslength(d2) && sizehint!(d1, length(d1) + length(d2), shrink=false) for (k, v) in d2 i, sh = ht_keyindex2_shorthash!(d1, k) if i > 0 d1.vals[i] = combine(d1.vals[i], v) else - if !isequal(k, convert(K, k)) - throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K")) + if !(k isa K) + k1 = convert(K, k)::K + if !isequal(k, k1) + throw(KeyTypeError(K, k)) + end + k = k1 end - @inbounds _setindex!(d1, convert(V, v), k, -i, sh) + if !isa(v, V) + v = convert(V, v)::V + end + @inbounds _setindex!(d1, v, k, -i, sh) end end return d1 @@ -833,3 +885,172 @@ empty(::ImmutableDict, 
::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}() _similar_for(c::AbstractDict, ::Type{Pair{K,V}}, itr, isz, len) where {K, V} = empty(c, K, V) _similar_for(c::AbstractDict, ::Type{T}, itr, isz, len) where {T} = throw(ArgumentError("for AbstractDicts, similar requires an element type of Pair;\n if calling map, consider a comprehension instead")) + + +include("hamt.jl") +using .HashArrayMappedTries +using Core.OptimizedGenerics: KeyValue +const HAMT = HashArrayMappedTries + +struct PersistentDict{K,V} <: AbstractDict{K,V} + trie::HAMT.HAMT{K,V} + # Serves as a marker for an empty initialization + @noinline function KeyValue.set(::Type{PersistentDict{K, V}}) where {K, V} + new{K, V}(HAMT.HAMT{K,V}()) + end + @noinline function KeyValue.set(::Type{PersistentDict{K, V}}, ::Nothing, key, val) where {K, V} + new{K, V}(HAMT.HAMT{K, V}(key => val)) + end + @noinline function KeyValue.set(dict::PersistentDict{K, V}, key, val) where {K, V} + trie = dict.trie + h = HAMT.HashState(key) + found, present, trie, i, bi, top, hs = HAMT.path(trie, key, h, #=persistent=# true) + HAMT.insert!(found, present, trie, i, bi, hs, val) + return new{K, V}(top) + end + @noinline function KeyValue.set(dict::PersistentDict{K, V}, key) where {K, V} + trie = dict.trie + h = HAMT.HashState(key) + found, present, trie, i, bi, top, _ = HAMT.path(trie, key, h, #=persistent=# true) + if found && present + deleteat!(trie.data, i) + HAMT.unset!(trie, bi) + end + return new{K, V}(top) + end +end + +""" + PersistentDict + +`PersistentDict` is a dictionary implemented as an hash array mapped trie, +which is optimal for situations where you need persistence, each operation +returns a new dictionary separate from the previous one, but the underlying +implementation is space-efficient and may share storage across multiple +separate dictionaries. + +!!! note + It behaves like an IdDict. + +```julia +PersistentDict(KV::Pair) +``` + +# Examples + +```jldoctest +julia> dict = Base.PersistentDict(:a=>1) +Base.PersistentDict{Symbol, Int64} with 1 entry: + :a => 1 + +julia> dict2 = Base.delete(dict, :a) +Base.PersistentDict{Symbol, Int64}() + +julia> dict3 = Base.PersistentDict(dict, :a=>2) +Base.PersistentDict{Symbol, Int64} with 1 entry: + :a => 2 +``` +""" +PersistentDict + +PersistentDict{K,V}() where {K, V} = KeyValue.set(PersistentDict{K,V}) +function PersistentDict{K,V}(KV::Pair) where {K,V} + KeyValue.set( + PersistentDict{K, V}, + nothing, + KV...) +end +function PersistentDict(KV::Pair{K,V}) where {K,V} + KeyValue.set( + PersistentDict{K, V}, + nothing, + KV...) +end +PersistentDict(dict::PersistentDict, pair::Pair) = PersistentDict(dict, pair...) +PersistentDict{K,V}(dict::PersistentDict{K,V}, pair::Pair) where {K,V} = PersistentDict(dict, pair...) + + +function PersistentDict(dict::PersistentDict{K,V}, key, val) where {K,V} + key = convert(K, key) + val = convert(V, val) + return KeyValue.set(dict, key, val) +end + +function PersistentDict{K,V}(KV::Pair, rest::Pair...) where {K,V} + dict = PersistentDict{K,V}(KV) + for (key, value) in rest + dict = PersistentDict(dict, key, value) + end + return dict +end + +function PersistentDict(kv::Pair, rest::Pair...) 
+ dict = PersistentDict(kv) + for (key, value) in rest + dict = PersistentDict(dict, key, value) + end + return dict +end + +eltype(::PersistentDict{K,V}) where {K,V} = Pair{K,V} + +function in(key_val::Pair{K,V}, dict::PersistentDict{K,V}, valcmp=(==)) where {K,V} + key, val = key_val + found = KeyValue.get(dict, key) + found === nothing && return false + return valcmp(val, only(found)) +end + +function haskey(dict::PersistentDict{K}, key::K) where K + return KeyValue.get(dict, key) !== nothing +end + +function getindex(dict::PersistentDict{K,V}, key::K) where {K,V} + found = KeyValue.get(dict, key) + found === nothing && throw(KeyError(key)) + return only(found) +end + +function get(dict::PersistentDict{K,V}, key::K, default) where {K,V} + found = KeyValue.get(dict, key) + found === nothing && return default + return only(found) +end + +@noinline function KeyValue.get(dict::PersistentDict{K, V}, key) where {K, V} + trie = dict.trie + if HAMT.islevel_empty(trie) + return nothing + end + h = HAMT.HashState(key) + found, present, trie, i, _, _, _ = HAMT.path(trie, key, h) + if found && present + leaf = @inbounds trie.data[i]::HAMT.Leaf{K,V} + return (leaf.val,) + end + return nothing +end + +@noinline function KeyValue.get(default, dict::PersistentDict, key) + found = KeyValue.get(dict, key) + found === nothing && return default() + return only(found) +end + +function get(default::Callable, dict::PersistentDict{K,V}, key::K) where {K,V} + found = KeyValue.get(dict, key) + found === nothing && return default() + return only(found) +end + +function delete(dict::PersistentDict{K}, key::K) where K + return KeyValue.set(dict, key) +end + +iterate(dict::PersistentDict, state=nothing) = HAMT.iterate(dict.trie, state) + +length(dict::PersistentDict) = HAMT.length(dict.trie) +isempty(dict::PersistentDict) = HAMT.isempty(dict.trie) +empty(::PersistentDict, ::Type{K}, ::Type{V}) where {K, V} = PersistentDict{K, V}() + +@propagate_inbounds Iterators.only(dict::PersistentDict) = Iterators._only(dict, first) diff --git a/base/div.jl b/base/div.jl index 7b172ecc95a63..9c2187e662ee9 100644 --- a/base/div.jl +++ b/base/div.jl @@ -5,10 +5,10 @@ """ div(x, y, r::RoundingMode=RoundToZero) -The quotient from Euclidean (integer) division. Computes x/y, rounded to +The quotient from Euclidean (integer) division. Computes `x / y`, rounded to an integer according to the rounding mode `r`. In other words, the quantity - round(x/y,r) + round(x / y, r) without any intermediate rounding. @@ -52,12 +52,12 @@ div(a, b) = div(a, b, RoundToZero) Compute the remainder of `x` after integer division by `y`, with the quotient rounded according to the rounding mode `r`. In other words, the quantity - x - y*round(x/y,r) + x - y * round(x / y, r) without any intermediate rounding. - if `r == RoundNearest`, then the result is exact, and in the interval - ``[-|y|/2, |y|/2]``. See also [`RoundNearest`](@ref). + ``[-|y| / 2, |y| / 2]``. See also [`RoundNearest`](@ref). - if `r == RoundToZero` (default), then the result is exact, and in the interval ``[0, |y|)`` if `x` is positive, or ``(-|y|, 0]`` otherwise. See also [`RoundToZero`](@ref). @@ -66,12 +66,12 @@ without any intermediate rounding. ``(y, 0]`` otherwise. The result may not be exact if `x` and `y` have different signs, and `abs(x) < abs(y)`. See also [`RoundDown`](@ref). -- if `r == RoundUp`, then the result is in the interval `(-y,0]` if `y` is positive, or - `[0,-y)` otherwise. 
The result may not be exact if `x` and `y` have the same sign, and +- if `r == RoundUp`, then the result is in the interval ``(-y, 0]`` if `y` is positive, or + ``[0, -y)`` otherwise. The result may not be exact if `x` and `y` have the same sign, and `abs(x) < abs(y)`. See also [`RoundUp`](@ref). -- if `r == RoundFromZero`, then the result is in the interval `(-y, 0]` if `y` is positive, or - `[0, -y)` otherwise. The result may not be exact if `x` and `y` have the same sign, and +- if `r == RoundFromZero`, then the result is in the interval ``(-y, 0]`` if `y` is positive, or + ``[0, -y)`` otherwise. The result may not be exact if `x` and `y` have the same sign, and `abs(x) < abs(y)`. See also [`RoundFromZero`](@ref). !!! compat "Julia 1.9" @@ -97,7 +97,7 @@ rem(x, y, r::RoundingMode) rem(x, y, ::RoundingMode{:ToZero}) = rem(x, y) rem(x, y, ::RoundingMode{:Down}) = mod(x, y) rem(x, y, ::RoundingMode{:Up}) = mod(x, -y) -rem(x, y, r::RoundingMode{:Nearest}) = x - y*div(x, y, r) +rem(x, y, r::RoundingMode{:Nearest}) = x - y * div(x, y, r) rem(x::Integer, y::Integer, r::RoundingMode{:Nearest}) = divrem(x, y, r)[2] function rem(x, y, ::typeof(RoundFromZero)) @@ -107,13 +107,13 @@ end """ fld(x, y) -Largest integer less than or equal to `x/y`. Equivalent to `div(x, y, RoundDown)`. +Largest integer less than or equal to `x / y`. Equivalent to `div(x, y, RoundDown)`. See also [`div`](@ref), [`cld`](@ref), [`fld1`](@ref). # Examples ```jldoctest -julia> fld(7.3,5.5) +julia> fld(7.3, 5.5) 1.0 julia> fld.(-5:5, 3)' @@ -123,11 +123,11 @@ julia> fld.(-5:5, 3)' Because `fld(x, y)` implements strictly correct floored rounding based on the true value of floating-point numbers, unintuitive situations can arise. For example: ```jldoctest -julia> fld(6.0,0.1) +julia> fld(6.0, 0.1) 59.0 -julia> 6.0/0.1 +julia> 6.0 / 0.1 60.0 -julia> 6.0/big(0.1) +julia> 6.0 / big(0.1) 59.99999999999999666933092612453056361837965690217069245739573412231113406246995 ``` What is happening here is that the true value of the floating-point number written @@ -141,13 +141,13 @@ fld(a, b) = div(a, b, RoundDown) """ cld(x, y) -Smallest integer larger than or equal to `x/y`. Equivalent to `div(x, y, RoundUp)`. +Smallest integer larger than or equal to `x / y`. Equivalent to `div(x, y, RoundUp)`. See also [`div`](@ref), [`fld`](@ref). # Examples ```jldoctest -julia> cld(5.5,2.2) +julia> cld(5.5, 2.2) 3.0 julia> cld.(-5:5, 3)' @@ -162,17 +162,17 @@ cld(a, b) = div(a, b, RoundUp) divrem(x, y, r::RoundingMode=RoundToZero) The quotient and remainder from Euclidean division. -Equivalent to `(div(x,y,r), rem(x,y,r))`. Equivalently, with the default -value of `r`, this call is equivalent to `(x÷y, x%y)`. +Equivalent to `(div(x, y, r), rem(x, y, r))`. Equivalently, with the default +value of `r`, this call is equivalent to `(x ÷ y, x % y)`. See also: [`fldmod`](@ref), [`cld`](@ref). # Examples ```jldoctest -julia> divrem(3,7) +julia> divrem(3, 7) (0, 3) -julia> divrem(7,3) +julia> divrem(7, 3) (2, 1) ``` """ @@ -190,23 +190,24 @@ function divrem(a, b, r::RoundingMode) (div(a, b, r), rem(a, b, r)) end end -#avoids calling rem for Integers-Integers (all modes), -#a-d*b not precise for Floats - AbstractFloat, AbstractIrrational. Rationals are still slower +# avoids calling rem for Integers-Integers (all modes), +# a - d * b not precise for Floats - AbstractFloat, AbstractIrrational. 
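As a usage sketch of the `Base.PersistentDict` type added earlier in this diff: each update returns a new dictionary while the underlying HAMT shares storage between versions. `PersistentDict` is not exported, so the calls below are qualified with `Base.`; the keys and values are arbitrary illustration data.

```julia
d0 = Base.PersistentDict(:a => 1)
d1 = Base.PersistentDict(d0, :b => 2)  # new dict with :a and :b; d0 is unchanged
d2 = Base.delete(d1, :a)               # new dict without :a; d0 and d1 are unchanged

haskey(d0, :a)   # true
get(d1, :b, 0)   # 2
length(d1)       # 2
collect(d2)      # 1-element Vector{Pair{Symbol, Int64}}: [:b => 2]
```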
+# Rationals are still slower function divrem(a::Integer, b::Integer, r::Union{typeof(RoundUp), typeof(RoundDown), typeof(RoundToZero)}) if r === RoundToZero # For compat. Remove in 2.0. d = div(a, b) - (d, a - d*b) + (d, a - d * b) elseif r === RoundDown # For compat. Remove in 2.0. d = fld(a, b) - (d, a - d*b) + (d, a - d * b) elseif r === RoundUp # For compat. Remove in 2.0. d = div(a, b, r) - (d, a - d*b) + (d, a - d * b) end end function divrem(x::Integer, y::Integer, rnd::typeof(RoundNearest)) @@ -266,11 +267,11 @@ end fldmod(x, y) The floored quotient and modulus after division. A convenience wrapper for -`divrem(x, y, RoundDown)`. Equivalent to `(fld(x,y), mod(x,y))`. +`divrem(x, y, RoundDown)`. Equivalent to `(fld(x, y), mod(x, y))`. See also: [`fld`](@ref), [`cld`](@ref), [`fldmod1`](@ref). """ -fldmod(x,y) = divrem(x, y, RoundDown) +fldmod(x, y) = divrem(x, y, RoundDown) # We definite generic rounding methods for other rounding modes in terms of # RoundToZero. @@ -322,11 +323,11 @@ div(a::UInt128, b::UInt128, ::typeof(RoundToZero)) = div(a, b) rem(a::Int128, b::Int128, ::typeof(RoundToZero)) = rem(a, b) rem(a::UInt128, b::UInt128, ::typeof(RoundToZero)) = rem(a, b) -# These are kept for compatibility with external packages overriding fld/cld. -# In 2.0, packages should extend div(a,b,r) instead, in which case, these can +# These are kept for compatibility with external packages overriding fld / cld. +# In 2.0, packages should extend div(a, b, r) instead, in which case, these can # be removed. -fld(x::Real, y::Real) = div(promote(x,y)..., RoundDown) -cld(x::Real, y::Real) = div(promote(x,y)..., RoundUp) +fld(x::Real, y::Real) = div(promote(x, y)..., RoundDown) +cld(x::Real, y::Real) = div(promote(x, y)..., RoundUp) fld(x::Signed, y::Unsigned) = div(x, y, RoundDown) fld(x::Unsigned, y::Signed) = div(x, y, RoundDown) cld(x::Signed, y::Unsigned) = div(x, y, RoundUp) @@ -346,14 +347,14 @@ function div(x::Real, y::Real, r::RoundingMode) end # Integers -# fld(x,y) == div(x,y) - ((x>=0) != (y>=0) && rem(x,y) != 0 ? 1 : 0) -div(x::T, y::T, ::typeof(RoundDown)) where {T<:Unsigned} = div(x,y) +# fld(x, y) == div(x, y) - ((x >= 0) != (y >= 0) && rem(x, y) != 0 ? 1 : 0) +div(x::T, y::T, ::typeof(RoundDown)) where {T<:Unsigned} = div(x, y) function div(x::T, y::T, ::typeof(RoundDown)) where T<:Integer d = div(x, y, RoundToZero) return d - (signbit(x ⊻ y) & (d * y != x)) end -# cld(x,y) = div(x,y) + ((x>0) == (y>0) && rem(x,y) != 0 ? 1 : 0) +# cld(x, y) = div(x, y) + ((x > 0) == (y > 0) && rem(x, y) != 0 ? 1 : 0) function div(x::T, y::T, ::typeof(RoundUp)) where T<:Unsigned d = div(x, y, RoundToZero) return d + (d * y != x) @@ -366,5 +367,4 @@ end # Real # NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division, # so it is used here as the basis of float div(). -div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T,round((x-rem(x,y,r))/y)) -rem(x::T, y::T, ::typeof(RoundUp)) where {T<:AbstractFloat} = convert(T,x-y*ceil(x/y)) +div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat} = convert(T, round((x - rem(x, y, r)) / y)) diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl index 2c52d8f921ef2..87b4a45c4dc80 100644 --- a/base/docs/Docs.jl +++ b/base/docs/Docs.jl @@ -33,13 +33,13 @@ The macro has special parsing so that the documented object may occur on the nex By default, documentation is written as Markdown, but any object can be used as the first argument. 
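A few concrete cases of the rounding-mode identities documented in `base/div.jl` above, namely that `div(x, y, r)` is `round(x / y, r)` without intermediate rounding and `rem(x, y, r) == x - y * round(x / y, r)`; the numbers are arbitrary.

```julia
div(7, 3)                # 2   (RoundToZero is the default)
div(7, 3, RoundDown)     # 2   == fld(7, 3)
div(7, 3, RoundUp)       # 3   == cld(7, 3)
div(-7, 3, RoundDown)    # -3  (floored division rounds toward -Inf)

rem(7, 3, RoundNearest)  # 1,  since round(7 / 3) == 2 and 7 - 3 * 2 == 1
mod(-7, 3)               # 2   == rem(-7, 3, RoundDown)

divrem(7, 3)             # (2, 1)
fldmod(-7, 3)            # (-3, 2) == divrem(-7, 3, RoundDown)
```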
-## Documenting objects after they are defined -You can document an object after its definition by +## Documenting objects separately from their definitions +You can document an object before or after its definition with @doc "foo" function_to_doc @doc "bar" TypeToDoc -For macros, the syntax is `@doc "macro doc" :(@Module.macro)` or `@doc "macro doc" +For macros, the syntax is `@doc "macro doc" :(Module.@macro)` or `@doc "macro doc" :(string_macro"")` for string macros. Without the quote `:()` the expansion of the macro will be documented. @@ -73,9 +73,9 @@ const modules = Module[] const META = gensym(:meta) const METAType = IdDict{Any,Any} -function meta(m::Module) +function meta(m::Module; autoinit::Bool=true) if !isdefined(m, META) || getfield(m, META) === nothing - initmeta(m) + autoinit ? initmeta(m) : return nothing end return getfield(m, META)::METAType end @@ -161,7 +161,8 @@ end function docstr(binding::Binding, typesig = Union{}) @nospecialize typesig for m in modules - dict = meta(m) + dict = meta(m; autoinit=false) + isnothing(dict) && continue if haskey(dict, binding) docs = dict[binding].docs if haskey(docs, typesig) @@ -285,12 +286,26 @@ catdoc(xs...) = vcat(xs...) const keywords = Dict{Symbol, DocStr}() function unblock(@nospecialize ex) + while isexpr(ex, :var"hygienic-scope") + isexpr(ex.args[1], :escape) || break + ex = ex.args[1].args[1] + end isexpr(ex, :block) || return ex exs = filter(ex -> !(isa(ex, LineNumberNode) || isexpr(ex, :line)), ex.args) length(exs) == 1 || return ex return unblock(exs[1]) end +# peek through ex to figure out what kind of expression it may eventually act like +# but ignoring scopes and line numbers +function unescape(@nospecialize ex) + ex = unblock(ex) + while isexpr(ex, :escape) || isexpr(ex, :var"hygienic-scope") + ex = unblock(ex.args[1]) + end + return ex +end + uncurly(@nospecialize ex) = isexpr(ex, :curly) ? ex.args[1] : ex namify(@nospecialize x) = astname(x, isexpr(x, :macro))::Union{Symbol,Expr,GlobalRef} @@ -299,9 +314,8 @@ function astname(x::Expr, ismacro::Bool) head = x.head if head === :. ismacro ? macroname(x) : x - # Call overloading, e.g. `(a::A)(b) = b` or `function (a::A)(b) b end` should document `A(b)` - elseif (head === :function || head === :(=)) && isexpr(x.args[1], :call) && isexpr((x.args[1]::Expr).args[1], :(::)) - return astname(((x.args[1]::Expr).args[1]::Expr).args[end], ismacro) + elseif head === :call && isexpr(x.args[1], :(::)) + return astname((x.args[1]::Expr).args[end], ismacro) else n = isexpr(x, (:module, :struct)) ? 
2 : 1 astname(x.args[n], ismacro) @@ -351,18 +365,19 @@ function metadata(__source__, __module__, expr, ismodule) fields = P[] last_docstr = nothing for each in (expr.args[3]::Expr).args - if isa(each, Symbol) || isexpr(each, :(::)) + eachex = unescape(each) + if isa(eachex, Symbol) || isexpr(eachex, :(::)) # a field declaration if last_docstr !== nothing - push!(fields, P(namify(each::Union{Symbol,Expr}), last_docstr)) + push!(fields, P(namify(eachex::Union{Symbol,Expr}), last_docstr)) last_docstr = nothing end - elseif isexpr(each, :function) || isexpr(each, :(=)) + elseif isexpr(eachex, :function) || isexpr(eachex, :(=)) break - elseif isa(each, String) || isexpr(each, :string) || isexpr(each, :call) || - (isexpr(each, :macrocall) && each.args[1] === Symbol("@doc_str")) + elseif isa(eachex, String) || isexpr(eachex, :string) || isexpr(eachex, :call) || + (isexpr(eachex, :macrocall) && eachex.args[1] === Symbol("@doc_str")) # forms that might be doc strings - last_docstr = each::Union{String,Expr} + last_docstr = each end end dict = :($(Dict{Symbol,Any})($([(:($(P)($(quot(f)), $d)))::Expr for (f, d) in fields]...))) @@ -517,11 +532,12 @@ function docm(source::LineNumberNode, mod::Module, ex) @nospecialize ex if isexpr(ex, :->) && length(ex.args) > 1 return docm(source, mod, ex.args...) - else + elseif isassigned(Base.REPL_MODULE_REF) # TODO: this is a shim to continue to allow `@doc` for looking up docstrings REPL = Base.REPL_MODULE_REF[] - return REPL.lookup_doc(ex) + return invokelatest(REPL.lookup_doc, ex) end + return nothing end # Drop incorrect line numbers produced by nested macro calls. docm(source::LineNumberNode, mod::Module, _, _, x...) = docm(source, mod, x...) @@ -626,8 +642,9 @@ function loaddocs(docs::Vector{Core.SimpleVector}) for (mod, ex, str, file, line) in docs data = Dict{Symbol,Any}(:path => string(file), :linenumber => line) doc = docstr(str, data) - docstring = docm(LineNumberNode(line, file), mod, doc, ex, false) # expand the real @doc macro now - Core.eval(mod, Expr(Core.unescape, docstring, Docs)) + lno = LineNumberNode(line, file) + docstring = docm(lno, mod, doc, ex, false) # expand the real @doc macro now + Core.eval(mod, Expr(:var"hygienic-scope", docstring, Docs, lno)) end empty!(docs) nothing @@ -638,4 +655,24 @@ function parsedoc end function apropos end function doc end + +""" + Docs.hasdoc(mod::Module, sym::Symbol)::Bool + +Return `true` if `sym` in `mod` has a docstring and `false` otherwise. +""" +hasdoc(mod::Module, sym::Symbol) = hasdoc(Docs.Binding(mod, sym)) +function hasdoc(binding::Docs.Binding, sig::Type = Union{}) + # this function is based on the Base.Docs.doc method implemented + # in REPL/src/docview.jl. TODO: refactor and unify these methods. + defined(binding) && !isnothing(getdoc(resolve(binding), sig)) && return true + for mod in modules + dict = meta(mod; autoinit=false) + !isnothing(dict) && haskey(dict, binding) && return true + end + alias = aliasof(binding) + return alias == binding ? false : hasdoc(alias, sig) +end + + end diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index e9aec30e48990..b2d81f482b646 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -52,13 +52,48 @@ kw"import" """ export -`export` is used within modules to tell Julia which functions should be +`export` is used within modules to tell Julia which names should be made available to the user. For example: `export foo` makes the name `foo` available when [`using`](@ref) the module. 
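A minimal sketch of the distinction drawn here between exported and merely public names; the `Geometry` module and its functions are made up for illustration, and the `public` keyword described just below requires Julia 1.11.

```julia
module Geometry
    export area          # in scope as `area` after `using .Geometry`
    public perimeter     # part of the documented API, but not brought into scope

    area(r) = π * r^2
    perimeter(r) = 2π * r
end

using .Geometry
area(2.0)                # exported, so the unqualified name works
Geometry.perimeter(2.0)  # public but not exported: qualify with the module name
```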
See the [manual section about modules](@ref modules) for details. """ kw"export" +""" + public + +`public` is used within modules to tell Julia which names are part of the +public API of the module . For example: `public foo` indicates that the name +`foo` is public, without making it available when [`using`](@ref) +the module. See the [manual section about modules](@ref modules) for details. + +!!! compat "Julia 1.11" + The public keyword was added in Julia 1.11. Prior to this the notion + of publicness was less explicit. +""" +kw"public" + +""" + as + +`as` is used as a keyword to rename an identifier brought into scope by +`import` or `using`, for the purpose of working around name conflicts as +well as for shortening names. (Outside of `import` or `using` statements, +`as` is not a keyword and can be used as an ordinary identifier.) + +`import LinearAlgebra as LA` brings the imported `LinearAlgebra` standard library +into scope as `LA`. + +`import LinearAlgebra: eigen as eig, cholesky as chol` brings the `eigen` and `cholesky` methods +from `LinearAlgebra` into scope as `eig` and `chol` respectively. + +`as` works with `using` only when individual identifiers are brought into scope. +For example, `using LinearAlgebra: eigen as eig` or `using LinearAlgebra: eigen as eig, cholesky as chol` works, +but `using LinearAlgebra as LA` is invalid syntax, since it is nonsensical to +rename *all* exported names from `LinearAlgebra` to `LA`. +""" +kw"as" + """ abstract type @@ -80,7 +115,7 @@ kw"abstract type", kw"abstract" `module` declares a [`Module`](@ref), which is a separate global variable workspace. Within a module, you can control which names from other modules are visible (via importing), and -specify which of your names are intended to be public (via exporting). +specify which of your names are intended to be public (via `export` and `public`). Modules allow you to create top-level definitions without worrying about name conflicts when your code is used together with somebody else’s. See the [manual section about modules](@ref modules) for more details. @@ -130,7 +165,7 @@ kw"__init__" baremodule `baremodule` declares a module that does not contain `using Base` or local definitions of -[`eval`](@ref Base.MainInclude.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words, +[`eval`](@ref Main.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words, ```julia module Mod @@ -182,7 +217,7 @@ kw"primitive type" A macro maps a sequence of argument expressions to a returned expression, and the resulting expression is substituted directly into the program at the point where the macro is invoked. -Macros are a way to run generated code without calling [`eval`](@ref Base.MainInclude.eval), +Macros are a way to run generated code without calling [`eval`](@ref Main.eval), since the generated code instead simply becomes part of the surrounding program. Macro arguments may include expressions, literal values, and symbols. Macros can be defined for variable number of arguments (varargs), but do not accept keyword arguments. @@ -190,6 +225,8 @@ Every macro also implicitly gets passed the arguments `__source__`, which contai and file name the macro is called from, and `__module__`, which is the module the macro is expanded in. +See the manual section on [Metaprogramming](@ref) for more information about how to write a macro. 
+ # Examples ```jldoctest julia> macro sayhello(name) @@ -497,7 +534,8 @@ sense to write something like `let x = x`, since the two `x` variables are disti the left-hand side locally shadowing the `x` from the outer scope. This can even be a useful idiom as new local variables are freshly created each time local scopes are entered, but this is only observable in the case of variables that outlive their -scope via closures. +scope via closures. A `let` variable without an assignment, such as `var2` in the +example above, declares a new local variable that is not yet bound to a value. By contrast, [`begin`](@ref) blocks also group multiple expressions together but do not introduce scope or have the special assignment syntax. @@ -683,12 +721,11 @@ Expr Expr """ - (:)(expr) + :expr -`:expr` quotes the expression `expr`, returning the abstract syntax tree (AST) of `expr`. +Quote an expression `expr`, returning the abstract syntax tree (AST) of `expr`. The AST may be of type `Expr`, `Symbol`, or a literal value. -Which of these three types are returned for any given expression is an -implementation detail. +The syntax `:identifier` evaluates to a `Symbol`. See also: [`Expr`](@ref), [`Symbol`](@ref), [`Meta.parse`](@ref) @@ -793,7 +830,7 @@ julia> f(2) 7 ``` -Anonymous functions can also be defined for multiple argumets. +Anonymous functions can also be defined for multiple arguments. ```jldoctest julia> g = (x,y) -> x^2 + y^2 #2 (generic function with 1 method) @@ -1140,8 +1177,16 @@ Adding `;` at the end of a line in the REPL will suppress printing the result of In function declarations, and optionally in calls, `;` separates regular arguments from keywords. -While constructing arrays, if the arguments inside the square brackets are separated by `;` -then their contents are vertically concatenated together. +In array literals, arguments separated by semicolons have their contents +concatenated together. A separator made of a single `;` concatenates vertically +(i.e. along the first dimension), `;;` concatenates horizontally (second +dimension), `;;;` concatenates along the third dimension, etc. Such a separator +can also be used in last position in the square brackets to add trailing +dimensions of length 1. + +A `;` in first position inside of parentheses can be used to construct a named +tuple. The same `(; ...)` syntax on the left side of an assignment allows for +property destructuring. In the standard REPL, typing `;` on an empty line will switch to shell mode. @@ -1165,11 +1210,40 @@ julia> function plot(x, y; style="solid", width=1, color="black") ### end -julia> [1 2; 3 4] +julia> A = [1 2; 3 4] 2×2 Matrix{Int64}: 1 2 3 4 +julia> [1; 3;; 2; 4;;; 10*A] +2×2×2 Array{Int64, 3}: +[:, :, 1] = + 1 2 + 3 4 + +[:, :, 2] = + 10 20 + 30 40 + +julia> [2; 3;;;] +2×1×1 Array{Int64, 3}: +[:, :, 1] = + 2 + 3 + +julia> nt = (; x=1) # without the ; or a trailing comma this would assign to x +(x = 1,) + +julia> key = :a; c = 3; + +julia> nt2 = (; key => 1, b=2, c, nt.x) +(a = 1, b = 2, c = 3, x = 1) + +julia> (; b, x) = nt2; # set variables b and x using property destructuring + +julia> b, x +(2, 1) + julia> ; # upon typing ;, the prompt changes (in place) to: shell> shell> echo hello hello @@ -1255,7 +1329,11 @@ a tuple of types. All types, as well as the LLVM code, should be specified as li not as variables or expressions (it may be necessary to use `@eval` to generate these literals). -See `test/llvmcall.jl` for usage examples. 
+[Opaque pointers](https://llvm.org/docs/OpaquePointers.html) (written as `ptr`) are not allowed in the LLVM code. + +See +[`test/llvmcall.jl`](https://github.com/JuliaLang/julia/blob/v$VERSION/test/llvmcall.jl) +for usage examples. """ Core.Intrinsics.llvmcall @@ -1410,13 +1488,6 @@ parser rather than being implemented as a normal string macro `@var_str`. """ kw"var\"name\"", kw"@var_str" -""" - ans - -A variable referring to the last computed value, automatically set at the interactive prompt. -""" -kw"ans" - """ devnull @@ -1677,7 +1748,7 @@ The argument `val` to a function or constructor is outside the valid domain. ```jldoctest julia> sqrt(-1) ERROR: DomainError with -1.0: -sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). +sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). Stacktrace: [...] ``` @@ -1691,6 +1762,12 @@ Create a `Task` (i.e. coroutine) to execute the given function `func` (which must be callable with no arguments). The task exits when this function returns. The task will run in the "world age" from the parent at construction when [`schedule`](@ref)d. +!!! warning + By default tasks will have the sticky bit set to true `t.sticky`. This models the + historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread + they are first scheduled on. To obtain the behavior of [`Threads.@spawn`](@ref) set the sticky + bit manually to `false`. + # Examples ```jldoctest julia> a() = sum(i for i in 1:1000); @@ -1741,14 +1818,14 @@ In these examples, `a` is a [`Rational`](@ref), which has two fields. nfields """ - UndefVarError(var::Symbol) + UndefVarError(var::Symbol, [scope]) A symbol in the current scope is not defined. # Examples ```jldoctest julia> a -ERROR: UndefVarError: a not defined +ERROR: UndefVarError: `a` not defined in `Main` julia> a = 1; @@ -1771,7 +1848,7 @@ julia> function my_func(;my_arg) my_func (generic function with 1 method) julia> my_func() -ERROR: UndefKeywordError: keyword argument my_arg not assigned +ERROR: UndefKeywordError: keyword argument `my_arg` not assigned Stacktrace: [1] my_func() at ./REPL[1]:2 [2] top-level scope at REPL[2]:1 @@ -2077,7 +2154,7 @@ Symbol(x...) Construct a tuple of the given objects. -See also [`Tuple`](@ref), [`NamedTuple`](@ref). +See also [`Tuple`](@ref), [`ntuple`](@ref), [`NamedTuple`](@ref). # Examples ```jldoctest @@ -2204,6 +2281,83 @@ instruction, otherwise it'll use a loop. """ replacefield! +""" + getglobal(module::Module, name::Symbol, [order::Symbol=:monotonic]) + +Retrieve the value of the binding `name` from the module `module`. Optionally, an +atomic ordering can be defined for the operation, otherwise it defaults to +monotonic. + +While accessing module bindings using [`getfield`](@ref) is still supported to +maintain compatibility, using `getglobal` should always be preferred since +`getglobal` allows for control over atomic ordering (`getfield` is always +monotonic) and better signifies the code's intent both to the user as well as the +compiler. + +Most users should not have to call this function directly -- The +[`getproperty`](@ref Base.getproperty) function or corresponding syntax (i.e. +`module.name`) should be preferred in all but few very specific use cases. + +!!! compat "Julia 1.9" + This function requires Julia 1.9 or later. + +See also [`getproperty`](@ref Base.getproperty) and [`setglobal!`](@ref). 
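Returning to the `Task` sticky-bit warning above, a small sketch of clearing `t.sticky` before scheduling so the task may migrate between threads, as `Threads.@spawn` does; the summation workload is arbitrary.

```julia
t = Task(() -> sum(i for i in 1:1000))
t.sticky          # true by default, matching the historic @async behavior
t.sticky = false  # allow the scheduler to run the task on any thread
schedule(t)
fetch(t)          # 500500
```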
+ +# Examples +```jldoctest +julia> a = 1 +1 + +julia> module M + a = 2 + end; + +julia> getglobal(@__MODULE__, :a) +1 + +julia> getglobal(M, :a) +2 +``` +""" +getglobal + +""" + setglobal!(module::Module, name::Symbol, x, [order::Symbol=:monotonic]) + +Set or change the value of the binding `name` in the module `module` to `x`. No +type conversion is performed, so if a type has already been declared for the +binding, `x` must be of appropriate type or an error is thrown. + +Additionally, an atomic ordering can be specified for this operation, otherwise it +defaults to monotonic. + +Users will typically access this functionality through the +[`setproperty!`](@ref Base.setproperty!) function or corresponding syntax +(i.e. `module.name = x`) instead, so this is intended only for very specific use +cases. + +!!! compat "Julia 1.9" + This function requires Julia 1.9 or later. + +See also [`setproperty!`](@ref Base.setproperty!) and [`getglobal`](@ref) + +# Examples +```jldoctest +julia> module M end; + +julia> M.a # same as `getglobal(M, :a)` +ERROR: UndefVarError: `a` not defined in `M` +Suggestion: check for spelling errors or missing imports. + +julia> setglobal!(M, :a, 1) +1 + +julia> M.a +1 +``` +""" +setglobal! + """ typeof(x) @@ -2267,6 +2421,42 @@ false """ isdefined +""" + Memory{T}(undef, n) + +Construct an uninitialized [`Memory{T}`](@ref) of length `n`. All Memory +objects of length 0 might alias, since there is no reachable mutable content +from them. + +# Examples +```julia-repl +julia> Memory{Float64}(undef, 3) +3-element Memory{Float64}: + 6.90966e-310 + 6.90966e-310 + 6.90966e-310 +``` +""" +Memory{T}(::UndefInitializer, n) + +""" + MemoryRef(memory) + +Construct a MemoryRef from a memory object. This does not fail, but the +resulting memory may point out-of-bounds if the memory is empty. +""" +MemoryRef(::Memory) + +""" + MemoryRef(::Memory, index::Integer) + MemoryRef(::MemoryRef, index::Integer) + +Construct a MemoryRef from a memory object and an offset index (1-based) which +can also be negative. This always returns an inbounds object, and will throw an +error if that is not possible (because the index would result in a shift +out-of-bounds of the underlying memory). +""" +MemoryRef(::Union{Memory,MemoryRef}, ::Integer) """ Vector{T}(undef, n) @@ -2668,23 +2858,33 @@ kw"Union{}", Base.Bottom """ Union{Types...} -A type union is an abstract type which includes all instances of any of its argument types. The empty -union [`Union{}`](@ref) is the bottom type of Julia. +A `Union` type is an abstract type which includes all instances of any of its argument types. +This means that `T <: Union{T,S}` and `S <: Union{T,S}`. + +Like other abstract types, it cannot be instantiated, even if all of its arguments are non +abstract. # Examples ```jldoctest julia> IntOrString = Union{Int,AbstractString} Union{Int64, AbstractString} -julia> 1 isa IntOrString +julia> 1 isa IntOrString # instance of Int is included in the union true -julia> "Hello!" isa IntOrString +julia> "Hello!" isa IntOrString # String is also included true -julia> 1.0 isa IntOrString +julia> 1.0 isa IntOrString # Float64 is not included because it is neither Int nor AbstractString false ``` + +# Extended Help + +Unlike most other parametric types, unions are covariant in their parameters. For example, +`Union{Real, String}` is a subtype of `Union{Number, AbstractString}`. + +The empty union [`Union{}`](@ref) is the bottom type of Julia. 
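The Extended Help notes on `Union` can be checked directly; a couple of subtyping queries illustrating covariance and the bottom type:

```julia
Union{Real, String} <: Union{Number, AbstractString}  # true: unions are covariant
Union{} <: Int                                         # true: Union{} is the bottom type
1.0 isa Union{Int, AbstractString}                     # false, as in the example above
```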
""" Union @@ -2775,17 +2975,48 @@ Vararg """ Tuple{Types...} -Tuples are an abstraction of the arguments of a function – without the function itself. The salient aspects of -a function's arguments are their order and their types. Therefore a tuple type is similar to a parameterized -immutable type where each parameter is the type of one field. Tuple types may have any number of parameters. +A tuple is a fixed-length container that can hold any values of different +types, but cannot be modified (it is immutable). The values can be accessed via +indexing. Tuple literals are written with commas and parentheses: + +```jldoctest +julia> (1, 1+1) +(1, 2) + +julia> (1,) +(1,) + +julia> x = (0.0, "hello", 6*7) +(0.0, "hello", 42) + +julia> x[2] +"hello" + +julia> typeof(x) +Tuple{Float64, String, Int64} +``` + +A length-1 tuple must be written with a comma, `(1,)`, since `(1)` would just +be a parenthesized value. `()` represents the empty (length-0) tuple. + +A tuple can be constructed from an iterator by using a `Tuple` type as constructor: + +```jldoctest +julia> Tuple(["a", 1]) +("a", 1) + +julia> Tuple{String, Float64}(["a", 1]) +("a", 1.0) +``` Tuple types are covariant in their parameters: `Tuple{Int}` is a subtype of `Tuple{Any}`. Therefore `Tuple{Any}` is considered an abstract type, and tuple types are only concrete if their parameters are. Tuples do not have field names; fields are only accessed by index. +Tuple types may have any number of parameters. See the manual section on [Tuple Types](@ref). -See also [`Vararg`](@ref), [`NTuple`](@ref), [`tuple`](@ref), [`NamedTuple`](@ref). +See also [`Vararg`](@ref), [`NTuple`](@ref), [`ntuple`](@ref), [`tuple`](@ref), [`NamedTuple`](@ref). """ Tuple @@ -2849,8 +3080,8 @@ the syntax `@atomic a.b` calls `getproperty(a, :b, :sequentially_consistent)`. # Examples ```jldoctest -julia> struct MyType - x +julia> struct MyType{T <: Number} + x::T end julia> function Base.getproperty(obj::MyType, sym::Symbol) @@ -2870,6 +3101,11 @@ julia> obj.x 1 ``` +One should overload `getproperty` only when necessary, as it can be confusing if +the behavior of the syntax `obj.f` is unusual. +Also note that using methods is often preferable. See also this style guide documentation +for more information: [Prefer exported methods over direct field access](@ref). + See also [`getfield`](@ref Core.getfield), [`propertynames`](@ref Base.propertynames) and [`setproperty!`](@ref Base.setproperty!). @@ -2897,7 +3133,7 @@ Base.setproperty! swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic) The syntax `@atomic a.b, _ = c, a.b` returns `(c, swapproperty!(a, :b, c, :sequentially_consistent))`, -where there must be one getfield expression common to both sides. +where there must be one `getproperty` expression common to both sides. See also [`swapfield!`](@ref Core.swapfield!) and [`setproperty!`](@ref Base.setproperty!). @@ -2907,9 +3143,9 @@ Base.swapproperty! """ modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic) -The syntax `@atomic max(a().b, c)` returns `modifyproperty!(a(), :b, -max, c, :sequentially_consistent))`, where the first argument must be a -`getfield` expression and is modified atomically. +The syntax `@atomic op(x.f, v)` (and its equivalent `@atomic x.f op v`) returns +`modifyproperty!(x, :f, op, v, :sequentially_consistent)`, where the first argument +must be a `getproperty` expression and is modified atomically. Invocation of `op(getproperty(x, f), v)` must return a value that can be stored in the field `f` of the object `x` by default. 
In particular, unlike the default behavior of @@ -2943,7 +3179,7 @@ with elements of type `T` and `N` dimensions. If `A` is a `StridedArray`, then its elements are stored in memory with offsets, which may vary between dimensions but are constant within a dimension. For example, `A` could have stride 2 in dimension 1, and stride 3 in dimension 2. Incrementing `A` along -dimension `d` jumps in memory by [`strides(A, d)`] slots. Strided arrays are +dimension `d` jumps in memory by [`stride(A, d)`] slots. Strided arrays are particularly important and useful because they can sometimes be passed directly as pointers to foreign language libraries like BLAS. """ @@ -3019,7 +3255,7 @@ QuoteNode """ " -`"` Is used to delimit string literals. +`"` Is used to delimit string literals. A trailing `\\` can be used to continue a string literal on the next line. # Examples @@ -3029,6 +3265,10 @@ julia> "Hello World!" julia> "Hello World!\\n" "Hello World!\\n" + +julia> "Hello \\ + World" +"Hello World" ``` See also [`\"""`](@ref \"\"\"). @@ -3059,7 +3299,16 @@ See also [`"`](@ref \") kw"\"\"\"" """ - donotdelete(args...) +Unsafe pointer operations are compatible with loading and storing pointers declared with +`_Atomic` and `std::atomic` type in C11 and C++23 respectively. An error may be thrown if +there is not support for atomically loading the Julia type `T`. + +See also: [`unsafe_load`](@ref), [`unsafe_modify!`](@ref), [`unsafe_replace!`](@ref), [`unsafe_store!`](@ref), [`unsafe_swap!`](@ref) +""" +kw"atomic" + +""" + Base.donotdelete(args...) This function prevents dead-code elimination (DCE) of itself and any arguments passed to it, but is otherwise the lightest barrier possible. In particular, @@ -3076,16 +3325,17 @@ This is intended for use in benchmarks that want to guarantee that `args` are actually computed. (Otherwise DCE may see that the result of the benchmark is unused and delete the entire benchmark code). -**Note**: `donotdelete` does not affect constant folding. For example, in - `donotdelete(1+1)`, no add instruction needs to be executed at runtime and - the code is semantically equivalent to `donotdelete(2).` +!!! note + `donotdelete` does not affect constant folding. For example, in + `donotdelete(1+1)`, no add instruction needs to be executed at runtime and + the code is semantically equivalent to `donotdelete(2).` # Examples ```julia function loop() for i = 1:1000 - # The complier must guarantee that there are 1000 program points (in the correct + # The compiler must guarantee that there are 1000 program points (in the correct # order) at which the value of `i` is in a register, but has otherwise # total control over the program. donotdelete(i) @@ -3095,6 +3345,60 @@ end """ Base.donotdelete +""" + Base.compilerbarrier(setting::Symbol, val) + +This function acts a compiler barrier at a specified compilation phase. +The dynamic semantics of this intrinsic are to return the `val` argument, unmodified. +However, depending on the `setting`, the compiler is prevented from assuming this behavior. 
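Relating to the `StridedArray` correction above (`stride(A, d)` rather than `strides(A, d)`), a short illustration of per-dimension strides; the arrays are arbitrary.

```julia
A = zeros(4, 3)
strides(A)             # (1, 4): column-major, so stepping along dimension 2 jumps 4 elements
stride(A, 2)           # 4
V = view(A, 1:2:3, :)  # views with range indices are still strided
strides(V)             # (2, 4)
```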
+ +Currently either of the following `setting`s is allowed: +- Barriers on abstract interpretation: + * `:type`: the return type of this function call will be inferred as `Any` always + (the strongest barrier on abstract interpretation) + * `:const`: the return type of this function call will be inferred with widening + constant information on `val` + * `:conditional`: the return type of this function call will be inferred with widening + conditional information on `val` (see the example below) +- Any barriers on optimization aren't implemented yet + +!!! note + This function is expected to be used with `setting` known precisely at compile-time. + If the `setting` is not known precisely at compile-time, the compiler will emit the + strongest barrier(s). No compile-time warning is issued. + +# Examples + +```julia +julia> Base.return_types((Int,)) do a + x = compilerbarrier(:type, a) # `x` won't be inferred as `x::Int` + return x + end |> only +Any + +julia> Base.return_types() do + x = compilerbarrier(:const, 42) + if x == 42 # no constant information here, so inference also accounts for the else branch + return x # but `x` is still inferred as `x::Int` at least here + else + return nothing + end + end |> only +Union{Nothing, Int64} + +julia> Base.return_types((Union{Int,Nothing},)) do a + if compilerbarrier(:conditional, isa(a, Int)) + # the conditional information `a::Int` isn't available here (leading to less accurate return type inference) + return a + else + return nothing + end + end |> only +Union{Nothing, Int64} +``` +""" +Base.compilerbarrier + """ Core.finalizer(f, o) @@ -3108,12 +3412,14 @@ but there are a number of small differences. They are documented here for completeness only and (unlike `Base.finalizer`) have no stability guarantees. The current differences are: - - `Core.finalizer` does not check for mutability of `o`. Attempting to register - a finalizer for an immutable object is undefined behavior. - - The value `f` must be a Julia object. `Core.finalizer` does not support a - raw C function pointer. - - `Core.finalizer` returns `nothing` rather than `o`. +- `Core.finalizer` does not check for mutability of `o`. Attempting to register + a finalizer for an immutable object is undefined behavior. +- The value `f` must be a Julia object. `Core.finalizer` does not support a + raw C function pointer. +- `Core.finalizer` returns `nothing` rather than `o`. """ Core.finalizer +Base.include(BaseDocs, "intrinsicsdocs.jl") + end diff --git a/base/docs/intrinsicsdocs.jl b/base/docs/intrinsicsdocs.jl new file mode 100644 index 0000000000000..9f6ec773ff9a8 --- /dev/null +++ b/base/docs/intrinsicsdocs.jl @@ -0,0 +1,63 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + Core.IR + +The `Core.IR` module exports the IR object model. +""" +Core.IR + +""" + Core.IntrinsicFunction <: Core.Builtin <: Function + +The `Core.IntrinsicFunction` function define some basic primitives for what defines the +abilities and behaviors of a Julia program +""" +Core.IntrinsicFunction + +""" + Core.Intrinsics + +The `Core.Intrinsics` module holds the `Core.IntrinsicFunction` objects. +""" +Core.Intrinsics + +""" + Core.memoryref(::GenericMemory) + Core.memoryref(::GenericMemoryRef, index::Int, [boundscheck::Bool]) + +Return a `GenericMemoryRef` for a `GenericMemory`. See [`MemoryRef`](@ref). +""" +Core.memoryref + +""" + Core..memoryrefoffset(::GenericMemoryRef) + +Return the offset index that was used to construct the `MemoryRef`. See [`Core.memoryref`](@ref). 
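For contrast with the `Core.finalizer` caveats listed above, a sketch of the public `Base.finalizer` API: the object must be mutable, and the object itself is returned. The `TempResource` type and `release!` function are made up; a real finalizer must not yield or throw.

```julia
mutable struct TempResource
    name::String
    released::Bool
end

release!(r::TempResource) = (r.released = true; nothing)

r = finalizer(release!, TempResource("scratch", false))  # Base.finalizer returns `r`
finalize(r)   # run the registered finalizer eagerly instead of waiting for GC
r.released    # true
```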
+""" +Core.memoryrefoffset + +""" + Core.memoryrefget(::GenericMemoryRef, ordering::Symbol, boundscheck::Bool) + +Return the value stored at the `MemoryRef`, throwing a `BoundsError` if the `Memory` is empty. See `ref[]`. +The memory ordering specified must be compatible with the `isatomic` parameter. +""" +Core.memoryrefget + +""" + Core.memoryrefset!(::GenericMemoryRef, value, ordering::Symbol, boundscheck::Bool) + +Store the value to the `MemoryRef`, throwing a `BoundsError` if the `Memory` is empty. See `ref[] = value`. +The memory ordering specified must be compatible with the `isatomic` parameter. +""" +Core.memoryrefset! + +""" + Core.memoryref_isassigned(::GenericMemoryRef, ordering::Symbol, boundscheck::Bool) + +Return whether there is a value stored at the `MemoryRef`, returning false if the `Memory` +is empty. See [`isassigned(::Base.RefValue)`](@ref), [`Core.memoryrefget`](@ref). +The memory ordering specified must be compatible with the `isatomic` parameter. +""" +Core.memoryref_isassigned diff --git a/base/env.jl b/base/env.jl index 4fdc02e582a4c..27594acff6b7f 100644 --- a/base/env.jl +++ b/base/env.jl @@ -3,12 +3,29 @@ if Sys.iswindows() const ERROR_ENVVAR_NOT_FOUND = UInt32(203) + const env_dict = IdDict{String, Vector{Cwchar_t}}() + const env_lock = ReentrantLock() + + function memoized_env_lookup(str::AbstractString) + # Windows environment variables have a different format from Linux / MacOS, and previously + # incurred allocations because we had to convert a String to a Vector{Cwchar_t} each time + # an environment variable was looked up. This function memoizes that lookup process, storing + # the String => Vector{Cwchar_t} pairs in env_dict + var = get(env_dict, str, nothing) + if isnothing(var) + var = @lock env_lock begin + env_dict[str] = cwstring(str) + end + end + var + end + _getenvlen(var::Vector{UInt16}) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,C_NULL,0) _hasenv(s::Vector{UInt16}) = _getenvlen(s) != 0 || Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND - _hasenv(s::AbstractString) = _hasenv(cwstring(s)) + _hasenv(s::AbstractString) = _hasenv(memoized_env_lookup(s)) function access_env(onError::Function, str::AbstractString) - var = cwstring(str) + var = memoized_env_lookup(str) len = _getenvlen(var) if len == 0 return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? "" : onError(str) @@ -21,7 +38,7 @@ if Sys.iswindows() end function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true) - var = cwstring(svar) + var = memoized_env_lookup(svar) val = cwstring(sval) if overwrite || !_hasenv(var) ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,val) @@ -30,7 +47,7 @@ if Sys.iswindows() end function _unsetenv(svar::AbstractString) - var = cwstring(svar) + var = memoized_env_lookup(svar) ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,C_NULL) windowserror(:setenv, ret == 0 && Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND) end @@ -73,12 +90,75 @@ variables. all keys to uppercase for display, iteration, and copying. Portable code should not rely on the ability to distinguish variables by case, and should beware that setting an ostensibly lowercase variable may result in an uppercase `ENV` key.) + +!!! warning + Mutating the environment is not thread-safe. 
+ +# Examples +```julia-repl +julia> ENV +Base.EnvDict with "50" entries: + "SECURITYSESSIONID" => "123" + "USER" => "username" + "MallocNanoZone" => "0" + ⋮ => ⋮ + +julia> ENV["JULIA_EDITOR"] = "vim" +"vim" + +julia> ENV["JULIA_EDITOR"] +"vim" +``` + +See also: [`withenv`](@ref), [`addenv`](@ref). """ const ENV = EnvDict() +const get_bool_env_truthy = ( + "t", "T", + "true", "True", "TRUE", + "y", "Y", + "yes", "Yes", "YES", + "1") +const get_bool_env_falsy = ( + "f", "F", + "false", "False", "FALSE", + "n", "N", + "no", "No", "NO", + "0") + +""" + Base.get_bool_env(name::String, default::Bool)::Union{Bool,Nothing} + +Evaluate whether the value of environnment variable `name` is a truthy or falsy string, +and return `nothing` if it is not recognized as either. If the variable is not set, or is set to "", +return `default`. + +Recognized values are the following, and their Capitalized and UPPERCASE forms: + truthy: "t", "true", "y", "yes", "1" + falsy: "f", "false", "n", "no", "0" +""" +function get_bool_env(name::String, default::Bool) + haskey(ENV, name) || return default + val = ENV[name] + if isempty(val) + return default + elseif val in get_bool_env_truthy + return true + elseif val in get_bool_env_falsy + return false + else + return nothing + end +end + getindex(::EnvDict, k::AbstractString) = access_env(k->throw(KeyError(k)), k) get(::EnvDict, k::AbstractString, def) = access_env(Returns(def), k) get(f::Callable, ::EnvDict, k::AbstractString) = access_env(k->f(), k) +function get!(default::Callable, ::EnvDict, k::AbstractString) + haskey(ENV, k) && return ENV[k] + ENV[k] = default() +end in(k::AbstractString, ::KeySet{String, EnvDict}) = _hasenv(k) pop!(::EnvDict, k::AbstractString) = (v = ENV[k]; _unsetenv(k); v) pop!(::EnvDict, k::AbstractString, def) = haskey(ENV,k) ? pop!(ENV,k) : def @@ -117,7 +197,7 @@ if Sys.iswindows() m = nothing end if m === nothing - @warn "malformed environment entry: $env" + @warn "malformed environment entry" env continue end return (Pair{String,String}(winuppercase(env[1:prevind(env, m)]), env[nextind(env, m):end]), (pos, blk)) @@ -131,8 +211,8 @@ else # !windows env = env::String m = findfirst('=', env) if m === nothing - @warn "malformed environment entry: $env" - nothing + @warn "malformed environment entry" env + continue end return (Pair{String,String}(env[1:prevind(env, m)], env[nextind(env, m):end]), i+1) end @@ -162,6 +242,10 @@ by zero or more `"var"=>val` arguments `kv`. `withenv` is generally used via the `withenv(kv...) do ... end` syntax. A value of `nothing` can be used to temporarily unset an environment variable (if it is set). When `withenv` returns, the original environment has been restored. + +!!! warning + Changing the environment is not thread-safe. For running external commands with a different + environment from the parent process, prefer using [`addenv`](@ref) over `withenv`. """ function withenv(f, keyvals::Pair{T}...) where T<:AbstractString old = Dict{T,Any}() diff --git a/base/error.jl b/base/error.jl index 4459e54def19b..37ceb39253e38 100644 --- a/base/error.jl +++ b/base/error.jl @@ -37,7 +37,7 @@ error(s::AbstractString) = throw(ErrorException(s)) """ error(msg...) -Raise an `ErrorException` with the given message. +Raise an `ErrorException` with a message constructed by `string(msg...)`. 
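A sketch of the new environment helpers in use: `Base.get_bool_env` recognizes the truthy/falsy spellings listed above, `withenv` temporarily mutates the (not thread-safe) process environment, and `addenv` is the preferred way to pass variables to a child process. `JULIA_MYPKG_DEBUG` is a made-up variable name.

```julia
debug = something(Base.get_bool_env("JULIA_MYPKG_DEBUG", false), false)

withenv("JULIA_MYPKG_DEBUG" => "yes") do
    Base.get_bool_env("JULIA_MYPKG_DEBUG", false)  # true; the old value is restored on exit
end

cmd = addenv(`hostname`, "JULIA_MYPKG_DEBUG" => "1")  # environment for a subprocess only
```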
""" function error(s::Vararg{Any,N}) where {N} @noinline @@ -162,7 +162,7 @@ end ## keyword arg lowering generates calls to this ## function kwerr(kw, args::Vararg{Any,N}) where {N} @noinline - throw(MethodError(typeof(args[1]).name.mt.kwsorter, (kw,args...))) + throw(MethodError(Core.kwcall, (kw, args...))) end ## system error handling ## @@ -197,15 +197,17 @@ windowserror(p, code::UInt32=Libc.GetLastError(); extrainfo=nothing) = throw(Mai """ @assert cond [text] -Throw an [`AssertionError`](@ref) if `cond` is `false`. Preferred syntax for writing assertions. -Message `text` is optionally displayed upon assertion failure. +Throw an [`AssertionError`](@ref) if `cond` is `false`. This is the preferred syntax for +writing assertions, which are conditions that are assumed to be true, but that the user +might decide to check anyways, as an aid to debugging if they fail. +The optional message `text` is displayed upon assertion failure. !!! warning - An assert might be disabled at various optimization levels. + An assert might be disabled at some optimization levels. Assert should therefore only be used as a debugging tool - and not used for authentication verification (e.g., verifying passwords), - nor should side effects needed for the function to work correctly - be used inside of asserts. + and not used for authentication verification (e.g., verifying passwords or checking array bounds). + The code must not rely on the side effects of running `cond` for the correct behavior + of a function. # Examples ```jldoctest @@ -295,7 +297,6 @@ function retry(f; delays=ExponentialBackOff(), check=nothing) try return f(args...; kwargs...) catch e - y === nothing && rethrow() if check !== nothing result = check(state, e) state, retry_or_not = length(result) == 2 ? 
result : (state, result) diff --git a/base/errorshow.jl b/base/errorshow.jl index aaf040cd71b8d..24663604f1a74 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -35,6 +35,13 @@ show_index(io::IO, x::LogicalIndex) = summary(io, x.mask) show_index(io::IO, x::OneTo) = print(io, "1:", x.stop) show_index(io::IO, x::Colon) = print(io, ':') +function showerror(io::IO, ex::Meta.ParseError) + if isnothing(ex.detail) + print(io, "ParseError(", repr(ex.msg), ")") + else + showerror(io, ex.detail) + end +end function showerror(io::IO, ex::BoundsError) print(io, "BoundsError") @@ -63,6 +70,8 @@ function showerror(io::IO, ex::TypeError) print(io, "TypeError: ") if ex.expected === Bool print(io, "non-boolean (", typeof(ex.got), ") used in boolean context") + elseif ex.func === :var"dict key" + print(io, "$(limitrepr(ex.got)) is not a valid key for type $(ex.expected)") else if isvarargtype(ex.got) targs = (ex.got,) @@ -73,7 +82,7 @@ function showerror(io::IO, ex::TypeError) end if ex.context == "" ctx = "in $(ex.func)" - elseif ex.func === Symbol("keyword argument") + elseif ex.func === :var"keyword argument" ctx = "in keyword argument $(ex.context)" else ctx = "in $(ex.func), in $(ex.context)" @@ -107,8 +116,8 @@ showerror(io::IO, ex::InitError) = showerror(io, ex, []) function showerror(io::IO, ex::DomainError) if isa(ex.val, AbstractArray) - compact = get(io, :compact, true) - limit = get(io, :limit, true) + compact = get(io, :compact, true)::Bool + limit = get(io, :limit, true)::Bool print(IOContext(io, :compact => compact, :limit => limit), "DomainError with ", ex.val) else @@ -157,17 +166,29 @@ showerror(io::IO, ex::AssertionError) = print(io, "AssertionError: ", ex.msg) showerror(io::IO, ex::OverflowError) = print(io, "OverflowError: ", ex.msg) showerror(io::IO, ex::UndefKeywordError) = - print(io, "UndefKeywordError: keyword argument $(ex.var) not assigned") + print(io, "UndefKeywordError: keyword argument `$(ex.var)` not assigned") function showerror(io::IO, ex::UndefVarError) - print(io, "UndefVarError: $(ex.var) not defined") + print(io, "UndefVarError: `$(ex.var)` not defined") + if isdefined(ex, :scope) + scope = ex.scope + if scope isa Module + print(io, " in `$scope`") + elseif scope === :static_parameter + print(io, " in static parameter matching") + else + print(io, " in $scope scope") + end + end Experimental.show_error_hints(io, ex) end function showerror(io::IO, ex::InexactError) print(io, "InexactError: ", ex.func, '(') - nameof(ex.T) === ex.func || print(io, ex.T, ", ") - print(io, ex.val, ')') + T = first(ex.args) + nameof(T) === ex.func || print(io, T, ", ") + join(io, ex.args[2:end], ", ") + print(io, ")") Experimental.show_error_hints(io, ex) end @@ -181,6 +202,7 @@ function print_with_compare(io::IO, @nospecialize(a::DataType), @nospecialize(b: if a.name === b.name Base.show_type_name(io, a.name) n = length(a.parameters) + n > 0 || return print(io, '{') for i = 1:n if i > length(b.parameters) @@ -232,51 +254,48 @@ function showerror(io::IO, ex::MethodError) return showerror_ambiguous(io, meth, f, arg_types) end arg_types_param::SimpleVector = arg_types.parameters - show_candidates = true print(io, "MethodError: ") ft = typeof(f) - name = ft.name.mt.name f_is_function = false kwargs = () - if endswith(string(ft.name.name), "##kw") - f = ex.args[2] + if f === Core.kwcall && !is_arg_types + f = (ex.args::Tuple)[2] ft = typeof(f) - name = ft.name.mt.name arg_types_param = arg_types_param[3:end] kwargs = pairs(ex.args[1]) - ex = MethodError(f, ex.args[3:end::Int]) + ex = 
MethodError(f, ex.args[3:end::Int], ex.world) end + name = ft.name.mt.name if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types f_is_function = true show_convert_error(io, ex, arg_types_param) - elseif f === mapreduce_empty || f === reduce_empty - print(io, "reducing over an empty collection is not allowed; consider supplying `init` to the reducer") - show_candidates = false elseif isempty(methods(f)) && isa(f, DataType) && isabstracttype(f) print(io, "no constructors have been defined for ", f) elseif isempty(methods(f)) && !isa(f, Function) && !isa(f, Type) print(io, "objects of type ", ft, " are not callable") else - if ft <: Function && isempty(ft.parameters) && - isdefined(ft.name.module, name) && - ft == typeof(getfield(ft.name.module, name)) + if ft <: Function && isempty(ft.parameters) && _isself(ft) f_is_function = true end print(io, "no method matching ") - show_signature_function(io, isa(f, Type) ? Type{f} : typeof(f)) - print(io, "(") + iob = IOContext(IOBuffer(), io) # for type abbreviation as in #49795; some, like `convert(T, x)`, should not abbreviate + show_signature_function(iob, isa(f, Type) ? Type{f} : typeof(f)) + print(iob, "(") for (i, typ) in enumerate(arg_types_param) - print(io, "::", typ) - i == length(arg_types_param) || print(io, ", ") + print(iob, "::", typ) + i == length(arg_types_param) || print(iob, ", ") end if !isempty(kwargs) - print(io, "; ") + print(iob, "; ") for (i, (k, v)) in enumerate(kwargs) - print(io, k, "::", typeof(v)) - i == length(kwargs)::Int || print(io, ", ") + print(iob, k, "::", typeof(v)) + i == length(kwargs)::Int || print(iob, ", ") end end - print(io, ")") + print(iob, ")") + str = String(take!(unwrapcontext(iob)[1])) + str = type_limited_string_from_context(io, str) + print(io, str) end # catch the two common cases of element-wise addition and subtraction if (f === Base.:+ || f === Base.:-) && length(arg_types_param) == 2 @@ -323,11 +342,12 @@ function showerror(io::IO, ex::MethodError) end end Experimental.show_error_hints(io, ex, arg_types_param, kwargs) - show_candidates && try + try show_method_candidates(io, ex, kwargs) catch ex @error "Error showing method candidates, aborted" exception=ex,catch_backtrace() end + nothing end striptype(::Type{T}) where {T} = T @@ -346,7 +366,7 @@ function showerror_ambiguous(io::IO, meths, f, args) sigfix = Any for m in meths print(io, " ") - show(io, m; digit_align_width=-2) + show_method(io, m; digit_align_width=0) println(io) sigfix = typeintersect(m.sig, sigfix) end @@ -375,12 +395,9 @@ function showerror_nostdio(err, msg::AbstractString) ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, "\n") end -stacktrace_expand_basepaths()::Bool = - tryparse(Bool, get(ENV, "JULIA_STACKTRACE_EXPAND_BASEPATHS", "false")) === true -stacktrace_contract_userdir()::Bool = - tryparse(Bool, get(ENV, "JULIA_STACKTRACE_CONTRACT_HOMEDIR", "true")) === true -stacktrace_linebreaks()::Bool = - tryparse(Bool, get(ENV, "JULIA_STACKTRACE_LINEBREAKS", "false")) === true +stacktrace_expand_basepaths()::Bool = Base.get_bool_env("JULIA_STACKTRACE_EXPAND_BASEPATHS", false) === true +stacktrace_contract_userdir()::Bool = Base.get_bool_env("JULIA_STACKTRACE_CONTRACT_HOMEDIR", true) === true +stacktrace_linebreaks()::Bool = Base.get_bool_env("JULIA_STACKTRACE_LINEBREAKS", false) === true function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()) is_arg_types = isa(ex.args, DataType) @@ -400,7 +417,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize 
kwargs=() # pool MethodErrors for these two functions. if f === convert && !isempty(arg_types_param) at1 = arg_types_param[1] - if isa(at1,DataType) && (at1::DataType).name === Type.body.name && !Core.Compiler.has_free_typevars(at1) + if isType(at1) && !Core.Compiler.has_free_typevars(at1) push!(funcs, (at1.parameters[1], arg_types_param[2:end])) end end @@ -422,17 +439,17 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() end sig0 = sig0::DataType s1 = sig0.parameters[1] - sig = sig0.parameters[2:end] - print(iob, " ") - if !isa(func, rewrap_unionall(s1, method.sig)) - # function itself doesn't match + if sig0 === Tuple || !isa(func, rewrap_unionall(s1, method.sig)) + # function itself doesn't match or is a builtin continue else + print(iob, " ") show_signature_function(iob, s1) end print(iob, "(") t_i = copy(arg_types_param) right_matches = 0 + sig = sig0.parameters[2:end] for i = 1 : min(length(t_i), length(sig)) i > 1 && print(iob, ", ") # If isvarargtype then it checks whether the rest of the input arguments matches @@ -451,7 +468,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() # the type of the first argument is not matched. t_in === Union{} && special && i == 1 && break if t_in === Union{} - if get(io, :color, false) + if get(io, :color, false)::Bool let sigstr=sigstr Base.with_output_color(Base.error_color(), iob) do iob print(iob, "::", sigstr...) @@ -495,7 +512,11 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() if !((min(length(t_i), length(sig)) == 0) && k==1) print(iob, ", ") end - if get(io, :color, false) + if k == 1 && Base.isvarargtype(sigtype) + # There wasn't actually a mismatch - the method match failed for + # some other reason, e.g. world age. Just print the sigstr. + print(iob, sigstr...) + elseif get(io, :color, false)::Bool let sigstr=sigstr Base.with_output_color(Base.error_color(), iob) do iob print(iob, "::", sigstr...) @@ -542,9 +563,9 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() end println(iob) - m = parentmodule_before_main(method.module) - color = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) - print_module_path_file(iob, m, string(file), line, color, 1) + m = parentmodule_before_main(method) + modulecolor = get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) + print_module_path_file(iob, m, string(file), line; modulecolor, digit_align_width = 3) # TODO: indicate if it's in the wrong world push!(lines, (buf, right_matches)) @@ -669,7 +690,7 @@ function show_reduced_backtrace(io::IO, t::Vector) repetitions = repeated_cycle[1][3] popfirst!(repeated_cycle) printstyled(io, - "--- the last ", cycle_length, " lines are repeated ", + "--- the above ", cycle_length, " lines are repeated ", repetitions, " more time", repetitions>1 ? "s" : "", " ---", color = :light_black) if i < length(displayed_stackframes) println(io) @@ -685,7 +706,7 @@ end # Print a stack frame where the module color is determined by looking up the parent module in # `modulecolordict`. If the module does not have a color, yet, a new one can be drawn # from `modulecolorcycler`. 
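The stacktrace toggles above are now read through `Base.get_bool_env`, so any of the listed truthy/falsy spellings work; a sketch of setting them before provoking an error (the exact rendering depends on the terminal):

```julia
ENV["JULIA_STACKTRACE_EXPAND_BASEPATHS"] = "true"  # print full paths into Base
ENV["JULIA_STACKTRACE_CONTRACT_HOMEDIR"] = "no"    # keep the home directory uncontracted
ENV["JULIA_STACKTRACE_LINEBREAKS"] = "1"           # blank line between stack frames
error("boom")  # the resulting stacktrace reflects the settings above
```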
-function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, modulecolordict, modulecolorcycler) +function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulecolordict, modulecolorcycler) m = Base.parentmodule(frame) modulecolor = if m !== nothing m = parentmodule_before_main(m) @@ -693,11 +714,11 @@ function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, m else :default end - print_stackframe(io, i, frame, n, digit_align_width, modulecolor) + print_stackframe(io, i, frame, n, ndigits_max, modulecolor) end # Gets the topmost parent module that isn't Main -function parentmodule_before_main(m) +function parentmodule_before_main(m::Module) while parentmodule(m) !== m pm = parentmodule(m) pm == Main && break @@ -705,13 +726,11 @@ function parentmodule_before_main(m) end m end +parentmodule_before_main(x) = parentmodule_before_main(parentmodule(x)) # Print a stack frame where the module color is set manually with `modulecolor`. -function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, modulecolor) +function print_stackframe(io, i, frame::StackFrame, n::Int, ndigits_max, modulecolor) file, line = string(frame.file), frame.line - file = fixup_stdlib_path(file) - stacktrace_expand_basepaths() && (file = something(find_source_file(file), file)) - stacktrace_contract_userdir() && (file = contractuser(file)) # Used by the REPL to make it possible to open # the location of a stackframe/method in the editor. @@ -722,8 +741,10 @@ function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, m inlined = getfield(frame, :inlined) modul = parentmodule(frame) + digit_align_width = ndigits_max + 2 + # frame number - print(io, " ", lpad("[" * string(i) * "]", digit_align_width + 2)) + print(io, " ", lpad("[" * string(i) * "]", digit_align_width)) print(io, " ") StackTraces.show_spec_linfo(IOContext(io, :backtrace=>true), frame) @@ -733,14 +754,14 @@ function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, m println(io) # @ Module path / file : line - print_module_path_file(io, modul, file, line, modulecolor, digit_align_width) + print_module_path_file(io, modul, file, line; modulecolor, digit_align_width) # inlined printstyled(io, inlined ? 
" [inlined]" : "", color = :light_black) end -function print_module_path_file(io, modul, file, line, modulecolor = :light_black, digit_align_width = 0) - printstyled(io, " " ^ (digit_align_width + 2) * "@", color = :light_black) +function print_module_path_file(io, modul, file, line; modulecolor = :light_black, digit_align_width = 0) + printstyled(io, " " ^ digit_align_width * "@", color = :light_black) # module if modul !== nothing && modulecolor !== nothing @@ -749,6 +770,7 @@ function print_module_path_file(io, modul, file, line, modulecolor = :light_blac end # filepath + file = fixup_stdlib_path(file) stacktrace_expand_basepaths() && (file = something(find_source_file(file), file)) stacktrace_contract_userdir() && (file = contractuser(file)) print(io, " ") @@ -774,7 +796,7 @@ function show_backtrace(io::IO, t::Vector) if length(filtered) == 1 && StackTraces.is_top_level_frame(filtered[1][1]) f = filtered[1][1]::StackFrame - if f.line == 0 && f.file === Symbol("") + if f.line == 0 && f.file === :var"" # don't show a single top-level frame with no location info return end @@ -783,20 +805,15 @@ function show_backtrace(io::IO, t::Vector) if length(filtered) > BIG_STACKTRACE_SIZE show_reduced_backtrace(IOContext(io, :backtrace => true), filtered) return + else + try invokelatest(update_stackframes_callback[], filtered) catch end + # process_backtrace returns a Vector{Tuple{Frame, Int}} + show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks()) end - - try invokelatest(update_stackframes_callback[], filtered) catch end - # process_backtrace returns a Vector{Tuple{Frame, Int}} - show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks()) - return + nothing end -function is_kw_sorter_name(name::Symbol) - sn = string(name) - return !startswith(sn, '#') && endswith(sn, "##kw") -end - # For improved user experience, filter out frames for include() implementation # - see #33065. See also #35371 for extended discussion of internal frames. function _simplify_include_frames(trace) @@ -831,6 +848,72 @@ function _simplify_include_frames(trace) return trace[kept_frames] end +# Collapse frames that have the same location (in some cases) +function _collapse_repeated_frames(trace) + kept_frames = trues(length(trace)) + last_frame = nothing + for i in 1:length(trace) + frame::StackFrame, _ = trace[i] + if last_frame !== nothing && frame.file == last_frame.file && frame.line == last_frame.line + #= + Handles this case: + + f(g, a; kw...) = error(); + @inline f(a; kw...) 
= f(identity, a; kw...); + f(1) + + which otherwise ends up as: + + [4] #f#4 <-- useless + @ ./REPL[2]:1 [inlined] + [5] f(a::Int64) + @ Main ./REPL[2]:1 + =# + if startswith(sprint(show, last_frame), "#") + kept_frames[i-1] = false + end + + #= Handles this case + g(x, y=1, z=2) = error(); + g(1) + + which otherwise ends up as: + + [2] g(x::Int64, y::Int64, z::Int64) + @ Main ./REPL[1]:1 + [3] g(x::Int64) <-- useless + @ Main ./REPL[1]:1 + =# + if frame.linfo isa MethodInstance && last_frame.linfo isa MethodInstance && + frame.linfo.def isa Method && last_frame.linfo.def isa Method + m, last_m = frame.linfo.def::Method, last_frame.linfo.def::Method + params, last_params = Base.unwrap_unionall(m.sig).parameters, Base.unwrap_unionall(last_m.sig).parameters + if last_m.nkw != 0 + pos_sig_params = last_params[(last_m.nkw+2):end] + issame = true + if pos_sig_params == params + kept_frames[i] = false + end + end + if length(last_params) > length(params) + issame = true + for i = 1:length(params) + issame &= params[i] == last_params[i] + end + if issame + kept_frames[i] = false + end + end + end + + # TODO: Detect more cases that can be collapsed + end + last_frame = frame + end + return trace[kept_frames] +end + + function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true) n = 0 last_frame = StackTraces.UNKNOWN @@ -848,15 +931,27 @@ function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true) continue end - if (lkup.from_c && skipC) || is_kw_sorter_name(lkup.func) + if (lkup.from_c && skipC) continue end + code = lkup.linfo + if code isa MethodInstance + def = code.def + if def isa Method && def.name !== :kwcall && def.sig <: Tuple{typeof(Core.kwcall),NamedTuple,Any,Vararg} + # hide kwcall() methods, which are probably internal keyword sorter methods + # (we print the internal method instead, after demangling + # the argument list, since it has the right line number info) + continue + end + elseif !lkup.from_c + lkup.func === :kwcall && continue + end count += 1 if count > limit break end - if lkup.file != last_frame.file || lkup.line != last_frame.line || lkup.func != last_frame.func || lkup.linfo !== lkup.linfo + if lkup.file != last_frame.file || lkup.line != last_frame.line || lkup.func != last_frame.func || lkup.linfo !== last_frame.linfo if n > 0 push!(ret, (last_frame, n)) end @@ -871,7 +966,9 @@ function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true) if n > 0 push!(ret, (last_frame, n)) end - return _simplify_include_frames(ret) + trace = _simplify_include_frames(ret) + trace = _collapse_repeated_frames(trace) + return trace end function show_exception_stack(io::IO, stack) @@ -913,6 +1010,19 @@ end Experimental.register_error_hint(noncallable_number_hint_handler, MethodError) +# Display a hint in case the user tries to use the + operator on strings +# (probably attempting concatenation) +function string_concatenation_hint_handler(io, ex, arg_types, kwargs) + @nospecialize + if (ex.f === +) && all(i -> i <: AbstractString, arg_types) + print(io, "\nString concatenation is performed with ") + printstyled(io, "*", color=:cyan) + print(io, " (See also: https://docs.julialang.org/en/v1/manual/strings/#man-concatenation).") + end +end + +Experimental.register_error_hint(string_concatenation_hint_handler, MethodError) + # ExceptionStack implementation size(s::ExceptionStack) = size(s.stack) getindex(s::ExceptionStack, i::Int) = s.stack[i] diff --git a/base/essentials.jl b/base/essentials.jl index 906c36ad9c003..a9f3bfc40f622 100644 --- 
a/base/essentials.jl +++ b/base/essentials.jl @@ -1,17 +1,21 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using Core: CodeInfo, SimpleVector, donotdelete, arrayref +using Core: CodeInfo, SimpleVector, donotdelete, compilerbarrier, memoryref, memoryrefget, memoryrefset! const Callable = Union{Function,Type} const Bottom = Union{} # Define minimal array interface here to help code used in macros: -length(a::Array) = arraylen(a) +length(a::Array{T, 0}) where {T} = 1 +length(a::Array{T, 1}) where {T} = getfield(a, :size)[1] +length(a::Array) = getfield(getfield(getfield(a, :ref), :mem), :length) +length(a::GenericMemory) = getfield(a, :length) +throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I))) -# This is more complicated than it needs to be in order to get Win64 through bootstrap -eval(:(getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1))) -eval(:(getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@inline; arrayref($(Expr(:boundscheck)), A, i1, i2, I...)))) +# multidimensional getindex will be defined later on + +==(a::GlobalRef, b::GlobalRef) = a.mod === b.mod && a.name === b.name """ AbstractSet{T} @@ -30,21 +34,6 @@ An `AbstractDict{K, V}` should be an iterator of `Pair{K, V}`. """ abstract type AbstractDict{K,V} end -""" - Iterators.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)} - -Transforms an indexable container into a Dictionary-view of the same data. -Modifying the key-space of the underlying data may invalidate this object. -""" -struct Pairs{K, V, I, A} <: AbstractDict{K, V} - data::A - itr::I -end -Pairs{K, V}(data::A, itr::I) where {K, V, I, A} = Pairs{K, V, I, A}(data, itr) -Pairs{K}(data::A, itr::I) where {K, I, A} = Pairs{K, eltype(A), I, A}(data, itr) -Pairs(data::A, itr::I) where {I, A} = Pairs{eltype(I), eltype(A), I, A}(data, itr) -pairs(::Type{NamedTuple}) = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names, T}} where {V, N, names, T<:NTuple{N, Any}} - ## optional pretty printer: #const NamedTuplePair{N, V, names, T<:NTuple{N, Any}} = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names, T}} #export NamedTuplePair @@ -98,7 +87,8 @@ f(y) = [x for x in y] !!! note `@nospecialize` affects code generation but not inference: it limits the diversity of the resulting native code, but it does not impose any limitations (beyond the - standard ones) on type-inference. + standard ones) on type-inference. Use [`Base.@nospecializeinfer`](@ref) together with + `@nospecialize` to additionally suppress inference. # Example @@ -190,6 +180,19 @@ macro isdefined(s::Symbol) return Expr(:escape, Expr(:isdefined, s)) end +""" + nameof(m::Module) -> Symbol + +Get the name of a `Module` as a [`Symbol`](@ref). 
+ +# Examples +```jldoctest +julia> nameof(Base.Broadcast) +:Broadcast +``` +""" +nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m) + function _is_internal(__module__) if ccall(:jl_base_relative_to, Any, (Any,), __module__)::Module === Core.Compiler || nameof(__module__) === :Base @@ -198,10 +201,6 @@ function _is_internal(__module__) return false end -# can be used in place of `@pure` (supposed to be used for bootstrapping) -macro _pure_meta() - return _is_internal(__module__) && Expr(:meta, :pure) -end # can be used in place of `@assume_effects :total` (supposed to be used for bootstrapping) macro _total_meta() return _is_internal(__module__) && Expr(:meta, Expr(:purity, @@ -210,7 +209,10 @@ macro _total_meta() #=:nothrow=#true, #=:terminates_globally=#true, #=:terminates_locally=#false, - #=:notaskstate=#true)) + #=:notaskstate=#true, + #=:inaccessiblememonly=#true, + #=:noub=#true, + #=:noub_if_noinbounds=#false)) end # can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping) macro _foldable_meta() @@ -220,13 +222,88 @@ macro _foldable_meta() #=:nothrow=#false, #=:terminates_globally=#true, #=:terminates_locally=#false, - #=:notaskstate=#false)) + #=:notaskstate=#true, + #=:inaccessiblememonly=#true, + #=:noub=#true, + #=:noub_if_noinbounds=#false)) +end +# can be used in place of `@assume_effects :terminates_locally` (supposed to be used for bootstrapping) +macro _terminates_locally_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#false, + #=:terminates_globally=#false, + #=:terminates_locally=#true, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false)) +end +# can be used in place of `@assume_effects :terminates_globally` (supposed to be used for bootstrapping) +macro _terminates_globally_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#false, + #=:terminates_globally=#true, + #=:terminates_locally=#true, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false)) +end +# can be used in place of `@assume_effects :effect_free :terminates_locally` (supposed to be used for bootstrapping) +macro _effect_free_terminates_locally_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#true, + #=:nothrow=#false, + #=:terminates_globally=#false, + #=:terminates_locally=#true, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + #=:noub=#false, + #=:noub_if_noinbounds=#false)) +end +# can be used in place of `@assume_effects :nothrow :noub` (supposed to be used for bootstrapping) +macro _nothrow_noub_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#true, + #=:terminates_globally=#false, + #=:terminates_locally=#false, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + #=:noub=#true, + #=:noub_if_noinbounds=#false)) +end +# can be used in place of `@assume_effects :noub_if_noinbounds` (supposed to be used for bootstrapping) +macro _noub_if_noinbounds_meta() + return _is_internal(__module__) && Expr(:meta, Expr(:purity, + #=:consistent=#false, + #=:effect_free=#false, + #=:nothrow=#false, + #=:terminates_globally=#false, + #=:terminates_locally=#false, + #=:notaskstate=#false, + #=:inaccessiblememonly=#false, + 
#=:noub=#false, + #=:noub_if_noinbounds=#true)) end # another version of inlining that propagates an inbounds context macro _propagate_inbounds_meta() return Expr(:meta, :inline, :propagate_inbounds) end +macro _nospecializeinfer_meta() + return Expr(:meta, :nospecializeinfer) +end + +getindex(A::GenericMemory{:not_atomic}, i::Int) = (@_noub_if_noinbounds_meta; + memoryrefget(memoryref(memoryref(A), i, @_boundscheck), :not_atomic, false)) +getindex(A::GenericMemoryRef{:not_atomic}) = memoryrefget(A, :not_atomic, @_boundscheck) function iterate end @@ -279,7 +356,9 @@ See also: [`round`](@ref), [`trunc`](@ref), [`oftype`](@ref), [`reinterpret`](@r """ function convert end -convert(::Type{Union{}}, @nospecialize x) = throw(MethodError(convert, (Union{}, x))) +# ensure this is never ambiguous, and therefore fast for lookup +convert(T::Type{Union{}}, x...) = throw(ArgumentError("cannot convert a value to Union{} for assignment")) + convert(::Type{Type}, x::Type) = x # the ssair optimizer is strongly dependent on this method existing to avoid over-specialization # in the absence of inlining-enabled # (due to fields typed as `Type`, which is generally a bad idea) @@ -299,6 +378,26 @@ macro eval(mod, ex) return Expr(:escape, Expr(:call, GlobalRef(Core, :eval), mod, Expr(:quote, ex))) end +# use `@eval` here to directly form `:new` expressions avoid implicit `convert`s +# in order to achieve better effects inference +@eval struct Pairs{K, V, I, A} <: AbstractDict{K, V} + data::A + itr::I + Pairs{K, V, I, A}(data, itr) where {K, V, I, A} = $(Expr(:new, :(Pairs{K, V, I, A}), :(data isa A ? data : convert(A, data)), :(itr isa I ? itr : convert(I, itr)))) + Pairs{K, V}(data::A, itr::I) where {K, V, I, A} = $(Expr(:new, :(Pairs{K, V, I, A}), :data, :itr)) + Pairs{K}(data::A, itr::I) where {K, I, A} = $(Expr(:new, :(Pairs{K, eltype(A), I, A}), :data, :itr)) + Pairs(data::A, itr::I) where {I, A} = $(Expr(:new, :(Pairs{eltype(I), eltype(A), I, A}), :data, :itr)) +end +pairs(::Type{NamedTuple}) = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names, T}} where {V, N, names, T<:NTuple{N, Any}} + +""" + Base.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)} + +Transforms an indexable container into a Dictionary-view of the same data. +Modifying the key-space of the underlying data may invalidate this object. +""" +Pairs + argtail(x, rest...) = rest """ @@ -321,6 +420,7 @@ tail(x::Tuple) = argtail(x...) 
tail(::Tuple{}) = throw(ArgumentError("Cannot call tail on an empty tuple.")) function unwrap_unionall(@nospecialize(a)) + @_foldable_meta while isa(a,UnionAll) a = a.body end @@ -328,6 +428,7 @@ function unwrap_unionall(@nospecialize(a)) end function rewrap_unionall(@nospecialize(t), @nospecialize(u)) + @_foldable_meta if !isa(u, UnionAll) return t end @@ -335,6 +436,7 @@ function rewrap_unionall(@nospecialize(t), @nospecialize(u)) end function rewrap_unionall(t::Core.TypeofVararg, @nospecialize(u)) + @_foldable_meta isdefined(t, :T) || return t if !isa(u, UnionAll) return t @@ -351,22 +453,23 @@ function rename_unionall(@nospecialize(u)) if !isa(u, UnionAll) return u end - body = rename_unionall(u.body) - if body === u.body - body = u - else - body = UnionAll(u.var, body) - end var = u.var::TypeVar + body = UnionAll(var, rename_unionall(u.body)) nv = TypeVar(var.name, var.lb, var.ub) return UnionAll(nv, body{nv}) end +# remove concrete constraint on diagonal TypeVar if it comes from troot +function widen_diagonal(@nospecialize(t), troot::UnionAll) + body = ccall(:jl_widen_diagonal, Any, (Any, Any), t, troot) +end + function isvarargtype(@nospecialize(t)) return isa(t, Core.TypeofVararg) end function isvatuple(@nospecialize(t)) + @_foldable_meta t = unwrap_unionall(t) if isa(t, DataType) n = length(t.parameters) @@ -375,8 +478,10 @@ function isvatuple(@nospecialize(t)) return false end -unwrapva(t::Core.TypeofVararg) = isdefined(t, :T) ? t.T : Any -unwrapva(@nospecialize(t)) = t +function unwrapva(@nospecialize(t)) + isa(t, Core.TypeofVararg) || return t + return isdefined(t, :T) ? t.T : Any +end function unconstrain_vararg_length(va::Core.TypeofVararg) # construct a new Vararg type where its length is unconstrained, @@ -404,7 +509,13 @@ function convert(::Type{T}, x::NTuple{N,Any}) where {N, T<:Tuple} if typeintersect(NTuple{N,Any}, T) === Union{} _tuple_error(T, x) end - cvt1(n) = (@inline; convert(fieldtype(T, n), getfield(x, n, #=boundscheck=#false))) + function cvt1(n) + @inline + Tn = fieldtype(T, n) + xn = getfield(x, n, #=boundscheck=#false) + xn isa Tn && return xn + return convert(Tn, xn) + end return ntuple(cvt1, Val(N))::NTuple{N,Any} end @@ -442,7 +553,7 @@ end """ oftype(x, y) -Convert `y` to the type of `x` (`convert(typeof(x), y)`). +Convert `y` to the type of `x` i.e. `convert(typeof(x), y)`. # Examples ```jldoctest @@ -457,7 +568,7 @@ julia> oftype(y, x) 4.0 ``` """ -oftype(x, y) = convert(typeof(x), y) +oftype(x, y) = y isa typeof(x) ? y : convert(typeof(x), y)::typeof(x) unsigned(x::Int) = reinterpret(UInt, x) signed(x::UInt) = reinterpret(Int, x) @@ -478,33 +589,49 @@ Neither `convert` nor `cconvert` should take a Julia object and turn it into a ` """ function cconvert end -cconvert(T::Type, x) = convert(T, x) # do the conversion eagerly in most cases +cconvert(T::Type, x) = x isa T ? x : convert(T, x) # do the conversion eagerly in most cases +cconvert(::Type{Union{}}, x...) = convert(Union{}, x...) cconvert(::Type{<:Ptr}, x) = x # but defer the conversion to Ptr to unsafe_convert unsafe_convert(::Type{T}, x::T) where {T} = x # unsafe_convert (like convert) defaults to assuming the convert occurred unsafe_convert(::Type{T}, x::T) where {T<:Ptr} = x # to resolve ambiguity with the next method unsafe_convert(::Type{P}, x::Ptr) where {P<:Ptr} = convert(P, x) """ - reinterpret(type, A) + reinterpret(::Type{Out}, x::In) -Change the type-interpretation of a block of memory. 
-For arrays, this constructs a view of the array with the same binary data as the given -array, but with the specified element type. -For example, -`reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a +Change the type-interpretation of the binary data in the isbits value `x` +to that of the isbits type `Out`. +The size (ignoring padding) of `Out` has to be the same as that of the type of `x`. +For example, `reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a [`Float32`](@ref). -# Examples ```jldoctest julia> reinterpret(Float32, UInt32(7)) 1.0f-44 -julia> reinterpret(Float32, UInt32[1 2 3 4 5]) -1×5 reinterpret(Float32, ::Matrix{UInt32}): - 1.0f-45 3.0f-45 4.0f-45 6.0f-45 7.0f-45 +julia> reinterpret(NTuple{2, UInt8}, 0x1234) +(0x34, 0x12) + +julia> reinterpret(UInt16, (0x34, 0x12)) +0x1234 + +julia> reinterpret(Tuple{UInt16, UInt8}, (0x01, 0x0203)) +(0x0301, 0x02) ``` + +!!! warning + + Use caution if some combinations of bits in `Out` are not considered valid and would + otherwise be prevented by the type's constructors and methods. Unexpected behavior + may result without additional validation. """ -reinterpret(::Type{T}, x) where {T} = bitcast(T, x) +function reinterpret(::Type{Out}, x) where {Out} + if isprimitivetype(Out) && isprimitivetype(typeof(x)) + return bitcast(Out, x) + end + # only available when Base is fully loaded. + return _reinterpret(Out, x) +end """ sizeof(T::DataType) @@ -528,6 +655,17 @@ julia> sizeof(1.0) julia> sizeof(collect(1.0:10.0)) 80 + +julia> struct StructWithPadding + x::Int64 + flag::Bool + end + +julia> sizeof(StructWithPadding) # not the sum of `sizeof` of fields due to padding +16 + +julia> sizeof(Int64) + sizeof(Bool) # different from above +9 ``` If `DataType` `T` does not have a specific size, an error is thrown. 
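Aside (not part of the patch): to make the size requirement in the updated `reinterpret` docstring concrete, here is a small sketch that relies only on the behavior documented above; the commented-out call is the disallowed case.

```julia
# Same total size (4 bytes each): reinterpreting between a primitive type and an
# isbits tuple of the same size is allowed.
reinterpret(NTuple{4,UInt8}, 1.0f0)

# Mismatched sizes (Float64 is 8 bytes, UInt32 is 4 bytes): disallowed, so this
# call raises an error per the size requirement stated in the docstring.
# reinterpret(Float64, UInt32(7))
```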
@@ -557,9 +695,6 @@ julia> ifelse(1 > 2, 1, 2) """ ifelse(condition::Bool, x, y) = Core.ifelse(condition, x, y) -# simple Array{Any} operations needed for bootstrap -@eval setindex!(A::Array{Any}, @nospecialize(x), i::Int) = arrayset($(Expr(:boundscheck)), A, x, i) - """ esc(e) @@ -675,17 +810,32 @@ macro goto(name::Symbol) return esc(Expr(:symbolicgoto, name)) end -# SimpleVector - -function getindex(v::SimpleVector, i::Int) - @boundscheck if !(1 <= i <= length(v)) - throw(BoundsError(v,i)) - end - return ccall(:jl_svec_ref, Any, (Any, Int), v, i - 1) +# linear indexing +function getindex(A::Array, i::Int) + @_noub_if_noinbounds_meta + @boundscheck ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, length(A))) || throw_boundserror(A, (i,)) + memoryrefget(memoryref(getfield(A, :ref), i, false), :not_atomic, false) end +# simple Array{Any} operations needed for bootstrap +function setindex!(A::Array{Any}, @nospecialize(x), i::Int) + @_noub_if_noinbounds_meta + @boundscheck ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, length(A))) || throw_boundserror(A, (i,)) + memoryrefset!(memoryref(getfield(A, :ref), i, false), x, :not_atomic, false) + return A +end +setindex!(A::Memory{Any}, @nospecialize(x), i::Int) = (memoryrefset!(memoryref(memoryref(A), i, @_boundscheck), x, :not_atomic, @_boundscheck); A) +setindex!(A::MemoryRef{T}, x) where {T} = memoryrefset!(A, convert(T, x), :not_atomic, @_boundscheck) +setindex!(A::MemoryRef{Any}, @nospecialize(x)) = memoryrefset!(A, x, :not_atomic, @_boundscheck) +# SimpleVector + +getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref(v, i)) function length(v::SimpleVector) - return ccall(:jl_svec_len, Int, (Any,), v) + @_total_meta + t = @_gc_preserve_begin v + len = unsafe_load(Ptr{Int}(pointer_from_objref(v))) + @_gc_preserve_end t + return len end firstindex(v::SimpleVector) = 1 lastindex(v::SimpleVector) = length(v) @@ -740,7 +890,7 @@ function isassigned end function isassigned(v::SimpleVector, i::Int) @boundscheck 1 <= i <= length(v) || return false - return ccall(:jl_svec_isassigned, Bool, (Any, Int), v, i - 1) + return true end @@ -760,6 +910,7 @@ struct Colon <: Function end const (:) = Colon() + """ Val(c) @@ -794,13 +945,16 @@ e.g. long-running event loops or callback functions that may call obsolete versions of a function `f`. (The drawback is that `invokelatest` is somewhat slower than calling `f` directly, and the type of the result cannot be inferred by the compiler.) + +!!! compat "Julia 1.9" + Prior to Julia 1.9, this function was not exported, and was called as `Base.invokelatest`. """ function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...) kwargs = merge(NamedTuple(), kwargs) if isempty(kwargs) return Core._call_latest(f, args...) end - return Core._call_latest(Core.kwfunc(f), kwargs, f, args...) + return Core._call_latest(Core.kwcall, kwargs, f, args...) end """ @@ -834,11 +988,10 @@ function invoke_in_world(world::UInt, @nospecialize(f), @nospecialize args...; k if isempty(kwargs) return Core._call_in_world(world, f, args...) end - return Core._call_in_world(world, Core.kwfunc(f), kwargs, f, args...) + return Core._call_in_world(world, Core.kwcall, kwargs, f, args...) end -# TODO: possibly make this an intrinsic -inferencebarrier(@nospecialize(x)) = RefValue{Any}(x).x +inferencebarrier(@nospecialize(x)) = compilerbarrier(:type, x) """ isempty(collection) -> Bool @@ -848,9 +1001,10 @@ Determine whether a collection is empty (has no elements). !!! 
warning `isempty(itr)` may consume the next element of a stateful iterator `itr` - unless an appropriate `Base.isdone(itr)` or `isempty` method is defined. - Use of `isempty` should therefore be avoided when writing generic - code which should support any iterator type. + unless an appropriate [`Base.isdone(itr)`](@ref) method is defined. + Stateful iterators *should* implement `isdone`, but you may want to avoid + using `isempty` when writing generic code which should support any iterator + type. # Examples ```jldoctest @@ -925,7 +1079,7 @@ function popfirst! end peek(stream[, T=UInt8]) Read and return a value of type `T` from a stream without advancing the current position -in the stream. +in the stream. See also [`startswith(stream, char_or_string)`](@ref). # Examples @@ -959,17 +1113,21 @@ end # Iteration """ - isdone(itr, state...) -> Union{Bool, Missing} + isdone(itr, [state]) -> Union{Bool, Missing} This function provides a fast-path hint for iterator completion. -This is useful for mutable iterators that want to avoid having elements -consumed, if they are not going to be exposed to the user (e.g. to check -for done-ness in `isempty` or `zip`). Mutable iterators that want to -opt into this feature should define an isdone method that returns -true/false depending on whether the iterator is done or not. Stateless -iterators need not implement this function. If the result is `missing`, -callers may go ahead and compute `iterate(x, state...) === nothing` to -compute a definite answer. +This is useful for stateful iterators that want to avoid having elements +consumed if they are not going to be exposed to the user (e.g. when checking +for done-ness in `isempty` or `zip`). + +Stateful iterators that want to opt into this feature should define an `isdone` +method that returns true/false depending on whether the iterator is done or +not. Stateless iterators need not implement this function. + +If the result is `missing`, callers may go ahead and compute +`iterate(x, state) === nothing` to compute a definite answer. + +See also [`iterate`](@ref), [`isempty`](@ref) """ isdone(itr, state...) = missing diff --git a/base/experimental.jl b/base/experimental.jl index 174d532ad1f4d..ccc041b41ec05 100644 --- a/base/experimental.jl +++ b/base/experimental.jl @@ -9,7 +9,7 @@ """ module Experimental -using Base: Threads, sync_varname +using Base: Threads, sync_varname, is_function_def, @propagate_inbounds using Base.Meta """ @@ -28,10 +28,7 @@ end Base.IndexStyle(::Type{<:Const}) = IndexLinear() Base.size(C::Const) = size(C.a) Base.axes(C::Const) = axes(C.a) -@eval Base.getindex(A::Const, i1::Int) = - (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1)) -@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) = - (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...)) +@propagate_inbounds Base.getindex(A::Const, i1::Int, I::Int...) = A.a[i1, I...] """ @aliasscope expr @@ -86,11 +83,16 @@ end """ Experimental.@sync -Wait until all lexically-enclosed uses of `@async`, `@spawn`, `@spawnat` and `@distributed` +Wait until all lexically-enclosed uses of [`@async`](@ref), [`@spawn`](@ref Threads.@spawn), +`Distributed.@spawnat` and `Distributed.@distributed` are complete, or at least one of them has errored. The first exception is immediately rethrown. It is the responsibility of the user to cancel any still-running operations during error handling. +!!! 
Note + This is different to [`@sync`](@ref) in that errors from wrapped tasks are thrown immediately, + potentially before all tasks have returned. + !!! Note This interface is experimental and subject to change or removal without notice. """ @@ -141,7 +143,7 @@ code to resort to runtime dispatch instead. Supported values are `1`, `2`, `3`, `4`, and `default` (currently equivalent to `3`). """ macro max_methods(n::Int) - 0 < n < 5 || error("We must have that `1 <= max_methods <= 4`, but `max_methods = $n`.") + 1 <= n <= 4 || error("We must have that `1 <= max_methods <= 4`, but `max_methods = $n`.") return Expr(:meta, :max_methods, n) end @@ -154,13 +156,13 @@ for max_methods. This setting is global for the entire generic function (or more the MethodTable). """ macro max_methods(n::Int, fdef::Expr) - 0 < n <= 255 || error("We must have that `1 <= max_methods <= 255`, but `max_methods = $n`.") - (fdef.head == :function && length(fdef.args) == 1) || error("Second argument must be a function forward declaration") + 1 <= n <= 255 || error("We must have that `1 <= max_methods <= 255`, but `max_methods = $n`.") + (fdef.head === :function && length(fdef.args) == 1) || error("Second argument must be a function forward declaration") return :(typeof($(esc(fdef))).name.max_methods = $(UInt8(n))) end """ - Experimental.@compiler_options optimize={0,1,2,3} compile={yes,no,all,min} infer={yes,no} max_methods={default,1,2,3,...} + Experimental.@compiler_options optimize={0,1,2,3} compile={yes,no,all,min} infer={yes,no} max_methods={default,1,2,3,4} Set compiler options for code in the enclosing module. Options correspond directly to command-line options with the same name, where applicable. The following options @@ -193,7 +195,7 @@ macro compiler_options(args...) elseif ex.args[1] === :max_methods a = ex.args[2] a = a === :default ? 3 : - a isa Int ? ((0 < a < 5) ? a : error("We must have that `1 <= max_methods <= 4`, but `max_methods = $a`.")) : + a isa Int ? ((1 <= a <= 4) ? a : error("We must have that `1 <= max_methods <= 4`, but `max_methods = $a`.")) : error("invalid argument to \"max_methods\" option") push!(opts.args, Expr(:meta, :max_methods, a)) else @@ -333,21 +335,25 @@ Define a method and add it to the method table `mt` instead of to the global met This can be used to implement a method override mechanism. Regular compilation will not consider these methods, and you should customize the compilation flow to look in these method tables (e.g., using [`Core.Compiler.OverlayMethodTable`](@ref)). 
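Aside (not part of the patch): a minimal usage sketch for the reworked `@overlay`. It assumes a method table created with `Base.Experimental.@MethodTable`, which lives alongside `@overlay` but is not shown in this hunk.

```julia
# Create an overlay method table and register a method with it instead of the
# global method table. With the rework below, signatures that carry a
# return-type annotation (a `::` wrapper around the call) are accepted as well.
Base.Experimental.@MethodTable overlay_table

Base.Experimental.@overlay overlay_table function Base.sin(x::Float64)::Float64
    return 1.0
end
```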
- """ macro overlay(mt, def) def = macroexpand(__module__, def) # to expand @inline, @generated, etc - if !isexpr(def, [:function, :(=)]) - error("@overlay requires a function Expr") - end - if isexpr(def.args[1], :call) - def.args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1]) - elseif isexpr(def.args[1], :where) - def.args[1].args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1].args[1]) + is_function_def(def) || error("@overlay requires a function definition") + return esc(overlay_def!(mt, def)) +end + +function overlay_def!(mt, @nospecialize ex) + arg1 = ex.args[1] + if isexpr(arg1, :call) + arg1.args[1] = Expr(:overlay, mt, arg1.args[1]) + elseif isexpr(arg1, :(::)) + overlay_def!(mt, arg1) + elseif isexpr(arg1, :where) + overlay_def!(mt, arg1) else - error("@overlay requires a function Expr") + error("@overlay requires a function definition") end - esc(def) + return ex end let new_mt(name::Symbol, mod::Module) = begin diff --git a/base/exports.jl b/base/exports.jl index 428e6894bbafe..92525b85c7635 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -238,6 +238,7 @@ export bitrotate, bswap, cbrt, + fourthroot, ceil, cis, cispi, @@ -352,6 +353,7 @@ export tan, tand, tanh, + tanpi, trailing_ones, trailing_zeros, trunc, @@ -363,6 +365,7 @@ export zero, √, ∛, + ∜, ≈, ≉, @@ -445,6 +448,7 @@ export sortperm!, sortslices, dropdims, + stack, step, stride, strides, @@ -454,6 +458,7 @@ export vcat, vec, view, + wrap, zeros, # search, find, match and related functions @@ -528,6 +533,7 @@ export getkey, haskey, in, + in!, intersect!, intersect, isdisjoint, @@ -589,6 +595,7 @@ export digits, digits!, eachsplit, + eachrsplit, escape_string, hex2bytes, hex2bytes!, @@ -644,6 +651,11 @@ export sprint, summary, +# ScopedValue + with, + @with, + ScopedValue, + # logging @debug, @info, @@ -660,7 +672,6 @@ export # iteration iterate, - enumerate, # re-exported from Iterators zip, only, @@ -772,6 +783,7 @@ export # syntax esc, gensym, + @kwdef, macroexpand, @macroexpand1, @macroexpand, @@ -794,6 +806,9 @@ export names, which, @isdefined, + @invoke, + invokelatest, + @invokelatest, # loading source files __precompile__, @@ -812,7 +827,7 @@ export atreplinit, exit, ntuple, - Splat, + splat, # I/O and events close, @@ -850,6 +865,8 @@ export readline, readlines, readuntil, + copyuntil, + copyline, redirect_stdio, redirect_stderr, redirect_stdin, @@ -937,6 +954,7 @@ export pwd, readlink, rm, + samefile, stat, symlink, tempdir, @@ -971,8 +989,11 @@ export reenable_sigint, unsafe_copyto!, unsafe_load, + unsafe_modify!, unsafe_pointer_to_objref, + unsafe_replace!, unsafe_store!, + unsafe_swap!, # implemented in Random module rand, @@ -996,6 +1017,7 @@ export @v_str, # version number @raw_str, # raw string with no interpolation/unescaping @NamedTuple, + @Kwargs, @lazy_str, # lazy string # documentation @@ -1013,6 +1035,7 @@ export @timev, @elapsed, @allocated, + @allocations, # tasks @sync, @@ -1048,4 +1071,102 @@ export @view, @views, @static, - @invoke + + @main + +public +# Modules + Checked, + Filesystem, + Order, + Sort, + +# Types + AbstractLock, + AsyncCondition, + CodeUnits, + Event, + Fix1, + Fix2, + Generator, + ImmutableDict, + OneTo, + AnnotatedString, + AnnotatedChar, + UUID, + +# Annotated strings + annotatedstring, + annotate!, + annotations, + +# Semaphores + Semaphore, + acquire, + release, + +# collections + IteratorEltype, + IteratorSize, + to_index, + vect, + isdone, + front, + rest, + split_rest, + tail, + checked_length, + +# Loading + DL_LOAD_PATH, + load_path, + active_project, 
+ +# Reflection and introspection + isambiguous, + isexpr, + isidentifier, + issingletontype, + identify_package, + locate_package, + moduleroot, + jit_total_bytes, + summarysize, + isexported, + ispublic, + +# Operators + operator_associativity, + operator_precedence, + isbinaryoperator, + isoperator, + isunaryoperator, + +# C interface + cconvert, + unsafe_convert, + +# Error handling + exit_on_sigint, + windowserror, + +# Macros + @assume_effects, + @constprop, + @locals, + @propagate_inbounds, + +# IO + # types + BufferStream, + IOServer, + OS_HANDLE, + PipeEndpoint, + TTY, + # functions + reseteof, + +# misc + notnothing, + runtests, + text_colors diff --git a/base/expr.jl b/base/expr.jl index 8e83926a332f3..553f5a360dff1 --- a/base/expr.jl +++ b/base/expr.jl @@ -33,6 +33,9 @@ macro gensym(names...) return blk end +## line numbers ## +convert(::Type{LineNumberNode}, lin::Core.LineInfoNode) = LineNumberNode(Int(lin.line), lin.file) + ## expressions ## isexpr(@nospecialize(ex), head::Symbol) = isa(ex, Expr) && ex.head === head @@ -75,6 +78,9 @@ function copy(c::CodeInfo) cnew.code = copy_exprargs(cnew.code) cnew.slotnames = copy(cnew.slotnames) cnew.slotflags = copy(cnew.slotflags) + if cnew.slottypes !== nothing + cnew.slottypes = copy(cnew.slottypes) + end cnew.codelocs = copy(cnew.codelocs) cnew.linetable = copy(cnew.linetable::Union{Vector{Any},Vector{Core.LineInfoNode}}) cnew.ssaflags = copy(cnew.ssaflags) @@ -123,9 +129,10 @@ function macroexpand(m::Module, @nospecialize(x); recursive=true) end """ - @macroexpand + @macroexpand [mod,] ex Return equivalent expression with all macros removed (expanded). +If two arguments are provided, the first is the module to evaluate in. There are differences between `@macroexpand` and [`macroexpand`](@ref). @@ -160,20 +167,28 @@ julia> M.f() ``` With `@macroexpand` the expression expands where `@macroexpand` appears in the code (module `M` in the example). With `macroexpand` the expression expands in the module given as the first argument. + +!!! compat "Julia 1.11" + The two-argument form requires at least Julia 1.11. """ macro macroexpand(code) return :(macroexpand($__module__, $(QuoteNode(code)), recursive=true)) end - +macro macroexpand(mod, code) + return :(macroexpand($(esc(mod)), $(QuoteNode(code)), recursive=true)) +end """ - @macroexpand1 + @macroexpand1 [mod,] ex Non recursive version of [`@macroexpand`](@ref). """ macro macroexpand1(code) return :(macroexpand($__module__, $(QuoteNode(code)), recursive=false)) end +macro macroexpand1(mod, code) + return :(macroexpand($(esc(mod)), $(QuoteNode(code)), recursive=false)) +end ## misc syntax ## @@ -193,7 +208,7 @@ Small functions typically do not need the `@inline` annotation, as the compiler does it automatically. By using `@inline` on bigger functions, an extra nudge can be given to the compiler to inline it. -`@inline` can be applied immediately before the definition or in its function body. +`@inline` can be applied immediately before a function definition or within a function body. ```julia # annotate long-form definition @@ -271,7 +286,7 @@ Small functions are typically inlined automatically. By using `@noinline` on small functions, auto-inlining can be prevented. -`@noinline` can be applied immediately before the definition or in its function body. +`@noinline` can be applied immediately before a function definition or within a function body.
```julia # annotate long-form definition @@ -340,83 +355,144 @@ macro noinline(x) end """ - @pure ex - -`@pure` gives the compiler a hint for the definition of a pure function, -helping for type inference. - -!!! warning - This macro is intended for internal compiler use and may be subject to changes. - -!!! warning - In Julia 1.8 and higher, it is favorable to use [`@assume_effects`](@ref) instead of `@pure`. - This is because `@assume_effects` allows a finer grained control over Julia's purity - modeling and the effect system enables a wider range of optimizations. -""" -macro pure(ex) - esc(isa(ex, Expr) ? pushmeta!(ex, :pure) : ex) -end + Base.@constprop setting [ex] -""" - @constprop setting ex +Control the mode of interprocedural constant propagation for the annotated function. -`@constprop` controls the mode of interprocedural constant propagation for the -annotated function. Two `setting`s are supported: +Two `setting`s are supported: -- `@constprop :aggressive ex`: apply constant propagation aggressively. +- `Base.@constprop :aggressive [ex]`: apply constant propagation aggressively. For a method where the return type depends on the value of the arguments, this can yield improved inference results at the cost of additional compile time. -- `@constprop :none ex`: disable constant propagation. This can reduce compile +- `Base.@constprop :none [ex]`: disable constant propagation. This can reduce compile times for functions that Julia might otherwise deem worthy of constant-propagation. Common cases are for functions with `Bool`- or `Symbol`-valued arguments or keyword arguments. + +`Base.@constprop` can be applied immediately before a function definition or within a function body. + +```julia +# annotate long-form definition +Base.@constprop :aggressive function longdef(x) + ... +end + +# annotate short-form definition +Base.@constprop :aggressive shortdef(x) = ... + +# annotate anonymous function that a `do` block creates +f() do + Base.@constprop :aggressive + ... +end +``` + +!!! compat "Julia 1.10" + The usage within a function body requires at least Julia 1.10. """ macro constprop(setting, ex) - if isa(setting, QuoteNode) - setting = setting.value + sym = constprop_setting(setting) + isa(ex, Expr) && return esc(pushmeta!(ex, sym)) + throw(ArgumentError(LazyString("Bad expression `", ex, "` in `@constprop settings ex`"))) +end +macro constprop(setting) + sym = constprop_setting(setting) + return Expr(:meta, sym) +end + +function constprop_setting(@nospecialize setting) + isa(setting, QuoteNode) && (setting = setting.value) + if setting === :aggressive + return :aggressive_constprop + elseif setting === :none + return :no_constprop end - setting === :aggressive && return esc(isa(ex, Expr) ? pushmeta!(ex, :aggressive_constprop) : ex) - setting === :none && return esc(isa(ex, Expr) ? pushmeta!(ex, :no_constprop) : ex) - throw(ArgumentError("@constprop $setting not supported")) + throw(ArgumentError(LazyString("@constprop ", setting, " not supported"))) end """ - @assume_effects setting... ex - -`@assume_effects` overrides the compiler's effect modeling for the given method. -`ex` must be a method definition or `@ccall` expression. + Base.@assume_effects setting... [ex] -!!! compat "Julia 1.8" - Using `Base.@assume_effects` requires Julia version 1.8. +Override the compiler's effect modeling. +This macro can be used in several contexts: +1. Immediately before a method definition, to override the entire effect modeling of the applied method. +2.
Within a function body without any arguments, to override the entire effect modeling of the enclosing method. +3. Applied to a code block, to override the local effect modeling of the applied code block. +# Examples ```jldoctest -julia> Base.@assume_effects :terminates_locally function pow(x) - # this :terminates_locally allows `pow` to be constant-folded +julia> Base.@assume_effects :terminates_locally function fact(x) + # usage 1: + # this :terminates_locally allows `fact` to be constant-folded res = 1 - 1 < x < 20 || error("bad pow") + 0 ≤ x < 20 || error("bad fact") while x > 1 res *= x x -= 1 end return res end -pow (generic function with 1 method) +fact (generic function with 1 method) julia> code_typed() do - pow(12) - end -1-element Vector{Any}: - CodeInfo( + fact(12) + end |> only +CodeInfo( 1 ─ return 479001600 ) => Int64 -julia> Base.@assume_effects :total !:nothrow @ccall jl_type_intersection(Vector{Int}::Any, Vector{<:Integer}::Any)::Any -Vector{Int64} (alias for Array{Int64, 1}) +julia> code_typed() do + map((2,3,4)) do x + # usage 2: + # this :terminates_locally allows this anonymous function to be constant-folded + Base.@assume_effects :terminates_locally + res = 1 + 0 ≤ x < 20 || error("bad fact") + while x > 1 + res *= x + x -= 1 + end + return res + end + end |> only +CodeInfo( +1 ─ return (2, 6, 24) +) => Tuple{Int64, Int64, Int64} + +julia> code_typed() do + map((2,3,4)) do x + res = 1 + 0 ≤ x < 20 || error("bad fact") + # usage 3: + # with this :terminates_locally annotation the compiler skips tainting + # `:terminates` effect within this `while` block, allowing the parent + # anonymous function to be constant-folded + Base.@assume_effects :terminates_locally while x > 1 + res *= x + x -= 1 + end + return res + end + end |> only +CodeInfo( +1 ─ return (2, 6, 24) +) => Tuple{Int64, Int64, Int64} ``` +!!! compat "Julia 1.8" + Using `Base.@assume_effects` requires Julia version 1.8. + +!!! compat "Julia 1.10" + The usage within a function body requires at least Julia 1.10. + +!!! compat "Julia 1.11" + The code block annotation requires at least Julia 1.11. + !!! warning Improper use of this macro causes undefined behavior (including crashes, - incorrect answers, or other hard to track bugs). Use with care and only if - absolutely required. + incorrect answers, or other hard to track bugs). Use with care and only as a + last resort if absolutely required. Even in such a case, you SHOULD take all + possible steps to minimize the strength of the effect assertion (e.g., + do not use `:total` if `:nothrow` would have been sufficient). In general, each `setting` value makes an assertion about the behavior of the function, without requiring the compiler to prove that this behavior is indeed @@ -431,7 +507,11 @@ The following `setting`s are supported. - `:consistent` - `:effect_free` - `:nothrow` - `:terminates_globally` - `:terminates_locally` - `:notaskstate` +- `:inaccessiblememonly` +- `:noub` +- `:noub_if_noinbounds` - `:foldable` +- `:removable` - `:total` # Extended help @@ -444,8 +524,8 @@ The `:consistent` setting asserts that for egal (`===`) inputs: - If the method returns, the results will always be egal. !!! note - This in particular implies that the return value of the method must be - immutable. Multiple allocations of mutable objects (even with identical + This in particular implies that the method must not return a freshly allocated + mutable object. Multiple allocations of mutable objects (even with identical contents) are not egal. !!! note @@ -504,7 +584,7 @@ were not executed.
--- ## `:nothrow` -The `:nothrow` settings asserts that this method does not terminate abnormally +The `:nothrow` settings asserts that this method does not throw an exception (i.e. will either always return a value or never return). !!! note @@ -513,7 +593,11 @@ The `:nothrow` settings asserts that this method does not terminate abnormally method itself. !!! note - `MethodErrors` and similar exceptions count as abnormal termination. + If the execution of a method may raise `MethodError`s and similar exceptions, then + the method is not considered as `:nothrow`. + However, note that environment-dependent errors like `StackOverflowError` or `InterruptException` + are not modeled by this effect and thus a method that may result in `StackOverflowError` + does not necessarily need to be `!:nothrow` (although it should usually be `!:terminates` too). --- ## `:terminates_globally` @@ -526,7 +610,7 @@ The `:terminates_globally` settings asserts that this method will eventually ter !!! note The compiler will consider this a strong indication that the method will - terminate relatively *quickly* and may (if otherwise legal), call this + terminate relatively *quickly* and may (if otherwise legal) call this method at compile time. I.e. it is a bad idea to annotate this setting on a method that *technically*, but not *practically*, terminates. @@ -568,16 +652,42 @@ moved between tasks without observable results. code that is not `:notaskstate`, but is `:effect_free` and `:consistent` may still be dead-code-eliminated and thus promoted to `:total`. +--- +## `:inaccessiblememonly` + +The `:inaccessiblememonly` setting asserts that the method does not access or modify +externally accessible mutable memory. This means the method can access or modify mutable +memory for newly allocated objects that is not accessible by other methods or top-level +execution before return from the method, but it can not access or modify any mutable +global state or mutable memory pointed to by its arguments. + +!!! note + Below is an incomplete list of examples that invalidate this assumption: + - a global reference or `getglobal` call to access a mutable global variable + - a global assignment or `setglobal!` call to perform assignment to a non-constant global variable + - `setfield!` call that changes a field of a global mutable variable + +!!! note + This `:inaccessiblememonly` assertion covers any other methods called by the annotated method. + +--- +## `:noub` + +The `:noub` setting asserts that the method will not execute any undefined behavior +(for any input). Note that undefined behavior may technically cause the method to violate +any other effect assertions (such as `:consistent` or `:effect_free`) as well, but we do +not model this, and they assume the absence of undefined behavior. + --- ## `:foldable` This setting is a convenient shortcut for the set of effects that the compiler requires to be guaranteed to constant fold a call at compile time. It is currently equivalent to the following `setting`s: - - `:consistent` - `:effect_free` - `:terminates_globally` +- `:noub` !!! note This list in particular does not include `:nothrow`. The compiler will still @@ -585,6 +695,20 @@ currently equivalent to the following `setting`s: however, that by the `:consistent`-cy requirements, any such annotated call must consistently throw given the same argument values. +!!! note + An explicit `@inbounds` annotation inside the function will also disable + constant folding and not be overridden by `:foldable`. 
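Aside (not part of the patch): a hedged sketch of the `:inaccessiblememonly` description above. The first method only touches memory it allocates itself, so the assertion is sound; the commented-out variant mutates a global and must not carry it.

```julia
# Sound: only reads/writes an array allocated inside the method, and no global
# mutable state, matching the :inaccessiblememonly contract described above.
Base.@assume_effects :inaccessiblememonly function squares_sum(n::Int)
    buf = Vector{Int}(undef, n)
    for i in 1:n
        buf[i] = i * i
    end
    return sum(buf)
end

# Unsound: assigning a non-constant global is exactly the kind of externally
# accessible mutation the assertion rules out.
# hits = 0
# Base.@assume_effects :inaccessiblememonly bump!() = (global hits += 1)
```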
+ +--- +## `:removable` + +This setting is a convenient shortcut for the set of effects that the compiler +requires to be guaranteed to delete a call whose result is unused at compile time. +It is currently equivalent to the following `setting`s: +- `:effect_free` +- `:nothrow` +- `:terminates_globally` + --- ## `:total` @@ -595,6 +719,8 @@ the following other `setting`s: - `:nothrow` - `:terminates_globally` - `:notaskstate` +- `:inaccessiblememonly` +- `:noub` !!! warning `:total` is a very strong assertion and will likely gain additional semantics @@ -611,62 +737,126 @@ the following other `setting`s: Effect names may be prefixed by `!` to indicate that the effect should be removed from an earlier meta effect. For example, `:total !:nothrow` indicates that while the call is generally total, it may however throw. - ---- -## Comparison to `@pure` - -`@assume_effects :foldable` is similar to [`@pure`](@ref) with the primary -distinction that the `:consistent`-cy requirement applies world-age wise rather -than globally as described above. However, in particular, a method annotated -`@pure` should always be at least `:foldable`. -Another advantage is that effects introduced by `@assume_effects` are propagated to -callers interprocedurally while a purity defined by `@pure` is not. """ macro assume_effects(args...) - (consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate) = - (false, false, false, false, false, false, false) - for org_setting in args[1:end-1] - (setting, val) = compute_assumed_setting(org_setting) - if setting === :consistent - consistent = val - elseif setting === :effect_free - effect_free = val - elseif setting === :nothrow - nothrow = val - elseif setting === :terminates_globally - terminates_globally = val - elseif setting === :terminates_locally - terminates_locally = val - elseif setting === :notaskstate - notaskstate = val - elseif setting === :foldable - consistent = effect_free = terminates_globally = val - elseif setting === :total - consistent = effect_free = nothrow = terminates_globally = notaskstate = val - else - throw(ArgumentError("@assume_effects $org_setting not supported")) - end + lastex = args[end] + override = compute_assumed_settings(args[begin:end-1]) + if is_function_def(unwrap_macrocalls(lastex)) + return esc(pushmeta!(lastex, form_purity_expr(override))) + elseif isexpr(lastex, :macrocall) && lastex.args[1] === Symbol("@ccall") + lastex.args[1] = GlobalRef(Base, Symbol("@ccall_effects")) + insert!(lastex.args, 3, Core.Compiler.encode_effects_override(override)) + return esc(lastex) end - ex = args[end] - isa(ex, Expr) || throw(ArgumentError("Bad expression `$ex` in `@assume_effects [settings] ex`")) - if ex.head === :macrocall && ex.args[1] == Symbol("@ccall") - ex.args[1] = GlobalRef(Base, Symbol("@ccall_effects")) - insert!(ex.args, 3, Core.Compiler.encode_effects_override(Core.Compiler.EffectsOverride( - consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate - ))) - return esc(ex) + override′ = compute_assumed_setting(override, lastex) + if override′ !== nothing + # anonymous function case + return Expr(:meta, form_purity_expr(override′)) + else + # call site annotation case + return Expr(:block, + form_purity_expr(override), + Expr(:local, Expr(:(=), :val, esc(lastex))), + Expr(:purity), # region end token + :val) + end +end + +function compute_assumed_settings(settings) + override = EffectsOverride() + for setting in settings + override = compute_assumed_setting(override, 
setting) + override === nothing && + throw(ArgumentError("@assume_effects $setting not supported")) end - return esc(pushmeta!(ex, :purity, consistent, effect_free, nothrow, terminates_globally, terminates_locally, notaskstate)) + return override end -function compute_assumed_setting(@nospecialize(setting), val::Bool=true) +using Core.Compiler: EffectsOverride + +function compute_assumed_setting(override::EffectsOverride, @nospecialize(setting), val::Bool=true) if isexpr(setting, :call) && setting.args[1] === :(!) - return compute_assumed_setting(setting.args[2], !val) + return compute_assumed_setting(override, setting.args[2], !val) elseif isa(setting, QuoteNode) - return compute_assumed_setting(setting.value, val) - else - return (setting, val) + return compute_assumed_setting(override, setting.value, val) + end + if setting === :consistent + return EffectsOverride(override; consistent = val) + elseif setting === :effect_free + return EffectsOverride(override; effect_free = val) + elseif setting === :nothrow + return EffectsOverride(override; nothrow = val) + elseif setting === :terminates_globally + return EffectsOverride(override; terminates_globally = val) + elseif setting === :terminates_locally + return EffectsOverride(override; terminates_locally = val) + elseif setting === :notaskstate + return EffectsOverride(override; notaskstate = val) + elseif setting === :inaccessiblememonly + return EffectsOverride(override; inaccessiblememonly = val) + elseif setting === :noub + return EffectsOverride(override; noub = val) + elseif setting === :noub_if_noinbounds + return EffectsOverride(override; noub_if_noinbounds = val) + elseif setting === :foldable + consistent = effect_free = terminates_globally = noub = val + return EffectsOverride(override; consistent, effect_free, terminates_globally, noub) + elseif setting === :removable + effect_free = nothrow = terminates_globally = val + return EffectsOverride(override; effect_free, nothrow, terminates_globally) + elseif setting === :total + consistent = effect_free = nothrow = terminates_globally = notaskstate = + inaccessiblememonly = noub = val + return EffectsOverride(override; + consistent, effect_free, nothrow, terminates_globally, notaskstate, + inaccessiblememonly, noub) end + return nothing +end + +function form_purity_expr(override::EffectsOverride) + return Expr(:purity, + override.consistent, override.effect_free, override.nothrow, + override.terminates_globally, override.terminates_locally, override.notaskstate, + override.inaccessiblememonly, override.noub, override.noub_if_noinbounds) +end + +""" + Base.@nospecializeinfer function f(args...) + @nospecialize ... + ... + end + Base.@nospecializeinfer f(@nospecialize args...) = ... + +Tells the compiler to infer `f` using the declared types of `@nospecialize`d arguments. +This can be used to limit the number of compiler-generated specializations during inference. 
+ +# Example + +```julia +julia> f(A::AbstractArray) = g(A) +f (generic function with 1 method) + +julia> @noinline Base.@nospecializeinfer g(@nospecialize(A::AbstractArray)) = A[1] +g (generic function with 1 method) + +julia> @code_typed f([1.0]) +CodeInfo( +1 ─ %1 = invoke Main.g(_2::AbstractArray)::Any +└── return %1 +) => Any +``` + +In this example, `f` will be inferred for each specific type of `A`, +but `g` will only be inferred once with the declared argument type `A::AbstractArray`, +meaning that the compiler will not likely see the excessive inference time on it +while it can not infer the concrete return type of it. +Without the `@nospecializeinfer`, `f([1.0])` would infer the return type of `g` as `Float64`, +indicating that inference ran for `g(::Vector{Float64})` despite the prohibition on +specialized code generation. +""" +macro nospecializeinfer(ex) + esc(isa(ex, Expr) ? pushmeta!(ex, :nospecializeinfer) : ex) end """ @@ -702,23 +892,17 @@ function unwrap_macrocalls(ex::Expr) return inner end -function pushmeta!(ex::Expr, sym::Symbol, args::Any...) - if isempty(args) - tag = sym - else - tag = Expr(sym, args...)::Expr - end - +function pushmeta!(ex::Expr, tag::Union{Symbol,Expr}) inner = unwrap_macrocalls(ex) - idx, exargs = findmeta(inner) if idx != 0 - push!(exargs[idx].args, tag) + metastmt = exargs[idx]::Expr + push!(metastmt.args, tag) else body = inner.args[2]::Expr pushfirst!(body.args, Expr(:meta, tag)) end - ex + return ex end popmeta!(body, sym) = _getmeta(body, sym, true) @@ -835,6 +1019,26 @@ function remove_linenums!(src::CodeInfo) return src end +replace_linenums!(ex, ln::LineNumberNode) = ex +function replace_linenums!(ex::Expr, ln::LineNumberNode) + if ex.head === :block || ex.head === :quote + # replace line number expressions from metadata (not argument literal or inert) position + map!(ex.args, ex.args) do @nospecialize(x) + isa(x, Expr) && x.head === :line && length(x.args) == 1 && return Expr(:line, ln.line) + isa(x, Expr) && x.head === :line && length(x.args) == 2 && return Expr(:line, ln.line, ln.file) + isa(x, LineNumberNode) && return ln + return x + end + end + # preserve any linenums inside `esc(...)` guards + if ex.head !== :escape + for subex in ex.args + subex isa Expr && replace_linenums!(subex, ln) + end + end + return ex +end + macro generated() return Expr(:generated) end @@ -850,7 +1054,7 @@ the global scope or depending on mutable elements. See [Metaprogramming](@ref) for further details. -## Example: +# Examples ```jldoctest julia> @generated function bar(x) if x <: Integer @@ -872,16 +1076,12 @@ macro generated(f) if isa(f, Expr) && (f.head === :function || is_short_function_def(f)) body = f.args[2] lno = body.args[1] - tmp = gensym("tmp") return Expr(:escape, Expr(f.head, f.args[1], Expr(:block, lno, Expr(:if, Expr(:generated), - # https://github.com/JuliaLang/julia/issues/25678 - Expr(:block, - :(local $tmp = $body), - :(if $tmp isa $(GlobalRef(Core, :CodeInfo)); return $tmp; else $tmp; end)), + body, Expr(:block, Expr(:meta, :generated_only), Expr(:return, nothing)))))) @@ -896,6 +1096,7 @@ end @atomic order ex Mark `var` or `ex` as being performed atomically, if `ex` is a supported expression. +If no `order` is specified it defaults to :sequentially_consistent. @atomic a.b.x = new @atomic a.b.x += addend @@ -924,6 +1125,7 @@ This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` call. See [Per-field atomics](@ref man-atomics) section in the manual for more details. 
+# Examples ```jldoctest julia> mutable struct Atomic{T}; @atomic x::T; end @@ -1023,6 +1225,7 @@ This operation translates to a `swapproperty!(a.b, :x, new)` call. See [Per-field atomics](@ref man-atomics) section in the manual for more details. +# Examples ```jldoctest julia> mutable struct Atomic{T}; @atomic x::T; end @@ -1069,6 +1272,7 @@ This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` ca See [Per-field atomics](@ref man-atomics) section in the manual for more details. +# Examples ```jldoctest julia> mutable struct Atomic{T}; @atomic x::T; end @@ -1084,7 +1288,7 @@ julia> @atomic a.x # fetch field x of a, with sequential consistency julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency (old = 2, success = false) -julia> xchg = 2 => 0; # replace field x of a with 0 if it was 1, with sequential consistency +julia> xchg = 2 => 0; # replace field x of a with 0 if it was 2, with sequential consistency julia> @atomicreplace a.x xchg (old = 2, success = true) diff --git a/base/fastmath.jl b/base/fastmath.jl index 05a5ce0503e68..44440ebad2050 100644 --- a/base/fastmath.jl +++ b/base/fastmath.jl @@ -28,7 +28,7 @@ module FastMath export @fastmath import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast, - add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast, + add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast const fast_op = @@ -41,6 +41,8 @@ const fast_op = :!= => :ne_fast, :< => :lt_fast, :<= => :le_fast, + :> => :gt_fast, + :>= => :ge_fast, :abs => :abs_fast, :abs2 => :abs2_fast, :cmp => :cmp_fast, @@ -82,7 +84,12 @@ const fast_op = :sinh => :sinh_fast, :sqrt => :sqrt_fast, :tan => :tan_fast, - :tanh => :tanh_fast) + :tanh => :tanh_fast, + # reductions + :maximum => :maximum_fast, + :minimum => :minimum_fast, + :maximum! => :maximum!_fast, + :minimum! => :minimum!_fast) const rewrite_op = Dict(:+= => :+, @@ -105,19 +112,10 @@ function make_fastmath(expr::Expr) if isa(var, Symbol) # simple assignment expr = :($var = $op($var, $rhs)) - elseif isa(var, Expr) && var.head === :ref - var = var::Expr - # array reference - arr = var.args[1] - inds = var.args[2:end] - arrvar = gensym() - indvars = Any[gensym() for _ in inds] - expr = quote - $(Expr(:(=), arrvar, arr)) - $(Expr(:(=), Base.exprarray(:tuple, indvars), Base.exprarray(:tuple, inds))) - $arrvar[$(indvars...)] = $op($arrvar[$(indvars...)], $rhs) - end end + # It is hard to optimize array[i += 1] += 1 + # and array[end] += 1 without bugs. (#47241) + # We settle for not optimizing the op= call. end Base.exprarray(make_fastmath(expr.head), Base.mapany(make_fastmath, expr.args)) end @@ -166,7 +164,6 @@ add_fast(x::T, y::T) where {T<:FloatTypes} = add_float_fast(x, y) sub_fast(x::T, y::T) where {T<:FloatTypes} = sub_float_fast(x, y) mul_fast(x::T, y::T) where {T<:FloatTypes} = mul_float_fast(x, y) div_fast(x::T, y::T) where {T<:FloatTypes} = div_float_fast(x, y) -rem_fast(x::T, y::T) where {T<:FloatTypes} = rem_float_fast(x, y) add_fast(x::T, y::T, zs::T...) where {T<:FloatTypes} = add_fast(add_fast(x, y), zs...) 
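For readers following the `fast_op` table additions above (the new `:>`, `:>=`, and reduction entries), a small illustration may help; it is a sketch, not part of the patch itself. `@fastmath` performs a purely syntactic rewrite, swapping a call for its `*_fast` counterpart, which `@macroexpand` makes visible.

```julia
# Illustrative only: the fast_op table drives a syntactic rewrite.
# The exact printed form of the expansion may vary between Julia versions.
@macroexpand @fastmath x > y        # roughly :(Base.FastMath.gt_fast(x, y))
@macroexpand @fastmath maximum(v)   # roughly :(Base.FastMath.maximum_fast(v))
```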
@@ -182,6 +179,8 @@ eq_fast(x::T, y::T) where {T<:FloatTypes} = eq_float_fast(x, y) ne_fast(x::T, y::T) where {T<:FloatTypes} = ne_float_fast(x, y) lt_fast(x::T, y::T) where {T<:FloatTypes} = lt_float_fast(x, y) le_fast(x::T, y::T) where {T<:FloatTypes} = le_float_fast(x, y) +gt_fast(x, y) = lt_fast(y, x) +ge_fast(x, y) = le_fast(y, x) isinf_fast(x) = false isfinite_fast(x) = true @@ -295,6 +294,11 @@ sincos_fast(v::AbstractFloat) = (sin_fast(v), cos_fast(v)) sincos_fast(v::Real) = sincos_fast(float(v)::AbstractFloat) sincos_fast(v) = (sin_fast(v), cos_fast(v)) + +function rem_fast(x::T, y::T) where {T<:FloatTypes} + return @fastmath copysign(Base.rem_internal(abs(x), abs(y)), x) +end + @fastmath begin hypot_fast(x::T, y::T) where {T<:FloatTypes} = sqrt(x*x + y*y) @@ -362,4 +366,27 @@ for f in (:^, :atan, :hypot, :log) end end +# Reductions + +maximum_fast(a; kw...) = Base.reduce(max_fast, a; kw...) +minimum_fast(a; kw...) = Base.reduce(min_fast, a; kw...) + +maximum_fast(f, a; kw...) = Base.mapreduce(f, max_fast, a; kw...) +minimum_fast(f, a; kw...) = Base.mapreduce(f, min_fast, a; kw...) + +Base.reducedim_init(f, ::typeof(max_fast), A::AbstractArray, region) = + Base.reducedim_init(f, max, A::AbstractArray, region) +Base.reducedim_init(f, ::typeof(min_fast), A::AbstractArray, region) = + Base.reducedim_init(f, min, A::AbstractArray, region) + +maximum!_fast(r::AbstractArray, A::AbstractArray; kw...) = + maximum!_fast(identity, r, A; kw...) +minimum!_fast(r::AbstractArray, A::AbstractArray; kw...) = + minimum!_fast(identity, r, A; kw...) + +maximum!_fast(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) = + Base.mapreducedim!(f, max_fast, Base.initarray!(r, f, max, init, A), A) +minimum!_fast(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) = + Base.mapreducedim!(f, min_fast, Base.initarray!(r, f, min, init, A), A) + end diff --git a/base/file.jl b/base/file.jl index eaff9efae43d3..e63ed67ae249b 100644 --- a/base/file.jl +++ b/base/file.jl @@ -32,7 +32,7 @@ export # get and set current directory """ - pwd() -> AbstractString + pwd() -> String Get the current working directory. 
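As a hedged usage note on the `maximum_fast`/`minimum_fast` reductions introduced in the fastmath changes above (illustrative, not part of the patch): because `max_fast` is permitted to assume its arguments are not NaN, the fast reduction may ignore a NaN that the strict reduction would propagate.

```julia
A = [1.0, NaN, 3.0]
maximum(A)            # NaN, following strict IEEE max semantics
@fastmath maximum(A)  # may return 3.0; max_fast is free to assume no NaNs
```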
@@ -105,7 +105,7 @@ if Sys.iswindows() end else function cd(f::Function, dir::AbstractString) - fd = ccall(:open, Int32, (Cstring, Int32), :., 0) + fd = ccall(:open, Int32, (Cstring, Int32, UInt32...), :., 0) systemerror(:open, fd == -1) try cd(dir) @@ -294,7 +294,7 @@ function rm(path::AbstractString; force::Bool=false, recursive::Bool=false) rm(joinpath(path, p), force=force, recursive=true) end catch err - if !(force && isa(err, IOError) && err.code==Base.UV_EACCES) + if !(isa(err, IOError) && err.code==Base.UV_EACCES) rethrow(err) end end @@ -303,7 +303,9 @@ function rm(path::AbstractString; force::Bool=false, recursive::Bool=false) try ret = ccall(:uv_fs_rmdir, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), C_NULL, req, path, C_NULL) uv_fs_req_cleanup(req) - ret < 0 && uv_error("rm($(repr(path)))", ret) + if ret < 0 && !(force && ret == Base.UV_ENOENT) + uv_error("rm($(repr(path)))", ret) + end nothing finally Libc.free(req) @@ -519,34 +521,70 @@ const TEMP_CLEANUP = Dict{String,Bool}() const TEMP_CLEANUP_LOCK = ReentrantLock() function temp_cleanup_later(path::AbstractString; asap::Bool=false) - lock(TEMP_CLEANUP_LOCK) + @lock TEMP_CLEANUP_LOCK begin # each path should only be inserted here once, but if there # is a collision, let !asap win over asap: if any user might # still be using the path, don't delete it until process exit TEMP_CLEANUP[path] = get(TEMP_CLEANUP, path, true) & asap if length(TEMP_CLEANUP) > TEMP_CLEANUP_MAX[] - temp_cleanup_purge() + temp_cleanup_purge_prelocked(false) TEMP_CLEANUP_MAX[] = max(TEMP_CLEANUP_MIN[], 2*length(TEMP_CLEANUP)) end - unlock(TEMP_CLEANUP_LOCK) - return nothing + end + nothing +end + +function temp_cleanup_forget(path::AbstractString) + @lock TEMP_CLEANUP_LOCK delete!(TEMP_CLEANUP, path) + nothing end -function temp_cleanup_purge(; force::Bool=false) - need_gc = Sys.iswindows() - for (path, asap) in TEMP_CLEANUP +function temp_cleanup_purge_prelocked(force::Bool) + filter!(TEMP_CLEANUP) do (path, asap) try - if (force || asap) && ispath(path) - need_gc && GC.gc(true) - need_gc = false + ispath(path) || return false + if force || asap prepare_for_deletion(path) rm(path, recursive=true, force=true) end - !ispath(path) && delete!(TEMP_CLEANUP, path) + return ispath(path) catch ex - @warn "temp cleanup" _group=:file exception=(ex, catch_backtrace()) + @warn """ + Failed to clean up temporary path $(repr(path)) + $ex + """ _group=:file + ex isa InterruptException && rethrow() + return true end end + nothing +end + +function temp_cleanup_purge_all() + may_need_gc = false + @lock TEMP_CLEANUP_LOCK filter!(TEMP_CLEANUP) do (path, asap) + try + ispath(path) || return false + may_need_gc = true + return true + catch ex + ex isa InterruptException && rethrow() + return true + end + end + if may_need_gc + # this is only usually required on Sys.iswindows(), but may as well do it everywhere + GC.gc(true) + end + @lock TEMP_CLEANUP_LOCK temp_cleanup_purge_prelocked(true) + nothing +end + +# deprecated internal function used by some packages +temp_cleanup_purge(; force=false) = force ? temp_cleanup_purge_all() : @lock TEMP_CLEANUP_LOCK temp_cleanup_purge_prelocked(false) + +function __postinit__() + Base.atexit(temp_cleanup_purge_all) end const temp_prefix = "jl_" @@ -629,7 +667,7 @@ end # os-test Generate a temporary file path. This function only returns a path; no file is created. 
The path is likely to be unique, but this cannot be guaranteed due to -the very remote posibility of two simultaneous calls to `tempname` generating +the very remote possibility of two simultaneous calls to `tempname` generating the same file name. The name is guaranteed to differ from all files already existing at the time of the call to `tempname`. @@ -642,7 +680,7 @@ The `cleanup` option controls whether the process attempts to delete the returned path automatically when the process exits. Note that the `tempname` function does not create any file or directory at the returned location, so there is nothing to cleanup unless you create a file or directory there. If -you do and `clean` is `true` it will be deleted upon process termination. +you do and `cleanup` is `true` it will be deleted upon process termination. !!! compat "Julia 1.4" The `parent` and `cleanup` arguments were added in 1.4. Prior to Julia 1.4 @@ -675,8 +713,9 @@ mktemp(parent) mktempdir(parent=tempdir(); prefix=$(repr(temp_prefix)), cleanup=true) -> path Create a temporary directory in the `parent` directory with a name -constructed from the given prefix and a random suffix, and return its path. -Additionally, any trailing `X` characters may be replaced with random characters. +constructed from the given `prefix` and a random suffix, and return its path. +Additionally, on some platforms, any trailing `'X'` characters in `prefix` may be replaced +with random characters. If `parent` does not exist, throw an error. The `cleanup` option controls whether the temporary directory is automatically deleted when the process exits. @@ -727,10 +766,11 @@ temporary file upon completion. See also: [`mktempdir`](@ref). """ function mktemp(fn::Function, parent::AbstractString=tempdir()) - (tmp_path, tmp_io) = mktemp(parent, cleanup=false) + (tmp_path, tmp_io) = mktemp(parent) try fn(tmp_path, tmp_io) finally + temp_cleanup_forget(tmp_path) try close(tmp_io) ispath(tmp_path) && rm(tmp_path) @@ -755,10 +795,11 @@ See also: [`mktemp`](@ref), [`mkdir`](@ref). """ function mktempdir(fn::Function, parent::AbstractString=tempdir(); prefix::AbstractString=temp_prefix) - tmpdir = mktempdir(parent; prefix=prefix, cleanup=false) + tmpdir = mktempdir(parent; prefix=prefix) try fn(tmpdir) finally + temp_cleanup_forget(tmpdir) try if ispath(tmpdir) prepare_for_deletion(tmpdir) @@ -840,7 +881,7 @@ julia> readdir("base", join=true) ⋮ "base/version_git.sh" "base/views.jl" - "base/weakkeydict.jl"``` + "base/weakkeydict.jl" julia> readdir(abspath("base"), join=true) 145-element Array{String,1}: @@ -1062,7 +1103,7 @@ See also: [`hardlink`](@ref). !!! compat "Julia 1.6" The `dir_target` keyword argument was added in Julia 1.6. Prior to this, - symlinks to nonexistant paths on windows would always be file symlinks, and + symlinks to nonexistent paths on windows would always be file symlinks, and relative symlinks to directories were not supported. """ function symlink(target::AbstractString, link::AbstractString; @@ -1109,7 +1150,7 @@ function symlink(target::AbstractString, link::AbstractString; end """ - readlink(path::AbstractString) -> AbstractString + readlink(path::AbstractString) -> String Return the target location a symbolic link `path` points to. 
""" diff --git a/base/filesystem.jl b/base/filesystem.jl index 863eedf8ade9d..d9760ec08a8a9 100644 --- a/base/filesystem.jl +++ b/base/filesystem.jl @@ -36,7 +36,6 @@ export File, # open, futime, write, - JL_O_ACCMODE, JL_O_WRONLY, JL_O_RDONLY, JL_O_RDWR, @@ -91,7 +90,7 @@ uv_fs_req_cleanup(req) = ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req) include("path.jl") include("stat.jl") include("file.jl") -include("../file_constants.jl") +include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "file_constants.jl")) # include($BUILDROOT/base/file_constants.jl) ## Operations with File (fd) objects ## @@ -144,6 +143,8 @@ function close(f::File) nothing end +closewrite(f::File) = nothing + # sendfile is the most efficient way to copy from a file descriptor function sendfile(dst::File, src::File, src_offset::Int64, bytes::Int) check_open(dst) @@ -201,11 +202,12 @@ end function read(f::File, ::Type{Char}) b0 = read(f, UInt8) - l = 8 * (4 - leading_ones(b0)) + l = 0x08 * (0x04 - UInt8(leading_ones(b0))) c = UInt32(b0) << 24 - if l < 24 + if l ≤ 0x10 s = 16 while s ≥ l && !eof(f) + # this works around lack of peek(::File) p = position(f) b = read(f, UInt8) if b & 0xc0 != 0x80 diff --git a/base/float.jl b/base/float.jl index 60850b7e02f64..d26d279e47895 100644 --- a/base/float.jl +++ b/base/float.jl @@ -101,6 +101,8 @@ exponent_one(::Type{Float16}) = 0x3c00 exponent_half(::Type{Float16}) = 0x3800 significand_mask(::Type{Float16}) = 0x03ff +mantissa(x::T) where {T} = reinterpret(Unsigned, x) & significand_mask(T) + for T in (Float16, Float32, Float64) @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T))) @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1) @@ -135,6 +137,79 @@ i.e. the maximum integer value representable by [`exponent_bits(T)`](@ref) bits. """ function exponent_raw_max end +""" +IEEE 754 definition of the minimum exponent. 
+""" +ieee754_exponent_min(::Type{T}) where {T<:IEEEFloat} = Int(1 - exponent_max(T))::Int + +exponent_min(::Type{Float16}) = ieee754_exponent_min(Float16) +exponent_min(::Type{Float32}) = ieee754_exponent_min(Float32) +exponent_min(::Type{Float64}) = ieee754_exponent_min(Float64) + +function ieee754_representation( + ::Type{F}, sign_bit::Bool, exponent_field::Integer, significand_field::Integer +) where {F<:IEEEFloat} + T = uinttype(F) + ret::T = sign_bit + ret <<= exponent_bits(F) + ret |= exponent_field + ret <<= significand_bits(F) + ret |= significand_field +end + +# ±floatmax(T) +function ieee754_representation( + ::Type{F}, sign_bit::Bool, ::Val{:omega} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, exponent_raw_max(F) - 1, significand_mask(F)) +end + +# NaN or an infinity +function ieee754_representation( + ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:nan} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, exponent_raw_max(F), significand_field) +end + +# NaN with default payload +function ieee754_representation( + ::Type{F}, sign_bit::Bool, ::Val{:nan} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, one(uinttype(F)) << (significand_bits(F) - 1), Val(:nan)) +end + +# Infinity +function ieee754_representation( + ::Type{F}, sign_bit::Bool, ::Val{:inf} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, false, Val(:nan)) +end + +# Subnormal or zero +function ieee754_representation( + ::Type{F}, sign_bit::Bool, significand_field::Integer, ::Val{:subnormal} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, false, significand_field) +end + +# Zero +function ieee754_representation( + ::Type{F}, sign_bit::Bool, ::Val{:zero} +) where {F<:IEEEFloat} + ieee754_representation(F, sign_bit, false, Val(:subnormal)) +end + +""" + uabs(x::Integer) + +Return the absolute value of `x`, possibly returning a different type should the +operation be susceptible to overflow. This typically arises when `x` is a two's complement +signed integer, so that `abs(typemin(x)) == typemin(x) < 0`, in which case the result of +`uabs(x)` will be an unsigned integer of the same size. 
+""" +uabs(x::Integer) = abs(x) +uabs(x::BitSigned) = unsigned(abs(x)) + ## conversions to floating-point ## # TODO: deprecate in 2.0 @@ -165,38 +240,50 @@ promote_rule(::Type{Float16}, ::Type{UInt128}) = Float16 promote_rule(::Type{Float16}, ::Type{Int128}) = Float16 function Float64(x::UInt128) - x == 0 && return 0.0 - n = 128-leading_zeros(x) # ndigits0z(x,2) - if n <= 53 - y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff - else - y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit - y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent) - y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even + if x < UInt128(1) << 104 # Can fit it in two 52 bits mantissas + low_exp = 0x1p52 + high_exp = 0x1p104 + low_bits = (x % UInt64) & Base.significand_mask(Float64) + low_value = reinterpret(Float64, reinterpret(UInt64, low_exp) | low_bits) - low_exp + high_bits = ((x >> 52) % UInt64) + high_value = reinterpret(Float64, reinterpret(UInt64, high_exp) | high_bits) - high_exp + low_value + high_value + else # Large enough that low bits only affect rounding, pack low bits + low_exp = 0x1p76 + high_exp = 0x1p128 + low_bits = ((x >> 12) % UInt64) >> 12 | (x % UInt64) & 0xFFFFFF + low_value = reinterpret(Float64, reinterpret(UInt64, low_exp) | low_bits) - low_exp + high_bits = ((x >> 76) % UInt64) + high_value = reinterpret(Float64, reinterpret(UInt64, high_exp) | high_bits) - high_exp + low_value + high_value end - d = ((n+1022) % UInt64) << 52 - reinterpret(Float64, d + y) end function Float64(x::Int128) - x == 0 && return 0.0 - s = ((x >>> 64) % UInt64) & 0x8000_0000_0000_0000 # sign bit - x = abs(x) % UInt128 - n = 128-leading_zeros(x) # ndigits0z(x,2) - if n <= 53 - y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff - else - y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit - y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent) - y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even + sign_bit = ((x >> 127) % UInt64) << 63 + ux = uabs(x) + if ux < UInt128(1) << 104 # Can fit it in two 52 bits mantissas + low_exp = 0x1p52 + high_exp = 0x1p104 + low_bits = (ux % UInt64) & Base.significand_mask(Float64) + low_value = reinterpret(Float64, reinterpret(UInt64, low_exp) | low_bits) - low_exp + high_bits = ((ux >> 52) % UInt64) + high_value = reinterpret(Float64, reinterpret(UInt64, high_exp) | high_bits) - high_exp + reinterpret(Float64, sign_bit | reinterpret(UInt64, low_value + high_value)) + else # Large enough that low bits only affect rounding, pack low bits + low_exp = 0x1p76 + high_exp = 0x1p128 + low_bits = ((ux >> 12) % UInt64) >> 12 | (ux % UInt64) & 0xFFFFFF + low_value = reinterpret(Float64, reinterpret(UInt64, low_exp) | low_bits) - low_exp + high_bits = ((ux >> 76) % UInt64) + high_value = reinterpret(Float64, reinterpret(UInt64, high_exp) | high_bits) - high_exp + reinterpret(Float64, sign_bit | reinterpret(UInt64, low_value + high_value)) end - d = ((n+1022) % UInt64) << 52 - reinterpret(Float64, s | d + y) end function Float32(x::UInt128) x == 0 && return 0f0 - n = 128-leading_zeros(x) # ndigits0z(x,2) + n = top_set_bit(x) # ndigits0z(x,2) if n <= 24 y = ((x % UInt32) << (24-n)) & 0x007f_ffff else @@ -212,7 +299,7 @@ function Float32(x::Int128) x == 0 && return 0f0 s = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit x = abs(x) % UInt128 - n = 128-leading_zeros(x) # ndigits0z(x,2) + n = top_set_bit(x) # ndigits0z(x,2) if n <= 24 y = ((x % UInt32) << (24-n)) & 
0x007f_ffff else @@ -225,8 +312,8 @@ function Float32(x::Int128) end # TODO: optimize -Float16(x::UInt128) = convert(Float16, Float32(x)) -Float16(x::Int128) = convert(Float16, Float32(x)) +Float16(x::UInt128) = convert(Float16, Float64(x)) +Float16(x::Int128) = convert(Float16, Float64(x)) Float16(x::Float32) = fptrunc(Float16, x) Float16(x::Float64) = fptrunc(Float16, x) @@ -285,6 +372,7 @@ Float64 """ float(::Type{T}) where {T<:Number} = typeof(float(zero(T))) float(::Type{T}) where {T<:AbstractFloat} = T +float(::Type{Union{}}, slurp...) = Union{}(0.0) """ unsafe_trunc(T, x) @@ -348,26 +436,15 @@ unsafe_trunc(::Type{UInt128}, x::Float16) = unsafe_trunc(UInt128, Float32(x)) unsafe_trunc(::Type{Int128}, x::Float16) = unsafe_trunc(Int128, Float32(x)) # matches convert methods -# also determines floor, ceil, round -trunc(::Type{Signed}, x::IEEEFloat) = trunc(Int,x) -trunc(::Type{Unsigned}, x::IEEEFloat) = trunc(UInt,x) -trunc(::Type{Integer}, x::IEEEFloat) = trunc(Int,x) - -# fallbacks -floor(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundDown)) -ceil(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundUp)) -round(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundNearest)) - -# Bool -trunc(::Type{Bool}, x::AbstractFloat) = (-1 < x < 2) ? 1 <= x : throw(InexactError(:trunc, Bool, x)) -floor(::Type{Bool}, x::AbstractFloat) = (0 <= x < 2) ? 1 <= x : throw(InexactError(:floor, Bool, x)) -ceil(::Type{Bool}, x::AbstractFloat) = (-1 < x <= 1) ? 0 < x : throw(InexactError(:ceil, Bool, x)) -round(::Type{Bool}, x::AbstractFloat) = (-0.5 <= x < 1.5) ? 0.5 < x : throw(InexactError(:round, Bool, x)) - -round(x::IEEEFloat, r::RoundingMode{:ToZero}) = trunc_llvm(x) -round(x::IEEEFloat, r::RoundingMode{:Down}) = floor_llvm(x) -round(x::IEEEFloat, r::RoundingMode{:Up}) = ceil_llvm(x) -round(x::IEEEFloat, r::RoundingMode{:Nearest}) = rint_llvm(x) +# also determines trunc, floor, ceil +round(::Type{Signed}, x::IEEEFloat, r::RoundingMode) = round(Int, x, r) +round(::Type{Unsigned}, x::IEEEFloat, r::RoundingMode) = round(UInt, x, r) +round(::Type{Integer}, x::IEEEFloat, r::RoundingMode) = round(Int, x, r) + +round(x::IEEEFloat, ::RoundingMode{:ToZero}) = trunc_llvm(x) +round(x::IEEEFloat, ::RoundingMode{:Down}) = floor_llvm(x) +round(x::IEEEFloat, ::RoundingMode{:Up}) = ceil_llvm(x) +round(x::IEEEFloat, ::RoundingMode{:Nearest}) = rint_llvm(x) ## floating point promotions ## promote_rule(::Type{Float32}, ::Type{Float16}) = Float32 @@ -391,11 +468,109 @@ muladd(x::T, y::T, z::T) where {T<:IEEEFloat} = muladd_float(x, y, z) # TODO: faster floating point fld? # TODO: faster floating point mod? 
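A brief sanity sketch of the rounding consolidation above (assumption, not part of the patch: the generic `trunc`/`floor`/`ceil` fallbacks now forward to `round` with an explicit `RoundingMode`, so only the four `round(::IEEEFloat, ::RoundingMode)` methods reach the LLVM intrinsics):

```julia
# After the consolidation these pairs should agree; trunc/floor/ceil are thin wrappers.
trunc(Int, 2.7)  == round(Int, 2.7, RoundToZero)   # both 2
floor(Int, -2.3) == round(Int, -2.3, RoundDown)    # both -3
ceil(Int, 2.3)   == round(Int, 2.3, RoundUp)       # both 3
```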
-rem(x::T, y::T) where {T<:IEEEFloat} = rem_float(x, y) +function unbiased_exponent(x::T) where {T<:IEEEFloat} + return (reinterpret(Unsigned, x) & exponent_mask(T)) >> significand_bits(T) +end + +function explicit_mantissa_noinfnan(x::T) where {T<:IEEEFloat} + m = mantissa(x) + issubnormal(x) || (m |= significand_mask(T) + uinttype(T)(1)) + return m +end + +function _to_float(number::U, ep) where {U<:Unsigned} + F = floattype(U) + S = signed(U) + epint = unsafe_trunc(S,ep) + lz::signed(U) = unsafe_trunc(S, Core.Intrinsics.ctlz_int(number) - U(exponent_bits(F))) + number <<= lz + epint -= lz + bits = U(0) + if epint >= 0 + bits = number & significand_mask(F) + bits |= ((epint + S(1)) << significand_bits(F)) & exponent_mask(F) + else + bits = (number >> -epint) & significand_mask(F) + end + return reinterpret(F, bits) +end + +@assume_effects :terminates_locally :nothrow function rem_internal(x::T, y::T) where {T<:IEEEFloat} + xuint = reinterpret(Unsigned, x) + yuint = reinterpret(Unsigned, y) + if xuint <= yuint + if xuint < yuint + return x + end + return zero(T) + end + + e_x = unbiased_exponent(x) + e_y = unbiased_exponent(y) + # Most common case where |y| is "very normal" and |x/y| < 2^EXPONENT_WIDTH + if e_y > (significand_bits(T)) && (e_x - e_y) <= (exponent_bits(T)) + m_x = explicit_mantissa_noinfnan(x) + m_y = explicit_mantissa_noinfnan(y) + d = urem_int((m_x << (e_x - e_y)), m_y) + iszero(d) && return zero(T) + return _to_float(d, e_y - uinttype(T)(1)) + end + # Both are subnormals + if e_x == 0 && e_y == 0 + return reinterpret(T, urem_int(xuint, yuint) & significand_mask(T)) + end + + m_x = explicit_mantissa_noinfnan(x) + e_x -= uinttype(T)(1) + m_y = explicit_mantissa_noinfnan(y) + lz_m_y = uinttype(T)(exponent_bits(T)) + if e_y > 0 + e_y -= uinttype(T)(1) + else + m_y = mantissa(y) + lz_m_y = Core.Intrinsics.ctlz_int(m_y) + end + + tz_m_y = Core.Intrinsics.cttz_int(m_y) + sides_zeroes_cnt = lz_m_y + tz_m_y + + # n>0 + exp_diff = e_x - e_y + # Shift hy right until the end or n = 0 + right_shift = min(exp_diff, tz_m_y) + m_y >>= right_shift + exp_diff -= right_shift + e_y += right_shift + # Shift hx left until the end or n = 0 + left_shift = min(exp_diff, uinttype(T)(exponent_bits(T))) + m_x <<= left_shift + exp_diff -= left_shift + + m_x = urem_int(m_x, m_y) + iszero(m_x) && return zero(T) + iszero(exp_diff) && return _to_float(m_x, e_y) + + while exp_diff > sides_zeroes_cnt + exp_diff -= sides_zeroes_cnt + m_x <<= sides_zeroes_cnt + m_x = urem_int(m_x, m_y) + end + m_x <<= exp_diff + m_x = urem_int(m_x, m_y) + return _to_float(m_x, e_y) +end -cld(x::T, y::T) where {T<:AbstractFloat} = -fld(-x,y) +function rem(x::T, y::T) where {T<:IEEEFloat} + if isfinite(x) && !iszero(x) && isfinite(y) && !iszero(y) + return copysign(rem_internal(abs(x), abs(y)), x) + elseif isinf(x) || isnan(y) || iszero(y) # y can still be Inf + return T(NaN) + else + return x + end +end -function mod(x::T, y::T) where T<:AbstractFloat +function mod(x::T, y::T) where {T<:AbstractFloat} r = rem(x,y) if r == 0 copysign(r,y) @@ -496,7 +671,7 @@ See also: [`iszero`](@ref), [`isone`](@ref), [`isinf`](@ref), [`ismissing`](@ref isnan(x::AbstractFloat) = (x != x)::Bool isnan(x::Number) = false -isfinite(x::AbstractFloat) = x - x == 0 +isfinite(x::AbstractFloat) = !isnan(x - x) isfinite(x::Real) = decompose(x)[3] != 0 isfinite(x::Integer) = true @@ -508,33 +683,43 @@ Test whether a number is infinite. See also: [`Inf`](@ref), [`iszero`](@ref), [`isfinite`](@ref), [`isnan`](@ref). 
""" isinf(x::Real) = !isnan(x) & !isfinite(x) +isinf(x::IEEEFloat) = abs(x) === oftype(x, Inf) const hx_NaN = hash_uint64(reinterpret(UInt64, NaN)) -let Tf = Float64, Tu = UInt64, Ti = Int64 - @eval function hash(x::$Tf, h::UInt) - # see comments on trunc and hash(Real, UInt) - if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))) - xi = fptosi($Ti, x) - if isequal(xi, x) - return hash(xi, h) - end - elseif $(Tf(typemin(Tu))) <= x < $(Tf(typemax(Tu))) - xu = fptoui($Tu, x) - if isequal(xu, x) - return hash(xu, h) - end - elseif isnan(x) - return hx_NaN ⊻ h # NaN does not have a stable bit pattern +function hash(x::Float64, h::UInt) + # see comments on trunc and hash(Real, UInt) + if typemin(Int64) <= x < typemax(Int64) + xi = fptosi(Int64, x) + if isequal(xi, x) + return hash(xi, h) + end + elseif typemin(UInt64) <= x < typemax(UInt64) + xu = fptoui(UInt64, x) + if isequal(xu, x) + return hash(xu, h) end - return hash_uint64(bitcast(UInt64, x)) - 3h + elseif isnan(x) + return hx_NaN ⊻ h # NaN does not have a stable bit pattern end + return hash_uint64(bitcast(UInt64, x)) - 3h end hash(x::Float32, h::UInt) = hash(Float64(x), h) -hash(x::Float16, h::UInt) = hash(Float64(x), h) -## generic hashing for rational values ## +function hash(x::Float16, h::UInt) + # see comments on trunc and hash(Real, UInt) + if isfinite(x) # all finite Float16 fit in Int64 + xi = fptosi(Int64, x) + if isequal(xi, x) + return hash(xi, h) + end + elseif isnan(x) + return hx_NaN ⊻ h # NaN does not have a stable bit pattern + end + return hash_uint64(bitcast(UInt64, Float64(x))) - 3h +end +## generic hashing for rational values ## function hash(x::Real, h::UInt) # decompose x as num*2^pow/den num, pow, den = decompose(x) @@ -549,32 +734,30 @@ function hash(x::Real, h::UInt) num = -num den = -den end - z = trailing_zeros(num) - if z != 0 - num >>= z - pow += z - end - z = trailing_zeros(den) - if z != 0 - den >>= z - pow -= z - end - - # handle values representable as Int64, UInt64, Float64 + num_z = trailing_zeros(num) + num >>= num_z + den_z = trailing_zeros(den) + den >>= den_z + pow += num_z - den_z + # If the real can be represented as an Int64, UInt64, or Float64, hash as those types. + # To be an Integer the denominator must be 1 and the power must be non-negative. 
if den == 1 - left = ndigits0z(num,2) + pow - right = trailing_zeros(num) + pow - if -1074 <= right - if 0 <= right && left <= 64 - left <= 63 && return hash(Int64(num) << Int(pow), h) - signbit(num) == signbit(den) && return hash(UInt64(num) << Int(pow), h) + # left = ceil(log2(num*2^pow)) + left = top_set_bit(abs(num)) + pow + # 2^-1074 is the minimum Float64 so if the power is smaller, not a Float64 + if -1074 <= pow + if 0 <= pow # if pow is non-negative, it is an integer + left <= 63 && return hash(Int64(num) << Int(pow), h) + left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow), h) end # typemin(Int64) handled by Float64 case - left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num),pow), h) + # 2^1024 is the maximum Float64 so if the power is greater, not a Float64 + # Float64s only have 53 mantissa bits (including implicit bit) + left <= 1024 && left - pow <= 53 && return hash(ldexp(Float64(num), pow), h) end + else + h = hash_integer(den, h) end - # handle generic rational values - h = hash_integer(den, h) h = hash_integer(pow, h) h = hash_integer(num, h) return h @@ -664,17 +847,6 @@ end precision(::Type{T}; base::Integer=2) where {T<:AbstractFloat} = _precision(T, base) precision(::T; base::Integer=2) where {T<:AbstractFloat} = precision(T; base) -""" - uabs(x::Integer) - -Return the absolute value of `x`, possibly returning a different type should the -operation be susceptible to overflow. This typically arises when `x` is a two's complement -signed integer, so that `abs(typemin(x)) == typemin(x) < 0`, in which case the result of -`uabs(x)` will be an unsigned integer of the same size. -""" -uabs(x::Integer) = abs(x) -uabs(x::BitSigned) = unsigned(abs(x)) - """ nextfloat(x::AbstractFloat, n::Integer) @@ -753,15 +925,18 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn # directly. `Tf(typemax(Ti))+1` is either always exactly representable, or # rounded to `Inf` (e.g. when `Ti==UInt128 && Tf==Float32`). @eval begin - function trunc(::Type{$Ti},x::$Tf) + function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero}) if $(Tf(typemin(Ti))-one(Tf)) < x < $(Tf(typemax(Ti))+one(Tf)) return unsafe_trunc($Ti,x) else - throw(InexactError(:trunc, $Ti, x)) + throw(InexactError(:round, $Ti, x, RoundToZero)) end end function (::Type{$Ti})(x::$Tf) - if ($(Tf(typemin(Ti))) <= x <= $(Tf(typemax(Ti)))) && (round(x, RoundToZero) == x) + # When typemax(Ti) is not representable by Tf but typemax(Ti) + 1 is, + # then < Tf(typemax(Ti) + 1) is stricter than <= Tf(typemax(Ti)). Using + # the former causes us to throw on UInt64(Float64(typemax(UInt64))+1) + if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))+one(Tf))) && isinteger(x) return unsafe_trunc($Ti,x) else throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) @@ -774,15 +949,15 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn # be rounded up. This assumes that `Tf(typemin(Ti)) > -Inf`, which is true for # these types, but not for `Float16` or larger integer types.
@eval begin - function trunc(::Type{$Ti},x::$Tf) + function round(::Type{$Ti},x::$Tf,::RoundingMode{:ToZero}) if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))) return unsafe_trunc($Ti,x) else - throw(InexactError(:trunc, $Ti, x)) + throw(InexactError(:round, $Ti, x, RoundToZero)) end end function (::Type{$Ti})(x::$Tf) - if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && (round(x, RoundToZero) == x) + if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && isinteger(x) return unsafe_trunc($Ti,x) else throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) @@ -797,6 +972,21 @@ end issubnormal(f) -> Bool Test whether a floating point number is subnormal. + +An IEEE floating point number is [subnormal](https://en.wikipedia.org/wiki/Subnormal_number) +when its exponent bits are zero and its significand is not zero. + +# Examples +```jldoctest +julia> floatmin(Float32) +1.1754944f-38 + +julia> issubnormal(1.0f-37) +false + +julia> issubnormal(1.0f-38) +true +``` """ function issubnormal(x::T) where {T<:IEEEFloat} y = reinterpret(Unsigned, x) @@ -824,13 +1014,24 @@ isodd(x::AbstractFloat) = isinteger(x) && abs(x) ≤ maxintfloat(x) && isodd(Int floatmax(::Type{Float32}) = $(bitcast(Float32, 0x7f7fffff)) floatmax(::Type{Float64}) = $(bitcast(Float64, 0x7fefffffffffffff)) - eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= floatmin(x) ? ldexp(eps(typeof(x)), exponent(x)) : nextfloat(zero(x)) : oftype(x, NaN) eps(::Type{Float16}) = $(bitcast(Float16, 0x1400)) eps(::Type{Float32}) = $(bitcast(Float32, 0x34000000)) eps(::Type{Float64}) = $(bitcast(Float64, 0x3cb0000000000000)) eps() = eps(Float64) end +eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= floatmin(x) ? ldexp(eps(typeof(x)), exponent(x)) : nextfloat(zero(x)) : oftype(x, NaN) + +function eps(x::T) where T<:IEEEFloat + # For isfinite(x), toggling the LSB will produce either prevfloat(x) or + # nextfloat(x) but will never change the sign or exponent. + # For !isfinite(x), this will map Inf to NaN and NaN to NaN or Inf. + y = reinterpret(T, reinterpret(Unsigned, x) ⊻ true) + # The absolute difference between these values is eps(x). This is true even + # for Inf/NaN values. + return abs(x - y) +end + """ floatmin(T = Float64) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index 4276ec0daecaf..a2a0f60bcf399 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -44,85 +44,7 @@ maxintfloat() = maxintfloat(Float64) isinteger(x::AbstractFloat) = (x - trunc(x) == 0) -""" - round([T,] x, [r::RoundingMode]) - round(x, [r::RoundingMode]; digits::Integer=0, base = 10) - round(x, [r::RoundingMode]; sigdigits::Integer, base = 10) - -Rounds the number `x`. - -Without keyword arguments, `x` is rounded to an integer value, returning a value of type -`T`, or of the same type of `x` if no `T` is provided. An [`InexactError`](@ref) will be -thrown if the value is not representable by `T`, similar to [`convert`](@ref). - -If the `digits` keyword argument is provided, it rounds to the specified number of digits -after the decimal place (or before if negative), in base `base`. - -If the `sigdigits` keyword argument is provided, it rounds to the specified number of -significant digits, in base `base`. - -The [`RoundingMode`](@ref) `r` controls the direction of the rounding; the default is -[`RoundNearest`](@ref), which rounds to the nearest integer, with ties (fractional values -of 0.5) being rounded to the nearest even integer. Note that `round` may give incorrect -results if the global rounding mode is changed (see [`rounding`](@ref)). 
- -# Examples -```jldoctest -julia> round(1.7) -2.0 - -julia> round(Int, 1.7) -2 - -julia> round(1.5) -2.0 - -julia> round(2.5) -2.0 - -julia> round(pi; digits=2) -3.14 - -julia> round(pi; digits=3, base=2) -3.125 - -julia> round(123.456; sigdigits=2) -120.0 - -julia> round(357.913; sigdigits=4, base=2) -352.0 -``` - -!!! note - Rounding to specified digits in bases other than 2 can be inexact when - operating on binary floating point numbers. For example, the [`Float64`](@ref) - value represented by `1.15` is actually *less* than 1.15, yet will be - rounded to 1.2. For example: - - ```jldoctest; setup = :(using Printf) - julia> x = 1.15 - 1.15 - - julia> @sprintf "%.20f" x - "1.14999999999999991118" - - julia> x < 115//100 - true - - julia> round(x, digits=1) - 1.2 - ``` - -# Extensions - -To extend `round` to new numeric types, it is typically sufficient to define `Base.round(x::NewType, r::RoundingMode)`. -""" -round(T::Type, x) - -function round(::Type{T}, x::AbstractFloat, r::RoundingMode) where {T<:Integer} - r != RoundToZero && (x = round(x,r)) - trunc(T, x) -end +# See rounding.jl for docstring. # NOTE: this relies on the current keyword dispatch behaviour (#9498). function round(x::Real, r::RoundingMode=RoundNearest; @@ -150,12 +72,6 @@ function round(x::Real, r::RoundingMode=RoundNearest; end end -trunc(x::Real; kwargs...) = round(x, RoundToZero; kwargs...) -floor(x::Real; kwargs...) = round(x, RoundDown; kwargs...) -ceil(x::Real; kwargs...) = round(x, RoundUp; kwargs...) - -round(x::Integer, r::RoundingMode) = x - # round x to multiples of 1/invstep function _round_invstep(x, invstep, r::RoundingMode) y = round(x * invstep, r) / invstep @@ -245,8 +161,8 @@ end Inexact equality comparison. Two numbers compare equal if their relative distance *or* their absolute distance is within tolerance bounds: `isapprox` returns `true` if -`norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))`. The default `atol` is zero and the -default `rtol` depends on the types of `x` and `y`. The keyword argument `nans` determines +`norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))`. The default `atol` (absolute tolerance) is zero and the +default `rtol` (relative tolerance) depends on the types of `x` and `y`. The keyword argument `nans` determines whether or not NaN values are considered equal (defaults to false). For real or complex floating-point values, if an `atol > 0` is not specified, `rtol` defaults to @@ -304,7 +220,20 @@ true function isapprox(x::Number, y::Number; atol::Real=0, rtol::Real=rtoldefault(x,y,atol), nans::Bool=false, norm::Function=abs) - x == y || (isfinite(x) && isfinite(y) && norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))) || (nans && isnan(x) && isnan(y)) + x′, y′ = promote(x, y) # to avoid integer overflow + x == y || + (isfinite(x) && isfinite(y) && norm(x-y) <= max(atol, rtol*max(norm(x′), norm(y′)))) || + (nans && isnan(x) && isnan(y)) +end + +function isapprox(x::Integer, y::Integer; + atol::Real=0, rtol::Real=rtoldefault(x,y,atol), + nans::Bool=false, norm::Function=abs) + if norm === abs && atol < 1 && rtol == 0 + return x == y + else + return norm(x - y) <= max(atol, rtol*max(norm(x), norm(y))) + end end """ diff --git a/base/gcutils.jl b/base/gcutils.jl index 0e5d4c16e550a..ea39bc8ab6130 100644 --- a/base/gcutils.jl +++ b/base/gcutils.jl @@ -1,5 +1,36 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license + +""" + WeakRef(x) + +`w = WeakRef(x)` constructs a [weak reference](https://en.wikipedia.org/wiki/Weak_reference) +to the Julia value `x`: although `w` contains a reference to `x`, it does not prevent `x` from being +garbage collected. `w.value` is either `x` (if `x` has not been garbage-collected yet) or `nothing` +(if `x` has been garbage-collected). + +```jldoctest +julia> x = "a string" +"a string" + +julia> w = WeakRef(x) +WeakRef("a string") + +julia> GC.gc() + +julia> w # a reference is maintained via `x` +WeakRef("a string") + +julia> x = nothing # clear reference + +julia> GC.gc() + +julia> w +WeakRef(nothing) +``` +""" +WeakRef + ==(w::WeakRef, v::WeakRef) = isequal(w.value, v.value) ==(w::WeakRef, v) = isequal(w.value, v) ==(w, v::WeakRef) = isequal(w, v.value) @@ -88,9 +119,12 @@ const GC_INCREMENTAL = 2 GC.gc([full=true]) Perform garbage collection. The argument `full` determines the kind of -collection: A full collection (default) sweeps all objects, which makes the -next GC scan much slower, while an incremental collection may only sweep -so-called young objects. +collection: a full collection (default) traverses all live objects (i.e. full mark) +and should reclaim memory from all unreachable objects. An incremental collection only +reclaims memory from young objects which are not reachable. + +The GC may decide to perform a full collection even if an incremental collection was +requested. !!! warning Excessive use will likely lead to poor performance. @@ -131,6 +165,23 @@ function disable_finalizers() @inline ccall(:jl_gc_disable_finalizers_internal, Cvoid, ()) end +""" + GC.in_finalizer()::Bool + +Returns `true` if the current task is running a finalizer, returns `false` +otherwise. Will also return `false` within a finalizer which was inlined by the +compiler's eager finalization optimization, or if `finalize` is called on the +finalizer directly. + +The result of this function may be useful, for example, when a finalizer must +wait on a resource to become available; instead of polling the resource in a +`yield` loop (which is not legal to execute within a task running finalizers), +busy polling or an `@async` continuation could be used instead. +""" +function in_finalizer() @inline + ccall(:jl_gc_is_in_finalizer, Int8, ()) > 0 +end + """ GC.@preserve x1 x2 ... xn expr diff --git a/base/generator.jl b/base/generator.jl index d11742fe5b72f..1f981de8dc788 100644 --- a/base/generator.jl +++ b/base/generator.jl @@ -5,25 +5,26 @@ Given a function `f` and an iterator `iter`, construct an iterator that yields the values of `f` applied to the elements of `iter`. -The syntax for constructing an instance of this type is `f(x) for x in iter [if cond(x)::Bool] `. -The `[if cond(x)::Bool]` expression is optional and acts as a "guard", effectively -filtering out values where the condition is false. +The syntax `f(x) for x in iter` is syntax for constructing an instance of this +type. ```jldoctest -julia> g = (abs2(x) for x in 1:5 if x != 3); +julia> g = (abs2(x) for x in 1:5); julia> for x in g println(x) end 1 4 +9 16 25 julia> collect(g) -4-element Vector{Int64}: +5-element Vector{Int64}: 1 4 + 9 16 25 ``` @@ -92,13 +93,13 @@ Base.HasLength() """ IteratorSize(x) = IteratorSize(typeof(x)) IteratorSize(::Type) = HasLength() # HasLength is the default +IteratorSize(::Type{Union{}}, slurp...) 
= throw(ArgumentError("Union{} does not have elements")) +IteratorSize(::Type{Any}) = SizeUnknown() IteratorSize(::Type{<:Tuple}) = HasLength() IteratorSize(::Type{<:AbstractArray{<:Any,N}}) where {N} = HasShape{N}() IteratorSize(::Type{Generator{I,F}}) where {I,F} = IteratorSize(I) -IteratorSize(::Type{Any}) = SizeUnknown() - haslength(iter) = IteratorSize(iter) isa Union{HasShape, HasLength} abstract type IteratorEltype end @@ -126,7 +127,7 @@ Base.HasEltype() """ IteratorEltype(x) = IteratorEltype(typeof(x)) IteratorEltype(::Type) = HasEltype() # HasEltype is the default +IteratorEltype(::Type{Union{}}, slurp...) = throw(ArgumentError("Union{} does not have elements")) +IteratorEltype(::Type{Any}) = EltypeUnknown() IteratorEltype(::Type{Generator{I,T}}) where {I,T} = EltypeUnknown() - -IteratorEltype(::Type{Any}) = EltypeUnknown() diff --git a/base/genericmemory.jl b/base/genericmemory.jl new file mode 100644 index 0000000000000..51b25d453e5c4 --- /dev/null +++ b/base/genericmemory.jl @@ -0,0 +1,267 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +## genericmemory.jl: Managed Memory + +""" + GenericMemory{kind::Symbol, T, addrspace::Int} <: AbstractVector{T} + +One-dimensional dense array with elements of type `T`. +""" +GenericMemory +""" + Memory{T} == GenericMemory{:not_atomic, T, Core.CPU} + +One-dimensional dense array with elements of type `T`. +""" +Memory + +## Basic functions ## + +using Core: memoryrefoffset, memoryref_isassigned # import more functions which were not essential + +size(a::GenericMemory, d::Int) = + d < 1 ? error("dimension out of range") : + d == 1 ? length(a) : + 1 +size(a::GenericMemory, d::Integer) = size(a, convert(Int, d)) +size(a::GenericMemory) = (length(a),) + +IndexStyle(::Type{<:GenericMemory}) = IndexLinear() + +pointer(mem::GenericMemoryRef) = unsafe_convert(Ptr{Cvoid}, mem) # no bounds check, even for empty array + +_unsetindex!(A::Memory, i::Int) = (@_propagate_inbounds_meta; _unsetindex!(GenericMemoryRef(A, i)); A) +function _unsetindex!(A::MemoryRef{T}) where T + @_terminates_locally_meta + @_propagate_inbounds_meta + @inline + @boundscheck GenericMemoryRef(A, 1) + mem = A.mem + MemT = typeof(mem) + arrayelem = datatype_arrayelem(MemT) + elsz = datatype_layoutsize(MemT) + isboxed = 1; isunion = 2 + t = @_gc_preserve_begin mem + p = Ptr{Ptr{Cvoid}}(@inbounds pointer(A)) + if arrayelem == isboxed + Intrinsics.atomic_pointerset(p, C_NULL, :monotonic) + elseif arrayelem != isunion + if !datatype_pointerfree(T::DataType) + for j = 1:Core.sizeof(Ptr{Cvoid}):elsz + Intrinsics.atomic_pointerset(p + j - 1, C_NULL, :monotonic) + end + end + end + @_gc_preserve_end t + return A +end + +elsize(@nospecialize _::Type{A}) where {T,A<:GenericMemory{<:Any,T}} = aligned_sizeof(T) +sizeof(a::GenericMemory) = Core.sizeof(a) + +# multi arg case will be overwritten later.
This is needed for bootstrapping +function isassigned(a::Memory, i::Int) + @inline + @boundscheck (i - 1)%UInt < length(a)%UInt || return false + return @inbounds memoryref_isassigned(GenericMemoryRef(a, i), :not_atomic, false) +end + +isassigned(a::GenericMemoryRef) = memoryref_isassigned(a, :not_atomic, @_boundscheck) + +## copy ## +function unsafe_copyto!(dest::MemoryRef{T}, src::MemoryRef{T}, n) where {T} + @_terminates_globally_meta + n == 0 && return dest + @boundscheck GenericMemoryRef(dest, n), GenericMemoryRef(src, n) + ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr_or_offset, src.mem, src.ptr_or_offset, Int(n)) + return dest +end + +function unsafe_copyto!(dest::GenericMemoryRef, src::GenericMemoryRef, n) + n == 0 && return dest + @boundscheck GenericMemoryRef(dest, n), GenericMemoryRef(src, n) + unsafe_copyto!(dest.mem, memoryrefoffset(dest), src.mem, memoryrefoffset(src), n) + return dest +end + +function unsafe_copyto!(dest::Memory{T}, doffs, src::Memory{T}, soffs, n) where{T} + n == 0 && return dest + unsafe_copyto!(GenericMemoryRef(dest, doffs), GenericMemoryRef(src, soffs), n) + return dest +end + +function unsafe_copyto!(dest::Memory, doffs, src::Memory, soffs, n) + @_terminates_locally_meta + n == 0 && return dest + # use pointer math to determine if they are deemed to alias + destp = pointer(dest, doffs) + srcp = pointer(src, soffs) + endp = pointer(src, soffs + n - 1) + @inbounds if destp < srcp || destp > endp + for i = 1:n + if isassigned(src, soffs + i - 1) + dest[doffs + i - 1] = src[soffs + i - 1] + else + _unsetindex!(dest, doffs + i - 1) + end + end + else + for i = n:-1:1 + if isassigned(src, soffs + i - 1) + dest[doffs + i - 1] = src[soffs + i - 1] + else + _unsetindex!(dest, doffs + i - 1) + end + end + end + return dest +end + +copy(a::T) where {T<:Memory} = ccall(:jl_genericmemory_copy, Ref{T}, (Any,), a) + +function copyto!(dest::Memory, doffs::Integer, src::Memory, soffs::Integer, n::Integer) + n < 0 && _throw_argerror("Number of elements to copy must be non-negative.") + unsafe_copyto!(dest, doffs, src, soffs, n) + return dest +end + + +## Constructors ## + +similar(a::Memory{T}) where {T} = Memory{T}(undef, length(a)) +similar(a::Memory{T}, S::Type) where {T} = Memory{S}(undef, length(a)) +similar(a::Memory{T}, m::Int) where {T} = Memory{T}(undef, m) +similar(a::Memory, T::Type, dims::Dims{1}) = Memory{T}(undef, dims[1]) +similar(a::Memory{T}, dims::Dims{1}) where {T} = Memory{T}(undef, dims[1]) + +function fill!(a::Union{Memory{UInt8}, Memory{Int8}}, x::Integer) + t = @_gc_preserve_begin a + p = unsafe_convert(Ptr{Cvoid}, a) + T = eltype(a) + memset(p, x isa T ? x : convert(T, x), length(a)) + @_gc_preserve_end t + return a +end + +## Conversions ## + +convert(::Type{T}, a::AbstractArray) where {T<:GenericMemory} = a isa T ? a : T(a)::T + +promote_rule(a::Type{Memory{T}}, b::Type{Memory{S}}) where {T,S} = el_same(promote_type(T,S), a, b) +promote_rule(a::Type{GenericMemory{:atomic,T,Core.CPU}}, b::Type{GenericMemory{:atomic,S,Core.CPU}}) where {T,S} = el_same(promote_type(T,S), a, b) + +## Constructors ## + +if nameof(@__MODULE__) === :Base # avoid method overwrite +# constructors should make copies +Memory{T}(x::AbstractArray{S,1}) where {T,S} = copyto_axcheck!(Memory{T}(undef, size(x)), x) +end + +## copying iterators to containers + +## Iteration ## + +iterate(A::Memory, i=1) = (@inline; (i - 1)%UInt < length(A)%UInt ? 
(@inbounds A[i], i + 1) : nothing) + +## Indexing: getindex ## + +# Faster contiguous indexing using copyto! for AbstractUnitRange and Colon +function getindex(A::Memory, I::AbstractUnitRange{<:Integer}) + @inline + @boundscheck checkbounds(A, I) + lI = length(I) + X = similar(A, axes(I)) + if lI > 0 + copyto!(X, firstindex(X), A, first(I), lI) + end + return X +end + +# getindex for carrying out logical indexing for AbstractUnitRange{Bool} as Bool <: Integer +getindex(a::Memory, r::AbstractUnitRange{Bool}) = getindex(a, to_index(r)) + +getindex(A::Memory, c::Colon) = copy(A) + +## Indexing: setindex! ## + +function setindex!(A::Memory{T}, x, i1::Int) where {T} + val = x isa T ? x : convert(T,x)::T + ref = memoryref(memoryref(A), i1, @_boundscheck) + memoryrefset!(ref, val, :not_atomic, @_boundscheck) + return A +end +function setindex!(A::Memory{T}, x, i1::Int, i2::Int, I::Int...) where {T} + @inline + @boundscheck (i2 == 1 && all(==(1), I)) || throw_boundserror(A, (i1, i2, I...)) + setindex!(A, x, i1) +end + +# Faster contiguous setindex! with copyto! +function setindex!(A::Memory{T}, X::Memory{T}, I::AbstractUnitRange{Int}) where T + @inline + @boundscheck checkbounds(A, I) + lI = length(I) + @boundscheck setindex_shape_check(X, lI) + if lI > 0 + unsafe_copyto!(A, first(I), X, 1, lI) + end + return A +end +function setindex!(A::Memory{T}, X::Memory{T}, c::Colon) where T + @inline + lI = length(A) + @boundscheck setindex_shape_check(X, lI) + if lI > 0 + unsafe_copyto!(A, 1, X, 1, lI) + end + return A +end + +# use memcmp for cmp on byte arrays +function cmp(a::Memory{UInt8}, b::Memory{UInt8}) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + pa = unsafe_convert(Ptr{Cvoid}, a) + pb = unsafe_convert(Ptr{Cvoid}, b) + c = memcmp(pa, pb, min(length(a),length(b))) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b)) +end + +const BitIntegerMemory{N} = Union{map(T->Memory{T}, BitInteger_types)...} +# use memcmp for == on bit integer types +function ==(a::M, b::M) where {M <: BitIntegerMemory} + if length(a) == length(b) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + pa = unsafe_convert(Ptr{Cvoid}, a) + pb = unsafe_convert(Ptr{Cvoid}, b) + c = memcmp(pa, pb, sizeof(eltype(M)) * length(a)) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c == 0 + else + return false + end +end + +function findall(pred::Fix2{typeof(in),<:Union{Memory{<:Real},Real}}, x::Memory{<:Real}) + if issorted(x, Sort.Forward) && issorted(pred.x, Sort.Forward) + return _sortedfindin(x, pred.x) + else + return _findin(x, pred.x) + end +end + +# Copying subregions +function indcopy(sz::Dims, I::GenericMemory) + n = length(I) + s = sz[n] + for i = n+1:length(sz) + s *= sz[i] + end + dst = eltype(I)[_findin(I[i], i < n ? (1:sz[i]) : (1:s)) for i = 1:n] + src = eltype(I)[I[i][_findin(I[i], i < n ? 
(1:sz[i]) : (1:s))] for i = 1:n] + dst, src +end diff --git a/base/gmp.jl b/base/gmp.jl index 5d3cabac87e40..b66c2eed9ae8d 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -10,7 +10,7 @@ import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, trailing_zeros, trailing_ones, count_ones, count_zeros, tryparse_internal, bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb, widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit, - sign, hastypemax, isodd, iseven, digits!, hash, hash_integer + sign, hastypemax, isodd, iseven, digits!, hash, hash_integer, top_set_bit if Clong == Int32 const ClongMax = Union{Int8, Int16, Int32} @@ -21,8 +21,16 @@ else end const CdoubleMax = Union{Float16, Float32, Float64} -version() = VersionNumber(unsafe_string(unsafe_load(cglobal((:__gmp_version, :libgmp), Ptr{Cchar})))) -bits_per_limb() = Int(unsafe_load(cglobal((:__gmp_bits_per_limb, :libgmp), Cint))) +if Sys.iswindows() + const libgmp = "libgmp-10.dll" +elseif Sys.isapple() + const libgmp = "@rpath/libgmp.10.dylib" +else + const libgmp = "libgmp.so.10" +end + +version() = VersionNumber(unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar})))) +bits_per_limb() = Int(unsafe_load(cglobal((:__gmp_bits_per_limb, libgmp), Cint))) const VERSION = version() const BITS_PER_LIMB = bits_per_limb() @@ -54,7 +62,7 @@ mutable struct BigInt <: Signed function BigInt(; nbits::Integer=0) b = MPZ.init2!(new(), nbits) - finalizer(cglobal((:__gmpz_clear, :libgmp)), b) + finalizer(cglobal((:__gmpz_clear, libgmp)), b) return b end end @@ -100,7 +108,7 @@ function __init__() bits_per_limb() != BITS_PER_LIMB ? @error(msg) : @warn(msg) end - ccall((:__gmp_set_memory_functions, :libgmp), Cvoid, + ccall((:__gmp_set_memory_functions, libgmp), Cvoid, (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}), cglobal(:jl_gc_counted_malloc), cglobal(:jl_gc_counted_realloc_with_old_size), @@ -112,7 +120,7 @@ function __init__() end # This only works with a patched version of GMP, ignore otherwise try - ccall((:__gmp_set_alloc_overflow_function, :libgmp), Cvoid, + ccall((:__gmp_set_alloc_overflow_function, libgmp), Cvoid, (Ptr{Cvoid},), cglobal(:jl_throw_out_of_memory_error)) ALLOC_OVERFLOW_FUNCTION[] = true @@ -129,23 +137,23 @@ module MPZ # wrapping of libgmp functions # - "output parameters" are labeled x, y, z, and are returned when appropriate # - constant input parameters are labeled a, b, c -# - a method modifying its input has a "!" appendend to its name, according to Julia's conventions +# - a method modifying its input has a "!" appended to its name, according to Julia's conventions # - some convenient methods are added (in addition to the pure MPZ ones), e.g. `add(a, b) = add!(BigInt(), a, b)` # and `add!(x, a) = add!(x, x, a)`. 
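To make the `MPZ` naming convention described above concrete, a hedged illustration follows (assuming the usual `add`/`add!` wrappers generated by the `@eval` loop; not part of the patch):

```julia
x = BigInt()
Base.GMP.MPZ.add!(x, big(2), big(3))   # three-argument form writes 2 + 3 into x
Base.GMP.MPZ.add!(x, big(10))          # two-argument form: x = x + 10
Base.GMP.MPZ.add(big(2), big(3))       # allocating convenience wrapper, returns big(5)
```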
-using ..GMP: BigInt, Limb, BITS_PER_LIMB +using ..GMP: BigInt, Limb, BITS_PER_LIMB, libgmp const mpz_t = Ref{BigInt} const bitcnt_t = Culong -gmpz(op::Symbol) = (Symbol(:__gmpz_, op), :libgmp) +gmpz(op::Symbol) = (Symbol(:__gmpz_, op), libgmp) -init!(x::BigInt) = (ccall((:__gmpz_init, :libgmp), Cvoid, (mpz_t,), x); x) -init2!(x::BigInt, a) = (ccall((:__gmpz_init2, :libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x) +init!(x::BigInt) = (ccall((:__gmpz_init, libgmp), Cvoid, (mpz_t,), x); x) +init2!(x::BigInt, a) = (ccall((:__gmpz_init2, libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x) -realloc2!(x, a) = (ccall((:__gmpz_realloc2, :libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x) +realloc2!(x, a) = (ccall((:__gmpz_realloc2, libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x) realloc2(a) = realloc2!(BigInt(), a) -sizeinbase(a::BigInt, b) = Int(ccall((:__gmpz_sizeinbase, :libgmp), Csize_t, (mpz_t, Cint), a, b)) +sizeinbase(a::BigInt, b) = Int(ccall((:__gmpz_sizeinbase, libgmp), Csize_t, (mpz_t, Cint), a, b)) for (op, nbits) in (:add => :(BITS_PER_LIMB*(1 + max(abs(a.size), abs(b.size)))), :sub => :(BITS_PER_LIMB*(1 + max(abs(a.size), abs(b.size)))), @@ -161,7 +169,7 @@ for (op, nbits) in (:add => :(BITS_PER_LIMB*(1 + max(abs(a.size), abs(b.size)))) end invert!(x::BigInt, a::BigInt, b::BigInt) = - ccall((:__gmpz_invert, :libgmp), Cint, (mpz_t, mpz_t, mpz_t), x, a, b) + ccall((:__gmpz_invert, libgmp), Cint, (mpz_t, mpz_t, mpz_t), x, a, b) invert(a::BigInt, b::BigInt) = invert!(BigInt(), a, b) invert!(x::BigInt, b::BigInt) = invert!(x, x, b) @@ -174,7 +182,7 @@ for op in (:add_ui, :sub_ui, :mul_ui, :mul_2exp, :fdiv_q_2exp, :pow_ui, :bin_ui) end end -ui_sub!(x::BigInt, a, b::BigInt) = (ccall((:__gmpz_ui_sub, :libgmp), Cvoid, (mpz_t, Culong, mpz_t), x, a, b); x) +ui_sub!(x::BigInt, a, b::BigInt) = (ccall((:__gmpz_ui_sub, libgmp), Cvoid, (mpz_t, Culong, mpz_t), x, a, b); x) ui_sub(a, b::BigInt) = ui_sub!(BigInt(), a, b) for op in (:scan1, :scan0) @@ -183,7 +191,7 @@ for op in (:scan1, :scan0) @eval $op(a::BigInt, b) = Int(signed(ccall($(gmpz(op)), Culong, (mpz_t, Culong), a, b))) end -mul_si!(x::BigInt, a::BigInt, b) = (ccall((:__gmpz_mul_si, :libgmp), Cvoid, (mpz_t, mpz_t, Clong), x, a, b); x) +mul_si!(x::BigInt, a::BigInt, b) = (ccall((:__gmpz_mul_si, libgmp), Cvoid, (mpz_t, mpz_t, Clong), x, a, b); x) mul_si(a::BigInt, b) = mul_si!(BigInt(), a, b) mul_si!(x::BigInt, b) = mul_si!(x, x, b) @@ -205,47 +213,58 @@ for (op, T) in ((:fac_ui, Culong), (:set_ui, Culong), (:set_si, Clong), (:set_d, end end -popcount(a::BigInt) = Int(signed(ccall((:__gmpz_popcount, :libgmp), Culong, (mpz_t,), a))) +popcount(a::BigInt) = Int(signed(ccall((:__gmpz_popcount, libgmp), Culong, (mpz_t,), a))) -mpn_popcount(d::Ptr{Limb}, s::Integer) = Int(ccall((:__gmpn_popcount, :libgmp), Culong, (Ptr{Limb}, Csize_t), d, s)) +mpn_popcount(d::Ptr{Limb}, s::Integer) = Int(ccall((:__gmpn_popcount, libgmp), Culong, (Ptr{Limb}, Csize_t), d, s)) mpn_popcount(a::BigInt) = mpn_popcount(a.d, abs(a.size)) function tdiv_qr!(x::BigInt, y::BigInt, a::BigInt, b::BigInt) - ccall((:__gmpz_tdiv_qr, :libgmp), Cvoid, (mpz_t, mpz_t, mpz_t, mpz_t), x, y, a, b) + ccall((:__gmpz_tdiv_qr, libgmp), Cvoid, (mpz_t, mpz_t, mpz_t, mpz_t), x, y, a, b) x, y end tdiv_qr(a::BigInt, b::BigInt) = tdiv_qr!(BigInt(), BigInt(), a, b) powm!(x::BigInt, a::BigInt, b::BigInt, c::BigInt) = - (ccall((:__gmpz_powm, :libgmp), Cvoid, (mpz_t, mpz_t, mpz_t, mpz_t), x, a, b, c); x) + (ccall((:__gmpz_powm, libgmp), Cvoid, (mpz_t, mpz_t, mpz_t, mpz_t), x, a, b, c); x) powm(a::BigInt, b::BigInt, 
c::BigInt) = powm!(BigInt(), a, b, c) powm!(x::BigInt, b::BigInt, c::BigInt) = powm!(x, x, b, c) function gcdext!(x::BigInt, y::BigInt, z::BigInt, a::BigInt, b::BigInt) - ccall((:__gmpz_gcdext, :libgmp), Cvoid, (mpz_t, mpz_t, mpz_t, mpz_t, mpz_t), x, y, z, a, b) + ccall((:__gmpz_gcdext, libgmp), Cvoid, (mpz_t, mpz_t, mpz_t, mpz_t, mpz_t), x, y, z, a, b) x, y, z end gcdext(a::BigInt, b::BigInt) = gcdext!(BigInt(), BigInt(), BigInt(), a, b) -cmp(a::BigInt, b::BigInt) = Int(ccall((:__gmpz_cmp, :libgmp), Cint, (mpz_t, mpz_t), a, b)) -cmp_si(a::BigInt, b) = Int(ccall((:__gmpz_cmp_si, :libgmp), Cint, (mpz_t, Clong), a, b)) -cmp_ui(a::BigInt, b) = Int(ccall((:__gmpz_cmp_ui, :libgmp), Cint, (mpz_t, Culong), a, b)) -cmp_d(a::BigInt, b) = Int(ccall((:__gmpz_cmp_d, :libgmp), Cint, (mpz_t, Cdouble), a, b)) +cmp(a::BigInt, b::BigInt) = Int(ccall((:__gmpz_cmp, libgmp), Cint, (mpz_t, mpz_t), a, b)) +cmp_si(a::BigInt, b) = Int(ccall((:__gmpz_cmp_si, libgmp), Cint, (mpz_t, Clong), a, b)) +cmp_ui(a::BigInt, b) = Int(ccall((:__gmpz_cmp_ui, libgmp), Cint, (mpz_t, Culong), a, b)) +cmp_d(a::BigInt, b) = Int(ccall((:__gmpz_cmp_d, libgmp), Cint, (mpz_t, Cdouble), a, b)) -mpn_cmp(a::Ptr{Limb}, b::Ptr{Limb}, c) = ccall((:__gmpn_cmp, :libgmp), Cint, (Ptr{Limb}, Ptr{Limb}, Clong), a, b, c) +mpn_cmp(a::Ptr{Limb}, b::Ptr{Limb}, c) = ccall((:__gmpn_cmp, libgmp), Cint, (Ptr{Limb}, Ptr{Limb}, Clong), a, b, c) mpn_cmp(a::BigInt, b::BigInt, c) = mpn_cmp(a.d, b.d, c) -get_str!(x, a, b::BigInt) = (ccall((:__gmpz_get_str,:libgmp), Ptr{Cchar}, (Ptr{Cchar}, Cint, mpz_t), x, a, b); x) -set_str!(x::BigInt, a, b) = Int(ccall((:__gmpz_set_str, :libgmp), Cint, (mpz_t, Ptr{UInt8}, Cint), x, a, b)) -get_d(a::BigInt) = ccall((:__gmpz_get_d, :libgmp), Cdouble, (mpz_t,), a) +get_str!(x, a, b::BigInt) = (ccall((:__gmpz_get_str,libgmp), Ptr{Cchar}, (Ptr{Cchar}, Cint, mpz_t), x, a, b); x) +set_str!(x::BigInt, a, b) = Int(ccall((:__gmpz_set_str, libgmp), Cint, (mpz_t, Ptr{UInt8}, Cint), x, a, b)) +get_d(a::BigInt) = ccall((:__gmpz_get_d, libgmp), Cdouble, (mpz_t,), a) + +function export!(a::AbstractVector{T}, n::BigInt; order::Integer=-1, nails::Integer=0, endian::Integer=0) where {T<:Base.BitInteger} + stride(a, 1) == 1 || throw(ArgumentError("a must have stride 1")) + ndigits = cld(sizeinbase(n, 2), 8*sizeof(T) - nails) + length(a) < ndigits && resize!(a, ndigits) + count = Ref{Csize_t}() + ccall((:__gmpz_export, libgmp), Ptr{T}, (Ptr{T}, Ref{Csize_t}, Cint, Csize_t, Cint, Csize_t, mpz_t), + a, count, order, sizeof(T), endian, nails, n) + @assert count[] ≤ length(a) + return a, Int(count[]) +end -limbs_write!(x::BigInt, a) = ccall((:__gmpz_limbs_write, :libgmp), Ptr{Limb}, (mpz_t, Clong), x, a) -limbs_finish!(x::BigInt, a) = ccall((:__gmpz_limbs_finish, :libgmp), Cvoid, (mpz_t, Clong), x, a) -import!(x::BigInt, a, b, c, d, e, f) = ccall((:__gmpz_import, :libgmp), Cvoid, +limbs_write!(x::BigInt, a) = ccall((:__gmpz_limbs_write, libgmp), Ptr{Limb}, (mpz_t, Clong), x, a) +limbs_finish!(x::BigInt, a) = ccall((:__gmpz_limbs_finish, libgmp), Cvoid, (mpz_t, Clong), x, a) +import!(x::BigInt, a, b, c, d, e, f) = ccall((:__gmpz_import, libgmp), Cvoid, (mpz_t, Csize_t, Cint, Csize_t, Cint, Csize_t, Ptr{Cvoid}), x, a, b, c, d, e, f) -setbit!(x, a) = (ccall((:__gmpz_setbit, :libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x) -tstbit(a::BigInt, b) = ccall((:__gmpz_tstbit, :libgmp), Cint, (mpz_t, bitcnt_t), a, b) % Bool +setbit!(x, a) = (ccall((:__gmpz_setbit, libgmp), Cvoid, (mpz_t, bitcnt_t), x, a); x) +tstbit(a::BigInt, b) = ccall((:__gmpz_tstbit, libgmp), 
Cint, (mpz_t, bitcnt_t), a, b) % Bool end # module MPZ @@ -301,11 +320,6 @@ function BigInt(x::Float64) unsafe_trunc(BigInt,x) end -function trunc(::Type{BigInt}, x::Union{Float16,Float32,Float64}) - isfinite(x) || throw(InexactError(:trunc, BigInt, x)) - unsafe_trunc(BigInt,x) -end - BigInt(x::Float16) = BigInt(Float64(x)) BigInt(x::Float32) = BigInt(Float64(x)) @@ -396,7 +410,7 @@ function Float64(x::BigInt, ::RoundingMode{:Nearest}) z = Float64((unsafe_load(x.d, 2) % UInt64) << BITS_PER_LIMB + unsafe_load(x.d)) else y1 = unsafe_load(x.d, xsize) % UInt64 - n = 64 - leading_zeros(y1) + n = top_set_bit(y1) # load first 54(1 + 52 bits of fraction + 1 for rounding) y = y1 >> (n - (precision(Float64)+1)) if Limb == UInt64 @@ -586,6 +600,12 @@ Number of ones in the binary representation of abs(x). """ count_ones_abs(x::BigInt) = iszero(x) ? 0 : MPZ.mpn_popcount(x) +function top_set_bit(x::BigInt) + isneg(x) && throw(DomainError(x, "top_set_bit only supports negative arguments when they have type BitSigned.")) + iszero(x) && return 0 + x.size * sizeof(Limb) << 3 - leading_zeros(GC.@preserve x unsafe_load(x.d, x.size)) +end + divrem(x::BigInt, y::BigInt) = MPZ.tdiv_qr(x, y) divrem(x::BigInt, y::Integer) = MPZ.tdiv_qr(x, big(y)) @@ -603,11 +623,11 @@ isqrt(x::BigInt) = MPZ.sqrt(x) ^(x::BigInt, y::Culong) = MPZ.pow_ui(x, y) function bigint_pow(x::BigInt, y::Integer) + x == 1 && return x + x == -1 && return isodd(y) ? x : -x if y<0; throw(DomainError(y, "`y` cannot be negative.")); end @noinline throw1(y) = throw(OverflowError("exponent $y is too large and computation will overflow")) - if x== 1; return x; end - if x==-1; return isodd(y) ? x : -x; end if y>typemax(Culong) x==0 && return x @@ -668,18 +688,30 @@ function prod(arr::AbstractArray{BigInt}) # to account for the rounding to limbs in MPZ.mul! # (BITS_PER_LIMB-1 would typically be enough, to which we add # 1 for the initial multiplication by init=1 in foldl) - nbits = GC.@preserve arr sum(arr; init=BITS_PER_LIMB) do x - abs(x.size) * BITS_PER_LIMB - leading_zeros(unsafe_load(x.d)) + nbits = BITS_PER_LIMB + for x in arr + iszero(x) && return zero(BigInt) + xsize = abs(x.size) + lz = GC.@preserve x leading_zeros(unsafe_load(x.d, xsize)) + nbits += xsize * BITS_PER_LIMB - lz end init = BigInt(; nbits) MPZ.set_si!(init, 1) foldl(MPZ.mul!, arr; init) end -factorial(x::BigInt) = isneg(x) ? BigInt(0) : MPZ.fac_ui(x) +factorial(n::BigInt) = !isneg(n) ? MPZ.fac_ui(n) : throw(DomainError(n, "`n` must not be negative.")) -binomial(n::BigInt, k::UInt) = MPZ.bin_ui(n, k) -binomial(n::BigInt, k::Integer) = k < 0 ? BigInt(0) : binomial(n, UInt(k)) +function binomial(n::BigInt, k::Integer) + k < 0 && return BigInt(0) + k <= typemax(Culong) && return binomial(n, Culong(k)) + n < 0 && return isodd(k) ? 
-binomial(k - n - 1, k) : binomial(k - n - 1, k) + κ = n - k + κ < 0 && return BigInt(0) + κ <= typemax(Culong) && return binomial(n, Culong(κ)) + throw(OverflowError("Computation would exceed memory")) +end +binomial(n::BigInt, k::Culong) = MPZ.bin_ui(n, k) ==(x::BigInt, y::BigInt) = cmp(x,y) == 0 ==(x::BigInt, i::Integer) = cmp(x,i) == 0 @@ -732,19 +764,29 @@ function string(n::BigInt; base::Integer = 10, pad::Integer = 1) end function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:Integer} - if 2 ≤ base ≤ 62 - s = codeunits(string(n; base)) - i, j = firstindex(a)-1, length(s)+1 - lasti = min(lastindex(a), firstindex(a) + length(s)-1 - isneg(n)) - while i < lasti - # base ≤ 36: 0-9, plus a-z for 10-35 - # base > 36: 0-9, plus A-Z for 10-35 and a-z for 36..61 - x = s[j -= 1] - a[i += 1] = base ≤ 36 ? (x>0x39 ? x-0x57 : x-0x30) : (x>0x39 ? (x>0x60 ? x-0x3d : x-0x37) : x-0x30) + if base ≥ 2 + if base ≤ 62 + # fast path using mpz_get_str via string(n; base) + s = codeunits(string(n; base)) + i, j = firstindex(a)-1, length(s)+1 + lasti = min(lastindex(a), firstindex(a) + length(s)-1 - isneg(n)) + while i < lasti + # base ≤ 36: 0-9, plus a-z for 10-35 + # base > 36: 0-9, plus A-Z for 10-35 and a-z for 36..61 + x = s[j -= 1] + a[i += 1] = base ≤ 36 ? (x>0x39 ? x-0x57 : x-0x30) : (x>0x39 ? (x>0x60 ? x-0x3d : x-0x37) : x-0x30) + end + lasti = lastindex(a) + while i < lasti; a[i+=1] = zero(T); end + return isneg(n) ? map!(-,a,a) : a + elseif a isa StridedVector{<:Base.BitInteger} && stride(a,1) == 1 && ispow2(base) && base-1 ≤ typemax(T) + # fast path using mpz_export + origlen = length(a) + _, writelen = MPZ.export!(a, n; nails = 8sizeof(T) - trailing_zeros(base)) + length(a) != origlen && resize!(a, origlen) # truncate to least-significant digits + a[begin+writelen:end] .= zero(T) + return isneg(n) ? map!(-,a,a) : a end - lasti = lastindex(a) - while i < lasti; a[i+=1] = zero(T); end - return isneg(n) ? map!(-,a,a) : a end return invoke(digits!, Tuple{typeof(a), Integer}, a, n; base) # slow generic fallback end @@ -796,8 +838,8 @@ Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), st ## streamlined hashing for BigInt, by avoiding allocation from shifts ## -if Limb === UInt - # this condition is true most (all?) of the time, and in this case we can define +if Limb === UInt64 === UInt + # On 64 bit systems we can define # an optimized version for BigInt of hash_integer (used e.g. for Rational{BigInt}), # and of hash @@ -807,7 +849,7 @@ if Limb === UInt GC.@preserve n begin s = n.size s == 0 && return hash_integer(0, h) - p = convert(Ptr{UInt}, n.d) + p = convert(Ptr{UInt64}, n.d) b = unsafe_load(p) h ⊻= hash_uint(ifelse(s < 0, -b, b) ⊻ h) for k = 2:abs(s) @@ -817,14 +859,11 @@ if Limb === UInt end end - _divLimb(n) = UInt === UInt64 ? n >>> 6 : n >>> 5 - _modLimb(n) = UInt === UInt64 ? 
n & 63 : n & 31 - function hash(x::BigInt, h::UInt) GC.@preserve x begin sz = x.size sz == 0 && return hash(0, h) - ptr = Ptr{UInt}(x.d) + ptr = Ptr{UInt64}(x.d) if sz == 1 return hash(unsafe_load(ptr), h) elseif sz == -1 @@ -833,8 +872,8 @@ if Limb === UInt end pow = trailing_zeros(x) nd = Base.ndigits0z(x, 2) - idx = _divLimb(pow) + 1 - shift = _modLimb(pow) % UInt + idx = (pow >>> 6) + 1 + shift = (pow & 63) % UInt upshift = BITS_PER_LIMB - shift asz = abs(sz) if shift == 0 @@ -847,7 +886,6 @@ if Limb === UInt if nd <= 1024 && nd - pow <= 53 return hash(ldexp(flipsign(Float64(limb), sz), pow), h) end - h = hash_integer(1, h) h = hash_integer(pow, h) h ⊻= hash_uint(flipsign(limb, sz) ⊻ h) for idx = idx+1:asz @@ -874,9 +912,9 @@ module MPQ # Rational{BigInt} import .Base: unsafe_rational, __throw_rational_argerror_zero -import ..GMP: BigInt, MPZ, Limb, isneg +import ..GMP: BigInt, MPZ, Limb, isneg, libgmp -gmpq(op::Symbol) = (Symbol(:__gmpq_, op), :libgmp) +gmpq(op::Symbol) = (Symbol(:__gmpq_, op), libgmp) mutable struct _MPQ num_alloc::Cint @@ -913,20 +951,20 @@ function Rational{BigInt}(num::BigInt, den::BigInt) return set_si(flipsign(1, num), 0) end xq = _MPQ(MPZ.set(num), MPZ.set(den)) - ccall((:__gmpq_canonicalize, :libgmp), Cvoid, (mpq_t,), xq) + ccall((:__gmpq_canonicalize, libgmp), Cvoid, (mpq_t,), xq) return sync_rational!(xq) end # define set, set_ui, set_si, set_z, and their inplace versions function set!(z::Rational{BigInt}, x::Rational{BigInt}) zq = _MPQ(z) - ccall((:__gmpq_set, :libgmp), Cvoid, (mpq_t, mpq_t), zq, _MPQ(x)) + ccall((:__gmpq_set, libgmp), Cvoid, (mpq_t, mpq_t), zq, _MPQ(x)) return sync_rational!(zq) end function set_z!(z::Rational{BigInt}, x::BigInt) zq = _MPQ(z) - ccall((:__gmpq_set_z, :libgmp), Cvoid, (mpq_t, MPZ.mpz_t), zq, x) + ccall((:__gmpq_set_z, libgmp), Cvoid, (mpq_t, MPZ.mpz_t), zq, x) return sync_rational!(zq) end @@ -958,7 +996,7 @@ function add!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt}) return set!(z, iszero(x.den) ? 
x : y) end zq = _MPQ(z) - ccall((:__gmpq_add, :libgmp), Cvoid, + ccall((:__gmpq_add, libgmp), Cvoid, (mpq_t,mpq_t,mpq_t), zq, _MPQ(x), _MPQ(y)) return sync_rational!(zq) end @@ -972,7 +1010,7 @@ function sub!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt}) return set_si!(z, flipsign(-1, y.num), 0) end zq = _MPQ(z) - ccall((:__gmpq_sub, :libgmp), Cvoid, + ccall((:__gmpq_sub, libgmp), Cvoid, (mpq_t,mpq_t,mpq_t), zq, _MPQ(x), _MPQ(y)) return sync_rational!(zq) end @@ -985,7 +1023,7 @@ function mul!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt}) return set_si!(z, ifelse(xor(isneg(x.num), isneg(y.num)), -1, 1), 0) end zq = _MPQ(z) - ccall((:__gmpq_mul, :libgmp), Cvoid, + ccall((:__gmpq_mul, libgmp), Cvoid, (mpq_t,mpq_t,mpq_t), zq, _MPQ(x), _MPQ(y)) return sync_rational!(zq) end @@ -1006,7 +1044,7 @@ function div!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt}) return set_si!(z, flipsign(1, x.num), 0) end zq = _MPQ(z) - ccall((:__gmpq_div, :libgmp), Cvoid, + ccall((:__gmpq_div, libgmp), Cvoid, (mpq_t,mpq_t,mpq_t), zq, _MPQ(x), _MPQ(y)) return sync_rational!(zq) end @@ -1020,7 +1058,7 @@ for (fJ, fC) in ((:+, :add), (:-, :sub), (:*, :mul), (://, :div)) end function Base.cmp(x::Rational{BigInt}, y::Rational{BigInt}) - Int(ccall((:__gmpq_cmp, :libgmp), Cint, (mpq_t, mpq_t), _MPQ(x), _MPQ(y))) + Int(ccall((:__gmpq_cmp, libgmp), Cint, (mpq_t, mpq_t), _MPQ(x), _MPQ(y))) end end # MPQ module diff --git a/base/hamt.jl b/base/hamt.jl new file mode 100644 index 0000000000000..b8bf60cb22afb --- /dev/null +++ b/base/hamt.jl @@ -0,0 +1,275 @@ +module HashArrayMappedTries + +export HAMT + +## +# Implements "Ideal Hash Trees" Phil Bagwell 2000 +# +# Notable divergence is that we forgo a resizable root table. +# Root tables improve lookup performance for large sizes, but +# limit space efficiency if the HAMT is used for a persistent +# dictionary, since each persistent operation would duplicate +# the root table. +# +# We do not handle perfect hash-collision. We would need to +# add an additional node type for Collisions. Perfect hash +# collisions should not occur in practice since we perform +# rehashing after using 55 bits (MAX_SHIFT) of the original hash. +# +# Use https://github.com/vchuravy/HashArrayMappedTries.jl if +# you want to use this implementation in a package. +# +# A HAMT is formed by tree of levels, where at each level +# we use a portion of the bits of the hash for indexing +# +# We use a branching width (ENTRY_COUNT) of 32, giving us +# 5bits of indexing per level +# 0000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000_00000 +# L11 L10 L9 L8 L7 L6 L5 L4 L3 L2 L1 L0 +# +# At each level we use a 32bit bitmap to store which elements are occupied. +# Since our storage is "sparse" we need to map from index in [0,31] to +# the actual storage index. We mask the bitmap with (1 << i) - 1 and count +# the ones in the result. The number of set ones (+1) gives us the index +# into the storage array. +# +# HAMT can be both persistent and non-persistent. +# The `path` function searches for a matching entries, and for persistency +# optionally copies the path so that it can be safely mutated. + +# TODO: +# When `trie.data` becomes empty we could remove it from it's parent, +# but we only know so fairly late. Maybe have a compact function? 
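A minimal worked sketch of the sparse-index mapping described above, using a made-up bitmap (illustration only):

```julia
# Suppose a level's 32-bit bitmap has entries at slots 1, 4 and 9.
bitmap = UInt32(1) << 1 | UInt32(1) << 4 | UInt32(1) << 9
bi     = 9                               # 5-bit slot index taken from the hash
mask   = (UInt32(1) << bi) - UInt32(1)   # keep only the slots below `bi`
count_ones(bitmap & mask) + 1            # == 3, the position in the storage array
```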
+ +const ENTRY_COUNT = UInt(32) +const BITMAP = UInt32 +const NBITS = sizeof(UInt) * 8 +# @assert ispow2(ENTRY_COUNT) +const BITS_PER_LEVEL = trailing_zeros(ENTRY_COUNT) +const LEVEL_MASK = (UInt(1) << BITS_PER_LEVEL) - UInt(1) +const MAX_SHIFT = (NBITS ÷ BITS_PER_LEVEL - 1) * BITS_PER_LEVEL + +mutable struct Leaf{K, V} + const key::K + const val::V +end + +""" + HAMT{K,V} + +A HashArrayMappedTrie that optionally supports persistence. +""" +mutable struct HAMT{K, V} + const data::Vector{Union{Leaf{K, V}, HAMT{K, V}}} + bitmap::BITMAP + HAMT{K,V}(data, bitmap) where {K,V} = new{K,V}(data, bitmap) + HAMT{K, V}() where {K, V} = new{K,V}(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 0), zero(BITMAP)) +end + +Base.@assume_effects :nothrow :effect_free function init_hamt(K, V, k, v) + # For a single element we can't have a 'hash-collision + trie = HAMT{K,V}(Vector{Union{Leaf{K, V}, HAMT{K, V}}}(undef, 1), zero(BITMAP)) + trie.data[1] = Leaf{K,V}(k,v) + return trie +end + +Base.@assume_effects :effect_free function HAMT{K,V}((k,v)::Pair{K,V}) where {K, V} + trie = init_hamt(K, V, k, v) + bi = BitmapIndex(HashState(k)) + set!(trie, bi) + return trie +end +HAMT{K,V}(kv::Pair) where {K, V} = HAMT{K,V}(convert(Pair{K,V}, kv)) + +HAMT(pair::Pair{K,V}) where {K, V} = HAMT{K,V}(pair) + +# TODO: Parameterize by hash function +struct HashState{K} + key::K + hash::UInt + depth::Int + shift::Int +end +HashState(key) = HashState(key, objectid(key), 0, 0) +# Reconstruct +function HashState(other::HashState, key) + h = HashState(key) + while h.depth !== other.depth + h = next(h) + end + return h +end + +function next(h::HashState) + depth = h.depth + 1 + shift = h.shift + BITS_PER_LEVEL + @assert h.shift <= MAX_SHIFT + if shift > MAX_SHIFT + # Note we use `UInt(depth ÷ BITS_PER_LEVEL)` to seed the hash function + # the hash docs, do we need to hash `UInt(depth ÷ BITS_PER_LEVEL)` first? + h_hash = hash(objectid(h.key), UInt(depth ÷ BITS_PER_LEVEL)) + shift = 0 + else + h_hash = h.hash + end + return HashState(h.key, h_hash, depth, shift) +end + +struct BitmapIndex + x::UInt +end +BitmapIndex(h::HashState) = BitmapIndex((h.hash >> h.shift) & LEVEL_MASK) + +Base.:(<<)(v, bi::BitmapIndex) = v << bi.x +Base.:(>>)(v, bi::BitmapIndex) = v >> bi.x + +isset(trie::HAMT, bi::BitmapIndex) = isodd(trie.bitmap >> bi) +function set!(trie::HAMT, bi::BitmapIndex) + trie.bitmap |= (UInt32(1) << bi) + # Invariant: count_ones(trie.bitmap) == Base.length(trie.data) +end + +function unset!(trie::HAMT, bi::BitmapIndex) + trie.bitmap &= ~(UInt32(1) << bi) + # Invariant: count_ones(trie.bitmap) == Base.length(trie.data) +end + +function entry_index(trie::HAMT, bi::BitmapIndex) + mask = (UInt32(1) << bi.x) - UInt32(1) + count_ones(trie.bitmap & mask) + 1 +end + +islevel_empty(trie::HAMT) = trie.bitmap == 0 +islevel_empty(::Leaf) = false + +""" + path(trie, h, copyf)::(found, present, trie, i, top, level) + +Internal function that walks a HAMT and finds the slot for hash. +Returns if a value is `present` and a value is `found`. + +It returns the `trie` and the index `i` into `trie.data`, as well +as the current `level`. + +If a copy function is provided `copyf` use the return `top` for the +new persistent tree. 
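A minimal sketch of how `path` composes with `insert!` (defined further below) for a persistent update; the helper name `persistent_assoc` is made up for illustration:

```julia
function persistent_assoc(trie::HAMT{K,V}, key::K, val::V) where {K,V}
    # copy=true: `path` copies the spine it walks and returns the copied root as `top`
    found, present, node, i, bi, top, h = path(trie, key, HashState(key), true)
    insert!(found, present, node, i, bi, h, val)  # mutate only the freshly copied nodes
    return top                                    # the original `trie` is left untouched
end
```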
+""" +@inline function path(trie::HAMT{K,V}, key, h::HashState, copy=false) where {K, V} + if copy + trie = top = HAMT{K,V}(Base.copy(trie.data), trie.bitmap) + else + trie = top = trie + end + while true + bi = BitmapIndex(h) + i = entry_index(trie, bi) + if isset(trie, bi) + next = @inbounds trie.data[i] + if next isa Leaf{K,V} + # Check if key match if not we will need to grow. + found = next.key === h.key + return found, true, trie, i, bi, top, h + end + if copy + next = HAMT{K,V}(Base.copy(next.data), next.bitmap) + @inbounds trie.data[i] = next + end + trie = next::HAMT{K,V} + else + # found empty slot + return true, false, trie, i, bi, top, h + end + h = HashArrayMappedTries.next(h) + end +end + +""" +Internal function that given an obtained path, either set the value +or grows the HAMT by inserting a new trie instead. +""" +@inline function insert!(found, present, trie::HAMT{K,V}, i, bi, h, val) where {K,V} + if found # we found a slot, just set it to the new leaf + # replace or insert + if present # replace + @inbounds trie.data[i] = Leaf{K, V}(h.key, val) + else + Base.insert!(trie.data, i, Leaf{K, V}(h.key, val)) + end + set!(trie, bi) + else + @assert present + # collision -> grow + leaf = @inbounds trie.data[i]::Leaf{K,V} + leaf_h = HashState(h, leaf.key) + if leaf_h.hash == h.hash + error("Perfect hash collision") + end + while true + new_trie = HAMT{K, V}() + if present + @inbounds trie.data[i] = new_trie + else + i = entry_index(trie, bi) + Base.insert!(trie.data, i, new_trie) + end + set!(trie, bi) + + h = next(h) + leaf_h = next(leaf_h) + bi_new = BitmapIndex(h) + bi_old = BitmapIndex(leaf_h) + if bi_new == bi_old # collision in new trie -> retry + trie = new_trie + bi = bi_new + present = false + continue + end + i_new = entry_index(new_trie, bi_new) + Base.insert!(new_trie.data, i_new, Leaf{K, V}(h.key, val)) + set!(new_trie, bi_new) + + i_old = entry_index(new_trie, bi_old) + Base.insert!(new_trie.data, i_old, leaf) + set!(new_trie, bi_old) + + break + end + end +end + +length(::Leaf) = 1 +length(trie::HAMT) = sum((length(trie.data[i]) for i in eachindex(trie.data)), init=0) + +isempty(::Leaf) = false +function isempty(trie::HAMT) + if islevel_empty(trie) + return true + end + return all(isempty(trie.data[i]) for i in eachindex(trie.data)) +end + +# DFS +function iterate(trie::HAMT, state=nothing) + if state === nothing + state = (;parent=nothing, trie, i=1) + end + while state !== nothing + i = state.i + if i > Base.length(state.trie.data) + state = state.parent + continue + end + trie = state.trie.data[i] + state = (;parent=state.parent, trie=state.trie, i=i+1) + if trie isa Leaf + return (trie.key => trie.val, state) + else + # we found a new level + state = (;parent=state, trie, i=1) + continue + end + end + return nothing +end + +end # module HashArrayMapTries diff --git a/base/hashing.jl b/base/hashing.jl index 0989fecb29839..7de9f47de3182 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -15,6 +15,8 @@ Typically, any type that implements `hash` should also implement its own [`==`]( (operator `-`) should also implement [`widen`](@ref), which is required to hash values inside heterogeneous arrays. +The hash value may change when a new Julia process is started. + ```jldoctest julia> a = hash(10) 0x95ea2955abd45275 @@ -28,6 +30,9 @@ See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref). 
hash(x::Any) = hash(x, zero(UInt)) hash(w::WeakRef, h::UInt) = hash(w.value, h) +# Types can't be deleted, so marking as total allows the compiler to look up the hash +hash(T::Type, h::UInt) = hash_uint(3h - @assume_effects :total ccall(:jl_type_hash, UInt, (Any,), T)) + ## hashing general objects ## hash(@nospecialize(x), h::UInt) = hash_uint(3h - objectid(x)) @@ -110,7 +115,7 @@ end const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81 -function hash(s::String, h::UInt) +@assume_effects :total function hash(s::String, h::UInt) h += memhash_seed ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32), s, sizeof(s), h % UInt32) + h end diff --git a/base/iddict.jl b/base/iddict.jl index 7247a85c9afc8..73f532af9bba6 100644 --- a/base/iddict.jl +++ b/base/iddict.jl @@ -24,10 +24,10 @@ IdDict{Any, String} with 3 entries: ``` """ mutable struct IdDict{K,V} <: AbstractDict{K,V} - ht::Vector{Any} + ht::Memory{Any} count::Int ndel::Int - IdDict{K,V}() where {K, V} = new{K,V}(Vector{Any}(undef, 32), 0, 0) + IdDict{K,V}() where {K, V} = new{K,V}(Memory{Any}(undef, 32), 0, 0) function IdDict{K,V}(itr) where {K, V} d = IdDict{K,V}() @@ -68,8 +68,8 @@ end empty(d::IdDict, ::Type{K}, ::Type{V}) where {K, V} = IdDict{K,V}() -function rehash!(d::IdDict, newsz = length(d.ht)) - d.ht = ccall(:jl_idtable_rehash, Vector{Any}, (Any, Csize_t), d.ht, newsz) +function rehash!(d::IdDict, newsz = length(d.ht)%UInt) + d.ht = ccall(:jl_idtable_rehash, Memory{Any}, (Any, Csize_t), d.ht, newsz) d end @@ -84,16 +84,16 @@ function sizehint!(d::IdDict, newsz) end function setindex!(d::IdDict{K,V}, @nospecialize(val), @nospecialize(key)) where {K, V} - !isa(key, K) && throw(ArgumentError("$(limitrepr(key)) is not a valid key for type $K")) + !isa(key, K) && throw(KeyTypeError(K, key)) if !(val isa V) # avoid a dynamic call - val = convert(V, val) + val = convert(V, val)::V end if d.ndel >= ((3*length(d.ht))>>2) - rehash!(d, max(length(d.ht)>>1, 32)) + rehash!(d, max((length(d.ht)%UInt)>>1, 32)) d.ndel = 0 end inserted = RefValue{Cint}(0) - d.ht = ccall(:jl_eqtable_put, Array{Any,1}, (Any, Any, Any, Ptr{Cint}), d.ht, key, val, inserted) + d.ht = ccall(:jl_eqtable_put, Memory{Any}, (Any, Any, Any, Ptr{Cint}), d.ht, key, val, inserted) d.count += inserted[] return d end @@ -133,8 +133,11 @@ function delete!(d::IdDict{K}, @nospecialize(key)) where K end function empty!(d::IdDict) - resize!(d.ht, 32) - ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), d.ht, 0, sizeof(d.ht)) + d.ht = Memory{Any}(undef, 32) + ht = d.ht + t = @_gc_preserve_begin ht + memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht)) + @_gc_preserve_end t d.ndel = 0 d.count = 0 return d @@ -143,7 +146,7 @@ end _oidd_nextind(a, i) = reinterpret(Int, ccall(:jl_eqtable_nextind, Csize_t, (Any, Csize_t), a, i)) function iterate(d::IdDict{K,V}, idx=0) where {K, V} - idx = _oidd_nextind(d.ht, idx) + idx = _oidd_nextind(d.ht, idx%UInt) idx == -1 && return nothing return (Pair{K, V}(d.ht[idx + 1]::K, d.ht[idx + 2]::V), idx + 2) end @@ -155,7 +158,7 @@ copy(d::IdDict) = typeof(d)(d) function get!(d::IdDict{K,V}, @nospecialize(key), @nospecialize(default)) where {K, V} val = ccall(:jl_eqtable_get, Any, (Any, Any, Any), d.ht, key, secret_table_token) if val === secret_table_token - val = isa(default, V) ? default : convert(V, default) + val = isa(default, V) ? 
default : convert(V, default)::V setindex!(d, val, key) return val else diff --git a/base/idset.jl b/base/idset.jl index 0a4d4275b4231..6e0b7a85a5b23 100644 --- a/base/idset.jl +++ b/base/idset.jl @@ -1,13 +1,13 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# Like Set, but using IdDict -mutable struct IdSet{T} <: AbstractSet{T} - dict::IdDict{T,Nothing} - - IdSet{T}() where {T} = new(IdDict{T,Nothing}()) - IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.dict)) +mutable struct IdSet{K} <: AbstractSet{K} + list::Memory{Any} + idxs::Union{Memory{UInt8}, Memory{UInt16}, Memory{UInt32}} + count::Int + max::Int # n.b. always <= length(list) + IdSet{T}() where {T} = new(Memory{Any}(undef, 0), Memory{UInt8}(undef, 0), 0, 0) + IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.list), copy(s.idxs), s.count, s.max) end - IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr) IdSet() = IdSet{Any}() @@ -15,22 +15,77 @@ copymutable(s::IdSet) = typeof(s)(s) emptymutable(s::IdSet{T}, ::Type{U}=T) where {T,U} = IdSet{U}() copy(s::IdSet) = typeof(s)(s) -isempty(s::IdSet) = isempty(s.dict) -length(s::IdSet) = length(s.dict) -in(@nospecialize(x), s::IdSet) = haskey(s.dict, x) -push!(s::IdSet, @nospecialize(x)) = (s.dict[x] = nothing; s) -pop!(s::IdSet, @nospecialize(x)) = (pop!(s.dict, x); x) -pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = (x in s ? pop!(s, x) : default) -delete!(s::IdSet, @nospecialize(x)) = (delete!(s.dict, x); s) +haskey(s::IdSet, @nospecialize(key)) = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, key) != -1 +isempty(s::IdSet) = s.count == 0 +length(s::IdSet) = s.count +in(@nospecialize(x), s::IdSet) = haskey(s, x) +function push!(s::IdSet, @nospecialize(x)) + idx = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, x) + if idx >= 0 + s.list[idx + 1] = x + else + if s.max < length(s.list) + idx = s.max + @assert !isassigned(s.list, idx + 1) + s.list[idx + 1] = x + s.max = idx + 1 + else + newidx = RefValue{Int}(0) + setfield!(s, :list, ccall(:jl_idset_put_key, Any, (Any, Any, Ptr{Int}), s.list, x, newidx)) + idx = newidx[] + s.max = idx < 0 ? -idx : idx + 1 + end + @assert s.list[s.max] === x + setfield!(s, :idxs, ccall(:jl_idset_put_idx, Any, (Any, Any, Int), s.list, s.idxs, idx)) + s.count += 1 + end + s +end +function _pop!(s::IdSet, @nospecialize(x)) + removed = ccall(:jl_idset_pop, Int, (Any, Any, Any), s.list, s.idxs, x) + if removed != -1 + s.count -= 1 + while s.max > 0 && !isassigned(s.list, s.max) + s.max -= 1 + end + end + removed +end +pop!(s::IdSet, @nospecialize(x)) = _pop!(s, x) == -1 ? throw(KeyError(x)) : x +pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = _pop!(s, x) == -1 ? default : x +delete!(s::IdSet, @nospecialize(x)) = (_pop!(s, x); s) -sizehint!(s::IdSet, newsz) = (sizehint!(s.dict, newsz); s) -empty!(s::IdSet) = (empty!(s.dict); s) +function sizehint!(s::IdSet, newsz) + # TODO: grow/compact list and perform rehash, if profitable? + # TODO: shrink? + # s.list = resize(s.list, newsz) + # newsz = _tablesz(newsz) + # oldsz = length(s.idxs) + # #grow at least 25% + # if newsz < (oldsz*5)>>2 + # return s + # end + # rehash!(s, newsz) + nothing +end + +function empty!(s::IdSet) + fill!(s.idxs, 0x00) + list = s.list + for i = 1:s.max + _unsetindex!(list, i) + end + s.count = 0 + s.max = 0 + s +end filter!(f, d::IdSet) = unsafe_filter!(f, d) -function iterate(s::IdSet, state...) - y = iterate(s.dict, state...) 
- y === nothing && return nothing - ((k, _), i) = y - return (k, i) +function iterate(s::IdSet{S}, state=0) where {S} + while true + state += 1 + state > s.max && return nothing + isassigned(s.list, state) && return s.list[state]::S, state + end end diff --git a/base/indices.jl b/base/indices.jl index c12d4fac69745..810b9fd5b8627 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -30,7 +30,7 @@ to implement indexing (and indexed assignment) with a single `Int` index; all other indexing expressions — including multidimensional accesses — will be recomputed to the linear index. For example, if `A` were a `2×3` custom matrix with linear indexing, and we referenced `A[1, 3]`, this would be -recomputed to the equivalent linear index and call `A[5]` since `2*1 + 3 = 5`. +recomputed to the equivalent linear index and call `A[5]` since `1 + 2*(3 - 1) = 5`. See also [`IndexCartesian`](@ref). """ @@ -53,7 +53,7 @@ to implement indexing (and indexed assignment) with exactly `N` `Int` indices; all other indexing expressions — including linear indexing — will be recomputed to the equivalent Cartesian location. For example, if `A` were a `2×3` custom matrix with cartesian indexing, and we referenced `A[5]`, this would be -recomputed to the equivalent Cartesian index and call `A[1, 3]` since `5 = 2*1 + 3`. +recomputed to the equivalent Cartesian index and call `A[1, 3]` since `5 = 1 + 2*(3 - 1)`. It is significantly more expensive to compute Cartesian indices from a linear index than it is to go the other way. The former operation requires division — a very costly operation — whereas @@ -92,7 +92,7 @@ particular, [`eachindex`](@ref) creates an iterator whose type depends on the setting of this trait. """ IndexStyle(A::AbstractArray) = IndexStyle(typeof(A)) -IndexStyle(::Type{Union{}}) = IndexLinear() +IndexStyle(::Type{Union{}}, slurp...) 
= IndexLinear() IndexStyle(::Type{<:AbstractArray}) = IndexCartesian() IndexStyle(::Type{<:Array}) = IndexLinear() IndexStyle(::Type{<:AbstractRange}) = IndexLinear() @@ -476,7 +476,7 @@ struct LinearIndices{N,R<:NTuple{N,AbstractUnitRange{Int}}} <: AbstractArray{Int indices::R end convert(::Type{LinearIndices{N,R}}, inds::LinearIndices{N}) where {N,R<:NTuple{N,AbstractUnitRange{Int}}} = - LinearIndices{N,R}(convert(R, inds.indices)) + LinearIndices{N,R}(convert(R, inds.indices))::LinearIndices{N,R} LinearIndices(::Tuple{}) = LinearIndices{0,typeof(())}(()) LinearIndices(inds::NTuple{N,AbstractUnitRange{<:Integer}}) where {N} = @@ -485,7 +485,7 @@ LinearIndices(inds::NTuple{N,Union{<:Integer,AbstractUnitRange{<:Integer}}}) whe LinearIndices(map(_convert2ind, inds)) LinearIndices(A::Union{AbstractArray,SimpleVector}) = LinearIndices(axes(A)) -_convert2ind(i::Integer) = Base.OneTo(i) +_convert2ind(i::Integer) = oneto(i) _convert2ind(ind::AbstractUnitRange) = first(ind):last(ind) function indices_promote_type(::Type{Tuple{R1,Vararg{R1,N}}}, ::Type{Tuple{R2,Vararg{R2,N}}}) where {R1,R2,N} @@ -504,6 +504,7 @@ promote_rule(a::Type{IdentityUnitRange{T1}}, b::Type{IdentityUnitRange{T2}}) whe IndexStyle(::Type{<:LinearIndices}) = IndexLinear() axes(iter::LinearIndices) = map(axes1, iter.indices) size(iter::LinearIndices) = map(length, iter.indices) +isassigned(iter::LinearIndices, i::Int) = checkbounds(Bool, iter, i) function getindex(iter::LinearIndices, i::Int) @inline @boundscheck checkbounds(iter, i) diff --git a/base/initdefs.jl b/base/initdefs.jl index 89fc88b0673a3..6309781579500 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -25,7 +25,7 @@ Stop the program with an exit code. The default exit code is zero, indicating th program completed successfully. In an interactive session, `exit()` can be called with the keyboard shortcut `^D`. """ -exit(n) = ccall(:jl_exit, Cvoid, (Int32,), n) +exit(n) = ccall(:jl_exit, Union{}, (Int32,), n) exit() = exit(0) const roottask = current_task() @@ -73,13 +73,21 @@ environment variable if set. Each entry in `DEPOT_PATH` is a path to a directory which contains subdirectories used by Julia for various purposes. Here is an overview of some of the subdirectories that may exist in a depot: +* `artifacts`: Contains content that packages use for which Pkg manages the installation of. * `clones`: Contains full clones of package repos. Maintained by `Pkg.jl` and used as a cache. +* `config`: Contains julia-level configuration such as a `startup.jl` * `compiled`: Contains precompiled `*.ji` files for packages. Maintained by Julia. * `dev`: Default directory for `Pkg.develop`. Maintained by `Pkg.jl` and the user. * `environments`: Default package environments. For instance the global environment for a specific julia version. Maintained by `Pkg.jl`. * `logs`: Contains logs of `Pkg` and `REPL` operations. Maintained by `Pkg.jl` and `Julia`. * `packages`: Contains packages, some of which were explicitly installed and some which are implicit dependencies. Maintained by `Pkg.jl`. * `registries`: Contains package registries. By default only `General`. Maintained by `Pkg.jl`. +* `scratchspaces`: Contains content that a package itself installs via the [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) package. `Pkg.gc()` will delete content that is known to be unused. + +!!! 
note + Packages that want to store content should use the `scratchspaces` subdirectory via + [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) instead of creating new + subdirectories in the depot root. See also [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and [Code Loading](@ref code-loading). @@ -247,11 +255,24 @@ end function load_path_expand(env::AbstractString)::Union{String, Nothing} # named environment? if startswith(env, '@') - # `@` in JULIA_LOAD_PATH is expanded early (at startup time) - # if you put a `@` in LOAD_PATH manually, it's expanded late + # `@.` in JULIA_LOAD_PATH is expanded early (at startup time) + # if you put a `@.` in LOAD_PATH manually, it's expanded late env == "@" && return active_project(false) env == "@." && return current_project() env == "@stdlib" && return Sys.STDLIB + if startswith(env, "@scriptdir") + if @isdefined(PROGRAM_FILE) + dir = dirname(PROGRAM_FILE) + else + cmds = unsafe_load_commands(opts.commands) + if any((cmd, arg)->cmd_suppresses_program(cmd), cmds) + # Usage error. The user did not pass a script. + return nothing + end + dir = dirname(ARGS[1]) + end + return abspath(replace(env, "@scriptdir" => dir)) + end env = replace(env, '#' => VERSION.major, count=1) env = replace(env, '#' => VERSION.minor, count=1) env = replace(env, '#' => VERSION.patch, count=1) @@ -315,6 +336,9 @@ end set_active_project(projfile::Union{AbstractString,Nothing}) Set the active `Project.toml` file to `projfile`. See also [`Base.active_project`](@ref). + +!!! compat "Julia 1.8" + This function requires at least Julia 1.8. """ function set_active_project(projfile::Union{AbstractString,Nothing}) ACTIVE_PROJECT[] = projfile @@ -333,6 +357,10 @@ end Return the fully expanded value of [`LOAD_PATH`](@ref) that is searched for projects and packages. + +!!! note + `load_path` may return a reference to a cached value so it is not safe to modify the + returned vector. """ function load_path() cache = LOADING_CACHE[] @@ -347,32 +375,72 @@ end ## atexit: register exit hooks ## -const atexit_hooks = Callable[ - () -> Filesystem.temp_cleanup_purge(force=true) -] +const atexit_hooks = Callable[] +const _atexit_hooks_lock = ReentrantLock() +global _atexit_hooks_finished::Bool = false """ atexit(f) -Register a zero-argument function `f()` to be called at process exit. `atexit()` hooks are -called in last in first out (LIFO) order and run before object finalizers. +Register a zero- or one-argument function `f()` to be called at process exit. +`atexit()` hooks are called in last in first out (LIFO) order and run before +object finalizers. + +If `f` has a method defined for one integer argument, it will be called as +`f(n::Int32)`, where `n` is the current exit code, otherwise it will be called +as `f()`. + +!!! compat "Julia 1.9" + The one-argument form requires Julia 1.9 Exit hooks are allowed to call `exit(n)`, in which case Julia will exit with exit code `n` (instead of the original exit code). If more than one exit hook calls `exit(n)`, then Julia will exit with the exit code corresponding to the last called exit hook that calls `exit(n)`. (Because exit hooks are called in LIFO order, "last called" is equivalent to "first registered".) + +Note: Once all exit hooks have been called, no more exit hooks can be registered, +and any call to `atexit(f)` after all hooks have completed will throw an exception. +This situation may occur if you are registering exit hooks from background Tasks that +may still be executing concurrently during shutdown. 
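A minimal sketch of registering both hook forms (the hooks themselves are made up for illustration):

```julia
atexit(() -> println("shutting down"))                        # zero-argument hook
atexit((code::Int32) -> println("exiting with code ", code))  # one-argument hook: receives the exit code
```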
""" -atexit(f::Function) = (pushfirst!(atexit_hooks, f); nothing) +function atexit(f::Function) + Base.@lock _atexit_hooks_lock begin + _atexit_hooks_finished && error("cannot register new atexit hook; already exiting.") + pushfirst!(atexit_hooks, f) + return nothing + end +end + +function _atexit(exitcode::Cint) + # Don't hold the lock around the iteration, just in case any other thread executing in + # parallel tries to register a new atexit hook while this is running. We don't want to + # block that thread from proceeding, and we can allow it to register its hook which we + # will immediately run here. + while true + local f + @lock _atexit_hooks_lock begin + # If this is the last iteration, atomically disable atexit hooks to prevent + # someone from registering a hook that will never be run. + # (We do this inside the loop, so that it is atomic: no one can have registered + # a hook that never gets run, and we run all the hooks we know about until + # the vector is empty.) + if isempty(atexit_hooks) + global _atexit_hooks_finished = true + break + end -function _atexit() - while !isempty(atexit_hooks) - f = popfirst!(atexit_hooks) + f = popfirst!(atexit_hooks) + end try - f() + if hasmethod(f, (Cint,)) + f(exitcode) + else + f() + end catch ex showerror(stderr, ex) - Base.show_backtrace(stderr, catch_backtrace()) + show_backtrace(stderr, catch_backtrace()) println(stderr) end end @@ -392,7 +460,7 @@ function _postoutput() f() catch ex showerror(stderr, ex) - Base.show_backtrace(stderr, catch_backtrace()) + show_backtrace(stderr, catch_backtrace()) println(stderr) end end diff --git a/base/int.jl b/base/int.jl index 8f60312551086..61576d4360835 100644 --- a/base/int.jl +++ b/base/int.jl @@ -174,8 +174,12 @@ julia> abs(-3) julia> abs(1 + im) 1.4142135623730951 -julia> abs(typemin(Int64)) --9223372036854775808 +julia> abs.(Int8[-128 -127 -126 0 126 127]) # overflow at typemin(Int8) +1×6 Matrix{Int8}: + -128 127 126 0 126 127 + +julia> maximum(abs, [1, -2, 3, -4]) +4 ``` """ function abs end @@ -198,8 +202,11 @@ See also: [`signed`](@ref), [`sign`](@ref), [`signbit`](@ref). julia> unsigned(-2) 0xfffffffffffffffe -julia> unsigned(2) -0x0000000000000002 +julia> unsigned(Int8(2)) +0x02 + +julia> typeof(ans) +UInt8 julia> signed(unsigned(-2)) -2 @@ -475,6 +482,32 @@ julia> trailing_ones(3) """ trailing_ones(x::Integer) = trailing_zeros(~x) +""" + top_set_bit(x::Integer) -> Integer + +The number of bits in `x`'s binary representation, excluding leading zeros. + +Equivalently, the position of the most significant set bit in `x`'s binary +representation, measured from the least significant side. + +Negative `x` are only supported when `x::BitSigned`. + +See also: [`ndigits0z`](@ref), [`ndigits`](@ref). 
+ +# Examples +```jldoctest +julia> Base.top_set_bit(4) +3 + +julia> Base.top_set_bit(0) +0 + +julia> Base.top_set_bit(-1) +64 +``` +""" +top_set_bit(x::BitInteger) = 8sizeof(x) - leading_zeros(x) + ## integer comparisons ## (< )(x::T, y::T) where {T<:BitUnsigned} = ult_int(x, y) @@ -507,11 +540,11 @@ trailing_ones(x::Integer) = trailing_zeros(~x) for to in BitInteger_types, from in (BitInteger_types..., Bool) if !(to === from) - if to.size < from.size + if Core.sizeof(to) < Core.sizeof(from) @eval rem(x::($from), ::Type{$to}) = trunc_int($to, x) elseif from === Bool @eval rem(x::($from), ::Type{$to}) = convert($to, x) - elseif from.size < to.size + elseif Core.sizeof(from) < Core.sizeof(to) if from <: Signed @eval rem(x::($from), ::Type{$to}) = sext_int($to, x) else @@ -571,8 +604,17 @@ if nameof(@__MODULE__) === :Base # Examples ```jldoctest - julia> 129 % Int8 + julia> x = 129 % Int8 -127 + + julia> typeof(x) + Int8 + + julia> x = 129 % BigInt + 129 + + julia> typeof(x) + BigInt ``` """ $fname(x::Integer, T::Type{<:Integer}) end @@ -587,70 +629,6 @@ mod(x::Integer, ::Type{T}) where {T<:Integer} = rem(x, T) unsafe_trunc(::Type{T}, x::Integer) where {T<:Integer} = rem(x, T) -""" - trunc([T,] x) - trunc(x; digits::Integer= [, base = 10]) - trunc(x; sigdigits::Integer= [, base = 10]) - -`trunc(x)` returns the nearest integral value of the same type as `x` whose absolute value -is less than or equal to the absolute value of `x`. - -`trunc(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is -not representable. - -Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). - -See also: [`%`](@ref rem), [`floor`](@ref), [`unsigned`](@ref), [`unsafe_trunc`](@ref). - -# Examples -```jldoctest -julia> trunc(2.22) -2.0 - -julia> trunc(-2.22, digits=1) --2.2 - -julia> trunc(Int, -2.22) --2 -``` -""" -function trunc end - -""" - floor([T,] x) - floor(x; digits::Integer= [, base = 10]) - floor(x; sigdigits::Integer= [, base = 10]) - -`floor(x)` returns the nearest integral value of the same type as `x` that is less than or -equal to `x`. - -`floor(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is -not representable. - -Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). -""" -function floor end - -""" - ceil([T,] x) - ceil(x; digits::Integer= [, base = 10]) - ceil(x; sigdigits::Integer= [, base = 10]) - -`ceil(x)` returns the nearest integral value of the same type as `x` that is greater than or -equal to `x`. - -`ceil(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is not -representable. - -Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). -""" -function ceil end - -round(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x) -trunc(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x) -floor(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x) - ceil(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x) - ## integer construction ## """ @@ -712,6 +690,15 @@ julia> big"_" ERROR: ArgumentError: invalid number format _ for BigInt or BigFloat [...] ``` + +!!! warning + Using `@big_str` for constructing [`BigFloat`](@ref) values may not result + in the behavior that might be naively expected: as a macro, `@big_str` + obeys the global precision ([`setprecision`](@ref)) and rounding mode + ([`setrounding`](@ref)) settings as they are at *load time*. 
Thus, a + function like `() -> precision(big"0.3")` returns a constant whose value + depends on the value of the precision at the point when the function is + defined, **not** at the precision at the time when the function is called. """ macro big_str(s) message = "invalid number format $s for BigInt or BigFloat" @@ -765,13 +752,24 @@ promote_rule(::Type{UInt128}, ::Type{Int128}) = UInt128 The lowest value representable by the given (real) numeric DataType `T`. +See also: [`floatmin`](@ref), [`typemax`](@ref), [`eps`](@ref). + # Examples ```jldoctest +julia> typemin(Int8) +-128 + +julia> typemin(UInt32) +0x00000000 + julia> typemin(Float16) -Inf16 julia> typemin(Float32) -Inf32 + +julia> nextfloat(-Inf32) # smallest finite Float32 floating point number +-3.4028235f38 ``` """ function typemin end @@ -794,7 +792,10 @@ julia> typemax(UInt32) julia> typemax(Float64) Inf -julia> floatmax(Float32) # largest finite floating point number +julia> typemax(Float32) +Inf32 + +julia> floatmax(Float32) # largest finite Float32 floating point number 3.4028235f38 ``` """ diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 00632667f659e..ea35da0910736 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -48,33 +48,46 @@ function gcd(a::T, b::T) where T<:Integer end function gcd(a::T, b::T) where T<:BitInteger - a == 0 && return checked_abs(b) - b == 0 && return checked_abs(a) - r = _gcd(a, b) - signbit(r) && __throw_gcd_overflow(a, b) - return r + a == 0 && return Base.checked_abs(b) + b == 0 && return Base.checked_abs(a) + if a isa Signed && a == typemin(T) + if a == b + Base.__throw_gcd_overflow(a, b) + else + a, b = b, a + end + end + return _gcd(a, b) end @noinline __throw_gcd_overflow(a, b) = throw(OverflowError(LazyString("gcd(", a, ", ", b, ") overflows"))) +function absdiff(x::T,y::T) where {T<:Unsigned} + d = max(x,y) - min(x,y) + d, d +end +function absdiff(x::T,y::T) where {T<:Signed} + d = x - y + abs(d), d +end # binary GCD (aka Stein's) algorithm # about 1.7x (2.1x) faster for random Int64s (Int128s) # Unfortunately, we need to manually annotate this as `@assume_effects :terminates_locally` to work around #41694. # Since this is used in the Rational constructor, constant folding is something we do care about here. -@assume_effects :terminates_locally function _gcd(a::T, b::T) where T<:BitInteger - za = trailing_zeros(a) - zb = trailing_zeros(b) +@assume_effects :terminates_locally function _gcd(ain::T, bin::T) where T<:BitInteger + zb = trailing_zeros(bin) + za = trailing_zeros(ain) + a = abs(ain) + b = abs(bin >> zb) k = min(za, zb) - u = unsigned(abs(a >> za)) - v = unsigned(abs(b >> zb)) - while u != v - if u > v - u, v = v, u - end - v -= u - v >>= trailing_zeros(v) + while a != 0 + a >>= za + absd, diff = absdiff(a, b) + za = trailing_zeros(diff) + b = min(a, b) + a = absd end - r = u << k + r = b << k return r % T end @@ -177,7 +190,7 @@ julia> gcdx(240, 46) Bézout coefficients that are computed by the extended Euclidean algorithm. (Ref: D. Knuth, TAoCP, 2/e, p. 325, Algorithm X.) For signed integers, these coefficients `u` and `v` are minimal in - the sense that ``|u| < |y/d|`` and ``|v| < |x/d|``. Furthermore, + the sense that ``|u| < |b/d|`` and ``|v| < |a/d|``. Furthermore, the signs of `u` and `v` are chosen so that `d` is positive. 
For unsigned integers, the coefficients `u` and `v` might be near their `typemax`, and the identity then holds only via the unsigned @@ -188,7 +201,7 @@ Base.@assume_effects :terminates_locally function gcdx(a::Integer, b::Integer) # a0, b0 = a, b s0, s1 = oneunit(T), zero(T) t0, t1 = s1, s0 - # The loop invariant is: s0*a0 + t0*b0 == a + # The loop invariant is: s0*a0 + t0*b0 == a && s1*a0 + t1*b0 == b x = a % T y = b % T while y != 0 @@ -205,7 +218,7 @@ gcdx(a::T, b::T) where T<:Real = throw(MethodError(gcdx, (a,b))) # multiplicative inverse of n mod m, error if none """ - invmod(n, m) + invmod(n::Integer, m::Integer) Take the inverse of `n` modulo `m`: `y` such that ``n y = 1 \\pmod m``, and ``div(y,m) = 0``. This will throw an error if ``m = 0``, or if @@ -244,6 +257,43 @@ function invmod(n::Integer, m::Integer) return mod(x, m) end +""" + invmod(n::Integer, T) where {T <: Base.BitInteger} + invmod(n::T) where {T <: Base.BitInteger} + +Compute the modular inverse of `n` in the integer ring of type `T`, i.e. modulo +`2^N` where `N = 8*sizeof(T)` (e.g. `N = 32` for `Int32`). In other words these +methods satisfy the following identities: +``` +n * invmod(n) == 1 +(n * invmod(n, T)) % T == 1 +(n % T) * invmod(n, T) == 1 +``` +Note that `*` here is modular multiplication in the integer ring, `T`. + +Specifying the modulus implied by an integer type as an explicit value is often +inconvenient since the modulus is by definition too big to be represented by the +type. + +The modular inverse is computed much more efficiently than the general case +using the algorithm described in https://arxiv.org/pdf/2204.04342.pdf. + +!!! compat "Julia 1.11" + The `invmod(n)` and `invmod(n, T)` methods require Julia 1.11 or later. +""" +invmod(n::Integer, ::Type{T}) where {T<:BitInteger} = invmod(n % T) + +function invmod(n::T) where {T<:BitInteger} + isodd(n) || throw(DomainError(n, "Argument must be odd.")) + x = (3*n ⊻ 2) % T + y = (1 - n*x) % T + for _ = 1:trailing_zeros(2*sizeof(T)) + x *= y + true + y *= y + end + return x +end + # ^ for any x supporting * to_power_type(x) = convert(Base._return_type(*, Tuple{typeof(x), typeof(x)}), x) @noinline throw_domerr_powbysq(::Any, p) = throw(DomainError(p, LazyString( @@ -369,13 +419,26 @@ julia> powermod(5, 3, 19) ``` """ function powermod(x::Integer, p::Integer, m::T) where T<:Integer - p < 0 && return powermod(invmod(x, m), -p, m) p == 0 && return mod(one(m),m) + # When the concrete type of p is signed and has the lowest value, + # `p != 0 && p == -p` is equivalent to `p == typemin(typeof(p))` for 2's complement representation. + # but will work for integer types like `BigInt` that don't have `typemin` defined + # It needs special handling otherwise will cause overflow problem. + if p == -p + imod = invmod(x, m) + rhalf = powermod(imod, -(p÷2), m) + r::T = mod(widemul(rhalf, rhalf), m) + isodd(p) && (r = mod(widemul(r, imod), m)) + #else odd + return r + elseif p < 0 + return powermod(invmod(x, m), -p, m) + end (m == 1 || m == -1) && return zero(m) b = oftype(m,mod(x,m)) # this also checks for divide by zero t = prevpow(2, p) - r::T = 1 + r = 1 while true if p >= t r = mod(widemul(r,b),m) @@ -391,9 +454,9 @@ end # optimization: promote the modulus m to BigInt only once (cf. 
widemul in generic powermod above) powermod(x::Integer, p::Integer, m::Union{Int128,UInt128}) = oftype(m, powermod(x, p, big(m))) -_nextpow2(x::Unsigned) = oneunit(x)<<((sizeof(x)<<3)-leading_zeros(x-oneunit(x))) +_nextpow2(x::Unsigned) = oneunit(x)<<(top_set_bit(x-oneunit(x))) _nextpow2(x::Integer) = reinterpret(typeof(x),x < 0 ? -_nextpow2(unsigned(-x)) : _nextpow2(unsigned(x))) -_prevpow2(x::Unsigned) = one(x) << unsigned((sizeof(x)<<3)-leading_zeros(x)-1) +_prevpow2(x::Unsigned) = one(x) << unsigned(top_set_bit(x)-1) _prevpow2(x::Integer) = reinterpret(typeof(x),x < 0 ? -_prevpow2(unsigned(-x)) : _prevpow2(unsigned(x))) """ @@ -526,13 +589,13 @@ const powers_of_ten = [ 0x002386f26fc10000, 0x016345785d8a0000, 0x0de0b6b3a7640000, 0x8ac7230489e80000, ] function bit_ndigits0z(x::Base.BitUnsigned64) - lz = (sizeof(x)<<3)-leading_zeros(x) + lz = top_set_bit(x) nd = (1233*lz)>>12+1 nd -= x < powers_of_ten[nd] end function bit_ndigits0z(x::UInt128) n = 0 - while x > 0x8ac7230489e80000 + while x > 0x8ac7230489e80000 # 10e18 x = div(x,0x8ac7230489e80000) n += 19 end @@ -571,12 +634,12 @@ function ndigits0zpb(x::Integer, b::Integer) x = abs(x) if x isa Base.BitInteger x = unsigned(x)::Unsigned - b == 2 && return sizeof(x)<<3 - leading_zeros(x) - b == 8 && return (sizeof(x)<<3 - leading_zeros(x) + 2) ÷ 3 + b == 2 && return top_set_bit(x) + b == 8 && return (top_set_bit(x) + 2) ÷ 3 b == 16 && return sizeof(x)<<1 - leading_zeros(x)>>2 b == 10 && return bit_ndigits0z(x) if ispow2(b) - dv, rm = divrem(sizeof(x)<<3 - leading_zeros(x), trailing_zeros(b)) + dv, rm = divrem(top_set_bit(x), trailing_zeros(b)) return iszero(rm) ? dv : dv + 1 end end @@ -638,6 +701,9 @@ function ndigits0z(x::Integer, b::Integer) end end +# Extends the definition in base/int.jl +top_set_bit(x::Integer) = ceil(Integer, log2(x + oneunit(x))) + """ ndigits(n::Integer; base::Integer=10, pad::Integer=1) @@ -649,6 +715,9 @@ See also [`digits`](@ref), [`count_ones`](@ref). 
# Examples ```jldoctest +julia> ndigits(0) +1 + julia> ndigits(12345) 5 @@ -670,7 +739,7 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba ## integer to string functions ## function bin(x::Unsigned, pad::Int, neg::Bool) - m = 8 * sizeof(x) - leading_zeros(x) + m = top_set_bit(x) n = neg + max(pad, m) a = StringVector(n) # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes @@ -692,12 +761,12 @@ function bin(x::Unsigned, pad::Int, neg::Bool) x >>= 0x1 i -= 1 end - if neg; @inbounds a[1]=0x2d; end + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end function oct(x::Unsigned, pad::Int, neg::Bool) - m = div(8 * sizeof(x) - leading_zeros(x) + 2, 3) + m = div(top_set_bit(x) + 2, 3) n = neg + max(pad, m) a = StringVector(n) i = n @@ -706,29 +775,77 @@ function oct(x::Unsigned, pad::Int, neg::Bool) x >>= 0x3 i -= 1 end - if neg; @inbounds a[1]=0x2d; end + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end # 2-digit decimal characters ("00":"99") -const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] +const _dec_d100 = UInt16[ +# generating expression: UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] +# 0 0, 0 1, 0 2, 0 3, and so on in little-endian + 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930, + 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931, + 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932, + 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933, + 0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934, + 0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, + 0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936, + 0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, + 0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938, + 0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 +] -function dec(x::Unsigned, pad::Int, neg::Bool) - n = neg + ndigits(x, pad=pad) - a = StringVector(n) - i = n - @inbounds while i >= 2 - d, r = divrem(x, 0x64) - d100 = _dec_d100[(r % Int)::Int + 1] - a[i-1] = d100 % UInt8 - a[i] = (d100 >> 0x8) % UInt8 - x = oftype(x, d) +function append_c_digits(olength::Int, digits::Unsigned, buf, pos::Int) + i = olength + while i >= 2 + d, c = divrem(digits, 0x64) + digits = oftype(digits, d) + @inbounds d100 = _dec_d100[(c % Int) + 1] + @inbounds buf[pos + i - 2] = d100 % UInt8 + @inbounds buf[pos + i - 1] = (d100 >> 0x8) % UInt8 i -= 2 end - if i > neg - @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + if i == 1 + @inbounds buf[pos] = UInt8('0') + rem(digits, 0xa) % UInt8 + i -= 1 + end + return pos + olength +end + +function append_nine_digits(digits::Unsigned, buf, pos::Int) + if digits == 0 + for _ = 1:9 + @inbounds buf[pos] = UInt8('0') + pos += 1 + end + return pos + end + return @inline append_c_digits(9, digits, buf, pos) # force loop-unrolling on the length +end + +function append_c_digits_fast(olength::Int, digits::Unsigned, buf, pos::Int) + i = olength + # n.b. olength may be larger than required to print all of `digits` (and will be padded + # with zeros), but the printed number will be undefined if it is smaller, and may include + # bits of both the high and low bytes. 
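    # While `digits` is wider than a machine word, the loop below peels off nine
    # decimal digits at a time with a native-word `divrem` by 10^9 (0x3b9aca00),
    # then hands the remaining low digits to `append_c_digits`.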
+ maxpow10 = 0x3b9aca00 # 10e9 as UInt32 + while i > 9 && digits > typemax(UInt) + # do everything in cheap math chunks, using the processor's native math size + d, c = divrem(digits, maxpow10) + digits = oftype(digits, d) + append_nine_digits(c % UInt32, buf, pos + i - 9) + i -= 9 end - if neg; @inbounds a[1]=0x2d; end + append_c_digits(i, digits % UInt, buf, pos) + return pos + olength +end + + +function dec(x::Unsigned, pad::Int, neg::Bool) + n = neg + ndigits(x, pad=pad) + a = StringVector(n) + append_c_digits_fast(n, x, a, 1) + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end @@ -749,7 +866,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) d = (x % UInt8)::UInt8 & 0xf @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end - if neg; @inbounds a[1]=0x2d; end + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end @@ -774,7 +891,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) end i -= 1 end - if neg; @inbounds a[1]=0x2d; end + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end @@ -901,7 +1018,7 @@ end """ hastypemax(T::Type) -> Bool -Return true if and only if the extrema `typemax(T)` and `typemin(T)` are defined. +Return `true` if and only if the extrema `typemax(T)` and `typemin(T)` are defined. """ hastypemax(::Base.BitIntegerType) = true hastypemax(::Type{Bool}) = true @@ -1015,7 +1132,7 @@ julia> factorial(big(21)) * [Factorial](https://en.wikipedia.org/wiki/Factorial) on Wikipedia. """ function factorial(n::Integer) - n < 0 && throw(DomainError(n, "`n` must be nonnegative.")) + n < 0 && throw(DomainError(n, "`n` must be non-negative.")) f::typeof(n*n) = 1 for i::typeof(n*n) = 2:n f *= i @@ -1079,9 +1196,40 @@ Base.@assume_effects :terminates_locally function binomial(n::T, k::T) where T<: while rr <= k xt = div(widemul(x, nn), rr) x = xt % T - x == xt || throw(OverflowError(LazyString("binomial(", n0, ", ", k0, " overflows"))) + x == xt || throw(OverflowError(LazyString("binomial(", n0, ", ", k0, ") overflows"))) rr += one(T) nn += one(T) end copysign(x, sgn) end + +""" + binomial(x::Number, k::Integer) + +The generalized binomial coefficient, defined for `k ≥ 0` by +the polynomial +```math +\\frac{1}{k!} \\prod_{j=0}^{k-1} (x - j) +``` +When `k < 0` it returns zero. + +For the case of integer `x`, this is equivalent to the ordinary +integer binomial coefficient +```math +\\binom{n}{k} = \\frac{n!}{k! (n-k)!} +``` + +Further generalizations to non-integer `k` are mathematically possible, but +involve the Gamma function and/or the beta function, which are +not provided by the Julia standard library but are available +in external packages such as [SpecialFunctions.jl](https://github.com/JuliaMath/SpecialFunctions.jl). + +# External links +* [Binomial coefficient](https://en.wikipedia.org/wiki/Binomial_coefficient) on Wikipedia. +""" +function binomial(x::Number, k::Integer) + k < 0 && return zero(x)/one(k) + # we don't use prod(i -> (x-i+1), 1:k) / factorial(k), + # and instead divide each term by i, to avoid spurious overflow. + return prod(i -> (x-(i-1))/i, OneTo(k), init=oneunit(x)/one(k)) +end diff --git a/base/io.jl b/base/io.jl index ca96075a1b11e..fd286297ec090 100644 --- a/base/io.jl +++ b/base/io.jl @@ -25,6 +25,14 @@ end lock(::IO) = nothing unlock(::IO) = nothing + +""" + reseteof(io) + +Clear the EOF flag from IO so that further reads (and possibly writes) are +again allowed. Note that it may immediately get re-set, if the underlying +stream object is at EOF and cannot be resumed. 
+""" reseteof(x::IO) = nothing const SZ_UNBUFFERED_IO = 65536 @@ -68,6 +76,10 @@ Shutdown the write half of a full-duplex I/O stream. Performs a [`flush`](@ref) first. Notify the other end that no more data will be written to the underlying file. This is not supported by all IO types. +If implemented, `closewrite` causes subsequent `read` or `eof` calls that would +block to instead throw EOF or return true, respectively. If the stream is +already closed, this is idempotent. + # Examples ```jldoctest julia> io = Base.BufferStream(); # this never blocks, so we can read and write on the same Task @@ -173,6 +185,19 @@ function will block to wait for more data if necessary, and then return `false`. it is always safe to read one byte after seeing `eof` return `false`. `eof` will return `false` as long as buffered data is still available, even if the remote end of a connection is closed. + +# Examples +```jldoctest +julia> b = IOBuffer("my buffer"); + +julia> eof(b) +false + +julia> seekend(b); + +julia> eof(b) +true +``` """ function eof end @@ -206,10 +231,11 @@ julia> read(io, String) ``` """ read(stream, t) +read(stream, ::Type{Union{}}, slurp...; kwargs...) = error("cannot read a value of type Union{}") + """ write(io::IO, x) - write(filename::AbstractString, x) Write the canonical binary representation of a value to the given I/O stream or file. Return the number of bytes written into the stream. See also [`print`](@ref) to @@ -426,10 +452,10 @@ for f in ( end read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io)::IO, byte)::UInt8 unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io)::IO, p, nb) -readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::UInt8; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::AbstractChar; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::AbstractString; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::AbstractVector; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io)::IO, target, keep, out) readbytes!(io::AbstractPipe, target::AbstractVector{UInt8}, n=length(target)) = readbytes!(pipe_reader(io)::IO, target, n) peek(io::AbstractPipe, ::Type{T}) where {T} = peek(pipe_reader(io)::IO, T)::T @@ -443,17 +469,29 @@ wait_close(io::AbstractPipe) = (wait_close(pipe_writer(io)::IO); wait_close(pipe # Exception-safe wrappers (io = open(); try f(io) finally close(io)) + +""" + write(filename::AbstractString, content) + +Write the canonical binary representation of `content` to a file, which will be created if it does not exist yet or overwritten if it does exist. + +Return the number of bytes written into the file. +""" write(filename::AbstractString, a1, args...) = open(io->write(io, a1, args...), convert(String, filename)::String, "w") """ - read(filename::AbstractString, args...) + read(filename::AbstractString) -Open a file and read its contents. 
`args` is passed to `read`: this is equivalent to -`open(io->read(io, args...), filename)`. +Read the entire contents of a file as a `Vector{UInt8}`. read(filename::AbstractString, String) Read the entire contents of a file as a string. + + read(filename::AbstractString, args...) + +Open a file and read its contents. `args` is passed to `read`: this is equivalent to +`open(io->read(io, args...), filename)`. """ read(filename::AbstractString, args...) = open(io->read(io, args...), convert(String, filename)::String) @@ -473,11 +511,15 @@ read!(filename::AbstractString, a) = open(io->read!(io, a), convert(String, file readuntil(stream::IO, delim; keep::Bool = false) readuntil(filename::AbstractString, delim; keep::Bool = false) -Read a string from an I/O stream or a file, up to the given delimiter. +Read a string from an I/O `stream` or a file, up to the given delimiter. The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. Keyword argument `keep` controls whether the delimiter is included in the result. The text is assumed to be encoded in UTF-8. +Return a `String` if `delim` is an `AbstractChar` or a string +or otherwise return a `Vector{typeof(delim)}`. See also [`copyuntil`](@ref) +to instead write in-place to another stream (which can be a preallocated [`IOBuffer`](@ref)). + # Examples ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); @@ -491,7 +533,40 @@ julia> readuntil("my_file.txt", '.', keep = true) julia> rm("my_file.txt") ``` """ -readuntil(filename::AbstractString, args...; kw...) = open(io->readuntil(io, args...; kw...), convert(String, filename)::String) +readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) +readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...)) +readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...))) +readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep) + + +""" + copyuntil(out::IO, stream::IO, delim; keep::Bool = false) + copyuntil(out::IO, filename::AbstractString, delim; keep::Bool = false) + +Copy a string from an I/O `stream` or a file, up to the given delimiter, to +the `out` stream, returning `out`. +The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. +Keyword argument `keep` controls whether the delimiter is included in the result. +The text is assumed to be encoded in UTF-8. + +Similar to [`readuntil`](@ref), which returns a `String`; in contrast, +`copyuntil` writes directly to `out`, without allocating a string. +(This can be used, for example, to read data into a pre-allocated [`IOBuffer`](@ref).) + +# Examples +```jldoctest +julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); + +julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", 'L'))) +"Julia" + +julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true))) +"JuliaLang is a GitHub organization." + +julia> rm("my_file.txt") +``` +""" +copyuntil(out::IO, filename::AbstractString, delim; kw...) = open(io->copyuntil(out, io, delim; kw...), convert(String, filename)::String) """ readline(io::IO=stdin; keep::Bool=false) @@ -504,6 +579,11 @@ false (as it is by default), these trailing newline characters are removed from line before it is returned. 
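The layering introduced above means `readuntil` on a stream is now a thin wrapper that lets `copyuntil` fill a scratch `IOBuffer` and then takes its bytes. A rough sketch of the equivalence (not a doctest; it assumes a Julia build that already includes the new `copyuntil`):

```julia
io1 = IOBuffer("key=value")
io2 = IOBuffer("key=value")
v1 = readuntil(io1, UInt8('='))                     # Vector{UInt8} holding "key"
v2 = take!(copyuntil(IOBuffer(), io2, UInt8('=')))  # same bytes via an explicit buffer
@assert v1 == v2 == codeunits("key")
```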
When `keep` is true, they are returned as part of the line. +Return a `String`. See also [`copyline`](@ref) to instead write in-place +to another stream (which can be a preallocated [`IOBuffer`](@ref)). + +See also [`readuntil`](@ref) for reading until more general delimiters. + # Examples ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); @@ -525,21 +605,63 @@ Logan "Logan" ``` """ -function readline(filename::AbstractString; keep::Bool=false) - open(filename) do f - readline(f, keep=keep) - end -end +readline(filename::AbstractString; keep::Bool=false) = + open(io -> readline(io; keep), filename) +readline(s::IO=stdin; keep::Bool=false) = + String(_unsafe_take!(copyline(IOBuffer(sizehint=70), s; keep))) + +""" + copyline(out::IO, io::IO=stdin; keep::Bool=false) + copyline(out::IO, filename::AbstractString; keep::Bool=false) + +Copy a single line of text from an I/O `stream` or a file to the `out` stream, +returning `out`. -function readline(s::IO=stdin; keep::Bool=false) - line = readuntil(s, 0x0a, keep=true)::Vector{UInt8} - i = length(line) - if keep || i == 0 || line[i] != 0x0a - return String(line) - elseif i < 2 || line[i-1] != 0x0d - return String(resize!(line,i-1)) +When reading from a file, the text is assumed to be encoded in UTF-8. Lines in the +input end with `'\\n'` or `"\\r\\n"` or the end of an input stream. When `keep` is +false (as it is by default), these trailing newline characters are removed from the +line before it is returned. When `keep` is true, they are returned as part of the +line. + +Similar to [`readline`](@ref), which returns a `String`; in contrast, +`copyline` writes directly to `out`, without allocating a string. +(This can be used, for example, to read data into a pre-allocated [`IOBuffer`](@ref).) + +See also [`copyuntil`](@ref) for reading until more general delimiters. + +# Examples +```jldoctest +julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); + +julia> String(take!(copyline(IOBuffer(), "my_file.txt"))) +"JuliaLang is a GitHub organization." 
+ +julia> String(take!(copyline(IOBuffer(), "my_file.txt", keep=true))) +"JuliaLang is a GitHub organization.\\n" + +julia> rm("my_file.txt") +``` +""" +copyline(out::IO, filename::AbstractString; keep::Bool=false) = + open(io -> copyline(out, io; keep), filename) + +# fallback to optimized methods for IOBuffer in iobuffer.jl +function copyline(out::IO, s::IO; keep::Bool=false) + if keep + return copyuntil(out, s, 0x0a, keep=true) else - return String(resize!(line,i-2)) + # more complicated to deal with CRLF logic + while !eof(s) + b = read(s, UInt8) + b == 0x0a && break + if b == 0x0d && !eof(s) + b = read(s, UInt8) + b == 0x0a && break + write(out, 0x0d) + end + write(out, b) + end + return out end end @@ -670,10 +792,17 @@ end @noinline unsafe_write(s::IO, p::Ref{T}, n::Integer) where {T} = unsafe_write(s, unsafe_convert(Ref{T}, p)::Ptr, n) # mark noinline to ensure ref is gc-rooted somewhere (by the caller) unsafe_write(s::IO, p::Ptr, n::Integer) = unsafe_write(s, convert(Ptr{UInt8}, p), convert(UInt, n)) -write(s::IO, x::Ref{T}) where {T} = unsafe_write(s, x, Core.sizeof(T)) +function write(s::IO, x::Ref{T}) where {T} + x isa Ptr && error("write cannot copy from a Ptr") + if isbitstype(T) + unsafe_write(s, x, Core.sizeof(T)) + else + write(s, x[]) + end +end write(s::IO, x::Int8) = write(s, reinterpret(UInt8, x)) function write(s::IO, x::Union{Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128,Float16,Float32,Float64}) - return write(s, Ref(x)) + return unsafe_write(s, Ref(x), Core.sizeof(x)) end write(s::IO, x::Bool) = write(s, UInt8(x)) @@ -684,38 +813,43 @@ function write(s::IO, A::AbstractArray) error("`write` is not supported on non-isbits arrays") end nb = 0 + r = Ref{eltype(A)}() for a in A - nb += write(s, a) + r[] = a + nb += @noinline unsafe_write(s, r, Core.sizeof(r)) # r must be heap-allocated end return nb end -function write(s::IO, a::Array) - if isbitstype(eltype(a)) - return GC.@preserve a unsafe_write(s, pointer(a), sizeof(a)) - else +function write(s::IO, A::StridedArray) + if !isbitstype(eltype(A)) error("`write` is not supported on non-isbits arrays") end -end - -function write(s::IO, a::SubArray{T,N,<:Array}) where {T,N} - if !isbitstype(T) || !isa(a, StridedArray) - return invoke(write, Tuple{IO, AbstractArray}, s, a) + _checkcontiguous(Bool, A) && + return GC.@preserve A unsafe_write(s, pointer(A), elsize(A) * length(A)) + sz::Dims = size(A) + st::Dims = strides(A) + msz, mst, n = merge_adjacent_dim(sz, st) + mst == 1 || return invoke(write, Tuple{IO, AbstractArray}, s, A) + n == ndims(A) && + return GC.@preserve A unsafe_write(s, pointer(A), elsize(A) * length(A)) + sz′, st′ = tail(sz), tail(st) + while n > 1 + sz′ = (tail(sz′)..., 1) + st′ = (tail(st′)..., 0) + n -= 1 end - elsz = elsize(a) - colsz = size(a,1) * elsz - GC.@preserve a if stride(a,1) != 1 - for idxs in CartesianIndices(size(a)) - unsafe_write(s, pointer(a, idxs), elsz) - end - return elsz * length(a) - elseif N <= 1 - return unsafe_write(s, pointer(a, 1), colsz) - else - for colstart in CartesianIndices((1, size(a)[2:end]...)) - unsafe_write(s, pointer(a, colstart), colsz) + GC.@preserve A begin + nb = 0 + iter = CartesianIndices(sz′) + for I in iter + p = pointer(A) + for i in 1:length(sz′) + p += elsize(A) * st′[i] * (I[i] - 1) + end + nb += unsafe_write(s, p, elsize(A) * msz) end - return colsz * trailingsize(a,2) + return nb end end @@ -746,37 +880,81 @@ end @noinline unsafe_read(s::IO, p::Ref{T}, n::Integer) where {T} = unsafe_read(s, unsafe_convert(Ref{T}, p)::Ptr, n) # mark noinline to 
ensure ref is gc-rooted somewhere (by the caller) unsafe_read(s::IO, p::Ptr, n::Integer) = unsafe_read(s, convert(Ptr{UInt8}, p), convert(UInt, n)) -read!(s::IO, x::Ref{T}) where {T} = (unsafe_read(s, x, Core.sizeof(T)); x) +function read!(s::IO, x::Ref{T}) where {T} + x isa Ptr && error("read! cannot copy into a Ptr") + if isbitstype(T) + unsafe_read(s, x, Core.sizeof(T)) + else + x[] = read(s, T) + end + return x +end read(s::IO, ::Type{Int8}) = reinterpret(Int8, read(s, UInt8)) function read(s::IO, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}}) - return read!(s, Ref{T}(0))[]::T + r = Ref{T}(0) + unsafe_read(s, r, Core.sizeof(T)) + return r[] end read(s::IO, ::Type{Bool}) = (read(s, UInt8) != 0) read(s::IO, ::Type{Ptr{T}}) where {T} = convert(Ptr{T}, read(s, UInt)) -function read!(s::IO, a::Array{UInt8}) - GC.@preserve a unsafe_read(s, pointer(a), sizeof(a)) - return a +function read!(s::IO, A::AbstractArray{T}) where {T} + if isbitstype(T) && _checkcontiguous(Bool, A) + GC.@preserve A unsafe_read(s, pointer(A), elsize(A) * length(A)) + else + if isbitstype(T) + r = Ref{T}() + for i in eachindex(A) + @noinline unsafe_read(s, r, Core.sizeof(r)) # r must be heap-allocated + A[i] = r[] + end + else + for i in eachindex(A) + A[i] = read(s, T) + end + end + end + return A end -function read!(s::IO, a::AbstractArray{T}) where T - if isbitstype(T) && (a isa Array || a isa FastContiguousSubArray{T,<:Any,<:Array{T}}) - GC.@preserve a unsafe_read(s, pointer(a), sizeof(a)) +function read!(s::IO, A::StridedArray{T}) where {T} + if !isbitstype(T) || _checkcontiguous(Bool, A) + return invoke(read!, Tuple{IO, AbstractArray}, s, A) + end + sz::Dims = size(A) + st::Dims = strides(A) + msz, mst, n = merge_adjacent_dim(sz, st) + mst == 1 || return invoke(read!, Tuple{IO, AbstractArray}, s, A) + if n == ndims(A) + GC.@preserve A unsafe_read(s, pointer(A), elsize(A) * length(A)) else - for i in eachindex(a) - a[i] = read(s, T) + sz′, st′ = tail(sz), tail(st) + while n > 1 + sz′ = (tail(sz′)..., 1) + st′ = (tail(st′)..., 0) + n -= 1 + end + GC.@preserve A begin + iter = CartesianIndices(sz′) + for I in iter + p = pointer(A) + for i in 1:length(sz′) + p += elsize(A) * st′[i] * (I[i] - 1) + end + unsafe_read(s, p, elsize(A) * msz) + end end end - return a + return A end function read(io::IO, ::Type{Char}) b0 = read(io, UInt8)::UInt8 - l = 8(4-leading_ones(b0)) + l = 0x08 * (0x04 - UInt8(leading_ones(b0))) c = UInt32(b0) << 24 - if l < 24 + if l ≤ 0x10 s = 16 while s ≥ l && !eof(io)::Bool peek(io) & 0xc0 == 0x80 || break @@ -790,15 +968,10 @@ end # read(io, T) is not defined for other AbstractChar: implementations # must provide their own encoding-specific method. 
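The `Char` reader above consumes continuation bytes only while the leading byte calls for them; a small illustration with a two-byte UTF-8 character (not a doctest):

```julia
io = IOBuffer("é!")            # 'é' is the two-byte sequence 0xc3 0xa9
@assert read(io, Char) == 'é'
@assert read(io, Char) == '!'
@assert eof(io)
```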
-# readuntil_string is useful below since it has -# an optimized method for s::IOStream -readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))::String - -function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) +function copyuntil(out::IO, s::IO, delim::AbstractChar; keep::Bool=false) if delim ≤ '\x7f' - return readuntil_string(s, delim % UInt8, keep) + return copyuntil(out, s, delim % UInt8; keep) end - out = IOBuffer() for c in readeach(s, Char) if c == delim keep && write(out, c) @@ -806,17 +979,22 @@ function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) end write(out, c) end - return String(take!(out)) + return out end -function readuntil(s::IO, delim::T; keep::Bool=false) where T - out = (T === UInt8 ? StringVector(0) : Vector{T}()) +# note: optimized methods of copyuntil for IOStreams and delim::UInt8 in iostream.jl +# and for IOBuffer with delim::UInt8 in iobuffer.jl +copyuntil(out::IO, s::IO, delim; keep::Bool=false) = _copyuntil(out, s, delim, keep) + +# supports out::Union{IO, AbstractVector} for use with both copyuntil & readuntil +function _copyuntil(out, s::IO, delim::T, keep::Bool) where T + output! = isa(out, IO) ? write : push! for c in readeach(s, T) if c == delim - keep && push!(out, c) + keep && output!(out, c) break end - push!(out, c) + output!(out, c) end return out end @@ -907,27 +1085,29 @@ function readuntil_vector!(io::IO, target::AbstractVector{T}, keep::Bool, out) w return false end -function readuntil(io::IO, target::AbstractString; keep::Bool=false) +function copyuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false) # small-string target optimizations x = Iterators.peel(target) - isnothing(x) && return "" + isnothing(x) && return out c, rest = x if isempty(rest) && c <= '\x7f' - return readuntil_string(io, c % UInt8, keep) + return copyuntil(out, io, c % UInt8; keep) end # convert String to a utf8-byte-iterator if !(target isa String) && !(target isa SubString{String}) target = String(target) end target = codeunits(target)::AbstractVector - return String(readuntil(io, target, keep=keep)) + return copyuntil(out, io, target, keep=keep) end function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T - out = (T === UInt8 ? StringVector(0) : Vector{T}()) + out = (T === UInt8 ? 
resize!(StringVector(70), 0) : Vector{T}()) readuntil_vector!(io, target, keep, out) return out end +copyuntil(out::IO, io::IO, target::AbstractVector; keep::Bool=false) = + (readuntil_vector!(io, target, keep, out); out) """ readchomp(x) @@ -988,7 +1168,7 @@ function read(s::IO, nb::Integer = typemax(Int)) return resize!(b, nr) end -read(s::IO, ::Type{String}) = String(read(s)) +read(s::IO, ::Type{String}) = String(read(s)::Vector{UInt8}) read(s::IO, T::Type) = error("The IO stream does not support reading objects of type $T.") ## high-level iterator interfaces ## @@ -1102,7 +1282,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) empty!(chunks) # will cause next iteration to terminate seekend(r.itr.stream) # reposition to end of stream for isdone - s = String(take!(buf)) + s = String(_unsafe_take!(buf)) else # extract the string from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline] if ichunk == jchunk # common case: current and previous newline in same chunk @@ -1119,7 +1299,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) end write(buf, view(chunks[jchunk], 1:jnewline)) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) - s = String(take!(buf)) + s = String(_unsafe_take!(buf)) # overwrite obsolete chunks (ichunk+1:jchunk) i = jchunk @@ -1291,6 +1471,7 @@ end """ countlines(io::IO; eol::AbstractChar = '\\n') + countlines(filename::AbstractString; eol::AbstractChar = '\\n') Read `io` until the end of the stream/file and count the number of lines. To specify a file pass the filename as the first argument. EOL markers other than `'\\n'` are supported by @@ -1318,6 +1499,19 @@ julia> io = IOBuffer("JuliaLang is a GitHub organization."); julia> countlines(io, eol = '.') 1 +``` +```jldoctest +julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\n") +36 + +julia> countlines("my_file.txt") +1 + +julia> countlines("my_file.txt", eol = 'n') +4 + +julia> rm("my_file.txt") + ``` """ function countlines(io::IO; eol::AbstractChar='\n') diff --git a/base/iobuffer.jl b/base/iobuffer.jl index e08a019d84a2c..895205549bc7e 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -5,6 +5,7 @@ # Stateful string mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO data::T # T should support: getindex, setindex!, length, copyto!, and resize! + reinit::Bool # if true, data needs to be re-allocated (after take!) readable::Bool writable::Bool seekable::Bool # if not seekable, implementation is free to destroy (compact) past read data @@ -17,7 +18,7 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, maxsize::Integer) where T<:AbstractVector{UInt8} require_one_based_indexing(data) - new(data,readable,writable,seekable,append,length(data),maxsize,1,-1) + new(data,false,readable,writable,seekable,append,length(data),maxsize,1,-1) end end const IOBuffer = GenericIOBuffer{Vector{UInt8}} @@ -122,10 +123,11 @@ function IOBuffer(; return buf end -# PipeBuffers behave like Unix Pipes. They are typically readable and writable, they act appendable, and are not seekable. +# PipeBuffers behave somewhat more like Unix Pipes (than Files). They are typically readable and writable, they act appendable, and are not seekable. +# However, they do not support stream notification, so for that there is the BufferStream wrapper around this. 
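The new `reinit` flag on `GenericIOBuffer` (added in the struct above) lets `take!` hand its backing vector to the caller and defer allocating a replacement until the buffer is written to again (see the updated `take!` and `ensureroom` further below). The user-visible behavior, roughly:

```julia
io = IOBuffer()
write(io, "first")
s1 = take!(io)           # caller now owns the bytes; io marks itself for reinit
write(io, "second")      # fresh storage is allocated lazily here
@assert String(s1) == "first"
@assert String(take!(io)) == "second"
```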
""" - PipeBuffer(data::Vector{UInt8}=UInt8[]; maxsize::Integer = typemax(Int)) + PipeBuffer(data::AbstractVector{UInt8}=UInt8[]; maxsize::Integer = typemax(Int)) An [`IOBuffer`](@ref) that allows reading and performs writes by appending. Seeking and truncating are not supported. @@ -133,12 +135,16 @@ See [`IOBuffer`](@ref) for the available constructors. If `data` is given, creates a `PipeBuffer` to operate on a data vector, optionally specifying a size beyond which the underlying `Array` may not be grown. """ -PipeBuffer(data::Vector{UInt8}=UInt8[]; maxsize::Int = typemax(Int)) = - GenericIOBuffer(data,true,true,false,true,maxsize) +PipeBuffer(data::AbstractVector{UInt8}=UInt8[]; maxsize::Int = typemax(Int)) = + GenericIOBuffer(data, true, true, false, true, maxsize) PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringVector(maxsize), maxsize = maxsize); x.size=0; x) +_similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len) +_similar_data(b::IOBuffer, len::Int) = StringVector(len) + function copy(b::GenericIOBuffer) - ret = typeof(b)(b.writable ? copy(b.data) : b.data, + ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ? + copyto!(_similar_data(b, length(b.data)), b.data) : b.data, b.readable, b.writable, b.seekable, b.append, b.maxsize) ret.size = b.size ret.ptr = b.ptr @@ -203,7 +209,7 @@ function read_sub(from::GenericIOBuffer, a::AbstractArray{T}, offs, nel) where T GC.@preserve a unsafe_read(from, pointer(a, offs), nb) else for i = offs:offs+nel-1 - a[i] = read(to, T) + a[i] = read(from, T) end end return a @@ -270,7 +276,10 @@ function truncate(io::GenericIOBuffer, n::Integer) io.seekable || throw(ArgumentError("truncate failed, IOBuffer is not seekable")) n < 0 && throw(ArgumentError("truncate failed, n bytes must be ≥ 0, got $n")) n > io.maxsize && throw(ArgumentError("truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io.maxsize)")) - if n > length(io.data) + if io.reinit + io.data = _similar_data(io, n) + io.reinit = false + elseif n > length(io.data) resize!(io.data, n) end io.data[io.size+1:n] .= 0 @@ -325,9 +334,14 @@ end ensureroom_slowpath(io, nshort) end n = min((nshort % Int) + (io.append ? io.size : io.ptr-1), io.maxsize) - l = length(io.data) - if n > l - _growend!(io.data, (n - l) % UInt) + if io.reinit + io.data = _similar_data(io, n) + io.reinit = false + else + l = length(io.data) + if n > l + _growend!(io.data, (n - l) % UInt) + end end return io end @@ -336,7 +350,6 @@ eof(io::GenericIOBuffer) = (io.ptr-1 == io.size) function closewrite(io::GenericIOBuffer) io.writable = false - # OR throw(_UVError("closewrite", UV_ENOTSOCK)) nothing end @@ -390,18 +403,26 @@ end function take!(io::IOBuffer) ismarked(io) && unmark(io) if io.seekable - data = io.data if io.writable - maxsize = (io.maxsize == typemax(Int) ? 
0 : min(length(io.data),io.maxsize)) - io.data = StringVector(maxsize) + if io.reinit + data = StringVector(0) + else + data = resize!(io.data, io.size) + io.reinit = true + end else - data = copy(data) + data = copyto!(StringVector(io.size), 1, io.data, 1, io.size) end - resize!(data,io.size) else nbytes = bytesavailable(io) - a = StringVector(nbytes) - data = read!(io, a) + if io.writable + data = io.data + io.reinit = true + _deletebeg!(data, io.ptr-1) + resize!(data, nbytes) + else + data = read!(io, StringVector(nbytes)) + end end if io.writable io.ptr = 1 @@ -410,6 +431,19 @@ function take!(io::IOBuffer) return data end +""" + _unsafe_take!(io::IOBuffer) + +This simply returns the raw resized `io.data`, with no checks to be +sure that `io` is readable etcetera, and leaves `io` in an inconsistent +state. This should only be used internally for performance-critical +`String` routines that immediately discard `io` afterwards, and it +*assumes* that `io` is writable and seekable. + +It saves no allocations compared to `take!`, it just omits some checks. +""" +_unsafe_take!(io::IOBuffer) = resize!(io.data, io.size) + function write(to::IO, from::GenericIOBuffer) if to === from from.ptr = from.size + 1 @@ -482,33 +516,53 @@ function occursin(delim::UInt8, buf::GenericIOBuffer) return false end -function readuntil(io::GenericIOBuffer, delim::UInt8; keep::Bool=false) - lb = 70 - A = StringVector(lb) - nread = 0 - nout = 0 - data = io.data - for i = io.ptr : io.size - @inbounds b = data[i] - nread += 1 - if keep || b != delim - nout += 1 - if nout > lb - lb = nout*2 - resize!(A, lb) - end - @inbounds A[nout] = b - end - if b == delim - break - end +function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) + data = view(io.data, io.ptr:io.size) + # note: findfirst + copyto! is much faster than a single loop + # except for nout ≲ 20. A single loop is 2x faster for nout=5. + nout = nread = something(findfirst(==(delim), data), length(data)) + if !keep && nout > 0 && data[nout] == delim + nout -= 1 end + write(out, view(io.data, io.ptr:io.ptr+nout-1)) io.ptr += nread - if lb != nout - resize!(A, nout) + return out +end + +function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false) + copyuntil(out, s, 0x0a, keep=true) + line = out.data + i = out.size + if keep || i == 0 || line[i] != 0x0a + return out + elseif i < 2 || line[i-1] != 0x0d + i -= 1 + else + i -= 2 end - A + out.size = i + if !out.append + out.ptr = i+1 + end + return out +end + +function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) + data = view(io.data, io.ptr:io.size) + # note: findfirst + copyto! is much faster than a single loop + # except for nout ≲ 20. A single loop is 2x faster for nout=5. 
+ nout = nread = something(findfirst(==(0x0a), data), length(data)) + if !keep && nout > 0 && data[nout] == 0x0a + nout -= 1 + nout > 0 && data[nout] == 0x0d && (nout -= 1) + end + write(out, view(io.data, io.ptr:io.ptr+nout-1)) + io.ptr += nread + return out end +copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) +copyline(out::GenericIOBuffer, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) + # copy-free crc32c of IOBuffer: function _crc32c(io::IOBuffer, nb::Integer, crc::UInt32=0x00000000) diff --git a/base/iostream.jl b/base/iostream.jl index 23dfb53256e82..ba422cd692fcd 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -63,6 +63,8 @@ function close(s::IOStream) systemerror("close", bad) end +closewrite(s::IOStream) = nothing + function flush(s::IOStream) sigatomic_begin() bad = @_lock_ios s ccall(:ios_flush, Cint, (Ptr{Cvoid},), s.ios) != 0 @@ -443,11 +445,46 @@ end function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 1, !keep) end +readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = + delim ≤ '\x7f' ? readuntil_string(s, delim % UInt8, keep) : + String(unsafe_take!(copyuntil(IOBuffer(sizehint=70), s, delim; keep))) function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2) end +function copyuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) + ensureroom(out, 1) # make sure we can read at least 1 byte, for iszero(n) check below + ptr = (out.append ? out.size+1 : out.ptr) + d = out.data + len = length(d) + while true + GC.@preserve d @_lock_ios s n= + Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), + s.ios, delim, pointer(d, ptr), (len - ptr + 1) % Csize_t)) + iszero(n) && break + ptr += n + if d[ptr-1] == delim + keep || (ptr -= 1) + break + end + (eof(s) || len == out.maxsize) && break + len = min(2len + 64, out.maxsize) + resize!(d, len) + end + out.size = max(out.size, ptr - 1) + if !out.append + out.ptr = ptr + end + return out +end + +function copyuntil(out::IOStream, s::IOStream, delim::UInt8; keep::Bool=false) + @_lock_ios out @_lock_ios s ccall(:ios_copyuntil, Csize_t, + (Ptr{Cvoid}, Ptr{Cvoid}, UInt8, Cint), out.ios, s.ios, delim, keep) + return out +end + function readbytes_all!(s::IOStream, b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}}, nb::Integer) diff --git a/base/irrationals.jl b/base/irrationals.jl index adfceef615a3c..6513e3269a4d7 100644 --- a/base/irrationals.jl +++ b/base/irrationals.jl @@ -31,10 +31,10 @@ struct Irrational{sym} <: AbstractIrrational end show(io::IO, x::Irrational{sym}) where {sym} = print(io, sym) function show(io::IO, ::MIME"text/plain", x::Irrational{sym}) where {sym} - if get(io, :compact, false) + if get(io, :compact, false)::Bool print(io, sym) else - print(io, sym, " = ", string(float(x))[1:15], "...") + print(io, sym, " = ", string(float(x))[1:min(end,15)], "...") end end @@ -165,13 +165,51 @@ end round(x::Irrational, r::RoundingMode) = round(float(x), r) """ - @irrational sym val def - @irrational(sym, val, def) + @irrational sym [val] def -Define a new `Irrational` value, `sym`, with pre-computed `Float64` value `val`, -and arbitrary-precision definition in terms of `BigFloat`s given by the expression `def`. 
+Define a new `Irrational` value, `sym`, with arbitrary-precision definition in terms +of `BigFloat`s given by the expression `def`. + +Optionally provide a pre-computed `Float64` value `val` which must equal `Float64(def)`. +`val` will be computed automatically if omitted. + +An `AssertionError` is thrown when either `big(def) isa BigFloat` or `Float64(val) == Float64(def)` +returns `false`. + +!!! warning + This macro should not be used outside of `Base` Julia. + + The macro creates a new type `Irrational{:sym}` regardless of where it's invoked. This can + lead to conflicting definitions if two packages define an irrational number with the same + name but different values. + + +# Examples +```jldoctest +julia> Base.@irrational twoπ 2*big(π) + +julia> twoπ +twoπ = 6.2831853071795... + +julia> Base.@irrational sqrt2 1.4142135623730950488 √big(2) + +julia> sqrt2 +sqrt2 = 1.4142135623730... + +julia> Base.@irrational sqrt2 1.4142135623730950488 big(2) +ERROR: AssertionError: big($(Expr(:escape, :sqrt2))) isa BigFloat + +julia> Base.@irrational sqrt2 1.41421356237309 √big(2) +ERROR: AssertionError: Float64($(Expr(:escape, :sqrt2))) == Float64(big($(Expr(:escape, :sqrt2)))) +``` """ macro irrational(sym, val, def) + irrational(sym, val, def) +end +macro irrational(sym, def) + irrational(sym, :(big($(esc(sym)))), def) +end +function irrational(sym, val, def) esym = esc(sym) qsym = esc(Expr(:quote, sym)) bigconvert = isa(def,Symbol) ? quote @@ -191,8 +229,10 @@ macro irrational(sym, val, def) quote const $esym = Irrational{$qsym}() $bigconvert - Base.Float64(::Irrational{$qsym}) = $val - Base.Float32(::Irrational{$qsym}) = $(Float32(val)) + let v = $val, v64 = Float64(v), v32 = Float32(v) + Base.Float64(::Irrational{$qsym}) = v64 + Base.Float32(::Irrational{$qsym}) = v32 + end @assert isa(big($esym), BigFloat) @assert Float64($esym) == Float64(big($esym)) @assert Float32($esym) == Float32(big($esym)) diff --git a/base/iterators.jl b/base/iterators.jl index 40fad992958d5..14cbd104c09d1 100644 --- a/base/iterators.jl +++ b/base/iterators.jl @@ -3,16 +3,29 @@ """ Methods for working with Iterators. 
""" -module Iterators +baremodule Iterators # small dance to make this work from Base or Intrinsics import ..@__MODULE__, ..parentmodule const Base = parentmodule(@__MODULE__) using .Base: - @inline, Pair, Pairs, AbstractDict, IndexLinear, IndexCartesian, IndexStyle, AbstractVector, Vector, - tail, SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo, - @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, AbstractRange, - LinearIndices, (:), |, +, -, !==, !, <=, <, missing, any, _counttuple + @inline, Pair, Pairs, AbstractDict, IndexLinear, IndexStyle, AbstractVector, Vector, + SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo, + @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, IdDict, + AbstractRange, AbstractUnitRange, UnitRange, LinearIndices, TupleOrBottom, + (:), |, +, -, *, !==, !, ==, !=, <=, <, >, >=, missing, + any, _counttuple, eachindex, ntuple, zero, prod, reduce, in, firstindex, lastindex, + tail, fieldtypes, min, max, minimum, zero, oneunit, promote, promote_shape +using Core: @doc + +if Base !== Core.Compiler +using .Base: + cld, fld, SubArray, view, resize!, IndexCartesian +using .Base.Checked: checked_mul +else + # Checked.checked_mul is not available during bootstrapping: + const checked_mul = * +end import .Base: first, last, @@ -20,9 +33,13 @@ import .Base: eltype, IteratorSize, IteratorEltype, haskey, keys, values, pairs, getindex, setindex!, get, iterate, - popfirst!, isdone, peek + popfirst!, isdone, peek, intersect -export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, partition, flatmap +export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, flatmap + +if Base !== Core.Compiler +export partition +end """ Iterators.map(f, iterators...) @@ -149,10 +166,12 @@ end An iterator that yields `(i, x)` where `i` is a counter starting at 1, and `x` is the `i`th value from the given iterator. It's useful when you need not only the values `x` over which you are iterating, but -also the number of iterations so far. Note that `i` may not be valid -for indexing `iter`; it's also possible that `x != iter[i]`, if `iter` -has indices that do not start at 1. See the `pairs(IndexLinear(), -iter)` method if you want to ensure that `i` is an index. +also the number of iterations so far. + +Note that `i` may not be valid for indexing `iter`, or may index a +different element. This will happen if `iter` has indices that do not +start at 1, and may happen for strings, dictionaries, etc. +See the `pairs(IndexLinear(), iter)` method if you want to ensure that `i` is an index. 
# Examples ```jldoctest @@ -164,6 +183,18 @@ julia> for (index, value) in enumerate(a) 1 a 2 b 3 c + +julia> str = "naïve"; + +julia> for (i, val) in enumerate(str) + print("i = ", i, ", val = ", val, ", ") + try @show(str[i]) catch e println(e) end + end +i = 1, val = n, str[i] = 'n' +i = 2, val = a, str[i] = 'a' +i = 3, val = ï, str[i] = 'ï' +i = 4, val = v, StringIndexError("naïve", 4) +i = 5, val = e, str[i] = 'v' ``` """ enumerate(iter) = Enumerate(iter) @@ -178,7 +209,7 @@ size(e::Enumerate) = size(e.itr) end last(e::Enumerate) = (length(e.itr), e.itr[end]) -eltype(::Type{Enumerate{I}}) where {I} = Tuple{Int, eltype(I)} +eltype(::Type{Enumerate{I}}) where {I} = TupleOrBottom(Int, eltype(I)) IteratorSize(::Type{Enumerate{I}}) where {I} = IteratorSize(I) IteratorEltype(::Type{Enumerate{I}}) where {I} = IteratorEltype(I) @@ -208,7 +239,7 @@ of `A`. Specifying [`IndexLinear()`](@ref) ensures that `i` will be an integer; specifying [`IndexCartesian()`](@ref) ensures that `i` will be a -[`CartesianIndex`](@ref); specifying `IndexStyle(A)` chooses whichever has +[`Base.CartesianIndex`](@ref); specifying `IndexStyle(A)` chooses whichever has been defined as the native indexing style for array `A`. Mutation of the bounds of the underlying array will invalidate this iterator. @@ -241,23 +272,26 @@ CartesianIndex(2, 2) e See also [`IndexStyle`](@ref), [`axes`](@ref). """ pairs(::IndexLinear, A::AbstractArray) = Pairs(A, LinearIndices(A)) -pairs(::IndexCartesian, A::AbstractArray) = Pairs(A, CartesianIndices(axes(A))) # preserve indexing capabilities for known indexable types # faster than zip(keys(a), values(a)) for arrays pairs(tuple::Tuple) = Pairs{Int}(tuple, keys(tuple)) pairs(nt::NamedTuple) = Pairs{Symbol}(nt, keys(nt)) pairs(v::Core.SimpleVector) = Pairs(v, LinearIndices(v)) -pairs(A::AbstractArray) = pairs(IndexCartesian(), A) pairs(A::AbstractVector) = pairs(IndexLinear(), A) # pairs(v::Pairs) = v # listed for reference, but already defined from being an AbstractDict +if Base !== Core.Compiler +pairs(::IndexCartesian, A::AbstractArray) = Pairs(A, Base.CartesianIndices(axes(A))) +pairs(A::AbstractArray) = pairs(IndexCartesian(), A) +end + length(v::Pairs) = length(getfield(v, :itr)) axes(v::Pairs) = axes(getfield(v, :itr)) size(v::Pairs) = size(getfield(v, :itr)) -@propagate_inbounds function _pairs_elt(p::Pairs{K, V}, idx) where {K, V} - return Pair{K, V}(idx, getfield(p, :data)[idx]) +Base.@eval @propagate_inbounds function _pairs_elt(p::Pairs{K, V}, idx) where {K, V} + return $(Expr(:new, :(Pair{K, V}), :idx, :(getfield(p, :data)[idx]))) end @propagate_inbounds function iterate(p::Pairs{K, V}, state...) where {K, V} @@ -277,7 +311,7 @@ end @inline isdone(v::Pairs, state...) = isdone(getfield(v, :itr), state...) IteratorSize(::Type{<:Pairs{<:Any, <:Any, I}}) where {I} = IteratorSize(I) -IteratorSize(::Type{<:Pairs{<:Any, <:Any, <:Base.AbstractUnitRange, <:Tuple}}) = HasLength() +IteratorSize(::Type{<:Pairs{<:Any, <:Any, <:AbstractUnitRange, <:Tuple}}) = HasLength() function last(v::Pairs{K, V}) where {K, V} idx = last(getfield(v, :itr)) @@ -308,7 +342,11 @@ the `zip` iterator is a tuple of values of its subiterators. `zip` orders the calls to its subiterators in such a way that stateful iterators will not advance when another iterator finishes in the current iteration. -See also: [`enumerate`](@ref), [`Splat`](@ref Base.Splat). +!!! note + + `zip()` with no arguments yields an infinite iterator of empty tuples. + +See also: [`enumerate`](@ref), [`Base.splat`](@ref). 
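Two properties called out in the `zip` docstring above, shown concretely (not a doctest): iteration stops with the shortest argument, and the zero-argument form never terminates on its own.

```julia
@assert collect(zip(1:3, 'a':'z')) == [(1, 'a'), (2, 'b'), (3, 'c')]
@assert collect(Iterators.take(zip(), 2)) == [(), ()]  # zip() is infinite, so truncate it
```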
# Examples ```jldoctest @@ -349,6 +387,22 @@ function _zip_min_length(is) end end _zip_min_length(is::Tuple{}) = nothing + +# For a collection of iterators `is`, returns a tuple (b, n), where +# `b` is true when every component of `is` has a statically-known finite +# length and all such lengths are equal. Otherwise, `b` is false. +# `n` is an implementation detail, and will be the `length` of the first +# iterator if it is statically-known and finite. Otherwise, `n` is `nothing`. +function _zip_lengths_finite_equal(is) + i = is[1] + if IteratorSize(i) isa Union{IsInfinite, SizeUnknown} + return (false, nothing) + else + b, n = _zip_lengths_finite_equal(tail(is)) + return (b && (n === nothing || n == length(i)), length(i)) + end +end +_zip_lengths_finite_equal(is::Tuple{}) = (true, nothing) size(z::Zip) = _promote_tuple_shape(Base.map(size, z.is)...) axes(z::Zip) = _promote_tuple_shape(Base.map(axes, z.is)...) _promote_tuple_shape((a,)::Tuple{OneTo}, (b,)::Tuple{OneTo}) = (intersect(a, b),) @@ -356,7 +410,7 @@ _promote_tuple_shape((m,)::Tuple{Integer}, (n,)::Tuple{Integer}) = (min(m, n),) _promote_tuple_shape(a, b) = promote_shape(a, b) _promote_tuple_shape(a, b...) = _promote_tuple_shape(a, _promote_tuple_shape(b...)) _promote_tuple_shape(a) = a -eltype(::Type{Zip{Is}}) where {Is<:Tuple} = Tuple{map(eltype, fieldtypes(Is))...} +eltype(::Type{Zip{Is}}) where {Is<:Tuple} = TupleOrBottom(map(eltype, fieldtypes(Is))...) #eltype(::Type{Zip{Tuple{}}}) = Tuple{} #eltype(::Type{Zip{Tuple{A}}}) where {A} = Tuple{eltype(A)} #eltype(::Type{Zip{Tuple{A, B}}}) where {A, B} = Tuple{eltype(A), eltype(B)} @@ -430,8 +484,13 @@ zip_iteratoreltype() = HasEltype() zip_iteratoreltype(a) = a zip_iteratoreltype(a, tail...) = and_iteratoreltype(a, zip_iteratoreltype(tail...)) -reverse(z::Zip) = Zip(Base.map(reverse, z.is)) # n.b. we assume all iterators are the same length last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is))) +function reverse(z::Zip) + if !first(_zip_lengths_finite_equal(z.is)) + throw(ArgumentError("Cannot reverse zipped iterators of unknown, infinite, or unequal lengths")) + end + Zip(Base.map(reverse, z.is)) +end # filter @@ -453,6 +512,15 @@ invocation of `filter`. Calls to `flt` will be made when iterating over the returned iterable object. These calls are not cached and repeated calls will be made when reiterating. +!!! warning + Subsequent *lazy* transformations on the iterator returned from `filter`, such + as those performed by `Iterators.reverse` or `cycle`, will also delay calls to `flt` + until collecting or iterating over the returned iterable object. If the filter + predicate is nondeterministic or its return values depend on the order of iteration + over the elements of `itr`, composition with lazy transformations may result in + surprising behavior. If this is undesirable, either ensure that `flt` is a pure + function or collect intermediate `filter` iterators before further transformations. + See [`Base.filter`](@ref) for an eager implementation of filtering for arrays. # Examples @@ -667,7 +735,7 @@ struct Take{I} xs::I n::Int function Take(xs::I, n::Integer) where {I} - n < 0 && throw(ArgumentError("Take length must be nonnegative")) + n < 0 && throw(ArgumentError("Take length must be non-negative")) return new{I}(xs, n) end end @@ -677,7 +745,7 @@ end An iterator that generates at most the first `n` elements of `iter`. -See also: [`drop`](@ref Iterators.drop), [`peel`](@ref Iterators.peel), [`first`](@ref), [`take!`](@ref). 
+See also: [`drop`](@ref Iterators.drop), [`peel`](@ref Iterators.peel), [`first`](@ref), [`Base.take!`](@ref). # Examples ```jldoctest @@ -726,7 +794,7 @@ struct Drop{I} xs::I n::Int function Drop(xs::I, n::Integer) where {I} - n < 0 && throw(ArgumentError("Drop length must be nonnegative")) + n < 0 && throw(ArgumentError("Drop length must be non-negative")) return new{I}(xs, n) end end @@ -890,7 +958,7 @@ end An iterator that cycles through `iter` forever. If `iter` is empty, so is `cycle(iter)`. -See also: [`Iterators.repeated`](@ref), [`repeat`](@ref). +See also: [`Iterators.repeated`](@ref), [`Base.repeat`](@ref). # Examples ```jldoctest @@ -932,7 +1000,7 @@ repeated(x) = Repeated(x) An iterator that generates the value `x` forever. If `n` is specified, generates `x` that many times (equivalent to `take(repeated(x), n)`). -See also: [`Iterators.cycle`](@ref), [`repeat`](@ref). +See also: [`Iterators.cycle`](@ref), [`Base.repeat`](@ref). # Examples ```jldoctest @@ -1021,7 +1089,7 @@ _prod_axes1(a, A) = throw(ArgumentError("Cannot compute indices for object of type $(typeof(a))")) ndims(p::ProductIterator) = length(axes(p)) -length(P::ProductIterator) = prod(size(P)) +length(P::ProductIterator) = reduce(checked_mul, size(P); init=1) IteratorEltype(::Type{ProductIterator{Tuple{}}}) = HasEltype() IteratorEltype(::Type{ProductIterator{Tuple{I}}}) where {I} = IteratorEltype(I) @@ -1034,8 +1102,7 @@ end eltype(::Type{ProductIterator{I}}) where {I} = _prod_eltype(I) _prod_eltype(::Type{Tuple{}}) = Tuple{} -_prod_eltype(::Type{I}) where {I<:Tuple} = - Tuple{ntuple(n -> eltype(fieldtype(I, n)), _counttuple(I)::Int)...} +_prod_eltype(::Type{I}) where {I<:Tuple} = TupleOrBottom(ntuple(n -> eltype(fieldtype(I, n)), _counttuple(I)::Int)...) iterate(::ProductIterator{Tuple{}}) = (), true iterate(::ProductIterator{Tuple{}}, state) = nothing @@ -1049,6 +1116,7 @@ iterate(::ProductIterator{Tuple{}}, state) = nothing done1 === true || return done1 # false or missing return _pisdone(tail(iters), tail(states)) # check tail end +@inline isdone(::ProductIterator{Tuple{}}, states) = true @inline isdone(P::ProductIterator, states) = _pisdone(P.iterators, states) @inline _piterate() = () @@ -1089,6 +1157,7 @@ end reverse(p::ProductIterator) = ProductIterator(Base.map(reverse, p.iterators)) last(p::ProductIterator) = Base.map(last, p.iterators) +intersect(a::ProductIterator, b::ProductIterator) = ProductIterator(intersect.(a.iterators, b.iterators)) # flatten an iterator of iterators @@ -1131,6 +1200,7 @@ IteratorEltype(::Type{Flatten{Tuple{}}}) = IteratorEltype(Tuple{}) _flatteneltype(I, ::HasEltype) = IteratorEltype(eltype(I)) _flatteneltype(I, et) = EltypeUnknown() +flatten_iteratorsize(::Union{HasShape, HasLength}, ::Type{Union{}}, slurp...) = HasLength() # length==0 flatten_iteratorsize(::Union{HasShape, HasLength}, ::Type{<:NTuple{N,Any}}) where {N} = HasLength() flatten_iteratorsize(::Union{HasShape, HasLength}, ::Type{<:Tuple}) = SizeUnknown() flatten_iteratorsize(::Union{HasShape, HasLength}, ::Type{<:Number}) = HasLength() @@ -1142,6 +1212,7 @@ _flatten_iteratorsize(sz, ::HasEltype, ::Type{Tuple{}}) = HasLength() IteratorSize(::Type{Flatten{I}}) where {I} = _flatten_iteratorsize(IteratorSize(I), IteratorEltype(I), I) +flatten_length(f, T::Type{Union{}}, slurp...) = 0 function flatten_length(f, T::Type{<:NTuple{N,Any}}) where {N} return N * length(f.it) end @@ -1182,7 +1253,7 @@ See also [`Iterators.flatten`](@ref), [`Iterators.map`](@ref). 
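As the `flatmap` docstring above suggests, it is simply `flatten` composed with lazy `map`; a one-line check (not a doctest):

```julia
@assert collect(Iterators.flatmap(n -> (n, -n), 1:2)) ==
        collect(Iterators.flatten(Iterators.map(n -> (n, -n), 1:2))) == [1, -1, 2, -2]
```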
# Examples ```jldoctest -julia> Iterators.flatmap(n->-n:2:n, 1:3) |> collect +julia> Iterators.flatmap(n -> -n:2:n, 1:3) |> collect 9-element Vector{Int64}: -1 1 @@ -1193,11 +1264,26 @@ julia> Iterators.flatmap(n->-n:2:n, 1:3) |> collect -1 1 3 + +julia> stack(n -> -n:2:n, 1:3) +ERROR: DimensionMismatch: stack expects uniform slices, got axes(x) == (1:3,) while first had (1:2,) +[...] + +julia> Iterators.flatmap(n -> (-n, 10n), 1:2) |> collect +4-element Vector{Int64}: + -1 + 10 + -2 + 20 + +julia> ans == vec(stack(n -> (-n, 10n), 1:2)) +true ``` """ flatmap(f, c...) = flatten(map(f, c...)) -""" +if Base !== Core.Compiler # views are not defined +@doc """ partition(collection, n) Iterate over a collection `n` elements at a time. @@ -1210,8 +1296,7 @@ julia> collect(Iterators.partition([1,2,3,4,5], 2)) [3, 4] [5] ``` -""" -function partition(c, n::Integer) +""" function partition(c, n::Integer) n < 1 && throw(ArgumentError("cannot create partitions of length $n")) return PartitionIterator(c, Int(n)) end @@ -1221,7 +1306,7 @@ struct PartitionIterator{T} n::Int end # Partitions are explicitly a linear indexing operation, so reshape to 1-d immediately -PartitionIterator(A::AbstractArray, n::Int) = PartitionIterator(vec(A), n) +PartitionIterator(A::AbstractArray, n::Int) = PartitionIterator(Base.vec(A), n) PartitionIterator(v::AbstractVector, n::Int) = PartitionIterator{typeof(v)}(v, n) eltype(::Type{PartitionIterator{T}}) where {T} = Vector{eltype(T)} @@ -1279,7 +1364,7 @@ function iterate(itr::PartitionIterator, state...) return resize!(v, i), y === nothing ? IterationCutShort() : y[2] end -""" +@doc """ Stateful(itr) There are several different ways to think about this iterator wrapper: @@ -1292,7 +1377,7 @@ There are several different ways to think about this iterator wrapper: whenever an item is produced. `Stateful` provides the regular iterator interface. Like other mutable iterators -(e.g. [`Channel`](@ref)), if iteration is stopped early (e.g. by a [`break`](@ref) in a [`for`](@ref) loop), +(e.g. [`Base.Channel`](@ref)), if iteration is stopped early (e.g. by a [`break`](@ref) in a [`for`](@ref) loop), iteration can be resumed from the same spot by continuing to iterate over the same iterator object (in contrast, an immutable iterator would restart from the beginning). @@ -1337,49 +1422,63 @@ julia> sum(a) # Sum the remaining elements 7 ``` """ -mutable struct Stateful{T, VS} +mutable struct Stateful{T, VS, N<:Integer} itr::T # A bit awkward right now, but adapted to the new iteration protocol nextvalstate::Union{VS, Nothing} - taken::Int + + # Number of remaining elements, if itr is HasLength or HasShape. + # if not, store -1 - number_of_consumed_elements. + # This allows us to defer calculating length until asked for. 
+ # See PR #45924 + remaining::N @inline function Stateful{<:Any, Any}(itr::T) where {T} - new{T, Any}(itr, iterate(itr), 0) + itl = iterlength(itr) + new{T, Any, typeof(itl)}(itr, iterate(itr), itl) end @inline function Stateful(itr::T) where {T} VS = approx_iter_type(T) - return new{T, VS}(itr, iterate(itr)::VS, 0) + itl = iterlength(itr) + return new{T, VS, typeof(itl)}(itr, iterate(itr)::VS, itl) + end +end + +function iterlength(it)::Signed + if IteratorSize(it) isa Union{HasShape, HasLength} + return length(it) + else + -1 end end function reset!(s::Stateful{T,VS}, itr::T=s.itr) where {T,VS} s.itr = itr + itl = iterlength(itr) setfield!(s, :nextvalstate, iterate(itr)) - s.taken = 0 + s.remaining = itl s end -if Base === Core.Compiler - approx_iter_type(a::Type) = Any -else - # Try to find an appropriate type for the (value, state tuple), - # by doing a recursive unrolling of the iteration protocol up to - # fixpoint. - approx_iter_type(itrT::Type) = _approx_iter_type(itrT, Base._return_type(iterate, Tuple{itrT})) - # Not actually called, just passed to return type to avoid - # having to typesplit on Nothing - function doiterate(itr, valstate::Union{Nothing, Tuple{Any, Any}}) - valstate === nothing && return nothing - val, st = valstate - return iterate(itr, st) - end - function _approx_iter_type(itrT::Type, vstate::Type) - vstate <: Union{Nothing, Tuple{Any, Any}} || return Any - vstate <: Union{} && return Union{} - nextvstate = Base._return_type(doiterate, Tuple{itrT, vstate}) - return (nextvstate <: vstate ? vstate : Any) - end +# Try to find an appropriate type for the (value, state tuple), +# by doing a recursive unrolling of the iteration protocol up to +# fixpoint. +approx_iter_type(itrT::Type) = _approx_iter_type(itrT, Base._return_type(iterate, Tuple{itrT})) +# Not actually called, just passed to return type to avoid +# having to typesplit on Nothing +function doiterate(itr, valstate::Union{Nothing, Tuple{Any, Any}}) + valstate === nothing && return nothing + val, st = valstate + return iterate(itr, st) +end +function _approx_iter_type(itrT::Type, vstate::Type) + vstate <: Union{Nothing, Tuple{Any, Any}} || return Any + vstate <: Union{} && return Union{} + itrT <: Union{} && return Union{} + nextvstate = Base._return_type(doiterate, Tuple{itrT, vstate}) + return (nextvstate <: vstate ? vstate : Any) end +Stateful(x::Stateful) = x convert(::Type{Stateful}, itr) = Stateful(itr) @inline isdone(s::Stateful, st=nothing) = s.nextvalstate === nothing @@ -1387,11 +1486,12 @@ convert(::Type{Stateful}, itr) = Stateful(itr) @inline function popfirst!(s::Stateful) vs = s.nextvalstate if vs === nothing - throw(EOFError()) + throw(Base.EOFError()) else val, state = vs Core.setfield!(s, :nextvalstate, iterate(s.itr, state)) - s.taken += 1 + rem = s.remaining + s.remaining = rem - typeof(rem)(1) return val end end @@ -1401,10 +1501,21 @@ end return ns !== nothing ? ns[1] : sentinel end @inline iterate(s::Stateful, state=nothing) = s.nextvalstate === nothing ? nothing : (popfirst!(s), nothing) -IteratorSize(::Type{Stateful{T,VS}}) where {T,VS} = IteratorSize(T) isa HasShape ? HasLength() : IteratorSize(T) -eltype(::Type{Stateful{T, VS}} where VS) where {T} = eltype(T) -IteratorEltype(::Type{Stateful{T,VS}}) where {T,VS} = IteratorEltype(T) -length(s::Stateful) = length(s.itr) - s.taken +IteratorSize(::Type{<:Stateful{T}}) where {T} = IteratorSize(T) isa HasShape ? 
HasLength() : IteratorSize(T) +eltype(::Type{<:Stateful{T}}) where {T} = eltype(T) +IteratorEltype(::Type{<:Stateful{T}}) where {T} = IteratorEltype(T) + +function length(s::Stateful) + rem = s.remaining + # If rem is actually remaining length, return it. + # else, rem is number of consumed elements. + if rem >= 0 + rem + else + length(s.itr) - (typeof(rem)(1) - rem) + end +end +end # if statement several hundred lines above """ only(x) @@ -1436,7 +1547,9 @@ Stacktrace: [...] ``` """ -@propagate_inbounds function only(x) +@propagate_inbounds only(x) = _only(x, iterate) + +@propagate_inbounds function _only(x, ::typeof(iterate)) i = iterate(x) @boundscheck if i === nothing throw(ArgumentError("Collection is empty, must contain exactly 1 element")) @@ -1448,21 +1561,23 @@ Stacktrace: return ret end -# Collections of known size -only(x::Ref) = x[] -only(x::Number) = x -only(x::Char) = x +@inline function _only(x, ::typeof(first)) + @boundscheck if length(x) != 1 + throw(ArgumentError("Collection must contain exactly 1 element")) + end + @inbounds first(x) +end + +@propagate_inbounds only(x::IdDict) = _only(x, first) + +# Specific error messages for tuples and named tuples only(x::Tuple{Any}) = x[1] only(x::Tuple) = throw( ArgumentError("Tuple contains $(length(x)) elements, must contain exactly 1 element") ) -only(a::AbstractArray{<:Any, 0}) = @inbounds return a[] only(x::NamedTuple{<:Any, <:Tuple{Any}}) = first(x) only(x::NamedTuple) = throw( ArgumentError("NamedTuple contains $(length(x)) elements, must contain exactly 1 element") ) - -Base.intersect(a::ProductIterator, b::ProductIterator) = ProductIterator(intersect.(a.iterators, b.iterators)) - end diff --git a/base/libc.jl b/base/libc.jl index a14920ec4f6b8..1fcf763e4e124 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -6,15 +6,18 @@ Interface to libc, the C standard library. """ Libc import Base: transcode, windowserror, show +# these need to be defined separately for bootstrapping but belong to Libc +import Base: memcpy, memmove, memset, memcmp import Core.Intrinsics: bitcast -export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, calloc, realloc, - errno, strerror, flush_cstdio, systemsleep, time, transcode +export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, memcpy, + memmove, memset, calloc, realloc, errno, strerror, flush_cstdio, systemsleep, time, + transcode, mkfifo if Sys.iswindows() export GetLastError, FormatMessage end -include("../errno_h.jl") +include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "errno_h.jl")) # include($BUILDROOT/base/errno_h.jl) ## RawFD ## @@ -72,6 +75,34 @@ end ## FILE (not auto-finalized) ## +""" + FILE(::Ptr) + FILE(::IO) + +A libc `FILE*`, representing an opened file. + +It can be passed as a `Ptr{FILE}` argument to [`ccall`](@ref) and also supports +[`seek`](@ref), [`position`](@ref) and [`close`](@ref). + +A `FILE` can be constructed from an ordinary `IO` object, provided it is an open file. It +must be closed afterward. 
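Circling back to the `Iterators.Stateful` change earlier in this hunk: the struct now tracks a `remaining` count instead of a `taken` count, so `length` stays cheap for sized iterators as elements are consumed. The observable behavior, roughly (not a doctest):

```julia
s = Iterators.Stateful(1:4)
@assert length(s) == 4
popfirst!(s); popfirst!(s)
@assert length(s) == 2       # remaining elements, kept up to date by popfirst!
@assert collect(s) == [3, 4]
```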
+ +# Examples +```jldoctest +julia> using Base.Libc + +julia> mktemp() do _, io + # write to the temporary file using `puts(char*, FILE*)` from libc + file = FILE(io) + ccall(:fputs, Cint, (Cstring, Ptr{FILE}), "hello world", file) + close(file) + # read the file again + seek(io, 0) + read(io, String) + end +"hello world" +``` +""" struct FILE ptr::Ptr{Cvoid} end @@ -225,7 +256,7 @@ function strptime(fmt::AbstractString, timestr::AbstractString) @static if Sys.isapple() # if we didn't explicitly parse the weekday or year day, use mktime # to fill them in automatically. - if !occursin(r"([^%]|^)%(a|A|j|w|Ow)", fmt) + if !occursin(r"([^%]|^)%(a|A|j|w|Ow)"a, fmt) ccall(:mktime, Int, (Ref{TmStruct},), tm) end end @@ -235,14 +266,14 @@ end # system date in seconds """ - time(t::TmStruct) + time(t::TmStruct) -> Float64 Converts a `TmStruct` struct to a number of seconds since the epoch. """ time(tm::TmStruct) = Float64(ccall(:mktime, Int, (Ref{TmStruct},), tm)) """ - time() + time() -> Float64 Get the system time in seconds since the epoch, with fairly high (typically, microsecond) resolution. """ @@ -260,7 +291,7 @@ getpid() = ccall(:uv_os_getpid, Int32, ()) ## network functions ## """ - gethostname() -> AbstractString + gethostname() -> String Get the local machine's host name. """ @@ -336,7 +367,6 @@ if Sys.iswindows() end ## Memory related ## - """ free(addr::Ptr) @@ -346,6 +376,8 @@ be freed by the free functions defined in that library, to avoid assertion failu multiple `libc` libraries exist on the system. """ free(p::Ptr) = ccall(:free, Cvoid, (Ptr{Cvoid},), p) +free(p::Cstring) = free(convert(Ptr{UInt8}, p)) +free(p::Cwstring) = free(convert(Ptr{Cwchar_t}, p)) """ malloc(size::Integer) -> Ptr{Cvoid} @@ -371,8 +403,7 @@ Call `calloc` from the C standard library. """ calloc(num::Integer, size::Integer) = ccall(:calloc, Ptr{Cvoid}, (Csize_t, Csize_t), num, size) -free(p::Cstring) = free(convert(Ptr{UInt8}, p)) -free(p::Cwstring) = free(convert(Ptr{Cwchar_t}, p)) + ## Random numbers ## @@ -406,6 +437,33 @@ function srand(seed::Integer=_make_uint64_seed()) ccall(:jl_srand, Cvoid, (UInt64,), seed % UInt64) end +""" + mkfifo(path::AbstractString, [mode::Integer]) -> path + +Make a FIFO special file (a named pipe) at `path`. Return `path` as-is on success. + +`mkfifo` is supported only in Unix platforms. + +!!! compat "Julia 1.11" + `mkfifo` requires at least Julia 1.11. +""" +function mkfifo( + path::AbstractString, + mode::Integer = Base.S_IRUSR | Base.S_IWUSR | Base.S_IRGRP | Base.S_IWGRP | + Base.S_IROTH | Base.S_IWOTH, +) + @static if Sys.isunix() + # Default `mode` is compatible with `mkfifo` CLI in coreutils. + ret = ccall(:mkfifo, Cint, (Cstring, Base.Cmode_t), path, mode) + systemerror("mkfifo", ret == -1) + return path + else + # Using normal `error` because `systemerror("mkfifo", ENOTSUP)` does not + # seem to work on Windows. + error("mkfifo: Operation not supported") + end +end + struct Cpasswd username::Cstring uid::Culong @@ -435,6 +493,26 @@ struct Group mem::Vector{String} end +# Gets password-file entry for default user, or a subset thereof +# (e.g., uid and guid are set to -1 on Windows) +function getpw() + ref_pd = Ref(Cpasswd()) + ret = ccall(:uv_os_get_passwd, Cint, (Ref{Cpasswd},), ref_pd) + Base.uv_error("getpw", ret) + + pd = ref_pd[] + pd = Passwd( + pd.username == C_NULL ? "" : unsafe_string(pd.username), + pd.uid, + pd.gid, + pd.shell == C_NULL ? "" : unsafe_string(pd.shell), + pd.homedir == C_NULL ? "" : unsafe_string(pd.homedir), + pd.gecos == C_NULL ? 
"" : unsafe_string(pd.gecos), + ) + ccall(:uv_os_free_passwd, Cvoid, (Ref{Cpasswd},), ref_pd) + return pd +end + function getpwuid(uid::Unsigned, throw_error::Bool=true) ref_pd = Ref(Cpasswd()) ret = ccall(:uv_os_get_passwd2, Cint, (Ref{Cpasswd}, Culong), ref_pd, uid) @@ -454,6 +532,7 @@ function getpwuid(uid::Unsigned, throw_error::Bool=true) ccall(:uv_os_free_passwd, Cvoid, (Ref{Cpasswd},), ref_pd) return pd end + function getgrgid(gid::Unsigned, throw_error::Bool=true) ref_gp = Ref(Cgroup()) ret = ccall(:uv_os_get_group, Cint, (Ref{Cgroup}, Culong), ref_gp, gid) @@ -484,6 +563,5 @@ geteuid() = ccall(:jl_geteuid, Culong, ()) # Include dlopen()/dlpath() code include("libdl.jl") -using .Libdl end # module diff --git a/base/libdl.jl b/base/libdl.jl index 4f29260bb24f8..09f4ad4ea2159 100644 --- a/base/libdl.jl +++ b/base/libdl.jl @@ -9,7 +9,7 @@ import Base.DL_LOAD_PATH export DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e, - dlpath, find_library, dlext, dllist + dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath, BundledLazyLibraryPath """ DL_LOAD_PATH @@ -45,6 +45,9 @@ applicable. """ (RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW) +# The default flags for `dlopen()` +const default_rtld_flags = RTLD_LAZY | RTLD_DEEPBIND + """ dlsym(handle, sym; throw_error::Bool = true) @@ -72,8 +75,8 @@ end Look up a symbol from a shared library handle, silently return `C_NULL` on lookup failure. This method is now deprecated in favor of `dlsym(handle, sym; throw_error=false)`. """ -function dlsym_e(hnd::Ptr, s::Union{Symbol,AbstractString}) - return something(dlsym(hnd, s; throw_error=false), C_NULL) +function dlsym_e(args...) + return something(dlsym(args...; throw_error=false), C_NULL) end """ @@ -110,10 +113,10 @@ If the library cannot be found, this method throws an error, unless the keyword """ function dlopen end -dlopen(s::Symbol, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND; kwargs...) = +dlopen(s::Symbol, flags::Integer = default_rtld_flags; kwargs...) = dlopen(string(s), flags; kwargs...) -function dlopen(s::AbstractString, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND; throw_error::Bool = true) +function dlopen(s::AbstractString, flags::Integer = default_rtld_flags; throw_error::Bool = true) ret = ccall(:jl_load_dynamic_library, Ptr{Cvoid}, (Cstring,UInt32,Cint), s, flags, Cint(throw_error)) if ret == C_NULL return nothing @@ -138,10 +141,10 @@ vendor = dlopen("libblas") do lib end ``` """ -function dlopen(f::Function, args...; kwargs...) +function dlopen(f::Function, name, args...; kwargs...) hdl = nothing try - hdl = dlopen(args...; kwargs...) + hdl = dlopen(name, args...; kwargs...) f(hdl) finally dlclose(hdl) @@ -185,7 +188,7 @@ function dlclose(p::Nothing) end """ - find_library(names, locations) + find_library(names [, locations]) Searches for the first library in `names` in the paths in the `locations` list, `DL_LOAD_PATH`, or system library paths (in that order) which can successfully be dlopen'd. @@ -314,4 +317,135 @@ function dllist() return dynamic_libraries end + +""" + LazyLibraryPath + +Helper type for lazily constructed library paths for use with `LazyLibrary`. +Arguments are passed to `joinpath()`. Arguments must be able to have +`string()` called on them. 
+ +``` +libfoo = LazyLibrary(LazyLibraryPath(prefix, "lib/libfoo.so.1.2.3")) +``` +""" +struct LazyLibraryPath + pieces::Vector + LazyLibraryPath(pieces::Vector) = new(pieces) +end +LazyLibraryPath(args...) = LazyLibraryPath(collect(args)) +Base.string(llp::LazyLibraryPath) = joinpath(string.(llp.pieces)...) +Base.cconvert(::Type{Cstring}, llp::LazyLibraryPath) = Base.cconvert(Cstring, string(llp)) +# Define `print` so that we can wrap this in a `LazyString` +Base.print(io::IO, llp::LazyLibraryPath) = print(io, string(llp)) + +# Helper to get `Sys.BINDIR` at runtime +struct SysBindirGetter; end +Base.string(::SysBindirGetter) = dirname(Sys.BINDIR) + +""" + BundledLazyLibraryPath + +Helper type for lazily constructed library paths that are stored within the +bundled Julia distribution, primarily for use by Base modules. + +``` +libfoo = LazyLibrary(BundledLazyLibraryPath("lib/libfoo.so.1.2.3")) +``` +""" +BundledLazyLibraryPath(subpath) = LazyLibraryPath(SysBindirGetter(), subpath) + + +""" + LazyLibrary(name, flags = , + dependencies = LazyLibrary[], on_load_callback = nothing) + +Represents a lazily-loaded library that opens itself and its dependencies on first usage +in a `dlopen()`, `dlsym()`, or `ccall()` usage. While this structure contains the +ability to run arbitrary code on first load via `on_load_callback`, we caution that this +should be used sparingly, as it is not expected that `ccall()` should result in large +amounts of Julia code being run. You may call `ccall()` from within the +`on_load_callback` but only for the current library and its dependencies, and user should +not call `wait()` on any tasks within the on load callback. +""" +mutable struct LazyLibrary + # Name and flags to open with + const path + const flags::UInt32 + + # Dependencies that must be loaded before we can load + dependencies::Vector{LazyLibrary} + + # Function that get called once upon initial load + on_load_callback + const lock::Base.ReentrantLock + + # Pointer that we eventually fill out upon first `dlopen()` + @atomic handle::Ptr{Cvoid} + function LazyLibrary(path; flags = default_rtld_flags, dependencies = LazyLibrary[], + on_load_callback = nothing) + return new( + path, + UInt32(flags), + collect(dependencies), + on_load_callback, + Base.ReentrantLock(), + C_NULL, + ) + end +end + +# We support adding dependencies only because of very special situations +# such as LBT needing to have OpenBLAS_jll added as a dependency dynamically. +function add_dependency!(ll::LazyLibrary, dep::LazyLibrary) + @lock ll.lock begin + push!(ll.dependencies, dep) + end +end + +# Register `jl_libdl_dlopen_func` so that `ccall()` lowering knows +# how to call `dlopen()`, during bootstrap. +# See `post_image_load_hooks` for non-bootstrapping. +Base.unsafe_store!(cglobal(:jl_libdl_dlopen_func, Any), dlopen) + +function dlopen(ll::LazyLibrary, flags::Integer = ll.flags; kwargs...) + handle = @atomic :acquire ll.handle + if handle == C_NULL + @lock ll.lock begin + # Check to see if another thread has already run this + if ll.handle == C_NULL + # Ensure that all dependencies are loaded + for dep in ll.dependencies + dlopen(dep; kwargs...) + end + + # Load our library + handle = dlopen(string(ll.path), flags; kwargs...) + @atomic :release ll.handle = handle + + # Only the thread that loaded the library calls the `on_load_callback()`. 
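+                # Dependencies' callbacks (if any) have already run via the recursive `dlopen(dep)` calls above.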
+ if ll.on_load_callback !== nothing + ll.on_load_callback() + end + end + end + else + # Invoke our on load callback, if it exists + if ll.on_load_callback !== nothing + # This empty lock protects against the case where we have updated + # `ll.handle` in the branch above, but not exited the lock. We want + # a second thread that comes in at just the wrong time to have to wait + # for that lock to be released (and thus for the on_load_callback to + # have finished), hence the empty lock here. But we want the + # on_load_callback thread to bypass this, which will be happen thanks + # to the fact that we're using a reentrant lock here. + @lock ll.lock begin end + end + end + + return handle +end +dlopen(x::Any) = throw(TypeError(:dlopen, "", Union{Symbol,String,LazyLibrary}, x)) +dlsym(ll::LazyLibrary, args...; kwargs...) = dlsym(dlopen(ll), args...; kwargs...) +dlpath(ll::LazyLibrary) = dlpath(dlopen(ll)) end # module Libdl diff --git a/base/libuv.jl b/base/libuv.jl index ea3d64072378f..4c56af29e7e60 100644 --- a/base/libuv.jl +++ b/base/libuv.jl @@ -2,7 +2,7 @@ # Core definitions for interacting with the libuv library from Julia -include("../uv_constants.jl") +include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "uv_constants.jl")) # include($BUILDROOT/base/uv_constants.jl) # convert UV handle data to julia object, checking for null function uv_sizeof_handle(handle) @@ -103,6 +103,18 @@ uv_error(prefix::AbstractString, c::Integer) = c < 0 ? throw(_UVError(prefix, c) eventloop() = ccall(:jl_global_event_loop, Ptr{Cvoid}, ()) +function uv_unref(h::Ptr{Cvoid}) + iolock_begin() + ccall(:uv_unref, Cvoid, (Ptr{Cvoid},), h) + iolock_end() +end + +function uv_ref(h::Ptr{Cvoid}) + iolock_begin() + ccall(:uv_ref, Cvoid, (Ptr{Cvoid},), h) + iolock_end() +end + function process_events() return ccall(:jl_process_events, Int32, ()) end diff --git a/base/linking.jl b/base/linking.jl new file mode 100644 index 0000000000000..2d68ea730c0fb --- /dev/null +++ b/base/linking.jl @@ -0,0 +1,169 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license +module Linking + +import Base.Libc: Libdl + +# inlined LLD_jll +# These get calculated in __init__() +const PATH = Ref("") +const LIBPATH = Ref("") +const PATH_list = String[] +const LIBPATH_list = String[] +const lld_path = Ref{String}() +const lld_exe = Sys.iswindows() ? "lld.exe" : "lld" +const dsymutil_path = Ref{String}() +const dsymutil_exe = Sys.iswindows() ? 
"dsymutil.exe" : "dsymutil" + +if Sys.iswindows() + const LIBPATH_env = "PATH" + const LIBPATH_default = "" + const pathsep = ';' +elseif Sys.isapple() + const LIBPATH_env = "DYLD_FALLBACK_LIBRARY_PATH" + const LIBPATH_default = "~/lib:/usr/local/lib:/lib:/usr/lib" + const pathsep = ':' +else + const LIBPATH_env = "LD_LIBRARY_PATH" + const LIBPATH_default = "" + const pathsep = ':' +end + +function adjust_ENV!(env::Dict, PATH::String, LIBPATH::String, adjust_PATH::Bool, adjust_LIBPATH::Bool) + if adjust_LIBPATH + LIBPATH_base = get(env, LIBPATH_env, expanduser(LIBPATH_default)) + if !isempty(LIBPATH_base) + env[LIBPATH_env] = string(LIBPATH, pathsep, LIBPATH_base) + else + env[LIBPATH_env] = LIBPATH + end + end + if adjust_PATH && (LIBPATH_env != "PATH" || !adjust_LIBPATH) + if !isempty(get(env, "PATH", "")) + env["PATH"] = string(PATH, pathsep, env["PATH"]) + else + env["PATH"] = PATH + end + end + return env +end + +function __init_lld_path() + # Prefer our own bundled lld, but if we don't have one, pick it up off of the PATH + # If this is an in-tree build, `lld` will live in `tools`. Otherwise, it'll be in `private_libexecdir` + for bundled_lld_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, lld_exe), + joinpath(Sys.BINDIR, "..", "tools", lld_exe), + joinpath(Sys.BINDIR, lld_exe)) + if isfile(bundled_lld_path) + lld_path[] = abspath(bundled_lld_path) + return + end + end + lld_path[] = something(Sys.which(lld_exe), lld_exe) + return +end + +function __init_dsymutil_path() + #Same as with lld but for dsymutil + for bundled_dsymutil_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, dsymutil_exe), + joinpath(Sys.BINDIR, "..", "tools", dsymutil_exe), + joinpath(Sys.BINDIR, dsymutil_exe)) + if isfile(bundled_dsymutil_path) + dsymutil_path[] = abspath(bundled_dsymutil_path) + return + end + end + dsymutil_path[] = something(Sys.which(dsymutil_exe), dsymutil_exe) + return +end + +const VERBOSE = Ref{Bool}(false) + +function __init__() + VERBOSE[] = Base.get_bool_env("JULIA_VERBOSE_LINKING", false) + + __init_lld_path() + __init_dsymutil_path() + PATH[] = dirname(lld_path[]) + if Sys.iswindows() + # On windows, the dynamic libraries (.dll) are in Sys.BINDIR ("usr\\bin") + append!(LIBPATH_list, [abspath(Sys.BINDIR, Base.LIBDIR, "julia"), Sys.BINDIR]) + else + append!(LIBPATH_list, [abspath(Sys.BINDIR, Base.LIBDIR, "julia"), abspath(Sys.BINDIR, Base.LIBDIR)]) + end + LIBPATH[] = join(LIBPATH_list, pathsep) + return +end + +function lld(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) + env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH) + return Cmd(Cmd([lld_path[]]); env) +end + +function dsymutil(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) + env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH) + return Cmd(Cmd([dsymutil_path[]]); env) +end + +function ld() + default_args = `` + @static if Sys.iswindows() + # LLD supports mingw style linking + flavor = "gnu" + m = Sys.ARCH == :x86_64 ? "i386pep" : "i386pe" + default_args = `-m $m -Bdynamic --enable-auto-image-base --allow-multiple-definition` + elseif Sys.isapple() + flavor = "darwin" + arch = Sys.ARCH == :aarch64 ? 
:arm64 : Sys.ARCH + default_args = `-arch $arch -undefined dynamic_lookup -platform_version macos $(Base.MACOS_PRODUCT_VERSION) $(Base.MACOS_PLATFORM_VERSION)` + else + flavor = "gnu" + end + + `$(lld()) -flavor $flavor $default_args` +end + +const WHOLE_ARCHIVE = if Sys.isapple() + "-all_load" +else + "--whole-archive" +end + +const NO_WHOLE_ARCHIVE = if Sys.isapple() + "" +else + "--no-whole-archive" +end + +const SHARED = if Sys.isapple() + "-dylib" +else + "-shared" +end + +is_debug() = ccall(:jl_is_debugbuild, Cint, ()) == 1 +libdir() = abspath(Sys.BINDIR, Base.LIBDIR) +private_libdir() = abspath(Sys.BINDIR, Base.PRIVATE_LIBDIR) +if Sys.iswindows() + shlibdir() = Sys.BINDIR +else + shlibdir() = libdir() +end + +function link_image_cmd(path, out) + PRIVATE_LIBDIR = "-L$(private_libdir())" + SHLIBDIR = "-L$(shlibdir())" + LIBS = is_debug() ? ("-ljulia-debug", "-ljulia-internal-debug") : + ("-ljulia", "-ljulia-internal") + @static if Sys.iswindows() + LIBS = (LIBS..., "-lopenlibm", "-lssp", "-lgcc_s", "-lgcc", "-lmsvcrt") + end + + V = VERBOSE[] ? "--verbose" : "" + `$(ld()) $V $SHARED -o $out $WHOLE_ARCHIVE $path $NO_WHOLE_ARCHIVE $PRIVATE_LIBDIR $SHLIBDIR $LIBS` +end + +function link_image(path, out, internal_stderr::IO=stderr, internal_stdout::IO=stdout) + run(link_image_cmd(path, out), Base.DevNull(), internal_stderr, internal_stdout) +end + +end # module Linking diff --git a/base/loading.jl b/base/loading.jl index 187f1bee4eea0..4e7ca940d15a1 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -42,7 +42,7 @@ elseif Sys.isapple() # char filename[max_filename_length]; # }; # Buffer buf; - # getattrlist(path, &attr_list, &buf, sizeof(buf), FSOPT_NOFOLLOW); + # getattrpath(path, &attr_list, &buf, sizeof(buf), FSOPT_NOFOLLOW); function isfile_casesensitive(path) isaccessiblefile(path) || return false path_basename = String(basename(path)) @@ -167,7 +167,8 @@ function dummy_uuid(project_file::String) end project_path = try realpath(project_file) - catch + catch ex + ex isa IOError || rethrow() project_file end uuid = uuid5(ns_dummy_uuid, project_path) @@ -231,12 +232,10 @@ end function get_updated_dict(p::TOML.Parser, f::CachedTOMLDict) s = stat(f.path) - time_since_cached = time() - f.mtime - rough_mtime_granularity = 0.1 # seconds - # In case the file is being updated faster than the mtime granularity, - # and have the same size after the update we might miss that it changed. Therefore - # always check the hash in case we recently created the cache. 
- if time_since_cached < rough_mtime_granularity || s.inode != f.inode || s.mtime != f.mtime || f.size != s.size + # note, this might miss very rapid in-place updates, such that mtime is + # identical but that is solvable by not doing in-place updates, and not + # rapidly changing these files + if s.inode != f.inode || s.mtime != f.mtime || f.size != s.size content = read(f.path) new_hash = _crc32c(content) if new_hash != f.hash @@ -257,9 +256,12 @@ struct LoadingCache env_project_file::Dict{String, Union{Bool, String}} project_file_manifest_path::Dict{String, Union{Nothing, String}} require_parsed::Set{String} + identified_where::Dict{Tuple{PkgId, String}, Union{Nothing, Tuple{PkgId, Union{Nothing, String}}}} + identified::Dict{String, Union{Nothing, Tuple{PkgId, Union{Nothing, String}}}} + located::Dict{Tuple{PkgId, Union{String, Nothing}}, Union{Tuple{Union{String, Nothing}, Union{String, Nothing}}, Nothing}} end const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing) -LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set()) +LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set(), Dict(), Dict(), Dict()) struct TOMLCache @@ -297,11 +299,68 @@ end # Used by Pkg but not used in loading itself function find_package(arg) - pkg = identify_package(arg) - pkg === nothing && return nothing - return locate_package(pkg) + pkgenv = identify_package_env(arg) + pkgenv === nothing && return nothing + pkg, env = pkgenv + return locate_package(pkg, env) +end + +""" + Base.identify_package_env(name::String)::Union{Tuple{PkgId, String}, Nothing} + Base.identify_package_env(where::Union{Module,PkgId}, name::String)::Union{Tuple{PkgId, String} Nothing} + +Same as [`Base.identify_package`](@ref) except that the path to the environment where the package is identified +is also returned. +""" +identify_package_env(where::Module, name::String) = identify_package_env(PkgId(where), name) +function identify_package_env(where::PkgId, name::String) + cache = LOADING_CACHE[] + if cache !== nothing + pkg_env = get(cache.identified_where, (where, name), nothing) + pkg_env === nothing || return pkg_env + end + pkg_env = nothing + if where.name === name + pkg_env = where, nothing + elseif where.uuid === nothing + pkg_env = identify_package_env(name) # ignore `where` + else + for env in load_path() + pkgid = manifest_deps_get(env, where, name) + pkgid === nothing && continue # not found--keep looking + if pkgid.uuid !== nothing + pkg_env = pkgid, env # found in explicit environment--use it + end + break # found in implicit environment--return "not found" + end + end + if cache !== nothing + cache.identified_where[(where, name)] = pkg_env + end + return pkg_env +end +function identify_package_env(name::String) + cache = LOADING_CACHE[] + if cache !== nothing + pkg_env = get(cache.identified, name, nothing) + pkg_env === nothing || return pkg_env + end + pkg_env = nothing + for env in load_path() + pkg = project_deps_get(env, name) + if pkg !== nothing + pkg_env = pkg, env # found--return it + break + end + end + if cache !== nothing + cache.identified[name] = pkg_env + end + return pkg_env end +_nothing_or_first(x) = x === nothing ? nothing : first(x) + """ Base.identify_package(name::String)::Union{PkgId, Nothing} Base.identify_package(where::Union{Module,PkgId}, name::String)::Union{PkgId, Nothing} @@ -312,11 +371,11 @@ its `PkgId`, or `nothing` if it cannot be found. 
If only the `name` argument is provided, it searches each environment in the stack and its named direct dependencies. -There `where` argument provides the context from where to search for the +The `where` argument provides the context from where to search for the package: in this case it first checks if the name matches the context itself, otherwise it searches all recursive dependencies (from the resolved manifest of each environment) until it locates the context `where`, and from there -identifies the depdencency with with the corresponding name. +identifies the dependency with the corresponding name. ```julia-repl julia> Base.identify_package("Pkg") # Pkg is a dependency of the default environment @@ -325,27 +384,69 @@ Pkg [44cfe95a-1eb2-52ea-b672-e2afdf69b78f] julia> using LinearAlgebra julia> Base.identify_package(LinearAlgebra, "Pkg") # Pkg is not a dependency of LinearAlgebra - -```` +``` """ -identify_package(where::Module, name::String) = identify_package(PkgId(where), name) -function identify_package(where::PkgId, name::String)::Union{Nothing,PkgId} - where.name === name && return where - where.uuid === nothing && return identify_package(name) # ignore `where` - for env in load_path() - pkgid = manifest_deps_get(env, where, name) - pkgid === nothing && continue # not found--keep looking - pkgid.uuid === nothing || return pkgid # found in explicit environment--use it - return nothing # found in implicit environment--return "not found" +identify_package(where::Module, name::String) = _nothing_or_first(identify_package_env(where, name)) +identify_package(where::PkgId, name::String) = _nothing_or_first(identify_package_env(where, name)) +identify_package(name::String) = _nothing_or_first(identify_package_env(name)) + +function locate_package_env(pkg::PkgId, stopenv::Union{String, Nothing}=nothing) + cache = LOADING_CACHE[] + if cache !== nothing + pathenv = get(cache.located, (pkg, stopenv), nothing) + pathenv === nothing || return pathenv end - return nothing -end -function identify_package(name::String)::Union{Nothing,PkgId} - for env in load_path() - uuid = project_deps_get(env, name) - uuid === nothing || return uuid # found--return it + path = nothing + env′ = nothing + if pkg.uuid === nothing + for env in load_path() + env′ = env + # look for the toplevel pkg `pkg.name` in this entry + found = project_deps_get(env, pkg.name) + if found !== nothing + @assert found.name == pkg.name + if found.uuid === nothing + # pkg.name is present in this directory or project file, + # return the path the entry point for the code, if it could be found + # otherwise, signal failure + path = implicit_manifest_uuid_path(env, pkg) + @goto done + end + end + if !(loading_extension || precompiling_extension) + stopenv == env && @goto done + end + end + else + for env in load_path() + env′ = env + path = manifest_uuid_path(env, pkg) + # missing is used as a sentinel to stop looking further down in envs + if path === missing + path = nothing + @goto done + end + if path !== nothing + path = entry_path(path, pkg.name) + @goto done + end + if !(loading_extension || precompiling_extension) + stopenv == env && break + end + end + # Allow loading of stdlibs if the name/uuid are given + # e.g. 
if they have been explicitly added to the project/manifest + mbypath = manifest_uuid_path(Sys.STDLIB, pkg) + if mbypath isa String + path = entry_path(mbypath, pkg.name) + @goto done + end end - return nothing + @label done + if cache !== nothing + cache.located[(pkg, stopenv)] = path, env′ + end + return path, env′ end """ @@ -362,32 +463,8 @@ julia> Base.locate_package(pkg) "/path/to/julia/stdlib/v$(VERSION.major).$(VERSION.minor)/Pkg/src/Pkg.jl" ``` """ -function locate_package(pkg::PkgId)::Union{Nothing,String} - if pkg.uuid === nothing - for env in load_path() - # look for the toplevel pkg `pkg.name` in this entry - found = project_deps_get(env, pkg.name) - found === nothing && continue - if pkg == found - # pkg.name is present in this directory or project file, - # return the path the entry point for the code, if it could be found - # otherwise, signal failure - return implicit_manifest_uuid_path(env, pkg) - end - @assert found.uuid !== nothing - return locate_package(found) # restart search now that we know the uuid for pkg - end - else - for env in load_path() - path = manifest_uuid_path(env, pkg) - path === nothing || return entry_path(path, pkg.name) - end - # Allow loading of stdlibs if the name/uuid are given - # e.g. if they have been explicitly added to the project/manifest - path = manifest_uuid_path(Sys.STDLIB, pkg) - path === nothing || return entry_path(path, pkg.name) - end - return nothing +function locate_package(pkg::PkgId, stopenv::Union{String, Nothing}=nothing)::Union{Nothing,String} + _nothing_or_first(locate_package_env(pkg, stopenv)) end """ @@ -398,6 +475,8 @@ or `nothing` if `m` was not imported from a package. Use [`dirname`](@ref) to get the directory part and [`basename`](@ref) to get the file name part of the path. + +See also [`pkgdir`](@ref). """ function pathof(m::Module) @lock require_lock begin @@ -414,12 +493,12 @@ end """ pkgdir(m::Module[, paths::String...]) -Return the root directory of the package that imported module `m`, -or `nothing` if `m` was not imported from a package. Optionally further +Return the root directory of the package that declared module `m`, +or `nothing` if `m` was not declared in a package. Optionally further path component strings can be provided to construct a path within the package root. -To get the root directory of the package that imported the current module +To get the root directory of the package that implements the current module the form `pkgdir(@__MODULE__)` can be used. ```julia-repl @@ -430,6 +509,8 @@ julia> pkgdir(Foo, "src", "file.jl") "/path/to/Foo.jl/src/file.jl" ``` +See also [`pathof`](@ref). + !!! compat "Julia 1.7" The optional argument `paths` requires at least Julia 1.7. """ @@ -440,6 +521,18 @@ function pkgdir(m::Module, paths::String...) return joinpath(dirname(dirname(path)), paths...) end +function get_pkgversion_from_path(path) + project_file = locate_project_file(path) + if project_file isa String + d = parsed_toml(project_file) + v = get(d, "version", nothing) + if v !== nothing + return VersionNumber(v::String) + end + end + return nothing +end + """ pkgversion(m::Module) @@ -457,16 +550,28 @@ the form `pkgversion(@__MODULE__)` can be used. This function was introduced in Julia 1.9. """ function pkgversion(m::Module) - rootmodule = moduleroot(m) - pkg = PkgId(rootmodule) - pkgorigin = get(pkgorigins, pkg, nothing) - return pkgorigin === nothing ? 
nothing : pkgorigin.version + path = pkgdir(m) + path === nothing && return nothing + @lock require_lock begin + v = get_pkgversion_from_path(path) + pkgorigin = get(pkgorigins, PkgId(moduleroot(m)), nothing) + # Cache the version + if pkgorigin !== nothing && pkgorigin.version === nothing + pkgorigin.version = v + end + return v + end end ## generic project & manifest API ## const project_names = ("JuliaProject.toml", "Project.toml") -const manifest_names = ("JuliaManifest.toml", "Manifest.toml") +const manifest_names = ( + "JuliaManifest-v$(VERSION.major).$(VERSION.minor).toml", + "Manifest-v$(VERSION.major).$(VERSION.minor).toml", + "JuliaManifest.toml", + "Manifest.toml", +) const preferences_names = ("JuliaLocalPreferences.toml", "LocalPreferences.toml") function locate_project_file(env::String) @@ -480,7 +585,7 @@ function locate_project_file(env::String) end # classify the LOAD_PATH entry to be one of: -# - `false`: nonexistant / nothing to see here +# - `false`: nonexistent / nothing to see here # - `true`: `env` is an implicit environment # - `path`: the path of an explicit project file function env_project_file(env::String)::Union{Bool,String} @@ -527,8 +632,26 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi pkg_uuid = explicit_project_deps_get(project_file, name) return PkgId(pkg_uuid, name) end + d = parsed_toml(project_file) + exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} + if exts !== nothing + # Check if `where` is an extension of the project + if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name) + # Extensions can load weak deps... + weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing} + if weakdeps !== nothing + wuuid = get(weakdeps, name, nothing)::Union{String, Nothing} + if wuuid !== nothing + return PkgId(UUID(wuuid), name) + end + end + # ... 
and they can load same deps as the project itself + mby_uuid = explicit_project_deps_get(project_file, name) + mby_uuid === nothing || return PkgId(mby_uuid, name) + end + end # look for manifest file and `where` stanza - return explicit_manifest_deps_get(project_file, uuid, name) + return explicit_manifest_deps_get(project_file, where, name) elseif project_file # if env names a directory, search it return implicit_manifest_deps_get(env, where, name) @@ -536,19 +659,53 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi return nothing end -function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String} +function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String,Missing} project_file = env_project_file(env) if project_file isa String proj = project_file_name_uuid(project_file, pkg.name) if proj == pkg # if `pkg` matches the project, return the project itself - return project_file_path(project_file, pkg.name) + return project_file_path(project_file) end + mby_ext = project_file_ext_path(project_file, pkg.name) + mby_ext === nothing || return mby_ext # look for manifest file and `where` stanza return explicit_manifest_uuid_path(project_file, pkg) elseif project_file # if env names a directory, search it - return implicit_manifest_uuid_path(env, pkg) + proj = implicit_manifest_uuid_path(env, pkg) + proj === nothing || return proj + # if not found + parentid = get(EXT_PRIMED, pkg, nothing) + if parentid !== nothing + _, parent_project_file = entry_point_and_project_file(env, parentid.name) + if parent_project_file !== nothing + parentproj = project_file_name_uuid(parent_project_file, parentid.name) + if parentproj == parentid + mby_ext = project_file_ext_path(parent_project_file, pkg.name) + mby_ext === nothing || return mby_ext + end + end + end + end + return nothing +end + + +function find_ext_path(project_path::String, extname::String) + extfiledir = joinpath(project_path, "ext", extname, extname * ".jl") + isfile(extfiledir) && return extfiledir + return joinpath(project_path, "ext", extname * ".jl") +end + +function project_file_ext_path(project_file::String, name::String) + d = parsed_toml(project_file) + p = project_file_path(project_file) + exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} + if exts !== nothing + if name in keys(exts) + return find_ext_path(p, name) + end end return nothing end @@ -562,7 +719,7 @@ function project_file_name_uuid(project_file::String, name::String)::PkgId return PkgId(uuid, name) end -function project_file_path(project_file::String, name::String) +function project_file_path(project_file::String) d = parsed_toml(project_file) joinpath(dirname(project_file), get(d, "path", "")::String) end @@ -655,10 +812,10 @@ function explicit_project_deps_get(project_file::String, name::String)::Union{No return nothing end -function is_v1_format_manifest(raw_manifest::Dict) +function is_v1_format_manifest(raw_manifest::Dict{String}) if haskey(raw_manifest, "manifest_format") mf = raw_manifest["manifest_format"] - if mf isa Dict && haskey(mf, "uuid") + if mf isa Dict{String} && haskey(mf, "uuid") # the off-chance where an old format manifest has a dep called "manifest_format" return true end @@ -680,7 +837,7 @@ end # find `where` stanza and return the PkgId for `name` # return `nothing` if it did not find `where` (indicating caller should continue searching) -function explicit_manifest_deps_get(project_file::String, where::UUID, name::String)::Union{Nothing,PkgId} +function 
explicit_manifest_deps_get(project_file::String, where::PkgId, name::String)::Union{Nothing,PkgId} manifest_file = project_file_manifest_path(project_file) manifest_file === nothing && return nothing # manifest not found--keep searching LOAD_PATH d = get_deps(parsed_toml(manifest_file)) @@ -692,16 +849,15 @@ function explicit_manifest_deps_get(project_file::String, where::UUID, name::Str entry = entry::Dict{String, Any} uuid = get(entry, "uuid", nothing)::Union{String, Nothing} uuid === nothing && continue - if UUID(uuid) === where + if UUID(uuid) === where.uuid found_where = true # deps is either a list of names (deps = ["DepA", "DepB"]) or # a table of entries (deps = {"DepA" = "6ea...", "DepB" = "55d..."} deps = get(entry, "deps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} - deps === nothing && continue if deps isa Vector{String} found_name = name in deps break - else + elseif deps isa Dict{String, Any} deps = deps::Dict{String, Any} for (dep, uuid) in deps uuid::String @@ -710,6 +866,36 @@ function explicit_manifest_deps_get(project_file::String, where::UUID, name::Str end end end + else # Check for extensions + extensions = get(entry, "extensions", nothing) + if extensions !== nothing + if haskey(extensions, where.name) && where.uuid == uuid5(UUID(uuid), where.name) + found_where = true + if name == dep_name + return PkgId(UUID(uuid), name) + end + exts = extensions[where.name]::Union{String, Vector{String}} + if (exts isa String && name == exts) || (exts isa Vector{String} && name in exts) + weakdeps = get(entry, "weakdeps", nothing)::Union{Vector{String}, Dict{String, Any}, Nothing} + if weakdeps !== nothing + if weakdeps isa Vector{String} + found_name = name in weakdeps + break + elseif weakdeps isa Dict{String, Any} + weakdeps = weakdeps::Dict{String, Any} + for (dep, uuid) in weakdeps + uuid::String + if dep === name + return PkgId(UUID(uuid), name) + end + end + end + end + end + # `name` is not an ext, do standard lookup as if this was the parent + return identify_package(PkgId(UUID(uuid), dep_name), name) + end + end end end end @@ -727,19 +913,36 @@ function explicit_manifest_deps_get(project_file::String, where::UUID, name::Str end # find `uuid` stanza, return the corresponding path -function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{Nothing,String} +function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{Nothing,String,Missing} manifest_file = project_file_manifest_path(project_file) manifest_file === nothing && return nothing # no manifest, skip env d = get_deps(parsed_toml(manifest_file)) entries = get(d, pkg.name, nothing)::Union{Nothing, Vector{Any}} - entries === nothing && return nothing # TODO: allow name to mismatch? 
- for entry in entries - entry = entry::Dict{String, Any} - uuid = get(entry, "uuid", nothing)::Union{Nothing, String} - uuid === nothing && continue - if UUID(uuid) === pkg.uuid - return explicit_manifest_entry_path(manifest_file, pkg, entry) + if entries !== nothing + for entry in entries + entry = entry::Dict{String, Any} + uuid = get(entry, "uuid", nothing)::Union{Nothing, String} + uuid === nothing && continue + if UUID(uuid) === pkg.uuid + return explicit_manifest_entry_path(manifest_file, pkg, entry) + end + end + end + # Extensions + for (name, entries) in d + entries = entries::Vector{Any} + for entry in entries + uuid = get(entry, "uuid", nothing)::Union{Nothing, String} + extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}} + if extensions !== nothing && haskey(extensions, pkg.name) && uuid !== nothing && uuid5(UUID(uuid), pkg.name) == pkg.uuid + parent_path = locate_package(PkgId(UUID(uuid), name)) + if parent_path === nothing + error("failed to find source of parent package: \"$name\"") + end + p = normpath(dirname(parent_path), "..") + return find_ext_path(p, pkg.name) + end end end return nothing @@ -762,7 +965,8 @@ function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry:: ispath(path) && return abspath(path) end end - return nothing + # no depot contains the package, return missing to stop looking + return missing end ## implicit project & manifest API ## @@ -810,15 +1014,18 @@ end function find_source_file(path::AbstractString) (isabspath(path) || isfile(path)) && return path - base_path = joinpath(Sys.BINDIR, DATAROOTDIR, "julia", "src", "base", path) + base_path = joinpath(Sys.BINDIR, DATAROOTDIR, "julia", "base", path) return isfile(base_path) ? normpath(base_path) : nothing end -cache_file_entry(pkg::PkgId) = joinpath( - "compiled", - "v$(VERSION.major).$(VERSION.minor)", - pkg.uuid === nothing ? "" : pkg.name), - pkg.uuid === nothing ? pkg.name : package_slug(pkg.uuid) +function cache_file_entry(pkg::PkgId) + uuid = pkg.uuid + return joinpath( + "compiled", + "v$(VERSION.major).$(VERSION.minor)", + uuid === nothing ? "" : pkg.name), + uuid === nothing ? pkg.name : package_slug(uuid) +end function find_all_in_cache_path(pkg::PkgId) paths = String[] @@ -827,7 +1034,8 @@ function find_all_in_cache_path(pkg::PkgId) isdir(path) || continue for file in readdir(path, sort = false) # no sort given we sort later if !((pkg.uuid === nothing && file == entryfile * ".ji") || - (pkg.uuid !== nothing && startswith(file, entryfile * "_"))) + (pkg.uuid !== nothing && startswith(file, entryfile * "_") && + endswith(file, ".ji"))) continue end filepath = joinpath(path, file) @@ -835,24 +1043,121 @@ function find_all_in_cache_path(pkg::PkgId) end end if length(paths) > 1 - # allocating the sort vector is less expensive than using sort!(.. 
by=mtime), which would - # call the relatively slow mtime multiple times per path - p = sortperm(mtime.(paths), rev = true) + function sort_by(path) + # when using pkgimages, consider those cache files first + pkgimage = if JLOptions().use_pkgimages != 0 + io = open(path, "r") + try + if iszero(isvalid_cache_header(io)) + false + else + _, _, _, _, _, _, _, flags = parse_cache_header(io, path) + CacheFlags(flags).use_pkgimages + end + finally + close(io) + end + else + false + end + (; pkgimage, mtime=mtime(path)) + end + function sort_lt(a, b) + if a.pkgimage != b.pkgimage + return a.pkgimage < b.pkgimage + end + return a.mtime < b.mtime + end + + # allocating the sort vector is less expensive than using sort!(.. by=sort_by), + # which would call the relatively slow mtime multiple times per path + p = sortperm(sort_by.(paths), lt=sort_lt, rev=true) return paths[p] else return paths end end +ocachefile_from_cachefile(cachefile) = string(chopsuffix(cachefile, ".ji"), ".", Libc.Libdl.dlext) +cachefile_from_ocachefile(cachefile) = string(chopsuffix(cachefile, ".$(Libc.Libdl.dlext)"), ".ji") + + +# use an Int counter so that nested @time_imports calls all remain open +const TIMING_IMPORTS = Threads.Atomic{Int}(0) + # these return either the array of modules loaded from the path / content given # or an Exception that describes why it couldn't be loaded # and it reconnects the Base.Docs.META -function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any}) - sv = ccall(:jl_restore_incremental, Any, (Cstring, Any), path, depmods) +function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing) + if isnothing(ignore_native) + if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0 + ignore_native = false + else + io = open(path, "r") + try + iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") + _, (includes, _, _), _, _, _, _, _, _ = parse_cache_header(io, path) + ignore_native = pkg_tracked(includes) + finally + close(io) + end + end + end + assert_havelock(require_lock) + timing_imports = TIMING_IMPORTS[] > 0 + try + if timing_imports + t_before = time_ns() + cumulative_compile_timing(true) + t_comp_before = cumulative_compile_time_ns() + end + + if ocachepath !== nothing + @debug "Loading object cache file $ocachepath for $pkg" + sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring, Cint), ocachepath, depmods, false, pkg.name, ignore_native) + else + @debug "Loading cache file $path for $pkg" + sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), path, depmods, false, pkg.name) + end if isa(sv, Exception) return sv end - sv = sv::SimpleVector + + restored = register_restored_modules(sv, pkg, path) + + for M in restored + M = M::Module + if parentmodule(M) === M && PkgId(M) == pkg + if timing_imports + elapsed = round((time_ns() - t_before) / 1e6, digits = 1) + comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before + print(lpad(elapsed, 9), " ms ") + parentid = get(EXT_PRIMED, pkg, nothing) + if parentid !== nothing + print(parentid.name, " → ") + end + print(pkg.name) + if comp_time > 0 + printstyled(" ", Ryu.writefixed(Float64(100 * comp_time / (elapsed * 1e6)), 2), "% compilation time", color = Base.info_color()) + end + if recomp_time > 0 + perc = Float64(100 * recomp_time / comp_time) + printstyled(" (", perc < 1 ? 
"<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color()) + end + println() + end + return M + end + end + return ErrorException("Required dependency $pkg failed to load from a cache file.") + + finally + timing_imports && cumulative_compile_timing(false) + end +end + +function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String) + # This function is also used by PkgCacheInspector.jl restored = sv[1]::Vector{Any} for M in restored M = M::Module @@ -872,7 +1177,9 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any} if !isempty(inits) unlock(require_lock) # temporarily _unlock_ during these callbacks try - ccall(:jl_init_restored_modules, Cvoid, (Any,), inits) + for (i, mod) in pairs(inits) + run_module_init(mod, i) + end finally lock(require_lock) end @@ -880,7 +1187,41 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any} return restored end +function run_module_init(mod::Module, i::Int=1) + # `i` informs ordering for the `@time_imports` report formatting + if TIMING_IMPORTS[] == 0 + ccall(:jl_init_restored_module, Cvoid, (Any,), mod) + else + if isdefined(mod, :__init__) + connector = i > 1 ? "├" : "┌" + printstyled(" $connector ", color = :light_black) + + elapsedtime = time_ns() + cumulative_compile_timing(true) + compile_elapsedtimes = cumulative_compile_time_ns() + + ccall(:jl_init_restored_module, Cvoid, (Any,), mod) + + elapsedtime = (time_ns() - elapsedtime) / 1e6 + cumulative_compile_timing(false); + comp_time, recomp_time = (cumulative_compile_time_ns() .- compile_elapsedtimes) ./ 1e6 + + print(round(elapsedtime, digits=1), " ms $mod.__init__() ") + if comp_time > 0 + printstyled(Ryu.writefixed(Float64(100 * comp_time / elapsedtime), 2), "% compilation time", color = Base.info_color()) + end + if recomp_time > 0 + perc = Float64(100 * recomp_time / comp_time) + printstyled(" (", perc < 1 ? 
"<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color()) + end + println() + end + end +end + function run_package_callbacks(modkey::PkgId) + run_extension_callbacks(modkey) + assert_havelock(require_lock) unlock(require_lock) try for callback in package_callbacks @@ -896,40 +1237,380 @@ function run_package_callbacks(modkey::PkgId) nothing end -function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::Union{Nothing, String}, depth::Int = 0) + +############## +# Extensions # +############## + +mutable struct ExtensionId + const id::PkgId + const parentid::PkgId # just need the name, for printing + ntriggers::Int # how many more packages must be defined until this is loaded +end + +const EXT_PRIMED = Dict{PkgId, PkgId}() # Extension -> Parent +const EXT_DORMITORY = Dict{PkgId,Vector{ExtensionId}}() # Trigger -> Extensions that can be triggered by it +const EXT_DORMITORY_FAILED = ExtensionId[] + +function insert_extension_triggers(pkg::PkgId) + pkg.uuid === nothing && return + path_env_loc = locate_package_env(pkg) + path_env_loc === nothing && return + path, env_loc = path_env_loc + if path === nothing || env_loc === nothing + return + end + insert_extension_triggers(env_loc, pkg) +end + +function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missing} + project_file = env_project_file(env) + if project_file isa String || project_file + implicit_project_file = project_file + if !(implicit_project_file isa String) + # if env names a directory, search it for an implicit project file (for stdlibs) + path, implicit_project_file = entry_point_and_project_file(env, pkg.name) + if !(implicit_project_file isa String) + return nothing + end + end + # Look in project for extensions to insert + proj_pkg = project_file_name_uuid(implicit_project_file, pkg.name) + if pkg == proj_pkg + d_proj = parsed_toml(implicit_project_file) + weakdeps = get(d_proj, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}} + extensions = get(d_proj, "extensions", nothing)::Union{Nothing, Dict{String, Any}} + extensions === nothing && return + weakdeps === nothing && return + if weakdeps isa Dict{String, Any} + return _insert_extension_triggers(pkg, extensions, weakdeps) + end + end + + # Now look in manifest + project_file isa String || return nothing + manifest_file = project_file_manifest_path(project_file) + manifest_file === nothing && return + d = get_deps(parsed_toml(manifest_file)) + for (dep_name, entries) in d + entries::Vector{Any} + for entry in entries + entry = entry::Dict{String, Any} + uuid = get(entry, "uuid", nothing)::Union{String, Nothing} + uuid === nothing && continue + if UUID(uuid) == pkg.uuid + weakdeps = get(entry, "weakdeps", nothing)::Union{Nothing, Vector{String}, Dict{String,Any}} + extensions = get(entry, "extensions", nothing)::Union{Nothing, Dict{String, Any}} + extensions === nothing && return + weakdeps === nothing && return + if weakdeps isa Dict{String, Any} + return _insert_extension_triggers(pkg, extensions, weakdeps) + end + + d_weakdeps = Dict{String, Any}() + for (dep_name, entries) in d + dep_name in weakdeps || continue + entries::Vector{Any} + if length(entries) != 1 + error("expected a single entry for $(repr(dep_name)) in $(repr(project_file))") + end + entry = first(entries)::Dict{String, Any} + uuid = entry["uuid"]::String + d_weakdeps[dep_name] = uuid + end + @assert length(d_weakdeps) == length(weakdeps) + return _insert_extension_triggers(pkg, extensions, d_weakdeps) + end + end + end + end + 
return nothing +end + +function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, weakdeps::Dict{String, Any}) + for (ext, triggers) in extensions + triggers = triggers::Union{String, Vector{String}} + triggers isa String && (triggers = [triggers]) + id = PkgId(uuid5(parent.uuid::UUID, ext), ext) + if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id) + continue # extension is already primed or loaded, don't add it again + end + EXT_PRIMED[id] = parent + gid = ExtensionId(id, parent, 1 + length(triggers)) + trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, parent) + push!(trigger1, gid) + for trigger in triggers + # TODO: Better error message if this lookup fails? + uuid_trigger = UUID(weakdeps[trigger]::String) + trigger_id = PkgId(uuid_trigger, trigger) + if !haskey(Base.loaded_modules, trigger_id) || haskey(package_locks, trigger_id) + trigger1 = get!(Vector{ExtensionId}, EXT_DORMITORY, trigger_id) + push!(trigger1, gid) + else + gid.ntriggers -= 1 + end + end + end +end + +loading_extension::Bool = false +precompiling_extension::Bool = false +function run_extension_callbacks(extid::ExtensionId) + assert_havelock(require_lock) + succeeded = try + # Used by Distributed to now load extensions in the package callback + global loading_extension = true + _require_prelocked(extid.id) + @debug "Extension $(extid.id.name) of $(extid.parentid.name) loaded" + true + catch + # Try to continue loading if loading an extension errors + errs = current_exceptions() + @error "Error during loading of extension $(extid.id.name) of $(extid.parentid.name), \ + use `Base.retry_load_extensions()` to retry." exception=errs + false + finally + global loading_extension = false + end + return succeeded +end + +function run_extension_callbacks(pkgid::PkgId) + assert_havelock(require_lock) + # take ownership of extids that depend on this pkgid + extids = pop!(EXT_DORMITORY, pkgid, nothing) + extids === nothing && return + for extid in extids + if extid.ntriggers > 0 + # indicate pkgid is loaded + extid.ntriggers -= 1 + end + if extid.ntriggers < 0 + # indicate pkgid is loaded + extid.ntriggers += 1 + succeeded = false + else + succeeded = true + end + if extid.ntriggers == 0 + # actually load extid, now that all dependencies are met, + # and record the result + succeeded = succeeded && run_extension_callbacks(extid) + succeeded || push!(EXT_DORMITORY_FAILED, extid) + end + end + return +end + +""" + retry_load_extensions() + +Loads all the (not yet loaded) extensions that have their extension-dependencies loaded. +This is used in cases where the automatic loading of an extension failed +due to some problem with the extension. Instead of restarting the Julia session, +the extension can be fixed, and this function run. +""" +function retry_load_extensions() + @lock require_lock begin + # this copy is desired since run_extension_callbacks will release this lock + # so this can still mutate the list to drop successful ones + failed = copy(EXT_DORMITORY_FAILED) + empty!(EXT_DORMITORY_FAILED) + filter!(failed) do extid + return !run_extension_callbacks(extid) + end + prepend!(EXT_DORMITORY_FAILED, failed) + end + return +end + +""" + get_extension(parent::Module, extension::Symbol) + +Return the module for `extension` of `parent` or return `nothing` if the extension is not loaded. 
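+
+For example, assuming a (hypothetical) package `Foo` that declares an extension
+`FooBarExt` triggered by the weak dependency `Bar`:
+
+```julia
+using Foo, Bar                       # loading both packages triggers FooBarExt
+ext = Base.get_extension(Foo, :FooBarExt)
+ext === nothing && @warn "FooBarExt failed to load"
+```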
+""" +get_extension(parent::Module, ext::Symbol) = get_extension(PkgId(parent), ext) +function get_extension(parentid::PkgId, ext::Symbol) + parentid.uuid === nothing && return nothing + extid = PkgId(uuid5(parentid.uuid, string(ext)), string(ext)) + return get(loaded_modules, extid, nothing) +end + +# End extensions + +# should sync with the types of arguments of `stale_cachefile` +const StaleCacheKey = Tuple{Base.PkgId, UInt128, String, String} + +""" + Base.isprecompiled(pkg::PkgId; ignore_loaded::Bool=false) + +Returns whether a given PkgId within the active project is precompiled. + +By default this check observes the same approach that code loading takes +with respect to when different versions of dependencies are currently loaded +to that which is expected. To ignore loaded modules and answer as if in a +fresh julia session specify `ignore_loaded=true`. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. +""" +function isprecompiled(pkg::PkgId; + ignore_loaded::Bool=false, + stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(), + cachepaths::Vector{String}=Base.find_all_in_cache_path(pkg), + sourcepath::Union{String,Nothing}=Base.locate_package(pkg) + ) + isnothing(sourcepath) && error("Cannot locate source for $(repr(pkg))") + for path_to_try in cachepaths + staledeps = stale_cachefile(sourcepath, path_to_try, ignore_loaded = true) + if staledeps === true + continue + end + staledeps, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} + # finish checking staledeps module graph + for i in 1:length(staledeps) + dep = staledeps[i] + dep isa Module && continue + modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} + modpaths = find_all_in_cache_path(modkey) + for modpath_to_try in modpaths::Vector{String} + stale_cache_key = (modkey, modbuild_id, modpath, modpath_to_try)::StaleCacheKey + if get!(() -> stale_cachefile(stale_cache_key...; ignore_loaded) === true, + stale_cache, stale_cache_key) + continue + end + @goto check_next_dep + end + @goto check_next_path + @label check_next_dep + end + try + # update timestamp of precompilation file so that it is the first to be tried by code loading + touch(path_to_try) + catch ex + # file might be read-only and then we fail to update timestamp, which is fine + ex isa IOError || rethrow() + end + return true + @label check_next_path + end + return false +end + +# loads a precompile cache file, after checking stale_cachefile tests +function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128) + assert_havelock(require_lock) + loaded = nothing if root_module_exists(modkey) - M = root_module(modkey) - if PkgId(M) == modkey && module_build_id(M) === build_id - return M + loaded = root_module(modkey) + else + loaded = start_loading(modkey) + if loaded === nothing + try + modpath = locate_package(modkey) + modpath === nothing && return nothing + set_pkgorigin_version_path(modkey, String(modpath)) + loaded = _require_search_from_serialized(modkey, String(modpath), build_id) + finally + end_loading(modkey, loaded) + end + if loaded isa Module + insert_extension_triggers(modkey) + run_package_callbacks(modkey) + end end + end + if !(loaded isa Module) || PkgId(loaded) != modkey + return ErrorException("Required dependency $modkey failed to load from a cache file.") + end + return loaded +end + +# loads a precompile cache file, ignoring stale_cachefile tests +# assuming all depmods are already loaded and everything is valid +function _tryrequire_from_serialized(modkey::PkgId, path::String, 
ocachepath::Union{Nothing, String}, sourcepath::String, depmods::Vector{Any}) + assert_havelock(require_lock) + loaded = nothing + if root_module_exists(modkey) + loaded = root_module(modkey) else - if modpath === nothing - modpath = locate_package(modkey) - modpath === nothing && return nothing - end - mod = _require_search_from_serialized(modkey, String(modpath), depth) - get!(PkgOrigin, pkgorigins, modkey).path = modpath - if !isa(mod, Bool) - run_package_callbacks(modkey) - for M in mod::Vector{Any} - M = M::Module - if PkgId(M) == modkey && module_build_id(M) === build_id - return M + loaded = start_loading(modkey) + if loaded === nothing + try + for i in 1:length(depmods) + dep = depmods[i] + dep isa Module && continue + _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128} + @assert root_module_exists(depkey) + dep = root_module(depkey) + depmods[i] = dep + end + set_pkgorigin_version_path(modkey, sourcepath) + loaded = _include_from_serialized(modkey, path, ocachepath, depmods) + finally + end_loading(modkey, loaded) + end + if loaded isa Module + insert_extension_triggers(modkey) + run_package_callbacks(modkey) + end + end + end + if !(loaded isa Module) || PkgId(loaded) != modkey + return ErrorException("Required dependency $modkey failed to load from a cache file.") + end + return loaded +end + +# returns whether the package is tracked in coverage or malloc tracking based on +# JLOptions and includes +function pkg_tracked(includes) + if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0 + return false + elseif JLOptions().code_coverage == 1 || JLOptions().malloc_log == 1 # user + # Just say true. Pkgimages aren't in Base + return true + elseif JLOptions().code_coverage == 2 || JLOptions().malloc_log == 2 # all + return true + elseif JLOptions().code_coverage == 3 || JLOptions().malloc_log == 3 # tracked path + if JLOptions().tracked_path == C_NULL + return false + else + tracked_path = unsafe_string(JLOptions().tracked_path) + if isempty(tracked_path) + return false + else + return any(includes) do inc + startswith(inc.filename, tracked_path) end end end end - return nothing end -function _require_from_serialized(pkg::PkgId, path::String) - # loads a precompile cache file, ignoring stale_cachfile tests - # load all of the dependent modules first +# loads a precompile cache file, ignoring stale_cachefile tests +# load the best available (non-stale) version of all dependent modules first +function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}) + assert_havelock(require_lock) local depmodnames io = open(path, "r") + ignore_native = false try - isvalid_cache_header(io) || return ArgumentError("Invalid header in cache file $path.") - depmodnames = parse_cache_header(io)[3] + iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.") + _, (includes, _, _), depmodnames, _, _, _, clone_targets, _ = parse_cache_header(io, path) + + ignore_native = pkg_tracked(includes) + + pkgimage = !isempty(clone_targets) + if pkgimage + ocachepath !== nothing || return ArgumentError("Expected ocachepath to be provided") + isfile(ocachepath) || return ArgumentError("Ocachepath $ocachepath is not a file.") + ocachepath == ocachefile_from_cachefile(path) || return ArgumentError("$ocachepath is not the expected ocachefile") + # TODO: Check for valid clone_targets? 
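+            # validate the checksum of the object cache file before trusting it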
+ isvalid_pkgimage_crc(io, ocachepath) || return ArgumentError("Invalid checksum in cache file $ocachepath.") + else + @assert ocachepath === nothing + end isvalid_file_crc(io) || return ArgumentError("Invalid checksum in cache file $path.") finally close(io) @@ -938,86 +1619,132 @@ function _require_from_serialized(pkg::PkgId, path::String) depmods = Vector{Any}(undef, ndeps) for i in 1:ndeps modkey, build_id = depmodnames[i] - dep = _tryrequire_from_serialized(modkey, build_id, nothing) - dep === nothing && return ErrorException("Required dependency $modkey failed to load from a cache file.") - depmods[i] = dep::Module + dep = _tryrequire_from_serialized(modkey, build_id) + if !isa(dep, Module) + return dep + end + depmods[i] = dep end # then load the file - return _include_from_serialized(pkg, path, depmods) + return _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native) end -# use an Int counter so that nested @time_imports calls all remain open -const TIMING_IMPORTS = Threads.Atomic{Int}(0) - -# returns `true` if require found a precompile cache for this sourcepath, but couldn't load it -# returns `false` if the module isn't known to be precompilable +# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it # returns the set of modules restored if the cache load succeeded -@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, depth::Int = 0) - timing_imports = TIMING_IMPORTS[] > 0 - try - if timing_imports - t_before = time_ns() - cumulative_compile_timing(true) - t_comp_before = cumulative_compile_time_ns() - end +@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128) + assert_havelock(require_lock) paths = find_all_in_cache_path(pkg) for path_to_try in paths::Vector{String} - staledeps = stale_cachefile(sourcepath, path_to_try) + staledeps = stale_cachefile(pkg, build_id, sourcepath, path_to_try) if staledeps === true continue end - staledeps = staledeps::Vector{Any} + staledeps, ocachefile = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} + # finish checking staledeps module graph + for i in 1:length(staledeps) + dep = staledeps[i] + dep isa Module && continue + modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} + modpaths = find_all_in_cache_path(modkey) + for modpath_to_try in modpaths + modstaledeps = stale_cachefile(modkey, modbuild_id, modpath, modpath_to_try) + if modstaledeps === true + continue + end + modstaledeps, modocachepath = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}} + staledeps[i] = (modpath, modkey, modpath_to_try, modstaledeps, modocachepath) + @goto check_next_dep + end + @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache." 
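+                # no usable cache file exists for this dependency, so reject this candidate and try the next cache file for `pkg`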
+ @goto check_next_path + @label check_next_dep + end try touch(path_to_try) # update timestamp of precompilation file - catch # file might be read-only and then we fail to update timestamp, which is fine + catch ex # file might be read-only and then we fail to update timestamp, which is fine + ex isa IOError || rethrow() end # finish loading module graph into staledeps for i in 1:length(staledeps) dep = staledeps[i] dep isa Module && continue - modpath, modkey, build_id = dep::Tuple{String, PkgId, UInt64} - dep = _tryrequire_from_serialized(modkey, build_id, modpath, depth + 1) - if dep === nothing - @debug "Required dependency $modkey failed to load from cache file for $modpath." - staledeps = true - break + modpath, modkey, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, String, Vector{Any}, Union{Nothing, String}} + dep = _tryrequire_from_serialized(modkey, modcachepath, modocachepath, modpath, modstaledeps) + if !isa(dep, Module) + @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." exception=dep + @goto check_next_path + end + staledeps[i] = dep + end + restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps) + isa(restored, Module) && return restored + @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored + continue + @label check_next_path + end + return nothing +end + +# to synchronize multiple tasks trying to import/using something +const package_locks = Dict{PkgId,Pair{Task,Threads.Condition}}() + +debug_loading_deadlocks::Bool = true # Enable a slightly more expensive, but more complete algorithm that can handle simultaneous tasks. + # This only triggers if you have multiple tasks trying to load the same package at the same time, + # so it is unlikely to make a difference normally. +function start_loading(modkey::PkgId) + # handle recursive calls to require + assert_havelock(require_lock) + loading = get(package_locks, modkey, nothing) + if loading !== nothing + # load already in progress for this module on the task + task, cond = loading + deps = String[modkey.name] + pkgid = modkey + assert_havelock(cond.lock) + if debug_loading_deadlocks && current_task() !== task + waiters = Dict{Task,Pair{Task,PkgId}}() # invert to track waiting tasks => loading tasks + for each in package_locks + cond2 = each[2][2] + assert_havelock(cond2.lock) + for waiting in cond2.waitq + push!(waiters, waiting => (each[2][1] => each[1])) + end + end + while true + running = get(waiters, task, nothing) + running === nothing && break + task, pkgid = running + push!(deps, pkgid.name) + task === current_task() && break end - staledeps[i] = dep::Module - end - if staledeps === true - continue end - restored = _include_from_serialized(pkg, path_to_try, staledeps) - if isa(restored, Exception) - @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored - else - if timing_imports - elapsed = round((time_ns() - t_before) / 1e6, digits = 1) - comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before - tree_prefix = depth == 0 ? 
"" : " "^(depth-1)*"┌ " - print(lpad(elapsed, 9), " ms ") - printstyled(tree_prefix, color = :light_black) - print(pkg.name) - if comp_time > 0 - printstyled(" ", Ryu.writefixed(Float64(100 * comp_time / (elapsed * 1e6)), 2), "% compilation time", color = Base.info_color()) - end - if recomp_time > 0 - perc = Float64(100 * recomp_time / comp_time) - printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color()) + if current_task() === task + others = String[modkey.name] # repeat this to emphasize the cycle here + for each in package_locks # list the rest of the packages being loaded too + if each[2][1] === task + other = each[1].name + other == modkey.name || other == pkgid.name || push!(others, other) end - println() end - return restored + msg = sprint(deps, others) do io, deps, others + print(io, "deadlock detected in loading ") + join(io, deps, " -> ") + print(io, " -> ") + join(io, others, " && ") + end + throw(ConcurrencyViolationError(msg)) end + return wait(cond) end - return !isempty(paths) - finally - timing_imports && cumulative_compile_timing(false) - end + package_locks[modkey] = current_task() => Threads.Condition(require_lock) + return end -# to synchronize multiple tasks trying to import/using something -const package_locks = Dict{PkgId,Threads.Condition}() +function end_loading(modkey::PkgId, @nospecialize loaded) + loading = pop!(package_locks, modkey) + notify(loading[2], loaded, all=true) + nothing +end # to notify downstream consumers that a module was successfully loaded # Callbacks take the form (mod::Base.PkgId) -> nothing. @@ -1029,10 +1756,10 @@ const package_callbacks = Any[] const include_callbacks = Any[] # used to optionally track dependencies when requiring a module: -const _concrete_dependencies = Pair{PkgId,UInt64}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them -const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled +const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them +const _require_dependencies = Any[] # a list of (mod, abspath, fsize, hash, mtime) tuples that are the file dependencies of the module currently being precompiled const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies -function _include_dependency(mod::Module, _path::AbstractString) +function _include_dependency(mod::Module, _path::AbstractString; track_content=true) prev = source_path(nothing) if prev === nothing path = abspath(_path) @@ -1041,7 +1768,15 @@ function _include_dependency(mod::Module, _path::AbstractString) end if _track_dependencies[] @lock require_lock begin - push!(_require_dependencies, (mod, path, mtime(path))) + if track_content + @assert isfile(path) "can only hash files" + # use mtime=-1.0 here so that fsize==0 && mtime==0.0 corresponds to a missing include_dependency + push!(_require_dependencies, + (mod, path, filesize(path), open(_crc32c, path, "r"), -1.0)) + else + push!(_require_dependencies, + (mod, path, UInt64(0), UInt32(0), mtime(path))) + end end end return path, prev @@ -1050,22 +1785,22 @@ end """ include_dependency(path::AbstractString) -In a module, declare that the file specified by `path` (relative or absolute) is a -dependency for precompilation; that is, the module will need to be recompiled if this file -changes. 
+In a module, declare that the file, directory, or symbolic link specified by `path` +(relative or absolute) is a dependency for precompilation; that is, the module will need +to be recompiled if the modification time of `path` changes. -This is only needed if your module depends on a file that is not used via [`include`](@ref). It has +This is only needed if your module depends on a path that is not used via [`include`](@ref). It has no effect outside of compilation. """ function include_dependency(path::AbstractString) - _include_dependency(Main, path) + _include_dependency(Main, path, track_content=false) return nothing end # we throw PrecompilableError when a module doesn't want to be precompiled -struct PrecompilableError <: Exception end +import Core: PrecompilableError function show(io::IO, ex::PrecompilableError) - print(io, "Declaring __precompile__(false) is not allowed in files that are being precompiled.") + print(io, "Error when precompiling module, potentially caused by a __precompile__(false) declaration in the module.") end precompilableerror(ex::PrecompilableError) = true precompilableerror(ex::WrappedException) = precompilableerror(ex.error) @@ -1080,7 +1815,7 @@ If a module or file is *not* safely precompilable, it should call `__precompile_ order to throw an error if Julia attempts to precompile it. """ @noinline function __precompile__(isprecompilable::Bool=true) - if !isprecompilable && ccall(:jl_generating_output, Cint, ()) != 0 + if !isprecompilable && generating_output() throw(PrecompilableError()) end nothing @@ -1089,6 +1824,8 @@ end # require always works in Main scope and loads files from node 1 const toplevel_load = Ref(true) +const _require_world_age = Ref{UInt}(typemax(UInt)) + """ require(into::Module, module::Symbol) @@ -1111,12 +1848,20 @@ For more details regarding code loading, see the manual sections on [modules](@r [parallel computing](@ref code-availability). """ function require(into::Module, mod::Symbol) + if _require_world_age[] != typemax(UInt) + Base.invoke_in_world(_require_world_age[], __require, into, mod) + else + @invokelatest __require(into, mod) + end +end + +function __require(into::Module, mod::Symbol) @lock require_lock begin LOADING_CACHE[] = LoadingCache() try - uuidkey = identify_package(into, String(mod)) - # Core.println("require($(PkgId(into)), $mod) -> $uuidkey") - if uuidkey === nothing + uuidkey_env = identify_package_env(into, String(mod)) + # Core.println("require($(PkgId(into)), $mod) -> $uuidkey_env") + if uuidkey_env === nothing where = PkgId(into) if where.uuid === nothing hint, dots = begin @@ -1134,9 +1879,10 @@ function require(into::Module, mod::Symbol) Package $mod not found in current path$hint_message. - $start_sentence `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package.""")) else + manifest_warnings = collect_manifest_warnings() throw(ArgumentError(""" Package $(where.name) does not have $mod in its dependencies: - - You may have a partially installed environment. Try `Pkg.instantiate()` + $manifest_warnings- You may have a partially installed environment. Try `Pkg.instantiate()` to ensure all packages in the environment are installed. 
- Or, if you have $(where.name) checked out for development and have added $mod as a dependency but haven't updated your primary @@ -1144,42 +1890,106 @@ function require(into::Module, mod::Symbol) - Otherwise you may need to report an issue with $(where.name)""")) end end + uuidkey, env = uuidkey_env if _track_dependencies[] - push!(_require_dependencies, (into, binpack(uuidkey), 0.0)) + path = binpack(uuidkey) + push!(_require_dependencies, (into, path, UInt64(0), UInt32(0), 0.0)) end - return _require_prelocked(uuidkey) + return _require_prelocked(uuidkey, env) finally LOADING_CACHE[] = nothing end end end -mutable struct PkgOrigin - path::Union{String,Nothing} - cachepath::Union{String,Nothing} - version::Union{VersionNumber,Nothing} +function find_unsuitable_manifests_versions() + unsuitable_manifests = String[] + dev_manifests = String[] + for env in load_path() + project_file = env_project_file(env) + project_file isa String || continue # no project file + manifest_file = project_file_manifest_path(project_file) + manifest_file isa String || continue # no manifest file + m = parsed_toml(manifest_file) + man_julia_version = get(m, "julia_version", nothing) + man_julia_version isa String || @goto mark + man_julia_version = VersionNumber(man_julia_version) + thispatch(man_julia_version) != thispatch(VERSION) && @goto mark + isempty(man_julia_version.prerelease) != isempty(VERSION.prerelease) && @goto mark + isempty(man_julia_version.prerelease) && continue + man_julia_version.prerelease[1] != VERSION.prerelease[1] && @goto mark + if VERSION.prerelease[1] == "DEV" + # manifests don't store the 2nd part of prerelease, so cannot check further + # so treat them specially in the warning + push!(dev_manifests, manifest_file) + end + continue + @label mark + push!(unsuitable_manifests, string(manifest_file, " (v", man_julia_version, ")")) + end + return unsuitable_manifests, dev_manifests +end + +function collect_manifest_warnings() + unsuitable_manifests, dev_manifests = find_unsuitable_manifests_versions() + msg = "" + if !isempty(unsuitable_manifests) + msg *= """ + - Note that the following manifests in the load path were resolved with a different + julia version, which may be the cause of the error: + $(join(unsuitable_manifests, "\n ")) + """ + end + if !isempty(dev_manifests) + msg *= """ + - Note that the following manifests in the load path were resolved a potentially + different DEV version of the current version, which may be the cause + of the error: + $(join(dev_manifests, "\n ")) + """ + end + return msg end -PkgOrigin() = PkgOrigin(nothing, nothing, nothing) -const pkgorigins = Dict{PkgId,PkgOrigin}() require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey) -function _require_prelocked(uuidkey::PkgId) - just_loaded_pkg = false +const REPL_PKGID = PkgId(UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL") + +function _require_prelocked(uuidkey::PkgId, env=nothing) + if _require_world_age[] != typemax(UInt) + Base.invoke_in_world(_require_world_age[], __require_prelocked, uuidkey, env) + else + @invokelatest __require_prelocked(uuidkey, env) + end +end + +function __require_prelocked(uuidkey::PkgId, env=nothing) + assert_havelock(require_lock) if !root_module_exists(uuidkey) - _require(uuidkey) + newm = _require(uuidkey, env) + if newm === nothing + error("package `$(uuidkey.name)` did not define the expected \ + module `$(uuidkey.name)`, check for typos in package module name") + end + insert_extension_triggers(uuidkey) # After successfully loading, notify 
downstream consumers run_package_callbacks(uuidkey) - just_loaded_pkg = true - end - if just_loaded_pkg && !root_module_exists(uuidkey) - error("package `$(uuidkey.name)` did not define the expected \ - module `$(uuidkey.name)`, check for typos in package module name") + else + newm = root_module(uuidkey) end - return root_module(uuidkey) + return newm +end + +mutable struct PkgOrigin + path::Union{String,Nothing} + cachepath::Union{String,Nothing} + version::Union{VersionNumber,Nothing} end +PkgOrigin() = PkgOrigin(nothing, nothing, nothing) +const pkgorigins = Dict{PkgId,PkgOrigin}() const loaded_modules = Dict{PkgId,Module}() +const loaded_modules_order = Vector{Module}() const module_keys = IdDict{Module,PkgId}() # the reverse is_root_module(m::Module) = @lock require_lock haskey(module_keys, m) @@ -1193,9 +2003,14 @@ root_module_key(m::Module) = @lock require_lock module_keys[m] if haskey(loaded_modules, key) oldm = loaded_modules[key] if oldm !== m - @warn "Replacing module `$(key.name)`" + if generating_output(#=incremental=#true) + error("Replacing module `$(key.name)`") + else + @warn "Replacing module `$(key.name)`" + end end end + push!(loaded_modules_order, m) loaded_modules[key] = m module_keys[m] = key end @@ -1215,12 +2030,15 @@ end # get a top-level Module from the given key root_module(key::PkgId) = @lock require_lock loaded_modules[key] -root_module(where::Module, name::Symbol) = - root_module(identify_package(where, String(name))) +function root_module(where::Module, name::Symbol) + key = identify_package(where, String(name)) + key isa PkgId || throw(KeyError(name)) + return root_module(key) +end maybe_root_module(key::PkgId) = @lock require_lock get(loaded_modules, key, nothing) root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key) -loaded_modules_array() = @lock require_lock collect(values(loaded_modules)) +loaded_modules_array() = @lock require_lock copy(loaded_modules_order) function unreference_module(key::PkgId) if haskey(loaded_modules, key) @@ -1230,50 +2048,50 @@ function unreference_module(key::PkgId) end end -function set_pkgorigin_version_path(pkg, path) +# whoever takes the package_locks[pkg] must call this function immediately +function set_pkgorigin_version_path(pkg::PkgId, path::Union{String,Nothing}) + assert_havelock(require_lock) pkgorigin = get!(PkgOrigin, pkgorigins, pkg) if path !== nothing - project_file = locate_project_file(joinpath(dirname(path), "..")) - if project_file isa String - d = parsed_toml(project_file) - v = get(d, "version", nothing) - if v !== nothing - pkgorigin.version = VersionNumber(v::AbstractString) - end + # Pkg needs access to the version of packages in the sysimage. 
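# A minimal sketch, not the actual helper used just below, of how a package
# version can be recovered from a package root directory: read the adjacent
# Project.toml and parse its "version" entry.  `pkg_version_from_root` is a
# hypothetical name for illustration; Base uses `get_pkgversion_from_path`.
using TOML

function pkg_version_from_root(pkgroot::AbstractString)
    project = joinpath(pkgroot, "Project.toml")
    isfile(project) || return nothing
    v = get(TOML.parsefile(project), "version", nothing)
    return v isa AbstractString ? VersionNumber(v) : nothing
end
# e.g. pkg_version_from_root(pkgdir(SomePackage)) for an installed package root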
+ if generating_output(#=incremental=#false) + pkgorigin.version = get_pkgversion_from_path(joinpath(dirname(path), "..")) end end pkgorigin.path = path + nothing end -# Returns `nothing` or the name of the newly-created cachefile -function _require(pkg::PkgId) - # handle recursive calls to require - loading = get(package_locks, pkg, false) - if loading !== false - # load already in progress for this module - wait(loading) - return - end - package_locks[pkg] = Threads.Condition(require_lock) +# A hook to allow code load to use Pkg.precompile +const PKG_PRECOMPILE_HOOK = Ref{Function}() + +# Returns `nothing` or the new(ish) module +function _require(pkg::PkgId, env=nothing) + assert_havelock(require_lock) + loaded = start_loading(pkg) + loaded === nothing || return loaded last = toplevel_load[] try toplevel_load[] = false # perform the search operation to select the module file require intends to load - path = locate_package(pkg) - set_pkgorigin_version_path(pkg, path) + path = locate_package(pkg, env) if path === nothing throw(ArgumentError(""" Package $pkg is required but does not seem to be installed: - Run `Pkg.instantiate()` to install all recorded dependencies. """)) end + set_pkgorigin_version_path(pkg, path) + + pkg_precompile_attempted = false # being safe to avoid getting stuck in a Pkg.precompile loop # attempt to load the module file via the precompile cache locations if JLOptions().use_compiled_modules != 0 - m = _require_search_from_serialized(pkg, path) - if !isa(m, Bool) - return + @label load_from_cache + m = _require_search_from_serialized(pkg, path, UInt128(0)) + if m isa Module + return m end end @@ -1281,7 +2099,7 @@ function _require(pkg::PkgId) # but it was not handled by the precompile loader, complain for (concrete_pkg, concrete_build_id) in _concrete_dependencies if pkg == concrete_pkg - @warn """Module $(pkg.name) with build ID $concrete_build_id is missing from the cache. + @warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache. 
This may mean $pkg does not support precompilation but is imported by a module that does.""" if JLOptions().incremental != 0 # during incremental precompilation, this should be fail-fast @@ -1290,27 +2108,50 @@ function _require(pkg::PkgId) end end - if JLOptions().use_compiled_modules != 0 - if (0 == ccall(:jl_generating_output, Cint, ())) || (JLOptions().incremental != 0) + if JLOptions().use_compiled_modules == 1 + if !generating_output(#=incremental=#false) + if !pkg_precompile_attempted && isinteractive() && isassigned(PKG_PRECOMPILE_HOOK) + pkg_precompile_attempted = true + unlock(require_lock) + try + @invokelatest PKG_PRECOMPILE_HOOK[](pkg.name, _from_loading = true) + finally + lock(require_lock) + end + @goto load_from_cache + end # spawn off a new incremental pre-compile task for recursive `require` calls - # or if the require search declared it was pre-compiled before (and therefore is expected to still be pre-compilable) - cachefile = compilecache(pkg, path) - if isa(cachefile, Exception) + cachefile_or_module = maybe_cachefile_lock(pkg, path) do + # double-check now that we have lock + m = _require_search_from_serialized(pkg, path, UInt128(0)) + m isa Module && return m + compilecache(pkg, path) + end + cachefile_or_module isa Module && return cachefile_or_module::Module + cachefile = cachefile_or_module + if isnothing(cachefile) # maybe_cachefile_lock returns nothing if it had to wait for another process + @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search + elseif isa(cachefile, Exception) if precompilableerror(cachefile) verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug @logmsg verbosity "Skipping precompilation since __precompile__(false). Importing $pkg." else @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m end - # fall-through to loading the file locally + # fall-through to loading the file locally if not incremental else - m = _require_from_serialized(pkg, cachefile) - if isa(m, Exception) + cachefile, ocachefile = cachefile::Tuple{String, Union{Nothing, String}} + m = _tryrequire_from_serialized(pkg, cachefile, ocachefile) + if !isa(m, Module) @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m else - return + return m end end + if JLOptions().incremental != 0 + # during incremental precompilation, this should be fail-fast + throw(PrecompilableError()) + end end end @@ -1325,7 +2166,7 @@ function _require(pkg::PkgId) unlock(require_lock) try include(__toplevel__, path) - return + loaded = get(loaded_modules, pkg, nothing) finally lock(require_lock) if uuid !== old_uuid @@ -1334,12 +2175,26 @@ function _require(pkg::PkgId) end finally toplevel_load[] = last - loading = pop!(package_locks, pkg) - notify(loading, all=true) + end_loading(pkg, loaded) + end + return loaded +end + +# Only used from test/precompile.jl +function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Union{String, Nothing}) + @lock require_lock begin + set_pkgorigin_version_path(uuidkey, nothing) + newm = _tryrequire_from_serialized(uuidkey, path, ocachepath) + newm isa Module || throw(newm) + insert_extension_triggers(uuidkey) + # After successfully loading, notify downstream consumers + run_package_callbacks(uuidkey) + return newm end - nothing end + + # relative-path load """ @@ -1445,8 +2300,27 @@ end """ evalfile(path::AbstractString, args::Vector{String}=String[]) -Load the file using [`include`](@ref), 
evaluate all expressions, -and return the value of the last one. +Load the file into an anonymous module using [`include`](@ref), evaluate all expressions, +and return the value of the last expression. +The optional `args` argument can be used to set the input arguments of the script (i.e. the global `ARGS` variable). +Note that definitions (e.g. methods, globals) are evaluated in the anonymous module and do not affect the current module. + +# Example + +```jldoctest +julia> write("testfile.jl", \"\"\" + @show ARGS + 1 + 1 + \"\"\"); + +julia> x = evalfile("testfile.jl", ["ARG1", "ARG2"]); +ARGS = ["ARG1", "ARG2"] + +julia> x +2 + +julia> rm("testfile.jl") +``` """ function evalfile(path::AbstractString, args::Vector{String}=String[]) return Core.eval(Module(:__anon__), @@ -1478,9 +2352,41 @@ function load_path_setup_code(load_path::Bool=true) return code end +""" + check_src_module_wrap(srcpath::String) + +Checks that a package entry file `srcpath` has a module declaration, and that it is before any using/import statements. +""" +function check_src_module_wrap(pkg::PkgId, srcpath::String) + module_rgx = r"^(|end |\"\"\" )\s*(?:@)*(?:bare)?module\s" + load_rgx = r"\b(?:using|import)\s" + load_seen = false + inside_string = false + for s in eachline(srcpath) + if count("\"\"\"", s) == 1 + # ignore module docstrings + inside_string = !inside_string + end + inside_string && continue + if contains(s, module_rgx) + if load_seen + throw(ErrorException("Package $pkg source file $srcpath has a using/import before a module declaration.")) + end + return true + end + if startswith(s, load_rgx) + load_seen = true + end + end + throw(ErrorException("Package $pkg source file $srcpath does not contain a module declaration.")) +end + # this is called in the external process that generates precompiled package files function include_package_for_output(pkg::PkgId, input::String, depot_path::Vector{String}, dl_load_path::Vector{String}, load_path::Vector{String}, concrete_deps::typeof(_concrete_dependencies), source::Union{Nothing,String}) + + check_src_module_wrap(pkg, input) + append!(empty!(Base.DEPOT_PATH), depot_path) append!(empty!(Base.DL_LOAD_PATH), dl_load_path) append!(empty!(Base.LOAD_PATH), load_path) @@ -1496,6 +2402,7 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto task_local_storage()[:SOURCE_PATH] = source end + ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred) Core.Compiler.track_newly_inferred.x = true try Base.include(Base.__toplevel__, input) @@ -1506,16 +2413,33 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto finally Core.Compiler.track_newly_inferred.x = false end - ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred) end const PRECOMPILE_TRACE_COMPILE = Ref{String}() -function create_expr_cache(pkg::PkgId, input::String, output::String, concrete_deps::typeof(_concrete_dependencies), internal_stderr::IO = stderr, internal_stdout::IO = stdout) +function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String}, + concrete_deps::typeof(_concrete_dependencies), internal_stderr::IO = stderr, internal_stdout::IO = stdout) @nospecialize internal_stderr internal_stdout rm(output, force=true) # Remove file if it exists + output_o === nothing || rm(output_o, force=true) depot_path = map(abspath, DEPOT_PATH) dl_load_path = map(abspath, DL_LOAD_PATH) load_path = map(abspath, Base.load_path()) + # if pkg is a stdlib, append its parent 
Project.toml to the load path + parentid = get(EXT_PRIMED, pkg, nothing) + if parentid !== nothing + for env in load_path + project_file = env_project_file(env) + if project_file === true + _, parent_project_file = entry_point_and_project_file(env, parentid.name) + if parent_project_file !== nothing + parentproj = project_file_name_uuid(parent_project_file, parentid.name) + if parentproj == parentid + push!(load_path, parent_project_file) + end + end + end + end + end path_sep = Sys.iswindows() ? ';' : ':' any(path -> path_sep in path, load_path) && error("LOAD_PATH entries cannot contain $(repr(path_sep))") @@ -1531,18 +2455,34 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, concrete_d for (pkg, build_id) in concrete_deps push!(deps_strs, "$(pkg_str(pkg)) => $(repr(build_id))") end + + if output_o !== nothing + @debug "Generating object cache file for $pkg" + cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing) + opt_level = Base.JLOptions().opt_level + opts = `-O$(opt_level) --output-o $(output_o) --output-ji $(output) --output-incremental=yes` + else + @debug "Generating cache file for $pkg" + cpu_target = nothing + opts = `-O0 --output-ji $(output) --output-incremental=yes` + end + deps_eltype = sprint(show, eltype(concrete_deps); context = :module=>nothing) deps = deps_eltype * "[" * join(deps_strs, ",") * "]" trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])` : `` - io = open(pipeline(`$(julia_cmd()::Cmd) -O0 - --output-ji $output --output-incremental=yes - --startup-file=no --history-file=no --warn-overwrite=yes - --color=$(have_color === nothing ? "auto" : have_color ? "yes" : "no") - $trace - -`, stderr = internal_stderr, stdout = internal_stdout), + io = open(pipeline(addenv(`$(julia_cmd(;cpu_target)::Cmd) $(opts) + --startup-file=no --history-file=no --warn-overwrite=yes + --color=$(have_color === nothing ? "auto" : have_color ? 
"yes" : "no") + $trace + -`, + "OPENBLAS_NUM_THREADS" => 1, + "JULIA_NUM_THREADS" => 1), + stderr = internal_stderr, stdout = internal_stdout), "w", stdout) # write data over stdin to avoid the (unlikely) case of exceeding max command line size write(io.in, """ + empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated + Base.precompiling_extension = $(loading_extension) Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)), $(repr(load_path)), $deps, $(repr(source_path(nothing)))) """) @@ -1555,16 +2495,24 @@ function compilecache_dir(pkg::PkgId) return joinpath(DEPOT_PATH[1], entrypath) end -function compilecache_path(pkg::PkgId, prefs_hash::UInt64)::String +function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=something(Base.active_project(), ""))::String entrypath, entryfile = cache_file_entry(pkg) cachepath = joinpath(DEPOT_PATH[1], entrypath) isdir(cachepath) || mkpath(cachepath) if pkg.uuid === nothing abspath(cachepath, entryfile) * ".ji" else - crc = _crc32c(something(Base.active_project(), "")) + crc = _crc32c(project) crc = _crc32c(unsafe_string(JLOptions().image_file), crc) crc = _crc32c(unsafe_string(JLOptions().julia_bin), crc) + crc = _crc32c(ccall(:jl_cache_flags, UInt8, ()), crc) + + cpu_target = get(ENV, "JULIA_CPU_TARGET", nothing) + if cpu_target === nothing + cpu_target = unsafe_string(JLOptions().cpu_target) + end + crc = _crc32c(cpu_target, crc) + crc = _crc32c(prefs_hash, crc) project_precompile_slug = slug(crc, 5) abspath(cachepath, string(entryfile, "_", project_precompile_slug, ".ji")) @@ -1589,7 +2537,7 @@ end const MAX_NUM_PRECOMPILE_FILES = Ref(10) function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout, - ignore_loaded_modules::Bool = true) + keep_loaded_modules::Bool = true) @nospecialize internal_stderr internal_stdout # decide where to put the resulting cache file @@ -1597,10 +2545,10 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # build up the list of modules that we want the precompile process to preserve concrete_deps = copy(_concrete_dependencies) - if ignore_loaded_modules - for (key, mod) in loaded_modules + if keep_loaded_modules + for mod in loaded_modules_array() if !(mod === Main || mod === Core || mod === Base) - push!(concrete_deps, key => module_build_id(mod)) + push!(concrete_deps, PkgId(mod) => module_build_id(mod)) end end end @@ -1611,61 +2559,198 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in # create a temporary file in `cachepath` directory, write the cache in it, # write the checksum, _and then_ atomically move the file to `cachefile`. 
mkpath(cachepath) + cache_objects = JLOptions().use_pkgimages == 1 tmppath, tmpio = mktemp(cachepath) + + if cache_objects + tmppath_o, tmpio_o = mktemp(cachepath) + tmppath_so, tmpio_so = mktemp(cachepath) + else + tmppath_o = nothing + end local p try close(tmpio) - p = create_expr_cache(pkg, path, tmppath, concrete_deps, internal_stderr, internal_stdout) + if cache_objects + close(tmpio_o) + close(tmpio_so) + end + p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, internal_stderr, internal_stdout) + if success(p) - # append checksum to the end of the .ji file: - open(tmppath, "a+") do f - write(f, _crc32c(seekstart(f))) + if cache_objects + # Run linker over tmppath_o + Linking.link_image(tmppath_o, tmppath_so) end - # inherit permission from the source file (and make them writable) - chmod(tmppath, filemode(path) & 0o777 | 0o200) # Read preferences hash back from .ji file (we can't precompute because # we don't actually know what the list of compile-time preferences are without compiling) prefs_hash = preferences_hash(tmppath) cachefile = compilecache_path(pkg, prefs_hash) + ocachefile = cache_objects ? ocachefile_from_cachefile(cachefile) : nothing + + # append checksum for so to the end of the .ji file: + crc_so = UInt32(0) + if cache_objects + crc_so = open(_crc32c, tmppath_so, "r") + end + + # append extra crc to the end of the .ji file: + open(tmppath, "r+") do f + if iszero(isvalid_cache_header(f)) + error("Invalid header for $pkg in new cache file $(repr(tmppath)).") + end + seekend(f) + write(f, crc_so) + seekstart(f) + write(f, _crc32c(f)) + end + + # inherit permission from the source file (and make them writable) + chmod(tmppath, filemode(path) & 0o777 | 0o200) # prune the directory with cache files if pkg.uuid !== nothing entrypath, entryfile = cache_file_entry(pkg) - cachefiles = filter!(x -> startswith(x, entryfile * "_"), readdir(cachepath)) + cachefiles = filter!(x -> startswith(x, entryfile * "_") && endswith(x, ".ji"), readdir(cachepath)) if length(cachefiles) >= MAX_NUM_PRECOMPILE_FILES[] idx = findmin(mtime.(joinpath.(cachepath, cachefiles)))[2] - rm(joinpath(cachepath, cachefiles[idx]); force=true) + evicted_cachefile = joinpath(cachepath, cachefiles[idx]) + @debug "Evicting file from cache" evicted_cachefile + rm(evicted_cachefile; force=true) + try + rm(ocachefile_from_cachefile(evicted_cachefile); force=true) + @static if Sys.isapple() + rm(ocachefile_from_cachefile(evicted_cachefile) * ".dSYM"; force=true, recursive=true) + end + catch e + e isa IOError || rethrow() + end end end - # this is atomic according to POSIX: + if cache_objects + try + rename(tmppath_so, ocachefile::String; force=true) + catch e + e isa IOError || rethrow() + isfile(ocachefile::String) || rethrow() + # Windows prevents renaming a file that is in use so if there is a Julia session started + # with a package image loaded, we cannot rename that file. + # The code belows append a `_i` to the name of the cache file where `i` is the smallest number such that + # that cache file does not exist. 
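# A small sketch of the trailing-checksum pattern used for .ji files here: the
# CRC32c of everything written so far is appended as the final 4 bytes, and a
# reader validates by checksumming the file minus that trailer (compare
# `isvalid_file_crc`).  `write_with_trailer` / `trailer_ok` are hypothetical
# names for this sketch only.
using CRC32c

function write_with_trailer(path::AbstractString, payload::Vector{UInt8})
    open(path, "w") do io
        write(io, payload)
        write(io, crc32c(payload))             # 4-byte checksum trailer
    end
end

function trailer_ok(path::AbstractString)
    open(path, "r") do io
        crc = crc32c(io, filesize(path) - 4)   # checksum all but the trailer
        return crc == read(io, UInt32)
    end
end
# write_with_trailer("demo.ji", rand(UInt8, 128)); trailer_ok("demo.ji") === true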
+ ocachename, ocacheext = splitext(ocachefile::String) + old_cachefiles = Set(readdir(cachepath)) + num = 1 + while true + ocachefile = ocachename * "_$num" * ocacheext + in(basename(ocachefile), old_cachefiles) || break + num += 1 + end + # TODO: Risk for a race here if some other process grabs this name before us + cachefile = cachefile_from_ocachefile(ocachefile) + rename(tmppath_so, ocachefile::String; force=true) + end + @static if Sys.isapple() + run(`$(Linking.dsymutil()) $ocachefile`, Base.DevNull(), Base.DevNull(), Base.DevNull()) + end + end + # this is atomic according to POSIX (not Win32): rename(tmppath, cachefile; force=true) - return cachefile + return cachefile, ocachefile end finally rm(tmppath, force=true) + if cache_objects + rm(tmppath_o::String, force=true) + rm(tmppath_so, force=true) + end end if p.exitcode == 125 return PrecompilableError() else - error("Failed to precompile $pkg to $tmppath.") + error("Failed to precompile $pkg to $(repr(tmppath)).") end end -module_build_id(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), m) +function module_build_id(m::Module) + hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m) + return (UInt128(hi) << 64) | lo +end + +function isvalid_cache_header(f::IOStream) + pkgimage = Ref{UInt8}() + checksum = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid}, Ptr{UInt8}, Ptr{Int64}, Ptr{Int64}), f.ios, pkgimage, Ref{Int64}(), Ref{Int64}()) # returns checksum id or zero -isvalid_cache_header(f::IOStream) = (0 != ccall(:jl_read_verify_header, Cint, (Ptr{Cvoid},), f.ios)) + if !iszero(checksum) && pkgimage[] != 0 + @debug "Cache header was for pkgimage" + return UInt64(0) # We somehow read the header for a pkgimage and not a ji + end + return checksum +end isvalid_file_crc(f::IOStream) = (_crc32c(seekstart(f), filesize(f) - 4) == read(f, UInt32)) -struct CacheHeaderIncludes - id::PkgId +function isvalid_pkgimage_crc(f::IOStream, ocachefile::String) + seekstart(f) # TODO necessary + seek(f, filesize(f) - 8) + expected_crc_so = read(f, UInt32) + crc_so = open(_crc32c, ocachefile, "r") + expected_crc_so == crc_so +end + +mutable struct CacheHeaderIncludes + const id::PkgId filename::String - mtime::Float64 - modpath::Vector{String} # seemingly not needed in Base, but used by Revise + const fsize::UInt64 + const hash::UInt32 + const mtime::Float64 + const modpath::Vector{String} # seemingly not needed in Base, but used by Revise +end + +function replace_depot_path(path::AbstractString) + for depot in DEPOT_PATH + !isdir(depot) && continue + + # Strip extraneous pathseps through normalization. + if isdirpath(depot) + depot = dirname(depot) + end + + if startswith(path, depot) + path = replace(path, depot => "@depot"; count=1) + break + end + end + return path end -function parse_cache_header(f::IO) +function restore_depot_path(path::AbstractString, depot::AbstractString) + replace(path, r"^@depot" => depot; count=1) +end + +# Find depot in DEPOT_PATH for which all @depot tags from the `includes` +# can be replaced so that they point to a file on disk each. 
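# An illustration of the "@depot" relocation round trip set up above: a cache
# stores include paths with their depot prefix replaced by the literal tag
# "@depot", and a loader later substitutes a concrete depot back in, accepting
# the first depot under which the file actually exists.  `relocate` /
# `resolve_tag` are hypothetical names; the real helpers are
# `replace_depot_path`, `restore_depot_path`, and `resolve_depot` below.
relocate(path::String, depot::String) =
    startswith(path, depot) ? replace(path, depot => "@depot"; count = 1) : path

function resolve_tag(tagged::String, depots::Vector{String})
    startswith(tagged, "@depot") || return tagged      # path was outside any depot
    for depot in depots
        candidate = replace(tagged, r"^@depot" => depot; count = 1)
        isfile(candidate) && return candidate
    end
    return nothing                                     # no depot can supply this file
end
# relocate("/home/me/.julia/packages/Example/src/Example.jl", "/home/me/.julia")
#     == "@depot/packages/Example/src/Example.jl"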
+function resolve_depot(includes::Union{AbstractVector,AbstractSet}) + # `all` because it's possible to have a mixture of includes inside and outside of the depot + if all(includes) do inc + !startswith(inc, "@depot") + end + return :fully_outside_depot + end + for depot in DEPOT_PATH + # `any` because it's possible to have a mixture of includes inside and outside of the depot + if any(includes) do inc + isfile(restore_depot_path(inc, depot)) + end + return depot + end + end + return :no_depot_found +end + + +function parse_cache_header(f::IO, cachefile::AbstractString) + flags = read(f, UInt8) modules = Vector{Pair{PkgId, UInt64}}() while true n = read(f, Int32) @@ -1675,7 +2760,7 @@ function parse_cache_header(f::IO) build_id = read(f, UInt64) # build UUID (mostly just a timestamp) push!(modules, PkgId(uuid, sym) => build_id) end - totbytes = read(f, Int64) # total bytes for file dependencies + preferences + totbytes = Int64(read(f, UInt64)) # total bytes for file dependencies + preferences # read the list of requirements # and split the list into include and requires statements includes = CacheHeaderIncludes[] @@ -1688,6 +2773,10 @@ function parse_cache_header(f::IO) end depname = String(read(f, n2)) totbytes -= n2 + fsize = read(f, UInt64) + totbytes -= 8 + hash = read(f, UInt32) + totbytes -= 4 mtime = read(f, Float64) totbytes -= 8 n1 = read(f, Int32) @@ -1710,7 +2799,7 @@ function parse_cache_header(f::IO) if depname[1] == '\0' push!(requires, modkey => binunpack(depname)) else - push!(includes, CacheHeaderIncludes(modkey, depname, mtime, modpath)) + push!(includes, CacheHeaderIncludes(modkey, depname, fsize, hash, mtime, modpath)) end end prefs = String[] @@ -1729,81 +2818,101 @@ function parse_cache_header(f::IO) totbytes -= 8 @assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))" # read the list of modules that are required to be present during loading - required_modules = Vector{Pair{PkgId, UInt64}}() + required_modules = Vector{Pair{PkgId, UInt128}}() while true n = read(f, Int32) n == 0 && break sym = String(read(f, n)) # module name uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID - build_id = read(f, UInt64) # build id + build_id = UInt128(read(f, UInt64)) << 64 + build_id |= read(f, UInt64) push!(required_modules, PkgId(uuid, sym) => build_id) end - return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash + l = read(f, Int32) + clone_targets = read(f, l) + + # determine path for @depot replacement from srctext files only, e.g. 
ignore any include_dependency files + srcfiles = srctext_files(f, srctextpos, includes) + depot = resolve_depot(srcfiles) + keepidx = Int[] + for (i, chi) in enumerate(includes) + chi.filename ∈ srcfiles && push!(keepidx, i) + end + if depot === :no_depot_found + @debug("Unable to resolve @depot tag in cache file $cachefile", srcfiles) + elseif depot === :fully_outside_depot + @debug("All include dependencies in cache file $cachefile are outside of a depot.", srcfiles) + else + for inc in includes + inc.filename = restore_depot_path(inc.filename, depot) + end + end + includes_srcfiles_only = includes[keepidx] + + return modules, (includes, includes_srcfiles_only, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags end -function parse_cache_header(cachefile::String; srcfiles_only::Bool=false) +function parse_cache_header(cachefile::String) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - ret = parse_cache_header(io) - srcfiles_only || return ret - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = ret - srcfiles = srctext_files(io, srctextpos) - delidx = Int[] - for (i, chi) in enumerate(includes) - chi.filename ∈ srcfiles || push!(delidx, i) - end - deleteat!(includes, delidx) - return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + ret = parse_cache_header(io, cachefile) + return ret finally close(io) end end - - -preferences_hash(f::IO) = parse_cache_header(f)[end] +preferences_hash(f::IO, cachefile::AbstractString) = parse_cache_header(f, cachefile)[6] function preferences_hash(cachefile::String) io = open(cachefile, "r") try - if !isvalid_cache_header(io) + if iszero(isvalid_cache_header(io)) throw(ArgumentError("Invalid header in cache file $cachefile.")) end - return preferences_hash(io) + return preferences_hash(io, cachefile) finally close(io) end end - -function cache_dependencies(f::IO) - defs, (includes, requires), modules, srctextpos, prefs, prefs_hash = parse_cache_header(f) - return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime +function cache_dependencies(f::IO, cachefile::AbstractString) + _, (includes, _, _), modules, _... 
= parse_cache_header(f, cachefile) + return modules, map(chi -> chi.filename, includes) # return just filename end function cache_dependencies(cachefile::String) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return cache_dependencies(io) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + return cache_dependencies(io, cachefile) finally close(io) end end -function read_dependency_src(io::IO, filename::AbstractString) - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io) +function read_dependency_src(io::IO, cachefile::AbstractString, filename::AbstractString) + _, (includes, _, _), _, srctextpos, _, _, _, _ = parse_cache_header(io, cachefile) srctextpos == 0 && error("no source-text stored in cache file") seek(io, srctextpos) - return _read_dependency_src(io, filename) + return _read_dependency_src(io, filename, includes) end -function _read_dependency_src(io::IO, filename::AbstractString) +function _read_dependency_src(io::IO, filename::AbstractString, includes::Vector{CacheHeaderIncludes}=CacheHeaderIncludes[]) while !eof(io) filenamelen = read(io, Int32) filenamelen == 0 && break - fn = String(read(io, filenamelen)) + depotfn = String(read(io, filenamelen)) len = read(io, UInt64) + fn = if !startswith(depotfn, "@depot") + depotfn + else + basefn = restore_depot_path(depotfn, "") + idx = findfirst(includes) do inc + endswith(inc.filename, basefn) + end + isnothing(idx) ? depotfn : includes[idx].filename + end if fn == filename return String(read(io, len)) end @@ -1815,23 +2924,23 @@ end function read_dependency_src(cachefile::String, filename::AbstractString) io = open(cachefile, "r") try - !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile.")) - return read_dependency_src(io, filename) + iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile.")) + return read_dependency_src(io, cachefile, filename) finally close(io) end end -function srctext_files(f::IO, srctextpos::Int64) +function srctext_files(f::IO, srctextpos::Int64, includes::Vector{CacheHeaderIncludes}) files = Set{String}() srctextpos == 0 && return files seek(f, srctextpos) while !eof(f) filenamelen = read(f, Int32) filenamelen == 0 && break - fn = String(read(f, filenamelen)) + filename = String(read(f, filenamelen)) len = read(f, UInt64) - push!(files, fn) + push!(files, filename) seek(f, position(f) + len) end return files @@ -1839,7 +2948,7 @@ end # Test to see if this UUID is mentioned in this `Project.toml`; either as # the top-level UUID (e.g. that of the project itself), as a dependency, -# or as an extra for Preferences. +# or as an extra/weakdep for Preferences. 
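# A sketch of the UUID-to-name lookup described by the comment above: a
# Project.toml may mention a UUID as the project's own `uuid`, or inside its
# [deps], [extras], or [weakdeps] tables, and the first match supplies the name
# used for preference lookups.  `uuid_name` is a hypothetical stand-in for
# `get_uuid_name`, which operates on an already-parsed project dictionary.
using TOML, UUIDs

function uuid_name(project_file::AbstractString, uuid::UUID)
    project = TOML.parsefile(project_file)
    # Project.toml UUIDs are conventionally lowercase, matching string(uuid)
    if get(project, "uuid", nothing) == string(uuid)
        return get(project, "name", nothing)
    end
    for section in ("deps", "extras", "weakdeps")
        table = get(project, section, nothing)
        table isa AbstractDict || continue
        for (name, u) in table
            u isa AbstractString && u == string(uuid) && return name
        end
    end
    return nothing
end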
function get_uuid_name(project::Dict{String, Any}, uuid::UUID) uuid_p = get(project, "uuid", nothing)::Union{Nothing, String} name = get(project, "name", nothing)::Union{Nothing, String} @@ -1854,7 +2963,7 @@ function get_uuid_name(project::Dict{String, Any}, uuid::UUID) end end end - for subkey in ("deps", "extras") + for subkey in ("deps", "extras", "weakdeps") subsection = get(project, subkey, nothing)::Union{Nothing, Dict{String, Any}} if subsection !== nothing for (k, v) in subsection @@ -2002,17 +3111,226 @@ get_compiletime_preferences(uuid::UUID) = collect(get(Vector{String}, COMPILETIM get_compiletime_preferences(m::Module) = get_compiletime_preferences(PkgId(m).uuid) get_compiletime_preferences(::Nothing) = String[] -# returns true if it "cachefile.ji" is stale relative to "modpath.jl" +function check_clone_targets(clone_targets) + rejection_reason = ccall(:jl_check_pkgimage_clones, Any, (Ptr{Cchar},), clone_targets) + if rejection_reason !== nothing + return rejection_reason + end +end + +struct CacheFlags + # OOICCDDP - see jl_cache_flags + use_pkgimages::Bool + debug_level::Int + check_bounds::Int + inline::Bool + opt_level::Int + + function CacheFlags(f::UInt8) + use_pkgimages = Bool(f & 1) + debug_level = Int((f >> 1) & 3) + check_bounds = Int((f >> 3) & 3) + inline = Bool((f >> 5) & 1) + opt_level = Int((f >> 6) & 3) # define OPT_LEVEL in statiddata_utils + new(use_pkgimages, debug_level, check_bounds, inline, opt_level) + end +end +CacheFlags(f::Int) = CacheFlags(UInt8(f)) +CacheFlags() = CacheFlags(ccall(:jl_cache_flags, UInt8, ())) + +function show(io::IO, cf::CacheFlags) + print(io, "use_pkgimages = ", cf.use_pkgimages) + print(io, ", debug_level = ", cf.debug_level) + print(io, ", check_bounds = ", cf.check_bounds) + print(io, ", inline = ", cf.inline) + print(io, ", opt_level = ", cf.opt_level) +end + +struct ImageTarget + name::String + flags::Int32 + ext_features::String + features_en::Vector{UInt8} + features_dis::Vector{UInt8} +end + +function parse_image_target(io::IO) + flags = read(io, Int32) + nfeature = read(io, Int32) + feature_en = read(io, 4*nfeature) + feature_dis = read(io, 4*nfeature) + name_len = read(io, Int32) + name = String(read(io, name_len)) + ext_features_len = read(io, Int32) + ext_features = String(read(io, ext_features_len)) + ImageTarget(name, flags, ext_features, feature_en, feature_dis) +end + +function parse_image_targets(targets::Vector{UInt8}) + io = IOBuffer(targets) + ntargets = read(io, Int32) + targets = Vector{ImageTarget}(undef, ntargets) + for i in 1:ntargets + targets[i] = parse_image_target(io) + end + return targets +end + +function current_image_targets() + targets = @ccall jl_reflect_clone_targets()::Vector{UInt8} + return parse_image_targets(targets) +end + +struct FeatureName + name::Cstring + bit::UInt32 # bit index into a `uint32_t` array; + llvmver::UInt32 # 0 if it is available on the oldest LLVM version we support +end + +function feature_names() + fnames = Ref{Ptr{FeatureName}}() + nf = Ref{Csize_t}() + @ccall jl_reflect_feature_names(fnames::Ptr{Ptr{FeatureName}}, nf::Ptr{Csize_t})::Cvoid + if fnames[] == C_NULL + @assert nf[] == 0 + return Vector{FeatureName}(undef, 0) + end + Base.unsafe_wrap(Array, fnames[], nf[], own=false) +end + +function test_feature(features::Vector{UInt8}, feat::FeatureName) + bitidx = feat.bit + u8idx = div(bitidx, 8) + 1 + bit = bitidx % 8 + return (features[u8idx] & (1 << bit)) != 0 +end + +function show(io::IO, it::ImageTarget) + print(io, it.name) + if !isempty(it.ext_features) + 
print(io, ",", it.ext_features) + end + print(io, "; flags=", it.flags) + print(io, "; features_en=(") + first = true + for feat in feature_names() + if test_feature(it.features_en, feat) + name = Base.unsafe_string(feat.name) + if first + first = false + print(io, name) + else + print(io, ", ", name) + end + end + end + print(io, ")") + # Is feature_dis useful? +end + +# Set by FileWatching.__init__() +global mkpidlock_hook +global trymkpidlock_hook +global parse_pidfile_hook + +# The preferences hash is only known after precompilation so just assume no preferences. +# Also ignore the active project, which means that if all other conditions are equal, +# the same package cannot be precompiled from different projects and/or different preferences at the same time. +compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile" + +const compilecache_pidlock_stale_age = 10 + +# Allows processes to wait if another process is precompiling a given source already. +# The lock file mtime will be updated when held at most every `stale_age/2` seconds, with expected +# variance of 10 seconds or more being infrequent but not unusual. +# After `stale_age` seconds beyond the mtime of the lock file, the lock file is deleted and +# precompilation will proceed if the locking process no longer exists or after `stale_age * 5` +# seconds if the process does still exist. +# If the lock is held by another host, it will conservatively wait `stale_age * 5` +# seconds since processes cannot be checked remotely +function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=compilecache_pidlock_stale_age) + if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook) + pidfile = compilecache_pidfile_path(pkg) + cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age) + if cachefile === false + pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile) + verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug + if isempty(hostname) || hostname == gethostname() + @logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg. Pidfile: $pidfile" + else + @logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg. Pidfile: $pidfile" + end + # wait until the lock is available, but don't actually acquire it + # returning nothing indicates a process waited for another + return invokelatest(mkpidlock_hook, Returns(nothing), pidfile; stale_age) + end + return cachefile + else + # for packages loaded before FileWatching.__init__() + f() + end +end +# returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey # otherwise returns the list of dependencies to also check @constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false) + return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded) +end +@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool = false) io = open(cachefile, "r") try - if !isvalid_cache_header(io) + checksum = isvalid_cache_header(io) + if iszero(checksum) @debug "Rejecting cache file $cachefile due to it containing an invalid cache header" return true # invalid cache file end - modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io) - id = isempty(modules) ? 
nothing : first(modules).first + modules, (includes, _, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags = parse_cache_header(io, cachefile) + if isempty(modules) + return true # ignore empty file + end + if ccall(:jl_match_cache_flags, UInt8, (UInt8,), flags) == 0 + @debug """ + Rejecting cache file $cachefile for $modkey since the flags are mismatched + current session: $(CacheFlags()) + cache file: $(CacheFlags(flags)) + """ + return true + end + pkgimage = !isempty(clone_targets) + if pkgimage + ocachefile = ocachefile_from_cachefile(cachefile) + if JLOptions().use_pkgimages == 0 + # presence of clone_targets means native code cache + @debug "Rejecting cache file $cachefile for $modkey since it would require usage of pkgimage" + return true + end + rejection_reasons = check_clone_targets(clone_targets) + if !isnothing(rejection_reasons) + @debug("Rejecting cache file $cachefile for $modkey:", + Reasons=rejection_reasons, + var"Image Targets"=parse_image_targets(clone_targets), + var"Current Targets"=current_image_targets()) + return true + end + if !isfile(ocachefile) + @debug "Rejecting cache file $cachefile for $modkey since pkgimage $ocachefile was not found" + return true + end + else + ocachefile = nothing + end + id = first(modules) + if id.first != modkey && modkey != PkgId("") + @debug "Rejecting cache file $cachefile for $modkey since it is for $id instead" + return true + end + if build_id != UInt128(0) + id_build = (UInt128(checksum) << 64) | id.second + if id_build != build_id + @debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it does not provide desired build_id ($((UUID(build_id))))" + return true + end + end + id = id.first modules = Dict{PkgId, UInt64}(modules) # Check if transitive dependencies can be fulfilled @@ -2039,7 +3357,6 @@ get_compiletime_preferences(::Nothing) = String[] @debug "Rejecting cache file $cachefile because dependency $req_key not found." 
return true # Won't be able to fulfill dependency end - set_pkgorigin_version_path(req_key, path) depmods[i] = (path, req_key, req_build_id) end end @@ -2047,44 +3364,73 @@ get_compiletime_preferences(::Nothing) = String[] # check if this file is going to provide one of our concrete dependencies # or if it provides a version that conflicts with our concrete dependencies # or neither - skip_timecheck = false + skip_check = false for (req_key, req_build_id) in _concrete_dependencies build_id = get(modules, req_key, UInt64(0)) if build_id !== UInt64(0) + build_id |= UInt128(checksum) << 64 if build_id === req_build_id - skip_timecheck = true + skip_check = true break end - @debug "Rejecting cache file $cachefile because it provides the wrong uuid (got $build_id) for $req_key (want $req_build_id)" + @debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))" return true # cachefile doesn't provide the required version of the dependency end end - # now check if this file is fresh relative to its source files - if !skip_timecheck - if !samefile(includes[1].filename, modpath) + # now check if this file's content hash has changed relative to its source files + if !skip_check + if !samefile(includes[1].filename, modpath) && !samefile(fixup_stdlib_path(includes[1].filename), modpath) @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath" return true # cache file was compiled from a different path end for (modkey, req_modkey) in requires # verify that `require(modkey, name(req_modkey))` ==> `req_modkey` - if identify_package(modkey, req_modkey.name) != req_modkey - @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed" + pkg = identify_package(modkey, req_modkey.name) + if pkg != req_modkey + @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed, expected $modkey => $pkg" return true end end for chi in includes - f, ftime_req = chi.filename, chi.mtime - ftime = mtime(f) - is_stale = ( ftime != ftime_req ) && - ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes - ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds - ( ftime != 1.0 ) && # PR #43090: provide compatibility with Nix mtime. 
- !( 0 < (ftime_req - ftime) < 1e-6 ) # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond - if is_stale - @debug "Rejecting stale cache file $cachefile (mtime $ftime_req) because file $f (mtime $ftime) has changed" + f, fsize_req, hash_req, ftime_req = chi.filename, chi.fsize, chi.hash, chi.mtime + if startswith(f, "@depot/") + @debug("Rejecting stale cache file $cachefile because its depot could not be resolved") + return true + end + if !ispath(f) + _f = fixup_stdlib_path(f) + if isfile(_f) && startswith(_f, Sys.STDLIB) + continue + end + @debug "Rejecting stale cache file $cachefile because file $f does not exist" return true end + if ftime_req >= 0.0 + # this is an include_dependency for which we only recorded the mtime + ftime = mtime(f) + is_stale = ( ftime != ftime_req ) && + ( ftime != floor(ftime_req) ) && # Issue #13606, PR #13613: compensate for Docker images rounding mtimes + ( ftime != ceil(ftime_req) ) && # PR: #47433 Compensate for CirceCI's truncating of timestamps in its caching + ( ftime != trunc(ftime_req, digits=6) ) && # Issue #20837, PR #20840: compensate for GlusterFS truncating mtimes to microseconds + ( ftime != 1.0 ) && # PR #43090: provide compatibility with Nix mtime. + !( 0 < (ftime_req - ftime) < 1e-6 ) # PR #45552: Compensate for Windows tar giving mtimes that may be incorrect by up to one microsecond + if is_stale + @debug "Rejecting stale cache file $cachefile because mtime of include_dependency $f has changed (mtime $ftime, before $ftime_req)" + return true + end + else + fsize = filesize(f) + if fsize != fsize_req + @debug "Rejecting stale cache file $cachefile because file size of $f has changed (file size $fsize, before $fsize_req)" + return true + end + hash = open(_crc32c, f, "r") + if hash != hash_req + @debug "Rejecting stale cache file $cachefile because hash of $f has changed (hash $hash, before $hash_req)" + return true + end + end end end @@ -2093,22 +3439,27 @@ get_compiletime_preferences(::Nothing) = String[] return true end - if isa(id, PkgId) - curr_prefs_hash = get_preferences_hash(id.uuid, prefs) - if prefs_hash != curr_prefs_hash - @debug "Rejecting cache file $cachefile because preferences hash does not match 0x$(string(prefs_hash, base=16)) != 0x$(string(curr_prefs_hash, base=16))" + if pkgimage + if !isvalid_pkgimage_crc(io, ocachefile::String) + @debug "Rejecting cache file $cachefile because $ocachefile has an invalid checksum" return true end end - return depmods # fresh cachefile + curr_prefs_hash = get_preferences_hash(id.uuid, prefs) + if prefs_hash != curr_prefs_hash + @debug "Rejecting cache file $cachefile because preferences hash does not match 0x$(string(prefs_hash, base=16)) != 0x$(string(curr_prefs_hash, base=16))" + return true + end + + return depmods, ocachefile # fresh cachefile finally close(io) end end """ - @__FILE__ -> AbstractString + @__FILE__ -> String Expand to a string with the path to the file containing the macrocall, or an empty string if evaluated by `julia -e `. @@ -2121,7 +3472,7 @@ macro __FILE__() end """ - @__DIR__ -> AbstractString + @__DIR__ -> String Expand to a string with the absolute path to the directory of the file containing the macrocall. @@ -2134,12 +3485,12 @@ macro __DIR__() end """ - precompile(f, args::Tuple{Vararg{Any}}) + precompile(f, argtypes::Tuple{Vararg{Any}}) -Compile the given function `f` for the argument tuple (of types) `args`, but do not execute it. 
+Compile the given function `f` for the argument tuple (of types) `argtypes`, but do not execute it. """ -function precompile(@nospecialize(f), @nospecialize(args::Tuple)) - precompile(Tuple{Core.Typeof(f), args...}) +function precompile(@nospecialize(f), @nospecialize(argtypes::Tuple)) + precompile(Tuple{Core.Typeof(f), argtypes...}) end const ENABLE_PRECOMPILE_WARNINGS = Ref(false) @@ -2151,6 +3502,28 @@ function precompile(@nospecialize(argt::Type)) return ret end +# Variants that work for `invoke`d calls for which the signature may not be sufficient +precompile(mi::Core.MethodInstance, world::UInt=get_world_counter()) = + (ccall(:jl_compile_method_instance, Cvoid, (Any, Any, UInt), mi, C_NULL, world); return true) + +""" + precompile(f, argtypes::Tuple{Vararg{Any}}, m::Method) + +Precompile a specific method for the given argument types. This may be used to precompile +a different method than the one that would ordinarily be chosen by dispatch, thus +mimicking `invoke`. +""" +function precompile(@nospecialize(f), @nospecialize(argtypes::Tuple), m::Method) + precompile(Tuple{Core.Typeof(f), argtypes...}, m) +end + +function precompile(@nospecialize(argt::Type), m::Method) + atype, sparams = ccall(:jl_type_intersection_with_env, Any, (Any, Any), argt, m.sig)::SimpleVector + mi = Core.Compiler.specialize_method(m, atype, sparams) + return precompile(mi) +end + precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing)) precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String)) -precompile(create_expr_cache, (PkgId, String, String, typeof(_concrete_dependencies), IO, IO)) +precompile(create_expr_cache, (PkgId, String, String, String, typeof(_concrete_dependencies), IO, IO)) +precompile(create_expr_cache, (PkgId, String, String, Nothing, typeof(_concrete_dependencies), IO, IO)) diff --git a/base/lock.jl b/base/lock.jl index 8a15d3f95b239..b473b4033e2de 100644 --- a/base/lock.jl +++ b/base/lock.jl @@ -7,11 +7,11 @@ const ThreadSynchronizer = GenericCondition{Threads.SpinLock} ReentrantLock() Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can -acquire the lock as many times as required. Each [`lock`](@ref) must be matched -with an [`unlock`](@ref). +acquire the lock as many times as required (this is what the "Reentrant" part +of the name means). Each [`lock`](@ref) must be matched with an [`unlock`](@ref). -Calling 'lock' will also inhibit running of finalizers on that thread until the -corresponding 'unlock'. Use of the standard lock pattern illustrated below +Calling `lock` will also inhibit running of finalizers on that thread until the +corresponding `unlock`. Use of the standard lock pattern illustrated below should naturally be supported, but beware of inverting the try/lock order or missing the try block entirely (e.g. attempting to return with the lock still held): @@ -435,10 +435,10 @@ This provides an acquire & release memory ordering on notify/wait. The `autoreset` functionality and memory ordering guarantee requires at least Julia 1.8. 
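The `precompile(f, argtypes, m::Method)` variant introduced above compiles a specific method rather than the one dispatch would select, mimicking `invoke`. A hypothetical usage sketch (the function and types below are illustrative, not part of this patch, and assume a Julia build that already contains the new methods):

```julia
f(x::Int) = x + 1
f(x::Number) = x + 2

m = which(f, (Number,))       # the Method we want compiled
precompile(f, (Int,), m)      # compile the Number-method body for an Int argument
invoke(f, Tuple{Number}, 3)   # the kind of call this precompiles for; returns 5
```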
""" mutable struct Event - notify::Threads.Condition - autoreset::Bool + const notify::ThreadSynchronizer + const autoreset::Bool @atomic set::Bool - Event(autoreset::Bool=false) = new(Threads.Condition(), autoreset, false) + Event(autoreset::Bool=false) = new(ThreadSynchronizer(), autoreset, false) end function wait(e::Event) @@ -481,8 +481,8 @@ end """ reset(::Event) -Reset an Event back into an un-set state. Then any future calls to `wait` will -block until `notify` is called again. +Reset an [`Event`](@ref) back into an un-set state. Then any future calls to `wait` will +block until [`notify`](@ref) is called again. """ function reset(e::Event) @atomic e.set = false # full barrier diff --git a/base/locks-mt.jl b/base/locks-mt.jl index bfa3ac1b8352e..5d355b9ed200c 100644 --- a/base/locks-mt.jl +++ b/base/locks-mt.jl @@ -43,7 +43,7 @@ function lock(l::SpinLock) if @inline trylock(l) return end - ccall(:jl_cpu_pause, Cvoid, ()) + ccall(:jl_cpu_suspend, Cvoid, ()) # Temporary solution before we have gc transition support in codegen. ccall(:jl_gc_safepoint, Cvoid, ()) end diff --git a/base/logging.jl b/base/logging.jl index d2b6fa637c1bc..f6a34aee2f516 100644 --- a/base/logging.jl +++ b/base/logging.jl @@ -42,7 +42,7 @@ function handle_message end """ shouldlog(logger, level, _module, group, id) -Return true when `logger` accepts a message at `level`, generated for +Return `true` when `logger` accepts a message at `level`, generated for `_module`, `group` and with unique log identifier `id`. """ function shouldlog end @@ -58,7 +58,7 @@ function min_enabled_level end """ catch_exceptions(logger) -Return true if the logger should catch exceptions which happen during log +Return `true` if the logger should catch exceptions which happen during log record construction. By default, messages are caught By default all exceptions are caught to prevent log message generation from @@ -159,6 +159,9 @@ Alias for [`LogLevel(2000)`](@ref LogLevel). const Error = LogLevel( 2000) const AboveMaxLevel = LogLevel( 1000001) +# Global log limiting mechanism for super fast but inflexible global log limiting. +const _min_enabled_level = Ref{LogLevel}(Debug) + function show(io::IO, level::LogLevel) if level == BelowMinLevel print(io, "BelowMinLevel") elseif level == Debug print(io, "Debug") @@ -319,6 +322,15 @@ function issimplekw(@nospecialize val) return false end +# helper function to get the current logger, if enabled for the specified message type +@noinline Base.@constprop :none function current_logger_for_env(std_level::LogLevel, group, _module) + logstate = @inline current_logstate() + if std_level >= logstate.min_enabled_level || env_override_minlevel(group, _module) + return logstate.logger + end + return nothing +end + # Generate code for logging macros function logmsg_code(_module, file, line, level, message, exs...) @nospecialize @@ -335,12 +347,12 @@ function logmsg_code(_module, file, line, level, message, exs...) 
checkerrors = nothing for kwarg in reverse(log_data.kwargs) if isa(kwarg.args[2].args[1], Symbol) - checkerrors = Expr(:if, Expr(:isdefined, kwarg.args[2]), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(kwarg.args[2].args[1]))) + checkerrors = Expr(:if, Expr(:isdefined, kwarg.args[2]), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(kwarg.args[2].args[1]), QuoteNode(:local))) end end if isa(message, Symbol) message = esc(message) - checkerrors = Expr(:if, Expr(:isdefined, message), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(message.args[1]))) + checkerrors = Expr(:if, Expr(:isdefined, message), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(message.args[1]), QuoteNode(:local))) end logrecord = quote let err = $checkerrors @@ -349,7 +361,7 @@ function logmsg_code(_module, file, line, level, message, exs...) kwargs = (;$(log_data.kwargs...)) true else - logging_error(logger, level, _module, group, id, file, line, err, false) + @invokelatest logging_error(logger, level, _module, group, id, file, line, err, false) false end end @@ -361,7 +373,7 @@ function logmsg_code(_module, file, line, level, message, exs...) kwargs = (;$(log_data.kwargs...)) true catch err - logging_error(logger, level, _module, group, id, file, line, err, true) + @invokelatest logging_error(logger, level, _module, group, id, file, line, err, true) false end end @@ -369,23 +381,24 @@ function logmsg_code(_module, file, line, level, message, exs...) return quote let level = $level - std_level = convert(LogLevel, level) - if std_level >= _min_enabled_level[] + # simplify std_level code emitted, if we know it is one of our global constants + std_level = $(level isa Symbol ? :level : :(level isa $LogLevel ? level : convert($LogLevel, level)::$LogLevel)) + if std_level >= $(_min_enabled_level)[] group = $(log_data._group) _module = $(log_data._module) - logger = current_logger_for_env(std_level, group, _module) + logger = $(current_logger_for_env)(std_level, group, _module) if !(logger === nothing) id = $(log_data._id) # Second chance at an early bail-out (before computing the message), # based on arbitrary logger-specific logic. - if _invoked_shouldlog(logger, level, _module, group, id) + if invokelatest($shouldlog, logger, level, _module, group, id) file = $(log_data._file) if file isa String file = Base.fixup_stdlib_path(file) end line = $(log_data._line) local msg, kwargs - $(logrecord) && handle_message( + $(logrecord) && invokelatest($handle_message, logger, level, msg, _module, group, id, file, line; kwargs...) end @@ -445,7 +458,7 @@ function default_group_code(file) QuoteNode(default_group(file)) # precompute if we can else ref = Ref{Symbol}() # memoized run-time execution - :(isassigned($ref) ? $ref[] : $ref[] = default_group(something($file, ""))) + :(isassigned($ref) ? $ref[] : $ref[] = default_group(something($file, ""))::Symbol) end end @@ -480,9 +493,6 @@ function logmsg_shim(level, message, _module, group, id, file, line, kwargs) nothing end -# Global log limiting mechanism for super fast but inflexible global log limiting. -const _min_enabled_level = Ref{LogLevel}(Debug) - # LogState - a cache of data extracted from the logger, plus the logger itself. struct LogState min_enabled_level::LogLevel @@ -491,31 +501,14 @@ end LogState(logger) = LogState(LogLevel(_invoked_min_enabled_level(logger)), logger) -function current_logstate() - logstate = current_task().logstate - return (logstate !== nothing ? 
logstate : _global_logstate)::LogState -end +const CURRENT_LOGSTATE = ScopedValue{LogState}() -# helper function to get the current logger, if enabled for the specified message type -@noinline Base.@constprop :none function current_logger_for_env(std_level::LogLevel, group, _module) - logstate = current_logstate() - if std_level >= logstate.min_enabled_level || env_override_minlevel(group, _module) - return logstate.logger - end - return nothing +function current_logstate() + maybe = @inline Base.ScopedValues.get(CURRENT_LOGSTATE) + return something(maybe, _global_logstate)::LogState end -function with_logstate(f::Function, logstate) - @nospecialize - t = current_task() - old = t.logstate - try - t.logstate = logstate - f() - finally - t.logstate = old - end -end +with_logstate(f::Function, logstate) = @with(CURRENT_LOGSTATE => logstate, f()) #------------------------------------------------------------------------------- # Control of the current logger and early log filtering @@ -586,6 +579,8 @@ end end +global _global_logstate::LogState + """ global_logger() @@ -671,8 +666,8 @@ function handle_message(logger::SimpleLogger, level::LogLevel, message, _module, remaining > 0 || return end buf = IOBuffer() - stream = logger.stream - if !isopen(stream) + stream::IO = logger.stream + if !(isopen(stream)::Bool) stream = stderr end iob = IOContext(buf, stream) diff --git a/base/math.jl b/base/math.jl index 9550a0a54b496..ca2f064e49ee7 100644 --- a/base/math.jl +++ b/base/math.jl @@ -5,12 +5,12 @@ module Math export sin, cos, sincos, tan, sinh, cosh, tanh, asin, acos, atan, asinh, acosh, atanh, sec, csc, cot, asec, acsc, acot, sech, csch, coth, asech, acsch, acoth, - sinpi, cospi, sincospi, sinc, cosc, + sinpi, cospi, sincospi, tanpi, sinc, cosc, cosd, cotd, cscd, secd, sind, tand, sincosd, acosd, acotd, acscd, asecd, asind, atand, rad2deg, deg2rad, log, log2, log10, log1p, exponent, exp, exp2, exp10, expm1, - cbrt, sqrt, significand, + cbrt, sqrt, fourthroot, significand, hypot, max, min, minmax, ldexp, frexp, clamp, clamp!, modf, ^, mod2pi, rem2pi, @evalpoly, evalpoly @@ -31,7 +31,11 @@ using .Base: IEEEFloat @noinline function throw_complex_domainerror(f::Symbol, x) throw(DomainError(x, - LazyString(f," will only return a complex result if called with a complex argument. Try ", f,"(Complex(x))."))) + LazyString(f," was called with a negative real argument but will only return a complex result if called with a complex argument. Try ", f,"(Complex(x))."))) +end +@noinline function throw_complex_domainerror_neg1(f::Symbol, x) + throw(DomainError(x, + LazyString(f," was called with a real argument < -1 but will only return a complex result if called with a complex argument. 
Try ", f,"(Complex(x))."))) end @noinline function throw_exp_domainerror(x) throw(DomainError(x, LazyString( @@ -42,7 +46,12 @@ end # non-type specific math functions -@inline function two_mul(x::Float64, y::Float64) +function two_mul(x::T, y::T) where {T<:Number} + xy = x*y + xy, fma(x, y, -xy) +end + +@assume_effects :consistent @inline function two_mul(x::Float64, y::Float64) if Core.Intrinsics.have_fma(Float64) xy = x*y return xy, fma(x, y, -xy) @@ -50,7 +59,7 @@ end return Base.twomul(x,y) end -@inline function two_mul(x::T, y::T) where T<: Union{Float16, Float32} +@assume_effects :consistent @inline function two_mul(x::T, y::T) where T<: Union{Float16, Float32} if Core.Intrinsics.have_fma(T) xy = x*y return xy, fma(x, y, -xy) @@ -190,6 +199,7 @@ end evalpoly(x, p::AbstractVector) = _evalpoly(x, p) function _evalpoly(x, p) + Base.require_one_based_indexing(p) N = length(p) ex = p[end] for i in N-1:-1:1 @@ -229,6 +239,7 @@ evalpoly(z::Complex, p::Tuple{<:Any}) = p[1] evalpoly(z::Complex, p::AbstractVector) = _evalpoly(z, p) function _evalpoly(z::Complex, p) + Base.require_one_based_indexing(p) length(p) == 1 && return p[1] N = length(p) a = p[end] @@ -293,14 +304,19 @@ end # polynomial evaluation using compensated summation. # much more accurate, especially when lo can be combined with other rounding errors -@inline function exthorner(x, p::Tuple) - hi, lo = p[end], zero(x) - for i in length(p)-1:-1:1 - pi = p[i] - prod, err = two_mul(hi,x) - hi = pi+prod - lo = fma(lo, x, prod - (hi - pi) + err) - end +@inline function exthorner(x::T, p::Tuple{T,T,T}) where T<:Union{Float32,Float64} + hi, lo = p[lastindex(p)], zero(x) + hi, lo = _exthorner(2, x, p, hi, lo) + hi, lo = _exthorner(1, x, p, hi, lo) + return hi, lo +end + +@inline function _exthorner(i::Int, x::T, p::Tuple{T,T,T}, hi::T, lo::T) where T<:Union{Float32,Float64} + i == 2 || i == 1 || error("unexpected index") + pi = p[i] + prod, err = two_mul(hi,x) + hi = pi+prod + lo = fma(lo, x, prod - (hi - pi) + err) return hi, lo end @@ -309,6 +325,8 @@ end Convert `x` from radians to degrees. +See also [`deg2rad`](@ref). + # Examples ```jldoctest julia> rad2deg(pi) @@ -322,7 +340,7 @@ rad2deg(z::AbstractFloat) = z * (180 / oftype(z, pi)) Convert `x` from degrees to radians. -See also: [`rad2deg`](@ref), [`sind`](@ref). +See also [`rad2deg`](@ref), [`sind`](@ref), [`pi`](@ref). # Examples ```jldoctest @@ -354,14 +372,14 @@ julia> log(4,2) julia> log(-2, 3) ERROR: DomainError with -2.0: -log will only return a complex result if called with a complex argument. Try log(Complex(x)). +log was called with a negative real argument but will only return a complex result if called with a complex argument. Try log(Complex(x)). Stacktrace: [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31 [...] julia> log(2, -3) ERROR: DomainError with -3.0: -log will only return a complex result if called with a complex argument. Try log(Complex(x)). +log was called with a negative real argument but will only return a complex result if called with a complex argument. Try log(Complex(x)). Stacktrace: [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31 [...] @@ -404,6 +422,28 @@ cosh(x::Number) tanh(x) Compute hyperbolic tangent of `x`. + +See also [`tan`](@ref), [`atanh`](@ref). 
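The `two_mul` definitions reworked above return the rounded product together with its exact rounding error via `fma`, and `exthorner` builds on that for compensated polynomial evaluation. A minimal sketch of the underlying invariant (not the `Base.Math` code itself):

```julia
# hi is the rounded product and lo the exact rounding error, so hi + lo
# reproduces x*y exactly (checked here in higher precision).
function two_mul_demo(x::Float64, y::Float64)
    hi = x * y
    lo = fma(x, y, -hi)   # exact residual of the multiplication
    return hi, lo
end

x = 1.0 + 2.0^-30
hi, lo = two_mul_demo(x, x)
big(hi) + big(lo) == big(x) * big(x)   # true
```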
+ +# Examples + +```jldoctest +julia> tanh.(-3:3f0) # Here 3f0 isa Float32 +7-element Vector{Float32}: + -0.9950548 + -0.9640276 + -0.7615942 + 0.0 + 0.7615942 + 0.9640276 + 0.9950548 + +julia> tan.(im .* (1:3)) +3-element Vector{ComplexF64}: + 0.0 + 0.7615941559557649im + 0.0 + 0.9640275800758169im + 0.0 + 0.9950547536867306im +``` """ tanh(x::Number) @@ -420,6 +460,21 @@ For two arguments, this is the angle in radians between the positive *x*-axis an point (*x*, *y*), returning a value in the interval ``[-\\pi, \\pi]``. This corresponds to a standard [`atan2`](https://en.wikipedia.org/wiki/Atan2) function. Note that by convention `atan(0.0,x)` is defined as ``\\pi`` and `atan(-0.0,x)` is defined as ``-\\pi`` when `x < 0`. + +See also [`atand`](@ref) for degrees. + +# Examples + +```jldoctest +julia> rad2deg(atan(-1/√3)) +-30.000000000000004 + +julia> rad2deg(atan(-1, √3)) +-30.000000000000004 + +julia> rad2deg(atan(1, -√3)) +150.0 +``` """ atan(x::Number) @@ -442,7 +497,29 @@ asinh(x::Number) Compute sine of `x`, where `x` is in radians. -See also [`sind`](@ref), [`sinpi`](@ref), [`sincos`](@ref), [`cis`](@ref). +See also [`sind`](@ref), [`sinpi`](@ref), [`sincos`](@ref), [`cis`](@ref), [`asin`](@ref). + +# Examples +```jldoctest +julia> round.(sin.(range(0, 2pi, length=9)'), digits=3) +1×9 Matrix{Float64}: + 0.0 0.707 1.0 0.707 0.0 -0.707 -1.0 -0.707 -0.0 + +julia> sind(45) +0.7071067811865476 + +julia> sinpi(1/4) +0.7071067811865475 + +julia> round.(sincos(pi/6), digits=3) +(0.5, 0.866) + +julia> round(cis(pi/6), digits=3) +0.866 + 0.5im + +julia> round(exp(im*pi/6), digits=3) +0.866 + 0.5im +``` """ sin(x::Number) @@ -466,6 +543,17 @@ tan(x::Number) asin(x) Compute the inverse sine of `x`, where the output is in radians. + +See also [`asind`](@ref) for output in degrees. + +# Examples +```jldoctest +julia> asin.((0, 1/2, 1)) +(0.0, 0.5235987755982989, 1.5707963267948966) + +julia> asind.((0, 1/2, 1)) +(0.0, 30.000000000000004, 90.0) +``` """ asin(x::Number) @@ -496,7 +584,7 @@ atanh(x::Number) Compute the natural logarithm of `x`. Throws [`DomainError`](@ref) for negative [`Real`](@ref) arguments. Use complex negative arguments to obtain complex results. -See also [`log1p`](@ref), [`log2`](@ref), [`log10`](@ref). +See also [`ℯ`](@ref), [`log1p`](@ref), [`log2`](@ref), [`log10`](@ref). # Examples ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*" @@ -505,10 +593,16 @@ julia> log(2) julia> log(-3) ERROR: DomainError with -3.0: -log will only return a complex result if called with a complex argument. Try log(Complex(x)). +log was called with a negative real argument but will only return a complex result if called with a complex argument. Try log(Complex(x)). Stacktrace: [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31 [...] + +julia> log.(exp.(-1:1)) +3-element Vector{Float64}: + -1.0 + 0.0 + 1.0 ``` """ log(x::Number) @@ -531,10 +625,16 @@ julia> log2(10) julia> log2(-2) ERROR: DomainError with -2.0: -log2 will only return a complex result if called with a complex argument. Try log2(Complex(x)). +log2 was called with a negative real argument but will only return a complex result if called with a complex argument. Try log2(Complex(x)). Stacktrace: [1] throw_complex_domainerror(f::Symbol, x::Float64) at ./math.jl:31 [...] 
+ +julia> log2.(2.0 .^ (-1:1)) +3-element Vector{Float64}: + -1.0 + 0.0 + 1.0 ``` """ log2(x) @@ -555,7 +655,7 @@ julia> log10(2) julia> log10(-2) ERROR: DomainError with -2.0: -log10 will only return a complex result if called with a complex argument. Try log10(Complex(x)). +log10 was called with a negative real argument but will only return a complex result if called with a complex argument. Try log10(Complex(x)). Stacktrace: [1] throw_complex_domainerror(f::Symbol, x::Float64) at ./math.jl:31 [...] @@ -579,7 +679,7 @@ julia> log1p(0) julia> log1p(-2) ERROR: DomainError with -2.0: -log1p will only return a complex result if called with a complex argument. Try log1p(Complex(x)). +log1p was called with a real argument < -1 but will only return a complex result if called with a complex argument. Try log1p(Complex(x)). Stacktrace: [1] throw_complex_domainerror(::Symbol, ::Float64) at ./math.jl:31 [...] @@ -625,6 +725,13 @@ julia> .√(1:4) """ sqrt(x) +""" + fourthroot(x) + +Return the fourth root of `x` by applying `sqrt` twice successively. +""" +fourthroot(x::Number) = sqrt(sqrt(x)) + """ hypot(x, y) @@ -651,7 +758,7 @@ julia> hypot(a, a) julia> √(a^2 + a^2) # a^2 overflows ERROR: DomainError with -2.914184810805068e18: -sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). +sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). Stacktrace: [...] @@ -755,20 +862,70 @@ function _hypot(x::NTuple{N,<:Number}) where {N} end end +function _hypot(x::NTuple{N,<:IEEEFloat}) where {N} + T = eltype(x) + infT = convert(T, Inf) + x = abs.(x) # doesn't change result but enables computational shortcuts + # note: any() was causing this to not inline for N=3 but mapreduce() was not + mapreduce(==(infT), |, x) && return infT # return Inf even if an argument is NaN + maxabs = reinterpret(T, maximum(z -> reinterpret(Signed, z), x)) # for abs(::IEEEFloat) values, a ::BitInteger cast does not change the result + maxabs > zero(T) || return maxabs # catch NaN before the @fastmath below, but also shortcut 0 since we can (remove if no more @fastmath below) + scale,invscale = scaleinv(maxabs) + # @fastmath(+) to allow reassociation (see #48129) + add_fast(x, y) = Core.Intrinsics.add_float_fast(x, y) # @fastmath is not available during bootstrap + return scale * sqrt(mapreduce(y -> abs2(y * invscale), add_fast, x)) +end + atan(y::Real, x::Real) = atan(promote(float(y),float(x))...) atan(y::T, x::T) where {T<:AbstractFloat} = Base.no_op_err("atan", T) -max(x::T, y::T) where {T<:AbstractFloat} = ifelse((y > x) | (signbit(y) < signbit(x)), - ifelse(isnan(x), x, y), ifelse(isnan(y), y, x)) +_isless(x::T, y::T) where {T<:AbstractFloat} = (x < y) || (signbit(x) > signbit(y)) +min(x::T, y::T) where {T<:AbstractFloat} = isnan(x) || ~isnan(y) && _isless(x, y) ? x : y +max(x::T, y::T) where {T<:AbstractFloat} = isnan(x) || ~isnan(y) && _isless(y, x) ? 
x : y +minmax(x::T, y::T) where {T<:AbstractFloat} = min(x, y), max(x, y) +_isless(x::Float16, y::Float16) = signbit(widen(x) - widen(y)) -min(x::T, y::T) where {T<:AbstractFloat} = ifelse((y < x) | (signbit(y) > signbit(x)), - ifelse(isnan(x), x, y), ifelse(isnan(y), y, x)) +const has_native_fminmax = Sys.ARCH === :aarch64 +@static if has_native_fminmax + @eval begin + Base.@assume_effects :total @inline llvm_min(x::Float64, y::Float64) = ccall("llvm.minimum.f64", llvmcall, Float64, (Float64, Float64), x, y) + Base.@assume_effects :total @inline llvm_min(x::Float32, y::Float32) = ccall("llvm.minimum.f32", llvmcall, Float32, (Float32, Float32), x, y) + Base.@assume_effects :total @inline llvm_max(x::Float64, y::Float64) = ccall("llvm.maximum.f64", llvmcall, Float64, (Float64, Float64), x, y) + Base.@assume_effects :total @inline llvm_max(x::Float32, y::Float32) = ccall("llvm.maximum.f32", llvmcall, Float32, (Float32, Float32), x, y) + end +end -minmax(x::T, y::T) where {T<:AbstractFloat} = - ifelse(isnan(x) | isnan(y), ifelse(isnan(x), (x,x), (y,y)), - ifelse((y > x) | (signbit(x) > signbit(y)), (x,y), (y,x))) +function min(x::T, y::T) where {T<:Union{Float32,Float64}} + @static if has_native_fminmax + return llvm_min(x,y) + end + diff = x - y + argmin = ifelse(signbit(diff), x, y) + anynan = isnan(x)|isnan(y) + return ifelse(anynan, diff, argmin) +end +function max(x::T, y::T) where {T<:Union{Float32,Float64}} + @static if has_native_fminmax + return llvm_max(x,y) + end + diff = x - y + argmax = ifelse(signbit(diff), y, x) + anynan = isnan(x)|isnan(y) + return ifelse(anynan, diff, argmax) +end + +function minmax(x::T, y::T) where {T<:Union{Float32,Float64}} + @static if has_native_fminmax + return llvm_min(x, y), llvm_max(x, y) + end + diff = x - y + sdiff = signbit(diff) + min, max = ifelse(sdiff, x, y), ifelse(sdiff, y, x) + anynan = isnan(x)|isnan(y) + return ifelse(anynan, diff, min), ifelse(anynan, diff, max) +end """ ldexp(x, n) @@ -827,18 +984,28 @@ end ldexp(x::Float16, q::Integer) = Float16(ldexp(Float32(x), q)) """ - exponent(x::AbstractFloat) -> Int + exponent(x::Real) -> Int -Get the exponent of a normalized floating-point number. Returns the largest integer `y` such that `2^y ≤ abs(x)`. +Throws a `DomainError` when `x` is zero, infinite, or [`NaN`](@ref). +For any other non-subnormal floating-point number `x`, this corresponds to the exponent bits of `x`. + +See also [`signbit`](@ref), [`significand`](@ref), [`frexp`](@ref), [`issubnormal`](@ref), [`log2`](@ref). + # Examples ```jldoctest +julia> exponent(8) +3 + julia> exponent(6.5) 2 -julia> exponent(16.0) -4 +julia> exponent(-1//4) +-2 + +julia> exponent(floatmin(Float32)), exponent(nextfloat(0.0f0)) +(-126, -149) ``` """ function exponent(x::T) where T<:IEEEFloat @@ -935,6 +1102,40 @@ function frexp(x::T) where T<:IEEEFloat return reinterpret(T, xu), k end +""" + $(@__MODULE__).scaleinv(x) + +Compute `(scale, invscale)` where `scale` and `invscale` are non-subnormal +(https://en.wikipedia.org/wiki/Subnormal_number) finite powers of two such that +`scale * invscale == 1` and `scale` is roughly on the order of `abs(x)`. +Inf, NaN, and zero inputs also result in finite nonzero outputs. +These values are useful to scale computations to prevent overflow and underflow +without round-off errors or division. + +UNSTABLE DETAIL: For `x isa IEEEFLoat`, `scale` is chosen to be the +`prevpow(2,abs(x))` when possible, but is never less than floatmin(x) or greater +than inv(floatmin(x)). `Inf` and `NaN` resolve to `inv(floatmin(x))`. 
This +behavior is subject to change. + +# Examples +```jldoctest +julia> $(@__MODULE__).scaleinv(7.5) +(4.0, 0.25) +``` +""" +function scaleinv(x::T) where T<:IEEEFloat + # by removing the sign and significand and restricting values to a limited range, + # we can invert a number using bit-twiddling instead of division + U = uinttype(T) + umin = reinterpret(U, floatmin(T)) + umax = reinterpret(U, inv(floatmin(T))) + emask = exponent_mask(T) # used to strip sign and significand + u = clamp(reinterpret(U, x) & emask, umin, umax) + scale = reinterpret(T, u) + invscale = reinterpret(T, umin + umax - u) # inv(scale) + return scale, invscale +end + # NOTE: This `rem` method is adapted from the msun `remainder` and `remainderf` # functions, which are under the following license: # @@ -997,14 +1198,18 @@ end # @constprop aggressive to help the compiler see the switch between the integer and float # variants for callers with constant `y` @constprop :aggressive function ^(x::Float64, y::Float64) - yint = unsafe_trunc(Int, y) # Note, this is actually safe since julia freezes the result - y == yint && return x^yint - #numbers greater than 2*inv(eps(T)) must be even, and the pow will overflow - y >= 2*inv(eps()) && return x^(typemax(Int64)-1) xu = reinterpret(UInt64, x) - x<0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer - x === 1.0 && return 1.0 - x==0 && return abs(y)*Inf*(!(y>0)) + xu == reinterpret(UInt64, 1.0) && return 1.0 + # Exponents greater than this will always overflow or underflow. + # Note that NaN can pass through this, but that will end up fine. + if !(abs(y)<0x1.8p62) + isnan(y) && return y + y = sign(y)*0x1.8p62 + end + yint = unsafe_trunc(Int64, y) # This is actually safe since julia freezes the result + y == yint && return @noinline x^yint + 2*xu==0 && return abs(y)*Inf*(!(y>0)) # if x==0 + x<0 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer !isfinite(x) && return x*(y>0 || isnan(x)) # x is inf or NaN if xu < (UInt64(1)<<52) # x is subnormal xu = reinterpret(UInt64, x * 0x1p52) # normalize x @@ -1015,7 +1220,7 @@ end end @inline function pow_body(xu::UInt64, y::Float64) - logxhi,logxlo = Base.Math._log_ext(xu) + logxhi,logxlo = _log_ext(xu) xyhi, xylo = two_mul(logxhi,y) xylo = muladd(logxlo, y, xylo) hi = xyhi+xylo @@ -1023,18 +1228,23 @@ end end @constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32} - yint = unsafe_trunc(Int64, y) # Note, this is actually safe since julia freezes the result + x == 1 && return one(T) + # Exponents greater than this will always overflow or underflow. + # Note that NaN can pass through this, but that will end up fine. + max_exp = T == Float16 ? T(3<<14) : T(0x1.Ap30) + if !(abs(y)= 2*inv(eps(T)) && return x^(typemax(Int64)-1) - x < 0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer + x < 0 && throw_exp_domainerror(x) + !isfinite(x) && return x*(y>0 || isnan(x)) + x==0 && return abs(y)*T(Inf)*(!(y>0)) return pow_body(x, y) end @inline function pow_body(x::T, y::T) where T <: Union{Float16, Float32} - x == 1 && return one(T) - !isfinite(x) && return x*(y>0 || isnan(x)) - x==0 && return abs(y)*T(Inf)*(!(y>0)) return T(exp2(log2(abs(widen(x))) * y)) end @@ -1050,7 +1260,7 @@ end n == 3 && return x*x*x # keep compatibility with literal_pow if n < 0 rx = inv(x) - n==-2 && return rx*rx #keep compatability with literal_pow + n==-2 && return rx*rx #keep compatibility with literal_pow isfinite(x) && (xnlo = -fma(x, rx, -1.) 
* rx) x = rx n = -n @@ -1066,8 +1276,8 @@ end xnlo += err n >>>= 1 end - !isfinite(x) && return x*y - return muladd(x, y, muladd(y, xnlo, x*ynlo)) + err = muladd(y, xnlo, x*ynlo) + return ifelse(isfinite(x) & isfinite(err), muladd(x, y, err), x*y) end function ^(x::Float32, n::Integer) @@ -1138,6 +1348,9 @@ julia> rem2pi(7pi/4, RoundDown) """ function rem2pi end function rem2pi(x::Float64, ::RoundingMode{:Nearest}) + isnan(x) && return x + isinf(x) && return NaN + abs(x) < pi && return x n,y = rem_pio2_kernel(x) @@ -1161,6 +1374,9 @@ function rem2pi(x::Float64, ::RoundingMode{:Nearest}) end end function rem2pi(x::Float64, ::RoundingMode{:ToZero}) + isnan(x) && return x + isinf(x) && return NaN + ax = abs(x) ax <= 2*Float64(pi,RoundDown) && return x @@ -1186,6 +1402,9 @@ function rem2pi(x::Float64, ::RoundingMode{:ToZero}) copysign(z,x) end function rem2pi(x::Float64, ::RoundingMode{:Down}) + isnan(x) && return x + isinf(x) && return NaN + if x < pi4o2_h if x >= 0 return x @@ -1215,6 +1434,9 @@ function rem2pi(x::Float64, ::RoundingMode{:Down}) end end function rem2pi(x::Float64, ::RoundingMode{:Up}) + isnan(x) && return x + isinf(x) && return NaN + if x > -pi4o2_h if x <= 0 return x @@ -1337,7 +1559,7 @@ include("special/log.jl") # Float16 definitions for func in (:sin,:cos,:tan,:asin,:acos,:atan,:cosh,:tanh,:asinh,:acosh, - :atanh,:log,:log2,:log10,:sqrt,:lgamma,:log1p) + :atanh,:log,:log2,:log10,:sqrt,:fourthroot,:log1p) @eval begin $func(a::Float16) = Float16($func(Float32(a))) $func(a::ComplexF16) = ComplexF16($func(ComplexF32(a))) @@ -1355,7 +1577,7 @@ sincos(a::Float16) = Float16.(sincos(Float32(a))) for f in (:sin, :cos, :tan, :asin, :atan, :acos, :sinh, :cosh, :tanh, :asinh, :acosh, :atanh, :exp, :exp2, :exp10, :expm1, :log, :log2, :log10, :log1p, - :exponent, :sqrt, :cbrt) + :exponent, :sqrt, :cbrt, :sinpi, :cospi, :sincospi, :tanpi) @eval function ($f)(x::Real) xf = float(x) x === xf && throw(MethodError($f, (x,))) @@ -1373,5 +1595,6 @@ end exp2(x::AbstractFloat) = 2^x exp10(x::AbstractFloat) = 10^x clamp(::Missing, lo, hi) = missing +fourthroot(::Missing) = missing end # module diff --git a/base/mathconstants.jl b/base/mathconstants.jl index 3bb4bb52ad07f..4bb8c409acf00 100644 --- a/base/mathconstants.jl +++ b/base/mathconstants.jl @@ -10,11 +10,11 @@ module MathConstants export π, pi, ℯ, e, γ, eulergamma, catalan, φ, golden -Base.@irrational π 3.14159265358979323846 pi -Base.@irrational ℯ 2.71828182845904523536 exp(big(1)) -Base.@irrational γ 0.57721566490153286061 euler -Base.@irrational φ 1.61803398874989484820 (1+sqrt(big(5)))/2 -Base.@irrational catalan 0.91596559417721901505 catalan +Base.@irrational π pi +Base.@irrational ℯ exp(big(1)) +Base.@irrational γ euler +Base.@irrational φ (1+sqrt(big(5)))/2 +Base.@irrational catalan catalan # aliases """ diff --git a/base/meta.jl b/base/meta.jl index cf59d3fa3274e..545b1dbf5243b 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -48,7 +48,7 @@ quot(ex) = Expr(:quote, ex) """ Meta.isexpr(ex, head[, n])::Bool -Return true if `ex` is an `Expr` with the given type `head` and optionally that +Return `true` if `ex` is an `Expr` with the given type `head` and optionally that the argument list is of length `n`. `head` may be a `Symbol` or collection of `Symbol`s. For example, to check that a macro was passed a function call expression, you might use `isexpr(ex, :call)`. @@ -96,7 +96,7 @@ rather than line 2 where `@test` is used as an implementation detail. 
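The `Meta.isexpr` docstring touched in the hunk above describes checking an `Expr`'s head and, optionally, its argument count; a short usage sketch of the documented behaviour:

```julia
using Base.Meta: isexpr

ex = :(f(x, y))
isexpr(ex, :call)                  # true: a call expression
isexpr(ex, :call, 3)               # true: head :call with 3 arguments (f, x, y)
isexpr(ex, (:call, :macrocall))    # true: the head may be given as a collection
isexpr(:(if c; end), :call)        # false
```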
""" function replace_sourceloc!(sourceloc, @nospecialize(ex)) if ex isa Expr - if ex.head == :macrocall + if ex.head === :macrocall ex.args[2] = sourceloc end map!(e -> replace_sourceloc!(sourceloc, e), ex.args, ex.args) @@ -187,8 +187,11 @@ expression. """ struct ParseError <: Exception msg::String + detail::Any end +ParseError(msg::AbstractString) = ParseError(msg, nothing) + function _parse_string(text::AbstractString, filename::AbstractString, lineno::Integer, index::Integer, options) if index < 1 || index > ncodeunits(text) + 1 @@ -199,7 +202,7 @@ function _parse_string(text::AbstractString, filename::AbstractString, end """ - parse(str, start; greedy=true, raise=true, depwarn=true) + parse(str, start; greedy=true, raise=true, depwarn=true, filename="none") Parse the expression string and return an expression (which could later be passed to eval for execution). `start` is the code unit index into `str` of the @@ -211,6 +214,7 @@ return `Expr(:incomplete, "(error message)")`. If `raise` is `true` (default), syntax errors other than incomplete expressions will raise an error. If `raise` is `false`, `parse` will return an expression that will raise an error upon evaluation. If `depwarn` is `false`, deprecation warnings will be suppressed. +The `filename` argument is used to display diagnostics when an error is raised. ```jldoctest julia> Meta.parse("(α, β) = 3, 5", 1) # start of string @@ -229,43 +233,50 @@ julia> Meta.parse("(α, β) = 3, 5", 11, greedy=false) (3, 13) ``` """ -function parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true, - depwarn::Bool=true) - ex, pos = _parse_string(str, "none", 1, pos, greedy ? :statement : :atom) - if raise && isa(ex,Expr) && ex.head === :error - throw(ParseError(ex.args[1])) +function parse(str::AbstractString, pos::Integer; + filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true) + ex, pos = _parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom) + if raise && isexpr(ex, :error) + err = ex.args[1] + if err isa String + err = ParseError(err) # For flisp parser + end + throw(err) end return ex, pos end """ - parse(str; raise=true, depwarn=true) + parse(str; raise=true, depwarn=true, filename="none") Parse the expression string greedily, returning a single expression. An error is thrown if there are additional characters after the first expression. If `raise` is `true` (default), syntax errors will raise an error; otherwise, `parse` will return an expression that will raise an error upon evaluation. If `depwarn` is `false`, deprecation warnings will be -suppressed. +suppressed. The `filename` argument is used to display diagnostics when an error is raised. -```jldoctest +```jldoctest; filter=r"(?<=Expr\\(:error).*|(?<=Expr\\(:incomplete).*" julia> Meta.parse("x = 3") :(x = 3) -julia> Meta.parse("x = ") -:($(Expr(:incomplete, "incomplete: premature end of input"))) - julia> Meta.parse("1.0.2") -ERROR: Base.Meta.ParseError("invalid numeric constant \\\"1.0.\\\"") -Stacktrace: +ERROR: ParseError: +# Error @ none:1:1 +1.0.2 +└──┘ ── invalid numeric constant [...] 
julia> Meta.parse("1.0.2"; raise = false) -:($(Expr(:error, "invalid numeric constant \"1.0.\""))) +:(\$(Expr(:error, "invalid numeric constant \"1.0.\""))) + +julia> Meta.parse("x = ") +:(\$(Expr(:incomplete, "incomplete: premature end of input"))) ``` """ -function parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true) - ex, pos = parse(str, 1, greedy=true, raise=raise, depwarn=depwarn) - if isa(ex,Expr) && ex.head === :error +function parse(str::AbstractString; + filename="none", raise::Bool=true, depwarn::Bool=true) + ex, pos = parse(str, 1; filename, greedy=true, raise, depwarn) + if isexpr(ex, :error) return ex end if pos <= ncodeunits(str) @@ -366,6 +377,9 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any}, x.dest + statement_offset, ) end + if isa(x, Core.EnterNode) + return Core.EnterNode(x, x.catch_dest + statement_offset) + end if isa(x, Expr) head = x.head if head === :static_parameter @@ -413,8 +427,6 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any}, static_param_values, slot_offset, statement_offset, boundscheck) x.args[2] += statement_offset - elseif head === :enter - x.args[1] += statement_offset elseif head === :isdefined arg = x.args[1] # inlining a QuoteNode or literal into `Expr(:isdefined, x)` is invalid, replace with true diff --git a/base/methodshow.jl b/base/methodshow.jl index d65dd9b00d595..0eb99dc88303f 100644 --- a/base/methodshow.jl +++ b/base/methodshow.jl @@ -7,11 +7,11 @@ function strip_gensym(sym) if sym === :var"#self#" || sym === :var"#unused#" return empty_sym end - return Symbol(replace(String(sym), r"^(.*)#(.*#)?\d+$" => s"\1")) + return Symbol(replace(String(sym), r"^(.*)#(.*#)?\d+$"sa => s"\1")) end function argtype_decl(env, n, @nospecialize(sig::DataType), i::Int, nargs, isva::Bool) # -> (argname, argtype) - t = sig.parameters[unwrapva(min(i, end))] + t = unwrapva(sig.parameters[min(i, end)]) if i == nargs && isva va = sig.parameters[end] if isvarargtype(va) && (!isdefined(va, :N) || !isa(va.N, Int)) @@ -78,13 +78,9 @@ end # NOTE: second argument is deprecated and is no longer used function kwarg_decl(m::Method, kwtype = nothing) - if m.sig === Tuple # OpaqueClosure - return Symbol[] - end - mt = get_methodtable(m) - if isdefined(mt, :kwsorter) - kwtype = typeof(mt.kwsorter) - sig = rewrap_unionall(Tuple{kwtype, Any, (unwrap_unionall(m.sig)::DataType).parameters...}, m.sig) + if m.sig !== Tuple # OpaqueClosure or Builtin + kwtype = typeof(Core.kwcall) + sig = rewrap_unionall(Tuple{kwtype, NamedTuple, (unwrap_unionall(m.sig)::DataType).parameters...}, m.sig) kwli = ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), kwtype.name.mt, sig, get_world_counter()) if kwli !== nothing kwli = kwli::Method @@ -97,6 +93,7 @@ function kwarg_decl(m::Method, kwtype = nothing) push!(kws, kws[i]) deleteat!(kws, i) end + isempty(kws) && push!(kws, :var"...") return kws end end @@ -164,7 +161,7 @@ functionloc(m::Core.MethodInstance) = functionloc(m.def) """ functionloc(m::Method) -Returns a tuple `(filename,line)` giving the location of a `Method` definition. +Return a tuple `(filename,line)` giving the location of a `Method` definition. """ function functionloc(m::Method) file, ln = updated_methodloc(m) @@ -177,7 +174,7 @@ end """ functionloc(f::Function, types) -Returns a tuple `(filename,line)` giving the location of a generic `Function` definition. +Return a tuple `(filename,line)` giving the location of a generic `Function` definition. 
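The new `filename` keyword threaded through `Meta.parse` above affects only diagnostics and error display. A small sketch of the keyword in use (the file name here is made up):

```julia
# With raise = false the parser returns error/incomplete expressions instead of throwing.
ex, pos = Meta.parse("x = ", 1; filename = "myscript.jl", raise = false)
Meta.isexpr(ex, :incomplete)                      # true: premature end of input
Meta.parse("x = 3"; filename = "myscript.jl")     # :(x = 3); the filename is unused on success
```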
""" functionloc(@nospecialize(f), @nospecialize(types)) = functionloc(which(f,types)) @@ -197,6 +194,9 @@ function functionloc(@nospecialize(f)) end function sym_to_string(sym) + if sym === :var"..." + return "..." + end s = String(sym) if endswith(s, "...") return string(sprint(show_sym, Symbol(s[1:end-3])), "...") @@ -205,7 +205,12 @@ function sym_to_string(sym) end end -function show(io::IO, m::Method; modulecolor = :light_black, digit_align_width = -1) +# default compact view +show(io::IO, m::Method; kwargs...) = show_method(IOContext(io, :compact=>true), m; kwargs...) + +show(io::IO, ::MIME"text/plain", m::Method; kwargs...) = show_method(io, m; kwargs...) + +function show_method(io::IO, m::Method; modulecolor = :light_black, digit_align_width = 1) tv, decls, file, line = arg_decl_parts(m) sig = unwrap_unionall(m.sig) if sig === Tuple @@ -241,9 +246,12 @@ function show(io::IO, m::Method; modulecolor = :light_black, digit_align_width = show_method_params(io, tv) end + if !(get(io, :compact, false)::Bool) # single-line mode + println(io) + digit_align_width += 4 + end # module & file, re-using function from errorshow.jl - println(io) - print_module_path_file(io, m.module, string(file), line, modulecolor, digit_align_width+4) + print_module_path_file(io, parentmodule(m), string(file), line; modulecolor, digit_align_width) end function show_method_list_header(io::IO, ms::MethodList, namefmt::Function) @@ -259,7 +267,7 @@ function show_method_list_header(io::IO, ms::MethodList, namefmt::Function) if hasname what = (startswith(sname, '@') ? "macro" - : mt.module === Core && last(ms).sig === Tuple ? + : mt.module === Core && mt.defs isa Core.TypeMapEntry && (mt.defs.func::Method).sig === Tuple ? "builtin function" : # else "generic function") @@ -292,7 +300,7 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru last_shown_line_infos = get(io, :last_shown_line_infos, nothing) last_shown_line_infos === nothing || empty!(last_shown_line_infos) - modul = if mt === _TYPE_NAME.mt # type constructor + modul = if mt === _TYPE_NAME.mt && length(ms) > 0 # type constructor which(ms.ms[1].module, ms.ms[1].name) else mt.module @@ -307,13 +315,13 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru print(io, " ", lpad("[$n]", digit_align_width + 2), " ") - modulecolor = if meth.module == modul + modulecolor = if parentmodule(meth) == modul nothing else - m = parentmodule_before_main(meth.module) + m = parentmodule_before_main(meth) get!(() -> popfirst!(STACKTRACE_MODULECOLORS), STACKTRACE_FIXEDCOLORS, m) end - show(io, meth; modulecolor) + show_method(io, meth; modulecolor) file, line = updated_methodloc(meth) if last_shown_line_infos !== nothing @@ -327,7 +335,7 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru if rest > 0 println(io) if rest == 1 - show(io, last) + show_method(io, last) else print(io, "... $rest methods not shown") if hasname @@ -353,11 +361,11 @@ end fileurl(file) = let f = find_source_file(file); f === nothing ? 
"" : "file://"*f; end function url(m::Method) - M = m.module + M = parentmodule(m) (m.file === :null || m.file === :string) && return "" file = string(m.file) line = m.line - line <= 0 || occursin(r"In\[[0-9]+\]", file) && return "" + line <= 0 || occursin(r"In\[[0-9]+\]"a, file) && return "" Sys.iswindows() && (file = replace(file, '\\' => '/')) libgit2_id = PkgId(UUID((0x76f85450_5226_5b5a,0x8eaa_529ad045b433)), "LibGit2") if inbase(M) @@ -374,7 +382,7 @@ function url(m::Method) return LibGit2.with(LibGit2.GitRepoExt(d)) do repo LibGit2.with(LibGit2.GitConfig(repo)) do cfg u = LibGit2.get(cfg, "remote.origin.url", "") - u = match(LibGit2.GITHUB_REGEX,u).captures[1] + u = (match(LibGit2.GITHUB_REGEX,u)::AbstractMatch).captures[1] commit = string(LibGit2.head_oid(repo)) root = LibGit2.path(repo) if startswith(file, root) || startswith(realpath(file), root) @@ -397,7 +405,7 @@ function show(io::IO, ::MIME"text/html", m::Method) sig = unwrap_unionall(m.sig) if sig === Tuple # Builtin - print(io, m.name, "(...) in ", m.module) + print(io, m.name, "(...) in ", parentmodule(m)) return end print(io, decls[1][2], "(") @@ -421,7 +429,7 @@ function show(io::IO, ::MIME"text/html", m::Method) show_method_params(io, tv) print(io,"") end - print(io, " in ", m.module) + print(io, " in ", parentmodule(m)) if line > 0 file, line = updated_methodloc(m) u = url(m) @@ -463,6 +471,8 @@ function show(io::IO, mime::MIME"text/plain", mt::AbstractVector{Method}) push!(last_shown_line_infos, (string(file), line)) end end + first && summary(io, mt) + nothing end function show(io::IO, mime::MIME"text/html", mt::AbstractVector{Method}) @@ -475,4 +485,5 @@ function show(io::IO, mime::MIME"text/html", mt::AbstractVector{Method}) end print(io, "") end + nothing end diff --git a/base/missing.jl b/base/missing.jl index e1988064aadc1..35e1b4034643c 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -12,7 +12,7 @@ where it is not supported. The error message, in the `msg` field may provide more specific details. 
""" struct MissingException <: Exception - msg::String + msg::AbstractString end showerror(io::IO, ex::MissingException) = @@ -41,6 +41,7 @@ nonmissingtype(::Type{T}) where {T} = typesplit(T, Missing) function nonmissingtype_checked(T::Type) R = nonmissingtype(T) R >: T && error("could not compute non-missing type") + R <: Union{} && error("cannot convert a value to missing for assignment") return R end @@ -69,7 +70,6 @@ convert(::Type{T}, x::T) where {T>:Union{Missing, Nothing}} = x convert(::Type{T}, x) where {T>:Missing} = convert(nonmissingtype_checked(T), x) convert(::Type{T}, x) where {T>:Union{Missing, Nothing}} = convert(nonmissingtype_checked(nonnothingtype_checked(T)), x) - # Comparison operators ==(::Missing, ::Missing) = missing ==(::Missing, ::Any) = missing @@ -136,29 +136,20 @@ max(::Missing, ::Missing) = missing max(::Missing, ::Any) = missing max(::Any, ::Missing) = missing +missing_conversion_msg(@nospecialize T) = + LazyString("cannot convert a missing value to type ", T, ": use Union{", T, ", Missing} instead") + # Rounding and related functions round(::Missing, ::RoundingMode=RoundNearest; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing round(::Type{>:Missing}, ::Missing, ::RoundingMode=RoundNearest) = missing round(::Type{T}, ::Missing, ::RoundingMode=RoundNearest) where {T} = - throw(MissingException("cannot convert a missing value to type $T: use Union{$T, Missing} instead")) + throw(MissingException(missing_conversion_msg(T))) round(::Type{T}, x::Any, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r) # to fix ambiguities +round(::Type{T}, x::Real, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r) round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T>:Missing,Tr} = round(nonmissingtype_checked(T), x, r) round(::Type{T}, x::Rational{Bool}, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r) -# Handle ceil, floor, and trunc separately as they have no RoundingMode argument -for f in (:(ceil), :(floor), :(trunc)) - @eval begin - ($f)(::Missing; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing - ($f)(::Type{>:Missing}, ::Missing) = missing - ($f)(::Type{T}, ::Missing) where {T} = - throw(MissingException("cannot convert a missing value to type $T: use Union{$T, Missing} instead")) - ($f)(::Type{T}, x::Any) where {T>:Missing} = $f(nonmissingtype_checked(T), x) - # to fix ambiguities - ($f)(::Type{T}, x::Rational) where {T>:Missing} = $f(nonmissingtype_checked(T), x) - end -end - # to avoid ambiguity warnings (^)(::Missing, ::Integer) = missing @@ -265,7 +256,7 @@ keys(itr::SkipMissing) = Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, keys(itr.x)) @propagate_inbounds function getindex(itr::SkipMissing, I...) v = itr.x[I...] 
- v === missing && throw(MissingException("the value at index $I is missing")) + v === missing && throw(MissingException(LazyString("the value at index ", I, " is missing"))) v end diff --git a/base/mpfr.jl b/base/mpfr.jl index 60f59cdb0af7e..276fd430ff1e0 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -8,7 +8,7 @@ export import .Base: *, +, -, /, <, <=, ==, >, >=, ^, ceil, cmp, convert, copysign, div, - inv, exp, exp2, exponent, factorial, floor, fma, hypot, isinteger, + inv, exp, exp2, exponent, factorial, floor, fma, muladd, hypot, isinteger, isfinite, isinf, isnan, ldexp, log, log2, log10, max, min, mod, modf, nextfloat, prevfloat, promote_rule, rem, rem2pi, round, show, float, sum, sqrt, string, print, trunc, precision, _precision, exp10, expm1, log1p, @@ -16,16 +16,32 @@ import cosh, sinh, tanh, sech, csch, coth, acosh, asinh, atanh, lerpi, cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding, setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero, - isone, big, _string_n, decompose + isone, big, _string_n, decompose, minmax, + sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand, + uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask, + RawBigIntRoundingIncrementHelper, truncated, RawBigInt -import ..Rounding: rounding_raw, setrounding_raw -import ..GMP: ClongMax, CulongMax, CdoubleMax, Limb +using .Base.Libc +import ..Rounding: + rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero, + tie_breaker_is_to_even, correct_rounding_requires_increment + +import ..GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp import ..FastMath.sincos_fast -version() = VersionNumber(unsafe_string(ccall((:mpfr_get_version,:libmpfr), Ptr{Cchar}, ()))) -patches() = split(unsafe_string(ccall((:mpfr_get_patches,:libmpfr), Ptr{Cchar}, ())),' ') +if Sys.iswindows() + const libmpfr = "libmpfr-6.dll" +elseif Sys.isapple() + const libmpfr = "@rpath/libmpfr.6.dylib" +else + const libmpfr = "libmpfr.so.6" +end + + +version() = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Ptr{Cchar}, ()))) +patches() = split(unsafe_string(ccall((:mpfr_get_patches,libmpfr), Ptr{Cchar}, ())),' ') function __init__() try @@ -77,11 +93,32 @@ function convert(::Type{RoundingMode}, r::MPFRRoundingMode) end end +rounds_to_nearest(m::MPFRRoundingMode) = m == MPFRRoundNearest +function rounds_away_from_zero(m::MPFRRoundingMode, sign_bit::Bool) + if m == MPFRRoundToZero + false + elseif m == MPFRRoundUp + !sign_bit + elseif m == MPFRRoundDown + sign_bit + else + # Assuming `m == MPFRRoundFromZero` + true + end +end +tie_breaker_is_to_even(::MPFRRoundingMode) = true + const ROUNDING_MODE = Ref{MPFRRoundingMode}(MPFRRoundNearest) const DEFAULT_PRECISION = Ref{Clong}(256) # Basic type and initialization definitions +# Warning: the constants are MPFR implementation details from +# `src/mpfr-impl.h`, search for `MPFR_EXP_ZERO`. 
+const mpfr_special_exponent_zero = typemin(Clong) + true +const mpfr_special_exponent_nan = mpfr_special_exponent_zero + true +const mpfr_special_exponent_inf = mpfr_special_exponent_nan + true + """ BigFloat <: AbstractFloat @@ -100,24 +137,27 @@ mutable struct BigFloat <: AbstractFloat global function _BigFloat(prec::Clong, sign::Cint, exp::Clong, d::String) # ccall-based version, inlined below #z = new(zero(Clong), zero(Cint), zero(Clong), C_NULL, d) - #ccall((:mpfr_custom_init,:libmpfr), Cvoid, (Ptr{Limb}, Clong), d, prec) # currently seems to be a no-op in mpfr + #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), d, prec) # currently seems to be a no-op in mpfr #NAN_KIND = Cint(0) - #ccall((:mpfr_custom_init_set,:libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, d) + #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, d) #return z return new(prec, sign, exp, pointer(d), d) end function BigFloat(; precision::Integer=DEFAULT_PRECISION[]) precision < 1 && throw(DomainError(precision, "`precision` cannot be less than 1.")) - nb = ccall((:mpfr_custom_get_size,:libmpfr), Csize_t, (Clong,), precision) + nb = ccall((:mpfr_custom_get_size,libmpfr), Csize_t, (Clong,), precision) nb = (nb + Core.sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this #d = Vector{Limb}(undef, nb) d = _string_n(nb * Core.sizeof(Limb)) - EXP_NAN = Clong(1) - Clong(typemax(Culong) >> 1) + EXP_NAN = mpfr_special_exponent_nan return _BigFloat(Clong(precision), one(Cint), EXP_NAN, d) # +NAN end end +# The rounding mode here shouldn't matter. +significand_limb_count(x::BigFloat) = div(sizeof(x._d), sizeof(Limb), RoundToZero) + rounding_raw(::Type{BigFloat}) = ROUNDING_MODE[] setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode) = ROUNDING_MODE[]=r @@ -185,7 +225,7 @@ function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::I return x else z = BigFloat(;precision=precision) - ccall((:mpfr_set, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), + ccall((:mpfr_set, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, r) return z end @@ -193,7 +233,7 @@ end function _duplicate(x::BigFloat) z = BigFloat(;precision=_precision(x)) - ccall((:mpfr_set, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Int32), z, x, 0) + ccall((:mpfr_set, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Int32), z, x, 0) return z end @@ -202,24 +242,58 @@ for (fJ, fC) in ((:si,:Clong), (:ui,:Culong)) @eval begin function BigFloat(x::($fC), r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) z = BigFloat(;precision=precision) - ccall(($(string(:mpfr_set_,fJ)), :libmpfr), Int32, (Ref{BigFloat}, $fC, MPFRRoundingMode), z, x, r) + ccall(($(string(:mpfr_set_,fJ)), libmpfr), Int32, (Ref{BigFloat}, $fC, MPFRRoundingMode), z, x, r) return z end end end function BigFloat(x::Float64, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) - z = BigFloat(;precision=precision) - ccall((:mpfr_set_d, :libmpfr), Int32, (Ref{BigFloat}, Float64, MPFRRoundingMode), z, x, r) - if isnan(x) && signbit(x) != signbit(z) - z.sign = -z.sign + z = BigFloat(;precision) + # punt on the hard case where we might have to deal with rounding + # we could use this path in all cases, but mpfr_set_d has a lot of overhead. 
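The comment above motivates the hand-written conversion path that follows: when the target precision exceeds Float64's 53 significand bits, no rounding can occur, so the expensive `mpfr_set_d` call is avoidable. A small illustration of that exactness claim (independent of the libmpfr internals):

```julia
setprecision(BigFloat, 256) do
    x = 1/3                    # a Float64 approximation of 1/3
    BigFloat(x) == x           # true: the conversion is exact at 256 bits
end

setprecision(BigFloat, 24) do
    BigFloat(1/3) == 1/3       # false: 24 bits cannot hold all 53 bits, so the result is rounded
end
```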
+ if precision <= Base.significand_bits(Float64) + ccall((:mpfr_set_d, libmpfr), Int32, (Ref{BigFloat}, Float64, MPFRRoundingMode), z, x, r) + if isnan(x) && signbit(x) != signbit(z) + z.sign = -z.sign + end + return z end - return z + z.sign = 1-2*signbit(x) + if iszero(x) || !isfinite(x) + if isinf(x) + z.exp = mpfr_special_exponent_inf + elseif isnan(x) + z.exp = mpfr_special_exponent_nan + else + z.exp = mpfr_special_exponent_zero + end + return z + end + z.exp = 1 + exponent(x) + # BigFloat doesn't have an implicit bit + val = reinterpret(UInt64, significand(x))<<11 | typemin(Int64) + nlimbs = (precision + 8*Core.sizeof(Limb) - 1) ÷ (8*Core.sizeof(Limb)) + + # Limb is a CLong which is a UInt32 on windows (thank M$) which makes this more complicated and slower. + if Limb === UInt64 + for i in 1:nlimbs-1 + unsafe_store!(z.d, 0x0, i) + end + unsafe_store!(z.d, val, nlimbs) + else + for i in 1:nlimbs-2 + unsafe_store!(z.d, 0x0, i) + end + unsafe_store!(z.d, val % UInt32, nlimbs-1) + unsafe_store!(z.d, (val >> 32) % UInt32, nlimbs) + end + z end function BigFloat(x::BigInt, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) z = BigFloat(;precision=precision) - ccall((:mpfr_set_z, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, r) + ccall((:mpfr_set_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, r) return z end @@ -247,7 +321,7 @@ end function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=DEFAULT_PRECISION[], rounding::MPFRRoundingMode=ROUNDING_MODE[]) !isempty(s) && isspace(s[end]) && return tryparse(BigFloat, rstrip(s), base = base) z = BigFloat(precision=precision) - err = ccall((:mpfr_set_str, :libmpfr), Int32, (Ref{BigFloat}, Cstring, Int32, MPFRRoundingMode), z, s, base, rounding) + err = ccall((:mpfr_set_str, libmpfr), Int32, (Ref{BigFloat}, Cstring, Int32, MPFRRoundingMode), z, s, base, rounding) err == 0 ? 
z : nothing end @@ -268,16 +342,16 @@ BigFloat(x::AbstractString, r::RoundingMode; precision::Integer=DEFAULT_PRECISIO _unchecked_cast(T, x::BigFloat, r::RoundingMode) = _unchecked_cast(T, x, convert(MPFRRoundingMode, r)) function _unchecked_cast(::Type{Int64}, x::BigFloat, r::MPFRRoundingMode) - ccall((:__gmpfr_mpfr_get_sj,:libmpfr), Cintmax_t, (Ref{BigFloat}, MPFRRoundingMode), x, r) + ccall((:__gmpfr_mpfr_get_sj,libmpfr), Cintmax_t, (Ref{BigFloat}, MPFRRoundingMode), x, r) end function _unchecked_cast(::Type{UInt64}, x::BigFloat, r::MPFRRoundingMode) - ccall((:__gmpfr_mpfr_get_uj,:libmpfr), Cuintmax_t, (Ref{BigFloat}, MPFRRoundingMode), x, r) + ccall((:__gmpfr_mpfr_get_uj,libmpfr), Cuintmax_t, (Ref{BigFloat}, MPFRRoundingMode), x, r) end function _unchecked_cast(::Type{BigInt}, x::BigFloat, r::MPFRRoundingMode) z = BigInt() - ccall((:mpfr_get_z, :libmpfr), Int32, (Ref{BigInt}, Ref{BigFloat}, MPFRRoundingMode), z, x, r) + ccall((:mpfr_get_z, libmpfr), Int32, (Ref{BigInt}, Ref{BigFloat}, MPFRRoundingMode), z, x, r) return z end @@ -306,18 +380,15 @@ round(::Type{T}, x::BigFloat, r::RoundingMode) where T<:Union{Signed, Unsigned} invoke(round, Tuple{Type{<:Union{Signed, Unsigned}}, BigFloat, Union{RoundingMode, MPFRRoundingMode}}, T, x, r) round(::Type{BigInt}, x::BigFloat, r::RoundingMode) = invoke(round, Tuple{Type{BigInt}, BigFloat, Union{RoundingMode, MPFRRoundingMode}}, BigInt, x, r) -round(::Type{<:Integer}, x::BigFloat, r::RoundingMode) = throw(MethodError(round, (Integer, x, r))) unsafe_trunc(::Type{T}, x::BigFloat) where {T<:Integer} = unsafe_trunc(T, _unchecked_cast(T, x, RoundToZero)) unsafe_trunc(::Type{BigInt}, x::BigFloat) = _unchecked_cast(BigInt, x, RoundToZero) -# TODO: Ideally the base fallbacks for these would already exist -for (f, rnd) in zip((:trunc, :floor, :ceil, :round), - (RoundToZero, RoundDown, RoundUp, :(ROUNDING_MODE[]))) - @eval $f(::Type{T}, x::BigFloat) where T<:Union{Unsigned, Signed, BigInt} = round(T, x, $rnd) - @eval $f(::Type{Integer}, x::BigFloat) = $f(BigInt, x) -end +round(::Type{T}, x::BigFloat) where T<:Integer = round(T, x, ROUNDING_MODE[]) +# these two methods are split to increase their precedence in disambiguation: +round(::Type{Integer}, x::BigFloat, r::RoundingMode) = round(BigInt, x, r) +round(::Type{Integer}, x::BigFloat, r::MPFRRoundingMode) = round(BigInt, x, r) function Bool(x::BigFloat) iszero(x) && return false @@ -334,35 +405,69 @@ function (::Type{T})(x::BigFloat) where T<:Integer trunc(T,x) end -## BigFloat -> AbstractFloat -_cpynansgn(x::AbstractFloat, y::BigFloat) = isnan(x) && signbit(x) != signbit(y) ? 
-x : x - -Float64(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = - _cpynansgn(ccall((:mpfr_get_d,:libmpfr), Float64, (Ref{BigFloat}, MPFRRoundingMode), x, r), x) -Float64(x::BigFloat, r::RoundingMode) = Float64(x, convert(MPFRRoundingMode, r)) - -Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = - _cpynansgn(ccall((:mpfr_get_flt,:libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x) -Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r)) - -function Float16(x::BigFloat) :: Float16 - res = Float32(x) - resi = reinterpret(UInt32, res) - if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal - #shift so that the mantissa lines up where it would for normal Float16 - shift = 113-((resi & 0x7f800000)>>23) - if shift<23 - resi |= 0x0080_0000 # set implicit bit - resi >>= shift +function to_ieee754(::Type{T}, x::BigFloat, rm) where {T<:AbstractFloat} + sb = signbit(x) + is_zero = iszero(x) + is_inf = isinf(x) + is_nan = isnan(x) + is_regular = !is_zero & !is_inf & !is_nan + ieee_exp = Int(x.exp) - 1 + ieee_precision = precision(T) + ieee_exp_max = exponent_max(T) + ieee_exp_min = exponent_min(T) + exp_diff = ieee_exp - ieee_exp_min + is_normal = 0 ≤ exp_diff + (rm_is_to_zero, rm_is_from_zero) = if rounds_to_nearest(rm) + (false, false) + else + let from = rounds_away_from_zero(rm, sb) + (!from, from) end - end - if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values - # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer - res = nextfloat(res, cmp(x, res)) - end - return res + end::NTuple{2,Bool} + exp_is_huge_p = ieee_exp_max < ieee_exp + exp_is_huge_n = signbit(exp_diff + ieee_precision) + rounds_to_inf = is_regular & exp_is_huge_p & !rm_is_to_zero + rounds_to_zero = is_regular & exp_is_huge_n & !rm_is_from_zero + U = uinttype(T) + + ret_u = if is_regular & !rounds_to_inf & !rounds_to_zero + if !exp_is_huge_p + # significand + v = RawBigInt{Limb}(x._d, significand_limb_count(x)) + len = max(ieee_precision + min(exp_diff, 0), 0)::Int + signif = truncated(U, v, len) & significand_mask(T) + + # round up if necessary + rh = RawBigIntRoundingIncrementHelper(v, len) + incr = correct_rounding_requires_increment(rh, rm, sb) + + # exponent + exp_field = max(exp_diff, 0) + is_normal + + ieee754_representation(T, sb, exp_field, signif) + incr + else + ieee754_representation(T, sb, Val(:omega)) + end + else + if is_zero | rounds_to_zero + ieee754_representation(T, sb, Val(:zero)) + elseif is_inf | rounds_to_inf + ieee754_representation(T, sb, Val(:inf)) + else + ieee754_representation(T, sb, Val(:nan)) + end + end::U + + reinterpret(T, ret_u) end +Float16(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = to_ieee754(Float16, x, r) +Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = to_ieee754(Float32, x, r) +Float64(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = to_ieee754(Float64, x, r) +Float16(x::BigFloat, r::RoundingMode) = to_ieee754(Float16, x, r) +Float32(x::BigFloat, r::RoundingMode) = to_ieee754(Float32, x, r) +Float64(x::BigFloat, r::RoundingMode) = to_ieee754(Float64, x, r) + promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat promote_rule(::Type{BigFloat}, ::Type{<:AbstractFloat}) = BigFloat @@ -385,14 +490,14 @@ for (fJ, fC) in ((:+,:add), (:*,:mul)) # BigFloat function ($fJ)(x::BigFloat, y::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)),:libmpfr), Int32, (Ref{BigFloat}, 
Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end # Unsigned Integer function ($fJ)(x::BigFloat, c::CulongMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_ui)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) return z end ($fJ)(c::CulongMax, x::BigFloat) = ($fJ)(x,c) @@ -400,7 +505,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul)) # Signed Integer function ($fJ)(x::BigFloat, c::ClongMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_si)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) return z end ($fJ)(c::ClongMax, x::BigFloat) = ($fJ)(x,c) @@ -408,7 +513,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul)) # Float32/Float64 function ($fJ)(x::BigFloat, c::CdoubleMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_d)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) return z end ($fJ)(c::CdoubleMax, x::BigFloat) = ($fJ)(x,c) @@ -416,7 +521,7 @@ for (fJ, fC) in ((:+,:add), (:*,:mul)) # BigInt function ($fJ)(x::BigFloat, c::BigInt) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_z)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) return z end ($fJ)(c::BigInt, x::BigFloat) = ($fJ)(x,c) @@ -428,50 +533,50 @@ for (fJ, fC) in ((:-,:sub), (:/,:div)) # BigFloat function ($fJ)(x::BigFloat, y::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)),:libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)),libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end # Unsigned Int function ($fJ)(x::BigFloat, c::CulongMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_ui)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_ui)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) return z end function ($fJ)(c::CulongMax, x::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,:ui_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Culong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,:ui_,fC)), libmpfr), Int32, (Ref{BigFloat}, Culong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) return z end # Signed Integer function ($fJ)(x::BigFloat, c::ClongMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_si)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_si)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) return z end function ($fJ)(c::ClongMax, x::BigFloat) z 
= BigFloat() - ccall(($(string(:mpfr_,:si_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Clong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,:si_,fC)), libmpfr), Int32, (Ref{BigFloat}, Clong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) return z end # Float32/Float64 function ($fJ)(x::BigFloat, c::CdoubleMax) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_d)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_d)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) return z end function ($fJ)(c::CdoubleMax, x::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,:d_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Cdouble, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,:d_,fC)), libmpfr), Int32, (Ref{BigFloat}, Cdouble, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) return z end # BigInt function ($fJ)(x::BigFloat, c::BigInt) z = BigFloat() - ccall(($(string(:mpfr_,fC,:_z)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC,:_z)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, ROUNDING_MODE[]) return z end # no :mpfr_z_div function @@ -480,7 +585,7 @@ end function -(c::BigInt, x::BigFloat) z = BigFloat() - ccall((:mpfr_z_sub, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) + ccall((:mpfr_z_sub, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}, Ref{BigFloat}, MPFRRoundingMode), z, c, x, ROUNDING_MODE[]) return z end @@ -488,66 +593,68 @@ inv(x::BigFloat) = one(Clong) / x # faster than fallback one(x)/x function fma(x::BigFloat, y::BigFloat, z::BigFloat) r = BigFloat() - ccall(("mpfr_fma",:libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), r, x, y, z, ROUNDING_MODE[]) + ccall(("mpfr_fma",libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), r, x, y, z, ROUNDING_MODE[]) return r end +muladd(x::BigFloat, y::BigFloat, z::BigFloat) = fma(x, y, z) + # div # BigFloat function div(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_div,:libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, RoundToZero) - ccall((:mpfr_trunc, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) + ccall((:mpfr_div,libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, RoundToZero) + ccall((:mpfr_trunc, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) return z end # Unsigned Int function div(x::BigFloat, c::CulongMax) z = BigFloat() - ccall((:mpfr_div_ui, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, RoundToZero) - ccall((:mpfr_trunc, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) + ccall((:mpfr_div_ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, c, RoundToZero) + ccall((:mpfr_trunc, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) return z end function div(c::CulongMax, x::BigFloat) z = BigFloat() - ccall((:mpfr_ui_div, :libmpfr), Int32, (Ref{BigFloat}, Culong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, RoundToZero) - ccall((:mpfr_trunc, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) + ccall((:mpfr_ui_div, libmpfr), Int32, (Ref{BigFloat}, Culong, 
Ref{BigFloat}, MPFRRoundingMode), z, c, x, RoundToZero) + ccall((:mpfr_trunc, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) return z end # Signed Integer function div(x::BigFloat, c::ClongMax) z = BigFloat() - ccall((:mpfr_div_si, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, RoundToZero) - ccall((:mpfr_trunc, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) + ccall((:mpfr_div_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, c, RoundToZero) + ccall((:mpfr_trunc, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) return z end function div(c::ClongMax, x::BigFloat) z = BigFloat() - ccall((:mpfr_si_div, :libmpfr), Int32, (Ref{BigFloat}, Clong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, RoundToZero) - ccall((:mpfr_trunc, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) + ccall((:mpfr_si_div, libmpfr), Int32, (Ref{BigFloat}, Clong, Ref{BigFloat}, MPFRRoundingMode), z, c, x, RoundToZero) + ccall((:mpfr_trunc, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) return z end # Float32/Float64 function div(x::BigFloat, c::CdoubleMax) z = BigFloat() - ccall((:mpfr_div_d, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, RoundToZero) - ccall((:mpfr_trunc, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) + ccall((:mpfr_div_d, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Cdouble, MPFRRoundingMode), z, x, c, RoundToZero) + ccall((:mpfr_trunc, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) return z end function div(c::CdoubleMax, x::BigFloat) z = BigFloat() - ccall((:mpfr_d_div, :libmpfr), Int32, (Ref{BigFloat}, Cdouble, Ref{BigFloat}, MPFRRoundingMode), z, c, x, RoundToZero) - ccall((:mpfr_trunc, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) + ccall((:mpfr_d_div, libmpfr), Int32, (Ref{BigFloat}, Cdouble, Ref{BigFloat}, MPFRRoundingMode), z, c, x, RoundToZero) + ccall((:mpfr_trunc, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) return z end # BigInt function div(x::BigFloat, c::BigInt) z = BigFloat() - ccall((:mpfr_div_z, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, RoundToZero) - ccall((:mpfr_trunc, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) + ccall((:mpfr_div_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, c, RoundToZero) + ccall((:mpfr_trunc, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, z) return z end @@ -557,23 +664,23 @@ for (fJ, fC, fI) in ((:+, :add, 0), (:*, :mul, 1)) @eval begin function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) return z end function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat, d::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, 
MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, ROUNDING_MODE[]) return z end function ($fJ)(a::BigFloat, b::BigFloat, c::BigFloat, d::BigFloat, e::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, ROUNDING_MODE[]) - ccall(($(string(:mpfr_,fC)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, e, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, a, b, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, c, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, d, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,fC)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, e, ROUNDING_MODE[]) return z end end @@ -581,14 +688,14 @@ end function -(x::BigFloat) z = BigFloat() - ccall((:mpfr_neg, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall((:mpfr_neg, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) return z end function sqrt(x::BigFloat) isnan(x) && return x z = BigFloat() - ccall((:mpfr_sqrt, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall((:mpfr_sqrt, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) isnan(z) && throw(DomainError(x, "NaN result for non-NaN input.")) return z end @@ -597,25 +704,25 @@ sqrt(x::BigInt) = sqrt(BigFloat(x)) function ^(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_pow, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_pow, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end function ^(x::BigFloat, y::CulongMax) z = BigFloat() - ccall((:mpfr_pow_ui, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_pow_ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end function ^(x::BigFloat, y::ClongMax) z = BigFloat() - ccall((:mpfr_pow_si, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_pow_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end function ^(x::BigFloat, y::BigInt) z = 
BigFloat() - ccall((:mpfr_pow_z, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_pow_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigInt}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end @@ -625,7 +732,7 @@ end for f in (:exp, :exp2, :exp10, :expm1, :cosh, :sinh, :tanh, :sech, :csch, :coth, :cbrt) @eval function $f(x::BigFloat) z = BigFloat() - ccall(($(string(:mpfr_,f)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) return z end end @@ -633,7 +740,7 @@ end function sincos_fast(v::BigFloat) s = BigFloat() c = BigFloat() - ccall((:mpfr_sin_cos, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), s, c, v, ROUNDING_MODE[]) + ccall((:mpfr_sin_cos, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), s, c, v, ROUNDING_MODE[]) return (s, c) end sincos(v::BigFloat) = sincos_fast(v) @@ -641,18 +748,18 @@ sincos(v::BigFloat) = sincos_fast(v) # return log(2) function big_ln2() c = BigFloat() - ccall((:mpfr_const_log2, :libmpfr), Cint, (Ref{BigFloat}, MPFRRoundingMode), c, MPFR.ROUNDING_MODE[]) + ccall((:mpfr_const_log2, libmpfr), Cint, (Ref{BigFloat}, MPFRRoundingMode), c, MPFR.ROUNDING_MODE[]) return c end function ldexp(x::BigFloat, n::Clong) z = BigFloat() - ccall((:mpfr_mul_2si, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, n, ROUNDING_MODE[]) + ccall((:mpfr_mul_2si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, x, n, ROUNDING_MODE[]) return z end function ldexp(x::BigFloat, n::Culong) z = BigFloat() - ccall((:mpfr_mul_2ui, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, n, ROUNDING_MODE[]) + ccall((:mpfr_mul_2ui, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, n, ROUNDING_MODE[]) return z end ldexp(x::BigFloat, n::ClongMax) = ldexp(x, convert(Clong, n)) @@ -665,70 +772,73 @@ function factorial(x::BigFloat) end ui = convert(Culong, x) z = BigFloat() - ccall((:mpfr_fac_ui, :libmpfr), Int32, (Ref{BigFloat}, Culong, MPFRRoundingMode), z, ui, ROUNDING_MODE[]) + ccall((:mpfr_fac_ui, libmpfr), Int32, (Ref{BigFloat}, Culong, MPFRRoundingMode), z, ui, ROUNDING_MODE[]) return z end function hypot(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_hypot, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_hypot, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end for f in (:log, :log2, :log10) @eval function $f(x::BigFloat) if x < 0 - throw(DomainError(x, string($f, " will only return a complex result if called ", + throw(DomainError(x, string($f, " was called with a negative real argument but ", + "will only return a complex result if called ", "with a complex argument. 
Try ", $f, "(complex(x))."))) end z = BigFloat() - ccall(($(string(:mpfr_,f)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) return z end end function log1p(x::BigFloat) if x < -1 - throw(DomainError(x, string("log1p will only return a complex result if called ", + throw(DomainError(x, string("log1p was called with a real argument < -1 but ", + "will only return a complex result if called ", "with a complex argument. Try log1p(complex(x))."))) end z = BigFloat() - ccall((:mpfr_log1p, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall((:mpfr_log1p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) return z end -function max(x::BigFloat, y::BigFloat) - isnan(x) && return x - isnan(y) && return y - z = BigFloat() - ccall((:mpfr_max, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) - return z +# For `min`/`max`, general fallback for `AbstractFloat` is good enough. +# Only implement `minmax` and `_extrema_rf` to avoid repeated calls. +function minmax(x::BigFloat, y::BigFloat) + isnan(x) && return x, x + isnan(y) && return y, y + Base.Math._isless(x, y) ? (x, y) : (y, x) end -function min(x::BigFloat, y::BigFloat) - isnan(x) && return x - isnan(y) && return y - z = BigFloat() - ccall((:mpfr_min, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) - return z +function Base._extrema_rf(x::NTuple{2,BigFloat}, y::NTuple{2,BigFloat}) + (x1, x2), (y1, y2) = x, y + isnan(x1) && return x + isnan(y1) && return y + z1 = Base.Math._isless(x1, y1) ? x1 : y1 + z2 = Base.Math._isless(x2, y2) ? 
y2 : x2 + z1, z2 end function modf(x::BigFloat) zint = BigFloat() zfloat = BigFloat() - ccall((:mpfr_modf, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), zint, zfloat, x, ROUNDING_MODE[]) + ccall((:mpfr_modf, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), zint, zfloat, x, ROUNDING_MODE[]) return (zfloat, zint) end function rem(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_fmod, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_fmod, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end function rem(x::BigFloat, y::BigFloat, ::RoundingMode{:Nearest}) z = BigFloat() - ccall((:mpfr_remainder, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_remainder, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end @@ -738,49 +848,76 @@ rem2pi(x::BigFloat, r::RoundingMode) = rem(x, 2*BigFloat(pi), r) function sum(arr::AbstractArray{BigFloat}) z = BigFloat(0) for i in arr - ccall((:mpfr_add, :libmpfr), Int32, + ccall((:mpfr_add, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, z, i, ROUNDING_MODE[]) end return z end # Functions for which NaN results are converted to DomainError, following Base -for f in (:sin, :cos, :tan, :sec, :csc, :acos, :asin, :atan, :acosh, :asinh, :atanh) +for f in (:sin, :cos, :tan, :sec, :csc, :acos, :asin, :atan, :acosh, :asinh, :atanh, :sinpi, :cospi, :tanpi) @eval begin function ($f)(x::BigFloat) isnan(x) && return x z = BigFloat() - ccall(($(string(:mpfr_,f)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) + ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, ROUNDING_MODE[]) isnan(z) && throw(DomainError(x, "NaN result for non-NaN input.")) return z end end end +sincospi(x::BigFloat) = (sinpi(x), cospi(x)) function atan(y::BigFloat, x::BigFloat) z = BigFloat() - ccall((:mpfr_atan2, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, y, x, ROUNDING_MODE[]) + ccall((:mpfr_atan2, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, y, x, ROUNDING_MODE[]) + return z +end + +# degree functions +for f in (:sin, :cos, :tan) + @eval begin + function ($(Symbol(f,:d)))(x::BigFloat) + isnan(x) && return x + z = BigFloat() + ccall(($(string(:mpfr_,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, ROUNDING_MODE[]) + isnan(z) && throw(DomainError(x, "NaN result for non-NaN input.")) + return z + end + function ($(Symbol(:a,f,:d)))(x::BigFloat) + isnan(x) && return x + z = BigFloat() + ccall(($(string(:mpfr_a,f,:u)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, x, 360, ROUNDING_MODE[]) + isnan(z) && throw(DomainError(x, "NaN result for non-NaN input.")) + return z + end + end +end +function atand(y::BigFloat, x::BigFloat) + z = BigFloat() + ccall((:mpfr_atan2u, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, Culong, MPFRRoundingMode), z, y, x, 360, ROUNDING_MODE[]) return z end + # Utility functions -==(x::BigFloat, y::BigFloat) = ccall((:mpfr_equal_p, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 -<=(x::BigFloat, y::BigFloat) = 
ccall((:mpfr_lessequal_p, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 ->=(x::BigFloat, y::BigFloat) = ccall((:mpfr_greaterequal_p, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 -<(x::BigFloat, y::BigFloat) = ccall((:mpfr_less_p, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 ->(x::BigFloat, y::BigFloat) = ccall((:mpfr_greater_p, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 +==(x::BigFloat, y::BigFloat) = ccall((:mpfr_equal_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 +<=(x::BigFloat, y::BigFloat) = ccall((:mpfr_lessequal_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 +>=(x::BigFloat, y::BigFloat) = ccall((:mpfr_greaterequal_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 +<(x::BigFloat, y::BigFloat) = ccall((:mpfr_less_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 +>(x::BigFloat, y::BigFloat) = ccall((:mpfr_greater_p, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), x, y) != 0 function cmp(x::BigFloat, y::BigInt) isnan(x) && return 1 - ccall((:mpfr_cmp_z, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}), x, y) + ccall((:mpfr_cmp_z, libmpfr), Int32, (Ref{BigFloat}, Ref{BigInt}), x, y) end function cmp(x::BigFloat, y::ClongMax) isnan(x) && return 1 - ccall((:mpfr_cmp_si, :libmpfr), Int32, (Ref{BigFloat}, Clong), x, y) + ccall((:mpfr_cmp_si, libmpfr), Int32, (Ref{BigFloat}, Clong), x, y) end function cmp(x::BigFloat, y::CulongMax) isnan(x) && return 1 - ccall((:mpfr_cmp_ui, :libmpfr), Int32, (Ref{BigFloat}, Culong), x, y) + ccall((:mpfr_cmp_ui, libmpfr), Int32, (Ref{BigFloat}, Culong), x, y) end cmp(x::BigFloat, y::Integer) = cmp(x,big(y)) cmp(x::Integer, y::BigFloat) = -cmp(y,x) @@ -788,7 +925,7 @@ cmp(x::Integer, y::BigFloat) = -cmp(y,x) function cmp(x::BigFloat, y::CdoubleMax) isnan(x) && return isnan(y) ? 0 : 1 isnan(y) && return -1 - ccall((:mpfr_cmp_d, :libmpfr), Int32, (Ref{BigFloat}, Cdouble), x, y) + ccall((:mpfr_cmp_d, libmpfr), Int32, (Ref{BigFloat}, Cdouble), x, y) end cmp(x::CdoubleMax, y::BigFloat) = -cmp(y,x) @@ -807,7 +944,10 @@ cmp(x::CdoubleMax, y::BigFloat) = -cmp(y,x) <=(x::BigFloat, y::CdoubleMax) = !isnan(x) && !isnan(y) && cmp(x,y) <= 0 <=(x::CdoubleMax, y::BigFloat) = !isnan(x) && !isnan(y) && cmp(y,x) >= 0 -signbit(x::BigFloat) = ccall((:mpfr_signbit, :libmpfr), Int32, (Ref{BigFloat},), x) != 0 +# Note: this inlines the implementation of `mpfr_signbit` to avoid a +# `ccall`. 
+signbit(x::BigFloat) = signbit(x.sign) + function sign(x::BigFloat) c = cmp(x, 0) (c == 0 || isnan(x)) && return x @@ -815,7 +955,7 @@ function sign(x::BigFloat) end function _precision(x::BigFloat) # precision of an object of type BigFloat - return ccall((:mpfr_get_prec, :libmpfr), Clong, (Ref{BigFloat},), x) + return ccall((:mpfr_get_prec, libmpfr), Clong, (Ref{BigFloat},), x) end precision(x::BigFloat; base::Integer=2) = _precision(x, base) @@ -851,7 +991,7 @@ maxintfloat(::Type{BigFloat}) = BigFloat(2)^precision(BigFloat) function copysign(x::BigFloat, y::BigFloat) z = BigFloat() - ccall((:mpfr_copysign, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) + ccall((:mpfr_copysign, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), z, x, y, ROUNDING_MODE[]) return z end @@ -860,27 +1000,27 @@ function exponent(x::BigFloat) throw(DomainError(x, "`x` must be non-zero and finite.")) end # The '- 1' is to make it work as Base.exponent - return ccall((:mpfr_get_exp, :libmpfr), Clong, (Ref{BigFloat},), x) - 1 + return ccall((:mpfr_get_exp, libmpfr), Clong, (Ref{BigFloat},), x) - 1 end function frexp(x::BigFloat) z = BigFloat() c = Ref{Clong}() - ccall((:mpfr_frexp, :libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, ROUNDING_MODE[]) + ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, ROUNDING_MODE[]) return (z, c[]) end function significand(x::BigFloat) z = BigFloat() c = Ref{Clong}() - ccall((:mpfr_frexp, :libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, ROUNDING_MODE[]) + ccall((:mpfr_frexp, libmpfr), Int32, (Ptr{Clong}, Ref{BigFloat}, Ref{BigFloat}, MPFRRoundingMode), c, z, x, ROUNDING_MODE[]) # Double the significand to make it work as Base.significand - ccall((:mpfr_mul_si, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, z, 2, ROUNDING_MODE[]) + ccall((:mpfr_mul_si, libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Clong, MPFRRoundingMode), z, z, 2, ROUNDING_MODE[]) return z end function isinteger(x::BigFloat) - return ccall((:mpfr_integer_p, :libmpfr), Int32, (Ref{BigFloat},), x) != 0 + return ccall((:mpfr_integer_p, libmpfr), Int32, (Ref{BigFloat},), x) != 0 end for (f,R) in ((:roundeven, :Nearest), @@ -891,23 +1031,23 @@ for (f,R) in ((:roundeven, :Nearest), @eval begin function round(x::BigFloat, ::RoundingMode{$(QuoteNode(R))}) z = BigFloat() - ccall(($(string(:mpfr_,f)), :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, x) + ccall(($(string(:mpfr_,f)), libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}), z, x) return z end end end function isinf(x::BigFloat) - return ccall((:mpfr_inf_p, :libmpfr), Int32, (Ref{BigFloat},), x) != 0 + return x.exp == mpfr_special_exponent_inf end function isnan(x::BigFloat) - return ccall((:mpfr_nan_p, :libmpfr), Int32, (Ref{BigFloat},), x) != 0 + return x.exp == mpfr_special_exponent_nan end isfinite(x::BigFloat) = !isinf(x) && !isnan(x) -iszero(x::BigFloat) = x == Clong(0) +iszero(x::BigFloat) = x.exp == mpfr_special_exponent_zero isone(x::BigFloat) = x == Clong(1) @eval typemax(::Type{BigFloat}) = $(BigFloat(Inf)) @@ -916,7 +1056,7 @@ isone(x::BigFloat) = x == Clong(1) function nextfloat!(x::BigFloat, n::Integer=1) signbit(n) && return prevfloat!(x, abs(n)) for i = 1:n - ccall((:mpfr_nextabove, :libmpfr), Int32, (Ref{BigFloat},), x) + ccall((:mpfr_nextabove, libmpfr), Int32, (Ref{BigFloat},), x) end 
return x end @@ -924,7 +1064,7 @@ end function prevfloat!(x::BigFloat, n::Integer=1) signbit(n) && return nextfloat!(x, abs(n)) for i = 1:n - ccall((:mpfr_nextbelow, :libmpfr), Int32, (Ref{BigFloat},), x) + ccall((:mpfr_nextbelow, libmpfr), Int32, (Ref{BigFloat},), x) end return x end @@ -970,7 +1110,7 @@ setprecision(f::Function, prec::Integer; base::Integer=2) = setprecision(f, BigF function string_mpfr(x::BigFloat, fmt::String) pc = Ref{Ptr{UInt8}}() - n = ccall((:mpfr_asprintf,:libmpfr), Cint, + n = ccall((:mpfr_asprintf,libmpfr), Cint, (Ptr{Ptr{UInt8}}, Ptr{UInt8}, Ref{BigFloat}...), pc, fmt, x) p = pc[] @@ -982,7 +1122,7 @@ function string_mpfr(x::BigFloat, fmt::String) end end str = unsafe_string(p) - ccall((:mpfr_free_str, :libmpfr), Cvoid, (Ptr{UInt8},), p) + ccall((:mpfr_free_str, libmpfr), Cvoid, (Ptr{UInt8},), p) return str end @@ -1018,14 +1158,14 @@ function _string(x::BigFloat, fmt::String)::String isfinite(x) || return string(Float64(x)) _prettify_bigfloat(string_mpfr(x, fmt)) end -_string(x::BigFloat) = _string(x, "%.Re") +_string(x::BigFloat) = _string(x, "%Re") _string(x::BigFloat, k::Integer) = _string(x, "%.$(k)Re") string(b::BigFloat) = _string(b) print(io::IO, b::BigFloat) = print(io, string(b)) function show(io::IO, b::BigFloat) - if get(io, :compact, false) + if get(io, :compact, false)::Bool print(io, _string(b, 5)) else print(io, _string(b)) @@ -1033,17 +1173,17 @@ function show(io::IO, b::BigFloat) end # get/set exponent min/max -get_emax() = ccall((:mpfr_get_emax, :libmpfr), Clong, ()) -get_emax_min() = ccall((:mpfr_get_emax_min, :libmpfr), Clong, ()) -get_emax_max() = ccall((:mpfr_get_emax_max, :libmpfr), Clong, ()) +get_emax() = ccall((:mpfr_get_emax, libmpfr), Clong, ()) +get_emax_min() = ccall((:mpfr_get_emax_min, libmpfr), Clong, ()) +get_emax_max() = ccall((:mpfr_get_emax_max, libmpfr), Clong, ()) -get_emin() = ccall((:mpfr_get_emin, :libmpfr), Clong, ()) -get_emin_min() = ccall((:mpfr_get_emin_min, :libmpfr), Clong, ()) -get_emin_max() = ccall((:mpfr_get_emin_max, :libmpfr), Clong, ()) +get_emin() = ccall((:mpfr_get_emin, libmpfr), Clong, ()) +get_emin_min() = ccall((:mpfr_get_emin_min, libmpfr), Clong, ()) +get_emin_max() = ccall((:mpfr_get_emin_max, libmpfr), Clong, ()) check_exponent_err(ret) = ret == 0 || throw(ArgumentError("Invalid MPFR exponent range")) -set_emax!(x) = check_exponent_err(ccall((:mpfr_set_emax, :libmpfr), Cint, (Clong,), x)) -set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, :libmpfr), Cint, (Clong,), x)) +set_emax!(x) = check_exponent_err(ccall((:mpfr_set_emax, libmpfr), Cint, (Clong,), x)) +set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, libmpfr), Cint, (Clong,), x)) function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict) get!(stackdict, x) do @@ -1051,7 +1191,7 @@ function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict) d = x._d d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String y = _BigFloat(x.prec, x.sign, x.exp, d′) - #ccall((:mpfr_custom_move,:libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary + #ccall((:mpfr_custom_move,libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary return y end end @@ -1063,8 +1203,8 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int} s = BigInt() s.size = cld(x.prec, 8*sizeof(Limb)) # limbs b = s.size * sizeof(Limb) # bytes - ccall((:__gmpz_realloc2, :libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits - ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), s.d, x.d, b) # bytes + 
ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits + memcpy(s.d, x.d, b) s, x.exp - 8b, x.sign end @@ -1074,11 +1214,11 @@ function lerpi(j::Integer, d::Integer, a::BigFloat, b::BigFloat) end # flags -clear_flags() = ccall((:mpfr_clear_flags, :libmpfr), Cvoid, ()) -had_underflow() = ccall((:mpfr_underflow_p, :libmpfr), Cint, ()) != 0 -had_overflow() = ccall((:mpfr_underflow_p, :libmpfr), Cint, ()) != 0 -had_nan() = ccall((:mpfr_nanflag_p, :libmpfr), Cint, ()) != 0 -had_inexact_exception() = ccall((:mpfr_inexflag_p, :libmpfr), Cint, ()) != 0 -had_range_exception() = ccall((:mpfr_erangeflag_p, :libmpfr), Cint, ()) != 0 +clear_flags() = ccall((:mpfr_clear_flags, libmpfr), Cvoid, ()) +had_underflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0 +had_overflow() = ccall((:mpfr_underflow_p, libmpfr), Cint, ()) != 0 +had_nan() = ccall((:mpfr_nanflag_p, libmpfr), Cint, ()) != 0 +had_inexact_exception() = ccall((:mpfr_inexflag_p, libmpfr), Cint, ()) != 0 +had_range_exception() = ccall((:mpfr_erangeflag_p, libmpfr), Cint, ()) != 0 end #module diff --git a/base/multidimensional.jl b/base/multidimensional.jl index 3eecdf17e5318..0ca2fbc36e6df 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -2,8 +2,8 @@ ### Multidimensional iterators module IteratorsMD - import .Base: eltype, length, size, first, last, in, getindex, setindex!, IndexStyle, - min, max, zero, oneunit, isless, eachindex, ndims, IteratorSize, + import .Base: eltype, length, size, first, last, in, getindex, setindex!, + min, max, zero, oneunit, isless, eachindex, convert, show, iterate, promote_rule import .Base: +, -, *, (:) @@ -32,6 +32,10 @@ module IteratorsMD A `CartesianIndex` is sometimes produced by [`eachindex`](@ref), and always when iterating with an explicit [`CartesianIndices`](@ref). + An `I::CartesianIndex` is treated as a "scalar" (not a container) + for `broadcast`. In order to iterate over the components of a + `CartesianIndex`, convert it to a tuple with `Tuple(I)`. + # Examples ```jldoctest julia> A = reshape(Vector(1:16), (2, 2, 2, 2)) @@ -61,6 +65,10 @@ module IteratorsMD julia> A[CartesianIndex((1, 1, 2, 1))] 5 ``` + + !!! compat "Julia 1.10" + Using a `CartesianIndex` as a "scalar" for `broadcast` requires + Julia 1.10; in previous releases, use `Ref(I)`. """ struct CartesianIndex{N} <: AbstractCartesianIndex{N} I::NTuple{N,Int} @@ -75,6 +83,7 @@ module IteratorsMD CartesianIndex{N}(index::Integer...) where {N} = CartesianIndex{N}(index) CartesianIndex{N}() where {N} = CartesianIndex{N}(()) # Un-nest passed CartesianIndexes + CartesianIndex{N}(index::CartesianIndex{N}) where {N} = index CartesianIndex(index::Union{Integer, CartesianIndex}...) = CartesianIndex(flatten(index)) flatten(::Tuple{}) = () flatten(I::Tuple{Any}) = Tuple(I[1]) @@ -158,6 +167,19 @@ module IteratorsMD Base.iterate(::CartesianIndex) = error("iteration is deliberately unsupported for CartesianIndex. 
Use `I` rather than `I...`, or use `Tuple(I)...`") + # ranges are deliberately disabled to prevent ambiguities with the colon constructor + Base.range_start_step_length(start::CartesianIndex, step::CartesianIndex, len::Integer) = + error("range with a specified length is deliberately unsupported for CartesianIndex arguments."* + " Use StepRangeLen($start, $step, $len) to construct this range") + + # show is special-cased to avoid the start:stop:step display, + # which constructs a CartesianIndices + # See #50784 + function show(io::IO, r::StepRangeLen{<:CartesianIndex}) + print(io, "StepRangeLen(", first(r), ", ", + step(r), ", ", length(r), ")") + end + # Iteration const OrdinalRangeInt = OrdinalRange{Int, Int} """ @@ -259,7 +281,7 @@ module IteratorsMD CartesianIndices(A::AbstractArray) = CartesianIndices(axes(A)) _convert2ind(sz::Bool) = Base.OneTo(Int8(sz)) - _convert2ind(sz::Integer) = Base.OneTo(sz) + _convert2ind(sz::Integer) = Base.oneto(sz) _convert2ind(sz::AbstractUnitRange) = first(sz):last(sz) _convert2ind(sz::OrdinalRange) = first(sz):step(sz):last(sz) @@ -325,7 +347,7 @@ module IteratorsMD convert(Tuple{Vararg{UnitRange{Int}}}, R) convert(::Type{CartesianIndices{N,R}}, inds::CartesianIndices{N}) where {N,R} = - CartesianIndices(convert(R, inds.indices)) + CartesianIndices(convert(R, inds.indices))::CartesianIndices{N,R} # equality Base.:(==)(a::CartesianIndices{N}, b::CartesianIndices{N}) where N = @@ -334,7 +356,14 @@ module IteratorsMD # AbstractArray implementation Base.axes(iter::CartesianIndices{N,R}) where {N,R} = map(Base.axes1, iter.indices) - Base.IndexStyle(::Type{CartesianIndices{N,R}}) where {N,R} = IndexCartesian() + Base.has_offset_axes(iter::CartesianIndices) = Base.has_offset_axes(iter.indices...) + @propagate_inbounds function isassigned(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R} + for i in 1:N + isassigned(iter.indices[i], I[i]) || return false + end + return true + end + # getindex for a 0D CartesianIndices is necessary for disambiguation @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R} CartesianIndex() @@ -374,10 +403,6 @@ module IteratorsMD getindex(c, C) end - ndims(R::CartesianIndices) = ndims(typeof(R)) - ndims(::Type{CartesianIndices{N}}) where {N} = N - ndims(::Type{CartesianIndices{N,TT}}) where {N,TT} = N - eachindex(::IndexCartesian, A::AbstractArray) = CartesianIndices(axes(A)) @inline function eachindex(::IndexCartesian, A::AbstractArray, B::AbstractArray...) @@ -386,10 +411,6 @@ module IteratorsMD CartesianIndices(axsA) end - eltype(::Type{CartesianIndices{N}}) where {N} = CartesianIndex{N} - eltype(::Type{CartesianIndices{N,TT}}) where {N,TT} = CartesianIndex{N} - IteratorSize(::Type{<:CartesianIndices{N}}) where {N} = Base.HasShape{N}() - @inline function iterate(iter::CartesianIndices) iterfirst = first(iter) if !all(map(in, iterfirst.I, iter.indices)) @@ -417,29 +438,19 @@ module IteratorsMD @inline function __inc(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt}) rng = indices[1] I = state[1] + step(rng) - valid = __is_valid_range(I, rng) && state[1] != last(rng) - return valid, (I, ) + valid = state[1] != last(rng) + return valid, (I,) end @inline function __inc(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}}) rng = indices[1] I = state[1] + step(rng) - if __is_valid_range(I, rng) && state[1] != last(rng) + if state[1] != last(rng) return true, (I, tail(state)...) 
end valid, I = __inc(tail(state), tail(indices)) return valid, (first(rng), I...) end - @inline __is_valid_range(I, rng::AbstractUnitRange) = I in rng - @inline function __is_valid_range(I, rng::OrdinalRange) - if step(rng) > 0 - lo, hi = first(rng), last(rng) - else - lo, hi = last(rng), first(rng) - end - lo <= I <= hi - end - # 0-d cartesian ranges are special-cased to iterate once and only once iterate(iter::CartesianIndices{0}, done=false) = done ? nothing : (CartesianIndex(), true) @@ -500,8 +511,30 @@ module IteratorsMD end # reversed CartesianIndices iteration + @inline function Base._reverse(iter::CartesianIndices, ::Colon) + CartesianIndices(reverse.(iter.indices)) + end - Base.reverse(iter::CartesianIndices) = CartesianIndices(reverse.(iter.indices)) + Base.@constprop :aggressive function Base._reverse(iter::CartesianIndices, dim::Integer) + 1 <= dim <= ndims(iter) || throw(ArgumentError(Base.LazyString("invalid dimension ", dim, " in reverse"))) + ndims(iter) == 1 && return Base._reverse(iter, :) + indices = iter.indices + return CartesianIndices(Base.setindex(indices, reverse(indices[dim]), dim)) + end + + Base.@constprop :aggressive function Base._reverse(iter::CartesianIndices, dims::Tuple{Vararg{Integer}}) + indices = iter.indices + # use `sum` to force const fold + dimrev = ntuple(i -> sum(==(i), dims; init = 0) == 1, Val(length(indices))) + length(dims) == sum(dimrev) || throw(ArgumentError(Base.LazyString("invalid dimensions ", dims, " in reverse"))) + length(dims) == length(indices) && return Base._reverse(iter, :) + indices′ = map((i, f) -> f ? (@noinline reverse(i)) : i, indices, dimrev) + return CartesianIndices(indices′) + end + + # fix ambiguity with array.jl: + Base._reverse(iter::CartesianIndices{1}, dims::Tuple{Integer}) = + Base._reverse(iter, first(dims)) @inline function iterate(r::Reverse{<:CartesianIndices}) iterfirst = last(r.itr) @@ -527,13 +560,13 @@ module IteratorsMD @inline function __dec(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt}) rng = indices[1] I = state[1] - step(rng) - valid = __is_valid_range(I, rng) && state[1] != first(rng) + valid = state[1] != first(rng) return valid, (I,) end @inline function __dec(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}}) rng = indices[1] I = state[1] - step(rng) - if __is_valid_range(I, rng) && state[1] != first(rng) + if state[1] != first(rng) return true, (I, tail(state)...) end valid, I = __dec(tail(state), tail(indices)) @@ -711,7 +744,7 @@ checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndices) = all(checkindex.(Boo # combined count of all indices, including CartesianIndex and # AbstractArray{CartesianIndex} -# rather than returning N, it returns an NTuple{N,Bool} so the result is inferrable +# rather than returning N, it returns an NTuple{N,Bool} so the result is inferable @inline index_ndims(i1, I...) = (true, index_ndims(I...)...) @inline function index_ndims(i1::CartesianIndex, I...) (map(Returns(true), i1.I)..., index_ndims(I...)...) @@ -722,7 +755,7 @@ end index_ndims() = () # combined dimensionality of all indices -# rather than returning N, it returns an NTuple{N,Bool} so the result is inferrable +# rather than returning N, it returns an NTuple{N,Bool} so the result is inferable @inline index_dimsum(i1, I...) = (index_dimsum(I...)...,) @inline index_dimsum(::Colon, I...) = (true, index_dimsum(I...)...) @inline index_dimsum(::AbstractArray{Bool}, I...) = (true, index_dimsum(I...)...) 
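The comments above note that `index_ndims` and `index_dimsum` return an `NTuple{N,Bool}` rather than the integer `N`, so the count is carried in the result's type and stays inferable through the recursion. A minimal standalone sketch of that idiom, using a hypothetical `count_as_bools` helper rather than the Base definitions:

```julia
# Encode a count as a tuple of `true`s: the length N is part of the inferred
# type NTuple{N,Bool}, so callers can recover it without losing type information.
count_as_bools() = ()
count_as_bools(x, rest...) = (true, count_as_bools(rest...)...)

length(count_as_bools('a', 2, 3.0))  # 3, and inference sees NTuple{3, Bool}
```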
@@ -1150,8 +1183,7 @@ circshift!(dest::AbstractArray, src, ::Tuple{}) = copyto!(dest, src) Circularly shift, i.e. rotate, the data in `src`, storing the result in `dest`. `shifts` specifies the amount to shift in each dimension. -The `dest` array must be distinct from the `src` array (they cannot -alias each other). +$(_DOCS_ALIASING_WARNING) See also [`circshift`](@ref). """ @@ -1209,6 +1241,8 @@ their indices; any offset results in a (circular) wraparound. If the arrays have overlapping indices, then on the domain of the overlap `dest` agrees with `src`. +$(_DOCS_ALIASING_WARNING) + See also: [`circshift`](@ref). # Examples @@ -1347,7 +1381,7 @@ end # Note: the next two functions rely on the following definition of the conversion to Bool: # convert(::Type{Bool}, x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(...)) -# they're used to pre-emptively check in bulk when possible, which is much faster. +# they're used to preemptively check in bulk when possible, which is much faster. # Also, the functions can be overloaded for custom types T<:Real : # a) in the unlikely eventuality that they use a different logic for Bool conversion # b) to skip the check if not necessary @@ -1534,7 +1568,28 @@ end end isassigned(a::AbstractArray, i::CartesianIndex) = isassigned(a, Tuple(i)...) -isassigned(a::AbstractArray, i::Union{Integer, CartesianIndex}...) = isassigned(a, CartesianIndex(i)) +function isassigned(A::AbstractArray, i::Union{Integer, CartesianIndex}...) + isa(i, Tuple{Vararg{Int}}) || return isassigned(A, CartesianIndex(to_indices(A, i))) + @boundscheck checkbounds(Bool, A, i...) || return false + S = IndexStyle(A) + ninds = length(i) + if (isa(S, IndexLinear) && ninds != 1) + return @inbounds isassigned(A, _to_linear_index(A, i...)) + elseif (!isa(S, IndexLinear) && ninds != ndims(A)) + return @inbounds isassigned(A, _to_subscript_indices(A, i...)...) + else + try + A[i...] + true + catch e + if isa(e, BoundsError) || isa(e, UndefRefError) + return false + else + rethrow() + end + end + end +end ## permutedims @@ -1793,7 +1848,7 @@ but the result order will be row-major instead. # Higher dimensional examples ``` -julia> A = permutedims(reshape([4 3; 2 1; 'A' 'B'; 'C' 'D'], (2, 2, 2)), (1, 3, 2)) +julia> A = [4 3; 2 1 ;;; 'A' 'B'; 'C' 'D'] 2×2×2 Array{Any, 3}: [:, :, 1] = 4 3 @@ -1842,39 +1897,25 @@ julia> sortslices(reshape([5; 4; 3; 2; 1], (1,1,5)), dims=3, by=x->x[1,1]) ``` """ function sortslices(A::AbstractArray; dims::Union{Integer, Tuple{Vararg{Integer}}}, kws...) - _sortslices(A, Val{dims}(); kws...) -end + if A isa Matrix && dims isa Integer && dims == 1 + # TODO: remove once the generic version becomes as fast or faster + perm = sortperm(eachslice(A; dims); kws...) + return A[perm, :] + end -# Works around inference's lack of ability to recognize partial constness -struct DimSelector{dims, T} - A::T + B = similar(A) + _sortslices!(B, A, Val{dims}(); kws...) + B end -DimSelector{dims}(x::T) where {dims, T} = DimSelector{dims, T}(x) -(ds::DimSelector{dims, T})(i) where {dims, T} = i in dims ? axes(ds.A, i) : (:,) -_negdims(n, dims) = filter(i->!(i in dims), 1:n) +function _sortslices!(B, A, ::Val{dims}; kws...) where dims + ves = vec(eachslice(A; dims)) + perm = sortperm(ves; kws...) + bes = eachslice(B; dims) -function compute_itspace(A, ::Val{dims}) where {dims} - negdims = _negdims(ndims(A), dims) - axs = Iterators.product(ntuple(DimSelector{dims}(A), ndims(A))...) 
- vec(permutedims(collect(axs), (dims..., negdims...))) -end - -function _sortslices(A::AbstractArray, d::Val{dims}; kws...) where dims - itspace = compute_itspace(A, d) - vecs = map(its->view(A, its...), itspace) - p = sortperm(vecs; kws...) - if ndims(A) == 2 && isa(dims, Integer) && isa(A, Array) - # At the moment, the performance of the generic version is subpar - # (about 5x slower). Hardcode a fast-path until we're able to - # optimize this. - return dims == 1 ? A[p, :] : A[:, p] - else - B = similar(A) - for (x, its) in zip(p, itspace) - B[its...] = vecs[x] - end - B + # TODO for further optimization: traverse in memory order + for (slice, i) in zip(eachslice(B; dims), perm) + slice .= ves[i] end end diff --git a/base/multimedia.jl b/base/multimedia.jl index 308cc07a05a53..e634a19b7d6aa 100644 --- a/base/multimedia.jl +++ b/base/multimedia.jl @@ -57,7 +57,7 @@ print(io::IO, ::MIME{mime}) where {mime} = print(io, mime) """ showable(mime, x) -Returns a boolean value indicating whether or not the object `x` can be written +Return a boolean value indicating whether or not the object `x` can be written as the given `mime` type. (By default, this is determined automatically by the existence of the @@ -125,7 +125,7 @@ show(io::IO, m::AbstractString, x) = show(io, MIME(m), x) """ repr(mime, x; context=nothing) -Returns an `AbstractString` or `Vector{UInt8}` containing the representation of +Return an `AbstractString` or `Vector{UInt8}` containing the representation of `x` in the requested `mime` type, as written by [`show(io, mime, x)`](@ref) (throwing a [`MethodError`](@ref) if no appropriate `show` is available). An `AbstractString` is returned for MIME types with textual representations (such as `"text/html"` or @@ -232,7 +232,7 @@ display(mime::AbstractString, @nospecialize x) = display(MIME(mime), x) displayable(mime) -> Bool displayable(d::AbstractDisplay, mime) -> Bool -Returns a boolean value indicating whether the given `mime` type (string) is displayable by +Return a boolean value indicating whether the given `mime` type (string) is displayable by any of the displays in the current display stack, or specifically by the display `d` in the second variant. """ @@ -244,7 +244,7 @@ displayable(mime::AbstractString) = displayable(MIME(mime)) """ TextDisplay(io::IO) -Returns a `TextDisplay <: AbstractDisplay`, which displays any object as the text/plain MIME type +Return a `TextDisplay <: AbstractDisplay`, which displays any object as the text/plain MIME type (by default), writing the text representation to the given I/O stream. (This is how objects are printed in the Julia REPL.) """ diff --git a/base/multinverses.jl b/base/multinverses.jl index 4342a9a5f5cf7..70033de12fcd8 100644 --- a/base/multinverses.jl +++ b/base/multinverses.jl @@ -14,7 +14,7 @@ unsigned(::Type{Int64}) = UInt64 unsigned(::Type{Int128}) = UInt128 unsigned(::Type{T}) where {T<:Unsigned} = T -abstract type MultiplicativeInverse{T} end +abstract type MultiplicativeInverse{T} <: Number end # Computes integer division by a constant using multiply, add, and bitshift. @@ -28,7 +28,7 @@ abstract type MultiplicativeInverse{T} end # Division of Int32 by 3: # floor((2^32+2)/3 * n/2^32) = floor(n/3 + 2n/(3*2^32)) # The correction term, 2n/(3*2^32), is strictly less than 1/3 for any -# nonnegative n::Int32, so this divides any nonnegative Int32 by 3. +# non-negative n::Int32, so this divides any non-negative Int32 by 3. # (When n < 0, we add 1, and one can show that this computes # ceil(n/d) = -floor(abs(n)/d).) 
# @@ -97,7 +97,6 @@ struct UnsignedMultiplicativeInverse{T<:Unsigned} <: MultiplicativeInverse{T} function UnsignedMultiplicativeInverse{T}(d::T) where T<:Unsigned d == 0 && throw(ArgumentError("cannot compute magic for d == $d")) - u2 = convert(T, 2) add = false signedmin = one(d) << (sizeof(d)*8-1) signedmax = signedmin - one(T) @@ -135,13 +134,33 @@ struct UnsignedMultiplicativeInverse{T<:Unsigned} <: MultiplicativeInverse{T} end UnsignedMultiplicativeInverse(x::Unsigned) = UnsignedMultiplicativeInverse{typeof(x)}(x) +# Returns the higher half of the product a*b +function _mul_high(a::T, b::T) where {T<:Union{Signed, Unsigned}} + ((widen(a)*b) >>> (sizeof(a)*8)) % T +end + +function _mul_high(a::UInt128, b::UInt128) + shift = sizeof(a)*4 + mask = typemax(UInt128) >> shift + a1, a2 = a >>> shift, a & mask + b1, b2 = b >>> shift, b & mask + a1b1, a1b2, a2b1, a2b2 = a1*b1, a1*b2, a2*b1, a2*b2 + carry = ((a1b2 & mask) + (a2b1 & mask) + (a2b2 >>> shift)) >>> shift + a1b1 + (a1b2 >>> shift) + (a2b1 >>> shift) + carry +end +function _mul_high(a::Int128, b::Int128) + shift = sizeof(a)*8 - 1 + t1, t2 = (a >> shift) & b % UInt128, (b >> shift) & a % UInt128 + (_mul_high(a % UInt128, b % UInt128) - t1 - t2) % Int128 +end + function div(a::T, b::SignedMultiplicativeInverse{T}) where T - x = ((widen(a)*b.multiplier) >>> (sizeof(a)*8)) % T + x = _mul_high(a, b.multiplier) x += (a*b.addmul) % T ifelse(abs(b.divisor) == 1, a*b.divisor, (signbit(x) + (x >> b.shift)) % T) end function div(a::T, b::UnsignedMultiplicativeInverse{T}) where T - x = ((widen(a)*b.multiplier) >>> (sizeof(a)*8)) % T + x = _mul_high(a, b.multiplier) x = ifelse(b.add, convert(T, convert(T, (convert(T, a - x) >>> 1)) + x), x) ifelse(b.divisor == 1, a, x >>> b.shift) end diff --git a/base/namedtuple.jl b/base/namedtuple.jl index b2ebb3f9d0d7e..5d9b119b4c38e 100644 --- a/base/namedtuple.jl +++ b/base/namedtuple.jl @@ -8,6 +8,12 @@ tuple-like collection of values, where each entry has a unique name, represented [`Symbol`](@ref). Like `Tuple`s, `NamedTuple`s are immutable; neither the names nor the values can be modified in place after construction. +A named tuple can be created as a tuple literal with keys, e.g. `(a=1, b=2)`, +or as a tuple literal with semicolon after the opening parenthesis, e.g. `(; +a=1, b=2)` (this form also accepts programmatically generated names as +described below), or using a `NamedTuple` type as constructor, e.g. +`NamedTuple{(:a, :b)}((1,2))`. + Accessing the value associated with a name in a named tuple can be done using field access syntax, e.g. `x.a`, or using [`getindex`](@ref), e.g. `x[:a]` or `x[(:a, :b)]`. A tuple of the names can be obtained using [`keys`](@ref), and a tuple of the values @@ -51,16 +57,35 @@ julia> collect(pairs(x)) ``` In a similar fashion as to how one can define keyword arguments programmatically, -a named tuple can be created by giving a pair `name::Symbol => value` or splatting -an iterator yielding such pairs after a semicolon inside a tuple literal: +a named tuple can be created by giving pairs `name::Symbol => value` after a +semicolon inside a tuple literal. This and the `name=value` syntax can be mixed: ```jldoctest -julia> (; :a => 1) -(a = 1,) +julia> (; :a => 1, :b => 2, c=3) +(a = 1, b = 2, c = 3) +``` + +The name-value pairs can also be provided by splatting a named tuple or any +iterator that yields two-value collections holding each a symbol as first +value: +```jldoctest julia> keys = (:a, :b, :c); values = (1, 2, 3); -julia> (; zip(keys, values)...) 
+julia> NamedTuple{keys}(values) +(a = 1, b = 2, c = 3) + +julia> (; (keys .=> values)...) +(a = 1, b = 2, c = 3) + +julia> nt1 = (a=1, b=2); + +julia> nt2 = (c=3, d=4); + +julia> (; nt1..., nt2..., b=20) # the final b overwrites the value from nt1 +(a = 1, b = 20, c = 3, d = 4) + +julia> (; zip(keys, values)...) # zip yields tuples such as (:a, 1) (a = 1, b = 2, c = 3) ``` @@ -99,7 +124,10 @@ end function NamedTuple{names, T}(nt::NamedTuple) where {names, T <: Tuple} if @generated Expr(:new, :(NamedTuple{names, T}), - Any[ :(convert(fieldtype(T, $n), getfield(nt, $(QuoteNode(names[n]))))) for n in 1:length(names) ]...) + Any[ :(let Tn = fieldtype(T, $n), + ntn = getfield(nt, $(QuoteNode(names[n]))) + ntn isa Tn ? ntn : convert(Tn, ntn) + end) for n in 1:length(names) ]...) else NamedTuple{names, T}(map(Fix1(getfield, nt), names)) end @@ -111,8 +139,9 @@ function NamedTuple{names}(nt::NamedTuple) where {names} types = Tuple{(fieldtype(nt, idx[n]) for n in 1:length(idx))...} Expr(:new, :(NamedTuple{names, $types}), Any[ :(getfield(nt, $(idx[n]))) for n in 1:length(idx) ]...) else - types = Tuple{(fieldtype(typeof(nt), names[n]) for n in 1:length(names))...} - NamedTuple{names, types}(map(Fix1(getfield, nt), names)) + length_names = length(names::Tuple) + types = Tuple{(fieldtype(typeof(nt), names[n]) for n in 1:length_names)...} + _new_NamedTuple(NamedTuple{names, types}, map(Fix1(getfield, nt), names)) end end @@ -126,6 +155,12 @@ NamedTuple{names, Union{}}(itr::Tuple) where {names} = throw(MethodError(NamedTu end # if Base +# Like NamedTuple{names, T} as a constructor, but omits the additional +# `convert` call, when the types are known to match the fields +@eval function _new_NamedTuple(T::Type{NamedTuple{NTN, NTT}} where {NTN, NTT}, args::Tuple) + $(Expr(:splatnew, :T, :args)) +end + length(t::NamedTuple) = nfields(t) iterate(t::NamedTuple, iter=1) = iter > nfields(t) ? 
nothing : (getfield(t, iter), iter + 1) rest(t::NamedTuple) = t @@ -134,6 +169,7 @@ firstindex(t::NamedTuple) = 1 lastindex(t::NamedTuple) = nfields(t) getindex(t::NamedTuple, i::Int) = getfield(t, i) getindex(t::NamedTuple, i::Symbol) = getfield(t, i) +getindex(t::NamedTuple, ::Colon) = t @inline getindex(t::NamedTuple, idxs::Tuple{Vararg{Symbol}}) = NamedTuple{idxs}(t) @inline getindex(t::NamedTuple, idxs::AbstractVector{Symbol}) = NamedTuple{Tuple(idxs)}(t) indexed_iterate(t::NamedTuple, i::Int, state=1) = (getfield(t, i), i+1) @@ -144,16 +180,24 @@ empty(::NamedTuple) = NamedTuple() prevind(@nospecialize(t::NamedTuple), i::Integer) = Int(i)-1 nextind(@nospecialize(t::NamedTuple), i::Integer) = Int(i)+1 -convert(::Type{NamedTuple{names,T}}, nt::NamedTuple{names,T}) where {names,T<:Tuple} = nt -convert(::Type{NamedTuple{names}}, nt::NamedTuple{names}) where {names} = nt +convert(::Type{NT}, nt::NT) where {names, NT<:NamedTuple{names}} = nt +convert(::Type{NT}, nt::NT) where {names, T<:Tuple, NT<:NamedTuple{names,T}} = nt function convert(::Type{NamedTuple{names,T}}, nt::NamedTuple{names}) where {names,T<:Tuple} - NamedTuple{names,T}(T(nt)) + NamedTuple{names,T}(T(nt))::NamedTuple{names,T} +end + +function convert(::Type{NT}, nt::NamedTuple{names}) where {names, NT<:NamedTuple{names}} + # converting abstract NT to an abstract Tuple type, to a concrete NT1, is not straightforward, so this could just be an error, but we define it anyways + # _tuple_error(NT, nt) + T1 = Tuple{ntuple(i -> fieldtype(NT, i), Val(length(names)))...} + NT1 = NamedTuple{names, T1} + return NT1(T1(nt))::NT1::NT end if nameof(@__MODULE__) === :Base Tuple(nt::NamedTuple) = (nt...,) - (::Type{T})(nt::NamedTuple) where {T <: Tuple} = convert(T, Tuple(nt)) + (::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T) end function show(io::IO, t::NamedTuple) @@ -193,6 +237,12 @@ eltype(::Type{T}) where T<:NamedTuple = nteltype(T) nteltype(::Type) = Any nteltype(::Type{NamedTuple{names,T}} where names) where {T} = eltype(T) +keytype(@nospecialize nt::NamedTuple) = keytype(typeof(nt)) +keytype(@nospecialize T::Type{<:NamedTuple}) = Symbol + +valtype(@nospecialize nt::NamedTuple) = valtype(typeof(nt)) +valtype(@nospecialize T::Type{<:NamedTuple}) = eltype(T) + ==(a::NamedTuple{n}, b::NamedTuple{n}) where {n} = Tuple(a) == Tuple(b) ==(a::NamedTuple, b::NamedTuple) = false @@ -218,8 +268,9 @@ function map(f, nt::NamedTuple{names}, nts::NamedTuple...) where names NamedTuple{names}(map(f, map(Tuple, (nt, nts...))...)) end -@assume_effects :total function merge_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) - @nospecialize an bn +function merge_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) + @nospecialize + @_total_meta names = Symbol[an...] for n in bn if !sym_in(n, an) @@ -229,12 +280,32 @@ end (names...,) end -@assume_effects :total function merge_types(names::Tuple{Vararg{Symbol}}, a::Type{<:NamedTuple}, b::Type{<:NamedTuple}) - @nospecialize names a b +function merge_types(names::Tuple{Vararg{Symbol}}, a::Type{<:NamedTuple}, b::Type{<:NamedTuple}) + @nospecialize + @_total_meta bn = _nt_names(b) return Tuple{Any[ fieldtype(sym_in(names[n], bn) ? 
b : a, names[n]) for n in 1:length(names) ]...} end +function merge_fallback(a::NamedTuple, b::NamedTuple, + an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) + @nospecialize + @_foldable_meta + names = merge_names(an, bn) + types = merge_types(names, typeof(a), typeof(b)) + n = length(names) + A = Memory{Any}(undef, n) + for i=1:n + n = names[i] + A[i] = getfield(sym_in(n, bn) ? b : a, n) + end + _new_NamedTuple(NamedTuple{names, types}, (A...,)) +end + +# This is `Experimental.@max_methods 4 function merge end`, which is not +# defined at this point in bootstrap. +typeof(function merge end).name.max_methods = UInt8(4) + """ merge(a::NamedTuple, bs::NamedTuple...) @@ -265,11 +336,9 @@ function merge(a::NamedTuple{an}, b::NamedTuple{bn}) where {an, bn} names = merge_names(an, bn) types = merge_types(names, a, b) vals = Any[ :(getfield($(sym_in(names[n], bn) ? :b : :a), $(QuoteNode(names[n])))) for n in 1:length(names) ] - :( NamedTuple{$names,$types}(($(vals...),)) ) + :( _new_NamedTuple(NamedTuple{$names,$types}, ($(vals...),)) ) else - names = merge_names(an, bn) - types = merge_types(names, typeof(a), typeof(b)) - NamedTuple{names,types}(map(n->getfield(sym_in(n, bn) ? b : a, n), names)) + merge_fallback(a, b, an, bn) end end @@ -313,16 +382,18 @@ function merge(a::NamedTuple, itr) merge(a, NamedTuple{(names...,)}((vals...,))) end -keys(nt::NamedTuple{names}) where {names} = names +keys(nt::NamedTuple{names}) where {names} = names::Tuple{Vararg{Symbol}} values(nt::NamedTuple) = Tuple(nt) haskey(nt::NamedTuple, key::Union{Integer, Symbol}) = isdefined(nt, key) get(nt::NamedTuple, key::Union{Integer, Symbol}, default) = isdefined(nt, key) ? getfield(nt, key) : default get(f::Callable, nt::NamedTuple, key::Union{Integer, Symbol}) = isdefined(nt, key) ? getfield(nt, key) : f() -tail(t::NamedTuple{names}) where names = NamedTuple{tail(names)}(t) -front(t::NamedTuple{names}) where names = NamedTuple{front(names)}(t) +tail(t::NamedTuple{names}) where names = NamedTuple{tail(names::Tuple)}(t) +front(t::NamedTuple{names}) where names = NamedTuple{front(names::Tuple)}(t) +reverse(nt::NamedTuple) = NamedTuple{reverse(keys(nt))}(reverse(values(nt))) -@assume_effects :total function diff_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) - @nospecialize an bn +function diff_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) + @nospecialize + @_total_meta names = Symbol[] for n in an if !sym_in(n, bn) @@ -332,8 +403,29 @@ front(t::NamedTuple{names}) where names = NamedTuple{front(names)}(t) (names...,) end +function diff_types(a::NamedTuple, names::Tuple{Vararg{Symbol}}) + @nospecialize + @_foldable_meta + return Tuple{Any[ fieldtype(typeof(a), names[n]) for n in 1:length(names) ]...} +end + +function diff_fallback(a::NamedTuple, an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}}) + @nospecialize + @_foldable_meta + names = diff_names(an, bn) + isempty(names) && return (;) + types = diff_types(a, names) + n = length(names) + A = Memory{Any}(undef, n) + for i=1:n + n = names[i] + A[i] = getfield(a, n) + end + _new_NamedTuple(NamedTuple{names, types}, (A...,)) +end + """ - structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn},Type{NamedTuple{bn}}}) where {an,bn} + structdiff(a::NamedTuple, b::Union{NamedTuple,Type{NamedTuple}}) Construct a copy of named tuple `a`, except with fields that exist in `b` removed. `b` can be a named tuple, or a type of the form `NamedTuple{field_names}`. 
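# Illustrative sketch (not from the patch): the NamedTuple operations touched in
# this hunk, shown on small literals. `Base.structdiff` is internal to Base.
let nt = (a = 1, b = 2)
    @assert merge(nt, (b = 20, c = 3)) == (a = 1, b = 20, c = 3)  # later names win
    @assert reverse(nt) == (b = 2, a = 1)
    @assert Base.structdiff((a = 1, b = 2, c = 3), (b = 0,)) == (a = 1, c = 3)
end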
@@ -341,14 +433,13 @@ Construct a copy of named tuple `a`, except with fields that exist in `b` remove function structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn}, Type{NamedTuple{bn}}}) where {an, bn} if @generated names = diff_names(an, bn) + isempty(names) && return (;) # just a fast pass idx = Int[ fieldindex(a, names[n]) for n in 1:length(names) ] types = Tuple{Any[ fieldtype(a, idx[n]) for n in 1:length(idx) ]...} vals = Any[ :(getfield(a, $(idx[n]))) for n in 1:length(idx) ] - :( NamedTuple{$names,$types}(($(vals...),)) ) + return :( _new_NamedTuple(NamedTuple{$names,$types}, ($(vals...),)) ) else - names = diff_names(an, bn) - types = Tuple{Any[ fieldtype(typeof(a), names[n]) for n in 1:length(names) ]...} - NamedTuple{names,types}(map(Fix1(getfield, a), names)) + return diff_fallback(a, an, bn) end end @@ -386,20 +477,20 @@ This macro gives a more convenient syntax for declaring `NamedTuple` types. It r type with the given keys and types, equivalent to `NamedTuple{(:key1, :key2, ...), Tuple{Type1,Type2,...}}`. If the `::Type` declaration is omitted, it is taken to be `Any`. The `begin ... end` form allows the declarations to be split across multiple lines (similar to a `struct` declaration), but is otherwise -equivalent. +equivalent. The `NamedTuple` macro is used when printing `NamedTuple` types to e.g. the REPL. -For example, the tuple `(a=3.1, b="hello")` has a type `NamedTuple{(:a, :b),Tuple{Float64,String}}`, which +For example, the tuple `(a=3.1, b="hello")` has a type `NamedTuple{(:a, :b), Tuple{Float64, String}}`, which can also be declared via `@NamedTuple` as: ```jldoctest julia> @NamedTuple{a::Float64, b::String} -NamedTuple{(:a, :b), Tuple{Float64, String}} +@NamedTuple{a::Float64, b::String} julia> @NamedTuple begin a::Float64 b::String end -NamedTuple{(:a, :b), Tuple{Float64, String}} +@NamedTuple{a::Float64, b::String} ``` !!! compat "Julia 1.5" @@ -416,7 +507,66 @@ macro NamedTuple(ex) return :(NamedTuple{($(vars...),), Tuple{$(types...)}}) end -function split_rest(t::NamedTuple{names}, n::Int, st...) where {names} +""" + @Kwargs{key1::Type1, key2::Type2, ...} + +This macro gives a convenient way to construct the type representation of keyword arguments +from the same syntax as [`@NamedTuple`](@ref). +For example, when we have a function call like `func([positional arguments]; kw1=1.0, kw2="2")`, +we can use this macro to construct the internal type representation of the keyword arguments +as `@Kwargs{kw1::Float64, kw2::String}`. +The macro syntax is specifically designed to simplify the signature type of a keyword method +when it is printed in the stack trace view. + +```julia +julia> @Kwargs{init::Int} # the internal representation of keyword arguments +Base.Pairs{Symbol, Int64, Tuple{Symbol}, @NamedTuple{init::Int64}} + +julia> sum("julia"; init=1) +ERROR: MethodError: no method matching +(::Char, ::Char) + +Closest candidates are: + +(::Any, ::Any, ::Any, ::Any...) 
+ @ Base operators.jl:585 + +(::Integer, ::AbstractChar) + @ Base char.jl:247 + +(::T, ::Integer) where T<:AbstractChar + @ Base char.jl:237 + +Stacktrace: + [1] add_sum(x::Char, y::Char) + @ Base ./reduce.jl:24 + [2] BottomRF + @ Base ./reduce.jl:86 [inlined] + [3] _foldl_impl(op::Base.BottomRF{typeof(Base.add_sum)}, init::Int64, itr::String) + @ Base ./reduce.jl:62 + [4] foldl_impl(op::Base.BottomRF{typeof(Base.add_sum)}, nt::Int64, itr::String) + @ Base ./reduce.jl:48 [inlined] + [5] mapfoldl_impl(f::typeof(identity), op::typeof(Base.add_sum), nt::Int64, itr::String) + @ Base ./reduce.jl:44 [inlined] + [6] mapfoldl(f::typeof(identity), op::typeof(Base.add_sum), itr::String; init::Int64) + @ Base ./reduce.jl:175 [inlined] + [7] mapreduce(f::typeof(identity), op::typeof(Base.add_sum), itr::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:307 [inlined] + [8] sum(f::typeof(identity), a::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:535 [inlined] + [9] sum(a::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:564 [inlined] + [10] top-level scope + @ REPL[12]:1 +``` + +!!! compat "Julia 1.10" + This macro is available as of Julia 1.10. +""" +macro Kwargs(ex) + return :(let + NT = @NamedTuple $ex + Base.Pairs{keytype(NT),eltype(NT),typeof(NT.parameters[1]),NT} + end) +end + +@constprop :aggressive function split_rest(t::NamedTuple{names}, n::Int, st...) where {names} _check_length_split_rest(length(t), n) names_front, names_last_n = split_rest(names, n, st...) return NamedTuple{names_front}(t), NamedTuple{names_last_n}(t) diff --git a/base/ntuple.jl b/base/ntuple.jl index 6f70b49481223..4720e7a3915a8 100644 --- a/base/ntuple.jl +++ b/base/ntuple.jl @@ -3,7 +3,7 @@ # `ntuple`, for constructing tuples of a given length """ - ntuple(f::Function, n::Integer) + ntuple(f, n::Integer) Create a tuple of length `n`, computing each element as `f(i)`, where `i` is the index of the element. 
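# Illustrative sketch (not from the patch) of the two `ntuple` forms documented above:
@assert ntuple(i -> 2i, 4) == (2, 4, 6, 8)        # runtime length `n::Integer`
@assert ntuple(i -> i^2, Val(3)) == (1, 4, 9)     # compile-time length `Val(N)`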
@@ -33,17 +33,17 @@ end function _ntuple(f::F, n) where F @noinline - (n >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", n))) + (n >= 0) || throw(ArgumentError(LazyString("tuple length should be ≥ 0, got ", n))) ([f(i) for i = 1:n]...,) end function ntupleany(f, n) @noinline - (n >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", n))) + (n >= 0) || throw(ArgumentError(LazyString("tuple length should be ≥ 0, got ", n))) (Any[f(i) for i = 1:n]...,) end -# inferrable ntuple (enough for bootstrapping) +# inferable ntuple (enough for bootstrapping) ntuple(f, ::Val{0}) = () ntuple(f, ::Val{1}) = (@inline; (f(1),)) ntuple(f, ::Val{2}) = (@inline; (f(1), f(2))) @@ -68,12 +68,9 @@ julia> ntuple(i -> 2*i, Val(4)) """ @inline function ntuple(f::F, ::Val{N}) where {F,N} N::Int - (N >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", N))) + (N >= 0) || throw(ArgumentError(LazyString("tuple length should be ≥ 0, got ", N))) if @generated - quote - @nexprs $N i -> t_i = f(i) - @ncall $N tuple t - end + :(@ntuple $N i -> f(i)) else Tuple(f(i) for i = 1:N) end @@ -82,7 +79,7 @@ end @inline function fill_to_length(t::Tuple, val, ::Val{_N}) where {_N} M = length(t) N = _N::Int - M > N && throw(ArgumentError("input tuple of length $M, requested $N")) + M > N && throw(ArgumentError(LazyString("input tuple of length ", M, ", requested ", N))) if @generated quote (t..., $(fill(:val, (_N::Int) - length(t.parameters))...)) diff --git a/base/number.jl b/base/number.jl index 7436655bfad38..923fc907d4038 100644 --- a/base/number.jl +++ b/base/number.jl @@ -4,7 +4,7 @@ # Numbers are convertible convert(::Type{T}, x::T) where {T<:Number} = x -convert(::Type{T}, x::Number) where {T<:Number} = T(x) +convert(::Type{T}, x::Number) where {T<:Number} = T(x)::T """ isinteger(x) -> Bool @@ -95,12 +95,12 @@ keys(::Number) = OneTo(1) getindex(x::Number) = x function getindex(x::Number, i::Integer) @inline - @boundscheck i == 1 || throw(BoundsError()) + @boundscheck i == 1 || throw(BoundsError(x, i)) x end function getindex(x::Number, I::Integer...) @inline - @boundscheck all(isone, I) || throw(BoundsError()) + @boundscheck all(isone, I) || throw(BoundsError(x, I)) x end get(x::Number, i::Integer, default) = isone(i) ? x : default @@ -115,7 +115,7 @@ copy(x::Number) = x # some code treats numbers as collection-like """ signbit(x) -Returns `true` if the value of the sign of `x` is negative, otherwise `false`. +Return `true` if the value of the sign of `x` is negative, otherwise `false`. See also [`sign`](@ref) and [`copysign`](@ref). @@ -168,10 +168,21 @@ abs(x::Real) = ifelse(signbit(x), -x, x) Squared absolute value of `x`. +This can be faster than `abs(x)^2`, especially for complex +numbers where `abs(x)` requires a square root via [`hypot`](@ref). + +See also [`abs`](@ref), [`conj`](@ref), [`real`](@ref). + # Examples ```jldoctest julia> abs2(-3) 9 + +julia> abs2(3.0 + 4.0im) +25.0 + +julia> sum(abs2, [1+2im, 3+4im]) # LinearAlgebra.norm(x)^2 +30 ``` """ abs2(x::Number) = abs(x)^2 @@ -296,6 +307,7 @@ julia> zero(rand(2,2)) """ zero(x::Number) = oftype(x,0) zero(::Type{T}) where {T<:Number} = convert(T,0) +zero(::Type{Union{}}, slurp...) = Union{}(0) """ one(x) @@ -334,6 +346,7 @@ julia> import Dates; one(Dates.Day(1)) """ one(::Type{T}) where {T<:Number} = convert(T,1) one(x::T) where {T<:Number} = one(T) +one(::Type{Union{}}, slurp...) = Union{}(1) # note that convert(T, 1) should throw an error if T is dimensionful, # so this fallback definition should be okay. 
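# Illustrative sketch (not from the patch) of the numeric helpers documented above:
@assert abs2(3.0 + 4.0im) == 25.0            # no square root, unlike abs(x)^2
@assert sum(abs2, [1 + 2im, 3 + 4im]) == 30
@assert zero(Float64) === 0.0 && one(Int) === 1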
@@ -341,7 +354,7 @@ one(x::T) where {T<:Number} = one(T) oneunit(x::T) oneunit(T::Type) -Returns `T(one(x))`, where `T` is either the type of the argument or +Return `T(one(x))`, where `T` is either the type of the argument or (if a type is passed) the argument. This differs from [`one`](@ref) for dimensionful quantities: `one` is dimensionless (a multiplicative identity) while `oneunit` is dimensionful (of the same type as `x`, or of type `T`). @@ -357,6 +370,7 @@ julia> import Dates; oneunit(Dates.Day) """ oneunit(x::T) where {T} = T(one(x)) oneunit(::Type{T}) where {T} = T(one(T)) +oneunit(::Type{Union{}}, slurp...) = Union{}(1) """ big(T::Type) @@ -377,3 +391,4 @@ Complex{BigInt} ``` """ big(::Type{T}) where {T<:Number} = typeof(big(zero(T))) +big(::Type{Union{}}, slurp...) = Union{}(0) diff --git a/base/opaque_closure.jl b/base/opaque_closure.jl index ac2ae2e8bf3c0..cb3c00b128dcb 100644 --- a/base/opaque_closure.jl +++ b/base/opaque_closure.jl @@ -26,13 +26,13 @@ macro opaque(ty, ex) end # OpaqueClosure construction from pre-inferred CodeInfo/IRCode -using Core.Compiler: IRCode +using Core.Compiler: IRCode, SSAValue using Core: CodeInfo function compute_ir_rettype(ir::IRCode) rt = Union{} for i = 1:length(ir.stmts) - stmt = ir.stmts[i][:inst] + stmt = ir[SSAValue(i)][:stmt] if isa(stmt, Core.Compiler.ReturnNode) && isdefined(stmt, :val) rt = Core.Compiler.tmerge(Core.Compiler.argextype(stmt.val, ir), rt) end @@ -40,30 +40,55 @@ function compute_ir_rettype(ir::IRCode) return Core.Compiler.widenconst(rt) end -function Core.OpaqueClosure(ir::IRCode, env...; - nargs::Int = length(ir.argtypes)-1, - isva::Bool = false, - rt = compute_ir_rettype(ir)) - if (isva && nargs > length(ir.argtypes)) || (!isva && nargs != length(ir.argtypes)-1) - throw(ArgumentError("invalid argument count")) +function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool) + argtypes = Vector{Any}(undef, nargs) + for i = 1:nargs + argtypes[i] = Core.Compiler.widenconst(ir.argtypes[i+1]) end + if isva + lastarg = pop!(argtypes) + if lastarg <: Tuple + append!(argtypes, lastarg.parameters) + else + push!(argtypes, Vararg{Any}) + end + end + return Tuple{argtypes...} +end + +function Core.OpaqueClosure(ir::IRCode, @nospecialize env...; + isva::Bool = false, + do_compile::Bool = true) + # NOTE: we need ir.argtypes[1] == typeof(env) + ir = Core.Compiler.copy(ir) + nargs = length(ir.argtypes)-1 + sig = compute_oc_signature(ir, nargs, isva) + rt = compute_ir_rettype(ir) src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ()) - src.slotflags = UInt8[] src.slotnames = fill(:none, nargs+1) + src.slotflags = fill(zero(UInt8), length(ir.argtypes)) src.slottypes = copy(ir.argtypes) - Core.Compiler.replace_code_newstyle!(src, ir, nargs+1) - Core.Compiler.widen_all_consts!(src) - src.inferred = true - # NOTE: we need ir.argtypes[1] == typeof(env) - - ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any), - Tuple{ir.argtypes[2:end]...}, Union{}, rt, @__MODULE__, src, 0, nothing, nargs, isva, env) + src.rettype = rt + src = Core.Compiler.ir_to_codeinf!(src, ir) + return generate_opaque_closure(sig, Union{}, rt, src, nargs, isva, env...; do_compile) end -function Core.OpaqueClosure(src::CodeInfo, env...) - M = src.parent.def - sig = Base.tuple_type_tail(src.parent.specTypes) +function Core.OpaqueClosure(src::CodeInfo, @nospecialize env...) 
+ src.inferred || throw(ArgumentError("Expected inferred src::CodeInfo")) + mi = src.parent::Core.MethodInstance + sig = Base.tuple_type_tail(mi.specTypes) + method = mi.def::Method + nargs = method.nargs-1 + isva = method.isva + return generate_opaque_closure(sig, Union{}, src.rettype, src, nargs, isva, env...) +end - ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any), - sig, Union{}, src.rettype, @__MODULE__, src, 0, nothing, M.nargs - 1, M.isva, env) +function generate_opaque_closure(@nospecialize(sig), @nospecialize(rt_lb), @nospecialize(rt_ub), + src::CodeInfo, nargs::Int, isva::Bool, @nospecialize env...; + mod::Module=@__MODULE__, + lineno::Int=0, + file::Union{Nothing,Symbol}=nothing, + do_compile::Bool=true) + return ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any, Cint), + sig, rt_lb, rt_ub, mod, src, lineno, file, nargs, isva, env, do_compile) end diff --git a/base/operators.jl b/base/operators.jl index f0647be1b65ad..5c403e5764563 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -79,7 +79,7 @@ also implement [`<`](@ref) to ensure consistency of comparisons. == """ - isequal(x, y) + isequal(x, y) -> Bool Similar to [`==`](@ref), except for the treatment of floating point numbers and of missing values. `isequal` treats all floating-point `NaN` values as equal @@ -143,7 +143,7 @@ isequal(x::AbstractFloat, y::Real ) = (isnan(x) & isnan(y)) | signequal( isless(x, y) Test whether `x` is less than `y`, according to a fixed total order (defined together with -[`isequal`](@ref)). `isless` is not defined on all pairs of values `(x, y)`. However, if it +[`isequal`](@ref)). `isless` is not defined for pairs `(x, y)` of all types. However, if it is defined, it is expected to satisfy the following: - If `isless(x, y)` is defined, then so is `isless(y, x)` and `isequal(x, y)`, and exactly one of those three yields `true`. @@ -154,13 +154,13 @@ Values that are normally unordered, such as `NaN`, are ordered after regular values. [`missing`](@ref) values are ordered last. -This is the default comparison used by [`sort`](@ref). +This is the default comparison used by [`sort!`](@ref). # Implementation Non-numeric types with a total order should implement this function. Numeric types only need to implement it if they have special values such as `NaN`. Types with a partial order should implement [`<`](@ref). -See the documentation on [Alternate orderings](@ref) for how to define alternate +See the documentation on [Alternate Orderings](@ref) for how to define alternate ordering methods that can be used in sorting and related functions. # Examples @@ -178,6 +178,13 @@ isless(x::AbstractFloat, y::AbstractFloat) = (!isnan(x) & (isnan(y) | signless(x isless(x::Real, y::AbstractFloat) = (!isnan(x) & (isnan(y) | signless(x, y))) | (x < y) isless(x::AbstractFloat, y::Real ) = (!isnan(x) & (isnan(y) | signless(x, y))) | (x < y) +# Performance optimization to reduce branching +# This is useful for sorting tuples of integers +# TODO: remove this when the compiler can optimize the generic version better +# See #48724 and #48753 +isless(a::Tuple{BitInteger, BitInteger}, b::Tuple{BitInteger, BitInteger}) = + isless(a[1], b[1]) | (isequal(a[1], b[1]) & isless(a[2], b[2])) + """ isgreater(x, y) @@ -185,7 +192,7 @@ Not the inverse of `isless`! Test whether `x` is greater than `y`, according to a fixed total order compatible with `min`. 
Defined with `isless`, this function is usually `isless(y, x)`, but `NaN` and -[`missing`](@ref) are ordered as smaller than any ordinary value with `missing` +[`missing`](@ref) are ordered as smaller than any regular value with `missing` smaller than `NaN`. So `isless` defines an ascending total order with `NaN` and `missing` as the @@ -328,6 +335,8 @@ New types with a canonical partial order should implement this function for two arguments of the new type. Types with a canonical total order should implement [`isless`](@ref) instead. +See also [`isunordered`](@ref). + # Examples ```jldoctest julia> 'a' < 'b' @@ -455,13 +464,17 @@ cmp(x::Integer, y::Integer) = ifelse(isless(x, y), -1, ifelse(isless(y, x), 1, 0 """ max(x, y, ...) -Return the maximum of the arguments (with respect to [`isless`](@ref)). See also the [`maximum`](@ref) function -to take the maximum element from a collection. +Return the maximum of the arguments, with respect to [`isless`](@ref). +If any of the arguments is [`missing`](@ref), return `missing`. +See also the [`maximum`](@ref) function to take the maximum element from a collection. # Examples ```jldoctest julia> max(2, 5, 1) 5 + +julia> max(5, missing, 6) +missing ``` """ max(x, y) = ifelse(isless(y, x), x, y) @@ -469,13 +482,17 @@ max(x, y) = ifelse(isless(y, x), x, y) """ min(x, y, ...) -Return the minimum of the arguments (with respect to [`isless`](@ref)). See also the [`minimum`](@ref) function -to take the minimum element from a collection. +Return the minimum of the arguments, with respect to [`isless`](@ref). +If any of the arguments is [`missing`](@ref), return `missing`. +See also the [`minimum`](@ref) function to take the minimum element from a collection. # Examples ```jldoctest julia> min(2, 5, 1) 1 + +julia> min(4, missing, 6) +missing ``` """ min(x,y) = ifelse(isless(y, x), y, x) @@ -513,8 +530,6 @@ julia> identity("Well, what did you expect?") identity(@nospecialize x) = x +(x::Number) = x --(x) = Int8(-1)*x --(x, y) = x + (-y) *(x::Number) = x (&)(x::Integer) = x (|)(x::Integer) = x @@ -570,7 +585,7 @@ function afoldl(op, a, bs...) end return y end -typeof(afoldl).name.mt.max_args = 34 +setfield!(typeof(afoldl).name.mt, :max_args, 34, :monotonic) for op in (:+, :*, :&, :|, :xor, :min, :max, :kron) @eval begin @@ -615,9 +630,7 @@ julia> inv(A) * x -7.0 ``` """ -\(x, y) = inv(x) * y - -/(x, y) = x * inv(y) +\(x,y) = adjoint(adjoint(y)/adjoint(x)) # Core <<, >>, and >>> take either Int or UInt as second arg. Signed shift # counts can shift in either direction, and are translated here to unsigned @@ -662,9 +675,9 @@ end >>(x, n) Right bit shift operator, `x >> n`. For `n >= 0`, the result is `x` shifted -right by `n` bits, where `n >= 0`, filling with `0`s if `x >= 0`, `1`s if `x < -0`, preserving the sign of `x`. This is equivalent to `fld(x, 2^n)`. For `n < -0`, this is equivalent to `x << -n`. +right by `n` bits, filling with `0`s if `x >= 0`, `1`s if `x < 0`, preserving +the sign of `x`. This is equivalent to `fld(x, 2^n)`. For `n < 0`, this is +equivalent to `x << -n`. # Examples ```jldoctest @@ -703,8 +716,8 @@ end >>>(x, n) Unsigned right bit shift operator, `x >>> n`. For `n >= 0`, the result is `x` -shifted right by `n` bits, where `n >= 0`, filling with `0`s. For `n < 0`, this -is equivalent to `x << -n`. +shifted right by `n` bits, filling with `0`s. For `n < 0`, this is equivalent +to `x << -n`. For [`Unsigned`](@ref) integer types, this is equivalent to [`>>`](@ref). 
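# Illustrative sketch (not from the patch) of the behavior documented above:
@assert max(2, 5, 1) == 5 && min(4, missing, 6) === missing   # missing propagates
@assert (-8) >> 1 == -4 && (-8) >> 1 == fld(-8, 2)            # arithmetic shift keeps the sign
@assert (-8) >>> 1 == typemax(Int) - 3                        # logical shift fills with zeros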
For [`Signed`](@ref) integer types, this is equivalent to `signed(unsigned(x) >> n)`. @@ -846,7 +859,7 @@ julia> x == (fld1(x, y) - 1) * y + mod1(x, y) true ``` """ -fld1(x::T, y::T) where {T<:Real} = (m = mod1(x, y); fld(x + y - m, y)) +fld1(x::T, y::T) where {T<:Real} = (m = mod1(x, y); fld((x - m) + y, y)) function fld1(x::T, y::T) where T<:Integer d = div(x, y) return d + (!signbit(x ⊻ y) & (d * y != x)) @@ -885,23 +898,35 @@ julia> widen(1.5f0) """ widen(x::T) where {T} = convert(widen(T), x) widen(x::Type{T}) where {T} = throw(MethodError(widen, (T,))) +widen(x::Type{Union{}}, slurp...) = throw(MethodError(widen, (Union{},))) # function pipelining """ |>(x, f) -Applies a function to the preceding argument. This allows for easy function chaining. -When used with anonymous functions, parentheses are typically required around the definition to get the intended chain. +Infix operator which applies function `f` to the argument `x`. +This allows `f(g(x))` to be written `x |> g |> f`. +When used with anonymous functions, parentheses are typically required around +the definition to get the intended chain. # Examples ```jldoctest -julia> [1:5;] .|> (x -> x^2) |> sum |> inv -0.01818181818181818 +julia> 4 |> inv +0.25 + +julia> [2, 3, 5] |> sum |> inv +0.1 + +julia> [0 1; 2 3] .|> (x -> x^2) |> sum +14 ``` """ |>(x, f) = f(x) +_stable_typeof(x) = typeof(x) +_stable_typeof(::Type{T}) where {T} = @isdefined(T) ? Type{T} : DataType + """ f = Returns(value) @@ -928,7 +953,7 @@ julia> f.value struct Returns{V} <: Function value::V Returns{V}(value) where {V} = new{V}(value) - Returns(value) = new{Core.Typeof(value)}(value) + Returns(value) = new{_stable_typeof(value)}(value) end (obj::Returns)(@nospecialize(args...); @nospecialize(kw...)) = obj.value @@ -944,6 +969,7 @@ entered in the Julia REPL (and most editors, appropriately configured) by typing Function composition also works in prefix form: `∘(f, g)` is the same as `f ∘ g`. The prefix form supports composition of multiple functions: `∘(f, g, h) = f ∘ g ∘ h` and splatting `∘(fs...)` for composing an iterable collection of functions. +The last argument to `∘` execute first. !!! compat "Julia 1.4" Multiple function composition requires at least Julia 1.4. @@ -962,15 +988,21 @@ julia> map(uppercase∘first, ["apple", "banana", "carrot"]) 'B': ASCII/Unicode U+0042 (category Lu: Letter, uppercase) 'C': ASCII/Unicode U+0043 (category Lu: Letter, uppercase) +julia> (==(6)∘length).(["apple", "banana", "carrot"]) +3-element BitVector: + 0 + 1 + 1 + julia> fs = [ x -> 2x - x -> x/2 x -> x-1 + x -> x/2 x -> x+1 ]; julia> ∘(fs...)(3) -3.0 +2.0 ``` See also [`ComposedFunction`](@ref), [`!f::Function`](@ref). """ @@ -983,7 +1015,7 @@ Represents the composition of two callable objects `outer::Outer` and `inner::In ```julia ComposedFunction(outer, inner)(args...; kw...) === outer(inner(args...; kw...)) ``` -The preferred way to construct instance of `ComposedFunction` is to use the composition operator [`∘`](@ref): +The preferred way to construct an instance of `ComposedFunction` is to use the composition operator [`∘`](@ref): ```jldoctest julia> sin ∘ cos === ComposedFunction(sin, cos) true @@ -1014,7 +1046,16 @@ struct ComposedFunction{O,I} <: Function ComposedFunction(outer, inner) = new{Core.Typeof(outer),Core.Typeof(inner)}(outer, inner) end -(c::ComposedFunction)(x...; kw...) = c.outer(c.inner(x...; kw...)) +(c::ComposedFunction)(x...; kw...) 
= call_composed(unwrap_composed(c), x, kw) +unwrap_composed(c::ComposedFunction) = (unwrap_composed(c.outer)..., unwrap_composed(c.inner)...) +unwrap_composed(c) = (maybeconstructor(c),) +call_composed(fs, x, kw) = (@inline; fs[1](call_composed(tail(fs), x, kw))) +call_composed(fs::Tuple{Any}, x, kw) = fs[1](x...; kw...) + +struct Constructor{F} <: Function end +(::Constructor{F})(args...; kw...) where {F} = (@inline; F(args...; kw...)) +maybeconstructor(::Type{F}) where {F} = Constructor{F}() +maybeconstructor(f) = f ∘(f) = f ∘(f, g) = ComposedFunction(f, g) @@ -1078,8 +1119,8 @@ struct Fix1{F,T} <: Function f::F x::T - Fix1(f::F, x::T) where {F,T} = new{F,T}(f, x) - Fix1(f::Type{F}, x::T) where {F,T} = new{Type{F},T}(f, x) + Fix1(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x) + Fix1(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x) end (f::Fix1)(y) = f.f(f.x, y) @@ -1095,8 +1136,8 @@ struct Fix2{F,T} <: Function f::F x::T - Fix2(f::F, x::T) where {F,T} = new{F,T}(f, x) - Fix2(f::Type{F}, x::T) where {F,T} = new{Type{F},T}(f, x) + Fix2(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x) + Fix2(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x) end (f::Fix2)(y) = f.f(y, f.x) @@ -1189,41 +1230,54 @@ used to implement specialized methods. <(x) = Fix2(<, x) """ - Splat(f) + splat(f) Equivalent to ```julia my_splat(f) = args->f(args...) ``` i.e. given a function returns a new function that takes one argument and splats -its argument into the original function. This is useful as an adaptor to pass -a multi-argument function in a context that expects a single argument, but -passes a tuple as that single argument. Additionally has pretty printing. - -!!! compat "Julia 1.9" - This function was introduced in Julia 1.9, replacing `Base.splat(f)`. +it into the original function. This is useful as an adaptor to pass a +multi-argument function in a context that expects a single argument, but passes +a tuple as that single argument. # Example usage: ```jldoctest -julia> map(Base.Splat(+), zip(1:3,4:6)) +julia> map(splat(+), zip(1:3,4:6)) 3-element Vector{Int64}: 5 7 9 -julia> my_add = Base.Splat(+) -Splat(+) +julia> my_add = splat(+) +splat(+) julia> my_add((1,2,3)) 6 ``` """ +splat(f) = Splat(f) + +""" + Base.Splat{F} <: Function + +Represents a splatted function. That is +```julia +Base.Splat(f)(args) === f(args...) +``` +The preferred way to construct an instance of `Base.Splat` is to use the [`splat`](@ref) function. + +!!! compat "Julia 1.9" + Splat requires at least Julia 1.9. In earlier versions `splat` returns an anonymous function instead. + +See also [`splat`](@ref). +""" struct Splat{F} <: Function f::F Splat(f) = new{Core.Typeof(f)}(f) end (s::Splat)(args) = s.f(args...) -print(io::IO, s::Splat) = print(io, "Splat(", s.f, ')') +print(io::IO, s::Splat) = print(io, "splat(", s.f, ')') show(io::IO, s::Splat) = print(io, s) ## in and related operators @@ -1241,17 +1295,22 @@ used to implement specialized methods. """ in(x) = Fix2(in, x) -function in(x, itr) - anymissing = false - for y in itr - v = (y == x) - if ismissing(v) - anymissing = true - elseif v - return true +for ItrT = (Tuple,Any) + # define a generic method and a specialized version for `Tuple`, + # whose method bodies are identical, while giving better effects to the later + @eval function in(x, itr::$ItrT) + $(ItrT === Tuple ? 
:(@_terminates_locally_meta) : :nothing) + anymissing = false + for y in itr + v = (y == x) + if ismissing(v) + anymissing = true + elseif v + return true + end end + return anymissing ? missing : false end - return anymissing ? missing : false end const ∈ = in @@ -1286,20 +1345,19 @@ a function equivalent to `y -> item in y`. Determine whether an item is in the given collection, in the sense that it is [`==`](@ref) to one of the values generated by iterating over the collection. -Returns a `Bool` value, except if `item` is [`missing`](@ref) or `collection` +Return a `Bool` value, except if `item` is [`missing`](@ref) or `collection` contains `missing` but not `item`, in which case `missing` is returned ([three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic), matching the behavior of [`any`](@ref) and [`==`](@ref)). Some collections follow a slightly different definition. For example, -[`Set`](@ref)s check whether the item [`isequal`](@ref) to one of the elements. -[`Dict`](@ref)s look for `key=>value` pairs, and the key is compared using -[`isequal`](@ref). To test for the presence of a key in a dictionary, -use [`haskey`](@ref) or `k in keys(dict)`. For these collections, the result -is always a `Bool` and never `missing`. +[`Set`](@ref)s check whether the item [`isequal`](@ref) to one of the elements; +[`Dict`](@ref)s look for `key=>value` pairs, and the `key` is compared using +[`isequal`](@ref). -To determine whether an item is not in a given collection, see [`:∉`](@ref). -You may also negate the `in` by doing `!(a in b)` which is logically similar to "not in". +To test for the presence of a key in a dictionary, use [`haskey`](@ref) +or `k in keys(dict)`. For the collections mentioned above, +the result is always a `Bool`. When broadcasting with `in.(items, collection)` or `items .∈ collection`, both `item` and `collection` are broadcasted over, which is often not what is intended. @@ -1309,6 +1367,8 @@ corresponding position in `collection`. To get a vector indicating whether each in `items` is in `collection`, wrap `collection` in a tuple or a `Ref` like this: `in.(items, Ref(collection))` or `items .∈ Ref(collection)`. +See also: [`∉`](@ref), [`insorted`](@ref), [`contains`](@ref), [`occursin`](@ref), [`issubset`](@ref). + # Examples ```jldoctest julia> a = 1:3:20 @@ -1332,11 +1392,8 @@ true julia> missing in Set([1, 2]) false -julia> !(21 in a) -true - -julia> !(19 in a) -false +julia> (1=>missing) in Dict(1=>10, 2=>20) +missing julia> [1, 2] .∈ [2, 3] 2-element BitVector: @@ -1348,8 +1405,6 @@ julia> [1, 2] .∈ ([2, 3],) 0 1 ``` - -See also: [`insorted`](@ref), [`contains`](@ref), [`occursin`](@ref), [`issubset`](@ref). """ in diff --git a/base/optimized_generics.jl b/base/optimized_generics.jl new file mode 100644 index 0000000000000..86b54a294564d --- /dev/null +++ b/base/optimized_generics.jl @@ -0,0 +1,57 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module OptimizedGenerics + +# This file defines interfaces that are recognized and optimized by the compiler +# They are intended to be used by data structure implementations that wish to +# opt into some level of compiler optimizations. These interfaces are +# EXPERIMENTAL and currently intended for use by Base only. They are subject +# to change or removal without notice. It is undefined behavior to add methods +# to these generics that do not conform to the specified interface. 
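# Illustrative sketch (not from the patch): three-valued logic of `in`, plus the
# `splat` helper from the hunk above (`splat` is exported as of Julia 1.9).
@assert (1 in [1, missing]) === true          # a match decides the answer
@assert (3 in [1, missing]) === missing       # no match, but `missing` might have been one
@assert (missing in Set([1, 2])) === false    # Sets compare with isequal, so no missing
@assert map(splat(+), zip(1:3, 4:6)) == [5, 7, 9]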
+# +# The intended way to use these generics is that data structures will provide +# appropriate implementations for a generic. In the absence of compiler +# optimizations, these behave like regular methods. However, the compiler is +# semantically allowed to perform certain structural optimizations on +# appropriate combinations of these intrinsics without proving correctness. + +# Compiler-recognized generics for immutable key-value stores (dicts, etc.) +""" + module KeyValue + +Implements a key-value like interface where the compiler has liberty to perform +the following transformations. The core optimization semantically allowed for +the compiler is: + + get(set(x, key, val), key) -> (val,) + +where the compiler will recursively look through `x`. Keys are compared by +egality. + +Implementations must observe the following constraints: + +1. It is undefined behavior for `get` not to return the exact (by egality) val + stored for a given `key`. +""" +module KeyValue + """ + set(collection, [key [, val]]) + set(T, collection, key, val) + + Set the `key` in `collection` to `val`. If `val` is omitted, deletes the + value from the collection. If `key` is omitted as well, deletes all elements + of the collection. + """ + function set end + + """ + get(collection, key) + + Retrieve the value corresponding to `key` in `collection` as a single + element tuple or `nothing` if no value corresponding to the key was found. + `key`s are compared by egal. + """ + function get end +end + +end diff --git a/base/options.jl b/base/options.jl index 48a8f7ff59d38..a94936391fa8d 100644 --- a/base/options.jl +++ b/base/options.jl @@ -11,6 +11,8 @@ struct JLOptions cpu_target::Ptr{UInt8} nthreadpools::Int16 nthreads::Int16 + nmarkthreads::Int16 + nsweepthreads::Int8 nthreads_per_pool::Ptr{Int16} nprocs::Int32 machine_file::Ptr{UInt8} @@ -38,6 +40,7 @@ struct JLOptions handle_signals::Int8 use_sysimage_native_code::Int8 use_compiled_modules::Int8 + use_pkgimages::Int8 bindto::Ptr{UInt8} outputbc::Ptr{UInt8} outputunoptbc::Ptr{UInt8} @@ -52,6 +55,7 @@ struct JLOptions rr_detach::Int8 strip_metadata::Int8 strip_ir::Int8 + permalloc_pkgimg::Int8 heap_size_hint::UInt64 end diff --git a/base/ordering.jl b/base/ordering.jl index e49102159c962..585824bbeadfe 100644 --- a/base/ordering.jl +++ b/base/ordering.jl @@ -21,7 +21,8 @@ export # not exported by Base """ Base.Order.Ordering -Abstract type which represents a total order on some set of elements. +Abstract type which represents a strict weak order on some set of elements. See +[`sort!`](@ref) for more. Use [`Base.Order.lt`](@ref) to compare two elements according to the ordering. """ @@ -87,8 +88,8 @@ By(by) = By(by, Forward) """ Lt(lt) -`Ordering` which calls `lt(a, b)` to compare elements. `lt` should -obey the same rules as implementations of [`isless`](@ref). +`Ordering` that calls `lt(a, b)` to compare elements. `lt` must +obey the same rules as the `lt` parameter of [`sort!`](@ref). """ struct Lt{T} <: Ordering lt::T @@ -110,7 +111,7 @@ ReverseOrdering(by::By) = By(by.by, ReverseOrdering(by.order)) ReverseOrdering(perm::Perm) = Perm(ReverseOrdering(perm.order), perm.data) """ - lt(o::Ordering, a, b) + lt(o::Ordering, a, b) -> Bool Test whether `a` is less than `b` according to the ordering `o`. 
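# Illustrative sketch (not from the patch) of the ordering API documented above:
using Base.Order: lt, Forward, Reverse, By, Lt
@assert lt(Forward, 1, 2) && !lt(Reverse, 1, 2)
@assert lt(By(abs), 2, -3)                               # compares abs(2) < abs(-3)
@assert lt(Lt((a, b) -> length(a) < length(b)), "hi", "hello")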
""" @@ -122,21 +123,18 @@ lt(o::Lt, a, b) = o.lt(a,b) @propagate_inbounds function lt(p::Perm, a::Integer, b::Integer) da = p.data[a] db = p.data[b] - lt(p.order, da, db) | (!lt(p.order, db, da) & (a < b)) + (lt(p.order, da, db)::Bool) | (!(lt(p.order, db, da)::Bool) & (a < b)) end -_ord(lt::typeof(isless), by::typeof(identity), order::Ordering) = order -_ord(lt::typeof(isless), by, order::Ordering) = By(by, order) - -function _ord(lt, by, order::Ordering) - if order === Forward - return Lt((x, y) -> lt(by(x), by(y))) - elseif order === Reverse - return Lt((x, y) -> lt(by(y), by(x))) - else - error("Passing both lt= and order= arguments is ambiguous; please pass order=Forward or order=Reverse (or leave default)") - end -end + +_ord(lt::typeof(isless), by, order::Ordering) = _by(by, order) +_ord(lt::typeof(isless), by, order::ForwardOrdering) = _by(by, order) # disambiguation +_ord(lt::typeof(isless), by, order::ReverseOrdering{ForwardOrdering}) = _by(by, order) # disambiguation +_ord(lt, by, order::ForwardOrdering) = _by(by, Lt(lt)) +_ord(lt, by, order::ReverseOrdering{ForwardOrdering}) = reverse(_by(by, Lt(lt))) +_ord(lt, by, order::Ordering) = error("Passing both lt= and order= arguments is ambiguous; please pass order=Forward or order=Reverse (or leave default)") +_by(by, order::Ordering) = By(by, order) +_by(::typeof(identity), order::Ordering) = order """ ord(lt, by, rev::Union{Bool, Nothing}, order::Ordering=Forward) @@ -146,8 +144,8 @@ Construct an [`Ordering`](@ref) object from the same arguments used by Elements are first transformed by the function `by` (which may be [`identity`](@ref)) and are then compared according to either the function `lt` or an existing ordering `order`. `lt` should be [`isless`](@ref) or a function -which obeys similar rules. Finally, the resulting order is reversed if -`rev=true`. +that obeys the same rules as the `lt` parameter of [`sort!`](@ref). Finally, +the resulting order is reversed if `rev=true`. 
Passing an `lt` other than `isless` along with an `order` other than [`Base.Order.Forward`](@ref) or [`Base.Order.Reverse`](@ref) is not permitted, diff --git a/base/osutils.jl b/base/osutils.jl index 1f5a708d30c7a..95d0562540e5a 100644 --- a/base/osutils.jl +++ b/base/osutils.jl @@ -16,7 +16,7 @@ macro static(ex) @label loop hd = ex.head if hd ∈ (:if, :elseif, :&&, :||) - cond = Core.eval(__module__, ex.args[1]) + cond = Core.eval(__module__, ex.args[1])::Bool if xor(cond, hd === :||) return esc(ex.args[2]) elseif length(ex.args) == 3 diff --git a/base/pair.jl b/base/pair.jl index b5dffbb4e7e86..1953dc2886053 100644 --- a/base/pair.jl +++ b/base/pair.jl @@ -28,6 +28,11 @@ julia> for x in p end foo 7 + +julia> replace.(["xops", "oxps"], "x" => "o") +2-element Vector{String}: + "oops" + "oops" ``` """ Pair, => @@ -39,7 +44,7 @@ indexed_iterate(p::Pair, i::Int, state=1) = (getfield(p, i), i + 1) hash(p::Pair, h::UInt) = hash(p.second, hash(p.first, h)) ==(p::Pair, q::Pair) = (p.first==q.first) & (p.second==q.second) -isequal(p::Pair, q::Pair) = isequal(p.first,q.first) & isequal(p.second,q.second) +isequal(p::Pair, q::Pair) = isequal(p.first,q.first)::Bool & isequal(p.second,q.second)::Bool isless(p::Pair, q::Pair) = ifelse(!isequal(p.first,q.first), isless(p.first,q.first), isless(p.second,q.second)) @@ -55,7 +60,11 @@ last(p::Pair) = p.second convert(::Type{Pair{A,B}}, x::Pair{A,B}) where {A,B} = x function convert(::Type{Pair{A,B}}, x::Pair) where {A,B} - Pair{A,B}(convert(A, x[1]), convert(B, x[2])) + a = getfield(x, :first) + a isa A || (a = convert(A, a)) + b = getfield(x, :second) + b isa B || (b = convert(B, b)) + return Pair{A,B}(a, b)::Pair{A,B} end promote_rule(::Type{Pair{A1,B1}}, ::Type{Pair{A2,B2}}) where {A1,B1,A2,B2} = diff --git a/base/parse.jl b/base/parse.jl index 1c911c96e1479..ab1adb7c30895 100644 --- a/base/parse.jl +++ b/base/parse.jl @@ -36,6 +36,7 @@ julia> parse(Complex{Float64}, "3.2e-1 + 4.5im") ``` """ parse(T::Type, str; base = Int) +parse(::Type{Union{}}, slurp...; kwargs...) = error("cannot parse a value as Union{}") function parse(::Type{T}, c::AbstractChar; base::Integer = 10) where T<:Integer a::Int = (base <= 36 ? 10 : 36) @@ -89,17 +90,22 @@ function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos: return sgn, base, j end -@inline function __convert_digit(_c::UInt32, base) +# '0':'9' -> 0:9 +# 'A':'Z' -> 10:35 +# 'a':'z' -> 10:35 if base <= 36, 36:61 otherwise +# input outside of that is mapped to base +@inline function __convert_digit(_c::UInt32, base::UInt32) _0 = UInt32('0') _9 = UInt32('9') _A = UInt32('A') _a = UInt32('a') _Z = UInt32('Z') _z = UInt32('z') - a::UInt32 = base <= 36 ? 10 : 36 + a = base <= 36 ? UInt32(10) : UInt32(36) # converting here instead of via a type assertion prevents typeassert related errors d = _0 <= _c <= _9 ? _c-_0 : _A <= _c <= _Z ? _c-_A+ UInt32(10) : - _a <= _c <= _z ? _c-_a+a : UInt32(base) + _a <= _c <= _z ? 
_c-_a+a : + base end @@ -110,7 +116,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos:: return nothing end if !(2 <= base <= 62) - raise && throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base")) + raise && throw(ArgumentError(LazyString("invalid base: base must be 2 ≤ base ≤ 62, got ", base))) return nothing end if i == 0 @@ -132,7 +138,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos:: while n <= m # Fast path from `UInt32(::Char)`; non-ascii will be >= 0x80 _c = reinterpret(UInt32, c) >> 24 - d::T = __convert_digit(_c, base) + d::T = __convert_digit(_c, base % UInt32) # we know 2 <= base <= 62, so prevent an incorrect InexactError here if d >= base raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))")) return nothing @@ -150,7 +156,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos:: while !isspace(c) # Fast path from `UInt32(::Char)`; non-ascii will be >= 0x80 _c = reinterpret(UInt32, c) >> 24 - d::T = __convert_digit(_c, base) + d::T = __convert_digit(_c, base % UInt32) # we know 2 <= base <= 62 if d >= base raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))")) return nothing @@ -176,7 +182,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos:: return n end -function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}}, +function tryparse_internal(::Type{Bool}, sbuff::AbstractString, startpos::Int, endpos::Int, base::Integer, raise::Bool) if isempty(sbuff) raise && throw(ArgumentError("input string is empty")) @@ -202,10 +208,17 @@ function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}}, end len = endpos - startpos + 1 - p = pointer(sbuff) + startpos - 1 - GC.@preserve sbuff begin - (len == 4) && (0 == _memcmp(p, "true", 4)) && (return true) - (len == 5) && (0 == _memcmp(p, "false", 5)) && (return false) + if sbuff isa Union{String, SubString{String}} + p = pointer(sbuff) + startpos - 1 + truestr = "true" + falsestr = "false" + GC.@preserve sbuff truestr falsestr begin + (len == 4) && (0 == memcmp(p, unsafe_convert(Ptr{UInt8}, truestr), 4)) && (return true) + (len == 5) && (0 == memcmp(p, unsafe_convert(Ptr{UInt8}, falsestr), 5)) && (return false) + end + else + (len == 4) && (SubString(sbuff, startpos:startpos+3) == "true") && (return true) + (len == 5) && (SubString(sbuff, startpos:startpos+4) == "false") && (return false) end if raise @@ -241,6 +254,7 @@ function parse(::Type{T}, s::AbstractString; base::Union{Nothing,Integer} = noth convert(T, tryparse_internal(T, s, firstindex(s), lastindex(s), base===nothing ? 0 : check_valid_base(base), true)) end +tryparse(::Type{Union{}}, slurp...; kwargs...) 
= error("cannot parse a value as Union{}") ## string to float functions ## @@ -307,14 +321,14 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String} if i₊ == i # leading ± sign i₊ = something(findnext(in(('+','-')), s, i₊+1), 0) end - if i₊ != 0 && s[i₊-1] in ('e','E') # exponent sign + if i₊ != 0 && s[prevind(s, i₊)] in ('e','E') # exponent sign i₊ = something(findnext(in(('+','-')), s, i₊+1), 0) end # find trailing im/i/j iᵢ = something(findprev(in(('m','i','j')), s, e), 0) if iᵢ > 0 && s[iᵢ] == 'm' # im - iᵢ -= 1 + iᵢ = prevind(s, iᵢ) if s[iᵢ] != 'i' raise && throw(ArgumentError("expected trailing \"im\", found only \"m\"")) return nothing @@ -323,7 +337,7 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String} if i₊ == 0 # purely real or imaginary value if iᵢ > i && !(iᵢ == i+1 && s[i] in ('+','-')) # purely imaginary (not "±inf") - x = tryparse_internal(T, s, i, iᵢ-1, raise) + x = tryparse_internal(T, s, i, prevind(s, iᵢ), raise) x === nothing && return nothing return Complex{T}(zero(x),x) else # purely real @@ -339,11 +353,11 @@ function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String} end # parse real part - re = tryparse_internal(T, s, i, i₊-1, raise) + re = tryparse_internal(T, s, i, prevind(s, i₊), raise) re === nothing && return nothing # parse imaginary part - im = tryparse_internal(T, s, i₊+1, iᵢ-1, raise) + im = tryparse_internal(T, s, i₊+1, prevind(s, iᵢ), raise) im === nothing && return nothing return Complex{T}(re, s[i₊]=='-' ? -im : im) diff --git a/base/partr.jl b/base/partr.jl index a4cfcb60fe520..8c95e3668ee74 100644 --- a/base/partr.jl +++ b/base/partr.jl @@ -2,7 +2,7 @@ module Partr -using ..Threads: SpinLock, nthreads, threadid +using ..Threads: SpinLock, maxthreadid, threadid # a task minheap mutable struct taskheap @@ -18,15 +18,9 @@ end const heap_d = UInt32(8) const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)] const heaps_lock = [SpinLock(), SpinLock()] -const cong_unbias = [typemax(UInt32), typemax(UInt32)] -cong(max::UInt32, unbias::UInt32) = - ccall(:jl_rand_ptls, UInt32, (UInt32, UInt32), max, unbias) + UInt32(1) - -function unbias_cong(max::UInt32) - return typemax(UInt32) - ((typemax(UInt32) % max) + UInt32(1)) -end +cong(max::UInt32) = iszero(max) ? 
UInt32(0) : ccall(:jl_rand_ptls, UInt32, (UInt32,), max) + UInt32(1) function multiq_sift_up(heap::taskheap, idx::Int32) @@ -86,7 +80,6 @@ function multiq_size(tpid::Int8) newheaps[i] = taskheap() end heaps[tp] = newheaps - cong_unbias[tp] = unbias_cong(heap_p) end return heap_p @@ -95,15 +88,16 @@ end function multiq_insert(task::Task, priority::UInt16) tpid = ccall(:jl_get_task_threadpoolid, Int8, (Any,), task) + @assert tpid > -1 heap_p = multiq_size(tpid) tp = tpid + 1 task.priority = priority - rn = cong(heap_p, cong_unbias[tp]) + rn = cong(heap_p) tpheaps = heaps[tp] while !trylock(tpheaps[rn].lock) - rn = cong(heap_p, cong_unbias[tp]) + rn = cong(heap_p) end heap = tpheaps[rn] @@ -131,6 +125,9 @@ function multiq_deletemin() tid = Threads.threadid() tp = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) + 1 + if tp == 0 # Foreign thread + return nothing + end tpheaps = heaps[tp] @label retry @@ -140,8 +137,8 @@ function multiq_deletemin() if i == heap_p return nothing end - rn1 = cong(heap_p, cong_unbias[tp]) - rn2 = cong(heap_p, cong_unbias[tp]) + rn1 = cong(heap_p) + rn2 = cong(heap_p) prio1 = tpheaps[rn1].priority prio2 = tpheaps[rn2].priority if prio1 > prio2 @@ -179,13 +176,15 @@ function multiq_deletemin() return task end - function multiq_check_empty() - for j = UInt32(1):length(heaps) - for i = UInt32(1):length(heaps[j]) - if heaps[j][i].ntasks != 0 - return false - end + tid = Threads.threadid() + tp = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) + 1 + if tp == 0 # Foreign thread + return true + end + for i = UInt32(1):length(heaps[tp]) + if heaps[tp][i].ntasks != 0 + return false end end return true diff --git a/base/path.jl b/base/path.jl index dea1a1e3eef9d..493add6b0f6d0 100644 --- a/base/path.jl +++ b/base/path.jl @@ -20,22 +20,39 @@ export if Sys.isunix() const path_separator = "/" - const path_separator_re = r"/+" - const path_directory_re = r"(?:^|/)\.{0,2}$" - const path_dir_splitter = r"^(.*?)(/+)([^/]*)$" - const path_ext_splitter = r"^((?:.*/)?(?:\.|[^/\.])[^/]*?)(\.[^/\.]*|)$" + const path_separator_re = r"/+"sa + const path_directory_re = r"(?:^|/)\.{0,2}$"sa + const path_dir_splitter = r"^(.*?)(/+)([^/]*)$"sa + const path_ext_splitter = r"^((?:.*/)?(?:\.|[^/\.])[^/]*?)(\.[^/\.]*|)$"sa splitdrive(path::String) = ("",path) elseif Sys.iswindows() const path_separator = "\\" - const path_separator_re = r"[/\\]+" - const path_absolute_re = r"^(?:[A-Za-z]+:)?[/\\]" - const path_directory_re = r"(?:^|[/\\])\.{0,2}$" - const path_dir_splitter = r"^(.*?)([/\\]+)([^/\\]*)$" - const path_ext_splitter = r"^((?:.*[/\\])?(?:\.|[^/\\\.])[^/\\]*?)(\.[^/\\\.]*|)$" + const path_separator_re = r"[/\\]+"sa + const path_absolute_re = r"^(?:[A-Za-z]+:)?[/\\]"sa + const path_directory_re = r"(?:^|[/\\])\.{0,2}$"sa + const path_dir_splitter = r"^(.*?)([/\\]+)([^/\\]*)$"sa + const path_ext_splitter = r"^((?:.*[/\\])?(?:\.|[^/\\\.])[^/\\]*?)(\.[^/\\\.]*|)$"sa + + const splitdrive_re = let + # Slash in either direction. + S = raw"[\\/]" + # Not a slash in either direction. + N = raw"[^\\/]" + # Drive letter, e.g. `C:` + drive = "$(N)+:" + # UNC path, e.g. `\\server\share` + unc = "$(S)$(S)$(N)+$(S)$(N)+" + # Long drive letter, e.g. `\\?\C:` + long_drive = "$(S)$(S)\\?$(S)$(drive)" + # Long UNC path, e.g. `\\?\UNC\server\share` + long_unc = "$(S)$(S)\\?$(S)UNC$(S)$(N)+$(S)$(N)+" + # Need to match the long patterns first so they get priority. 
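# Hypothetical standalone sketch (simplified, not the regex from the patch): why the
# longer alternatives must be tried first when splitting off a Windows drive prefix.
let S = raw"[\\/]", N = raw"[^\\/]"
    drive = "$(N)+:"                         # e.g. "C:"
    unc   = "$(S)$(S)$(N)+$(S)$(N)+"         # e.g. "\\server\share"
    re = Regex("^($unc|$drive|)(.*)\$", "s") # UNC before plain drive, empty match last
    @assert match(re, raw"\\server\share\dir").captures[1] == raw"\\server\share"
    @assert match(re, "C:/tmp").captures[1] == "C:"
    @assert match(re, "/tmp/file").captures[1] == ""
end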
+ Regex("^($long_unc|$long_drive|$unc|$drive|)(.*)\$", "sa") + end function splitdrive(path::String) - m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"s, path) + m = match(splitdrive_re, path)::AbstractMatch String(something(m.captures[1])), String(something(m.captures[2])) end else @@ -145,7 +162,7 @@ function _splitdir_nodrive(a::String, b::String) end """ - dirname(path::AbstractString) -> AbstractString + dirname(path::AbstractString) -> String Get the directory part of a path. Trailing characters ('/' or '\\') in the path are counted as part of the path. @@ -161,10 +178,10 @@ julia> dirname("/home/myuser/") See also [`basename`](@ref). """ - dirname(path::AbstractString) = splitdir(path)[1] +dirname(path::AbstractString) = splitdir(path)[1] """ - basename(path::AbstractString) -> AbstractString + basename(path::AbstractString) -> String Get the file name part of a path. @@ -186,7 +203,7 @@ See also [`dirname`](@ref). basename(path::AbstractString) = splitdir(path)[2] """ - splitext(path::AbstractString) -> (AbstractString, AbstractString) + splitext(path::AbstractString) -> (String, String) If the last component of a path contains one or more dots, split the path into everything before the last dot and everything including and after the dot. Otherwise, return a tuple of the argument @@ -415,6 +432,16 @@ normpath(a::AbstractString, b::AbstractString...) = normpath(joinpath(a,b...)) Convert a path to an absolute path by adding the current directory if necessary. Also normalizes the path as in [`normpath`](@ref). + +# Example + +If you are in a directory called `JuliaExample` and the data you are using is two levels up relative to the `JuliaExample` directory, you could write: + +abspath("../../data") + +Which gives a path like `"/home/JuliaUser/data/"`. + +See also [`joinpath`](@ref), [`pwd`](@ref), [`expanduser`](@ref). """ function abspath(a::String)::String if !isabspath(a) @@ -532,7 +559,7 @@ contractuser(path::AbstractString) """ - relpath(path::AbstractString, startpath::AbstractString = ".") -> AbstractString + relpath(path::AbstractString, startpath::AbstractString = ".") -> String Return a relative filepath to `path` either from the current directory or from an optional start directory. This is a path computation: the filesystem is not accessed to confirm the @@ -542,8 +569,8 @@ On Windows, case sensitivity is applied to every part of the path except drive l `path` and `startpath` refer to different drives, the absolute path of `path` is returned. """ function relpath(path::String, startpath::String = ".") - isempty(path) && throw(ArgumentError("`path` must be specified")) - isempty(startpath) && throw(ArgumentError("`startpath` must be specified")) + isempty(path) && throw(ArgumentError("`path` must be non-empty")) + isempty(startpath) && throw(ArgumentError("`startpath` must be non-empty")) curdir = "." pardir = ".." path == startpath && return curdir diff --git a/base/pcre.jl b/base/pcre.jl index 963b3ee4726a2..614ffa694af0e 100644 --- a/base/pcre.jl +++ b/base/pcre.jl @@ -6,7 +6,8 @@ module PCRE import ..RefValue -include("../pcre_h.jl") +# include($BUILDROOT/base/pcre_h.jl) +include(string(length(Core.ARGS) >= 2 ? 
Core.ARGS[2] : "", "pcre_h.jl")) const PCRE_LIB = "libpcre2-8" @@ -28,7 +29,7 @@ THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL] PCRE_COMPILE_LOCK = nothing _tid() = Int(ccall(:jl_threadid, Int16, ())) + 1 -_nth() = Int(unsafe_load(cglobal(:jl_n_threads, Cint))) +_mth() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cint), :acquire)) function get_local_match_context() tid = _tid() @@ -40,7 +41,7 @@ function get_local_match_context() try ctxs = THREAD_MATCH_CONTEXTS if length(ctxs) < tid - global THREAD_MATCH_CONTEXTS = ctxs = copyto!(fill(C_NULL, _nth()), ctxs) + global THREAD_MATCH_CONTEXTS = ctxs = copyto!(fill(C_NULL, length(ctxs) + _mth()), ctxs) end finally unlock(l) @@ -195,10 +196,12 @@ function err_message(errno::Integer) return GC.@preserve buffer unsafe_string(pointer(buffer)) end -function exec(re, subject, offset, options, match_data) - if !(subject isa Union{String,SubString{String}}) - subject = String(subject) - end +exec(re, subject::Union{String,SubString{String}}, offset, options, match_data) = + _exec(re, subject, offset, options, match_data) +exec(re, subject, offset, options, match_data) = + _exec(re, String(subject), offset, options, match_data) + +function _exec(re, subject, offset, options, match_data) rc = ccall((:pcre2_match_8, PCRE_LIB), Cint, (Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, UInt32, Ptr{Cvoid}, Ptr{Cvoid}), re, subject, ncodeunits(subject), offset, options, match_data, get_local_match_context()) diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl index dae288584aa89..c66f441d6f963 100644 --- a/base/permuteddimsarray.jl +++ b/base/permuteddimsarray.jl @@ -48,10 +48,9 @@ end Base.parent(A::PermutedDimsArray) = A.parent Base.size(A::PermutedDimsArray{T,N,perm}) where {T,N,perm} = genperm(size(parent(A)), perm) Base.axes(A::PermutedDimsArray{T,N,perm}) where {T,N,perm} = genperm(axes(parent(A)), perm) - +Base.has_offset_axes(A::PermutedDimsArray) = Base.has_offset_axes(A.parent) Base.similar(A::PermutedDimsArray, T::Type, dims::Base.Dims) = similar(parent(A), T, dims) - -Base.unsafe_convert(::Type{Ptr{T}}, A::PermutedDimsArray{T}) where {T} = Base.unsafe_convert(Ptr{T}, parent(A)) +Base.cconvert(::Type{Ptr{T}}, A::PermutedDimsArray{T}) where {T} = Base.cconvert(Ptr{T}, parent(A)) # It's OK to return a pointer to the first element, and indeed quite # useful for wrapping C routines that require a different storage @@ -77,18 +76,79 @@ end val end +function Base.isassigned(A::PermutedDimsArray{T,N,perm,iperm}, I::Vararg{Int,N}) where {T,N,perm,iperm} + @boundscheck checkbounds(Bool, A, I...) || return false + @inbounds x = isassigned(A.parent, genperm(I, iperm)...) + x +end + @inline genperm(I::NTuple{N,Any}, perm::Dims{N}) where {N} = ntuple(d -> I[perm[d]], Val(N)) @inline genperm(I, perm::AbstractVector{Int}) = genperm(I, (perm...,)) """ permutedims(A::AbstractArray, perm) + permutedims(A::AbstractMatrix) -Permute the dimensions of array `A`. `perm` is a vector or a tuple of length `ndims(A)` +Permute the dimensions (axes) of array `A`. `perm` is a tuple or vector of `ndims(A)` integers specifying the permutation. +If `A` is a 2d array ([`AbstractMatrix`](@ref)), then +`perm` defaults to `(2,1)`, swapping the two axes of `A` (the rows and columns +of the matrix). This differs from [`transpose`](@ref) in that the +operation is not recursive, which is especially useful for arrays of non-numeric values +(where the recursive `transpose` would throw an error) and/or 2d arrays that do not represent +linear operators. 
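# Illustrative sketch (not from the patch): `PermutedDimsArray` is the lazy view
# handled above, while `permutedims` makes an eagerly permuted copy.
let A = [1 2 3; 4 5 6]
    B = PermutedDimsArray(A, (2, 1))      # no copy; indices are permuted on access
    @assert size(B) == (3, 2) && B[3, 1] == A[1, 3]
    A[1, 3] = 30
    @assert B[3, 1] == 30                 # the view reflects mutation of the parent
    @assert permutedims(A, (2, 1)) == collect(B)
end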
+ +For 1d arrays, see [`permutedims(v::AbstractVector)`](@ref), which returns a 1-row “matrix”. + See also [`permutedims!`](@ref), [`PermutedDimsArray`](@ref), [`transpose`](@ref), [`invperm`](@ref). # Examples + +## 2d arrays: +Unlike `transpose`, `permutedims` can be used to swap rows and columns of 2d arrays of +arbitrary non-numeric elements, such as strings: +```jldoctest +julia> A = ["a" "b" "c" + "d" "e" "f"] +2×3 Matrix{String}: + "a" "b" "c" + "d" "e" "f" + +julia> permutedims(A) +3×2 Matrix{String}: + "a" "d" + "b" "e" + "c" "f" +``` +And `permutedims` produces results that differ from `transpose` +for matrices whose elements are themselves numeric matrices: +```jldoctest; setup = :(using LinearAlgebra) +julia> a = [1 2; 3 4]; + +julia> b = [5 6; 7 8]; + +julia> c = [9 10; 11 12]; + +julia> d = [13 14; 15 16]; + +julia> X = [[a] [b]; [c] [d]] +2×2 Matrix{Matrix{Int64}}: + [1 2; 3 4] [5 6; 7 8] + [9 10; 11 12] [13 14; 15 16] + +julia> permutedims(X) +2×2 Matrix{Matrix{Int64}}: + [1 2; 3 4] [9 10; 11 12] + [5 6; 7 8] [13 14; 15 16] + +julia> transpose(X) +2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}: + [1 3; 2 4] [9 11; 10 12] + [5 7; 6 8] [13 15; 14 16] +``` + +## Multi-dimensional arrays ```jldoctest julia> A = reshape(Vector(1:8), (2,2,2)) 2×2×2 Array{Int64, 3}: @@ -131,60 +191,69 @@ julia> size(B) julia> size(A)[perm] == ans true +``` """ function permutedims(A::AbstractArray, perm) dest = similar(A, genperm(axes(A), perm)) permutedims!(dest, A, perm) end -""" - permutedims(m::AbstractMatrix) - -Permute the dimensions of the matrix `m`, by flipping the elements across the diagonal of -the matrix. Differs from `LinearAlgebra`'s [`transpose`](@ref) in that the -operation is not recursive. - -# Examples -```jldoctest; setup = :(using LinearAlgebra) -julia> a = [1 2; 3 4]; - -julia> b = [5 6; 7 8]; - -julia> c = [9 10; 11 12]; - -julia> d = [13 14; 15 16]; - -julia> X = [[a] [b]; [c] [d]] -2×2 Matrix{Matrix{Int64}}: - [1 2; 3 4] [5 6; 7 8] - [9 10; 11 12] [13 14; 15 16] - -julia> permutedims(X) -2×2 Matrix{Matrix{Int64}}: - [1 2; 3 4] [9 10; 11 12] - [5 6; 7 8] [13 14; 15 16] - -julia> transpose(X) -2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}: - [1 3; 2 4] [9 11; 10 12] - [5 7; 6 8] [13 15; 14 16] -``` -""" permutedims(A::AbstractMatrix) = permutedims(A, (2,1)) """ permutedims(v::AbstractVector) Reshape vector `v` into a `1 × length(v)` row matrix. -Differs from `LinearAlgebra`'s [`transpose`](@ref) in that -the operation is not recursive. +Differs from [`transpose`](@ref) in that +the operation is not recursive, which is especially useful for arrays of non-numeric values +(where the recursive `transpose` might throw an error). 
# Examples +Unlike `transpose`, `permutedims` can be used on vectors of +arbitrary non-numeric elements, such as strings: +```jldoctest +julia> permutedims(["a", "b", "c"]) +1×3 Matrix{String}: + "a" "b" "c" +``` +For vectors of numbers, `permutedims(v)` works much like `transpose(v)` +except that the return type differs (it uses [`reshape`](@ref) +rather than a `LinearAlgebra.Transpose` view, though both +share memory with the original array `v`): ```jldoctest; setup = :(using LinearAlgebra) -julia> permutedims([1, 2, 3, 4]) +julia> v = [1, 2, 3, 4] +4-element Vector{Int64}: + 1 + 2 + 3 + 4 + +julia> p = permutedims(v) 1×4 Matrix{Int64}: 1 2 3 4 +julia> r = transpose(v) +1×4 transpose(::Vector{Int64}) with eltype Int64: + 1 2 3 4 + +julia> p == r +true + +julia> typeof(r) +Transpose{Int64, Vector{Int64}} + +julia> p[1] = 5; r[2] = 6; # mutating p or r also changes v + +julia> v # shares memory with both p and r +4-element Vector{Int64}: + 5 + 6 + 3 + 4 +``` +However, `permutedims` produces results that differ from `transpose` +for vectors whose elements are themselves numeric matrices: +```jldoctest; setup = :(using LinearAlgebra) julia> V = [[[1 2; 3 4]]; [[5 6; 7 8]]] 2-element Vector{Matrix{Int64}}: [1 2; 3 4] @@ -274,11 +343,21 @@ end P end -function Base._mapreduce_dim(f, op, init::Base._InitialValue, A::PermutedDimsArray, dims::Colon) +const CommutativeOps = Union{typeof(+),typeof(Base.add_sum),typeof(min),typeof(max),typeof(Base._extrema_rf),typeof(|),typeof(&)} + +function Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::PermutedDimsArray, dims::Colon) + Base._mapreduce_dim(f, op, init, parent(A), dims) +end +function Base._mapreduce_dim(f::typeof(identity), op::Union{typeof(Base.mul_prod),typeof(*)}, init::Base._InitialValue, A::PermutedDimsArray{<:Union{Real,Complex}}, dims::Colon) Base._mapreduce_dim(f, op, init, parent(A), dims) end -function Base.mapreducedim!(f, op, B::AbstractArray{T,N}, A::PermutedDimsArray{T,N,perm,iperm}) where {T,N,perm,iperm} +function Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray{T,N}, A::PermutedDimsArray{S,N,perm,iperm}) where {T,S,N,perm,iperm} + C = PermutedDimsArray{T,N,iperm,perm,typeof(B)}(B) # make the inverse permutation for the output + Base.mapreducedim!(f, op, C, parent(A)) + B +end +function Base.mapreducedim!(f::typeof(identity), op::Union{typeof(Base.mul_prod),typeof(*)}, B::AbstractArray{T,N}, A::PermutedDimsArray{<:Union{Real,Complex},N,perm,iperm}) where {T,N,perm,iperm} C = PermutedDimsArray{T,N,iperm,perm,typeof(B)}(B) # make the inverse permutation for the output Base.mapreducedim!(f, op, C, parent(A)) B diff --git a/base/pointer.jl b/base/pointer.jl index 60db18f2ca855..86513c076ade6 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -20,14 +20,14 @@ const C_NULL = bitcast(Ptr{Cvoid}, 0) # TODO: deprecate these conversions. C doesn't even allow them. # pointer to integer -convert(::Type{T}, x::Ptr) where {T<:Integer} = T(UInt(x)) +convert(::Type{T}, x::Ptr) where {T<:Integer} = T(UInt(x))::T # integer to pointer convert(::Type{Ptr{T}}, x::Union{Int,UInt}) where {T} = Ptr{T}(x) # pointer to pointer convert(::Type{Ptr{T}}, p::Ptr{T}) where {T} = p -convert(::Type{Ptr{T}}, p::Ptr) where {T} = bitcast(Ptr{T}, p) +convert(::Type{Ptr{T}}, p::Ptr) where {T} = bitcast(Ptr{T}, p)::Ptr{T} # object to pointer (when used with ccall) @@ -54,17 +54,39 @@ See also [`cconvert`](@ref) """ function unsafe_convert end +# convert strings to String etc. 
to pass as pointers +cconvert(::Type{Ptr{UInt8}}, s::AbstractString) = String(s) +cconvert(::Type{Ptr{Int8}}, s::AbstractString) = String(s) unsafe_convert(::Type{Ptr{UInt8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{UInt8}, (Any,), x) unsafe_convert(::Type{Ptr{Int8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{Int8}, (Any,), x) unsafe_convert(::Type{Ptr{UInt8}}, s::String) = ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s) unsafe_convert(::Type{Ptr{Int8}}, s::String) = ccall(:jl_string_ptr, Ptr{Int8}, (Any,), s) -# convert strings to String etc. to pass as pointers -cconvert(::Type{Ptr{UInt8}}, s::AbstractString) = String(s) -cconvert(::Type{Ptr{Int8}}, s::AbstractString) = String(s) -unsafe_convert(::Type{Ptr{T}}, a::Array{T}) where {T} = ccall(:jl_array_ptr, Ptr{T}, (Any,), a) +cconvert(::Type{<:Ptr}, a::Array) = getfield(a, :ref) unsafe_convert(::Type{Ptr{S}}, a::AbstractArray{T}) where {S,T} = convert(Ptr{S}, unsafe_convert(Ptr{T}, a)) unsafe_convert(::Type{Ptr{T}}, a::AbstractArray{T}) where {T} = error("conversion to pointer not defined for $(typeof(a))") +# TODO: add this deprecation to give a better error: +# cconvert(::Type{<:Ptr}, a::AbstractArray) = error("conversion to pointer not defined for $(typeof(a))") +# unsafe_convert(::Type{Ptr{T}}, a::AbstractArray{T}) where {T} = error("missing call to cconvert for call to unsafe_convert for AbstractArray") + +cconvert(::Type{<:Ptr}, a::GenericMemory) = a +unsafe_convert(::Type{Ptr{Cvoid}}, a::GenericMemory{T}) where {T} = getfield(a, :ptr) +unsafe_convert(::Type{Ptr{T}}, a::GenericMemory) where {T} = convert(Ptr{T}, getfield(a, :ptr)) + +function unsafe_convert(::Type{Ptr{Cvoid}}, a::GenericMemoryRef{<:Any,T,Core.CPU}) where {T} + mem = getfield(a, :mem) + offset = getfield(a, :ptr_or_offset) + MemT = typeof(mem) + arrayelem = datatype_arrayelem(MemT) + elsz = datatype_layoutsize(MemT) + isboxed = 1; isunion = 2 + if arrayelem == isunion || elsz == 0 + offset = UInt(offset) * elsz + offset += unsafe_convert(Ptr{Cvoid}, mem) + end + return offset +end +unsafe_convert(::Type{Ptr{T}}, a::GenericMemoryRef) where {T} = convert(Ptr{T}, unsafe_convert(Ptr{Cvoid}, a)) # unsafe pointer to array conversions """ @@ -92,37 +114,164 @@ function unsafe_wrap(::Union{Type{Array},Type{Array{T}},Type{Array{T,1}}}, ccall(:jl_ptr_to_array_1d, Array{T,1}, (Any, Ptr{Cvoid}, Csize_t, Cint), Array{T,1}, p, d, own) end -unsafe_wrap(Atype::Type, p::Ptr, dims::NTuple{N,<:Integer}; own::Bool = false) where {N} = +function unsafe_wrap(::Union{Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}}, + p::Ptr{T}, dims::Tuple{Int}; own::Bool = false) where {kind,T} + ccall(:jl_ptr_to_genericmemory, Ref{GenericMemory{kind,T,Core.CPU}}, + (Any, Ptr{Cvoid}, Csize_t, Cint), GenericMemory{kind,T,Core.CPU}, p, dim[1], own) +end +function unsafe_wrap(::Union{Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}}, + p::Ptr{T}, d::Integer; own::Bool = false) where {kind,T} + ccall(:jl_ptr_to_genericmemory, Ref{GenericMemory{kind,T,Core.CPU}}, + (Any, Ptr{Cvoid}, Csize_t, Cint), GenericMemory{kind,T,Core.CPU}, p, d, own) +end +unsafe_wrap(Atype::Union{Type{Array},Type{Array{T}},Type{Array{T,N}},Type{GenericMemory{kind,<:Any,Core.CPU}},Type{GenericMemory{kind,T,Core.CPU}}} where {kind}, + p::Ptr{T}, dims::NTuple{N,<:Integer}; own::Bool = false) where {T,N} = unsafe_wrap(Atype, p, convert(Tuple{Vararg{Int}}, dims), own = own) + """ unsafe_load(p::Ptr{T}, i::Integer=1) + unsafe_load(p::Ptr{T}, order::Symbol) + unsafe_load(p::Ptr{T}, 
i::Integer, order::Symbol) Load a value of type `T` from the address of the `i`th element (1-indexed) starting at `p`. -This is equivalent to the C expression `p[i-1]`. +This is equivalent to the C expression `p[i-1]`. Optionally, an atomic memory ordering can +be provided. The `unsafe` prefix on this function indicates that no validation is performed on the pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program or return garbage answers. Unlike C, dereferencing memory region allocated as different type may be valid provided that the types are compatible. + +!!! compat "Julia 1.10" + The `order` argument is available as of Julia 1.10. + +See also: [`atomic`](@ref) """ unsafe_load(p::Ptr, i::Integer=1) = pointerref(p, Int(i), 1) +unsafe_load(p::Ptr, order::Symbol) = atomic_pointerref(p, order) +function unsafe_load(p::Ptr, i::Integer, order::Symbol) + unsafe_load(p + (elsize(typeof(p)) * (Int(i) - 1)), order) +end """ unsafe_store!(p::Ptr{T}, x, i::Integer=1) + unsafe_store!(p::Ptr{T}, x, order::Symbol) + unsafe_store!(p::Ptr{T}, x, i::Integer, order::Symbol) Store a value of type `T` to the address of the `i`th element (1-indexed) starting at `p`. -This is equivalent to the C expression `p[i-1] = x`. +This is equivalent to the C expression `p[i-1] = x`. Optionally, an atomic memory ordering +can be provided. The `unsafe` prefix on this function indicates that no validation is performed on the pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program. Unlike C, storing memory region allocated as different type may be valid provided that that the types are compatible. + +!!! compat "Julia 1.10" + The `order` argument is available as of Julia 1.10. + +See also: [`atomic`](@ref) """ unsafe_store!(p::Ptr{Any}, @nospecialize(x), i::Integer=1) = pointerset(p, x, Int(i), 1) unsafe_store!(p::Ptr{T}, x, i::Integer=1) where {T} = pointerset(p, convert(T,x), Int(i), 1) +unsafe_store!(p::Ptr{T}, x, order::Symbol) where {T} = atomic_pointerset(p, x isa T ? x : convert(T,x), order) +function unsafe_store!(p::Ptr, x, i::Integer, order::Symbol) + unsafe_store!(p + (elsize(typeof(p)) * (Int(i) - 1)), x, order) +end + +""" + unsafe_modify!(p::Ptr{T}, op, x, [order::Symbol]) -> Pair + +These atomically perform the operations to get and set a memory address after applying +the function `op`. If supported by the hardware (for example, atomic increment), this may be +optimized to the appropriate hardware instruction, otherwise its execution will be +similar to: + + y = unsafe_load(p) + z = op(y, x) + unsafe_store!(p, z) + return y => z + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. +Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. 
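To make the ordered/atomic pointer API in this hunk concrete, here is a small usage sketch (not part of the patch; it assumes Julia 1.10 or newer and uses a `Ref` plus `GC.@preserve` to keep the memory rooted while raw pointers are in use).

```julia
# Usage sketch (assumption: Julia ≥ 1.10). The Ref keeps the Int alive; GC.@preserve
# pins it while we operate on the raw pointer.
r = Ref{Int}(0)
GC.@preserve r begin
    p = Base.unsafe_convert(Ptr{Int}, r)
    unsafe_store!(p, 1, :sequentially_consistent)          # ordered store
    @assert unsafe_load(p, :sequentially_consistent) == 1  # ordered load
    old_new = Base.unsafe_modify!(p, +, 41, :sequentially_consistent)
    @assert old_new == (1 => 42)                           # returns old => new
end
@assert r[] == 42
```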
+ +See also: [`modifyproperty!`](@ref Base.modifyproperty!), [`atomic`](@ref) +""" +function unsafe_modify!(p::Ptr, op, x, order::Symbol=:not_atomic) + return atomic_pointermodify(p, op, x, order) +end + +""" + unsafe_replace!(p::Ptr{T}, expected, desired, + [success_order::Symbol[, fail_order::Symbol=success_order]]) -> (; old, success::Bool) + +These atomically perform the operations to get and conditionally set a memory address to +a given value. If supported by the hardware, this may be optimized to the appropriate +hardware instruction, otherwise its execution will be similar to: + + y = unsafe_load(p, fail_order) + ok = y === expected + if ok + unsafe_store!(p, desired, success_order) + end + return (; old = y, success = ok) + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. +Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +See also: [`replaceproperty!`](@ref Base.replaceproperty!), [`atomic`](@ref) +""" +function unsafe_replace!(p::Ptr{T}, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) where {T} + @inline + xT = desired isa T ? desired : convert(T, desired) + return atomic_pointerreplace(p, expected, xT, success_order, fail_order) +end +function unsafe_replace!(p::Ptr{Any}, @nospecialize(expected), @nospecialize(desired), success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + return atomic_pointerreplace(p, expected, desired, success_order, fail_order) +end + +""" + unsafe_swap!(p::Ptr{T}, x, [order::Symbol]) + +These atomically perform the operations to simultaneously get and set a memory address. +If supported by the hardware, this may be optimized to the appropriate hardware +instruction, otherwise its execution will be similar to: + + y = unsafe_load(p) + unsafe_store!(p, x) + return y + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. +Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +See also: [`swapproperty!`](@ref Base.swapproperty!), [`atomic`](@ref) +""" +function unsafe_swap!(p::Ptr{Any}, x, order::Symbol=:not_atomic) + return atomic_pointerswap(p, x, order) +end +function unsafe_swap!(p::Ptr{T}, x, order::Symbol=:not_atomic) where {T} + @inline + xT = x isa T ? 
x : convert(T, x) + return atomic_pointerswap(p, xT, order) +end # convert a raw Ptr to an object reference, and vice-versa """ diff --git a/base/process.jl b/base/process.jl index 42bf6335b071c..24e676138bfae 100644 --- a/base/process.jl +++ b/base/process.jl @@ -6,11 +6,12 @@ mutable struct Process <: AbstractPipe in::IO out::IO err::IO + syncd::Vector{Task} exitcode::Int64 termsignal::Int32 exitnotify::ThreadSynchronizer - function Process(cmd::Cmd, handle::Ptr{Cvoid}) - this = new(cmd, handle, devnull, devnull, devnull, + function Process(cmd::Cmd, handle::Ptr{Cvoid}, syncd::Vector{Task}) + this = new(cmd, handle, devnull, devnull, devnull, syncd, typemin(fieldtype(Process, :exitcode)), typemin(fieldtype(Process, :termsignal)), ThreadSynchronizer()) @@ -35,6 +36,15 @@ end pipe_reader(p::ProcessChain) = p.out pipe_writer(p::ProcessChain) = p.in +# a lightweight pair of a child OS_HANDLE and associated Task that will +# complete only after all content has been read from it for synchronizing +# state without the kernel to aide +struct SyncCloseFD + fd + t::Task +end +rawhandle(io::SyncCloseFD) = rawhandle(io.fd) + # release ownership of the libuv handle function uvfinalize(proc::Process) if proc.handle != C_NULL @@ -74,8 +84,8 @@ function _uv_hook_close(proc::Process) nothing end -const SpawnIO = Union{IO, RawFD, OS_HANDLE} -const SpawnIOs = Vector{SpawnIO} # convenience name for readability +const SpawnIO = Union{IO, RawFD, OS_HANDLE, SyncCloseFD} # internal copy of Redirectable, removing FileRedirect and adding SyncCloseFD +const SpawnIOs = Memory{SpawnIO} # convenience name for readability (used for dispatch also to clearly distinguish from Vector{Redirectable}) function as_cpumask(cpus::Vector{UInt16}) n = max(Int(maximum(cpus)), Int(ccall(:uv_cpumask_size, Cint, ()))) @@ -100,6 +110,7 @@ end error("invalid spawn handle $h from $io") end for io in stdio] + syncd = Task[io.t for io in stdio if io isa SyncCloseFD] handle = Libc.malloc(_sizeof_uv_process) disassociate_julia_struct(handle) (; exec, flags, env, dir) = cmd @@ -117,7 +128,7 @@ end cpumask === nothing ? 
0 : length(cpumask), @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32))) if err == 0 - pp = Process(cmd, handle) + pp = Process(cmd, handle, syncd) associate_julia_struct(handle, pp) else ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), handle) # will call free on handle eventually @@ -130,23 +141,24 @@ end return pp end -_spawn(cmds::AbstractCmd) = _spawn(cmds, SpawnIO[]) +_spawn(cmds::AbstractCmd) = _spawn(cmds, SpawnIOs()) -# optimization: we can spawn `Cmd` directly without allocating the ProcessChain -function _spawn(cmd::Cmd, stdios::SpawnIOs) - isempty(cmd.exec) && throw(ArgumentError("cannot spawn empty command")) +function _spawn(cmd::AbstractCmd, stdios::Vector{Redirectable}) pp = setup_stdios(stdios) do stdios - return _spawn_primitive(cmd.exec[1], cmd, stdios) + return _spawn(cmd, stdios) end return pp end +# optimization: we can spawn `Cmd` directly without allocating the ProcessChain +function _spawn(cmd::Cmd, stdios::SpawnIOs) + isempty(cmd.exec) && throw(ArgumentError("cannot spawn empty command")) + return _spawn_primitive(cmd.exec[1], cmd, stdios) +end + # assume that having a ProcessChain means that the stdio are setup function _spawn(cmds::AbstractCmd, stdios::SpawnIOs) - pp = setup_stdios(stdios) do stdios - return _spawn(cmds, stdios, ProcessChain()) - end - return pp + return _spawn(cmds, stdios, ProcessChain()) end # helper function for making a copy of a SpawnIOs, with replacement @@ -212,7 +224,7 @@ end # open the child end of each element of `stdios`, and initialize the parent end -function setup_stdios(f, stdios::SpawnIOs) +function setup_stdios(f, stdios::Vector{Redirectable}) nstdio = length(stdios) open_io = SpawnIOs(undef, nstdio) close_io = falses(nstdio) @@ -295,25 +307,26 @@ function setup_stdio(stdio::IO, child_readable::Bool) child = child_readable ? rd : wr try let in = (child_readable ? parent : stdio), - out = (child_readable ? stdio : parent) - @async try + out = (child_readable ? stdio : parent), + t = @async try write(in, out) catch ex @warn "Process I/O error" exception=(ex, catch_backtrace()) + rethrow() finally close(parent) - child_readable || closewrite(stdio) end + return (SyncCloseFD(child, t), true) end catch close_pipe_sync(child) rethrow() end - return (child, true) end -close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio) close_stdio(stdio) = close(stdio) +close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio) +close_stdio(stdio::SyncCloseFD) = close_stdio(stdio.fd) # INTERNAL # pad out stdio to have at least three elements, @@ -325,19 +338,19 @@ close_stdio(stdio) = close(stdio) # - An Filesystem.File or IOStream object to redirect the output to # - A FileRedirect, containing a string specifying a filename to be opened for the child -spawn_opts_swallow(stdios::StdIOSet) = SpawnIO[stdios...] -spawn_opts_inherit(stdios::StdIOSet) = SpawnIO[stdios...] +spawn_opts_swallow(stdios::StdIOSet) = Redirectable[stdios...] +spawn_opts_inherit(stdios::StdIOSet) = Redirectable[stdios...] spawn_opts_swallow(in::Redirectable=devnull, out::Redirectable=devnull, err::Redirectable=devnull) = - SpawnIO[in, out, err] + Redirectable[in, out, err] # pass original descriptors to child processes by default, because we might # have already exhausted and closed the libuv object for our standard streams. 
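For orientation, a brief usage sketch of the user-facing entry points this spawning plumbing serves, `eachline(::Cmd)` and `open(f, ::Cmd)` (not part of the patch; assumes a Unix-like system with `echo` available on the PATH).

```julia
# Usage sketch (assumption: `echo` is an available external command).
lines = collect(eachline(`echo hello`))
@assert lines == ["hello"]

word = open(`echo world`) do io
    readline(io)                     # consume stdout before the block returns
end
@assert word == "world"
```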
# ref issue #8529 spawn_opts_inherit(in::Redirectable=RawFD(0), out::Redirectable=RawFD(1), err::Redirectable=RawFD(2)) = - SpawnIO[in, out, err] + Redirectable[in, out, err] function eachline(cmd::AbstractCmd; keep::Bool=false) out = PipeEndpoint() - processes = _spawn(cmd, SpawnIO[devnull, out, stderr]) + processes = _spawn(cmd, Redirectable[devnull, out, stderr]) # if the user consumes all the data, also check process exit status for success ondone = () -> (success(processes) || pipeline_error(processes); nothing) return EachLine(out, keep=keep, ondone=ondone)::EachLine @@ -385,20 +398,20 @@ function open(cmds::AbstractCmd, stdio::Redirectable=devnull; write::Bool=false, stdio === devnull || throw(ArgumentError("no stream can be specified for `stdio` in read-write mode")) in = PipeEndpoint() out = PipeEndpoint() - processes = _spawn(cmds, SpawnIO[in, out, stderr]) + processes = _spawn(cmds, Redirectable[in, out, stderr]) processes.in = in processes.out = out elseif read out = PipeEndpoint() - processes = _spawn(cmds, SpawnIO[stdio, out, stderr]) + processes = _spawn(cmds, Redirectable[stdio, out, stderr]) processes.out = out elseif write in = PipeEndpoint() - processes = _spawn(cmds, SpawnIO[in, stdio, stderr]) + processes = _spawn(cmds, Redirectable[in, stdio, stderr]) processes.in = in else stdio === devnull || throw(ArgumentError("no stream can be specified for `stdio` in no-access mode")) - processes = _spawn(cmds, SpawnIO[devnull, devnull, stderr]) + processes = _spawn(cmds, Redirectable[devnull, devnull, stderr]) end return processes end @@ -413,14 +426,20 @@ process failed, or if the process attempts to print anything to stdout. """ function open(f::Function, cmds::AbstractCmd, args...; kwargs...) P = open(cmds, args...; kwargs...) - function waitkill(P::Process) + function waitkill(P::Union{Process,ProcessChain}) close(P) - # 0.1 seconds after we hope it dies (from closing stdio), - # we kill the process with SIGTERM (15) - local t = Timer(0.1) do t + # shortly after we hope it starts cleanup and dies (from closing + # stdio), we kill the process with SIGTERM (15) so that we can proceed + # with throwing the error and hope it will exit soon from that + local t = Timer(2) do t process_running(P) && kill(P) end - wait(P) + # pass false to indicate that we do not care about data-races on the + # Julia stdio objects after this point, since we already know this is + # an error path and the state of them is fairly unpredictable anyways + # in that case. Since we closed P some of those should come crumbling + # down already, and we don't want to throw that error here either. + wait(P, false) close(t) end ret = try @@ -430,10 +449,23 @@ function open(f::Function, cmds::AbstractCmd, args...; kwargs...) 
rethrow() end close(P.in) - if !eof(P.out) + closestdio = @async begin + # wait for P to complete (including sync'd), then mark the output streams for EOF (if applicable to that stream type) + wait(P) + err = P.err + applicable(closewrite, err) && closewrite(err) + out = P.out + applicable(closewrite, out) && closewrite(out) + nothing + end + # now verify that the output stream is at EOF, and the user didn't fail to consume it successfully + # (we do not currently verify the user dealt with the stderr stream) + if !(eof(P.out)::Bool) waitkill(P) throw(_UVError("open(do)", UV_EPIPE)) end + # make sure to closestdio is completely done to avoid data-races later + wait(closestdio) success(P) || pipeline_error(P) return ret end @@ -447,7 +479,7 @@ function read(cmd::AbstractCmd) procs = open(cmd, "r", devnull) bytes = read(procs.out) success(procs) || pipeline_error(procs) - return bytes + return bytes::Vector{UInt8} end """ @@ -650,26 +682,31 @@ function process_status(s::Process) error("process status error") end -function wait(x::Process) - process_exited(x) && return - iolock_begin() +function wait(x::Process, syncd::Bool=true) if !process_exited(x) - preserve_handle(x) - lock(x.exitnotify) - iolock_end() - try - wait(x.exitnotify) - finally - unlock(x.exitnotify) - unpreserve_handle(x) + iolock_begin() + if !process_exited(x) + preserve_handle(x) + lock(x.exitnotify) + iolock_end() + try + wait(x.exitnotify) + finally + unlock(x.exitnotify) + unpreserve_handle(x) + end + else + iolock_end() end - else - iolock_end() + end + # and make sure all sync'd Tasks are complete too + syncd && for t in x.syncd + wait(t) end nothing end -wait(x::ProcessChain) = foreach(wait, x.processes) +wait(x::ProcessChain, syncd::Bool=true) = foreach(p -> wait(p, syncd), x.processes) show(io::IO, p::Process) = print(io, "Process(", p.cmd, ", ", process_status(p), ")") diff --git a/base/promotion.jl b/base/promotion.jl index 39d01fcbbfb42..16769f8566b92 100644 --- a/base/promotion.jl +++ b/base/promotion.jl @@ -3,16 +3,26 @@ ## type join (closest common ancestor, or least upper bound) ## """ - typejoin(T, S) + typejoin(T, S, ...) -Return the closest common ancestor of `T` and `S`, i.e. the narrowest type from which -they both inherit. +Return the closest common ancestor of types `T` and `S`, i.e. the narrowest type from which +they both inherit. Recurses on additional varargs. + +# Examples +```jldoctest +julia> typejoin(Int, Float64) +Real + +julia> typejoin(Int, Float64, ComplexF32) +Number +``` """ typejoin() = Bottom -typejoin(@nospecialize(t)) = t -typejoin(@nospecialize(t), ts...) = (@_total_meta; typejoin(t, typejoin(ts...))) +typejoin(@nospecialize(t)) = (@_nospecializeinfer_meta; t) +typejoin(@nospecialize(t), ts...) 
= (@_foldable_meta; @_nospecializeinfer_meta; typejoin(t, typejoin(ts...))) function typejoin(@nospecialize(a), @nospecialize(b)) - @_total_meta + @_foldable_meta + @_nospecializeinfer_meta if isa(a, TypeVar) return typejoin(a.ub, b) elseif isa(b, TypeVar) @@ -81,9 +91,9 @@ function typejoin(@nospecialize(a), @nospecialize(b)) elseif b <: Tuple return Any end - while b !== Any + while !(b === Any) if a <: b.name.wrapper - while a.name !== b.name + while !(a.name === b.name) a = supertype(a)::DataType end if a.name === Type.body.name @@ -107,9 +117,10 @@ function typejoin(@nospecialize(a), @nospecialize(b)) if ai === bi || (isa(ai,Type) && isa(bi,Type) && ai <: bi && bi <: ai) aprimary = aprimary{ai} else + aprimary = aprimary::UnionAll # pushfirst!(vars, aprimary.var) _growbeg!(vars, 1) - arrayset(false, vars, aprimary.var, 1) + vars[1] = aprimary.var aprimary = aprimary.body end end @@ -129,6 +140,7 @@ end # (Core.Compiler.isnotbrokensubtype), use only simple types for `b` function typesplit(@nospecialize(a), @nospecialize(b)) @_foldable_meta + @_nospecializeinfer_meta if a <: b return Bottom end @@ -162,7 +174,12 @@ function promote_typejoin(@nospecialize(a), @nospecialize(b)) c = typejoin(_promote_typesubtract(a), _promote_typesubtract(b)) return Union{a, b, c}::Type end -_promote_typesubtract(@nospecialize(a)) = typesplit(a, Union{Nothing, Missing}) +_promote_typesubtract(@nospecialize(a)) = + a === Any ? a : + a >: Union{Nothing, Missing} ? typesplit(a, Union{Nothing, Missing}) : + a >: Nothing ? typesplit(a, Nothing) : + a >: Missing ? typesplit(a, Missing) : + a function promote_typejoin_union(::Type{T}) where T if T === Union{} @@ -224,7 +241,8 @@ function full_va_len(p::Core.SimpleVector) end # reduce typejoin over A[i:end] -function tailjoin(A, i) +function tailjoin(A::SimpleVector, i::Int) + @_foldable_meta if i > length(A) return unwrapva(A[end]) end @@ -308,6 +326,12 @@ it for new types as appropriate. function promote_rule end promote_rule(::Type, ::Type) = Bottom +# Define some methods to avoid needing to enumerate unrelated possibilities when presented +# with Type{<:T}, and return a value in general accordance with the result given by promote_type +promote_rule(::Type{Bottom}, slurp...) = Bottom +promote_rule(::Type{Bottom}, ::Type{Bottom}, slurp...) = Bottom # not strictly necessary, since the next method would match unambiguously anyways +promote_rule(::Type{Bottom}, ::Type{T}, slurp...) where {T} = T +promote_rule(::Type{T}, ::Type{Bottom}, slurp...) where {T} = T promote_result(::Type,::Type,::Type{T},::Type{S}) where {T,S} = (@inline; promote_type(T,S)) # If no promote_rule is defined, both directions give Bottom. In that @@ -320,12 +344,25 @@ promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} = Convert all arguments to a common type, and return them all (as a tuple). If no arguments can be converted, an error is raised. -See also: [`promote_type`], [`promote_rule`]. +See also: [`promote_type`](@ref), [`promote_rule`](@ref). # Examples ```jldoctest julia> promote(Int8(1), Float16(4.5), Float32(4.1)) (1.0f0, 4.5f0, 4.1f0) + +julia> promote_type(Int8, Float16, Float32) +Float32 + +julia> reduce(Base.promote_typejoin, (Int8, Float16, Float32)) +Real + +julia> promote(1, "x") +ERROR: promotion of types Int64 and String failed to change any arguments +[...] + +julia> promote_type(Int, String) +Any ``` """ function promote end @@ -444,18 +481,124 @@ else _return_type(@nospecialize(f), @nospecialize(t)) = Any end +function TupleOrBottom(tt...) 
+ any(p -> p === Union{}, tt) && return Union{} + return Tuple{tt...} +end + """ promote_op(f, argtypes...) Guess what an appropriate container eltype would be for storing results of `f(::argtypes...)`. The guess is in part based on type inference, so can change any time. +Accordingly, return a type `R` such that `f(args...) isa R` where `args isa T`. + !!! warning Due to its fragility, use of `promote_op` should be avoided. It is preferable to base the container eltype on the type of the actual elements. Only in the absence of any elements (for an empty result container), it may be unavoidable to call `promote_op`. + +The type `R` obtained from `promote_op` is merely an upper bound. There may exist a stricter +type `S` such that `f(args...) isa S` for every `args isa T` with `S <: R` and `S != R`. +Furthermore, the exact type `R` obtained from `promote_op` depends on various factors +including but not limited to the exact Julia version used, packages loaded, and command line +options. As such, when used in publicly registered packages, **it is the package authors' +responsibility to ensure that the API guarantees provided by the package do not depend on +the exact type `R` obtained from `promote_op`.** + +Additionally, the result may return overly exact types, such as `DataType`, `Type`, or +`Union{...}`, while the desired inputs or outputs may be different from those. The internal +`promote_typejoin_union` function may be helpful to improve the result in some of these +cases. + +# Extended help + +## Examples + +The following function is an invalid use-case of `promote_op`. + +```julia +\""" + invalid_usecase1(f, xs::AbstractArray) -> ys::Array + +Return an array `ys` such that `vec(ys)` is `isequal`-equivalent to + + [f(xs[1]), f(xs[2]), ..., f(xs[end])] +\""" +function invalid_usecase1(f, xs) + R = promote_op(f, eltype(xs)) + ys = similar(xs, R) + for i in eachindex(xs, ys) + ys[i] = f(xs[i]) + end + return ys +end +``` + +This is because the value obtained through `eltype(invalid_usecase1(f, xs))` depends on +exactly what `promote_op` returns. It may be improved by re-computing the element type +before returning the result. + +```julia +function valid_usecase1(f, xs) + R = promote_typejoin_union(promote_op(f, eltype(xs))) + ys = similar(xs, R) + S = Union{} + for i in eachindex(xs, ys) + ys[i] = f(xs[i]) + S = promote_type(S, typeof(ys[i])) + end + if S != R + zs = similar(xs, S) + copyto!(zs, ys) + return zs + end + return ys +end +``` + +Note that using [`isconcretetype`](@ref) on the result is not enough to safely use +`promote_op`. The following function is another invalid use-case of `promote_op`. + +```julia +function invalid_usecase2(f, xs) + R = promote_op(f, eltype(xs)) + if isconcretetype(R) + ys = similar(xs, R) + else + ys = similar(xs, Any) + end + for i in eachindex(xs, ys) + ys[i] = f(xs[i]) + end + return ys +end +``` + +This is because whether or not the caller gets `Any` element type depends on if `promote_op` +can infer a concrete return type of the given function. A fix similar to `valid_usecase1` +can be used. + +*Technically*, another possible fix for `invalid_usecase1` and `invalid_usecase2` is to +loosen the API guarantee: + +> another_valid_usecase1(f, xs::AbstractArray) -> ys::Array +> +> Return an array `ys` such that every element in `xs` with the same index +> is mapped with `f`. +> +> The element type of `ys` is _undefined_. It must not be used with generic +> functions whose behavior depend on the element type of `ys`. 
+ +However, it is discouraged to define such unconventional API guarantees. """ -promote_op(f, S::Type...) = _return_type(f, Tuple{S...}) +function promote_op(f, S::Type...) + argT = TupleOrBottom(S...) + argT === Union{} && return Union{} + return _return_type(f, argT) +end + ## catch-alls to prevent infinite recursion when definitions are missing ## @@ -476,7 +619,7 @@ xor(x::T, y::T) where {T<:Integer} = no_op_err("xor", T) (==)(x::T, y::T) where {T<:Number} = x === y (< )(x::T, y::T) where {T<:Real} = no_op_err("<" , T) -(<=)(x::T, y::T) where {T<:Real} = no_op_err("<=", T) +(<=)(x::T, y::T) where {T<:Real} = (x == y) | (x < y) rem(x::T, y::T) where {T<:Real} = no_op_err("rem", T) mod(x::T, y::T) where {T<:Real} = no_op_err("mod", T) diff --git a/base/range.jl b/base/range.jl index 10fa753d7538a..e7d57fa3c7e85 100644 --- a/base/range.jl +++ b/base/range.jl @@ -24,17 +24,27 @@ _colon(::Ordered, ::Any, start::T, step, stop::T) where {T} = StepRange(start, step, stop) # for T<:Union{Float16,Float32,Float64} see twiceprecision.jl _colon(::Ordered, ::ArithmeticRounds, start::T, step, stop::T) where {T} = - StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1) + StepRangeLen(start, step, convert(Integer, fld(stop - start, step)) + 1) _colon(::Any, ::Any, start::T, step, stop::T) where {T} = - StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1) + StepRangeLen(start, step, convert(Integer, fld(stop - start, step)) + 1) """ (:)(start, [step], stop) -Range operator. `a:b` constructs a range from `a` to `b` with a step size of 1 (a [`UnitRange`](@ref)) -, and `a:s:b` is similar but uses a step size of `s` (a [`StepRange`](@ref)). +Range operator. `a:b` constructs a range from `a` to `b` with a step size +equal to 1, which produces: -`:` is also used in indexing to select whole dimensions, e.g. in `A[:, 1]`. +* a [`UnitRange`](@ref) when `a` and `b` are integers, or +* a [`StepRange`](@ref) when `a` and `b` are characters, or +* a [`StepRangeLen`](@ref) when `a` and/or `b` are floating-point. + +`a:s:b` is similar but uses a step size of `s` (a [`StepRange`](@ref) or +[`StepRangeLen`](@ref)). See also [`range`](@ref) for more control. + +The operator `:` is also used in indexing to select whole dimensions, e.g. in `A[:, 1]`. + +`:` is also used to [`quote`](@ref) code, e.g. `:(x + y) isa Expr` and `:x isa Symbol`. +Since `:2 isa Int`, it does *not* create a range in indexing: `v[:2] == v[2] != v[begin:2]`. """ (:)(start::T, step, stop::T) where {T} = _colon(start, step, stop) (:)(start::T, step, stop::T) where {T<:Real} = _colon(start, step, stop) @@ -252,7 +262,7 @@ abstract type AbstractRange{T} <: AbstractArray{T,1} end RangeStepStyle(::Type{<:AbstractRange}) = RangeStepIrregular() RangeStepStyle(::Type{<:AbstractRange{<:Integer}}) = RangeStepRegular() -convert(::Type{T}, r::AbstractRange) where {T<:AbstractRange} = r isa T ? r : T(r) +convert(::Type{T}, r::AbstractRange) where {T<:AbstractRange} = r isa T ? r : T(r)::T ## ordinal ranges @@ -283,7 +293,7 @@ abstract type AbstractUnitRange{T} <: OrdinalRange{T,T} end Ranges with elements of type `T` with spacing of type `S`. The step between each element is constant, and the range is defined in terms of a `start` and `stop` of type `T` and a `step` of type `S`. Neither -`T` nor `S` should be floating point types. The syntax `a:b:c` with `b > 1` +`T` nor `S` should be floating point types. The syntax `a:b:c` with `b != 0` and `a`, `b`, and `c` all integers creates a `StepRange`. 
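A quick sanity check of the type claims made in the rewritten `(:)` and `StepRange` docstrings above (not part of the patch; the exact `StepRangeLen` type parameters are an implementation detail and may differ by Julia version).

```julia
@assert (1:3)         isa UnitRange{Int}
@assert ('a':'c')     isa StepRange{Char,Int}
@assert (1:2:9)       isa StepRange{Int,Int}
@assert (0.0:0.5:2.0) isa StepRangeLen       # floating-point endpoints/step
@assert (1:0.5:3)     isa StepRangeLen       # mixed arguments promote to Float64
```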
# Examples @@ -337,10 +347,11 @@ function steprange_last(start, step, stop)::typeof(stop) # (to simplify handling both signed and unsigned T and checking for signed overflow): absdiff, absstep = stop > start ? (stop - start, step) : (start - stop, -step) - # Compute remainder as a nonnegative number: + # Compute remainder as a non-negative number: if absdiff isa Signed && absdiff < zero(absdiff) # unlikely, but handle the signed overflow case with unsigned rem - remain = convert(typeof(absdiff), unsigned(absdiff) % absstep) + overflow_case(absdiff, absstep) = (@noinline; convert(typeof(absdiff), unsigned(absdiff) % absstep)) + remain = overflow_case(absdiff, absstep) else remain = convert(typeof(absdiff), absdiff % absstep) end @@ -361,6 +372,7 @@ function steprange_last_empty(start::Integer, step, stop)::typeof(stop) end return last end +steprange_last_empty(start::Bool, step, stop) = start ⊻ (step > zero(step)) # isnegative(step) ? start : !start # For types where x+oneunit(x) may not be well-defined use the user-given value for stop steprange_last_empty(start, step, stop) = stop @@ -393,7 +405,11 @@ struct UnitRange{T<:Real} <: AbstractUnitRange{T} end UnitRange{T}(start, stop) where {T<:Real} = UnitRange{T}(convert(T, start), convert(T, stop)) UnitRange(start::T, stop::T) where {T<:Real} = UnitRange{T}(start, stop) -UnitRange(start, stop) = UnitRange(promote(start, stop)...) +function UnitRange(start, stop) + startstop_promoted = promote(start, stop) + not_sametype((start, stop), startstop_promoted) + UnitRange(startstop_promoted...) +end # if stop and start are integral, we know that their difference is a multiple of 1 unitrange_last(start::Integer, stop::Integer) = @@ -432,7 +448,7 @@ distinction that the lower limit is guaranteed (by the type system) to be 1. """ struct OneTo{T<:Integer} <: AbstractUnitRange{T} - stop::T + stop::T # invariant: stop >= zero(stop) function OneTo{T}(stop) where {T<:Integer} throwbool(r) = (@noinline; throw(ArgumentError("invalid index: $r of type Bool"))) T === Bool && throwbool(stop) @@ -448,6 +464,8 @@ struct OneTo{T<:Integer} <: AbstractUnitRange{T} T === Bool && throwbool(r) return new(max(zero(T), last(r))) end + + global unchecked_oneto(stop::Integer) = new{typeof(stop)}(stop) end OneTo(stop::T) where {T<:Integer} = OneTo{T}(stop) OneTo(r::AbstractRange{T}) where {T<:Integer} = OneTo{T}(r) @@ -464,9 +482,12 @@ A range `r` where `r[i]` produces values of type `T` (in the first form, `T` is deduced automatically), parameterized by a `ref`erence value, a `step`, and the `len`gth. By default `ref` is the starting value `r[1]`, but alternatively you can supply it as the value of -`r[offset]` for some other index `1 <= offset <= len`. In conjunction -with `TwicePrecision` this can be used to implement ranges that are -free of roundoff error. +`r[offset]` for some other index `1 <= offset <= len`. The syntax `a:b` +or `a:b:c`, where any of `a`, `b`, or `c` are floating-point numbers, creates a +`StepRangeLen`. + +!!! compat "Julia 1.7" + The 4th type parameter `L` requires at least Julia 1.7. 
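To illustrate the `ref`/`offset` parameterization described above, a small sketch (not part of the patch): the reference value need not be the first element, and the float colon syntax produces a `StepRangeLen`.

```julia
# Sketch: a StepRangeLen whose reference value sits at an interior offset (r[offset] == ref).
r = StepRangeLen(0.0, 0.1, 5, 3)         # ref = 0.0 is r[3]
@assert r[3] == 0.0
@assert first(r) ≈ -0.2 && last(r) ≈ 0.2
@assert (0.0:0.1:0.4) isa StepRangeLen   # float colon syntax creates a StepRangeLen
```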
""" struct StepRangeLen{T,R,S,L<:Integer} <: AbstractRange{T} ref::R # reference value (might be smallest-magnitude value in the range) @@ -577,9 +598,10 @@ function show(io::IO, r::LinRange{T}) where {T} print(io, "LinRange{") show(io, T) print(io, "}(") - show(io, first(r)) + ioc = IOContext(io, :typeinfo=>T) + show(ioc, first(r)) print(io, ", ") - show(io, last(r)) + show(ioc, last(r)) print(io, ", ") show(io, length(r)) print(io, ')') @@ -684,7 +706,8 @@ step(r::LinRange) = (last(r)-first(r))/r.lendiv step_hp(r::StepRangeLen) = r.step step_hp(r::AbstractRange) = step(r) -axes(r::AbstractRange) = (oneto(length(r)),) +# Needed to ensure `has_offset_axes` can constant-fold. +has_offset_axes(::StepRange) = false # n.b. checked_length for these is defined iff checked_add and checked_sub are # defined between the relevant types @@ -747,64 +770,66 @@ length(r::OneTo) = Integer(r.stop - zero(r.stop)) length(r::StepRangeLen) = r.len length(r::LinRange) = r.len -let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128} - global length, checked_length +let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}, + smallints = (Int === Int64 ? + Union{Int8, UInt8, Int16, UInt16, Int32, UInt32} : + Union{Int8, UInt8, Int16, UInt16}), + bitints = Union{bigints, smallints} + global length, checked_length, firstindex # compile optimization for which promote_type(T, Int) == T length(r::OneTo{T}) where {T<:bigints} = r.stop # slightly more accurate length and checked_length in extreme cases # (near typemax) for types with known `unsigned` functions function length(r::OrdinalRange{T}) where T<:bigints s = step(r) - isempty(r) && return zero(T) diff = last(r) - first(r) + isempty(r) && return zero(diff) # if |s| > 1, diff might have overflowed, but unsigned(diff)÷s should # therefore still be valid (if the result is representable at all) # n.b. !(s isa T) if s isa Unsigned || -1 <= s <= 1 || s == -s - a = div(diff, s) % T + a = div(diff, s) % typeof(diff) elseif s < 0 - a = div(unsigned(-diff), -s) % T + a = div(unsigned(-diff), -s) % typeof(diff) else - a = div(unsigned(diff), s) % T + a = div(unsigned(diff), s) % typeof(diff) end - return a + oneunit(T) + return a + oneunit(a) end function checked_length(r::OrdinalRange{T}) where T<:bigints s = step(r) - isempty(r) && return zero(T) stop, start = last(r), first(r) + ET = promote_type(typeof(stop), typeof(start)) + isempty(r) && return zero(ET) # n.b. !(s isa T) if s > 1 diff = stop - start - a = convert(T, div(unsigned(diff), s)) + a = convert(ET, div(unsigned(diff), s)) elseif s < -1 diff = start - stop - a = convert(T, div(unsigned(diff), -s)) + a = convert(ET, div(unsigned(diff), -s)) elseif s > 0 - a = div(checked_sub(stop, start), s) + a = convert(ET, div(checked_sub(stop, start), s)) else - a = div(checked_sub(start, stop), -s) + a = convert(ET, div(checked_sub(start, stop), -s)) end - return checked_add(convert(T, a), oneunit(T)) + return checked_add(a, oneunit(a)) end -end + firstindex(r::StepRange{<:bigints,<:bitints}) = one(last(r)-first(r)) -# some special cases to favor default Int type -let smallints = (Int === Int64 ? - Union{Int8, UInt8, Int16, UInt16, Int32, UInt32} : - Union{Int8, UInt8, Int16, UInt16}) - global length, checked_length - # n.b. !(step isa T) + # some special cases to favor default Int type function length(r::OrdinalRange{<:smallints}) s = step(r) isempty(r) && return 0 - return div(Int(last(r)) - Int(first(r)), s) + 1 + # n.b. 
!(step isa T) + return Int(div(Int(last(r)) - Int(first(r)), s)) + 1 end length(r::AbstractUnitRange{<:smallints}) = Int(last(r)) - Int(first(r)) + 1 length(r::OneTo{<:smallints}) = Int(r.stop) checked_length(r::OrdinalRange{<:smallints}) = length(r) checked_length(r::AbstractUnitRange{<:smallints}) = length(r) checked_length(r::OneTo{<:smallints}) = length(r) + firstindex(::StepRange{<:smallints,<:bitints}) = 1 end first(r::OrdinalRange{T}) where {T} = convert(T, r.start) @@ -881,11 +906,20 @@ end ## indexing +function isassigned(r::AbstractRange, i::Integer) + i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) + firstindex(r) <= i <= lastindex(r) +end + +# `_getindex` is like `getindex` but does not check if `i isa Bool` +function _getindex(v::AbstractRange, i::Integer) + @boundscheck checkbounds(v, i) + unsafe_getindex(v, i) +end + _in_unit_range(v::UnitRange, val, i::Integer) = i > 0 && val <= v.stop && val >= v.start -function getindex(v::UnitRange{T}, i::Integer) where T - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) +function _getindex(v::UnitRange{T}, i::Integer) where T val = convert(T, v.start + (i - oneunit(i))) @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i) val @@ -894,64 +928,38 @@ end const OverflowSafe = Union{Bool,Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128} -function getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe} - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) +function _getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe} val = v.start + (i - oneunit(i)) @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i) val % T end -function getindex(v::OneTo{T}, i::Integer) where T - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - @boundscheck ((i > 0) & (i <= v.stop)) || throw_boundserror(v, i) - convert(T, i) -end - -function getindex(v::AbstractRange{T}, i::Integer) where T - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - ret = convert(T, first(v) + (i - oneunit(i))*step_hp(v)) - ok = ifelse(step(v) > zero(step(v)), - (ret <= last(v)) & (ret >= first(v)), - (ret <= first(v)) & (ret >= last(v))) - @boundscheck ((i > 0) & ok) || throw_boundserror(v, i) - ret -end - -function getindex(r::Union{StepRangeLen,LinRange}, i::Integer) - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - @boundscheck checkbounds(r, i) - unsafe_getindex(r, i) +let BitInteger64 = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64} # for bootstrapping + function checkbounds(::Type{Bool}, v::StepRange{<:BitInteger64, <:BitInteger64}, i::BitInteger64) + res = widemul(step(v), i-oneunit(i)) + first(v) + (0 < i) & ifelse(0 < step(v), res <= last(v), res >= last(v)) + end end -# This is separate to make it useful even when running with --check-bounds=yes +# unsafe_getindex is separate to make it useful even when running with --check-bounds=yes +# it assumes the index is inbounds but does not segfault even if the index is out of bounds. +# it does not check if the index isa bool. 
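A small sketch of the `isassigned` method for ranges added above (not part of the patch): bounds are checked without throwing, and `Bool` indices are rejected as elsewhere in indexing.

```julia
using Test  # only needed for @test_throws in this sketch

r = 1:2:9                          # 5 elements: 1, 3, 5, 7, 9
@test isassigned(r, 3)             # in bounds (r[3] == 5)
@test !isassigned(r, 6)            # out of bounds: returns false rather than throwing
@test_throws ArgumentError isassigned(r, true)   # Bool indices are rejected
```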
+unsafe_getindex(v::OneTo{T}, i::Integer) where T = convert(T, i) +unsafe_getindex(v::AbstractRange{T}, i::Integer) where T = convert(T, first(v) + (i - oneunit(i))*step_hp(v)) function unsafe_getindex(r::StepRangeLen{T}, i::Integer) where T - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - u = i - r.offset + u = oftype(r.offset, i) - r.offset T(r.ref + u*r.step) end - -function _getindex_hiprec(r::StepRangeLen, i::Integer) # without rounding by T - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - u = i - r.offset - r.ref + u*r.step -end - -function unsafe_getindex(r::LinRange, i::Integer) - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - lerpi(i-oneunit(i), r.lendiv, r.start, r.stop) -end +unsafe_getindex(r::LinRange, i::Integer) = lerpi(i-oneunit(i), r.lendiv, r.start, r.stop) function lerpi(j::Integer, d::Integer, a::T, b::T) where T - @inline t = j/d # ∈ [0,1] # compute approximately fma(t, b, -fma(t, a, a)) return T((1-t)*a + t*b) end +# non-scalar indexing + getindex(r::AbstractRange, ::Colon) = copy(r) function getindex(r::AbstractUnitRange, s::AbstractUnitRange{T}) where {T<:Integer} @@ -980,13 +988,14 @@ function getindex(r::AbstractUnitRange, s::StepRange{T}) where {T<:Integer} @boundscheck checkbounds(r, s) if T === Bool - return range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length=last(s)) + len = Int(last(s)) + return range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length=len) else f = first(r) start = oftype(f, f + s.start - firstindex(r)) st = step(s) len = length(s) - stop = oftype(f, start + (len - oneunit(len)) * st) + stop = oftype(f, start + (len - oneunit(len)) * (iszero(len) ? copysign(oneunit(st), st) : st)) return range(start, stop; step=st) end end @@ -996,26 +1005,22 @@ function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer} @boundscheck checkbounds(r, s) if T === Bool - if length(s) == 0 - start, len = first(r), 0 - elseif length(s) == 1 - if first(s) - start, len = first(r), 1 - else - start, len = first(r), 0 - end - else # length(s) == 2 - start, len = last(r), 1 - end - return range(start, step=step(r); length=len) + # treat as a zero, one, or two-element vector, where at most one element is true + # staying inbounds on the original range (preserving either start or + # stop as either stop or start, depending on the length) + st = step(s) + nonempty = st > zero(st) ? last(s) : first(s) + # n.b. isempty(r) implies isempty(r) which means !nonempty and !first(s) + range((first(s) ⊻ nonempty) ⊻ isempty(r) ? last(r) : first(r), step=step(r), length=Int(nonempty)) else f = r.start fs = first(s) st = r.step - start = oftype(f, f + (fs - oneunit(fs)) * st) - st = st * step(s) + start = oftype(f, f + (fs - firstindex(r)) * st) + st *= step(s) len = length(s) - stop = oftype(f, start + (len - oneunit(len)) * st) + # mimic steprange_last_empty here, to try to avoid overflow + stop = oftype(f, start + (len - oneunit(len)) * (iszero(len) ? 
copysign(oneunit(st), st) : st)) return range(start, stop; step=st) end end @@ -1050,6 +1055,11 @@ function getindex(r::StepRangeLen{T}, s::OrdinalRange{S}) where {T, S<:Integer} end end +function _getindex_hiprec(r::StepRangeLen, i::Integer) # without rounding by T + u = oftype(r.offset, i) - r.offset + r.ref + u*r.step +end + function getindex(r::LinRange{T}, s::OrdinalRange{S}) where {T, S<:Integer} @inline @boundscheck checkbounds(r, s) @@ -1079,7 +1089,7 @@ show(io::IO, r::AbstractRange) = print(io, repr(first(r)), ':', repr(step(r)), ' show(io::IO, r::UnitRange) = print(io, repr(first(r)), ':', repr(last(r))) show(io::IO, r::OneTo) = print(io, "Base.OneTo(", r.stop, ")") function show(io::IO, r::StepRangeLen) - if step(r) != 0 + if !iszero(step(r)) print(io, repr(first(r)), ':', repr(step(r)), ':', repr(last(r))) else # ugly temporary printing, to avoid 0:0:0 etc. @@ -1222,19 +1232,17 @@ end # _findin (the index of intersection) function _findin(r::AbstractRange{<:Integer}, span::AbstractUnitRange{<:Integer}) - local ifirst - local ilast fspan = first(span) lspan = last(span) fr = first(r) lr = last(r) sr = step(r) if sr > 0 - ifirst = fr >= fspan ? 1 : ceil(Integer,(fspan-fr)/sr)+1 - ilast = lr <= lspan ? length(r) : length(r) - ceil(Integer,(lr-lspan)/sr) + ifirst = fr >= fspan ? 1 : cld(fspan-fr, sr)+1 + ilast = lr <= lspan ? length(r) : length(r) - cld(lr-lspan, sr) elseif sr < 0 - ifirst = fr <= lspan ? 1 : ceil(Integer,(lspan-fr)/sr)+1 - ilast = lr >= fspan ? length(r) : length(r) - ceil(Integer,(lr-fspan)/sr) + ifirst = fr <= lspan ? 1 : cld(lspan-fr, sr)+1 + ilast = lr >= fspan ? length(r) : length(r) - cld(lr-fspan, sr) else ifirst = fr >= fspan ? 1 : length(r)+1 ilast = fr <= lspan ? length(r) : 0 @@ -1263,11 +1271,6 @@ el_same(::Type{T}, a::Type{<:AbstractArray{T,n}}, b::Type{<:AbstractArray{S,n}}) el_same(::Type{T}, a::Type{<:AbstractArray{S,n}}, b::Type{<:AbstractArray{T,n}}) where {T,S,n} = b el_same(::Type, a, b) = promote_typejoin(a, b) -promote_result(::Type{<:AbstractArray}, ::Type{<:AbstractArray}, ::Type{T}, ::Type{S}) where {T,S} = (@inline; promote_type(T,S)) -promote_result(::Type{T}, ::Type{S}, ::Type{Bottom}, ::Type{Bottom}) where {T<:AbstractArray,S<:AbstractArray} = (@inline; promote_typejoin(T,S)) -# If no promote_rule is defined, both directions give Bottom. In that case use typejoin on the eltypes instead and give Array as the container. -promote_result(::Type{<:AbstractArray{T,n}}, ::Type{<:AbstractArray{S,n}}, ::Type{Bottom}, ::Type{Bottom}) where {T,S,n} = (@inline; Array{promote_type(T,S),n}) - promote_rule(a::Type{UnitRange{T1}}, b::Type{UnitRange{T2}}) where {T1,T2} = el_same(promote_type(T1, T2), a, b) UnitRange{T}(r::UnitRange{T}) where {T<:Real} = r @@ -1354,8 +1357,21 @@ function vcat(rs::AbstractRange{T}...) where T return a end -Array{T,1}(r::AbstractRange{T}) where {T} = vcat(r) -collect(r::AbstractRange) = vcat(r) +# This method differs from that for AbstractArrays as it +# use iteration instead of indexing. This works even if certain +# non-standard ranges don't support indexing. 
+# See https://github.com/JuliaLang/julia/pull/27302 +# Similarly, collect(r::AbstractRange) uses iteration +function Array{T,1}(r::AbstractRange{T}) where {T} + a = Vector{T}(undef, length(r)) + i = 1 + for x in r + @inbounds a[i] = x + i += 1 + end + return a +end +collect(r::AbstractRange) = Array(r) _reverse(r::OrdinalRange, ::Colon) = (:)(last(r), negate(step(r)), first(r)) function _reverse(r::StepRangeLen, ::Colon) @@ -1388,14 +1404,13 @@ function sum(r::AbstractRange{<:Real}) end function _in_range(x, r::AbstractRange) - if !isfinite(x) - return false - elseif iszero(step(r)) - return !isempty(r) && first(r) == x - else - n = round(Integer, (x - first(r)) / step(r)) + 1 - return n >= 1 && n <= length(r) && r[n] == x - end + isempty(r) && return false + f, l = first(r), last(r) + # check for NaN, Inf, and large x that may overflow in the next calculation + f <= x <= l || l <= x <= f || return false + iszero(step(r)) && return true + n = round(Integer, (x - f) / step(r)) + 1 + n >= 1 && n <= length(r) && r[n] == x end in(x::Real, r::AbstractRange{<:Real}) = _in_range(x, r) # This method needs to be defined separately since -(::T, ::T) can be implemented diff --git a/base/rational.jl b/base/rational.jl index 782b05e587e1b..5288492e72e71 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -17,12 +17,10 @@ end unsafe_rational(num::T, den::T) where {T<:Integer} = unsafe_rational(T, num, den) unsafe_rational(num::Integer, den::Integer) = unsafe_rational(promote(num, den)...) -@noinline __throw_rational_argerror_typemin(T) = throw(ArgumentError("invalid rational: denominator can't be typemin($T)")) function checked_den(::Type{T}, num::T, den::T) where T<:Integer if signbit(den) - den = -den - signbit(den) && __throw_rational_argerror_typemin(typeof(den)) - num = -num + den = checked_neg(den) + num = checked_neg(num) end return unsafe_rational(T, num, den) end @@ -49,6 +47,12 @@ end //(num, den) Divide two integers or rational numbers, giving a [`Rational`](@ref) result. +More generally, `//` can be used for exact rational division of other numeric types +with integer or rational components, such as complex numbers with integer components. + +Note that floating-point ([`AbstractFloat`](@ref)) arguments are not permitted by `//` +(even if the values are rational). +The arguments must be subtypes of [`Integer`](@ref), `Rational`, or composites thereof. # Examples ```jldoctest @@ -57,6 +61,13 @@ julia> 3 // 5 julia> (3 // 5) // (2 // 1) 3//10 + +julia> (1+2im) // (3+4im) +11//25 + 2//25*im + +julia> 1.0 // 2 +ERROR: MethodError: no method matching //(::Float64, ::Int64) +[...] ``` """ //(n::Integer, d::Integer) = Rational(n,d) @@ -83,6 +94,11 @@ end function show(io::IO, x::Rational) show(io, numerator(x)) + + if isone(denominator(x)) && get(io, :typeinfo, Any) <: Rational + return + end + print(io, "//") show(io, denominator(x)) end @@ -168,10 +184,11 @@ julia> typeof(numerator(a)) BigInt ``` """ -function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer +function rationalize(::Type{T}, x::Union{AbstractFloat, Rational}, tol::Real) where T<:Integer if tol < 0 throw(ArgumentError("negative tolerance $tol")) end + T<:Unsigned && x < 0 && __throw_negate_unsigned() isnan(x) && return T(x)//one(T) isinf(x) && return unsafe_rational(x < 0 ? 
-one(T) : one(T), zero(T)) @@ -183,7 +200,6 @@ function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer a = trunc(x) r = x-a y = one(x) - tolx = oftype(x, tol) nt, t, tt = tolx, zero(tolx), tolx ia = np = nq = zero(T) @@ -228,10 +244,21 @@ function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer return p // q end end -rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol)::Rational{T} -rationalize(x::AbstractFloat; kvs...) = rationalize(Int, x; kvs...) -rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re, kvs...)::Rational{T}, rationalize(T, x.im, kvs...)::Rational{T}) -rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re, kvs...), rationalize(Int, x.im, kvs...)) +rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol) +rationalize(x::Real; kvs...) = rationalize(Int, x; kvs...) +rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re; kvs...), rationalize(T, x.im; kvs...)) +rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re; kvs...), rationalize(Int, x.im; kvs...)) +rationalize(::Type{T}, x::Rational; tol::Real = 0) where {T<:Integer} = rationalize(T, x, tol) +rationalize(x::Rational; kvs...) = x +rationalize(x::Integer; kvs...) = Rational(x) +function rationalize(::Type{T}, x::Integer; kvs...) where {T<:Integer} + if Base.hastypemax(T) # BigInt doesn't + x < typemin(T) && return unsafe_rational(-one(T), zero(T)) + x > typemax(T) && return unsafe_rational(one(T), zero(T)) + end + return Rational{T}(x) +end + """ numerator(x) @@ -272,7 +299,7 @@ signbit(x::Rational) = signbit(x.num) copysign(x::Rational, y::Real) = unsafe_rational(copysign(x.num, y), x.den) copysign(x::Rational, y::Rational) = unsafe_rational(copysign(x.num, y.num), x.den) -abs(x::Rational) = Rational(abs(x.num), x.den) +abs(x::Rational) = unsafe_rational(checked_abs(x.num), x.den) typemin(::Type{Rational{T}}) where {T<:Signed} = unsafe_rational(T, -one(T), zero(T)) typemin(::Type{Rational{T}}) where {T<:Integer} = unsafe_rational(T, zero(T), one(T)) @@ -468,10 +495,6 @@ for (S, T) in ((Rational, Integer), (Integer, Rational), (Rational, Rational)) end end -trunc(::Type{T}, x::Rational) where {T} = round(T, x, RoundToZero) -floor(::Type{T}, x::Rational) where {T} = round(T, x, RoundDown) -ceil(::Type{T}, x::Rational) where {T} = round(T, x, RoundUp) - round(x::Rational, r::RoundingMode=RoundNearest) = round(typeof(x), x, r) function round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T,Tr} @@ -533,25 +556,29 @@ function hash(x::Rational{<:BitInteger64}, h::UInt) num, den = Base.numerator(x), Base.denominator(x) den == 1 && return hash(num, h) den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h) - if isodd(den) + if isodd(den) # since den != 1, this rational can't be a Float64 pow = trailing_zeros(num) num >>= pow + h = hash_integer(den, h) else pow = trailing_zeros(den) den >>= pow pow = -pow - if den == 1 && abs(num) < 9007199254740992 - return hash(ldexp(Float64(num),pow),h) + if den == 1 + if uabs(num) < UInt64(maxintfloat(Float64)) + return hash(ldexp(Float64(num),pow),h) + end + else + h = hash_integer(den, h) end end - h = hash_integer(den, h) h = hash_integer(pow, h) h = hash_integer(num, h) return h end # These methods are only needed for performance. 
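To illustrate the `rationalize` extension in this hunk, which now accepts `Rational` and `Integer` inputs, a small sketch (not part of the patch; assumes a Julia build with these methods, and that the default tolerance for `Rational` input is exact, i.e. `tol = 0`).

```julia
@assert rationalize(Int, 1//3) == 1//3   # Rational input, exact by default
@assert rationalize(7) == 7//1           # Integer input
@assert rationalize(Int8, 300) == 1//0   # 300 > typemax(Int8): saturates to 1//0
```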
Since `first(r)` and `last(r)` have the -# same denominator (because their difference is an integer), `length(r)` can be calulated +# same denominator (because their difference is an integer), `length(r)` can be calculated # without calling `gcd`. function length(r::AbstractUnitRange{T}) where T<:Rational @inline diff --git a/base/rawbigints.jl b/base/rawbigints.jl new file mode 100644 index 0000000000000..6508bea05be0f --- /dev/null +++ b/base/rawbigints.jl @@ -0,0 +1,150 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" +Segment of raw words of bits interpreted as a big integer. Less +significant words come first. Each word is in machine-native bit-order. +""" +struct RawBigInt{T<:Unsigned} + d::String + word_count::Int + + function RawBigInt{T}(d::String, word_count::Int) where {T<:Unsigned} + new{T}(d, word_count) + end +end + +elem_count(x::RawBigInt, ::Val{:words}) = x.word_count +elem_count(x::Unsigned, ::Val{:bits}) = sizeof(x) * 8 +word_length(::RawBigInt{T}) where {T} = elem_count(zero(T), Val(:bits)) +elem_count(x::RawBigInt{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words)) +reversed_index(n::Int, i::Int) = n - i - 1 +reversed_index(x, i::Int, v::Val) = reversed_index(elem_count(x, v), i)::Int +split_bit_index(x::RawBigInt, i::Int) = divrem(i, word_length(x), RoundToZero) + +""" +`i` is the zero-based index of the wanted word in `x`, starting from +the less significant words. +""" +function get_elem(x::RawBigInt{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T} + # `i` must be non-negative and less than `x.word_count` + d = x.d + (GC.@preserve d unsafe_load(Ptr{T}(pointer(d)), i + 1))::T +end + +function get_elem(x, i::Int, v::Val, ::Val{:descending}) + j = reversed_index(x, i, v) + get_elem(x, j, v, Val(:ascending)) +end + +word_is_nonzero(x::RawBigInt, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v)) + +word_is_nonzero(x::RawBigInt, v::Val) = let x = x + i -> word_is_nonzero(x, i, v) +end + +""" +Returns a `Bool` indicating whether the `len` least significant words +of `x` are nonzero. +""" +function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:words}) + any(word_is_nonzero(x, Val(:ascending)), 0:(len - 1)) +end + +""" +Returns a `Bool` indicating whether the `len` least significant bits of +the `i`-th (zero-based index) word of `x` are nonzero. +""" +function tail_is_nonzero(x::RawBigInt, len::Int, i::Int, ::Val{:word}) + !iszero(len) && + !iszero(get_elem(x, i, Val(:words), Val(:ascending)) << (word_length(x) - len)) +end + +""" +Returns a `Bool` indicating whether the `len` least significant bits of +`x` are nonzero. +""" +function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:bits}) + if 0 < len + word_count, bit_count_in_word = split_bit_index(x, len) + tail_is_nonzero(x, bit_count_in_word, word_count, Val(:word)) || + tail_is_nonzero(x, word_count, Val(:words)) + else + false + end::Bool +end + +""" +Returns a `Bool` that is the `i`-th (zero-based index) bit of `x`. +""" +function get_elem(x::Unsigned, i::Int, ::Val{:bits}, ::Val{:ascending}) + (x >>> i) % Bool +end + +""" +Returns a `Bool` that is the `i`-th (zero-based index) bit of `x`. 
+""" +function get_elem(x::RawBigInt, i::Int, ::Val{:bits}, v::Val{:ascending}) + vb = Val(:bits) + if 0 ≤ i < elem_count(x, vb) + word_index, bit_index_in_word = split_bit_index(x, i) + word = get_elem(x, word_index, Val(:words), v) + get_elem(word, bit_index_in_word, vb, v) + else + false + end::Bool +end + +""" +Returns an integer of type `R`, consisting of the `len` most +significant bits of `x`. +""" +function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer} + ret = zero(R) + if 0 < len + word_count, bit_count_in_word = split_bit_index(x, len) + k = word_length(x) + vals = (Val(:words), Val(:descending)) + + for w ∈ 0:(word_count - 1) + ret <<= k + word = get_elem(x, w, vals...) + ret |= R(word) + end + + if !iszero(bit_count_in_word) + ret <<= bit_count_in_word + wrd = get_elem(x, word_count, vals...) + ret |= R(wrd >>> (k - bit_count_in_word)) + end + end + ret::R +end + +struct RawBigIntRoundingIncrementHelper{T<:Unsigned} + n::RawBigInt{T} + trunc_len::Int + + final_bit::Bool + round_bit::Bool + + function RawBigIntRoundingIncrementHelper{T}(n::RawBigInt{T}, len::Int) where {T<:Unsigned} + vals = (Val(:bits), Val(:descending)) + f = get_elem(n, len - 1, vals...) + r = get_elem(n, len , vals...) + new{T}(n, len, f, r) + end +end + +function RawBigIntRoundingIncrementHelper(n::RawBigInt{T}, len::Int) where {T<:Unsigned} + RawBigIntRoundingIncrementHelper{T}(n, len) +end + +(h::RawBigIntRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit + +(h::RawBigIntRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit + +function (h::RawBigIntRoundingIncrementHelper)(::Rounding.StickyBit) + v = Val(:bits) + n = h.n + tail_is_nonzero(n, elem_count(n, v) - h.trunc_len - 1, v) +end diff --git a/base/reduce.jl b/base/reduce.jl index 45284d884a279..6a0d46c61fcd9 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -64,6 +64,11 @@ function _foldl_impl(op::OP, init, itr) where {OP} return v end +function _foldl_impl(op, init, itr::Union{Tuple,NamedTuple}) + length(itr) <= 32 && return afoldl(op, init, itr...) + @invoke _foldl_impl(op, init, itr::Any) +end + struct _InitialValue end """ @@ -140,17 +145,25 @@ what is returned is `itr′` and op′ = (xfₙ ∘ ... ∘ xf₂ ∘ xf₁)(op) """ -_xfadjoint(op, itr) = (op, itr) -_xfadjoint(op, itr::Generator) = - if itr.f === identity - _xfadjoint(op, itr.iter) - else - _xfadjoint(MappingRF(itr.f, op), itr.iter) - end -_xfadjoint(op, itr::Filter) = - _xfadjoint(FilteringRF(itr.flt, op), itr.itr) -_xfadjoint(op, itr::Flatten) = - _xfadjoint(FlatteningRF(op), itr.it) +function _xfadjoint(op, itr) + itr′, wrap = _xfadjoint_unwrap(itr) + wrap(op), itr′ +end + +_xfadjoint_unwrap(itr) = itr, identity +function _xfadjoint_unwrap(itr::Generator) + itr′, wrap = _xfadjoint_unwrap(itr.iter) + itr.f === identity && return itr′, wrap + return itr′, wrap ∘ Fix1(MappingRF, itr.f) +end +function _xfadjoint_unwrap(itr::Filter) + itr′, wrap = _xfadjoint_unwrap(itr.itr) + return itr′, wrap ∘ Fix1(FilteringRF, itr.flt) +end +function _xfadjoint_unwrap(itr::Flatten) + itr′, wrap = _xfadjoint_unwrap(itr.it) + return itr′, wrap ∘ FlatteningRF +end """ mapfoldl(f, op, itr; [init]) @@ -188,11 +201,11 @@ foldl(op, itr; kw...) = mapfoldl(identity, op, itr; kw...) 
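The `_xfadjoint_unwrap` scheme above peels iterator wrappers (`Generator`, `Filter`, `Flatten`) off the iterator and reapplies them as reducing-function wrappers, so the fold runs directly over the innermost iterator; tuples and named tuples of length at most 32 additionally take the `afoldl` fast path. A hand-worked illustration (not doctest output):

```julia
# The generator's map and the filter become MappingRF/FilteringRF wrappers
# around `+`, and the fold itself runs over the plain range 1:10.
foldl(+, Iterators.filter(iseven, (x^2 for x in 1:10)))  # 4 + 16 + 36 + 64 + 100 == 220

# Small tuples are folded via afoldl, per the specialization above.
foldl(+, (1, 2, 3))                                       # 6
```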
function mapfoldr_impl(f, op, nt, itr) op′, itr′ = _xfadjoint(BottomRF(FlipArgs(op)), Generator(f, itr)) - return foldl_impl(op′, nt, _reverse(itr′)) + return foldl_impl(op′, nt, _reverse_iter(itr′)) end -_reverse(itr) = Iterators.reverse(itr) -_reverse(itr::Tuple) = reverse(itr) #33235 +_reverse_iter(itr) = Iterators.reverse(itr) +_reverse_iter(itr::Union{Tuple,NamedTuple}) = length(itr) <= 32 ? reverse(itr) : Iterators.reverse(itr) #33235 struct FlipArgs{F} f::F @@ -303,10 +316,11 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096 # handling empty arrays -_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed")) -_empty_reduce_error(@nospecialize(f), @nospecialize(T::Type)) = throw(ArgumentError(""" - reducing with $f over an empty collection of element type $T is not allowed. - You may be able to prevent this error by supplying an `init` value to the reducer.""")) +_empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed; consider supplying `init` to the reducer")) +reduce_empty(f, T) = _empty_reduce_error() +mapreduce_empty(f, op, T) = _empty_reduce_error() +reduce_empty(f, ::Type{Union{}}, splat...) = _empty_reduce_error() +mapreduce_empty(f, op, ::Type{Union{}}, splat...) = _empty_reduce_error() """ Base.reduce_empty(op, T) @@ -326,20 +340,16 @@ is generally ambiguous, and especially so when the element type is unknown). As an alternative, consider supplying an `init` value to the reducer. """ -reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error(+, Union{}) reduce_empty(::typeof(+), ::Type{T}) where {T} = zero(T) reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int) -reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error(*, Union{}) reduce_empty(::typeof(*), ::Type{T}) where {T} = one(T) reduce_empty(::typeof(*), ::Type{<:AbstractChar}) = "" reduce_empty(::typeof(&), ::Type{Bool}) = true reduce_empty(::typeof(|), ::Type{Bool}) = false -reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error(add_sum, Union{}) reduce_empty(::typeof(add_sum), ::Type{T}) where {T} = reduce_empty(+, T) reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned} = zero(Int) reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt) -reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error(mul_prod, Union{}) reduce_empty(::typeof(mul_prod), ::Type{T}) where {T} = reduce_empty(*, T) reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned} = one(Int) reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt) @@ -380,7 +390,7 @@ reduce_empty_iter(op, itr, ::EltypeUnknown) = throw(ArgumentError(""" The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref`) or [`foldr`](@ref) with reduction `op` over an iterator which contains a single element -`x`. This value may also used to initialise the recursion, so that `reduce(op, [x, y])` +`x`. This value may also be used to initialise the recursion, so that `reduce(op, [x, y])` may call `op(reduce_first(op, x), y)`. The default is `x` for most types. The main purpose is to ensure type stability, so @@ -403,8 +413,8 @@ reduce_first(::typeof(mul_prod), x::SmallUnsigned) = UInt(x) The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or [`mapfoldr`](@ref) with map `f` and reduction `op` over an iterator which contains a -single element `x`. 
This value may also used to initialise the recursion, so that -`mapreduce(f, op, [x, y])` may call `op(reduce_first(op, f, x), f(y))`. +single element `x`. This value may also be used to initialise the recursion, so that +`mapreduce(f, op, [x, y])` may call `op(mapreduce_first(f, op, x), f(y))`. The default is `reduce_first(op, f(x))`. """ @@ -451,8 +461,10 @@ For empty collections, providing `init` will be necessary, except for some speci neutral element of `op`. Reductions for certain commonly-used operators may have special implementations, and -should be used instead: `maximum(itr)`, `minimum(itr)`, `sum(itr)`, `prod(itr)`, - `any(itr)`, `all(itr)`. +should be used instead: [`maximum`](@ref)`(itr)`, [`minimum`](@ref)`(itr)`, [`sum`](@ref)`(itr)`, +[`prod`](@ref)`(itr)`, [`any`](@ref)`(itr)`, [`all`](@ref)`(itr)`. +There are efficient methods for concatenating certain arrays of arrays +by calling `reduce(`[`vcat`](@ref)`, arr)` or `reduce(`[`hcat`](@ref)`, arr)`. The associativity of the reduction is implementation dependent. This means that you can't use non-associative operations like `-` because it is undefined whether `reduce(-,[1,2,3])` @@ -522,7 +534,7 @@ sum(f, a; kw...) = mapreduce(f, add_sum, a; kw...) """ sum(itr; [init]) -Returns the sum of all elements in a collection. +Return the sum of all elements in a collection. The return type is `Int` for signed integers of less than system word size, and `UInt` for unsigned integers of less than system word size. For all other @@ -554,7 +566,7 @@ sum(a::AbstractArray{Bool}; kw...) = """ prod(f, itr; [init]) -Returns the product of `f` applied to each element of `itr`. +Return the product of `f` applied to each element of `itr`. The return type is `Int` for signed integers of less than system word size, and `UInt` for unsigned integers of less than system word size. For all other @@ -578,7 +590,7 @@ prod(f, a; kw...) = mapreduce(f, mul_prod, a; kw...) """ prod(itr; [init]) -Returns the product of all elements of a collection. +Return the product of all elements of a collection. The return type is `Int` for signed integers of less than system word size, and `UInt` for unsigned integers of less than system word size. For all other @@ -665,7 +677,7 @@ end """ maximum(f, itr; [init]) -Returns the largest result of calling function `f` on each element of `itr`. +Return the largest result of calling function `f` on each element of `itr`. The value returned for empty `itr` can be specified by `init`. It must be a neutral element for `max` (i.e. which is less than or equal to any @@ -692,7 +704,7 @@ maximum(f, a; kw...) = mapreduce(f, max, a; kw...) """ minimum(f, itr; [init]) -Returns the smallest result of calling function `f` on each element of `itr`. +Return the smallest result of calling function `f` on each element of `itr`. The value returned for empty `itr` can be specified by `init`. It must be a neutral element for `min` (i.e. which is greater than or equal to any @@ -719,7 +731,7 @@ minimum(f, a; kw...) = mapreduce(f, min, a; kw...) """ maximum(itr; [init]) -Returns the largest element in a collection. +Return the largest element in a collection. The value returned for empty `itr` can be specified by `init`. It must be a neutral element for `max` (i.e. 
which is less than or equal to any @@ -738,7 +750,7 @@ julia> maximum([1,2,3]) 3 julia> maximum(()) -ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer +ERROR: ArgumentError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer Stacktrace: [...] @@ -751,7 +763,7 @@ maximum(a; kw...) = mapreduce(identity, max, a; kw...) """ minimum(itr; [init]) -Returns the smallest element in a collection. +Return the smallest element in a collection. The value returned for empty `itr` can be specified by `init`. It must be a neutral element for `min` (i.e. which is greater than or equal to any @@ -770,7 +782,7 @@ julia> minimum([1,2,3]) 1 julia> minimum([]) -ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer +ERROR: ArgumentError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer Stacktrace: [...] @@ -847,19 +859,27 @@ end ExtremaMap(::Type{T}) where {T} = ExtremaMap{Type{T}}(T) @inline (f::ExtremaMap)(x) = (y = f.f(x); (y, y)) -# TODO: optimize for inputs <: AbstractFloat @inline _extrema_rf((min1, max1), (min2, max2)) = (min(min1, min2), max(max1, max2)) +# optimization for IEEEFloat +function _extrema_rf(x::NTuple{2,T}, y::NTuple{2,T}) where {T<:IEEEFloat} + (x1, x2), (y1, y2) = x, y + anynan = isnan(x1)|isnan(y1) + z1 = ifelse(anynan, x1-y1, ifelse(signbit(x1-y1), x1, y1)) + z2 = ifelse(anynan, x1-y1, ifelse(signbit(x2-y2), y2, x2)) + z1, z2 +end ## findmax, findmin, argmax & argmin """ findmax(f, domain) -> (f(x), index) -Returns a pair of a value in the codomain (outputs of `f`) and the index of +Return a pair of a value in the codomain (outputs of `f`) and the index or key of the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is maximised. If there are multiple maximal points, then the first one will be returned. -`domain` must be a non-empty iterable. +`domain` must be a non-empty iterable supporting [`keys`](@ref). Indices +are of the same type as those returned by [`keys(domain)`](@ref). Values are compared with `isless`. @@ -893,6 +913,9 @@ Return the maximal element of the collection `itr` and its index or key. If there are multiple maximal elements, then the first one will be returned. Values are compared with `isless`. +Indices are of the same type as those returned by [`keys(itr)`](@ref) +and [`pairs(itr)`](@ref). + See also: [`findmin`](@ref), [`argmax`](@ref), [`maximum`](@ref). # Examples @@ -914,12 +937,15 @@ _findmax(a, ::Colon) = findmax(identity, a) """ findmin(f, domain) -> (f(x), index) -Returns a pair of a value in the codomain (outputs of `f`) and the index of +Return a pair of a value in the codomain (outputs of `f`) and the index or key of the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is minimised. If there are multiple minimal points, then the first one will be returned. `domain` must be a non-empty iterable. +Indices are of the same type as those returned by [`keys(domain)`](@ref) +and [`pairs(domain)`](@ref). + `NaN` is treated as less than all other values except `missing`. !!! compat "Julia 1.7" @@ -953,6 +979,9 @@ Return the minimal element of the collection `itr` and its index or key. If there are multiple minimal elements, then the first one will be returned. `NaN` is treated as less than all other values except `missing`. +Indices are of the same type as those returned by [`keys(itr)`](@ref) +and [`pairs(itr)`](@ref). 
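To make the "index or key" phrasing concrete: the second element returned matches `keys(itr)` / `pairs(itr)`, so for arrays and tuples it is an integer index and for dictionaries it is a key. Hand-worked values, shown for illustration rather than as doctest output:

```julia
findmax(abs2, (-3, 1, 2))        # (9, 1): largest f(x), then its index into the tuple
findmin(Dict(:a => 2, :b => 5))  # (2, :a): for a Dict the returned index is a key
```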
+ See also: [`findmax`](@ref), [`argmin`](@ref), [`minimum`](@ref). # Examples @@ -974,7 +1003,7 @@ _findmin(a, ::Colon) = findmin(identity, a) """ argmax(f, domain) -Return a value `x` in the domain of `f` for which `f(x)` is maximised. +Return a value `x` from `domain` for which `f(x)` is maximised. If there are multiple maximal values for `f(x)` then the first one will be found. `domain` must be a non-empty iterable. @@ -1005,6 +1034,9 @@ If there are multiple maximal elements, then the first one will be returned. The collection must not be empty. +Indices are of the same type as those returned by [`keys(itr)`](@ref) +and [`pairs(itr)`](@ref). + Values are compared with `isless`. See also: [`argmin`](@ref), [`findmax`](@ref). @@ -1026,7 +1058,7 @@ argmax(itr) = findmax(itr)[2] """ argmin(f, domain) -Return a value `x` in the domain of `f` for which `f(x)` is minimised. +Return a value `x` from `domain` for which `f(x)` is minimised. If there are multiple minimal values for `f(x)` then the first one will be found. `domain` must be a non-empty iterable. @@ -1060,6 +1092,9 @@ If there are multiple minimal elements, then the first one will be returned. The collection must not be empty. +Indices are of the same type as those returned by [`keys(itr)`](@ref) +and [`pairs(itr)`](@ref). + `NaN` is treated as less than all other values except `missing`. See also: [`argmax`](@ref), [`findmin`](@ref). @@ -1192,26 +1227,31 @@ false """ any(f, itr) = _any(f, itr, :) -function _any(f, itr, ::Colon) - anymissing = false - for x in itr - v = f(x) - if ismissing(v) - anymissing = true - elseif v - return true +for ItrT = (Tuple,Any) + # define a generic method and a specialized version for `Tuple`, + # whose method bodies are identical, while giving better effects to the later + @eval function _any(f, itr::$ItrT, ::Colon) + $(ItrT === Tuple ? :(@_terminates_locally_meta) : :nothing) + anymissing = false + for x in itr + v = f(x) + if ismissing(v) + anymissing = true + else + v && return true + end end + return anymissing ? missing : false end - return anymissing ? missing : false end -# Specialized versions of any(f, ::Tuple), avoiding type instabilities for small tuples -# containing mixed types. +# Specialized versions of any(f, ::Tuple) # We fall back to the for loop implementation all elements have the same type or # if the tuple is too large. -any(f, itr::NTuple) = _any(f, itr, :) # case of homogeneous tuple -function any(f, itr::Tuple) # case of tuple with mixed types - length(itr) > 32 && return _any(f, itr, :) +function any(f, itr::Tuple) + if itr isa NTuple || length(itr) > 32 + return _any(f, itr, :) + end _any_tuple(f, false, itr...) end @@ -1260,27 +1300,30 @@ true """ all(f, itr) = _all(f, itr, :) -function _all(f, itr, ::Colon) - anymissing = false - for x in itr - v = f(x) - if ismissing(v) - anymissing = true - # this syntax allows throwing a TypeError for non-Bool, for consistency with any - elseif v - continue - else - return false +for ItrT = (Tuple,Any) + # define a generic method and a specialized version for `Tuple`, + # whose method bodies are identical, while giving better effects to the later + @eval function _all(f, itr::$ItrT, ::Colon) + $(ItrT === Tuple ? :(@_terminates_locally_meta) : :nothing) + anymissing = false + for x in itr + v = f(x) + if ismissing(v) + anymissing = true + else + v || return false + end end + return anymissing ? missing : true end - return anymissing ? 
missing : true end -# Specialized versions of all(f, ::Tuple), avoiding type instabilities for small tuples -# containing mixed types. This is similar to any(f, ::Tuple) defined above. -all(f, itr::NTuple) = _all(f, itr, :) +# Specialized versions of all(f, ::Tuple), +# This is similar to any(f, ::Tuple) defined above. function all(f, itr::Tuple) - length(itr) > 32 && return _all(f, itr, :) + if itr isa NTuple || length(itr) > 32 + return _all(f, itr, :) + end _all_tuple(f, false, itr...) end @@ -1331,15 +1374,7 @@ count(itr; init=0) = count(identity, itr; init) count(f, itr; init=0) = _simple_count(f, itr, init) -_simple_count(pred, itr, init) = _simple_count_helper(Generator(pred, itr), init) - -function _simple_count_helper(g, init::T) where {T} - n::T = init - for x in g - n += x::Bool - end - return n -end +_simple_count(pred, itr, init) = sum(_bool(pred), itr; init) function _simple_count(::typeof(identity), x::Array{Bool}, init::T=0) where {T} n::T = init diff --git a/base/reducedim.jl b/base/reducedim.jl index dc34b4feb1f6a..21dff3b46ab37 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -146,16 +146,18 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min T = _realtype(f, promote_union(eltype(A))) Tr = v0 isa T ? T : typeof(v0) - # but NaNs and missing need to be avoided as initial values + # but NaNs, missing and unordered values need to be avoided as initial values if v0 isa Number && isnan(v0) # v0 is NaN v0 = oftype(v0, $initval) elseif isunordered(v0) # v0 is missing or a third-party unordered value Tnm = nonmissingtype(Tr) - # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, Missing}} can still error here. - v0 = $typeextreme(Tnm) + if Tnm <: Union{BitInteger, IEEEFloat, BigFloat} + v0 = $typeextreme(Tnm) + elseif !all(isunordered, A1) + v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1)) + end end # v0 may have changed type. Tr = v0 isa T ? T : typeof(v0) @@ -186,12 +188,18 @@ function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray # but NaNs and missing need to be avoided as initial values if v0[1] isa Number && isnan(v0[1]) + # v0 is NaN v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf) elseif isunordered(v0[1]) # v0 is missing or a third-party unordered value - # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, Missing}} can still error here. - v0 = typemax(nonmissingtype(Tmin)), typemin(nonmissingtype(Tmax)) + Tminnm = nonmissingtype(Tmin) + Tmaxnm = nonmissingtype(Tmax) + if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} && + Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat} + v0 = (typemax(Tminnm), typemin(Tmaxnm)) + elseif !all(isunordered, A1) + v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1))) + end end # v0 may have changed type. Tmin = v0[1] isa T ? 
T : typeof(v0[1]) @@ -211,8 +219,8 @@ reducedim_init(f, op::typeof(|), A::AbstractArrayOrBroadcasted, region) = reduce let BitIntFloat = Union{BitInteger, IEEEFloat} T = Union{ - [AbstractArray{t} for t in uniontypes(BitIntFloat)]..., - [AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...} + Any[AbstractArray{t} for t in uniontypes(BitIntFloat)]..., + Any[AbstractArray{Complex{t}} for t in uniontypes(BitIntFloat)]...} global function reducedim_init(f, op::Union{typeof(+),typeof(add_sum)}, A::T, region) z = zero(f(zero(eltype(A)))) @@ -448,6 +456,8 @@ _count(f, A::AbstractArrayOrBroadcasted, dims, init) = mapreduce(_bool(f), add_s Count the number of elements in `A` for which `f` returns `true` over the singleton dimensions of `r`, writing the result into `r` in-place. +$(_DOCS_ALIASING_WARNING) + !!! compat "Julia 1.5" inplace `count!` was added in Julia 1.5. @@ -526,6 +536,8 @@ sum(f, A::AbstractArray; dims) Sum elements of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1 2; 3 4] @@ -599,6 +611,8 @@ prod(f, A::AbstractArray; dims) Multiply elements of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1 2; 3 4] @@ -676,6 +690,8 @@ maximum(f, A::AbstractArray; dims) Compute the maximum value of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1 2; 3 4] @@ -753,6 +769,8 @@ minimum(f, A::AbstractArray; dims) Compute the minimum value of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [1 2; 3 4] @@ -818,6 +836,8 @@ extrema(f, A::AbstractArray; dims) Compute the minimum and maximum value of `A` over the singleton dimensions of `r`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + !!! compat "Julia 1.8" This method requires Julia 1.8 or later. @@ -893,6 +913,8 @@ all(::Function, ::AbstractArray; dims) Test whether all values in `A` along the singleton dimensions of `r` are `true`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [true false; true false] @@ -966,6 +988,8 @@ any(::Function, ::AbstractArray; dims) Test whether any values in `A` along the singleton dimensions of `r` are `true`, and write results to `r`. +$(_DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> A = [true false; true false] @@ -1083,6 +1107,8 @@ end Find the minimum of `A` and the corresponding linear index along singleton dimensions of `rval` and `rind`, and store the results in `rval` and `rind`. `NaN` is treated as less than all other values except `missing`. + +$(_DOCS_ALIASING_WARNING) """ function findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) @@ -1154,6 +1180,8 @@ end Find the maximum of `A` and the corresponding linear index along singleton dimensions of `rval` and `rind`, and store the results in `rval` and `rind`. `NaN` is treated as greater than all other values except `missing`. + +$(_DOCS_ALIASING_WARNING) """ function findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray; init::Bool=true) diff --git a/base/reflection.jl b/base/reflection.jl index 644714c8440cb..19dd289efbebf 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -2,19 +2,6 @@ # name and module reflection -""" - nameof(m::Module) -> Symbol - -Get the name of a `Module` as a [`Symbol`](@ref). 
- -# Examples -```jldoctest -julia> nameof(Base.Broadcast) -:Broadcast -``` -""" -nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m) - """ parentmodule(m::Module) -> Module @@ -88,22 +75,103 @@ end """ names(x::Module; all::Bool = false, imported::Bool = false) -Get an array of the names exported by a `Module`, excluding deprecated names. -If `all` is true, then the list also includes non-exported names defined in the module, +Get an array of the public names of a `Module`, excluding deprecated names. +If `all` is true, then the list also includes non-public names defined in the module, deprecated names, and compiler-generated names. If `imported` is true, then names explicitly imported from other modules are also included. -As a special case, all names defined in `Main` are considered \"exported\", -since it is not idiomatic to explicitly export names from `Main`. +As a special case, all names defined in `Main` are considered \"public\", +since it is not idiomatic to explicitly mark names from `Main` as public. -See also: [`@locals`](@ref Base.@locals), [`@__MODULE__`](@ref). +See also: [`isexported`](@ref), [`ispublic`](@ref), [`@locals`](@ref Base.@locals), [`@__MODULE__`](@ref). """ names(m::Module; all::Bool = false, imported::Bool = false) = - sort!(ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported)) + sort!(unsorted_names(m; all, imported)) +unsorted_names(m::Module; all::Bool = false, imported::Bool = false) = + ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported) + +""" + isexported(m::Module, s::Symbol) -> Bool + +Returns whether a symbol is exported from a module. + +See also: [`ispublic`](@ref), [`names`](@ref) + +```jldoctest +julia> module Mod + export foo + public bar + end +Mod + +julia> Base.isexported(Mod, :foo) +true +julia> Base.isexported(Mod, :bar) +false + +julia> Base.isexported(Mod, :baz) +false +``` +""" isexported(m::Module, s::Symbol) = ccall(:jl_module_exports_p, Cint, (Any, Any), m, s) != 0 + +""" + ispublic(m::Module, s::Symbol) -> Bool + +Returns whether a symbol is marked as public in a module. + +Exported symbols are considered public. + +!!! compat "Julia 1.11" + This function and the notion of publicity were added in Julia 1.11. + +See also: [`isexported`](@ref), [`names`](@ref) + +```jldoctest +julia> module Mod + export foo + public bar + end +Mod + +julia> Base.ispublic(Mod, :foo) +true + +julia> Base.ispublic(Mod, :bar) +true + +julia> Base.ispublic(Mod, :baz) +false +``` +""" +ispublic(m::Module, s::Symbol) = ccall(:jl_module_public_p, Cint, (Any, Any), m, s) != 0 + +# TODO: this is vaguely broken because it only works for explicit calls to +# `Base.deprecate`, not the @deprecated macro: isdeprecated(m::Module, s::Symbol) = ccall(:jl_is_binding_deprecated, Cint, (Any, Any), m, s) != 0 + +""" + isbindingresolved(m::Module, s::Symbol) -> Bool + +Returns whether the binding of a symbol in a module is resolved. 
+ +See also: [`isexported`](@ref), [`ispublic`](@ref), [`isdeprecated`](@ref) + +```jldoctest +julia> module Mod + foo() = 17 + end +Mod + +julia> Base.isbindingresolved(Mod, :foo) +true + +julia> Base.isbindingresolved(Mod, :bar) +false +``` +""" isbindingresolved(m::Module, var::Symbol) = ccall(:jl_binding_resolved_p, Cint, (Any, Any), m, var) != 0 function binding_module(m::Module, s::Symbol) @@ -112,17 +180,10 @@ function binding_module(m::Module, s::Symbol) return unsafe_pointer_to_objref(p)::Module end -function resolve(g::GlobalRef; force::Bool=false) - if force || isbindingresolved(g.mod, g.name) - return GlobalRef(binding_module(g.mod, g.name), g.name) - end - return g -end - -const NamedTuple_typename = NamedTuple.body.body.name +const _NAMEDTUPLE_NAME = NamedTuple.body.body.name function _fieldnames(@nospecialize t) - if t.name === NamedTuple_typename + if t.name === _NAMEDTUPLE_NAME if t.parameters[1] isa Tuple return t.parameters[1] else @@ -265,17 +326,23 @@ Determine whether a global is declared `const` in a given module `m`. isconst(m::Module, s::Symbol) = ccall(:jl_is_const, Cint, (Any, Any), m, s) != 0 +function isconst(g::GlobalRef) + return ccall(:jl_globalref_is_const, Cint, (Any,), g) != 0 +end + """ isconst(t::DataType, s::Union{Int,Symbol}) -> Bool Determine whether a field `s` is declared `const` in a given type `t`. """ function isconst(@nospecialize(t::Type), s::Symbol) + @_foldable_meta t = unwrap_unionall(t) isa(t, DataType) || return false return isconst(t, fieldindex(t, s, false)) end function isconst(@nospecialize(t::Type), s::Int) + @_foldable_meta t = unwrap_unionall(t) # TODO: what to do for `Union`? isa(t, DataType) || return false # uncertain @@ -287,6 +354,29 @@ function isconst(@nospecialize(t::Type), s::Int) return unsafe_load(Ptr{UInt32}(constfields), 1 + s÷32) & (1 << (s%32)) != 0 end +""" + isfieldatomic(t::DataType, s::Union{Int,Symbol}) -> Bool + +Determine whether a field `s` is declared `@atomic` in a given type `t`. +""" +function isfieldatomic(@nospecialize(t::Type), s::Symbol) + @_foldable_meta + t = unwrap_unionall(t) + isa(t, DataType) || return false + return isfieldatomic(t, fieldindex(t, s, false)) +end +function isfieldatomic(@nospecialize(t::Type), s::Int) + @_foldable_meta + t = unwrap_unionall(t) + # TODO: what to do for `Union`? + isa(t, DataType) || return false # uncertain + ismutabletype(t) || return false # immutable structs are never atomic + 1 <= s <= length(t.name.names) || return false # OOB reads are not atomic (they always throw) + atomicfields = t.name.atomicfields + atomicfields === C_NULL && return false + s -= 1 + return unsafe_load(Ptr{UInt32}(atomicfields), 1 + s÷32) & (1 << (s%32)) != 0 +end """ @locals() @@ -327,20 +417,12 @@ macro locals() return Expr(:locals) end -""" - objectid(x) -> UInt - -Get a hash value for `x` based on object identity. `objectid(x)==objectid(y)` if `x === y`. - -See also [`hash`](@ref), [`IdDict`](@ref). -""" -objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x) - # concrete datatype predicates datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Core.SimpleVector, (Any,), x) struct DataTypeLayout + size::UInt32 nfields::UInt32 npointers::UInt32 firstptr::Int32 @@ -348,13 +430,16 @@ struct DataTypeLayout flags::UInt16 # haspadding : 1; # fielddesc_type : 2; + # arrayelem_isboxed : 1; + # arrayelem_isunion : 1; end """ Base.datatype_alignment(dt::DataType) -> Int Memory allocation minimum alignment for instances of this type. -Can be called on any `isconcretetype`. 
+Can be called on any `isconcretetype`, although for Memory it will give the +alignment of the elements, not the whole object. """ function datatype_alignment(dt::DataType) @_foldable_meta @@ -375,15 +460,18 @@ LLT_ALIGN(x, sz) = (x + sz - 1) & -sz # amount of total space taken by T when stored in a container function aligned_sizeof(@nospecialize T::Type) @_foldable_meta - if isbitsunion(T) - _, sz, al = uniontype_layout(T) - return LLT_ALIGN(sz, al) + if isa(T, Union) + if allocatedinline(T) + # NOTE this check is equivalent to `isbitsunion(T)`, we can improve type + # inference in the second branch with the outer `isa(T, Union)` check + _, sz, al = uniontype_layout(T) + return LLT_ALIGN(sz, al) + end elseif allocatedinline(T) al = datatype_alignment(T) return LLT_ALIGN(Core.sizeof(T), al) - else - return Core.sizeof(Ptr{Cvoid}) end + return Core.sizeof(Ptr{Cvoid}) end gc_alignment(sz::Integer) = Int(ccall(:jl_alignment, Cint, (Csize_t,), sz)) @@ -393,7 +481,8 @@ gc_alignment(T::Type) = gc_alignment(Core.sizeof(T)) Base.datatype_haspadding(dt::DataType) -> Bool Return whether the fields of instances of this type are packed in memory, -with no intervening padding bytes. +with no intervening padding bits (defined as bits whose value does not uniquely +impact the egal test when applied to the struct fields). Can be called on any `isconcretetype`. """ function datatype_haspadding(dt::DataType) @@ -404,9 +493,10 @@ function datatype_haspadding(dt::DataType) end """ - Base.datatype_nfields(dt::DataType) -> Bool + Base.datatype_nfields(dt::DataType) -> UInt32 -Return the number of fields known to this datatype's layout. +Return the number of fields known to this datatype's layout. This may be +different from the number of actual fields of the type for opaque types. Can be called on any `isconcretetype`. """ function datatype_nfields(dt::DataType) @@ -444,6 +534,31 @@ function datatype_fielddesc_type(dt::DataType) return (flags >> 1) & 3 end +""" + Base.datatype_arrayelem(dt::DataType) -> Int + +Return the behavior of the trailing array types allocations. +Can be called on any `isconcretetype`, but only meaningful on `Memory`. + +0 = inlinealloc +1 = isboxed +2 = isbitsunion +""" +function datatype_arrayelem(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return (flags >> 3) & 3 +end + +function datatype_layoutsize(dt::DataType) + @_foldable_meta + dt.layout == C_NULL && throw(UndefRefError()) + size = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).size + return size % Int +end + + # For type stability, we only expose a single struct that describes everything struct FieldDesc isforeign::Bool @@ -470,7 +585,7 @@ end function getindex(dtfd::DataTypeFieldDesc, i::Int) layout_ptr = convert(Ptr{DataTypeLayout}, dtfd.dt.layout) - fd_ptr = layout_ptr + sizeof(DataTypeLayout) + fd_ptr = layout_ptr + Core.sizeof(DataTypeLayout) layout = unsafe_load(layout_ptr) fielddesc_type = (layout.flags >> 1) & 3 nfields = layout.nfields @@ -491,8 +606,12 @@ end ismutable(v) -> Bool Return `true` if and only if value `v` is mutable. See [Mutable Composite Types](@ref) -for a discussion of immutability. Note that this function works on values, so if you give it -a type, it will tell you that a value of `DataType` is mutable. +for a discussion of immutability. Note that this function works on values, so if you +give it a `DataType`, it will tell you that a value of the type is mutable. + +!!! 
note + For technical reasons, `ismutable` returns `true` for values of certain special types + (for example `String` and `Symbol`) even though they cannot be mutated in a permissible way. See also [`isbits`](@ref), [`isstructtype`](@ref). @@ -508,13 +627,16 @@ true !!! compat "Julia 1.5" This function requires at least Julia 1.5. """ -ismutable(@nospecialize(x)) = (@_total_meta; typeof(x).name.flags & 0x2 == 0x2) +ismutable(@nospecialize(x)) = (@_total_meta; (typeof(x).name::Core.TypeName).flags & 0x2 == 0x2) +# The type assertion above is required to fix some invalidations. +# See also https://github.com/JuliaLang/julia/issues/52134 """ ismutabletype(T) -> Bool Determine whether type `T` was declared as a mutable type (i.e. using `mutable struct` keyword). +If `T` is not a type, then return `false`. !!! compat "Julia 1.7" This function requires at least Julia 1.7. @@ -523,37 +645,39 @@ function ismutabletype(@nospecialize t) @_total_meta t = unwrap_unionall(t) # TODO: what to do for `Union`? - return isa(t, DataType) && t.name.flags & 0x2 == 0x2 + return isa(t, DataType) && ismutabletypename(t.name) end +ismutabletypename(tn::Core.TypeName) = tn.flags & 0x2 == 0x2 + """ isstructtype(T) -> Bool Determine whether type `T` was declared as a struct type (i.e. using the `struct` or `mutable struct` keyword). +If `T` is not a type, then return `false`. """ function isstructtype(@nospecialize t) @_total_meta t = unwrap_unionall(t) # TODO: what to do for `Union`? isa(t, DataType) || return false - hasfield = !isdefined(t, :types) || !isempty(t.types) - return hasfield || (t.size == 0 && !isabstracttype(t)) + return !isprimitivetype(t) && !isabstracttype(t) end """ isprimitivetype(T) -> Bool Determine whether type `T` was declared as a primitive type -(i.e. using the `primitive` keyword). +(i.e. using the `primitive type` syntax). +If `T` is not a type, then return `false`. """ function isprimitivetype(@nospecialize t) @_total_meta t = unwrap_unionall(t) # TODO: what to do for `Union`? isa(t, DataType) || return false - hasfield = !isdefined(t, :types) || !isempty(t.types) - return !hasfield && t.size != 0 && !isabstracttype(t) + return (t.flags & 0x0080) == 0x0080 end """ @@ -567,6 +691,7 @@ Typical examples are numeric types such as [`UInt8`](@ref), This category of types is significant since they are valid as type parameters, may not track [`isdefined`](@ref) / [`isassigned`](@ref) status, and have a defined layout that is compatible with C. +If `T` is not a type, then return `false`. See also [`isbits`](@ref), [`isprimitivetype`](@ref), [`ismutable`](@ref). @@ -579,14 +704,28 @@ julia> isbitstype(Complex) false ``` """ -isbitstype(@nospecialize t) = (@_total_meta; isa(t, DataType) && (t.flags & 0x8) == 0x8) +isbitstype(@nospecialize t) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0008) == 0x0008) """ isbits(x) Return `true` if `x` is an instance of an [`isbitstype`](@ref) type. """ -isbits(@nospecialize x) = (@_total_meta; typeof(x).flags & 0x8 == 0x8) +isbits(@nospecialize x) = isbitstype(typeof(x)) + +""" + objectid(x) -> UInt + +Get a hash value for `x` based on object identity. + +If `x === y` then `objectid(x) == objectid(y)`, and usually when `x !== y`, `objectid(x) != objectid(y)`. + +See also [`hash`](@ref), [`IdDict`](@ref). 
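A few spot checks of these predicates, with results read off the definitions above (illustrative, not doctest output):

```julia
isbitstype(ComplexF64)            # true: concrete, immutable, no object references
isbits(1 + 2im)                   # true: the same check applied to typeof(x)
ismutabletype(Base.RefValue)      # true: declared with `mutable struct`
isstructtype(Int)                 # false: Int is a primitive type, not a struct
isprimitivetype(Bool)             # true
objectid(:foo) == objectid(:foo)  # true: `===`-equal values get equal object ids
```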
+""" +function objectid(@nospecialize(x)) + @_total_meta + return ccall(:jl_object_id, UInt, (Any,), x) +end """ isdispatchtuple(T) @@ -594,26 +733,73 @@ isbits(@nospecialize x) = (@_total_meta; typeof(x).flags & 0x8 == 0x8) Determine whether type `T` is a tuple "leaf type", meaning it could appear as a type signature in dispatch and has no subtypes (or supertypes) which could appear in a call. +If `T` is not a type, then return `false`. +""" +isdispatchtuple(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0004) == 0x0004) + +datatype_ismutationfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0100) == 0x0100) + +""" + Base.ismutationfree(T) + +Determine whether type `T` is mutation free in the sense that no mutable memory +is reachable from this type (either in the type itself) or through any fields. +Note that the type itself need not be immutable. For example, an empty mutable +type is `ismutabletype`, but also `ismutationfree`. +If `T` is not a type, then return `false`. +""" +function ismutationfree(@nospecialize(t)) + t = unwrap_unionall(t) + if isa(t, DataType) + return datatype_ismutationfree(t) + elseif isa(t, Union) + return ismutationfree(t.a) && ismutationfree(t.b) + end + # TypeVar, etc. + return false +end + +datatype_isidentityfree(dt::DataType) = (@_total_meta; (dt.flags & 0x0200) == 0x0200) + """ -isdispatchtuple(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x4) == 0x4) + Base.isidentityfree(T) + +Determine whether type `T` is identity free in the sense that this type or any +reachable through its fields has non-content-based identity. +If `T` is not a type, then return `false`. +""" +function isidentityfree(@nospecialize(t)) + t = unwrap_unionall(t) + if isa(t, DataType) + return datatype_isidentityfree(t) + elseif isa(t, Union) + return isidentityfree(t.a) && isidentityfree(t.b) + end + # TypeVar, etc. + return false +end iskindtype(@nospecialize t) = (t === DataType || t === UnionAll || t === Union || t === typeof(Bottom)) isconcretedispatch(@nospecialize t) = isconcretetype(t) && !iskindtype(t) -has_free_typevars(@nospecialize(t)) = ccall(:jl_has_free_typevars, Cint, (Any,), t) != 0 +has_free_typevars(@nospecialize(t)) = (@_total_meta; ccall(:jl_has_free_typevars, Cint, (Any,), t) != 0) # equivalent to isa(v, Type) && isdispatchtuple(Tuple{v}) || v === Union{} # and is thus perhaps most similar to the old (pre-1.0) `isleaftype` query -const _TYPE_NAME = Type.body.name function isdispatchelem(@nospecialize v) return (v === Bottom) || (v === typeof(Bottom)) || isconcretedispatch(v) || - (isa(v, DataType) && v.name === _TYPE_NAME && !has_free_typevars(v)) # isType(v) + (isType(v) && !has_free_typevars(v)) end +const _TYPE_NAME = Type.body.name +isType(@nospecialize t) = isa(t, DataType) && t.name === _TYPE_NAME + """ isconcretetype(T) Determine whether type `T` is a concrete type, meaning it could have direct instances (values `x` such that `typeof(x) === T`). +Note that this is not the negation of `isabstracttype(T)`. +If `T` is not a type, then return `false`. See also: [`isbits`](@ref), [`isabstracttype`](@ref), [`issingletontype`](@ref). @@ -638,13 +824,15 @@ julia> isconcretetype(Union{Int,String}) false ``` """ -isconcretetype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x2) == 0x2) +isconcretetype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x0002) == 0x0002) """ isabstracttype(T) Determine whether type `T` was declared as an abstract type -(i.e. using the `abstract` keyword). 
+(i.e. using the `abstract type` syntax). +Note that this is not the negation of `isconcretetype(T)`. +If `T` is not a type, then return `false`. # Examples ```jldoctest @@ -666,9 +854,10 @@ end Base.issingletontype(T) Determine whether type `T` has exactly one possible instance; for example, a -struct type with no fields. +struct type with no fields except other singleton values. +If `T` is not a concrete type, then return `false`. """ -issingletontype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && isdefined(t, :instance)) +issingletontype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && isdefined(t, :instance) && datatype_layoutsize(t) == 0 && datatype_pointerfree(t)) """ typeintersect(T::Type, S::Type) @@ -752,23 +941,55 @@ julia> Base.fieldindex(Foo, :z, false) ``` """ function fieldindex(T::DataType, name::Symbol, err::Bool=true) + return err ? _fieldindex_maythrow(T, name) : _fieldindex_nothrow(T, name) +end + +function _fieldindex_maythrow(T::DataType, name::Symbol) @_foldable_meta - return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, err)+1) + @noinline + return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, true)+1) +end + +function _fieldindex_nothrow(T::DataType, name::Symbol) + @_total_meta + @noinline + return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, false)+1) end function fieldindex(t::UnionAll, name::Symbol, err::Bool=true) t = argument_datatype(t) if t === nothing - throw(ArgumentError("type does not have definite fields")) + err && throw(ArgumentError("type does not have definite fields")) + return 0 end return fieldindex(t, name, err) end function argument_datatype(@nospecialize t) @_total_meta + @noinline return ccall(:jl_argument_datatype, Any, (Any,), t)::Union{Nothing,DataType} end +function datatype_fieldcount(t::DataType) + if t.name === _NAMEDTUPLE_NAME + names, types = t.parameters[1], t.parameters[2] + if names isa Tuple + return length(names) + end + if types isa DataType && types <: Tuple + return fieldcount(types) + end + return nothing + elseif isabstracttype(t) || (t.name === Tuple.name && isvatuple(t)) + return nothing + end + if isdefined(t, :types) + return length(t.types) + end + return length(t.name.names) +end + """ fieldcount(t::Type) @@ -788,25 +1009,11 @@ function fieldcount(@nospecialize t) if !(t isa DataType) throw(TypeError(:fieldcount, DataType, t)) end - if t.name === NamedTuple_typename - names, types = t.parameters[1], t.parameters[2] - if names isa Tuple - return length(names) - end - if types isa DataType && types <: Tuple - return fieldcount(types) - end - abstr = true - else - abstr = isabstracttype(t) || (t.name === Tuple.name && isvatuple(t)) - end - if abstr + fcount = datatype_fieldcount(t) + if fcount === nothing throw(ArgumentError("type does not have a definite number of fields")) end - if isdefined(t, :types) - return length(t.types) - end - return length(t.name.names) + return fcount end """ @@ -853,9 +1060,9 @@ function to_tuple_type(@nospecialize(t)) t = Tuple{t...} end if isa(t, Type) && t <: Tuple - for p in unwrap_unionall(t).parameters + for p in (unwrap_unionall(t)::DataType).parameters if isa(p, Core.TypeofVararg) - p = p.T + p = unwrapva(p) end if !(isa(p, Type) || isa(p, TypeVar)) error("argument tuple type must contain only types") @@ -867,14 +1074,11 @@ function to_tuple_type(@nospecialize(t)) t end -function signature_type(@nospecialize(f), @nospecialize(args)) - f_type = isa(f, Type) ? 
Type{f} : typeof(f) - if isa(args, Type) - u = unwrap_unionall(args) - return rewrap_unionall(Tuple{f_type, u.parameters...}, args) - else - return Tuple{f_type, args...} - end +function signature_type(@nospecialize(f), @nospecialize(argtypes)) + argtypes = to_tuple_type(argtypes) + ft = Core.Typeof(f) + u = unwrap_unionall(argtypes)::DataType + return rewrap_unionall(Tuple{ft, u.parameters...}, argtypes) end """ @@ -902,10 +1106,11 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool= if debuginfo !== :source && debuginfo !== :none throw(ArgumentError("'debuginfo' must be either :source or :none")) end - return map(method_instances(f, t)) do m + world = get_world_counter() + return map(method_instances(f, t, world)) do m if generated && hasgenerator(m) if may_invoke_generator(m) - return ccall(:jl_code_for_staged, Any, (Any,), m)::CodeInfo + return ccall(:jl_code_for_staged, Any, (Any, UInt), m, world)::CodeInfo else error("Could not expand generator for `@generated` method ", m, ". ", "This can happen if the provided argument types (", t, ") are ", @@ -942,7 +1147,7 @@ function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing return _methods_by_ftype(t, mt, lim, world, false, RefValue{UInt}(typemin(UInt)), RefValue{UInt}(typemax(UInt)), Ptr{Int32}(C_NULL)) end function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32}) - return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool} + return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Vector{Any},Nothing} end # high-level, more convenient method lookup functions @@ -976,17 +1181,17 @@ A list of modules can also be specified as an array. !!! compat "Julia 1.4" At least Julia 1.4 is required for specifying a module. -See also: [`which`](@ref) and `@which`. +See also: [`which`](@ref), [`@which`](@ref Main.InteractiveUtils.@which) and [`methodswith`](@ref Main.InteractiveUtils.methodswith). 
""" function methods(@nospecialize(f), @nospecialize(t), mod::Union{Tuple{Module},AbstractArray{Module},Nothing}=nothing) - t = to_tuple_type(t) world = get_world_counter() + world == typemax(UInt) && error("code reflection cannot be used from generated functions") # Lack of specialization => a comprehension triggers too many invalidations via _collect, so collect the methods manually ms = Method[] for m in _methods(f, t, -1, world)::Vector m = m::Core.MethodMatch - (mod === nothing || m.method.module ∈ mod) && push!(ms, m.method) + (mod === nothing || parentmodule(m.method) ∈ mod) && push!(ms, m.method) end MethodList(ms, typeof(f).name.mt) end @@ -995,6 +1200,7 @@ methods(@nospecialize(f), @nospecialize(t), mod::Module) = methods(f, t, (mod,)) function methods_including_ambiguous(@nospecialize(f), @nospecialize(t)) tt = signature_type(f, t) world = get_world_counter() + world == typemax(UInt) && error("code reflection cannot be used from generated functions") min = RefValue{UInt}(typemin(UInt)) max = RefValue{UInt}(typemax(UInt)) ms = _methods_by_ftype(tt, nothing, -1, world, true, min, max, Ptr{Int32}(C_NULL))::Vector @@ -1012,29 +1218,31 @@ function visit(f, mt::Core.MethodTable) nothing end function visit(f, mc::Core.TypeMapLevel) - if mc.targ !== nothing - e = mc.targ::Vector{Any} + function avisit(f, e::Memory{Any}) for i in 2:2:length(e) - isassigned(e, i) && visit(f, e[i]) + isassigned(e, i) || continue + ei = e[i] + if ei isa Memory{Any} + for j in 2:2:length(ei) + isassigned(ei, j) || continue + visit(f, ei[j]) + end + else + visit(f, ei) + end end end + if mc.targ !== nothing + avisit(f, mc.targ::Memory{Any}) + end if mc.arg1 !== nothing - e = mc.arg1::Vector{Any} - for i in 2:2:length(e) - isassigned(e, i) && visit(f, e[i]) - end + avisit(f, mc.arg1::Memory{Any}) end if mc.tname !== nothing - e = mc.tname::Vector{Any} - for i in 2:2:length(e) - isassigned(e, i) && visit(f, e[i]) - end + avisit(f, mc.tname::Memory{Any}) end if mc.name1 !== nothing - e = mc.name1::Vector{Any} - for i in 2:2:length(e) - isassigned(e, i) && visit(f, e[i]) - end + avisit(f, mc.name1::Memory{Any}) end mc.list !== nothing && visit(f, mc.list) mc.any !== nothing && visit(f, mc.any) @@ -1047,6 +1255,34 @@ function visit(f, d::Core.TypeMapEntry) end nothing end +struct MethodSpecializations + specializations::Union{Nothing, Core.MethodInstance, Core.SimpleVector} +end +""" + specializations(m::Method) → itr + +Return an iterator `itr` of all compiler-generated specializations of `m`. +""" +specializations(m::Method) = MethodSpecializations(isdefined(m, :specializations) ? m.specializations : nothing) +function iterate(specs::MethodSpecializations) + s = specs.specializations + s === nothing && return nothing + isa(s, Core.MethodInstance) && return (s, nothing) + return iterate(specs, 0) +end +iterate(specs::MethodSpecializations, ::Nothing) = nothing +function iterate(specs::MethodSpecializations, i::Int) + s = specs.specializations::Core.SimpleVector + n = length(s) + i >= n && return nothing + item = nothing + while i < n && item === nothing + item = s[i+=1] + end + item === nothing && return nothing + return (item, i) +end +length(specs::MethodSpecializations) = count(Returns(true), specs) function length(mt::Core.MethodTable) n = 0 @@ -1061,15 +1297,17 @@ uncompressed_ir(m::Method) = isdefined(m, :source) ? _uncompressed_ir(m, m.sourc isdefined(m, :generator) ? 
error("Method is @generated; try `code_lowered` instead.") : error("Code for this Method is not available.") _uncompressed_ir(m::Method, s::CodeInfo) = copy(s) -_uncompressed_ir(m::Method, s::Array{UInt8,1}) = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, s)::CodeInfo -_uncompressed_ir(ci::Core.CodeInstance, s::Array{UInt8,1}) = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), ci.def.def::Method, ci, s)::CodeInfo +_uncompressed_ir(m::Method, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), m, C_NULL, s)::CodeInfo +_uncompressed_ir(ci::Core.CodeInstance, s::String) = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), ci.def.def::Method, ci, s)::CodeInfo # for backwards compat const uncompressed_ast = uncompressed_ir const _uncompressed_ast = _uncompressed_ir -function method_instances(@nospecialize(f), @nospecialize(t), world::UInt=get_world_counter()) +function method_instances(@nospecialize(f), @nospecialize(t), world::UInt) tt = signature_type(f, t) results = Core.MethodInstance[] + # this make a better error message than the typeassert that follows + world == typemax(UInt) && error("code reflection cannot be used from generated functions") for match in _methods_by_ftype(tt, -1, world)::Vector instance = Core.Compiler.specialize_method(match) push!(results, instance) @@ -1081,26 +1319,112 @@ default_debug_info_kind() = unsafe_load(cglobal(:jl_default_debug_info_kind, Cin # this type mirrors jl_cgparams_t (documented in julia.h) struct CodegenParams + """ + If enabled, generate the necessary code to support the --track-allocations + command line flag to julia itself. Note that the option itself does not enable + allocation tracking. Rather, it merely generates the support code necessary + to perform allocation tracking if requested by the command line option. + """ track_allocations::Cint + + """ + If enabled, generate the necessary code to support the --code-coverage + command line flag to julia itself. Note that the option itself does not enable + code coverage. Rather, it merely generates the support code necessary + to code coverage if requested by the command line option. + """ code_coverage::Cint + + """ + If enabled, force the compiler to use the specialized signature + for all generated functions, whenever legal. If disabled, the choice is made + heuristically and specsig is only used when deemed profitable. + """ prefer_specsig::Cint + + """ + If enabled, enable emission of `.debug_names` sections. + """ gnu_pubnames::Cint + + """ + Controls what level of debug info to emit. Currently supported values are: + - 0: no debug info + - 1: full debug info + - 2: Line tables only + - 3: Debug directives only + + The integer values currently match the llvm::DICompilerUnit::DebugEmissionKind enum, + although this is not guaranteed. + """ debug_info_kind::Cint + """ + Controls the debug_info_level parameter, equivalent to the -g command line option. + """ + debug_info_level::Cint + + """ + If enabled, generate a GC safepoint at the entry to every function. Emitting + these extra safepoints can reduce the amount of time that other threads are + waiting for the currently running thread to reach a safepoint. The cost for + a safepoint is small, but non-zero. The option is enabled by default. + """ + safepoint_on_entry::Cint + + """ + If enabled, add an implicit argument to each function call that is used to + pass down the current task local state pointer. 
This argument is passed + using the `swiftself` convention, which in the ordinary case means that the + pointer is kept in a register and accesses are thus very fast. If this option + is disabled, the task local state pointer must be loaded from thread local + storage, which incurs a small amount of additional overhead. The option is enabled by + default. + """ + gcstack_arg::Cint + + """ + If enabled, use the Julia PLT mechanism to support lazy-resolution of `ccall` + targets. The option may be disabled for use in environments where the julia + runtime is unavailable, but is otherwise recommended to be enabled, even if + lazy resolution is not required, as the Julia PLT mechanism may have superior + performance compared to the native platform mechanism. The options is enabled by default. + """ + use_jlplt::Cint + + """ + A pointer of type + + typedef jl_value_t *(*jl_codeinstance_lookup_t)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, + size_t min_world, size_t max_world); + + that may be used by external compilers as a callback to look up the code instance corresponding + to a particular method instance. + """ lookup::Ptr{Cvoid} - generic_context::Any - function CodegenParams(; track_allocations::Bool=true, code_coverage::Bool=true, prefer_specsig::Bool=false, - gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), - lookup::Ptr{Cvoid}=cglobal(:jl_rettype_inferred), - generic_context = nothing) + gnu_pubnames::Bool=true, debug_info_kind::Cint = default_debug_info_kind(), + debug_info_level::Cint = Cint(JLOptions().debug_level), safepoint_on_entry::Bool=true, + gcstack_arg::Bool=true, use_jlplt::Bool=true, + lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid}))) return new( Cint(track_allocations), Cint(code_coverage), Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, - lookup, generic_context) + debug_info_level, Cint(safepoint_on_entry), + Cint(gcstack_arg), Cint(use_jlplt), + lookup) + end +end + +# this type mirrors jl_emission_params_t (documented in julia.h) +struct EmissionParams + emit_metadata::Cint + + function EmissionParams(; emit_metadata::Bool=true) + return new(Cint(emit_metadata)) end end @@ -1137,41 +1461,49 @@ function may_invoke_generator(method::Method, @nospecialize(atype), sparams::Sim # generator only has one method generator = method.generator isa(generator, Core.GeneratedFunctionStub) || return false - gen_mthds = methods(generator.gen)::MethodList + tt = Tuple{typeof(generator.gen), Vararg{Any}} + gen_mthds = _methods_by_ftype(tt, #=lim=#1, method.primary_world) + gen_mthds isa Vector || return false length(gen_mthds) == 1 || return false - generator_method = first(gen_mthds) + generator_method = first(gen_mthds).method nsparams = length(sparams) isdefined(generator_method, :source) || return false code = generator_method.source nslots = ccall(:jl_ir_nslots, Int, (Any,), code) - at = unwrap_unionall(atype)::DataType + at = unwrap_unionall(atype) + at isa DataType || return false (nslots >= 1 + length(sparams) + length(at.parameters)) || return false + firstarg = 1 for i = 1:nsparams if isa(sparams[i], TypeVar) - if (ast_slotflag(code, 1 + i) & SLOT_USED) != 0 + if (ast_slotflag(code, firstarg + i) & SLOT_USED) != 0 return false end end end - for i = 1:length(at.parameters) + nargs = Int(method.nargs) + non_va_args = method.isva ? 
nargs - 1 : nargs + for i = 1:non_va_args if !isdispatchelem(at.parameters[i]) - if (ast_slotflag(code, 1 + i + nsparams) & SLOT_USED) != 0 + if (ast_slotflag(code, firstarg + i + nsparams) & SLOT_USED) != 0 return false end end end - return true -end - -# give a decent error message if we try to instantiate a staged function on non-leaf types -function func_for_method_checked(m::Method, @nospecialize(types), sparams::SimpleVector) - if isdefined(m, :generator) && !may_invoke_generator(m, types, sparams) - error("cannot call @generated function `", m, "` ", - "with abstract argument types: ", types) + if method.isva + # If the va argument is used, we need to ensure that all arguments that + # contribute to the va tuple are dispatchelemes + if (ast_slotflag(code, firstarg + nargs + nsparams) & SLOT_USED) != 0 + for i = (non_va_args+1):length(at.parameters) + if !isdispatchelem(at.parameters[i]) + return false + end + end + end end - return m + return true end """ @@ -1182,19 +1514,20 @@ generic function and type signature. # Keyword Arguments -- `optimize=true`: controls whether additional optimizations, such as inlining, are also applied. -- `debuginfo=:default`: controls the amount of code metadata present in the output, -possible options are `:source` or `:none`. +- `optimize::Bool = true`: optional, controls whether additional optimizations, + such as inlining, are also applied. +- `debuginfo::Symbol = :default`: optional, controls the amount of code metadata present + in the output, possible options are `:source` or `:none`. # Internal Keyword Arguments This section should be considered internal, and is only for who understands Julia compiler internals. -- `world=Base.get_world_counter()`: optional, controls the world age to use when looking up methods, -use current world age if not specified. -- `interp=Core.Compiler.NativeInterpreter(world)`: optional, controls the interpreter to use, -use the native interpreter Julia uses if not specified. +- `world::UInt = Base.get_world_counter()`: optional, controls the world age to use + when looking up methods, use current world age if not specified. +- `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`: + optional, controls the abstract interpreter to use, use the native interpreter if not specified. # Example @@ -1209,22 +1542,12 @@ julia> code_typed(+, (Float64, Float64)) ) => Float64 ``` """ -function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); - optimize=true, - debuginfo::Symbol=:default, - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) +function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); kwargs...) if isa(f, Core.OpaqueClosure) - return code_typed_opaque_closure(f; optimize, debuginfo, interp) + return code_typed_opaque_closure(f; kwargs...) end - ft = Core.Typeof(f) - if isa(types, Type) - u = unwrap_unionall(types) - tt = rewrap_unionall(Tuple{ft, u.parameters...}, types) - else - tt = Tuple{ft, types...} - end - return code_typed_by_type(tt; optimize, debuginfo, world, interp) + tt = signature_type(f, types) + return code_typed_by_type(tt; kwargs...) end # returns argument tuple type which is supposed to be used for `code_typed` and its family; @@ -1246,11 +1569,12 @@ Similar to [`code_typed`](@ref), except the argument is a tuple type describing a full signature to query. 
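A minimal illustration (not part of this patch; the printed `CodeInfo` output varies across Julia versions): the tuple-type form should agree with the two-argument `code_typed` call.

```julia
julia> ct1 = code_typed(+, (Float64, Float64));

julia> ct2 = Base.code_typed_by_type(Tuple{typeof(+), Float64, Float64});

julia> only(ct1).second === only(ct2).second  # both infer a Float64 return type
true
```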
""" function code_typed_by_type(@nospecialize(tt::Type); - optimize=true, + optimize::Bool=true, debuginfo::Symbol=:default, - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) - ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && + error("code reflection cannot be used from generated functions") if @isdefined(IRShow) debuginfo = IRShow.debuginfo(debuginfo) elseif debuginfo === :default @@ -1260,14 +1584,13 @@ function code_typed_by_type(@nospecialize(tt::Type); throw(ArgumentError("'debuginfo' must be either :source or :none")) end tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, -1, world)::Vector + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector asts = [] for match in matches match = match::Core.MethodMatch - meth = func_for_method_checked(match.method, tt, match.sparams) - (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, optimize) + (code, ty) = Core.Compiler.typeinf_code(interp, match, optimize) if code === nothing - push!(asts, meth => Any) + push!(asts, match.method => Any) else debuginfo === :none && remove_linenums!(code) push!(asts, code => ty) @@ -1277,7 +1600,7 @@ function code_typed_by_type(@nospecialize(tt::Type); end function code_typed_opaque_closure(@nospecialize(oc::Core.OpaqueClosure); - debuginfo::Symbol=:default, __...) + debuginfo::Symbol=:default, _...) ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") m = oc.source if isa(m, Method) @@ -1304,20 +1627,21 @@ See also: [`code_typed`](@ref) This section should be considered internal, and is only for who understands Julia compiler internals. -- `world=Base.get_world_counter()`: optional, controls the world age to use when looking up - methods, use current world age if not specified. -- `interp=Core.Compiler.NativeInterpreter(world)`: optional, controls the interpreter to - use, use the native interpreter Julia uses if not specified. -- `optimize_until`: optional, controls the optimization passes to run. If it is a string, - it specifies the name of the pass up to which the optimizer is run. If it is an integer, - it specifies the number of passes to run. If it is `nothing` (default), all passes are - run. +- `world::UInt = Base.get_world_counter()`: optional, controls the world age to use + when looking up methods, use current world age if not specified. +- `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`: + optional, controls the abstract interpreter to use, use the native interpreter if not specified. +- `optimize_until::Union{Integer,AbstractString,Nothing} = nothing`: optional, + controls the optimization passes to run. + If it is a string, it specifies the name of the pass up to which the optimizer is run. + If it is an integer, it specifies the number of passes to run. + If it is `nothing` (default), all passes are run. # Example One can put the argument types in a tuple to get the corresponding `code_ircode`. 
-```jldoctest +```julia julia> Base.code_ircode(+, (Float64, Int64)) 1-element Vector{Any}: 388 1 ─ %1 = Base.sitofp(Float64, _3)::Float64 @@ -1333,24 +1657,12 @@ julia> Base.code_ircode(+, (Float64, Int64); optimize_until = "compact 1") => Float64 ``` """ -function code_ircode( - @nospecialize(f), - @nospecialize(types = default_tt(f)); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world), - optimize_until::Union{Integer,AbstractString,Nothing} = nothing, -) +function code_ircode(@nospecialize(f), @nospecialize(types = default_tt(f)); kwargs...) if isa(f, Core.OpaqueClosure) error("OpaqueClosure not supported") end - ft = Core.Typeof(f) - if isa(types, Type) - u = unwrap_unionall(types) - tt = rewrap_unionall(Tuple{ft,u.parameters...}, types) - else - tt = Tuple{ft,types...} - end - return code_ircode_by_type(tt; world, interp, optimize_until) + tt = signature_type(f, types) + return code_ircode_by_type(tt; kwargs...) end """ @@ -1361,27 +1673,20 @@ a full signature to query. """ function code_ircode_by_type( @nospecialize(tt::Type); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world), - optimize_until::Union{Integer,AbstractString,Nothing} = nothing, + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world), + optimize_until::Union{Integer,AbstractString,Nothing}=nothing, ) - ccall(:jl_is_in_pure_context, Bool, ()) && + (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, -1, world)::Vector + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector asts = [] for match in matches match = match::Core.MethodMatch - meth = func_for_method_checked(match.method, tt, match.sparams) - (code, ty) = Core.Compiler.typeinf_ircode( - interp, - meth, - match.spec_types, - match.sparams, - optimize_until, - ) + (code, ty) = Core.Compiler.typeinf_ircode(interp, match, optimize_until) if code === nothing - push!(asts, meth => Any) + push!(asts, match.method => Any) else push!(asts, code => ty) end @@ -1389,50 +1694,412 @@ function code_ircode_by_type( return asts end +function _builtin_return_type(interp::Core.Compiler.AbstractInterpreter, + @nospecialize(f::Core.Builtin), @nospecialize(types)) + argtypes = Any[to_tuple_type(types).parameters...] + rt = Core.Compiler.builtin_tfunction(interp, f, argtypes, nothing) + return Core.Compiler.widenconst(rt) +end + +function _builtin_effects(interp::Core.Compiler.AbstractInterpreter, + @nospecialize(f::Core.Builtin), @nospecialize(types)) + argtypes = Any[to_tuple_type(types).parameters...] + rt = Core.Compiler.builtin_tfunction(interp, f, argtypes, nothing) + return Core.Compiler.builtin_effects(Core.Compiler.typeinf_lattice(interp), f, argtypes, rt) +end + +check_generated_context(world::UInt) = + (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && + error("code reflection cannot be used from generated functions") + +# TODO rename `Base.return_types` to `Base.infer_return_types` + +""" + Base.return_types( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world)) -> rts::Vector{Any} + +Return a list of possible return types for a given function `f` and argument types `types`. 
+The list corresponds to the results of type inference on all the possible method match +candidates for `f` and `types` (see also [`methods(f, types)`](@ref methods). + +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `rts::Vector{Any}`: The list of return types that are figured out by inference on + methods matching with the given `f` and `types`. The list's order matches the order + returned by `methods(f, types)`. + +# Example + +```julia +julia> Base.return_types(sum, Tuple{Vector{Int}}) +1-element Vector{Any}: + Int64 + +julia> methods(sum, (Union{Vector{Int},UnitRange{Int}},)) +# 2 methods for generic function "sum" from Base: + [1] sum(r::AbstractRange{<:Real}) + @ range.jl:1399 + [2] sum(a::AbstractArray; dims, kw...) + @ reducedim.jl:1010 + +julia> Base.return_types(sum, (Union{Vector{Int},UnitRange{Int}},)) +2-element Vector{Any}: + Int64 # the result of inference on sum(r::AbstractRange{<:Real}) + Int64 # the result of inference on sum(a::AbstractArray; dims, kw...) +``` + +!!! warning + The `Base.return_types` function should not be used from generated functions; + doing so will result in an error. +""" function return_types(@nospecialize(f), @nospecialize(types=default_tt(f)); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) - ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + check_generated_context(world) if isa(f, Core.OpaqueClosure) _, rt = only(code_typed_opaque_closure(f)) return Any[rt] end - types = to_tuple_type(types) - rt = [] - for match in _methods(f, types, -1, world)::Vector - match = match::Core.MethodMatch - meth = func_for_method_checked(match.method, types, match.sparams) - ty = Core.Compiler.typeinf_type(interp, meth, match.spec_types, match.sparams) - push!(rt, something(ty, Any)) + if isa(f, Core.Builtin) + rt = _builtin_return_type(interp, f, types) + return Any[rt] + end + rts = Any[] + tt = signature_type(f, types) + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector + for match in matches + ty = Core.Compiler.typeinf_type(interp, match::Core.MethodMatch) + push!(rts, something(ty, Any)) + end + return rts +end + +""" + Base.infer_return_type( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> rt::Type + +Returns an inferred return type of the function call specified by `f` and `types`. + +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `rt::Type`: An inferred return type of the function call specified by the given call signature. + +!!! 
note + Note that, different from [`Base.return_types`](@ref), this doesn't give you the list + return types of every possible method matching with the given `f` and `types`. + It returns a single return type, taking into account all potential outcomes of + any function call entailed by the given signature type. + +# Example + +```julia +julia> checksym(::Symbol) = :symbol; + +julia> checksym(x::Any) = x; + +julia> Base.infer_return_type(checksym, (Union{Symbol,String},)) +Union{String, Symbol} + +julia> Base.return_types(checksym, (Union{Symbol,String},)) +2-element Vector{Any}: + Symbol + Union{String, Symbol} +``` + +It's important to note the difference here: `Base.return_types` gives back inferred results +for each method that matches the given signature `checksum(::Union{Symbol,String})`. +On the other hand `Base.infer_return_type` returns one collective result that sums up all those possibilities. + +!!! warning + The `Base.infer_return_type` function should not be used from generated functions; + doing so will result in an error. +""" +function infer_return_type(@nospecialize(f), @nospecialize(types=default_tt(f)); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + check_generated_context(world) + if isa(f, Core.OpaqueClosure) + return last(only(code_typed_opaque_closure(f))) + end + if isa(f, Core.Builtin) + return _builtin_return_type(interp, f, types) + end + tt = signature_type(f, types) + matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp)) + if matches === nothing + # unanalyzable call, i.e. the interpreter world might be newer than the world where + # the `f` is defined, return the unknown return type + return Any + end + rt = Union{} + for match in matches.matches + ty = Core.Compiler.typeinf_type(interp, match::Core.MethodMatch) + rt = Core.Compiler.tmerge(rt, something(ty, Any)) end return rt end -function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f)); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) - ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") - types = to_tuple_type(types) +""" + Base.infer_exception_types( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::NativeInterpreter=Core.Compiler.NativeInterpreter(world)) -> excts::Vector{Any} + +Return a list of possible exception types for a given function `f` and argument types `types`. +The list corresponds to the results of type inference on all the possible method match +candidates for `f` and `types` (see also [`methods(f, types)`](@ref methods). +It works like [`Base.return_types`](@ref), but it infers the exception types instead of the return types. + +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `excts::Vector{Any}`: The list of exception types that are figured out by inference on + methods matching with the given `f` and `types`. The list's order matches the order + returned by `methods(f, types)`. 
+ +# Example + +```julia +julia> throw_if_number(::Number) = error("number is given"); + +julia> throw_if_number(::Any) = nothing; + +julia> Base.infer_exception_types(throw_if_number, (Int,)) +1-element Vector{Any}: + ErrorException + +julia> methods(throw_if_number, (Any,)) +# 2 methods for generic function "throw_if_number" from Main: + [1] throw_if_number(x::Number) + @ REPL[1]:1 + [2] throw_if_number(::Any) + @ REPL[2]:1 + +julia> Base.infer_exception_types(throw_if_number, (Any,)) +2-element Vector{Any}: + ErrorException # the result of inference on `throw_if_number(::Number)` + Union{} # the result of inference on `throw_if_number(::Any)` +``` + +!!! warning + The `Base.infer_exception_types` function should not be used from generated functions; + doing so will result in an error. +""" +function infer_exception_types(@nospecialize(f), @nospecialize(types=default_tt(f)); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + check_generated_context(world) + if isa(f, Core.OpaqueClosure) + return Any[Any] # TODO + end if isa(f, Core.Builtin) - args = Any[types.parameters...] - rt = Core.Compiler.builtin_tfunction(interp, f, args, nothing) - return Core.Compiler.builtin_effects(f, args, rt) - else - effects = Core.Compiler.EFFECTS_TOTAL - matches = _methods(f, types, -1, world)::Vector - if isempty(matches) - # this call is known to throw MethodError - return Core.Compiler.Effects(effects; nothrow=Core.Compiler.ALWAYS_FALSE) - end - for match in matches - match = match::Core.MethodMatch - frame = Core.Compiler.typeinf_frame(interp, - match.method, match.spec_types, match.sparams, #=run_optimizer=#false) - frame === nothing && return Core.Compiler.Effects() - effects = Core.Compiler.tristate_merge(effects, frame.ipo_effects) + effects = _builtin_effects(interp, f, types) + exct = Core.Compiler.is_nothrow(effects) ? Union{} : Any + return Any[exct] + end + excts = Any[] + tt = signature_type(f, types) + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector + for match in matches + match = match::Core.MethodMatch + frame = Core.Compiler.typeinf_frame(interp, match, #=run_optimizer=#false) + if frame === nothing + exct = Any + else + exct = Core.Compiler.widenconst(frame.result.exc_result) end - return effects + push!(excts, exct) + end + return excts +end + +_may_throw_methoderror(matches#=::Core.Compiler.MethodLookupResult=#) = + matches.ambig || !any(match::Core.MethodMatch->match.fully_covers, matches.matches) + +""" + Base.infer_exception_type( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> exct::Type + +Returns the type of exception potentially thrown by the function call specified by `f` and `types`. + +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `exct::Type`: The inferred type of exception that can be thrown by the function call + specified by the given call signature. + +!!! 
note + Note that, different from [`Base.infer_exception_types`](@ref), this doesn't give you the list + exception types for every possible matching method with the given `f` and `types`. + It returns a single exception type, taking into account all potential outcomes of + any function call entailed by the given signature type. + +# Example + +```julia +julia> f1(x) = x * 2; + +julia> Base.infer_exception_type(f1, (Int,)) +Union{} +``` + +The exception inferred as `Union{}` indicates that `f1(::Int)` will not throw any exception. + +```julia +julia> f2(x::Int) = x * 2; + +julia> Base.infer_exception_type(f2, (Integer,)) +MethodError +``` + +This case is pretty much the same as with `f1`, but there's a key difference to note. For +`f2`, the argument type is limited to `Int`, while the argument type is given as `Tuple{Integer}`. +Because of this, taking into account the chance of the method error entailed by the call +signature, the exception type is widened to `MethodError`. + +!!! warning + The `Base.infer_exception_type` function should not be used from generated functions; + doing so will result in an error. +""" +function infer_exception_type(@nospecialize(f), @nospecialize(types=default_tt(f)); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + check_generated_context(world) + if isa(f, Core.OpaqueClosure) + return Any # TODO + end + if isa(f, Core.Builtin) + effects = _builtin_effects(interp, f, types) + return Core.Compiler.is_nothrow(effects) ? Union{} : Any + end + tt = signature_type(f, types) + matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp)) + if matches === nothing + # unanalyzable call, i.e. the interpreter world might be newer than the world where + # the `f` is defined, return the unknown exception type + return Any + end + exct = Union{} + if _may_throw_methoderror(matches) + # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. + exct = Core.Compiler.tmerge(exct, MethodError) + end + for match in matches.matches + match = match::Core.MethodMatch + frame = Core.Compiler.typeinf_frame(interp, match, #=run_optimizer=#false) + frame === nothing && return Any + exct = Core.Compiler.tmerge(exct, Core.Compiler.widenconst(frame.result.exc_result)) + end + return exct +end + +""" + Base.infer_effects( + f, types=default_tt(f); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) -> effects::Effects + +Returns the possible computation effects of the function call specified by `f` and `types`. + +# Arguments +- `f`: The function to analyze. +- `types` (optional): The argument types of the function. Defaults to the default tuple type of `f`. +- `world` (optional): The world counter to use for the analysis. Defaults to the current world counter. +- `interp` (optional): The abstract interpreter to use for the analysis. Defaults to a new `Core.Compiler.NativeInterpreter` with the specified `world`. + +# Returns +- `effects::Effects`: The computed effects of the function call specified by the given call signature. + See the documentation of [`Effects`](@ref Core.Compiler.Effects) or [`Base.@assume_effects`](@ref) + for more information on the various effect properties. + +!!! note + Note that, different from [`Base.return_types`](@ref), this doesn't give you the list + effect analysis results for every possible matching method with the given `f` and `types`. 
+ It returns a single effect, taking into account all potential outcomes of any function + call entailed by the given signature type. + +# Example + +```julia +julia> f1(x) = x * 2; + +julia> Base.infer_effects(f1, (Int,)) +(+c,+e,+n,+t,+s,+m,+i) +``` + +This function will return an `Effects` object with information about the computational +effects of the function `f1` when called with an `Int` argument. + +```julia +julia> f2(x::Int) = x * 2; + +julia> Base.infer_effects(f2, (Integer,)) +(+c,+e,!n,+t,+s,+m,+i) +``` + +This case is pretty much the same as with `f1`, but there's a key difference to note. For +`f2`, the argument type is limited to `Int`, while the argument type is given as `Tuple{Integer}`. +Because of this, taking into account the chance of the method error entailed by the call +signature, the `:nothrow` bit gets tainted. + +!!! warning + The `Base.infer_effects` function should not be used from generated functions; + doing so will result in an error. + +# See Also +- [`Core.Compiler.Effects`](@ref): A type representing the computational effects of a method call. +- [`Base.@assume_effects`](@ref): A macro for making assumptions about the effects of a method. +""" +function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f)); + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + check_generated_context(world) + if isa(f, Core.Builtin) + return _builtin_effects(interp, f, types) + end + tt = signature_type(f, types) + matches = Core.Compiler.findall(tt, Core.Compiler.method_table(interp)) + if matches === nothing + # unanalyzable call, i.e. the interpreter world might be newer than the world where + # the `f` is defined, return the unknown effects + return Core.Compiler.Effects() + end + effects = Core.Compiler.EFFECTS_TOTAL + if _may_throw_methoderror(matches) + # account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. 
+ effects = Core.Compiler.Effects(effects; nothrow=false) end + for match in matches.matches + match = match::Core.MethodMatch + frame = Core.Compiler.typeinf_frame(interp, match, #=run_optimizer=#true) + frame === nothing && return Core.Compiler.Effects() + effects = Core.Compiler.merge_effects(effects, frame.result.ipo_effects) + end + return effects end """ @@ -1447,22 +2114,24 @@ function print_statement_costs(io::IO, @nospecialize(f), @nospecialize(t); kwarg end function print_statement_costs(io::IO, @nospecialize(tt::Type); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) - matches = _methods_by_ftype(tt, -1, world)::Vector + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + tt = to_tuple_type(tt) + world == typemax(UInt) && error("code reflection cannot be used from generated functions") + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector params = Core.Compiler.OptimizationParams(interp) cst = Int[] for match in matches match = match::Core.MethodMatch - meth = func_for_method_checked(match.method, tt, match.sparams) - println(io, meth) - (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, true) + println(io, match.method) + (code, ty) = Core.Compiler.typeinf_code(interp, match, true) if code === nothing println(io, " inference not successful") else empty!(cst) resize!(cst, length(code.code)) - maxcost = Core.Compiler.statement_costs!(cst, code.code, code, Any[match.sparams...], false, params) + sptypes = Core.Compiler.VarState[Core.Compiler.VarState(sp, false) for sp in match.sparams] + maxcost = Core.Compiler.statement_costs!(cst, code.code, code, sptypes, params) nd = ndigits(maxcost) irshow_config = IRShow.IRShowConfig() do io, linestart, idx print(io, idx > 0 ? lpad(cst[idx], nd+1) : " "^(nd+1), " ") @@ -1476,10 +2145,22 @@ end print_statement_costs(args...; kwargs...) = print_statement_costs(stdout, args...; kwargs...) -function _which(@nospecialize(tt::Type), world=get_world_counter()) - match, _ = Core.Compiler._findsup(tt, nothing, world) +function _which(@nospecialize(tt::Type); + method_table::Union{Nothing,Core.MethodTable,Core.Compiler.MethodTableView}=nothing, + world::UInt=get_world_counter(), + raise::Bool=true) + world == typemax(UInt) && error("code reflection cannot be used from generated functions") + if method_table === nothing + table = Core.Compiler.InternalMethodTable(world) + elseif method_table isa Core.MethodTable + table = Core.Compiler.OverlayMethodTable(world, method_table) + else + table = method_table + end + match, = Core.Compiler.findsup(tt, table) if match === nothing - error("no unique matching method found for the specified argument types") + raise && error("no unique matching method found for the specified argument types") + return nothing end return match end @@ -1491,10 +2172,9 @@ Returns the method of `f` (a `Method` object) that would be called for arguments If `types` is an abstract type, then the method that would be called by `invoke` is returned. -See also: [`parentmodule`](@ref), and `@which` and `@edit` in [`InteractiveUtils`](@ref man-interactive-utils). +See also: [`parentmodule`](@ref), [`@which`](@ref Main.InteractiveUtils.@which), and [`@edit`](@ref Main.InteractiveUtils.@edit). 
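A small usage sketch (illustrative only; the file and line locations in the printout differ between Julia versions):

```julia
julia> which(sin, (Float64,))          # the method that sin(1.0) would dispatch to
sin(x::T) where T<:Union{Float32, Float64}
     @ Base.Math special/trig.jl:29

julia> which(Tuple{typeof(sin), Float64}) === which(sin, (Float64,))  # tuple-type form agrees
true
```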
""" function which(@nospecialize(f), @nospecialize(t)) - t = to_tuple_type(t) tt = signature_type(f, t) return which(tt) end @@ -1555,16 +2235,27 @@ parentmodule(f::Function) = parentmodule(typeof(f)) """ parentmodule(f::Function, types) -> Module -Determine the module containing a given definition of a generic function. +Determine the module containing the first method of a generic function `f` matching +the specified `types`. """ function parentmodule(@nospecialize(f), @nospecialize(types)) m = methods(f, types) if isempty(m) error("no matching methods") end - return first(m).module + return parentmodule(first(m)) end +""" + parentmodule(m::Method) -> Module + +Return the module in which the given method `m` is defined. + +!!! compat "Julia 1.9" + Passing a `Method` as an argument requires Julia 1.9 or later. +""" +parentmodule(m::Method) = m.module + """ hasmethod(f, t::Type{<:Tuple}[, kwnames]; world=get_world_counter()) -> Bool @@ -1601,21 +2292,30 @@ julia> hasmethod(g, Tuple{}, (:a, :b, :c, :d)) # g accepts arbitrary kwargs true ``` """ -function hasmethod(@nospecialize(f), @nospecialize(t); world::UInt=get_world_counter()) - t = to_tuple_type(t) - t = signature_type(f, t) - return ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), t, nothing, world) !== nothing +function hasmethod(@nospecialize(f), @nospecialize(t)) + return Core._hasmethod(f, t isa Type ? t : to_tuple_type(t)) +end + +function Core.kwcall(kwargs::NamedTuple, ::typeof(hasmethod), @nospecialize(f), @nospecialize(t)) + world = kwargs.world::UInt # make sure this is the only local, to avoid confusing kwarg_decl() + return ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), signature_type(f, t), nothing, world) !== nothing end -function hasmethod(@nospecialize(f), @nospecialize(t), kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_counter()) - # TODO: this appears to be doing the wrong queries - hasmethod(f, t, world=world) || return false - isempty(kwnames) && return true - m = which(f, t) - kws = kwarg_decl(m) +function hasmethod(f, t, kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_counter()) + @nospecialize + isempty(kwnames) && return hasmethod(f, t; world) + t = to_tuple_type(t) + ft = Core.Typeof(f) + u = unwrap_unionall(t)::DataType + tt = rewrap_unionall(Tuple{typeof(Core.kwcall), NamedTuple, ft, u.parameters...}, t) + match = ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world) + match === nothing && return false + kws = ccall(:jl_uncompress_argnames, Array{Symbol,1}, (Any,), (match::Method).slot_syms) + isempty(kws) && return true # some kwfuncs simply forward everything directly for kw in kws endswith(String(kw), "...") && return true end + kwnames = Symbol[kwnames[i] for i in 1:length(kwnames)] return issubset(kwnames, kws) end @@ -1627,41 +2327,43 @@ as written, called after all missing keyword-arguments have been assigned defaul `basemethod` is the method you obtain via [`which`](@ref) or [`methods`](@ref). """ function bodyfunction(basemethod::Method) - function getsym(arg) - isa(arg, Symbol) && return arg - isa(arg, GlobalRef) && return arg.name - return nothing - end - - fmod = basemethod.module + fmod = parentmodule(basemethod) # The lowered code for `basemethod` should look like # %1 = mkw(kwvalues..., #self#, args...) # return %1 # where `mkw` is the name of the "active" keyword body-function. 
ast = uncompressed_ast(basemethod) - f = nothing if isa(ast, Core.CodeInfo) && length(ast.code) >= 2 callexpr = ast.code[end-1] - if isa(callexpr, Expr) && callexpr.head == :call + if isa(callexpr, Expr) && callexpr.head === :call fsym = callexpr.args[1] - if isa(fsym, Symbol) - f = getfield(fmod, fsym) - elseif isa(fsym, GlobalRef) - newsym = nothing - if fsym.mod === Core && fsym.name === :_apply - newsym = getsym(callexpr.args[2]) - elseif fsym.mod === Core && fsym.name === :_apply_iterate - newsym = getsym(callexpr.args[3]) - end - if isa(newsym, Symbol) - f = getfield(basemethod.module, newsym)::Function + while true + if isa(fsym, Symbol) + return getfield(fmod, fsym) + elseif isa(fsym, GlobalRef) + if fsym.mod === Core && fsym.name === :_apply + fsym = callexpr.args[2] + elseif fsym.mod === Core && fsym.name === :_apply_iterate + fsym = callexpr.args[3] + end + if isa(fsym, Symbol) + return getfield(fmod, fsym)::Function + elseif isa(fsym, GlobalRef) + return getfield(fsym.mod, fsym.name)::Function + elseif isa(fsym, Core.SSAValue) + fsym = ast.code[fsym.id] + else + return nothing + end + elseif isa(fsym, Core.SSAValue) + fsym = ast.code[fsym.id] else - f = getfield(fsym.mod, fsym.name)::Function + return nothing end end end end - return f + return nothing end """ @@ -1708,6 +2410,7 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) has_bottom_parameter(ti) && return false end world = get_world_counter() + world == typemax(UInt) && return true # intersecting methods are always ambiguous in the generator world, which is true, albeit maybe confusing for some min = Ref{UInt}(typemin(UInt)) max = Ref{UInt}(typemax(UInt)) has_ambig = Ref{Int32}(0) @@ -1719,7 +2422,9 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) end end # if ml-matches reported the existence of an ambiguity over their - # intersection, see if both m1 and m2 may be involved in it + # intersection, see if both m1 and m2 seem to be involved in it + # (if one was fully dominated by a different method, we want to will + # report the other ambiguous pair) have_m1 = have_m2 = false for match in ms match = match::Core.MethodMatch @@ -1744,18 +2449,14 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) minmax = m end end - if minmax === nothing + if minmax === nothing || minmax == m1 || minmax == m2 return true end for match in ms m = match.method m === minmax && continue - if match.fully_covers - if !morespecific(minmax.sig, m.sig) - return true - end - else - if morespecific(m.sig, minmax.sig) + if !morespecific(minmax.sig, m.sig) + if match.fully_covers || !morespecific(m.sig, minmax.sig) return true end end @@ -1772,12 +2473,12 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false) if ti2 <: m1.sig && ti2 <: m2.sig ti = ti2 elseif ti != ti2 - # TODO: this would be the correct way to handle this case, but + # TODO: this would be the more correct way to handle this case, but # people complained so we don't do it - # inner(ti2) || return false - return false # report that the type system failed to decide if it was ambiguous by saying they definitely aren't + #inner(ti2) || return false # report that the type system failed to decide if it was ambiguous by saying they definitely are + return false # report that the type system failed to decide if it was ambiguous by saying they definitely are not else - return false # report that the type system failed to decide if it was ambiguous by saying they definitely aren't + return 
false # report that the type system failed to decide if it was ambiguous by saying they definitely are not end end inner(ti) || return false @@ -1795,7 +2496,7 @@ function delete_method(m::Method) end function get_methodtable(m::Method) - return ccall(:jl_method_table_for, Any, (Any,), m.sig)::Core.MethodTable + return ccall(:jl_method_get_table, Any, (Any,), m)::Core.MethodTable end """ @@ -1831,7 +2532,7 @@ as well to get the properties of an instance of the type. `propertynames(x)` may return only "public" property names that are part of the documented interface of `x`. If you want it to also return "private" -fieldnames intended for internal use, pass `true` for the optional second argument. +property names intended for internal use, pass `true` for the optional second argument. REPL tab completion on `x.` shows only the `private=false` properties. See also: [`hasproperty`](@ref), [`hasfield`](@ref). @@ -1839,6 +2540,7 @@ See also: [`hasproperty`](@ref), [`hasfield`](@ref). propertynames(x) = fieldnames(typeof(x)) propertynames(m::Module) = names(m) propertynames(x, private::Bool) = propertynames(x) # ignore private flag by default +propertynames(x::Array) = () # hide the fields from tab completion to discourage calling `x.size` instead of `size(x)`, even though they are equivalent """ hasproperty(x, s::Symbol) @@ -1861,6 +2563,12 @@ When an argument's type annotation is omitted, it's replaced with `Core.Typeof` To invoke a method where an argument is untyped or explicitly typed as `Any`, annotate the argument with `::Any`. +It also supports the following syntax: +- `@invoke (x::X).f` expands to `invoke(getproperty, Tuple{X,Symbol}, x, :f)` +- `@invoke (x::X).f = v::V` expands to `invoke(setproperty!, Tuple{X,Symbol,V}, x, :f, v)` +- `@invoke (xs::Xs)[i::I]` expands to `invoke(getindex, Tuple{Xs,I}, xs, i)` +- `@invoke (xs::Xs)[i::I] = v::V` expands to `invoke(setindex!, Tuple{Xs,V,I}, xs, v, i)` + # Examples ```jldoctest @@ -1869,6 +2577,18 @@ julia> @macroexpand @invoke f(x::T, y) julia> @invoke 420::Integer % Unsigned 0x00000000000001a4 + +julia> @macroexpand @invoke (x::X).f +:(Core.invoke(Base.getproperty, Tuple{X, Core.Typeof(:f)}, x, :f)) + +julia> @macroexpand @invoke (x::X).f = v::V +:(Core.invoke(Base.setproperty!, Tuple{X, Core.Typeof(:f), V}, x, :f, v)) + +julia> @macroexpand @invoke (xs::Xs)[i::I] +:(Core.invoke(Base.getindex, Tuple{Xs, I}, xs, i)) + +julia> @macroexpand @invoke (xs::Xs)[i::I] = v::V +:(Core.invoke(Base.setindex!, Tuple{Xs, V, I}, xs, v, i)) ``` !!! compat "Julia 1.7" @@ -1876,9 +2596,13 @@ julia> @invoke 420::Integer % Unsigned !!! compat "Julia 1.9" This macro is exported as of Julia 1.9. + +!!! compat "Julia 1.10" + The additional syntax is supported as of Julia 1.10. """ macro invoke(ex) - f, args, kwargs = destructure_callex(ex) + topmod = Core.Compiler._topmod(__module__) # well, except, do not get it via CC but define it locally + f, args, kwargs = destructure_callex(topmod, ex) types = Expr(:curly, :Tuple) out = Expr(:call, GlobalRef(Core, :invoke)) isempty(kwargs) || push!(out.args, Expr(:parameters, kwargs...)) @@ -1899,33 +2623,117 @@ end """ @invokelatest f(args...; kwargs...) -Provides a convenient way to call [`Base.invokelatest`](@ref). +Provides a convenient way to call [`invokelatest`](@ref). `@invokelatest f(args...; kwargs...)` will simply be expanded into `Base.invokelatest(f, args...; kwargs...)`. 
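As background (a minimal sketch, not part of this change): `invokelatest` is the usual escape hatch when a method is defined after the caller was compiled, e.g. via `eval`, which would otherwise hit a world-age `MethodError`:

```julia
function call_fresh()
    @eval newfn() = 42            # defines newfn in a newer world age
    # newfn()                     # would error: method too new for this world age
    return @invokelatest newfn()  # re-dispatches in the latest world age
end
```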
+It also supports the following syntax: +- `@invokelatest x.f` expands to `Base.invokelatest(getproperty, x, :f)` +- `@invokelatest x.f = v` expands to `Base.invokelatest(setproperty!, x, :f, v)` +- `@invokelatest xs[i]` expands to `Base.invokelatest(getindex, xs, i)` +- `@invokelatest xs[i] = v` expands to `Base.invokelatest(setindex!, xs, v, i)` + +```jldoctest +julia> @macroexpand @invokelatest f(x; kw=kwv) +:(Base.invokelatest(f, x; kw = kwv)) + +julia> @macroexpand @invokelatest x.f +:(Base.invokelatest(Base.getproperty, x, :f)) + +julia> @macroexpand @invokelatest x.f = v +:(Base.invokelatest(Base.setproperty!, x, :f, v)) + +julia> @macroexpand @invokelatest xs[i] +:(Base.invokelatest(Base.getindex, xs, i)) + +julia> @macroexpand @invokelatest xs[i] = v +:(Base.invokelatest(Base.setindex!, xs, v, i)) +``` + !!! compat "Julia 1.7" This macro requires Julia 1.7 or later. + +!!! compat "Julia 1.9" + Prior to Julia 1.9, this macro was not exported, and was called as `Base.@invokelatest`. + +!!! compat "Julia 1.10" + The additional `x.f` and `xs[i]` syntax requires Julia 1.10. """ macro invokelatest(ex) - f, args, kwargs = destructure_callex(ex) - return esc(:($(GlobalRef(@__MODULE__, :invokelatest))($(f), $(args...); $(kwargs...)))) + topmod = Core.Compiler._topmod(__module__) # well, except, do not get it via CC but define it locally + f, args, kwargs = destructure_callex(topmod, ex) + out = Expr(:call, GlobalRef(Base, :invokelatest)) + isempty(kwargs) || push!(out.args, Expr(:parameters, kwargs...)) + push!(out.args, f) + append!(out.args, args) + return esc(out) end -function destructure_callex(ex) - isexpr(ex, :call) || throw(ArgumentError("a call expression f(args...; kwargs...) should be given")) +function destructure_callex(topmod::Module, @nospecialize(ex)) + function flatten(xs) + out = Any[] + for x in xs + if isexpr(x, :tuple) + append!(out, x.args) + else + push!(out, x) + end + end + return out + end - f = first(ex.args) - args = [] - kwargs = [] - for x in ex.args[2:end] - if isexpr(x, :parameters) - append!(kwargs, x.args) - elseif isexpr(x, :kw) - push!(kwargs, x) + kwargs = Any[] + if isexpr(ex, :call) # `f(args...)` + f = first(ex.args) + args = Any[] + for x in ex.args[2:end] + if isexpr(x, :parameters) + append!(kwargs, x.args) + elseif isexpr(x, :kw) + push!(kwargs, x) + else + push!(args, x) + end + end + elseif isexpr(ex, :.) # `x.f` + f = GlobalRef(topmod, :getproperty) + args = flatten(ex.args) + elseif isexpr(ex, :ref) # `x[i]` + f = GlobalRef(topmod, :getindex) + args = flatten(ex.args) + elseif isexpr(ex, :(=)) # `x.f = v` or `x[i] = v` + lhs, rhs = ex.args + if isexpr(lhs, :.) + f = GlobalRef(topmod, :setproperty!) + args = flatten(Any[lhs.args..., rhs]) + elseif isexpr(lhs, :ref) + f = GlobalRef(topmod, :setindex!) + args = flatten(Any[lhs.args[1], rhs, lhs.args[2]]) else - push!(args, x) + throw(ArgumentError("expected a `setproperty!` expression `x.f = v` or `setindex!` expression `x[i] = v`")) end + else + throw(ArgumentError("expected a `:call` expression `f(args...; kwargs...)`")) end - return f, args, kwargs end + +""" + Base.generating_output([incremental::Bool])::Bool + +Return `true` if the current process is being used to pre-generate a +code cache via any of the `--output-*` command line arguments. The optional +`incremental` argument further specifies the precompilation mode: when set +to `true`, the function will return `true` only for package precompilation; +when set to `false`, it will return `true` only for system image generation. + +!!! 
compat "Julia 1.11" + This function requires at least Julia 1.11. +""" +function generating_output(incremental::Union{Bool,Nothing}=nothing) + ccall(:jl_generating_output, Cint, ()) == 0 && return false + if incremental !== nothing + JLOptions().incremental == incremental || return false + end + return true +end diff --git a/base/refpointer.jl b/base/refpointer.jl index cd179c87b30d5..ce2c6fc00560d 100644 --- a/base/refpointer.jl +++ b/base/refpointer.jl @@ -42,9 +42,18 @@ A `C_NULL` instance of `Ptr` can be passed to a `ccall` `Ref` argument to initia # Examples ```jldoctest -julia> Ref(5) +julia> r = Ref(5) # Create a Ref with an initial value Base.RefValue{Int64}(5) +julia> r[] # Getting a value from a Ref +5 + +julia> r[] = 7 # Storing a new value in a Ref +7 + +julia> r # The Ref now contains 7 +Base.RefValue{Int64}(7) + julia> isa.(Ref([1,2,3]), [Array, Dict, Int]) # Treat reference values as scalar during broadcasting 3-element BitVector: 1 @@ -65,9 +74,6 @@ julia> Ref{Int64}()[]; # A reference to a bitstype refers to an undetermined val julia> isassigned(Ref{Int64}()) # A reference to a bitstype is always assigned true - -julia> Ref{Int64}(0)[] == 0 # Explicitly give a value for a bitstype reference -true ``` """ Ref @@ -83,6 +89,7 @@ else primitive type Cwstring 32 end end + ### General Methods for Ref{T} type eltype(x::Type{<:Ref{T}}) where {T} = @isdefined(T) ? T : Any @@ -101,7 +108,7 @@ IteratorSize(::Type{<:Ref}) = HasShape{0}() unsafe_convert(::Type{Ref{T}}, x::Ref{T}) where {T} = unsafe_convert(Ptr{T}, x) unsafe_convert(::Type{Ref{T}}, x) where {T} = unsafe_convert(Ptr{T}, x) -convert(::Type{Ref{T}}, x) where {T} = RefValue{T}(x) +convert(::Type{Ref{T}}, x) where {T} = RefValue{T}(x)::RefValue{T} ### Methods for a Ref object that is backed by an array at index i struct RefArray{T,A<:AbstractArray{T},R} <: Ref{T} @@ -112,6 +119,8 @@ struct RefArray{T,A<:AbstractArray{T},R} <: Ref{T} end RefArray(x::AbstractArray{T}, i::Int, roots::Any) where {T} = RefArray{T,typeof(x),Any}(x, i, roots) RefArray(x::AbstractArray{T}, i::Int=1, roots::Nothing=nothing) where {T} = RefArray{T,typeof(x),Nothing}(x, i, nothing) +RefArray(x::AbstractArray{T}, i::Integer, roots::Any) where {T} = RefArray{T,typeof(x),Any}(x, Int(i), roots) +RefArray(x::AbstractArray{T}, i::Integer, roots::Nothing=nothing) where {T} = RefArray{T,typeof(x),Nothing}(x, Int(i), nothing) convert(::Type{Ref{T}}, x::AbstractArray{T}) where {T} = RefArray(x, 1) function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefArray{T})::P where T @@ -139,13 +148,14 @@ if is_primary_base_module Ref(x::Ptr{T}, i::Integer) where {T} = x + (i - 1) * Core.sizeof(T) # convert Arrays to pointer arrays for ccall - function Ref{P}(a::Array{<:Union{Ptr,Cwstring,Cstring}}) where P<:Union{Ptr,Cwstring,Cstring} - return RefArray(a) # effectively a no-op - end + # For example `["a", "b"]` to Ptr{Cstring} for `char **argv` function Ref{P}(a::Array{T}) where P<:Union{Ptr,Cwstring,Cstring} where T - if (!isbitstype(T) && T <: eltype(P)) + if P == T + return getfield(a, :ref) + elseif (isbitstype(T) ? 
T <: Ptr || T <: Union{Cwstring,Cstring} : T <: eltype(P)) # this Array already has the right memory layout for the requested Ref - return RefArray(a,1,false) # root something, so that this function is type-stable + # but the wrong eltype for the constructor + return RefArray{P,typeof(a),Nothing}(a, 1, nothing) # effectively a no-op else ptrs = Vector{P}(undef, length(a)+1) roots = Vector{Any}(undef, length(a)) @@ -155,14 +165,14 @@ if is_primary_base_module roots[i] = root end ptrs[length(a)+1] = C_NULL - return RefArray(ptrs,1,roots) + return RefArray{P,typeof(ptrs),typeof(roots)}(ptrs, 1, roots) end end Ref(x::AbstractArray, i::Integer) = RefArray(x, i) end -cconvert(::Type{Ptr{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a -cconvert(::Type{Ref{P}}, a::Array{<:Ptr}) where {P<:Ptr} = a +cconvert(::Type{Ptr{P}}, a::Array{P}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref) +cconvert(::Type{Ref{P}}, a::Array{P}) where {P<:Union{Ptr,Cwstring,Cstring}} = getfield(a, :ref) cconvert(::Type{Ptr{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a) cconvert(::Type{Ref{P}}, a::Array) where {P<:Union{Ptr,Cwstring,Cstring}} = Ref{P}(a) diff --git a/base/refvalue.jl b/base/refvalue.jl index 7cbb651d41aee..000088ff0ce76 100644 --- a/base/refvalue.jl +++ b/base/refvalue.jl @@ -45,7 +45,10 @@ function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefValue{T}) # If it is actually an immutable, then we can't take it's pointer directly # Instead, explicitly load the pointer from the `RefValue`, # which also ensures this returns same pointer as the one rooted in the `RefValue` object. - p = pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), 1, Core.sizeof(Ptr{Cvoid})) + p = atomic_pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), :monotonic) + end + if p == C_NULL + throw(UndefRefError()) end return p end diff --git a/base/regex.jl b/base/regex.jl index 27e0391f8a6c8..78eefa1741b0c 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -8,7 +8,7 @@ const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.MATCH_INVALID_UTF | PCRE.ALT_BSUX const DEFAULT_MATCH_OPTS = PCRE.NO_UTF_CHECK """ - Regex(pattern[, flags]) + Regex(pattern[, flags]) <: AbstractPattern A type representing a regular expression. `Regex` objects can be used to match strings with [`match`](@ref). @@ -46,19 +46,24 @@ mutable struct Regex <: AbstractPattern end function Regex(pattern::AbstractString, flags::AbstractString) - options = DEFAULT_COMPILER_OPTS + compile_options = DEFAULT_COMPILER_OPTS + match_options = DEFAULT_MATCH_OPTS for f in flags if f == 'a' - options &= ~PCRE.UCP + # instruct pcre2 to treat the strings as simple bytes (aka "ASCII"), not char encodings + compile_options &= ~PCRE.UCP # user can re-enable with (*UCP) + compile_options &= ~PCRE.UTF # user can re-enable with (*UTF) + compile_options &= ~PCRE.MATCH_INVALID_UTF # this would force on UTF + match_options &= ~PCRE.NO_UTF_CHECK # if the user did force on UTF, we should check it for safety else - options |= f=='i' ? PCRE.CASELESS : - f=='m' ? PCRE.MULTILINE : - f=='s' ? PCRE.DOTALL : - f=='x' ? PCRE.EXTENDED : - throw(ArgumentError("unknown regex flag: $f")) + compile_options |= f=='i' ? PCRE.CASELESS : + f=='m' ? PCRE.MULTILINE : + f=='s' ? PCRE.DOTALL : + f=='x' ? 
PCRE.EXTENDED : + throw(ArgumentError("unknown regex flag: $f")) end end - Regex(pattern, options, DEFAULT_MATCH_OPTS) + Regex(pattern, compile_options, match_options) end Regex(pattern::AbstractString) = Regex(pattern, DEFAULT_COMPILER_OPTS, DEFAULT_MATCH_OPTS) @@ -94,11 +99,17 @@ listed after the ending quote, to change its behaviour: - `m` treats the `^` and `\$` tokens as matching the start and end of individual lines, as opposed to the whole string. - `s` allows the `.` modifier to match newlines. -- `x` enables "comment mode": whitespace is enabled except when escaped with `\\`, and `#` - is treated as starting a comment. -- `a` disables `UCP` mode (enables ASCII mode). By default `\\B`, `\\b`, `\\D`, `\\d`, `\\S`, - `\\s`, `\\W`, `\\w`, etc. match based on Unicode character properties. With this option, - these sequences only match ASCII characters. +- `x` enables "free-spacing mode": whitespace between regex tokens is ignored except when escaped with `\\`, + and `#` in the regex is treated as starting a comment (which is ignored to the line ending). +- `a` enables ASCII mode (disables `UTF` and `UCP` modes). By default `\\B`, `\\b`, `\\D`, + `\\d`, `\\S`, `\\s`, `\\W`, `\\w`, etc. match based on Unicode character properties. With + this option, these sequences only match ASCII characters. This includes `\\u` also, which + will emit the specified character value directly as a single byte, and not attempt to + encode it into UTF-8. Importantly, this option allows matching against invalid UTF-8 + strings, by treating both matcher and target as simple bytes (as if they were ISO/IEC + 8859-1 / Latin-1 bytes) instead of as character encodings. In this case, this option is + often combined with `s`. This option can be further refined by starting the pattern with + (*UCP) or (*UTF). See [`Regex`](@ref) if interpolation is needed. @@ -112,23 +123,38 @@ This regex has the first three flags enabled. macro r_str(pattern, flags...) Regex(pattern, flags...) end function show(io::IO, re::Regex) - imsxa = PCRE.CASELESS|PCRE.MULTILINE|PCRE.DOTALL|PCRE.EXTENDED|PCRE.UCP + imsx = PCRE.CASELESS|PCRE.MULTILINE|PCRE.DOTALL|PCRE.EXTENDED + ac = PCRE.UTF|PCRE.MATCH_INVALID_UTF|PCRE.UCP + am = PCRE.NO_UTF_CHECK opts = re.compile_options - if (opts & ~imsxa) == (DEFAULT_COMPILER_OPTS & ~imsxa) + mopts = re.match_options + default = ((opts & ~imsx) | ac) == DEFAULT_COMPILER_OPTS + if default + if (opts & ac) == ac + default = mopts == DEFAULT_MATCH_OPTS + elseif (opts & ac) == 0 + default = mopts == (DEFAULT_MATCH_OPTS & ~am) + else + default = false + end + end + if default print(io, "r\"") escape_raw_string(io, re.pattern) print(io, "\"") - if (opts & PCRE.CASELESS ) != 0; print(io, 'i'); end - if (opts & PCRE.MULTILINE) != 0; print(io, 'm'); end - if (opts & PCRE.DOTALL ) != 0; print(io, 's'); end - if (opts & PCRE.EXTENDED ) != 0; print(io, 'x'); end - if (opts & PCRE.UCP ) == 0; print(io, 'a'); end + if (opts & PCRE.CASELESS ) != 0; print(io, "i"); end + if (opts & PCRE.MULTILINE) != 0; print(io, "m"); end + if (opts & PCRE.DOTALL ) != 0; print(io, "s"); end + if (opts & PCRE.EXTENDED ) != 0; print(io, "x"); end + if (opts & ac ) == 0; print(io, "a"); end else print(io, "Regex(") show(io, re.pattern) - print(io, ',') + print(io, ", ") show(io, opts) - print(io, ')') + print(io, ", ") + show(io, mopts) + print(io, ")") end end @@ -139,9 +165,9 @@ in a string using an `AbstractPattern`. 
abstract type AbstractMatch end """ - RegexMatch + RegexMatch <: AbstractMatch -A type representing a single match to a `Regex` found in a string. +A type representing a single match to a [`Regex`](@ref) found in a string. Typically created from the [`match`](@ref) function. The `match` field stores the substring of the entire matched string. @@ -167,26 +193,37 @@ See [`keys`](@ref keys(::RegexMatch)) for more information. julia> m = match(r"(?\\d+):(?\\d+)(am|pm)?", "11:30 in the morning") RegexMatch("11:30", hour="11", minute="30", 3=nothing) -julia> hr, min, ampm = m; +julia> m.match +"11:30" + +julia> m.captures +3-element Vector{Union{Nothing, SubString{String}}}: + "11" + "30" + nothing -julia> hr -"11" julia> m["minute"] "30" -julia> m.match -"11:30" +julia> hr, min, ampm = m; # destructure capture groups by iteration + +julia> hr +"11" ``` """ -struct RegexMatch <: AbstractMatch - match::SubString{String} - captures::Vector{Union{Nothing,SubString{String}}} +struct RegexMatch{S<:AbstractString} <: AbstractMatch + match::SubString{S} + captures::Vector{Union{Nothing,SubString{S}}} offset::Int offsets::Vector{Int} regex::Regex end +RegexMatch(match::SubString{S}, captures::Vector{Union{Nothing,SubString{S}}}, + offset::Union{Int, UInt}, offsets::Vector{Int}, regex::Regex) where {S<:AbstractString} = + RegexMatch{S}(match, captures, offset, offsets, regex) + """ keys(m::RegexMatch) -> Vector @@ -197,8 +234,8 @@ That is, `idx` will be in the return value even if `m[idx] == nothing`. Unnamed capture groups will have integer keys corresponding to their index. Named capture groups will have string keys. -!!! compat "Julia 1.6" - This method was added in Julia 1.6 +!!! compat "Julia 1.7" + This method was added in Julia 1.7 # Examples ```jldoctest @@ -236,19 +273,17 @@ end # Capture group extraction getindex(m::RegexMatch, idx::Integer) = m.captures[idx] -function getindex(m::RegexMatch, name::Symbol) +function getindex(m::RegexMatch, name::Union{AbstractString,Symbol}) idx = PCRE.substring_number_from_name(m.regex.regex, name) idx <= 0 && error("no capture group named $name found in regex") m[idx] end -getindex(m::RegexMatch, name::AbstractString) = m[Symbol(name)] haskey(m::RegexMatch, idx::Integer) = idx in eachindex(m.captures) -function haskey(m::RegexMatch, name::Symbol) +function haskey(m::RegexMatch, name::Union{AbstractString,Symbol}) idx = PCRE.substring_number_from_name(m.regex.regex, name) return idx > 0 end -haskey(m::RegexMatch, name::AbstractString) = haskey(m, Symbol(name)) iterate(m::RegexMatch, args...) = iterate(m.captures, args...) 
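# Usage sketch for the accessors above (illustrative, not part of this change):
#   m = match(r"(?<hour>\d+):(?<minute>\d+)", "11:30")
#   m[:hour] == m["hour"] == "11"            # getindex accepts Symbol or string group names
#   haskey(m, :minute) && haskey(m, "minute")
#   collect(m) == ["11", "30"]               # iterate/length work over the captures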
length(m::RegexMatch) = length(m.captures) @@ -392,15 +427,42 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, return result end +function _annotatedmatch(m::RegexMatch{S}, str::AnnotatedString{S}) where {S<:AbstractString} + RegexMatch{AnnotatedString{S}}( + (@inbounds SubString{AnnotatedString{S}}( + str, m.match.offset, m.match.ncodeunits, Val(:noshift))), + Union{Nothing,SubString{AnnotatedString{S}}}[ + if !isnothing(cap) + (@inbounds SubString{AnnotatedString{S}}( + str, cap.offset, cap.ncodeunits, Val(:noshift))) + end for cap in m.captures], + m.offset, m.offsets, m.regex) +end + +function match(re::Regex, str::AnnotatedString) + m = match(re, str.string) + if !isnothing(m) + _annotatedmatch(m, str) + end +end + +function match(re::Regex, str::AnnotatedString, idx::Integer, add_opts::UInt32=UInt32(0)) + m = match(re, str.string, idx, add_opts) + if !isnothing(m) + _annotatedmatch(m, str) + end +end + match(r::Regex, s::AbstractString) = match(r, s, firstindex(s)) match(r::Regex, s::AbstractString, i::Integer) = throw(ArgumentError( - "regex matching is only available for the String type; use String(s) to convert" + "regex matching is only available for the String and AnnotatedString types; use String(s) to convert" )) findnext(re::Regex, str::Union{String,SubString}, idx::Integer) = _findnext_re(re, str, idx, C_NULL) # TODO: return only start index and update deprecation -function _findnext_re(re::Regex, str::Union{String,SubString}, idx::Integer, match_data::Ptr{Cvoid}) +# duck-type str so that external UTF-8 string packages like StringViews can hook in +function _findnext_re(re::Regex, str, idx::Integer, match_data::Ptr{Cvoid}) if idx > nextind(str,lastindex(str)) throw(BoundsError()) end @@ -466,6 +528,18 @@ original string, otherwise they must be from disjoint character ranges. !!! compat "Julia 1.7" Using a character as the pattern requires at least Julia 1.7. + +# Examples +```jldoctest +julia> count('a', "JuliaLang") +2 + +julia> count(r"a(.)a", "cabacabac", overlap=true) +3 + +julia> count(r"a(.)a", "cabacabac") +2 +``` """ function count(t::Union{AbstractChar,AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false) n = 0 @@ -482,7 +556,7 @@ function count(t::Union{AbstractChar,AbstractString,AbstractPattern}, s::Abstrac end """ - SubstitutionString(substr) + SubstitutionString(substr) <: AbstractString Stores the given string `substr` as a `SubstitutionString`, for use in regular expression substitutions. Most commonly constructed using the [`@s_str`](@ref) macro. 
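A short usage sketch (illustrative, not part of this hunk): capture groups can be referenced by number (`\1`) or by name (`\g<name>`) in the substitution string:

```julia
julia> replace("2024-01-15", r"(?<y>\d+)-(?<m>\d+)-(?<d>\d+)" => s"\g<d>/\g<m>/\g<y>")
"15/01/2024"
```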
@@ -627,18 +701,19 @@ function _replace(io, repl_s::SubstitutionString, str, r, re) end end -struct RegexMatchIterator +struct RegexMatchIterator{S <: AbstractString} regex::Regex - string::String + string::S overlap::Bool - function RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) - new(regex, string, ovr) - end + RegexMatchIterator(regex::Regex, string::AbstractString, ovr::Bool=false) = + new{String}(regex, String(string), ovr) + RegexMatchIterator(regex::Regex, string::AnnotatedString, ovr::Bool=false) = + new{AnnotatedString{String}}(regex, AnnotatedString(String(string.string), string.annotations), ovr) end compile(itr::RegexMatchIterator) = (compile(itr.regex); itr) -eltype(::Type{RegexMatchIterator}) = RegexMatch -IteratorSize(::Type{RegexMatchIterator}) = SizeUnknown() +eltype(::Type{<:RegexMatchIterator}) = RegexMatch +IteratorSize(::Type{<:RegexMatchIterator}) = SizeUnknown() function iterate(itr::RegexMatchIterator, (offset,prevempty)=(1,false)) opts_nonempty = UInt32(PCRE.ANCHORED | PCRE.NOTEMPTY_ATSTART) @@ -683,7 +758,7 @@ julia> rx = r"a.a" r"a.a" julia> m = eachmatch(rx, "a1a2a3a") -Base.RegexMatchIterator(r"a.a", "a1a2a3a", false) +Base.RegexMatchIterator{String}(r"a.a", "a1a2a3a", false) julia> collect(m) 2-element Vector{RegexMatch}: @@ -801,7 +876,7 @@ end """ - ^(s::Regex, n::Integer) + ^(s::Regex, n::Integer) -> Regex Repeat a regex `n` times. diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index e2253736e4fe7..23bfb38a28654 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -25,6 +25,28 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T end global reinterpret + + @doc """ + reinterpret(T::DataType, A::AbstractArray) + + Construct a view of the array with the same binary data as the given + array, but with `T` as element type. + + This function also works on "lazy" array whose elements are not computed until they are explicitly retrieved. + For instance, `reinterpret` on the range `1:6` works similarly as on the dense vector `collect(1:6)`: + + ```jldoctest + julia> reinterpret(Float32, UInt32[1 2 3 4 5]) + 1×5 reinterpret(Float32, ::Matrix{UInt32}): + 1.0f-45 3.0f-45 4.0f-45 6.0f-45 7.0f-45 + + julia> reinterpret(Complex{Int}, 1:6) + 3-element reinterpret(Complex{$Int}, ::UnitRange{$Int}): + 1 + 2im + 3 + 4im + 5 + 6im + ``` + """ function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}} function thrownonint(S::Type, T::Type, dim) @noinline @@ -152,23 +174,15 @@ strides(a::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}) = siz stride(A::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}, k::Integer) = k ≤ ndims(A) ? strides(A)[k] : length(A) -function strides(a::ReshapedReinterpretArray) - ap = parent(a) - els, elp = elsize(a), elsize(ap) - stp = strides(ap) - els == elp && return stp - els < elp && return (1, _checked_strides(stp, els, elp)...) - stp[1] == 1 || throw(ArgumentError("Parent must be contiguous in the 1st dimension!")) - return _checked_strides(tail(stp), els, elp) -end - -function strides(a::NonReshapedReinterpretArray) - ap = parent(a) - els, elp = elsize(a), elsize(ap) - stp = strides(ap) - els == elp && return stp +function strides(a::ReinterpretArray{T,<:Any,S,<:AbstractArray{S},IsReshaped}) where {T,S,IsReshaped} + _checkcontiguous(Bool, a) && return size_to_strides(1, size(a)...) + stp = strides(parent(a)) + els, elp = sizeof(T), sizeof(S) + els == elp && return stp # 0dim parent is also handled here. 
+ IsReshaped && els < elp && return (1, _checked_strides(stp, els, elp)...) stp[1] == 1 || throw(ArgumentError("Parent must be contiguous in the 1st dimension!")) - return (1, _checked_strides(tail(stp), els, elp)...) + st′ = _checked_strides(tail(stp), els, elp) + return IsReshaped ? st′ : (1, st′...) end @inline function _checked_strides(stp::Tuple, els::Integer, elp::Integer) @@ -333,10 +347,12 @@ function axes(a::ReshapedReinterpretArray{T,N,S} where {N}) where {T,S} end axes(a::NonReshapedReinterpretArray{T,0}) where {T} = () +has_offset_axes(a::ReinterpretArray) = has_offset_axes(a.parent) + elsize(::Type{<:ReinterpretArray{T}}) where {T} = sizeof(T) -unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent)) +cconvert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = cconvert(Ptr{S}, a.parent) -@inline @propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S} +@propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S} if isprimitivetype(T) && isprimitivetype(S) reinterpret(T, a.parent[]) else @@ -344,15 +360,24 @@ unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = end end -@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[firstindex(a)] +check_ptr_indexable(a::ReinterpretArray, sz = elsize(a)) = check_ptr_indexable(parent(a), sz) +check_ptr_indexable(a::ReshapedArray, sz) = check_ptr_indexable(parent(a), sz) +check_ptr_indexable(a::FastContiguousSubArray, sz) = check_ptr_indexable(parent(a), sz) +check_ptr_indexable(a::Array, sz) = sizeof(eltype(a)) !== sz +check_ptr_indexable(a::Memory, sz) = true +check_ptr_indexable(a::AbstractArray, sz) = false -@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S} +@propagate_inbounds getindex(a::ReinterpretArray) = a[firstindex(a)] + +@propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S} check_readable(a) + check_ptr_indexable(a) && return _getindex_ptr(a, inds...) _getindex_ra(a, inds[1], tail(inds)) end -@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, i::Int) where {T,N,S} +@propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, i::Int) where {T,N,S} check_readable(a) + check_ptr_indexable(a) && return _getindex_ptr(a, i) if isa(IndexStyle(a), IndexLinear) return _getindex_ra(a, i, ()) end @@ -362,18 +387,22 @@ end isempty(inds) ? _getindex_ra(a, 1, ()) : _getindex_ra(a, inds[1], tail(inds)) end -@inline @propagate_inbounds function getindex(a::ReshapedReinterpretArray{T,N,S}, ind::SCartesianIndex2) where {T,N,S} +@propagate_inbounds function getindex(a::ReshapedReinterpretArray{T,N,S}, ind::SCartesianIndex2) where {T,N,S} check_readable(a) s = Ref{S}(a.parent[ind.j]) - GC.@preserve s begin - tptr = Ptr{T}(unsafe_convert(Ref{S}, s)) - return unsafe_load(tptr, ind.i) - end + tptr = Ptr{T}(unsafe_convert(Ref{S}, s)) + GC.@preserve s return unsafe_load(tptr, ind.i) end -@inline _memcpy!(dst, src, n) = ccall(:memcpy, Cvoid, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), dst, src, n) +@inline function _getindex_ptr(a::ReinterpretArray{T}, inds...) where {T} + @boundscheck checkbounds(a, inds...) + li = _to_linear_index(a, inds...) 
+ ap = cconvert(Ptr{T}, a) + p = unsafe_convert(Ptr{T}, ap) + sizeof(T) * (li - 1) + GC.@preserve ap return unsafe_load(p) +end -@inline @propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} +@propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 if issingletontype(T) # singleton types @@ -418,7 +447,7 @@ end while nbytes_copied < sizeof(T) s[] = a.parent[ind_start + i, tailinds...] nb = min(sizeof(S) - sidx, sizeof(T)-nbytes_copied) - _memcpy!(tptr + nbytes_copied, sptr + sidx, nb) + memcpy(tptr + nbytes_copied, sptr + sidx, nb) nbytes_copied += nb sidx = 0 i += 1 @@ -429,7 +458,7 @@ end end end -@inline @propagate_inbounds function _getindex_ra(a::ReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} +@propagate_inbounds function _getindex_ra(a::ReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 if issingletontype(T) # singleton types @@ -476,7 +505,7 @@ end end end -@inline @propagate_inbounds function setindex!(a::NonReshapedReinterpretArray{T,0,S}, v) where {T,S} +@propagate_inbounds function setindex!(a::NonReshapedReinterpretArray{T,0,S}, v) where {T,S} if isprimitivetype(S) && isprimitivetype(T) a.parent[] = reinterpret(S, v) return a @@ -484,15 +513,17 @@ end setindex!(a, v, firstindex(a)) end -@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = setindex!(a, v, firstindex(a)) +@propagate_inbounds setindex!(a::ReinterpretArray, v) = setindex!(a, v, firstindex(a)) -@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S} +@propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S} check_writable(a) + check_ptr_indexable(a) && return _setindex_ptr!(a, v, inds...) _setindex_ra!(a, v, inds[1], tail(inds)) end -@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, i::Int) where {T,N,S} +@propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, i::Int) where {T,N,S} check_writable(a) + check_ptr_indexable(a) && return _setindex_ptr!(a, v, i) if isa(IndexStyle(a), IndexLinear) return _setindex_ra!(a, v, i, ()) end @@ -500,7 +531,7 @@ end _setindex_ra!(a, v, inds[1], tail(inds)) end -@inline @propagate_inbounds function setindex!(a::ReshapedReinterpretArray{T,N,S}, v, ind::SCartesianIndex2) where {T,N,S} +@propagate_inbounds function setindex!(a::ReshapedReinterpretArray{T,N,S}, v, ind::SCartesianIndex2) where {T,N,S} check_writable(a) v = convert(T, v)::T s = Ref{S}(a.parent[ind.j]) @@ -512,7 +543,16 @@ end return a end -@inline @propagate_inbounds function _setindex_ra!(a::NonReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT} +@inline function _setindex_ptr!(a::ReinterpretArray{T}, v, inds...) where {T} + @boundscheck checkbounds(a, inds...) + li = _to_linear_index(a, inds...) 
+ ap = cconvert(Ptr{T}, a) + p = unsafe_convert(Ptr{T}, ap) + sizeof(T) * (li - 1) + GC.@preserve ap unsafe_store!(p, v) + return a +end + +@propagate_inbounds function _setindex_ra!(a::NonReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT} v = convert(T, v)::T # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 @@ -558,7 +598,7 @@ end if sidx != 0 s[] = a.parent[ind_start + i, tailinds...] nb = min((sizeof(S) - sidx) % UInt, sizeof(T) % UInt) - _memcpy!(sptr + sidx, tptr, nb) + memcpy(sptr + sidx, tptr, nb) nbytes_copied += nb a.parent[ind_start + i, tailinds...] = s[] i += 1 @@ -567,7 +607,7 @@ end # Deal with the main body of elements while nbytes_copied < sizeof(T) && (sizeof(T) - nbytes_copied) > sizeof(S) nb = min(sizeof(S), sizeof(T) - nbytes_copied) - _memcpy!(sptr, tptr + nbytes_copied, nb) + memcpy(sptr, tptr + nbytes_copied, nb) nbytes_copied += nb a.parent[ind_start + i, tailinds...] = s[] i += 1 @@ -576,7 +616,7 @@ end if nbytes_copied < sizeof(T) s[] = a.parent[ind_start + i, tailinds...] nb = min(sizeof(S), sizeof(T) - nbytes_copied) - _memcpy!(sptr, tptr + nbytes_copied, nb) + memcpy(sptr, tptr + nbytes_copied, nb) a.parent[ind_start + i, tailinds...] = s[] end end @@ -585,7 +625,7 @@ end return a end -@inline @propagate_inbounds function _setindex_ra!(a::ReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT} +@propagate_inbounds function _setindex_ra!(a::ReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT} v = convert(T, v)::T # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 @@ -637,8 +677,8 @@ end # Padding struct Padding - offset::Int - size::Int + offset::Int # 0-indexed offset of the next valid byte; sizeof(T) indicates trailing padding + size::Int # bytes of padding before a valid byte end function intersect(p1::Padding, p2::Padding) start = max(p1.offset, p2.offset) @@ -646,7 +686,7 @@ function intersect(p1::Padding, p2::Padding) Padding(start, max(0, stop-start)) end -struct PaddingError +struct PaddingError <: Exception S::Type T::Type end @@ -658,7 +698,7 @@ end """ CyclePadding(padding, total_size) -Cylces an iterator of `Padding` structs, restarting the padding at `total_size`. +Cycles an iterator of `Padding` structs, restarting the padding at `total_size`. E.g. if `padding` is all the padding in a struct and `total_size` is the total aligned size of that array, `CyclePadding` will correspond to the padding in an infinite vector of such structs. @@ -682,53 +722,150 @@ function iterate(cp::CyclePadding, state::Tuple) end """ - Compute the location of padding in a type. + Compute the location of padding in an isbits datatype. Recursive over the fields of that type. """ -function padding(T) - padding = Padding[] - last_end::Int = 0 +@assume_effects :foldable function padding(T::DataType, baseoffset::Int = 0) + pads = Padding[] + last_end::Int = baseoffset for i = 1:fieldcount(T) - offset = fieldoffset(T, i) + offset = baseoffset + Int(fieldoffset(T, i)) fT = fieldtype(T, i) + append!(pads, padding(fT, offset)) if offset != last_end - push!(padding, Padding(offset, offset-last_end)) + push!(pads, Padding(offset, offset-last_end)) end last_end = offset + sizeof(fT) end - padding + if 0 < last_end - baseoffset < sizeof(T) + push!(pads, Padding(baseoffset + sizeof(T), sizeof(T) - last_end + baseoffset)) + end + return Core.svec(pads...) 
end function CyclePadding(T::DataType) a, s = datatype_alignment(T), sizeof(T) as = s + (a - (s % a)) % a pad = padding(T) - s != as && push!(pad, Padding(s, as - s)) + if s != as + pad = Core.svec(pad..., Padding(s, as - s)) + end CyclePadding(pad, as) end -using .Iterators: Stateful @assume_effects :total function array_subpadding(S, T) - checked_size = 0 lcm_size = lcm(sizeof(S), sizeof(T)) - s, t = Stateful{<:Any, Any}(CyclePadding(S)), - Stateful{<:Any, Any}(CyclePadding(T)) - isempty(t) && return true - isempty(s) && return false + s, t = CyclePadding(S), CyclePadding(T) + checked_size = 0 + # use of Stateful harms inference and makes this vulnerable to invalidation + (pad, tstate) = let + it = iterate(t) + it === nothing && return true + it + end + (ps, sstate) = let + it = iterate(s) + it === nothing && return false + it + end while checked_size < lcm_size - # Take padding in T - pad = popfirst!(t) - # See if there's corresponding padding in S while true - ps = peek(s) + # See if there's corresponding padding in S ps.offset > pad.offset && return false intersect(ps, pad) == pad && break - popfirst!(s) + ps, sstate = iterate(s, sstate) end checked_size = pad.offset + pad.size + pad, tstate = iterate(t, tstate) end return true end +@assume_effects :foldable function struct_subpadding(::Type{Out}, ::Type{In}) where {Out, In} + padding(Out) == padding(In) +end + +@assume_effects :foldable function packedsize(::Type{T}) where T + pads = padding(T) + return sizeof(T) - sum((p.size for p ∈ pads), init = 0) +end + +@assume_effects :foldable ispacked(::Type{T}) where T = isempty(padding(T)) + +function _copytopacked!(ptr_out::Ptr{Out}, ptr_in::Ptr{In}) where {Out, In} + writeoffset = 0 + for i ∈ 1:fieldcount(In) + readoffset = fieldoffset(In, i) + fT = fieldtype(In, i) + if ispacked(fT) + readsize = sizeof(fT) + memcpy(ptr_out + writeoffset, ptr_in + readoffset, readsize) + writeoffset += readsize + else # nested padded type + _copytopacked!(ptr_out + writeoffset, Ptr{fT}(ptr_in + readoffset)) + writeoffset += packedsize(fT) + end + end +end + +function _copyfrompacked!(ptr_out::Ptr{Out}, ptr_in::Ptr{In}) where {Out, In} + readoffset = 0 + for i ∈ 1:fieldcount(Out) + writeoffset = fieldoffset(Out, i) + fT = fieldtype(Out, i) + if ispacked(fT) + writesize = sizeof(fT) + memcpy(ptr_out + writeoffset, ptr_in + readoffset, writesize) + readoffset += writesize + else # nested padded type + _copyfrompacked!(Ptr{fT}(ptr_out + writeoffset), ptr_in + readoffset) + readoffset += packedsize(fT) + end + end +end + +@inline function _reinterpret(::Type{Out}, x::In) where {Out, In} + # handle non-primitive types + isbitstype(Out) || throw(ArgumentError("Target type for `reinterpret` must be isbits")) + isbitstype(In) || throw(ArgumentError("Source type for `reinterpret` must be isbits")) + inpackedsize = packedsize(In) + outpackedsize = packedsize(Out) + inpackedsize == outpackedsize || + throw(ArgumentError("Packed sizes of types $Out and $In do not match; got $outpackedsize \ + and $inpackedsize, respectively.")) + in = Ref{In}(x) + out = Ref{Out}() + if struct_subpadding(Out, In) + # if packed the same, just copy + GC.@preserve in out begin + ptr_in = unsafe_convert(Ptr{In}, in) + ptr_out = unsafe_convert(Ptr{Out}, out) + memcpy(ptr_out, ptr_in, sizeof(Out)) + end + return out[] + else + # mismatched padding + GC.@preserve in out begin + ptr_in = unsafe_convert(Ptr{In}, in) + ptr_out = unsafe_convert(Ptr{Out}, out) + + if fieldcount(In) > 0 && ispacked(Out) + _copytopacked!(ptr_out, ptr_in) + elseif 
fieldcount(Out) > 0 && ispacked(In) + _copyfrompacked!(ptr_out, ptr_in) + else + packed = Ref{NTuple{inpackedsize, UInt8}}() + GC.@preserve packed begin + ptr_packed = unsafe_convert(Ptr{NTuple{inpackedsize, UInt8}}, packed) + _copytopacked!(ptr_packed, ptr_in) + _copyfrompacked!(ptr_out, ptr_packed) + end + end + end + return out[] + end +end + + # Reductions with IndexSCartesian2 function _mapreduce(f::F, op::OP, style::IndexSCartesian2{K}, A::AbstractArrayOrBroadcasted) where {F,OP,K} diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index 060b831283970..344858e76764a 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -37,20 +37,30 @@ eltype(::Type{<:ReshapedArrayIterator{I}}) where {I} = @isdefined(I) ? ReshapedI ## reshape(::Array, ::Dims) returns an Array, except for isbitsunion eltypes (issue #28611) # reshaping to same # of dimensions -function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M} +@eval function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M} throw_dmrsa(dims, len) = throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $len")) - - if prod(dims) != length(a) + len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a)) + if len != length(a) throw_dmrsa(dims, length(a)) end isbitsunion(T) && return ReshapedArray(a, dims, ()) if N == M && dims == size(a) return a end - ccall(:jl_reshape_array, Array{T,N}, (Any, Any, Any), Array{T,N}, a, dims) + ref = a.ref + if M == 1 && N !== 1 + mem = ref.mem::Memory{T} + if !(ref === GenericMemoryRef(mem) && len === mem.length) + mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len) + ref = GenericMemoryRef(mem)::typeof(ref) + end + end + # or we could use `a = Array{T,N}(undef, ntuple(0, Val(N))); a.ref = ref; a.size = dims; return a` here + return $(Expr(:new, :(Array{T,N}), :ref, :dims)) end + """ reshape(A, dims...) -> AbstractArray reshape(A, dims) -> AbstractArray @@ -226,6 +236,19 @@ end offset_if_vec(i::Integer, axs::Tuple{<:AbstractUnitRange}) = i + first(axs[1]) - 1 offset_if_vec(i::Integer, axs::Tuple) = i +@inline function isassigned(A::ReshapedArrayLF, index::Int) + @boundscheck checkbounds(Bool, A, index) || return false + @inbounds ret = isassigned(parent(A), index) + ret +end +@inline function isassigned(A::ReshapedArray{T,N}, indices::Vararg{Int, N}) where {T,N} + @boundscheck checkbounds(Bool, A, indices...) || return false + axp = axes(A.parent) + i = offset_if_vec(_sub2ind(size(A), indices...), axp) + I = ind2sub_rs(axp, A.mi, i) + @inbounds isassigned(A.parent, I...) 
+end + @inline function getindex(A::ReshapedArrayLF, index::Int) @boundscheck checkbounds(A, index) @inbounds ret = parent(A)[index] @@ -280,7 +303,7 @@ setindex!(A::ReshapedRange, val, index::ReshapedIndex) = _rs_setindex!_err() @noinline _rs_setindex!_err() = error("indexed assignment fails for a reshaped range; consider calling collect") -unsafe_convert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = unsafe_convert(Ptr{T}, parent(a)) +cconvert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = cconvert(Ptr{T}, parent(a)) # Add a few handy specializations to further speed up views of reshaped ranges const ReshapedUnitRange{T,N,A<:AbstractUnitRange} = ReshapedArray{T,N,A,Tuple{}} @@ -291,18 +314,64 @@ compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{ReshapedRange (@inline; first(I[1]) - first(axes1(I[1]))*stride1) substrides(strds::NTuple{N,Int}, I::Tuple{ReshapedUnitRange, Vararg{Any}}) where N = (size_to_strides(strds[1], size(I[1])...)..., substrides(tail(strds), tail(I))...) -unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} = - unsafe_convert(Ptr{T}, V.parent) + (first_index(V)-1)*sizeof(T) +# cconvert(::Type{<:Ptr}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} = V +function unsafe_convert(::Type{Ptr{S}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {S,T,N,P} + parent = V.parent + p = cconvert(Ptr{T}, parent) # XXX: this should occur in cconvert, the result is not GC-rooted + Δmem = if _checkcontiguous(Bool, parent) + (first_index(V) - firstindex(parent)) * elsize(parent) + else + _memory_offset(parent, map(first, V.indices)...) + end + return Ptr{S}(unsafe_convert(Ptr{T}, p) + Δmem) +end -_checkcontiguous(::Type{Bool}, A::AbstractArray) = size_to_strides(1, size(A)...) == strides(A) -_checkcontiguous(::Type{Bool}, A::Array) = true +_checkcontiguous(::Type{Bool}, A::AbstractArray) = false +# `strides(A::DenseArray)` calls `size_to_strides` by default. +# Thus it's OK to assume all `DenseArray`s are contiguously stored. +_checkcontiguous(::Type{Bool}, A::DenseArray) = true _checkcontiguous(::Type{Bool}, A::ReshapedArray) = _checkcontiguous(Bool, parent(A)) _checkcontiguous(::Type{Bool}, A::FastContiguousSubArray) = _checkcontiguous(Bool, parent(A)) function strides(a::ReshapedArray) - # We can handle non-contiguous parent if it's a StridedVector - ndims(parent(a)) == 1 && return size_to_strides(only(strides(parent(a))), size(a)...) - _checkcontiguous(Bool, a) || throw(ArgumentError("Parent must be contiguous.")) - size_to_strides(1, size(a)...) + _checkcontiguous(Bool, a) && return size_to_strides(1, size(a)...) + apsz::Dims = size(a.parent) + apst::Dims = strides(a.parent) + msz, mst, n = merge_adjacent_dim(apsz, apst) # Try to perform "lazy" reshape + n == ndims(a.parent) && return size_to_strides(mst, size(a)...) 
# Parent is stridevector like + return _reshaped_strides(size(a), 1, msz, mst, n, apsz, apst) +end + +function _reshaped_strides(::Dims{0}, reshaped::Int, msz::Int, ::Int, ::Int, ::Dims, ::Dims) + reshaped == msz && return () + throw(ArgumentError("Input is not strided.")) +end +function _reshaped_strides(sz::Dims, reshaped::Int, msz::Int, mst::Int, n::Int, apsz::Dims, apst::Dims) + st = reshaped * mst + reshaped = reshaped * sz[1] + if length(sz) > 1 && reshaped == msz && sz[2] != 1 + msz, mst, n = merge_adjacent_dim(apsz, apst, n + 1) + reshaped = 1 + end + sts = _reshaped_strides(tail(sz), reshaped, msz, mst, n, apsz, apst) + return (st, sts...) +end + +merge_adjacent_dim(::Dims{0}, ::Dims{0}) = 1, 1, 0 +merge_adjacent_dim(apsz::Dims{1}, apst::Dims{1}) = apsz[1], apst[1], 1 +function merge_adjacent_dim(apsz::Dims{N}, apst::Dims{N}, n::Int = 1) where {N} + sz, st = apsz[n], apst[n] + while n < N + szₙ, stₙ = apsz[n+1], apst[n+1] + if sz == 1 + sz, st = szₙ, stₙ + elseif stₙ == st * sz || szₙ == 1 + sz *= szₙ + else + break + end + n += 1 + end + return sz, st, n end diff --git a/base/rounding.jl b/base/rounding.jl index 25cfe2dc09829..2da605bc36f8f 100644 --- a/base/rounding.jl +++ b/base/rounding.jl @@ -109,6 +109,65 @@ Rounds to nearest integer, with ties rounded toward positive infinity (Java/Java """ const RoundNearestTiesUp = RoundingMode{:NearestTiesUp}() +# Rounding mode predicates. TODO: better names + +# Overload these for other rounding modes +rounds_to_nearest(::RoundingMode) = false +rounds_to_nearest(::RoundingMode{:Nearest}) = true +rounds_to_nearest(::RoundingMode{:NearestTiesUp}) = true +rounds_to_nearest(::RoundingMode{:NearestTiesAway}) = true +rounds_away_from_zero(::RoundingMode{:Up}, sign_bit::Bool) = !sign_bit +rounds_away_from_zero(::RoundingMode{:Down}, sign_bit::Bool) = sign_bit +rounds_away_from_zero(::RoundingMode{:FromZero}, ::Bool) = true +rounds_away_from_zero(::RoundingMode{:ToZero}, ::Bool) = false +tie_breaker_is_to_even(::RoundingMode{:Nearest}) = true +tie_breaker_is_to_even(::RoundingMode{:NearestTiesUp}) = false +tie_breaker_is_to_even(::RoundingMode{:NearestTiesAway}) = false +tie_breaker_rounds_away_from_zero(::RoundingMode{:NearestTiesUp}, sign_bit::Bool) = !sign_bit +tie_breaker_rounds_away_from_zero(::RoundingMode{:NearestTiesAway}, ::Bool) = true + +rounds_to_nearest(t::Tuple{Any,Bool}) = rounds_to_nearest(first(t)) +rounds_away_from_zero(t::Tuple{Any,Bool}) = rounds_away_from_zero(t...) +tie_breaker_is_to_even(t::Tuple{Any,Bool}) = tie_breaker_is_to_even(first(t)) +tie_breaker_rounds_away_from_zero(t::Tuple{Any,Bool}) = tie_breaker_rounds_away_from_zero(t...) 
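A hedged sketch of how the new rounding-mode predicates above classify the built-in modes (added for clarity; these helpers are internal to `Base.Rounding` and unexported, so the calls below assume that module's scope).

```julia
# Sketch only: classifying standard modes with the predicates defined above.
rounds_to_nearest(RoundNearest)         # true
rounds_to_nearest(RoundUp)              # false (directed mode)
rounds_away_from_zero(RoundUp, false)   # true:  positive values move away from zero
rounds_away_from_zero(RoundUp, true)    # false: negative values move toward zero
tie_breaker_is_to_even(RoundNearest)    # true
tie_breaker_rounds_away_from_zero(RoundNearestTiesAway, true)  # true
```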
+ +abstract type RoundingIncrementHelper end +struct FinalBit <: RoundingIncrementHelper end +struct RoundBit <: RoundingIncrementHelper end +struct StickyBit <: RoundingIncrementHelper end + +function correct_rounding_requires_increment(x, rounding_mode, sign_bit::Bool) + r = (rounding_mode, sign_bit) + f = let y = x + (z::RoundingIncrementHelper) -> y(z)::Bool + end + if rounds_to_nearest(r) + if f(RoundBit()) + if f(StickyBit()) + true + else + if tie_breaker_is_to_even(r) + f(FinalBit()) + else + tie_breaker_rounds_away_from_zero(r)::Bool + end + end + else + false + end + else + if rounds_away_from_zero(r) + if f(RoundBit()) + true + else + f(StickyBit()) + end + else + false + end + end::Bool +end + to_fenv(::RoundingMode{:Nearest}) = JL_FE_TONEAREST to_fenv(::RoundingMode{:ToZero}) = JL_FE_TOWARDZERO to_fenv(::RoundingMode{:Up}) = JL_FE_UPWARD @@ -224,6 +283,8 @@ function _convert_rounding(::Type{T}, x::Real, r::RoundingMode{:ToZero}) where T end end +# Default definitions + """ set_zero_subnormals(yes::Bool) -> Bool @@ -254,3 +315,158 @@ for IEEE arithmetic, and `true` if they might be converted to zeros. get_zero_subnormals() = ccall(:jl_get_zero_subnormals,Int32,())!=0 end #module +using .Rounding + +""" + round([T,] x, [r::RoundingMode]) + round(x, [r::RoundingMode]; digits::Integer=0, base = 10) + round(x, [r::RoundingMode]; sigdigits::Integer, base = 10) + +Rounds the number `x`. + +Without keyword arguments, `x` is rounded to an integer value, returning a value of type +`T`, or of the same type of `x` if no `T` is provided. An [`InexactError`](@ref) will be +thrown if the value is not representable by `T`, similar to [`convert`](@ref). + +If the `digits` keyword argument is provided, it rounds to the specified number of digits +after the decimal place (or before if negative), in base `base`. + +If the `sigdigits` keyword argument is provided, it rounds to the specified number of +significant digits, in base `base`. + +The [`RoundingMode`](@ref) `r` controls the direction of the rounding; the default is +[`RoundNearest`](@ref), which rounds to the nearest integer, with ties (fractional values +of 0.5) being rounded to the nearest even integer. Note that `round` may give incorrect +results if the global rounding mode is changed (see [`rounding`](@ref)). + +# Examples +```jldoctest +julia> round(1.7) +2.0 + +julia> round(Int, 1.7) +2 + +julia> round(1.5) +2.0 + +julia> round(2.5) +2.0 + +julia> round(pi; digits=2) +3.14 + +julia> round(pi; digits=3, base=2) +3.125 + +julia> round(123.456; sigdigits=2) +120.0 + +julia> round(357.913; sigdigits=4, base=2) +352.0 +``` + +!!! note + Rounding to specified digits in bases other than 2 can be inexact when + operating on binary floating point numbers. For example, the [`Float64`](@ref) + value represented by `1.15` is actually *less* than 1.15, yet will be + rounded to 1.2. For example: + + ```jldoctest + julia> x = 1.15 + 1.15 + + julia> big(1.15) + 1.149999999999999911182158029987476766109466552734375 + + julia> x < 115//100 + true + + julia> round(x, digits=1) + 1.2 + ``` + +# Extensions + +To extend `round` to new numeric types, it is typically sufficient to define `Base.round(x::NewType, r::RoundingMode)`. +""" +function round end + +""" + trunc([T,] x) + trunc(x; digits::Integer= [, base = 10]) + trunc(x; sigdigits::Integer= [, base = 10]) + +`trunc(x)` returns the nearest integral value of the same type as `x` whose absolute value +is less than or equal to the absolute value of `x`. 
+ +`trunc(T, x)` converts the result to type `T`, throwing an `InexactError` if the truncated +value is not representable a `T`. + +Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). + +To support `trunc` for a new type, define `Base.round(x::NewType, ::RoundingMode{:ToZero})`. + +See also: [`%`](@ref rem), [`floor`](@ref), [`unsigned`](@ref), [`unsafe_trunc`](@ref). + +# Examples +```jldoctest +julia> trunc(2.22) +2.0 + +julia> trunc(-2.22, digits=1) +-2.2 + +julia> trunc(Int, -2.22) +-2 +``` +""" +function trunc end + +""" + floor([T,] x) + floor(x; digits::Integer= [, base = 10]) + floor(x; sigdigits::Integer= [, base = 10]) + +`floor(x)` returns the nearest integral value of the same type as `x` that is less than or +equal to `x`. + +`floor(T, x)` converts the result to type `T`, throwing an `InexactError` if the floored +value is not representable a `T`. + +Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). + +To support `floor` for a new type, define `Base.round(x::NewType, ::RoundingMode{:Down})`. +""" +function floor end + +""" + ceil([T,] x) + ceil(x; digits::Integer= [, base = 10]) + ceil(x; sigdigits::Integer= [, base = 10]) + +`ceil(x)` returns the nearest integral value of the same type as `x` that is greater than or +equal to `x`. + +`ceil(T, x)` converts the result to type `T`, throwing an `InexactError` if the ceiled +value is not representable as a `T`. + +Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref). + +To support `ceil` for a new type, define `Base.round(x::NewType, ::RoundingMode{:Up})`. +""" +function ceil end + +trunc(x; kws...) = round(x, RoundToZero; kws...) +floor(x; kws...) = round(x, RoundDown; kws...) + ceil(x; kws...) = round(x, RoundUp; kws...) +round(x; kws...) = round(x, RoundNearest; kws...) 
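A hedged sketch of the extension pattern described in the `round` docstring above: with the keyword fall-backs just defined, a new type only needs a single `RoundingMode` method, and `round`, `trunc`, `floor`, and `ceil` all work through it. `Meters` is a hypothetical wrapper type used purely for illustration; it is not part of the patch.

```julia
# Sketch only: one RoundingMode method drives all four entry points.
struct Meters
    val::Float64
end

Base.round(x::Meters, r::RoundingMode) = Meters(round(x.val, r))

round(Meters(2.5))   # Meters(2.0) — default RoundNearest, ties to even
floor(Meters(2.9))   # Meters(2.0) — forwarded as round(x, RoundDown)
ceil(Meters(2.1))    # Meters(3.0) — forwarded as round(x, RoundUp)
trunc(Meters(-2.7))  # Meters(-2.0) — forwarded as round(x, RoundToZero)
```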
+ +trunc(::Type{T}, x) where T = round(T, x, RoundToZero) +floor(::Type{T}, x) where T = round(T, x, RoundDown) + ceil(::Type{T}, x) where T = round(T, x, RoundUp) +round(::Type{T}, x) where T = round(T, x, RoundNearest) + +round(::Type{T}, x, r::RoundingMode) where T = convert(T, round(x, r)) + +round(x::Integer, r::RoundingMode) = x diff --git a/base/ryu/Ryu.jl b/base/ryu/Ryu.jl index 81d1c41f4c19f..9b236caeb6ff1 100644 --- a/base/ryu/Ryu.jl +++ b/base/ryu/Ryu.jl @@ -1,5 +1,6 @@ module Ryu +using .Base.Libc import .Base: significand_bits, significand_mask, exponent_bits, exponent_mask, exponent_bias, exponent_max, uinttype include("utils.jl") diff --git a/base/ryu/exp.jl b/base/ryu/exp.jl index 30291212d014d..b38b2c7ae9a29 100644 --- a/base/ryu/exp.jl +++ b/base/ryu/exp.jl @@ -8,33 +8,33 @@ function writeexp(buf, pos, v::T, # special cases if x == 0 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 if precision > 0 && !trimtrailingzeros - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 for _ = 1:precision - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end elseif hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end - buf[pos] = expchar - buf[pos + 1] = UInt8('+') - buf[pos + 2] = UInt8('0') - buf[pos + 3] = UInt8('0') + @inbounds buf[pos] = expchar + @inbounds buf[pos + 1] = UInt8('+') + @inbounds buf[pos + 2] = UInt8('0') + @inbounds buf[pos + 3] = UInt8('0') return pos + 4 elseif isnan(x) - buf[pos] = UInt8('N') - buf[pos + 1] = UInt8('a') - buf[pos + 2] = UInt8('N') + @inbounds buf[pos] = UInt8('N') + @inbounds buf[pos + 1] = UInt8('a') + @inbounds buf[pos + 2] = UInt8('N') return pos + 3 elseif !isfinite(x) - buf[pos] = UInt8('I') - buf[pos + 1] = UInt8('n') - buf[pos + 2] = UInt8('f') + @inbounds buf[pos] = UInt8('I') + @inbounds buf[pos + 1] = UInt8('n') + @inbounds buf[pos + 2] = UInt8('f') return pos + 3 end @@ -80,10 +80,10 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(availableDigits, digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end @@ -121,10 +121,10 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(availableDigits, digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end @@ -147,7 +147,7 @@ function writeexp(buf, pos, v::T, end roundUp = 0 if lastDigit != 5 - roundUp = lastDigit > 5 + roundUp = lastDigit > 5 ? 
1 : 0 else rexp = precision - e requiredTwos = -e2 - rexp @@ -162,7 +162,7 @@ function writeexp(buf, pos, v::T, if printedDigits != 0 if digits == 0 for _ = 1:maximum - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end else @@ -172,10 +172,10 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(maximum, digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end @@ -184,52 +184,56 @@ function writeexp(buf, pos, v::T, roundPos = pos while true roundPos -= 1 - if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' ')) - buf[roundPos + 1] = UInt8('1') + if roundPos == (startpos - 1) || (@inbounds buf[roundPos]) == UInt8('-') || (plus && (@inbounds buf[roundPos]) == UInt8('+')) || (space && (@inbounds buf[roundPos]) == UInt8(' ')) + @inbounds buf[roundPos + 1] = UInt8('1') e += 1 break end - c = roundPos > 0 ? buf[roundPos] : 0x00 + c = roundPos > 0 ? (@inbounds buf[roundPos]) : 0x00 if c == decchar continue elseif c == UInt8('9') - buf[roundPos] = UInt8('0') + @inbounds buf[roundPos] = UInt8('0') roundUp = 1 continue else if roundUp == 2 && UInt8(c) % 2 == 0 break end - buf[roundPos] = c + 1 + @inbounds buf[roundPos] = c + 1 break end end end if trimtrailingzeros - while buf[pos - 1] == UInt8('0') + while @inbounds buf[pos - 1] == UInt8('0') pos -= 1 end - if buf[pos - 1] == decchar && !hash + if @inbounds buf[pos - 1] == decchar && !hash pos -= 1 end end buf[pos] = expchar pos += 1 if e < 0 - buf[pos] = UInt8('-') + @inbounds buf[pos] = UInt8('-') pos += 1 e = -e else - buf[pos] = UInt8('+') + @inbounds buf[pos] = UInt8('+') pos += 1 end if e >= 100 c = e % 10 - unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * div(e, 10) + 1, 2) - buf[pos + 2] = UInt8('0') + c + @inbounds d100 = DIGIT_TABLE16[div(e, 10) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 + @inbounds buf[pos + 2] = UInt8('0') + c pos += 3 else - unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * e + 1, 2) + @inbounds d100 = DIGIT_TABLE16[e + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 pos += 2 end return pos diff --git a/base/ryu/fixed.jl b/base/ryu/fixed.jl index e0085f5c66dab..96777059bc284 100644 --- a/base/ryu/fixed.jl +++ b/base/ryu/fixed.jl @@ -38,7 +38,7 @@ function writefixed(buf, pos, v::T, mant = bits & MANTISSA_MASK exp = Int((bits >> 52) & EXP_MASK) - if exp == 0 + if exp == 0 # subnormal e2 = 1 - 1023 - 52 m2 = mant else @@ -53,13 +53,13 @@ function writefixed(buf, pos, v::T, i = len - 1 while i >= 0 j = p10bits - e2 - #=@inbounds=# mula, mulb, mulc = POW10_SPLIT[POW10_OFFSET[idx + 1] + i + 1] + mula, mulb, mulc = POW10_SPLIT[POW10_OFFSET[idx + 1] + i + 1] digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8) if nonzero pos = append_nine_digits(digits, buf, pos) elseif digits != 0 olength = decimallength(digits) - pos = append_n_digits(olength, digits, buf, pos) + pos = append_c_digits(olength, digits, buf, pos) nonzero = true end i -= 1 @@ -103,7 +103,7 @@ function writefixed(buf, pos, v::T, end break end - #=@inbounds=# mula, mulb, mulc = POW10_SPLIT_2[p + 1] + mula, mulb, mulc = POW10_SPLIT_2[p + 1] digits = mulshiftmod1e9(m2 << 8, mula, mulb, mulc, j + 8) if i < blocks - 1 pos = append_nine_digits(digits, buf, pos) @@ -118,11 +118,11 @@ function writefixed(buf, pos, v::T, k += 1 end 
if lastDigit != 5 - roundUp = lastDigit > 5 + roundUp = lastDigit > 5 ? 1 : 0 else requiredTwos = -e2 - precision - 1 trailingZeros = requiredTwos <= 0 || (requiredTwos < 60 && pow2(m2, requiredTwos)) - roundUp = trailingZeros ? 2 : 1 + roundUp = trailingZeros ? 2 : 1 # 2 means round only if odd end if maximum > 0 pos = append_c_digits(maximum, digits, buf, pos) @@ -137,13 +137,13 @@ function writefixed(buf, pos, v::T, while true roundPos -= 1 if roundPos == (startpos - 1) || (buf[roundPos] == UInt8('-')) || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' ')) + buf[pos] = UInt8('0') buf[roundPos + 1] = UInt8('1') if dotPos > 1 buf[dotPos] = UInt8('0') buf[dotPos + 1] = decchar hasfractional = true end - buf[pos] = UInt8('0') pos += 1 break end diff --git a/base/ryu/shortest.jl b/base/ryu/shortest.jl index f95c09d235e6d..32aa993467e7a 100644 --- a/base/ryu/shortest.jl +++ b/base/ryu/shortest.jl @@ -232,79 +232,79 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, # special cases if x == 0 if typed && x isa Float16 - buf[pos] = UInt8('F') - buf[pos + 1] = UInt8('l') - buf[pos + 2] = UInt8('o') - buf[pos + 3] = UInt8('a') - buf[pos + 4] = UInt8('t') - buf[pos + 5] = UInt8('1') - buf[pos + 6] = UInt8('6') - buf[pos + 7] = UInt8('(') + @inbounds buf[pos] = UInt8('F') + @inbounds buf[pos + 1] = UInt8('l') + @inbounds buf[pos + 2] = UInt8('o') + @inbounds buf[pos + 3] = UInt8('a') + @inbounds buf[pos + 4] = UInt8('t') + @inbounds buf[pos + 5] = UInt8('1') + @inbounds buf[pos + 6] = UInt8('6') + @inbounds buf[pos + 7] = UInt8('(') pos += 8 end pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end if precision == -1 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos += 1 end return pos end while hash && precision > 1 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 precision -= 1 end if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos += 1 end return pos elseif isnan(x) pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('N') - buf[pos + 1] = UInt8('a') - buf[pos + 2] = UInt8('N') + @inbounds buf[pos] = UInt8('N') + @inbounds buf[pos + 1] = UInt8('a') + @inbounds buf[pos + 2] = UInt8('N') if typed if x isa Float32 - buf[pos + 3] = UInt8('3') - buf[pos + 4] = UInt8('2') + @inbounds buf[pos + 3] = UInt8('3') + @inbounds buf[pos + 4] = UInt8('2') elseif x isa Float16 - buf[pos + 3] = UInt8('1') - buf[pos + 4] = UInt8('6') + @inbounds buf[pos + 3] = UInt8('1') + @inbounds buf[pos + 4] = UInt8('6') end end return pos + 3 + (typed && x isa Union{Float32, Float16} ? 
2 : 0) elseif !isfinite(x) pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('I') - buf[pos + 1] = UInt8('n') - buf[pos + 2] = UInt8('f') + @inbounds buf[pos] = UInt8('I') + @inbounds buf[pos + 1] = UInt8('n') + @inbounds buf[pos + 2] = UInt8('f') if typed if x isa Float32 - buf[pos + 3] = UInt8('3') - buf[pos + 4] = UInt8('2') + @inbounds buf[pos + 3] = UInt8('3') + @inbounds buf[pos + 4] = UInt8('2') elseif x isa Float16 - buf[pos + 3] = UInt8('1') - buf[pos + 4] = UInt8('6') + @inbounds buf[pos + 3] = UInt8('1') + @inbounds buf[pos + 4] = UInt8('6') end end return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0) @@ -313,14 +313,14 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, output, nexp = reduce_shortest(x, compact ? 999_999 : nothing) if typed && x isa Float16 - buf[pos] = UInt8('F') - buf[pos + 1] = UInt8('l') - buf[pos + 2] = UInt8('o') - buf[pos + 3] = UInt8('a') - buf[pos + 4] = UInt8('t') - buf[pos + 5] = UInt8('1') - buf[pos + 6] = UInt8('6') - buf[pos + 7] = UInt8('(') + @inbounds buf[pos] = UInt8('F') + @inbounds buf[pos + 1] = UInt8('l') + @inbounds buf[pos + 2] = UInt8('o') + @inbounds buf[pos + 3] = UInt8('a') + @inbounds buf[pos + 4] = UInt8('t') + @inbounds buf[pos + 5] = UInt8('1') + @inbounds buf[pos + 6] = UInt8('6') + @inbounds buf[pos + 7] = UInt8('(') pos += 8 end pos = append_sign(x, plus, space, buf, pos) @@ -332,161 +332,122 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, !(pt >= olength && abs(mod(x + 0.05, 10^(pt - olength)) - 0.05) > 0.05) exp_form = false if pt <= 0 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 for _ = 1:abs(pt) - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - # elseif pt >= olength + # elseif pt >= olength # nothing to do at this point - # else + # else # nothing to do at this point end else + # make space for decchar pos += 1 end - i = 0 - ptr = pointer(buf) - ptr2 = pointer(DIGIT_TABLE) - if (output >> 32) != 0 - q = output ÷ 100000000 - output2 = (output % UInt32) - UInt32(100000000) * (q % UInt32) - output = q - c = output2 % UInt32(10000) - output2 = div(output2, UInt32(10000)) - d = output2 % UInt32(10000) - c0 = (c % 100) << 1 - c1 = (c ÷ 100) << 1 - d0 = (d % 100) << 1 - d1 = (d ÷ 100) << 1 - memcpy(ptr, pos + olength - 2, ptr2, c0 + 1, 2) - memcpy(ptr, pos + olength - 4, ptr2, c1 + 1, 2) - memcpy(ptr, pos + olength - 6, ptr2, d0 + 1, 2) - memcpy(ptr, pos + olength - 8, ptr2, d1 + 1, 2) - i += 8 - end - output2 = output % UInt32 - while output2 >= 10000 - c = output2 % UInt32(10000) - output2 = div(output2, UInt32(10000)) - c0 = (c % 100) << 1 - c1 = (c ÷ 100) << 1 - memcpy(ptr, pos + olength - i - 2, ptr2, c0 + 1, 2) - memcpy(ptr, pos + olength - i - 4, ptr2, c1 + 1, 2) - i += 4 - end - if output2 >= 100 - c = (output2 % UInt32(100)) << 1 - output2 = div(output2, UInt32(100)) - memcpy(ptr, pos + olength - i - 2, ptr2, c + 1, 2) - i += 2 - end - if output2 >= 10 - c = output2 << 1 - buf[pos + 1] = DIGIT_TABLE[c + 2] - buf[pos - exp_form] = DIGIT_TABLE[c + 1] - else - buf[pos - exp_form] = UInt8('0') + (output2 % UInt8) - end + append_c_digits(olength, output, buf, pos) if !exp_form if pt <= 0 pos += olength precision -= olength - while hash && precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 - end elseif pt >= olength pos += olength precision -= olength for _ = 1:nexp - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 precision -= 1 end if hash - 
buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 if precision < 0 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - while precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 - end end else pointoff = olength - abs(nexp) - memmove(ptr, pos + pointoff + 1, ptr, pos + pointoff, olength - pointoff + 1) - buf[pos + pointoff] = decchar + # shift bytes after pointoff to make room for decchar + ptr = pointer(buf) + memmove(ptr + pos + pointoff, ptr + pos + pointoff - 1, olength - pointoff + 1) + @inbounds buf[pos + pointoff] = decchar pos += olength + 1 precision -= olength - while hash && precision > 0 - buf[pos] = UInt8('0') + end + if hash + while precision > 0 + @inbounds buf[pos] = UInt8('0') pos += 1 precision -= 1 end end if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end else + # move leading digit into place + @inbounds buf[pos - 1] = buf[pos] if olength > 1 || hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += olength precision -= olength end - if hash && olength == 1 - buf[pos] = UInt8('0') - pos += 1 - end - while hash && precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 + if hash + if olength == 1 + @inbounds buf[pos] = UInt8('0') + pos += 1 + end + while precision > 0 + @inbounds buf[pos] = UInt8('0') + pos += 1 + precision -= 1 + end end - buf[pos] = expchar + @inbounds buf[pos] = expchar pos += 1 exp2 = nexp + olength - 1 if exp2 < 0 - buf[pos] = UInt8('-') + @inbounds buf[pos] = UInt8('-') pos += 1 exp2 = -exp2 elseif padexp - buf[pos] = UInt8('+') + @inbounds buf[pos] = UInt8('+') pos += 1 end if exp2 >= 100 c = exp2 % 10 - memcpy(ptr, pos, ptr2, 2 * div(exp2, 10) + 1, 2) - buf[pos + 2] = UInt8('0') + (c % UInt8) + @inbounds d100 = DIGIT_TABLE16[(div(exp2, 10) % Int) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 + @inbounds buf[pos + 2] = UInt8('0') + (c % UInt8) pos += 3 elseif exp2 >= 10 - memcpy(ptr, pos, ptr2, 2 * exp2 + 1, 2) + @inbounds d100 = DIGIT_TABLE16[(exp2 % Int) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 pos += 2 else if padexp - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - buf[pos] = UInt8('0') + (exp2 % UInt8) + @inbounds buf[pos] = UInt8('0') + (exp2 % UInt8) pos += 1 end end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos += 1 end diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl index 352f8f19cb9be..2064dfbefcecd 100644 --- a/base/ryu/utils.jl +++ b/base/ryu/utils.jl @@ -1,9 +1,6 @@ const MANTISSA_MASK = Base.significand_mask(Float64) const EXP_MASK = Base.exponent_mask(Float64) >> Base.significand_bits(Float64) -memcpy(d, doff, s, soff, n) = (ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), d + doff - 1, s + soff - 1, n); nothing) -memmove(d, doff, s, soff, n) = (ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), d + doff - 1, s + soff - 1, n); nothing) - # Note: these are smaller than the values given in Figure 4 from the paper # see https://github.com/ulfjack/ryu/issues/119 pow5_bitcount(::Type{Float16}) = 30 @@ -49,7 +46,7 @@ pow5bits(e) = ((e * 1217359) >> 19) + 1 Compute `(m * mul) >> j`, where `j >= 8*sizeof(U)`. The type of the results is the larger of `U` or `UInt32`. 
""" -@inline function mulshift(m::U, mul, j) where {U<:Unsigned} +function mulshift(m::U, mul, j) where {U<:Unsigned} W = widen(U) nbits = 8*sizeof(U) return ((((W(m) * (mul % U)) >> nbits) + W(m) * (mul >> nbits)) >> (j - nbits)) % promote_type(U,UInt32) @@ -64,16 +61,7 @@ lengthforindex(idx) = div(((Int64(16 * idx) * 1292913986) >> 32) + 1 + 16 + 8, 9 Return `true` if `5^p` is a divisor of `x`. """ -@inline function pow5(x, p) - count = 0 - while true - q = div(x, 5) - r = x - 5 * q - r != 0 && return count >= p - x = q - count += 1 - end -end +pow5(x, p) = x % (UInt64(5)^p) == 0 """ Ryu.pow2(x, p) @@ -87,7 +75,7 @@ pow2(x, p) = (x & ((Int64(1) << p) - 1)) == 0 The number of decimal digits of the integer `v`. """ -@inline function decimallength(v) +function decimallength(v) v >= 10000000000000000 && return 17 v >= 1000000000000000 && return 16 v >= 100000000000000 && return 15 @@ -106,7 +94,7 @@ The number of decimal digits of the integer `v`. v >= 10 && return 2 return 1 end -@inline function decimallength(v::UInt32) +function decimallength(v::UInt32) v >= 100000000 && return 9 v >= 10000000 && return 8 v >= 1000000 && return 7 @@ -117,7 +105,7 @@ end v >= 10 && return 2 return 1 end -@inline function decimallength(v::UInt16) +function decimallength(v::UInt16) v >= 10000 && return 5 v >= 1000 && return 4 v >= 100 && return 3 @@ -125,7 +113,7 @@ end return 1 end -@inline function mulshiftinvsplit(::Type{T}, mv, mp, mm, i, j) where {T} +function mulshiftinvsplit(::Type{T}, mv, mp, mm, i, j) where {T} mul = pow5invsplit_lookup(T, i) vr = mulshift(mv, mul, j) vp = mulshift(mp, mul, j) @@ -133,7 +121,7 @@ end return vr, vp, vm end -@inline function mulshiftsplit(::Type{T}, mv, mp, mm, i, j) where {T} +function mulshiftsplit(::Type{T}, mv, mp, mm, i, j) where {T} mul = pow5split_lookup(T, i) vr = mulshift(mv, mul, j) vp = mulshift(mp, mul, j) @@ -146,7 +134,7 @@ end Compute `p = a*b` where `b = bLo + bHi<<64`, returning the result as `pLo, pHi` where `p = pLo + pHi<<128`. """ -@inline function umul256(a, bHi, bLo) +function umul256(a::UInt128, bHi::UInt64, bLo::UInt64) aLo = a % UInt64 aHi = (a >> 64) % UInt64 @@ -176,14 +164,14 @@ end Compute `pHi = (a*b)>>128` where `b = bLo + bHi<<64`. """ -@inline umul256_hi(a, bHi, bLo) = umul256(a, bHi, bLo)[2] +umul256_hi(a::UInt128, bHi::UInt64, bLo::UInt64) = umul256(a, bHi, bLo)[2] """ Ryu.mulshiftmod1e9(m, mula, mulb, mulc, j)::UInt32 Compute `(m * mul) >> j % 10^9` where `mul = mula + mulb<<64 + mulc<<128`, and `j >= 128`. 
""" -@inline function mulshiftmod1e9(m, mula, mulb, mulc, j) +function mulshiftmod1e9(m, mula, mulb, mulc, j) b0 = UInt128(m) * mula b1 = UInt128(m) * mulb b2 = UInt128(m) * mulc @@ -195,7 +183,7 @@ Compute `(m * mul) >> j % 10^9` where `mul = mula + mulb<<64 + mulc<<128`, and ` return (v % UInt32) - UInt32(1000000000) * shifted end -@inline function append_sign(x, plus, space, buf, pos) +function append_sign(x, plus::Bool, space::Bool, buf, pos::Int) if signbit(x) && !isnan(x) # suppress minus sign for signaling NaNs buf[pos] = UInt8('-') pos += 1 @@ -209,101 +197,14 @@ end return pos end -@inline function append_n_digits(olength, digits, buf, pos) - i = 0 - while digits >= 10000 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + olength - i - 4, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - if digits >= 100 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if digits >= 10 - c = digits << 1 - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - else - buf[pos] = UInt8('0') + digits - i += 1 - end - return pos + i -end -@inline function append_d_digits(olength, digits, buf, pos, decchar) - i = 0 - while digits >= 10000 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos + olength + 1 - i - 2, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + olength + 1 - i - 4, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - if digits >= 100 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + olength + 1 - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if digits >= 10 - c = digits << 1 - buf[pos] = DIGIT_TABLE[c + 1] - buf[pos + 1] = decchar - buf[pos + 2] = DIGIT_TABLE[c + 2] - i += 3 - else - buf[pos] = UInt8('0') + digits - buf[pos + 1] = decchar - i += 2 - end - return pos + i -end +import Base: append_c_digits_fast as append_c_digits, append_nine_digits -@inline function append_c_digits(count, digits, buf, pos) - i = 0 - while i < count - 1 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + count - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if i < count - buf[pos + count - i - 1] = UInt8('0') + (digits % 10) - i += 1 - end - return pos + i -end - -@inline function append_nine_digits(digits, buf, pos) - if digits == 0 - for _ = 1:9 - buf[pos] = UInt8('0') - pos += 1 - end - return pos - end - i = 0 - while i < 5 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos + 7 - i, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + 5 - i, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - buf[pos] = UInt8('0') + digits - i += 1 - return pos + i +function append_d_digits(olength::Int, digits::Unsigned, buf, pos::Int, decchar) + newpos = append_c_digits(olength, digits, buf, pos + 1) + @inbounds buf[pos] = buf[pos + 1] + @inbounds buf[pos + 1] = decchar + return newpos # == pos + olength + 1 end const BIG_MASK = (big(1) << 64) - 1 @@ -402,18 +303,7 @@ for T in (Float64, Float32, Float16) @eval pow5split_lookup(::Type{$T}, i) = @inbounds($table_sym[i+1]) end -const DIGIT_TABLE = UInt8[ - '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9', - '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9', - 
'2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9', - '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9', - '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9', - '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9', - '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9', - '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9', - '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9', - '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9' -] +const DIGIT_TABLE16 = Base._dec_d100 const POW10_OFFSET = UInt16[ 0, 2, 5, 8, 12, 16, 21, 26, 32, 39, diff --git a/base/scopedvalues.jl b/base/scopedvalues.jl new file mode 100644 index 0000000000000..6eb1004a1d30f --- /dev/null +++ b/base/scopedvalues.jl @@ -0,0 +1,177 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module ScopedValues + +export ScopedValue, with, @with + +""" + ScopedValue(x) + +Create a container that propagates values across dynamic scopes. +Use [`with`](@ref) to create and enter a new dynamic scope. + +Values can only be set when entering a new dynamic scope, +and the value referred to will be constant during the +execution of a dynamic scope. + +Dynamic scopes are propagated across tasks. + +# Examples + +```jldoctest +julia> const sval = ScopedValue(1); + +julia> sval[] +1 + +julia> with(sval => 2) do + sval[] + end +2 + +julia> sval[] +1 +``` + +!!! compat "Julia 1.11" + Scoped values were introduced in Julia 1.11. In Julia 1.8+ a compatible + implementation is available from the package ScopedValues.jl. +""" +mutable struct ScopedValue{T} + # NOTE this struct must be defined as mutable one since it's used as a key of + # `ScopeStorage` dictionary and thus needs object identity + const has_default::Bool # this field is necessary since isbitstype `default` field may be initialized with undefined value + const default::T + ScopedValue{T}() where T = new(false) + ScopedValue{T}(val) where T = new{T}(true, val) + ScopedValue(val::T) where T = new{T}(true, val) +end + +Base.eltype(::ScopedValue{T}) where {T} = T + +""" + isassigned(val::ScopedValue) + +Test if the ScopedValue has a default value. +""" +Base.isassigned(val::ScopedValue) = val.has_default + +const ScopeStorage = Base.PersistentDict{ScopedValue, Any} + +struct Scope + values::ScopeStorage +end + +function Scope(parent::Union{Nothing, Scope}, key::ScopedValue{T}, value) where T + val = convert(T, value) + if parent === nothing + return Scope(ScopeStorage(key=>val)) + end + return Scope(ScopeStorage(parent.values, key=>val)) +end + +function Scope(scope, pair::Pair{<:ScopedValue}) + return Scope(scope, pair...) +end + +function Scope(scope, pair1::Pair{<:ScopedValue}, pair2::Pair{<:ScopedValue}, pairs::Pair{<:ScopedValue}...) + # Unroll this loop through recursion to make sure that + # our compiler optimization support works + return Scope(Scope(scope, pair1...), pair2, pairs...) 
+end +Scope(::Nothing) = nothing + +function Base.show(io::IO, scope::Scope) + print(io, Scope, "(") + first = true + for (key, value) in scope.values + if first + first = false + else + print(io, ", ") + end + print(io, typeof(key), "@") + show(io, Base.objectid(key)) + print(io, " => ") + show(IOContext(io, :typeinfo => eltype(key)), value) + end + print(io, ")") +end + +struct NoValue end +const novalue = NoValue() + +""" + get(val::ScopedValue{T})::Union{Nothing, Some{T}} + +If the scoped value isn't set and doesn't have a default value, +return `nothing`. Otherwise returns `Some{T}` with the current +value. +""" +function get(val::ScopedValue{T}) where {T} + scope = Core.current_scope()::Union{Scope, Nothing} + if scope === nothing + isassigned(val) && return Some{T}(val.default) + return nothing + end + scope = scope::Scope + if isassigned(val) + return Some{T}(Base.get(scope.values, val, val.default)::T) + else + v = Base.get(scope.values, val, novalue) + v === novalue || return Some{T}(v::T) + end + return nothing +end + +function Base.getindex(val::ScopedValue{T})::T where T + maybe = get(val) + maybe === nothing && throw(KeyError(val)) + return something(maybe)::T +end + +function Base.show(io::IO, val::ScopedValue) + print(io, ScopedValue) + print(io, '{', eltype(val), '}') + print(io, '(') + v = get(val) + if v === nothing + print(io, "undefined") + else + show(IOContext(io, :typeinfo => eltype(val)), something(v)) + end + print(io, ')') +end + +""" + @with vars... expr + +Macro version of `with(f, vars...)` but with `expr` instead of `f` function. +This is similar to using [`with`](@ref) with a `do` block, but avoids creating +a closure. +""" +macro with(exprs...) + if length(exprs) > 1 + ex = last(exprs) + exprs = exprs[1:end-1] + elseif length(exprs) == 1 + ex = only(exprs) + exprs = () + else + error("@with expects at least one argument") + end + exprs = map(esc, exprs) + Expr(:tryfinally, esc(ex), :(), :(Scope(Core.current_scope()::Union{Nothing, Scope}, $(exprs...)))) +end + +""" + with(f, (var::ScopedValue{T} => val::T)...) + +Execute `f` in a new scope with `var` set to `val`. +""" +function with(f, pair::Pair{<:ScopedValue}, rest::Pair{<:ScopedValue}...) + @with(pair, rest..., f()) +end +with(@nospecialize(f)) = f() + +end # module ScopedValues diff --git a/base/secretbuffer.jl b/base/secretbuffer.jl index 935c50fb80fd6..61eeffdb4f18d 100644 --- a/base/secretbuffer.jl +++ b/base/secretbuffer.jl @@ -29,12 +29,12 @@ true ``` """ mutable struct SecretBuffer <: IO - data::Vector{UInt8} + data::Memory{UInt8} size::Int ptr::Int function SecretBuffer(; sizehint=128) - s = new(Vector{UInt8}(undef, sizehint), 0, 1) + s = new(Memory{UInt8}(undef, sizehint), 0, 1) finalizer(final_shred!, s) return s end @@ -49,7 +49,7 @@ Strings are bad at keeping secrets because they are unable to be securely zeroed or destroyed. Therefore, avoid using this constructor with secret data. Instead of starting with a string, either construct the `SecretBuffer` incrementally with `SecretBuffer()` and [`write`](@ref), or use a `Vector{UInt8}` with -the `Base.SecretBuffer!(::Vector{UInt8})` constructor. +the `Base.SecretBuffer!(::AbstractVector{UInt8})` constructor. """ SecretBuffer(str::AbstractString) = SecretBuffer(String(str)) function SecretBuffer(str::String) @@ -68,7 +68,7 @@ convert(::Type{SecretBuffer}, s::AbstractString) = SecretBuffer(String(s)) Initialize a new `SecretBuffer` from `data`, securely zeroing `data` afterwards. 
""" -function SecretBuffer!(d::Vector{UInt8}) +function SecretBuffer!(d::AbstractVector{UInt8}) len = length(d) s = SecretBuffer(sizehint=len) for i in 1:len @@ -106,7 +106,7 @@ show(io::IO, s::SecretBuffer) = print(io, "SecretBuffer(\"*******\")") ==(s1::SecretBuffer, s2::SecretBuffer) = (s1.ptr == s2.ptr) && (s1.size == s2.size) && (UInt8(0) == _bufcmp(s1.data, s2.data, min(s1.size, s2.size))) # Also attempt a constant time buffer comparison algorithm — the length of the secret might be # inferred by a timing attack, but not its values. -@noinline function _bufcmp(data1::Vector{UInt8}, data2::Vector{UInt8}, sz::Int) +@noinline function _bufcmp(data1::Memory{UInt8}, data2::Memory{UInt8}, sz::Int) res = UInt8(0) for i = 1:sz res |= xor(data1[i], data2[i]) @@ -117,11 +117,23 @@ end const _sb_hash = UInt === UInt32 ? 0x111c0925 : 0xb06061e370557428 hash(s::SecretBuffer, h::UInt) = hash(_sb_hash, h) +copy(s::SecretBuffer) = copy!(SecretBuffer(sizehint=length(s.data)), s) +function copy!(dest::SecretBuffer, src::SecretBuffer) + if length(dest.data) != length(src.data) + securezero!(dest.data) + dest.data = copy(src.data) + else + copyto!(dest.data, src.data) + end + dest.size = src.size + dest.ptr = src.ptr + return dest +end function write(io::SecretBuffer, b::UInt8) if io.ptr > length(io.data) # We need to resize! the array: do this manually to ensure no copies are left behind - newdata = Vector{UInt8}(undef, (io.size+16)*2) + newdata = Memory{UInt8}(undef, (io.size+16)*2) copyto!(newdata, io.data) securezero!(io.data) io.data = newdata @@ -140,8 +152,7 @@ function write(io::IO, s::SecretBuffer) return nb end -cconvert(::Type{Cstring}, s::SecretBuffer) = unsafe_convert(Cstring, s) -function unsafe_convert(::Type{Cstring}, s::SecretBuffer) +function cconvert(::Type{Cstring}, s::SecretBuffer) # Ensure that no nuls appear in the valid region if any(==(0x00), s.data[i] for i in 1:s.size) throw(ArgumentError("`SecretBuffers` containing nul bytes cannot be converted to C strings")) @@ -152,8 +163,10 @@ function unsafe_convert(::Type{Cstring}, s::SecretBuffer) write(s, '\0') s.ptr = p s.size -= 1 - return Cstring(unsafe_convert(Ptr{Cchar}, s.data)) + return s.data end +# optional shim for manual calls to unsafe_convert: +# unsafe_convert(::Type{Cstring}, s::SecretBuffer) = unsafe_convert(Cstring, cconvert(Cstring, s)) seek(io::SecretBuffer, n::Integer) = (io.ptr = max(min(n+1, io.size+1), 1); io) seekend(io::SecretBuffer) = seek(io, io.size+1) @@ -179,6 +192,21 @@ function final_shred!(s::SecretBuffer) shred!(s) end +""" + shred!(s::SecretBuffer) + +Shreds the contents of a `SecretBuffer` by securely zeroing its data and +resetting its pointer and size. +This function is used to securely erase the sensitive data held in the buffer, +reducing the potential for information leaks. + +# Example +```julia +s = SecretBuffer() +write(s, 's', 'e', 'c', 'r', 'e', 't') +shred!(s) # s is now empty +``` +""" function shred!(s::SecretBuffer) securezero!(s.data) s.ptr = 1 @@ -188,6 +216,13 @@ end isshredded(s::SecretBuffer) = all(iszero, s.data) +""" + shred!(f::Function, x) + +Applies function `f` to the argument `x` and then shreds `x`. +This function is useful when you need to perform some operations on e.g. a +`SecretBuffer` and then want to ensure that it is securely shredded afterwards. 
+""" function shred!(f::Function, x) try f(x) diff --git a/base/set.jl b/base/set.jl index 3b5635ccb5a33..76502ee9d22ef 100644 --- a/base/set.jl +++ b/base/set.jl @@ -13,7 +13,7 @@ See also: [`AbstractSet`](@ref), [`BitSet`](@ref), [`Dict`](@ref), [`push!`](@ref), [`empty!`](@ref), [`union!`](@ref), [`in`](@ref), [`isequal`](@ref) # Examples -```jldoctest filter = r"^\\S.+" +```jldoctest; filter = r"^ '.'"ma julia> s = Set("aaBca") Set{Char} with 3 elements: 'a' @@ -23,9 +23,9 @@ Set{Char} with 3 elements: julia> push!(s, 'b') Set{Char} with 4 elements: 'a' - 'c' 'b' 'B' + 'c' julia> s = Set([NaN, 0.0, 1.0, 2.0]); @@ -91,18 +91,70 @@ isempty(s::Set) = isempty(s.dict) length(s::Set) = length(s.dict) in(x, s::Set) = haskey(s.dict, x) -# This avoids hashing and probing twice and it works the same as -# in!(x, s::Set) = in(x, s) ? true : (push!(s, x); false) +""" + in!(x, s::AbstractSet) -> Bool + +If `x` is in `s`, return `true`. If not, push `x` into `s` and return `false`. +This is equivalent to `in(x, s) ? true : (push!(s, x); false)`, but may have a +more efficient implementation. + +See also: [`in`](@ref), [`push!`](@ref), [`Set`](@ref) + +!!! compat "Julia 1.11" + This function requires at least 1.11. + +# Examples +```jldoctest; filter = r"^ [1234]\$" +julia> s = Set{Any}([1, 2, 3]); in!(4, s) +false + +julia> length(s) +4 + +julia> in!(0x04, s) +true + +julia> s +Set{Any} with 4 elements: + 4 + 2 + 3 + 1 +``` +""" +function in!(x, s::AbstractSet) + x ∈ s ? true : (push!(s, x); false) +end + function in!(x, s::Set) - idx, sh = ht_keyindex2_shorthash!(s.dict, x) + xT = convert(eltype(s), x) + idx, sh = ht_keyindex2_shorthash!(s.dict, xT) idx > 0 && return true - _setindex!(s.dict, nothing, x, -idx, sh) + _setindex!(s.dict, nothing, xT, -idx, sh) return false end push!(s::Set, x) = (s.dict[x] = nothing; s) -pop!(s::Set, x) = (pop!(s.dict, x); x) -pop!(s::Set, x, default) = (x in s ? pop!(s, x) : default) + +function pop!(s::Set, x, default) + dict = s.dict + index = ht_keyindex(dict, x) + if index > 0 + @inbounds key = dict.keys[index] + _delete!(dict, index) + return key + else + return default + end +end + +function pop!(s::Set, x) + index = ht_keyindex(s.dict, x) + index < 1 && throw(KeyError(x)) + result = @inbounds s.dict.keys[index] + _delete!(s.dict, index) + result +end function pop!(s::Set) isempty(s) && throw(ArgumentError("set must be non-empty")) @@ -117,12 +169,14 @@ copymutable(s::Set{T}) where {T} = Set{T}(s) # Set is the default mutable fall-back copymutable(s::AbstractSet{T}) where {T} = Set{T}(s) -sizehint!(s::Set, newsz) = (sizehint!(s.dict, newsz); s) +sizehint!(s::Set, newsz; shrink::Bool=true) = (sizehint!(s.dict, newsz; shrink); s) empty!(s::Set) = (empty!(s.dict); s) rehash!(s::Set) = (rehash!(s.dict); s) iterate(s::Set, i...) = iterate(KeySet(s.dict), i...) +@propagate_inbounds Iterators.only(s::Set) = Iterators._only(s, first) + # In case the size(s) is smaller than size(t) its more efficient to iterate through # elements of s instead and only delete the ones also contained in t. # The threshold for this decision boils down to a tradeoff between @@ -210,7 +264,7 @@ unique(r::AbstractRange) = allunique(r) ? r : oftype(r, r[begin:begin]) """ unique(f, itr) -Returns an array containing one value from `itr` for each unique value produced by `f` +Return an array containing one value from `itr` for each unique value produced by `f` applied to elements of `itr`. 
# Examples @@ -428,6 +482,8 @@ end Return `true` if all values from `itr` are distinct when compared with [`isequal`](@ref). +`allunique` may use a specialized implementation when the input is sorted. + See also: [`unique`](@ref), [`issorted`](@ref), [`allequal`](@ref). # Examples @@ -476,7 +532,31 @@ allunique(::Union{AbstractSet,AbstractDict}) = true allunique(r::AbstractRange) = !iszero(step(r)) || length(r) <= 1 -allunique(A::StridedArray) = length(A) < 32 ? _indexed_allunique(A) : _hashed_allunique(A) +function allunique(A::StridedArray) + if length(A) < 32 + _indexed_allunique(A) + elseif OrderStyle(eltype(A)) === Ordered() + a1, rest1 = Iterators.peel(A) + a2, rest = Iterators.peel(rest1) + if !isequal(a1, a2) + compare = isless(a1, a2) ? isless : (a,b) -> isless(b,a) + for a in rest + if compare(a2, a) + a2 = a + elseif isequal(a2, a) + return false + else + return _hashed_allunique(A) + end + end + else # isequal(a1, a2) + return false + end + return true + else + _hashed_allunique(A) + end +end function _indexed_allunique(A) length(A) < 2 && return true @@ -548,7 +628,7 @@ function hash(s::AbstractSet, h::UInt) end convert(::Type{T}, s::T) where {T<:AbstractSet} = s -convert(::Type{T}, s::AbstractSet) where {T<:AbstractSet} = T(s) +convert(::Type{T}, s::AbstractSet) where {T<:AbstractSet} = T(s)::T ## replace/replace! ## @@ -617,7 +697,7 @@ function replace_pairs!(res, A, count::Int, old_new::Tuple{Vararg{Pair}}) end """ - replace!(new::Function, A; [count::Integer]) + replace!(new::Union{Function, Type}, A; [count::Integer]) Replace each element `x` in collection `A` by `new(x)`. If `count` is specified, then replace at most `count` values in total @@ -710,7 +790,7 @@ subtract_singletontype(::Type{T}, x::Pair{K}, y::Pair...) where {T, K} = subtract_singletontype(subtract_singletontype(T, y...), x) """ - replace(new::Function, A; [count::Integer]) + replace(new::Union{Function, Type}, A; [count::Integer]) Return a copy of `A` where each value `x` in `A` is replaced by `new(x)`. If `count` is specified, then replace at most `count` values in total diff --git a/base/shell.jl b/base/shell.jl index f443a1f9c094a..48214418bdee5 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -18,12 +18,11 @@ end function shell_parse(str::AbstractString, interpolate::Bool=true; special::AbstractString="", filename="none") - s = SubString(str, firstindex(str)) + last_arg = firstindex(str) # N.B.: This is used by REPLCompletions + s = SubString(str, last_arg) s = rstrip_shell(lstrip(s)) - # N.B.: This is used by REPLCompletions - last_parse = 0:-1 - isempty(s) && return interpolate ? (Expr(:tuple,:()),last_parse) : ([],last_parse) + isempty(s) && return interpolate ? 
(Expr(:tuple,:()), last_arg) : ([], last_arg) in_single_quotes = false in_double_quotes = false @@ -32,6 +31,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; arg = [] i = firstindex(s) st = Iterators.Stateful(pairs(s)) + update_last_arg = false # true after spaces or interpolate function push_nonempty!(list, x) if !isa(x,AbstractString) || !isempty(x) @@ -54,6 +54,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; for (j, c) in st j, c = j::Int, c::C if !in_single_quotes && !in_double_quotes && isspace(c) + update_last_arg = true i = consume_upto!(arg, s, i, j) append_2to1!(args, arg) while !isempty(st) @@ -77,12 +78,17 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; # use parseatom instead of parse to respect filename (#28188) ex, j = Meta.parseatom(s, stpos, filename=filename) end - last_parse = (stpos:prevind(s, j)) .+ s.offset - push_nonempty!(arg, ex) + last_arg = stpos + s.offset + update_last_arg = true + push!(arg, ex) s = SubString(s, j) Iterators.reset!(st, pairs(s)) i = firstindex(s) else + if update_last_arg + last_arg = i + s.offset + update_last_arg = false + end if !in_double_quotes && c == '\'' in_single_quotes = !in_single_quotes i = consume_upto!(arg, s, i, j) @@ -124,14 +130,14 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; push_nonempty!(arg, s[i:end]) append_2to1!(args, arg) - interpolate || return args, last_parse + interpolate || return args, last_arg # construct an expression ex = Expr(:tuple) for arg in args push!(ex.args, Expr(:tuple, arg...)) end - return ex, last_parse + return ex, last_arg end function shell_split(s::AbstractString) @@ -216,7 +222,7 @@ function print_shell_escaped_posixly(io::IO, args::AbstractString...) function isword(c::AbstractChar) if '0' <= c <= '9' || 'a' <= c <= 'z' || 'A' <= c <= 'Z' # word characters - elseif c == '_' || c == '/' || c == '+' || c == '-' + elseif c == '_' || c == '/' || c == '+' || c == '-' || c == '.' # other common characters elseif c == '\'' have_single = true @@ -292,9 +298,9 @@ function shell_escape_csh(io::IO, args::AbstractString...) first = false i = 1 while true - for (r,e) = (r"^[A-Za-z0-9/\._-]+\z" => "", - r"^[^']*\z" => "'", r"^[^\$\`\"]*\z" => "\"", - r"^[^']+" => "'", r"^[^\$\`\"]+" => "\"") + for (r,e) = (r"^[A-Za-z0-9/\._-]+\z"sa => "", + r"^[^']*\z"sa => "'", r"^[^\$\`\"]*\z"sa => "\"", + r"^[^']+"sa => "'", r"^[^\$\`\"]+"sa => "\"") if ((m = match(r, SubString(arg, i))) !== nothing) write(io, e) write(io, replace(m.match, '\n' => "\\\n")) @@ -361,12 +367,12 @@ cmdargs = Base.shell_escape_wincmd("Passing args with %cmdargs% works 100%!") run(setenv(`cmd /C echo %cmdargs%`, "cmdargs" => cmdargs)) ``` -!warning +!!! warning The argument parsing done by CMD when calling batch files (either inside `.bat` files or as arguments to them) is not fully compatible with the output of this function. In particular, the processing of `%` is different. -!important +!!! important Due to a peculiar behavior of the CMD parser/interpreter, each command after a literal `|` character (indicating a command pipeline) must have `shell_escape_wincmd` applied twice since it will be parsed twice by CMD. 
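[Editorial sketch, separate from the patch: the shell helpers touched above are internal but easy to demonstrate. The expected outputs below are assumptions under this patch (in particular, `.` no longer forces quoting in `print_shell_escaped_posixly`).]

```julia
# split a command line into words, respecting quotes
Base.shell_split("ls -l 'my dir'")      # ["ls", "-l", "my dir"]

# escape arguments for a POSIX shell; plain words stay unquoted
Base.shell_escape_posixly("cat", "notes.txt", "my file.txt")
# expected: "cat notes.txt 'my file.txt'"
```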
@@ -391,7 +397,7 @@ julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"") """ function shell_escape_wincmd(io::IO, s::AbstractString) # https://stackoverflow.com/a/4095133/1990689 - occursin(r"[\r\n\0]", s) && + occursin(r"[\r\n\0]"sa, s) && throw(ArgumentError("control character unsupported by CMD.EXE")) i = 1 len = ncodeunits(s) @@ -446,7 +452,7 @@ function escape_microsoft_c_args(io::IO, args::AbstractString...) else write(io, ' ') # separator end - if isempty(arg) || occursin(r"[ \t\"]", arg) + if isempty(arg) || occursin(r"[ \t\"]"sa, arg) # Julia raw strings happen to use the same escaping convention # as the argv[] parser in Microsoft's C runtime library. write(io, '"') diff --git a/base/show.jl b/base/show.jl index cfcf634582e71..eb9f7bcece49d 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1,8 +1,10 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +using Core.Compiler: has_typevar + function show(io::IO, ::MIME"text/plain", u::UndefInitializer) show(io, u) - get(io, :compact, false) && return + get(io, :compact, false)::Bool && return print(io, ": array initializer with undefined values") end @@ -21,24 +23,29 @@ function show(io::IO, ::MIME"text/plain", r::LinRange) print_range(io, r) end +function _isself(ft::DataType) + ftname = ft.name + isdefined(ftname, :mt) || return false + name = ftname.mt.name + mod = parentmodule(ft) # NOTE: not necessarily the same as ft.name.mt.module + return isdefined(mod, name) && ft == typeof(getfield(mod, name)) +end + function show(io::IO, ::MIME"text/plain", f::Function) - get(io, :compact, false) && return show(io, f) + get(io, :compact, false)::Bool && return show(io, f) ft = typeof(f) - mt = ft.name.mt + name = ft.name.mt.name if isa(f, Core.IntrinsicFunction) print(io, f) id = Core.Intrinsics.bitcast(Int32, f) print(io, " (intrinsic function #$id)") elseif isa(f, Core.Builtin) - print(io, mt.name, " (built-in function)") + print(io, name, " (built-in function)") else - name = mt.name - isself = isdefined(ft.name.module, name) && - ft == typeof(getfield(ft.name.module, name)) n = length(methods(f)) m = n==1 ? "method" : "methods" sname = string(name) - ns = (isself || '#' in sname) ? sname : string("(::", ft, ")") + ns = (_isself(ft) || '#' in sname) ? sname : string("(::", ft, ")") what = startswith(ns, '@') ? "macro" : "generic function" print(io, ns, " (", what, " with $n $m)") end @@ -48,59 +55,66 @@ show(io::IO, ::MIME"text/plain", c::ComposedFunction) = show(io, c) show(io::IO, ::MIME"text/plain", c::Returns) = show(io, c) show(io::IO, ::MIME"text/plain", s::Splat) = show(io, s) -const ansi_regex = r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])" -# An iterator similar to `pairs` but skips over "tokens" corresponding to -# ansi sequences -struct IgnoreAnsiIterator - captures::Base.RegexMatchIterator +const ansi_regex = r"(?s)(?:\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~]))|." 
+ +# Pseudo-character representing an ANSI delimiter +struct ANSIDelimiter + del::SubString{String} end -IgnoreAnsiIterator(s::AbstractString) = - IgnoreAnsiIterator(eachmatch(ansi_regex, s)) +ncodeunits(c::ANSIDelimiter) = ncodeunits(c.del) +textwidth(::ANSIDelimiter) = 0 -Base.IteratorSize(::Type{IgnoreAnsiIterator}) = Base.SizeUnknown() -function iterate(I::IgnoreAnsiIterator, (i, m_st)=(1, iterate(I.captures))) - # Advance until the next non ansi sequence - if m_st !== nothing - m, j = m_st - if m.offset == i - i += sizeof(m.match) - return iterate(I, (i, iterate(I.captures, j))) - end - end - ci = iterate(I.captures.string, i) - ci === nothing && return nothing - i_prev = i - (c, i) = ci - return (i_prev => c), (i, m_st) +# An iterator similar to `pairs(::String)` but whose values are Char or ANSIDelimiter +struct ANSIIterator + captures::RegexMatchIterator +end +ANSIIterator(s::AbstractString) = ANSIIterator(eachmatch(ansi_regex, s)) + +IteratorSize(::Type{ANSIIterator}) = SizeUnknown() +eltype(::Type{ANSIIterator}) = Pair{Int, Union{Char,ANSIDelimiter}} +function iterate(I::ANSIIterator, (i, m_st)=(1, iterate(I.captures))) + m_st === nothing && return nothing + m, (j, new_m_st) = m_st + c = lastindex(m.match) == 1 ? only(m.match) : ANSIDelimiter(m.match) + return (i => c, (j, iterate(I.captures, (j, new_m_st)))) end +textwidth(I::ANSIIterator) = mapreduce(textwidth∘last, +, I; init=0) -function _truncate_at_width_or_chars(ignore_ansi::Bool, str, width, chars="", truncmark="…") +function _truncate_at_width_or_chars(ignore_ANSI::Bool, str, width, rpad=false, chars="\r\n", truncmark="…") truncwidth = textwidth(truncmark) (width <= 0 || width < truncwidth) && return "" wid = truncidx = lastidx = 0 - ignore_ansi &= match(ansi_regex, str) !== nothing - I = ignore_ansi ? IgnoreAnsiIterator(str) : pairs(str) - for (_lastidx, c) in I - lastidx = _lastidx - wid += textwidth(c) - if wid >= (width - truncwidth) && truncidx == 0 - truncidx = lastidx - end - (wid >= width || c in chars) && break - end - if lastidx != 0 && str[lastidx] in chars - lastidx = prevind(str, lastidx) - end + # if str needs to be truncated, truncidx is the index of truncation. + stop = false # once set, only ANSI delimiters will be kept as new characters. + needANSIend = false # set if the last ANSI delimiter before truncidx is not "\033[0m". + I = ignore_ANSI ? ANSIIterator(str) : pairs(str) + for (i, c) in I + if c isa ANSIDelimiter + truncidx == 0 && (needANSIend = c != "\033[0m") + lastidx = i + ncodeunits(c) - 1 + else + stop && break + wid += textwidth(c) + truncidx == 0 && wid > (width - truncwidth) && (truncidx = lastidx) + lastidx = i + c in chars && break + stop = wid >= width + end + end + lastidx == 0 && return rpad ? ' '^width : "" + str[lastidx] in chars && (lastidx = prevind(str, lastidx)) + ANSIend = needANSIend ? "\033[0m" : "" + pad = rpad ? repeat(' ', max(0, width-wid)) : "" truncidx == 0 && (truncidx = lastidx) if lastidx < lastindex(str) - return String(SubString(str, 1, truncidx) * truncmark) + return string(SubString(str, 1, truncidx), ANSIend, truncmark, pad) else - return String(str) + return string(str, ANSIend, pad) end end function show(io::IO, ::MIME"text/plain", iter::Union{KeySet,ValueIterator}) - isempty(iter) && get(io, :compact, false) && return show(io, iter) + isempty(iter) && get(io, :compact, false)::Bool && return show(io, iter) summary(io, iter) isempty(iter) && return print(io, ". ", isa(iter,KeySet) ? 
"Keys" : "Values", ":") @@ -122,7 +136,7 @@ function show(io::IO, ::MIME"text/plain", iter::Union{KeySet,ValueIterator}) if limit str = sprint(show, v, context=io, sizehint=0) - str = _truncate_at_width_or_chars(get(io, :color, false), str, cols, "\r\n") + str = _truncate_at_width_or_chars(get(io, :color, false)::Bool, str, cols) print(io, str) else show(io, v) @@ -154,19 +168,20 @@ function show(io::IO, ::MIME"text/plain", t::AbstractDict{K,V}) where {K,V} rows -= 1 # Subtract the summary # determine max key width to align the output, caching the strings + hascolor = get(recur_io, :color, false) ks = Vector{String}(undef, min(rows, length(t))) vs = Vector{String}(undef, min(rows, length(t))) - keylen = 0 - vallen = 0 + keywidth = 0 + valwidth = 0 for (i, (k, v)) in enumerate(t) i > rows && break ks[i] = sprint(show, k, context=recur_io_k, sizehint=0) vs[i] = sprint(show, v, context=recur_io_v, sizehint=0) - keylen = clamp(length(ks[i]), keylen, cols) - vallen = clamp(length(vs[i]), vallen, cols) + keywidth = clamp(hascolor ? textwidth(ANSIIterator(ks[i])) : textwidth(ks[i]), keywidth, cols) + valwidth = clamp(hascolor ? textwidth(ANSIIterator(vs[i])) : textwidth(vs[i]), valwidth, cols) end - if keylen > max(div(cols, 2), cols - vallen) - keylen = max(cld(cols, 3), cols - vallen) + if keywidth > max(div(cols, 2), cols - valwidth) + keywidth = max(cld(cols, 3), cols - valwidth) end else rows = cols = typemax(Int) @@ -175,12 +190,12 @@ function show(io::IO, ::MIME"text/plain", t::AbstractDict{K,V}) where {K,V} for (i, (k, v)) in enumerate(t) print(io, "\n ") if i == rows < length(t) - print(io, rpad("⋮", keylen), " => ⋮") + print(io, rpad("⋮", keywidth), " => ⋮") break end if limit - key = rpad(_truncate_at_width_or_chars(get(recur_io, :color, false), ks[i], keylen, "\r\n"), keylen) + key = _truncate_at_width_or_chars(hascolor, ks[i], keywidth, true) else key = sprint(show, k, context=recur_io_k, sizehint=0) end @@ -188,7 +203,7 @@ function show(io::IO, ::MIME"text/plain", t::AbstractDict{K,V}) where {K,V} print(io, " => ") if limit - val = _truncate_at_width_or_chars(get(recur_io, :color, false), vs[i], cols - keylen, "\r\n") + val = _truncate_at_width_or_chars(hascolor, vs[i], cols - keywidth) print(io, val) else show(recur_io_v, v) @@ -232,7 +247,7 @@ function show(io::IO, ::MIME"text/plain", t::AbstractSet{T}) where T if limit str = sprint(show, v, context=recur_io, sizehint=0) - print(io, _truncate_at_width_or_chars(get(io, :color, false), str, cols, "\r\n")) + print(io, _truncate_at_width_or_chars(get(io, :color, false)::Bool, str, cols)) else show(recur_io, v) end @@ -293,7 +308,7 @@ function IOContext(io::IO, dict::ImmutableDict) IOContext{typeof(io0)}(io0, dict) end -convert(::Type{IOContext}, io::IO) = IOContext(unwrapcontext(io)...) +convert(::Type{IOContext}, io::IO) = IOContext(unwrapcontext(io)...)::IOContext IOContext(io::IO) = convert(IOContext, io) @@ -424,9 +439,10 @@ Julia code when possible. [`repr`](@ref) returns the output of `show` as a string. -To customize human-readable text output for objects of type `T`, define -`show(io::IO, ::MIME"text/plain", ::T)` instead. Checking the `:compact` -[`IOContext`](@ref) property of `io` in such methods is recommended, +For a more verbose human-readable text output for objects of type `T`, define +`show(io::IO, ::MIME"text/plain", ::T)` in addition. 
Checking the `:compact` +[`IOContext`](@ref) key (often checked as `get(io, :compact, false)::Bool`) +of `io` in such methods is recommended, since some containers show their elements by calling this method with `:compact => true`. @@ -482,9 +498,11 @@ function _show_default(io::IO, @nospecialize(x)) print(io,')') end -active_module()::Module = isdefined(Base, :active_repl) && isdefined(Base.active_repl, :mistate) && Base.active_repl.mistate !== nothing ? - Base.active_repl.mistate.active_module : - Main +function active_module() + isassigned(REPL_MODULE_REF) || return Main + REPL = REPL_MODULE_REF[] + return invokelatest(REPL.active_module)::Module +end # Check if a particular symbol is exported from a standard library module function is_exported_from_stdlib(name::Symbol, mod::Module) @@ -543,8 +561,6 @@ function print_without_params(@nospecialize(x)) return isa(b, DataType) && b.name.wrapper === x end -has_typevar(@nospecialize(t), v::TypeVar) = ccall(:jl_has_typevar, Cint, (Any, Any), t, v)!=0 - function io_has_tvar_name(io::IOContext, name::Symbol, @nospecialize(x)) for (key, val) in io.dict if key === :unionall_env && val isa TypeVar && val.name === name && has_typevar(x, val) @@ -559,7 +575,7 @@ modulesof!(s::Set{Module}, x::TypeVar) = modulesof!(s, x.ub) function modulesof!(s::Set{Module}, x::Type) x = unwrap_unionall(x) if x isa DataType - push!(s, x.name.module) + push!(s, parentmodule(x)) elseif x isa Union modulesof!(s, x.a) modulesof!(s, x.b) @@ -595,7 +611,7 @@ function make_typealias(@nospecialize(x::Type)) end x isa UnionAll && push!(xenv, x) for mod in mods - for name in names(mod) + for name in unsorted_names(mod) if isdefined(mod, name) && !isdeprecated(mod, name) && isconst(mod, name) alias = getfield(mod, name) if alias isa Type && !has_free_typevars(alias) && !print_without_params(alias) && x <: alias @@ -605,7 +621,7 @@ function make_typealias(@nospecialize(x::Type)) env = env::SimpleVector # TODO: In some cases (such as the following), the `env` is over-approximated. # We'd like to disable `fix_inferred_var_bound` since we'll already do that fix-up here. - # (or detect and reverse the compution of it here). + # (or detect and reverse the computation of it here). # T = Array{Array{T,1}, 1} where T # (ti, env) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), T, Vector) # env[1].ub.var == T.var @@ -799,7 +815,7 @@ function make_typealiases(@nospecialize(x::Type)) end x isa UnionAll && push!(xenv, x) for mod in mods - for name in names(mod) + for name in unsorted_names(mod) if isdefined(mod, name) && !isdeprecated(mod, name) && isconst(mod, name) alias = getfield(mod, name) if alias isa Type && !has_free_typevars(alias) && !print_without_params(alias) && !(alias <: Tuple) @@ -941,13 +957,13 @@ function _show_type(io::IO, @nospecialize(x::Type)) if print_without_params(x) show_type_name(io, (unwrap_unionall(x)::DataType).name) return - elseif get(io, :compact, true) && show_typealias(io, x) + elseif get(io, :compact, true)::Bool && show_typealias(io, x) return elseif x isa DataType show_datatype(io, x) return elseif x isa Union - if get(io, :compact, true) && show_unionaliases(io, x) + if get(io, :compact, true)::Bool && show_unionaliases(io, x) return end print(io, "Union") @@ -990,9 +1006,9 @@ end # If an object with this name exists in 'from', we need to check that it's the same binding # and that it's not deprecated. 
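[Illustrative sketch, not part of the diff: the amended `show` docstring above recommends defining both a plain `show` and a `text/plain` method and checking `:compact` with a type assertion. A minimal example for a hypothetical `Interval` type:]

```julia
struct Interval
    lo::Float64
    hi::Float64
end

# parseable one-line form, used by containers and `repr`
Base.show(io::IO, x::Interval) = print(io, "Interval(", x.lo, ", ", x.hi, ")")

# verbose form for the REPL; fall back to the compact form when requested
function Base.show(io::IO, ::MIME"text/plain", x::Interval)
    if get(io, :compact, false)::Bool
        show(io, x)
    else
        print(io, "Interval: [", x.lo, " .. ", x.hi, "]")
    end
end
```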
function isvisible(sym::Symbol, parent::Module, from::Module) - owner = ccall(:jl_binding_owner, Any, (Any, Any), parent, sym) - from_owner = ccall(:jl_binding_owner, Any, (Any, Any), from, sym) - return owner !== nothing && from_owner === owner && + owner = ccall(:jl_binding_owner, Ptr{Cvoid}, (Any, Any), parent, sym) + from_owner = ccall(:jl_binding_owner, Ptr{Cvoid}, (Any, Any), from, sym) + return owner !== C_NULL && from_owner === owner && !isdeprecated(parent, sym) && isdefined(from, sym) # if we're going to return true, force binding resolution end @@ -1043,42 +1059,131 @@ function show_type_name(io::IO, tn::Core.TypeName) nothing end +function maybe_kws_nt(x::DataType) + x.name === typename(Pairs) || return nothing + length(x.parameters) == 4 || return nothing + x.parameters[1] === Symbol || return nothing + p4 = x.parameters[4] + if (isa(p4, DataType) && p4.name === typename(NamedTuple) && length(p4.parameters) == 2) + syms, types = p4.parameters + types isa DataType || return nothing + x.parameters[2] === eltype(p4) || return nothing + isa(syms, Tuple) || return nothing + x.parameters[3] === typeof(syms) || return nothing + return p4 + end + return nothing +end + function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[]) parameters = x.parameters::SimpleVector istuple = x.name === Tuple.name + isnamedtuple = x.name === typename(NamedTuple) + kwsnt = maybe_kws_nt(x) n = length(parameters) # Print tuple types with homogeneous tails longer than max_n compactly using `NTuple` or `Vararg` - max_n = 3 if istuple + if n == 0 + print(io, "Tuple{}") + return + end + + # find the length of the homogeneous tail + max_n = 3 taillen = 1 - for i in (n-1):-1:1 - if parameters[i] === parameters[n] - taillen += 1 + pn = parameters[n] + fulln = n + vakind = :none + vaN = 0 + if pn isa Core.TypeofVararg + if isdefined(pn, :N) + vaN = pn.N + if vaN isa Int + taillen = vaN + fulln += taillen - 1 + vakind = :fixed + else + vakind = :bound + end else - break + vakind = :unbound + end + pn = unwrapva(pn) + end + if !(pn isa TypeVar || pn isa Type) + # prefer Tuple over NTuple if it contains something other than types + # (e.g. if the user has switched the N and T accidentally) + taillen = 0 + elseif vakind === :none || vakind === :fixed + for i in (n-1):-1:1 + if parameters[i] === pn + taillen += 1 + else + break + end end end - if n == taillen > max_n - print(io, "NTuple{", n, ", ") - show(io, parameters[1]) + + # prefer NTuple over Tuple if it is a Vararg without a fixed length + # and prefer Tuple for short lists of elements + if (vakind == :bound && n == 1 == taillen) || (vakind === :fixed && taillen == fulln > max_n) || + (vakind === :none && taillen == fulln > max_n) + print(io, "NTuple{") + vakind === :bound ? show(io, vaN) : print(io, fulln) + print(io, ", ") + show(io, pn) print(io, "}") else print(io, "Tuple{") - for i = 1:(taillen > max_n ? n-taillen : n) + headlen = (taillen > max_n ? fulln - taillen : fulln) + for i = 1:headlen i > 1 && print(io, ", ") - show(io, parameters[i]) + show(io, vakind === :fixed && i >= n ? 
pn : parameters[i]) end - if taillen > max_n - print(io, ", Vararg{") - show(io, parameters[n]) - print(io, ", ", taillen, "}") + if headlen < fulln + headlen > 0 && print(io, ", ") + print(io, "Vararg{") + show(io, pn) + print(io, ", ", fulln - headlen, "}") end print(io, "}") end - else - show_type_name(io, x.name) - show_typeparams(io, parameters, (unwrap_unionall(x.name.wrapper)::DataType).parameters, wheres) + return + elseif isnamedtuple + syms, types = parameters + if syms isa Tuple && types isa DataType + print(io, "@NamedTuple{") + show_at_namedtuple(io, syms, types) + print(io, "}") + return + end + elseif get(io, :backtrace, false)::Bool && kwsnt !== nothing + # simplify the type representation of keyword arguments + # when printing signature of keyword method in the stack trace + print(io, "@Kwargs{") + show_at_namedtuple(io, kwsnt.parameters[1]::Tuple, kwsnt.parameters[2]::DataType) + print(io, "}") + return + end + + show_type_name(io, x.name) + show_typeparams(io, parameters, (unwrap_unionall(x.name.wrapper)::DataType).parameters, wheres) +end + +function show_at_namedtuple(io::IO, syms::Tuple, types::DataType) + first = true + for i in 1:length(syms) + if !first + print(io, ", ") + end + print(io, syms[i]) + typ = types.parameters[i] + if typ !== Any + print(io, "::") + show(io, typ) + end + first = false end end @@ -1160,7 +1265,7 @@ function show(io::IO, p::Pair) isdelimited(io_i, p[i]) || print(io, "(") show(io_i, p[i]) isdelimited(io_i, p[i]) || print(io, ")") - i == 1 && print(io, get(io, :compact, false) ? "=>" : " => ") + i == 1 && print(io, get(io, :compact, false)::Bool ? "=>" : " => ") end end @@ -1190,12 +1295,12 @@ function print_fullname(io::IO, m::Module) end end -function sourceinfo_slotnames(src::CodeInfo) - slotnames = src.slotnames +sourceinfo_slotnames(src::CodeInfo) = sourceinfo_slotnames(src.slotnames) +function sourceinfo_slotnames(slotnames::Vector{Symbol}) names = Dict{String,Int}() printnames = Vector{String}(undef, length(slotnames)) for i in eachindex(slotnames) - if slotnames[i] == :var"#unused#" + if slotnames[i] === :var"#unused#" printnames[i] = "_" continue end @@ -1355,12 +1460,12 @@ show(io::IO, s::Symbol) = show_unquoted_quote_expr(io, s, 0, 0, 0) # # This is consistent with many other show methods, i.e.: # show(Set([1,2,3])) # ==> "Set{Int64}([2,3,1])" -# eval(Meta.parse("Set{Int64}([2,3,1])”) # ==> An actual set +# eval(Meta.parse("Set{Int64}([2,3,1])")) # ==> An actual set # While this isn’t true of ALL show methods, it is of all ASTs. -const ExprNode = Union{Expr, QuoteNode, Slot, LineNumberNode, SSAValue, - GotoNode, GlobalRef, PhiNode, PhiCNode, UpsilonNode, - Core.Compiler.GotoIfNot, Core.Compiler.ReturnNode} +const ExprNode = Union{Expr, QuoteNode, SlotNumber, LineNumberNode, SSAValue, + GotoNode, GotoIfNot, GlobalRef, PhiNode, PhiCNode, UpsilonNode, + ReturnNode} # Operators have precedence levels from 1-N, and show_unquoted defaults to a # precedence level of 0 (the fourth argument). The top-level print and show # methods use a precedence of -1 to specially allow space-separated macro syntax. @@ -1560,9 +1665,7 @@ unquoted(ex::Expr) = ex.args[1] function printstyled end function with_output_color end -is_expected_union(u::Union) = u.a == Nothing || u.b == Nothing || u.a == Missing || u.b == Missing - -emphasize(io, str::AbstractString, col = Base.error_color()) = get(io, :color, false) ? +emphasize(io, str::AbstractString, col = Base.error_color()) = get(io, :color, false)::Bool ? 
printstyled(io, str; color=col, bold=true) : print(io, uppercase(str)) @@ -1686,7 +1789,14 @@ end ## AST printing ## -show_unquoted(io::IO, val::SSAValue, ::Int, ::Int) = print(io, "%", val.id) +function show_unquoted(io::IO, val::SSAValue, ::Int, ::Int) + if get(io, :maxssaid, typemax(Int))::Int < val.id + # invalid SSAValue, print this in red for better recognition + printstyled(io, "%", val.id; color=:red) + else + print(io, "%", val.id) + end +end show_unquoted(io::IO, sym::Symbol, ::Int, ::Int) = show_sym(io, sym, allow_macroname=false) show_unquoted(io::IO, ex::LineNumberNode, ::Int, ::Int) = show_linenumber(io, ex.line, ex.file) show_unquoted(io::IO, ex::GotoNode, ::Int, ::Int) = print(io, "goto %", ex.label) @@ -1704,19 +1814,14 @@ function show_globalref(io::IO, ex::GlobalRef; allow_macroname=false) nothing end -function show_unquoted(io::IO, ex::Slot, ::Int, ::Int) - typ = isa(ex, TypedSlot) ? ex.typ : Any +function show_unquoted(io::IO, ex::SlotNumber, ::Int, ::Int) slotid = ex.id slotnames = get(io, :SOURCE_SLOTNAMES, false) - if (isa(slotnames, Vector{String}) && - slotid <= length(slotnames::Vector{String})) - print(io, (slotnames::Vector{String})[slotid]) + if isa(slotnames, Vector{String}) && slotid ≤ length(slotnames) + print(io, slotnames[slotid]) else print(io, "_", slotid) end - if typ !== Any && isa(ex, TypedSlot) - print(io, "::", typ) - end end function show_unquoted(io::IO, ex::QuoteNode, indent::Int, prec::Int) @@ -1799,10 +1904,16 @@ function show_import_path(io::IO, ex, quote_level) end elseif ex.head === :(.) for i = 1:length(ex.args) - if ex.args[i] === :(.) + sym = ex.args[i]::Symbol + if sym === :(.) print(io, '.') else - show_sym(io, ex.args[i]::Symbol, allow_macroname=(i==length(ex.args))) + if sym === :(..) + # special case for https://github.com/JuliaLang/julia/issues/49168 + print(io, "(..)") + else + show_sym(io, sym, allow_macroname=(i==length(ex.args))) + end i < length(ex.args) && print(io, '.') end end @@ -1823,9 +1934,10 @@ function allow_macroname(ex) end end -function is_core_macro(arg, macro_name::AbstractString) - arg === GlobalRef(Core, Symbol(macro_name)) -end +is_core_macro(arg::GlobalRef, macro_name::AbstractString) = is_core_macro(arg, Symbol(macro_name)) +is_core_macro(arg::GlobalRef, macro_name::Symbol) = arg == GlobalRef(Core, macro_name) +is_core_macro(@nospecialize(arg), macro_name::AbstractString) = false +is_core_macro(@nospecialize(arg), macro_name::Symbol) = false # symbol for IOContext flag signaling whether "begin" is treated # as an ordinary symbol, which is true in indexing expressions. @@ -1861,8 +1973,12 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # . print(io, '.') # item - parens = !(field isa Symbol) || (field::Symbol in quoted_syms) - quoted = parens || isoperator(field) + if isa(field, Symbol) + parens = field in quoted_syms + quoted = parens || isoperator(field) + else + parens = quoted = true + end quoted && print(io, ':') parens && print(io, '(') show_unquoted(io, field, indent, 0, quote_level) @@ -1986,10 +2102,11 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # binary operator (i.e. 
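[Editor's aside, not from the patch: the AST-printing code being reworked here aims to keep `Expr` output re-parseable. A small check of that round-trip property:]

```julia
ex = Meta.parse("x.y + f(a...)")
s  = string(ex)          # unquoted surface syntax: "x.y + f(a...)"
Meta.parse(s) == ex      # true: the printed form parses back to the same Expr
```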
"x + y") elseif func_prec > 0 # is a binary operator + func = func::Symbol # operator_precedence returns func_prec == 0 for non-Symbol na = length(func_args) - if (na == 2 || (na > 2 && isa(func, Symbol) && func in (:+, :++, :*)) || (na == 3 && func === :(:))) && + if (na == 2 || (na > 2 && func in (:+, :++, :*)) || (na == 3 && func === :(:))) && all(a -> !isa(a, Expr) || a.head !== :..., func_args) - sep = func === :(:) ? "$func" : " " * convert(String, string(func))::String * " " # if func::Any, avoid string interpolation (invalidation) + sep = func === :(:) ? "$func" : " $func " if func_prec <= prec show_enclosed_list(io, '(', func_args, sep, ')', indent, func_prec, quote_level, true) @@ -2019,7 +2136,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # other call-like expressions ("A[1,2]", "T{X,Y}", "f.(X,Y)") elseif haskey(expr_calls, head) && nargs >= 1 # :ref/:curly/:calldecl/:(.) funcargslike = head === :(.) ? (args[2]::Expr).args : args[2:end] - show_call(head == :ref ? IOContext(io, beginsym=>true) : io, head, args[1], funcargslike, indent, quote_level, head !== :curly) + show_call(head === :ref ? IOContext(io, beginsym=>true) : io, head, args[1], funcargslike, indent, quote_level, head !== :curly) # comprehensions elseif head === :typed_comprehension && nargs == 2 @@ -2045,7 +2162,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # comparison (i.e. "x < y < z") elseif head === :comparison && nargs >= 3 && (nargs&1==1) - comp_prec = minimum(operator_precedence, args[2:2:end]) + comp_prec = minimum(operator_precedence, args[2:2:end]; init=typemax(Int)) if comp_prec <= prec show_enclosed_list(io, '(', args, " ", ')', indent, comp_prec, quote_level) else @@ -2077,10 +2194,16 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # block with argument elseif head in (:for,:while,:function,:macro,:if,:elseif,:let) && nargs==2 + if head === :function && is_expr(args[1], :...) + # fix printing of "function (x...) x end" + block_args = Expr(:tuple, args[1]) + else + block_args = args[1] + end if is_expr(args[2], :block) - show_block(IOContext(io, beginsym=>false), head, args[1], args[2], indent, quote_level) + show_block(IOContext(io, beginsym=>false), head, block_args, args[2], indent, quote_level) else - show_block(IOContext(io, beginsym=>false), head, args[1], Expr(:block, args[2]), indent, quote_level) + show_block(IOContext(io, beginsym=>false), head, block_args, Expr(:block, args[2]), indent, quote_level) end print(io, "end") @@ -2141,19 +2264,19 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In print(io, head, ' ') show_list(io, args, ", ", indent, 0, quote_level) - elseif head === :export + elseif head in (:export, :public) print(io, head, ' ') show_list(io, mapany(allow_macroname, args), ", ", indent) elseif head === :macrocall && nargs >= 2 # handle some special syntaxes # `a b c` - if is_core_macro(args[1], "@cmd") + if is_core_macro(args[1], :var"@cmd") print(io, "`", args[3], "`") # 11111111111111111111, 0xfffffffffffffffff, 1111...many digits... 
- elseif is_core_macro(args[1], "@int128_str") || - is_core_macro(args[1], "@uint128_str") || - is_core_macro(args[1], "@big_str") + elseif is_core_macro(args[1], :var"@int128_str") || + is_core_macro(args[1], :var"@uint128_str") || + is_core_macro(args[1], :var"@big_str") print(io, args[3]) # x"y" and x"y"z elseif isa(args[1], Symbol) && nargs >= 3 && isa(args[3], String) && @@ -2384,16 +2507,14 @@ end # `io` should contain the UnionAll env of the signature function show_signature_function(io::IO, @nospecialize(ft), demangle=false, fargname="", html=false, qualified=false) uw = unwrap_unionall(ft) - if ft <: Function && isa(uw, DataType) && isempty(uw.parameters) && - isdefined(uw.name.module, uw.name.mt.name) && - ft == typeof(getfield(uw.name.module, uw.name.mt.name)) - if qualified && !is_exported_from_stdlib(uw.name.mt.name, uw.name.module) && uw.name.module !== Main - print_within_stacktrace(io, uw.name.module, '.', bold=true) + if ft <: Function && isa(uw, DataType) && isempty(uw.parameters) && _isself(uw) + uwmod = parentmodule(uw) + if qualified && !is_exported_from_stdlib(uw.name.mt.name, uwmod) && uwmod !== Main + print_within_stacktrace(io, uwmod, '.', bold=true) end s = sprint(show_sym, (demangle ? demangle_function_name : identity)(uw.name.mt.name), context=io) print_within_stacktrace(io, s, bold=true) - elseif isa(ft, DataType) && ft.name === Type.body.name && - (f = ft.parameters[1]; !isa(f, TypeVar)) + elseif isType(ft) && (f = ft.parameters[1]; !isa(f, TypeVar)) uwf = unwrap_unionall(f) parens = isa(f, UnionAll) && !(isa(uwf, DataType) && f === uwf.name.wrapper) parens && print(io, "(") @@ -2417,15 +2538,16 @@ function print_within_stacktrace(io, s...; color=:normal, bold=false) end end -function show_tuple_as_call(io::IO, name::Symbol, sig::Type; +function show_tuple_as_call(out::IO, name::Symbol, sig::Type; demangle=false, kwargs=nothing, argnames=nothing, qualified=false, hasfirst=true) # print a method signature tuple for a lambda definition if sig === Tuple - print(io, demangle ? demangle_function_name(name) : name, "(...)") + print(out, demangle ? 
demangle_function_name(name) : name, "(...)") return end tv = Any[] + io = IOContext(IOBuffer(), out) env_io = io while isa(sig, UnionAll) push!(tv, sig.var) @@ -2448,7 +2570,7 @@ function show_tuple_as_call(io::IO, name::Symbol, sig::Type; print_within_stacktrace(io, argnames[i]; color=:light_black) end print(io, "::") - print_type_bicolor(env_io, sig[i]; use_color = get(io, :backtrace, false)) + print_type_bicolor(env_io, sig[i]; use_color = get(io, :backtrace, false)::Bool) end if kwargs !== nothing print(io, "; ") @@ -2457,15 +2579,129 @@ function show_tuple_as_call(io::IO, name::Symbol, sig::Type; first || print(io, ", ") first = false print_within_stacktrace(io, k; color=:light_black) - print(io, "::") - print_type_bicolor(io, t; use_color = get(io, :backtrace, false)) + if t == pairs(NamedTuple) + # omit type annotation for splat keyword argument + print(io, "...") + else + print(io, "::") + print_type_bicolor(io, t; use_color = get(io, :backtrace, false)::Bool) + end end end print_within_stacktrace(io, ")", bold=true) show_method_params(io, tv) + str = String(take!(unwrapcontext(io)[1])) + str = type_limited_string_from_context(out, str) + print(out, str) nothing end +function type_limited_string_from_context(out::IO, str::String) + typelimitflag = get(out, :stacktrace_types_limited, nothing) + if typelimitflag isa RefValue{Bool} + sz = get(out, :displaysize, displaysize(out))::Tuple{Int, Int} + str_lim = type_depth_limit(str, max(sz[2], 120)) + if sizeof(str_lim) < sizeof(str) + typelimitflag[] = true + end + str = str_lim + end + return str +end + +# limit nesting depth of `{ }` until string textwidth is less than `n` +function type_depth_limit(str::String, n::Int; maxdepth = nothing) + depth = 0 + width_at = Int[] # total textwidth at each nesting depth + depths = zeros(Int16, lastindex(str)) # depth at each character index + levelcount = Int[] # number of nodes at each level + strwid = 0 + st_0, st_backslash, st_squote, st_dquote = 0,1,2,4 + state::Int = st_0 + stateis(s) = (state & s) != 0 + quoted() = stateis(st_squote) || stateis(st_dquote) + enter(s) = (state |= s) + leave(s) = (state &= ~s) + for (i, c) in ANSIIterator(str) + if c isa ANSIDelimiter + depths[i] = depth + continue + end + + if c == '\\' && quoted() + enter(st_backslash) + elseif c == '\'' + if stateis(st_backslash) || stateis(st_dquote) + elseif stateis(st_squote) + leave(st_squote) + else + enter(st_squote) + end + elseif c == '"' + if stateis(st_backslash) || stateis(st_squote) + elseif stateis(st_dquote) + leave(st_dquote) + else + enter(st_dquote) + end + end + if c == '}' && !quoted() + depth -= 1 + end + + wid = textwidth(c) + strwid += wid + if depth > 0 + width_at[depth] += wid + end + depths[i] = depth + + if c == '{' && !quoted() + depth += 1 + if depth > length(width_at) + push!(width_at, 0) + push!(levelcount, 0) + end + levelcount[depth] += 1 + end + if c != '\\' && stateis(st_backslash) + leave(st_backslash) + end + end + if maxdepth === nothing + limit_at = length(width_at) + 1 + while strwid > n + limit_at -= 1 + limit_at <= 1 && break + # add levelcount[] to include space taken by `…` + strwid = strwid - width_at[limit_at] + levelcount[limit_at] + if limit_at < length(width_at) + # take away the `…` from the previous considered level + strwid -= levelcount[limit_at+1] + end + end + else + limit_at = maxdepth + end + output = IOBuffer() + prev = 0 + for (i, c) in ANSIIterator(str) + di = depths[i] + if di < limit_at + if c isa ANSIDelimiter + write(output, c.del) + else + write(output, c) + 
end + end + if di > prev && di == limit_at + write(output, "…") + end + prev = di + end + return String(take!(output)) +end + function print_type_bicolor(io, type; kwargs...) str = sprint(show, type, context=io) print_type_bicolor(io, str; kwargs...) @@ -2565,9 +2801,9 @@ module IRShow const Compiler = Core.Compiler using Core.IR import ..Base - import .Compiler: IRCode, ReturnNode, GotoIfNot, CFG, scan_ssa_use!, Argument, - isexpr, compute_basic_blocks, block_for_inst, - TriState, Effects, ALWAYS_TRUE, ALWAYS_FALSE, TRISTATE_UNKNOWN + import .Compiler: IRCode, CFG, scan_ssa_use!, + isexpr, compute_basic_blocks, block_for_inst, IncrementalCompact, + Effects, ALWAYS_TRUE, ALWAYS_FALSE Base.getindex(r::Compiler.StmtRange, ind::Integer) = Compiler.getindex(r, ind) Base.size(r::Compiler.StmtRange) = Compiler.size(r) Base.first(r::Compiler.StmtRange) = Compiler.first(r) @@ -2576,6 +2812,7 @@ module IRShow Base.iterate(is::Compiler.InstructionStream, st::Int=1) = (st <= Compiler.length(is)) ? (is[st], st + 1) : nothing Base.getindex(is::Compiler.InstructionStream, idx::Int) = Compiler.getindex(is, idx) Base.getindex(node::Compiler.Instruction, fld::Symbol) = Compiler.getindex(node, fld) + Base.getindex(ir::IRCode, ssa::SSAValue) = Compiler.getindex(ir, ssa) include("compiler/ssair/show.jl") const __debuginfo = Dict{Symbol, Any}( @@ -2611,19 +2848,28 @@ function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source) end function show(io::IO, inferred::Core.Compiler.InferenceResult) - tt = inferred.linfo.specTypes.parameters[2:end] + mi = inferred.linfo + tt = mi.specTypes.parameters[2:end] tts = join(["::$(t)" for t in tt], ", ") rettype = inferred.result if isa(rettype, Core.Compiler.InferenceState) rettype = rettype.bestguess end - print(io, "$(inferred.linfo.def.name)($(tts)) => $(rettype)") + if isa(mi.def, Method) + print(io, mi.def.name, "(", tts, " => ", rettype, ")") + else + print(io, "Toplevel MethodInstance thunk from ", mi.def, " => ", rettype) + end end -function show(io::IO, ::Core.Compiler.NativeInterpreter) +show(io::IO, sv::Core.Compiler.InferenceState) = + (print(io, "InferenceState for "); show(io, sv.linfo)) + +show(io::IO, ::Core.Compiler.NativeInterpreter) = print(io, "Core.Compiler.NativeInterpreter(...)") -end +show(io::IO, cache::Core.Compiler.CachedMethodTable) = + print(io, typeof(cache), "(", Core.Compiler.length(cache.cache), " entries)") function dump(io::IOContext, x::SimpleVector, n::Int, indent) if isempty(x) @@ -2706,7 +2952,7 @@ function dump(io::IOContext, x::Array, n::Int, indent) println(io) recur_io = IOContext(io, :SHOWN_SET => x) lx = length(x) - if get(io, :limit, false) + if get(io, :limit, false)::Bool dump_elts(recur_io, x, n, indent, 1, (lx <= 10 ? 
lx : 5)) if lx > 10 println(io) @@ -2736,7 +2982,7 @@ function dump(io::IOContext, x::DataType, n::Int, indent) tvar_io = IOContext(tvar_io, :unionall_env => tparam) end end - if x.name === NamedTuple_typename && !(x.parameters[1] isa Tuple) + if x.name === _NAMEDTUPLE_NAME && !(x.parameters[1] isa Tuple) # named tuple type with unknown field names return end @@ -2744,8 +2990,11 @@ function dump(io::IOContext, x::DataType, n::Int, indent) fieldtypes = datatype_fieldtypes(x) for idx in 1:length(fields) println(io) - print(io, indent, " ", fields[idx], "::") - print(tvar_io, fieldtypes[idx]) + print(io, indent, " ", fields[idx]) + if isassigned(fieldtypes, idx) + print(io, "::") + print(tvar_io, fieldtypes[idx]) + end end end nothing diff --git a/base/simdloop.jl b/base/simdloop.jl index 29e2382cf39aa..797b77ed75a99 100644 --- a/base/simdloop.jl +++ b/base/simdloop.jl @@ -100,7 +100,7 @@ The object iterated over in a `@simd for` loop should be a one-dimensional range By using `@simd`, you are asserting several properties of the loop: * It is safe to execute iterations in arbitrary or overlapping order, with special consideration for reduction variables. -* Floating-point operations on reduction variables can be reordered, possibly causing different results than without `@simd`. +* Floating-point operations on reduction variables can be reordered or contracted, possibly causing different results than without `@simd`. In many cases, Julia is able to automatically vectorize inner for loops without the use of `@simd`. Using `@simd` gives the compiler a little extra leeway to make it possible in more situations. In diff --git a/base/slicearray.jl b/base/slicearray.jl index c9371622f6aff..e5a433cdb8d2a 100644 --- a/base/slicearray.jl +++ b/base/slicearray.jl @@ -40,7 +40,8 @@ unitaxis(::AbstractArray) = Base.OneTo(1) function Slices(A::P, slicemap::SM, ax::AX) where {P,SM,AX} N = length(ax) - S = Base._return_type(view, Tuple{P, map((a,l) -> l === (:) ? Colon : eltype(a), axes(A), slicemap)...}) + argT = map((a,l) -> l === (:) ? Colon : eltype(a), axes(A), slicemap) + S = Base.promote_op(view, P, argT...) Slices{P,SM,AX,S,N}(A, slicemap, ax) end @@ -51,7 +52,7 @@ function _slice_check_dims(N, dim, dims...) _slice_check_dims(N,dims...) end -@inline function _eachslice(A::AbstractArray{T,N}, dims::NTuple{M,Integer}, drop::Bool) where {T,N,M} +@constprop :aggressive function _eachslice(A::AbstractArray{T,N}, dims::NTuple{M,Integer}, drop::Bool) where {T,N,M} _slice_check_dims(N,dims...) if drop # if N = 4, dims = (3,1) then @@ -85,6 +86,8 @@ the ordering of the dimensions will match those in `dims`. If `drop = false`, th `Slices` will have the same dimensionality as the underlying array, with inner dimensions having size 1. +See [`stack`](@ref)`(slices; dims)` for the inverse of `eachslice(A; dims::Integer)`. + See also [`eachrow`](@ref), [`eachcol`](@ref), [`mapslices`](@ref) and [`selectdim`](@ref). !!! compat "Julia 1.1" @@ -131,6 +134,8 @@ end Create a [`RowSlices`](@ref) object that is a vector of rows of matrix or vector `A`. Row slices are returned as `AbstractVector` views of `A`. +For the inverse, see [`stack`](@ref)`(rows; dims=1)`. + See also [`eachcol`](@ref), [`eachslice`](@ref) and [`mapslices`](@ref). !!! compat "Julia 1.1" @@ -167,6 +172,8 @@ eachrow(A::AbstractVector) = eachrow(reshape(A, size(A,1), 1)) Create a [`ColumnSlices`](@ref) object that is a vector of columns of matrix or vector `A`. Column slices are returned as `AbstractVector` views of `A`. 
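[Illustration only, separate from the patch: the cross-references added to the slice iterators above point at `stack` as their inverse. A quick sketch (requires Julia 1.9+ for `stack`):]

```julia
A = [1 2 3; 4 5 6]

stack(eachcol(A)) == A                    # true: columns reassembled along a new last dimension
stack(eachrow(A); dims=1) == A            # true: rows reassembled along the first dimension
reduce(hcat, collect(eachcol(A))) == A    # the reduce(hcat, cols) alternative
```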
+For the inverse, see [`stack`](@ref)`(cols)` or `reduce(`[`hcat`](@ref)`, cols)`. + See also [`eachrow`](@ref), [`eachslice`](@ref) and [`mapslices`](@ref). !!! compat "Julia 1.1" @@ -226,9 +233,13 @@ size(s::Slices) = map(length, s.axes) return map(l -> l === (:) ? (:) : c[l], s.slicemap) end -Base.@propagate_inbounds getindex(s::Slices{P,SM,AX,S,N}, I::Vararg{Int,N}) where {P,SM,AX,S,N} = - view(s.parent, _slice_index(s, I...)...) -Base.@propagate_inbounds setindex!(s::Slices{P,SM,AX,S,N}, val, I::Vararg{Int,N}) where {P,SM,AX,S,N} = - s.parent[_slice_index(s, I...)...] = val +@inline function getindex(s::Slices{P,SM,AX,S,N}, I::Vararg{Int,N}) where {P,SM,AX,S,N} + @boundscheck checkbounds(s, I...) + @inbounds view(s.parent, _slice_index(s, I...)...) +end +@inline function setindex!(s::Slices{P,SM,AX,S,N}, val, I::Vararg{Int,N}) where {P,SM,AX,S,N} + @boundscheck checkbounds(s, I...) + @inbounds s.parent[_slice_index(s, I...)...] = val +end parent(s::Slices) = s.parent diff --git a/base/some.jl b/base/some.jl index 8be58739a4df4..0d538cbed6c23 100644 --- a/base/some.jl +++ b/base/some.jl @@ -29,15 +29,14 @@ end function nonnothingtype_checked(T::Type) R = nonnothingtype(T) R >: T && error("could not compute non-nothing type") + R <: Union{} && error("cannot convert a value to nothing for assignment") return R end convert(::Type{T}, x::T) where {T>:Nothing} = x convert(::Type{T}, x) where {T>:Nothing} = convert(nonnothingtype_checked(T), x) -convert(::Type{Nothing}, x) = throw(MethodError(convert, (Nothing, x))) -convert(::Type{Nothing}, ::Nothing) = nothing convert(::Type{Some{T}}, x::Some{T}) where {T} = x -convert(::Type{Some{T}}, x::Some) where {T} = Some{T}(convert(T, x.value)) +convert(::Type{Some{T}}, x::Some) where {T} = Some{T}(convert(T, x.value))::Some{T} function show(io::IO, x::Some) if get(io, :typeinfo, Any) == typeof(x) @@ -65,7 +64,7 @@ Return `true` if `x === nothing`, and return `false` if not. !!! compat "Julia 1.1" This function requires at least Julia 1.1. -See also [`something`](@ref), [`notnothing`](@ref), [`ismissing`](@ref). +See also [`something`](@ref), [`Base.notnothing`](@ref), [`ismissing`](@ref). 
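[Editorial sketch, not part of the diff: the new doctest above highlights that `Some` lets an explicit `nothing` win over a fallback. The common lookup pattern:]

```julia
d = Dict("a" => 1)

something(get(d, "b", nothing), 0)        # 0: key missing, fall back to the default
something(Some(nothing), 0) === nothing   # true: a wrapped `nothing` is a real value
```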
""" isnothing(x) = x === nothing @@ -87,6 +86,9 @@ julia> something(nothing, 1) julia> something(Some(1), nothing) 1 +julia> something(Some(nothing), 2) === nothing +true + julia> something(missing, nothing) missing diff --git a/base/sort.jl b/base/sort.jl index 85bfebfe3c17b..9272846618a0e 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -2,20 +2,12 @@ module Sort -import ..@__MODULE__, ..parentmodule -const Base = parentmodule(@__MODULE__) -using .Base.Order -using .Base: copymutable, LinearIndices, length, (:), iterate, OneTo, - eachindex, axes, first, last, similar, zip, OrdinalRange, firstindex, lastindex, - AbstractVector, @inbounds, AbstractRange, @eval, @inline, Vector, @noinline, - AbstractMatrix, AbstractUnitRange, isless, identity, eltype, >, <, <=, >=, |, +, -, *, !, - extrema, sub_with_overflow, add_with_overflow, oneunit, div, getindex, setindex!, - length, resize!, fill, Missing, require_one_based_indexing, keytype, UnitRange, - min, max, reinterpret, signed, unsigned, Signed, Unsigned, typemin, xor, Type, BitSigned, Val - -using .Base: >>>, !== - -import .Base: +using Base.Order + +using Base: copymutable, midpoint, require_one_based_indexing, uinttype, + sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit + +import Base: sort, sort!, issorted, @@ -51,6 +43,7 @@ export # not exported by Base SMALL_ALGORITHM, SMALL_THRESHOLD +abstract type Algorithm end ## functions requiring only ordering ## @@ -71,8 +64,8 @@ end """ issorted(v, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) -Test whether a vector is in sorted order. The `lt`, `by` and `rev` keywords modify what -order is considered to be sorted just as they do for [`sort`](@ref). +Test whether a collection is in sorted order. The keywords modify what +order is considered sorted, as described in the [`sort!`](@ref) documentation. # Examples ```jldoctest @@ -87,6 +80,9 @@ false julia> issorted([(1, "b"), (2, "a")], by = x -> x[2], rev=true) true + +julia> issorted([1, 2, -2, 3], by=abs) +true ``` """ issorted(itr; @@ -94,7 +90,15 @@ issorted(itr; issorted(itr, ord(lt,by,rev,order)) function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering) - sort!(v, firstindex(v), lastindex(v), PartialQuickSort(k), o) + # TODO move k from `alg` to `kw` + # Don't perform InitialOptimizations before Bracketing. The optimizations take O(n) + # time and so does the whole sort. But do perform them before recursive calls because + # that can cause significant speedups when the target range is large so the runtime is + # dominated by k log k and the optimizations runs in O(k) time. + _sort!(v, BoolOptimization( + Small{12}( # Very small inputs should go straight to insertion sort + BracketedSort(k))), + o, (;)) maybeview(v, k) end @@ -102,14 +106,16 @@ maybeview(v, k) = view(v, k) maybeview(v, k::Integer) = v[k] """ - partialsort!(v, k; by=, lt=, rev=false) + partialsort!(v, k; by=identity, lt=isless, rev=false) + +Partially sort the vector `v` in place so that the value at index `k` (or +range of adjacent values if `k` is a range) occurs +at the position where it would appear if the array were fully sorted. If `k` is a single +index, that value is returned; if `k` is a range, an array of values at those indices is +returned. Note that `partialsort!` may not fully sort the input array. + +For the keyword arguments, see the documentation of [`sort!`](@ref). 
-Partially sort the vector `v` in place, according to the order specified by `by`, `lt` and -`rev` so that the value at index `k` (or range of adjacent values if `k` is a range) occurs -at the position where it would appear if the array were fully sorted via a non-stable -algorithm. If `k` is a single index, that value is returned; if `k` is a range, an array of -values at those indices is returned. Note that `partialsort!` does not fully sort the input -array. # Examples ```jldoctest @@ -157,40 +163,37 @@ partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}; partialsort!(v, k, ord(lt,by,rev,order)) """ - partialsort(v, k, by=, lt=, rev=false) + partialsort(v, k, by=identity, lt=isless, rev=false) -Variant of [`partialsort!`](@ref) which copies `v` before partially sorting it, thereby returning the +Variant of [`partialsort!`](@ref) that copies `v` before partially sorting it, thereby returning the same thing as `partialsort!` but leaving `v` unmodified. """ partialsort(v::AbstractVector, k::Union{Integer,OrdinalRange}; kws...) = partialsort!(copymutable(v), k; kws...) -# This implementation of `midpoint` is performance-optimized but safe -# only if `lo <= hi`. -midpoint(lo::T, hi::T) where T<:Integer = lo + ((hi - lo) >>> 0x01) -midpoint(lo::Integer, hi::Integer) = midpoint(promote(lo, hi)...) - # reference on sorted binary search: # http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary -# index of the first value of vector a that is greater than or equal to x; +# index of the first value of vector a that is greater than or equivalent to x; # returns lastindex(v)+1 if x is greater than all values in v. function searchsortedfirst(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer - u = T(1) - lo = lo - u - hi = hi + u - @inbounds while lo < hi - u - m = midpoint(lo, hi) + hi = hi + T(1) + len = hi - lo + @inbounds while len != 0 + half_len = len >>> 0x01 + m = lo + half_len if lt(o, v[m], x) - lo = m + lo = m + 1 + len -= half_len + 1 else hi = m + len = half_len end end - return hi + return lo end -# index of the last value of vector a that is less than or equal to x; +# index of the last value of vector a that is less than or equivalent to x; # returns firstindex(v)-1 if x is less than all values of v. 
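[Illustrative sketch, separate from the patch: the rewritten `searchsortedfirst` above uses a length-halving loop instead of symmetric `lo`/`hi` bounds. A standalone version of the same scheme, with a hypothetical name:]

```julia
# index of the first element ≥ x, or lastindex(v)+1 if none (assumes v is sorted ascending)
function lower_bound(v::AbstractVector, x)
    lo, len = firstindex(v), length(v)
    while len != 0
        half = len >>> 1
        m = lo + half
        if v[m] < x
            lo = m + 1
            len -= half + 1
        else
            len = half
        end
    end
    return lo
end

lower_bound([1, 2, 4, 4, 7], 4)   # 3
```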
function searchsortedlast(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keytype(v) where T<:Integer u = T(1) @@ -207,7 +210,7 @@ function searchsortedlast(v::AbstractVector, x, lo::T, hi::T, o::Ordering)::keyt return lo end -# returns the range of indices of v equal to x +# returns the range of indices of v equivalent to x # if v does not contain x, returns a 0-length range # indicating the insertion point of x function searchsorted(v::AbstractVector, x, ilo::T, ihi::T, o::Ordering)::UnitRange{keytype(v)} where T<:Integer @@ -229,7 +232,10 @@ function searchsorted(v::AbstractVector, x, ilo::T, ihi::T, o::Ordering)::UnitRa return (lo + 1) : (hi - 1) end -function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a) + +const FastRangeOrderings = Union{DirectOrdering,Lt{typeof(<)},ReverseOrdering{Lt{typeof(<)}}} + +function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings)::keytype(a) require_one_based_indexing(a) f, h, l = first(a), step(a), last(a) if lt(o, x, f) @@ -242,7 +248,7 @@ function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering): end end -function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a) +function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings)::keytype(a) require_one_based_indexing(a) f, h, l = first(a), step(a), last(a) if !lt(o, f, x) @@ -255,7 +261,7 @@ function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering) end end -function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a) +function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::FastRangeOrderings)::keytype(a) require_one_based_indexing(a) f, h, l = first(a), step(a), last(a) if lt(o, x, f) @@ -263,7 +269,7 @@ function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderin elseif h == 0 || !lt(o, x, l) length(a) else - if o isa ForwardOrdering + if !(o isa ReverseOrdering) fld(floor(Integer, x) - f, h) + 1 else fld(ceil(Integer, x) - f, h) + 1 @@ -271,7 +277,7 @@ function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderin end end -function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a) +function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::FastRangeOrderings)::keytype(a) require_one_based_indexing(a) f, h, l = first(a), step(a), last(a) if !lt(o, f, x) @@ -279,7 +285,7 @@ function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderi elseif h == 0 || lt(o, l, x) length(a) + 1 else - if o isa ForwardOrdering + if !(o isa ReverseOrdering) cld(ceil(Integer, x) - f, h) + 1 else cld(floor(Integer, x) - f, h) + 1 @@ -287,7 +293,7 @@ function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrderi end end -searchsorted(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering) = +searchsorted(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings) = searchsortedfirst(a, x, o) : searchsortedlast(a, x, o) for s in [:searchsortedfirst, :searchsortedlast, :searchsorted] @@ -300,14 +306,19 @@ for s in [:searchsortedfirst, :searchsortedlast, :searchsorted] end """ - searchsorted(a, x; by=, lt=, rev=false) + searchsorted(v, x; by=identity, lt=isless, rev=false) -Return the range of indices of `a` which compare as equal to `x` (using binary search) -according to the order specified by the `by`, `lt` and `rev` keywords, assuming that `a` -is already sorted in that order. 
Return an empty range located at the insertion point -if `a` does not contain values equal to `x`. +Return the range of indices in `v` where values are equivalent to `x`, or an +empty range located at the insertion point if `v` does not contain values +equivalent to `x`. The vector `v` must be sorted according to the order defined +by the keywords. Refer to [`sort!`](@ref) for the meaning of the keywords and +the definition of equivalence. Note that the `by` function is applied to the +searched value `x` as well as the values in `v`. -See also: [`insorted`](@ref), [`searchsortedfirst`](@ref), [`sort`](@ref), [`findall`](@ref). +The range is generally found using binary search, but there are optimized +implementations for some inputs. + +See also: [`searchsortedfirst`](@ref), [`sort!`](@ref), [`insorted`](@ref), [`findall`](@ref). # Examples ```jldoctest @@ -325,15 +336,26 @@ julia> searchsorted([1, 2, 4, 5, 5, 7], 9) # no match, insert at end julia> searchsorted([1, 2, 4, 5, 5, 7], 0) # no match, insert at start 1:0 + +julia> searchsorted([1=>"one", 2=>"two", 2=>"two", 4=>"four"], 2=>"two", by=first) # compare the keys of the pairs +2:3 ``` """ searchsorted """ - searchsortedfirst(a, x; by=, lt=, rev=false) + searchsortedfirst(v, x; by=identity, lt=isless, rev=false) -Return the index of the first value in `a` greater than or equal to `x`, according to the -specified order. Return `lastindex(a) + 1` if `x` is greater than all values in `a`. -`a` is assumed to be sorted. +Return the index of the first value in `v` that is not ordered before `x`. +If all values in `v` are ordered before `x`, return `lastindex(v) + 1`. + +The vector `v` must be sorted according to the order defined by the keywords. +`insert!`ing `x` at the returned index will maintain the sorted order. +Refer to [`sort!`](@ref) for the meaning and use of the keywords. +Note that the `by` function is applied to the searched value `x` as well as the +values in `v`. + +The index is generally found using binary search, but there are optimized +implementations for some inputs. See also: [`searchsortedlast`](@ref), [`searchsorted`](@ref), [`findfirst`](@ref). @@ -353,15 +375,26 @@ julia> searchsortedfirst([1, 2, 4, 5, 5, 7], 9) # no match, insert at end julia> searchsortedfirst([1, 2, 4, 5, 5, 7], 0) # no match, insert at start 1 + +julia> searchsortedfirst([1=>"one", 2=>"two", 4=>"four"], 3=>"three", by=first) # compare the keys of the pairs +3 ``` """ searchsortedfirst """ - searchsortedlast(a, x; by=, lt=, rev=false) + searchsortedlast(v, x; by=identity, lt=isless, rev=false) -Return the index of the last value in `a` less than or equal to `x`, according to the -specified order. Return `firstindex(a) - 1` if `x` is less than all values in `a`. `a` is -assumed to be sorted. +Return the index of the last value in `v` that is not ordered after `x`. +If all values in `v` are ordered after `x`, return `firstindex(v) - 1`. + +The vector `v` must be sorted according to the order defined by the keywords. +`insert!`ing `x` immediately after the returned index will maintain the sorted order. +Refer to [`sort!`](@ref) for the meaning and use of the keywords. +Note that the `by` function is applied to the searched value `x` as well as the +values in `v`. 
+ +The index is generally found using binary search, but there are optimized +implementations for some inputs # Examples ```jldoctest @@ -379,16 +412,23 @@ julia> searchsortedlast([1, 2, 4, 5, 5, 7], 9) # no match, insert at end julia> searchsortedlast([1, 2, 4, 5, 5, 7], 0) # no match, insert at start 0 + +julia> searchsortedlast([1=>"one", 2=>"two", 4=>"four"], 3=>"three", by=first) # compare the keys of the pairs +2 ``` """ searchsortedlast """ - insorted(x, a; by=, lt=, rev=false) -> Bool + insorted(x, v; by=identity, lt=isless, rev=false) -> Bool -Determine whether an item `x` is in the sorted collection `a`, in the sense that -it is [`==`](@ref) to one of the values of the collection according to the order -specified by the `by`, `lt` and `rev` keywords, assuming that `a` is already -sorted in that order, see [`sort`](@ref) for the keywords. +Determine whether a vector `v` contains any value equivalent to `x`. +The vector `v` must be sorted according to the order defined by the keywords. +Refer to [`sort!`](@ref) for the meaning of the keywords and the definition of +equivalence. Note that the `by` function is applied to the searched value `x` +as well as the values in `v`. + +The check is generally done using binary search, but there are optimized +implementations for some inputs. See also [`in`](@ref). @@ -408,6 +448,9 @@ false julia> insorted(0, [1, 2, 4, 5, 5, 7]) # no match false + +julia> insorted(2=>"TWO", [1=>"one", 2=>"two", 4=>"four"], by=first) # compare the keys of the pairs +true ``` !!! compat "Julia 1.6" @@ -417,279 +460,930 @@ function insorted end insorted(x, v::AbstractVector; kw...) = !isempty(searchsorted(v, x; kw...)) insorted(x, r::AbstractRange) = in(x, r) -## sorting algorithms ## +## Alternative keyword management -abstract type Algorithm end +macro getkw(syms...) + getters = (getproperty(Sort, Symbol(:_, sym)) for sym in syms) + Expr(:block, (:($(esc(:((kw, $sym) = $getter(v, o, kw))))) for (sym, getter) in zip(syms, getters))...) +end + +for (sym, exp, type) in [ + (:lo, :(firstindex(v)), Integer), + (:hi, :(lastindex(v)), Integer), + (:mn, :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), + (:mx, :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), + (:scratch, nothing, :(Union{Nothing, Vector})), # could have different eltype + (:legacy_dispatch_entry, nothing, Union{Nothing, Algorithm})] + usym = Symbol(:_, sym) + @eval function $usym(v, o, kw) + # using missing instead of nothing because scratch could === nothing. + res = get(kw, $(Expr(:quote, sym)), missing) + res !== missing && return kw, res::$type + $sym = $exp + (;kw..., $sym), $sym::$type + end +end + +## Scratch space management + +""" + make_scratch(scratch::Union{Nothing, Vector}, T::Type, len::Integer) + +Returns `(s, t)` where `t` is an `AbstractVector` of type `T` with length at least `len` +that is backed by the `Vector` `s`. If `scratch !== nothing`, then `s === scratch`. + +This function will allocate a new vector if `scratch === nothing`, `resize!` `scratch` if it +is too short, and `reinterpret` `scratch` if its eltype is not `T`. 
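A short sketch of the three `make_scratch` behaviors described above (internal helper; assumes a build that includes this code):

```julia
# Fresh allocation, reuse, and reinterpretation of scratch space.
s1, t1 = Base.Sort.make_scratch(nothing, Float64, 8)  # allocates a new Vector{Float64}
s1 === t1                                             # true: the vector backs itself

buf = UInt8[]
s2, t2 = Base.Sort.make_scratch(buf, Float64, 4)      # resizes buf to 32 bytes...
eltype(t2) == Float64 && length(t2) == 4              # ...and reinterprets it as Float64
```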
+""" +function make_scratch(scratch::Nothing, T::Type, len::Integer) + s = Vector{T}(undef, len) + s, s +end +function make_scratch(scratch::Vector{T}, ::Type{T}, len::Integer) where T + len > length(scratch) && resize!(scratch, len) + scratch, scratch +end +function make_scratch(scratch::Vector, T::Type, len::Integer) + len_bytes = len * sizeof(T) + len_scratch = div(len_bytes, sizeof(eltype(scratch))) + len_scratch > length(scratch) && resize!(scratch, len_scratch) + scratch, reinterpret(T, scratch) +end -struct InsertionSortAlg <: Algorithm end -struct QuickSortAlg <: Algorithm end -struct MergeSortAlg <: Algorithm end + +## sorting algorithm components ## """ - AdaptiveSort(fallback) + _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw; t, offset) + +An internal function that sorts `v` using the algorithm `a` under the ordering `o`, +subject to specifications provided in `kw` (such as `lo` and `hi` in which case it only +sorts `view(v, lo:hi)`) -Indicate that a sorting function should use the fastest available algorithm. +Returns a scratch space if provided or constructed during the sort, or `nothing` if +no scratch space is present. -Adaptive sort will use the algorithm specified by `fallback` for types and orders that are -not [`UIntMappable`](@ref). Otherwise, it will typically use: - * Insertion sort for short vectors - * Radix sort for long vectors - * Counting sort for vectors of integers spanning a short range +!!! note + `_sort!` modifies but does not return `v`. -Adaptive sort is guaranteed to be stable if the fallback algorithm is stable. +A returned scratch space will be a `Vector{T}` where `T` is usually the eltype of `v`. There +are some exceptions, for example if `eltype(v) == Union{Missing, T}` then the scratch space +may be be a `Vector{T}` due to `MissingOptimization` changing the eltype of `v` to `T`. + +`t` is an appropriate scratch space for the algorithm at hand, to be accessed as +`t[i + offset]`. `t` is used for an algorithm to pass a scratch space back to itself in +internal or recursive calls. +""" +function _sort! end + +# TODO: delete this optimization when views have no overhead. +const UnwrappableSubArray = SubArray{T, 1, <:AbstractArray{T}, <:Tuple{AbstractUnitRange, Vararg{Number}}, true} where T """ -struct AdaptiveSort{Fallback <: Algorithm} <: Algorithm - fallback::Fallback + SubArrayOptimization(next) <: Algorithm + +Unwrap certain known SubArrays because views have a performance overhead 😢 + +Specifically, unwraps some instances of the type + + $UnwrappableSubArray +""" +struct SubArrayOptimization{T <: Algorithm} <: Algorithm + next::T +end + +_sort!(v::AbstractVector, a::SubArrayOptimization, o::Ordering, kw) = _sort!(v, a.next, o, kw) +function _sort!(v::UnwrappableSubArray, a::SubArrayOptimization, o::Ordering, kw) + @getkw lo hi + # @assert v.stride1 == 1 + parent = v.parent + if parent isa Array && !(parent isa Vector) && hi - lo < 100 + # vec(::Array{T, ≠1}) allocates and is therefore somewhat expensive. + # We don't want that for small inputs. + _sort!(v, a.next, o, kw) + else + _sort!(vec(parent), a.next, o, (;kw..., lo = lo + v.offset1, hi = hi + v.offset1)) + end end + """ - PartialQuickSort{T <: Union{Integer,OrdinalRange}} + MissingOptimization(next) <: Algorithm -Indicate that a sorting function should use the partial quick sort -algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest -to largest, finding them and sorting them using [`QuickSort`](@ref). +Filter out missing values. 
-Characteristics: - * *not stable*: does not preserve the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters which - ignores case). - * *in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). +Missing values are placed after other values according to `DirectOrdering`s. This pass puts +them there and passes on a view into the original vector that excludes the missing values. +This pass is triggered for both `sort([1, missing, 3])` and `sortperm([1, missing, 3])`. """ -struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm - k::T +struct MissingOptimization{T <: Algorithm} <: Algorithm + next::T end +struct WithoutMissingVector{T, U} <: AbstractVector{T} + data::U + function WithoutMissingVector(data; unsafe=false) + if !unsafe && any(ismissing, data) + throw(ArgumentError("data must not contain missing values")) + end + new{nonmissingtype(eltype(data)), typeof(data)}(data) + end +end +Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer) + out = v.data[i] + @assert !(out isa Missing) + out::eltype(v) +end +Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i::Integer) + v.data[i] = x + v +end +Base.size(v::WithoutMissingVector) = size(v.data) +Base.axes(v::WithoutMissingVector) = axes(v.data) """ - InsertionSort + send_to_end!(f::Function, v::AbstractVector; [lo, hi]) -Indicate that a sorting function should use the insertion sort -algorithm. Insertion sort traverses the collection one element -at a time, inserting each element into its correct, sorted position in -the output vector. +Send every element of `v` for which `f` returns `true` to the end of the vector and return +the index of the last element for which `f` returns `false`. -Characteristics: - * *stable*: preserves the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters - which ignores case). - * *in-place* in memory. - * *quadratic performance* in the number of elements to be sorted: - it is well-suited to small collections but should not be used for large ones. +`send_to_end!(f, v, lo, hi)` is equivalent to `send_to_end!(f, view(v, lo:hi))+lo-1` + +Preserves the order of the elements that are not sent to the end. """ -const InsertionSort = InsertionSortAlg() +function send_to_end!(f::F, v::AbstractVector; lo=firstindex(v), hi=lastindex(v)) where F <: Function + i = lo + @inbounds while i <= hi && !f(v[i]) + i += 1 + end + j = i + 1 + @inbounds while j <= hi + if !f(v[j]) + v[i], v[j] = v[j], v[i] + i += 1 + end + j += 1 + end + i - 1 +end """ - QuickSort + send_to_end!(f::Function, v::AbstractVector, o::DirectOrdering[, end_stable]; lo, hi) -Indicate that a sorting function should use the quick sort -algorithm, which is *not* stable. +Return `(a, b)` where `v[a:b]` are the elements that are not sent to the end. -Characteristics: - * *not stable*: does not preserve the ordering of elements which - compare equal (e.g. "a" and "A" in a sort of letters which - ignores case). - * *in-place* in memory. - * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). - * *good performance* for large collections. +If `o isa ReverseOrdering` then the "end" of `v` is `v[lo]`. + +If `end_stable` is set, the elements that are sent to the end are stable instead of the +elements that are not """ -const QuickSort = QuickSortAlg() +@inline send_to_end!(f::F, v::AbstractVector, ::ForwardOrdering, end_stable=false; lo, hi) where F <: Function = + end_stable ? 
(lo, hi-send_to_end!(!f, view(v, hi:-1:lo))) : (lo, send_to_end!(f, v; lo, hi)) +@inline send_to_end!(f::F, v::AbstractVector, ::ReverseOrdering, end_stable=false; lo, hi) where F <: Function = + end_stable ? (send_to_end!(!f, v; lo, hi)+1, hi) : (hi-send_to_end!(f, view(v, hi:-1:lo))+1, hi) + + +function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw) + @getkw lo hi + if o isa DirectOrdering && eltype(v) >: Missing && nonmissingtype(eltype(v)) != eltype(v) + lo, hi = send_to_end!(ismissing, v, o; lo, hi) + _sort!(WithoutMissingVector(v, unsafe=true), a.next, o, (;kw..., lo, hi)) + elseif o isa Perm && o.order isa DirectOrdering && eltype(v) <: Integer && + eltype(o.data) >: Missing && nonmissingtype(eltype(o.data)) != eltype(o.data) && + all(i === j for (i,j) in zip(v, eachindex(o.data))) + # TODO make this branch known at compile time + # This uses a custom function because we need to ensure stability of both sides and + # we can assume v is equal to eachindex(o.data) which allows a copying partition + # without allocations. + lo_i, hi_i = lo, hi + cv = eachindex(o.data) # equal to copy(v) + for i in lo:hi + x = o.data[cv[i]] + if ismissing(x) == (o.order == Reverse) # should x go at the beginning/end? + v[lo_i] = i + lo_i += 1 + else + v[hi_i] = i + hi_i -= 1 + end + end + reverse!(v, lo_i, hi) + if o.order == Reverse + lo = lo_i + else + hi = hi_i + end + + _sort!(v, a.next, Perm(o.order, WithoutMissingVector(o.data, unsafe=true)), (;kw..., lo, hi)) + else + _sort!(v, a.next, o, kw) + end +end + + """ - MergeSort + IEEEFloatOptimization(next) <: Algorithm -Indicate that a sorting function should use the merge sort -algorithm. Merge sort divides the collection into -subcollections and repeatedly merges them, sorting each -subcollection at each step, until the entire -collection has been recombined in sorted form. +Move NaN values to the end, partition by sign, and reinterpret the rest as unsigned integers. -Characteristics: - * *stable*: preserves the ordering of elements which compare - equal (e.g. "a" and "A" in a sort of letters which ignores - case). - * *not in-place* in memory. - * *divide-and-conquer* sort strategy. +IEEE floating point numbers (`Float64`, `Float32`, and `Float16`) compare the same as +unsigned integers with the same bit patterns, with a few exceptions; this pass handles those exceptions. + +This pass is triggered for both `sort([1.0, NaN, 3.0])` and `sortperm([1.0, NaN, 3.0])`. 
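The bit-pattern claim can be checked directly with plain `reinterpret` calls (illustration only):

```julia
# Non-negative finite floats order the same way as their bits viewed as unsigned integers.
reinterpret(UInt64, 1.5) < reinterpret(UInt64, 2.5)    # true, matching 1.5 < 2.5
reinterpret(UInt64, 0.0) < reinterpret(UInt64, 1.0)    # true

# Negative floats order in reverse as unsigned bits, which is why this pass sorts the
# negative block under the Reverse ordering before the non-negative block.
reinterpret(UInt64, -2.0) > reinterpret(UInt64, -1.0)  # true, even though -2.0 < -1.0
```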
""" -const MergeSort = MergeSortAlg() +struct IEEEFloatOptimization{T <: Algorithm} <: Algorithm + next::T +end -const DEFAULT_UNSTABLE = AdaptiveSort(QuickSort) -const DEFAULT_STABLE = AdaptiveSort(MergeSort) -const SMALL_ALGORITHM = InsertionSort -const SMALL_THRESHOLD = 20 +after_zero(::ForwardOrdering, x) = !signbit(x) +after_zero(::ReverseOrdering, x) = signbit(x) +is_concrete_IEEEFloat(T::Type) = T <: Base.IEEEFloat && isconcretetype(T) +function _sort!(v::AbstractVector, a::IEEEFloatOptimization, o::Ordering, kw) + @getkw lo hi + if is_concrete_IEEEFloat(eltype(v)) && o isa DirectOrdering + lo, hi = send_to_end!(isnan, v, o, true; lo, hi) + iv = reinterpret(uinttype(eltype(v)), v) + j = send_to_end!(x -> after_zero(o, x), v; lo, hi) + scratch = _sort!(iv, a.next, Reverse, (;kw..., lo, hi=j)) + if scratch === nothing # Union split + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch)) + else + _sort!(iv, a.next, Forward, (;kw..., lo=j+1, hi, scratch)) + end + elseif eltype(v) <: Integer && o isa Perm && o.order isa DirectOrdering && is_concrete_IEEEFloat(eltype(o.data)) + lo, hi = send_to_end!(i -> isnan(@inbounds o.data[i]), v, o.order, true; lo, hi) + ip = reinterpret(uinttype(eltype(o.data)), o.data) + j = send_to_end!(i -> after_zero(o.order, @inbounds o.data[i]), v; lo, hi) + scratch = _sort!(v, a.next, Perm(Reverse, ip), (;kw..., lo, hi=j)) + if scratch === nothing # Union split + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch)) + else + _sort!(v, a.next, Perm(Forward, ip), (;kw..., lo=j+1, hi, scratch)) + end + else + _sort!(v, a.next, o, kw) + end +end + + +""" + BoolOptimization(next) <: Algorithm + +Sort `AbstractVector{Bool}`s using a specialized version of counting sort. + +Accesses each element at most twice (one read and one write), and performs at most two +comparisons. +""" +struct BoolOptimization{T <: Algorithm} <: Algorithm + next::T +end +_sort!(v::AbstractVector, a::BoolOptimization, o::Ordering, kw) = _sort!(v, a.next, o, kw) +function _sort!(v::AbstractVector{Bool}, ::BoolOptimization, o::Ordering, kw) + first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v + @getkw lo hi scratch + count = 0 + @inbounds for i in lo:hi + if v[i] == first + count += 1 + end + end + @inbounds v[lo:lo+count-1] .= first + @inbounds v[lo+count:hi] .= !first + scratch +end + + +""" + IsUIntMappable(yes, no) <: Algorithm + +Determines if the elements of a vector can be mapped to unsigned integers while preserving +their order under the specified ordering. + +If they can be, dispatch to the `yes` algorithm and record the unsigned integer type that +the elements may be mapped to. Otherwise dispatch to the `no` algorithm. +""" +struct IsUIntMappable{T <: Algorithm, U <: Algorithm} <: Algorithm + yes::T + no::U +end +function _sort!(v::AbstractVector, a::IsUIntMappable, o::Ordering, kw) + if UIntMappable(eltype(v), o) !== nothing + _sort!(v, a.yes, o, kw) + else + _sort!(v, a.no, o, kw) + end +end + + +""" + Small{N}(small=SMALL_ALGORITHM, big) <: Algorithm + +Sort inputs with `length(lo:hi) <= N` using the `small` algorithm. Otherwise use the `big` +algorithm. 
+""" +struct Small{N, T <: Algorithm, U <: Algorithm} <: Algorithm + small::T + big::U +end +Small{N}(small, big) where N = Small{N, typeof(small), typeof(big)}(small, big) +Small{N}(big) where N = Small{N}(SMALL_ALGORITHM, big) +function _sort!(v::AbstractVector, a::Small{N}, o::Ordering, kw) where N + @getkw lo hi + if (hi-lo) < N + _sort!(v, a.small, o, kw) + else + _sort!(v, a.big, o, kw) + end +end + + +struct InsertionSortAlg <: Algorithm end + +""" + InsertionSort + +Use the insertion sort algorithm. + +Insertion sort traverses the collection one element at a time, inserting +each element into its correct, sorted position in the output vector. + +Characteristics: +* *stable*: preserves the ordering of elements that compare equal +(e.g. "a" and "A" in a sort of letters that ignores case). +* *in-place* in memory. +* *quadratic performance* in the number of elements to be sorted: +it is well-suited to small collections but should not be used for large ones. +""" +const InsertionSort = InsertionSortAlg() +const SMALL_ALGORITHM = InsertionSortAlg() -function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg, o::Ordering) - @inbounds for i = lo+1:hi +function _sort!(v::AbstractVector, ::InsertionSortAlg, o::Ordering, kw) + @getkw lo hi scratch + lo_plus_1 = (lo + 1)::Integer + @inbounds for i = lo_plus_1:hi j = i x = v[i] - while j > lo && lt(o, x, v[j-1]) - v[j] = v[j-1] + while j > lo + y = v[j-1] + if !(lt(o, x, y)::Bool) + break + end + v[j] = y j -= 1 end v[j] = x end - return v + scratch end -# selectpivot! -# -# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi]) and -# choose the middle value as a pivot -# -# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be -# greater than the pivot -@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) - @inbounds begin - mi = midpoint(lo, hi) - - # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place - if lt(o, v[lo], v[mi]) - v[mi], v[lo] = v[lo], v[mi] - end +""" + CheckSorted(next) <: Algorithm - if lt(o, v[hi], v[lo]) - if lt(o, v[hi], v[mi]) - v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi] - else - v[hi], v[lo] = v[lo], v[hi] - end - end +Check if the input is already sorted and for large inputs, also check if it is +reverse-sorted. The reverse-sorted check is unstable. +""" +struct CheckSorted{T <: Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::CheckSorted, o::Ordering, kw) + @getkw lo hi scratch - # return the pivot - return v[lo] + # For most arrays, a presorted check is cheap (overhead < 5%) and for most large + # arrays it is essentially free (<1%). + _issorted(v, lo, hi, o) && return scratch + + # For most large arrays, a reverse-sorted check is essentially free (overhead < 1%) + if hi-lo >= 500 && _issorted(v, lo, hi, ReverseOrdering(o)) + # If reversing is valid, do so. This violates stability. + reverse!(v, lo, hi) + return scratch end + + _sort!(v, a.next, o, kw) end -# partition! -# -# select a pivot, and partition v according to the pivot -function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) - pivot = selectpivot!(v, lo, hi, o) - # pivot == v[lo], v[hi] > pivot - i, j = lo, hi - @inbounds while true - i += 1; j -= 1 - while lt(o, v[i], pivot); i += 1; end; - while lt(o, pivot, v[j]); j -= 1; end; - i >= j && break - v[i], v[j] = v[j], v[i] +""" + ComputeExtrema(next) <: Algorithm + +Compute the extrema of the input under the provided order. 
+ +If the minimum is no less than the maximum, then the input is already sorted. Otherwise, +dispatch to the `next` algorithm. +""" +struct ComputeExtrema{T <: Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::ComputeExtrema, o::Ordering, kw) + @getkw lo hi scratch + mn = mx = v[lo] + @inbounds for i in (lo+1):hi + vi = v[i] + lt(o, vi, mn) && (mn = vi) + lt(o, mx, vi) && (mx = vi) end - v[j], v[lo] = pivot, v[j] - # v[j] == pivot - # v[k] >= pivot for k > j - # v[i] <= pivot for i < j - return j + lt(o, mn, mx) || return scratch # all same + + _sort!(v, a.next, o, (;kw..., mn, mx)) end -function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering) - @inbounds while lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - j = partition!(v, lo, hi, o) - if j-lo < hi-j - # recurse on the smaller chunk - # this is necessary to preserve O(log(n)) - # stack space in the worst case (rather than O(n)) - lo < (j-1) && sort!(v, lo, j-1, a, o) - lo = j+1 - else - j+1 < hi && sort!(v, j+1, hi, a, o) - hi = j-1 - end + +""" + ConsiderCountingSort(counting=CountingSort(), next) <: Algorithm + +If the input's range is small enough, use the `counting` algorithm. Otherwise, dispatch to +the `next` algorithm. + +For most types, the threshold is if the range is shorter than half the length, but for types +larger than Int64, bitshifts are expensive and RadixSort is not viable, so the threshold is +much more generous. +""" +struct ConsiderCountingSort{T <: Algorithm, U <: Algorithm} <: Algorithm + counting::T + next::U +end +ConsiderCountingSort(next) = ConsiderCountingSort(CountingSort(), next) +function _sort!(v::AbstractVector{<:Integer}, a::ConsiderCountingSort, o::DirectOrdering, kw) + @getkw lo hi mn mx + range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) + + if range < (sizeof(eltype(v)) > 8 ? 5(hi-lo)-100 : div(hi-lo, 2)) + _sort!(v, a.counting, o, kw) + else + _sort!(v, a.next, o, kw) end - return v end +_sort!(v::AbstractVector, a::ConsiderCountingSort, o::Ordering, kw) = _sort!(v, a.next, o, kw) -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, - t0::Union{AbstractVector{T}, Nothing}=nothing) where T - @inbounds if lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - m = midpoint(lo, hi) +""" + CountingSort <: Algorithm - t = t0 === nothing ? similar(v, m-lo+1) : t0 - length(t) < m-lo+1 && resize!(t, m-lo+1) - Base.require_one_based_indexing(t) +Use the counting sort algorithm. - sort!(v, lo, m, a, o, t) - sort!(v, m+1, hi, a, o, t) +`CountingSort` is an algorithm for sorting integers that runs in Θ(length + range) time and +space. It counts the number of occurrences of each value in the input and then iterates +through those counts repopulating the input with the values in sorted order. +""" +struct CountingSort <: Algorithm end +maybe_reverse(o::ForwardOrdering, x) = x +maybe_reverse(o::ReverseOrdering, x) = reverse(x) +function _sort!(v::AbstractVector{<:Integer}, ::CountingSort, o::DirectOrdering, kw) + @getkw lo hi mn mx scratch + range = maybe_unsigned(o === Reverse ? mn-mx : mx-mn) + offs = 1 - (o === Reverse ? 
mx : mn) + + counts = fill(0, range+1) # TODO use scratch (but be aware of type stability) + @inbounds for i = lo:hi + counts[v[i] + offs] += 1 + end - i, j = 1, lo - while j <= m - t[i] = v[j] - i += 1 - j += 1 + idx = lo + @inbounds for i = maybe_reverse(o, 1:range+1) + lastidx = idx + counts[i] - 1 + val = i-offs + for j = idx:lastidx + v[j] = val isa Unsigned && eltype(v) <: Signed ? signed(val) : val end + idx = lastidx + 1 + end - i, k = 1, lo - while k < j <= hi - if lt(o, v[j], t[i]) - v[k] = v[j] - j += 1 - else - v[k] = t[i] - i += 1 - end - k += 1 + scratch +end + + +""" + ConsiderRadixSort(radix=RadixSort(), next) <: Algorithm + +If the number of bits in the input's range is small enough and the input supports efficient +bitshifts, use the `radix` algorithm. Otherwise, dispatch to the `next` algorithm. +""" +struct ConsiderRadixSort{T <: Algorithm, U <: Algorithm} <: Algorithm + radix::T + next::U +end +ConsiderRadixSort(next) = ConsiderRadixSort(RadixSort(), next) +function _sort!(v::AbstractVector, a::ConsiderRadixSort, o::DirectOrdering, kw) + @getkw lo hi mn mx + urange = uint_map(mx, o)-uint_map(mn, o) + bits = unsigned(top_set_bit(urange)) + if sizeof(eltype(v)) <= 8 && bits+70 < 22log(hi-lo) + _sort!(v, a.radix, o, kw) + else + _sort!(v, a.next, o, kw) + end +end + + +""" + RadixSort <: Algorithm + +Use the radix sort algorithm. + +`RadixSort` is a stable least significant bit first radix sort algorithm that runs in +`O(length * log(range))` time and linear space. + +It first sorts the entire vector by the last `chunk_size` bits, then by the second +to last `chunk_size` bits, and so on. Stability means that it will not reorder two elements +that compare equal. This is essential so that the order introduced by earlier, +less significant passes is preserved by later passes. + +Each pass divides the input into `2^chunk_size == mask+1` buckets. To do this, it + * counts the number of entries that fall into each bucket + * uses those counts to compute the indices to move elements of those buckets into + * moves elements into the computed indices in the swap array + * switches the swap and working array + +`chunk_size` is larger for larger inputs and determined by an empirical heuristic. +""" +struct RadixSort <: Algorithm end +function _sort!(v::AbstractVector, a::RadixSort, o::DirectOrdering, kw) + @getkw lo hi mn mx scratch + umn = uint_map(mn, o) + urange = uint_map(mx, o)-umn + bits = unsigned(top_set_bit(urange)) + + # At this point, we are committed to radix sort. + u = uint_map!(v, lo, hi, o) + + # we subtract umn to avoid radixing over unnecessary bits. For example, + # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] + # which uses all 32 bits, but once we subtract umn = 0x7fffffff, we are left with + # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and + # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] + # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. + # the overhead for this subtraction is small enough that it is worthwhile in many cases. 
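The pass structure described in the `RadixSort` docstring can be condensed into a minimal standalone sketch (illustrative only; the real implementation uses a tuned chunk size, shared scratch space, and the `umn` subtraction above):

```julia
# Stable least-significant-chunk-first radix sort over fixed-size chunks of bits.
function radix_demo(v::Vector{UInt32}; chunk=8)
    t = similar(v)
    mask = UInt32(1) << chunk - 1
    for shift in 0:chunk:8*sizeof(UInt32)-1
        counts = zeros(Int, mask + 2)
        for x in v                                # count entries per bucket
            counts[((x >> shift) & mask) + 2] += 1
        end
        counts[1] = 1
        cumsum!(counts, counts)                   # first target index of each bucket
        for x in v                                # stable scatter into the swap array
            i = ((x >> shift) & mask) + 1
            t[counts[i]] = x
            counts[i] += 1
        end
        v, t = t, v                               # the latest data now resides in v
    end
    v
end

radix_demo(UInt32[0x0130, 0x0002, 0x1011])  # UInt32[0x00000002, 0x00000130, 0x00001011]
```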
+ + # this is faster than u[lo:hi] .-= umn as of v1.9.0-DEV.100 + @inbounds for i in lo:hi + u[i] -= umn + end + + scratch, t = make_scratch(scratch, eltype(v), hi-lo+1) + tu = reinterpret(eltype(u), t) + if radix_sort!(u, lo, hi, bits, tu, 1-lo) + uint_unmap!(v, u, lo, hi, o, umn) + else + uint_unmap!(v, tu, lo, hi, o, umn, 1-lo) + end + scratch +end + + +""" + ScratchQuickSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm + ScratchQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm + +Use the `ScratchQuickSort` algorithm with the `next` algorithm as a base case. + +`ScratchQuickSort` is like `QuickSort`, but utilizes scratch space to operate faster and allow +for the possibility of maintaining stability. + +If `lo` and `hi` are provided, finds and sorts the elements in the range `lo:hi`, reordering +but not necessarily sorting other elements in the process. If `lo` or `hi` is `missing`, it +is treated as the first or last index of the input, respectively. + +`lo` and `hi` may be specified together as an `AbstractUnitRange`. + +Characteristics: + * *stable*: preserves the ordering of elements that compare equal + (e.g. "a" and "A" in a sort of letters that ignores case). + * *not in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`QuickSort`](@ref). + * *linear runtime* if `length(lo:hi)` is constant + * *quadratic worst case runtime* in pathological cases + (vanishingly rare for non-malicious input) +""" +struct ScratchQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm + lo::L + hi::H + next::T +end +ScratchQuickSort(next::Algorithm=SMALL_ALGORITHM) = ScratchQuickSort(missing, missing, next) +ScratchQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) = ScratchQuickSort(lo, hi, SMALL_ALGORITHM) +ScratchQuickSort(lo::Union{Integer, Missing}, next::Algorithm=SMALL_ALGORITHM) = ScratchQuickSort(lo, lo, next) +ScratchQuickSort(r::OrdinalRange, next::Algorithm=SMALL_ALGORITHM) = ScratchQuickSort(first(r), last(r), next) + +# select a pivot, partition v[lo:hi] according +# to the pivot, and store the result in t[lo:hi]. +# +# sets `pivot_dest[pivot_index+pivot_index_offset] = pivot` and returns that index. +function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer, o::Ordering, + v::AbstractVector, rev::Bool, pivot_dest::AbstractVector, pivot_index_offset::Integer) + # Ideally we would use `pivot_index = rand(lo:hi)`, but that requires Random.jl + # and would mutate the global RNG in sorting. + pivot_index = mod(hash(lo), lo:hi) + @inbounds begin + pivot = v[pivot_index] + while lo < pivot_index + x = v[lo] + fx = rev ? !lt(o, x, pivot) : lt(o, pivot, x) + t[(fx ? hi : lo) - offset] = x + offset += fx + lo += 1 end - while k < j - v[k] = t[i] - k += 1 - i += 1 + while lo < hi + x = v[lo+1] + fx = rev ? lt(o, pivot, x) : !lt(o, x, pivot) + t[(fx ? hi : lo) - offset] = x + offset += fx + lo += 1 end + pivot_index = lo-offset + pivot_index_offset + pivot_dest[pivot_index] = pivot end - return v + # t_pivot_index = lo-offset (i.e. 
without pivot_index_offset) + # t[t_pivot_index] is whatever it was before unless t is the pivot_dest + # t[t_pivot_index] >* pivot, reverse stable + + pivot_index +end -function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, - o::Ordering) - @inbounds while lo < hi - hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) - j = partition!(v, lo, hi, o) +function _sort!(v::AbstractVector, a::ScratchQuickSort, o::Ordering, kw; + t=nothing, offset=nothing, swap=false, rev=false) + @getkw lo hi scratch - if j <= first(a.k) + if t === nothing + scratch, t = make_scratch(scratch, eltype(v), hi-lo+1) + offset = 1-lo + kw = (;kw..., scratch) + end + + while lo < hi && hi - lo > SMALL_THRESHOLD + j = if swap + partition!(v, lo+offset, hi+offset, offset, o, t, rev, v, 0) + else + partition!(t, lo, hi, -offset, o, v, rev, v, -offset) + end + swap = !swap + + # For ScratchQuickSort(), a.lo === a.hi === missing, so the first two branches get skipped + if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part + swap && copyto!(v, lo, t, lo+offset, j-lo) + rev && reverse!(v, lo, j-1) lo = j+1 - elseif j >= last(a.k) + rev = !rev + elseif !ismissing(a.hi) && a.hi <= j # Skip sorting the upper part + swap && copyto!(v, j+1, t, j+1+offset, hi-j) + rev || reverse!(v, j+1, hi) + hi = j-1 + elseif j-lo < hi-j + # Sort the lower part recursively because it is smaller. Recursing on the + # smaller part guarantees O(log(n)) stack space even on pathological inputs. + _sort!(v, a, o, (;kw..., lo, hi=j-1); t, offset, swap, rev) + lo = j+1 + rev = !rev + else # Sort the higher part recursively + _sort!(v, a, o, (;kw..., lo=j+1, hi); t, offset, swap, rev=!rev) hi = j-1 + end + end + hi < lo && return scratch + swap && copyto!(v, lo, t, lo+offset, hi-lo+1) + rev && reverse!(v, lo, hi) + _sort!(v, a.next, o, (;kw..., lo, hi)) +end + + +""" + BracketedSort(target[, next::Algorithm]) <: Algorithm + +Perform a partialsort for the elements that fall into the indices specified by the `target` +using BracketedSort with the `next` algorithm for subproblems. + +BracketedSort takes a random* sample of the input, estimates the quantiles of the input +using the quantiles of the sample to find signposts that almost certainly bracket the target +values, filters the values in the input that fall between the signpost values to the front of +the input, and then, if that "almost certainly" turned out to be true, finds the target +within the small chunk of elements that are, by value, between the signposts and now, by position, at the +front of the vector. On small inputs or when target is close to the size of the input, +BracketedSort falls back to the `next` algorithm directly. Otherwise, BracketedSort uses the +`next` algorithm only to compute quantiles of the sample and to find the target within the +small chunk. + +## Performance + +If the `next` algorithm has `O(n * log(n))` runtime and the input is not pathological then +the runtime of this algorithm is `O(n + k * log(k))` where `n` is the length of the input +and `k` is `length(target)`. On pathological inputs the asymptotic runtime is the same as +the runtime of the `next` algorithm. + +BracketedSort itself does not allocate. If `next` is in-place then BracketedSort is also +in-place. If `next` is not in place and its space usage increases monotonically with input +length, then BracketedSort's maximum space usage will never be more than the space usage +of `next` on the input BracketedSort receives. 
For large nonpathological inputs and targets +substantially smaller than the size of the input, BracketedSort's maximum memory usage will +be much less than `next`'s. If the maximum additional space usage of `next` scales linearly +then for small k the average* maximum additional space usage of BracketedSort will be +`O(n^(2.3/3))`. + +By default, BracketedSort uses the `O(n)` space and `O(n + k log k)` runtime +`ScratchQuickSort` algorithm recursively. + +*Sorting is unable to depend on Random.jl because Random.jl depends on sorting. + Consequently, we use `hash` as a source of randomness. The average runtime guarantees + assume that `hash(x::Int)` produces a random result. However, as this randomization is + deterministic, if you try hard enough you can find inputs that consistently reach the + worst case bounds. Actually constructing such inputs is an exercise left to the reader. + Have fun :). + +Characteristics: + * *unstable*: does not preserve the ordering of elements that compare equal + (e.g. "a" and "A" in a sort of letters that ignores case). + * *in-place* in memory if the `next` algorithm is in-place. + * *estimate-and-filter*: strategy + * *linear runtime* if `length(target)` is constant and `next` is reasonable + * *n + k log k* worst case runtime if `next` has that runtime. + * *pathological inputs* can significantly increase constant factors. +""" +struct BracketedSort{T, F} <: Algorithm + target::T + get_next::F +end + +# TODO: this composition between BracketedSort and ScratchQuickSort does not bring me joy +BracketedSort(k) = BracketedSort(k, k -> InitialOptimizations(ScratchQuickSort(k))) + +function bracket_kernel!(v::AbstractVector, lo, hi, lo_signpost, hi_signpost, o) + i = 0 + count_below = 0 + checkbounds(v, lo:hi) + for j in lo:hi + x = @inbounds v[j] + a = lo_signpost !== nothing && lt(o, x, lo_signpost) + b = hi_signpost === nothing || !lt(o, hi_signpost, x) + count_below += a + # if a != b # This branch is almost never taken, so making it branchless is bad. + # @inbounds v[i], v[j] = v[j], v[i] + # i += 1 + # end + c = a != b # JK, this is faster. + k = i * c + j + # Invariant: @assert firstindex(v) ≤ lo ≤ i + j ≤ k ≤ j ≤ hi ≤ lastindex(v) + @inbounds v[j], v[k] = v[k], v[j] + i += c - 1 + end + count_below, i+hi +end + +function move!(v, target, source) + # This function never dominates runtime—only add `@inbounds` if you can demonstrate a + # performance improvement. And if you do, also double check behavior when `target` + # is out of bounds. + @assert length(target) == length(source) + if length(target) == 1 || isdisjoint(target, source) + for (i, j) in zip(target, source) + v[i], v[j] = v[j], v[i] + end + else + @assert minimum(source) <= minimum(target) + reverse!(v, minimum(source), maximum(target)) + reverse!(v, minimum(target), maximum(target)) + end +end + +function _sort!(v::AbstractVector, a::BracketedSort, o::Ordering, kw) + @getkw lo hi scratch + # TODO for further optimization: reuse scratch between trials better, from signpost + # selection to recursive calls, and from the fallback (but be aware of type stability, + # especially when sorting IEEE floats. + + # We don't need to bounds check target because that is done higher up in the stack + # However, we cannot assume the target is inbounds. + lo < hi || return scratch + ln = hi - lo + 1 + + # This is simply a precomputed short-circuit to avoid doing scalar math for small inputs. + # It does not change dispatch at all. 
+ ln < 260 && return _sort!(v, a.get_next(a.target), o, kw) + + target = a.target + k = cbrt(ln) + k2 = round(Int, k^2) + k2ln = k2/ln + offset = .15k*top_set_bit(k2) # TODO for further optimization: tune this + lo_signpost_i, hi_signpost_i = + (floor(Int, (tar - lo) * k2ln + lo + off) for (tar, off) in + ((minimum(target), -offset), (maximum(target), offset))) + lastindex_sample = lo+k2-1 + expected_middle_ln = (min(lastindex_sample, hi_signpost_i) - max(lo, lo_signpost_i) + 1) / k2ln + # This heuristic is complicated because it fairly accurately reflects the runtime of + # this algorithm which is necessary to get good dispatch when both the target is large + # and the input are large. + # expected_middle_ln is a float and k2 is significantly below typemax(Int), so this will + # not overflow: + # TODO move target from alg to kw to avoid this ickyness: + ln <= 130 + 2k2 + 2expected_middle_ln && return _sort!(v, a.get_next(a.target), o, kw) + + # We store the random sample in + # sample = view(v, lo:lo+k2) + # but views are not quite as fast as using the input array directly, + # so we don't actually construct this view at runtime. + + # TODO for further optimization: handle lots of duplicates better. + # Right now lots of duplicates rounds up when it could use some super fast optimizations + # in some cases. + # e.g. + # + # Target: |----| + # Sorted input: 000000000000000000011111112222223333333333 + # + # Will filter all zeros and ones to the front when it could just take the first few + # it encounters. This optimization would be especially potent when `allequal(ans)` and + # equal elements are egal. + + # 3 random trials should typically give us 0.99999 reliability; we can assume + # the input is pathological and abort to fallback if we fail three trials. + seed = hash(ln, Int === Int64 ? 0x85eb830e0216012d : 0xae6c4e15) + for attempt in 1:3 + seed = hash(attempt, seed) + for i in lo:lo+k2-1 + j = mod(hash(i, seed), i:hi) # TODO for further optimization: be sneaky and remove this division + v[i], v[j] = v[j], v[i] + end + count_below, lastindex_middle = if lo_signpost_i <= lo && lastindex_sample <= hi_signpost_i + # The heuristics higher up in this function that dispatch to the `next` + # algorithm should prevent this from happening. + # Specifically, this means that expected_middle_ln == ln, so + # ln <= ... + 2.0expected_middle_ln && return ... + # will trigger. 
+ @assert false + # But if it does happen, the kernel reduces to + 0, hi + elseif lo_signpost_i <= lo + _sort!(v, a.get_next(hi_signpost_i), o, (;kw..., hi=lastindex_sample)) + bracket_kernel!(v, lo, hi, nothing, v[hi_signpost_i], o) + elseif lastindex_sample <= hi_signpost_i + _sort!(v, a.get_next(lo_signpost_i), o, (;kw..., hi=lastindex_sample)) + bracket_kernel!(v, lo, hi, v[lo_signpost_i], nothing, o) else - # recurse on the smaller chunk - # this is necessary to preserve O(log(n)) - # stack space in the worst case (rather than O(n)) - if j-lo < hi-j - lo < (j-1) && sort!(v, lo, j-1, a, o) - lo = j+1 - else - hi > (j+1) && sort!(v, j+1, hi, a, o) - hi = j-1 - end + # TODO for further optimization: don't sort the middle elements + _sort!(v, a.get_next(lo_signpost_i:hi_signpost_i), o, (;kw..., hi=lastindex_sample)) + bracket_kernel!(v, lo, hi, v[lo_signpost_i], v[hi_signpost_i], o) + end + target_in_middle = target .- count_below + if lo <= minimum(target_in_middle) && maximum(target_in_middle) <= lastindex_middle + scratch = _sort!(v, a.get_next(target_in_middle), o, (;kw..., hi=lastindex_middle)) + move!(v, target, target_in_middle) + return scratch end + # This line almost never runs. end - return v + # This line only runs on pathological inputs. Make sure it's covered by tests :) + _sort!(v, a.get_next(target), o, kw) end -# This is a stable least significant bit first radix sort. -# -# That is, it first sorts the entire vector by the last chunk_size bits, then by the second -# to last chunk_size bits, and so on. Stability means that it will not reorder two elements -# that compare equal. This is essential so that the order introduced by earlier, -# less significant passes is preserved by later passes. -# -# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it -# * counts the number of entries that fall into each bucket -# * uses those counts to compute the indices to move elements of those buckets into -# * moves elements into the computed indices in the swap array -# * switches the swap and working array -# -# In the case of an odd number of passes, the returned vector will === the input vector t, -# not v. This is one of the many reasons radix_sort! is not exported. -function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, - t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned - # bits is unsigned for performance reasons. - mask = UInt(1) << chunk_size - 1 - counts = Vector{Int}(undef, mask+2) - @inbounds for shift in 0:chunk_size:bits-1 +""" + StableCheckSorted(next) <: Algorithm + +Check if an input is sorted and/or reverse-sorted. + +The definition of reverse-sorted is that for every pair of adjacent elements, the latter is +less than the former. This is stricter than `issorted(v, Reverse(o))` to avoid swapping pairs +of elements that compare equal. +""" +struct StableCheckSorted{T<:Algorithm} <: Algorithm + next::T +end +function _sort!(v::AbstractVector, a::StableCheckSorted, o::Ordering, kw) + @getkw lo hi scratch + if _issorted(v, lo, hi, o) + return scratch + elseif _issorted(v, lo, hi, Lt((x, y) -> !lt(o, x, y))) + # Reverse only if necessary. Using issorted(..., Reverse(o)) would violate stability. + reverse!(v, lo, hi) + return scratch + end + + _sort!(v, a.next, o, kw) +end + +# The return value indicates whether v is sorted (true) or t is sorted (false) +# This is one of the many reasons radix_sort! is not exported. 
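The strict definition of reverse-sorted used by `StableCheckSorted` can be seen with plain `issorted` calls (illustration only):

```julia
# [3, 2, 2, 1] is reverse-sorted under the usual non-strict test...
issorted([3, 2, 2, 1], rev=true)               # true
# ...but not under the strict test, because the two 2s compare equal; blindly
# reversing would swap them, which a stable sort must not do.
issorted([3, 2, 2, 1], lt=(x, y) -> !(x < y))  # false
```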
+function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned, + t::AbstractVector{U}, offset::Integer, + chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned + # bits is unsigned for performance reasons. + counts = Vector{Int}(undef, 1 << chunk_size + 1) # TODO use scratch for this + + shift = 0 + while true + @noinline radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size) + # the latest data resides in t + shift += chunk_size + shift < bits || return false + @noinline radix_sort_pass!(v, lo+offset, hi+offset, -offset, counts, t, shift, chunk_size) + # the latest data resides in v + shift += chunk_size + shift < bits || return true + end +end +function radix_sort_pass!(t, lo, hi, offset, counts, v, shift, chunk_size) + mask = UInt(1) << chunk_size - 1 # mask is defined in pass so that the compiler + @inbounds begin # ↳ knows it's shape # counts[2:mask+2] will store the number of elements that fall into each bucket. # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff. counts .= 0 @@ -699,7 +1393,7 @@ function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsig counts[i] += 1 # increment that bucket's count end - counts[1] = lo # set target index for the first bucket + counts[1] = lo + offset # set target index for the first bucket cumsum!(counts, counts) # set target indices for subsequent buckets # counts[1:mask+1] now stores indices where the first member of each bucket # belongs, not the number of elements in each bucket. We will put the first element @@ -713,12 +1407,7 @@ function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsig t[j] = x # put the element where it belongs counts[i] = j + 1 # increment the target index for the next end # ↳ element in this bucket - - v, t = t, v # swap the now sorted destination vector t back into primary vector v - end - - v end function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) # chunk_size is the number of bits to radix over at once. @@ -733,172 +1422,217 @@ function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned) UInt8(cld(bits, cld(bits, guess))) end -# For AbstractVector{Bool}, counting sort is always best. -# This is an implementation of counting sort specialized for Bools. -# Accepts unused workspace to avoid method ambiguity. -function sort!(v::AbstractVector{B}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering, - t::Union{AbstractVector{B}, Nothing}=nothing) where {B <: Bool} - first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v - count = 0 - @inbounds for i in lo:hi - if v[i] == first - count += 1 - end - end - @inbounds v[lo:lo+count-1] .= first - @inbounds v[lo+count:hi] .= !first - v -end - maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt maybe_unsigned(x::BitSigned) = unsigned(x) -function _extrema(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) - mn = mx = v[lo] +function _issorted(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + @boundscheck checkbounds(v, lo:hi) @inbounds for i in (lo+1):hi - vi = v[i] - lt(o, vi, mn) && (mn = vi) - lt(o, mx, vi) && (mx = vi) + lt(o, v[i], v[i-1]) && return false end - mn, mx -end -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T - # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range! 
- # so we skip straight to the fallback algorithm which is comparison based. - U = UIntMappable(T, o) - U === nothing && return sort!(v, lo, hi, a.fallback, o) - - # to avoid introducing excessive detection costs for the trivial sorting problem - # and to avoid overflow, we check for small inputs before any other runtime checks - hi <= lo && return v - lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow - # only count sort on a short range can compete with insertion sort when lenm1 < 40 - # and the optimization is not worth the detection cost, so we use insertion sort. - lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + true +end - # For most arrays, a presorted check is cheap (overhead < 5%) and for most large - # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted - # input and this guarantees presorted input will always be efficiently handled - issorted(view(v, lo:hi), o) && return v - - # For large arrays, a reverse-sorted check is essentially free (overhead < 1%) - if lenm1 >= 500 && issorted(view(v, lo:hi), ReverseOrdering(o)) - reverse!(view(v, lo:hi)) - return v - end - # UInt128 does not support fast bit shifting so we never - # dispatch to radix sort but we may still perform count sort - if sizeof(U) > 8 - if T <: Integer && o isa DirectOrdering - v_min, v_max = _extrema(v, lo, hi, Forward) - v_range = maybe_unsigned(v_max-v_min) - v_range == 0 && return v # all same - - # we know lenm1 ≥ 40, so this will never underflow. - # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback - if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small - return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi) - end - end - return sort!(v, lo, hi, a.fallback, o) - end +## default sorting policy ## - v_min, v_max = _extrema(v, lo, hi, o) - lt(o, v_min, v_max) || return v # all same - if T <: Integer && o isa DirectOrdering - R = o === Reverse - v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min) - if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small - return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? reverse : identity, lo, hi) - end - end +""" + InitialOptimizations(next) <: Algorithm - u_min, u_max = uint_map(v_min, o), uint_map(v_max, o) - u_range = maybe_unsigned(u_max-u_min) - if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small - u = uint_map!(v, lo, hi, o) - sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi) - return uint_unmap!(v, u, lo, hi, o) - end +Attempt to apply a suite of low-cost optimizations to the input vector before sorting. These +optimizations may be automatically applied by the `sort!` family of functions when +`alg=InsertionSort`, `alg=MergeSort`, or `alg=QuickSort` is passed as an argument. - # if u's range is small, then once we subtract out v_min, we'll get a vector like - # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009] - # where we only need to radix over the last few bits (5, in the example). - bits = unsigned(8sizeof(u_range) - leading_zeros(u_range)) - - # radix sort runs in O(bits * lenm1), insertion sort runs in O(lenm1^2). Radix sort - # has a constant factor that is three times higher, so radix runtime is 3bits * lenm1 - # and insertion runtime is lenm1^2. Empirically, insertion is faster than radix iff - # lenm1 < 3bits. 
- # Insertion < Radix - # lenm1^2 < 3 * bits * lenm1 - # lenm1 < 3bits - if lenm1 < 3bits - # at lenm1 = 64*3-1, QuickSort is about 20% faster than InsertionSort. - alg = a.fallback === QuickSort && lenm1 > 120 ? QuickSort : SMALL_ALGORITHM - return sort!(v, lo, hi, alg, o) - end +`InitialOptimizations` is an implementation detail and subject to change or removal in +future versions of Julia. - # At this point, we are committed to radix sort. - u = uint_map!(v, lo, hi, o) +If `next` is stable, then `InitialOptimizations(next)` is also stable. + +The specific optimizations attempted by `InitialOptimizations` are +[`SubArrayOptimization`](@ref), [`MissingOptimization`](@ref), [`BoolOptimization`](@ref), +dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`, and +[`IEEEFloatOptimization`](@ref). +""" +InitialOptimizations(next) = SubArrayOptimization( + MissingOptimization( + BoolOptimization( + Small{10}( + IEEEFloatOptimization( + next))))) +""" + DEFAULT_STABLE + +The default sorting algorithm. + +This algorithm is guaranteed to be stable (i.e. it will not reorder elements that compare +equal). It makes an effort to be fast for most inputs. + +The algorithms used by `DEFAULT_STABLE` are an implementation detail. See extended help +for the current dispatch system. + +# Extended Help + +`DEFAULT_STABLE` is composed of two parts: the [`InitialOptimizations`](@ref) and a hybrid +of Radix, Insertion, Counting, Quick sorts. + +We begin with MissingOptimization because it has no runtime cost when it is not +triggered and can enable other optimizations to be applied later. For example, +BoolOptimization cannot apply to an `AbstractVector{Union{Missing, Bool}}`, but after +[`MissingOptimization`](@ref) is applied, that input will be converted into am +`AbstractVector{Bool}`. + +We next apply [`BoolOptimization`](@ref) because it also has no runtime cost when it is not +triggered and when it is triggered, it is an incredibly efficient algorithm (sorting `Bool`s +is quite easy). + +Next, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 10`. This dispatch +occurs before the [`IEEEFloatOptimization`](@ref) pass because the +[`IEEEFloatOptimization`](@ref)s are not beneficial for very small inputs. + +To conclude the [`InitialOptimizations`](@ref), we apply [`IEEEFloatOptimization`](@ref). + +After these optimizations, we branch on whether radix sort and related algorithms can be +applied to the input vector and ordering. We conduct this branch by testing if +`UIntMappable(v, order) !== nothing`. That is, we see if we know of a reversible mapping +from `eltype(v)` to `UInt` that preserves the ordering `order`. We perform this check after +the initial optimizations because they can change the input vector's type and ordering to +make them `UIntMappable`. + +If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch +to [`ScratchQuickSort`](@ref). + +Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then +perform a presorted check ([`CheckSorted`](@ref)). + +We check for short inputs before performing the presorted check to avoid the overhead of the +check for small inputs. Because the alternate dispatch is to [`InsertionSort`](@ref) which +has efficient `O(n)` runtime on presorted inputs, the check is not necessary for small +inputs. + +We check if the input is reverse-sorted for long vectors (more than 500 elements) because +the check is essentially free unless the input is almost entirely reverse sorted. 
+ +Note that once the input is determined to be [`UIntMappable`](@ref), we know the order forms +a [total order](https://en.wikipedia.org/wiki/Total_order) over the inputs and so it is impossible to +perform an unstable sort because no two elements can compare equal unless they _are_ equal, +in which case switching them is undetectable. We utilize this fact to perform a more +aggressive reverse sorted check that will reverse the vector `[3, 2, 2, 1]`. + +After these potential fast-paths are tried and failed, we [`ComputeExtrema`](@ref) of the +input. This computation has a fairly fast `O(n)` runtime, but we still try to delay it until +it is necessary. + +Next, we [`ConsiderCountingSort`](@ref). If the range of the input is small compared to its +length, we apply [`CountingSort`](@ref). + +Next, we [`ConsiderRadixSort`](@ref). This is similar to the dispatch to counting sort, +but we consider the number of _bits_ in the range, rather than the range itself. +Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that reach this +stage. + +Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and +otherwise we dispatch to [`ScratchQuickSort`](@ref). +""" +const DEFAULT_STABLE = InitialOptimizations( + IsUIntMappable( + Small{40}( + CheckSorted( + ComputeExtrema( + ConsiderCountingSort( + ConsiderRadixSort( + Small{80}( + ScratchQuickSort())))))), + StableCheckSorted( + ScratchQuickSort()))) +""" + DEFAULT_UNSTABLE - # we subtract u_min to avoid radixing over unnecessary bits. For example, - # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002] - # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with - # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and - # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4] - # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits. - # the overhead for this subtraction is small enough that it is worthwhile in many cases. +An efficient sorting algorithm. - # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100 - @inbounds for i in lo:hi - u[i] -= u_min - end +The algorithms used by `DEFAULT_UNSTABLE` are an implementation detail. They are currently +the same as those used by [`DEFAULT_STABLE`](@ref), but this is subject to change in future. +""" +const DEFAULT_UNSTABLE = DEFAULT_STABLE +const SMALL_THRESHOLD = 20 - if t !== nothing && checkbounds(Bool, t, lo:hi) # Fully preallocated and aligned workspace - u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, t)) - uint_unmap!(v, u2, lo, hi, o, u_min) - elseif t !== nothing && (applicable(resize!, t) || length(t) >= hi-lo+1) # Viable workspace - length(t) >= hi-lo+1 || resize!(t, hi-lo+1) - t1 = axes(t, 1) isa OneTo ?
t : view(t, firstindex(t):lastindex(t)) - u2 = radix_sort!(view(u, lo:hi), 1, hi-lo+1, bits, reinterpret(U, t1)) - uint_unmap!(view(v, lo:hi), u2, 1, hi-lo+1, o, u_min) - else # No viable workspace - u2 = radix_sort!(u, lo, hi, bits, similar(u)) - uint_unmap!(v, u2, lo, hi, o, u_min) +function Base.show(io::IO, alg::Algorithm) + print_tree(io, alg, 0) +end +function print_tree(io::IO, alg::Algorithm, cols::Int) + print(io, " "^cols) + show_type(io, alg) + print(io, '(') + for (i, name) in enumerate(fieldnames(typeof(alg))) + arg = getproperty(alg, name) + i > 1 && print(io, ',') + if arg isa Algorithm + println(io) + print_tree(io, arg, cols+1) + else + i > 1 && print(io, ' ') + print(io, arg) + end end + print(io, ')') end - -## generic sorting methods ## +show_type(io::IO, alg::Algorithm) = Base.show_type_name(io, typeof(alg).name) +show_type(io::IO, alg::Small{N}) where N = print(io, "Base.Sort.Small{$N}") defalg(v::AbstractArray) = DEFAULT_STABLE defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation -function sort!(v::AbstractVector{T}, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, firstindex(v), lastindex(v), alg, order, t) -end - -function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, alg::Algorithm, - order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, lo, hi, alg, order) -end - """ sort!(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) -Sort the vector `v` in place. [`QuickSort`](@ref) is used by default for numeric arrays while -[`MergeSort`](@ref) is used for other arrays. You can specify an algorithm to use via the `alg` -keyword (see [Sorting Algorithms](@ref) for available algorithms). The `by` keyword lets you provide -a function that will be applied to each element before comparison; the `lt` keyword allows -providing a custom "less than" function (note that for every `x` and `y`, only one of `lt(x,y)` -and `lt(y,x)` can return `true`); use `rev=true` to reverse the sorting order. These -options are independent and can be used together in all possible combinations: if both `by` -and `lt` are specified, the `lt` function is applied to the result of the `by` function; -`rev=true` reverses whatever ordering specified via the `by` and `lt` keywords. +Sort the vector `v` in place. A stable algorithm is used by default: the +ordering of elements that compare equal is preserved. A specific algorithm can +be selected via the `alg` keyword (see [Sorting Algorithms](@ref) for available +algorithms). + +Elements are first transformed with the function `by` and then compared +according to either the function `lt` or the ordering `order`. Finally, the +resulting order is reversed if `rev=true` (this preserves forward stability: +elements that compare equal are not reversed). The current implementation applies +the `by` transformation before each comparison rather than once per element. + +Passing an `lt` other than `isless` along with an `order` other than +[`Base.Order.Forward`](@ref) or [`Base.Order.Reverse`](@ref) is not permitted, +otherwise all options are independent and can be used together in all possible +combinations. Note that `order` can also include a "by" transformation, in +which case it is applied after that defined with the `by` keyword. 
For more +information on `order` values see the documentation on [Alternate +Orderings](@ref). + +Relations between two elements are defined as follows (with "less" and +"greater" exchanged when `rev=true`): + +* `x` is less than `y` if `lt(by(x), by(y))` (or `Base.Order.lt(order, by(x), by(y))`) yields true. +* `x` is greater than `y` if `y` is less than `x`. +* `x` and `y` are equivalent if neither is less than the other ("incomparable" + is sometimes used as a synonym for "equivalent"). + +The result of `sort!` is sorted in the sense that every element is greater than +or equivalent to the previous one. + +The `lt` function must define a strict weak order, that is, it must be + +* irreflexive: `lt(x, x)` always yields `false`, +* asymmetric: if `lt(x, y)` yields `true` then `lt(y, x)` yields `false`, +* transitive: `lt(x, y) && lt(y, z)` implies `lt(x, z)`, +* transitive in equivalence: `!lt(x, y) && !lt(y, x)` and `!lt(y, z) && !lt(z, + y)` together imply `!lt(x, z) && !lt(z, x)`. In words: if `x` and `y` are + equivalent and `y` and `z` are equivalent then `x` and `z` must be + equivalent. + +For example `<` is a valid `lt` function for `Int` values but `≤` is not: it +violates irreflexivity. For `Float64` values even `<` is invalid as it violates +the fourth condition: `1.0` and `NaN` are equivalent and so are `NaN` and `2.0` +but `1.0` and `2.0` are not equivalent. + +See also [`sort`](@ref), [`sortperm`](@ref), [`sortslices`](@ref), +[`partialsort!`](@ref), [`partialsortperm`](@ref), [`issorted`](@ref), +[`searchsorted`](@ref), [`insorted`](@ref), [`Base.Order.ord`](@ref). # Examples ```jldoctest @@ -925,6 +1659,29 @@ julia> v = [(1, "c"), (3, "a"), (2, "b")]; sort!(v, by = x -> x[2]); v (3, "a") (2, "b") (1, "c") + +julia> sort(0:3, by=x->x-2, order=Base.Order.By(abs)) # same as sort(0:3, by=abs∘(x->x-2)) +4-element Vector{Int64}: + 2 + 1 + 3 + 0 + +julia> sort([2, NaN, 1, NaN, 3]) # correct sort with default lt=isless +5-element Vector{Float64}: + 1.0 + 2.0 + 3.0 + NaN + NaN + +julia> sort([2, NaN, 1, NaN, 3], lt=<) # wrong sort due to invalid lt. This behavior is undefined. +5-element Vector{Float64}: + 2.0 + NaN + 1.0 + NaN + 3.0 ``` """ function sort!(v::AbstractVector{T}; @@ -933,31 +1690,9 @@ function sort!(v::AbstractVector{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - workspace::Union{AbstractVector{T}, Nothing}=nothing) where T - sort!(v, alg, ord(lt,by,rev,order), workspace) -end - -# sort! for vectors of few unique integers -function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse, - lo=firstindex(x), hi=lastindex(x)) - offs = 1 - minval - - counts = fill(0, rangelen) - @inbounds for i = lo:hi - counts[x[i] + offs] += 1 - end - - idx = lo - @inbounds for i = maybereverse(1:rangelen) - lastidx = idx + counts[i] - 1 - val = i-offs - for j = idx:lastidx - x[j] = val - end - idx = lastidx + 1 - end - - return x + scratch::Union{Vector{T}, Nothing}=nothing) where T + _sort!(v, maybe_apply_initial_optimizations(alg), ord(lt,by,rev,order), (;scratch)) + v end """ @@ -987,15 +1722,15 @@ sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...) ## partialsortperm: the permutation to sort the first k elements of an array ## """ - partialsortperm(v, k; by=, lt=, rev=false) + partialsortperm(v, k; by=identity, lt=isless, rev=false) Return a partial permutation `I` of the vector `v`, so that `v[I]` returns values of a fully sorted version of `v` at index `k`.
If `k` is a range, a vector of indices is returned; if `k` is an integer, a single index is returned. The order is specified using the same -keywords as `sort!`. The permutation is stable, meaning that indices of equal elements -appear in ascending order. +keywords as `sort!`. The permutation is stable: the indices of equal elements +will appear in ascending order. -Note that this function is equivalent to, but more efficient than, calling `sortperm(...)[k]`. +This function is equivalent to, but more efficient than, calling `sortperm(...)[k]`. # Examples ```jldoctest @@ -1018,25 +1753,18 @@ julia> v[p] ``` """ partialsortperm(v::AbstractVector, k::Union{Integer,OrdinalRange}; kwargs...) = - partialsortperm!(similar(Vector{eltype(k)}, axes(v,1)), v, k; kwargs..., initialized=false) + partialsortperm!(similar(Vector{eltype(k)}, axes(v,1)), v, k; kwargs...) """ - partialsortperm!(ix, v, k; by=, lt=, rev=false, initialized=false) + partialsortperm!(ix, v, k; by=identity, lt=isless, rev=false) Like [`partialsortperm`](@ref), but accepts a preallocated index vector `ix` the same size as `v`, which is used to store (a permutation of) the indices of `v`. -If the index vector `ix` is initialized with the indices of `v` (or a permutation thereof), `initialized` should be set to -`true`. - -If `initialized` is `false` (the default), then `ix` is initialized to contain the indices of `v`. - -If `initialized` is `true`, but `ix` does not contain (a permutation of) the indices of `v`, the behavior of -`partialsortperm!` is undefined. +`ix` is initialized to contain the indices of `v`. (Typically, the indices of `v` will be `1:length(v)`, although if `v` has an alternative array type -with non-one-based indices, such as an `OffsetArray`, `ix` must also be an `OffsetArray` with the same -indices, and must contain as values (a permutation of) these same indices.) +with non-one-based indices, such as an `OffsetArray`, `ix` must share those same indices) Upon return, `ix` is guaranteed to have the indices `k` in their sorted positions, such that @@ -1048,6 +1776,8 @@ v[ix[k]] == partialsort(v, k) The return value is the `k`th element of `ix` if `k` is an integer, or view into `ix` if `k` is a range. +$(Base._DOCS_ALIASING_WARNING) + # Examples ```jldoctest julia> v = [3, 1, 2, 1]; @@ -1059,7 +1789,7 @@ julia> partialsortperm!(ix, v, 1) julia> ix = [1:4;]; -julia> partialsortperm!(ix, v, 2:3, initialized=true) +julia> partialsortperm!(ix, v, 2:3) 2-element view(::Vector{Int64}, 2:3) with eltype Int64: 4 3 @@ -1073,17 +1803,15 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector, order::Ordering=Forward, initialized::Bool=false) if axes(ix,1) != axes(v,1) - throw(ArgumentError("The index vector is used as a workspace and must have the " * + throw(ArgumentError("The index vector is used as scratch space and must have the " * "same length/indices as the source vector, $(axes(ix,1)) != $(axes(v,1))")) end - if !initialized - @inbounds for i in eachindex(ix) - ix[i] = i - end + @inbounds for i in eachindex(ix) + ix[i] = i end # do partial quicksort - sort!(ix, PartialQuickSort(k), Perm(ord(lt, by, rev, order), v)) + _sort!(ix, InitialOptimizations(ScratchQuickSort(k)), Perm(ord(lt, by, rev, order), v), (;)) maybeview(ix, k) end @@ -1096,12 +1824,15 @@ end Return a permutation vector or array `I` that puts `A[I]` in sorted order along the given dimension. If `A` has more than one dimension, then the `dims` keyword argument must be specified. 
The order is specified using the same keywords as [`sort!`](@ref). The permutation is guaranteed to be stable even -if the sorting algorithm is unstable, meaning that indices of equal elements appear in +if the sorting algorithm is unstable: the indices of equal elements will appear in ascending order. See also [`sortperm!`](@ref), [`partialsortperm`](@ref), [`invperm`](@ref), [`indexin`](@ref). To sort slices of an array, refer to [`sortslices`](@ref). +!!! compat "Julia 1.9" + The method accepting `dims` requires at least Julia 1.9. + # Examples ```jldoctest julia> v = [3, 1, 2]; @@ -1140,30 +1871,41 @@ function sortperm(A::AbstractArray; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - workspace::Union{AbstractVector{<:Integer}, Nothing}=nothing, + scratch::Union{Vector{<:Integer}, Nothing}=nothing, dims...) #to optionally specify dims argument - ordr = ord(lt,by,rev,order) - if ordr === Forward && isa(A,Vector) && eltype(A)<:Integer + if rev === true + _sortperm(A; alg, order=ord(lt, by, true, order), scratch, dims...) + else + _sortperm(A; alg, order=ord(lt, by, nothing, order), scratch, dims...) + end +end +function _sortperm(A::AbstractArray; alg, order, scratch, dims...) + if order === Forward && isa(A,Vector) && eltype(A)<:Integer n = length(A) if n > 1 min, max = extrema(A) (diff, o1) = sub_with_overflow(max, min) (rangelen, o2) = add_with_overflow(diff, oneunit(diff)) - if !o1 && !o2 && rangelen < div(n,2) + if !(o1 || o2)::Bool && rangelen < div(n,2) return sortperm_int_range(A, rangelen, min) end end end ix = copymutable(LinearIndices(A)) - sort!(ix; alg, order = Perm(ordr, vec(A)), workspace, dims...) + sort!(ix; alg, order = Perm(order, vec(A)), scratch, dims...) end """ - sortperm!(ix, A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, initialized::Bool=false, [dims::Integer]) + sortperm!(ix, A; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward, [dims::Integer]) + +Like [`sortperm`](@ref), but accepts a preallocated index vector or array `ix` with the same `axes` as `A`. +`ix` is initialized to contain the values `LinearIndices(A)`. -Like [`sortperm`](@ref), but accepts a preallocated index vector or array `ix` with the same `axes` as `A`. If `initialized` is `false` -(the default), `ix` is initialized to contain the values `LinearIndices(A)`. +$(Base._DOCS_ALIASING_WARNING) + +!!! compat "Julia 1.9" + The method accepting `dims` requires at least Julia 1.9. # Examples ```jldoctest @@ -1194,22 +1936,24 @@ julia> sortperm!(p, A; dims=2); p 2 4 ``` """ -function sortperm!(ix::AbstractArray{T}, A::AbstractArray; +@inline function sortperm!(ix::AbstractArray{T}, A::AbstractArray; alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, initialized::Bool=false, - workspace::Union{AbstractVector{T}, Nothing}=nothing, + scratch::Union{Vector{T}, Nothing}=nothing, dims...) where T <: Integer #to optionally specify dims argument (typeof(A) <: AbstractVector) == (:dims in keys(dims)) && throw(ArgumentError("Dims argument incorrect for type $(typeof(A))")) axes(ix) == axes(A) || throw(ArgumentError("index array must have the same size/axes as the source array, $(axes(ix)) != $(axes(A))")) - if !initialized - ix .= LinearIndices(A) + ix .= LinearIndices(A) + if rev === true + sort!(ix; alg, order=Perm(ord(lt, by, true, order), vec(A)), scratch, dims...) 
+ else + sort!(ix; alg, order=Perm(ord(lt, by, nothing, order), vec(A)), scratch, dims...) end - sort!(ix; alg, order = Perm(ord(lt, by, rev, order), vec(A)), workspace, dims...) end # sortperm for vectors of few unique integers @@ -1241,7 +1985,7 @@ end ## sorting multi-dimensional arrays ## """ - sort(A; dims::Integer, alg::Algorithm=DEFAULT_UNSTABLE, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) + sort(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) Sort a multidimensional array `A` along the given dimension. See [`sort!`](@ref) for a description of possible @@ -1269,12 +2013,12 @@ julia> sort(A, dims = 2) """ function sort(A::AbstractArray{T}; dims::Integer, - alg::Algorithm=DEFAULT_UNSTABLE, + alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, - workspace::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T + scratch::Union{Vector{T}, Nothing}=nothing) where T dim = dims order = ord(lt,by,rev,order) n = length(axes(A, dim)) @@ -1282,28 +2026,42 @@ function sort(A::AbstractArray{T}; pdims = (dim, setdiff(1:ndims(A), dim)...) # put the selected dimension first Ap = permutedims(A, pdims) Av = vec(Ap) - sort_chunks!(Av, n, alg, order, workspace) + sort_chunks!(Av, n, maybe_apply_initial_optimizations(alg), order, scratch) permutedims(Ap, invperm(pdims)) else Av = A[:] - sort_chunks!(Av, n, alg, order, workspace) + sort_chunks!(Av, n, maybe_apply_initial_optimizations(alg), order, scratch) reshape(Av, axes(A)) end end -@noinline function sort_chunks!(Av, n, alg, order, t) +@noinline function sort_chunks!(Av, n, alg, order, scratch) inds = LinearIndices(Av) - for s = first(inds):n:last(inds) - sort!(Av, s, s+n-1, alg, order, t) + sort_chunks!(Av, n, alg, order, scratch, first(inds), last(inds)) +end + +@noinline function sort_chunks!(Av, n, alg, order, scratch::Nothing, fst, lst) + for lo = fst:n:lst + s = _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) + s !== nothing && return sort_chunks!(Av, n, alg, order, s, lo+n, lst) + end + Av +end + +@noinline function sort_chunks!(Av, n, alg, order, scratch::AbstractVector, fst, lst) + for lo = fst:n:lst + _sort!(Av, alg, order, (; lo, hi=lo+n-1, scratch)) end Av end + """ sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward) Sort the multidimensional array `A` along dimension `dims`. -See [`sort!`](@ref) for a description of possible keyword arguments. +See the one-dimensional version of [`sort!`](@ref) for a description of +possible keyword arguments. To sort slices of an array, refer to [`sortslices`](@ref). @@ -1334,25 +2092,38 @@ function sort!(A::AbstractArray{T}; lt=isless, by=identity, rev::Union{Bool,Nothing}=nothing, - order::Ordering=Forward, - workspace::Union{AbstractVector{T}, Nothing}=similar(A, size(A, dims))) where T - _sort!(A, Val(dims), alg, ord(lt, by, rev, order), workspace) -end -function _sort!(A::AbstractArray{T}, ::Val{K}, - alg::Algorithm, - order::Ordering, - workspace::Union{AbstractVector{T}, Nothing}) where {K,T} + order::Ordering=Forward, # TODO stop eagerly over-allocating. + scratch::Union{Vector{T}, Nothing}=size(A, dims) < 10 ? nothing : Vector{T}(undef, size(A, dims))) where T nd = ndims(A) - - 1 <= K <= nd || throw(ArgumentError("dimension out of range")) - - remdims = ntuple(i -> i == K ? 1 : axes(A, i), nd) - for idx in CartesianIndices(remdims) - Av = view(A, ntuple(i -> i == K ? 
Colon() : idx[i], nd)...) - sort!(Av, alg, order, workspace) + 1 <= dims <= nd || throw(ArgumentError("dimension out of range")) + alg2 = maybe_apply_initial_optimizations(alg) + order2 = ord(lt, by, rev, order) + foreach(ntuple(Val, nd)) do d + get_value(d) == dims || return + # We assume that an Integer between 1 and nd must be equal to one of the + # values 1:nd. If this assumption is false, then what's an integer? and + # also sort! will silently do nothing. + + idxs = CartesianIndices(ntuple(i -> i == get_value(d) ? 1 : axes(A, i), ndims(A))) + get_view(idx) = view(A, ntuple(i -> i == get_value(d) ? Colon() : idx[i], ndims(A))...) + if d == Val(1) || size(A, get_value(d)) < 30 + for idx in idxs + sort!(get_view(idx); alg=alg2, order=order2, scratch) + end + else + v = similar(get_view(first(idxs))) + for idx in idxs + vw = get_view(idx) + v .= vw + sort!(v; alg=alg2, order=order2, scratch) + vw .= v + end + end + A end A end +get_value(::Val{x}) where x = x ## uint mapping to allow radix sorting primitives other than UInts ## @@ -1403,10 +2174,7 @@ uint_map(x::Signed, ::ForwardOrdering) = uint_unmap(::Type{T}, u::Unsigned, ::ForwardOrdering) where T <: Signed = xor(signed(u), typemin(T)) -# unsigned(Int) is not available during bootstrapping. -for (U, S) in [(UInt8, Int8), (UInt16, Int16), (UInt32, Int32), (UInt64, Int64), (UInt128, Int128)] - @eval UIntMappable(::Type{<:Union{$U, $S}}, ::ForwardOrdering) = $U -end +UIntMappable(T::BitIntegerType, ::ForwardOrdering) = unsigned(T) # Floats are not UIntMappable under regular orderings because they fail on NaN edge cases. # uint mappings for floats are defined in Float, where the Left and Right orderings @@ -1435,169 +2203,269 @@ function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering) end function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer, - order::Ordering, offset::U=zero(U)) where U <: Unsigned + order::Ordering, offset::U=zero(U), + index_offset::Integer=0) where U <: Unsigned @inbounds for i in lo:hi - v[i] = uint_unmap(eltype(v), u[i]+offset, order) + v[i] = uint_unmap(eltype(v), u[i+index_offset]+offset, order) end v end -## fast clever sorting for floats ## -module Float -using ..Sort -using ...Order -using ..Base: @inbounds, AbstractVector, Vector, last, firstindex, lastindex, Missing, Type, reinterpret +### Unused constructs for backward compatibility ### -import Core.Intrinsics: slt_int -import ..Sort: sort!, UIntMappable, uint_map, uint_unmap -import ...Order: lt, DirectOrdering +## Old algorithms ## -const Floats = Union{Float32,Float64} -const FPSortable = Union{ # Mixed Float32 and Float64 are not allowed. - AbstractVector{Union{Float32, Missing}}, - AbstractVector{Union{Float64, Missing}}, - AbstractVector{Float32}, - AbstractVector{Float64}, - AbstractVector{Missing}} +struct QuickSortAlg <: Algorithm end +struct MergeSortAlg <: Algorithm end -struct Left <: Ordering end -struct Right <: Ordering end +""" + PartialQuickSort{T <: Union{Integer,OrdinalRange}} -left(::DirectOrdering) = Left() -right(::DirectOrdering) = Right() +Indicate that a sorting function should use the partial quick sort algorithm. +`PartialQuickSort(k)` is like `QuickSort`, but is only required to find and +sort the elements that would end up in `v[k]` were `v` fully sorted. -left(o::Perm) = Perm(left(o.order), o.data) -right(o::Perm) = Perm(right(o.order), o.data) +Characteristics: + * *not stable*: does not preserve the ordering of elements that + compare equal (e.g. 
"a" and "A" in a sort of letters that + ignores case). + * *in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). -lt(::Left, x::T, y::T) where {T<:Floats} = slt_int(y, x) -lt(::Right, x::T, y::T) where {T<:Floats} = slt_int(x, y) +Note that `PartialQuickSort(k)` does not necessarily sort the whole array. For example, -uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x) -uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u) -uint_map(x::Float32, ::Right) = reinterpret(UInt32, x) -uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u) -UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32 +```jldoctest +julia> x = rand(100); -uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x) -uint_unmap(::Type{Float64}, u::UInt64, ::Left) = reinterpret(Float64, ~u) -uint_map(x::Float64, ::Right) = reinterpret(UInt64, x) -uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u) -UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64 +julia> k = 50:100; -isnan(o::DirectOrdering, x::Floats) = (x!=x) -isnan(o::DirectOrdering, x::Missing) = false -isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i]) +julia> s1 = sort(x; alg=QuickSort); -ismissing(o::DirectOrdering, x::Floats) = false -ismissing(o::DirectOrdering, x::Missing) = true -ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i]) +julia> s2 = sort(x; alg=PartialQuickSort(k)); -allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing -allowsmissing(::AbstractVector{<:Integer}, - ::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} = - T >: Missing +julia> map(issorted, (s1, s2)) +(true, false) -function specials2left!(testf::Function, v::AbstractVector, o::Ordering, - lo::Integer=firstindex(v), hi::Integer=lastindex(v)) - i = lo - @inbounds while i <= hi && testf(o,v[i]) - i += 1 - end - j = i + 1 - @inbounds while j <= hi - if testf(o,v[j]) - v[i], v[j] = v[j], v[i] - i += 1 - end - j += 1 - end - return i, hi +julia> map(x->issorted(x[k]), (s1, s2)) +(true, true) + +julia> s1[k] == s2[k] +true +``` +""" +struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm + k::T end -function specials2right!(testf::Function, v::AbstractVector, o::Ordering, - lo::Integer=firstindex(v), hi::Integer=lastindex(v)) - i = hi - @inbounds while lo <= i && testf(o,v[i]) - i -= 1 - end - j = i - 1 - @inbounds while lo <= j - if testf(o,v[j]) - v[i], v[j] = v[j], v[i] - i -= 1 + +""" + QuickSort + +Indicate that a sorting function should use the quick sort +algorithm, which is *not* stable. + +Characteristics: + * *not stable*: does not preserve the ordering of elements that + compare equal (e.g. "a" and "A" in a sort of letters that + ignores case). + * *in-place* in memory. + * *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref). + * *good performance* for large collections. +""" +const QuickSort = QuickSortAlg() + +""" + MergeSort + +Indicate that a sorting function should use the merge sort +algorithm. Merge sort divides the collection into +subcollections and repeatedly merges them, sorting each +subcollection at each step, until the entire +collection has been recombined in sorted form. + +Characteristics: + * *stable*: preserves the ordering of elements that compare + equal (e.g. "a" and "A" in a sort of letters that ignores + case). + * *not in-place* in memory. + * *divide-and-conquer* sort strategy. 
+ * *good performance* for large collections but typically not quite as + fast as [`QuickSort`](@ref). +""" +const MergeSort = MergeSortAlg() + +maybe_apply_initial_optimizations(alg::Algorithm) = alg +maybe_apply_initial_optimizations(alg::QuickSortAlg) = InitialOptimizations(alg) +maybe_apply_initial_optimizations(alg::MergeSortAlg) = InitialOptimizations(alg) +maybe_apply_initial_optimizations(alg::InsertionSortAlg) = InitialOptimizations(alg) + +# selectpivot! +# +# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi] and +# choose the middle value as a pivot +# +# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be +# greater than the pivot + +@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + @inbounds begin + mi = midpoint(lo, hi) + + # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place + if lt(o, v[lo], v[mi]) + v[mi], v[lo] = v[lo], v[mi] + end + + if lt(o, v[hi], v[lo]) + if lt(o, v[hi], v[mi]) + v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi] + else + v[hi], v[lo] = v[lo], v[hi] + end end - j -= 1 + + # return the pivot + return v[lo] end - return lo, i end -function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering) - lo, hi = firstindex(v), lastindex(v) - if allowsmissing(v, o) - i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi) - sort!(v, lo, i-1, a, o) - return i, hi - else - return specials2left!(isnan, v, o, lo, hi) +# partition! +# +# select a pivot, and partition v according to the pivot + +function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + pivot = selectpivot!(v, lo, hi, o) + # pivot == v[lo], v[hi] > pivot + i, j = lo, hi + @inbounds while true + i += 1; j -= 1 + while lt(o, v[i], pivot); i += 1; end; + while lt(o, pivot, v[j]); j -= 1; end; + i >= j && break + v[i], v[j] = v[j], v[i] end + v[j], v[lo] = pivot, v[j] + + # v[j] == pivot + # v[k] >= pivot for k > j + # v[i] <= pivot for i < j + return j end -function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering) - lo, hi = firstindex(v), lastindex(v) - if allowsmissing(v, o) - _, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi) - sort!(v, i+1, hi, a, o) - return lo, i - else - return specials2right!(isnan, v, o, lo, hi) + +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + if j-lo < hi-j + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + j+1 < hi && sort!(v, j+1, hi, a, o) + hi = j-1 + end end + return v end -specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) = - specials2right!(v, a, o) -specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) = - specials2left!(v, a, o) -specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) = - specials2right!(v, a, o) -specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) = - specials2left!(v, a, o) +sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, t0::Vector{T}) where T = + invoke(sort!, Tuple{typeof.((v, lo, hi, a, o))..., AbstractVector{T}}, v, lo, hi, a, o, t0) # For disambiguation +function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, 
a::MergeSortAlg, o::Ordering, + t0::Union{AbstractVector{T}, Nothing}=nothing) where T + @inbounds if lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + + m = midpoint(lo, hi) -issignleft(o::ForwardOrdering, x::Floats) = lt(o, x, zero(x)) -issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x)) -issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i]) + t = t0 === nothing ? similar(v, m-lo+1) : t0 + length(t) < m-lo+1 && resize!(t, m-lo+1) + Base.require_one_based_indexing(t) -function fpsort!(v::AbstractVector{T}, a::Algorithm, o::Ordering, - t::Union{AbstractVector{T}, Nothing}=nothing) where T - # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn). - # The overhead is O(n). For n < 10, it's not worth it. - length(v) < 10 && return sort!(v, firstindex(v), lastindex(v), SMALL_ALGORITHM, o, t) + sort!(v, lo, m, a, o, t) + sort!(v, m+1, hi, a, o, t) - i, j = lo, hi = specials2end!(v,a,o) - @inbounds while true - while i <= j && issignleft(o,v[i]); i += 1; end - while i <= j && !issignleft(o,v[j]); j -= 1; end - i <= j || break - v[i], v[j] = v[j], v[i] - i += 1; j -= 1 + i, j = 1, lo + while j <= m + t[i] = v[j] + i += 1 + j += 1 + end + + i, k = 1, lo + while k < j <= hi + if lt(o, v[j], t[i]) + v[k] = v[j] + j += 1 + else + v[k] = t[i] + i += 1 + end + k += 1 + end + while k < j + v[k] = t[i] + k += 1 + i += 1 + end end - sort!(v, lo, j, a, left(o), t) - sort!(v, i, hi, a, right(o), t) + return v end +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort, + o::Ordering) + @inbounds while lo < hi + hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o) + j = partition!(v, lo, hi, o) + + if j <= first(a.k) + lo = j+1 + elseif j >= last(a.k) + hi = j-1 + else + # recurse on the smaller chunk + # this is necessary to preserve O(log(n)) + # stack space in the worst case (rather than O(n)) + if j-lo < hi-j + lo < (j-1) && sort!(v, lo, j-1, a, o) + lo = j+1 + else + hi > (j+1) && sort!(v, j+1, hi, a, o) + hi = j-1 + end + end + end + return v +end -fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) = - sort!(v, firstindex(v), lastindex(v), a, o) +## Old extensibility mechanisms ## -function sort!(v::FPSortable, a::Algorithm, o::DirectOrdering, - t::Union{FPSortable, Nothing}=nothing) - fpsort!(v, a, o, t) +# Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way +sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) + _sort!(v, a, o, (; lo, hi, legacy_dispatch_entry=a)) + v end -function sort!(v::AbstractVector{T}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}, - t::Union{AbstractVector{T}, Nothing}=nothing) where T <: Union{Signed, Unsigned} - fpsort!(v, a, o, t) +sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, _) = sort!(v, lo, hi, a, o) +function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, scratch::Vector) + _sort!(v, a, o, (; lo, hi, scratch, legacy_dispatch_entry=a)) + v end -end # module Sort.Float +# Support dispatch on custom algorithms in the old way +# sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ... 
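As a concrete illustration of this legacy hook, a package could define something along the following lines. The `MyCustomAlgorithm` name mirrors the placeholder in the comment above; this is a hypothetical sketch, not code from Base.

```julia
# Old extension style: subtype Algorithm and provide the 5-argument sort!
# method named in the comment above. The body delegates to insertion sort so
# the example stays trivially correct.
struct MyCustomAlgorithm <: Base.Sort.Algorithm end

function Base.sort!(v::AbstractVector, lo::Integer, hi::Integer,
                    ::MyCustomAlgorithm, o::Base.Order.Ordering)
    sort!(v, lo, hi, InsertionSort, o)
end

# `sort!(rand(100); alg=MyCustomAlgorithm())` then reaches this method via the
# `_sort!` fallback defined immediately below.
```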
+function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) + @getkw lo hi scratch legacy_dispatch_entry + if legacy_dispatch_entry === a + # This error prevents infinite recursion for unknown algorithms + throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o)), ::Any) is not defined")) + else + sort!(v, lo, hi, a, o) + scratch + end +end end # module Sort diff --git a/base/special/exp.jl b/base/special/exp.jl index c3c7c6e6c194f..8e940a4d85ad9 100644 --- a/base/special/exp.jl +++ b/base/special/exp.jl @@ -70,31 +70,30 @@ LogB(::Val{:ℯ}, ::Type{Float16}) = -0.6931472f0 LogB(::Val{10}, ::Type{Float16}) = -0.30103f0 # Range reduced kernels -@inline function expm1b_kernel(::Val{2}, x::Float64) +function expm1b_kernel(::Val{2}, x::Float64) return x * evalpoly(x, (0.6931471805599393, 0.24022650695910058, 0.05550411502333161, 0.009618129548366803)) end -@inline function expm1b_kernel(::Val{:ℯ}, x::Float64) +function expm1b_kernel(::Val{:ℯ}, x::Float64) return x * evalpoly(x, (0.9999999999999912, 0.4999999999999997, 0.1666666857598779, 0.04166666857598777)) end - -@inline function expm1b_kernel(::Val{10}, x::Float64) +function expm1b_kernel(::Val{10}, x::Float64) return x * evalpoly(x, (2.3025850929940255, 2.6509490552391974, 2.034678825384765, 1.1712552025835192)) end -@inline function expb_kernel(::Val{2}, x::Float32) +function expb_kernel(::Val{2}, x::Float32) return evalpoly(x, (1.0f0, 0.6931472f0, 0.2402265f0, 0.05550411f0, 0.009618025f0, 0.0013333423f0, 0.00015469732f0, 1.5316464f-5)) end -@inline function expb_kernel(::Val{:ℯ}, x::Float32) +function expb_kernel(::Val{:ℯ}, x::Float32) return evalpoly(x, (1.0f0, 1.0f0, 0.5f0, 0.16666667f0, 0.041666217f0, 0.008333249f0, 0.001394858f0, 0.00019924171f0)) end -@inline function expb_kernel(::Val{10}, x::Float32) +function expb_kernel(::Val{10}, x::Float32) return evalpoly(x, (1.0f0, 2.3025851f0, 2.650949f0, 2.0346787f0, 1.1712426f0, 0.53937745f0, 0.20788547f0, 0.06837386f0)) @@ -175,12 +174,10 @@ const J_TABLE = (0x0000000000000000, 0xaac00b1afa5abcbe, 0x9b60163da9fb3335, 0xa 0xa66f0f9c1cb64129, 0x93af252b376bba97, 0xacdf3ac948dd7273, 0x99df50765b6e4540, 0x9faf6632798844f8, 0xa12f7bfdad9cbe13, 0xaeef91d802243c88, 0x874fa7c1819e90d8, 0xacdfbdba3692d513, 0x62efd3c22b8f71f1, 0x74afe9d96b2a23d9) -# XXX we want to mark :consistent-cy here so that this function can be concrete-folded, -# because the effect analysis currently can't prove it in the presence of `@inbounds` or -# `:boundscheck`, but still the access to `J_TABLE` is really safe here -Base.@assume_effects :consistent @inline function table_unpack(ind::Int32) +# :nothrow needed since the compiler can't prove `ind` is inbounds. +Base.@assume_effects :nothrow function table_unpack(ind::Int32) ind = ind & 255 + 1 # 255 == length(J_TABLE) - 1 - j = @inbounds J_TABLE[ind] + j = getfield(J_TABLE, ind) # use getfield so the compiler can prove consistent jU = reinterpret(Float64, JU_CONST | (j&JU_MASK)) jL = reinterpret(Float64, JL_CONST | (j>>8)) return jU, jL @@ -224,7 +221,7 @@ end if k <= -53 # The UInt64 forces promotion. (Only matters for 32 bit systems.) 
twopk = (k + UInt64(53)) << 52 - return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53) + return reinterpret(T, twopk + reinterpret(UInt64, small_part))*0x1p-53 end #k == 1024 && return (small_part * 2.0) * 2.0^1023 end @@ -241,15 +238,17 @@ end r = muladd(N_float, LogBo256L(base, T), r) k = N >> 8 jU, jL = table_unpack(N) - very_small = muladd(jU, expm1b_kernel(base, r), jL) - small_part = muladd(jU,xlo,very_small) + jU + kern = expm1b_kernel(base, r) + very_small = muladd(kern, jU*xlo, jL) + hi, lo = Base.canonicalize2(1.0, kern) + small_part = fma(jU, hi, muladd(jU, (lo+xlo), very_small)) if !(abs(x) <= SUBNORM_EXP(base, T)) x >= MAX_EXP(base, T) && return Inf x <= MIN_EXP(base, T) && return 0.0 if k <= -53 # The UInt64 forces promotion. (Only matters for 32 bit systems.) twopk = (k + UInt64(53)) << 52 - return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53) + return reinterpret(T, twopk + reinterpret(UInt64, small_part))*0x1p-53 end #k == 1024 && return (small_part * 2.0) * 2.0^1023 end @@ -325,8 +324,8 @@ for (func, fast_func, base) in ((:exp2, :exp2_fast, Val(2)), (:exp, :exp_fast, Val(:ℯ)), (:exp10, :exp10_fast, Val(10))) @eval begin - $func(x::Union{Float16,Float32,Float64}) = exp_impl(x, $base) - $fast_func(x::Union{Float32,Float64}) = exp_impl_fast(x, $base) + @noinline $func(x::Union{Float16,Float32,Float64}) = exp_impl(x, $base) + @noinline $fast_func(x::Union{Float32,Float64}) = exp_impl_fast(x, $base) end end @@ -461,7 +460,7 @@ function expm1(x::Float32) end x = Float64(x) N_float = round(x*Ln2INV(Float64)) - N = unsafe_trunc(UInt64, N_float) + N = unsafe_trunc(Int64, N_float) r = muladd(N_float, Ln2(Float64), x) hi = evalpoly(r, (1.0, .5, 0.16666667546642386, 0.041666183019487026, 0.008332997481506921, 0.0013966479175977883, 0.0002004037059220124)) @@ -478,7 +477,7 @@ function expm1(x::Float16) return Float16(x*evalpoly(x, (1f0, .5f0, 0.16666628f0, 0.04166785f0, 0.008351848f0, 0.0013675707f0))) end N_float = round(x*Ln2INV(Float32)) - N = unsafe_trunc(UInt32, N_float) + N = unsafe_trunc(Int32, N_float) r = muladd(N_float, Ln2(Float32), x) hi = evalpoly(r, (1f0, .5f0, 0.16666667f0, 0.041665863f0, 0.008333111f0, 0.0013981499f0, 0.00019983904f0)) small_part = r*hi diff --git a/base/special/hyperbolic.jl b/base/special/hyperbolic.jl index 74f750064c7c2..333951b6f6024 100644 --- a/base/special/hyperbolic.jl +++ b/base/special/hyperbolic.jl @@ -175,7 +175,7 @@ function asinh(x::T) where T <: Union{Float32, Float64} # return sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) # d) |x| >= 2^28 # return sign(x)*(log(x)+ln2)) - if isnan(x) || isinf(x) + if !isfinite(x) return x end absx = abs(x) diff --git a/base/special/log.jl b/base/special/log.jl index f257f49b0e642..029394b7a63f1 100644 --- a/base/special/log.jl +++ b/base/special/log.jl @@ -155,14 +155,11 @@ logbU(::Type{Float64},::Val{10}) = 0.4342944819032518 logbL(::Type{Float64},::Val{10}) = 1.098319650216765e-17 # Procedure 1 -# XXX we want to mark :consistent-cy here so that this function can be concrete-folded, -# because the effect analysis currently can't prove it in the presence of `@inbounds` or -# `:boundscheck`, but still the access to `t_log_Float64` is really safe here -Base.@assume_effects :consistent @inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,base=Val(:ℯ)) +@inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,base=Val(:ℯ)) jp = unsafe_trunc(Int,128.0*F)-127 ## Steps 1 and 2 - @inbounds hi,lo = t_log_Float64[jp] + Base.@assume_effects :nothrow 
:noub @inbounds hi,lo = t_log_Float64[jp] l_hi = mf* 0.6931471805601177 + hi l_lo = mf*-1.7239444525614835e-13 + lo @@ -216,14 +213,11 @@ end end # Procedure 1 -# XXX we want to mark :consistent-cy here so that this function can be concrete-folded, -# because the effect analysis currently can't prove it in the presence of `@inbounds` or -# `:boundscheck`, but still the access to `t_log_Float32` is really safe here -Base.@assume_effects :consistent @inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,base=Val(:ℯ)) +@inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,base=Val(:ℯ)) jp = unsafe_trunc(Int,128.0f0*F)-127 ## Steps 1 and 2 - @inbounds hi = t_log_Float32[jp] + Base.@assume_effects :nothrow :noub @inbounds hi = t_log_Float32[jp] l = mf*0.6931471805599453 + hi ## Step 3 @@ -260,14 +254,14 @@ end Float32(logb(Float32, base)*(u64 + q)) end -log2(x::Float32) = _log(x, Val(2), :log2) -log(x::Float32) = _log(x, Val(:ℯ), :log) -log10(x::Float32) = _log(x, Val(10), :log10) -log2(x::Float64) = _log(x, Val(2), :log2) -log(x::Float64) = _log(x, Val(:ℯ), :log) -log10(x::Float64) = _log(x, Val(10), :log10) +@noinline log2(x::Float32) = _log(x, Val(2), :log2) +@noinline log(x::Float32) = _log(x, Val(:ℯ), :log) +@noinline log10(x::Float32) = _log(x, Val(10), :log10) +@noinline log2(x::Float64) = _log(x, Val(2), :log2) +@noinline log(x::Float64) = _log(x, Val(:ℯ), :log) +@noinline log10(x::Float64) = _log(x, Val(10), :log10) -function _log(x::Float64, base, func) +@inline function _log(x::Float64, base, func::Symbol) if x > 0.0 x == Inf && return x @@ -294,15 +288,15 @@ function _log(x::Float64, base, func) return log_proc1(y,mf,F,f,base) elseif x == 0.0 - -Inf + return -Inf elseif isnan(x) - NaN + return NaN else throw_complex_domainerror(func, x) end end -function _log(x::Float32, base, func) +@inline function _log(x::Float32, base, func::Symbol) if x > 0f0 x == Inf32 && return x @@ -327,11 +321,11 @@ function _log(x::Float32, base, func) F = (y + 65536.0f0) - 65536.0f0 # 0x1p-7*round(0x1p7*y) f = y-F - log_proc1(y,mf,F,f,base) + return log_proc1(y,mf,F,f,base) elseif x == 0f0 - -Inf32 + return -Inf32 elseif isnan(x) - NaN32 + return NaN32 else throw_complex_domainerror(func, x) end @@ -367,7 +361,7 @@ function log1p(x::Float64) elseif isnan(x) NaN else - throw_complex_domainerror(:log1p, x) + throw_complex_domainerror_neg1(:log1p, x) end end @@ -399,7 +393,7 @@ function log1p(x::Float32) elseif isnan(x) NaN32 else - throw_complex_domainerror(:log1p, x) + throw_complex_domainerror_neg1(:log1p, x) end end @@ -418,8 +412,8 @@ end # end # return Tuple(table) #end -#const t_log_table_compat = make_compact_table(128) -const t_log_table_compat = ( +#const t_log_table_compact = make_compact_table(128) +const t_log_table_compact = ( (0xbfd62c82f2b9c8b5, 5.929407345889625e-15), (0xbfd5d1bdbf5808b4, -2.544157440035963e-14), (0xbfd57677174558b3, -3.443525940775045e-14), @@ -556,25 +550,26 @@ const t_log_table_compat = ( end # Log implementation that returns 2 numbers which sum to give true value with about 68 bits of precision -# Since `log` only makes sense for positive exponents, we speed up the implimentation by stealing the sign bit +# Since `log` only makes sense for positive exponents, we speed up the implementation by stealing the sign bit # of the input for an extra bit of the exponent which is used to normalize subnormal inputs. # Does not normalize results. 
# Adapted and modified from https://github.com/ARM-software/optimized-routines/blob/master/math/pow.c # Copyright (c) 2018-2020, Arm Limited. (which is also MIT licensed) # note that this isn't an exact translation as this version compacts the table to reduce cache pressure. -function _log_ext(xu) +function _log_ext(xu::UInt64) # x = 2^k z; where z is in range [0x1.69555p-1,0x1.69555p-0) and exact. # The range is split into N subintervals. # The ith subinterval contains z and c is near the center of the interval. tmp = reinterpret(Int64, xu - 0x3fe6955500000000) #0x1.69555p-1 - i = (tmp >> 45) & 127 z = reinterpret(Float64, xu - (tmp & 0xfff0000000000000)) k = Float64(tmp >> 52) # log(x) = k*Ln2 + log(c) + log1p(z/c-1). - t, logctail = t_log_table_compat[i+1] + # N.B. :nothrow and :noub since `idx` is known to be `1 ≤ idx ≤ length(t_log_table_compact)` + idx = (tmp >> 45) & (length(t_log_table_compact)-1) + 1 + t, logctail = Base.@assume_effects :nothrow :noub @inbounds t_log_table_compact[idx] invc, logc = log_tab_unpack(t) # Note: invc is j/N or j/N/2 where j is an integer in [N,2N) and - # |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. + # |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. r = fma(z, invc, -1.0) # k*Ln2 + log(c) + r. t1 = muladd(k, 0.6931471805598903, logc) #ln(2) hi part diff --git a/base/special/rem_pio2.jl b/base/special/rem_pio2.jl index c9767f50358c6..b0a17fdc25087 100644 --- a/base/special/rem_pio2.jl +++ b/base/special/rem_pio2.jl @@ -23,6 +23,7 @@ # @printf "0x%016x,\n" k # I -= k # end + const INV_2PI = ( 0x28be_60db_9391_054a, 0x7f09_d5f4_7d4d_3770, @@ -93,9 +94,9 @@ end return unsafe_trunc(Int, fn), DoubleFloat64(y1, y2) end + """ fromfraction(f::Int128) - Compute a tuple of values `(z1,z2)` such that ``z1 + z2 == f / 2^128`` and the significand of `z1` has 27 trailing zeros. @@ -108,7 +109,7 @@ function fromfraction(f::Int128) # 1. get leading term truncated to 26 bits s = ((f < 0) % UInt64) << 63 # sign bit x = abs(f) % UInt128 # magnitude - n1 = 128-leading_zeros(x) # ndigits0z(x,2) + n1 = Base.top_set_bit(x) # ndigits0z(x,2) m1 = ((x >> (n1-26)) % UInt64) << 27 d1 = ((n1-128+1021) % UInt64) << 52 z1 = reinterpret(Float64, s | (d1 + m1)) @@ -118,17 +119,14 @@ function fromfraction(f::Int128) if x2 == 0 return (z1, 0.0) end - n2 = 128-leading_zeros(x2) + n2 = Base.top_set_bit(x2) m2 = (x2 >> (n2-53)) % UInt64 d2 = ((n2-128+1021) % UInt64) << 52 z2 = reinterpret(Float64, s | (d2 + m2)) return (z1,z2) end -# XXX we want to mark :consistent-cy here so that this function can be concrete-folded, -# because the effect analysis currently can't prove it in the presence of `@inbounds` or -# `:boundscheck`, but still the accesses to `INV_2PI` are really safe here -Base.@assume_effects :consistent function paynehanek(x::Float64) +function paynehanek(x::Float64) # 1. Convert to form # # x = X * 2^k, @@ -167,15 +165,15 @@ Base.@assume_effects :consistent function paynehanek(x::Float64) idx = k >> 6 shift = k - (idx << 6) - if shift == 0 - @inbounds a1 = INV_2PI[idx+1] - @inbounds a2 = INV_2PI[idx+2] - @inbounds a3 = INV_2PI[idx+3] + Base.@assume_effects :nothrow :noub @inbounds if shift == 0 + a1 = INV_2PI[idx+1] + a2 = INV_2PI[idx+2] + a3 = INV_2PI[idx+3] else # use shifts to extract the relevant 64 bit window - @inbounds a1 = (idx < 0 ? 
zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift)) - @inbounds a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift)) - @inbounds a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift)) + a1 = (idx < 0 ? zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift)) + a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift)) + a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift)) end # 3. Perform the multiplication: @@ -213,14 +211,13 @@ end """ rem_pio2_kernel(x::Union{Float32, Float64}) - Calculate `x` divided by `π/2` accurately for arbitrarily large `x`. Returns a pair `(k, r)`, where `k` is the quadrant of the result (multiple of π/2) and `r` is the remainder, such that ``k * π/2 = x - r``. The remainder is given as a double-double pair. `k` is positive if `x > 0` and is negative if `x ≤ 0`. """ -@inline function rem_pio2_kernel(x::Float64) +@inline function rem_pio2_kernel(x::Float64) # accurate to 1e-22 xhp = poshighword(x) # xhp <= highword(5pi/4) implies |x| ~<= 5pi/4, if xhp <= 0x400f6a7a @@ -282,50 +279,15 @@ The remainder is given as a double-double pair. return paynehanek(x) end -## Float32 @inline function rem_pio2_kernel(x::Float32) - pio2_1 = 1.57079631090164184570e+00 - pio2_1t = 1.58932547735281966916e-08 - inv_pio2 = 6.36619772367581382433e-01 xd = convert(Float64, x) - absxd = abs(xd) - # it is assumed that NaN and Infs have been checked - if absxd <= pi*5/4 - if absxd <= pi*3/4 - if x > 0 - return 1, DoubleFloat32(xd - pi/2) - else - return -1, DoubleFloat32(xd + pi/2) - end - end - if x > 0 - return 2, DoubleFloat32(xd - pi) - else - return -2, DoubleFloat32(xd + pi) - end - elseif absxd <= pi*9/4 - if absxd <= pi*7/4 - if x > 0 - return 3, DoubleFloat32(xd - pi*3/2) - else - return -3, DoubleFloat32(xd + pi*3/2) - end - end - if x > 0 - return 4, DoubleFloat32(xd - pi*4/2) - else - return -4, DoubleFloat32(xd + pi*4/2) - end - end - #/* 33+53 bit pi is good enough for medium size */ - if absxd < Float32(pi)/2*2.0f0^28 # medium size */ - # use Cody Waite reduction with two coefficients - fn = round(xd*inv_pio2) - r = xd-fn*pio2_1 - w = fn*pio2_1t - y = r-w; + # use Cody Waite reduction with two coefficients + if abs(x) < Float32(pi*0x1p27) # x < 2^28 * pi/2 + fn = round(xd * (2/pi)) + r = fma(fn, -pi/2, xd) + y = fma(fn, -6.123233995736766e-17, r) # big(pi)/2 - pi/2 remainder return unsafe_trunc(Int, fn), DoubleFloat32(y) end - n, y = rem_pio2_kernel(xd) + n, y = @noinline paynehanek(xd) return n, DoubleFloat32(y.hi) end diff --git a/base/special/trig.jl b/base/special/trig.jl index e3033aab6c272..99b4ca91608f2 100644 --- a/base/special/trig.jl +++ b/base/special/trig.jl @@ -34,7 +34,7 @@ function sin(x::T) where T<:Union{Float32, Float64} end return sin_kernel(x) elseif isnan(x) - return T(NaN) + return x elseif isinf(x) sin_domain_error(x) end @@ -103,7 +103,7 @@ function cos(x::T) where T<:Union{Float32, Float64} end return cos_kernel(x) elseif isnan(x) - return T(NaN) + return x elseif isinf(x) cos_domain_error(x) else @@ -127,6 +127,7 @@ const DC3 = 2.48015872894767294178e-05 const DC4 = -2.75573143513906633035e-07 const DC5 = 2.08757232129817482790e-09 const DC6 = -1.13596475577881948265e-11 + """ cos_kernel(y) @@ -178,7 +179,7 @@ function sincos(x::T) where T<:Union{Float32, Float64} end return sincos_kernel(x) elseif isnan(x) - return T(NaN), T(NaN) + return x, x elseif isinf(x) sincos_domain_error(x) end @@ -220,7 +221,7 @@ function tan(x::T) where T<:Union{Float32, Float64} end 
return tan_kernel(x) elseif isnan(x) - return T(NaN) + return x elseif isinf(x) tan_domain_error(x) end @@ -255,7 +256,7 @@ end # # Note: tan(y+z) = tan(y) + tan'(y)*z # ~ tan(y) + (1+y*y)*z - # Therefore, for better accuracz in computing tan(y+z), let + # Therefore, for better accuracy in computing tan(y+z), let # 3 2 2 2 2 # r = y *(T2+y *(T3+y *(...+y *(T12+y *T13)))) # then @@ -581,8 +582,8 @@ function atan(y::T, x::T) where T<:Union{Float32, Float64} # S8) ATAN2(+-INF,+INF ) is +-pi/4 ; # S9) ATAN2(+-INF,-INF ) is +-3pi/4; # S10) ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2; - if isnan(x) || isnan(y) # S1 or S2 - return T(NaN) + if isnan(x) | isnan(y) # S1 or S2 + return isnan(x) ? x : y end if x == T(1.0) # then y/x = y and x > 0, see M2 @@ -722,23 +723,64 @@ function acos(x::T) where T <: Union{Float32, Float64} end end -# multiply in extended precision -function mulpi_ext(x::Float64) - m = 3.141592653589793 - m_hi = 3.1415926218032837 - m_lo = 3.178650954705639e-8 +# Uses minimax polynomial of sin(π * x) for π * x in [0, .25] +@inline function sinpi_kernel(x::Float64) + sinpi_kernel_wide(x) +end +@inline function sinpi_kernel_wide(x::Float64) + x² = x*x + x⁴ = x²*x² + r = evalpoly(x², (2.5501640398773415, -0.5992645293202981, 0.08214588658006512, + -7.370429884921779e-3, 4.662827319453555e-4, -2.1717412523382308e-5)) + return muladd(3.141592653589793, x, x*muladd(-5.16771278004997, + x², muladd(x⁴, r, 1.2245907532225998e-16))) +end +@inline function sinpi_kernel(x::Float32) + Float32(sinpi_kernel_wide(x)) +end +@inline function sinpi_kernel_wide(x::Float32) + x = Float64(x) + return x*evalpoly(x*x, (3.1415926535762266, -5.167712769188119, + 2.5501626483206374, -0.5992021090314925, 0.08100185277841528)) +end - x_hi = reinterpret(Float64, reinterpret(UInt64,x) & 0xffff_ffff_f800_0000) - x_lo = x-x_hi +@inline function sinpi_kernel(x::Float16) + Float16(sinpi_kernel_wide(x)) +end +@inline function sinpi_kernel_wide(x::Float16) + x = Float32(x) + return x*evalpoly(x*x, (3.1415927f0, -5.1677127f0, 2.5501626f0, -0.5992021f0, 0.081001855f0)) +end - y_hi = m*x - y_lo = x_hi * m_lo + (x_lo* m_hi + ((x_hi*m_hi-y_hi) + x_lo*m_lo)) +# Uses minimax polynomial of cos(π * x) for π * x in [0, .25] +@inline function cospi_kernel(x::Float64) + cospi_kernel_wide(x) +end +@inline function cospi_kernel_wide(x::Float64) + x² = x*x + r = x²*evalpoly(x², (4.058712126416765, -1.3352627688537357, 0.23533063027900392, + -0.025806887811869204, 1.9294917136379183e-3, -1.0368935675474665e-4)) + a_x² = 4.934802200544679 * x² + a_x²lo = muladd(3.109686485461973e-16, x², muladd(4.934802200544679, x², -a_x²)) - DoubleFloat64(y_hi,y_lo) + w = 1.0-a_x² + return w + muladd(x², r, ((1.0-w)-a_x²) - a_x²lo) +end +@inline function cospi_kernel(x::Float32) + Float32(cospi_kernel_wide(x)) +end +@inline function cospi_kernel_wide(x::Float32) + x = Float64(x) + return evalpoly(x*x, (1.0, -4.934802200541122, 4.058712123568637, + -1.3352624040152927, 0.23531426791507182, -0.02550710082498761)) +end +@inline function cospi_kernel(x::Float16) + Float16(cospi_kernel_wide(x)) +end +@inline function cospi_kernel_wide(x::Float16) + x = Float32(x) + return evalpoly(x*x, (1.0f0, -4.934802f0, 4.058712f0, -1.3352624f0, 0.23531426f0, -0.0255071f0)) end -mulpi_ext(x::Float32) = DoubleFloat32(pi*Float64(x)) -mulpi_ext(x::Rational) = mulpi_ext(float(x)) -mulpi_ext(x::Real) = pi*x # Fallback """ sinpi(x) @@ -747,118 +789,58 @@ Compute ``\\sin(\\pi x)`` more accurately than `sin(pi*x)`, especially for large See also [`sind`](@ref), 
[`cospi`](@ref), [`sincospi`](@ref). """ -function sinpi(x::T) where T<:AbstractFloat +function sinpi(_x::T) where T<:IEEEFloat + x = abs(_x) if !isfinite(x) isnan(x) && return x throw(DomainError(x, "`x` cannot be infinite.")) end - - ax = abs(x) - s = maxintfloat(T)/2 - ax >= s && return copysign(zero(T),x) # integer-valued - - # reduce to interval [-1,1] - # assumes RoundNearest rounding mode - t = 3*s - rx = x-((x+t)-t) # zeros may be incorrectly signed - arx = abs(rx) - - if (arx == 0) | (arx == 1) - copysign(zero(T),x) - elseif arx < 0.25 - sin_kernel(mulpi_ext(rx)) - elseif arx < 0.75 - y = mulpi_ext(T(0.5) - arx) - copysign(cos_kernel(y),rx) - else - y = mulpi_ext(copysign(one(T),rx) - rx) - sin_kernel(y) - end -end - -# Rationals -function sinpi(x::T) where T<:Rational - Tf = float(T) - if !isfinite(x) - throw(DomainError(x, "`x` must be finite.")) - end - - # until we get an IEEE remainder function (#9283) - rx = rem(x,2) - if rx > 1 - rx -= 2 - elseif rx < -1 - rx += 2 - end - arx = abs(rx) - - if (arx == 0) | (arx == 1) - copysign(zero(Tf),x) - elseif arx < 0.25 - sin_kernel(mulpi_ext(rx)) - elseif arx < 0.75 - y = mulpi_ext(T(0.5) - arx) - copysign(cos_kernel(y),rx) + # For large x, answers are all 1 or zero. + x >= maxintfloat(T) && return copysign(zero(T), _x) + + # reduce to interval [0, 0.5] + n = round(2*x) + rx = float(muladd(T(-.5), n, x)) + n = Int64(n) & 3 + if n==0 + res = sinpi_kernel(rx) + elseif n==1 + res = cospi_kernel(rx) + elseif n==2 + res = zero(T)-sinpi_kernel(rx) else - y = mulpi_ext(copysign(one(T),rx) - rx) - sin_kernel(y) + res = zero(T)-cospi_kernel(rx) end + return ifelse(signbit(_x), -res, res) end - """ cospi(x) Compute ``\\cos(\\pi x)`` more accurately than `cos(pi*x)`, especially for large `x`. """ -function cospi(x::T) where T<:AbstractFloat +function cospi(x::T) where T<:IEEEFloat + x = abs(x) if !isfinite(x) isnan(x) && return x throw(DomainError(x, "`x` cannot be infinite.")) end - - ax = abs(x) - s = maxintfloat(T) - ax >= s && return one(T) # even integer-valued - - # reduce to interval [-1,1], then [0,1] - # assumes RoundNearest rounding mode - rx = abs(ax-((ax+s)-s)) - - if rx <= 0.25 - cos_kernel(mulpi_ext(rx)) - elseif rx < 0.75 - y = mulpi_ext(T(0.5) - rx) - sin_kernel(y) - else - y = mulpi_ext(one(T) - rx) - -cos_kernel(y) - end -end - -# Rationals -function cospi(x::T) where T<:Rational - if !isfinite(x) - throw(DomainError(x, "`x` must be finite.")) - end - - ax = abs(x) - # until we get an IEEE remainder function (#9283) - rx = rem(ax,2) - if rx > 1 - rx = 2-rx - end - - if rx <= 0.25 - cos_kernel(mulpi_ext(rx)) - elseif rx < 0.75 - y = mulpi_ext(T(0.5) - rx) - sin_kernel(y) + # For large x, answers are all 1 or zero. + x >= maxintfloat(T) && return one(T) + + # reduce to interval [0, 0.5] + n = round(2*x) + rx = float(muladd(T(-.5), n, x)) + n = Int64(n) & 3 + if n==0 + return cospi_kernel(rx) + elseif n==1 + return zero(T)-sinpi_kernel(rx) + elseif n==2 + return zero(T)-cospi_kernel(rx) else - y = mulpi_ext(one(T) - rx) - -cos_kernel(y) + return sinpi_kernel(rx) end end - """ sincospi(x) @@ -870,74 +852,83 @@ where `x` is in radians), returning a tuple `(sine, cosine)`. See also: [`cispi`](@ref), [`sincosd`](@ref), [`sinpi`](@ref). 
""" -function sincospi(x::T) where T<:AbstractFloat +function sincospi(_x::T) where T<:IEEEFloat + x = abs(_x) if !isfinite(x) isnan(x) && return x, x throw(DomainError(x, "`x` cannot be infinite.")) end - - ax = abs(x) - s = maxintfloat(T) - ax >= s && return (copysign(zero(T), x), one(T)) # even integer-valued - - # reduce to interval [-1,1] - # assumes RoundNearest rounding mode - t = 3*(s/2) - rx = x-((x+t)-t) # zeros may be incorrectly signed - arx = abs(rx) - - # same selection scheme as sinpi and cospi - if (arx == 0) | (arx == 1) - return copysign(zero(T), x), ifelse(ax % 2 == 0, one(T), -one(T)) - elseif arx < 0.25 - return sincos_kernel(mulpi_ext(rx)) - elseif arx < 0.75 - y = mulpi_ext(T(0.5) - arx) - return copysign(cos_kernel(y), rx), sin_kernel(y) + # For large x, answers are all 1 or zero. + x >= maxintfloat(T) && return (copysign(zero(T), _x), one(T)) + + # reduce to interval [0, 0.5] + n = round(2*x) + rx = float(muladd(T(-.5), n, x)) + n = Int64(n) & 3 + si, co = sinpi_kernel(rx),cospi_kernel(rx) + if n==0 + si, co = si, co + elseif n==1 + si, co = co, zero(T)-si + elseif n==2 + si, co = zero(T)-si, zero(T)-co else - y_si = mulpi_ext(copysign(one(T), rx) - rx) - y_co = mulpi_ext(one(T) - arx) - return sin_kernel(y_si), -cos_kernel(y_co) + si, co = zero(T)-co, si end + si = ifelse(signbit(_x), -si, si) + return si, co end -# Rationals -function sincospi(x::T) where T<:Rational - Tf = float(T) - if !isfinite(x) - throw(DomainError(x, "`x` must be finite.")) - end +""" + tanpi(x) - # until we get an IEEE remainder function (#9283) - rx = rem(x,2) - if rx > 1 - rx -= 2 - elseif rx < -1 - rx += 2 - end - arx = abs(rx) +Compute ``\\tan(\\pi x)`` more accurately than `tan(pi*x)`, especially for large `x`. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. - # same selection scheme as sinpi and cospi - if (arx == 0) | (arx == 1) - return copysign(zero(Tf),x), ifelse(iseven(numerator(x)), one(Tf), -one(Tf)) - elseif arx < 0.25 - return sincos_kernel(mulpi_ext(rx)) - elseif arx < 0.75 - y = mulpi_ext(T(0.5) - arx) - return copysign(cos_kernel(y), rx), sin_kernel(y) +See also [`tand`](@ref), [`sinpi`](@ref), [`cospi`](@ref), [`sincospi`](@ref). +""" +function tanpi(_x::T) where T<:IEEEFloat + # This is modified from sincospi. + # Would it be faster or more accurate to make a tanpi_kernel? + x = abs(_x) + if !isfinite(x) + isnan(x) && return x + throw(DomainError(x, "`x` cannot be infinite.")) + end + # For large x, answers are all zero. + # All integer values for floats larger than maxintfloat are even. + x >= maxintfloat(T) && return copysign(zero(T), _x) + + # reduce to interval [0, 0.5] + n = round(2*x) + rx = float(muladd(T(-.5), n, x)) + n = Int64(n) & 3 + si, co = sinpi_kernel_wide(rx), cospi_kernel_wide(rx) + if n==0 + si, co = si, co + elseif n==1 + si, co = co, zero(T)-si + elseif n==2 + si, co = zero(T)-si, zero(T)-co else - y_si = mulpi_ext(copysign(one(T), rx) - rx) - y_co = mulpi_ext(one(T) - arx) - return sin_kernel(y_si), -cos_kernel(y_co) + si, co = zero(T)-co, si end + si = ifelse(signbit(_x), -si, si) + return float(T)(si / co) end sinpi(x::Integer) = x >= 0 ? zero(float(x)) : -zero(float(x)) cospi(x::Integer) = isodd(x) ? -one(float(x)) : one(float(x)) +tanpi(x::Integer) = x >= 0 ? (isodd(x) ? -zero(float(x)) : zero(float(x))) : + (isodd(x) ? 
zero(float(x)) : -zero(float(x))) sincospi(x::Integer) = (sinpi(x), cospi(x)) -sinpi(x::Real) = sinpi(float(x)) -cospi(x::Real) = cospi(float(x)) -sincospi(x::Real) = sincospi(float(x)) +sinpi(x::AbstractFloat) = sin(pi*x) +cospi(x::AbstractFloat) = cos(pi*x) +sincospi(x::AbstractFloat) = sincos(pi*x) +tanpi(x::AbstractFloat) = tan(pi*x) +tanpi(x::Complex) = sinpi(x) / cospi(x) # Is there a better way to do this? function sinpi(z::Complex{T}) where T F = float(T) @@ -1074,7 +1065,7 @@ isinf_real(x::Number) = false """ sinc(x) -Compute ``\\sin(\\pi x) / (\\pi x)`` if ``x \\neq 0``, and ``1`` if ``x = 0``. +Compute normalized sinc function ``\\operatorname{sinc}(x) = \\sin(\\pi x) / (\\pi x)`` if ``x \\neq 0``, and ``1`` if ``x = 0``. See also [`cosc`](@ref), its derivative. """ @@ -1093,6 +1084,8 @@ _sinc(x::ComplexF16) = ComplexF16(_sinc(ComplexF32(x))) Compute ``\\cos(\\pi x) / x - \\sin(\\pi x) / (\\pi x^2)`` if ``x \\neq 0``, and ``0`` if ``x = 0``. This is the derivative of `sinc(x)`. + +See also [`sinc`](@ref). """ cosc(x::Number) = _cosc(float(x)) function _cosc(x::Number) @@ -1191,7 +1184,7 @@ function sind(x::Real) if isinf(x) return throw(DomainError(x, "`x` cannot be infinite.")) elseif isnan(x) - return oftype(x,NaN) + return x end rx = copysign(float(rem(x,360)),x) @@ -1222,7 +1215,7 @@ function cosd(x::Real) if isinf(x) return throw(DomainError(x, "`x` cannot be infinite.")) elseif isnan(x) - return oftype(x,NaN) + return x end rx = abs(float(rem(x,360))) diff --git a/base/stacktraces.jl b/base/stacktraces.jl index 3cb81d82bd3f7..bb70b7ea1c099 100644 --- a/base/stacktraces.jl +++ b/base/stacktraces.jl @@ -20,9 +20,10 @@ Stack information representing execution context, with the following fields: The name of the function containing the execution context. -- `linfo::Union{Core.MethodInstance, CodeInfo, Nothing}` +- `linfo::Union{Core.MethodInstance, Method, Module, Core.CodeInfo, Nothing}` - The MethodInstance containing the execution context (if it could be found). + The MethodInstance or CodeInfo containing the execution context (if it could be found), \ + or Module (for macro expansions)" - `file::Symbol` @@ -52,8 +53,9 @@ struct StackFrame # this type should be kept platform-agnostic so that profiles file::Symbol "the line number in the file containing the execution context" line::Int - "the MethodInstance or CodeInfo containing the execution context (if it could be found)" - linfo::Union{MethodInstance, CodeInfo, Nothing} + "the MethodInstance or CodeInfo containing the execution context (if it could be found), \ + or Module (for macro expansions)" + linfo::Union{MethodInstance, Method, Module, CodeInfo, Nothing} "true if the code is from C" from_c::Bool "true if the code is from an inlined frame" @@ -95,6 +97,86 @@ function hash(frame::StackFrame, h::UInt) return h end +get_inlinetable(::Any) = nothing +function get_inlinetable(mi::MethodInstance) + isdefined(mi, :def) && mi.def isa Method && isdefined(mi, :cache) && isdefined(mi.cache, :inferred) && + mi.cache.inferred !== nothing || return nothing + linetable = ccall(:jl_uncompress_ir, Any, (Any, Any, Any), mi.def, mi.cache, mi.cache.inferred).linetable + return filter!(x -> x.inlined_at > 0, linetable) +end + +get_method_instance_roots(::Any) = nothing +function get_method_instance_roots(mi::Union{Method, MethodInstance}) + m = mi isa MethodInstance ? 
mi.def : mi + m isa Method && isdefined(m, :roots) || return nothing + return filter(x -> x isa MethodInstance, m.roots) +end + +function lookup_inline_frame_info(func::Symbol, file::Symbol, linenum::Int, inlinetable::Vector{Core.LineInfoNode}) + #REPL frames and some base files lack this prefix while others have it; should fix? + filestripped = Symbol(lstrip(string(file), ('.', '\\', '/'))) + linfo = nothing + #= + Some matching entries contain the MethodInstance directly. + Other matching entries contain only a Method or Symbol (function name); such entries + are located after the entry with the MethodInstance, so backtracking is required. + If backtracking fails, the Method or Module is stored for return, but we continue + the search in case a MethodInstance is found later. + TODO: If a backtrack has failed, do we need to backtrack again later if another Method + or Symbol match is found? Or can a limit on the subsequent backtracks be placed? + =# + for (i, line) in enumerate(inlinetable) + Base.IRShow.method_name(line) === func && line.file ∈ (file, filestripped) && line.line == linenum || continue + if line.method isa MethodInstance + linfo = line.method + break + elseif line.method isa Method || line.method isa Symbol + linfo = line.method isa Method ? line.method : line.module + # backtrack to find the matching MethodInstance, if possible + for j in (i - 1):-1:1 + nextline = inlinetable[j] + nextline.inlined_at == line.inlined_at && Base.IRShow.method_name(line) === Base.IRShow.method_name(nextline) && line.file === nextline.file || break + if nextline.method isa MethodInstance + linfo = nextline.method + break + end + end + end + end + return linfo +end + +function lookup_inline_frame_info(func::Symbol, file::Symbol, miroots::Vector{Any}) + # REPL frames and some base files lack this prefix while others have it; should fix? + filestripped = Symbol(lstrip(string(file), ('.', '\\', '/'))) + matches = filter(miroots) do x + x.def isa Method || return false + m = x.def::Method + return m.name == func && m.file ∈ (file, filestripped) + end + if length(matches) > 1 + # ambiguous, check if method is same and return that instead + all_matched = true + for m in matches + all_matched = m.def.line == matches[1].def.line && + m.def.module == matches[1].def.module + all_matched || break + end + if all_matched + return matches[1].def + end + # all else fails, return module if they match, or give up + all_matched = true + for m in matches + all_matched = m.def.module == matches[1].def.module + all_matched || break + end + return all_matched ? matches[1].def.module : nothing + elseif length(matches) == 1 + return matches[1] + end + return nothing +end """ lookup(pointer::Ptr{Cvoid}) -> Vector{StackFrame} @@ -107,11 +189,25 @@ Base.@constprop :none function lookup(pointer::Ptr{Cvoid}) infos = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint), pointer, false)::Core.SimpleVector pointer = convert(UInt64, pointer) isempty(infos) && return [StackFrame(empty_sym, empty_sym, -1, nothing, true, false, pointer)] # this is equal to UNKNOWN + parent_linfo = infos[end][4] + inlinetable = get_inlinetable(parent_linfo) + miroots = inlinetable === nothing ? 
get_method_instance_roots(parent_linfo) : nothing # fallback if linetable missing res = Vector{StackFrame}(undef, length(infos)) - for i in 1:length(infos) + for i in reverse(1:length(infos)) info = infos[i]::Core.SimpleVector @assert(length(info) == 6) - res[i] = StackFrame(info[1]::Symbol, info[2]::Symbol, info[3]::Int, info[4], info[5]::Bool, info[6]::Bool, pointer) + func = info[1]::Symbol + file = info[2]::Symbol + linenum = info[3]::Int + linfo = info[4] + if i < length(infos) + if inlinetable !== nothing + linfo = lookup_inline_frame_info(func, file, linenum, inlinetable) + elseif miroots !== nothing + linfo = lookup_inline_frame_info(func, file, miroots) + end + end + res[i] = StackFrame(func, file, linenum, linfo, info[5]::Bool, info[6]::Bool, pointer) end return res end @@ -153,7 +249,7 @@ end """ stacktrace([trace::Vector{Ptr{Cvoid}},] [c_funcs::Bool=false]) -> StackTrace -Returns a stack trace in the form of a vector of `StackFrame`s. (By default stacktrace +Return a stack trace in the form of a vector of `StackFrame`s. (By default stacktrace doesn't return C functions, but this can be enabled.) When called without specifying a trace, `stacktrace` first calls `backtrace`. """ @@ -200,7 +296,7 @@ end """ remove_frames!(stack::StackTrace, m::Module) -Returns the `StackTrace` with all `StackFrame`s from the provided `Module` removed. +Return the `StackTrace` with all `StackFrame`s from the provided `Module` removed. """ function remove_frames!(stack::StackTrace, m::Module) filter!(f -> !from(f, m), stack) @@ -219,35 +315,49 @@ function show_spec_linfo(io::IO, frame::StackFrame) else Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true) end + elseif linfo isa CodeInfo + print(io, "top-level scope") + elseif linfo isa Module + Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true) elseif linfo isa MethodInstance def = linfo.def - if isa(def, Method) - sig = linfo.specTypes - argnames = Base.method_argnames(def) - if def.nkw > 0 - # rearrange call kw_impl(kw_args..., func, pos_args...) to func(pos_args...) - kwarg_types = Any[ fieldtype(sig, i) for i = 2:(1+def.nkw) ] - uw = Base.unwrap_unionall(sig)::DataType - pos_sig = Base.rewrap_unionall(Tuple{uw.parameters[(def.nkw+2):end]...}, sig) - kwnames = argnames[2:(def.nkw+1)] - for i = 1:length(kwnames) - str = string(kwnames[i])::String - if endswith(str, "...") - kwnames[i] = Symbol(str[1:end-3]) - end - end - Base.show_tuple_as_call(io, def.name, pos_sig; - demangle=true, - kwargs=zip(kwnames, kwarg_types), - argnames=argnames[def.nkw+2:end]) - else - Base.show_tuple_as_call(io, def.name, sig; demangle=true, argnames) - end + if def isa Module + Base.show_mi(io, linfo, #=from_stackframe=#true) else - Base.show_mi(io, linfo, true) + show_spec_sig(io, def, linfo.specTypes) end - elseif linfo isa CodeInfo - print(io, "top-level scope") + else + m = linfo::Method + show_spec_sig(io, m, m.sig) + end +end + +function show_spec_sig(io::IO, m::Method, @nospecialize(sig::Type)) + if get(io, :limit, :false)::Bool + if !haskey(io, :displaysize) + io = IOContext(io, :displaysize => displaysize(io)) + end + end + argnames = Base.method_argnames(m) + argnames = replace(argnames, :var"#unused#" => :var"") + if m.nkw > 0 + # rearrange call kw_impl(kw_args..., func, pos_args...) 
to func(pos_args...; kw_args) + kwarg_types = Any[ fieldtype(sig, i) for i = 2:(1+m.nkw) ] + uw = Base.unwrap_unionall(sig)::DataType + pos_sig = Base.rewrap_unionall(Tuple{uw.parameters[(m.nkw+2):end]...}, sig) + kwnames = argnames[2:(m.nkw+1)] + for i = 1:length(kwnames) + str = string(kwnames[i])::String + if endswith(str, "...") + kwnames[i] = Symbol(str[1:end-3]) + end + end + Base.show_tuple_as_call(io, m.name, pos_sig; + demangle=true, + kwargs=zip(kwnames, kwarg_types), + argnames=argnames[m.nkw+2:end]) + else + Base.show_tuple_as_call(io, m.name, sig; demangle=true, argnames) end end @@ -277,9 +387,13 @@ function Base.parentmodule(frame::StackFrame) else return (def::Method).module end + elseif linfo isa Method + return linfo.module + elseif linfo isa Module + return linfo else - # The module is not always available (common reasons include inlined - # frames and frames arising from the interpreter) + # The module is not always available (common reasons include + # frames arising from the interpreter) nothing end end @@ -287,7 +401,7 @@ end """ from(frame::StackFrame, filter_mod::Module) -> Bool -Returns whether the `frame` is from the provided `Module` +Return whether the `frame` is from the provided `Module` """ function from(frame::StackFrame, m::Module) return parentmodule(frame) === m diff --git a/base/stat.jl b/base/stat.jl index 3b6294e65e7f2..56b960c4f74ea 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -25,6 +25,30 @@ export stat, uperm +""" + StatStruct + +A struct which stores the information from `stat`. +The following fields of this struct is considered public API: + +| Name | Type | Description | +|:--------|:--------------------------------|:-------------------------------------------------------------------| +| desc | `Union{String, Base.OS_HANDLE}` | The path or OS file descriptor | +| size | `Int64` | The size (in bytes) of the file | +| device | `UInt` | ID of the device that contains the file | +| inode | `UInt` | The inode number of the file | +| mode | `UInt` | The protection mode of the file | +| nlink | `Int` | The number of hard links to the file | +| uid | `UInt` | The user id of the owner of the file | +| gid | `UInt` | The group id of the file owner | +| rdev | `UInt` | If this file refers to a device, the ID of the device it refers to | +| blksize | `Int64` | The file-system preferred block size for the file | +| blocks | `Int64` | The number of 512-byte blocks allocated | +| mtime | `Float64` | Unix timestamp of when the file was last modified | +| ctime | `Float64` | Unix timestamp of when the file's metadata was changed | + +See also: [`stat`](@ref) +""" struct StatStruct desc :: Union{String, OS_HANDLE} # for show method, not included in equality or hash device :: UInt @@ -170,25 +194,24 @@ stat(fd::Integer) = stat(RawFD(fd)) """ stat(file) -Returns a structure whose fields contain information about the file. +Return a structure whose fields contain information about the file. 
The fields of the structure are: -| Name | Description | -|:--------|:-------------------------------------------------------------------| -| desc | The path or OS file descriptor | -| size | The size (in bytes) of the file | -| device | ID of the device that contains the file | -| inode | The inode number of the file | -| mode | The protection mode of the file | -| nlink | The number of hard links to the file | -| uid | The user id of the owner of the file | -| gid | The group id of the file owner | -| rdev | If this file refers to a device, the ID of the device it refers to | -| blksize | The file-system preferred block size for the file | -| blocks | The number of such blocks allocated | -| mtime | Unix timestamp of when the file was last modified | -| ctime | Unix timestamp of when the file's metadata was changed | - +| Name | Type | Description | +|:--------|:--------------------------------|:-------------------------------------------------------------------| +| desc | `Union{String, Base.OS_HANDLE}` | The path or OS file descriptor | +| size | `Int64` | The size (in bytes) of the file | +| device | `UInt` | ID of the device that contains the file | +| inode | `UInt` | The inode number of the file | +| mode | `UInt` | The protection mode of the file | +| nlink | `Int` | The number of hard links to the file | +| uid | `UInt` | The user id of the owner of the file | +| gid | `UInt` | The group id of the file owner | +| rdev | `UInt` | If this file refers to a device, the ID of the device it refers to | +| blksize | `Int64` | The file-system preferred block size for the file | +| blocks | `Int64` | The number of 512-byte blocks allocated | +| mtime | `Float64` | Unix timestamp of when the file was last modified | +| ctime | `Float64` | Unix timestamp of when the file's metadata was changed | """ stat(path...) = stat(joinpath(path...)) @@ -464,17 +487,17 @@ end islink(path...) = islink(lstat(path...)) # samefile can be used for files and directories: #11145#issuecomment-99511194 -samefile(a::StatStruct, b::StatStruct) = a.device==b.device && a.inode==b.inode -function samefile(a::AbstractString, b::AbstractString) - infoa = stat(a) - infob = stat(b) - if ispath(infoa) && ispath(infob) - samefile(infoa, infob) - else - return false - end +function samefile(a::StatStruct, b::StatStruct) + ispath(a) && ispath(b) && a.device == b.device && a.inode == b.inode end +""" + samefile(path_a::AbstractString, path_b::AbstractString) + +Check if the paths `path_a` and `path_b` refer to the same existing file or directory. +""" +samefile(a::AbstractString, b::AbstractString) = samefile(stat(a), stat(b)) + """ ismount(path) -> Bool diff --git a/base/stream.jl b/base/stream.jl index 948c12ad604b4..3124b8b0c0a24 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -105,7 +105,7 @@ function eof(s::LibuvStream) bytesavailable(s) > 0 && return false wait_readnb(s, 1) # This function is race-y if used from multiple threads, but we guarantee - # it to never return false until the stream is definitively exhausted + # it to never return true until the stream is definitively exhausted # and that we won't return true if there's a readerror pending (it'll instead get thrown). 
# This requires some careful ordering here (TODO: atomic loads) bytesavailable(s) > 0 && return false @@ -409,7 +409,7 @@ function wait_readnb(x::LibuvStream, nb::Int) while bytesavailable(x.buffer) < nb x.readerror === nothing || throw(x.readerror) isopen(x) || break - x.status != StatusEOF || break + x.status == StatusEOF && break x.throttle = max(nb, x.throttle) start_reading(x) # ensure we are reading iolock_end() @@ -436,7 +436,10 @@ end function closewrite(s::LibuvStream) iolock_begin() - check_open(s) + if !iswritable(s) + iolock_end() + return + end req = Libc.malloc(_sizeof_uv_shutdown) uv_req_set_data(req, C_NULL) # in case we get interrupted before arriving at the wait call err = ccall(:uv_shutdown, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}), @@ -457,7 +460,7 @@ function closewrite(s::LibuvStream) # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we won't get spurious notifications later @@ -565,7 +568,6 @@ displaysize() = (parse(Int, get(ENV, "LINES", "24")), parse(Int, get(ENV, "COLUMNS", "80")))::Tuple{Int, Int} function displaysize(io::TTY) - # A workaround for #34620 and #26687 (this still has the TOCTOU problem). check_open(io) local h::Int, w::Int @@ -588,6 +590,7 @@ function displaysize(io::TTY) s1 = Ref{Int32}(0) s2 = Ref{Int32}(0) iolock_begin() + check_open(io) Base.uv_error("size (TTY)", ccall(:uv_tty_get_winsize, Int32, (Ptr{Cvoid}, Ptr{Int32}, Ptr{Int32}), io, s1, s2) != 0) @@ -665,9 +668,11 @@ function uv_readcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid}) elseif nread == UV_EOF # libuv called uv_stop_reading already if stream.status != StatusClosing stream.status = StatusEOF - if stream isa TTY # TODO: || ccall(:uv_is_writable, Cint, (Ptr{Cvoid},), stream.handle) != 0 - # stream can still be used either by reseteof # TODO: or write - notify(stream.cond) + notify(stream.cond) + if stream isa TTY + # stream can still be used by reseteof (or possibly write) + elseif !(stream isa PipeEndpoint) && ccall(:uv_is_writable, Cint, (Ptr{Cvoid},), stream.handle) != 0 + # stream can still be used by write else # underlying stream is no longer useful: begin finalization ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle) @@ -676,6 +681,7 @@ function uv_readcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid}) end else stream.readerror = _UVError("read", nread) + notify(stream.cond) # This is a fatal connection error ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle) stream.status = StatusClosing @@ -1047,7 +1053,7 @@ function uv_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt) # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct) if uv_req_data(uvw) != C_NULL # uvw is still alive, # so make sure we won't get spurious notifications later @@ -1358,7 +1364,7 @@ julia> io1 = open("same/path", "w") julia> io2 = open("same/path", "w") -julia> redirect_stdio(f, stdout=io1, stderr=io2) # not suppored +julia> redirect_stdio(f, stdout=io1, stderr=io2) # not supported ``` Also the `stdin` argument may not be the same descriptor as `stdout` or `stderr`. 
```julia-repl @@ -1486,7 +1492,7 @@ closewrite(s::BufferStream) = close(s) function close(s::BufferStream) lock(s.cond) do s.status = StatusClosed - notify(s.cond) + notify(s.cond) # aka flush nothing end end @@ -1546,6 +1552,7 @@ stop_reading(s::BufferStream) = nothing write(s::BufferStream, b::UInt8) = write(s, Ref{UInt8}(b)) function unsafe_write(s::BufferStream, p::Ptr{UInt8}, nb::UInt) nwrite = lock(s.cond) do + check_open(s) rv = unsafe_write(s.buffer, p, nb) s.buffer_writes || notify(s.cond) rv @@ -1566,9 +1573,18 @@ end buffer_writes(s::BufferStream, bufsize=0) = (s.buffer_writes = true; s) function flush(s::BufferStream) lock(s.cond) do + check_open(s) notify(s.cond) nothing end end skip(s::BufferStream, n) = skip(s.buffer, n) + +function reseteof(x::BufferStream) + lock(s.cond) do + s.status = StatusOpen + nothing + end + nothing +end diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl new file mode 100644 index 0000000000000..c0f3623f41696 --- /dev/null +++ b/base/strings/annotated.jl @@ -0,0 +1,388 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + AnnotatedString{S <: AbstractString} <: AbstractString + +A string with metadata, in the form of annotated regions. + +More specifically, this is a simple wrapper around any other +[`AbstractString`](@ref) that allows for regions of the wrapped string to be +annotated with labeled values. + +```text + C + ┌──────┸─────────┐ + "this is an example annotated string" + └──┰────────┼─────┘ │ + A └─────┰─────────┘ + B +``` + +The above diagram represents a `AnnotatedString` where three ranges have been +annotated (labeled `A`, `B`, and `C`). Each annotation holds a label (`Symbol`) +and a value (`Any`), paired together as a `Pair{Symbol, <:Any}`. + +Labels do not need to be unique, the same region can hold multiple annotations +with the same label. + +See also [`AnnotatedChar`](@ref), [`annotatedstring`](@ref), +[`annotations`](@ref), and [`annotate!`](@ref). + +!!! warning + While the constructors are part of the Base public API, the fields + of `AnnotatedString` are not. This is to allow for potential future + changes in the implementation of this type. Instead use the + [`annotations`](@ref), and [`annotate!`](@ref) getter/setter + functions. + +# Constructors + +```julia +AnnotatedString(s::S<:AbstractString) -> AnnotatedString{S} +AnnotatedString(s::S<:AbstractString, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, <:Any}}}) +``` + +A AnnotatedString can also be created with [`annotatedstring`](@ref), which acts much +like [`string`](@ref) but preserves any annotations present in the arguments. + +# Example + +```julia-repl +julia> AnnotatedString("this is an example annotated string", + [(1:18, :A => 1), (12:28, :B => 2), (18:35, :C => 3)]) +"this is an example annotated string" +``` +""" +struct AnnotatedString{S <: AbstractString} <: AbstractString + string::S + annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}} +end + +""" + AnnotatedChar{S <: AbstractChar} <: AbstractChar + +A Char with annotations. + +More specifically, this is a simple wrapper around any other +[`AbstractChar`](@ref), which holds a list of arbitrary labeled annotations +(`Pair{Symbol, <:Any}`) with the wrapped character. + +See also: [`AnnotatedString`](@ref), [`annotatedstring`](@ref), `annotations`, +and `annotate!`. + +!!! warning + While the constructors are part of the Base public API, the fields + of `AnnotatedChar` are not. 
This it to allow for potential future + changes in the implementation of this type. Instead use the + [`annotations`](@ref), and [`annotate!`](@ref) getter/setter + functions. + +# Constructors + +```julia +AnnotatedChar(s::S) -> AnnotatedChar{S} +AnnotatedChar(s::S, annotations::Vector{Pair{Symbol, <:Any}}) +``` + +# Examples + +```julia-repl +julia> AnnotatedChar('j', :label => 1) +'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase) +``` +""" +struct AnnotatedChar{C <: AbstractChar} <: AbstractChar + char::C + annotations::Vector{Pair{Symbol, Any}} +end + +## Constructors ## + +# When called with overly-specialised arguments + +AnnotatedString(s::AbstractString, annots::Vector{<:Tuple{UnitRange{Int}, <:Pair{Symbol, <:Any}}}) = + AnnotatedString(s, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}(annots)) + +AnnotatedChar(c::AbstractChar, annots::Vector{<:Pair{Symbol, <:Any}}) = + AnnotatedChar(c, Vector{Pair{Symbol, Any}}(annots)) + +# Constructors to avoid recursive wrapping + +AnnotatedString(s::AnnotatedString, annots::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}) = + AnnotatedString(s.string, vcat(s.annotations, annots)) + +AnnotatedChar(c::AnnotatedChar, annots::Vector{Pair{Symbol, Any}}) = + AnnotatedChar(c.char, vcat(s.annotations, annots)) + +String(s::AnnotatedString{String}) = s.string # To avoid pointless overhead + +## Conversion/promotion ## + +convert(::Type{AnnotatedString}, s::AnnotatedString) = s +convert(::Type{AnnotatedString{S}}, s::S) where {S <: AbstractString} = + AnnotatedString(s, Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()) +convert(::Type{AnnotatedString}, s::S) where {S <: AbstractString} = + convert(AnnotatedString{S}, s) +AnnotatedString(s::S) where {S <: AbstractString} = convert(AnnotatedString{S}, s) + +convert(::Type{AnnotatedChar}, c::AnnotatedChar) = c +convert(::Type{AnnotatedChar{C}}, c::C) where { C <: AbstractChar } = + AnnotatedChar{C}(c, Vector{Pair{Symbol, Any}}()) +convert(::Type{AnnotatedChar}, c::C) where { C <: AbstractChar } = + convert(AnnotatedChar{C}, c) + +AnnotatedChar(c::AbstractChar) = convert(AnnotatedChar, c) +AnnotatedChar(c::UInt32) = convert(AnnotatedChar, Char(c)) +AnnotatedChar{C}(c::UInt32) where {C <: AbstractChar} = convert(AnnotatedChar, C(c)) + +promote_rule(::Type{<:AnnotatedString}, ::Type{<:AbstractString}) = AnnotatedString + +## AbstractString interface ## + +ncodeunits(s::AnnotatedString) = ncodeunits(s.string) +codeunits(s::AnnotatedString) = codeunits(s.string) +codeunit(s::AnnotatedString) = codeunit(s.string) +codeunit(s::AnnotatedString, i::Integer) = codeunit(s.string, i) +isvalid(s::AnnotatedString, i::Integer) = isvalid(s.string, i) +@propagate_inbounds iterate(s::AnnotatedString, i::Integer=firstindex(s)) = + if i <= lastindex(s.string); (s[i], nextind(s, i)) end +eltype(::Type{<:AnnotatedString{S}}) where {S} = AnnotatedChar{eltype(S)} +firstindex(s::AnnotatedString) = firstindex(s.string) +lastindex(s::AnnotatedString) = lastindex(s.string) + +function getindex(s::AnnotatedString, i::Integer) + @boundscheck checkbounds(s, i) + @inbounds if isvalid(s, i) + AnnotatedChar(s.string[i], annotations(s, i)) + else + string_index_err(s, i) + end +end + +## AbstractChar interface ## + +ncodeunits(c::AnnotatedChar) = ncodeunits(c.char) +codepoint(c::AnnotatedChar) = codepoint(c.char) + +# Avoid the iteration fallback with comparison +cmp(a::AnnotatedString, b::AbstractString) = cmp(a.string, b) +cmp(a::AbstractString, b::AnnotatedString) = cmp(a, b.string) +# To avoid method ambiguity 
+cmp(a::AnnotatedString, b::AnnotatedString) = cmp(a.string, b.string) + +==(a::AnnotatedString, b::AnnotatedString) = + a.string == b.string && a.annotations == b.annotations + +==(a::AnnotatedString, b::AbstractString) = isempty(a.annotations) && a.string == b +==(a::AbstractString, b::AnnotatedString) = isempty(b.annotations) && a == b.string + +""" + annotatedstring(values...) + +Create a `AnnotatedString` from any number of `values` using their +[`print`](@ref)ed representation. + +This acts like [`string`](@ref), but takes care to preserve any annotations +present (in the form of [`AnnotatedString`](@ref) or [`AnnotatedChar`](@ref) values). + +See also [`AnnotatedString`](@ref) and [`AnnotatedChar`](@ref). + +## Examples + +```julia-repl +julia> annotatedstring("now a AnnotatedString") +"now a AnnotatedString" + +julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label => 1)]), ", and unannotated") +"annotated, and unannotated" +``` +""" +function annotatedstring(xs...) + isempty(xs) && return AnnotatedString("") + size = mapreduce(_str_sizehint, +, xs) + s = IOContext(IOBuffer(sizehint=size), :color => true) + annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() + for x in xs + if x isa AnnotatedString + for (region, annot) in x.annotations + push!(annotations, (s.io.size .+ (region), annot)) + end + print(s, x.string) + elseif x isa SubString{<:AnnotatedString} + for (region, annot) in x.string.annotations + start, stop = first(region), last(region) + if start <= x.offset + x.ncodeunits && stop > x.offset + rstart = s.io.size + max(0, start - x.offset - 1) + 1 + rstop = s.io.size + min(stop, x.offset + x.ncodeunits) - x.offset + push!(annotations, (rstart:rstop, annot)) + end + end + print(s, SubString(x.string.string, x.offset, x.ncodeunits, Val(:noshift))) + elseif x isa AnnotatedChar + for annot in x.annotations + push!(annotations, (1+s.io.size:1+s.io.size, annot)) + end + print(s, x.char) + else + print(s, x) + end + end + str = String(resize!(s.io.data, s.io.size)) + AnnotatedString(str, annotations) +end + +annotatedstring(s::AnnotatedString) = s +annotatedstring(c::AnnotatedChar) = + AnnotatedString(string(c.char), [(1:ncodeunits(c), annot) for annot in c.annotations]) + +AnnotatedString(s::SubString{<:AnnotatedString}) = annotatedstring(s) + +""" + annotatedstring_optimize!(str::AnnotatedString) + +Merge contiguous identical annotations in `str`. 
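A small illustration of the merge described above, using the constructor and accessor defined in this file (these names live in `Base` and are not exported, hence the qualification): two adjacent regions carrying the same `label => value` pair collapse into a single region.

```julia
str = Base.AnnotatedString("hello world",
                           [(1:5, :face => :bold), (6:11, :face => :bold)])
Base.annotatedstring_optimize!(str)
Base.annotations(str)   # expected: [(1:11, :face => :bold)]
```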
+""" +function annotatedstring_optimize!(s::AnnotatedString) + last_seen = Dict{Pair{Symbol, Any}, Int}() + i = 1 + while i <= length(s.annotations) + region, keyval = s.annotations[i] + prev = get(last_seen, keyval, 0) + if prev > 0 + lregion, _ = s.annotations[prev] + if last(lregion) + 1 == first(region) + s.annotations[prev] = + setindex(s.annotations[prev], + first(lregion):last(region), + 1) + deleteat!(s.annotations, i) + else + delete!(last_seen, keyval) + end + else + last_seen[keyval] = i + i += 1 + end + end + s +end + +function repeat(str::AnnotatedString, r::Integer) + r == 0 && return one(AnnotatedString) + r == 1 && return str + unannot = repeat(str.string, r) + annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}() + len = ncodeunits(str) + fullregion = firstindex(str):lastindex(str) + for (region, annot) in str.annotations + if region == fullregion + push!(annotations, (firstindex(unannot):lastindex(unannot), annot)) + end + end + for offset in 0:len:(r-1)*len + for (region, annot) in str.annotations + if region != fullregion + push!(annotations, (region .+ offset, annot)) + end + end + end + AnnotatedString(unannot, annotations) |> annotatedstring_optimize! +end + +repeat(str::SubString{<:AnnotatedString}, r::Integer) = + repeat(AnnotatedString(str), r) + +function repeat(c::AnnotatedChar, r::Integer) + str = repeat(c.char, r) + fullregion = firstindex(str):lastindex(str) + AnnotatedString(str, [(fullregion, annot) for annot in c.annotations]) +end + +function reverse(s::AnnotatedString) + lastind = lastindex(s) + AnnotatedString(reverse(s.string), + [(UnitRange(1 + lastind - last(region), + 1 + lastind - first(region)), + annot) + for (region, annot) in s.annotations]) +end + +# TODO optimise? +reverse(s::SubString{<:AnnotatedString}) = reverse(AnnotatedString(s)) + +# TODO implement `replace(::AnnotatedString, ...)` + +## End AbstractString interface ## + +""" + annotate!(str::AnnotatedString, [range::UnitRange{Int}], label::Symbol => value) + annotate!(str::SubString{AnnotatedString}, [range::UnitRange{Int}], label::Symbol => value) + +Annotate a `range` of `str` (or the entire string) with a labeled value (`label` => `value`). +To remove existing `label` annotations, use a value of `nothing`. +""" +function annotate!(s::AnnotatedString, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any})) + label, val = labelval + indices = searchsorted(s.annotations, (range,), by=first) + if val === nothing + labelindex = filter(i -> first(s.annotations[i][2]) === label, indices) + for index in Iterators.reverse(labelindex) + deleteat!(s.annotations, index) + end + else + splice!(s.annotations, indices, [(range, Pair{Symbol, Any}(label, val))]) + end + s +end + +annotate!(ss::AnnotatedString, @nospecialize(labelval::Pair{Symbol, <:Any})) = + annotate!(ss, firstindex(ss):lastindex(ss), labelval) + +annotate!(s::SubString{<:AnnotatedString}, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (annotate!(s.string, s.offset .+ (range), labelval); s) + +annotate!(s::SubString{<:AnnotatedString}, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (annotate!(s.string, s.offset .+ (1:s.ncodeunits), labelval); s) + +""" + annotate!(char::AnnotatedChar, label::Symbol => value) + +Annotate `char` with the pair `label => value`. 
+""" +annotate!(c::AnnotatedChar, @nospecialize(labelval::Pair{Symbol, <:Any})) = + (push!(c.annotations, labelval); c) + +""" + annotations(str::AnnotatedString, [position::Union{Integer, UnitRange}]) + annotations(str::SubString{AnnotatedString}, [position::Union{Integer, UnitRange}]) + +Get all annotations that apply to `str`. Should `position` be provided, only +annotations that overlap with `position` will be returned. + +See also: `annotate!`. +""" +annotations(s::AnnotatedString) = s.annotations + +annotations(s::SubString{<:AnnotatedString}) = + annotations(s, s.offset+1:s.offset+s.ncodeunits) + +function annotations(s::AnnotatedString, pos::UnitRange{<:Integer}) + # TODO optimise + annots = filter(label -> !isempty(intersect(pos, first(label))), + s.annotations) + last.(annots) +end + +annotations(s::AnnotatedString, pos::Integer) = annotations(s, pos:pos) + +annotations(s::SubString{<:AnnotatedString}, pos::Integer) = + annotations(s.string, s.offset + pos) +annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) = + annotations(s.string, first(pos)+s.offset:last(pos)+s.offset) + +""" + annotations(chr::AnnotatedChar) + +Get all annotations of `chr`. +""" +annotations(c::AnnotatedChar) = c.annotations diff --git a/base/strings/basic.jl b/base/strings/basic.jl index 306ecc5cc214a..4136571963f99 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -16,9 +16,7 @@ about strings: * Each `AbstractChar` in a string is encoded by one or more code units * Only the index of the first code unit of an `AbstractChar` is a valid index * The encoding of an `AbstractChar` is independent of what precedes or follows it - * String encodings are [self-synchronizing] – i.e. `isvalid(s, i)` is O(1) - -[self-synchronizing]: https://en.wikipedia.org/wiki/Self-synchronizing_code + * String encodings are [self-synchronizing](https://en.wikipedia.org/wiki/Self-synchronizing_code) – i.e. `isvalid(s, i)` is O(1) Some string functions that extract code units, characters or substrings from strings error if you pass them out-of-bounds or invalid string indices. This @@ -31,7 +29,7 @@ types may choose different "imaginary" character sizes as makes sense for their implementations (e.g. substrings may pass index arithmetic through to the underlying string they provide a view into). Relaxed indexing functions include those intended for index arithmetic: `thisind`, `nextind` and `prevind`. This -model allows index arithmetic to work with out-of- bounds indices as +model allows index arithmetic to work with out-of-bounds indices as intermediate values so long as one never uses them to retrieve a character, which often helps avoid needing to code around edge cases. @@ -181,6 +179,8 @@ firstindex(s::AbstractString) = 1 lastindex(s::AbstractString) = thisind(s, ncodeunits(s)::Int) isempty(s::AbstractString) = iszero(ncodeunits(s)::Int) +@propagate_inbounds first(s::AbstractString) = s[firstindex(s)] + function getindex(s::AbstractString, i::Integer) @boundscheck checkbounds(s, i) @inbounds return isvalid(s, i) ? (iterate(s, i)::NTuple{2,Any})[1] : string_index_err(s, i) @@ -229,7 +229,7 @@ Symbol(s::AbstractString) = Symbol(String(s)) Symbol(x...) 
= Symbol(string(x...)) convert(::Type{T}, s::T) where {T<:AbstractString} = s -convert(::Type{T}, s::AbstractString) where {T<:AbstractString} = T(s) +convert(::Type{T}, s::AbstractString) where {T<:AbstractString} = T(s)::T ## summary ## @@ -243,9 +243,10 @@ end """ *(s::Union{AbstractString, AbstractChar}, t::Union{AbstractString, AbstractChar}...) -> AbstractString -Concatenate strings and/or characters, producing a [`String`](@ref). This is equivalent -to calling the [`string`](@ref) function on the arguments. Concatenation of built-in -string types always produces a value of type `String` but other string types may choose +Concatenate strings and/or characters, producing a [`String`](@ref) or +[`AnnotatedString`](@ref) (as appropriate). This is equivalent to calling the +[`string`](@ref) or [`annotatedstring`](@ref) function on the arguments. Concatenation of built-in string +types always produces a value of type `String` but other string types may choose to return a string of a different type as appropriate. # Examples @@ -257,7 +258,15 @@ julia> 'j' * "ulia" "julia" ``` """ -(*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...) = string(s1, ss...) +function (*)(s1::Union{AbstractChar, AbstractString}, ss::Union{AbstractChar, AbstractString}...) + isannotated = s1 isa AnnotatedString || s1 isa AnnotatedChar || + any(s -> s isa AnnotatedString || s isa AnnotatedChar, ss) + if isannotated + annotatedstring(s1, ss...) + else + string(s1, ss...) + end +end one(::Union{T,Type{T}}) where {T<:AbstractString} = convert(T, "") @@ -298,19 +307,21 @@ julia> cmp("b", "β") """ function cmp(a::AbstractString, b::AbstractString) a === b && return 0 - a, b = Iterators.Stateful(a), Iterators.Stateful(b) - for (c::AbstractChar, d::AbstractChar) in zip(a, b) + (iv1, iv2) = (iterate(a), iterate(b)) + while iv1 !== nothing && iv2 !== nothing + (c, d) = (first(iv1)::AbstractChar, first(iv2)::AbstractChar) c ≠ d && return ifelse(c < d, -1, 1) + (iv1, iv2) = (iterate(a, last(iv1)), iterate(b, last(iv2))) end - isempty(a) && return ifelse(isempty(b), 0, -1) - return 1 + return iv1 === nothing ? (iv2 === nothing ? 0 : -1) : 1 end """ ==(a::AbstractString, b::AbstractString) -> Bool Test whether two strings are equal character by character (technically, Unicode -code point by code point). +code point by code point). Should either string be a [`AnnotatedString`](@ref) the +string properties must match too. 
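For instance, with the annotated-string methods added in this patch, equality against a plain string holds only when no annotations are present (a small sketch using the types defined above):

```julia
Base.AnnotatedString("abc") == "abc"                               # true: no annotations attached
Base.annotate!(Base.AnnotatedString("abc"), :label => 1) == "abc"  # false: annotations differ
```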
# Examples ```jldoctest @@ -345,7 +356,9 @@ isless(a::AbstractString, b::AbstractString) = cmp(a, b) < 0 # faster comparisons for symbols -cmp(a::Symbol, b::Symbol) = Int(sign(ccall(:strcmp, Int32, (Cstring, Cstring), a, b))) +@assume_effects :total function cmp(a::Symbol, b::Symbol) + Int(sign(ccall(:strcmp, Int32, (Cstring, Cstring), a, b))) +end isless(a::Symbol, b::Symbol) = cmp(a, b) < 0 @@ -610,6 +623,38 @@ isascii(c::Char) = bswap(reinterpret(UInt32, c)) < 0x80 isascii(s::AbstractString) = all(isascii, s) isascii(c::AbstractChar) = UInt32(c) < 0x80 +@inline function _isascii(code_units::AbstractVector{CU}, first, last) where {CU} + r = zero(CU) + for n = first:last + @inbounds r |= code_units[n] + end + return 0 ≤ r < 0x80 +end + +#The chunking algorithm makes the last two chunks overlap inorder to keep the size fixed +@inline function _isascii_chunks(chunk_size,cu::AbstractVector{CU}, first,last) where {CU} + n=first + while n <= last - chunk_size + _isascii(cu,n,n+chunk_size-1) || return false + n += chunk_size + end + return _isascii(cu,last-chunk_size+1,last) +end +""" + isascii(cu::AbstractVector{CU}) where {CU <: Integer} -> Bool + +Test whether all values in the vector belong to the ASCII character set (0x00 to 0x7f). +This function is intended to be used by other string implementations that need a fast ASCII check. +""" +function isascii(cu::AbstractVector{CU}) where {CU <: Integer} + chunk_size = 1024 + chunk_threshold = chunk_size + (chunk_size ÷ 2) + first = firstindex(cu); last = lastindex(cu) + l = last - first + 1 + l < chunk_threshold && return _isascii(cu,first,last) + return _isascii_chunks(chunk_size,cu,first,last) +end + ## string map, filter ## function map(f, s::AbstractString) @@ -633,7 +678,7 @@ function filter(f, s::AbstractString) for c in s f(c) && write(out, c) end - String(take!(out)) + String(_unsafe_take!(out)) end ## string first and last ## @@ -715,7 +760,7 @@ julia> repeat("ha", 3) repeat(s::AbstractString, r::Integer) = repeat(String(s), r) """ - ^(s::Union{AbstractString,AbstractChar}, n::Integer) + ^(s::Union{AbstractString,AbstractChar}, n::Integer) -> AbstractString Repeat a string or character `n` times. This can also be written as `repeat(s, n)`. @@ -757,8 +802,8 @@ IndexStyle(::Type{<:CodeUnits}) = IndexLinear() write(io::IO, s::CodeUnits) = write(io, s.s) -unsafe_convert(::Type{Ptr{T}}, s::CodeUnits{T}) where {T} = unsafe_convert(Ptr{T}, s.s) -unsafe_convert(::Type{Ptr{Int8}}, s::CodeUnits{UInt8}) = unsafe_convert(Ptr{Int8}, s.s) +cconvert(::Type{Ptr{T}}, s::CodeUnits{T}) where {T} = cconvert(Ptr{T}, s.s) +cconvert(::Type{Ptr{Int8}}, s::CodeUnits{UInt8}) = cconvert(Ptr{Int8}, s.s) """ codeunits(s::AbstractString) diff --git a/base/strings/cstring.jl b/base/strings/cstring.jl new file mode 100644 index 0000000000000..3a377ab0e7b1e --- /dev/null +++ b/base/strings/cstring.jl @@ -0,0 +1,314 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +import Core.Intrinsics: bitcast + +""" + Cwstring + +A C-style string composed of the native wide character type +[`Cwchar_t`](@ref)s. `Cwstring`s are NUL-terminated. For +C-style strings composed of the native character +type, see [`Cstring`](@ref). For more information +about string interoperability with C, see the +[manual](@ref man-bits-types). + +""" +Cwstring + +""" + Cstring + +A C-style string composed of the native character type +[`Cchar`](@ref)s. `Cstring`s are NUL-terminated. 
For +C-style strings composed of the native wide character +type, see [`Cwstring`](@ref). For more information +about string interoperability with C, see the +[manual](@ref man-bits-types). +""" +Cstring + +# construction from pointers +Cstring(p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = bitcast(Cstring, p) +Cwstring(p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = bitcast(Cwstring, p) +Ptr{T}(p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = bitcast(Ptr{T}, p) +Ptr{T}(p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = bitcast(Ptr{Cwchar_t}, p) + +convert(::Type{Cstring}, p::Union{Ptr{Int8},Ptr{UInt8},Ptr{Cvoid}}) = Cstring(p) +convert(::Type{Cwstring}, p::Union{Ptr{Cwchar_t},Ptr{Cvoid}}) = Cwstring(p) +convert(::Type{Ptr{T}}, p::Cstring) where {T<:Union{Int8,UInt8,Cvoid}} = Ptr{T}(p) +convert(::Type{Ptr{T}}, p::Cwstring) where {T<:Union{Cwchar_t,Cvoid}} = Ptr{T}(p) + +""" + pointer(array [, index]) + +Get the native address of an array or string, optionally at a given location `index`. + +This function is "unsafe". Be careful to ensure that a Julia reference to +`array` exists as long as this pointer will be used. The [`GC.@preserve`](@ref) +macro should be used to protect the `array` argument from garbage collection +within a given block of code. + +Calling [`Ref(array[, index])`](@ref Ref) is generally preferable to this function as it guarantees validity. +""" +function pointer end + +pointer(p::Cstring) = convert(Ptr{Cchar}, p) +pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p) + +# comparisons against pointers (mainly to support `cstr==C_NULL`) +==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y +==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y) + +unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s)) + +# convert strings to String etc. 
to pass as pointers +cconvert(::Type{Cstring}, s::String) = s +cconvert(::Type{Cstring}, s::AbstractString) = + cconvert(Cstring, String(s)::String) + +function cconvert(::Type{Cwstring}, s::AbstractString) + v = transcode(Cwchar_t, String(s)) + push!(v, 0) + return cconvert(Cwstring, v) +end + +eltype(::Type{Cstring}) = Cchar +eltype(::Type{Cwstring}) = Cwchar_t + +containsnul(p::Ptr, len) = + C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len) +containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s)) +containsnul(s::AbstractString) = '\0' in s + +function unsafe_convert(::Type{Cstring}, s::String) + p = unsafe_convert(Ptr{Cchar}, s) + containsnul(p, sizeof(s)) && + throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) + return Cstring(p) +end + +unsafe_convert(::Type{Cstring}, s::Union{Memory{UInt8},Memory{Int8}}) = Cstring(unsafe_convert(Ptr{Cvoid}, s)) + +function cconvert(::Type{Cwstring}, v::Vector{Cwchar_t}) + for i = 1:length(v)-1 + v[i] == 0 && + throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(v))")) + end + v[end] == 0 || + throw(ArgumentError("C string data must be NUL terminated: $(repr(v))")) + return cconvert(Ptr{Cwchar_t}, v) +end +unsafe_convert(::Type{Cwstring}, s) = Cwstring(unsafe_convert(Ptr{Cwchar_t}, s)) +unsafe_convert(::Type{Cwstring}, s::Cwstring) = s + +# symbols are guaranteed not to contain embedded NUL +cconvert(::Type{Cstring}, s::Symbol) = s +unsafe_convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s)) + +if ccall(:jl_get_UNAME, Any, ()) === :NT +""" + Base.cwstring(s) + +Converts a string `s` to a NUL-terminated `Vector{Cwchar_t}`, suitable for passing to C +functions expecting a `Ptr{Cwchar_t}`. The main advantage of using this over the implicit +conversion provided by [`Cwstring`](@ref) is if the function is called multiple times with the +same argument. + +This is only available on Windows. +""" +function cwstring(s::AbstractString) + bytes = codeunits(String(s)) + 0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) + return push!(transcode(UInt16, bytes), 0) +end +end + +# transcoding between data in UTF-8 and UTF-16 for Windows APIs, +# and also UTF-32 for APIs using Cwchar_t on other platforms. + +""" + transcode(T, src) + +Convert string data between Unicode encodings. `src` is either a +`String` or a `Vector{UIntXX}` of UTF-XX code units, where +`XX` is 8, 16, or 32. `T` indicates the encoding of the return value: +`String` to return a (UTF-8 encoded) `String` or `UIntXX` +to return a `Vector{UIntXX}` of UTF-`XX` data. (The alias [`Cwchar_t`](@ref) +can also be used as the integer type, for converting `wchar_t*` strings +used by external C libraries.) + +The `transcode` function succeeds as long as the input data can be +reasonably represented in the target encoding; it always succeeds for +conversions between UTF-XX encodings, even for invalid Unicode data. + +Only conversion to/from UTF-8 is currently supported. 
+ +# Examples +```jldoctest +julia> str = "αβγ" +"αβγ" + +julia> transcode(UInt16, str) +3-element Vector{UInt16}: + 0x03b1 + 0x03b2 + 0x03b3 + +julia> transcode(String, transcode(UInt16, str)) +"αβγ" +``` +""" +function transcode end + +transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src +transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src] +transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} = + transcode(T, String(Vector(src))) +transcode(::Type{T}, src::CodeUnits{UInt8,String}) where {T<:Union{Int32,UInt32}} = + transcode(T, String(src)) + +function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}}) + buf = IOBuffer() + for c in src + print(buf, Char(c)) + end + take!(buf) +end +transcode(::Type{String}, src::String) = src +transcode(T, src::String) = transcode(T, codeunits(src)) +transcode(::Type{String}, src) = String(transcode(UInt8, src)) + +function transcode(::Type{UInt16}, src::AbstractVector{UInt8}) + require_one_based_indexing(src) + dst = UInt16[] + i, n = 1, length(src) + n > 0 || return dst + sizehint!(dst, 2n) + a = src[1] + while true + if i < n && -64 <= a % Int8 <= -12 # multi-byte character + b = src[i += 1] + if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b + # invalid UTF-8 (non-continuation or too-high code point) + push!(dst, a) + a = b; continue + elseif a < 0xe0 # 2-byte UTF-8 + push!(dst, xor(0x3080, UInt16(a) << 6, b)) + elseif i < n # 3/4-byte character + c = src[i += 1] + if -64 <= (c % Int8) # invalid UTF-8 (non-continuation) + push!(dst, a, b) + a = c; continue + elseif a < 0xf0 # 3-byte UTF-8 + push!(dst, xor(0x2080, UInt16(a) << 12, UInt16(b) << 6, c)) + elseif i < n + d = src[i += 1] + if -64 <= (d % Int8) # invalid UTF-8 (non-continuation) + push!(dst, a, b, c) + a = d; continue + elseif a == 0xf0 && b < 0x90 # overlong encoding + push!(dst, xor(0x2080, UInt16(b) << 12, UInt16(c) << 6, d)) + else # 4-byte UTF-8 + push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4), + xor(0xdc80, UInt16(c & 0xf) << 6, d)) + end + else # too short + push!(dst, a, b, c) + break + end + else # too short + push!(dst, a, b) + break + end + else # ASCII or invalid UTF-8 (continuation byte or too-high code point) + push!(dst, a) + end + i < n || break + a = src[i += 1] + end + return dst +end + +function transcode(::Type{UInt8}, src::AbstractVector{UInt16}) + require_one_based_indexing(src) + n = length(src) + n == 0 && return UInt8[] + + # Precompute m = sizeof(dst). This involves annoying duplication + # of the loop over the src array. However, this is not just an + # optimization: it is problematic for security reasons to grow + # dst dynamically, because Base.winprompt uses this function to + # convert passwords to UTF-8 and we don't want to make unintentional + # copies of the password data. 
+ a = src[1] + i, m = 1, 0 + while true + if a < 0x80 + m += 1 + elseif a < 0x800 # 2-byte UTF-8 + m += 2 + elseif a & 0xfc00 == 0xd800 && i < length(src) + b = src[i += 1] + if (b & 0xfc00) == 0xdc00 # 2-unit UTF-16 sequence => 4-byte UTF-8 + m += 4 + else + m += 3 + a = b; continue + end + else + # 1-unit high UTF-16 or unpaired high surrogate + # either way, encode as 3-byte UTF-8 code point + m += 3 + end + i < n || break + a = src[i += 1] + end + + dst = StringVector(m) + a = src[1] + i, j = 1, 0 + while true + if a < 0x80 # ASCII + dst[j += 1] = a % UInt8 + elseif a < 0x800 # 2-byte UTF-8 + dst[j += 1] = 0xc0 | ((a >> 6) % UInt8) + dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) + elseif a & 0xfc00 == 0xd800 && i < n + b = src[i += 1] + if (b & 0xfc00) == 0xdc00 + # 2-unit UTF-16 sequence => 4-byte UTF-8 + a += 0x2840 + dst[j += 1] = 0xf0 | ((a >> 8) % UInt8) + dst[j += 1] = 0x80 | ((a % UInt8) >> 2) + dst[j += 1] = xor(0xf0, ((a % UInt8) << 4) & 0x3f, (b >> 6) % UInt8) + dst[j += 1] = 0x80 | ((b % UInt8) & 0x3f) + else + dst[j += 1] = 0xe0 | ((a >> 12) % UInt8) + dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f) + dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) + a = b; continue + end + else + # 1-unit high UTF-16 or unpaired high surrogate + # either way, encode as 3-byte UTF-8 code point + dst[j += 1] = 0xe0 | ((a >> 12) % UInt8) + dst[j += 1] = 0x80 | (((a >> 6) % UInt8) & 0x3f) + dst[j += 1] = 0x80 | ((a % UInt8) & 0x3f) + end + i < n || break + a = src[i += 1] + end + return dst +end + +function unsafe_string(p::Ptr{T}, length::Integer) where {T<:Union{UInt16,UInt32,Cwchar_t}} + transcode(String, unsafe_wrap(Array, p, length; own=false)) +end +function unsafe_string(cw::Cwstring) + p = convert(Ptr{Cwchar_t}, cw) + n = 1 + while unsafe_load(p, n) != 0 + n += 1 + end + return unsafe_string(p, n - 1) +end diff --git a/base/strings/io.jl b/base/strings/io.jl index d1bf7a763e93c..c1e45775f6ca0 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -113,7 +113,7 @@ function sprint(f::Function, args...; context=nothing, sizehint::Integer=0) else f(s, args...) end - String(resize!(s.data, s.size)) + String(_unsafe_take!(s)) end function _str_sizehint(x) @@ -125,6 +125,10 @@ function _str_sizehint(x) return sizeof(x) elseif x isa Char return ncodeunits(x) + elseif x isa UInt64 || x isa UInt32 + return ndigits(x) + elseif x isa Int64 || x isa Int32 + return ndigits(x) + (x < zero(x)) else return 8 end @@ -143,7 +147,7 @@ function print_to_string(xs...) for x in xs print(s, x) end - String(resize!(s.data, s.size)) + String(_unsafe_take!(s)) end function string_with_env(env, xs...) @@ -160,7 +164,7 @@ function string_with_env(env, xs...) 
for x in xs print(env_io, x) end - String(resize!(s.data, s.size)) + String(_unsafe_take!(s)) end """ @@ -201,7 +205,7 @@ function show( ) # compute limit in default case if limit === nothing - get(io, :limit, false) || return show(io, str) + get(io, :limit, false)::Bool || return show(io, str) limit = max(20, displaysize(io)[2]) # one line in collection, seven otherwise get(io, :typeinfo, nothing) === nothing && (limit *= 7) @@ -233,7 +237,7 @@ function show( if 4t ≤ n || t ≤ n && t ≤ length(str, head, tail-1) skip = skip_text(n) show(io, SubString(str, 1:prevind(str, head))) - print(io, skip) # TODO: bold styled + printstyled(io, skip; color=:light_yellow, bold=true) show(io, SubString(str, tail)) else show(io, str) @@ -349,9 +353,31 @@ function join(io::IO, iterator, delim="") end end -join(iterator) = sprint(join, iterator) -join(iterator, delim) = sprint(join, iterator, delim) -join(iterator, delim, last) = sprint(join, iterator, delim, last) +# TODO: If/when we have `AnnotatedIO`, we can revisit this and +# implement it more nicely. +function join_annotated(iterator, delim="", last=delim) + xs = zip(iterator, Iterators.repeated(delim)) |> Iterators.flatten |> collect + xs = xs[1:end-1] + if length(xs) > 1 + xs[end-1] = last + end + annotatedstring(xs...)::AnnotatedString{String} +end + +function _join_maybe_annotated(args...) + if any(function (arg) + t = eltype(arg) + !(t == Union{}) && (t <: AnnotatedString || t <: AnnotatedChar) + end, args) + join_annotated(args...) + else + sprint(join, args...) + end +end + +join(iterator) = _join_maybe_annotated(iterator) +join(iterator, delim) = _join_maybe_annotated(iterator, delim) +join(iterator, delim, last) = _join_maybe_annotated(iterator, delim, last) ## string escaping & unescaping ## @@ -586,14 +612,14 @@ julia> println(raw"\\\\x \\\\\\"") macro raw_str(s); s; end """ - escape_raw_string(s::AbstractString) - escape_raw_string(io, s::AbstractString) + escape_raw_string(s::AbstractString, delim='"') -> AbstractString + escape_raw_string(io, s::AbstractString, delim='"') Escape a string in the manner used for parsing raw string literals. -For each double-quote (`"`) character in input string `s`, this -function counts the number _n_ of preceding backslash (`\\`) characters, -and then increases there the number of backslashes from _n_ to 2_n_+1 -(even for _n_ = 0). It also doubles a sequence of backslashes at the end +For each double-quote (`"`) character in input string `s` (or `delim` if +specified), this function counts the number _n_ of preceding backslash (`\\`) +characters, and then increases there the number of backslashes from _n_ to +2_n_+1 (even for _n_ = 0). It also doubles a sequence of backslashes at the end of the string. This escaping convention is used in raw strings and other non-standard @@ -603,36 +629,41 @@ command-line string into the argv[] array.) See also [`escape_string`](@ref). 
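A short REPL-style illustration of the rule just described; the results are shown as the REPL would print them, i.e. with `show`'s own escaping applied on top of the escaping performed by the (unexported) `Base.escape_raw_string`:

```julia-repl
julia> Base.escape_raw_string("say \"hi\"")    # each quote gains a preceding backslash
"say \\\"hi\\\""

julia> Base.escape_raw_string("trailing\\")    # a trailing backslash is doubled
"trailing\\\\"
```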
""" -function escape_raw_string(io, str::AbstractString) +function escape_raw_string(io::IO, str::AbstractString, delim::Char='"') + total = 0 escapes = 0 for c in str if c == '\\' escapes += 1 else - if c == '"' + if c == delim # if one or more backslashes are followed by # a double quote then escape all backslashes # and the double quote - escapes = escapes * 2 + 1 - end - while escapes > 0 - write(io, '\\') - escapes -= 1 + escapes += 1 + total += escapes + while escapes > 0 + write(io, '\\') + escapes -= 1 + end end escapes = 0 - write(io, c) end + write(io, c) end # also escape any trailing backslashes, # so they do not affect the closing quote + total += escapes while escapes > 0 - write(io, '\\') write(io, '\\') escapes -= 1 end + total +end +function escape_raw_string(str::AbstractString, delim::Char='"') + total = escape_raw_string(devnull, str, delim) # check whether the string even needs to be copied and how much to allocate for it + return total == 0 ? str : sprint(escape_raw_string, str, delim; sizehint = sizeof(str) + total) end -escape_raw_string(str::AbstractString) = sprint(escape_raw_string, str; - sizehint = lastindex(str) + 2) ## multiline strings ## @@ -760,3 +791,26 @@ function String(chars::AbstractVector{<:AbstractChar}) end end end + +function AnnotatedString(chars::AbstractVector{C}) where {C<:AbstractChar} + str = if C <: AnnotatedChar + String(getfield.(chars, :char)) + else + sprint(sizehint=length(chars)) do io + for c in chars + print(io, c) + end + end + end + annots = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[] + point = 1 + for c in chars + if c isa AnnotatedChar + for annot in c.annotations + push!(annots, (point:point, annot)) + end + end + point += ncodeunits(c) + end + AnnotatedString(str, annots) +end diff --git a/base/strings/lazy.jl b/base/strings/lazy.jl index 3510afc9b4f11..eaaa6397d37f2 100644 --- a/base/strings/lazy.jl +++ b/base/strings/lazy.jl @@ -67,7 +67,7 @@ macro lazy_str(text) parts = Any[] lastidx = idx = 1 while (idx = findnext('$', text, idx)) !== nothing - lastidx < idx && push!(parts, text[lastidx:idx-1]) + lastidx < idx && push!(parts, text[lastidx:prevind(text, idx)]) idx += 1 expr, idx = Meta.parseatom(text, idx; filename=string(__source__.file)) push!(parts, esc(expr)) diff --git a/base/strings/search.jl b/base/strings/search.jl index eade1fbe74158..e2b3dc96b98cf 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -13,7 +13,7 @@ abstract type AbstractPattern end nothing_sentinel(i) = i == 0 ? 
nothing : i function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar}, - s::String, i::Integer) + s::Union{String, SubString{String}}, i::Integer) if i < 1 || i > sizeof(s) i == sizeof(s) + 1 && return nothing throw(BoundsError(s, i)) @@ -38,7 +38,7 @@ findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a: findfirst(::typeof(iszero), a::ByteArray) = nothing_sentinel(_search(a, zero(UInt8))) findnext(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_search(a, zero(UInt8), i)) -function _search(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1) +function _search(a::Union{String,SubString{String},ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1) if i < 1 throw(BoundsError(a, i)) end @@ -55,7 +55,7 @@ function _search(a::ByteArray, b::AbstractChar, i::Integer = 1) if isascii(b) _search(a,UInt8(b),i) else - _search(a,unsafe_wrap(Vector{UInt8},string(b)),i).start + _search(a,codeunits(string(b)),i).start end end @@ -98,7 +98,7 @@ function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a)) if isascii(b) _rsearch(a,UInt8(b),i) else - _rsearch(a,unsafe_wrap(Vector{UInt8},string(b)),i).start + _rsearch(a,codeunits(string(b)),i).start end end @@ -189,7 +189,7 @@ function _searchindex(s::Union{AbstractString,ByteArray}, if i === nothing return 0 end ii = nextind(s, i)::Int a = Iterators.Stateful(trest) - matched = all(Splat(==), zip(SubString(s, ii), a)) + matched = all(splat(==), zip(SubString(s, ii), a)) (isempty(a) && matched) && return i i = ii end @@ -201,13 +201,13 @@ function _search_bloom_mask(c) UInt64(1) << (c & 63) end -_nthbyte(s::String, i) = codeunit(s, i) +_nthbyte(s::Union{String, SubString{String}}, i) = codeunit(s, i) _nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)] -function _searchindex(s::String, t::String, i::Integer) +function _searchindex(s::Union{String, SubString{String}}, t::Union{String, SubString{String}}, i::Integer) # Check for fast case of a single byte lastindex(t) == 1 && return something(findnext(isequal(t[1]), s, i), 0) - _searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i) + _searchindex(codeunits(s), codeunits(t), i) end function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, @@ -506,7 +506,7 @@ function _rsearchindex(s::AbstractString, a = Iterators.Stateful(trest) b = Iterators.Stateful(Iterators.reverse( pairs(SubString(s, 1, ii)))) - matched = all(Splat(==), zip(a, (x[2] for x in b))) + matched = all(splat(==), zip(a, (x[2] for x in b))) if matched && isempty(a) isempty(b) && return firstindex(s) return nextind(s, popfirst!(b)[1])::Int @@ -521,7 +521,7 @@ function _rsearchindex(s::String, t::String, i::Integer) return something(findprev(isequal(t[1]), s, i), 0) elseif lastindex(t) != 0 j = i ≤ ncodeunits(s) ? nextind(s, i)-1 : i - return _rsearchindex(unsafe_wrap(Vector{UInt8}, s), unsafe_wrap(Vector{UInt8}, t), j) + return _rsearchindex(codeunits(s), codeunits(t), j) elseif i > sizeof(s) return 0 elseif i == 0 @@ -709,6 +709,17 @@ The returned function is of type `Base.Fix2{typeof(occursin)}`. !!! compat "Julia 1.6" This method requires Julia 1.6 or later. 
+ +# Examples +```jldoctest +julia> search_f = occursin("JuliaLang is a programming language"); + +julia> search_f("JuliaLang") +true + +julia> search_f("Python") +false +``` """ occursin(haystack) = Base.Fix2(occursin, haystack) diff --git a/base/strings/string.jl b/base/strings/string.jl index e44746f9834d9..29216ae97aa37 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -17,10 +17,12 @@ function Base.showerror(io::IO, exc::StringIndexError) if firstindex(s) <= exc.index <= ncodeunits(s) iprev = thisind(s, exc.index) inext = nextind(s, iprev) + escprev = escape_string(s[iprev:iprev]) if inext <= ncodeunits(s) - print(io, ", valid nearby indices [$iprev]=>'$(s[iprev])', [$inext]=>'$(s[inext])'") + escnext = escape_string(s[inext:inext]) + print(io, ", valid nearby indices [$iprev]=>'$escprev', [$inext]=>'$escnext'") else - print(io, ", valid nearby index [$iprev]=>'$(s[iprev])'") + print(io, ", valid nearby index [$iprev]=>'$escprev'") end end end @@ -85,7 +87,7 @@ end # This is @assume_effects :effect_free :nothrow :terminates_globally @ccall jl_alloc_string(n::Csize_t)::Ref{String}, # but the macro is not available at this time in bootstrap, so we write it manually. -@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, Expr(:call, Expr(:core, :svec), :Csize_t), 1, QuoteNode((:ccall,0xe)), :(convert(Csize_t, n)))) +@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, Expr(:call, Expr(:core, :svec), :Csize_t), 1, QuoteNode((:ccall,0x000e)), :(convert(Csize_t, n)))) """ String(s::AbstractString) @@ -96,6 +98,7 @@ String(s::AbstractString) = print_to_string(s) @assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s)) unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s) +unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s)) Vector{UInt8}(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s) Vector{UInt8}(s::String) = Vector{UInt8}(codeunits(s)) @@ -111,7 +114,8 @@ pointer(s::String, i::Integer) = pointer(s) + Int(i)::Int - 1 ncodeunits(s::String) = Core.sizeof(s) codeunit(s::String) = UInt8 -@inline function codeunit(s::String, i::Integer) +codeunit(s::String, i::Integer) = codeunit(s, Int(i)) +@assume_effects :foldable @inline function codeunit(s::String, i::Int) @boundscheck checkbounds(s, i) b = GC.@preserve s unsafe_load(pointer(s, i)) return b @@ -119,20 +123,24 @@ end ## comparison ## -_memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len) = - ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), a, b, len % Csize_t) % Int +@assume_effects :total _memcmp(a::String, b::String) = @invoke _memcmp(a::Union{Ptr{UInt8},AbstractString},b::Union{Ptr{UInt8},AbstractString}) + +_memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}) = _memcmp(a, b, min(sizeof(a), sizeof(b))) +function _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len::Int) + GC.@preserve a b begin + pa = unsafe_convert(Ptr{UInt8}, a) + pb = unsafe_convert(Ptr{UInt8}, b) + memcmp(pa, pb, len % Csize_t) % Int + end +end function cmp(a::String, b::String) al, bl = sizeof(a), sizeof(b) - c = _memcmp(a, b, min(al,bl)) + c = _memcmp(a, b) return c < 0 ? -1 : c > 0 ? 
+1 : cmp(al,bl) end -function ==(a::String, b::String) - pointer_from_objref(a) == pointer_from_objref(b) && return true - al = sizeof(a) - return al == sizeof(b) && 0 == _memcmp(a, b, al) -end +==(a::String, b::String) = a===b typemin(::Type{String}) = "" typemin(::String) = typemin(String) @@ -149,15 +157,18 @@ typemin(::String) = typemin(String) @boundscheck between(i, 1, n) || throw(BoundsError(s, i)) @inbounds b = codeunit(s, i) (b & 0xc0 == 0x80) & (i-1 > 0) || return i - @inbounds b = codeunit(s, i-1) - between(b, 0b11000000, 0b11110111) && return i-1 - (b & 0xc0 == 0x80) & (i-2 > 0) || return i - @inbounds b = codeunit(s, i-2) - between(b, 0b11100000, 0b11110111) && return i-2 - (b & 0xc0 == 0x80) & (i-3 > 0) || return i - @inbounds b = codeunit(s, i-3) - between(b, 0b11110000, 0b11110111) && return i-3 - return i + (@noinline function _thisind_continued(s, i, n) # mark the rest of the function as a slow-path + local b + @inbounds b = codeunit(s, i-1) + between(b, 0b11000000, 0b11110111) && return i-1 + (b & 0xc0 == 0x80) & (i-2 > 0) || return i + @inbounds b = codeunit(s, i-2) + between(b, 0b11100000, 0b11110111) && return i-2 + (b & 0xc0 == 0x80) & (i-3 > 0) || return i + @inbounds b = codeunit(s, i-3) + between(b, 0b11110000, 0b11110111) && return i-3 + return i + end)(s, i, n) end @propagate_inbounds nextind(s::String, i::Int) = _nextind_str(s, i) @@ -168,35 +179,229 @@ end n = ncodeunits(s) @boundscheck between(i, 1, n) || throw(BoundsError(s, i)) @inbounds l = codeunit(s, i) - (l < 0x80) | (0xf8 ≤ l) && return i+1 - if l < 0xc0 - i′ = @inbounds thisind(s, i) - return i′ < i ? @inbounds(nextind(s, i′)) : i+1 - end - # first continuation byte - (i += 1) > n && return i - @inbounds b = codeunit(s, i) - b & 0xc0 ≠ 0x80 && return i - ((i += 1) > n) | (l < 0xe0) && return i - # second continuation byte - @inbounds b = codeunit(s, i) - b & 0xc0 ≠ 0x80 && return i - ((i += 1) > n) | (l < 0xf0) && return i - # third continuation byte - @inbounds b = codeunit(s, i) - ifelse(b & 0xc0 ≠ 0x80, i, i+1) + between(l, 0x80, 0xf7) || return i+1 + (@noinline function _nextind_continued(s, i, n, l) # mark the rest of the function as a slow-path + if l < 0xc0 + # handle invalid codeunit index by scanning back to the start of this index + # (which may be the same as this index) + i′ = @inbounds thisind(s, i) + i′ >= i && return i+1 + i = i′ + @inbounds l = codeunit(s, i) + (l < 0x80) | (0xf8 ≤ l) && return i+1 + @assert l >= 0xc0 + end + # first continuation byte + (i += 1) > n && return i + @inbounds b = codeunit(s, i) + b & 0xc0 ≠ 0x80 && return i + ((i += 1) > n) | (l < 0xe0) && return i + # second continuation byte + @inbounds b = codeunit(s, i) + b & 0xc0 ≠ 0x80 && return i + ((i += 1) > n) | (l < 0xf0) && return i + # third continuation byte + @inbounds b = codeunit(s, i) + return ifelse(b & 0xc0 ≠ 0x80, i, i+1) + end)(s, i, n, l) end ## checking UTF-8 & ACSII validity ## +#= + The UTF-8 Validation is performed by a shift based DFA. 
+ ┌───────────────────────────────────────────────────────────────────┐ + │ UTF-8 DFA State Diagram ┌──────────────2──────────────┐ │ + │ ├────────3────────┐ │ │ + │ ┌──────────┐ │ ┌─┐ ┌▼┐ │ │ + │ ASCII │ UTF-8 │ ├─5──►│9├───1────► │ │ │ + │ │ │ │ ├─┤ │ │ ┌▼┐ │ + │ │ ┌─0─┐ │ ├─6──►│8├─1,7,9──►4├──1,7,9──► │ │ + │ ┌─0─┐ │ │ │ │ │ ├─┤ │ │ │ │ │ + │ │ │ │ ┌▼───┴┐ │ ├─11─►│7├──7,9───► │ ┌───────►3├─┐ │ + │ ┌▼───┴┐ │ │ │ ▼ │ └─┘ └─┘ │ │ │ │ │ + │ │ 0 ├─────┘ │ 1 ├─► ──┤ │ ┌────► │ │ │ + │ └─────┘ │ │ │ ┌─┐ │ │ └─┘ │ │ + │ └──▲──┘ ├─10─►│5├─────7──────┘ │ │ │ + │ │ │ ├─┤ │ │ │ + │ │ └─4──►│6├─────1,9───────┘ │ │ + │ INVALID │ └─┘ │ │ + │ ┌─*─┐ └──────────────────1,7,9──────────────────┘ │ + │ ┌▼───┴┐ │ + │ │ 2 ◄─── All undefined transitions result in state 2 │ + │ └─────┘ │ + └───────────────────────────────────────────────────────────────────┘ + + Validation States + 0 -> _UTF8_DFA_ASCII is the start state and will only stay in this state if the string is only ASCII characters + If the DFA ends in this state the string is ASCII only + 1 -> _UTF8_DFA_ACCEPT is the valid complete character state of the DFA once it has encountered a UTF-8 Unicode character + 2 -> _UTF8_DFA_INVALID is only reached by invalid bytes and once in this state it will not change + as seen by all 1s in that column of table below + 3 -> One valid continuation byte needed to return to state 0 + 4,5,6 -> Two valid continuation bytes needed to return to state 0 + 7,8,9 -> Three valids continuation bytes needed to return to state 0 + + Current State + 0̲ 1̲ 2̲ 3̲ 4̲ 5̲ 6̲ 7̲ 8̲ 9̲ + 0 | 0 1 2 2 2 2 2 2 2 2 + 1 | 2 2 2 1 3 2 3 2 4 4 + 2 | 3 3 2 2 2 2 2 2 2 2 + 3 | 4 4 2 2 2 2 2 2 2 2 + 4 | 6 6 2 2 2 2 2 2 2 2 + Character 5 | 9 9 2 2 2 2 2 2 2 2 <- Next State + Class 6 | 8 8 2 2 2 2 2 2 2 2 + 7 | 2 2 2 1 3 3 2 4 4 2 + 8 | 2 2 2 2 2 2 2 2 2 2 + 9 | 2 2 2 1 3 2 3 4 4 2 + 10 | 5 5 2 2 2 2 2 2 2 2 + 11 | 7 7 2 2 2 2 2 2 2 2 + + Shifts | 0 4 10 14 18 24 8 20 12 26 + + The shifts that represent each state were derived using the SMT solver Z3, to ensure when encoded into + the rows the correct shift was a result. + + Each character class row is encoding 10 states with shifts as defined above. 
By shifting the bitsof a row by + the current state then masking the result with 0x11110 give the shift for the new state + + +=# + +#State type used by UTF-8 DFA +const _UTF8DFAState = UInt32 +# Fill the table with 256 UInt64 representing the DFA transitions for all bytes +const _UTF8_DFA_TABLE = let # let block rather than function doesn't pollute base + num_classes=12 + num_states=10 + bit_per_state = 6 + + # These shifts were derived using a SMT solver + state_shifts = [0, 4, 10, 14, 18, 24, 8, 20, 12, 26] + + character_classes = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, + 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ] + + # These are the rows discussed in comments above + state_arrays = [ 0 1 2 2 2 2 2 2 2 2; + 2 2 2 1 3 2 3 2 4 4; + 3 3 2 2 2 2 2 2 2 2; + 4 4 2 2 2 2 2 2 2 2; + 6 6 2 2 2 2 2 2 2 2; + 9 9 2 2 2 2 2 2 2 2; + 8 8 2 2 2 2 2 2 2 2; + 2 2 2 1 3 3 2 4 4 2; + 2 2 2 2 2 2 2 2 2 2; + 2 2 2 1 3 2 3 4 4 2; + 5 5 2 2 2 2 2 2 2 2; + 7 7 2 2 2 2 2 2 2 2] + + #This converts the state_arrays into the shift encoded _UTF8DFAState + class_row = zeros(_UTF8DFAState, num_classes) + + for i = 1:num_classes + row = _UTF8DFAState(0) + for j in 1:num_states + #Calculate the shift required for the next state + to_shift = UInt8((state_shifts[state_arrays[i,j]+1]) ) + #Shift the next state into the position of the current state + row = row | (_UTF8DFAState(to_shift) << state_shifts[j]) + end + class_row[i]=row + end + + map(c->class_row[c+1],character_classes) +end + + +const _UTF8_DFA_ASCII = _UTF8DFAState(0) #This state represents the start and end of any valid string +const _UTF8_DFA_ACCEPT = _UTF8DFAState(4) #This state represents the start and end of any valid string +const _UTF8_DFA_INVALID = _UTF8DFAState(10) # If the state machine is ever in this state just stop -byte_string_classify(s::Union{String,Vector{UInt8},FastContiguousSubArray{UInt8,1,Vector{UInt8}}}) = - ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) +# The dfa step is broken out so that it may be used in other functions. 
The mask was calculated to work with state shifts above +@inline _utf_dfa_step(state::_UTF8DFAState, byte::UInt8) = @inbounds (_UTF8_DFA_TABLE[byte+1] >> state) & _UTF8DFAState(0x0000001E) + +@inline function _isvalid_utf8_dfa(state::_UTF8DFAState, bytes::AbstractVector{UInt8}, first::Int = firstindex(bytes), last::Int = lastindex(bytes)) + for i = first:last + @inbounds state = _utf_dfa_step(state, bytes[i]) + end + return (state) +end + +@inline function _find_nonascii_chunk(chunk_size,cu::AbstractVector{CU}, first,last) where {CU} + n=first + while n <= last - chunk_size + _isascii(cu,n,n+chunk_size-1) || return n + n += chunk_size + end + n= last-chunk_size+1 + _isascii(cu,n,last) || return n + return nothing +end + +## + +# Classifcations of string # 0: neither valid ASCII nor UTF-8 # 1: valid ASCII # 2: valid UTF-8 + byte_string_classify(s::AbstractString) = byte_string_classify(codeunits(s)) + + +function byte_string_classify(bytes::AbstractVector{UInt8}) + chunk_size = 1024 + chunk_threshold = chunk_size + (chunk_size ÷ 2) + n = length(bytes) + if n > chunk_threshold + start = _find_nonascii_chunk(chunk_size,bytes,1,n) + isnothing(start) && return 1 + else + _isascii(bytes,1,n) && return 1 + start = 1 + end + return _byte_string_classify_nonascii(bytes,start,n) +end -isvalid(::Type{String}, s::Union{Vector{UInt8},FastContiguousSubArray{UInt8,1,Vector{UInt8}},String}) = byte_string_classify(s) ≠ 0 -isvalid(s::String) = isvalid(String, s) +function _byte_string_classify_nonascii(bytes::AbstractVector{UInt8}, first::Int, last::Int) + chunk_size = 256 + + start = first + stop = min(last,first + chunk_size - 1) + state = _UTF8_DFA_ACCEPT + while start <= last + # try to process ascii chunks + while state == _UTF8_DFA_ACCEPT + _isascii(bytes,start,stop) || break + (start = start + chunk_size) <= last || break + stop = min(last,stop + chunk_size) + end + # Process non ascii chunk + state = _isvalid_utf8_dfa(state,bytes,start,stop) + state == _UTF8_DFA_INVALID && return 0 + + start = start + chunk_size + stop = min(last,stop + chunk_size) + end + return ifelse(state == _UTF8_DFA_ACCEPT,2,0) +end + +isvalid(::Type{String}, bytes::AbstractVector{UInt8}) = (@inline byte_string_classify(bytes)) ≠ 0 +isvalid(::Type{String}, s::AbstractString) = (@inline byte_string_classify(s)) ≠ 0 + +@inline isvalid(s::AbstractString) = @inline isvalid(String, codeunits(s)) is_valid_continuation(c) = c & 0xc0 == 0x80 @@ -210,7 +415,8 @@ is_valid_continuation(c) = c & 0xc0 == 0x80 return iterate_continued(s, i, u) end -function iterate_continued(s::String, i::Int, u::UInt32) +# duck-type s so that external UTF-8 string packages like StringViews can hook in +function iterate_continued(s, i::Int, u::UInt32) u < 0xc0000000 && (i += 1; @goto ret) n = ncodeunits(s) # first continuation byte @@ -239,7 +445,8 @@ end return getindex_continued(s, i, u) end -function getindex_continued(s::String, i::Int, u::UInt32) +# duck-type s so that external UTF-8 string packages like StringViews can hook in +function getindex_continued(s, i::Int, u::UInt32) if u < 0xc0000000 # called from `getindex` which checks bounds @inbounds isvalid(s, i) && @goto ret @@ -282,9 +489,11 @@ getindex(s::String, r::AbstractUnitRange{<:Integer}) = s[Int(first(r)):Int(last( return ss end -length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s)) +# nothrow because we know the start and end indices are valid +@assume_effects :nothrow length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s)) -@inline function 
length(s::String, i::Int, j::Int) +# effects needed because @inbounds +@assume_effects :consistent :effect_free @inline function length(s::String, i::Int, j::Int) @boundscheck begin 0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i)) 0 ≤ j < ncodeunits(s)+1 || throw(BoundsError(s, j)) @@ -292,13 +501,13 @@ length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s)) j < i && return 0 @inbounds i, k = thisind(s, i), i c = j - i + (i == k) - length_continued(s, i, j, c) + @inbounds length_continued(s, i, j, c) end -@inline function length_continued(s::String, i::Int, n::Int, c::Int) +@assume_effects :terminates_locally @inline @propagate_inbounds function length_continued(s::String, i::Int, n::Int, c::Int) i < n || return c - @inbounds b = codeunit(s, i) - @inbounds while true + b = codeunit(s, i) + while true while true (i += 1) ≤ n || return c 0xc0 ≤ b ≤ 0xf7 && break @@ -324,12 +533,10 @@ end isvalid(s::String, i::Int) = checkbounds(Bool, s, i) && thisind(s, i) == i -function isascii(s::String) - @inbounds for i = 1:ncodeunits(s) - codeunit(s, i) >= 0x80 && return false - end - return true -end +isascii(s::String) = isascii(codeunits(s)) + +# don't assume effects for general integers since we cannot know their implementation +@assume_effects :foldable repeat(c::Char, r::BitInteger) = @invoke repeat(c::Char, r::Integer) """ repeat(c::AbstractChar, r::Integer) -> String @@ -343,8 +550,8 @@ julia> repeat('A', 3) "AAA" ``` """ -repeat(c::AbstractChar, r::Integer) = repeat(Char(c), r) # fallback -function repeat(c::Char, r::Integer) +function repeat(c::AbstractChar, r::Integer) + c = Char(c)::Char r == 0 && return "" r < 0 && throw(ArgumentError("can't repeat a character $r times")) u = bswap(reinterpret(UInt32, c)) @@ -352,7 +559,7 @@ function repeat(c::Char, r::Integer) s = _string_n(n*r) p = pointer(s) GC.@preserve s if n == 1 - ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), p, u % UInt8, r) + memset(p, u % UInt8, r) elseif n == 2 p16 = reinterpret(Ptr{UInt16}, p) for i = 1:r diff --git a/base/strings/strings.jl b/base/strings/strings.jl index 07e43674fed97..8dae311f475b4 100644 --- a/base/strings/strings.jl +++ b/base/strings/strings.jl @@ -1,10 +1,13 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +include("strings/annotated.jl") include("strings/search.jl") include("strings/unicode.jl") import .Unicode: textwidth, islowercase, isuppercase, isletter, isdigit, isnumeric, iscntrl, ispunct, isspace, isprint, isxdigit, lowercase, uppercase, titlecase, lowercasefirst, uppercasefirst +import .Iterators: PartitionIterator + include("strings/util.jl") include("strings/io.jl") diff --git a/base/strings/substring.jl b/base/strings/substring.jl index b8a0de1948326..dfd8770b08d47 100644 --- a/base/strings/substring.jl +++ b/base/strings/substring.jl @@ -36,9 +36,18 @@ struct SubString{T<:AbstractString} <: AbstractString end return new(s, i-1, nextind(s,j)-i) end + function SubString{T}(s::T, i::Int, j::Int, ::Val{:noshift}) where T<:AbstractString + @boundscheck begin + si, sj = i + 1, prevind(s, j + i + 1) + @inbounds isvalid(s, si) || string_index_err(s, si) + @inbounds isvalid(s, sj) || string_index_err(s, sj) + end + new(s, i, j) + end end @propagate_inbounds SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j) +@propagate_inbounds SubString(s::T, i::Int, j::Int, v::Val{:noshift}) where {T<:AbstractString} = SubString{T}(s, i, j, v) @propagate_inbounds SubString(s::AbstractString, i::Integer, j::Integer=lastindex(s)) = SubString(s, Int(i), Int(j)) @propagate_inbounds SubString(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, first(r), last(r)) @@ -55,13 +64,13 @@ SubString{T}(s::T) where {T<:AbstractString} = SubString{T}(s, 1, lastindex(s):: @propagate_inbounds maybeview(s::AbstractString, args...) = getindex(s, args...) convert(::Type{SubString{S}}, s::AbstractString) where {S<:AbstractString} = - SubString(convert(S, s)) + SubString(convert(S, s))::SubString{S} convert(::Type{T}, s::T) where {T<:SubString} = s # Regex match allows only Union{String, SubString{String}} so define conversion to this type convert(::Type{Union{String, SubString{String}}}, s::String) = s convert(::Type{Union{String, SubString{String}}}, s::SubString{String}) = s -convert(::Type{Union{String, SubString{String}}}, s::AbstractString) = convert(String, s) +convert(::Type{Union{String, SubString{String}}}, s::AbstractString) = convert(String, s)::String function String(s::SubString{String}) parent = s.string @@ -92,31 +101,27 @@ function getindex(s::SubString, i::Integer) @inbounds return getindex(s.string, s.offset + i) end +isascii(ss::SubString{String}) = isascii(codeunits(ss)) + function isvalid(s::SubString, i::Integer) ib = true @boundscheck ib = checkbounds(Bool, s, i) @inbounds return ib && isvalid(s.string, s.offset + i)::Bool end -byte_string_classify(s::SubString{String}) = - ccall(:u8_isvalid, Int32, (Ptr{UInt8}, Int), s, sizeof(s)) - -isvalid(::Type{String}, s::SubString{String}) = byte_string_classify(s) ≠ 0 -isvalid(s::SubString{String}) = isvalid(String, s) - thisind(s::SubString{String}, i::Int) = _thisind_str(s, i) nextind(s::SubString{String}, i::Int) = _nextind_str(s, i) +parent(s::SubString) = s.string +parentindices(s::SubString) = (s.offset + 1 : thisind(s.string, s.offset + s.ncodeunits),) + function ==(a::Union{String, SubString{String}}, b::Union{String, SubString{String}}) - s = sizeof(a) - s == sizeof(b) && 0 == _memcmp(a, b, s) + sizeof(a) == sizeof(b) && _memcmp(a, b) == 0 end function cmp(a::SubString{String}, b::SubString{String}) - na = sizeof(a) - nb = sizeof(b) - c = _memcmp(a, b, min(na, nb)) - return c < 0 ? -1 : c > 0 ? +1 : cmp(na, nb) + c = _memcmp(a, b) + return c < 0 ? -1 : c > 0 ? 
+1 : cmp(sizeof(a), sizeof(b)) end # don't make unnecessary copies when passing substrings to C functions @@ -207,19 +212,30 @@ end return n end -@inline function __unsafe_string!(out, s::Union{String, SubString{String}}, offs::Integer) +@assume_effects :nothrow @inline function __unsafe_string!(out, s::String, offs::Integer) n = sizeof(s) GC.@preserve s out unsafe_copyto!(pointer(out, offs), pointer(s), n) return n end -@inline function __unsafe_string!(out, s::Symbol, offs::Integer) +@inline function __unsafe_string!(out, s::SubString{String}, offs::Integer) + n = sizeof(s) + GC.@preserve s out unsafe_copyto!(pointer(out, offs), pointer(s), n) + return n +end + +@assume_effects :nothrow @inline function __unsafe_string!(out, s::Symbol, offs::Integer) n = sizeof(s) GC.@preserve s out unsafe_copyto!(pointer(out, offs), unsafe_convert(Ptr{UInt8},s), n) return n end -function string(a::Union{Char, String, SubString{String}, Symbol}...) +# nothrow needed here because for v in a can't prove the indexing is inbounds. +@assume_effects :foldable :nothrow string(a::Union{Char, String, Symbol}...) = _string(a...) + +string(a::Union{Char, String, SubString{String}, Symbol}...) = _string(a...) + +function _string(a::Union{Char, String, SubString{String}, Symbol}...) n = 0 for v in a # 4 types is too many for automatic Union-splitting, so we split manually @@ -248,6 +264,10 @@ function string(a::Union{Char, String, SubString{String}, Symbol}...) return out end +# don't assume effects for general integers since we cannot know their implementation +# not nothrow because r<0 throws +@assume_effects :foldable repeat(s::String, r::BitInteger) = @invoke repeat(s::String, r::Integer) + function repeat(s::Union{String, SubString{String}}, r::Integer) r < 0 && throw(ArgumentError("can't repeat a string $r times")) r == 0 && return "" @@ -256,7 +276,7 @@ function repeat(s::Union{String, SubString{String}}, r::Integer) out = _string_n(n*r) if n == 1 # common case: repeating a single-byte string @inbounds b = codeunit(s, 1) - ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), out, b, r) + memset(unsafe_convert(Ptr{UInt8}, out), b, r) else for i = 0:r-1 GC.@preserve s out unsafe_copyto!(pointer(out, i*n+1), pointer(s), n) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index 36af24bda857b..2e04633b87487 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -11,7 +11,7 @@ import Base: show, ==, hash, string, Symbol, isless, length, eltype, """ isvalid(value) -> Bool -Returns `true` if the given value is valid for its type, which currently can be either +Return `true` if the given value is valid for its type, which currently can be either `AbstractChar` or `String` or `SubString{String}`. # Examples @@ -31,7 +31,7 @@ isvalid(value) """ isvalid(T, value) -> Bool -Returns `true` if the given value is valid for that type. Types currently can +Return `true` if the given value is valid for that type. Types currently can be either `AbstractChar` or `String`. Values for `AbstractChar` can be of type `AbstractChar` or [`UInt32`](@ref). Values for `String` can be of that type, `SubString{String}`, `Vector{UInt8}`, or a contiguous subarray thereof. 
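A quick usage sketch of the validation entry points described above (editorial illustration; the byte sequences are arbitrary examples, and the results follow from the UTF-8 rules the new DFA encodes):

```julia
isvalid(String, [0x66, 0x6f, 0x6f])  # true: plain ASCII bytes ("foo")
isvalid(String, [0xe2, 0x88, 0x80])  # true: a single valid 3-byte character ('∀')
isvalid(String, [0xc0, 0xaf])        # false: overlong encoding, rejected
isvalid("abc∀")                      # true: AbstractStrings are checked via their code units
```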
@@ -179,6 +179,7 @@ const _julia_charmap = Dict{UInt32,UInt32}( 0x00B7 => 0x22C5, 0x0387 => 0x22C5, 0x2212 => 0x002D, + 0x210F => 0x0127, ) utf8proc_map(s::AbstractString, flags::Integer, chartransform=identity) = utf8proc_map(String(s), flags, chartransform) @@ -468,7 +469,7 @@ A character is classified as numeric if it belongs to the Unicode general catego i.e. a character whose category code begins with 'N'. Note that this broad category includes characters such as ¾ and ௰. -Use [`isdigit`](@ref) to check whether a character a decimal digit between 0 and 9. +Use [`isdigit`](@ref) to check whether a character is a decimal digit between 0 and 9. # Examples ```jldoctest diff --git a/base/strings/util.jl b/base/strings/util.jl index bfc361e92d16f..a3f8e9a4773a6 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -67,6 +67,25 @@ function startswith(a::Union{String, SubString{String}}, end end +""" + startswith(io::IO, prefix::Union{AbstractString,Base.Chars}) + +Check if an `IO` object starts with a prefix. See also [`peek`](@ref). +""" +function Base.startswith(io::IO, prefix::Base.Chars) + mark(io) + c = read(io, Char) + reset(io) + return c in prefix +end +function Base.startswith(io::IO, prefix::Union{String,SubString{String}}) + mark(io) + s = read(io, ncodeunits(prefix)) + reset(io) + return s == codeunits(prefix) +end +Base.startswith(io::IO, prefix::AbstractString) = startswith(io, String(prefix)) + function endswith(a::Union{String, SubString{String}}, b::Union{String, SubString{String}}) cub = ncodeunits(b) @@ -350,6 +369,7 @@ function lstrip(f, s::AbstractString) end lstrip(s::AbstractString) = lstrip(isspace, s) lstrip(s::AbstractString, chars::Chars) = lstrip(in(chars), s) +lstrip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s")) """ rstrip([pred=isspace,] str::AbstractString) -> SubString @@ -383,6 +403,8 @@ function rstrip(f, s::AbstractString) end rstrip(s::AbstractString) = rstrip(isspace, s) rstrip(s::AbstractString, chars::Chars) = rstrip(in(chars), s) +rstrip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s")) + """ strip([pred=isspace,] str::AbstractString) -> SubString @@ -410,6 +432,7 @@ julia> strip("{3, 5}\\n", ['{', '}', '\\n']) """ strip(s::AbstractString) = lstrip(rstrip(s)) strip(s::AbstractString, chars::Chars) = lstrip(rstrip(s, chars), chars) +strip(::AbstractString, ::AbstractString) = throw(ArgumentError("Both arguments are strings. The second argument should be a `Char` or collection of `Char`s")) strip(f, s::AbstractString) = lstrip(f, rstrip(f, s)) ## string padding functions ## @@ -435,13 +458,15 @@ function lpad( s::Union{AbstractChar,AbstractString}, n::Integer, p::Union{AbstractChar,AbstractString}=' ', -) :: String +) + stringfn = if any(isa.((s, p), Union{AnnotatedString, AnnotatedChar, SubString{<:AnnotatedString}})) + annotatedstring else string end n = Int(n)::Int m = signed(n) - Int(textwidth(s))::Int - m ≤ 0 && return string(s) + m ≤ 0 && return stringfn(s) l = textwidth(p) q, r = divrem(m, l) - r == 0 ? string(p^q, s) : string(p^q, first(p, r), s) + r == 0 ? 
stringfn(p^q, s) : stringfn(p^q, first(p, r), s) end """ @@ -465,13 +490,15 @@ function rpad( s::Union{AbstractChar,AbstractString}, n::Integer, p::Union{AbstractChar,AbstractString}=' ', -) :: String +) + stringfn = if any(isa.((s, p), Union{AnnotatedString, AnnotatedChar, SubString{<:AnnotatedString}})) + annotatedstring else string end n = Int(n)::Int m = signed(n) - Int(textwidth(s))::Int - m ≤ 0 && return string(s) + m ≤ 0 && return stringfn(s) l = textwidth(p) q, r = divrem(m, l) - r == 0 ? string(s, p^q) : string(s, p^q, first(p, r)) + r == 0 ? stringfn(s, p^q) : stringfn(s, p^q, first(p, r)) end """ @@ -500,7 +527,10 @@ See also [`split`](@ref). julia> a = "Ma.rch" "Ma.rch" -julia> collect(eachsplit(a, ".")) +julia> b = eachsplit(a, ".") +Base.SplitIterator{String, String}("Ma.rch", ".", 0, true) + +julia> collect(b) 2-element Vector{SubString{String}}: "Ma" "rch" @@ -530,7 +560,7 @@ function iterate(iter::SplitIterator, (i, k, n)=(firstindex(iter.str), firstinde r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}} while r !== nothing && n != iter.limit - 1 && first(r) <= ncodeunits(iter.str) j, k = first(r), nextind(iter.str, last(r))::Int - k_ = k <= j ? nextind(iter.str, j) : k + k_ = k <= j ? nextind(iter.str, j)::Int : k if i < k substr = @inbounds SubString(iter.str, i, prevind(iter.str, j)::Int) (iter.keepempty || i < j) && return (substr, (k, k_, n + 1)) @@ -543,6 +573,17 @@ function iterate(iter::SplitIterator, (i, k, n)=(firstindex(iter.str), firstinde @inbounds SubString(iter.str, i), (ncodeunits(iter.str) + 2, k, n + 1) end +# Specialization for partition(s,n) to return a SubString +eltype(::Type{PartitionIterator{T}}) where {T<:AbstractString} = SubString{T} +# SubStrings do not nest +eltype(::Type{PartitionIterator{T}}) where {T<:SubString} = T + +function iterate(itr::PartitionIterator{<:AbstractString}, state = firstindex(itr.c)) + state > ncodeunits(itr.c) && return nothing + r = min(nextind(itr.c, state, itr.n - 1), lastindex(itr.c)) + return SubString(itr.c, state, r), nextind(itr.c, r) +end + eachsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} = SplitIterator(str, splitter, limit, keepempty) @@ -557,6 +598,101 @@ eachsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) wher eachsplit(str::AbstractString; limit::Integer=0, keepempty=false) = eachsplit(str, isspace; limit, keepempty) +""" + eachrsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true) + eachrsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false) + +Return an iterator over `SubString`s of `str`, produced when splitting on +the delimiter(s) `dlm`, and yielded in reverse order (from right to left). +`dlm` can be any of the formats allowed by [`findprev`](@ref)'s first argument +(i.e. a string, a single character or a function), or a collection of characters. + +If `dlm` is omitted, it defaults to [`isspace`](@ref), and `keepempty` default to `false`. + +The optional keyword arguments are: + - If `limit > 0`, the iterator will split at most `limit - 1` times before returning + the rest of the string unsplit. `limit < 1` implies no cap to splits (default). + - `keepempty`: whether empty fields should be returned when iterating + Default is `false` without a `dlm` argument, `true` with a `dlm` argument. + +Note that unlike [`split`](@ref), [`rsplit`](@ref) and [`eachsplit`](@ref), this +function iterates the substrings right to left as they occur in the input. 
+ +See also [`eachsplit`](@ref), [`rsplit`](@ref). + +!!! compat "Julia 1.11" + This function requires Julia 1.11 or later. + +# Examples +```jldoctest +julia> a = "Ma.r.ch"; + +julia> collect(eachrsplit(a, ".")) == ["ch", "r", "Ma"] +true + +julia> collect(eachrsplit(a, "."; limit=2)) == ["ch", "Ma.r"] +true +``` +""" +function eachrsplit end + +struct RSplitIterator{S <: AbstractString, F} + str::S + splitter::F + limit::Int + keepempty::Bool +end + +eltype(::Type{<:RSplitIterator{T}}) where T = SubString{T} +eltype(::Type{<:RSplitIterator{<:SubString{T}}}) where T = SubString{T} + +IteratorSize(::Type{<:RSplitIterator}) = SizeUnknown() + +eachrsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} = + RSplitIterator(str, splitter, limit, keepempty) + +eachrsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}; + limit::Integer=0, keepempty=true) where {T<:AbstractString} = + eachrsplit(str, in(splitter); limit, keepempty) + +eachrsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) where {T<:AbstractString} = + eachrsplit(str, isequal(splitter); limit, keepempty) + +# a bit oddball, but standard behavior in Perl, Ruby & Python: +eachrsplit(str::AbstractString; limit::Integer=0, keepempty=false) = + eachrsplit(str, isspace; limit, keepempty) + +function Base.iterate(it::RSplitIterator, (to, remaining_splits)=(lastindex(it.str), it.limit-1)) + to < 0 && return nothing + from = 1 + next_to = -1 + while !iszero(remaining_splits) + pos = findprev(it.splitter, it.str, to) + # If no matches: It returns the rest of the string, then the iterator stops. + if pos === nothing + from = 1 + next_to = -1 + break + else + from = nextind(it.str, last(pos)) + # pos can be empty if we search for a zero-width delimiter, in which + # case pos is to:to-1. + # In this case, next_to must be to - 1, except if to is 0 or 1, in + # which case, we must stop iteration for some reason. + next_to = (isempty(pos) & (to < 2)) ? -1 : prevind(it.str, first(pos)) + + # If the element we emit is empty, discard it based on keepempty + if from > to && !(it.keepempty) + to = next_to + continue + end + break + end + end + from > to && !(it.keepempty) && return nothing + return (SubString(it.str, from, to), (next_to, remaining_splits-1)) +end + """ split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true) split(str::AbstractString; limit::Integer=0, keepempty::Bool=false) @@ -625,37 +761,15 @@ julia> rsplit(a, "."; limit=2) "h" ``` """ -function rsplit end - function rsplit(str::T, splitter; - limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} - _rsplit(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[]) -end -function rsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}; - limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} - _rsplit(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[]) -end -function rsplit(str::T, splitter::AbstractChar; - limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} - _rsplit(str, isequal(splitter), limit, keepempty, T <: SubString ? 
T[] : SubString{T}[]) -end - -function _rsplit(str::AbstractString, splitter, limit::Integer, keepempty::Bool, strs::Array) - n = lastindex(str)::Int - r = something(findlast(splitter, str)::Union{Nothing,Int,UnitRange{Int}}, 0) - j, k = first(r), last(r) - while j > 0 && k > 0 && length(strs) != limit-1 - (keepempty || k < n) && pushfirst!(strs, @inbounds SubString(str,nextind(str,k)::Int,n)) - n = prevind(str, j)::Int - r = something(findprev(splitter,str,n)::Union{Nothing,Int,UnitRange{Int}}, 0) - j, k = first(r), last(r) - end - (keepempty || n > 0) && pushfirst!(strs, SubString(str,1,n)) - return strs + limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} + reverse!(collect(eachrsplit(str, splitter; limit, keepempty))) end + +# a bit oddball, but standard behavior in Perl, Ruby & Python: rsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false) = - rsplit(str, isspace; limit=limit, keepempty=keepempty) + rsplit(str, isspace; limit, keepempty) _replace(io, repl, str, r, pattern) = print(io, repl) _replace(io, repl::Function, str, r, pattern) = @@ -669,12 +783,11 @@ _free_pat_replacer(x) = nothing _pat_replacer(x::AbstractChar) = isequal(x) _pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}) = in(x) -function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(Int)) where N - count == 0 && return str +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_init(str, pat_repl::NTuple{N, Pair}, count::Int) where N count < 0 && throw(DomainError(count, "`count` must be non-negative.")) - n = 1 - e1 = nextind(str, lastindex(str)) # sizeof(str) - i = a = firstindex(str) + e1 = nextind(str, lastindex(str)) # sizeof(str)+1 + a = firstindex(str) patterns = map(p -> _pat_replacer(first(p)), pat_repl) replaces = map(last, pat_repl) rs = map(patterns) do p @@ -685,11 +798,14 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I r isa Int && (r = r:r) # findnext / performance fix return r end - if all(>(e1), map(first, rs)) - foreach(_free_pat_replacer, patterns) - return str - end - out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) + return e1, patterns, replaces, rs, all(>(e1), map(first, rs)) +end + +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_finish(io::IO, str, count::Int, + e1::Int, patterns::Tuple, replaces::Tuple, rs::Tuple) + n = 1 + i = a = firstindex(str) while true p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ? 
r = rs[p] @@ -697,9 +813,9 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I j > e1 && break if i == a || i <= k # copy out preserved portion - GC.@preserve str unsafe_write(out, pointer(str, i), UInt(j-i)) + GC.@preserve str unsafe_write(io, pointer(str, i), UInt(j-i)) # copy out replacement string - _replace(out, replaces[p], str, r, patterns[p]) + _replace(io, replaces[p], str, r, patterns[p]) end if k < j i = j @@ -724,13 +840,39 @@ function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(I n += 1 end foreach(_free_pat_replacer, patterns) - write(out, SubString(str, i)) - return String(take!(out)) + write(io, SubString(str, i)) + return io end +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_(io::IO, str, pat_repl::NTuple{N, Pair}, count::Int) where N + if count == 0 + write(io, str) + return io + end + e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) + if notfound + foreach(_free_pat_replacer, patterns) + write(io, str) + return io + end + return _replace_finish(io, str, count, e1, patterns, replaces, rs) +end + +# note: leave str untyped here to make it easier for packages like StringViews to hook in +function _replace_(str, pat_repl::NTuple{N, Pair}, count::Int) where N + count == 0 && return String(str) + e1, patterns, replaces, rs, notfound = _replace_init(str, pat_repl, count) + if notfound + foreach(_free_pat_replacer, patterns) + return String(str) + end + out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str))) + return String(take!(_replace_finish(out, str, count, e1, patterns, replaces, rs))) +end """ - replace(s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) + replace([io::IO], s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer]) Search for the given pattern `pat` in `s`, and replace each occurrence with `r`. If `count` is provided, replace at most `count` occurrences. @@ -743,6 +885,11 @@ If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then references in `r` are replaced with the corresponding matched text. To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`). +The return value is a new string after the replacements. If the `io::IO` argument +is supplied, the transformed string is instead written to `io` (returning `io`). +(For example, this can be used in conjunction with an [`IOBuffer`](@ref) to re-use +a pre-allocated buffer array in-place.) + Multiple patterns can be specified, and they will be applied left-to-right simultaneously, so only one pattern will be applied to any character, and the patterns will only be applied to the input text, not the replacements. @@ -750,6 +897,9 @@ patterns will only be applied to the input text, not the replacements. !!! compat "Julia 1.7" Support for multiple patterns requires version 1.7. +!!! compat "Julia 1.10" + The `io::IO` argument requires version 1.10. + # Examples ```jldoctest julia> replace("Python is a programming language.", "Python" => "Julia") @@ -768,8 +918,12 @@ julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a") "bca" ``` """ +replace(io::IO, s::AbstractString, pat_f::Pair...; count=typemax(Int)) = + _replace_(io, String(s), pat_f, Int(count)) + replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) = - replace(String(s), pat_f..., count=count) + _replace_(String(s), pat_f, Int(count)) + # TODO: allow transform as the first argument to replace? 
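The `io`-accepting method introduced above lets a single output buffer be reused across many replacements instead of allocating a fresh `String` each time. A minimal sketch, assuming the Julia 1.10 behaviour documented in the docstring (`buf` is just an illustrative name):

```julia
buf = IOBuffer()                                    # pre-allocated, reusable output
replace(buf, "one two two", "two" => "2"; count=1)  # writes the result into buf and returns buf
String(take!(buf))                                  # "one 2 two"; buf is now empty and ready for reuse
```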
@@ -818,7 +972,7 @@ julia> hex2bytes(a) """ function hex2bytes end -hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s) +hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s)::Int >> 1), s) # special case - valid bytes are checked in the generic implementation function hex2bytes!(dest::AbstractArray{UInt8}, s::String) diff --git a/base/subarray.jl b/base/subarray.jl index 17bd6450f0d79..fcc417fa1d157 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -52,8 +52,10 @@ viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@inline; viewindexing(tail( # A UnitRange can follow Slices, but only if all other indices are scalar viewindexing(I::Tuple{Slice, AbstractUnitRange, Vararg{ScalarIndex}}) = IndexLinear() viewindexing(I::Tuple{Slice, Slice, Vararg{ScalarIndex}}) = IndexLinear() # disambiguate -# In general, ranges are only fast if all other indices are scalar -viewindexing(I::Tuple{AbstractRange, Vararg{ScalarIndex}}) = IndexLinear() +# In general, scalar ranges are only fast if all other indices are scalar +# Other ranges, such as those of `CartesianIndex`es, are not fast even if these +# are followed by `ScalarIndex`es +viewindexing(I::Tuple{AbstractRange{<:ScalarIndex}, Vararg{ScalarIndex}}) = IndexLinear() # All other index combinations are slow viewindexing(I::Tuple{Vararg{Any}}) = IndexCartesian() # Of course, all other array types are slow @@ -81,7 +83,7 @@ parentindices(V::SubArray) = V.indices """ parentindices(A) -Return the indices in the [`parent`](@ref) which correspond to the array view `A`. +Return the indices in the [`parent`](@ref) which correspond to the view `A`. # Examples ```jldoctest @@ -96,6 +98,8 @@ julia> parentindices(V) (1, Base.Slice(Base.OneTo(2))) ``` """ +function parentindices end + parentindices(a::AbstractArray) = map(oneto, size(a)) ## Aliasing detection @@ -106,16 +110,41 @@ unaliascopy(A::SubArray) = typeof(A)(unaliascopy(A.parent), map(unaliascopy, A.i # When the parent is an Array we can trim the size down a bit. In the future this # could possibly be extended to any mutable array. -function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{Real,AbstractRange,Array}}},LD} - dest = Array{T}(undef, index_lengths(V.indices...)) - copyto!(dest, V) +function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Vararg{Union{ScalarIndex,AbstractRange{<:ScalarIndex},Array{<:Union{ScalarIndex,AbstractCartesianIndex}}}}},LD} + dest = Array{T}(undef, _trimmedshape(V.indices...)) + trimmedpind = _trimmedpind(V.indices...) + vdest = trimmedpind isa Tuple{Vararg{Union{Slice,Colon}}} ? dest : view(dest, trimmedpind...) + copyto!(vdest, view(V, _trimmedvind(V.indices...)...)) SubArray{T,N,A,I,LD}(dest, map(_trimmedindex, V.indices), 0, Int(LD)) end +# Get the proper trimmed shape +_trimmedshape(::ScalarIndex, rest...) = (1, _trimmedshape(rest...)...) +_trimmedshape(i::AbstractRange, rest...) = (maximum(i), _trimmedshape(rest...)...) +_trimmedshape(i::Union{UnitRange,StepRange,OneTo}, rest...) = (length(i), _trimmedshape(rest...)...) +_trimmedshape(i::AbstractArray{<:ScalarIndex}, rest...) = (length(i), _trimmedshape(rest...)...) +_trimmedshape(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = _trimmedshape(rest...) +_trimmedshape(i::AbstractArray{<:AbstractCartesianIndex{N}}, rest...) where {N} = (length(i), ntuple(Returns(1), Val(N - 1))..., _trimmedshape(rest...)...) +_trimmedshape() = () +# We can avoid the repeation from `AbstractArray{CartesianIndex{0}}` +_trimmedpind(i, rest...) 
= (map(Returns(:), axes(i))..., _trimmedpind(rest...)...) +_trimmedpind(i::AbstractRange, rest...) = (i, _trimmedpind(rest...)...) +_trimmedpind(i::Union{UnitRange,StepRange,OneTo}, rest...) = ((:), _trimmedpind(rest...)...) +_trimmedpind(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = _trimmedpind(rest...) +_trimmedpind() = () +_trimmedvind(i, rest...) = (map(Returns(:), axes(i))..., _trimmedvind(rest...)...) +_trimmedvind(i::AbstractArray{<:AbstractCartesianIndex{0}}, rest...) = (map(first, axes(i))..., _trimmedvind(rest...)...) +_trimmedvind() = () # Transform indices to be "dense" -_trimmedindex(i::Real) = oftype(i, 1) -_trimmedindex(i::AbstractUnitRange) = oftype(i, oneto(length(i))) -_trimmedindex(i::AbstractArray) = oftype(i, reshape(eachindex(IndexLinear(), i), axes(i))) - +_trimmedindex(i::ScalarIndex) = oftype(i, 1) +_trimmedindex(i::AbstractRange) = i +_trimmedindex(i::Union{UnitRange,StepRange,OneTo}) = oftype(i, oneto(length(i))) +_trimmedindex(i::AbstractArray{<:ScalarIndex}) = oftype(i, reshape(eachindex(IndexLinear(), i), axes(i))) +_trimmedindex(i::AbstractArray{<:AbstractCartesianIndex{0}}) = oftype(i, copy(i)) +function _trimmedindex(i::AbstractArray{<:AbstractCartesianIndex{N}}) where {N} + padding = ntuple(Returns(1), Val(N - 1)) + ax1 = eachindex(IndexLinear(), i) + return oftype(i, reshape(CartesianIndices((ax1, padding...)), axes(i))) +end ## SubArray creation # We always assume that the dimensionality of the parent matches the number of # indices that end up getting passed to it, so we store the parent as a @@ -305,6 +334,15 @@ function getindex(V::FastContiguousSubArray, i::Int) @inbounds r = V.parent[V.offset1 + i] r end +# parents of FastContiguousSubArrays may support fast indexing with AbstractUnitRanges, +# so we may just forward the indexing to the parent +function getindex(V::FastContiguousSubArray, i::AbstractUnitRange{Int}) + @inline + @boundscheck checkbounds(V, i) + @inbounds r = V.parent[V.offset1 .+ i] + r +end + # For vector views with linear indexing, we disambiguate to favor the stride/offset # computation as that'll generally be faster than (or just as fast as) re-indexing into a range. function getindex(V::FastSubArray{<:Any, 1}, i::Int) @@ -319,6 +357,7 @@ function getindex(V::FastContiguousSubArray{<:Any, 1}, i::Int) @inbounds r = V.parent[V.offset1 + i] r end +@inline getindex(V::FastContiguousSubArray, i::Colon) = getindex(V, to_indices(V, (:,))...) # Indexed assignment follows the same pattern as `getindex` above function setindex!(V::SubArray{T,N}, x, I::Vararg{Int,N}) where {T,N} @@ -339,6 +378,19 @@ function setindex!(V::FastContiguousSubArray, x, i::Int) @inbounds V.parent[V.offset1 + i] = x V end +function setindex!(V::FastSubArray, x, i::AbstractUnitRange{Int}) + @inline + @boundscheck checkbounds(V, i) + @inbounds V.parent[V.offset1 .+ V.stride1 .* i] = x + V +end +function setindex!(V::FastContiguousSubArray, x, i::AbstractUnitRange{Int}) + @inline + @boundscheck checkbounds(V, i) + @inbounds V.parent[V.offset1 .+ i] = x + V +end + function setindex!(V::FastSubArray{<:Any, 1}, x, i::Int) @inline @boundscheck checkbounds(V, i) @@ -351,6 +403,38 @@ function setindex!(V::FastContiguousSubArray{<:Any, 1}, x, i::Int) @inbounds V.parent[V.offset1 + i] = x V end +@inline setindex!(V::FastSubArray, x, i::Colon) = setindex!(V, x, to_indices(V, (i,))...) + +function isassigned(V::SubArray{T,N}, I::Vararg{Int,N}) where {T,N} + @inline + @boundscheck checkbounds(Bool, V, I...) 
|| return false + @inbounds r = isassigned(V.parent, reindex(V.indices, I)...) + r +end +function isassigned(V::FastSubArray, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i) + r +end +function isassigned(V::FastContiguousSubArray, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + i) + r +end +function isassigned(V::FastSubArray{<:Any, 1}, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i) + r +end +function isassigned(V::FastContiguousSubArray{<:Any, 1}, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + i) + r +end IndexStyle(::Type{<:FastSubArray}) = IndexLinear() IndexStyle(::Type{<:SubArray}) = IndexCartesian() @@ -383,12 +467,8 @@ iscontiguous(A::SubArray) = iscontiguous(typeof(A)) iscontiguous(::Type{<:SubArray}) = false iscontiguous(::Type{<:FastContiguousSubArray}) = true -first_index(V::FastSubArray) = V.offset1 + V.stride1 # cached for fast linear SubArrays -function first_index(V::SubArray) - P, I = parent(V), V.indices - s1 = compute_stride1(P, I) - s1 + compute_offset1(P, s1, I) -end +first_index(V::FastSubArray) = V.offset1 + V.stride1 * firstindex(V) # cached for fast linear SubArrays +first_index(V::SubArray) = compute_linindex(parent(V), V.indices) # Computing the first index simply steps through the indices, accumulating the # sum of index each multiplied by the parent's stride. @@ -414,11 +494,6 @@ function compute_linindex(parent, I::NTuple{N,Any}) where N IP = fill_to_length(axes(parent), OneTo(1), Val(N)) compute_linindex(first(LinearIndices(parent)), 1, IP, I) end -function compute_linindex(f, s, IP::Tuple, I::Tuple{ScalarIndex, Vararg{Any}}) - @inline - Δi = I[1]-first(IP[1]) - compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I)) -end function compute_linindex(f, s, IP::Tuple, I::Tuple{Any, Vararg{Any}}) @inline Δi = first(I[1])-first(IP[1]) @@ -433,10 +508,6 @@ find_extended_inds(::ScalarIndex, I...) = (@inline; find_extended_inds(I...)) find_extended_inds(i1, I...) = (@inline; (i1, find_extended_inds(I...)...)) find_extended_inds() = () -function unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{RangeIndex}}}) where {T,N,P} - return unsafe_convert(Ptr{T}, V.parent) + _memory_offset(V.parent, map(first, V.indices)...) -end - pointer(V::FastSubArray, i::Int) = pointer(V.parent, V.offset1 + V.stride1*i) pointer(V::FastContiguousSubArray, i::Int) = pointer(V.parent, V.offset1 + i) @@ -459,3 +530,12 @@ function _indices_sub(i1::AbstractArray, I...) @inline (axes(i1)..., _indices_sub(I...)...) end + +has_offset_axes(S::SubArray) = has_offset_axes(S.indices...) 
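+
+# Illustrative usage sketch (no behavioural change): with the unit-range forwarding and
+# `isassigned` methods added above, a contiguous view reads, writes, and checks elements
+# directly through its parent, e.g.
+#
+#     A = collect(1:10); V = view(A, 3:8)   # a FastContiguousSubArray
+#     V[2:4]                                # reads A[4:6] => [4, 5, 6]
+#     V[1:2] = [30, 40]                     # writes A[3:4] in place
+#     isassigned(V, 1)                      # true, bounds-checked against the view
+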
+ +function replace_in_print_matrix(S::SubArray{<:Any,2,<:AbstractMatrix}, i::Integer, j::Integer, s::AbstractString) + replace_in_print_matrix(S.parent, to_indices(S.parent, reindex(S.indices, (i,j)))..., s) +end +function replace_in_print_matrix(S::SubArray{<:Any,1,<:AbstractVector}, i::Integer, j::Integer, s::AbstractString) + replace_in_print_matrix(S.parent, to_indices(S.parent, reindex(S.indices, (i,)))..., j, s) +end diff --git a/base/summarysize.jl b/base/summarysize.jl index 849edee206454..2505824768099 100644 --- a/base/summarysize.jl +++ b/base/summarysize.jl @@ -26,7 +26,7 @@ julia> Base.summarysize(1.0) 8 julia> Base.summarysize(Ref(rand(100))) -848 +864 julia> sizeof(Ref(rand(100))) 8 @@ -49,9 +49,9 @@ function summarysize(obj; if isassigned(x, i) val = x[i] end - elseif isa(x, Array) + elseif isa(x, GenericMemory) nf = length(x) - if ccall(:jl_array_isassigned, Cint, (Any, UInt), x, i - 1) != 0 + if @inbounds @inline isassigned(x, i) val = x[i] end else @@ -77,7 +77,7 @@ end (ss::SummarySize)(@nospecialize obj) = _summarysize(ss, obj) # define the general case separately to make sure it is not specialized for every type @noinline function _summarysize(ss::SummarySize, @nospecialize obj) - isdefined(typeof(obj), :instance) && return 0 + issingletontype(typeof(obj)) && return 0 # NOTE: this attempts to discover multiple copies of the same immutable value, # and so is somewhat approximate. key = ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), obj) @@ -126,14 +126,14 @@ function (ss::SummarySize)(obj::Core.TypeName) return Core.sizeof(obj) + (isdefined(obj, :mt) ? ss(obj.mt) : 0) end -function (ss::SummarySize)(obj::Array) +function (ss::SummarySize)(obj::GenericMemory) haskey(ss.seen, obj) ? (return 0) : (ss.seen[obj] = true) - headersize = 4*sizeof(Int) + 8 + max(0, ndims(obj)-2)*sizeof(Int) + headersize = 2*sizeof(Int) size::Int = headersize datakey = unsafe_convert(Ptr{Cvoid}, obj) if !haskey(ss.seen, datakey) ss.seen[datakey] = true - dsize = Core.sizeof(obj) + dsize = sizeof(obj) T = eltype(obj) if isbitsunion(T) # add 1 union selector byte for each element @@ -170,7 +170,7 @@ function (ss::SummarySize)(obj::Module) if isa(value, UnionAll) value = unwrap_unionall(value) end - if isa(value, DataType) && value.name.module === obj && value.name.name === binding + if isa(value, DataType) && parentmodule(value) === obj && nameof(value) === binding # charge a TypeName to its module (but not to the type) size += ss(value.name)::Int end diff --git a/base/sysimg.jl b/base/sysimg.jl index f5a7fb22bf2bd..4ae876a929f0e 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -5,79 +5,95 @@ Core.include(Main, "Base.jl") using .Base # Set up Main module -import Base.MainInclude: eval, include +using Base.MainInclude # ans, err, and sometimes Out + +# These definitions calls Base._include rather than Base.include to get +# one-frame stacktraces for the common case of using include(fname) in Main. + +""" + include([mapexpr::Function,] path::AbstractString) + +Evaluate the contents of the input source file in the global scope of the containing module. +Every module (except those defined with `baremodule`) has its own +definition of `include`, which evaluates the file in that module. +Returns the result of the last evaluated expression of the input file. During including, +a task-local include path is set to the directory containing the file. Nested calls to +`include` will search relative to that path. 
This function is typically used to load source +interactively, or to combine files in packages that are broken into multiple source files. +The argument `path` is normalized using [`normpath`](@ref) which will resolve +relative path tokens such as `..` and convert `/` to the appropriate path separator. + +The optional first argument `mapexpr` can be used to transform the included code before +it is evaluated: for each parsed expression `expr` in `path`, the `include` function +actually evaluates `mapexpr(expr)`. If it is omitted, `mapexpr` defaults to [`identity`](@ref). + +Use [`Base.include`](@ref) to evaluate a file into another module. + +!!! compat "Julia 1.5" + Julia 1.5 is required for passing the `mapexpr` argument. +""" +include(mapexpr::Function, fname::AbstractString) = Base._include(mapexpr, Main, fname) +function include(fname::AbstractString) + isa(fname, String) || (fname = Base.convert(String, fname)::String) + Base._include(identity, Main, fname) +end + +""" + eval(expr) + +Evaluate an expression in the global scope of the containing module. +Every `Module` (except those defined with `baremodule`) has its own 1-argument +definition of `eval`, which evaluates expressions in that module. +""" +eval(x) = Core.eval(Main, x) # Ensure this file is also tracked -pushfirst!(Base._included_files, (@__MODULE__, joinpath(@__DIR__, "Base.jl"))) -pushfirst!(Base._included_files, (@__MODULE__, joinpath(@__DIR__, "sysimg.jl"))) +pushfirst!(Base._included_files, (@__MODULE__, abspath(@__FILE__))) + +# set up depot & load paths to be able to find stdlib packages +Base.init_depot_path() +Base.init_load_path() if Base.is_primary_base_module # load some stdlib packages but don't put their names in Main let - # set up depot & load paths to be able to find stdlib packages - push!(empty!(LOAD_PATH), "@stdlib") - Base.append_default_depot_path!(DEPOT_PATH) + # Loading here does not call __init__(). This leads to uninitialized RNG + # state which causes rand(::UnitRange{Int}) to hang. 
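A small usage sketch of the `mapexpr` hook documented above (illustrative only; `demo.jl` and the `logged` transformation are made up for the example):

```julia
# demo.jl contains a single expression:  x = 1 + 2

# Plain include evaluates the file as written:
include("demo.jl")            # defines x == 3

# With mapexpr, every parsed top-level expression is rewritten before
# evaluation. Here each expression is wrapped so it gets logged first:
logged(expr) = :(println("evaluating: ", $(QuoteNode(expr))); $expr)
include(logged, "demo.jl")    # prints the expression, then defines x
```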
This is a workaround: + task = current_task() + task.rngState0 = 0x5156087469e170ab + task.rngState1 = 0x7431eaead385992c + task.rngState2 = 0x503e1d32781c2608 + task.rngState3 = 0x3a77f7189200c20b + task.rngState4 = 0x5502376d099035ae # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl # Run with the `--exclude-jlls` option to filter out all JLL packages stdlibs = [ # No dependencies - :ArgTools, - :Artifacts, - :Base64, - :CRC32c, - :FileWatching, - :Libdl, - :Logging, - :Mmap, - :NetworkOptions, - :SHA, - :Serialization, - :Sockets, - :Unicode, + :FileWatching, # used by loading.jl -- implicit assumption that init runs + :Libdl, # Transitive through LinAlg + :Artifacts, # Transitive through LinAlg + :SHA, # transitive through Random + :Sockets, # used by stream.jl + + # Transitive through LingAlg + # OpenBLAS_jll + # libblastrampoline_jll # 1-depth packages - :LinearAlgebra, - :Markdown, - :Printf, - :Random, - :Tar, - - # 2-depth packages - :Dates, - :Distributed, - :Future, - :InteractiveUtils, - :LibGit2, - :Profile, - :UUIDs, - - # 3-depth packages - :REPL, - :SharedArrays, - :TOML, - :Test, - - # 4-depth packages - :LibCURL, - - # 5-depth packages - :Downloads, - - # 6-depth packages - :Pkg, - - # 7-depth packages - :LazyArtifacts, + :LinearAlgebra, # Commits type-piracy and GEMM + :Random, # Can't be removed due to rand being exported by Base ] - maxlen = reduce(max, textwidth.(string.(stdlibs)); init=0) + # PackageCompiler can filter out stdlibs so it can be empty + maxlen = maximum(textwidth.(string.(stdlibs)); init=0) tot_time_stdlib = 0.0 # use a temp module to avoid leaving the type of this closure in Main + push!(empty!(LOAD_PATH), "@stdlib") m = Module() GC.@preserve m begin print_time = @eval m (mod, t) -> (print(rpad(string(mod) * " ", $maxlen + 3, "─")); - Base.time_print(t * 10^9); println()) + Base.time_print(stdout, t * 10^9); println()) print_time(Base, (Base.end_base_include - Base.start_base_include) * 10^(-9)) Base._track_dependencies[] = true @@ -86,8 +102,9 @@ let print_time(stdlib, tt) end for dep in Base._require_dependencies - dep[3] == 0.0 && continue - push!(Base._included_files, dep[1:2]) + mod, path, fsize, mtime = dep[1], dep[2], dep[3], dep[5] + (fsize == 0 || mtime == 0.0) && continue + push!(Base._included_files, (mod, path)) end empty!(Base._require_dependencies) Base._track_dependencies[] = false @@ -99,22 +116,22 @@ let empty!(Core.ARGS) empty!(Base.ARGS) empty!(LOAD_PATH) - @eval Base creating_sysimg = false Base.init_load_path() # want to be able to find external packages in userimg.jl ccall(:jl_clear_implicit_imports, Cvoid, (Any,), Main) + tot_time_userimg = @elapsed (isfile("userimg.jl") && Base.include(Main, "userimg.jl")) tot_time_base = (Base.end_base_include - Base.start_base_include) * 10.0^(-9) tot_time = tot_time_base + tot_time_stdlib + tot_time_userimg println("Sysimage built. 
Summary:") - print("Base ──────── "); Base.time_print(tot_time_base * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_base / tot_time) * 100); println("%") - print("Stdlibs ───── "); Base.time_print(tot_time_stdlib * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_stdlib / tot_time) * 100); println("%") + print("Base ──────── "); Base.time_print(stdout, tot_time_base * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_base / tot_time) * 100); println("%") + print("Stdlibs ───── "); Base.time_print(stdout, tot_time_stdlib * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_stdlib / tot_time) * 100); println("%") if isfile("userimg.jl") - print("Userimg ───── "); Base.time_print(tot_time_userimg * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_userimg / tot_time) * 100); println("%") + print("Userimg ───── "); Base.time_print(stdout, tot_time_userimg * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_userimg / tot_time) * 100); println("%") end - print("Total ─────── "); Base.time_print(tot_time * 10^9); println(); + print("Total ─────── "); Base.time_print(stdout, tot_time * 10^9); println(); empty!(LOAD_PATH) empty!(DEPOT_PATH) diff --git a/base/sysinfo.jl b/base/sysinfo.jl index f0852f32fc17d..53835366d3171 100644 --- a/base/sysinfo.jl +++ b/base/sysinfo.jl @@ -20,6 +20,8 @@ export BINDIR, loadavg, free_memory, total_memory, + free_physical_memory, + total_physical_memory, isapple, isbsd, isdragonfly, @@ -31,6 +33,7 @@ export BINDIR, iswindows, isjsvm, isexecutable, + username, which import ..Base: show @@ -246,19 +249,45 @@ function loadavg() return loadavg_ end +""" + Sys.free_physical_memory() + +Get the free memory of the system in bytes. The entire amount may not be available to the +current process; use `Sys.free_memory()` for the actually available amount. +""" +free_physical_memory() = ccall(:uv_get_free_memory, UInt64, ()) + +""" + Sys.total_physical_memory() + +Get the total memory in RAM (including that which is currently used) in bytes. The entire +amount may not be available to the current process; see `Sys.total_memory()`. +""" +total_physical_memory() = ccall(:uv_get_total_memory, UInt64, ()) + """ Sys.free_memory() Get the total free memory in RAM in bytes. """ -free_memory() = ccall(:uv_get_free_memory, UInt64, ()) +free_memory() = ccall(:uv_get_available_memory, UInt64, ()) """ Sys.total_memory() Get the total memory in RAM (including that which is currently used) in bytes. -""" -total_memory() = ccall(:uv_get_total_memory, UInt64, ()) +This amount may be constrained, e.g., by Linux control groups. For the unconstrained +amount, see `Sys.physical_memory()`. +""" +function total_memory() + constrained = ccall(:uv_get_constrained_memory, UInt64, ()) + physical = total_physical_memory() + if 0 < constrained <= physical + return constrained + else + return physical + end +end """ Sys.get_process_title() @@ -306,7 +335,7 @@ function isunix(os::Symbol) elseif os === :Emscripten # Emscripten implements the POSIX ABI and provides traditional # Unix-style operating system functions such as file system support. - # Therefor, we consider it a unix, even though this need not be + # Therefore, we consider it a unix, even though this need not be # generally true for a jsvm embedding. 
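To illustrate the distinction drawn in the docstrings above between constrained and physical memory (a sketch with made-up numbers; the container limit is hypothetical):

```julia
# On an unconstrained machine the two agree:
Sys.total_memory() == Sys.total_physical_memory()   # typically true

# Inside, e.g., a cgroup-limited container with a 2 GiB memory limit,
# total_memory() reports the limit while total_physical_memory() still
# reports the host's RAM:
Sys.total_memory()            # e.g. 0x0000000080000000 (2 GiB limit)
Sys.total_physical_memory()   # e.g. 0x0000000400000000 (16 GiB of RAM)

# free_memory() is likewise the amount actually available to this process.
Sys.free_memory()
```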
return true else @@ -515,9 +544,21 @@ function which(program_name::String) for path_dir in path_dirs for pname in program_names program_path = joinpath(path_dir, pname) - # If we find something that matches our name and we can execute - if isfile(program_path) && isexecutable(program_path) - return program_path + try + # If we find something that matches our name and we can execute + if isfile(program_path) && isexecutable(program_path) + return program_path + end + catch e + # If we encounter a permission error, we skip this directory + # and continue to the next directory in the PATH variable. + if isa(e, Base.IOError) && e.code == Base.UV_EACCES + # Permission denied, continue searching + continue + else + # Rethrow the exception if it's not a permission error + rethrow(e) + end end end end @@ -527,4 +568,25 @@ function which(program_name::String) end which(program_name::AbstractString) = which(String(program_name)) +""" + Sys.username() -> String + +Return the username for the current user. If the username cannot be determined +or is empty, this function throws an error. + +To retrieve a username that is overridable via an environment variable, +e.g., `USER`, consider using +```julia +user = get(Sys.username, ENV, "USER") +``` + +!!! compat "Julia 1.11" + This function requires at least Julia 1.11. +""" +function username() + pw = Libc.getpw() + isempty(pw.username) && Base.uv_error("username", Base.UV_ENOENT) + return pw.username +end + end # module Sys diff --git a/base/task.jl b/base/task.jl index c405deaf56423..ba96d7bca5095 100644 --- a/base/task.jl +++ b/base/task.jl @@ -70,7 +70,7 @@ end """ TaskFailedException -This exception is thrown by a `wait(t)` call when task `t` fails. +This exception is thrown by a [`wait(t)`](@ref) call when task `t` fails. `TaskFailedException` wraps the failed task `t`. """ struct TaskFailedException <: Exception @@ -104,7 +104,9 @@ function show_task_exception(io::IO, t::Task; indent = true) end function show(io::IO, t::Task) - print(io, "Task ($(t.state)) @0x$(string(convert(UInt, pointer_from_objref(t)), base = 16, pad = Sys.WORD_SIZE>>2))") + state = t.state + state_str = "$state" * ((state == :runnable && istaskstarted(t)) ? ", started" : "") + print(io, "Task ($state_str) @0x$(string(convert(UInt, pointer_from_objref(t)), base = 16, pad = Sys.WORD_SIZE>>2))") end """ @@ -131,7 +133,8 @@ true ``` """ macro task(ex) - :(Task(()->$(esc(ex)))) + thunk = Base.replace_linenums!(:(()->$(esc(ex))), __source__) + :(Task($thunk)) end """ @@ -177,11 +180,20 @@ end elseif field === :exception # TODO: this field name should be deprecated in 2.0 return t._isexception ? t.result : nothing + elseif field === :scope + error("Querying `scope` is disallowed. Use `current_scope` instead.") else return getfield(t, field) end end +@inline function setproperty!(t::Task, field::Symbol, @nospecialize(v)) + if field === :scope + istaskstarted(t) && error("Setting scope on a started task directly is disallowed.") + end + return @invoke setproperty!(t::Any, field::Symbol, v::Any) +end + """ istaskdone(t::Task) -> Bool @@ -253,7 +265,7 @@ istaskfailed(t::Task) = (load_state_acquire(t) === task_state_failed) Threads.threadid(t::Task) = Int(ccall(:jl_get_task_tid, Int16, (Any,), t)+1) function Threads.threadpool(t::Task) tpid = ccall(:jl_get_task_threadpoolid, Int8, (Any,), t) - return tpid == 0 ? 
:default : :interactive + return Threads._tpid_to_sym(tpid) end task_result(t::Task) = t.result @@ -302,13 +314,14 @@ end # just wait for a task to be done, no error propagation function _wait(t::Task) if !istaskdone(t) - lock(t.donenotify) + donenotify = t.donenotify::ThreadSynchronizer + lock(donenotify) try while !istaskdone(t) - wait(t.donenotify) + wait(donenotify) end finally - unlock(t.donenotify) + unlock(donenotify) end end nothing @@ -317,25 +330,26 @@ end # have `waiter` wait for `t` function _wait2(t::Task, waiter::Task) if !istaskdone(t) - lock(t.donenotify) + # since _wait2 is similar to schedule, we should observe the sticky + # bit, even if we don't call `schedule` with early-return below + if waiter.sticky && Threads.threadid(waiter) == 0 && !GC.in_finalizer() + # Issue #41324 + # t.sticky && tid == 0 is a task that needs to be co-scheduled with + # the parent task. If the parent (current_task) is not sticky we must + # set it to be sticky. + # XXX: Ideally we would be able to unset this + current_task().sticky = true + tid = Threads.threadid() + ccall(:jl_set_task_tid, Cint, (Any, Cint), waiter, tid-1) + end + donenotify = t.donenotify::ThreadSynchronizer + lock(donenotify) if !istaskdone(t) - push!(t.donenotify.waitq, waiter) - unlock(t.donenotify) - # since _wait2 is similar to schedule, we should observe the sticky - # bit, even if we aren't calling `schedule` due to this early-return - if waiter.sticky && Threads.threadid(waiter) == 0 - # Issue #41324 - # t.sticky && tid == 0 is a task that needs to be co-scheduled with - # the parent task. If the parent (current_task) is not sticky we must - # set it to be sticky. - # XXX: Ideally we would be able to unset this - current_task().sticky = true - tid = Threads.threadid() - ccall(:jl_set_task_tid, Cint, (Any, Cint), waiter, tid-1) - end + push!(donenotify.waitq, waiter) + unlock(donenotify) return nothing else - unlock(t.donenotify) + unlock(donenotify) end end schedule(waiter) @@ -361,8 +375,8 @@ fetch(@nospecialize x) = x """ fetch(t::Task) -Wait for a Task to finish, then return its result value. -If the task fails with an exception, a `TaskFailedException` (which wraps the failed task) +Wait for a [`Task`](@ref) to finish, then return its result value. +If the task fails with an exception, a [`TaskFailedException`](@ref) (which wraps the failed task) is thrown. """ function fetch(t::Task) @@ -424,19 +438,21 @@ function sync_end(c::Channel{Any}) # Capture all waitable objects scheduled after the end of `@sync` and # include them in the exception. This way, the user can check what was # scheduled by examining at the exception object. 
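A brief usage sketch for `fetch` and `TaskFailedException` as documented above (illustrative; the error text is made up):

```julia
t = Threads.@spawn sum(1:100)
fetch(t)                             # 5050 once the task finishes

failing = Threads.@spawn error("boom")
try
    fetch(failing)
catch ex
    ex isa TaskFailedException       # true; the failed task is wrapped
    ex.task.exception isa ErrorException
end
```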
- local racy - for r in c - if !@isdefined(racy) - racy = [] + if isready(c) + local racy + for r in c + if !@isdefined(racy) + racy = [] + end + push!(racy, r) end - push!(racy, r) - end - if @isdefined(racy) - if !@isdefined(c_ex) - c_ex = CompositeException() + if @isdefined(racy) + if !@isdefined(c_ex) + c_ex = CompositeException() + end + # Since this is a clear programming error, show this exception first: + pushfirst!(c_ex, ScheduledAfterSyncException(racy)) end - # Since this is a clear programming error, show this exception first: - pushfirst!(c_ex, ScheduledAfterSyncException(racy)) end if @isdefined(c_ex) @@ -450,9 +466,23 @@ const sync_varname = gensym(:sync) """ @sync -Wait until all lexically-enclosed uses of `@async`, `@spawn`, `@spawnat` and `@distributed` +Wait until all lexically-enclosed uses of [`@async`](@ref), [`@spawn`](@ref Threads.@spawn), +`Distributed.@spawnat` and `Distributed.@distributed` are complete. All exceptions thrown by enclosed async operations are collected and thrown as -a `CompositeException`. +a [`CompositeException`](@ref). + +# Examples +```julia-repl +julia> Threads.nthreads() +4 + +julia> @sync begin + Threads.@spawn println("Thread-id \$(Threads.threadid()), task 1") + Threads.@spawn println("Thread-id \$(Threads.threadid()), task 2") + end; +Thread-id 3, task 1 +Thread-id 1, task 2 +``` """ macro sync(block) var = esc(sync_varname) @@ -488,15 +518,15 @@ isolating the asynchronous code from changes to the variable's value in the curr Interpolating values via `\$` is available as of Julia 1.4. """ macro async(expr) - do_async_macro(expr) + do_async_macro(expr, __source__) end # generate the code for @async, possibly wrapping the task in something before # pushing it to the wait queue. -function do_async_macro(expr; wrap=identity) +function do_async_macro(expr, linenums; wrap=identity) letargs = Base._lift_one_interp!(expr) - thunk = esc(:(()->($expr))) + thunk = Base.replace_linenums!(:(()->($(esc(expr)))), linenums) var = esc(sync_varname) quote let $(letargs...) @@ -511,7 +541,7 @@ function do_async_macro(expr; wrap=identity) end # task wrapper that doesn't create exceptions wrapped in TaskFailedException -struct UnwrapTaskFailedException +struct UnwrapTaskFailedException <: Exception task::Task end @@ -536,13 +566,21 @@ fetch(t::UnwrapTaskFailedException) = unwrap_task_failed(fetch, t) # macro for running async code that doesn't throw wrapped exceptions macro async_unwrap(expr) - do_async_macro(expr, wrap=task->:(Base.UnwrapTaskFailedException($task))) + do_async_macro(expr, __source__, wrap=task->:(Base.UnwrapTaskFailedException($task))) end """ errormonitor(t::Task) Print an error log to `stderr` if task `t` fails. + +# Examples +```julia-repl +julia> Base._wait(errormonitor(Threads.@spawn error("task failed"))) +Unhandled Task ERROR: task failed +Stacktrace: +[...] 
+``` """ function errormonitor(t::Task) t2 = Task() do @@ -665,7 +703,7 @@ end ## scheduler and work queue -struct IntrusiveLinkedListSynchronized{T} +mutable struct IntrusiveLinkedListSynchronized{T} queue::IntrusiveLinkedList{T} lock::Threads.SpinLock IntrusiveLinkedListSynchronized{T}() where {T} = new(IntrusiveLinkedList{T}(), Threads.SpinLock()) @@ -727,11 +765,12 @@ function workqueue_for(tid::Int) return @inbounds qs[tid] end # slow path to allocate it + @assert tid > 0 l = Workqueues_lock @lock l begin qs = Workqueues if length(qs) < tid - nt = Threads.nthreads() + nt = Threads.maxthreadid() @assert tid <= nt global Workqueues = qs = copyto!(typeof(qs)(undef, length(qs) + nt - 1), qs) end @@ -744,22 +783,41 @@ end function enq_work(t::Task) (t._state === task_state_runnable && t.queue === nothing) || error("schedule: Task not runnable") - if t.sticky || Threads.nthreads() == 1 + + # Sticky tasks go into their thread's work queue. + if t.sticky tid = Threads.threadid(t) if tid == 0 - # Issue #41324 - # t.sticky && tid == 0 is a task that needs to be co-scheduled with - # the parent task. If the parent (current_task) is not sticky we must - # set it to be sticky. - # XXX: Ideally we would be able to unset this - current_task().sticky = true - tid = Threads.threadid() - ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1) + # The task is not yet stuck to a thread. Stick it to the current + # thread and do the same to the parent task (the current task) so + # that the tasks are correctly co-scheduled (issue #41324). + # XXX: Ideally we would be able to unset this. + if GC.in_finalizer() + # The task was launched in a finalizer. There is no thread to sticky it + # to, so just allow it to run anywhere as if it had been non-sticky. + t.sticky = false + @goto not_sticky + else + tid = Threads.threadid() + ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1) + current_task().sticky = true + end end push!(workqueue_for(tid), t) else - Partr.multiq_insert(t, t.priority) - tid = 0 + @label not_sticky + tp = Threads.threadpool(t) + if tp === :foreign || Threads.threadpoolsize(tp) == 1 + # There's only one thread in the task's assigned thread pool; + # use its work queue. + tid = (tp === :interactive) ? 1 : Threads.threadpoolsize(:interactive)+1 + ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1) + push!(workqueue_for(tid), t) + else + # Otherwise, put the task in the multiqueue. 
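For context, a sketch of how task stickiness interacts with the scheduling paths described in the comments above (illustrative; thread ids depend on the session and thread count):

```julia
# Threads.@spawn produces a non-sticky task: it may be placed in the
# multiqueue and can migrate between the threads of its pool.
t1 = Threads.@spawn Threads.threadid()
t1.sticky                          # false

# @async produces a sticky task: it is pinned to the spawning thread's
# work queue, so it reports that thread's id.
t2 = @async Threads.threadid()
t2.sticky                          # true
fetch(t2) == Threads.threadid()    # true (it ran on this thread)
```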
+ Partr.multiq_insert(t, t.priority) + tid = 0 + end end ccall(:jl_wakeup_thread, Cvoid, (Int16,), (tid - 1) % Int16) return t @@ -805,7 +863,7 @@ function schedule(t::Task, @nospecialize(arg); error=false) # schedule a task to be (re)started with the given value or exception t._state === task_state_runnable || Base.error("schedule: Task not runnable") if error - t.queue === nothing || Base.list_deletefirst!(t.queue, t) + t.queue === nothing || Base.list_deletefirst!(t.queue::IntrusiveLinkedList{Task}, t) setfield!(t, :result, arg) setfield!(t, :_isexception, true) else @@ -829,7 +887,7 @@ function yield() try wait() catch - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct) rethrow() end end @@ -924,7 +982,7 @@ function trypoptask(W::StickyWorkqueue) # can't throw here, because it's probably not the fault of the caller to wait # and don't want to use print() here, because that may try to incur a task switch ccall(:jl_safe_printf, Cvoid, (Ptr{UInt8}, Int32...), - "\nWARNING: Workqueue inconsistency detected: popfirst!(Workqueue).state != :runnable\n") + "\nWARNING: Workqueue inconsistency detected: popfirst!(Workqueue).state !== :runnable\n") continue end return t diff --git a/base/terminfo.jl b/base/terminfo.jl new file mode 100644 index 0000000000000..ff7e6fab7f1f7 --- /dev/null +++ b/base/terminfo.jl @@ -0,0 +1,306 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +include("terminfo_data.jl") + +""" + struct TermInfoRaw + +A structured representation of a terminfo file, without any knowledge of +particular capabilities, solely based on `term(5)`. + +!!! warning + This is not part of the public API, and thus subject to change without notice. + +# Fields + +- `names::Vector{String}`: The names this terminal is known by. +- `flags::BitVector`: A list of 0–$(length(TERM_FLAGS)) flag values. +- `numbers::Union{Vector{UInt16}, Vector{UInt32}}`: A list of 0–$(length(TERM_NUMBERS)) + number values. A value of `typemax(eltype(numbers))` is used to skip over + unspecified capabilities while ensuring value indices are correct. +- `strings::Vector{Union{String, Nothing}}`: A list of 0–$(length(TERM_STRINGS)) + string values. A value of `nothing` is used to skip over unspecified + capabilities while ensuring value indices are correct. +- `extended::Union{Nothing, Dict{Symbol, Union{Bool, Int, String}}}`: Should an + extended info section exist, this gives the entire extended info as a + dictionary. Otherwise `nothing`. + +See also: `TermInfo` and `TermCapability`. +""" +struct TermInfoRaw + names::Vector{String} + flags::BitVector + numbers::Union{Vector{UInt16}, Vector{UInt32}} + strings::Vector{Union{String, Nothing}} + extended::Union{Nothing, Dict{Symbol, Union{Bool, Int, String}}} +end + +""" + struct TermInfo + +A parsed terminfo paired with capability information. + +!!! warning + This is not part of the public API, and thus subject to change without notice. + +# Fields + +- `names::Vector{String}`: The names this terminal is known by. +- `flags::Int`: The number of flags specified. +- `numbers::BitVector`: A mask indicating which of `TERM_NUMBERS` have been + specified. +- `strings::BitVector`: A mask indicating which of `TERM_STRINGS` have been + specified. +- `extensions::Vector{Symbol}`: A list of extended capability variable names. +- `capabilities::Dict{Symbol, Union{Bool, Int, String}}`: The capability values + themselves. 
+ +See also: `TermInfoRaw` and `TermCapability`. +""" +struct TermInfo + names::Vector{String} + flags::Int + numbers::BitVector + strings::BitVector + extensions::Vector{Symbol} + capabilities::Dict{Symbol, Union{Bool, Int, String}} +end + +TermInfo() = TermInfo([], 0, [], [], [], Dict()) + +function read(data::IO, ::Type{TermInfoRaw}) + # Parse according to `term(5)` + # Header + magic = read(data, UInt16) |> ltoh + NumInt = if magic == 0o0432 + UInt16 + elseif magic == 0o01036 + UInt32 + else + throw(ArgumentError("Terminfo data did not start with the magic number 0o0432 or 0o01036")) + end + name_bytes = read(data, UInt16) |> ltoh + flag_bytes = read(data, UInt16) |> ltoh + numbers_count = read(data, UInt16) |> ltoh + string_count = read(data, UInt16) |> ltoh + table_bytes = read(data, UInt16) |> ltoh + # Terminal Names + term_names = split(String(read(data, name_bytes - 1)), '|') .|> String + 0x00 == read(data, UInt8) || + throw(ArgumentError("Terminfo data did not contain a null byte after the terminal names section")) + # Boolean Flags + flags = read(data, flag_bytes) .== 0x01 + if position(data) % 2 != 0 + 0x00 == read(data, UInt8) || + throw(ArgumentError("Terminfo did not contain a null byte after the flag section, expected to position the start of the numbers section on an even byte")) + end + # Numbers, Strings, Table + numbers = map(ltoh, reinterpret(NumInt, read(data, numbers_count * sizeof(NumInt)))) + string_indices = map(ltoh, reinterpret(UInt16, read(data, string_count * sizeof(UInt16)))) + strings_table = read(data, table_bytes) + strings = map(string_indices) do idx + if idx ∉ (0xffff, 0xfffe) + len = findfirst(==(0x00), view(strings_table, 1+idx:length(strings_table))) + !isnothing(len) || + throw(ArgumentError("Terminfo string table entry does not terminate with a null byte")) + String(strings_table[1+idx:idx+len-1]) + end + end + TermInfoRaw(term_names, flags, numbers, strings, + if !eof(data) extendedterminfo(data, NumInt) end) +end + +""" + extendedterminfo(data::IO; NumInt::Union{Type{UInt16}, Type{UInt32}}) + +Read an extended terminfo section from `data`, with `NumInt` as the numbers type. + +This will accept any terminfo content that conforms with `term(5)`. 
+ +See also: `read(::IO, ::Type{TermInfoRaw})` +""" +function extendedterminfo(data::IO, NumInt::Union{Type{UInt16}, Type{UInt32}}) + # Extended info + if position(data) % 2 != 0 + 0x00 == read(data, UInt8) || + throw(ArgumentError("Terminfo did not contain a null byte before the extended section, expected to position the start on an even byte")) + end + # Extended header + flag_bytes = read(data, UInt16) |> ltoh + numbers_count = read(data, UInt16) |> ltoh + string_count = read(data, UInt16) |> ltoh + table_count = read(data, UInt16) |> ltoh + table_bytes = read(data, UInt16) |> ltoh + # Extended flags/numbers/strings + flags = read(data, flag_bytes) .== 0x01 + if flag_bytes % 2 != 0 + 0x00 == read(data, UInt8) || + throw(ArgumentError("Terminfo did not contain a null byte after the extended flag section, expected to position the start of the numbers section on an even byte")) + end + numbers = map(n -> Int(ltoh(n)), reinterpret(NumInt, read(data, numbers_count * sizeof(NumInt)))) + table_indices = map(ltoh, reinterpret(UInt16, read(data, table_count * sizeof(UInt16)))) + table_strings = [String(readuntil(data, 0x00)) for _ in 1:length(table_indices)] + info = Dict{Symbol, Union{Bool, Int, String}}() + strings = table_strings[1:string_count] + labels = table_strings[string_count+1:end] + for (label, val) in zip(labels, vcat(flags, numbers, strings)) + info[Symbol(label)] = val + end + return info +end + +""" + TermInfo(raw::TermInfoRaw) + +Construct a `TermInfo` from `raw`, using known terminal capabilities (as of +NCurses 6.3, see `TERM_FLAGS`, `TERM_NUMBERS`, and `TERM_STRINGS`). +""" +function TermInfo(raw::TermInfoRaw) + capabilities = Dict{Symbol, Union{Bool, Int, String}}() + sizehint!(capabilities, 2 * (length(raw.flags) + length(raw.numbers) + length(raw.strings))) + for (flag, value) in zip(TERM_FLAGS, raw.flags) + capabilities[flag.short] = value + capabilities[flag.long] = value + end + for (num, value) in zip(TERM_NUMBERS, raw.numbers) + if value != typemax(eltype(raw.numbers)) + capabilities[num.short] = Int(value) + capabilities[num.long] = Int(value) + end + end + for (str, value) in zip(TERM_STRINGS, raw.strings) + if !isnothing(value) + capabilities[str.short] = value + capabilities[str.long] = value + end + end + extensions = if !isnothing(raw.extended) + capabilities = merge(capabilities, raw.extended) + keys(raw.extended) |> collect + else + Symbol[] + end + TermInfo(raw.names, length(raw.flags), + map(n-> n != typemax(typeof(n)), raw.numbers), + map(!isnothing, raw.strings), + extensions, capabilities) +end + +getindex(ti::TermInfo, key::Symbol) = ti.capabilities[key] +get(ti::TermInfo, key::Symbol, default::D) where D<:Union{Bool, Int, String} = + get(ti.capabilities, key, default)::D +get(ti::TermInfo, key::Symbol, default) = get(ti.capabilities, key, default) +keys(ti::TermInfo) = keys(ti.capabilities) +haskey(ti::TermInfo, key::Symbol) = haskey(ti.capabilities, key) + +function show(io::IO, ::MIME"text/plain", ti::TermInfo) + print(io, "TermInfo(", ti.names, "; ", ti.flags, " flags, ", + sum(ti.numbers), " numbers, ", sum(ti.strings), " strings") + !isempty(ti.extensions) > 0 && + print(io, ", ", length(ti.extensions), " extended capabilities") + print(io, ')') +end + +""" + find_terminfo_file(term::String) + +Locate the terminfo file for `term`, return `nothing` if none could be found. + +The lookup policy is described in `terminfo(5)` "Fetching Compiled +Descriptions". 
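Putting the pieces above together, a hedged end-to-end sketch of locating, parsing, and querying a terminfo entry (these are internal `Base` names introduced by this file; the path and capability values vary by system):

```julia
term = get(ENV, "TERM", "xterm")
path = Base.find_terminfo_file(term)     # e.g. "/usr/share/terminfo/x/xterm"
if path !== nothing
    raw = open(io -> read(io, Base.TermInfoRaw), path)
    ti  = Base.TermInfo(raw)
    haskey(ti, :setaf)                   # can it set the ANSI foreground color?
    get(ti, :colors, 0)                  # e.g. 256
end
```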
+""" +function find_terminfo_file(term::String) + isempty(term) && return + chr, chrcode = string(first(term)), string(Int(first(term)), base=16) + terminfo_dirs = if haskey(ENV, "TERMINFO") + [ENV["TERMINFO"]] + elseif isdir(joinpath(homedir(), ".terminfo")) + [joinpath(homedir(), ".terminfo")] + elseif haskey(ENV, "TERMINFO_DIRS") + split(ENV["TERMINFO_DIRS"], ':') + elseif Sys.isunix() + ["/usr/share/terminfo"] + else + String[] + end + for dir in terminfo_dirs + if isfile(joinpath(dir, chr, term)) + return joinpath(dir, chr, term) + elseif isfile(joinpath(dir, chrcode, term)) + return joinpath(dir, chrcode, term) + end + end +end + +""" + load_terminfo(term::String) + +Load the `TermInfo` for `term`, falling back on a blank `TermInfo`. +""" +function load_terminfo(term::String) + file = find_terminfo_file(term) + isnothing(file) && return TermInfo() + try + TermInfo(read(file, TermInfoRaw)) + catch err + if err isa ArgumentError || err isa IOError + TermInfo() + else + rethrow() + end + end +end + +""" +The terminfo of the current terminal. +""" +current_terminfo::TermInfo = TermInfo() + +# Legacy/TTY methods and the `:color` parameter + +if Sys.iswindows() + ttyhascolor(term_type = nothing) = true +else + function ttyhascolor(term_type = get(ENV, "TERM", "")) + startswith(term_type, "xterm") || + haskey(current_terminfo, :setaf) + end +end + +""" + ttyhastruecolor() + +Return a boolean signifying whether the current terminal supports 24-bit colors. + +This uses the `COLORTERM` environment variable if possible, returning true if it +is set to either `"truecolor"` or `"24bit"`. + +As a fallback, first on unix systems the `colors` terminal capability is checked +— should more than 256 colors be reported, this is taken to signify 24-bit +support. +""" +function ttyhastruecolor() + get(ENV, "COLORTERM", "") ∈ ("truecolor", "24bit") || + @static if Sys.isunix() + get(current_terminfo, :colors, 0) > 256 + else + false + end +end + +function get_have_color() + global have_color + have_color === nothing && (have_color = ttyhascolor()) + return have_color::Bool +end + +function get_have_truecolor() + global have_truecolor + have_truecolor === nothing && (have_truecolor = ttyhastruecolor()) + return have_truecolor::Bool +end + +in(key_value::Pair{Symbol,Bool}, ::TTY) = key_value.first === :color && key_value.second === get_have_color() +haskey(::TTY, key::Symbol) = key === :color +getindex(::TTY, key::Symbol) = key === :color ? get_have_color() : throw(KeyError(key)) +get(::TTY, key::Symbol, default) = key === :color ? get_have_color() : default diff --git a/base/terminfo_data.jl b/base/terminfo_data.jl new file mode 100644 index 0000000000000..38c058f414f07 --- /dev/null +++ b/base/terminfo_data.jl @@ -0,0 +1,540 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + struct TermCapability + +Specification of a single terminal capability. + +!!! warning + This is not part of the public API, and thus subject to change without notice. + +# Fields + +- `short::Symbol`: The *Cap-name* of the capability +- `long::Symbol`: The name of the terminfo capability variable +- `description::String`: A description of the purpose of the capability + +See also: `TermInfo`, `TERM_FLAGS`, `TERM_NUMBERS`, and `TERM_STRINGS`. +""" +struct TermCapability + short::Symbol + long::Symbol + description::String +end + +# Terminfo Capabilities as of NCurses 6.3 + +""" +Ordered list of known terminal capability flag fields, as of NCurses 6.3. 
+""" +const TERM_FLAGS = [ + TermCapability(:bw, :auto_left_margin, "cub1 wraps from column 0 to last column"), + TermCapability(:am, :auto_right_margin, "terminal has automatic margins"), + TermCapability(:xsb, :no_esc_ctlc, "beehive (f1=escape, f2=ctrl C)"), + TermCapability(:xhp, :ceol_standout_glitch, "standout not erased by overwriting (hp)"), + TermCapability(:xenl, :eat_newline_glitch, "newline ignored after 80 cols (concept)"), + TermCapability(:eo, :erase_overstrike, "can erase overstrikes with a blank"), + TermCapability(:gn, :generic_type, "generic line type"), + TermCapability(:hc, :hard_copy, "hardcopy terminal"), + TermCapability(:km, :has_meta_key, "Has a meta key (i.e., sets 8th-bit)"), + TermCapability(:hs, :has_status_line, "has extra status line"), + TermCapability(:in, :insert_null_glitch, "insert mode distinguishes nulls"), + TermCapability(:db, :memory_below, "display may be retained below the screen"), + TermCapability(:da, :memory_above, "display may be retained above the screen"), + TermCapability(:mir, :move_insert_mode, "safe to move while in insert mode"), + TermCapability(:msgr, :move_standout_mode, "safe to move while in standout mode"), + TermCapability(:os, :over_strike, "terminal can overstrike"), + TermCapability(:eslok, :status_line_esc_ok, "escape can be used on the status line"), + TermCapability(:xt, :dest_tabs_magic_smso, "tabs destructive, magic so char (t1061)"), + TermCapability(:hz, :tilde_glitch, "cannot print ~'s (Hazeltine)"), + TermCapability(:ul, :transparent_underline, "underline character overstrikes"), + TermCapability(:xon, :xon_xoff, "terminal uses xon/xoff handshaking"), + TermCapability(:nxon, :needs_xon_xoff, "padding will not work, xon/xoff required"), + TermCapability(:mc5i, :prtr_silent, "printer will not echo on screen"), + TermCapability(:chts, :hard_cursor, "cursor is hard to see"), + TermCapability(:nrrmc, :non_rev_rmcup, "smcup does not reverse rmcup"), + TermCapability(:npc, :no_pad_char, "pad character does not exist"), + TermCapability(:ndscr, :non_dest_scroll_region, "scrolling region is non-destructive"), + TermCapability(:ccc, :can_change, "terminal can re-define existing colors"), + TermCapability(:bce, :back_color_erase, "screen erased with background color"), + TermCapability(:hls, :hue_lightness_saturation, "terminal uses only HLS color notation (Tektronix)"), + TermCapability(:xhpa, :col_addr_glitch, "only positive motion for hpa/mhpa caps"), + TermCapability(:crxm, :cr_cancels_micro_mode, "using cr turns off micro mode"), + TermCapability(:daisy, :has_print_wheel, "printer needs operator to change character set"), + TermCapability(:xvpa, :row_addr_glitch, "only positive motion for vpa/mvpa caps"), + TermCapability(:sam, :semi_auto_right_margin, "printing in last column causes cr"), + TermCapability(:cpix, :cpi_changes_res, "changing character pitch changes resolution"), + TermCapability(:lpix, :lpi_changes_res, "changing line pitch changes resolution"), + TermCapability(:OTbs, :backspaces_with_bs, "uses ^H to move left"), + TermCapability(:OTns, :crt_no_scrolling, "crt cannot scroll"), + TermCapability(:OTnc, :no_correctly_working_cr, "no way to go to start of line"), + TermCapability(:OTMT, :gnu_has_meta_key, "has meta key"), + TermCapability(:OTNL, :linefeed_is_newline, "move down with \n"), + TermCapability(:OTpt, :has_hardware_tabs, "has 8-char tabs invoked with ^I"), + TermCapability(:OTxr, :return_does_clr_eol, "return clears the line"), +] + +""" +Ordered list of known terminal capability number fields, as 
of NCurses 6.3. +""" +const TERM_NUMBERS = [ + TermCapability(:cols, :columns, "number of columns in a line"), + TermCapability(:it, :init_tabs, "tabs initially every # spaces"), + TermCapability(:lines, :lines, "number of lines on screen or page"), + TermCapability(:lm, :lines_of_memory, "lines of memory if > line. 0 means varies"), + TermCapability(:xmc, :magic_cookie_glitch, "number of blank characters left by smso or rmso"), + TermCapability(:pb, :padding_baud_rate, "lowest baud rate where padding needed"), + TermCapability(:vt, :virtual_terminal, "virtual terminal number (CB/unix)"), + TermCapability(:wsl, :width_status_line, "number of columns in status line"), + TermCapability(:nlab, :num_labels, "number of labels on screen"), + TermCapability(:lh, :label_height, "rows in each label"), + TermCapability(:lw, :label_width, "columns in each label"), + TermCapability(:ma, :max_attributes, "maximum combined attributes terminal can handle"), + TermCapability(:wnum, :maximum_windows, "maximum number of definable windows"), + TermCapability(:colors, :max_colors, "maximum number of colors on screen"), + TermCapability(:pairs, :max_pairs, "maximum number of color-pairs on the screen"), + TermCapability(:ncv, :no_color_video, "video attributes that cannot be used with colors"), + TermCapability(:bufsz, :buffer_capacity, "numbers of bytes buffered before printing"), + TermCapability(:spinv, :dot_vert_spacing, "spacing of pins vertically in pins per inch"), + TermCapability(:spinh, :dot_horz_spacing, "spacing of dots horizontally in dots per inch"), + TermCapability(:maddr, :max_micro_address, "maximum value in micro_..._address"), + TermCapability(:mjump, :max_micro_jump, "maximum value in parm_..._micro"), + TermCapability(:mcs, :micro_col_size, "character step size when in micro mode"), + TermCapability(:mls, :micro_line_size, "line step size when in micro mode"), + TermCapability(:npins, :number_of_pins, "numbers of pins in print-head"), + TermCapability(:orc, :output_res_char, "horizontal resolution in units per line"), + TermCapability(:orl, :output_res_line, "vertical resolution in units per line"), + TermCapability(:orhi, :output_res_horz_inch, "horizontal resolution in units per inch"), + TermCapability(:orvi, :output_res_vert_inch, "vertical resolution in units per inch"), + TermCapability(:cps, :print_rate, "print rate in characters per second"), + TermCapability(:widcs, :wide_char_size, "character step size when in double wide mode"), + TermCapability(:btns, :buttons, "number of buttons on mouse"), + TermCapability(:bitwin, :bit_image_entwining, "number of passes for each bit-image row"), + TermCapability(:bitype, :bit_image_type, "type of bit-image device"), + TermCapability(:OTug, :magic_cookie_glitch_ul, "number of blanks left by ul"), + TermCapability(:OTdC, :carriage_return_delay, "pad needed for CR"), + TermCapability(:OTdN, :new_line_delay, "pad needed for LF"), + TermCapability(:OTdB, :backspace_delay, "padding required for ^H"), + TermCapability(:OTdT, :horizontal_tab_delay, "padding required for ^I"), + TermCapability(:OTkn, :number_of_function_keys, "count of function keys"), +] + +""" +Ordered list of known terminal capability string fields, as of NCurses 6.3. 
+""" +const TERM_STRINGS = [ + TermCapability(:cbt, :back_tab, "back tab (P)"), + TermCapability(:bel, :bell, "audible signal (bell) (P)"), + TermCapability(:cr, :carriage_return, "carriage return (P*) (P*)"), + TermCapability(:csr, :change_scroll_region, "change region to line #1 to line #2 (P)"), + TermCapability(:tbc, :clear_all_tabs, "clear all tab stops (P)"), + TermCapability(:clear, :clear_screen, "clear screen and home cursor (P*)"), + TermCapability(:el, :clr_eol, "clear to end of line (P)"), + TermCapability(:ed, :clr_eos, "clear to end of screen (P*)"), + TermCapability(:hpa, :column_address, "horizontal position #1, absolute (P)"), + TermCapability(:cmdch, :command_character, "terminal settable cmd character in prototype !?"), + TermCapability(:cup, :cursor_address, "move to row #1 columns #2"), + TermCapability(:cud1, :cursor_down, "down one line"), + TermCapability(:home, :cursor_home, "home cursor (if no cup)"), + TermCapability(:civis, :cursor_invisible, "make cursor invisible"), + TermCapability(:cub1, :cursor_left, "move left one space"), + TermCapability(:mrcup, :cursor_mem_address, "memory relative cursor addressing, move to row #1 columns #2"), + TermCapability(:cnorm, :cursor_normal, "make cursor appear normal (undo civis/cvvis)"), + TermCapability(:cuf1, :cursor_right, "non-destructive space (move right one space)"), + TermCapability(:ll, :cursor_to_ll, "last line, first column (if no cup)"), + TermCapability(:cuu1, :cursor_up, "up one line"), + TermCapability(:cvvis, :cursor_visible, "make cursor very visible"), + TermCapability(:dch1, :delete_character, "delete character (P*)"), + TermCapability(:dl1, :delete_line, "delete line (P*)"), + TermCapability(:dsl, :dis_status_line, "disable status line"), + TermCapability(:hd, :down_half_line, "half a line down"), + TermCapability(:smacs, :enter_alt_charset_mode, "start alternate character set (P)"), + TermCapability(:blink, :enter_blink_mode, "turn on blinking"), + TermCapability(:bold, :enter_bold_mode, "turn on bold (extra bright) mode"), + TermCapability(:smcup, :enter_ca_mode, "string to start programs using cup"), + TermCapability(:smdc, :enter_delete_mode, "enter delete mode"), + TermCapability(:dim, :enter_dim_mode, "turn on half-bright mode"), + TermCapability(:smir, :enter_insert_mode, "enter insert mode"), + TermCapability(:invis, :enter_secure_mode, "turn on blank mode (characters invisible)"), + TermCapability(:prot, :enter_protected_mode, "turn on protected mode"), + TermCapability(:rev, :enter_reverse_mode, "turn on reverse video mode"), + TermCapability(:smso, :enter_standout_mode, "begin standout mode"), + TermCapability(:smul, :enter_underline_mode, "begin underline mode"), + TermCapability(:ech, :erase_chars, "erase #1 characters (P)"), + TermCapability(:rmacs, :exit_alt_charset_mode, "end alternate character set (P)"), + TermCapability(:sgr0, :exit_attribute_mode, "turn off all attributes"), + TermCapability(:rmcup, :exit_ca_mode, "strings to end programs using cup"), + TermCapability(:rmdc, :exit_delete_mode, "end delete mode"), + TermCapability(:rmir, :exit_insert_mode, "exit insert mode"), + TermCapability(:rmso, :exit_standout_mode, "exit standout mode"), + TermCapability(:rmul, :exit_underline_mode, "exit underline mode"), + TermCapability(:flash, :flash_screen, "visible bell (may not move cursor)"), + TermCapability(:ff, :form_feed, "hardcopy terminal page eject (P*)"), + TermCapability(:fsl, :from_status_line, "return from status line"), + TermCapability(:is1, :init_1string, "initialization 
string"), + TermCapability(:is2, :init_2string, "initialization string"), + TermCapability(:is3, :init_3string, "initialization string"), + TermCapability(:if, :init_file, "name of initialization file"), + TermCapability(:ich1, :insert_character, "insert character (P)"), + TermCapability(:il1, :insert_line, "insert line (P*)"), + TermCapability(:ip, :insert_padding, "insert padding after inserted character"), + TermCapability(:kbs, :key_backspace, "backspace key"), + TermCapability(:ktbc, :key_catab, "clear-all-tabs key"), + TermCapability(:kclr, :key_clear, "clear-screen or erase key"), + TermCapability(:kctab, :key_ctab, "clear-tab key"), + TermCapability(:kdch1, :key_dc, "delete-character key"), + TermCapability(:kdl1, :key_dl, "delete-line key"), + TermCapability(:kcud1, :key_down, "down-arrow key"), + TermCapability(:krmir, :key_eic, "sent by rmir or smir in insert mode"), + TermCapability(:kel, :key_eol, "clear-to-end-of-line key"), + TermCapability(:ked, :key_eos, "clear-to-end-of-screen key"), + TermCapability(:kf0, :key_f0, "F0 function key"), + TermCapability(:kf1, :key_f1, "F1 function key"), + TermCapability(:kf10, :key_f10, "F10 function key"), + TermCapability(:kf2, :key_f2, "F2 function key"), + TermCapability(:kf3, :key_f3, "F3 function key"), + TermCapability(:kf4, :key_f4, "F4 function key"), + TermCapability(:kf5, :key_f5, "F5 function key"), + TermCapability(:kf6, :key_f6, "F6 function key"), + TermCapability(:kf7, :key_f7, "F7 function key"), + TermCapability(:kf8, :key_f8, "F8 function key"), + TermCapability(:kf9, :key_f9, "F9 function key"), + TermCapability(:khome, :key_home, "home key"), + TermCapability(:kich1, :key_ic, "insert-character key"), + TermCapability(:kil1, :key_il, "insert-line key"), + TermCapability(:kcub1, :key_left, "left-arrow key"), + TermCapability(:kll, :key_ll, "lower-left key (home down)"), + TermCapability(:knp, :key_npage, "next-page key"), + TermCapability(:kpp, :key_ppage, "previous-page key"), + TermCapability(:kcuf1, :key_right, "right-arrow key"), + TermCapability(:kind, :key_sf, "scroll-forward key"), + TermCapability(:kri, :key_sr, "scroll-backward key"), + TermCapability(:khts, :key_stab, "set-tab key"), + TermCapability(:kcuu1, :key_up, "up-arrow key"), + TermCapability(:rmkx, :keypad_local, "leave 'keyboard_transmit' mode"), + TermCapability(:smkx, :keypad_xmit, "enter 'keyboard_transmit' mode"), + TermCapability(:lf0, :lab_f0, "label on function key f0 if not f0"), + TermCapability(:lf1, :lab_f1, "label on function key f1 if not f1"), + TermCapability(:lf10, :lab_f10, "label on function key f10 if not f10"), + TermCapability(:lf2, :lab_f2, "label on function key f2 if not f2"), + TermCapability(:lf3, :lab_f3, "label on function key f3 if not f3"), + TermCapability(:lf4, :lab_f4, "label on function key f4 if not f4"), + TermCapability(:lf5, :lab_f5, "label on function key f5 if not f5"), + TermCapability(:lf6, :lab_f6, "label on function key f6 if not f6"), + TermCapability(:lf7, :lab_f7, "label on function key f7 if not f7"), + TermCapability(:lf8, :lab_f8, "label on function key f8 if not f8"), + TermCapability(:lf9, :lab_f9, "label on function key f9 if not f9"), + TermCapability(:rmm, :meta_off, "turn off meta mode"), + TermCapability(:smm, :meta_on, "turn on meta mode (8th-bit on)"), + TermCapability(:nel, :newline, "newline (behave like cr followed by lf)"), + TermCapability(:pad, :pad_char, "padding char (instead of null)"), + TermCapability(:dch, :parm_dch, "delete #1 characters (P*)"), + TermCapability(:dl, 
:parm_delete_line, "delete #1 lines (P*)"), + TermCapability(:cud, :parm_down_cursor, "down #1 lines (P*)"), + TermCapability(:ich, :parm_ich, "insert #1 characters (P*)"), + TermCapability(:indn, :parm_index, "scroll forward #1 lines (P)"), + TermCapability(:il, :parm_insert_line, "insert #1 lines (P*)"), + TermCapability(:cub, :parm_left_cursor, "move #1 characters to the left (P)"), + TermCapability(:cuf, :parm_right_cursor, "move #1 characters to the right (P*)"), + TermCapability(:rin, :parm_rindex, "scroll back #1 lines (P)"), + TermCapability(:cuu, :parm_up_cursor, "up #1 lines (P*)"), + TermCapability(:pfkey, :pkey_key, "program function key #1 to type string #2"), + TermCapability(:pfloc, :pkey_local, "program function key #1 to execute string #2"), + TermCapability(:pfx, :pkey_xmit, "program function key #1 to transmit string #2"), + TermCapability(:mc0, :print_screen, "print contents of screen"), + TermCapability(:mc4, :prtr_off, "turn off printer"), + TermCapability(:mc5, :prtr_on, "turn on printer"), + TermCapability(:rep, :repeat_char, "repeat char #1 #2 times (P*)"), + TermCapability(:rs1, :reset_1string, "reset string"), + TermCapability(:rs2, :reset_2string, "reset string"), + TermCapability(:rs3, :reset_3string, "reset string"), + TermCapability(:rf, :reset_file, "name of reset file"), + TermCapability(:rc, :restore_cursor, "restore cursor to position of last save_cursor"), + TermCapability(:vpa, :row_address, "vertical position #1 absolute (P)"), + TermCapability(:sc, :save_cursor, "save current cursor position (P)"), + TermCapability(:ind, :scroll_forward, "scroll text up (P)"), + TermCapability(:ri, :scroll_reverse, "scroll text down (P)"), + TermCapability(:sgr, :set_attributes, "define video attributes #1-#9 (PG9)"), + TermCapability(:hts, :set_tab, "set a tab in every row, current columns"), + TermCapability(:wind, :set_window, "current window is lines #1-#2 cols #3-#4"), + TermCapability(:ht, :tab, "tab to next 8-space hardware tab stop"), + TermCapability(:tsl, :to_status_line, "move to status line, column #1"), + TermCapability(:uc, :underline_char, "underline char and move past it"), + TermCapability(:hu, :up_half_line, "half a line up"), + TermCapability(:iprog, :init_prog, "path name of program for initialization"), + TermCapability(:ka1, :key_a1, "upper left of keypad"), + TermCapability(:ka3, :key_a3, "upper right of keypad"), + TermCapability(:kb2, :key_b2, "center of keypad"), + TermCapability(:kc1, :key_c1, "lower left of keypad"), + TermCapability(:kc3, :key_c3, "lower right of keypad"), + TermCapability(:mc5p, :prtr_non, "turn on printer for #1 bytes"), + TermCapability(:rmp, :char_padding, "like ip but when in insert mode"), + TermCapability(:acsc, :acs_chars, "graphics charset pairs, based on vt100"), + TermCapability(:pln, :plab_norm, "program label #1 to show string #2"), + TermCapability(:kcbt, :key_btab, "back-tab key"), + TermCapability(:smxon, :enter_xon_mode, "turn on xon/xoff handshaking"), + TermCapability(:rmxon, :exit_xon_mode, "turn off xon/xoff handshaking"), + TermCapability(:smam, :enter_am_mode, "turn on automatic margins"), + TermCapability(:rmam, :exit_am_mode, "turn off automatic margins"), + TermCapability(:xonc, :xon_character, "XON character"), + TermCapability(:xoffc, :xoff_character, "XOFF character"), + TermCapability(:enacs, :ena_acs, "enable alternate char set"), + TermCapability(:smln, :label_on, "turn on soft labels"), + TermCapability(:rmln, :label_off, "turn off soft labels"), + TermCapability(:kbeg, :key_beg, "begin 
key"), + TermCapability(:kcan, :key_cancel, "cancel key"), + TermCapability(:kclo, :key_close, "close key"), + TermCapability(:kcmd, :key_command, "command key"), + TermCapability(:kcpy, :key_copy, "copy key"), + TermCapability(:kcrt, :key_create, "create key"), + TermCapability(:kend, :key_end, "end key"), + TermCapability(:kent, :key_enter, "enter/send key"), + TermCapability(:kext, :key_exit, "exit key"), + TermCapability(:kfnd, :key_find, "find key"), + TermCapability(:khlp, :key_help, "help key"), + TermCapability(:kmrk, :key_mark, "mark key"), + TermCapability(:kmsg, :key_message, "message key"), + TermCapability(:kmov, :key_move, "move key"), + TermCapability(:knxt, :key_next, "next key"), + TermCapability(:kopn, :key_open, "open key"), + TermCapability(:kopt, :key_options, "options key"), + TermCapability(:kprv, :key_previous, "previous key"), + TermCapability(:kprt, :key_print, "print key"), + TermCapability(:krdo, :key_redo, "redo key"), + TermCapability(:kref, :key_reference, "reference key"), + TermCapability(:krfr, :key_refresh, "refresh key"), + TermCapability(:krpl, :key_replace, "replace key"), + TermCapability(:krst, :key_restart, "restart key"), + TermCapability(:kres, :key_resume, "resume key"), + TermCapability(:ksav, :key_save, "save key"), + TermCapability(:kspd, :key_suspend, "suspend key"), + TermCapability(:kund, :key_undo, "undo key"), + TermCapability(:kBEG, :key_sbeg, "shifted begin key"), + TermCapability(:kCAN, :key_scancel, "shifted cancel key"), + TermCapability(:kCMD, :key_scommand, "shifted command key"), + TermCapability(:kCPY, :key_scopy, "shifted copy key"), + TermCapability(:kCRT, :key_screate, "shifted create key"), + TermCapability(:kDC, :key_sdc, "shifted delete-character key"), + TermCapability(:kDL, :key_sdl, "shifted delete-line key"), + TermCapability(:kslt, :key_select, "select key"), + TermCapability(:kEND, :key_send, "shifted end key"), + TermCapability(:kEOL, :key_seol, "shifted clear-to-end-of-line key"), + TermCapability(:kEXT, :key_sexit, "shifted exit key"), + TermCapability(:kFND, :key_sfind, "shifted find key"), + TermCapability(:kHLP, :key_shelp, "shifted help key"), + TermCapability(:kHOM, :key_shome, "shifted home key"), + TermCapability(:kIC, :key_sic, "shifted insert-character key"), + TermCapability(:kLFT, :key_sleft, "shifted left-arrow key"), + TermCapability(:kMSG, :key_smessage, "shifted message key"), + TermCapability(:kMOV, :key_smove, "shifted move key"), + TermCapability(:kNXT, :key_snext, "shifted next key"), + TermCapability(:kOPT, :key_soptions, "shifted options key"), + TermCapability(:kPRV, :key_sprevious, "shifted previous key"), + TermCapability(:kPRT, :key_sprint, "shifted print key"), + TermCapability(:kRDO, :key_sredo, "shifted redo key"), + TermCapability(:kRPL, :key_sreplace, "shifted replace key"), + TermCapability(:kRIT, :key_sright, "shifted right-arrow key"), + TermCapability(:kRES, :key_srsume, "shifted resume key"), + TermCapability(:kSAV, :key_ssave, "shifted save key"), + TermCapability(:kSPD, :key_ssuspend, "shifted suspend key"), + TermCapability(:kUND, :key_sundo, "shifted undo key"), + TermCapability(:rfi, :req_for_input, "send next input char (for ptys)"), + TermCapability(:kf11, :key_f11, "F11 function key"), + TermCapability(:kf12, :key_f12, "F12 function key"), + TermCapability(:kf13, :key_f13, "F13 function key"), + TermCapability(:kf14, :key_f14, "F14 function key"), + TermCapability(:kf15, :key_f15, "F15 function key"), + TermCapability(:kf16, :key_f16, "F16 function key"), + 
TermCapability(:kf17, :key_f17, "F17 function key"), + TermCapability(:kf18, :key_f18, "F18 function key"), + TermCapability(:kf19, :key_f19, "F19 function key"), + TermCapability(:kf20, :key_f20, "F20 function key"), + TermCapability(:kf21, :key_f21, "F21 function key"), + TermCapability(:kf22, :key_f22, "F22 function key"), + TermCapability(:kf23, :key_f23, "F23 function key"), + TermCapability(:kf24, :key_f24, "F24 function key"), + TermCapability(:kf25, :key_f25, "F25 function key"), + TermCapability(:kf26, :key_f26, "F26 function key"), + TermCapability(:kf27, :key_f27, "F27 function key"), + TermCapability(:kf28, :key_f28, "F28 function key"), + TermCapability(:kf29, :key_f29, "F29 function key"), + TermCapability(:kf30, :key_f30, "F30 function key"), + TermCapability(:kf31, :key_f31, "F31 function key"), + TermCapability(:kf32, :key_f32, "F32 function key"), + TermCapability(:kf33, :key_f33, "F33 function key"), + TermCapability(:kf34, :key_f34, "F34 function key"), + TermCapability(:kf35, :key_f35, "F35 function key"), + TermCapability(:kf36, :key_f36, "F36 function key"), + TermCapability(:kf37, :key_f37, "F37 function key"), + TermCapability(:kf38, :key_f38, "F38 function key"), + TermCapability(:kf39, :key_f39, "F39 function key"), + TermCapability(:kf40, :key_f40, "F40 function key"), + TermCapability(:kf41, :key_f41, "F41 function key"), + TermCapability(:kf42, :key_f42, "F42 function key"), + TermCapability(:kf43, :key_f43, "F43 function key"), + TermCapability(:kf44, :key_f44, "F44 function key"), + TermCapability(:kf45, :key_f45, "F45 function key"), + TermCapability(:kf46, :key_f46, "F46 function key"), + TermCapability(:kf47, :key_f47, "F47 function key"), + TermCapability(:kf48, :key_f48, "F48 function key"), + TermCapability(:kf49, :key_f49, "F49 function key"), + TermCapability(:kf50, :key_f50, "F50 function key"), + TermCapability(:kf51, :key_f51, "F51 function key"), + TermCapability(:kf52, :key_f52, "F52 function key"), + TermCapability(:kf53, :key_f53, "F53 function key"), + TermCapability(:kf54, :key_f54, "F54 function key"), + TermCapability(:kf55, :key_f55, "F55 function key"), + TermCapability(:kf56, :key_f56, "F56 function key"), + TermCapability(:kf57, :key_f57, "F57 function key"), + TermCapability(:kf58, :key_f58, "F58 function key"), + TermCapability(:kf59, :key_f59, "F59 function key"), + TermCapability(:kf60, :key_f60, "F60 function key"), + TermCapability(:kf61, :key_f61, "F61 function key"), + TermCapability(:kf62, :key_f62, "F62 function key"), + TermCapability(:kf63, :key_f63, "F63 function key"), + TermCapability(:el1, :clr_bol, "Clear to beginning of line"), + TermCapability(:mgc, :clear_margins, "clear right and left soft margins"), + TermCapability(:smgl, :set_left_margin, "set left soft margin at current column. 
(ML is not in BSD termcap)."), + TermCapability(:smgr, :set_right_margin, "set right soft margin at current column"), + TermCapability(:fln, :label_format, "label format"), + TermCapability(:sclk, :set_clock, "set clock, #1 hrs #2 mins #3 secs"), + TermCapability(:dclk, :display_clock, "display clock"), + TermCapability(:rmclk, :remove_clock, "remove clock"), + TermCapability(:cwin, :create_window, "define a window #1 from #2, #3 to #4, #5"), + TermCapability(:wingo, :goto_window, "go to window #1"), + TermCapability(:hup, :hangup, "hang-up phone"), + TermCapability(:dial, :dial_phone, "dial number #1"), + TermCapability(:qdial, :quick_dial, "dial number #1 without checking"), + TermCapability(:tone, :tone, "select touch tone dialing"), + TermCapability(:pulse, :pulse, "select pulse dialing"), + TermCapability(:hook, :flash_hook, "flash switch hook"), + TermCapability(:pause, :fixed_pause, "pause for 2-3 seconds"), + TermCapability(:wait, :wait_tone, "wait for dial-tone"), + TermCapability(:u0, :user0, "User string #0"), + TermCapability(:u1, :user1, "User string #1"), + TermCapability(:u2, :user2, "User string #2"), + TermCapability(:u3, :user3, "User string #3"), + TermCapability(:u4, :user4, "User string #4"), + TermCapability(:u5, :user5, "User string #5"), + TermCapability(:u6, :user6, "User string #6"), + TermCapability(:u7, :user7, "User string #7"), + TermCapability(:u8, :user8, "User string #8"), + TermCapability(:u9, :user9, "User string #9"), + TermCapability(:op, :orig_pair, "Set default pair to its original value"), + TermCapability(:oc, :orig_colors, "Set all color pairs to the original ones"), + TermCapability(:initc, :initialize_color, "Initialize color #1 to (#2, #3, #4)"), + TermCapability(:initp, :initialize_pair, "Initialize color pair #1 to fg=(#2, #3, #4), bg=(#5,#6,#7)"), + TermCapability(:scp, :set_color_pair, "Set current color pair to #1"), + TermCapability(:setf, :set_foreground, "Set foreground color #1"), + TermCapability(:setb, :set_background, "Set background color #1"), + TermCapability(:cpi, :change_char_pitch, "Change number of characters per inch to #1"), + TermCapability(:lpi, :change_line_pitch, "Change number of lines per inch to #1"), + TermCapability(:chr, :change_res_horz, "Change horizontal resolution to #1"), + TermCapability(:cvr, :change_res_vert, "Change vertical resolution to #1"), + TermCapability(:defc, :define_char, "Define a character #1, #2 dots wide, descender #3"), + TermCapability(:swidm, :enter_doublewide_mode, "Enter double-wide mode"), + TermCapability(:sdrfq, :enter_draft_quality, "Enter draft-quality mode"), + TermCapability(:sitm, :enter_italics_mode, "Enter italic mode"), + TermCapability(:slm, :enter_leftward_mode, "Start leftward carriage motion"), + TermCapability(:smicm, :enter_micro_mode, "Start micro-motion mode"), + TermCapability(:snlq, :enter_near_letter_quality, "Enter NLQ mode"), + TermCapability(:snrmq, :enter_normal_quality, "Enter normal-quality mode"), + TermCapability(:sshm, :enter_shadow_mode, "Enter shadow-print mode"), + TermCapability(:ssubm, :enter_subscript_mode, "Enter subscript mode"), + TermCapability(:ssupm, :enter_superscript_mode, "Enter superscript mode"), + TermCapability(:sum, :enter_upward_mode, "Start upward carriage motion"), + TermCapability(:rwidm, :exit_doublewide_mode, "End double-wide mode"), + TermCapability(:ritm, :exit_italics_mode, "End italic mode"), + TermCapability(:rlm, :exit_leftward_mode, "End left-motion mode"), + TermCapability(:rmicm, :exit_micro_mode, "End micro-motion mode"), 
+ TermCapability(:rshm, :exit_shadow_mode, "End shadow-print mode"), + TermCapability(:rsubm, :exit_subscript_mode, "End subscript mode"), + TermCapability(:rsupm, :exit_superscript_mode, "End superscript mode"), + TermCapability(:rum, :exit_upward_mode, "End reverse character motion"), + TermCapability(:mhpa, :micro_column_address, "Like column_address in micro mode"), + TermCapability(:mcud1, :micro_down, "Like cursor_down in micro mode"), + TermCapability(:mcub1, :micro_left, "Like cursor_left in micro mode"), + TermCapability(:mcuf1, :micro_right, "Like cursor_right in micro mode"), + TermCapability(:mvpa, :micro_row_address, "Like row_address #1 in micro mode"), + TermCapability(:mcuu1, :micro_up, "Like cursor_up in micro mode"), + TermCapability(:porder, :order_of_pins, "Match software bits to print-head pins"), + TermCapability(:mcud, :parm_down_micro, "Like parm_down_cursor in micro mode"), + TermCapability(:mcub, :parm_left_micro, "Like parm_left_cursor in micro mode"), + TermCapability(:mcuf, :parm_right_micro, "Like parm_right_cursor in micro mode"), + TermCapability(:mcuu, :parm_up_micro, "Like parm_up_cursor in micro mode"), + TermCapability(:scs, :select_char_set, "Select character set, #1"), + TermCapability(:smgb, :set_bottom_margin, "Set bottom margin at current line"), + TermCapability(:smgbp, :set_bottom_margin_parm, "Set bottom margin at line #1 or (if smgtp is not given) #2 lines from bottom"), + TermCapability(:smglp, :set_left_margin_parm, "Set left (right) margin at column #1"), + TermCapability(:smgrp, :set_right_margin_parm, "Set right margin at column #1"), + TermCapability(:smgt, :set_top_margin, "Set top margin at current line"), + TermCapability(:smgtp, :set_top_margin_parm, "Set top (bottom) margin at row #1"), + TermCapability(:sbim, :start_bit_image, "Start printing bit image graphics"), + TermCapability(:scsd, :start_char_set_def, "Start character set definition #1, with #2 characters in the set"), + TermCapability(:rbim, :stop_bit_image, "Stop printing bit image graphics"), + TermCapability(:rcsd, :stop_char_set_def, "End definition of character set #1"), + TermCapability(:subcs, :subscript_characters, "List of subscriptable characters"), + TermCapability(:supcs, :superscript_characters, "List of superscriptable characters"), + TermCapability(:docr, :these_cause_cr, "Printing any of these characters causes CR"), + TermCapability(:zerom, :zero_motion, "No motion for subsequent character"), + TermCapability(:csnm, :char_set_names, "Produce #1'th item from list of character set names"), + TermCapability(:kmous, :key_mouse, "Mouse event has occurred"), + TermCapability(:minfo, :mouse_info, "Mouse status information"), + TermCapability(:reqmp, :req_mouse_pos, "Request mouse position"), + TermCapability(:getm, :get_mouse, "Curses should get button events, parameter #1 not documented."), + TermCapability(:setaf, :set_a_foreground, "Set foreground color to #1, using ANSI escape"), + TermCapability(:setab, :set_a_background, "Set background color to #1, using ANSI escape"), + TermCapability(:pfxl, :pkey_plab, "Program function key #1 to type string #2 and show string #3"), + TermCapability(:devt, :device_type, "Indicate language/codeset support"), + TermCapability(:csin, :code_set_init, "Init sequence for multiple codesets"), + TermCapability(:s0ds, :set0_des_seq, "Shift to codeset 0 (EUC set 0, ASCII)"), + TermCapability(:s1ds, :set1_des_seq, "Shift to codeset 1"), + TermCapability(:s2ds, :set2_des_seq, "Shift to codeset 2"), + TermCapability(:s3ds, 
:set3_des_seq, "Shift to codeset 3"), + TermCapability(:smglr, :set_lr_margin, "Set both left and right margins to #1, #2. (ML is not in BSD termcap)."), + TermCapability(:smgtb, :set_tb_margin, "Sets both top and bottom margins to #1, #2"), + TermCapability(:birep, :bit_image_repeat, "Repeat bit image cell #1 #2 times"), + TermCapability(:binel, :bit_image_newline, "Move to next row of the bit image"), + TermCapability(:bicr, :bit_image_carriage_return, "Move to beginning of same row"), + TermCapability(:colornm, :color_names, "Give name for color #1"), + TermCapability(:defbi, :define_bit_image_region, "Define rectangular bit image region"), + TermCapability(:endbi, :end_bit_image_region, "End a bit-image region"), + TermCapability(:setcolor, :set_color_band, "Change to ribbon color #1"), + TermCapability(:slines, :set_page_length, "Set page length to #1 lines"), + TermCapability(:dispc, :display_pc_char, "Display PC character #1"), + TermCapability(:smpch, :enter_pc_charset_mode, "Enter PC character display mode"), + TermCapability(:rmpch, :exit_pc_charset_mode, "Exit PC character display mode"), + TermCapability(:smsc, :enter_scancode_mode, "Enter PC scancode mode"), + TermCapability(:rmsc, :exit_scancode_mode, "Exit PC scancode mode"), + TermCapability(:pctrm, :pc_term_options, "PC terminal options"), + TermCapability(:scesc, :scancode_escape, "Escape for scancode emulation"), + TermCapability(:scesa, :alt_scancode_esc, "Alternate escape for scancode emulation"), + TermCapability(:ehhlm, :enter_horizontal_hl_mode, "Enter horizontal highlight mode"), + TermCapability(:elhlm, :enter_left_hl_mode, "Enter left highlight mode"), + TermCapability(:elohlm, :enter_low_hl_mode, "Enter low highlight mode"), + TermCapability(:erhlm, :enter_right_hl_mode, "Enter right highlight mode"), + TermCapability(:ethlm, :enter_top_hl_mode, "Enter top highlight mode"), + TermCapability(:evhlm, :enter_vertical_hl_mode, "Enter vertical highlight mode"), + TermCapability(:sgr1, :set_a_attributes, "Define second set of video attributes #1-#6"), + TermCapability(:slength, :set_pglen_inch, "Set page length to #1 hundredth of an inch (some implementations use sL for termcap)."), + TermCapability(:OTi2, :termcap_init2, "secondary initialization string"), + TermCapability(:OTrs, :termcap_reset, "terminal reset string"), + TermCapability(:OTnl, :linefeed_if_not_lf, "use to move down"), + TermCapability(:OTbs, :backspaces_with_bs, "uses ^H to move left"), + TermCapability(:OTko, :other_non_function_keys, "list of self-mapped keycaps"), + TermCapability(:OTma, :arrow_key_map, "map motion-keys for vi version 2"), + TermCapability(:OTG2, :acs_ulcorner, "single upper left"), + TermCapability(:OTG3, :acs_llcorner, "single lower left"), + TermCapability(:OTG1, :acs_urcorner, "single upper right"), + TermCapability(:OTG4, :acs_lrcorner, "single lower right"), + TermCapability(:OTGR, :acs_ltee, "tee pointing right"), + TermCapability(:OTGL, :acs_rtee, "tee pointing left"), + TermCapability(:OTGU, :acs_btee, "tee pointing up"), + TermCapability(:OTGD, :acs_ttee, "tee pointing down"), + TermCapability(:OTGH, :acs_hline, "single horizontal line"), + TermCapability(:OTGV, :acs_vline, "single vertical line"), + TermCapability(:OTGC, :acs_plus, "single intersection"), + TermCapability(:meml, :memory_lock, "lock memory above cursor"), + TermCapability(:memu, :memory_unlock, "unlock memory"), + TermCapability(:box1, :box_chars_1, "box characters primary set"), +] diff --git a/base/threadcall.jl b/base/threadcall.jl index 
45965fdbc6c65..fbc1a87a20980 100644 --- a/base/threadcall.jl +++ b/base/threadcall.jl @@ -1,8 +1,9 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license const max_ccall_threads = parse(Int, get(ENV, "UV_THREADPOOL_SIZE", "4")) -const thread_notifiers = Union{Base.Condition, Nothing}[nothing for i in 1:max_ccall_threads] +const thread_notifiers = Union{Event, Nothing}[nothing for i in 1:max_ccall_threads] const threadcall_restrictor = Semaphore(max_ccall_threads) +const threadcall_lock = Threads.SpinLock() """ @threadcall((cfunc, clib), rettype, (argtypes...), argvals...) @@ -47,7 +48,7 @@ macro threadcall(f, rettype, argtypes, argvals...) push!(body, :(return Int(Core.sizeof($rettype)))) # return code to generate wrapper function and send work request thread queue - wrapper = Expr(Symbol("hygienic-scope"), wrapper, @__MODULE__) + wrapper = Expr(:var"hygienic-scope", wrapper, @__MODULE__, __source__) return :(let fun_ptr = @cfunction($wrapper, Int, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) # use cglobal to look up the function on the calling thread do_threadcall(fun_ptr, cglobal($f), $rettype, Any[$(argtypes...)], Any[$(argvals...)]) @@ -81,8 +82,11 @@ function do_threadcall(fun_ptr::Ptr{Cvoid}, cfptr::Ptr{Cvoid}, rettype::Type, ar # wait for a worker thread to be available acquire(threadcall_restrictor) - idx = findfirst(isequal(nothing), thread_notifiers)::Int - thread_notifiers[idx] = Base.Condition() + idx = -1 + @lock threadcall_lock begin + idx = findfirst(isequal(nothing), thread_notifiers)::Int + thread_notifiers[idx] = Event() + end GC.@preserve args_arr ret_arr roots begin # queue up the work to be done @@ -92,7 +96,9 @@ function do_threadcall(fun_ptr::Ptr{Cvoid}, cfptr::Ptr{Cvoid}, rettype::Type, ar # wait for a result & return it wait(thread_notifiers[idx]) - thread_notifiers[idx] = nothing + @lock threadcall_lock begin + thread_notifiers[idx] = nothing + end release(threadcall_restrictor) r = unsafe_load(convert(Ptr{rettype}, pointer(ret_arr))) diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index b00dfb389ce3b..a5a1294be049b 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -8,46 +8,88 @@ export threadid, nthreads, @threads, @spawn, Get the ID number of the current thread of execution. The master thread has ID `1`. + +# Examples +```julia-repl +julia> Threads.threadid() +1 + +julia> Threads.@threads for i in 1:4 + println(Threads.threadid()) + end +4 +2 +5 +4 +``` + +!!! note + The thread that a task runs on may change if the task yields, which is known as [`Task Migration`](@ref man-task-migration). + For this reason in most cases it is not safe to use `threadid()` to index into, say, a vector of buffer or stateful objects. + """ threadid() = Int(ccall(:jl_threadid, Int16, ())+1) +# lower bound on the largest threadid() """ - Threads.nthreads([:default|:interactive]) -> Int + Threads.maxthreadid() -> Int -Get the number of threads (across all thread pools or within the specified -thread pool) available to Julia. The number of threads across all thread -pools is the inclusive upper bound on [`threadid()`](@ref). +Get a lower bound on the number of threads (across all thread pools) available +to the Julia process, with atomic-acquire semantics. The result will always be +greater than or equal to [`threadid()`](@ref) as well as `threadid(task)` for +any task you were able to observe before calling `maxthreadid`. 
+""" +maxthreadid() = Int(Core.Intrinsics.atomic_pointerref(cglobal(:jl_n_threads, Cint), :acquire)) -See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the -[`LinearAlgebra`](@ref man-linalg) standard library, and `nprocs()` in the -[`Distributed`](@ref man-distributed) standard library. """ -function nthreads end - -nthreads() = Int(unsafe_load(cglobal(:jl_n_threads, Cint))) -function nthreads(pool::Symbol) - if pool == :default - tpid = Int8(0) - elseif pool == :interactive - tpid = Int8(1) - else - error("invalid threadpool specified") - end - return _nthreads_in_pool(tpid) -end + Threads.nthreads(:default | :interactive) -> Int + +Get the current number of threads within the specified thread pool. The threads in default +have id numbers `1:nthreads(:default)`. + +See also `BLAS.get_num_threads` and `BLAS.set_num_threads` in the [`LinearAlgebra`](@ref +man-linalg) standard library, and `nprocs()` in the [`Distributed`](@ref man-distributed) +standard library and [`Threads.maxthreadid()`](@ref). +""" +nthreads(pool::Symbol) = threadpoolsize(pool) + function _nthreads_in_pool(tpid::Int8) p = unsafe_load(cglobal(:jl_n_threads_per_pool, Ptr{Cint})) return Int(unsafe_load(p, tpid + 1)) end +function _tpid_to_sym(tpid::Int8) + if tpid == 0 + return :interactive + elseif tpid == 1 + return :default + elseif tpid == -1 + return :foreign + else + throw(ArgumentError("Unrecognized threadpool id $tpid")) + end +end + +function _sym_to_tpid(tp::Symbol) + if tp === :interactive + return Int8(0) + elseif tp === :default + return Int8(1) + elseif tp == :foreign + return Int8(-1) + else + throw(ArgumentError("Unrecognized threadpool name `$(repr(tp))`")) + end +end + """ Threads.threadpool(tid = threadid()) -> Symbol -Returns the specified thread's threadpool; either `:default` or `:interactive`. +Returns the specified thread's threadpool; either `:default`, `:interactive`, or `:foreign`. """ function threadpool(tid = threadid()) tpid = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) - return tpid == 0 ? :default : :interactive + return _tpid_to_sym(tpid) end """ @@ -57,15 +99,66 @@ Returns the number of threadpools currently configured. """ nthreadpools() = Int(unsafe_load(cglobal(:jl_n_threadpools, Cint))) +""" + Threads.threadpoolsize(pool::Symbol = :default) -> Int + +Get the number of threads available to the default thread pool (or to the +specified thread pool). + +See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the +[`LinearAlgebra`](@ref man-linalg) standard library, and `nprocs()` in the +[`Distributed`](@ref man-distributed) standard library. +""" +function threadpoolsize(pool::Symbol = :default) + if pool === :default || pool === :interactive + tpid = _sym_to_tpid(pool) + elseif pool == :foreign + error("Threadpool size of `:foreign` is indeterminant") + else + error("invalid threadpool specified") + end + return _nthreads_in_pool(tpid) +end + +""" + threadpooltids(pool::Symbol) + +Returns a vector of IDs of threads in the given pool. +""" +function threadpooltids(pool::Symbol) + ni = _nthreads_in_pool(Int8(0)) + if pool === :interactive + return collect(1:ni) + elseif pool === :default + return collect(ni+1:ni+_nthreads_in_pool(Int8(1))) + else + error("invalid threadpool specified") + end +end + +""" + Threads.ngcthreads() -> Int + +Returns the number of GC threads currently configured. +This includes both mark threads and concurrent sweep threads. 
+""" +ngcthreads() = Int(unsafe_load(cglobal(:jl_n_gcthreads, Cint))) + 1 function threading_run(fun, static) ccall(:jl_enter_threaded_region, Cvoid, ()) - n = nthreads() + n = threadpoolsize() + tid_offset = threadpoolsize(:interactive) tasks = Vector{Task}(undef, n) for i = 1:n t = Task(() -> fun(i)) # pass in tid t.sticky = static - static && ccall(:jl_set_task_tid, Cint, (Any, Cint), t, i-1) + if static + ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid_offset + i-1) + else + # TODO: this should be the current pool (except interactive) if there + # are ever more than two pools. + @assert ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, _sym_to_tpid(:default)) == 1 + end tasks[i] = t schedule(t) end @@ -73,7 +166,7 @@ function threading_run(fun, static) Base._wait(tasks[i]) end ccall(:jl_exit_threaded_region, Cvoid, ()) - failed_tasks = filter(istaskfailed, tasks) + failed_tasks = filter!(istaskfailed, tasks) if !isempty(failed_tasks) throw(CompositeException(map(TaskFailedException, failed_tasks))) end @@ -93,7 +186,7 @@ function _threadsfor(iter, lbody, schedule) tid = 1 len, rem = lenr, 0 else - len, rem = divrem(lenr, nthreads()) + len, rem = divrem(lenr, threadpoolsize()) end # not enough iterations for all the threads? if len == 0 @@ -161,12 +254,12 @@ unsynchronized memory accesses may result in undefined behavior. For example, the above conditions imply that: -- The lock taken in an iteration *must* be released within the same iteration. +- A lock taken in an iteration *must* be released within the same iteration. - Communicating between iterations using blocking primitives like `Channel`s is incorrect. - Write only to locations not shared across iterations (unless a lock or atomic operation is used). -- The value of [`threadid()`](@ref Threads.threadid) may change even within a single - iteration. +- Unless the `:static` schedule is used, the value of [`threadid()`](@ref Threads.threadid) + may change even within a single iteration. See [`Task Migration`](@ref man-task-migration). ## Schedulers @@ -185,7 +278,7 @@ assumption may be removed in the future. This scheduling option is merely a hint to the underlying execution mechanism. However, a few properties can be expected. The number of `Task`s used by `:dynamic` scheduler is bounded by a small constant multiple of the number of available worker threads -([`nthreads()`](@ref Threads.nthreads)). Each task processes contiguous regions of the +([`Threads.threadpoolsize()`](@ref)). Each task processes contiguous regions of the iteration space. Thus, `@threads :dynamic for x in xs; f(x); end` is typically more efficient than `@sync for x in xs; @spawn f(x); end` if `length(xs)` is significantly larger than the number of the worker threads and the run-time of `f(x)` is relatively @@ -199,7 +292,7 @@ microseconds). `:static` scheduler creates one task per thread and divides the iterations equally among them, assigning each task specifically to each thread. In particular, the value of -[`threadid()`](@ref Threads.threadid) is guranteed to be constant within one iteration. +[`threadid()`](@ref Threads.threadid) is guaranteed to be constant within one iteration. Specifying `:static` is an error if used from inside another `@threads` loop or from a thread other than 1. 
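A minimal sketch of how the pool-aware helpers introduced above fit together; it assumes a session started with `julia --threads 4,1` (four default threads plus one interactive thread), and the values in the comments are illustrative rather than guaranteed.

```julia
# Interactive threads are numbered first, then default threads, so with
# `--threads 4,1` thread 1 is :interactive and threads 2:5 are :default.
Threads.threadpoolsize()                # 4  (size of the :default pool)
Threads.threadpoolsize(:interactive)    # 1
Threads.nthreads(:default)              # 4  (now forwards to threadpoolsize)
Threads.threadpooltids(:default)        # [2, 3, 4, 5]
Threads.maxthreadid()                   # >= 5, a lower bound on the largest threadid()
Threads.threadpool()                    # pool of the current thread, e.g. :interactive

# `Threads.@spawn` accepts an explicit pool name (Julia 1.9+):
t = Threads.@spawn :interactive Threads.threadid()
fetch(t)                                # an ID from the :interactive pool
```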
@@ -222,7 +315,7 @@ julia> function busywait(seconds) julia> @time begin Threads.@spawn busywait(5) - Threads.@threads :static for i in 1:Threads.nthreads() + Threads.@threads :static for i in 1:Threads.threadpoolsize() busywait(1) end end @@ -230,7 +323,7 @@ julia> @time begin julia> @time begin Threads.@spawn busywait(5) - Threads.@threads :dynamic for i in 1:Threads.nthreads() + Threads.@threads :dynamic for i in 1:Threads.threadpoolsize() busywait(1) end end @@ -268,6 +361,15 @@ macro threads(args...) return _threadsfor(ex.args[1], ex.args[2], sched) end +function _spawn_set_thrpool(t::Task, tp::Symbol) + tpid = _sym_to_tpid(tp) + if tpid == -1 || _nthreads_in_pool(tpid) == 0 + tpid = _sym_to_tpid(:default) + end + @assert ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), t, tpid) == 1 + nothing +end + """ Threads.@spawn [:default|:interactive] expr @@ -283,8 +385,10 @@ the _value_ of a variable, isolating the asynchronous code from changes to the variable's value in the current task. !!! note - See the manual chapter on [multi-threading](@ref man-multithreading) - for important caveats. See also the chapter on [threadpools](@ref man-threadpools). + The thread that the task runs on may change if the task yields, therefore `threadid()` should not + be treated as constant for a task. See [`Task Migration`](@ref man-task-migration), and the broader + [multi-threading](@ref man-multithreading) manual for further important caveats. + See also the chapter on [threadpools](@ref man-threadpools). !!! compat "Julia 1.3" This macro is available as of Julia 1.3. @@ -294,22 +398,31 @@ the variable's value in the current task. !!! compat "Julia 1.9" A threadpool may be specified as of Julia 1.9. + +# Examples +```julia-repl +julia> t() = println("Hello from ", Threads.threadid()); + +julia> tasks = fetch.([Threads.@spawn t() for i in 1:4]); +Hello from 1 +Hello from 1 +Hello from 3 +Hello from 4 +``` """ macro spawn(args...) - tpid = Int8(0) + tp = QuoteNode(:default) na = length(args) if na == 2 ttype, ex = args if ttype isa QuoteNode ttype = ttype.value - elseif ttype isa Symbol - # TODO: allow unquoted symbols - ttype = nothing - end - if ttype === :interactive - tpid = Int8(1) - elseif ttype !== :default - throw(ArgumentError("unsupported threadpool in @spawn: $ttype")) + if ttype !== :interactive && ttype !== :default + throw(ArgumentError("unsupported threadpool in @spawn: $ttype")) + end + tp = QuoteNode(ttype) + else + tp = ttype end elseif na == 1 ex = args[1] @@ -319,13 +432,13 @@ macro spawn(args...) letargs = Base._lift_one_interp!(ex) - thunk = esc(:(()->($ex))) + thunk = Base.replace_linenums!(:(()->($(esc(ex)))), __source__) var = esc(Base.sync_varname) quote let $(letargs...) local task = Task($thunk) task.sticky = false - ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), task, $tpid) + _spawn_set_thrpool(task, $(esc(tp))) if $(Expr(:islocal, var)) put!($var, task) end diff --git a/base/threads.jl b/base/threads.jl index 2b68c7104ee5e..bdd6677c5a955 100644 --- a/base/threads.jl +++ b/base/threads.jl @@ -8,28 +8,6 @@ module Threads global Condition # we'll define this later, make sure we don't import Base.Condition include("threadingconstructs.jl") -include("atomics.jl") include("locks-mt.jl") - -""" - resize_nthreads!(A, copyvalue=A[1]) - -Resize the array `A` to length [`nthreads()`](@ref). Any new -elements that are allocated are initialized to `deepcopy(copyvalue)`, -where `copyvalue` defaults to `A[1]`. 
- -This is typically used to allocate per-thread variables, and -should be called in `__init__` if `A` is a global constant. -""" -function resize_nthreads!(A::AbstractVector, copyvalue=A[1]) - nthr = nthreads() - nold = length(A) - resize!(A, nthr) - for i = nold+1:nthr - A[i] = deepcopy(copyvalue) - end - return A -end - end diff --git a/base/threads_overloads.jl b/base/threads_overloads.jl index a0d4bbeda2288..ccbc7e50d227b 100644 --- a/base/threads_overloads.jl +++ b/base/threads_overloads.jl @@ -3,7 +3,7 @@ """ Threads.foreach(f, channel::Channel; schedule::Threads.AbstractSchedule=Threads.FairSchedule(), - ntasks=Threads.nthreads()) + ntasks=Threads.threadpoolsize()) Similar to `foreach(f, channel)`, but iteration over `channel` and calls to `f` are split across `ntasks` tasks spawned by `Threads.@spawn`. This function @@ -20,12 +20,27 @@ to load-balancing. This approach thus may be more suitable for fine-grained, uniform workloads, but may perform worse than `FairSchedule` in concurrence with other multithreaded workloads. +# Examples +```julia-repl +julia> n = 20 + +julia> c = Channel{Int}(ch -> foreach(i -> put!(ch, i), 1:n), 1) + +julia> d = Channel{Int}(n) do ch + f = i -> put!(ch, i^2) + Threads.foreach(f, c) + end + +julia> collect(d) +collect(d) = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361, 400] +``` + !!! compat "Julia 1.6" This function requires Julia 1.6 or later. """ function Threads.foreach(f, channel::Channel; schedule::Threads.AbstractSchedule=Threads.FairSchedule(), - ntasks=Threads.nthreads()) + ntasks=Threads.threadpoolsize()) apply = _apply_for_schedule(schedule) stop = Threads.Atomic{Bool}(false) @sync for _ in 1:ntasks diff --git a/base/timing.jl b/base/timing.jl index 02ed5fc5ae35c..472efcd94cb08 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -12,18 +12,20 @@ struct GC_Num freecall ::Int64 total_time ::Int64 total_allocd ::Int64 # GC internal - since_sweep ::Int64 # GC internal collect ::Csize_t # GC internal pause ::Cint full_sweep ::Cint max_pause ::Int64 max_memory ::Int64 - time_to_safepoint ::Int64 - max_time_to_safepointp ::Int64 + time_to_safepoint ::Int64 + max_time_to_safepoint ::Int64 + total_time_to_safepoint ::Int64 sweep_time ::Int64 mark_time ::Int64 total_sweep_time ::Int64 total_mark_time ::Int64 + last_full_sweep ::Int64 + last_incremental_sweep ::Int64 end gc_num() = ccall(:jl_gc_num, GC_Num, ()) @@ -96,6 +98,13 @@ function gc_live_bytes() Int(ccall(:jl_gc_live_bytes, Int64, ())) + num.allocd + num.deferred_alloc end +# must be kept in sync with the value from `src/julia_threads.h`` +const JL_GC_N_MAX_POOLS = 51 +function gc_page_utilization_data() + page_utilization_raw = cglobal(:jl_gc_page_utilization_stats, Float64) + return Base.unsafe_wrap(Array, page_utilization_raw, JL_GC_N_MAX_POOLS, own=false) +end + """ Base.jit_total_bytes() @@ -103,7 +112,7 @@ Return the total amount (in bytes) allocated by the just-in-time compiler for e.g. native code and data. """ function jit_total_bytes() - return Int(ccall(:jl_jit_total_bytes, Csize_t, ())) + return ccall(:jl_jit_total_bytes, Csize_t, ()) end # print elapsed time, return expression value @@ -126,19 +135,25 @@ function padded_nonzero_print(value, str, always_print = true) end end -function format_bytes(bytes) # also used by InteractiveUtils - bytes, mb = prettyprint_getunits(bytes, length(_mem_units), Int64(1024)) +function format_bytes(bytes; binary=true) # also used by InteractiveUtils + units = binary ? _mem_units : _cnt_units + factor = binary ? 
1024 : 1000 + bytes, mb = prettyprint_getunits(bytes, length(units), Int64(factor)) if mb == 1 - return string(Int(bytes), " ", _mem_units[mb], bytes==1 ? "" : "s") + return string(Int(bytes), " ", units[mb], bytes==1 ? "" : "s") else - return string(Ryu.writefixed(Float64(bytes), 3), " ", _mem_units[mb]) + return string(Ryu.writefixed(Float64(bytes), 3), binary ? " $(units[mb])" : "$(units[mb])B") end end -function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true) +function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false; msg::Union{String,Nothing}=nothing) timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6) str = sprint() do io - _lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "") + if msg isa String + print(io, msg, ": ") + else + print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "") + end print(io, timestr, " seconds") parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0 parens && print(io, " (") @@ -169,16 +184,17 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, re print(io, ": ", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% of which was recompilation") end parens && print(io, ")") + newline && print(io, "\n") end - newline ? println(str) : print(str) + print(io, str) nothing end -function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad) +function timev_print(elapsedtime, diff::GC_Diff, compile_times; msg::Union{String,Nothing}=nothing) allocs = gc_alloc_count(diff) compile_time = first(compile_times) recompile_time = last(compile_times) - time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad) + time_print(stdout, elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true; msg) padded_nonzero_print(elapsedtime, "elapsed time (ns)") padded_nonzero_print(diff.total_time, "gc time (ns)") padded_nonzero_print(diff.allocd, "bytes allocated") @@ -218,8 +234,8 @@ In some cases the system will look inside the `@time` expression and compile som called code before execution of the top-level expression begins. When that happens, some compilation time will not be counted. To include this time you can run `@time @eval ...`. -See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and -[`@allocated`](@ref). +See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), +[`@allocated`](@ref), and [`@allocations`](@ref). !!! note For more serious benchmarking, consider the `@btime` macro from the BenchmarkTools.jl @@ -229,8 +245,7 @@ See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ !!! compat "Julia 1.8" The option to add a description was introduced in Julia 1.8. -!!! 
compat "Julia 1.9" - Recompilation time being shown separately from compilation time was introduced in Julia 1.9 + Recompilation time being shown separately from compilation time was introduced in Julia 1.8 ```julia-repl julia> x = rand(10,10); @@ -278,9 +293,7 @@ macro time(msg, ex) ) local diff = GC_Diff(gc_num(), stats) local _msg = $(esc(msg)) - local has_msg = !isnothing(_msg) - has_msg && print(_msg, ": ") - time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg) + time_print(stdout, elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true; msg=_msg) val end end @@ -319,8 +332,8 @@ Optionally provide a description string to print before the time report. !!! compat "Julia 1.8" The option to add a description was introduced in Julia 1.8. -See also [`@time`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and -[`@allocated`](@ref). +See also [`@time`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), +[`@allocated`](@ref), and [`@allocations`](@ref). ```julia-repl julia> x = rand(10,10); @@ -353,16 +366,16 @@ macro timev(msg, ex) Experimental.@force_compile local stats = gc_num() local elapsedtime = time_ns() + cumulative_compile_timing(true) local compile_elapsedtimes = cumulative_compile_time_ns() local val = @__tryfinally($(esc(ex)), (elapsedtime = time_ns() - elapsedtime; + cumulative_compile_timing(false); compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes) ) local diff = GC_Diff(gc_num(), stats) local _msg = $(esc(msg)) - local has_msg = !isnothing(_msg) - has_msg && print(_msg, ": ") - timev_print(elapsedtime, diff, compile_elapsedtimes, !has_msg) + timev_print(elapsedtime, diff, compile_elapsedtimes; msg=_msg) val end end @@ -378,7 +391,7 @@ called code before execution of the top-level expression begins. When that happe compilation time will not be counted. To include this time you can run `@elapsed @eval ...`. See also [`@time`](@ref), [`@timev`](@ref), [`@timed`](@ref), -and [`@allocated`](@ref). +[`@allocated`](@ref), and [`@allocations`](@ref). ```julia-repl julia> @elapsed sleep(0.3) @@ -409,7 +422,7 @@ end A macro to evaluate an expression, discarding the resulting value, instead returning the total number of bytes allocated during evaluation of the expression. -See also [`@time`](@ref), [`@timev`](@ref), [`@timed`](@ref), +See also [`@allocations`](@ref), [`@time`](@ref), [`@timev`](@ref), [`@timed`](@ref), and [`@elapsed`](@ref). ```julia-repl @@ -429,6 +442,33 @@ macro allocated(ex) end end +""" + @allocations + +A macro to evaluate an expression, discard the resulting value, and instead return the +total number of allocations during evaluation of the expression. + +See also [`@allocated`](@ref), [`@time`](@ref), [`@timev`](@ref), [`@timed`](@ref), +and [`@elapsed`](@ref). + +```julia-repl +julia> @allocations rand(10^6) +2 +``` + +!!! compat "Julia 1.9" + This macro was added in Julia 1.9. +""" +macro allocations(ex) + quote + Experimental.@force_compile + local stats = Base.gc_num() + $(esc(ex)) + local diff = Base.GC_Diff(Base.gc_num(), stats) + Base.gc_alloc_count(diff) + end +end + """ @timed @@ -440,8 +480,8 @@ In some cases the system will look inside the `@timed` expression and compile so called code before execution of the top-level expression begins. When that happens, some compilation time will not be counted. To include this time you can run `@timed @eval ...`. 
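A short sketch of how the timing macros touched above are used; the description-string form and the new `@allocations` macro are the additions most relevant here, and the commented results are illustrative only.

```julia
# The description form of `@time`/`@timev` (Julia 1.8+) is printed as a
# `msg:` prefix by the reworked `time_print(io, ...; msg)`.
@time "matrix build" rand(1000, 1000);

# `@allocated` reports bytes; the new `@allocations` (Julia 1.9) reports
# the number of allocations.
bytes  = @allocated  rand(10^6)
allocs = @allocations rand(10^6)

# `@timed` still returns a NamedTuple of the value plus the statistics.
stats = @timed sum(rand(10^6))
stats.time, stats.bytes
```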
-See also [`@time`](@ref), [`@timev`](@ref), [`@elapsed`](@ref), and -[`@allocated`](@ref). +See also [`@time`](@ref), [`@timev`](@ref), [`@elapsed`](@ref), +[`@allocated`](@ref), and [`@allocations`](@ref). ```julia-repl julia> stats = @timed rand(10^6); diff --git a/base/toml_parser.jl b/base/toml_parser.jl index 323f954cf8b11..086b7d99580c0 100644 --- a/base/toml_parser.jl +++ b/base/toml_parser.jl @@ -80,7 +80,7 @@ mutable struct Parser # Filled in in case we are parsing a file to improve error messages filepath::Union{String, Nothing} - # Get's populated with the Dates stdlib if it exists + # Gets populated with the Dates stdlib if it exists Dates::Union{Module, Nothing} end @@ -194,6 +194,7 @@ end # Inline tables ErrExpectedCommaBetweenItemsInlineTable ErrTrailingCommaInlineTable + ErrInlineTableRedefine # Numbers ErrUnderscoreNotSurroundedByDigits @@ -202,6 +203,7 @@ end ErrLeadingDot ErrNoTrailingDigitAfterDot ErrTrailingUnderscoreNumber + ErrSignInNonBase10Number # DateTime ErrParsingDateTime @@ -229,6 +231,7 @@ const err_message = Dict( ErrUnexpectedEndString => "string literal ended unexpectedly", ErrExpectedEndOfTable => "expected end of table ']'", ErrAddKeyToInlineTable => "tried to add a new key to an inline table", + ErrInlineTableRedefine => "inline table overwrote key from other table", ErrArrayTreatedAsDictionary => "tried to add a key to an array", ErrAddArrayToStaticArray => "tried to append to a statically defined array", ErrGenericValueError => "failed to parse value", @@ -244,7 +247,8 @@ const err_message = Dict( ErrOverflowError => "overflowed when parsing integer", ErrInvalidUnicodeScalar => "invalid unicode scalar", ErrInvalidEscapeCharacter => "invalid escape character", - ErrUnexpectedEofExpectedValue => "unexpected end of file, expected a value" + ErrUnexpectedEofExpectedValue => "unexpected end of file, expected a value", + ErrSignInNonBase10Number => "number not in base 10 is not allowed to have a sign", ) for err in instances(ErrorType) @@ -326,7 +330,7 @@ function Base.showerror(io::IO, err::ParserError) str1, err1 = point_to_line(err.str::String, pos, pos, io) @static if VERSION <= v"1.6.0-DEV.121" # See https://github.com/JuliaLang/julia/issues/36015 - format_fixer = get(io, :color, false) == true ? "\e[0m" : "" + format_fixer = get(io, :color, false)::Bool == true ? "\e[0m" : "" println(io, "$format_fixer ", str1) print(io, "$format_fixer ", err1) else @@ -363,7 +367,7 @@ end @inline peek(l::Parser) = l.current_char # Return true if the character was accepted. When a character -# is accepted it get's eaten and we move to the next character +# is accepted it gets eaten and we move to the next character @inline function accept(l::Parser, f::Union{Function, Char})::Bool c = peek(l) c == EOF_CHAR && return false @@ -467,7 +471,7 @@ function parse_toplevel(l::Parser)::Err{Nothing} l.active_table = l.root @try parse_table(l) skip_ws_comment(l) - if !(peek(l) == '\n' || peek(l) == '\r' || peek(l) == EOF_CHAR) + if !(peek(l) == '\n' || peek(l) == '\r' || peek(l) == '#' || peek(l) == EOF_CHAR) eat_char(l) return ParserError(ErrExpectedNewLineKeyValue) end @@ -475,7 +479,7 @@ function parse_toplevel(l::Parser)::Err{Nothing} @try parse_entry(l, l.active_table) skip_ws_comment(l) # SPEC: "There must be a newline (or EOF) after a key/value pair." 
- if !(peek(l) == '\n' || peek(l) == '\r' || peek(l) == EOF_CHAR) + if !(peek(l) == '\n' || peek(l) == '\r' || peek(l) == '#' || peek(l) == EOF_CHAR) c = eat_char(l) return ParserError(ErrExpectedNewLineKeyValue) end @@ -563,6 +567,10 @@ function parse_entry(l::Parser, d)::Union{Nothing, ParserError} skip_ws(l) value = @try parse_value(l) + # Not allowed to overwrite a value with an inline dict + if value isa Dict && haskey(d, last_key_part) + return ParserError(ErrInlineTableRedefine) + end # TODO: Performance, hashing `last_key_part` again here d[last_key_part] = value return @@ -603,7 +611,7 @@ function _parse_key(l::Parser) else set_marker!(l) if accept_batch(l, isvalid_barekey_char) - if !(peek(l) == '.' || peek(l) == ' ' || peek(l) == ']' || peek(l) == '=') + if !(peek(l) == '.' || iswhitespace(peek(l)) || peek(l) == ']' || peek(l) == '=') c = eat_char(l) return ParserError(ErrInvalidBareKeyCharacter, c) end @@ -657,7 +665,7 @@ end ######### function push!!(v::Vector, el) - # Since these types are typically non-inferrable, they are a big invalidation risk, + # Since these types are typically non-inferable, they are a big invalidation risk, # and since it's used by the package-loading infrastructure the cost of invalidation # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there @@ -789,9 +797,11 @@ function parse_number_or_date_start(l::Parser) set_marker!(l) sgn = 1 + parsed_sign = false if accept(l, '+') - # do nothing + parsed_sign = true elseif accept(l, '-') + parsed_sign = true sgn = -1 end if accept(l, 'i') @@ -811,14 +821,17 @@ function parse_number_or_date_start(l::Parser) if ok_end_value(peek(l)) return Int64(0) elseif accept(l, 'x') + parsed_sign && return ParserError(ErrSignInNonBase10Number) ate, contains_underscore = @try accept_batch_underscore(l, isvalid_hex) - ate && return parse_int(l, contains_underscore) + ate && return parse_hex(l, contains_underscore) elseif accept(l, 'o') + parsed_sign && return ParserError(ErrSignInNonBase10Number) ate, contains_underscore = @try accept_batch_underscore(l, isvalid_oct) - ate && return parse_int(l, contains_underscore) + ate && return parse_oct(l, contains_underscore) elseif accept(l, 'b') + parsed_sign && return ParserError(ErrSignInNonBase10Number) ate, contains_underscore = @try accept_batch_underscore(l, isvalid_binary) - ate && return parse_int(l, contains_underscore) + ate && return parse_bin(l, contains_underscore) elseif accept(l, isdigit) return parse_local_time(l) end @@ -886,15 +899,28 @@ function parse_float(l::Parser, contains_underscore)::Err{Float64} return v end -function parse_int(l::Parser, contains_underscore, base=nothing)::Err{Int64} - s = take_string_or_substring(l, contains_underscore) - v = try - Base.parse(Int64, s; base=base) - catch e - e isa Base.OverflowError && return(ParserError(ErrOverflowError)) - error("internal parser error: did not correctly discredit $(repr(s)) as an int") +for (name, T1, T2, n1, n2) in (("int", Int64, Int128, 17, 33), + ("hex", UInt64, UInt128, 18, 34), + ("oct", UInt64, UInt128, 24, 45), + ("bin", UInt64, UInt128, 66, 130), + ) + @eval function $(Symbol("parse_", name))(l::Parser, contains_underscore, base=nothing)::Err{Union{$(T1), $(T2), BigInt}} + s = take_string_or_substring(l, contains_underscore) + len = length(s) + v = try + if len ≤ $(n1) + Base.parse($(T1), s; base) + elseif $(n1) < len ≤ $(n2) + Base.parse($(T2), s; base) + else + 
Base.parse(BigInt, s; base) + end + catch e + e isa Base.OverflowError && return(ParserError(ErrOverflowError)) + error("internal parser error: did not correctly discredit $(repr(s)) as an int") + end + return v end - return v end diff --git a/base/traits.jl b/base/traits.jl index 53ae14b12c61e..47ab8ddc0c7ac 100644 --- a/base/traits.jl +++ b/base/traits.jl @@ -11,7 +11,7 @@ OrderStyle(::Type{<:Real}) = Ordered() OrderStyle(::Type{<:AbstractString}) = Ordered() OrderStyle(::Type{Symbol}) = Ordered() OrderStyle(::Type{<:Any}) = Unordered() -OrderStyle(::Type{Union{}}) = Ordered() +OrderStyle(::Type{Union{}}, slurp...) = Ordered() # trait for objects that support arithmetic abstract type ArithmeticStyle end @@ -23,6 +23,7 @@ ArithmeticStyle(instance) = ArithmeticStyle(typeof(instance)) ArithmeticStyle(::Type{<:AbstractFloat}) = ArithmeticRounds() ArithmeticStyle(::Type{<:Integer}) = ArithmeticWraps() ArithmeticStyle(::Type{<:Any}) = ArithmeticUnknown() +ArithmeticStyle(::Type{Union{}}, slurp...) = ArithmeticUnknown() # trait for objects that support ranges with regular step """ @@ -58,5 +59,6 @@ ranges with an element type which is a subtype of `Integer`. abstract type RangeStepStyle end struct RangeStepRegular <: RangeStepStyle end # range with regular step struct RangeStepIrregular <: RangeStepStyle end # range with rounding error +RangeStepStyle(::Type{Union{}}, slurp...) = RangeStepIrregular() RangeStepStyle(instance) = RangeStepStyle(typeof(instance)) diff --git a/base/ttyhascolor.jl b/base/ttyhascolor.jl deleted file mode 100644 index 5984dba6d592e..0000000000000 --- a/base/ttyhascolor.jl +++ /dev/null @@ -1,27 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -if Sys.iswindows() - ttyhascolor(term_type = nothing) = true -else - function ttyhascolor(term_type = get(ENV, "TERM", "")) - startswith(term_type, "xterm") && return true - try - @static if Sys.KERNEL === :FreeBSD - return success(`tput AF 0`) - else - return success(`tput setaf 0`) - end - catch e - return false - end - end -end -function get_have_color() - global have_color - have_color === nothing && (have_color = ttyhascolor()) - return have_color::Bool -end -in(key_value::Pair{Symbol,Bool}, ::TTY) = key_value.first === :color && key_value.second === get_have_color() -haskey(::TTY, key::Symbol) = key === :color -getindex(::TTY, key::Symbol) = key === :color ? get_have_color() : throw(KeyError(key)) -get(::TTY, key::Symbol, default) = key === :color ? get_have_color() : default diff --git a/base/tuple.jl b/base/tuple.jl index 694d282fdb8dd..2e1c5972c407d 100644 --- a/base/tuple.jl +++ b/base/tuple.jl @@ -11,6 +11,8 @@ A compact way of representing the type for a tuple of length `N` where all eleme julia> isa((1, 2, 3, 4, 5, 6), NTuple{6, Int}) true ``` + +See also [`ntuple`](@ref). """ NTuple @@ -26,8 +28,9 @@ firstindex(@nospecialize t::Tuple) = 1 lastindex(@nospecialize t::Tuple) = length(t) size(@nospecialize(t::Tuple), d::Integer) = (d == 1) ? 
length(t) : throw(ArgumentError("invalid tuple dimension $d")) axes(@nospecialize t::Tuple) = (OneTo(length(t)),) -@eval getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, $(Expr(:boundscheck))) -@eval getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), $(Expr(:boundscheck))) +getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, @_boundscheck) +getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), @_boundscheck) +__safe_getindex(@nospecialize(t::Tuple), i::Int) = (@_nothrow_noub_meta; getfield(t, i, false)) getindex(t::Tuple, r::AbstractArray{<:Any,1}) = (eltype(t)[t[ri] for ri in r]...,) getindex(t::Tuple, b::AbstractArray{Bool,1}) = length(b) == length(t) ? getindex(t, findall(b)) : throw(BoundsError(t, b)) getindex(t::Tuple, c::Colon) = t @@ -38,9 +41,9 @@ get(f::Callable, t::Tuple, i::Integer) = i in 1:length(t) ? getindex(t, i) : f() # returns new tuple; N.B.: becomes no-op if `i` is out-of-bounds """ - setindex(c::Tuple, v, i::Integer) + setindex(t::Tuple, v, i::Integer) -Creates a new tuple similar to `x` with the value at index `i` set to `v`. +Creates a new tuple similar to `t` with the value at index `i` set to `v`. Throws a `BoundsError` when out of bounds. # Examples @@ -55,9 +58,9 @@ function setindex(x::Tuple, v, i::Integer) _setindex(v, i, x...) end -function _setindex(v, i::Integer, args...) +function _setindex(v, i::Integer, args::Vararg{Any,N}) where {N} @inline - return ntuple(j -> ifelse(j == i, v, args[j]), length(args)) + return ntuple(j -> ifelse(j == i, v, args[j]), Val{N}()) end @@ -65,7 +68,7 @@ end function iterate(@nospecialize(t::Tuple), i::Int=1) @inline - return (1 <= i <= length(t)) ? (@inbounds t[i], i + 1) : nothing + return (1 <= i <= length(t)) ? (t[i], i + 1) : nothing end keys(@nospecialize t::Tuple) = OneTo(length(t)) @@ -186,7 +189,7 @@ function _split_rest(a::Union{AbstractArray, Core.SimpleVector}, n::Int) return a[begin:end-n], a[end-n+1:end] end -split_rest(t::Tuple, n::Int, i=1) = t[i:end-n], t[end-n+1:end] +@eval split_rest(t::Tuple, n::Int, i=1) = ($(Expr(:meta, :aggressive_constprop)); (t[i:end-n], t[end-n+1:end])) # Use dispatch to avoid a branch in first first(::Tuple{}) = throw(ArgumentError("tuple must be non-empty")) @@ -195,43 +198,47 @@ first(t::Tuple) = t[1] # eltype eltype(::Type{Tuple{}}) = Bottom -function eltype(t::Type{<:Tuple{Vararg{E}}}) where {E} - if @isdefined(E) - return E - else - # TODO: need to guard against E being miscomputed by subtyping (ref #23017) - # and compute the result manually in this case - return _compute_eltype(t) - end -end +# the <: here makes the runtime a bit more complicated (needing to check isdefined), but really helps inference +eltype(t::Type{<:Tuple{Vararg{E}}}) where {E} = @isdefined(E) ? (E isa Type ? 
E : Union{}) : _compute_eltype(t) eltype(t::Type{<:Tuple}) = _compute_eltype(t) -function _tuple_unique_fieldtypes(@nospecialize t) +function _compute_eltype(@nospecialize t) @_total_meta - types = IdSet() + has_free_typevars(t) && return Any t´ = unwrap_unionall(t) # Given t = Tuple{Vararg{S}} where S<:Real, the various # unwrapping/wrapping/va-handling here will return Real - if t isa Union - union!(types, _tuple_unique_fieldtypes(rewrap_unionall(t´.a, t))) - union!(types, _tuple_unique_fieldtypes(rewrap_unionall(t´.b, t))) - else - r = Union{} - for ti in (t´::DataType).parameters - r = push!(types, rewrap_unionall(unwrapva(ti), t)) - end + if t´ isa Union + return promote_typejoin(_compute_eltype(rewrap_unionall(t´.a, t)), + _compute_eltype(rewrap_unionall(t´.b, t))) end - return Core.svec(types...) -end -function _compute_eltype(@nospecialize t) - @_total_meta # TODO: the compiler shouldn't need this - types = _tuple_unique_fieldtypes(t) - return afoldl(types...) do a, b - # if we've already reached Any, it can't widen any more - a === Any && return Any - b === Any && return Any - return promote_typejoin(a, b) + p = (t´::DataType).parameters + length(p) == 0 && return Union{} + elt = rewrap_unionall(unwrapva(p[1]), t) + elt isa Type || return Union{} # Tuple{2} is legal as a Type, but the eltype is Union{} since it is uninhabited + r = elt + for i in 2:length(p) + r === Any && return r # if we've already reached Any, it can't widen any more + elt = rewrap_unionall(unwrapva(p[i]), t) + elt isa Type || return Union{} # Tuple{2} is legal as a Type, but the eltype is Union{} since it is uninhabited + r = promote_typejoin(elt, r) end -end + return r +end + +# We'd like to be able to infer eltype(::Tuple), which needs to be able to +# look at these four methods: +# +# julia> methods(Base.eltype, Tuple{Type{<:Tuple}}) +# 4 methods for generic function "eltype" from Base: +# [1] eltype(::Type{Union{}}) +# @ abstractarray.jl:234 +# [2] eltype(::Type{Tuple{}}) +# @ tuple.jl:199 +# [3] eltype(t::Type{<:Tuple{Vararg{E}}}) where E +# @ tuple.jl:200 +# [4] eltype(t::Type{<:Tuple}) +# @ tuple.jl:209 +typeof(function eltype end).name.max_methods = UInt8(4) # version of tail that doesn't throw on empty tuples (used in array indexing) safe_tail(t::Tuple) = tail(t) @@ -295,6 +302,8 @@ function map(f, t::Any32) end # 2 argument function map(f, t::Tuple{}, s::Tuple{}) = () +map(f, t::Tuple, s::Tuple{}) = () +map(f, t::Tuple{}, s::Tuple) = () map(f, t::Tuple{Any,}, s::Tuple{Any,}) = (@inline; (f(t[1],s[1]),)) map(f, t::Tuple{Any,Any}, s::Tuple{Any,Any}) = (@inline; (f(t[1],s[1]), f(t[2],s[2]))) function map(f, t::Tuple, s::Tuple) @@ -302,7 +311,7 @@ function map(f, t::Tuple, s::Tuple) (f(t[1],s[1]), map(f, tail(t), tail(s))...) end function map(f, t::Any32, s::Any32) - n = length(t) + n = min(length(t), length(s)) A = Vector{Any}(undef, n) for i = 1:n A[i] = f(t[i], s[i]) @@ -313,12 +322,16 @@ end heads(ts::Tuple...) = map(t -> t[1], ts) tails(ts::Tuple...) = map(tail, ts) map(f, ::Tuple{}...) = () +anyempty(x::Tuple{}, xs...) = true +anyempty(x::Tuple, xs...) = anyempty(xs...) +anyempty() = false function map(f, t1::Tuple, t2::Tuple, ts::Tuple...) @inline + anyempty(t1, t2, ts...) && return () (f(heads(t1, t2, ts...)...), map(f, tails(t1, t2, ts...)...)...) end function map(f, t1::Any32, t2::Any32, ts::Any32...) - n = length(t1) + n = min(length(t1), length(t2), minimum(length, ts)) A = Vector{Any}(undef, n) for i = 1:n A[i] = f(t1[i], t2[i], map(t -> t[i], ts)...) 
@@ -326,8 +339,6 @@ function map(f, t1::Any32, t2::Any32, ts::Any32...) (A...,) end -_foldl_impl(op, init, itr::Tuple) = afoldl(op, init, itr...) - # type-stable padding fill_to_length(t::NTuple{N,Any}, val, ::Val{N}) where {N} = t fill_to_length(t::Tuple{}, val, ::Val{1}) = (val,) @@ -361,7 +372,7 @@ function tuple_type_tail(T::Type) end end -(::Type{T})(x::Tuple) where {T<:Tuple} = convert(T, x) # still use `convert` for tuples +(::Type{T})(x::Tuple) where {T<:Tuple} = x isa T ? x : convert(T, x) # still use `convert` for tuples Tuple(x::Ref) = tuple(getindex(x)) # faster than iterator for one element Tuple(x::Array{T,0}) where {T} = tuple(getindex(x)) @@ -379,7 +390,9 @@ function _totuple(::Type{T}, itr, s::Vararg{Any,N}) where {T,N} @inline y = iterate(itr, s...) y === nothing && _totuple_err(T) - t1 = convert(fieldtype(T, 1), y[1]) + T1 = fieldtype(T, 1) + y1 = y[1] + t1 = y1 isa T1 ? y1 : convert(T1, y1)::T1 # inference may give up in recursive calls, so annotate here to force accurate return type to be propagated rT = tuple_type_tail(T) ts = _totuple(rT, itr, y[2])::rT @@ -534,7 +547,7 @@ isless(::Tuple, ::Tuple{}) = false """ isless(t1::Tuple, t2::Tuple) -Returns true when t1 is less than t2 in lexicographic order. +Return `true` when `t1` is less than `t2` in lexicographic order. """ function isless(t1::Tuple, t2::Tuple) a, b = t1[1], t2[1] @@ -581,30 +594,22 @@ any(x::Tuple{Bool}) = x[1] any(x::Tuple{Bool, Bool}) = x[1]|x[2] any(x::Tuple{Bool, Bool, Bool}) = x[1]|x[2]|x[3] -# equivalent to any(f, t), to be used only in bootstrap -_tuple_any(f::Function, t::Tuple) = _tuple_any(f, false, t...) -function _tuple_any(f::Function, tf::Bool, a, b...) - @inline - _tuple_any(f, tf | f(a), b...) -end -_tuple_any(f::Function, tf::Bool) = tf - - # a version of `in` esp. for NamedTuple, to make it pure, and not compiled for each tuple length -function sym_in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}}) +function sym_in(x::Symbol, itr::Tuple{Vararg{Symbol}}) + @noinline @_total_meta for y in itr y === x && return true end return false end -in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}}) = sym_in(x, itr) +in(x::Symbol, itr::Tuple{Vararg{Symbol}}) = sym_in(x, itr) """ empty(x::Tuple) -Returns an empty tuple, `()`. +Return an empty tuple, `()`. """ empty(@nospecialize x::Tuple) = () diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl index a8611b21052b5..955bfc97b16ff 100644 --- a/base/twiceprecision.jl +++ b/base/twiceprecision.jl @@ -112,8 +112,8 @@ julia> Float64(hi) + Float64(lo) ``` """ function mul12(x::T, y::T) where {T<:AbstractFloat} - h = x * y - ifelse(iszero(h) | !isfinite(h), (h, h), canonicalize2(h, fma(x, y, -h))) + (h, l) = Math.two_mul(x, y) + ifelse(!isfinite(h), (h, h), (h, l)) end mul12(x::T, y::T) where {T} = (p = x * y; (p, zero(p))) mul12(x, y) = mul12(promote(x, y)...) @@ -141,6 +141,7 @@ julia> hi, lo = Base.div12(x, y) julia> Float64(hi) + Float64(lo) 1.0134170444063066 +``` """ function div12(x::T, y::T) where {T<:AbstractFloat} # We lose precision if any intermediate calculation results in a subnormal. @@ -164,7 +165,7 @@ div12(x, y) = div12(promote(x, y)...) A number with twice the precision of `T`, e.g., quad-precision if `T = Float64`. -!!! warn +!!! warning `TwicePrecision` is an internal type used to increase the precision of floating-point ranges, and not intended for external use. 
If you encounter them in real code, the most likely explanation is @@ -199,16 +200,14 @@ end TwicePrecision{T}(x::T) where {T} = TwicePrecision{T}(x, zero(T)) +TwicePrecision{T}(x::TwicePrecision{T}) where {T} = x + function TwicePrecision{T}(x) where {T} - xT = convert(T, x) + xT = T(x) Δx = x - xT TwicePrecision{T}(xT, T(Δx)) end -function TwicePrecision{T}(x::TwicePrecision) where {T} - TwicePrecision{T}(x.hi, x.lo) -end - TwicePrecision{T}(i::Integer) where {T<:AbstractFloat} = TwicePrecision{T}(canonicalize2(splitprec(T, i)...)...) @@ -254,7 +253,7 @@ nbitslen(::Type{T}, len, offset) where {T<:IEEEFloat} = min(cld(precision(T), 2), nbitslen(len, offset)) # The +1 here is for safety, because the precision of the significand # is 1 bit higher than the number that are explicitly stored. -nbitslen(len, offset) = len < 2 ? 0 : ceil(Int, log2(max(offset-1, len-offset))) + 1 +nbitslen(len, offset) = len < 2 ? 0 : top_set_bit(max(offset-1, len-offset) - 1) + 1 eltype(::Type{TwicePrecision{T}}) where {T} = T @@ -263,15 +262,14 @@ promote_rule(::Type{TwicePrecision{R}}, ::Type{TwicePrecision{S}}) where {R,S} = promote_rule(::Type{TwicePrecision{R}}, ::Type{S}) where {R,S<:Number} = TwicePrecision{promote_type(R,S)} -(::Type{T})(x::TwicePrecision) where {T<:Number} = T(x.hi + x.lo)::T -TwicePrecision{T}(x::Number) where {T} = TwicePrecision{T}(T(x), zero(T)) +(::Type{T})(x::TwicePrecision) where {T<:Number} = (T(x.hi) + T(x.lo))::T convert(::Type{TwicePrecision{T}}, x::TwicePrecision{T}) where {T} = x convert(::Type{TwicePrecision{T}}, x::TwicePrecision) where {T} = - TwicePrecision{T}(convert(T, x.hi), convert(T, x.lo)) + TwicePrecision{T}(convert(T, x.hi), convert(T, x.lo))::TwicePrecision{T} -convert(::Type{T}, x::TwicePrecision) where {T<:Number} = T(x) -convert(::Type{TwicePrecision{T}}, x::Number) where {T} = TwicePrecision{T}(x) +convert(::Type{T}, x::TwicePrecision) where {T<:Number} = T(x)::T +convert(::Type{TwicePrecision{T}}, x::Number) where {T} = TwicePrecision{T}(x)::TwicePrecision{T} float(x::TwicePrecision{<:AbstractFloat}) = x float(x::TwicePrecision) = TwicePrecision(float(x.hi), float(x.lo)) @@ -310,7 +308,7 @@ function *(x::TwicePrecision, v::Number) end function *(x::TwicePrecision{<:IEEEFloat}, v::Integer) v == 0 && return TwicePrecision(x.hi*v, x.lo*v) - nb = ceil(Int, log2(abs(v))) + nb = top_set_bit(abs(v)-1) u = truncbits(x.hi, nb) TwicePrecision(canonicalize2(u*v, ((x.hi-u) + x.lo)*v)...) 
end @@ -478,9 +476,7 @@ end # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12 - @inline - i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - u = i - r.offset + u = oftype(r.offset, i) - r.offset shift_hi, shift_lo = u*r.step.hi, u*r.step.lo x_hi, x_lo = add12(r.ref.hi, shift_hi) T(x_hi + (x_lo + (shift_lo + r.ref.lo))) @@ -488,7 +484,7 @@ end function _getindex_hiprec(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Integer) i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) - u = i - r.offset + u = oftype(r.offset, i) - r.offset shift_hi, shift_lo = u*r.step.hi, u*r.step.lo x_hi, x_lo = add12(r.ref.hi, shift_hi) x_hi, x_lo = add12(x_hi, x_lo + (shift_lo + r.ref.lo)) diff --git a/base/util.jl b/base/util.jl index 46e7f36475b98..38e27520a39cc 100644 --- a/base/util.jl +++ b/base/util.jl @@ -22,6 +22,7 @@ const text_colors = Dict{Union{Symbol,Int},String}( :normal => "\033[0m", :default => "\033[39m", :bold => "\033[1m", + :italic => "\033[3m", :underline => "\033[4m", :blink => "\033[5m", :reverse => "\033[7m", @@ -35,6 +36,7 @@ end const disable_text_style = Dict{Symbol,String}( :bold => "\033[22m", + :italic => "\033[23m", :underline => "\033[24m", :blink => "\033[25m", :reverse => "\033[27m", @@ -47,7 +49,7 @@ const disable_text_style = Dict{Symbol,String}( # Create a docstring with an automatically generated list # of colors. let color_syms = collect(Iterators.filter(x -> !isa(x, Integer), keys(text_colors))), - formatting_syms = [:normal, :bold, :default] + formatting_syms = [:normal, :bold, :italic, :default] global const available_text_colors = cat( sort!(intersect(color_syms, formatting_syms), rev=true), sort!(setdiff( color_syms, formatting_syms)); @@ -69,7 +71,7 @@ Printing with the color `:nothing` will print the string without modifications. text_colors function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}, io::IO, args...; - bold::Bool = false, underline::Bool = false, blink::Bool = false, + bold::Bool = false, italic::Bool = false, underline::Bool = false, blink::Bool = false, reverse::Bool = false, hidden::Bool = false) buf = IOBuffer() iscolor = get(io, :color, false)::Bool @@ -80,12 +82,14 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol} print(io, str) else bold && color === :bold && (color = :nothing) + italic && color === :italic && (color = :nothing) underline && color === :underline && (color = :nothing) blink && color === :blink && (color = :nothing) reverse && color === :reverse && (color = :nothing) hidden && color === :hidden && (color = :nothing) enable_ansi = get(text_colors, color, text_colors[:default]) * (bold ? text_colors[:bold] : "") * + (italic ? text_colors[:italic] : "") * (underline ? text_colors[:underline] : "") * (blink ? text_colors[:blink] : "") * (reverse ? text_colors[:reverse] : "") * @@ -96,6 +100,7 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol} (blink ? disable_text_style[:blink] : "") * (underline ? disable_text_style[:underline] : "") * (bold ? disable_text_style[:bold] : "") * + (italic ? 
disable_text_style[:italic] : "") * get(disable_text_style, color, text_colors[:default]) first = true for line in eachsplit(str, '\n') @@ -110,48 +115,64 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol} end """ - printstyled([io], xs...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Symbol,Int}=:normal) + printstyled([io], xs...; bold::Bool=false, italic::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Symbol,Int}=:normal) Print `xs` in a color specified as a symbol or integer, optionally in bold. Keyword `color` may take any of the values $(Base.available_text_colors_docstring) or an integer between 0 and 255 inclusive. Note that not all terminals support 256 colors. -Keywords `bold=true`, `underline=true`, `blink=true` are self-explanatory. +Keywords `bold=true`, `italic=true`, `underline=true`, `blink=true` are self-explanatory. Keyword `reverse=true` prints with foreground and background colors exchanged, -and `hidden=true` should be invisibe in the terminal but can still be copied. +and `hidden=true` should be invisible in the terminal but can still be copied. These properties can be used in any combination. See also [`print`](@ref), [`println`](@ref), [`show`](@ref). +!!! note + Not all terminals support italic output. Some terminals interpret italic as reverse or + blink. + !!! compat "Julia 1.7" Keywords except `color` and `bold` were added in Julia 1.7. +!!! compat "Julia 1.10" + Support for italic output was added in Julia 1.10. """ -@constprop :none printstyled(io::IO, msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) = - with_output_color(print, color, io, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden) -@constprop :none printstyled(msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) = - printstyled(stdout, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color) +@constprop :none printstyled(io::IO, msg...; bold::Bool=false, italic::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) = + with_output_color(print, color, io, msg...; bold=bold, italic=italic, underline=underline, blink=blink, reverse=reverse, hidden=hidden) +@constprop :none printstyled(msg...; bold::Bool=false, italic::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) = + printstyled(stdout, msg...; bold=bold, italic=italic, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color) """ - Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename())) + Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Union{Nothing,String}=nothing) Return a julia command similar to the one of the running process. 
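For the `italic` keyword added to `printstyled` above, a brief sketch (Julia 1.10+); whether italics actually render depends on the terminal emulator.

```julia
# Plain italic text; terminals that do not support the italic SGR code may
# show it as reverse video or blinking instead.
printstyled("emphasised note\n"; italic=true)

# Attributes combine freely with each other and with colors.
printstyled(stderr, "warning: check your input\n"; color=:yellow, bold=true, italic=true)
```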
Propagates any of the `--cpu-target`, `--sysimage`, `--compile`, `--sysimage-native-code`, -`--compiled-modules`, `--inline`, `--check-bounds`, `--optimize`, `-g`, +`--compiled-modules`, `--pkgimages`, `--inline`, `--check-bounds`, `--optimize`, `--min-optlevel`, `-g`, `--code-coverage`, `--track-allocation`, `--color`, `--startup-file`, and `--depwarn` command line arguments that are not at their default values. Among others, `--math-mode`, `--warn-overwrite`, and `--trace-compile` are notably not propagated currently. +Unless set to `nothing`, the `cpu_target` keyword argument can be used to override the CPU target set for the running process. + +To get the julia command without propagated command line arguments, `julia_cmd()[1]` can be used. + !!! compat "Julia 1.1" Only the `--cpu-target`, `--sysimage`, `--depwarn`, `--compile` and `--check-bounds` flags were propagated before Julia 1.1. !!! compat "Julia 1.5" The flags `--color` and `--startup-file` were added in Julia 1.5. + +!!! compat "Julia 1.9" + The keyword argument `cpu_target` was added in 1.9. + The flag `--pkgimages` was added in Julia 1.9. """ -function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename())) +function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Union{Nothing,String} = nothing) opts = JLOptions() - cpu_target = unsafe_string(opts.cpu_target) + if cpu_target === nothing + cpu_target = unsafe_string(opts.cpu_target) + end image_file = unsafe_string(opts.image_file) addflags = String[] let compile = if opts.compile_enabled == 0 @@ -185,6 +206,9 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename())) end opts.can_inline == 0 && push!(addflags, "--inline=no") opts.use_compiled_modules == 0 && push!(addflags, "--compiled-modules=no") + opts.use_compiled_modules == 2 && push!(addflags, "--compiled-modules=existing") + opts.use_pkgimages == 0 && push!(addflags, "--pkgimages=no") + opts.use_pkgimages == 2 && push!(addflags, "--pkgimages=existing") opts.opt_level == 2 || push!(addflags, "-O$(opts.opt_level)") opts.opt_level_min == 0 || push!(addflags, "--min-optlevel=$(opts.opt_level_min)") push!(addflags, "-g$(opts.debug_level)") @@ -220,11 +244,11 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename())) if opts.use_sysimage_native_code == 0 push!(addflags, "--sysimage-native-code=no") end - return `$julia -C$cpu_target -J$image_file $addflags` + return `$julia -C $cpu_target -J$image_file $addflags` end function julia_exename() - if ccall(:jl_is_debugbuild, Cint, ()) == 0 + if !Base.isdebugbuild() return @static Sys.iswindows() ? "julia.exe" : "julia" else return @static Sys.iswindows() ? "julia-debug.exe" : "julia-debug" @@ -242,7 +266,7 @@ will always be called. function securezero! end @noinline securezero!(a::AbstractArray{<:Number}) = fill!(a, 0) @noinline unsafe_securezero!(p::Ptr{T}, len::Integer=1) where {T} = - ccall(:memset, Ptr{T}, (Ptr{T}, Cint, Csize_t), p, 0, len*sizeof(T)) + memset(p, 0, len*sizeof(T)) unsafe_securezero!(p::Ptr{Cvoid}, len::Integer=1) = Ptr{Cvoid}(unsafe_securezero!(Ptr{UInt8}(p), len)) """ @@ -251,32 +275,90 @@ unsafe_securezero!(p::Ptr{Cvoid}, len::Integer=1) = Ptr{Cvoid}(unsafe_securezero Display a message and wait for the user to input a secret, returning an `IO` object containing the secret. -Note that on Windows, the secret might be displayed as it is typed; see -`Base.winprompt` for securely retrieving username/password pairs from a -graphical interface. +!!! 
info "Windows" + Note that on Windows, the secret might be displayed as it is typed; see + `Base.winprompt` for securely retrieving username/password pairs from a + graphical interface. """ function getpass end -_getch() = UInt8(ccall(:jl_getch, Cint, ())) +# Note, this helper only works within `with_raw_tty()` on POSIX platforms! +function _getch() + @static if Sys.iswindows() + return UInt8(ccall(:_getch, Cint, ())) + else + return read(stdin, UInt8) + end +end + +const termios_size = Int(ccall(:jl_termios_size, Cint, ())) +make_termios() = zeros(UInt8, termios_size) + +# These values seem to hold on all OSes we care about: +# glibc Linux, musl Linux, macOS, FreeBSD +@enum TCSETATTR_FLAGS TCSANOW=0 TCSADRAIN=1 TCSAFLUSH=2 + +function tcgetattr(fd::RawFD, termios) + ret = ccall(:tcgetattr, Cint, (Cint, Ptr{Cvoid}), fd, termios) + if ret != 0 + throw(IOError("tcgetattr failed", ret)) + end +end +function tcsetattr(fd::RawFD, termios, mode::TCSETATTR_FLAGS = TCSADRAIN) + ret = ccall(:tcsetattr, Cint, (Cint, Cint, Ptr{Cvoid}), fd, Cint(mode), termios) + if ret != 0 + throw(IOError("tcsetattr failed", ret)) + end +end +cfmakeraw(termios) = ccall(:cfmakeraw, Cvoid, (Ptr{Cvoid},), termios) + +function with_raw_tty(f::Function, input::TTY) + input === stdin || throw(ArgumentError("with_raw_tty only works for stdin")) + fd = RawFD(0) + + # If we're on windows, we do nothing, as we have access to `_getch()` quite easily + @static if Sys.iswindows() + return f() + end + + # Get the current terminal mode + old_termios = make_termios() + tcgetattr(fd, old_termios) + try + # Set a new, raw, terminal mode + new_termios = copy(old_termios) + cfmakeraw(new_termios) + tcsetattr(fd, new_termios) + + # Call the user-supplied callback + f() + finally + # Always restore the terminal mode + tcsetattr(fd, old_termios) + end +end + function getpass(input::TTY, output::IO, prompt::AbstractString) input === stdin || throw(ArgumentError("getpass only works for stdin")) - print(output, prompt, ": ") - flush(output) - s = SecretBuffer() - plen = 0 - while true - c = _getch() - if c == 0xff || c == UInt8('\n') || c == UInt8('\r') || c == 0x04 - break # EOF or return - elseif c == 0x00 || c == 0xe0 - _getch() # ignore function/arrow keys - elseif c == UInt8('\b') && plen > 0 - plen -= 1 # delete last character on backspace - elseif !iscntrl(Char(c)) && plen < 128 - write(s, c) + with_raw_tty(stdin) do + print(output, prompt, ": ") + flush(output) + s = SecretBuffer() + plen = 0 + while true + c = _getch() + if c == 0xff || c == UInt8('\n') || c == UInt8('\r') || c == 0x04 - break # EOF or return + elseif c == 0x00 || c == 0xe0 + _getch() # ignore function/arrow keys + elseif c == UInt8('\b') && plen > 0 + plen -= 1 # delete last character on backspace + elseif !iscntrl(Char(c)) && plen < 128 + write(s, c) + end end + return seekstart(s) end - return seekstart(s) end # allow new getpass methods to be defined if stdin has been @@ -290,7 +372,7 @@ Displays the `message` then waits for user input. Input is terminated when a newline is encountered or EOF (^D) character is entered on a blank line. If a `default` is provided then the user can enter just a newline character to select the `default`. -See also `Base.getpass` and `Base.winprompt` for secure entry of passwords. +See also `Base.winprompt` (for Windows) and `Base.getpass` for secure entry of passwords. 
# Example @@ -411,7 +493,9 @@ _crc32c(a::NTuple{<:Any, UInt8}, crc::UInt32=0x00000000) = _crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = unsafe_crc32c(a, length(a) % Csize_t, crc) -_crc32c(s::String, crc::UInt32=0x00000000) = unsafe_crc32c(s, sizeof(s) % Csize_t, crc) +function _crc32c(s::Union{String, SubString{String}}, crc::UInt32=0x00000000) + unsafe_crc32c(s, sizeof(s) % Csize_t, crc) +end function _crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0, got $nb")) @@ -427,10 +511,17 @@ function _crc32c(io::IO, nb::Integer, crc::UInt32=0x00000000) end _crc32c(io::IO, crc::UInt32=0x00000000) = _crc32c(io, typemax(Int64), crc) _crc32c(io::IOStream, crc::UInt32=0x00000000) = _crc32c(io, filesize(io)-position(io), crc) -_crc32c(uuid::UUID, crc::UInt32=0x00000000) = - ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, uuid.value, 16) +_crc32c(uuid::UUID, crc::UInt32=0x00000000) = _crc32c(uuid.value, crc) +_crc32c(x::UInt128, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, x, 16) _crc32c(x::UInt64, crc::UInt32=0x00000000) = ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt64}, Csize_t), crc, x, 8) +_crc32c(x::UInt32, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt32}, Csize_t), crc, x, 4) +_crc32c(x::UInt16, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt16}, Csize_t), crc, x, 2) +_crc32c(x::UInt8, crc::UInt32=0x00000000) = + ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt8}, Csize_t), crc, x, 1) """ @kwdef typedef @@ -449,9 +540,12 @@ order to function correctly with the keyword outer constructor. `Base.@kwdef` for parametric structs, and structs with supertypes requires at least Julia 1.1. +!!! compat "Julia 1.9" + This macro is exported as of Julia 1.9. + # Examples ```jldoctest -julia> Base.@kwdef struct Foo +julia> @kwdef struct Foo a::Int = 1 # specified default b::String # required keyword end @@ -461,15 +555,14 @@ julia> Foo(b="hi") Foo(1, "hi") julia> Foo() -ERROR: UndefKeywordError: keyword argument b not assigned +ERROR: UndefKeywordError: keyword argument `b` not assigned Stacktrace: [...] ``` """ macro kwdef(expr) expr = macroexpand(__module__, expr) # to expand @static - expr isa Expr && expr.head === :struct || error("Invalid usage of @kwdef") - expr = expr::Expr + isexpr(expr, :struct) || error("Invalid usage of @kwdef") T = expr.args[2] if T isa Expr && T.head === :<: T = T.args[1] @@ -483,29 +576,33 @@ macro kwdef(expr) # overflow on construction if !isempty(params_ex.args) if T isa Symbol - kwdefs = :(($(esc(T)))($params_ex) = ($(esc(T)))($(call_args...))) - elseif T isa Expr && T.head === :curly - T = T::Expr + sig = :(($(esc(T)))($params_ex)) + call = :(($(esc(T)))($(call_args...))) + body = Expr(:block, __source__, call) + kwdefs = Expr(:function, sig, body) + elseif isexpr(T, :curly) # if T == S{A<:AA,B<:BB}, define two methods # S(...) = ... # S{A,B}(...) where {A<:AA,B<:BB} = ... S = T.args[1] P = T.args[2:end] - Q = Any[U isa Expr && U.head === :<: ? U.args[1] : U for U in P] + Q = Any[isexpr(U, :<:) ? 
U.args[1] : U for U in P] SQ = :($S{$(Q...)}) - kwdefs = quote - ($(esc(S)))($params_ex) =($(esc(S)))($(call_args...)) - ($(esc(SQ)))($params_ex) where {$(esc.(P)...)} = - ($(esc(SQ)))($(call_args...)) - end + body1 = Expr(:block, __source__, :(($(esc(S)))($(call_args...)))) + sig1 = :(($(esc(S)))($params_ex)) + def1 = Expr(:function, sig1, body1) + body2 = Expr(:block, __source__, :(($(esc(SQ)))($(call_args...)))) + sig2 = :(($(esc(SQ)))($params_ex) where {$(esc.(P)...)}) + def2 = Expr(:function, sig2, body2) + kwdefs = Expr(:block, def1, def2) else error("Invalid usage of @kwdef") end else kwdefs = nothing end - quote - Base.@__doc__($(esc(expr))) + return quote + $(esc(:($Base.@__doc__ $expr))) $kwdefs end end @@ -520,7 +617,16 @@ function _kwdef!(blk, params_args, call_args) push!(params_args, ei) push!(call_args, ei) elseif ei isa Expr - if ei.head === :(=) + is_atomic = ei.head === :atomic + ei = is_atomic ? first(ei.args) : ei # strip "@atomic" and add it back later + is_const = ei.head === :const + ei = is_const ? first(ei.args) : ei # strip "const" and add it back later + # Note: `@atomic const ..` isn't valid, but reconstruct it anyway to serve a nice error + if ei isa Symbol + # const var + push!(params_args, ei) + push!(call_args, ei) + elseif ei.head === :(=) lhs = ei.args[1] if lhs isa Symbol # var = defexpr @@ -536,7 +642,9 @@ function _kwdef!(blk, params_args, call_args) defexpr = ei.args[2] # defexpr push!(params_args, Expr(:kw, var, esc(defexpr))) push!(call_args, var) - blk.args[i] = lhs + lhs = is_const ? Expr(:const, lhs) : lhs + lhs = is_atomic ? Expr(:atomic, lhs) : lhs + blk.args[i] = lhs # overrides arg elseif ei.head === :(::) && ei.args[1] isa Symbol # var::Typ var = ei.args[1] @@ -578,8 +686,9 @@ function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS / 2), seed !== nothing && push!(tests, "--seed=0x$(string(seed % UInt128, base=16))") # cast to UInt128 to avoid a minus sign ENV2 = copy(ENV) ENV2["JULIA_CPU_THREADS"] = "$ncores" - ENV2["JULIA_DEPOT_PATH"] = mktempdir(; cleanup = true) - delete!(ENV2, "JULIA_LOAD_PATH") + pathsep = Sys.iswindows() ? ";" : ":" + ENV2["JULIA_DEPOT_PATH"] = string(mktempdir(; cleanup = true), pathsep) # make sure the default depots can be loaded + ENV2["JULIA_LOAD_PATH"] = string("@", pathsep, "@stdlib") delete!(ENV2, "JULIA_PROJECT") try run(setenv(`$(julia_cmd()) $(joinpath(Sys.BINDIR, @@ -588,9 +697,20 @@ function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS / 2), catch buf = PipeBuffer() original_load_path = copy(Base.LOAD_PATH); empty!(Base.LOAD_PATH); pushfirst!(Base.LOAD_PATH, "@stdlib") - Base.require(Base, :InteractiveUtils).versioninfo(buf) + let InteractiveUtils = Base.require(Base, :InteractiveUtils) + @invokelatest InteractiveUtils.versioninfo(buf) + end empty!(Base.LOAD_PATH); append!(Base.LOAD_PATH, original_load_path) error("A test has failed. Please submit a bug report (https://github.com/JuliaLang/julia/issues)\n" * "including error messages above and the output of versioninfo():\n$(read(buf, String))") end end + +""" + isdebugbuild() + +Return `true` if julia is a debug version. 
+""" +function isdebugbuild() + return ccall(:jl_is_debugbuild, Cint, ()) != 0 +end diff --git a/base/version.jl b/base/version.jl index 978abbba1a8aa..e078b5f9d4ac9 100644 --- a/base/version.jl +++ b/base/version.jl @@ -11,11 +11,18 @@ const VInt = UInt32 Version number type which follows the specifications of [semantic versioning (semver)](https://semver.org/), composed of major, minor and patch numeric values, followed by pre-release and build -alpha-numeric annotations. +alphanumeric annotations. `VersionNumber` objects can be compared with all of the standard comparison operators (`==`, `<`, `<=`, etc.), with the result following semver rules. +`VersionNumber` has the following public fields: +- `v.major::Integer` +- `v.minor::Integer` +- `v.patch::Integer` +- `v.prerelease::Tuple{Vararg{Union{Integer, AbstractString}}}` +- `v.build::Tuple{Vararg{Union{Integer, AbstractString}}}` + See also [`@v_str`](@ref) to efficiently construct `VersionNumber` objects from semver-format literal strings, [`VERSION`](@ref) for the `VersionNumber` of Julia itself, and [Version Number Literals](@ref man-version-number-literals) @@ -266,59 +273,3 @@ else end libllvm_path() = ccall(:jl_get_libllvm, Any, ()) - -function banner(io::IO = stdout) - if GIT_VERSION_INFO.tagged_commit - commit_string = TAGGED_RELEASE_BANNER - elseif isempty(GIT_VERSION_INFO.commit) - commit_string = "" - else - days = Int(floor((ccall(:jl_clock_now, Float64, ()) - GIT_VERSION_INFO.fork_master_timestamp) / (60 * 60 * 24))) - days = max(0, days) - unit = days == 1 ? "day" : "days" - distance = GIT_VERSION_INFO.fork_master_distance - commit = GIT_VERSION_INFO.commit_short - - if distance == 0 - commit_string = "Commit $(commit) ($(days) $(unit) old master)" - else - branch = GIT_VERSION_INFO.branch - commit_string = "$(branch)/$(commit) (fork: $(distance) commits, $(days) $(unit))" - end - end - - commit_date = isempty(Base.GIT_VERSION_INFO.date_string) ? "" : " ($(split(Base.GIT_VERSION_INFO.date_string)[1]))" - - if get(io, :color, false) - c = text_colors - tx = c[:normal] # text - jl = c[:normal] # julia - d1 = c[:bold] * c[:blue] # first dot - d2 = c[:bold] * c[:red] # second dot - d3 = c[:bold] * c[:green] # third dot - d4 = c[:bold] * c[:magenta] # fourth dot - - print(io,""" $(d3)_$(tx) - $(d1)_$(tx) $(jl)_$(tx) $(d2)_$(d3)(_)$(d4)_$(tx) | Documentation: https://docs.julialang.org - $(d1)(_)$(jl) | $(d2)(_)$(tx) $(d4)(_)$(tx) | - $(jl)_ _ _| |_ __ _$(tx) | Type \"?\" for help, \"]?\" for Pkg help. - $(jl)| | | | | | |/ _` |$(tx) | - $(jl)| | |_| | | | (_| |$(tx) | Version $(VERSION)$(commit_date) - $(jl)_/ |\\__'_|_|_|\\__'_|$(tx) | $(commit_string) - $(jl)|__/$(tx) | - - """) - else - print(io,""" - _ - _ _ _(_)_ | Documentation: https://docs.julialang.org - (_) | (_) (_) | - _ _ _| |_ __ _ | Type \"?\" for help, \"]?\" for Pkg help. 
- | | | | | | |/ _` | | - | | |_| | | | (_| | | Version $(VERSION)$(commit_date) - _/ |\\__'_|_|_|\\__'_| | $(commit_string) - |__/ | - - """) - end -end diff --git a/base/version_git.sh b/base/version_git.sh index 2a3352d1066ef..76092e9800594 100644 --- a/base/version_git.sh +++ b/base/version_git.sh @@ -3,7 +3,7 @@ # This file collects git info and create a julia file with the GIT_VERSION_INFO struct -echo "# This file was autogenerated in base/version_git.sh" +echo "# This file was autogenerated by base/version_git.sh" echo "struct GitVersionInfo" echo " commit::String" echo " commit_short::String" @@ -41,7 +41,15 @@ if [ -n "$(git status --porcelain)" ]; then # append dirty mark '*' if the repository has uncommitted changes commit_short="$commit_short"* fi -branch=$(git rev-parse --abbrev-ref HEAD) + +# Our CI system checks commits out as a detached head, and so we must +# use the provided branch name, as we cannot autodetect this commit as +# the tip of any such branch. +if [ -n "${BUILDKITE_BRANCH}" ]; then + branch="${BUILDKITE_BRANCH}" +else + branch=$(git rev-parse --abbrev-ref HEAD) +fi topdir=$(git rev-parse --show-toplevel) verchanged=$(git blame -L ,1 -sl -- "$topdir/VERSION" | cut -f 1 -d " ") @@ -52,17 +60,21 @@ else build_number=$(git rev-list --count HEAD "^$verchanged") fi -date_string=$git_time case $(uname) in Darwin | FreeBSD) - date_string="$(/bin/date -jr $git_time -u '+%Y-%m-%d %H:%M %Z')" + if (date --version 2>/dev/null | grep -q 'GNU coreutils') + then # GNU date installed and earlier on PATH than BSD date + date_string="$(date --date="@$git_time" -u '+%Y-%m-%d %H:%M %Z')" + else # otherwise assume BSD date + date_string="$(date -jr $git_time -u '+%Y-%m-%d %H:%M %Z')" + fi ;; MINGW*) git_time=$(git log -1 --pretty=format:%ci) - date_string="$(/bin/date --date="$git_time" -u '+%Y-%m-%d %H:%M %Z')" + date_string="$(date --date="$git_time" -u '+%Y-%m-%d %H:%M %Z')" ;; *) - date_string="$(/bin/date --date="@$git_time" -u '+%Y-%m-%d %H:%M %Z')" + date_string="$(date --date="@$git_time" -u '+%Y-%m-%d %H:%M %Z')" ;; esac if [ $(git describe --tags --exact-match 2> /dev/null) ]; then diff --git a/base/views.jl b/base/views.jl index 70d4c1d9110ee..d83005c86f46c 100644 --- a/base/views.jl +++ b/base/views.jl @@ -224,16 +224,16 @@ Similarly, `@views` converts string slices into [`SubString`](@ref) views. occurs in functions called by that code. !!! compat "Julia 1.5" - Using `begin` in an indexing expression to refer to the first index requires at least - Julia 1.5. + Using `begin` in an indexing expression to refer to the first index was implemented + in Julia 1.4, but was only supported by `@views` starting in Julia 1.5. # Examples ```jldoctest julia> A = zeros(3, 3); julia> @views for row in 1:3 - b = A[row, :] - b[:] .= row + b = A[row, :] # b is a view, not a copy + b .= row # assign every element to the row index end julia> A diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl index 0a9987671ea9b..b827f0d1495da 100644 --- a/base/weakkeydict.jl +++ b/base/weakkeydict.jl @@ -12,6 +12,8 @@ referenced in a hash table. See [`Dict`](@ref) for further help. Note, unlike [`Dict`](@ref), `WeakKeyDict` does not convert keys on insertion, as this would imply the key object was unreferenced anywhere before insertion. + +See also [`WeakRef`](@ref). 
""" mutable struct WeakKeyDict{K,V} <: AbstractDict{K,V} ht::Dict{WeakRef,V} @@ -21,7 +23,7 @@ mutable struct WeakKeyDict{K,V} <: AbstractDict{K,V} # Constructors mirror Dict's function WeakKeyDict{K,V}() where V where K - t = new(Dict{Any,V}(), ReentrantLock(), identity, 0) + t = new(Dict{WeakRef,V}(), ReentrantLock(), identity, 0) t.finalizer = k -> t.dirty = true return t end @@ -78,7 +80,7 @@ function _cleanup_locked(h::WeakKeyDict) return h end -sizehint!(d::WeakKeyDict, newsz) = sizehint!(d.ht, newsz) +sizehint!(d::WeakKeyDict, newsz; shrink::Bool = true) = @lock d sizehint!(d.ht, newsz; shrink = shrink) empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}() IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown() @@ -211,4 +213,6 @@ function iterate(t::WeakKeyDict{K,V}, state...) where {K, V} end end +@propagate_inbounds Iterators.only(d::WeakKeyDict) = Iterators._only(d, first) + filter!(f, d::WeakKeyDict) = filter_in_one_pass!(f, d) diff --git a/cli/Makefile b/cli/Makefile index 11855ee6244dc..c72ebff2b9bfd 100644 --- a/cli/Makefile +++ b/cli/Makefile @@ -1,7 +1,6 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) JULIAHOME := $(abspath $(SRCDIR)/..) BUILDDIR ?= . -include $(JULIAHOME)/deps/Versions.make include $(JULIAHOME)/Make.inc include $(JULIAHOME)/deps/llvm-ver.make @@ -13,13 +12,16 @@ LOADER_LDFLAGS = $(JLDFLAGS) -ffreestanding -L$(build_shlibdir) -L$(build_libdir ifeq ($(OS),WINNT) LOADER_CFLAGS += -municode -mconsole -nostdlib -fno-stack-check -fno-stack-protector -mno-stack-arg-probe +else ifeq ($(OS),Linux) +LOADER_CFLAGS += -DGLIBCXX_LEAST_VERSION_SYMBOL=\"$(shell echo "$(CSL_NEXT_GLIBCXX_VERSION)" | cut -d'|' -f1 | sed 's/\\//g')\" endif ifeq ($(OS),WINNT) LOADER_LDFLAGS += -municode -mconsole -nostdlib --disable-auto-import \ --disable-runtime-pseudo-reloc -lntdll -lkernel32 -lpsapi else ifeq ($(OS),Linux) -LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed +# textoff and notext are aliases to the same option which suppress the TEXTREL warning for i686 +LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed -Wl,-z,notext else ifeq ($(OS),FreeBSD) LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed else ifeq ($(OS),Darwin) @@ -29,6 +31,14 @@ endif # Build list of dependent libraries that must be opened SHIPFLAGS += -DDEP_LIBS="\"$(LOADER_BUILD_DEP_LIBS)\"" DEBUGFLAGS += -DDEP_LIBS="\"$(LOADER_DEBUG_BUILD_DEP_LIBS)\"" +ifneq (,$(findstring MINGW,$(shell uname))) +# In MSYS2, do not perform path conversion for `DEP_LIBS`. +# https://www.msys2.org/wiki/Porting/#filesystem-namespaces +# We define this environment variable for only these two object files, +# as they're the only ones that require it at the time of writing. 
+$(BUILDDIR)/loader_lib.o: export MSYS2_ARG_CONV_EXCL = -DDEP_LIBS= +$(BUILDDIR)/loader_lib.dbg.obj: export MSYS2_ARG_CONV_EXCL = -DDEP_LIBS= +endif # MSYS2 EXE_OBJS := $(BUILDDIR)/loader_exe.o EXE_DOBJS := $(BUILDDIR)/loader_exe.dbg.obj @@ -46,9 +56,9 @@ all: release debug release debug : % : julia-% libjulia-% $(BUILDDIR)/loader_lib.o : $(SRCDIR)/loader_lib.c $(HEADERS) $(JULIAHOME)/VERSION - @$(call PRINT_CC, $(CC) -DLIBRARY_EXPORTS $(SHIPFLAGS) $(LOADER_CFLAGS) -c $< -o $@) + @$(call PRINT_CC, $(CC) -DJL_LIBRARY_EXPORTS $(SHIPFLAGS) $(LOADER_CFLAGS) -c $< -o $@) $(BUILDDIR)/loader_lib.dbg.obj : $(SRCDIR)/loader_lib.c $(HEADERS) $(JULIAHOME)/VERSION - @$(call PRINT_CC, $(CC) -DLIBRARY_EXPORTS $(DEBUGFLAGS) $(LOADER_CFLAGS) -c $< -o $@) + @$(call PRINT_CC, $(CC) -DJL_LIBRARY_EXPORTS $(DEBUGFLAGS) $(LOADER_CFLAGS) -c $< -o $@) $(BUILDDIR)/loader_exe.o : $(SRCDIR)/loader_exe.c $(HEADERS) $(JULIAHOME)/VERSION @$(call PRINT_CC, $(CC) $(SHIPFLAGS) $(LOADER_CFLAGS) -c $< -o $@) $(BUILDDIR)/loader_exe.dbg.obj : $(SRCDIR)/loader_exe.c $(HEADERS) $(JULIAHOME)/VERSION @@ -61,8 +71,7 @@ dump-trampolines: $(SRCDIR)/trampolines/trampolines_$(ARCH).S $(CC) $(SHIPFLAGS) $(LOADER_CFLAGS) $< -S | sed -E 's/ ((%%)|;) /\n/g' | sed -E 's/.global/\n.global/g' DIRS = $(build_bindir) $(build_libdir) -$(DIRS): - @mkdir -p $@ +$(foreach dir,$(DIRS),$(eval $(call dir_target,$(dir)))) ifeq ($(OS),WINNT) $(BUILDDIR)/julia_res.o: $(JULIAHOME)/contrib/windows/julia.rc $(JULIAHOME)/VERSION @@ -96,25 +105,31 @@ julia-debug: $(build_bindir)/julia-debug$(EXE) libjulia-release: $(build_shlibdir)/libjulia.$(SHLIB_EXT) libjulia-debug: $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) +ifneq (,$(filter $(OS), Linux FreeBSD)) +VERSIONSCRIPT := -Wl,--version-script=$(BUILDDIR)/julia.expmap +endif + ifeq ($(OS),WINNT) # On Windows we need to strip out exported functions from the generated import library. STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_strip_symbols.h) endif -$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) - @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \ - $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(RPATH_LIB) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT))) - @$(INSTALL_NAME_CMD)libjulia.$(SHLIB_EXT) $@ +$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir) + @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \ + $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT))) + @$(INSTALL_NAME_CMD)libjulia.$(JL_MAJOR_SHLIB_EXT) $@ + @$(DSYMUTIL) $@ ifeq ($(OS), WINNT) @# Note that if the objcopy command starts getting too long, we can use `@file` to read @# command-line options from `file` instead. 
@$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a) endif -$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) - @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \ - $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(RPATH_LIB) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) - @$(INSTALL_NAME_CMD)libjulia-debug.$(SHLIB_EXT) $@ +$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir) + @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \ + $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) + @$(INSTALL_NAME_CMD)libjulia-debug.$(JL_MAJOR_SHLIB_EXT) $@ + @$(DSYMUTIL) $@ ifeq ($(OS), WINNT) @$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a) endif @@ -134,8 +149,12 @@ $(build_bindir)/julia$(EXE): $(EXE_OBJS) $(build_shlibdir)/libjulia.$(SHLIB_EXT) $(build_bindir)/julia-debug$(EXE): $(EXE_DOBJS) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) | $(build_bindir) @$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) $(DEBUGFLAGS) $(EXE_DOBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia-debug) +$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in + sed <'$<' >'$@' -e 's/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/' + clean: | $(CLEAN_TARGETS) rm -f $(BUILDDIR)/*.o $(BUILDDIR)/*.dbg.obj rm -f $(build_bindir)/julia* + rm -f $(BUILDDIR)/julia.expmap .PHONY: clean release debug julia-release julia-debug diff --git a/cli/jl_exports.h b/cli/jl_exports.h index e9be7c6f2f819..f1a05b504d9da 100644 --- a/cli/jl_exports.h +++ b/cli/jl_exports.h @@ -16,6 +16,10 @@ JL_EXPORTED_DATA_POINTERS(XX) JL_EXPORTED_DATA_SYMBOLS(XX) #undef XX +// define a copy of exported data +#define jl_max_tags 64 +JL_DLLEXPORT void *jl_small_typeof[(jl_max_tags << 4) / sizeof(void*)]; // 16-bit aligned, like the GC + // Declare list of exported functions (sans type) #define XX(name) JL_DLLEXPORT void name(void); typedef void (anonfunc)(void); diff --git a/cli/julia.expmap.in b/cli/julia.expmap.in new file mode 100644 index 0000000000000..b6fa72e9aca03 --- /dev/null +++ b/cli/julia.expmap.in @@ -0,0 +1 @@ +@JULIA_SHLIB_SYMBOL_VERSION@ { global: *; }; diff --git a/cli/loader.h b/cli/loader.h index 2d0b977f7142f..be5195583b29f 100644 --- a/cli/loader.h +++ b/cli/loader.h @@ -5,25 +5,6 @@ #include "../src/support/dirpath.h" #include "../src/julia_fasttls.h" -#ifdef _OS_WINDOWS_ -/* We need to reimplement a bunch of standard library stuff on windows, - * but we want to make sure that it doesn't conflict with the actual implementations - * once those get linked into this process. 
*/ -#define fwrite loader_fwrite -#define fputs loader_fputs -#define exit loader_exit -#define strlen loader_strlen -#define wcslen loader_wcslen -#define strncat loader_strncat -#define memcpy loader_memcpy -#define dirname loader_dirname -#define strchr loader_strchr -#define malloc loader_malloc -#define realloc loader_realloc -#endif - -#include - #ifdef _OS_WINDOWS_ #define WIN32_LEAN_AND_MEAN @@ -46,25 +27,26 @@ #include #include #include +#include #endif +#include + // Borrow definition from `support/dtypes.h` #ifdef _OS_WINDOWS_ -# ifdef LIBRARY_EXPORTS +# ifdef JL_LIBRARY_EXPORTS # define JL_DLLEXPORT __declspec(dllexport) -# else -# define JL_DLLEXPORT __declspec(dllimport) # endif +# define JL_DLLIMPORT __declspec(dllimport) #define JL_HIDDEN #else -# if defined(LIBRARY_EXPORTS) && defined(_OS_LINUX_) -# define JL_DLLEXPORT __attribute__ ((visibility("protected"))) -# else -# define JL_DLLEXPORT __attribute__ ((visibility("default"))) -# endif +# define JL_DLLIMPORT __attribute__ ((visibility("default"))) #define JL_HIDDEN __attribute__ ((visibility("hidden"))) #endif +#ifndef JL_DLLEXPORT +# define JL_DLLEXPORT JL_DLLIMPORT +#endif /* * DEP_LIBS is our list of dependent libraries that must be loaded before `libjulia`. * Note that order matters, as each entry will be opened in-order. We define here a @@ -92,8 +74,8 @@ static void * lookup_symbol(const void * lib_handle, const char * symbol_name); #ifdef _OS_WINDOWS_ LPWSTR *CommandLineToArgv(LPWSTR lpCmdLine, int *pNumArgs); -int wchar_to_utf8(const wchar_t * wstr, char *str, size_t maxlen); -int utf8_to_wchar(const char * str, wchar_t *wstr, size_t maxlen); +char *wchar_to_utf8(const wchar_t * wstr); +wchar_t *utf8_to_wchar(const char * str); void setup_stdio(void); #endif diff --git a/cli/loader_exe.c b/cli/loader_exe.c index 07a0bddcd4b87..5fc8e73189ac6 100644 --- a/cli/loader_exe.c +++ b/cli/loader_exe.c @@ -15,7 +15,7 @@ extern "C" { JULIA_DEFINE_FAST_TLS #ifdef _COMPILER_ASAN_ENABLED_ -JL_DLLEXPORT const char* __asan_default_options() +JL_DLLEXPORT const char* __asan_default_options(void) { return "allow_user_segv_handler=1:detect_leaks=0"; // FIXME: enable LSAN after fixing leaks & defining __lsan_default_suppressions(), @@ -36,7 +36,7 @@ int main(int argc, char * argv[]) { #endif -#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) +#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) // ASAN/TSAN do not support RTLD_DEEPBIND // https://github.com/google/sanitizers/issues/611 putenv("LBT_USE_RTLD_DEEPBIND=0"); @@ -45,9 +45,8 @@ int main(int argc, char * argv[]) // Convert Windows wchar_t values to UTF8 #ifdef _OS_WINDOWS_ for (int i = 0; i < argc; i++) { - size_t max_arg_len = 4*wcslen(wargv[i]); - argv[i] = (char *)malloc(max_arg_len); - if (!wchar_to_utf8(wargv[i], argv[i], max_arg_len)) { + argv[i] = wchar_to_utf8(wargv[i]); + if (!argv[i]) { jl_loader_print_stderr("Unable to convert all arguments to UTF-8!\n"); return 1; } diff --git a/cli/loader_lib.c b/cli/loader_lib.c index 74241510ffd25..02030cf2717a5 100644 --- a/cli/loader_lib.c +++ b/cli/loader_lib.c @@ -13,10 +13,15 @@ extern "C" { /* Bring in helper functions for windows without libgcc. */ #ifdef _OS_WINDOWS_ #include "loader_win_utils.c" + +#include +static int win_file_exists(wchar_t* wpath) { + return GetFileAttributesW(wpath) == INVALID_FILE_ATTRIBUTES ? 
0 : 1; +} #endif // Save DEP_LIBS to a variable that is explicitly sized for expansion -static char dep_libs[1024] = DEP_LIBS; +static char dep_libs[1024] = "\0" DEP_LIBS; JL_DLLEXPORT void jl_loader_print_stderr(const char * msg) { @@ -31,9 +36,15 @@ void jl_loader_print_stderr3(const char * msg1, const char * msg2, const char * } /* Wrapper around dlopen(), with extra relative pathing thrown in*/ +/* If err, then loads the library successfully or panics. + * If !err, then loads the library or returns null if the file does not exist, + * or panics if opening failed for any other reason. */ +/* Currently the only use of this function with !err is in opening libjulia-codegen, + * which the user can delete to save space if generating new code is not necessary. + * However, if it exists and cannot be loaded, that's a problem. So, we alert the user + * and abort the process. */ static void * load_library(const char * rel_path, const char * src_dir, int err) { void * handle = NULL; - // See if a handle is already open to the basename const char *basename = rel_path + strlen(rel_path); while (basename-- > rel_path) @@ -55,19 +66,33 @@ static void * load_library(const char * rel_path, const char * src_dir, int err) strncat(path, rel_path, sizeof(path) - 1); #if defined(_OS_WINDOWS_) - wchar_t wpath[2*JL_PATH_MAX + 1] = {0}; - if (!utf8_to_wchar(path, wpath, 2*JL_PATH_MAX)) { +#define PATH_EXISTS() win_file_exists(wpath) + wchar_t *wpath = utf8_to_wchar(path); + if (!wpath) { jl_loader_print_stderr3("ERROR: Unable to convert path ", path, " to wide string!\n"); exit(1); } handle = (void *)LoadLibraryExW(wpath, NULL, LOAD_WITH_ALTERED_SEARCH_PATH); #else +#define PATH_EXISTS() !access(path, F_OK) handle = dlopen(path, RTLD_NOW | (err ? RTLD_GLOBAL : RTLD_LOCAL)); #endif - - if (handle == NULL) { - if (!err) + if (handle != NULL) { +#if defined(_OS_WINDOWS_) + free(wpath); +#endif + } + else { + if (!err && !PATH_EXISTS()) { +#if defined(_OS_WINDOWS_) + free(wpath); +#endif return NULL; + } +#if defined(_OS_WINDOWS_) + free(wpath); +#endif +#undef PATH_EXISTS jl_loader_print_stderr3("ERROR: Unable to load dependent library ", path, "\n"); #if defined(_OS_WINDOWS_) LPWSTR wmsg = TEXT(""); @@ -78,9 +103,9 @@ static void * load_library(const char * rel_path, const char * src_dir, int err) NULL, GetLastError(), MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), (LPWSTR)&wmsg, 0, NULL); - char err[256] = {0}; - wchar_to_utf8(wmsg, err, 255); - jl_loader_print_stderr3("Message:", err, "\n"); + char *errmsg = wchar_to_utf8(wmsg); + jl_loader_print_stderr3("Message:", errmsg, "\n"); + free(errmsg); #else char *dlerr = dlerror(); if (dlerr != NULL) { @@ -101,20 +126,20 @@ static void * lookup_symbol(const void * lib_handle, const char * symbol_name) { } // Find the location of libjulia. -char lib_dir[JL_PATH_MAX]; +char *lib_dir = NULL; JL_DLLEXPORT const char * jl_get_libdir() { // Reuse the path if this is not the first call. - if (lib_dir[0] != 0) { + if (lib_dir) { return lib_dir; } #if defined(_OS_WINDOWS_) // On Windows, we use GetModuleFileNameW - wchar_t libjulia_path[JL_PATH_MAX]; + wchar_t *libjulia_path = utf8_to_wchar(LIBJULIA_NAME); HMODULE libjulia = NULL; // Get a handle to libjulia. 
- if (!utf8_to_wchar(LIBJULIA_NAME, libjulia_path, JL_PATH_MAX)) { + if (!libjulia_path) { jl_loader_print_stderr3("ERROR: Unable to convert path ", LIBJULIA_NAME, " to wide string!\n"); exit(1); } @@ -123,14 +148,18 @@ JL_DLLEXPORT const char * jl_get_libdir() jl_loader_print_stderr3("ERROR: Unable to load ", LIBJULIA_NAME, "!\n"); exit(1); } - if (!GetModuleFileNameW(libjulia, libjulia_path, JL_PATH_MAX)) { + free(libjulia_path); + libjulia_path = (wchar_t*)malloc(32768 * sizeof(wchar_t)); // max long path length + if (!GetModuleFileNameW(libjulia, libjulia_path, 32768)) { jl_loader_print_stderr("ERROR: GetModuleFileName() failed\n"); exit(1); } - if (!wchar_to_utf8(libjulia_path, lib_dir, JL_PATH_MAX)) { + lib_dir = wchar_to_utf8(libjulia_path); + if (!lib_dir) { jl_loader_print_stderr("ERROR: Unable to convert julia path to UTF-8\n"); exit(1); } + free(libjulia_path); #else // On all other platforms, use dladdr() Dl_info info; @@ -142,7 +171,7 @@ JL_DLLEXPORT const char * jl_get_libdir() } exit(1); } - strcpy(lib_dir, info.dli_fname); + lib_dir = strdup(info.dli_fname); #endif // Finally, convert to dirname const char * new_dir = dirname(lib_dir); @@ -153,58 +182,309 @@ JL_DLLEXPORT const char * jl_get_libdir() return lib_dir; } -void * libjulia_internal = NULL; +// On Linux, it can happen that the system has a newer libstdc++ than the one we ship, +// which can break loading of some system libraries: . +// As a fix, on linux we probe the system libstdc++ to see if it is newer, and then load it if it is. +// Otherwise, we load the bundled one. This improves compatibility with third party dynamic libs that +// may depend on symbols exported by the system libstdc++. +#ifdef _OS_LINUX_ +#ifndef GLIBCXX_LEAST_VERSION_SYMBOL +#warning GLIBCXX_LEAST_VERSION_SYMBOL should always be defined in the makefile. +#define GLIBCXX_LEAST_VERSION_SYMBOL "GLIBCXX_a.b.c" /* Appease the linter */ +#endif + +#include +#include + +// write(), but handle errors and avoid EINTR +static void write_wrapper(int fd, const char *str, size_t len) +{ + size_t written_sofar = 0; + while (len) { + ssize_t bytes_written = write(fd, str + written_sofar, len); + if (bytes_written == -1 && errno == EINTR) continue; + if (bytes_written == -1 && errno != EINTR) { + perror("(julia) child libstdcxxprobe write"); + _exit(1); + } + len -= bytes_written; + written_sofar += bytes_written; + } +} + +// read(), but handle errors and avoid EINTR +static void read_wrapper(int fd, char **ret, size_t *ret_len) +{ + // Allocate an initial buffer + size_t len = JL_PATH_MAX; + char *buf = (char *)malloc(len + 1); + if (!buf) { + perror("(julia) malloc"); + exit(1); + } + + // Read into it, reallocating as necessary + size_t have_read = 0; + while (1) { + ssize_t n = read(fd, buf + have_read, len - have_read); + if (n == 0) break; + if (n == -1 && errno != EINTR) { + perror("(julia) libstdcxxprobe read"); + exit(1); + } + if (n == -1 && errno == EINTR) continue; + have_read += n; + if (have_read == len) { + buf = (char *)realloc(buf, 1 + (len *= 2)); + if (!buf) { + perror("(julia) realloc"); + exit(1); + } + } + } + + *ret = buf; + *ret_len = have_read; +} + +// Return the path to the libstdcxx to load. +// If the path is found, return it. +// Otherwise, print the error and exit. +// The path returned must be freed. +static char *libstdcxxprobe(void) +{ + // Create the pipe and child process. 
+ int fork_pipe[2]; + int ret = pipe(fork_pipe); + if (ret == -1) { + perror("(julia) Error during libstdcxxprobe: pipe"); + exit(1); + } + pid_t pid = fork(); + if (pid == -1) { + perror("Error during libstdcxxprobe:\nfork"); + exit(1); + } + if (pid == (pid_t) 0) { // Child process. + close(fork_pipe[0]); + + // Open the first available libstdc++.so. + // If it can't be found, report so by exiting zero. + // The star is there to prevent the compiler from merging constants + // with "\0*libstdc++.so.6", which we string replace inside the .so during + // make install. + void *handle = dlopen("libstdc++.so.6\0*", RTLD_LAZY); + if (!handle) { + _exit(0); + } + + // See if the version is compatible + char *dlerr = dlerror(); // clear out dlerror + void *sym = dlsym(handle, GLIBCXX_LEAST_VERSION_SYMBOL); + (void)sym; + dlerr = dlerror(); + if (dlerr) { + // We can't use the library that was found, so don't write anything. + // The main process will see that nothing was written, + // then exit the function and return null. + _exit(0); + } + + // No error means the symbol was found, we can use this library. + // Get the path to it, and write it to the parent process. + struct link_map *lm; + ret = dlinfo(handle, RTLD_DI_LINKMAP, &lm); + if (ret == -1) { + char *errbuf = dlerror(); + char *errdesc = (char*)"Error during libstdcxxprobe in child process:\ndlinfo: "; + write_wrapper(STDERR_FILENO, errdesc, strlen(errdesc)); + write_wrapper(STDERR_FILENO, errbuf, strlen(errbuf)); + write_wrapper(STDERR_FILENO, "\n", 1); + _exit(1); + } + char *libpath = lm->l_name; + write_wrapper(fork_pipe[1], libpath, strlen(libpath)); + _exit(0); + } + else { // Parent process. + close(fork_pipe[1]); + + // Read the absolute path to the lib from the child process. + char *path; + size_t pathlen; + read_wrapper(fork_pipe[0], &path, &pathlen); + + // Close the read end of the pipe + close(fork_pipe[0]); + + // Wait for the child to complete. + while (1) { + int wstatus; + pid_t npid = waitpid(pid, &wstatus, 0); + if (npid == -1) { + if (errno == EINTR) continue; + if (errno != EINTR) { + perror("Error during libstdcxxprobe in parent process:\nwaitpid"); + exit(1); + } + } + else if (!WIFEXITED(wstatus)) { + const char *err_str = "Error during libstdcxxprobe in parent process:\n" + "The child process did not exit normally.\n"; + size_t err_strlen = strlen(err_str); + write_wrapper(STDERR_FILENO, err_str, err_strlen); + exit(1); + } + else if (WEXITSTATUS(wstatus)) { + // The child has printed an error and exited, so the parent should exit too. + exit(1); + } + break; + } + + if (!pathlen) { + free(path); + return NULL; + } + // Ensure that `path` is zero-terminated. + path[pathlen] = '\0'; + return path; + } +} +#endif + +void *libjulia_internal = NULL; +void *libjulia_codegen = NULL; __attribute__((constructor)) void jl_load_libjulia_internal(void) { +#if defined(_OS_LINUX_) + // Julia uses `sigwait()` to handle signals, and all threads are required + // to mask the corresponding handlers so that the signals can be waited on. + // Here, we setup that masking early, so that it is inherited by any threads + // spawned (e.g. by constructors) when loading deps of libjulia-internal. 
+ + sigset_t all_signals, prev_mask; + sigfillset(&all_signals); + pthread_sigmask(SIG_BLOCK, &all_signals, &prev_mask); +#endif + // Only initialize this once if (libjulia_internal != NULL) { return; } // Introspect to find our own path - const char * lib_dir = jl_get_libdir(); + const char *lib_dir = jl_get_libdir(); // Pre-load libraries that libjulia-internal needs. - int deps_len = strlen(dep_libs); - char * curr_dep = &dep_libs[0]; + char *curr_dep = &dep_libs[1]; // We keep track of "special" libraries names (ones whose name is prefixed with `@`) - // which are libraries that we want to load in some special, custom way, such as - // `libjulia-internal` or `libjulia-codegen`. + // which are libraries that we want to load in some special, custom way. + // The current list is: + // libstdc++ + // libjulia-internal + // libjulia-codegen + const int NUM_SPECIAL_LIBRARIES = 3; int special_idx = 0; - char * special_library_names[2] = {NULL}; while (1) { // try to find next colon character; if we can't, break out char * colon = strchr(curr_dep, ':'); if (colon == NULL) break; - // Chop the string at the colon so it's a valid-ending-string - *colon = '\0'; - // If this library name starts with `@`, don't open it here (but mark it as special) if (curr_dep[0] == '@') { - if (special_idx > sizeof(special_library_names)/sizeof(char *)) { + special_idx += 1; + if (special_idx > NUM_SPECIAL_LIBRARIES) { jl_loader_print_stderr("ERROR: Too many special library names specified, check LOADER_BUILD_DEP_LIBS and friends!\n"); exit(1); } - special_library_names[special_idx] = curr_dep + 1; - special_idx += 1; - } else { - load_library(curr_dep, lib_dir, 1); } - // Skip ahead to next dependency + // Skip to next dep curr_dep = colon + 1; } - if (special_idx != sizeof(special_library_names)/sizeof(char *)) { + // Assert that we have exactly the right number of special library names + if (special_idx != NUM_SPECIAL_LIBRARIES) { jl_loader_print_stderr("ERROR: Too few special library names specified, check LOADER_BUILD_DEP_LIBS and friends!\n"); exit(1); } - // Unpack our special library names. This is why ordering of library names matters. - libjulia_internal = load_library(special_library_names[0], lib_dir, 1); - void *libjulia_codegen = load_library(special_library_names[1], lib_dir, 0); + // Now that we've asserted that we have the right number of special + // libraries, actually run a loop over the deps loading them in-order. + // If it's a special library, we do slightly different things, especially + // for libstdc++, where we actually probe for a system libstdc++ and + // load that if it's newer. + special_idx = 0; + curr_dep = &dep_libs[1]; + while (1) { + // try to find next colon character; if we can't, break out + char * colon = strchr(curr_dep, ':'); + if (colon == NULL) + break; + + // Chop the string at the colon so it's a valid-ending-string + *colon = '\0'; + + // If this library name starts with `@`, it's a special library + // and requires special handling: + if (curr_dep[0] == '@') { + // Skip the `@` for future function calls. + curr_dep += 1; + + // First special library to be loaded is `libstdc++`; perform probing here. 
+ if (special_idx == 0) { +#if defined(_OS_LINUX_) + int do_probe = 1; + int probe_successful = 0; + + // Check to see if the user has disabled libstdc++ probing + char *probevar = getenv("JULIA_PROBE_LIBSTDCXX"); + if (probevar) { + if (strcmp(probevar, "1") == 0 || strcmp(probevar, "yes") == 0) + do_probe = 1; + else if (strcmp(probevar, "0") == 0 || strcmp(probevar, "no") == 0) + do_probe = 0; + } + if (do_probe) { + char *cxxpath = libstdcxxprobe(); + if (cxxpath) { + void *cxx_handle = dlopen(cxxpath, RTLD_LAZY); + (void)cxx_handle; + const char *dlr = dlerror(); + if (dlr) { + jl_loader_print_stderr("ERROR: Unable to dlopen(cxxpath) in parent!\n"); + jl_loader_print_stderr3("Message: ", dlr, "\n"); + exit(1); + } + free(cxxpath); + probe_successful = 1; + } + } + // If the probe rejected the system libstdc++ (or didn't find one!) + // just load our bundled libstdc++ as identified by curr_dep; + if (!probe_successful) { + load_library(curr_dep, lib_dir, 1); + } +#endif + } else if (special_idx == 1) { + // This special library is `libjulia-internal` + libjulia_internal = load_library(curr_dep, lib_dir, 1); + } else if (special_idx == 2) { + // This special library is `libjulia-codegen` + libjulia_codegen = load_library(curr_dep, lib_dir, 0); + } + special_idx++; + } else { + // Otherwise, just load it as "normal" + load_library(curr_dep, lib_dir, 1); + } + + // Skip ahead to next dependency + curr_dep = colon + 1; + } + const char * const * codegen_func_names; const char *codegen_liberr; if (libjulia_codegen == NULL) { @@ -248,13 +528,25 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) { } void *fptr = lookup_symbol(RTLD_DEFAULT, "jl_get_pgcstack_static"); void *(*key)(void) = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_addr_static"); - if (fptr != NULL && key != NULL) - jl_pgcstack_setkey(fptr, key); + _Atomic(char) *semaphore = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_static_semaphore"); + if (fptr != NULL && key != NULL && semaphore != NULL) { + char already_used = 0; + atomic_compare_exchange_strong(semaphore, &already_used, 1); + if (already_used == 0) // RMW succeeded - we have exclusive access + jl_pgcstack_setkey(fptr, key); + } #endif // jl_options must be initialized very early, in case an embedder sets some // values there before calling jl_init ((void (*)(void))jl_init_options_addr)(); + +#if defined(_OS_LINUX_) + // Restore the original signal mask. `jl_init()` will later setup blocking + // for the specific set of signals we `sigwait()` on, and any threads spawned + // during loading above will still retain their inherited signal mask. 
+ pthread_sigmask(SIG_SETMASK, &prev_mask, NULL); +#endif } // Load libjulia and run the REPL with the given arguments (in UTF-8 format) @@ -278,7 +570,7 @@ JL_DLLEXPORT int jl_load_repl(int argc, char * argv[]) { } #ifdef _OS_WINDOWS_ -int __stdcall DllMainCRTStartup(void* instance, unsigned reason, void* reserved) { +int __stdcall DllMainCRTStartup(void *instance, unsigned reason, void *reserved) { setup_stdio(); // Because we override DllMainCRTStartup, we have to manually call our constructor methods diff --git a/cli/loader_win_utils.c b/cli/loader_win_utils.c index 621834a030c52..ed585a7a64ff0 100644 --- a/cli/loader_win_utils.c +++ b/cli/loader_win_utils.c @@ -12,32 +12,42 @@ static FILE _stderr = { INVALID_HANDLE_VALUE }; FILE *stdout = &_stdout; FILE *stderr = &_stderr; -int loader_fwrite(const WCHAR *str, size_t nchars, FILE *out) { +int JL_HIDDEN fwrite(const char *str, size_t nchars, FILE *out) { DWORD written; if (out->isconsole) { - if (WriteConsole(out->fd, str, nchars, &written, NULL)) + // Windows consoles do not support UTF-8 (for reading input, though new Windows Terminal does for writing), only UTF-16. + wchar_t* wstr = utf8_to_wchar(str); + if (!wstr) + return -1; + if (WriteConsoleW(out->fd, wstr, wcslen(wstr), &written, NULL)) { + free(wstr); return written; + } + free(wstr); } else { - if (WriteFile(out->fd, str, sizeof(WCHAR) * nchars, &written, NULL)) + // However, we want to print UTF-8 if the output is a file. + if (WriteFile(out->fd, str, nchars, &written, NULL)) return written; } return -1; } -int loader_fputs(const char *str, FILE *out) { - wchar_t wstr[1024]; - utf8_to_wchar(str, wstr, 1024); - return fwrite(wstr, wcslen(wstr), out); +int JL_HIDDEN fputs(const char *str, FILE *out) { + return fwrite(str, strlen(str), out); } -void * loader_malloc(const size_t size) { +void JL_HIDDEN *malloc(const size_t size) { return HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, size); } -void * loader_realloc(void * mem, const size_t size) { +void JL_HIDDEN *realloc(void * mem, const size_t size) { return HeapReAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, mem, size); } +void JL_HIDDEN free(void* mem) { + HeapFree(GetProcessHeap(), 0, mem); +} + LPWSTR *CommandLineToArgv(LPWSTR lpCmdLine, int *pNumArgs) { LPWSTR out = lpCmdLine; LPWSTR cmd = out; @@ -100,59 +110,59 @@ void setup_stdio() { _stderr.isconsole = GetConsoleMode(_stderr.fd, &mode); } -void loader_exit(int code) { +void JL_HIDDEN exit(int code) { ExitProcess(code); } /* Utilities to convert from Windows' wchar_t stuff to UTF-8 */ -int wchar_to_utf8(const wchar_t * wstr, char *str, size_t maxlen) { +char *wchar_to_utf8(const wchar_t * wstr) { /* Fast-path empty strings, as WideCharToMultiByte() returns zero for them. */ if (wstr[0] == L'\0') { + char *str = malloc(1); str[0] = '\0'; - return 1; + return str; } size_t len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL); if (!len) - return 0; - if (len > maxlen) - return 0; + return NULL; + char *str = (char *)malloc(len); if (!WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL)) - return 0; - return 1; + return NULL; + return str; } -int utf8_to_wchar(const char * str, wchar_t * wstr, size_t maxlen) { - /* Fast-path empty strings, as WideCharToMultiByte() returns zero for them. */ +wchar_t *utf8_to_wchar(const char * str) { + /* Fast-path empty strings, as MultiByteToWideChar() returns zero for them. 
*/ if (str[0] == '\0') { + wchar_t *wstr = malloc(sizeof(wchar_t)); wstr[0] = L'\0'; - return 1; + return wstr; } size_t len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); if (!len) - return 0; - if (len > maxlen) - return 0; + return NULL; + wchar_t *wstr = (wchar_t *)malloc(len * sizeof(wchar_t)); if (!MultiByteToWideChar(CP_UTF8, 0, str, -1, wstr, len)) - return 0; - return 1; + return NULL; + return wstr; } -size_t loader_strlen(const char * x) { +size_t JL_HIDDEN strlen(const char * x) { int idx = 0; while (x[idx] != 0) idx++; return idx; } -size_t loader_wcslen(const wchar_t * x) { +size_t JL_HIDDEN wcslen(const wchar_t * x) { int idx = 0; while (x[idx] != 0) idx++; return idx; } -char * loader_strncat(char * base, const char * tail, size_t maxlen) { +char JL_HIDDEN *strncat(char * base, const char * tail, size_t maxlen) { int base_len = strlen(base); int tail_len = strlen(tail); for (int idx=base_len; idx 0 && x[idx] != PATHSEPSTRING[0]) { idx -= 1; @@ -188,7 +205,7 @@ char * loader_dirname(char * x) { return x; } -char * loader_strchr(const char * haystack, int needle) { +char JL_HIDDEN *strchr(const char * haystack, int needle) { int idx=0; while (haystack[idx] != needle) { if (haystack[idx] == 0) { diff --git a/cli/trampolines/trampolines_i686.S b/cli/trampolines/trampolines_i686.S index 3d9cacf0ce652..f6c46fd6ee49b 100644 --- a/cli/trampolines/trampolines_i686.S +++ b/cli/trampolines/trampolines_i686.S @@ -3,13 +3,41 @@ #include "common.h" #include "../../src/jl_exported_funcs.inc" +// set this option to 1 to get very slightly slower trampolines which however do not trigger +// this linker warning: +// ld: ./loader_trampolines.o: warning: relocation against `jl_***_addr' in read-only section `.text' +// ld: warning: creating DT_TEXTREL in a shared object +// If you have a large libjulia.so file or other restrictions on using TEXTREL for some +// reason, this may be worthwhile. +// This is not relevant on Windows (though it is valid there), since it always uses +// DT_TEXTREL anyways, and does not support this notion of PIC. 
+#define USE_PC32 0 + +#if USE_PC32 +.cfi_startproc +julia__x86.get_pc_thunk.ax: + mov (%esp),%eax + ret +.cfi_endproc + +#define CALL(name) \ + call julia__x86.get_pc_thunk.ax; \ + jmpl *(CNAMEADDR(name) - .)(%eax); \ + +#else + +#define CALL(name) \ + jmpl *(CNAMEADDR(name)); \ + +#endif + #define XX(name) \ DEBUGINFO(CNAME(name)); \ .global CNAME(name); \ .cfi_startproc; \ CNAME(name)##:; \ CET_START(); \ - jmpl *(CNAMEADDR(name)); \ + CALL(name); \ ud2; \ .cfi_endproc; \ EXPORT(name); \ diff --git a/contrib/README.md b/contrib/README.md index f75dc4488fb0b..46058bbf46642 100644 --- a/contrib/README.md +++ b/contrib/README.md @@ -13,10 +13,22 @@ Installation |[ install.sh ](https://github.com/JuliaLang/julia/blob/master/contrib/install.sh) | Installation script with different permissions | |[ julia.appdata.xml ](https://github.com/JuliaLang/julia/blob/master/contrib/julia.appdata.xml) | Appdata config file | |[ julia-config.jl ](https://github.com/JuliaLang/julia/blob/master/contrib/julia-config.jl) | Determines build parameters required by an embedded Julia | -|[ julia.desktop ](https://github.com/JuliaLang/julia/blob/master/contrib/julia.desktop) | GNOME desktop config file | +|[ julia.desktop ](https://github.com/JuliaLang/julia/blob/master/contrib/julia.desktop) | Desktop entry file | +|[ julia.png ](https://github.com/JuliaLang/julia/blob/master/contrib/julia.png) | Julia png image file | +|[ julia.svg ](https://github.com/JuliaLang/julia/blob/master/contrib/julia.svg) | Julia svg image file | |[ relative_path.py ](https://github.com/JuliaLang/julia/blob/master/contrib/relative_path.py) | Convert absolute paths into relative paths | |[ stringreplace.c ](https://github.com/JuliaLang/julia/blob/master/contrib/stringreplace.c) | Replace strings to hardcoded paths in binaries during `make install` | +Packagers may want to run this command via a script after package installation. + +``` +if [ -e /usr/share/icons/hicolor/icon-theme.cache ]; then + if [ -x /usr/bin/gtk-update-icon-cache ]; then + /usr/bin/gtk-update-icon-cache -f /usr/share/icons/hicolor >/dev/null 2>&1 + fi +fi +``` + Debugging ========= diff --git a/contrib/check-whitespace.jl b/contrib/check-whitespace.jl index 4d078d400daea..b92f0c88a53a6 100755 --- a/contrib/check-whitespace.jl +++ b/contrib/check-whitespace.jl @@ -18,6 +18,18 @@ const patterns = split(""" *Makefile """) +# Note: `git ls-files` gives `/` as a path separator on Windows, +# so we just use `/` for all platforms. 
+allow_tabs(path) = + path == "Make.inc" || + endswith(path, "Makefile") || + endswith(path, ".make") || + endswith(path, ".mk") || + startswith(path, "src/support") || + startswith(path, "src/flisp") || + endswith(path, "test/syntax.jl") || + endswith(path, "test/triplequote.jl") + const errors = Set{Tuple{String,Int,String}}() for path in eachline(`git ls-files -- $patterns`) @@ -27,10 +39,13 @@ for path in eachline(`git ls-files -- $patterns`) file_err(msg) = push!(errors, (path, 0, msg)) line_err(msg) = push!(errors, (path, lineno, msg)) + isfile(path) || continue for line in eachline(path, keep=true) lineno += 1 contains(line, '\r') && file_err("non-UNIX line endings") contains(line, '\ua0') && line_err("non-breaking space") + allow_tabs(path) || + contains(line, '\t') && line_err("tab") endswith(line, '\n') || line_err("no trailing newline") line = chomp(line) endswith(line, r"\s") && line_err("trailing whitespace") diff --git a/contrib/codesign.sh b/contrib/codesign.sh deleted file mode 100755 index 03866c4bb1ac1..0000000000000 --- a/contrib/codesign.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Codesign binary files for macOS. - -usage() { - echo "Usage: ${0} MACOS_CODESIGN_IDENTITY FILE-OR-DIRECTORY" - exit 0 -} - -# Default codesign identity to `-` if not provided -if [ -z "${1}" ]; then - MACOS_CODESIGN_IDENTITY="-" - ENTITLEMENTS="" -else - MACOS_CODESIGN_IDENTITY="${1}" - ENTITLEMENTS="--entitlements $(dirname "${0}")/mac/app/Entitlements.plist" -fi - -if [ "${#}" -eq 2 ]; then - if [ -f "${2}" ]; then - # Codesign only the given file - MACHO_FILES="${2}" - elif [ -d "${2}" ]; then - # Find all files in the given directory - MACHO_FILES=$(find "${2}" -type f -perm -0111 | cut -d: -f1) - else - usage - fi -else - usage -fi - -echo "Codesigning with identity ${MACOS_CODESIGN_IDENTITY}" -for f in ${MACHO_FILES}; do - echo "Codesigning ${f}..." - codesign -s "${MACOS_CODESIGN_IDENTITY}" --option=runtime ${ENTITLEMENTS} -vvv --timestamp --deep --force "${f}" -done diff --git a/contrib/excise_stdlib.sh b/contrib/excise_stdlib.sh new file mode 100755 index 0000000000000..3da9ff437bf83 --- /dev/null +++ b/contrib/excise_stdlib.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# Uses https://github.com/newren/git-filter-repo +# Recommended use of `Github cli` + +set -e +set -f +set -x + +if [ -z "$*" ]; then echo "Expected name of stdlib"; fi + +STDLIB=$1 +WORKDIR=$(mktemp -d) + +echo "Excising stdlib $STDLIB; workdir $WORKDIR" +pushd $WORKDIR +git clone https://github.com/JuliaLang/julia $STDLIB +pushd $STDLIB + +echo "Filtering repo" +git filter-repo --subdirectory-filter stdlib/$STDLIB --path LICENSE.md \ + --message-callback 'return re.sub(b"(\W)(#\d+)", lambda m: m.group(1) + b"JuliaLang/julia" + m.group(2), message)' + + +echo "Deleting branches" +git branch -l | grep -v release- | grep -v master | xargs git branch -v -D + +popd +popd +echo "Done! Inspect the result and push it!" +echo """ + cd $WORKDIR/$STDLIB + gh repo create JuliaLang/$STDLIB.jl --push --source=. --public + git push --all + git push --tags""" + +echo """ + Remember to: + 1. Add a README.md + 2. Setup GHA or similar for CI + """ diff --git a/contrib/fixup-libstdc++.sh b/contrib/fixup-libstdc++.sh index 1c19d98a54b1e..7442d995448a1 100755 --- a/contrib/fixup-libstdc++.sh +++ b/contrib/fixup-libstdc++.sh @@ -11,7 +11,8 @@ fi libdir="$1" private_libdir="$2" -if [ ! -f "$private_libdir/libjulia-internal.so" ]; then +if [ ! 
-f "$private_libdir/libjulia-internal.so" ] && \ + [ ! -f "$private_libdir/libjulia-internal-debug.so" ]; then echo "ERROR: Could not open $private_libdir/libjulia-internal.so" >&2 exit 2 fi @@ -24,7 +25,11 @@ find_shlib () } # Discover libstdc++ location and name -LIBSTD=$(find_shlib "$private_libdir/libjulia-internal.so" "libstdc++.so") +if [ -f "$private_libdir/libjulia-internal.so" ]; then + LIBSTD=$(find_shlib "$private_libdir/libjulia-internal.so" "libstdc++.so") +elif [ -f "$private_libdir/libjulia-internal-debug.so" ]; then + LIBSTD=$(find_shlib "$private_libdir/libjulia-internal-debug.so" "libstdc++.so") +fi LIBSTD_NAME=$(basename $LIBSTD) LIBSTD_DIR=$(dirname $LIBSTD) diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index acd61be502465..a685f28a2c735 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -1,13 +1,14 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -if Threads.nthreads() != 1 - @warn "Running this file with multiple Julia threads may lead to a build error" Threads.nthreads() +# Prevent this from putting anything into the Main namespace +@eval Module() begin + +if Threads.maxthreadid() != 1 + @warn "Running this file with multiple Julia threads may lead to a build error" Threads.maxthreadid() end if Base.isempty(Base.ARGS) || Base.ARGS[1] !== "0" Sys.__init_build() -# Prevent this from being put into the Main namespace -@eval Module() begin if !isdefined(Base, :uv_eventloop) Base.reinit_stdio() end @@ -15,11 +16,41 @@ Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", " import .FakePTYs: open_fake_pty using Base.Meta +## Debugging options +# Disable parallel precompiles generation by setting `false` +const PARALLEL_PRECOMPILATION = true + +# View the code sent to the repl by setting this to `stdout` +const debug_output = devnull # or stdout + +# Disable fancy printing +const fancyprint = (stdout isa Base.TTY) && Base.get_bool_env("CI", false) !== true +## + CTRL_C = '\x03' +CTRL_R = '\x12' UP_ARROW = "\e[A" DOWN_ARROW = "\e[B" hardcoded_precompile_statements = """ +precompile(Base.unsafe_string, (Ptr{UInt8},)) +precompile(Base.unsafe_string, (Ptr{Int8},)) + +# loading.jl +precompile(Base.__require_prelocked, (Base.PkgId, Nothing)) +precompile(Base._require, (Base.PkgId, Nothing)) + +# REPL +precompile(isequal, (String, String)) +precompile(Base.check_open, (Base.TTY,)) +precompile(Base.getproperty, (Base.TTY, Symbol)) +precompile(write, (Base.TTY, String)) +precompile(Tuple{typeof(Base.get), Base.TTY, Symbol, Bool}) +precompile(Tuple{typeof(Base.hashindex), String, Int64}) +precompile(Tuple{typeof(Base.write), Base.GenericIOBuffer{Array{UInt8, 1}}, String}) +precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Nothing, Int64}, Int64}) +precompile(Tuple{typeof(Base.indexed_iterate), Tuple{Nothing, Int64}, Int64, Int64}) + # used by Revise.jl precompile(Tuple{typeof(Base.parse_cache_header), String}) precompile(Base.read_dependency_src, (String, String)) @@ -31,8 +62,12 @@ precompile(Tuple{typeof(delete!), Dict{Base.PkgId,Vector{Function}}, Base.PkgId} precompile(Tuple{typeof(push!), Vector{Function}, Function}) # miscellaneous +precompile(Tuple{typeof(Base.exit)}) precompile(Tuple{typeof(Base.require), Base.PkgId}) precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}}) +precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}, Base.Dict{String, Any}, Vararg{Base.Dict{String, Any}}}) 
+precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, Nothing}, Int64}) +precompile(Tuple{typeof(Base.hashindex), Tuple{Base.PkgId, String}, Int64}) precompile(Tuple{typeof(isassigned), Core.SimpleVector, Int}) precompile(Tuple{typeof(getindex), Core.SimpleVector, Int}) precompile(Tuple{typeof(Base.Experimental.register_error_hint), Any, Type}) @@ -42,6 +77,7 @@ precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel, precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel, Symbol, Module)) precompile(Base.CoreLogging.env_override_minlevel, (Symbol, Module)) precompile(Base.StackTraces.lookup, (Ptr{Nothing},)) +precompile(Tuple{typeof(Base.run_module_init), Module, Int}) """ for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base.TTY, IOContext{Base.TTY}) @@ -49,28 +85,6 @@ for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base hardcoded_precompile_statements *= "precompile(Tuple{typeof(show), $IO, $T})\n" end -repl_script = """ -2+2 -print("") -printstyled("a", "b") -display([1]) -display([1 2; 3 4]) -@time 1+1 -; pwd -$CTRL_C -? reinterpret -using Ra\t$CTRL_C -\\alpha\t$CTRL_C -\e[200~paste here ;)\e[201~"$CTRL_C -$UP_ARROW$DOWN_ARROW$CTRL_C -123\b\b\b$CTRL_C -\b\b$CTRL_C -f(x) = x03 -f(1,2) -[][1] -cd("complet_path\t\t$CTRL_C -""" - precompile_script = """ # NOTE: these were moved to the end of Base.jl. TODO: move back here. # # Used by Revise & its dependencies @@ -108,36 +122,6 @@ precompile_script = """ julia_exepath() = joinpath(Sys.BINDIR, Base.julia_exename()) -have_repl = haskey(Base.loaded_modules, - Base.PkgId(Base.UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL")) -if have_repl - hardcoded_precompile_statements *= """ - precompile(Tuple{typeof(getproperty), REPL.REPLBackend, Symbol}) - """ -end - -Distributed = get(Base.loaded_modules, - Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed"), - nothing) -if Distributed !== nothing - hardcoded_precompile_statements *= """ - precompile(Tuple{typeof(Distributed.remotecall),Function,Int,Module,Vararg{Any, 100}}) - precompile(Tuple{typeof(Distributed.procs)}) - precompile(Tuple{typeof(Distributed.finalize_ref), Distributed.Future}) - """ -# This is disabled because it doesn't give much benefit -# and the code in Distributed is poorly typed causing many invalidations -#= - precompile_script *= """ - using Distributed - addprocs(2) - pmap(x->iseven(x) ? 1 : 0, 1:4) - @distributed (+) for i = 1:100 Int(rand(Bool)) end - """ -=# -end - - Artifacts = get(Base.loaded_modules, Base.PkgId(Base.UUID("56f22d72-fd6d-98f1-02f0-08ddc0907c33"), "Artifacts"), nothing) @@ -152,20 +136,10 @@ if Artifacts !== nothing artifacts = Artifacts.load_artifacts_toml(artifacts_toml) platforms = [Artifacts.unpack_platform(e, "HelloWorldC", artifacts_toml) for e in artifacts["HelloWorldC"]] best_platform = select_platform(Dict(p => triplet(p) for p in platforms)) - dlopen("libjulia$(ccall(:jl_is_debugbuild, Cint, ()) != 0 ? "-debug" : "")", RTLD_LAZY | RTLD_DEEPBIND) + dlopen("libjulia$(Base.isdebugbuild() ? 
"-debug" : "")", RTLD_LAZY | RTLD_DEEPBIND) """ end - -Pkg = get(Base.loaded_modules, - Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg"), - nothing) - -if Pkg !== nothing - # TODO: Split Pkg precompile script into REPL and script part - repl_script *= Pkg.precompile_script -end - FileWatching = get(Base.loaded_modules, Base.PkgId(Base.UUID("7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"), "FileWatching"), nothing) @@ -186,75 +160,81 @@ if Libdl !== nothing """ end -Test = get(Base.loaded_modules, - Base.PkgId(Base.UUID("8dfed614-e22c-5e08-85e1-65c5234f0b40"), "Test"), - nothing) -if Test !== nothing - hardcoded_precompile_statements *= """ - precompile(Tuple{typeof(Test.do_test), Test.ExecutionResult, Any}) - precompile(Tuple{typeof(Test.testset_beginend_call), Tuple{String, Expr}, Expr, LineNumberNode}) - precompile(Tuple{Type{Test.DefaultTestSet}, String}) - precompile(Tuple{Type{Test.DefaultTestSet}, AbstractString}) - precompile(Tuple{Core.kwftype(Type{Test.DefaultTestSet}), Any, Type{Test.DefaultTestSet}, AbstractString}) - precompile(Tuple{typeof(Test.finish), Test.DefaultTestSet}) - precompile(Tuple{typeof(Test.eval_test), Expr, Expr, LineNumberNode, Bool}) - precompile(Tuple{typeof(Test._inferred), Expr, Module}) - precompile(Tuple{typeof(Test.push_testset), Test.DefaultTestSet}) - precompile(Tuple{typeof(Test.get_alignment), Test.DefaultTestSet, Int}) - precompile(Tuple{typeof(Test.get_test_result), Any, Any}) - precompile(Tuple{typeof(Test.do_test_throws), Test.ExecutionResult, Any, Any}) - precompile(Tuple{typeof(Test.print_counts), Test.DefaultTestSet, Int, Int, Int, Int, Int, Int, Int}) - precompile(Tuple{typeof(Test._check_testset), Type, Expr}) - precompile(Tuple{typeof(Test.test_expr!), Any, Any}) - precompile(Tuple{typeof(Test.test_expr!), Any, Any, Vararg{Any, 100}}) - precompile(Tuple{typeof(Test.pop_testset)}) - precompile(Tuple{typeof(Test.match_logs), Function, Tuple{Symbol, Regex}}) - precompile(Tuple{typeof(Test.match_logs), Function, Tuple{String, Regex}}) - precompile(Tuple{typeof(Base.CoreLogging.shouldlog), Test.TestLogger, Base.CoreLogging.LogLevel, Module, Symbol, Symbol}) - precompile(Tuple{typeof(Base.CoreLogging.handle_message), Test.TestLogger, Base.CoreLogging.LogLevel, String, Module, Symbol, Symbol, String, Int}) - precompile(Tuple{typeof(Core.kwfunc(Base.CoreLogging.handle_message)), typeof((exception=nothing,)), typeof(Base.CoreLogging.handle_message), Test.TestLogger, Base.CoreLogging.LogLevel, String, Module, Symbol, Symbol, String, Int}) - precompile(Tuple{typeof(Test.detect_ambiguities), Any}) - precompile(Tuple{typeof(Test.collect_test_logs), Function}) - precompile(Tuple{typeof(Test.do_broken_test), Test.ExecutionResult, Any}) - precompile(Tuple{typeof(Test.record), Test.DefaultTestSet, Union{Test.Error, Test.Fail}}) - precompile(Tuple{typeof(Test.filter_errors), Test.DefaultTestSet}) - """ -end - -Profile = get(Base.loaded_modules, - Base.PkgId(Base.UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile"), - nothing) -if Profile !== nothing - repl_script *= Profile.precompile_script - hardcoded_precompile_statements *= """ - precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt}) - precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}}) - precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, 
Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt}) - precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}}) - precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}}) - """ +# Printing the current state +let + global print_state + print_lk = ReentrantLock() + status = Dict{String, String}( + "step1" => "W", + "step3" => "W", + "clock" => "◐", + ) + function print_status(key::String) + txt = status[key] + if startswith(txt, "W") # Waiting + printstyled("? ", color=Base.warn_color()); print(txt[2:end]) + elseif startswith(txt, "R") # Running + print(status["clock"], " ", txt[2:end]) + elseif startswith(txt, "F") # Finished + printstyled("✓ ", color=:green); print(txt[2:end]) + else + print(txt) + end + end + function print_state(args::Pair{String,String}...) + lock(print_lk) do + isempty(args) || push!(status, args...) + print("\r└ Collect (Basic: ") + print_status("step1") + print(") => Execute ") + print_status("step3") + end + end end -const JULIA_PROMPT = "julia> " -const PKG_PROMPT = "pkg> " -const SHELL_PROMPT = "shell> " -const HELP_PROMPT = "help?> " - -function generate_precompile_statements() +ansi_enablecursor = "\e[?25h" +ansi_disablecursor = "\e[?25l" +blackhole = Sys.isunix() ? "/dev/null" : "nul" +procenv = Dict{String,Any}( + "JULIA_HISTORY" => blackhole, + "JULIA_PROJECT" => nothing, # remove from environment + "JULIA_LOAD_PATH" => "@stdlib", + "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":", + "TERM" => "", + "JULIA_FALLBACK_REPL" => "true") + +generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printed start_time = time_ns() - debug_output = devnull # or stdout sysimg = Base.unsafe_string(Base.JLOptions().image_file) # Extract the precompile statements from the precompile file - statements = Set{String}() + statements_step1 = Channel{String}(Inf) # From hardcoded statements for statement in split(hardcoded_precompile_statements::String, '\n') - push!(statements, statement) + push!(statements_step1, statement) + end + + println("Collecting and executing precompile statements") + fancyprint && print(ansi_disablecursor) + print_state() + clock = @async begin + t = Timer(0; interval=1/10) + anim_chars = ["◐","◓","◑","◒"] + current = 1 + if fancyprint + while isopen(statements_step1) || !isempty(statements_step1) + print_state("clock" => anim_chars[current]) + wait(t) + current = current == 4 ? 
1 : current + 1 + end + end + close(t) end # Collect statements from running the script - mktempdir() do prec_path + step1 = @async mktempdir() do prec_path + print_state("step1" => "R") # Also precompile a package here pkgname = "__PackagePrecompilationStatementModule" mkpath(joinpath(prec_path, pkgname, "src")) @@ -272,97 +252,23 @@ function generate_precompile_statements() Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path))) $precompile_script """ - run(`$(julia_exepath()) -O0 --sysimage $sysimg --trace-compile=$tmp_proc --startup-file=no -Cnative -e $s`) + p = run(pipeline(addenv(`$(julia_exepath()) -O0 --trace-compile=$tmp_proc --sysimage $sysimg + --cpu-target=native --startup-file=no --color=yes`, procenv), + stdin=IOBuffer(s), stdout=debug_output)) + n_step1 = 0 for f in (tmp_prec, tmp_proc) + isfile(f) || continue for statement in split(read(f, String), '\n') - occursin("Main.", statement) && continue - push!(statements, statement) - end - end - end - - mktemp() do precompile_file, precompile_file_h - # Collect statements from running a REPL process and replaying our REPL script - pts, ptm = open_fake_pty() - blackhole = Sys.isunix() ? "/dev/null" : "nul" - if have_repl - cmdargs = ```--color=yes - -e 'import REPL; REPL.Terminals.is_precompiling[] = true' - ``` - else - cmdargs = `-e nothing` - end - p = withenv("JULIA_HISTORY" => blackhole, - "JULIA_PROJECT" => nothing, # remove from environment - "JULIA_LOAD_PATH" => Sys.iswindows() ? "@;@stdlib" : "@:@stdlib", - "JULIA_PKG_PRECOMPILE_AUTO" => "0", - "TERM" => "") do - run(```$(julia_exepath()) -O0 --trace-compile=$precompile_file --sysimage $sysimg - --cpu-target=native --startup-file=no -i $cmdargs```, - pts, pts, pts; wait=false) - end - Base.close_stdio(pts) - # Prepare a background process to copy output from process until `pts` is closed - output_copy = Base.BufferStream() - tee = @async try - while !eof(ptm) - l = readavailable(ptm) - write(debug_output, l) - Sys.iswindows() && (sleep(0.1); yield(); yield()) # workaround hang - probably a libuv issue? - write(output_copy, l) - end - catch ex - if !(ex isa Base.IOError && ex.code == Base.UV_EIO) - rethrow() # ignore EIO on ptm after pts dies + push!(statements_step1, statement) + n_step1 += 1 end - finally - close(output_copy) - close(ptm) - end - # wait for the definitive prompt before start writing to the TTY - readuntil(output_copy, JULIA_PROMPT) - sleep(0.1) - readavailable(output_copy) - # Input our script - if have_repl - precompile_lines = split(repl_script::String, '\n'; keepempty=false) - curr = 0 - for l in precompile_lines - sleep(0.1) - curr += 1 - print("\rGenerating REPL precompile statements... 
$curr/$(length(precompile_lines))") - # consume any other output - bytesavailable(output_copy) > 0 && readavailable(output_copy) - # push our input - write(debug_output, "\n#### inputting statement: ####\n$(repr(l))\n####\n") - write(ptm, l, "\n") - readuntil(output_copy, "\n") - # wait for the next prompt-like to appear - readuntil(output_copy, "\n") - strbuf = "" - while true - strbuf *= String(readavailable(output_copy)) - occursin(JULIA_PROMPT, strbuf) && break - occursin(PKG_PROMPT, strbuf) && break - occursin(SHELL_PROMPT, strbuf) && break - occursin(HELP_PROMPT, strbuf) && break - sleep(0.1) - end - end - println() - end - write(ptm, "exit()\n") - wait(tee) - success(p) || Base.pipeline_error(p) - close(ptm) - write(debug_output, "\n#### FINISHED ####\n") - - for statement in split(read(precompile_file, String), '\n') - # Main should be completely clean - occursin("Main.", statement) && continue - push!(statements, statement) end + close(statements_step1) + print_state("step1" => "F$n_step1") + return :ok end + Base.errormonitor(step1) + !PARALLEL_PRECOMPILATION && wait(step1) # Create a staging area where all the loaded packages are available PrecompileStagingArea = Module() @@ -372,88 +278,85 @@ function generate_precompile_statements() end end - # Execute the collected precompile statements n_succeeded = 0 - include_time = @elapsed for statement in sort!(collect(statements)) + # Make statements unique + statements = Set{String}() + # Execute the precompile statements + for sts in [statements_step1,], statement in sts + # Main should be completely clean + occursin("Main.", statement) && continue + Base.in!(statement, statements) && continue # println(statement) - # XXX: skip some that are broken. these are caused by issue #39902 - occursin("Tuple{Artifacts.var\"#@artifact_str\", LineNumberNode, Module, Any, Any}", statement) && continue - occursin("Tuple{Base.Cartesian.var\"#@ncall\", LineNumberNode, Module, Int64, Any, Vararg{Any}}", statement) && continue - occursin("Tuple{Base.Cartesian.var\"#@ncall\", LineNumberNode, Module, Int32, Any, Vararg{Any}}", statement) && continue - occursin("Tuple{Base.Cartesian.var\"#@nloops\", LineNumberNode, Module, Any, Any, Any, Vararg{Any}}", statement) && continue - occursin("Tuple{Core.var\"#@doc\", LineNumberNode, Module, Vararg{Any}}", statement) && continue - # XXX: this is strange, as this isn't the correct representation of this - occursin("typeof(Core.IntrinsicFunction)", statement) && continue - # XXX: this is strange, as this method should not be getting compiled - occursin(", Core.Compiler.AbstractInterpreter, ", statement) && continue try ps = Meta.parse(statement) - isexpr(ps, :call) || continue + if !isexpr(ps, :call) + # these are typically comments + @debug "skipping statement because it does not parse as an expression" statement + delete!(statements, statement) + continue + end popfirst!(ps.args) # precompile(...) ps.head = :tuple - l = ps.args[end] - if (isexpr(l, :tuple) || isexpr(l, :curly)) && length(l.args) > 0 # Tuple{...} or (...) - # XXX: precompile doesn't currently handle overloaded Vararg arguments very well. - # Replacing N with a large number works around it. - l = l.args[end] - if isexpr(l, :curly) && length(l.args) == 2 && l.args[1] === :Vararg # Vararg{T} - push!(l.args, 100) # form Vararg{T, 100} instead - end - end # println(ps) ps = Core.eval(PrecompileStagingArea, ps) - # XXX: precompile doesn't currently handle overloaded nospecialize arguments very well. - # Skipping them avoids the warning. 
- ms = length(ps) == 1 ? Base._methods_by_ftype(ps[1], 1, Base.get_world_counter()) : Base.methods(ps...) - ms isa Vector || continue - precompile(ps...) - n_succeeded += 1 - print("\rExecuting precompile statements... $n_succeeded/$(length(statements))") + if precompile(ps...) + n_succeeded += 1 + else + @warn "Failed to precompile expression" form=statement _module=nothing _file=nothing _line=0 + end + failed = length(statements) - n_succeeded + yield() # Make clock spinning + print_state("step3" => string("R$n_succeeded", failed > 0 ? " ($failed failed)" : "")) catch ex + @show backtrace() # See #28808 @warn "Failed to precompile expression" form=statement exception=ex _module=nothing _file=nothing _line=0 end end + wait(clock) # Stop asynchronous printing + failed = length(statements) - n_succeeded + print_state("step3" => string("F$n_succeeded", failed > 0 ? " ($failed failed)" : "")) println() - if have_repl - # Seems like a reasonable number right now, adjust as needed - # comment out if debugging script - n_succeeded > 1200 || @warn "Only $n_succeeded precompile statements" - end + # Seems like a reasonable number right now, adjust as needed + # comment out if debugging script + n_succeeded > (have_repl ? 650 : 90) || @warn "Only $n_succeeded precompile statements" - include_time *= 1e9 - gen_time = (time_ns() - start_time) - include_time - tot_time = time_ns() - start_time + fetch(step1) == :ok || throw("Step 1 of collecting precompiles failed.") + tot_time = time_ns() - start_time println("Precompilation complete. Summary:") - print("Generation ── "); Base.time_print(gen_time); print(" "); show(IOContext(stdout, :compact=>true), gen_time / tot_time * 100); println("%") - print("Execution ─── "); Base.time_print(include_time); print(" "); show(IOContext(stdout, :compact=>true), include_time / tot_time * 100); println("%") - print("Total ─────── "); Base.time_print(tot_time); println() - + print("Total ─────── "); Base.time_print(stdout, tot_time); println() +finally + fancyprint && print(ansi_enablecursor) + GC.gc(true); GC.gc(false); # reduce memory footprint return end generate_precompile_statements() -# As a last step in system image generation, -# remove some references to build time environment for a more reproducible build. 
-Base.Filesystem.temp_cleanup_purge(force=true) -@eval Base PROGRAM_FILE = "" -@eval Sys begin - BINDIR = "" - STDLIB = "" -end -empty!(Base.ARGS) -empty!(Core.ARGS) +let stdout = Ref{IO}(stdout) + Base.PROGRAM_FILE = "" + Sys.BINDIR = "" + Sys.STDLIB = "" + empty!(Base.ARGS) + empty!(Core.ARGS) + empty!(Base.TOML_CACHE.d) + Base.TOML.reinit!(Base.TOML_CACHE.p, "") -end # @eval -end # if + println("Outputting sysimage file...") + Base.stdout = Core.stdout + Base.stderr = Core.stderr -println("Outputting sysimage file...") -let pre_output_time = time_ns() # Print report after sysimage has been saved so all time spent can be captured + pre_output_time = time_ns() Base.postoutput() do output_time = time_ns() - pre_output_time - print("Output ────── "); Base.time_print(output_time); println() + let stdout = stdout[] + print(stdout, "Output ────── "); Base.time_print(stdout, output_time); println(stdout) + end + stdout[] = Core.stdout end end + +end # if +end # @eval diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl index 9c6e39216d817..df17b967c1ed7 100755 --- a/contrib/julia-config.jl +++ b/contrib/julia-config.jl @@ -17,7 +17,7 @@ function shell_escape(str) end function libDir() - return if ccall(:jl_is_debugbuild, Cint, ()) != 0 + return if Base.isdebugbuild() if Base.DARWIN_FRAMEWORK joinpath(dirname(abspath(Libdl.dlpath(Base.DARWIN_FRAMEWORK_NAME * "_debug"))),"lib") else @@ -33,7 +33,7 @@ function libDir() end function frameworkDir() - libjulia = ccall(:jl_is_debugbuild, Cint, ()) != 0 ? + libjulia = Base.isdebugbuild() ? Libdl.dlpath(Base.DARWIN_FRAMEWORK_NAME * "_debug") : Libdl.dlpath(Base.DARWIN_FRAMEWORK_NAME) normpath(joinpath(dirname(abspath(libjulia)),"..","..","..")) @@ -61,7 +61,7 @@ function ldlibs(doframework) # If the user wants the debug framework, DYLD_IMAGE_SUFFIX=_debug # should be used (refer to man 1 dyld). doframework && return "-framework $(Base.DARWIN_FRAMEWORK_NAME)" - libname = if ccall(:jl_is_debugbuild, Cint, ()) != 0 + libname = if Base.isdebugbuild() "julia-debug" else "julia" diff --git a/contrib/julia.appdata.xml b/contrib/julia.appdata.xml index 3d451197098b2..f53a653af78d0 100644 --- a/contrib/julia.appdata.xml +++ b/contrib/julia.appdata.xml @@ -28,7 +28,7 @@ - https://julialang.org/images/julia-gnome.png + https://julialang.org/assets/images/julia-gnome.png https://julialang.org/ diff --git a/contrib/julia.desktop b/contrib/julia.desktop index 6b41981354769..037f6d865a9e4 100644 --- a/contrib/julia.desktop +++ b/contrib/julia.desktop @@ -1,8 +1,17 @@ +# To use uxterm, change to these values. +# +# Exec=uxterm -e julia +# Terminal=false +# +# To use a .png icon specify the full path and file extension. 
+# +# Icon=/usr/share/icons/hicolor/48x48/apps/julia.png +# [Desktop Entry] Name=Julia -Comment=High-level, high-performance dynamic language for technical computing +Comment=High-performance language for technical computing Exec=julia Icon=julia Terminal=true Type=Application -Categories=Development;ComputerScience;Building;Science;Math;NumericalAnalysis;ParallelComputing;DataVisualization;ConsoleOnly; +Categories=Development; diff --git a/contrib/julia.png b/contrib/julia.png new file mode 100644 index 0000000000000..d05f2861b784d Binary files /dev/null and b/contrib/julia.png differ diff --git a/contrib/julia.svg b/contrib/julia.svg new file mode 100644 index 0000000000000..ed7f17bb32f18 --- /dev/null +++ b/contrib/julia.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/contrib/mac/app/renotarize_dmg.sh b/contrib/mac/app/renotarize_dmg.sh index f0d6d0a197e5f..c532ddb778e24 100755 --- a/contrib/mac/app/renotarize_dmg.sh +++ b/contrib/mac/app/renotarize_dmg.sh @@ -39,7 +39,7 @@ APP_NAME=$(basename dmg/*.app) VOL_NAME=$(basename /Volumes/Julia-*) if [[ ! -d dmg/${APP_NAME} ]]; then - echo "ERORR: Unable to auto-detect APP_NAME, check dmg folder!" >&2 + echo "ERROR: Unable to auto-detect APP_NAME, check dmg folder!" >&2 exit 1 fi # Unmount everything again diff --git a/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m b/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m index db2f13b485189..1d20d6ed3efa1 100644 --- a/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m +++ b/contrib/mac/frameworkapp/JuliaLauncher/AppDelegate.m @@ -51,7 +51,7 @@ + (ExecSandboxController *)sharedController { @end -/// Location of an installed variant of Julia (frameowrk or nix hier). +/// Location of an installed variant of Julia (framework or nix hier). @interface JuliaVariant : NSObject @property(readonly, nullable) NSBundle *bundle; @property(readonly, nonnull) NSURL *juliaexe; diff --git a/contrib/normalize_triplet.py b/contrib/normalize_triplet.py index 43c9d492a4b2e..77c047b360b76 100755 --- a/contrib/normalize_triplet.py +++ b/contrib/normalize_triplet.py @@ -2,8 +2,8 @@ import re, sys -# This script designed to mimick `src/PlatformNames.jl` in `BinaryProvider.jl`, which has -# a method `platform_key_abi()` to parse uname-like output into something standarized. +# This script designed to mimic `src/PlatformNames.jl` in `BinaryProvider.jl`, which has +# a method `platform_key_abi()` to parse uname-like output into something standardized. if len(sys.argv) < 2: print("Usage: {} [] []".format(sys.argv[0])) @@ -113,9 +113,16 @@ def p(x): if not sys.argv[2]: libgfortran_version = "libgfortran5" else: - # Take the last thing that looks like a version number, and extract its major component - version_numbers = list(filter(lambda x: re.match("\d+\.\d+(\.\d+)?", x), sys.argv[2].split())) - major_ver = int(version_numbers[-1].split('.')[0]) + # Grab the first number in the last word with a number + # This will be the major version number. 
+ major_ver = -1 + words = sys.argv[2].split() + for word in words[::-1]: + major_ver = re.search("[0-9]+", word) + if major_ver: + major_ver = int(major_ver.group()) + break + if major_ver <= 6: libgfortran_version = "libgfortran3" elif major_ver <= 7: diff --git a/contrib/prepare_release.sh b/contrib/prepare_release.sh index 7d4e55e3a402e..2772e44a858f1 100755 --- a/contrib/prepare_release.sh +++ b/contrib/prepare_release.sh @@ -56,12 +56,6 @@ curl -L -o julia-$version-win32.exe \ $julianightlies/winnt/x86/$majmin/julia-$majminpatch-$shashort-win32.exe cp julia-$version-win32.exe julia-$majmin-latest-win32.exe -if [ -e codesign.sh ]; then - # code signing needs to run on windows, script is not checked in since it - # hard-codes a few things. TODO: see if signtool.exe can run in wine - ./codesign.sh -fi - shasum -a 256 julia-$version* | grep -v -e sha256 -e md5 -e asc > julia-$version.sha256 md5sum julia-$version* | grep -v -e sha256 -e md5 -e asc > julia-$version.md5 diff --git a/contrib/print_sorted_stdlibs.jl b/contrib/print_sorted_stdlibs.jl index bbf890328cb4e..6bc2023c4f1cc 100644 --- a/contrib/print_sorted_stdlibs.jl +++ b/contrib/print_sorted_stdlibs.jl @@ -12,11 +12,12 @@ function check_flag(flag) end if check_flag("--help") || check_flag("-h") - println("Usage: julia print_sorted_stdlibs.jl [stdlib_dir] [--exclude-jlls]") + println("Usage: julia print_sorted_stdlibs.jl [stdlib_dir] [--exclude-jlls] [--exclude-sysimage]") end # Allow users to ask for JLL or no JLLs exclude_jlls = check_flag("--exclude-jlls") +exclude_sysimage = check_flag("--exclude-sysimage") # Default to the `stdlib/vX.Y` directory STDLIB_DIR = get(ARGS, 1, joinpath(@__DIR__, "..", "usr", "share", "julia", "stdlib")) @@ -27,9 +28,9 @@ end project_deps = Dict{String,Set{String}}() for project_dir in readdir(STDLIB_DIR, join=true) - files = readdir(project_dir) - if "Project.toml" in files - project = TOML.parsefile(joinpath(project_dir, "Project.toml")) + project_file = joinpath(project_dir, "Project.toml") + if isfile(project_file) + project = TOML.parsefile(project_file) if !haskey(project, "name") continue @@ -80,12 +81,20 @@ if exclude_jlls filter!(p -> !endswith(p, "_jll"), sorted_projects) end +if exclude_sysimage + loaded_modules = Set(map(k->k.name, collect(keys(Base.loaded_modules)))) + filter!(p->!in(p, loaded_modules), sorted_projects) +end + # Print out sorted projects, ready to be pasted into `sysimg.jl` last_depth = 0 println(" # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl") if exclude_jlls println(" # Run with the `--exclude-jlls` option to filter out all JLL packages") end +if exclude_sysimage + println(" # Run with the `--exclude-sysimage` option to filter out all packages included in the system image") +end println(" stdlibs = [") println(" # No dependencies") for p in sorted_projects diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk index fc632728e9a9e..f67088141ccd4 100644 --- a/contrib/refresh_checksums.mk +++ b/contrib/refresh_checksums.mk @@ -24,11 +24,11 @@ CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS)) NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS)) # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded: -BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline +BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib 
libsuitesparse openlibm blastrampoline libtracyclient BB_GCC_EXPANDED_PROJECTS=openblas csl BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools lld # These are non-BB source-only deps -NON_BB_PROJECTS=patchelf mozillacert lapack libwhich utf8proc +NON_BB_PROJECTS=patchelf mozillacert lapack libwhich utf8proc ittapi ifneq ($(VERBOSE),1) QUIET_MAKE := -s diff --git a/contrib/tsan/Make.user.tsan b/contrib/tsan/Make.user.tsan index 01c9874a85182..b192c36e4cfee 100644 --- a/contrib/tsan/Make.user.tsan +++ b/contrib/tsan/Make.user.tsan @@ -11,6 +11,3 @@ USE_BINARYBUILDER_LLVM=1 override SANITIZE=1 override SANITIZE_THREAD=1 - -# default to a debug build for better line number reporting -override JULIA_BUILD_MODE=debug diff --git a/contrib/windows/build-installer.iss b/contrib/windows/build-installer.iss index 4f5f0259d2f2c..a63cf853d4373 100644 --- a/contrib/windows/build-installer.iss +++ b/contrib/windows/build-installer.iss @@ -150,6 +150,9 @@ begin case CurPageID of wpWelcome: WizardForm.Color := WizardForm.WelcomePage.Color; wpFinished: WizardForm.Color := WizardForm.FinishedPage.Color; + + //change button text from "next" to "install" when ReadyPage is disabled. + wpSelectTasks: WizardForm.NextButton.Caption := SetupMessage(msgButtonInstall); else WizardForm.Color := WizardForm.InnerPage.Color; end; diff --git a/deps/JuliaSyntax.mk b/deps/JuliaSyntax.mk new file mode 100644 index 0000000000000..4a8afa8fbd53c --- /dev/null +++ b/deps/JuliaSyntax.mk @@ -0,0 +1,16 @@ +$(eval $(call git-external,JuliaSyntax,JULIASYNTAX,,,$(BUILDDIR))) + +$(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted + @# no build steps + echo 1 > $@ + +$(eval $(call symlink_install,JuliaSyntax,$$(JULIASYNTAX_SRC_DIR),$$(BUILDROOT)/base)) + +clean-JuliaSyntax: + -rm -f $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled +get-JuliaSyntax: $(JULIASYNTAX_SRC_FILE) +extract-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted +configure-JuliaSyntax: extract-JuliaSyntax +compile-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled +fastcheck-JuliaSyntax: check-JuliaSyntax +check-JuliaSyntax: compile-JuliaSyntax diff --git a/deps/JuliaSyntax.version b/deps/JuliaSyntax.version new file mode 100644 index 0000000000000..7f124715024ce --- /dev/null +++ b/deps/JuliaSyntax.version @@ -0,0 +1,4 @@ +JULIASYNTAX_BRANCH = main +JULIASYNTAX_SHA1 = 4f1731d6ce7c2465fc21ea245110b7a39f34658a +JULIASYNTAX_GIT_URL := https://github.com/JuliaLang/JuliaSyntax.jl.git +JULIASYNTAX_TAR_URL = https://api.github.com/repos/JuliaLang/JuliaSyntax.jl/tarball/$1 diff --git a/deps/Makefile b/deps/Makefile index ac0dbe7afcb1a..27f5fdbb693d5 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -8,7 +8,6 @@ BUILDDIR := scratch else BUILDDIR := . 
endif -include $(SRCDIR)/Versions.make include $(JULIAHOME)/Make.inc include $(SRCDIR)/tools/common.mk include $(SRCDIR)/tools/git-external.mk @@ -25,9 +24,9 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST) # # autoconf configure-driven scripts: pcre unwind gmp mpfr patchelf libuv curl # custom Makefile rules: openlibm dsfmt libsuitesparse lapack blastrampoline openblas utf8proc objconv libwhich -# CMake libs: llvm llvmunwind libgit2 libssh2 mbedtls +# CMake libs: llvm llvmunwind libgit2 libssh2 mbedtls libtracyclient # -# downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2 +# downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2, libtracyclient # # to debug 'define' rules, replace eval at the usage site with info or error @@ -37,7 +36,7 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST) # prevent installing libs into usr/lib64 on opensuse unexport CONFIG_SITE -DEP_LIBS := +DEP_LIBS := JuliaSyntax ifeq ($(USE_SYSTEM_LIBBLASTRAMPOLINE), 0) DEP_LIBS += blastrampoline @@ -47,10 +46,18 @@ ifeq ($(USE_SYSTEM_CSL), 0) DEP_LIBS += csl endif +ifeq ($(SANITIZE), 1) +DEP_LIBS += sanitizers +endif + ifeq ($(USE_SYSTEM_LIBUV), 0) DEP_LIBS += libuv endif +ifeq ($(WITH_TRACY), 1) +DEP_LIBS += libtracyclient +endif + ifeq ($(DISABLE_LIBUNWIND), 0) ifeq ($(USE_SYSTEM_LIBUNWIND), 0) ifeq ($(OS), Linux) @@ -89,6 +96,10 @@ ifeq ($(USE_SYSTEM_LLVM), 0) DEP_LIBS += llvm endif +ifeq ($(USE_SYSTEM_LLD), 0) +DEP_LIBS += lld +endif + ifeq ($(USE_SYSTEM_PCRE), 0) DEP_LIBS += pcre endif @@ -148,6 +159,16 @@ ifeq ($(USE_SYSTEM_P7ZIP), 0) DEP_LIBS += p7zip endif +ifeq ($(USE_INTEL_JITEVENTS), 1) +ifeq ($(USE_BINARYBUILDER_LLVM), 0) +DEP_LIBS += ittapi +endif +endif + +ifeq ($(WITH_ITTAPI),1) +DEP_LIBS += ittapi +endif + # Only compile standalone LAPACK if we are not using OpenBLAS. # OpenBLAS otherwise compiles LAPACK as part of its build. @@ -165,11 +186,13 @@ DEP_LIBS += libwhich endif endif +DEP_LIBS_STAGED := $(DEP_LIBS) + # list all targets DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \ openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \ objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \ - libsuitesparse + sanitizers libsuitesparse lld libtracyclient ittapi JuliaSyntax DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL) ifneq ($(USE_BINARYBUILDER_OPENBLAS),0) @@ -204,6 +227,8 @@ distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL)) getall: $(addprefix get-, $(DEP_LIBS_ALL)) include $(SRCDIR)/csl.mk +include $(SRCDIR)/sanitizers.mk +include $(SRCDIR)/ittapi.mk include $(SRCDIR)/llvm.mk include $(SRCDIR)/libuv.mk include $(SRCDIR)/pcre.mk @@ -226,5 +251,9 @@ include $(SRCDIR)/curl.mk include $(SRCDIR)/libgit2.mk include $(SRCDIR)/libwhich.mk include $(SRCDIR)/p7zip.mk +include $(SRCDIR)/libtracyclient.mk + +# vendored Julia libs +include $(SRCDIR)/JuliaSyntax.mk include $(SRCDIR)/tools/uninstallers.mk diff --git a/deps/Versions.make b/deps/Versions.make deleted file mode 100644 index 07dfc88f288f0..0000000000000 --- a/deps/Versions.make +++ /dev/null @@ -1,120 +0,0 @@ -## Dependencies and where to find them, listed in alphabetical order - -# To define a new dependency, you need to know the following pieces of information: -# -# * The Makefile variable stem; for LibCURL this is just "CURL". -# * The JLL name; for GMP this is "GMP", while for LLVM it could be "LLVM_full" or "LLVM_full_assert" -# * The upstream source version; for dSFMT this is currently "2.2.3" -# -# Everything else will be auto-generated. 
In particular, the version listed here -# represents the upstream source version; the JLL binary version that gets downloaded is -# controlled by the `Project.toml` files in `stdlib/XXX_jll/`. - -# Compiler Support Libraries -CSL_JLL_NAME := CompilerSupportLibraries - -# Clang (paired with LLVM, only here as a JLL download) -CLANG_JLL_NAME := Clang -CLANG_JLL_VER := 14.0.5+0 - -# DSFMT -DSFMT_VER := 2.2.4 -DSFMT_JLL_NAME := dSFMT - -# GMP -GMP_VER := 6.2.1 -GMP_JLL_NAME := GMP - -# LibCURL -CURL_VER := 7.83.1 -CURL_JLL_NAME := LibCURL - -# LAPACK, source-only -LAPACK_VER := 3.9.0 - -# LibGit2 -LIBGIT2_JLL_NAME := LibGit2 - -# LibSSH2 -LIBSSH2_VER := 1.10.2 -LIBSSH2_JLL_NAME := LibSSH2 - -# LibUV -LIBUV_VER := 2 -LIBUV_JLL_NAME := LibUV - -# LLVM -LLVM_VER := 14.0.2 -LLVM_ASSERT_JLL_VER := 14.0.5+0 -LLVM_JLL_NAME := libLLVM - -# LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`) -LLVM_TOOLS_JLL_NAME := LLVM -LLVM_TOOLS_JLL_VER := 14.0.5+0 -LLVM_TOOLS_ASSERT_JLL_VER := 14.0.5+0 - -# LLD -LLD_JLL_NAME := LLD -LLD_JLL_VER := 14.0.5+0 - -# LLVM libunwind -LLVMUNWIND_VER := 12.0.1 -LLVMUNWIND_JLL_NAME := LLVMLibUnwind - -# MbedTLS -MBEDTLS_VER := 2.28.0 -MBEDTLS_JLL_NAME := MbedTLS - -# MPFR -MPFR_VER := 4.1.0 -MPFR_JLL_NAME := MPFR - -# nghttp2 -NGHTTP2_VER := 1.47.0 -NGHTTP2_JLL_NAME := nghttp2 - -# Objconv (we don't ship this, so no need for a fake JLL; therefore we specify the JLL_VER here) -OBJCONV_VER := 2.49.1 -OBJCONV_JLL_NAME := Objconv -OBJCONV_JLL_VER := 2.49.1+0 - -# blastrampoline -BLASTRAMPOLINE_VER := 5.1.1 -BLASTRAMPOLINE_JLL_NAME := libblastrampoline - -# OpenBLAS -OPENBLAS_VER := 0.3.17 -OPENBLAS_JLL_NAME := OpenBLAS - -# OpenLibm -OPENLIBM_VER := 0.8.1 -OPENLIBM_JLL_NAME := OpenLibm - -# Patchelf (we don't ship this or even use a JLL, we just always build it) -PATCHELF_VER := 0.13 - -# p7zip -P7ZIP_VER := 17.04 -P7ZIP_JLL_NAME := p7zip - -# PCRE -PCRE_VER := 10.40 -PCRE_JLL_NAME := PCRE2 - -# SuiteSparse -LIBSUITESPARSE_VER := 5.10.1 -LIBSUITESPARSE_JLL_NAME := SuiteSparse - -# unwind -UNWIND_VER := 1.5.0 -UNWIND_VER_TAG := 1.5 -UNWIND_JLL_NAME := LibUnwind - -# zlib -ZLIB_VER := 1.2.12 -ZLIB_JLL_NAME := Zlib - -# Specify the version of the Mozilla CA Certificate Store to obtain. -# The versions of cacert.pem are identified by the date (YYYY-MM-DD) of their changes. -# See https://curl.haxx.se/docs/caextract.html for more details. 
-MOZILLA_CACERT_VERSION := 2022-02-01 diff --git a/deps/blastrampoline.mk b/deps/blastrampoline.mk index bde21174a12a6..bd1cb65c6ae2d 100644 --- a/deps/blastrampoline.mk +++ b/deps/blastrampoline.mk @@ -6,16 +6,25 @@ BLASTRAMPOLINE_GIT_URL := https://github.com/JuliaLinearAlgebra/libblastrampolin BLASTRAMPOLINE_TAR_URL = https://api.github.com/repos/JuliaLinearAlgebra/libblastrampoline/tarball/$1 $(eval $(call git-external,blastrampoline,BLASTRAMPOLINE,,,$(BUILDDIR))) +BLASTRAMPOLINE_BUILD_OPTS := $(MAKE_COMMON) CC="$(CC) $(SANITIZE_OPTS)" CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" +BLASTRAMPOLINE_BUILD_OPTS += ARCH="$(ARCH)" OS="$(OS)" + $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/source-extracted mkdir -p $(dir $@) echo 1 > $@ +BLASTRAMPOLINE_BUILD_ROOT := $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/src $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-compiled: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured - cd $(dir $@)/src && $(MAKE) $(MAKE_COMMON) + cd $(dir $@)/src && $(MAKE) $(BLASTRAMPOLINE_BUILD_OPTS) +ifeq ($(OS), WINNT) + # Windows doesn't like soft link, use hard link + cd $(BLASTRAMPOLINE_BUILD_ROOT)/build/ && \ + cp -f --dereference --link libblastrampoline.dll libblastrampoline.dll +endif echo 1 > $@ define BLASTRAMPOLINE_INSTALL - $(MAKE) -C $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/src $(MAKE_COMMON) install DESTDIR="$2" + $(MAKE) -C $(BLASTRAMPOLINE_BUILD_ROOT) install $(BLASTRAMPOLINE_BUILD_OPTS) DESTDIR="$2" endef $(eval $(call staged-install, \ blastrampoline,$(BLASTRAMPOLINE_SRC_DIR), \ @@ -23,6 +32,11 @@ $(eval $(call staged-install, \ $$(BLASTRAMPOLINE_OBJ_TARGET), \ $$(INSTALL_NAME_CMD)libblastrampoline.$$(SHLIB_EXT) $$(build_shlibdir)/libblastrampoline.$$(SHLIB_EXT))) +clean-blastrampoline: + -$(MAKE) -C $(BLASTRAMPOLINE_BUILD_ROOT) clean + -$(RM) $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-compiled \ + $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured + get-blastrampoline: $(BLASTRAMPOLINE_SRC_FILE) extract-blastrampoline: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/source-extracted configure-blastrampoline: extract-blastrampoline diff --git a/deps/blastrampoline.version b/deps/blastrampoline.version index b034fe1402f36..616300377e3e6 100644 --- a/deps/blastrampoline.version +++ b/deps/blastrampoline.version @@ -1,2 +1,7 @@ -BLASTRAMPOLINE_BRANCH=v5.1.1 -BLASTRAMPOLINE_SHA1=bac2f810d523003fbb431ecc6e9ea81c8b86e2d6 +## jll artifact +BLASTRAMPOLINE_JLL_NAME := libblastrampoline + +## source build +BLASTRAMPOLINE_VER := 5.8.0 +BLASTRAMPOLINE_BRANCH=v5.8.0 +BLASTRAMPOLINE_SHA1=81316155d4838392e8462a92bcac3eebe9acd0c7 diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 deleted file mode 100644 index 915ee5c4bb6bf..0000000000000 --- a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -22c097ca7784442f1f10733db7961cc3 diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 deleted file mode 100644 index b824dbcb73a08..0000000000000 --- a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -915791ab9837f09db428060bd128e182dda38c8dc10e13f32f059eb8e8b477548e8ae2cd691522f98c88c510b78b2693018264b62d9cc76d5005ea8104d1539a diff --git 
a/deps/checksums/ArgTools-4eccde45ddc27e4f7fc9094b2861c684e062adb2.tar.gz/md5 b/deps/checksums/ArgTools-4eccde45ddc27e4f7fc9094b2861c684e062adb2.tar.gz/md5 new file mode 100644 index 0000000000000..e0fdf5c76bcc5 --- /dev/null +++ b/deps/checksums/ArgTools-4eccde45ddc27e4f7fc9094b2861c684e062adb2.tar.gz/md5 @@ -0,0 +1 @@ +87d5afd4bf8c66b6e598da521dafad41 diff --git a/deps/checksums/ArgTools-4eccde45ddc27e4f7fc9094b2861c684e062adb2.tar.gz/sha512 b/deps/checksums/ArgTools-4eccde45ddc27e4f7fc9094b2861c684e062adb2.tar.gz/sha512 new file mode 100644 index 0000000000000..3339d6d582c3e --- /dev/null +++ b/deps/checksums/ArgTools-4eccde45ddc27e4f7fc9094b2861c684e062adb2.tar.gz/sha512 @@ -0,0 +1 @@ +0586684ecb31c68840643fa0006a3bb5c042845b89182ca9c67dd6c92863e73c045f5c5dabe9e2d758c9c42288c957935dab1b48c91820cfcf2b240b6902f015 diff --git a/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5 b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5 new file mode 100644 index 0000000000000..9c6e4e44927fe --- /dev/null +++ b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/md5 @@ -0,0 +1 @@ +ee5afca99801e37fd3a42a9455ae986b diff --git a/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512 b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512 new file mode 100644 index 0000000000000..69a50a7282781 --- /dev/null +++ b/deps/checksums/DelimitedFiles-db79c842f95f55b1f8d8037c0d3363ab21cd3b90.tar.gz/sha512 @@ -0,0 +1 @@ +2adec92de521df1668eb13f2903ffdb01efd6afa5f04ce6fbd1737caa4948f7b629cdda7f75a895853a0cd49dccf8b388860d5c19c29e4d4aad6c7f8fa6b7209 diff --git a/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/md5 b/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/md5 deleted file mode 100644 index 93a2d414cff7d..0000000000000 --- a/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -ba99caf3dbe9c1c40e67033898ccea2d diff --git a/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/sha512 b/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/sha512 deleted file mode 100644 index 99c68c413c411..0000000000000 --- a/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -c39a90233d3d47431ac7bcbcc47cea9502a9e3a778caf1a67d8bd8364e273ccbe34c9c53f01ba4cfec97ca87b5e7bf9b7901889385061f6dd609413192635b40 diff --git a/deps/checksums/Distributed-41c01069533e22a6ce6b794746e4b3aa9f5a25cd.tar.gz/md5 b/deps/checksums/Distributed-41c01069533e22a6ce6b794746e4b3aa9f5a25cd.tar.gz/md5 new file mode 100644 index 0000000000000..91dfaf6968e2b --- /dev/null +++ b/deps/checksums/Distributed-41c01069533e22a6ce6b794746e4b3aa9f5a25cd.tar.gz/md5 @@ -0,0 +1 @@ +5c6463034c358a254074e86ab81720b1 diff --git a/deps/checksums/Distributed-41c01069533e22a6ce6b794746e4b3aa9f5a25cd.tar.gz/sha512 b/deps/checksums/Distributed-41c01069533e22a6ce6b794746e4b3aa9f5a25cd.tar.gz/sha512 new file mode 100644 index 0000000000000..0ad09e2e6bf50 --- /dev/null +++ b/deps/checksums/Distributed-41c01069533e22a6ce6b794746e4b3aa9f5a25cd.tar.gz/sha512 @@ -0,0 +1 @@ +bebc472ed89afc8f48eabe0ec0673cf416c5fc3b2f3c51599b388f47e47e6c9905a8a033a3f4b5c38a29be515c43fd70ef44750e7d6f8c6ecd8dc491b3012ac0 diff --git a/deps/checksums/Downloads-78255d4927312181542b29ec6f063b0d5880189e.tar.gz/md5 
b/deps/checksums/Downloads-78255d4927312181542b29ec6f063b0d5880189e.tar.gz/md5 deleted file mode 100644 index 1e6941e0ba3d2..0000000000000 --- a/deps/checksums/Downloads-78255d4927312181542b29ec6f063b0d5880189e.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -fe3fd0496c74e3bac89ff726dc332215 diff --git a/deps/checksums/Downloads-78255d4927312181542b29ec6f063b0d5880189e.tar.gz/sha512 b/deps/checksums/Downloads-78255d4927312181542b29ec6f063b0d5880189e.tar.gz/sha512 deleted file mode 100644 index a76890917a129..0000000000000 --- a/deps/checksums/Downloads-78255d4927312181542b29ec6f063b0d5880189e.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -1be660af912922a79301fbe9a4d2dcfbcf3e9b0d7b6277ea8823e51771f1267c222e4b7c4e08b0496b2e735f6748f3f5aa3acb6b4bcc246ab76f4afbc45dc302 diff --git a/deps/checksums/Downloads-8a614d592810b15d17885838dec61da244a12e09.tar.gz/md5 b/deps/checksums/Downloads-8a614d592810b15d17885838dec61da244a12e09.tar.gz/md5 new file mode 100644 index 0000000000000..d59a1fcc37890 --- /dev/null +++ b/deps/checksums/Downloads-8a614d592810b15d17885838dec61da244a12e09.tar.gz/md5 @@ -0,0 +1 @@ +e88a4630ab5cd62b1d2a6213cd2942b8 diff --git a/deps/checksums/Downloads-8a614d592810b15d17885838dec61da244a12e09.tar.gz/sha512 b/deps/checksums/Downloads-8a614d592810b15d17885838dec61da244a12e09.tar.gz/sha512 new file mode 100644 index 0000000000000..af6d2ecf264ac --- /dev/null +++ b/deps/checksums/Downloads-8a614d592810b15d17885838dec61da244a12e09.tar.gz/sha512 @@ -0,0 +1 @@ +988a20fdd50c11dd66318d00032f14ae4c8b7809b507639d9907744cfdd0cff75c299ac67f99e62c25f527de1557ea6f736ee54ef18f2c54fbee76035de88aa5 diff --git a/deps/checksums/JuliaSyntax-4f1731d6ce7c2465fc21ea245110b7a39f34658a.tar.gz/md5 b/deps/checksums/JuliaSyntax-4f1731d6ce7c2465fc21ea245110b7a39f34658a.tar.gz/md5 new file mode 100644 index 0000000000000..c2663955ec773 --- /dev/null +++ b/deps/checksums/JuliaSyntax-4f1731d6ce7c2465fc21ea245110b7a39f34658a.tar.gz/md5 @@ -0,0 +1 @@ +8c9d9579eeab1ba40f978a32c9db9900 diff --git a/deps/checksums/JuliaSyntax-4f1731d6ce7c2465fc21ea245110b7a39f34658a.tar.gz/sha512 b/deps/checksums/JuliaSyntax-4f1731d6ce7c2465fc21ea245110b7a39f34658a.tar.gz/sha512 new file mode 100644 index 0000000000000..46647cb3e432b --- /dev/null +++ b/deps/checksums/JuliaSyntax-4f1731d6ce7c2465fc21ea245110b7a39f34658a.tar.gz/sha512 @@ -0,0 +1 @@ +1bdad624f61482b55deba8727fea1c087bfaea9e1f8afa3b44b984441fb7e663dac067baa4a96ae2d4cbd4a46ae8c87e9d20d2dfcd17046ad194711304184e57 diff --git a/deps/checksums/LazyArtifacts-e9a36338d5d0dfa4b222f4e11b446cbb7ea5836c.tar.gz/md5 b/deps/checksums/LazyArtifacts-e9a36338d5d0dfa4b222f4e11b446cbb7ea5836c.tar.gz/md5 new file mode 100644 index 0000000000000..4d14c85460418 --- /dev/null +++ b/deps/checksums/LazyArtifacts-e9a36338d5d0dfa4b222f4e11b446cbb7ea5836c.tar.gz/md5 @@ -0,0 +1 @@ +8355c253fadfc3f9222e05cb67845dd6 diff --git a/deps/checksums/LazyArtifacts-e9a36338d5d0dfa4b222f4e11b446cbb7ea5836c.tar.gz/sha512 b/deps/checksums/LazyArtifacts-e9a36338d5d0dfa4b222f4e11b446cbb7ea5836c.tar.gz/sha512 new file mode 100644 index 0000000000000..d44f215e67673 --- /dev/null +++ b/deps/checksums/LazyArtifacts-e9a36338d5d0dfa4b222f4e11b446cbb7ea5836c.tar.gz/sha512 @@ -0,0 +1 @@ +6d965199ed02446e694789a38f05249ff60ac00f8295fe32bf91a79cca34649829e38eaf46cc0b0b72ff2df7e184c2eaeb610600ebb5158251b331c61e9dfc5d diff --git a/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/md5 b/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/md5 deleted file mode 100644 index 
f1a62f3d38760..0000000000000 --- a/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -47be3a2c46e5279714bcb7837127c08a diff --git a/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/sha512 b/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/sha512 deleted file mode 100644 index 27b5e2397013c..0000000000000 --- a/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -df45c5fa22619da686481b78de76b60573c798c14a9bbf3a9dd449f52ffca1f2b8dd3b247e2a7679d3dd55ba115787f3734cf03d29a10eba8fecdd78890891b5 diff --git a/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/md5 b/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/md5 new file mode 100644 index 0000000000000..433b89eaedcc2 --- /dev/null +++ b/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/md5 @@ -0,0 +1 @@ +73b81916dc08382ad34b8110983b61ad diff --git a/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/sha512 b/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/sha512 new file mode 100644 index 0000000000000..01f34b83dc80b --- /dev/null +++ b/deps/checksums/NetworkOptions-aab83e5dd900c874826d430e25158dff43559d78.tar.gz/sha512 @@ -0,0 +1 @@ +a860163ef4fa5c2ff8d8712fff5ad5bb16e7697acc53538a0dda4435b910f4ad94f9c206e0d81864762361fdbcfbc57eb2e1708b33326f4f1c8d95997a495fe3 diff --git a/deps/checksums/Pkg-85f1e5564d733c9b04199d3523aeef0607f564e2.tar.gz/md5 b/deps/checksums/Pkg-85f1e5564d733c9b04199d3523aeef0607f564e2.tar.gz/md5 new file mode 100644 index 0000000000000..aa3fbc2f13b9f --- /dev/null +++ b/deps/checksums/Pkg-85f1e5564d733c9b04199d3523aeef0607f564e2.tar.gz/md5 @@ -0,0 +1 @@ +7d9833fd8933f957c7060ff8ff61909d diff --git a/deps/checksums/Pkg-85f1e5564d733c9b04199d3523aeef0607f564e2.tar.gz/sha512 b/deps/checksums/Pkg-85f1e5564d733c9b04199d3523aeef0607f564e2.tar.gz/sha512 new file mode 100644 index 0000000000000..6608a48614ed2 --- /dev/null +++ b/deps/checksums/Pkg-85f1e5564d733c9b04199d3523aeef0607f564e2.tar.gz/sha512 @@ -0,0 +1 @@ +a4634d94c5136b59aa1683a5c82fd9d8cc0d27483c2042f60fba88ebf7ca54bb5c0c6244fb69054b89aa480742cce81e5e6e69905f07bd570ca61019896e6ee3 diff --git a/deps/checksums/Pkg-98d0cc276cc59817eb9c2e18e747fe027d7282a2.tar.gz/md5 b/deps/checksums/Pkg-98d0cc276cc59817eb9c2e18e747fe027d7282a2.tar.gz/md5 deleted file mode 100644 index 927835aebd23b..0000000000000 --- a/deps/checksums/Pkg-98d0cc276cc59817eb9c2e18e747fe027d7282a2.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -8c6bc28980648ed4b7315544c79c8b75 diff --git a/deps/checksums/Pkg-98d0cc276cc59817eb9c2e18e747fe027d7282a2.tar.gz/sha512 b/deps/checksums/Pkg-98d0cc276cc59817eb9c2e18e747fe027d7282a2.tar.gz/sha512 deleted file mode 100644 index ec9e546ace65a..0000000000000 --- a/deps/checksums/Pkg-98d0cc276cc59817eb9c2e18e747fe027d7282a2.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -84b8345212bb08e8801e049f2b2678edcde05bba7223965f2d75c902381b842555ea737ab084af2faa3804a7047c4b2cf9be533b153941c3dc761ddd459b4c43 diff --git a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5 b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5 deleted file mode 100644 index f682cf3518658..0000000000000 --- a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -de53629eb0b1ce98ac6b245bdbf14e9d diff --git 
a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512 b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512 deleted file mode 100644 index 870098ef7aada..0000000000000 --- a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -71cdc58b03cc4f42f8c4b9c2353d6f94d77b4ac5c9d374387d435c57ba85e966f3be4e8c8447b34e184cb8e665c42b3cd2c9d9742c86f7fb5c71a85df5087966 diff --git a/deps/checksums/SHA-aaf2df61ff8c3898196587a375d3cf213bd40b41.tar.gz/md5 b/deps/checksums/SHA-aaf2df61ff8c3898196587a375d3cf213bd40b41.tar.gz/md5 new file mode 100644 index 0000000000000..3b51189e187a3 --- /dev/null +++ b/deps/checksums/SHA-aaf2df61ff8c3898196587a375d3cf213bd40b41.tar.gz/md5 @@ -0,0 +1 @@ +dec1d21e890c88e57a0d4eb085633d57 diff --git a/deps/checksums/SHA-aaf2df61ff8c3898196587a375d3cf213bd40b41.tar.gz/sha512 b/deps/checksums/SHA-aaf2df61ff8c3898196587a375d3cf213bd40b41.tar.gz/sha512 new file mode 100644 index 0000000000000..cbe1ff2eea29e --- /dev/null +++ b/deps/checksums/SHA-aaf2df61ff8c3898196587a375d3cf213bd40b41.tar.gz/sha512 @@ -0,0 +1 @@ +fb611794a539c6725000ff6eda13e0af5dd3f82e22466bdff650ffa0e4edbba5ac4707195035531645a4161ecbb5f873f4f6b1040ce33e9b1adf9c1d34187718 diff --git a/deps/checksums/SparseArrays-0759ceac6e89d03b9abcea27ba1b7bfa6d806721.tar.gz/md5 b/deps/checksums/SparseArrays-0759ceac6e89d03b9abcea27ba1b7bfa6d806721.tar.gz/md5 deleted file mode 100644 index cd2013946933b..0000000000000 --- a/deps/checksums/SparseArrays-0759ceac6e89d03b9abcea27ba1b7bfa6d806721.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -9e1b18aaaeee1ed2f1c6f5b26541a3c8 diff --git a/deps/checksums/SparseArrays-0759ceac6e89d03b9abcea27ba1b7bfa6d806721.tar.gz/sha512 b/deps/checksums/SparseArrays-0759ceac6e89d03b9abcea27ba1b7bfa6d806721.tar.gz/sha512 deleted file mode 100644 index cddb9ab51aa46..0000000000000 --- a/deps/checksums/SparseArrays-0759ceac6e89d03b9abcea27ba1b7bfa6d806721.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -e799c89d4b9079690ef129e61992f2294ad173cb9f9c9e8106220e11483986ff024cf65221a91d9e19e446eae836680f376ee8eeb66b2f2e971e5e055eac4668 diff --git a/deps/checksums/SparseArrays-2bbdd7a12ead8207593655c541ba347761a9c663.tar.gz/md5 b/deps/checksums/SparseArrays-2bbdd7a12ead8207593655c541ba347761a9c663.tar.gz/md5 deleted file mode 100644 index 6ade8ae4a3ded..0000000000000 --- a/deps/checksums/SparseArrays-2bbdd7a12ead8207593655c541ba347761a9c663.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -db4be14b084094a3a083160fce142b01 diff --git a/deps/checksums/SparseArrays-2bbdd7a12ead8207593655c541ba347761a9c663.tar.gz/sha512 b/deps/checksums/SparseArrays-2bbdd7a12ead8207593655c541ba347761a9c663.tar.gz/sha512 deleted file mode 100644 index a50b52aa22586..0000000000000 --- a/deps/checksums/SparseArrays-2bbdd7a12ead8207593655c541ba347761a9c663.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -886dbede7bbc484cb8aa4c5ac7d05940890f00bff32b31a489dcbb873c7f6d956d15ab92638d67b7ba3638f429371af57be76a4949a860efa52d457941ed7f47 diff --git a/deps/checksums/SparseArrays-f154de2b6801ec8d5afaf58b73b830c8e71013c3.tar.gz/md5 b/deps/checksums/SparseArrays-f154de2b6801ec8d5afaf58b73b830c8e71013c3.tar.gz/md5 new file mode 100644 index 0000000000000..9a38667523ea0 --- /dev/null +++ b/deps/checksums/SparseArrays-f154de2b6801ec8d5afaf58b73b830c8e71013c3.tar.gz/md5 @@ -0,0 +1 @@ +e4c17513749ebdd898daf62f932fc9b3 diff --git a/deps/checksums/SparseArrays-f154de2b6801ec8d5afaf58b73b830c8e71013c3.tar.gz/sha512 
b/deps/checksums/SparseArrays-f154de2b6801ec8d5afaf58b73b830c8e71013c3.tar.gz/sha512 new file mode 100644 index 0000000000000..6547864574540 --- /dev/null +++ b/deps/checksums/SparseArrays-f154de2b6801ec8d5afaf58b73b830c8e71013c3.tar.gz/sha512 @@ -0,0 +1 @@ +15a6dfcf64ebc2ff3893373107f317a1abf8518d852219092ead1bdf1db602dc4ac36d836e0c8412b6b1e0b42147deab95d355741fc72234b52f3399a56ba8f9 diff --git a/deps/checksums/Statistics-68869af06e8cdeb7aba1d5259de602da7328057f.tar.gz/md5 b/deps/checksums/Statistics-68869af06e8cdeb7aba1d5259de602da7328057f.tar.gz/md5 new file mode 100644 index 0000000000000..9ba42f555d535 --- /dev/null +++ b/deps/checksums/Statistics-68869af06e8cdeb7aba1d5259de602da7328057f.tar.gz/md5 @@ -0,0 +1 @@ +01b84d67052d1558e51619d5159e7a8b diff --git a/deps/checksums/Statistics-68869af06e8cdeb7aba1d5259de602da7328057f.tar.gz/sha512 b/deps/checksums/Statistics-68869af06e8cdeb7aba1d5259de602da7328057f.tar.gz/sha512 new file mode 100644 index 0000000000000..31c9c6ca42cec --- /dev/null +++ b/deps/checksums/Statistics-68869af06e8cdeb7aba1d5259de602da7328057f.tar.gz/sha512 @@ -0,0 +1 @@ +6ab55ba6f93d2e8b34b19f53cb51a4bfc97b336d451b98f7b95ff81f04fee4fb90a2e4d04aa4bbf3ccffc99c36d9c82c9d00dbae283474308de4a27a91c2e0b7 diff --git a/deps/checksums/Statistics-c38dd4418738bc595bd8229eb4ee91b717de64af.tar.gz/md5 b/deps/checksums/Statistics-c38dd4418738bc595bd8229eb4ee91b717de64af.tar.gz/md5 deleted file mode 100644 index 69ee3fd518626..0000000000000 --- a/deps/checksums/Statistics-c38dd4418738bc595bd8229eb4ee91b717de64af.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -20ea909b7e95726c2aec676501b28af2 diff --git a/deps/checksums/Statistics-c38dd4418738bc595bd8229eb4ee91b717de64af.tar.gz/sha512 b/deps/checksums/Statistics-c38dd4418738bc595bd8229eb4ee91b717de64af.tar.gz/sha512 deleted file mode 100644 index 7229c045089a6..0000000000000 --- a/deps/checksums/Statistics-c38dd4418738bc595bd8229eb4ee91b717de64af.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -fb102582b868571c2c8d95f723df54aee2fe5693badb6171296207e267c4fbe0bcc8003c9e01ce6d3245ae79f977cad86fe18c14ef2cf440a764c10d11941d1e diff --git a/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/md5 b/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/md5 new file mode 100644 index 0000000000000..5d9fdfb8ebd7e --- /dev/null +++ b/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/md5 @@ -0,0 +1 @@ +311f5b6b7e109fea852303ec09324b00 diff --git a/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/sha512 b/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/sha512 new file mode 100644 index 0000000000000..8a3c1c9238514 --- /dev/null +++ b/deps/checksums/StyledStrings-61e7b105b157b40807ed0b4840166a25b0948549.tar.gz/sha512 @@ -0,0 +1 @@ +fbf4b2fdde4fd2c2bb321b915d9833ea34952aec1bf9fdcf51e229e6bae5fc0fbfd30db31e410c634955144c6a4295289a313185621e2c5f16b06f22a049739f diff --git a/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5 b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5 new file mode 100644 index 0000000000000..2f81a0d9191b5 --- /dev/null +++ b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5 @@ -0,0 +1 @@ +46541001073d1c3c85e18d910f8308f3 diff --git a/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512 b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512 new file mode 100644 index 
0000000000000..e2eb44845e276 --- /dev/null +++ b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512 @@ -0,0 +1 @@ +f7470a447b934ca9315e216a07b97e363f11bc93186f9aa057b20b2d05092c58ae4f1b733de362de4a0730861c00be4ca5588d0b3ba65f018c1798b9122b9672 diff --git a/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/md5 b/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/md5 deleted file mode 100644 index 3be44f2d90718..0000000000000 --- a/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -2e6f1656df70500842c4de4d0f941f89 diff --git a/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/sha512 b/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/sha512 deleted file mode 100644 index 7c1626b841ee0..0000000000000 --- a/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -2d2ed113bb9e9469b29a680172d1ab06b2ad8e0574788c3eb16467e621b9127c6159afbddd1014694d46bf9945cfcecbe8cbc315448e0a06fe14b45a4b10ae83 diff --git a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5 b/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5 new file mode 100644 index 0000000000000..921ffb0a2561e --- /dev/null +++ b/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5 @@ -0,0 +1 @@ +b3d21b3f38cd106e64fa9d058d095651 diff --git a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512 b/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512 new file mode 100644 index 0000000000000..cbf6ad4952258 --- /dev/null +++ b/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512 @@ -0,0 +1 @@ +61bd3555de7a2cec265ae72d58b4635f84ec75b993b9dab2dc5be64375b6057972a2786337f90742ad3b91c57f5008372a3a4f8a5b589e2cf4d5cd1a8056e03c diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline index 0276f885e5768..d72a584fd1b0c 100644 --- a/deps/checksums/blastrampoline +++ b/deps/checksums/blastrampoline @@ -1,34 +1,34 @@ -blastrampoline-bac2f810d523003fbb431ecc6e9ea81c8b86e2d6.tar.gz/md5/070218f52aee906ebebb035e6c504aef -blastrampoline-bac2f810d523003fbb431ecc6e9ea81c8b86e2d6.tar.gz/sha512/eff4c34f19fd444cf3379c81836db82848287aca6106d952127565a0ee2d36797fa36b9f48b77db6a9a0c27dd307400385236ed335d7e58ecc7ec92de32af2c6 -libblastrampoline.v5.1.1+0.aarch64-apple-darwin.tar.gz/md5/a6475f23420c26d97b1baf1e37cc13b5 -libblastrampoline.v5.1.1+0.aarch64-apple-darwin.tar.gz/sha512/96386a4e0b57bc50cbefbb0eb75b037571e3d9ae3900122bb8d4f7f14db017b9e8a6dd2eceff07c9880dda2e072b89df7d21432fd5a08bef87a282cfc3bfbb82 -libblastrampoline.v5.1.1+0.aarch64-linux-gnu.tar.gz/md5/c28450dc1999d9304414288b267d72f2 -libblastrampoline.v5.1.1+0.aarch64-linux-gnu.tar.gz/sha512/19303d32b316cbce29f93dfb713987d6567946262158f1aa5f447a86197843d2875915fc6282264f49747237844f8cf32f9e5b2a0d6f67d514474823e7929de5 -libblastrampoline.v5.1.1+0.aarch64-linux-musl.tar.gz/md5/a40854c55588b88c57994fc8e3d3247a -libblastrampoline.v5.1.1+0.aarch64-linux-musl.tar.gz/sha512/c2fbc67fd8ab61bc854722949ac87d19fb7ae3e732f01e9ed855204605ef1b2756db4272688807a9928eba3cfe949099a3e74ea68c432219c023216d82e44b1b -libblastrampoline.v5.1.1+0.armv6l-linux-gnueabihf.tar.gz/md5/2d564a40dafc6e3001bcb13f2460306a -libblastrampoline.v5.1.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/2ba59a5ea48bb4e9fafc5a34b8bc09fda9f4aa15917e41a87410d888ff69832fbd54a6ed6a401e0686dd2fd46e90603969ee42497691270921cf5688c8a1d2f7 
-libblastrampoline.v5.1.1+0.armv6l-linux-musleabihf.tar.gz/md5/41cd8967ea13f76301e2760ce20b16b9 -libblastrampoline.v5.1.1+0.armv6l-linux-musleabihf.tar.gz/sha512/40f69ae9e352215e8faa65ca8451d5850090cafc3b71207df2f588ebd06d247fab4af02a544e5389a9e5a89a38d5a89f71ad8d1bf7bc695d9cf8903e9654ac87 -libblastrampoline.v5.1.1+0.armv7l-linux-gnueabihf.tar.gz/md5/a689ed70eba7f191a32508c5e266952a -libblastrampoline.v5.1.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/47e5e1f1ef3f7dbf22c48bc9a09c0abb5abb967885c288c74b51249a22aab0cf475887e612f219e5abb905eab3018d5b5225682bfcc908debd6ff8d509e1a23c -libblastrampoline.v5.1.1+0.armv7l-linux-musleabihf.tar.gz/md5/ed08534ca3f065d391c2484c5fe6fd6b -libblastrampoline.v5.1.1+0.armv7l-linux-musleabihf.tar.gz/sha512/014d10a154ce3d35dd428dae52d4d52445d1cc1d501aed5f490332b663438a000b02992946b0ce18bf2e829339a35e163f684568f3484c83ca4f8584da4cc405 -libblastrampoline.v5.1.1+0.i686-linux-gnu.tar.gz/md5/b5f315c6e3b719991f4750d0451ac13b -libblastrampoline.v5.1.1+0.i686-linux-gnu.tar.gz/sha512/b67a478b532b664c1729a151d62f070308806476a2ca38bde3d20648676f1ed7f41ada42650641f98eb165beba984d40ddbe667b49b99213321c54d72c2f0f81 -libblastrampoline.v5.1.1+0.i686-linux-musl.tar.gz/md5/69b0b2128c7b482bc6f7b769d30322cc -libblastrampoline.v5.1.1+0.i686-linux-musl.tar.gz/sha512/97621e6f17deb137ba63af5a413efa67bc60ccd6a6776ff6fad8b1393e8a4b9a4586b5a4015471a64314b85e81e8421d5fa85b55f7bc48f4affd30d89a5d4082 -libblastrampoline.v5.1.1+0.i686-w64-mingw32.tar.gz/md5/b16bdd51b0d3336bca03374cd23884da -libblastrampoline.v5.1.1+0.i686-w64-mingw32.tar.gz/sha512/f323fae462a4d1210fbab1f6b253224b385c5a3c5e259cd4ce57fc4f77ba53293b8f14a3cd9db1f7c8ee2dab461aa36d62a8ec8e9693f3c257b8401de6550cc1 -libblastrampoline.v5.1.1+0.powerpc64le-linux-gnu.tar.gz/md5/d8f0d6980b97ae48a9d97dbfa28e6d1c -libblastrampoline.v5.1.1+0.powerpc64le-linux-gnu.tar.gz/sha512/f1137c5357153c0c309277d39398c2338297be73de995ae083397da5c170c4b1bec6939b6e160601b98ea40c42f9b563ac5ac1625341cde1ece6b1b5f5ec01f5 -libblastrampoline.v5.1.1+0.x86_64-apple-darwin.tar.gz/md5/088b8d27b76be56fcd7ed4383e5912d3 -libblastrampoline.v5.1.1+0.x86_64-apple-darwin.tar.gz/sha512/52741282b55f1ee0ded1aa63e4313a84be0862209f8a4439ef2076a03010c0d91083ca35cacbf187de77817ad864625a3dfd2769881764e3d9434ae387405778 -libblastrampoline.v5.1.1+0.x86_64-linux-gnu.tar.gz/md5/562215ad47d93c83c6587051ef201f0c -libblastrampoline.v5.1.1+0.x86_64-linux-gnu.tar.gz/sha512/9217f6afa0f3ef534c361fc09d14bfdf8322a8942c5e2ca0fc9234839e48d56339f03126aa9706b2ef067f88433d79f7d6f8824bb5763b99f64ef42919c3ab0b -libblastrampoline.v5.1.1+0.x86_64-linux-musl.tar.gz/md5/bd9b17ebc05ae50fc125c3cf1df8f990 -libblastrampoline.v5.1.1+0.x86_64-linux-musl.tar.gz/sha512/68b0ea95d404508038ca84b426c3ec02ae98b129e92a0f661766ab08bf38750f92a8aa41c53327bc2f6787b42504025011eaf79bb98febace4c41e628caf2094 -libblastrampoline.v5.1.1+0.x86_64-unknown-freebsd.tar.gz/md5/0308d4a7312bacc62446438f4d4b6894 -libblastrampoline.v5.1.1+0.x86_64-unknown-freebsd.tar.gz/sha512/d4085d81e85b9c1ffefd5a6147deea9f04436e1145eca73e5b63dba048aeaab9c497df725dc3104a77c834597363b7205ef7270f96ae94f06c950f7574e25d07 -libblastrampoline.v5.1.1+0.x86_64-w64-mingw32.tar.gz/md5/2a883d986c884be08ef332bcdc3ab52e -libblastrampoline.v5.1.1+0.x86_64-w64-mingw32.tar.gz/sha512/dacbcbe09910b7965448b22f3dbd55945bbe22d06c60a92d2c97da83f0b08d00278ff870eada470213fe22fa3c8acfcc0be8b753a885d98898d048e896c909ad +blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/md5/0478361eac783b99002b1ad985182f05 
+blastrampoline-81316155d4838392e8462a92bcac3eebe9acd0c7.tar.gz/sha512/2489ce5770a9861889a2d07e61440ba4f233a92efd4a3544747f83320e0e7a229a8fe01553d99f5f1d98713316f2506daf0adb7d024a46e32b3de1bb2966d637 +libblastrampoline.v5.8.0+1.aarch64-apple-darwin.tar.gz/md5/a28837b9838fef2b3831de3278ec7949 +libblastrampoline.v5.8.0+1.aarch64-apple-darwin.tar.gz/sha512/111ac2fe5f8f8102f2f7c9e9e6aa1d1a12d2db941238c949ff8e64b30335e8b2f6ecce0d5f577879c231eb839c06e259302b709f3d34e94a97047bfa984222f6 +libblastrampoline.v5.8.0+1.aarch64-linux-gnu.tar.gz/md5/9e781a026e03118df81347fb90f10d45 +libblastrampoline.v5.8.0+1.aarch64-linux-gnu.tar.gz/sha512/89469f32a666efd46437351a8fb16758c35e5aecc563d202b480c10ddf9fa5350a5a321076b79b0a1a07ec2cea0b73aa5c28979cc382a198fa96cca0b5899d25 +libblastrampoline.v5.8.0+1.aarch64-linux-musl.tar.gz/md5/b7acda2fdd157bbb183d0dd33643beef +libblastrampoline.v5.8.0+1.aarch64-linux-musl.tar.gz/sha512/cf4125a47334fe2ec0d5a4b11624b12e1366ec031500218f680ad5a53152b9d752c0c02a0b92d0e07f3eb21f2f8f58d0c587438a4869a72197bbd5e91531369d +libblastrampoline.v5.8.0+1.armv6l-linux-gnueabihf.tar.gz/md5/eafabd99fb1287d495acb8efb8091fde +libblastrampoline.v5.8.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/63ff4e6bc400fa8ee713a1c5ae4af0a8e152d49860c6f5e94a17e426ad9f780d41cc0f84d33c75ea5347af1a53f07fc012798d603b6a94ea39f37cfd651a0719 +libblastrampoline.v5.8.0+1.armv6l-linux-musleabihf.tar.gz/md5/9788f74b375ef6b84c16c080f2be5bdd +libblastrampoline.v5.8.0+1.armv6l-linux-musleabihf.tar.gz/sha512/f00ebf794927404e2294a2fbb759b1e3e57836c7f683525fac0b2ac570da2c75904e43f154cf76fce310a624f9b35fbd40e6c7757882bb6f30db790f4221a543 +libblastrampoline.v5.8.0+1.armv7l-linux-gnueabihf.tar.gz/md5/4492bace63d8274d68ecdaa735e47e99 +libblastrampoline.v5.8.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/8868283e6c5224b80145fdfd17f13f713053ba94e49c170f38f0cbf9f794185d7dec9c107ce65dc76121d3ac5b21d2f3857f619d8279bede86a906230ff59a71 +libblastrampoline.v5.8.0+1.armv7l-linux-musleabihf.tar.gz/md5/d66b6ed1d4e5f6a130f36791063e651d +libblastrampoline.v5.8.0+1.armv7l-linux-musleabihf.tar.gz/sha512/414ad07574a6e9aa670bbfea13eaea11da13129c9ccb4193cad708014c31493ff10ff427558b90cb16040fa64c8a325c2e375e3310c39fb37bb3e7fdb6a72a5f +libblastrampoline.v5.8.0+1.i686-linux-gnu.tar.gz/md5/595199a3a01174cfa4d9ce3407bf30dc +libblastrampoline.v5.8.0+1.i686-linux-gnu.tar.gz/sha512/02c3b0c3c0a411d5090a081f3bbbe38aaae40eaa5fe63d0690e0582e233cd9ce76483922557d4f65dc457e29a4e84d86ee5af20a60b082aec7bec4ca8607c1ca +libblastrampoline.v5.8.0+1.i686-linux-musl.tar.gz/md5/5832d0044842cb84f4e1e1b0a04b8205 +libblastrampoline.v5.8.0+1.i686-linux-musl.tar.gz/sha512/d28954d0feef6a33fa0bfeb59acb68821222d36a4e353eaf41936ee2c9aace719c2d0f0b0f080eafe2baecc67a29de4cacc0446aac776bbb615c4426d35c9c8f +libblastrampoline.v5.8.0+1.i686-w64-mingw32.tar.gz/md5/46391ac222980a0ad2c2d6d2b54db26d +libblastrampoline.v5.8.0+1.i686-w64-mingw32.tar.gz/sha512/6dd3434648a297639ef327efa5827d6aea70df551774e52ba395cdf187bfb603d365eed84780913fda5f3d12512ac54ccf784da3cf6317671ab346211f5984b3 +libblastrampoline.v5.8.0+1.powerpc64le-linux-gnu.tar.gz/md5/5f76f5c6a88c0caaa6419ba212f8cb94 +libblastrampoline.v5.8.0+1.powerpc64le-linux-gnu.tar.gz/sha512/785071e682075b2cebd992394e66169f4ee2db3a8e23affb88dc05d9abf55f49d597b2a7400a13c83ad106ad825b5ee666b01f8625e51aec267132573273991e +libblastrampoline.v5.8.0+1.x86_64-apple-darwin.tar.gz/md5/21beb51d448bd22e4608a16b3f4fde05 
+libblastrampoline.v5.8.0+1.x86_64-apple-darwin.tar.gz/sha512/620ba64d93ef416e483f813617aa313957282d8361f920b5444702fa911ff0051d1f8a8814b5fa0b082fd4dc77d96cb8b763937c786959bbc97cbb6131617152 +libblastrampoline.v5.8.0+1.x86_64-linux-gnu.tar.gz/md5/14c1045ba4d400f490ddea5343a46f04 +libblastrampoline.v5.8.0+1.x86_64-linux-gnu.tar.gz/sha512/0fdae83f4df93b28951521cf426736367f568c1e76fb68eea42b045cc9a288b6836abb3206a6d61e4f88adcf198553e911c45231aecb0f552e06de28eb3bec54 +libblastrampoline.v5.8.0+1.x86_64-linux-musl.tar.gz/md5/59b110676fcb2fcfdcf670a5d435d555 +libblastrampoline.v5.8.0+1.x86_64-linux-musl.tar.gz/sha512/57a5022e9fabc0637a29f3c32f6180cb4f6a90282191232e299df6cea5265b535e4a0af4fde15c8fe80e5a59edea0fae96dd3a510f5720ecd78e85a2a9ffbfe0 +libblastrampoline.v5.8.0+1.x86_64-unknown-freebsd.tar.gz/md5/c22da112cfc7f9fa0f103d08f4b78965 +libblastrampoline.v5.8.0+1.x86_64-unknown-freebsd.tar.gz/sha512/ace02fac0dc6df472456007a081e0aaa85a6b17290321fb214349aac0f2d0f893df602dca28fc26ddfd4ed574fd9063bacff343249e5a1109f5d92dc9cb7a1d3 +libblastrampoline.v5.8.0+1.x86_64-w64-mingw32.tar.gz/md5/34fdc53745245887f968f420b2f02ed9 +libblastrampoline.v5.8.0+1.x86_64-w64-mingw32.tar.gz/sha512/bbf478736b7bd57b340ccd5b6744d526a7a95fc524d30fdf9af6e9d79285641be26fae5f9e5302d71a5be76b05c379e969a829e259d8100ba9c6ce202b632b3d diff --git a/deps/checksums/cacert-2022-02-01.pem/md5 b/deps/checksums/cacert-2022-02-01.pem/md5 deleted file mode 100644 index e287f024b8e18..0000000000000 --- a/deps/checksums/cacert-2022-02-01.pem/md5 +++ /dev/null @@ -1 +0,0 @@ -3b89462e00eba6769fae30eebfb9997f diff --git a/deps/checksums/cacert-2022-02-01.pem/sha512 b/deps/checksums/cacert-2022-02-01.pem/sha512 deleted file mode 100644 index a5d8840598343..0000000000000 --- a/deps/checksums/cacert-2022-02-01.pem/sha512 +++ /dev/null @@ -1 +0,0 @@ -75f5222c23d14d194856d3fa58eb605a6400cbf0068e208e1bc75a4821f841c39a95dde161b904db54ce922efa384796ad5f2e2b6ef75327475f711e72652388 diff --git a/deps/checksums/cacert-2023-01-10.pem/md5 b/deps/checksums/cacert-2023-01-10.pem/md5 new file mode 100644 index 0000000000000..92063050b50f3 --- /dev/null +++ b/deps/checksums/cacert-2023-01-10.pem/md5 @@ -0,0 +1 @@ +e7cf471ba7c88f4e313f492a76e624b3 diff --git a/deps/checksums/cacert-2023-01-10.pem/sha512 b/deps/checksums/cacert-2023-01-10.pem/sha512 new file mode 100644 index 0000000000000..d3322e5890f81 --- /dev/null +++ b/deps/checksums/cacert-2023-01-10.pem/sha512 @@ -0,0 +1 @@ +08cd35277bf2260cb3232d7a7ca3cce6b2bd58af9221922d2c6e9838a19c2f96d1ca6d77f3cc2a3ab611692f9fec939e9b21f67442282e867a487b0203ee0279 diff --git a/deps/checksums/clang b/deps/checksums/clang index 465223ff16402..78743a32a2956 100644 --- a/deps/checksums/clang +++ b/deps/checksums/clang @@ -1,6 +1,108 @@ -Clang.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/844e9b145b056f96f7be76a9c0e6c84f -Clang.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/0e634e884aa0244324f36918cd5c5c6ce624f7b59d9296437b820d00f3d495ae435f8568835d337e74f62d66075ccbc2304dd7a04b636ea949099a739b5f1d27 -Clang.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/881797109bf0058b68bd2f8f261a0ed8 -Clang.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/2f41d2d2a10953bdca23e38537f3445301b6b1f6dc0ad04425195752c0002dc8275a3a3b1a8eece3f2c8f1e173c5b7708b77e337f007c4c05745f586d015b1d8 -Clang.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/a9d7e9fd69bb05c8d24ab8b08d310af8 
-Clang.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/d5f04eb64d0fad218ab4520820a3ffc7558e07fbafe080c4d65273734224151cf9c8e114020e7008d6b42149da4d26c5c3fb6351b3c5e91fb302a1bd023531d5 +Clang.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/b064c4494822dc829611629c4f2bd5eb +Clang.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/6f4fcfebbea9df7964eb6f7b4c33b91baaa2abb03c44e0c51150d894d22d4f99e1ccb6b6f9acfb45f532bbb61e2ff11515d306a56004be46f67d60134259bdfd +Clang.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/8a17e03287b24b2e9e08a4f753e12fe0 +Clang.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/065a53430ddfc2cf202ad0d18933967af3f4ebe07bd59ac41856d0ea9e67a59f958850b7bb2312f98b1927d5e74bdd9015867145eebd242930a081c58a9bbd19 +Clang.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/d7d43a6f98be3c09c3c5b9cebd6fc930 +Clang.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/290cc24e1278b7e6648d006669bccfec1c41d797718e16c9996588969b7fc9eda360cfa0d2a7d100b0c5e403fabdc77487c11d0a812fe51e9cc2ec2a814192e7 +Clang.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/95036da7fb8a37330ea3a74f721460e7 +Clang.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/7d1df0c6c8514153a7295a75b79f827d724d53542f39773d0572be9cc24d8793ab02e35ca818f17661bd777bae7e346e8318f139e9f141736f10214b80d7461f +Clang.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/81854d7e53af48122a80448106a5cec9 +Clang.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/0744f60891c18a8fdfe8e4c11b827fa42ed98285bb0557837f7b6dafcb1eabcf763d787c4b4b541d0a3e40905008c6e7b2d76d76ac7edf8a470cfc39e635f43d +Clang.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/e381e3ec37796905cf0d1863f34d316e +Clang.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/417218e1447ffcdf642d9e28e7ffae98138efafe5292f6dae83cf2490eb89b3cf39cbff5c77b6c442a0e04173f9eb22ff0dbb0469cdfc622f7eefe8bdd65a5ef +Clang.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/7972214282a5b81512a185e023488970 +Clang.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/01483ffbb7e5878ed95843d8e750ae163018fb7fd2bc044aa42d55024248d17fbc094154968b7297aad6131aa7976630af282be4dbda1b9b6114cd660d8cb075 +Clang.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/22d08b77071e480d81a1e3c0b7ad7f66 +Clang.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/b7e3c2b1cb6f00fe1280b0bb9f72f43e5883df37fc50b750e663845083d7c797f4a4a47c4703169a29a0c70899e92b8f9d861674563164590b4689d7bf39cf4f +Clang.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/abb72f91e3ee55c95a5364e1ec22c161 +Clang.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/899fb8b9734c0041b524d6e8befe61d7c0980fb697be82297f4c091a71c30e1f56c1f8fd76149853d7f7a4026148d934aef906ef4a35fb00d9dbc51a56bdb5e7 +Clang.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/745d75f7eb0d8b905182f08c0c1f9911 +Clang.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/9f249ad4078fc023874c8d8990b981aef1663828533c41841bb5ed860832fde7f9b37f19d78dcf06bbbff73b24286204a686d051abeada7dc484a1fc5bf8d9ee +Clang.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d2b4126d32ff0b9e28b85d91feeced15 
+Clang.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/d319064266d2d264ffec849d44efddc6f1835058a7a0e33d3e78dd2122ddc18b0303c38a265d86145debfc5cae9b98c3794b447fa688e042bbcb4f75119f9788 +Clang.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/4161a2b194e54c35badc65124a19b81f +Clang.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/7302ecb2f05b697ad212fe1d3a1d24740d3d5a8bd1b01a61955ba84c1592f80e7ca3b5ceb7b62361ee61eaadec762a2eb7c899c53b7b4e06fccd8c71ab3ead46 +Clang.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/c2d179b7b8d9afd783e9be03f258884b +Clang.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/4455aed97627d50caaa28624ab15a099b9e174988bddd13ca558e1796791606c25a22689beec358e03d4e2760f6778a2d54a76538e370637169e3b9120e664a8 +Clang.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/c6aa69e3061de0bc80b17108ac89f6d6 +Clang.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/012aa640b4359842081c06c76bf51c208bba6a494e6c8167c1abf57b8a01677376c23917b0bf66edc7afd5dcf83f08090fb5a51ac1604213d6e3ed48bdd64118 +Clang.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/1abf2235894e554d19b1e5372149f38f +Clang.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/f35cceb0f39dc444435e9db90c6bd8d39adf49cc9c885bc190df051e38c13379e5d5ea5618034763c608abcce06f363c93397fcccc493989f4fdb910e800aa7e +Clang.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/5ecd609bac911e14fd7e36fef5ad8e65 +Clang.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/d05457ce07abd12eeee5a036742f66ec4dac99190e98e4fa26c7b59d00b5c90a98bfc65352355ccc609ffb7114106f66dfea166de1eff1d3553180c2cc5484c7 +Clang.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/49a60af7a47d4a5ba9838594af44b695 +Clang.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/06bc663411ea5b156fee0deab246abbce42d8c180489cd38cc0a1bfa9c73c4d21e382407decec65750423269bb71c758bf2401788aea884394e8ce40aa75f9bc +Clang.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/b555bc479f57daae26101118b71e5b9a +Clang.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/aabe9e63b6f11101d6b9be4c56b945014c49373ddf670346baec1ef255c504341c9c3f8e729684c17df8ca50b108e0f4b2441b7d5b55a8e2afde3934866b5f6c +Clang.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/5ef0a90f103a00c80c68309b5bcf1f4d +Clang.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/37266e4253e853b68a41d4ae21e3c6f8b76d9799f8b802d347b8de72e33e529364c4ff48de0e11fabfb85f5d982a3b0fba594f4098f6b428c883fced9767517c +Clang.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/915cacbf726988039454f100fd007f76 +Clang.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/15bfafb2b04655617190f8ba8cf1f5a92ed277eefb84e22b8ebbe05f19b384cf296db893e2d2abffb1228749c07e3e5a59e42d8becb3c32591970d060beb71db +Clang.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/dc2c2c78c6446cd17fc55e7499ebd9b5 +Clang.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/a4713dc6fabe3b374632534452a4ffc997588d2e15f4ba08087a9c0dc23b1f447ab5d33557f56eb0523433ce534da2def7de76600021825781d662bfbe09e094 +Clang.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/855f38cf2d8b2761319ed6670897a32a 
+Clang.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/eca1511c2eaaca9a538efe2f04f17234dc2c0920d2e4081b7e1b92565000028749f7d379574c636f04754e51baa04c193437e9bf755fcdbbfd5ce3a96c7c5a1e +Clang.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/8601f1d8e99af31a38e044daeddc3622 +Clang.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/dca2e5434f83ed1829e16e6cf7abed86bdd3251492998bfc808dd117a253a08886234d5e1086bcec76a38f3373ae76a36a4c1a81a1967fda567b95aadbc354c7 +Clang.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/82c18bf06d579467d7eebeb646238b25 +Clang.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/4669a4a7e62af66271259bc15b38dc63e72227097eab35b93d44c483d48205b45403a83fa779c10f926bed2fab1347a73d93956930550194343525e3ff642855 +Clang.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/99f9eb32064c66ad460fbf73d4ec379b +Clang.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/074773e19ba60ddf152e7a65689bcb2c0ff255813cd4079190ddad14442be698edbf9917a9f042b005a666a879261c026f265309254d2c4e6ac07e85dfefddee +Clang.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/ab5c8cac975d0348bdab3f55850db1a8 +Clang.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/f8ded9ea7c2209a63349e5ba221adfc76689c1299087680108939474c6bc965506245632788c9af4975b3cf099c9a85e94aa3fdf0f2f139b8462ee2f9b306704 +Clang.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/9cd24f4c1519ed384dbe5a5325943e86 +Clang.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/579177600090fe900edf8e6bd5ab7e8617d74c411769e6726a424492f468c3fa9938101673ee2a13ae1e3ce5b0fc22d74589e4eb6e31e6a411d2f0b50eac05f2 +Clang.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/970230c1742c8005358514ff5c44e6b5 +Clang.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f24e1bf1c92bea7519a6b767c4391566bc80d1668e2562f4c083f7d68b7166e9330b83fa45406129ba3a0caba5ff8d18a90273e9c7992d93f4b915ee54c6f5a9 +Clang.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/6b331d43437f38918216388402529ff4 +Clang.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/d302d08b7ed46a7c907a0b04e25947ba0f84e5e12486d3dcd4b0b875e9d686902fffe89eb7db631b9be33606c05b83a13181295bcde45653c6959f7f5c1ba9d8 +Clang.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/46e24960499c926aa047f1b29da282e3 +Clang.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/6efddeb8de7bb9f6245c604c1f80a1a1449278b1b63ac8787b27092713324e4cfb483a03684b7e58e2dd14d15f39fe6e315bd654d2cf788c03b333fe5068ed10 +Clang.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/1540ce6fb7925df9124ebfe27ace9ae1 +Clang.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/9acc77743c34ccab9f362dcd5cdf761ecfc2dc0c7974d3f62b4dde852427de327e9c53981988cf5ebd6c29ea89221d938bfd23783ba64522fcb7bad5cff7f79c +Clang.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/2bc9b9a0d15a35e7311ea30d6240d04d +Clang.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/4ec99c6c90a870485bc4eb526bbed1cc1aba5f35f23ff47d32c86e64871806d6ff94244869b14600f82d5e8f0dc91ae307e87d808c3b89ec92b1e9c8519fbec1 +Clang.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/73b8c5fee1a5c1f3a59d75e4ab7482fd 
+Clang.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/f4eb27d2dac224585e274ae5bd9012e60c044220d39daa2a46e00da54e8cb26b20f9447f111c71364d05594683773d73fd22e06cacd465c64ce5c0fe0b51eb43 +Clang.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/7c2c723aebc97f0df6a06ab04117e7a6 +Clang.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/e2d28c9a69c7d384cc82c077138fad2f8c87c89ae4400a6ebb468dd8c26695db42e4889542fb91f515cd53d9e7d71859ff7b9614cdbb6a6c6a40a67402737495 +Clang.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/e58dfa6c7f5647eeb320e12b0be71e94 +Clang.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/1817b521f096597b8be1a08ba4cff23725c4191988844423c1ee13c62202093c9fdc677554c8a416e68c2c8efadcf39b7b2011d4ef6662704f2793121827508b +Clang.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/a617b5275ee7b13a9ca8d45a8a1bbcc4 +Clang.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/074b61e67d6ac161912ac8cba8dad2b5d0daa310dc7a23a91659f9734248c94a31f1a3af97aa02cc3aa8d226bed6cd25d80a4dcd15024b68b417df973a37e59b +Clang.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/2fc553d70042a737fb6f8919ba8c7586 +Clang.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/98a4ac67f40d48c86819ceb5fe7692f367207b039bcaea7068d04137e529c7019d7369f7495ee8aadec66968736e665e81185732da26be12ba06be331f7e9bff +Clang.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/7191ddb4f1263b75b475a4ca7e258dad +Clang.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/91dc613500fa03aa0f972800ea33a5602b25542314ed47f4e3457aed1d218b14f988f3d6a6a66d0ff14df152aab7df4ff798a5725078edb208c06540e9ec93e2 +Clang.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/5c8b2900fe9fc12db25ee97dcb489f85 +Clang.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/bc7b36f3ba8f7ba3463a36b49f2eed6e5a3683c50f02c2901f6828d0e8ef31e61850aeb2f3668df206703123b34281a1038820be9ea38381a4be1204b5af1325 +Clang.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/f4567028c971108cb7f6970041eb58eb +Clang.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/9f6f5e65480720bebefd46a207ce32958304ded6f6ba6135ab700ee6263bf9e5fca55cba6eebb9f651fa8b6bd03de5df1bcdcddd17ce33c3b19ef137e998eb20 +Clang.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/6e3487ee5dc66c1d30bc269b53f78176 +Clang.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/0479baed38a2810e0ccb813f00d40c1159acaa54270d7074df364e7d39dcfab524cd83a63b712ed3bd992792eee596a53202ec5363097f019fbaa07a7a5123e6 +Clang.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/ac5de702c96c77d8257d72dc003ab347 +Clang.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f74a9ba8cef5cb0fdc4a2229ccd911c4c9f612a6f2446a85fdca13918a034a6e43f365e0b46580a0a0a690c871a07a17ef8cd0d40da74ed210b78de8cfa6c253 +Clang.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/1889148c5bef3f759430f78c9806ffaf +Clang.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/7e5f5a3834abd1a95b03bc5b2bd095d4c16950d6a67d015d82e7985f5b918cc879ae1197a636ec6582fb71929d392a5758dba48d8a9ab6563454170caeb46421 +Clang.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/1c2833125dea9030c8d36c372367ed4b 
+Clang.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/0399915bd90fc8d798494b4eaf21dcc8587b8634003bd0bff0be66f4986587b8fdec3fb324defc3fd2ebf6793db8d3444e1188d2d300b8b1aacd3d1eb5a48114 +Clang.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/64c0ac9f6a4a4f8a47c9a944f79f6760 +Clang.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/901ac11e5f210c0a6f06e88e816333316c880f0b92df482eef7154393535b3a4342534167b1e572b529284d42adbb251e5522dfec1ca027443cba346bb89c467 +Clang.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/bbf33438b1316d8900155c74583e9bcf +Clang.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/5c3eb14173b1bd623007820d8731cf15a2e30cc02cb28d0552bc2e79029cdeaa8e86a516e980fdbd87988acb3667254902287e7a78e1a753f1b338e17e1e183f +Clang.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/725287948e706bfc5749b0bc0844980d +Clang.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/e419a863044afc67471b5f0f1f53dc19f26cd0ee06cbe6aa3f5367e5e8056564abc6625b9323cf24a0a625bf07a9783b0a569738e4ab8c2ee841c0c37c0f6b17 +Clang.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/55adeeb7f199bdfdb0c72e50420bbc38 +Clang.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/94272d7b12ec6342c03865b6f8eda72d71119bc153aa14958f6c324e9844f870feeb97adb2e7950162897aa606c74e95f37aeaaefb1b37985025055212255aea +Clang.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/a8e552a0a30f6f764a40c525bebdd220 +Clang.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/ccd1d96adfb4aeba8b8502c25f747acb0a33c8806c956898ad80f1289077a5ea3f16877129db76af965d7cb232f8ff029b35319bf331ebc3fb4d4f291f11ac65 +Clang.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/182c624c223b3b50be67c59293b990c0 +Clang.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/3e60390a5d20812998554836f249e404ef3e3b63c86852a710c3a47794fe8cd661632f5e0ffbbb7ed71df003491255a4c05ae7bc64fbcef07fe8556c98c294fa +Clang.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/a181cc98814d4a0725664e9e8a1f785d +Clang.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/d19646a1d9413048fed8e63786a5e4a2adabb36bba625c4be02ef3302780e3e67bc1096f94d265aeb77e146986cfc86513004c19962de7bed156e3e2779270fc +Clang.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/7ea1ff3c09bc37acee30d727a81dde09 +Clang.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/b862b29b4fba7d218e5dcb7a4c4bd48d5fdd7417032f1caa9f9c0889d934a1f9bae6bedc25194feece6a768bdc28fce319c7ff5e0bdce2540e3c6e6904eb1a89 +Clang.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/df89bf4ab33ec28f42994939e793388e +Clang.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/e68f083291a3b8f16132dadb779c35b23e3755fd401a86ba8f4619245c3f5e3bd13a8f938800941853fe8fd508e8844b8dcaf061b6ebc9fb0adb955636680123 +Clang.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/f77f3bfa69a9f2e0e8f7191a364d720a +Clang.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/3502d48863f5936cf136a9c61cc56dd4efb92ddf11ef4cc07f9e49f3ee420403bb9dc2480247052e5a99ed467d6e4fc83d1251ed6873787e994693c4b8dc2011 diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries index 86250fdc63390..2dcfecfb56b26 100644 --- a/deps/checksums/compilersupportlibraries +++ b/deps/checksums/compilersupportlibraries @@ -1,92 
+1,92 @@ -CompilerSupportLibraries.v0.5.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/e0651fbefd39d405ec97d7530f2887d7 -CompilerSupportLibraries.v0.5.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/0a067b7e37d98a4c96dd1400b8c1a07c82cc223d11a93a0ee2455c3b55b394eee0cb251e26206495453f2cf8866822fb586ffe105f44e3380fa949adffe8b83c -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/1f4a5e98cd88a08029326ca5e9d47e9c -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/696f359746de592d4e30dc9ad19d5e07ebc1e6635e1f082e249747c42338ef04ce885fee5ad5915ec39fa2866af4265bb6ef580c75874c091a15b64d02626123 -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/8285fd34164fac0410fcec6bb9d8b8e4 -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/df0869d357326c803d8ff33c9734f01457d877e80c4af33745d4ca016144eb0c52fba7aad7e1098eecde3fc4cf41ed971638b4b6f901c7306a2072e8c14c3513 -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/82add6093bda667442236c04d84b6934 -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/81538d75950cdf931f9aaa932d1f9cf40998bc256924c3231e984179f6a5c3eca0f7e1ba315b21f2add3bf9376e3a45ee59ccd8d9f6d765105e05da25bf65cfc -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/ee0d6a9f0a1372e36a02a95b6c07aefc -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/f248e57249af88520f9c7ac32dba45ca03e5904606b4edb682ea514c31a9a775198d02f0892e79124326e184d7906b7a13b0e4f3e7721352b8105cdfa72f89ed -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/dddc8f7a9be9f07e9738e2a027fe8a0c -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/36f9b94f470d451b9c3c2429026292463434427625563240467f50374624a69fbca7ddcb0678937a58d22d32a8157571d3e201c47cc9a2484d1d75d4c0f77ebc -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/12b7eb088023eaf9583ffa6f9f0e18ac -CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/a5f5a6053e63ea1fb0185a0c3a7752a938373da847dffb872c1227ed3a0a80f2de1e4394baaaeeb8e0d8f2a4da123433896742cfdca6f94343bd4d0ab3578c65 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/e5e6918571981e4cfa5a2951e59f2df7 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/5d7b0f4f55b6726ae7317edb170cafb6a2c4563b0f4a90c619da95c120edd8fdce118bbd1e7168110f75cc899b857472fd524a396deb6d9f2552f53c861faeb7 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/7ae11706e9c6c043ad771f2700d06591 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/4f2f01aed00a58f4393cfd4608df1a6df6c9bff6e352a02a2b9af13f14a4436611769d64d082d3b151ba23d3d905ae2700bf469b9858249757ad7b5aae716d6a -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/e922dad7dad1d5f80cc154a6ddb6de35 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/3fabbcedbbc4abfe1e0c01c387bbe2537105937674877122b5b66d6015944a58f547106da1e185c1434de0c1883d356f8dc52968f075a00c6a8a52edaaf88957 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/53741f61d806efe045a5abe0e748aa36 
-CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/b975a8fdfb736ef2b1aede2c89e390df261bfe8aaf8ffdb37887add09263d95f46642c3898ac19ec6098cdfdfc7f0726436dc273e9f70f10fe1abf4ea945277a -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/9687cf768c6c2879261e385c44ba490c -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/02f9accf8273597f6889677de64255e4e399d67377b5363ed31dea7e2118cc24d3b7fad7c0632aea79dee44250b1ff74bf2fa22e4f3e7755de65871854112c14 -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/b62a81b9f43903b3de6fa1c78c03b89f -CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/d44eecb30ccf19bc8dca41c738dbedd2bd2cb6e379a3ab181c955cb9cdf9bae8efeaf7a90c85dc7434520ead7e910d38e92b448cff7aecaef0902684e9b06c9f -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/e31780333339ac64f54ad434578d6294 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c3b91ed90f3393dfc72e7e2feefa60afe6ad457971950b163ffbecafa41cea43a15cdfadd8f402fd8fb61652c224f5b1a04c432fb0f43593749f51ed1340116 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/0f7bdfb908aa3d721428a1ee8412b594 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/3199da41c3df3d702a557c8b5e9fdde3a47c12d4c45fb9094fd194cbbe667663334b6cc0a5169fcc755790c4b5fada71c5094dc8d9a7f8b6c836d3f4c4c6e509 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/f455758e436750092ba2df65adcfd380 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/b5d0dbdff19b5ce076b8ae7b907da25fdbe05eabd47e46987f9987690a3a670d14bd3d2c2343d366ca1ee861b85fcbaccc1460ba3a73571686ef9e4330427b65 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/4cf3790d881b829b4b8da882987d5a40 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/ef5810243af32135da0cb7d08ae35ff8a2cce50c05200450154aa860c181719844466b787faae551aa71bd94e721f2d7d17ab14a049d0558666037862aff2f6a -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/a49e1fa6e040ac86ddd85a3188f83a76 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/cb0292651392a14f952181eb7a4a0ea6359632e96b017169cf4f1792f44f2846b5d6b2b5d334dee490262dd1c2d421de49d1f4a919402392f77fdaf60c1d19a3 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/3f64969e0e70dc8644fe09637dd1cbe7 -CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/0a71f8b731911019666bdc82f42e306ff1801321362ce6fe58988c9a1b110cd032a01c11fd0f9a6a3fbf6c6545f3287e363f5b3c40ef2eab0659638c38687196 -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran3.tar.gz/md5/28f58931f66a3405fc4c99ce40724ece -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/d5290079264cfc6f716dcc9171f8412369e685c7ba0b9e82ae3d764de41671fbb4a24fdf7ebae9a9b913393837c2e41951326dbf3e870340fba7121709ebba8b -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran4.tar.gz/md5/f98763aae801cc7d88124bea422f13ca -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/da2095a462637ffcd0825949f4bcc86be9484c9e009648dc3c2e22e2fa19c65124e5e45f2694e85616df49b1181e2f4d2b886d3b83401c09ca58207db461ea23 
-CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran5.tar.gz/md5/1bfee57db4f2bdd788e59e34d0bb4506 -CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/8f4814d97d6cd6c1f0c1d23fce875c40b6df7de7a8dc66e66681ba3c533120cb14d9d018808ff4e33dec53bb8958fbcedc9be6ac70817839ff89a0db5c0d18a8 -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran3.tar.gz/md5/5da7af0483ffde929c58f3ae411f6489 -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran3.tar.gz/sha512/97e56fe4fe0e10fa0d57ec10882a62d290829940049ffce7a8d81a843b91c7844e53d737bcdbc7a5e8206ca9820a7066fcdd7d0eed1e831d7af96222ccca1224 -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran4.tar.gz/md5/a0b5cf513f2f02107c8887ea5e30cdda -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran4.tar.gz/sha512/aeeacfb58094751fe5cec87825ebb02a22c58d3e7300b6ca6066eb717e28ebecff230838c32935ac11376a6efdd5a0c44fe0c8e7d5b9a1f0165171c2b67a2d8b -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran5.tar.gz/md5/569ef42292d8cfd157026b434e93fe4d -CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran5.tar.gz/sha512/daf543fbe7e80fd63220f7c08e0d6b51d45ce9e0af592a591eecadcaac9b859ce596df2bf8fcb3fb72fb799f869d0caac28acb5d26b3c3aed6dc80245b90dcce -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/f4e0f3d40f7f77d32f26424dedff850f -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/57e35c39c4c93919cdbbe33891b5938918d33840ad33ed51a010f9deab791d60fa2d030d3e14df6e445e0607dc9280b07ca287a3273630bf7e245d6ab8069cbd -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/d366731c11314cb908fca2032e7fefca -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/a7e087e718f9d8cb4957b8bf3a4554faae97510b25d88a3e9ae4241cb69efa5b520bd9424a0072e7d712c9435e6900690c56004a716a716838367e91fe20e11d -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/eff855bb45f038c9d74c67ae2eed5641 -CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/e674d60247086bb8029270406d246a4857e668442a77299a431ec837446387bd1ed2de5e0f9f6985cc6e5d15b6692f40b18e0016e7c9d4e95a3770dffc19b44d -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/0bfe78d226b3d89a83b54c6ff39239e1 -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fed14514c9603a1e4772d2fd5f4a48da751c10e34b6fba5e0c35ff40b8ed165af6daebc051fa86751bdffb8f820ac779215dc3b38c4ff5c1624214b61d7ad1b0 -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/d5219b60117555a3ccd41ab406d485f4 -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/9268d7c2c6ef649dc753757f9afc7ac1382e521d02c58a91eead9873f2a80f215f3b67f9a33abad53c8bca18c19ae3e63804e01e3109c939d33555c7ec8c5b1a -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/1f620c9a049e00b8b11c3970a23f2761 -CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/6ac900dfac9268334c9b54badbfbec323151353e8d87d3199f875a505febf863766ded0c52bce2939e5975fa6e35a28cc16c88e7c1cce37d65725fe275813606 -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/c21c35b00ed7ad0171d63006f1a4170d -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/f993a616a75b1f5ee140ed47b6e4aa981cffbbffd795fc0cf9df9397a6366a4507a158530e961c398bab656e7d51a27be026088678e0c19485ef0bad136bb69a 
-CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/f0cd5c8631256f3b903e95ad3623d702 -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/81de3f699169254fa83a3ab8b6063ddfd300065edf90f15239b0a304f3feea9534acba7d982058a7712ce94dcdb1ae036502f276813a96f8254e323787556d63 -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/6030c114c1250e99958a0727da9d6daf -CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/1d4be1c0718aeab056368653b7f34bd5ac3c85edb9fbdc2752b8c4877fcf5d080774506519cf285954485d806bccc18323f6c45f069db8bd314d064a2cc1ed66 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/b45ac0c04357de9d013df598dd13f3bf -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42174d05c7165f87693efa09facc9405c9d6eab490c4b5fc74ba02e1e2e871799a24dcb7496e0693f30f9c3fd7e81020b77a3dd946832288769063f6d2a31aba -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/761998b08e4b460cec95468adb850c31 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/32853dcb3202e735325e1e0e3d88e2e446d7c88d45bc462d4e91f7d57dfd78b0f3381302e72163fafdb1c2cef53d4822e1c52289081e06b7b74d67e2ed0d34c2 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/dfd50d071702f903213ea0c6a42ad81b -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/3d6ecca7689bcb1925801d26a328790228c564bb731f6fa25d88763eeb22cccc4409dd6376c7b574ec242fbf85e41fd82d038a2650f8d33bb850b9a9a9f9a722 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/0b374bc55dd0d5f4cf34a12d4901c022 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/10db23cc1d1367f40fed6c6cfc232fdc49f55e666d3623faa1af40dd781ea7a5d37b6b5a39524f0fc57d6d49947f429389bbf7075f10163090d7ea48903e688a -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/1e28cdc7937a500b081a1f4d340190f2 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0b635b8f594739453033fd1dc5496976a8fff314dd078e2d8248d3c2136abaaa610ebc45252a81d16db9d91a0ec20a552f1bcb65ed3b50a627e40168e7f100e0 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/f6fcf32044f69d8305a718eeb7651614 -CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/5940a145a3203d5a4a9b7cd9aab45b8bcff08a43a69a8fea67a9e18535625c8ecc051ba344421253b2f96eaa1a007d42555897a8f8aa0e8bd5dbf1ddbd38f197 -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eb46728ef7d3ce955d5a497a556138c2 -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/922d3a85059e7cedc6e0e52687cd6f22cb708677a65fcab86f7571737d8f17455f15b3f1af7442ee5fd04a437f226d4eee374d0f353a10f8f7a87160d7a2351d -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/fc1f4fc44c08f0c3040b976558a35e3e -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/5406251fe1d1d1901ac4e6af3b8e9394fcaee2fa6a4f3d2817161a1626bc6b45d7b184f9bdd3d2e6571640f40b4e06c61f321358ad8fe484871ab9b878801a95 -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/e1b52fdb233c9667610867e278e7719a -CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/efadc4efc419808cb289c8c8f52664a72f2646bad2e8e02533456cf9afd613d4cbacd121da786316206df8f65b5264498f25adb04f7673121b2a58a20c4a75b9 
-CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/a449351de41a3140534d278aacedc54e -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/db5bfbd161eba076598465cfee277418c6e9f4f0f7c4672a437c68ceff374f600917fdcaaa9dfdb945103d2b5c9786663e8e9403f6fdc796cda7c529dadf28ba -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/facd6a008270b85d08ca835556921127 -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/236438e05eb3f50063aea90522e61f10a03c474f3c26117c071bf94d4ca24fae56e09a565cbf00dc5d1eabefec804fa5503ecbcc324b5da00a65b5471fccfadf -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/cd294be65ddd327d6c0feeca8b13f922 -CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/73dc99009d25fa0ebafa77d7c5747d21a6e0778a6266a2408df885d9553e4b8029c104e1fe174526d9261252bb564128ae7cf9058268475d168c79d19ee4f0c0 +CompilerSupportLibraries.v1.1.0+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 +CompilerSupportLibraries.v1.1.0+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/e084a4374be45ba52279682c640449bc +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/d4aedf5c08e13fd9596476330f69e374af64373f7bca0e4df6cbb4d1710d695dce23f2655ee368c3df2049b7b0ca1848c9e01a437fadc0eb08937c6a7cdf2a27 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/6b3975f25be16ea1370ef0bf353ac752 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/d6298517be1ce350a61d1c1a1bf9b27a541382aa8ccf3e86eeadd7c47e2fe88facd17139a3878adb939df869a330264a942d280e3468d53b61325df7e31daaad +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/7134b79b71059d4da79224df1ca0853e +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/e66590a37e756ff33a84514a7ca2bbe2e1517f3e901bc66e40139e8a318d6cd8e329e0c2a7c557ea39e6db2f56802d485ab81b87be1373717b78474b1c7bf7d7 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/fd37789f5745a17cc9a85902cebf4698 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/9ff4e9be39f115af2e5bb6a5c88b3940f15a010952cebf39da22e7a5c6744be2f905bebccba092db0a89cf82e8c0e1a3e61b74d4204d2a6648b5469f3ccb0d12 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/e9286ae9299c57d5df7b795997b4adf5 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/ea64858481095e0374be330aa2ac84b394bc3e5351b9326137c9cd5d15e6bec47d6e5f672a216572dcb80c3aa6fcb08950cc10157c264f429a93c235028d79a4 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/b19c6cbc5b2a62ea76dea64b0f8ae488 +CompilerSupportLibraries.v1.1.0+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/090d659f7e4a7034117e2bb2dcad0ef544cdca898bf032222cdb81d32af6e6528be842d2cf55839fe397c2ace05dd4ce920cf98cc96324ae18832016516e6cc3 +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/26fb41031e1b797373aea7a7c4d7be3c +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/8fd0b6c990681789caec528067b4bcb9661f9c0a5e0268927d4e88565fa7005db3b592fb8e7830cf32b3fb4ce54d6db747dfde896f93bd38f65b7a1290a2399a 
+CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/6ed3f3e94f662177c3cf3c3734a5c1ec +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b8165630cced0f7880cb6fd6263cf39bbbbda668eccc94219720078a85a641c3b1b20648960041aa3a51108ab6df087b909c572d0690aacf8b99dc5496ff7db6 +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/86060cbbe966a59f18f92a9b2fab95d4 +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/0aa0ca0ff3a4d541c7a9599ca1adae7391fdd3fa841f3055ecb8635096d0d95a0763758d7533c887b38a655af55174dfcb63f470147b28a256b75a85c8e47801 +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/4acf6f8929fb8ec9fdb8a0f1af06260d +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/3b2e0a5f62bd93434d07848c3045479a1a05bd8589dc976a5680e13805db5adcd9abdcca82edee7b28b4c4a9413ce795784a8a0f0a8fb7346a439322c27c96d9 +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/a75a927c3e14bee6dca29b4907def681 +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7853fd187f9289a8282d34112b5277bad13abe9dd9b6c796498db2f1a080b2c81faa6119df9ececd09725a019bf99706894765c9c20f618e359adc153c3181a2 +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/ba8545cc20e6c602a0526a3b1fc1d2f1 +CompilerSupportLibraries.v1.1.0+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/7a7f3a7761deb068efc00ffc5d4bf4df365cb27674ce73abbe2305b678285161f1526f4facbe27fc11076d99b2079976507f78f5b463bd9057ed008e9d52f9cf +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/26fb41031e1b797373aea7a7c4d7be3c +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/8fd0b6c990681789caec528067b4bcb9661f9c0a5e0268927d4e88565fa7005db3b592fb8e7830cf32b3fb4ce54d6db747dfde896f93bd38f65b7a1290a2399a +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/6ed3f3e94f662177c3cf3c3734a5c1ec +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b8165630cced0f7880cb6fd6263cf39bbbbda668eccc94219720078a85a641c3b1b20648960041aa3a51108ab6df087b909c572d0690aacf8b99dc5496ff7db6 +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/86060cbbe966a59f18f92a9b2fab95d4 +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/0aa0ca0ff3a4d541c7a9599ca1adae7391fdd3fa841f3055ecb8635096d0d95a0763758d7533c887b38a655af55174dfcb63f470147b28a256b75a85c8e47801 +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/4acf6f8929fb8ec9fdb8a0f1af06260d +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/3b2e0a5f62bd93434d07848c3045479a1a05bd8589dc976a5680e13805db5adcd9abdcca82edee7b28b4c4a9413ce795784a8a0f0a8fb7346a439322c27c96d9 +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/a75a927c3e14bee6dca29b4907def681 +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7853fd187f9289a8282d34112b5277bad13abe9dd9b6c796498db2f1a080b2c81faa6119df9ececd09725a019bf99706894765c9c20f618e359adc153c3181a2 +CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/ba8545cc20e6c602a0526a3b1fc1d2f1 
+CompilerSupportLibraries.v1.1.0+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/7a7f3a7761deb068efc00ffc5d4bf4df365cb27674ce73abbe2305b678285161f1526f4facbe27fc11076d99b2079976507f78f5b463bd9057ed008e9d52f9cf +CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran3.tar.gz/md5/39dc387fd58ef02c461c7906ceb110e3 +CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/1296ac707fdad620c65256686523f2b027c8359f54d1f8354ef5d1ba514992c7269aad26b706575509b5e29d0ad3dec1c7d32fe3bcff0d723d6a4890819eca46 +CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran4.tar.gz/md5/21a76d54d875ef09db2cdce77d328c2e +CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/9c6bf15338ffbc7113c536e145e53bfaa693007b971f83ee2db820d7d54018bd1cfdbedb6bbce000ee7aaadad1561e91f5ac0e0519bbfccbc3bc57fdfc0eb7e7 +CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran5.tar.gz/md5/f028f2c94f28201701ef6ba4fec9abc9 +CompilerSupportLibraries.v1.1.0+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/c231af1bb0fd4f733278f883837fddf574689bbd7c4dd46cfcd1478d784cbeae1fd785d7cf9f4b0f98cda08819b63a20d5026c6beb892a188fc979b7893697bc +CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran3.tar.gz/md5/184436dc05207a653f13aae3d82a2e1b +CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran3.tar.gz/sha512/b6e1f969528a168de087f472eebd23a4daf907aa48f7c5b42c35960b1cae3e6ca8f512982d69b757f39d6dc07b46f74c84e549cb22354a2f55d1265cba7b7013 +CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran4.tar.gz/md5/545bee22cb35d1c4c1381009e72eebca +CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran4.tar.gz/sha512/78a65b9e7cda79cd648a1ae09daea970eba9d04fd5ea41bc1e37b065cf5c53974f759590292876f57c7f65139be66a6c381aa6756cdda7b36845cfed1bb7fddc +CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran5.tar.gz/md5/3f1a08601a6a7bbd4ecfa36c8f6abbd9 +CompilerSupportLibraries.v1.1.0+0.i686-linux-musl-libgfortran5.tar.gz/sha512/0e225e0a7b651f6b3fbccf760d08d66f2d8af1e329d14ef67fd3968a46905e062edcf75f60d7540f0cd7dabcd3ac9130fa0f63e198869bdc6a9aabd391652805 +CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/9cfea65fa6c1b587d9b4b84ee64af166 +CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/b30e24982d4140c312759b8c26d4b72845fc1fa4d7fdf49ccfe9994f7bbf1815ed006a228f6a2185c5b8f9d596d0b04debd1d8392e705c530e5177a22c7c081d +CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/af99905c4f054fe13842559f7201b3ad +CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/96513ff22dc16cc259ad392862f1765218474bff24e561f14c1e0d349a6bc433952d9b7b73236b56722fd971e0b864b178d8a9f8d9499de4595bc9857ef17a95 +CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/1be078cd374d3b501b20d9ce679009ee +CompilerSupportLibraries.v1.1.0+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/63097283c987dc439f02d72a6f70423acd962e4da25acc04185e654c7f16a617e34ad7efabd624fd2e70119e79e4d4806f76286d36d56c353f9e53814e75d3e4 +CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/156ae44ab4172903ad40932ca78a57ed +CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/e800c20342dd9886c4c3f57e92278d6d41c544adba202ef3f5a6a4f8211fbbd8fab65f169adf7320b7be8a2ea02c0aa1afedbaf0b3f9afbfb691759aaaaccc4c +CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/cb01c02fdcbd319784034744172e1eb9 
+CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0ba635d39958672a0a55069521e20ca6c0f9c81a9f55c360f6043acb415709edb72bfe8d0e83c25cdf9ace8a9e9ba10e39457e234e3905c988eb95e0e0ecff3d +CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/f9592263c6e72228c492ed2ed216f29e +CompilerSupportLibraries.v1.1.0+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/cbe29742959906e3fe9a356991ca1f09d4d8cc2a02a9af8624b3e02b4ab59e33bc05082826f7c67c73c6b91cc8e1e5c4a0c275c21c5f8eab8b58ed942cdcb55c +CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/8f0db2ff4688c3f9e1337a28976d833a +CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/98502e07499ad9e22147a977b1fe55320e75b6229c3993f1cd1b71e47a09ae6bf78e2341ce978ea72d33b111d09b813a332bfe8f4f6dfb669509c300fcec2561 +CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/980a1b8e6262c4a7b8f86b84f7234043 +CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/c0755d4fbb1b6fd7755d7508d7df929feabe7e5778661397ef0205e21aa3be565b39ccc2a08ed0d958e812c0c759be68ef52de09fe92ebab6da342b309a0810d +CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/4b3cdb65e6114c77fd1e51da69e41afa +CompilerSupportLibraries.v1.1.0+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/803cb771875d94eda554bade8197b31aab988ab0c957a2f8853d82d01418be9fee7d9d4b7ef6f5b7fc8d1825ab22083a71d467eb976d5076fc5d73a9a7a30440 +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/36638a444b185954bf12169edace1914 +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/33f657775258d0da1a57fc03c5e8ed203946944581ebf70af7b0205f9bff7fcd4f2bde5b6fa3b01659c51f106d0e6df5c7533ab8d3372c4895675854688e01dc +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/36ac52a361fd0f4be5c66572345af7a4 +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/802bd8089bb2a3b5959a47dbade2199b46c247d0a793cbf6fcbc97b9a1dccd6d8585ac7694ae4bef1dc3ba21796ae5b53f995c8793ccd7316e8fde68ac121f83 +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/5911da90a0fc86d665aa86cba12e9d61 +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/c966936dfd272d9706aa51ed44abcb8cded899b0caa8b12ee787a0fb1569fa90a1cba89c9a9b83e05c0993facc615feb851399f4799c06956ae3064d172c964d +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/5f42a52e72f0e79530d71733a93811bf +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/51078ef0e447bb181003a50b899b39a9d1ee8ecc92fc293f5a358d836ddf21d03dc44433ae28aa21fdf756c2912b2d3f1e374a5ba108c8c34552fcf32f93fd0b +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/f3bbee1114cb85c266a45f64632c6911 +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/102e638f49ff0f62644f15a931c71a16b96f02f4c90d1b8bd378e0d7c54f4e8a150cdb5ffdbc3dcbafb83131bef84f9071cb77e8debdd98d8929c7b65401fc54 +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/ff2b0ebdc7ef83cf8b48bd2ae76c6430 +CompilerSupportLibraries.v1.1.0+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/0730ecf1b9476612cadc3f3e7c1b227a1967edc091c88cd0cc19477079d1739fd5e7b1022ff686c0c6a2404edaebfb02c810dcfc1aa4187e7ecddb54998ad96c +CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/35642304a9a2f435cf5214b2715198fe 
+CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/a67f41ba31c99a064f504f508711537f9e90089ca5352bfc2698c3fcd3e499ca716f07ffeac4fb1b88c2c934f7f380f262af8c863d3b16ac7e805d5c805ab358 +CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/01df0fbb265e5ff1a480a7a5e23b0835 +CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/57a79f2b8e846c1514dcb18420f26ae2889962040f410b746836cab4395749155fa9cd9d00d4c25954c0ffa72f9f3823b1b50688a20ddf675301f64e0d4b5c7e +CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/1f1f6380ce8815cc9cedcea0b40860e7 +CompilerSupportLibraries.v1.1.0+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/a88ea8af8c8df792861812bfdf7f1bcaae31582ab78ce78b47a0dc6fd57b93441c0471f529ce23877131ac9701c6eed72ce89241746e18271f3686fbd718138c +CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/5eab740e86bfa7656f6a08038fe2fa63 +CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/3dc6b7ec39ff7dcb71478376c86ce34a35a62f049f6203722c5414b7b635ff1b412e02d8d24c13c123d18b2e914780da4639538676694e342a1a6b507691ef25 +CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/9718c79244ed31c367e715f1f563b8cd +CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/eec2380c4e182f4e923142736a2c4aaf11a525a5f966fed7e4ec4b431ee28f3842a4e73495df116604f74b419e6d398576ee3dd21d3c0c53b92167dcfd0f6b84 +CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/58d7b0b79a22f3aade7e4f39eec898e7 +CompilerSupportLibraries.v1.1.0+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/efecc0ca09ec6b7b8898c2ffd333c7e0a6a44706d72ac0e5010409aba92ee70a88b6fd77434bedafe0e013561f8d0c74b5a274808a6c9499f6a3005a7691785f diff --git a/deps/checksums/curl b/deps/checksums/curl index 77cb46923aefd..a6eeccca3833c 100644 --- a/deps/checksums/curl +++ b/deps/checksums/curl @@ -1,36 +1,36 @@ -LibCURL-fd8af649b38ae20c3ff7f5dca53753512ca00376.tar.gz/md5/f082283e6a35fcba5b63c9a6219d8003 -LibCURL-fd8af649b38ae20c3ff7f5dca53753512ca00376.tar.gz/sha512/3bea5fa3fb6d29651daa923ae6bcb8eeb356ab9f2a1f3e005a6b746b617b0cf609aed4cadda4181783959840873c04b18e34e45ab973549169d19775a05ea01e -LibCURL.v7.83.1+1.aarch64-apple-darwin.tar.gz/md5/de0048ffcd0cf779f648c58df4d87ea9 -LibCURL.v7.83.1+1.aarch64-apple-darwin.tar.gz/sha512/874d1f83e0ff21ff8a5e39f29ca03588142e5f292a7e3bfb36f6f6f4f3e5b518b76dc8c0272a2df1167daed108b92f0e620277e6f3e2c091aa60934d18c292e4 -LibCURL.v7.83.1+1.aarch64-linux-gnu.tar.gz/md5/55bb17c62f5cf9894770bbc6e9fcce45 -LibCURL.v7.83.1+1.aarch64-linux-gnu.tar.gz/sha512/bb1e2246bb169ad7cc36749d56cf4bf6d3bd57bb9d141c5d807be5048ecc3cb3adeef95438d52c6360b5e70ba0ec75efb134c381affc812d0f5e1d8e76ff9884 -LibCURL.v7.83.1+1.aarch64-linux-musl.tar.gz/md5/52ce54a88113140c7f7c57895054d68c -LibCURL.v7.83.1+1.aarch64-linux-musl.tar.gz/sha512/dbd385d28ba6cf9e7c0ca05e9b10bafc041320c307ea7571bb972ae90b71a29ffa50d7c934d358c9e35cb168d3a378589cf0de66d5f13fe69da8a44ba1712284 -LibCURL.v7.83.1+1.armv6l-linux-gnueabihf.tar.gz/md5/68150dd7d41938065f444a1fc162d8d0 -LibCURL.v7.83.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/0d8eccd3fc30160899789b91ff12ae08d97f48c08c25dcbcf737ceb9a9388fb082b7abac53da6e4711f9a5ff40700ac735d748f13895ea5205f919449182711b -LibCURL.v7.83.1+1.armv6l-linux-musleabihf.tar.gz/md5/963de5f46421087fc4f0c0e3674d6a5b 
-LibCURL.v7.83.1+1.armv6l-linux-musleabihf.tar.gz/sha512/a9b491384a19d4cb26ab48a09dff8e58989b0e2ba8f143a0740daa582ddcf4a29c21216045baaeec5d121922a2dc38e9072174aa8f5deaf2d38ea1997a1c6ba5 -LibCURL.v7.83.1+1.armv7l-linux-gnueabihf.tar.gz/md5/b64791ed06518e53d5e0bc713bf82af4 -LibCURL.v7.83.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/30dcbbb3f944da18a9764728850fe24ba7612d11fe0b81f6c56e7735479128b0a55bd43d29cb326db20dc8f1fc9a1407bb7f54da1526d5fa182ab223e11377d0 -LibCURL.v7.83.1+1.armv7l-linux-musleabihf.tar.gz/md5/fc64fc8de930b1f2deee6910706da54e -LibCURL.v7.83.1+1.armv7l-linux-musleabihf.tar.gz/sha512/04e9cfdf55403ce2c7077356f05a98fe6a94772b5846ceff0cc81f0ebac95df85e259ecf4ded2baa369f55580892d083c74460e436a33c0286a797db60497558 -LibCURL.v7.83.1+1.i686-linux-gnu.tar.gz/md5/44a4f66754105b24102135fe62691aab -LibCURL.v7.83.1+1.i686-linux-gnu.tar.gz/sha512/9200ec12725fbf93039e534625f8cb14607be820df27ac4bcabcf8332f2e5214604b6c1efd6f4d1ae6c554b8cdd0808a1dda0f9e1fba7764484c0b00e351db7b -LibCURL.v7.83.1+1.i686-linux-musl.tar.gz/md5/bf0a521a03bb216430e66d29e9bd597e -LibCURL.v7.83.1+1.i686-linux-musl.tar.gz/sha512/ef549d533d1a1d40a0e10ec68611f586878fd3a218a9d388ae3328e4fad3dc613ed700671bbbd1f62554555073a7ab224c122fb31e7bcc6c751a7d0ce6fba9f6 -LibCURL.v7.83.1+1.i686-w64-mingw32.tar.gz/md5/c48af4c27cecbc38694cce627412eceb -LibCURL.v7.83.1+1.i686-w64-mingw32.tar.gz/sha512/9dbdbc8cbeafa913debfeed88b0514355fec89a48945716a43baae94e9855cb84cb9ba794cd022958636858a5be9f671f92a40ad3cd3b5145245c94cb26112d7 -LibCURL.v7.83.1+1.powerpc64le-linux-gnu.tar.gz/md5/50256b715d014ef9a2b328668a71a5dd -LibCURL.v7.83.1+1.powerpc64le-linux-gnu.tar.gz/sha512/730eef536baa0be00fc9f1e87f82fb84a051141bab277f11873e7e2fdaeced3964e9a0e4343504e1cb7b89fbf92df8890fa33eaed9b3c6555171c8a8adbf9dcf -LibCURL.v7.83.1+1.x86_64-apple-darwin.tar.gz/md5/367d7944167a83ff2a8d4982c8504e47 -LibCURL.v7.83.1+1.x86_64-apple-darwin.tar.gz/sha512/591f268ecbb0f5c43266876e9e0f33235b5c2e96aae4386d22c50785a4466e4b3f14e5b48117f1751733492c4ccc54638bfcf10c904d12145db7881e07778a23 -LibCURL.v7.83.1+1.x86_64-linux-gnu.tar.gz/md5/57bf4c88945b3f83e336754b075b35f7 -LibCURL.v7.83.1+1.x86_64-linux-gnu.tar.gz/sha512/71984f5240c5962422cf69069b3f0d0529a64c9ccb9995b9f26742a19dc12ae9700e888fe8b79b17edfcaa1b13b24a56b4d776453d83cce233dfa9c3fdb79660 -LibCURL.v7.83.1+1.x86_64-linux-musl.tar.gz/md5/64f3026a24b6a7df77e8325a108e76db -LibCURL.v7.83.1+1.x86_64-linux-musl.tar.gz/sha512/bf0c16b90b7b6ef33ed7d4678df539f88d041f5a78942ca5549d9d0e7ce8cef38af8da1f68d9d3999f969805dd1da546da3d289b32dad442ec1b2b5e44d158cb -LibCURL.v7.83.1+1.x86_64-unknown-freebsd.tar.gz/md5/578ba7e5607ce2de16132ab8f7a213d9 -LibCURL.v7.83.1+1.x86_64-unknown-freebsd.tar.gz/sha512/42c5892038aaedbbb19e192fc867e00d354da7cdf11c90151124f3c9006883960107663eaa865ee482895ee5784b5c5f487ea8aeef2a8ebbbe51f59d693e0778 -LibCURL.v7.83.1+1.x86_64-w64-mingw32.tar.gz/md5/5e5bb662234dd4520f4e4f73f8536daa -LibCURL.v7.83.1+1.x86_64-w64-mingw32.tar.gz/sha512/4553dc10d464771166b8a53473e68a23baa6fb8f65f09a5a274826d313dafc3289348e0e8026abcec6fea98e461aca31001176387526afcf3966167b71ec2178 -curl-7.83.1.tar.bz2/md5/08626822d50cbef47503f220718b920b -curl-7.83.1.tar.bz2/sha512/c43ec2da9c8609a312f723c0b3eff7e171ed1258c6ed1af16020190d4253e6bea63ca3905f04d0ca46a97986a8bb79be1d532f8d68fcbdbacfa80cc42f134db1 +LibCURL-a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0.tar.gz/md5/e8c53aa3fb963c80921787d5d565eb2c 
+LibCURL-a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0.tar.gz/sha512/8e442ea834299df9c02acb87226c121395ad8e550025ac5ee1103df09c6ff43817e9e48dd1bcbc92c80331ef3ddff531962430269115179acbec2bab2de5b011 +LibCURL.v8.4.0+0.aarch64-apple-darwin.tar.gz/md5/7e1b8b96f4f38cd775c1be5bfd4d9b14 +LibCURL.v8.4.0+0.aarch64-apple-darwin.tar.gz/sha512/598c8418731770387f9d1b489eb0794978c4b98c9098f2c9b024fe758e6550ff82202fa4c911a6029ac39a9d395f1dccb9b5539dac788c29a85e79c958ab3bf9 +LibCURL.v8.4.0+0.aarch64-linux-gnu.tar.gz/md5/c8ef2231800a5ad488e2952e14a48710 +LibCURL.v8.4.0+0.aarch64-linux-gnu.tar.gz/sha512/1cc786ce4836e6ae904685bb44e47354eabf66dd82229d84cbeaa0d5b549900b77b68f878d915f04049c4f4118e5849262a6706fa9fac845b8a976e02140dea2 +LibCURL.v8.4.0+0.aarch64-linux-musl.tar.gz/md5/7fdc14704e467d7d9d71b54d48f690f4 +LibCURL.v8.4.0+0.aarch64-linux-musl.tar.gz/sha512/1af938d244e141c6ad77844c115afbff0ab75145e4e1422cf1444226ec0183a7449040c4a86c3dfb445820377278567427f8df77823a5ae0bede705a9b01335e +LibCURL.v8.4.0+0.armv6l-linux-gnueabihf.tar.gz/md5/77534d50dbb631146c85b6f2b92c7f84 +LibCURL.v8.4.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/b7a4294ae8f7d24b5dcc10df52d76dd35ca1b4d0dc955307968f6f1a6a2edef194d134bcf6891a960d63e8763b1bc2738786db84393183210730d4d2d486dcdb +LibCURL.v8.4.0+0.armv6l-linux-musleabihf.tar.gz/md5/35e4dee0f70564d3d16d2dd8ef7c2c5d +LibCURL.v8.4.0+0.armv6l-linux-musleabihf.tar.gz/sha512/e9292a6dc52be228919a9af2b1e73a6968af843d508ffb216ae956a651822ddc1bcbb9fce495da05a11ffb9211903462f7504aa5da78f19f4db5c63c80fc9baf +LibCURL.v8.4.0+0.armv7l-linux-gnueabihf.tar.gz/md5/dbb847d7f7162b4a2ea5395eca8d7c30 +LibCURL.v8.4.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/3a2c2779392eb19fe9202397688466fc05813a8e35ea6bf85d628e802759c97fae346939902e897313914ce2b38dcfabe0775be6a4a1b1d4264a327687d6a2f3 +LibCURL.v8.4.0+0.armv7l-linux-musleabihf.tar.gz/md5/f9e9557080e09fda47c92f14d5c89de4 +LibCURL.v8.4.0+0.armv7l-linux-musleabihf.tar.gz/sha512/603821f647d17daa1d731a63c7fff35b756b052ee53b6c86d9f35582356d57914336f8b9bb44567f10b437b2c0c677177959ae691e0244bab1262818eb61aa1e +LibCURL.v8.4.0+0.i686-linux-gnu.tar.gz/md5/eabf3f3392cf8717782c2cb1c1b71478 +LibCURL.v8.4.0+0.i686-linux-gnu.tar.gz/sha512/9584f28cf8f72a6e15afb96a2c27d82b74a12042311c07aecb61732fd6525227f082d448b81950a67c6cc880b39c0921cf039e97bf0bdd4329e2196de42ec81e +LibCURL.v8.4.0+0.i686-linux-musl.tar.gz/md5/dceef0a3aa971e3b7e3db44b5de159d1 +LibCURL.v8.4.0+0.i686-linux-musl.tar.gz/sha512/483dfe8a3b4589e59794b3b4efba1e60baf9fb45efad6c0b1b9626ad6e492fd803fda3f99545303a32749010df5b9bb89faf608d57eb4fee4c3f387d0b197a5e +LibCURL.v8.4.0+0.i686-w64-mingw32.tar.gz/md5/2a0a87e126ebaf7bcaff746e30f6905f +LibCURL.v8.4.0+0.i686-w64-mingw32.tar.gz/sha512/5a2867e8ac27a82e30bb88ea4c3f52faddfbd0b199f25dbef3a77a47be16a4b9299883fc84d50ce965aa2d01d473310b4cbc28c60dad3602f2ee6b56345ea6a5 +LibCURL.v8.4.0+0.powerpc64le-linux-gnu.tar.gz/md5/1f532601778a4dcf9932681e0e47833e +LibCURL.v8.4.0+0.powerpc64le-linux-gnu.tar.gz/sha512/6ddf56c44124e682eda9cedacb8893ada1e07b6a2cb10fd301afc04b3d1c981b25129b2495efb4d5e37784ee2bb5e47da13be9c2f367ff397a8ce7122136f6e2 +LibCURL.v8.4.0+0.x86_64-apple-darwin.tar.gz/md5/d7c9ab15d9739df8fb4329c0bb2546f6 +LibCURL.v8.4.0+0.x86_64-apple-darwin.tar.gz/sha512/5d5f17534053504dbc3c0b1251f329358bac60c14b734f6b82b4321f96f944b1733254cfd7489504c41d3d2cda5ae991e6cb4f9fa864990a6660cb4464f14530 +LibCURL.v8.4.0+0.x86_64-linux-gnu.tar.gz/md5/f8697c76204df4ac5a04608b6a480f3b 
+LibCURL.v8.4.0+0.x86_64-linux-gnu.tar.gz/sha512/ed583b6abc52f6b5a8ee0c93a4e9b877d2830eb131117ac17da044d2fadb218bc293ec6c625a526583aea01f41994a3f9bb5ed5b4263838bab8c251c672b8160 +LibCURL.v8.4.0+0.x86_64-linux-musl.tar.gz/md5/fcb9be4148376414f94d3ddb54f42d12 +LibCURL.v8.4.0+0.x86_64-linux-musl.tar.gz/sha512/8e53219d68fdb4c412de8a19f53572dbd74dc35bba3c6f3c5aab622f05aa76a28e386201d9dd1c78d37d0ea636bb43ad98d1e19599480ba307cc45098600818a +LibCURL.v8.4.0+0.x86_64-unknown-freebsd.tar.gz/md5/aa4b18c0b8be4be432e24c9cc88670da +LibCURL.v8.4.0+0.x86_64-unknown-freebsd.tar.gz/sha512/69548673aec3d199a0514ae0e90d6f8700ace47579631522a93ea351151af258127f3bd1fc82f110f22ac6b7c3eab884665773d99842dfc56fa94040a69f1043 +LibCURL.v8.4.0+0.x86_64-w64-mingw32.tar.gz/md5/b477d32139cef0e810e88ceed68b322c +LibCURL.v8.4.0+0.x86_64-w64-mingw32.tar.gz/sha512/3035a1e9af9eda70018ef56a5a620d5b07b46f63739e0f2e028518c6144577d51a9f9a9e76ed1bf28ee4a5811ea4f502ddeab46694432e441e0151056d5c7e6d +curl-8.4.0.tar.bz2/md5/1a61fde1fe5c7db5c29c1196435188a5 +curl-8.4.0.tar.bz2/sha512/27a27c5427acce3501833ac37125daaa03c90165692252115329c820a5a6396cdc79c11d12fe962af37a329549c2051addce3c91e8a8bc3ce3a48cb09d860086 diff --git a/deps/checksums/dsfmt b/deps/checksums/dsfmt index edadf5c01b1d7..63c7e26f0eb43 100644 --- a/deps/checksums/dsfmt +++ b/deps/checksums/dsfmt @@ -1,34 +1,34 @@ -dSFMT.v2.2.4+1.aarch64-apple-darwin.tar.gz/md5/0299af20dae6bed519635900687f4aeb -dSFMT.v2.2.4+1.aarch64-apple-darwin.tar.gz/sha512/5f20bd7602f09dcb23299d979372453db9a0e76a66129d69cc93c4b45a65ad377486f3cecb7093ff65307f515358420dc318b19eaf5945ff2fbfbe6886e95efa -dSFMT.v2.2.4+1.aarch64-linux-gnu.tar.gz/md5/78a0fa53ad3db17f2849c744246a6bc6 -dSFMT.v2.2.4+1.aarch64-linux-gnu.tar.gz/sha512/b855bf3349f1ee33978d2c35999fe24a91ee17c5af345881e26351099cea05477528da9df43773d82e20917b4b1dd8c5590eb5ebb99cb5c9c425d03e38192e32 -dSFMT.v2.2.4+1.aarch64-linux-musl.tar.gz/md5/45829fa624e98f806e184cfdbb918a7a -dSFMT.v2.2.4+1.aarch64-linux-musl.tar.gz/sha512/28823838bba7c1bb40f636835b1f7b15c49c5395a72a1261f3d5eb22c54b487b98bbcd7cd79f12297ffb37c62bfc3b69f0ce9b7036b5c2662d6305497d2e09dc -dSFMT.v2.2.4+1.armv6l-linux-gnueabihf.tar.gz/md5/fdf55ed8c59b6cc0409fc8154777d57e -dSFMT.v2.2.4+1.armv6l-linux-gnueabihf.tar.gz/sha512/8249e8a74ea958ebdd1e4e967d1020158c49643bc33855a5f0043b77026c250371d60b95e8370e0f67dbbaa380a7c02fc1c4eff7d49933a5f471393abc2a266f -dSFMT.v2.2.4+1.armv6l-linux-musleabihf.tar.gz/md5/4814dcf836033065745f3532ceabeb33 -dSFMT.v2.2.4+1.armv6l-linux-musleabihf.tar.gz/sha512/75b8df698762e421cbef208cf7fda556f812f7e9c0481f83ddf38e468459ffa6fbdde86b5942f28f47225c73901c9863246f77eed93abd73052b6d0918717444 -dSFMT.v2.2.4+1.armv7l-linux-gnueabihf.tar.gz/md5/bff0a088b3bdf557dcebc48c2b260bb5 -dSFMT.v2.2.4+1.armv7l-linux-gnueabihf.tar.gz/sha512/609b45b7330bbf6e093fe6277dd14c9e23fd8c8c5e4db6a7275d29c6436f7602cd5069a0912a6ae3fd02d492a25c56400166b25c02b7379eb152eba3aa019dcb -dSFMT.v2.2.4+1.armv7l-linux-musleabihf.tar.gz/md5/5193c1f1c2d056b3ffd265f4ef18965b -dSFMT.v2.2.4+1.armv7l-linux-musleabihf.tar.gz/sha512/3e1b0af492a83c076087923d317291fd473670626599d3d03ed86a4515362a24610f3a9b2bd4b71c15bf86b03e44a11fd973f9f16d8b01bfdabbf7ee1ea7f4bb -dSFMT.v2.2.4+1.i686-linux-gnu.tar.gz/md5/69b959d409030f86eefbe1a0d4196787 -dSFMT.v2.2.4+1.i686-linux-gnu.tar.gz/sha512/0ff871b96031c5f11e5c5fbb4fd35c8bf5e3b1fa5c43dcece275bc847a82b89f0f60db5b273bef2dd31572e89c98694fd1cbc2b442ee3a5fdf3b44e8707ef338 -dSFMT.v2.2.4+1.i686-linux-musl.tar.gz/md5/1fd3b4d5169be306b86cca9dfa6f014c 
-dSFMT.v2.2.4+1.i686-linux-musl.tar.gz/sha512/d5e129abf6ff8a1077bb9de27fdc17c131f26d9c3707c189c02649290b50699f26e39230ef875fd172b54b1e28b1b595cbf835c6a8c36e1101951444e174f02a -dSFMT.v2.2.4+1.i686-w64-mingw32.tar.gz/md5/2a6ea60fe134309ecafb0efd5364b186 -dSFMT.v2.2.4+1.i686-w64-mingw32.tar.gz/sha512/50ffad9c0071746acff16532b71d84d53c6f11039aa10167f49ac9293f4819a905f63f521c93a45daed5068df0fea1699a15a1a1d6c100dce0932cce4165442d -dSFMT.v2.2.4+1.powerpc64le-linux-gnu.tar.gz/md5/060a4ed22e6e36a661b08c804a7272bd -dSFMT.v2.2.4+1.powerpc64le-linux-gnu.tar.gz/sha512/f1367b910512b996c4e6bfcb4a99afc3640a4ad5ec8e6a2fc092d677c7eb68527800c4d248188a2cd7a2d427cab472a2fdb48978aeab39805a62f774dc58bb50 -dSFMT.v2.2.4+1.x86_64-apple-darwin.tar.gz/md5/d768332dd9902c4e3869a160fb002af3 -dSFMT.v2.2.4+1.x86_64-apple-darwin.tar.gz/sha512/db3e43ea9b884fb2ddc9585a224d85835ead169f5996ffb20930a8970893f9cbbd8b54832a4fc78745c7dcd7991f973e929965ffded32ae8289c0be68316e60d -dSFMT.v2.2.4+1.x86_64-linux-gnu.tar.gz/md5/671e5a06c68d23854051c78268bfb9ed -dSFMT.v2.2.4+1.x86_64-linux-gnu.tar.gz/sha512/0b8eb9e527cea444fdc33a3089684f9b85a8889370fe0b240718d32332523f1175e38a9b51fdabf4a38bad4a820e956baceac253001213b1fc3e7a5eabf8664a -dSFMT.v2.2.4+1.x86_64-linux-musl.tar.gz/md5/65929d7a40fea8e8783cdeb77205ee06 -dSFMT.v2.2.4+1.x86_64-linux-musl.tar.gz/sha512/ce1b49365b764cf67ef4757f91078ea11afc6e07c4a776258a09f58c9ff84ece440d80714a491c1a21da06ea6a67bd27d2933b862dbfecf3c357f3c32ebb4fc1 -dSFMT.v2.2.4+1.x86_64-unknown-freebsd.tar.gz/md5/e27869ac4f1ea6774ade7d3b53cd301b -dSFMT.v2.2.4+1.x86_64-unknown-freebsd.tar.gz/sha512/762571a5d5773c2d9780586603859272f48ed67d6c8b09cd95c92fd62dc9bb03c274b12c2c04e05f426c9a42edbbc8e33beba3c79865f2c49459eca2d588b14c -dSFMT.v2.2.4+1.x86_64-w64-mingw32.tar.gz/md5/74e5c27ba9eb654b4e998ce73719e724 -dSFMT.v2.2.4+1.x86_64-w64-mingw32.tar.gz/sha512/59badcef14b06f14f8f5bce1c72de6750c8310ae18581e24b5d663edefe1bed3d120b4cebb87b53dc664411b62d9802f75aefde4e5236ada1dec740e6ef2445d +dSFMT.v2.2.4+4.aarch64-apple-darwin.tar.gz/md5/43b52709b7794c92931286174854c886 +dSFMT.v2.2.4+4.aarch64-apple-darwin.tar.gz/sha512/018b67a06cdf42dda2a906025e8a12e026af9b39fe8281890dc90d66a422c3af2a8430d42677f79d123fd0ab0e8d5c37db2e0a00ef03731d35cbb65f9e59b108 +dSFMT.v2.2.4+4.aarch64-linux-gnu.tar.gz/md5/260e14855dbc7773a2ca906d58cc57f2 +dSFMT.v2.2.4+4.aarch64-linux-gnu.tar.gz/sha512/820ca4c6afde931e855b74015150f4ffbb513276c3fa7dbcc1ec8d34c02d4989fb7424a6e4f81f93d054811b5f54f8633d955b05acdb088387ee90f1c3b00915 +dSFMT.v2.2.4+4.aarch64-linux-musl.tar.gz/md5/7ddccbad6b5c9de4be187fe76637a0d8 +dSFMT.v2.2.4+4.aarch64-linux-musl.tar.gz/sha512/e3c225da00927096e3a6cd4abc681fba8f469cb74828e7054d4f5684d71dcb8e75c9a81f14fa10bfbb78f62f9567a31a92edcca8d797e5810a2a44a3fc17bc84 +dSFMT.v2.2.4+4.armv6l-linux-gnueabihf.tar.gz/md5/a70329e0a6c57009c6b6950fd34089f6 +dSFMT.v2.2.4+4.armv6l-linux-gnueabihf.tar.gz/sha512/4418c42165660adc050e872ef834f920c89ed6a0d2b816821672b1e862e947aad7efd023289da9bf05bb2eb9ec4b9d2561c403e2d5384d5314a4ba016b1f9cfc +dSFMT.v2.2.4+4.armv6l-linux-musleabihf.tar.gz/md5/6ffc798b8a0c847fa5cb93640bd66ab3 +dSFMT.v2.2.4+4.armv6l-linux-musleabihf.tar.gz/sha512/94e5ae07d0b1420abd7290519bce6f77deae634bbb4df31e3f02416bf509e555a9b1c9d19dd77ca76a308c2b86d5c9d4718b9ef83c13167b88a8181d8ca7e73a +dSFMT.v2.2.4+4.armv7l-linux-gnueabihf.tar.gz/md5/660d95aa08580ca1716a89c4d8b1eb24 +dSFMT.v2.2.4+4.armv7l-linux-gnueabihf.tar.gz/sha512/bc757a9f805047be5375f92c10a3f3eab69345a4ec5cc997f763e66be36144a74d414ff926df8e17b9d5a2394189269c3188c55e0b7c75a72495394d65510cef 
+dSFMT.v2.2.4+4.armv7l-linux-musleabihf.tar.gz/md5/78c487049092fe61949d506637c713bb +dSFMT.v2.2.4+4.armv7l-linux-musleabihf.tar.gz/sha512/03ddada4478f05eab7d2971b2deaf2cba91f084d7ce66fc8219bcb3cf5c308ea13959fed95568ca80f4ce11794e197092984919265716de8f2558e2cb30d94ce +dSFMT.v2.2.4+4.i686-linux-gnu.tar.gz/md5/b0f535336cca76f1dcdacca29c6f8410 +dSFMT.v2.2.4+4.i686-linux-gnu.tar.gz/sha512/cc03a246b32875037a41a45c1004834abc7c67f90bf17e1b41cc604ee9893147b1ca3978a2e103b94c94ac617380570473de1f66bff15de8e4ee05c5a3c21059 +dSFMT.v2.2.4+4.i686-linux-musl.tar.gz/md5/a61405f72c9a3bba5718f078c68e61a5 +dSFMT.v2.2.4+4.i686-linux-musl.tar.gz/sha512/726f130bbbfd0dece4185b89a25a73f3b5b950ebfb7f86aea6e9cbcf9ae932e591d20b854de0b4985103dbf8b4b7cb3560661c5070af971cd2c1f3ec3e1ea7d2 +dSFMT.v2.2.4+4.i686-w64-mingw32.tar.gz/md5/93670f43a98f7c6045427dc9ddd89a4a +dSFMT.v2.2.4+4.i686-w64-mingw32.tar.gz/sha512/b76c2be073312ffec8c778b83d3e37b5d0c5dba770ffcc95a6ebfba516a948beb08419428192fcd5dda83357a64e90c4e3a40144688f128133400284a7363b8e +dSFMT.v2.2.4+4.powerpc64le-linux-gnu.tar.gz/md5/fd8c73961ef7c82201e6d86e8bf4324c +dSFMT.v2.2.4+4.powerpc64le-linux-gnu.tar.gz/sha512/1bd0ebd019cfc6f25f7ba007547c5ee297854655b93c55e90d8ead420875de5a087e38956693d5e901ff2abf667c72aa66fb34f587b82adf4b91b3d5d666b5c7 +dSFMT.v2.2.4+4.x86_64-apple-darwin.tar.gz/md5/b57ec1491ffdd40c72860b9f1869160c +dSFMT.v2.2.4+4.x86_64-apple-darwin.tar.gz/sha512/c3a192dbcd3e768712d12d3ac851f46bfa1517eca16c9a187025553076c8fb886b925e4e3f5f20531180f72b73e7eaa5281f54d5b7d4e6a4d53f4542c4bb33b6 +dSFMT.v2.2.4+4.x86_64-linux-gnu.tar.gz/md5/fa671f4ca14b171d53c8866d03f9162a +dSFMT.v2.2.4+4.x86_64-linux-gnu.tar.gz/sha512/2e242a1448da0508ea88cc1a106f1e74f8d7e7562cd82b80d86abf9a8b454653ad7612e25c30ce00c23757e8a5b7b5736253b00a52f9473af6c5d4df768138f2 +dSFMT.v2.2.4+4.x86_64-linux-musl.tar.gz/md5/c648294163882ec539ab646542c74880 +dSFMT.v2.2.4+4.x86_64-linux-musl.tar.gz/sha512/9e96a47d660854b6517364f0db40a2f4e0e3b814499a0349f7cf550b1c8d04589fca5eb4a75bf34f36d1b5d1b2277b3e9a961c887092abedd08f438e025329e7 +dSFMT.v2.2.4+4.x86_64-unknown-freebsd.tar.gz/md5/b4497d34d72ce134ce110b6185a82393 +dSFMT.v2.2.4+4.x86_64-unknown-freebsd.tar.gz/sha512/23d0fb273edbb5a08920a3683398e11d6f4df137dabcfc5f395a9175ddf14ab8999eb961ae8f4b76715a5a2dd2b77757f752abce35c1f752b800201e93aae874 +dSFMT.v2.2.4+4.x86_64-w64-mingw32.tar.gz/md5/d380963292bc54d27d39a3f94adbd5ac +dSFMT.v2.2.4+4.x86_64-w64-mingw32.tar.gz/sha512/ef2f99b17b1a36e61fb4d149d8a8fccc9e804b3b727f2426fca917c265c2d7ada4e3abaa5383e25136dec8de262c1d11970a01d7cfb513a55f1d86a23534e864 dsfmt-2.2.4.tar.gz/md5/ed30e63552d62df48d709dde4f755660 dsfmt-2.2.4.tar.gz/sha512/fe84e986cbf198172340adfac0436b08f087643eca3f1ceccacde146cbfd8c41e3eb0dfbb062f7ca5f462db13c386abd7c269bc0cbefc9a0ecf97a8a8870a2e4 diff --git a/deps/checksums/gmp b/deps/checksums/gmp index 0c45aa6a00ca9..c9f6deac6e19b 100644 --- a/deps/checksums/gmp +++ b/deps/checksums/gmp @@ -1,60 +1,60 @@ -GMP.v6.2.1+2.aarch64-apple-darwin.tar.gz/md5/37a4c537149a1d6d7424833294e61dac -GMP.v6.2.1+2.aarch64-apple-darwin.tar.gz/sha512/33dd86279b5b3b08496180c92971c2e7ef84715e9ed3a80071a178ee94de6231ea3cf7b4dd4fa7e0dbd0b386a1a04c4f6b28446e86cb92c100ebb295b2f5ee3a -GMP.v6.2.1+2.aarch64-linux-gnu-cxx03.tar.gz/md5/44ef76b228cdc4cf54e5d4b40a29034d -GMP.v6.2.1+2.aarch64-linux-gnu-cxx03.tar.gz/sha512/255a680c75d3e8ca542dffc47050adfce038e25a12a4131c18dc719d36b364c1a6488ee5743d1c5de445b4bc5ccbb932399f7071083d86fe5bd2befc521cfbfd -GMP.v6.2.1+2.aarch64-linux-gnu-cxx11.tar.gz/md5/0289ffc3621b5d62dc2f9e1b36c41f9f 
-GMP.v6.2.1+2.aarch64-linux-gnu-cxx11.tar.gz/sha512/f27b82efb5aa1d7eaaed7574d3312969664eac38f45cf40c6de13ca20b256d45481546fc1a402e6c04bee416c842a092a4e57b8df702bbcdc52f742555d07aa7 -GMP.v6.2.1+2.aarch64-linux-musl-cxx03.tar.gz/md5/9ff4c76804f59056b49a9bf5b6a02099 -GMP.v6.2.1+2.aarch64-linux-musl-cxx03.tar.gz/sha512/d86afa10bdc4e20fa259a17ce7d0a5dca2524b42752bc7d5c33e4323973587d234d4c420900deef34670bfce8ab8c6725e7edb45bfd3896b2644a42ec187dfd7 -GMP.v6.2.1+2.aarch64-linux-musl-cxx11.tar.gz/md5/cc9857a965afcdcbc2b378a368360690 -GMP.v6.2.1+2.aarch64-linux-musl-cxx11.tar.gz/sha512/c46bff9fdcbecc71c12914dadb31ee9fd5b4293cb45bda782200daa18d7f7e8b588e0c0f68a39c2fec7cc3d026bcef3620dae35ae2dd3acf2505dcfc084d11bd -GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/5b3343367896e31b29571fe0d2b90390 -GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/65a501db63c386727aa336d6dbecdff0417628bc9ff7ac1b2161922246d94f8caa71b63fc3789ec6bb10aff03b96d5d0c22c37c82bd95d74e557df8de7e8a09c -GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/cc04dda18412fa11f228e66eb5a03aad -GMP.v6.2.1+2.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/49fdd452fe8f0129ee06795e04a0cc0238132f9d6f60a124dd2c7395fabbb71f005c16d95fdc00d87f8bf82b048cc54e07f162fbc38223c644854cc72c4d26b0 -GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx03.tar.gz/md5/675599595f3dedb8ca11151168da7110 -GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/eedcdc2230fd81d613d54be356679a97b59491f5f9a17c518239b5504c3dd5da15721d553f57ae21f1c55d253e808e7afd1d1651b8c666379c55c7b48f71217e -GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx11.tar.gz/md5/9a74abbc46439ae8268ca926f0045691 -GMP.v6.2.1+2.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/6329506f7a886d0dd907b051d6cbab1bd0cd21b2d5715f55402bf9ad6cb1ae33e058931bdf6cba17658b0e455f9e4fb7f9aad274755a159106cfe1c4d1ea328a -GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/8c20e0def927a202f2d23aed78aadb4a -GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/b7f42efae6fce864c9e07714056444ba74befb9cc9a766ffe14e676240f23f83d3241b1bf3a8f4a282acbdc197287fffb27dadedf3055505ad63bb0b9df573c6 -GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/423a625816b3c52efa6021e76f6009b7 -GMP.v6.2.1+2.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/21cbbfd647d4a7c884344dc66e0fd83d654d22c3338669539e8eab515bdc6bbd772b47f949d28280789e4343e9a8d6319a73dc9e11c23da381b8a452ef7fb098 -GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx03.tar.gz/md5/7d67f981538d7a69ab1e458a54bf56f4 -GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/8aefbcddc326d4ef289dcdba8d3bd56a5f9656a7be30c83b4dbd9a0b8ee26a963c6a2f4294c94b8a8f2f712f1e1c9e17b8b9dcc9967d64294ca466e51656f7c7 -GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx11.tar.gz/md5/ed8713b71636ea75fcc0c9fbc4a8618d -GMP.v6.2.1+2.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/d7f50d06a256fd9176d5fbf682ff599a5ffba62bb35fb37321ab41e88970921a9d9fa4531bd74e73e471c7e15fcae568d0536d3e32a2b2d7f81dc9cd1f0c039f -GMP.v6.2.1+2.i686-linux-gnu-cxx03.tar.gz/md5/875f0bc57172788cb80ca2b80ff3065f -GMP.v6.2.1+2.i686-linux-gnu-cxx03.tar.gz/sha512/808a3c2422b5168260dbf7a3875d5c8151e10b20a8ec87a66bf08f71ad7cf5de20fb7a4f3457c3ab2b4ffc9627764c743baa96f409629c70f2233ea7a5b628b9 -GMP.v6.2.1+2.i686-linux-gnu-cxx11.tar.gz/md5/09ae13f2a6a0dc317d2bca5700d2bf59 -GMP.v6.2.1+2.i686-linux-gnu-cxx11.tar.gz/sha512/9c986e2904247de937e30c05b29e0179986d7747b217468c59bc56af6d4c48d4575f24dace521dc8d66d84230eebd695fe0538972bfd744182ca940a23a9239c -GMP.v6.2.1+2.i686-linux-musl-cxx03.tar.gz/md5/45f53fd95dd69a6ee6b43463976b5aa6 
-GMP.v6.2.1+2.i686-linux-musl-cxx03.tar.gz/sha512/4df57d6c88f0ff86e0ee78da8f6ad02decf7a38884ae8c785c114e0e38e791b733e0d046c90712327c08645dd40b7f0391fcb3258cb3bfb8b6a62c59c27d6e83 -GMP.v6.2.1+2.i686-linux-musl-cxx11.tar.gz/md5/8b15988bfb1ba0543eefab73b3ac3439 -GMP.v6.2.1+2.i686-linux-musl-cxx11.tar.gz/sha512/e32dec7ded9bf6fc26033df83521481dde851c68d7cc45efaabeded7603417cdc5016de45f78a956b69aaed00a55a91aa8b1cd5bbe5431b01074dafce2c47751 -GMP.v6.2.1+2.i686-w64-mingw32-cxx03.tar.gz/md5/4138d0b5185f722aef4e1f215f381275 -GMP.v6.2.1+2.i686-w64-mingw32-cxx03.tar.gz/sha512/255d4ecf178b9440b667c56e542baa4422d731f83a67accd41b76268274c2344fbbf94979fddbbd1f6b5751bac2d228a8ef49a93365de78c1772146edd1b4845 -GMP.v6.2.1+2.i686-w64-mingw32-cxx11.tar.gz/md5/606b4b453af25ded1323aee9e085c132 -GMP.v6.2.1+2.i686-w64-mingw32-cxx11.tar.gz/sha512/8605b764ff6e5d81767432fd8e70c25c5ad76f2cac7c2b3d6ed0596df692300973803487c970a896a0a316d46de3e3cae31b21d4e11fe2961e228cd389da13da -GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx03.tar.gz/md5/3fbd157df4ae738da6820b26fb75e75e -GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/6e64c5c4e393c0001bd7085e627126134b5999c2d8df2fa9b72c9f9835d6b0f0ad440a2f58fe6537ec446a517f8df2667881871fce9b4d61c356d2b52080d641 -GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx11.tar.gz/md5/35608e3166278d52a482d7e19313eca6 -GMP.v6.2.1+2.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/a9550fe2b94e0e111a487159c0cd8fb6f1a21b8941ada7bb281572079dbbece921f80b0275bcc8f88117ecc72e7f8e93219350f5444b67295620db1aa9ae947d -GMP.v6.2.1+2.x86_64-apple-darwin.tar.gz/md5/b5004a436660a2533b94b41c592b686c -GMP.v6.2.1+2.x86_64-apple-darwin.tar.gz/sha512/b7b4dc8025ce304c5b899084f42c8f5aad5bbe03509bada17dbe6be952f98306729180a22b5d0a095692f349406db0b98f99f5e3f2be5f2165825e6f7f7d1813 -GMP.v6.2.1+2.x86_64-linux-gnu-cxx03.tar.gz/md5/47ba899c9ac714a4594f999d845f45cf -GMP.v6.2.1+2.x86_64-linux-gnu-cxx03.tar.gz/sha512/99624ec71865d6285ab409ef54f4cf12ba246de6233de56a2fb9f70806574891539efed32e711202003570c157918fde8d53534c695fd5b8476e0d4e0ecd1bd4 -GMP.v6.2.1+2.x86_64-linux-gnu-cxx11.tar.gz/md5/3b0c1258ecafcaf96e549f9b979420ee -GMP.v6.2.1+2.x86_64-linux-gnu-cxx11.tar.gz/sha512/b94d8f25d23597f96cc0cf0aebd1708755a8714ec4a481108add852b77addc737d3d8feba566ec410db019698ca2de826583b1a6105f0d2188679e7f72331df0 -GMP.v6.2.1+2.x86_64-linux-musl-cxx03.tar.gz/md5/061cfe5f416c1365e98d6b1ed89abd63 -GMP.v6.2.1+2.x86_64-linux-musl-cxx03.tar.gz/sha512/b6847f7ff599fa811851788a6ec6ce69ba02dbb3672d0a64b03b7056b35215536b059287709b3d207bc977094e994a7d744061b7ecf95886510285489bb89578 -GMP.v6.2.1+2.x86_64-linux-musl-cxx11.tar.gz/md5/81911acbc0c3607338c6455b1798cab8 -GMP.v6.2.1+2.x86_64-linux-musl-cxx11.tar.gz/sha512/e007441194abc5c80d9521a17e2ab9e6fb54f319571f4045fec2f7464ffaa99652d3252416c15d110dbf9deaad2c1dc94f81c638e28ce620cf543f554eb7d1e0 -GMP.v6.2.1+2.x86_64-unknown-freebsd.tar.gz/md5/ef7173194848e8d00d73ef05fc520f0e -GMP.v6.2.1+2.x86_64-unknown-freebsd.tar.gz/sha512/512c3cf8fb951fe0ef7b1715b78202d0bdf5844fe33e16c4674a19e6335440fb5352d7bde71fce83e8e373efe43281d05b160b11657a582a9d3a0201ce97a189 -GMP.v6.2.1+2.x86_64-w64-mingw32-cxx03.tar.gz/md5/882c6749f217f5a691b744ef728ad089 -GMP.v6.2.1+2.x86_64-w64-mingw32-cxx03.tar.gz/sha512/53424ad8a9dcfb8e0e738d4521b2ab1c75aaf54668a54a76b8bcab2404308e69b531dc25b3dc18bc8eaa7ebd9e2914d6624c5d371e6c0ecb9e8d24aa575e99ab -GMP.v6.2.1+2.x86_64-w64-mingw32-cxx11.tar.gz/md5/bcdd7bcbc69161744397d249a9c82e45 
-GMP.v6.2.1+2.x86_64-w64-mingw32-cxx11.tar.gz/sha512/b7f8fb4f5aaf5034d4d2f60e29cc7b5e06c13d4b677af30f30831e1fc95925a575275ebffda36efcc09e29ccd78ba56475c1be3ad0627e28862057764f1ef74e +GMP.v6.2.1+6.aarch64-apple-darwin.tar.gz/md5/8123f7925ae9aa60b6998313b21a9db9 +GMP.v6.2.1+6.aarch64-apple-darwin.tar.gz/sha512/5c7927ecfd47409dd4116cd4209768294ba229b51472ed220da498823dc1e7f9100292ec4b3a990491acd27f16ce3a3dce7a7c6e20dcd515982a9c8e364d91bc +GMP.v6.2.1+6.aarch64-linux-gnu-cxx03.tar.gz/md5/0d0d2ee67cff251941e3474341280b34 +GMP.v6.2.1+6.aarch64-linux-gnu-cxx03.tar.gz/sha512/69fb2f1476e0bb73f89ad2f73b58ec4da1b99e099124666e6da93b7705fde23913daa59f2ad479f99fcb4f0df152603bb0ba4875420b583f01fded0fec280a15 +GMP.v6.2.1+6.aarch64-linux-gnu-cxx11.tar.gz/md5/86ba1313c8ab4ca1ae8313cbf96e1e7d +GMP.v6.2.1+6.aarch64-linux-gnu-cxx11.tar.gz/sha512/05c306c01d1b0e9e4dc7ce937075eeaede4e5e0791826a8892fae2eb73cdb7f22c4873cf31cea3cfe3db996ac77387346f4f8a851ce52c29883146678f3851fd +GMP.v6.2.1+6.aarch64-linux-musl-cxx03.tar.gz/md5/2fbbb9adee7db794f5888442b7b7688c +GMP.v6.2.1+6.aarch64-linux-musl-cxx03.tar.gz/sha512/d8a1719e529374d00ba6372013d0c7ddc9f44f9f6ee0f966b4ed16d731ce74c26b6e6a807403b3396bed67dd3e775e18c1e70c247a371d622a6e7013eb6b8905 +GMP.v6.2.1+6.aarch64-linux-musl-cxx11.tar.gz/md5/b6a8c494d4c90decb6eacbca3ce3f22a +GMP.v6.2.1+6.aarch64-linux-musl-cxx11.tar.gz/sha512/6798406e20cc4d58647c266a2b1b8d0670e62f19bf4bff991c39eef13cf92c043f00717e7289bcc00007d7e248e943b37ba2eef89c9e68c42e30f0e2be9dd589 +GMP.v6.2.1+6.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/a6866ee9784e9359e32dc18f417b2be7 +GMP.v6.2.1+6.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/548953ccc8444886316d4dfd7081783e397ec180e88a1d17a464e4b1d0a27f51ee7f6a1936ddab499db192d3cdfdc87d572731c5ab2f87d528609dabfccad2d3 +GMP.v6.2.1+6.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/6b78c826a4aedc8107c1bbfccbe5c097 +GMP.v6.2.1+6.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/e8c075c29e4d8a916f087faeb2db50168e1a5546fcb02fc841477cf82a39188c3b9e7703b5354d4842880b5ac7215a32d022abe08aacc5e23238b63c6b994af4 +GMP.v6.2.1+6.armv6l-linux-musleabihf-cxx03.tar.gz/md5/57e1a6c71b3c5b4047bf08bfc4d4f22d +GMP.v6.2.1+6.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/0f72c675ab3005ea183393bc4e5b4a157c13042367fd1bb3b03b3f7742e09604bddffb89f1478dc0dab4d992939519578549d05f9885b89319b0b51678b8a619 +GMP.v6.2.1+6.armv6l-linux-musleabihf-cxx11.tar.gz/md5/65a13f49cbdaa9d3a8e20d0b84bbc701 +GMP.v6.2.1+6.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/0487b18d1c9c59d990e6c4ec435b8dff91ae02d5d56c665b12aaaea105f7d2ab5beae9dfcbb133c990f70774b0d32e55df7f2e91e2d0a85c391a4090dcadf080 +GMP.v6.2.1+6.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/30e20c183153f8ce60e564b35e4b54bd +GMP.v6.2.1+6.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/41bdabc2610b46b215043e98eaddb2e2ad0695ae15f3088c9beef24a97864dce4088ae68993de928d952baaf123f279d74705664fffbf96be9b7436f1ba7692b +GMP.v6.2.1+6.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/5f2cba31677e6681666c0b6ebd33c3ad +GMP.v6.2.1+6.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/a89399bf84bebf4b8432e48aae6dce5547bb6f1c048364697c577541c4f1a555b976370634624e9cf039fcbcb70e449a2f55563f0a4f48e60ee4653a185cf7dd +GMP.v6.2.1+6.armv7l-linux-musleabihf-cxx03.tar.gz/md5/4a682d832109d7ab5743832f73ca33d2 +GMP.v6.2.1+6.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/d5062bd8eee926eb1177e70e5d9e8d6ed7a00a17c25d2b165b974c01aa79d45ca97e219b26ded752b5f323546192d595b838b474c61bdd87e641549db9e9ef5d +GMP.v6.2.1+6.armv7l-linux-musleabihf-cxx11.tar.gz/md5/caa51529cb1b6dc8db765e202e1b7737 
+GMP.v6.2.1+6.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/d11ae870e68ca8d28bbcdf799a04769c3df2fbd169f6f2b16d88a556c40866b39636820ac3497e869086e638ba31dc1c87ec780add2d1aafe5e4ca178641678e +GMP.v6.2.1+6.i686-linux-gnu-cxx03.tar.gz/md5/dfcb024b9cfba37f80da5b7cc0c5b1ad +GMP.v6.2.1+6.i686-linux-gnu-cxx03.tar.gz/sha512/10eb086228b4250ecce11ad5bbec15e2bfff2429530cfd700602ead7f108163bc48fc83d9714443cbf5a93e7dd5f9046bdc15ef324486475f6b4be1cf34bad4b +GMP.v6.2.1+6.i686-linux-gnu-cxx11.tar.gz/md5/e889c1d65c9ca710c859129ae99ef322 +GMP.v6.2.1+6.i686-linux-gnu-cxx11.tar.gz/sha512/4d97ebdd6a12d39907ccc9bad00266e286c949b3f99a306c1c4a4380a292694d944f275c351d9ddf465d020c8197b3b19dfccb5080249c75e3f5ffb9aa77a1c4 +GMP.v6.2.1+6.i686-linux-musl-cxx03.tar.gz/md5/d57b3948e7a120bafeae67c28fe40869 +GMP.v6.2.1+6.i686-linux-musl-cxx03.tar.gz/sha512/88165c809a73007d2b5e750d23c619fbb088f6de200aae1dee34b5e3783949150d91b94774cd1881d2a621d092c0e7e7332707ed4737ff8426686dfce7e0313a +GMP.v6.2.1+6.i686-linux-musl-cxx11.tar.gz/md5/e3c53fc468a9f48f9d06fdf51eafae62 +GMP.v6.2.1+6.i686-linux-musl-cxx11.tar.gz/sha512/3c6a99acd84c226d7a48177c8e18624a677ea2a3df15fb2d54002eb5a6d55144b6f51f82ff491373366f32e92252fd14747503166621c2d2359029bdb1b20741 +GMP.v6.2.1+6.i686-w64-mingw32-cxx03.tar.gz/md5/64b9bed188f9a300200659efdb9facef +GMP.v6.2.1+6.i686-w64-mingw32-cxx03.tar.gz/sha512/f7ed47cc29be31f99e612abd1db0d806ece84c117677cd639e04e2f6b08bbbfa4056ed9504bb073ec5f722de6955db668934f3d3ca05ddde0f22b096afcea2e3 +GMP.v6.2.1+6.i686-w64-mingw32-cxx11.tar.gz/md5/a8f38cefb46dc9c3faddfd597d0e1a4c +GMP.v6.2.1+6.i686-w64-mingw32-cxx11.tar.gz/sha512/f02c3458c05869fab493d9be5ea98390baf6eed136fe2916cd6214c4f24a6f22d0716d59f352454fd4c799df71a8fd90e3a169644e1c6ffe89f3620f2a52f158 +GMP.v6.2.1+6.powerpc64le-linux-gnu-cxx03.tar.gz/md5/7f8da2b7e16ef4cb593fea4bdb2e43eb +GMP.v6.2.1+6.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/d0105fe7dfcc1daf7024d2f58b53240bab473c3ae44a904833d009beeb8e41f5487430f68e79bd79fc5c74b55f1111eb7479fedc84bcb45fe4dff3d8c3ac3e4f +GMP.v6.2.1+6.powerpc64le-linux-gnu-cxx11.tar.gz/md5/31fb7b6e37c650f0b8c3a2d475cb2b5b +GMP.v6.2.1+6.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/d03f3f1303996008ff267682de5b9d6e3be78ca1b0d6aa7cadbf4a612b331fe70460b689125f4ededa1c6078092ad3dafaad32c68a98d31713764a7a7461cf98 +GMP.v6.2.1+6.x86_64-apple-darwin.tar.gz/md5/9276d90b4f850f167f673f731c7d3781 +GMP.v6.2.1+6.x86_64-apple-darwin.tar.gz/sha512/f914452a49988b0694915483547c2f878c0ba71be2079fd1228b3e583cb08e92d8c958a052f29025054ded74cacb699893a5a6ef27749c851e83607ad3f1fe8f +GMP.v6.2.1+6.x86_64-linux-gnu-cxx03.tar.gz/md5/cded149fcef93ab1ba89c51d7cc58b73 +GMP.v6.2.1+6.x86_64-linux-gnu-cxx03.tar.gz/sha512/8f97582d6323df6f86e3079b9a2534425bd4e64bb4cec337c21059605d50c1220fd006e55bdb34e8aa7195cd79ef518f1541c1b1a92187ed928f7939b3128dd6 +GMP.v6.2.1+6.x86_64-linux-gnu-cxx11.tar.gz/md5/0529bb60dcf584222cd91e9e11510f24 +GMP.v6.2.1+6.x86_64-linux-gnu-cxx11.tar.gz/sha512/0532821e81a4e51363570f87ec59c37dea24cab59a94e43127837ce4b388d1951853d50e52d4c9f30b4a21cfe222e368207239ce8ac0f1ee1e9375f51fb10127 +GMP.v6.2.1+6.x86_64-linux-musl-cxx03.tar.gz/md5/2d332d096da5515581ee92128aff88ab +GMP.v6.2.1+6.x86_64-linux-musl-cxx03.tar.gz/sha512/b17f7b762bd4d61fa4c4be8124275c2b337383da167bdeaca34e44d71f20716b182b46bc5a6714a798a0951d73b335ab9c87f451cf4c5456edbe76cf3ad36ba4 +GMP.v6.2.1+6.x86_64-linux-musl-cxx11.tar.gz/md5/a9dae953f9d59589162a3ea149c46d1e 
+GMP.v6.2.1+6.x86_64-linux-musl-cxx11.tar.gz/sha512/31e568aba38a29ec6713dda9eb1c7d7b50c2a736e8883ae8ff2eaf16840b15c93e6dc53025e7750d3ac3e4ffc7d2c91787bda5b799ecfdeea3d928657176b1b3 +GMP.v6.2.1+6.x86_64-unknown-freebsd.tar.gz/md5/6f42d7486fa85ce1bf0cac409d1dd5ae +GMP.v6.2.1+6.x86_64-unknown-freebsd.tar.gz/sha512/5111751619388e51d1b3c0e32548a6de0aa02b7967994a4b4b78cdc9e0e852dae9d78bf48a503a6fb67e3b08343ddcf5a9f0b7a64a803c4d5067d69e4cb2edee +GMP.v6.2.1+6.x86_64-w64-mingw32-cxx03.tar.gz/md5/39cca70db2d23bc73a47870a0ee5156c +GMP.v6.2.1+6.x86_64-w64-mingw32-cxx03.tar.gz/sha512/a2877a6641e4cccd39e7ef093dd9ba7501c6e312f160b2924880d129195aadb74badfbf198fd6ee11035a6a7c99d64c0965c44526104a43569ca0d97fa565b5a +GMP.v6.2.1+6.x86_64-w64-mingw32-cxx11.tar.gz/md5/e2e03ed150558405ca1993ca14488662 +GMP.v6.2.1+6.x86_64-w64-mingw32-cxx11.tar.gz/sha512/50995f6382ed2a4c425097e7abf762b847872734c104847f6a042090be132c68e864d34bb24baf64832d3636810cb631464767949eb2df2fedaa7ccd9824f78b gmp-6.2.1.tar.bz2/md5/28971fc21cf028042d4897f02fd355ea gmp-6.2.1.tar.bz2/sha512/8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9 diff --git a/deps/checksums/ittapi b/deps/checksums/ittapi new file mode 100644 index 0000000000000..896e44d8f2907 --- /dev/null +++ b/deps/checksums/ittapi @@ -0,0 +1,2 @@ +ittapi-0014aec56fea2f30c1374f40861e1bccdd53d0cb.tar.gz/md5/932501cdb0e1c7841e23c12da7740419 +ittapi-0014aec56fea2f30c1374f40861e1bccdd53d0cb.tar.gz/sha512/4dd3343837398ada0cdcdaaff630d8d91738d166897d86b77770facde30da99dbb90931b58a4a887399e6bc9a7a1c245057d0a0f63762230d577d71da871701f diff --git a/deps/checksums/libgit2 b/deps/checksums/libgit2 index 736ed68142baf..63d67671b12a4 100644 --- a/deps/checksums/libgit2 +++ b/deps/checksums/libgit2 @@ -1,34 +1,34 @@ -LibGit2.v1.4.3+2.aarch64-apple-darwin.tar.gz/md5/df6f108f17778bafe0dec2db18a4a312 -LibGit2.v1.4.3+2.aarch64-apple-darwin.tar.gz/sha512/d9ad1f441fd705b8c91a9fbfd39e97a1fe84753a0435af67e19344476c390dd301ed53a138867cc61552a9d4f26e6bac9ddf5284354c5631312893eb828c0b27 -LibGit2.v1.4.3+2.aarch64-linux-gnu.tar.gz/md5/044f354966ea77f380eef9ec8093b13e -LibGit2.v1.4.3+2.aarch64-linux-gnu.tar.gz/sha512/e6feef32df7a5ffff48a34afbba5efb02452406de756a411c5675de9434641f4678ba7a0bec849d0f74de0df089a9a3769eb4ce466570b976442ae217ea62509 -LibGit2.v1.4.3+2.aarch64-linux-musl.tar.gz/md5/d985b03be81552fff6f5d451319f6a23 -LibGit2.v1.4.3+2.aarch64-linux-musl.tar.gz/sha512/0d606b358010839c9ee1a25c557c347f6532f6cafad66b0ce8d28945d2d6c84745b245193765a168e3b0aec93fbd7f3bc1c80afffdc96fb18fbf27c8b340ae8c -LibGit2.v1.4.3+2.armv6l-linux-gnueabihf.tar.gz/md5/b1c839415fcceb2b0c3c75606cbf3494 -LibGit2.v1.4.3+2.armv6l-linux-gnueabihf.tar.gz/sha512/92ffd4ad4ab754ddab7be786374a54fed97694714ac99cf93372829821540f78eaa071c974efddbb3bdb9ad7824a95a6b935bb19c222f402a407b7a36e162b94 -LibGit2.v1.4.3+2.armv6l-linux-musleabihf.tar.gz/md5/3cada4ec0a62e441169b0247f1b86daf -LibGit2.v1.4.3+2.armv6l-linux-musleabihf.tar.gz/sha512/8ced3cad5b25370348ed68b932f03e0b67974d7a3fa973a954247cd783e9e2647adb4ced66e8dccd3918429dc5df0afbde2a28c979a5c6fe7b5d0b103f88ddb5 -LibGit2.v1.4.3+2.armv7l-linux-gnueabihf.tar.gz/md5/dc4b8c69e534beae8a7b31a990cecda7 -LibGit2.v1.4.3+2.armv7l-linux-gnueabihf.tar.gz/sha512/05327ba85893ff3aa478c35ea3e12ceddbca7e53d5714474bec640c9d613e362975e89569aa84cc713facdae90a0292b144fbdfd1a4c8a1f21ab6916b467e0a8 -LibGit2.v1.4.3+2.armv7l-linux-musleabihf.tar.gz/md5/8f9defcc523bf0a6ae6b1623e250fc8e 
-LibGit2.v1.4.3+2.armv7l-linux-musleabihf.tar.gz/sha512/2770b6f969d23389724a2f4b14671fa1dcd4b344abd2a7c2a5c5bef7ffd06a95f262066d2541c1df39f1394efa66a1bef07e5a031f05b12397c997ce9d71d17d -LibGit2.v1.4.3+2.i686-linux-gnu.tar.gz/md5/a014ce0eefc4371e77cec90ee073c78e -LibGit2.v1.4.3+2.i686-linux-gnu.tar.gz/sha512/d762404b6554078af5e681a4b766d9586e6b1f40c1f297ec1f7a1f70b00a426dc6429ef781581c757754ee31f14b662a60d7b40fefc1106ff7dc79aeb734a2fd -LibGit2.v1.4.3+2.i686-linux-musl.tar.gz/md5/ceb843d699ed19384c6a11c0cbf37ce5 -LibGit2.v1.4.3+2.i686-linux-musl.tar.gz/sha512/ba169256ae760543a5513d06f260a00c27e2907c72d545e74af10341e29a8376dc980d6b19603b8d73354df07a7e8c58fd9473513f93f742a77bcf863519570e -LibGit2.v1.4.3+2.i686-w64-mingw32.tar.gz/md5/98fecb082adac2b6dcaa992c018f6def -LibGit2.v1.4.3+2.i686-w64-mingw32.tar.gz/sha512/566fdd275e01f3756134d998879a8fba15ac779505f4e7524ea3928dbb52d2212579de2896659e497c56292d69f2f3f661c712ed483f09835b80854472c713df -LibGit2.v1.4.3+2.powerpc64le-linux-gnu.tar.gz/md5/324fd370a11e082b5c1e61c9be2fbd01 -LibGit2.v1.4.3+2.powerpc64le-linux-gnu.tar.gz/sha512/983bbb9b0922da4120cf61ed62e310ba6b5bdf42c734632e0cb531fd2053ba6c90a5afcbe9c94568a14122ef0a1271e6c654236df903e9fc769e6a65be0ce6a0 -LibGit2.v1.4.3+2.x86_64-apple-darwin.tar.gz/md5/6abf91ca41140499ab280fcea01303e4 -LibGit2.v1.4.3+2.x86_64-apple-darwin.tar.gz/sha512/66cba364b542df5f443b6761cc037704cb1e99b883285fe0af17bed644e310b6cfb6ac09a4f7119f9baa5d96b79d2a365fa9a572b40b01210ad325bf1cdcc025 -LibGit2.v1.4.3+2.x86_64-linux-gnu.tar.gz/md5/ae8d8f3e916dd528b3f4368bf4a51ac4 -LibGit2.v1.4.3+2.x86_64-linux-gnu.tar.gz/sha512/f2235440df3ef9162b14de3d6ff06a7122e2884ef6b81f8493a475d2814dc7b41ec322f18ab11c8d04fccc7028f48b9bf7febf3b75141a43a77c57df25233887 -LibGit2.v1.4.3+2.x86_64-linux-musl.tar.gz/md5/98110121f786e127adef201b21e3a4f6 -LibGit2.v1.4.3+2.x86_64-linux-musl.tar.gz/sha512/d248d5a1691deb38752a71f768724a31527c2594cd9175411f7d3f5ba6e4248ecb3207859004316993a75668c7d9c35615a3e4578e874745d37cc33a66dddbdf -LibGit2.v1.4.3+2.x86_64-unknown-freebsd.tar.gz/md5/985c14f55e2f9d7c2a00543f97e0195b -LibGit2.v1.4.3+2.x86_64-unknown-freebsd.tar.gz/sha512/a7fd6adae3386ccf409f43c756fe806a1c31f75762e9c422dcc4a6a5ce237a8efa0e7606c88c3f6f684b795e81cd2d58c638043cb3bc9cfac37e29279c5d1705 -LibGit2.v1.4.3+2.x86_64-w64-mingw32.tar.gz/md5/d4fba0b0ccefb72b3e78f49a366e7170 -LibGit2.v1.4.3+2.x86_64-w64-mingw32.tar.gz/sha512/a0e6cd5ca6b6635f46aa9f565b75b45828dc2d1a7a0f4f00654f41bf293c67f66f213c0854a3ebe0d1f93d114cb26313dbf178ca6353ba2a441b6bf3ab0ca36f -libgit2-465bbf88ea939a965fbcbade72870c61f815e457.tar.gz/md5/b91c544293f15b00acc04315eb38c2b5 -libgit2-465bbf88ea939a965fbcbade72870c61f815e457.tar.gz/sha512/ac1d47e6308ad7a7620b683fd56568390be49cd8120b475fd6617aed8e7635036fce7e99a50f2611d0adeff28082aa673292475c1782f4e9dec9fa7dde8f1e77 +LibGit2.v1.7.1+0.aarch64-apple-darwin.tar.gz/md5/80102fd8cd633a4875a1257bd61d4e17 +LibGit2.v1.7.1+0.aarch64-apple-darwin.tar.gz/sha512/3cc3679923c36e0020e692e79112a8fa71b53c1b83c9bea8d6defda124722a67c2859089d36fddef7be4547539575483db32de8137b43f7fc97843e579a02696 +LibGit2.v1.7.1+0.aarch64-linux-gnu.tar.gz/md5/74be95a3f7886a9804964f024df5311f +LibGit2.v1.7.1+0.aarch64-linux-gnu.tar.gz/sha512/3ad8a3c9ced9be2ab5fefe651f445a26900beae743127dcd1f887d01a7672d5d6c523641ba7d402620f3c44a1cc9557e43e11ad1692726c8cfabecca59a030e9 +LibGit2.v1.7.1+0.aarch64-linux-musl.tar.gz/md5/e63f4351250b4f4ac60d66b0bed2ddf5 
+LibGit2.v1.7.1+0.aarch64-linux-musl.tar.gz/sha512/8f2dd17fe55b7cf7cf60504e5b630b22ce27e4e89d75f7e93dba3b112f662470612987e09abd82c2e3df48fc3c0fe1dbf98c690d972edb50c10a5571741cd9e8 +LibGit2.v1.7.1+0.armv6l-linux-gnueabihf.tar.gz/md5/f06611068a36fa575ec8eb219c068723 +LibGit2.v1.7.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/04c554b9617300cea7750d18590e1a5629e70274ef1e1e8fdabbb4347e46fd8a68e82ba21714d7cd3809c3b6de2e254baca35ff60a4be87485643c460b10ac73 +LibGit2.v1.7.1+0.armv6l-linux-musleabihf.tar.gz/md5/7135ca6e52bf63855c5b6aa45d59ad80 +LibGit2.v1.7.1+0.armv6l-linux-musleabihf.tar.gz/sha512/e542180d2d8a0896ec586edac03b91d48d2ece3d22220d09b6e717b1b95a38bc1de2ae0faeed39dd3e99150684441bfb0504b55b3e18e543e00561f91147d405 +LibGit2.v1.7.1+0.armv7l-linux-gnueabihf.tar.gz/md5/7ffc92c821ec99bd76865ece43f5face +LibGit2.v1.7.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/c945a0895be07479fd94c3c127060e58866bc9b9c93e80dc923ecdda6689e43d566896ecf4bfc7d85ca710b9ee51e6d85dec423edc48a3a2066db0fbb118d073 +LibGit2.v1.7.1+0.armv7l-linux-musleabihf.tar.gz/md5/3d00a6223612c23ac6b3c1f44bff8119 +LibGit2.v1.7.1+0.armv7l-linux-musleabihf.tar.gz/sha512/c3ef7783f140b28ad2d10e1c16f5be683d3332a4f9db5d26fdf3f2ac2b750aa0ceaec928740a9bdf7f4d9e83f666aa6e5fdd9c019630bf46f6797000943e1510 +LibGit2.v1.7.1+0.i686-linux-gnu.tar.gz/md5/6ea4e6777f5a8630f9fa98fb6a4a4ac7 +LibGit2.v1.7.1+0.i686-linux-gnu.tar.gz/sha512/d62a46b54dfc491a88fa35d06f3ed9c76ce676473b33acd5382e72ce07e6a313505755476c4732b7a22cd774ddcdf4ea5e8a5b62b93eb48b67363911655ac177 +LibGit2.v1.7.1+0.i686-linux-musl.tar.gz/md5/9f74dc4e93886f011412a4f61dfb487f +LibGit2.v1.7.1+0.i686-linux-musl.tar.gz/sha512/1370cab2ef917aa759dd4986f247a6b4878f12c4b17399fa75c9a2878f86c136e6f2f998a396df0757bf36ac09d5d194e4b7688705d115f09c176f4a5ab22347 +LibGit2.v1.7.1+0.i686-w64-mingw32.tar.gz/md5/ce866e600b2ad8c0fd54ff8c57dc015c +LibGit2.v1.7.1+0.i686-w64-mingw32.tar.gz/sha512/c7848b39f3515452e13fb156ee645f9a8d3917374ba874b10437b417b3c8e9a108e014b3baf30c7ced5fd0034d4f37de7e7d76fb105358d8e953dca30c873dc6 +LibGit2.v1.7.1+0.powerpc64le-linux-gnu.tar.gz/md5/b7c2f120e33f499860cb1e096923e7fe +LibGit2.v1.7.1+0.powerpc64le-linux-gnu.tar.gz/sha512/3df8e54d2086fbedb55b5dc31a2010f2ecd277089293473607e780340882bda5b2f9a2cc1c53c88bd7fcca0791cc2530645ceda17de3f37bb1ff98a19ccb85cf +LibGit2.v1.7.1+0.x86_64-apple-darwin.tar.gz/md5/b5334bd7e44c2c28705bb816fe03b9b7 +LibGit2.v1.7.1+0.x86_64-apple-darwin.tar.gz/sha512/d91cfde393499687cc699d55184c58ee5f543108902bf1f08fde2270dec0f38e0d70cbc7af04ffe46952afad12ce008e745f4aae9084f23df58982c14b48117c +LibGit2.v1.7.1+0.x86_64-linux-gnu.tar.gz/md5/9e2e2fe324a40bb0a5364d218c5ce45e +LibGit2.v1.7.1+0.x86_64-linux-gnu.tar.gz/sha512/da7e28c20c09c5c0731fd5cdff6fa6c319b2c4757d5c4228fc287238cd649f98c689814480119f21cbb938a29f52c895021b44c74eccc2f93ae51766555d9b6a +LibGit2.v1.7.1+0.x86_64-linux-musl.tar.gz/md5/7147480b9520116eb63ee3c30fa60a21 +LibGit2.v1.7.1+0.x86_64-linux-musl.tar.gz/sha512/f3dfb2a416cb786f229fe9eb3ef653a30ba5ebf3b978475f0a10fa79fa68b7bce9b6d99aed19f8dfb5599d988e3c6d4ede9ef1a6ccdbb3c2ea61f76b97d7fb29 +LibGit2.v1.7.1+0.x86_64-unknown-freebsd.tar.gz/md5/39e1a6d463e52ca0b2a1a8e6c3c4a286 +LibGit2.v1.7.1+0.x86_64-unknown-freebsd.tar.gz/sha512/3978ba9923cc8a188aca36d7320d46b2788de27142d11920976c47ad43574ad7056539812cebab62550e656b263c2d277754c341bd83d013de608a91e6a0aad3 +LibGit2.v1.7.1+0.x86_64-w64-mingw32.tar.gz/md5/7d92c546023f460741a8187999b76bbe 
+LibGit2.v1.7.1+0.x86_64-w64-mingw32.tar.gz/sha512/da00d54f969ce3b70cc95dda281ddfafee72073164c31d7999053ed704a59401d64894ad702306d6e19eb1a60e5e98e5960c9c7e9a0e1645a0f3048422e62eb9 +libgit2-e6325351ceee58cf56f58bdce61b38907805544f.tar.gz/md5/08777cc257825f218ceac1a24abafdc9 +libgit2-e6325351ceee58cf56f58bdce61b38907805544f.tar.gz/sha512/ebeaf3bb12ce7d58cd6d36e0123168de3af8f083f707dc20df9781537e38188a176667ac51daf8d9006d54f2beed13fbfff6c26fbb48e3228988578ef8fbc9b7 diff --git a/deps/checksums/libssh2 b/deps/checksums/libssh2 index f8c1fc5da8d37..697601a037132 100644 --- a/deps/checksums/libssh2 +++ b/deps/checksums/libssh2 @@ -1,34 +1,34 @@ -LibSSH2.v1.10.2+0.aarch64-apple-darwin.tar.gz/md5/b692a93b8f7e45edd5c5d397cd1d3725 -LibSSH2.v1.10.2+0.aarch64-apple-darwin.tar.gz/sha512/8863fb372e3bccb9d6ff7f33494754b1391f0081426d1a42a3f3da69ced9d1b6246b7aa84269b7ec2844c27991d5998a6c58561b277f86daa96b577dec57b514 -LibSSH2.v1.10.2+0.aarch64-linux-gnu.tar.gz/md5/ea2e202282947c4900d656c83ba30953 -LibSSH2.v1.10.2+0.aarch64-linux-gnu.tar.gz/sha512/1c3a035a2e711ad013acb460293e929d18b58d345f84f4a7cda93510dca5e46d466f08b2b96e5742c16c509dc6ed6b6e2b13399bbd1c48340326e3e6d73f9322 -LibSSH2.v1.10.2+0.aarch64-linux-musl.tar.gz/md5/9dd3b1813cd1cc6246b31c5bd2df538b -LibSSH2.v1.10.2+0.aarch64-linux-musl.tar.gz/sha512/075d681235961838e2bb14f2034daa65909a40972cf638b44646464f66973d139de9af9d653073c24510cd40e5068d3a41c09f6ff12835a8278259d8530a6720 -LibSSH2.v1.10.2+0.armv6l-linux-gnueabihf.tar.gz/md5/17a33524ad9e6dfcf239b076803e3c84 -LibSSH2.v1.10.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/09ef31e3a6ee1e6055c6cf5c0f45fc2704701bdeb565b82a1896e8c83cc34c9dcf1b2e761d1c78b5d513291802a02ef3567a8a6d06d9d7ad946f4233e973c5c9 -LibSSH2.v1.10.2+0.armv6l-linux-musleabihf.tar.gz/md5/18db6a0698da98ec8247ab0f86e9a2e9 -LibSSH2.v1.10.2+0.armv6l-linux-musleabihf.tar.gz/sha512/11657f0b411ee81d84d3c02befd34d53d25da0485214f82e6ac9601fd065127e01ac74b592d328481a0ed7d04c231b37f4fec773e06d1c1f5186f6eb7eae57ce -LibSSH2.v1.10.2+0.armv7l-linux-gnueabihf.tar.gz/md5/0a49a14c15176f32867f49a6e487de77 -LibSSH2.v1.10.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/aa81a8504529b90e6e212f1495e8732118f5d0eececd4a809cecdeb88b97f0ca4a1081e669660ea8513b15f71299854da9eb5f8352f099796df4fde33f89072d -LibSSH2.v1.10.2+0.armv7l-linux-musleabihf.tar.gz/md5/0bb9d7c5c40d88e19a2d9f3178c4de64 -LibSSH2.v1.10.2+0.armv7l-linux-musleabihf.tar.gz/sha512/cf361ac498daa4c3a0b044171165756e54402f70c75fd5d877e6a24db9b6930c678c3f45c16850b9138e8f97cab9f1cb4ba82262e48fad269a36fc556215899d -LibSSH2.v1.10.2+0.i686-linux-gnu.tar.gz/md5/651955e1c129d88d9dd0e9b048505db7 -LibSSH2.v1.10.2+0.i686-linux-gnu.tar.gz/sha512/b48d3e5eb36d4e0ef36bc512f1fe65a85e0ddf16dab8da8190f642d2460b6ab94677838f263ad280f37a8bd838c4c8283a3cc706247d4241d8760fde797fc163 -LibSSH2.v1.10.2+0.i686-linux-musl.tar.gz/md5/74b4741009fea8bb10ab3f6a44937fb1 -LibSSH2.v1.10.2+0.i686-linux-musl.tar.gz/sha512/13b64fd49d6a6b80dede0c049db871c5b22fec2f093adbe6699f2e467f7458684cd38093230c5d2fc837e500c8d1e86cc2966b9805a2ed7a705d50108a95803f -LibSSH2.v1.10.2+0.i686-w64-mingw32.tar.gz/md5/20c1c7d0825cba67a0bbfa822348a245 -LibSSH2.v1.10.2+0.i686-w64-mingw32.tar.gz/sha512/2ac02ff310911998c07861493b699837168e43e40172372c33fc7769ff6eae2f2f2c65b10319c2f10316d34c519ec2fc5308b327b62caeb415ac7c5c692fa81d -LibSSH2.v1.10.2+0.powerpc64le-linux-gnu.tar.gz/md5/6d180f087e415adbafa516c471315ce2 
-LibSSH2.v1.10.2+0.powerpc64le-linux-gnu.tar.gz/sha512/23ea211b5d1219454c14316e59cb94195195abebd9a4e7a6812c4d824abcac7c5b896c460c2dae3511abaae7e0afb5ead40a5836e5d94ec0c3a2b8076dd29e3e -LibSSH2.v1.10.2+0.x86_64-apple-darwin.tar.gz/md5/dff956043faefa3396fc31bddbf83b1e -LibSSH2.v1.10.2+0.x86_64-apple-darwin.tar.gz/sha512/00aa279251a04684d968e413bd9f652c6740bf4a6e860ba9b999c8584561499f1f589ca2eb3f06a01c539a952fffb41787a37a6e514d689b97693a5a7bf4c18f -LibSSH2.v1.10.2+0.x86_64-linux-gnu.tar.gz/md5/a50d390c1a8ea77d7d78c07a759fa79e -LibSSH2.v1.10.2+0.x86_64-linux-gnu.tar.gz/sha512/c985018dbc79c9d41ca3df940fcda15d7f57f0a4e869268ab7c255b4fbc7aa9bd5088281258de22523c777bc9210ce3c9e1f0f76693c0575506ea840619c0306 -LibSSH2.v1.10.2+0.x86_64-linux-musl.tar.gz/md5/a8b647a12439a7ec9b2d5111a4fd605c -LibSSH2.v1.10.2+0.x86_64-linux-musl.tar.gz/sha512/7790bf3b88513a026f3b58090c5b39b0544d873f7bee4f4c06fb23f513954f580ff2d5d552f15f8b725fd3264585390c33283906f1844cf20ce6d2eee54495a7 -LibSSH2.v1.10.2+0.x86_64-unknown-freebsd.tar.gz/md5/c4f76951ed337bc87d21993d1997dac8 -LibSSH2.v1.10.2+0.x86_64-unknown-freebsd.tar.gz/sha512/3db3c44e2100d00a537c87590dcd5493dc9ec74913ce903ce7bca697ab590417bd55ec6475a0a790ab49e9444d79ece539733ac25b0b82eaab735c8c96c0e992 -LibSSH2.v1.10.2+0.x86_64-w64-mingw32.tar.gz/md5/b58d05eb572149dbfec7b53a75dc4d6f -LibSSH2.v1.10.2+0.x86_64-w64-mingw32.tar.gz/sha512/422fb36c6d7d3f7153b52547fb98d70268da1506a4957e2772184ba52bf06455f869f1c491d82852494459189830c68569219fbb2c795ddb49d7e8a8e95d6988 -libssh2-635caa90787220ac3773c1d5ba11f1236c22eae8.tar.gz/md5/d0b060310da22a245fc488a300288198 -libssh2-635caa90787220ac3773c1d5ba11f1236c22eae8.tar.gz/sha512/17770f8de4f081840e765d6f7842d562e20f46972fb53a15e3c9e10421f3654a559c5dd1dfbafd7b4a0e5205d800e848b9c9c26ec1d8fc0d229d5070b6d19463 +LibSSH2.v1.11.0+1.aarch64-apple-darwin.tar.gz/md5/462442a50f5dd719d251f65e7170dbf6 +LibSSH2.v1.11.0+1.aarch64-apple-darwin.tar.gz/sha512/8f0d9916c1e1abb8ba2d4baef0f850f9f5aa8d24f5eeefd7c7d30697b15d33180b32588f50328f4999e1d4136a2951c4e3319c5a5dca6f34a84fba30ac90518f +LibSSH2.v1.11.0+1.aarch64-linux-gnu.tar.gz/md5/63d8a681bcce23e76650719cf3c6999b +LibSSH2.v1.11.0+1.aarch64-linux-gnu.tar.gz/sha512/9dc722d866d27f378481e4787a5bb932facc0c7b828e75e812c9c875ac10e7194d090d94e01a46bb2b3c5624e18d013f0226c5f574269df96f1de0ed16897571 +LibSSH2.v1.11.0+1.aarch64-linux-musl.tar.gz/md5/e85cfee900145726857d3609b541f7b5 +LibSSH2.v1.11.0+1.aarch64-linux-musl.tar.gz/sha512/ad42ddf4674f6359e61fb23ef6fb5942e716c6f0cd50378e93b8c3af799b9b21cc9cefc471d27e19bc31686c7aa469a5ed81918ea9926d5d432c3c5f70e83fcb +LibSSH2.v1.11.0+1.armv6l-linux-gnueabihf.tar.gz/md5/acb3b46ec386e9d41dd73cb3c9b60d1e +LibSSH2.v1.11.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/bddc50ab698c280256d819cd7ea1a39a33366f2eb6004df6b66f6a45ce4b5bdb1525f724aee35f3b9796809f9e68db4a6dab01ab035c8a88bec291b59fd55854 +LibSSH2.v1.11.0+1.armv6l-linux-musleabihf.tar.gz/md5/d4efa89a3b4e1d3be80f6790e183ad07 +LibSSH2.v1.11.0+1.armv6l-linux-musleabihf.tar.gz/sha512/8da36af6514189b7bf3422ccbbbf69d271208e7d9b0af469cbbd476ddd3d62f2f9a0d25f38f22123a603e448c285936f0692a31d91d6d79dfc66baacb8af4522 +LibSSH2.v1.11.0+1.armv7l-linux-gnueabihf.tar.gz/md5/d4eeaf06f64997a4f46737798c15ccb0 +LibSSH2.v1.11.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/4cb24ec2b13c9f56220a6cd3bb0ea1dda687a7ebbf062caa8fad9d17c903a2982f12340b93e82b42567e29b4326acda373f81c0ebb4f4d968a12ff7807f5d066 +LibSSH2.v1.11.0+1.armv7l-linux-musleabihf.tar.gz/md5/319db985d453fc32a69eaad85bdbeac4 
+LibSSH2.v1.11.0+1.armv7l-linux-musleabihf.tar.gz/sha512/5f7f35e3cb1362ecda3236c1686a211409937e90fdb29dd9e4032d541b8fe45c28f24067cd012edef879d668d7b1040e887ea594eac03ffe5412d5f2c49cc294 +LibSSH2.v1.11.0+1.i686-linux-gnu.tar.gz/md5/e7cb7d9d6db13b46250da30969f8504d +LibSSH2.v1.11.0+1.i686-linux-gnu.tar.gz/sha512/fe8983aa012787361cadb5a78df8eec6ac5640a514c3fe4b4ab23d584b018ba4fa7d3514f0951d2b687cf56bf9ee17e247049d99a4e9360aec4ecbb636d2a6f1 +LibSSH2.v1.11.0+1.i686-linux-musl.tar.gz/md5/d7c5d3e8ecd86e216d97d4912457a66c +LibSSH2.v1.11.0+1.i686-linux-musl.tar.gz/sha512/4d64996d837cfec15b42ca7f850cda32ee3b3a8f93001d24f95ff6f8c94b76517e5dfc7e525e8335fc8788ecf7e196bdb7cc64c8c53f536c592afb337d98ee82 +LibSSH2.v1.11.0+1.i686-w64-mingw32.tar.gz/md5/9562e41b5dda94c23150668559e9b123 +LibSSH2.v1.11.0+1.i686-w64-mingw32.tar.gz/sha512/d13d022ec9eb82f8afa3954c730bd1840a8d1bb16cdbd07a89fff6ce07b1c0c2ba6a9e934b2585abf7dddcb0522e1ba0f38df4385447c66986d5fcb6dddc2d15 +LibSSH2.v1.11.0+1.powerpc64le-linux-gnu.tar.gz/md5/4e2463eb11a5dde8c3e0d3fab6c58169 +LibSSH2.v1.11.0+1.powerpc64le-linux-gnu.tar.gz/sha512/d8794571d051cf206da87a4f7f0f71c8b41be061691b08333df7678c4e40fb9abbe63812a5ff5243fabca9eb2e599d81a69520cd854afbddbc16b44678fbe1b7 +LibSSH2.v1.11.0+1.x86_64-apple-darwin.tar.gz/md5/5729492b3a91e0d8e3fcc459de784157 +LibSSH2.v1.11.0+1.x86_64-apple-darwin.tar.gz/sha512/8e49b2ff1c9b5ae499f14be238776d7da2b64231592f1d46a6f769f200681342ff157d76102fa7c16b2972c7fe603919f7d2ce7c8a30b46c98ecaf2ef809fe3c +LibSSH2.v1.11.0+1.x86_64-linux-gnu.tar.gz/md5/0f7f2214d453f562107fe22af5004e8a +LibSSH2.v1.11.0+1.x86_64-linux-gnu.tar.gz/sha512/bd510f25dc26c05362ff204adcc356521bfdacd665411a99e07071ea0c4638274a2e78f009644cdf5e472a1d81c80e353f4673071d90411e6122b55160140f3e +LibSSH2.v1.11.0+1.x86_64-linux-musl.tar.gz/md5/ee98538d67b8b091c4ddcf632e72e7d1 +LibSSH2.v1.11.0+1.x86_64-linux-musl.tar.gz/sha512/38cbb59ffa420e4299db848b6ae95098e8e3e6e1334e05369089d17b04fcd0787ad22ba69465611f3e335a1efab228990db776916f40a937b21b18ca511a4484 +LibSSH2.v1.11.0+1.x86_64-unknown-freebsd.tar.gz/md5/0abe5a971abe63c3db4353e7e9b181e9 +LibSSH2.v1.11.0+1.x86_64-unknown-freebsd.tar.gz/sha512/8e8199bc0634a8ec51460cb8628c3b33d765b7060ca79aac3e80a0b29d55de5562cdbb36945b4a9feab5392f7f16d2dc48684e43d54c62a9fdb1f07f36b71810 +LibSSH2.v1.11.0+1.x86_64-w64-mingw32.tar.gz/md5/d30fc8e9b74388a1c642cb15ed33dba3 +LibSSH2.v1.11.0+1.x86_64-w64-mingw32.tar.gz/sha512/e278b87d081fbbe15f88bafa8870f0f294cea3ff8c8c1fc9a6c10228da91717aa3caa268cdb10f78c8e55651a90243962f85101eeb4433f85c1dfacc1461d96d +libssh2-1c3f1b7da588f2652260285529ec3c1f1125eb4e.tar.gz/md5/7b52de6ff70e16aa78cd699fee3e997a +libssh2-1c3f1b7da588f2652260285529ec3c1f1125eb4e.tar.gz/sha512/7c99d33e60862e2d89bbcc4a09ffff3fbfb921ed674e0d3166a255c72a8e0622a9c6be64f0eb2f8c12ddd80da0307b05d761615b876194bd854aa2c62e26f4ad diff --git a/deps/checksums/libtracyclient b/deps/checksums/libtracyclient new file mode 100644 index 0000000000000..a212c415f09e5 --- /dev/null +++ b/deps/checksums/libtracyclient @@ -0,0 +1,34 @@ +LibTracyClient.v0.9.1+5.aarch64-apple-darwin.tar.gz/md5/c6768380fef203f5310d2cb3ab6fe509 +LibTracyClient.v0.9.1+5.aarch64-apple-darwin.tar.gz/sha512/309216fbc5be52319241ccdd2360c8960ffecf50c963bf248ee6aab6a43a6cb4c3a703391c7d3e1b07cb077badce930673f85f28c0924975b75909a4654ca3a6 +LibTracyClient.v0.9.1+5.aarch64-linux-gnu.tar.gz/md5/20d6c3ef5032d458817f18aa7f92b44b 
+LibTracyClient.v0.9.1+5.aarch64-linux-gnu.tar.gz/sha512/d2e341ff18bd06b57094f2356fdb36a3f9dcf56f3340b83006dc02d41d6e5040f145a23a06f86ccd1c9800e93fc4461ddd7902b6eccb93b78b365c824e5d764c +LibTracyClient.v0.9.1+5.aarch64-linux-musl.tar.gz/md5/58cd88ab771a31326fb4e3448325a17f +LibTracyClient.v0.9.1+5.aarch64-linux-musl.tar.gz/sha512/dfa11ac4fa5261bad557a244458b2f67c20c761c5d49d31488a9b944345e32e55f1a40515097b4f5f4896fe5046e3bbc70faa40ff4dd504d4d97cfa42e46bc33 +LibTracyClient.v0.9.1+5.armv6l-linux-gnueabihf.tar.gz/md5/cce5ef56b53255494b334df157e285bd +LibTracyClient.v0.9.1+5.armv6l-linux-gnueabihf.tar.gz/sha512/1287f734669b7a7a6f43b6cf3b725de650e64edcd4284d51120e7371f91ca18c63d4981d58c28ce2da9355eefa5a02e0bc0b35303b096733a7842bc952da2df6 +LibTracyClient.v0.9.1+5.armv6l-linux-musleabihf.tar.gz/md5/60508f4f9c757678b8e52ed0342079eb +LibTracyClient.v0.9.1+5.armv6l-linux-musleabihf.tar.gz/sha512/92d2d8521a8b6c6901f36ad71f59fa1b2eb58d6dd06da02140cc26b6c44d2741ddd0cd3107bbd3e70ca1713a33fda48d5288bec59304264d0afc20d4e0167a50 +LibTracyClient.v0.9.1+5.armv7l-linux-gnueabihf.tar.gz/md5/64563cbf8fe18fe84a2ec9e9fda19e6b +LibTracyClient.v0.9.1+5.armv7l-linux-gnueabihf.tar.gz/sha512/b0158367834f32fd7449d45b983f37bcfed71727bcea3febf382a779265ce4b99845ae32f5c3969d83089bbf5e072680b11138a45b7b7030364ce341f285e220 +LibTracyClient.v0.9.1+5.armv7l-linux-musleabihf.tar.gz/md5/1d272a82701889fb695edde6bdde21bc +LibTracyClient.v0.9.1+5.armv7l-linux-musleabihf.tar.gz/sha512/1539f06593eb769ba35ef198f90b9fa6c11d7146124f21e35c8fee577d8fcff3d71f4e38e6d26d84dc8f66b06a26a130f4bc740a201cb27573ec8e6816d489e2 +LibTracyClient.v0.9.1+5.i686-linux-gnu.tar.gz/md5/4e14c36ea3b1e54a26897767d4a010d6 +LibTracyClient.v0.9.1+5.i686-linux-gnu.tar.gz/sha512/4c1d9cda642a4ea3084b73c0b536edd0f33a216aa02c59f914ab428e0e97120ba3f81e7eb2262a2242884f553fd319b80ea7b013344e87e173dc1ee9b3421ef0 +LibTracyClient.v0.9.1+5.i686-linux-musl.tar.gz/md5/75ced50efcc6ce1c17064a3447933fb1 +LibTracyClient.v0.9.1+5.i686-linux-musl.tar.gz/sha512/969c41de91d288e4e37a36f990341c2f71d6788d93bb34eb6708532ea60bfa1bae6760871de33b42cca50b61dbf8028f639538f34ab9bebef2312d449c259f4c +LibTracyClient.v0.9.1+5.i686-w64-mingw32.tar.gz/md5/0f8c6cd2e1aa738b340e10e1ce81732b +LibTracyClient.v0.9.1+5.i686-w64-mingw32.tar.gz/sha512/76824a28f16650e14b06051f49a5c42cd8692dbcf808c2d47ab21ac986cf49e17508062ce5e0e71f2a689112098897dd7fcd47b819cab6967080b7e23224bf1e +LibTracyClient.v0.9.1+5.powerpc64le-linux-gnu.tar.gz/md5/573e5c6aca49845b7e9e881c7bc8f755 +LibTracyClient.v0.9.1+5.powerpc64le-linux-gnu.tar.gz/sha512/7451731c6f0bf0ac08c966f925b0dd628f6f4c0ff7e2d65e8bd3d27231e3fbb2512169c917431baeca3fe8e66af4bbbea7ca4ba79dd7d3b9e1d43b08a580dc76 +LibTracyClient.v0.9.1+5.x86_64-apple-darwin.tar.gz/md5/b037ea1027e6466d5dd9c0fb41f65ded +LibTracyClient.v0.9.1+5.x86_64-apple-darwin.tar.gz/sha512/81e2d00bd8eaa1cbcbd5c0ee4552028ccedffcc072beea3dc08ac3181677da93406e8dfc581a78434175fa5bb861df06848dd3012f8adbbb6dc72efcbb5094a0 +LibTracyClient.v0.9.1+5.x86_64-linux-gnu.tar.gz/md5/886d5b76711252176eaf7e41dd2db0a5 +LibTracyClient.v0.9.1+5.x86_64-linux-gnu.tar.gz/sha512/783c6469c586520c7f1206f5c2eae6a909a2dac7c3f726f439da91b0f3dde970fc4f17c6e3b54aa8924ae537b1b6a14729cd1305e6488c458db68ffe973c2ced +LibTracyClient.v0.9.1+5.x86_64-linux-musl.tar.gz/md5/170a53b0c63e7e130bf3d71590e27193 +LibTracyClient.v0.9.1+5.x86_64-linux-musl.tar.gz/sha512/ccda22a897358d9eb55b500dbeb35c8d136f484a649c29066d5f2d0665d5b71526502237dbc374c2d018fa212896fa6a6c6903fc7c4833f814d1e60c3f12fa83 
+LibTracyClient.v0.9.1+5.x86_64-unknown-freebsd.tar.gz/md5/58fdabdbbdaa7b4f53bc4249a7c57059 +LibTracyClient.v0.9.1+5.x86_64-unknown-freebsd.tar.gz/sha512/ad0d4b74c707da86fbcf7210cbcc9a3ebdef770ad0bf12f33e3476c2e455d48e389f021fc47e50b85d1c4741072396b71dc034cb95bfd38f775f4879d06b5998 +LibTracyClient.v0.9.1+5.x86_64-w64-mingw32.tar.gz/md5/cf5b40edd556a7b8ab23d28bf0eecb12 +LibTracyClient.v0.9.1+5.x86_64-w64-mingw32.tar.gz/sha512/8ece28dae598418c3435cfd323609e6e615ce8299370040fdd3a000007faecf87c5ffdfebcca80ea9b4180ad9bf13d3f3d65fed2fafe6f54d4a6068f9cae61ca +libtracyclient-897aec5b062664d2485f4f9a213715d2e527e0ca.tar.gz/md5/51986311723ba88ac305ad2c1e3e86c6 +libtracyclient-897aec5b062664d2485f4f9a213715d2e527e0ca.tar.gz/sha512/f92c5bd71fd3e933f03e3535c0668a9afddc7ea19531aaee11b22bde09c57cc8a555f7f17f489d4221645fb6d73ecf9299d5bb11949d7529987beec3e7d91763 diff --git a/deps/checksums/libuv b/deps/checksums/libuv index c94d0b551cde4..6ad61210bc0cc 100644 --- a/deps/checksums/libuv +++ b/deps/checksums/libuv @@ -1,34 +1,34 @@ -LibUV.v2.0.1+6.aarch64-apple-darwin.tar.gz/md5/bff12bc642215646c8c03f2003a3c5ef -LibUV.v2.0.1+6.aarch64-apple-darwin.tar.gz/sha512/9c0bb5e648d1e967caec07c700e4657c97ea9db8b48625887eb4e91af286be62380f5c85bc51bc51c87ed6104ffc26bbd498f501e3892ca1d41eb96bab88d955 -LibUV.v2.0.1+6.aarch64-linux-gnu.tar.gz/md5/af5b11ff1354c591990285e29840d83d -LibUV.v2.0.1+6.aarch64-linux-gnu.tar.gz/sha512/67f6c6a7c780b15b9e4b317c44450a325f6966fd2948d28e113f7d4b0c2893b8b5f9b1eb6da73cce683fa7176b5587e1c73b5b1faaf09d2ad378d8b085a75392 -LibUV.v2.0.1+6.aarch64-linux-musl.tar.gz/md5/2bda667ab6f9b7f8962ec675272be6b2 -LibUV.v2.0.1+6.aarch64-linux-musl.tar.gz/sha512/271772a7acff9d2cce1ab36a46f0807bf2f30a00227d0cfbcbb8eac4c583e0bd406c6406a7e9b5afa720e844b1b2bcc01ec60cae3d907d0d004a7a40ed182397 -LibUV.v2.0.1+6.armv6l-linux-gnueabihf.tar.gz/md5/5765a268e960ebbff2e7f6a386435b06 -LibUV.v2.0.1+6.armv6l-linux-gnueabihf.tar.gz/sha512/31d1a223b57dfd859f6a6633c75b53507b99a3eeccbef9d47f12e0dbf1e4b5a77e489348bda625f0cb6ecf5450edcb751d4fc4603beebb01fde73aceb7ae6d2b -LibUV.v2.0.1+6.armv6l-linux-musleabihf.tar.gz/md5/be91036ac0626c1b5a9b28a15026e942 -LibUV.v2.0.1+6.armv6l-linux-musleabihf.tar.gz/sha512/0e8a338f84ce24ba99357110aa6982956a9970715202005ac4a748d3a78cb75816a9063b3ad5a96569261966792f87fe698777d33b6fa428068ec07ceb944fdf -LibUV.v2.0.1+6.armv7l-linux-gnueabihf.tar.gz/md5/921038ac4396791a555e1c2a8f5af558 -LibUV.v2.0.1+6.armv7l-linux-gnueabihf.tar.gz/sha512/45519d49d857721f025bdb08522e3c08262f264b8a00bc36d9ca4bd05d6a32ce0b1b40ba7c9cfc98bbd1201e6b4592632aa8852652abb61604bcd324abc17c76 -LibUV.v2.0.1+6.armv7l-linux-musleabihf.tar.gz/md5/06b404efd3d62d107f9331ab85deb893 -LibUV.v2.0.1+6.armv7l-linux-musleabihf.tar.gz/sha512/3e73341346060df832fcc591bc447f713a8188c06f22961ae03cba4620d524edae7b84e63ac8fd5b675abb62bf0e12f176468f09e7014fbb8df6cc763dda12b6 -LibUV.v2.0.1+6.i686-linux-gnu.tar.gz/md5/e6b31595a27a91bf34b7a5aeae48d459 -LibUV.v2.0.1+6.i686-linux-gnu.tar.gz/sha512/b59516d2340ed469be8d86dc903e3497867b522082dc6096683b23fec4b03bdc5e0c643bc2cf36ca49c2dfa11689946bd5f7e92bd68978ff2a409935203ba533 -LibUV.v2.0.1+6.i686-linux-musl.tar.gz/md5/49a84d0c90ec136b933fcd939f371716 -LibUV.v2.0.1+6.i686-linux-musl.tar.gz/sha512/1abff45b3a0894b78d20e31c4dcda8673a3e3b6d3e8fa89e8f57da115ae8feff58bcb16cd3107b4c768e9c6bfb777864056fab47de5b2babead3eaa508b2e748 -LibUV.v2.0.1+6.i686-w64-mingw32.tar.gz/md5/6ef4d726e171dc8f2aaa5603180b154b 
-LibUV.v2.0.1+6.i686-w64-mingw32.tar.gz/sha512/0699afa096208829d7b3795ee150a94e2e0446a17e77c204a7e013f63f51791df0f8c8416c0549809cb0d0c3b1f52fb525310153a68f80652e6c8def9bf17903 -LibUV.v2.0.1+6.powerpc64le-linux-gnu.tar.gz/md5/72cc19fa36b7803a4973c3913c720d46 -LibUV.v2.0.1+6.powerpc64le-linux-gnu.tar.gz/sha512/694d96e8127e4a206496388db4f09d0af0673818f5168fc3ffaa9bd15da132d5af843f068c89f057a0c62404f1e3171725b86e1cdade3e27a3f0e8b6be8e9b2c -LibUV.v2.0.1+6.x86_64-apple-darwin.tar.gz/md5/e3c076ab2aaf47f423f9de96bcd50faa -LibUV.v2.0.1+6.x86_64-apple-darwin.tar.gz/sha512/3a3e31ccb0e2a1c1aec1b2ac52ff33f7116ef84452d70bb0f680a276411a5a9ff4aad5e5533bb7d3d981f168974a94f1ea90d41b4ddc6dab1a334f16000bf812 -LibUV.v2.0.1+6.x86_64-linux-gnu.tar.gz/md5/752545518774845ee93933fce9c9516c -LibUV.v2.0.1+6.x86_64-linux-gnu.tar.gz/sha512/458494e07a096793552ee4f9e0bd302d160186e20d702e7c0691b50984692c5725042faa49df0b1595b3d6f2459bd6d73225af1385e4ff5a9d7e4dd5baaa4dae -LibUV.v2.0.1+6.x86_64-linux-musl.tar.gz/md5/6988efa401aaf11e82a916632b26141e -LibUV.v2.0.1+6.x86_64-linux-musl.tar.gz/sha512/95abfa548c8581be9f512041c1b904532ab8e62610e70b2e184d6638d1bb2552883d946565e3071e6c8f3127a524313d432df370d6d6361a5f0ce5d3c60649ec -LibUV.v2.0.1+6.x86_64-unknown-freebsd.tar.gz/md5/5e35a7220027cd6a8ded93611fed1a57 -LibUV.v2.0.1+6.x86_64-unknown-freebsd.tar.gz/sha512/218b2f40bc1c49d91c9457b9014d536b6fd6b1f6c3704a6aeec2739bcf2ecbadda1bfd36a9ef84ffb2aebd1cb6b1903276658259d4a2d873cd61780a9762934d -LibUV.v2.0.1+6.x86_64-w64-mingw32.tar.gz/md5/1aa9e7ff08da10c79984ac470b31a701 -LibUV.v2.0.1+6.x86_64-w64-mingw32.tar.gz/sha512/675adf9330de80fee97e9ebf7a6de7763a3cafad20b6aa9e009832a590a1a20272578861bb357e3ca41961a247e2be178e4455ad107951d88ce8d3467504c235 -libuv-1b2d16477fe1142adea952168d828a066e03ee4c.tar.gz/md5/054bbd1c7203b67679fbefb8d92d61d8 -libuv-1b2d16477fe1142adea952168d828a066e03ee4c.tar.gz/sha512/0cc8288429e66a9731f153fba949e3c97fba84d1ff3f392d0df4b5f8335c0ac583663269f426cf136914d1dd41131bd53f4ea0167c80970b9fa9ed4f749bf20a +LibUV.v2.0.1+14.aarch64-apple-darwin.tar.gz/md5/1a58ce9dc88984c3b5f7df97af6cbf83 +LibUV.v2.0.1+14.aarch64-apple-darwin.tar.gz/sha512/2bfd482ac759ac88d885371854affa8e358a10fea6c7756e0d1b366bc82ecbea56bdf24ca634525fb2a6fc2b3a5c77b07a4c6dec2923d8bffe2bc962bd3e7f84 +LibUV.v2.0.1+14.aarch64-linux-gnu.tar.gz/md5/7f270dd1e3046c8db432e350dd5cf114 +LibUV.v2.0.1+14.aarch64-linux-gnu.tar.gz/sha512/c0debcf17b54ba9f1588d4b267d610751f739d8ff96936c9d5fb6d8742039f8736c63fa70037322705569e221d73fb83c03b6ba9fb4454442fffd3a9f1a1a2da +LibUV.v2.0.1+14.aarch64-linux-musl.tar.gz/md5/07f56c32d5a2c12e6c351cf9f705631c +LibUV.v2.0.1+14.aarch64-linux-musl.tar.gz/sha512/8037d7aa0cb06850f055fd19cebdcfcf3146dde0d12768a9669bf05dcab91fdf3708798203258cb3f452158bdec7faae41e6afbb0e60b21403e683db3e23a1c9 +LibUV.v2.0.1+14.armv6l-linux-gnueabihf.tar.gz/md5/5558a7f68c7c375f40bc64da59fef0ad +LibUV.v2.0.1+14.armv6l-linux-gnueabihf.tar.gz/sha512/92ed6601cb5aa9a3ea2478a1485849543c9e847c8e85542e72f372a2d37c4c8b90f5ecb1bee1e462db31e1e8dba460f584b3cca9c833989c2b9ee404e355654e +LibUV.v2.0.1+14.armv6l-linux-musleabihf.tar.gz/md5/de6bfb7f0c0468b79e8895f166fb6340 +LibUV.v2.0.1+14.armv6l-linux-musleabihf.tar.gz/sha512/7948d007171bf57b827b489f3627ac74df447f4d696e8226e54e95ef0c8eed5a5ddbf758fbad841bc367f78cd61e6a5899eb478003dca3a79cb494b38cab830b +LibUV.v2.0.1+14.armv7l-linux-gnueabihf.tar.gz/md5/5be35de1d881f80981647c369b9b4ec8 
+LibUV.v2.0.1+14.armv7l-linux-gnueabihf.tar.gz/sha512/458e5058ea4e794e0dc790da4c98569676056bac336df69762e8ccfec8f2955dcc55e8d090daa1b191c0ffa41392a04530c9bc28aa27cf411c1df2f1ba14bb97 +LibUV.v2.0.1+14.armv7l-linux-musleabihf.tar.gz/md5/8d034490da1ec2ef3dd3c69336177654 +LibUV.v2.0.1+14.armv7l-linux-musleabihf.tar.gz/sha512/7f595a8ab8b664d229cf6144e9ed1b5936ba8aaa70b92611ddb85bbe9046bb1b94d8417355a5abf058fb00023d4d56be0b2ddfd5dba896cd7b64e84e32dbfc5a +LibUV.v2.0.1+14.i686-linux-gnu.tar.gz/md5/ccb9aba78456c99b8473e8ddd328f90e +LibUV.v2.0.1+14.i686-linux-gnu.tar.gz/sha512/d382d90137db308933257a75e51d90988d6d07663b3b2915478547127d32f73ae6cdb4575d5ee20758f8850c7e85908fe4710c053cb361826621f22bc5b6502d +LibUV.v2.0.1+14.i686-linux-musl.tar.gz/md5/5ade48f16aa26bb68dc046d285c73043 +LibUV.v2.0.1+14.i686-linux-musl.tar.gz/sha512/f5728a5dc567268e59aa2697deb793ae427e11dcb6796c577e3da3ac24225ece5d4a6c4f903d4a7b184d3c3a3c8c1586c34b97e4a75de0a4e23ace720020fa8c +LibUV.v2.0.1+14.i686-w64-mingw32.tar.gz/md5/541210fef837c2ef7cffa508d282f9bb +LibUV.v2.0.1+14.i686-w64-mingw32.tar.gz/sha512/4541a02c59b66f97099b5264dce0cad90fcdf9a4d7ccd8e950cc1f3a530616a0fb2aa43db21b5b1f52819efef22cd0b68595d419e2e5b05924e344b0333f8bf8 +LibUV.v2.0.1+14.powerpc64le-linux-gnu.tar.gz/md5/26656d4eaae8739099c55054bad54f57 +LibUV.v2.0.1+14.powerpc64le-linux-gnu.tar.gz/sha512/f85f8cfd91e7b1b02b073931ef9a3bb05620641d18ada039744a92b8c40e5a3de8d7c5efa7189b88baf1eb11fbcf9e6d16031b86e40f99f1b7cfebb0f5c5adf1 +LibUV.v2.0.1+14.x86_64-apple-darwin.tar.gz/md5/c7da6b91394a20c43acdf6f680cb62e2 +LibUV.v2.0.1+14.x86_64-apple-darwin.tar.gz/sha512/238d22bd299ae3b0dfd24a5b38d6d0d07b751fb301487a2d1d2f5313ae3596f33492388ea9fbff549293787505fc527e174ebcd4068f1bda43b40bc19e016d89 +LibUV.v2.0.1+14.x86_64-linux-gnu.tar.gz/md5/8c8913068263257cce5042b725918e0e +LibUV.v2.0.1+14.x86_64-linux-gnu.tar.gz/sha512/a848381012d5a20a0c881f5835e479cfff811928ce508cc57041d69668782f2135c14c7e5388e7dbf693ae57aa1825d911f6f450b9e909cce45487b03a581a23 +LibUV.v2.0.1+14.x86_64-linux-musl.tar.gz/md5/16747c066b6d7fe56850c77f66ea7478 +LibUV.v2.0.1+14.x86_64-linux-musl.tar.gz/sha512/833a02f9191edf3b56f1e02f5671f22de6cb27ec3c9f770530ec95d8da7ba0b9c05bcdf6b094224ea8e43ba70918e1599f3237bd98900763daef80c327d3d2de +LibUV.v2.0.1+14.x86_64-unknown-freebsd.tar.gz/md5/e828eb79728e75766a72d7b304c9f989 +LibUV.v2.0.1+14.x86_64-unknown-freebsd.tar.gz/sha512/37df5b966f70b3d1e0eae603d4a6b00c84dffdfc3632ca581669a99a0cd894a81aff4361de3beed53ec032273f62cf397cf52085c6c387d0bbb2c57b59ae84fe +LibUV.v2.0.1+14.x86_64-w64-mingw32.tar.gz/md5/bded9d94435a70fd0dfff3f0fc605736 +LibUV.v2.0.1+14.x86_64-w64-mingw32.tar.gz/sha512/48793a386f6231d12f01b4718d87aaab409f0b807b03a3577e2401f7493caef36a5072fdc33f3cd3ce9733ba50ab344cb2e2fa6a21ba5adb56d6cca642afad0c +libuv-2723e256e952be0b015b3c0086f717c3d365d97e.tar.gz/md5/d2284d7f6fa75d6a35673d22e1be058b +libuv-2723e256e952be0b015b3c0086f717c3d365d97e.tar.gz/sha512/68d6ab740945b9ce3475118ce3d186fb67d7e8125784cc0c827df23d63f50c40c0261ef37365d8c11ab9462a8dd4e2e6b19e91e3c84b64d8fb84fd3894afc4ac diff --git a/deps/checksums/lld b/deps/checksums/lld index 4221bbe19dc42..63a7d923451d0 100644 --- a/deps/checksums/lld +++ b/deps/checksums/lld @@ -1,116 +1,108 @@ -LLD.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/20012b55da73fd1272d2a084ee55251d -LLD.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/b3e06b080fe1e6c980f7b7528b19509ab64ce13500b93cbc25cb8e51112021ce3eff39464db24e01c9146bd8705345fd2de0168ee376a5fe0986acb965b8118f 
-LLD.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.tar.gz/md5/0eb86a0560dc8da58d3d5cd8054e869e -LLD.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.tar.gz/sha512/c99d5faae6f934ff4af0c650313d9089c56111e32b07e503cd0e184fc1d0b2ca47a4af34b63b23c9bb4f6ae81ab89d2e9c4019a5cb3de446625eaf1157ee68f5 -LLD.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/393db9b4add7f2251adbc24b8053fa6c -LLD.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/b1908b8b44e1c0ff308532bd146fdc90207900caebe14d89c0c88b8153dab69c01a68b832d2fcae42452d65975b332ef5a057a43a23d127118905e5f940926bd -LLD.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/65554a74a635cd467afcc64ff385081e -LLD.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/d4fb9b34582d962c910b22f8fd42da98e223556f3b7e7fcddc2582dd446b1d566835a033ac029917fba6794b03786bf66f4ccbc8cee76300cbbf247fd73d5756 -LLD.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/27888328cbb54fa378e5a1b8712d1f7d -LLD.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/d22cb0bd7e5ac94ee65605c559dc9bf1e7b0c759b8ff327aea1454f294c8ff6ec15adc95d5edd47a0bacd4faaddbd1fdc7854f60dce546013496f4cd3fc048b5 -LLD.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/e305b36d24a47373df7ecadab97a767a -LLD.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/a68d79048837ba8349811d464ade0bb60bb1285ba8b5e1b9786b6520648bf39da57c7199390fecc4e5ab45e4a85762fcc264d5c0d1c33859f98a6dc706c8b858 -LLD.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/34803d5db44ff844d122ff0dbfa1526b -LLD.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/6113fb5c0a30a062a35c845accda4ace45945807a156945975b6cd85b76072c6d2e2ac29b2db7618ba3eeb94a8b4cc5bfa6b4f048f59ba96193ddc3fbd30d3fd -LLD.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/24b54200454dc336948ddc857052d268 -LLD.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/73c3e30fb6b0d164a03bfd5c24c771b1c78c220f7d9ebd5587c58168d25d8584be5ca7c22caef154796221a5bb4ee73e64843dac10d1f7684221350f9cdb1850 -LLD.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/aa0b11d536e3695d46f6087930443670 -LLD.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/b251de1bb147e70d8a717847d8bc80d4584f1a76b7983af1834d2bce5b52bedbb5caa801405ea181ebc726382729ca927f9fce4de8f75d598dfb518700c1294e -LLD.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/7f4ee17f1130332ea35088b49d60de49 -LLD.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/33cc1d830a5ffc81dc270407adaaf4efd9b8f03ab10cd4cbc898ca5b7bc99730e0302b576bbb4ba0fb98d7be1db3bc04ed68ef23d240075967251d4374fba543 -LLD.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/352858cb187670b14011a814927ee402 -LLD.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/0da5153e96128faf0a46d6b3a674d6c4daaff4dbd72459c6dd0eb4b8ecb6a08288ee8a44136bb3e993f901945e63da420c402fbff33327354e069e3e3126bf4b -LLD.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/69fb704f6220618ed3ee1336c70e9ca5 -LLD.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/17a819a855c89f973f7ccad77a3f0769f73fa7bb1e1588e0c8865240170e5b87961e793784e9b7150210d387b46702424da3f0443bfdccbfcc5bb323608b6df0 -LLD.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/3483f15dd9064c1bbfff9900d86c91d7 
-LLD.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/87a162dd46b4f912558c6dae0d9ae839c19d075d8be65c1889272bb96a0b6808ceaadbb90cf49ae2309a2d74ccd7f1a59762c760c3ce8af58fea165d8b99251b -LLD.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/b494a49f29668722bbbe8b00e8034f20 -LLD.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/708b46b0039b217a6cb28656899f515b3c68e1872f9b8d130211dd183853dac0850c9f69dfd2522a4d6c54c7125586998a6e2da1bdbfbe9938a0bc5d19aaea02 -LLD.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/7a8060cf75d245abfe7ca2abc2f88f20 -LLD.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/83035dc4194e25a5564ee4b80d15ea700d8e035d0a0a9ed894031f7bc0f83c0f614ec5612a5be71c9bcb15b7689ea93103fa39e7d2eef7a5e56653e911ccad42 -LLD.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/9d659f659bac6423a049148429d5555b -LLD.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/442a2b54f07a4bd3d45e3621fc90df76d9eee39f3a7891c4d663e6456a50f9db8b5729abfab590275c004dd29df3720d107740926c4e34485cf9b10f66c7929b -LLD.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/f89da437c0601eac278f487edd6fba48 -LLD.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/bde34f67c344251f04acc4ab3cbff8cd5316cf8645771ec5272ba7e7ad0f69fcb2f74c91120649b99c29485e091554115452f8393203b4d3805abcfc70e93ba5 -LLD.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/f3758e4b58017ec19d6c18841faf263d -LLD.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/cc3fc80e3307fc2c2d7c0e74449b896673073aec5650408e7163e041ec275d95e45690075d7179ed1532dca123fecfcd8d0415ec64ab50b6559d9bab92cf1239 -LLD.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/a93d6416b51cfb5bf887dc913a7338e7 -LLD.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/95addd0d105d984bbb21c64050e30121ba09dc08d7e46dc2d2b6309f07f05dc21c7ba49b10e9c99d77abc7aac3d1b3ab7e54c642476190e47dd3e818fbbabdec -LLD.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/149134c9b6be60959d0575dab31a36a5 -LLD.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/6691dd6ca12e60278cdc4a0c38a999bee75a3161ec68170a3f491552e98db66ea8f23756f39621e6e03ecac3576ee2e4be8aff4ab47727e534295a09560e001b -LLD.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/b97e1b0c49d7d60a40faba4bc2260f68 -LLD.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/c57d2ff065bba6ff12bf06dbcd84883a5b67b7ce8feab092a687cb3b74a8e32e1fa51d2cf16be02ab73e19f8f55c2f61c35098aea41c9c1dddaa05d24331df73 -LLD.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/4327ceb7e619cff6105a7591446d78cd -LLD.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/e5b3069517c1615d158603a38346c5edd942efc466f0429aa25fa51439d9d81b1ddf7ef8e4e44cdcb96f990ba0c018d61c0276072db500f5a0347b12e42d9418 -LLD.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/935cf4ba618f8f1a7ba74652deaa4616 -LLD.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/4b2709aff10bb3930429ed1c4f636e711b752efac1f45b36e75b6613fcc5243090a5339a1956f705afceccf47e04ecdf23b69f8a14fe853958b5589490b6cba8 -LLD.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/947647161676c6498d193270813b1f53 
-LLD.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/b7bacb86ac965957607c5e946644409549d3fed589115ec2ea6c20c85dc53bac1118ac0f85725ea0a111c530302b60d1113eaca81061d0d36fc4d8dbc780f89c -LLD.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/422931dc14fe1544d80d3a17f8fd0c50 -LLD.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/ee954ced2707633cf45ba3842803e81253f54ce018f522d8b218858f7c681df24709949a0ee2b9654262ddbe7680afd5df6f1b92666600563e10ac847cdf33a5 -LLD.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/33289a5ad559f48cc9b92d5b9e2e4eb7 -LLD.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/f2374b40ee525e5913c252c89725e08de70b901dc7a75ad4cb951dd821408f4b393c3400f5abbee5f8b2f1800df948a6653fd9c37e9ef9be43050706e41aa02e -LLD.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/f4c0de6da8d796d5f7befe82e119ec2c -LLD.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/557435c30c0a82283bf7bc3d594b0613ddb744aca2adc73ffb69b7b7d2b5f66c3f3afa3afbf19785f2625807f6605fb5070f8898e5e138c6afc9e9250e080d32 -LLD.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/eeb9b6b3c4afcd2893b48506e17e06a1 -LLD.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/2e84823c35bb2d48e982982861cddb1c5fe54f5ff600c54310d746b333160b32e8958607a8977c8c10e126bbfb04a2edacb0ae6e23e3f27680e57cd1cb3e1d2e -LLD.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/ef54fc27de7ded6865114f934ef4c0fc -LLD.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/94cb0e0a4e888d9c4f8b5d6a5700cc75ea468fe38566472d6ae59fb12bb6b417b4d2aec847a3108b5fa5b07ef5813270197fe7d6f6a91e88a9d4b9000a6a5467 -LLD.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/642ab4336eff0d23a779eeeac48b192d -LLD.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/b29cdb725228c9279fa2df657d4a5e9ac83c87cfa7df7f68b94eb9b12b281e6d8c3ca5541bfd823c79ca86c22efbf62fab6c64dfbfb07de66ecd2eb0550a3e3f -LLD.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/2b8ddb0fa6f65e34045832085a34749e -LLD.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/05d4c4d417cff242f8b5d728b7e775e306ddd764f472eeae59103997c56d8ea21bbb81846200b5a6a550d7e9d0c0d40da4cd4c30e4376785166eab700b04112a -LLD.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/md5/63057315e682e90505cb821a5fe6cff6 -LLD.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/54ae33f4894df45e002c0528bee9bbada0f3dae4b72c7a6fe6f6dfec80e73e15e7e1138fd458c13adfae4c3afbb4556b1ac380132f210978fdd1e902f7aae016 -LLD.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/c7240eddd6a0c00dedbd5a78cc857a51 -LLD.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/78dcf9ca0a38e93af46528678e566325b0a9ac229ccd9d3ecb581c9a590424ec5aaa34c2c7bbf6a5937b6dd2d230dca1a16cd5551ae5fa536820bbc9d16dac28 -LLD.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/md5/1e88b8f9657824a97d2eb0465ba42862 -LLD.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/5ad4798e7609f996fd118e0f46c0f964535821ca323c8d77282d499c0fdcad16f3e3014a07b4b4664e91e17358c01bd57173458b0596f6b99474edf94c8ddb05 -LLD.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/1f678f0231e57402f3fa04dd377a3f2b -LLD.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/d1ffe94d145f1aa4e34600d576949a9ba09d286f686a80726c0d0096d58deb2220cff12a0630eadeb742a5c09e25d4a4f78ec707cd80e46a6e411607b527e2c4 
-LLD.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/4ddcbe7799c5e76081176e3f38eafe35 -LLD.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/544f27286d86ce35d46bf856da3b0847c60699f77a551ecedd5b17b508f260b5f8530d0720a07fd3bbfe679b9b388f929a6030a5cd5bb1fabddf9a340b1c90ce -LLD.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/671626899b42c47c5b8494bae67f7195 -LLD.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/305e31ce6f63e1ecfc970dad58cdf6e229f7fd38abf422fed21939fb279816082e706687acf86bf04c2826bef70e8af33f28724209478cb8a0cc34ab7111588d -LLD.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/ae1d3da5fd86a1405a8081c54219b10a -LLD.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/ce12ca7925dab2f847a18753b285a2d2387d15a9b8195add02430ee36c95fe10a46c3a79d82c7cccb558d1edaff46cc418faa9409a519bc24a62fb2b985faac5 -LLD.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/aa6a9c2df9bdd47559237ee4f6dc8905 -LLD.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/6bf4b62864fb5b257ce6d7cec6aac5960e1eb70f67bd7b882f4f81f19d0898fbbfd5f7d34ce2aef1791b242431d09ec6e880addeb449e2ad574bb14c54ef2f3e -LLD.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/ef05e6bfb63012b0e2bc58fc4fda96eb -LLD.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/7a3a7252a5e7a64164876ad071c3e0326925653b007349dd6f0f99d7b58de2617d94809504e4498c18e369a850f33ebbeb5ac320255d9a63c9f83b729b804448 -LLD.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/e3172fcad5d26bc30007aace41ee1ef9 -LLD.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/cd3b2677faca30fbcf569994b90ea06688db56295103e7e62176da420beb6b31ccc8046ec65759e99f43486bdea916b80ffd6e144b6c172be10a232edfdc7b1b -LLD.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/9c4fbf757b683f62f304d660502b521a -LLD.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/c98afdf4e0a7bcd39bb452caa32cfc386e687b791faeb8fd8b716982c78246ea97bc1b6a28d0fde68e32b90b72e691171f982280b9186c305f550ed7640668db -LLD.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/316932ff1703a4e8f682c311fe64a294 -LLD.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/6b417ed776c78b89d6a6d0a621967f6e4cf035c5c109fe7cc8ed58a9206980a14a6bb8115ea71c06ec29b9b523e4511230566730baf709456fbc540140c9f5b8 -LLD.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.tar.gz/md5/ffa9bb14eb0c83da9449ce1aaf47335f -LLD.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.tar.gz/sha512/839a5ff94218afa7e643074abe7e0a1ce0a4092e7b522470092762a53584c06df023733939123e49e466fd211f90edb08a104402f017267ef77d70e899ccaecc -LLD.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/5988523125978ad0200c15eff2bb89d9 -LLD.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/871bae17bfd03585b339f05552ba13c82ec1f86a6309f19a0e543e7fabdb6269d8383cfccc75380d56531a69c85ab696dc754e1bb42e061f69ec490b7e1e0c55 -LLD.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/f2c3bd66884ef20c40d4451e06726c44 -LLD.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/d6a89fc6b716d3d7dc4975077d4eee52716153b0135f5b7952baa2cf5040a54712200b3974b80bf62b845b5c13979f566cea8e1f82aba3a3c9df774ea2269f39 -LLD.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/40d73658b5b8ebda902eda8bad866ad7 
-LLD.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/1a5c6ded1c41e2de72243e54622fb25a2524a9afc1e35217cdc031e33257ee1ee4ec1fde53bf57d163c0852f14d5a162c17524105d3907158dbed30744350df4 -LLD.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/3d5edab3e2c95a4bf890178f49aba0e1 -LLD.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/10e2302d7eb00f93e76b13891e67ee8c8a08165d631c7864bfe0d5163ae9024efb481deda641b979b8bab878da6b2154b4bf156c7c10467d4096ad430da37e34 -LLD.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/a02baf61b1aa531b81a941c190ce4554 -LLD.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/3e62671eb62d378cef91464e1e00cc993154ac6568aaaafaa37ca07c1a5cd1ffa7fc7fdd26c0b4d029d0913151963895027705e9b7d03517d2c13131d65313b8 -LLD.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/03c97cf0c8cf301a0be1b0a2e999c510 -LLD.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/1528f8ad757ee3f08a67525adf28b2f3f9f8ba5bd6a73917a730edce6a3b2d279cec7304c9b04c7362a1a9075718549244a4fb19d5fff50ebcb3fae6ab067f8f -LLD.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/a601d20696276654c71a22714111e4a7 -LLD.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/a6549c56cfc6bb7ecc16cdfc50a34224bcb44aaef0d6908bd53e2854fb515fe2625b4256b1a5d4f7c12864fb40434edf21b97f66ffb5e077772f60c5fe4c7d04 -LLD.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/5e3fcd3a79f8ebeb690f20bcdb4ca477 -LLD.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/9044bfad563f68a876a28283ec1953f218a5117784088627e2ec620526ee33f75376eb61a522c2463a17e65acbfe880708743b43b5c665b511461f30e063a33b -LLD.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/md5/f47870392b92f061c4239f80760ef59d -LLD.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/sha512/8f2d02662112ef5071a48dfd6da426926c12d901e7953debd394ddd6677cd90ae9a310e517fc02a731f96c7b5e1cc87041f3e83ab8b968ba2ba46c6a02ce288f -LLD.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/md5/b2fad0e880cbb6d649e9a7e67651b720 -LLD.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/sha512/afc57bc009b7bb3af526da83910e82f51f4a992d2d5c78c7474a3e34792627c1cffc6ce82c78aa9308de546cc49ba273b3e3c377ab6c73f9ada34e406b27c83c -LLD.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/9243ccfdff355ee87108c078113fbaa3 -LLD.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/4a025c06423ce3db52f01abf82767baef325d34dcffdd88f77ea2ebfc57043bd81e2d3bc6f082aba9c7ed3e0155d2de92cc9c3cfb2f6bdd8068092dd70e7af43 -LLD.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/57b9550381b628e4719541791c70e6d4 -LLD.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/1a5c8052d569b6ba89cebba516e0d4c616c41590ab9b53a392026ae5d29bdabd22a8389cf5229e42b52334af27a56b3fe5872ef317f4dbcec7ab80538927bce8 -LLD.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/e791dd0dafe62743da9236ecca184915 -LLD.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/dd224d5ab5b0aa813404c8cad9884f8d1527857e8f1da9cd0271512c0b8e10fc44cfba3882ae7e6204d62880732cd4e9f3a8776cbccb82064a67dd468f17b08e -LLD.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/54179599fda9be13c1458898fa317606 -LLD.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/07db5ae6f208a4d2f2bc4157609108234f6481f0b5e9ddd17823cf8956a00765b1e76c44241be95a6800698e6a4bd663d31c763471c539f9c3e2a5869a9fb4c1 
+LLD.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/09c320b356e69a8d2c91aafdb0d735c8 +LLD.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/42d0ef4bce4425832c535f16b83a86570c0851736ec671f94248b3216600e0c0d8fb0a9f7133d9f1ed6a3bb95e9ea5536db99b35f2f29050faa77a13db21d5a9 +LLD.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/ed7b9ba513235c91d96e0258355eefec +LLD.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/4cfbb5198a0f59c54f81c3bac4d880b09e02531c113b985bb512a161dfb66d28e1ebe5a2d27eb567e067545141d25c5b6f7658439dc20c39f0cb324b406d87d4 +LLD.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/0fccd87c8d6016a812c842255b4318ca +LLD.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/6b44fe0337b74bc80f387ef41dc11b06f7277375ddae3de079071cc0bb743b42a90068453c3d9e2613d7cbced47d134c67297fb7dce6ce098673354fe49a6292 +LLD.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/bbce3af58d39cb571ca851bf86c38ca6 +LLD.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/e577f2ec2f1a15115d850a850bc0679c80e2dcff6a9f3982ac84cea6497c71d7bc269ff1fd262d427d3a98bff9f72eb4a127ea841471af96078f7e43bea6c5c5 +LLD.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/a461bae1e190f5f7f72a64336db88df8 +LLD.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/312f465093817c39f8b17cd004108166241322bd6559545f64bcc5b0c3247f7f7b85e618587a37e51cbeb8decaaebdc69d60f34bf8ddc53e5efebc4d9192ca9b +LLD.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/017a1ffae7b74dc5ae069c00b4ee16e9 +LLD.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/62cc6fced8358ac46562887fa03c4196d77b61031b4f80ce3ad339bc6b8ff1f6079b99a862bfbf899d11201b744bc495f7b44315672e72886ae6046c6130d488 +LLD.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/417d35e8169ef5ab07ba41d2de46d7e4 +LLD.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/fff169fe51910680110fd3df8d369fe529f69785a151b4274b127223fe9c660f160366044b74c17a63c24abc8f8c659cfec50624e98c99e7d60ab84716f12888 +LLD.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/a2bedb0bfad8a556040b7e5ca0e2848e +LLD.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/0bcc2183e787547d5181d203f1c132b63186a100f1b9348886443eb268eff70086009c3dce17266d6cfcf6285bd15c50e18bbe1a30517cbdc5b1769e5f01ea94 +LLD.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/773083d5c46259aab09522dd7be7586c +LLD.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/d6e240cdb8fef431f88ff80364c0c493eb198ef599db13e91628da08b624ed13070a1add19d2507fc4bc006f3fb9709a4de601a4e2adbb30b3967c72082da583 +LLD.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/b44023d4e9557f997fba9fdcbedabc2e +LLD.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/1a4f92bf5eb73919384c89d84ad3059c3ac585258662f8b450c1cb878c7271659cf1a3e90ed7337123348399580b39fbd29e9a68b64e94a66ac56f1637b6f9d1 +LLD.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/f6fa081c30f619170b9607d9c374a19e +LLD.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/4ce50a67739d5aebd8b96ee6c7441685a7fb00e816b23f685c102c84300b9da98b83b8f9f2d7bcdfd1ddc288c1b7a03e884dbdeedb665aa16108e97e58a8f4d9 +LLD.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/ac3a61e234e6ad8d6a7b8f56ea3fdd59 
+LLD.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/45a6ffaffd514628569f9463a4467d07049c9c115fae7cdffe8881e1151968e5b5f1f196b969e4a4319fdf7af4e667e5bba5401b98b26038fba72fbed95d9fe3 +LLD.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/8843ae687a9a697c49e7b2718a819fc5 +LLD.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/6eddcb1ceaeada1134c4b8cc8641e4c7c8998b975b98adf6c0b460bfb891f0ea7bdfa086bc41f58d0c9ffadcd44d8ccb9e15895cdf7127317d9bb11659c3e0b9 +LLD.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/d21e19634ba816b5d8ed06e8e29fe264 +LLD.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/098c299191c527ca953a201e27e9ca7202c5a8093e3f338b31c5f54fafb243bf2aaa366eb32b18a3770f86036f7583bc5d55d7ae39cb7bd61af280e20d57a5c7 +LLD.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/5962ef5d8db691edc3483e8b94ac445b +LLD.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/076f1b0995e1ad40a4237a25c5de03ce29025eed9e65db33a935537bc93e84b886af6ecf88ff1a5efd075c3837ab2b80addde99580325043a190a4cd6864cf59 +LLD.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/e8c0db4a2c24728314fa5d338144277e +LLD.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/703fc4caf03390afea0dc070c6ca9b2b402bee87d8c62053503395e858a67eeab369bae4ec3ec52fbb618856fd582aaa10db1b3843648e5343ba10359e2846dd +LLD.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/eb595bc0386677a420f356ffa2eb16a0 +LLD.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/a9c6c1b8464332ec5ab9eaf405ae18efd06a339a970f8f28cc88fe8d45712b1420e5b8add02e97f82204c49b653ed1c7f3fc87a0b4eada873e46e53d0e132f0c +LLD.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/553a1eaa6982bcd82dea38f81c6e024a +LLD.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/9e0007d255c01dbd08e0c51355200d53219850b0ea60d5747a99a4f3241a79f5c171315f7b9fa2529ab6f870d281c76d9b5a4ec3e10d58e41b3d2486d3a38861 +LLD.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/713c09bb19b6f47ede0b4ced61d5039b +LLD.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/553b9d20b7291521e607b2df7a2964e69a58599622ca3084f795bcb77239617a88c08bbf0bde07cd7e37cab0f53dc6d500bebad957415a5e66223af9acd0373b +LLD.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/f878e4d2464cc1a70eeb5fa57e80978e +LLD.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/3e341e44f54434d70b99ac1d71a9e79999a31656b0d442c1692731e4bdca9b95863ea9f83c3da968a774f6f693c9088374aad25836da924846f5ba05f05f968a +LLD.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/2e4fae7565ddd6717a34c8240dab8115 +LLD.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/f1bc27876f546e0f926a63c0e0b878bc33c2836e5902809f186c54a148f9c96f2f88d892910d4cac14ab4697723212ce0389a79364d74420aec075f034bb3867 +LLD.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/e064f11cf58b6a870212a9082a2a4844 +LLD.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/75bb485d234b077ef3cd607d30568943559cc50ccc5d5aabe9556c7fc104a1cbab24ada562cd6b8b1060af58bed10da6697fcd9f05fc7d669ae694ca5fe010fd +LLD.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/b0754db228f635d247ffb5ea9938aab9 
+LLD.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/650b7ebcc5aca07b1a9a4a8d7c2d1dac60bb61586279aafebaa718283933735929dc2fc8b08c8901626533d1834be4071fc03cb5a8183311e519d245cff33bad +LLD.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/ddd06a6b9f67a95053f4f06d85ad118d +LLD.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/e7e87fb0e2c394dce36134bf0024c6ed613ab6707da6a472a708771f7f2915ccbe4539aeaf3fef2c5db08eb5cd5b9fadc30c18f6b34cadc9d56e1aca89fc0b25 +LLD.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/89673b45e8b6ad861df5c469ce9e0dc4 +LLD.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/ad654354e7da47d6d1db8b9acc26a02412ed02e842e0c6eb8c403785fa502bf75453d575ee01927db4f39e4803549b25b3ed4cd2e0354b01292877b45a4a7b84 +LLD.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/84a8693bdf7a669b523bcc1475b6e868 +LLD.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/d784c66624069a51b7df50773e9946db73e7015af617ba745c8dd7e926936295e88bdbdaeb175c363903340c422193ef0437e035a15cc7595c4fbc0fd3bd56c1 +LLD.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/d4005209edfb3793556a946c6a6943a7 +LLD.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/80c21eee46810d4a5dae35cd42604266c463e73f399e7d0c9cc62b90ae14f3c8fb49b34254612b0efc7af2bc52213c35f1a50c3a4b4fb30d4b9f06273e31d7fe +LLD.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/0563df5ee7aa191b4c1054fed3ed228c +LLD.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/578dc162a37c38a3ebc9f45f5ce220ded71df1104a1bd830fd5055ae78d634e3110fe6a0b8c86bc08aae145c16e7c964dbe301a1df752db4a597f98b653a5e17 +LLD.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/d0e448c7694f03e7221020a5deac6fe8 +LLD.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/f1e1605bc9a0872d19e74dfde78982b39fb0cd3b28c8af65b2960ac292c4910afc84328ac1edbf70fd2c20e697d2e826b729a6325949831aa008d3ac565d4d4b +LLD.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/881ca8f1640a296af635e255f671b3cf +LLD.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/67146381b1debe7cd71d819d3d4e930b5e9f904b30a41cbd87681675e6a263ee8008347f1b6879d13098f2f261555eda7e67494e111536b11299561d9bde4b08 +LLD.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/3054aa99b31c9fe01f018542b1ee1628 +LLD.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/57326651e05dc18436166dbe31cc2e373de78a705d1c12204db6126a905092cb81e81f66d81229b6694b82ae279049ccdc3a1decc14dd0e5d9706b9079aefcd2 +LLD.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/2ba48f3fb8db80e4145e5495583279d5 +LLD.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/ddb3ffb64a7326ae511e65bc825b01cc14a403fd129b29e9b01f00f87346cec127ffb4eadb994d4c00ae49b78f59f8d32f2ab131ed7dc59e636f83aa182ca2f7 +LLD.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/03df4914136836a7ac62e5f9c74e2c58 +LLD.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/0e5d0e32a1c817cb81169503a94ee428a4fde8b39cc2174f749e8df2b23526e738e21b028e66b44c8abba1e0be0d93ef3f341c7ae7d20fbc22bac60c38ee887b +LLD.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/58c367d5d536469831b740d6181e1909 
+LLD.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/e55d2a0152fcc7629007416f8af7601de163da794939c0f1bff9b9083641b15fb365f7ab9e78f984e77246d994781a74c56397da26d81aa66cf926ed280cc2e7 +LLD.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/c822be796e7e20e1b081a1c6c9fcb4c6 +LLD.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/a057a957b55f97c0da5991d0f8f73049ac30733aad062645614b32db17e4f6efced6c82b77a689c90bbc37b610219f235bb02b6b29d1476ed1b5f615b30b5267 +LLD.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8dad1af20464f8e3e96f77d8acf7b180 +LLD.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f6769fecba508ec9136dbe03035ce0bbf7eea14f3c806b3e21b2e32de449fd9a698a489507412f6f603af86e8139dd67f2c91d51ac3385d82fae02ae4bf41531 +LLD.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/454a8a9c011d795de525c088b0fdc329 +LLD.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/904f8846ccd8885d15eeca2ce08d327f3b13cead1368f50cbee3986edba590bfd6d696b9e0fc081f7a8a127a3b83daaa99dd74a0597027c84797099c9b0c807e +LLD.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/36302281868d2a02af18601d5b7625a1 +LLD.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/fcbb43e37308c99dcc7e2e152923eaf3413e4143124dc598d18467fdb6e8c561c0b99ef19d5d0797565f6a18f86a087de95bc21599c65ccdba42089a678b9796 +LLD.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/4468fed82218b2380295799030f1fad6 +LLD.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/37cddefa5e24c8451e9ae2d64a931f4dd2bffea498cc818a9b7dd08d4df57ac81f96dc573e18e90449e63cb104cb641286cd755e24d6749c8f9bc943e248c5c4 +LLD.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/3089a2c2700d0a645d1ef4cc26a7dca8 +LLD.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/e935b01349d844e8c6cb9693e6d0dcb8a99973bb63b53df010a207833c4ea78da3493421c28f26122e92bc109f4fbfd55c547a7459bea2fc39ebb3c9318571dd +LLD.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/aa3aa224c3403e9a955e8cabd4efab4b +LLD.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/255f6f5eba7835041d510a57cf706b273f87153bfa402b4e0950a418c71a582232a99c94cec4262fb8e3e2ece973f5ef9197a625425515541c61fce398f439b1 +LLD.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/363e88347921c781d163844446f78f0c +LLD.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/2e12a0d74cfe145a2c12224d9a2fc5be0a0e7f1332d17b89fcd694b0d72c1bfe7dfff030cbee16d02cc7c9df1d350541685fcfe1ebfac38e4fa4d6172216bba6 +LLD.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/7abf726bc36460e40dc2799b65dcb0b4 +LLD.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/19b4a38448d503a6dbfd5f972f8bb60df4d54735541559271be242d977519436d83a9558b42984a4128a2b467bc3e22bf5c37725db2e03c8bd3fc4369a526d7d +LLD.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/f1f685b8f111ab299a3979de7f5b3718 +LLD.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/657d404e9295170696830f94bedffc7a3b638ce5e515c24b3a60e3da1ee9ef9d00919db6bf324f9fe9692b916394ea1d93a47840358ec1d56a9734fff0c6f9f0 +LLD.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/7968b42dd7370573a820c3a4aa9765fa 
+LLD.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/716a6ded0379cdb441b51031919814afab5ead38cd8004491e022b20c2a2aaf6a1481d4b6396e70e30a8f2c4b81f03450ac5c1591e001bfac345d492933c1556 +LLD.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/8a375261e55943a26956036a3f99b72b +LLD.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/4678ef639b1b4ecdf9bd818881f3f01b134f19b3f43c152f2f05cc8ee2b2f8f68526788cab1b0f0d3154bf55683488ce05e21df93767e749eecfa78e7c5146b5 +LLD.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/9e654f59807553bc6b63649561d42db1 +LLD.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/bb4401fc7733e00469d73a761e7eb9e1c6c4a2a9ebd420bd048fbd189b25a6d17e3eec177479b63e8e71471189de1d5becc68022cd9e4d8b0ef96890400eff24 +LLD.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/967160242a405064609a4828e5506e53 +LLD.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/a2e02f065dff865c2447e689d1a080073fc0a160a8a4492912633f8748b0637cbc6bf6923560fc7f31091bfbb968f339f48ffa62e768440dbfcc8f1c7ea2ef38 +LLD.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/89be1c975c0c240608f40fc89de9cc79 +LLD.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/86fa05bdd9f544dee43dd5ed701a43bbaac137d0f05e3bccb0ff32cb51fba790a6446c814f7dfe644ee82661a25e34ae4fa905b88bd65c71978d905489240c5f +LLD.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/b8dafd04d9213317e7c6c99e2cc02c4f +LLD.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/107d0a97eece870f86d658531478a5930597210fb05458fdf5ceee48953b126a206c910837caa3637cd7b626a343ca5bf6684f2da47ab9a3e83166d4d0de4b86 +LLD.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/249c60b944d6ee16c4b28fec414353ac +LLD.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/2ff5ae01910b33c49331cd60d1f3eb9906d28f81c1c8e6a56c4079c74d56bfedd316707a7fec38385112e873e0be4a05d587f9863394d7d62945bdfc8c1e81e1 +LLD.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/a331a6f81958efc3ef061f1282ae63dd +LLD.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/aa93b56e06f8e0ffbe15f50c36b1d7bb5f77022d731fafbbb6d502f93dc229ccd79f44adf6f6d1dd0ea94ecc5a05490c326533a8d25bb1df316922f86c6371b1 +LLD.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/72427b0c7a896027a47c27b0ab2bafab +LLD.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/5aaa2fdf91385133bcb0b563a7f2ed3c8c5ff3fb307b02387c94afea863759b78de097855a7949ba41449e7ee444f166fa4f6867f41ed3a561bb6c53bd747729 +LLD.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/6985692a2c8a41b34c5d2d103a969348 +LLD.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/8339b3aa2143b105c7133171ecfdb375a19fb9358e9fde0c41fcb57baca9a5603c23cbe8ee70936fba86452c1a57b4c84ba20250805513445ac609804f35adc9 diff --git a/deps/checksums/llvm b/deps/checksums/llvm index 21a4f0fe78333..715ba89ad559e 100644 --- a/deps/checksums/llvm +++ b/deps/checksums/llvm @@ -1,229 +1,111 @@ -Clang.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/41b99d6134289882716b1ade196d3482 -Clang.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/2fd80802954731a8f492d4232eb1309ba8508f8aa2aa927d5125b1412f98d67cf6031e57d12283f3f4ea8d70e4938111f4d05de576572511852fd097de22a5cc -Clang.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.tar.gz/md5/4fdb63392efe6abd33f226a48e91111d 
-Clang.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.tar.gz/sha512/9ef8aefa588baf1f169571f4cdf564fd6d259e00265e7fb56a096c2f635fdb3425e49d0303bb12feb263b6f4f55d8fba1878e69a2c076bac37eab3e7dc9c4fa9 -Clang.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/efa9b2bb19bebca1cb5ccd0fce6a10de -Clang.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/e38cf8f34355c1e941c0abe6664ba2b69c6d3d1e098b2d0f4db16587b0a05affd6bec937b4ce667c344e274d396d10357f18203d3dd5534e8676922e7ec4f2c4 -Clang.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/707fcc8e721ccd1081967b80165ae9d5 -Clang.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/799b106d41624bf90a342fa7c0e53c6ea8620d967615af626d74ecffb8728200fe0fc802734f412a6d19158cbf8052547282f5eb7910ef05c2324ba85c1990a8 -Clang.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/90590e9e4007cccd46f6eea8ef896801 -Clang.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/41d931dbf07e0933d32e4b7d5bc57d9ea7f09b6d9f53f65be557eaf54f3c7ea144ba302c7bcfdd8b62ba62daafcde4b3dbab5200b492127e1053922ab79e00c8 -Clang.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/6ba08960c18f9ef86271988d2bfa6bbf -Clang.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/78e802a55b47136e5836150e63ee2d11de34938536ad983e55677fe37f0cb1ac2fb936586022744fce234295d9e991a72daff1bb6cd195a6c4c239d2a03007ed -Clang.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/9afbb3c3ffdd07b85bece0d2a0897d6f -Clang.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/d918804a94e4275a277d4d644fab2a21c18c7a8c282a917328a54997bd2d68315da02fdd86ee244e4a414aa686a1ee90c1ab1f385f08dd232d51fec6196606b8 -Clang.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/b5614ed993ff53f32d39ea617f13b446 -Clang.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/d5c8c7793328b8ec857a58cedea249c82eebcda48fcfab8c145832b905de113012e7ce476219a1b7ed3f5d1856716288536d9ffc9dd1e302bc7787df1e7ca718 -Clang.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/c8388301ddee43f09078503730651b2b -Clang.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/4c054358ff82e01184c5916273c53e55b702c5b7c882f0fbbb703e323236b5ae649a8644edc75950ce6bb2da79ea696162ca3d10b2bb8fe1bfa3eb5a195cc903 -Clang.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/08500698891dd1aad7e99e3bc31e27de -Clang.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/b80392a18aa520d1d99e9c27682ae2f180be5e49f3486149b5084e4b5c983bd5faa14473781b9d0f3f7122c56e6a135938d75bba41cf29ba118fd6b4b06df479 -Clang.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/63adf2e3ffd46f8ed73512c3d5791f61 -Clang.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/0855e19794cf966da1db65b325e093dc6959e4980c2b4d6df1c31a74808a2b37751c673a8c2648bbac3bad1adf8582ca8d9a70c46e3150a1a34b23daefc34572 -Clang.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/d2ba43b3975579eff77d0f43fecec9c1 -Clang.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/e2944942c622d6ded5b9951feca099cabe7529b648f42d03b73adda1494517d60e330d5648201c13a8b1360641ef76eaef45aad55db15539126ddbcef5ca9c34 -Clang.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/9b406ac84fbfac82bc1e9f67cacd15c9 
-Clang.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/c62da13ec701f165907bb17459c7f769e450aea32275249924f36acaec4784cfc4b3ebc2352aa13508e16b5187c20683f537ff72eb8c01bfca542cc34759daea -Clang.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/a67d7152aad4dfe5f766704ecc5cea53 -Clang.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/24e2d60165ca0b8e8f5b10a573713a5183ebbf0e070fcdcdeb05c6812524b650d13070f636f8f3a982f9af47eba0afd26f7f5e36bd9040c5789ba459023fdbcf -Clang.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/74c8bba1f1294060d862e9d39b99df59 -Clang.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/6354f177e1197b7b181e0a858d3b0c4f7bca9267950b0c06e9d5a0443795e2fd85cde463f28f1f0d95243b123d33d2909ee70f4717905122dbeb6f327206e74d -Clang.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/3ea61b4f4f852f842e412781c72e778a -Clang.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/dd985bf86a53ccb92cc2da39ed8d5c7e422703328b583cbc28ce09934cbd3413ba5c91c2b334cc2f720565e12211e10dd0e98a5ae3efee128118010329f1d20c -Clang.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/4f4ee2ff8dc293319271bae794ce751e -Clang.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/d1c36c55b5ff9d34dc2db8e7d4ea6d3069934b2e44d6f720635e6d8aa90714251c41d2d5692a4ade974e2ef07ca7785dc80ba00bebf617393f9647a0f5d74e6c -Clang.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/df604983e3422955835645a67b1788d8 -Clang.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/89ee59248f792dad9b8bacfa81f9749fffe6ce531b0557216eae807bd1afd2630a07c114e705b196951d5ee5145520ac4e09a8e619ca542d2d681f97c0d36c9d -Clang.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/429ddf418778d8d58e034f344901d76b -Clang.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/fcc239085223eebcf94ede13e758143439c3891cc360cc3bd31d6a6239859eb2d794505a9b69a8d24268c8ddc91af63e2b1257843d0af792fd0095491ec7bc64 -Clang.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/2664766221c63226470cac7664dcc2b5 -Clang.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/fba9aae85fbabfef32be3fc0b0a2d4721752afe31e6a6e46ec45bc6a8e87481f922d9d8bc59517d52981a096adb556358ca9c4c126c151b1f5a2bd4a0af6d4e6 -Clang.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/33d9d6d2baa21a5df2f5bddcc03c848c -Clang.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/23e74ca456b9ee567e58f8f8605a0445def42e8055b5ceaab3bc79847a64acdddcf68d1339cc4cba2fd264288384f255f30a8987a0ee9c1bdfdf205ebcdc69c0 -Clang.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/47866b036f6137d772b3536ae7052c5e -Clang.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/b9104370e1d36e4c7bc17f1af9261a347048d514535820755c1e1269632d6ce727993e8223fc16ea7fa353444ae57760b8b1f6b06247e93b1ca5b1a809290be9 -Clang.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/cea598b3e0e51b39b74c961bd42abe5b -Clang.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/72cc3b9a371bbef408cd88a095fe326394ef865f139bbb76b97954f1ea10bcec6dc5015404fa6eeaa6dd2948ad08d1b5b389f72dfd80cb67372558c644689266 -Clang.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/9fbfc2144612ae9e878f75d56663d1e2 
-Clang.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/b80ac0612393278ae6fc11f7f3259ca231fba29338bc3439f5b28c4e2c3c0d5150fb791723e3b9d70a5d756a907aa1ceaa46e79cb1391d2fbcb974b2978baac8 -Clang.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/d2525218028ee2dab90af5ea5ba6ad47 -Clang.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/c7fbd4f407879883f0858c9eb2cfa4ae26f396d79e74c437788824b57521e23452d34f74ad493ef9ad959c31c5221abe5dcd643d65a4b1cabc016b9427f26f20 -Clang.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/65806064e73364ce62449f86dee88576 -Clang.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/7917337169e83fdd885e456bbd08015715456872a3d6e1a9979ce1f6262cb108584193645382a2b863a2c64ae4121af7c760153e1770bd833c4ffacc8b2d3c26 -Clang.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/16fb6f9a346089523ced6fe4fa31b9be -Clang.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/85adc6b8588c403000277228a6f3dd130eabf7ef0e811c85d3f71ede74a28152b9569309be00da19b81b307611c05d6ea7bb45b217170c5fe03ae9f350064265 -Clang.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/a245a70eb8be7a8577d0b2281b3c5f49 -Clang.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/5bb543656db7ee104c5bf29a93981c0cc36112553ee27b09d01fd1a2ebb5c410782b6cb1e76dbbe1b05e2a33ca86363f464cce23ddf19a6c7a3c094317d892fd -Clang.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/8a03581e3f37e004159df9d4212198df -Clang.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/7ce49c2605214fe7197c9d02887e976bda186203303c08e3c5372e0dddcc1aaa056f8b1128b0772c86d2ece86412b08a0cc2e15adcc22eed172cb5cddad6109d -Clang.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/39fabd08101ccdd1733c98a33b297b9c -Clang.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/cbd917f8a78ab2c412707f6bcc50ca2e31b2e882f8897cd987ef6cdde83d28ebc05b24fd23c32a0a2d1f198c42ec5f4975b08e8354de0c501ff10286a1c3bf40 -Clang.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/md5/5a329fd7d8ba102833a53f4afa585a71 -Clang.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/bdd6ac99d21dd7852f00d20fa1dd0f05987f4cfa0fc5ad30ebf09d1ffddf21d1a328ed6ab2103e490163ca6cad1226f5b0493de08fef599d34e23dddd8f5b8d0 -Clang.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/924a82b130a33e95324ddd5b7c962b26 -Clang.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/74ea5b080cff063a138a60e9c060c7c3f0771c5f34368a786a745e1a3de94d170d28a574db7b7fa497bc887aa00a8714aaafe138039c87a7ddd071d98c84f129 -Clang.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/md5/3dba716df1b38703566e1a338c8c6f33 -Clang.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/285c9cc97193bd55508885f894c21fd19bb0181a2fc429411a741a3846cc2eab9fcdea9b2822e8448e2a7a8fe4e7c659ef651fc22fc15bf0e3bcceb92deb11bc -Clang.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/5f32ee63b2b4b71d3f2b66e9bc0a3ab1 -Clang.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/e404840f111eb4b7df13205a3033bc99e4716ddc120159c2c1d0c65b6f8a2689dbbfca4e43f0f06f32a998720c094d2e0c77496ab7b08c4364646a5a3aa7e98c -Clang.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/e9eb43805293397afe99703b5fa37d76 
-Clang.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/26224234d38ee4aa7003f9f607bb9538fb0aa1f18c24f832501ec7ad2f31db7c1bcc9cfa0c25c480957b09d0195dbfcedc34a22d975fd03e1da240cafb596f3d -Clang.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/984540f856557fb5c5635623c09ab8f7 -Clang.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/1647dae32144fbb5674a2b57f84dd1d8f2a026a441c2ffac72c6ea89adc25769f34ffc2f699fc72340107ac0569abc03874762f09bcaa62c61fcfa3dbc7c28e1 -Clang.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/2659cb759aaa7a76f0b474b8a8351995 -Clang.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/9ccbed4766be40935e0e3cd538f455fb3e8146edfc81e2bf73b4b38acd0639881fb6a0bd73fe8e4dec33384f0beb456bcc714de123bd20c460ed1a80dd6356c3 -Clang.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/3961c37cd6819e9e55467e249dd8035c -Clang.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/feb2b388ceb87411462eb048bc08c2ca24788b4542780e5993f365db793738679f7fe1e3058da7eefdbb2d07e50c49cfcfeb6146c2009fcf93adb09d7231af3b -Clang.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/fbd9cc53baf325b1e71f1108e00b60cf -Clang.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/7213e25268998ef5665fe28e10ecc7ef516a3443cfddc58d56fb83eba766e8d04b45914073e790dff31ca4df43253c7962289fe00078957603c1b5747d8732b5 -Clang.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/a1249b1dbbca3a8c66f67a7681c9ab51 -Clang.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/d711d5cb9b3c7c8c11e37fbd0870be5b13964642bea613c0f5bf5c4f54a92db984cbce79558d4cbaf04177822b4305d20ef93f4b0fb108eadac63cdb0880523e -Clang.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/cd06d1fbd1c56371190c31740da82a9d -Clang.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/65e5c6ce326c7ffb589d97b8c45b44ea1742c75f4a4a32a5e3d64aa91e10999293fbf9f11dfaa4e9470fa6912d7b8a0cfd2b88fcfe0cde67c6a21a7921f9787a -Clang.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/1c148bb915b1fc51577e57ffa05de1d8 -Clang.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/f3397f03988bc5dcb385d6af00477b1b44b872f985ad1a0f2ad0b8c4fae75ea92e683a683f5ccf6a6ac34231e58f665d690c7b7a47dd495107da818c0c9e0d41 -Clang.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.tar.gz/md5/23cf7538952ee1d503d3fa86de28424c -Clang.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.tar.gz/sha512/e4d8742de7190c8428477e8302ed738e1233cde77a13b241dbf5640206379b0da6722eae14d12d30a4c3f2d7280d643217e3357b32d34fe149e35b5fe56abde1 -Clang.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/aa5f281b5d334ae61994a4a96d6ed5c6 -Clang.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/eefd4fc31d884b0a2b2019bb6d29d830333f151ec72afaaa0a1e38c6acac5edb81771504d559765eedea05066a5631163afa11007059c3ce85c6f016f30c5e94 -Clang.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/3e91bcc9d9adae75e9a843ea14df473d -Clang.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/c5aee3b441bc81c07688058c12a51fbd19b167b8a65d809e24b27c74ee8cb082608694d366d4ba7432ef6dc73d24b31f3969b95e84f525d3c65c6f46d25ce013 -Clang.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/b4369d3062a8efdfd951e9cb083f3dc7 
-Clang.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/8e6ee31841bf8469dbf576d6351b15c2e4e77b759b8b211363c80acde37f6b36c021a40e05e99cd8fce040c1c694b9e348fad7eeebe6ae4f3789d4aeb3570d83 -Clang.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/0bfcbfa0b7615c9051947b22b6885857 -Clang.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/f978dbfcd7c39bfc77833d69d9132e4980abd9a84d610ec4867054123d41ecc991c67cfb8206d6af98da0df0491b52599e37d2d2137aef4df661441e3e13480c -Clang.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/b5aab07189e61dc476feabba1c303a6f -Clang.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/8446963ad5a6d6c9f0add8f86a4815be85474a00b66d5ccc5a0db99a4b41b07cdd41e910e4197a65e312860c3ca439dec8ec7b054435730bd5bedceb46541b41 -Clang.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/47e778625e4171d66b09c432436d4b53 -Clang.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/4b45a8cf83a679027e2b40e6fbe2f5d62adfa9bf7a22b477bfb529205b32b01eef212c2f084f028778e27f0651fe180b9a8c88ce9704a2c83de2344fe3787a7d -Clang.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/md5/ba5f994580b7e5265b3c46c222607406 -Clang.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/sha512/4180017f34c473a7f8da945c2c23773d0f7223b2700f17b1965df21652303ac981742d6bfd69696db857db7af9a451d07b99131e6425390246de459a1c509cf4 -Clang.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/md5/bb7c3b4777d2e6ebe043b5abd7b3297d -Clang.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/sha512/f116ea66fdad8110ace0321d01931ae5503613fbe127a06e96d864f031dd146079cbb37b14335c9f612038bed713d1b10e48011afc3d073e6a8350dced4649ed -Clang.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/78c34993f04746e2caeca401444e1dbe -Clang.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/4718cff8d7d8991d135c4d3248bb0ddfade9c1157334d9da64117c25dded9cdc9ac5eb117cc5236c0eea64de951656b6cb5aab1547e41fddd2bc9bf02bba5f89 -Clang.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/7c22612cf61c039f4b48d6977e84fbeb -Clang.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/109f8563eae920d1052432d6a6481c4ed83b40123bb1f167d59b9b2d2b9b40ac325c4c9c7f063c730e8f76c7314cec91f82293caf498ec6b22f36c766a672382 -Clang.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/3d6ce4513b5c9cc2c824870fc9c20926 -Clang.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/c3957a0d7cab941ec0ca53ca752cebce419ed4491d93b4dfee228c74680a1fe7c4dd50e543d2d428cad663938302c80ed42cafe25bf9c046eccd6f3424544c59 -Clang.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/377e8bce91c58ac81587d3e001056494 -Clang.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/800666109b833b6707b212e601fba259281f307dc6228a0a378910ec5e274be0e7f6d10c644e7403eff7ff37e89f892c645f40b6136ffd370066699fbd60b61e -LLVM.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/8022ab83a040677a190af9a97d776a30 -LLVM.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/1b1bc4c48dac21b2ba2536c86424cf2e9d8dc44a74554dd8fd9d52ac6231d4bf28f0341fca4d7721100a022b24ea80c39ca98d0d20e7d0724e328ec7db99d5fe -LLVM.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.tar.gz/md5/94926f586da97edb48d671a9c367f847 
-LLVM.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.tar.gz/sha512/eb05f9e239274ef5f75b5e622e1063007f29530848ddae2ec383522efeadb367e6c2fdc44cd1fa9d77f7ed5f394773e8b53892c5630e04e7fc1b1da7a737d8ce -LLVM.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/9d552dda2a71b0366723305b4f933a59 -LLVM.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/a0416e77b19e3448930e3aacdb97711083e27c3ef41b884252254627c961f82ef809e0f3f686cb18bcd57bdfd15f9ced93ecf453a3cca435057a79f0b9e79dd2 -LLVM.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/f25c0801f0c09dd92fe5b81ef131e315 -LLVM.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/918e6aac4f05b0cef280ec0ff3ab1a7efdc3301edaf92096a6500b7585087cd7ca9b9a97d93844aaba8d7de3c7b1c60b7a00f062d006ab8664b1e2eaaa71d498 -LLVM.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/868a7af2df6e2a8981112ef3395681d0 -LLVM.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/50f03a46980c53fbc215f70b5a581ad507d32aa6231643e31bc111bb014b62bfc17d68c2e16076f06b0b6e478a5864f3087c378ffe56e2cd342db1b54d2d970d -LLVM.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/a91f2a78bddc87a071417858496a80f7 -LLVM.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/959cdad53200873ca3eeaf8aef922a1f305f48c5fa0d12f2f0a903e1bf97027364a333e3fae06579f95b17bd068e5a376f7f9e27165384f496c014551d5a0d0f -LLVM.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/21c7425ac3c46d6a6825945f87c504eb -LLVM.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/582d202ad782f07d1037b46545ae210e430489d37a0a194f10eb690fd0f8f1c523bf294505e9953e6a0aa14da8dc38143b4150046512ed4abb56f8e9e03fb577 -LLVM.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/c4719da1ca1a41e684d9d9cbab06d0fc -LLVM.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/6faa14d51fd0136b938e7d7789efe625c9f27ee6afdcdf673b4ae474c74a6f3c94c3617beb08c6ec911df4bede67e9bc4da4074a47d5169ccf221b7c7e142c82 -LLVM.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/2bb8321f5f13e92901a51603c3569f29 -LLVM.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/0a84cb002f160f6d4f61ce7ebd1952d40743a8bf1aab711f2c47ac77d30f4c8ba873b9d0d0f276be2d2fcab4820ee98de5e3fc0d0644d1435fc292cf0d82be30 -LLVM.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/7da4d2cd03866708b5b980af8d938e88 -LLVM.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/e222e416df009dcbe9de028fbadb82a2d96e266ca14496c93d4ae2488eb514754d3cec2dc937b247dd14832ef473e3a69ae6af4bdf54e3f1f7df40c24a58d8d3 -LLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/e88acc2a354c7004772e84b11561f847 -LLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/1458bc1530b6707297cb0d52a3b639c3267d08e4dd28c31f170c56845251438ccde93da3f694d995020d81c1ed5e71d5e98b20b51486faab5439857a39d80745 -LLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/a783f7505a732ebcc22c0029e5ebe552 -LLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/33c55bd3a694dba446313c46b3dbe45387e35d73fa55ea1a2241b24d34aa0ff21e92da46b61f62961108cccb3a63d53d8a709bf2faaf77eaa4d7a557114fb8e8 -LLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/e7db300bec771f9a820500f6715fbba0 
-LLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/d64a493c1984ddefc34872f5b8a1107467dcd6f00c686c35b91d2a3fcef59c1447cfb3540576fbd76b84700d4f8e2f4678effb26a7a102cacd55719c57f780b4 -LLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/0adfd7f9571ed273602e7659ccfce692 -LLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/43f091d73a43c60ace04a47dc380d67abf980ba4ebade46280071b36662291311c933d4b3ea7a24ec91668fbf331f4df0f93ddbbb5a5941c3e1a5081905474a6 -LLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/a97c5a4d08557003dd5a0937551fc91f -LLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/8d5e09aaf214a2046754b522c8916001d5cd2412b74eccad4d37ee32fa6fbe3d5cdf5c86e0947d5d5002e65f7ff0e53e0fdb86c137b96e6ba9f7551cf0daba1a -LLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/b81901540cce70bc888431c01c34c087 -LLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/4a6bdaecb0205162f74685c91399d1efe5fc1e413677de1cea2e35ab5445ce445c6b0c4cafdee4d8f41dbe9457756831cc32c2320b4d104e2b9935e2938ff46c -LLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/532cef20bd7540a94e91cadb044abbd6 -LLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/c5f7a3f5fa6e1996b8a351461d8230a2f7be6fd3a7e0aa81b9b89778be199b520b143e782b9ae27c7d27a6a9ff7afe0f2963bb4a77a66d19402de62a3a8acf43 -LLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/54ba2b4bdb525a00830b5dc99b260be0 -LLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/8add33588656242146af7f5531c2c4f820456b9fdd365af1caa183ab4c1346cc009ba2868be694fb6710e44a678631e2a687227c51a174ac0793394dd7a79e56 -LLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/60328e82b564aa43b5ad702471d38a46 -LLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/4874a5c593541c3a6ef2024eeba5ea0a870471b67b860cbae79072b6805b3498890240d59142bbaa0be01d8a974e52cc651c5291b6faa42951ab8b9c05f13ea3 -LLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/ca5e3aee60b24e3ed84bd76ad711fec6 -LLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/73f41fb64e3c324a30bfa0a7fed2c02e9fd1ba14e4f833abc9f7d8c29e44c3949bcf7ad8faf102e4d3a88444ce51c419640961d28814155980936c0b856385c8 -LLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/8d521cda3fe41fa67ca2f1111c446fcc -LLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/9e6519bbe49c920cfb504981ff85ab39db3a3cab451d0c2be9108870c79ec68830585c42f1d751a9b9d0d9c2dd1d484ecd43a6438355bb13270fe03938c498da -LLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/c86b36085bc8d0c48d550ea6a751bea7 -LLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/60c0cbfad30b8e22ce96fef3b2ecc9be32c64935e1b2c6f6a1b65108e9e09619e3520f8f3bbc4233c7e2f1e431632c9ced9caabb860414a57166c7d56a404b8b -LLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/1e52401c5b9e18199dd71980770a8f67 -LLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/bb9feb772002dc66092f258b33e5acad64ede0ecf2d94cc60416bceb8c8bd84468efa27a7fb20033c0f14d3398e7531430d8fdf83fb34556712314a939df23c5 -LLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/b8562c72b96556e543d1fd52385abd4b 
-LLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/238b0c8f0a7d8343bf4a69fc41edef2c32ac5557201e649c238ee943928dfea733d665bacf77945f3641e74c59ce45c722d9ed2fd6d4c1fcc05e522cbe5de750 -LLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/0fcba9f2227d0b2476f239a3aa8fe423 -LLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/b40eae50cb387fbdb607220a5ac4fce62f3af75ecd2f86ca4cdb1df2034b730ef4e32facae1f1af0effcd554fae6ebc7cc3ed12102b2f04874440358cb552412 -LLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/846b020fb5ca1bc107302d740320abb4 -LLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/a8a5e0cd0e66896ebe3726433635ec4f0c0c024c39560ca1781406c8eea1e1e5b5b41d6362b127545fc35f83d5b5bc2e41f97c9dd692fbe2e75b91f857ae1393 -LLVM.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/a9f241fe86ee05aff14e4d97d25adaf9 -LLVM.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/716f52e567736fa6e10b65983836c50188320617f52e2d06f0c67667cbd02ed9c7603f0c6f9d62cd4181df1b0226a142c7adad03e2ce82596d82905989c73f2c -LLVM.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/1fd7235b54d334ffec9d4fee73c2ecd7 -LLVM.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/951a1d14fe9179e3f3ebd8b0f8affc5271f5f5ef7755a1ebcdb94d1d9f11a183ec5c6d52ebe62f70d4586d7ab35db35d63a01f6968d4d36fd2e3fe69f0a2fe13 -LLVM.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/e22ba633642ad91f1b58c295d15758c4 -LLVM.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/4e19ddc8c59a6d98aff7c1fe0a5edab95c493d9bf0fa2404a2c3089966636c11871e037f2c110163fd078f93c67e343b2f00d4579fc7d675009d3d98e739f903 -LLVM.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/ed6d10d0ad903dbd170337e0886a9f02 -LLVM.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/e365f3a2424bc6c43c39b853d95873a0784590d8c3996bdf66aaca1db82a576841567aca370def574bced93861e5b578ced13070ecba77936b02eb627fac8a01 -LLVM.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/44b8d01f25fcb8b74a91b12879524645 -LLVM.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/594115795ccaf3b476b5e049516bb1e9b768332b31064d8cefee16ca0959d16dffa507edac801526653a682cbb6bc11a836532ba88386b3d3ae527696db2b878 -LLVM.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/md5/ffcdce88dfa881871931e1ea646f5743 -LLVM.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/69104731e6f05691ef7eed7a8adcabe2bc1409432ae6c2bce4252a5017f28b68d39e24f03e5ee9bb75d5c2c9b94f927d1a69d09f495f360232a6c5ac1881cdfc -LLVM.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/1a75ad28694848c8fdef3710cfabdcf7 -LLVM.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/89bd98f149a14c5e35b9a9d902eff92f975a56527a56c0b1a3768b11c41dad2f9da921ae76596a5727c658bad1f3508c986f34c3bca946f6b90fb1e7251fbfa5 -LLVM.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/md5/1f21c6b3f998cf56eabb55a071230aa3 -LLVM.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/2057930d25f78aa156655f1c1f5bbcee92fdb592e5bccf72d7fdef90ed42b4e226b6c40e7d7e9515b1eb5caada2a9cde3514da609f4ca33a5eba4a47e981dfd9 -LLVM.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/7b5b626bac6069075574bc1ed7eb4735 
-LLVM.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/5ee7b6cd46f9bf86918864e430774c2cc52d559d1360255a3273dbc1d72c8844e7d0b26586540bdd31e16ae2560619703cf619f4f03840972dc6a28531cafdd7 -LLVM.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/948cbd0e893630556b6ca7331b9b289d -LLVM.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/d9dd6637ff61f032a6a52c942f0ff44c4845fa6c5cfed514e6a83fa7bf0c5314d436bfacfa16e71cc33add7d3177277a218f663b45e8d2565a7cb937403b6256 -LLVM.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/c484b4650a0bf956c2892beebc3da230 -LLVM.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/1f6536cc7e152502f19183cad96016a89499ec7c765d706144672ded0635ff9a3618dccb15d0db0f8860f2f5f2bd60d531efaefdd847a433d50c0d6d2a498b27 -LLVM.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/0456b698fe380cee778a558591cedccd -LLVM.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/263418e11f40f6baca6373c66fc0ebf9cde977917c903f42c869cae8c6ff259ce28006f2836fb3638e94939c4a057263abf515d45688ef5c7a792cb8479e4190 -LLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/f768a5fbaaa8d84b0933e9b188cdab99 -LLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/ee13421caab15ddc10926ae46622d39617336f5b3be784a6e616254adbf6479c664af21c59f897592e139635363f44586803e3fb5c7f6586c6ace6cfaa96bd45 -LLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/f3080c2420beade04c3f567a243c545a -LLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/9ef6196f15a7d6facfed750f2b82d8772e375f46aafff442b6540a74200edc6305b25949e6de2643e71fc8058341d8e663b0add8699c304587f02e31774fde2b -LLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/10d973165d6ec45c525e8b67acccb42d -LLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/1683667a2c48fe1b215df579771aeff7dbe7bc040b9b063d9544dfe88dd9f573a3644be6d849bd445cd356d919ca4316e840a2918f66ec3fadbf8955d4c36ccb -LLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/fefa36fbf15e887868d72b6fb97f584b -LLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/af42b8525a06d6cc44cfba6be51c7cb00f524391b491ea5d5178738596812810b4b6a58e5c7edf76e888932933e5e6276db6382de8ab55d9cc21f0ff0044ba05 -LLVM.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/821c099cdbcd39fa624493bae0cddaf2 -LLVM.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/5d255ba92e5bd5c4580411daed93acfc59ca2db2f4757d8a53a15c74c2fd23667d8894929d9edb0276363ac75efd5063d9a12eb5babeb39c87e5118a534916f8 -LLVM.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.tar.gz/md5/155212b7a9343161cc2e4bb61b13e187 -LLVM.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.tar.gz/sha512/808c2d1930f1c5f10a86f750b6550165d97923221b1c3b41d131491ba350b13ff7e3084683c435d008c4b0f4f1d48dbba10d365c0a8903582a1aa15e1f1fbfec -LLVM.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/9a3330a54b7c59908afc0add224d0211 -LLVM.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/193dd168bca5ee18d985f88bd67d0ce0a43d70421bd8ab3ffaf335f3a3db14c6ce92b3c1d6a2d43081b68a595f33285f9456726c21163ac163c2cab334a469c7 -LLVM.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/a6269e7034cb5be7b4985e7ad19e6106 
-LLVM.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/2ef963b42ca800c48d073d02a9944c45993347ae2673463ef1cd836c7dd6da25dc78a45fc8749fb0221d0e8328ab6bf72757cd0c3560ca682fba54e21ac5021b -LLVM.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/ea2ce735947dc5f7022658824b4169ee -LLVM.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/45a56f2b8ccc66da609b3095b670ec919cbaf02000143183f6f69f1b4ab8a1028e5a4d6f4e5b7cb83ea95271bd7981388dc112bb7f1194134975ca5d86d59f5b -LLVM.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/ed0e44b9aafc4e16586138046ff4518a -LLVM.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/55a7f5b7121a86303a8113210a59443b0244c95e5b2ed9ec1468b3d19c7e8ab54277c769a4b33f9d688d9f784943a7e34f94d7db655ec40b976e2e7c9370ad91 -LLVM.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/57673b8f26fd04b96ef2809eaa646849 -LLVM.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/130ba67c025aa4f4e24fca41c045a53d6d6fe9022479119517e54bae5fec6b15263a7cbe50367fe5dd444efa00501b2a5126a901f8ad829736cd238cdfeb19d0 -LLVM.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/c19733cebfe488eb85086942cc8ae88c -LLVM.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/18b12bafbe1ecd0d5d7a4cec952e09d13736b5a429b0b43e02fb3ddacf90ee7dc796a2d1852fbb0f0fc12d55cfe4cc792ada242e4daaf51731a2d0961e2e7c8d -LLVM.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/be49960771dc82b451441b4773390749 -LLVM.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/0460d5d4ec03fa9b807487fb14e1c8ded5eec67c8f523cc8994996c30dec823584dc68e431f6b1a5f9eed237d9d48bedee479c6101f6329892572244f04ffc32 -LLVM.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/3238593015fbfc92fc3349556b7e5151 -LLVM.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/e555e1491fe9bc59069e7e7b410002b109ba5a479e30da2626ad5e3f3ec62f5bc9877bbee95371ab2f2821a7b997fd1da7962e6ae8abb45ce89755326fec7503 -LLVM.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/md5/0c252e8c9727c46d38d79559f6e619ca -LLVM.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/sha512/2526c1c63726632849c0d7aab4a3283aedfb95b2a12fe3bfa84882758e79375201b0473cb76bec02049f302e6c7787b79150eeb300a776a85ccbf8c520aa3769 -LLVM.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/md5/daca0c6347addbe0f33f7270e8d07772 -LLVM.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/sha512/38bd1706d876f0f380a4aaca27ff06e745540d108b79a5ceeba5bf4aef3a2e9f17895be78cdfd3cf01110d20af3c33b616b42e55d34029584a36d14e1e7489f4 -LLVM.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/d4966f296c616ed8d19724df6730378e -LLVM.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/797c394d4102e68db6d96a1ba5ad985bfda85cdef1dd6b3a932a298e381586fd98b6b82d8cb3025583bff213702b844dc588d3dddb878186186b0f7b40bfda34 -LLVM.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/71840071c3b7268e4f4f160ae5777605 -LLVM.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/44729cf456c331170aba8ac1e1c68897b3d9b6ebb1e1b86e35f35aa09b7d2d4163f61b398cb878ca253c8e4b0f76c2732dcdf2ceb87860a152e296aaab411e37 -LLVM.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/e4344b6e1795816824cb4f6c03624d0d 
-LLVM.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/4009698e21197b96d84f1e8ff259d8a3f7d076c1dac1722ec02eb81f495ba3ab48c09aa0ca583df24af62b7fc9a34147e40ac76f07f86551f767e3a08bbf9362 -LLVM.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/0f44fabfae36c971e832aac89a2df90b -LLVM.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/963aefe7e7664c43194d9da4cd49e96d44419e53c4b51cb2841b7b64a6d798e56cdaadd78b48b2a017e7442a7105c9da9207134606620544897222d6ffa66352 +LLVM.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/0d30873e7f66a55acee43d5948fa9c1f +LLVM.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/5e18d759332b99aa4af44d4c74215e5bad61bda576681f9347a6824b12cc9920fd9c6e87aa73f8acf1f484184e21bf0929fde66f25a091c1189f946f10ff9e18 +LLVM.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/def878000c77970c9fd35071788d73b2 +LLVM.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/3b47f231f283d0a15171e3e8d3f41dd6354a86a5f986e85c739d5413426d0731afcdf0464b7d6c127a895d3d351aa867996004f69f9e8fb74a324421ad85c0f8 +LLVM.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/d507299a26c0a7a4f3d7c7e06a97fd63 +LLVM.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/56cc59f05e595c44a8ef9209f2760db1c312394d91afb09d37ad403cfedab956bb0dfc28c9c461d15e97f000037379404d3a14bc551b579c592f5f3d5f43af93 +LLVM.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8b587dcc1feb233e7c503f3e89d707c0 +LLVM.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/592158975df17c5156ca1fc94518af938cdf76920eca4dd02cf15d8c6bf2a8e98588689ef863bbc24d1a538abdb11e15dd53444d9ce54c1d517902d4cdcdea4c +LLVM.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/c0c9f2db22ca738bb71e11735a469d7b +LLVM.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/0901683e377aae6ccc649103dcaad238968ef750c569b530c77f775dd6320343cb0337eeb99e5c69fca0de907705630994073025db288d267215624cb5993d77 +LLVM.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/91933ede30f956f2cbc66aa42760e5c8 +LLVM.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b4388126af2c542471cba0a6224aec92c59dc0376a7733a890e020fa267710ef3f77b73d7d1a4013c865d9ae3ddd80814b7c55e451cf930ca37a0bdb20f22a43 +LLVM.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/9bd64f73682db24333e149ffeb909ba9 +LLVM.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/5d2eaac3976de1a29dcce8dffa0ce49fe839d68ba288f6e06e902f359095fc6a0c40d22533ad49401629c21e0363f82c34d8ac0d8c348b201c5a0738390f97da +LLVM.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/4893c85dc5294deb7d13932bd396569b +LLVM.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/25f3d4596fea8e3a7375ea1e8cc282fb7ac4afaa68d1c9b02947457d53dd78aa5d0b4ba97ee7d5c2c7138fb78a25e16c3075e113465b70b73f99e9b553d19079 +LLVM.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/dfd4fbfcdf9d114982a8a332879ff69a +LLVM.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/11d00cb09fb1d73dc55a4fe4dbaedabb266c56b7fdb0254f5c003b27d77373c1823322b2b03d40e06fab1a5015a0370b867e88e53a28c2e9714b66e97ea61c40 +LLVM.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/7f4da380cdfb8285c3449b97abdd89ba 
+LLVM.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/714f50b99a44c00d4a03782c5472c6bafbaba8e2c35c82e1a94c82ef74a1cad94675ba58f5e414ff9ed53bd2ca0baf6b96d8a044ab88eeaed68ca9b1d80aa27d +LLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/7061ed8e6c7ff30e641d09a986a6d48a +LLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/96e50a24a5ce85fc2a8f784fb8c6fac027d01ef6236790dadae565c6d7abbf2733747263a6d32da4a01d26042786d960d9dfbb1a8817f3c8dc247e8f27007fec +LLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/5f3d4e151c628eb1ea81fd0b56af9847 +LLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/8ec01f93cc7234abf710ff43025bc19661747cc2844dd1f93464802f3f42c83eb1f200ad2855e01657b7bcd3123d786de9a54c4687a62c499b326a40aa555590 +LLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/5a744d9068877b4455c26a0cf1ae2ead +LLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/c1ca99bf3516eceb06409a428e6b988e6b5fc3f6097dd3800f6cdac8ea90278977b0b494285ef4c59bf34f52d17648d4ff46c119b46b6048235869cefaa08c8c +LLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/b0cd7594ce3ace35ca733df1524df121 +LLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/1b856a2283a676162574f42acba2b59c2bd7e12888a83b21dd656774afad6cf40240bb1dfd2e544ab5f13d43d7b103d03886594f42ea31866b6f7bb0cdb84801 +LLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d3d114c4c3dbf78c4657bb35472b9626 +LLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/8834f6685accdda8a64f16a907e7f980c435d327c19d6beb01dcd792b99db7e3f4e2b218a63dbf8b90079821a5c21b5c57b761484956d2f9f2547068fc03079e +LLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/ffcb4dea25ca467e2c2ea448caf697f3 +LLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/41927a4ada35a543653c4dc10161b3186e826f7eb49507fb02bdcb7ac06c68685114cddeb7d5d0449e7407eda162522549d4597d1c216c2dbbe2bb8ace86724c +LLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/d64410cb9328a1056bfa4244d6ec1ef8 +LLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/24b41c566851d0b98e4432657221f60e57f1cf04c073483401a8ea9b928d8e0676b5ce484bbf25209e2ff71d24ba7bef2b327f56be936fef1c7db7e75f3e4306 +LLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/34c1baed09eb46be39a532b305d1edc1 +LLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/53c5c59d773057296f4b51a32abaea949326ddbbe2c8342beef3939ee13778361012d0b6826bbbd8d8a4e7959e52bde5b4791b5d689bc1b0adfd264215b217b0 +LLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/785db67d3898a9ba9621259053e170b4 +LLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/3dfea783f30e85c51129d025912b50a57e9c8275096d6cc52fa1a93752b8c39c804f26dd8dc6fd4db01deaf283c48ebb5671ea45816f3d4d3d507e764b5e3281 +LLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/f4bc8c6adc9b21e3b034199c8585c50d +LLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/2eb3a4d559d7885894e787942a719a433d955abfd9587af0e910a75ef77330c557bbaea3db600488e008696f05a56ffd24d367e1b91d692b31cf28d4ac3212de +LLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/b7eb28506784b718d6dd9654ac9742c6 
+LLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/6ed8273693c7b8947ff20d130c09e9d945fc39f50c2c306b0c5437ac97eaf29befca180e6e8f61f7eb7b4c5193f647032f9846aac68327e8194453d765791426 +LLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/4a6b352370dd9dfb5536d6a6788d4072 +LLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/289ad6c530ed1fc43ad355fce7420335e30df4cbd02b5b27373008f91a4f8415eee2681dd7a0c5704da83d4a21752089f5f7202cdbb2a9d3b3ece8fcb26d46b9 +LLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/1675466bc589ffb48c4626d7aca1e061 +LLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/1d36039fa1d9ccc4715a2bb6db76bc238c65a7416d5e31c15bc9526bfb6c3171d8f2541d5958710837009b95ff315ea09126f1d8ed1c41016352a40ac268b13b +LLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/37525ca67fbaaf5e125f95943e804377 +LLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/30a1679e8bfb392a8ff66e30a7f704bff88da4fdbfb0694b0384451148219ee2fc9c256306775ce655ff193b993f11cf6ccaebde2bc8fe2b1553945400e32aa4 +LLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/aaf3be684e78fedc69be62ea69cc31ec +LLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/8c6c9b027913439c3456a209539f25e30cb247b297523de18530aac51649717522c5d14ec531a9053f9c82392ef7e36e03d53bd32e329e90fbd9966467a560e0 +LLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/2514cd3b344b1c1df29b6ed42bb4e78d +LLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/5ecbf336054fa831a1aecdb29c01641b3c5a67d44faf64fec1d4b9c86f997b561e58fd011ebb450234371257680222f12ec6bd24aa29b0cc1b4578f8692d4239 +LLVM.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/eb1c80df14af9b80176bcf3e9798e1ce +LLVM.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/7e4f70d61e2e8b05e8561253968ff3677ea818314753101488f6df8c3024027ff5189b09d42a3b71855a9b919527e7d1a099463f77d21af5bac143acb50cad93 +LLVM.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/50b6ea315963df2469e7d014a0e190dd +LLVM.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/f57ccc09f6ec82a9bb93626953a3ec07a0c1fe265c5b862e16c65751c5ef1d7a34d2c8d32275e2b140ea5ef0657abc0408a9f3f770c6ba615e057bfb63c5c99f +LLVM.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/4b31b77bda90b61804fb82e75f3df05e +LLVM.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/dcaa8dc33b02eed25e6a5b7315e56ac65b81352c8cb7966007a4018e54f29f02f4ead95e723e6ea70a378b893fc1036f3df7aac792f5a44552f4b0e2d4ee5dad +LLVM.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/25a8fec502b17af9b56896436c8c9815 +LLVM.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/05f05b128c151a0946470ac2f609899299e74102817e7e46176728b6ce96612a32b9255603bc770d7f64976dbed0a06ccd289ca30d2d3cabec2a835b8756c750 +LLVM.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/464e16e268b36f7a9edf4a281cc83fa9 +LLVM.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/bec094bf52d95de0f6c49ed4e18be75e5cd655a7e9246c49b361d72db82ac7aa5838c154cf70eba4c6f292fbc867ad83ac0cedea6a278a4c4acca994226a9742 +LLVM.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/18dbfb7b104d56a1167e66e0de830287 
+LLVM.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/71c0d3aa067d8a397ab55cd18621124fbe02038dbdac8b67e44485a49c4c5a55cecbda5827f87311448030e1ac98ebb5cfec242e8697907ced74ccde3b41e254 +LLVM.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/cb44e73504c3704c0909f55fe63a3e92 +LLVM.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/e1889df0eced29d82e20087169fcb8cf82fd9e230a9911ec49ff365a76b2cfc3e19ca7c7d14ab9a537d7abce9c169104f400b2756dd032a20fc5e8b211d1b52e +LLVM.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/df717a78c5393e4a954047e8fe22814d +LLVM.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/1957b6f3bb3f570f918e13a387c55b294808c3b690b99db4062be6321d4f8336911b17535fa04b359b6943a842ee9b54f136d203b1397d99e780c41c95876249 +LLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/92d555d261eda37e9e6cb078e3c2c6eb +LLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/b89b748241bfe4537ff8b64686fa63dc495e64a5ba011240297add47adedb74547547dc2f80b87c686e36218471770ea3253b5b117bfc2e990eb7a524a774ccd +LLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/8fec352d4d5499426c978be761d3ce82 +LLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/3dcd842bb640a2772f802bae326e92a09be33a25be51931c4bb1b7ec75b9f839a29fedb857aa86ef2b674d6265b1251589572debcacdb02ee7452bd5896af932 +LLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/96603a41127d3ccd9b832dfae5b26a30 +LLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/088473b8c712fa9e21d70515a7f829456970f0a93f592754905b107adeab5c144b5761382fe1e67fcfab911584e481133ea1f705207f8868e9ae2cc408fd14d4 +LLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/252df3705f7ae6544f502f0fd67701b5 +LLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/6c75f1f5e2539c50b6d8d35b483c8d24d1d701e6dbfc59d169d69e5db24c654dae021e1e7ae9c1e7f693ecb296ad08abbc6091f3c8d1df8edf0fc7bf973d1223 +LLVM.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/34c6138393b5407126f43f308d735b41 +LLVM.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/abee7517a142ca79041ef4a669969d86e93f421a40a25530ba21d92f88e13e91958002c967f1911ddfcb5950fa095e3f5e9b60ef8ed305439990f9ad6a697297 +LLVM.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/6e6baa28e352134d12b03326597866d4 +LLVM.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/20c259ce68504c318e3d2789090dcad5a2ca247b9ed3e5d2a6714193b117033003c4df262015fb86b3a2d2fcfc72d2135dff7bdcf71126e50182d2c608863b24 +LLVM.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/a97ee35283986ae5ce0f57ec58d459e9 +LLVM.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/6e529cb46a5d7f9be12c7c9fbb4e0db4d0cb009d55655fefcdf901923f9636f28110de7ae18fe634d551aebf4990c1f5c350b5c9d705daa2e30b08e35a218360 +LLVM.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/1d09d79c4c30ec32b7dc1f513cc6ba3b +LLVM.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/91024fd81157e0d496fe5c4fdb9f2812a19eb87ffdba009a819ac0e2c6f60145631509f200286369826db707e03825dc62c8c2af02176d10d266e2230cade81d +LLVM.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/81f2a02cb792bd664b782d7734029d66 
+LLVM.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/ddad49a9fbbe902e7efc4849e4980d98b21be0e8af0be71b6e66ac50da01e73058984784863b2a4f4aa12ae62f8b3806a6298e8de38198547d03ab0ada17d853 +LLVM.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/bf7683be32c7792e1947e00062d7b8ef +LLVM.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/b3ebeeed5c428fd5ed2643ef45511b74ee0bf0b02653b9211f002e7ecc9f8ff5437139ca0b282c7f25a0918b0e25d316e97b2522a69099d7614d3d1cbe243f84 +LLVM.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/371fd22021c2d6a150e7a7500dea52d7 +LLVM.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/635cc4cfbf7809b679d5fde80b3af72e22dc056778eb7fc66ff80dcb7706809b4819655e922ea3a1b78c913c116444805b05b8fb98ce84c062ea67b12bac417c +LLVM.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/a60ed90e12ec86fd71be1e7c00626429 +LLVM.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/d86264d3d89c647ba75b919b338f2ce6dc496536970a3459abb88b3850216561603e4ef43e821d64a1db8cb53f27be12a2a462648b8aa8e5478290e78fc1b66c +LLVM.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/8f6abe2c11ecadc4e7aced1373bab7b9 +LLVM.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/8746bf3261aeefd3af48b6cb277530add2d1ad7988140d0c086c386213d9c7e0c81c04569237d1f6194b6c7636166b78e87d19ab231a6963aa460e557e94ab3e +LLVM.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/d81588b9e5be65ee8099f5ff063421d7 +LLVM.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/23f35975c450e0ad84617ed61d0a8fbe0c52e2152fb3f2207913e8c9ea4ec4cd7e1ecbfef283ea8c69ddf453e9978f9015965ac18bbedb95b8f802cea2a44c56 +LLVM.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/bb23932f8c246ba09182c9f35c714714 +LLVM.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/78ab81475ad9e62eae7068a7e0def61c9e10e5f1f97182de3fd1dbc5f4925b05c7f239a151035b086ea7bab97e697073643c26d762be63a0da67b1c1cb03a9a7 +LLVM.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/5c84a61bc941cb920e09a195080195a9 +LLVM.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/083fe443b5b8223ebc12e9f3393394ae790f2a9defcb81b8a1ccbbdb9d2631a6e5e8b4f3b000744d6cd2db6bafce18a1919da80bb563b79b7b008484eb5b4997 +LLVM.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/5078269b5db1fd0ba11b9dab936ec538 +LLVM.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/69ac651d6229cf4cdc1360ffd17a2a0b5ee3c1efcb13b70f651c3d58d50e9dceb6c53acf0d1f05fdeacf7361460be564662340b4e6eef82cc2422f8f46eede94 +LLVM.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/96c2dd5b8c37c516a45d30088bc9c67f +LLVM.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/9f9904cbf0ecbe5e599cf6fee2c689d1b9bf28b1fcb434f98f732af9f02c8ed69fe6107abe12247dc50f7a7f7900a6b5d6e5ea38a0153d4f4e0238dcdecee7b6 +LLVM.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/45ac91912beee2f1a348c300830dba87 +LLVM.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/034dfb8ede8e63d208d7b07102108f1d69e28644d21919d5478e894d42f77e8bcd54774eb85080cb0b03dfd76f70fbd218d94b0d77867f78a8db5ec492d39545 +LLVM.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/82701bc015acc0ab092fac5eb1b7c854 
+LLVM.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/1e72b2ed04759607907a9931b8180fbde059f04b1066f4eb3272e16752ab854aca1ce3579cb58853bf87a3ecf2b1acfe14fbad324b5bcfa31824987d45ecc225 LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913 LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167 @@ -256,123 +138,115 @@ LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8 LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0 LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6 -libLLVM.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/2cc8f201245012e7fc0c1294f4568fc4 -libLLVM.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/197d4bb40f06bb7431b7c1cf1a77dd082c4675d04131093522d54c57b65659cc3fbe6e4513b36538c9fa21a367d32fea28eb4338aec83e71ee0f6a3c972023b4 -libLLVM.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.tar.gz/md5/427d8c673733fa63db8f4de0d78f87a3 -libLLVM.v14.0.5+0.aarch64-apple-darwin-llvm_version+14.tar.gz/sha512/5d5be2644b577939d64793dcaf0f726a3df4047afbdb9d10219d5f90cf7328fb204029930d824443335fbdfcfbbc4b4eb60ca37837739cae889ec3512f499026 -libLLVM.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/fd9a3b1ff7f492000d876911faf00810 -libLLVM.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/946821ab1773422352d647c2fbce22587614fddeedd86a48f70684cda9532e9e4410622e5b0d2f9b4ab3d57b521250f6d862659128d00b82ccf02796968d33ec -libLLVM.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/b960c20452b134d8278781bf7e551c9e -libLLVM.v14.0.5+0.aarch64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/9536ded18d53817762583928c59bf8358b1a91a40c12ded2501daefdada9f6b49ff0c4f64140426001f967e9bfb97b241aaeee3f65757f7c880b4111a5a3bcc6 -libLLVM.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/e71824586b4b3841d5e953d8ca7137fb -libLLVM.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/31a357c16d164465b5327e15b6c6a018842117969ee5463aebad6eaa97047bdc2f987e5d6e19b4abae931af8d6a0b2a72b0d61e7ef29efaa5a3bebf8b3acf715 -libLLVM.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/e873ce89f4e218c9f4e8ae1f4ba564ee -libLLVM.v14.0.5+0.aarch64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/30ee73fb8416aead7913b34f370e05c34bf059abba0e69395dce4d881318d837153acd581304c630591933a577c3f6e733f840ca15334f9bba9e4eb7be1c02dd -libLLVM.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/0cb43129f009292a9c97ca97484174de -libLLVM.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/8082bef24cd2add39ab2c92a6df73bdb20f8f06c8a3dffeeda66be04cb15ab1b3649bf7fa074ee91ee05b56966f5de2c35e712882741f4a48e971f870cabe5bf -libLLVM.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/5e6cbbbef960ce15224885b6b98b1d36 
-libLLVM.v14.0.5+0.aarch64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/8aa74cb20cc70947eed4b8e4546060b731b74a25977cc5e5c8a742a489938f83a22a668026e451b71f5fc27df9ec7ede2c13a8b533cf4da177efec70363e024b -libLLVM.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/07936e4e681576cf40ffdb9f5ebac295 -libLLVM.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/23a5af416d6e3264b34e7ca96b58a663c5fbc0420c05aff03dcdb1fe956daed18c3e3ef91cd752d8eb4c0d34aa04de8b777871b440cf877e5188ba4a67783ab0 -libLLVM.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/841da344fd61441a0ecf6fa16117e8dc -libLLVM.v14.0.5+0.aarch64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/84d8caf9e4e91147aa9c4404f53c802bc58a01d347230735930fb88c13f286da2759af5555d0370f87cb5312c29515f925f295dc95340d1a0aacd6956541c46e -libLLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/9eefac6ec375d0b9eb2be10d8dc991e1 -libLLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/59cd21498c42d7d7f90ddd11e66d5fd5f191d2f5986253578db4cb8d95ab65b9964f8ef4e4f7dec411f233862907d4c8a1df8ea342dc0ff1071427c67a43d8f4 -libLLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/826f9e1886770e4637065354ee90d103 -libLLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/cfe719f436b68702d578f10f52577f9fc1a6de022149b31ca6f5d29dab96c34466eac7012bf603bc9ef18d1d539cb9416841bc0d0802f7675874f156b5810b15 -libLLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/5c97a72d41efd1944d92750a8f144d43 -libLLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/00936665142c426d70db050a525fff846b5a46c2ee581883bcf3db72cfa463ef25088c611fab1978dd527c359b19cca1f7d553278c6943f338e491b10dacefbb -libLLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/88c22a806ada34f9a4523a2a2c45b297 -libLLVM.v14.0.5+0.armv6l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/38b22031fe9b4905ffd6b823d28744d190a96fb6e2853600faf8d0f9dd15329cc28edd2889982c9560e59310abc84257350bf8355e112ba5eb5af5581a407973 -libLLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/02333f0edb98b19a98e4792cf32bf7ff -libLLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/66598e35337528d9598048866f37c268c5e478280ec4d21160b13b3b4f7e236e19d715985280ed28ecd0a3404bb9eefda521b9e5ec62cb6e1a3fc78b603bcb7a -libLLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/340b7460cf4c12f4bdffbfdd3965e11a -libLLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/6f4c94d8109c17a3ff6f64799c3c974664ea0c7c82ea8bb5b409bf4178591e65d64cb2b8fd88f567369f474abd1d70be64d9eeb282e34cf8dd450630b205f1ca -libLLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/ef131d4728f680bb6c157d5f4e761c0c -libLLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/f3ad1231aeaca690fb8f563b26693a5797d4cdffa1bf9b2739fed5bcb8da690c54c702d8dfa9306142c9a6711933ebc56b392a0be7abc5358378bc684f6edc5f -libLLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/63a856670c90a01edeac797f2b868eed -libLLVM.v14.0.5+0.armv6l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/8ffab1f3981c9112b923e5c20adbb2607cdb3783639a74d9c647008e9a73171d64eeb0a10a7233f033c62dd369e4bc7cc52fe255cfbb75e43aa3313004245ae0 -libLLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/9b0f26a20a5729b40293a9a9c307429d 
-libLLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/8ce80037507e4482d1d1b3dfc8013b65e7273232f571c3818df09885d3b263f7064793ffb9b66e5798ccd14202052d5af997c8bc8d4aa938e7ab2e0094fb5ef5 -libLLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/md5/f4de944fb95f237fc26d60139ded29c8 -libLLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx03-llvm_version+14.tar.gz/sha512/41605bedad4d68591403911229c16b773508b49b486958fad3725b32345cd4a12cec9f37acfc3a546d9efa3c3e056351a6b1c15425c6315d655e1b5e68b137c1 -libLLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/ca84c528b4b3e6b85c6c127ec5b3a1e3 -libLLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/51fa75e4fb721fbb86f71539873e5750478d132c35f8f202e374d3a7bce5564d323f7429af667a821e9b5f2920024a271be1fdad3558e7e9077e0f8584e6b790 -libLLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/md5/24d2380a962b296fb6b709b037b6b763 -libLLVM.v14.0.5+0.armv7l-linux-gnueabihf-cxx11-llvm_version+14.tar.gz/sha512/cb5b70c34ed0eb986a9fd71c2fa374ad5f76c24b05dd4993f1ad6d0ef29f596de8876d6f6a139a0cbdcf2335d219210e74197115f4f2834397f0ffd2d2cc9485 -libLLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/md5/14b221971d2f9ee606f4b477ee4ff4f0 -libLLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.asserts.tar.gz/sha512/c30ed18739eb6311ce5011138b8660b2bdbf95991d454371b58e91195ea76092a7e7c929a620583370d0340ea226277eea5b2ee369c8f15a498c693661c8bd12 -libLLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/md5/476d68d958f9cc5bbe6e05740e6cd329 -libLLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx03-llvm_version+14.tar.gz/sha512/e249015ea2b7784979385b25823f7bc94903742d028bf95d248ddc4ba8986ff04f1ef3f742825f380c4e0067246787d3d02a45e806b779ee8c78bee650d18d3b -libLLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/md5/ffd135b79d31f697548386989fa38343 -libLLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.asserts.tar.gz/sha512/d9d8fad66a1ca3c0081f653450cc6959137dc91b9688754f96043279454585e470ad78ab275cdf4aedf4bc8314dd8167c064abfcd7ed502480da6444486c9109 -libLLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/md5/02dbdb199b0d91d4bef89ef2b1f5c31e -libLLVM.v14.0.5+0.armv7l-linux-musleabihf-cxx11-llvm_version+14.tar.gz/sha512/00a10c2595093eb975c204765747b4d7aae7d8dd245053442c8e7ec1b3fdf2cac8b08a83a5f8063a831bcc29bba6f4bda22927eb4e4fa58a3ba91781439601d8 -libLLVM.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/750f469fba1cce24e93aca21283c1a1c -libLLVM.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/85d42b6f880990b5d4a872a72866de6321def7fed979addb095296933928c0d2e8a0c7fe649bd04273a17747e055eaaf2f40c2eda838e0ee295ba97604484a16 -libLLVM.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/ca21345a48bb5b516b253ff8815b1590 -libLLVM.v14.0.5+0.i686-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/d3ebb9fc18a4076eccf992520863dcf098a61c503428358c84437b826331c658dd0a2952802b9c159e80712e7f9f1ac88d58e096e2e68d804042ebbde0616f44 -libLLVM.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/a4fcb1e8c911f4e342f2638f7173dfc7 -libLLVM.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/b2d45184313194f88b43549a1958fdb9a9787adc6e4e7ae6aa659c93a62a214c7da05d279a280680fcf0a071eb5a1e79b6f25e4de4f0de3660fe984c6ddda416 -libLLVM.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/a30e3ea417147b3d0bba1d7113c91aaa 
-libLLVM.v14.0.5+0.i686-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/7f373b92070dab240f92ad5f5b242718d57c35a4a5783d6ea815ac7ca9bc99dce646aee39ad537a42861132c6b6b48039c101c77c6520e71ff994bb0862566b8 -libLLVM.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/d97e84ad2c2bbe603f3b9a7831f01d83 -libLLVM.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/2639b77a7ec7771084ee3378b5c22fc17f6bb7177cf8a885052c5cd2def682bb5b25522fcca47365d9ab43b33d2bb6f6f80cf2219d7043cd2162365d6204d3f7 -libLLVM.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/md5/20f6af4e5a2ae5fbfff9ff07f95ed3f0 -libLLVM.v14.0.5+0.i686-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/8a1d9c468efe9544604556dc0654d5be27bcc181c86dc81212196a7fe4b725a0ead6e262a4cbf31de4db4e1976786396453d0794dbc4d34bf9f29c1cdd2ced28 -libLLVM.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/03f7b6a9a9d5ebf479288d34727c417d -libLLVM.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/c2d5f4e71a2fac75ec49970525fdb16182aeb4126d174387864b799b626a1756adca1e9a0a92622563a4ea04b1c29df142023f3a53112446ef9a7ffc36aef19f -libLLVM.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/md5/63576dbc548f8ba965f4feed268c25d8 -libLLVM.v14.0.5+0.i686-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/b0f79cf29aa41a0efcef0cc5f1a6053d64a253fa6add98f68afda57f6f6d86c3a55d1faa7bff7400ae25111c25fb0f694aa3307e96ff20e6fb010dc2d89cee1c -libLLVM.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/e38b074a91de872a93ede23ff6b1cdb3 -libLLVM.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/e2f3d07d4853d0d2564e4fcd3f3faf1904bf3716acba49635d2579345d0562a3dad5f51dc7a96420a4487d47cb134790dd6fb7e5a7e11a6daf2641639118544e -libLLVM.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/f016364d6f7baa3166ed432dcb413b37 -libLLVM.v14.0.5+0.i686-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/f2760dc08a6b3b3ecb29f17c5717c2b419f0d0049773dd49c1d44fe5d6c8c440c934a147223f4785ec646dbca07a042feadddb75dd5d85eb5a0e43dce19b8228 -libLLVM.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/39cb218b4a360f7c613bafdbfa82a8b7 -libLLVM.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/59bacecbc8f001263febc00727e5bc4a5ef4291072a8082bbcfc1412c462692bc72b7e1c34542b5c20355b6b3945b640217bea17150ffdb00ab7cb4b0a6bd5d6 -libLLVM.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/62d450052f111595c418f5153b3bc868 -libLLVM.v14.0.5+0.i686-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/7cceb681e7a88cdf780d57235b37b4dcad7737b0679fcda4132966e7772c2f4b32c726fb199d093d8e99b349482946373041a5671fede75f3369ac7890cd3356 -libLLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/2fec6f89d79e060dcb7b73b296fc588d -libLLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/34bd288353bf54ecbd721c54642aa75328a07383ffc4cd719f228b0d5bfc7d84d714a51bff821bf21ea5e80f617b7c53f6d01bb354ec5bd071a111fd865490b8 -libLLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/bf08fe83e1ad4d26e03aa80c32c74192 -libLLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/c69aafd4dd26093b39d8010be84d5ff4bf84048e3e2864e30a4b0db981d5151c4700a280ccb34839656e763774c29c76cbc64624b6accb5d0e893a35fa18520f -libLLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/fd8b224b8eecbb63883bfd1eaa88c69b 
-libLLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/f58420be7cfb46a2a88e0b3242c739d887eefb662a29021e33d15a526543be35ca840ef9d8042208b1e3e63ac81607f2a5d4f3c96cb07c61fe9fc3a50e91f541 -libLLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/81efd480f3327e8e05ef9e27dfbeccc4 -libLLVM.v14.0.5+0.powerpc64le-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/85494ae571edf905efe885a5580a195529adfa7f40dbdce7f8f15eaa118ab51ed3bcd9234257d6efa1e2955a94a09d463d401b3b05efae67f7e6b9737b331d06 -libLLVM.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/md5/a40a108ccc978db0434ce46e3747bbdf -libLLVM.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.asserts.tar.gz/sha512/001739e2d05abb34fa36d5aa711e4a0bb39899de40812cab6f64d6429a47dd84e35d897bfad80dc725e635837878bc16c9c81c91f16bf733fe258b979de852e1 -libLLVM.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.tar.gz/md5/949204decc9b0ab572d1626a3b3aa3c7 -libLLVM.v14.0.5+0.x86_64-apple-darwin-llvm_version+14.tar.gz/sha512/c2582add149aa6d4b3534b24f479ee3a5e842905489682692774aa9e6f46162f8503d904ded137e1b69c2f826120621f80f4416b834e79ab1f463f71914128b9 -libLLVM.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/md5/959660c304ec416754b5e47de1e4c27b -libLLVM.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.asserts.tar.gz/sha512/8d4d118bde3dd51c43d151a4f1a4216f1d9479ef673c8e3843ebac123a64e6b52c226ec6b8b2ddc84952ce31f7aef9aa5a8d5b4b70655aeb0fdc025501eb7132 -libLLVM.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/md5/a6ae95b57027ac1ce833136d531d8443 -libLLVM.v14.0.5+0.x86_64-linux-gnu-cxx03-llvm_version+14.tar.gz/sha512/40cb32590189202801c9e6f7ce75cc07eac5d8352e790cc25b76518481a3d7195481d31289f224a60bb3ab7747d0590342bb2d285c9ad4ee0cb069d4a7ca8ffe -libLLVM.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/md5/5d43e6a08f991c3d6d98c2a38d5287be -libLLVM.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.asserts.tar.gz/sha512/2fa5dd67c1bc8f8a5cb7ba57c0cab65ceeaca0f6b5313a1280031985a5d73b1083f7f632f6e08d56c5b6a6d578e970816182f39f9aacccb175b2767f84766024 -libLLVM.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/md5/09cd154695e6073520b538e1ecb49e9b -libLLVM.v14.0.5+0.x86_64-linux-gnu-cxx11-llvm_version+14.tar.gz/sha512/945a89d9d42f0b0a6d1f5c4815690ac470c7e1646bf108cce46a7bc6da3bb57ab6da7c1c49ddef1775d9d46df7ca4632ff882e319b2abb5549d249b7bb587af0 -libLLVM.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/md5/d98001e73109ff4b0abd50c070d25f2c -libLLVM.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.asserts.tar.gz/sha512/e508b2a7a44a404a43e58d78059c9334aab48bfd5a43074af85bce9fc094f3959efbc5a2cf1d296a0372488c4a4d461e98a46bd31ed4260921c2cda8431e8933 -libLLVM.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/md5/aa6219a3b89f48e13801cafc1feccbf0 -libLLVM.v14.0.5+0.x86_64-linux-musl-cxx03-llvm_version+14.tar.gz/sha512/cefe2a791db9881c0abb812a67ab8f306059c7e774d356ed97fecc28c0945bc98fb12ad75159894d7b054bfa03d2220b6d5e4e5a2cbb04258240d54fea050c08 -libLLVM.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/md5/e5102d1ac70c8e0cc6f4d74fd5b8aadd -libLLVM.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.asserts.tar.gz/sha512/3f26d16f56b559923be698179ce371622fd0bf6c9be3b6e2afb015b3d1c28f657cf0b26922d3308e46304092d439587b6ac0cc3da7a43737f49bcc7deb048087 -libLLVM.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/md5/0ed2bd92af1afbd6c78a8f10b9f7b491 
-libLLVM.v14.0.5+0.x86_64-linux-musl-cxx11-llvm_version+14.tar.gz/sha512/d827c670425ecbbcef49ce5cdd21884e3be4c68e4071dd43e5a1c69a2abf9f6e3da097cb5f683a1a950149ad5bcc0b54789f19d4d7a31df840f808fe0ed30a84 -libLLVM.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/md5/11fb6856c4f92deaa8ce6176df3566bc -libLLVM.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.asserts.tar.gz/sha512/778ada97011d5baec9730439f67d458ba85d0ab082ee49d01ecbbba20679fa6e379edcd8a2ca5df3ae4ab545713195426312ad1fc3743b045c1b0a5d5070aa8d -libLLVM.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/md5/93e6157d202c8fe6d4ffa74a4a2cd69d -libLLVM.v14.0.5+0.x86_64-unknown-freebsd-llvm_version+14.tar.gz/sha512/aa294779422101fa6a13e5cd8f39e3e3eaf6dbcb2463b822474c0b2020ad7123ab8342d2987e618dae82f759a49cbbbf0e1c4b2c70019bf5a76c670183e153d8 -libLLVM.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/md5/c18e4de13d472b54558223e9937b8071 -libLLVM.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.asserts.tar.gz/sha512/5a76727b23556bd4a9f3820baf8484983c276baf28cc522a4425416427d26af979b1713c2a7ab11c133b93710f611b53e53ce9f706ed8f71271f48cbbdac4f55 -libLLVM.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/md5/6c1eb53673344af72a2467f1c7d7424a -libLLVM.v14.0.5+0.x86_64-w64-mingw32-cxx03-llvm_version+14.tar.gz/sha512/890292fdaaa115ebccafad34847bd74bf7d54169a4f27839bb3c3774815830e91a96aa84c0827f9f3e85126515e7a8242023b177cc92e845cdde47baf2d3b71a -libLLVM.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/md5/c2e127947a466a4ada5c4d7db2e22b94 -libLLVM.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.asserts.tar.gz/sha512/b3c660fac5571126ea95187476b0e2862cc70808e1e6a785c258e0d4a9c89359a729d7e4e9cb0be26cd1cce69ff4275b7525e34aa08bc2357ea63cf52b2b6cef -libLLVM.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/md5/85da494d3713899f158e1beade949639 -libLLVM.v14.0.5+0.x86_64-w64-mingw32-cxx11-llvm_version+14.tar.gz/sha512/c984d10549138b2dcb85d433ce3d9d63754a5eeaefad9f6f72cdb0287a99e400857c8678011ac07732954776cf0c4438b0b39b21bcaa5aa91b30eb3b2cd739b1 -llvm-julia-14.0.5-0.tar.gz/md5/c7df1a3f2cc19201ece78996582f43ce -llvm-julia-14.0.5-0.tar.gz/sha512/51c61d842cb61dab74df6d7263caa8c91e7b5e832bd8665cf40b3c2d8191a8c9665eb8b5ea1499607b6fba9013260f6b087c90ac850dd7e66f5fd37ebc407d15 +libLLVM.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/aafac8a677723e932e6a4cdb6bbb4411 +libLLVM.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/b584923d317db14a72434ec9d684a34e0c7df2c9b88fc4f005922784fa0fea6975894fd5aae3842b81dd7c7cabd34ecc8faadfa0b808bbb582c7eac0ca94ebd0 +libLLVM.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.tar.gz/md5/d890bc7254bf75563c79261b35feb8fd +libLLVM.v15.0.7+10.aarch64-apple-darwin-llvm_version+15.tar.gz/sha512/55a6bdc18476cb2b98b6e9370da8efd519c2a25624eda32fd301cdd3f9d9863f4a113b631536b6e982d551bb3a0f3e8cdb690699ef0513926ae5c86f65db1f8a +libLLVM.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/f4c53b8cea77f992d3c24088cf0c49ed +libLLVM.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/2b61db1a554f33c5f46b00f4032c722922755267de8f05de04165907329b6b4e33986255084699291be8a2faa7aa5e36aa0bcb5eec2aa324cb8c16e451518adf +libLLVM.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/52c355a71a585f09aef28ad4605cb51e 
+libLLVM.v15.0.7+10.aarch64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/3a010828b7a6fb51101cf08a74702d2d3d5d85579a3e23a91dff1b3622ca6fd8f04948cefbbf74d03e3cf7955b566dc439f04ce73ba77efce9d8c8e1cc37d790 +libLLVM.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/bb96ff22c46a5587b066e5994c1d806d +libLLVM.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/d643d142777122cc745dfe853070007f70abcb32a9fe5bdaff3ca0eae77f5b774323ccf1c51340ad2f18c72fbbf83bece673dde0c48f94b1ee4204a69bbcdbe4 +libLLVM.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/9a68655b71b38c61b59959ee890199f0 +libLLVM.v15.0.7+10.aarch64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/1b9e20d00114819029aad078aa201a1bcabbf5a5b66bf9867019479180a37cc930cd50127657f87474ae045d1ee808298378e3ac2b8079492a8a3ce4663bdd60 +libLLVM.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/ad4fb8227d147a16b19442b194a22467 +libLLVM.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/fb9c1184de6ea663b3c1b20ba5a5f65afa6bd2a0ea70472db8dd15246b7f0cbe7a0d29eb3dda66d969a6a9b48269f34c200431279e2215760f64e74de6f865c7 +libLLVM.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/8981bd41cb59d4680fff276a5cea6b17 +libLLVM.v15.0.7+10.aarch64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/5992cce6b5bfd38f62b3b370f1ac72d106ffe42c075d26cb6462651f4e9ca1d8bf8a91a254ae4d36b656643f5926229d5ff6fe96e1b31151c7e96f85160a2571 +libLLVM.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/f25e87b0d1c66dff45971bb6c5d2a6d1 +libLLVM.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/581ea7266aa711b355f3a0c676bb3e7bdd33e0297e24afdf420bc5e2ac9cc4bd9330150b8f49af977c9fbfad3c0987bf6f901e93743c844a3a1ee3a4d7902717 +libLLVM.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/a6a0cc01bdaafc1f34d35453ecc89fd7 +libLLVM.v15.0.7+10.aarch64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/cea08396c6617a6ab8ef8749c4af9d15b60171462fded556726f35dbccab888b763637002987f95dcccead210f70c947458068c9ab1fef696cba61334010ee9a +libLLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/d0694876f0a4af8f738b1ef5b15b7483 +libLLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/9dc388988ad1a05212e7306a494a8800be76378bb797c01bb9148b8db02f6734d7bf74b1ac2d19eead331aee4121c0eeda7e05635196f5eb44ae4fada1e8f2be +libLLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/92868cf79a7e9af62d91f4d1418aad4a +libLLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/093ec44f15ec8f8cd855f4c56a70f51b244837cabff3804cd8d5513b3ba7a1ce4d021009a2cd1e484e8705aeab2af5c3f470af2d03b9c21cc3619ba3e5e80621 +libLLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/9ef52c4ddb07b6edb399fd6050af7067 +libLLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/e109335b53f7bc98dc837c4f90c085ba37864ee9e4c9b8c0888b6f00c5f083a2639e3af6c4a29287b3e9b0da6af555832b1cd4a8a514a963d8f45458d70bc291 +libLLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/894ceace311aad57cde915bc4f7e3068 +libLLVM.v15.0.7+10.armv6l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/a075d62dbcec46612ee7b58e00698a8aabcb53705396e8aaae9488517eca34c52bdb5daadd6d21ba721f87a0a46b79bfbbf2268e545020fd60bbd8222249f939 +libLLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/3a6cb7ab61c9739915ff6697ca6584ae 
+libLLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/7e50687474b0cf3b1d01737b82d5d8076b2559e43a3fbdd9fd376d2f0e68cfb6535e458d7b8781a4a0535f00f28b3b29d0fe390cdceddb84ca4bfd360e814b66 +libLLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/3213bdb23eacbbf1dbb21e5758c038f1 +libLLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/51fb41605c5167486c866885eead1c95ccc11e9b96ca222875fc0965fd97c020afd11aea52abb883870138d5029434cdc5a27d6b5fd5d7966a61a9a11c5d1fe0 +libLLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/0ebd2f002adbfd00a1a39959b45dd4d1 +libLLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/01793b10cab22b3a78c53a9394655572696b2bad368d0f9432582109cd363432ce709c2de04130395ef66453479f154cef1d6a2249340ab6391f85ba4111ab1a +libLLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/8ff3ccd7327e47d1772f6a7ed2d33bf4 +libLLVM.v15.0.7+10.armv6l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/cb01d95f48f41e17b3b4c6549d5fdf5b1c6c8d0e38462796eef6966ad75a81e05a829ed732b189d34e2d3af7f70dcc2530b6b2692cf151f43d804c88a1ce466e +libLLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/dd9e9d296953f7cf32cbe39b519c9e59 +libLLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/e2e3d398c83bf3b7a0cb084577493455ce24ef551edc59297761a8df4007c07e6a7e3727a33f3a650a69639ba3d5de96051fdb6230a985a907582db3ef0e5d2f +libLLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/md5/62df2ac64bb8195960a93cf1f1fee8b4 +libLLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx03-llvm_version+15.tar.gz/sha512/ba01d9e1e6ed1096a04fedec5f319497620a8df655535104726a1497f27a2df4766955d4ac1e161296c313bbd5f0cd8c2d524c41aec234a8c38a1717caf56a63 +libLLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/69e44d007b7f44fefb1622533e2964f9 +libLLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/9d934d951c3e67c6f008438db3159f7a4628b22db25e5f47fde21669120ae766bcea839adfee748a9b1dd7f3f3d758e88fd7bcf95e0d9289d378062805bfb954 +libLLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/md5/0a8eb8bac53734bec7f153ec6a46bd34 +libLLVM.v15.0.7+10.armv7l-linux-gnueabihf-cxx11-llvm_version+15.tar.gz/sha512/1ffab09538ec5267840d4d74f8b9fccc1213707471b19ed7a45789da49bcda2d78a4da0d6954de828b444a33fb020b771d3453fe943334ee12af62984a96b409 +libLLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/md5/320247c2e5d1bfbe966ff93e194bc6ca +libLLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.asserts.tar.gz/sha512/4b38a64de0794befd2be7256419de25eb0be7642db922f15dc359c1ecbfd647e3e1e75f0671f71e66455853af34912dfc2960d313f269d9318040583d48d05e1 +libLLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/md5/1a15e742f9109325842b08296b900d73 +libLLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx03-llvm_version+15.tar.gz/sha512/1c3844c7eb23b7829bdc9d5928199cdaff42e98ee0a931912e9931724e110b965e7ec5b8443c3af79f4694397fa5093786e7e7257344df895cbb22cd827df86e +libLLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/md5/d88d9d0e9eab5681bf92c91cf5d8ddde +libLLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.asserts.tar.gz/sha512/a956df66e3a71fd995aeee43cd2ff8838c346de0a12ea4ec849742bf2f9530e9959a316b16c4ecdf184d5692293125309ac7668d2992f81f17b207657e2c1bf4 
+libLLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/md5/1c0ce845334af339b4e797af5bda727f +libLLVM.v15.0.7+10.armv7l-linux-musleabihf-cxx11-llvm_version+15.tar.gz/sha512/8be9c57a67331cdd5dbcf9466bd2346d21fcb100beae0e2027961d8b7b57d26ba03e785dc701f2cbbff60cc298e6aff54a8890fdafe4f4e505d3e91b8ac6adcb +libLLVM.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/2bbbd07363fda9e70bfce36a6c5ad6eb +libLLVM.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/0e4be179ecff5be8a715b744ac441a73cd742d98ee7ac521de4cec1d150fbb2485c29138d6f489289c7a15f74b5a68418133375162caa324a72f4ab1f95232f4 +libLLVM.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/dd655c71803ffae63e89e44f627a1649 +libLLVM.v15.0.7+10.i686-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/467974be3051359de81e2d08c7895cfdcac9ec9d6ba850c7515915b2358745795ed2ddd3c5cc12744942fa72a0eb28ce5f4ad4f622106d4a19f9e289e5357a27 +libLLVM.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/79c051e8492e8c6696f356bf450c641d +libLLVM.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/da679eea0673b1c117363785147d08f372e3be95c8e00bfc5277e662784be3232a1a66c1552c9bb538a5343fcdca0f7a4131805acd239216b26c05b81eedd97d +libLLVM.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/7d910217dcf88c9c59d3a1c4383c5067 +libLLVM.v15.0.7+10.i686-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/10ad390312d937f40e05c3444509f8adfbe0ee8fb6829d35a9a8ec7736106e75d6d5f48b9154e140e4cb59ce4d659dff624ae4a3f6353523d462f63fde4122d2 +libLLVM.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/168402080c3d56f85a298bbaea1c1b17 +libLLVM.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/27c29f50968c01adca981939bf1aba2010341ab4aafe60c4554b954209ef04e793a572404028a63f51ad9ed962bc72540f2270486b9a1f4501535fb547df11b3 +libLLVM.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/92a55a6891a199d49b1d523a3fb24766 +libLLVM.v15.0.7+10.i686-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/82e52b2b177758ee8c410407c6dc4a5f72816e2b97f9c971b6250e27d9bca1ebf73c5700dfd67c648a0ab0a48c48d6eabee0ee9bfa820ee8cf4fec64a708fc43 +libLLVM.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/441231777c430560297c930f1e41191c +libLLVM.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/6ecbb8a014ab4e9ba5c930d81020d9ca47feb2105335e874a5f46a08aa0a2d8ad3622434a21d4361191baf5868e1fd6b9a04a69ada27d3ec1dac4334e92cd640 +libLLVM.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/5418d900f7eaecaa8d0258a498757fda +libLLVM.v15.0.7+10.i686-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/fb25c480cc1826b3aa4a4d861281b6e701b1c792ac4030ad25265ab9510a159994a74aaf33d20fb8ed545526ca11eb693ae7cb0e79499d3248097d39f9691073 +libLLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/06b1e8271b1551b32c1cdadee06c40c1 +libLLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/7c8c10225e9cf75a6b23317e8a277cd548e8f2de41ccfef91cbc66338f94ba59c520c7fa3b4baecce8dbca6ec3e0f241aa09f898d44b1b459b738b7c243f636c +libLLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/527a5f29eb6141c2ae43c186cd6c9f2c +libLLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/466c863bde18d1ff889379ff18c79b7566b755a8207fe09e7a3faa770de242345e781aaa446bd2b656136905f0d2462f9062061052d0472c1079c966a03c68b1 
+libLLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/bc24a37e40f45480aa5e458f022cc7ce +libLLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/83121dad17e11c6bb2984ff1e7232ad77da362d3699f7ee1c15a8778d21635d87d89d483cb5848034552f23c2448f47c05419247a326c1b843eacd6cb9e99d0a +libLLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/0b0ee5ee232cf0da7ad227609d1c09d1 +libLLVM.v15.0.7+10.powerpc64le-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/8465b942c400e7a811bd4d46596ca57bcb5ddfed80358f7256d5671db3f797342e9a8ca800d74785874c8643c45367c952f4109d63cebce4cba897db3671e0db +libLLVM.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/md5/558e9e1ca9306d382a32f7fdd076a509 +libLLVM.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.asserts.tar.gz/sha512/7d1d0e8366a3541edeae36ef0a198db9bfaf7eaf54064c8b3eff34fc5efa5628d41001a7a94cea4ba6f960b64ab6849780ac79d25bfb5e30fb7255439c2fa012 +libLLVM.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.tar.gz/md5/8c5afffc8d684d7fc42ad46d2b3e0eb8 +libLLVM.v15.0.7+10.x86_64-apple-darwin-llvm_version+15.tar.gz/sha512/7a857e2b4842c2520600b245f9f3aa44e5206959fd013ab36ca2a9274862f04f5953d460e6024ed572f1ee9099d4afbc74187c310546476e6e3763cb4a68ac14 +libLLVM.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/md5/97b51e70fb151f680d268ef348f4e8ba +libLLVM.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.asserts.tar.gz/sha512/5da888921c4bd3bb801c136c02a37f8866e468a02e7716290dc73ba85cfc3adb42c66c881e7bc55366b3d98f4190aa8aa10c5477464e1f53b6c3dbffd9b396a4 +libLLVM.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/md5/c505b9ab36ce3f2c3f22a03edd8c17ad +libLLVM.v15.0.7+10.x86_64-linux-gnu-cxx03-llvm_version+15.tar.gz/sha512/c69f17c49286c352557bf2d5d34a6872184558612f435d0cda2b55892d3cbad91862f19cd0dd2129d5e8ccf6172127445b17a5d9b7c8a136abb15175f49b1c78 +libLLVM.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/md5/9aabdd2b8c2c448fa346727c89468d93 +libLLVM.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.asserts.tar.gz/sha512/25372337eb7b55bcd51407e9ee202d55633786b6742e38ad12677b0939c824bbcb2e10cef0d6f1bff6f4c0ad8b461c30fde862daf4ddc8a4bc4ea3d91436d4e7 +libLLVM.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/md5/58466a504b06dbfb8dc48c15184193c8 +libLLVM.v15.0.7+10.x86_64-linux-gnu-cxx11-llvm_version+15.tar.gz/sha512/d1d76bdaf3e74a1003fef3a623675be7a9ec0c64f7c5a0c88e7da0bd0384791b39e46dc297b3544f634a1820ffd051a3843c2e91d9f53319609d66542ea30e23 +libLLVM.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/md5/6b6f0d707452691ba0c3504616f2548b +libLLVM.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.asserts.tar.gz/sha512/6c4bad2ab972cc89316f246b0af6c1907650d17685c0923ca2f23144aac7ecc4fdbc29308fcb76d1c67ec99b6148cb0e48f49319d62834c8cbe750a68735d604 +libLLVM.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/md5/319a680fa35212f8327c5f3cf8348241 +libLLVM.v15.0.7+10.x86_64-linux-musl-cxx03-llvm_version+15.tar.gz/sha512/c9ba68a75a3491320e9e7c6eae378ce30cb504450fca5e7e3953175182ba4ef7347fc5aff0219617200b508244aa8691427469cfc49a230ac70a690504ffee6a +libLLVM.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/md5/230bf82575576a349873550a4e3f3f40 +libLLVM.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.asserts.tar.gz/sha512/1a7cab070cc58b5bc1d5c3397f0218fb7c24758d0a1b0e1f8ed3baf71e45b3014afc6c0ff94b3f9c8627dbd62603240a320a84888b4c69df1cfc485e16c9c1f6 
+libLLVM.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/md5/bce28b0fde016756db056611d119b49a +libLLVM.v15.0.7+10.x86_64-linux-musl-cxx11-llvm_version+15.tar.gz/sha512/8247dcba4f31dcc22d9771b3c5e6e96b29c5019df24a5b3f026c88a822877bbab75987e512108523951ec938b47113b0f6c161067b2935e9fb5735839b18fee2 +libLLVM.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/md5/7ac5d8081c414e6ca91bedb0f5cd5f78 +libLLVM.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.asserts.tar.gz/sha512/86899309b86b542dae0812e4ba78ba9dfb7dd6c5d9a9db3fb868d78930a906cd70e2dc63bcadf8a085b0fd5690af1df80c0cec9df3c1dabe2c312c049defff39 +libLLVM.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.tar.gz/md5/b5c2edca868b332968d63d161ad7a0fa +libLLVM.v15.0.7+10.x86_64-unknown-freebsd-llvm_version+15.tar.gz/sha512/352e5abb30d78cae34d10d25f3cc0f856d79ff034a8905610ae6c25944216d3dd78444593f3b1355a7e541fb47adcb88df312ffb374604638cd08d3105c7dcca +libLLVM.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/md5/3a818c98e5b2dcb086a58390ef61d82d +libLLVM.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.asserts.tar.gz/sha512/c238fd4f9cb573f5b47c3b71e3d7b7a26457267a049f318295f3e2ab2498ac6418d79e0734dde6dc47df859b670d8e8588ab29dc47fd7bd6e4f49db1c4b252b6 +libLLVM.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/md5/3533470b13310d9cff7e7a7c3619d921 +libLLVM.v15.0.7+10.x86_64-w64-mingw32-cxx03-llvm_version+15.tar.gz/sha512/89a533d3485dba9611ad799f9b3f26af83ed5dede874943c230236a09c2e6ab36df167dbe34f7121af5f291d0720d0975096df46b5f5db34c9822602654572e5 +libLLVM.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/md5/bfa2faaee4ffa22513c5109e7c76e788 +libLLVM.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.asserts.tar.gz/sha512/dbc995dfcc6da651c990b2b9254aab4d46a8dc05fc2961d5fb7199fc9af3b44d0e90b3206f605d9f0e3e9acfac6b534c3e9e5b8d05f0dd17f5c3c664e72c6d7f +libLLVM.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/md5/e92c1827d1e7b815675740bbc5000cde +libLLVM.v15.0.7+10.x86_64-w64-mingw32-cxx11-llvm_version+15.tar.gz/sha512/b098df199c1f30b0a5084c91af8cc444fbf9699474ea78513489a5cc434f8235952359f0d769871c7a8b344216c8de4893c517ec7a6b12c4098b585cfa67666a +llvm-julia-15.0.7-10.tar.gz/md5/e74093b6e760645ca1567aecce27d463 +llvm-julia-15.0.7-10.tar.gz/sha512/9d2b466c0368996bfa825cdc4fb8e00d0cd43521761946b310c8bc6fff461771f4454bf0b7f8be44d723462a9a2c3721504c4250a3fccdf969ffa789ee40d058 llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4 diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind deleted file mode 100644 index 678ae7b0c3fc4..0000000000000 --- a/deps/checksums/llvmunwind +++ /dev/null @@ -1,34 +0,0 @@ -LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f -LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913 -LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167 -LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/sha512/d3b0c81498220d77e4f3cc684fb2cc0653792c381207390e695ac30bc74249f96a333a406b2cebdaca14e0b0a27b188cba6209bb5c1cbbb5c184d5626dbdc7a0 -LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/md5/052a35e879d52244e4b0804be875a38f 
-LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/sha512/d1b34fb97f9928e046d3131a050454710a93d38e60287b7e3c92f179f436586d3230cf90b0ca0eb8a3f9ef89fef7b1ffd7d52871645dfa233a8b07ca87ea2ee4 -LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/1ad96a03a5dde506b5c05773b1849ec4 -LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/82306fb7b920fa7c71bd53b23d6915e7f256e8da9679cc926a53bb0d879f1f4469f43efe556ca32c9ef59e27b435572c7b39859090652635db4eeefdec0d1685 -LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/6a24fcd3a4dc3b1a98bb7963b1bb4930 -LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/9ba6b83ccec061a1e5260c807dc8afd6e18799431b25a7e65b97662cc4db02509d02ea07fe12025d80914cec7383624b1c8fc9add46511c668e184ede263ac52 -LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/09f1bfcf58a4124561553ab5005f9538 -LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/b0907cb857131183ffc338780c6c6dd1d48bf0ba61c3da1b8f20cf9a943373173b621cf9b2e8f1fbc657059a896b84aa025e6d4f0f1d1e8b623fac3e96541765 -LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/19158bcfae716b26f924d67c4e719342 -LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/a90be57990b6699cb737ba96904e94e1f082601ca9d01e670f025b5500f526980741921c9cf672accab78cb5327714ab6ecdbb875174088f0773ebb627a98819 -LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/md5/ba75556eb96b2bcdaf73ff68386d3bc3 -LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/sha512/612fb765695b7aae11ef29608eedf8b959f60c021287a67b03a2a0f57a5814001ffa9b261c9d60d5f3d0582c06c2b41f75fd3afb66a045a248bd43d29e304c97 -LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/md5/2fcbceeb1bfde29be0cbca8bb6718bfe -LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/sha512/58f281cfc70b3f8a59cf4faa7732824637c811ddc5ea6a058f294f4c3ed4fa6c8ddab5c007567b439f2854635cf4fd146284059bfbc73e7006000ced9383f705 -LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/md5/153c028d97dceb6924414a7a9a137e1e -LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/sha512/7ae1f197600eabde9036ae58623de34a6d25636d7861777e324eb97902f65e26c6f3775e757178f8914b0cb6c2e925413f5ffc6abc9b6138470dc9e67a17f212 -LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/c08a6cf3e1baf156eb05003ed4e9ebe9 -LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/f74e44986622329990842cb3ff549ff9254c81863d8bee468b0e58b7621067e7e7f7f18e4cbeafad6a05e0c107323de6828a78dc7afbcd7cd1892383ff417968 -LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/caf151150e56827be09acca6964d2b18 -LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/cb3e7aa71367ec4a115bccc2e8ac6bd5d9f22b3935b3889eee1fbf7303c5f553d7d3108977bc1f6c9b6917a6ed9e10bff211fd56b8169233ceae287b112894c2 -LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/md5/d95874cbf6f8b55bc314c3968a6a4563 -LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/sha512/4986a8d9cc9d8761a99a4f02d017b424484233d4cbe2d4f49ccd371591384b1b8d1c4d31cb908505b86b00f2b164568e57751dd949d91af203ee4a582971798a -LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/md5/89077d871e15425b1f4c2451fb19a1b2 -LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/sha512/b65a218b05ade2e2d1582188897b036a4596d09cf65558f178c49c1a1a62b7d992b1d99fbe86a027dc83b614f178e6061f3dfb695b18a8e2b6bf76779b741d96 -LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8829dad5e34 -LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0 
-LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f -LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6 -llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b -llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4 diff --git a/deps/checksums/mbedtls b/deps/checksums/mbedtls index 723b9012bfe00..11ee2786abb98 100644 --- a/deps/checksums/mbedtls +++ b/deps/checksums/mbedtls @@ -1,34 +1,34 @@ -MbedTLS.v2.28.0+0.aarch64-apple-darwin.tar.gz/md5/ba33f960c7bcc3fda818c84f5e716df7 -MbedTLS.v2.28.0+0.aarch64-apple-darwin.tar.gz/sha512/3878531424317954417d09090b0a7618c6c0a6907bb04db34aef37d55a033972371455fcffca548ac03be41c0b0d1f8e51a9fe6e8f8fb4d8ef4fcbf91f15b3ea -MbedTLS.v2.28.0+0.aarch64-linux-gnu.tar.gz/md5/9e7c78fc7c39fd19dcb170d57c8c0ec6 -MbedTLS.v2.28.0+0.aarch64-linux-gnu.tar.gz/sha512/59eaeec1a772265e62fa4049e0bc8c96cd7403d954213ac6098921acf6e128b624d6bc1ba5c6062c88ecb92aa8bf9d0a06e365eee241b6516ef0bfe2b4c47188 -MbedTLS.v2.28.0+0.aarch64-linux-musl.tar.gz/md5/44f939956834d5d8130ccb3bd5962b0c -MbedTLS.v2.28.0+0.aarch64-linux-musl.tar.gz/sha512/f9797a44851222c005fd4068df6e0bcee68133c9a48e19e16d188b8a6927be56c620fec83264398d682eb5c89b7f01683e5898d3cbcb7aecf53e5ce678464db6 -MbedTLS.v2.28.0+0.armv6l-linux-gnueabihf.tar.gz/md5/fc07035dddd51e9c57e62edfc3fc5691 -MbedTLS.v2.28.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/ffb707ba7439050862654316b4388f52e8bd09bbeb7076cf6cdc924cb60c61f871c01ccfe14e1ae1e62a5733490487324ba60e8545d60902f3317039264db83b -MbedTLS.v2.28.0+0.armv6l-linux-musleabihf.tar.gz/md5/fc54575519130bd468ee4dbe23da0ea9 -MbedTLS.v2.28.0+0.armv6l-linux-musleabihf.tar.gz/sha512/d4b9e1bd8877f7d93d1b4e0d1c4c3d4e5d2af6920e39222667e689ec84cf9817988c91a826755a734a60ce05fed913e5421b8aa9980f257450da7f51c5e9342a -MbedTLS.v2.28.0+0.armv7l-linux-gnueabihf.tar.gz/md5/0753a99f4645ba7e1ceb27a03c65a107 -MbedTLS.v2.28.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/a7a65338ee6f93117d44975651d77c351f0c919a3ae2eea6e220719dd084f71617946adf04a08a82d55c22af0275d21fce3c692becf87ccf2d932c8aa32af7af -MbedTLS.v2.28.0+0.armv7l-linux-musleabihf.tar.gz/md5/ff335caa1cec22366cfa2c2bf87f61f7 -MbedTLS.v2.28.0+0.armv7l-linux-musleabihf.tar.gz/sha512/a3ff7d53b45134165347dec209bc27f48be984b4fb58ddd54286a146b837d038ab21e22033f1e0713d359c72adc0b97e979532ebaa734495eb88bfceaf3c2155 -MbedTLS.v2.28.0+0.i686-linux-gnu.tar.gz/md5/c4c9728ee9d875685765eb4c9c3bf731 -MbedTLS.v2.28.0+0.i686-linux-gnu.tar.gz/sha512/214142ee7ca3a5b447a97928ffcbe0389fbb8c1fa68de387656e5c0e4406f02411e4183fb051b2107600b222bd5279b9fd3a5aec43a9d97a9556b08c5338cb7b -MbedTLS.v2.28.0+0.i686-linux-musl.tar.gz/md5/2684f2bc8a04234ae67603150e6d0917 -MbedTLS.v2.28.0+0.i686-linux-musl.tar.gz/sha512/a533afd26893464bee62dbfa9babf6e4e1119a4be31ecb242e2ff28f5f6e3a3969057e2ce653c98c1b8d2a19e340df7a17dac8693fce270399df92cfbf3a32ca -MbedTLS.v2.28.0+0.i686-w64-mingw32.tar.gz/md5/f205fd351e94f42cd38d34d3eff6e69a -MbedTLS.v2.28.0+0.i686-w64-mingw32.tar.gz/sha512/cfdb819d3e6fa9ce3985e29ac733c2af6c988230ae49bbdc13f0fc234e82444d17ce5da4d3b6d8cc6ac45ea4a999f0ce03ac42533223c87bea066a371487ef1e -MbedTLS.v2.28.0+0.powerpc64le-linux-gnu.tar.gz/md5/41b1f61ebda30a8e8f02dcd955ae0d40 
-MbedTLS.v2.28.0+0.powerpc64le-linux-gnu.tar.gz/sha512/25b62106404cb3b9be3e0f778ed953bdcf9d18cb289be823f97f7a1759012c84cfe7240fc936f2e6e858273ce2022d75ecc2554d5696cea110eda6d059362416 -MbedTLS.v2.28.0+0.x86_64-apple-darwin.tar.gz/md5/e7b286dac94bef06915930180b2d3bac -MbedTLS.v2.28.0+0.x86_64-apple-darwin.tar.gz/sha512/a2acaacb77ca6e2704144d8d99e51df49b1fc69c8751e43973e0c41219d023676d35ae05bd4ff7a3680dc0edf5438e51b67baa76f5b78947560dcc420623a3da -MbedTLS.v2.28.0+0.x86_64-linux-gnu.tar.gz/md5/39662265088efadb142fdc7255a0b7a3 -MbedTLS.v2.28.0+0.x86_64-linux-gnu.tar.gz/sha512/a3648c78bebf4c024ddf491965cb7707df887ce10dec6f9e42eb6493bc7d1220e5b23c53f5e4e73dfe94e8d8dcf35ffc6860d1992deb9b63a0c4691d4167e59f -MbedTLS.v2.28.0+0.x86_64-linux-musl.tar.gz/md5/1fbe9f2593bc11af031075b58a108bc8 -MbedTLS.v2.28.0+0.x86_64-linux-musl.tar.gz/sha512/d185ced64d471fba9ae1aa495b2eba0e60738e8e5ef918670b1c40cc8981389ecd48e4f17506229bafab4a11f7a257d3d544cfe87ad198482778931c2a7a8aa9 -MbedTLS.v2.28.0+0.x86_64-unknown-freebsd.tar.gz/md5/26beed62ee2abe8c6e52c1dbddbe0b1a -MbedTLS.v2.28.0+0.x86_64-unknown-freebsd.tar.gz/sha512/f04a417d99e3b908383d3c14cf8512b2f13e4b226d07235e2334090aadb6aecce40a23ae8f8df9c0ed9618707e839aaac6de64d5fee6d7e3955b290bc564d3a2 -MbedTLS.v2.28.0+0.x86_64-w64-mingw32.tar.gz/md5/cc55fe5537719aa8bf3bbee981c01413 -MbedTLS.v2.28.0+0.x86_64-w64-mingw32.tar.gz/sha512/3436647e81fdb9db138063229f20f47e2c8405e6379ca3e7cf38fb9fde84d2b6618a5f29b8df19cbffe75af7f99e00e9583d67be7b53dcce27bff453b96dcf13 -mbedtls-2.28.0.tar.gz/md5/d64054513df877458493dbb28e2935fa -mbedtls-2.28.0.tar.gz/sha512/907867edf532ba3b099f4fb7ce31f5773ceceb072a8d067b1d830e879d541f92f401d64f13bbe6b4eb0845e58bb765d7d28896be414bb0fc7ac5b3876066be5f +MbedTLS.v2.28.2+1.aarch64-apple-darwin.tar.gz/md5/ef83fb4706100ee678cd8af3f7a5c762 +MbedTLS.v2.28.2+1.aarch64-apple-darwin.tar.gz/sha512/03dda8cc9afa3d79c3c733e45c77891e75d939dc2bcca5ba8eb7aa3bd01fb52011ea9323df9cf7294fe6dcf87eb86c1b1c4b2f3b8af6116929b3371698559fe4 +MbedTLS.v2.28.2+1.aarch64-linux-gnu.tar.gz/md5/ac46c3840d2d0cc7c573f31c2f3d0d61 +MbedTLS.v2.28.2+1.aarch64-linux-gnu.tar.gz/sha512/bb458f1dc9b8684a38f603136ee4ba1c51b47f5047c5a5cfe2c552be266e79dfcd8243b216b0831abf24390eeb6f4524bc7e43b2642eb2ad0227399222cd0d8a +MbedTLS.v2.28.2+1.aarch64-linux-musl.tar.gz/md5/d74732e0bbcd03666243605e60bb345a +MbedTLS.v2.28.2+1.aarch64-linux-musl.tar.gz/sha512/90b0699477b697b94c0ab1ba0607fb3e1cd40d66a80a51cb1e0f3b927de03ba201e7e280d453db672e6265db5b07d0145846e53ddbcb4b550afcabef1716470b +MbedTLS.v2.28.2+1.armv6l-linux-gnueabihf.tar.gz/md5/65ce7c51884b50dcb8343a945644b862 +MbedTLS.v2.28.2+1.armv6l-linux-gnueabihf.tar.gz/sha512/e9df753e9f3a08fd645b15422be7cc0ec3aeac3f8d5f76e0c4c5ec24c54e1b653db320ed0c6799411802a05801241a5363bb449a8765fda7856413c7e3297721 +MbedTLS.v2.28.2+1.armv6l-linux-musleabihf.tar.gz/md5/7b7fc8eafc95416d75e3f1bfb2640e09 +MbedTLS.v2.28.2+1.armv6l-linux-musleabihf.tar.gz/sha512/68362114808fb4f986dea673ef1c7f104caad8233bed1c7f6a365d5d69bb7f7c92b234d6b1bfa5b014e7096411841c115a5cfe9932ae9ce642293cab962f8d38 +MbedTLS.v2.28.2+1.armv7l-linux-gnueabihf.tar.gz/md5/4a477379b15fafbf0c05435f5ab370ac +MbedTLS.v2.28.2+1.armv7l-linux-gnueabihf.tar.gz/sha512/fd34b475bf94b411e3155f5a5166d1ad081fef3622d7b99f4915b592d4235f63a0b910e0559ba2a0c3d596df9ccc2d7ecb61984091debb20bd4b995942857132 +MbedTLS.v2.28.2+1.armv7l-linux-musleabihf.tar.gz/md5/fc6551ef5f189010a84230dd48f6bdfe 
+MbedTLS.v2.28.2+1.armv7l-linux-musleabihf.tar.gz/sha512/d3a7199f3e1ffb1c289c5f0a4384f3b5d1af6e868eb1081d66d6cbfc60e6415e68a7e22afb497f2e7c7900678a19bf1ba2a4c888efa1019c03bce376af62154c +MbedTLS.v2.28.2+1.i686-linux-gnu.tar.gz/md5/5f06aeeacb93e8419da5dcc6dbadff67 +MbedTLS.v2.28.2+1.i686-linux-gnu.tar.gz/sha512/48dd5de23dd1513dd496b7ae9c88bc5a4f206442c3916ffdd602232b6f5fdc621adf0a3a014821d70092e1c3c90d96e462bc0e7608a984b0ff428c4bdbe42ecf +MbedTLS.v2.28.2+1.i686-linux-musl.tar.gz/md5/435b864b02d1d2c96e5d8dc32b433ae1 +MbedTLS.v2.28.2+1.i686-linux-musl.tar.gz/sha512/52e3a79a70b3ff4617c93cafdeb702105c13b34687fc0fa31eebc91aa5cacea356d5b6a6bdbbfd81417d77debe256ea8f0f2a43c8d140154099bde097740dce7 +MbedTLS.v2.28.2+1.i686-w64-mingw32.tar.gz/md5/09c0450a373e30ddef1ae31e06b288d4 +MbedTLS.v2.28.2+1.i686-w64-mingw32.tar.gz/sha512/59a3529e7826a2f2266c1482d5dbdae2fb578416b3b6ee3b0c8507df21c1395dcd681be65ad953e8306971c549efad342ee4e0725391a88b202475f56aebc062 +MbedTLS.v2.28.2+1.powerpc64le-linux-gnu.tar.gz/md5/26c8f09aa65e5b70be528311519d4376 +MbedTLS.v2.28.2+1.powerpc64le-linux-gnu.tar.gz/sha512/2d47567388b8554ce7714f4ded013fcbffbf94726dbc6a1b7287dc17b27d1fa35baba55cf7dac17c555892a5f4c74119afdf552b42b0e8f80f26621adaa4dbca +MbedTLS.v2.28.2+1.x86_64-apple-darwin.tar.gz/md5/dfc263208b1a8d4c29b4ec3b6f10e5ce +MbedTLS.v2.28.2+1.x86_64-apple-darwin.tar.gz/sha512/3b2941c4b151206a56a9a795f0f30519676ea4bc0c93f66b419b15568edc91bb976954f584116accb7f9bd067580712e61b3c580a249332640e27e6346ca51ff +MbedTLS.v2.28.2+1.x86_64-linux-gnu.tar.gz/md5/94b908036eecbe59372722b41f0b1985 +MbedTLS.v2.28.2+1.x86_64-linux-gnu.tar.gz/sha512/c37a4c34eb450bd716c076c4105bd6022892731c470d64a854ac0fca6653dcf5a70b23982050e7d82cdfd67d02902d9efe4c94d2cf5e0d29d497c3c5ac03f8e8 +MbedTLS.v2.28.2+1.x86_64-linux-musl.tar.gz/md5/217866be499144eeb2e0944b0b60cc09 +MbedTLS.v2.28.2+1.x86_64-linux-musl.tar.gz/sha512/144180e1968da627c92173277a130283aea711157a04a2655786658234232e397985f63d5407166377fc5f38a7447c19797c51b66a9c4b1773601d9e7e01d0e0 +MbedTLS.v2.28.2+1.x86_64-unknown-freebsd.tar.gz/md5/5a1ec1b183f30cb7998550e5ce15c62d +MbedTLS.v2.28.2+1.x86_64-unknown-freebsd.tar.gz/sha512/3d07fc1a54a832515a1340eaa5de03707fc52fe8770a75ac80106942f5d23e1d52297c6068d28ab07f55fd2b3f1c683b1e25e82bf4c34b4f14af58287a5b662f +MbedTLS.v2.28.2+1.x86_64-w64-mingw32.tar.gz/md5/edb5477223f9a35054160585fdb07f7e +MbedTLS.v2.28.2+1.x86_64-w64-mingw32.tar.gz/sha512/617779d6944ea153c63e6d9ce66d9bb33520e1539324a449151594937e648ef7ccb30364d3e7aa3eed3b68b02366e5724e14787565db565f0686334ab4df3701 +mbedtls-2.28.2.tar.gz/md5/421c47c18ef46095e3ad38ffc0543e11 +mbedtls-2.28.2.tar.gz/sha512/93cdb44f764b200131b8dbefb9363e5fa38760eaf01473a512f93673cc55db3515830e16b813e03b39cb819323ad78cee4cb7f3fa85861ec5e72e0f89541c7fc diff --git a/deps/checksums/mpfr b/deps/checksums/mpfr index 0eb73ceb693a2..1bb4eca6bf4ad 100644 --- a/deps/checksums/mpfr +++ b/deps/checksums/mpfr @@ -1,34 +1,34 @@ -MPFR.v4.1.1+1.aarch64-apple-darwin.tar.gz/md5/157265257536980394e0a025b9d28de1 -MPFR.v4.1.1+1.aarch64-apple-darwin.tar.gz/sha512/44064eb67f087c2c38857273b069eacec9ebc199dd908f975895ab28bcdeb761adaec1a20cb5c3a98788090eb9ec31678ab1c5802896b22738d120e379f1f6ad -MPFR.v4.1.1+1.aarch64-linux-gnu.tar.gz/md5/ed45c58b6f9ee6993f34012570ffa6bd -MPFR.v4.1.1+1.aarch64-linux-gnu.tar.gz/sha512/d90cc0826df50f359c49a5ad7a48639137d7f58649d480a50f1a8cd9b77ca09a2678b320aef29dbe0f07f65e40c1994f46ec6adec6047d345d7ed1cf100d0724 -MPFR.v4.1.1+1.aarch64-linux-musl.tar.gz/md5/9634a53796d208acb1353ed500685644 
-MPFR.v4.1.1+1.aarch64-linux-musl.tar.gz/sha512/9fa2af227851bc9db79b8c4c381c07be12ce526a7e72e01bef76353b3488fe92cca17978d8df7ae38cbe610e1406b5a8d825b18b43932ced36809dca5ba81f46 -MPFR.v4.1.1+1.armv6l-linux-gnueabihf.tar.gz/md5/865fb6701c5b42b959c104387f8aaf08 -MPFR.v4.1.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/726b07c8dc7b0f67416df2b86edbec8577187b1e6285e53b54c55c613493e3e2987037e29b83f861ff9f64b5700d8815985cc564813f55399d91c1e33e8fac6e -MPFR.v4.1.1+1.armv6l-linux-musleabihf.tar.gz/md5/29e12f8ee50b1060fe9ebfa0ee4e18fe -MPFR.v4.1.1+1.armv6l-linux-musleabihf.tar.gz/sha512/871f834e1336782e51aa42fbf3a06165de91e5d469d69bd3acffe743bdb63ca55d7fef9f6e064ed91512d733bd82dfd7b68a2351f9b9f38f1d853e74f6713b31 -MPFR.v4.1.1+1.armv7l-linux-gnueabihf.tar.gz/md5/23d59ed4fd3e8923b1db11bde9c77e5e -MPFR.v4.1.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/0093a048c0f56036c6a27830c7999a6da396acf58da93bc11c31b638d10e0fa2dd8518e6eac02f9f041b8b83b5c74bfbcc532f43e05c7662b2e6ad5b16943415 -MPFR.v4.1.1+1.armv7l-linux-musleabihf.tar.gz/md5/e7be267d931c33e1a5a97af9ee3d13f0 -MPFR.v4.1.1+1.armv7l-linux-musleabihf.tar.gz/sha512/da6d7ed8fbf01dfb8624f4aef27f095cd4ea88002f9587a51f877b05b9308ab2af277bb452ec9442cb71a82e322ec03fc30a90d17f43f3b9fabbcd5eca64c68c -MPFR.v4.1.1+1.i686-linux-gnu.tar.gz/md5/26db9d697d5e40b3364cf3a52893b64c -MPFR.v4.1.1+1.i686-linux-gnu.tar.gz/sha512/0ac65c66e669cd7bd9d951f61b06249c19579d280cc6146b8b2fb324482f1191c9fe1bba6187f5f67ba219506db2bfe2c71a00e6179b5a8995d4c91cc903b8fe -MPFR.v4.1.1+1.i686-linux-musl.tar.gz/md5/8a012b83532aff4022784a4b85f5974b -MPFR.v4.1.1+1.i686-linux-musl.tar.gz/sha512/182eb18ee7f4766d5f45adaa1eab703acd99e801a02812e8772a50fd59e7fcff3dedd9a008c85ae816c257ef106ca8d809315f95f38b34548307a9ea0e4fe692 -MPFR.v4.1.1+1.i686-w64-mingw32.tar.gz/md5/d59ad915170aa5dbb230a64a44e1ace8 -MPFR.v4.1.1+1.i686-w64-mingw32.tar.gz/sha512/96adfca120ae691e28741f8a2dadbba0df605fcae622fe4c83c17985ee66b3092761104e0cefb68315237900f413fa3790b60306c8aa82a0e4d7bf32311c684d -MPFR.v4.1.1+1.powerpc64le-linux-gnu.tar.gz/md5/4796379b5d91ee63f5c37687b6962ac5 -MPFR.v4.1.1+1.powerpc64le-linux-gnu.tar.gz/sha512/b3567a40c1b105a24305d48ecf65aaba70ab2f44d1c7d9e7ac37a53393fedd56e6aa7f5c4395226eb7dd3c02f8aa9403485dd85e327f5d7c61e8fee5caf85d00 -MPFR.v4.1.1+1.x86_64-apple-darwin.tar.gz/md5/54e27d8dd1807dac1c0e77699c3e6180 -MPFR.v4.1.1+1.x86_64-apple-darwin.tar.gz/sha512/8e54dc8b24031ba66c53b45e537b7709dafa2736c2811ead8ca9062f03c22e78095579091dc8a4e7f69b666399c48906dfd22986657ce5f81a1f20043a80f504 -MPFR.v4.1.1+1.x86_64-linux-gnu.tar.gz/md5/7060b44302ca6544d372ec71b3b76aa8 -MPFR.v4.1.1+1.x86_64-linux-gnu.tar.gz/sha512/4f4e4f762106becf8c17790addada3a0a5f33444fde858359e4634041d877ee65a45b6d90f91f3126dc08e7bdad4506bcfdf3bcbda5994ed592267566393582a -MPFR.v4.1.1+1.x86_64-linux-musl.tar.gz/md5/0c8110f6699a2ea27f2eeeb3949ce781 -MPFR.v4.1.1+1.x86_64-linux-musl.tar.gz/sha512/40c91daf959a9b78af513b054e4e8d0cd1c121a5f3e0e6cdf22446e97d28d3f056f79978092907ba08645c3f6e29b5134ef344ccc79a9c2bbaaeb2233140cc25 -MPFR.v4.1.1+1.x86_64-unknown-freebsd.tar.gz/md5/9dc9d9bb0662700510b89e6da4f44f2d -MPFR.v4.1.1+1.x86_64-unknown-freebsd.tar.gz/sha512/14208fb683233d44eb2263e7674b9c5cf4f7f7151f025b2b00fb482e6609b78b2189eb25edd7c45b8634bca07e1aca746a6094af50d1449248847529ff58bcaa -MPFR.v4.1.1+1.x86_64-w64-mingw32.tar.gz/md5/6159f631081b32b7df88e090af417f4c -MPFR.v4.1.1+1.x86_64-w64-mingw32.tar.gz/sha512/5086da1de24b1f9431ea7dbe6407ae9c81df7a10b04845e8fe4a476a6a5dcb78d3e4b06ca81c85d1a8cf2d081948d20bb77672a4c9f6d20e194f384a323a1f71 
-mpfr-4.1.0.tar.bz2/md5/44b892bc5a45bafb4294d134e13aad1d -mpfr-4.1.0.tar.bz2/sha512/410208ee0d48474c1c10d3d4a59decd2dfa187064183b09358ec4c4666e34d74383128436b404123b831e585d81a9176b24c7ced9d913967c5fce35d4040a0b4 +MPFR.v4.2.0+1.aarch64-apple-darwin.tar.gz/md5/f9393a636497b19c846343b456b2dd7e +MPFR.v4.2.0+1.aarch64-apple-darwin.tar.gz/sha512/a77a0387e84f572ef5558977096e70da8eb7b3674a8198cc6ae35462971f76d684145ffae7c2ddca32e2bd1c8b2ccb33e4447eb8606d5d5cd5958298472b3ea9 +MPFR.v4.2.0+1.aarch64-linux-gnu.tar.gz/md5/ade253017d195de694780c32f9161dcf +MPFR.v4.2.0+1.aarch64-linux-gnu.tar.gz/sha512/1b68de5f8e557b7434c8c1bc016227b58683b56c0977b763422ea85a673bec446fcfee3a4f69e1d4689abb9bb6bf47f2a50fbb56ecac6a9d40096e66bd0f2080 +MPFR.v4.2.0+1.aarch64-linux-musl.tar.gz/md5/7dbd121c7192ccaf7191de5ab8d91afb +MPFR.v4.2.0+1.aarch64-linux-musl.tar.gz/sha512/8614e3cb28491b24a0ec5060b44abaf264b61c91ddd29d70105ff583bd3112cff1b9bd5ed45e39f186265333982d5eeb8bf35fedc3b51b2a009cc7a51046b50b +MPFR.v4.2.0+1.armv6l-linux-gnueabihf.tar.gz/md5/adb2b7fdf111c8b19df1516cfb278bb1 +MPFR.v4.2.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/0c47aeffd05a194802f6c4e0e2779d56fb46007e6c3e145ee6992854a21a317a9d51512c59a0ce4ddcd314c387945225c6557d6c2ab6961ae4848875e8983de8 +MPFR.v4.2.0+1.armv6l-linux-musleabihf.tar.gz/md5/c30358bdeffcff65ba9be906cd35889b +MPFR.v4.2.0+1.armv6l-linux-musleabihf.tar.gz/sha512/2857ec27ae2d53a451d62dd241ce9b43f7ee182bee180ecd9ad92c907c66d0b0ab2d1ea3b20fe61cc176ae44ecbe6041305cc8a9343b396c9cb54dd77a1e2868 +MPFR.v4.2.0+1.armv7l-linux-gnueabihf.tar.gz/md5/a1e30436bade2150c9dc924177f0c321 +MPFR.v4.2.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/d2f4662c494fefda66847e7a085edda3ce396383aafb4e17fc2e176191b0f530541726c261cac3467f13136e8ec728c8a7cf0e352f3e9ebf960d153cbfe766b8 +MPFR.v4.2.0+1.armv7l-linux-musleabihf.tar.gz/md5/857e3c82804e7c853d21603f18caa715 +MPFR.v4.2.0+1.armv7l-linux-musleabihf.tar.gz/sha512/86cf3e940fd66820b5269e9aa2a49c3fc3077857bec037a08e0d301b0bf3cc5c79ac331cc6370d852e20f4acf8f601c49d5dbe24e96652e4411b3f33a11e3f45 +MPFR.v4.2.0+1.i686-linux-gnu.tar.gz/md5/5a432be79a112e67e970980f4bde13a0 +MPFR.v4.2.0+1.i686-linux-gnu.tar.gz/sha512/94198b23ac94dcb9dca95938a46b9899c3ef329bafbb13b32076cd3415b89f11908632c7c07e90549c01bd9ed7fc9a002dae07a645f85b8509234c49be729621 +MPFR.v4.2.0+1.i686-linux-musl.tar.gz/md5/4ce71dc250c2469f844a02c6ee6571a1 +MPFR.v4.2.0+1.i686-linux-musl.tar.gz/sha512/134b67b23de75ab172594cd0fac55b5c265730bfea195978698e3e6fbc47d65617652bd72d90ba092ed1bac4c29d5b2c109df5d8dc60b5d8f91159fd58575b67 +MPFR.v4.2.0+1.i686-w64-mingw32.tar.gz/md5/be7239432e8a26c59e2d418d310bd6e3 +MPFR.v4.2.0+1.i686-w64-mingw32.tar.gz/sha512/3144d84d41996fc19bfc9ed4f36755838470e17dce79895b37d93e32ae1cb1da428f2136948f939b19548d7dd62830ae43c434f88efbe192ed3184bae2df5970 +MPFR.v4.2.0+1.powerpc64le-linux-gnu.tar.gz/md5/d818894054b38232ba02ee0e129f6fe0 +MPFR.v4.2.0+1.powerpc64le-linux-gnu.tar.gz/sha512/0e73ca926f3e06466d1899f0b3e9ae4abe15102804dce6716ce23154344a571773c40d276f0038a0ae4e626799867ee715428e1d961334a01ad3091745367e8e +MPFR.v4.2.0+1.x86_64-apple-darwin.tar.gz/md5/9652148df4e771be39713c4f43d3ff61 +MPFR.v4.2.0+1.x86_64-apple-darwin.tar.gz/sha512/91a0219fd1880dfa90d196fa403f4e1df0347ced58a4772492196b94476f346d80696885a4f3520424494bc09679cca0c0ccf2f6e9247d60b52ebdf564485e72 +MPFR.v4.2.0+1.x86_64-linux-gnu.tar.gz/md5/4de39327a792be708119ac7b43957628 +MPFR.v4.2.0+1.x86_64-linux-gnu.tar.gz/sha512/447b59d5589a8517061627668e8baed4366408cacc9d8e063528b9b795de6d27e4005844578310185f03f568f4948bc4a794624235875fb61b6187264b6f483b 
+MPFR.v4.2.0+1.x86_64-linux-musl.tar.gz/md5/f9b8c3c094b339341b19828cc5e1d47c +MPFR.v4.2.0+1.x86_64-linux-musl.tar.gz/sha512/c661e7c5bded3bdf11b2bd5e5ef4ad8e446934d9b82dfe26f0be1b83cea98d7e56e0903bfc1075f91c8d23401cc6b3b722f2d60f46d73cab884e81fe518aba27 +MPFR.v4.2.0+1.x86_64-unknown-freebsd.tar.gz/md5/e402dceae753abbdd8b11f3c8d96e0dd +MPFR.v4.2.0+1.x86_64-unknown-freebsd.tar.gz/sha512/235f001f3b0101a6bafaeb45fb49d2992549b6c2f42a4e7ba38e1fa8c59246fe7463598e7cfda5ead50c9805dda0b82a23b5ae2af4ec993bb771611163e58907 +MPFR.v4.2.0+1.x86_64-w64-mingw32.tar.gz/md5/c5bbd2217060491e2773bdd84b055e5c +MPFR.v4.2.0+1.x86_64-w64-mingw32.tar.gz/sha512/74b059b22990ab79f243284687f571f47447457ac2c1cb4c4548ea1f3d8ea01b7466281f48429cb39e2d11394fb86650bfada7acab639c6537a143a95bd6e7ca +mpfr-4.2.0.tar.bz2/md5/f8c66d737283fd35f9fe433fb419b05f +mpfr-4.2.0.tar.bz2/sha512/cb2a9314b94e34a4ea49ce2619802e9420c982e55258a4bc423f802740632646a3d420e7fcf373b19618385b8b2b412abfa127e8f473053863424cac233893c0 diff --git a/deps/checksums/nghttp2 b/deps/checksums/nghttp2 index 5aadf03f2bea7..9c419f5234ffc 100644 --- a/deps/checksums/nghttp2 +++ b/deps/checksums/nghttp2 @@ -1,34 +1,34 @@ -nghttp2-1.47.0.tar.bz2/md5/2bca98caef4b5c27d5bdc4732f36a5d6 -nghttp2-1.47.0.tar.bz2/sha512/4dbd0fe10f5c68d363ee0fff2aceb97f58a755a276796f16b078cd3bec3a17cd5e0dadf1e5027347d3342daa3572332b14df230a4d9675a9b57fff67f8f9e5a3 -nghttp2.v1.47.0+0.aarch64-apple-darwin.tar.gz/md5/76abe33c6e81346a133c3e26593db1b2 -nghttp2.v1.47.0+0.aarch64-apple-darwin.tar.gz/sha512/72a1302134ab4715f4c0b8f702a566498d4595aa7a3fd762e43d7e0ca5987506a9b1dc53318763595ad652d8c4a633c3c5e0500a8f4e3007cb6cf9e30341d9ff -nghttp2.v1.47.0+0.aarch64-linux-gnu.tar.gz/md5/1e5ad3ad31290e017c930c2d1dbda38d -nghttp2.v1.47.0+0.aarch64-linux-gnu.tar.gz/sha512/c8a2543f079751bcaf7165661f5f4053fd1b733cde0f82078736c898503c796fdd7ce587f0da2d1bb3d35a74a644fed6e8cc30a3520e577593d19700e822cc55 -nghttp2.v1.47.0+0.aarch64-linux-musl.tar.gz/md5/7079c203ec5e6fcf45d01bfa1ca0b1b8 -nghttp2.v1.47.0+0.aarch64-linux-musl.tar.gz/sha512/152f34f1e9a5f741d69d62587762a96fd290ecb41ec8eeff46fae39b5e606ff054755b88abe3bcaa07db640526fc12546769da4a3761a18240eb3d2699de8886 -nghttp2.v1.47.0+0.armv6l-linux-gnueabihf.tar.gz/md5/918f3e549998e34f2aa292a2ff7945be -nghttp2.v1.47.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/545c5674a6492dbd5f109303383b920b0b011e37e8a4abfb329b22cab50a6a977d9f74aac6f4aaa833064fbaae4b5ebc019e83d2edb8b4af2515f36f4530937f -nghttp2.v1.47.0+0.armv6l-linux-musleabihf.tar.gz/md5/1345980d4822c6e9c1934378e365e343 -nghttp2.v1.47.0+0.armv6l-linux-musleabihf.tar.gz/sha512/470c66205d257ba3b23b0db8ea93fe40bc71c219d50cd88a6b57abf8c105218bd9912b9a605da12903793893f37803b0e3357566e20035a079ed2b4bcc6d7b78 -nghttp2.v1.47.0+0.armv7l-linux-gnueabihf.tar.gz/md5/e831c03eeb810a48fbd34df2017c20be -nghttp2.v1.47.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/e90270b1f6ae7c90ce746f60c2f451f2271ec6f682003f3a0ee8eb97e9054932495fff22b2ca1f40e3711f847c520fa605c49c7ae671db7f282f78f8d745a0db -nghttp2.v1.47.0+0.armv7l-linux-musleabihf.tar.gz/md5/38d95842aa0d9e9ac9e77e468d18893d -nghttp2.v1.47.0+0.armv7l-linux-musleabihf.tar.gz/sha512/5e595d143248fadd5cfffa1f15b09698f1793c04422b12d5f8e22c52e4ebc5d947845fe3ef7539845ad731d4457c8a7a6e6e2bc1dbb5b32fd3cd374963aa9833 -nghttp2.v1.47.0+0.i686-linux-gnu.tar.gz/md5/a3c54ab31e835ecbc12425b00a201bbf -nghttp2.v1.47.0+0.i686-linux-gnu.tar.gz/sha512/375354d57b14b73d7e4cf751b69872b19e6806b7a110c104c0dc25794a33dd89642f9911216c2c1a2698d45878c12b7d735402e44b9b4ba60a5a9751a522c19b 
-nghttp2.v1.47.0+0.i686-linux-musl.tar.gz/md5/2cdfc4b177bc88685e629362ac754cab -nghttp2.v1.47.0+0.i686-linux-musl.tar.gz/sha512/cb741c7d6dbfe5815e1691c98fac46c2559b419cb3bc882b925779d9699e7b37332ab197bdb3b7cb944de45ea0cc3c6f6e5f8df04b7556dac25f796e992d7dc5 -nghttp2.v1.47.0+0.i686-w64-mingw32.tar.gz/md5/ec5f305e52c205a246db0e4ded79f6c8 -nghttp2.v1.47.0+0.i686-w64-mingw32.tar.gz/sha512/4bd5f81bd1502bbc04973f0721099a230248b2be907f66f044fd1111326bf05804aae4df123eda65e7e90445157bc07e87d9e837dfd2393038e4e042254c16df -nghttp2.v1.47.0+0.powerpc64le-linux-gnu.tar.gz/md5/01431aaf0c383e2ab1460f41e3c64446 -nghttp2.v1.47.0+0.powerpc64le-linux-gnu.tar.gz/sha512/ef3ed6eb1c77a81f46f7c06e4748d766144231ab3cc4875fb3502c6a553ce04937ee6dcb1516532c91043921b76779c1ea1ca20070907d3639d2f0fb036d0d56 -nghttp2.v1.47.0+0.x86_64-apple-darwin.tar.gz/md5/12650859c3ba16131a35b63510816267 -nghttp2.v1.47.0+0.x86_64-apple-darwin.tar.gz/sha512/a91d6b572ed830bdcd5822d8d0dbca70ce45f0c2706a1fb83aeccacad1a72391ea09683169ae9d8ed6e84a1f28d55d2ee26e49a68cca405dd032e9c128e54858 -nghttp2.v1.47.0+0.x86_64-linux-gnu.tar.gz/md5/62fb16238af3cf50721b0a671d28dc8c -nghttp2.v1.47.0+0.x86_64-linux-gnu.tar.gz/sha512/f662f30ad7057bc9c724fd48e15a2894aa0a345a24d35acaa0f3cb25d73b329772942d3499647ba7563c110d2186e96d4a3b12e8721d28d2cd6491d93df24e05 -nghttp2.v1.47.0+0.x86_64-linux-musl.tar.gz/md5/3224892e3e5c7d7ae24c2380fd731ab8 -nghttp2.v1.47.0+0.x86_64-linux-musl.tar.gz/sha512/35d18c52dee94846a85d5a7a19bff95ce2b05e5290096d532c7f3d144ee809a2ba9072dd24372905c485ee0dfa03309be8cebead2b62292518ab5d63d80c9e4a -nghttp2.v1.47.0+0.x86_64-unknown-freebsd.tar.gz/md5/4b3c9032b11ba078d7a91a30d3cabc6a -nghttp2.v1.47.0+0.x86_64-unknown-freebsd.tar.gz/sha512/21c9d1c95e26bf33a0cedc63ac6e81dcc670d6bc3fefc9a8efbf7faff718875cf6fc51dfdb192afb00acf86257104de7a0dfcaaf29119ba055b69885c31a4dd4 -nghttp2.v1.47.0+0.x86_64-w64-mingw32.tar.gz/md5/7d41384443541bf30b6165381b1c5305 -nghttp2.v1.47.0+0.x86_64-w64-mingw32.tar.gz/sha512/2febfcc452bd4f2a3200e6edb8127f678749a358a4beb219b7b29294ade66bb817e1fbdce665f0e3e20d923ab3bc68598f3c769bd4f09871866e452b6aab52d0 +nghttp2-1.58.0.tar.bz2/md5/f60a0340da2638eba448df73b6783bc3 +nghttp2-1.58.0.tar.bz2/sha512/7a40afe80587a13bdd7fd85fc7eb666c0dcc18b951a03c65ab4b52429bdf5d2eae27a9bfaa7303e7517e275239397f60dd466170364dfbd1077e25188ba54aca +nghttp2.v1.58.0+0.aarch64-apple-darwin.tar.gz/md5/528dec6d4a54854532d527067dccade7 +nghttp2.v1.58.0+0.aarch64-apple-darwin.tar.gz/sha512/cfc993f23f95ef3b2841d40e704c17816dec22a173a16c5d0bc3c85a092f1d63b69e47112a8a1aeb4e85603a7087746b97fbf9ea91a0ec42b45d38deb78f3e0b +nghttp2.v1.58.0+0.aarch64-linux-gnu.tar.gz/md5/c358e3407e956bddab6fde752c2bd2b5 +nghttp2.v1.58.0+0.aarch64-linux-gnu.tar.gz/sha512/207ef70f4ba231e8e4fd570a5da7931a488a297d2d525983499d874abef268440d3e8408cc953c3deab5219bc8c93034b89bf2f3e4c1179b52cede354ae6d27f +nghttp2.v1.58.0+0.aarch64-linux-musl.tar.gz/md5/eef1a1209fdbf30a71af878e307c4fb2 +nghttp2.v1.58.0+0.aarch64-linux-musl.tar.gz/sha512/9ad61002303add92b3d9842a408092b8b35ac8ac5ab17fabb77f144261c4829a2695111177b4794901efba87f97ed9274aac9935fc702c60189395abbb4d9a3f +nghttp2.v1.58.0+0.armv6l-linux-gnueabihf.tar.gz/md5/2864d534833745ff12ebb86378a90b0b +nghttp2.v1.58.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/091c367466cd547859d06b6d0bd37d424b0ccefc0f41dd4819156a61f98e0cc1eebf6da4171810e2a876fec9b919675efd9240089681ca8d3cd8620f5f40f8f3 +nghttp2.v1.58.0+0.armv6l-linux-musleabihf.tar.gz/md5/30a7b272ed36c45a3a5f58e7406150e5 
+nghttp2.v1.58.0+0.armv6l-linux-musleabihf.tar.gz/sha512/c1c96c2e987a700929c0158232ef745a0fe8d93c722174a91b32c767cdc0441ee73059da9f64daca4010367ee005a9037919cdb73079a12f9612f45852549b0d +nghttp2.v1.58.0+0.armv7l-linux-gnueabihf.tar.gz/md5/3940e07955102a7a71030fe4c8aed59f +nghttp2.v1.58.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/5b628901371a36937a16802211707334b02632c8651a5fd01d2e1508926e51c960b693abbea6f629cfb7ca750b6039b5e4385324f2d431f1fe6e47b121a57f30 +nghttp2.v1.58.0+0.armv7l-linux-musleabihf.tar.gz/md5/2d6d0b8adba7bfea20dabde3c87333df +nghttp2.v1.58.0+0.armv7l-linux-musleabihf.tar.gz/sha512/10c72fdc2e635add9f0364dd95fd8404f187068bce05587531d4865f75126c8eaf4f7c154762175d3ed0e85914b21df90c193766f610da91bb6dd75fe93df5fa +nghttp2.v1.58.0+0.i686-linux-gnu.tar.gz/md5/8753bf62f8863b5c3041ef73ed3fe36a +nghttp2.v1.58.0+0.i686-linux-gnu.tar.gz/sha512/741a8f2bd5902df9f06de67ecdcb041951a21e0d32f5acffac780cba7a41be5d67507ca49f65bf636849baf1b07e53871d9ee5624d4e33e1d4ff4137bd785bda +nghttp2.v1.58.0+0.i686-linux-musl.tar.gz/md5/a1b0f85e40bef6f2647a9e1dfa415196 +nghttp2.v1.58.0+0.i686-linux-musl.tar.gz/sha512/0e438073cc6fe39e2b47d7b89467c1cab1abe953e357f782b2cb16de5f9238f063b925ab602558a1d562b3c3f608167dd339a4ec54ec93b27c4f43bd2e54aaed +nghttp2.v1.58.0+0.i686-w64-mingw32.tar.gz/md5/7cf81bd21105f4d9fd5dd4dbf61fcb70 +nghttp2.v1.58.0+0.i686-w64-mingw32.tar.gz/sha512/47624f56df68983d3bd881cab1e88be249b678cb184cc426443c4a8c85481af03cddc65f642f1d88afd300cad88950662a99c5dbd75a459bd99dc56d3f38231c +nghttp2.v1.58.0+0.powerpc64le-linux-gnu.tar.gz/md5/a6e9e975026f658cb2b68418c5f2cee9 +nghttp2.v1.58.0+0.powerpc64le-linux-gnu.tar.gz/sha512/2007cf1420ac0866b4b6b5b463aa9c491abd85b2559bea259efa075e308301b10f1d8e72f86c36c21cf42e891f672ac289b5c81640a83f002c89aefebeefb355 +nghttp2.v1.58.0+0.x86_64-apple-darwin.tar.gz/md5/06c250253f1969f1c8460f3c65ec02a9 +nghttp2.v1.58.0+0.x86_64-apple-darwin.tar.gz/sha512/397bd1ea32617ff005ad4217cdd5652a6a58d5c98837cdb8366aa4d0fd86824673864850aeb3771c3304755d8d5466b881b4007603a9cded924cd6b8b18384f4 +nghttp2.v1.58.0+0.x86_64-linux-gnu.tar.gz/md5/2be0cc258a81106eaea132fbbdfce04f +nghttp2.v1.58.0+0.x86_64-linux-gnu.tar.gz/sha512/1be2b3906b9a5e2cb9af24ea358b92b50e3c7be85e9533784e3721b28219bce555700ebe3edf789123ff471a062dfc22f10eef1d21a6f060a43151f8904495d0 +nghttp2.v1.58.0+0.x86_64-linux-musl.tar.gz/md5/55898f6770dd85835d76e52003e8a1f6 +nghttp2.v1.58.0+0.x86_64-linux-musl.tar.gz/sha512/ad2266563dca33543c5fae9d0cb4daf9f0c6f4ccab8efc30e2eb273be9df33ae43da3007bcf83d19f39b830c11c17f8577014c81fcf5e76dce159f28937166f2 +nghttp2.v1.58.0+0.x86_64-unknown-freebsd.tar.gz/md5/5f9886a4e321bc393dd086106a685466 +nghttp2.v1.58.0+0.x86_64-unknown-freebsd.tar.gz/sha512/d86ba3730bb63143f990bdbe453fa4a0692b434beab372aa9a8e7673c2fddc8c97ebd2e47ac109ffe50172121f2d56ddd40d8d500b73e61a58635edcede8d071 +nghttp2.v1.58.0+0.x86_64-w64-mingw32.tar.gz/md5/98ecb527506234cc69cb8a958a74a002 +nghttp2.v1.58.0+0.x86_64-w64-mingw32.tar.gz/sha512/3941233fdfa82af9a5590f5975728d51e42791eb1b966a4ae7913a24f52500c24dea05e66911b3c5768535b145c99b31e2dc2ab50354648492df4b4b63434578 diff --git a/deps/checksums/objconv b/deps/checksums/objconv index f3dfb0de2ffab..ce20ae4acf76b 100644 --- a/deps/checksums/objconv +++ b/deps/checksums/objconv @@ -1,32 +1,32 @@ -Objconv.v2.49.1+0.aarch64-apple-darwin.tar.gz/md5/bdf95a776cfe782d30f48a41575e1414 -Objconv.v2.49.1+0.aarch64-apple-darwin.tar.gz/sha512/188b5e25d238a5e2f704c3ba8d2d57d6fe452f9d5c0e26b710ff225086581f906b8de6388c6240bbaa9d395cb58c0a73a67e65cbb8df6be7b98473101db467e0 
-Objconv.v2.49.1+0.aarch64-linux-gnu.tar.gz/md5/2c2b88856921c38294a30671d4794dac -Objconv.v2.49.1+0.aarch64-linux-gnu.tar.gz/sha512/2c0e6cf6da02e25386e89c51f5f2d39833b49653b20537c73f7938a4045805d07b0f520661d07332aa1372231d34a3a979ad490bf5eb91fc00fcc20da3e7a9bf -Objconv.v2.49.1+0.aarch64-linux-musl.tar.gz/md5/11f6c06ee0d98b553781367d5404c76e -Objconv.v2.49.1+0.aarch64-linux-musl.tar.gz/sha512/d93a742a08f873f9336f57a28af8a1eeff624d5d9dbcbceba0d58c17a2ee3791b363661af293d08997d701fc22177192e5b5154b827974163c189ad6511ea13a -Objconv.v2.49.1+0.armv6l-linux-gnueabihf.tar.gz/md5/0151be530a0d54376590065cef28666a -Objconv.v2.49.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/cb7cf5b00f211be4194b5b6acf11cc491b5f140d990fd8babc6590649e9864cf07a421e8a87ccdbe0b8720bc6473166837e384202bcbac6cedb2a9bd9c46711b -Objconv.v2.49.1+0.armv6l-linux-musleabihf.tar.gz/md5/390251e8245a3d8d110a1786336663cc -Objconv.v2.49.1+0.armv6l-linux-musleabihf.tar.gz/sha512/b7eb9e4a983e69ca970ce86bf306b7df11bfa8aefdd26cc02841c563ad0b5dddcb47f106fe7a0a420b20ae1d4890e6a8011c0db5a26e3493c80e63eeaadf86b0 -Objconv.v2.49.1+0.armv7l-linux-gnueabihf.tar.gz/md5/5f924d5bc16bac6447e9f2deb943e60f -Objconv.v2.49.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/edaf3881754bc949ef3c60b058cc0cfff7e828d6486ca968940d216852baed5b06544dd48619cf045f3ef87df5ea00389ac3b298a1d4e9489995351e1e1ca952 -Objconv.v2.49.1+0.armv7l-linux-musleabihf.tar.gz/md5/c967a9ed1764d9692e905d879a03f45f -Objconv.v2.49.1+0.armv7l-linux-musleabihf.tar.gz/sha512/08e9397bbd34734c3e9137f64a94a86ec11bc70eaf54811301e7bf782b581ffdcfa041936e29aa0a8ee46203591f8213d6170a7c6ea141a0ab625ac1156dcfbc -Objconv.v2.49.1+0.i686-linux-gnu.tar.gz/md5/a59fd92a1ed62048edb1a1297d615aa7 -Objconv.v2.49.1+0.i686-linux-gnu.tar.gz/sha512/581fa0f5ea37e1802c9845bbc9df0b826fdad5900e712eed8767922e155026011570b4a4b8714430c038fb3c0d47965168a4c33b21bd28cd9080cb036fc9f033 -Objconv.v2.49.1+0.i686-linux-musl.tar.gz/md5/05524b26d550ad8fd045976f395cdf6a -Objconv.v2.49.1+0.i686-linux-musl.tar.gz/sha512/5e6d3b27b80f96a4f1c278f2f8fe0ff1f9bdc2f1df223a7c4d1c235c18dd8eac0b8b74d37defda656142fb2882c1b590bb3e730cfed77e316012eb69b9580b53 -Objconv.v2.49.1+0.i686-w64-mingw32.tar.gz/md5/10e82481a5396b00f568eac690c47e0a -Objconv.v2.49.1+0.i686-w64-mingw32.tar.gz/sha512/27d606acad2cf6789c9888c72887bb6a277c07f7b528fd8fe333f9738caae73e293df76ba9a0af5dceb00b8289bbc523ce235cb0eff0f0031bcf20300b4168cb -Objconv.v2.49.1+0.powerpc64le-linux-gnu.tar.gz/md5/8755aecaacc983e2a9a948eff5c485d9 -Objconv.v2.49.1+0.powerpc64le-linux-gnu.tar.gz/sha512/8b2bf010ff7da164b59df7147cb4904ae6f2913a3095c649e20f4263f77fb92cf8513d9130a345576da2cca4caa30828cc43b9c8ae1870268e3140e0016ad557 -Objconv.v2.49.1+0.x86_64-apple-darwin.tar.gz/md5/0657a0ef9f278718c741da4d72c0952c -Objconv.v2.49.1+0.x86_64-apple-darwin.tar.gz/sha512/ffd9247b02f72830d3b12e075124239ca472503701eef005b7457e21cd10103aaa13520206787818f11e9dcf35a156979e01cf5839dd554bab935ce757f032e0 -Objconv.v2.49.1+0.x86_64-linux-gnu.tar.gz/md5/0e029960584d00dbf8673ec4fcd9eb83 -Objconv.v2.49.1+0.x86_64-linux-gnu.tar.gz/sha512/ae747a84edccbc804239499c10d863c62bd5846b4ab87abab42c832c9fd446065024034d76ddc167d358821f90f8d2241c341232d9dd86cf31601e951e050a6e -Objconv.v2.49.1+0.x86_64-linux-musl.tar.gz/md5/39fc1ec3602dcb6eb2f80448269588fa -Objconv.v2.49.1+0.x86_64-linux-musl.tar.gz/sha512/e86114bf0b0da7297721c56b1cf246f52b9331083e4e73b53a30a1ff031f260a5d6bd97b455669c38af915689a363b99a30ea7ed743ebf49271b95e34bcfd85e -Objconv.v2.49.1+0.x86_64-unknown-freebsd.tar.gz/md5/9d331f32792c394c0d11dc4e6c24ffb0 
-Objconv.v2.49.1+0.x86_64-unknown-freebsd.tar.gz/sha512/0c9b7c2f58110b3c8df52d83cbadd5349fb81732acae6786617e72a3150aa9ae8da7afa1e9eb08639f4dd4e7e69f29b882f98e99a8a4404b569c545c904f5523 -Objconv.v2.49.1+0.x86_64-w64-mingw32.tar.gz/md5/c8ef7dd7742e2c9bf2d05d2b0310bb50 -Objconv.v2.49.1+0.x86_64-w64-mingw32.tar.gz/sha512/b47ac1f3a10ee4f958dcda72ac45f35b94fd436498d008642ce53b93ff517c0d4158a72cbb849336dc9d4a16e26021af13e7b6976f83610380cd78cce6a7deb1 +Objconv.v2.53.0+0.aarch64-apple-darwin.tar.gz/md5/ff9f237208e8bb48daa5eb4d18526f24 +Objconv.v2.53.0+0.aarch64-apple-darwin.tar.gz/sha512/5960cc291fd551dc288cba4cfbe9c2448ebb67da72487a52d364419199e91541a427c70d73c610e054b7fe6617c9ca715ee5d3e6e694d49f16331a067bb7df73 +Objconv.v2.53.0+0.aarch64-linux-gnu.tar.gz/md5/e9ead2b4711ff50a9c88272fc5b4f0a5 +Objconv.v2.53.0+0.aarch64-linux-gnu.tar.gz/sha512/9eeab07437e61af46d884d4d3bfa6bf7538c9bd996a8b217748ed70a2d53df25c20235af78d8767106603a3c92955a1f1870533d018d35e55ba8088b14e685cc +Objconv.v2.53.0+0.aarch64-linux-musl.tar.gz/md5/c506fb9e5701a416c2204dd42a0e84b1 +Objconv.v2.53.0+0.aarch64-linux-musl.tar.gz/sha512/b6e2d91cddfe6d8c42d233e9aca1f322a4f4e12480ed2a0d148a327955b4dfcf049cb05e2151bc7a64d46ca3e3507eaae8e77416eb895bac16f394217f3e83ca +Objconv.v2.53.0+0.armv6l-linux-gnueabihf.tar.gz/md5/2f353785310504e7cbed6bc8bad2cc64 +Objconv.v2.53.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/153e595c0385395d90cafd57d08ab1ab0cae9f0376c13f139c9dc6132763cfd19c64e74e8d50f33aa50e74173f3f57f11a11ad0afeef20c2f38c00badd339068 +Objconv.v2.53.0+0.armv6l-linux-musleabihf.tar.gz/md5/a5413f1376abb962de5665cebea77c39 +Objconv.v2.53.0+0.armv6l-linux-musleabihf.tar.gz/sha512/78e6778bdc4d31905a585ecdf87c499f7befad4ee4c46a2d26e6b1e9330d9175d3def536291a4bea004dd65cf95b8b751557afc40a1513a786502000c5a3769e +Objconv.v2.53.0+0.armv7l-linux-gnueabihf.tar.gz/md5/9008ac2cbe75388fcd34cbaf523512f3 +Objconv.v2.53.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/ba877ea596965ab1ec419cc46e59d9010a96c52fba8036374e9df78157812926a34ceb3d29e997a5a27d38143d404343cbcf15c1e1b136835235b9a3c8543c77 +Objconv.v2.53.0+0.armv7l-linux-musleabihf.tar.gz/md5/86d14f65fb4f3c0dc0848a9d683f0fa9 +Objconv.v2.53.0+0.armv7l-linux-musleabihf.tar.gz/sha512/bd3a29982a16437936481b7244d237d9ec8d0cc7d78e50cc8f30c7c3f58475ba7a35e1fc0f1d1540d780b2573cfc2de2e59c8dd8d1f90de7b7fbb81a74f526a6 +Objconv.v2.53.0+0.i686-linux-gnu.tar.gz/md5/d22c35a1b47c64a291903f0ca5acc297 +Objconv.v2.53.0+0.i686-linux-gnu.tar.gz/sha512/b431d44839a628b75c189c5188f15462eeaf666a868e33c56e1c5f41041f1903e6daa3c4538b95a96a26c5842501a0055c7f2f12585e4f341ee2c8fb452855de +Objconv.v2.53.0+0.i686-linux-musl.tar.gz/md5/dd583694f915309c8e7868cebe27f20d +Objconv.v2.53.0+0.i686-linux-musl.tar.gz/sha512/279f36b7d1bda71ac37a1cd9c9e7776808499066fde94b456b000c187afa173fe7858894bcb4666831af2948c4363a15955740da77c01d63f6007627b61c1c0b +Objconv.v2.53.0+0.i686-w64-mingw32.tar.gz/md5/42c7264dac94b87556dfb2d469c5c60a +Objconv.v2.53.0+0.i686-w64-mingw32.tar.gz/sha512/a7eda3e7c5b3b19321d49264c6a7c35c526955ea7fc6b0201062f7e136eea6799a4d9269d78bd84770146d79fa3e49c1251a4568b5ffd1675c952e9544566553 +Objconv.v2.53.0+0.powerpc64le-linux-gnu.tar.gz/md5/33cbf415f36c0be59f34bc2e9b4c1226 +Objconv.v2.53.0+0.powerpc64le-linux-gnu.tar.gz/sha512/71cb373ade619cd431d64a8e097ff18e4979b39cff8b6be71b6ce77dc2dc4f663b25b1ddcd74e8d60834574cc3a5ed512618e6205df757d1f9752d78e35a0ac8 +Objconv.v2.53.0+0.x86_64-apple-darwin.tar.gz/md5/036603ce6fe365ccb838aebb6479ac58 
+Objconv.v2.53.0+0.x86_64-apple-darwin.tar.gz/sha512/86f2f3e551bdc0595dd096925115671558970b4750c13d1ceec28fa47a3f61459cb1c4c57a5f930670035136f8cbd6073b1aced0dcc245f3a7815c26f14ad6c0 +Objconv.v2.53.0+0.x86_64-linux-gnu.tar.gz/md5/6a3ed48d4c8181934c02de60902b9f4f +Objconv.v2.53.0+0.x86_64-linux-gnu.tar.gz/sha512/94f306e8053ac94d9c679a403fcecd0a5d3bbc7f62409886cdf45342fba3fff7c22a30679aec0bd9d76262a3ae753b8a68c243b32e5966f83557bde5d988d80c +Objconv.v2.53.0+0.x86_64-linux-musl.tar.gz/md5/093aa158739b90fd5f5583868b5c898e +Objconv.v2.53.0+0.x86_64-linux-musl.tar.gz/sha512/715ea896c0cfbd2505d1ae39f2d8c6950d56f0b262c108a0e378163a209e4a1581e2d7d1f3a7677d489baa21dea9e46695415edf7615ec66078c1a7978cc578e +Objconv.v2.53.0+0.x86_64-unknown-freebsd.tar.gz/md5/a56c36c004cfb70f7feb7be0b681af19 +Objconv.v2.53.0+0.x86_64-unknown-freebsd.tar.gz/sha512/ea40c235145b00b730ea81e946a0be247a10950a564a3ac33a493c0fbcaa866f039ccc47b852d436b13fe3c52b7436f4903621a8c91e48d947cddfda42e6a482 +Objconv.v2.53.0+0.x86_64-w64-mingw32.tar.gz/md5/291a044f511f9529e9e2425aef8a7c16 +Objconv.v2.53.0+0.x86_64-w64-mingw32.tar.gz/sha512/4c43ed6842f53ab6c081bff2392e65b5295acffc7940caaa8d36a2f845368d37aa40259d0825f0ff08fad2ba58d5accd78dd96d51c8992396571fb96c81b1555 diff --git a/deps/checksums/openblas b/deps/checksums/openblas index 1523372d709fa..b49af255be0e7 100644 --- a/deps/checksums/openblas +++ b/deps/checksums/openblas @@ -1,94 +1,94 @@ -OpenBLAS.v0.3.20+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/036acd7c7b68432f01f2a980bc4958be -OpenBLAS.v0.3.20+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/db2c995b09b5ab046491257b44a8806fd5e254bbf4b4df6e9281ffc8d199745a3d6fea912da2fdd657447e194c73db52cf7acb348b49fd37758b6fbbbdfd3a93 -OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/7c5de800082f39fea05d1fdf9cdf2e79 -OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/78775b01c1f24848da6111d9f4746f0b44f5966aa202af00182c4da649e4b4cf630cd1bb90e8ed32f54dfdbee0f6d03b87c171f03fee9b37886634a20546d627 -OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/eefc198718aa837a04e0f8e6dbdc8b0f -OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/cdc351d992b795732e02698df8f5f31c301dbcd6d995d2a35790461b08f3c942d70e8f7c031a943873eead4fcbd1e73649aafdfdb7450b955f4848be2e9a43de -OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/8d9ced4a8e441713ceb0d79b72b43ca5 -OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/b1dfc3f4a539d01266e3e5d400864cd445c4bc561de464e2f6c9eb5704541aa436944f6bfc89be1948e9675f1a83098d77fe52f70886dc90d54206c81f350277 -OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/fa63d8009ac2605208ceea9f6183acdd -OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/92b8e2fd2bc45c60aaf8d79c59f96b721d969cd3320c0b04989a5a48099cae213fd4a6aa9dca45910d881e495d87863513b23ee7c433c894655cf72c7b009323 -OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/68672f9cbcd9bee92c89b19599897034 -OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/4c19f3cb7afb52cd54c3852fef3815a23e57b5c2ebd9b647ad43ee62191b74474c787b22d6213555f38b8233b96d479631881d522c7bdd544954a9f04b51c509 -OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/7fd9458e1482d46f761d6a519999a648 -OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/2e20c845deb5c87c6e02a3512728a27204193a764f8ead1a66ce053b66d03bb853bbf40289727b1b635b17423416a7a69c633242c12f98d3ec1eae5e82a88613 -OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/0868668b73c84e14edb634482d59eddc 
-OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/c87f91120db8d3b32cc12077b1e36110f89253fde22aae9de88945fc731ee74271acf31cabac9971635725f586b65cf6b1b9badebcbba5408b0ff4c68b580ccf -OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/9e84b7585acf2bb71781002b2238d888 -OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/14b57f9d5691997cf01bc6187a1a1d58d07d162ab8eb2a480e7c42f0cff1583161c8b1a059c9eeb83e7ed276c8ffe2e193db001a3b51724e5af24c72f5e33572 -OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/a4768ea555e68fc755da169f1c7eb21c -OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/490ce2b60cda0b5ed40df103e79b83ab75dd03779ea88b0ae5d3b76acadcf4810b35f69566e396b438d881130e43fd0dbff1672d0383dc7fe275f44574d8830b -OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/1a4e7e7cfdefcd878c18bab39b9c80cc -OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/15b512728b49782717770f044958ed3afcd54d6cc70b362a7c96dbadf7599bdcdd157ee021287a70e45957d0a856417540e64e2399cc392b9de55036d607fa29 -OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/63ce4aa67d1d56f2cf456285546d3eeb -OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/ac0bd761ef574d3533fa7f6110b9ecf992edf7a68c20fff4faf4b7372d3de4c5ed558119dcdb669296aab5c0da5ce0f51f54abfe998958e1924cfa0eb958305e -OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/581bcbd14328d82258511f8b91d8bf84 -OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/be66567c762f70885b187dc8912f83003c69dd5000387b5b82162ba9f47acb17d855f8f5bda2f31d3fc7e01d2aae3cd6b2392632d70ec34f2d648010a8b11f38 -OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/30dfd96f7f3d35df95e70d506f35c9f2 -OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/84213bbff84899882ab43599f3aeab1c6e3ee8f7158a3873ec2d6a3166e69036c16d742d25c476468f64b6644a2f798485e50427139880f1ae933ad507a2952c -OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/4b82a4e68a43d29538a318763004aa94 -OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/94d35902c34e6fa68a0648cab65db49650c73ed21d69ee667350cbbb81028413b92fc30e16504648a6b42039f483d327264a3ff39d546cd30241f4672f9300a2 -OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/7e290717c23a468383bd66b46eb58fac -OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/432cf42a320a265b9259d743eaca75b884663877858149b0feb83948436a941940955c0c89c6de9ca114f0bbf153127a046813195f4669a81cab1ce244cc5a6b -OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/f72bf36862607c57fc9cee5dc3f94dac -OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/caecc044e25d2939eec45924d69e64d3854fc54626a56126454fb3855ae2dabf36fc248d7ef9d240f15e8883787a43539e2a0d8dc68fc5c93a094ded94f3b976 -OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/66bfd606fc80e02999ad44243d3b686a -OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/b3d76ccf40af1de018e829f5dd696c6d18ad1fd96657a06d190a9d4e939cad5062a3a2ffaeca2ce7f75e822694ae0b817568dd8f115e089a59590bb34af264f8 -OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/ef7aca842a623246b4e2876ff28c53ef -OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/a59feb34806d651a2a3614bcc5203407db626e96dabeb6bb12b8d73915cfd87dc02b0e54704c5d0f1b8ab984d85ee64509a934884640d2522fc4a9835989aed8 
-OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran3.tar.gz/md5/f2ba9ed0f68447aeddfcf3ac883cf83b -OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/1b6f300febf5ceeb0045c46cc3d6e9f2481cba2ceb97dcafff1667f06b8b96a2ad4975853e6bc2e3e6715ade28be5fb569fdae005f4fca2140a5557d4a0845ca -OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran4.tar.gz/md5/b39347f487b46996de98d9a453ae804e -OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/a923a92467b4582f69ec9d96556c8f2ef55a3f99dacecf0491da9740912d14d09a9ba86bdb5fcfbaab87250c57a0c077c2f6ccc08bf3236ba5c7d98822e9c32a -OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran5.tar.gz/md5/6d9b4adf3fa54151c45b832b5869409e -OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/f15583c15fb4e4b6a38353fbbce2aa57c8f46d58e6c5464a685e5fb0afd76f1bf9b3986c1d34af643a8c9b3a8a24ef63389982c2e8ffbf91a63e8f1ccca2cce5 -OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran3.tar.gz/md5/fa46f28f624e8c0752bb76abc04a41d5 -OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran3.tar.gz/sha512/76018ed804f25212760f1128f7d3823a1c8ba72b8cf5d83aa5be5c5f6e3de8076b04be9d5b659af75e3c2fd5cb9a0654dba59651f010534faf174a6c7d836cd3 -OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran4.tar.gz/md5/48411109935a2ada9d2e336515f36b6f -OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran4.tar.gz/sha512/9be06c11fb248d6da47dab21f60d1eec6b486a137048f79f2138b5fe6818846ac198da7d73ab93ec161e8861d7e670b587b6eeb846c571497e96023934127903 -OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran5.tar.gz/md5/b0a81e44dd4a216c60b6ff139512d7b5 -OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran5.tar.gz/sha512/1b1c3cc5e62af6af8e106c60c59d7ff685d567e93dce19643ba8c0547200000bae96a3473573619ab235c34ff8e65745266001cdc868e948ff3ecaa9ba93389f -OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/18988c19ea5bdb81d97f8ce4456319f6 -OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/466d6b05dcf00b6f09c1a8b8fda97a0035838d73d77954f6cd499358e8160af6cf3e8aac97d0f7ba7ced144db1362a9ba126fb113a4469c232a6b9706dc3dc32 -OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/d0aa399c07712e9a520a6cb8067bda63 -OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/7c3e0b1c18812719be4d86a641d25d927c9c8cbc6e1571c7a46ca27672ada00cbe3879faf0b5aeaaa0454907551953a20a56be0bc24b651df117532ace2f9067 -OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/90d51a2f41c11fc8d1896597dd106cd6 -OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/683c40193ec7a4612c4a36e9d9f6d9443bfb72dbfed7fa10b200305c94589fd75362670d9b4d7646f24b4f7933cfc55a2496030907e2d3fd30b0eed8b6a2d10b -OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/30d5022d6f52adccfaf6b3dd837b6151 -OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/433a520458d6804eccf69c74fe357e6d819223b0398007f17420a6aa77a466177d9dcd4f467821b4d99f4397f5e0c1dc0864512a7f69c43f23bc40b6414449b6 -OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/2848232be1646333d6d413a588519d99 -OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/edb51d55f602d2a271109dbc12e59e23c232e58833bcc34dd857858d10d318eac99ba300fe4c6480b995e152ff036ff175218a2f4b29910a27f1861543d1e978 -OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8bd4f4d571dc382eaf0084000596276e -OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/f9507f6dc53c632e0f26de074bcd312956b2fb492e9f1d32e3cdf1a6099d6f2b17eea09ae825b2414a28dfbd6958813cffa289fde0a15cf7cba4e6b3653d2a28 -OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/c644f00642c69946d12b8f1f96a8e766 
-OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/2bd51e9adda3e0955ab26c5c178e9a75a8d9c1b4cd2fd221bbb7b9eb72337cd5034f42b53aaddcf97a807e01f2b9836f9be95a5c6517c831374a3b5148b6e380 -OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/cea0d5ad3528298e4512c900a13f21ec -OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/379ad13b723acde1d2239335c2611a9ebd2abe1432931d4c2395fce9f50bbd5d830a23fd5ea5afc1fc251704e4ed880468abde42bb0ea75b6bb0abb9a7753c5b -OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/bc726288a19a8bdcef3205de12b5f172 -OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/3e26b8a2075f997ded8750d84e3257b895e7e05adac77d836e66fa7478b43368b7d4b7a458c6991cb642ce0d135b1b507dade7302c4f5a44aabe637849bc1acb -OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/d162add49c7ee74dfc23b820bbd363b6 -OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/70bcc15f37e4cd822c2f95d8fd23e912829450825399d31c29c00a4ea219ca37f8831d3132ae4b5972fe9ec95c304bd1274a12ec8a8b289b1830cfb7ca0392d7 -OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/f036c51e0954b8b76e3023280144b5ff -OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/2101747ec254f51fe5c2cfc49ce9599aeacf0d3e7bcb14c9ccaa59d8b0f7e9dcda98ab3ff38973817b736a33ddf654e17748d8a9c3b40e5352a198278484a2f0 -OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/143d8e7cf2fb615ccab6617bffa4acf7 -OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/6e72144f83cb329301feedea02581a100d137f3b209af4983500c432b6d23cc7473c85a7b1ba90e24965508e74a191b49cea8820b5899793440c3ce067acbe06 -OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/871863002d0053784a81409b4581c8cd -OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/908936494c981e14bcd7818043efe979d9522ae1c9ebcd69feb853c46a2249da1cb5292844d0de7276762a21ad8680a1117229f3ad53332b536233d8722c4d85 -OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/ce4897980b12374801095fadfad11196 -OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/ba551942563a58fd22d182a29cee83ce5f51db10e52bc8cb27d979dc71632484e1acb713d4304d773c3111d5dba532bd65651374e91a364f8125295acacfffd4 -OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/301ae23724b44c1d10e4febdc6738df3 -OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/2f1479b1f1d10682751b025493bc38cd5eb9854620024b1f0ac45ba0f7a7621b4795c4c2f89eece5c80b671387d095b118d58d8ba201214f45bcea1ac64fca91 -OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/51088d57d2a9e9e50259128a0ac48727 -OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/c88b1eb662c16b75c46a25959f6fff22de2cfb2a97ff1c0cd482528e83d54a4d8bbf33c3e7d6a79ad75998d0c6d46ef6f245e8ad406d1a072907138d7ca4a34c -OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/06167501fc4cc7b6587ead3696ef72af -OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/a853a4c5163e0bc0266e75df0b208794e8439a008b625b520b51e7891825a355960f62fe2275e4f849c345862fabf0339d0d22d4bdcd87acfb17ffd65627f74d -OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/73a43356c9bf374765a2bc8910e2eb49 -OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/0c2092789f4eeab1725cdfd7d308a2ede054b993d6d1a83f671c5c8e9f651565c282af7371c958c61a57679a233d3f62a287afb44225498dc31249f6821ddf98 -OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/46bd5ef0708671aeb2a533476a04591b 
-OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/1b0a3f9e61101cbf455da70056dea75637f3008df727072a22150072e7bfc773294378fc42a492b2351f9af2d6b7866503c0039f8addeab07d4f4b5d0f42b5fb -OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/7e412c9961e4762c40cca9c27e5c9aa2 -OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/6a275bd153bb0ba227f39ffbfe95ee1f84f42f79361f7d3a7b1a5c29ca253b8d8b2427ce389f10cf2b95fb87d91dcdf1144f24c82d11320a0aad7dfb8d3c0498 -OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/2a24ea7c7a9bdf8069d7f62c55d09bb5 -OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/7f9134df42be432199119b2a5ef5df2552247cca8647546fb755901d5903030fd5cb565c711248f173c71409cd3b30609a2adadf0213c9a096a9b70298b29a87 -openblas-0b678b19dc03f2a999d6e038814c4c50b9640a4e.tar.gz/md5/4586a405791fb16775eb9aecdd7daa59 -openblas-0b678b19dc03f2a999d6e038814c4c50b9640a4e.tar.gz/sha512/c34a498f2f1ecf65c5174a198022558bf6626eb6da0c4191762a35fd9d335c67dd17246cee3ef503301738a202650aaefe5e0073d8abefd3d1b8ba19cc953304 +OpenBLAS.v0.3.25+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/1acfa1d2dbebaf274812e36b59cdd0f7 +OpenBLAS.v0.3.25+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/4cf10b1e5a791dcaea814f4f34b328499405b39c68ed652569e3a070f14c51b935e18d04bc10440b19967a40d21d7ef4c061817a5e6abf1403613e25cd45ef2f +OpenBLAS.v0.3.25+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/bf8cb41b65e7b60015b67727adca5c50 +OpenBLAS.v0.3.25+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/710646a8e7bcf1cb0320777006d328b20f90463dac29f37b0c0472d22a8e00e49bd8aad1a0e71772279bf452e5daef9a6e9b51a5ea2af806fcdcae2999039108 +OpenBLAS.v0.3.25+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/60ff3026347c9a53f88cc90fd0f29950 +OpenBLAS.v0.3.25+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/1dd600ac2b315a051c84529e4653aad77d43c228538ebe94c36b6f68ed14a15c5d7cfad17b49eeba4ce0ec857d7199b767c241a5e9863f7c60d5f516020a8d20 +OpenBLAS.v0.3.25+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/5b57d42de0fa82c123f792adb6835195 +OpenBLAS.v0.3.25+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/e534b6c26100a8c841a1a81dee75cd2ae8c34f5bc3ddcd6b5cc9afdc21d46419459046371fa448c5b9c6d02eeccfb64d10c887c53b52c4744e809c50d3b7d41d +OpenBLAS.v0.3.25+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/d2417bfa94754ca621c875b926dadd66 +OpenBLAS.v0.3.25+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/249b95d0502f6f82ac7151a38380c8a6cf9314e856a9f545cc086b14eaa3f7804ff24eb8b3430144c3add912aeec8fbfaee40fe30e4245fcafec3e9df3a73484 +OpenBLAS.v0.3.25+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/d68bee54d083f93fa9a96ccaa0b10d6c +OpenBLAS.v0.3.25+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/909d9d4a9581899c564f6a42f325e50ba7b82f1bf7d2912a9d47cc88e8e13ddda6dff8a40680720f8d0a4a1973f6b6c48ffd0ec51d0323cb08cc73268386a61c +OpenBLAS.v0.3.25+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/f103f1d84b9bbac3d73d70efa79f8f8a +OpenBLAS.v0.3.25+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/ad1bbfbfce09e7a7eb6d33e87f432530a3347d68b033d877029f8da9e290b129b2ab887eac510b27c9a1d9506679fb3e7e554dc0accfa05c5c950ae40c1501e6 +OpenBLAS.v0.3.25+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/8d92bf601f216b0512e1eb9bb0f4689e +OpenBLAS.v0.3.25+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/8abd7f22db9d739c90df3c512ec4958989135e894d45ee91bf364e4c4a07b3dd8640351e596ef4b62f314bcf0ed22a35036499756611f5db1f98bdf1a3130033 +OpenBLAS.v0.3.25+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c56a5861637867ca4d0a88e2d71e0301 
+OpenBLAS.v0.3.25+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/301a30c2d5c1a6355516a0520caadf83f1e121e95480bbaf3c39baaedfb10902adea826b289f0e5411ec9bb12eeb0a63fb3630b1ef3780304689a2d2bb97add8 +OpenBLAS.v0.3.25+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/6c8f6582db03396ed30b41deeb706877 +OpenBLAS.v0.3.25+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/22e2c5ba0a9d6fe1dea222d24fe5da9b208525656185945a8ef90e201592e8c2692571bde7824651dde50850b0a81f25b5dfd8d4283a097d2f6193507128b587 +OpenBLAS.v0.3.25+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/546e7ec9501a6687784c9285b70cf2d4 +OpenBLAS.v0.3.25+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/8d306c18e810a22b637c5fa2f35b791b44aec67198b1a2632ff245cf5519cf138d2074eea84e85a348075b77b90eb5ff6342a920e66e508501eefd6c8cabfb6b +OpenBLAS.v0.3.25+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/494c5b1a2df492723aedc05301958606 +OpenBLAS.v0.3.25+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/01d413b45df28b6c262d2e1c1859a499d10e24cbf32c8954b5fe5f66c6cbc13e113e7834db304beee02a31228f2c5c1bb83e83455551b63cc972cf0d604db843 +OpenBLAS.v0.3.25+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/0e14407a456d8354d4759140b8c2dcab +OpenBLAS.v0.3.25+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/38b045a701d0dc1cc161877944a6db5154b36437d4122460727f489b05b902a7766e760ace5331a4056f804a8008fb3e123c905adae45839d1eeeadf1f209116 +OpenBLAS.v0.3.25+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/8d92bf601f216b0512e1eb9bb0f4689e +OpenBLAS.v0.3.25+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/8abd7f22db9d739c90df3c512ec4958989135e894d45ee91bf364e4c4a07b3dd8640351e596ef4b62f314bcf0ed22a35036499756611f5db1f98bdf1a3130033 +OpenBLAS.v0.3.25+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c56a5861637867ca4d0a88e2d71e0301 +OpenBLAS.v0.3.25+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/301a30c2d5c1a6355516a0520caadf83f1e121e95480bbaf3c39baaedfb10902adea826b289f0e5411ec9bb12eeb0a63fb3630b1ef3780304689a2d2bb97add8 +OpenBLAS.v0.3.25+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/6c8f6582db03396ed30b41deeb706877 +OpenBLAS.v0.3.25+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/22e2c5ba0a9d6fe1dea222d24fe5da9b208525656185945a8ef90e201592e8c2692571bde7824651dde50850b0a81f25b5dfd8d4283a097d2f6193507128b587 +OpenBLAS.v0.3.25+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/546e7ec9501a6687784c9285b70cf2d4 +OpenBLAS.v0.3.25+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/8d306c18e810a22b637c5fa2f35b791b44aec67198b1a2632ff245cf5519cf138d2074eea84e85a348075b77b90eb5ff6342a920e66e508501eefd6c8cabfb6b +OpenBLAS.v0.3.25+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/494c5b1a2df492723aedc05301958606 +OpenBLAS.v0.3.25+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/01d413b45df28b6c262d2e1c1859a499d10e24cbf32c8954b5fe5f66c6cbc13e113e7834db304beee02a31228f2c5c1bb83e83455551b63cc972cf0d604db843 +OpenBLAS.v0.3.25+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/0e14407a456d8354d4759140b8c2dcab +OpenBLAS.v0.3.25+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/38b045a701d0dc1cc161877944a6db5154b36437d4122460727f489b05b902a7766e760ace5331a4056f804a8008fb3e123c905adae45839d1eeeadf1f209116 +OpenBLAS.v0.3.25+0.i686-linux-gnu-libgfortran3.tar.gz/md5/fb62eeaa26adad79f27a4871abe4c8f2 +OpenBLAS.v0.3.25+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/1712755c80f7d48052fdc2ffd254b745ddd5d9a90bcf2f25532448d258317664895efdd8b92054a17271349e6371449b3dbf1cbe44d2bab2b2a11e77a0f8207e 
+OpenBLAS.v0.3.25+0.i686-linux-gnu-libgfortran4.tar.gz/md5/bbb096eb2294179d4bbac42fa08dfa04 +OpenBLAS.v0.3.25+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/6de33c7a98077c82d2d730b2a3046f9d84cf779c46527e70b4e862c2b51b7487d3b5d66eb1693235ab19bb9b254f85677707bfe496ca96e8b2f9840413d52d86 +OpenBLAS.v0.3.25+0.i686-linux-gnu-libgfortran5.tar.gz/md5/25e6e7f428ca9d817ce06dfd1ce440d7 +OpenBLAS.v0.3.25+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/3d7d12c9de5d435fd7d9b8919c6b511d3a825e02d9d303a2222f16d13f7c93b620e6099fb290cd36648f51952c187b9100753e107530be2d40c1e006f44ac453 +OpenBLAS.v0.3.25+0.i686-linux-musl-libgfortran3.tar.gz/md5/0f09043138753743b33bdc1598ea2f98 +OpenBLAS.v0.3.25+0.i686-linux-musl-libgfortran3.tar.gz/sha512/a43510a70e63b534812fb0213af75743c0c78f8d153e61cfa0b3ec0b5a51172db3d3ca5b40429ccfd2fe99eb730db661566d61f825f0c97d079f75933ab17385 +OpenBLAS.v0.3.25+0.i686-linux-musl-libgfortran4.tar.gz/md5/b8c78d226bf548306cb0de09c296fcfd +OpenBLAS.v0.3.25+0.i686-linux-musl-libgfortran4.tar.gz/sha512/165b85271b686da7daad21e2d3be683efcf8beb3208d2ba73e753cdc8405236c673ceb5713f84131a5b44045721c98de70242f600f7823c1597d38156f692ca6 +OpenBLAS.v0.3.25+0.i686-linux-musl-libgfortran5.tar.gz/md5/1f508ddbb3ef7aff7880b34a8caeb032 +OpenBLAS.v0.3.25+0.i686-linux-musl-libgfortran5.tar.gz/sha512/9f603451d957483b5be45dd8d705f0a45f0fa3fa4e32d9431a15b7dc986fbce200d7d8966717564c588ee3a367c949342b63f03b81de387bb90c691cb3b0d2a5 +OpenBLAS.v0.3.25+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/e85605ee1a4d33d43a2fb2e516eaa99f +OpenBLAS.v0.3.25+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/98512575e60b34df6fe0a2d3f4a0a57d8f9ef1ef7a65b014427776104633acffbbd48bdb8aecdeb54dca1bdee39e1efd6f677dd68028911f2dfbe55f1180f054 +OpenBLAS.v0.3.25+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/673c70c7f29dab76a154b5171f6112a2 +OpenBLAS.v0.3.25+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/4ec6313ddbdcf3a98d48bc8510d5c120fb3232433738a28e9ccd9beec1e2a0ef401c625b0b97702cd7be7fd6061e2f8ed48f35ae36b46495d952053e0accaaae +OpenBLAS.v0.3.25+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/7242fefbdb29d0ef5f08f39590fb204d +OpenBLAS.v0.3.25+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/be253db33b2345388550d3e82b8cea4e9416874df5e01b24e96796faace87440178a10e93a47d228b4e31e21754de8b42a067e69aba8ab6267a19e7197f3d54c +OpenBLAS.v0.3.25+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/1fd83d8f353195c7ae1a1fa9ea266171 +OpenBLAS.v0.3.25+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/0cb87fb1dbe6af1f1aef1bb3e346078a9de39d2ef33a51c99c0fafe67cdb494b1aae567429a9bec8fdfd41c651afe63815b7ecec20ea95fb3d0bedf06eb78188 +OpenBLAS.v0.3.25+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/f2a4141dea91feef792036dd149e552d +OpenBLAS.v0.3.25+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/58dc721598f36380c65f34cb5d3efeb77bb03605afb82bb08689674893cb399af34dde47002105f072422d031df8f629fbfb4f463a9900c3b09fd6e98b3d007b +OpenBLAS.v0.3.25+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/f472322bd6f463f335d7b9c5b2ae69be +OpenBLAS.v0.3.25+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/b9530287d7972b599535b4d78a2540e00a49664660c077cf7c50ebcc86e79fb7fb2e6ddd2d1f3e80c1f4f6d6094c4f4a0641e3112994f7b770ac14868c704ec0 +OpenBLAS.v0.3.25+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/28c498d970d4c659d47981f915448a2e +OpenBLAS.v0.3.25+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/f5a5d9d98013b39b26134910ba9e2cbd23caf1bfca1cb9867e972753081cafd90edfa5089747d94b87368aa4cecffcb40314326b82a47ddb4eafa3a63437482e 
+OpenBLAS.v0.3.25+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/2888ac4b4ad4f693b652c0054187360b +OpenBLAS.v0.3.25+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/0292f8d1ecbc5b357c1b9891944831cf652d1937d62a4d7486dc5c4dea3a9fa54bd1f0d76ba5553fe009d09e68edb79ee1310ac2b296ba61e33ea1ed794f4713 +OpenBLAS.v0.3.25+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/f3868395464c24b87fe543a630799cb8 +OpenBLAS.v0.3.25+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/4790f559458a301cb4ca9ce11bb2004b2bc0a72f7cf88c7dc45572f7c56adb154f7ab76a72e463ac0871019248e4b6ed4129356ab036bc71bee3700235f9f805 +OpenBLAS.v0.3.25+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/83fde9ea432e7c87413ebedea4062070 +OpenBLAS.v0.3.25+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/00428ed95ef6dd1191911e768d929d9a511f34ad1f098c6311df7f2176fb82d67e763326da95fc390ec05854a7a1482c3e2e4efd4b1975653c45a825b4b3ef68 +OpenBLAS.v0.3.25+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/415f468e44c3762451b39596eb6dd45d +OpenBLAS.v0.3.25+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/bda9c30547cc219fdc00d3854ad02708d225a46850c7a95b786b527722e75185787ab2353185007cb9f692a4cd5a3c38d64018a46ee4d400df9d4612371a2a9b +OpenBLAS.v0.3.25+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/7858a2a1c13b6707f6bee3b021811728 +OpenBLAS.v0.3.25+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/0a0e4995652e9459dd54325ee92262261244ace5f065f293459ad91433345efbbdf1e51996ae2629968498a8fc34b0fb64fadde7bc36498a48eeefafde987d5d +OpenBLAS.v0.3.25+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/30f5deef5223ee198acb8c213a7c89b1 +OpenBLAS.v0.3.25+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/a933e33e6070475450312d0608626811bae98e129adb30d84cf5a3c7d525217e805b0ffb88490931a927b1fda1c9479af953282e47af1f469f993d4d2150da95 +OpenBLAS.v0.3.25+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/86fd8946da2bd4fb97f54c8df5883d5e +OpenBLAS.v0.3.25+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/d61187e632a0d49c89854d441b9b42fcd9dc7517f4f38514cd17f599c2f9ad3600054bf8457d0e9fb766fe9c7d0c248cd3e5383be2600ac2c538f2d524a8e1de +OpenBLAS.v0.3.25+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/d95539fa91c80db375310ed09d191379 +OpenBLAS.v0.3.25+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/ab786856a1c4ab47808e9ea2167c6ae42e69d6a11e539b7771464bbc0c7bb1d5482ed2c3c0154fae37bfb1579103e862637675e4d81726d4e3ed633cd7548cd9 +OpenBLAS.v0.3.25+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/09ea7329e39dac562e45f00e6b135ad6 +OpenBLAS.v0.3.25+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/c0b8b3b6f7c9a326b857e218014bc232165122865c18811bc5c9af28b54c494a4a0ed139101cdf30ff5e8b03f9e8bb4ce32865ab5ba4e5251f841a2bf612eb86 +OpenBLAS.v0.3.25+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/f874721e0ee45094c62cf4a006c5309c +OpenBLAS.v0.3.25+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/03d57202ef4209f23f83a5faef01c772c2c6b04085d8448b835eec793b50b9f2f13dec2ba1d0a7ac55f6be7df8231a04163ac6d1fccb91976e965fd10bc0e3f1 +OpenBLAS.v0.3.25+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/69de0e673292ccaf3294e785a96cc22b +OpenBLAS.v0.3.25+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/9ff1dd63a62d5f0d771833a7872d0abdf6e2307aabf57d7b5f1469f75aeda99525beb5a4c2f311cef0b8af3905daab321435b613c9e046a7d7c5c30e2548ba0b +OpenBLAS.v0.3.25+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/2cd93681be2f2f781285ca7dbef7eba1 +OpenBLAS.v0.3.25+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/3163b20884e9be0a6d99735c400ca58f52f097fbbcb396be0ce9c29b3d4adebe5cee4ddb91f095a692e1f812c5558690a603bd455c5576c374e7c49e6cfaac7b 
+OpenBLAS.v0.3.25+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/751aff51cf7674bcb68fe3bb94137603 +OpenBLAS.v0.3.25+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/b5fa7477f9033f5e55214fb4e0bd3bf923e2c580c6582b7241739237dc50d269448459efebf75da82415ceb2f547033210f118d9b8c9ab2585fc792253331309 +OpenBLAS.v0.3.25+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/de458200baed33ba568f8222a9b72865 +OpenBLAS.v0.3.25+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/d6acccb3a10e3eb01be2aacb4d77f084f022fef3ca3bbf05530cae5c13224b4ab12b62e8e8a3ce1ef6ab0b2cca9e88c687794a8620fe0af1c478323976268d5a +openblas-9f815cf1bf16b4e64d4aee681b33558fc090b62a.tar.gz/md5/d890367a6452aa209f396359fb770bc7 +openblas-9f815cf1bf16b4e64d4aee681b33558fc090b62a.tar.gz/sha512/0627960fc2582e08fd039c77aab1f4105f18edb1beba52176a791813bb3f1ebb04f52d174814e156826c45c6adf5f3c1d2242e81fdb520a7fff2170cb1277228 diff --git a/deps/checksums/openlibm b/deps/checksums/openlibm index 4c8ad913fc58d..452abb133c671 100644 --- a/deps/checksums/openlibm +++ b/deps/checksums/openlibm @@ -1,34 +1,34 @@ -OpenLibm.v0.8.1+0.aarch64-apple-darwin.tar.gz/md5/cbb86fb881fce48d837e40017272f7f5 -OpenLibm.v0.8.1+0.aarch64-apple-darwin.tar.gz/sha512/3fc7753ce12e0012b42e469084351ec97e20c1761b50add48af4b4d92ba61b9db4a015243f31f7ec9bf322a9a4e098cffad31cd4501424568bb49fe973b63a35 -OpenLibm.v0.8.1+0.aarch64-linux-gnu.tar.gz/md5/416b30b38b91a76d1613753ec85ac4a9 -OpenLibm.v0.8.1+0.aarch64-linux-gnu.tar.gz/sha512/9ee5821ee83cd5e29c4f338e0076fe3e3705925e8556abe95f0356151ae93f23be0bbcde42cbf6b382e8c29a201959cb31be349ef3d1447e2d19a414a476fc55 -OpenLibm.v0.8.1+0.aarch64-linux-musl.tar.gz/md5/b170fc0058803377d7c4d7d0c9e4b954 -OpenLibm.v0.8.1+0.aarch64-linux-musl.tar.gz/sha512/be311888953887745a2d64eb3d1d5755e2f37e9f46558c6f6722520c503ee23d3470e97d7bf28f0a03915a398c31080e789d6e1287a2b743b6fd3f37b3a2911a -OpenLibm.v0.8.1+0.armv6l-linux-gnueabihf.tar.gz/md5/63f60d2f13acc6fd2ba3854a8ecf2b0b -OpenLibm.v0.8.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/1853a8346f460cf7e26efefb27c36440976e40f000aefc22a81bb69bb25d07675a19f4b64c4dea8fedaaae76925467739cee8cd5743689ae55512e57dab54286 -OpenLibm.v0.8.1+0.armv6l-linux-musleabihf.tar.gz/md5/5b410e9611b188f34fcc5314c45d2297 -OpenLibm.v0.8.1+0.armv6l-linux-musleabihf.tar.gz/sha512/ecb2fd14728c40c7e3d2cf7c4f1dc06568f1dacc60677218ec59011cd913cab173c33db1c402a8b27b8f0556ca66667ebc033130222617cb4f5d9d8cfe7431ed -OpenLibm.v0.8.1+0.armv7l-linux-gnueabihf.tar.gz/md5/0fc3732640b6bfd52759e74be75e2290 -OpenLibm.v0.8.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/81214ec825d33e37c9e889bea054f3aa72466e5fa585356247ef0ec4653b548f7836219092a8c7f0bc3c694e97582012cd026325e0b1c1a6fc113c461dfe49f7 -OpenLibm.v0.8.1+0.armv7l-linux-musleabihf.tar.gz/md5/7ba7a7f9461b43b8ac622b9fa9c0489a -OpenLibm.v0.8.1+0.armv7l-linux-musleabihf.tar.gz/sha512/e088f806f9fad70b2e6ea28a39ffeb083b4c1c215b1cac73e848a06cb19efcf3ff100e4d401ec2a0ed225ecba6dad115f5d245a612a39c90334a753bc79947e6 -OpenLibm.v0.8.1+0.i686-linux-gnu.tar.gz/md5/4a2eb776682551a25bf1d27e9d8b645e -OpenLibm.v0.8.1+0.i686-linux-gnu.tar.gz/sha512/716808c4a2a8c06439072a39db1f4b93a171a2f42e9677cb7f3eba04f204bc181f96c915ad8c74141952eb783cd82ecf3804ea60d830e3f5d9b88bfb4924223d -OpenLibm.v0.8.1+0.i686-linux-musl.tar.gz/md5/1acd5b790b7b8d1c40c6b094b99fcdb6 -OpenLibm.v0.8.1+0.i686-linux-musl.tar.gz/sha512/22c28a5c5e9542ddfb23297523b56e0a964bc322d252d5681e763c28d4c876dd683d3456297e385f560ab4cf378e5848047aec6cc934850fd0a4df0ea6967b44 -OpenLibm.v0.8.1+0.i686-w64-mingw32.tar.gz/md5/8e974b3bafa9dfe1cdba1d31049d7e85 
-OpenLibm.v0.8.1+0.i686-w64-mingw32.tar.gz/sha512/df9e5250dea575341ec1a40d94e3485701690542bc7dfede0504c04fdb7f3fd272d88debdd6546d8644563fb244373b5f4e214da1d0b0133db5b66cdafbf441f -OpenLibm.v0.8.1+0.powerpc64le-linux-gnu.tar.gz/md5/83f68736e911b7c700bf7a8c79cc48a8 -OpenLibm.v0.8.1+0.powerpc64le-linux-gnu.tar.gz/sha512/b879f81d0c02f457310efc08269a7012fe6ed479d33bf79830e48dafce03976425566c5a210ed4833e106848eda038ae531f0c956971b3139c60002a5b1c1325 -OpenLibm.v0.8.1+0.x86_64-apple-darwin.tar.gz/md5/2cd7845dc3d5558cf77e8b6faac4a659 -OpenLibm.v0.8.1+0.x86_64-apple-darwin.tar.gz/sha512/f894c5b7541ebd4f652cb0028b2d367db6af13258d5a42722f19e0ac3a6bc438453e5c2bd17d8b491288c93796ba638c4a3a247f9d33abbac392a6db9169cbcb -OpenLibm.v0.8.1+0.x86_64-linux-gnu.tar.gz/md5/eda96ecab33dfb9a015375813c41d14a -OpenLibm.v0.8.1+0.x86_64-linux-gnu.tar.gz/sha512/641d35dfde15b0868c4ede7d2210ac136e4ca7bf7a969623cbecd792eb09e60824601a6178dbc2c8e4d49523aa105956b5f63566b789003ec736164a8fe5df4b -OpenLibm.v0.8.1+0.x86_64-linux-musl.tar.gz/md5/8d8e6ffa1ad9574bd8ead8b99754e122 -OpenLibm.v0.8.1+0.x86_64-linux-musl.tar.gz/sha512/a66102e69688fdda1c1a51ab07697db0e183b6def21dfed411cd6a92e6c23b22eacd6cccab16e43b86e318d967f81d43dc26bc778746186965bc68c65c7e87a0 -OpenLibm.v0.8.1+0.x86_64-unknown-freebsd.tar.gz/md5/7634eb65eea08ab48a9224295071f058 -OpenLibm.v0.8.1+0.x86_64-unknown-freebsd.tar.gz/sha512/f2c0a6d5113de911aeb516308b1d0ea71a1d61f2ce54c367670a6567444e0030babd66546489e288891678aa6613f40fd0182261de3244f1aed1085c2a32e81c -OpenLibm.v0.8.1+0.x86_64-w64-mingw32.tar.gz/md5/9de4a420caab2cb53ddf86eb0241885a -OpenLibm.v0.8.1+0.x86_64-w64-mingw32.tar.gz/sha512/928675df75d6c6f1e1acc46a2bb554ef120d74671d23e8682adbe05732f668401eaebd98d534e54f7f34e1657d2e1b1d19e18b3822faa891cdf06afd0c3ee56b +OpenLibm.v0.8.1+2.aarch64-apple-darwin.tar.gz/md5/9ce53048e8944f6edff44f75b731229c +OpenLibm.v0.8.1+2.aarch64-apple-darwin.tar.gz/sha512/3a14e28db0656b47a473e19ca0afae1f8b72dd01e108d6b6cb52dc24fc03e4a43db867616b375369e82177bb274fbcfeb8f24b488ee68871e8da8463e9090adf +OpenLibm.v0.8.1+2.aarch64-linux-gnu.tar.gz/md5/8b284fe2905c3e5315291f5e5f27ca8b +OpenLibm.v0.8.1+2.aarch64-linux-gnu.tar.gz/sha512/d326181349ee7f74b73611cd71f933e93c38c11d6db9a1cd4fee49d1ac06c7f244f4cfc6ab373dd52909064117405b3d4fa39e5c626464c066ab53f1cd26dc4a +OpenLibm.v0.8.1+2.aarch64-linux-musl.tar.gz/md5/dc40ad1f2e53a3b914dcca364b6ead77 +OpenLibm.v0.8.1+2.aarch64-linux-musl.tar.gz/sha512/3779d8cd23c5987a15666e2160e40f5a6fc5e7d350c9e3c86d8af8c99515a8cb1f3b5e8438dae0f3cf0b5e1cb2c0cb74c5dd5a06c65e0c2a2382d86dacfaf9fb +OpenLibm.v0.8.1+2.armv6l-linux-gnueabihf.tar.gz/md5/7c9e56f6124b85e7dee74601f8c16abd +OpenLibm.v0.8.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/a78e15177992025462d334a9d5b10b9c7f6710d77ac36056fe7a1cc3bc3fada87f16696366578cfa5f325d5f746639c41c5d80b4885814014d29556d63bd4c7c +OpenLibm.v0.8.1+2.armv6l-linux-musleabihf.tar.gz/md5/78d9e3178fdf93a35f7d2b0b00753dc6 +OpenLibm.v0.8.1+2.armv6l-linux-musleabihf.tar.gz/sha512/ff7b78786f7035eaa08770ddf7d4eb2984595a318c3ac4dfbe4091ca398e00638df2e77bc2ab5fd159defd0927d4fe46b7e824cf055fbae4860bfa12347e8c5b +OpenLibm.v0.8.1+2.armv7l-linux-gnueabihf.tar.gz/md5/7c9e56f6124b85e7dee74601f8c16abd +OpenLibm.v0.8.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/a78e15177992025462d334a9d5b10b9c7f6710d77ac36056fe7a1cc3bc3fada87f16696366578cfa5f325d5f746639c41c5d80b4885814014d29556d63bd4c7c +OpenLibm.v0.8.1+2.armv7l-linux-musleabihf.tar.gz/md5/78d9e3178fdf93a35f7d2b0b00753dc6 
+OpenLibm.v0.8.1+2.armv7l-linux-musleabihf.tar.gz/sha512/ff7b78786f7035eaa08770ddf7d4eb2984595a318c3ac4dfbe4091ca398e00638df2e77bc2ab5fd159defd0927d4fe46b7e824cf055fbae4860bfa12347e8c5b +OpenLibm.v0.8.1+2.i686-linux-gnu.tar.gz/md5/e9942dca99f024ae27876ea5ab1592a9 +OpenLibm.v0.8.1+2.i686-linux-gnu.tar.gz/sha512/406e39894a643bf99c493585fa631800bbbcd6c36aaa9e677de772f7ceaed93b462fdf797235174e22baf2f34c26527f400e282061954b34f05b389acaba1e29 +OpenLibm.v0.8.1+2.i686-linux-musl.tar.gz/md5/0037f2e2113282d49967eba72f215c4b +OpenLibm.v0.8.1+2.i686-linux-musl.tar.gz/sha512/96666332a814232084340791384505acf964064dba4f7b62db51a7ae4416237decb40318dc07b9a041547fd4ff77f204f42bc5c7f029e590af1ee1dd6196d843 +OpenLibm.v0.8.1+2.i686-w64-mingw32.tar.gz/md5/73193f2e5149d07008902adfbf1b74b2 +OpenLibm.v0.8.1+2.i686-w64-mingw32.tar.gz/sha512/e8202b59b8f922bcc908b8b8e6687a674faa701689f5c6175d83fea0bcc5d73f74bed37660e60406f37873dab1d8489e0fd1506294791adfa61a069555eababf +OpenLibm.v0.8.1+2.powerpc64le-linux-gnu.tar.gz/md5/01997fb48464f94f59f4708bd26eabc3 +OpenLibm.v0.8.1+2.powerpc64le-linux-gnu.tar.gz/sha512/1e1d8901fd3aab0948be5c387b8d5bd0db12766fe00bf800ee3100aa0d5973c7aa03ef9c9b4e34942e5e2b46b64035d7f8d7b070113db031d4611f2a7dd02ca3 +OpenLibm.v0.8.1+2.x86_64-apple-darwin.tar.gz/md5/6cb5a472d6c1446acfca11bb8f7283d6 +OpenLibm.v0.8.1+2.x86_64-apple-darwin.tar.gz/sha512/e52f399002544d94536c3bda742d3cc5b0995929d656eeb0e808954fb800fd8e5cfc0ab57279fbccab44fc33a1207ab345d78e685d519ff7f02cca8f554b9c06 +OpenLibm.v0.8.1+2.x86_64-linux-gnu.tar.gz/md5/e1c7dc61e98d5b8aa68de3462a2620a4 +OpenLibm.v0.8.1+2.x86_64-linux-gnu.tar.gz/sha512/fe6d74a2522d75374b87ac9746d444d75a768e069f24f3fbfc6a140aa9d073fa54e8899861f839e647b9261e660c5f2b5555f52fab39ef84a74685b632e89df9 +OpenLibm.v0.8.1+2.x86_64-linux-musl.tar.gz/md5/5fe8eb59d21732a80f432720419324b3 +OpenLibm.v0.8.1+2.x86_64-linux-musl.tar.gz/sha512/0d1b22ca01eda89caa1832b63b1d7ddafe0fedf5906680e817100e2176cbbae95f576409706a9ea1834bc692b72009f4fd244586df30228d18e626bf25fc040a +OpenLibm.v0.8.1+2.x86_64-unknown-freebsd.tar.gz/md5/2bcdf32fdef91433763e32be029814d9 +OpenLibm.v0.8.1+2.x86_64-unknown-freebsd.tar.gz/sha512/97854736fc8c797abd5a5c331e5795dfa9124ac108a76fc2bcac518f5750a08884717d611bb98222b13387bcd27e1c3f4ec841547859e87fafbbe8c7dcd7381a +OpenLibm.v0.8.1+2.x86_64-w64-mingw32.tar.gz/md5/e22079c6e610c9543cca0fb88495d989 +OpenLibm.v0.8.1+2.x86_64-w64-mingw32.tar.gz/sha512/67081bcf360a62eee3928bd1b9d5302ed29b4a176245721723692d5ef938a828379617847308f26a2c7bc0cb2d0dce129d4b8c65c0446c611126894c0aaa5ea8 openlibm-ae2d91698508701c83cab83714d42a1146dccf85.tar.gz/md5/19408d70bf042a109e1c267a53740089 openlibm-ae2d91698508701c83cab83714d42a1146dccf85.tar.gz/sha512/9597fdcbc4af8369e6eecc3f8e86f251661cc64d236578f3ee8a6b39e77a47951446e1a0fe1151513da153e7ed17bf39aa5a36c32153d0d0400232bed2839e22 diff --git a/deps/checksums/p7zip b/deps/checksums/p7zip index b3c24a811a043..3a3986977e3cf 100644 --- a/deps/checksums/p7zip +++ b/deps/checksums/p7zip @@ -1,34 +1,34 @@ -p7zip.v17.4.0+0.aarch64-apple-darwin.tar.gz/md5/af8134ed9c24b99d69e4edb4d5226ca5 -p7zip.v17.4.0+0.aarch64-apple-darwin.tar.gz/sha512/b8bb6aee60a54cca37568af8b2d9baedd892ba0d4918b93bcb29d74189524af7115901f4fabafb1ca58ed17e97c59846fcdfbd460abc81059806802b0a7be840 -p7zip.v17.4.0+0.aarch64-linux-gnu.tar.gz/md5/20abac5ebb99f31742878013c02f96a3 -p7zip.v17.4.0+0.aarch64-linux-gnu.tar.gz/sha512/6d8ebf895b969b1f707d0c23a19db4cd0dee47957d076e6e389395e09404d55bfcb78bb14bb67bb35b93b6a0072f2b4f097d839503d1ccab62b4ce28939dc71d 
-p7zip.v17.4.0+0.aarch64-linux-musl.tar.gz/md5/185c979c7419b7ded3832c0f5cfd3b77 -p7zip.v17.4.0+0.aarch64-linux-musl.tar.gz/sha512/722e880c9f111738cb4cde84bf62c36892dbefdba625ae2b9e0fae76a7b1eabfa481a9838fbf9667223f19f62b6f09fcfd42b50c2bff7a65af0fae3616250fc7 -p7zip.v17.4.0+0.armv6l-linux-gnueabihf.tar.gz/md5/dceb37181763f86bf12f8ca473cf3403 -p7zip.v17.4.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/51e409bbcd3c54838cb3219b2476c8b45c8340e0a2fd26cced0d8484ae7f51711723e06e9023fce9ae9a1b51b5fb94aba536428ce2a5c5902b38498a0b3c2b50 -p7zip.v17.4.0+0.armv6l-linux-musleabihf.tar.gz/md5/193ecd888787ea03a500d102a7e33afa -p7zip.v17.4.0+0.armv6l-linux-musleabihf.tar.gz/sha512/d525aad33f5ed27dc993f31c6db2996b830716bfac9bc7c49cb462ea3f0b412d0d3267765b9952c85e9c9be31d36d095d55ba89c0fa2c92823d9490372389c95 -p7zip.v17.4.0+0.armv7l-linux-gnueabihf.tar.gz/md5/096f11a7f1af5ff730bb8cfef22e335e -p7zip.v17.4.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/1866ffd0169e0795594aaa70f1af8102ebbd79b3cafaadfb9c6a537dac0cdbb6eb7c31ad5165a975508c1b850744f94b60d9c530d658cdcc5536a474203cff21 -p7zip.v17.4.0+0.armv7l-linux-musleabihf.tar.gz/md5/fef1576982f45d1922582f6f7a7d6665 -p7zip.v17.4.0+0.armv7l-linux-musleabihf.tar.gz/sha512/71061585b32fa1a8e0a403a60c07e9f90586291a9799d7e2d6f7e6ec9f7b0ebf4b45ed080efd87cad82c45f71ec9a14cbcf9134a73bad4f5e3329f23bc6df01a -p7zip.v17.4.0+0.i686-linux-gnu.tar.gz/md5/8818389b3bf00f10c6a39fe0c4a331b4 -p7zip.v17.4.0+0.i686-linux-gnu.tar.gz/sha512/bec2051a258f7e8a762b7cd4324e7b8f00fe5d99d48f05fb3557c41604e8b08af9ab66ab830f4a48086656be41aaf011b2aae0fb530e0ffefec38689f85a3bb5 -p7zip.v17.4.0+0.i686-linux-musl.tar.gz/md5/4ed9c16a65ed1d656aa214013e46eb28 -p7zip.v17.4.0+0.i686-linux-musl.tar.gz/sha512/7a5b3e15d0038bea0de7fc28ce058d7f93b8e04f271e30953a6b52d2b5d71f59d10177033e888a50cf8dfeb4f44bcf3271c9b9d1b28d0122ab2b239decdad446 -p7zip.v17.4.0+0.i686-w64-mingw32.tar.gz/md5/d06cff2ec0b7c8415700587f931ce1ac -p7zip.v17.4.0+0.i686-w64-mingw32.tar.gz/sha512/ed72440f5306a57465a70b00bff33185a83c3e223844a79aa0b0d1fbe30dbd35da75e6188725aa621f5c4574a09527daf1e4893c7c6979ab91b2c09b4979dbcb -p7zip.v17.4.0+0.powerpc64le-linux-gnu.tar.gz/md5/949ca7d111e497b82c9c762e5ac63a6b -p7zip.v17.4.0+0.powerpc64le-linux-gnu.tar.gz/sha512/4842e0d44bf6380100723209596f526181fefe8a81d59c28658d03ea16600e71d010d5c7898b4c943efdd9caaa2301c3fdb0dccb343d631d1734acda1c559f65 -p7zip.v17.4.0+0.x86_64-apple-darwin.tar.gz/md5/2322c7a08f62592ca394a716949008bc -p7zip.v17.4.0+0.x86_64-apple-darwin.tar.gz/sha512/9549f3e1052730ce13414636b32f0d1a9a1ac944a2b622380eac0da144b11fd65d437afe877ba6797d651da9c4ec77f0ebd3e515146caceaa2524829419eda48 -p7zip.v17.4.0+0.x86_64-linux-gnu.tar.gz/md5/a21b12946a62ef3688d5fc965974e8f7 -p7zip.v17.4.0+0.x86_64-linux-gnu.tar.gz/sha512/d32faeac23acf8a023f65350ba1d62bb3d9f904e32570ae03b8fb0a5375758784dd95be8caeecd007cbde40e103854a077e2c817f62afa72491f3b8966deb738 -p7zip.v17.4.0+0.x86_64-linux-musl.tar.gz/md5/c448e872d4ad66beb2d46d9134952f2f -p7zip.v17.4.0+0.x86_64-linux-musl.tar.gz/sha512/92588f4817e145ef655c718dec049e7f43dd93644f43f19cd320643fac5f5b2312837c7a6c3e782e97fd08747311c58ed4657484f8bc778942fc5206ff8ea4e5 -p7zip.v17.4.0+0.x86_64-unknown-freebsd.tar.gz/md5/2cca6259a2eb1b0fea777d566267bf05 -p7zip.v17.4.0+0.x86_64-unknown-freebsd.tar.gz/sha512/92f90e2be4a8b8fcd80a4ceacac8bbab750913526b85f9279f8ee9ed91b77248b5de2d35d0c6241d0ad51fda185f4cb1ead1dcc9d23e2bef35e0b61efe3c3170 -p7zip.v17.4.0+0.x86_64-w64-mingw32.tar.gz/md5/5d272c78d7ffb40da0f333463f3cc098 
-p7zip.v17.4.0+0.x86_64-w64-mingw32.tar.gz/sha512/2d999c6df4786cec1bba396b3a651a63740f4b799e9fc11754afd24438076e898daae74b4d3c7072450428e89881991e8884711cd4c349879a00c7aeeb4e1d3e p7zip-17.04.tar.gz/md5/00acfd6be87848231722d2d53f89e4a5 p7zip-17.04.tar.gz/sha512/ad176db5b657b1c39584f6792c47978d94f2f1ccb1cf5bdb0f52ab31a7356b3822f4a922152c4253f4aa7e79166ba052b6592530b7a38f548cd555fe9c008be3 +p7zip.v17.4.0+2.aarch64-apple-darwin.tar.gz/md5/b418adbae6512a13e04407c120ba78e0 +p7zip.v17.4.0+2.aarch64-apple-darwin.tar.gz/sha512/818f7afb0d3ffbff8079f5f4b8e9745a847148ac9cb5a261b6ca2f2f3a1dd722fa93f798645129bc9bc4a48f756bf2e55605791abb394a32635dfaef31f21e70 +p7zip.v17.4.0+2.aarch64-linux-gnu.tar.gz/md5/3f976d6514e6327a9aee4a3f21a25a64 +p7zip.v17.4.0+2.aarch64-linux-gnu.tar.gz/sha512/a4dd8be97c53b864e81aae40e248759f97249fbd6d8c5b91f0ac115a84126cbfc4825ffa3876f5e8b66652b014a78ba04e3ffc1ba1d9c96786b914b1279682c0 +p7zip.v17.4.0+2.aarch64-linux-musl.tar.gz/md5/b31699d7ea671c689fa9194913fbe7ee +p7zip.v17.4.0+2.aarch64-linux-musl.tar.gz/sha512/5c8d95df66055ab8027b047b23534743ac929befd37dc8a8e591deece22006209f94524f7951de580a5ded9530ead2ce7ec3370c482865554830b53d09f41bf1 +p7zip.v17.4.0+2.armv6l-linux-gnueabihf.tar.gz/md5/fbe2ebeeaa6e5b33dcb71662fb7040f1 +p7zip.v17.4.0+2.armv6l-linux-gnueabihf.tar.gz/sha512/6ca1d7eb1d3f6a7c4dc9860ac3d5a835abce92cddcda015a93086ecde44ed1b3d9f83a3c1e1eddc510af429ec269716dde6bc5fae4aa6bbbc3dcfc9a51326786 +p7zip.v17.4.0+2.armv6l-linux-musleabihf.tar.gz/md5/dae4b1e6060bf4431d3ead53e6b3e167 +p7zip.v17.4.0+2.armv6l-linux-musleabihf.tar.gz/sha512/856c2283c63728d8c542ce5a3d58e38c985f190774c407fc421dd30f05e0ae3467e2844cb7d535aa8a6b8fb24b21b29af75b736fbd9af67c24340609ad6b5841 +p7zip.v17.4.0+2.armv7l-linux-gnueabihf.tar.gz/md5/42b6b9b19158303c4399d651ee5b14cf +p7zip.v17.4.0+2.armv7l-linux-gnueabihf.tar.gz/sha512/4f8792639db8599af026f592496a8c594c0fd6a62dc949965add55b1b85a95d4edc2f99960cf5b21e7beeb8e1bca1d9c1a1a34600103df04dc20d0509410c486 +p7zip.v17.4.0+2.armv7l-linux-musleabihf.tar.gz/md5/e34f5585a50c0cfce29c79e9ece60cf4 +p7zip.v17.4.0+2.armv7l-linux-musleabihf.tar.gz/sha512/87f72568f5a877008d3a8a032a85f69c29d3af4293d7b42d70419bb4c9ca7e99dc13c6b22621ca83886d07f765118451ee9f2a3aee63979d8070910887bf7cdd +p7zip.v17.4.0+2.i686-linux-gnu.tar.gz/md5/d917247133b1c62663334b6a908e30e9 +p7zip.v17.4.0+2.i686-linux-gnu.tar.gz/sha512/4839bec129b7fbd68c61d35fd3b3af9863c757d9fec0220926d45f1f58174d88d0bbb4a472d259d1d77775b906e9c58ba707fc20e2a4a060ca9030722609182d +p7zip.v17.4.0+2.i686-linux-musl.tar.gz/md5/951614fc7597de8c12e0109cbd81bfa9 +p7zip.v17.4.0+2.i686-linux-musl.tar.gz/sha512/f0420ddd6df82d2b3e1ece9cc5cf537cb0803d291d274a495bb9a575bb253a4241cdae38a88e43ddafaab7f6911b310a30c1b874b0a0a9bc447f8c42c5a24652 +p7zip.v17.4.0+2.i686-w64-mingw32.tar.gz/md5/cc81daf0e40990c48db178cb53a95d08 +p7zip.v17.4.0+2.i686-w64-mingw32.tar.gz/sha512/ae5bcbcf32dad20db95319c3c2f874fdbb0cd41054d6c192f2ab106e0aece1b4b0b591055b37c2c909b07b303204a75dec5c4b3c224243c2041da811f99cd7e5 +p7zip.v17.4.0+2.powerpc64le-linux-gnu.tar.gz/md5/e97d74ac4dacfaa215c3119e055a2df0 +p7zip.v17.4.0+2.powerpc64le-linux-gnu.tar.gz/sha512/8b0596ebd84fa9947e8f15f63c426339980e08c81eb4c1474b4a66af6329f0a2fe1bd31eef964d147bf9cf0213e85bdc143fab1a4f1dbfa09da5ebd9e73a3d8d +p7zip.v17.4.0+2.x86_64-apple-darwin.tar.gz/md5/4d9a26dbfc0a02a812c8f7de20ea5440 +p7zip.v17.4.0+2.x86_64-apple-darwin.tar.gz/sha512/3cba51ba9742b616afec13a14e8e3bd3c73c835256af8f6a49d4abf32f5ddf3f86ac8ae08ffd9bc331caa8a711dd1b63f4cd082443a7863e3d512f6ca2152bcd 
+p7zip.v17.4.0+2.x86_64-linux-gnu.tar.gz/md5/37b7570712ecb8677059f4280a346201 +p7zip.v17.4.0+2.x86_64-linux-gnu.tar.gz/sha512/9445add6a475bdfc2924dc52c07917c2746b07a41a2dbfdab8ad4b4e5b87b0192c13f4da5da64e5d3544bbf9c79fda3c633664eecb372e8475031789770c41ee +p7zip.v17.4.0+2.x86_64-linux-musl.tar.gz/md5/04d6ae950d05c81c6b165721de2ba7e7 +p7zip.v17.4.0+2.x86_64-linux-musl.tar.gz/sha512/524d8ed80a1af903b572d5e32710b384702175cacc83ce2305d7f7a35d45aae7d08e2afc14a9e40c934ba4eb578787afa9bece4f820e96e4b624869cb2bcec26 +p7zip.v17.4.0+2.x86_64-unknown-freebsd.tar.gz/md5/e2a3361e91258e39db541c9dec5a73fe +p7zip.v17.4.0+2.x86_64-unknown-freebsd.tar.gz/sha512/ecc1db9a1823ebdac290548f6e001688b5d111caede4cbfab4e2ef492dbb31844690e9b69360ed9c6ebb2affded7f352d57c0e5cfe67be951876d1fc5e87d92d +p7zip.v17.4.0+2.x86_64-w64-mingw32.tar.gz/md5/2b5f77bb31526c469e0fd48399d0cf9a +p7zip.v17.4.0+2.x86_64-w64-mingw32.tar.gz/sha512/a3a17af4db98b82b71c8d4d09e5315dc4fa77b38cc19f0593654b63744bc7489383d40032e48c2141d6b55e330d1538c527819378a2575a245de436bc6daf532 diff --git a/deps/checksums/pcre b/deps/checksums/pcre index 202265ee58060..9b9717b61688b 100644 --- a/deps/checksums/pcre +++ b/deps/checksums/pcre @@ -1,34 +1,34 @@ -PCRE2.v10.40.0+0.aarch64-apple-darwin.tar.gz/md5/3d6b01c094c9e1adad2c1d42a3e7c3a6 -PCRE2.v10.40.0+0.aarch64-apple-darwin.tar.gz/sha512/374f9f35ae7925a6db6249850822d90c56c11b1b49971b76f016203e85bcc14ea6ab7e017b0ad5ce56c47b0715b2a396099749656e7d7291008a2dc8cb393792 -PCRE2.v10.40.0+0.aarch64-linux-gnu.tar.gz/md5/0f4c7daae3c08e5438b0af3299cbb003 -PCRE2.v10.40.0+0.aarch64-linux-gnu.tar.gz/sha512/ee9c6275019ef09a2fd7c6a649ebe184b58dae4e65a9b38159bac596e0427819e086084ca56be0f2f2ad0eb98a50a2511999cb46d5e9d1f03d39b04ade5e270d -PCRE2.v10.40.0+0.aarch64-linux-musl.tar.gz/md5/baf858fd38471dd933312079ebaf065d -PCRE2.v10.40.0+0.aarch64-linux-musl.tar.gz/sha512/3b50f6380673d30d487a3b10e6c58b76ff47fbb5c774f59f15bcc0b92e7740e73ad04c62b86e8eab0c916d4c231449f5279eae37aa401fab1a46c6e11687e806 -PCRE2.v10.40.0+0.armv6l-linux-gnueabihf.tar.gz/md5/9c582d85fe43e205679d2ed8d1ee3df7 -PCRE2.v10.40.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/fb7df17fa39ac93c7af92f4afdcdd120b171682ce172561a65fae3c6e3b1c26c5715b1264007fd12713464cbff406fb19117adaf1d50bd239f0dc53e7842ca8e -PCRE2.v10.40.0+0.armv6l-linux-musleabihf.tar.gz/md5/a9c6c90c69d3de7030bd5015092a1340 -PCRE2.v10.40.0+0.armv6l-linux-musleabihf.tar.gz/sha512/7030aaaac0d275e72f3a36fe5104d11eba9bd1909c3d7126c751c9409f619d25c7735c7d3354b48786aef1ca9f1be48a60e0bd04a04c6b098915e6c4b2935e5f -PCRE2.v10.40.0+0.armv7l-linux-gnueabihf.tar.gz/md5/cc4add9c80f47ac3fb682aca3347aca3 -PCRE2.v10.40.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/4a21795524d3cf8112384d133b47e87738a8c1efa71606fb55f5fabe1cc4108b2921c2efb539506552a2b630398a6770d93c9c541d5123b7a84016aad7a112f0 -PCRE2.v10.40.0+0.armv7l-linux-musleabihf.tar.gz/md5/51c54233c6e536671f2c1af74e1773d5 -PCRE2.v10.40.0+0.armv7l-linux-musleabihf.tar.gz/sha512/3889cf1faacd16779c87ac00317fbc36e54f5a99733b838920add360196edbe388c12421380105a87041d3502e5f4bea74460dedc3d797aafde5cb0f960516d0 -PCRE2.v10.40.0+0.i686-linux-gnu.tar.gz/md5/368342965b12beed2c4c92e60f7dda8f -PCRE2.v10.40.0+0.i686-linux-gnu.tar.gz/sha512/bdb3692412d0b1d07bf302fbd129755e4a53e6b39caf135df912da79088e5db29a788680b282292919c45560a795fab60d043feece63cae2296165a9909ecb57 -PCRE2.v10.40.0+0.i686-linux-musl.tar.gz/md5/79bf801c0d86614ebf95ef83016195e6 
-PCRE2.v10.40.0+0.i686-linux-musl.tar.gz/sha512/d35d15ccc8b09a33088efb4bf631cbbb3ff332521f37fdaa5fc106e576a54cb57ad1243dc3db1ab17a8195fd1476889b8d548987437a195267fae7683769da38 -PCRE2.v10.40.0+0.i686-w64-mingw32.tar.gz/md5/930cbf007549542b027a1db72bab0e58 -PCRE2.v10.40.0+0.i686-w64-mingw32.tar.gz/sha512/e9bad56ca6e1871f2bf37c8b2b03ecbc77acd3f4b04c95dd6e63a4cb38487fc3349a97ca7f575c158fde8b948c363af3f7cffc4ad89af9df09e536119a1d743b -PCRE2.v10.40.0+0.powerpc64le-linux-gnu.tar.gz/md5/cebf0e67b6ae67fa841e491bf8955ae0 -PCRE2.v10.40.0+0.powerpc64le-linux-gnu.tar.gz/sha512/e04087f3e3268d389c08068ac8ae45f017e742787f20235eb6e4d32257ae3a3e445c61dc80db5a2c73d3fea5721272ec517c8b3be428d8aca097e691a14eb659 -PCRE2.v10.40.0+0.x86_64-apple-darwin.tar.gz/md5/5ed58d794f55139baac9a1ee50da3647 -PCRE2.v10.40.0+0.x86_64-apple-darwin.tar.gz/sha512/e906c6953be8a894d4cfa1792843e85aef58cf3b87baf4bcba99d19c84bd7d67dfbde85f1ddad42cbd51d2b1fa36797ce2ad79d79b19a792ca886bf52632a919 -PCRE2.v10.40.0+0.x86_64-linux-gnu.tar.gz/md5/db3fd5e855ca47b90d9a1faf58c88279 -PCRE2.v10.40.0+0.x86_64-linux-gnu.tar.gz/sha512/9082201b6519a693cf0038cf667841a0a4e4158698e1b7455ed3e0db1a7796c7303cf105975ddf059a6dbf5865eaf99f33d4e42803364935da7fa9e9c3bcb5b5 -PCRE2.v10.40.0+0.x86_64-linux-musl.tar.gz/md5/ab3456b926864ab27d5a4ce8dd42d1e7 -PCRE2.v10.40.0+0.x86_64-linux-musl.tar.gz/sha512/4b9109d9fadde86b1d76c420cb3e8b884ccba6fa08fec4fb039c384af5f040cf52b3232fbf4921cf680f36e54683b28bdb77e3b2a8943acf974f446e99f93475 -PCRE2.v10.40.0+0.x86_64-unknown-freebsd.tar.gz/md5/ee7679ad09e13f3cf9a2089e761bd718 -PCRE2.v10.40.0+0.x86_64-unknown-freebsd.tar.gz/sha512/cce31108246bdc2947865339a7cdbb7f505baf3b1b94fa6f6d825416149d8bc888a0a55961873f041cb94bba623c27f5ecaef23dda284cc57b76b30987fb6f5b -PCRE2.v10.40.0+0.x86_64-w64-mingw32.tar.gz/md5/8178c12311e6f74bc1155d6d49dfb612 -PCRE2.v10.40.0+0.x86_64-w64-mingw32.tar.gz/sha512/9d03dd7ee07fdce9af7e6995e533c59dc274417c0e39a27ccea397291b17d6865bf9c80bbc7c9aa8e908518ba33873b39b9cbfd36bc7137cb5b7432c5684e073 -pcre2-10.40.tar.bz2/md5/a5cc4e276129c177d4fffb40601019a4 -pcre2-10.40.tar.bz2/sha512/00e7b48a6554b9127cb6fe24c5cacf72783416a9754ec88f62f73c52f46ed72c86c1869e62c91a31b2ff2cbafbbedabca44b3f1eb7670bc92f49d8401c7374e8 +PCRE2.v10.42.0+1.aarch64-apple-darwin.tar.gz/md5/667a570d341396c3213749ee1e5b5fda +PCRE2.v10.42.0+1.aarch64-apple-darwin.tar.gz/sha512/c1bb99e8928efded9b0ea3f294ceb41daea7254204ca30c0ff88686110ccd58138d8ea8b20b9a9d6d16a6d8d3f34e27e74e7b57d3c8fe6b051c9d8fa6f86431a +PCRE2.v10.42.0+1.aarch64-linux-gnu.tar.gz/md5/1a758f275ff3306fbad7698df7b9b7be +PCRE2.v10.42.0+1.aarch64-linux-gnu.tar.gz/sha512/d09508c0b255366d01f1b4d1ae6748a8e47f18c451498d30715f5f968784990949dab7540cd086396abd912f61b5f7c44c8c72a27efaba0a7fc08b71a167c057 +PCRE2.v10.42.0+1.aarch64-linux-musl.tar.gz/md5/e61147579fdc9b57a61b814bdf9c84bb +PCRE2.v10.42.0+1.aarch64-linux-musl.tar.gz/sha512/eecaf4c1937fc04210b910ac65318524c02d690e8c4894c38e74eaba36d26c87a1fd9e1cc36f4307a11ff3552a79f081fa8f05085435eb34872dc2fdecce2d18 +PCRE2.v10.42.0+1.armv6l-linux-gnueabihf.tar.gz/md5/b4c484a3b87923c0e2e4d9cc5f140eb7 +PCRE2.v10.42.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/5931cf13d340971356a9b87f62c9efdb3656ba649e7b25f1722127a3fd70973d94c815a37b43cecab8eb0ed8d1ae02ef1a0c0a12051852c1b9242c3eaa01c496 +PCRE2.v10.42.0+1.armv6l-linux-musleabihf.tar.gz/md5/bc7b5bb1c5b0b99c121bad5a89299ca7 +PCRE2.v10.42.0+1.armv6l-linux-musleabihf.tar.gz/sha512/86b5ad4fa6f4b5bd1a76ad68ddff4b39916d0ed0acc03a3fee8eab5256aaed53abc0ff4ce9d9d9f8b9203c087211684da92fe6aa06ff5bc331ba1b3da2cba57e 
+PCRE2.v10.42.0+1.armv7l-linux-gnueabihf.tar.gz/md5/3541eb26fa5a4d13e2c7d063dbd900d8 +PCRE2.v10.42.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/872181f931662edaf653351486c5e2a700e94cfa0966ca90eca893fdc75dd46eb40d9d45737c198aa4b9ad8ebab33fd78697ef35906985e4e1c9748ddf58d363 +PCRE2.v10.42.0+1.armv7l-linux-musleabihf.tar.gz/md5/fe059feb18fcc9312f1033362070fe34 +PCRE2.v10.42.0+1.armv7l-linux-musleabihf.tar.gz/sha512/5a96acf3908c964ccb4f296c449499388ed447d9a094c2760c979e02ef656fa710ede3926b9626e89fb5b0545c111e6eedff21e48416e923c17fc9ff129d0519 +PCRE2.v10.42.0+1.i686-linux-gnu.tar.gz/md5/d6c804ae6cc661d039ee3edd2f1dbcb6 +PCRE2.v10.42.0+1.i686-linux-gnu.tar.gz/sha512/256ca677b169854686ca34cf30af5a6709758b41b65f2c66d497c552858770a69a49834fd16daa2f7d481964b21a2e3ec68ff1b1fbd08f4e2257ec46b85c0063 +PCRE2.v10.42.0+1.i686-linux-musl.tar.gz/md5/092af10d8182cb4240cdd975efce4d7c +PCRE2.v10.42.0+1.i686-linux-musl.tar.gz/sha512/79a48f4fd50ffdf49c8d57581e01ace38c1b3d7edd86d44db44b8efd93074d16faf035131a0d60c6631b8bf22f0fd8296acedba45908da56e8096c296122f047 +PCRE2.v10.42.0+1.i686-w64-mingw32.tar.gz/md5/bafc5fc1621d0f4fb2b7b271e2f66db1 +PCRE2.v10.42.0+1.i686-w64-mingw32.tar.gz/sha512/a5ac2b788fb2e4baf129fb339f28da04e9a5c9b7dbba0a1f43da2a7193917d361d961ba48abd0aeec30d2352ebaa401d667c8eec81c5f40859ef8adf8487edca +PCRE2.v10.42.0+1.powerpc64le-linux-gnu.tar.gz/md5/0de1215b2a1e9c0efd131355e9fbf2c1 +PCRE2.v10.42.0+1.powerpc64le-linux-gnu.tar.gz/sha512/69dae12627685ae665db8c91264a79aba7c60ae97eccdc79ef889f2a5f69b465fa333aba298fc90bbb95710cfc324e3630bc427a97577855e8fb6c8fe227cfec +PCRE2.v10.42.0+1.x86_64-apple-darwin.tar.gz/md5/c5c52b399921c5ab81a5f598b350d2ca +PCRE2.v10.42.0+1.x86_64-apple-darwin.tar.gz/sha512/e6c8ba3aa3fbf54b37079301ab317104c6852812b23835f52ca40f31f0831678172d32e077fbaa712a8a2cb16d62bb97d475827004353e7807922a2d6e049b28 +PCRE2.v10.42.0+1.x86_64-linux-gnu.tar.gz/md5/b074dd1f85e24e723349e566350e2c78 +PCRE2.v10.42.0+1.x86_64-linux-gnu.tar.gz/sha512/236017e02c9f32b913b772dbf22897c8460e5791f196c86f8a073e329ad8925f6859afe48f3bf18ca057c265f08fedbde255360d8f859e2303c6569ab1b0e1bb +PCRE2.v10.42.0+1.x86_64-linux-musl.tar.gz/md5/9f32ca77e79843fc9c4b5fc8ed336d11 +PCRE2.v10.42.0+1.x86_64-linux-musl.tar.gz/sha512/334a31724e9d69c6517568d922717ce76d85cf87dbc863b7262b25ab43c79734b457833cd42674eb6a004864e5c74da3ae1d0a45794b4cd459eea24d9669fac5 +PCRE2.v10.42.0+1.x86_64-unknown-freebsd.tar.gz/md5/76cde3c509ed39ca67a18fe58e728821 +PCRE2.v10.42.0+1.x86_64-unknown-freebsd.tar.gz/sha512/219c82067a242554c523be5be2b5561cd955609eac1addc336004df64a2a12e815ea40ff94d3f610970f7d0215b410f098d4baaa2c722f5cf21dab175b288b7e +PCRE2.v10.42.0+1.x86_64-w64-mingw32.tar.gz/md5/b0771d5b0132b554776e7cee0e1374e6 +PCRE2.v10.42.0+1.x86_64-w64-mingw32.tar.gz/sha512/d4435ff703e51c88df7764a732d6b67b1ee4d3b09b915ac822af05a33347642691837818d4c389226ef1d70cd69dbac792ebe1e7de1d8900443fe162051916ae +pcre2-10.42.tar.bz2/md5/a8e9ab2935d428a4807461f183034abe +pcre2-10.42.tar.bz2/sha512/72fbde87fecec3aa4b47225dd919ea1d55e97f2cbcf02aba26e5a0d3b1ffb58c25a80a9ef069eb99f9cf4e41ba9604ad06a7ec159870e1e875d86820e12256d3 diff --git a/deps/checksums/suitesparse b/deps/checksums/suitesparse index a21755c79c895..7578826fe3f0e 100644 --- a/deps/checksums/suitesparse +++ b/deps/checksums/suitesparse @@ -1,36 +1,34 @@ -SuiteSparse-5.10.1.tar.gz/md5/68bb912f3cf3d2b01f30ebafef690302 -SuiteSparse-5.10.1.tar.gz/sha512/8f85c6d63b76cba95707dfa732c51200df7794cb4c2599dbd92100475747b8d02b05089a47096e85c60b89bc852a8e768e0670f24902a82d29494a80ccf2bb5f 
-SuiteSparse-ed89e0fe3d8908cede058f42f872ba60159af0a6.tar.gz/md5/3019404c83511b5aab962559c2924072 -SuiteSparse-ed89e0fe3d8908cede058f42f872ba60159af0a6.tar.gz/sha512/06fa991da05376ee7e55a30f6fa29ab60ed2cec79818e217290e0e256233ee321fb25a764cbe834c3e94755b02d5326c93c8f1b686c53da28023778787e6d57f -SuiteSparse.v5.10.1+0.aarch64-apple-darwin.tar.gz/md5/b9392f8e71c0c40d37489e7b2071c5ad -SuiteSparse.v5.10.1+0.aarch64-apple-darwin.tar.gz/sha512/109d67cb009e3b2931b94d63cbdaaee29d60dc190b731ebe3737181cd48d913b8a1333043c67be8179c73e4d3ae32ed1361ab4e34312c0f42e4b29f8a7afda3e -SuiteSparse.v5.10.1+0.aarch64-linux-gnu.tar.gz/md5/1b2651ede4a74cd57f65505a65093314 -SuiteSparse.v5.10.1+0.aarch64-linux-gnu.tar.gz/sha512/753f986a749d139f9a6baedac059d8ed8efdd716ed28eacdbf00e6ebe863b4e17467f01a9693dcb39571d38b4b5c4c1375dbb790b88a7e704116e3fe83f7ff3e -SuiteSparse.v5.10.1+0.aarch64-linux-musl.tar.gz/md5/051ff9bbbc95c57d58563df8a2c8eedd -SuiteSparse.v5.10.1+0.aarch64-linux-musl.tar.gz/sha512/855979ed8d6290c529d9c9e82944fb15c88f9d9d8da7db1fa2fc34efb0ed985fc6554312882107f26956f2a18ae985918909cd834e068b874906c21a0f53b6c9 -SuiteSparse.v5.10.1+0.armv6l-linux-gnueabihf.tar.gz/md5/dbc5fb4844077084663612af26e180ce -SuiteSparse.v5.10.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/b906f7275ab58006acd52927e7e04c79eec59b5f28e9d7e5d5b8556c0eedd54cfff87e494373702c205afa2384ee6b0f2bb5e811fd440b1b50d5c9eee1b47b99 -SuiteSparse.v5.10.1+0.armv6l-linux-musleabihf.tar.gz/md5/7770d256e76d5ce1484c3781508cc3ed -SuiteSparse.v5.10.1+0.armv6l-linux-musleabihf.tar.gz/sha512/4f1d46cc8da5a7eff665b4bb96f9e21319f39231f98a6164d8c3d654d5b6f93c3e4477f55a39a80b7f8125a78d690cc5a1cc58f29143ba4c109a4182d7fa2110 -SuiteSparse.v5.10.1+0.armv7l-linux-gnueabihf.tar.gz/md5/ee1fa978bcfb264842749f915bbefd77 -SuiteSparse.v5.10.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/9592a42f6474fd89eea1144f62ecc2a23796ad251173a9c36ccbc9bc18dd88687ce49f51528974f56b5652e2ab15f0aa41634513f2cc0b3c54259de3b68350bd -SuiteSparse.v5.10.1+0.armv7l-linux-musleabihf.tar.gz/md5/30f708421b92158c7741c82576e9047b -SuiteSparse.v5.10.1+0.armv7l-linux-musleabihf.tar.gz/sha512/d8793d48757dbb62aa7a21c215b6d6e63a26ce4ba740f1f7f42a3e485ad3d9628744f021ad9cc96e29c8c88bfb2f02ea92865c26b971ca739d3c05c7f28875d9 -SuiteSparse.v5.10.1+0.i686-linux-gnu.tar.gz/md5/9018b6168b9a687bab0c9a9cbf45afba -SuiteSparse.v5.10.1+0.i686-linux-gnu.tar.gz/sha512/308a92f441af6855517c40c6871b4935251677c05cc082c21fd1249e0137b635fa524f60cad61c7524026301a6de7ffea0ad1f4b9a4d9d6e3ced3f332a6719d4 -SuiteSparse.v5.10.1+0.i686-linux-musl.tar.gz/md5/99143f8d6de4f071ffa19942252b6dec -SuiteSparse.v5.10.1+0.i686-linux-musl.tar.gz/sha512/9fb719fffea03296dfac8bc221bafc3ed8f7791749eca6c4b00265994de1be5d242e7e5184693603c745b39c4538feb11ab283204e0e33df2745f904cf0c7252 -SuiteSparse.v5.10.1+0.i686-w64-mingw32.tar.gz/md5/d049c943fbda2c8380dea33e16569275 -SuiteSparse.v5.10.1+0.i686-w64-mingw32.tar.gz/sha512/174768464432b991ecff88d5e5126caca83672fb5173115de59bc2387ef8aa75a56d3e84957fce625fabaf50ba462549f2ea828aea7258be7513835b7fea2e31 -SuiteSparse.v5.10.1+0.powerpc64le-linux-gnu.tar.gz/md5/f01f7e134f8ee77849f3a46e773c1ff2 -SuiteSparse.v5.10.1+0.powerpc64le-linux-gnu.tar.gz/sha512/dc0339f2b35f05d49fbd1dcf1822c774a07af122fabc8e00eb7435dc53fcf82b3c1ec24e2bb41b1a58d3f8ab8903830eb7ece19dc6fce3f5e73d90a3dc3c4194 -SuiteSparse.v5.10.1+0.x86_64-apple-darwin.tar.gz/md5/02975a8670660c5e79eab0a70b051a0b 
-SuiteSparse.v5.10.1+0.x86_64-apple-darwin.tar.gz/sha512/e55685ed7a63318c5baa326795503f13f031e0a617c045c972d5c89252ab51e7325e2b0425ca10dfbd59e79c5b4200545f5a4944fddd376e7610b6ebf74ded14 -SuiteSparse.v5.10.1+0.x86_64-linux-gnu.tar.gz/md5/6c111d315fb25c529710722bd5ae6af0 -SuiteSparse.v5.10.1+0.x86_64-linux-gnu.tar.gz/sha512/c971aed91bd695a0f7f735f58ddcb075d32b9522a8a50a30ad383ba5ce2c8e572fec97644e6cb85745206f4e5da72d7865d9a9724eb63ce3c04e90a4eedc90c9 -SuiteSparse.v5.10.1+0.x86_64-linux-musl.tar.gz/md5/7c98daf0edfad31764c3078e6351b521 -SuiteSparse.v5.10.1+0.x86_64-linux-musl.tar.gz/sha512/2c4b3cae1bd8d1ce62dae6aeca3ffbf90c26a1b01c0da4fb7761d6fe4293b8fad0b6fbfd5f930cefe6ccaef7546a482022ff2f50dc59ecf17c5c0dfc6a5961f5 -SuiteSparse.v5.10.1+0.x86_64-unknown-freebsd.tar.gz/md5/aeca88a7bc3f9d239c61084996ce9182 -SuiteSparse.v5.10.1+0.x86_64-unknown-freebsd.tar.gz/sha512/0bee1ee07c3883fe28dd322c40195be9adb757d6dab3eb1730d7b0ff65dd4517520047696ccdda4ca618e671d898cdb45b787094594e142cb4b176549a74200b -SuiteSparse.v5.10.1+0.x86_64-w64-mingw32.tar.gz/md5/63e449554eee134757e3d50ca8b5f47d -SuiteSparse.v5.10.1+0.x86_64-w64-mingw32.tar.gz/sha512/95b58df4fe7520e2b526f9e3b199253909992789cd24ecca814ddb9a0c0bb37ff93c1de40239e5295a8503613cdb2431a87f0a70a3d657d94d4661f1778797f2 +SuiteSparse-7.2.1.tar.gz/md5/c341b4b2943b6d99ec147dc36ae64d51 +SuiteSparse-7.2.1.tar.gz/sha512/6385b699d2f109e8473bb58e95705671b8a5c2f1b281d17bba9f396a94b2e783700c4c64f4ab9495a4a64e23ba279052616054045783b4b8c8eb28a8f4f6be28 +SuiteSparse.v7.2.1+1.aarch64-apple-darwin.tar.gz/md5/1bd9c850b5bb6de56f4dfd0633ce7a6c +SuiteSparse.v7.2.1+1.aarch64-apple-darwin.tar.gz/sha512/f0e932fa2b6d2843fd75c1e151b8304ed2521b679c732301877495d9a2437ec693ba0ebaaf52cb3a4f5c01bcd8c972a27b1080071c9c77462901fa4dec7de787 +SuiteSparse.v7.2.1+1.aarch64-linux-gnu.tar.gz/md5/ff52a5ef6546bbea2ce2d73db2821522 +SuiteSparse.v7.2.1+1.aarch64-linux-gnu.tar.gz/sha512/f5c2a54e40b36fc0489140397e6324bbd1050a87949fd9be3837012825c3becbef66258d28c286d0c45b0447361b2ddf736370402ed928b909e0fb7c5f4ee69c +SuiteSparse.v7.2.1+1.aarch64-linux-musl.tar.gz/md5/2baa4103f4070f66d6278fc001317372 +SuiteSparse.v7.2.1+1.aarch64-linux-musl.tar.gz/sha512/17bc9b020850d9cc652d49987c3faa57204ed3beecd04ea812fd03b4f60f541ba7b250fa70c801a8ec3c440f3562a4771a3742299f0d4eb770e58010c43a3823 +SuiteSparse.v7.2.1+1.armv6l-linux-gnueabihf.tar.gz/md5/cdc1c60e50f6551a52e57ac71440564a +SuiteSparse.v7.2.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/9209f86ac97c061755169412565055847be4890140a389a92468297507cee240219d910bbcef94c52926771a4152762cfa05cfa33c26d31351d68265e5719bd3 +SuiteSparse.v7.2.1+1.armv6l-linux-musleabihf.tar.gz/md5/cd5e177e660d793426e4c4aeb2f9269c +SuiteSparse.v7.2.1+1.armv6l-linux-musleabihf.tar.gz/sha512/a8a5ca739999a16336b2c98ec88873e00349720b5d966d643d5665338b1f9c8077352d87fac41a165cb65793ae5fb686e954b3eaa3f751aa8d002388a0ce6a13 +SuiteSparse.v7.2.1+1.armv7l-linux-gnueabihf.tar.gz/md5/eb43136009b370e93c6ab4c1b0eec40c +SuiteSparse.v7.2.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/e20a308911a36037c9b6da3c060f8624b1ff84b0e23cbd62189f62e989f6a5307b07a6286d95459e0886a8d16fd59ad5a34607dd2c644f7bedc786dd6567670c +SuiteSparse.v7.2.1+1.armv7l-linux-musleabihf.tar.gz/md5/19f9246fc6c8bd2c7a4d2df498725abe +SuiteSparse.v7.2.1+1.armv7l-linux-musleabihf.tar.gz/sha512/d2cba310fe33ddb11d9ada37ce04905dfc3f058a1cbf7b53ca1dc31c2c51bcf930a4976c39d55bfdacb19195ff772acb1c6876238640a6ed6277777934a8b53f +SuiteSparse.v7.2.1+1.i686-linux-gnu.tar.gz/md5/8ff91e530528c8761411b8d9be56d1f0 
+SuiteSparse.v7.2.1+1.i686-linux-gnu.tar.gz/sha512/42bd937fb1c476164b923b5093d3df3fc3cdd4e3bc148616ba48027d4616479d674a4c8f7291cf7004a43834508b459630f4cafbd90850d10402d53faa34e714 +SuiteSparse.v7.2.1+1.i686-linux-musl.tar.gz/md5/49bc8f22a227748680734d89f64a4cf7 +SuiteSparse.v7.2.1+1.i686-linux-musl.tar.gz/sha512/b78b84d330a8f22e7d9fdd72fe621e9830c1afd908946d4101705f05546aa892b2f5ef87988dec39ccd81cbe4dbeb95adc277d096d60e106485c5b6f81cf4403 +SuiteSparse.v7.2.1+1.i686-w64-mingw32.tar.gz/md5/cd593a3c801ba72bf6d77788c7ca06b9 +SuiteSparse.v7.2.1+1.i686-w64-mingw32.tar.gz/sha512/0b49795ed5cb773a5930d305e65d53ff35ff1d1ee0a84e8762f56ca442c2421752b50b667646fd6a977c0684c2214996011f405ff1c7fd6eeaf16d08262d7d05 +SuiteSparse.v7.2.1+1.powerpc64le-linux-gnu.tar.gz/md5/3858c87b8f62844520ff61c72d7b5a25 +SuiteSparse.v7.2.1+1.powerpc64le-linux-gnu.tar.gz/sha512/ea505fe14155ee69a715339fe7075603c04458d5c7f65fecb92bea69a86117b1d21da75dab832ac0f6cc9aa64bfa6d7f50cb679fefa9ec5b4d4d8826d3137ff9 +SuiteSparse.v7.2.1+1.x86_64-apple-darwin.tar.gz/md5/241dec5338e04fbf6084cec90bbd2f76 +SuiteSparse.v7.2.1+1.x86_64-apple-darwin.tar.gz/sha512/8477d2102be709aa2f74325df91aab4f9c894c8f516cd17d3780aab66bcbf920fa5771fa7e130a63793f94b99c6cfc4db6ab22e6a33a55670e25e36472770d59 +SuiteSparse.v7.2.1+1.x86_64-linux-gnu.tar.gz/md5/c6b6fa99a21a9000892d51b821f304a7 +SuiteSparse.v7.2.1+1.x86_64-linux-gnu.tar.gz/sha512/ad2e1200d0418c531758672b64e849c81cfe74ca73cff0e1a47797e73dbc4675c9a2ec855af628dddef58b135412d06fa18c15565c94de5e1e6d15e3b150ecbd +SuiteSparse.v7.2.1+1.x86_64-linux-musl.tar.gz/md5/6c14129471a9c92464d36ae00f4c5a08 +SuiteSparse.v7.2.1+1.x86_64-linux-musl.tar.gz/sha512/e9051ceb7d551019deb16480b493d1ac5b622fe86c7e19b1023eb12af28d42f25e911e1e44870c35849d8f95d78e8e28c38699cde1fab250dac32818ebc58a2b +SuiteSparse.v7.2.1+1.x86_64-unknown-freebsd.tar.gz/md5/ab0f6a9b7789f21aba5ea10659b03ed3 +SuiteSparse.v7.2.1+1.x86_64-unknown-freebsd.tar.gz/sha512/cc9136bfda474914107e68f97a200d46f81a1f36ea51c4e482ef04e818d3ac10d14b2895eef59b2570f6115261e987bd076dd8f9be0e6d2dc77931d3257db142 +SuiteSparse.v7.2.1+1.x86_64-w64-mingw32.tar.gz/md5/e804d9ed593d739326865dc1f60d5800 +SuiteSparse.v7.2.1+1.x86_64-w64-mingw32.tar.gz/sha512/20c9ac62cd41b19e0b9605c8f9a8bece9089f7f69e2cf57ace3058215acefe8cf9ce39d3c05010223443bfc45b1efb8391be677a1b5e9a59bbdfe89f89553f71 diff --git a/deps/checksums/unwind b/deps/checksums/unwind index 5751b9111c9e0..637605677359d 100644 --- a/deps/checksums/unwind +++ b/deps/checksums/unwind @@ -1,26 +1,26 @@ -LibUnwind.v1.5.0+2.aarch64-linux-gnu.tar.gz/md5/95e3a6b8e1842e21f8793c489dc07f9b -LibUnwind.v1.5.0+2.aarch64-linux-gnu.tar.gz/sha512/faf51cc25065f1493b32fb7520a4ababe631b918336ca51091345f75cae967977b29185476d5c12fd5f9533531c1cbcb84ec17fe941a8d7bfa5aef9396842f87 -LibUnwind.v1.5.0+2.aarch64-linux-musl.tar.gz/md5/c084f8c50371b3ddb85f6290534e79ef -LibUnwind.v1.5.0+2.aarch64-linux-musl.tar.gz/sha512/2df5fead3bd8ea43c136431e7b1340c2a0b605c04c575e1d22edab479fe8074d898f9bd3c791eb5d91c3e52d31bb7f841dd8f11be97a3eb6d8ac61d1d809622e -LibUnwind.v1.5.0+2.armv6l-linux-gnueabihf.tar.gz/md5/6c11ca045cbaeb4fe75363aa116a784e -LibUnwind.v1.5.0+2.armv6l-linux-gnueabihf.tar.gz/sha512/5c337a850f184a0c514da8abca42aa5728cfa384a9ef3d0f36d67b10e322affe95d4a05c76faca69ca66cdb4e8535f4e8ee0f650ef39a27f897083e66570d6fb -LibUnwind.v1.5.0+2.armv6l-linux-musleabihf.tar.gz/md5/8e4b4569abccd11fb577346f6df42d4a 
-LibUnwind.v1.5.0+2.armv6l-linux-musleabihf.tar.gz/sha512/025660fe2dbb9d5904b865d6a3716553f368c5589b0cf8fd5f93a87e2204e5c66309b046de0d853ce643992dfa6433fc6214417bd477c4f363fd946ad6c97179 -LibUnwind.v1.5.0+2.armv7l-linux-gnueabihf.tar.gz/md5/c57b86157a00931608786578795e398a -LibUnwind.v1.5.0+2.armv7l-linux-gnueabihf.tar.gz/sha512/f7b720f0ab208212b66fac6783e98edfc80bca9b2b903bf665da1a464a0a615aaa998ea1bee9088c73124879ded53b58fe9c5086ec547a50bcdf14be93652da1 -LibUnwind.v1.5.0+2.armv7l-linux-musleabihf.tar.gz/md5/a88e3a13a02c9d491ced12c7ba416508 -LibUnwind.v1.5.0+2.armv7l-linux-musleabihf.tar.gz/sha512/ef705a74750680e81daec6ff790797f247a7dbdb99731ab4083bc9a56f3f79da68c2c15321f5f6466d2f71b228aae5f59f793a16a06cf93a57366a051b748376 -LibUnwind.v1.5.0+2.i686-linux-gnu.tar.gz/md5/657a43f2b2e323ed3f298baae60bcd52 -LibUnwind.v1.5.0+2.i686-linux-gnu.tar.gz/sha512/138646a791044ab3106452111b5801710fccd2a0356b566751fee93d8e636a7f2cc14679d5cf515f1bdebcac5722af746c2047775a7e191f7ddc068914d29383 -LibUnwind.v1.5.0+2.i686-linux-musl.tar.gz/md5/cfe5281bca9498083c1da5eb787c2bac -LibUnwind.v1.5.0+2.i686-linux-musl.tar.gz/sha512/b786d9000d2435f3284072ae527d172e89224373c59683ba265d24946ac89ab714d2ced6eb37a0191bea85de556a5ea1420a089aa5ba4f01ed9397e945841bd9 -LibUnwind.v1.5.0+2.powerpc64le-linux-gnu.tar.gz/md5/c2f19ab443307b986d9545bfce7e3f83 -LibUnwind.v1.5.0+2.powerpc64le-linux-gnu.tar.gz/sha512/034493ac5822d481976e4ee2d53db066788fab7fb0053bd472c6ef1d078700882487aebc4f7bb1be5bff9719eb048a24d8a7318a34154e04f9a192eef5fa56b8 -LibUnwind.v1.5.0+2.x86_64-linux-gnu.tar.gz/md5/f6c7ca4303e43dd3a22314dbab294037 -LibUnwind.v1.5.0+2.x86_64-linux-gnu.tar.gz/sha512/0c7d7793b606cbd51d1be85bbc8c62bf2a60b4b25279d4267e535d5ba53b8cc667f5cc92e607439ee8354bda8c03637315f93bee23bb09b47d83b3b4543c690d -LibUnwind.v1.5.0+2.x86_64-linux-musl.tar.gz/md5/4ba92194d0e323839d2207093f365be9 -LibUnwind.v1.5.0+2.x86_64-linux-musl.tar.gz/sha512/49110890d2e4e0050c52c5b2f94288c2afe1c75cd3b54345a49f095a9ea6804122c7d1b4dac831a169dabf510247107c299031b732a23d8d217ab0fd4e1d0682 -LibUnwind.v1.5.0+2.x86_64-unknown-freebsd.tar.gz/md5/e9b4a61538244b4dc05147f94b4d31d4 -LibUnwind.v1.5.0+2.x86_64-unknown-freebsd.tar.gz/sha512/bcae20fdd5ac3da362b94a6059b2c055de111507a8da7ae311fe176cb3873429eb7b30aaf83210699fb24fc8a309648a30514f34c43615e02268528b6b29cb27 -libunwind-1.5.0.tar.gz/md5/c6923dda0675f6a4ef21426164dc8b6a -libunwind-1.5.0.tar.gz/sha512/1df20ca7a8cee2f2e61294fa9b677e88fec52e9d5a329f88d05c2671c69fa462f6c18808c97ca9ff664ef57292537a844f00b18d142b1938c9da701ca95a4bab +LibUnwind.v1.7.2+2.aarch64-linux-gnu.tar.gz/md5/2c8bdfc891b817cdcebad5cec3fe03ba +LibUnwind.v1.7.2+2.aarch64-linux-gnu.tar.gz/sha512/c06eb08411c9759b7d0043258e239620eebac36b573c58d91f5cd3c6b801a33fb42833c21e0badcd958085adb1eddcf3d37386143b7584fd2c0631efa225acce +LibUnwind.v1.7.2+2.aarch64-linux-musl.tar.gz/md5/3233eec783f73a9723f5c9c06a4085e9 +LibUnwind.v1.7.2+2.aarch64-linux-musl.tar.gz/sha512/91b5f6eba193b6c2abaac30ad805003142dd48b8bbda36c9d50e7802a17abf04f95b5d21c3d842697653834ee2c21c222f0b96d8f142a99f3819a58493817474 +LibUnwind.v1.7.2+2.armv6l-linux-gnueabihf.tar.gz/md5/75dc5ea25f5055959d348523ed6f1bdb +LibUnwind.v1.7.2+2.armv6l-linux-gnueabihf.tar.gz/sha512/315e3e05caec95a81fef570aee3328f4f8b617a9567249b7e535a3b4246f9a39c6bc45dc0f3c061e6a796b94d839c308f0bba083a06093df4a9932328e1c6dee +LibUnwind.v1.7.2+2.armv6l-linux-musleabihf.tar.gz/md5/99b02885777f3954cb9569849304f6fc 
+LibUnwind.v1.7.2+2.armv6l-linux-musleabihf.tar.gz/sha512/b73ab8472cf60fb9e88d391543e5349246d05b90fa6b0ea793fca0e1a67ce91cc8f12a09c9e4594add5264c0363b157b879b89ce881ae33f0f7e3315b56aec5e +LibUnwind.v1.7.2+2.armv7l-linux-gnueabihf.tar.gz/md5/ce447b3e3a9e8f990f914faa6cb37db0 +LibUnwind.v1.7.2+2.armv7l-linux-gnueabihf.tar.gz/sha512/efb8aff3a23dbbc8b65414f7f39af4a664311120f2da61b7b70dd8c7da30230582d8256c2b5421e00b2d70062e767694e358dafbbae83a69f550a5d0ac6a62b9 +LibUnwind.v1.7.2+2.armv7l-linux-musleabihf.tar.gz/md5/535b71f32d85b18208b911a13a9a0672 +LibUnwind.v1.7.2+2.armv7l-linux-musleabihf.tar.gz/sha512/267b72cdb3fe1fc63031fe76fd3ffc3afc97775a541dc2357df4f849d585064c0492b82ef3c16efce0f0573778ee56ed7886e97c6591f7a8cda27f705b0b65e4 +LibUnwind.v1.7.2+2.i686-linux-gnu.tar.gz/md5/97b6867d8d20abef8b96f5fc17e932b9 +LibUnwind.v1.7.2+2.i686-linux-gnu.tar.gz/sha512/ff48995503447288945dab6f32f85801b81be98ec9f5bcc933305230befb51a9c0aeb588e722ebde3694cc9e73ee6b443ae5eedec352c5c166603dca8c23c559 +LibUnwind.v1.7.2+2.i686-linux-musl.tar.gz/md5/3864ac880d38adb276e353904bf2b18d +LibUnwind.v1.7.2+2.i686-linux-musl.tar.gz/sha512/36198cf175d6db1dfbb43ed498ad25eab2392958f21fc8a83aafdb4f54aacf8c3a473d313b117a835c81e56b7a20ff2bf152dee8f4280cdf7ef0e751d736a362 +LibUnwind.v1.7.2+2.powerpc64le-linux-gnu.tar.gz/md5/b4e6b56f160be18abbaac5f36d161753 +LibUnwind.v1.7.2+2.powerpc64le-linux-gnu.tar.gz/sha512/354f92c8658b465545b2ae77826f0ae4580084ec3e7723900e064245bdb0c38bc8dc9dc04bc505e83879d8bb3d2291493c091e83929aba23d42c99dc622a2bf0 +LibUnwind.v1.7.2+2.x86_64-linux-gnu.tar.gz/md5/1d82791f522deadb3bc3232997d094be +LibUnwind.v1.7.2+2.x86_64-linux-gnu.tar.gz/sha512/9603731e4f40c89b6a157c754c1e89ca7c38349e2099e072957123967e6d9c8bd7df1725afc9ffe1c9137ada6e364deb19b4b91fc19d099d7acf1bced3caf2e0 +LibUnwind.v1.7.2+2.x86_64-linux-musl.tar.gz/md5/278dd4a9d36169b3c7719ac83f3d3fed +LibUnwind.v1.7.2+2.x86_64-linux-musl.tar.gz/sha512/68952cbc9ac3e379412bc8482379621809bdd1d77095a44f18b96ff5f58bc338e502305941db0cfae0b68d4ee3719d7db898b64ccd815f1b71110cbc49a4b11f +LibUnwind.v1.7.2+2.x86_64-unknown-freebsd.tar.gz/md5/d043be8787b39d5e9a467bd8ff90be1d +LibUnwind.v1.7.2+2.x86_64-unknown-freebsd.tar.gz/sha512/44fda1ffe4a3f4d442dcccb414efe6c0d6ab895f619d74f4aa8cdcef26d9388d7b6975cb43fc18b207df1f2e61faf3555d65db8597f85e3ab4b053be5ce72e66 +libunwind-1.7.2.tar.gz/md5/35799cd8e475d3e157230ad2590c10f1 +libunwind-1.7.2.tar.gz/sha512/903f7e26c7d4c22e6ef4fe8954ca0f153fdf346cec40e1e8f7ab966d251110f4deb0a84d1fd150aee194ed966b5c1e01ee27c821cd043859852da33a94faae1f diff --git a/deps/checksums/utf8proc b/deps/checksums/utf8proc index 6c2b22983ec3d..543c1805afbd8 100644 --- a/deps/checksums/utf8proc +++ b/deps/checksums/utf8proc @@ -1,2 +1,2 @@ -utf8proc-8ca6144c85c165987cb1c5d8395c7314e13d4cd7.tar.gz/md5/af7d2e685f46ff6317fc4ab276bfade7 -utf8proc-8ca6144c85c165987cb1c5d8395c7314e13d4cd7.tar.gz/sha512/0b1c839457755db6679057c99a7872e72e3f17d8535e1e173749e139050bcf10f2e9a9b9fadccabde644ffcc865cfb9396429fc31e5a5a383f95856a01ea98a2 +utf8proc-34db3f7954e9298e89f42641ac78e0450f80a70d.tar.gz/md5/e70e4fd2c914b4d4c0e3f0e2ca6c96d4 +utf8proc-34db3f7954e9298e89f42641ac78e0450f80a70d.tar.gz/sha512/0037f144e1150abd1b330d8a0c3a46c8352903acc9f4c8aad6bddd1370b19cc34551f8def58752cdff4eaace3efe54180bc11439a0e35c5ccad2fec4678c017e diff --git a/deps/checksums/zlib b/deps/checksums/zlib index d524a3f588a18..72fd884183e47 100644 --- a/deps/checksums/zlib +++ b/deps/checksums/zlib @@ -1,34 +1,34 @@ -Zlib.v1.2.12+3.aarch64-apple-darwin.tar.gz/md5/2258883a6412fbdac0b807afd133834f 
-Zlib.v1.2.12+3.aarch64-apple-darwin.tar.gz/sha512/6e82b57646dfe2b86978d51cb4401d565d00d6bdcfabe09ceb888ad8979bd1398fd9ea7652542f149d88c120110f6c3baa919616f01410e9238a5199f50f5dda -Zlib.v1.2.12+3.aarch64-linux-gnu.tar.gz/md5/663aa0d0791b92464e4822a130ac7fa9 -Zlib.v1.2.12+3.aarch64-linux-gnu.tar.gz/sha512/e50f00d92600a78b2f540e0e8e1dce435d0d0499ea80ce3c3cd0e11c8e3b5b1a97eadca9ac863f597cee369e80bcd50ec1c0a0e0f1a87bb0ff94bbaf453dea2d -Zlib.v1.2.12+3.aarch64-linux-musl.tar.gz/md5/471179a2364d59abb6426b378ea4e195 -Zlib.v1.2.12+3.aarch64-linux-musl.tar.gz/sha512/35208e4be5966343ecb2b78471a3e1a947489f83c828b562db3508506dd0493eae3318c7eb3a6b599e911416795023193df862fbb6fcc7389d44710dc30f16a8 -Zlib.v1.2.12+3.armv6l-linux-gnueabihf.tar.gz/md5/53601c0201dadc8c9ff038167d5c4277 -Zlib.v1.2.12+3.armv6l-linux-gnueabihf.tar.gz/sha512/19744283bb412a656b934347cb7a1d121fbaf7e5f9b1aac373ddf2466567b731817a2e72e3a4d993ca7e5b5eb1fd9bb9c24d0126778367b28bdb94721649298b -Zlib.v1.2.12+3.armv6l-linux-musleabihf.tar.gz/md5/f7c923955fc600785aae455807e63c8b -Zlib.v1.2.12+3.armv6l-linux-musleabihf.tar.gz/sha512/623cd1758465c9e40b0dad93981ae93097a03f4aa67487b7e1c7240be2d780d86f35f8db96743c35bbb329d572741b58e73735a2b1cfb9e18e77f4dbcc714063 -Zlib.v1.2.12+3.armv7l-linux-gnueabihf.tar.gz/md5/5ce0fe42f67e09de047626424d61bc82 -Zlib.v1.2.12+3.armv7l-linux-gnueabihf.tar.gz/sha512/322e32d6fe6cd7a3334f5146f8980d4f1fc85b9a1c60271659ba8b4bbfdec314f8d9e8c6c0719248f5dd18e3daefd946811a3dcc74fa3ae5505d6dd653e65309 -Zlib.v1.2.12+3.armv7l-linux-musleabihf.tar.gz/md5/5115c374df90393cb895dd45c77275c4 -Zlib.v1.2.12+3.armv7l-linux-musleabihf.tar.gz/sha512/b04b4f42220833b99923a3ff349e4a05ad9f67c2b62d4848de37c833b287420b1dbec8a039c09d2a95ab6b68a62c6dcbacb4ba7cc069a4e90a11f8592719d2b8 -Zlib.v1.2.12+3.i686-linux-gnu.tar.gz/md5/37e0186f765fada0d76b9cd6f28c8d5d -Zlib.v1.2.12+3.i686-linux-gnu.tar.gz/sha512/1239675bbf46c6243131585283b0fc23baa32e68226fbb2f0b7a833c8979e2df33590947daade533e37bafe21838a10198e9f9de99e094c21fba6b218b2fceab -Zlib.v1.2.12+3.i686-linux-musl.tar.gz/md5/a0d92af6481929eed3a9fec3dbb2e622 -Zlib.v1.2.12+3.i686-linux-musl.tar.gz/sha512/b448590129ef251083b675c3d7494a90151a03297fd9883efb70bde032d106f16f2ec7c28508d9b4a0d0e5a0be0bdb4bcf0d1a9e4b2ade034a6d6cfc4916536e -Zlib.v1.2.12+3.i686-w64-mingw32.tar.gz/md5/cc38d9ec5430e2ed7fed4792c7ac9551 -Zlib.v1.2.12+3.i686-w64-mingw32.tar.gz/sha512/85ad3babb42682d7b2b69513a30fd5e992a56436dcd7e2a44800bf1bc30d60d09aff5769cfaeefd4f5668e7973a0c2d4ad4d28559ea5f28c1c5419ed595eae57 -Zlib.v1.2.12+3.powerpc64le-linux-gnu.tar.gz/md5/8f57d8c31d2355c64a05db0412462d58 -Zlib.v1.2.12+3.powerpc64le-linux-gnu.tar.gz/sha512/9a0208c7a4dbf71b6f7e1ccaf05e3f3a422507cf0431b6482aab1a7b1bea41bd135320567f7dba6666f37c26f48cb3a627f1a1ebd39bf5c2d61148aadf62a986 -Zlib.v1.2.12+3.x86_64-apple-darwin.tar.gz/md5/5d15bb591d26d24aa9d6c9c8cf3df097 -Zlib.v1.2.12+3.x86_64-apple-darwin.tar.gz/sha512/7d8b0ec5a46a85cef3c5de451823c5cfa73b5b7c5ac98699065bbc5692af556195664908cd5c35184b7a9586fc0adab41fc0f76ee8599ca09a740cf49b9be113 -Zlib.v1.2.12+3.x86_64-linux-gnu.tar.gz/md5/25df63b9e6cbef14b0f0bf2a9eec5d14 -Zlib.v1.2.12+3.x86_64-linux-gnu.tar.gz/sha512/2660b762d816491e6b877020d8dd4a1cf1b171d6232dd5e0f47c6ee7b15504b006cc8f051434df778e0910130ef7456e30d531464470d3c4a2502e8f9fd19e76 -Zlib.v1.2.12+3.x86_64-linux-musl.tar.gz/md5/3f0c85d248711608141046d15b2da339 -Zlib.v1.2.12+3.x86_64-linux-musl.tar.gz/sha512/e4256b1b9520d5b0d97fa7e7ca6f6b9aa2583c6e5f14967392d54e48f27e242461f77e522743b229ab9b333eec5fd51f6d7b1559b566bd68ca0741b05b96df3c 
-Zlib.v1.2.12+3.x86_64-unknown-freebsd.tar.gz/md5/e67dae1456645930c9e2b2fef6f805c8 -Zlib.v1.2.12+3.x86_64-unknown-freebsd.tar.gz/sha512/5915ec48ae80be829c36a71e2ce580d2d14b7a9824c8f279ad5c69fea62d9a03345b665f224b9dde0bc4b808af246f89ec4f932d47a14236bc3b7db7651e5bec -Zlib.v1.2.12+3.x86_64-w64-mingw32.tar.gz/md5/89b152b3de0068c7c2580b87ad529ed3 -Zlib.v1.2.12+3.x86_64-w64-mingw32.tar.gz/sha512/df4b585f6501f45bc85e8d00c1b03c482d70d3491081246f9e9f9560f90c5f6057b1174a81e653f725209323cd743cf05d3e1aba1385afd26cb6f8c50186f818 -zlib-21767c654d31d2dccdde4330529775c6c5fd5389.tar.gz/md5/1fb2320f871561306bc87b3894727b45 -zlib-21767c654d31d2dccdde4330529775c6c5fd5389.tar.gz/sha512/2ad1e728f97a81b65d24fe5bef66658c94222d717a3486a0d11682b61563d7eaaa578f7457078881e8ed8c91b87aec11634d4a64021546e23a3ecabb3285197a +Zlib.v1.2.13+1.aarch64-apple-darwin.tar.gz/md5/bc44b2016065fb20cbd639b3cd5dbb88 +Zlib.v1.2.13+1.aarch64-apple-darwin.tar.gz/sha512/9cfecc16a29b0a13282846ed7d4c17c420b3f62379777d3fac61a8c9c4eeaf4214b826cd9f7479f480e951617b22c96e6ca2976a709345e16fbe7f81e9bdd83f +Zlib.v1.2.13+1.aarch64-linux-gnu.tar.gz/md5/a2d3265543017db03bc47b9d9778d99d +Zlib.v1.2.13+1.aarch64-linux-gnu.tar.gz/sha512/c8143445222e151d7f522a98ee8f2742571542f4e71d515e88086c9d7f27b952662ced93f40c795e0de42e3a07c0cb5e1d9d8e792347f3c068cb07ccc144a640 +Zlib.v1.2.13+1.aarch64-linux-musl.tar.gz/md5/c1f2a1c562f72c7aa4b228f57c2346d4 +Zlib.v1.2.13+1.aarch64-linux-musl.tar.gz/sha512/7ed89bc7696690c03617c7413f5456ff5a1caa0dd600880ae67132f6c9190672ae451a06d23956a1969be00bf5c8f29bfa4f5bc4ab646b3b375c350f67c993e5 +Zlib.v1.2.13+1.armv6l-linux-gnueabihf.tar.gz/md5/7dff966f7bc5dd2902fa9ce20444235b +Zlib.v1.2.13+1.armv6l-linux-gnueabihf.tar.gz/sha512/49e7b4a7c84996b697cf944b11ce06ce6064983a6a911c4539587385afa1e0119e3b1dbf816703a2c132acc90f7f114ec10631647638b59b14954382c1a82014 +Zlib.v1.2.13+1.armv6l-linux-musleabihf.tar.gz/md5/6982f19d2446559c0fd369afe84ebe4a +Zlib.v1.2.13+1.armv6l-linux-musleabihf.tar.gz/sha512/8f69dfb7fb91cd6f7c934e1acddd83f77c2ebcc1732553f41ae1adcb7805a3304d16062133ce5094a8aea18ff5eca5f7a2df5724ae5a5cb9137caee732c1bf36 +Zlib.v1.2.13+1.armv7l-linux-gnueabihf.tar.gz/md5/30579a91f8f1c96752fe9a82bc053523 +Zlib.v1.2.13+1.armv7l-linux-gnueabihf.tar.gz/sha512/64f6a0e66ee13b086609e0d070c8742de20052e1ef43da201be0007e478c65b2f0a28a3c19ca5be6537b7c8bbeb6a4b2886c15a1e47bb2bd1cfe9d5e1590a620 +Zlib.v1.2.13+1.armv7l-linux-musleabihf.tar.gz/md5/b052ad151dbc3bad78762bc06164d667 +Zlib.v1.2.13+1.armv7l-linux-musleabihf.tar.gz/sha512/b5d2de09a4d65d898cf9ba0db34327c712f42a78cd1fd0f1d77fd8798910502049be63ccfed23de5fe3b499d9e0fe3d4cbb07c72765fd54db275e92f8f1e4dc4 +Zlib.v1.2.13+1.i686-linux-gnu.tar.gz/md5/3074702010889f586b43aa3dbbda4ceb +Zlib.v1.2.13+1.i686-linux-gnu.tar.gz/sha512/92aa87c5aa3831155305276c2f0da091b5be4e8a396772e1a28650c2837ceb116dd2207329732b653a97c011abd7dd6ac1fc9574ac64cb3049ccd36fa6700748 +Zlib.v1.2.13+1.i686-linux-musl.tar.gz/md5/eff02476825ea7a53ab26b346d58f96e +Zlib.v1.2.13+1.i686-linux-musl.tar.gz/sha512/14b72607d524948198e999e3919ee01046c049b3ec441bc581c77642cf37c3d28cc3c5500a3c073d62e9b8dc1efc9661b23bb925ed9c80b5e69abaddbcb59115 +Zlib.v1.2.13+1.i686-w64-mingw32.tar.gz/md5/138cb27334b8f6f9e818131ac394bf43 +Zlib.v1.2.13+1.i686-w64-mingw32.tar.gz/sha512/07fbf4a21f6cb5a6120be253e5769b8bbdf60658f9f3705222307fbe203d8710de59fd3dab7a35714ebe1a7385600d4e1b01eae0b1addca47f9d8d862173e667 +Zlib.v1.2.13+1.powerpc64le-linux-gnu.tar.gz/md5/bc69de101d9159b22b7a334e2700faa6 
+Zlib.v1.2.13+1.powerpc64le-linux-gnu.tar.gz/sha512/174eb4f154594d268d970d23eb6144dd2f6be41ddcfb9bc756b2ff48f0781ad0ed6571e2ead64dab0967da91517a02cd8db2b0e33a0bde9400103b5204f78e85 +Zlib.v1.2.13+1.x86_64-apple-darwin.tar.gz/md5/60279d648bce4801cd0e311ea95a6481 +Zlib.v1.2.13+1.x86_64-apple-darwin.tar.gz/sha512/921fc557317f87012d76f5d2cb0a7bbed29cdfdb2274ed6d37577f8e99dda2afb2a8dd4725d720eb8fb0a93c0d3afe68dd54fdd3a6e7cb07c15607a8aed72f82 +Zlib.v1.2.13+1.x86_64-linux-gnu.tar.gz/md5/b192d547d56124262e2ae744f385efd6 +Zlib.v1.2.13+1.x86_64-linux-gnu.tar.gz/sha512/c6dca3c0a713ef2e2296bc9e9afa75e103a4cc4f00b5c905ebc5cff688904d6a454f83ab5ef3b6c66bdf425daa2fcd25825e50a3534c0ff109b13affbb686179 +Zlib.v1.2.13+1.x86_64-linux-musl.tar.gz/md5/f2a466b38b2ff1c895f630982147a950 +Zlib.v1.2.13+1.x86_64-linux-musl.tar.gz/sha512/191261d37fc501591005bf680d76bf518da261252456c4fef1c12bc572f9200a855fbd1b125bb8ad10d803eedbc53d4c9d7a2861e9a35d629fb40f87e5306f5f +Zlib.v1.2.13+1.x86_64-unknown-freebsd.tar.gz/md5/36e53efdafdb8b8e1fb18817ea40c9ab +Zlib.v1.2.13+1.x86_64-unknown-freebsd.tar.gz/sha512/3067eace2a46b45c071dd1d2c046ab21e3f4a34b87346905bf4c00ef4ea57f41c4c30e32cbd5d4b60a560fa45aeeba7b0ce95566c0889f06f00f7a25de771cb1 +Zlib.v1.2.13+1.x86_64-w64-mingw32.tar.gz/md5/4c14730c6e89a3b05dcf352007f9c1e5 +Zlib.v1.2.13+1.x86_64-w64-mingw32.tar.gz/sha512/b6fbfe93d0c4fc6ebb740dbe0aebaa31aa5ecf352589452f6baac3ee28514531a1d0de9795634f97774ebb492dd23dee9f7865c2b8ba3f70c7f03cdc5430e85a +zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/md5/60a49c89b9409dd91c1b039266f7bd0c +zlib-04f42ceca40f73e2978b50e93806c2a18c1281fc.tar.gz/sha512/83122539da9399ce5f51c2ecbc38a627405334a9a6d53a024341353c1263a1e3aef7498f30ee281a49b3022be70e992eae475691e33da7a9c6a59b83207bd688 diff --git a/deps/clang.version b/deps/clang.version new file mode 100644 index 0000000000000..6e50e2413f622 --- /dev/null +++ b/deps/clang.version @@ -0,0 +1,4 @@ +## jll artifact +# Clang (paired with LLVM, only here as a JLL download) +CLANG_JLL_NAME := Clang +CLANG_JLL_VER := 15.0.7+10 diff --git a/deps/csl.mk b/deps/csl.mk index e3f84aa98974d..aaebc8f50c053 100644 --- a/deps/csl.mk +++ b/deps/csl.mk @@ -12,17 +12,16 @@ endef # CSL bundles lots of system compiler libraries, and while it is quite bleeding-edge # as compared to what most distros ship, if someone tries to build an older branch, -# the version of CSL that ships with that branch may become relatively old. This is -# not a problem for code that is built in BB, but when we build Julia with the system +# the version of CSL that ships with that branch may be relatively old. This is not +# a problem for code that is built in BB, but when we build Julia with the system # compiler, that compiler uses the version of `libstdc++` that it is bundled with, -# and we can get linker errors when trying to run that `julia` executable with the +# and we can get linker errors when trying to run that `julia` executable with the # `libstdc++` that comes from the (now old) BB-built CSL. # # To fix this, we take note when the system `libstdc++.so` is newer than whatever we # would get from CSL (by searching for a `GLIBCXX_3.4.X` symbol that does not exist # in our CSL, but would in a newer one), and default to `USE_BINARYBUILDER_CSL=0` in # this case. -CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.30|GLIBCXX_3\.5\.|GLIBCXX_4\. 
# First, check to see if BB is disabled on a global setting ifeq ($(USE_BINARYBUILDER),0) @@ -33,8 +32,8 @@ ifeq ($(USE_SYSTEM_CSL),1) USE_BINARYBUILDER_CSL ?= 0 else # If it's not, see if we should disable it due to `libstdc++` being newer: -LIBSTDCXX_PATH := $(eval $(call pathsearch,libstdc++,$(STD_LIB_PATH))) -ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p $(LIBSTDCXX_PATH) | grep $(CSL_NEXT_GLIBCXX_VERSION)))) +LIBSTDCXX_PATH := $(call pathsearch,$(call versioned_libname,libstdc++,6),$(STD_LIB_PATH)) +ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p '$(LIBSTDCXX_PATH)' | grep '$(CSL_NEXT_GLIBCXX_VERSION)'))) # Found `libstdc++`, grepped it for strings and found a `GLIBCXX` symbol # that is newer that whatever we have in CSL. Default to not using BB. USE_BINARYBUILDER_CSL ?= 0 @@ -51,8 +50,8 @@ ifeq ($(USE_BINARYBUILDER_CSL),0) define copy_csl install-csl: | $$(build_shlibdir) $$(build_shlibdir)/$(1) $$(build_shlibdir)/$(1): | $$(build_shlibdir) - -@SRC_LIB=$$(call pathsearch,$(1),$$(STD_LIB_PATH)); \ - [ -n "$$$${SRC_LIB}" ] && cp $$$${SRC_LIB} $$(build_shlibdir) + -@SRC_LIB='$$(call pathsearch,$(1),$$(STD_LIB_PATH))'; \ + [ -n "$$$${SRC_LIB}" ] && cp "$$$${SRC_LIB}" '$$(build_shlibdir)' endef # libgfortran has multiple names; we're just going to copy any version we can find @@ -69,7 +68,7 @@ $(eval $(call copy_csl,$(call versioned_libname,libatomic,1))) $(eval $(call copy_csl,$(call versioned_libname,libgomp,1))) ifeq ($(OS),WINNT) -# Windwos has special gcc_s names +# Windows has special gcc_s names ifeq ($(ARCH),i686) $(eval $(call copy_csl,$(call versioned_libname,libgcc_s_sjlj,1))) else @@ -105,4 +104,20 @@ distclean-csl: clean-csl else $(eval $(call bb-install,csl,CSL,true)) +ifeq ($(OS),WINNT) +install-csl: + mkdir -p $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/13/libgcc_s.a $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/13/libgcc.a $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/13/libmsvcrt.a $(build_private_libdir)/ + cp -a $(build_libdir)/gcc/$(BB_TRIPLET)/13/libssp.dll.a $(build_private_libdir)/ +endif +endif +ifeq ($(OS),WINNT) +uninstall-csl: uninstall-gcc-libraries +uninstall-gcc-libraries: + -rm -f $(build_private_libdir)/libgcc_s.a + -rm -f $(build_private_libdir)/libgcc.a + -rm -f $(build_private_libdir)/libmsvcrt.a + -rm -f $(build_private_libdir)/libssp.dll.a endif diff --git a/deps/csl.version b/deps/csl.version new file mode 100644 index 0000000000000..51af26c566c92 --- /dev/null +++ b/deps/csl.version @@ -0,0 +1,2 @@ +## jll artifact +CSL_JLL_NAME := CompilerSupportLibraries diff --git a/deps/curl.mk b/deps/curl.mk index a7896c99b4669..a063dfe07fba0 100644 --- a/deps/curl.mk +++ b/deps/curl.mk @@ -1,4 +1,5 @@ ## CURL ## +include $(SRCDIR)/curl.version ifeq ($(USE_SYSTEM_LIBSSH2), 0) $(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/libssh2 @@ -32,6 +33,17 @@ $(SRCCACHE)/curl-$(CURL_VER)/source-extracted: $(SRCCACHE)/curl-$(CURL_VER).tar. 
checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2 $(JLCHECKSUM) $< +## xref: https://github.com/JuliaPackaging/Yggdrasil/blob/master/L/LibCURL/common.jl +# Disable....almost everything +CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON) \ + --without-gnutls --without-libidn2 --without-librtmp \ + --without-nss --without-libpsl --without-libgsasl --without-fish-functions-dir \ + --disable-ares --disable-manual --disable-ldap --disable-ldaps --disable-static \ + --without-gssapi --without-brotli +# A few things we actually enable +CURL_CONFIGURE_FLAGS += --enable-versioned-symbols \ + --with-libssh2=${build_prefix} --with-zlib=${build_prefix} --with-nghttp2=${build_prefix} + # We use different TLS libraries on different platforms. # On Windows, we use schannel # On MacOS, we use SecureTransport @@ -43,21 +55,17 @@ CURL_TLS_CONFIGURE_FLAGS := --with-secure-transport else CURL_TLS_CONFIGURE_FLAGS := --with-mbedtls=$(build_prefix) endif +CURL_CONFIGURE_FLAGS += $(CURL_TLS_CONFIGURE_FLAGS) $(BUILDDIR)/curl-$(CURL_VER)/build-configured: $(SRCCACHE)/curl-$(CURL_VER)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ - $(dir $<)/configure $(CONFIGURE_COMMON) --includedir=$(build_includedir) \ - --without-ssl --without-gnutls --without-gssapi --disable-ares \ - --without-libidn2 --without-librtmp --without-nss --without-libpsl \ - --disable-ldap --disable-ldaps --without-zsh-functions-dir --disable-static \ - --with-libssh2=$(build_prefix) --with-zlib=$(build_prefix) --with-nghttp2=$(build_prefix) \ - $(CURL_TLS_CONFIGURE_FLAGS) \ + $(dir $<)/configure $(CURL_CONFIGURE_FLAGS) \ CFLAGS="$(CFLAGS) $(CURL_CFLAGS)" LDFLAGS="$(LDFLAGS) $(CURL_LDFLAGS)" echo 1 > $@ $(BUILDDIR)/curl-$(CURL_VER)/build-compiled: $(BUILDDIR)/curl-$(CURL_VER)/build-configured - $(MAKE) -C $(dir $<) $(LIBTOOL_CCLD) + $(MAKE) -C $(dir $<) $(MAKE_COMMON) echo 1 > $@ $(BUILDDIR)/curl-$(CURL_VER)/build-checked: $(BUILDDIR)/curl-$(CURL_VER)/build-compiled @@ -68,7 +76,7 @@ endif $(eval $(call staged-install, \ curl,curl-$$(CURL_VER), \ - MAKE_INSTALL,$$(LIBTOOL_CCLD),, \ + MAKE_INSTALL,,, \ $$(INSTALL_NAME_CMD)libcurl.$$(SHLIB_EXT) $$(build_shlibdir)/libcurl.$$(SHLIB_EXT))) clean-curl: diff --git a/deps/curl.version b/deps/curl.version new file mode 100644 index 0000000000000..288347a9935ed --- /dev/null +++ b/deps/curl.version @@ -0,0 +1,6 @@ +# -*- makefile -*- +## jll artifact +CURL_JLL_NAME := LibCURL + +## source build +CURL_VER := 8.4.0 diff --git a/deps/dsfmt.mk b/deps/dsfmt.mk index 2300d0d5929f4..da57799053933 100644 --- a/deps/dsfmt.mk +++ b/deps/dsfmt.mk @@ -1,13 +1,17 @@ ## DSFMT ## +include $(SRCDIR)/dsfmt.version ifneq ($(USE_BINARYBUILDER_DSFMT),1) -DSFMT_CFLAGS := $(CFLAGS) -DNDEBUG -DDSFMT_MEXP=19937 $(fPIC) -DDSFMT_DO_NOT_USE_OLD_NAMES -DDSFMT_SHLIB +DSFMT_CFLAGS := $(CFLAGS) -DNDEBUG -DDSFMT_MEXP=19937 $(fPIC) -DDSFMT_DO_NOT_USE_OLD_NAMES -DDSFMT_SHLIB $(SANITIZE_OPTS) DSFMT_CFLAGS += -O3 -finline-functions -fomit-frame-pointer -fno-strict-aliasing \ - --param max-inline-insns-single=1800 -Wall -std=c99 -shared + -Wall -std=c99 -shared ifeq ($(ARCH), x86_64) DSFMT_CFLAGS += -msse2 -DHAVE_SSE2 endif +ifneq ($(OS), emscripten) +DSFMT_CFLAGS += --param max-inline-insns-single=1800 +endif $(SRCCACHE)/dsfmt-$(DSFMT_VER).tar.gz: | $(SRCCACHE) $(JLDOWNLOAD) $@ https://github.com/MersenneTwister-Lab/dSFMT/archive/v$(DSFMT_VER).tar.gz diff --git a/deps/dsfmt.version b/deps/dsfmt.version new file mode 100644 index 0000000000000..bbb63417f46cd --- /dev/null +++ b/deps/dsfmt.version @@ -0,0 +1,5 @@ +## jll artifact 
+DSFMT_JLL_NAME := dSFMT + +## source build +DSFMT_VER := 2.2.4 diff --git a/deps/gmp.mk b/deps/gmp.mk index 66ad92ac910ef..491d649e9202f 100644 --- a/deps/gmp.mk +++ b/deps/gmp.mk @@ -1,4 +1,15 @@ ## GMP ## +include $(SRCDIR)/gmp.version + +ifneq ($(USE_BINARYBUILDER_GMP),1) + +GMP_CONFIGURE_OPTS := $(CONFIGURE_COMMON) +GMP_CONFIGURE_OPTS += --enable-cxx --enable-shared --disable-static +GMP_CONFIGURE_OPTS += CC_FOR_BUILD="$(HOSTCC)" + +ifeq ($(BUILD_ARCH),x86_64) +GMP_CONFIGURE_OPTS += --enable-fat +endif ifeq ($(SANITIZE),1) GMP_CONFIGURE_OPTS += --disable-assembly @@ -8,7 +19,9 @@ ifeq ($(BUILD_OS),WINNT) GMP_CONFIGURE_OPTS += --srcdir="$(subst \,/,$(call mingw_to_dos,$(SRCCACHE)/gmp-$(GMP_VER)))" endif -ifneq ($(USE_BINARYBUILDER_GMP),1) +ifeq ($(OS),emscripten) +GMP_CONFIGURE_OPTS += CFLAGS="-fPIC" +endif $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2: | $(SRCCACHE) $(JLDOWNLOAD) $@ https://gmplib.org/download/gmp/$(notdir $@) @@ -26,56 +39,53 @@ checksum-gmp: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2 # Necessary for version 6.2.1, remove after next gmp release $(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted cd $(dir $@) && \ - patch -p1 < $(SRCDIR)/patches/gmp-HG-changeset.patch + patch -p1 -f < $(SRCDIR)/patches/gmp-HG-changeset.patch echo 1 > $@ $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied cd $(dir $@) && \ - patch -p1 < $(SRCDIR)/patches/gmp-exception.patch + patch -p1 -f < $(SRCDIR)/patches/gmp-exception.patch echo 1 > $@ $(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied cd $(dir $@) && \ - patch -p1 < $(SRCDIR)/patches/gmp_alloc_overflow_func.patch + patch -p1 -f < $(SRCDIR)/patches/gmp_alloc_overflow_func.patch echo 1 > $@ $(SRCCACHE)/gmp-$(GMP_VER)/gmp-CVE-2021-43618.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied cd $(dir $@) && \ - patch -p1 < $(SRCDIR)/patches/gmp-CVE-2021-43618.patch + patch -p1 -f < $(SRCDIR)/patches/gmp-CVE-2021-43618.patch echo 1 > $@ -$(SRCCACHE)/gmp-$(GMP_VER)/source-patched: \ - $(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied \ - $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied \ - $(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied \ - $(SRCCACHE)/gmp-$(GMP_VER)/gmp-CVE-2021-43618.patch-applied +$(SRCCACHE)/gmp-$(GMP_VER)/gmp-more_alloc_overflow.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-CVE-2021-43618.patch-applied + cd $(dir $@) && \ + patch -p1 -f < $(SRCDIR)/patches/gmp-more_alloc_overflow.patch echo 1 > $@ -$(BUILDDIR)/gmp-$(GMP_VER)/build-configured: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted $(SRCCACHE)/gmp-$(GMP_VER)/source-patched +$(SRCCACHE)/gmp-$(GMP_VER)/source-patched: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-more_alloc_overflow.patch-applied + echo 1 > $@ + +$(BUILDDIR)/gmp-$(GMP_VER)/build-configured: $(SRCCACHE)/gmp-$(GMP_VER)/source-patched mkdir -p $(dir $@) cd $(dir $@) && \ - $(dir $<)/configure $(CONFIGURE_COMMON) F77= --enable-cxx --enable-shared --disable-static $(GMP_CONFIGURE_OPTS) + $(dir $<)/configure $(GMP_CONFIGURE_OPTS) echo 1 > $@ $(BUILDDIR)/gmp-$(GMP_VER)/build-compiled: $(BUILDDIR)/gmp-$(GMP_VER)/build-configured - $(MAKE) -C $(dir $<) $(LIBTOOL_CCLD) + $(MAKE) -C $(dir $<) echo 1 > $@ $(BUILDDIR)/gmp-$(GMP_VER)/build-checked: $(BUILDDIR)/gmp-$(GMP_VER)/build-compiled ifeq ($(OS),$(BUILD_OS)) - $(MAKE) -C $(dir $@) $(LIBTOOL_CCLD) check + $(MAKE) -C $(dir $@) check endif echo 1 > $@ 
-define GMP_INSTALL - mkdir -p $2/$(build_shlibdir) $2/$(build_includedir) - $(INSTALL_M) $1/.libs/libgmp*$(SHLIB_EXT)* $2/$(build_shlibdir) - $(INSTALL_F) $1/gmp.h $2/$(build_includedir) -endef $(eval $(call staged-install, \ gmp,gmp-$(GMP_VER), \ - GMP_INSTALL,,, \ - $$(INSTALL_NAME_CMD)libgmp.$$(SHLIB_EXT) $$(build_shlibdir)/libgmp.$$(SHLIB_EXT))) + MAKE_INSTALL,,, \ + $$(WIN_MAKE_HARD_LINK) $(build_bindir)/libgmp-*.dll $(build_bindir)/libgmp.dll && \ + $$(INSTALL_NAME_CMD)libgmp.$$(SHLIB_EXT) $$(build_shlibdir)/libgmp.$$(SHLIB_EXT))) clean-gmp: -rm -f $(BUILDDIR)/gmp-$(GMP_VER)/build-configured $(BUILDDIR)/gmp-$(GMP_VER)/build-compiled @@ -96,4 +106,5 @@ check-gmp: $(BUILDDIR)/gmp-$(GMP_VER)/build-checked else # USE_BINARYBUILDER_GMP $(eval $(call bb-install,gmp,GMP,false,true)) + endif diff --git a/deps/gmp.version b/deps/gmp.version new file mode 100644 index 0000000000000..f77cac5906cea --- /dev/null +++ b/deps/gmp.version @@ -0,0 +1,5 @@ +## jll artifact +GMP_JLL_NAME := GMP + +## source build +GMP_VER := 6.2.1 diff --git a/deps/ittapi.mk b/deps/ittapi.mk new file mode 100644 index 0000000000000..1a47c3ae89390 --- /dev/null +++ b/deps/ittapi.mk @@ -0,0 +1,43 @@ +## ittapi ## +include $(SRCDIR)/ittapi.version + +ITTAPI_GIT_URL := https://github.com/intel/ittapi.git +ITTAPI_TAR_URL = https://api.github.com/repos/intel/ittapi/tarball/$1 +$(eval $(call git-external,ittapi,ITTAPI,CMakeLists.txt,,$(SRCCACHE))) + +ITTAPI_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DITT_API_IPT_SUPPORT= -DITT_API_FORTRAN_SUPPORT=0 + +$(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted + mkdir -p $(dir $@) + cd $(dir $@) && \ + $(CMAKE) $(dir $<) $(ITTAPI_OPTS) + echo 1 > $@ + +$(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled: $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured + $(MAKE) -C $(dir $<) + echo 1 > $@ + +define ITTAPI_INSTALL + mkdir -p $2/$$(build_libdir) + mkdir -p $2/$$(build_includedir)/ittapi + cp -a $1/bin/libittnotify.a $2/$$(build_libdir) + cp -a $1/bin/libjitprofiling.a $2/$$(build_libdir) + # cp -a $1/bin/libadvisor.a $2/$$(build_libdir) + cp -a $(SRCCACHE)/$(ITTAPI_SRC_DIR)/include/ittnotify.h $2/$$(build_includedir)/ittapi/ + cp -a $(SRCCACHE)/$(ITTAPI_SRC_DIR)/include/ittnotify-zca.h $2/$$(build_includedir)/ittapi/ + cp -a $(SRCCACHE)/$(ITTAPI_SRC_DIR)/include/jitprofiling.h $2/$$(build_includedir)/ittapi/ +endef + +$(eval $(call staged-install, \ + ittapi,$(ITTAPI_SRC_DIR), \ + ITTAPI_INSTALL,,,)) + +get-ittapi: $(ITTAPI_SRC_FILE) +extract-ittapi: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted +configure-ittapi: $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured +compile-ittapi: $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled +fastcheck-ittapi: #none +check-ittapi: #none + +clean-ittapi: + -rm -f $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled $(build_libdir)/libopenlibm.a diff --git a/deps/ittapi.version b/deps/ittapi.version new file mode 100644 index 0000000000000..81afb6de2add2 --- /dev/null +++ b/deps/ittapi.version @@ -0,0 +1,3 @@ +## source build +ITTAPI_BRANCH=v3.24.0 +ITTAPI_SHA1=0014aec56fea2f30c1374f40861e1bccdd53d0cb diff --git a/deps/libgit2.mk b/deps/libgit2.mk index 14a0287df6733..d68a7a80d6d5b 100644 --- a/deps/libgit2.mk +++ b/deps/libgit2.mk @@ -13,9 +13,14 @@ ifeq ($(USE_SYSTEM_MBEDTLS), 0) $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/mbedtls endif -LIBGIT2_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=ON -DUSE_BUNDLED_ZLIB=ON -DUSE_SSH=ON +LIBGIT2_OPTS := $(CMAKE_COMMON) 
-DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=ON -DUSE_BUNDLED_ZLIB=ON -DUSE_SSH=ON -DBUILD_CLI=OFF ifeq ($(OS),WINNT) LIBGIT2_OPTS += -DWIN32=ON -DMINGW=ON +ifeq ($(USE_SYSTEM_LIBSSH2), 0) +LIBGIT2_OPTS += -DLIBSSH2_LIBRARIES=libssh2.dll +LIBGIT2_OPTS += -DLIBSSH2_LIBRARY_DIRS=$(build_prefix)/lib +LIBGIT2_OPTS += -DLIBSSH2_INCLUDE_DIRS=$(build_prefix)/include +endif # USE_SYSTEM_LIBSSH2=0 ifneq ($(ARCH),x86_64) ifneq ($(USECLANG),1) LIBGIT2_OPTS += -DCMAKE_C_FLAGS="-mincoming-stack-boundary=2" @@ -24,7 +29,7 @@ endif ifeq ($(BUILD_OS),WINNT) LIBGIT2_OPTS += -G"MSYS Makefiles" else -LIBGIT2_OPTS += -DBUILD_CLAR=OFF -DDLLTOOL=`which $(CROSS_COMPILE)dlltool` +LIBGIT2_OPTS += -DBUILD_TESTS=OFF -DDLLTOOL=`which $(CROSS_COMPILE)dlltool` LIBGIT2_OPTS += -DCMAKE_FIND_ROOT_PATH=/usr/$(XC_HOST) -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY endif endif @@ -35,23 +40,6 @@ endif LIBGIT2_SRC_PATH := $(SRCCACHE)/$(LIBGIT2_SRC_DIR) -$(LIBGIT2_SRC_PATH)/libgit2-agent-nonfatal.patch-applied: $(LIBGIT2_SRC_PATH)/source-extracted - cd $(LIBGIT2_SRC_PATH) && \ - patch -p1 -f < $(SRCDIR)/patches/libgit2-agent-nonfatal.patch - echo 1 > $@ - -$(LIBGIT2_SRC_PATH)/libgit2-hostkey.patch-applied: $(LIBGIT2_SRC_PATH)/libgit2-agent-nonfatal.patch-applied - cd $(LIBGIT2_SRC_PATH) && \ - patch -p1 -f < $(SRCDIR)/patches/libgit2-hostkey.patch - echo 1 > $@ - -$(LIBGIT2_SRC_PATH)/libgit2-win32-ownership.patch-applied: $(LIBGIT2_SRC_PATH)/libgit2-hostkey.patch-applied - cd $(LIBGIT2_SRC_PATH) && \ - patch -p1 -f < $(SRCDIR)/patches/libgit2-win32-ownership.patch - echo 1 > $@ - -$(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: $(LIBGIT2_SRC_PATH)/libgit2-win32-ownership.patch-applied - $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: $(LIBGIT2_SRC_PATH)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ diff --git a/deps/libgit2.version b/deps/libgit2.version index 4efb6ada585df..9bd56f1bd0001 100644 --- a/deps/libgit2.version +++ b/deps/libgit2.version @@ -1,2 +1,13 @@ -LIBGIT2_BRANCH=v1.4.3 -LIBGIT2_SHA1=465bbf88ea939a965fbcbade72870c61f815e457 +# -*- makefile -*- +## jll artifact +LIBGIT2_JLL_NAME := LibGit2 + +## source build +LIBGIT2_BRANCH=v1.7.1 +LIBGIT2_SHA1=a2bde63741977ca0f4ef7db2f609df320be67a08 + +## Other deps +# Specify the version of the Mozilla CA Certificate Store to obtain. +# The versions of cacert.pem are identified by the date (YYYY-MM-DD) of their changes. +# See https://curl.haxx.se/docs/caextract.html for more details. 
+MOZILLA_CACERT_VERSION := 2023-01-10 diff --git a/deps/libssh2.mk b/deps/libssh2.mk index e27a57a4078d1..3f9738515e4a1 100644 --- a/deps/libssh2.mk +++ b/deps/libssh2.mk @@ -30,14 +30,13 @@ endif LIBSSH2_SRC_PATH := $(SRCCACHE)/$(LIBSSH2_SRC_DIR) - # Apply patch to fix v1.10.0 CVE (https://github.com/libssh2/libssh2/issues/649), drop with v1.11 -$(LIBSSH2_SRC_PATH)/libssh2-userauth-check.patch-applied: $(LIBSSH2_SRC_PATH)/source-extracted +$(LIBSSH2_SRC_PATH)/libssh2-mbedtls-size_t.patch-applied: $(LIBSSH2_SRC_PATH)/source-extracted cd $(LIBSSH2_SRC_PATH) && \ - patch -p1 -f < $(SRCDIR)/patches/libssh2-userauth-check.patch + patch -p1 -f < $(SRCDIR)/patches/libssh2-mbedtls-size_t.patch echo 1 > $@ $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: \ - $(LIBSSH2_SRC_PATH)/libssh2-userauth-check.patch-applied + $(LIBSSH2_SRC_PATH)/libssh2-mbedtls-size_t.patch-applied $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: $(LIBSSH2_SRC_PATH)/source-extracted mkdir -p $(dir $@) @@ -46,7 +45,7 @@ $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: $(LIBSSH2_SRC_PATH)/source-extr echo 1 > $@ $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured - $(MAKE) -C $(dir $<) libssh2 + $(MAKE) -C $(dir $<) echo 1 > $@ $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-checked: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled diff --git a/deps/libssh2.version b/deps/libssh2.version index 1c4d5412c0c09..7fff90885f6a3 100644 --- a/deps/libssh2.version +++ b/deps/libssh2.version @@ -1,2 +1,7 @@ -LIBSSH2_BRANCH=libssh2-1.10.0 -LIBSSH2_SHA1=635caa90787220ac3773c1d5ba11f1236c22eae8 +## jll artifact +LIBSSH2_JLL_NAME := LibSSH2 + +## source build +LIBSSH2_VER := 1.11.0 +LIBSSH2_BRANCH=libssh2-1.11.0 +LIBSSH2_SHA1=1c3f1b7da588f2652260285529ec3c1f1125eb4e diff --git a/deps/libsuitesparse.mk b/deps/libsuitesparse.mk index a1c0b067e6634..fa1eda94b5c4f 100644 --- a/deps/libsuitesparse.mk +++ b/deps/libsuitesparse.mk @@ -1,38 +1,31 @@ ## LIBSUITESPARSE ## - -ifeq ($(USE_BLAS64), 1) -UMFPACK_CONFIG := -DLONGBLAS='long long' -CHOLMOD_CONFIG := -DLONGBLAS='long long' -SPQR_CONFIG := -DLONGBLAS='long long' -UMFPACK_CONFIG += -DSUN64 -CHOLMOD_CONFIG += -DSUN64 -SPQR_CONFIG += -DSUN64 -endif - -# Disable linking to libmetis -CHOLMOD_CONFIG += -DNPARTITION +include $(SRCDIR)/libsuitesparse.version ifneq ($(USE_BINARYBUILDER_LIBSUITESPARSE), 1) LIBSUITESPARSE_PROJECTS := AMD BTF CAMD CCOLAMD COLAMD CHOLMOD LDL KLU UMFPACK RBio SPQR LIBSUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig amd btf camd ccolamd colamd cholmod klu ldl umfpack rbio spqr) -SUITESPARSE_LIB := $(LDFLAGS) -L"$(abspath $(BUILDDIR))/SuiteSparse-$(LIBSUITESPARSE_VER)/lib" -ifeq ($(OS), Darwin) -SUITESPARSE_LIB += $(RPATH_ESCAPED_ORIGIN) -endif -LIBSUITESPARSE_MFLAGS := CC="$(CC)" CXX="$(CXX)" F77="$(FC)" \ - AR="$(AR)" RANLIB="$(RANLIB)" \ - BLAS="-L$(build_shlibdir) -lblastrampoline" \ - LAPACK="-L$(build_shlibdir) -lblastrampoline" \ - LDFLAGS="$(SUITESPARSE_LIB)" CFOPENMP="" CUDA=no CUDA_PATH="" \ - UMFPACK_CONFIG="$(UMFPACK_CONFIG)" \ - CHOLMOD_CONFIG="$(CHOLMOD_CONFIG)" \ - SPQR_CONFIG="$(SPQR_CONFIG)" -ifeq ($(OS),WINNT) -LIBSUITESPARSE_MFLAGS += UNAME=Windows -else -LIBSUITESPARSE_MFLAGS += UNAME=$(OS) +LIBSUITESPARSE_CMAKE_FLAGS := $(CMAKE_COMMON) \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_CUDA=0 \ + -DNFORTRAN=1 \ + -DNOPENMP=1 \ + -DNPARTITION=0 \ + -DNSTATIC=1 \ + -DBLAS_FOUND=1 \ + -DBLAS_LIBRARIES="$(build_shlibdir)/libblastrampoline.$(SHLIB_EXT)" \ + -DBLAS_LINKER_FLAGS="blastrampoline" \ + -DBLAS_UNDERSCORE=ON \ 
+ -DBLA_VENDOR="blastrampoline" \ + -DBLAS64_SUFFIX="_64" \ + -DALLOW_64BIT_BLAS=ON \ + -DLAPACK_FOUND=1 \ + -DLAPACK_LIBRARIES="$(build_shlibdir)/libblastrampoline.$(SHLIB_EXT)" \ + -DLAPACK_LINKER_FLAGS="blastrampoline" + +ifneq (,$(findstring $(OS),Linux FreeBSD)) +LIBSUITESPARSE_CMAKE_FLAGS += -DCMAKE_INSTALL_RPATH="\$$ORIGIN" endif $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz: | $(SRCCACHE) @@ -47,19 +40,16 @@ $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted: $(SRCCACHE)/Suit checksum-libsuitesparse: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz $(JLCHECKSUM) $< -$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted - cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/SuiteSparse-shlib.patch - echo 1 > $@ -$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied - $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: | $(build_prefix)/manifest/blastrampoline $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted - $(MAKE) -C $(dir $<)SuiteSparse_config library config $(LIBSUITESPARSE_MFLAGS) - $(INSTALL_NAME_CMD)libsuitesparseconfig.$(SHLIB_EXT) $(dir $<)lib/libsuitesparseconfig.$(SHLIB_EXT) - for PROJ in $(LIBSUITESPARSE_PROJECTS); do \ - $(MAKE) -C $(dir $<)$${PROJ} library $(LIBSUITESPARSE_MFLAGS) || exit 1; \ - $(INSTALL_NAME_CMD)lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) $(dir $<)lib/lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) || exit 1; \ + cd $(dir $<); \ + for PROJ in SuiteSparse_config $(LIBSUITESPARSE_PROJECTS); do \ + cd $${PROJ}/build || exit 1; \ + $(CMAKE) .. 
$(LIBSUITESPARSE_CMAKE_FLAGS) || exit 1; \ + make || exit 1; \ + make install || exit 1; \ + cd ../..; \ done echo 1 > $@ @@ -74,14 +64,9 @@ $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked: $(BUILDDIR)/SuiteSp done echo 1 > $@ -UNINSTALL_suitesparse := $(LIBSUITESPARSE_VER) manual_suitesparse $(LIBSUITESPARSE_LIBS) +UNINSTALL_libsuitesparse := $(LIBSUITESPARSE_VER) manual_libsuitesparse $(LIBSUITESPARSE_LIBS) $(build_prefix)/manifest/libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled | $(build_prefix)/manifest $(build_shlibdir) - for lib in $(LIBSUITESPARSE_LIBS); do \ - cp -a $(dir $<)lib/lib$${lib} $(build_shlibdir) || exit 1; \ - done - #cp -a $(dir $<)lib/* $(build_shlibdir) - #cp -a $(dir $<)include/* $(build_includedir) echo $(UNINSTALL_libsuitesparse) > $@ clean-libsuitesparse: uninstall-libsuitesparse diff --git a/deps/libsuitesparse.version b/deps/libsuitesparse.version new file mode 100644 index 0000000000000..eea10d4f2beb8 --- /dev/null +++ b/deps/libsuitesparse.version @@ -0,0 +1,6 @@ +## jll artifact +LIBSUITESPARSE_JLL_NAME := SuiteSparse + +## source build +LIBSUITESPARSE_VER := 7.2.1 +LIBSUITESPARSE_SHA1=d6c84f7416eaee0d23d61c6c49ad1b73235d2ea2 diff --git a/deps/libtracyclient.mk b/deps/libtracyclient.mk new file mode 100644 index 0000000000000..92d6bee4caea6 --- /dev/null +++ b/deps/libtracyclient.mk @@ -0,0 +1,83 @@ +## LIBTRACYCLIENT ## +ifneq ($(USE_BINARYBUILDER_LIBTRACYCLIENT),1) +LIBTRACYCLIENT_GIT_URL:=https://github.com/wolfpld/tracy.git +LIBTRACYCLIENT_TAR_URL=https://api.github.com/repos/wolfpld/tracy/tarball/$1 +$(eval $(call git-external,libtracyclient,LIBTRACYCLIENT,,,$(BUILDDIR))) + +LIBTRACYCLIENT_BUILDDIR := $(BUILDDIR)/$(LIBTRACYCLIENT_SRC_DIR) +LIBTRACYCLIENT_SRCCACHE := $(SRCCACHE)/$(LIBTRACYCLIENT_SRC_DIR) + +LIBTRACYCLIENT_CMAKE := +LIBTRACYCLIENT_CMAKE += -DBUILD_SHARED_LIBS=ON +LIBTRACYCLIENT_CMAKE += -DTRACY_FIBERS=ON +LIBTRACYCLIENT_CMAKE += -DTRACY_ONLY_LOCALHOST=ON +LIBTRACYCLIENT_CMAKE += -DTRACY_NO_CODE_TRANSFER=ON +LIBTRACYCLIENT_CMAKE += -DTRACY_NO_FRAME_IMAGE=ON +LIBTRACYCLIENT_CMAKE += -DTRACY_NO_CRASH_HANDLER=ON +LIBTRACYCLIENT_CMAKE += -DTRACY_ON_DEMAND=ON +LIBTRACYCLIENT_CMAKE += -DTRACY_TIMER_FALLBACK=ON + +ifeq ($(WITH_TRACY_CALLSTACKS),1) +LIBTRACYCLIENT_CMAKE += -DTRACY_CALLSTACK=32 +else +LIBTRACYCLIENT_CMAKE += -DTRACY_NO_SAMPLING=ON +endif + +$(LIBTRACYCLIENT_BUILDDIR)/cmake-patch-applied: $(LIBTRACYCLIENT_BUILDDIR)/source-extracted +ifneq ($(OS),WINNT) + echo "target_compile_definitions(TracyClient PUBLIC __STDC_FORMAT_MACROS)" >> $(LIBTRACYCLIENT_BUILDDIR)/CMakeLists.txt +else + echo "target_compile_definitions(TracyClient PUBLIC WINVER=0x0602 _WIN32_WINNT=0x0602)" >> $(LIBTRACYCLIENT_BUILDDIR)/CMakeLists.txt +endif + echo 1 > $@ + +$(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-freebsd-elfw.patch-applied: $(LIBTRACYCLIENT_BUILDDIR)/cmake-patch-applied + cd $(LIBTRACYCLIENT_BUILDDIR) && \ + patch -p1 -f < $(SRCDIR)/patches/libTracyClient-freebsd-elfw.patch + echo 1 > $@ + +$(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-no-sampling.patch-applied: $(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-freebsd-elfw.patch-applied + cd $(LIBTRACYCLIENT_BUILDDIR) && \ + patch -p1 -f < $(SRCDIR)/patches/libTracyClient-no-sampling.patch + echo 1 > $@ + +$(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-plot-config.patch-applied: $(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-no-sampling.patch-applied + cd $(LIBTRACYCLIENT_BUILDDIR) && \ + patch -p1 -f < $(SRCDIR)/patches/libTracyClient-plot-config.patch + echo 1 > $@ + 
+$(LIBTRACYCLIENT_BUILDDIR)/build-configured: $(LIBTRACYCLIENT_BUILDDIR)/libTracyClient-plot-config.patch-applied + mkdir -p $(dir $@) + cd $(dir $@) && \ + $(CMAKE) . $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LIBTRACYCLIENT_CMAKE) \ + || { echo '*** To install a newer version of cmake, run contrib/download_cmake.sh ***' && false; } + echo 1 > $@ + +$(LIBTRACYCLIENT_BUILDDIR)/build-compiled: $(LIBTRACYCLIENT_BUILDDIR)/build-configured + cd $(LIBTRACYCLIENT_BUILDDIR) && \ + $(if $(filter $(CMAKE_GENERATOR),make), \ + $(MAKE), \ + $(CMAKE) --build .) + echo 1 > $@ + +$(eval $(call staged-install, \ + libtracyclient,$$(LIBTRACYCLIENT_SRC_DIR), \ + MAKE_INSTALL,,, \ + $$(INSTALL_NAME_CMD)libtracyclient.$$(SHLIB_EXT) $$(build_shlibdir)/libtracyclient.$$(SHLIB_EXT))) + +clean-libtracyclient: + rm -rf $(LIBTRACYCLIENT_BUILDDIR)/build-configured $(LIBTRACYCLIENT_BUILDDIR)/build-compiled + -$(MAKE) -C $(LIBTRACYCLIENT_BUILDDIR) clean + +get-libtracyclient: $(LIBTRACYCLIENT_SRC_FILE) +extract-libtracyclient: $(LIBTRACYCLIENT_BUILDDIR)/source-extracted +configure-libtracyclient: $(LIBTRACYCLIENT_BUILDDIR)/build-configured +compile-libtracyclient: $(LIBTRACYCLIENT_BUILDDIR)/build-compiled +fastcheck-libtracyclient: check-libtracyclient +check-libtracyclient: compile-libtracyclient + +else # USE_BINARYBUILDER_LIBTRACYCLIENT + +$(eval $(call bb-install,libtracyclient,LIBTRACYCLIENT,false)) + +endif diff --git a/deps/libtracyclient.version b/deps/libtracyclient.version new file mode 100644 index 0000000000000..60b5a3e8ce630 --- /dev/null +++ b/deps/libtracyclient.version @@ -0,0 +1,8 @@ +## jll artifact +LIBTRACYCLIENT_JLL_NAME := LibTracyClient +LIBTRACYCLIENT_JLL_VER := 0.9.1+5 + +## source build +LIBTRACYCLIENT_VER := 0.9.1 +LIBTRACYCLIENT_BRANCH=v0.9.1 +LIBTRACYCLIENT_SHA1=897aec5b062664d2485f4f9a213715d2e527e0ca diff --git a/deps/libuv.mk b/deps/libuv.mk index cdcd12d8db4fa..eacabac55e34f 100644 --- a/deps/libuv.mk +++ b/deps/libuv.mk @@ -18,6 +18,21 @@ LIBUV_BUILDDIR := $(BUILDDIR)/$(LIBUV_SRC_DIR) ifneq ($(CLDFLAGS)$(SANITIZE_LDFLAGS),) $(LIBUV_BUILDDIR)/build-configured: LDFLAGS:=$(LDFLAGS) $(CLDFLAGS) $(SANITIZE_LDFLAGS) endif + +ifeq ($(OS), emscripten) +$(LIBUV_BUILDDIR)/build-configured: $(SRCCACHE)/$(LIBUV_SRC_DIR)/source-extracted + mkdir -p $(dir $@) + cd $(dir $@) && cmake -E env \ + CMAKE_C_FLAGS="-pthread" \ + CMAKE_SHARED_LINKER_FLAGS="-sTOTAL_MEMORY=65536000 -pthread" \ + CMAKE_EXE_LINKER_FLAGS="-sTOTAL_MEMORY=65536000 -pthread" \ + emcmake cmake $(dir $<) $(CMAKE_COMMON) -DBUILD_TESTING=OFF + echo 1 > $@ + +$(LIBUV_BUILDDIR)/build-compiled: $(LIBUV_BUILDDIR)/build-configured + emmake $(MAKE) -C $(dir $<) $(UV_MFLAGS) + echo 1 > $@ +else $(LIBUV_BUILDDIR)/build-configured: $(SRCCACHE)/$(LIBUV_SRC_DIR)/source-extracted touch -c $(SRCCACHE)/$(LIBUV_SRC_DIR)/aclocal.m4 # touch a few files to prevent autogen from getting called touch -c $(SRCCACHE)/$(LIBUV_SRC_DIR)/Makefile.in @@ -30,6 +45,7 @@ $(LIBUV_BUILDDIR)/build-configured: $(SRCCACHE)/$(LIBUV_SRC_DIR)/source-extracte $(LIBUV_BUILDDIR)/build-compiled: $(LIBUV_BUILDDIR)/build-configured $(MAKE) -C $(dir $<) $(UV_MFLAGS) echo 1 > $@ +endif $(LIBUV_BUILDDIR)/build-checked: $(LIBUV_BUILDDIR)/build-compiled ifeq ($(OS),$(BUILD_OS)) diff --git a/deps/libuv.version b/deps/libuv.version index 0c6bdaaf78b58..01bf4fecc6dc6 100644 --- a/deps/libuv.version +++ b/deps/libuv.version @@ -1,2 +1,7 @@ -LIBUV_BRANCH=julia-uv2-1.44.1 -LIBUV_SHA1=1b2d16477fe1142adea952168d828a066e03ee4c +## jll artifact +LIBUV_JLL_NAME := LibUV + +## source build 
+LIBUV_VER := 2 +LIBUV_BRANCH=julia-uv2-1.44.2 +LIBUV_SHA1=2723e256e952be0b015b3c0086f717c3d365d97e diff --git a/deps/lld.version b/deps/lld.version new file mode 100644 index 0000000000000..56303e3dc0292 --- /dev/null +++ b/deps/lld.version @@ -0,0 +1,3 @@ +## jll artifact +LLD_JLL_NAME := LLD +LLD_JLL_VER := 15.0.7+10 diff --git a/deps/llvm-tools.version b/deps/llvm-tools.version new file mode 100644 index 0000000000000..e7fd7ae57622a --- /dev/null +++ b/deps/llvm-tools.version @@ -0,0 +1,5 @@ +## jll artifact +# LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`) +LLVM_TOOLS_JLL_NAME := LLVM +LLVM_TOOLS_JLL_VER := 15.0.7+10 +LLVM_TOOLS_ASSERT_JLL_VER := 15.0.7+10 diff --git a/deps/llvm-ver.make b/deps/llvm-ver.make index c2c7f2bc56da7..3777d5b37915a 100644 --- a/deps/llvm-ver.make +++ b/deps/llvm-ver.make @@ -1,3 +1,5 @@ +include $(JULIAHOME)/deps/llvm.version + LLVM_VER_MAJ:=$(word 1, $(subst ., ,$(LLVM_VER))) LLVM_VER_MIN:=$(word 2, $(subst ., ,$(LLVM_VER))) # define a "short" LLVM version for easy comparisons @@ -10,3 +12,9 @@ LLVM_VER_PATCH:=$(word 3, $(subst ., ,$(LLVM_VER))) ifeq ($(LLVM_VER_PATCH),) LLVM_VER_PATCH := 0 endif + +LLVM_SHARED_LIB_VER_SUFFIX := $(LLVM_VER_MAJ)jl +# e.g.: "libLLVM-14jl" +LLVM_SHARED_LIB_NAME := libLLVM-$(LLVM_SHARED_LIB_VER_SUFFIX) +LLVM_SHARED_LINK_FLAG := -lLLVM-$(LLVM_SHARED_LIB_VER_SUFFIX) +LLVM_SHLIB_SYMBOL_VERSION := JL_LLVM_$(LLVM_VER_MAJ).$(LLVM_VER_MIN) diff --git a/deps/llvm.mk b/deps/llvm.mk index 90605deefd115..a06db1fb0781b 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -1,4 +1,5 @@ ## LLVM ## +include $(SRCDIR)/llvm.version include $(SRCDIR)/llvm-ver.make include $(SRCDIR)/llvm-options.mk @@ -55,12 +56,15 @@ endif ifeq ($(BUILD_LIBCXX), 1) LLVM_ENABLE_RUNTIMES := $(LLVM_ENABLE_RUNTIMES);libcxx;libcxxabi endif +ifeq ($(BUILD_LLD), 1) +LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);lld +endif LLVM_LIB_FILE := libLLVMCodeGen.a # Figure out which targets to build -LLVM_TARGETS := host;NVPTX;AMDGPU;WebAssembly;BPF +LLVM_TARGETS := host;NVPTX;AMDGPU;WebAssembly;BPF;AVR LLVM_EXPERIMENTAL_TARGETS := LLVM_CFLAGS := @@ -97,7 +101,7 @@ endif LLVM_CMAKE += -DLLVM_TOOLS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir)) LLVM_CMAKE += -DLLVM_UTILS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir)) LLVM_CMAKE += -DLLVM_INCLUDE_UTILS=ON -DLLVM_INSTALL_UTILS=ON -LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_HISTEDIT_H=Off -DHAVE_LIBEDIT=Off +LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_HISTEDIT_H=Off -DHAVE_LIBEDIT=Off ifeq ($(LLVM_ASSERTIONS), 1) LLVM_CMAKE += -DLLVM_ENABLE_ASSERTIONS:BOOL=ON endif # LLVM_ASSERTIONS @@ -116,7 +120,7 @@ ifeq ($(USE_LLVM_SHLIB),1) LLVM_CMAKE += -DLLVM_BUILD_LLVM_DYLIB:BOOL=ON -DLLVM_LINK_LLVM_DYLIB:BOOL=ON endif ifeq ($(USE_INTEL_JITEVENTS), 1) -LLVM_CMAKE += -DLLVM_USE_INTEL_JITEVENTS:BOOL=ON +LLVM_CMAKE += -DLLVM_USE_INTEL_JITEVENTS:BOOL=ON -DITTAPI_SOURCE_DIR=$(SRCCACHE)/$(ITTAPI_SRC_DIR) endif # USE_INTEL_JITEVENTS ifeq ($(USE_OPROFILE_JITEVENTS), 1) @@ -146,7 +150,7 @@ endif ifeq ($(LLVM_SANITIZE),1) ifeq ($(SANITIZE_MEMORY),1) LLVM_CFLAGS += -fsanitize=memory -fsanitize-memory-track-origins -LLVM_LDFLAGS += -fsanitize=memory -fsanitize-memory-track-origins +LLVM_LDFLAGS += -fsanitize=memory -fsanitize-memory-track-origins -rpath $(build_shlibdir) LLVM_CXXFLAGS += -fsanitize=memory -fsanitize-memory-track-origins LLVM_CMAKE += 
-DLLVM_USE_SANITIZER="MemoryWithOrigins" endif @@ -201,7 +205,7 @@ LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \ -DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS)" # change the SONAME of Julia's private LLVM -# i.e. libLLVM-6.0jl.so +# i.e. libLLVM-14jl.so # see #32462 LLVM_CMAKE += -DLLVM_VERSION_SUFFIX:STRING="jl" LLVM_CMAKE += -DLLVM_SHLIB_SYMBOL_VERSION:STRING="JL_LLVM_$(LLVM_VER_SHORT)" @@ -210,7 +214,7 @@ LLVM_CMAKE += -DLLVM_SHLIB_SYMBOL_VERSION:STRING="JL_LLVM_$(LLVM_VER_SHORT)" LLVM_PATCH_PREV := define LLVM_PATCH $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV) - cd $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm && patch -p1 < $$(SRCDIR)/patches/$1.patch + cd $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm && patch -p1 -f < $$(SRCDIR)/patches/$1.patch echo 1 > $$@ # declare that applying any patch must re-run the compile step $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied @@ -219,13 +223,15 @@ endef define LLVM_PROJ_PATCH $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV) - cd $$(SRCCACHE)/$$(LLVM_SRC_DIR) && patch -p1 < $$(SRCDIR)/patches/$1.patch + cd $$(SRCCACHE)/$$(LLVM_SRC_DIR) && patch -p1 -f < $$(SRCDIR)/patches/$1.patch echo 1 > $$@ # declare that applying any patch must re-run the compile step $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied endef +$(eval $(call LLVM_PATCH,llvm-ittapi-cmake)) + ifeq ($(USE_SYSTEM_ZLIB), 0) $(LLVM_BUILDDIR_withtype)/build-configured: | $(build_prefix)/manifest/zlib endif @@ -235,6 +241,21 @@ endif # declare that all patches must be applied before running ./configure $(LLVM_BUILDDIR_withtype)/build-configured: | $(LLVM_PATCH_PREV) +# Apply Julia's specific patches if requested, e.g. if not using Julia's fork of LLVM. +ifeq ($(LLVM_APPLY_JULIA_PATCHES), 1) +# Download Julia's patchset. +$(BUILDDIR)/julia-patches.patch: + $(JLDOWNLOAD) $@ $(LLVM_JULIA_DIFF_GITHUB_REPO)/compare/$(LLVM_BASE_REF)...$(LLVM_JULIA_REF).diff + +# Apply the patch. +$(SRCCACHE)/$(LLVM_SRC_DIR)/julia-patches.patch-applied: $(BUILDDIR)/julia-patches.patch $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted + cd $(SRCCACHE)/$(LLVM_SRC_DIR) && patch -p1 -f < $(realpath $<) + echo 1 > $@ + +# Require application of Julia's patchset before configuring LLVM. 
+$(LLVM_BUILDDIR_withtype)/build-configured: | $(SRCCACHE)/$(LLVM_SRC_DIR)/julia-patches.patch-applied +endif + $(LLVM_BUILDDIR_withtype)/build-configured: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ @@ -258,10 +279,10 @@ endif LLVM_INSTALL = \ cd $1 && mkdir -p $2$$(build_depsbindir) && \ - cp -r $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm/utils/lit $2$$(build_depsbindir)/ && \ - $$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P cmake_install.cmake + cp -r $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm/utils/lit $2$$(build_depsbindir)/ && \ + $$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P cmake_install.cmake ifeq ($(OS), WINNT) -LLVM_INSTALL += && cp $2$$(build_shlibdir)/libLLVM.dll $2$$(build_depsbindir) +LLVM_INSTALL += && cp $2$$(build_shlibdir)/$(LLVM_SHARED_LIB_NAME).dll $2$$(build_depsbindir) endif ifeq ($(OS),Darwin) # https://github.com/JuliaLang/julia/issues/29981 @@ -282,6 +303,11 @@ configure-llvm: $(LLVM_BUILDDIR_withtype)/build-configured compile-llvm: $(LLVM_BUILDDIR_withtype)/build-compiled fastcheck-llvm: #none check-llvm: $(LLVM_BUILDDIR_withtype)/build-checked + +ifeq ($(USE_INTEL_JITEVENTS),1) +$(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted +endif + #todo: LLVM make check target is broken on julia.mit.edu (and really slow elsewhere) else # USE_BINARYBUILDER_LLVM @@ -304,10 +330,11 @@ LLVM_TOOLS_JLL_TAGS := -llvm_version+$(LLVM_VER_MAJ) endif $(eval $(call bb-install,llvm,LLVM,false,true)) -$(eval $(call bb-install,clang,CLANG,false,true)) $(eval $(call bb-install,lld,LLD,false,true)) +$(eval $(call bb-install,clang,CLANG,false,true)) $(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true)) -install-lld install-clang install-llvm-tools: install-llvm - endif # USE_BINARYBUILDER_LLVM + +get-lld: get-llvm +install-lld install-clang install-llvm-tools: install-llvm diff --git a/deps/llvm.version b/deps/llvm.version index 232d44a614c15..918889e016f2c 100644 --- a/deps/llvm.version +++ b/deps/llvm.version @@ -1,2 +1,23 @@ -LLVM_BRANCH=julia-14.0.5-0 -LLVM_SHA1=julia-14.0.5-0 +# -*- makefile -*- + +## jll artifact +LLVM_JLL_NAME := libLLVM +LLVM_ASSERT_JLL_VER := 15.0.7+10 +## source build +# Version number of LLVM +LLVM_VER := 15.0.7 +# Git branch name in `LLVM_GIT_URL` repository +LLVM_BRANCH=julia-15.0.7-10 +# Git ref in `LLVM_GIT_URL` repository +LLVM_SHA1=julia-15.0.7-10 + +## Following options are used to automatically fetch patchset from Julia's fork. This is +## useful if you want to build an external LLVM while still applying Julia's patches. +# Set to 1 if you want to automatically apply Julia's patches to a different fork of LLVM. +LLVM_APPLY_JULIA_PATCHES := 0 +# GitHub repository to use for fetching the Julia patches to apply to LLVM source code. +LLVM_JULIA_DIFF_GITHUB_REPO := https://github.com/llvm/llvm-project +# Base GitHub ref for generating the diff. +LLVM_BASE_REF := llvm:llvmorg-15.0.7 +# Julia fork's GitHub ref for generating the diff. 
+LLVM_JULIA_REF := JuliaLang:julia-15.0.7-9 diff --git a/deps/llvmunwind.version b/deps/llvmunwind.version new file mode 100644 index 0000000000000..7d13af9a158f7 --- /dev/null +++ b/deps/llvmunwind.version @@ -0,0 +1,5 @@ +## jll artifact +LLVMUNWIND_JLL_NAME := LLVMLibUnwind + +## source build +LLVMUNWIND_VER := 12.0.1 diff --git a/deps/mbedtls.mk b/deps/mbedtls.mk index 12788e1c03a1c..b4147c2c2684e 100644 --- a/deps/mbedtls.mk +++ b/deps/mbedtls.mk @@ -1,8 +1,9 @@ ## mbedtls +include $(SRCDIR)/mbedtls.version ifneq ($(USE_BINARYBUILDER_MBEDTLS), 1) MBEDTLS_SRC = mbedtls-$(MBEDTLS_VER) -MBEDTLS_URL = https://github.com/ARMmbed/mbedtls/archive/v$(MBEDTLS_VER).tar.gz +MBEDTLS_URL = https://github.com/Mbed-TLS/mbedtls/archive/v$(MBEDTLS_VER).tar.gz MBEDTLS_OPTS := $(CMAKE_COMMON) -DUSE_SHARED_MBEDTLS_LIBRARY=ON \ -DUSE_STATIC_MBEDTLS_LIBRARY=OFF -DENABLE_PROGRAMS=OFF -DCMAKE_BUILD_TYPE=Release diff --git a/deps/mbedtls.version b/deps/mbedtls.version new file mode 100644 index 0000000000000..f262476af1684 --- /dev/null +++ b/deps/mbedtls.version @@ -0,0 +1,5 @@ +## jll artifact +MBEDTLS_JLL_NAME := MbedTLS + +## source build +MBEDTLS_VER := 2.28.2 diff --git a/deps/mpfr.mk b/deps/mpfr.mk index 4598a319df6d5..5a0605ba6b601 100644 --- a/deps/mpfr.mk +++ b/deps/mpfr.mk @@ -1,4 +1,5 @@ ## MPFR ## +include $(SRCDIR)/mpfr.version ifeq ($(USE_SYSTEM_GMP), 0) $(BUILDDIR)/mpfr-$(MPFR_VER)/build-configured: | $(build_prefix)/manifest/gmp @@ -6,32 +7,29 @@ endif ifneq ($(USE_BINARYBUILDER_MPFR),1) -MPFR_OPTS := --enable-thread-safe --enable-shared-cache --disable-float128 --disable-decimal-float -ifeq ($(USE_SYSTEM_GMP), 0) -MPFR_OPTS += --with-gmp-include=$(abspath $(build_includedir)) --with-gmp-lib=$(abspath $(build_shlibdir)) -endif -ifeq ($(BUILD_OS),WINNT) -ifeq ($(OS),WINNT) -MPFR_OPTS += CFLAGS="$(CFLAGS) -DNPRINTF_L -DNPRINTF_T -DNPRINTF_J" -endif -endif - +MPFR_CONFIGURE_OPTS := $(CONFIGURE_COMMON) +MPFR_CONFIGURE_OPTS += --enable-thread-safe --enable-shared-cache --disable-float128 --disable-decimal-float +MPFR_CONFIGURE_OPTS += --enable-shared --disable-static -ifeq ($(OS),Darwin) -MPFR_CHECK_MFLAGS := LDFLAGS="$(LDFLAGS) -Wl,-rpath,'$(build_libdir)'" +ifeq ($(USE_SYSTEM_GMP), 0) +MPFR_CONFIGURE_OPTS += --with-gmp=$(abspath $(build_prefix)) endif ifeq ($(SANITIZE),1) # Force generic C build -MPFR_OPTS += --host=none-unknown-linux +MPFR_CONFIGURE_OPTS += --host=none-unknown-linux +endif + +ifeq ($(OS),emscripten) +MPFR_CONFIGURE_OPTS += CFLAGS="-fPIC" endif $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2: | $(SRCCACHE) $(JLDOWNLOAD) $@ https://www.mpfr.org/mpfr-$(MPFR_VER)/$(notdir $@) + $(SRCCACHE)/mpfr-$(MPFR_VER)/source-extracted: $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2 $(JLCHECKSUM) $< cd $(dir $<) && $(TAR) -jxf $< - cp $(SRCDIR)/patches/config.sub $(SRCCACHE)/mpfr-$(MPFR_VER)/config.sub touch -c $(SRCCACHE)/mpfr-$(MPFR_VER)/configure # old target echo 1 > $@ @@ -41,23 +39,24 @@ checksum-mpfr: $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2 $(BUILDDIR)/mpfr-$(MPFR_VER)/build-configured: $(SRCCACHE)/mpfr-$(MPFR_VER)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ - $(dir $<)/configure $(CONFIGURE_COMMON) $(MPFR_OPTS) F77= --enable-shared --disable-static + $(dir $<)/configure $(MPFR_CONFIGURE_OPTS) echo 1 > $@ $(BUILDDIR)/mpfr-$(MPFR_VER)/build-compiled: $(BUILDDIR)/mpfr-$(MPFR_VER)/build-configured - $(MAKE) -C $(dir $<) $(LIBTOOL_CCLD) + $(MAKE) -C $(dir $<) echo 1 > $@ $(BUILDDIR)/mpfr-$(MPFR_VER)/build-checked: $(BUILDDIR)/mpfr-$(MPFR_VER)/build-compiled ifeq ($(OS),$(BUILD_OS)) - $(MAKE) -C $(dir $@) 
$(LIBTOOL_CCLD) check $(MPFR_CHECK_MFLAGS) + $(MAKE) -C $(dir $@) check endif echo 1 > $@ $(eval $(call staged-install, \ mpfr,mpfr-$(MPFR_VER), \ - MAKE_INSTALL,$$(LIBTOOL_CCLD),, \ - $$(INSTALL_NAME_CMD)libmpfr.$$(SHLIB_EXT) $$(build_shlibdir)/libmpfr.$$(SHLIB_EXT))) + MAKE_INSTALL,,, \ + $$(WIN_MAKE_HARD_LINK) $(build_bindir)/libmpfr-*.dll $(build_bindir)/libmpfr.dll && \ + $$(INSTALL_NAME_CMD)libmpfr.$$(SHLIB_EXT) $$(build_shlibdir)/libmpfr.$$(SHLIB_EXT))) clean-mpfr: -rm -f $(BUILDDIR)/mpfr-$(MPFR_VER)/build-configured $(BUILDDIR)/mpfr-$(MPFR_VER)/build-compiled diff --git a/deps/mpfr.version b/deps/mpfr.version new file mode 100644 index 0000000000000..e4f1c8a45aeb0 --- /dev/null +++ b/deps/mpfr.version @@ -0,0 +1,5 @@ +## jll artifact +MPFR_JLL_NAME := MPFR + +## source build +MPFR_VER := 4.2.0 diff --git a/deps/nghttp2.mk b/deps/nghttp2.mk index 54fd6a241eaba..5c12a0155c017 100644 --- a/deps/nghttp2.mk +++ b/deps/nghttp2.mk @@ -1,4 +1,5 @@ ## nghttp2 +include $(SRCDIR)/nghttp2.version ifneq ($(USE_BINARYBUILDER_NGHTTP2), 1) diff --git a/deps/nghttp2.version b/deps/nghttp2.version new file mode 100644 index 0000000000000..680be055e4443 --- /dev/null +++ b/deps/nghttp2.version @@ -0,0 +1,6 @@ +# -*- makefile -*- +## jll artifact +NGHTTP2_JLL_NAME := nghttp2 + +## source build +NGHTTP2_VER := 1.58.0 diff --git a/deps/objconv.mk b/deps/objconv.mk index 8423e476d37c6..70c7289b07bfa 100644 --- a/deps/objconv.mk +++ b/deps/objconv.mk @@ -1,4 +1,5 @@ ## objconv ## +include $(SRCDIR)/objconv.version ifneq ($(USE_BINARYBUILDER_OBJCONV),1) diff --git a/deps/objconv.version b/deps/objconv.version new file mode 100644 index 0000000000000..185354e23b9e1 --- /dev/null +++ b/deps/objconv.version @@ -0,0 +1,7 @@ +## jll artifact +# Objconv (we don't ship this, so no need for a fake JLL; therefore we specify the JLL_VER here instead of in a `stdlib/Objconv_jll/Project.toml` file) +OBJCONV_JLL_NAME := Objconv +OBJCONV_JLL_VER := 2.53.0+0 + +## source build +OBJCONV_VER := 2.53.0 diff --git a/deps/openblas.mk b/deps/openblas.mk index 770ca978deaa7..d890a5be6046a 100644 --- a/deps/openblas.mk +++ b/deps/openblas.mk @@ -5,12 +5,12 @@ OPENBLAS_GIT_URL := https://github.com/xianyi/OpenBLAS.git OPENBLAS_TAR_URL = https://api.github.com/repos/xianyi/OpenBLAS/tarball/$1 $(eval $(call git-external,openblas,OPENBLAS,,,$(BUILDDIR))) -OPENBLAS_BUILD_OPTS := CC="$(CC)" FC="$(FC)" LD="$(LD)" RANLIB="$(RANLIB)" TARGET=$(OPENBLAS_TARGET_ARCH) BINARY=$(BINARY) +OPENBLAS_BUILD_OPTS := CC="$(CC) $(SANITIZE_OPTS)" FC="$(FC) $(SANITIZE_OPTS)" LD="$(LD) $(SANITIZE_LDFLAGS)" RANLIB="$(RANLIB)" BINARY=$(BINARY) # Thread support ifeq ($(OPENBLAS_USE_THREAD), 1) OPENBLAS_BUILD_OPTS += USE_THREAD=1 -OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=50 +OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=400 # Maximum number of threads for parallelism OPENBLAS_BUILD_OPTS += NUM_THREADS=512 else @@ -21,9 +21,14 @@ endif OPENBLAS_BUILD_OPTS += NO_AFFINITY=1 # Build for all architectures - required for distribution +ifeq ($(SANITIZE_MEMORY),1) +OPENBLAS_BUILD_OPTS += TARGET=GENERIC +else +OPENBLAS_BUILD_OPTS += TARGET=$(OPENBLAS_TARGET_ARCH) ifeq ($(OPENBLAS_DYNAMIC_ARCH), 1) OPENBLAS_BUILD_OPTS += DYNAMIC_ARCH=1 endif +endif # 64-bit BLAS interface ifeq ($(USE_BLAS64), 1) diff --git a/deps/openblas.version b/deps/openblas.version index ceb01600b0ea7..7b16df95c9c7f 100644 --- a/deps/openblas.version +++ b/deps/openblas.version @@ -1,2 +1,11 @@ -OPENBLAS_BRANCH=v0.3.20 -OPENBLAS_SHA1=0b678b19dc03f2a999d6e038814c4c50b9640a4e +# 
-*- makefile -*- +## jll artifact +OPENBLAS_JLL_NAME := OpenBLAS + +## source build +OPENBLAS_VER := 0.3.25 +OPENBLAS_BRANCH=v0.3.25 +OPENBLAS_SHA1=5e1a429eab44731b6668b8f6043c1ea951b0a80b + +# LAPACK, source-only +LAPACK_VER := 3.9.0 diff --git a/deps/openlibm.version b/deps/openlibm.version index 9edba0c1f257b..f35b291260380 100644 --- a/deps/openlibm.version +++ b/deps/openlibm.version @@ -1,2 +1,7 @@ +## jll artifact +OPENLIBM_JLL_NAME := OpenLibm + +## source build +OPENLIBM_VER := 0.8.1 OPENLIBM_BRANCH=v0.8.1 OPENLIBM_SHA1=ae2d91698508701c83cab83714d42a1146dccf85 diff --git a/deps/p7zip.mk b/deps/p7zip.mk index d1e9e653e123b..c7c2874d49a5e 100644 --- a/deps/p7zip.mk +++ b/deps/p7zip.mk @@ -1,9 +1,10 @@ ## p7zip ## +include $(SRCDIR)/p7zip.version ifneq ($(USE_BINARYBUILDER_P7ZIP),1) $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.gz: | $(SRCCACHE) - $(JLDOWNLOAD) $@ https://github.com/jinfeihan57/p7zip/archive/refs/tags/v$(P7ZIP_VER).tar.gz + $(JLDOWNLOAD) $@ https://github.com/p7zip-project/p7zip/archive/refs/tags/v$(P7ZIP_VER).tar.gz $(BUILDDIR)/p7zip-$(P7ZIP_VER)/source-extracted: $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.gz $(JLCHECKSUM) $< diff --git a/deps/p7zip.version b/deps/p7zip.version new file mode 100644 index 0000000000000..d4a13155d9162 --- /dev/null +++ b/deps/p7zip.version @@ -0,0 +1,5 @@ +## jll artifact +P7ZIP_JLL_NAME := p7zip + +## source build +P7ZIP_VER := 17.04 diff --git a/deps/patchelf.mk b/deps/patchelf.mk index 4d1a281ed2331..9b4947f183117 100644 --- a/deps/patchelf.mk +++ b/deps/patchelf.mk @@ -1,4 +1,5 @@ ## patchelf ## +include $(SRCDIR)/patchelf.version $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2: | $(SRCCACHE) $(JLDOWNLOAD) $@ https://github.com/NixOS/patchelf/releases/download/$(PATCHELF_VER)/patchelf-$(PATCHELF_VER).tar.bz2 diff --git a/deps/patchelf.version b/deps/patchelf.version new file mode 100644 index 0000000000000..bbeaa87d25136 --- /dev/null +++ b/deps/patchelf.version @@ -0,0 +1,3 @@ +## source build +# Patchelf (we don't ship this or even use a JLL, we just always build it) +PATCHELF_VER := 0.13 diff --git a/deps/patches/config.sub b/deps/patches/config.sub deleted file mode 100755 index 3d9a8dc3d5a76..0000000000000 --- a/deps/patches/config.sub +++ /dev/null @@ -1,1851 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script. -# Copyright 1992-2020 Free Software Foundation, Inc. - -timestamp='2020-07-10' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see . -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that -# program. This Exception is an additional permission under section 7 -# of the GNU General Public License, version 3 ("GPLv3"). - - -# Please send patches to . -# -# Configuration subroutine to validate and canonicalize a configuration type. 
-# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# You can get the latest version of this script from: -# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. - -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS - -Canonicalize a configuration name. - -Options: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.sub ($timestamp) - -Copyright 1992-2020 Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - - *local*) - # First pass through any local machine types. 
- echo "$1" - exit ;; - - * ) - break ;; - esac -done - -case $# in - 0) echo "$me: missing argument$help" >&2 - exit 1;; - 1) ;; - *) echo "$me: too many arguments$help" >&2 - exit 1;; -esac - -# Split fields of configuration type -# shellcheck disable=SC2162 -IFS="-" read field1 field2 field3 field4 <&2 - exit 1 - ;; - *-*-*-*) - basic_machine=$field1-$field2 - basic_os=$field3-$field4 - ;; - *-*-*) - # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two - # parts - maybe_os=$field2-$field3 - case $maybe_os in - nto-qnx* | linux-* | uclinux-uclibc* \ - | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ - | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ - | storm-chaos* | os2-emx* | rtmk-nova*) - basic_machine=$field1 - basic_os=$maybe_os - ;; - android-linux) - basic_machine=$field1-unknown - basic_os=linux-android - ;; - *) - basic_machine=$field1-$field2 - basic_os=$field3 - ;; - esac - ;; - *-*) - # A lone config we happen to match not fitting any pattern - case $field1-$field2 in - decstation-3100) - basic_machine=mips-dec - basic_os= - ;; - *-*) - # Second component is usually, but not always the OS - case $field2 in - # Prevent following clause from handling this valid os - sun*os*) - basic_machine=$field1 - basic_os=$field2 - ;; - # Manufacturers - dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ - | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ - | unicom* | ibm* | next | hp | isi* | apollo | altos* \ - | convergent* | ncr* | news | 32* | 3600* | 3100* \ - | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ - | ultra | tti* | harris | dolphin | highlevel | gould \ - | cbm | ns | masscomp | apple | axis | knuth | cray \ - | microblaze* | sim | cisco \ - | oki | wec | wrs | winbond) - basic_machine=$field1-$field2 - basic_os= - ;; - *) - basic_machine=$field1 - basic_os=$field2 - ;; - esac - ;; - esac - ;; - *) - # Convert single-component short-hands not valid as part of - # multi-component configurations. 
- case $field1 in - 386bsd) - basic_machine=i386-pc - basic_os=bsd - ;; - a29khif) - basic_machine=a29k-amd - basic_os=udi - ;; - adobe68k) - basic_machine=m68010-adobe - basic_os=scout - ;; - alliant) - basic_machine=fx80-alliant - basic_os= - ;; - altos | altos3068) - basic_machine=m68k-altos - basic_os= - ;; - am29k) - basic_machine=a29k-none - basic_os=bsd - ;; - amdahl) - basic_machine=580-amdahl - basic_os=sysv - ;; - amiga) - basic_machine=m68k-unknown - basic_os= - ;; - amigaos | amigados) - basic_machine=m68k-unknown - basic_os=amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - basic_os=sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - basic_os=sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - basic_os=bsd - ;; - aros) - basic_machine=i386-pc - basic_os=aros - ;; - aux) - basic_machine=m68k-apple - basic_os=aux - ;; - balance) - basic_machine=ns32k-sequent - basic_os=dynix - ;; - blackfin) - basic_machine=bfin-unknown - basic_os=linux - ;; - cegcc) - basic_machine=arm-unknown - basic_os=cegcc - ;; - convex-c1) - basic_machine=c1-convex - basic_os=bsd - ;; - convex-c2) - basic_machine=c2-convex - basic_os=bsd - ;; - convex-c32) - basic_machine=c32-convex - basic_os=bsd - ;; - convex-c34) - basic_machine=c34-convex - basic_os=bsd - ;; - convex-c38) - basic_machine=c38-convex - basic_os=bsd - ;; - cray) - basic_machine=j90-cray - basic_os=unicos - ;; - crds | unos) - basic_machine=m68k-crds - basic_os= - ;; - da30) - basic_machine=m68k-da30 - basic_os= - ;; - decstation | pmax | pmin | dec3100 | decstatn) - basic_machine=mips-dec - basic_os= - ;; - delta88) - basic_machine=m88k-motorola - basic_os=sysv3 - ;; - dicos) - basic_machine=i686-pc - basic_os=dicos - ;; - djgpp) - basic_machine=i586-pc - basic_os=msdosdjgpp - ;; - ebmon29k) - basic_machine=a29k-amd - basic_os=ebmon - ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - basic_os=ose - ;; - gmicro) - basic_machine=tron-gmicro - basic_os=sysv - ;; - go32) - basic_machine=i386-pc - basic_os=go32 - ;; - h8300hms) - basic_machine=h8300-hitachi - basic_os=hms - ;; - h8300xray) - basic_machine=h8300-hitachi - basic_os=xray - ;; - h8500hms) - basic_machine=h8500-hitachi - basic_os=hms - ;; - harris) - basic_machine=m88k-harris - basic_os=sysv3 - ;; - hp300 | hp300hpux) - basic_machine=m68k-hp - basic_os=hpux - ;; - hp300bsd) - basic_machine=m68k-hp - basic_os=bsd - ;; - hppaosf) - basic_machine=hppa1.1-hp - basic_os=osf - ;; - hppro) - basic_machine=hppa1.1-hp - basic_os=proelf - ;; - i386mach) - basic_machine=i386-mach - basic_os=mach - ;; - isi68 | isi) - basic_machine=m68k-isi - basic_os=sysv - ;; - m68knommu) - basic_machine=m68k-unknown - basic_os=linux - ;; - magnum | m3230) - basic_machine=mips-mips - basic_os=sysv - ;; - merlin) - basic_machine=ns32k-utek - basic_os=sysv - ;; - mingw64) - basic_machine=x86_64-pc - basic_os=mingw64 - ;; - mingw32) - basic_machine=i686-pc - basic_os=mingw32 - ;; - mingw32ce) - basic_machine=arm-unknown - basic_os=mingw32ce - ;; - monitor) - basic_machine=m68k-rom68k - basic_os=coff - ;; - morphos) - basic_machine=powerpc-unknown - basic_os=morphos - ;; - moxiebox) - basic_machine=moxie-unknown - basic_os=moxiebox - ;; - msdos) - basic_machine=i386-pc - basic_os=msdos - ;; - msys) - basic_machine=i686-pc - basic_os=msys - ;; - mvs) - basic_machine=i370-ibm - basic_os=mvs - ;; - nacl) - basic_machine=le32-unknown - basic_os=nacl - ;; - ncr3000) - basic_machine=i486-ncr - basic_os=sysv4 - ;; - netbsd386) - basic_machine=i386-pc - basic_os=netbsd - ;; - 
netwinder) - basic_machine=armv4l-rebel - basic_os=linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - basic_os=newsos - ;; - news1000) - basic_machine=m68030-sony - basic_os=newsos - ;; - necv70) - basic_machine=v70-nec - basic_os=sysv - ;; - nh3000) - basic_machine=m68k-harris - basic_os=cxux - ;; - nh[45]000) - basic_machine=m88k-harris - basic_os=cxux - ;; - nindy960) - basic_machine=i960-intel - basic_os=nindy - ;; - mon960) - basic_machine=i960-intel - basic_os=mon960 - ;; - nonstopux) - basic_machine=mips-compaq - basic_os=nonstopux - ;; - os400) - basic_machine=powerpc-ibm - basic_os=os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - basic_os=ose - ;; - os68k) - basic_machine=m68k-none - basic_os=os68k - ;; - paragon) - basic_machine=i860-intel - basic_os=osf - ;; - parisc) - basic_machine=hppa-unknown - basic_os=linux - ;; - psp) - basic_machine=mipsallegrexel-sony - basic_os=psp - ;; - pw32) - basic_machine=i586-unknown - basic_os=pw32 - ;; - rdos | rdos64) - basic_machine=x86_64-pc - basic_os=rdos - ;; - rdos32) - basic_machine=i386-pc - basic_os=rdos - ;; - rom68k) - basic_machine=m68k-rom68k - basic_os=coff - ;; - sa29200) - basic_machine=a29k-amd - basic_os=udi - ;; - sei) - basic_machine=mips-sei - basic_os=seiux - ;; - sequent) - basic_machine=i386-sequent - basic_os= - ;; - sps7) - basic_machine=m68k-bull - basic_os=sysv2 - ;; - st2000) - basic_machine=m68k-tandem - basic_os= - ;; - stratus) - basic_machine=i860-stratus - basic_os=sysv4 - ;; - sun2) - basic_machine=m68000-sun - basic_os= - ;; - sun2os3) - basic_machine=m68000-sun - basic_os=sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - basic_os=sunos4 - ;; - sun3) - basic_machine=m68k-sun - basic_os= - ;; - sun3os3) - basic_machine=m68k-sun - basic_os=sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - basic_os=sunos4 - ;; - sun4) - basic_machine=sparc-sun - basic_os= - ;; - sun4os3) - basic_machine=sparc-sun - basic_os=sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - basic_os=sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - basic_os=solaris2 - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - basic_os= - ;; - sv1) - basic_machine=sv1-cray - basic_os=unicos - ;; - symmetry) - basic_machine=i386-sequent - basic_os=dynix - ;; - t3e) - basic_machine=alphaev5-cray - basic_os=unicos - ;; - t90) - basic_machine=t90-cray - basic_os=unicos - ;; - toad1) - basic_machine=pdp10-xkl - basic_os=tops20 - ;; - tpf) - basic_machine=s390x-ibm - basic_os=tpf - ;; - udi29k) - basic_machine=a29k-amd - basic_os=udi - ;; - ultra3) - basic_machine=a29k-nyu - basic_os=sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - basic_os=none - ;; - vaxv) - basic_machine=vax-dec - basic_os=sysv - ;; - vms) - basic_machine=vax-dec - basic_os=vms - ;; - vsta) - basic_machine=i386-pc - basic_os=vsta - ;; - vxworks960) - basic_machine=i960-wrs - basic_os=vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - basic_os=vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - basic_os=vxworks - ;; - xbox) - basic_machine=i686-pc - basic_os=mingw32 - ;; - ymp) - basic_machine=ymp-cray - basic_os=unicos - ;; - *) - basic_machine=$1 - basic_os= - ;; - esac - ;; -esac - -# Decode 1-component or ad-hoc basic machines -case $basic_machine in - # Here we handle the default manufacturer of certain CPU types. It is in - # some cases the only manufacturer, in others, it is the most popular. 
- w89k) - cpu=hppa1.1 - vendor=winbond - ;; - op50n) - cpu=hppa1.1 - vendor=oki - ;; - op60c) - cpu=hppa1.1 - vendor=oki - ;; - ibm*) - cpu=i370 - vendor=ibm - ;; - orion105) - cpu=clipper - vendor=highlevel - ;; - mac | mpw | mac-mpw) - cpu=m68k - vendor=apple - ;; - pmac | pmac-mpw) - cpu=powerpc - vendor=apple - ;; - - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - cpu=m68000 - vendor=att - ;; - 3b*) - cpu=we32k - vendor=att - ;; - bluegene*) - cpu=powerpc - vendor=ibm - basic_os=cnk - ;; - decsystem10* | dec10*) - cpu=pdp10 - vendor=dec - basic_os=tops10 - ;; - decsystem20* | dec20*) - cpu=pdp10 - vendor=dec - basic_os=tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - cpu=m68k - vendor=motorola - ;; - dpx2*) - cpu=m68k - vendor=bull - basic_os=sysv3 - ;; - encore | umax | mmax) - cpu=ns32k - vendor=encore - ;; - elxsi) - cpu=elxsi - vendor=elxsi - basic_os=${basic_os:-bsd} - ;; - fx2800) - cpu=i860 - vendor=alliant - ;; - genix) - cpu=ns32k - vendor=ns - ;; - h3050r* | hiux*) - cpu=hppa1.1 - vendor=hitachi - basic_os=hiuxwe2 - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - cpu=m68000 - vendor=hp - ;; - hp9k3[2-9][0-9]) - cpu=m68k - vendor=hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - cpu=hppa1.1 - vendor=hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - i*86v32) - cpu=`echo "$1" | sed -e 's/86.*/86/'` - vendor=pc - basic_os=sysv32 - ;; - i*86v4*) - cpu=`echo "$1" | sed -e 's/86.*/86/'` - vendor=pc - basic_os=sysv4 - ;; - i*86v) - cpu=`echo "$1" | sed -e 's/86.*/86/'` - vendor=pc - basic_os=sysv - ;; - i*86sol2) - cpu=`echo "$1" | sed -e 's/86.*/86/'` - vendor=pc - basic_os=solaris2 - ;; - j90 | j90-cray) - cpu=j90 - vendor=cray - basic_os=${basic_os:-unicos} - ;; - iris | iris4d) - cpu=mips - vendor=sgi - case $basic_os in - irix*) - ;; - *) - basic_os=irix4 - ;; - esac - ;; - miniframe) - cpu=m68000 - vendor=convergent - ;; - *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) - cpu=m68k - vendor=atari - basic_os=mint - ;; - news-3600 | risc-news) - cpu=mips - vendor=sony - basic_os=newsos - ;; - next | m*-next) - cpu=m68k - vendor=next - case $basic_os in - openstep*) - ;; - nextstep*) - ;; - ns2*) - basic_os=nextstep2 - ;; - *) - basic_os=nextstep3 - ;; - esac - ;; - np1) - cpu=np1 - vendor=gould - ;; - op50n-* | op60c-*) - cpu=hppa1.1 - vendor=oki - basic_os=proelf - ;; - pa-hitachi) - cpu=hppa1.1 - vendor=hitachi - basic_os=hiuxwe2 - ;; - pbd) - cpu=sparc - vendor=tti - ;; - pbb) - cpu=m68k - vendor=tti - ;; - pc532) - cpu=ns32k - vendor=pc532 - ;; - pn) - cpu=pn - vendor=gould - ;; - power) - cpu=power - vendor=ibm - ;; - ps2) - cpu=i386 - vendor=ibm - ;; - rm[46]00) - cpu=mips - vendor=siemens - ;; - rtpc | rtpc-*) - cpu=romp - vendor=ibm - ;; - sde) - cpu=mipsisa32 - vendor=sde - basic_os=${basic_os:-elf} - ;; - simso-wrs) - cpu=sparclite - vendor=wrs - basic_os=vxworks - ;; - tower | tower-32) - cpu=m68k - vendor=ncr - ;; - vpp*|vx|vx-*) - cpu=f301 - 
vendor=fujitsu - ;; - w65) - cpu=w65 - vendor=wdc - ;; - w89k-*) - cpu=hppa1.1 - vendor=winbond - basic_os=proelf - ;; - none) - cpu=none - vendor=none - ;; - leon|leon[3-9]) - cpu=sparc - vendor=$basic_machine - ;; - leon-*|leon[3-9]-*) - cpu=sparc - vendor=`echo "$basic_machine" | sed 's/-.*//'` - ;; - - *-*) - # shellcheck disable=SC2162 - IFS="-" read cpu vendor <&2 - exit 1 - ;; - esac - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. -case $vendor in - digital*) - vendor=dec - ;; - commodore*) - vendor=cbm - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if [ x$basic_os != x ] -then - -# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just -# set os. -case $basic_os in - gnu/linux*) - kernel=linux - os=`echo $basic_os | sed -e 's|gnu/linux|gnu|'` - ;; - nto-qnx*) - kernel=nto - os=`echo $basic_os | sed -e 's|nto-qnx|qnx|'` - ;; - *-*) - # shellcheck disable=SC2162 - IFS="-" read kernel os <&2 - exit 1 - ;; -esac - -# As a final step for OS-related things, validate the OS-kernel combination -# (given a valid OS), if there is a kernel. -case $kernel-$os in - linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* ) - ;; - -dietlibc* | -newlib* | -musl* | -uclibc* ) - # These are just libc implementations, not actual OSes, and thus - # require a kernel. - echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 - exit 1 - ;; - kfreebsd*-gnu* | kopensolaris*-gnu*) - ;; - nto-qnx*) - ;; - *-eabi* | *-gnueabi*) - ;; - -*) - # Blank kernel with real OS is always fine. - ;; - *-*) - echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 - exit 1 - ;; -esac - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. 
-case $vendor in - unknown) - case $cpu-$os in - *-riscix*) - vendor=acorn - ;; - *-sunos*) - vendor=sun - ;; - *-cnk* | *-aix*) - vendor=ibm - ;; - *-beos*) - vendor=be - ;; - *-hpux*) - vendor=hp - ;; - *-mpeix*) - vendor=hp - ;; - *-hiux*) - vendor=hitachi - ;; - *-unos*) - vendor=crds - ;; - *-dgux*) - vendor=dg - ;; - *-luna*) - vendor=omron - ;; - *-genix*) - vendor=ns - ;; - *-clix*) - vendor=intergraph - ;; - *-mvs* | *-opened*) - vendor=ibm - ;; - *-os400*) - vendor=ibm - ;; - s390-* | s390x-*) - vendor=ibm - ;; - *-ptx*) - vendor=sequent - ;; - *-tpf*) - vendor=ibm - ;; - *-vxsim* | *-vxworks* | *-windiss*) - vendor=wrs - ;; - *-aux*) - vendor=apple - ;; - *-hms*) - vendor=hitachi - ;; - *-mpw* | *-macos*) - vendor=apple - ;; - *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) - vendor=atari - ;; - *-vos*) - vendor=stratus - ;; - esac - ;; -esac - -echo "$cpu-$vendor-${kernel:+$kernel-}$os" -exit - -# Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/deps/patches/gmp-more_alloc_overflow.patch b/deps/patches/gmp-more_alloc_overflow.patch new file mode 100644 index 0000000000000..597f0d52d73e7 --- /dev/null +++ b/deps/patches/gmp-more_alloc_overflow.patch @@ -0,0 +1,37 @@ +diff -ur a/mpz/n_pow_ui.c b/mpz/n_pow_ui.c +--- a/mpz/n_pow_ui.c ++++ b/mpz/n_pow_ui.c +@@ -220,8 +220,7 @@ + umul_ppmm (ovfl, rtwos_bits, e, btwos); + if (ovfl) + { +- fprintf (stderr, "gmp: overflow in mpz type\n"); +- abort (); ++ __GMP_ALLOC_OVERFLOW_FUNC (); + } + + rtwos_limbs += rtwos_bits / GMP_NUMB_BITS; +@@ -382,8 +381,7 @@ + umul_ppmm (ovfl, ralloc, (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS), e); + if (ovfl) + { +- fprintf (stderr, "gmp: overflow in mpz type\n"); +- abort (); ++ __GMP_ALLOC_OVERFLOW_FUNC (); + } + ralloc = ralloc / GMP_NUMB_BITS + 5; + +diff -ur a/tal-reent.c b/tal-reent.c +--- a/tal-reent.c ++++ b/tal-reent.c +@@ -61,6 +61,10 @@ + + total_size = size + HSIZ; + p = __GMP_ALLOCATE_FUNC_TYPE (total_size, char); ++ if (!p) ++ { ++ __GMP_ALLOC_OVERFLOW_FUNC (); ++ } + P->size = total_size; + P->next = *markp; + *markp = P; diff --git a/deps/patches/libTracyClient-freebsd-elfw.patch b/deps/patches/libTracyClient-freebsd-elfw.patch new file mode 100644 index 0000000000000..8feb738714e11 --- /dev/null +++ b/deps/patches/libTracyClient-freebsd-elfw.patch @@ -0,0 +1,33 @@ +diff --git a/public/TracyClient.cpp b/public/TracyClient.cpp +index 77f81a4a..ebeb65c9 100644 +--- a/public/TracyClient.cpp ++++ b/public/TracyClient.cpp +@@ -19,6 +19,28 @@ + # pragma warning(push, 0) + #endif + ++#ifndef ElfW ++# if defined(FREEBSD) ++# if __ELF_WORD_SIZE == 32 ++# define ElfW(type) Elf32_##type ++# else ++# define ElfW(type) Elf64_##type ++# endif ++# elif defined(NETBSD) || defined(OPENBSD) ++# if ELFSIZE == 32 ++# define ElfW(type) Elf32_##type ++# else ++# define ElfW(type) Elf64_##type ++# endif ++# else ++# if !defined(ELF_CLASS) || ELF_CLASS == ELFCLASS32 ++# define ElfW(type) Elf32_##type ++# else ++# define ElfW(type) Elf64_##type ++# endif ++# endif ++#endif ++ + #include "common/tracy_lz4.cpp" + #include "client/TracyProfiler.cpp" + #include "client/TracyCallstack.cpp" diff --git a/deps/patches/libTracyClient-no-sampling.patch b/deps/patches/libTracyClient-no-sampling.patch new file mode 100644 index 0000000000000..c4c8576099348 --- /dev/null +++ b/deps/patches/libTracyClient-no-sampling.patch @@ -0,0 +1,79 @@ +commit 6249999153a9497b32bc84e9dc95a1537a0af714 
+Author: Cody Tapscott +Date: Tue Apr 4 15:20:46 2023 -0400 + + linux: respect `TRACY_NO_SAMPLING` for sys-tracing + + This compile-time flag was being ignored on Linux. This change adds + gating for software-sampled stack trace sampling following the same + pattern as other `TRACY_NO_SAMPLE_*` options. + + If `TRACY_NO_SAMPLING=1` is provided as an environment variable, + software stack sampling is also disabled. + +diff --git a/public/client/TracySysTrace.cpp b/public/client/TracySysTrace.cpp +index 4a562eaa..af0641fe 100644 +--- a/public/client/TracySysTrace.cpp ++++ b/public/client/TracySysTrace.cpp +@@ -770,6 +770,13 @@ bool SysTraceStart( int64_t& samplingPeriod ) + TracyDebug( "sched_wakeup id: %i\n", wakeupId ); + TracyDebug( "drm_vblank_event id: %i\n", vsyncId ); + ++#ifdef TRACY_NO_SAMPLING ++ const bool noSoftwareSampling = true; ++#else ++ const char* noSoftwareSamplingEnv = GetEnvVar( "TRACY_NO_SAMPLING" ); ++ const bool noSoftwareSampling = noSoftwareSamplingEnv && noSoftwareSamplingEnv[0] == '1'; ++#endif ++ + #ifdef TRACY_NO_SAMPLE_RETIREMENT + const bool noRetirement = true; + #else +@@ -839,28 +846,31 @@ bool SysTraceStart( int64_t& samplingPeriod ) + pe.clockid = CLOCK_MONOTONIC_RAW; + #endif + +- TracyDebug( "Setup software sampling\n" ); +- ProbePreciseIp( pe, currentPid ); +- for( int i=0; i +Date: Wed Mar 8 23:18:36 2023 +0100 + + Add support for configuring plots to C API. + +diff --git a/public/client/TracyProfiler.cpp b/public/client/TracyProfiler.cpp +index 6104a7ed..38b5ea13 100644 +--- a/public/client/TracyProfiler.cpp ++++ b/public/client/TracyProfiler.cpp +@@ -4149,6 +4149,7 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_ + TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); } + TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); } + TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); } ++TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step, fill, color ); } + TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); } + TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); } + TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); } +diff --git a/public/tracy/TracyC.h b/public/tracy/TracyC.h +index bedf5e16..736b51ed 100644 +--- a/public/tracy/TracyC.h ++++ b/public/tracy/TracyC.h +@@ -11,6 +11,13 @@ + extern "C" { + #endif + ++enum TracyPlotFormatEnum ++{ ++ TracyPlotFormatNumber, ++ TracyPlotFormatMemory, ++ TracyPlotFormatPercentage, ++}; ++ + TRACY_API void ___tracy_set_thread_name( const char* name ); + + #define TracyCSetThreadName( name ) ___tracy_set_thread_name( name ); +@@ -60,6 +67,8 @@ typedef const void* TracyCZoneCtx; + #define TracyCPlot(x,y) + #define TracyCPlotF(x,y) + #define TracyCPlotI(x,y) ++#define TracyCPlotConfig(x,y,z,w,a) ++ + #define TracyCMessage(x,y) + #define TracyCMessageL(x) + #define TracyCMessageC(x,y,z) +@@ -289,11 +298,13 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_ + TRACY_API void 
___tracy_emit_plot( const char* name, double val ); + TRACY_API void ___tracy_emit_plot_float( const char* name, float val ); + TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ); ++TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ); + TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); + + #define TracyCPlot( name, val ) ___tracy_emit_plot( name, val ); + #define TracyCPlotF( name, val ) ___tracy_emit_plot_float( name, val ); + #define TracyCPlotI( name, val ) ___tracy_emit_plot_int( name, val ); ++#define TracyCPlotConfig( name, type, step, fill, color ) ___tracy_emit_plot_config( name, type, step, fill, color ); + #define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size ); diff --git a/deps/patches/libgit2-agent-nonfatal.patch b/deps/patches/libgit2-agent-nonfatal.patch deleted file mode 100644 index 3ada9ecaed93f..0000000000000 --- a/deps/patches/libgit2-agent-nonfatal.patch +++ /dev/null @@ -1,25 +0,0 @@ -commit 70020247d1903c7a1262d967cf205a44dc6f6ebe -Author: Keno Fischer -Date: Wed Jul 20 19:59:00 2016 -0400 - - Make failure to connect to ssh-agent non-fatal - - Julia issue: https://github.com/JuliaLang/julia/pull/17459 - Upstream: https://github.com/libgit2/libgit2/issues/3866 - -diff --git a/src/transports/ssh.c b/src/transports/ssh.c -index cfd5736..82d2c63 100644 ---- a/src/transports/ssh.c -+++ b/src/transports/ssh.c -@@ -296,8 +296,10 @@ static int ssh_agent_auth(LIBSSH2_SESSION *session, git_cred_ssh_key *c) { - - rc = libssh2_agent_connect(agent); - -- if (rc != LIBSSH2_ERROR_NONE) -+ if (rc != LIBSSH2_ERROR_NONE) { -+ rc = LIBSSH2_ERROR_AUTHENTICATION_FAILED; - goto shutdown; -+ } - - rc = libssh2_agent_list_identities(agent); - diff --git a/deps/patches/libgit2-hostkey.patch b/deps/patches/libgit2-hostkey.patch deleted file mode 100644 index 3791d4f19aae6..0000000000000 --- a/deps/patches/libgit2-hostkey.patch +++ /dev/null @@ -1,32 +0,0 @@ -diff --git a/src/transports/ssh.c b/src/transports/ssh.c -index 89f085230..b8bdca61a 100644 ---- a/src/transports/ssh.c -+++ b/src/transports/ssh.c -@@ -467,6 +467,7 @@ static int _git_ssh_setup_conn( - git_credential *cred = NULL; - LIBSSH2_SESSION *session=NULL; - LIBSSH2_CHANNEL *channel=NULL; -+ char *host_and_port; - - t->current_stream = NULL; - -@@ -567,10 +568,18 @@ static int _git_ssh_setup_conn( - - cert_ptr = &cert; - -+ if (atoi(s->url.port) == SSH_DEFAULT_PORT) { -+ host_and_port = s->url.host; -+ } else { -+ size_t n = strlen(s->url.host) + strlen(s->url.port) + 2; -+ host_and_port = alloca(n); -+ sprintf(host_and_port, "%s:%s", s->url.host, s->url.port); -+ } -+ - error = t->owner->connect_opts.callbacks.certificate_check( - (git_cert *)cert_ptr, - 0, -- s->url.host, -+ host_and_port, - t->owner->connect_opts.callbacks.payload); - - if (error < 0 && error != GIT_PASSTHROUGH) { diff --git a/deps/patches/libgit2-win32-ownership.patch b/deps/patches/libgit2-win32-ownership.patch deleted file mode 100644 index d5a84d754dcd8..0000000000000 --- a/deps/patches/libgit2-win32-ownership.patch +++ /dev/null @@ -1,27 +0,0 @@ -From cdff2f0237f663e0f68155655a8b66d05c1ec716 Mon Sep 17 00:00:00 2001 -From: Edward Thomson -Date: Mon, 13 Jun 2022 21:34:01 -0400 -Subject: [PATCH] repo: allow administrator to own the configuration - -Update our ownership checks that were introduced in libgit2 v1.4.3 -(to combat CVE 2022-24765). These were not compatible with git's; git -itself allows administrators to own the path. 
Our checks now match -this behavior. ---- - src/libgit2/repository.c | 2 +- - tests/libgit2/repo/open.c | 5 +++-- - 2 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/src/repository.c b/src/repository.c -index 48a0b70f519..d2484318f10 100644 ---- a/src/repository.c -+++ b/src/repository.c -@@ -512,7 +512,7 @@ static int validate_ownership(const char *repo_path) - bool is_safe; - int error; - -- if ((error = git_fs_path_owner_is_current_user(&is_safe, repo_path)) < 0) { -+ if ((error = git_fs_path_owner_is_system_or_current_user(&is_safe, repo_path)) < 0) { - if (error == GIT_ENOTFOUND) - error = 0; - diff --git a/deps/patches/libssh2-mbedtls-size_t.patch b/deps/patches/libssh2-mbedtls-size_t.patch new file mode 100644 index 0000000000000..502adf6bdf439 --- /dev/null +++ b/deps/patches/libssh2-mbedtls-size_t.patch @@ -0,0 +1,105 @@ +From 6cad964056848d3d78ccc74600fbff6298baddcb Mon Sep 17 00:00:00 2001 +From: Viktor Szakats +Date: Tue, 30 May 2023 17:28:03 +0000 +Subject: [PATCH 1/1] mbedtls: use more size_t to sync up with crypto.h + +Ref: 5a96f494ee0b00282afb2db2e091246fc5e1774a #846 #879 + +Fixes #1053 +Closes #1054 +--- + src/mbedtls.c | 14 ++++++++------ + src/mbedtls.h | 13 ++++++------- + 2 files changed, 14 insertions(+), 13 deletions(-) + +diff --git a/src/mbedtls.c b/src/mbedtls.c +index e387cdb..cd14a4b 100644 +--- a/src/mbedtls.c ++++ b/src/mbedtls.c +@@ -186,7 +186,7 @@ _libssh2_mbedtls_cipher_dtor(_libssh2_cipher_ctx *ctx) + int + _libssh2_mbedtls_hash_init(mbedtls_md_context_t *ctx, + mbedtls_md_type_t mdtype, +- const unsigned char *key, unsigned long keylen) ++ const unsigned char *key, size_t keylen) + { + const mbedtls_md_info_t *md_info; + int ret, hmac; +@@ -221,7 +221,7 @@ _libssh2_mbedtls_hash_final(mbedtls_md_context_t *ctx, unsigned char *hash) + } + + int +-_libssh2_mbedtls_hash(const unsigned char *data, unsigned long datalen, ++_libssh2_mbedtls_hash(const unsigned char *data, size_t datalen, + mbedtls_md_type_t mdtype, unsigned char *hash) + { + const mbedtls_md_info_t *md_info; +@@ -497,8 +497,9 @@ int + _libssh2_mbedtls_rsa_sha2_verify(libssh2_rsa_ctx * rsactx, + size_t hash_len, + const unsigned char *sig, +- unsigned long sig_len, +- const unsigned char *m, unsigned long m_len) ++ size_t sig_len, ++ const unsigned char *m, ++ size_t m_len) + { + int ret; + int md_type; +@@ -548,8 +549,9 @@ _libssh2_mbedtls_rsa_sha2_verify(libssh2_rsa_ctx * rsactx, + int + _libssh2_mbedtls_rsa_sha1_verify(libssh2_rsa_ctx * rsactx, + const unsigned char *sig, +- unsigned long sig_len, +- const unsigned char *m, unsigned long m_len) ++ size_t sig_len, ++ const unsigned char *m, ++ size_t m_len) + { + return _libssh2_mbedtls_rsa_sha2_verify(rsactx, SHA_DIGEST_LENGTH, + sig, sig_len, m, m_len); +diff --git a/src/mbedtls.h b/src/mbedtls.h +index d9592f7..03484da 100644 +--- a/src/mbedtls.h ++++ b/src/mbedtls.h +@@ -478,12 +478,12 @@ _libssh2_mbedtls_cipher_dtor(_libssh2_cipher_ctx *ctx); + int + _libssh2_mbedtls_hash_init(mbedtls_md_context_t *ctx, + mbedtls_md_type_t mdtype, +- const unsigned char *key, unsigned long keylen); ++ const unsigned char *key, size_t keylen); + + int + _libssh2_mbedtls_hash_final(mbedtls_md_context_t *ctx, unsigned char *hash); + int +-_libssh2_mbedtls_hash(const unsigned char *data, unsigned long datalen, ++_libssh2_mbedtls_hash(const unsigned char *data, size_t datalen, + mbedtls_md_type_t mdtype, unsigned char *hash); + + _libssh2_bn * +@@ -526,9 +526,8 @@ _libssh2_mbedtls_rsa_new_private_frommemory(libssh2_rsa_ctx **rsa, + int + 
_libssh2_mbedtls_rsa_sha1_verify(libssh2_rsa_ctx *rsa, + const unsigned char *sig, +- unsigned long sig_len, +- const unsigned char *m, +- unsigned long m_len); ++ size_t sig_len, ++ const unsigned char *m, size_t m_len); + int + _libssh2_mbedtls_rsa_sha1_sign(LIBSSH2_SESSION *session, + libssh2_rsa_ctx *rsa, +@@ -540,8 +539,8 @@ int + _libssh2_mbedtls_rsa_sha2_verify(libssh2_rsa_ctx * rsactx, + size_t hash_len, + const unsigned char *sig, +- unsigned long sig_len, +- const unsigned char *m, unsigned long m_len); ++ size_t sig_len, ++ const unsigned char *m, size_t m_len); + int + _libssh2_mbedtls_rsa_sha2_sign(LIBSSH2_SESSION *session, + libssh2_rsa_ctx *rsa, +-- +2.31.0 + diff --git a/deps/patches/libssh2-userauth-check.patch b/deps/patches/libssh2-userauth-check.patch deleted file mode 100644 index 1dc6108ebece7..0000000000000 --- a/deps/patches/libssh2-userauth-check.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 37ee0aa214655b63e7869d1d74ff1ec9f9818a5e Mon Sep 17 00:00:00 2001 -From: Daniel Stenberg -Date: Fri, 17 Dec 2021 17:46:29 +0100 -Subject: [PATCH] userauth: check for too large userauth_kybd_auth_name_len - (#650) - -... before using it. - -Reported-by: MarcoPoloPie -Fixes #649 ---- - src/userauth.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/userauth.c b/src/userauth.c -index 40ef915..caa5635 100644 ---- a/src/userauth.c -+++ b/src/userauth.c -@@ -1769,6 +1769,11 @@ userauth_keyboard_interactive(LIBSSH2_SESSION * session, - if(session->userauth_kybd_data_len >= 5) { - /* string name (ISO-10646 UTF-8) */ - session->userauth_kybd_auth_name_len = _libssh2_ntohu32(s); -+ if(session->userauth_kybd_auth_name_len > -+ session->userauth_kybd_data_len - 5) -+ return _libssh2_error(session, -+ LIBSSH2_ERROR_OUT_OF_BOUNDARY, -+ "Bad keyboard auth name"); - s += 4; - } - else { diff --git a/deps/patches/libunwind-cfa-rsp.patch b/deps/patches/libunwind-cfa-rsp.patch deleted file mode 100644 index 6b2080c10c2cf..0000000000000 --- a/deps/patches/libunwind-cfa-rsp.patch +++ /dev/null @@ -1,368 +0,0 @@ -From 8c8c78e2db09c5dc66ad0188a088b1664483a13f Mon Sep 17 00:00:00 2001 -From: Keno Fischer -Date: Sun, 29 Aug 2021 11:07:54 -0700 -Subject: [PATCH] x86_64: Stop aliasing RSP and CFA - -RSP and CFA are different concepts. RSP refers to the physical -register, CFA is a virtual register that serves as the base -address for various other saved registers. It is true that -in many frames these are set to alias, however this is not -a requirement. For example, a function that performs a stack -switch would likely change the rsp in the middle of the function, -but would keep the CFA at the original RSP such that saved registers -may be appropriately recovered. - -We are seeing incorrect unwinds in the Julia runtime when running -julia under rr. This is because injects code (with correct CFI) -that performs just such a stack switch [1]. GDB manages to unwind -this correctly, but libunwind incorrectly sets the rsp to the CFA -address, causing a misunwind. - -Tested on x86_64, patches for other architectures are ported, but -not tested. 
- -[1] https://github.com/rr-debugger/rr/blob/469c22059a4a1798d33a8a224457faf22b2c178c/src/preload/syscall_hook.S#L454 ---- - include/dwarf.h | 3 +- - include/libunwind_i.h | 4 ++ - include/tdep-x86/dwarf-config.h | 2 - - include/tdep-x86/libunwind_i.h | 73 ++++++++++++--------------------- - src/dwarf/Gparser.c | 15 +++++-- - src/x86/Gos-freebsd.c | 1 + - src/x86/Gregs.c | 2 +- - src/x86/Gstep.c | 4 +- - src/x86_64/Gos-freebsd.c | 1 + - src/x86_64/Gregs.c | 2 +- - src/x86_64/Gstep.c | 2 +- - 11 files changed, 52 insertions(+), 57 deletions(-) - -diff --git a/include/dwarf.h b/include/dwarf.h -index 175c419bb..23ff4c4f6 100644 ---- a/include/dwarf.h -+++ b/include/dwarf.h -@@ -231,6 +231,7 @@ typedef enum - DWARF_WHERE_REG, /* register saved in another register */ - DWARF_WHERE_EXPR, /* register saved */ - DWARF_WHERE_VAL_EXPR, /* register has computed value */ -+ DWARF_WHERE_CFA, /* register is set to the computed cfa value */ - } - dwarf_where_t; - -@@ -313,7 +314,7 @@ typedef struct dwarf_cursor - void *as_arg; /* argument to address-space callbacks */ - unw_addr_space_t as; /* reference to per-address-space info */ - -- unw_word_t cfa; /* canonical frame address; aka frame-/stack-pointer */ -+ unw_word_t cfa; /* canonical frame address; aka frame-pointer */ - unw_word_t ip; /* instruction pointer */ - unw_word_t args_size; /* size of arguments */ - unw_word_t eh_args[UNW_TDEP_NUM_EH_REGS]; -diff --git a/include/libunwind_i.h b/include/libunwind_i.h -index fea5c2607..6c7dda9a8 100644 ---- a/include/libunwind_i.h -+++ b/include/libunwind_i.h -@@ -346,6 +346,10 @@ static inline void invalidate_edi (struct elf_dyn_info *edi) - - #include "tdep/libunwind_i.h" - -+#ifndef TDEP_DWARF_SP -+#define TDEP_DWARF_SP UNW_TDEP_SP -+#endif -+ - #ifndef tdep_get_func_addr - # define tdep_get_func_addr(as,addr,v) (*(v) = addr, 0) - #endif -diff --git a/include/tdep-x86/dwarf-config.h b/include/tdep-x86/dwarf-config.h -index f76f9c1c4..11398e4e6 100644 ---- a/include/tdep-x86/dwarf-config.h -+++ b/include/tdep-x86/dwarf-config.h -@@ -43,9 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - typedef struct dwarf_loc - { - unw_word_t val; --#ifndef UNW_LOCAL_ONLY - unw_word_t type; /* see X86_LOC_TYPE_* macros. 
*/ --#endif - } - dwarf_loc_t; - -diff --git a/include/tdep-x86/libunwind_i.h b/include/tdep-x86/libunwind_i.h -index d4c5ccdb1..ad4edc2f5 100644 ---- a/include/tdep-x86/libunwind_i.h -+++ b/include/tdep-x86/libunwind_i.h -@@ -84,15 +84,26 @@ dwarf_get_uc(const struct dwarf_cursor *cursor) - } - - #define DWARF_GET_LOC(l) ((l).val) -+# define DWARF_LOC_TYPE_MEM (0 << 0) -+# define DWARF_LOC_TYPE_FP (1 << 0) -+# define DWARF_LOC_TYPE_REG (1 << 1) -+# define DWARF_LOC_TYPE_VAL (1 << 2) - --#ifdef UNW_LOCAL_ONLY -+# define DWARF_IS_REG_LOC(l) (((l).type & DWARF_LOC_TYPE_REG) != 0) -+# define DWARF_IS_FP_LOC(l) (((l).type & DWARF_LOC_TYPE_FP) != 0) -+# define DWARF_IS_MEM_LOC(l) ((l).type == DWARF_LOC_TYPE_MEM) -+# define DWARF_IS_VAL_LOC(l) (((l).type & DWARF_LOC_TYPE_VAL) != 0) -+ -+# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r), .type = (t) }) - # define DWARF_NULL_LOC DWARF_LOC (0, 0) --# define DWARF_IS_NULL_LOC(l) (DWARF_GET_LOC (l) == 0) --# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r) }) --# define DWARF_IS_REG_LOC(l) 0 -+# define DWARF_IS_NULL_LOC(l) \ -+ ({ dwarf_loc_t _l = (l); _l.val == 0 && _l.type == 0; }) -+# define DWARF_VAL_LOC(c,v) DWARF_LOC ((v), DWARF_LOC_TYPE_VAL) -+# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), DWARF_LOC_TYPE_MEM) -+ -+#ifdef UNW_LOCAL_ONLY - # define DWARF_REG_LOC(c,r) (DWARF_LOC((unw_word_t) \ - tdep_uc_addr(dwarf_get_uc(c), (r)), 0)) --# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), 0) - # define DWARF_FPREG_LOC(c,r) (DWARF_LOC((unw_word_t) \ - tdep_uc_addr(dwarf_get_uc(c), (r)), 0)) - -@@ -114,35 +125,8 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val) - return 0; - } - --static inline int --dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val) --{ -- if (!DWARF_GET_LOC (loc)) -- return -1; -- return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val, -- 0, c->as_arg); --} -- --static inline int --dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) --{ -- if (!DWARF_GET_LOC (loc)) -- return -1; -- return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val, -- 1, c->as_arg); --} -- - #else /* !UNW_LOCAL_ONLY */ --# define DWARF_LOC_TYPE_FP (1 << 0) --# define DWARF_LOC_TYPE_REG (1 << 1) --# define DWARF_NULL_LOC DWARF_LOC (0, 0) --# define DWARF_IS_NULL_LOC(l) \ -- ({ dwarf_loc_t _l = (l); _l.val == 0 && _l.type == 0; }) --# define DWARF_LOC(r, t) ((dwarf_loc_t) { .val = (r), .type = (t) }) --# define DWARF_IS_REG_LOC(l) (((l).type & DWARF_LOC_TYPE_REG) != 0) --# define DWARF_IS_FP_LOC(l) (((l).type & DWARF_LOC_TYPE_FP) != 0) - # define DWARF_REG_LOC(c,r) DWARF_LOC((r), DWARF_LOC_TYPE_REG) --# define DWARF_MEM_LOC(c,m) DWARF_LOC ((m), 0) - # define DWARF_FPREG_LOC(c,r) DWARF_LOC((r), (DWARF_LOC_TYPE_REG \ - | DWARF_LOC_TYPE_FP)) - -@@ -192,38 +176,33 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val) - 1, c->as_arg); - } - -+#endif /* !UNW_LOCAL_ONLY */ -+ - static inline int - dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val) - { - if (DWARF_IS_NULL_LOC (loc)) - return -UNW_EBADREG; - -- /* If a code-generator were to save a value of type unw_word_t in a -- floating-point register, we would have to support this case. I -- suppose it could happen with MMX registers, but does it really -- happen? 
*/ -- assert (!DWARF_IS_FP_LOC (loc)); -- - if (DWARF_IS_REG_LOC (loc)) - return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val, - 0, c->as_arg); -- else -+ if (DWARF_IS_MEM_LOC (loc)) - return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val, - 0, c->as_arg); -+ assert(DWARF_IS_VAL_LOC (loc)); -+ *val = DWARF_GET_LOC (loc); -+ return 0; - } - - static inline int - dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) - { -+ assert(!DWARF_IS_VAL_LOC (loc)); -+ - if (DWARF_IS_NULL_LOC (loc)) - return -UNW_EBADREG; - -- /* If a code-generator were to save a value of type unw_word_t in a -- floating-point register, we would have to support this case. I -- suppose it could happen with MMX registers, but does it really -- happen? */ -- assert (!DWARF_IS_FP_LOC (loc)); -- - if (DWARF_IS_REG_LOC (loc)) - return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val, - 1, c->as_arg); -@@ -232,7 +211,9 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) - 1, c->as_arg); - } - --#endif /* !UNW_LOCAL_ONLY */ -+// For historical reasons, the DWARF numbering does not match the libunwind -+// numbering, necessitating this override -+#define TDEP_DWARF_SP 4 - - #define tdep_getcontext_trace unw_getcontext - #define tdep_init_done UNW_OBJ(init_done) -diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c -index da170d4b3..70a62c505 100644 ---- a/src/dwarf/Gparser.c -+++ b/src/dwarf/Gparser.c -@@ -508,6 +508,9 @@ setup_fde (struct dwarf_cursor *c, dwarf_state_record_t *sr) - for (i = 0; i < DWARF_NUM_PRESERVED_REGS + 2; ++i) - set_reg (sr, i, DWARF_WHERE_SAME, 0); - -+ // SP defaults to CFA (but is overridable) -+ set_reg (sr, TDEP_DWARF_SP, DWARF_WHERE_CFA, 0); -+ - struct dwarf_cie_info *dci = c->pi.unwind_info; - sr->rs_current.ret_addr_column = dci->ret_addr_column; - unw_word_t addr = dci->cie_instr_start; -@@ -792,14 +795,14 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs) - /* As a special-case, if the stack-pointer is the CFA and the - stack-pointer wasn't saved, popping the CFA implicitly pops - the stack-pointer as well. 
*/ -- if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == UNW_TDEP_SP) -- && (UNW_TDEP_SP < ARRAY_SIZE(rs->reg.val)) -- && (rs->reg.where[UNW_TDEP_SP] == DWARF_WHERE_SAME)) -+ if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == TDEP_DWARF_SP) -+ && (TDEP_DWARF_SP < ARRAY_SIZE(rs->reg.val)) -+ && (DWARF_IS_NULL_LOC(c->loc[TDEP_DWARF_SP]))) - cfa = c->cfa; - else - { - regnum = dwarf_to_unw_regnum (rs->reg.val[DWARF_CFA_REG_COLUMN]); -- if ((ret = unw_get_reg ((unw_cursor_t *) c, regnum, &cfa)) < 0) -+ if ((ret = unw_get_reg (dwarf_to_cursor(c), regnum, &cfa)) < 0) - return ret; - } - cfa += rs->reg.val[DWARF_CFA_OFF_COLUMN]; -@@ -836,6 +839,10 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs) - case DWARF_WHERE_SAME: - break; - -+ case DWARF_WHERE_CFA: -+ new_loc[i] = DWARF_VAL_LOC (c, cfa); -+ break; -+ - case DWARF_WHERE_CFAREL: - new_loc[i] = DWARF_MEM_LOC (c, cfa + rs->reg.val[i]); - break; -diff --git a/src/x86/Gos-freebsd.c b/src/x86/Gos-freebsd.c -index 7dd014046..1b251d027 100644 ---- a/src/x86/Gos-freebsd.c -+++ b/src/x86/Gos-freebsd.c -@@ -138,6 +138,7 @@ x86_handle_signal_frame (unw_cursor_t *cursor) - c->dwarf.loc[ST0] = DWARF_NULL_LOC; - } else if (c->sigcontext_format == X86_SCF_FREEBSD_SYSCALL) { - c->dwarf.loc[EIP] = DWARF_LOC (c->dwarf.cfa, 0); -+ c->dwarf.loc[ESP] = DWARF_VAL_LOC (c, c->dwarf.cfa + 4); - c->dwarf.loc[EAX] = DWARF_NULL_LOC; - c->dwarf.cfa += 4; - c->dwarf.use_prev_instr = 1; -diff --git a/src/x86/Gregs.c b/src/x86/Gregs.c -index 4a9592617..9446d6c62 100644 ---- a/src/x86/Gregs.c -+++ b/src/x86/Gregs.c -@@ -53,7 +53,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, - break; - - case UNW_X86_CFA: -- case UNW_X86_ESP: - if (write) - return -UNW_EREADONLYREG; - *valp = c->dwarf.cfa; -@@ -81,6 +80,7 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, - case UNW_X86_ECX: loc = c->dwarf.loc[ECX]; break; - case UNW_X86_EBX: loc = c->dwarf.loc[EBX]; break; - -+ case UNW_X86_ESP: loc = c->dwarf.loc[ESP]; break; - case UNW_X86_EBP: loc = c->dwarf.loc[EBP]; break; - case UNW_X86_ESI: loc = c->dwarf.loc[ESI]; break; - case UNW_X86_EDI: loc = c->dwarf.loc[EDI]; break; -diff --git a/src/x86/Gstep.c b/src/x86/Gstep.c -index 129b739a3..061dcbaaa 100644 ---- a/src/x86/Gstep.c -+++ b/src/x86/Gstep.c -@@ -47,7 +47,7 @@ unw_step (unw_cursor_t *cursor) - { - /* DWARF failed, let's see if we can follow the frame-chain - or skip over the signal trampoline. */ -- struct dwarf_loc ebp_loc, eip_loc; -+ struct dwarf_loc ebp_loc, eip_loc, esp_loc; - - /* We could get here because of missing/bad unwind information. - Validate all addresses before dereferencing. 
*/ -@@ -77,6 +77,7 @@ unw_step (unw_cursor_t *cursor) - c->dwarf.cfa); - - ebp_loc = DWARF_LOC (c->dwarf.cfa, 0); -+ esp_loc = DWARF_VAL_LOC (c, c->dwarf.cfa + 8); - eip_loc = DWARF_LOC (c->dwarf.cfa + 4, 0); - c->dwarf.cfa += 8; - -@@ -87,6 +88,7 @@ unw_step (unw_cursor_t *cursor) - c->dwarf.loc[i] = DWARF_NULL_LOC; - - c->dwarf.loc[EBP] = ebp_loc; -+ c->dwarf.loc[ESP] = esp_loc; - c->dwarf.loc[EIP] = eip_loc; - c->dwarf.use_prev_instr = 1; - } -diff --git a/src/x86_64/Gos-freebsd.c b/src/x86_64/Gos-freebsd.c -index 8f28d1d8c..0c5a17940 100644 ---- a/src/x86_64/Gos-freebsd.c -+++ b/src/x86_64/Gos-freebsd.c -@@ -133,6 +133,7 @@ x86_64_handle_signal_frame (unw_cursor_t *cursor) - c->dwarf.loc[RCX] = c->dwarf.loc[R10]; - /* rsp_loc = DWARF_LOC(c->dwarf.cfa - 8, 0); */ - /* rbp_loc = c->dwarf.loc[RBP]; */ -+ c->dwarf.loc[RSP] = DWARF_VAL_LOC (c, c->dwarf.cfa + 8); - c->dwarf.loc[RIP] = DWARF_LOC (c->dwarf.cfa, 0); - ret = dwarf_get (&c->dwarf, c->dwarf.loc[RIP], &c->dwarf.ip); - Debug (1, "Frame Chain [RIP=0x%Lx] = 0x%Lx\n", -diff --git a/src/x86_64/Gregs.c b/src/x86_64/Gregs.c -index baf8a24f0..dff5bcbe7 100644 ---- a/src/x86_64/Gregs.c -+++ b/src/x86_64/Gregs.c -@@ -79,7 +79,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, - break; - - case UNW_X86_64_CFA: -- case UNW_X86_64_RSP: - if (write) - return -UNW_EREADONLYREG; - *valp = c->dwarf.cfa; -@@ -107,6 +106,7 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp, - case UNW_X86_64_RCX: loc = c->dwarf.loc[RCX]; break; - case UNW_X86_64_RBX: loc = c->dwarf.loc[RBX]; break; - -+ case UNW_X86_64_RSP: loc = c->dwarf.loc[RSP]; break; - case UNW_X86_64_RBP: loc = c->dwarf.loc[RBP]; break; - case UNW_X86_64_RSI: loc = c->dwarf.loc[RSI]; break; - case UNW_X86_64_RDI: loc = c->dwarf.loc[RDI]; break; -diff --git a/src/x86_64/Gstep.c b/src/x86_64/Gstep.c -index 3c5c3830f..fdad298c7 100644 ---- a/src/x86_64/Gstep.c -+++ b/src/x86_64/Gstep.c -@@ -223,7 +223,7 @@ unw_step (unw_cursor_t *cursor) - Debug (2, "RIP fixup didn't work, falling back\n"); - unw_word_t rbp1 = 0; - rbp_loc = DWARF_LOC(rbp, 0); -- rsp_loc = DWARF_NULL_LOC; -+ rsp_loc = DWARF_VAL_LOC(c, rbp + 16); - rip_loc = DWARF_LOC (rbp + 8, 0); - ret = dwarf_get (&c->dwarf, rbp_loc, &rbp1); - Debug (1, "[RBP=0x%lx] = 0x%lx (cfa = 0x%lx) -> 0x%lx\n", diff --git a/deps/patches/libunwind-dwarf-table.patch b/deps/patches/libunwind-dwarf-table.patch deleted file mode 100644 index 5905982f9a349..0000000000000 --- a/deps/patches/libunwind-dwarf-table.patch +++ /dev/null @@ -1,36 +0,0 @@ -From a5b5fd28ed03cb1ab524d24dc534c1fa167bf5a1 Mon Sep 17 00:00:00 2001 -From: Alex Arslan -Date: Fri, 5 Nov 2021 16:58:41 -0700 -Subject: [PATCH] Fix table indexing in `dwarf_search_unwind_table` - -`table_len` is used as an index into `table`, assuming it represents the -number of entries. However, it is defined as the number of entries -multiplied by `sizeof(unw_word_t)`. This is accounted for in other -places that use `table_len`, e.g. in `lookup`, which divides out the -size of `unw_word_t`, but the indexing expression uses `table_len` -directly. So when `table` has say 2 entries, we're actually looking at -index 15 rather than 1 in the comparison. This can cause the conditional -to erroneously evaluate to true, allowing the following line to -segfault. - -This was observed with JIT compiled code from Julia with LLVM on -FreeBSD. 
- -Co-Authored-By: Jameson Nash ---- - src/dwarf/Gfind_proc_info-lsb.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c -index 5e27a501..af4cbce8 100644 ---- a/src/dwarf/Gfind_proc_info-lsb.c -+++ b/src/dwarf/Gfind_proc_info-lsb.c -@@ -866,7 +866,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, - if (as == unw_local_addr_space) - { - e = lookup (table, table_len, ip - ip_base); -- if (e && &e[1] < &table[table_len]) -+ if (e && &e[1] < &table[table_len / sizeof (unw_word_t)]) - last_ip = e[1].start_ip_offset + ip_base; - else - last_ip = di->end_ip; diff --git a/deps/patches/libunwind-non-empty-structs.patch b/deps/patches/libunwind-non-empty-structs.patch deleted file mode 100644 index 0c04709a13184..0000000000000 --- a/deps/patches/libunwind-non-empty-structs.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 1f35cd8f2bdcc1876af7352cc3e87bb7277e8162 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Mos=C3=A8=20Giordano?= -Date: Sat, 18 Jun 2022 10:35:36 +0100 -Subject: [PATCH 1/1] Make some structs non-empty - -Backport of . ---- - include/libunwind-aarch64.h | 6 ++++++ - include/libunwind-arm.h | 6 ++++++ - include/libunwind-x86.h | 6 ++++++ - 3 files changed, 18 insertions(+) - -diff --git a/include/libunwind-aarch64.h b/include/libunwind-aarch64.h -index aeaef630..b7066c51 100644 ---- a/include/libunwind-aarch64.h -+++ b/include/libunwind-aarch64.h -@@ -35,6 +35,10 @@ extern "C" { - #include - #include - -+#ifndef UNW_EMPTY_STRUCT -+# define UNW_EMPTY_STRUCT uint8_t unused; -+#endif -+ - #define UNW_TARGET aarch64 - #define UNW_TARGET_AARCH64 1 - -@@ -60,6 +64,7 @@ typedef long double unw_tdep_fpreg_t; - typedef struct - { - /* no aarch64-specific auxiliary proc-info */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_proc_info_t; - -@@ -169,6 +174,7 @@ aarch64_regnum_t; - typedef struct unw_tdep_save_loc - { - /* Additional target-dependent info on a save location. */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_save_loc_t; - -diff --git a/include/libunwind-arm.h b/include/libunwind-arm.h -index 6709b7ab..7c7005d1 100644 ---- a/include/libunwind-arm.h -+++ b/include/libunwind-arm.h -@@ -32,6 +32,10 @@ extern "C" { - #include - #include - -+#ifndef UNW_EMPTY_STRUCT -+# define UNW_EMPTY_STRUCT uint8_t unused; -+#endif -+ - #define UNW_TARGET arm - #define UNW_TARGET_ARM 1 - -@@ -247,6 +251,7 @@ arm_regnum_t; - typedef struct unw_tdep_save_loc - { - /* Additional target-dependent info on a save location. */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_save_loc_t; - -@@ -288,6 +293,7 @@ unw_tdep_context_t; - typedef struct - { - /* no arm-specific auxiliary proc-info */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_proc_info_t; - -diff --git a/include/libunwind-x86.h b/include/libunwind-x86.h -index 40fe0464..d3b741d3 100644 ---- a/include/libunwind-x86.h -+++ b/include/libunwind-x86.h -@@ -34,6 +34,10 @@ extern "C" { - #include - #include - -+#ifndef UNW_EMPTY_STRUCT -+# define UNW_EMPTY_STRUCT uint8_t unused; -+#endif -+ - #define UNW_TARGET x86 - #define UNW_TARGET_X86 1 - -@@ -158,6 +162,7 @@ x86_regnum_t; - typedef struct unw_tdep_save_loc - { - /* Additional target-dependent info on a save location. 
*/ -+ UNW_EMPTY_STRUCT - } - unw_tdep_save_loc_t; - -@@ -169,6 +174,7 @@ typedef ucontext_t unw_tdep_context_t; - typedef struct - { - /* no x86-specific auxiliary proc-info */ -+ UNW_EMPTY_STRUCT - } - unw_tdep_proc_info_t; - --- -2.36.1 - diff --git a/deps/patches/libunwind-prefer-extbl.patch b/deps/patches/libunwind-prefer-extbl.patch deleted file mode 100644 index 07b172604d623..0000000000000 --- a/deps/patches/libunwind-prefer-extbl.patch +++ /dev/null @@ -1,194 +0,0 @@ -From 2d6a50435bb743be1e4d88eee002372344348349 Mon Sep 17 00:00:00 2001 -From: Yichao Yu -Date: Sun, 29 Aug 2021 13:43:01 -0700 -Subject: [PATCH] Prefer EXTBL unwinding on ARM - -It is part of the C++ ABI so a EXTBL unwind info that's not `CANT_UNWIND` -should always be reliable/correct. -Ignore `ESTOPUNWIND` so that a `CANT_UNWIND` info can fallback to unwinding -using the debug info instead. ---- - include/tdep-arm/libunwind_i.h | 4 +++ - src/arm/Gex_tables.c | 18 ++++++++--- - src/arm/Gstep.c | 55 ++++++++++++++++++++-------------- - 3 files changed, 51 insertions(+), 26 deletions(-) - -diff --git a/include/tdep-arm/libunwind_i.h b/include/tdep-arm/libunwind_i.h -index 88ebfb069..5bd28c953 100644 ---- a/include/tdep-arm/libunwind_i.h -+++ b/include/tdep-arm/libunwind_i.h -@@ -256,6 +256,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val) - #define tdep_init_done UNW_OBJ(init_done) - #define tdep_init UNW_OBJ(init) - #define arm_find_proc_info UNW_OBJ(find_proc_info) -+#define arm_find_proc_info2 UNW_OBJ(find_proc_info2) - #define arm_put_unwind_info UNW_OBJ(put_unwind_info) - /* Platforms that support UNW_INFO_FORMAT_TABLE need to define - tdep_search_unwind_table. */ -@@ -297,6 +298,9 @@ extern void tdep_init (void); - extern int arm_find_proc_info (unw_addr_space_t as, unw_word_t ip, - unw_proc_info_t *pi, int need_unwind_info, - void *arg); -+extern int arm_find_proc_info2 (unw_addr_space_t as, unw_word_t ip, -+ unw_proc_info_t *pi, int need_unwind_info, -+ void *arg, int methods); - extern void arm_put_unwind_info (unw_addr_space_t as, - unw_proc_info_t *pi, void *arg); - extern int tdep_search_unwind_table (unw_addr_space_t as, unw_word_t ip, -diff --git a/src/arm/Gex_tables.c b/src/arm/Gex_tables.c -index efdcf2978..083d2b2f7 100644 ---- a/src/arm/Gex_tables.c -+++ b/src/arm/Gex_tables.c -@@ -506,18 +506,20 @@ arm_phdr_cb (struct dl_phdr_info *info, size_t size, void *data) - } - - HIDDEN int --arm_find_proc_info (unw_addr_space_t as, unw_word_t ip, -- unw_proc_info_t *pi, int need_unwind_info, void *arg) -+arm_find_proc_info2 (unw_addr_space_t as, unw_word_t ip, -+ unw_proc_info_t *pi, int need_unwind_info, void *arg, -+ int methods) - { - int ret = -1; - intrmask_t saved_mask; - - Debug (14, "looking for IP=0x%lx\n", (long) ip); - -- if (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) -+ if (UNW_TRY_METHOD (UNW_ARM_METHOD_DWARF) && (methods & UNW_ARM_METHOD_DWARF)) - ret = dwarf_find_proc_info (as, ip, pi, need_unwind_info, arg); - -- if (ret < 0 && UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX)) -+ if (ret < 0 && UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX) && -+ (methods & UNW_ARM_METHOD_EXIDX)) - { - struct arm_cb_data cb_data; - -@@ -540,6 +542,14 @@ arm_find_proc_info (unw_addr_space_t as, unw_word_t ip, - return ret; - } - -+HIDDEN int -+arm_find_proc_info (unw_addr_space_t as, unw_word_t ip, -+ unw_proc_info_t *pi, int need_unwind_info, void *arg) -+{ -+ return arm_find_proc_info2 (as, ip, pi, need_unwind_info, arg, -+ UNW_ARM_METHOD_ALL); -+} -+ - HIDDEN void - arm_put_unwind_info (unw_addr_space_t as, 
unw_proc_info_t *proc_info, void *arg) - { -diff --git a/src/arm/Gstep.c b/src/arm/Gstep.c -index 895e8a892..e4ada651b 100644 ---- a/src/arm/Gstep.c -+++ b/src/arm/Gstep.c -@@ -54,17 +54,22 @@ arm_exidx_step (struct cursor *c) - c->dwarf.as_arg); - if (ret == -UNW_ENOINFO) - { -+#ifdef UNW_LOCAL_ONLY -+ if ((ret = arm_find_proc_info2 (c->dwarf.as, ip, &c->dwarf.pi, -+ 1, c->dwarf.as_arg, -+ UNW_ARM_METHOD_EXIDX)) < 0) -+ return ret; -+#else - if ((ret = tdep_find_proc_info (&c->dwarf, ip, 1)) < 0) - return ret; -+#endif - } - - if (c->dwarf.pi.format != UNW_INFO_FORMAT_ARM_EXIDX) - return -UNW_ENOINFO; - - ret = arm_exidx_extract (&c->dwarf, buf); -- if (ret == -UNW_ESTOPUNWIND) -- return 0; -- else if (ret < 0) -+ if (ret < 0) - return ret; - - ret = arm_exidx_decode (buf, ret, &c->dwarf); -@@ -88,6 +93,7 @@ unw_step (unw_cursor_t *cursor) - { - struct cursor *c = (struct cursor *) cursor; - int ret = -UNW_EUNSPEC; -+ int has_stopunwind = 0; - - Debug (1, "(cursor=%p)\n", c); - -@@ -95,17 +101,31 @@ unw_step (unw_cursor_t *cursor) - if (unw_is_signal_frame (cursor) > 0) - return arm_handle_signal_frame (cursor); - -+ /* First, try extbl-based unwinding. */ -+ if (UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX)) -+ { -+ ret = arm_exidx_step (c); -+ Debug(1, "arm_exidx_step()=%d\n", ret); -+ if (ret > 0) -+ return 1; -+ if (ret == 0) -+ return ret; -+ if (ret == -UNW_ESTOPUNWIND) -+ has_stopunwind = 1; -+ } -+ - #ifdef CONFIG_DEBUG_FRAME -- /* First, try DWARF-based unwinding. */ -+ /* Second, try DWARF-based unwinding. */ - if (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) - { -+ Debug (13, "%s(ret=%d), trying extbl\n", -+ UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) ? "arm_exidx_step() failed " : "", -+ ret); - ret = dwarf_step (&c->dwarf); - Debug(1, "dwarf_step()=%d\n", ret); - - if (likely (ret > 0)) - return 1; -- else if (unlikely (ret == -UNW_ESTOPUNWIND)) -- return ret; - - if (ret < 0 && ret != -UNW_ENOINFO) - { -@@ -115,18 +135,9 @@ unw_step (unw_cursor_t *cursor) - } - #endif /* CONFIG_DEBUG_FRAME */ - -- /* Next, try extbl-based unwinding. */ -- if (UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX)) -- { -- Debug (13, "%s(ret=%d), trying extbl\n", -- UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() failed " : "", -- ret); -- ret = arm_exidx_step (c); -- if (ret > 0) -- return 1; -- if (ret == -UNW_ESTOPUNWIND || ret == 0) -- return ret; -- } -+ // Before trying the fallback, if any unwind info tell us to stop, do that. -+ if (has_stopunwind) -+ return -UNW_ESTOPUNWIND; - - /* Fall back on APCS frame parsing. - Note: This won't work in case the ARM EABI is used. */ -@@ -139,13 +150,13 @@ unw_step (unw_cursor_t *cursor) - if (UNW_TRY_METHOD(UNW_ARM_METHOD_FRAME)) - { - Debug (13, "%s%s%s%s(ret=%d), trying frame-chain\n", -- UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() " : "", -- (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) && UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX)) ? "and " : "", - UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) ? "arm_exidx_step() " : "", -- (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) || UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX)) ? "failed " : "", -+ (UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) && UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) ? "and " : "", -+ UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() " : "", -+ (UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) || UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) ? 
"failed " : "", - ret); - ret = UNW_ESUCCESS; -- /* DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */ -+ /* EXIDX and/or DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */ - unw_word_t instr, i; - dwarf_loc_t ip_loc, fp_loc; - unw_word_t frame; diff --git a/deps/patches/libunwind-revert_prelink_unwind.patch b/deps/patches/libunwind-revert_prelink_unwind.patch new file mode 100644 index 0000000000000..603db03362759 --- /dev/null +++ b/deps/patches/libunwind-revert_prelink_unwind.patch @@ -0,0 +1,187 @@ +From 7ae19a08467254f0b3d7a513ef82651b283f38a9 Mon Sep 17 00:00:00 2001 +From: Tim Besard +Date: Wed, 27 Sep 2023 12:51:59 +0000 +Subject: [PATCH] Revert "Fix unwinding of pre-linked libraries" + +This reverts commit a4014f33775321b4106a1134b89020a7774902dd, +which regresses unwinding on FreeBSD (JuliaLang/julia#51467). +--- + include/dwarf.h | 2 -- + include/libunwind-dynamic.h | 1 - + src/dwarf/Gfind_proc_info-lsb.c | 42 +++++++-------------------------- + src/dwarf/Gfind_unwind_table.c | 1 - + 4 files changed, 8 insertions(+), 38 deletions(-) + +diff --git a/include/dwarf.h b/include/dwarf.h +index dd9014b7..312166cd 100644 +--- a/include/dwarf.h ++++ b/include/dwarf.h +@@ -371,8 +371,6 @@ struct unw_debug_frame_list + /* The start (inclusive) and end (exclusive) of the described region. */ + unw_word_t start; + unw_word_t end; +- /* ELF load offset */ +- unw_word_t load_offset; + /* The debug frame itself. */ + char *debug_frame; + size_t debug_frame_size; +diff --git a/include/libunwind-dynamic.h b/include/libunwind-dynamic.h +index a26f2c99..c902ccd9 100644 +--- a/include/libunwind-dynamic.h ++++ b/include/libunwind-dynamic.h +@@ -141,7 +141,6 @@ typedef struct unw_dyn_info + unw_word_t gp; /* global-pointer in effect for this entry */ + int32_t format; /* real type: unw_dyn_info_format_t */ + int32_t pad; +- unw_word_t load_offset; /* ELF load offset */ + union + { + unw_dyn_proc_info_t pi; +diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c +index 8ead48f0..154e9b5a 100644 +--- a/src/dwarf/Gfind_proc_info-lsb.c ++++ b/src/dwarf/Gfind_proc_info-lsb.c +@@ -108,17 +108,13 @@ linear_search (unw_addr_space_t as, unw_word_t ip, + + static int + load_debug_frame (const char *file, char **buf, size_t *bufsize, int is_local, +- unw_word_t segbase, unw_word_t *load_offset) ++ unw_word_t segbase) + { + struct elf_image ei; +- Elf_W (Ehdr) *ehdr; +- Elf_W (Phdr) *phdr; + Elf_W (Shdr) *shdr; +- int i; + int ret; + + ei.image = NULL; +- *load_offset = 0; + + ret = elf_w (load_debuglink) (file, &ei, is_local); + if (ret != 0) +@@ -193,20 +189,6 @@ load_debug_frame (const char *file, char **buf, size_t *bufsize, int is_local, + #if defined(SHF_COMPRESSED) + } + #endif +- +- ehdr = ei.image; +- phdr = (Elf_W (Phdr) *) ((char *) ei.image + ehdr->e_phoff); +- +- for (i = 0; i < ehdr->e_phnum; ++i) +- if (phdr[i].p_type == PT_LOAD) +- { +- *load_offset = segbase - phdr[i].p_vaddr; +- +- Debug (4, "%s load offset is 0x%zx\n", file, *load_offset); +- +- break; +- } +- + munmap(ei.image, ei.size); + return 0; + } +@@ -259,7 +241,6 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase, + int err; + char *buf; + size_t bufsize; +- unw_word_t load_offset; + + /* First, see if we loaded this frame already. 
*/ + +@@ -287,7 +268,7 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase, + name = (char*) dlname; + + err = load_debug_frame (name, &buf, &bufsize, as == unw_local_addr_space, +- segbase, &load_offset); ++ segbase); + + if (!err) + { +@@ -300,7 +281,6 @@ locate_debug_info (unw_addr_space_t as, unw_word_t addr, unw_word_t segbase, + + fdesc->start = start; + fdesc->end = end; +- fdesc->load_offset = load_offset; + fdesc->debug_frame = buf; + fdesc->debug_frame_size = bufsize; + fdesc->index = NULL; +@@ -497,7 +477,6 @@ dwarf_find_debug_frame (int found, unw_dyn_info_t *di_debug, unw_word_t ip, + di->format = UNW_INFO_FORMAT_TABLE; + di->start_ip = fdesc->start; + di->end_ip = fdesc->end; +- di->load_offset = fdesc->load_offset; + di->u.ti.name_ptr = (unw_word_t) (uintptr_t) obj_name; + di->u.ti.table_data = (unw_word_t *) fdesc; + di->u.ti.table_len = sizeof (*fdesc) / sizeof (unw_word_t); +@@ -960,14 +939,12 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + ip_base = segbase; + } + +- Debug (6, "lookup IP 0x%lx\n", (long) (ip - ip_base - di->load_offset)); +- + #ifndef UNW_REMOTE_ONLY + if (as == unw_local_addr_space) + { +- e = lookup (table, table_len, ip - ip_base - di->load_offset); ++ e = lookup (table, table_len, ip - ip_base); + if (e && &e[1] < &table[table_len / sizeof (unw_word_t)]) +- last_ip = e[1].start_ip_offset + ip_base + di->load_offset; ++ last_ip = e[1].start_ip_offset + ip_base; + else + last_ip = di->end_ip; + } +@@ -975,7 +952,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + #endif + { + #ifndef UNW_LOCAL_ONLY +- int32_t last_ip_offset = di->end_ip - ip_base - di->load_offset; ++ int32_t last_ip_offset = di->end_ip - ip_base; + segbase = di->u.rti.segbase; + if ((ret = remote_lookup (as, (uintptr_t) table, table_len, + ip - ip_base, &ent, &last_ip_offset, arg)) < 0) +@@ -983,7 +960,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + if (ret) + { + e = &ent; +- last_ip = last_ip_offset + ip_base + di->load_offset; ++ last_ip = last_ip_offset + ip_base; + } + else + e = NULL; /* no info found */ +@@ -997,8 +974,8 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + unwind info. 
*/ + return -UNW_ENOINFO; + } +- Debug (15, "ip=0x%lx, load_offset=0x%lx, start_ip=0x%lx\n", +- (long) ip, (long) di->load_offset, (long) (e->start_ip_offset)); ++ Debug (15, "ip=0x%lx, start_ip=0x%lx\n", ++ (long) ip, (long) (e->start_ip_offset)); + if (debug_frame_base) + fde_addr = e->fde_offset + debug_frame_base; + else +@@ -1022,9 +999,6 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip, + pi->flags = UNW_PI_FLAG_DEBUG_FRAME; + } + +- pi->start_ip += di->load_offset; +- pi->end_ip += di->load_offset; +- + #if defined(NEED_LAST_IP) + pi->last_ip = last_ip; + #else +diff --git a/src/dwarf/Gfind_unwind_table.c b/src/dwarf/Gfind_unwind_table.c +index fb20fea0..62feb26c 100644 +--- a/src/dwarf/Gfind_unwind_table.c ++++ b/src/dwarf/Gfind_unwind_table.c +@@ -193,7 +193,6 @@ dwarf_find_unwind_table (struct elf_dyn_info *edi, unw_addr_space_t as, + + edi->di_cache.start_ip = start_ip; + edi->di_cache.end_ip = end_ip; +- edi->di_cache.load_offset = 0; + edi->di_cache.format = UNW_INFO_FORMAT_REMOTE_TABLE; + edi->di_cache.u.rti.name_ptr = 0; + /* two 32-bit values (ip_offset/fde_offset) per table-entry: */ +-- +2.41.0 + diff --git a/deps/patches/libunwind-static-arm.patch b/deps/patches/libunwind-static-arm.patch deleted file mode 100644 index 92544a003b8b9..0000000000000 --- a/deps/patches/libunwind-static-arm.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/arm/Gex_tables.c b/src/arm/Gex_tables.c -index d6573a65..1d64803e 100644 ---- a/src/arm/Gex_tables.c -+++ b/src/arm/Gex_tables.c -@@ -381,7 +381,7 @@ arm_exidx_extract (struct dwarf_cursor *c, uint8_t *buf) - return nbuf; - } - --int -+static int - arm_search_unwind_table (unw_addr_space_t as, unw_word_t ip, - unw_dyn_info_t *di, unw_proc_info_t *pi, - int need_unwind_info, void *arg) diff --git a/deps/patches/llvm-ittapi-cmake.patch b/deps/patches/llvm-ittapi-cmake.patch new file mode 100644 index 0000000000000..6746d21754283 --- /dev/null +++ b/deps/patches/llvm-ittapi-cmake.patch @@ -0,0 +1,47 @@ +diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +index 0c5017c359d6..92777133e9de 100644 +--- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt ++++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +@@ -12,23 +12,23 @@ if(NOT DEFINED ITTAPI_SOURCE_DIR) + set(ITTAPI_SOURCE_DIR ${PROJECT_BINARY_DIR}) + endif() + +-if(NOT EXISTS ${ITTAPI_SOURCE_DIR}/ittapi) +- execute_process(COMMAND ${GIT_EXECUTABLE} clone ${ITTAPI_GIT_REPOSITORY} +- WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR} ++if(NOT EXISTS ${ITTAPI_SOURCE_DIR}) ++ execute_process(COMMAND ${GIT_EXECUTABLE} clone ${ITTAPI_GIT_REPOSITORY} ${ITTAPI_SOURCE_DIR} ++ WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR}/.. 
+ RESULT_VARIABLE GIT_CLONE_RESULT) + if(NOT GIT_CLONE_RESULT EQUAL "0") + message(FATAL_ERROR "git clone ${ITTAPI_GIT_REPOSITORY} failed with ${GIT_CLONE_RESULT}, please clone ${ITTAPI_GIT_REPOSITORY}") + endif() +-endif() + +-execute_process(COMMAND ${GIT_EXECUTABLE} checkout ${ITTAPI_GIT_TAG} +- WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR}/ittapi +- RESULT_VARIABLE GIT_CHECKOUT_RESULT) +-if(NOT GIT_CHECKOUT_RESULT EQUAL "0") +- message(FATAL_ERROR "git checkout ${ITTAPI_GIT_TAG} failed with ${GIT_CHECKOUT_RESULT}, please checkout ${ITTAPI_GIT_TAG} at ${ITTAPI_SOURCE_DIR}/ittapi") ++ execute_process(COMMAND ${GIT_EXECUTABLE} checkout ${ITTAPI_GIT_TAG} ++ WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR} ++ RESULT_VARIABLE GIT_CHECKOUT_RESULT) ++ if(NOT GIT_CHECKOUT_RESULT EQUAL "0") ++ message(FATAL_ERROR "git checkout ${ITTAPI_GIT_TAG} failed with ${GIT_CHECKOUT_RESULT}, please checkout ${ITTAPI_GIT_TAG} at ${ITTAPI_SOURCE_DIR}") ++ endif() + endif() + +-include_directories( ${ITTAPI_SOURCE_DIR}/ittapi/include/ ) ++include_directories( ${ITTAPI_SOURCE_DIR}/include/ ) + + if( HAVE_LIBDL ) + set(LLVM_INTEL_JIT_LIBS ${CMAKE_DL_LIBS}) +@@ -40,7 +40,7 @@ set(LLVM_INTEL_JIT_LIBS ${LLVM_PTHREAD_LIB} ${LLVM_INTEL_JIT_LIBS}) + add_llvm_component_library(LLVMIntelJITEvents + IntelJITEventListener.cpp + jitprofiling.c +- ${ITTAPI_SOURCE_DIR}/ittapi/src/ittnotify/ittnotify_static.c ++ ${ITTAPI_SOURCE_DIR}/src/ittnotify/ittnotify_static.c + + LINK_LIBS ${LLVM_INTEL_JIT_LIBS} + diff --git a/deps/pcre.mk b/deps/pcre.mk index eedb19faf5a57..cd1180d992885 100644 --- a/deps/pcre.mk +++ b/deps/pcre.mk @@ -1,10 +1,18 @@ ## PCRE ## +include $(SRCDIR)/pcre.version ifneq ($(USE_BINARYBUILDER_PCRE),1) # Force optimization for PCRE flags (Issue #11668) PCRE_CFLAGS := -O3 PCRE_LDFLAGS := $(RPATH_ESCAPED_ORIGIN) +ifeq ($(OS),emscripten) +PCRE_CFLAGS += -fPIC +PCRE_JIT = --disable-jit +else +PCRE_JIT = --enable-jit +endif + $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2: | $(SRCCACHE) $(JLDOWNLOAD) $@ https://github.com/PCRE2Project/pcre2/releases/download/pcre2-$(PCRE_VER)/pcre2-$(PCRE_VER).tar.bz2 @@ -19,7 +27,7 @@ checksum-pcre: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2 $(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured: $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ - $(dir $<)/configure $(CONFIGURE_COMMON) --enable-jit --includedir=$(build_includedir) CFLAGS="$(CFLAGS) $(PCRE_CFLAGS) -g -O0" LDFLAGS="$(LDFLAGS) $(PCRE_LDFLAGS)" + $(dir $<)/configure $(CONFIGURE_COMMON) $(PCRE_JIT) --includedir=$(build_includedir) CFLAGS="$(CFLAGS) $(PCRE_CFLAGS) -g -O0" LDFLAGS="$(LDFLAGS) $(PCRE_LDFLAGS)" echo 1 > $@ $(BUILDDIR)/pcre2-$(PCRE_VER)/build-compiled: $(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured diff --git a/deps/pcre.version b/deps/pcre.version new file mode 100644 index 0000000000000..ce27921435e1d --- /dev/null +++ b/deps/pcre.version @@ -0,0 +1,5 @@ +## jll artifact +PCRE_JLL_NAME := PCRE2 + +## source build +PCRE_VER := 10.42 diff --git a/deps/sanitizers.mk b/deps/sanitizers.mk new file mode 100644 index 0000000000000..1a272321c05fa --- /dev/null +++ b/deps/sanitizers.mk @@ -0,0 +1,29 @@ +# Interrogate the compiler about where it is keeping its sanitizer libraries +ifeq ($(USECLANG),1) +SANITIZER_LIB_PATH := $(shell LANG=C $(CC) -print-runtime-dir) +else +SANITIZER_LIB_PATH := $(dir $(shell LANG=C $(CC) -print-file-name=libasan.so)) +endif + +# Given a colon-separated list of paths in $(2), find the location of the library given in $(1) +define pathsearch_all +$(wildcard $(addsuffix /$(1),$(subst :, 
,$(2)))) +endef + +define copy_sanitizer_lib +install-sanitizers: $$(addprefix $$(build_libdir)/, $$(notdir $$(call pathsearch_all,$(1),$$(SANITIZER_LIB_PATH)))) | $$(build_shlibdir) +$$(addprefix $$(build_shlibdir)/,$(2)): $$(addprefix $$(SANITIZER_LIB_PATH)/,$(2)) | $$(build_shlibdir) + -cp $$< $$@ +endef + +ifeq ($(USECLANG),1) + +## Clang libraries +$(eval $(call copy_sanitizer_lib,$(call versioned_libname,libclang_rt.asan-*),$(call versioned_libname,libclang_rt.asan-%))) + +endif + +get-sanitizers: +clean-sanitizers: + -rm -f $(build_shlibdir)/libclang_rt.asan*$(SHLIB_EXT)* +distclean-sanitizers: clean-sanitizers diff --git a/deps/tools/bb-install.mk b/deps/tools/bb-install.mk index 4a56e990e5e0d..ee7f833a8ac2b 100644 --- a/deps/tools/bb-install.mk +++ b/deps/tools/bb-install.mk @@ -5,6 +5,7 @@ # 4 cxx11) # signifies a cxx11 ABI dependency define bb-install +include $$(SRCDIR)/$1.version TRIPLET_VAR := BB_TRIPLET ifeq ($(3),true) TRIPLET_VAR := $$(TRIPLET_VAR)_LIBGFORTRAN diff --git a/deps/tools/common.mk b/deps/tools/common.mk index e98557f9fb6bb..3cefc253cec3d 100644 --- a/deps/tools/common.mk +++ b/deps/tools/common.mk @@ -11,16 +11,16 @@ endif ifeq ($(OS),WINNT) CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) -Wl,--stack,8388608" else -CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN)" +CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN) $(SANITIZE_LDFLAGS)" endif -CONFIGURE_COMMON += F77="$(FC)" CC="$(CC)" CXX="$(CXX)" LD="$(LD)" - -CMAKE_CC_ARG := $(CC_ARG) -CMAKE_CXX_ARG := $(CXX_ARG) +CONFIGURE_COMMON += F77="$(FC)" CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS)" LD="$(LD)" CMAKE_COMMON := -DCMAKE_INSTALL_PREFIX:PATH=$(build_prefix) -DCMAKE_PREFIX_PATH=$(build_prefix) -CMAKE_COMMON += -DCMAKE_INSTALL_LIBDIR=$(build_libdir) -DCMAKE_INSTALL_BINDIR=$(build_bindir) CMAKE_COMMON += -DLIB_INSTALL_DIR=$(build_shlibdir) +ifneq ($(OS),WINNT) +CMAKE_COMMON += -DCMAKE_INSTALL_LIBDIR=$(build_libdir) +endif + ifeq ($(OS), Darwin) CMAKE_COMMON += -DCMAKE_MACOSX_RPATH=1 endif @@ -28,14 +28,29 @@ endif ifneq ($(VERBOSE), 0) CMAKE_COMMON += -DCMAKE_VERBOSE_MAKEFILE=ON endif -# The call to which here is to work around https://cmake.org/Bug/view.php?id=14366 -CMAKE_COMMON += -DCMAKE_C_COMPILER="$$(which $(CC_BASE))" + +# The calls to `which` are to work around https://cmake.org/Bug/view.php?id=14366 +ifeq ($(USECCACHE), 1) +# `ccache` must be used as compiler launcher, not compiler itself. 
+CMAKE_COMMON += -DCMAKE_C_COMPILER_LAUNCHER=ccache +CMAKE_COMMON += -DCMAKE_CXX_COMPILER_LAUNCHER=ccache +CMAKE_CC := "$$(which $(shell echo $(CC_ARG) | cut -d' ' -f1))" +CMAKE_CXX := "$$(which $(shell echo $(CXX_ARG) | cut -d' ' -f1))" +CMAKE_CC_ARG := $(shell echo $(CC_ARG) | cut -d' ' -f2-) +CMAKE_CXX_ARG := $(shell echo $(CXX_ARG) | cut -d' ' -f2-) +else +CMAKE_CC := "$$(which $(CC_BASE))" +CMAKE_CXX := "$$(which $(CXX_BASE))" +CMAKE_CC_ARG := $(CC_ARG) +CMAKE_CXX_ARG := $(CXX_ARG) +endif +CMAKE_COMMON += -DCMAKE_C_COMPILER=$(CMAKE_CC) ifneq ($(strip $(CMAKE_CC_ARG)),) -CMAKE_COMMON += -DCMAKE_C_COMPILER_ARG1="$(CMAKE_CC_ARG)" +CMAKE_COMMON += -DCMAKE_C_COMPILER_ARG1="$(CMAKE_CC_ARG) $(SANITIZE_OPTS)" endif -CMAKE_COMMON += -DCMAKE_CXX_COMPILER="$(CXX_BASE)" +CMAKE_COMMON += -DCMAKE_CXX_COMPILER=$(CMAKE_CXX) ifneq ($(strip $(CMAKE_CXX_ARG)),) -CMAKE_COMMON += -DCMAKE_CXX_COMPILER_ARG1="$(CMAKE_CXX_ARG)" +CMAKE_COMMON += -DCMAKE_CXX_COMPILER_ARG1="$(CMAKE_CXX_ARG) $(SANITIZE_OPTS)" endif CMAKE_COMMON += -DCMAKE_LINKER="$$(which $(LD))" -DCMAKE_AR="$$(which $(AR))" -DCMAKE_RANLIB="$$(which $(RANLIB))" @@ -108,8 +123,8 @@ endif DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_includedir) $(build_sysconfdir) $(build_datarootdir) $(build_staging) $(build_prefix)/manifest) $(foreach dir,$(DIRS),$(eval $(call dir_target,$(dir)))) - $(build_prefix): | $(DIRS) + $(eval $(call dir_target,$(SRCCACHE))) @@ -175,6 +190,7 @@ $$(build_prefix)/manifest/$(strip $1): $$(build_staging)/$2.tar | $(build_prefix $(UNTAR) $$< -C $$(build_prefix) $6 echo '$$(UNINSTALL_$(strip $1))' > $$@ +.PHONY: $(addsuffix -$(strip $1),stage install distclean uninstall reinstall) endef define staged-uninstaller @@ -193,14 +209,18 @@ endef define symlink_install # (target-name, rel-from, abs-to) clean-$1: uninstall-$1 install-$1: $$(build_prefix)/manifest/$1 -reinstall-$1: install-$1 +reinstall-$1: + +$$(MAKE) uninstall-$1 + +$$(MAKE) stage-$1 + +$$(MAKE) install-$1 +.PHONY: $(addsuffix -$1,clean install reinstall) UNINSTALL_$(strip $1) := $2 symlink-uninstaller $3 -$$(build_prefix)/manifest/$1: $$(BUILDDIR)/$2/build-compiled | $3 $$(build_prefix)/manifest +$$(build_prefix)/manifest/$1: $$(BUILDDIR)/$2/build-compiled | $$(abspath $$(dir $3/$1)) $$(abspath $$(dir $$(build_prefix)/manifest/$1)) -+[ ! 
\( -e $3/$1 -o -h $3/$1 \) ] || $$(MAKE) uninstall-$1 ifeq ($$(BUILD_OS), WINNT) - cmd //C mklink //J $$(call mingw_to_dos,$3/$1,cd $3 &&) $$(call mingw_to_dos,$$(BUILDDIR)/$2,) + cmd //C mklink //J $$(call mingw_to_dos,$3/$1,cd $3/$(dir $1) &&) $$(call mingw_to_dos,$$(BUILDDIR)/$2,) else ifneq (,$$(findstring CYGWIN,$$(BUILD_OS))) cmd /C mklink /J $$(call cygpath_w,$3/$1) $$(call cygpath_w,$$(BUILDDIR)/$2) else ifdef JULIA_VAGRANT_BUILD @@ -214,7 +234,7 @@ endef define symlink-uninstaller uninstall-$1: ifeq ($$(BUILD_OS), WINNT) - -cmd //C rmdir $$(call mingw_to_dos,$3/$1,cd $3 &&) + -cmd //C rmdir $$(call mingw_to_dos,$3/$1,cd $3/$(dir $1) &&) else rm -rf $3/$1 endif diff --git a/deps/tools/git-external.mk b/deps/tools/git-external.mk index 65b40b87ee937..cf1610ac1bf5d 100644 --- a/deps/tools/git-external.mk +++ b/deps/tools/git-external.mk @@ -68,11 +68,12 @@ $5/$$($2_SRC_DIR)/source-extracted: $$($2_SRC_FILE) $(TAR) -C $$(dir $$@) --strip-components 1 -xf $$< echo 1 > $$@ -checksum-$(1): $$($2_SRC_FILE) +checksum-$1: $$($2_SRC_FILE) $$(JLCHECKSUM) $$< endif # DEPS_GIT $$(build_prefix)/manifest/$1: $$(SRCDIR)/$1.version # make the manifest stale if the version file is touched (causing re-install for compliant targets) distclean-$1: rm -rf $5/$$($2_SRC_DIR) $$($2_SRC_FILE) $$(BUILDDIR)/$$($2_SRC_DIR) +.PHONY: $(addsuffix -$1,checksum distclean) endef diff --git a/deps/tools/stdlib-external.mk b/deps/tools/stdlib-external.mk index 60f50b56ee2e0..0a99111605a45 100644 --- a/deps/tools/stdlib-external.mk +++ b/deps/tools/stdlib-external.mk @@ -16,12 +16,17 @@ $$(eval $$(call git-external,$1,$2,,,$$(BUILDDIR))) $$(BUILDDIR)/$$($2_SRC_DIR)/build-compiled: $$(BUILDDIR)/$$($2_SRC_DIR)/source-extracted @# no build steps echo 1 > $$@ -$$(eval $$(call symlink_install,$1,$$$$($2_SRC_DIR),$$$$(build_datarootdir)/julia/stdlib/$$$$(VERSDIR))) +$$(eval $$(call symlink_install,$$$$(VERSDIR)/$1,$$$$($2_SRC_DIR),$$$$(build_datarootdir)/julia/stdlib)) clean-$1: -rm -f $$(BUILDDIR)/$$($2_SRC_DIR)/build-compiled get-$1: $$($2_SRC_FILE) extract-$1: $$(BUILDDIR)/$$($2_SRC_DIR)/source-extracted configure-$1: extract-$1 compile-$1: $$(BUILDDIR)/$$($2_SRC_DIR)/build-compiled - +install-$1: install-$$(VERSDIR)/$1 +uninstall-$1: uninstall-$$(VERSDIR)/$1 +reinstall-$1: reinstall-$$(VERSDIR)/$1 +version-check-$1: version-check-$$(VERSDIR)/$1 +clean-$1: clean-$$(VERSDIR)/$1 +.PHONY: $(addsuffix -$1,get extract configure compile install uninstall reinstall clean) endef diff --git a/deps/tools/uninstallers.mk b/deps/tools/uninstallers.mk index 48387914643db..0051786ed1d0a 100644 --- a/deps/tools/uninstallers.mk +++ b/deps/tools/uninstallers.mk @@ -17,6 +17,7 @@ else uninstall-$1: @echo "skipping uninstall: $1 not installed" endif +.PHONY: uninstall-$1 endef $(foreach dep,$(DEP_LIBS_STAGED_ALL),$(eval $(call define-uninstaller,$(dep)))) diff --git a/deps/unwind.mk b/deps/unwind.mk index da303891c5d3c..66607845428c4 100644 --- a/deps/unwind.mk +++ b/deps/unwind.mk @@ -1,7 +1,9 @@ ## UNWIND ## +include $(SRCDIR)/unwind.version +include $(SRCDIR)/llvmunwind.version ifneq ($(USE_BINARYBUILDER_LIBUNWIND),1) -LIBUNWIND_CFLAGS := -U_FORTIFY_SOURCE $(fPIC) -lz +LIBUNWIND_CFLAGS := -U_FORTIFY_SOURCE $(fPIC) -lz $(SANITIZE_OPTS) LIBUNWIND_CPPFLAGS := ifeq ($(USE_SYSTEM_ZLIB),0) @@ -24,30 +26,17 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted: $(SRCCACHE)/libunwind-$(UN checksum-unwind: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz $(JLCHECKSUM) $< -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-prefer-extbl.patch-applied: 
$(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f < $(SRCDIR)/patches/libunwind-prefer-extbl.patch +$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted + cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-revert_prelink_unwind.patch echo 1 > $@ -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-prefer-extbl.patch-applied - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f < $(SRCDIR)/patches/libunwind-static-arm.patch - echo 1 > $@ - -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u < $(SRCDIR)/patches/libunwind-cfa-rsp.patch - echo 1 > $@ - -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-dwarf-table.patch - echo 1 > $@ - -$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-non-empty-structs.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied - cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-non-empty-structs.patch - echo 1 > $@ - -$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-non-empty-structs.patch-applied +# note minidebuginfo requires liblzma, which we do not have a source build for +# (it will be enabled in BinaryBuilder-based downloads however) +# since https://github.com/JuliaPackaging/Yggdrasil/commit/0149e021be9badcb331007c62442a4f554f3003c +$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-revert_prelink_unwind.patch-applied mkdir -p $(dir $@) cd $(dir $@) && \ - $(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo + $(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks echo 1 > $@ $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured diff --git a/deps/unwind.version b/deps/unwind.version new file mode 100644 index 0000000000000..1349f2d657e87 --- /dev/null +++ b/deps/unwind.version @@ -0,0 +1,6 @@ +## jll artifact +UNWIND_JLL_NAME := LibUnwind + +## source build +UNWIND_VER_TAG := 1.7.2 +UNWIND_VER := 1.7.2 diff --git a/deps/utf8proc.mk b/deps/utf8proc.mk index 70cf4e396ff65..cc526d40250c5 100644 --- a/deps/utf8proc.mk +++ b/deps/utf8proc.mk @@ -5,7 +5,7 @@ $(eval $(call git-external,utf8proc,UTF8PROC,,,$(BUILDDIR))) UTF8PROC_OBJ_LIB := $(build_libdir)/libutf8proc.a UTF8PROC_OBJ_HEADER := $(build_includedir)/utf8proc.h -UTF8PROC_CFLAGS := -O2 +UTF8PROC_CFLAGS := -O2 $(SANITIZE_OPTS) UTF8PROC_MFLAGS := CC="$(CC)" CFLAGS="$(CFLAGS) $(UTF8PROC_CFLAGS)" PICFLAG="$(fPIC)" AR="$(AR)" UTF8PROC_BUILDDIR := $(BUILDDIR)/$(UTF8PROC_SRC_DIR) diff --git 
a/deps/utf8proc.version b/deps/utf8proc.version index 246a38de00bae..a026ca858cfd3 100644 --- a/deps/utf8proc.version +++ b/deps/utf8proc.version @@ -1,2 +1,2 @@ -UTF8PROC_BRANCH=v2.7.0 -UTF8PROC_SHA1=8ca6144c85c165987cb1c5d8395c7314e13d4cd7 +UTF8PROC_BRANCH=v2.9.0 +UTF8PROC_SHA1=34db3f7954e9298e89f42641ac78e0450f80a70d diff --git a/deps/valgrind/valgrind.h b/deps/valgrind/valgrind.h index 2e07a49d91dfa..b33fd70fab672 100644 --- a/deps/valgrind/valgrind.h +++ b/deps/valgrind/valgrind.h @@ -1065,7 +1065,7 @@ typedef /* Use these to write the name of your wrapper. NOTE: duplicates VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. NOTE also: inserts - the default behaviour equivalance class tag "0000" into the name. + the default behaviour equivalence class tag "0000" into the name. See pub_tool_redir.h for details -- normally you don't need to think about this, though. */ diff --git a/deps/zlib.mk b/deps/zlib.mk index d43f829c13111..5548a0791f4d2 100644 --- a/deps/zlib.mk +++ b/deps/zlib.mk @@ -4,9 +4,13 @@ ZLIB_GIT_URL := https://github.com/madler/zlib.git ZLIB_TAR_URL = https://api.github.com/repos/madler/zlib/tarball/$1 $(eval $(call git-external,zlib,ZLIB,,,$(SRCCACHE))) +# use `-DUNIX=true` to ensure that it is always named `libz` +ZLIB_BUILD_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DUNIX=true +ZLIB_BUILD_OPTS += -DCMAKE_POSITION_INDEPENDENT_CODE=ON + $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured: $(SRCCACHE)/$(ZLIB_SRC_DIR)/source-extracted mkdir -p $(dir $@) - cd $(dir $@) && $(CMAKE) -DCMAKE_INSTALL_PREFIX=$(abspath $(build_prefix)) -DCMAKE_BUILD_TYPE=Release -DUNIX=true $(dir $<) + cd $(dir $@) && $(CMAKE) $(ZLIB_BUILD_OPTS) $(dir $<) echo 1 > $@ $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-compiled: $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured @@ -19,12 +23,12 @@ $(eval $(call staged-install, \ $(INSTALL_NAME_CMD)libz.$(SHLIB_EXT) $(build_shlibdir)/libz.$(SHLIB_EXT))) clean-zlib: - -rm -f $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-compiled $(build_libdir)/libz.a* $(build_libdir)/libz.so* $(build_includedir)/zlib.h $(build_includedir)/zconf.h - -$(MAKE) -C $(BUILDDIR)/$(ZLIB_SRC_DIR) distclean $(ZLIB_FLAGS) + -rm -f $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-compiled + -$(MAKE) -C $(BUILDDIR)/$(ZLIB_SRC_DIR) clean get-zlib: $(ZLIB_SRC_FILE) extract-zlib: $(BUILDDIR)/$(ZLIB_SRC_DIR)/source-extracted -configure-zlib: extract-zlib +configure-zlib: $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured compile-zlib: $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-compiled fastcheck-zlib: check-zlib check-zlib: compile-zlib diff --git a/deps/zlib.version b/deps/zlib.version index 0b16a7f662dd1..89a304c49b6dc 100644 --- a/deps/zlib.version +++ b/deps/zlib.version @@ -1,2 +1,8 @@ -ZLIB_BRANCH=v1.2.12 -ZLIB_SHA1=21767c654d31d2dccdde4330529775c6c5fd5389 +# -*- makefile -*- +## jll artifact +ZLIB_JLL_NAME := Zlib + +## source build +ZLIB_VER := 1.2.13 +ZLIB_BRANCH=v1.2.13 +ZLIB_SHA1=04f42ceca40f73e2978b50e93806c2a18c1281fc diff --git a/doc/Manifest.toml b/doc/Manifest.toml index e3d56b7594251..3e973e043f60a 100644 --- a/doc/Manifest.toml +++ b/doc/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.9.0-DEV" +julia_version = "1.11.0-DEV" manifest_format = "2.0" project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f" @@ -9,76 +9,216 @@ git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" version = "0.0.1" +[[deps.AbstractTrees]] +git-tree-sha1 = 
"faa260e4cb5aba097a73fab382dd4b5819d8ec8c" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.4" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" [[deps.DocStringExtensions]] deps = ["LibGit2"] -git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.6" +version = "0.9.3" [[deps.Documenter]] -deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "122d031e8dcb2d3e767ed434bc4d1ae1788b5a7f" +deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "Test", "Unicode"] +git-tree-sha1 = "2613dbec8f4748273bbe30ba71fd5cb369966bac" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.27.17" +version = "1.2.1" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.5.0+0" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.Git]] +deps = ["Git_jll"] +git-tree-sha1 = "51764e6c2e84c37055e846c516e9015b4a291c7d" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = "1.3.0" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "bb8f7cc77ec1152414b2af6db533d9471cfbb2d1" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.42.0+0" [[deps.IOCapture]] deps = ["Logging", "Random"] -git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a" +git-tree-sha1 = "d75853a0bdbfb1ac815478bacd89cd27b550ace6" uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" -version = "0.2.2" +version = "0.2.3" [[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.5.0" [[deps.JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e" +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.3" +version = "0.21.4" + +[[deps.LazilyInitializedFields]] +git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612" +uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" +version = "1.2.2" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = 
"deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.4.0+0" [[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.7.1+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+0" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MarkdownAST]] +deps = ["AbstractTrees", "Markdown"] +git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" +uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" +version = "0.1.2" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+1" [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.1.10" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "cc6e1927ac521b659af340e0ca45828a3ffc748f" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.12+0" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.42.0+1" + [[deps.Parsers]] -deps = ["Dates"] -git-tree-sha1 = "1285416549ccfcdf0c50d4997a94331e88d68413" +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "a935806434c9d4c506ba941871b327b96d41f2bf" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.3.1" +version = "2.8.0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.11.0" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.0" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.1" [[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" [[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" [[deps.Random]] -deps = ["SHA", "Serialization"] +deps = ["SHA"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.RegistryInstances]] +deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] +git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" +uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" +version = "0.1.0" [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" @@ -86,13 +226,47 @@ version = "0.7.0" [[deps.Serialization]] uuid = 
"9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" [[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.58.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" diff --git a/doc/make.jl b/doc/make.jl index 5028b89093cc4..cf266370acb01 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -8,6 +8,7 @@ using Pkg Pkg.instantiate() using Documenter +import LibGit2 baremodule GenStdLib end @@ -42,6 +43,68 @@ cd(joinpath(@__DIR__, "src")) do end end +# Because we have standard libraries that are hosted outside of the julia repo, +# but their docs are included in the manual, we need to populate the remotes argument +# of makedocs(), to make sure that Documenter knows how to resolve the directories +# in stdlib/ to the correct remote Git repositories (for source and edit links). +# +# This function parses the *.version files in stdlib/, returning a dictionary with +# all the key-value pairs from those files. *_GIT_URL and *_SHA1 fields are the ones +# we will actually be interested in. +function parse_stdlib_version_file(path) + values = Dict{String,String}() + for line in readlines(path) + m = match(r"^([A-Z0-9_]+)\s+:?=\s+(\S+)$", line) + if isnothing(m) + @warn "Unable to parse line in $(path)" line + else + values[m[1]] = m[2] + end + end + return values +end +# This generates the value that will be passed to the `remotes` argument of makedocs(), +# by looking through all *.version files in stdlib/. +documenter_stdlib_remotes = let stdlib_dir = realpath(joinpath(@__DIR__, "..", "stdlib")) + # Get a list of all *.version files in stdlib/.. + version_files = filter(readdir(stdlib_dir)) do fname + isfile(joinpath(stdlib_dir, fname)) && endswith(fname, ".version") + end + # .. and then parse them, each becoming an entry for makedocs's remotes. + # The values for each are of the form path => (remote, sha1), where + # - path: the path to the stdlib package's root directory, i.e. "stdlib/$PACKAGE" + # - remote: a Documenter.Remote object, pointing to the Git repository where package is hosted + # - sha1: the SHA1 of the commit that is included with the current Julia version + remotes_list = map(version_files) do version_fname + package = match(r"(.+)\.version", version_fname)[1] + versionfile = parse_stdlib_version_file(joinpath(stdlib_dir, version_fname)) + # From the (all uppercase) $(package)_GIT_URL and $(package)_SHA1 fields, we'll determine + # the necessary information. If this logic happens to fail for some reason for any of the + # standard libraries, we'll crash the documentation build, so that it could be fixed. 
+ remote = let git_url_key = "$(uppercase(package))_GIT_URL" + haskey(versionfile, git_url_key) || error("Missing $(git_url_key) in $version_fname") + m = match(LibGit2.GITHUB_REGEX, versionfile[git_url_key]) + isnothing(m) && error("Unable to parse $(git_url_key)='$(versionfile[git_url_key])' in $version_fname") + Documenter.Remotes.GitHub(m[2], m[3]) + end + package_sha = let sha_key = "$(uppercase(package))_SHA1" + haskey(versionfile, sha_key) || error("Missing $(sha_key) in $version_fname") + versionfile[sha_key] + end + # Construct the absolute (local) path to the stdlib package's root directory + package_root_dir = joinpath(stdlib_dir, "$(package)-$(package_sha)") + # Documenter needs package_root_dir to exist --- it's just a sanity check it does on the remotes= keyword. + # In normal (local) builds, this will be the case, since the Makefiles will have unpacked the standard + # libraries. However, on CI we do this thing where we actually build docs in a clean worktree, just + # unpacking the `usr/` directory from the main build, and the unpacked stdlibs will be missing, and this + # will cause Documenter to throw an error. However, we don't _actually_ need the source files of the standard + # libraries to be present, so we just generate empty root directories to satisfy the check in Documenter. + isdir(package_root_dir) || mkpath(package_root_dir) + package_root_dir => (remote, package_sha) + end + Dict(remotes_list) +end + # Check if we are building a PDF const render_pdf = "pdf" in ARGS @@ -92,7 +155,6 @@ Manual = [ "manual/environment-variables.md", "manual/embedding.md", "manual/code-loading.md", - "manual/profile.md", "manual/stacktraces.md", "manual/performance-tips.md", "manual/workflow-tips.md", @@ -112,12 +174,14 @@ BaseDocs = [ "base/arrays.md", "base/parallel.md", "base/multi-threading.md", + "base/scopedvalues.md", "base/constants.md", "base/file.md", "base/io-network.md", "base/punctuation.md", "base/sort.md", "base/iterators.md", + "base/reflection.md", "base/c.md", "base/libc.md", "base/stacktraces.md", @@ -126,8 +190,13 @@ BaseDocs = [ StdlibDocs = [stdlib.targetfile for stdlib in STDLIB_DOCS] +Tutorials = [ + "tutorials/creating-packages.md", + "tutorials/profile.md", + "tutorials/external.md", +] + DevDocs = [ - "devdocs/reflection.md", "Documentation of Julia's Internals" => [ "devdocs/init.md", "devdocs/ast.md", @@ -142,6 +211,8 @@ DevDocs = [ "devdocs/subarrays.md", "devdocs/isbitsunionarrays.md", "devdocs/sysimg.md", + "devdocs/pkgimg.md", + "devdocs/llvm-passes.md", "devdocs/llvm.md", "devdocs/stdio.md", "devdocs/boundscheck.md", @@ -151,12 +222,18 @@ DevDocs = [ "devdocs/inference.md", "devdocs/ssair.md", "devdocs/EscapeAnalysis.md", + "devdocs/aot.md", "devdocs/gc-sa.md", + "devdocs/gc.md", + "devdocs/jit.md", + "devdocs/builtins.md", + "devdocs/precompile_hang.md", ], "Developing/debugging Julia's C code" => [ "devdocs/backtraces.md", "devdocs/debuggingtips.md", "devdocs/valgrind.md", + "devdocs/external_profilers.md", "devdocs/sanitizers.md", "devdocs/probes.md", ], @@ -177,6 +254,7 @@ const PAGES = [ "Manual" => ["index.md", Manual...], "Base" => BaseDocs, "Standard Library" => StdlibDocs, + "Tutorials" => Tutorials, # Add "Release Notes" to devdocs "Developer Documentation" => [DevDocs..., hide("NEWS.md")], ] @@ -187,6 +265,7 @@ const PAGES = [ "Manual" => Manual, "Base" => BaseDocs, "Standard Library" => StdlibDocs, + "Tutorials" => Tutorials, "Developer Documentation" => DevDocs, ] end @@ -262,12 +341,6 @@ DocMeta.setdocmeta!( maybe_revise(:(using 
Base.BinaryPlatforms)); recursive=true, warn=false, ) -DocMeta.setdocmeta!( - Pkg.LazilyInitializedFields, - :DocTestSetup, - maybe_revise(:(using Pkg.LazilyInitializedFields)); - recursive=true, warn=false, -) let r = r"buildroot=(.+)", i = findfirst(x -> occursin(r, x), ARGS) global const buildroot = i === nothing ? (@__DIR__) : first(match(r, ARGS[i]).captures) @@ -289,6 +362,8 @@ else collapselevel = 1, sidebar_sitename = false, ansicolor = true, + size_threshold = 800 * 2^10, # 800 KiB + size_threshold_warn = 200 * 2^10, # the manual has quite a few large pages, so we warn at 200+ KiB only ) end @@ -300,12 +375,12 @@ makedocs( doctest = ("doctest=fix" in ARGS) ? (:fix) : ("doctest=only" in ARGS) ? (:only) : ("doctest=true" in ARGS) ? true : false, linkcheck = "linkcheck=true" in ARGS, linkcheck_ignore = ["https://bugs.kde.org/show_bug.cgi?id=136779"], # fails to load from nanosoldier? - strict = true, checkdocs = :none, format = format, sitename = "The Julia Language", authors = "The Julia Project", pages = PAGES, + remotes = documenter_stdlib_remotes, ) # Update URLs to external stdlibs (JuliaLang/julia#43199) @@ -336,8 +411,14 @@ end # Define our own DeployConfig struct BuildBotConfig <: Documenter.DeployConfig end +Documenter.authentication_method(::BuildBotConfig) = Documenter.HTTPS +Documenter.authenticated_repo_url(::BuildBotConfig) = "https://github.com/JuliaLang/docs.julialang.org.git" function Documenter.deploy_folder(::BuildBotConfig; devurl, repo, branch, kwargs...) - haskey(ENV, "DOCUMENTER_KEY") || return Documenter.DeployDecision(; all_ok=false) + if !haskey(ENV, "DOCUMENTER_KEY") + @info "Unable to deploy the documentation: DOCUMENTER_KEY missing" + return Documenter.DeployDecision(; all_ok=false) + end + release = match(r"^release-([0-9]+\.[0-9]+)$", Base.GIT_VERSION_INFO.branch) if Base.GIT_VERSION_INFO.tagged_commit # Strip extra pre-release info (1.5.0-rc2.0 -> 1.5.0-rc2) ver = VersionNumber(VERSION.major, VERSION.minor, VERSION.patch, @@ -346,7 +427,16 @@ function Documenter.deploy_folder(::BuildBotConfig; devurl, repo, branch, kwargs return Documenter.DeployDecision(; all_ok=true, repo, branch, subfolder) elseif Base.GIT_VERSION_INFO.branch == "master" return Documenter.DeployDecision(; all_ok=true, repo, branch, subfolder=devurl) + elseif !isnothing(release) + # If this is a non-tag build from a release-* branch, we deploy them as dev docs into the + # appropriate vX.Y-dev subdirectory. 
+ return Documenter.DeployDecision(; all_ok=true, repo, branch, subfolder="v$(release[1])-dev") end + @info """ + Unable to deploy the documentation: invalid GIT_VERSION_INFO + GIT_VERSION_INFO.tagged_commit: $(Base.GIT_VERSION_INFO.tagged_commit) + GIT_VERSION_INFO.branch: $(Base.GIT_VERSION_INFO.branch) + """ return Documenter.DeployDecision(; all_ok=false) end @@ -374,11 +464,16 @@ function Documenter.Writers.HTMLWriter.expand_versions(dir::String, v::Versions) return Documenter.Writers.HTMLWriter.expand_versions(dir, v.versions) end -deploydocs( - repo = "github.com/JuliaLang/docs.julialang.org.git", - deploy_config = BuildBotConfig(), - target = joinpath(buildroot, "doc", "_build", "html", "en"), - dirname = "en", - devurl = devurl, - versions = Versions(["v#.#", devurl => devurl]), -) +if "deploy" in ARGS + deploydocs( + repo = "github.com/JuliaLang/docs.julialang.org.git", + deploy_config = BuildBotConfig(), + target = joinpath(buildroot, "doc", "_build", "html", "en"), + dirname = "en", + devurl = devurl, + versions = Versions(["v#.#", devurl => devurl]), + archive = get(ENV, "DOCUMENTER_ARCHIVE", nothing), + ) +else + @info "Skipping deployment ('deploy' not passed)" +end diff --git a/doc/man/julia.1 b/doc/man/julia.1 index 383c588c58dae..b0af07539cb41 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -21,7 +21,7 @@ .\" - diagnostics .\" - notes -.TH JULIA 1 2022-02-17 JULIA +.TH JULIA 1 2023-09-01 JULIA .\" from the front page of https://julialang.org/ .SH NAME @@ -67,7 +67,7 @@ Print uncommon options not shown by `-h` .TP --project[=/@.] -Set as the home project/environment. The default @. option will search +Set as the active project/environment. The default @. option will search through parent directories until a Project.toml or JuliaProject.toml file is found. @@ -93,8 +93,12 @@ Enable or disable Julia's default signal handlers Use native code from system image if available .TP ---compiled-modules={yes*|no} -Enable or disable incremental precompilation of modules +--compiled-modules={yes*|no|existing} +Enable or disable incremental precompilation of modules. + +.TP +--pkgimages={yes*|no|existing} +Enable or disable usage of native code caching in the form of pkgimages .TP -e, --eval @@ -118,6 +122,11 @@ supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads. +.TP +--gcthreads +Enable n GC threads; If unspecified is set to half of the +compute worker threads. + .TP -p, --procs {N|auto} Integer value N launches N additional local worker processes `auto` launches as many workers @@ -272,6 +281,15 @@ See https://docs.julialang.org/en/v1/manual/environment-variables/ Please report any bugs using the GitHub issue tracker: https://github.com/julialang/julia/issues?state=open - .SH AUTHORS Contributors: https://github.com/JuliaLang/julia/graphs/contributors + +.SH INTERNET RESOURCES +Website: https://julialang.org/ +.br +Documentation: https://docs.julialang.org/ +.br +Downloads: https://julialang.org/downloads/ + +.SH LICENSING +Julia is an open-source project. It is made available under the MIT license. 
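The threading-related options described in the man page can be checked from inside a running session. A minimal, hedged sketch, assuming Julia was started with the example flags shown in the comment and is recent enough to have them (`--gcthreads` appears in 1.10):

```julia
# Sketch: assumes the session was started as `julia --threads=4 --gcthreads=2`.
using Base.Threads

nthreads()        # number of :default compute threads (4 in this invocation)
threadpoolsize()  # size of the current thread pool
ngcthreads()      # number of GC threads, reflecting --gcthreads (Julia 1.10+)
```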
diff --git a/doc/src/assets/preamble.tex b/doc/src/assets/preamble.tex index fe26add788c5b..2c492535c16b0 100644 --- a/doc/src/assets/preamble.tex +++ b/doc/src/assets/preamble.tex @@ -22,9 +22,9 @@ % - Figure 9.2: Layout of a ToC % - Table 9.3: Value of K in macros for styling entries \makeatletter -% {part} to {chaper} +% {part} to {chapter} \setlength{\cftbeforepartskip}{1.5em \@plus \p@} -% {chaper} to {chaper} +% {chapter} to {chapter} \setlength{\cftbeforechapterskip}{0.0em \@plus \p@} % Chapter num to chapter title spacing (Figure 9.2@memman) \setlength{\cftchapternumwidth}{2.5em \@plus \p@} diff --git a/doc/src/base/arrays.md b/doc/src/base/arrays.md index 853e4c7a4ec1b..ada748fdbe713 100644 --- a/doc/src/base/arrays.md +++ b/doc/src/base/arrays.md @@ -30,6 +30,8 @@ Base.StridedArray Base.StridedVector Base.StridedMatrix Base.StridedVecOrMat +Base.Memory +Base.MemoryRef Base.Slices Base.RowSlices Base.ColumnSlices @@ -145,6 +147,7 @@ Base.vcat Base.hcat Base.hvcat Base.hvncat +Base.stack Base.vect Base.circshift Base.circshift! diff --git a/doc/src/base/base.md b/doc/src/base/base.md index 2105817475fe6..c5d7320148e97 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -4,7 +4,8 @@ Julia Base contains a range of functions and macros appropriate for performing scientific and numerical computing, but is also as broad as those of many general purpose programming -languages. Additional functionality is available from a growing collection of available packages. +languages. Additional functionality is available from a growing collection of +[available packages](https://julialang.org/packages/). Functions are grouped by topic below. Some general notes: @@ -29,7 +30,7 @@ Base.isinteractive Base.summarysize Base.__precompile__ Base.include -Base.MainInclude.include +Main.include Base.include_string Base.include_dependency __init__ @@ -37,6 +38,7 @@ Base.which(::Any, ::Any) Base.methods Base.@show ans +err Base.active_project Base.set_active_project ``` @@ -58,14 +60,18 @@ However, you can create variables with names: Finally: `where` is parsed as an infix operator for writing parametric method and type definitions; `in` and `isa` are parsed as infix operators; -and `outer` is parsed as a keyword when used to modify the scope of a variable in an iteration specification of a `for` loop. -Creation of variables named `where`, `in`, `isa` or `outer` is allowed though. +`public` is parsed as a keyword when beginning a toplevel statement; +`outer` is parsed as a keyword when used to modify the scope of a variable in an iteration specification of a `for` loop; +and `as` is used as a keyword to rename an identifier brought into scope by `import` or `using`. +Creation of variables named `where`, `in`, `isa`, `outer` and `as` is allowed, though. ```@docs module export +public import using +as baremodule function macro @@ -88,6 +94,7 @@ outer const struct mutable struct +@kwdef abstract type primitive type where @@ -124,6 +131,7 @@ Core.:(===) Core.isa Base.isequal Base.isless +Base.isunordered Base.ifelse Core.typeassert Core.typeof @@ -137,17 +145,26 @@ Base.copy Base.deepcopy Base.getproperty Base.setproperty! +Base.replaceproperty! +Base.swapproperty! +Base.modifyproperty! Base.propertynames Base.hasproperty Core.getfield Core.setfield! +Core.modifyfield! +Core.replacefield! +Core.swapfield! Core.isdefined +Core.getglobal +Core.setglobal! 
Base.@isdefined Base.convert Base.promote Base.oftype Base.widen Base.identity +Base.WeakRef ``` ## Properties of Types @@ -187,6 +204,7 @@ Base.fieldcount Base.hasfield Core.nfields Base.isconst +Base.isfieldatomic ``` ### Memory layout @@ -226,6 +244,7 @@ Core.Tuple Core.NTuple Core.NamedTuple Base.@NamedTuple +Base.@Kwargs Base.Val Core.Vararg Core.Nothing @@ -257,7 +276,7 @@ new Base.:(|>) Base.:(∘) Base.ComposedFunction -Base.Splat +Base.splat Base.Fix1 Base.Fix2 ``` @@ -266,7 +285,7 @@ Base.Fix2 ```@docs Core.eval -Base.MainInclude.eval +Main.eval Base.@eval Base.evalfile Base.esc @@ -277,6 +296,8 @@ Base.@inline Base.@noinline Base.@nospecialize Base.@specialize +Base.@nospecializeinfer +Base.@constprop Base.gensym Base.@gensym var"name" @@ -285,7 +306,6 @@ Base.@label Base.@simd Base.@polly Base.@generated -Base.@pure Base.@assume_effects Base.@deprecate ``` @@ -331,6 +351,7 @@ Base.@timev Base.@timed Base.@elapsed Base.@allocated +Base.@allocations Base.EnvDict Base.ENV Base.Sys.STDLIB @@ -346,10 +367,13 @@ Base.Sys.iswindows Base.Sys.windows_version Base.Sys.free_memory Base.Sys.total_memory +Base.Sys.free_physical_memory +Base.Sys.total_physical_memory Base.Sys.uptime Base.Sys.isjsvm Base.Sys.loadavg Base.Sys.isexecutable +Base.Sys.username Base.@static ``` @@ -430,6 +454,8 @@ Base.@__DIR__ Base.@__LINE__ Base.fullname Base.names +Base.isexported +Base.ispublic Base.nameof(::Function) Base.functionloc(::Any, ::Any) Base.functionloc(::Method) @@ -443,6 +469,8 @@ Base.identify_package Base.locate_package Base.require Base.compilecache +Base.isprecompiled +Base.get_extension ``` ## Internals diff --git a/doc/src/base/c.md b/doc/src/base/c.md index bdc64fa6d98b4..bf7e2577029fe 100644 --- a/doc/src/base/c.md +++ b/doc/src/base/c.md @@ -10,8 +10,11 @@ Base.unsafe_convert Base.cconvert Base.unsafe_load Base.unsafe_store! +Base.unsafe_modify! +Base.unsafe_replace! +Base.unsafe_swap! Base.unsafe_copyto!{T}(::Ptr{T}, ::Ptr{T}, ::Any) -Base.unsafe_copyto!{T}(::Array{T}, ::Any, ::Array{T}, ::Any, ::Any) +Base.unsafe_copyto!(::Array, ::Any, ::Array, ::Any, ::Any) Base.copyto! Base.pointer Base.unsafe_wrap{T,N}(::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}}, ::Ptr{T}, ::NTuple{N,Int}) diff --git a/doc/src/base/collections.md b/doc/src/base/collections.md index 96f540086d021..23cb8d6dab7c2 100644 --- a/doc/src/base/collections.md +++ b/doc/src/base/collections.md @@ -64,6 +64,7 @@ Base.LinRange ```@docs Base.isempty +Base.isdone Base.empty! Base.length Base.checked_length @@ -89,6 +90,7 @@ Fully implemented by: ```@docs Base.in Base.:∉ +Base.hasfastin Base.eltype Base.indexin Base.unique @@ -202,6 +204,7 @@ Base.Dict Base.IdDict Base.WeakKeyDict Base.ImmutableDict +Base.PersistentDict Base.haskey Base.get Base.get! @@ -234,6 +237,7 @@ Partially implemented by: * [`Array`](@ref) * [`BitArray`](@ref) * [`ImmutableDict`](@ref Base.ImmutableDict) + * [`PersistentDict`](@ref Base.PersistentDict) * [`Iterators.Pairs`](@ref) ## Set-Like Collections @@ -251,6 +255,7 @@ Base.symdiff Base.symdiff! Base.intersect! Base.issubset +Base.in! 
Base.:⊈ Base.:⊊ Base.issetequal diff --git a/doc/src/base/constants.md b/doc/src/base/constants.md index 4ba0e627b0c54..14ddbc02698d0 100644 --- a/doc/src/base/constants.md +++ b/doc/src/base/constants.md @@ -23,6 +23,3 @@ See also: * [`stderr`](@ref) * [`ENV`](@ref) * [`ENDIAN_BOM`](@ref) - * `Libc.MS_ASYNC` - * `Libc.MS_INVALIDATE` - * `Libc.MS_SYNC` diff --git a/doc/src/base/file.md b/doc/src/base/file.md index 5e4f34ba510ab..22799f882bb26 100644 --- a/doc/src/base/file.md +++ b/doc/src/base/file.md @@ -1,6 +1,8 @@ # Filesystem ```@docs +Base.read(::String) +Base.write(::String, ::Any) Base.Filesystem.pwd Base.Filesystem.cd(::AbstractString) Base.Filesystem.cd(::Function) @@ -59,6 +61,7 @@ Base.Filesystem.realpath Base.Filesystem.relpath Base.Filesystem.expanduser Base.Filesystem.contractuser +Base.Filesystem.samefile Base.Filesystem.splitdir Base.Filesystem.splitdrive Base.Filesystem.splitext diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md index 4e371039f1a9b..33fc113375e4e 100644 --- a/doc/src/base/io-network.md +++ b/doc/src/base/io-network.md @@ -6,6 +6,8 @@ Base.stdout Base.stderr Base.stdin +Base.read(::AbstractString) +Base.write(::AbstractString, ::Any) Base.open Base.IOStream Base.IOBuffer @@ -71,6 +73,8 @@ Base.readline Base.readuntil Base.readlines Base.eachline +Base.copyline +Base.copyuntil Base.displaysize ``` diff --git a/doc/src/base/libc.md b/doc/src/base/libc.md index 0af1b74a79a71..c0448b04d9db7 100644 --- a/doc/src/base/libc.md +++ b/doc/src/base/libc.md @@ -4,6 +4,10 @@ Base.Libc.malloc Base.Libc.calloc Base.Libc.realloc +Base.Libc.memcpy +Base.Libc.memmove +Base.Libc.memset +Base.Libc.memcmp Base.Libc.free Base.Libc.errno Base.Libc.strerror @@ -13,6 +17,8 @@ Base.Libc.time(::Base.Libc.TmStruct) Base.Libc.strftime Base.Libc.strptime Base.Libc.TmStruct +Base.Libc.FILE Base.Libc.flush_cstdio Base.Libc.systemsleep +Base.Libc.mkfifo ``` diff --git a/doc/src/base/math.md b/doc/src/base/math.md index bdf91c991183f..cb427c9458fe1 100644 --- a/doc/src/base/math.md +++ b/doc/src/base/math.md @@ -118,7 +118,7 @@ Base.exp10 Base.Math.ldexp Base.Math.modf Base.expm1 -Base.round(::Type, ::Any) +Base.round Base.Rounding.RoundingMode Base.Rounding.RoundNearest Base.Rounding.RoundNearestTiesAway @@ -138,6 +138,7 @@ Base.minmax Base.Math.clamp Base.Math.clamp! Base.abs +Base.Checked Base.Checked.checked_abs Base.Checked.checked_neg Base.Checked.checked_add @@ -158,7 +159,7 @@ Base.signbit Base.flipsign Base.sqrt(::Number) Base.isqrt -Base.Math.cbrt +Base.Math.cbrt(::AbstractFloat) Base.real Base.imag Base.reim @@ -208,5 +209,5 @@ The complete list is in the parser code: Those that are parsed like `*` (in terms of precedence) include `* / ÷ % & ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗` and those that are parsed like `+` include -`+ - |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣` +`+ - |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⟇ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣` There are many others that are related to arrows, comparisons, and powers. 
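As a small illustration of these precedence classes, a minimal sketch: methods defined for such operator symbols can be used infix and inherit the parser's precedence for their class.

```julia
# ⊕ is parsed like `+` and ⊗ is parsed like `*`, so ⊗ binds more tightly.
⊕(a, b) = a + b
⊗(a, b) = a * b

1 ⊕ 2 ⊗ 3  # parsed as 1 ⊕ (2 ⊗ 3), i.e. 7
```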
diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md index 293857c1c6c65..45a60b14d541a 100644 --- a/doc/src/base/multi-threading.md +++ b/doc/src/base/multi-threading.md @@ -5,15 +5,22 @@ Base.Threads.@threads Base.Threads.foreach Base.Threads.@spawn Base.Threads.threadid +Base.Threads.maxthreadid Base.Threads.nthreads Base.Threads.threadpool Base.Threads.nthreadpools +Base.Threads.threadpoolsize +Base.Threads.ngcthreads ``` See also [Multi-Threading](@ref man-multithreading). ## Atomic operations +```@docs +atomic +``` + ```@docs Base.@atomic Base.@atomicswap diff --git a/doc/src/base/numbers.md b/doc/src/base/numbers.md index 3c2cf2626a11e..8167650ac17d1 100644 --- a/doc/src/base/numbers.md +++ b/doc/src/base/numbers.md @@ -2,6 +2,37 @@ ## Standard Numeric Types +A type tree for all subtypes of `Number` in `Base` is shown below. +Abstract types have been marked, the rest are concrete types. +``` +Number (Abstract Type) +├─ Complex +└─ Real (Abstract Type) + ├─ AbstractFloat (Abstract Type) + │ ├─ Float16 + │ ├─ Float32 + │ ├─ Float64 + │ └─ BigFloat + ├─ Integer (Abstract Type) + │ ├─ Bool + │ ├─ Signed (Abstract Type) + │ │ ├─ Int8 + │ │ ├─ Int16 + │ │ ├─ Int32 + │ │ ├─ Int64 + │ │ ├─ Int128 + │ │ └─ BigInt + │ └─ Unsigned (Abstract Type) + │ ├─ UInt8 + │ ├─ UInt16 + │ ├─ UInt32 + │ ├─ UInt64 + │ └─ UInt128 + ├─ Rational + └─ AbstractIrrational (Abstract Type) + └─ Irrational +``` + ### Abstract number types ```@docs diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md index ee84f4b8b445d..c3106b8caf8c7 100644 --- a/doc/src/base/parallel.md +++ b/doc/src/base/parallel.md @@ -26,8 +26,6 @@ Base.schedule ## [Synchronization](@id lib-task-sync) -## Synchronization - ```@docs Base.errormonitor Base.@sync @@ -52,6 +50,7 @@ Base.unlock Base.trylock Base.islocked Base.ReentrantLock +Base.@lock ``` ## Channels diff --git a/doc/src/base/punctuation.md b/doc/src/base/punctuation.md index 9cb9f1eb1dfbf..dbea97e4e3cb5 100644 --- a/doc/src/base/punctuation.md +++ b/doc/src/base/punctuation.md @@ -1,4 +1,4 @@ -# Punctuation +# [Punctuation](@id man-punctuation) Extended documentation for mathematical symbols & functions is [here](@ref math-ops). diff --git a/doc/src/devdocs/reflection.md b/doc/src/base/reflection.md similarity index 84% rename from doc/src/devdocs/reflection.md rename to doc/src/base/reflection.md index 8ffe305a0d724..d44bc474abbd2 100644 --- a/doc/src/devdocs/reflection.md +++ b/doc/src/base/reflection.md @@ -4,9 +4,9 @@ Julia provides a variety of runtime reflection capabilities. ## Module bindings -The exported names for a `Module` are available using [`names(m::Module)`](@ref), which will return -an array of [`Symbol`](@ref) elements representing the exported bindings. `names(m::Module, all = true)` -returns symbols for all bindings in `m`, regardless of export status. +The public names for a `Module` are available using [`names(m::Module)`](@ref), which will return +an array of [`Symbol`](@ref) elements representing the public bindings. `names(m::Module, all = true)` +returns symbols for all bindings in `m`, regardless of public status. ## DataType fields @@ -51,9 +51,10 @@ The *direct* subtypes of any `DataType` may be listed using [`subtypes`](@ref). 
the abstract `DataType` [`AbstractFloat`](@ref) has four (concrete) subtypes: ```jldoctest; setup = :(using InteractiveUtils) -julia> subtypes(AbstractFloat) -4-element Vector{Any}: +julia> InteractiveUtils.subtypes(AbstractFloat) +5-element Vector{Any}: BigFloat + Core.BFloat16 Float16 Float32 Float64 @@ -62,6 +63,9 @@ julia> subtypes(AbstractFloat) Any abstract subtype will also be included in this list, but further subtypes thereof will not; recursive application of [`subtypes`](@ref) may be used to inspect the full type tree. +Note that [`subtypes`](@ref) is located inside [`InteractiveUtils`](@ref man-interactive-utils) but +is automatically exported when using the REPL. + ## DataType layout The internal representation of a `DataType` is critically important when interfacing with C code @@ -82,7 +86,7 @@ the unquoted and interpolated expression ([`Expr`](@ref)) form for a given macro be passed instead!). For example: ```jldoctest; setup = :(using InteractiveUtils) -julia> macroexpand(@__MODULE__, :(@edit println("")) ) +julia> InteractiveUtils.macroexpand(@__MODULE__, :(@edit println("")) ) :(InteractiveUtils.edit(println, (Base.typesof)(""))) ``` @@ -93,7 +97,7 @@ Finally, the [`Meta.lower`](@ref) function gives the `lowered` form of any expre particular interest for understanding how language constructs map to primitive operations such as assignments, branches, and calls: -```jldoctest +```jldoctest; setup = (using Base: +, sin) julia> Meta.lower(@__MODULE__, :( [1+2, sin(0.5)] )) :($(Expr(:thunk, CodeInfo( @ none within `top-level scope` @@ -131,7 +135,7 @@ top: } ``` -For more informations see [`@code_lowered`](@ref), [`@code_typed`](@ref), [`@code_warntype`](@ref), +For more information see [`@code_lowered`](@ref), [`@code_typed`](@ref), [`@code_warntype`](@ref), [`@code_llvm`](@ref), and [`@code_native`](@ref). ### Printing of debug information @@ -139,10 +143,10 @@ For more informations see [`@code_lowered`](@ref), [`@code_typed`](@ref), [`@cod The aforementioned functions and macros take the keyword argument `debuginfo` that controls the level debug information printed. -```julia-repl -julia> @code_typed debuginfo=:source +(1,1) +```jldoctest; setup = :(using InteractiveUtils), filter = r"int.jl:\d+" +julia> InteractiveUtils.@code_typed debuginfo=:source +(1,1) CodeInfo( - @ int.jl:53 within `+' + @ int.jl:87 within `+` 1 ─ %1 = Base.add_int(x, y)::Int64 └── return %1 ) => Int64 diff --git a/doc/src/base/scopedvalues.md b/doc/src/base/scopedvalues.md new file mode 100644 index 0000000000000..0de29308c5df8 --- /dev/null +++ b/doc/src/base/scopedvalues.md @@ -0,0 +1,281 @@ +# [Scoped Values](@id scoped-values) + +Scoped values provide an implementation of dynamic scoping in Julia. + +!!! note "Lexical scoping vs dynamic scoping" + [Lexical scoping](@ref scope-of-variables) is the default behavior in Julia. + Under lexical scoping the scope of a variable is determined by the lexical + (textual) structure of a program. + Under dynamic scoping a variable is bound to the most recent assigned value + during the program's execution. + +The state of a scoped value is dependent on the execution path of the program. +This means that for a scoped value you may observe multiple different values +concurrently. + +!!! compat "Julia 1.11" + Scoped values were introduced in Julia 1.11. In Julia 1.8+ a compatible + implementation is available from the package ScopedValues.jl. 
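For readers on pre-1.11 versions, a minimal sketch of the package-based fallback mentioned in the compat note above; it assumes ScopedValues.jl is installed in the environment and provides the same `ScopedValue`/`with` API as Base:

```julia
# Hedged sketch: on Julia 1.8–1.10, load the API from the ScopedValues.jl package
# instead of Base (assumes the package has been added to the environment).
using ScopedValues

const verbosity = ScopedValue(:quiet)
with(verbosity => :debug) do
    verbosity[]  # :debug inside this dynamic scope
end
verbosity[]      # back to :quiet outside
```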
+
+In its simplest form you can create a [`ScopedValue`](@ref) with a
+default value and then use [`with`](@ref Base.with) or [`@with`](@ref) to
+enter a new dynamic scope.
+
+The new scope will inherit all values from the parent scope
+(and recursively from all outer scopes) with the provided scoped
+value taking priority over previous definitions.
+
+Let's first look at an example of **lexical** scope:
+
+A `let` statement begins a new lexical scope within which the outer definition
+of `x` is shadowed by its inner definition.
+
+```julia
+x = 1
+let x = 5
+    @show x # 5
+end
+@show x # 1
+```
+
+Since Julia uses lexical scope, the variable `x` is bound within the function `f`
+to the global scope and entering a `let` scope does not change the value `f`
+observes.
+
+```julia
+x = 1
+f() = @show x
+let x = 5
+    f() # 1
+end
+f() # 1
+```
+
+Now, using a `ScopedValue`, we can use **dynamic** scoping.
+
+```julia
+x = ScopedValue(1)
+f() = @show x[]
+with(x=>5) do
+    f() # 5
+end
+f() # 1
+```
+
+Note that the observed value of the `ScopedValue` is dependent on the execution
+path of the program.
+
+It often makes sense to use a `const` variable to point to a scoped value,
+and you can set the value of multiple `ScopedValue`s with one call to `with`.
+
+
+```julia
+const scoped_val = ScopedValue(1)
+const scoped_val2 = ScopedValue(0)
+
+# Enter a new dynamic scope and set value
+@show scoped_val[] # 1
+@show scoped_val2[] # 0
+with(scoped_val => 2) do
+    @show scoped_val[] # 2
+    @show scoped_val2[] # 0
+    with(scoped_val => 3, scoped_val2 => 5) do
+        @show scoped_val[] # 3
+        @show scoped_val2[] # 5
+    end
+    @show scoped_val[] # 2
+    @show scoped_val2[] # 0
+end
+@show scoped_val[] # 1
+@show scoped_val2[] # 0
+```
+
+Since `with` requires a closure or a function and creates another call-frame,
+it can sometimes be beneficial to use the macro form.
+
+```julia
+const STATE = ScopedValue{State}()
+with_state(f, state::State) = @with(STATE => state, f())
+```
+
+!!! note
+    Dynamic scopes are inherited by [`Task`](@ref)s at the moment of task creation. Dynamic scopes are **not** propagated through `Distributed.jl` operations.
+
+In the example below we open a new dynamic scope before launching a task.
+The parent task and the two child tasks observe independent values of the
+same scoped value at the same time.
+
+```julia
+import Base.Threads: @spawn
+const scoped_val = ScopedValue(1)
+@sync begin
+    with(scoped_val => 2) do
+        @spawn @show scoped_val[] # 2
+    end
+    with(scoped_val => 3) do
+        @spawn @show scoped_val[] # 3
+    end
+    @show scoped_val[] # 1
+end
+```
+
+Scoped values are constant throughout a scope, but you can store mutable
+state in a scoped value. Just keep in mind that the usual caveats
+for global variables apply in the context of concurrent programming.
+
+Care is also required when storing references to mutable state in scoped
+values. You might want to explicitly [unshare mutable state](@ref unshare_mutable_state)
+when entering a new dynamic scope.
+
+```julia
+import Base.Threads: @spawn
+const sval_dict = ScopedValue(Dict())
+
+# Example of using a mutable value wrongly
+@sync begin
+    # `Dict` is not thread-safe; the usage below is invalid
+    @spawn (sval_dict[][:a] = 3)
+    @spawn (sval_dict[][:b] = 3)
+end
+
+@sync begin
+    # If we instead pass a unique dictionary to each
+    # task we can access the dictionaries race free.
+ with(sval_dict => Dict()) do + @spawn (sval_dict[][:a] = 3) + end + with(sval_dict => Dict()) do + @spawn (sval_dict[][:b] = 3) + end +end +``` + +## Example + +In the example below we use a scoped value to implement a permission check in +a web-application. After determining the permissions of the request, +a new dynamic scope is entered and the scoped value `LEVEL` is set. +Other parts of the application can query the scoped value and will receive +the appropriate value. Other alternatives like task-local storage and global variables +are not well suited for this kind of propagation; our only alternative would have +been to thread a value through the entire call-chain. + +```julia +const LEVEL = ScopedValue(:GUEST) + +function serve(request, response) + level = isAdmin(request) ? :ADMIN : :GUEST + with(LEVEL => level) do + Threads.@spawn handle(request, response) + end +end + +function open(connection::Database) + level = LEVEL[] + if level !== :ADMIN + error("Access disallowed") + end + # ... open connection +end + +function handle(request, response) + # ... + open(Database(#=...=#)) + # ... +end +``` + +## Idioms +### [Unshare mutable state](@id unshare_mutable_state) + +```julia +import Base.Threads: @spawn +const sval_dict = ScopedValue(Dict()) + +# If you want to add new values to the dict, instead of replacing +# it, unshare the values explicitly. In this example we use `merge` +# to unshare the state of the dictionary in parent scope. +@sync begin + with(sval_dict => merge(sval_dict[], Dict(:a => 10))) do + @spawn @show sval_dict[][:a] + end + @spawn sval_dict[][:a] = 3 # Not a race since they are unshared. +end +``` + +### Scoped values as globals + +In order to access the value of a scoped value, the scoped value itself has to +be in (lexical) scope. This means most often you likely want to use scoped values +as constant globals. + +```julia +const sval = ScopedValue(1) +``` + +Indeed one can think of scoped values as hidden function arguments. + +This does not preclude their use as non-globals. + +```julia +import Base.Threads: @spawn +function main() + role = ScopedValue(:client) + + function launch() + #... + role[] + end + + @with role => :server @spawn launch() + launch() +end +``` + +But it might have been simpler to just directly pass the function argument +in these cases. + +### Very many ScopedValues + +If you find yourself creating many `ScopedValue`'s for one given module, +it may be better to use a dedicated struct to hold them. + +```julia +Base.@kwdef struct Configuration + color::Bool = false + verbose::Bool = false +end + +const CONFIG = ScopedValue(Configuration()) + +@with CONFIG => Configuration(CONFIG[], color=true) begin + @show CONFIG[].color # true + @show CONFIG[].verbose # false +end +``` + +## API docs + +```@docs +Base.ScopedValues.ScopedValue +Base.ScopedValues.with +Base.ScopedValues.@with +Base.isassigned(::ScopedValue) +Base.ScopedValues.get +``` + +## Implementation notes and performance + +`Scope`s use a persistent dictionary. Lookup and insertion is `O(log(32, n))`, +upon dynamic scope entry a small amount of data is copied and the unchanged +data is shared among other scopes. + +The `Scope` object itself is not user-facing and may be changed in a future +version of Julia. + +## Design inspiration + +This design was heavily inspired by [JEPS-429](https://openjdk.org/jeps/429), +which in turn was inspired by dynamically scoped free variables in many Lisp dialects. In particular Interlisp-D and it's deep binding strategy. 
+ +A prior design discussed was context variables ala [PEPS-567](https://peps.python.org/pep-0567/) and implemented in Julia as [ContextVariablesX.jl](https://github.com/tkf/ContextVariablesX.jl). diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md index 9f00381ab892c..b9d333ef2a939 100644 --- a/doc/src/base/sort.md +++ b/doc/src/base/sort.md @@ -1,7 +1,7 @@ # Sorting and Related Functions -Julia has an extensive, flexible API for sorting and interacting with already-sorted arrays of -values. By default, Julia picks reasonable algorithms and sorts in standard ascending order: +Julia has an extensive, flexible API for sorting and interacting with already-sorted arrays +of values. By default, Julia picks reasonable algorithms and sorts in ascending order: ```jldoctest julia> sort([2,3,1]) @@ -11,7 +11,7 @@ julia> sort([2,3,1]) 3 ``` -You can easily sort in reverse order as well: +You can sort in reverse order as well: ```jldoctest julia> sort([2,3,1], rev=true) @@ -21,7 +21,8 @@ julia> sort([2,3,1], rev=true) 1 ``` -To sort an array in-place, use the "bang" version of the sort function: +`sort` constructs a sorted copy leaving its input unchanged. Use the "bang" version of +the sort function to mutate an existing array: ```jldoctest julia> a = [2,3,1]; @@ -35,8 +36,8 @@ julia> a 3 ``` -Instead of directly sorting an array, you can compute a permutation of the array's indices that -puts the array into sorted order: +Instead of directly sorting an array, you can compute a permutation of the array's +indices that puts the array into sorted order: ```julia-repl julia> v = randn(5) @@ -64,7 +65,7 @@ julia> v[p] 0.382396 ``` -Arrays can easily be sorted according to an arbitrary transformation of their values: +Arrays can be sorted according to an arbitrary transformation of their values: ```julia-repl julia> sort(v, by=abs) @@ -100,9 +101,12 @@ julia> sort(v, alg=InsertionSort) 0.382396 ``` -All the sorting and order related functions rely on a "less than" relation defining a total order -on the values to be manipulated. The `isless` function is invoked by default, but the relation -can be specified via the `lt` keyword. +All the sorting and order related functions rely on a "less than" relation defining a +[strict weak order](https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings) +on the values to be manipulated. The `isless` function is invoked by default, but the +relation can be specified via the `lt` keyword, a function that takes two array elements +and returns `true` if and only if the first argument is "less than" the second. See +[`sort!`](@ref) and [Alternate Orderings](@ref) for more information. ## Sorting Functions @@ -134,77 +138,47 @@ Base.Sort.partialsortperm! ## Sorting Algorithms -There are currently four sorting algorithms available in base Julia: +There are currently four sorting algorithms publicly available in base Julia: * [`InsertionSort`](@ref) * [`QuickSort`](@ref) * [`PartialQuickSort(k)`](@ref) * [`MergeSort`](@ref) -`InsertionSort` is an O(n^2) stable sorting algorithm. It is efficient for very small `n`, and -is used internally by `QuickSort`. - -`QuickSort` is an O(n log n) sorting algorithm which is in-place, very fast, but not stable – -i.e. elements which are considered equal will not remain in the same order in which they originally -appeared in the array to be sorted. `QuickSort` is the default algorithm for numeric values, including -integers and floats. 
- -`PartialQuickSort(k)` is similar to `QuickSort`, but the output array is only sorted up to index -`k` if `k` is an integer, or in the range of `k` if `k` is an `OrdinalRange`. For example: - -```julia -x = rand(1:500, 100) -k = 50 -k2 = 50:100 -s = sort(x; alg=QuickSort) -ps = sort(x; alg=PartialQuickSort(k)) -qs = sort(x; alg=PartialQuickSort(k2)) -map(issorted, (s, ps, qs)) # => (true, false, false) -map(x->issorted(x[1:k]), (s, ps, qs)) # => (true, true, false) -map(x->issorted(x[k2]), (s, ps, qs)) # => (true, false, true) -s[1:k] == ps[1:k] # => true -s[k2] == qs[k2] # => true -``` - -`MergeSort` is an O(n log n) stable sorting algorithm but is not in-place – it requires a temporary -array of half the size of the input array – and is typically not quite as fast as `QuickSort`. -It is the default algorithm for non-numeric data. - -The default sorting algorithms are chosen on the basis that they are fast and stable, or *appear* -to be so. For numeric types indeed, `QuickSort` is selected as it is faster and indistinguishable -in this case from a stable sort (unless the array records its mutations in some way). The stability -property comes at a non-negligible cost, so if you don't need it, you may want to explicitly specify -your preferred algorithm, e.g. `sort!(v, alg=QuickSort)`. - -The mechanism by which Julia picks default sorting algorithms is implemented via the `Base.Sort.defalg` -function. It allows a particular algorithm to be registered as the default in all sorting functions -for specific arrays. For example, here are the two default methods from [`sort.jl`](https://github.com/JuliaLang/julia/blob/master/base/sort.jl): - +By default, the `sort` family of functions uses stable sorting algorithms that are fast +on most inputs. The exact algorithm choice is an implementation detail to allow for +future performance improvements. Currently, a hybrid of `RadixSort`, `ScratchQuickSort`, +`InsertionSort`, and `CountingSort` is used based on input type, size, and composition. +Implementation details are subject to change but currently available in the extended help +of `??Base.DEFAULT_STABLE` and the docstrings of internal sorting algorithms listed there. + +You can explicitly specify your preferred algorithm with the `alg` keyword +(e.g. `sort!(v, alg=PartialQuickSort(10:20))`) or reconfigure the default sorting algorithm +for custom types by adding a specialized method to the `Base.Sort.defalg` function. +For example, [InlineStrings.jl](https://github.com/JuliaStrings/InlineStrings.jl/blob/v1.3.2/src/InlineStrings.jl#L903) +defines the following method: ```julia -defalg(v::AbstractArray) = MergeSort -defalg(v::AbstractArray{<:Number}) = QuickSort +Base.Sort.defalg(::AbstractArray{<:Union{SmallInlineStrings, Missing}}) = InlineStringSort ``` -As for numeric arrays, choosing a non-stable default algorithm for array types for which the notion -of a stable sort is meaningless (i.e. when two values comparing equal can not be distinguished) -may make sense. - -## Alternate orderings +!!! compat "Julia 1.9" + The default sorting algorithm (returned by `Base.Sort.defalg`) is guaranteed to + be stable since Julia 1.9. Previous versions had unstable edge cases when + sorting numeric arrays. -By default, `sort` and related functions use [`isless`](@ref) to compare two -elements in order to determine which should come first. The -[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining -alternate orderings on the same set of elements. 
Instances of `Ordering` define -a [total order](https://en.wikipedia.org/wiki/Total_order) on a set of elements, -so that for any elements `a`, `b`, `c` the following hold: +## Alternate Orderings -* Exactly one of the following is true: `a` is less than `b`, `b` is less than - `a`, or `a` and `b` are equal (according to [`isequal`](@ref)). -* The relation is transitive - if `a` is less than `b` and `b` is less than `c` - then `a` is less than `c`. +By default, `sort`, `searchsorted`, and related functions use [`isless`](@ref) to compare +two elements in order to determine which should come first. The +[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining alternate +orderings on the same set of elements: when calling a sorting function like +`sort!`, an instance of `Ordering` can be provided with the keyword argument `order`. -The [`Base.Order.lt`](@ref) function works as a generalization of `isless` to -test whether `a` is less than `b` according to a given order. +Instances of `Ordering` define an order through the [`Base.Order.lt`](@ref) +function, which works as a generalization of `isless`. +This function's behavior on custom `Ordering`s must satisfy all the conditions of a +[strict weak order](https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings). +See [`sort!`](@ref) for details and examples of valid and invalid `lt` functions. ```@docs Base.Order.Ordering diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index 263c0019788c3..2b70a4f53198e 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -17,6 +17,11 @@ Core.String(::AbstractString) Base.SubString Base.LazyString Base.@lazy_str +Base.AnnotatedString +Base.AnnotatedChar +Base.annotatedstring +Base.annotations +Base.annotate! Base.transcode Base.unsafe_string Base.ncodeunits(::AbstractString) @@ -51,8 +56,9 @@ Base.findlast(::AbstractChar, ::AbstractString) Base.findprev(::AbstractString, ::AbstractString, ::Integer) Base.occursin Base.reverse(::Union{String,SubString{String}}) -Base.replace(s::AbstractString, ::Pair...) +Base.replace(::IO, s::AbstractString, ::Pair...) Base.eachsplit +Base.eachrsplit Base.split Base.rsplit Base.strip @@ -89,5 +95,6 @@ Base.isspace Base.isuppercase Base.isxdigit Base.escape_string +Base.escape_raw_string Base.unescape_string ``` diff --git a/doc/src/devdocs/EscapeAnalysis.md b/doc/src/devdocs/EscapeAnalysis.md index 983a6782ccc79..1bd7868790f7f 100644 --- a/doc/src/devdocs/EscapeAnalysis.md +++ b/doc/src/devdocs/EscapeAnalysis.md @@ -18,9 +18,16 @@ This escape analysis aims to: ## Try it out! 
You can give a try to the escape analysis by loading the `EAUtils.jl` utility script that -define the convenience entries `code_escapes` and `@code_escapes` for testing and debugging purposes: +defines the convenience entries `code_escapes` and `@code_escapes` for testing and debugging purposes: ```@repl EAUtils -include(normpath(Sys.BINDIR, "..", "share", "julia", "test", "compiler", "EscapeAnalysis", "EAUtils.jl")); using .EAUtils +let JULIA_DIR = normpath(Sys.BINDIR, "..", "share", "julia") + # load `EscapeAnalysis` module to define the core analysis code + include(normpath(JULIA_DIR, "base", "compiler", "ssair", "EscapeAnalysis", "EscapeAnalysis.jl")) + using .EscapeAnalysis + # load `EAUtils` module to define the utilities + include(normpath(JULIA_DIR, "test", "compiler", "EscapeAnalysis", "EAUtils.jl")) + using .EAUtils +end mutable struct SafeRef{T} x::T @@ -47,7 +54,7 @@ result = code_escapes((String,String,String,String)) do s1, s2, s3, s4 end ``` -The symbols in the side of each call argument and SSA statements represents the following meaning: +The symbols on the side of each call argument and SSA statements represent the following meaning: - `◌` (plain): this value is not analyzed because escape information of it won't be used anyway (when the object is `isbitstype` for example) - `✓` (green or cyan): this value never escapes (`has_no_escape(result.state[x])` holds), colored blue if it has arg escape also (`has_arg_escape(result.state[x])` holds) - `↑` (blue or yellow): this value can escape to the caller via return (`has_return_escape(result.state[x])` holds), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds) @@ -358,14 +365,10 @@ non-inlined callees that has been derived by previous `IPO EA`. More interestingly, it is also valid to use `IPO EA` escape information for type inference, e.g., inference accuracy can be improved by forming `Const`/`PartialStruct`/`MustAlias` of mutable object. -Since the computational cost of `analyze_escapes` is not that cheap, -both `IPO EA` and `Local EA` are better to run only when there is any profitability. -Currently `EscapeAnalysis` provides the `is_ipo_profitable` heuristic to check a profitability of `IPO EA`. ```@docs Core.Compiler.EscapeAnalysis.analyze_escapes Core.Compiler.EscapeAnalysis.EscapeState Core.Compiler.EscapeAnalysis.EscapeInfo -Core.Compiler.EscapeAnalysis.is_ipo_profitable ``` -------------------------------------------------------------------------------------------- diff --git a/doc/src/devdocs/aot.md b/doc/src/devdocs/aot.md new file mode 100644 index 0000000000000..cdaf1880ab927 --- /dev/null +++ b/doc/src/devdocs/aot.md @@ -0,0 +1,76 @@ +# Ahead of Time Compilation + +This document describes the design and structure of the ahead-of-time (AOT) compilation system in Julia. This system is used when generating system images and package images. Much of the implementation described here is located in `aotcompile.cpp`, `staticdata.c`, and `processor.cpp` + +## Introduction + +Though Julia normally compiles code just-in-time (JIT), it is possible to compile code ahead of time and save the resulting code to a file. This can be useful for a number of reasons: +1. To reduce the time it takes to start a Julia process. +2. To reduce the time spent in the JIT compiler instead of executing code (time to first execution, TTFX). +3. To reduce the amount of memory used by the JIT compiler. 
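For orientation, a hedged sketch of how users most commonly trigger this machinery, namely package precompilation; `Example` is a placeholder for any installed package and the calls assume Julia 1.10 or later:

```julia
# Sketch: package precompilation produces the package images discussed below.
import Pkg
Pkg.precompile()  # ahead-of-time compile the packages in the active environment

pkg = Base.identify_package("Example")      # PkgId, or `nothing` if not installed
pkg === nothing || Base.isprecompiled(pkg)  # true once a cache/package image exists
```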
+
+## High-Level Overview
+
+The following descriptions are a snapshot of the current implementation details of the end-to-end pipeline that happens internally when the user compiles a new AOT module, such as occurs when they type `using Foo`. These details are likely to change over time as we implement better ways to handle them, so current implementations may not exactly match the dataflow and functions described below.
+
+### Compiling Code Images
+
+Firstly, the methods that need to be compiled to native code must be identified. This can only be done by actually executing the code to be compiled, as the set of methods that need to be compiled depends on the types of the arguments passed to the methods, and method invocations with certain combinations of types may not be known until runtime. During this process, the exact methods that the compiler sees are tracked for later compilation, producing a compilation trace.
+
+!!! note
+
+    Currently when compiling images, Julia runs the trace generation in a different process than the process performing the AOT compilation. This can have an impact when attempting to use a debugger during precompilation. The best way to debug precompilation with a debugger is to use the rr debugger, record the entire process tree, use `rr ps` to identify the relevant failing process, and then use `rr replay -p PID` to replay just the failing process.
+
+Once the methods to be compiled have been identified, they are passed to the `jl_create_system_image` function. This function sets up a number of data structures that will be used when serializing native code to a file, and then calls `jl_create_native` with the array of methods. `jl_create_native` runs codegen on the methods and produces one or more LLVM modules. `jl_create_system_image` then records some useful information about what codegen produced from the module(s).
+
+The module(s) are then passed to `jl_dump_native`, along with the information recorded by `jl_create_system_image`. `jl_dump_native` contains the code necessary to serialize the module(s) to bitcode, object, or assembly files depending on the command-line options passed to Julia. The serialized code and information are then written to a file as an archive.
+
+The final step is to run a system linker on the object files in the archive produced by `jl_dump_native`. Once this step is complete, a shared library containing the compiled code is produced.
+
+### Loading Code Images
+
+When loading a code image, the shared library produced by the linker is loaded into memory. The system image data is then loaded from the shared library. This data contains information about the types, methods, and code instances that were compiled into the shared library. This data is used to restore the state of the runtime to what it was when the code image was compiled.
+
+If the code image was compiled with multiversioning, the loader will pick the appropriate version of each function to use based on the CPU features available on the current machine.
+
+For system images, since no other code has been loaded, the state of the runtime is now the same as it was when the code image was compiled. For package images, the environment may have changed compared to when the code was compiled, so each method must be checked against the global method table to determine if it is still valid code.
+
+## Compiling Methods
+
+### Tracing Compiled Methods
+
+Julia has a command-line flag to record all of the methods that are compiled by the JIT compiler, `--trace-compile=filename`.
When a function is compiled and this flag has a filename, Julia will print out a precompile statement to that file with the method and argument types it was called with. This therefore generates a precompile script that can be used later in the AOT compilation process. The [PrecompileTools](https://julialang.github.io/PrecompileTools.jl/stable/) package has tooling that can make taking advantage of this functionality easier for package developers. + +### `jl_create_system_image` + +`jl_create_system_image` saves all of the Julia-specific metadata necessary to later restore the state of the runtime. This includes data such as code instances, method instances, method tables, and type information. This function also sets up the data structures necessary to serialize the native code to a file. Finally, it calls `jl_create_native` to create one or more LLVM modules containing the native code for the methods passed to it. `jl_create_native` is responsible for running codegen on the methods passed to it. + +### `jl_dump_native` + +`jl_dump_native` is responsible for serializing the LLVM module containing the native code to a file. In addition to the module, the system image data produced by `jl_create_system_image` is compiled as a global variable. The output of this method is bitcode, object, and/or assembly archives containing the code and system image data. + +`jl_dump_native` is typically one of the larger time sinks when emitting native code, with much of the time spent in optimizing LLVM IR and emitting machine code. Therefore, this function is capable of multithreading the optimization and machine code emission steps. This multithreading is parameterized on the size of the module, but can be explicitly overridden by setting the [`JULIA_IMAGE_THREADS`](@ref JULIA_IMAGE_THREADS) environment variable. The default maximum number of threads is half the number of available threads, but setting it to be lower can reduce peak memory usage during compilation. + +`jl_dump_native` can also produce native code optimized for multiple architectures, when integrated with the Julia loader. This is triggered by setting the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable and mediated by the multiversioning pass in the optimization pipeline. To make this work with multithreading, an annotation step is added before the module is split into submodules that are emitted on their own threads, and this annotation step uses information available throughout the entire module to decide what functions are cloned for different architectures. Once the annotation has happened, individual threads can emit code for different architectures in parallel, knowing that a different submodule is guaranteed to produce the necessary functions that will be called by a cloned function. + +Some other metadata about how the module was serialized is also stored in the archive, such as the number of threads used to serialize the module and the number of functions that were compiled. + +### Static Linking + +The final step in the AOT compilation process is to run a linker on the object files in the archive produced by `jl_dump_native`. This produces a shared library containing the compiled code. This shared library can then be loaded by Julia to restore the state of the runtime. When compiling a system image, the native linker used by a C compiler is used to produce the final shared library. For package images, the LLVM linker LLD is used to provide a more consistent linking interface. 
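Tying this back to the trace-generation step described above, a hedged sketch of what an emitted trace looks like; the exact statements depend on what the traced session actually compiled:

```julia
# Sketch: `julia --trace-compile=trace.jl -e 'sin(1.0)'` writes `precompile`
# statements roughly of this form, which can later be replayed (for example via
# PrecompileTools) to drive ahead-of-time compilation.
precompile(Tuple{typeof(Base.sin), Float64})
```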
+
+## Loading Code Images
+
+### Loading the Shared Library
+
+The first step in loading a code image is to load the shared library produced by the linker. This is done by calling `jl_dlopen` on the path to the shared library. This function is responsible for loading the shared library and resolving all of the symbols in the library.
+
+### Loading Native Code
+
+The loader first needs to identify whether the native code that was compiled is valid for the architecture that the loader is running on. This is necessary to avoid executing instructions that older CPUs do not recognize. This is done by checking the CPU features available on the current machine against the CPU features that the code was compiled for. When multiversioning is enabled, the loader will pick the appropriate version of each function to use based on the CPU features available on the current machine. If none of the feature sets that the code was multiversioned for are supported by the current machine, the loader will throw an error.
+
+Part of the multiversioning pass creates a number of global arrays of all of the functions in the module. When this process is multithreaded, an array of arrays is created, which the loader reorganizes into one large array with all of the functions that were compiled for this architecture. A similar process occurs for the global variables in the module.
+
+### Setting Up Julia State
+
+The loader then uses the global variables and functions produced from loading native code to set up Julia runtime core data structures in the current process. This setup involves adding types and methods to the Julia runtime, and making the cached native code available for use by other Julia functions and the interpreter. For package images, each method must be validated: the global method table's state must match the state that the package image was compiled for. In particular, if a different set of methods exists at load time compared to the compile time of the package image, the method must be invalidated and recompiled on first use. This is necessary to ensure that execution semantics remain the same regardless of whether a package was precompiled or the code was directly executed. System images do not need to perform this validation, since the global method table is empty at load time. Thus, system images have faster load times than package images.
diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md
index 1978cd19a9a79..f829b27663e62 100644
--- a/doc/src/devdocs/ast.md
+++ b/doc/src/devdocs/ast.md
@@ -249,14 +249,11 @@ types exist in lowered form:
While almost every part of a surface AST is represented by an `Expr`, the IR uses only a limited
number of `Expr`s, mostly for calls and some top-level-only forms.
-  * `Slot`
+  * `SlotNumber`
-    Identifies arguments and local variables by consecutive numbering. `Slot` is an abstract type
-    with subtypes `SlotNumber` and `TypedSlot`. Both types have an integer-valued `id` field giving
-    the slot index. Most slots have the same type at all uses, and so are represented with `SlotNumber`.
-    The types of these slots are found in the `slottypes` field of their `CodeInfo` object.
-    Slots that require per-use type annotations are represented with `TypedSlot`, which has a `typ`
-    field.
+    Identifies arguments and local variables by consecutive numbering. It has an
+    integer-valued `id` field giving the slot index.
+    The types of these slots can be found in the `slottypes` field of their `CodeInfo` object.
* `Argument` @@ -322,7 +319,7 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. * `=` - Assignment. In the IR, the first argument is always a Slot or a GlobalRef. + Assignment. In the IR, the first argument is always a `SlotNumber` or a `GlobalRef`. * `method` @@ -425,7 +422,7 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. * `splatnew` Similar to `new`, except field values are passed as a single tuple. Works similarly to - `Base.Splat(new)` if `new` were a first-class function, hence the name. + `splat(new)` if `new` were a first-class function, hence the name. * `isdefined` @@ -436,10 +433,6 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`. - * `undefcheck` - - Temporary node inserted by the compiler and will be processed in `type_lift_pass!`. - * `enter` Enters an exception handler (`setjmp`). `args[1]` is the label of the catch block to jump to on @@ -581,7 +574,7 @@ A unique'd container describing the shared metadata for a single method. Pointers to non-AST things that have been interpolated into the AST, required by compression of the AST, type-inference, or the generation of native code. - * `nargs`, `isva`, `called`, `isstaged`, `pure` + * `nargs`, `isva`, `called`, `is_for_opaque_closure`, Descriptive bit-fields for the source code of this Method. @@ -608,10 +601,9 @@ for important details on how to modify these fields safely. * `sparam_vals` - The values of the static parameters in `specTypes` indexed by `def.sparam_syms`. For the - `MethodInstance` at `Method.unspecialized`, this is the empty `SimpleVector`. But for a - runtime `MethodInstance` from the `MethodTable` cache, this will always be defined and - indexable. + The values of the static parameters in `specTypes`. + For the `MethodInstance` at `Method.unspecialized`, this is the empty `SimpleVector`. + But for a runtime `MethodInstance` from the `MethodTable` cache, this will always be defined and indexable. * `uninferred` @@ -655,7 +647,7 @@ for important details on how to modify these fields safely. The ABI to use when calling `fptr`. Some significant ones include: * 0 - Not compiled yet - * 1 - JL_CALLABLE `jl_value_t *(*)(jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)` + * 1 - `JL_CALLABLE` `jl_value_t *(*)(jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)` * 2 - Constant (value stored in `rettype_const`) * 3 - With Static-parameters forwarded `jl_value_t *(*)(jl_svec_t *sparams, jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)` * 4 - Run in interpreter `jl_value_t *(*)(jl_method_instance_t *meth, jl_function_t *f, jl_value_t *args[nargs], uint32_t nargs)` @@ -683,10 +675,10 @@ A (usually temporary) container for holding lowered source code. A `UInt8` array of slot properties, represented as bit flags: - * 2 - assigned (only false if there are *no* assignment statements with this var on the left) - * 8 - const (currently unused for local variables) - * 16 - statically assigned once - * 32 - might be used before assigned. This flag is only valid after type inference. + * 0x02 - assigned (only false if there are *no* assignment statements with this var on the left) + * 0x08 - used (if there is any read or write of the slot) + * 0x10 - statically assigned once + * 0x20 - might be used before assigned. This flag is only valid after type inference. 
* `ssavaluetypes` @@ -697,15 +689,8 @@ A (usually temporary) container for holding lowered source code. * `ssaflags` - Statement-level flags for each expression in the function. Many of these are reserved, but not yet implemented: - - * 0x01 << 0 = statement is marked as `@inbounds` - * 0x01 << 1 = statement is marked as `@inline` - * 0x01 << 2 = statement is marked as `@noinline` - * 0x01 << 3 = statement is within a block that leads to `throw` call - * 0x01 << 4 = statement may be removed if its result is unused, in particular it is thus be both pure and effect free - * 0x01 << 5-6 = - * 0x01 << 7 = has out-of-band info + Statement-level 32 bits flags for each expression in the function. + See the definition of `jl_code_info_t` in julia.h for more details. * `linetable` @@ -759,11 +744,6 @@ Boolean properties: Whether this should propagate `@inbounds` when inlined for the purpose of eliding `@boundscheck` blocks. - * `pure` - - Whether this is known to be a pure function of its arguments, without respect to the - state of the method caches or other mutable global state. - `UInt8` settings: diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md index 258528dbd5960..7acd32f04dc75 100644 --- a/doc/src/devdocs/boundscheck.md +++ b/doc/src/devdocs/boundscheck.md @@ -28,10 +28,10 @@ end With a custom array-like type `MyArray` having: ```julia -@inline getindex(A::MyArray, i::Real) = (@boundscheck checkbounds(A,i); A.data[to_index(i)]) +@inline getindex(A::MyArray, i::Real) = (@boundscheck checkbounds(A, i); A.data[to_index(i)]) ``` -Then when `getindex` is inlined into `sum`, the call to `checkbounds(A,i)` will be elided. If +Then when `getindex` is inlined into `sum`, the call to `checkbounds(A, i)` will be elided. If your function contains multiple layers of inlining, only `@boundscheck` blocks at most one level of inlining deeper are eliminated. The rule prevents unintended changes in program behavior from code further up the stack. @@ -47,7 +47,7 @@ function sum(A::AbstractArray) for i in 1:length(A) @inbounds r += A[i] end - return r + return r end ``` @@ -57,7 +57,7 @@ with [`OffsetArrays`](@ref man-custom-indices): ```julia-repl julia> using OffsetArrays -julia> sum(OffsetArray([1,2,3], -10)) +julia> sum(OffsetArray([1, 2, 3], -10)) 9164911648 # inconsistent results or segfault ``` @@ -123,4 +123,4 @@ the last argument). ## Emit bounds checks -Julia can be launched with `--check-bounds={yes|no|auto}` to emit bounds checks always, never, or respect @inbounds declarations. +Julia can be launched with `--check-bounds={yes|no|auto}` to emit bounds checks always, never, or respect `@inbounds` declarations. diff --git a/doc/src/devdocs/build/arm.md b/doc/src/devdocs/build/arm.md index 9268da32d9c26..747ee25d22a04 100644 --- a/doc/src/devdocs/build/arm.md +++ b/doc/src/devdocs/build/arm.md @@ -68,6 +68,16 @@ Compilation on `ARMv8-A` requires that `Make.user` is configured as follows: MCPU=armv8-a ``` +Starting from Julia v1.10, [JITLink](https://llvm.org/docs/JITLink.html) is automatically enabled on this architecture for all operating systems when linking to LLVM 15 or later versions. 
+Due to a [bug in the LLVM memory manager](https://github.com/llvm/llvm-project/issues/63236), non-trivial workloads may generate too many memory mappings that on Linux can exceed the limit of memory mappings (`mmap`) set in the file `/proc/sys/vm/max_map_count`, resulting in an error like
+```
+JIT session error: Cannot allocate memory
+```
+Should this happen, ask your system administrator to increase the limit of memory mappings, for example with the command
+```
+sysctl -w vm.max_map_count=262144
+```
+
### nVidia Jetson TX2
Julia builds and runs on the [nVidia Jetson TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html)
diff --git a/doc/src/devdocs/build/build.md b/doc/src/devdocs/build/build.md
index 091a15d892513..52d9482805b9a 100644
--- a/doc/src/devdocs/build/build.md
+++ b/doc/src/devdocs/build/build.md
@@ -16,7 +16,7 @@ variables.
When compiled the first time, the build will automatically download pre-built [external
-dependencies](#required-build-tools-and-external-libraries). If you
+dependencies](#Required-Build-Tools-and-External-Libraries). If you
prefer to build all the dependencies on your own, or are building on a system that cannot
access the network during the build process, add the following in `Make.user`:
@@ -60,6 +60,16 @@ To run julia from anywhere you can:
- write `prefix=/path/to/install/folder` into `Make.user` and then run `make install`. If there is a version of Julia already installed in this folder, you should delete it before running `make install`.
+Some of the options you can set to control the build of Julia are listed and documented at the beginning of the file `Make.inc`, but you should never edit it for this purpose; use `Make.user` instead.
+
+Julia's Makefiles define convenient automatic rules called `print-<VARNAME>` for printing the value of variables, replacing `<VARNAME>` with the name of the variable to print the value of.
+For example
+```console
+$ make print-JULIA_PRECOMPILE
+JULIA_PRECOMPILE=1
+```
+These rules are useful for debugging purposes.
+
Now you should be able to run Julia like this:
    julia
@@ -139,12 +149,12 @@ Notes for various architectures:
* [ARM](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/arm.md)
-## [Required Build Tools and External Libraries](@id build-tools)
+## Required Build Tools and External Libraries
Building Julia requires that the following software be installed:
- **[GNU make]** — building dependencies.
-- **[gcc & g++][gcc]** (>= 5.1) or **[Clang][clang]** (>= 3.5, >= 6.0 for Apple Clang) — compiling and linking C, C++.
+- **[gcc & g++][gcc]** (>= 7.1) or **[Clang][clang]** (>= 5.0, >= 9.3 for Apple Clang) — compiling and linking C, C++.
- **[libatomic][gcc]** — provided by **[gcc]** and needed to support atomic operations.
- **[python]** (>=2.7) — needed to build LLVM.
- **[gfortran]** — compiling and linking Fortran libraries.
@@ -167,9 +177,9 @@ Julia uses the following external libraries, which are automatically downloaded (or in a few cases, included in the Julia source repository) and then compiled from source the first time you run `make`. The specific version numbers of these libraries that Julia
-uses are listed in [`deps/Versions.make`](https://github.com/JuliaLang/julia/blob/master/deps/Versions.make):
+uses are listed in [`deps/$(libname).version`](https://github.com/JuliaLang/julia/blob/master/deps/):
-- **[LLVM]** (9.0 + [patches](https://github.com/JuliaLang/julia/tree/master/deps/patches)) — compiler infrastructure (see [note below](#llvm)).
+- **[LLVM]** (15.0 + [patches](https://github.com/JuliaLang/llvm-project/tree/julia-release/15.x)) — compiler infrastructure (see [note below](#llvm)). - **[FemtoLisp]** — packaged with Julia source, and used to implement the compiler front-end. - **[libuv]** (custom fork) — portable, high-performance event-based I/O library. - **[OpenLibm]** — portable libm library containing elementary math functions. @@ -187,6 +197,7 @@ uses are listed in [`deps/Versions.make`](https://github.com/JuliaLang/julia/blo - **[mbedtls]** — library used for cryptography and transport layer security, used by libssh2 - **[utf8proc]** — a library for processing UTF-8 encoded Unicode strings. - **[LLVM libunwind]** — LLVM's fork of [libunwind], a library that determines the call-chain of a program. +- **[ITTAPI]** — Intel's Instrumentation and Tracing Technology and Just-In-Time API. [GNU make]: https://www.gnu.org/software/make [patch]: https://www.gnu.org/software/patch @@ -222,6 +233,7 @@ uses are listed in [`deps/Versions.make`](https://github.com/JuliaLang/julia/blo [pkg-config]: https://www.freedesktop.org/wiki/Software/pkg-config/ [powershell]: https://docs.microsoft.com/en-us/powershell/scripting/wmf/overview [which]: https://carlowood.github.io/which/ +[ITTAPI]: https://github.com/intel/ittapi ## Build dependencies @@ -236,11 +248,49 @@ The most complicated dependency is LLVM, for which we require additional patches For packaging Julia with LLVM, we recommend either: - bundling a Julia-only LLVM library inside the Julia package, or - adding the patches to the LLVM package of the distribution. - * A complete list of patches is available in `deps/llvm.mk`, and the patches themselves are in `deps/patches/`. - * The only Julia-specific patch is the lib renaming (`llvm-symver-jlprefix.patch`), which should _not_ be applied to a system LLVM. + * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/15.x` branch. + * The only Julia-specific patch is the lib renaming (`llvm7-symver-jlprefix.patch`), which should _not_ be applied to a system LLVM. * The remaining patches are all upstream bug fixes, and have been contributed into upstream LLVM. -Using an unpatched or different version of LLVM will result in errors and/or poor performance. Though Julia can be built with newer LLVM versions, support for this should be regarded as experimental and not suitable for packaging. +Using an unpatched or different version of LLVM will result in errors and/or poor performance. +You can build a different version of LLVM from a remote Git repository with the following options in the `Make.user` file: + +```make +# Force source build of LLVM +USE_BINARYBUILDER_LLVM = 0 +# Use Git for fetching LLVM source code +# this is either `1` to get all of them +DEPS_GIT = 1 +# or a space-separated list of specific dependencies to download with git +DEPS_GIT = llvm + +# Other useful options: +#URL of the Git repository you want to obtain LLVM from: +# LLVM_GIT_URL = ... +#Name of the alternate branch to clone from git +# LLVM_BRANCH = julia-16.0.6-0 +#SHA hash of the alterate commit to check out automatically +# LLVM_SHA1 = $(LLVM_BRANCH) +#List of LLVM targets to build. It is strongly recommended to keep at least all the +#default targets listed in `deps/llvm.mk`, even if you don't necessarily need all of them. +# LLVM_TARGETS = ... +#Use ccache for faster recompilation in case you need to restart a build. 
+# USECCACHE = 1 +# CMAKE_GENERATOR=Ninja +# LLVM_ASSERTIONS=1 +# LLVM_DEBUG=Symbols +``` + +The various build phases are controlled by specific files: + * `deps/llvm.version` : touch or change to checkout a new version, `make get-llvm check-llvm` + * `deps/srccache/llvm/source-extracted` : result of `make extract-llvm` + * `deps/llvm/build_Release*/build-configured` : result of `make configure-llvm` + * `deps/llvm/build_Release*/build-configured` : result of `make compile-llvm` + * `usr-staging/llvm/build_Release*.tgz` : result of `make stage-llvm` (regenerate with `make reinstall-llvm`) + * `usr/manifest/llvm` : result of `make install-llvm` (regenerate with `make uninstall-llvm`) + * `make version-check-llvm` : runs every time to warn the user if there are local modifications + +Though Julia can be built with newer LLVM versions, support for this should be regarded as experimental and not suitable for packaging. ### libuv @@ -302,4 +352,51 @@ From this point, you should ``` (Note that `sudo` isn't installed, but neither is it necessary since you are running as `root`, so you can omit `sudo` from all commands.) -Then add all the [build dependencies](@ref build-tools), a console-based editor of your choice, `git`, and anything else you'll need (e.g., `gdb`, `rr`, etc). Pick a directory to work in and `git clone` Julia, check out the branch you wish to debug, and build Julia as usual. +Then add all the [build dependencies](#required-build-tools-and-external-libraries), a console-based editor of your choice, `git`, and anything else you'll need (e.g., `gdb`, `rr`, etc). Pick a directory to work in and `git clone` Julia, check out the branch you wish to debug, and build Julia as usual. + + +## Update the version number of a dependency + +There are two types of builds +1. Build everything (`deps/` and `src/`) from source code. + (Add `USE_BINARYBUILDER=0` to `Make.user`, see [Building Julia](#building-julia)) +2. Build from source (`src/`) with pre-compiled dependencies (default) + +When you want to update the version number of a dependency in `deps/`, +you may want to use the following checklist: + +```md +### Check list + +Version numbers: +- [ ] `deps/$(libname).version`: `LIBNAME_VER`, `LIBNAME_BRANCH`, `LIBNAME_SHA1` and `LIBNAME_JLL_VER` +- [ ] `stdlib/$(LIBNAME_JLL_NAME)_jll/Project.toml`: `version` + +Checksum: +- [ ] `deps/checksums/$(libname)` +- [ ] `deps/checksums/$(LIBNAME_JLL_NAME)-*/`: `md5` and `sha512` + +Patches: +- [ ] `deps/$(libname).mk` +- [ ] `deps/patches/$(libname)-*.patch` +``` + +Note: +- For specific dependencies, some items in the checklist may not exist. +- For checksum file, it may be **a single file** without a suffix, or **a folder** containing two files. + + +### Example: `OpenLibm` + +1. Update Version numbers in `deps/openlibm.version` + - `OPENLIBM_VER := 0.X.Y` + - `OPENLIBM_BRANCH = v0.X.Y` + - `OPENLIBM_SHA1 = new-sha1-hash` +2. Update Version number in `stdlib/OpenLibm_jll/Project.toml` + - `version = "0.X.Y+0"` +3. Update checksums in `deps/checksums/openlibm` + - `make -f contrib/refresh_checksums.mk openlibm` +4. Check if the patch files `deps/patches/openlibm-*.patch` exist + - if patches don't exist, skip. + - if patches exist, check if they have been merged into the new version and need to be removed. + When deleting a patch, remember to modify the corresponding Makefile file (`deps/openlibm.mk`). 
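+
+After rebuilding Julia, a quick sanity check that the bump took effect is to query the JLL stdlib from the REPL (a minimal sketch, assuming Julia 1.9 or later where `pkgversion` is available; the version shown is illustrative):
+
+```julia
+using OpenLibm_jll
+
+# Should report the version written into stdlib/OpenLibm_jll/Project.toml, e.g. v"0.X.Y+0".
+pkgversion(OpenLibm_jll)
+```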
diff --git a/doc/src/devdocs/build/distributing.md b/doc/src/devdocs/build/distributing.md index 9ae75a8683020..8a6c5743035c3 100644 --- a/doc/src/devdocs/build/distributing.md +++ b/doc/src/devdocs/build/distributing.md @@ -18,12 +18,12 @@ GPL licensed. We do hope to have a non-GPL distribution of Julia in the future. Versioning and Git ------------------ The Makefile uses both the `VERSION` file and commit hashes and tags from the -git repository to generate the `version_git.jl` with information we use to +git repository to generate the `base/version_git.jl` with information we use to fill the splash screen and the `versioninfo()` output. If you for some reason don't want to have the git repository available when building you should -pre-generate the `version_git.jl` file with: +pregenerate the `base/version_git.jl` file with: - make -C base version_git.jl + make -C base version_git.jl.phony Julia has lots of build dependencies where we use patched versions that has not yet been included by the popular package managers. These dependencies will usually @@ -53,7 +53,7 @@ as it will make Julia fail at startup on any machine with incompatible CPUs We therefore recommend that you pass the `MARCH` variable when calling `make`, setting it to the baseline target you intend to support. This will determine the target CPU for both the Julia executable and libraries, and the system -image (the latter can also be set using `JULIA_CPU_TARGET`). Typically useful +image (the latter can also be set using [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET)). Typically useful values for x86 CPUs are `x86-64` and `core2` (for 64-bit builds) and `pentium4` (for 32-bit builds). Unfortunately, CPUs older than Pentium 4 are currently not supported (see diff --git a/doc/src/devdocs/build/linux.md b/doc/src/devdocs/build/linux.md index 4e596ef73341b..8c4773e4e41ad 100644 --- a/doc/src/devdocs/build/linux.md +++ b/doc/src/devdocs/build/linux.md @@ -8,7 +8,7 @@ ## Architecture Customization -Julia can be built for a non-generic architecture by configuring the `ARCH` Makefile variable in a `Make.user` file. See the appropriate section of `Make.inc` for additional customization options, such as `MARCH` and `JULIA_CPU_TARGET`. +Julia can be built for a non-generic architecture by configuring the `ARCH` Makefile variable in a `Make.user` file. See the appropriate section of `Make.inc` for additional customization options, such as `MARCH` and [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET). For example, to build for Pentium 4, set `MARCH=pentium4` and install the necessary system libraries for linking. On Ubuntu, these may include lib32gfortran-6-dev, lib32gcc1, and lib32stdc++6, among others. diff --git a/doc/src/devdocs/build/windows.md b/doc/src/devdocs/build/windows.md index fef4413db7d1a..8f8f0c8bc676a 100644 --- a/doc/src/devdocs/build/windows.md +++ b/doc/src/devdocs/build/windows.md @@ -47,15 +47,18 @@ MinGW-w64 compilers available through Cygwin's package manager. either 32 or 64 bit Julia from either 32 or 64 bit Cygwin. 64 bit Cygwin has a slightly smaller but often more up-to-date selection of packages. 
- Advanced: you may skip steps 2-4 by running: + *Advanced*: you may skip steps 2-4 by running: - setup-x86_64.exe -s -q -P cmake,gcc-g++,git,make,patch,curl,m4,python3,p7zip,mingw64-i686-gcc-g++,mingw64-i686-gcc-fortran,mingw64-x86_64-gcc-g++,mingw64-x86_64-gcc-fortran - :: replace with a site from https://cygwin.com/mirrors.html - :: or run setup manually first and select a mirror + ```sh + setup-x86_64.exe -s -q -P cmake,gcc-g++,git,make,patch,curl,m4,python3,p7zip,mingw64-i686-gcc-g++,mingw64-i686-gcc-fortran,mingw64-x86_64-gcc-g++,mingw64-x86_64-gcc-fortran + ``` - 2. Select installation location and download mirror. + replacing `` with a site from [https://cygwin.com/mirrors.html](https://cygwin.com/mirrors.html) + or run setup manually first and select a mirror. - 3. At the '*Select Packages'* step, select the following: + 2. Select installation location and a mirror to download from. + + 3. At the *Select Packages* step, select the following: 1. From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch` 2. From the *Net* category: `curl` @@ -66,13 +69,10 @@ MinGW-w64 compilers available through Cygwin's package manager. 6. For 64 bit Julia, and also from the *Devel* category: `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran` - 4. At the *'Resolving Dependencies'* step, be sure to leave *'Select required - packages (RECOMMENDED)'* enabled. - - 5. Allow Cygwin installation to finish, then start from the installed shortcut - a *'Cygwin Terminal'*, or *'Cygwin64 Terminal'*, respectively. + 4. Allow Cygwin installation to finish, then start from the installed shortcut + *'Cygwin Terminal'*, or *'Cygwin64 Terminal'*, respectively. - 6. Build Julia and its dependencies from source: + 5. Build Julia and its dependencies from source: 1. Get the Julia sources ```sh @@ -93,36 +93,98 @@ MinGW-w64 compilers available through Cygwin's package manager. 3. Start the build ```sh - make -j 4 # Adjust the number of threads (4) to match your build environment. + make -j 4 # Adjust the number of threads (4) to match your build environment. + make -j 4 debug # This builds julia-debug.exe ``` - - - > Protip: build both! - > ```sh - > make O=julia-win32 configure - > make O=julia-win64 configure - > echo 'XC_HOST = i686-w64-mingw32' > julia-win32/Make.user - > echo 'XC_HOST = x86_64-w64-mingw32' > julia-win64/Make.user - > echo 'ifeq ($(BUILDROOT),$(JULIAHOME)) - > $(error "in-tree build disabled") - > endif' >> Make.user - > make -C julia-win32 # build for Windows x86 in julia-win32 folder - > make -C julia-win64 # build for Windows x86-64 in julia-win64 folder - > ``` - - 7. Run Julia using the Julia executables directly + 6. Run Julia using the Julia executables directly ```sh usr/bin/julia.exe usr/bin/julia-debug.exe ``` +!!! note "Pro tip: build both!" + ```sh + make O=julia-win32 configure + make O=julia-win64 configure + echo 'XC_HOST = i686-w64-mingw32' > julia-win32/Make.user + echo 'XC_HOST = x86_64-w64-mingw32' > julia-win64/Make.user + echo 'ifeq ($(BUILDROOT),$(JULIAHOME)) + $(error "in-tree build disabled") + endif' >> Make.user + make -C julia-win32 # build for Windows x86 in julia-win32 folder + make -C julia-win64 # build for Windows x86-64 in julia-win64 folder + ``` + ### Compiling with MinGW/MSYS2 -Compiling Julia from source using [MSYS2](https://msys2.github.io) has worked in the past -but is not actively supported. Pull requests to restore support would be welcome. 
See a -[past version of this -file](https://github.com/JuliaLang/julia/blob/v0.6.0/README.windows.md) for the former -instructions for compiling using MSYS2. +[MSYS2](https://www.msys2.org/) is a software distribution and build environment for Windows. + +Note: MSYS2 requires **64 bit** Windows 7 or newer. + + 1. Install and configure MSYS2. + + 1. Download and run the latest installer for the + [64-bit](https://github.com/msys2/msys2-installer/releases/latest) distribution. + The installer will have a name like `msys2-x86_64-yyyymmdd.exe`. + + 2. Open the MSYS2 shell. Update the package database and base packages: + + ``` + pacman -Syu + ``` + 3. Exit and restart MSYS2. Update the rest of the base packages: + + ``` + pacman -Syu + ``` + + 4. Then install tools required to build julia: + + ``` + pacman -S cmake diffutils git m4 make patch tar p7zip curl python + ``` + + For 64 bit Julia, install the x86_64 version: + + ``` + pacman -S mingw-w64-x86_64-gcc + ``` + + For 32 bit Julia, install the i686 version: + + ``` + pacman -S mingw-w64-i686-gcc + ``` + + 5. Configuration of MSYS2 is complete. Now `exit` the MSYS2 shell. + 2. Build Julia and its dependencies with pre-build dependencies. + + 1. Open a new [**MINGW64/MINGW32 shell**](https://www.msys2.org/docs/environments/#overview). + Currently we can't use both mingw32 and mingw64, + so if you want to build the x86_64 and i686 versions, + you'll need to build them in each environment separately. + + 2. Clone the Julia sources: + + ``` + git clone https://github.com/JuliaLang/julia.git + cd julia + ``` + + 3. Start the build + + ``` + make -j$(nproc) + ``` + +!!! note "Pro tip: build in dir" + ```sh + make O=julia-mingw-w64 configure + echo 'ifeq ($(BUILDROOT),$(JULIAHOME)) + $(error "in-tree build disabled") + endif' >> Make.user + make -C julia-mingw-w64 + ``` ### Cross-compiling from Unix (Linux/Mac/WSL) @@ -131,7 +193,7 @@ You can also use MinGW-w64 cross compilers to build a Windows version of Julia f Linux, Mac, or the Windows Subsystem for Linux (WSL). First, you will need to ensure your system has the required dependencies. We -need wine (>=1.7.5), a system compiler, and some downloaders. Note: a cygwin install might +need wine (>=1.7.5), a system compiler, and some downloaders. Note: a Cygwin install might interfere with this method if using WSL. **On Ubuntu** (on other Linux systems the dependency names are likely to be similar): @@ -139,7 +201,9 @@ interfere with this method if using WSL. apt-get install wine-stable gcc wget p7zip-full winbind mingw-w64 gfortran-mingw-w64 dpkg --add-architecture i386 && apt-get update && apt-get install wine32 # add sudo to each if needed # switch all of the following to their "-posix" variants (interactively): -for pkg in i686-w64-mingw32-g++ i686-w64-mingw32-gcc i686-w64-mingw32-gfortran x86_64-w64-mingw32-g++ x86_64-w64-mingw32-gcc x86_64-w64-mingw32-gfortran; do sudo update-alternatives --config $pkg; done +for pkg in i686-w64-mingw32-g++ i686-w64-mingw32-gcc i686-w64-mingw32-gfortran x86_64-w64-mingw32-g++ x86_64-w64-mingw32-gcc x86_64-w64-mingw32-gfortran; do + sudo update-alternatives --config $pkg +done ``` **On Mac**: Install XCode, XCode command line tools, X11 (now @@ -157,19 +221,19 @@ install wine wget mingw-w64`, as appropriate. 6. `make binary-dist` then `make exe` to create the Windows installer. 7. move the `julia-*.exe` installer to the target machine -If you are building for 64-bit windows, the steps are essentially the same. -Just replace `i686` in `XC_HOST` with `x86_64`. 
(note: on Mac, wine only runs +If you are building for 64-bit Windows, the steps are essentially the same. +Just replace `i686` in `XC_HOST` with `x86_64`. (Note: on Mac, wine only runs in 32-bit mode). ## Debugging a cross-compiled build under wine The most effective way to debug a cross-compiled version of Julia on the cross-compilation -host is to install a windows version of gdb and run it under wine as usual. The pre-built +host is to install a Windows version of GDB and run it under wine as usual. The pre-built packages available [as part of the MSYS2 -project](https://sourceforge.net/projects/msys2/files/REPOS/MINGW/) are known to work. Apart +project](https://packages.msys2.org/) are known to work. Apart from the GDB package you may also need the python and termcap packages. Finally, GDB's -prompt may not work when launch from the command line. This can be worked around by +prompt may not work when launched from the command line. This can be worked around by prepending `wineconsole` to the regular GDB invocation. @@ -178,24 +242,25 @@ prepending `wineconsole` to the regular GDB invocation. Compiling using one of the options above creates a basic Julia build, but not some extra components that are included if you run the full Julia binary installer. If you need these components, the easiest way to get them is to build the installer -yourself using ```make win-extras``` followed by ```make binary-dist``` and ```make exe```. Then running the resulting installer. +yourself using ```make win-extras``` followed by ```make binary-dist``` and ```make exe```. +Then run the resulting installer. ## Windows Build Debugging -### GDB hangs with cygwin mintty +### GDB hangs with Cygwin mintty -- Run gdb under the windows console (cmd) instead. gdb [may not function +- Run GDB under the Windows console (cmd) instead. GDB [may not function properly](https://www.cygwin.com/ml/cygwin/2009-02/msg00531.html) under mintty with non- - cygwin applications. You can use `cmd /c start` to start the windows console from mintty + Cygwin applications. You can use `cmd /c start` to start the Windows console from mintty if necessary. ### GDB not attaching to the right process - - Use the PID from the windows task manager or `WINPID` from the `ps` command - instead of the PID from unix style command line tools (e.g. `pgrep`). You - may need to add the PID column if it is not shown by default in the windows + - Use the PID from the Windows task manager or `WINPID` from the `ps` command + instead of the PID from unix-style command line tools (e.g. `pgrep`). You + may need to add the PID column if it is not shown by default in the Windows task manager. ### GDB not showing the right backtrace diff --git a/doc/src/devdocs/builtins.md b/doc/src/devdocs/builtins.md new file mode 100644 index 0000000000000..eef5cbd04dd8c --- /dev/null +++ b/doc/src/devdocs/builtins.md @@ -0,0 +1,18 @@ +# [Core.Builtins](@id lib-builtins) + +## Builtin Function APIs + +The following Builtin function APIs are considered unstable, but provide the basic +definitions for what defines the abilities and behaviors of a Julia program. They are +typically accessed through a higher level generic API. + +```@docs +Core.memoryref +Core.memoryrefoffset +Core.memoryrefget +Core.memoryrefset! 
+Core.memoryref_isassigned +Core.IntrinsicFunction +Core.Intrinsics +Core.IR +``` diff --git a/doc/src/devdocs/cartesian.md b/doc/src/devdocs/cartesian.md index 1d338cbd8fab3..604f04f2a39e5 100644 --- a/doc/src/devdocs/cartesian.md +++ b/doc/src/devdocs/cartesian.md @@ -133,6 +133,7 @@ Base.Cartesian.@nref Base.Cartesian.@nextract Base.Cartesian.@nexprs Base.Cartesian.@ncall +Base.Cartesian.@ncallkw Base.Cartesian.@ntuple Base.Cartesian.@nall Base.Cartesian.@nany diff --git a/doc/src/devdocs/debuggingtips.md b/doc/src/devdocs/debuggingtips.md index 7639e8be2ef96..a7740b1780b06 100644 --- a/doc/src/devdocs/debuggingtips.md +++ b/doc/src/devdocs/debuggingtips.md @@ -41,11 +41,16 @@ useful. ## Useful Julia functions for Inspecting those variables - * `jl_gdblookup($rip)` :: For looking up the current function and line. (use `$eip` on i686 platforms) + * `jl_print_task_backtraces(0)` :: Similar to gdb's `thread apply all bt` or lldb's `thread backtrace + all`. Runs all threads while printing backtraces for all existing tasks. + * `jl_gdblookup($pc)` :: For looking up the current function and line. + * `jl_gdblookupinfo($pc)` :: For looking up the current method instance object. + * `jl_gdbdumpcode(mi)` :: For dumping all of `code_typed/code_llvm/code_asm` when the REPL is not working right. * `jlbacktrace()` :: For dumping the current Julia backtrace stack to stderr. Only usable after `record_backtrace()` has been called. * `jl_dump_llvm_value(Value*)` :: For invoking `Value->dump()` in gdb, where it doesn't work natively. For example, `f->linfo->functionObject`, `f->linfo->specFunctionObject`, and `to_function(f->linfo)`. + * `jl_dump_llvm_module(Module*)` :: For invoking `Module->dump()` in gdb, where it doesn't work natively. * `Type->dump()` :: only works in lldb. Note: add something like `;1` to prevent lldb from printing its prompt over the output * `jl_eval_string("expr")` :: for invoking side-effects to modify the current state or to lookup diff --git a/doc/src/devdocs/eval.md b/doc/src/devdocs/eval.md index 1aea5161ad23a..8f2fd68159676 100644 --- a/doc/src/devdocs/eval.md +++ b/doc/src/devdocs/eval.md @@ -18,6 +18,9 @@ function, and primitive function, before turning into the desired result (hopefu Abstract Syntax Tree The AST is the digital representation of the code structure. In this form the code has been tokenized for meaning so that it is more suitable for manipulation and execution. + +![Diagram of the compiler flow](./img/compiler_diagram.png) + ## Julia Execution The 10,000 foot view of the whole process is as follows: @@ -25,7 +28,7 @@ The 10,000 foot view of the whole process is as follows: 1. The user starts `julia`. 2. The C function `main()` from `cli/loader_exe.c` gets called. This function processes the command line arguments, filling in the `jl_options` struct and setting the variable `ARGS`. It then initializes - Julia (by calling [`julia_init` in `task.c`](https://github.com/JuliaLang/julia/blob/master/src/task.c), + Julia (by calling [`julia_init` in `init.c`](https://github.com/JuliaLang/julia/blob/master/src/init.c), which may load a previously compiled [sysimg](@ref dev-sysimg)). Finally, it passes off control to Julia by calling [`Base._start()`](https://github.com/JuliaLang/julia/blob/master/base/client.jl). 3. 
When `_start()` takes over control, the subsequent sequence of commands depends on the command diff --git a/doc/src/devdocs/external_profilers.md b/doc/src/devdocs/external_profilers.md new file mode 100644 index 0000000000000..836d821b91df9 --- /dev/null +++ b/doc/src/devdocs/external_profilers.md @@ -0,0 +1,104 @@ +# External Profiler Support + +Julia provides explicit support for some external tracing profilers, enabling you to obtain a high-level overview of the runtime's execution behavior. + +The currently supported profilers are: +- [Tracy](https://github.com/wolfpld/tracy) +- [Intel VTune (ITTAPI)](https://github.com/intel/ittapi) + +### Adding New Zones + +To add new zones, use the `JL_TIMING` macro. You can find numerous examples throughout the codebase by searching for `JL_TIMING`. To add a new type of zone +you add it to `JL_TIMING_OWNERS` (and possibly `JL_TIMING_EVENTS`). + +### Dynamically Enabling and Disabling Zones + +The [`JULIA_TIMING_SUBSYSTEMS`](@ref JULIA_TIMING_SUBSYSTEMS) environment variable allows you to enable or disable zones for a specific Julia run. For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable the `INFERENCE` +zones. + +## Tracy Profiler + +[Tracy](https://github.com/wolfpld/tracy) is a flexible profiler that can be optionally integrated with Julia. + +A typical Tracy session might look like this: + +![Typical Tracy usage](tracy.png) + +### Building Julia with Tracy + +To enable Tracy integration, build Julia with the extra option `WITH_TRACY=1` in the `Make.user` file. + +### Installing the Tracy Profile Viewer + +The easiest way to obtain the profile viewer is by adding the `TracyProfiler_jll` package and launching the profiler with: + +```julia +run(TracyProfiler_jll.tracy()) +``` + +!!! note + On macOS, you may want to set the `TRACY_DPI_SCALE` environment variable to `1.0` if the UI elements in the profiler appear excessively large. + +To run a "headless" instance that saves the trace to disk, use + +```julia +run(`$(TracyProfiler_jll.capture()) -o mytracefile.tracy`) +``` + +instead. + +For information on using the Tracy UI, refer to the Tracy manual. + +### Profiling Julia with Tracy + +A typical workflow for profiling Julia with Tracy involves starting Julia using: + +```julia +JULIA_WAIT_FOR_TRACY=1 ./julia -e '...' +``` + +The environment variable ensures that Julia waits until it has successfully connected to the Tracy profiler before continuing execution. Afterward, use the Tracy profiler UI, click `Connect`, and Julia execution should resume and profiling should start. + +### Profiling package precompilation with Tracy + +To profile a package precompilation process it is easiest to explicitly call into `Base.compilecache` with the package you want to precompile: + +```julia +pkg = Base.identify_package("SparseArrays") +withenv("JULIA_WAIT_FOR_TRACY" => 1, "TRACY_PORT" => 9001) do + Base.compilecache(pkg) +end +``` + +Here, we use a custom port for tracy which makes it easier to find the correct client in the Tracy UI to connect to. + +### Adding metadata to zones + +The various `jl_timing_show_*` and `jl_timing_printf` functions can be used to attach a string (or strings) to a zone. For example, the trace zone for inference shows the method instance that is being inferred. + +The `TracyCZoneColor` function can be used to set the color of a certain zone. Search through the codebase to see how it is used. 
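+
+Putting the pieces above together, one possible end-to-end recipe (a sketch, assuming `TracyProfiler_jll` is installed and Julia was built with `WITH_TRACY=1`; the workload and output file name are placeholders) is to start a headless capture and then launch the instrumented Julia process against it:
+
+```julia
+using TracyProfiler_jll
+
+# Start a headless capture that waits for a client and writes the trace to disk.
+capture = run(`$(TracyProfiler_jll.capture()) -o mytracefile.tracy`; wait=false)
+
+# Launch the workload; JULIA_WAIT_FOR_TRACY makes it block until the capture connects.
+withenv("JULIA_WAIT_FOR_TRACY" => 1) do
+    run(`$(Base.julia_cmd()) -e 'sum(rand(10^6))'`)
+end
+
+wait(capture)   # mytracefile.tracy can now be opened in the Tracy UI
+```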
+ +### Viewing Tracy files in your browser + +Visit https://topolarity.github.io/trace-viewer/ for an (experimental) web viewer for Tracy traces. + +You can open a local `.tracy` file or provide a URL from the web (e.g. a file in a Github repo). If you load a trace file from the web, you can also share the page URL directly with others, enabling them to view the same trace. + +### Enabling stack trace samples + +To enable call stack sampling in Tracy, build Julia with these options in your `Make.user` file: +``` +WITH_TRACY := 1 +WITH_TRACY_CALLSTACKS := 1 +USE_BINARYBUILDER_LIBTRACYCLIENT := 0 +``` + +You may also need to run `make -C deps clean-libtracyclient` to force a re-build of Tracy. + +This feature has a significant impact on trace size and profiling overhead, so it is recommended to leave call stack sampling off when possible, especially if you intend to share your trace files online. + +Note that the Julia JIT runtime does not yet have integration for Tracy's symbolification, so Julia functions will typically be unknown in these stack traces. + +## Intel VTune (ITTAPI) Profiler + +*This section is yet to be written.* diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md index 13f863cd26d81..283f63b2d0dce 100644 --- a/doc/src/devdocs/functions.md +++ b/doc/src/devdocs/functions.md @@ -48,7 +48,7 @@ jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t nargs); Given the above dispatch process, conceptually all that is needed to add a new method is (1) a tuple type, and (2) code for the body of the method. `jl_method_def` implements this operation. -`jl_first_argument_datatype` is called to extract the relevant method table from what would be +`jl_method_table_for` is called to extract the relevant method table from what would be the type of the first argument. This is much more complicated than the corresponding procedure during dispatch, since the argument tuple type might be abstract. For example, we can define: @@ -141,9 +141,9 @@ but works reasonably well. ## Keyword arguments -Keyword arguments work by associating a special, hidden function object with each method table -that has definitions with keyword arguments. This function is called the "keyword argument sorter" -or "keyword sorter", or "kwsorter", and is stored in the `kwsorter` field of `MethodTable` objects. +Keyword arguments work by adding methods to the kwcall function. This function +is usually the "keyword argument sorter" or "keyword sorter", which then calls +the inner body of the function (defined anonymously). Every definition in the kwsorter function has the same arguments as some definition in the normal method table, except with a single `NamedTuple` argument prepended, which gives the names and values of passed keyword arguments. The kwsorter's job is to move keyword arguments @@ -220,10 +220,10 @@ circle((0,0), 1.0, color = red; other...) is lowered to: ```julia -kwfunc(circle)(merge((color = red,), other), circle, (0,0), 1.0) +kwcall(merge((color = red,), other), circle, (0,0), 1.0) ``` - `kwfunc` (also in`Core`) fetches the kwsorter for the called function. + `kwcall` (also in`Core`) denotes a kwcall signature and dispatch. The keyword splatting operation (written as `other...`) calls the named tuple `merge` function. This function further unpacks each *element* of `other`, expecting each one to contain two values (a symbol and a value). 
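+
+The lowering described above can be observed directly from the REPL (a hedged illustration; `f` is a throwaway function and the generated kwsorter method is an internal detail whose exact signature may vary across versions):
+
+```julia
+f(x; a = 1, b = 2) = x + a + b
+
+# A keyword call is lowered to a call of Core.kwcall with a NamedTuple of the
+# passed keyword arguments prepended to the usual arguments.
+f(0; a = 10) == Core.kwcall((a = 10,), f, 0)        # true
+
+# The kwsorter method generated for `f`:
+methods(Core.kwcall, (NamedTuple, typeof(f), Int))
+```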
diff --git a/doc/src/devdocs/gc.md b/doc/src/devdocs/gc.md new file mode 100644 index 0000000000000..942535f426b34 --- /dev/null +++ b/doc/src/devdocs/gc.md @@ -0,0 +1,78 @@ +# Garbage Collection in Julia + +## Introduction + +Julia has a serial, stop-the-world, generational, non-moving mark-sweep garbage collector. +Native objects are precisely scanned and foreign ones are conservatively marked. + +## Memory layout of objects and GC bits + +An opaque tag is stored in the front of GC managed objects, and its lowest two bits are +used for garbage collection. The lowest bit is set for marked objects and the second +lowest bit stores age information (e.g. it's only set for old objects). + +Objects are aligned by a multiple of 4 bytes to ensure this pointer tagging is legal. + +## Pool allocation + +Sufficiently small objects (up to 2032 bytes) are allocated on per-thread object +pools. + +A three-level tree (analogous to a three-level page-table) is used to keep metadata +(e.g. whether a page has been allocated, whether contains marked objects, number of free objects etc.) +about address ranges spanning at least one page. +Sweeping a pool allocated object consists of inserting it back into the free list +maintained by its pool. + +## Malloc'd arrays and big objects + +Two lists are used to keep track of the remaining allocated objects: +one for sufficiently large malloc'd arrays (`mallocarray_t`) and one for +sufficiently large objects (`bigval_t`). + +Sweeping these objects consists of unlinking them from their list and calling `free` on the +corresponding address. + +## Generational and remembered sets + +Field writes into old objects trigger a write barrier if the written field +points to a young object and if a write barrier has not been triggered on the old object yet. +In this case, the old object being written to is enqueued into a remembered set, and +its mark bit is set to indicate that a write barrier has already been triggered on it. + +There is no explicit flag to determine whether a marking pass will scan the +entire heap or only through young objects and remembered set. +The mark bits of the objects themselves are used to determine whether a full mark happens. +The mark-sweep algorithm follows this sequence of steps: + +- Objects in the remembered set have their GC mark bits reset +(these are set once write barrier is triggered, as described above) and are enqueued. + +- Roots (e.g. thread locals) are enqueued. + +- Object graph is traversed and mark bits are set. + +- Object pools, malloc'd arrays and big objects are sweeped. On a full sweep, +the mark bits of all marked objects are reset. On a generational sweep, +only the mark bits of marked young objects are reset. + +- Mark bits of objects in the remembered set are set, +so we don't trigger the write barrier on them again. + +After these stages, old objects will be left with their mark bits set, +so that references from them are not explored in a subsequent generational collection. +This scheme eliminates the need of explicitly keeping a flag to indicate a full mark +(though a flag to indicate a full sweep is necessary). + +## Heuristics + +GC heuristics tune the GC by changing the size of the allocation interval between garbage collections. 
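+
+Both the generational collections described earlier and the full collections triggered by these heuristics can be watched from a running session (a rough illustration only; the log format and thresholds are version dependent):
+
+```julia
+GC.enable_logging(true)          # print a line for each collection, including whether it was full
+
+xs = [zeros(1024) for _ in 1:10_000]   # allocate enough to trigger a few collections
+
+GC.gc(false)                     # request an incremental (young-generation) collection
+GC.gc(true)                      # request a full collection
+
+GC.enable_logging(false)
+```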
+ +The GC heuristics measure how big the heap size is after a collection and set the next +collection according to the algorithm described by https://dl.acm.org/doi/10.1145/3563323, +in summary, it argues that the heap target should have a square root relationship with the live heap, and that it should also be scaled by how fast the GC is freeing objects and how fast the mutators are allocating. +The heuristics measure the heap size by counting the number of pages that are in use and the objects that use malloc. Previously we measured the heap size by counting +the alive objects, but that doesn't take into account fragmentation which could lead to bad decisions, that also meant that we used thread local information (allocations) to make +decisions about a process wide (when to GC), measuring pages means the decision is global. + +The GC will do full collections when the heap size reaches 80% of the maximum allowed size. diff --git a/doc/src/devdocs/img/compiler_diagram.png b/doc/src/devdocs/img/compiler_diagram.png new file mode 100644 index 0000000000000..5c4b780bca455 Binary files /dev/null and b/doc/src/devdocs/img/compiler_diagram.png differ diff --git a/doc/src/devdocs/img/compiler_diagram.svg b/doc/src/devdocs/img/compiler_diagram.svg new file mode 100644 index 0000000000000..f8fb9172a788f --- /dev/null +++ b/doc/src/devdocs/img/compiler_diagram.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/doc/src/devdocs/img/precompilation_hang.png b/doc/src/devdocs/img/precompilation_hang.png new file mode 100644 index 0000000000000..d076b7697f271 Binary files /dev/null and b/doc/src/devdocs/img/precompilation_hang.png differ diff --git a/doc/src/devdocs/inference.md b/doc/src/devdocs/inference.md index cce272f336a86..c885441e4dd84 100644 --- a/doc/src/devdocs/inference.md +++ b/doc/src/devdocs/inference.md @@ -2,12 +2,12 @@ ## How inference works -[Type inference](https://en.wikipedia.org/wiki/Type_inference) refers -to the process of deducing the types of later values from the types of -input values. Julia's approach to inference has been described in blog -posts -([1](https://juliacomputing.com/blog/2016/04/inference-convergence/), -[2](https://juliacomputing.com/blog/2017/05/inference-converage2/)). +In Julia compiler, "type inference" refers to the process of deducing the types of later +values from the types of input values. Julia's approach to inference has been described in +the blog posts below: +1. [Shows a simplified implementation of the data-flow analysis algorithm, that Julia's type inference routine is based on.](https://aviatesk.github.io/posts/data-flow-problem/) +2. [Gives a high level view of inference with a focus on its inter-procedural convergence guarantee.](https://info.juliahub.com/inference-convergence-algorithm-in-julia) +3. 
[Explains a refinement on the algorithm introduced in 2.](https://info.juliahub.com/inference-convergence-algorithm-in-julia-revisited) ## Debugging compiler.jl @@ -36,9 +36,9 @@ m = first(mths) # Create variables needed to call `typeinf_code` interp = Core.Compiler.NativeInterpreter() sparams = Core.svec() # this particular method doesn't have type-parameters -optimize = true # run all inference optimizations +run_optimizer = true # run all inference optimizations types = Tuple{typeof(convert), atypes.parameters...} # Tuple{typeof(convert), Type{Int}, UInt} -Core.Compiler.typeinf_code(interp, m, types, sparams, optimize) +Core.Compiler.typeinf_code(interp, m, types, sparams, run_optimizer) ``` If your debugging adventures require a `MethodInstance`, you can look it up by @@ -96,19 +96,20 @@ Each statement gets analyzed for its total cost in a function called as follows: ```jldoctest; filter=r"tuple.jl:\d+" julia> Base.print_statement_costs(stdout, map, (typeof(sqrt), Tuple{Int},)) # map(sqrt, (2,)) -map(f, t::Tuple{Any}) - @ Base tuple.jl:273 - 0 1 ─ %1 = Base.getfield(_3, 1, true)::Int64 - 1 │ %2 = Base.sitofp(Float64, %1)::Float64 - 2 │ %3 = Base.lt_float(%2, 0.0)::Bool - 0 └── goto #3 if not %3 - 0 2 ─ invoke Base.Math.throw_complex_domainerror(:sqrt::Symbol, %2::Float64)::Union{} +map(f, t::Tuple{Any}) @ Base tuple.jl:281 + 0 1 ─ %1 = $(Expr(:boundscheck, true))::Bool + 0 │ %2 = Base.getfield(_3, 1, %1)::Int64 + 1 │ %3 = Base.sitofp(Float64, %2)::Float64 + 0 │ %4 = Base.lt_float(%3, 0.0)::Bool + 0 └── goto #3 if not %4 + 0 2 ─ invoke Base.Math.throw_complex_domainerror(:sqrt::Symbol, %3::Float64)::Union{} 0 └── unreachable - 20 3 ─ %7 = Base.Math.sqrt_llvm(%2)::Float64 + 20 3 ─ %8 = Base.Math.sqrt_llvm(%3)::Float64 0 └── goto #4 0 4 ─ goto #5 - 0 5 ─ %10 = Core.tuple(%7)::Tuple{Float64} - 0 └── return %10 + 0 5 ─ %11 = Core.tuple(%8)::Tuple{Float64} + 0 └── return %11 + ``` The line costs are in the left column. This includes the consequences of inlining and other forms of optimization. diff --git a/doc/src/devdocs/init.md b/doc/src/devdocs/init.md index 348e69f673f80..1e0e1173f8695 100644 --- a/doc/src/devdocs/init.md +++ b/doc/src/devdocs/init.md @@ -6,9 +6,9 @@ How does the Julia runtime execute `julia -e 'println("Hello World!")'` ? Execution starts at [`main()` in `cli/loader_exe.c`](https://github.com/JuliaLang/julia/blob/master/cli/loader_exe.c), which calls `jl_load_repl()` in [`cli/loader_lib.c`](https://github.com/JuliaLang/julia/blob/master/cli/loader_lib.c) -which loads a few libraries, eventually calling [`repl_entrypoint()` in `src/jlapi.c`](https://github.com/JuliaLang/julia/blob/master/src/jlapi.c). +which loads a few libraries, eventually calling [`jl_repl_entrypoint()` in `src/jlapi.c`](https://github.com/JuliaLang/julia/blob/master/src/jlapi.c). -`repl_entrypoint()` calls [`libsupport_init()`](https://github.com/JuliaLang/julia/blob/master/src/support/libsupportinit.c) +`jl_repl_entrypoint()` calls [`libsupport_init()`](https://github.com/JuliaLang/julia/blob/master/src/support/libsupportinit.c) to set the C library locale and to initialize the "ios" library (see [`ios_init_stdstreams()`](https://github.com/JuliaLang/julia/blob/master/src/support/ios.c) and [Legacy `ios.c` library](@ref Legacy-ios.c-library)). @@ -20,7 +20,7 @@ or early initialization. 
Other options are handled later by [`exec_options()` in ## `julia_init()` -[`julia_init()` in `task.c`](https://github.com/JuliaLang/julia/blob/master/src/task.c) is called +[`julia_init()` in `init.c`](https://github.com/JuliaLang/julia/blob/master/src/init.c) is called by `main()` and calls [`_julia_init()` in `init.c`](https://github.com/JuliaLang/julia/blob/master/src/init.c). `_julia_init()` begins by calling `libsupport_init()` again (it does nothing the second time). @@ -118,7 +118,7 @@ Other signals (`SIGINFO, SIGBUS, SIGILL, SIGTERM, SIGABRT, SIGQUIT, SIGSYS` and hooked up to [`sigdie_handler()`](https://github.com/JuliaLang/julia/blob/master/src/signals-unix.c) which prints a backtrace. -[`jl_init_restored_modules()`](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c) calls +[`jl_init_restored_module()`](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c) calls [`jl_module_run_initializer()`](https://github.com/JuliaLang/julia/blob/master/src/module.c) for each deserialized module to run the `__init__()` function. diff --git a/doc/src/devdocs/isbitsunionarrays.md b/doc/src/devdocs/isbitsunionarrays.md index 2a25c033ec9fd..f01afe50985ec 100644 --- a/doc/src/devdocs/isbitsunionarrays.md +++ b/doc/src/devdocs/isbitsunionarrays.md @@ -18,6 +18,12 @@ Lastly, a value of `0x00` signals that the `nothing` value will be returned for type with a single type instance, it technically has a size of 0. The type tag byte for a type's Union field is stored directly after the field's computed Union memory. -## isbits Union Arrays +## isbits Union Memory -Julia can now also store "isbits Union" values inline in an Array, as opposed to requiring an indirection box. The optimization is accomplished by storing an extra "type tag array" of bytes, one byte per array element, alongside the bytes of the actual array data. This type tag array serves the same function as the type field case: its value signals the type of the actual stored Union value in the array. In terms of layout, a Julia Array can include extra "buffer" space before and after its actual data values, which are tracked in the `a->offset` and `a->maxsize` fields of the `jl_array_t*` type. The "type tag array" is treated exactly as another `jl_array_t*`, but which shares the same `a->offset`, `a->maxsize`, and `a->len` fields. So the formula to access an isbits Union Array's type tag bytes is `a->data + (a->maxsize - a->offset) * a->elsize + a->offset`; i.e. the Array's `a->data` pointer is already shifted by `a->offset`, so correcting for that, we follow the data all the way to the max of what it can hold `a->maxsize`, then adjust by `a->offset` more bytes to account for any present "front buffering" the array might be doing. This layout in particular allows for very efficient resizing operations as the type tag data only ever has to move when the actual array's data has to move. +Julia can now also store "isbits Union" values inline in a Memory, as opposed to requiring +an indirection box. The optimization is accomplished by storing an extra "type tag memory" +of bytes, one byte per element, alongside the bytes of the actual data. This type tag memory +serves the same function as the type field case: its value signals the type of the actual +stored Union value. The "type tag memory" directly follows the regular data space. So the +formula to access an isbits Union Array's type tag bytes is `a->data + a->length * +a->elsize`. 
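+
+From the Julia side the inline layout shows up in the fact that the data size of such an array is simply `length * elsize`, with the tag bytes stored out of line (a hedged illustration; `Base.isbitsunion` is an internal helper, not public API):
+
+```julia
+v = Union{Int8, Nothing}[Int8(1), nothing, Int8(3), nothing]
+
+Base.isbitsunion(eltype(v))   # true: elements are stored inline, one extra tag byte each
+sizeof(v)                     # 4: one byte of data per element; the tag bytes are not counted
+```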
diff --git a/doc/src/devdocs/jit.md b/doc/src/devdocs/jit.md new file mode 100644 index 0000000000000..f33b968ad3948 --- /dev/null +++ b/doc/src/devdocs/jit.md @@ -0,0 +1,78 @@ +# JIT Design and Implementation + +This document explains the design and implementation of Julia's JIT, after codegen has finished and unoptimized LLVM IR has been produced. The JIT is responsible for optimizing and compiling this IR to machine code, and for linking it into the current process and making the code available for execution. + +## Introduction + +The JIT is responsible for managing compilation resources, looking up previously compiled code, and compiling new code. It is primarily built on LLVM's [On-Request-Compilation](https://llvm.org/docs/ORCv2.html) (ORCv2) technology, which provides support for a number of useful features such as concurrent compilation, lazy compilation, and the ability to compile code in a separate process. Though LLVM provides a basic JIT compiler in the form of LLJIT, Julia uses many ORCv2 APIs directly to create its own custom JIT compiler. + +## Overview + +![Diagram of the compiler flow](./img/compiler_diagram.png) + +Codegen produces an LLVM module containing IR for one or more Julia functions from the original Julia SSA IR produced by type inference (labeled as translate on the compiler diagram above). It also produces a mapping of code-instance to LLVM function name. However, though some optimizations have been applied by the Julia-based compiler on Julia IR, the LLVM IR produced by codegen still contains many opportunities for optimization. Thus, the first step the JIT takes is to run a target-independent optimization pipeline[^tdp] on the LLVM module. Then, the JIT runs a target-dependent optimization pipeline, which includes target-specific optimizations and code generation, and outputs an object file. Finally, the JIT links the resulting object file into the current process and makes the code available for execution. All of this is controlled by code in `src/jitlayers.cpp`. + +[^tdp]: This is not a totally-target independent pipeline, as transformations such as vectorization rely upon target information such as vector register width and cost modeling. Additionally, codegen itself makes a few target-dependent assumptions, and the optimization pipeline will take advantage of that knowledge. + +Currently, only one thread at a time is permitted to enter the optimize-compile-link pipeline at a time, due to restrictions imposed by one of our linkers (RuntimeDyld). However, the JIT is designed to support concurrent optimization and compilation, and the linker restriction is expected to be lifted in the future when RuntimeDyld has been fully superseded on all platforms. + +## Optimization Pipeline + +The optimization pipeline is based off LLVM's new pass manager, but the pipeline is customized for Julia's needs. The pipeline is defined in `src/pipeline.cpp`, and broadly proceeds through a number of stages as detailed below. + +1. Early Simplification + 1. These passes are mainly used to simplify the IR and canonicalize patterns so that later passes can identify those patterns more easily. Additionally, various intrinsic calls such as branch prediction hints and annotations are lowered into other metadata or other IR features. 
[`SimplifyCFG`](https://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg) (simplify control flow graph), [`DCE`](https://llvm.org/docs/Passes.html#dce-dead-code-elimination) (dead code elimination), and [`SROA`](https://llvm.org/docs/Passes.html#sroa-scalar-replacement-of-aggregates) (scalar replacement of aggregates) are some of the key players here. +2. Early Optimization + 1. These passes are typically cheap and are primarily focused around reducing the number of instructions in the IR and propagating knowledge to other instructions. For example, [`EarlyCSE`](https://en.wikipedia.org/wiki/Common_subexpression_elimination) is used to perform common subexpression elimination, and [`InstCombine`](https://llvm.org/docs/Passes.html#instcombine-combine-redundant-instructions) and [`InstSimplify`](https://llvm.org/doxygen/classllvm_1_1InstSimplifyPass.html#details) perform a number of small peephole optimizations to make operations less expensive. +3. Loop Optimization + 1. These passes canonicalize and simplify loops. Loops are often hot code, which makes loop optimization extremely important for performance. Key players here include [`LoopRotate`](https://llvm.org/docs/Passes.html#loop-rotate-rotate-loops), [`LICM`](https://llvm.org/docs/Passes.html#licm-loop-invariant-code-motion), and [`LoopFullUnroll`](https://llvm.org/docs/Passes.html#loop-unroll-unroll-loops). Some bounds check elimination also happens here, as a result of the [`IRCE`](https://llvm.org/doxygen/InductiveRangeCheckElimination_8cpp_source.html) pass which can prove certain bounds are never exceeded. +4. Scalar Optimization + 1. The scalar optimization pipeline contains a number of more expensive, but more powerful passes such as [`GVN`](https://llvm.org/docs/Passes.html#gvn-global-value-numbering) (global value numbering), [`SCCP`](https://llvm.org/docs/Passes.html#sccp-sparse-conditional-constant-propagation) (sparse conditional constant propagation), and another round of bounds check elimination. These passes are expensive, but they can often remove large amounts of code and make vectorization much more successful and effective. Several other simplification and optimization passes intersperse the more expensive ones to reduce the amount of work they have to do. +5. Vectorization + 1. [Automatic vectorization](https://en.wikipedia.org/wiki/Automatic_vectorization) is an extremely powerful transformation for CPU-intensive code. Briefly, vectorization allows execution of a [single instruction on multiple data](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) (SIMD), e.g. performing 8 addition operations at the same time. However, proving code to be both capable of vectorization and profitable to vectorize is difficult, and this relies heavily on the prior optimization passes to massage the IR into a state where vectorization is worth it. +6. Intrinsic Lowering + 1. Julia inserts a number of custom intrinsics, for reasons such as object allocation, garbage collection, and exception handling. These intrinsics were originally placed to make optimization opportunities more obvious, but they are now lowered into LLVM IR to enable the IR to be emitted as machine code. +7. Cleanup + 1. These passes are last-chance optimizations, and perform small optimizations such as fused multiply-add propagation and division-remainder simplification. 
Additionally, targets that do not support half-precision floating point numbers will have their half-precision instructions lowered into single-precision instructions here, and passes are added to provide sanitizer support. + +## Target-Dependent Optimization and Code Generation + +LLVM provides target-dependent optimization and machine code generation in the same pipeline, located in the TargetMachine for a given platform. These passes include instruction selection, instruction scheduling, register allocation, and machine code emission. The LLVM documentation provides a good overview of the process, and the LLVM source code is the best place to look for details on the pipeline and passes. + +## Linking + +Currently, Julia is transitioning between two linkers: the older RuntimeDyld linker, and the newer [JITLink](https://llvm.org/docs/JITLink.html) linker. JITLink contains a number of features that RuntimeDyld does not have, such as concurrent and reentrant linking, but currently lacks good support for profiling integrations and does not yet support all of the platforms that RuntimeDyld supports. Over time, JITLink is expected to replace RuntimeDyld entirely. Further details on JITLink can be found in the LLVM documentation. + +## Execution + +Once the code has been linked into the current process, it is available for execution. This fact is made known to the generating codeinst by updating the `invoke`, `specsigflags`, and `specptr` fields appropriately. Codeinsts support upgrading `invoke`, `specsigflags`, and `specptr` fields, so long as every combination of these fields that exists at any given point in time is valid to be called. This allows the JIT to update these fields without invalidating existing codeinsts, supporting a potential future concurrent JIT. Specifically, the following states may be valid: +1. `invoke` is NULL, `specsigflags` is 0b00, `specptr` is NULL + 1. This is the initial state of a codeinst, and indicates that the codeinst has not yet been compiled. +2. `invoke` is non-null, `specsigflags` is 0b00, `specptr` is NULL + 1. This indicates that the codeinst was not compiled with any specialization, and that the codeinst should be invoked directly. Note that in this instance, `invoke` does not read either the `specsigflags` or `specptr` fields, and therefore they may be modified without invalidating the `invoke` pointer. +3. `invoke` is non-null, `specsigflags` is 0b10, `specptr` is non-null + 1. This indicates that the codeinst was compiled, but a specialized function signature was deemed unnecessary by codegen. +4. `invoke` is non-null, `specsigflags` is 0b11, `specptr` is non-null + 1. This indicates that the codeinst was compiled, and a specialized function signature was deemed necessary by codegen. The `specptr` field contains a pointer to the specialized function signature. The `invoke` pointer is permitted to read both `specsigflags` and `specptr` fields. + +In addition, there are a number of different transitional states that occur during the update process. To account for these potential situations, the following write and read patterns should be used when dealing with these codeinst fields. + +1. When writing `invoke`, `specsigflags`, and `specptr`: + 1. Perform an atomic compare-exchange operation of specptr assuming the old value was NULL. This compare-exchange operation should have at least acquire-release ordering, to provide ordering guarantees of the remaining memory operations in the write. + 2. 
If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written. + 3. Write the new low bit of `specsigflags` to its final value. This may be a relaxed write. + 4. Write the new `invoke` pointer to its final value. This must have at least a release memory ordering to synchronize with reads of `invoke`. + 5. Set the second bit of `specsigflags` to 1. This must be at least a release memory ordering to synchronize with reads of `specsigflags`. This step completes the write operation and announces to all other threads that all fields have been set. +2. When reading all of `invoke`, `specsigflags`, and `specptr`: + 1. Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`. + 2. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. + 3. Read the `specptr` field with at least an acquire memory ordering. + 4. If `specptr` is NULL, then the `initial_invoke` pointer must not be relying on `specptr` to guarantee correct execution. Therefore, `invoke` is non-null, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. + 5. If `specptr` is non-null, then `initial_invoke` might not be the final `invoke` field that uses `specptr`. This can occur if `specptr` has been written, but `invoke` has not yet been written. Therefore, spin on the second bit of `specsigflags` until it is set to 1 with at least acquire memory ordering. + 6. Re-read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `final_invoke`. + 7. Read the `specsigflags` field with any memory ordering. + 8. `invoke` is `final_invoke`, `specsigflags` is the value read in step 7, `specptr` is the value read in step 3. +3. When updating a `specptr` to a different but equivalent function pointer: + 1. Perform a release store of the new function pointer to `specptr`. Races here must be benign, as the old function pointer is required to still be valid, and any new ones are also required to be valid as well. Once a pointer has been written to `specptr`, it must always be callable whether or not it is later overwritten. + +Although these write, read, and update steps are complicated, they ensure that the JIT can update codeinsts without invalidating existing codeinsts, and that the JIT can update codeinsts without invalidating existing `invoke` pointers. This allows the JIT to potentially reoptimize functions at higher optimization levels in the future, and also will allow the JIT to support concurrent compilation of functions in the future. diff --git a/doc/src/devdocs/llvm-passes.md b/doc/src/devdocs/llvm-passes.md new file mode 100644 index 0000000000000..36383acaef512 --- /dev/null +++ b/doc/src/devdocs/llvm-passes.md @@ -0,0 +1,161 @@ +# Custom LLVM Passes + +Julia has a number of custom LLVM passes. Broadly, they can be classified into passes that are required to be run to maintain Julia semantics, and passes that take advantage of Julia semantics to optimize LLVM IR. + +## Semantic Passes + +These passes are used to transform LLVM IR into code that is legal to be run on a CPU. Their main purpose is to enable simpler IR to be emitted by codegen, which then enables other LLVM passes to optimize common patterns. 
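+
+The combined effect of these passes, together with the standard LLVM pipeline, can be inspected from the REPL by comparing the IR as emitted by codegen with the IR after optimization (a small sketch; the exact IR printed is target and version dependent):
+
+```julia
+f(x) = x + 1
+
+code_llvm(stdout, f, (Int,); raw = true, optimize = false)  # IR straight out of codegen
+code_llvm(stdout, f, (Int,); raw = true, optimize = true)   # after the custom and LLVM passes
+```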
+ +### CPUFeatures + +* Filename: `llvm-cpufeatures.cpp` +* Class Name: `CPUFeaturesPass` +* Opt Name: `module(CPUFeatures)` + +This pass lowers the `julia.cpu.have_fma.(f32|f64)` intrinsic to either true or false, depending on the target architecture and target features present on the function. This intrinsic is often used to determine if using algorithms dependent on fast [fused multiply-add](https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation#Fused_multiply%E2%80%93add) operations is better than using standard algorithms not dependent on such instructions. + +### DemoteFloat16 + +* Filename: `llvm-demote-float16.cpp` +* Class Name: `DemoteFloat16Pass` +* Opt Name: `function(DemoteFloat16)` + +This pass replaces [float16](https://en.wikipedia.org/wiki/Half-precision_floating-point_format) operations with float32 operations on architectures that do not natively support float16 operations. This is done by inserting `fpext` and `fptrunc` instructions around any float16 operation. On architectures that do support native float16 operations, this pass is a no-op. + +### LateGCLowering + +* Filename: `llvm-late-gc-lowering.cpp` +* Class Name: `LateLowerGCPass` +* Opt Name: `function(LateLowerGCFrame)` + +This pass performs most of the GC rooting work required to track pointers between GC safepoints. It also lowers several intrinsics to their corresponding instruction translation, and is permitted to violate the non-integral invariants previously established (`pointer_from_objref` is lowered to a `ptrtoint` instruction here). This pass typically occupies the most time out of all the custom Julia passes, due to its dataflow algorithm to minimize the number of objects live at any safepoint. + +### FinalGCLowering + +* Filename: `llvm-final-gc-lowering.cpp` +* Class Name: `FinalLowerGCPass` +* Opt Name: `module(FinalLowerGC)` + +This pass lowers a few last intrinsics to their final form targeting functions in the `libjulia` library. Separating this from `LateGCLowering` enables other backends (GPU compilation) to supply their own custom lowerings for these intrinsics, enabling the Julia pipeline to be used on those backends as well. + +### LowerHandlers + +* Filename: `llvm-lower-handlers.cpp` +* Class Name: `LowerExcHandlersPass` +* Opt Name: `function(LowerExcHandlers)` + +This pass lowers exception handling intrinsics into calls to runtime functions that are actually called when handling exceptions. + +### RemoveNI + +* Filename: `llvm-remove-ni.cpp` +* Class Name: `RemoveNIPass` +* Opt Name: `module(RemoveNI)` + +This pass removes the non-integral address spaces from the module's datalayout string. This enables the backend to lower Julia's custom address spaces directly to machine code, without a costly rewrite of every pointer operation to address space 0. + +### SIMDLoop + +* Filename: `llvm-simdloop.cpp` +* Class Name: `LowerSIMDLoopPass` +* Opt Name: `loop(LowerSIMDLoop)` + +This pass acts as the main driver of the `@simd` annotation. Codegen inserts a `!llvm.loopid` marker at the back branch of a loop, which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduction and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass preserves neither loop information nor inference correctness, so it may violate Julia semantics in surprising ways.
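To illustrate the kind of loop this pass is aimed at, consider a simple reduction annotated with `@simd` (a minimal example, not taken from Julia's test suite):

```julia
function simd_sum(x::Vector{Float64})
    s = 0.0
    @simd for i in eachindex(x)
        # the reduction chain on `s` is what receives the `contract`/`reassoc`
        # fast math flags, allowing LLVM to reassociate and vectorize it
        @inbounds s += x[i]
    end
    return s
end
```

Without the reassociation flags added by this pass, the strict left-to-right floating point summation order would prevent vectorization of the reduction.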
If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop). + +### LowerPTLS + +* Filename: `llvm-ptls.cpp` +* Class Name: `LowerPTLSPass` +* Opt Name: `module(LowerPTLSPass)` + +This pass lowers thread-local Julia intrinsics to assembly instructions. Julia relies on thread-local storage for garbage collection and multithreading task scheduling. When compiling code for system images and package images, this pass replaces calls to intrinsics with loads from global variables that are initialized at load time. + +If codegen produces a function with a `swiftself` argument and calling convention, this pass assumes the `swiftself` argument is the pgcstack and will replace the intrinsics with that argument. Doing so provides speedups on architectures that have slow thread local storage accesses. + +### RemoveAddrspaces + +* Filename: `llvm-remove-addrspaces.cpp` +* Class Name: `RemoveAddrspacesPass` +* Opt Name: `module(RemoveAddrspaces)` + +This pass renames pointers in one address space to another address space. This is used to remove Julia-specific address spaces from LLVM IR. + +### RemoveJuliaAddrspaces + +* Filename: `llvm-remove-addrspaces.cpp` +* Class Name: `RemoveJuliaAddrspacesPass` +* Opt Name: `module(RemoveJuliaAddrspaces)` + +This pass removes Julia-specific address spaces from LLVM IR. It is mostly used for displaying LLVM IR in a less cluttered format. Internally, it is implemented off the RemoveAddrspaces pass. + +### Multiversioning + +* Filename: `llvm-multiversioning.cpp` +* Class Name: `MultiVersioningPass` +* Opt Name: `module(JuliaMultiVersioning)` + +This pass performs modifications to a module to create functions that are optimized for running on different architectures (see sysimg.md and pkgimg.md for more details). Implementation-wise, it clones functions and applies different target-specific attributes to them to allow the optimizer to use advanced features such as vectorization and instruction scheduling for that platform. It also creates some infrastructure to enable the Julia image loader to select the appropriate version of the function to call based on the architecture the loader is running on. The target-specific attributes are controlled by the `julia.mv.specs` module flag, which during compilation is derived from the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable. The pass must also be enabled by providing a `julia.mv.enable` module flag with a value of 1. + +!!! warning + + Use of `llvmcall` with multiversioning is dangerous. `llvmcall` enables access to features not typically exposed by the Julia APIs, and are therefore usually not available on all architectures. If multiversioning is enabled and code generation is requested for a target architecture that does not support the feature required by an `llvmcall` expression, LLVM will probably error out, likely with an abort and the message `LLVM ERROR: Do not know how to split the result of this operator!`. + +### GCInvariantVerifier + +* Filename: `llvm-gc-invariant-verifier.cpp` +* Class Name: `GCInvariantVerifierPass` +* Opt Name: `module(GCInvariantVerifier)` + +This pass is used to verify Julia's invariants about LLVM IR. 
This includes things such as the nonexistence of `ptrtoint` in Julia's [non-integral address spaces](https://llvm.org/docs/LangRef.html#non-integral-pointer-type) [^nislides] and the existence of only blessed `addrspacecast` instructions (Tracked -> Derived, 0 -> Tracked, etc). It performs no transformations on IR. + +[^nislides]: https://llvm.org/devmtg/2015-02/slides/chisnall-pointers-not-int.pdf + +## Optimization Passes + +These passes are used to perform transformations on LLVM IR that LLVM will not perform itself, e.g. fast math flag propagation, escape analysis, and optimizations on Julia-specific internal functions. They use knowledge about Julia's semantics to perform these optimizations. + +### CombineMulAdd + +* Filename: `llvm-muladd.cpp` +* Class Name: `CombineMulAddPass` +* Opt Name: `function(CombineMulAdd)` + +This pass serves to optimize the particular combination of a regular `fmul` with a fast `fadd` into a contract `fmul` with a fast `fadd`. This is later optimized by the backend to a [fused multiply-add](https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation#Fused_multiply%E2%80%93add) instruction, which can provide significantly faster operations at the cost of more [unpredictable semantics](https://simonbyrne.github.io/notes/fastmath/). + +!!! note + + This optimization only occurs when the `fmul` has a single use, which is the fast `fadd`. + +### AllocOpt + +* Filename: `llvm-alloc-opt.cpp` +* Class Name: `AllocOptPass` +* Opt Name: `function(AllocOpt)` + +Julia does not have the concept of a program stack as a place to allocate mutable objects. However, allocating objects on the stack reduces GC pressure and is critical for GPU compilation. Thus, `AllocOpt` performs heap to stack conversion of objects that it can prove do not [escape](https://en.wikipedia.org/wiki/Escape_analysis) the current function. It also performs a number of other optimizations on allocations, such as removing allocations that are never used, optimizing typeof calls to freshly allocated objects, and removing stores to allocations that are immediately overwritten. The escape analysis implementation is located in `llvm-alloc-helpers.cpp`. Currently, this pass does not use information from `EscapeAnalysis.jl`, though that may change in the future. + +### PropagateJuliaAddrspaces + +* Filename: `llvm-propagate-addrspaces.cpp` +* Class Name: `PropagateJuliaAddrspacesPass` +* Opt Name: `function(PropagateJuliaAddrspaces)` + +This pass is used to propagate Julia-specific address spaces through operations on pointers. LLVM is not allowed to introduce or remove addrspacecast instructions by optimizations, so this pass acts to eliminate redundant addrspace casts by replacing operations with their equivalent in a Julia address space. For more information on Julia's address spaces, see (TODO link to llvm.md). + +### JuliaLICM + +* Filename: `llvm-julia-licm.cpp` +* Class Name: `JuliaLICMPass` +* Opt Name: `loop(JuliaLICM)` + +This pass is used to hoist Julia-specific intrinsics out of loops. Specifically, it performs the following transformations: +1. Hoist `gc_preserve_begin` and sink `gc_preserve_end` out of loops when the preserved objects are loop-invariant. + 1. Since objects preserved within a loop are likely preserved for the duration of the loop, this transformation can reduce the number of `gc_preserve_begin`/`gc_preserve_end` pairs in the IR. This makes it easier for the `LateLowerGCPass` to identify where particular objects are preserved. +2. 
Hoist write barriers with invariant objects + 1. Here we assume that there are only two generations that an object can be a part of. Given that, a write barrier needs to only execute once for any pair of the same object. Thus, we can hoist write barriers out of loops when the object being written to is loop-invariant. +3. Hoist allocations out of loops when they do not escape the loop + 1. We use a very conservative definition of escape here, the same as the one used in `AllocOptPass`. This transformation can reduce the number of allocations in the IR, even when an allocation escapes the function altogether. + +!!!note + + This pass is required to preserve LLVM's [MemorySSA](https://llvm.org/docs/MemorySSA.html) ([Short Video](https://www.youtube.com/watch?v=bdxWmryoHak), [Longer Video](https://www.youtube.com/watch?v=1e5y6WDbXCQ)) and [ScalarEvolution](https://baziotis.cs.illinois.edu/compilers/introduction-to-scalar-evolution.html) ([Newer Slides](https://llvm.org/devmtg/2018-04/slides/Absar-ScalarEvolution.pdf) [Older Slides](https://llvm.org/devmtg/2009-10/ScalarEvolutionAndLoopOptimization.pdf)) analyses. diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md index b9890b5d7fe3e..170a812c09994 100644 --- a/doc/src/devdocs/llvm.md +++ b/doc/src/devdocs/llvm.md @@ -9,18 +9,36 @@ Julia dynamically links against LLVM by default. Build with `USE_LLVM_SHLIB=0` t The code for lowering Julia AST to LLVM IR or interpreting it directly is in directory `src/`. -| File | Description | -|:------------------- |:---------------------------------------------------------- | -| `builtins.c` | Builtin functions | -| `ccall.cpp` | Lowering [`ccall`](@ref) | -| `cgutils.cpp` | Lowering utilities, notably for array and tuple accesses | -| `codegen.cpp` | Top-level of code generation, pass list, lowering builtins | -| `debuginfo.cpp` | Tracks debug information for JIT code | -| `disasm.cpp` | Handles native object file and JIT code diassembly | -| `gf.c` | Generic functions | -| `intrinsics.cpp` | Lowering intrinsics | -| `llvm-simdloop.cpp` | Custom LLVM pass for [`@simd`](@ref) | -| `sys.c` | I/O and operating system utility functions | +| File | Description | +|:-------------------------------- |:------------------------------------------------------------------ | +| `aotcompile.cpp` | Compiler C-interface entry and object file emission | +| `builtins.c` | Builtin functions | +| `ccall.cpp` | Lowering [`ccall`](@ref) | +| `cgutils.cpp` | Lowering utilities, notably for array and tuple accesses | +| `codegen.cpp` | Top-level of code generation, pass list, lowering builtins | +| `debuginfo.cpp` | Tracks debug information for JIT code | +| `disasm.cpp` | Handles native object file and JIT code diassembly | +| `gf.c` | Generic functions | +| `intrinsics.cpp` | Lowering intrinsics | +| `jitlayers.cpp` | JIT-specific code, ORC compilation layers/utilities | +| `llvm-alloc-helpers.cpp` | Julia-specific escape analysis | +| `llvm-alloc-opt.cpp` | Custom LLVM pass to demote heap allocations to the stack | +| `llvm-cpufeatures.cpp` | Custom LLVM pass to lower CPU-based functions (e.g. 
haveFMA) | +| `llvm-demote-float16.cpp` | Custom LLVM pass to lower 16b float ops to 32b float ops | +| `llvm-final-gc-lowering.cpp` | Custom LLVM pass to lower GC calls to their final form | +| `llvm-gc-invariant-verifier.cpp` | Custom LLVM pass to verify Julia GC invariants | +| `llvm-julia-licm.cpp` | Custom LLVM pass to hoist/sink Julia-specific intrinsics | +| `llvm-late-gc-lowering.cpp` | Custom LLVM pass to root GC-tracked values | +| `llvm-lower-handlers.cpp` | Custom LLVM pass to lower try-catch blocks | +| `llvm-muladd.cpp` | Custom LLVM pass for fast-match FMA | +| `llvm-multiversioning.cpp` | Custom LLVM pass to generate sysimg code on multiple architectures | +| `llvm-propagate-addrspaces.cpp` | Custom LLVM pass to canonicalize addrspaces | +| `llvm-ptls.cpp` | Custom LLVM pass to lower TLS operations | +| `llvm-remove-addrspaces.cpp` | Custom LLVM pass to remove Julia addrspaces | +| `llvm-remove-ni.cpp` | Custom LLVM pass to remove Julia non-integral addrspaces | +| `llvm-simdloop.cpp` | Custom LLVM pass for [`@simd`](@ref) | +| `pipeline.cpp` | New pass manager pipeline, pass pipeline parsing | +| `sys.c` | I/O and operating system utility functions | Some of the `.cpp` files form a group that compile to a single object. @@ -38,7 +56,7 @@ The `-O` option enables LLVM's [Basic Alias Analysis](https://llvm.org/docs/Alia ## Building Julia with a different version of LLVM -The default version of LLVM is specified in `deps/Versions.make`. You can override it by creating +The default version of LLVM is specified in `deps/llvm.version`. You can override it by creating a file called `Make.user` in the top-level directory and adding a line to it such as: ``` @@ -57,29 +75,74 @@ implies that option by default. ## Passing options to LLVM -You can pass options to LLVM via the environment variable `JULIA_LLVM_ARGS`. +You can pass options to LLVM via the environment variable [`JULIA_LLVM_ARGS`](@ref JULIA_LLVM_ARGS). Here are example settings using `bash` syntax: * `export JULIA_LLVM_ARGS=-print-after-all` dumps IR after each pass. * `export JULIA_LLVM_ARGS=-debug-only=loop-vectorize` dumps LLVM `DEBUG(...)` diagnostics for loop vectorizer. If you get warnings about "Unknown command line argument", rebuild LLVM with `LLVM_ASSERTIONS = 1`. + * `export JULIA_LLVM_ARGS=-help` shows a list of available options. `export JULIA_LLVM_ARGS=-help-hidden` shows even more. + * `export JULIA_LLVM_ARGS="-fatal-warnings -print-options"` is an example how to use multiple options. + +### Useful `JULIA_LLVM_ARGS` parameters + * `-print-after=PASS`: prints the IR after any execution of `PASS`, useful for checking changes done by a pass. + * `-print-before=PASS`: prints the IR before any execution of `PASS`, useful for checking the input to a pass. + * `-print-changed`: prints the IR whenever a pass changes the IR, useful for narrowing down which passes are causing problems. + * `-print-(before|after)=MARKER-PASS`: the Julia pipeline ships with a number of marker passes in the pipeline, which can be used to identify where problems or optimizations are occurring. A marker pass is defined as a pass which appears once in the pipeline and performs no transformations on the IR, and is only useful for targeting print-before/print-after. 
Currently, the following marker passes exist in the pipeline: + * BeforeOptimization + * BeforeEarlySimplification + * AfterEarlySimplification + * BeforeEarlyOptimization + * AfterEarlyOptimization + * BeforeLoopOptimization + * BeforeLICM + * AfterLICM + * BeforeLoopSimplification + * AfterLoopSimplification + * AfterLoopOptimization + * BeforeScalarOptimization + * AfterScalarOptimization + * BeforeVectorization + * AfterVectorization + * BeforeIntrinsicLowering + * AfterIntrinsicLowering + * BeforeCleanup + * AfterCleanup + * AfterOptimization + * `-time-passes`: prints the time spent in each pass, useful for identifying which passes are taking a long time. + * `-print-module-scope`: used in conjunction with `-print-(before|after)`, gets the entire module rather than the IR unit received by the pass + * `-debug`: prints out a lot of debugging information throughout LLVM + * `-debug-only=NAME`, prints out debugging statements from files with `DEBUG_TYPE` defined to `NAME`, useful for getting additional context about a problem ## Debugging LLVM transformations in isolation On occasion, it can be useful to debug LLVM's transformations in isolation from the rest of the Julia system, e.g. because reproducing the issue inside `julia` would take too long, or because one wants to take advantage of LLVM's tooling -(e.g. bugpoint). To get unoptimized IR for the entire system image, pass the +(e.g. bugpoint). + +To start with, you can install the developer tools to work with LLVM via: +``` +make -C deps install-llvm-tools +``` + +To get unoptimized IR for the entire system image, pass the `--output-unopt-bc unopt.bc` option to the system image build process, which will output the unoptimized IR to an `unopt.bc` file. This file can then be passed to LLVM tools as usual. `libjulia` can function as an LLVM pass plugin and can be loaded into LLVM tools, to make julia-specific passes available in this environment. In addition, it exposes the `-julia` meta-pass, which runs the entire Julia pass-pipeline over the IR. As an example, to generate a system -image, one could do: +image with the old pass manager, one could do: +``` + +llc -o sys.o opt.bc +cc -shared -o sys.so sys.o +``` +To generate a system image with the new pass manager, one could do: ``` -opt -enable-new-pm=0 -load libjulia-codegen.so -julia -o opt.bc unopt.bc +opt -load-pass-plugin=libjulia-codegen.so --passes='julia' -o opt.bc unopt.bc llc -o sys.o opt.bc cc -shared -o sys.so sys.o ``` @@ -91,12 +154,26 @@ using: fun, T = +, Tuple{Int,Int} # Substitute your function of interest here optimize = false open("plus.ll", "w") do file - println(file, InteractiveUtils._dump_function(fun, T, false, false, false, true, :att, optimize, :default)) + println(file, InteractiveUtils._dump_function(fun, T, false, false, false, true, :att, optimize, :default, false)) end ``` These files can be processed the same way as the unoptimized sysimg IR shown above. +## Running the LLVM test suite + +To run the llvm tests locally, you need to first install the tools, build julia, then you +can run the tests: +``` +make -C deps install-llvm-tools +make -j julia-src-release +make -C test/llvmpasses +``` + +If you want to run the individual test files directly, via the commands at the top of each +test file, the first step here will have installed the tools into `./usr/tools/opt`. Then +you'll want to manually replace `%s` with the name of the test file. 
+ ## Improving LLVM optimizations for Julia Improving LLVM code generation usually involves either changing Julia lowering to be more friendly @@ -264,8 +341,8 @@ ccall(:foo, Cvoid, (Ptr{Float64},), A) In lowering, the compiler will insert a conversion from the array to the pointer which drops the reference to the array value. However, we of course need to make sure that the array does stay alive while we're doing the -[`ccall`](@ref). To understand how this is done, first recall the lowering of the -above code: +[`ccall`](@ref). To understand how this is done, lets look at a hypothetical +approximate possible lowering of the above code: ```julia return $(Expr(:foreigncall, :(:foo), Cvoid, svec(Ptr{Float64}), 0, :(:ccall), Expr(:foreigncall, :(:jl_array_ptr), Ptr{Float64}, svec(Any), 0, :(:ccall), :(A)), :(A))) ``` diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md index 59dac6ad79498..f79f4f2b1e3e3 100644 --- a/doc/src/devdocs/locks.md +++ b/doc/src/devdocs/locks.md @@ -30,19 +30,26 @@ The following are definitely leaf locks (level 1), and must not try to acquire a > * jl_in_stackwalk (Win32) > * ResourcePool::mutex > * RLST_mutex +> * llvm_printing_mutex > * jl_locked_stream::mutex +> * debuginfo_asyncsafe +> * inference_timing_mutex +> * ExecutionEngine::SessionLock > > > flisp itself is already threadsafe, this lock only protects the `jl_ast_context_list_t` pool > > likewise, the ResourcePool::mutexes just protect the associated resource pool The following is a leaf lock (level 2), and only acquires level 1 locks (safepoint) internally: -> * typecache +> * global_roots_lock > * Module->lock +> * JLDebuginfoPlugin::PluginMutex +> * newly_inferred_mutex The following is a level 3 lock, which can only acquire level 1 or level 2 locks internally: > * Method->writelock +> * typecache The following is a level 4 lock, which can only recurse to acquire level 1, 2, or 3 locks: @@ -50,10 +57,17 @@ The following is a level 4 lock, which can only recurse to acquire level 1, 2, o No Julia code may be called while holding a lock above this point. -orc::ThreadSafeContext locks occupy a special spot in the locking diagram. They are used to protect -LLVM's global non-threadsafe state, but there may be an arbitrary number of them. For now, there is -only one global context, and thus acquiring it is a level 5 lock. However, acquiring such a lock -should only be done at the same time that the codegen lock is acquired. +orc::ThreadSafeContext (TSCtx) locks occupy a special spot in the locking hierarchy. They are used to +protect LLVM's global non-threadsafe state, but there may be an arbitrary number of them. By default, +all of these locks may be treated as level 5 locks for the purposes of comparing with the rest of the +hierarchy. Acquiring a TSCtx should only be done from the JIT's pool of TSCtx's, and all locks on +that TSCtx should be released prior to returning it to the pool. If multiple TSCtx locks must be +acquired at the same time (due to recursive compilation), then locks should be acquired in the order +that the TSCtxs were borrowed from the pool. + +The following is a level 5 lock + +> * JuliaOJIT::EmissionMutex The following are a level 6 lock, which can only recurse to acquire locks at lower levels: @@ -79,6 +93,8 @@ may result in pernicious and hard-to-find deadlocks. BE VERY CAREFUL! 
> > > this may continue to be held after releasing the iolock, or acquired without it, > > but be very careful to never attempt to acquire the iolock while holding it +> +> * Libdl.LazyLibrary lock The following is the root lock, meaning no other lock shall be held when trying to acquire it: @@ -121,7 +137,7 @@ These data structures each need locks due to being shared mutable global state. list for the above lock priority list. This list does not include level 1 leaf resources due to their simplicity. -MethodTable modifications (def, cache, kwsorter type) : MethodTable->writelock +MethodTable modifications (def, cache) : MethodTable->writelock Type declarations : toplevel lock diff --git a/doc/src/devdocs/meta.md b/doc/src/devdocs/meta.md index 7a58578b3e53e..726f471c228e0 100644 --- a/doc/src/devdocs/meta.md +++ b/doc/src/devdocs/meta.md @@ -34,9 +34,8 @@ quote end ``` -`Base.pushmeta!(ex, :symbol, args...)` appends `:symbol` to the end of the `:meta` expression, -creating a new `:meta` expression if necessary. If `args` is specified, a nested expression containing -`:symbol` and these arguments is appended instead, which can be used to specify additional information. +`Base.pushmeta!(ex, tag::Union{Symbol,Expr})` appends `:tag` to the end of the `:meta` expression, +creating a new `:meta` expression if necessary. To use the metadata, you have to parse these `:meta` expressions. If your implementation can be performed within Julia, `Base.popmeta!` is very handy: `Base.popmeta!(body, :symbol)` will scan diff --git a/doc/src/devdocs/object.md b/doc/src/devdocs/object.md index cf377c052bf15..a2f72d623ab21 100644 --- a/doc/src/devdocs/object.md +++ b/doc/src/devdocs/object.md @@ -163,11 +163,8 @@ Arrays: ```c jl_array_t *jl_new_array(jl_value_t *atype, jl_tuple_t *dims); -jl_array_t *jl_new_arrayv(jl_value_t *atype, ...); jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr); -jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc); -jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z); -jl_array_t *jl_alloc_vec_any(size_t n); +jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims); ``` Note that many of these have alternative allocation functions for various special-purposes. The @@ -189,6 +186,8 @@ then tagged with its type: jl_value_t *jl_gc_allocobj(size_t nbytes); void jl_set_typeof(jl_value_t *v, jl_datatype_t *type); ``` +!!! note "Out of date Warning" + The documentation and usage for the function `jl_gc_allocobj` may be out of date Note that all objects are allocated in multiples of 4 bytes and aligned to the platform pointer size. Memory is allocated from a pool for smaller objects, or directly with `malloc()` for large diff --git a/doc/src/devdocs/pkgimg.md b/doc/src/devdocs/pkgimg.md new file mode 100644 index 0000000000000..64f4e640b7c19 --- /dev/null +++ b/doc/src/devdocs/pkgimg.md @@ -0,0 +1,49 @@ +# [Package Images](@id pkgimages) + +Julia package images provide object (native code) caches for Julia packages. +They are similar to Julia's [system image](@ref dev-sysimg) and support many of the same features. +In fact the underlying serialization format is the same, and the system image is the base image that the package images are build against. + +## High-level overview + +Package images are shared libraries that contain both code and data. Like `.ji` cache files, they are generated per package. 
The data section contains both global data (global variables in the package) as well as the necessary metadata about what methods and types are defined by the package. The code section contains native objects that cache the final output of Julia's LLVM-based compiler. + +The command line option `--pkgimages=no` can be used to turn off object caching for this session. Note that this means cache files will likely have to be regenerated. +See [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@ref JULIA_MAX_NUM_PRECOMPILE_FILES) for the upper limit of variants Julia caches per default. + +!!! note + While the package images present themselves as native shared libraries, they are only an approximation thereof. You will not be able to link against them from a native program and they must be loaded from Julia. + + +## Linking + +Since the package images contain native code, we must run a linker over them before we can use them. You can set the environment variable [`JULIA_VERBOSE_LINKING`](@ref JULIA_VERBOSE_LINKING) to `true` to make the package image linking process verbose. + +Furthermore, we cannot assume that the user has a working system linker installed. Therefore, Julia ships with LLD, the LLVM linker, to provide a working out-of-the-box experience. In `base/linking.jl`, we implement a limited interface to be able to link package images on all supported platforms. + +### Quirks +Despite LLD being a multi-platform linker, it does not provide a consistent interface across platforms. Furthermore, it is meant to be used from `clang` or +another compiler driver, so we reimplement some of the logic from `llvm-project/clang/lib/Driver/ToolChains`. Thankfully, one can use `lld -flavor` to set `lld` to the right platform. + +#### Windows +To avoid having to deal with `link.exe`, we use `-flavor gnu`, effectively turning `lld` into a cross-linker from a mingw32 environment. Windows DLLs are required to contain a `_DllMainCRTStartup` function, and to minimize our dependence on mingw32 libraries, we inject a stub definition ourselves. + +#### MacOS +Dynamic libraries on macOS need to link against `-lSystem`. On recent macOS versions, `-lSystem` is only available for linking when Xcode is available. +To that effect we link with `-undefined dynamic_lookup`. + +## [Package images optimized for multiple microarchitectures](@id pkgimgs-multi-versioning) +Similar to [multi-versioning](@ref sysimg-multi-versioning) for system images, package images support multi-versioning. If you are in a heterogeneous environment with a unified cache, +you can set the environment variable `JULIA_CPU_TARGET=generic` to multi-version the object caches. + +## Flags that impact package image creation and selection + +These are the Julia command line flags that impact cache selection. Package images +that were created with different flags will be rejected. + +- `-g`, `--debug-info`: Exact match required since it changes code generation. +- `--check-bounds`: Exact match required since it changes code generation. +- `--inline`: Exact match required since it changes code generation. +- `--pkgimages`: To allow running without object caching enabled. +- `-O`, `--optimize`: Reject package images generated for a lower optimization level, + but allow for higher optimization levels to be loaded. 
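As a rough way to check which of these flags the current session was started with, one can inspect the internal `Base.JLOptions` structure (a sketch only; these are internal fields, named as of Julia 1.10, and may change between versions):

```julia
# Inspect the options of the running session that participate in
# package image cache selection (internal API; field names may change).
opts = Base.JLOptions()
(; debug_level   = opts.debug_level,
   check_bounds  = opts.check_bounds,
   can_inline    = opts.can_inline,
   use_pkgimages = opts.use_pkgimages,
   opt_level     = opts.opt_level)
```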
diff --git a/doc/src/devdocs/precompile_hang.md b/doc/src/devdocs/precompile_hang.md new file mode 100644 index 0000000000000..0de9c99792b64 --- /dev/null +++ b/doc/src/devdocs/precompile_hang.md @@ -0,0 +1,98 @@ +# Fixing precompilation hangs due to open tasks or IO + +On Julia 1.10 or higher, you might see the following message: + +![Screenshot of precompilation hang](./img/precompilation_hang.png) + +This may repeat. If it continues to repeat with no hints that it will +resolve itself, you may have a "precompilation hang" that requires +fixing. Even if it's transient, you might prefer to resolve it so that +users will not be bothered by this warning. This page walks you +through how to analyze and fix such issues. + +If you follow the advice and hit `Ctrl-C`, you might see + +``` +^C Interrupted: Exiting precompilation... + + 1 dependency had warnings during precompilation: +┌ Test1 [ac89d554-e2ba-40bc-bc5c-de68b658c982] +│ [pid 2745] waiting for IO to finish: +│ Handle type uv_handle_t->data +│ timer 0x55580decd1e0->0x7f94c3a4c340 +``` + +This message conveys two key pieces of information: + +- the hang is occurring during precompilation of `Test1`, a dependency of `Test2` (the package we were trying to load with `using Test2`) +- during precompilation of `Test1`, Julia created a `Timer` object (use `?Timer` if you're unfamiliar with Timers) which is still open; until that closes, the process is hung + +If this is enough of a hint for you to figure out how `timer = Timer(args...)` is being created, one good solution is to add `wait(timer)` if `timer` eventually finishes on its own, or `close(timer)` if you need to force-close it, before the final `end` of the module. + +However, there are cases that may not be that straightforward. Usually the best option is to start by determining whether the hang is due to code in Test1 or whether it is due to one of Test1's dependencies: + +- Option 1: `Pkg.add("Aqua")` and use [`Aqua.test_persistent_tasks`](https://juliatesting.github.io/Aqua.jl/dev/#Aqua.test_persistent_tasks-Tuple{Base.PkgId}). This should help you identify which package is causing the problem, after which the instructions [below](@ref pchang_fix) should be followed. If needed, you can create a `PkgId` as `Base.PkgId(UUID("..."), "Test1")`, where `...` comes from the `uuid` entry in `Test1/Project.toml`. +- Option 2: manually diagnose the source of the hang. + +To manually diagnose: + +1. `Pkg.develop("Test1")` +2. Comment out all the code `include`d or defined in `Test1`, *except* the `using/import` statements. +3. Try `using Test2` (or even `using Test1` assuming that hangs too) again + +Now we arrive at a fork in the road: either + +- the hang persists, indicating it is [due to one of your dependencies](@ref pchang_deps) +- the hang disappears, indicating that it is [due to something in your code](@ref pchang_fix). + +## [Diagnosing and fixing hangs due to a package dependency](@id pchang_deps) + +Use a binary search to identify the problematic dependency: start by commenting out half your dependencies, then when you isolate which half is responsible comment out half of that half, etc. (You don't have to remove them from the project, just comment out the `using`/`import` statements.) + +Once you've identified a suspect (here we'll call it `ThePackageYouThinkIsCausingTheProblem`), first try precompiling that package. If it also hangs during precompilation, continue chasing the problem backwards. 
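For example, one way to try precompiling just the suspect package from the REPL is (assuming it is a dependency of the currently active project):

```julia
import Pkg
# Precompile only the suspect package; if this also hangs, the problem lies in
# that package itself (or further down its dependency chain).
Pkg.precompile("ThePackageYouThinkIsCausingTheProblem")
```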
+ +However, most likely `ThePackageYouThinkIsCausingTheProblem` will precompile fine. This suggests it's in the function `ThePackageYouThinkIsCausingTheProblem.__init__`, which does not run during precompilation of `ThePackageYouThinkIsCausingTheProblem` but *does* in any package that loads `ThePackageYouThinkIsCausingTheProblem`. To test this theory, set up a minimal working example (MWE), something like + +```julia +(@v1.10) pkg> generate MWE + Generating project MWE: + MWE\Project.toml + MWE\src\MWE.jl +``` + +where the source code of `MWE.jl` is + +```julia +module MWE +using ThePackageYouThinkIsCausingTheProblem +end +``` + +and you've added `ThePackageYouThinkIsCausingTheProblem` to MWE's dependencies. + +If that MWE reproduces the hang, you've found your culprit: +`ThePackageYouThinkIsCausingTheProblem.__init__` must be creating the `Timer` object. If the timer object can be safely `close`d, that's a good option. Otherwise, the most common solution is to avoid creating the timer while *any* package is being precompiled: add + +```julia +ccall(:jl_generating_output, Cint, ()) == 1 && return nothing +``` + +as the first line of `ThePackageYouThinkIsCausingTheProblem.__init__`, and it will avoid doing any initialization in any Julia process whose purpose is to precompile packages. + +## [Fixing package code to avoid hangs](@id pchang_fix) + +Search your package for suggestive words (here like "Timer") and see if you can identify where the problem is being created. Note that a method *definition* like + +```julia +maketimer() = Timer(timer -> println("hi"), 0; interval=1) +``` + +is not problematic in and of itself: it can cause this problem only if `maketimer` gets called while the module is being defined. This might be happening from a top-level statement such as + +```julia +const GLOBAL_TIMER = maketimer() +``` + +or it might conceivably occur in a [precompile workload](https://github.com/JuliaLang/PrecompileTools.jl). + +If you struggle to identify the causative lines, then consider doing a binary search: comment out sections of your package (or `include` lines to omit entire files) until you've reduced the problem in scope. diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md index 5cfd9f6a762f8..d15723e945462 100644 --- a/doc/src/devdocs/probes.md +++ b/doc/src/devdocs/probes.md @@ -27,28 +27,28 @@ to enable USDT probes. 
> readelf -n usr/lib/libjulia-internal.so.1 Displaying notes found in: .note.gnu.build-id - Owner Data size Description - GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring) + Owner Data size Description + GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring) Build ID: 57161002f35548772a87418d2385c284ceb3ead8 Displaying notes found in: .note.stapsdt - Owner Data size Description - stapsdt 0x00000029 NT_STAPSDT (SystemTap probe descriptors) + Owner Data size Description + stapsdt 0x00000029 NT_STAPSDT (SystemTap probe descriptors) Provider: julia Name: gc__begin Location: 0x000000000013213e, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cac Arguments: - stapsdt 0x00000032 NT_STAPSDT (SystemTap probe descriptors) + stapsdt 0x00000032 NT_STAPSDT (SystemTap probe descriptors) Provider: julia Name: gc__stop_the_world Location: 0x0000000000132144, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cae Arguments: - stapsdt 0x00000027 NT_STAPSDT (SystemTap probe descriptors) + stapsdt 0x00000027 NT_STAPSDT (SystemTap probe descriptors) Provider: julia Name: gc__end Location: 0x000000000013214a, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb0 Arguments: - stapsdt 0x0000002d NT_STAPSDT (SystemTap probe descriptors) + stapsdt 0x0000002d NT_STAPSDT (SystemTap probe descriptors) Provider: julia Name: gc__finalizer Location: 0x0000000000132150, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb2 @@ -308,7 +308,7 @@ An example probe in the bpftrace format looks like: ``` usdt:usr/lib/libjulia-internal.so:julia:gc__begin { - @start[pid] = nsecs; + @start[pid] = nsecs; } ``` diff --git a/doc/src/devdocs/require.md b/doc/src/devdocs/require.md index 5198a7425ee49..9f824e78a8653 100644 --- a/doc/src/devdocs/require.md +++ b/doc/src/devdocs/require.md @@ -7,26 +7,22 @@ precompilation cache. It is the implementation of the `import` statement. The features below are experimental and not part of the stable Julia API. Before building upon them inform yourself about the current thinking and whether they might change soon. -### Module loading callbacks +### Package loading callbacks -It is possible to listen to the modules loaded by `Base.require`, by registering a callback. +It is possible to listen to the packages loaded by `Base.require`, by registering a callback. ```julia -loaded_packages = Channel{Symbol}() -callback = (mod::Symbol) -> put!(loaded_packages, mod) +loaded_packages = Base.PkgId[] +callback = (pkg::Base.PkgId) -> push!(loaded_packages, pkg) push!(Base.package_callbacks, callback) ``` -Please note that the symbol given to the callback is a non-unique identifier and -it is the responsibility of the callback provider to walk the module chain to -determine the fully qualified name of the loaded binding. +Using this would look something like: -The callback below is an example of how to do that: +```julia-repl +julia> using Example -```julia -# Get the fully-qualified name of a module. -function module_fqn(name::Symbol) - fqn = fullname(Base.root_module(name)) - return join(fqn, '.') -end +julia> loaded_packages +1-element Vector{Base.PkgId}: + Example [7876af07-990d-54b4-ab0e-23690620f79a] ``` diff --git a/doc/src/devdocs/ssair.md b/doc/src/devdocs/ssair.md index 6d3de6d1f5758..4375012c9f580 100644 --- a/doc/src/devdocs/ssair.md +++ b/doc/src/devdocs/ssair.md @@ -1,5 +1,53 @@ # Julia SSA-form IR +Julia uses a static single assignment intermediate representation ([SSA IR](https://en.wikipedia.org/wiki/Static_single-assignment_form)) to perform optimization. 
+This IR is different from LLVM IR, and unique to Julia. +It allows for Julia specific optimizations. + +1. Basic blocks (regions with no control flow) are explicitly annotated. +2. if/else and loops are turned into `goto` statements. +3. lines with multiple operations are split into multiple lines by introducing variables. + +For example the following Julia code: +```julia +function foo(x) + y = sin(x) + if x > 5.0 + y = y + cos(x) + end + return exp(2) + y +end +``` +when called with a `Float64` argument is translated into: + +```julia +using InteractiveUtils +@code_typed foo(1.0) +``` + +```llvm +CodeInfo( +1 ─ %1 = invoke Main.sin(x::Float64)::Float64 +│ %2 = Base.lt_float(x, 5.0)::Bool +└── goto #3 if not %2 +2 ─ %4 = invoke Main.cos(x::Float64)::Float64 +└── %5 = Base.add_float(%1, %4)::Float64 +3 ┄ %6 = φ (#2 => %5, #1 => %1)::Float64 +│ %7 = Base.add_float(7.38905609893065, %6)::Float64 +└── return %7 +) => Float64 +``` + +In this example, we can see all of these changes. +1. The first basic block is everything in +```llvm +1 ─ %1 = invoke Main.sin(x::Float64)::Float64 +│ %2 = Base.lt_float(x, 5.0)::Bool +└── goto #3 if not %2 +``` +2. The `if` statement is translated into `goto #3 if not %2` which goes to the 3rd basic block if `x>5` isn't met and otherwise goes to the second basic block. +3. `%2` is an SSA value introduced to represent `x > 5`. + ## Background Beginning in Julia 0.7, parts of the compiler use a new [SSA-form](https://en.wikipedia.org/wiki/Static_single_assignment_form) @@ -11,11 +59,9 @@ linearized (i.e. turned into a form where function arguments could only be SSA v conditional control flow). This negated much of the usefulness of SSA form representation when performing middle end optimizations. Some heroic effort was put into making these optimizations work without a complete SSA form representation, but the lack of such a representation ultimately proved prohibitive. +## Categories of IR nodes -## New IR nodes - -With the new IR representation, the compiler learned to handle four new IR nodes, Phi nodes, Pi -nodes as well as PhiC nodes and Upsilon nodes (the latter two are only used for exception handling). +The SSA IR representation has four categories of IR nodes: Phi, Pi, PhiC, and Upsilon nodes (the latter two are only used for exception handling). ### Phi nodes and Pi nodes @@ -144,7 +190,7 @@ The corresponding IR (with irrelevant types stripped) is: 4 ┄ %13 = φᶜ (%3, %6, %9)::Bool │ %14 = φᶜ (%4, %7, %10)::Core.Compiler.MaybeUndef(Int64) │ %15 = φᶜ (%5)::Core.Const(1) -└── $(Expr(:leave, 1)) +└── $(Expr(:leave, Core.SSAValue(2))) 5 ─ $(Expr(:pop_exception, :(%2)))::Any │ $(Expr(:throw_undef_if_not, :y, :(%13)))::Any │ %19 = Core.tuple(%15, %14) @@ -179,7 +225,7 @@ Instead, we do the following: - RAUW style operations are performed by setting the corresponding statement index to the replacement value. - Statements are erased by setting the corresponding statement to `nothing` (this is essentially just a special-case - convention of the above. + convention of the above). - If there are any uses of the statement being erased, they will be set to `nothing`. There is a `compact!` function that compacts the above data structure by performing the insertion of nodes in the appropriate place, trivial copy propagation, and renaming of uses to any changed SSA values. 
However, the clever part diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md index 5c976875846d3..dafeddd259745 100644 --- a/doc/src/devdocs/sysimg.md +++ b/doc/src/devdocs/sysimg.md @@ -8,6 +8,9 @@ as many platforms as possible, so as to give vastly improved startup times. On not ship with a precompiled system image file, one can be generated from the source files shipped in Julia's `DATAROOTDIR/julia/base` folder. +Julia will by default generate its system image on half of the available system threads. This +may be controlled by the [`JULIA_IMAGE_THREADS`](@ref JULIA_IMAGE_THREADS) environment variable. + This operation is useful for multiple reasons. A user may: * Build a precompiled shared library system image on a platform that did not ship with one, thereby @@ -19,7 +22,7 @@ This operation is useful for multiple reasons. A user may: The [`PackageCompiler.jl` package](https://github.com/JuliaLang/PackageCompiler.jl) contains convenient wrapper functions to automate this process. -## System image optimized for multiple microarchitectures +## [System image optimized for multiple microarchitectures](@id sysimg-multi-versioning) The system image can be compiled simultaneously for multiple CPU microarchitectures under the same instruction set architecture (ISA). Multiple versions of the same function @@ -31,7 +34,7 @@ based on available CPU features. ### Specifying multiple system image targets A multi-microarchitecture system image can be enabled by passing multiple targets -during system image compilation. This can be done either with the `JULIA_CPU_TARGET` make option +during system image compilation. This can be done either with the [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) make option or with the `-C` command line option when running the compilation command manually. Multiple targets are separated by `;` in the option string. The syntax for each target is a CPU name followed by multiple features separated by `,`. @@ -39,6 +42,9 @@ All features supported by LLVM are supported and a feature can be disabled with (`+` prefix is also allowed and ignored to be consistent with LLVM syntax). Additionally, a few special features are supported to control the function cloning behavior. +!!! note + It is good practice to specify either `clone_all` or `base()` for every target apart from the first one. This makes it explicit which targets have all functions cloned, and which targets are based on other targets. If this is not done, the default behavior is to not clone every function, and to use the first target's function definition as the fallback when not cloning a function. + 1. `clone_all` By default, only functions that are the most likely to benefit from @@ -101,7 +107,7 @@ See code comments for each components for more implementation details. (see comments in `MultiVersioning::runOnModule` for how this is done), the pass also generates metadata so that the runtime can load and initialize the system image correctly. - A detail description of the metadata is available in `src/processor.h`. + A detailed description of the metadata is available in `src/processor.h`. 2. 
System image loading diff --git a/doc/src/devdocs/tracy.png b/doc/src/devdocs/tracy.png new file mode 100644 index 0000000000000..a0371be9db63e Binary files /dev/null and b/doc/src/devdocs/tracy.png differ diff --git a/doc/src/devdocs/types.md b/doc/src/devdocs/types.md index 003574f99c182..42bdf251ac82f 100644 --- a/doc/src/devdocs/types.md +++ b/doc/src/devdocs/types.md @@ -100,6 +100,8 @@ UnionAll lb: Union{} ub: Any body: Array{T, N} <: DenseArray{T, N} + ref::MemoryRef{T} + size::NTuple{N, Int64} ``` This indicates that `Array` actually names a `UnionAll` type. There is one `UnionAll` type for @@ -198,7 +200,6 @@ TypeName defs: Nothing nothing cache: Nothing nothing max_args: Int64 0 - kwsorter: #undef module: Module Core : Int64 0 : Int64 0 diff --git a/doc/src/devdocs/valgrind.md b/doc/src/devdocs/valgrind.md index 7e62aeb176f3c..b8dd96ed8be60 100644 --- a/doc/src/devdocs/valgrind.md +++ b/doc/src/devdocs/valgrind.md @@ -11,7 +11,7 @@ assumption works fine in most instances but fails miserably for a just-in-time c `julia`. For this reason it is crucial to pass `--smc-check=all-non-file` to `valgrind`, else code may crash or behave unexpectedly (often in subtle ways). -In some cases, to better detect memory errors using Valgrind it can help to compile `julia` with +In some cases, to better detect memory errors using Valgrind, it can help to compile `julia` with memory pools disabled. The compile-time flag `MEMDEBUG` disables memory pools in Julia, and `MEMDEBUG2` disables memory pools in FemtoLisp. To build `julia` with both flags, add the following line to `Make.user`: @@ -20,7 +20,7 @@ line to `Make.user`: CFLAGS = -DMEMDEBUG -DMEMDEBUG2 ``` -Another thing to note: if your program uses multiple workers processes, it is likely that you +Another thing to note: if your program uses multiple worker processes, it is likely that you want all such worker processes to run under Valgrind, not just the parent process. To do this, pass `--trace-children=yes` to `valgrind`. @@ -57,7 +57,7 @@ to `valgrind` as well. ## Additional spurious warnings -This section covers Valgrind warnings which cannot be added to the +This section covers Valgrind warnings that cannot be added to the suppressions file yet are nonetheless safe to ignore. ### Unhandled rr system calls diff --git a/doc/src/index.md b/doc/src/index.md index a1915395151bc..8c88af424e8e3 100644 --- a/doc/src/index.md +++ b/doc/src/index.md @@ -32,7 +32,19 @@ Markdown.parse(""" """) ``` -### [Introduction](@id man-introduction) +## [Important Links](@id man-important-links) + +Below is a non-exhaustive list of links that will be useful as you learn and use the Julia programming language. + +- [Julia Homepage](https://julialang.org) +- [Download Julia](https://julialang.org/downloads/) +- [Discussion forum](https://discourse.julialang.org) +- [Julia YouTube](https://www.youtube.com/user/JuliaLanguage) +- [Find Julia Packages](https://julialang.org/packages/) +- [Learning Resources](https://julialang.org/learning/) +- [Read and write blogs on Julia](https://forem.julialang.org) + +## [Introduction](@id man-introduction) Scientific computing has traditionally required the highest performance, yet domain experts have largely moved to slower dynamic languages for daily work. We believe there are many good reasons @@ -46,7 +58,9 @@ with performance comparable to traditional statically-typed languages. 
Because Julia's compiler is different from the interpreters used for languages like Python or R, you may find that Julia's performance is unintuitive at first. If you find that something is slow, we highly recommend reading through the [Performance Tips](@ref man-performance-tips) section before trying anything -else. Once you understand how Julia works, it's easy to write code that's nearly as fast as C. +else. Once you understand how Julia works, it is easy to write code that is nearly as fast as C. + +## [Julia Compared to Other Languages](@id man-julia-compared-other-languages) Julia features optional typing, multiple dispatch, and good performance, achieved using type inference and [just-in-time (JIT) compilation](https://en.wikipedia.org/wiki/Just-in-time_compilation) (and @@ -70,14 +84,16 @@ The most significant departures of Julia from typical dynamic languages are: * Automatic generation of efficient, specialized code for different argument types * Good performance, approaching that of statically-compiled languages like C -Although one sometimes speaks of dynamic languages as being "typeless", they are definitely not: -every object, whether primitive or user-defined, has a type. The lack of type declarations in +Although one sometimes speaks of dynamic languages as being "typeless", they are definitely not. +Every object, whether primitive or user-defined, has a type. The lack of type declarations in most dynamic languages, however, means that one cannot instruct the compiler about the types of values, and often cannot explicitly talk about types at all. In static languages, on the other hand, while one can -- and usually must -- annotate types for the compiler, types exist only at compile time and cannot be manipulated or expressed at run time. In Julia, types are themselves run-time objects, and can also be used to convey information to the compiler. +### [What Makes Julia, Julia?](@id man-what-makes-julia) + While the casual programmer need not explicitly use types or multiple dispatch, they are the core unifying features of Julia: functions are defined on different combinations of argument types, and applied by dispatching to the most specific matching definition. This model is a good fit @@ -93,6 +109,8 @@ languages. For large scale numerical problems, speed always has been, continues always will be crucial: the amount of data being processed has easily kept pace with Moore's Law over the past decades. +### [Advantages of Julia](@id man-advantages-of-julia) + Julia aims to create an unprecedented combination of ease-of-use, power, and efficiency in a single language. In addition to the above, some advantages of Julia over comparable systems include: diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md index 1c71a4bd59e35..09a095729f174 100644 --- a/doc/src/manual/arrays.md +++ b/doc/src/manual/arrays.md @@ -1,4 +1,4 @@ -# [Multi-dimensional Arrays](@id man-multi-dim-arrays) +# [Single- and multi-dimensional Arrays](@id man-multi-dim-arrays) Julia, like most technical computing languages, provides a first-class array implementation. Most technical computing languages pay a lot of attention to their array implementation at the expense @@ -107,7 +107,7 @@ where no arguments are given. [Array literal can be typed](@ref man-array-typed- the syntax `T[A, B, C, ...]` where `T` is a type. 
```jldoctest -julia> [1,2,3] # An array of `Int`s +julia> [1, 2, 3] # An array of `Int`s 3-element Vector{Int64}: 1 2 @@ -326,8 +326,8 @@ These syntaxes are shorthands for function calls that themselves are convenience | Syntax | Function | Description | |:---------------------- |:---------------- |:---------------------------------------------------------------------------------------------------------- | | | [`cat`](@ref) | concatenate input arrays along dimension(s) `k` | -| `[A; B; C; ...]` | [`vcat`](@ref) | shorthand for `cat(A...; dims=1) | -| `[A B C ...]` | [`hcat`](@ref) | shorthand for `cat(A...; dims=2) | +| `[A; B; C; ...]` | [`vcat`](@ref) | shorthand for `cat(A...; dims=1)` | +| `[A B C ...]` | [`hcat`](@ref) | shorthand for `cat(A...; dims=2)` | | `[A B; C D; ...]` | [`hvcat`](@ref) | simultaneous vertical and horizontal concatenation | | `[A; C;; B; D;;; ...]` | [`hvncat`](@ref) | simultaneous n-dimensional concatenation, where number of semicolons indicate the dimension to concatenate | @@ -356,7 +356,7 @@ Comprehensions provide a general and powerful way to construct arrays. Comprehen similar to set construction notation in mathematics: ``` -A = [ F(x,y,...) for x=rx, y=ry, ... ] +A = [ F(x, y, ...) for x=rx, y=ry, ... ] ``` The meaning of this form is that `F(x,y,...)` is evaluated with the variables `x`, `y`, etc. taking @@ -440,7 +440,7 @@ Ranges in generators and comprehensions can depend on previous ranges by writing keywords: ```jldoctest -julia> [(i,j) for i=1:3 for j=1:i] +julia> [(i, j) for i=1:3 for j=1:i] 6-element Vector{Tuple{Int64, Int64}}: (1, 1) (2, 1) @@ -455,7 +455,7 @@ In such cases, the result is always 1-d. Generated values can be filtered using the `if` keyword: ```jldoctest -julia> [(i,j) for i=1:3 for j=1:i if i+j == 4] +julia> [(i, j) for i=1:3 for j=1:i if i+j == 4] 2-element Vector{Tuple{Int64, Int64}}: (2, 2) (3, 1) @@ -603,7 +603,7 @@ overwritten with the value of `X`, [`convert`](@ref)ing to the If any index `I_k` is itself an array, then the right hand side `X` must also be an array with the same shape as the result of indexing `A[I_1, I_2, ..., I_n]` or a vector with the same number of elements. The value in location `I_1[i_1], I_2[i_2], ..., I_n[i_n]` of -`A` is overwritten with the value `X[I_1, I_2, ..., I_n]`, converting if necessary. The +`A` is overwritten with the value `X[i_1, i_2, ..., i_n]`, converting if necessary. The element-wise assignment operator `.=` may be used to [broadcast](@ref Broadcasting) `X` across the selected locations: @@ -653,7 +653,7 @@ indices and can be converted to such by [`to_indices`](@ref): * [`CartesianIndex{N}`](@ref)s, which behave like an `N`-tuple of integers spanning multiple dimensions (see below for more details) 2. An array of scalar indices. This includes: * Vectors and multidimensional arrays of integers - * Empty arrays like `[]`, which select no elements + * Empty arrays like `[]`, which select no elements e.g. `A[[]]` (not to be confused with `A[]`) * Ranges like `a:c` or `a:b:c`, which select contiguous or strided subsections from `a` to `c` (inclusive) * Any custom array of scalar indices that is a subtype of `AbstractArray` * Arrays of `CartesianIndex{N}` (see below for more details) @@ -740,17 +740,17 @@ that is sometimes referred to as pointwise indexing. 
For example, it enables accessing the diagonal elements from the first "page" of `A` from above: ```jldoctest cartesianindex -julia> page = A[:,:,1] +julia> page = A[:, :, 1] 4×4 Matrix{Int64}: 1 5 9 13 2 6 10 14 3 7 11 15 4 8 12 16 -julia> page[[CartesianIndex(1,1), - CartesianIndex(2,2), - CartesianIndex(3,3), - CartesianIndex(4,4)]] +julia> page[[CartesianIndex(1, 1), + CartesianIndex(2, 2), + CartesianIndex(3, 3), + CartesianIndex(4, 4)]] 4-element Vector{Int64}: 1 6 @@ -879,10 +879,15 @@ slower than multiplication. While some arrays — like [`Array`](@ref) itself are implemented using a linear chunk of memory and directly use a linear index in their implementations, other arrays — like [`Diagonal`](@ref) — need the full set of cartesian indices to do their lookup (see [`IndexStyle`](@ref) to -introspect which is which). As such, when iterating over an entire array, it's -much better to iterate over [`eachindex(A)`](@ref) instead of `1:length(A)`. -Not only will the former be much faster in cases where `A` is `IndexCartesian`, -but it will also support [OffsetArrays](https://github.com/JuliaArrays/OffsetArrays.jl), too. +introspect which is which). + +!!! warnings + + When iterating over all the indices for an array, it is + better to iterate over [`eachindex(A)`](@ref) instead of `1:length(A)`. + Not only will this be faster in cases where `A` is `IndexCartesian`, + but it will also support arrays with custom indexing, such as [OffsetArrays](https://github.com/JuliaArrays/OffsetArrays.jl). + If only the values are needed, then is better to just iterate the array directly, i.e. `for a in A`. #### Omitted and extra indices @@ -930,13 +935,13 @@ element of `axes(A, d)` where `d` is that particular dimension number). This allows vectors to be indexed like one-column matrices, for example: ```jldoctest -julia> A = [8,6,7] +julia> A = [8, 6, 7] 3-element Vector{Int64}: 8 6 7 -julia> A[2,1] +julia> A[2, 1] 6 ``` @@ -959,7 +964,7 @@ construct, `i` will be an `Int` if `A` is an array type with fast linear indexin it will be a `CartesianIndex`: ```jldoctest -julia> A = rand(4,3); +julia> A = rand(4, 3); julia> B = view(A, 1:3, 2:3); @@ -974,8 +979,11 @@ i = CartesianIndex(2, 2) i = CartesianIndex(3, 2) ``` -In contrast with `for i = 1:length(A)`, iterating with [`eachindex`](@ref) provides an efficient way to -iterate over any array type. +!!! note + + In contrast with `for i = 1:length(A)`, iterating with [`eachindex`](@ref) provides an efficient way to + iterate over any array type. Besides, this also supports generic arrays with custom indexing such as + [OffsetArrays](https://github.com/JuliaArrays/OffsetArrays.jl). ## Array traits @@ -998,7 +1006,7 @@ The following operators are supported for arrays: To enable convenient vectorization of mathematical and other operations, Julia [provides the dot syntax](@ref man-vectorized) `f.(args...)`, e.g. `sin.(x)` -or `min.(x,y)`, for elementwise operations over arrays or mixtures of arrays and +or `min.(x, y)`, for elementwise operations over arrays or mixtures of arrays and scalars (a [Broadcasting](@ref) operation); these have the additional advantage of "fusing" into a single loop when combined with other dot calls, e.g. `sin.(cos.(x))`. @@ -1012,7 +1020,7 @@ operations like `<`, *only* the elementwise `.<` version is applicable to arrays Also notice the difference between `max.(a,b)`, which [`broadcast`](@ref)s [`max`](@ref) elementwise over `a` and `b`, and [`maximum(a)`](@ref), which finds the largest value within -`a`. 
The same relationship holds for `min.(a,b)` and `minimum(a)`. +`a`. The same relationship holds for `min.(a, b)` and `minimum(a)`. ## Broadcasting @@ -1021,9 +1029,9 @@ sizes, such as adding a vector to each column of a matrix. An inefficient way to be to replicate the vector to the size of the matrix: ```julia-repl -julia> a = rand(2,1); A = rand(2,3); +julia> a = rand(2, 1); A = rand(2, 3); -julia> repeat(a,1,3)+A +julia> repeat(a, 1, 3) + A 2×3 Array{Float64,2}: 1.20813 1.82068 1.25387 1.56851 1.86401 1.67846 @@ -1103,10 +1111,10 @@ generally work correctly as a fallback for any specific array implementation. The `AbstractArray` type includes anything vaguely array-like, and implementations of it might be quite different from conventional arrays. For example, elements might be computed on request rather than stored. However, any concrete `AbstractArray{T,N}` type should generally implement -at least [`size(A)`](@ref) (returning an `Int` tuple), [`getindex(A,i)`](@ref) and [`getindex(A,i1,...,iN)`](@ref getindex); -mutable arrays should also implement [`setindex!`](@ref). It is recommended that these operations -have nearly constant time complexity, as otherwise some array -functions may be unexpectedly slow. Concrete types should also typically provide a [`similar(A,T=eltype(A),dims=size(A))`](@ref) +at least [`size(A)`](@ref) (returning an `Int` tuple), [`getindex(A, i)`](@ref) and +[`getindex(A, i1, ..., iN)`](@ref getindex); mutable arrays should also implement [`setindex!`](@ref). +It is recommended that these operations have nearly constant time complexity, as otherwise some array +functions may be unexpectedly slow. Concrete types should also typically provide a [`similar(A, T=eltype(A), dims=size(A))`](@ref) method, which is used to allocate a similar array for [`copy`](@ref) and other out-of-place operations. No matter how an `AbstractArray{T,N}` is represented internally, `T` is the type of object returned by *integer* indexing (`A[1, ..., 1]`, when `A` is not empty) and `N` should be @@ -1145,15 +1153,15 @@ arranged contiguously in column major order. This means that the stride of the f dimension — the spacing between elements in the same column — is `1`: ```julia-repl -julia> A = rand(5,7,2); +julia> A = rand(5, 7, 2); -julia> stride(A,1) +julia> stride(A, 1) 1 ``` The stride of the second dimension is the spacing between elements in the same row, skipping as many elements as there are in a single column (`5`). Similarly, jumping between the two -"pages" (in the third dimension) requires skipping `5*7 == 35` elements. The [`strides`](@ref) +"pages" (in the third dimension) requires skipping `5*7 == 35` elements. The [`strides`](@ref) of this array is the tuple of these three numbers together: ```julia-repl diff --git a/doc/src/manual/asynchronous-programming.md b/doc/src/manual/asynchronous-programming.md index 4eee0fccf7da2..5b43ba971ee1c 100644 --- a/doc/src/manual/asynchronous-programming.md +++ b/doc/src/manual/asynchronous-programming.md @@ -289,7 +289,7 @@ julia> @elapsed while n > 0 # print out results 0.029772311 ``` -Instead of `errormonitor(t)`, a more robust solution may be use use `bind(results, t)`, as that will +Instead of `errormonitor(t)`, a more robust solution may be to use `bind(results, t)`, as that will not only log any unexpected failures, but also force the associated resources to close and propagate the exception everywhere. 
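As an illustration of that pattern, here is a minimal sketch (the `compute` function and the channel capacity are hypothetical placeholders, not part of the documented API):

```julia
# Tie the lifetime of a channel to the task that fills it: when the task
# finishes or fails, `bind` closes the channel and propagates any exception
# to consumers blocked on it.
results = Channel{Int}(32)

t = @async begin
    for i in 1:10
        put!(results, compute(i))   # `compute` stands in for real work
    end
end
bind(results, t)                    # instead of (or in addition to) errormonitor(t)

for r in results                    # iteration stops once the channel is closed
    println(r)
end
```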
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md index 0ebed7db009c9..2602a8b129b2b 100644 --- a/doc/src/manual/calling-c-and-fortran-code.md +++ b/doc/src/manual/calling-c-and-fortran-code.md @@ -253,10 +253,14 @@ to the specified type. For example, the following call: will behave as if it were written like this: ```julia -@ccall "libfoo".foo( - Base.unsafe_convert(Int32, Base.cconvert(Int32, x))::Int32, - Base.unsafe_convert(Float64, Base.cconvert(Float64, y))::Float64 +c_x = Base.cconvert(Int32, x) +c_y = Base.cconvert(Float64, y) +GC.@preserve c_x c_y begin + @ccall "libfoo".foo( + Base.unsafe_convert(Int32, c_x)::Int32, + Base.unsafe_convert(Float64, c_y)::Float64 )::Cvoid +end ``` [`Base.cconvert`](@ref) normally just calls [`convert`](@ref), but can be defined to return an @@ -817,7 +821,7 @@ end ## Garbage Collection Safety When passing data to a `@ccall`, it is best to avoid using the [`pointer`](@ref) function. -Instead define a convert method and pass the variables directly to the `@ccall`. `@ccall` +Instead define a [`Base.cconvert`](@ref) method and pass the variables directly to the `@ccall`. `@ccall` automatically arranges that all of its arguments will be preserved from garbage collection until the call returns. If a C API will store a reference to memory allocated by Julia, after the `@ccall` returns, you must ensure that the object remains visible to the garbage collector. The suggested @@ -1000,7 +1004,7 @@ A table of translations between the macro and function interfaces is given below ## [Calling Convention](@id calling-convention) -The second argument to `ccall` (immediatel preceding return type) can optionally +The second argument to `ccall` (immediately preceding return type) can optionally be a calling convention specifier (the `@ccall` macro currently does not support giving a calling convention). Without any specifier, the platform-default C calling convention is used. Other supported conventions are: `stdcall`, `cdecl`, @@ -1118,9 +1122,7 @@ For more details on how to pass callbacks to C libraries, see this [blog post](h ## C++ -For direct C++ interfacing, see the [Cxx](https://github.com/Keno/Cxx.jl) package. For tools to create C++ -bindings, see the [CxxWrap](https://github.com/JuliaInterop/CxxWrap.jl) package. - +For tools to create C++ bindings, see the [CxxWrap](https://github.com/JuliaInterop/CxxWrap.jl) package. [^1]: Non-library function calls in both C and Julia can be inlined and thus may have diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md index d6f359f83d5cb..25ebf3475f34c 100644 --- a/doc/src/manual/code-loading.md +++ b/doc/src/manual/code-loading.md @@ -14,7 +14,7 @@ Code inclusion is quite straightforward and simple: it evaluates the given sourc A *package* is a source tree with a standard layout providing functionality that can be reused by other Julia projects. A package is loaded by `import X` or `using X` statements. These statements also make the module named `X`—which results from loading the package code—available within the module where the import statement occurs. The meaning of `X` in `import X` is context-dependent: which `X` package is loaded depends on what code the statement occurs in. Thus, handling of `import X` happens in two stages: first, it determines **what** package is defined to be `X` in this context; second, it determines **where** that particular `X` package is found. 
-These questions are answered by searching through the project environments listed in [`LOAD_PATH`](@ref) for project files (`Project.toml` or `JuliaProject.toml`), manifest files (`Manifest.toml` or `JuliaManifest.toml`), or folders of source files. +These questions are answered by searching through the project environments listed in [`LOAD_PATH`](@ref) for project files (`Project.toml` or `JuliaProject.toml`), manifest files (`Manifest.toml` or `JuliaManifest.toml`, or the same names suffixed by `-v{major}.{minor}.toml` for specific versions), or folders of source files. ## Federation of packages @@ -63,7 +63,7 @@ Each kind of environment defines these three maps differently, as detailed in th ### Project environments -A project environment is determined by a directory containing a project file called `Project.toml`, and optionally a manifest file called `Manifest.toml`. These files may also be called `JuliaProject.toml` and `JuliaManifest.toml`, in which case `Project.toml` and `Manifest.toml` are ignored. This allows for coexistence with other tools that might consider files called `Project.toml` and `Manifest.toml` significant. For pure Julia projects, however, the names `Project.toml` and `Manifest.toml` are preferred. +A project environment is determined by a directory containing a project file called `Project.toml`, and optionally a manifest file called `Manifest.toml`. These files may also be called `JuliaProject.toml` and `JuliaManifest.toml`, in which case `Project.toml` and `Manifest.toml` are ignored. This allows for coexistence with other tools that might consider files called `Project.toml` and `Manifest.toml` significant. For pure Julia projects, however, the names `Project.toml` and `Manifest.toml` are preferred. However, from Julia v1.11 onwards, `(Julia)Manifest-v{major}.{minor}.toml` is recognized as a format to make a given julia version use a specific manifest file i.e. in the same folder, a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by any other julia version. The roots, graph and paths maps of a project environment are defined as follows: @@ -349,17 +349,62 @@ The subscripted `rootsᵢ`, `graphᵢ` and `pathsᵢ` variables correspond to th Since the primary environment is typically the environment of a project you're working on, while environments later in the stack contain additional tools, this is the right trade-off: it's better to break your development tools but keep the project working. When such incompatibilities occur, you'll typically want to upgrade your dev tools to versions that are compatible with the main project. -### Package/Environment Preferences +### [Package Extensions](@id man-extensions) + +A package "extension" is a module that is automatically loaded when a specified set of other packages (its "extension dependencies") are loaded in the current Julia session. Extensions are defined under the `[extensions]` section in the project file. The extension dependencies of an extension are a subset of those packages listed under the `[weakdeps]` section of the project file. Those packages can have compat entries like other packages. + +```toml +name = "MyPackage" + +[compat] +ExtDep = "1.0" +OtherExtDep = "1.0" + +[weakdeps] +ExtDep = "c9a23..." # uuid +OtherExtDep = "862e..." # uuid + +[extensions] +BarExt = ["ExtDep", "OtherExtDep"] +FooExt = "ExtDep" +... +``` + +The keys under `extensions` are the names of the extensions. 
+They are loaded when all the packages on the right hand side (the extension dependencies) of that extension are loaded. +If an extension only has one extension dependency the list of extension dependencies can be written as just a string for brevity. +The location for the entry point of the extension is either in `ext/FooExt.jl` or `ext/FooExt/FooExt.jl` for +extension `FooExt`. +The content of an extension is often structured as: + +``` +module FooExt + +# Load main package and extension dependencies +using MyPackage, ExtDep + +# Extend functionality in main package with types from the extension dependencies +MyPackage.func(x::ExtDep.SomeStruct) = ... + +end +``` + +When a package with extensions is added to an environment, the `weakdeps` and `extensions` sections +are stored in the manifest file in the section for that package. The dependency lookup rules for +a package are the same as for its "parent" except that the listed extension dependencies are also considered as +dependencies. + +### [Package/Environment Preferences](@id preferences) Preferences are dictionaries of metadata that influence package behavior within an environment. -The preferences system supports reading preferences at compile-time, which means that at code-loading time, we must ensure that a particular `.ji` file was built with the same preferences as the current environment before loading it. +The preferences system supports reading preferences at compile-time, which means that at code-loading time, we must ensure that the precompilation files selected by Julia were built with the same preferences as the current environment before loading them. The public API for modifying Preferences is contained within the [Preferences.jl](https://github.com/JuliaPackaging/Preferences.jl) package. Preferences are stored as TOML dictionaries within a `(Julia)LocalPreferences.toml` file next to the currently-active project. If a preference is "exported", it is instead stored within the `(Julia)Project.toml` instead. The intention is to allow shared projects to contain shared preferences, while allowing for users themselves to override those preferences with their own settings in the LocalPreferences.toml file, which should be .gitignored as the name implies. -Preferences that are accessed during compilation are automatically marked as compile-time preferences, and any change recorded to these preferences will cause the Julia compiler to recompile any cached precompilation `.ji` files for that module. -This is done by serializing the hash of all compile-time preferences during compilation, then checking that hash against the current environment when searching for the proper `.ji` file to load. +Preferences that are accessed during compilation are automatically marked as compile-time preferences, and any change recorded to these preferences will cause the Julia compiler to recompile any cached precompilation file(s) (`.ji` and corresponding `.so`, `.dll`, or `.dylib` files) for that module. +This is done by serializing the hash of all compile-time preferences during compilation, then checking that hash against the current environment when searching for the proper file(s) to load. Preferences can be set with depot-wide defaults; if package Foo is installed within your global environment and it has preferences set, these preferences will apply as long as your global environment is part of your `LOAD_PATH`. 
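As a concrete sketch of how a package might read and write such preferences through the Preferences.jl API (the package name, preference name, and helper function below are hypothetical):

```julia
module MyPackage

using Preferences

# Record a preference; it is written to the active project's
# (Julia)LocalPreferences.toml (or to (Julia)Project.toml if exported).
function set_backend!(backend::String)
    @set_preferences!("backend" => backend)
    @info "Backend preference changed; restart Julia for it to take effect" backend
end

# Reading a preference at top level makes it a compile-time preference,
# so changing it later invalidates MyPackage's precompilation cache.
const BACKEND = @load_preference("backend", "default")

end # module
```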
Preferences in environments higher up in the environment stack get overridden by the more proximal entries in the load path, ending with the currently active project. diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index 1c281a67e55e9..c69d221a80f2e 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -1,4 +1,4 @@ -# Command-line Interface +# [Command-line Interface](@id cli) ## Using arguments inside scripts @@ -39,6 +39,73 @@ $ julia --color=yes -O -- script.jl arg1 arg2.. See also [Scripting](@ref man-scripting) for more information on writing Julia scripts. +## The `Main.main` entry point + +As of Julia 1.11, Base exports a special macro `@main`. This macro simply expands to the symbol `main`, +but at the conclusion of executing a script or expression, `julia` will attempt to execute the function +`Main.main(ARGS)` if such a function has been defined and this behavior was opted into +using the `@main` macro. This feature is intended to aid in the unification +of compiled and interactive workflows. In compiled workflows, loading the code that defines the `main` +function may be spatially and temporally separated from the invocation. However, for interactive workflows, +the behavior is equivalent to explicitly calling `exit(main(ARGS))` at the end of the evaluated script or +expression. + +!!! compat "Julia 1.11" + The special entry point `Main.main` was added in Julia 1.11. For compatibility with prior Julia versions, + add an explicit `@isdefined(var"@main") ? (@main) : exit(main(ARGS))` at the end of your scripts. + +To see this feature in action, consider the following definition, which will execute the print function despite there being no explicit call to `main`: + +``` +$ julia -e '(@main)(ARGS) = println("Hello World!")' +Hello World! +$ +``` + +Only the `main` binding in the `Main` module has this special behavior, and only if +the macro `@main` was used within the defining module. + +For example, using `hello` instead of `main` will not result in the `hello` function executing: + +``` +$ julia -e 'hello(ARGS) = println("Hello World!")' +$ +``` + +and neither will a plain definition of `main`: +``` +$ julia -e 'main(ARGS) = println("Hello World!")' +$ +``` + +However, the opt-in need not occur at definition time: + +``` +$ julia -e 'main(ARGS) = println("Hello World!"); @main' +Hello World! +$ +``` + +The `main` binding may be imported from a package. A `Hello` package defined as + +``` +module Hello + +export main +(@main)(ARGS) = println("Hello from the package!") + +end +``` + +may be used as: + +``` +$ julia -e 'using Hello' +Hello from the package! +$ julia -e 'import Hello' # N.B.: execution depends on the binding, not whether the package is loaded +$ +``` + +However, note that the current best practice recommendation is to not mix application and reusable library +code in the same package. Helper applications may be distributed as separate packages or as scripts with +separate `main` entry points in a package's `bin` folder. ## Parallel mode @@ -69,7 +136,7 @@ Note that although you should have a `~/.julia` directory once you've run Julia first time, you may need to create the `~/.julia/config` folder and the `~/.julia/config/startup.jl` file if you use it.
-To have startup code run only in [The Julia REPL] (and not when `julia` is *e.g.* run +To have startup code run only in [The Julia REPL](@ref) (and not when `julia` is *e.g.* run on a script), use [`atreplinit`](@ref) in `startup.jl`: ```julia @@ -79,7 +146,7 @@ end ``` -## [Command-line switches for Julia](@id command-line-options) +## [Command-line switches for Julia](@id command-line-interface) There are various ways to run Julia code and provide options, similar to those available for the `perl` and `ruby` programs: @@ -88,25 +155,27 @@ There are various ways to run Julia code and provide options, similar to those a julia [switches] -- [programfile] [args...] ``` -The following is a complete list of command-line switches available when launching julia (a '*' marks the default value, if applicable): +The following is a complete list of command-line switches available when launching julia (a '*' marks the default value, if applicable; settings marked '($)' may trigger package precompilation): |Switch |Description| |:--- |:---| |`-v`, `--version` |Display version information| |`-h`, `--help` |Print command-line options (this message).| |`--help-hidden` |Uncommon options not shown by `-h`| -|`--project[={\|@.}]` |Set `` as the home project/environment. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.| +|`--project[={\|@.}]` |Set `` as the active project/environment. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.| |`-J`, `--sysimage ` |Start up with the given system image file| |`-H`, `--home ` |Set location of `julia` executable| -|`--startup-file={yes*\|no}` |Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH` environment variable is unset, load `~/.julia/config/startup.jl`| +|`--startup-file={yes*\|no}` |Load `JULIA_DEPOT_PATH/config/startup.jl`; if [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable is unset, load `~/.julia/config/startup.jl`| |`--handle-signals={yes*\|no}` |Enable or disable Julia's default signal handlers| |`--sysimage-native-code={yes*\|no}` |Use native code from system image if available| -|`--compiled-modules={yes*\|no}` |Enable or disable incremental precompilation of modules| +|`--compiled-modules={yes*\|no\|existing}` |Enable or disable incremental precompilation of modules. The `existing` option allows use of existing compiled modules that were previously precompiled, but disallows creation of new precompile files.| +|`--pkgimages={yes*\|no}` |Enable or disable usage of native code caching in the form of pkgimages| |`-e`, `--eval ` |Evaluate ``| |`-E`, `--print ` |Evaluate `` and display the result| |`-L`, `--load ` |Load `` immediately on all processors| -|`-t`, `--threads {N\|auto`} |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future. Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). 
If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.| -|`-p`, `--procs {N\|auto`} |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)| +|`-t`, `--threads {N\|auto}` |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future. Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.| +| `--gcthreads {N}` |Enable N GC threads; If unspecified is set to half of the compute worker threads.| +|`-p`, `--procs {N\|auto}` |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)| |`--machine-file ` |Run processes on hosts listed in ``| |`-i` |Interactive mode; REPL runs and `isinteractive()` is true| |`-q`, `--quiet` |Quiet startup: no banner, suppress REPL warnings| @@ -117,15 +186,17 @@ The following is a complete list of command-line switches available when launchi |`--warn-overwrite={yes\|no*}` |Enable or disable method overwrite warnings| |`--warn-scope={yes*\|no}` |Enable or disable warning for ambiguous top-level scope| |`-C`, `--cpu-target ` |Limit usage of CPU features up to ``; set to `help` to see the available options| -|`-O`, `--optimize={0,1,2*,3}` |Set the optimization level (level is 3 if `-O` is used without a level)| +|`-O`, `--optimize={0,1,2*,3}` |Set the optimization level (level is 3 if `-O` is used without a level) ($)| |`--min-optlevel={0*,1,2,3}` |Set the lower bound on per-module optimization| -|`-g {0,1*,2}` |Set the level of debug info generation (level is 2 if `-g` is used without a level)| +|`-g`, `--debug-info={0,1*,2}` |Set the level of debug info generation (level is 2 if `-g` is used without a level) ($)| |`--inline={yes\|no}` |Control whether inlining is permitted, including overriding `@inline` declarations| -|`--check-bounds={yes\|no\|auto*}` |Emit bounds checks always, never, or respect `@inbounds` declarations| +|`--check-bounds={yes\|no\|auto*}` |Emit bounds checks always, never, or respect `@inbounds` declarations ($)| |`--math-mode={ieee,fast}` |Disallow or enable unsafe floating point optimizations (overrides `@fastmath` declaration)| |`--code-coverage[={none*\|user\|all}]` |Count executions of source lines (omitting setting is equivalent to `user`)| +|`--code-coverage=@` |Count executions but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.| |`--code-coverage=tracefile.info` |Append coverage information to the LCOV tracefile (filename supports format tokens).| |`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")| +|`--track-allocation=@` |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.| |`--bug-report=KIND` |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. 
It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.| |`--compile={yes*\|no\|all\|min}` |Enable or disable JIT compiler, or request exhaustive or minimal compilation| |`--output-o ` |Generate an object file (including system image data)| diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md index 6fa0e2b71f822..9cab2ed1e4f24 100644 --- a/doc/src/manual/complex-and-rational-numbers.md +++ b/doc/src/manual/complex-and-rational-numbers.md @@ -48,7 +48,7 @@ julia> 3(2 - 5im)^2 -63 - 60im julia> 3(2 - 5im)^-1.0 -0.20689655172413796 + 0.5172413793103449im +0.20689655172413793 + 0.5172413793103449im ``` The promotion mechanism ensures that combinations of operands of different types just work: @@ -140,7 +140,7 @@ when applied to `-1` versus `-1 + 0im` even though `-1 == -1 + 0im`: ```jldoctest julia> sqrt(-1) ERROR: DomainError with -1.0: -sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). +sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). Stacktrace: [...] diff --git a/doc/src/manual/constructors.md b/doc/src/manual/constructors.md index dad96e374742e..27e2c9396c437 100644 --- a/doc/src/manual/constructors.md +++ b/doc/src/manual/constructors.md @@ -244,8 +244,8 @@ ERROR: UndefRefError: access to undefined reference This avoids the need to continually check for `null` values. However, not all object fields are references. Julia considers some types to be "plain data", meaning all of their data is self-contained and does not reference other objects. The plain data types consist of primitive types (e.g. `Int`) -and immutable structs of other plain data types. The initial contents of a plain data type is -undefined: +and immutable structs of other plain data types (see also: [`isbits`](@ref), [`isbitstype`](@ref)). +The initial contents of a plain data type is undefined: ```julia-repl julia> struct HasPlain @@ -491,6 +491,7 @@ operator, which provides a syntax for writing rationals (e.g. `1 ⊘ 2`). Julia' type uses the [`//`](@ref) operator for this purpose. Before these definitions, `⊘` is a completely undefined operator with only syntax and no meaning. Afterwards, it behaves just as described in [Rational Numbers](@ref) -- its entire behavior is defined in these few lines. +Note that the infix use of `⊘` works because Julia has a set of symbols that are recognized to be infix operators. The first and most basic definition just makes `a ⊘ b` construct a `OurRational` by applying the `OurRational` constructor to `a` and `b` when they are integers. When one of the operands of `⊘` is already a rational number, we construct a new rational for the resulting ratio slightly differently; diff --git a/doc/src/manual/control-flow.md b/doc/src/manual/control-flow.md index 92c927f9aa2da..4ab611f0cafae 100644 --- a/doc/src/manual/control-flow.md +++ b/doc/src/manual/control-flow.md @@ -139,7 +139,7 @@ julia> test(1,2) x is less than y. 
julia> test(2,1) -ERROR: UndefVarError: relation not defined +ERROR: UndefVarError: `relation` not defined in local scope Stacktrace: [1] test(::Int64, ::Int64) at ./none:7 ``` @@ -397,7 +397,7 @@ julia> while i <= 3 3 ``` -The `while` loop evaluates the condition expression (`i <= 5` in this case), and as long it remains +The `while` loop evaluates the condition expression (`i <= 3` in this case), and as long it remains `true`, keeps also evaluating the body of the `while` loop. If the condition expression is `false` when the `while` loop is first reached, the body is never evaluated. @@ -414,8 +414,33 @@ julia> for i = 1:3 3 ``` -Here the `1:3` is a range object, representing the sequence of numbers 1, 2, 3. The `for` -loop iterates through these values, assigning each one in turn to the variable `i`. One rather +Here the `1:3` is a [`range`](@ref) object, representing the sequence of numbers 1, 2, 3. The `for` +loop iterates through these values, assigning each one in turn to the variable `i`. +In general, the `for` construct can loop over any "iterable" object (or "container"), from a range like `1:3` or `1:3:13` (a [`StepRange`](@ref) indicating every 3rd integer 1, 4, 7, …, 13) to more generic containers like arrays, including [iterators defined by user code](@ref man-interface-iteration) +or external packages. For containers other than ranges, the alternative +(but fully equivalent) keyword `in` or `∈` is typically used instead of `=`, since it makes +the code read more clearly: + +```jldoctest +julia> for i in [1,4,0] + println(i) + end +1 +4 +0 + +julia> for s ∈ ["foo","bar","baz"] + println(s) + end +foo +bar +baz +``` + +Various types of iterable containers will be introduced and discussed in later sections of the +manual (see, e.g., [Multi-dimensional Arrays](@ref man-multi-dim-arrays)). + +One rather important distinction between the previous `while` loop form and the `for` loop form is the scope during which the variable is visible. A `for` loop always introduces a new iteration variable in its body, regardless of whether a variable of the same name exists in the enclosing scope. @@ -433,7 +458,7 @@ julia> for j = 1:3 3 julia> j -ERROR: UndefVarError: j not defined +ERROR: UndefVarError: `j` not defined in `Main` ``` ```jldoctest @@ -455,29 +480,6 @@ Use `for outer` to modify the latter behavior and reuse an existing local variab See [Scope of Variables](@ref scope-of-variables) for a detailed explanation of variable scope, [`outer`](@ref), and how it works in Julia. -In general, the `for` loop construct can iterate over any container. In these cases, the alternative -(but fully equivalent) keyword `in` or `∈` is typically used instead of `=`, since it makes -the code read more clearly: - -```jldoctest -julia> for i in [1,4,0] - println(i) - end -1 -4 -0 - -julia> for s ∈ ["foo","bar","baz"] - println(s) - end -foo -bar -baz -``` - -Various types of iterable containers will be introduced and discussed in later sections of the -manual (see, e.g., [Multi-dimensional Arrays](@ref man-multi-dim-arrays)). - It is sometimes convenient to terminate the repetition of a `while` before the test condition is falsified or stop iterating in a `for` loop before the end of the iterable object is reached. This can be accomplished with the `break` keyword: @@ -623,7 +625,7 @@ real value: ```jldoctest julia> sqrt(-1) ERROR: DomainError with -1.0: -sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). 
+sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). Stacktrace: [...] ``` @@ -637,11 +639,11 @@ julia> struct MyCustomException <: Exception end ### The [`throw`](@ref) function Exceptions can be created explicitly with [`throw`](@ref). For example, a function defined only -for nonnegative numbers could be written to [`throw`](@ref) a [`DomainError`](@ref) if the argument +for non-negative numbers could be written to [`throw`](@ref) a [`DomainError`](@ref) if the argument is negative: ```jldoctest; filter = r"Stacktrace:(\n \[[0-9]+\].*)*" -julia> f(x) = x>=0 ? exp(-x) : throw(DomainError(x, "argument must be nonnegative")) +julia> f(x) = x>=0 ? exp(-x) : throw(DomainError(x, "argument must be non-negative")) f (generic function with 1 method) julia> f(1) @@ -649,7 +651,7 @@ julia> f(1) julia> f(-1) ERROR: DomainError with -1: -argument must be nonnegative +argument must be non-negative Stacktrace: [1] f(::Int64) at ./none:1 ``` @@ -669,7 +671,7 @@ Additionally, some exception types take one or more arguments that are used for ```jldoctest julia> throw(UndefVarError(:x)) -ERROR: UndefVarError: x not defined +ERROR: UndefVarError: `x` not defined ``` This mechanism can be implemented easily by custom exception types following the way [`UndefVarError`](@ref) @@ -797,7 +799,7 @@ julia> sqrt_second(9) julia> sqrt_second(-9) ERROR: DomainError with -9.0: -sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). +sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). Stacktrace: [...] ``` @@ -827,6 +829,45 @@ no error has occurred, but the ability to unwind the stack and pass a value to a is desirable. Julia provides the [`rethrow`](@ref), [`backtrace`](@ref), [`catch_backtrace`](@ref) and [`current_exceptions`](@ref) functions for more advanced error handling. +### `else` Clauses + +!!! compat "Julia 1.8" + This functionality requires at least Julia 1.8. + +In some cases, one may not only want to appropriately handle the error case, but also want to run +some code only if the `try` block succeeds. For this, an `else` clause can be specified after the +`catch` block that is run whenever no error was thrown previously. The advantage over including +this code in the `try` block instead is that any further errors don't get silently caught by the +`catch` clause. + +```julia +local x +try + x = read("file", String) +catch + # handle read errors +else + # do something with x +end +``` + +!!! note + The `try`, `catch`, `else`, and `finally` clauses each introduce their own scope blocks, so if + a variable is only defined in the `try` block, it can not be accessed by the `else` or `finally` + clause: + ```jldoctest + julia> try + foo = 1 + catch + else + foo + end + ERROR: UndefVarError: `foo` not defined in `Main` + Suggestion: check for spelling errors or missing imports. + ``` + Use the [`local` keyword](@ref local-scope) outside the `try` block to make the variable + accessible from anywhere within the outer scope. 
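For example, a small self-contained sketch of that suggestion, using a `let` block to play the role of the outer scope:

```julia
let
    local foo        # declare `foo` in the enclosing local scope
    try
        foo = 1      # assigns the enclosing `foo`, not a new try-local variable
    catch
        # handle errors here
    else
        @show foo    # `foo` is visible here, and also after the `end`
    end
end
```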
+ ### `finally` Clauses In code that performs state changes or uses resources like files, there is typically clean-up diff --git a/doc/src/manual/conversion-and-promotion.md b/doc/src/manual/conversion-and-promotion.md index 63ae37660cff4..9f785a560bfcc 100644 --- a/doc/src/manual/conversion-and-promotion.md +++ b/doc/src/manual/conversion-and-promotion.md @@ -165,6 +165,7 @@ constructor. Such a definition might look like this: ```julia +import Base: convert convert(::Type{MyType}, x) = MyType(x) ``` @@ -181,7 +182,7 @@ For example, this definition states that it's valid to `convert` any `Number` ty any other by calling a 1-argument constructor: ```julia -convert(::Type{T}, x::Number) where {T<:Number} = T(x) +convert(::Type{T}, x::Number) where {T<:Number} = T(x)::T ``` This means that new `Number` types only need to define constructors, since this @@ -195,6 +196,8 @@ convert(::Type{T}, x::T) where {T<:Number} = x Similar definitions exist for `AbstractString`, [`AbstractArray`](@ref), and [`AbstractDict`](@ref). + + ## Promotion Promotion refers to converting values of mixed types to a single common type. Although it is not @@ -233,11 +236,11 @@ julia> promote(1 + 2im, 3//4) ``` Floating-point values are promoted to the largest of the floating-point argument types. Integer -values are promoted to the larger of either the native machine word size or the largest integer -argument type. Mixtures of integers and floating-point values are promoted to a floating-point -type big enough to hold all the values. Integers mixed with rationals are promoted to rationals. -Rationals mixed with floats are promoted to floats. Complex values mixed with real values are -promoted to the appropriate kind of complex value. +values are promoted to the largest of the integer argument types. If the types are the same size +but differ in signedness, the unsigned type is chosen. Mixtures of integers and floating-point +values are promoted to a floating-point type big enough to hold all the values. Integers mixed +with rationals are promoted to rationals. Rationals mixed with floats are promoted to floats. +Complex values mixed with real values are promoted to the appropriate kind of complex value. That is really all there is to using promotions. The rest is just a matter of clever application, the most typical "clever" application being the definition of catch-all methods for numeric operations @@ -291,6 +294,7 @@ another type object, such that instances of the argument types will be promoted type. 
Thus, by defining the rule: ```julia +import Base: promote_rule promote_rule(::Type{Float64}, ::Type{Float32}) = Float64 ``` @@ -336,6 +340,7 @@ Finally, we finish off our ongoing case study of Julia's rational number type, w sophisticated use of the promotion mechanism with the following promotion rules: ```julia +import Base: promote_rule promote_rule(::Type{Rational{T}}, ::Type{S}) where {T<:Integer,S<:Integer} = Rational{promote_type(T,S)} promote_rule(::Type{Rational{T}}, ::Type{Rational{S}}) where {T<:Integer,S<:Integer} = Rational{promote_type(T,S)} promote_rule(::Type{Rational{T}}, ::Type{S}) where {T<:Integer,S<:AbstractFloat} = promote_type(T,S) diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md index 2c1af8ecb9c26..dd372ccbc53d4 100644 --- a/doc/src/manual/distributed-computing.md +++ b/doc/src/manual/distributed-computing.md @@ -33,7 +33,7 @@ You can wait for a remote call to finish by calling [`wait`](@ref) on the return and you can obtain the full value of the result using [`fetch`](@ref). On the other hand, [`RemoteChannel`](@ref) s are rewritable. For example, multiple processes can -co-ordinate their processing by referencing the same remote `Channel`. +coordinate their processing by referencing the same remote `Channel`. Each process has an associated identifier. The process providing the interactive Julia prompt always has an `id` equal to 1. The processes used by default for parallel operations are referred @@ -158,7 +158,7 @@ julia> rand2(2,2) 1.15119 0.918912 julia> fetch(@spawnat :any rand2(2,2)) -ERROR: RemoteException(2, CapturedException(UndefVarError(Symbol("#rand2")) +ERROR: RemoteException(2, CapturedException(UndefVarError(Symbol("#rand2")))) Stacktrace: [...] ``` @@ -209,7 +209,7 @@ MyType(7) julia> fetch(@spawnat 2 MyType(7)) ERROR: On worker 2: -UndefVarError: MyType not defined +UndefVarError: `MyType` not defined in `Main` ⋮ julia> fetch(@spawnat 2 DummyModule.MyType(7)) @@ -251,7 +251,7 @@ The base Julia installation has in-built support for two types of clusters: should use to connect to this worker. !!! note - While Julia generally strives for backward compatability, distribution of code to worker processes relies on + While Julia generally strives for backward compatibility, distribution of code to worker processes relies on [`Serialization.serialize`](@ref). As pointed out in the corresponding documentation, this can not be guaranteed to work across different Julia versions, so it is advised that all workers on all machines use the same version. @@ -539,9 +539,72 @@ Methods [`put!`](@ref), [`take!`](@ref), [`fetch`](@ref), [`isready`](@ref) and on a [`RemoteChannel`](@ref) are proxied onto the backing store on the remote process. [`RemoteChannel`](@ref) can thus be used to refer to user implemented `AbstractChannel` objects. -A simple example of this is provided in `dictchannel.jl` in the -[Examples repository](https://github.com/JuliaAttic/Examples), which uses a dictionary as its -remote store. 
+A simple example of this is the following `DictChannel` which uses a dictionary as its +remote store: + +```jldoctest +julia> struct DictChannel{T} <: AbstractChannel{T} + d::Dict + cond_take::Threads.Condition # waiting for data to become available + DictChannel{T}() where {T} = new(Dict(), Threads.Condition()) + DictChannel() = DictChannel{Any}() + end + +julia> begin + function Base.put!(D::DictChannel, k, v) + @lock D.cond_take begin + D.d[k] = v + notify(D.cond_take) + end + return D + end + function Base.take!(D::DictChannel, k) + @lock D.cond_take begin + v = fetch(D, k) + delete!(D.d, k) + return v + end + end + Base.isready(D::DictChannel) = @lock D.cond_take !isempty(D.d) + Base.isready(D::DictChannel, k) = @lock D.cond_take haskey(D.d, k) + function Base.fetch(D::DictChannel, k) + @lock D.cond_take begin + wait(D, k) + return D.d[k] + end + end + function Base.wait(D::DictChannel, k) + @lock D.cond_take begin + while !isready(D, k) + wait(D.cond_take) + end + end + end + end; + +julia> d = DictChannel(); + +julia> isready(d) +false + +julia> put!(d, :k, :v); + +julia> isready(d, :k) +true + +julia> fetch(d, :k) +:v + +julia> wait(d, :k) + +julia> take!(d, :k) +:v + +julia> isready(d, :k) +false +``` + + ## Channels and RemoteChannels diff --git a/doc/src/manual/documentation.md b/doc/src/manual/documentation.md index 99d46e364b3eb..169b27ead15f8 100644 --- a/doc/src/manual/documentation.md +++ b/doc/src/manual/documentation.md @@ -17,7 +17,10 @@ environments provide a way to access documentation directly: You can also use the Julia panel in the sidebar to search for documentation. - In [Pluto](https://github.com/fonsp/Pluto.jl), open the "Live Docs" panel on the bottom right. - In [Juno](https://junolab.org) using `Ctrl-J, Ctrl-D` will show the documentation for the object -under the cursor. + under the cursor. + + +`Docs.hasdoc(module, name)::Bool` tells whether a name has a docstring. ## Writing Documentation @@ -310,18 +313,18 @@ end @doc "`subtract(a,b)` subtracts `b` from `a`" subtract ``` -Documentation written in non-toplevel blocks, such as `begin`, `if`, `for`, and `let`, is -added to the documentation system as blocks are evaluated. For example: +Documentation in non-toplevel blocks, such as `begin`, `if`, `for`, and `let`, should be +added to the documentation system via `@doc` as well. For example: ```julia if condition() - "..." + @doc "..." f(x) = x end ``` will add documentation to `f(x)` when `condition()` is `true`. Note that even if `f(x)` goes -out of scope at the end of the block, its documentation will remain. +out of scope at the end of a block, its documentation will remain. It is possible to make use of metaprogramming to assist in the creation of documentation. When using string-interpolation within the docstring you will need to use an extra `$` as diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md index 26904d9ccffcd..f59dee225e1a7 100644 --- a/doc/src/manual/embedding.md +++ b/doc/src/manual/embedding.md @@ -6,7 +6,8 @@ calling Julia functions from C code. This can be used to integrate Julia code in C/C++ project, without the need to rewrite everything in C/C++. Julia has a C API to make this possible. As almost all programming languages have some way to call C functions, the Julia C API can also be used to build further language bridges (e.g. calling Julia from -Python or C#). +Python, Rust or C#). 
Even though Rust and C++ can use the C embedding API directly, both +have packages helping with it, for C++ [Jluna](https://github.com/Clemapfel/jluna) is useful. ## High-Level Embedding @@ -406,8 +407,10 @@ As an alternative for very simple cases, it is possible to just create a global per pointer using ```c -jl_binding_t *bp = jl_get_binding_wr(jl_main_module, jl_symbol("var"), 1); -jl_checked_assignment(bp, val); +jl_module_t *mod = jl_main_module; +jl_sym_t *var = jl_symbol("var"); +jl_binding_t *bp = jl_get_binding_wr(mod, var); +jl_checked_assignment(bp, mod, var, val); ``` ### Updating fields of GC-managed objects @@ -429,14 +432,14 @@ object has just been allocated and no garbage collection has run since then. Not `jl_...` functions can sometimes invoke garbage collection. The write barrier is also necessary for arrays of pointers when updating their data directly. -For example: +Calling `jl_array_ptr_set` is usually much preferred. But direct updates can be done. For example: ```c jl_array_t *some_array = ...; // e.g. a Vector{Any} -void **data = (void**)jl_array_data(some_array); +void **data = jl_array_data(some_array, void*); jl_value_t *some_value = ...; data[0] = some_value; -jl_gc_wb(some_array, some_value); +jl_gc_wb(jl_array_owner(some_array), some_value); ``` ### Controlling the Garbage Collector @@ -484,13 +487,13 @@ referenced. In order to access the data of `x`, we can use `jl_array_data`: ```c -double *xData = (double*)jl_array_data(x); +double *xData = jl_array_data(x, double); ``` Now we can fill the array: ```c -for(size_t i=0; i $(func(i))\")"); jl_eval_string("Threads.@threads for i in 1:5 use(i) end"); diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md index bc4a742365d69..4d591faedc8f1 100644 --- a/doc/src/manual/environment-variables.md +++ b/doc/src/manual/environment-variables.md @@ -2,7 +2,7 @@ Julia can be configured with a number of environment variables, set either in the usual way for each operating system, or in a portable way from within Julia. -Supposing that you want to set the environment variable `JULIA_EDITOR` to `vim`, +Supposing that you want to set the environment variable [`JULIA_EDITOR`](@ref JULIA_EDITOR) to `vim`, you can type `ENV["JULIA_EDITOR"] = "vim"` (for instance, in the REPL) to make this change on a case by case basis, or add the same to the user configuration file `~/.julia/config/startup.jl` in the user's home directory to have a @@ -16,15 +16,26 @@ including those which include `JULIA` in their names. !!! note - Some variables, such as `JULIA_NUM_THREADS` and `JULIA_PROJECT`, need to be set before Julia - starts, therefore adding these to `~/.julia/config/startup.jl` is too late in the startup process. + It is recommended to avoid changing environment variables during runtime, + such as within a `~/.julia/config/startup.jl`. + + One reason is that some julia language variables, such as [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) + and [`JULIA_PROJECT`](@ref JULIA_PROJECT), need to be set before Julia starts. + + Similarly, `__init__()` functions of user modules in the sysimage (via PackageCompiler) are + run before `startup.jl`, so setting environment variables in a `startup.jl` may be too late for + user code. + + Further, changing environment variables during runtime can introduce data races into + otherwise benign code. 
+ In Bash, environment variables can either be set manually by running, e.g., `export JULIA_NUM_THREADS=4` before starting Julia, or by adding the same command to `~/.bashrc` or `~/.bash_profile` to set the variable each time Bash is started. ## File locations -### `JULIA_BINDIR` +### [`JULIA_BINDIR`](@id JULIA_BINDIR) The absolute path of the directory containing the Julia executable, which sets the global variable [`Sys.BINDIR`](@ref). If `$JULIA_BINDIR` is not set, then @@ -61,7 +72,7 @@ by default (via `Base.load_julia_startup()`). For example, a Linux installation with a Julia executable located at `/bin/julia`, a `DATAROOTDIR` of `../share`, and a `SYSCONFDIR` of `../etc` will -have `JULIA_BINDIR` set to `/bin`, a source-file search path of +have [`JULIA_BINDIR`](@ref JULIA_BINDIR) set to `/bin`, a source-file search path of ``` /share/julia/base @@ -73,38 +84,39 @@ and a global configuration search path of /etc/julia/startup.jl ``` -### `JULIA_PROJECT` +### [`JULIA_PROJECT`](@id JULIA_PROJECT) A directory path that indicates which project should be the initial active project. Setting this environment variable has the same effect as specifying the `--project` start-up option, but `--project` has higher precedence. If the variable is set to `@.` +(note the trailing dot) then Julia tries to find a project directory that contains `Project.toml` or `JuliaProject.toml` file from the current directory and its parents. See also the chapter on [Code Loading](@ref code-loading). !!! note - `JULIA_PROJECT` must be defined before starting julia; defining it in `startup.jl` + [`JULIA_PROJECT`](@ref JULIA_PROJECT) must be defined before starting julia; defining it in `startup.jl` is too late in the startup process. -### `JULIA_LOAD_PATH` +### [`JULIA_LOAD_PATH`](@id JULIA_LOAD_PATH) -The `JULIA_LOAD_PATH` environment variable is used to populate the global Julia +The [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) environment variable is used to populate the global Julia [`LOAD_PATH`](@ref) variable, which determines which packages can be loaded via `import` and `using` (see [Code Loading](@ref code-loading)). -Unlike the shell `PATH` variable, empty entries in `JULIA_LOAD_PATH` are expanded to +Unlike the shell `PATH` variable, empty entries in [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) are expanded to the default value of `LOAD_PATH`, `["@", "@v#.#", "@stdlib"]` when populating `LOAD_PATH`. This allows easy appending, prepending, etc. of the load path value in -shell scripts regardless of whether `JULIA_LOAD_PATH` is already set or not. For +shell scripts regardless of whether [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) is already set or not. For example, to prepend the directory `/foo/bar` to `LOAD_PATH` just do ```sh export JULIA_LOAD_PATH="/foo/bar:$JULIA_LOAD_PATH" ``` -If the `JULIA_LOAD_PATH` environment variable is already set, its old value will be -prepended with `/foo/bar`. On the other hand, if `JULIA_LOAD_PATH` is not set, then +If the [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) environment variable is already set, its old value will be +prepended with `/foo/bar`. On the other hand, if [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) is not set, then it will be set to `/foo/bar:` which will expand to a `LOAD_PATH` value of -`["/foo/bar", "@", "@v#.#", "@stdlib"]`. If `JULIA_LOAD_PATH` is set to the empty +`["/foo/bar", "@", "@v#.#", "@stdlib"]`. If [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH) is set to the empty string, it expands to an empty `LOAD_PATH` array. 
In other words, the empty string is interpreted as a zero-element array, not a one-element array of the empty string. This behavior was chosen so that it would be possible to set an empty load path via @@ -116,26 +128,26 @@ environment variable or if it must have a value, set it to the string `:`. On Windows, path elements are separated by the `;` character, as is the case with most path lists on Windows. Replace `:` with `;` in the above paragraph. -### `JULIA_DEPOT_PATH` +### [`JULIA_DEPOT_PATH`](@id JULIA_DEPOT_PATH) -The `JULIA_DEPOT_PATH` environment variable is used to populate the global Julia +The [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable is used to populate the global Julia [`DEPOT_PATH`](@ref) variable, which controls where the package manager, as well as Julia's code loading mechanisms, look for package registries, installed packages, named environments, repo clones, cached compiled package images, configuration files, and the default location of the REPL's history file. -Unlike the shell `PATH` variable but similar to `JULIA_LOAD_PATH`, empty entries in -`JULIA_DEPOT_PATH` are expanded to the default value of `DEPOT_PATH`. This allows +Unlike the shell `PATH` variable but similar to [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH), empty entries in +[`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) are expanded to the default value of `DEPOT_PATH`. This allows easy appending, prepending, etc. of the depot path value in shell scripts regardless -of whether `JULIA_DEPOT_PATH` is already set or not. For example, to prepend the +of whether [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) is already set or not. For example, to prepend the directory `/foo/bar` to `DEPOT_PATH` just do ```sh export JULIA_DEPOT_PATH="/foo/bar:$JULIA_DEPOT_PATH" ``` -If the `JULIA_DEPOT_PATH` environment variable is already set, its old value will be -prepended with `/foo/bar`. On the other hand, if `JULIA_DEPOT_PATH` is not set, then +If the [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable is already set, its old value will be +prepended with `/foo/bar`. On the other hand, if [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) is not set, then it will be set to `/foo/bar:` which will have the effect of prepending `/foo/bar` to -the default depot path. If `JULIA_DEPOT_PATH` is set to the empty string, it expands +the default depot path. If [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) is set to the empty string, it expands to an empty `DEPOT_PATH` array. In other words, the empty string is interpreted as a zero-element array, not a one-element array of the empty string. This behavior was chosen so that it would be possible to set an empty depot path via the environment @@ -148,12 +160,12 @@ or if it must have a value, set it to the string `:`. most path lists on Windows. Replace `:` with `;` in the above paragraph. !!! note - `JULIA_DEPOT_PATH` must be defined before starting julia; defining it in + [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) must be defined before starting julia; defining it in `startup.jl` is too late in the startup process; at that point you can instead directly modify the `DEPOT_PATH` array, which is populated from the environment variable. -### `JULIA_HISTORY` +### [`JULIA_HISTORY`](@id JULIA_HISTORY) The absolute path `REPL.find_hist_file()` of the REPL's history file. If `$JULIA_HISTORY` is not set, then `REPL.find_hist_file()` defaults to @@ -162,50 +174,54 @@ The absolute path `REPL.find_hist_file()` of the REPL's history file. 
If $(DEPOT_PATH[1])/logs/repl_history.jl ``` -### `JULIA_MAX_NUM_PRECOMPILE_FILES` +### [`JULIA_MAX_NUM_PRECOMPILE_FILES`](@id JULIA_MAX_NUM_PRECOMPILE_FILES) Sets the maximum number of different instances of a single package that are to be stored in the precompile cache (default = 10). +### [`JULIA_VERBOSE_LINKING`](@id JULIA_VERBOSE_LINKING) + +If set to true, linker commands will be displayed during precompilation. + ## Pkg.jl -### `JULIA_CI` +### [`JULIA_CI`](@id JULIA_CI) If set to `true`, this indicates to the package server that any package operations are part of a continuous integration (CI) system for the purposes of gathering package usage statistics. -### `JULIA_NUM_PRECOMPILE_TASKS` +### [`JULIA_NUM_PRECOMPILE_TASKS`](@id JULIA_NUM_PRECOMPILE_TASKS) The number of parallel tasks to use when precompiling packages. See [`Pkg.precompile`](https://pkgdocs.julialang.org/v1/api/#Pkg.precompile). -### `JULIA_PKG_DEVDIR` +### [`JULIA_PKG_DEVDIR`](@id JULIA_PKG_DEVDIR) The default directory used by [`Pkg.develop`](https://pkgdocs.julialang.org/v1/api/#Pkg.develop) for downloading packages. -### `JULIA_PKG_IGNORE_HASHES` +### [`JULIA_PKG_IGNORE_HASHES`](@id JULIA_PKG_IGNORE_HASHES) If set to `1`, this will ignore incorrect hashes in artifacts. This should be used carefully, as it disables verification of downloads, but can resolve issues when moving files across different types of file systems. See [Pkg.jl issue #2317](https://github.com/JuliaLang/Pkg.jl/issues/2317) for more details. !!! compat "Julia 1.6" This is only supported in Julia 1.6 and above. -### `JULIA_PKG_OFFLINE` +### [`JULIA_PKG_OFFLINE`](@id JULIA_PKG_OFFLINE) If set to `true`, this will enable offline mode: see [`Pkg.offline`](https://pkgdocs.julialang.org/v1/api/#Pkg.offline). !!! compat "Julia 1.5" Pkg's offline mode requires Julia 1.5 or later. -### `JULIA_PKG_PRECOMPILE_AUTO` +### [`JULIA_PKG_PRECOMPILE_AUTO`](@id JULIA_PKG_PRECOMPILE_AUTO) If set to `0`, this will disable automatic precompilation by package actions which change the manifest. See [`Pkg.precompile`](https://pkgdocs.julialang.org/v1/api/#Pkg.precompile). -### `JULIA_PKG_SERVER` +### [`JULIA_PKG_SERVER`](@id JULIA_PKG_SERVER) Specifies the URL of the package registry to use. By default, `Pkg` uses `https://pkg.julialang.org` to fetch Julia packages. In addition, you can disable the use of the PkgServer protocol, and instead access the packages directly from their hosts (GitHub, GitLab, etc.) by setting: ``` export JULIA_PKG_SERVER="" ``` -### `JULIA_PKG_SERVER_REGISTRY_PREFERENCE` +### [`JULIA_PKG_SERVER_REGISTRY_PREFERENCE`](@id JULIA_PKG_SERVER_REGISTRY_PREFERENCE) Specifies the preferred registry flavor. Currently supported values are `conservative` (the default), which will only publish resources that have been processed by the storage @@ -217,37 +233,49 @@ downloading from arbitrary servers should not use the `eager` flavor. !!! compat "Julia 1.7" This only affects Julia 1.7 and above. -### `JULIA_PKG_UNPACK_REGISTRY` +### [`JULIA_PKG_UNPACK_REGISTRY`](@id JULIA_PKG_UNPACK_REGISTRY) If set to `true`, this will unpack the registry instead of storing it as a compressed tarball. !!! compat "Julia 1.7" This only affects Julia 1.7 and above. Earlier versions will always unpack the registry. -### `JULIA_PKG_USE_CLI_GIT` +### [`JULIA_PKG_USE_CLI_GIT`](@id JULIA_PKG_USE_CLI_GIT) If set to `true`, Pkg operations which use the git protocol will use an external `git` executable instead of the default libgit2 library. !!! 
compat "Julia 1.7" Use of the `git` executable is only supported on Julia 1.7 and above. -### `JULIA_PKGRESOLVE_ACCURACY` +### [`JULIA_PKGRESOLVE_ACCURACY`](@id JULIA_PKGRESOLVE_ACCURACY) The accuracy of the package resolver. This should be a positive integer, the default is `1`. +### [`JULIA_PKG_PRESERVE_TIERED_INSTALLED`](@id JULIA_PKG_PRESERVE_TIERED_INSTALLED) + +Change the default package installation strategy to `Pkg.PRESERVE_TIERED_INSTALLED` +to let the package manager try to install versions of packages while keeping as many +versions of packages already installed as possible. + +!!! compat "Julia 1.9" + This only affects Julia 1.9 and above. + ## Network transport -### `JULIA_NO_VERIFY_HOSTS` / `JULIA_SSL_NO_VERIFY_HOSTS` / `JULIA_SSH_NO_VERIFY_HOSTS` / `JULIA_ALWAYS_VERIFY_HOSTS` +### [`JULIA_NO_VERIFY_HOSTS`](@id JULIA_NO_VERIFY_HOSTS) +### [`JULIA_SSL_NO_VERIFY_HOSTS`](@id JULIA_SSL_NO_VERIFY_HOSTS) +### [`JULIA_SSH_NO_VERIFY_HOSTS`](@id JULIA_SSH_NO_VERIFY_HOSTS) +### [`JULIA_ALWAYS_VERIFY_HOSTS`](@id JULIA_ALWAYS_VERIFY_HOSTS) Specify hosts whose identity should or should not be verified for specific transport layers. See [`NetworkOptions.verify_host`](https://github.com/JuliaLang/NetworkOptions.jl#verify_host) -### `JULIA_SSL_CA_ROOTS_PATH` +### [`JULIA_SSL_CA_ROOTS_PATH`](@id JULIA_SSL_CA_ROOTS_PATH) Specify the file or directory containing the certificate authority roots. See [`NetworkOptions.ca_roots`](https://github.com/JuliaLang/NetworkOptions.jl#ca_roots) ## External applications -### `JULIA_SHELL` +### [`JULIA_SHELL`](@id JULIA_SHELL) The absolute path of the shell with which Julia should execute external commands (via `Base.repl_cmd()`). Defaults to the environment variable `$SHELL`, and @@ -258,7 +286,7 @@ falls back to `/bin/sh` if `$SHELL` is unset. On Windows, this environment variable is ignored, and external commands are executed directly. -### `JULIA_EDITOR` +### [`JULIA_EDITOR`](@id JULIA_EDITOR) The editor returned by `InteractiveUtils.editor()` and used in, e.g., [`InteractiveUtils.edit`](@ref), referring to the command of the preferred editor, for instance `vim`. @@ -272,12 +300,12 @@ To use Visual Studio Code on Windows, set `$JULIA_EDITOR` to `code.cmd`. ## Parallelization -### `JULIA_CPU_THREADS` +### [`JULIA_CPU_THREADS`](@id JULIA_CPU_THREADS) Overrides the global variable [`Base.Sys.CPU_THREADS`](@ref), the number of logical CPU cores available. -### `JULIA_WORKER_TIMEOUT` +### [`JULIA_WORKER_TIMEOUT`](@id JULIA_WORKER_TIMEOUT) A [`Float64`](@ref) that sets the value of `Distributed.worker_timeout()` (default: `60.0`). This function gives the number of seconds a worker process will wait for @@ -304,14 +332,35 @@ to the number of CPU threads. !!! compat "Julia 1.7" The `auto` value for `$JULIA_NUM_THREADS` requires Julia 1.7 or above. -### `JULIA_THREAD_SLEEP_THRESHOLD` +### [`JULIA_THREAD_SLEEP_THRESHOLD`](@id JULIA_THREAD_SLEEP_THRESHOLD) If set to a string that starts with the case-insensitive substring `"infinite"`, then spinning threads never sleep. Otherwise, `$JULIA_THREAD_SLEEP_THRESHOLD` is interpreted as an unsigned 64-bit integer (`uint64_t`) and gives, in nanoseconds, the amount of time after which spinning threads should sleep. -### `JULIA_EXCLUSIVE` +### [`JULIA_NUM_GC_THREADS`](@id JULIA_NUM_GC_THREADS) + +Sets the number of threads used by Garbage Collection. If unspecified is set to +half of the number of worker threads. + +!!! 
compat "Julia 1.10" + The environment variable was added in 1.10 + +### [`JULIA_IMAGE_THREADS`](@id JULIA_IMAGE_THREADS) + +An unsigned 32-bit integer that sets the number of threads used by image +compilation in this Julia process. The value of this variable may be +ignored if the module is a small module. If left unspecified, the smaller +of the value of [`JULIA_CPU_THREADS`](@ref JULIA_CPU_THREADS) or half the +number of logical CPU cores is used in its place. + +### [`JULIA_IMAGE_TIMINGS`](@id JULIA_IMAGE_TIMINGS) + +A boolean value that determines if detailed timing information is printed during +during image compilation. Defaults to 0. + +### [`JULIA_EXCLUSIVE`](@id JULIA_EXCLUSIVE) If set to anything besides `0`, then Julia's thread policy is consistent with running on a dedicated machine: the master thread is on proc 0, and threads are @@ -325,38 +374,98 @@ sequences](https://en.wikipedia.org/wiki/ANSI_escape_code). Julia provides a high-level interface with much of the same functionality; see the section on [The Julia REPL](@ref). -### `JULIA_ERROR_COLOR` +### [`JULIA_ERROR_COLOR`](@id JULIA_ERROR_COLOR) The formatting `Base.error_color()` (default: light red, `"\033[91m"`) that errors should have at the terminal. -### `JULIA_WARN_COLOR` +### [`JULIA_WARN_COLOR`](@id JULIA_WARN_COLOR) The formatting `Base.warn_color()` (default: yellow, `"\033[93m"`) that warnings should have at the terminal. -### `JULIA_INFO_COLOR` +### [`JULIA_INFO_COLOR`](@id JULIA_INFO_COLOR) The formatting `Base.info_color()` (default: cyan, `"\033[36m"`) that info should have at the terminal. -### `JULIA_INPUT_COLOR` +### [`JULIA_INPUT_COLOR`](@id JULIA_INPUT_COLOR) The formatting `Base.input_color()` (default: normal, `"\033[0m"`) that input should have at the terminal. -### `JULIA_ANSWER_COLOR` +### [`JULIA_ANSWER_COLOR`](@id JULIA_ANSWER_COLOR) The formatting `Base.answer_color()` (default: normal, `"\033[0m"`) that output should have at the terminal. +## System and Package Image Building + +### [`JULIA_CPU_TARGET`](@id JULIA_CPU_TARGET) + +Modify the target machine architecture for (pre)compiling +[system](@ref sysimg-multi-versioning) and [package images](@ref pkgimgs-multi-versioning). +`JULIA_CPU_TARGET` only affects machine code image generation being output to a disk cache. +Unlike the `--cpu-target`, or `-C`, [command line option](@ref cli), it does not influence +just-in-time (JIT) code generation within a Julia session where machine code is only +stored in memory. + +Valid values for [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) can be obtained by executing `julia -C help`. + +Setting [`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) is important for heterogeneous compute systems where processors of +distinct types or features may be present. This is commonly encountered in high performance +computing (HPC) clusters since the component nodes may be using distinct processors. + +The CPU target string is a list of strings separated by `;` each string starts with a CPU +or architecture name and followed by an optional list of features separated by `,`. +A `generic` or empty CPU name means the basic required feature set of the target ISA +which is at least the architecture the C/C++ runtime is compiled with. Each string +is interpreted by LLVM. + +A few special features are supported: +1. `clone_all` + + This forces the target to have all functions in sysimg cloned. + When used in negative form (i.e. `-clone_all`), this disables full clone that's + enabled by default for certain targets. + +2. 
`base([0-9]*)` + + This specifies the (0-based) base target index. The base target is the target + that the current target is based on, i.e. the functions that are not being cloned + will use the version in the base target. This option causes the base target to be + fully cloned (as if `clone_all` is specified for it) if it is not the default target (0). + The index can only be smaller than the current index. + +3. `opt_size` + + Optimize for size with minimum performance impact. Clang/GCC's `-Os`. + +4. `min_size` + + Optimize only for size. Clang's `-Oz`. + + ## Debugging and profiling -### `JULIA_DEBUG` +### [`JULIA_DEBUG`](@id JULIA_DEBUG) Enable debug logging for a file or module, see [`Logging`](@ref man-logging) for more information. -### `JULIA_GC_ALLOC_POOL`, `JULIA_GC_ALLOC_OTHER`, `JULIA_GC_ALLOC_PRINT` +### [`JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`](@id JULIA_PROFILE_PEEK_HEAP_SNAPSHOT) + +Enable collecting of a heap snapshot during execution via the profiling peek mechanism. +See [Triggered During Execution](@ref). + +### [`JULIA_TIMING_SUBSYSTEMS`](@id JULIA_TIMING_SUBSYSTEMS) + +Allows you to enable or disable zones for a specific Julia run. +For instance, setting the variable to `+GC,-INFERENCE` will enable the `GC` zones and disable +the `INFERENCE` zones. See [Dynamically Enabling and Disabling Zones](@ref). + +### [`JULIA_GC_ALLOC_POOL`](@id JULIA_GC_ALLOC_POOL) +### [`JULIA_GC_ALLOC_OTHER`](@id JULIA_GC_ALLOC_OTHER) +### [`JULIA_GC_ALLOC_PRINT`](@id JULIA_GC_ALLOC_PRINT) If set, these environment variables take strings that optionally start with the character `'r'`, followed by a string interpolation of a colon-separated list of @@ -383,7 +492,7 @@ the interval between garbage collection events is randomized. garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1` in the build configuration). -### `JULIA_GC_NO_GENERATIONAL` +### [`JULIA_GC_NO_GENERATIONAL`](@id JULIA_GC_NO_GENERATIONAL) If set to anything besides `0`, then the Julia garbage collector never performs "quick sweeps" of memory. @@ -394,7 +503,7 @@ If set to anything besides `0`, then the Julia garbage collector never performs garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1` in the build configuration). -### `JULIA_GC_WAIT_FOR_DEBUGGER` +### [`JULIA_GC_WAIT_FOR_DEBUGGER`](@id JULIA_GC_WAIT_FOR_DEBUGGER) If set to anything besides `0`, then the Julia garbage collector will wait for a debugger to attach instead of aborting whenever there's a critical error. @@ -405,7 +514,7 @@ a debugger to attach instead of aborting whenever there's a critical error. garbage-collection debugging (that is, if `WITH_GC_DEBUG_ENV` is set to `1` in the build configuration). -### `ENABLE_JITPROFILING` +### [`ENABLE_JITPROFILING`](@id ENABLE_JITPROFILING) If set to anything besides `0`, then the compiler will create and register an event listener for just-in-time (JIT) profiling. @@ -421,12 +530,16 @@ event listener for just-in-time (JIT) profiling. * [Perf](https://perf.wiki.kernel.org) (`USE_PERF_JITEVENTS` set to `1` in the build configuration). This integration is enabled by default. -### `ENABLE_GDBLISTENER` +### [`ENABLE_GDBLISTENER`](@id ENABLE_GDBLISTENER) If set to anything besides `0` enables GDB registration of Julia code on release builds. On debug builds of Julia this is always enabled. Recommended to use with `-g 2`. -### `JULIA_LLVM_ARGS` +### [`JULIA_LLVM_ARGS`](@id JULIA_LLVM_ARGS) Arguments to be passed to the LLVM backend. 
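+
+For example (a sketch only: the options actually accepted depend on the LLVM version that
+Julia was built against), one could ask LLVM to report the time spent in each of its passes:
+
+```
+# illustrative value; consult the LLVM documentation for the options available in your build
+export JULIA_LLVM_ARGS=-time-passes
+```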
+ +### `JULIA_FALLBACK_REPL` + +Forces the fallback REPL instead of REPL.jl. diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index b476e2b25b64e..ccea44f922a8e 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -22,11 +22,28 @@ On the other hand, language *interoperability* is extremely useful: we want to e ### How does Julia define its public API? -The only interfaces that are stable with respect to [SemVer](https://semver.org/) of `julia` -version are the Julia `Base` and standard libraries interfaces described in -[the documentation](https://docs.julialang.org/) and not marked as unstable (e.g., -experimental and internal). Functions, types, and constants are not part of the public -API if they are not included in the documentation, _even if they have docstrings_. +Julia's public [API](https://en.wikipedia.org/wiki/API) is the behavior described in +documentation of public symbols from `Base` and the standard libraries. Functions, +types, and constants are not part of the public API if they are not public, even if +they have docstrings or are described in the documentation. Further, only the documented +behavior of public symbols is part of the public API. Undocumented behavior of public +symbols is internal. + +Public symbols are those marked with either `public foo` or `export foo`. + +In other words: + +- Documented behavior of public symbols is part of the public API. +- Undocumented behavior of public symbols is not part of the public API. +- Documented behavior of private symbols is not part of the public API. +- Undocumented behavior of private symbols is not part of the public API. + +You can get a complete list of the public symbols from a module with `names(MyModule)`. + +Package authors are encouraged to define their public API similarly. + +Anything in Julia's Public API is covered by [SemVer](https://semver.org/) and therefore +will not be removed or receive meaningful breaking changes before Julia 2.0. ### There is a useful undocumented function/type/constant. Can I use it? @@ -36,8 +53,8 @@ a complex non-public API, especially when using it from a stable package, it is to open an [issue](https://github.com/JuliaLang/julia/issues) or [pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning it into a public API. However, we do not discourage the attempt to create packages that expose -stable public interfaces while relying on non-public implementation details of `julia` and -buffering the differences across different `julia` versions. +stable public interfaces while relying on non-public implementation details of Julia and +buffering the differences across different Julia versions. ### The documentation is not accurate enough. Can I rely on the existing behavior? @@ -94,6 +111,9 @@ When a file is run as the main script using `julia file.jl` one might want to ac functionality like command line argument handling. A way to determine that a file is run in this fashion is to check if `abspath(PROGRAM_FILE) == @__FILE__` is `true`. +However, it is recommended to not write files that double as a script and as an importable library. +If one needs functionality both available as a library and a script, it is better to write it as a library, then import the functionality into a distinct script. + ### [How do I catch CTRL-C in a script?](@id catch-ctrl-c) Running a Julia script using `julia file.jl` does not throw @@ -103,43 +123,25 @@ which may or may not be caused by CTRL-C, use [`atexit`](@ref).
Alternatively, you can use `julia -e 'include(popfirst!(ARGS))' file.jl` to execute a script while being able to catch `InterruptException` in the [`try`](@ref) block. +Note that with this strategy [`PROGRAM_FILE`](@ref) will not be set. ### How do I pass options to `julia` using `#!/usr/bin/env`? -Passing options to `julia` in so-called shebang by, e.g., -`#!/usr/bin/env julia --startup-file=no` may not work in some -platforms such as Linux. This is because argument parsing in shebang -is platform-dependent and not well-specified. In a Unix-like -environment, a reliable way to pass options to `julia` in an -executable script would be to start the script as a `bash` script and -use `exec` to replace the process to `julia`: +Passing options to `julia` in a so-called shebang line, as in +`#!/usr/bin/env julia --startup-file=no`, will not work on many +platforms (BSD, macOS, Linux) where the kernel, unlike the shell, does +not split arguments at space characters. The option `env -S`, which +splits a single argument string into multiple arguments at spaces, +similar to a shell, offers a simple workaround: ```julia -#!/bin/bash -#= -exec julia --color=yes --startup-file=no "${BASH_SOURCE[0]}" "$@" -=# - +#!/usr/bin/env -S julia --color=yes --startup-file=no @show ARGS # put any Julia code here ``` -In the example above, the code between `#=` and `=#` is run as a `bash` -script. Julia ignores this part since it is a multi-line comment for -Julia. The Julia code after `=#` is ignored by `bash` since it stops -parsing the file once it reaches to the `exec` statement. - !!! note - In order to [catch CTRL-C](@ref catch-ctrl-c) in the script you can use - ```julia - #!/bin/bash - #= - exec julia --color=yes --startup-file=no -e 'include(popfirst!(ARGS))' \ - "${BASH_SOURCE[0]}" "$@" - =# - - @show ARGS # put any Julia code here - ``` - instead. Note that with this strategy [`PROGRAM_FILE`](@ref) will not be set. + Option `env -S` appeared in FreeBSD 6.0 (2005), macOS Sierra (2016) + and GNU/Linux coreutils 8.30 (2018). ### Why doesn't `run` support `*` or pipes for scripting external programs? @@ -172,7 +174,7 @@ while x < 10 end ``` and notice that it works fine in an interactive environment (like the Julia REPL), -but gives `UndefVarError: x not defined` when you try to run it in script or other +but gives ```UndefVarError: `x` not defined``` when you try to run it in script or other file. What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**. Here, `x` is a global variable, `while` defines a [local scope](@ref scope-of-variables), and `x += 1` is @@ -420,7 +422,7 @@ Certain operations make mathematical sense but result in errors: ```jldoctest julia> sqrt(-2.0) ERROR: DomainError with -2.0: -sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). +sqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). Stacktrace: [...] ``` @@ -723,7 +725,7 @@ julia> module Foo julia> Foo.foo() ERROR: On worker 2: -UndefVarError: Foo not defined +UndefVarError: `Foo` not defined in `Main` Stacktrace: [...] ``` @@ -744,7 +746,7 @@ julia> @everywhere module Foo julia> Foo.foo() ERROR: On worker 2: -UndefVarError: gvar not defined +UndefVarError: `gvar` not defined in `Main.Foo` Stacktrace: [...] 
``` @@ -780,7 +782,7 @@ bar (generic function with 1 method) julia> remotecall_fetch(bar, 2) ERROR: On worker 2: -UndefVarError: #bar not defined +UndefVarError: `#bar` not defined in `Main` [...] julia> anon_bar = ()->1 @@ -836,10 +838,13 @@ to strings); similarly, `repeat` can be used instead of `^` to repeat strings. T ### What is the difference between "using" and "import"? -There is only one difference, and on the surface (syntax-wise) it may seem very minor. The difference -between `using` and `import` is that with `using` you need to say `function Foo.bar(..` to -extend module Foo's function bar with a new method, but with `import Foo.bar`, -you only need to say `function bar(...` and it automatically extends module Foo's function bar. +There are several differences between `using` and `import` +(see the [Modules section](https://docs.julialang.org/en/v1/manual/modules/#modules)), +but there is an important difference that may not seem intuitive at first glance, +and on the surface (i.e. syntax-wise) it may seem very minor. When loading modules with `using`, +you need to say `function Foo.bar(...` to extend module `Foo`'s function `bar` with a new method, +but with `import Foo.bar`, you only need to say `function bar(...` and it automatically extends +module `Foo`'s function `bar`. The reason this is important enough to have been given separate syntax is that you don't want to accidentally extend a function that you didn't know existed, because that could easily cause @@ -1047,17 +1052,15 @@ Modifying OpenBLAS settings or compiling Julia with a different BLAS library, eg ### How do I manage precompilation caches in distributed file systems? -When using `julia` in high-performance computing (HPC) facilities, invoking -_n_ `julia` processes simultaneously creates at most _n_ temporary copies of -precompilation cache files. If this is an issue (slow and/or small distributed -file system), you may: +When using Julia in high-performance computing (HPC) facilities with shared filesystems, it is recommended to use a shared +depot (via the [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH) environment variable). Since Julia v1.10, multiple Julia processes on functionally similar +workers and using the same depot will coordinate via pidfile locks to only spend effort precompiling on one process while the +others wait. The precompilation process will indicate when the process is precompiling or waiting for another that is +precompiling. If non-interactive the messages are via `@debug`. -1. Use `julia` with `--compiled-modules=no` flag to turn off precompilation. -2. Configure a private writable depot using `pushfirst!(DEPOT_PATH, private_path)` - where `private_path` is a path unique to this `julia` process. This - can also be done by setting environment variable `JULIA_DEPOT_PATH` to - `$private_path:$HOME/.julia`. -3. Create a symlink from `~/.julia/compiled` to a directory in a scratch space. +However, due to caching of binary code, the cache rejection since v1.9 is more strict and users may need to set the +[`JULIA_CPU_TARGET`](@ref JULIA_CPU_TARGET) environment variable appropriately to get a single cache that is usable throughout the HPC +environment. ## Julia Releases @@ -1067,7 +1070,7 @@ The Stable version of Julia is the latest released version of Julia, this is the It has the latest features, including improved performance. The Stable version of Julia is versioned according to [SemVer](https://semver.org/) as v1.x.y. 
A new minor release of Julia corresponding to a new Stable version is made approximately every 4-5 months after a few weeks of testing as a release candidate. -Unlike the LTS version the a Stable version will not normally receive bugfixes after another Stable version of Julia has been released. +Unlike the LTS version the Stable version will not normally receive bugfixes after another Stable version of Julia has been released. However, upgrading to the next Stable release will always be possible as each release of Julia v1.x will continue to run code written for earlier versions. You may prefer the LTS (Long Term Support) version of Julia if you are looking for a very stable code base. diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md index c3012efa1d8b1..af7f150a74e42 100644 --- a/doc/src/manual/functions.md +++ b/doc/src/manual/functions.md @@ -5,7 +5,7 @@ functions are not pure mathematical functions, because they can alter and be aff by the global state of the program. The basic syntax for defining functions in Julia is: ```jldoctest -julia> function f(x,y) +julia> function f(x, y) x + y end f (generic function with 1 method) @@ -18,7 +18,7 @@ There is a second, more terse syntax for defining a function in Julia. The tradi declaration syntax demonstrated above is equivalent to the following compact "assignment form": ```jldoctest fofxy -julia> f(x,y) = x + y +julia> f(x, y) = x + y f (generic function with 1 method) ``` @@ -30,7 +30,7 @@ both typing and visual noise. A function is called using the traditional parenthesis syntax: ```jldoctest fofxy -julia> f(2,3) +julia> f(2, 3) 5 ``` @@ -40,28 +40,71 @@ like any other value: ```jldoctest fofxy julia> g = f; -julia> g(2,3) +julia> g(2, 3) 5 ``` As with variables, Unicode can also be used for function names: ```jldoctest -julia> ∑(x,y) = x + y +julia> ∑(x, y) = x + y ∑ (generic function with 1 method) julia> ∑(2, 3) 5 ``` -## Argument Passing Behavior +## [Argument Passing Behavior](@id man-argument-passing) Julia function arguments follow a convention sometimes called "pass-by-sharing", which means that values are not copied when they are passed to functions. Function arguments themselves act as -new variable *bindings* (new locations that can refer to values), but the values they refer to +new variable *bindings* (new "names" that can refer to values), much like +[assignments](@ref man-assignment-expressions) `argument_name = argument_value`, so that the objects they refer to are identical to the passed values. Modifications to mutable values (such as `Array`s) made within -a function will be visible to the caller. This is the same behavior found in Scheme, most Lisps, -Python, Ruby and Perl, among other dynamic languages. +a function will be visible to the caller. (This is the same behavior found in Scheme, most Lisps, +Python, Ruby and Perl, among other dynamic languages.) + +For example, in the function +```julia +function f(x, y) + x[1] = 42 # mutates x + y = 7 + y # new binding for y, no mutation + return y +end +``` +The statement `x[1] = 42` *mutates* the object `x`, and hence this change *will* be visible in the array passed +by the caller for this argument. On the other hand, the assignment `y = 7 + y` changes the *binding* ("name") +`y` to refer to a new value `7 + y`, rather than mutating the *original* object referred to by `y`, +and hence does *not* change the corresponding argument passed by the caller. 
This can be seen if we call `f(x, y)`: +```julia-repl +julia> a = [4, 5, 6] +3-element Vector{Int64}: + 4 + 5 + 6 + +julia> b = 3 +3 + +julia> f(a, b) # returns 7 + b == 10 +10 + +julia> a # a[1] is changed to 42 by f +3-element Vector{Int64}: + 42 + 5 + 6 + +julia> b # not changed +3 +``` +As a common convention in Julia (not a syntactic requirement), such a function would +[typically be named `f!(x, y)`](@ref man-punctuation) rather than `f(x, y)`, as a visual reminder at +the call site that at least one of the arguments (often the first one) is being mutated. + +!!! warning "Shared memory between arguments" + The behavior of a mutating function can be unexpected when a mutated argument shares memory with another argument, a situation known as aliasing (e.g. when one is a view of the other). + Unless the function docstring explicitly indicates that aliasing produces the expected result, it is the responsibility of the caller to ensure proper behavior on such inputs. ## Argument-type declarations @@ -90,7 +133,7 @@ the `return` keyword causes a function to return immediately, providing an expression whose value is returned: ```julia -function g(x,y) +function g(x, y) return x * y x + y end @@ -100,19 +143,19 @@ Since function definitions can be entered into interactive sessions, it is easy definitions: ```jldoctest -julia> f(x,y) = x + y +julia> f(x, y) = x + y f (generic function with 1 method) -julia> function g(x,y) +julia> function g(x, y) return x * y x + y end g (generic function with 1 method) -julia> f(2,3) +julia> f(2, 3) 5 -julia> g(2,3) +julia> g(2, 3) 6 ``` @@ -123,18 +166,18 @@ is of real use. Here, for example, is a function that computes the hypotenuse le triangle with sides of length `x` and `y`, avoiding overflow: ```jldoctest -julia> function hypot(x,y) +julia> function hypot(x, y) x = abs(x) y = abs(y) if x > y r = y/x - return x*sqrt(1+r*r) + return x*sqrt(1 + r*r) end if y == 0 return zero(x) end r = x/y - return y*sqrt(1+r*r) + return y*sqrt(1 + r*r) end hypot (generic function with 1 method) @@ -146,7 +189,7 @@ There are three possible points of return from this function, returning the valu expressions, depending on the values of `x` and `y`. The `return` on the last line could be omitted since it is the last expression. -### Return type +### [Return type](@id man-functions-return-type) A return type can be specified in the function declaration using the `::` operator. This converts the return value to the specified type. @@ -203,7 +246,7 @@ as you would any other function: julia> 1 + 2 + 3 6 -julia> +(1,2,3) +julia> +(1, 2, 3) 6 ``` @@ -214,7 +257,7 @@ operators such as [`+`](@ref) and [`*`](@ref) just like you would with other fun ```jldoctest julia> f = +; -julia> f(1,2,3) +julia> f(1, 2, 3) 6 ``` @@ -224,16 +267,20 @@ Under the name `f`, the function does not support infix notation, however. A few special expressions correspond to calls to functions with non-obvious names. These are: -| Expression | Calls | -|:----------------- |:----------------------- | -| `[A B C ...]` | [`hcat`](@ref) | -| `[A; B; C; ...]` | [`vcat`](@ref) | -| `[A B; C D; ...]` | [`hvcat`](@ref) | -| `A'` | [`adjoint`](@ref) | -| `A[i]` | [`getindex`](@ref) | -| `A[i] = x` | [`setindex!`](@ref) | -| `A.n` | [`getproperty`](@ref Base.getproperty) | -| `A.n = x` | [`setproperty!`](@ref Base.setproperty!) 
| +| Expression | Calls | +|:--------------------- |:----------------------- | +| `[A B C ...]` | [`hcat`](@ref) | +| `[A; B; C; ...]` | [`vcat`](@ref) | +| `[A B; C D; ...]` | [`hvcat`](@ref) | +| `[A; B;; C; D;; ...]` | [`hvncat`](@ref) | +| `A'` | [`adjoint`](@ref) | +| `A[i]` | [`getindex`](@ref) | +| `A[i] = x` | [`setindex!`](@ref) | +| `A.n` | [`getproperty`](@ref Base.getproperty) | +| `A.n = x` | [`setproperty!`](@ref Base.setproperty!) | + +Note that expressions similar to `[A; B;; C; D;; ...]` but with more than two +consecutive `;` also correspond to `hvncat` calls. ## [Anonymous Functions](@id man-anonymous-functions) @@ -253,7 +300,7 @@ julia> function (x) #3 (generic function with 1 method) ``` -This creates a function taking one argument `x` and returning the value of the polynomial `x^2 + +Each statement creates a function taking one argument `x` and returning the value of the polynomial `x^2 + 2x - 1` at that value. Notice that the result is a generic function, but with a compiler-generated name based on consecutive numbering. @@ -283,28 +330,17 @@ julia> map(x -> x^2 + 2x - 1, [1, 3, -1]) ``` An anonymous function accepting multiple arguments can be written using the syntax `(x,y,z)->2x+y-z`. -A zero-argument anonymous function is written as `()->3`. The idea of a function with no arguments -may seem strange, but is useful for "delaying" a computation. In this usage, a block of code is -wrapped in a zero-argument function, which is later invoked by calling it as `f`. - -As an example, consider this call to [`get`](@ref): - -```julia -get(dict, key) do - # default value calculated here - time() -end -``` -The code above is equivalent to calling `get` with an anonymous function containing the code -enclosed between `do` and `end`, like so: +Argument-type declarations for anonymous functions work as for named functions, for example `x::Integer->2x`. +The return type of an anonymous function cannot be specified. -```julia -get(()->time(), dict, key) -``` - -The call to [`time`](@ref) is delayed by wrapping it in a 0-argument anonymous function -that is called only when the requested key is absent from `dict`. +A zero-argument anonymous function can be written as `()->2+2`. The idea of a function with +no arguments may seem strange, but is useful in cases where a result cannot (or should not) +be precomputed. For example, Julia has a zero-argument [`time`](@ref) function that returns +the current time in seconds, and thus `seconds = ()->round(Int, time())` is an anonymous +function that returns this time rounded to the nearest integer assigned to the variable +`seconds`. Each time this anonymous function is called as `seconds()` the current time will +be calculated and returned. ## Tuples @@ -348,9 +384,8 @@ julia> x.a 2 ``` -Named tuples are very similar to tuples, except that fields can additionally be accessed by name -using dot syntax (`x.a`) in addition to the regular indexing syntax -(`x[1]`). +The fields of named tuples can be accessed by name using dot syntax (`x.a`) in +addition to the regular indexing syntax (`x[1]` or `x[:a]`). ## [Destructuring Assignment and Multiple Return Values](@id destructuring-assignment) @@ -359,7 +394,7 @@ left side of an assignment: the value on the right side is _destructured_ by ite over and assigning to each variable in turn: ```jldoctest -julia> (a,b,c) = 1:3 +julia> (a, b, c) = 1:3 1:3 julia> b @@ -374,7 +409,7 @@ This can be used to return multiple values from functions by returning a tuple o other iterable value. 
For example, the following function returns two values: ```jldoctest foofunc -julia> function foo(a,b) +julia> function foo(a, b) a+b, a*b end foo (generic function with 1 method) @@ -384,14 +419,14 @@ If you call it in an interactive session without assigning the return value anyw see the tuple returned: ```jldoctest foofunc -julia> foo(2,3) +julia> foo(2, 3) (5, 6) ``` Destructuring assignment extracts each value into a variable: ```jldoctest foofunc -julia> x, y = foo(2,3) +julia> x, y = foo(2, 3) (5, 6) julia> x @@ -430,7 +465,7 @@ Other valid left-hand side expressions can be used as elements of the assignment ```jldoctest julia> X = zeros(3); -julia> X[1], (a,b) = (1, (2, 3)) +julia> X[1], (a, b) = (1, (2, 3)) (1, (2, 3)) julia> X @@ -582,7 +617,7 @@ julia> foo(A(3, 4)) For anonymous functions, destructuring a single argument requires an extra comma: ``` -julia> map(((x,y),) -> x + y, [(1,2), (3,4)]) +julia> map(((x, y),) -> x + y, [(1, 2), (3, 4)]) 2-element Array{Int64,1}: 3 7 @@ -595,7 +630,7 @@ Such functions are traditionally known as "varargs" functions, which is short fo of arguments". You can define a varargs function by following the last positional argument with an ellipsis: ```jldoctest barfunc -julia> bar(a,b,x...) = (a,b,x) +julia> bar(a, b, x...) = (a, b, x) bar (generic function with 1 method) ``` @@ -604,16 +639,16 @@ The variables `a` and `b` are bound to the first two argument values as usual, a two arguments: ```jldoctest barfunc -julia> bar(1,2) +julia> bar(1, 2) (1, 2, ()) -julia> bar(1,2,3) +julia> bar(1, 2, 3) (1, 2, (3,)) julia> bar(1, 2, 3, 4) (1, 2, (3, 4)) -julia> bar(1,2,3,4,5,6) +julia> bar(1, 2, 3, 4, 5, 6) (1, 2, (3, 4, 5, 6)) ``` @@ -630,7 +665,7 @@ call instead: julia> x = (3, 4) (3, 4) -julia> bar(1,2,x...) +julia> bar(1, 2, x...) (1, 2, (3, 4)) ``` @@ -641,7 +676,7 @@ of arguments go. This need not be the case, however: julia> x = (2, 3, 4) (2, 3, 4) -julia> bar(1,x...) +julia> bar(1, x...) (1, 2, (3, 4)) julia> x = (1, 2, 3, 4) @@ -654,15 +689,15 @@ julia> bar(x...) Furthermore, the iterable object splatted into a function call need not be a tuple: ```jldoctest barfunc -julia> x = [3,4] +julia> x = [3, 4] 2-element Vector{Int64}: 3 4 -julia> bar(1,2,x...) +julia> bar(1, 2, x...) (1, 2, (3, 4)) -julia> x = [1,2,3,4] +julia> x = [1, 2, 3, 4] 4-element Vector{Int64}: 1 2 @@ -677,9 +712,9 @@ Also, the function that arguments are splatted into need not be a varargs functi often is): ```jldoctest -julia> baz(a,b) = a + b; +julia> baz(a, b) = a + b; -julia> args = [1,2] +julia> args = [1, 2] 2-element Vector{Int64}: 1 2 @@ -687,7 +722,7 @@ julia> args = [1,2] julia> baz(args...) 3 -julia> args = [1,2,3] +julia> args = [1, 2, 3] 3-element Vector{Int64}: 1 2 @@ -716,12 +751,15 @@ from `Dates` module constructs a `Date` type for a given year `y`, month `m` and However, `m` and `d` arguments are optional and their default value is `1`. 
This behavior can be expressed concisely as: -```julia -function Date(y::Int64, m::Int64=1, d::Int64=1) - err = validargs(Date, y, m, d) - err === nothing || throw(err) - return Date(UTD(totaldays(y, m, d))) -end +```jldoctest date_default_args +julia> using Dates + +julia> function date(y::Int64, m::Int64=1, d::Int64=1) + err = Dates.validargs(Date, y, m, d) + err === nothing || throw(err) + return Date(Dates.UTD(Dates.totaldays(y, m, d))) + end +date (generic function with 3 methods) ``` Observe, that this definition calls another method of the `Date` function that takes one argument @@ -730,22 +768,28 @@ of type `UTInstant{Day}`. With this definition, the function can be called with either one, two or three arguments, and `1` is automatically passed when only one or two of the arguments are specified: -```jldoctest -julia> using Dates - -julia> Date(2000, 12, 12) +```jldoctest date_default_args +julia> date(2000, 12, 12) 2000-12-12 -julia> Date(2000, 12) +julia> date(2000, 12) 2000-12-01 -julia> Date(2000) +julia> date(2000) 2000-01-01 ``` Optional arguments are actually just a convenient syntax for writing multiple method definitions with different numbers of arguments (see [Note on Optional and keyword Arguments](@ref)). -This can be checked for our `Date` function example by calling `methods` function. +This can be checked for our `date` function example by calling the `methods` function: + +```julia-repl +julia> methods(date) +# 3 methods for generic function "date": +[1] date(y::Int64) in Main at REPL[1]:1 +[2] date(y::Int64, m::Int64) in Main at REPL[1]:1 +[3] date(y::Int64, m::Int64, d::Int64) in Main at REPL[1]:1 +``` ## Keyword Arguments @@ -779,7 +823,7 @@ prior keyword arguments. The types of keyword arguments can be made explicit as follows: ```julia -function f(;x::Int=1) +function f(; x::Int=1) ### end ``` @@ -801,7 +845,8 @@ end ``` Inside `f`, `kwargs` will be an immutable key-value iterator over a named tuple. -Named tuples (as well as dictionaries with keys of `Symbol`) can be passed as +Named tuples (as well as dictionaries with keys of `Symbol`, and other iterators +yielding two-value collections with symbol as first values) can be passed as keyword arguments using a semicolon in a call, e.g. `f(x, z=1; kwargs...)`. If a keyword argument is not assigned a default value in the method definition, @@ -928,7 +973,7 @@ can create performance challenges as discussed in [performance tips](@ref man-pe Functions in Julia can be combined by composing or piping (chaining) them together. Function composition is when you combine functions together and apply the resulting composition to arguments. -You use the function composition operator (`∘`) to compose the functions, so `(f ∘ g)(args...)` is the same as `f(g(args...))`. +You use the function composition operator (`∘`) to compose the functions, so `(f ∘ g)(args...; kw...)` is the same as `f(g(args...; kw...))`. You can type the composition operator at the REPL and suitably-configured editors using `\circ`. @@ -979,7 +1024,7 @@ julia> ["a", "list", "of", "strings"] .|> [uppercase, reverse, titlecase, length 7 ``` -When combining pipes with anonymous functions, parentheses must be used if subsequent pipes are not to parsed as part of the anonymous function's body. Compare: +When combining pipes with anonymous functions, parentheses must be used if subsequent pipes are not to be parsed as part of the anonymous function's body. 
Compare: ```jldoctest julia> 1:3 .|> (x -> x^2) |> sum |> sqrt @@ -1024,13 +1069,13 @@ in advance by the library writer. More generally, `f.(args...)` is actually equivalent to `broadcast(f, args...)`, which allows you to operate on multiple arrays (even of different shapes), or a mix of arrays and scalars (see -[Broadcasting](@ref)). For example, if you have `f(x,y) = 3x + 4y`, then `f.(pi,A)` will return -a new array consisting of `f(pi,a)` for each `a` in `A`, and `f.(vector1,vector2)` will return -a new vector consisting of `f(vector1[i],vector2[i])` for each index `i` (throwing an exception +[Broadcasting](@ref)). For example, if you have `f(x, y) = 3x + 4y`, then `f.(pi, A)` will return +a new array consisting of `f(pi,a)` for each `a` in `A`, and `f.(vector1, vector2)` will return +a new vector consisting of `f(vector1[i], vector2[i])` for each index `i` (throwing an exception if the vectors have different length). ```jldoctest -julia> f(x,y) = 3x + 4y; +julia> f(x, y) = 3x + 4y; julia> A = [1.0, 2.0, 3.0]; @@ -1097,7 +1142,7 @@ they are equivalent to `broadcast` calls and are fused with other nested "dot" c You can also combine dot operations with function chaining using [`|>`](@ref), as in this example: ```jldoctest -julia> [1:5;] .|> [x->x^2, inv, x->2*x, -, isodd] +julia> 1:5 .|> [x->x^2, inv, x->2*x, -, isodd] 5-element Vector{Real}: 1 0.5 @@ -1106,6 +1151,8 @@ julia> [1:5;] .|> [x->x^2, inv, x->2*x, -, isodd] true ``` +All functions in the fused broadcast are always called for every element of the result. Thus `X .+ σ .* randn.()` will add a mask of independent and identically sampled random values to each element of the array `X`, but `X .+ σ .* randn()` will add the *same* random sample to each element. In cases where the fused computation is constant along one or more axes of the broadcast iteration, it may be possible to leverage a space-time tradeoff and allocate intermediate values to reduce the number of computations. See more at [performance tips](@ref man-performance-unfuse). + ## Further Reading We should mention here that this is far from a complete picture of defining functions. Julia has diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md index 16dab24afecf9..36d54650388cd 100644 --- a/doc/src/manual/getting-started.md +++ b/doc/src/manual/getting-started.md @@ -10,8 +10,9 @@ known as a read-eval-print loop or "REPL") by double-clicking the Julia executab `julia` from the command line: ```@eval +using REPL io = IOBuffer() -Base.banner(io) +REPL.banner(io) banner = String(take!(io)) import Markdown Markdown.parse("```\n\$ julia\n\n$(banner)\njulia> 1 + 2\n3\n\njulia> ans\n3\n```") @@ -34,7 +35,7 @@ command: $ julia script.jl ``` -You can pass additional arguments to Julia, and to your program `script.jl`. A detailed list of all the available options can be found under [Command-line Interface](@ref). +You can pass additional arguments to Julia, and to your program `script.jl`. A detailed list of all the available options can be found under [Command-line Interface](@ref cli). ## Resources diff --git a/doc/src/manual/integers-and-floating-point-numbers.md b/doc/src/manual/integers-and-floating-point-numbers.md index 2d073b83aec0a..4c31871374aa2 100644 --- a/doc/src/manual/integers-and-floating-point-numbers.md +++ b/doc/src/manual/integers-and-floating-point-numbers.md @@ -185,7 +185,9 @@ determining storage size of a literal. So `0x01` is a `UInt8` while `0x0001` is That allows the user to control the size. 
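+
+For example, the width of the written literal, including leading zeros, selects the type:
+
+```julia-repl
+julia> typeof(0x1)
+UInt8
+
+julia> typeof(0x0001)
+UInt16
+
+julia> typeof(0x00000001)
+UInt32
+```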
-Values which cannot be stored in `UInt128` cannot be written as such literals. +Unsigned literals (starting with `0x`) that encode integers too large to be represented as +`UInt128` values will construct `BigInt` values instead. This is not an unsigned type but +it is the only built-in type big enough to represent such large integer values. Binary, octal, and hexadecimal literals may be signed by a `-` immediately preceding the unsigned literal. They produce an unsigned integer of the same size as the unsigned literal @@ -241,11 +243,10 @@ julia> x + 1 == typemin(Int64) true ``` -Thus, arithmetic with Julia integers is actually a form of [modular arithmetic](https://en.wikipedia.org/wiki/Modular_arithmetic). -This reflects the characteristics of the underlying arithmetic of integers as implemented on modern -computers. In applications where overflow is possible, explicit checking for wraparound produced -by overflow is essential; otherwise, the [`BigInt`](@ref) type in [Arbitrary Precision Arithmetic](@ref) -is recommended instead. +Arithmetic operations with Julia's integer types inherently perform [modular arithmetic](https://en.wikipedia.org/wiki/Modular_arithmetic), +mirroring the characteristics of integer arithmetic on modern computer hardware. In scenarios where overflow is a possibility, +it is crucial to explicitly check for wraparound effects that can result from such overflows. +The [`Base.Checked`](@ref) module provides a suite of arithmetic operations equipped with overflow checks, which trigger errors if an overflow occurs. For use cases where overflow cannot be tolerated under any circumstances, utilizing the [`BigInt`](@ref) type, as detailed in [Arbitrary Precision Arithmetic](@ref), is advisable. An example of overflow behavior and how to potentially resolve it is as follows: @@ -651,6 +652,13 @@ julia> setprecision(40) do 1.1000000000004 ``` +!!! warning + The relation between [`setprecision`](@ref) or [`setrounding`](@ref) and + [`@big_str`](@ref), the macro used for `big` string literals (such as + `big"0.3"`), might not be intuitive, as a consequence of the fact that + `@big_str` is a macro. See the [`@big_str`](@ref) documentation for + details. + ## [Numeric Literal Coefficients](@id man-numeric-literal-coefficients) To make common numeric formulae and expressions clearer, Julia allows variables to be immediately diff --git a/doc/src/manual/interfaces.md b/doc/src/manual/interfaces.md index 96475b6818a1a..d158fb86575a2 100644 --- a/doc/src/manual/interfaces.md +++ b/doc/src/manual/interfaces.md @@ -7,29 +7,24 @@ to generically build upon those behaviors. 
## [Iteration](@id man-interface-iteration) -| Required methods | | Brief description | -|:------------------------------ |:---------------------- |:------------------------------------------------------------------------------------- | -| `iterate(iter)` | | Returns either a tuple of the first item and initial state or [`nothing`](@ref) if empty | -| `iterate(iter, state)` | | Returns either a tuple of the next item and next state or `nothing` if no items remain | -| **Important optional methods** | **Default definition** | **Brief description** | -| `Base.IteratorSize(IterType)` | `Base.HasLength()` | One of `Base.HasLength()`, `Base.HasShape{N}()`, `Base.IsInfinite()`, or `Base.SizeUnknown()` as appropriate | -| `Base.IteratorEltype(IterType)`| `Base.HasEltype()` | Either `Base.EltypeUnknown()` or `Base.HasEltype()` as appropriate | -| `eltype(IterType)` | `Any` | The type of the first entry of the tuple returned by `iterate()` | -| `length(iter)` | (*undefined*) | The number of items, if known | -| `size(iter, [dim])` | (*undefined*) | The number of items in each dimension, if known | -| `Base.isdone(iter[, state])` | `missing` | Fast-path hint for iterator completion. Should be defined for stateful iterators, or else `isempty(iter)` may call `iterate(iter[, state])` and mutate the iterator. | - -| Value returned by `IteratorSize(IterType)` | Required Methods | -|:------------------------------------------ |:------------------------------------------ | -| `Base.HasLength()` | [`length(iter)`](@ref) | -| `Base.HasShape{N}()` | `length(iter)` and `size(iter, [dim])` | -| `Base.IsInfinite()` | (*none*) | -| `Base.SizeUnknown()` | (*none*) | - -| Value returned by `IteratorEltype(IterType)` | Required Methods | -|:-------------------------------------------- |:------------------ | -| `Base.HasEltype()` | `eltype(IterType)` | -| `Base.EltypeUnknown()` | (*none*) | +There are two methods that are always required: + +| Required method | Brief description | +|:----------------------- |:---------------------------------------------------------------------------------------- | +| [`iterate(iter)`](@ref) | Returns either a tuple of the first item and initial state or [`nothing`](@ref) if empty | +| `iterate(iter, state)` | Returns either a tuple of the next item and next state or `nothing` if no items remain | + +There are several more methods that should be defined in some circumstances. +Please note that you should always define at least one of `Base.IteratorSize(IterType)` and `length(iter)` because the default definition of `Base.IteratorSize(IterType)` is `Base.HasLength()`. + +| Method | When should this method be defined? 
| Default definition | Brief description | +|:--- |:--- |:--- |:--- | +| [`Base.IteratorSize(IterType)`](@ref) | If default is not appropriate | `Base.HasLength()` | One of `Base.HasLength()`, `Base.HasShape{N}()`, `Base.IsInfinite()`, or `Base.SizeUnknown()` as appropriate | +| [`length(iter)`](@ref) | If `Base.IteratorSize()` returns `Base.HasLength()` or `Base.HasShape{N}()` | (*undefined*) | The number of items, if known | +| [`size(iter, [dim])`](@ref) | If `Base.IteratorSize()` returns `Base.HasShape{N}()` | (*undefined*) | The number of items in each dimension, if known | +| [`Base.IteratorEltype(IterType)`](@ref) | If default is not appropriate | `Base.HasEltype()` | Either `Base.EltypeUnknown()` or `Base.HasEltype()` as appropriate | +| [`eltype(IterType)`](@ref) | If default is not appropriate | `Any` | The type of the first entry of the tuple returned by `iterate()` | +| [`Base.isdone(iter, [state])`](@ref) | **Must** be defined if iterator is stateful | `missing` | Fast-path hint for iterator completion. If not defined for a stateful iterator then functions that check for done-ness, like `isempty()` and `zip()`, may mutate the iterator and cause buggy behaviour! | Sequential iteration is implemented by the [`iterate`](@ref) function. Instead of mutating objects as they are iterated over, Julia iterators may keep track @@ -157,10 +152,10 @@ julia> collect(Iterators.reverse(Squares(4))) | Methods to implement | Brief description | |:-------------------- |:-------------------------------- | -| `getindex(X, i)` | `X[i]`, indexed element access | -| `setindex!(X, v, i)` | `X[i] = v`, indexed assignment | +| `getindex(X, i)` | `X[i]`, indexed access, non-scalar `i` should allocate a copy | +| `setindex!(X, v, i)` | `X[i] = v`, indexed assignment | | `firstindex(X)` | The first index, used in `X[begin]` | -| `lastindex(X)` | The last index, used in `X[end]` | +| `lastindex(X)` | The last index, used in `X[end]` | For the `Squares` iterable above, we can easily compute the `i`th element of the sequence by squaring it. We can expose this as an indexing expression `S[i]`. To opt into this behavior, `Squares` @@ -220,10 +215,10 @@ ourselves, we can officially define it as a subtype of an [`AbstractArray`](@ref | `size(A)` | | Returns a tuple containing the dimensions of `A` | | `getindex(A, i::Int)` | | (if `IndexLinear`) Linear scalar indexing | | `getindex(A, I::Vararg{Int, N})` | | (if `IndexCartesian`, where `N = ndims(A)`) N-dimensional scalar indexing | -| `setindex!(A, v, i::Int)` | | (if `IndexLinear`) Scalar indexed assignment | -| `setindex!(A, v, I::Vararg{Int, N})` | | (if `IndexCartesian`, where `N = ndims(A)`) N-dimensional scalar indexed assignment | | **Optional methods** | **Default definition** | **Brief description** | | `IndexStyle(::Type)` | `IndexCartesian()` | Returns either `IndexLinear()` or `IndexCartesian()`. See the description below. 
| +| `setindex!(A, v, i::Int)` | | (if `IndexLinear`) Scalar indexed assignment | +| `setindex!(A, v, I::Vararg{Int, N})` | | (if `IndexCartesian`, where `N = ndims(A)`) N-dimensional scalar indexed assignment | | `getindex(A, I...)` | defined in terms of scalar `getindex` | [Multidimensional and nonscalar indexing](@ref man-array-indexing) | | `setindex!(A, X, I...)` | defined in terms of scalar `setindex!` | [Multidimensional and nonscalar indexed assignment](@ref man-array-indexing) | | `iterate` | defined in terms of scalar `getindex` | Iteration | @@ -233,7 +228,7 @@ ourselves, we can officially define it as a subtype of an [`AbstractArray`](@ref | `similar(A, dims::Dims)` | `similar(A, eltype(A), dims)` | Return a mutable array with the same element type and size *dims* | | `similar(A, ::Type{S}, dims::Dims)` | `Array{S}(undef, dims)` | Return a mutable array with the specified element type and size | | **Non-traditional indices** | **Default definition** | **Brief description** | -| `axes(A)` | `map(OneTo, size(A))` | Return a tuple of `AbstractUnitRange{<:Integer}` of valid indices | +| `axes(A)` | `map(OneTo, size(A))` | Return a tuple of `AbstractUnitRange{<:Integer}` of valid indices. The axes should be their own axes, that is `axes.(axes(A),1) == axes(A)` should be satisfied. | | `similar(A, ::Type{S}, inds)` | `similar(A, S, Base.to_shape(inds))` | Return a mutable array with the specified indices `inds` (see below) | | `similar(T::Union{Type,Function}, inds)` | `T(Base.to_shape(inds))` | Return an array similar to `T` with the specified indices `inds` (see below) | @@ -462,10 +457,17 @@ Not all types support `axes` and indexing, but many are convenient to allow in b The [`Base.broadcastable`](@ref) function is called on each argument to broadcast, allowing it to return something different that supports `axes` and indexing. By default, this is the identity function for all `AbstractArray`s and `Number`s — they already -support `axes` and indexing. For a handful of other types (including but not limited to -types themselves, functions, special singletons like [`missing`](@ref) and [`nothing`](@ref), and dates), -`Base.broadcastable` returns the argument wrapped in a `Ref` to act as a 0-dimensional -"scalar" for the purposes of broadcasting. Custom types can similarly specialize +support `axes` and indexing. + +If a type is intended to act like a "0-dimensional scalar" (a single object) rather than as a +container for broadcasting, then the following method should be defined: +```julia +Base.broadcastable(o::MyType) = Ref(o) +``` +that returns the argument wrapped in a 0-dimensional [`Ref`](@ref) container. For example, such a wrapper +method is defined for types themselves, functions, special singletons like [`missing`](@ref) and [`nothing`](@ref), and dates. + +Custom array-like types can specialize `Base.broadcastable` to define their shape, but they should follow the convention that `collect(Base.broadcastable(x)) == collect(x)`. 
A notable exception is `AbstractString`; strings are special-cased to behave as scalars for the purposes of broadcast even though @@ -739,7 +741,7 @@ in one or two dimensional outputs, but produce an `Array` for any other dimensio | Methods to implement | Default definition | Brief description | |:--------------------------------- |:---------------------------- |:------------------------------------------------------------------------------------- | -| `propertynames(x::ObjType, private::Bool=false)` | `fieldnames(typeof((x))` | Return a tuple of the properties (`x.property`) of an object `x`. If `private=true`, also return fieldnames intended to be kept as private | +| `propertynames(x::ObjType, private::Bool=false)` | `fieldnames(typeof(x))` | Return a tuple of the properties (`x.property`) of an object `x`. If `private=true`, also return property names intended to be kept as private | | `getproperty(x::ObjType, s::Symbol)` | `getfield(x, s)` | Return property `s` of `x`. `x.s` calls `getproperty(x, :s)`. | | `setproperty!(x::ObjType, s::Symbol, v)` | `setfield!(x, s, v)` | Set property `s` of `x` to `v`. `x.s = v` calls `setproperty!(x, :s, v)`. Should return `v`.| @@ -785,9 +787,9 @@ defined to add new functionality: julia> Base.propertynames(::Point, private::Bool=false) = private ? (:x, :y, :r, :ϕ) : (:x, :y) julia> function Base.getproperty(p::Point, s::Symbol) - if s == :x + if s === :x return getfield(p, :r) * cos(getfield(p, :ϕ)) - elseif s == :y + elseif s === :y return getfield(p, :r) * sin(getfield(p, :ϕ)) else # This allows accessing fields with p.r and p.ϕ @@ -796,12 +798,12 @@ julia> function Base.getproperty(p::Point, s::Symbol) end julia> function Base.setproperty!(p::Point, s::Symbol, f) - if s == :x + if s === :x y = p.y setfield!(p, :r, sqrt(f^2 + y^2)) setfield!(p, :ϕ, atan(y, f)) return f - elseif s == :y + elseif s === :y x = p.x setfield!(p, :r, sqrt(x^2 + f^2)) setfield!(p, :ϕ, atan(f, x)) @@ -834,3 +836,51 @@ julia> p.r Finally, it is worth noting that adding instance properties like this is quite rarely done in Julia and should in general only be done if there is a good reason for doing so. + +## [Rounding](@id man-rounding-interface) + +| Methods to implement | Default definition | Brief description | +|:--------------------------------------------- |:------------------------- |:--------------------------------------------------------------------------------------------------- | +| `round(x::ObjType, r::RoundingMode)` | none | Round `x` and return the result. If possible, round should return an object of the same type as `x` | +| `round(T::Type, x::ObjType, r::RoundingMode)` | `convert(T, round(x, r))` | Round `x`, returning the result as a `T` | + +To support rounding on a new type it is typically sufficient to define the single method +`round(x::ObjType, r::RoundingMode)`. The passed rounding mode determines in which direction +the value should be rounded. The most commonly used rounding modes are `RoundNearest`, +`RoundToZero`, `RoundDown`, and `RoundUp`, as these rounding modes are used in the +definitions of the one argument `round`, method, and `trunc`, `floor`, and `ceil`, +respectively. + +In some cases, it is possible to define a three-argument `round` method that is more +accurate or performant than the two-argument method followed by conversion. In this case it +is acceptable to define the three argument method in addition to the two argument method. 
+If it is impossible to represent the rounded result as an object of the type `T`, +then the three argument method should throw an `InexactError`. + +For example, if we have an `Interval` type which represents a range of possible values +similar to https://github.com/JuliaPhysics/Measurements.jl, we may define rounding on that +type with the following + +```jldoctest +julia> struct Interval{T} + min::T + max::T + end + +julia> Base.round(x::Interval, r::RoundingMode) = Interval(round(x.min, r), round(x.max, r)) + +julia> x = Interval(1.7, 2.2) +Interval{Float64}(1.7, 2.2) + +julia> round(x) +Interval{Float64}(2.0, 2.0) + +julia> floor(x) +Interval{Float64}(1.0, 2.0) + +julia> ceil(x) +Interval{Float64}(2.0, 3.0) + +julia> trunc(x) +Interval{Float64}(1.0, 2.0) +``` diff --git a/doc/src/manual/mathematical-operations.md b/doc/src/manual/mathematical-operations.md index 21722a5e80684..59af063ce8487 100644 --- a/doc/src/manual/mathematical-operations.md +++ b/doc/src/manual/mathematical-operations.md @@ -20,9 +20,9 @@ are supported on all primitive numeric types: | `x ÷ y` | integer divide | x / y, truncated to an integer | | `x \ y` | inverse divide | equivalent to `y / x` | | `x ^ y` | power | raises `x` to the `y`th power | -| `x % y` | remainder | equivalent to `rem(x,y)` | +| `x % y` | remainder | equivalent to `rem(x, y)` | -A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x+y)`, is treated as a multiplication, except with higher precedence than other binary operations. See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details. +A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x + y)`, is treated as a multiplication, except with higher precedence than other binary operations. See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details. Julia's promotion system makes arithmetic operations on mixtures of argument types "just work" naturally and automatically. See [Conversion and Promotion](@ref conversion-and-promotion) for details of the promotion @@ -171,15 +171,15 @@ The updating versions of all the binary arithmetic and bitwise operators are: For *every* binary operation like `^`, there is a corresponding "dot" operation `.^` that is *automatically* defined to perform `^` element-by-element on arrays. For example, -`[1,2,3] ^ 3` is not defined, since there is no standard +`[1, 2, 3] ^ 3` is not defined, since there is no standard mathematical meaning to "cubing" a (non-square) array, but -`[1,2,3] .^ 3` is defined as computing the elementwise +`[1, 2, 3] .^ 3` is defined as computing the elementwise (or "vectorized") result `[1^3, 2^3, 3^3]`. Similarly for unary operators like `!` or `√`, there is a corresponding `.√` that applies the operator elementwise. ```jldoctest -julia> [1,2,3] .^ 3 +julia> [1, 2, 3] .^ 3 3-element Vector{Int64}: 1 8 @@ -204,9 +204,9 @@ as `a .= a .+ b`, where `.=` is a fused *in-place* assignment operation (see the [dot syntax documentation](@ref man-vectorized)). Note the dot syntax is also applicable to user-defined operators. -For example, if you define `⊗(A,B) = kron(A,B)` to give a convenient +For example, if you define `⊗(A, B) = kron(A, B)` to give a convenient infix syntax `A ⊗ B` for Kronecker products ([`kron`](@ref)), then -`[A,B] .⊗ [C,D]` will compute `[A⊗C, B⊗D]` with no additional coding. +`[A, B] .⊗ [C, D]` will compute `[A⊗C, B⊗D]` with no additional coding. 
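+
+A minimal sketch of this pattern (it assumes the `LinearAlgebra` standard library is
+loaded, since that is where `kron` is defined):
+
+```julia
+using LinearAlgebra
+
+⊗(A, B) = kron(A, B)   # user-defined infix operator for the Kronecker product
+
+A, B, C, D = rand(2, 2), rand(2, 2), rand(2, 2), rand(2, 2)
+
+[A, B] .⊗ [C, D]       # 2-element vector containing A ⊗ C and B ⊗ D
+```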
Combining dot operators with numeric literals can be ambiguous. For example, it is not clear whether `1.+x` means `1. + x` or `1 .+ x`. @@ -457,7 +457,7 @@ Juxtaposition parses like a unary operator, which has the same natural asymmetry Julia supports three forms of numerical conversion, which differ in their handling of inexact conversions. - * The notation `T(x)` or `convert(T,x)` converts `x` to a value of type `T`. + * The notation `T(x)` or `convert(T, x)` converts `x` to a value of type `T`. * If `T` is a floating-point type, the result is the nearest representable value, which could be positive or negative infinity. @@ -502,7 +502,7 @@ julia> round(Int8,127.4) 127 julia> round(Int8,127.6) -ERROR: InexactError: trunc(Int8, 128.0) +ERROR: InexactError: Int8(128.0) Stacktrace: [...] ``` @@ -524,30 +524,30 @@ See [Conversion and Promotion](@ref conversion-and-promotion) for how to define ### Division functions -| Function | Description | -|:------------------------- |:--------------------------------------------------------------------------------------------------------- | -| [`div(x,y)`](@ref), `x÷y` | truncated division; quotient rounded towards zero | -| [`fld(x,y)`](@ref) | floored division; quotient rounded towards `-Inf` | -| [`cld(x,y)`](@ref) | ceiling division; quotient rounded towards `+Inf` | -| [`rem(x,y)`](@ref), `x%y` | remainder; satisfies `x == div(x,y)*y + rem(x,y)`; sign matches `x` | -| [`mod(x,y)`](@ref) | modulus; satisfies `x == fld(x,y)*y + mod(x,y)`; sign matches `y` | -| [`mod1(x,y)`](@ref) | `mod` with offset 1; returns `r∈(0,y]` for `y>0` or `r∈[y,0)` for `y<0`, where `mod(r, y) == mod(x, y)` | -| [`mod2pi(x)`](@ref) | modulus with respect to 2pi; `0 <= mod2pi(x) < 2pi` | -| [`divrem(x,y)`](@ref) | returns `(div(x,y),rem(x,y))` | -| [`fldmod(x,y)`](@ref) | returns `(fld(x,y),mod(x,y))` | -| [`gcd(x,y...)`](@ref) | greatest positive common divisor of `x`, `y`,... | -| [`lcm(x,y...)`](@ref) | least positive common multiple of `x`, `y`,... | +| Function | Description | +|:-------------------------- |:--------------------------------------------------------------------------------------------------------- | +| [`div(x, y)`](@ref), `x÷y` | truncated division; quotient rounded towards zero | +| [`fld(x, y)`](@ref) | floored division; quotient rounded towards `-Inf` | +| [`cld(x, y)`](@ref) | ceiling division; quotient rounded towards `+Inf` | +| [`rem(x, y)`](@ref), `x%y` | remainder; satisfies `x == div(x, y)*y + rem(x, y)`; sign matches `x` | +| [`mod(x, y)`](@ref) | modulus; satisfies `x == fld(x, y)*y + mod(x, y)`; sign matches `y` | +| [`mod1(x, y)`](@ref) | `mod` with offset 1; returns `r∈(0, y]` for `y>0` or `r∈[y, 0)` for `y<0`, where `mod(r, y) == mod(x, y)` | +| [`mod2pi(x)`](@ref) | modulus with respect to 2pi; `0 <= mod2pi(x) < 2pi` | +| [`divrem(x, y)`](@ref) | returns `(div(x, y),rem(x, y))` | +| [`fldmod(x, y)`](@ref) | returns `(fld(x, y), mod(x, y))` | +| [`gcd(x, y...)`](@ref) | greatest positive common divisor of `x`, `y`,... | +| [`lcm(x, y...)`](@ref) | least positive common multiple of `x`, `y`,... 
| ### Sign and absolute value functions -| Function | Description | -|:----------------------- |:---------------------------------------------------------- | -| [`abs(x)`](@ref) | a positive value with the magnitude of `x` | -| [`abs2(x)`](@ref) | the squared magnitude of `x` | -| [`sign(x)`](@ref) | indicates the sign of `x`, returning -1, 0, or +1 | -| [`signbit(x)`](@ref) | indicates whether the sign bit is on (true) or off (false) | -| [`copysign(x,y)`](@ref) | a value with the magnitude of `x` and the sign of `y` | -| [`flipsign(x,y)`](@ref) | a value with the magnitude of `x` and the sign of `x*y` | +| Function | Description | +|:------------------------ |:---------------------------------------------------------- | +| [`abs(x)`](@ref) | a positive value with the magnitude of `x` | +| [`abs2(x)`](@ref) | the squared magnitude of `x` | +| [`sign(x)`](@ref) | indicates the sign of `x`, returning -1, 0, or +1 | +| [`signbit(x)`](@ref) | indicates whether the sign bit is on (true) or off (false) | +| [`copysign(x, y)`](@ref) | a value with the magnitude of `x` and the sign of `y` | +| [`flipsign(x, y)`](@ref) | a value with the magnitude of `x` and the sign of `x*y` | ### Powers, logs and roots @@ -555,15 +555,15 @@ See [Conversion and Promotion](@ref conversion-and-promotion) for how to define |:------------------------ |:-------------------------------------------------------------------------- | | [`sqrt(x)`](@ref), `√x` | square root of `x` | | [`cbrt(x)`](@ref), `∛x` | cube root of `x` | -| [`hypot(x,y)`](@ref) | hypotenuse of right-angled triangle with other sides of length `x` and `y` | +| [`hypot(x, y)`](@ref) | hypotenuse of right-angled triangle with other sides of length `x` and `y` | | [`exp(x)`](@ref) | natural exponential function at `x` | -| [`expm1(x)`](@ref) | accurate `exp(x)-1` for `x` near zero | -| [`ldexp(x,n)`](@ref) | `x*2^n` computed efficiently for integer values of `n` | +| [`expm1(x)`](@ref) | accurate `exp(x) - 1` for `x` near zero | +| [`ldexp(x, n)`](@ref) | `x * 2^n` computed efficiently for integer values of `n` | | [`log(x)`](@ref) | natural logarithm of `x` | -| [`log(b,x)`](@ref) | base `b` logarithm of `x` | +| [`log(b, x)`](@ref) | base `b` logarithm of `x` | | [`log2(x)`](@ref) | base 2 logarithm of `x` | | [`log10(x)`](@ref) | base 10 logarithm of `x` | -| [`log1p(x)`](@ref) | accurate `log(1+x)` for `x` near zero | +| [`log1p(x)`](@ref) | accurate `log(1 + x)` for `x` near zero | | [`exponent(x)`](@ref) | binary exponent of `x` | | [`significand(x)`](@ref) | binary significand (a.k.a. mantissa) of a floating-point number `x` | @@ -587,7 +587,7 @@ These are all single-argument functions, with [`atan`](@ref) also accepting two corresponding to a traditional [`atan2`](https://en.wikipedia.org/wiki/Atan2) function. Additionally, [`sinpi(x)`](@ref) and [`cospi(x)`](@ref) are provided for more accurate computations -of [`sin(pi*x)`](@ref) and [`cos(pi*x)`](@ref) respectively. +of [`sin(pi * x)`](@ref) and [`cos(pi * x)`](@ref) respectively. In order to compute trigonometric functions with degrees instead of radians, suffix the function with `d`. For example, [`sind(x)`](@ref) computes the sine of `x` where `x` is specified in degrees. 
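+
+For illustration, a short sketch of the difference this makes (the particular inputs are arbitrary):
+
+```julia-repl
+julia> sin(1.0 * pi)     # pi is rounded to Float64 before sin is applied
+1.2246467991473532e-16
+
+julia> sinpi(1.0) == 0   # sin(pi * x) computed without that intermediate rounding
+true
+
+julia> sind(180.0) == 0  # the degree-based variant is likewise exact here
+true
+```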
diff --git a/doc/src/manual/metaprogramming.md b/doc/src/manual/metaprogramming.md index a374b9c879e6a..b1623ff8591b0 100644 --- a/doc/src/manual/metaprogramming.md +++ b/doc/src/manual/metaprogramming.md @@ -11,6 +11,21 @@ code in Julia are represented by Julia data structures, powerful [reflection](ht capabilities are available to explore the internals of a program and its types just like any other data. +!!! warning + Metaprogramming is a powerful tool, but it introduces complexity that can make code more + difficult to understand. For example, it can be surprisingly hard to get scope rules + correct. Metaprogramming should typically be used only when other approaches such as + [higher order functions](@ref man-anonymous-functions) and + [closures](https://en.wikipedia.org/wiki/Closure_(computer_programming)) cannot be applied. + + `eval` and defining new macros should be typically used as a last resort. It is almost + never a good idea to use `Meta.parse` or convert an arbitrary string into Julia code. For + manipulating Julia code, use the `Expr` data structure directly to avoid the complexity + of how Julia syntax is parsed. + + The best uses of metaprogramming often implement most of their functionality in runtime + helper functions, striving to minimize the amount of code they generate. + ## Program representation Every Julia program starts life as a string: @@ -102,7 +117,7 @@ julia> Meta.show_sexpr(ex3) The `:` character has two syntactic purposes in Julia. The first form creates a [`Symbol`](@ref), an [interned string](https://en.wikipedia.org/wiki/String_interning) used as one building-block -of expressions: +of expressions, from valid identifiers: ```jldoctest julia> s = :foo @@ -116,9 +131,12 @@ The [`Symbol`](@ref) constructor takes any number of arguments and creates a new their string representations together: ```jldoctest -julia> :foo == Symbol("foo") +julia> :foo === Symbol("foo") true +julia> Symbol("1foo") # `:1foo` would not work, as `1foo` is not a valid identifier +Symbol("1foo") + julia> Symbol("func",10) :func10 @@ -126,9 +144,6 @@ julia> Symbol(:var,'_',"sym") :var_sym ``` -Note that to use `:` syntax, the symbol's name must be a valid identifier. -Otherwise the `Symbol(str)` constructor must be used. - In the context of an expression, symbols are used to indicate access to variables; when an expression is evaluated, a symbol is replaced with the value bound to that symbol in the appropriate [scope](@ref scope-of-variables). @@ -364,7 +379,7 @@ julia> ex = :(a + b) :(a + b) julia> eval(ex) -ERROR: UndefVarError: b not defined +ERROR: UndefVarError: `b` not defined in `Main` [...] julia> a = 1; b = 2; @@ -382,7 +397,7 @@ julia> ex = :(x = 1) :(x = 1) julia> x -ERROR: UndefVarError: x not defined +ERROR: UndefVarError: `x` not defined in `Main` julia> eval(ex) 1 @@ -425,7 +440,7 @@ value 1 and the variable `b`. Note the important distinction between the way `a` As hinted above, one extremely useful feature of Julia is the capability to generate and manipulate Julia code within Julia itself. We have already seen one example of a function returning [`Expr`](@ref) -objects: the [`parse`](@ref) function, which takes a string of Julia code and returns the corresponding +objects: the [`Meta.parse`](@ref) function, which takes a string of Julia code and returns the corresponding `Expr`. A function can also take one or more `Expr` objects as arguments, and return another `Expr`. 
Here is a simple, motivating example: @@ -1325,8 +1340,7 @@ julia> function sub2ind_loop(dims::NTuple{N}, I::Integer...) where N ind = I[i]-1 + dims[i]*ind end return ind + 1 - end -sub2ind_loop (generic function with 1 method) + end; julia> sub2ind_loop((3, 5), 1, 2) 4 @@ -1353,7 +1367,8 @@ Both these implementations, although different, do essentially the same thing: a over the dimensions of the array, collecting the offset in each dimension into the final index. However, all the information we need for the loop is embedded in the type information of the arguments. -Thus, we can utilize generated functions to move the iteration to compile-time; in compiler parlance, +This allows the compiler to move the iteration to compile time and eliminate the runtime loops +altogether. We can utilize generated functions to achieve a similar effect; in compiler parlance, we use generated functions to manually unroll the loop. The body becomes almost identical, but instead of calculating the linear index, we build up an *expression* that calculates the index: @@ -1364,8 +1379,7 @@ julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N ex = :(I[$i] - 1 + dims[$i] * $ex) end return :($ex + 1) - end -sub2ind_gen (generic function with 1 method) + end; julia> sub2ind_gen((3, 5), 1, 2) 4 @@ -1376,11 +1390,6 @@ julia> sub2ind_gen((3, 5), 1, 2) An easy way to find out is to extract the body into another (regular) function: ```jldoctest sub2ind_gen2 -julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N - return sub2ind_gen_impl(dims, I...) - end -sub2ind_gen (generic function with 1 method) - julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} where N length(I) == N || return :(error("partial indexing is unsupported")) ex = :(I[$N] - 1) @@ -1388,8 +1397,14 @@ julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} w ex = :(I[$i] - 1 + dims[$i] * $ex) end return :($ex + 1) - end -sub2ind_gen_impl (generic function with 1 method) + end; + +julia> @generated function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N + return sub2ind_gen_impl(dims, I...) + end; + +julia> sub2ind_gen((3, 5), 1, 2) +4 ``` We can now execute `sub2ind_gen_impl` and examine the expression it returns: @@ -1418,25 +1433,34 @@ To solve this problem, the language provides syntax for writing normal, non-gene alternative implementations of generated functions. Applied to the `sub2ind` example above, it would look like this: -```julia -function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N - if N != length(I) - throw(ArgumentError("Number of dimensions must match number of indices.")) - end - if @generated - ex = :(I[$N] - 1) - for i = (N - 1):-1:1 - ex = :(I[$i] - 1 + dims[$i] * $ex) - end - return :($ex + 1) - else - ind = I[N] - 1 - for i = (N - 1):-1:1 - ind = I[i] - 1 + dims[i]*ind - end - return ind + 1 - end -end +```jldoctest sub2ind_gen_opt +julia> function sub2ind_gen_impl(dims::Type{T}, I...) where T <: NTuple{N,Any} where N + ex = :(I[$N] - 1) + for i = (N - 1):-1:1 + ex = :(I[$i] - 1 + dims[$i] * $ex) + end + return :($ex + 1) + end; + +julia> function sub2ind_gen_fallback(dims::NTuple{N}, I) where N + ind = I[N] - 1 + for i = (N - 1):-1:1 + ind = I[i] - 1 + dims[i]*ind + end + return ind + 1 + end; + +julia> function sub2ind_gen(dims::NTuple{N}, I::Integer...) where N + length(I) == N || error("partial indexing is unsupported") + if @generated + return sub2ind_gen_impl(dims, I...) 
+ else + return sub2ind_gen_fallback(dims, I) + end + end; + +julia> sub2ind_gen((3, 5), 1, 2) +4 ``` Internally, this code creates two implementations of the function: a generated one where diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md index 6cbcc4fad6a65..ee68ee265ff95 100644 --- a/doc/src/manual/methods.md +++ b/doc/src/manual/methods.md @@ -265,8 +265,40 @@ julia> methods(+) ``` Multiple dispatch together with the flexible parametric type system give Julia its ability to -abstractly express high-level algorithms decoupled from implementation details, yet generate efficient, -specialized code to handle each case at run time. +abstractly express high-level algorithms decoupled from implementation details. + +## [Method specializations](@id man-method-specializations) + +When you create multiple methods of the same function, this is sometimes called +"specialization." In this case, you're specializing the *function* by adding additional +methods to it: each new method is a new specialization of the function. +As shown above, these specializations are returned by `methods`. + +There's another kind of specialization that occurs without programmer intervention: +Julia's compiler can automatically specialize the *method* for the specific argument types used. +Such specializations are *not* listed by `methods`, as this doesn't create new `Method`s, but tools like [`@code_typed`](@ref) allow you to inspect such specializations. + +For example, if you create a method + +``` +mysum(x::Real, y::Real) = x + y +``` + +you've given the function `mysum` one new method (possibly its only method), and that method takes any pair of `Real` number inputs. But if you then execute + +```julia-repl +julia> mysum(1, 2) +3 + +julia> mysum(1.0, 2.0) +3.0 +``` + +Julia will compile `mysum` twice, once for `x::Int, y::Int` and again for `x::Float64, y::Float64`. +The point of compiling twice is performance: the methods that get called for `+` (which `mysum` uses) vary depending on the specific types of `x` and `y`, and by compiling different specializations Julia can do all the method lookup ahead of time. This allows the program to run much more quickly, since it does not have to bother with method lookup while it is running. +Julia's automatic specialization allows you to write generic algorithms and expect that the compiler will generate efficient, specialized code to handle each case you need. + +In cases where the number of potential specializations might be effectively unlimited, Julia may avoid this default specialization. See [Be aware of when Julia avoids specializing](@ref) for more information. ## [Method Ambiguities](@id man-ambiguities) @@ -290,10 +322,10 @@ julia> g(2.0, 3.0) ERROR: MethodError: g(::Float64, ::Float64) is ambiguous. Candidates: - g(x::Float64, y) - @ Main none:1 g(x, y::Float64) @ Main none:1 + g(x::Float64, y) + @ Main none:1 Possible fix, define g(::Float64, ::Float64) @@ -302,10 +334,11 @@ Stacktrace: [...] ``` -Here the call `g(2.0, 3.0)` could be handled by either the `g(Float64, Any)` or the `g(Any, Float64)` -method, and neither is more specific than the other. In such cases, Julia raises a [`MethodError`](@ref) -rather than arbitrarily picking a method. You can avoid method ambiguities by specifying an appropriate -method for the intersection case: +Here the call `g(2.0, 3.0)` could be handled by either the `g(::Float64, ::Any)` or the +`g(::Any, ::Float64)` method. 
The order in which the methods are defined does not matter and +neither is more specific than the other. In such cases, Julia raises a +[`MethodError`](@ref) rather than arbitrarily picking a method. You can avoid method +ambiguities by specifying an appropriate method for the intersection case: ```jldoctest gofxy julia> g(x::Float64, y::Float64) = 2x + 2y @@ -374,7 +407,20 @@ Here's an example where the method type parameter `T` is used as the type parame type `Vector{T}` in the method signature: ```jldoctest -julia> myappend(v::Vector{T}, x::T) where {T} = [v..., x] +julia> function myappend(v::Vector{T}, x::T) where {T} + return [v..., x] + end +myappend (generic function with 1 method) +``` + +The type parameter `T` in this example ensures that the added element `x` is a subtype of the +existing eltype of the vector `v`. +The `where` keyword introduces a list of those constraints after the method signature definition. +This works the same for one-line definitions, as seen above, and must appear _before_ the [return +type declaration](@ref man-functions-return-type), if present, as illustrated below: + +```jldoctest +julia> (myappend(v::Vector{T}, x::T)::Vector) where {T} = [v..., x] myappend (generic function with 1 method) julia> myappend([1,2,3],4) @@ -412,9 +458,9 @@ Stacktrace: [...] ``` -As you can see, the type of the appended element must match the element type of the vector it -is appended to, or else a [`MethodError`](@ref) is raised. In the following example, the method type parameter -`T` is used as the return value: +If the type of the appended element does not match the element type of the vector it is appended to, +a [`MethodError`](@ref) is raised. +In the following example, the method's type parameter `T` is used as the return value: ```jldoctest julia> mytypeof(x::T) where {T} = T @@ -1215,5 +1261,6 @@ function f2(inc) x -> x - 1 end end +``` [^Clarke61]: Arthur C. Clarke, *Profiles of the Future* (1961): Clarke's Third Law. diff --git a/doc/src/manual/missing.md b/doc/src/manual/missing.md index 9bddcdfbb2ac2..8c8e801ccac9a 100644 --- a/doc/src/manual/missing.md +++ b/doc/src/manual/missing.md @@ -88,7 +88,7 @@ true ``` The [`isless`](@ref) operator is another exception: `missing` is considered -as greater than any other value. This operator is used by [`sort`](@ref), +as greater than any other value. This operator is used by [`sort!`](@ref), which therefore places `missing` values after all other values: ```jldoctest diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md index c6009594bea2d..8c366616bac49 100644 --- a/doc/src/manual/modules.md +++ b/doc/src/manual/modules.md @@ -7,16 +7,17 @@ Modules in Julia help organize code into coherent units. They are delimited synt allows the same name to be used for different functions or global variables without conflict, as long as they are in separate modules. 2. Modules have facilities for detailed namespace management: each defines a set of names it - `export`s, and can import names from other modules with `using` and `import` (we explain these below). + `export`s and marks as `public`, and can import names from other modules with `using` and + `import` (we explain these below). -3. Modules can be precompiled for faster loading, and contain code for runtime initialization. +3. Modules can be precompiled for faster loading, and may contain code for runtime initialization. 
Typically, in larger Julia packages you will see module code organized into files, eg ```julia module SomeModule -# export, using, import statements are usually here; we discuss these below +# export, public, using, import statements are usually here; we discuss these below include("file1.jl") include("file2.jl") @@ -103,6 +104,12 @@ Also, some modules don't export names at all. This is usually done if they use c words, such as `derivative`, in their API, which could easily clash with the export lists of other modules. We will see how to manage name clashes below. +To mark a name as public without exporting it into the namespace of folks who call `using NiceStuff`, +one can use `public` instead of `export`. This marks the public name(s) as part of the public API, +but does not have any namespace implications. The `public` keyword is only available in Julia 1.11 +and above. To maintain compatibility with Julia 1.10 and below, use the `@compat` macro from the +[Compat](https://github.com/JuliaLang/Compat.jl) package. + ### Standalone `using` and `import` Possibly the most common way of loading a module is `using ModuleName`. This [loads](@ref @@ -143,7 +150,7 @@ As we will see in the next section `import .NiceStuff` is equivalent to `using . You can combine multiple `using` and `import` statements of the same kind in a comma-separated expression, e.g. ```jldoctest module_manual -julia> using LinearAlgebra, Statistics +julia> using LinearAlgebra, Random ``` ### `using` and `import` with specific identifiers, and adding methods @@ -171,7 +178,7 @@ julia> using .NiceStuff: nice julia> struct Cat end julia> nice(::Cat) = "nice 😸" -ERROR: error in method definition: function NiceStuff.nice must be explicitly imported to be extended +ERROR: invalid method definition in Main: function NiceStuff.nice must be explicitly imported to be extended Stacktrace: [1] top-level scope @ none:0 @@ -281,7 +288,7 @@ julia> using .A, .B julia> f WARNING: both B and A export "f"; uses of it in module Main must be qualified -ERROR: UndefVarError: f not defined +ERROR: UndefVarError: `f` not defined in `Main` ``` Here, Julia cannot decide which `f` you are referring to, so you have to make a choice. The following solutions are commonly used: @@ -325,7 +332,17 @@ include(p) = Base.include(Mod, p) end ``` -If even `Core` is not wanted, a module that imports nothing and defines no names at all can be defined with `Module(:YourNameHere, false, false)` and code can be evaluated into it with [`@eval`](@ref) or [`Core.eval`](@ref). +If even `Core` is not wanted, a module that imports nothing and defines no names at all can be defined with `Module(:YourNameHere, false, false)` and code can be evaluated into it with [`@eval`](@ref) or [`Core.eval`](@ref): +```jldoctest +julia> arithmetic = Module(:arithmetic, false, false) +Main.arithmetic + +julia> @eval arithmetic add(x, y) = $(+)(x, y) +add (generic function with 1 method) + +julia> arithmetic.add(12, 13) +25 +``` ### Standard modules @@ -387,7 +404,7 @@ x = 0 module Sub using ..TestPackage -z = y # ERROR: UndefVarError: y not defined +z = y # ERROR: UndefVarError: `y` not defined in `Main` end y = 1 @@ -403,7 +420,7 @@ For similar reasons, you cannot use a cyclic ordering: module A module B -using ..C # ERROR: UndefVarError: C not defined +using ..C # ERROR: UndefVarError: `C` not defined in `Main.A` end module C @@ -419,22 +436,28 @@ Large modules can take several seconds to load because executing all of the stat often involves compiling a large amount of code. 
Julia creates precompiled caches of the module to reduce this time. -The incremental precompiled module file are created and used automatically when using `import` -or `using` to load a module. This will cause it to be automatically compiled the first time -it is imported. Alternatively, you can manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref). The resulting -cache files will be stored in `DEPOT_PATH[1]/compiled/`. Subsequently, the module is automatically -recompiled upon `using` or `import` whenever any of its dependencies change; dependencies are modules it +Precompiled module files (sometimes called "cache files") are created and used automatically when `import` or `using` loads a module. If the cache file(s) do not yet exist, the module will be compiled and saved for future reuse. You can also manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref) to create these files without loading the module. The resulting +cache files will be stored in the `compiled` subfolder of `DEPOT_PATH[1]`. If nothing about your system changes, +such cache files will be used when you load the module with `import` or `using`. + +Precompilation cache files store definitions of modules, types, methods, and constants. They may also store method specializations and the code generated for them, but this typically requires that the developer add explicit [`precompile`](@ref) directives or execute workloads that force compilation during the package build. + +However, if you update the module's dependencies or change its source code, the module is automatically +recompiled upon `using` or `import`. Dependencies are modules it imports, the Julia build, files it includes, or explicit dependencies declared by [`include_dependency(path)`](@ref) in the module file(s). -For file dependencies, a change is determined by examining whether the modification time (`mtime`) -of each file loaded by `include` or added explicitly by `include_dependency` is unchanged, or equal -to the modification time truncated to the nearest second (to accommodate systems that can't copy -mtime with sub-second accuracy). It also takes into account whether the path to the file chosen +For file dependencies loaded by `include`, a change is determined by examining whether the +file size (`fsize`) or content (condensed into a hash) is unchanged. +For file dependencies loaded by `include_dependency` a change is determined by examining whether the modification time (`mtime`) +is unchanged, or equal to the modification time truncated to the nearest second +(to accommodate systems that can't copy mtime with sub-second accuracy). +It also takes into account whether the path to the file chosen by the search logic in `require` matches the path that had created the precompile file. It also takes into account the set of dependencies already loaded into the current process and won't recompile those modules, even if their files change or disappear, in order to avoid creating incompatibilities between the running system and the precompile cache. +Finally, it takes account of changes in any [compile-time preferences](@ref preferences). If you know that a module is *not* safe to precompile (for example, for one of the reasons described below), you should @@ -576,9 +599,19 @@ A few other points to be aware of: an error to do this, but you simply need to be prepared that the system will try to copy some of these and to create a single unique instance of others. 
-It is sometimes helpful during module development to turn off incremental precompilation. The -command line flag `--compiled-modules={yes|no}` enables you to toggle module precompilation on and -off. When Julia is started with `--compiled-modules=no` the serialized modules in the compile cache -are ignored when loading modules and module dependencies. `Base.compilecache` can still be called -manually. The state of this command line flag is passed to `Pkg.build` to disable automatic -precompilation triggering when installing, updating, and explicitly building packages. +It is sometimes helpful during module development to turn off incremental precompilation. +The command line flag `--compiled-modules={yes|no|existing}` enables you to toggle module +precompilation on and off. When Julia is started with `--compiled-modules=no` the serialized +modules in the compile cache are ignored when loading modules and module dependencies. In +some cases, you may want to load existing precompiled modules, but not create new ones. This +can be done by starting Julia with `--compiled-modules=existing`. More fine-grained control +is available with `--pkgimages={yes|no|existing}`, which only affects native-code storage +during precompilation. `Base.compilecache` can still be called manually. The state of this +command line flag is passed to `Pkg.build` to disable automatic precompilation triggering +when installing, updating, and explicitly building packages. + +You can also debug some precompilation failures with environment variables. Setting +`JULIA_VERBOSE_LINKING=true` may help resolve failures in linking shared libraries of +compiled native code. See the **Developer Documentation** part of the Julia manual, where +you will find further details in the section documenting Julia's internals under "Package +Images". diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md index b20d0e54f1087..71e49e65b19a8 100644 --- a/doc/src/manual/multi-threading.md +++ b/doc/src/manual/multi-threading.md @@ -20,14 +20,14 @@ specified, then `-t`/`--threads` takes precedence. The number of threads can either be specified as an integer (`--threads=4`) or as `auto` (`--threads=auto`), where `auto` tries to infer a useful default number of threads to use -(see [Command-line Options](@ref command-line-options) for more details). +(see [Command-line Options](@ref command-line-interface) for more details). !!! compat "Julia 1.5" The `-t`/`--threads` command line argument requires at least Julia 1.5. In older versions you must use the environment variable instead. !!! compat "Julia 1.7" - Using `auto` as value of the environment variable `JULIA_NUM_THREADS` requires at least Julia 1.7. + Using `auto` as value of the environment variable [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) requires at least Julia 1.7. In older versions, this value is ignored. Lets start Julia with 4 threads: @@ -72,6 +72,15 @@ julia> Threads.threadid() three processes have 2 threads enabled. For more fine grained control over worker threads use [`addprocs`](@ref) and pass `-t`/`--threads` as `exeflags`. +### Multiple GC Threads + +The Garbage Collector (GC) can use multiple threads. The amount used is either half the number +of compute worker threads or configured by either the `--gcthreads` command line argument or by using the +[`JULIA_NUM_GC_THREADS`](@ref JULIA_NUM_GC_THREADS) environment variable. + +!!! compat "Julia 1.10" + The `--gcthreads` command line argument requires at least Julia 1.10. 
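+
+For example (an illustrative invocation; pick counts appropriate for your machine), either of the
+following starts Julia with 8 compute threads and 4 GC threads:
+
+```bash
+$ julia --threads 8 --gcthreads=4
+
+$ JULIA_NUM_GC_THREADS=4 julia --threads 8
+```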
+ ## [Threadpools](@id man-threadpools) When a program's threads are busy with many tasks to run, tasks may experience @@ -93,7 +102,7 @@ Julia may be started with one or more threads reserved to run interactive tasks: $ julia --threads 3,1 ``` -The environment variable `JULIA_NUM_THREADS` can also be used similarly: +The environment variable [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) can also be used similarly: ```bash export JULIA_NUM_THREADS=3,1 ``` @@ -104,73 +113,28 @@ the `:interactive` threadpool: ```julia-repl julia> using Base.Threads -julia> nthreads() -4 - julia> nthreadpools() 2 julia> threadpool() :default +julia> nthreads(:default) +3 + julia> nthreads(:interactive) 1 -``` - -Either or both numbers can be replaced with the word `auto`, which causes -Julia to choose a reasonable default. - -## Communication and synchronization - -Although Julia's threads can communicate through shared memory, it is notoriously -difficult to write correct and data-race free multi-threaded code. Julia's -[`Channel`](@ref)s are thread-safe and may be used to communicate safely. - -### Data-race freedom -You are entirely responsible for ensuring that your program is data-race free, -and nothing promised here can be assumed if you do not observe that -requirement. The observed results may be highly unintuitive. - -The best way to ensure this is to acquire a lock around any access to data that -can be observed from multiple threads. For example, in most cases you should -use the following code pattern: - -```julia-repl -julia> lock(lk) do - use(a) - end - -julia> begin - lock(lk) - try - use(a) - finally - unlock(lk) - end - end +julia> nthreads() +3 ``` -where `lk` is a lock (e.g. `ReentrantLock()`) and `a` data. - -Additionally, Julia is not memory safe in the presence of a data race. Be very -careful about reading _any_ data if another thread might write to it! -Instead, always use the lock pattern above when changing data (such as assigning -to a global or closure variable) accessed by other threads. -```julia -Thread 1: -global b = false -global a = rand() -global b = true - -Thread 2: -while !b; end -bad_read1(a) # it is NOT safe to access `a` here! +!!! note + The zero-argument version of `nthreads` returns the number of threads + in the default pool. -Thread 3: -while !@isdefined(a); end -bad_read2(a) # it is NOT safe to access `a` here -``` +Either or both numbers can be replaced with the word `auto`, which causes +Julia to choose a reasonable default. ## The `@threads` Macro @@ -223,7 +187,133 @@ julia> a Note that [`Threads.@threads`](@ref) does not have an optional reduction parameter like [`@distributed`](@ref). -## Atomic Operations +### Using `@threads` without data-races +The concept of a data-race is elaborated on in ["Communication and data races between threads"](@ref man-communication-and-data-races). For now, just known that a data race can result in incorrect results and dangerous errors. + +Lets say we want to make the function `sum_single` below multithreaded. +```julia-repl +julia> function sum_single(a) + s = 0 + for i in a + s += i + end + s + end +sum_single (generic function with 1 method) + +julia> sum_single(1:1_000_000) +500000500000 +``` + +Simply adding `@threads` exposes a data race with multiple threads reading and writing `s` at the same time. 
+```julia-repl +julia> function sum_multi_bad(a) + s = 0 + Threads.@threads for i in a + s += i + end + s + end +sum_multi_bad (generic function with 1 method) + +julia> sum_multi_bad(1:1_000_000) +70140554652 +``` + +Note that the result is not `500000500000` as it should be, and will most likely change each evaluation. + +To fix this, buffers that are specific to the task may be used to segment the sum into chunks that are race-free. +Here `sum_single` is reused, with its own internal buffer `s`. The input vector `a` is split into `nthreads()` +chunks for parallel work. We then use `Threads.@spawn` to create tasks that individually sum each chunk. Finally, we sum the results from each task using `sum_single` again: +```julia-repl +julia> function sum_multi_good(a) + chunks = Iterators.partition(a, length(a) ÷ Threads.nthreads()) + tasks = map(chunks) do chunk + Threads.@spawn sum_single(chunk) + end + chunk_sums = fetch.(tasks) + return sum_single(chunk_sums) + end +sum_multi_good (generic function with 1 method) + +julia> sum_multi_good(1:1_000_000) +500000500000 +``` +!!! note + Buffers should not be managed based on `threadid()` i.e. `buffers = zeros(Threads.nthreads())` because concurrent tasks + can yield, meaning multiple concurrent tasks may use the same buffer on a given thread, introducing risk of data races. + Further, when more than one thread is available tasks may change thread at yield points, which is known as + [task migration](@ref man-task-migration). + +Another option is the use of atomic operations on variables shared across tasks/threads, which may be more performant +depending on the characteristics of the operations. + +## [Communication and data-races between threads](@id man-communication-and-data-races) + +Although Julia's threads can communicate through shared memory, it is notoriously difficult to write correct and data-race free multi-threaded code. Julia's +[`Channel`](@ref)s are thread-safe and may be used to communicate safely. There are also sections below that explain how to use [locks](@ref man-using-locks) and [atomics](@ref man-atomic-operations) to avoid data-races. + +### Data-race freedom + +You are entirely responsible for ensuring that your program is data-race free, +and nothing promised here can be assumed if you do not observe that +requirement. The observed results may be highly unintuitive. + +If data-races are introduced, Julia is not memory safe. **Be very +careful about reading _any_ data if another thread might write to it, as it could result in segmentation faults or worse**. Below are a couple of unsafe ways to access global variables from different threads: +```julia +Thread 1: +global b = false +global a = rand() +global b = true + +Thread 2: +while !b; end +bad_read1(a) # it is NOT safe to access `a` here! + +Thread 3: +while !@isdefined(a); end +bad_read2(a) # it is NOT safe to access `a` here +``` + +### [Using locks to avoid data-races](@id man-using-locks) +An important tool to avoid data-races, and thereby write thread-safe code, is the concept of a "lock". A lock can be locked and unlocked. If a thread has locked a lock, and not unlocked it, it is said to "hold" the lock. If there is only one lock, and we write code the requires holding the lock to access some data, we can ensure that multiple threads will never access the same data simultaneously. Note that the link between a lock and a variable is made by the programmer, and not the program. 
+ +For example, we can create a lock `my_lock`, and lock it while we mutate a variable `my_variable`. This is done most simply with the `@lock` macro: + +```julia-repl +julia> my_lock = ReentrantLock(); + +julia> my_variable = [1, 2, 3]; + +julia> @lock my_lock my_variable[1] = 100 +100 +``` + +By using a similar pattern with the same lock and variable, but on another thread, the operations are free from data-races. + +We could have performed the operation above with the functional version of `lock`, in the following two ways: +```julia-repl +julia> lock(my_lock) do + my_variable[1] = 100 + end +100 + +julia> begin + lock(my_lock) + try + my_variable[1] = 100 + finally + unlock(my_lock) + end + end +100 +``` + +All three options are equivalent. Note how the final version requires an explicit `try`-block to ensure that the lock is always unlocked, whereas the first two version do this internally. One should always use the lock pattern above when changing data (such as assigning +to a global or closure variable) accessed by other threads. Failing to do this could have unforeseen and serious consequences. + +### [Atomic Operations](@id man-atomic-operations) Julia supports accessing and modifying values *atomically*, that is, in a thread-safe way to avoid [race conditions](https://en.wikipedia.org/wiki/Race_condition). A value (which must be of a primitive @@ -267,7 +357,7 @@ avoid the race: ```julia-repl julia> using Base.Threads -julia> nthreads() +julia> Threads.nthreads() 4 julia> acc = Ref(0) @@ -292,7 +382,7 @@ julia> acc[] ``` -## [Per-field atomics](@id man-atomics) +#### [Per-field atomics](@id man-atomics) We can also use atomics on a more granular level using the [`@atomic`](@ref Base.@atomic), [`@atomicswap`](@ref Base.@atomicswap), and @@ -361,10 +451,7 @@ threads in Julia: multiple threads where at least one thread modifies the collection (common examples include `push!` on arrays, or inserting items into a `Dict`). - * `@threads` currently uses a static schedule, using all threads and assigning - equal iteration counts to each. In the future the default schedule is likely - to change to be dynamic. - * The schedule used by `@spawn` is nondeterministic and should not be relied on. + * The schedule used by [`@spawn`](@ref Threads.@spawn) is nondeterministic and should not be relied on. * Compute-bound, non-memory-allocating tasks can prevent garbage collection from running in other threads that are allocating memory. In these cases it may be necessary to insert a manual call to `GC.safepoint()` to allow GC to run. @@ -373,9 +460,24 @@ threads in Julia: method, and module definitions in parallel. * Be aware that finalizers registered by a library may break if threads are enabled. This may require some transitional work across the ecosystem before threading - can be widely adopted with confidence. See the next section for further details. + can be widely adopted with confidence. See the section on + [the safe use of finalizers](@ref man-finalizers) for further details. + +## [Task Migration](@id man-task-migration) + +After a task starts running on a certain thread it may move to a different thread if the task yields. + +Such tasks may have been started with [`@spawn`](@ref Threads.@spawn) or [`@threads`](@ref Threads.@threads), +although the `:static` schedule option for `@threads` does freeze the threadid. 
+ +This means that in most cases [`threadid()`](@ref Threads.threadid) should not be treated as constant within a task, +and therefore should not be used to index into a vector of buffers or stateful objects. + +!!! compat "Julia 1.7" + Task migration was introduced in Julia 1.7. Before this tasks always remained on the same thread that they were + started on. -## Safe use of Finalizers +## [Safe use of Finalizers](@id man-finalizers) Because finalizers can interrupt any code, they must be very careful in how they interact with any global state. Unfortunately, the main reason that diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md index fc62632433850..45bf60a7944d2 100644 --- a/doc/src/manual/networking-and-streams.md +++ b/doc/src/manual/networking-and-streams.md @@ -1,9 +1,10 @@ # Networking and Streams Julia provides a rich interface to deal with streaming I/O objects such as terminals, pipes and -TCP sockets. This interface, though asynchronous at the system level, is presented in a synchronous -manner to the programmer and it is usually unnecessary to think about the underlying asynchronous -operation. This is achieved by making heavy use of Julia cooperative threading ([coroutine](@ref man-tasks)) +TCP sockets. +These objects allow data to be sent and received in a stream-like fashion, which means that data is processed sequentially as it becomes available. +This interface, though asynchronous at the system level, is presented in a synchronous manner to the programmer. +This is achieved by making heavy use of Julia cooperative threading ([coroutine](@ref man-tasks)) functionality. ## Basic Stream I/O @@ -66,8 +67,8 @@ abcd "abcd" ``` -Note that depending on your terminal settings, your TTY may be line buffered and might thus require -an additional enter before the data is sent to Julia. +Note that depending on your terminal settings, your TTY ("teletype terminal") may be line buffered and might thus require an additional enter before `stdin` data is sent to Julia. +When running Julia from the command line in a TTY, output is sent to the console by default, and standard input is read from the keyboard. To read every line from [`stdin`](@ref) you can use [`eachline`](@ref): @@ -120,7 +121,28 @@ of common properties. ## Working with Files -Like many other environments, Julia has an [`open`](@ref) function, which takes a filename and +You can write content to a file with the `write(filename::String, content)` method: + +```julia-repl +julia> write("hello.txt", "Hello, World!") +13 +``` + +_(`13` is the number of bytes written.)_ + +You can read the contents of a file with the `read(filename::String)` method, or `read(filename::String, String)` +to the contents as a string: + +```julia-repl +julia> read("hello.txt", String) +"Hello, World!" +``` + + +### Advanced: streaming files + +The `read` and `write` methods above allow you to read and write file contents. Like many other +environments, Julia also has an [`open`](@ref) function, which takes a filename and returns an [`IOStream`](@ref) object that you can use to read and write things from the file. For example, if we have a file, `hello.txt`, whose contents are `Hello, World!`: @@ -184,6 +206,24 @@ julia> open("hello.txt") do f "HELLO AGAIN." 
``` +If you want to redirect stdout to a file + +```# Open file for writing +out_file = open("output.txt", "w") + +# Redirect stdout to file +redirect_stdout(out_file) do + # Your code here + println("This output goes to `out_file` via the `stdout` variable.") +end + +# Close file +close(out_file) + +``` + +Redirecting stdout to a file can help you save and analyze program output, automate processes, and meet compliance requirements. + ## A simple TCP example Let's jump right in with a simple example involving TCP sockets. @@ -315,7 +355,6 @@ ip"74.125.226.225" ## Asynchronous I/O - All I/O operations exposed by [`Base.read`](@ref) and [`Base.write`](@ref) can be performed asynchronously through the use of [coroutines](@ref man-tasks). You can create a new coroutine to read from or write to a stream using the [`@async`](@ref) macro: @@ -368,7 +407,7 @@ UDP can use special multicast addresses to allow simultaneous communication betw To transmit data over UDP multicast, simply `recv` on the socket, and the first packet received will be returned. Note that it may not be the first packet that you sent however! -``` +```julia using Sockets group = ip"228.5.6.7" socket = Sockets.UDPSocket() @@ -384,7 +423,7 @@ close(socket) To transmit data over UDP multicast, simply `send` to the socket. Notice that it is not necessary for a sender to join the multicast group. -``` +```julia using Sockets group = ip"228.5.6.7" socket = Sockets.UDPSocket() @@ -397,7 +436,8 @@ close(socket) This example gives the same functionality as the previous program, but uses IPv6 as the network-layer protocol. Listener: -``` + +```julia using Sockets group = Sockets.IPv6("ff05::5:6:7") socket = Sockets.UDPSocket() @@ -409,7 +449,8 @@ close(socket) ``` Sender: -``` + +```julia using Sockets group = Sockets.IPv6("ff05::5:6:7") socket = Sockets.UDPSocket() diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md index dc3093ad8db6b..ef47a7635d50a 100644 --- a/doc/src/manual/noteworthy-differences.md +++ b/doc/src/manual/noteworthy-differences.md @@ -46,7 +46,7 @@ may trip up Julia users accustomed to MATLAB: * A Julia script may contain any number of functions, and all definitions will be externally visible when the file is loaded. Function definitions can be loaded from files outside the current working directory. - * In Julia, reductions such as [`sum`](@ref), [`prod`](@ref), and [`max`](@ref) are performed + * In Julia, reductions such as [`sum`](@ref), [`prod`](@ref), and [`maximum`](@ref) are performed over every element of an array when called with a single argument, as in `sum(A)`, even if `A` has more than one dimension. * In Julia, parentheses must be used to call a function with zero arguments, like in [`rand()`](@ref). @@ -214,6 +214,7 @@ For users coming to Julia from R, these are some noteworthy differences: Python's special interpretation of negative indexing, `a[-1]` and `a[-2]`, should be written `a[end]` and `a[end-1]` in Julia. * Julia requires `end` for indexing until the last element. `x[1:]` in Python is equivalent to `x[2:end]` in Julia. + * In Julia, `:` before any object creates a [`Symbol`](@ref) or *quotes* an expression; so, `x[:5]` is same as `x[5]`. If you want to get the first `n` elements of an array, then use range indexing. * Julia's range indexing has the format of `x[start:step:stop]`, whereas Python's format is `x[start:(stop+1):step]`. Hence, `x[0:10:2]` in Python is equivalent to `x[1:2:10]` in Julia. 
Similarly, `x[::-1]` in Python, which refers to the reversed array, is equivalent to `x[end:-1:1]` in Julia. * In Julia, ranges can be constructed independently as `start:step:stop`, the same syntax it uses in array-indexing. The `range` function is also supported. @@ -249,7 +250,8 @@ For users coming to Julia from R, these are some noteworthy differences: * There are no classes in Julia. Instead there are structures (mutable or immutable), containing data but no methods. * Calling a method of a class instance in Python (`x = MyClass(*args); x.f(y)`) corresponds to a function call in Julia, e.g. `x = MyType(args...); f(x, y)`. In general, multiple dispatch is more flexible and powerful than the Python class system. * Julia structures may have exactly one abstract supertype, whereas Python classes can inherit from one or more (abstract or concrete) superclasses. - * The logical Julia program structure (Packages and Modules) is independent of the file structure (`include` for additional files), whereas the Python code structure is defined by directories (Packages) and files (Modules). + * The logical Julia program structure (Packages and Modules) is independent of the file structure, whereas the Python code structure is defined by directories (Packages) and files (Modules). + * In Julia, it is idiomatic to split the text of large modules into multiple files, without introducing a new module per file. The code is reassembled inside a single module in a main file via `include`. While the Python equivalent (`exec`) is not typical for this use (it will silently clobber prior definitions), Julia programs are defined as a unit at the `module` level with `using` or `import`, which will only get executed once when first needed--like `include` in Python. Within those modules, the individual files that make up that module are loaded with `include` by listing them once in the intended order. * The ternary operator `x > 0 ? 1 : -1` in Julia corresponds to a conditional expression in Python `1 if x > 0 else -1`. * In Julia the `@` symbol refers to a macro, whereas in Python it refers to a decorator. * Exception handling in Julia is done using `try` — `catch` — `finally`, instead of `try` — `except` — `finally`. In contrast to Python, it is not recommended to use exception handling as part of the normal workflow in Julia (compared with Python, Julia is faster at ordinary control flow but slower at exception-catching). @@ -351,6 +353,98 @@ For users coming to Julia from R, these are some noteworthy differences: it's more general than that since methods are dispatched on every argument type, not only `this`, using the most-specific-declaration rule). +### Julia ⇔ C/C++: Namespaces + * C/C++ `namespace`s correspond roughly to Julia `module`s. + * There are no private globals or fields in Julia. Everything is publicly accessible + through fully qualified paths (or relative paths, if desired). + * `using MyNamespace::myfun` (C++) corresponds roughly to `import MyModule: myfun` (Julia). + * `using namespace MyNamespace` (C++) corresponds roughly to `using MyModule` (Julia) + * In Julia, only `export`ed symbols are made available to the calling module. + * In C++, only elements found in the included (public) header files are made available. + * Caveat: `import`/`using` keywords (Julia) also *load* modules (see below). + * Caveat: `import`/`using` (Julia) works only at the global scope level (`module`s) + * In C++, `using namespace X` works within arbitrary scopes (ex: function scope). 
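+
+A rough sketch of these correspondences (the module and function names below are invented for
+illustration):
+
+```julia
+module MyNamespaceLike
+export visible_fun              # roughly: a declaration exposed through a public header
+visible_fun() = "exported"
+internal_fun() = "unexported"   # there is no `private`; qualified access always works
+end
+
+using .MyNamespaceLike          # ~ `using namespace MyNamespaceLike;` (exports come into scope)
+visible_fun()
+MyNamespaceLike.internal_fun()  # everything remains reachable via a fully qualified path
+
+import .MyNamespaceLike: visible_fun   # ~ `using MyNamespaceLike::visible_fun;`
+```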
+
+### Julia ⇔ C/C++: Module loading
+  * When you think of a C/C++ "**library**", you are likely looking for a Julia "**package**".
+    * Caveat: C/C++ libraries often house multiple "software modules" whereas Julia
+      "packages" typically house one.
+    * Reminder: Julia `module`s are global scopes (not necessarily "software modules").
+  * **Instead of build/`make` scripts**, Julia uses "Project Environments" (sometimes called
+    either "Project" or "Environment").
+    * Build scripts are only needed for more complex applications
+      (like those needing to compile or download C/C++ executables).
+    * To develop an application or project in Julia, you can initialize its root directory
+      as a "Project Environment", and house application-specific code/packages there.
+      This provides good control over project dependencies, and future reproducibility.
+    * Available packages are added to a "Project Environment" with the `Pkg.add()` function or Pkg REPL mode.
+      (This does not **load** said package, however).
+    * The list of available packages (direct dependencies) for a "Project Environment" is
+      saved in its `Project.toml` file.
+    * The *full* dependency information for a "Project Environment" is auto-generated & saved
+      in its `Manifest.toml` file by `Pkg.resolve()`.
+  * Packages ("software modules") available to the "Project Environment" are loaded with
+    `import` or `using`.
+    * In C/C++, you `#include ` to get object/function declarations, and link in
+      libraries when you build the executable.
+    * In Julia, calling `using`/`import` again just brings the existing module into scope, but does not load it again
+      (similar to adding the non-standard `#pragma once` to C/C++).
+  * **Directory-based package repositories** (Julia) can be made available by adding repository
+    paths to the `Base.LOAD_PATH` array.
+    * Packages from directory-based repositories do not require the `Pkg.add()` tool prior to
+      being loaded with `import` or `using`. They are simply available to the project.
+    * Directory-based package repositories are the **quickest solution** to developing local
+      libraries of "software modules".
+
+### Julia ⇔ C/C++: Assembling modules
+  * In C/C++, `.c`/`.cpp` files are compiled & added to a library with build/`make` scripts.
+  * In Julia, `import [PkgName]`/`using [PkgName]` statements load `[PkgName].jl` located
+    in a package's `[PkgName]/src/` subdirectory.
+  * In turn, `[PkgName].jl` typically loads associated source files with calls to
+    `include "[someotherfile].jl"`.
+  * `include "./path/to/somefile.jl"` (Julia) is very similar to
+    `#include "./path/to/somefile.jl"` (C/C++).
+    * However `include "..."` (Julia) is not used to include header files (not required).
+    * **Do not use** `include "..."` (Julia) to load code from other "software modules"
+      (use `import`/`using` instead).
+    * `include "path/to/some/module.jl"` (Julia) would instantiate multiple versions of the
+      same code in different modules (creating *distinct* types (etc.) with the *same* names).
+    * `include "somefile.jl"` is typically used to assemble multiple files *within the same
+      Julia package* ("software module"). It is therefore relatively straightforward to ensure
+      files are `include`d only once (no `#ifdef` confusion).
+
+### Julia ⇔ C/C++: Module interface
+  * C++ exposes interfaces using "public" `.h`/`.hpp` files whereas Julia `module`s mark
+    specific symbols that are intended for their users as `public` or `export`ed.
+    * Often, Julia `module`s simply add functionality by generating new "methods" to existing
+      functions (ex: `Base.push!`).
+  * Developers of Julia packages therefore cannot rely on header files for interface
+    documentation.
+    * Interfaces for Julia packages are typically described using docstrings, README.md,
+      static web pages, ...
+  * Some developers choose not to `export` all symbols required to use their package/module,
+    but should still mark unexported user facing symbols as `public`.
+    * Users might be expected to access these components by qualifying functions/structs/...
+      with the package/module name (ex: `MyModule.run_this_task(...)`).
+
+### Julia ⇔ C/C++: Quick reference
+
+| Software Concept | Julia | C/C++ |
+| :--- | :--- | :--- |
+| unnamed scope | `begin` ... `end` | `{` ... `}` |
+| function scope | `function x()` ... `end` | `int x() {` ... `}` |
+| global scope | `module MyMod` ... `end` | `namespace MyNS {` ... `}` |
+| software module | A Julia "package" | `.h`/`.hpp` files<br>+compiled `somelib.a` |
+| assembling<br>software modules | `SomePkg.jl`: ...<br>`import("subfile1.jl")`<br>`import("subfile2.jl")`<br>... | `$(AR) *.o` ⇒ `somelib.a` |
+| import<br>software module | `import SomePkg` | `#include `<br>+link in `somelib.a` |
+| module library | `LOAD_PATH[]`, \*Git repository,<br>\*\*custom package registry | more `.h`/`.hpp` files<br>+bigger compiled `somebiglib.a` |
+
+\* The Julia package manager supports registering multiple packages from a single Git repository.<br>
+\* This allows users to house a library of related packages in a single repository.<br>
+\*\* Julia registries are primarily designed to provide versioning \& distribution of packages.<br>
+\*\* Custom package registries can be used to create a type of module library. + + ## Noteworthy differences from Common Lisp - Julia uses 1-based indexing for arrays by default, and it can also handle arbitrary [index offsets](@ref man-custom-indices). diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index 8403b71b524a4..68ee5132f8592 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -90,7 +90,14 @@ On the first call (`@time sum_global()`) the function gets compiled. (If you've in this session, it will also compile functions needed for timing.) You should not take the results of this run seriously. For the second run, note that in addition to reporting the time, it also indicated that a significant amount of memory was allocated. We are here just computing a sum over all elements in -a vector of 64-bit floats so there should be no need to allocate memory (at least not on the heap which is what `@time` reports). +a vector of 64-bit floats so there should be no need to allocate (heap) memory. + +We should clarify that what `@time` reports is specifically *heap* allocations, which are typically needed for either +mutable objects or for creating/growing variable-sized containers (such as `Array` or `Dict`, strings, or "type-unstable" +objects whose type is only known at runtime). Allocating (or deallocating) such blocks of memory may require an expensive +system call (e.g. via `malloc` in C), and they must be tracked for garbage collection. In contrast, immutable values like +numbers (except bignums), tuples, and immutable `struct`s can be stored much more cheaply, e.g. in stack or CPU-register +memory, so one doesn’t typically worry about the performance cost of "allocating" them. Unexpected memory allocation is almost always a sign of some problem with your code, usually a problem with type-stability or creating many small temporary arrays. @@ -98,8 +105,8 @@ Consequently, in addition to the allocation itself, it's very likely that the code generated for your function is far from optimal. Take such indications seriously and follow the advice below. -If we instead pass `x` as an argument to the function it no longer allocates memory -(the allocation reported below is due to running the `@time` macro in global scope) +In this particular case, the memory allocation is due to the usage of a type-unstable global variable `x`, so if we instead pass `x` as an argument to the function it no longer allocates memory +(the remaining allocation reported below is due to running the `@time` macro in global scope) and is significantly faster after the first call: ```jldoctest sumarg; setup = :(using Random; Random.seed!(1234)), filter = r"[0-9\.]+ seconds \(.*?\)" @@ -351,6 +358,27 @@ julia> !isconcretetype(Array), !isabstracttype(Array), isstructtype(Array), !isc ``` In this case, it would be better to avoid declaring `MyType` with a field `a::Array` and instead declare the field as `a::Array{T,N}` or as `a::A`, where `{T,N}` or `A` are parameters of `MyType`. +The previous advice is especially useful when the fields of a struct are meant to be functions, or more generally callable objects. +It is very tempting to define a struct as follows: + +```julia +struct MyCallableWrapper + f::Function +end +``` + +But since `Function` is an abstract type, every call to `wrapper.f` will require dynamic dispatch, due to the type instability of accessing the field `f`. 
+Instead, you should write something like: + +```julia +struct MyCallableWrapper{F} + f::F +end +``` + +which has nearly identical behavior but will be much faster (because the type instability is eliminated). +Note that we do not impose `F<:Function`: this means callable objects which do not subtype `Function` are also allowed for the field `f`. + ### Avoid fields with abstract containers The same best practices also work for container types: @@ -518,7 +546,7 @@ at the time `k` is compiled. ### Be aware of when Julia avoids specializing -As a heuristic, Julia avoids automatically specializing on argument type parameters in three +As a heuristic, Julia avoids automatically [specializing](@ref man-method-specializations) on argument type parameters in three specific cases: `Type`, `Function`, and `Vararg`. Julia will always specialize when the argument is used within the method, but not if the argument is just passed through to another function. This usually has no performance impact at runtime and @@ -577,7 +605,7 @@ h_vararg(x::Vararg{Any, N}) where {N} = tuple(x...) Note that [`@code_typed`](@ref) and friends will always show you specialized code, even if Julia would not normally specialize that method call. You need to check the [method internals](@ref ast-lowered-method) if you want to see whether specializations are generated -when argument types are changed, i.e., if `(@which f(...)).specializations` contains specializations +when argument types are changed, i.e., if `Base.specializations(@which f(...))` contains specializations for the argument in question. ## Break functions into multiple definitions @@ -604,8 +632,8 @@ end This can be written more concisely and efficiently as: ```julia -norm(x::Vector) = sqrt(real(dot(x, x))) -norm(A::Matrix) = maximum(svdvals(A)) +mynorm(x::Vector) = sqrt(real(dot(x, x))) +mynorm(A::Matrix) = maximum(svdvals(A)) ``` It should however be noted that the compiler is quite efficient at optimizing away the dead branches in code @@ -824,10 +852,10 @@ This might be worthwhile when either of the following are true: * You require CPU-intensive processing on each `Car`, and it becomes vastly more efficient if you know the `Make` and `Model` at compile time and the total number of different `Make` or `Model` that will be used is not too large. - * You have homogenous lists of the same type of `Car` to process, so that you can store them all + * You have homogeneous lists of the same type of `Car` to process, so that you can store them all in an `Array{Car{:Honda,:Accord},N}`. -When the latter holds, a function processing such a homogenous array can be productively specialized: +When the latter holds, a function processing such a homogeneous array can be productively specialized: Julia knows the type of each element in advance (all objects in the container have the same concrete type), so Julia can "look up" the correct method calls when the function is being compiled (obviating the need to check at run-time) and thereby emit efficient code for processing the whole list. @@ -1007,6 +1035,20 @@ some judgment may be required. However, for "vectorized" (element-wise) function syntax `x .= f.(y)` can be used for in-place operations with fused loops and no temporary arrays (see the [dot syntax for vectorizing functions](@ref man-vectorized)). 
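+
+As a small sketch of that difference (the function `f` and the arrays here are made up for the example):
+
+```julia
+f(y) = 2y + 1     # an ordinary scalar function
+y = rand(10_000)
+
+x = f.(y)         # allocates a fresh result array on every call
+
+x = similar(y)    # pre-allocate the output once...
+x .= f.(y)        # ...then reuse it: the fused broadcast writes into x with no temporaries
+```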
+## [Use `MutableArithmetics` for more control over allocation for mutable arithmetic types](@id man-perftips-mutablearithmetics) + +Some [`Number`](@ref) subtypes, such as [`BigInt`](@ref) or [`BigFloat`](@ref), may +be implemented as [`mutable struct`](@ref) types, or they may have mutable +components. The arithmetic interfaces in Julia `Base` usually opt for convenience +over efficiency in such cases, so using them in a naive manner may result in +suboptimal performance. The abstractions of the +[`MutableArithmetics`](https://juliahub.com/ui/Packages/General/MutableArithmetics) +package, on the other hand, make it possible to exploit the mutability of such types +for writing fast code that allocates only as much as necessary. `MutableArithmetics` +also makes it possible to copy values of mutable arithmetic types explicitly when +necessary. `MutableArithmetics` is a user package and is not affiliated with the +Julia project. + ## More dots: Fuse vectorized operations Julia has a special [dot syntax](@ref man-vectorized) that converts @@ -1048,11 +1090,37 @@ julia> @time f.(x); That is, `fdot(x)` is ten times faster and allocates 1/6 the memory of `f(x)`, because each `*` and `+` operation in `f(x)` allocates -a new temporary array and executes in a separate loop. (Of course, -if you just do `f.(x)` then it is as fast as `fdot(x)` in this -example, but in many contexts it is more convenient to just sprinkle -some dots in your expressions rather than defining a separate function -for each vectorized operation.) +a new temporary array and executes in a separate loop. In this example +`f.(x)` is as fast as `fdot(x)` but in many contexts it is more +convenient to sprinkle some dots in your expressions than to +define a separate function for each vectorized operation. + +## [Fewer dots: Unfuse certain intermediate broadcasts](@id man-performance-unfuse) + +The dot loop fusion mentioned above enables concise and idiomatic code to express highly performant operations. However, it is important to remember that the fused operation will be computed at every iteration of the broadcast. This means that in some situations, particularly in the presence of composed or multidimensional broadcasts, an expression with dot calls may be computing a function more times than intended. As an example, say we want to build a random matrix whose rows have Euclidean norm one. We might write something like the following: +``` +julia> x = rand(1000, 1000); + +julia> d = sum(abs2, x; dims=2); + +julia> @time x ./= sqrt.(d); + 0.002049 seconds (4 allocations: 96 bytes) +``` +This will work. However, this expression will actually recompute `sqrt(d[i])` for *every* element in the row `x[i, :]`, meaning that many more square roots are computed than necessary. To see precisely over which indices the broadcast will iterate, we can call `Broadcast.combine_axes` on the arguments of the fused expression. This will return a tuple of ranges whose entries correspond to the axes of iteration; the product of lengths of these ranges will be the total number of calls to the fused operation. + +It follows that when some components of the broadcast expression are constant along an axis—like the `sqrt` along the second dimension in the preceding example—there is potential for a performance improvement by forcibly "unfusing" those components, i.e. allocating the result of the broadcasted operation in advance and reusing the cached value along its constant axis. 
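+
+To check this for the example above, `Broadcast.combine_axes` can be applied to the arguments `x` and `d` (a quick sketch; the printed values assume the sizes used above):
+
+```julia-repl
+julia> Broadcast.combine_axes(x, d)               # axes that the fused `x ./ sqrt.(d)` iterates over
+(Base.OneTo(1000), Base.OneTo(1000))
+
+julia> prod(length, Broadcast.combine_axes(x, d)) # total number of fused-kernel evaluations
+1000000
+```
+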
Some such potential approaches are to use temporary variables, wrap components of a dot expression in `identity`, or use an equivalent intrinsically vectorized (but non-fused) function. +``` +julia> @time let s = sqrt.(d); x ./= s end; + 0.000809 seconds (5 allocations: 8.031 KiB) + +julia> @time x ./= identity(sqrt.(d)); + 0.000608 seconds (5 allocations: 8.031 KiB) + +julia> @time x ./= map(sqrt, d); + 0.000611 seconds (4 allocations: 8.016 KiB) +``` + +Any of these options yields approximately a three-fold speedup at the cost of an allocation; for large broadcastables this speedup can be asymptotically very large. ## [Consider using views for slices](@id man-performance-views) @@ -1095,42 +1163,41 @@ of the `fview` version of the function. Arrays are stored contiguously in memory, lending themselves to CPU vectorization and fewer memory accesses due to caching. These are the same reasons that it is recommended -to access arrays in column-major order (see above). Irregular access patterns and non-contiguous views -can drastically slow down computations on arrays because of non-sequential memory access. +to access arrays in column-major order (see above). Irregular access patterns and non-contiguous +views can drastically slow down computations on arrays because of non-sequential memory access. -Copying irregularly-accessed data into a contiguous array before operating on it can result -in a large speedup, such as in the example below. Here, a matrix and a vector are being accessed at -800,000 of their randomly-shuffled indices before being multiplied. Copying the views into -plain arrays speeds up the multiplication even with the cost of the copying operation. +Copying irregularly-accessed data into a contiguous array before repeatedly accessing it can result +in a large speedup, such as in the example below. Here, a matrix is being accessed at +randomly-shuffled indices before being multiplied. Copying into plain arrays speeds up the +multiplication even with the added cost of copying and allocation. ```julia-repl julia> using Random -julia> x = randn(1_000_000); - -julia> inds = shuffle(1:1_000_000)[1:800000]; +julia> A = randn(3000, 3000); -julia> A = randn(50, 1_000_000); +julia> x = randn(2000); -julia> xtmp = zeros(800_000); +julia> inds = shuffle(1:3000)[1:2000]; -julia> Atmp = zeros(50, 800_000); +julia> function iterated_neural_network(A, x, depth) + for _ in 1:depth + x .= max.(0, A * x) + end + argmax(x) + end -julia> @time sum(view(A, :, inds) * view(x, inds)) - 0.412156 seconds (14 allocations: 960 bytes) --4256.759568345458 +julia> @time iterated_neural_network(view(A, inds, inds), x, 10) + 0.324903 seconds (12 allocations: 157.562 KiB) +1569 -julia> @time begin - copyto!(xtmp, view(x, inds)) - copyto!(Atmp, view(A, :, inds)) - sum(Atmp * xtmp) - end - 0.285923 seconds (14 allocations: 960 bytes) --4256.759568345134 +julia> @time iterated_neural_network(A[inds, inds], x, 10) + 0.054576 seconds (13 allocations: 30.671 MiB, 13.33% gc time) +1569 ``` -Provided there is enough memory for the copies, the cost of copying the view to an array is -far outweighed by the speed boost from doing the matrix multiplication on a contiguous array. +Provided there is enough memory, the cost of copying the view to an array is outweighed +by the speed boost from doing the repeated matrix multiplications on a contiguous array.
## Consider StaticArrays.jl for small fixed-size vector/matrix operations @@ -1477,11 +1544,13 @@ julia> function f(x) end; julia> @code_warntype f(3.2) -Variables +MethodInstance for f(::Float64) + from f(x) @ Main REPL[9]:1 +Arguments #self#::Core.Const(f) x::Float64 - y::UNION{FLOAT64, INT64} - +Locals + y::Union{Float64, Int64} Body::Float64 1 ─ (y = Main.pos(x)) │ %2 = (y * x)::Float64 @@ -1502,7 +1571,7 @@ At the top, the inferred return type of the function is shown as `Body::Float64` The next lines represent the body of `f` in Julia's SSA IR form. The numbered boxes are labels and represent targets for jumps (via `goto`) in your code. Looking at the body, you can see that the first thing that happens is that `pos` is called and the -return value has been inferred as the `Union` type `UNION{FLOAT64, INT64}` shown in uppercase since +return value has been inferred as the `Union` type `Union{Float64, Int64}` shown in uppercase since it is a non-concrete type. This means that we cannot know the exact return type of `pos` based on the input types. However, the result of `y*x`is a `Float64` no matter if `y` is a `Float64` or `Int64` The net result is that `f(x::Float64)` will not be type-unstable @@ -1524,20 +1593,20 @@ are color highlighted in yellow, instead of red. The following examples may help you interpret expressions marked as containing non-leaf types: - * Function body starting with `Body::UNION{T1,T2})` + * Function body starting with `Body::Union{T1,T2})` * Interpretation: function with unstable return type * Suggestion: make the return value type-stable, even if you have to annotate it - * `invoke Main.g(%%x::Int64)::UNION{FLOAT64, INT64}` + * `invoke Main.g(%%x::Int64)::Union{Float64, Int64}` * Interpretation: call to a type-unstable function `g`. * Suggestion: fix the function, or if necessary annotate the return value - * `invoke Base.getindex(%%x::Array{Any,1}, 1::Int64)::ANY` + * `invoke Base.getindex(%%x::Array{Any,1}, 1::Int64)::Any` * Interpretation: accessing elements of poorly-typed arrays * Suggestion: use arrays with better-defined types, or if necessary annotate the type of individual element accesses - * `Base.getfield(%%x, :(:data))::ARRAY{FLOAT64,N} WHERE N` + * `Base.getfield(%%x, :(:data))::Array{Float64,N} where N` * Interpretation: getting a field that is of non-leaf type. In this case, the type of `x`, say `ArrayContainer`, had a field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type. * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter @@ -1624,3 +1693,32 @@ will not require this degree of programmer annotation to attain performance. In the mean time, some user-contributed packages like [FastClosures](https://github.com/c42f/FastClosures.jl) automate the insertion of `let` statements as in `abmult3`. + +## [Multithreading and linear algebra](@id man-multithreading-linear-algebra) + +This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations. +Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded. +In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading. + +Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`. 
+It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`). +Its current value can be accessed with `BLAS.get_num_threads()`. + +When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.). +However, it is generally recommended to check and set the value manually. +The OpenBLAS behavior is as follows: + +* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation. +* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads. + +When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`. +Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N< c = 'x' @@ -156,7 +156,7 @@ julia> 'A' + 1 ## String Basics -String literals are delimited by double quotes or triple double quotes: +String literals are delimited by double quotes or triple double quotes (not single quotes): ```jldoctest helloworldstring julia> str = "Hello, world.\n" @@ -535,7 +535,9 @@ Constructing strings using concatenation can become a bit cumbersome, however. T verbose calls to [`string`](@ref) or repeated multiplications, Julia allows interpolation into string literals using `$`, as in Perl: -```jldoctest stringconcat +```jldoctest +julia> greet = "Hello"; whom = "world"; + julia> "$greet, $whom.\n" "Hello, world.\n" ``` @@ -770,9 +772,10 @@ are some examples of non-standard string literals. Users and packages may also d Further documentation is given in the [Metaprogramming](@ref meta-non-standard-string-literals) section. ## [Regular Expressions](@id man-regex-literals) +Sometimes you are not looking for an exact string, but a particular *pattern*. For example, suppose you are trying to extract a single date from a large text file. You don’t know what that date is (that’s why you are searching for it), but you do know it will look something like `YYYY-MM-DD`. Regular expressions allow you to specify these patterns and search for them. -Julia has Perl-compatible regular expressions (regexes), as provided by the [PCRE](https://www.pcre.org/) -library (a description of the syntax can be found [here](https://www.pcre.org/current/doc/html/pcre2syntax.html)). Regular expressions are related to strings in two ways: the obvious connection is that +Julia uses version 2 of Perl-compatible regular expressions (regexes), as provided by the [PCRE](https://www.pcre.org/) +library (see the [PCRE2 syntax description](https://www.pcre.org/current/doc/html/pcre2syntax.html) for more details). Regular expressions are related to strings in two ways: the obvious connection is that regular expressions are used to find regular patterns in strings; the other connection is that regular expressions are themselves input as strings, which are parsed into a state machine that can be used to efficiently search for patterns in strings. In Julia, regular expressions are input @@ -1037,8 +1040,11 @@ true ``` Note the use of the `\Q...\E` escape sequence. All characters between the `\Q` and the `\E` -are interpreted as literal characters (after string interpolation). 
This escape sequence can -be useful when interpolating, possibly malicious, user input. +are interpreted as literal characters. This is convenient for matching characters that +would otherwise be regex metacharacters. However, caution is needed when using this feature +together with string interpolation, since the interpolated string might itself contain +the `\E` sequence, unexpectedly terminating literal matching. User inputs need to be sanitized +before inclusion in a regex. ## [Byte Array Literals](@id man-byte-array-literals) @@ -1138,7 +1144,7 @@ Version numbers can easily be expressed with non-standard string literals of the Version number literals create [`VersionNumber`](@ref) objects which follow the specifications of [semantic versioning](https://semver.org/), and therefore are composed of major, minor and patch numeric values, followed by pre-release and -build alpha-numeric annotations. For example, `v"0.2.1-rc1+win64"` is broken into major version +build alphanumeric annotations. For example, `v"0.2.1-rc1+win64"` is broken into major version `0`, minor version `2`, patch version `1`, pre-release `rc1` and build `win64`. When entering a version literal, everything except the major version number is optional, therefore e.g. `v"0.2"` is equivalent to `v"0.2.0"` (with empty pre-release/build annotations), `v"2"` is equivalent to @@ -1197,3 +1203,51 @@ Notice that the first two backslashes appear verbatim in the output, since they precede a quote character. However, the next backslash character escapes the backslash that follows it, and the last backslash escapes a quote, since these backslashes appear before a quote. + + +## [Annotated Strings](@id man-annotated-strings) + +It is sometimes useful to be able to hold metadata relating to regions of a +string. A [`AnnotatedString`](@ref Base.AnnotatedString) wraps another string and +allows for regions of it to be annotated with labelled values (`:label => value`). +All generic string operations are applied to the underlying string. However, +when possible, styling information is preserved. This means you can manipulate a +[`AnnotatedString`](@ref Base.AnnotatedString) —taking substrings, padding them, +concatenating them with other strings— and the metadata annotations will "come +along for the ride". + +This string type is fundamental to the [StyledStrings stdlib](@ref +stdlib-styledstrings), which uses `:face`-labelled annotations to hold styling +information. + +When concatenating a [`AnnotatedString`](@ref Base.AnnotatedString), take care to use +[`annotatedstring`](@ref Base.annotatedstring) instead of [`string`](@ref) if you want +to keep the string annotations. + +```jldoctest +julia> str = Base.AnnotatedString("hello there", + [(1:5, :word => :greeting), (7:11, :label => 1)]) +"hello there" + +julia> length(str) +11 + +julia> lpad(str, 14) +" hello there" + +julia> typeof(lpad(str, 7)) +Base.AnnotatedString{String} + +julia> str2 = Base.AnnotatedString(" julia", [(2:6, :face => :magenta)]) +" julia" + +julia> Base.annotatedstring(str, str2) +"hello there julia" + +julia> str * str2 == Base.annotatedstring(str, str2) # *-concatenation still works +true +``` + +The annotations of a [`AnnotatedString`](@ref Base.AnnotatedString) can be accessed +and modified via the [`annotations`](@ref Base.annotations) and +[`annotate!`](@ref Base.annotate!) functions. 
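+
+For instance, a rough sketch using the `str` defined above (the `:tone => :friendly` label is just an arbitrary illustrative choice):
+
+```julia
+Base.annotations(str)                           # every annotation, as (region, label => value) entries
+Base.annotations(str, 1:5)                      # only the annotations overlapping positions 1:5
+Base.annotate!(str, 7:11, :tone => :friendly)   # attach an additional annotation in place
+```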
diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md index cbe7e9b94eefc..19b4908927187 100644 --- a/doc/src/manual/style-guide.md +++ b/doc/src/manual/style-guide.md @@ -96,7 +96,7 @@ Instead of: ```julia function double(a::AbstractArray{<:Number}) - for i = firstindex(a):lastindex(a) + for i in eachindex(a) a[i] *= 2 end return a @@ -107,7 +107,7 @@ use: ```julia function double!(a::AbstractArray{<:Number}) - for i = firstindex(a):lastindex(a) + for i in eachindex(a) a[i] *= 2 end return a @@ -119,6 +119,10 @@ with both copying and modifying forms (e.g., [`sort`](@ref) and [`sort!`](@ref)) which are just modifying (e.g., [`push!`](@ref), [`pop!`](@ref), [`splice!`](@ref)). It is typical for such functions to also return the modified array for convenience. +Functions related to IO or making use of random number generators (RNG) are notable exceptions: +Since these functions almost invariably must mutate the IO or RNG, functions ending with `!` are used to signify a mutation _other_ than mutating the IO or advancing the RNG state. +For example, `rand(x)` mutates the RNG, whereas `rand!(x)` mutates both the RNG and `x`; similarly, `read(io)` mutates `io`, whereas `read!(io, x)` mutates both arguments. + ## Avoid strange type `Union`s Types such as `Union{Function,AbstractString}` are often a sign that some design could be cleaner. @@ -342,7 +346,7 @@ This would provide custom showing of vectors with a specific new element type. W this should be avoided. The trouble is that users will expect a well-known type like `Vector()` to behave in a certain way, and overly customizing its behavior can make it harder to work with. -## Avoid type piracy +## [Avoid type piracy](@id avoid-type-piracy) "Type piracy" refers to the practice of extending or redefining methods in Base or other packages on types that you have not defined. In extreme cases, you can crash Julia @@ -378,7 +382,7 @@ You generally want to use [`isa`](@ref) and [`<:`](@ref) for testing types, not `==`. Checking types for exact equality typically only makes sense when comparing to a known concrete type (e.g. `T == Float64`), or if you *really, really* know what you're doing. -## Do not write `x->f(x)` +## Don't write a trivial anonymous function `x->f(x)` for a named function `f` Since higher-order functions are often called with anonymous functions, it is easy to conclude that this is desirable or even necessary. But any function can be passed directly, without being diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md index 055569d873d50..f3ce600091aca 100644 --- a/doc/src/manual/types.md +++ b/doc/src/manual/types.md @@ -57,9 +57,9 @@ kinds of programming, however, become clearer, simpler, faster and more robust w The `::` operator can be used to attach type annotations to expressions and variables in programs. There are two primary reasons to do this: -1. As an assertion to help confirm that your program works the way you expect, +1. As an assertion to help confirm that your program works the way you expect, and 2. To provide extra type information to the compiler, which can then improve performance in some - cases + cases. When appended to an expression computing a value, the `::` operator is read as "is an instance of". It can be used anywhere to assert that the value of the expression on the left is an instance @@ -108,9 +108,26 @@ local x::Int8 # in a local declaration x::Int8 = 10 # as the left-hand side of an assignment ``` -and applies to the whole current scope, even before the declaration. 
Currently, type declarations -cannot be used in global scope, e.g. in the REPL, since Julia does not yet have constant-type -globals. +and applies to the whole current scope, even before the declaration. + +As of Julia 1.8, type declarations can now be used in global scope i.e. +type annotations can be added to global variables to make accessing them type stable. +```julia +julia> x::Int = 10 +10 + +julia> x = 3.5 +ERROR: InexactError: Int64(3.5) + +julia> function foo(y) + global x = 15.8 # throws an error when foo is called + return x + y + end +foo (generic function with 1 method) + +julia> foo(10) +ERROR: InexactError: Int64(15.8) +``` Declarations can also be attached to function definitions: @@ -230,8 +247,8 @@ default method by many combinations of concrete types. Thanks to multiple dispat has full control over whether the default or more specific method is used. An important point to note is that there is no loss in performance if the programmer relies on -a function whose arguments are abstract types, because it is recompiled for each tuple of argument -concrete types with which it is invoked. (There may be a performance issue, however, in the case +a function whose arguments are abstract types, because it is recompiled for each tuple of concrete +argument types with which it is invoked. (There may be a performance issue, however, in the case of function arguments that are containers of abstract types; see [Performance Tips](@ref man-performance-abstract-container).) ## Primitive Types @@ -409,6 +426,9 @@ There is much more to say about how instances of composite types are created, bu depends on both [Parametric Types](@ref) and on [Methods](@ref), and is sufficiently important to be addressed in its own section: [Constructors](@ref man-constructors). +For many user-defined types `X`, you may want to define a method [`Base.broadcastable(x::X) = Ref(x)`](@ref man-interfaces-broadcasting) +so that instances of that type act as 0-dimensional "scalars" for [broadcasting](@ref Broadcasting). + ## Mutable Composite Types If a composite type is declared with `mutable struct` instead of `struct`, then instances of @@ -958,24 +978,29 @@ alias for `Tuple{Vararg{T,N}}`, i.e. a tuple type containing exactly `N` element Named tuples are instances of the [`NamedTuple`](@ref) type, which has two parameters: a tuple of symbols giving the field names, and a tuple type giving the field types. +For convenience, `NamedTuple` types are printed using the [`@NamedTuple`](@ref) macro which provides a +convenient `struct`-like syntax for declaring these types via `key::Type` declarations, +where an omitted `::Type` corresponds to `::Any`. + ```jldoctest -julia> typeof((a=1,b="hello")) -NamedTuple{(:a, :b), Tuple{Int64, String}} +julia> typeof((a=1,b="hello")) # prints in macro form +@NamedTuple{a::Int64, b::String} + +julia> NamedTuple{(:a, :b), Tuple{Int64, String}} # long form of the type +@NamedTuple{a::Int64, b::String} ``` -The [`@NamedTuple`](@ref) macro provides a more convenient `struct`-like syntax for declaring -`NamedTuple` types via `key::Type` declarations, where an omitted `::Type` corresponds to `::Any`. +The `begin ... 
end` form of the `@NamedTuple` macro allows the declarations to be +split across multiple lines (similar to a struct declaration), but is otherwise equivalent: -```jldoctest -julia> @NamedTuple{a::Int, b::String} -NamedTuple{(:a, :b), Tuple{Int64, String}} +```jldoctest julia> @NamedTuple begin a::Int b::String end -NamedTuple{(:a, :b), Tuple{Int64, String}} +@NamedTuple{a::Int64, b::String} ``` A `NamedTuple` type can be used as a constructor, accepting a single tuple argument. @@ -983,10 +1008,10 @@ The constructed `NamedTuple` type can be either a concrete type, with both param or a type that specifies only field names: ```jldoctest -julia> @NamedTuple{a::Float32,b::String}((1,"")) +julia> @NamedTuple{a::Float32,b::String}((1, "")) (a = 1.0f0, b = "") -julia> NamedTuple{(:a, :b)}((1,"")) +julia> NamedTuple{(:a, :b)}((1, "")) (a = 1, b = "") ``` @@ -1313,6 +1338,16 @@ type -- either [`Int32`](@ref) or [`Int64`](@ref). reflects the size of a native pointer on that machine, the floating point register sizes are specified by the IEEE-754 standard.) +Type aliases may be parametrized: + +```jldoctest +julia> const Family{T} = Set{T} +Set + +julia> Family{Char} === Set{Char} +true +``` + ## Operations on Types Since types in Julia are themselves objects, ordinary functions can operate on them. Some functions @@ -1524,7 +1559,7 @@ when the `:compact` property is set to `true`, falling back to the long representation if the property is `false` or absent: ```jldoctest polartype julia> function Base.show(io::IO, z::Polar) - if get(io, :compact, false) + if get(io, :compact, false)::Bool print(io, z.r, "ℯ", z.Θ, "im") else print(io, z.r, " * exp(", z.Θ, "im)") @@ -1595,5 +1630,5 @@ in unfavorable cases, you can easily end up making the performance of your code In particular, you would never want to write actual code as illustrated above. For more information about the proper (and improper) uses of `Val`, please read [the more extensive discussion in the performance tips](@ref man-performance-value-type). -[^1]: "Small" is defined by the `MAX_UNION_SPLITTING` constant, which is currently set to 4. +[^1]: "Small" is defined by the `max_union_splitting` configuration, which currently defaults to 4. [^2]: A few popular languages have singleton types, including Haskell, Scala and Ruby. diff --git a/doc/src/manual/unicode-input.md b/doc/src/manual/unicode-input.md index 7539e75bb4f24..db1bd69c2e480 100644 --- a/doc/src/manual/unicode-input.md +++ b/doc/src/manual/unicode-input.md @@ -52,11 +52,12 @@ function fix_combining_chars(char) return cat == 6 || cat == 8 ? "$NBSP$char$NBSP" : "$char" end - function table_entries(completions, unicode_dict) - entries = [[ - "Code point(s)", "Character(s)", - "Tab completion sequence(s)", "Unicode name(s)" + entries = Any[Any[ + ["Code point(s)"], + ["Character(s)"], + ["Tab completion sequence(s)"], + ["Unicode name(s)"], ]] for (chars, inputs) in sort!(collect(completions), by = first) code_points, unicode_names, characters = String[], String[], String[] @@ -65,12 +66,21 @@ function table_entries(completions, unicode_dict) push!(unicode_names, get(unicode_dict, UInt32(char), "(No Unicode name)")) push!(characters, isempty(characters) ? 
fix_combining_chars(char) : "$char") end + inputs_md = [] + for (i, input) in enumerate(inputs) + i > 1 && push!(inputs_md, ", ") + push!(inputs_md, Markdown.Code("", input)) + end push!(entries, [ - join(code_points, " + "), join(characters), - join(inputs, ", "), join(unicode_names, " + ") + [join(code_points, " + ")], + [join(characters)], + inputs_md, + [join(unicode_names, " + ")], ]) end - return Markdown.Table(entries, [:l, :l, :l, :l]) + table = Markdown.Table(entries, [:l, :c, :l, :l]) + # We also need to wrap the Table in a Markdown.MD "document" + return Markdown.MD([table]) end table_entries( diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md index ca6ebc2157b71..6a6f176650b2a 100644 --- a/doc/src/manual/variables-and-scoping.md +++ b/doc/src/manual/variables-and-scoping.md @@ -33,7 +33,7 @@ Notably missing from this table are which do *not* introduce new scopes. The three types of scopes follow somewhat different rules which will be explained below. -Julia uses [lexical scoping](https://en.wikipedia.org/wiki/Scope_%28computer_science%29#Lexical_scoping_vs._dynamic_scoping), +Julia uses [lexical scoping](https://en.wikipedia.org/wiki/Scope_(computer_science)#Lexical_scope_vs._dynamic_scope), meaning that a function's scope does not inherit from its caller's scope, but from the scope in which the function was defined. For example, in the following code the `x` inside `foo` refers to the `x` in the global scope of its module `Bar`: @@ -90,7 +90,8 @@ julia> module B julia> module D b = a # errors as D's global scope is separate from A's end; -ERROR: UndefVarError: a not defined +ERROR: UndefVarError: `a` not defined in `D` +Suggestion: check for spelling errors or missing imports. ``` If a top-level expression contains a variable declaration with keyword `local`, @@ -111,7 +112,7 @@ x = 1 Note that the interactive prompt (aka REPL) is in the global scope of the module `Main`. -## Local Scope +## [Local Scope](@id local-scope) A new local scope is introduced by most code blocks (see above [table](@ref man-scope-table) for a complete list). If such a block is syntactically nested @@ -187,7 +188,7 @@ julia> greet() hello julia> x # global -ERROR: UndefVarError: x not defined +ERROR: UndefVarError: `x` not defined in `Main` ``` Inside of the `greet` function, the assignment `x = "hello"` causes `x` to be a new local variable @@ -256,7 +257,7 @@ julia> sum_to(10) 55 julia> s # global -ERROR: UndefVarError: s not defined +ERROR: UndefVarError: `s` not defined in `Main` ``` Since `s` is local to the function `sum_to`, calling the function has no effect on the global @@ -343,7 +344,7 @@ hello hello julia> x -ERROR: UndefVarError: x not defined +ERROR: UndefVarError: `x` not defined in `Main` ``` Since the global `x` is not defined when the `for` loop is evaluated, the first clause of the soft @@ -408,7 +409,7 @@ julia> code = """ julia> include_string(Main, code) ┌ Warning: Assignment to `s` in soft scope is ambiguous because a global variable by the same name exists: `s` will be treated as a new local. Disambiguate by using `local s` to suppress this warning or `global s` to assign to the existing global variable. └ @ string:4 -ERROR: LoadError: UndefVarError: s not defined +ERROR: LoadError: UndefVarError: `s` not defined in local scope ``` Here we use [`include_string`](@ref), to evaluate `code` as though it were the contents of a file. 
@@ -559,7 +560,7 @@ julia> let x = 1, z println("z: $z") # errors as z has not been assigned yet but is local end x: 1, y: -1 -ERROR: UndefVarError: z not defined +ERROR: UndefVarError: `z` not defined in local scope ``` The assignments are evaluated in order, with each right-hand side evaluated in the scope before diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md index 0dfc4f508577f..75c2163896d9c 100644 --- a/doc/src/manual/variables.md +++ b/doc/src/manual/variables.md @@ -59,10 +59,10 @@ name `δ` can be entered by typing `\delta`-*tab*, or even `α̂⁽²⁾` by `\a that you don't know how to type, the REPL help will tell you: just type `?` and then paste the symbol.) -Julia will even let you redefine built-in constants and functions if needed (although -this is not recommended to avoid potential confusions): +Julia will even let you shadow existing exported constants and functions with local ones +(although this is not recommended to avoid potential confusions): -```jldoctest +```jldoctest; filter = r"with \d+ methods" julia> pi = 3 3 @@ -71,6 +71,12 @@ julia> pi julia> sqrt = 4 4 + +julia> length() = 5 +length (generic function with 1 method) + +julia> Base.length +length (generic function with 79 methods) ``` However, if you try to redefine a built-in constant or function already in use, Julia will give @@ -81,7 +87,7 @@ julia> pi π = 3.1415926535897... julia> pi = 3 -ERROR: cannot assign a value to imported variable MathConstants.pi from module Main +ERROR: cannot assign a value to imported variable Base.pi from module Main julia> sqrt(100) 10.0 @@ -111,16 +117,17 @@ variable name. For example, if `+ᵃ` is an operator, then `+ᵃx` must be writt it from `+ ᵃx` where `ᵃx` is the variable name. -A particular class of variable names is one that contains only underscores. These identifiers can only be assigned values but cannot be used to assign values to other variables. -More technically, they can only be used as an [L-value](https://en.wikipedia.org/wiki/Value_(computer_science)#lrvalue), but not as an - [R-value](https://en.wikipedia.org/wiki/R-value): +A particular class of variable names is one that contains only underscores. These identifiers are write-only, i.e. they can only be assigned values, which are immediately discarded, and their values cannot be used in any way. ```julia-repl julia> x, ___ = size([2 2; 1 1]) (2, 2) julia> y = ___ -ERROR: syntax: all-underscore identifier used as rvalue +ERROR: syntax: all-underscore identifiers are write-only and their values cannot be used in expressions + +julia> println(___) +ERROR: syntax: all-underscore identifiers are write-only and their values cannot be used in expressions ``` The only explicitly disallowed names for variables are the names of the built-in [Keywords](@ref Keywords): @@ -135,7 +142,7 @@ ERROR: syntax: unexpected "=" Some Unicode characters are considered to be equivalent in identifiers. Different ways of entering Unicode combining characters (e.g., accents) -are treated as equivalent (specifically, Julia identifiers are [NFC](http://www.macchiato.com/unicode/nfc-faq)-normalized). +are treated as equivalent (specifically, Julia identifiers are [NFC](https://en.wikipedia.org/wiki/Unicode_equivalence)-normalized). Julia also includes a few non-standard equivalences for characters that are visually similar and are easily entered by some input methods.
The Unicode characters `ɛ` (U+025B: Latin small letter open e) and `µ` (U+00B5: micro sign) @@ -145,6 +152,81 @@ are treated as equivalent to the corresponding Greek letters. The middle dot treated as the mathematical dot operator `⋅` (U+22C5). The minus sign `−` (U+2212) is treated as equivalent to the hyphen-minus sign `-` (U+002D). +## [Assignment expressions and assignment versus mutation](@id man-assignment-expressions) + +An assignment `variable = value` "binds" the name `variable` to the `value` computed +on the right-hand side, and the whole assignment is treated by Julia as an expression +equal to the right-hand-side `value`. This means that assignments can be *chained* +(the same `value` assigned to multiple variables with `variable1 = variable2 = value`) +or used in other expressions, and is also why their result is shown in the REPL as +the value of the right-hand side. (In general, the REPL displays the value of whatever +expression you evaluate.) For example, here the value `4` of `b = 2+2` is +used in another arithmetic operation and assignment: + +```jldoctest +julia> a = (b = 2+2) + 3 +7 + +julia> a +7 + +julia> b +4 +``` + +A common confusion is the distinction between *assignment* (giving a new "name" to a value) +and *mutation* (changing a value). If you run `a = 2` followed by `a = 3`, you have changed +the "name" `a` to refer to a new value `3` … you haven't changed the number `2`, so `2+2` +will still give `4` and not `6`! This distinction becomes more clear when dealing with +*mutable* types like [arrays](@ref lib-arrays), whose contents *can* be changed: + +```jldoctest mutation_vs_rebind +julia> a = [1,2,3] # an array of 3 integers +3-element Vector{Int64}: + 1 + 2 + 3 + +julia> b = a # both b and a are names for the same array! +3-element Vector{Int64}: + 1 + 2 + 3 +``` + +Here, the line `b = a` does *not* make a copy of the array `a`, it simply binds the name +`b` to the *same* array `a`: both `b` and `a` "point" to one array `[1,2,3]` in memory. +In contrast, an assignment `a[i] = value` *changes* the *contents* of the array, and the +modified array will be visible through both the names `a` and `b`: + +```jldoctest mutation_vs_rebind +julia> a[1] = 42 # change the first element +42 + +julia> a = 3.14159 # a is now the name of a different object +3.14159 + +julia> b # b refers to the original array object, which has been mutated +3-element Vector{Int64}: + 42 + 2 + 3 +``` +That is, `a[i] = value` (an alias for [`setindex!`](@ref)) *mutates* an existing array object +in memory, accessible via either `a` or `b`. Subsequently setting `a = 3.14159` +does not change this array, it simply binds `a` to a different object; the array is still +accessible via `b`. Another common syntax to mutate an existing object is +`a.field = value` (an alias for [`setproperty!`](@ref)), which can be used to change +a [`mutable struct`](@ref). There is also mutation via dot assignment, for example +`b .= 5:7` (which mutates our array `b` in-place to contain `[5,6,7]`), as part of Julia's +[vectorized "dot" syntax](@ref man-dot-operators). + +When you call a [function](@ref man-functions) in Julia, it behaves as if you *assigned* +the argument values to new variable names corresponding to the function arguments, as discussed +in [Argument-Passing Behavior](@ref man-argument-passing). (By [convention](@ref man-punctuation), +functions that mutate one or more of their arguments have names ending with `!`.) 
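+
+For example, a small sketch of this behavior (the function name `shift_first!` is just an illustrative choice):
+
+```julia-repl
+julia> function shift_first!(v)
+           v[1] += 10      # mutation: changes the array object the caller passed in
+           v = [0, 0, 0]   # rebinding the local name `v`: invisible to the caller
+           return nothing
+       end;
+
+julia> a = [1, 2, 3];
+
+julia> shift_first!(a)
+
+julia> a                   # the mutation is visible; the rebinding is not
+3-element Vector{Int64}:
+ 11
+  2
+  3
+```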
+ + ## Stylistic Conventions While Julia imposes few restrictions on valid names, it has become useful to adopt the following diff --git a/doc/src/manual/workflow-tips.md b/doc/src/manual/workflow-tips.md index 4085a51ff9131..c3bbbbae8146f 100644 --- a/doc/src/manual/workflow-tips.md +++ b/doc/src/manual/workflow-tips.md @@ -10,57 +10,40 @@ your experience at the command line. ### A basic editor/REPL workflow -The most basic Julia workflows involve using a text editor in conjunction with the `julia` command -line. A common pattern includes the following elements: +The most basic Julia workflows involve using a text editor in conjunction with the `julia` command line. - * **Put code under development in a temporary module.** Create a file, say `Tmp.jl`, and include - within it +Create a file, say `Tmp.jl`, and include within it +```julia +module Tmp - ```julia - module Tmp - export say_hello +say_hello() = println("Hello!") - say_hello() = println("Hello!") +# Your other definitions here - # your other definitions here +end # module - end - ``` - * **Put your test code in another file.** Create another file, say `tst.jl`, which looks like +using .Tmp +``` +Then, in the same directory, start the Julia REPL (using the `julia` command). +Run the new file as follows: +``` +julia> include("Tmp.jl") - ```julia - include("Tmp.jl") - import .Tmp - # using .Tmp # we can use `using` to bring the exported symbols in `Tmp` into our namespace +julia> Tmp.say_hello() +Hello! +``` +Explore ideas in the REPL. Save good ideas in `Tmp.jl`. +To reload the file after it has been changed, just `include` it again. - Tmp.say_hello() - # say_hello() +The key in the above is that your code is encapsulated in a module. +That allows you to edit `struct` definitions and remove methods, without restarting Julia. - # your other test code here - ``` +(Explanation: `struct`s cannot be edited after definition, nor can methods be deleted. +But you _can_ overwrite the definition of a module, which is what we do when we re-`include("Tmp.jl")`). - and includes tests for the contents of `Tmp`. - Alternatively, you can wrap the contents of your test file in a module, as +In addition, the encapsulation of code in a module protects it from being influenced +by previous state in the REPL, protecting you from hard-to-detect errors. - ```julia - module Tst - include("Tmp.jl") - import .Tmp - #using .Tmp - - Tmp.say_hello() - # say_hello() - - # your other test code here - end - ``` - - The advantage is that your testing code is now contained in a module and does not use the global scope in `Main` for - definitions, which is a bit more tidy. - - * `include` the `tst.jl` file in the Julia REPL with `include("tst.jl")`. - - * **Lather. Rinse. Repeat.** Explore ideas at the `julia` command prompt. Save good ideas in `tst.jl`. To execute `tst.jl` after it has been changed, just `include` it again. ## Browser-based workflow diff --git a/doc/src/tutorials/creating-packages.md b/doc/src/tutorials/creating-packages.md new file mode 100644 index 0000000000000..d1f79e8c88a9d --- /dev/null +++ b/doc/src/tutorials/creating-packages.md @@ -0,0 +1,634 @@ +# [Creating Packages](@id creating-packages-tutorial) + +## Generating files for a package + +!!! note + The [PkgTemplates](https://github.com/invenia/PkgTemplates.jl) package offers an easy, repeatable, and + customizable way to generate the files for a new package. It can also generate files needed for Documentation, CI, etc. 
+ We recommend that you use PkgTemplates for creating + new packages instead of using the minimal `pkg> generate` functionality described below. + +To generate the bare minimum files for a new package, use `pkg> generate`. + +```julia-repl +(@v1.8) pkg> generate HelloWorld +``` + +This creates a new project `HelloWorld` in a subdirectory by the same name, with the following files (visualized with the external [`tree` command](https://linux.die.net/man/1/tree)): + +```julia-repl +shell> tree HelloWorld/ +HelloWorld/ +├── Project.toml +└── src + └── HelloWorld.jl + +2 directories, 2 files +``` + +The `Project.toml` file contains the name of the package, its unique UUID, its version, the authors and potential dependencies: + +```toml +name = "HelloWorld" +uuid = "b4cd1eb8-1e24-11e8-3319-93036a3eb9f3" +version = "0.1.0" +authors = ["Some One "] + +[deps] +``` + +The content of `src/HelloWorld.jl` is: + +```julia +module HelloWorld + +greet() = print("Hello World!") + +end # module +``` + +We can now activate the project by using the path to the directory where it is installed, and load the package: + +```julia-repl +pkg> activate ./HelloWorld + +julia> import HelloWorld + +julia> HelloWorld.greet() +Hello World! +``` + +For the rest of the tutorial we enter inside the directory of the project, for convenience: + +```julia-repl +julia> cd("HelloWorld") +``` + +## Adding dependencies to the project + +Let’s say we want to use the standard library package `Random` and the registered package `JSON` in our project. +We simply `add` these packages (note how the prompt now shows the name of the newly generated project, +since we `activate`d it): + +```julia-repl +(HelloWorld) pkg> add Random JSON + Resolving package versions... + Updating `~/HelloWorld/Project.toml` + [682c06a0] + JSON v0.21.3 + [9a3f8284] + Random + Updating `~/HelloWorld/Manifest.toml` + [682c06a0] + JSON v0.21.3 + [69de0a69] + Parsers v2.4.0 + [ade2ca70] + Dates + ... +``` + +Both `Random` and `JSON` got added to the project’s `Project.toml` file, and the resulting dependencies got added to the `Manifest.toml` file. +The resolver has installed each package with the highest possible version, while still respecting the compatibility that each package enforces on its dependencies. + +We can now use both `Random` and `JSON` in our project. Changing `src/HelloWorld.jl` to + +```julia +module HelloWorld + +import Random +import JSON + +greet() = print("Hello World!") +greet_alien() = print("Hello ", Random.randstring(8)) + +end # module +``` + +and reloading the package, the new `greet_alien` function that uses `Random` can be called: + +```julia-repl +julia> HelloWorld.greet_alien() +Hello aT157rHV +``` + +## Defining a public API + +If you want your package to be useful to other packages and you want folks to be able to +easily update to newer version of your package when they come out, it is important to +document what behavior will stay consistent across updates. + +Unless you note otherwise, the public API of your package is defined as all the behavior you +describe about public symbols. A public symbol is a symbol that is exported from your +package with the `export` keyword or marked as public with the `public` keyword. When you +change the behavior of something that was previously public so that the new +version no longer conforms to the specifications provided in the old version, you should +adjust your package version number according to [Julia's variant on SemVer](#Version-specifier-format). 
+If you would like to include a symbol in your public API without exporting it into the +global namespace of folks who call `using YourPackage`, you should mark that symbol as +public with `public that_symbol`. Symbols marked as public with the `public` keyword are +just as public as those marked as public with the `export` keyword, but when folks call +`using YourPackage`, they will still have to qualify access to those symbols with +`YourPackage.that_symbol`. + +Let's say we would like our `greet` function to be part of the public API, but not the +`greet_alien` function. We could then write the following and release it as version `1.0.0`. + +```julia +module HelloWorld + +export greet + +import Random +import JSON + +"Writes a friendly message." +greet() = print("Hello World!") + +"Greet an alien by a randomly generated name." +greet_alien() = print("Hello ", Random.randstring(8)) + +end # module +``` + +Then, if we change `greet` to + +```julia +"Writes a friendly message that is exactly three words long." +greet() = print("Hello Lovely World!") +``` + +We would release the new version as `1.1.0`. This is not breaking +because the new implementation conforms to the old documentation, but +it does add a new feature, that the message must be three words long. + +Later, we may wish to change `greet_alien` to + +```julia +"Greet an alien by the name of \"Zork\"." +greet_alien() = print("Hello Zork") +``` + +And also export it by changing + +```julia +export greet +``` + +to + +```julia +export greet, greet_alien +``` + +We should release this new version as `1.2.0` because it adds a new feature +`greet_alien` to the public API. Even though `greet_alien` was documented before +and the new version does not conform to the old documentation, this is not breaking +because the old documentation was not attached to a symbol that was exported +at the time so that documentation does not apply across released versions. + +However, if we now wish to change `greet` to + +```julia +"Writes a friendly message that is exactly four words long." +greet() = print("Hello very lovely world") +``` + +we would need to release the new version as `2.0.0`. In version `1.1.0`, we specified that +the greeting would be three words long, and because `greet` was exported, that description +also applies to all future versions until the next breaking release. Because this new +version does not conform to the old specification, it must be tagged as a breaking change. + +Please note that version numbers are free and unlimited. It is okay to use lots of them +(e.g. version `6.62.8`). + +## Adding a build step to the package + +The build step is executed the first time a package is installed or when explicitly invoked with `build`. +A package is built by executing the file `deps/build.jl`. + +```julia-repl +julia> mkpath("deps"); + +julia> write("deps/build.jl", + """ + println("I am being built...") + """); + +(HelloWorld) pkg> build + Building HelloWorld → `deps/build.log` + Resolving package versions... + +julia> print(readchomp("deps/build.log")) +I am being built...
+``` + +If the build step fails, the output of the build step is printed to the console + +```julia-repl +julia> write("deps/build.jl", + """ + error("Ooops") + """); + +(HelloWorld) pkg> build + Building HelloWorld → `~/HelloWorld/deps/build.log` +ERROR: Error building `HelloWorld`: +ERROR: LoadError: Ooops +Stacktrace: + [1] error(s::String) + @ Base ./error.jl:35 + [2] top-level scope + @ ~/HelloWorld/deps/build.jl:1 + [3] include(fname::String) + @ Base.MainInclude ./client.jl:476 + [4] top-level scope + @ none:5 +in expression starting at /home/kc/HelloWorld/deps/build.jl:1 +``` + +!!! warning + A build step should generally not create or modify any files in the package directory. If you need to store some files + from the build step, use the [Scratch.jl](https://github.com/JuliaPackaging/Scratch.jl) package. + +## [Adding tests to the package](@id adding-tests-to-packages) + +When a package is tested the file `test/runtests.jl` is executed: + +```julia-repl +julia> mkpath("test"); + +julia> write("test/runtests.jl", + """ + println("Testing...") + """); + +(HelloWorld) pkg> test + Testing HelloWorld + Resolving package versions... +Testing... + Testing HelloWorld tests passed +``` + +Tests are run in a new Julia process, where the package itself, and any +test-specific dependencies, are available, see below. + + +!!! warning + Tests should generally not create or modify any files in the package directory. If you need to store some files + from the build step, use the [Scratch.jl](https://github.com/JuliaPackaging/Scratch.jl) package. + +### Test-specific dependencies + +There are two ways of adding test-specific dependencies (dependencies that are not dependencies of the package but will still be available to +load when the package is tested). + +#### `target` based test specific dependencies + +Using this method of adding test-specific dependencies, the packages are added under an `[extras]` section and to a test target, +e.g. to add `Markdown` and `Test` as test dependencies, add the following to the `Project.toml` file: + +```toml +[extras] +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Markdown", "Test"] +``` + +Note that the only supported targets are `test` and `build`, the latter of which (not recommended) can be used +for any `deps/build.jl` scripts. + +#### Alternative approach: `test/Project.toml` file test specific dependencies + +!!! note + The exact interaction between `Project.toml`, `test/Project.toml` and their corresponding + `Manifest.toml`s are not fully worked out and may be subject to change in future versions. + The older method of adding test-specific dependencies, described in the previous section, + will therefore be supported throughout all Julia 1.X releases. + +In Julia 1.2 and later test dependencies can be declared in `test/Project.toml`. When running +tests, Pkg will automatically merge this and the package Projects to create the test environment. + +!!! note + If no `test/Project.toml` exists Pkg will use the `target` based test specific dependencies. + +To add a test-specific dependency, i.e. a dependency that is available only when testing, +it is thus enough to add this dependency to the `test/Project.toml` project. This can be +done from the Pkg REPL by activating this environment, and then use `add` as one normally +does. 
Let's add the `Test` standard library as a test dependency: + +```julia-repl +(HelloWorld) pkg> activate ./test +[ Info: activating environment at `~/HelloWorld/test/Project.toml`. + +(test) pkg> add Test + Resolving package versions... + Updating `~/HelloWorld/test/Project.toml` + [8dfed614] + Test + Updating `~/HelloWorld/test/Manifest.toml` + [...] +``` + +We can now use `Test` in the test script and we can see that it gets installed when testing: + +```julia-repl +julia> write("test/runtests.jl", + """ + using Test + @test 1 == 1 + """); + +(test) pkg> activate . + +(HelloWorld) pkg> test + Testing HelloWorld + Resolving package versions... + Updating `/var/folders/64/76tk_g152sg6c6t0b4nkn1vw0000gn/T/tmpPzUPPw/Project.toml` + [d8327f2a] + HelloWorld v0.1.0 [`~/.julia/dev/Pkg/HelloWorld`] + [8dfed614] + Test + Updating `/var/folders/64/76tk_g152sg6c6t0b4nkn1vw0000gn/T/tmpPzUPPw/Manifest.toml` + [d8327f2a] + HelloWorld v0.1.0 [`~/.julia/dev/Pkg/HelloWorld`] + Testing HelloWorld tests passed +``` + +## Compatibility on dependencies + +Every dependency should in general have a compatibility constraint on it. +This is an important topic so there is a chapter in the package docs about it: +[Compatibility](https://pkgdocs.julialang.org/v1/compatibility). + +## Weak dependencies + +!!! note + This is a somewhat advanced usage of Pkg which can be skipped for people new to Julia and Julia packages. + +!!! compat + The described feature requires Julia 1.9+. + +A weak dependency is a dependency that will not automatically install when the package is installed but +you can still control what versions of that package are allowed to be installed by setting compatibility on it. +These are listed in the project file under the `[weakdeps]` section: + +```toml +[weakdeps] +SomePackage = "b3785f31-9d33-4cdf-bc73-f646780f1739" + +[compat] +SomePackage = "1.2" +``` + +The current usage of this is almost solely limited to "extensions" which is described in the next section. + +## Conditional loading of code in packages (Extensions) + +!!! note + This is a somewhat advanced usage of Pkg which can be skipped for people new to Julia and Julia packages. + +!!! compat + The described feature requires Julia 1.9+. + +Sometimes one wants to make two or more packages work well together, but may be reluctant (perhaps due to increased load times) to make one an unconditional dependency of the other. +A package *extension* is a module in a file (similar to a package) that is automatically loaded when *some other set of packages* are +loaded into the Julia session. This is very similar to functionality that the external package +[Requires.jl](https://github.com/JuliaPackaging/Requires.jl) provides, but which is now available directly through Julia, +and provides added benefits such as being able to precompile the extension. + +### Code structure + +A useful application of extensions could be for a plotting package that should be able to plot +objects from a wide variety of different Julia packages. +Adding all those different Julia packages as dependencies of the plotting package +could be expensive since they would end up getting loaded even if they were never used. +Instead, the code required to plot objects for specific packages can be put into separate files +(extensions) and these are loaded only when the packages that define the type(s) we want to plot +are loaded.
+ +Below is an example of how the code can be structured for a use case in which a +`Plotting` package wants to be able to display objects defined in the external package `Contour`. +The file and folder structure shown below is found in the `Plotting` package. + + `Project.toml`: + ```toml +name = "Plotting" +version = "0.1.0" +uuid = "..." + +[weakdeps] +Contour = "d38c429a-6771-53c6-b99e-75d170b6e991" + +[extensions] +# name of extension to the left +# extension dependencies required to load the extension to the right +# use a list for multiple extension dependencies +PlottingContourExt = "Contour" + +[compat] +Contour = "0.6.2" +``` + +`src/Plotting.jl`: +```julia +module Plotting + +function plot(x::Vector) + # Some functionality for plotting a vector here +end + +end # module +``` + +`ext/PlottingContourExt.jl` (can also be in `ext/PlottingContourExt/PlottingContourExt.jl`): +```julia +module PlottingContourExt # Should be same name as the file (just like a normal package) + +using Plotting, Contour + +function Plotting.plot(c::Contour.ContourCollection) + # Some functionality for plotting a contour here +end + +end # module +``` + +Extensions can have any arbitrary name (here `PlottingContourExt`), but using something similar to the format of +this example that makes the extended functionality and dependency of the extension clear is likely a good idea. + +!!! compat + Often you will put the extension dependencies into the `test` target so they are loaded when running e.g. `Pkg.test()`. On earlier Julia versions + this requires you to also put the package in the `[extras]` section. This is unfortunate but the project verifier on older Julia versions will + complain if this is not done. + +!!! note + If you use a manifest generated by a Julia version that does not know about extensions with a Julia version that does + know about them, the extensions will not load. This is because the manifest lacks some information that tells Julia + when it should load these packages. So make sure you use a manifest generated at least the Julia version you are using. + +### Behavior of extensions + +A user that depends only on `Plotting` will not pay the cost of the "extension" inside the `PlottingContourExt` module. +It is only when the `Contour` package actually gets loaded that the `PlottingContourExt` extension is loaded too +and provides the new functionality. + +In our example, the new functionality is an additional _method_, which we add to an existing _function_ from the parent package `Plotting`. +Implementing such methods is among the most standard use cases of package extensions. +Within the parent package, the function to extend can even be defined with zero methods, as follows: + +```julia +function plot end +``` + +!!! note + If one considers `PlottingContourExt` as a completely separate package, it could be argued that defining `Plotting.plot(c::Contour.ContourCollection)` is + [type piracy](@ref avoid-type-piracy) since `PlottingContourExt` _owns_ neither the function `Plotting.plot` nor the type `Contour.ContourCollection`. + However, for extensions, it is ok to assume that the extension owns the functions in its parent package. + +In other situations, one may need to define new symbols in the extension (types, structs, functions, etc.) instead of reusing those from the parent package. +Such symbols are created in a separate module corresponding to the extension, namely `PlottingContourExt`, and thus not in `Plotting` itself. 
+If extension symbols are needed in the parent package, one must call `Base.get_extension` to retrieve them. +Here is an example showing how a custom type defined in `PlottingContourExt` can be accessed in `Plotting`: + +```julia +ext = Base.get_extension(@__MODULE__, :PlottingContourExt) +if !isnothing(ext) + ContourPlotType = ext.ContourPlotType +end +``` + +On the other hand, accessing extension symbols from a third-party package (i.e. not the parent) is not a recommended practice at the moment. + +### Backwards compatibility + +This section discusses various methods for using extensions on Julia versions that support them, +while simultaneously providing similar functionality on older Julia versions. + +#### Requires.jl + +This section is relevant if you are currently using Requires.jl but want to transition to using extensions (while still having Requires be used on Julia versions that do not support extensions). +This is done by making the following changes (using the example above): + +- Add the following to the package file. This makes it so that Requires.jl loads and inserts the + callback only when extensions are not supported + ```julia + # This symbol is only defined on Julia versions that support extensions + if !isdefined(Base, :get_extension) + using Requires + end + + @static if !isdefined(Base, :get_extension) + function __init__() + @require Contour = "d38c429a-6771-53c6-b99e-75d170b6e991" include("../ext/PlottingContourExt.jl") + end + end + ``` + or if you have other things in your `__init__()` function: + ```julia + if !isdefined(Base, :get_extension) + using Requires + end + + function __init__() + # Other init functionality here + + @static if !isdefined(Base, :get_extension) + @require Contour = "d38c429a-6771-53c6-b99e-75d170b6e991" include("../ext/PlottingContourExt.jl") + end + end + ``` +- Make the following change in the conditionally-loaded code: + ```julia + isdefined(Base, :get_extension) ? (using Contour) : (using ..Contour) + ``` +- Add `Requires` to `[weakdeps]` in your `Project.toml` file, so that it is listed in both `[deps]` and `[weakdeps]`. + Julia 1.9+ knows to not install it as a regular dependency, whereas earlier versions will consider it a dependency. + +The package should now work with Requires.jl on Julia versions before extensions were introduced +and with extensions on more recent Julia versions. + +#### Transition from normal dependency to extension + +This section is relevant if you have a normal dependency that you want to transition be an extension (while still having the dependency be a normal dependency on Julia versions that do not support extensions). +This is done by making the following changes (using the example above): + +- Make sure that the package is **both** in the `[deps]` and `[weakdeps]` section. Newer Julia versions will ignore dependencies in `[deps]` that are also in `[weakdeps]`. +- Add the following to your main package file (typically at the bottom): + ```julia + if !isdefined(Base, :get_extension) + include("../ext/PlottingContourExt.jl") + end + ``` + +#### Using an extension while supporting older Julia versions + +In the case where one wants to use an extension (without worrying about the +feature of the extension being available on older Julia versions) while still +supporting older Julia versions the packages under `[weakdeps]` should be +duplicated into `[extras]`. 
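+Continuing the `Plotting`/`Contour` example from above, a minimal sketch of what this duplication could look like in `Project.toml` (the UUID and compat bound are taken from the earlier example):
+```toml
+[weakdeps]
+Contour = "d38c429a-6771-53c6-b99e-75d170b6e991"
+
+[extras]
+# Same entry repeated so the project verifier on older Julia versions accepts the `[compat]` bound
+Contour = "d38c429a-6771-53c6-b99e-75d170b6e991"
+
+[compat]
+Contour = "0.6.2"
+```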
This is an unfortunate duplication, but without +doing this the project verifier under older Julia versions will throw an error +if it finds packages under `[compat]` that is not listed in `[extras]`. + +## Package naming guidelines + +Package names should be sensible to most Julia users, *even to those who are not domain experts*. +The following guidelines apply to the `General` registry but may be useful for other package +registries as well. + +Since the `General` registry belongs to the entire community, people may have opinions about +your package name when you publish it, especially if it's ambiguous or can be confused with +something other than what it is. Usually, you will then get suggestions for a new name that +may fit your package better. + +1. Avoid jargon. In particular, avoid acronyms unless there is minimal possibility of confusion. + + * It's ok to say `USA` if you're talking about the USA. + * It's not ok to say `PMA`, even if you're talking about positive mental attitude. +2. Avoid using `Julia` in your package name or prefixing it with `Ju`. + + * It is usually clear from context and to your users that the package is a Julia package. + * Package names already have a `.jl` extension, which communicates to users that `Package.jl` is a Julia package. + * Having Julia in the name can imply that the package is connected to, or endorsed by, contributors + to the Julia language itself. +3. Packages that provide most of their functionality in association with a new type should have pluralized + names. + + * `DataFrames` provides the `DataFrame` type. + * `BloomFilters` provides the `BloomFilter` type. + * In contrast, `JuliaParser` provides no new type, but instead new functionality in the `JuliaParser.parse()` + function. +4. Err on the side of clarity, even if clarity seems long-winded to you. + + * `RandomMatrices` is a less ambiguous name than `RndMat` or `RMT`, even though the latter are shorter. +5. A less systematic name may suit a package that implements one of several possible approaches to + its domain. + + * Julia does not have a single comprehensive plotting package. Instead, `Gadfly`, `PyPlot`, `Winston` + and other packages each implement a unique approach based on a particular design philosophy. + * In contrast, `SortingAlgorithms` provides a consistent interface to use many well-established + sorting algorithms. +6. Packages that wrap external libraries or programs should be named after those libraries or programs. + + * `CPLEX.jl` wraps the `CPLEX` library, which can be identified easily in a web search. + * `MATLAB.jl` provides an interface to call the MATLAB engine from within Julia. +7. Avoid naming a package closely to an existing package + * `Websocket` is too close to `WebSockets` and can be confusing to users. Rather use a new name such as `SimpleWebsockets`. + +## Registering packages + +Once a package is ready it can be registered with the [General Registry](https://github.com/JuliaRegistries/General#registering-a-package-in-general) (see also the [FAQ](https://github.com/JuliaRegistries/General#faq)). +Currently, packages are submitted via [`Registrator`](https://juliaregistrator.github.io/). +In addition to `Registrator`, [`TagBot`](https://github.com/marketplace/actions/julia-tagbot) helps manage the process of tagging releases. + +## Best Practices + +Packages should avoid mutating their own state (writing to files within their package directory). +Packages should, in general, not assume that they are located in a writable location (e.g. 
if installed as part of a system-wide depot) or even a stable one (e.g. if they are bundled into a system image by [PackageCompiler.jl](https://github.com/JuliaLang/PackageCompiler.jl)). +To support the various use cases in the Julia package ecosystem, the Pkg developers have created a number of auxiliary packages and techniques to help package authors create self-contained, immutable, and relocatable packages: + +* [`Artifacts`](https://pkgdocs.julialang.org/v1/artifacts/) can be used to bundle chunks of data alongside your package, or even allow them to be downloaded on-demand. + Prefer artifacts over attempting to open a file via a path such as `joinpath(@__DIR__, "data", "my_dataset.csv")` as this is non-relocatable. + Once your package has been precompiled, the result of `@__DIR__` will have been baked into your precompiled package data, and if you attempt to distribute this package, it will attempt to load files at the wrong location. + Artifacts can be bundled and accessed easily using the `artifact"name"` string macro. + +* [`Scratch.jl`](https://github.com/JuliaPackaging/Scratch.jl) provides the notion of "scratch spaces", mutable containers of data for packages. + Scratch spaces are designed for data caches that are completely managed by a package and should be removed when the package itself is uninstalled. + For important user-generated data, packages should continue to write out to a user-specified path that is not managed by Julia or Pkg. + +* [`Preferences.jl`](https://github.com/JuliaPackaging/Preferences.jl) allows packages to read and write preferences to the top-level `Project.toml`. + These preferences can be read at runtime or compile-time, to enable or disable different aspects of package behavior. + Packages previously would write out files to their own package directories to record options set by the user or environment, but this is highly discouraged now that `Preferences` is available. diff --git a/doc/src/tutorials/external.md b/doc/src/tutorials/external.md new file mode 100644 index 0000000000000..0211db3d63a5e --- /dev/null +++ b/doc/src/tutorials/external.md @@ -0,0 +1,4 @@ +# External Tutorials + +We have created a non-exhaustive list of community provided Julia tutorials. +[Check them out to learn Julia through the lens of someone from the community](https://julialang.org/learning/tutorials/). diff --git a/doc/src/manual/profile.md b/doc/src/tutorials/profile.md similarity index 77% rename from doc/src/manual/profile.md rename to doc/src/tutorials/profile.md index c3dc1ca090a46..a3baec593a53d 100644 --- a/doc/src/manual/profile.md +++ b/doc/src/tutorials/profile.md @@ -59,11 +59,13 @@ julia> @profile myfunc() To see the profiling results, there are several graphical browsers. 
One "family" of visualizers is based on [FlameGraphs.jl](https://github.com/timholy/FlameGraphs.jl), with each family member providing a different user interface: -- [Juno](https://junolab.org/) is a full IDE with built-in support for profile visualization +- [VS Code](https://www.julia-vscode.org/) is a full IDE with built-in support for profile visualization - [ProfileView.jl](https://github.com/timholy/ProfileView.jl) is a stand-alone visualizer based on GTK - [ProfileVega.jl](https://github.com/davidanthoff/ProfileVega.jl) uses VegaLight and integrates well with Jupyter notebooks -- [StatProfilerHTML](https://github.com/tkluck/StatProfilerHTML.jl) produces HTML and presents some additional summaries, and also integrates well with Jupyter notebooks -- [ProfileSVG](https://github.com/timholy/ProfileSVG.jl) renders SVG +- [StatProfilerHTML.jl](https://github.com/tkluck/StatProfilerHTML.jl) produces HTML and presents some additional summaries, and also integrates well with Jupyter notebooks +- [ProfileSVG.jl](https://github.com/timholy/ProfileSVG.jl) renders SVG +- [PProf.jl](https://github.com/JuliaPerf/PProf.jl) serves a local website for inspecting graphs, flamegraphs and more +- [ProfileCanvas.jl](https://github.com/pfitzseb/ProfileCanvas.jl) is a HTML canvas based profile viewer UI, used by the [Julia VS Code extension](https://www.julia-vscode.org/), but can also generate interactive HTML files. An entirely independent approach to profile visualization is [PProf.jl](https://github.com/vchuravy/PProf.jl), which uses the external `pprof` tool. @@ -298,35 +300,15 @@ on the author's laptop). ## Memory allocation analysis One of the most common techniques to improve performance is to reduce memory allocation. Julia -provides several tools measure this: +provides several tools to measure this: ### `@time` -The total amount of allocation can be measured with [`@time`](@ref) and [`@allocated`](@ref), and -specific lines triggering allocation can often be inferred from profiling via the cost of garbage +The total amount of allocation can be measured with [`@time`](@ref), [`@allocated`](@ref) and [`@allocations`](@ref), +and specific lines triggering allocation can often be inferred from profiling via the cost of garbage collection that these lines incur. However, sometimes it is more efficient to directly measure the amount of memory allocated by each line of code. -### Line-by-Line Allocation Tracking - -To measure allocation line-by-line, start Julia with the `--track-allocation=` command-line -option, for which you can choose `none` (the default, do not measure allocation), `user` (measure -memory allocation everywhere except Julia's core code), or `all` (measure memory allocation at -each line of Julia code). Allocation gets measured for each line of compiled code. When you quit -Julia, the cumulative results are written to text files with `.mem` appended after the file name, -residing in the same directory as the source file. Each line lists the total number of bytes -allocated. The [`Coverage` package](https://github.com/JuliaCI/Coverage.jl) contains some elementary -analysis tools, for example to sort the lines in order of number of bytes allocated. - -In interpreting the results, there are a few important details. Under the `user` setting, the -first line of any function directly called from the REPL will exhibit allocation due to events -that happen in the REPL code itself. 
More significantly, JIT-compilation also adds to allocation -counts, because much of Julia's compiler is written in Julia (and compilation usually requires -memory allocation). The recommended procedure is to force compilation by executing all the commands -you want to analyze, then call [`Profile.clear_malloc_data()`](@ref) to reset all allocation counters. - Finally, execute the desired commands and quit Julia to trigger the generation of the `.mem` -files. - ### GC Logging While [`@time`](@ref) logs high-level stats about memory usage and garbage collection over the course @@ -336,32 +318,137 @@ and how much garbage it collects each time. This can be enabled with [`GC.enable_logging(true)`](@ref), which causes Julia to log to stderr every time a garbage collection happens. -### Allocation Profiler +### [Allocation Profiler](@id allocation-profiler) + +!!! compat "Julia 1.8" + This functionality requires at least Julia 1.8. The allocation profiler records the stack trace, type, and size of each allocation while it is running. It can be invoked with [`Profile.Allocs.@profile`](@ref). This information about the allocations is returned as an array of `Alloc` -objects, wrapped in an `AllocResults` object. The best way to visualize -these is currently with the [PProf.jl](https://github.com/JuliaPerf/PProf.jl) -library, which can visualize the call stacks which are making the most -allocations. +objects, wrapped in an `AllocResults` object. The best way to visualize these is +currently with the [PProf.jl](https://github.com/JuliaPerf/PProf.jl) and +[ProfileCanvas.jl](https://github.com/pfitzseb/ProfileCanvas.jl) packages, which +can visualize the call stacks which are making the most allocations. The allocation profiler does have significant overhead, so a `sample_rate` argument can be passed to speed it up by making it skip some allocations. Passing `sample_rate=1.0` will make it record everything (which is slow); `sample_rate=0.1` will record only 10% of the allocations (faster), etc. -!!! note +!!! compat "Julia 1.11" + + Older versions of Julia could not capture types in all cases. In older versions of + Julia, if you see an allocation of type `Profile.Allocs.UnknownType`, it means that + the profiler doesn't know what type of object was allocated. This mainly happened when + the allocation was coming from generated code produced by the compiler. See + [issue #43688](https://github.com/JuliaLang/julia/issues/43688) for more info. + + Since Julia 1.11, all allocations should have a type reported. + +For more details on how to use this tool, please see the following talk from JuliaCon 2022: +https://www.youtube.com/watch?v=BFvpwC8hEWQ + +##### Allocation Profiler Example + +In this simple example, we use PProf to visualize the alloc profile. You could use another +visualization tool instead. We collect the profile (specifying a sample rate), then we visualize it. +```julia +using Profile, PProf +Profile.Allocs.clear() +Profile.Allocs.@profile sample_rate=0.0001 my_function() +PProf.Allocs.pprof() +``` + +Here is a more in-depth example, showing how we can tune the sample rate. A +good number of samples to aim for is around 1 - 10 thousand. Too many, and the +profile visualizer can get overwhelmed, and profiling will be slow. Too few, +and you don't have a representative sample. 
+ + +```julia-repl +julia> import Profile + +julia> @time my_function() # Estimate allocations from a (second-run) of the function + 0.110018 seconds (1.50 M allocations: 58.725 MiB, 17.17% gc time) +500000 + +julia> Profile.Allocs.clear() - The current implementation of the Allocations Profiler _does not - capture types for all allocations._ Allocations for which the profiler - could not capture the type are represented as having type - `Profile.Allocs.UnknownType`. +julia> Profile.Allocs.@profile sample_rate=0.001 begin # 1.5 M * 0.001 = ~1.5K allocs. + my_function() + end +500000 + +julia> prof = Profile.Allocs.fetch(); # If you want, you can also manually inspect the results. + +julia> length(prof.allocs) # Confirm we have expected number of allocations. +1515 + +julia> using PProf # Now, visualize with an external tool, like PProf or ProfileCanvas. + +julia> PProf.Allocs.pprof(prof; from_c=false) # You can optionally pass in a previously fetched profile result. +Analyzing 1515 allocation samples... 100%|████████████████████████████████| Time: 0:00:00 +Main binary filename not available. +Serving web UI on http://localhost:62261 +"alloc-profile.pb.gz" +``` +Then you can view the profile by navigating to http://localhost:62261, and the profile is saved to disk. +See PProf package for more options. + +##### Allocation Profiling Tips + +As stated above, aim for around 1-10 thousand samples in your profile. + +Note that we are uniformly sampling in the space of _all allocations_, and are not weighting +our samples by the size of the allocation. So a given allocation profile may not give a +representative profile of where most bytes are allocated in your program, unless you had set +`sample_rate=1`. + +Allocations can come from users directly constructing objects, but can also come from inside +the runtime or be inserted into compiled code to handle type instability. Looking at the +"source code" view can be helpful to isolate them, and then other external tools such as +[`Cthulhu.jl`](https://github.com/JuliaDebug/Cthulhu.jl) can be useful for identifying the +cause of the allocation. + +##### Allocation Profile Visualization Tools + +There are several profiling visualization tools now that can all display Allocation +Profiles. Here is a small list of some of the main ones we know about: +- [PProf.jl](https://github.com/JuliaPerf/PProf.jl) +- [ProfileCanvas.jl](https://github.com/pfitzseb/ProfileCanvas.jl) +- VSCode's built-in profile visualizer (`@profview_allocs`) [docs needed] +- Viewing the results directly in the REPL + - You can inspect the results in the REPL via [`Profile.Allocs.fetch()`](@ref), to view + the stacktrace and type of each allocation. + +#### Line-by-Line Allocation Tracking + +An alternative way to measure allocations is to start Julia with the `--track-allocation=` command-line +option, for which you can choose `none` (the default, do not measure allocation), `user` (measure +memory allocation everywhere except Julia's core code), or `all` (measure memory allocation at +each line of Julia code). Allocation gets measured for each line of compiled code. When you quit +Julia, the cumulative results are written to text files with `.mem` appended after the file name, +residing in the same directory as the source file. Each line lists the total number of bytes +allocated. The [`Coverage` package](https://github.com/JuliaCI/Coverage.jl) contains some elementary +analysis tools, for example to sort the lines in order of number of bytes allocated. 
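+For example, a short post-run analysis sketch, assuming `.mem` files were produced by a previous run with `--track-allocation=user` and that the Coverage.jl package (with its `analyze_malloc` helper) is installed:
+```julia
+# First run the workload, e.g.:  julia --track-allocation=user script.jl
+# After Julia exits, *.mem files appear next to each source file.
+using Coverage
+mallocs = analyze_malloc("src")   # gather per-line allocation records from *.mem files under src/
+foreach(println, mallocs)         # each record reports the bytes allocated, the file, and the line
+```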
+ +In interpreting the results, there are a few important details. Under the `user` setting, the +first line of any function directly called from the REPL will exhibit allocation due to events +that happen in the REPL code itself. More significantly, JIT-compilation also adds to allocation +counts, because much of Julia's compiler is written in Julia (and compilation usually requires +memory allocation). The recommended procedure is to force compilation by executing all the commands +you want to analyze, then call [`Profile.clear_malloc_data()`](@ref) to reset all allocation counters. + Finally, execute the desired commands and quit Julia to trigger the generation of the `.mem` +files. + +!!! note - You can read more about the missing types and the plan to improve this, here: - https://github.com/JuliaLang/julia/issues/43688. + `--track-allocation` changes code generation to log the allocations, and so the allocations may + be different than what happens without the option. We recommend using the + [allocation profiler](@ref allocation-profiler) instead. ## External Profiling @@ -370,7 +457,7 @@ Currently Julia supports `Intel VTune`, `OProfile` and `perf` as external profil Depending on the tool you choose, compile with `USE_INTEL_JITEVENTS`, `USE_OPROFILE_JITEVENTS` and `USE_PERF_JITEVENTS` set to 1 in `Make.user`. Multiple flags are supported. -Before running Julia set the environment variable `ENABLE_JITPROFILING` to 1. +Before running Julia set the environment variable [`ENABLE_JITPROFILING`](@ref ENABLE_JITPROFILING) to 1. Now you have a multitude of ways to employ those tools! For example with `OProfile` you can try a simple recording : diff --git a/julia.spdx.json b/julia.spdx.json index d90a4e40e3273..bea7bdc6c3a5d 100644 --- a/julia.spdx.json +++ b/julia.spdx.json @@ -24,7 +24,7 @@ "licenseDeclared": "MIT", "copyrightText": "Copyright (c) 2009-2022: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors", "summary": "Julia is a high-level, high-performance dynamic language for technical computing.", - "comment": "In addition to the source code described by this package, Julia pulls in code from many other respositories, which are also described in this document. See relationships for details." + "comment": "In addition to the source code described by this package, Julia pulls in code from many other repositories, which are also described in this document. See relationships for details." }, { "name": "Pkg.jl", @@ -146,25 +146,13 @@ "copyrightText": "Copyright (c) 2014: Elliot Saba", "summary": "A performant, 100% native-julia SHA1, SHA2, and SHA3 implementation" }, - { - "name": "DelimitedFiles.jl", - "SPDXID": "SPDXRef-JuliaDelimitedFiles", - "downloadLocation": "git+https://github.com/JuliaData/DelimitedFiles.jl.git", - "filesAnalyzed": false, - "homepage": "https://julialang.org", - "sourceInfo": "The git hash of the version in use can be found in the file stdlib/DelimitedFiles.version", - "licenseConcluded": "MIT", - "licenseDeclared": "MIT", - "copyrightText": "Copyright (c) 2012-2022 The Julia Programming Language", - "summary": "A package for reading and writing files with delimited values." 
- }, { "name": "dSFMT", "SPDXID": "SPDXRef-dSFMT", "downloadLocation": "git+https://github.com/MersenneTwister-Lab/dSFMT.git", "filesAnalyzed": false, "homepage": "https://github.com/MersenneTwister-Lab/dSFMT", - "sourceInfo": "The git hash of the version in use can be found in the file deps/Versions.make", + "sourceInfo": "The git hash of the version in use can be found in the file deps/dsfmt.version", "licenseConcluded": "BSD-3-Clause", "licenseDeclared": "BSD-3-Clause", "copyrightText": "Copyright (c) 2007, 2008, 2009 Mutsuo Saito, Makoto Matsumoto and Hiroshima University. Copyright (c) 2011, 2002 Mutsuo Saito, Makoto Matsumoto, Hiroshima University and The University of Tokyo.", @@ -188,7 +176,7 @@ "downloadLocation": "https://gmplib.org/download/gmp/", "filesAnalyzed": false, "homepage": "https://gmplib.org/", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/gmp.version", "licenseConcluded": "LGPL-3.0-or-later", "licenseDeclared": "LGPL-3.0-or-later OR GPL-2.0-or-later", "copyrightText": "Copyright 1991, 1996, 1999, 2000, 2007 Free Software Foundation, Inc.", @@ -212,7 +200,7 @@ "downloadLocation": "git+https://github.com/curl/curl.git", "filesAnalyzed": false, "homepage": "https://curl.se", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/curl.version", "licenseConcluded": "curl", "licenseDeclared": "curl", "copyrightText": "Copyright (c) 1996 - 2021, Daniel Stenberg, daniel@haxx.se, and many contributors, see the THANKS file.", @@ -236,7 +224,7 @@ "downloadLocation": "git+https://github.com/ARMmbed/mbedtls.git", "filesAnalyzed": false, "homepage": "https://tls.mbed.org", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/mbedtls.version", "licenseConcluded": "Apache-2.0", "licenseDeclared": "Apache-2.0", "copyrightText": "NOASSERTION", @@ -248,7 +236,7 @@ "downloadLocation": "https://www.mpfr.org/", "filesAnalyzed": false, "homepage": "https://www.mpfr.org/", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/mpfr.version", "licenseConcluded": "LGPL-3.0-or-later", "licenseDeclared": "LGPL-3.0-or-later", "copyrightText": "Copyright 2000-2020 Free Software Foundation, Inc.", @@ -272,7 +260,7 @@ "downloadLocation": "https://www.netlib.org/lapack/", "filesAnalyzed": false, "homepage": "https://netlib.org/", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/openblas.version", "licenseConcluded": "BSD-3-Clause", "licenseDeclared": "BSD-3-Clause", "copyrightText": "Copyright (c) 1992-2013 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved.\nCopyright (c) 2000-2013 The University of California Berkeley. All rights reserved.\nCopyright (c) 2006-2013 The University of Colorado Denver. 
All rights reserved.", @@ -284,7 +272,7 @@ "downloadLocation": "git+https://github.com/PhilipHazel/pcre2.git", "filesAnalyzed": false, "homepage": "https://www.pcre.org", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/pcre.version", "licenseConcluded": "BSD-3-Clause", "licenseDeclared": "BSD-3-Clause", "copyrightText": "Copyright (c) 1997-2021 University of Cambridge All rights reserved.\nCopyright(c) 2009-2021 Zoltan Herczeg\n", @@ -297,7 +285,7 @@ "downloadLocation": "git+https://github.com/DrTimothyAldenDavis/SuiteSparse.git", "filesAnalyzed": false, "homepage": "https://people.engr.tamu.edu/davis/suitesparse.html", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/libsuitesparse.version", "licenseConcluded": "GPL-2.0-or-later", "licenseDeclared": "LGPL-2.0-or-later AND GPL-2.0-or-later AND BSD-3 AND Apache-2.0 ", "licenseComments": "SuiteSparse consists of many modules, each of which is licensed separately.", @@ -334,7 +322,7 @@ "downloadLocation": "git+https://github.com/libunwind/libunwind.git", "filesAnalyzed": false, "homepage": "http://www.nongnu.org/libunwind/", - "sourceInfo": "The git hash of the version in use can be found in the file deps/Versions.make", + "sourceInfo": "The git hash of the version in use can be found in the file deps/unwind.version", "licenseConcluded": "MIT", "licenseDeclared": "MIT", "copyrightText": "Copyright (c) 2002 Hewlett-Packard Co.", @@ -388,7 +376,7 @@ "downloadLocation": "https://sourceforge.net/projects/p7zip/files/p7zip", "filesAnalyzed": false, "homepage": "https://www.7-zip.org", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/p7zip.version", "licenseConcluded": "LGPL-3.0-or-later", "licenseDeclared": "LGPL-3.0-or-later AND BSD-3", "copyrightText": "Copyright (C) 1999-2021 Igor Pavlov", @@ -412,12 +400,12 @@ "downloadLocation": "git+https://github.com/NixOS/patchelf.git", "filesAnalyzed": false, "homepage": "https://nixos.org/patchelf.html", - "sourceInfo": "The version in use can be found in the file deps/Versions.make", + "sourceInfo": "The version in use can be found in the file deps/patchelf.version", "licenseConcluded": "GPL-3.0-or-later", "licenseDeclared": "GPL-3.0-or-later", "copyrightText": "Copyright (C) 2007 Free Software Foundation, Inc. ", "summary": "A small utility to modify the dynamic linker and RPATH of ELF executables.", - "comment": "PATCHELF is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. Julia chooses to build the tool from source during the build process as a convienence." + "comment": "PATCHELF is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. Julia chooses to build the tool from source during the build process as a convenience." }, { "name": "objconv", @@ -429,7 +417,7 @@ "licenseDeclared": "GPL-3.0-or-later", "copyrightText": "By Agner Fog © 2018", "summary": "A utility for cross-platform development of function libraries, for converting and modifying object files and for dumping and disassembling object and executable files for all x86 and x86-64 platforms.", - "comment": "OBJCONV is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. 
Julia chooses to build the tool from source during the build process as a convienence." + "comment": "OBJCONV is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. Julia chooses to build the tool from source during the build process as a convenience." }, { "name": "libwhich", @@ -442,7 +430,7 @@ "licenseDeclared": "MIT", "copyrightText": "Copyright (c) 2017 Jameson Nash", "summary": "Like `which`, for dynamic libraries", - "comment": "LIBWHICH is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. Julia chooses to build the tool from source during the build process as a convienence." + "comment": "LIBWHICH is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. Julia chooses to build the tool from source during the build process as a convenience." } ], "hasExtractedLicensingInfos": [ @@ -503,11 +491,6 @@ "relationshipType": "BUILD_DEPENDENCY_OF", "relatedSpdxElement": "SPDXRef-JuliaMain" }, - { - "spdxElementId": "SPDXRef-JuliaDelimitedFiles", - "relationshipType": "BUILD_DEPENDENCY_OF", - "relatedSpdxElement": "SPDXRef-JuliaMain" - }, { "spdxElementId": "SPDXRef-dSFMT", "relationshipType": "BUILD_DEPENDENCY_OF", diff --git a/pkgimage.mk b/pkgimage.mk new file mode 100644 index 0000000000000..83c66bd94c702 --- /dev/null +++ b/pkgimage.mk @@ -0,0 +1,134 @@ +SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) +BUILDDIR := . +JULIAHOME := $(SRCDIR) +include $(JULIAHOME)/Make.inc +include $(JULIAHOME)/stdlib/stdlib.mk + + +# set some influential environment variables +export JULIA_DEPOT_PATH := $(build_prefix)/share/julia +export JULIA_LOAD_PATH := @stdlib +unexport JULIA_PROJECT := +unexport JULIA_BINDIR := + +export JULIA_FALLBACK_REPL := true + +default: release +release: all-release +debug: all-debug +all: release debug + +$(JULIA_DEPOT_PATH): + mkdir -p $@ + +print-depot-path: + @$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --startup-file=no -e '@show Base.DEPOT_PATH') + +all-release: $(addprefix cache-release-, $(STDLIBS)) +all-debug: $(addprefix cache-debug-, $(STDLIBS)) + +define stdlib_builder +ifneq ($(filter $(1),$(INDEPENDENT_STDLIBS)),) +# Define target-specific export for `JULIA_CPU_TARGET` +$$(BUILDDIR)/stdlib/$1.release.image: export JULIA_CPU_TARGET=$(JULIA_CPU_TARGET) +$$(BUILDDIR)/stdlib/$1.debug.image: export JULIA_CPU_TARGET=$(JULIA_CPU_TARGET) + +$$(BUILDDIR)/stdlib/$1.release.image: $$($1_SRCS) $$(addsuffix .release.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys.$(SHLIB_EXT) + @$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))') + @$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))') + touch $$@ +$$(BUILDDIR)/stdlib/$1.debug.image: $$($1_SRCS) $$(addsuffix .debug.image,$$(addprefix $$(BUILDDIR)/stdlib/,$2)) $(build_private_libdir)/sys-debug.$(SHLIB_EXT) + @$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no --check-bounds=yes -e 'Base.compilecache(Base.identify_package("$1"))') + @$$(call PRINT_JULIA, $$(call spawn,$$(JULIA_EXECUTABLE)) --startup-file=no -e 'Base.compilecache(Base.identify_package("$1"))') + touch $$@ +else +ifneq ($(filter $(1),$(STDLIBS_WITHIN_SYSIMG)),) +$$(BUILDDIR)/stdlib/$1.release.image: + touch $$@ +$$(BUILDDIR)/stdlib/$1.debug.image: + touch $$@ +else +$$(error $(1) 
neither in STDLIBS_WITHIN_SYSIMG nor INDEPENDENT_STDLIBS) +endif +endif +cache-release-$1: $$(BUILDDIR)/stdlib/$1.release.image +cache-debug-$1: $$(BUILDDIR)/stdlib/$1.debug.image +.SECONDARY: $$(BUILDDIR)/stdlib/$1.release.image $$(BUILDDIR)/stdlib/$1.debug.image +endef + +# no dependencies +$(eval $(call stdlib_builder,MozillaCACerts_jll,)) +$(eval $(call stdlib_builder,ArgTools,)) +$(eval $(call stdlib_builder,Artifacts,)) +$(eval $(call stdlib_builder,Base64,)) +$(eval $(call stdlib_builder,CRC32c,)) +$(eval $(call stdlib_builder,FileWatching,)) +$(eval $(call stdlib_builder,Libdl,)) +$(eval $(call stdlib_builder,Logging,)) +$(eval $(call stdlib_builder,Mmap,)) +$(eval $(call stdlib_builder,NetworkOptions,)) +$(eval $(call stdlib_builder,SHA,)) +$(eval $(call stdlib_builder,Serialization,)) +$(eval $(call stdlib_builder,Sockets,)) +$(eval $(call stdlib_builder,Unicode,)) +$(eval $(call stdlib_builder,Profile,)) +$(eval $(call stdlib_builder,StyledStrings,)) + +# 1-depth packages +$(eval $(call stdlib_builder,GMP_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,LLVMLibUnwind_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,LibUV_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,LibUnwind_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,MbedTLS_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,nghttp2_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,OpenLibm_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,PCRE2_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,Zlib_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,dSFMT_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,libLLVM_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,libblastrampoline_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,p7zip_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,OpenBLAS_jll,Artifacts Libdl)) +$(eval $(call stdlib_builder,Markdown,Base64)) +$(eval $(call stdlib_builder,Printf,Unicode)) +$(eval $(call stdlib_builder,Random,SHA)) +$(eval $(call stdlib_builder,Tar,ArgTools,SHA)) +$(eval $(call stdlib_builder,DelimitedFiles,Mmap)) + +# 2-depth packages +$(eval $(call stdlib_builder,LLD_jll,Zlib_jll libLLVM_jll Artifacts Libdl)) +$(eval $(call stdlib_builder,LibSSH2_jll,Artifacts Libdl MbedTLS_jll)) +$(eval $(call stdlib_builder,MPFR_jll,Artifacts Libdl GMP_jll)) +$(eval $(call stdlib_builder,LinearAlgebra,Libdl libblastrampoline_jll OpenBLAS_jll)) +$(eval $(call stdlib_builder,Dates,Printf)) +$(eval $(call stdlib_builder,Distributed,Random Serialization Sockets)) +$(eval $(call stdlib_builder,Future,Random)) +$(eval $(call stdlib_builder,UUIDs,Random SHA)) +$(eval $(call stdlib_builder,InteractiveUtils,Markdown)) + + # 3-depth packages +$(eval $(call stdlib_builder,LibGit2_jll,MbedTLS_jll LibSSH2_jll Artifacts Libdl)) +$(eval $(call stdlib_builder,LibCURL_jll,LibSSH2_jll nghttp2_jll MbedTLS_jll Zlib_jll Artifacts Libdl)) +$(eval $(call stdlib_builder,REPL,InteractiveUtils Markdown Sockets Unicode)) +$(eval $(call stdlib_builder,SharedArrays,Distributed Mmap Random Serialization)) +$(eval $(call stdlib_builder,TOML,Dates)) +$(eval $(call stdlib_builder,Test,Logging Random Serialization InteractiveUtils)) + +# 4-depth packages +$(eval $(call stdlib_builder,LibGit2,LibGit2_jll NetworkOptions Printf SHA Base64)) +$(eval $(call stdlib_builder,LibCURL,LibCURL_jll MozillaCACerts_jll)) + +# 5-depth packages +$(eval $(call stdlib_builder,Downloads,ArgTools FileWatching LibCURL NetworkOptions)) + +# 6-depth packages +$(eval $(call stdlib_builder,Pkg, Artifacts Dates Downloads 
FileWatching LibGit2 Libdl\ + Logging Markdown Printf REPL Random SHA Serialization\ + TOML Tar UUIDs p7zip_jll)) + +# 7-depth packages +$(eval $(call stdlib_builder,LazyArtifacts,Artifacts Pkg)) + +$(eval $(call stdlib_builder,SparseArrays,Libdl LinearAlgebra Random Serialization)) +$(eval $(call stdlib_builder,Statistics,LinearAlgebra SparseArrays)) +# SuiteSparse_jll diff --git a/src/.gitignore b/src/.gitignore index 388e971d4f12d..4ddd75fbb5d62 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -21,6 +21,7 @@ /julia_version.h /flisp/host /support/host +/base/ # Clang compilation database /compile_commands*.json diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp index bc0a62e21dd3e..22b3beef996db 100644 --- a/src/APInt-C.cpp +++ b/src/APInt-C.cpp @@ -7,16 +7,11 @@ #include #include "APInt-C.h" -#include "julia.h" #include "julia_assert.h" #include "julia_internal.h" using namespace llvm; -inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { - return alignTo(Value, Align, Skew); -} - const unsigned int integerPartWidth = llvm::APInt::APINT_BITS_PER_WORD; const unsigned int host_char_bit = 8; @@ -25,15 +20,15 @@ const unsigned int host_char_bit = 8; APInt s; \ if ((numbits % integerPartWidth) != 0) { \ /* use LLT_ALIGN to round the memory area up to the nearest integerPart-sized chunk */ \ - unsigned nbytes = RoundUpToAlignment(numbits, integerPartWidth) / host_char_bit; \ + unsigned nbytes = alignTo(numbits, integerPartWidth) / host_char_bit; \ integerPart *data_a64 = (integerPart*)alloca(nbytes); \ /* TODO: this memcpy assumes little-endian, * for big-endian, need to align the copy to the other end */ \ - memcpy(data_a64, p##s, RoundUpToAlignment(numbits, host_char_bit) / host_char_bit); \ - s = APInt(numbits, makeArrayRef(data_a64, nbytes / sizeof(integerPart))); \ + memcpy(data_a64, p##s, alignTo(numbits, host_char_bit) / host_char_bit); \ + s = APInt(numbits, ArrayRef(data_a64, nbytes / sizeof(integerPart))); \ } \ else { \ - s = APInt(numbits, makeArrayRef(p##s, numbits / integerPartWidth)); \ + s = APInt(numbits, ArrayRef(p##s, numbits / integerPartWidth)); \ } /* assign to "integerPart *pr" from "APInt a" */ @@ -47,7 +42,7 @@ const unsigned int host_char_bit = 8; else if (numbits <= 64) \ *(uint64_t*)p##r = a.getZExtValue(); \ else \ - memcpy(p##r, a.getRawData(), RoundUpToAlignment(numbits, host_char_bit) / host_char_bit); \ + memcpy(p##r, a.getRawData(), alignTo(numbits, host_char_bit) / host_char_bit); \ extern "C" JL_DLLEXPORT void LLVMNeg(unsigned numbits, integerPart *pa, integerPart *pr) { @@ -313,17 +308,25 @@ void LLVMByteSwap(unsigned numbits, integerPart *pa, integerPart *pr) { ASSIGN(r, a) } -void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) { +extern "C" float julia_half_to_float(uint16_t ival) JL_NOTSAFEPOINT; +extern "C" uint16_t julia_float_to_half(float param) JL_NOTSAFEPOINT; +extern "C" float julia_bfloat_to_float(uint16_t ival) JL_NOTSAFEPOINT; +extern "C" uint16_t julia_float_to_bfloat(float param) JL_NOTSAFEPOINT; + +void LLVMFPtoInt(jl_datatype_t *ty, void *pa, jl_datatype_t *oty, integerPart *pr, bool isSigned, bool *isExact) { double Val; - if (numbits == 16) - Val = __gnu_h2f_ieee(*(uint16_t*)pa); - else if (numbits == 32) + if (ty == jl_float16_type) + Val = julia_half_to_float(*(uint16_t*)pa); + else if (ty == jl_bfloat16_type) + Val = julia_bfloat_to_float(*(uint16_t*)pa); + else if (ty == jl_float32_type) Val = *(float*)pa; - else if (numbits == 64) + else if 
(jl_float64_type) Val = *(double*)pa; else jl_error("FPtoSI: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); - unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; + unsigned onumbytes = jl_datatype_size(oty); + unsigned onumbits = onumbytes * host_char_bit; if (onumbits <= 64) { // fast-path, if possible if (isSigned) { int64_t ia = Val; @@ -350,7 +353,7 @@ void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, APFloat a(Val); bool isVeryExact; APFloat::roundingMode rounding_mode = APFloat::rmNearestTiesToEven; - unsigned nbytes = RoundUpToAlignment(onumbits, integerPartWidth) / host_char_bit; + unsigned nbytes = alignTo(onumbits, integerPartWidth) / host_char_bit; integerPart *parts = (integerPart*)alloca(nbytes); APFloat::opStatus status = a.convertToInteger(MutableArrayRef(parts, nbytes), onumbits, isSigned, rounding_mode, &isVeryExact); memcpy(pr, parts, onumbytes); @@ -360,69 +363,78 @@ void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, } extern "C" JL_DLLEXPORT -void LLVMFPtoSI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - LLVMFPtoInt(numbits, pa, onumbits, pr, true, NULL); +void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { + LLVMFPtoInt(ty, pa, oty, pr, true, NULL); } extern "C" JL_DLLEXPORT -void LLVMFPtoUI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - LLVMFPtoInt(numbits, pa, onumbits, pr, false, NULL); +void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { + LLVMFPtoInt(ty, pa, oty, pr, false, NULL); } extern "C" JL_DLLEXPORT -int LLVMFPtoSI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { +int LLVMFPtoSI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { bool isExact; - LLVMFPtoInt(numbits, pa, onumbits, pr, true, &isExact); + LLVMFPtoInt(ty, pa, oty, pr, true, &isExact); return isExact; } extern "C" JL_DLLEXPORT -int LLVMFPtoUI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { +int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { bool isExact; - LLVMFPtoInt(numbits, pa, onumbits, pr, false, &isExact); + LLVMFPtoInt(ty, pa, oty, pr, false, &isExact); return isExact; } extern "C" JL_DLLEXPORT -void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { +void LLVMSItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { double val; { // end scope before jl_error call + unsigned numbytes = jl_datatype_size(ty); + unsigned numbits = numbytes * host_char_bit; CREATE(a) val = a.roundToDouble(true); } - if (onumbits == 16) - *(uint16_t*)pr = __gnu_f2h_ieee(val); - else if (onumbits == 32) + if (oty == jl_float16_type) + *(uint16_t*)pr = julia_float_to_half(val); + else if (oty == jl_bfloat16_type) + *(uint16_t*)pr = julia_float_to_bfloat(val); + else if (oty == jl_float32_type) *(float*)pr = val; - else if (onumbits == 64) + else if (oty == jl_float64_type) *(double*)pr = val; else jl_error("SItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); } extern "C" JL_DLLEXPORT -void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr) { +void LLVMUItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr) { double val; { // end scope before jl_error 
call + unsigned numbytes = jl_datatype_size(ty); + unsigned numbits = numbytes * host_char_bit; CREATE(a) val = a.roundToDouble(false); } - if (onumbits == 16) - *(uint16_t*)pr = __gnu_f2h_ieee(val); - else if (onumbits == 32) + if (oty == jl_float16_type) + *(uint16_t*)pr = julia_float_to_half(val); + else if (oty == jl_bfloat16_type) + *(uint16_t*)pr = julia_float_to_bfloat(val); + else if (oty == jl_float32_type) *(float*)pr = val; - else if (onumbits == 64) + else if (oty == jl_float64_type) *(double*)pr = val; else jl_error("UItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); } extern "C" JL_DLLEXPORT -void LLVMSExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - if (!(onumbits > inumbits)) +void LLVMSExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) { + unsigned inumbytes = jl_datatype_size(ty); + unsigned onumbytes = jl_datatype_size(otys); + if (!(onumbytes > inumbytes)) jl_error("SExt: output bitsize must be > input bitsize"); - unsigned inumbytes = RoundUpToAlignment(inumbits, host_char_bit) / host_char_bit; - unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; + unsigned inumbits = inumbytes * host_char_bit; int bits = (0 - inumbits) % host_char_bit; int signbit = (inumbits - 1) % host_char_bit; int sign = ((unsigned char*)pa)[inumbytes - 1] & (1 << signbit) ? -1 : 0; @@ -437,11 +449,12 @@ void LLVMSExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart } extern "C" JL_DLLEXPORT -void LLVMZExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - if (!(onumbits > inumbits)) +void LLVMZExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) { + unsigned inumbytes = jl_datatype_size(ty); + unsigned onumbytes = jl_datatype_size(otys); + if (!(onumbytes > inumbytes)) jl_error("ZExt: output bitsize must be > input bitsize"); - unsigned inumbytes = RoundUpToAlignment(inumbits, host_char_bit) / host_char_bit; - unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; + unsigned inumbits = inumbytes * host_char_bit; int bits = (0 - inumbits) % host_char_bit; // copy over the input bytes memcpy(pr, pa, inumbytes); @@ -454,10 +467,11 @@ void LLVMZExt(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart } extern "C" JL_DLLEXPORT -void LLVMTrunc(unsigned inumbits, integerPart *pa, unsigned onumbits, integerPart *pr) { - if (!(onumbits < inumbits)) +void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerPart *pr) { + unsigned inumbytes = jl_datatype_size(ty); + unsigned onumbytes = jl_datatype_size(otys); + if (!(onumbytes < inumbytes)) jl_error("Trunc: output bitsize must be < input bitsize"); - unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; memcpy(pr, pa, onumbytes); } diff --git a/src/APInt-C.h b/src/APInt-C.h index e71d49e82e99a..816d40ccc6529 100644 --- a/src/APInt-C.h +++ b/src/APInt-C.h @@ -3,10 +3,12 @@ #ifndef JL_APINT_C_H #define JL_APINT_C_H +#include "julia.h" +#include "dtypes.h" + #ifdef __cplusplus extern "C" { #endif -#include "dtypes.h" #ifdef LLVM_VERSION_MAJOR using integerPart = llvm::APInt::WordType; @@ -57,16 +59,16 @@ JL_DLLEXPORT unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa); -JL_DLLEXPORT void 
LLVMFPtoSI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMFPtoUI(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMSExt(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMZExt(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT void LLVMTrunc(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); +JL_DLLEXPORT void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMSItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMUItoFP(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMSExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMZExt(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); -JL_DLLEXPORT int LLVMFPtoSI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); -JL_DLLEXPORT int LLVMFPtoUI_exact(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr); +JL_DLLEXPORT int LLVMFPtoSI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); +JL_DLLEXPORT int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); JL_DLLEXPORT void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); JL_DLLEXPORT void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); diff --git a/src/Makefile b/src/Makefile index c62cf3dde1ec2..747090a8dcd83 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,6 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) JULIAHOME := $(abspath $(SRCDIR)/..) BUILDDIR := . 
-include $(JULIAHOME)/deps/Versions.make include $(JULIAHOME)/Make.inc include $(JULIAHOME)/deps/llvm-ver.make @@ -43,10 +42,10 @@ endif SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ - dlload sys init task array dump staticdata toplevel jl_uv datatype \ - simplevector runtime_intrinsics precompile jloptions \ - threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ - jlapi signal-handling safepoint timing subtype rtutils \ + dlload sys init task array genericmemory staticdata toplevel jl_uv datatype \ + simplevector runtime_intrinsics precompile jloptions mtarraylist \ + threading scheduler stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ + jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall RT_LLVMLINK := @@ -57,7 +56,7 @@ CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \ - llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures + llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline llvm_api FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) CG_LLVM_LIBS := all ifeq ($(USE_POLLY),1) @@ -80,6 +79,10 @@ endif RT_LLVM_LIBS := support +ifeq ($(LLVM_VER_MAJ),16) +RT_LLVM_LIBS += targetparser +endif + ifeq ($(OS),WINNT) SRCS += win32_ucontext endif @@ -92,6 +95,7 @@ endif else DTRACE_HEADERS := endif +.SECONDARY: $(addprefix $(BUILDDIR)/,$(DTRACE_HEADERS)) # headers are used for dependency tracking, while public headers will be part of the dist UV_HEADERS := @@ -99,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h) endif @@ -117,24 +121,32 @@ endif ifeq ($(JULIACODEGEN),LLVM) ifneq ($(USE_SYSTEM_LLVM),0) +# USE_SYSTEM_LLVM != 0 CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs) +LLVM_SHLIB_SYMBOL_VERSION := $(shell nm -D --with-symbol-versions $(shell $(LLVM_CONFIG_HOST) --libfiles --link-shared | awk '{print $1; exit}') | \ + grep _ZN4llvm3Any6TypeId | head -n 1 | sed -e 's/.*@//') + # HACK: llvm-config doesn't correctly point to shared libs on all platforms # https://github.com/JuliaLang/julia/issues/29981 else +# USE_SYSTEM_LLVM == 0 ifneq ($(USE_LLVM_SHLIB),1) +# USE_LLVM_SHLIB != 1 CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs $(CG_LLVM_LIBS) --link-static) $($(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --system-libs 2> /dev/null) else +# USE_LLVM_SHLIB == 1 ifeq ($(OS), Darwin) CG_LLVMLINK += $(LLVM_LDFLAGS) -lLLVM else -CG_LLVMLINK += $(LLVM_LDFLAGS) -lLLVM-14jl -endif -endif -endif +CG_LLVMLINK += $(LLVM_LDFLAGS) $(LLVM_SHARED_LINK_FLAG) +endif # OS +endif # USE_LLVM_SHLIB +endif # USE_SYSTEM_LLVM + ifeq 
($(USE_LLVM_SHLIB),1) FLAGS += -DLLVM_SHLIB endif # USE_LLVM_SHLIB == 1 -endif +endif # JULIACODEGEN == LLVM RT_LLVM_LINK_ARGS := $(shell $(LLVM_CONFIG_HOST) --libs $(RT_LLVM_LIBS) --system-libs --link-static) RT_LLVMLINK += $(LLVM_LDFLAGS) $(RT_LLVM_LINK_ARGS) @@ -145,16 +157,19 @@ endif CLANG_LDFLAGS := $(LLVM_LDFLAGS) ifeq ($(OS), Darwin) CLANG_LDFLAGS += -Wl,-undefined,dynamic_lookup -OSLIBS += $(SRCDIR)/mach_dyld_atfork.tbd +OSLIBS += -Wl,-U,__dyld_atfork_parent -Wl,-U,__dyld_atfork_prepare -Wl,-U,__dyld_dlopen_atfork_parent -Wl,-U,__dyld_dlopen_atfork_prepare -Wl,-U,_jl_image_pointers -Wl,-U,_jl_system_image_data -Wl,-U,_jl_system_image_size +LIBJULIA_PATH_REL := @rpath/libjulia +else +LIBJULIA_PATH_REL := libjulia endif COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir) -RT_LIBS := $(LIBUV) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) -CG_LIBS := $(NO_WHOLE_ARCHIVE) $(LIBUV) $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) +RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) +CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS) -CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(WHOLE_ARCHIVE) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug +CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS) -CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(WHOLE_ARCHIVE) $(CG_LIBS) -ljulia -ljulia-internal +CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal OBJS := $(SRCS:%=$(BUILDDIR)/%.o) DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) @@ -162,16 +177,18 @@ DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o) CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj) -DEBUGFLAGS += $(FLAGS) -DLIBRARY_EXPORTS -SHIPFLAGS += $(FLAGS) -DLIBRARY_EXPORTS - -# if not absolute, then relative to the directory of the julia executable -SHIPFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\"" -DEBUGFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\"" - # Add SONAME defines so we can embed proper `dlopen()` calls. 
-SHIPFLAGS += "-DJL_LIBJULIA_SONAME=\"libjulia.$(JL_MAJOR_SHLIB_EXT)\"" "-DJL_LIBJULIA_INTERNAL_SONAME=\"libjulia-internal.$(JL_MAJOR_SHLIB_EXT)\"" -DEBUGFLAGS += "-DJL_LIBJULIA_SONAME=\"libjulia-debug.$(JL_MAJOR_SHLIB_EXT)\"" "-DJL_LIBJULIA_INTERNAL_SONAME=\"libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)\"" +ADDL_SHIPFLAGS := "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\"" \ + "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL).$(JL_MAJOR_SHLIB_EXT)\"" +ADDL_DEBUGFLAGS := "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\"" \ + "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL)-debug.$(JL_MAJOR_SHLIB_EXT)\"" + +SHIPFLAGS += $(FLAGS) $(ADDL_SHIPFLAGS) +DEBUGFLAGS += $(FLAGS) $(ADDL_DEBUGFLAGS) +SHIPFLAGS_GCC += $(FLAGS) $(ADDL_SHIPFLAGS) +DEBUGFLAGS_GCC += $(FLAGS) $(ADDL_DEBUGFLAGS) +SHIPFLAGS_CLANG += $(FLAGS) $(ADDL_SHIPFLAGS) +DEBUGFLAGS_CLANG += $(FLAGS) $(ADDL_DEBUGFLAGS) ifeq ($(USE_CROSS_FLISP), 1) FLISPDIR := $(BUILDDIR)/flisp/host @@ -239,6 +256,8 @@ $(build_includedir)/julia/uv/*.h: $(LIBUV_INC)/uv/*.h | $(build_includedir)/juli $(INSTALL_F) $^ $(build_includedir)/julia/uv libccalltest: $(build_shlibdir)/libccalltest.$(SHLIB_EXT) +libccalllazyfoo: $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT) +libccalllazybar: $(build_shlibdir)/libccalllazybar.$(SHLIB_EXT) libllvmcalltest: $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT) ifeq ($(OS), Linux) @@ -263,6 +282,12 @@ endif mv $@.tmp $@ $(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@ +$(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT): $(SRCDIR)/ccalllazyfoo.c + @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,ccalllazyfoo.$(SHLIB_EXT))) + +$(build_shlibdir)/libccalllazybar.$(SHLIB_EXT): $(SRCDIR)/ccalllazybar.c $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT) + @$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,ccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo) + $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE) @$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(FLAGS) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(NO_WHOLE_ARCHIVE) $(CG_LLVMLINK)) -lpthread @@ -279,40 +304,44 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase # additional dependency links $(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(SRCDIR)/intrinsics.h -$(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h $(SRCDIR)/debug-registry.h +$(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h $(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h -$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h +$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/idset.c $(SRCDIR)/builtin_proto.h $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\ - intrinsics.cpp jitlayers.h debug-registry.h intrinsics.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h) + intrinsics.cpp jitlayers.h intrinsics.h llvm-codegen-shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h) +$(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: 
$(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h) $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h -$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h -$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h +$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h -$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h $(SRCDIR)/debug-registry.h +$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h -$(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/codegen_shared.h $(BUILDDIR)/julia_version.h -$(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h -$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h -$(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/debug-registry.h -$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h -$(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/codegen_shared.h -$(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h -$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h -$(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/codegen_shared.h -$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/processor.h -$(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h -$(BUILDDIR)/llvm-propagate-addrspaces.o $(BUILDDIR)/llvm-propagate-addrspaces.dbg.obj: $(SRCDIR)/codegen_shared.h -$(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj: $(SRCDIR)/codegen_shared.h -$(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h +$(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/julia_version.h +$(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h +$(BUILDDIR)/llvm-alloc-opt.o 
$(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h +$(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h +$(BUILDDIR)/llvm-demote-float16.o $(BUILDDIR)/llvm-demote-float16.dbg.obj: $(SRCDIR)/jitlayers.h +$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h +$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h +$(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-propagate-addrspaces.o $(BUILDDIR)/llvm-propagate-addrspaces.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h) $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c) -$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h +$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/precompile_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h -$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h +$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h $(SRCDIR)/common_symbols1.inc $(SRCDIR)/common_symbols2.inc +$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h) $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h @@ -330,10 +359,10 @@ $(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a $(FLISP_EXECUTABLE_debug): $(BUILDDIR)/flisp/libflisp-debug.a $(MAKE) -C $(BUILDDIR)/flisp $(subst $(abspath $(BUILDDIR)/flisp)/,,$(abspath $(FLISP_EXECUTABLE_debug))) -$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a +$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a $(BUILDDIR)/julia.expmap $(MAKE) -C $(SRCDIR)/flisp BUILDDIR='$(abspath $(BUILDDIR)/flisp)' -$(BUILDDIR)/flisp/libflisp-debug.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport-debug.a +$(BUILDDIR)/flisp/libflisp-debug.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport-debug.a $(BUILDDIR)/julia.expmap $(MAKE) -C 
$(SRCDIR)/flisp debug BUILDDIR='$(abspath $(BUILDDIR)/flisp)' $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION @@ -341,7 +370,7 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION @echo "#ifndef JL_VERSION_H" >> $@.$(JULIA_BUILD_MODE).tmp @echo "#define JL_VERSION_H" >> $@.$(JULIA_BUILD_MODE).tmp @echo "#define JULIA_VERSION_STRING" \"$(JULIA_VERSION)\" >> $@.$(JULIA_BUILD_MODE).tmp - @echo $(JULIA_VERSION) | awk 'BEGIN {FS="[.,-]"} \ + @echo $(JULIA_VERSION) | awk 'BEGIN {FS="[.,+-]"} \ {print "#define JULIA_VERSION_MAJOR " $$1 "\n" \ "#define JULIA_VERSION_MINOR " $$2 "\n" \ "#define JULIA_VERSION_PATCH " $$3 ; \ @@ -351,13 +380,17 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION CXXLD = $(CXX) -shared -$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) +$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in + sed <'$<' >'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \ + -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/" + +$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ -$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) +$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ @@ -370,6 +403,8 @@ $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libj $(build_shlibdir)/libjulia-internal.$(SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(SHLIB_EXT): \ $(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_MINOR_SHLIB_EXT) @$(call PRINT_LINK, ln -sf $(notdir $<) $@) +$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal.$(SHLIB_EXT) +$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal-debug.$(SHLIB_EXT) libjulia-internal-release: $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal.$(SHLIB_EXT) libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(SHLIB_EXT) endif @@ -377,13 +412,13 @@ libjulia-internal-release: $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_ libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS) -$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_OBJS) 
$(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT) +$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ -$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) +$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(CODEGEN_DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(CG_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-codegen-debug.$(SHLIB_EXT) $@ @@ -403,10 +438,17 @@ libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SH libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS) +# set the exports for the source files based on where they are getting linked +$(OBJS): SHIPFLAGS += -DJL_LIBRARY_EXPORTS_INTERNAL +$(DOBJS): DEBUGFLAGS += -DJL_LIBRARY_EXPORTS_INTERNAL +$(CODEGEN_OBJS): SHIPFLAGS += -DJL_LIBRARY_EXPORTS_CODEGEN +$(CODEGEN_DOBJS): DEBUGFLAGS += -DJL_LIBRARY_EXPORTS_CODEGEN + clean: -rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* -rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen + -rm -f $(BUILDDIR)/julia.expmap -rm -f $(BUILDDIR)/julia_version.h clean-flisp: @@ -426,8 +468,14 @@ $(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG # before attempting this static analysis, so that all necessary headers # and dependencies are properly installed: # make -C src install-analysis-deps +ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc +ifeq ($(OS),Darwin) +ANALYSIS_DEPS += llvmunwind +else ifneq ($(OS),WINNT) +ANALYSIS_DEPS += unwind +endif install-analysis-deps: - $(MAKE) -C $(JULIAHOME)/deps install-llvm install-clang install-llvm-tools install-libuv install-utf8proc install-unwind + $(MAKE) -C $(JULIAHOME)/deps $(addprefix install-,$(ANALYSIS_DEPS)) analyzegc-deps-check: $(BUILDDIR)/julia_version.h $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc ifeq ($(USE_BINARYBUILDER_LLVM),0) @@ -445,8 +493,6 @@ SA_EXCEPTIONS-jloptions.c := -Xanalyzer -analyzer-config -Xana SA_EXCEPTIONS-subtype.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult" SA_EXCEPTIONS-codegen.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core" # these need to be 
annotated (and possibly fixed) -SKIP_IMPLICIT_ATOMICS := dump.c module.c staticdata.c codegen.cpp - # these need to be annotated (and possibly fixed) SKIP_GC_CHECK := codegen.cpp rtutils.c # make sure LLVM's invariant information is not discarded with -DNDEBUG @@ -455,40 +501,46 @@ clang-sagc-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .F @$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \ -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c $<) + $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -x c $<) clang-sagc-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \ -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c++ $<) + $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -x c++ $<) clang-sa-%: JL_CXXFLAGS += -UNDEBUG clang-sa-%: $(SRCDIR)/%.c .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \ -Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c $<) + $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -Werror -x c $<) clang-sa-%: $(SRCDIR)/%.cpp .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \ -Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c++ $<) + $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -Werror -x c++ $<) clang-tidy-%: $(SRCDIR)/%.c $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \ -load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \ - -- $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -fno-caret-diagnostics -x c) + -- $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -fno-caret-diagnostics -x c) clang-tidy-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call 
PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \ -load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \ - -- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++) + -- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++) +# set the exports for the source files based on where they are getting linked +$(addprefix clang-sa-,$(SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_INTERNAL +$(addprefix clang-sagc-,$(SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_INTERNAL +$(addprefix clang-tidy-,$(SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_INTERNAL +$(addprefix clang-sa-,$(CODEGEN_SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_CODEGEN +$(addprefix clang-sagc-,$(CODEGEN_SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_CODEGEN +$(addprefix clang-tidy-,$(CODEGEN_SRCS)): DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS_CODEGEN # Add C files as a target of `analyzesrc` and `analyzegc` and `tidysrc` -tidysrc: $(addprefix clang-tidy-,$(filter-out $(basename $(SKIP_IMPLICIT_ATOMICS)),$(CODEGEN_SRCS) $(SRCS))) +tidysrc: $(addprefix clang-tidy-,$(CODEGEN_SRCS) $(SRCS)) analyzesrc: $(addprefix clang-sa-,$(CODEGEN_SRCS) $(SRCS)) analyzegc: $(addprefix clang-sagc-,$(filter-out $(basename $(SKIP_GC_CHECK)),$(CODEGEN_SRCS) $(SRCS))) -analyzegc: analyzesrc tidysrc # TODO: remove me (depended on by CI currently) analyze: analyzesrc analyzegc tidysrc clean-analyzegc: diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp index 1a3f160329c6c..7c31b6606139a 100644 --- a/src/abi_aarch64.cpp +++ b/src/abi_aarch64.cpp @@ -43,9 +43,11 @@ Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const // the homogeneity check. jl_datatype_t *ft0 = (jl_datatype_t*)jl_field_type(dt, 0); // `ft0` should be a `VecElement` type and the true element type - // should be a primitive type - if (ft0->name != jl_vecelement_typename || - ((jl_datatype_t*)jl_field_type(ft0, 0))->layout->nfields) + // should be a primitive type (nfields == 0) + if (!jl_is_datatype(ft0) || ft0->name != jl_vecelement_typename) + return nullptr; + jl_datatype_t *ft00 = (jl_datatype_t*)jl_field_type(ft0, 0); + if (!jl_is_datatype(ft00) || ft00->layout->nfields) return nullptr; for (size_t i = 1; i < nfields; i++) { if (jl_field_type(dt, i) != (jl_value_t*)ft0) { @@ -86,7 +88,7 @@ Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) - if (dt->name->mutabl || dt->layout->npointers || dt->layout->haspadding) + if (dt->name->mutabl || dt->layout->npointers || dt->layout->flags.haspadding) return nullptr; return dt->layout->nfields ? 
get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx); } @@ -120,15 +122,17 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele, L // For composite types, find the first non zero sized member size_t i; size_t fieldsz; - for (i = 0;i < nfields;i++) { + for (i = 0; i < nfields; i++) { if ((fieldsz = jl_field_size(dt, i))) { break; } } assert(i < nfields); - // If there's only one non zero sized member, try again on this member + // If there's only one non-zero sized member, try again on this member if (fieldsz == dsz) { dt = (jl_datatype_t*)jl_field_type(dt, i); + if (!jl_is_datatype(dt)) + return false; continue; } if (Type *vectype = get_llvm_vectype(dt, ctx)) { @@ -140,11 +144,13 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele, L return true; } // Otherwise, process each members - for (;i < nfields;i++) { + for (; i < nfields; i++) { size_t fieldsz = jl_field_size(dt, i); if (fieldsz == 0) continue; jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type(dt, i); + if (!jl_is_datatype(dt)) + return false; // Check element count. // This needs to be done after the zero size member check if (nele > 3 || !isHFAorHVA(fieldtype, fieldsz, nele, ele, ctx)) { @@ -178,7 +184,7 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const // uniquely addressable members. // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector) size_t dsz = jl_datatype_size(dt); - if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->haspadding) + if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->flags.haspadding) return NULL; nele = 0; ElementType eltype; diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp index 4987d07657ae6..68f980d7b40da 100644 --- a/src/abi_arm.cpp +++ b/src/abi_arm.cpp @@ -82,7 +82,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const if (jl_is_structtype(dt)) { // Fast path checks before descending the type hierarchy // (4 x 128b vector == 64B max size) - if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->haspadding) + if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->flags.haspadding) return 0; base = NULL; @@ -91,6 +91,8 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const size_t parent_members = jl_datatype_nfields(dt); for (size_t i = 0; i < parent_members; ++i) { jl_datatype_t *fdt = (jl_datatype_t*)jl_field_type(dt,i); + if (!jl_is_datatype(fdt)) + return 0; Type *T = isLegalHAType(fdt, ctx); if (T) diff --git a/src/abi_ppc64le.cpp b/src/abi_ppc64le.cpp index 016eebd455525..1f10817cfeeee 100644 --- a/src/abi_ppc64le.cpp +++ b/src/abi_ppc64le.cpp @@ -44,6 +44,9 @@ struct ABI_PPC64leLayout : AbiLayout { // count the homogeneous floating aggregate size (saturating at max count of 8) unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const { + if (jl_datatype_size(ty) > 128 || ty->layout->npointers || ty->layout->flags.haspadding) + return 9; + size_t i, l = ty->layout->nfields; // handle homogeneous float aggregates if (l == 0) { @@ -52,7 +55,7 @@ unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const *hva = false; if (*ty0 == NULL) *ty0 = ty; - else if (*hva || ty->size != (*ty0)->size) + else if (*hva || jl_datatype_size(ty) != jl_datatype_size(*ty0)) return 9; return 1; } @@ -69,7 +72,7 @@ unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const *hva = true; if (*ty0 == NULL) *ty0 = ty; - else if (!*hva || ty->size != (*ty0)->size) + else if 
(!*hva || jl_datatype_size(ty) != jl_datatype_size(*ty0)) return 9; for (i = 1; i < l; i++) { jl_datatype_t *fld = (jl_datatype_t*)jl_field_type(ty, i); @@ -115,7 +118,12 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *T Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override { // Arguments are either scalar or passed by value - size_t size = jl_datatype_size(dt); + + // LLVM passes Float16 in floating-point registers, but this doesn't match the ABI. + // No C compiler seems to support _Float16 yet, so in the meantime, pass as i16 + if (dt == jl_float16_type || dt == jl_bfloat16_type) + return Type::getInt16Ty(ctx); + // don't need to change bitstypes if (!jl_datatype_nfields(dt)) return NULL; @@ -140,6 +148,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const } // rewrite integer-sized (non-HFA) struct to an array // the bitsize of the integer gives the desired alignment + size_t size = jl_datatype_size(dt); if (size > 8) { if (jl_datatype_align(dt) <= 8) { Type *T_int64 = Type::getInt64Ty(ctx); diff --git a/src/abi_win32.cpp b/src/abi_win32.cpp index 078d9b6df4e44..ccfc6a16ebee3 100644 --- a/src/abi_win32.cpp +++ b/src/abi_win32.cpp @@ -52,7 +52,7 @@ bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override { // Use pass by reference for all structs - if (dt->layout->nfields > 0) { + if (dt->layout->nfields > 0 || dt->layout->npointers) { ab.addByValAttr(Ty); return true; } @@ -63,7 +63,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const { // Arguments are either scalar or passed by value // rewrite integer sized (non-sret) struct to the corresponding integer - if (!dt->layout->nfields) + if (!dt->layout->nfields && !dt->layout->npointers) return NULL; return Type::getIntNTy(ctx, jl_datatype_nbits(dt)); } diff --git a/src/abi_x86_64.cpp b/src/abi_x86_64.cpp index 43e539b8386ce..6a853421dbccd 100644 --- a/src/abi_x86_64.cpp +++ b/src/abi_x86_64.cpp @@ -118,7 +118,8 @@ struct Classification { void classifyType(Classification& accum, jl_datatype_t *dt, uint64_t offset) const { // Floating point types - if (dt == jl_float64_type || dt == jl_float32_type) { + if (dt == jl_float64_type || dt == jl_float32_type || dt == jl_float16_type || + dt == jl_bfloat16_type) { accum.addField(offset, Sse); } // Misc types @@ -147,12 +148,16 @@ void classifyType(Classification& accum, jl_datatype_t *dt, uint64_t offset) con accum.addField(offset, Sse); } // Other struct types - else if (jl_datatype_size(dt) <= 16 && dt->layout) { + else if (jl_datatype_size(dt) <= 16 && dt->layout && !jl_is_layout_opaque(dt->layout)) { size_t i; for (i = 0; i < jl_datatype_nfields(dt); ++i) { jl_value_t *ty = jl_field_type(dt, i); if (jl_field_isptr(dt, i)) ty = (jl_value_t*)jl_voidpointer_type; + else if (!jl_is_datatype(ty)) { // inline union + accum.addField(offset, Memory); + continue; + } classifyType(accum, (jl_datatype_t*)ty, offset + jl_field_offset(dt, i)); } } @@ -235,7 +240,9 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const types[0] = Type::getIntNTy(ctx, nbits); break; case Sse: - if (size <= 4) + if (size <= 2) + types[0] = Type::getHalfTy(ctx); + else if (size <= 4) types[0] = Type::getFloatTy(ctx); else types[0] = Type::getDoubleTy(ctx); diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 13872b29322a3..be70016e45f21 100644 --- 
a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -5,14 +5,11 @@ // target support #include +#include #include #include #include -#if JL_LLVM_VERSION >= 140000 #include -#else -#include -#endif #include // analysis passes @@ -20,18 +17,22 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include #include #include +#include #include +#include #include #include #if defined(USE_POLLY) @@ -46,45 +47,51 @@ // for outputting code #include #include +#include #include "llvm/Object/ArchiveWriter.h" #include #include #include +#include +#include using namespace llvm; -#include "julia.h" -#include "julia_internal.h" #include "jitlayers.h" +#include "serialize.h" #include "julia_assert.h" +#include "processor.h" + +#define DEBUG_TYPE "julia_aotcompile" + +STATISTIC(CICacheLookups, "Number of codeinst cache lookups"); +STATISTIC(CreateNativeCalls, "Number of jl_create_native calls made"); +STATISTIC(CreateNativeMethods, "Number of methods compiled for jl_create_native"); +STATISTIC(CreateNativeMax, "Max number of methods compiled at once for jl_create_native"); +STATISTIC(CreateNativeGlobals, "Number of globals compiled for jl_create_native"); -template // for GlobalObject's -static T *addComdat(T *G) +static void addComdat(GlobalValue *G, Triple &T) { -#if defined(_OS_WINDOWS_) - if (!G->isDeclaration()) { + if (T.isOSBinFormatCOFF() && !G->isDeclaration()) { // add __declspec(dllexport) to everything marked for export - if (G->getLinkage() == GlobalValue::ExternalLinkage) - G->setDLLStorageClass(GlobalValue::DLLExportStorageClass); - else - G->setDLLStorageClass(GlobalValue::DefaultStorageClass); + assert(G->hasExternalLinkage() && "Cannot set DLLExport on non-external linkage!"); + G->setDLLStorageClass(GlobalValue::DLLExportStorageClass); } -#endif - return G; } typedef struct { orc::ThreadSafeModule M; - std::vector jl_sysimg_fvars; - std::vector jl_sysimg_gvars; + SmallVector jl_sysimg_fvars; + SmallVector jl_sysimg_gvars; std::map> jl_fvar_map; - std::map jl_value_to_llvm; // uses 1-based indexing + SmallVector jl_value_to_llvm; + SmallVector jl_external_to_llvm; } jl_native_code_desc_t; -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst, int32_t *func_idx, int32_t *specfunc_idx) { @@ -98,21 +105,25 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst, } } -extern "C" JL_DLLEXPORT -int32_t jl_get_llvm_gv_impl(void *native_code, jl_value_t *p) +extern "C" JL_DLLEXPORT_CODEGEN +void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs) { - // map a jl_value_t memory location to a GlobalVariable + // map a memory location (jl_value_t or jl_binding_t) to a GlobalVariable jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; - if (data) { - auto it = data->jl_value_to_llvm.find(p); - if (it != data->jl_value_to_llvm.end()) { - return it->second; - } - } - return 0; + arraylist_grow(gvs, data->jl_value_to_llvm.size()); + memcpy(gvs->items, data->jl_value_to_llvm.data(), gvs->len * sizeof(void*)); +} + +extern "C" JL_DLLEXPORT_CODEGEN +void jl_get_llvm_external_fns_impl(void *native_code, arraylist_t *external_fns) +{ + jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; + arraylist_grow(external_fns, data->jl_external_to_llvm.size()); + memcpy(external_fns->items, data->jl_external_to_llvm.data(), + external_fns->len * sizeof(jl_code_instance_t*)); } -extern "C" JL_DLLEXPORT +extern "C" 
JL_DLLEXPORT_CODEGEN LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code) { jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; @@ -122,7 +133,7 @@ LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code) return NULL; } -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN GlobalValue* jl_get_llvm_function_impl(void *native_code, uint32_t idx) { jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; @@ -133,22 +144,24 @@ GlobalValue* jl_get_llvm_function_impl(void *native_code, uint32_t idx) } -static void emit_offset_table(Module &mod, const std::vector &vars, StringRef name, Type *T_psize) +static void emit_offset_table(Module &mod, ArrayRef vars, + StringRef name, Type *T_psize) { // Emit a global variable with all the variable addresses. // The cloning pass will convert them into offsets. - assert(!vars.empty()); size_t nvars = vars.size(); - std::vector addrs(nvars); + SmallVector addrs(nvars); for (size_t i = 0; i < nvars; i++) { Constant *var = vars[i]; addrs[i] = ConstantExpr::getBitCast(var, T_psize); } ArrayType *vars_type = ArrayType::get(T_psize, nvars); - new GlobalVariable(mod, vars_type, true, + auto GV = new GlobalVariable(mod, vars_type, true, GlobalVariable::ExternalLinkage, ConstantArray::get(vars_type, addrs), name); + GV->setVisibility(GlobalValue::HiddenVisibility); + GV->setDSOLocal(true); } static bool is_safe_char(unsigned char c) @@ -212,28 +225,31 @@ static void makeSafeName(GlobalObject &G) static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance_t *mi, size_t world, jl_code_instance_t **ci_out, jl_code_info_t **src_out) { + ++CICacheLookups; jl_value_t *ci = cgparams.lookup(mi, world, world); JL_GC_PROMISE_ROOTED(ci); jl_code_instance_t *codeinst = NULL; if (ci != jl_nothing) { codeinst = (jl_code_instance_t*)ci; - *src_out = (jl_code_info_t*)codeinst->inferred; + *src_out = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); jl_method_t *def = codeinst->def->def.method; if ((jl_value_t*)*src_out == jl_nothing) *src_out = NULL; if (*src_out && jl_is_method(def)) - *src_out = jl_uncompress_ir(def, codeinst, (jl_array_t*)*src_out); + *src_out = jl_uncompress_ir(def, codeinst, (jl_value_t*)*src_out); } if (*src_out == NULL || !jl_is_code_info(*src_out)) { - if (cgparams.lookup != jl_rettype_inferred) { + if (cgparams.lookup != jl_rettype_inferred_addr) { jl_error("Refusing to automatically run type inference with custom cache lookup."); } else { *src_out = jl_type_infer(mi, world, 0); if (*src_out) { - codeinst = jl_get_method_inferred(mi, (*src_out)->rettype, (*src_out)->min_world, (*src_out)->max_world); - if ((*src_out)->inferred && !codeinst->inferred) - codeinst->inferred = jl_nothing; + codeinst = jl_get_codeinst_for_src(mi, *src_out); + if ((*src_out)->inferred) { + jl_value_t *null = nullptr; + jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing); + } } } } @@ -243,41 +259,56 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup, and can // also be used be extern consumers like GPUCompiler.jl to obtain a module containing -// all reachable & inferrrable functions. The `policy` flag switches between the default -// mode `0`, the extern mode `1`, and imaging mode `2`. 
-extern "C" JL_DLLEXPORT -void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy) +// all reachable & inferrrable functions. +// The `policy` flag switches between the default mode `0` and the extern mode `1` used by GPUCompiler. +// `_imaging_mode` controls if raw pointers can be embedded (e.g. the code will be loaded into the same session). +// `_external_linkage` create linkages between pkgimages. +extern "C" JL_DLLEXPORT_CODEGEN +void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world) { + JL_TIMING(NATIVE_AOT, NATIVE_Create); + ++CreateNativeCalls; + CreateNativeMax.updateMax(jl_array_nrows(methods)); if (cgparams == NULL) cgparams = &jl_default_cgparams; jl_native_code_desc_t *data = new jl_native_code_desc_t; CompilationPolicy policy = (CompilationPolicy) _policy; - bool imaging = imaging_default() || policy == CompilationPolicy::ImagingMode; - jl_workqueue_t emitted; + bool imaging = imaging_default() || _imaging_mode == 1; jl_method_instance_t *mi = NULL; jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); - JL_LOCK(&jl_codegen_lock); + auto ct = jl_current_task; + bool timed = (ct->reentrant_timing & 1) == 0; + if (timed) + ct->reentrant_timing |= 1; orc::ThreadSafeContext ctx; orc::ThreadSafeModule backing; if (!llvmmod) { ctx = jl_ExecutionEngine->acquireContext(); - backing = jl_create_llvm_module("text", ctx, imaging); + backing = jl_create_ts_module("text", ctx); } orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing; auto ctxt = clone.getContext(); - jl_codegen_params_t params(ctxt); - params.params = cgparams; + uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - params.imaging = imaging; - // compile all methods for the current world and type-inference world - size_t compile_for[] = { jl_typeinf_world, jl_atomic_load_acquire(&jl_world_counter) }; + + JL_LOCK(&jl_codegen_lock); + auto target_info = clone.withModuleDo([&](Module &M) { + return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); + }); + jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second)); + params.params = cgparams; + params.imaging_mode = imaging; + params.debug_level = cgparams->debug_info_level; + params.external_linkage = _external_linkage; + size_t compile_for[] = { jl_typeinf_world, _world }; for (int worlds = 0; worlds < 2; worlds++) { + JL_TIMING(NATIVE_AOT, NATIVE_Codegen); params.world = compile_for[worlds]; if (!params.world) continue; @@ -285,7 +316,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm if (policy != CompilationPolicy::Default && params.world == jl_typeinf_world) continue; size_t i, l; - for (i = 0, l = jl_array_len(methods); i < l; i++) { + for (i = 0, l = jl_array_nrows(methods); i < l; i++) { // each item in this list is either a MethodInstance indicating something // to compile, or an svec(rettype, sig) describing a C-callable alias to create. 
jl_value_t *item = jl_array_ptr_ref(methods, i); @@ -303,63 +334,96 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm // find and prepare the source code to compile jl_code_instance_t *codeinst = NULL; jl_ci_cache_lookup(*cgparams, mi, params.world, &codeinst, &src); - if (src && !emitted.count(codeinst)) { + if (src && !params.compiled_functions.count(codeinst)) { // now add it to our compilation results JL_GC_PROMISE_ROOTED(codeinst->rettype); - orc::ThreadSafeModule result_m = jl_create_llvm_module(name_from_method_instance(codeinst->def), - params.tsctx, params.imaging, - clone.getModuleUnlocked()->getDataLayout(), + orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def), + params.tsctx, clone.getModuleUnlocked()->getDataLayout(), Triple(clone.getModuleUnlocked()->getTargetTriple())); jl_llvm_functions_t decls = jl_emit_code(result_m, mi, src, codeinst->rettype, params); if (result_m) - emitted[codeinst] = {std::move(result_m), std::move(decls)}; + params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; } } } // finally, make sure all referenced methods also get compiled or fixed up - jl_compile_workqueue(emitted, *clone.getModuleUnlocked(), params, policy); + jl_compile_workqueue(params, policy); } + JL_UNLOCK(&jl_codegen_lock); // Might GC JL_GC_POP(); // process the globals array, before jl_merge_module destroys them - std::vector gvars; - - for (auto &global : params.globals) { - gvars.push_back(std::string(global.second->getName())); - data->jl_value_to_llvm[global.first] = gvars.size(); + SmallVector gvars(params.global_targets.size()); + data->jl_value_to_llvm.resize(params.global_targets.size()); + StringSet<> gvars_names; + DenseSet gvars_set; + + size_t idx = 0; + for (auto &global : params.global_targets) { + gvars[idx] = global.second->getName().str(); + global.second->setInitializer(literal_static_pointer_val(global.first, global.second->getValueType())); + assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!"); + assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!"); + data->jl_value_to_llvm[idx] = global.first; + idx++; + } + CreateNativeMethods += params.compiled_functions.size(); + + size_t offset = gvars.size(); + data->jl_external_to_llvm.resize(params.external_fns.size()); + + for (auto &extern_fn : params.external_fns) { + jl_code_instance_t *this_code = std::get<0>(extern_fn.first); + bool specsig = std::get<1>(extern_fn.first); + assert(specsig && "Error external_fns doesn't handle non-specsig yet"); + (void) specsig; + GlobalVariable *F = extern_fn.second; + size_t idx = gvars.size() - offset; + assert(idx >= 0); + assert(idx < data->jl_external_to_llvm.size()); + data->jl_external_to_llvm[idx] = this_code; + assert(gvars_set.insert(F).second && "Duplicate gvar in params!"); + assert(gvars_names.insert(F->getName()).second && "Duplicate gvar name in params!"); + gvars.push_back(std::string(F->getName())); } // clones the contents of the module `m` to the shadow_output collector // while examining and recording what kind of function pointer we have - for (auto &def : emitted) { - jl_merge_module(clone, std::move(std::get<0>(def.second))); - jl_code_instance_t *this_code = def.first; - jl_llvm_functions_t decls = std::get<1>(def.second); - StringRef func = decls.functionObject; - StringRef cfunc = decls.specFunctionObject; - uint32_t func_id = 0; - uint32_t cfunc_id = 0; - if (func == "jl_fptr_args") { - func_id = -1; - 
} - else if (func == "jl_fptr_sparam") { - func_id = -2; - } - else { - //Safe b/c context is locked by params - data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(func))); - func_id = data->jl_sysimg_fvars.size(); + { + JL_TIMING(NATIVE_AOT, NATIVE_Merge); + Linker L(*clone.getModuleUnlocked()); + for (auto &def : params.compiled_functions) { + jl_merge_module(clone, std::move(std::get<0>(def.second))); + jl_code_instance_t *this_code = def.first; + jl_llvm_functions_t decls = std::get<1>(def.second); + StringRef func = decls.functionObject; + StringRef cfunc = decls.specFunctionObject; + uint32_t func_id = 0; + uint32_t cfunc_id = 0; + if (func == "jl_fptr_args") { + func_id = -1; + } + else if (func == "jl_fptr_sparam") { + func_id = -2; + } + else { + //Safe b/c context is locked by params + data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(func))); + func_id = data->jl_sysimg_fvars.size(); + } + if (!cfunc.empty()) { + //Safe b/c context is locked by params + data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(cfunc))); + cfunc_id = data->jl_sysimg_fvars.size(); + } + data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id); } - if (!cfunc.empty()) { - //Safe b/c context is locked by params - data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(cfunc))); - cfunc_id = data->jl_sysimg_fvars.size(); + if (params._shared_module) { + bool error = L.linkInModule(std::move(params._shared_module)); + assert(!error && "Error linking in shared module"); + (void)error; } - data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id); - } - if (params._shared_module) { - jl_merge_module(clone, std::move(params._shared_module)); } // now get references to the globals in the merged module @@ -367,21 +431,24 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm for (auto &global : gvars) { //Safe b/c context is locked by params GlobalVariable *G = cast(clone.getModuleUnlocked()->getNamedValue(global)); - G->setInitializer(ConstantPointerNull::get(cast(G->getValueType()))); - G->setLinkage(GlobalVariable::InternalLinkage); + assert(G->hasInitializer()); + G->setLinkage(GlobalValue::InternalLinkage); + G->setDSOLocal(true); data->jl_sysimg_gvars.push_back(G); } + CreateNativeGlobals += gvars.size(); //Safe b/c context is locked by params -#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) - // setting the function personality enables stack unwinding and catching exceptions - // so make sure everything has something set - Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext()); - Function *juliapersonality_func = - Function::Create(FunctionType::get(T_int32, true), - Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked()); - juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass); -#endif + auto TT = Triple(clone.getModuleUnlocked()->getTargetTriple()); + Function *juliapersonality_func = nullptr; + if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { + // setting the function personality enables stack unwinding and catching exceptions + // so make sure everything has something set + Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext()); + juliapersonality_func = Function::Create(FunctionType::get(T_int32, true), + Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked()); + juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass); 
+ } // move everything inside, now that we've merged everything // (before adding the exported headers) @@ -389,37 +456,33 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm //Safe b/c context is locked by params for (GlobalObject &G : clone.getModuleUnlocked()->global_objects()) { if (!G.isDeclaration()) { - G.setLinkage(Function::InternalLinkage); + G.setLinkage(GlobalValue::InternalLinkage); + G.setDSOLocal(true); makeSafeName(G); - addComdat(&G); -#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) - // Add unwind exception personalities to functions to handle async exceptions - if (Function *F = dyn_cast(&G)) - F->setPersonalityFn(juliapersonality_func); -#endif + if (Function *F = dyn_cast(&G)) { + if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { + // Add unwind exception personalities to functions to handle async exceptions + F->setPersonalityFn(juliapersonality_func); + } + } } } } data->M = std::move(clone); - if (measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (timed) { + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } + ct->reentrant_timing &= ~1ull; + } if (ctx.getContext()) { jl_ExecutionEngine->releaseContext(std::move(ctx)); } - JL_UNLOCK(&jl_codegen_lock); // Might GC return (void*)data; } - -static void emit_result(std::vector &Archive, SmallVectorImpl &OS, - StringRef Name, std::vector &outputs) -{ - outputs.push_back({ OS.data(), OS.size() }); - Archive.push_back(NewArchiveMember(MemoryBufferRef(outputs.back(), Name))); - OS.clear(); -} - static object::Archive::Kind getDefaultForHost(Triple &triple) { if (triple.isOSDarwin()) @@ -434,570 +497,1387 @@ static void reportWriterError(const ErrorInfoBase &E) jl_safe_printf("ERROR: failed to emit output file %s\n", err.c_str()); } +static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionType *FT) +{ + Function *target = M.getFunction(alias); + if (!target) { + target = Function::Create(FT, Function::ExternalLinkage, alias, M); + } + Function *interposer = Function::Create(FT, Function::InternalLinkage, name, M); + appendToCompilerUsed(M, {interposer}); + + llvm::IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", interposer)); + SmallVector CallArgs; + for (auto &arg : interposer->args()) + CallArgs.push_back(&arg); + auto val = builder.CreateCall(target, CallArgs); + builder.CreateRet(val); +} + +void multiversioning_preannotate(Module &M); + +// See src/processor.h for documentation about this table. Corresponds to jl_image_shard_t. 
+static GlobalVariable *emit_shard_table(Module &M, Type *T_size, Type *T_psize, unsigned threads) { + SmallVector tables(sizeof(jl_image_shard_t) / sizeof(void *) * threads); + for (unsigned i = 0; i < threads; i++) { + auto suffix = "_" + std::to_string(i); + auto create_gv = [&](StringRef name, bool constant) { + auto gv = new GlobalVariable(M, T_size, constant, + GlobalValue::ExternalLinkage, nullptr, name + suffix); + gv->setVisibility(GlobalValue::HiddenVisibility); + gv->setDSOLocal(true); + return gv; + }; + auto table = tables.data() + i * sizeof(jl_image_shard_t) / sizeof(void *); + table[offsetof(jl_image_shard_t, fvar_base) / sizeof(void*)] = create_gv("jl_fvar_base", false); + table[offsetof(jl_image_shard_t, fvar_offsets) / sizeof(void*)] = create_gv("jl_fvar_offsets", true); + table[offsetof(jl_image_shard_t, fvar_idxs) / sizeof(void*)] = create_gv("jl_fvar_idxs", true); + table[offsetof(jl_image_shard_t, gvar_base) / sizeof(void*)] = create_gv("jl_gvar_base", false); + table[offsetof(jl_image_shard_t, gvar_offsets) / sizeof(void*)] = create_gv("jl_gvar_offsets", true); + table[offsetof(jl_image_shard_t, gvar_idxs) / sizeof(void*)] = create_gv("jl_gvar_idxs", true); + table[offsetof(jl_image_shard_t, clone_slots) / sizeof(void*)] = create_gv("jl_clone_slots", true); + table[offsetof(jl_image_shard_t, clone_offsets) / sizeof(void*)] = create_gv("jl_clone_offsets", true); + table[offsetof(jl_image_shard_t, clone_idxs) / sizeof(void*)] = create_gv("jl_clone_idxs", true); + } + auto tables_arr = ConstantArray::get(ArrayType::get(T_psize, tables.size()), tables); + auto tables_gv = new GlobalVariable(M, tables_arr->getType(), false, + GlobalValue::ExternalLinkage, tables_arr, "jl_shard_tables"); + tables_gv->setVisibility(GlobalValue::HiddenVisibility); + tables_gv->setDSOLocal(true); + return tables_gv; +} + +// See src/processor.h for documentation about this table. Corresponds to jl_image_ptls_t. +static GlobalVariable *emit_ptls_table(Module &M, Type *T_size, Type *T_psize) { + std::array ptls_table{ + new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_pgcstack_func_slot"), + new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_pgcstack_key_slot"), + new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), "jl_tls_offset"), + }; + for (auto &gv : ptls_table) { + cast(gv)->setVisibility(GlobalValue::HiddenVisibility); + cast(gv)->setDSOLocal(true); + } + auto ptls_table_arr = ConstantArray::get(ArrayType::get(T_psize, ptls_table.size()), ptls_table); + auto ptls_table_gv = new GlobalVariable(M, ptls_table_arr->getType(), false, + GlobalValue::ExternalLinkage, ptls_table_arr, "jl_ptls_table"); + ptls_table_gv->setVisibility(GlobalValue::HiddenVisibility); + ptls_table_gv->setDSOLocal(true); + return ptls_table_gv; +} + +// See src/processor.h for documentation about this table. Corresponds to jl_image_header_t. 
+static GlobalVariable *emit_image_header(Module &M, unsigned threads, unsigned nfvars, unsigned ngvars) { + constexpr uint32_t version = 1; + std::array header{ + version, + threads, + nfvars, + ngvars, + }; + auto header_arr = ConstantDataArray::get(M.getContext(), header); + auto header_gv = new GlobalVariable(M, header_arr->getType(), false, + GlobalValue::InternalLinkage, header_arr, "jl_image_header"); + return header_gv; +} + +// Grab fvars and gvars data from the module +static void get_fvars_gvars(Module &M, DenseMap &fvars, DenseMap &gvars) { + auto fvars_gv = M.getGlobalVariable("jl_fvars"); + auto gvars_gv = M.getGlobalVariable("jl_gvars"); + auto fvars_idxs = M.getGlobalVariable("jl_fvar_idxs"); + auto gvars_idxs = M.getGlobalVariable("jl_gvar_idxs"); + assert(fvars_gv); + assert(gvars_gv); + assert(fvars_idxs); + assert(gvars_idxs); + auto fvars_init = cast(fvars_gv->getInitializer()); + auto gvars_init = cast(gvars_gv->getInitializer()); + for (unsigned i = 0; i < fvars_init->getNumOperands(); ++i) { + auto gv = cast(fvars_init->getOperand(i)->stripPointerCasts()); + assert(gv && gv->hasName() && "fvar must be a named global"); + assert(!fvars.count(gv) && "Duplicate fvar"); + fvars[gv] = i; + } + assert(fvars.size() == fvars_init->getNumOperands()); + for (unsigned i = 0; i < gvars_init->getNumOperands(); ++i) { + auto gv = cast(gvars_init->getOperand(i)->stripPointerCasts()); + assert(gv && gv->hasName() && "gvar must be a named global"); + assert(!gvars.count(gv) && "Duplicate gvar"); + gvars[gv] = i; + } + assert(gvars.size() == gvars_init->getNumOperands()); + fvars_gv->eraseFromParent(); + gvars_gv->eraseFromParent(); + fvars_idxs->eraseFromParent(); + gvars_idxs->eraseFromParent(); +} + +// Weight computation +// It is important for multithreaded image building to be able to split work up +// among the threads equally. The weight calculated here is an estimation of +// how expensive a particular function is going to be to compile. 
+ +struct FunctionInfo { + size_t weight; + size_t bbs; + size_t insts; + size_t clones; +}; + +static FunctionInfo getFunctionWeight(const Function &F) +{ + FunctionInfo info; + info.weight = 1; + info.bbs = F.size(); + info.insts = 0; + info.clones = 1; + for (const BasicBlock &BB : F) { + info.insts += BB.size(); + } + if (F.hasFnAttribute("julia.mv.clones")) { + auto val = F.getFnAttribute("julia.mv.clones").getValueAsString(); + // base16, so must be at most 4 * length bits long + // popcount gives number of clones + info.clones = APInt(val.size() * 4, val, 16).countPopulation() + 1; + } + info.weight += info.insts; + // more basic blocks = more complex than just sum of insts, + // add some weight to it + info.weight += info.bbs; + info.weight *= info.clones; + return info; +} + +struct ModuleInfo { + Triple triple; + size_t globals; + size_t funcs; + size_t bbs; + size_t insts; + size_t clones; + size_t weight; +}; + +ModuleInfo compute_module_info(Module &M) { + ModuleInfo info; + info.triple = Triple(M.getTargetTriple()); + info.globals = 0; + info.funcs = 0; + info.bbs = 0; + info.insts = 0; + info.clones = 0; + info.weight = 0; + for (auto &G : M.global_values()) { + if (G.isDeclaration()) { + continue; + } + info.globals++; + if (auto F = dyn_cast(&G)) { + info.funcs++; + auto func_info = getFunctionWeight(*F); + info.bbs += func_info.bbs; + info.insts += func_info.insts; + info.clones += func_info.clones; + info.weight += func_info.weight; + } else { + info.weight += 1; + } + } + return info; +} + +struct Partition { + StringMap globals; + StringMap fvars; + StringMap gvars; + size_t weight; +}; + +static bool canPartition(const GlobalValue &G) { + if (auto F = dyn_cast(&G)) { + if (F->hasFnAttribute(Attribute::AlwaysInline)) + return false; + } + return true; +} + +static inline bool verify_partitioning(const SmallVectorImpl &partitions, const Module &M, size_t fvars_size, size_t gvars_size) { + bool bad = false; +#ifndef JL_NDEBUG + SmallVector fvars(fvars_size); + SmallVector gvars(gvars_size); + StringMap GVNames; + for (uint32_t i = 0; i < partitions.size(); i++) { + for (auto &name : partitions[i].globals) { + if (GVNames.count(name.getKey())) { + bad = true; + dbgs() << "Duplicate global name " << name.getKey() << " in partitions " << i << " and " << GVNames[name.getKey()] << "\n"; + } + GVNames[name.getKey()] = i; + } + for (auto &fvar : partitions[i].fvars) { + if (fvars[fvar.second] != 0) { + bad = true; + dbgs() << "Duplicate fvar " << fvar.first() << " in partitions " << i << " and " << fvars[fvar.second] - 1 << "\n"; + } + fvars[fvar.second] = i+1; + } + for (auto &gvar : partitions[i].gvars) { + if (gvars[gvar.second] != 0) { + bad = true; + dbgs() << "Duplicate gvar " << gvar.first() << " in partitions " << i << " and " << gvars[gvar.second] - 1 << "\n"; + } + gvars[gvar.second] = i+1; + } + } + for (auto &GV : M.global_values()) { + if (GV.isDeclaration()) { + if (GVNames.count(GV.getName())) { + bad = true; + dbgs() << "Global " << GV.getName() << " is a declaration but is in partition " << GVNames[GV.getName()] << "\n"; + } + } else { + // Local global values are not partitioned + if (!canPartition(GV)) { + if (GVNames.count(GV.getName())) { + bad = true; + dbgs() << "Shouldn't have partitioned " << GV.getName() << ", but is in partition " << GVNames[GV.getName()] << "\n"; + } + continue; + } + if (!GVNames.count(GV.getName())) { + bad = true; + dbgs() << "Global " << GV << " not in any partition\n"; + } + for (ConstantUses uses(const_cast(&GV), 
const_cast(M)); !uses.done(); uses.next()) { + auto val = uses.get_info().val; + if (!GVNames.count(val->getName())) { + bad = true; + dbgs() << "Global " << val->getName() << " used by " << GV.getName() << ", which is not in any partition\n"; + continue; + } + if (GVNames[val->getName()] != GVNames[GV.getName()]) { + bad = true; + dbgs() << "Global " << val->getName() << " used by " << GV.getName() << ", which is in partition " << GVNames[GV.getName()] << " but " << val->getName() << " is in partition " << GVNames[val->getName()] << "\n"; + } + } + } + } + for (uint32_t i = 0; i < fvars_size; i++) { + if (fvars[i] == 0) { + bad = true; + dbgs() << "fvar " << i << " not in any partition\n"; + } + } + for (uint32_t i = 0; i < gvars_size; i++) { + if (gvars[i] == 0) { + bad = true; + dbgs() << "gvar " << i << " not in any partition\n"; + } + } +#endif + return !bad; +} + +// Chop a module up as equally as possible by weight into threads partitions +static SmallVector partitionModule(Module &M, unsigned threads) { + //Start by stripping fvars and gvars, which helpfully removes their uses as well + DenseMap fvars, gvars; + get_fvars_gvars(M, fvars, gvars); + + // Partition by union-find, since we only have def->use traversal right now + struct Partitioner { + struct Node { + GlobalValue *GV; + unsigned parent; + unsigned size; + size_t weight; + }; + SmallVector nodes; + DenseMap node_map; + unsigned merged; + + unsigned make(GlobalValue *GV, size_t weight) { + unsigned idx = nodes.size(); + nodes.push_back({GV, idx, 1, weight}); + node_map[GV] = idx; + return idx; + } + + unsigned find(unsigned idx) { + while (nodes[idx].parent != idx) { + nodes[idx].parent = nodes[nodes[idx].parent].parent; + idx = nodes[idx].parent; + } + return idx; + } + + unsigned merge(unsigned x, unsigned y) { + x = find(x); + y = find(y); + if (x == y) + return x; + if (nodes[x].size < nodes[y].size) + std::swap(x, y); + nodes[y].parent = x; + nodes[x].size += nodes[y].size; + nodes[x].weight += nodes[y].weight; + merged++; + return x; + } + }; + + Partitioner partitioner; + + for (auto &G : M.global_values()) { + if (G.isDeclaration()) + continue; + if (!canPartition(G)) + continue; + // Currently ccallable global aliases have extern linkage, we only want to make the + // internally linked functions/global variables extern+hidden + if (G.hasLocalLinkage()) { + G.setLinkage(GlobalValue::ExternalLinkage); + G.setVisibility(GlobalValue::HiddenVisibility); + } + if (auto F = dyn_cast(&G)) { + partitioner.make(&G, getFunctionWeight(*F).weight); + } else { + partitioner.make(&G, 1); + } + } + + // Merge all uses to go together into the same partition + for (unsigned i = 0; i < partitioner.nodes.size(); ++i) { + for (ConstantUses uses(partitioner.nodes[i].GV, M); !uses.done(); uses.next()) { + auto val = uses.get_info().val; + auto idx = partitioner.node_map.find(val); + // This can fail if we can't partition a global, but it uses something we can partition + // This should be fixed by altering canPartition to not permit partitioning this global + assert(idx != partitioner.node_map.end()); + partitioner.merge(i, idx->second); + } + } + + SmallVector partitions(threads); + // always get the smallest partition first + auto pcomp = [](const Partition *p1, const Partition *p2) { + return p1->weight > p2->weight; + }; + std::priority_queue, decltype(pcomp)> pq(pcomp); + for (unsigned i = 0; i < threads; ++i) { + pq.push(&partitions[i]); + } + + SmallVector idxs(partitioner.nodes.size()); + std::iota(idxs.begin(), idxs.end(), 0); 
+ std::sort(idxs.begin(), idxs.end(), [&](unsigned a, unsigned b) { + //because roots have more weight than their children, + //we can sort by weight and get the roots first + return partitioner.nodes[a].weight > partitioner.nodes[b].weight; + }); + + // Assign the root of each partition to a partition, then assign its children to the same one + for (unsigned idx = 0; idx < idxs.size(); ++idx) { + auto i = idxs[idx]; + auto root = partitioner.find(i); + assert(root == i || partitioner.nodes[root].weight == 0); + if (partitioner.nodes[root].weight) { + auto &node = partitioner.nodes[root]; + auto &P = *pq.top(); + pq.pop(); + auto name = node.GV->getName(); + P.globals.insert({name, true}); + if (fvars.count(node.GV)) + P.fvars[name] = fvars[node.GV]; + if (gvars.count(node.GV)) + P.gvars[name] = gvars[node.GV]; + P.weight += node.weight; + node.weight = 0; + node.size = &P - partitions.data(); + pq.push(&P); + } + if (root != i) { + auto &node = partitioner.nodes[i]; + assert(node.weight != 0); + // we assigned its root already, so just add it to the root's partition + // don't touch the priority queue, since we're not changing the weight + auto &P = partitions[partitioner.nodes[root].size]; + auto name = node.GV->getName(); + P.globals.insert({name, true}); + if (fvars.count(node.GV)) + P.fvars[name] = fvars[node.GV]; + if (gvars.count(node.GV)) + P.gvars[name] = gvars[node.GV]; + node.weight = 0; + node.size = partitioner.nodes[root].size; + } + } + + bool verified = verify_partitioning(partitions, M, fvars.size(), gvars.size()); + assert(verified && "Partitioning failed to partition globals correctly"); + (void) verified; + + return partitions; +} + +struct ImageTimer { + uint64_t elapsed = 0; + std::string name; + std::string desc; + + void startTimer() { + elapsed = jl_hrtime(); + } + + void stopTimer() { + elapsed = jl_hrtime() - elapsed; + } + + void init(const Twine &name, const Twine &desc) { + this->name = name.str(); + this->desc = desc.str(); + } + + operator bool() const { + return elapsed != 0; + } + + void print(raw_ostream &out, bool clear=false) { + if (!*this) + return; + out << llvm::formatv("{0:F3} ", elapsed / 1e9) << name << " " << desc << "\n"; + if (clear) + elapsed = 0; + } +}; + +struct ShardTimers { + ImageTimer deserialize; + ImageTimer materialize; + ImageTimer construct; + // impl timers + ImageTimer unopt; + ImageTimer optimize; + ImageTimer opt; + ImageTimer obj; + ImageTimer asm_; + + std::string name; + std::string desc; + + void print(raw_ostream &out, bool clear=false) { + StringRef sep = "===-------------------------------------------------------------------------==="; + out << formatv("{0}\n{1}\n{0}\n", sep, fmt_align(name + " : " + desc, AlignStyle::Center, sep.size())); + auto total = deserialize.elapsed + materialize.elapsed + construct.elapsed + + unopt.elapsed + optimize.elapsed + opt.elapsed + obj.elapsed + asm_.elapsed; + out << "Time (s) Name Description\n"; + deserialize.print(out, clear); + materialize.print(out, clear); + construct.print(out, clear); + unopt.print(out, clear); + optimize.print(out, clear); + opt.print(out, clear); + obj.print(out, clear); + asm_.print(out, clear); + out << llvm::formatv("{0:F3} total Total time taken\n", total / 1e9); + } +}; + +struct AOTOutputs { + SmallVector unopt, opt, obj, asm_; +}; + +// Perform the actual optimization and emission of the output files +static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimers &timers, + bool unopt, bool opt, bool obj, bool asm_) { + 
assert((unopt || opt || obj || asm_) && "no output requested"); + AOTOutputs out; + auto TM = std::unique_ptr( + SourceTM.getTarget().createTargetMachine( + SourceTM.getTargetTriple().str(), + SourceTM.getTargetCPU(), + SourceTM.getTargetFeatureString(), + SourceTM.Options, + SourceTM.getRelocationModel(), + SourceTM.getCodeModel(), + SourceTM.getOptLevel())); + fixupTM(*TM); + if (unopt) { + timers.unopt.startTimer(); + raw_svector_ostream OS(out.unopt); + PassBuilder PB; + AnalysisManagers AM{*TM, PB, OptimizationLevel::O0}; + ModulePassManager MPM; + MPM.addPass(BitcodeWriterPass(OS)); + MPM.run(M, AM.MAM); + timers.unopt.stopTimer(); + } + if (!opt && !obj && !asm_) { + return out; + } + assert(!verifyLLVMIR(M)); + + { + timers.optimize.startTimer(); + + auto PMTM = std::unique_ptr( + SourceTM.getTarget().createTargetMachine( + SourceTM.getTargetTriple().str(), + SourceTM.getTargetCPU(), + SourceTM.getTargetFeatureString(), + SourceTM.Options, + SourceTM.getRelocationModel(), + SourceTM.getCodeModel(), + SourceTM.getOptLevel())); + fixupTM(*PMTM); + NewPM optimizer{std::move(PMTM), getOptLevel(jl_options.opt_level), OptimizationOptions::defaults(true, true)}; + optimizer.run(M); + assert(!verifyLLVMIR(M)); + bool inject_aliases = false; + for (auto &F : M.functions()) { + if (!F.isDeclaration() && F.getName() != "_DllMainCRTStartup") { + inject_aliases = true; + break; + } + } + // no need to inject aliases if we have no functions + + if (inject_aliases) { + // We would like to emit an alias or an weakref alias to redirect these symbols + // but LLVM doesn't let us emit a GlobalAlias to a declaration... + // So for now we inject a definition of these functions that calls our runtime + // functions. We do so after optimization to avoid cloning these functions. 
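The injectCRTAlias calls just below register these forwarders for the Float16/BFloat16 soft-float helpers. As a rough illustration of the mechanism described in the comment above, one plausible shape for such an injected definition is a tiny function that simply tail-calls the julia__* runtime routine; this is a hedged sketch, not the actual injectCRTAlias implementation, and the helper name is made up:

    // Sketch only: a forwarder definition standing in for an alias-to-declaration,
    // which LLVM's GlobalAlias does not permit. Names and structure are assumptions.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"

    static void injectForwarder(llvm::Module &M, llvm::StringRef alias,
                                llvm::StringRef target, llvm::FunctionType *FT) {
        using namespace llvm;
        Function *F = cast<Function>(M.getOrInsertFunction(alias, FT).getCallee());
        if (!F->isDeclaration())
            return;                  // already has a body, nothing to inject
        FunctionCallee callee = M.getOrInsertFunction(target, FT);
        IRBuilder<> builder(BasicBlock::Create(M.getContext(), "top", F));
        SmallVector<Value *, 4> args;
        for (auto &arg : F->args())
            args.push_back(&arg);    // forward every argument unchanged
        CallInst *call = builder.CreateCall(callee, args);
        call->setTailCall();         // keep the forwarder as thin as possible
        builder.CreateRet(call);
    }

In the diff itself this only happens when the shard actually defines functions (the inject_aliases check above), so empty shards do not pay for the extra symbols.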
+ + // Float16 conversion routines + injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee", + FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false)); + injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee", + FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false)); + injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee", + FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); + injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee", + FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); + injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2", + FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false)); + + // BFloat16 conversion routines + injectCRTAlias(M, "__truncsfbf2", "julia__truncsfbf2", + FunctionType::get(Type::getBFloatTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false)); + injectCRTAlias(M, "__truncsdbf2", "julia__truncdfbf2", + FunctionType::get(Type::getBFloatTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false)); + } + timers.optimize.stopTimer(); + } + + if (opt) { + timers.opt.startTimer(); + raw_svector_ostream OS(out.opt); + PassBuilder PB; + AnalysisManagers AM{*TM, PB, OptimizationLevel::O0}; + ModulePassManager MPM; + MPM.addPass(BitcodeWriterPass(OS)); + MPM.run(M, AM.MAM); + timers.opt.stopTimer(); + } + + if (obj) { + timers.obj.startTimer(); + raw_svector_ostream OS(out.obj); + legacy::PassManager emitter; + addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); + if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false)) + jl_safe_printf("ERROR: target does not support generation of object files\n"); + emitter.run(M); + timers.obj.stopTimer(); + } + + if (asm_) { + timers.asm_.startTimer(); + raw_svector_ostream OS(out.asm_); + legacy::PassManager emitter; + addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); + if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false)) + jl_safe_printf("ERROR: target does not support generation of assembly files\n"); + emitter.run(M); + timers.asm_.stopTimer(); + } + + return out; +} + +// serialize module to bitcode +static auto serializeModule(const Module &M) { + assert(!verifyLLVMIR(M) && "Serializing invalid module!"); + SmallVector ClonedModuleBuffer; + BitcodeWriter BCWriter(ClonedModuleBuffer); + BCWriter.writeModule(M); + BCWriter.writeSymtab(); + BCWriter.writeStrtab(); + return ClonedModuleBuffer; +} + +// Modules are deserialized lazily by LLVM, to avoid deserializing +// unnecessary functions. We take advantage of this by serializing +// the entire module once, then deleting the bodies of functions +// that are not in this partition. Once unnecessary functions are +// deleted, we then materialize the entire module to make use-lists +// consistent. +static void materializePreserved(Module &M, Partition &partition) { + DenseSet Preserve; + for (auto &Name : partition.globals) { + auto *GV = M.getNamedValue(Name.first()); + assert(GV && !GV->isDeclaration() && !GV->hasLocalLinkage()); + if (!Name.second) { + // We skip partitioning for internal variables, so this has + // the same effect as putting it in preserve. + // This just avoids a hashtable lookup. 
+ GV->setLinkage(GlobalValue::InternalLinkage); + assert(GV->hasDefaultVisibility()); + } else { + Preserve.insert(GV); + } + } + + for (auto &F : M.functions()) { + if (F.isDeclaration()) + continue; + if (F.hasLocalLinkage()) + continue; + if (Preserve.contains(&F)) + continue; + F.deleteBody(); + F.setLinkage(GlobalValue::ExternalLinkage); + F.setVisibility(GlobalValue::HiddenVisibility); + F.setDSOLocal(true); + } + + for (auto &GV : M.globals()) { + if (GV.isDeclaration()) + continue; + if (Preserve.contains(&GV)) + continue; + if (GV.hasLocalLinkage()) + continue; + GV.setInitializer(nullptr); + GV.setLinkage(GlobalValue::ExternalLinkage); + GV.setVisibility(GlobalValue::HiddenVisibility); + GV.setDSOLocal(true); + } + + // Global aliases are a pain to deal with. It is illegal to have an alias to a declaration, + // so we need to replace them with either a function or a global variable declaration. However, + // we can't just delete the alias, because that would break the users of the alias. Therefore, + // we do a dance where we point each global alias to a dummy function or global variable, + // then materialize the module to access use-lists, then replace all the uses, and finally commit + // to deleting the old alias. + SmallVector> DeletedAliases; + for (auto &GA : M.aliases()) { + assert(!GA.isDeclaration() && "Global aliases can't be declarations!"); // because LLVM says so + if (Preserve.contains(&GA)) + continue; + if (GA.hasLocalLinkage()) + continue; + if (GA.getValueType()->isFunctionTy()) { + auto F = Function::Create(cast(GA.getValueType()), GlobalValue::ExternalLinkage, "", &M); + // This is an extremely sad hack to make sure the global alias never points to an extern function + auto BB = BasicBlock::Create(M.getContext(), "", F); + new UnreachableInst(M.getContext(), BB); + GA.setAliasee(F); + DeletedAliases.push_back({ &GA, F }); + } + else { + auto GV = new GlobalVariable(M, GA.getValueType(), false, GlobalValue::ExternalLinkage, Constant::getNullValue(GA.getValueType())); + DeletedAliases.push_back({ &GA, GV }); + } + } + + cantFail(M.materializeAll()); + + for (auto &Deleted : DeletedAliases) { + Deleted.second->takeName(Deleted.first); + Deleted.first->replaceAllUsesWith(Deleted.second); + Deleted.first->eraseFromParent(); + // undo our previous sad hack + if (auto F = dyn_cast(Deleted.second)) { + F->deleteBody(); + } else { + cast(Deleted.second)->setInitializer(nullptr); + } + } +} + +// Reconstruct jl_fvars, jl_gvars, jl_fvars_idxs, and jl_gvars_idxs from the partition +static void construct_vars(Module &M, Partition &partition) { + SmallVector> fvar_pairs; + fvar_pairs.reserve(partition.fvars.size()); + for (auto &fvar : partition.fvars) { + auto F = M.getFunction(fvar.first()); + assert(F); + assert(!F->isDeclaration()); + fvar_pairs.push_back({ fvar.second, F }); + } + SmallVector fvars; + SmallVector fvar_idxs; + fvars.reserve(fvar_pairs.size()); + fvar_idxs.reserve(fvar_pairs.size()); + std::sort(fvar_pairs.begin(), fvar_pairs.end()); + for (auto &fvar : fvar_pairs) { + fvars.push_back(fvar.second); + fvar_idxs.push_back(fvar.first); + } + SmallVector, 0> gvar_pairs; + gvar_pairs.reserve(partition.gvars.size()); + for (auto &gvar : partition.gvars) { + auto GV = M.getNamedGlobal(gvar.first()); + assert(GV); + assert(!GV->isDeclaration()); + gvar_pairs.push_back({ gvar.second, GV }); + } + SmallVector gvars; + SmallVector gvar_idxs; + gvars.reserve(gvar_pairs.size()); + gvar_idxs.reserve(gvar_pairs.size()); + std::sort(gvar_pairs.begin(), 
gvar_pairs.end()); + for (auto &gvar : gvar_pairs) { + gvars.push_back(gvar.second); + gvar_idxs.push_back(gvar.first); + } + + // Now commit the fvars, gvars, and idxs + auto T_psize = M.getDataLayout().getIntPtrType(M.getContext())->getPointerTo(); + emit_offset_table(M, fvars, "jl_fvars", T_psize); + emit_offset_table(M, gvars, "jl_gvars", T_psize); + auto fidxs = ConstantDataArray::get(M.getContext(), fvar_idxs); + auto fidxs_var = new GlobalVariable(M, fidxs->getType(), true, + GlobalVariable::ExternalLinkage, + fidxs, "jl_fvar_idxs"); + fidxs_var->setVisibility(GlobalValue::HiddenVisibility); + fidxs_var->setDSOLocal(true); + auto gidxs = ConstantDataArray::get(M.getContext(), gvar_idxs); + auto gidxs_var = new GlobalVariable(M, gidxs->getType(), true, + GlobalVariable::ExternalLinkage, + gidxs, "jl_gvar_idxs"); + gidxs_var->setVisibility(GlobalValue::HiddenVisibility); + gidxs_var->setDSOLocal(true); +} + +extern "C" void lambda_trampoline(void* arg) { + std::function* func = static_cast*>(arg); + (*func)(); + delete func; +} + +// Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading, +// as well as partitioning, serialization, and deserialization. +template +static SmallVector add_output(Module &M, TargetMachine &TM, StringRef name, unsigned threads, + bool unopt_out, bool opt_out, bool obj_out, bool asm_out, ModuleReleasedFunc module_released) { + SmallVector outputs(threads); + assert(threads); + assert(unopt_out || opt_out || obj_out || asm_out); + // Timers for timing purposes + TimerGroup timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str()); + SmallVector timers(threads); + for (unsigned i = 0; i < threads; ++i) { + auto idx = std::to_string(i); + timers[i].name = "shard_" + idx; + timers[i].desc = ("Timings for " + name + " module shard " + idx).str(); + timers[i].deserialize.init("deserialize_" + idx, "Deserialize module"); + timers[i].materialize.init("materialize_" + idx, "Materialize declarations"); + timers[i].construct.init("construct_" + idx, "Construct partitioned definitions"); + timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode"); + timers[i].optimize.init("optimize_" + idx, "Optimize shard"); + timers[i].opt.init("opt_" + idx, "Emit optimized bitcode"); + timers[i].obj.init("obj_" + idx, "Emit object file"); + timers[i].asm_.init("asm_" + idx, "Emit assembly file"); + } + Timer partition_timer("partition", "Partition module", timer_group); + Timer serialize_timer("serialize", "Serialize module", timer_group); + Timer output_timer("output", "Add outputs", timer_group); + bool report_timings = false; + if (auto env = getenv("JULIA_IMAGE_TIMINGS")) { + char *endptr; + unsigned long val = strtoul(env, &endptr, 10); + if (endptr != env && !*endptr && val <= 1) { + report_timings = val; + } else { + if (StringRef("true").compare_insensitive(env) == 0) + report_timings = true; + else if (StringRef("false").compare_insensitive(env) == 0) + report_timings = false; + else + errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n"; + } + } + // Single-threaded case + if (threads == 1) { + output_timer.startTimer(); + { + JL_TIMING(NATIVE_AOT, NATIVE_Opt); + outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out); + } + output_timer.stopTimer(); + // Don't need M anymore + module_released(M); + + if (!report_timings) { + timer_group.clear(); + } else { + timer_group.print(dbgs(), true); + for (auto &t : timers) { + t.print(dbgs(), 
true); + } + } + return outputs; + } + + partition_timer.startTimer(); + uint64_t counter = 0; + // Partitioning requires all globals to have names. + // We use a prefix to avoid name conflicts with user code. + for (auto &G : M.global_values()) { + if (!G.isDeclaration() && !G.hasName()) { + G.setName("jl_ext_" + Twine(counter++)); + } + } + auto partitions = partitionModule(M, threads); + partition_timer.stopTimer(); + + serialize_timer.startTimer(); + auto serialized = serializeModule(M); + serialize_timer.stopTimer(); + + // Don't need M anymore, since we'll only read from serialized from now on + module_released(M); + + output_timer.startTimer(); + + // Start all of the worker threads + { + JL_TIMING(NATIVE_AOT, NATIVE_Opt); + std::vector workers(threads); + for (unsigned i = 0; i < threads; i++) { + std::function func = [&, i]() { + LLVMContext ctx; + // Lazily deserialize the entire module + timers[i].deserialize.startTimer(); + auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx); + // Make sure this also fails with only julia, but not LLVM assertions enabled, + // otherwise, the first error we hit is the LLVM module verification failure, + // which will look very confusing, because the module was partially deserialized. + bool deser_succeeded = (bool)EM; + auto M = cantFail(std::move(EM), "Error loading module"); + assert(deser_succeeded); (void)deser_succeeded; + timers[i].deserialize.stopTimer(); + + timers[i].materialize.startTimer(); + materializePreserved(*M, partitions[i]); + timers[i].materialize.stopTimer(); + + timers[i].construct.startTimer(); + construct_vars(*M, partitions[i]); + M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), "_" + std::to_string(i))); + // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file + // or it may skip emitting debug info for that file. Here set it to ./julia#N + DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), "."); + for (DICompileUnit *CU : M->debug_compile_units()) + CU->replaceOperandWith(0, topfile); + timers[i].construct.stopTimer(); + + outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out); + }; + auto arg = new std::function(func); + uv_thread_create(&workers[i], lambda_trampoline, arg); // Use libuv thread to avoid issues with stack sizes + } + + // Wait for all of the worker threads to finish + for (unsigned i = 0; i < threads; i++) + uv_thread_join(&workers[i]); + } + + output_timer.stopTimer(); + + if (!report_timings) { + timer_group.clear(); + } else { + timer_group.print(dbgs(), true); + for (auto &t : timers) { + t.print(dbgs(), true); + } + dbgs() << "Partition weights: ["; + bool comma = false; + for (auto &p : partitions) { + if (comma) + dbgs() << ", "; + else + comma = true; + dbgs() << p.weight; + } + dbgs() << "]\n"; + } + return outputs; +} + +extern int jl_is_timing_passes; +static unsigned compute_image_thread_count(const ModuleInfo &info) { + // 32-bit systems are very memory-constrained +#ifdef _P32 + LLVM_DEBUG(dbgs() << "32-bit systems are restricted to a single thread\n"); + return 1; +#endif + if (jl_is_timing_passes) // LLVM isn't thread safe when timing the passes https://github.com/llvm/llvm-project/issues/44417 + return 1; + // COFF has limits on external symbols (even hidden) up to 65536. We reserve the last few + // for any of our other symbols that we insert during compilation. 
+ if (info.triple.isOSBinFormatCOFF() && info.globals > 64000) { + LLVM_DEBUG(dbgs() << "COFF is restricted to a single thread for large images\n"); + return 1; + } + // This is not overridable because empty modules do occasionally appear, but they'll be very small and thus exit early to + // known easy behavior. Plus they really don't warrant multiple threads + if (info.weight < 1000) { + LLVM_DEBUG(dbgs() << "Small module, using a single thread\n"); + return 1; + } + + unsigned threads = std::max(jl_cpu_threads() / 2, 1); + + auto max_threads = info.globals / 100; + if (max_threads < threads) { + LLVM_DEBUG(dbgs() << "Low global count limiting threads to " << max_threads << " (" << info.globals << "globals)\n"); + threads = max_threads; + } + + // environment variable override + const char *env_threads = getenv("JULIA_IMAGE_THREADS"); + bool env_threads_set = false; + if (env_threads) { + char *endptr; + unsigned long requested = strtoul(env_threads, &endptr, 10); + if (*endptr || !requested) { + jl_safe_printf("WARNING: invalid value '%s' for JULIA_IMAGE_THREADS\n", env_threads); + } else { + LLVM_DEBUG(dbgs() << "Overriding threads to " << requested << " due to JULIA_IMAGE_THREADS\n"); + threads = requested; + env_threads_set = true; + } + } + + // more defaults + if (!env_threads_set && threads > 1) { + if (auto fallbackenv = getenv("JULIA_CPU_THREADS")) { + char *endptr; + unsigned long requested = strtoul(fallbackenv, &endptr, 10); + if (*endptr || !requested) { + jl_safe_printf("WARNING: invalid value '%s' for JULIA_CPU_THREADS\n", fallbackenv); + } else if (requested < threads) { + LLVM_DEBUG(dbgs() << "Overriding threads to " << requested << " due to JULIA_CPU_THREADS\n"); + threads = requested; + } + } + } + + threads = std::max(threads, 1u); + + return threads; +} + +jl_emission_params_t default_emission_params = { 1 }; // takes the running content that has collected in the shadow module and dump it to disk // this builds the object file portion of the sysimage files for fast startup -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN void jl_dump_native_impl(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len) + ios_t *z, ios_t *s, + jl_emission_params_t *params) { - JL_TIMING(NATIVE_DUMP); + JL_TIMING(NATIVE_AOT, NATIVE_Dump); jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; - auto TSCtx = data->M.getContext(); - auto lock = TSCtx.getLock(); - LLVMContext &Context = *TSCtx.getContext(); + if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) { + LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n"); + delete data; + return; + } + + if (!params) { + params = &default_emission_params; + } + // We don't want to use MCJIT's target machine because // it uses the large code model and we may potentially // want less optimizations there. 
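The hunk below replaces the old #ifdef-based selection of object format, relocation model, and code model with runtime checks on the Triple. A minimal standalone sketch of that pattern, under the same choices the diff makes (PIC on Linux/FreeBSD, small code model everywhere except PPC, where the small model only allows 16-bit offsets); the function name is hypothetical and the include location varies by LLVM version:

    // Illustrative sketch: derive reloc/code model from the runtime Triple
    // rather than compile-time #ifdefs, as the hunk below does.
    #include <memory>
    #include "llvm/ADT/Optional.h"
    #include "llvm/MC/TargetRegistry.h"      // llvm::Target (path differs on older LLVM)
    #include "llvm/Target/TargetMachine.h"

    static std::unique_ptr<llvm::TargetMachine>
    makeAOTTargetMachine(const llvm::Target &T, const llvm::Triple &TheTriple,
                         llvm::StringRef CPU, llvm::StringRef Features,
                         const llvm::TargetOptions &Opts) {
        llvm::Optional<llvm::Reloc::Model> Reloc;
        if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD())
            Reloc = llvm::Reloc::PIC_;                      // position independent for shared objects
        llvm::CodeModel::Model CM = llvm::CodeModel::Small; // signed 32-bit offsets in fvar/gvar tables
        if (TheTriple.isPPC())
            CM = llvm::CodeModel::Medium;                   // PPC small model is limited to 16-bit offsets
        return std::unique_ptr<llvm::TargetMachine>(T.createTargetMachine(
            TheTriple.str(), CPU, Features, Opts, Reloc, CM,
            llvm::CodeGenOpt::Aggressive));
    }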
- Triple TheTriple = Triple(jl_ExecutionEngine->getTargetTriple()); // make sure to emit the native object format, even if FORCE_ELF was set in codegen -#if defined(_OS_WINDOWS_) - TheTriple.setObjectFormat(Triple::COFF); -#elif defined(_OS_DARWIN_) - TheTriple.setObjectFormat(Triple::MachO); - TheTriple.setOS(llvm::Triple::MacOSX); -#endif - std::unique_ptr TM( + Triple TheTriple(data->M.withModuleDo([](Module &M) { return M.getTargetTriple(); })); + if (TheTriple.isOSWindows()) { + TheTriple.setObjectFormat(Triple::COFF); + } else if (TheTriple.isOSDarwin()) { + TheTriple.setObjectFormat(Triple::MachO); + SmallString<16> Str; + Str += "macosx"; + if (TheTriple.isAArch64()) + Str += "11.0.0"; // Update this if MACOSX_VERSION_MIN changes + else + Str += "10.14.0"; + TheTriple.setOSName(Str); + } + Optional RelocModel; + if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD()) { + RelocModel = Reloc::PIC_; + } + CodeModel::Model CMModel = CodeModel::Small; + if (TheTriple.isPPC()) { + // On PPC the small model is limited to 16bit offsets + CMModel = CodeModel::Medium; + } + std::unique_ptr SourceTM( jl_ExecutionEngine->getTarget().createTargetMachine( TheTriple.getTriple(), jl_ExecutionEngine->getTargetCPU(), jl_ExecutionEngine->getTargetFeatureString(), jl_ExecutionEngine->getTargetOptions(), -#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) - Reloc::PIC_, -#else - Optional(), -#endif -#if defined(_CPU_PPC_) || defined(_CPU_PPC64_) - // On PPC the small model is limited to 16bit offsets - CodeModel::Medium, -#else - // Use small model so that we can use signed 32bits offset in the function and GV tables - CodeModel::Small, -#endif + RelocModel, + CMModel, CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag? )); + fixupTM(*SourceTM); + auto DL = jl_create_datalayout(*SourceTM); + std::string StackProtectorGuard; + unsigned OverrideStackAlignment; + data->M.withModuleDo([&](Module &M) { + StackProtectorGuard = M.getStackProtectorGuard().str(); + OverrideStackAlignment = M.getOverrideStackAlignment(); + }); + + auto compile = [&](Module &M, StringRef name, unsigned threads, auto module_released) { + return add_output(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, module_released); + }; + SmallVector sysimg_outputs; + SmallVector data_outputs; + SmallVector metadata_outputs; + if (z) { + JL_TIMING(NATIVE_AOT, NATIVE_Sysimg); + LLVMContext Context; + Module sysimgM("sysimg", Context); + sysimgM.setTargetTriple(TheTriple.str()); + sysimgM.setDataLayout(DL); + sysimgM.setStackProtectorGuard(StackProtectorGuard); + sysimgM.setOverrideStackAlignment(OverrideStackAlignment); + Constant *data = ConstantDataArray::get(Context, + ArrayRef((const unsigned char*)z->buf, z->size)); + auto sysdata = new GlobalVariable(sysimgM, data->getType(), false, + GlobalVariable::ExternalLinkage, + data, "jl_system_image_data"); + sysdata->setAlignment(Align(64)); + addComdat(sysdata, TheTriple); + Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size); + addComdat(new GlobalVariable(sysimgM, len->getType(), true, + GlobalVariable::ExternalLinkage, + len, "jl_system_image_size"), TheTriple); + // Free z here, since we've copied out everything into data + // Results in serious memory savings + ios_close(z); + free(z); + // Note that we don't set z to null, this allows the check in WRITE_ARCHIVE + // to function as expected + // no need to free the module/context, destructor handles that + sysimg_outputs = compile(sysimgM, "sysimg", 1, 
[](Module &) {}); + } - // set up optimization passes - SmallVector bc_Buffer; - SmallVector obj_Buffer; - SmallVector asm_Buffer; - SmallVector unopt_bc_Buffer; - raw_svector_ostream bc_OS(bc_Buffer); - raw_svector_ostream obj_OS(obj_Buffer); - raw_svector_ostream asm_OS(asm_Buffer); - raw_svector_ostream unopt_bc_OS(unopt_bc_Buffer); - std::vector bc_Archive; - std::vector obj_Archive; - std::vector asm_Archive; - std::vector unopt_bc_Archive; - std::vector outputs; - - legacy::PassManager preopt, postopt; - - if (unopt_bc_fname) - preopt.add(createBitcodeWriterPass(unopt_bc_OS)); - - //Is this necessary for TM? - // addTargetPasses(&postopt, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - if (bc_fname) - postopt.add(createBitcodeWriterPass(bc_OS)); - if (obj_fname) - if (TM->addPassesToEmitFile(postopt, obj_OS, nullptr, CGFT_ObjectFile, false)) - jl_safe_printf("ERROR: target does not support generation of object files\n"); - if (asm_fname) - if (TM->addPassesToEmitFile(postopt, asm_OS, nullptr, CGFT_AssemblyFile, false)) - jl_safe_printf("ERROR: target does not support generation of object files\n"); + bool imaging_mode = imaging_default() || jl_options.outputo; - legacy::PassManager optimizer; - if (bc_fname || obj_fname || asm_fname) { - addTargetPasses(&optimizer, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - addOptimizationPasses(&optimizer, jl_options.opt_level, true, true); - addMachinePasses(&optimizer, jl_options.opt_level); - } + unsigned threads = 1; + unsigned nfvars = 0; + unsigned ngvars = 0; // Reset the target triple to make sure it matches the new target machine - auto dataM = data->M.getModuleUnlocked(); - dataM->setTargetTriple(TM->getTargetTriple().str()); - dataM->setDataLayout(jl_create_datalayout(*TM)); - Type *T_size; - if (sizeof(size_t) == 8) - T_size = Type::getInt64Ty(Context); - else - T_size = Type::getInt32Ty(Context); - Type *T_psize = T_size->getPointerTo(); - // add metadata information - if (imaging_default()) { - emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_sysimg_gvars", T_psize); - emit_offset_table(*dataM, data->jl_sysimg_fvars, "jl_sysimg_fvars", T_psize); + bool has_veccall = false; - // reflect the address of the jl_RTLD_DEFAULT_handle variable - // back to the caller, so that we can check for consistency issues - GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(dataM); - addComdat(new GlobalVariable(*dataM, - jlRTLD_DEFAULT_var->getType(), - true, - GlobalVariable::ExternalLinkage, - jlRTLD_DEFAULT_var, - "jl_RTLD_DEFAULT_handle_pointer")); - } + data->M.withModuleDo([&](Module &dataM) { + JL_TIMING(NATIVE_AOT, NATIVE_Setup); + dataM.setTargetTriple(TheTriple.str()); + dataM.setDataLayout(DL); + auto &Context = dataM.getContext(); - // do the actual work - auto add_output = [&] (Module &M, StringRef unopt_bc_Name, StringRef bc_Name, StringRef obj_Name, StringRef asm_Name) { - preopt.run(M); - optimizer.run(M); - postopt.run(M); - if (unopt_bc_fname) - emit_result(unopt_bc_Archive, unopt_bc_Buffer, unopt_bc_Name, outputs); - if (bc_fname) - emit_result(bc_Archive, bc_Buffer, bc_Name, outputs); - if (obj_fname) - emit_result(obj_Archive, obj_Buffer, obj_Name, outputs); - if (asm_fname) - emit_result(asm_Archive, asm_Buffer, asm_Name, outputs); - }; + Type *T_psize = dataM.getDataLayout().getIntPtrType(Context)->getPointerTo(); - add_output(*dataM, "unopt.bc", "text.bc", "text.o", "text.s"); + // This should really be in jl_create_native, but we haven't + // yet set the target triple binary format correctly at that + 
// point. This should be resolved when we start JITting for + // COFF when we switch over to JITLink. + for (auto &GA : dataM.aliases()) { + // Global aliases are only used for ccallable things, so we should + // mark them as dllexport + addComdat(&GA, TheTriple); + } - orc::ThreadSafeModule sysimage(std::make_unique("sysimage", Context), TSCtx); - auto sysimageM = sysimage.getModuleUnlocked(); - sysimageM->setTargetTriple(dataM->getTargetTriple()); - sysimageM->setDataLayout(dataM->getDataLayout()); -#if JL_LLVM_VERSION >= 130000 - sysimageM->setStackProtectorGuard(dataM->getStackProtectorGuard()); - sysimageM->setOverrideStackAlignment(dataM->getOverrideStackAlignment()); -#endif - data->M = orc::ThreadSafeModule(); // free memory for data->M + // Wipe the global initializers, we'll reset them at load time + for (auto gv : data->jl_sysimg_gvars) { + cast(gv)->setInitializer(Constant::getNullValue(gv->getValueType())); + } - if (sysimg_data) { - Constant *data = ConstantDataArray::get(Context, - ArrayRef((const unsigned char*)sysimg_data, sysimg_len)); - addComdat(new GlobalVariable(*sysimageM, data->getType(), false, - GlobalVariable::ExternalLinkage, - data, "jl_system_image_data"))->setAlignment(Align(64)); - Constant *len = ConstantInt::get(T_size, sysimg_len); - addComdat(new GlobalVariable(*sysimageM, len->getType(), true, - GlobalVariable::ExternalLinkage, - len, "jl_system_image_size")); - } - add_output(*sysimageM, "data.bc", "data.bc", "data.o", "data.s"); - - object::Archive::Kind Kind = getDefaultForHost(TheTriple); - if (unopt_bc_fname) - handleAllErrors(writeArchive(unopt_bc_fname, unopt_bc_Archive, true, - Kind, true, false), reportWriterError); - if (bc_fname) - handleAllErrors(writeArchive(bc_fname, bc_Archive, true, - Kind, true, false), reportWriterError); - if (obj_fname) - handleAllErrors(writeArchive(obj_fname, obj_Archive, true, - Kind, true, false), reportWriterError); - if (asm_fname) - handleAllErrors(writeArchive(asm_fname, asm_Archive, true, - Kind, true, false), reportWriterError); - - delete data; -} + // add metadata information + if (imaging_mode) { + multiversioning_preannotate(dataM); + { + DenseSet fvars(data->jl_sysimg_fvars.begin(), data->jl_sysimg_fvars.end()); + for (auto &F : dataM) { + if (F.hasFnAttribute("julia.mv.reloc") || F.hasFnAttribute("julia.mv.fvar")) { + if (fvars.insert(&F).second) { + data->jl_sysimg_fvars.push_back(&F); + } + } + } + } -void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) -{ - PM->add(new TargetLibraryInfoWrapperPass(triple)); - PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); -} + ModuleInfo module_info = compute_module_info(dataM); + LLVM_DEBUG(dbgs() + << "Dumping module with stats:\n" + << " globals: " << module_info.globals << "\n" + << " functions: " << module_info.funcs << "\n" + << " basic blocks: " << module_info.bbs << "\n" + << " instructions: " << module_info.insts << "\n" + << " clones: " << module_info.clones << "\n" + << " weight: " << module_info.weight << "\n" + ); + threads = compute_image_thread_count(module_info); + LLVM_DEBUG(dbgs() << "Using " << threads << " to emit aot image\n"); + nfvars = data->jl_sysimg_fvars.size(); + ngvars = data->jl_sysimg_gvars.size(); + emit_offset_table(dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize); + emit_offset_table(dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize); + SmallVector idxs; + idxs.resize(data->jl_sysimg_gvars.size()); + std::iota(idxs.begin(), idxs.end(), 0); + auto gidxs = 
ConstantDataArray::get(Context, idxs); + auto gidxs_var = new GlobalVariable(dataM, gidxs->getType(), true, + GlobalVariable::ExternalLinkage, + gidxs, "jl_gvar_idxs"); + gidxs_var->setVisibility(GlobalValue::HiddenVisibility); + gidxs_var->setDSOLocal(true); + idxs.clear(); + idxs.resize(data->jl_sysimg_fvars.size()); + std::iota(idxs.begin(), idxs.end(), 0); + auto fidxs = ConstantDataArray::get(Context, idxs); + auto fidxs_var = new GlobalVariable(dataM, fidxs->getType(), true, + GlobalVariable::ExternalLinkage, + fidxs, "jl_fvar_idxs"); + fidxs_var->setVisibility(GlobalValue::HiddenVisibility); + fidxs_var->setDSOLocal(true); + dataM.addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0")); + + // let the compiler know we are going to internalize a copy of this, + // if it has a current usage with ExternalLinkage + auto jl_small_typeof_copy = dataM.getGlobalVariable("jl_small_typeof"); + if (jl_small_typeof_copy) { + jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); + jl_small_typeof_copy->setDSOLocal(true); + } + } + has_veccall = !!dataM.getModuleFlag("julia.mv.veccall"); + }); -void addMachinePasses(legacy::PassManagerBase *PM, int optlevel) -{ - // TODO: don't do this on CPUs that natively support Float16 - PM->add(createDemoteFloat16Pass()); - if (optlevel > 1) - PM->add(createGVNPass()); -} + { + // Don't use withModuleDo here since we delete the TSM midway through + auto TSCtx = data->M.getContext(); + auto lock = TSCtx.getLock(); + auto dataM = data->M.getModuleUnlocked(); -// this defines the set of optimization passes defined for Julia at various optimization levels. -// it assumes that the TLI and TTI wrapper passes have already been added. -void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, - bool lower_intrinsics, bool dump_native, - bool external_use) -{ - // Note: LLVM 12 disabled the hoisting of common instruction - // before loop vectorization (https://reviews.llvm.org/D84108). - // - // TODO: CommonInstruction hoisting/sinking enables AllocOpt - // to merge allocations and sometimes eliminate them, - // since AllocOpt does not handle PhiNodes. - // Enable this instruction hoisting because of this and Union benchmarks. - auto basicSimplifyCFGOptions = SimplifyCFGOptions() - .convertSwitchRangeToICmp(true) - .convertSwitchToLookupTable(true) - .forwardSwitchCondToPhi(true); - auto aggressiveSimplifyCFGOptions = SimplifyCFGOptions() - .convertSwitchRangeToICmp(true) - .convertSwitchToLookupTable(true) - .forwardSwitchCondToPhi(true) - //These mess with loop rotation, so only do them after that - .hoistCommonInsts(true) - // Causes an SRET assertion error in late-gc-lowering - // .sinkCommonInsts(true) - ; -#ifdef JL_DEBUG_BUILD - PM->add(createGCInvariantVerifierPass(true)); - PM->add(createVerifierPass()); -#endif + // Delete data when add_output thinks it's done with it + // Saves memory for use when multithreading + data_outputs = compile(*dataM, "text", threads, [data](Module &) { delete data; }); + } - PM->add(createConstantMergePass()); - if (opt_level < 2) { - if (!dump_native) { - // we won't be multiversioning, so lower CPU feature checks early on - // so that we can avoid an additional CFG simplification pass at the end. 
- PM->add(createCPUFeaturesPass()); - if (opt_level == 1) - PM->add(createInstSimplifyLegacyPass()); - } - PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); - if (opt_level == 1) { - PM->add(createSROAPass()); - PM->add(createInstructionCombiningPass()); - PM->add(createEarlyCSEPass()); - // maybe add GVN? - // also try GVNHoist and GVNSink - } - PM->add(createMemCpyOptPass()); - PM->add(createAlwaysInlinerLegacyPass()); // Respect always_inline - PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop - if (lower_intrinsics) { - PM->add(createBarrierNoopPass()); - PM->add(createLowerExcHandlersPass()); - PM->add(createGCInvariantVerifierPass(false)); - PM->add(createRemoveNIPass()); - PM->add(createLateLowerGCFramePass()); - PM->add(createFinalLowerGCPass()); - PM->add(createLowerPTLSPass(dump_native)); - } - else { - PM->add(createRemoveNIPass()); + if (params->emit_metadata) + { + JL_TIMING(NATIVE_AOT, NATIVE_Metadata); + LLVMContext Context; + Module metadataM("metadata", Context); + metadataM.setTargetTriple(TheTriple.str()); + metadataM.setDataLayout(DL); + metadataM.setStackProtectorGuard(StackProtectorGuard); + metadataM.setOverrideStackAlignment(OverrideStackAlignment); + + // reflect the address of the jl_RTLD_DEFAULT_handle variable + // back to the caller, so that we can check for consistency issues + GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&metadataM); + addComdat(new GlobalVariable(metadataM, + jlRTLD_DEFAULT_var->getType(), + true, + GlobalVariable::ExternalLinkage, + jlRTLD_DEFAULT_var, + "jl_RTLD_DEFAULT_handle_pointer"), TheTriple); + + Type *T_size = DL.getIntPtrType(Context); + Type *T_psize = T_size->getPointerTo(); + + if (TheTriple.isOSWindows()) { + // Windows expect that the function `_DllMainStartup` is present in an dll. + // Normal compilers use something like Zig's crtdll.c instead we provide a + // a stub implementation. + auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo(); + auto T_int32 = Type::getInt32Ty(Context); + auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false); + auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", metadataM); + F->setCallingConv(CallingConv::X86_StdCall); + + llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F)); + builder.CreateRet(ConstantInt::get(T_int32, 1)); } - PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop - if (dump_native) { - PM->add(createMultiVersioningPass(external_use)); - PM->add(createCPUFeaturesPass()); - // minimal clean-up to get rid of CPU feature checks - if (opt_level == 1) { - PM->add(createInstSimplifyLegacyPass()); - PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); + if (imaging_mode) { + auto specs = jl_get_llvm_clone_targets(); + const uint32_t base_flags = has_veccall ? 
JL_TARGET_VEC_CALL : 0; + SmallVector data; + auto push_i32 = [&] (uint32_t v) { + uint8_t buff[4]; + memcpy(buff, &v, 4); + data.insert(data.end(), buff, buff + 4); + }; + push_i32(specs.size()); + for (uint32_t i = 0; i < specs.size(); i++) { + push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); + auto &specdata = specs[i].data; + data.insert(data.end(), specdata.begin(), specdata.end()); + } + auto value = ConstantDataArray::get(Context, data); + auto target_ids = new GlobalVariable(metadataM, value->getType(), true, + GlobalVariable::InternalLinkage, + value, "jl_dispatch_target_ids"); + auto shards = emit_shard_table(metadataM, T_size, T_psize, threads); + auto ptls = emit_ptls_table(metadataM, T_size, T_psize); + auto header = emit_image_header(metadataM, threads, nfvars, ngvars); + auto AT = ArrayType::get(T_size, sizeof(jl_small_typeof) / sizeof(void*)); + auto jl_small_typeof_copy = new GlobalVariable(metadataM, AT, false, + GlobalVariable::ExternalLinkage, + Constant::getNullValue(AT), + "jl_small_typeof"); + jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); + jl_small_typeof_copy->setDSOLocal(true); + AT = ArrayType::get(T_psize, 5); + auto pointers = new GlobalVariable(metadataM, AT, false, + GlobalVariable::ExternalLinkage, + ConstantArray::get(AT, { + ConstantExpr::getBitCast(header, T_psize), + ConstantExpr::getBitCast(shards, T_psize), + ConstantExpr::getBitCast(ptls, T_psize), + ConstantExpr::getBitCast(jl_small_typeof_copy, T_psize), + ConstantExpr::getBitCast(target_ids, T_psize) + }), + "jl_image_pointers"); + addComdat(pointers, TheTriple); + if (s) { + write_int32(s, data.size()); + ios_write(s, (const char *)data.data(), data.size()); } } -#if defined(_COMPILER_ASAN_ENABLED_) - PM->add(createAddressSanitizerFunctionPass()); -#endif -#if defined(_COMPILER_MSAN_ENABLED_) - PM->add(createMemorySanitizerPass(true)); -#endif -#if defined(_COMPILER_TSAN_ENABLED_) - PM->add(createThreadSanitizerLegacyPassPass()); -#endif - return; - } - PM->add(createPropagateJuliaAddrspaces()); - PM->add(createScopedNoAliasAAWrapperPass()); - PM->add(createTypeBasedAAWrapperPass()); - if (opt_level >= 3) { - PM->add(createBasicAAWrapperPass()); - } - - PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); - PM->add(createDeadCodeEliminationPass()); - PM->add(createSROAPass()); - - //PM->add(createMemCpyOptPass()); - - PM->add(createAlwaysInlinerLegacyPass()); // Respect always_inline - - // Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time - // merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt` - // pass. - PM->add(createAllocOptPass()); - // consider AggressiveInstCombinePass at optlevel > 2 - PM->add(createInstructionCombiningPass()); - PM->add(createCFGSimplificationPass(basicSimplifyCFGOptions)); - if (dump_native) - PM->add(createMultiVersioningPass(external_use)); - PM->add(createCPUFeaturesPass()); - PM->add(createSROAPass()); - PM->add(createInstSimplifyLegacyPass()); - PM->add(createJumpThreadingPass()); - PM->add(createCorrelatedValuePropagationPass()); - - PM->add(createReassociatePass()); - - PM->add(createEarlyCSEPass()); - - // Load forwarding above can expose allocations that aren't actually used - // remove those before optimizing loops. 
- PM->add(createAllocOptPass()); - PM->add(createLoopRotatePass()); - // moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1) -#ifdef USE_POLLY - // LCSSA (which has already run at this point due to the dependencies of the - // above passes) introduces redundant phis that hinder Polly. Therefore we - // run InstCombine here to remove them. - PM->add(createInstructionCombiningPass()); - PM->add(polly::createCodePreparationPass()); - polly::registerPollyPasses(*PM); - PM->add(polly::createCodegenCleanupPass()); -#endif - // LoopRotate strips metadata from terminator, so run LowerSIMD afterwards - PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop - PM->add(createLICMPass()); - PM->add(createJuliaLICMPass()); - PM->add(createLoopUnswitchPass()); - PM->add(createLICMPass()); - PM->add(createJuliaLICMPass()); - PM->add(createInductiveRangeCheckEliminationPass()); // Must come before indvars - // Subsequent passes not stripping metadata from terminator - PM->add(createInstSimplifyLegacyPass()); - PM->add(createLoopIdiomPass()); - PM->add(createIndVarSimplifyPass()); - PM->add(createLoopDeletionPass()); - PM->add(createSimpleLoopUnrollPass()); - - // Run our own SROA on heap objects before LLVM's - PM->add(createAllocOptPass()); - // Re-run SROA after loop-unrolling (useful for small loops that operate, - // over the structure of an aggregate) - PM->add(createSROAPass()); - // might not be necessary: - PM->add(createInstSimplifyLegacyPass()); - - PM->add(createGVNPass()); - PM->add(createMemCpyOptPass()); - PM->add(createSCCPPass()); - - //These next two passes must come before IRCE to eliminate the bounds check in #43308 - PM->add(createCorrelatedValuePropagationPass()); - PM->add(createDeadCodeEliminationPass()); - - PM->add(createInductiveRangeCheckEliminationPass()); // Must come between the two GVN passes - - // Run instcombine after redundancy elimination to exploit opportunities - // opened up by them. - // This needs to be InstCombine instead of InstSimplify to allow - // loops over Union-typed arrays to vectorize. - PM->add(createInstructionCombiningPass()); - PM->add(createJumpThreadingPass()); - if (opt_level >= 3) { - PM->add(createGVNPass()); // Must come after JumpThreading and before LoopVectorize - } - PM->add(createDeadStoreEliminationPass()); - // see if all of the constant folding has exposed more loops - // to simplification and deletion - // this helps significantly with cleaning up iteration - PM->add(createCFGSimplificationPass(aggressiveSimplifyCFGOptions)); - - // More dead allocation (store) deletion before loop optimization - // consider removing this: - // Moving this after aggressive CFG simplification helps deallocate when allocations are hoisted - PM->add(createAllocOptPass()); - PM->add(createLoopDeletionPass()); - PM->add(createInstructionCombiningPass()); - PM->add(createLoopVectorizePass()); - PM->add(createLoopLoadEliminationPass()); - // Cleanup after LV pass - PM->add(createInstructionCombiningPass()); - PM->add(createCFGSimplificationPass( // Aggressive CFG simplification - aggressiveSimplifyCFGOptions - )); - PM->add(createSLPVectorizerPass()); - // might need this after LLVM 11: - //PM->add(createVectorCombinePass()); - - PM->add(createAggressiveDCEPass()); - - if (lower_intrinsics) { - // LowerPTLS removes an indirect call. As a result, it is likely to trigger - // LLVM's devirtualization heuristics, which would result in the entire - // pass pipeline being re-executed. 
Prevent this by inserting a barrier. - PM->add(createBarrierNoopPass()); - PM->add(createLowerExcHandlersPass()); - PM->add(createGCInvariantVerifierPass(false)); - // Needed **before** LateLowerGCFrame on LLVM < 12 - // due to bug in `CreateAlignmentAssumption`. - PM->add(createRemoveNIPass()); - PM->add(createLateLowerGCFramePass()); - PM->add(createFinalLowerGCPass()); - // We need these two passes and the instcombine below - // after GC lowering to let LLVM do some constant propagation on the tags. - // and remove some unnecessary write barrier checks. - PM->add(createGVNPass()); - PM->add(createSCCPPass()); - // Remove dead use of ptls - PM->add(createDeadCodeEliminationPass()); - PM->add(createLowerPTLSPass(dump_native)); - PM->add(createInstructionCombiningPass()); - // Clean up write barrier and ptls lowering - PM->add(createCFGSimplificationPass()); + + // no need to free module/context, destructor handles that + metadata_outputs = compile(metadataM, "data", 1, [](Module &) {}); } - else { - PM->add(createRemoveNIPass()); + + { + JL_TIMING(NATIVE_AOT, NATIVE_Write); + + object::Archive::Kind Kind = getDefaultForHost(TheTriple); +#define WRITE_ARCHIVE(fname, field, prefix, suffix) \ + if (fname) {\ + SmallVector archive; \ + SmallVector filenames; \ + SmallVector buffers; \ + for (size_t i = 0; i < threads; i++) { \ + filenames.push_back((StringRef("text") + prefix + "#" + Twine(i) + suffix).str()); \ + buffers.push_back(StringRef(data_outputs[i].field.data(), data_outputs[i].field.size())); \ + } \ + filenames.push_back("metadata" prefix suffix); \ + buffers.push_back(StringRef(metadata_outputs[0].field.data(), metadata_outputs[0].field.size())); \ + if (z) { \ + filenames.push_back("sysimg" prefix suffix); \ + buffers.push_back(StringRef(sysimg_outputs[0].field.data(), sysimg_outputs[0].field.size())); \ + } \ + for (size_t i = 0; i < filenames.size(); i++) { \ + archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \ + } \ + handleAllErrors(writeArchive(fname, archive, true, Kind, true, false), reportWriterError); \ + } + + WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc"); + WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc"); + WRITE_ARCHIVE(obj_fname, obj, "", ".o"); + WRITE_ARCHIVE(asm_fname, asm_, "", ".s"); +#undef WRITE_ARCHIVE } - PM->add(createCombineMulAddPass()); - PM->add(createDivRemPairsPass()); -#if defined(_COMPILER_ASAN_ENABLED_) - PM->add(createAddressSanitizerFunctionPass()); -#endif -#if defined(_COMPILER_MSAN_ENABLED_) - PM->add(createMemorySanitizerPass(true)); -#endif -#if defined(_COMPILER_TSAN_ENABLED_) - PM->add(createThreadSanitizerLegacyPassPass()); -#endif } -// An LLVM module pass that just runs all julia passes in order. 
Useful for -// debugging -template -class JuliaPipeline : public Pass { -public: - static char ID; - // A bit of a hack, but works - struct TPMAdapter : public PassManagerBase { - PMTopLevelManager *TPM; - TPMAdapter(PMTopLevelManager *TPM) : TPM(TPM) {} - void add(Pass *P) { TPM->schedulePass(P); } - }; - void preparePassManager(PMStack &Stack) override { - (void)jl_init_llvm(); - PMTopLevelManager *TPM = Stack.top()->getTopLevelManager(); - TPMAdapter Adapter(TPM); - addTargetPasses(&Adapter, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis()); - addOptimizationPasses(&Adapter, OptLevel, true, dump_native, true); - addMachinePasses(&Adapter, OptLevel); - } - JuliaPipeline() : Pass(PT_PassManager, ID) {} - Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const override { - return createPrintModulePass(O, Banner); - } -}; -template<> char JuliaPipeline<0,false>::ID = 0; -template<> char JuliaPipeline<2,false>::ID = 0; -template<> char JuliaPipeline<3,false>::ID = 0; -template<> char JuliaPipeline<0,true>::ID = 0; -template<> char JuliaPipeline<2,true>::ID = 0; -template<> char JuliaPipeline<3,true>::ID = 0; -static RegisterPass> X("juliaO0", "Runs the entire julia pipeline (at -O0)", false, false); -static RegisterPass> Y("julia", "Runs the entire julia pipeline (at -O2)", false, false); -static RegisterPass> Z("juliaO3", "Runs the entire julia pipeline (at -O3)", false, false); - -static RegisterPass> XS("juliaO0-sysimg", "Runs the entire julia pipeline (at -O0/sysimg mode)", false, false); -static RegisterPass> YS("julia-sysimg", "Runs the entire julia pipeline (at -O2/sysimg mode)", false, false); -static RegisterPass> ZS("juliaO3-sysimg", "Runs the entire julia pipeline (at -O3/sysimg mode)", false, false); - -extern "C" JL_DLLEXPORT -void jl_add_optimization_passes_impl(LLVMPassManagerRef PM, int opt_level, int lower_intrinsics) { - addOptimizationPasses(unwrap(PM), opt_level, lower_intrinsics); +void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) +{ + PM->add(new TargetLibraryInfoWrapperPass(triple)); + PM->add(createTargetTransformInfoWrapperPass(std::move(analysis))); } -// new pass manager plugin - -// NOTE: Instead of exporting all the constructors in passes.h we could -// forward the callbacks to the respective passes. LLVM seems to prefer this, -// and when we add the full pass builder having them directly will be helpful. 
-static void registerCallbacks(PassBuilder &PB) { - PB.registerPipelineParsingCallback( - [](StringRef Name, FunctionPassManager &PM, - ArrayRef InnerPipeline) { - if (Name == "DemoteFloat16") { - PM.addPass(DemoteFloat16()); - return true; - } - if (Name == "CombineMulAdd") { - PM.addPass(CombineMulAdd()); - return true; - } - if (Name == "LateLowerGCFrame") { - PM.addPass(LateLowerGC()); - return true; - } - if (Name == "AllocOpt") { - PM.addPass(AllocOptPass()); - return true; - } - if (Name == "PropagateJuliaAddrspaces") { - PM.addPass(PropagateJuliaAddrspacesPass()); - return true; - } - if (Name == "LowerExcHandlers") { - PM.addPass(LowerExcHandlers()); - return true; - } - if (Name == "GCInvariantVerifier") { - // TODO: Parse option and allow users to set `Strong` - PM.addPass(GCInvariantVerifierPass()); - return true; - } - return false; - }); +// sometimes in GDB you want to find out what code was created from a mi +extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi) +{ + jl_llvmf_dump_t llvmf_dump; + size_t world = jl_current_task->world_age; + JL_STREAM *stream = (JL_STREAM*)STDERR_FILENO; + + jl_printf(stream, "---- dumping IR for ----\n"); + jl_static_show(stream, (jl_value_t*)mi); + jl_printf(stream, "\n----\n"); + + jl_printf(stream, "\n---- unoptimized IR ----"); + jl_get_llvmf_defn(&llvmf_dump, mi, world, 0, false, jl_default_cgparams); + if (llvmf_dump.F) { + jl_value_t *ir = jl_dump_function_ir(&llvmf_dump, 0, 1, "source"); + jl_static_show(stream, ir); + } + jl_printf(stream, "----\n"); - PB.registerPipelineParsingCallback( - [](StringRef Name, ModulePassManager &PM, - ArrayRef InnerPipeline) { - if (Name == "CPUFeatures") { - PM.addPass(CPUFeatures()); - return true; - } - if (Name == "RemoveNI") { - PM.addPass(RemoveNI()); - return true; - } - if (Name == "LowerSIMDLoop") { - PM.addPass(LowerSIMDLoop()); - return true; - } - if (Name == "FinalLowerGC") { - PM.addPass(FinalLowerGCPass()); - return true; - } - if (Name == "RemoveJuliaAddrspaces") { - PM.addPass(RemoveJuliaAddrspacesPass()); - return true; - } - if (Name == "MultiVersioning") { - PM.addPass(MultiVersioning()); - return true; - } - if (Name == "LowerPTLS") { - PM.addPass(LowerPTLSPass()); - return true; - } - return false; - }); + jl_printf(stream, "\n---- optimized IR ----"); + jl_get_llvmf_defn(&llvmf_dump, mi, world, 0, true, jl_default_cgparams); + if (llvmf_dump.F) { + jl_value_t *ir = jl_dump_function_ir(&llvmf_dump, 0, 1, "source"); + jl_static_show(stream, ir); + } + jl_printf(stream, "----\n"); - PB.registerPipelineParsingCallback( - [](StringRef Name, LoopPassManager &PM, - ArrayRef InnerPipeline) { - if (Name == "JuliaLICM") { - PM.addPass(JuliaLICMPass()); - return true; - } - return false; - }); -} + jl_printf(stream, "\n---- assembly ----"); + jl_get_llvmf_defn(&llvmf_dump, mi, world, 0, true, jl_default_cgparams); + if (llvmf_dump.F) { + jl_value_t *ir = jl_dump_function_asm(&llvmf_dump, 0, "", "source", 0, true); + jl_static_show(stream, ir); + } + jl_printf(stream, "----\n"); -extern "C" JL_DLLEXPORT ::llvm::PassPluginLibraryInfo -llvmGetPassPluginInfo() { - return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks}; + jl_code_info_t *src = NULL; + jl_value_t *ci = jl_default_cgparams.lookup(mi, world, world); + if (ci != jl_nothing) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; + src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); + if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && 
jl_is_method(mi->def.method)) { + JL_GC_PUSH2(&codeinst, &src); + src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src); + JL_GC_POP(); + } + } + if (!src || (jl_value_t*)src == jl_nothing) { + src = jl_type_infer(mi, world, 0); + } + return src; } // --- native code info, and dump function to IR and ASM --- // Get pointer to llvm::Function instance, compiling if necessary // for use in reflection from Julia. -// this is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways: +// This is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways: // misuse will leak memory or cause read-after-free -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) { if (jl_is_method(mi->def.method) && mi->def.method->source == NULL && @@ -1010,28 +1890,32 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz // get the source code for this function jl_value_t *jlrettype = (jl_value_t*)jl_any_type; jl_code_info_t *src = NULL; - JL_GC_PUSH2(&src, &jlrettype); - if (jl_is_method(mi->def.method) && mi->def.method->source != NULL && jl_ir_flag_inferred((jl_array_t*)mi->def.method->source)) { + jl_code_instance_t *codeinst = NULL; + JL_GC_PUSH3(&src, &jlrettype, &codeinst); + if (jl_is_method(mi->def.method) && mi->def.method->source != NULL && mi->def.method->source != jl_nothing && jl_ir_flag_inferred(mi->def.method->source)) { + // uninferred opaque closure src = (jl_code_info_t*)mi->def.method->source; if (src && !jl_is_code_info(src)) - src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src); - } else { - jl_value_t *ci = jl_rettype_inferred(mi, world, world); + src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src); + } + else { + jl_value_t *ci = params.lookup(mi, world, world); if (ci != jl_nothing) { - jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; - src = (jl_code_info_t*)codeinst->inferred; + codeinst = (jl_code_instance_t*)ci; + src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method)) - src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src); + src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src); jlrettype = codeinst->rettype; + codeinst = NULL; // not needed outside of this branch } if (!src || (jl_value_t*)src == jl_nothing) { src = jl_type_infer(mi, world, 0); if (src) jlrettype = src->rettype; else if (jl_is_method(mi->def.method)) { - src = mi->def.method->generator ? jl_code_for_staged(mi) : (jl_code_info_t*)mi->def.method->source; - if (src && !jl_is_code_info(src) && jl_is_method(mi->def.method)) - src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src); + src = mi->def.method->generator ? 
jl_code_for_staged(mi, world) : (jl_code_info_t*)mi->def.method->source; + if (src && (jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method)) + src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src); } // TODO: use mi->uninferred } @@ -1039,33 +1923,64 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz // emit this function into a new llvm module if (src && jl_is_code_info(src)) { - JL_LOCK(&jl_codegen_lock); auto ctx = jl_ExecutionEngine->getContext(); - jl_codegen_params_t output(*ctx); - output.world = world; - output.params = &params; - orc::ThreadSafeModule m = jl_create_llvm_module(name_from_method_instance(mi), output.tsctx, output.imaging); + orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), *ctx); uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); + JL_LOCK(&jl_codegen_lock); + auto target_info = m.withModuleDo([&](Module &M) { + return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); + }); + jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second)); + output.world = world; + output.params = &params; + output.imaging_mode = imaging_default(); + // This would be nice, but currently it causes some assembly regressions that make printed output + // differ very significantly from the actual non-imaging mode code. + // // Force imaging mode for names of pointers + // output.imaging = true; + // This would also be nice, but it seems to cause OOMs on the windows32 builder + // To get correct names in the IR this needs to be at least 2 + output.debug_level = params.debug_info_level; auto decls = jl_emit_code(m, mi, src, jlrettype, output); + JL_UNLOCK(&jl_codegen_lock); // Might GC Function *F = NULL; if (m) { // if compilation succeeded, prepare to return the result - // For imaging mode, global constants are currently private without initializer - // which isn't legal. Convert them to extern linkage so that the code can compile - // and will better match what's actually in sysimg.
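/* Editorial usage sketch (not part of the patch): the jl_gdbdumpcode helper added above
 * is meant to be driven from a debugger on a live method instance, e.g.
 *   (gdb) call jl_gdbdumpcode((jl_method_instance_t*)$mi)
 * where $mi is assumed to already hold a jl_method_instance_t* from the debugged
 * process and the exported symbol is visible to the debugger. It prints the
 * unoptimized IR, the optimized IR, and the native assembly for that instance to
 * stderr, then returns its jl_code_info_t* (running inference if needed). */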
- for (auto &global : output.globals) - global.second->setLinkage(GlobalValue::ExternalLinkage); + // Similar to jl_link_global from jitlayers.cpp, + // so that code_llvm shows similar codegen to the jit + for (auto &global : output.global_targets) { + if (jl_options.image_codegen) { + global.second->setLinkage(GlobalValue::ExternalLinkage); + } else { + auto p = literal_static_pointer_val(global.first, global.second->getValueType()); + Type *elty; + if (p->getType()->isOpaquePointerTy()) { + elty = PointerType::get(output.getContext(), 0); + } else { + elty = p->getType()->getNonOpaquePointerElementType(); + } + // For pretty printing, when LLVM inlines the global initializer into its loads + auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent()); + global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType())); + global.second->setConstant(true); + global.second->setLinkage(GlobalValue::PrivateLinkage); + global.second->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + global.second->setVisibility(GlobalValue::DefaultVisibility); + } + } + if (!jl_options.image_codegen) { + optimizeDLSyms(*m.getModuleUnlocked()); + } + assert(!verifyLLVMIR(*m.getModuleUnlocked())); if (optimize) { - legacy::PassManager PM; - addTargetPasses(&PM, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis()); - addOptimizationPasses(&PM, jl_options.opt_level); - addMachinePasses(&PM, jl_options.opt_level); + NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level)}; //Safe b/c context lock is held by output PM.run(*m.getModuleUnlocked()); + assert(!verifyLLVMIR(*m.getModuleUnlocked())); } const std::string *fname; if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam") @@ -1077,9 +1992,10 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz F = cast(m.getModuleUnlocked()->getNamedValue(*fname)); } JL_GC_POP(); - if (measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); - JL_UNLOCK(&jl_codegen_lock); // Might GC + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } if (F) { dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m))); dump->F = wrap(F); diff --git a/src/array.c b/src/array.c index ae89087502627..fcae8ac26fa22 100644 --- a/src/array.c +++ b/src/array.c @@ -16,58 +16,6 @@ extern "C" { #endif -#define JL_ARRAY_IMPL_NUL 1 - -#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes) - -static inline void arrayassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT -{ - // array can assume more alignment than a field would normally have - assert(nb >= jl_datatype_size(jl_typeof(src))); // nb might move some undefined bits, but we should be okay with that - if (hasptr) { - size_t nptr = nb / sizeof(void*); - memmove_refs((void**)dst, (void* const*)src, nptr); - jl_gc_multi_wb(parent, src); - } - else { - switch (nb) { - case 0: break; - case 1: *(uint8_t*)dst = *(uint8_t*)src; break; - case 2: *(uint16_t*)dst = *(uint16_t*)src; break; - case 4: *(uint32_t*)dst = *(uint32_t*)src; break; - case 8: *(uint64_t*)dst = *(uint64_t*)src; break; - case 16: - memcpy(jl_assume_aligned(dst, 16), jl_assume_aligned(src, 16), 16); - break; - default: memcpy(dst, 
src, nb); - } - } -} - -static inline void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT -{ - if (hasptr) - memmove_refs((void**)dst, (void**)src, nb / sizeof(void*)); - else - memmove(dst, src, nb); -} - -// array constructors --------------------------------------------------------- -JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT -{ - assert(jl_array_isbitsunion(a)); - return ((char*)jl_array_data(a)) + ((jl_array_ndims(a) == 1 ? (a->maxsize - a->offset) : jl_array_len(a)) * a->elsize) + a->offset; -} - -STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT -{ - if (a->flags.how == 3) { - a = (jl_array_t*)jl_array_data_owner(a); - assert(jl_is_string(a) || a->flags.how != 3); - } - return (jl_value_t*)a; -} - #if defined(_P64) && defined(UINT128MAX) typedef __uint128_t wideint_t; #else @@ -76,140 +24,27 @@ typedef uint64_t wideint_t; #define MAXINTVAL (((size_t)-1)>>1) -JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, size_t *tot, uint32_t ndims, size_t *dims, size_t elsz) +JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dims) { size_t i; size_t _nel = 1; - for(i=0; i < ndims; i++) { + for (i = 0; i < ndims; i++) { size_t di = dims[i]; wideint_t prod = (wideint_t)_nel * (wideint_t)di; if (prod >= (wideint_t) MAXINTVAL || di >= MAXINTVAL) return 1; _nel = prod; } - wideint_t prod = (wideint_t)elsz * (wideint_t)_nel; - if (prod >= (wideint_t) MAXINTVAL) - return 2; *nel = _nel; - *tot = (size_t)prod; return 0; } -static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, - int8_t isunboxed, int8_t hasptr, int8_t isunion, int8_t zeroinit, size_t elsz) -{ - jl_task_t *ct = jl_current_task; - size_t i, tot, nel; - void *data; - jl_array_t *a; - assert(isunboxed || elsz == sizeof(void*)); - assert(atype == NULL || isunion == jl_is_uniontype(jl_tparam0(atype))); - int validated = jl_array_validate_dims(&nel, &tot, ndims, dims, elsz); - if (validated == 1) - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - else if (validated == 2) - jl_error("invalid Array size"); - if (isunboxed) { - if (elsz == 1 && !isunion) { - // extra byte for all julia allocated byte arrays - tot++; - } - if (isunion) { - // an extra byte for each isbits union array element, stored after a->maxsize - tot += nel; - } - } - - int ndimwords = jl_array_ndimwords(ndims); - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); - if (tot <= ARRAY_INLINE_NBYTES) { - // align data area - if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD) - tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); - else if (isunboxed && elsz >= 4) - tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); - size_t doffs = tsz; - tsz += tot; - // jl_array_t is large enough that objects will always be aligned 16 - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - assert(((size_t)a & 15) == 0); - // No allocation or safepoint allowed after this - a->flags.how = 0; - data = (char*)a + doffs; - } - else { - data = jl_gc_managed_malloc(tot); - // Allocate the Array **after** allocating the data - // to make sure the array is still young - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - // No allocation or safepoint allowed after this - a->flags.how = 2; - jl_gc_track_malloced_array(ct->ptls, a); - } - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - - if (zeroinit) - memset(data, 0, tot); - a->data = data; - if (JL_ARRAY_IMPL_NUL && elsz == 1) - ((char*)data)[tot - 1] = '\0'; - a->length = nel; - 
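/* Editorial sketch (illustration only, not part of the patch): the slimmed-down
 * jl_array_validate_dims above now only guards the element count against overflow;
 * the total byte-size check moved into the GenericMemory allocation path. A caller
 * such as new_array below uses it roughly like this: */
#if 0 /* example only, not compiled */
static size_t checked_element_count(uint32_t ndims, size_t *dims)
{
    size_t nel;
    if (jl_array_validate_dims(&nel, ndims, dims))
        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
    return nel; /* product of dims, guaranteed to stay below MAXINTVAL */
}
#endif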
a->flags.ndims = ndims; - a->flags.ptrarray = !isunboxed; - a->flags.hasptr = hasptr; - a->elsize = elsz; - a->flags.isshared = 0; - a->flags.isaligned = 1; - a->offset = 0; - if (ndims == 1) { - a->nrows = nel; - a->maxsize = nel; - } - else if (a->flags.ndims != ndims) { - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - } - else { - size_t *adims = &a->nrows; - for (i = 0; i < ndims; i++) - adims[i] = dims[i]; - } - - return a; -} - -static inline jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t *dims) -{ - jl_value_t *eltype = jl_tparam0(atype); - size_t elsz = 0, al = 0; - if (!jl_is_kind(jl_typeof(eltype))) - jl_type_error_rt("Array", "element type", (jl_value_t*)jl_type_type, eltype); - int isunboxed = jl_islayout_inline(eltype, &elsz, &al); - int isunion = jl_is_uniontype(eltype); - int hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); - if (!isunboxed) { - elsz = sizeof(void*); - al = elsz; - } - else { - elsz = LLT_ALIGN(elsz, al); - } - int zi = !isunboxed || hasptr || isunion || (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->zeroinit); - - return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, zi, elsz); -} - -jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int hasptr, int isunion, int elsz) -{ - return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, 0, (size_t)elsz); -} - #ifndef JL_NDEBUG static inline int is_ntuple_long(jl_value_t *v) { if (!jl_is_tuple(v)) return 0; - jl_value_t *tt = jl_typeof(v); + jl_value_t *tt = (jl_value_t*)jl_typetagof(v); size_t i, nfields = jl_nparams(tt); for (i = 0; i < nfields; i++) { if (jl_tparam(tt, i) != (jl_value_t*)jl_long_type) { @@ -220,313 +55,141 @@ static inline int is_ntuple_long(jl_value_t *v) } #endif -JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, - jl_value_t *_dims) +#define jl_array_elsize(a) (((jl_datatype_t*)jl_typetagof((a)->ref.mem))->layout->size) + +static char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT { - jl_task_t *ct = jl_current_task; - assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype))); + assert(jl_genericmemory_isbitsunion(a->ref.mem)); + return jl_genericmemory_typetagdata(a->ref.mem) + (uintptr_t)a->ref.ptr_or_offset; +} - size_t ndims = jl_nfields(_dims); - assert(is_ntuple_long(_dims)); - size_t *dims = (size_t*)_dims; - int ndimwords = jl_array_ndimwords(ndims); - int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*); +STATIC_INLINE jl_array_t *_new_array(jl_value_t *atype, jl_genericmemory_t *mem, const jl_datatype_layout_t *layout, uint32_t ndims, size_t *dims) +{ + jl_task_t *ct = jl_current_task; + size_t i; + int tsz = sizeof(jl_array_t) + ndims*sizeof(size_t); jl_array_t *a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - // No allocation or safepoint allowed after this - // copy data (except dims) from the old object - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - a->flags.ndims = ndims; - a->offset = 0; - a->data = NULL; - a->flags.isaligned = data->flags.isaligned; - a->elsize = data->elsize; - a->flags.ptrarray = data->flags.ptrarray; - a->flags.hasptr = data->flags.hasptr; - - // if data is itself a shared wrapper, - // owner should point back to the original array - jl_array_t *owner = (jl_array_t*)jl_array_owner(data); - jl_array_data_owner(a) = (jl_value_t*)owner; - - a->flags.how = 3; - a->data = data->data; - a->flags.isshared = 1; - 
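/* Editorial note (not in the original patch): in the new representation an Array is a
 * view over a GenericMemory. a->ref.mem points at the backing memory object, while
 * a->ref.ptr_or_offset holds a raw data pointer for ordinary element types but an
 * element offset for isbits-union (and zero-size) element types, which is the
 * convention the assignments below and the typetagdata/grow_end code rely on. */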
data->flags.isshared = 1; + a->ref.mem = mem; + if (layout->flags.arrayelem_isunion || layout->size == 0) + a->ref.ptr_or_offset = 0; + else + a->ref.ptr_or_offset = mem->ptr; + for (i = 0; i < ndims; i++) + a->dimsize[i] = dims[i]; + return a; +} - if (ndims == 1) { - size_t l = dims[0]; - a->length = l; - a->nrows = l; - a->maxsize = l; - } - else if (a->flags.ndims != ndims) { +STATIC_INLINE jl_array_t *new_array(jl_value_t *atype, uint32_t ndims, size_t *dims) +{ + size_t nel; + if (jl_array_validate_dims(&nel, ndims, dims) || *(size_t*)jl_tparam1(atype) != ndims) jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - } - else { - size_t *adims = &a->nrows; - size_t l = 1; - wideint_t prod; - for (size_t i = 0; i < ndims; i++) { - adims[i] = dims[i]; - prod = (wideint_t)l * (wideint_t)adims[i]; - if (prod > (wideint_t) MAXINTVAL) - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - l = prod; - } - a->length = l; - } - + jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1); + // extra byte for all julia allocated byte vectors + jl_genericmemory_t *mem = jl_alloc_genericmemory(mtype, nel); + JL_GC_PUSH1(&mem); + jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, ndims, dims); + JL_GC_POP(); return a; } +jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz); + +JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str); + JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) { jl_task_t *ct = jl_current_task; - jl_array_t *a; - - int ndimwords = jl_array_ndimwords(1); - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*); - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type); - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - a->flags.ndims = 1; - a->offset = 0; - a->data = jl_string_data(str); - a->flags.isaligned = 0; - a->elsize = 1; - a->flags.ptrarray = 0; - a->flags.hasptr = 0; - jl_array_data_owner(a) = str; - a->flags.how = 3; - a->flags.isshared = 1; - size_t l = jl_string_len(str); - a->length = l; - a->nrows = a->maxsize = l; + jl_genericmemory_t *mem = jl_string_to_genericmemory(str); + JL_GC_PUSH1(&mem); + int ndimwords = 1; + int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); + jl_array_t *a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type); + a->ref.mem = mem; + a->ref.ptr_or_offset = mem->ptr; + a->dimsize[0] = mem->length; + JL_GC_POP(); return a; } -// own_buffer != 0 iff GC should call free() on this pointer eventually JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, int own_buffer) { - jl_task_t *ct = jl_current_task; - jl_array_t *a; - jl_value_t *eltype = jl_tparam0(atype); - - int isunboxed = jl_stored_inline(eltype); - if (isunboxed && jl_is_uniontype(eltype)) - jl_exceptionf(jl_argumenterror_type, - "unsafe_wrap: unspecified layout for union element type"); - size_t elsz; - unsigned align; - if (isunboxed) { - elsz = jl_datatype_size(eltype); - align = jl_datatype_align(eltype); - } - else { - align = elsz = sizeof(void*); - } - if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? 
JL_HEAP_ALIGNMENT : align) - 1)) - jl_exceptionf(jl_argumenterror_type, - "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); - - int ndimwords = jl_array_ndimwords(1); - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - // No allocation or safepoint allowed after this - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - a->data = data; - a->length = nel; - a->elsize = LLT_ALIGN(elsz, align); - a->flags.ptrarray = !isunboxed; - a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); - a->flags.ndims = 1; - a->flags.isshared = 1; - a->flags.isaligned = 0; // TODO: allow passing memalign'd buffers - if (own_buffer) { - a->flags.how = 2; - jl_gc_track_malloced_array(ct->ptls, a); - jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0)); - } - else { - a->flags.how = 0; - } - - a->nrows = nel; - a->maxsize = nel; - a->offset = 0; + if (*(size_t*)jl_tparam1(atype) != 1) + jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); + jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1); + jl_genericmemory_t *mem = jl_ptr_to_genericmemory(mtype, data, nel, own_buffer); + JL_GC_PUSH1(&mem); + jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, 1, &nel); + JL_GC_POP(); return a; } JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_value_t *_dims, int own_buffer) { - jl_task_t *ct = jl_current_task; - size_t nel = 1; - jl_array_t *a; size_t ndims = jl_nfields(_dims); - wideint_t prod; assert(is_ntuple_long(_dims)); size_t *dims = (size_t*)_dims; - for (size_t i = 0; i < ndims; i++) { - prod = (wideint_t)nel * (wideint_t)dims[i]; - if (prod > (wideint_t) MAXINTVAL) - jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - nel = prod; - } - if (__unlikely(ndims == 1)) - return jl_ptr_to_array_1d(atype, data, nel, own_buffer); - jl_value_t *eltype = jl_tparam0(atype); - - int isunboxed = jl_stored_inline(eltype); - if (isunboxed && jl_is_uniontype(eltype)) - jl_exceptionf(jl_argumenterror_type, - "unsafe_wrap: unspecified layout for union element type"); - size_t elsz; - unsigned align; - if (isunboxed) { - elsz = jl_datatype_size(eltype); - align = jl_datatype_align(eltype); - } - else { - align = elsz = sizeof(void*); - } - if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) - jl_exceptionf(jl_argumenterror_type, - "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); - - int ndimwords = jl_array_ndimwords(ndims); - int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); - a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); - // No allocation or safepoint allowed after this - a->flags.pooled = tsz <= GC_MAX_SZCLASS; - a->data = data; - a->length = nel; - a->elsize = LLT_ALIGN(elsz, align); - a->flags.ptrarray = !isunboxed; - a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0); - a->flags.ndims = ndims; - a->offset = 0; - a->flags.isshared = 1; - a->flags.isaligned = 0; - if (own_buffer) { - a->flags.how = 2; - jl_gc_track_malloced_array(ct->ptls, a); - jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 
1 : 0)); - } - else { - a->flags.how = 0; - } - - assert(ndims != 1); // handled above - if (a->flags.ndims != ndims) + size_t nel; + if (jl_array_validate_dims(&nel, ndims, dims) || *(size_t*)jl_tparam1(atype) != ndims) jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions"); - memcpy(&a->nrows, dims, ndims * sizeof(size_t)); - return a; -} - -JL_DLLEXPORT jl_array_t *jl_new_array(jl_value_t *atype, jl_value_t *_dims) -{ - size_t ndims = jl_nfields(_dims); - assert(is_ntuple_long(_dims)); - return _new_array(atype, ndims, (size_t*)_dims); -} - -JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr) -{ - return _new_array(atype, 1, &nr); -} - -JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, - size_t nc) -{ - size_t d[2] = {nr, nc}; - return _new_array(atype, 2, &d[0]); -} - -JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, - size_t nc, size_t z) -{ - size_t d[3] = {nr, nc, z}; - return _new_array(atype, 3, &d[0]); -} - -JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len) -{ - jl_array_t *a = jl_alloc_array_1d(jl_array_uint8_type, len); - memcpy(a->data, str, len); + jl_value_t *mtype = jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)atype, 0), 1); + jl_genericmemory_t *mem = jl_ptr_to_genericmemory(mtype, data, nel, own_buffer); + JL_GC_PUSH1(&mem); + jl_array_t *a = _new_array(atype, mem, ((jl_datatype_t*)mtype)->layout, ndims, dims); + JL_GC_POP(); return a; } JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a) { - size_t len = jl_array_len(a); + size_t len = jl_array_nrows(a); // only for Vector if (len == 0) { // this may seem like purely an optimization (which it also is), but it // also ensures that calling `String(a)` doesn't corrupt a previous // string also created the same way, where `a = StringVector(_)`. return jl_an_empty_string; } - if (a->flags.how == 3 && a->offset == 0 && a->elsize == 1 && - (jl_array_ndims(a) != 1 || - ((a->maxsize + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS)))) { - jl_value_t *o = jl_array_data_owner(a); - if (jl_is_string(o)) { - a->flags.isshared = 1; - *(size_t*)o = len; - a->nrows = 0; - a->length = 0; - a->maxsize = 0; - return o; - } - } - a->nrows = 0; - a->length = 0; - a->maxsize = 0; - return jl_pchar_to_string((const char*)jl_array_data(a), len); + jl_value_t *str; + if (a->ref.ptr_or_offset == a->ref.mem->ptr) + str = jl_genericmemory_to_string(a->ref.mem, len); + else + str = jl_pchar_to_string(jl_array_data(a, char), len); + a->ref.mem = (jl_genericmemory_t*)((jl_datatype_t*)jl_memory_uint8_type)->instance; + a->ref.ptr_or_offset = a->ref.mem->ptr; + a->dimsize[0] = 0; + return str; } -JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) +JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr) { - if (len == 0) - return jl_an_empty_string; - size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size - if (sz < len) // overflow - jl_throw(jl_memory_exception); - jl_task_t *ct = jl_current_task; - jl_value_t *s; - jl_ptls_t ptls = ct->ptls; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass_align8(allocsz); - jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in - // the Allocations Profiler. 
(See https://github.com/JuliaLang/julia/pull/43868 for more details.) - s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - s = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(s, jl_string_type); - maybe_record_alloc_to_profile(s, len, jl_string_type); - *(size_t*)s = len; - jl_string_data(s)[len] = 0; - return s; + return new_array(atype, 1, &nr); } -JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len) +JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc) { - jl_value_t *s = jl_alloc_string(len); - if (len > 0) - memcpy(jl_string_data(s), str, len); - return s; + size_t dims[2] = {nr, nc}; + return new_array(atype, 2, &dims[0]); } -JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str) +JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z) { - return jl_pchar_to_string(str, strlen(str)); + size_t dims[3] = {nr, nc, z}; + return new_array(atype, 3, &dims[0]); +} + +JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims) +{ + return new_array(atype, ndims, dims); +} + +JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len) +{ + jl_array_t *a = jl_alloc_array_1d(jl_array_uint8_type, len); + assert(jl_array_data(a, char)); + memcpy(jl_array_data(a, char), str, len); + return a; } JL_DLLEXPORT jl_array_t *jl_alloc_vec_any(size_t n) @@ -543,719 +206,75 @@ JL_DLLEXPORT jl_value_t *jl_apply_array_type(jl_value_t *type, size_t dim) return ret; } -// array primitives ----------------------------------------------------------- - -JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT -{ - assert(i < jl_array_len(a)); - assert(a->flags.ptrarray); - jl_value_t *elt = jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)a->data) + i); - if (elt == NULL) - jl_throw(jl_undefref_exception); - return elt; -} - - -JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i) -{ - if (a->flags.ptrarray) - return jl_ptrarrayref(a, i); - assert(i < jl_array_len(a)); - jl_value_t *eltype = (jl_value_t*)jl_tparam0(jl_typeof(a)); - if (jl_is_uniontype(eltype)) { - // isbits union selector bytes are always stored directly after the last array element - uint8_t sel = jl_array_typetagdata(a)[i]; - eltype = jl_nth_union_component(eltype, sel); - if (jl_is_datatype_singleton((jl_datatype_t*)eltype)) - return ((jl_datatype_t*)eltype)->instance; - } - jl_value_t *r = undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, &((char*)a->data)[i * a->elsize])); - if (__unlikely(r == NULL)) - jl_throw(jl_undefref_exception); - return r; -} - -JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i) -{ - if (a->flags.ptrarray) { - return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)jl_array_data(a)) + i) != NULL; - } - else if (a->flags.hasptr) { - jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a)); - assert(eltype->layout->first_ptr >= 0); - jl_value_t **elem = (jl_value_t**)((char*)a->data + i * a->elsize); - return elem[eltype->layout->first_ptr] != NULL; - } - return 1; -} - -JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, size_t i) -{ - assert(i < jl_array_len(a)); - jl_value_t *eltype = jl_tparam0(jl_typeof(a)); - if (eltype != (jl_value_t*)jl_any_type) { - JL_GC_PUSH1(&rhs); - if (!jl_isa(rhs, 
eltype)) - jl_type_error("arrayset", eltype, rhs); - JL_GC_POP(); - } - if (!a->flags.ptrarray) { - int hasptr; - if (jl_is_uniontype(eltype)) { - uint8_t *psel = &((uint8_t*)jl_array_typetagdata(a))[i]; - unsigned nth = 0; - if (!jl_find_union_component(eltype, jl_typeof(rhs), &nth)) - assert(0 && "invalid arrayset to isbits union"); - *psel = nth; - if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs))) - return; - hasptr = 0; - } - else { - hasptr = a->flags.hasptr; - } - arrayassign_safe(hasptr, jl_array_owner(a), &((char*)a->data)[i * a->elsize], rhs, a->elsize); - } - else { - jl_atomic_store_release(((_Atomic(jl_value_t*)*)a->data) + i, rhs); - jl_gc_wb(jl_array_owner(a), rhs); - } -} - -JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i) -{ - if (i >= jl_array_len(a)) - jl_bounds_error_int((jl_value_t*)a, i + 1); - if (a->flags.ptrarray) - jl_atomic_store_release(((_Atomic(jl_value_t*)*)a->data) + i, NULL); - else if (a->flags.hasptr) { - size_t elsize = a->elsize; - jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0); - memset((char*)a->data + elsize * i, 0, elsize); - } -} - -// at this size and bigger, allocate resized array data with malloc directly -// instead of managing them separately as gc objects -#define MALLOC_THRESH 1048576 - -// Resize the buffer to a max size of `newlen` -// The buffer can either be newly allocated or realloc'd, the return -// value is 1 if a new buffer is allocated and 0 if it is realloc'd. -// the caller needs to take care of moving the data from the old buffer -// to the new one if necessary. -// When this function returns, the `->data` pointer always points to -// the **beginning** of the new buffer. -static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen) -{ - jl_task_t *ct = jl_current_task; - assert(!a->flags.isshared || a->flags.how == 3); - size_t elsz = a->elsize; - size_t nbytes = newlen * elsz; - size_t oldnbytes = a->maxsize * elsz; - size_t oldoffsnb = a->offset * elsz; - size_t oldlen = a->nrows; - int isbitsunion = jl_array_isbitsunion(a); - assert(nbytes >= oldnbytes); - if (elsz == 1 && !isbitsunion) { - nbytes++; - oldnbytes++; - } - if (isbitsunion) { - nbytes += newlen; - oldnbytes += a->maxsize; - } - int newbuf = 0; - if (a->flags.how == 2) { - // already malloc'd - use realloc - char *olddata = (char*)a->data - oldoffsnb; - a->data = jl_gc_managed_realloc(olddata, nbytes, oldnbytes, - a->flags.isaligned, (jl_value_t*)a); - } - else if (a->flags.how == 3 && jl_is_string(jl_array_data_owner(a)) && !isbitsunion) { - // if data is in a String, keep it that way - jl_value_t *s; - if (a->flags.isshared) { - s = jl_alloc_string(nbytes - (elsz == 1)); - newbuf = 1; - } - else { - s = jl_gc_realloc_string(jl_array_data_owner(a), nbytes - (elsz == 1)); - } - jl_array_data_owner(a) = s; - jl_gc_wb(a, s); - a->data = jl_string_data(s); - } - else { - newbuf = 1; - if (nbytes >= MALLOC_THRESH) { - a->data = jl_gc_managed_malloc(nbytes); - jl_gc_track_malloced_array(ct->ptls, a); - a->flags.how = 2; - a->flags.isaligned = 1; - } - else { - a->data = jl_gc_alloc_buf(ct->ptls, nbytes); - a->flags.how = 1; - jl_gc_wb_buf(a, a->data, nbytes); - } - } - if (JL_ARRAY_IMPL_NUL && elsz == 1 && !isbitsunion) - memset((char*)a->data + oldnbytes - 1, 0, nbytes - oldnbytes + 1); - (void)oldlen; - assert(oldlen == a->nrows && - "Race condition detected: recursive resizing on the same array."); - a->flags.isshared = 0; - a->maxsize = newlen; - return newbuf; -} - -static void NOINLINE array_try_unshare(jl_array_t 
*a) -{ - if (a->flags.isshared) { - if (a->flags.how != 3) - jl_error("cannot resize array with shared data"); - // allow resizing when data is shared with a String - if (jl_is_string(jl_array_data_owner(a))) - return; - assert(a->offset == 0); - size_t len = a->maxsize; - size_t nbytes = len * a->elsize; - if (jl_array_isbitsunion(a)) { - nbytes += len; - } - char *olddata = (char*)a->data; - int newbuf = array_resize_buffer(a, len); - assert(newbuf); - (void)newbuf; - memcpy(a->data, olddata, nbytes); - } -} - -size_t overallocation(size_t maxsize) -{ - if (maxsize < 8) - return 8; - // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8 - // for small n, we grow faster than O(n) - // for large n, we grow at O(n/8) - // and as we reach O(memory) for memory>>1MB, - // this means we end by adding about 10% of memory each time - int exp2 = sizeof(maxsize) * 8 - -#ifdef _P64 - __builtin_clzll(maxsize); -#else - __builtin_clz(maxsize); -#endif - maxsize += ((size_t)1 << (exp2 * 7 / 8)) * 4 + maxsize / 8; - return maxsize; -} - -STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc, - size_t n) +JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc) { - // designed to handle the case of growing and shrinking at both ends - if (__unlikely(a->flags.isshared)) { - if (a->flags.how != 3) - jl_error("cannot resize array with shared data"); - if (inc == 0) { - // If inc > 0, it will always trigger the slow path and unshare the - // buffer - array_try_unshare(a); - return; - } - } + size_t n = jl_array_nrows(a); + size_t elsz = jl_array_elsize(a); + char *data = jl_array_data(a,char); + jl_value_t *mtype = (jl_value_t*)jl_typetagof(a->ref.mem); + int isbitsunion = jl_genericmemory_isbitsunion(a->ref.mem); size_t newnrows = n + inc; - size_t elsz = a->elsize; - size_t nbinc = inc * elsz; - char *data = (char*)a->data; - char *newdata; - char *typetagdata; - char *newtypetagdata = NULL; - int isbitsunion = jl_array_isbitsunion(a); - if (isbitsunion) typetagdata = jl_array_typetagdata(a); - if (a->offset >= inc) { - // already have enough space in a->offset - newdata = data - nbinc; - a->offset -= inc; - if (isbitsunion) newtypetagdata = typetagdata - inc; - if (idx > 0) { - // inserting new elements after 1st element - memmove_safe(a->flags.hasptr, newdata, data, idx * elsz); - if (isbitsunion) { - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - } - } - } - else { - // not enough room for requested growth from existing a->offset - size_t oldoffset = a->offset; - size_t oldoffsnb = oldoffset * elsz; - size_t oldmaxsize = a->maxsize; - size_t nb1 = idx * elsz; - if (inc > (a->maxsize - n) / 2 - (a->maxsize - n) / 20) { - // not enough room for requested growth from end of array - size_t newlen = inc * 2; - while (n + 2 * inc > newlen - a->offset) - newlen *= 2; - size_t newmaxsize = overallocation(a->maxsize); - if (newlen < newmaxsize) - newlen = newmaxsize; - size_t newoffset = (newlen - newnrows) / 2; - if (!array_resize_buffer(a, newlen)) { - data = (char*)a->data + oldoffsnb; - } - newdata = (char*)a->data + newoffset * elsz; - if (isbitsunion) { - typetagdata = data + (oldmaxsize - oldoffset) * elsz + oldoffset; - newtypetagdata = newdata + (a->maxsize - newoffset) * elsz + newoffset; - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx); - } - // We could use memcpy if resizing allocates a new buffer, - // hopefully it's not a 
particularly important optimization. - if (idx > 0 && newdata < data) { - memmove_safe(a->flags.hasptr, newdata, data, nb1); - } - memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1); - if (idx > 0 && newdata > data) { - memmove_safe(a->flags.hasptr, newdata, data, nb1); - } - a->offset = newoffset; - } - else { - // use extra space between a->nrows & a->maxsize - a->offset = (a->maxsize - newnrows) / 2; - newdata = data - oldoffsnb + a->offset * elsz; - if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset; - if (idx > 0 && newdata < data) { - memmove_safe(a->flags.hasptr, newdata, data, nb1); - if (isbitsunion) { - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - } - } - memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1); - if (isbitsunion) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx); - if (idx > 0 && newdata > data) { - memmove_safe(a->flags.hasptr, newdata, data, nb1); - if (isbitsunion) { - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - } - } - } - } - a->length = newnrows; - a->nrows = newnrows; - a->data = newdata; - if (jl_is_array_zeroinit(a)) { - memset(newdata + idx * elsz, 0, nbinc); - } - if (newtypetagdata) { - memset(newtypetagdata + idx, 0, inc); - } -} - -STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx, - size_t inc, size_t n) -{ - // optimized for the case of only growing and shrinking at the end - if (__unlikely(a->flags.isshared)) { - if (a->flags.how != 3) - jl_error("cannot resize array with shared data"); - if (inc == 0) { - // If inc > 0, it will always trigger the slow path and unshare the - // buffer - array_try_unshare(a); - return; - } + if (!isbitsunion && elsz == 0) { + jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 1); + a->ref.mem = newmem; + jl_gc_wb(a, newmem); + a->dimsize[0] = newnrows; + return; } - size_t elsz = a->elsize; - char *data = (char*)a->data; - char *typetagdata; - char *newtypetagdata; - int isbitsunion = jl_array_isbitsunion(a); - if (isbitsunion) typetagdata = jl_array_typetagdata(a); - int has_gap = n > idx; - size_t reqmaxsize = a->offset + n + inc; - if (__unlikely(reqmaxsize > a->maxsize)) { - size_t nb1 = idx * elsz; - size_t nbinc = inc * elsz; - // grow either by our computed overallocation factor or exactly the requested size, - // whichever is larger - size_t newmaxsize = overallocation(a->maxsize); + size_t oldoffset = isbitsunion ? (size_t)data : (data - (char*)a->ref.mem->ptr) / elsz; + if (isbitsunion) + data = (char*)a->ref.mem->ptr + oldoffset * elsz; + size_t oldmaxsize = a->ref.mem->length; + size_t reqmaxsize = oldoffset + newnrows; + if (__unlikely(reqmaxsize > oldmaxsize)) { + size_t newmaxsize; + if (oldmaxsize < 4) // typical sequence: 0, // 4, // 6, 9, 13, 19, 28, 42, // 50, 60, 72, ... 
+ newmaxsize = 4; + else if (oldmaxsize < 48) + newmaxsize = oldmaxsize*3/2; // grow by 50% + else + newmaxsize = oldmaxsize*6/5; // grow by 20% if (newmaxsize < reqmaxsize) newmaxsize = reqmaxsize; - size_t oldmaxsize = a->maxsize; - int newbuf = array_resize_buffer(a, newmaxsize); - char *newdata = (char*)a->data + a->offset * elsz; - if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset; - if (newbuf) { - memcpy(newdata, data, nb1); - if (isbitsunion) { - memcpy(newtypetagdata, typetagdata, idx); - if (has_gap) memcpy(newtypetagdata + idx + inc, typetagdata + idx, n - idx); - memset(newtypetagdata + idx, 0, inc); - } - if (has_gap) memcpy(newdata + nb1 + nbinc, data + nb1, n * elsz - nb1); - } - else { - if (isbitsunion) { - typetagdata = newdata + (oldmaxsize - a->offset) * elsz + a->offset; - if (has_gap) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx); - memmove(newtypetagdata, typetagdata, idx); - memset(newtypetagdata + idx, 0, inc); - } - if (has_gap) memmove_safe(a->flags.hasptr, newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1); - } - a->data = data = newdata; - } - else if (has_gap) { - if (isbitsunion) { - memmove(typetagdata + idx + inc, typetagdata + idx, n - idx); - memset(typetagdata + idx, 0, inc); - } - size_t nb1 = idx * elsz; - memmove_safe(a->flags.hasptr, data + nb1 + inc * elsz, data + nb1, n * elsz - nb1); - } - else { - // there was enough room for requested growth already in a->maxsize - if (isbitsunion) - memset(typetagdata + idx, 0, inc); - } - size_t newnrows = n + inc; - a->length = newnrows; - a->nrows = newnrows; - if (jl_is_array_zeroinit(a)) { - memset(data + idx * elsz, 0, inc * elsz); - } -} - -JL_DLLEXPORT void jl_array_grow_at(jl_array_t *a, ssize_t idx, size_t inc) -{ - // No need to explicitly unshare. - // Shared arrays are guaranteed to trigger the slow path for growing. 
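/* Editorial sketch (restating the policy above, illustration only): the capacity growth
 * used by the new jl_array_grow_end, factored into a standalone helper. Capacities
 * below 4 jump straight to 4, then grow by 50% up to 48 elements and by 20% beyond
 * that, never returning less than the requested size. */
#if 0 /* example only, not compiled */
static size_t array_next_capacity(size_t oldmaxsize, size_t reqmaxsize)
{
    size_t newmaxsize;
    if (oldmaxsize < 4)
        newmaxsize = 4;
    else if (oldmaxsize < 48)
        newmaxsize = oldmaxsize * 3 / 2;  /* grow by 50% */
    else
        newmaxsize = oldmaxsize * 6 / 5;  /* grow by 20% */
    return newmaxsize < reqmaxsize ? reqmaxsize : newmaxsize;
}
#endif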
- size_t n = jl_array_nrows(a); - if (idx < 0 || idx > n) - jl_bounds_error_int((jl_value_t*)a, idx + 1); - if (idx + 1 < n / 2) { - jl_array_grow_at_beg(a, idx, inc, n); - } - else { - jl_array_grow_at_end(a, idx, inc, n); - } -} - -JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc) -{ - size_t n = jl_array_nrows(a); - jl_array_grow_at_end(a, n, inc, n); -} - -JL_DLLEXPORT void jl_array_grow_beg(jl_array_t *a, size_t inc) -{ - size_t n = jl_array_nrows(a); - jl_array_grow_at_beg(a, 0, inc, n); -} - -STATIC_INLINE void jl_array_shrink(jl_array_t *a, size_t dec) -{ - //if we don't manage this array return - if (a->flags.how == 0) return; - - size_t elsz = a->elsize; - size_t newbytes = (a->maxsize - dec) * a->elsize; - size_t oldnbytes = (a->maxsize) * a->elsize; - int isbitsunion = jl_array_isbitsunion(a); - if (isbitsunion) { - newbytes += a->maxsize - dec; - oldnbytes += a->maxsize; - } - - if (elsz == 1 && !isbitsunion) { - newbytes++; - oldnbytes++; - } - char *originalptr = ((char*) a->data) - a->offset * a->elsize; - if (a->flags.how == 1) { - //this is a julia-allocated buffer that needs to be marked - char *typetagdata; - char *newtypetagdata; - if (isbitsunion) { - typetagdata = (char*)malloc_s(a->nrows); - memcpy(typetagdata, jl_array_typetagdata(a), a->nrows); - } - jl_task_t *ct = jl_current_task; - char *originaldata = (char*) a->data - a->offset * a->elsize; - char *newdata = (char*)jl_gc_alloc_buf(ct->ptls, newbytes); - jl_gc_wb_buf(a, newdata, newbytes); - a->maxsize -= dec; - if (isbitsunion) { - newtypetagdata = jl_array_typetagdata(a); - memcpy(newtypetagdata, typetagdata, a->nrows); - free(typetagdata); - } - memcpy(newdata, originaldata, newbytes); - a->data = newdata + a->offset * elsz; - } - else if (a->flags.how == 2) { - //malloc-allocated pointer this array object manages - char *typetagdata; - char *newtypetagdata; - if (isbitsunion) { - typetagdata = (char*)malloc_s(a->nrows); - memcpy(typetagdata, jl_array_typetagdata(a), a->nrows); - } - size_t oldoffsnb = a->offset * elsz; - a->data = ((char*)jl_gc_managed_realloc(originalptr, newbytes, oldnbytes, - a->flags.isaligned, (jl_value_t*) a)) + oldoffsnb; - a->maxsize -= dec; - if (isbitsunion) { - newtypetagdata = jl_array_typetagdata(a); - memcpy(newtypetagdata, typetagdata, a->nrows); - free(typetagdata); - } - } - else if (a->flags.how == 3) { - //this has has a pointer to the object that owns the data - } -} - -static size_t jl_array_limit_offset(jl_array_t *a, size_t offset) -{ - // make sure offset doesn't grow forever due to deleting at beginning - // and growing at end - if (offset >= 13 * a->maxsize / 20) - offset = 17 * (a->maxsize - a->nrows) / 100; -#ifdef _P64 - while (offset > (size_t)UINT32_MAX) { - offset /= 2; - } -#endif - return offset; -} - -STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec, - size_t n) -{ - // no error checking - // assume inbounds, assume unshared - size_t elsz = a->elsize; - size_t offset = a->offset; - int isbitsunion = jl_array_isbitsunion(a); - offset += dec; - a->length = n - dec; - a->nrows = n - dec; - size_t newoffs = jl_array_limit_offset(a, offset); - assert(newoffs <= offset); - size_t nbdec = dec * elsz; - if (__unlikely(newoffs != offset) || idx > 0) { - char *olddata = (char*)a->data; - char *newdata = olddata - (a->offset - newoffs) * elsz; - char *typetagdata; - char *newtypetagdata; - if (isbitsunion) { - typetagdata = jl_array_typetagdata(a); - newtypetagdata = typetagdata - (a->offset - newoffs); - } - - size_t nb1 = 
idx * elsz; // size in bytes of the first block - size_t nbtotal = a->nrows * elsz; // size in bytes of the new array - // Implicit '\0' for byte arrays - if (elsz == 1 && !isbitsunion) - nbtotal++; - if (idx > 0) { - memmove_safe(a->flags.hasptr, newdata, olddata, nb1); - if (isbitsunion) memmove(newtypetagdata, typetagdata, idx); - } - // Move the rest of the data if the offset changed - if (newoffs != offset) { - memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1); - if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, a->nrows - idx); - } - a->data = newdata; - } - else { - char *data = (char*)a->data; - a->data = data + nbdec; - } - a->offset = newoffs; -} - -STATIC_INLINE void jl_array_del_at_end(jl_array_t *a, size_t idx, size_t dec, - size_t n) -{ - // no error checking - // assume inbounds, assume unshared - char *data = (char*)a->data; - size_t elsz = a->elsize; - int isbitsunion = jl_array_isbitsunion(a); - size_t last = idx + dec; - if (n > last) { - memmove_safe(a->flags.hasptr, data + idx * elsz, data + last * elsz, (n - last) * elsz); + // TODO: round this up to newmaxsize < GC_MAX_SZCLASS ? jl_gc_sizeclasses[jl_gc_szclass(newmaxsize)] : LLT_ALIGN(newmaxsize, 4096), after accounting for the object header (24 bytes) + jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, newmaxsize); + char *newdata = (char*)newmem->ptr + oldoffset * elsz; + memcpy(newdata, data, n * elsz); if (isbitsunion) { char *typetagdata = jl_array_typetagdata(a); - memmove(typetagdata + idx, typetagdata + last, n - last); + char *newtypetagdata = (char*)newmem->ptr + newmaxsize * elsz + oldoffset; + memcpy(newtypetagdata, typetagdata, n); } + a->ref.mem = newmem; + jl_gc_wb(a, newmem); + if (isbitsunion) + a->ref.ptr_or_offset = (void*)oldoffset; + else + a->ref.ptr_or_offset = newdata; } - n -= dec; - if (elsz == 1 && !isbitsunion) - data[n] = 0; - a->nrows = n; - a->length = n; -} - -JL_DLLEXPORT void jl_array_del_at(jl_array_t *a, ssize_t idx, size_t dec) -{ - size_t n = jl_array_nrows(a); - size_t last = idx + dec; - if (__unlikely(idx < 0)) - jl_bounds_error_int((jl_value_t*)a, idx + 1); - if (__unlikely(last > n)) - jl_bounds_error_int((jl_value_t*)a, last); - // The unsharing needs to happen before we modify the buffer - if (__unlikely(a->flags.isshared)) - array_try_unshare(a); - if (idx < n - last) { - jl_array_del_at_beg(a, idx, dec, n); - } - else { - jl_array_del_at_end(a, idx, dec, n); - } -} - -JL_DLLEXPORT void jl_array_del_beg(jl_array_t *a, size_t dec) -{ - size_t n = jl_array_nrows(a); - if (__unlikely(dec > n)) - jl_bounds_error_int((jl_value_t*)a, dec); - if (__unlikely(a->flags.isshared)) - array_try_unshare(a); - if (dec == 0) - return; - jl_array_del_at_beg(a, 0, dec, n); + a->dimsize[0] = newnrows; } JL_DLLEXPORT void jl_array_del_end(jl_array_t *a, size_t dec) { + // assume inbounds, assume unshared size_t n = jl_array_nrows(a); if (__unlikely(n < dec)) jl_bounds_error_int((jl_value_t*)a, 0); - if (__unlikely(a->flags.isshared)) - array_try_unshare(a); - if (dec == 0) + if (__unlikely(dec == 0)) return; - jl_array_del_at_end(a, n - dec, dec, n); -} - -JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz) -{ - size_t n = jl_array_nrows(a); - - size_t min = a->offset + a->length; - sz = (sz < min) ? 
min : sz; - - if (sz <= a->maxsize) { - size_t dec = a->maxsize - sz; - //if we don't save at least an eighth of maxsize then its not worth it to shrink - if (dec < a->maxsize / 8) return; - jl_array_shrink(a, dec); - } - else { - size_t inc = sz - n; - jl_array_grow_end(a, inc); - - a->nrows = n; - a->length = n; - } -} - -JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary) -{ - size_t elsz = ary->elsize; - size_t len = jl_array_len(ary); - int isunion = jl_is_uniontype(jl_tparam0(jl_typeof(ary))); - jl_array_t *new_ary = _new_array_(jl_typeof(ary), jl_array_ndims(ary), - &ary->nrows, !ary->flags.ptrarray, - ary->flags.hasptr, isunion, 0, elsz); - memcpy(new_ary->data, ary->data, len * elsz); - // ensure isbits union arrays copy their selector bytes correctly - if (jl_array_isbitsunion(ary)) - memcpy(jl_array_typetagdata(new_ary), jl_array_typetagdata(ary), len); - return new_ary; -} - -// Copy element by element until we hit a young object, at which point -// we can finish by using `memmove`. -static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner, - void **src_p, void **dest_p, - ssize_t n) JL_NOTSAFEPOINT -{ - _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p; - _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p; - for (ssize_t i = 0; i < n; i++) { - void *val = jl_atomic_load_relaxed(src_pa + i); - jl_atomic_store_release(dest_pa + i, val); - // `val` is young or old-unmarked - if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { - jl_gc_queue_root(owner); - return i; - } - } - return n; -} - -static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner, - void **src_p, void **dest_p, - ssize_t n) JL_NOTSAFEPOINT -{ - _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p; - _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p; - for (ssize_t i = 0; i < n; i++) { - void *val = jl_atomic_load_relaxed(src_pa + n - i - 1); - jl_atomic_store_release(dest_pa + n - i - 1, val); - // `val` is young or old-unmarked - if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { - jl_gc_queue_root(owner); - return i; - } - } - return n; -} - -// Unsafe, assume inbounds and that dest and src have the same eltype -JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p, - jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT -{ - assert(dest->flags.ptrarray && src->flags.ptrarray); - jl_value_t *owner = jl_array_owner(dest); - // Destination is old and doesn't refer to any young object - if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { - jl_value_t *src_owner = jl_array_owner(src); - // Source is young or being promoted or might refer to young objects - // (i.e. 
source is not an old object that doesn't have wb triggered) - if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { - ssize_t done; - if (dest_p < src_p || dest_p > src_p + n) { - done = jl_array_ptr_copy_forward(owner, src_p, dest_p, n); - dest_p += done; - src_p += done; - } - else { - done = jl_array_ptr_copy_backward(owner, src_p, dest_p, n); - } - n -= done; - } + n -= dec; + a->dimsize[0] = n; + // don't leave behind deleted data + if (jl_is_genericmemory_zeroinit(a->ref.mem) && !jl_genericmemory_isbitsunion(a->ref.mem)) { + size_t elsz = jl_array_elsize(a); + memset(jl_array_data(a,char) + n * elsz, 0, elsz * dec); } - memmove_refs(dest_p, src_p, n); } JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item) { - assert(jl_typeis(a, jl_array_any_type)); + assert(jl_typetagis(a, jl_array_any_type)); jl_array_grow_end(a, 1); size_t n = jl_array_nrows(a); jl_array_ptr_set(a, n - 1, item); @@ -1263,8 +282,8 @@ JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item) JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2) { - assert(jl_typeis(a, jl_array_any_type)); - assert(jl_typeis(a2, jl_array_any_type)); + assert(jl_typetagis(a, jl_array_any_type)); + assert(jl_typetagis(a2, jl_array_any_type)); size_t i; size_t n = jl_array_nrows(a); size_t n2 = jl_array_nrows(a2); @@ -1274,50 +293,77 @@ JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2) } } -JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a) JL_NOTSAFEPOINT +JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy_slice(jl_genericmemory_t *mem, void *data, size_t len); + +JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary) { - return jl_array_data_owner(a); + size_t len = jl_array_len(ary); + jl_genericmemory_t *mem = jl_genericmemory_copy_slice(ary->ref.mem, ary->ref.ptr_or_offset, len); + JL_GC_PUSH1(&mem); + jl_array_t *new_ary = _new_array((jl_value_t*)jl_typetagof(ary), mem, ((jl_datatype_t*)jl_typetagof(ary->ref.mem))->layout, jl_array_ndims(ary), &ary->dimsize[0]); + JL_GC_POP(); + return new_ary; } -STATIC_INLINE int jl_has_implicit_byte_owned(jl_array_t *a) +JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) { - assert(a->flags.how != 3); - if (!a->flags.isshared) - return 1; - return a->flags.how == 1; + if (len == 0) + return jl_an_empty_string; + size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size + if (sz < len) // overflow + jl_throw(jl_memory_exception); + jl_task_t *ct = jl_current_task; + jl_value_t *s; + jl_ptls_t ptls = ct->ptls; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass_align8(allocsz); + jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
+ s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + s = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typetagof(s, jl_string_tag, 0); + maybe_record_alloc_to_profile(s, len, jl_string_type); + *(size_t*)s = len; + jl_string_data(s)[len] = 0; + return s; } -STATIC_INLINE int jl_has_implicit_byte(jl_array_t *a) +JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len) { - // * unshared: - // * how: 0-2 - // We own and allocated the data. - // It should have the extra byte. - // * shared: - // * how: 0, 2 - // The data might come from external source without implicit NUL byte. - // There could be an entra byte for a `reinterpreted` array - // but that should be unlikely for strings. - // * how: 1 - // We allocated the data with the extra byte. - // * how: 3 - // We should check the owner. - if (a->flags.how == 3) { - a = (jl_array_t*)jl_array_data_owner(a); - if (jl_is_string(a)) return 1; - return a->elsize == 1 && jl_has_implicit_byte_owned(a); - } - return jl_has_implicit_byte_owned(a); + jl_value_t *s = jl_alloc_string(len); + if (len > 0) + memcpy(jl_string_data(s), str, len); + return s; } -// Create an array with the same content -JL_DLLEXPORT jl_array_t *jl_array_cconvert_cstring(jl_array_t *a) +JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str) { - assert(jl_typeof(a) == jl_array_uint8_type); - if (!jl_has_implicit_byte(a)) - a = jl_array_copy(a); - ((char*)a->data)[a->nrows] = 0; - return a; + return jl_pchar_to_string(str, strlen(str)); +} + + +// deprecated and unused internally, but some packages (notably OrderedCollections.jl) have not yet started to use the modern Base.unsetindex API +JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i) +{ + if (i >= jl_array_len(a)) + jl_bounds_error_int((jl_value_t*)a, i + 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout; + if (layout->flags.arrayelem_isboxed) { + jl_atomic_store_relaxed(jl_array_data(a,_Atomic(jl_value_t*)) + i, NULL); + } + else if (layout->first_ptr >= 0) { + size_t elsize = layout->size; + jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0); + memset(jl_array_data(a,char) + elsize * i, 0, elsize); + } } #ifdef __cplusplus diff --git a/src/ast.c b/src/ast.c index 70ee915475651..7e7e7fb445e00 100644 --- a/src/ast.c +++ b/src/ast.c @@ -28,6 +28,7 @@ JL_DLLEXPORT jl_sym_t *jl_top_sym; JL_DLLEXPORT jl_sym_t *jl_module_sym; JL_DLLEXPORT jl_sym_t *jl_slot_sym; JL_DLLEXPORT jl_sym_t *jl_export_sym; +JL_DLLEXPORT jl_sym_t *jl_public_sym; JL_DLLEXPORT jl_sym_t *jl_import_sym; JL_DLLEXPORT jl_sym_t *jl_toplevel_sym; JL_DLLEXPORT jl_sym_t *jl_quote_sym; @@ -59,6 +60,7 @@ JL_DLLEXPORT jl_sym_t *jl_thunk_sym; JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym; JL_DLLEXPORT jl_sym_t *jl_as_sym; JL_DLLEXPORT jl_sym_t *jl_global_sym; +JL_DLLEXPORT jl_sym_t *jl_local_sym; JL_DLLEXPORT jl_sym_t *jl_list_sym; JL_DLLEXPORT jl_sym_t *jl_dot_sym; JL_DLLEXPORT jl_sym_t *jl_newvar_sym; @@ -66,7 +68,6 @@ JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym; JL_DLLEXPORT jl_sym_t *jl_inbounds_sym; JL_DLLEXPORT jl_sym_t *jl_copyast_sym; JL_DLLEXPORT jl_sym_t *jl_cfunction_sym; -JL_DLLEXPORT jl_sym_t *jl_pure_sym; JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym; JL_DLLEXPORT jl_sym_t *jl_meta_sym; JL_DLLEXPORT jl_sym_t *jl_inert_sym; @@ -84,6 +85,7 @@ JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym; JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym; 
JL_DLLEXPORT jl_sym_t *jl_purity_sym; JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym; +JL_DLLEXPORT jl_sym_t *jl_nospecializeinfer_sym; JL_DLLEXPORT jl_sym_t *jl_macrocall_sym; JL_DLLEXPORT jl_sym_t *jl_colon_sym; JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym; @@ -97,6 +99,8 @@ JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym; JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym; JL_DLLEXPORT jl_sym_t *jl_optlevel_sym; JL_DLLEXPORT jl_sym_t *jl_thismodule_sym; +JL_DLLEXPORT jl_sym_t *jl_eval_sym; +JL_DLLEXPORT jl_sym_t *jl_include_sym; JL_DLLEXPORT jl_sym_t *jl_atom_sym; JL_DLLEXPORT jl_sym_t *jl_statement_sym; JL_DLLEXPORT jl_sym_t *jl_all_sym; @@ -149,15 +153,59 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v); static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error); +static jl_sym_t *scmsym_to_julia(fl_context_t *fl_ctx, value_t s) +{ + assert(issymbol(s)); + if (fl_isgensym(fl_ctx, s)) { + char gsname[16]; + char *n = uint2str(&gsname[1], sizeof(gsname)-1, + ((gensym_t*)ptr(s))->id, 10); + *(--n) = '#'; + return jl_symbol(n); + } + return jl_symbol(symbol_name(fl_ctx, s)); +} + static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) { // tells whether a var is defined in and *by* the current module argcount(fl_ctx, "defined-julia-global", nargs, 1); (void)tosymbol(fl_ctx, args[0], "defined-julia-global"); jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx); - jl_sym_t *var = jl_symbol(symbol_name(fl_ctx, args[0])); - jl_binding_t *b = jl_get_module_binding(ctx->module, var); - return (b != NULL && b->owner == ctx->module) ? fl_ctx->T : fl_ctx->F; + jl_sym_t *var = scmsym_to_julia(fl_ctx, args[0]); + jl_binding_t *b = jl_get_module_binding(ctx->module, var, 0); + return (b != NULL && jl_atomic_load_relaxed(&b->owner) == b) ? fl_ctx->T : fl_ctx->F; +} + +static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) +{ + // tells whether a var is defined, in the sense that accessing it is nothrow + // can take either a symbol or a module and a symbol + jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx); + jl_module_t *mod = ctx->module; + jl_sym_t *var = NULL; + if (nargs == 1) { + (void)tosymbol(fl_ctx, args[0], "nothrow-julia-global"); + var = scmsym_to_julia(fl_ctx, args[0]); + } + else { + argcount(fl_ctx, "nothrow-julia-global", nargs, 2); + value_t argmod = args[0]; + if (iscvalue(argmod) && cv_class((cvalue_t*)ptr(argmod)) == jl_ast_ctx(fl_ctx)->jvtype) { + mod = *(jl_module_t**)cv_data((cvalue_t*)ptr(argmod)); + JL_GC_PROMISE_ROOTED(mod); + } else { + (void)tosymbol(fl_ctx, argmod, "nothrow-julia-global"); + if (scmsym_to_julia(fl_ctx, argmod) != jl_thismodule_sym) { + lerrorf(fl_ctx, fl_ctx->ArgError, "nothrow-julia-global: Unknown globalref module kind"); + } + } + (void)tosymbol(fl_ctx, args[1], "nothrow-julia-global"); + var = scmsym_to_julia(fl_ctx, args[1]); + } + jl_binding_t *b = jl_get_module_binding(mod, var, 0); + b = b ? jl_atomic_load_relaxed(&b->owner) : NULL; + return b != NULL && jl_atomic_load_relaxed(&b->value) != NULL ? 
fl_ctx->T : fl_ctx->F; } static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT @@ -206,6 +254,7 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m static const builtinspec_t julia_flisp_ast_ext[] = { { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint + { "nothrow-julia-global", fl_nothrow_julia_global }, { "current-julia-module-counter", fl_current_module_counter }, { "julia-scalar?", fl_julia_scalar }, { "julia-current-file", fl_julia_current_file }, @@ -304,6 +353,7 @@ void jl_init_common_symbols(void) jl_lambda_sym = jl_symbol("lambda"); jl_module_sym = jl_symbol("module"); jl_export_sym = jl_symbol("export"); + jl_public_sym = jl_symbol("public"); jl_import_sym = jl_symbol("import"); jl_using_sym = jl_symbol("using"); jl_assign_sym = jl_symbol("="); @@ -318,6 +368,7 @@ void jl_init_common_symbols(void) jl_opaque_closure_method_sym = jl_symbol("opaque_closure_method"); jl_const_sym = jl_symbol("const"); jl_global_sym = jl_symbol("global"); + jl_local_sym = jl_symbol("local"); jl_thunk_sym = jl_symbol("thunk"); jl_toplevel_sym = jl_symbol("toplevel"); jl_dot_sym = jl_symbol("."); @@ -328,7 +379,6 @@ void jl_init_common_symbols(void) jl_newvar_sym = jl_symbol("newvar"); jl_copyast_sym = jl_symbol("copyast"); jl_loopinfo_sym = jl_symbol("loopinfo"); - jl_pure_sym = jl_symbol("pure"); jl_meta_sym = jl_symbol("meta"); jl_list_sym = jl_symbol("list"); jl_unused_sym = jl_symbol("#unused#"); @@ -344,6 +394,7 @@ void jl_init_common_symbols(void) jl_isdefined_sym = jl_symbol("isdefined"); jl_nospecialize_sym = jl_symbol("nospecialize"); jl_specialize_sym = jl_symbol("specialize"); + jl_nospecializeinfer_sym = jl_symbol("nospecializeinfer"); jl_optlevel_sym = jl_symbol("optlevel"); jl_compile_sym = jl_symbol("compile"); jl_force_compile_sym = jl_symbol("force_compile"); @@ -363,6 +414,8 @@ void jl_init_common_symbols(void) jl_aliasscope_sym = jl_symbol("aliasscope"); jl_popaliasscope_sym = jl_symbol("popaliasscope"); jl_thismodule_sym = jl_symbol("thismodule"); + jl_eval_sym = jl_symbol("eval"); + jl_include_sym = jl_symbol("include"); jl_block_sym = jl_symbol("block"); jl_atom_sym = jl_symbol("atom"); jl_statement_sym = jl_symbol("statement"); @@ -413,20 +466,6 @@ JL_DLLEXPORT void fl_profile(const char *fname) jl_ast_ctx_leave(ctx); } - -static jl_sym_t *scmsym_to_julia(fl_context_t *fl_ctx, value_t s) -{ - assert(issymbol(s)); - if (fl_isgensym(fl_ctx, s)) { - char gsname[16]; - char *n = uint2str(&gsname[1], sizeof(gsname)-1, - ((gensym_t*)ptr(s))->id, 10); - *(--n) = '#'; - return jl_symbol(n); - } - return jl_symbol(symbol_name(fl_ctx, s)); -} - static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mod) { jl_value_t *v = NULL; @@ -436,6 +475,8 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo } JL_CATCH { // if expression cannot be converted, replace with error expr + //jl_(jl_current_exception()); + //jlbacktrace(); jl_expr_t *ex = jl_exprn(jl_error_sym, 1); v = (jl_value_t*)ex; jl_array_ptr_set(ex->args, 0, jl_cstr_to_string("invalid AST")); @@ -562,6 +603,15 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m temp = scm_to_julia(fl_ctx, car_(cdr_(e)), mod); temp = jl_new_struct(jl_gotoifnot_type, ex, temp); } + else if (sym == jl_enter_sym) { + ex = scm_to_julia_(fl_ctx, car_(e), mod); + temp = jl_new_struct_uninit(jl_enternode_type); + jl_enternode_scope(temp) = NULL; + 
jl_enternode_catch_dest(temp) = jl_unbox_long(ex); + if (n == 2) { + jl_enternode_scope(temp) = scm_to_julia(fl_ctx, car_(cdr_(e)), mod); + } + } else if (sym == jl_newvar_sym) { ex = scm_to_julia_(fl_ctx, car_(e), mod); temp = jl_new_struct(jl_newvarnode_type, ex); @@ -646,9 +696,9 @@ static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v) static void array_to_list(fl_context_t *fl_ctx, jl_array_t *a, value_t *pv, int check_valid) { value_t temp; - for(long i=jl_array_len(a)-1; i >= 0; i--) { + for (long i = jl_array_nrows(a) - 1; i >= 0; i--) { *pv = fl_cons(fl_ctx, fl_ctx->NIL, *pv); - temp = julia_to_scm_(fl_ctx, jl_array_ptr_ref(a,i), check_valid); + temp = julia_to_scm_(fl_ctx, jl_array_ptr_ref(a, i), check_valid); // note: must be separate statement car_(*pv) = temp; } @@ -688,8 +738,8 @@ static value_t julia_to_scm_noalloc2(fl_context_t *fl_ctx, jl_value_t *v, int ch if (check_valid) { if (jl_is_ssavalue(v)) lerror(fl_ctx, symbol(fl_ctx, "error"), "SSAValue objects should not occur in an AST"); - if (jl_is_slot(v)) - lerror(fl_ctx, symbol(fl_ctx, "error"), "Slot objects should not occur in an AST"); + if (jl_is_slotnumber(v)) + lerror(fl_ctx, symbol(fl_ctx, "error"), "SlotNumber objects should not occur in an AST"); } value_t opaque = cvalue(fl_ctx, jl_ast_ctx(fl_ctx)->jvtype, sizeof(void*)); *(jl_value_t**)cv_data((cvalue_t*)ptr(opaque)) = v; @@ -702,11 +752,11 @@ static value_t julia_to_scm_noalloc(fl_context_t *fl_ctx, jl_value_t *v, int che if (julia_to_scm_noalloc1(fl_ctx, v, &retval)) return retval; assert(!jl_is_expr(v) && - !jl_typeis(v, jl_linenumbernode_type) && - !jl_typeis(v, jl_gotonode_type) && - !jl_typeis(v, jl_quotenode_type) && - !jl_typeis(v, jl_newvarnode_type) && - !jl_typeis(v, jl_globalref_type)); + !jl_typetagis(v, jl_linenumbernode_type) && + !jl_typetagis(v, jl_gotonode_type) && + !jl_typetagis(v, jl_quotenode_type) && + !jl_typetagis(v, jl_newvarnode_type) && + !jl_typetagis(v, jl_globalref_type)); return julia_to_scm_noalloc2(fl_ctx, v, check_valid); } @@ -747,7 +797,7 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali // GC Note: jl_fieldref(v, 0) allocates for GotoNode // but we don't need a GC root here because julia_to_list2_noalloc // shouldn't allocate in this case. 
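The array_to_list loop above conses from the last array element backwards, so the head of the finished list ends up being element 0 and no reversal pass is needed. The following self-contained illustration of the same pattern uses hypothetical types and names (cons_t, array_to_list_demo); the demo leaks its cells and omits error handling.

#include <stdlib.h>
#include <stdio.h>

typedef struct cons { int car; struct cons *cdr; } cons_t;

static cons_t *cons(int car, cons_t *cdr)
{
    cons_t *c = malloc(sizeof(cons_t));
    if (!c) abort();
    c->car = car;
    c->cdr = cdr;
    return c;
}

static cons_t *array_to_list_demo(const int *a, long n)
{
    cons_t *list = NULL;
    for (long i = n - 1; i >= 0; i--) /* walk backwards so the head is a[0] */
        list = cons(a[i], list);
    return list;
}

int main(void)
{
    int a[] = {1, 2, 3};
    for (cons_t *c = array_to_list_demo(a, 3); c; c = c->cdr)
        printf("%d ", c->car); /* prints: 1 2 3 */
    putchar('\n');
    return 0;
}
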
- if (jl_typeis(v, jl_linenumbernode_type)) { + if (jl_is_linenode(v)) { jl_value_t *file = jl_fieldref_noalloc(v,1); jl_value_t *line = jl_fieldref(v,0); value_t args = julia_to_list2_noalloc(fl_ctx, line, file, check_valid); @@ -757,13 +807,13 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali fl_free_gc_handles(fl_ctx, 1); return scmv; } - if (jl_typeis(v, jl_gotonode_type)) + if (jl_typetagis(v, jl_gotonode_type)) return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)jl_goto_sym, jl_fieldref(v,0), check_valid); - if (jl_typeis(v, jl_quotenode_type)) + if (jl_typetagis(v, jl_quotenode_type)) return julia_to_list2(fl_ctx, (jl_value_t*)jl_inert_sym, jl_fieldref_noalloc(v,0), 0); - if (jl_typeis(v, jl_newvarnode_type)) + if (jl_typetagis(v, jl_newvarnode_type)) return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)jl_newvar_sym, jl_fieldref(v,0), check_valid); - if (jl_typeis(v, jl_globalref_type)) { + if (jl_typetagis(v, jl_globalref_type)) { jl_module_t *m = jl_globalref_mod(v); jl_sym_t *sym = jl_globalref_name(v); if (m == jl_core_module) @@ -785,7 +835,8 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len, jl_value_t *filename, size_t lineno, size_t offset, jl_value_t *options) { - JL_TIMING(PARSING); + JL_TIMING(PARSING, PARSING); + jl_timing_show_filename(jl_string_data(filename), JL_TIMING_DEFAULT_BLOCK); if (offset > text_len) { jl_value_t *textstr = jl_pchar_to_string(text, text_len); JL_GC_PUSH1(&textstr); @@ -835,7 +886,7 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len, } // returns either an expression or a thunk -jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule) +static jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule) { jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule); fl_context_t *fl_ctx = &ctx->fl; @@ -848,8 +899,8 @@ jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module return result; } -static jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, - jl_module_t *inmodule, const char *file, int line) +jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, + jl_module_t *inmodule, const char *file, int line) { jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule); fl_context_t *fl_ctx = &ctx->fl; @@ -875,7 +926,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr) JL_GC_PUSH2(&new_ci, &new_code); new_ci = jl_copy_code_info(new_ci); new_code = jl_array_copy(new_ci->code); - size_t clen = jl_array_len(new_code); + size_t clen = jl_array_nrows(new_code); for (int i = 0; i < clen; ++i) { jl_array_ptr_set(new_code, i, jl_copy_ast( jl_array_ptr_ref(new_code, i) @@ -908,7 +959,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr) } if (jl_is_expr(expr)) { jl_expr_t *e = (jl_expr_t*)expr; - size_t i, l = jl_array_len(e->args); + size_t i, l = jl_array_nrows(e->args); jl_expr_t *ne = jl_exprn(e->head, l); JL_GC_PUSH2(&ne, &expr); for (i = 0; i < l; i++) { @@ -932,7 +983,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr) jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 0); JL_GC_PUSH1(&values); values = jl_array_copy(values); - jl_value_t *ret = jl_new_struct(jl_phinode_type, values); + jl_value_t *ret = jl_new_struct(jl_phicnode_type, values); JL_GC_POP(); return ret; } @@ -986,11 +1037,11 @@ JL_DLLEXPORT int jl_operator_precedence(char *sym) int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT { - size_t 
i, l = jl_array_len(body); + size_t i, l = jl_array_nrows(body); for (i = 0; i < l; i++) { jl_expr_t *stmt = (jl_expr_t*)jl_array_ptr_ref(body, i); if (jl_is_expr((jl_value_t*)stmt) && stmt->head == jl_meta_sym) { - size_t i, l = jl_array_len(stmt->args); + size_t i, l = jl_array_nrows(stmt->args); for (i = 0; i < l; i++) if (jl_array_ptr_ref(stmt->args, i) == (jl_value_t*)sym) return 1; @@ -999,11 +1050,63 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT return 0; } -static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world, int throw_load_error) +// Utility function to return whether `e` is any of the special AST types or +// will always evaluate to itself exactly unchanged. This corresponds to +// `is_self_quoting` in Core.Compiler utilities. +int jl_is_ast_node(jl_value_t *e) JL_NOTSAFEPOINT +{ + return jl_is_newvarnode(e) + || jl_is_code_info(e) + || jl_is_linenode(e) + || jl_is_gotonode(e) + || jl_is_gotoifnot(e) + || jl_is_returnnode(e) + || jl_is_ssavalue(e) + || jl_is_slotnumber(e) + || jl_is_argument(e) + || jl_is_quotenode(e) + || jl_is_globalref(e) + || jl_is_symbol(e) + || jl_is_pinode(e) + || jl_is_phinode(e) + || jl_is_phicnode(e) + || jl_is_upsilonnode(e) + || jl_is_expr(e); +} + +static int is_self_quoting_expr(jl_expr_t *e) JL_NOTSAFEPOINT +{ + return (e->head == jl_inert_sym || + e->head == jl_core_sym || + e->head == jl_line_sym || + e->head == jl_lineinfo_sym || + e->head == jl_meta_sym || + e->head == jl_boundscheck_sym || + e->head == jl_inline_sym || + e->head == jl_noinline_sym); +} + +// any AST, except those that cannot contain symbols +// and have no side effects +int need_esc_node(jl_value_t *e) JL_NOTSAFEPOINT +{ + if (jl_is_linenode(e) + || jl_is_ssavalue(e) + || jl_is_slotnumber(e) + || jl_is_argument(e) + || jl_is_quotenode(e)) + return 0; + if (jl_is_expr(e)) + return !is_self_quoting_expr((jl_expr_t*)e); + // note: jl_is_globalref(e) is not included here, since we care a little about about having a line number for it + return jl_is_ast_node(e); +} + +static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, jl_value_t **lineinfo, size_t world, int throw_load_error) { jl_task_t *ct = jl_current_task; - JL_TIMING(MACRO_INVOCATION); - size_t nargs = jl_array_len(args) + 1; + JL_TIMING(MACRO_INVOCATION, MACRO_INVOCATION); + size_t nargs = jl_array_nrows(args) + 1; JL_NARGSV("macrocall", 3); // macro name, location, and module jl_value_t **margs; JL_GC_PUSHARGS(margs, nargs); @@ -1011,10 +1114,9 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule margs[0] = jl_array_ptr_ref(args, 0); // __source__ argument jl_value_t *lno = jl_array_ptr_ref(args, 1); + if (!jl_is_linenode(lno)) + lno = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing); margs[1] = lno; - if (!jl_typeis(lno, jl_linenumbernode_type)) { - margs[1] = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing); - } margs[2] = (jl_value_t*)inmodule; for (i = 3; i < nargs; i++) margs[i] = jl_array_ptr_ref(args, i - 1); @@ -1026,12 +1128,13 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule jl_value_t *result; JL_TRY { margs[0] = jl_toplevel_eval(*ctx, margs[0]); - jl_method_instance_t *mfunc = jl_method_lookup(margs, nargs, world); + jl_method_instance_t *mfunc = jl_method_lookup(margs, nargs, ct->world_age); JL_GC_PROMISE_ROOTED(mfunc); if (mfunc == NULL) { - jl_method_error(margs[0], &margs[1], 
nargs, world); + jl_method_error(margs[0], &margs[1], nargs, ct->world_age); // unreachable } + jl_timing_show_macro(mfunc, margs[1], inmodule, JL_TIMING_DEFAULT_BLOCK); *ctx = mfunc->def.method->module; result = jl_invoke(margs[0], &margs[1], nargs - 1, mfunc); } @@ -1052,6 +1155,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule } } ct->world_age = last_age; + *lineinfo = margs[1]; JL_GC_POP(); return result; } @@ -1074,14 +1178,18 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str JL_GC_POP(); return expr; } - if (e->head == jl_hygienicscope_sym && jl_expr_nargs(e) == 2) { + if (e->head == jl_hygienicscope_sym && jl_expr_nargs(e) >= 2) { struct macroctx_stack newctx; newctx.m = (jl_module_t*)jl_exprarg(e, 1); JL_TYPECHK(hygienic-scope, module, (jl_value_t*)newctx.m); newctx.parent = macroctx; jl_value_t *a = jl_exprarg(e, 0); jl_value_t *a2 = jl_expand_macros(a, inmodule, &newctx, onelevel, world, throw_load_error); - if (a != a2) + if (jl_is_expr(a2) && ((jl_expr_t*)a2)->head == jl_escape_sym && !need_esc_node(jl_exprarg(a2, 0))) + expr = jl_exprarg(a2, 0); + else if (!need_esc_node(a2)) + expr = a2; + else if (a != a2) jl_array_ptr_set(e->args, 0, a2); return expr; } @@ -1089,21 +1197,28 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str struct macroctx_stack newctx; newctx.m = macroctx ? macroctx->m : inmodule; newctx.parent = macroctx; - jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, world, throw_load_error); + jl_value_t *lineinfo = NULL; + jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, &lineinfo, world, throw_load_error); + if (!need_esc_node(result)) + return result; jl_value_t *wrap = NULL; - JL_GC_PUSH3(&result, &wrap, &newctx.m); + JL_GC_PUSH4(&result, &wrap, &newctx.m, &lineinfo); // copy and wrap the result in `(hygienic-scope ,result ,newctx) if (jl_is_expr(result) && ((jl_expr_t*)result)->head == jl_escape_sym) result = jl_exprarg(result, 0); else - wrap = (jl_value_t*)jl_exprn(jl_hygienicscope_sym, 2); + wrap = (jl_value_t*)jl_exprn(jl_hygienicscope_sym, 3); result = jl_copy_ast(result); if (!onelevel) result = jl_expand_macros(result, inmodule, wrap ? 
&newctx : macroctx, onelevel, world, throw_load_error); - if (wrap) { + if (wrap && need_esc_node(result)) { jl_exprargset(wrap, 0, result); jl_exprargset(wrap, 1, newctx.m); - result = wrap; + jl_exprargset(wrap, 2, lineinfo); + if (jl_is_expr(result) && ((jl_expr_t*)result)->head == jl_escape_sym) + result = jl_exprarg(result, 0); + else + result = wrap; } JL_GC_POP(); return result; @@ -1130,7 +1245,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str } size_t i; - for (i = 0; i < jl_array_len(e->args); i++) { + for (i = 0; i < jl_array_nrows(e->args); i++) { jl_value_t *a = jl_array_ptr_ref(e->args, i); jl_value_t *a2 = jl_expand_macros(a, inmodule, macroctx, onelevel, world, throw_load_error); if (a != a2) @@ -1141,7 +1256,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule) { - JL_TIMING(LOWERING); + JL_TIMING(LOWERING, LOWERING); JL_GC_PUSH1(&expr); expr = jl_copy_ast(expr); expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_atomic_load_acquire(&jl_world_counter), 0); @@ -1152,7 +1267,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule) JL_DLLEXPORT jl_value_t *jl_macroexpand1(jl_value_t *expr, jl_module_t *inmodule) { - JL_TIMING(LOWERING); + JL_TIMING(LOWERING, LOWERING); JL_GC_PUSH1(&expr); expr = jl_copy_ast(expr); expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_atomic_load_acquire(&jl_world_counter), 0); @@ -1178,7 +1293,8 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc(jl_value_t *expr, jl_module_t *inmod JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmodule, const char *file, int line, size_t world) { - JL_TIMING(LOWERING); + JL_TIMING(LOWERING, LOWERING); + jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK); JL_GC_PUSH1(&expr); expr = jl_copy_ast(expr); expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1); @@ -1191,7 +1307,8 @@ JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmod JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *inmodule, const char *file, int line) { - JL_TIMING(LOWERING); + JL_TIMING(LOWERING, LOWERING); + jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK); jl_array_t *kwargs = NULL; JL_GC_PUSH2(&expr, &kwargs); expr = jl_copy_ast(expr); @@ -1239,7 +1356,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t * JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *inmodule, const char *file, int line) { - JL_TIMING(LOWERING); + JL_TIMING(LOWERING, LOWERING); JL_GC_PUSH1(&expr); expr = jl_copy_ast(expr); expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1); @@ -1260,8 +1377,8 @@ JL_DLLEXPORT jl_value_t *jl_expand_stmt(jl_value_t *expr, jl_module_t *inmodule) // Internal C entry point to parser // `text` is passed as a pointer to allow raw non-String buffers to be used // without copying. 
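The comment above notes that the parser entry point takes the source text as a pointer plus a length, so raw, non-NUL-terminated buffers can be handed over without first copying them into a C string. A minimal sketch of that pointer+length convention, with a hypothetical helper name (count_newlines):

#include <stdio.h>

static size_t count_newlines(const char *text, size_t text_len)
{
    size_t n = 0;
    for (size_t i = 0; i < text_len; i++) /* never reads past text_len, no NUL needed */
        if (text[i] == '\n')
            n++;
    return n;
}

int main(void)
{
    const char buf[] = {'a', '\n', 'b', '\n'}; /* deliberately not NUL-terminated */
    printf("%zu\n", count_newlines(buf, sizeof(buf))); /* prints: 2 */
    return 0;
}
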
-JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename, - size_t lineno, size_t offset, jl_value_t *options) +jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename, + size_t lineno, size_t offset, jl_value_t *options) { jl_value_t *core_parse = NULL; if (jl_core_module) { @@ -1279,8 +1396,8 @@ JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t jl_svecset(args[1], 0, jl_box_uint8pointer((uint8_t*)text)); jl_svecset(args[1], 1, jl_box_long(text_len)); args[2] = filename; - args[3] = jl_box_ulong(lineno); - args[4] = jl_box_ulong(offset); + args[3] = jl_box_long(lineno); + args[4] = jl_box_long(offset); args[5] = options; jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; diff --git a/src/ast.scm b/src/ast.scm index 0f69638fdb52e..abfce314fc569 100644 --- a/src/ast.scm +++ b/src/ast.scm @@ -226,13 +226,13 @@ "")) "") (string.rep " " ilvl) "end")) - ((do) - (let ((call (cadr e)) - (args (cdr (cadr (caddr e)))) - (body (caddr (caddr e)))) - (deparse-block (string (deparse call) " do" (if (null? args) "" " ") - (deparse-arglist args)) - (cdr body) ilvl))) + ((do) + (let ((call (cadr e)) + (args (cdr (cadr (caddr e)))) + (body (caddr (caddr e)))) + (deparse-block (string (deparse call) " do" (if (null? args) "" " ") + (deparse-arglist args)) + (cdr body) ilvl))) ((struct) (string (if (equal? (cadr e) '(true)) "mutable " "") "struct " @@ -249,7 +249,7 @@ ;; misc syntax forms ((import using) (string (car e) " " (string.join (map deparse-import-path (cdr e)) ", "))) - ((global local export) (string (car e) " " (string.join (map deparse (cdr e)) ", "))) + ((global local export public) (string (car e) " " (string.join (map deparse (cdr e)) ", "))) ((const) (string "const " (deparse (cadr e)))) ((top) (deparse (cadr e))) ((core) (string "Core." (deparse (cadr e)))) @@ -329,8 +329,8 @@ (else (case (car v) ((...) - (arg-name (cadr v)) ;; to check for errors - (decl-var (cadr v))) + (arg-name (cadr v)) ;; to check for errors + (decl-var (cadr v))) ((|::|) (if (not (symbol? (cadr v))) (bad-formal-argument (cadr v))) @@ -479,12 +479,13 @@ (define (eq-sym? a b) (or (eq? a b) (and (ssavalue? a) (ssavalue? b) (eqv? (cdr a) (cdr b))))) -(define (blockify e) +(define (blockify e (lno #f)) + (set! lno (if lno (list lno) '())) (if (and (pair? e) (eq? (car e) 'block)) (if (null? (cdr e)) - `(block (null)) - e) - `(block ,e))) + `(block ,@lno (null)) + (if (null? lno) e `(block ,@lno ,@(cdr e)))) + `(block ,@lno ,e))) (define (make-var-info name) (list name '(core Any) 0)) (define vinfo:name car) @@ -523,6 +524,21 @@ (and (if one (length= e 3) (length> e 2)) (eq? (car e) 'meta) (memq (cadr e) '(nospecialize specialize)))) +(define (meta? e) + (and (length> e 1) (eq? (car e) 'meta))) + +(define (method-meta-sym? x) + (memq x '(inline noinline aggressive_constprop no_constprop propagate_inbounds))) + +(define (propagate-method-meta e) + `(meta ,@(filter (lambda (x) + (or (method-meta-sym? x) + (and (pair? x) (eq? (car x) 'purity)))) + (cdr e)))) + +(define (argwide-nospecialize-meta? e) + (and (length= e 2) (eq? (car e) 'meta) (memq (cadr e) '(nospecialize specialize)))) + (define (if-generated? e) (and (length= e 4) (eq? (car e) 'if) (equal? 
(cadr e) '(generated)))) diff --git a/src/base b/src/base deleted file mode 120000 index 24312d19b81d4..0000000000000 --- a/src/base +++ /dev/null @@ -1 +0,0 @@ -../base \ No newline at end of file diff --git a/src/builtin_proto.h b/src/builtin_proto.h index 6a2b930e17186..a009b535ac951 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -25,14 +25,15 @@ DECLARE_BUILTIN(applicable); DECLARE_BUILTIN(_apply_iterate); DECLARE_BUILTIN(_apply_pure); DECLARE_BUILTIN(apply_type); -DECLARE_BUILTIN(arrayref); -DECLARE_BUILTIN(arrayset); -DECLARE_BUILTIN(arraysize); +DECLARE_BUILTIN(memoryref); +DECLARE_BUILTIN(memoryrefoffset); +DECLARE_BUILTIN(memoryrefget); +DECLARE_BUILTIN(memoryrefset); +DECLARE_BUILTIN(memoryref_isassigned); DECLARE_BUILTIN(_call_in_world); DECLARE_BUILTIN(_call_in_world_total); DECLARE_BUILTIN(_call_latest); DECLARE_BUILTIN(replacefield); -DECLARE_BUILTIN(const_arrayref); DECLARE_BUILTIN(_expr); DECLARE_BUILTIN(fieldtype); DECLARE_BUILTIN(getfield); @@ -55,16 +56,14 @@ DECLARE_BUILTIN(_typebody); DECLARE_BUILTIN(typeof); DECLARE_BUILTIN(_typevar); DECLARE_BUILTIN(donotdelete); +DECLARE_BUILTIN(compilerbarrier); DECLARE_BUILTIN(getglobal); DECLARE_BUILTIN(setglobal); DECLARE_BUILTIN(finalizer); +DECLARE_BUILTIN(_compute_sparams); +DECLARE_BUILTIN(_svec_ref); +DECLARE_BUILTIN(current_scope); -JL_CALLABLE(jl_f_invoke_kwsorter); -#ifdef DEFINE_BUILTIN_GLOBALS -JL_DLLEXPORT jl_fptr_args_t jl_f_invoke_kwsorter_addr = &jl_f_invoke_kwsorter; -#else -JL_DLLEXPORT extern jl_fptr_args_t jl_f_invoke_kwsorter_addr; -#endif JL_CALLABLE(jl_f__structtype); JL_CALLABLE(jl_f__abstracttype); JL_CALLABLE(jl_f__primitivetype); @@ -72,10 +71,8 @@ JL_CALLABLE(jl_f__setsuper); JL_CALLABLE(jl_f__equiv_typedef); JL_CALLABLE(jl_f_get_binding_type); JL_CALLABLE(jl_f_set_binding_type); -JL_CALLABLE(jl_f_donotdelete); -JL_CALLABLE(jl_f_setglobal); -JL_CALLABLE(jl_f_finalizer); - +JL_CALLABLE(jl_f__compute_sparams); +JL_CALLABLE(jl_f__svec_ref); #ifdef __cplusplus } #endif diff --git a/src/builtins.c b/src/builtins.c index 8db1fa92ec783..e6472457cb6a9 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -35,8 +35,8 @@ extern "C" { static int bits_equal(const void *a, const void *b, int sz) JL_NOTSAFEPOINT { switch (sz) { - case 1: return *(int8_t*)a == *(int8_t*)b; - // Let compiler constant folds the following. 
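The bits_equal hunk that follows keeps a switch over the common sizes so the fixed-size memcmp calls can be constant-folded into direct comparisons, while avoiding typed loads whose alignment is unknown (per the updated comment); only the 1-byte case is read through a cast. A standalone copy of the pattern, with hypothetical demo names, for reference:

#include <string.h>
#include <stdint.h>
#include <stdio.h>

static int bits_equal_demo(const void *a, const void *b, int sz)
{
    switch (sz) {
    case 1: return *(const uint8_t*)a == *(const uint8_t*)b;
    /* fixed-size memcmp calls below are folded by the compiler,
       but remain valid for unaligned pointers */
    case 2: return memcmp(a, b, 2) == 0;
    case 4: return memcmp(a, b, 4) == 0;
    case 8: return memcmp(a, b, 8) == 0;
    default: return memcmp(a, b, sz) == 0;
    }
}

int main(void)
{
    uint32_t x = 42, y = 42, z = 7;
    printf("%d %d\n", bits_equal_demo(&x, &y, 4), bits_equal_demo(&x, &z, 4)); /* 1 0 */
    return 0;
}
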
+ case 1: return *(uint8_t*)a == *(uint8_t*)b; + // Let compiler constant folds the following, though we may not know alignment of them case 2: return memcmp(a, b, 2) == 0; case 4: return memcmp(a, b, 4) == 0; case 8: return memcmp(a, b, 8) == 0; @@ -115,8 +115,8 @@ static int NOINLINE compare_fields(const jl_value_t *a, const jl_value_t *b, jl_ continue; // skip this field (it is #undef) } } - if (!ft->layout->haspadding) { - if (!bits_equal(ao, bo, ft->size)) + if (!ft->layout->flags.haspadding) { + if (!bits_equal(ao, bo, ft->layout->size)) return 0; } else { @@ -147,10 +147,10 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en { if (a == b) return 1; - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a); - if (dt != (jl_datatype_t*)jl_typeof(b)) + uintptr_t dtag = jl_typetagof(a); + if (dtag != jl_typetagof(b)) return 0; - if (dt == jl_datatype_type) { + if (dtag == jl_datatype_tag << 4) { jl_datatype_t *dta = (jl_datatype_t*)a; jl_datatype_t *dtb = (jl_datatype_t*)b; if (dta->name != dtb->name) @@ -164,7 +164,7 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en } return 1; } - if (dt == jl_tvar_type) { + if (dtag == jl_tvar_tag << 4) { jl_typeenv_t *pe = env; while (pe != NULL) { if (pe->var == (jl_tvar_t*)a) @@ -173,7 +173,7 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en } return 0; } - if (dt == jl_unionall_type) { + if (dtag == jl_unionall_tag << 4) { jl_unionall_t *ua = (jl_unionall_t*)a; jl_unionall_t *ub = (jl_unionall_t*)b; if (tvar_names && ua->var->name != ub->var->name) @@ -183,11 +183,11 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en jl_typeenv_t e = { ua->var, (jl_value_t*)ub->var, env }; return egal_types(ua->body, ub->body, &e, tvar_names); } - if (dt == jl_uniontype_type) { + if (dtag == jl_uniontype_tag << 4) { return egal_types(((jl_uniontype_t*)a)->a, ((jl_uniontype_t*)b)->a, env, tvar_names) && egal_types(((jl_uniontype_t*)a)->b, ((jl_uniontype_t*)b)->b, env, tvar_names); } - if (dt == jl_vararg_type) { + if (dtag == jl_vararg_tag << 4) { jl_vararg_t *vma = (jl_vararg_t*)a; jl_vararg_t *vmb = (jl_vararg_t*)b; jl_value_t *vmaT = vma->T ? 
vma->T : (jl_value_t*)jl_any_type; @@ -198,10 +198,8 @@ static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *en return egal_types(vma->N, vmb->N, env, tvar_names); return !vma->N && !vmb->N; } - if (dt == jl_symbol_type) - return 0; - assert(!dt->name->mutabl); - return jl_egal__bits(a, b, dt); + assert(dtag == jl_symbol_tag << 4 || dtag == jl_module_tag << 4 || !((jl_datatype_t*)jl_typeof(a))->name->mutabl); + return jl_egal__bitstag(a, b, dtag); } JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b) @@ -215,45 +213,79 @@ JL_DLLEXPORT int (jl_egal)(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value return jl_egal(a, b); } -JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT +JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT { // warning: a,b may NOT have been gc-rooted by the caller - return jl_egal__unboxed_(a, b, dt); -} - -int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT -{ - if (dt == jl_simplevector_type) - return compare_svec((jl_svec_t*)a, (jl_svec_t*)b); - if (dt == jl_datatype_type) { - jl_datatype_t *dta = (jl_datatype_t*)a; - jl_datatype_t *dtb = (jl_datatype_t*)b; - if (dta->name != dtb->name) + return jl_egal__unboxed_(a, b, dtag); +} + +JL_DLLEXPORT int jl_egal__bitstag(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT +{ + if (dtag < jl_max_tags << 4) { + switch ((enum jl_small_typeof_tags)(dtag >> 4)) { + case jl_int8_tag: + case jl_uint8_tag: + return *(uint8_t*)a == *(uint8_t*)b; + case jl_int16_tag: + case jl_uint16_tag: + return *(uint16_t*)a == *(uint16_t*)b; + case jl_int32_tag: + case jl_uint32_tag: + case jl_char_tag: + return *(uint32_t*)a == *(uint32_t*)b; + case jl_int64_tag: + case jl_uint64_tag: + return *(uint64_t*)a == *(uint64_t*)b; + case jl_unionall_tag: + return egal_types(a, b, NULL, 1); + case jl_uniontype_tag: + return compare_fields(a, b, jl_uniontype_type); + case jl_vararg_tag: + return compare_fields(a, b, jl_vararg_type); + case jl_task_tag: + case jl_tvar_tag: + case jl_symbol_tag: + case jl_module_tag: + case jl_bool_tag: return 0; - if (dta->name != jl_tuple_typename && (dta->isconcretetype || dtb->isconcretetype)) - return 0; - return compare_svec(dta->parameters, dtb->parameters); - } - if (dt == jl_string_type) { - size_t l = jl_string_len(a); - if (jl_string_len(b) != l) - return 0; - return !memcmp(jl_string_data(a), jl_string_data(b), l); + case jl_simplevector_tag: + return compare_svec((jl_svec_t*)a, (jl_svec_t*)b); + case jl_string_tag: { + size_t l = jl_string_len(a); + if (jl_string_len(b) != l) + return 0; + return !memcmp(jl_string_data(a), jl_string_data(b), l); + } + case jl_datatype_tag: { + jl_datatype_t *dta = (jl_datatype_t*)a; + jl_datatype_t *dtb = (jl_datatype_t*)b; + if (dta->name != dtb->name) + return 0; + if (dta->name != jl_tuple_typename && (dta->isconcretetype || dtb->isconcretetype)) + return 0; + return compare_svec(dta->parameters, dtb->parameters); + } +#ifndef NDEBUG + default: +#endif + case jl_max_tags: + case jl_null_tag: + case jl_typeofbottom_tag: + case jl_tags_count: + abort(); + } } - assert(0 && "unreachable"); - return 0; + return jl_egal__bits(a, b, (jl_datatype_t*)dtag); } -int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const 
jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT +inline int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT { size_t sz = jl_datatype_size(dt); if (sz == 0) return 1; size_t nf = jl_datatype_nfields(dt); - if (nf == 0 || !dt->layout->haspadding) + if (nf == 0 || !dt->layout->flags.haspadding) return bits_equal(a, b, sz); - if (dt == jl_unionall_type) - return egal_types(a, b, NULL, 1); return compare_fields(a, b, dt); } @@ -312,6 +344,9 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN i++; pe = pe->prev; } + uintptr_t bits = jl_astaggedvalue(v)->header; + if (bits & GC_IN_IMAGE) + return ((uintptr_t*)v)[-2]; return inthash((uintptr_t)v); } if (tv == jl_uniontype_type) { @@ -347,6 +382,8 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN } if (tv == jl_symbol_type) return ((jl_sym_t*)v)->hash; + if (tv == jl_module_type) + return ((jl_module_t*)v)->hash; assert(!tv->name->mutabl); return immut_id_(tv, v, tv->hash); } @@ -357,7 +394,7 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT if (sz == 0) return ~h; size_t f, nf = jl_datatype_nfields(dt); - if (nf == 0 || (!dt->layout->haspadding && dt->layout->npointers == 0)) { + if (nf == 0 || (!dt->layout->flags.haspadding && dt->layout->npointers == 0)) { // operate element-wise if there are unused bits inside, // otherwise just take the whole data block at once // a few select pointers (notably symbol) also have special hash values @@ -398,51 +435,62 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT return h; } -static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) JL_NOTSAFEPOINT +static uintptr_t NOINLINE jl_object_id__cold(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT { - if (dt == jl_simplevector_type) - return hash_svec((jl_svec_t*)v); - if (dt == jl_datatype_type) { - jl_datatype_t *dtv = (jl_datatype_t*)v; - uintptr_t h = ~dtv->name->hash; - return bitmix(h, hash_svec(dtv->parameters)); - } - if (dt == jl_string_type) { + jl_datatype_t *dt = (jl_datatype_t*)jl_to_typeof(tv); + if (dt->name->mutabl) { + if (dt == jl_string_type) { #ifdef _P64 - return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677); + return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677); #else - return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677); + return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677); #endif - } - if (dt->name->mutabl) + } + if (dt == jl_simplevector_type) + return hash_svec((jl_svec_t*)v); + if (dt == jl_datatype_type) { + jl_datatype_t *dtv = (jl_datatype_t*)v; + uintptr_t h = ~dtv->name->hash; + return bitmix(h, hash_svec(dtv->parameters)); + } + if (dt == jl_module_type) { + jl_module_t *m = (jl_module_t*)v; + return m->hash; + } + uintptr_t bits = jl_astaggedvalue(v)->header; + if (bits & GC_IN_IMAGE) + return ((uintptr_t*)v)[-2]; return inthash((uintptr_t)v); + } return immut_id_(dt, v, dt->hash); } -JL_DLLEXPORT inline uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT +JL_DLLEXPORT inline uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT { - jl_datatype_t *dt = (jl_datatype_t*)tv; - if (dt == jl_symbol_type) + if (tv == jl_symbol_tag << 4) { return ((jl_sym_t*)v)->hash; - if (dt == jl_typename_type) - return ((jl_typename_t*)v)->hash; - if (dt == jl_datatype_type) { + } + else if (tv == 
jl_datatype_tag << 4) { jl_datatype_t *dtv = (jl_datatype_t*)v; if (dtv->isconcretetype) return dtv->hash; } - return jl_object_id__cold(dt, v); + else if (tv == (uintptr_t)jl_typename_type) { + return ((jl_typename_t*)v)->hash; + } + return jl_object_id__cold(tv, v); } JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT { - return jl_object_id_(jl_typeof(v), v); + return jl_object_id_(jl_typetagof(v), v); } // eq hash table -------------------------------------------------------------- #include "iddict.c" +#include "idset.c" // object model and type primitives ------------------------------------------- @@ -473,21 +521,18 @@ JL_CALLABLE(jl_f_sizeof) } if (jl_is_datatype(x)) { jl_datatype_t *dx = (jl_datatype_t*)x; - if (dx->layout == NULL) { + if (!jl_struct_try_layout(dx)) { if (dx->name->abstract) jl_errorf("Abstract type %s does not have a definite size.", jl_symbol_name(dx->name->name)); else jl_errorf("Argument is an incomplete %s type and does not have a definite size.", jl_symbol_name(dx->name->name)); } - if (jl_is_layout_opaque(dx->layout)) + if (jl_is_layout_opaque(dx->layout)) // includes all GenericMemory{kind,T} jl_errorf("Type %s does not have a definite size.", jl_symbol_name(dx->name->name)); return jl_box_long(jl_datatype_size(x)); } if (x == jl_bottom_type) jl_error("The empty type does not have a definite size since it does not have instances."); - if (jl_is_array(x)) { - return jl_box_long(jl_array_len(x) * ((jl_array_t*)x)->elsize); - } if (jl_is_string(x)) return jl_box_long(jl_string_len(x)); if (jl_is_symbol(x)) @@ -497,7 +542,10 @@ JL_CALLABLE(jl_f_sizeof) jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(x); assert(jl_is_datatype(dt)); assert(!dt->name->abstract); - return jl_box_long(jl_datatype_size(dt)); + size_t sz = dt->layout->size; + if (jl_is_genericmemory(x)) + sz = (sz + (dt->layout->flags.arrayelem_isunion ? 1 : 0)) * ((jl_genericmemory_t*)x)->length; + return jl_box_long(sz); } JL_CALLABLE(jl_f_issubtype) @@ -539,6 +587,12 @@ JL_CALLABLE(jl_f_ifelse) return (args[0] == jl_false ? 
args[2] : args[1]); } +JL_CALLABLE(jl_f_current_scope) +{ + JL_NARGS(current_scope, 0, 0); + return jl_current_task->scope; +} + // apply ---------------------------------------------------------------------- static NOINLINE jl_svec_t *_copy_to(size_t newalloc, jl_value_t **oldargs, size_t oldalloc) @@ -569,7 +623,13 @@ STATIC_INLINE void _grow_to(jl_value_t **root, jl_value_t ***oldargs, jl_svec_t *n_alloc = newalloc; } -static jl_value_t *do_apply( jl_value_t **args, uint32_t nargs, jl_value_t *iterate) + +static jl_value_t *jl_arrayref(jl_array_t *a, size_t i) +{ + return jl_memoryrefget(jl_memoryrefindex(a->ref, i)); +} + +static jl_value_t *do_apply(jl_value_t **args, uint32_t nargs, jl_value_t *iterate) { jl_function_t *f = args[0]; if (nargs == 2) { @@ -577,6 +637,17 @@ static jl_value_t *do_apply( jl_value_t **args, uint32_t nargs, jl_value_t *iter if (f == jl_builtin_svec) { if (jl_is_svec(args[1])) return args[1]; + if (jl_is_genericmemory(args[1])) { + jl_genericmemory_t *mem = (jl_genericmemory_t*)args[1]; + size_t n = mem->length; + jl_svec_t *t = jl_alloc_svec(n); + JL_GC_PUSH1(&t); + for (size_t i = 0; i < n; i++) { + jl_svecset(t, i, jl_genericmemoryref(mem, i)); + } + JL_GC_POP(); + return (jl_value_t*)t; + } if (jl_is_array(args[1])) { size_t n = jl_array_len(args[1]); jl_svec_t *t = jl_alloc_svec(n); @@ -603,6 +674,9 @@ static jl_value_t *do_apply( jl_value_t **args, uint32_t nargs, jl_value_t *iter else if (jl_is_tuple(args[i]) || jl_is_namedtuple(args[i])) { precount += jl_nfields(args[i]); } + else if (jl_is_genericmemory(args[i])) { + precount += ((jl_genericmemory_t*)args[i])->length; + } else if (jl_is_array(args[i])) { precount += jl_array_len(args[i]); } @@ -611,7 +685,7 @@ static jl_value_t *do_apply( jl_value_t **args, uint32_t nargs, jl_value_t *iter } } if (extra && iterate == NULL) { - jl_undefined_var_error(jl_symbol("iterate")); + jl_undefined_var_error(jl_symbol("iterate"), NULL); } // allocate space for the argument array and gc roots for it // based on our previous estimates @@ -671,13 +745,40 @@ static jl_value_t *do_apply( jl_value_t **args, uint32_t nargs, jl_value_t *iter jl_gc_wb(arg_heap, newargs[n - 1]); } } + else if (jl_is_genericmemory(ai)) { + jl_genericmemory_t *mem = (jl_genericmemory_t*)ai; + size_t j, al = mem->length; + precount = (precount > al) ? precount - al : 0; + _grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra); + assert(newargs != NULL); // inform GCChecker that we didn't write a NULL here + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout; + if (layout->flags.arrayelem_isboxed) { + for (j = 0; j < al; j++) { + jl_value_t *arg = jl_genericmemory_ptr_ref(mem, j); + // apply with array splatting may have embedded NULL value (#11772) + if (__unlikely(arg == NULL)) + jl_throw(jl_undefref_exception); + newargs[n++] = arg; + if (arg_heap) + jl_gc_wb(arg_heap, arg); + } + } + else { + for (j = 0; j < al; j++) { + newargs[n++] = jl_genericmemoryref(mem, j); + if (arg_heap) + jl_gc_wb(arg_heap, newargs[n - 1]); + } + } + } else if (jl_is_array(ai)) { jl_array_t *aai = (jl_array_t*)ai; size_t j, al = jl_array_len(aai); precount = (precount > al) ? 
precount - al : 0; _grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra); assert(newargs != NULL); // inform GCChecker that we didn't write a NULL here - if (aai->flags.ptrarray) { + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(aai->ref.mem))->layout; + if (layout->flags.arrayelem_isboxed) { for (j = 0; j < al; j++) { jl_value_t *arg = jl_array_ptr_ref(aai, j); // apply with array splatting may have embedded NULL value (#11772) @@ -893,13 +994,17 @@ static inline size_t get_checked_fieldindex(const char *name, jl_datatype_t *st, if (idx >= jl_datatype_nfields(st)) jl_bounds_error(v, arg); } - else { - JL_TYPECHKS(name, symbol, arg); + else if (jl_is_symbol(arg)) { idx = jl_field_index(st, (jl_sym_t*)arg, 1); } + else { + jl_value_t *ts[2] = {(jl_value_t*)jl_long_type, (jl_value_t*)jl_symbol_type}; + jl_value_t *t = jl_type_union(ts, 2); + jl_type_error("getfield", t, arg); + } if (mutabl && jl_field_isconst(st, idx)) { jl_errorf("%s: const field .%s of type %s cannot be changed", name, - jl_symbol_name((jl_sym_t*)jl_svec_ref(jl_field_names(st), idx)), jl_symbol_name(st->name->name)); + jl_symbol_name((jl_sym_t*)jl_svecref(jl_field_names(st), idx)), jl_symbol_name(st->name->name)); } return idx; } @@ -1186,49 +1291,55 @@ JL_CALLABLE(jl_f_getglobal) JL_TYPECHK(getglobal, symbol, args[2]); order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0); } - JL_TYPECHK(getglobal, module, args[0]); - JL_TYPECHK(getglobal, symbol, args[1]); + jl_module_t *mod = (jl_module_t*)args[0]; + jl_sym_t *sym = (jl_sym_t*)args[1]; + JL_TYPECHK(getglobal, module, (jl_value_t*)mod); + JL_TYPECHK(getglobal, symbol, (jl_value_t*)sym); if (order == jl_memory_order_notatomic) jl_atomic_error("getglobal: module binding cannot be read non-atomically"); - jl_value_t *v = jl_eval_global_var((jl_module_t*)args[0], (jl_sym_t*)args[1]); + jl_value_t *v = jl_eval_global_var(mod, sym); // is seq_cst already, no fence needed return v; } JL_CALLABLE(jl_f_setglobal) { - enum jl_memory_order order = jl_memory_order_monotonic; + enum jl_memory_order order = jl_memory_order_release; JL_NARGS(setglobal!, 3, 4); if (nargs == 4) { JL_TYPECHK(setglobal!, symbol, args[3]); order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 0, 1); } - JL_TYPECHK(setglobal!, module, args[0]); - JL_TYPECHK(setglobal!, symbol, args[1]); + jl_module_t *mod = (jl_module_t*)args[0]; + jl_sym_t *var = (jl_sym_t*)args[1]; + JL_TYPECHK(setglobal!, module, (jl_value_t*)mod); + JL_TYPECHK(setglobal!, symbol, (jl_value_t*)var); if (order == jl_memory_order_notatomic) jl_atomic_error("setglobal!: module binding cannot be written non-atomically"); // is seq_cst already, no fence needed - jl_binding_t *b = jl_get_binding_wr_or_error((jl_module_t*)args[0], (jl_sym_t*)args[1]); - jl_checked_assignment(b, args[2]); + jl_binding_t *b = jl_get_binding_wr(mod, var); + jl_checked_assignment(b, mod, var, args[2]); return args[2]; } JL_CALLABLE(jl_f_get_binding_type) { JL_NARGS(get_binding_type, 2, 2); - JL_TYPECHK(get_binding_type, module, args[0]); - JL_TYPECHK(get_binding_type, symbol, args[1]); jl_module_t *mod = (jl_module_t*)args[0]; - jl_sym_t *sym = (jl_sym_t*)args[1]; - jl_value_t *ty = jl_binding_type(mod, sym); + jl_sym_t *var = (jl_sym_t*)args[1]; + JL_TYPECHK(get_binding_type, module, (jl_value_t*)mod); + JL_TYPECHK(get_binding_type, symbol, (jl_value_t*)var); + jl_value_t *ty = jl_get_binding_type(mod, var); if (ty == (jl_value_t*)jl_nothing) { - jl_binding_t *b = jl_get_binding_wr(mod, sym, 0); - if (b && 
b->owner == mod) { - jl_value_t *old_ty = NULL; - jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type); - return jl_atomic_load_relaxed(&b->ty); - } - return (jl_value_t*)jl_any_type; + jl_binding_t *b = jl_get_module_binding(mod, var, 0); + if (b == NULL) + return (jl_value_t*)jl_any_type; + jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); + if (b2 != b) + return (jl_value_t*)jl_any_type; + jl_value_t *old_ty = NULL; + jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type); + return jl_atomic_load_relaxed(&b->ty); } return ty; } @@ -1236,42 +1347,55 @@ JL_CALLABLE(jl_f_get_binding_type) JL_CALLABLE(jl_f_set_binding_type) { JL_NARGS(set_binding_type!, 2, 3); - JL_TYPECHK(set_binding_type!, module, args[0]); - JL_TYPECHK(set_binding_type!, symbol, args[1]); + jl_module_t *m = (jl_module_t*)args[0]; + jl_sym_t *s = (jl_sym_t*)args[1]; + JL_TYPECHK(set_binding_type!, module, (jl_value_t*)m); + JL_TYPECHK(set_binding_type!, symbol, (jl_value_t*)s); jl_value_t *ty = nargs == 2 ? (jl_value_t*)jl_any_type : args[2]; JL_TYPECHK(set_binding_type!, type, ty); - jl_binding_t *b = jl_get_binding_wr((jl_module_t*)args[0], (jl_sym_t*)args[1], 1); + jl_binding_t *b = jl_get_binding_wr(m, s); jl_value_t *old_ty = NULL; if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, ty) && ty != old_ty) { if (nargs == 2) return jl_nothing; - jl_errorf("cannot set type for global %s. It already has a value or is already set to a different type.", - jl_symbol_name(b->name)); + jl_errorf("cannot set type for global %s.%s. It already has a value or is already set to a different type.", + jl_symbol_name(m->name), jl_symbol_name(s)); } + jl_gc_wb(b, ty); return jl_nothing; } // apply_type ----------------------------------------------------------------- -int jl_valid_type_param(jl_value_t *v) +static int is_nestable_type_param(jl_value_t *t) { - if (jl_is_tuple(v)) { + if (jl_is_namedtuple_type(t)) + t = jl_tparam1(t); + if (jl_is_tuple_type(t)) { // NOTE: tuples of symbols are not currently bits types, but have been // allowed as type parameters. this is a bit ugly. - jl_value_t *tt = jl_typeof(v); - size_t i, l = jl_nparams(tt); - for(i=0; i < l; i++) { - jl_value_t *pi = jl_tparam(tt,i); - if (!(pi == (jl_value_t*)jl_symbol_type || jl_isbits(pi))) + size_t i, l = jl_nparams(t); + for (i = 0; i < l; i++) { + jl_value_t *pi = jl_tparam(t, i); + if (!(pi == (jl_value_t*)jl_symbol_type || jl_isbits(pi) || is_nestable_type_param(pi) || + jl_is_module(pi))) return 0; } return 1; } + return 0; +} + +int jl_valid_type_param(jl_value_t *v) +{ + if (jl_is_tuple(v) || jl_is_namedtuple(v)) + return is_nestable_type_param(jl_typeof(v)); if (jl_is_vararg(v)) return 0; // TODO: maybe more things - return jl_is_type(v) || jl_is_typevar(v) || jl_is_symbol(v) || jl_isbits(jl_typeof(v)); + return jl_is_type(v) || jl_is_typevar(v) || jl_is_symbol(v) || jl_isbits(jl_typeof(v)) || + jl_is_module(v); } JL_CALLABLE(jl_f_apply_type) @@ -1291,7 +1415,7 @@ JL_CALLABLE(jl_f_apply_type) jl_type_error_rt("Tuple", "parameter", (jl_value_t*)jl_type_type, pi); } } - return (jl_value_t*)jl_apply_tuple_type_v(&args[1], nargs-1); + return jl_apply_tuple_type_v(&args[1], nargs-1); } else if (args[0] == (jl_value_t*)jl_uniontype_type) { // Union{} has extra restrictions, so it needs to be checked after @@ -1302,11 +1426,11 @@ JL_CALLABLE(jl_f_apply_type) jl_vararg_t *vm = (jl_vararg_t*)args[0]; if (!vm->T) { JL_NARGS(apply_type, 2, 3); - return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? 
args[2] : NULL); + return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL, 1); } else if (!vm->N) { JL_NARGS(apply_type, 2, 2); - return (jl_value_t*)jl_wrap_vararg(vm->T, args[1]); + return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1); } } else if (jl_is_unionall(args[0])) { @@ -1341,56 +1465,12 @@ JL_CALLABLE(jl_f_invoke) if (!jl_is_tuple_type(jl_unwrap_unionall(args[1]))) jl_type_error("invoke", (jl_value_t*)jl_anytuple_type_type, args[1]); if (!jl_tuple_isa(&args[2], nargs - 2, (jl_datatype_t*)argtypes)) - jl_error("invoke: argument type error"); + jl_type_error("invoke: argument type error", argtypes, jl_f_tuple(NULL, &args[2], nargs - 2)); jl_value_t *res = jl_gf_invoke(argtypes, args[0], &args[2], nargs - 1); JL_GC_POP(); return res; } -JL_CALLABLE(jl_f_invoke_kwsorter) -{ - JL_NARGSV(invoke, 3); - jl_value_t *kwargs = args[0]; - // args[1] is `invoke` itself - jl_value_t *func = args[2]; - jl_value_t *argtypes = args[3]; - jl_value_t *kws = jl_get_keyword_sorter(func); - JL_GC_PUSH1(&argtypes); - if (jl_is_tuple_type(argtypes)) { - // construct a tuple type for invoking a keyword sorter by putting the kw container type - // and the type of the function at the front. - size_t i, nt = jl_nparams(argtypes) + 2; - if (nt < jl_page_size/sizeof(jl_value_t*)) { - jl_value_t **types = (jl_value_t**)alloca(nt*sizeof(jl_value_t*)); - types[0] = (jl_value_t*)jl_namedtuple_type; - types[1] = jl_is_type(func) ? (jl_value_t*)jl_wrap_Type(func) : jl_typeof(func); - for (i = 2; i < nt; i++) - types[i] = jl_tparam(argtypes, i - 2); - argtypes = (jl_value_t*)jl_apply_tuple_type_v(types, nt); - } - else { - jl_svec_t *types = jl_alloc_svec_uninit(nt); - JL_GC_PUSH1(&types); - jl_svecset(types, 0, jl_namedtuple_type); - jl_svecset(types, 1, jl_is_type(func) ? 
(jl_value_t*)jl_wrap_Type(func) : jl_typeof(func)); - for (i = 2; i < nt; i++) - jl_svecset(types, i, jl_tparam(argtypes, i - 2)); - argtypes = (jl_value_t*)jl_apply_tuple_type(types); - JL_GC_POP(); - } - } - else { - // invoke will throw an error - } - args[0] = kws; - args[1] = argtypes; - args[2] = kwargs; - args[3] = func; - jl_value_t *res = jl_f_invoke(NULL, args, nargs); - JL_GC_POP(); - return res; -} - // Expr constructor for internal use ------------------------------------------ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n) @@ -1432,6 +1512,7 @@ JL_DLLEXPORT jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_ jl_type_error_rt("TypeVar", "upper bound", (jl_value_t *)jl_type_type, ub); jl_task_t *ct = jl_current_task; jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ct->ptls, sizeof(jl_tvar_t), jl_tvar_type); + jl_set_typetagof(tv, jl_tvar_tag, 0); tv->name = name; tv->lb = lb; tv->ub = ub; @@ -1445,70 +1526,115 @@ JL_CALLABLE(jl_f__typevar) return (jl_value_t *)jl_new_typevar((jl_sym_t*)args[0], args[1], args[2]); } -// arrays --------------------------------------------------------------------- - -JL_CALLABLE(jl_f_arraysize) -{ - JL_NARGS(arraysize, 2, 2); - JL_TYPECHK(arraysize, array, args[0]); - jl_array_t *a = (jl_array_t*)args[0]; - size_t nd = jl_array_ndims(a); - JL_TYPECHK(arraysize, long, args[1]); - int dno = jl_unbox_long(args[1]); - if (dno < 1) - jl_error("arraysize: dimension out of range"); - if (dno > nd) - return jl_box_long(1); - return jl_box_long((&a->nrows)[dno-1]); -} +// genericmemory --------------------------------------------------------------------- -static size_t array_nd_index(jl_array_t *a, jl_value_t **args, size_t nidxs, - const char *fname) +JL_CALLABLE(jl_f_memoryref) { - size_t i = 0; - size_t k, stride = 1; - size_t nd = jl_array_ndims(a); - for (k = 0; k < nidxs; k++) { - if (!jl_is_long(args[k])) - jl_type_error(fname, (jl_value_t*)jl_long_type, args[k]); - size_t ii = jl_unbox_long(args[k]) - 1; - i += ii * stride; - size_t d = (k >= nd) ? 
1 : jl_array_dim(a, k); - if (k < nidxs - 1 && ii >= d) - jl_bounds_error_v((jl_value_t*)a, args, nidxs); - stride *= d; + JL_NARGS(memoryref, 1, 3); + if (nargs == 1) { + JL_TYPECHK(memoryref, genericmemory, args[0]); + jl_genericmemory_t *m = (jl_genericmemory_t*)args[0]; + jl_value_t *typ = jl_apply_type((jl_value_t*)jl_genericmemoryref_type, jl_svec_data(((jl_datatype_t*)jl_typetagof(m))->parameters), 3); + JL_GC_PROMISE_ROOTED(typ); // it is a concrete type + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + if (layout->flags.arrayelem_isunion || layout->size == 0) + return (jl_value_t*)jl_new_memoryref(typ, m, 0); + return (jl_value_t*)jl_new_memoryref(typ, m, m->ptr); + } + else { + JL_TYPECHK(memoryref, genericmemoryref, args[0]); + JL_TYPECHK(memoryref, long, args[1]); + if (nargs == 3) + JL_TYPECHK(memoryref, bool, args[2]); + jl_genericmemoryref_t *m = (jl_genericmemoryref_t*)args[0]; + size_t i = jl_unbox_long(args[1]) - 1; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m->mem))->layout; + char *data = (char*)m->ptr_or_offset; + if (layout->flags.arrayelem_isboxed) { + if (((data - (char*)m->mem->ptr) / sizeof(jl_value_t*)) + i >= m->mem->length) + jl_bounds_error((jl_value_t*)m, args[1]); + data += sizeof(jl_value_t*) * i; + } + else if (layout->flags.arrayelem_isunion || layout->size == 0) { + if ((size_t)data + i >= m->mem->length) + jl_bounds_error((jl_value_t*)m, args[1]); + data += i; + } + else { + if (((data - (char*)m->mem->ptr) / layout->size) + i >= m->mem->length) + jl_bounds_error((jl_value_t*)m, args[1]); + data += layout->size * i; + } + return (jl_value_t*)jl_new_memoryref((jl_value_t*)jl_typetagof(m), m->mem, data); } - for (; k < nd; k++) - stride *= jl_array_dim(a, k); - if (i >= stride) - jl_bounds_error_v((jl_value_t*)a, args, nidxs); - return i; -} - -JL_CALLABLE(jl_f_arrayref) -{ - JL_NARGSV(arrayref, 3); - JL_TYPECHK(arrayref, bool, args[0]); - JL_TYPECHK(arrayref, array, args[1]); - jl_array_t *a = (jl_array_t*)args[1]; - size_t i = array_nd_index(a, &args[2], nargs - 2, "arrayref"); - return jl_arrayref(a, i); } -JL_CALLABLE(jl_f_const_arrayref) +JL_CALLABLE(jl_f_memoryrefoffset) { - return jl_f_arrayref(F, args, nargs); + JL_NARGS(memoryrefoffset, 1, 1); + JL_TYPECHK(memoryref, genericmemoryref, args[0]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + size_t offset; + if (layout->flags.arrayelem_isboxed) { + offset = (((char*)m.ptr_or_offset - (char*)m.mem->ptr) / sizeof(jl_value_t*)); + } + else if (layout->flags.arrayelem_isunion || layout->size == 0) { + offset = (size_t)m.ptr_or_offset; + } + else { + offset = ((char*)m.ptr_or_offset - (char*)m.mem->ptr) / layout->size; + } + return (jl_value_t*)jl_box_long(offset + 1); +} + +JL_CALLABLE(jl_f_memoryrefget) +{ + JL_NARGS(memoryrefget, 3, 3); + JL_TYPECHK(memoryrefget, genericmemoryref, args[0]); + JL_TYPECHK(memoryrefget, symbol, args[1]); + JL_TYPECHK(memoryrefget, bool, args[2]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *isatomic = jl_tparam0(jl_typetagof(m.mem)); + if (isatomic == jl_false) + if (args[1] != (jl_value_t*)jl_not_atomic_sym) + jl_atomic_error("memoryrefget!: non-atomic memory cannot be accessed atomically"); + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + return jl_memoryrefget(m); +} + +JL_CALLABLE(jl_f_memoryrefset) +{ + JL_NARGS(memoryrefset!, 4, 4); + 
JL_TYPECHK(memoryrefset!, genericmemoryref, args[0]); + JL_TYPECHK(memoryrefset!, symbol, args[2]); + JL_TYPECHK(memoryrefset!, bool, args[3]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *isatomic = jl_tparam0(jl_typetagof(m.mem)); + if (isatomic == jl_false) + if (args[2] != (jl_value_t*)jl_not_atomic_sym) + jl_atomic_error("memoryrefset!: non-atomic memory cannot be written atomically"); + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + jl_memoryrefset(m, args[1]); + return args[0]; } -JL_CALLABLE(jl_f_arrayset) +JL_CALLABLE(jl_f_memoryref_isassigned) { - JL_NARGSV(arrayset, 4); - JL_TYPECHK(arrayset, bool, args[0]); - JL_TYPECHK(arrayset, array, args[1]); - jl_array_t *a = (jl_array_t*)args[1]; - size_t i = array_nd_index(a, &args[3], nargs - 3, "arrayset"); - jl_arrayset(a, args[2], i); - return args[1]; + JL_NARGS(memoryref_isassigned, 3, 3); + JL_TYPECHK(memoryref_isassigned, genericmemoryref, args[0]); + JL_TYPECHK(memoryref_isassigned, symbol, args[1]); + JL_TYPECHK(memoryref_isassigned, bool, args[2]); + jl_genericmemoryref_t m = *(jl_genericmemoryref_t*)args[0]; + jl_value_t *isatomic = jl_tparam0(jl_typetagof(m.mem)); + if (isatomic == jl_false) + if (args[1] != (jl_value_t*)jl_not_atomic_sym) + jl_atomic_error("memoryref_isassigned!: non-atomic memory cannot be accessed atomically"); + if (m.mem->length == 0) + return jl_false; + return jl_memoryref_isassigned(m); } // type definition ------------------------------------------------------------ @@ -1600,14 +1726,56 @@ JL_CALLABLE(jl_f_donotdelete) return jl_nothing; } +JL_CALLABLE(jl_f_compilerbarrier) +{ + JL_NARGS(compilerbarrier, 2, 2); + JL_TYPECHK(compilerbarrier, symbol, args[0]) + jl_sym_t *setting = (jl_sym_t*)args[0]; + if (!(setting == jl_symbol("type") || + setting == jl_symbol("const") || + setting == jl_symbol("conditional"))) + jl_error("The first argument of `compilerbarrier` must be either of `:type`, `:const` or `:conditional`."); + jl_value_t *val = args[1]; + return val; +} + JL_CALLABLE(jl_f_finalizer) { + // NOTE the compiler may temporarily insert additional argument for the later inlining pass JL_NARGS(finalizer, 2, 4); jl_task_t *ct = jl_current_task; jl_gc_add_finalizer_(ct->ptls, args[1], args[0]); return jl_nothing; } +JL_CALLABLE(jl_f__compute_sparams) +{ + JL_NARGSV(_compute_sparams, 1); + jl_method_t *m = (jl_method_t*)args[0]; + JL_TYPECHK(_compute_sparams, method, (jl_value_t*)m); + jl_datatype_t *tt = jl_inst_arg_tuple_type(args[1], &args[2], nargs-1, 1); + jl_svec_t *env = jl_emptysvec; + JL_GC_PUSH2(&env, &tt); + jl_type_intersection_env((jl_value_t*)tt, m->sig, &env); + JL_GC_POP(); + return (jl_value_t*)env; +} + +JL_CALLABLE(jl_f__svec_ref) +{ + JL_NARGS(_svec_ref, 2, 2); + jl_svec_t *s = (jl_svec_t*)args[0]; + jl_value_t *i = (jl_value_t*)args[1]; + JL_TYPECHK(_svec_ref, simplevector, (jl_value_t*)s); + JL_TYPECHK(_svec_ref, long, i); + size_t len = jl_svec_len(s); + ssize_t idx = jl_unbox_long(i); + if (idx < 1 || idx > len) { + jl_bounds_error_int((jl_value_t*)s, idx); + } + return jl_svecref(s, idx-1); +} + static int equiv_field_types(jl_value_t *old, jl_value_t *ft) { size_t nf = jl_svec_len(ft); @@ -1621,7 +1789,7 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft) if (!jl_has_free_typevars(tb) || !jl_egal(ta, tb)) return 0; } - else if (jl_has_free_typevars(tb) || jl_typeof(ta) != jl_typeof(tb) || + else if (jl_has_free_typevars(tb) || jl_typetagof(ta) != jl_typetagof(tb) || !jl_types_equal(ta, tb)) { return 0; } 
@@ -1634,36 +1802,48 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft) // inline it. The only way fields can reference this type (due to // syntax-enforced restrictions) is via being passed as a type parameter. Thus // we can conservatively check this by examining only the parameters of the -// dependent types. -// affects_layout is a hack introduced by #35275 to workaround a problem -// introduced by #34223: it checks whether we will potentially need to -// compute the layout of the object before we have fully computed the types of -// the fields during recursion over the allocation of the parameters for the -// field types (of the concrete subtypes) -static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout) JL_NOTSAFEPOINT -{ - if (jl_is_uniontype(p)) - return references_name(((jl_uniontype_t*)p)->a, name, affects_layout) || - references_name(((jl_uniontype_t*)p)->b, name, affects_layout); - if (jl_is_unionall(p)) - return references_name((jl_value_t*)((jl_unionall_t*)p)->var->lb, name, 0) || - references_name((jl_value_t*)((jl_unionall_t*)p)->var->ub, name, 0) || - references_name(((jl_unionall_t*)p)->body, name, affects_layout); +// dependent types. Additionally, a field might have already observed this +// object for layout purposes before we got around to deciding if inlining +// would be possible, so we cannot change the layout now if so. +// affects_layout is a (conservative) analysis of layout_uses_free_typevars +// freevars is a (conservative) analysis of what calling jl_has_bound_typevars from name->wrapper gives (TODO: just call this instead?) +static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout, int freevars) JL_NOTSAFEPOINT +{ + if (freevars && !jl_has_free_typevars(p)) + freevars = 0; + while (jl_is_unionall(p)) { + if (references_name((jl_value_t*)((jl_unionall_t*)p)->var->lb, name, 0, freevars) || + references_name((jl_value_t*)((jl_unionall_t*)p)->var->ub, name, 0, freevars)) + return 1; + p = ((jl_unionall_t*)p)->body; + } + if (jl_is_uniontype(p)) { + return references_name(((jl_uniontype_t*)p)->a, name, affects_layout, freevars) || + references_name(((jl_uniontype_t*)p)->b, name, affects_layout, freevars); + } if (jl_is_typevar(p)) return 0; // already checked by unionall, if applicable if (jl_is_datatype(p)) { jl_datatype_t *dp = (jl_datatype_t*)p; if (affects_layout && dp->name == name) return 1; - // affects_layout checks whether we will need to attempt to layout this - // type (based on whether all copies of it have the same layout) in - // that case, we still need to check the recursive parameters for - // layout recursion happening also, but we know it won't itself cause - // problems for the layout computation - affects_layout = ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->layout == NULL; + affects_layout = jl_is_genericmemory_type(dp) || ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->layout == NULL; + // and even if it has a layout, the fields themselves might trigger layouts if they use tparam i + // rather than checking this for each field, we just assume it applies + if (!affects_layout && freevars && jl_field_names(dp) != jl_emptysvec) { + jl_svec_t *types = ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->types; + size_t i, l = jl_svec_len(types); + for (i = 0; i < l; i++) { + jl_value_t *ft = jl_svecref(types, i); + if (!jl_is_typevar(ft) && jl_has_free_typevars(ft)) { + affects_layout = 1; + break; + } + } + } size_t i, l = jl_nparams(p); for (i = 0; i < 
l; i++) { - if (references_name(jl_tparam(p, i), name, affects_layout)) + if (references_name(jl_tparam(p, i), name, affects_layout, freevars)) return 1; } } @@ -1699,12 +1879,12 @@ JL_CALLABLE(jl_f__typebody) // able to compute the layout of the object before needing to // publish it, so we must assume it cannot be inlined, if that // check passes, then we also still need to check the fields too. - if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 1))) { + if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 0, 1))) { int mayinlinealloc = 1; size_t i; for (i = 0; i < nf; i++) { jl_value_t *fld = jl_svecref(ft, i); - if (references_name(fld, dt->name, 1)) { + if (references_name(fld, dt->name, 1, 1)) { mayinlinealloc = 0; break; } @@ -1734,12 +1914,13 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb) if (!jl_is_datatype(dta)) return 0; jl_datatype_t *dtb = (jl_datatype_t*)jl_unwrap_unionall(tb); - if (!(jl_typeof(dta) == jl_typeof(dtb) && + if (!(jl_typetagof(dta) == jl_typetagof(dtb) && dta->name->name == dtb->name->name && dta->name->abstract == dtb->name->abstract && dta->name->mutabl == dtb->name->mutabl && dta->name->n_uninitialized == dtb->name->n_uninitialized && - (jl_svec_len(jl_field_names(dta)) != 0 || dta->size == dtb->size) && + dta->isprimitivetype == dtb->isprimitivetype && + (!dta->isprimitivetype || dta->layout->size == dtb->layout->size) && (dta->name->atomicfields == NULL ? dtb->name->atomicfields == NULL : (dtb->name->atomicfields != NULL && @@ -1798,13 +1979,12 @@ static unsigned intrinsic_nargs[num_intrinsics]; JL_CALLABLE(jl_f_intrinsic_call) { - JL_TYPECHK(intrinsic_call, intrinsic, F); enum intrinsic f = (enum intrinsic)*(uint32_t*)jl_data_ptr(F); if (f == cglobal && nargs == 1) f = cglobal_auto; unsigned fargs = intrinsic_nargs[f]; if (!fargs) - jl_errorf("`%s` must be compiled to be called", jl_intrinsic_name(f)); + jl_errorf("`%s` requires the compiler", jl_intrinsic_name(f)); JL_NARGS(intrinsic_call, fargs, fargs); union { @@ -1857,16 +2037,17 @@ unsigned jl_intrinsic_nargs(int f) static void add_intrinsic_properties(enum intrinsic f, unsigned nargs, void (*pfunc)(void)) { + assert(nargs <= 5 && "jl_f_intrinsic_call only implements up to 5 args"); intrinsic_nargs[f] = nargs; runtime_fp[f] = pfunc; } static void add_intrinsic(jl_module_t *inm, const char *name, enum intrinsic f) JL_GC_DISABLED { - jl_value_t *i = jl_permbox32(jl_intrinsic_type, (int32_t)f); + jl_value_t *i = jl_permbox32(jl_intrinsic_type, 0, (int32_t)f); jl_sym_t *sym = jl_symbol(name); jl_set_const(inm, sym, i); - jl_module_export(inm, sym); + jl_module_public(inm, sym, 1); } void jl_init_intrinsic_properties(void) JL_GC_DISABLED @@ -1882,7 +2063,7 @@ void jl_init_intrinsic_properties(void) JL_GC_DISABLED void jl_init_intrinsic_functions(void) JL_GC_DISABLED { - jl_module_t *inm = jl_new_module(jl_symbol("Intrinsics")); + jl_module_t *inm = jl_new_module(jl_symbol("Intrinsics"), NULL); inm->parent = jl_core_module; jl_set_const(jl_core_module, jl_symbol("Intrinsics"), (jl_value_t*)inm); jl_mk_builtin_func(jl_intrinsic_type, "IntrinsicFunction", jl_f_intrinsic_call); @@ -1890,6 +2071,11 @@ void jl_init_intrinsic_functions(void) JL_GC_DISABLED (jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type), "OpaqueClosure", jl_f_opaque_closure_call); + // Save a reference to the just created OpaqueClosure method, so we can provide special + // codegen for it later. 
+ jl_opaque_closure_method = (jl_method_t*)jl_methtable_lookup(jl_opaque_closure_typename->mt, + (jl_value_t*)jl_anytuple_type, 1); + #define ADD_I(name, nargs) add_intrinsic(inm, #name, name); #define ADD_HIDDEN(name, nargs) #define ALIAS ADD_I @@ -1904,10 +2090,10 @@ static void add_builtin(const char *name, jl_value_t *v) jl_set_const(jl_core_module, jl_symbol(name), v); } -jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b) +jl_fptr_args_t jl_get_builtin_fptr(jl_datatype_t *dt) { - assert(jl_isa(b, (jl_value_t*)jl_builtin_type)); - jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_atomic_load_relaxed(&jl_gf_mtable(b)->defs); + assert(jl_subtype((jl_value_t*)dt, (jl_value_t*)jl_builtin_type)); + jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_atomic_load_relaxed(&dt->name->mt->defs); jl_method_instance_t *mi = jl_atomic_load_relaxed(&entry->func.method->unspecialized); jl_code_instance_t *ci = jl_atomic_load_relaxed(&mi->cache); return jl_atomic_load_relaxed(&ci->specptr.fptr1); @@ -1946,20 +2132,16 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin_func("get_binding_type", jl_f_get_binding_type); add_builtin_func("set_binding_type!", jl_f_set_binding_type); - // array primitives - jl_builtin_arrayref = add_builtin_func("arrayref", jl_f_arrayref); - jl_builtin_const_arrayref = add_builtin_func("const_arrayref", jl_f_arrayref); - jl_builtin_arrayset = add_builtin_func("arrayset", jl_f_arrayset); - jl_builtin_arraysize = add_builtin_func("arraysize", jl_f_arraysize); + // memory primitives + jl_builtin_memoryref = add_builtin_func("memoryref", jl_f_memoryref); + jl_builtin_memoryrefoffset = add_builtin_func("memoryrefoffset", jl_f_memoryrefoffset); + jl_builtin_memoryrefget = add_builtin_func("memoryrefget", jl_f_memoryrefget); + jl_builtin_memoryrefset = add_builtin_func("memoryrefset!", jl_f_memoryrefset); + jl_builtin_memoryref_isassigned = add_builtin_func("memoryref_isassigned", jl_f_memoryref_isassigned); // method table utils jl_builtin_applicable = add_builtin_func("applicable", jl_f_applicable); jl_builtin_invoke = add_builtin_func("invoke", jl_f_invoke); - jl_typename_t *itn = ((jl_datatype_t*)jl_typeof(jl_builtin_invoke))->name; - jl_value_t *ikws = jl_new_generic_function_with_supertype(itn->name, jl_core_module, jl_builtin_type); - itn->mt->kwsorter = ikws; - jl_gc_wb(itn->mt, ikws); - jl_mk_builtin_func((jl_datatype_t*)jl_typeof(ikws), jl_symbol_name(jl_gf_name(ikws)), jl_f_invoke_kwsorter); // internal functions jl_builtin_apply_type = add_builtin_func("apply_type", jl_f_apply_type); @@ -1978,7 +2160,11 @@ void jl_init_primitives(void) JL_GC_DISABLED jl_builtin__typebody = add_builtin_func("_typebody!", jl_f__typebody); add_builtin_func("_equiv_typedef", jl_f__equiv_typedef); jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete); + jl_builtin_compilerbarrier = add_builtin_func("compilerbarrier", jl_f_compilerbarrier); add_builtin_func("finalizer", jl_f_finalizer); + add_builtin_func("_compute_sparams", jl_f__compute_sparams); + add_builtin_func("_svec_ref", jl_f__svec_ref); + add_builtin_func("current_scope", jl_f_current_scope); // builtin types add_builtin("Any", (jl_value_t*)jl_any_type); @@ -1994,6 +2180,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("Tuple", (jl_value_t*)jl_anytuple_type); add_builtin("TypeofVararg", (jl_value_t*)jl_vararg_type); add_builtin("SimpleVector", (jl_value_t*)jl_simplevector_type); + add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0)); add_builtin("Module", 
(jl_value_t*)jl_module_type); add_builtin("MethodTable", (jl_value_t*)jl_methtable_type); @@ -2003,34 +2190,38 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("TypeMapLevel", (jl_value_t*)jl_typemap_level_type); add_builtin("Symbol", (jl_value_t*)jl_symbol_type); add_builtin("SSAValue", (jl_value_t*)jl_ssavalue_type); - add_builtin("Slot", (jl_value_t*)jl_abstractslot_type); add_builtin("SlotNumber", (jl_value_t*)jl_slotnumber_type); - add_builtin("TypedSlot", (jl_value_t*)jl_typedslot_type); add_builtin("Argument", (jl_value_t*)jl_argument_type); add_builtin("Const", (jl_value_t*)jl_const_type); add_builtin("PartialStruct", (jl_value_t*)jl_partial_struct_type); add_builtin("PartialOpaque", (jl_value_t*)jl_partial_opaque_type); + add_builtin("InterConditional", (jl_value_t*)jl_interconditional_type); add_builtin("MethodMatch", (jl_value_t*)jl_method_match_type); add_builtin("IntrinsicFunction", (jl_value_t*)jl_intrinsic_type); add_builtin("Function", (jl_value_t*)jl_function_type); add_builtin("Builtin", (jl_value_t*)jl_builtin_type); add_builtin("MethodInstance", (jl_value_t*)jl_method_instance_type); add_builtin("CodeInfo", (jl_value_t*)jl_code_info_type); - add_builtin("Ref", (jl_value_t*)jl_ref_type); - add_builtin("Ptr", (jl_value_t*)jl_pointer_type); add_builtin("LLVMPtr", (jl_value_t*)jl_llvmpointer_type); add_builtin("Task", (jl_value_t*)jl_task_type); add_builtin("OpaqueClosure", (jl_value_t*)jl_opaque_closure_type); + add_builtin("AddrSpace", (jl_value_t*)jl_addrspace_type); + add_builtin("Ref", (jl_value_t*)jl_ref_type); + add_builtin("Ptr", (jl_value_t*)jl_pointer_type); + //add_builtin("GenericPtr", (jl_value_t*)jl_genericpointer_type); add_builtin("AbstractArray", (jl_value_t*)jl_abstractarray_type); add_builtin("DenseArray", (jl_value_t*)jl_densearray_type); add_builtin("Array", (jl_value_t*)jl_array_type); + add_builtin("GenericMemory", (jl_value_t*)jl_genericmemory_type); + add_builtin("GenericMemoryRef", (jl_value_t*)jl_genericmemoryref_type); add_builtin("Expr", (jl_value_t*)jl_expr_type); add_builtin("LineNumberNode", (jl_value_t*)jl_linenumbernode_type); add_builtin("LineInfoNode", (jl_value_t*)jl_lineinfonode_type); add_builtin("GotoNode", (jl_value_t*)jl_gotonode_type); add_builtin("GotoIfNot", (jl_value_t*)jl_gotoifnot_type); + add_builtin("EnterNode", (jl_value_t*)jl_enternode_type); add_builtin("ReturnNode", (jl_value_t*)jl_returnnode_type); add_builtin("PiNode", (jl_value_t*)jl_pinode_type); add_builtin("PhiNode", (jl_value_t*)jl_phinode_type); @@ -2038,6 +2229,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("UpsilonNode", (jl_value_t*)jl_upsilonnode_type); add_builtin("QuoteNode", (jl_value_t*)jl_quotenode_type); add_builtin("NewvarNode", (jl_value_t*)jl_newvarnode_type); + add_builtin("Binding", (jl_value_t*)jl_binding_type); add_builtin("GlobalRef", (jl_value_t*)jl_globalref_type); add_builtin("NamedTuple", (jl_value_t*)jl_namedtuple_type); diff --git a/src/ccall.cpp b/src/ccall.cpp index 88c80b333b027..92ede74402791 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -27,17 +27,16 @@ TRANSFORMED_CCALL_STAT(jl_get_current_task); TRANSFORMED_CCALL_STAT(jl_set_next_task); TRANSFORMED_CCALL_STAT(jl_sigatomic_begin); TRANSFORMED_CCALL_STAT(jl_sigatomic_end); -TRANSFORMED_CCALL_STAT(jl_svec_len); -TRANSFORMED_CCALL_STAT(jl_svec_isassigned); -TRANSFORMED_CCALL_STAT(jl_svec_ref); -TRANSFORMED_CCALL_STAT(jl_array_isassigned); TRANSFORMED_CCALL_STAT(jl_string_ptr); TRANSFORMED_CCALL_STAT(jl_symbol_name); 
+TRANSFORMED_CCALL_STAT(jl_genericmemory_owner); +TRANSFORMED_CCALL_STAT(jl_alloc_genericmemory); TRANSFORMED_CCALL_STAT(memcpy); TRANSFORMED_CCALL_STAT(memset); TRANSFORMED_CCALL_STAT(memmove); TRANSFORMED_CCALL_STAT(jl_object_id); #undef TRANSFORMED_CCALL_STAT +extern "C" JL_DLLEXPORT jl_value_t *ijl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; STATISTIC(EmittedCCalls, "Number of ccalls emitted"); STATISTIC(DeferredCCallLookups, "Number of ccalls looked up at runtime"); @@ -58,13 +57,10 @@ GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M) static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_name, GlobalVariable *&lib, GlobalVariable *&sym) { - auto &TSM = ctx.emission_context.shared_module(*jl_Module); - //Safe b/c emission context holds context lock - auto M = TSM.getModuleUnlocked(); + auto M = &ctx.emission_context.shared_module(); bool runtime_lib = false; GlobalVariable *libptrgv; jl_codegen_params_t::SymMapGV *symMap; -#ifdef _OS_WINDOWS_ if ((intptr_t)f_lib == (intptr_t)JL_EXE_LIBNAME) { libptrgv = prepare_global_in(M, jlexe_var); symMap = &ctx.emission_context.symMapExe; @@ -77,16 +73,14 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_ libptrgv = prepare_global_in(M, jldll_var); symMap = &ctx.emission_context.symMapDll; } - else -#endif - if (f_lib == NULL) { + else if (f_lib == NULL) { libptrgv = jl_emit_RTLD_DEFAULT_var(M); symMap = &ctx.emission_context.symMapDefault; } else { std::string name = "ccalllib_"; name += llvm::sys::path::filename(f_lib); - name += std::to_string(globalUniqueGeneratedNames++); + name += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1)); runtime_lib = true; auto &libgv = ctx.emission_context.libMapGV[f_lib]; if (libgv.first == NULL) { @@ -106,7 +100,7 @@ static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_ std::string name = "ccall_"; name += f_name; name += "_"; - name += std::to_string(globalUniqueGeneratedNames++); + name += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1)); auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext()); llvmgv = new GlobalVariable(*M, T_pvoidfunc, false, GlobalVariable::ExternalLinkage, @@ -141,6 +135,7 @@ static Value *runtime_sym_lookup( BasicBlock *ccall_bb = BasicBlock::Create(irbuilder.getContext(), "ccall"); Constant *initnul = ConstantPointerNull::get(T_pvoidfunc); LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(T_pvoidfunc, llvmgv, Align(sizeof(void*))); + setName(emission_context, llvmf_orig, f_name + StringRef(".cached")); // This in principle needs a consume ordering so that load from // this pointer sees a valid value. However, this is not supported by // LLVM (or agreed on in the C/C++ standard FWIW) and should be @@ -149,13 +144,15 @@ static Value *runtime_sym_lookup( // invalid load from the `cglobal` but doesn't depend on the `cglobal` // value for this to happen. 
llvmf_orig->setAtomic(AtomicOrdering::Unordered); + auto nonnull = irbuilder.CreateICmpNE(llvmf_orig, initnul); + setName(emission_context, nonnull, "is_cached"); irbuilder.CreateCondBr( - irbuilder.CreateICmpNE(llvmf_orig, initnul), + nonnull, ccall_bb, dlsym_lookup); assert(f->getParent() != NULL); - f->getBasicBlockList().push_back(dlsym_lookup); + dlsym_lookup->insertInto(f); irbuilder.SetInsertPoint(dlsym_lookup); Instruction *llvmf; Value *nameval = stringConstPtr(emission_context, irbuilder, f_name); @@ -171,34 +168,26 @@ static Value *runtime_sym_lookup( } else { // f_lib is actually one of the special sentinel values - libname = ConstantExpr::getIntToPtr(ConstantInt::get(getSizeTy(irbuilder.getContext()), (uintptr_t)f_lib), getInt8PtrTy(irbuilder.getContext())); + libname = ConstantExpr::getIntToPtr(ConstantInt::get(emission_context.DL.getIntPtrType(irbuilder.getContext()), (uintptr_t)f_lib), getInt8PtrTy(irbuilder.getContext())); } - llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func), + auto lookup = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func), { libname, nameval, libptrgv }); + llvmf = lookup; } + setName(emission_context, llvmf, f_name + StringRef(".found")); StoreInst *store = irbuilder.CreateAlignedStore(llvmf, llvmgv, Align(sizeof(void*))); store->setAtomic(AtomicOrdering::Release); irbuilder.CreateBr(ccall_bb); - f->getBasicBlockList().push_back(ccall_bb); + ccall_bb->insertInto(f); irbuilder.SetInsertPoint(ccall_bb); PHINode *p = irbuilder.CreatePHI(T_pvoidfunc, 2); p->addIncoming(llvmf_orig, enter_bb); p->addIncoming(llvmf, llvmf->getParent()); + setName(emission_context, p, f_name); return irbuilder.CreateBitCast(p, funcptype); } -static Value *runtime_sym_lookup( - jl_codectx_t &ctx, - PointerType *funcptype, const char *f_lib, jl_value_t *lib_expr, - const char *f_name, Function *f, - GlobalVariable *libptrgv, - GlobalVariable *llvmgv, bool runtime_lib) -{ - return runtime_sym_lookup(ctx.emission_context, ctx.builder, &ctx, funcptype, f_lib, lib_expr, - f_name, f, libptrgv, llvmgv, runtime_lib); -} - static Value *runtime_sym_lookup( jl_codectx_t &ctx, PointerType *funcptype, const char *f_lib, jl_value_t *lib_expr, @@ -216,7 +205,7 @@ static Value *runtime_sym_lookup( std::string gvname = "libname_"; gvname += f_name; gvname += "_"; - gvname += std::to_string(globalUniqueGeneratedNames++); + gvname += std::to_string(jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1)); llvmgv = new GlobalVariable(*jl_Module, T_pvoidfunc, false, GlobalVariable::ExternalLinkage, Constant::getNullValue(T_pvoidfunc), gvname); @@ -226,7 +215,7 @@ static Value *runtime_sym_lookup( libptrgv = prepare_global_in(jl_Module, libptrgv); } llvmgv = prepare_global_in(jl_Module, llvmgv); - return runtime_sym_lookup(ctx, funcptype, f_lib, lib_expr, f_name, f, libptrgv, llvmgv, runtime_lib); + return runtime_sym_lookup(ctx.emission_context, ctx.builder, &ctx, funcptype, f_lib, lib_expr, f_name, f, libptrgv, llvmgv, runtime_lib); } // Emit a "PLT" entry that will be lazily initialized @@ -239,25 +228,29 @@ static GlobalVariable *emit_plt_thunk( bool runtime_lib) { ++PLTThunks; - auto &TSM = ctx.emission_context.shared_module(*jl_Module); - Module *M = TSM.getModuleUnlocked(); + auto M = &ctx.emission_context.shared_module(); PointerType *funcptype = PointerType::get(functype, 0); libptrgv = prepare_global_in(M, libptrgv); llvmgv = prepare_global_in(M, llvmgv); std::string fname; - raw_string_ostream(fname) << "jlplt_" << f_name << 
"_" << globalUniqueGeneratedNames++; + raw_string_ostream(fname) << "jlplt_" << f_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); Function *plt = Function::Create(functype, - GlobalVariable::ExternalLinkage, + GlobalVariable::PrivateLinkage, fname, M); plt->setAttributes(attrs); if (cc != CallingConv::C) plt->setCallingConv(cc); - fname += "_got"; auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext()); GlobalVariable *got = new GlobalVariable(*M, T_pvoidfunc, false, GlobalVariable::ExternalLinkage, ConstantExpr::getBitCast(plt, T_pvoidfunc), - fname); + fname + "_got"); + if (runtime_lib) { + got->addAttribute("julia.libname", f_lib); + } else { + got->addAttribute("julia.libidx", std::to_string((uintptr_t) f_lib)); + } + got->addAttribute("julia.fname", f_name); BasicBlock *b0 = BasicBlock::Create(M->getContext(), "top", plt); IRBuilder<> irbuilder(b0); Value *ptr = runtime_sym_lookup(ctx.emission_context, irbuilder, NULL, funcptype, f_lib, NULL, f_name, plt, libptrgv, @@ -265,8 +258,8 @@ static GlobalVariable *emit_plt_thunk( StoreInst *store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, Align(sizeof(void*))); store->setAtomic(AtomicOrdering::Release); SmallVector args; - for (Function::arg_iterator arg = plt->arg_begin(), arg_e = plt->arg_end(); arg != arg_e; ++arg) - args.push_back(&*arg); + for (auto &arg : plt->args()) + args.push_back(&arg); assert(cast(ptr->getType())->isOpaqueOrPointeeTypeMatches(functype)); CallInst *ret = irbuilder.CreateCall( functype, @@ -283,14 +276,13 @@ static GlobalVariable *emit_plt_thunk( else { // musttail support is very bad on ARM, PPC, PPC64 (as of LLVM 3.9) // Known failures includes vararg (not needed here) and sret. - -#if (defined(_CPU_X86_) || defined(_CPU_X86_64_) || (defined(_CPU_AARCH64_) && !defined(_OS_DARWIN_))) - // Ref https://bugs.llvm.org/show_bug.cgi?id=47058 - // LLVM, as of 10.0.1 emits wrong/worse code when musttail is set - // Apple silicon macs give an LLVM ERROR if musttail is set here #44107. - if (!attrs.hasAttrSomewhere(Attribute::ByVal)) - ret->setTailCallKind(CallInst::TCK_MustTail); -#endif + if (ctx.emission_context.TargetTriple.isX86() || (ctx.emission_context.TargetTriple.isAArch64() && !ctx.emission_context.TargetTriple.isOSDarwin())) { + // Ref https://bugs.llvm.org/show_bug.cgi?id=47058 + // LLVM, as of 10.0.1 emits wrong/worse code when musttail is set + // Apple silicon macs give an LLVM ERROR if musttail is set here #44107. + if (!attrs.hasAttrSomewhere(Attribute::ByVal)) + ret->setTailCallKind(CallInst::TCK_MustTail); + } if (functype->getReturnType() == getVoidTy(irbuilder.getContext())) { irbuilder.CreateRetVoid(); } @@ -310,7 +302,6 @@ static Value *emit_plt( CallingConv::ID cc, const char *f_lib, const char *f_name) { ++PLT; - assert(ctx.emission_context.imaging); // Don't do this for vararg functions so that the `musttail` is only // an optimization and is not required to function correctly. assert(!functype->isVarArg()); @@ -328,6 +319,7 @@ static Value *emit_plt( } GlobalVariable *got = prepare_global_in(jl_Module, sharedgot); LoadInst *got_val = ctx.builder.CreateAlignedLoad(got->getValueType(), got, Align(sizeof(void*))); + setName(ctx.emission_context, got_val, f_name); // See comment in `runtime_sym_lookup` above. This in principle needs a // consume ordering too. 
This is even less likely to cause issues though // since the only thing we do to this loaded pointer is to call it @@ -447,19 +439,23 @@ static Value *llvm_type_rewrite( Value *from; Value *to; const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); - unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type)); + Align align = std::max(DL.getPrefTypeAlign(target_type), DL.getPrefTypeAlign(from_type)); if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) { to = emit_static_alloca(ctx, target_type); - cast(to)->setAlignment(Align(align)); + setName(ctx.emission_context, to, "type_rewrite_buffer"); + cast(to)->setAlignment(align); from = emit_bitcast(ctx, to, from_type->getPointerTo()); } else { from = emit_static_alloca(ctx, from_type); - cast(from)->setAlignment(Align(align)); + setName(ctx.emission_context, from, "type_rewrite_buffer"); + cast(from)->setAlignment(align); to = emit_bitcast(ctx, from, target_type->getPointerTo()); } - ctx.builder.CreateAlignedStore(v, from, Align(align)); - return ctx.builder.CreateAlignedLoad(target_type, to, Align(align)); + ctx.builder.CreateAlignedStore(v, from, align); + auto pun = ctx.builder.CreateAlignedLoad(target_type, to, align); + setName(ctx.emission_context, pun, "type_rewrite"); + return pun; } // --- argument passing and scratch space utilities --- @@ -474,9 +470,9 @@ static Value *runtime_apply_type_env(jl_codectx_t &ctx, jl_value_t *ty) ctx.builder.CreateInBoundsGEP( ctx.types().T_prjlvalue, ctx.spvals_ptr, - ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(jl_svec_t) / sizeof(jl_value_t*))) + ConstantInt::get(ctx.types().T_size, sizeof(jl_svec_t) / sizeof(jl_value_t*))) }; - auto call = ctx.builder.CreateCall(prepare_call(jlapplytype_func), makeArrayRef(args)); + auto call = ctx.builder.CreateCall(prepare_call(jlapplytype_func), ArrayRef(args)); addRetAttr(call, Attribute::getWithAlignment(ctx.builder.getContext(), Align(16))); return call; } @@ -486,9 +482,10 @@ static const std::string make_errmsg(const char *fname, int n, const char *err) std::string _msg; raw_string_ostream msg(_msg); msg << fname; - if (n > 0) - msg << " argument " << n; - else + if (n > 0) { + msg << " argument "; + msg << n; + } else msg << " return"; msg << err; return msg.str(); @@ -516,12 +513,13 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val Value *istype = ctx.builder.CreateICmpNE( ctx.builder.CreateCall(prepare_call(jlisa_func), { vx, boxed(ctx, jlto_runtime) }), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); + setName(ctx.emission_context, istype, "istype"); BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass", ctx.f); ctx.builder.CreateCondBr(istype, passBB, failBB); ctx.builder.SetInsertPoint(failBB); - emit_type_error(ctx, mark_julia_type(ctx, vx, true, jl_any_type), boxed(ctx, jlto_runtime), msg); + just_emit_type_error(ctx, mark_julia_type(ctx, vx, true, jl_any_type), boxed(ctx, jlto_runtime), msg); ctx.builder.CreateUnreachable(); ctx.builder.SetInsertPoint(passBB); } @@ -553,11 +551,16 @@ static Value *julia_to_native( // pass the address of an alloca'd thing, not a box // since those are immutable. 
Value *slot = emit_static_alloca(ctx, to); + unsigned align = julia_alignment(jlto); + cast(slot)->setAlignment(Align(align)); + setName(ctx.emission_context, slot, "native_convert_buffer"); if (!jvinfo.ispointer()) { - tbaa_decorate(jvinfo.tbaa, ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); + ai.decorateInst(ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot)); } else { - emit_memcpy(ctx, slot, jvinfo.tbaa, jvinfo, jl_datatype_size(jlto), julia_alignment(jlto)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); + emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), align, align); } return slot; } @@ -571,8 +574,15 @@ typedef struct { jl_value_t *gcroot; } native_sym_arg_t; +static inline const char *invalid_symbol_err_msg(bool ccall) +{ + return ccall ? + "ccall: first argument not a pointer or valid constant expression" : + "cglobal: first argument not a pointer or valid constant expression"; +} + // --- parse :sym or (:sym, :lib) argument into address info --- -static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg, const char *fname, bool llvmcall) +static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_value_t *arg, bool ccall, bool llvmcall) { Value *&jl_ptr = out.jl_ptr; void (*&fptr)(void) = out.fptr; @@ -602,13 +612,11 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va jl_cgval_t arg1 = emit_expr(ctx, arg); jl_value_t *ptr_ty = arg1.typ; if (!jl_is_cpointer_type(ptr_ty)) { - const char *errmsg = !strcmp(fname, "ccall") ? - "ccall: first argument not a pointer or valid constant expression" : - "cglobal: first argument not a pointer or valid constant expression"; + const char *errmsg = invalid_symbol_err_msg(ccall); emit_cpointercheck(ctx, arg1, errmsg); } arg1 = update_julia_type(ctx, arg1, (jl_value_t*)jl_voidpointer_type); - jl_ptr = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), arg1, (jl_value_t*)jl_voidpointer_type); + jl_ptr = emit_unbox(ctx, ctx.types().T_size, arg1, (jl_value_t*)jl_voidpointer_type); } else { out.gcroot = ptr; @@ -629,16 +637,12 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va std::string iname("i"); iname += f_name; if (jl_dlsym(jl_libjulia_internal_handle, iname.c_str(), &symaddr, 0)) { -#ifdef _OS_WINDOWS_ f_lib = JL_LIBJULIA_INTERNAL_DL_LIBNAME; -#endif f_name = jl_symbol_name(jl_symbol(iname.c_str())); } -#ifdef _OS_WINDOWS_ else { - f_lib = jl_dlfind_win32(f_name); + f_lib = jl_dlfind(f_name); } -#endif } } else if (jl_is_cpointer_type(jl_typeof(ptr))) { @@ -650,19 +654,15 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va f_name = jl_symbol_name((jl_sym_t*)t0); else if (jl_is_string(t0)) f_name = jl_string_data(t0); - else - JL_TYPECHKS(fname, symbol, t0); jl_value_t *t1 = jl_fieldref(ptr, 1); if (jl_is_symbol(t1)) f_lib = jl_symbol_name((jl_sym_t*)t1); else if (jl_is_string(t1)) f_lib = jl_string_data(t1); - else - JL_TYPECHKS(fname, symbol, t1); - } - else { - JL_TYPECHKS(fname, pointer, ptr); + else { + out.lib_expr = t1; + } } } } @@ -696,42 +696,35 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg else { rt = (jl_value_t*)jl_voidpointer_type; } - Type *lrt = getSizeTy(ctx.builder.getContext()); + Type *lrt = ctx.types().T_size; assert(lrt == julia_type_to_llvm(ctx, rt)); - interpret_symbol_arg(ctx, sym, args[1], "cglobal", 
false); + interpret_symbol_arg(ctx, sym, args[1], /*ccall=*/false, false); + + if (sym.f_name == NULL && sym.fptr == NULL && sym.jl_ptr == NULL && sym.gcroot != NULL) { + const char *errmsg = invalid_symbol_err_msg(/*ccall=*/false); + jl_cgval_t arg1 = emit_expr(ctx, args[1]); + emit_type_error(ctx, arg1, literal_pointer_val(ctx, (jl_value_t *)jl_pointer_type), errmsg); + JL_GC_POP(); + return jl_cgval_t(); + } if (sym.jl_ptr != NULL) { res = ctx.builder.CreateBitCast(sym.jl_ptr, lrt); } else if (sym.fptr != NULL) { res = ConstantInt::get(lrt, (uint64_t)sym.fptr); - if (ctx.emission_context.imaging) + if (ctx.emission_context.imaging_mode) jl_printf(JL_STDERR,"WARNING: literal address used in cglobal for %s; code cannot be statically compiled\n", sym.f_name); } else { if (sym.lib_expr) { res = runtime_sym_lookup(ctx, cast(getInt8PtrTy(ctx.builder.getContext())), NULL, sym.lib_expr, sym.f_name, ctx.f); } - else if (ctx.emission_context.imaging) { + else /*if (ctx.emission_context.imaging) */{ res = runtime_sym_lookup(ctx, cast(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f); res = ctx.builder.CreatePtrToInt(res, lrt); } - else { - void *symaddr; - - void* libsym = jl_get_library_(sym.f_lib, 0); - if (!libsym || !jl_dlsym(libsym, sym.f_name, &symaddr, 0)) { - // Error mode, either the library or the symbol couldn't be find during compiletime. - // Fallback to a runtime symbol lookup. - res = runtime_sym_lookup(ctx, cast(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f); - res = ctx.builder.CreatePtrToInt(res, lrt); - } else { - // since we aren't saving this code, there's no sense in - // putting anything complicated here: just JIT the address of the cglobal - res = ConstantInt::get(lrt, (uint64_t)symaddr); - } - } } JL_GC_POP(); @@ -756,14 +749,15 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar jl_value_t *ir_arg = args[1]; JL_GC_PUSH4(&ir, &rt, &at, &entry); if (jl_is_ssavalue(ir_arg)) - ir_arg = jl_arrayref((jl_array_t*)ctx.source->code, ((jl_ssavalue_t*)ir_arg)->id - 1); + ir_arg = jl_array_ptr_ref((jl_array_t*)ctx.source->code, ((jl_ssavalue_t*)ir_arg)->id - 1); ir = static_eval(ctx, ir_arg); if (!ir) { emit_error(ctx, "error statically evaluating llvm IR argument"); + JL_GC_POP(); return jl_cgval_t(); } if (jl_is_ssavalue(args[2]) && !jl_is_long(ctx.source->ssavaluetypes)) { - jl_value_t *rtt = jl_arrayref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[2])->id - 1); + jl_value_t *rtt = jl_array_ptr_ref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[2])->id - 1); if (jl_is_type_type(rtt)) rt = jl_tparam0(rtt); } @@ -771,11 +765,12 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar rt = static_eval(ctx, args[2]); if (!rt) { emit_error(ctx, "error statically evaluating llvmcall return type"); + JL_GC_POP(); return jl_cgval_t(); } } if (jl_is_ssavalue(args[3]) && !jl_is_long(ctx.source->ssavaluetypes)) { - jl_value_t *att = jl_arrayref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[3])->id - 1); + jl_value_t *att = jl_array_ptr_ref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[3])->id - 1); if (jl_is_type_type(att)) at = jl_tparam0(att); } @@ -783,6 +778,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar at = static_eval(ctx, args[3]); if (!at) { emit_error(ctx, "error statically evaluating llvmcall argument tuple"); + JL_GC_POP(); return jl_cgval_t(); } } @@ -790,23 
+786,27 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar // if the IR is a tuple, we expect (mod, fn) if (jl_nfields(ir) != 2) { emit_error(ctx, "Tuple as first argument to llvmcall must have exactly two children"); + JL_GC_POP(); return jl_cgval_t(); } entry = jl_fieldref(ir, 1); if (!jl_is_string(entry)) { emit_error(ctx, "Function name passed to llvmcall must be a string"); + JL_GC_POP(); return jl_cgval_t(); } ir = jl_fieldref(ir, 0); - if (!jl_is_string(ir) && !jl_typeis(ir, jl_array_uint8_type)) { + if (!jl_is_string(ir) && !jl_typetagis(ir, jl_array_uint8_type)) { emit_error(ctx, "Module IR passed to llvmcall must be a string or an array of bytes"); + JL_GC_POP(); return jl_cgval_t(); } } else { if (!jl_is_string(ir)) { emit_error(ctx, "Function IR passed to llvmcall must be a string"); + JL_GC_POP(); return jl_cgval_t(); } } @@ -826,8 +826,8 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar * If the argument type is immutable (including bitstype), we pass the loaded llvm value * type. Otherwise we pass a pointer to a jl_value_t. */ - std::vector argtypes; - Value **argvals = (Value**)alloca(nargt * sizeof(Value*)); + SmallVector argtypes; + SmallVector argvals(nargt); for (size_t i = 0; i < nargt; ++i) { jl_value_t *tti = jl_svecref(tt,i); bool toboxed; @@ -835,6 +835,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar argtypes.push_back(t); if (4 + i > nargs) { emit_error(ctx, "Missing arguments to llvmcall!"); + JL_GC_POP(); return jl_cgval_t(); } jl_value_t *argi = args[4 + i]; @@ -851,7 +852,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar // Make sure to find a unique name std::string ir_name; while (true) { - raw_string_ostream(ir_name) << (ctx.f->getName().str()) << "u" << globalUniqueGeneratedNames++; + raw_string_ostream(ir_name) << (ctx.f->getName().str()) << "u" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); if (jl_Module->getFunction(ir_name) == NULL) break; } @@ -862,7 +863,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar // we only have function IR, which we should put in a function bool first = true; - for (std::vector::iterator it = argtypes.begin(); it != argtypes.end(); ++it) { + for (SmallVector::iterator it = argtypes.begin(); it != argtypes.end(); ++it) { if (!first) argstream << ","; else @@ -889,6 +890,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar raw_string_ostream stream(message); Err.print("", stream, true); emit_error(ctx, stream.str()); + JL_GC_POP(); return jl_cgval_t(); } @@ -906,12 +908,13 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar raw_string_ostream stream(message); Err.print("", stream, true); emit_error(ctx, stream.str()); + JL_GC_POP(); return jl_cgval_t(); } } else { auto Buf = MemoryBuffer::getMemBuffer( - StringRef((char *)jl_array_data(ir), jl_array_len(ir)), "llvmcall", + StringRef(jl_array_data(ir, char), jl_array_nrows(ir)), "llvmcall", /*RequiresNullTerminator*/ false); Expected> ModuleOrErr = parseBitcodeFile(*Buf, ctx.builder.getContext()); @@ -923,6 +926,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar raw_string_ostream stream(message); stream << Message; emit_error(ctx, stream.str()); + JL_GC_POP(); return jl_cgval_t(); } Mod = std::move(ModuleOrErr.get()); @@ -931,6 +935,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, 
jl_value_t **args, size_t nar Function *f = Mod->getFunction(jl_string_data(entry)); if (!f) { emit_error(ctx, "Module IR does not contain specified entry function"); + JL_GC_POP(); return jl_cgval_t(); } f->setName(ir_name); @@ -939,7 +944,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar assert(!f->isDeclaration()); assert(f->getReturnType() == rettype); int i = 0; - for (std::vector::iterator it = argtypes.begin(); + for (SmallVector::iterator it = argtypes.begin(); it != argtypes.end(); ++it, ++i) assert(*it == f->getFunctionType()->getParamType(i)); } @@ -947,10 +952,8 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar // copy module properties that should always match Mod->setTargetTriple(jl_Module->getTargetTriple()); Mod->setDataLayout(jl_Module->getDataLayout()); -#if JL_LLVM_VERSION >= 130000 Mod->setStackProtectorGuard(jl_Module->getStackProtectorGuard()); Mod->setOverrideStackAlignment(jl_Module->getOverrideStackAlignment()); -#endif // verify the definition Function *def = Mod->getFunction(ir_name); @@ -959,6 +962,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar raw_string_ostream stream(message); if (verifyFunction(*def, &stream)) { emit_error(ctx, stream.str()); + JL_GC_POP(); return jl_cgval_t(); } def->setLinkage(GlobalVariable::LinkOnceODRLinkage); @@ -968,12 +972,12 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar Function *decl = Function::Create(decl_typ, def->getLinkage(), def->getAddressSpace(), def->getName(), jl_Module); decl->setAttributes(def->getAttributes()); - CallInst *inst = ctx.builder.CreateCall(decl, ArrayRef(&argvals[0], nargt)); + CallInst *inst = ctx.builder.CreateCall(decl, argvals); // save the module to be linked later. // we cannot do this right now, because linking mutates the destination module, // which might invalidate LLVM values cached in cgval_t's (specifically constant arrays) - ctx.llvmcall_modules.push_back(orc::ThreadSafeModule(std::move(Mod), ctx.emission_context.tsctx)); + ctx.llvmcall_modules.push_back(std::move(Mod)); JL_GC_POP(); @@ -997,8 +1001,10 @@ static Value *box_ccall_result(jl_codectx_t &ctx, Value *result, Value *runtime_ // XXX: need to handle parameterized zero-byte types (singleton) const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); unsigned nb = DL.getTypeStoreSize(result->getType()); + unsigned align = sizeof(void*); // Allocations are at least pointer aligned MDNode *tbaa = jl_is_mutable(rt) ? 
ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; - Value *strct = emit_allocobj(ctx, nb, runtime_dt); + Value *strct = emit_allocobj(ctx, nb, runtime_dt, true, align); + setName(ctx.emission_context, strct, "ccall_result_box"); init_bits_value(ctx, strct, result, tbaa); return strct; } @@ -1018,10 +1024,10 @@ static jl_cgval_t mark_or_box_ccall_result(jl_codectx_t &ctx, Value *result, boo class function_sig_t { public: - std::vector fargt; // vector of llvm output types (julia_struct_to_llvm) for arguments - std::vector fargt_sig; // vector of ABI coercion types for call signature - std::vector fargt_isboxed; // vector of whether the llvm output type is a Julia-box for each argument - std::vector byRefList; // vector of "byref" parameters + SmallVector fargt; // vector of llvm output types (julia_struct_to_llvm) for arguments + SmallVector fargt_sig; // vector of ABI coercion types for call signature + SmallVector fargt_isboxed; // vector of whether the llvm output type is a Julia-box for each argument + SmallVector byRefList; // vector of "byref" parameters AttributeList attributes; // vector of function call site attributes Type *lrt; // input parameter of the llvm return type (from julia_struct_to_llvm) bool retboxed; // input parameter indicating whether lrt is jl_value_t* @@ -1050,7 +1056,7 @@ class function_sig_t { FunctionType *functype(LLVMContext &ctxt) const { assert(err_msg.empty()); if (nreqargs > 0) - return FunctionType::get(sret ? getVoidTy(ctxt) : prt, makeArrayRef(fargt_sig).slice(0, nreqargs), true); + return FunctionType::get(sret ? getVoidTy(ctxt) : prt, ArrayRef(fargt_sig).slice(0, nreqargs), true); else return FunctionType::get(sret ? getVoidTy(ctxt) : prt, fargt_sig, false); } @@ -1067,7 +1073,7 @@ std::string generate_func_sig(const char *fname) { assert(rt && !jl_is_abstract_ref_type(rt)); - std::vector paramattrs; + SmallVector paramattrs; std::unique_ptr abi; if (llvmcall) abi.reset(new ABI_LLVMLayout()); @@ -1085,14 +1091,11 @@ std::string generate_func_sig(const char *fname) abi->use_sret(jl_voidpointer_type, LLVMCtx); } else if (abi->use_sret((jl_datatype_t*)rt, LLVMCtx)) { -#if JL_LLVM_VERSION >= 140000 AttrBuilder retattrs(LLVMCtx); -#else - AttrBuilder retattrs; -#endif -#if !defined(_OS_WINDOWS_) // llvm used to use the old mingw ABI, skipping this marking works around that difference - retattrs.addStructRetAttr(lrt); -#endif + if (!ctx->TargetTriple.isOSWindows()) { + // llvm used to use the old mingw ABI, skipping this marking works around that difference + retattrs.addStructRetAttr(lrt); + } retattrs.addAttribute(Attribute::NoAlias); paramattrs.push_back(AttributeSet::get(LLVMCtx, retattrs)); fargt_sig.push_back(PointerType::get(lrt, 0)); @@ -1107,11 +1110,7 @@ std::string generate_func_sig(const char *fname) } for (size_t i = 0; i < nccallargs; ++i) { -#if JL_LLVM_VERSION >= 140000 AttrBuilder ab(LLVMCtx); -#else - AttrBuilder ab; -#endif jl_value_t *tti = jl_svecref(at, i); Type *t = NULL; bool isboxed; @@ -1126,26 +1125,26 @@ std::string generate_func_sig(const char *fname) isboxed = false; } else { - if (jl_is_primitivetype(tti)) { + t = _julia_struct_to_llvm(ctx, LLVMCtx, tti, &isboxed, llvmcall); + if (t == getVoidTy(LLVMCtx)) { + return make_errmsg(fname, i + 1, " type doesn't correspond to a C type"); + } + if (jl_is_primitivetype(tti) && t->isIntegerTy()) { // see pull req #978. need to annotate signext/zeroext for // small integer arguments. 
jl_datatype_t *bt = (jl_datatype_t*)tti; - if (jl_datatype_size(bt) < 4 && bt != jl_float16_type) { + if (jl_datatype_size(bt) < 4) { if (jl_signed_type && jl_subtype(tti, (jl_value_t*)jl_signed_type)) ab.addAttribute(Attribute::SExt); else ab.addAttribute(Attribute::ZExt); } } - - t = _julia_struct_to_llvm(ctx, LLVMCtx, tti, &isboxed, llvmcall); - if (t == getVoidTy(LLVMCtx)) { - return make_errmsg(fname, i + 1, " type doesn't correspond to a C type"); - } } Type *pat; - if (!jl_is_datatype(tti) || ((jl_datatype_t*)tti)->layout == NULL || jl_is_layout_opaque(((jl_datatype_t*)tti)->layout)) { + // n.b. `Array` used as argument type just passes a julia object reference + if (!jl_is_datatype(tti) || ((jl_datatype_t*)tti)->layout == NULL || jl_is_array_type(tti) || jl_is_layout_opaque(((jl_datatype_t*)tti)->layout)) { tti = (jl_value_t*)jl_voidpointer_type; // passed as pointer } @@ -1250,6 +1249,7 @@ static bool verify_ref_type(jl_codectx_t &ctx, jl_value_t* ref, jl_unionall_t *u Value *notany = ctx.builder.CreateICmpNE( boxed(ctx, runtime_sp), track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_any_type))); + setName(ctx.emission_context, notany, "any_type.not"); error_unless(ctx, notany, make_errmsg(fname, n, rt_err_msg_notany)); always_error = false; } @@ -1275,7 +1275,7 @@ static const std::string verify_ccall_sig(jl_value_t *&rt, jl_value_t *at, JL_TYPECHK(ccall, type, rt); JL_TYPECHK(ccall, simplevector, at); - if (rt == (jl_value_t*)jl_any_type || jl_is_array_type(rt) || + if (rt == (jl_value_t*)jl_any_type || jl_is_array_type(rt) || jl_is_genericmemory_type(rt) || (jl_is_datatype(rt) && ((jl_datatype_t*)rt)->layout != NULL && jl_is_layout_opaque(((jl_datatype_t*)rt)->layout))) { // n.b. `Array` used as return type just returns a julia object reference @@ -1335,50 +1335,57 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) bool llvmcall = false; std::tie(cc, llvmcall) = convert_cconv(cc_sym); - interpret_symbol_arg(ctx, symarg, args[1], "ccall", llvmcall); + interpret_symbol_arg(ctx, symarg, args[1], /*ccall=*/true, llvmcall); Value *&jl_ptr = symarg.jl_ptr; void (*&fptr)(void) = symarg.fptr; const char *&f_name = symarg.f_name; const char *&f_lib = symarg.f_lib; if (f_name == NULL && fptr == NULL && jl_ptr == NULL) { - emit_error(ctx, "ccall: null function pointer"); + if (symarg.gcroot != NULL) { // static_eval(ctx, args[1]) could not be interpreted to a function pointer + const char *errmsg = invalid_symbol_err_msg(/*ccall=*/true); + jl_cgval_t arg1 = emit_expr(ctx, args[1]); + emit_type_error(ctx, arg1, literal_pointer_val(ctx, (jl_value_t *)jl_pointer_type), errmsg); + } else { + emit_error(ctx, "ccall: null function pointer"); + } JL_GC_POP(); return jl_cgval_t(); } - auto ccallarg = [=] (size_t i) { - assert(i < nccallargs && i + fc_args_start <= nargs); - return args[fc_args_start + i]; - }; - auto _is_libjulia_func = [&] (uintptr_t ptr, StringRef name) { if ((uintptr_t)fptr == ptr) return true; if (f_lib) { -#ifdef _OS_WINDOWS_ if ((f_lib == JL_EXE_LIBNAME) || // preventing invalid pointer access (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) || - (f_lib == JL_LIBJULIA_DL_LIBNAME) || - (!strcmp(f_lib, jl_crtdll_basename))) { + (f_lib == JL_LIBJULIA_DL_LIBNAME)) { + // libjulia-like + } + else +#ifdef _OS_WINDOWS_ + if (strcmp(f_lib, jl_crtdll_basename) == 0) { // libjulia-like } else - return false; -#else - return false; #endif + return false; } return f_name && f_name == name; }; #define is_libjulia_func(name) 
_is_libjulia_func((uintptr_t)&(name), StringRef(XSTR(name))) // emit arguments - jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nccallargs); + SmallVector argv(nccallargs); for (size_t i = 0; i < nccallargs; i++) { // Julia (expression) value of current parameter - jl_value_t *argi = ccallarg(i); + assert(i < nccallargs && i + fc_args_start <= nargs); + jl_value_t *argi = args[fc_args_start + i]; argv[i] = emit_expr(ctx, argi); + if (argv[i].typ == jl_bottom_type) { + JL_GC_POP(); + return jl_cgval_t(); + } } // emit roots @@ -1389,7 +1396,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) if (jl_is_long(argi_root)) continue; jl_cgval_t arg_root = emit_expr(ctx, argi_root); - Value *gc_root = get_gc_root_for(arg_root); + Value *gc_root = get_gc_root_for(ctx, arg_root); if (gc_root) gc_uses.push_back(gc_root); } @@ -1438,7 +1445,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) return jl_cgval_t(); } if (rt != args[2] && rt != (jl_value_t*)jl_any_type) - jl_add_method_root(ctx, rt); + rt = jl_ensure_rooted(ctx, rt); function_sig_t sig("ccall", lrt, rt, retboxed, (jl_svec_t*)at, unionall, nreqargs, cc, llvmcall, &ctx.emission_context); @@ -1455,25 +1462,16 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) // some special functions bool isVa = nreqargs > 0; (void)isVa; // prevent compiler warning - if (is_libjulia_func(jl_array_ptr)) { - ++CCALL_STAT(jl_array_ptr); - assert(lrt == getSizeTy(ctx.builder.getContext())); - assert(!isVa && !llvmcall && nccallargs == 1); - const jl_cgval_t &ary = argv[0]; - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, ctx.builder.CreatePtrToInt(emit_unsafe_arrayptr(ctx, ary), lrt), - retboxed, rt, unionall, static_rt); - } - else if (is_libjulia_func(jl_value_ptr)) { + if (is_libjulia_func(jl_value_ptr)) { ++CCALL_STAT(jl_value_ptr); - assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == getSizeTy(ctx.builder.getContext())); + assert(retboxed ? 
lrt == ctx.types().T_prjlvalue : lrt == ctx.types().T_size); assert(!isVa && !llvmcall && nccallargs == 1); jl_value_t *tti = jl_svecref(at, 0); Type *largty; bool isboxed; if (jl_is_abstract_ref_type(tti)) { tti = (jl_value_t*)jl_voidpointer_type; - largty = getSizeTy(ctx.builder.getContext()); + largty = ctx.types().T_size; isboxed = false; } else { @@ -1495,28 +1493,39 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); return mark_or_box_ccall_result(ctx, retval, retboxed, rt, unionall, static_rt); } - else if (is_libjulia_func(jl_cpu_pause)) { + else if (is_libjulia_func(jl_cpu_pause)||is_libjulia_func(jl_cpu_suspend)) { ++CCALL_STAT(jl_cpu_pause); // Keep in sync with the julia_threads.h version assert(lrt == getVoidTy(ctx.builder.getContext())); assert(!isVa && !llvmcall && nccallargs == 0); #ifdef __MIC__ - // TODO -#elif defined(_CPU_X86_64_) || defined(_CPU_X86_) /* !__MIC__ */ - auto pauseinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "pause", - "~{memory}", true); - ctx.builder.CreateCall(pauseinst); - JL_GC_POP(); - return ghostValue(ctx, jl_nothing_type); -#elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7) - auto wfeinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "wfe", - "~{memory}", true); - ctx.builder.CreateCall(wfeinst); - JL_GC_POP(); - return ghostValue(ctx, jl_nothing_type); + //TODO #else - JL_GC_POP(); - return ghostValue(ctx, jl_nothing_type); + if (ctx.emission_context.TargetTriple.isX86()) { + auto pauseinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "pause", + "~{memory}", true); + ctx.builder.CreateCall(pauseinst); + JL_GC_POP(); + return ghostValue(ctx, jl_nothing_type); + } else if (ctx.emission_context.TargetTriple.isAArch64() + || (ctx.emission_context.TargetTriple.isARM() + && ctx.emission_context.TargetTriple.getSubArch() != Triple::SubArchType::NoSubArch + // ARMv7 and above is < armv6 + && ctx.emission_context.TargetTriple.getSubArch() < Triple::SubArchType::ARMSubArch_v6)) { + InlineAsm* wait_inst; + if (is_libjulia_func(jl_cpu_pause)) + wait_inst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "isb", + "~{memory}", true); + else + wait_inst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "wfe", + "~{memory}", true); + ctx.builder.CreateCall(wait_inst); + JL_GC_POP(); + return ghostValue(ctx, jl_nothing_type); + } else { + JL_GC_POP(); + return ghostValue(ctx, jl_nothing_type); + } #endif } else if (is_libjulia_func(jl_cpu_wake)) { @@ -1527,25 +1536,31 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) #if JL_CPU_WAKE_NOOP == 1 JL_GC_POP(); return ghostValue(ctx, jl_nothing_type); -#elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7) - auto sevinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "sev", - "~{memory}", true); - ctx.builder.CreateCall(sevinst); - JL_GC_POP(); - return ghostValue(ctx, jl_nothing_type); #endif + if (ctx.emission_context.TargetTriple.isAArch64() + || (ctx.emission_context.TargetTriple.isARM() + && ctx.emission_context.TargetTriple.getSubArch() != Triple::SubArchType::NoSubArch + // ARMv7 and above is < armv6 + && ctx.emission_context.TargetTriple.getSubArch() < Triple::SubArchType::ARMSubArch_v6)) { + auto sevinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "sev", + 
"~{memory}", true); + ctx.builder.CreateCall(sevinst); + JL_GC_POP(); + return ghostValue(ctx, jl_nothing_type); + } } else if (is_libjulia_func(jl_gc_safepoint)) { ++CCALL_STAT(jl_gc_safepoint); assert(lrt == getVoidTy(ctx.builder.getContext())); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); - emit_gc_safepoint(ctx); + ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); + emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const); return ghostValue(ctx, jl_nothing_type); } else if (is_libjulia_func("jl_get_ptls_states")) { ++CCALL_STAT(jl_get_ptls_states); - assert(lrt == getSizeTy(ctx.builder.getContext())); + assert(lrt == ctx.types().T_size); assert(!isVa && !llvmcall && nccallargs == 0); JL_GC_POP(); return mark_or_box_ccall_result(ctx, @@ -1559,9 +1574,12 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), getInt16PtrTy(ctx.builder.getContext())); const int tid_offset = offsetof(jl_task_t, tid); - Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(getSizeTy(ctx.builder.getContext()), tid_offset / sizeof(int16_t))); + Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int16_t))); + setName(ctx.emission_context, ptid, "thread_id_ptr"); LoadInst *tid = ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), ptid, Align(sizeof(int16_t))); - tbaa_decorate(ctx.tbaa().tbaa_gcframe, tid); + setName(ctx.emission_context, tid, "thread_id"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + ai.decorateInst(tid); return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt); } else if (is_libjulia_func(jl_gc_disable_finalizers_internal) @@ -1572,16 +1590,20 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), getInt32PtrTy(ctx.builder.getContext())); const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited); - Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(getSizeTy(ctx.builder.getContext()), finh_offset / 4)); + Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(ctx.types().T_size, finh_offset / 4)); + setName(ctx.emission_context, pfinh, "finalizers_inhibited_ptr"); LoadInst *finh = ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), pfinh, Align(sizeof(int32_t))); + setName(ctx.emission_context, finh, "finalizers_inhibited"); Value *newval; if (is_libjulia_func(jl_gc_disable_finalizers_internal)) { newval = ctx.builder.CreateAdd(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1)); + setName(ctx.emission_context, newval, "finalizers_inhibited_inc"); } else { newval = ctx.builder.CreateSelect(ctx.builder.CreateICmpEQ(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0), ctx.builder.CreateSub(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1))); + setName(ctx.emission_context, newval, "finalizers_inhibited_dec"); } ctx.builder.CreateStore(newval, pfinh); return ghostValue(ctx, jl_nothing_type); @@ -1601,7 +1623,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t 
**args, size_t nargs) JL_GC_POP(); Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), ctx.types().T_ppjlvalue); const int nt_offset = offsetof(jl_tls_states_t, next_task); - Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nt_offset / sizeof(void*))); + Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(ctx.types().T_size, nt_offset / sizeof(void*))); + setName(ctx.emission_context, pnt, "next_task_ptr"); ctx.builder.CreateStore(emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), pnt); return ghostValue(ctx, jl_nothing_type); } @@ -1612,8 +1635,11 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); Value *pdefer_sig = emit_defer_signal(ctx); + setName(ctx.emission_context, pdefer_sig, "defer_signal_ptr"); Value *defer_sig = ctx.builder.CreateLoad(ctx.types().T_sigatomic, pdefer_sig); + setName(ctx.emission_context, defer_sig, "defer_signal"); defer_sig = ctx.builder.CreateAdd(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 1)); + setName(ctx.emission_context, defer_sig, "defer_signal_inc"); ctx.builder.CreateStore(defer_sig, pdefer_sig); emit_signal_fence(ctx); return ghostValue(ctx, jl_nothing_type); @@ -1625,7 +1651,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); Value *pdefer_sig = emit_defer_signal(ctx); + setName(ctx.emission_context, pdefer_sig, "defer_signal_ptr"); Value *defer_sig = ctx.builder.CreateLoad(ctx.types().T_sigatomic, pdefer_sig); + setName(ctx.emission_context, defer_sig, "defer_signal"); emit_signal_fence(ctx); error_unless(ctx, ctx.builder.CreateICmpNE(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)), @@ -1633,144 +1661,45 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) defer_sig = ctx.builder.CreateSub( defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 1)); + setName(ctx.emission_context, defer_sig, "defer_signal_dec"); ctx.builder.CreateStore(defer_sig, pdefer_sig); BasicBlock *checkBB = BasicBlock::Create(ctx.builder.getContext(), "check", ctx.f); BasicBlock *contBB = BasicBlock::Create(ctx.builder.getContext(), "cont"); + auto not_deferred = ctx.builder.CreateICmpEQ(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)); + setName(ctx.emission_context, not_deferred, "deferred.not"); ctx.builder.CreateCondBr( - ctx.builder.CreateICmpEQ(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)), + not_deferred, checkBB, contBB); ctx.builder.SetInsertPoint(checkBB); - ctx.builder.CreateLoad( - getSizeTy(ctx.builder.getContext()), - ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), get_current_signal_page(ctx), -1), + auto signal_page_load = ctx.builder.CreateLoad( + ctx.types().T_size, + ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, + get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1), true); + setName(ctx.emission_context, signal_page_load, "signal_page_load"); ctx.builder.CreateBr(contBB); - ctx.f->getBasicBlockList().push_back(contBB); + contBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(contBB); return ghostValue(ctx, jl_nothing_type); } - else if (is_libjulia_func(jl_svec_len)) { - ++CCALL_STAT(jl_svec_len); - assert(!isVa && !llvmcall && nccallargs == 1); - const 
jl_cgval_t &svecv = argv[0]; - Value *len; - if (svecv.constant && svecv.typ == (jl_value_t*)jl_simplevector_type) { - // Check the type as well before we call - len = ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_svec_len(svecv.constant)); - } - else { - auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), getSizePtrTy(ctx.builder.getContext())); - len = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ptr, Align(sizeof(size_t))); - // Only mark with TBAA if we are sure about the type. - // This could otherwise be in a dead branch - if (svecv.typ == (jl_value_t*)jl_simplevector_type) - tbaa_decorate(ctx.tbaa().tbaa_const, cast(len)); - MDBuilder MDB(ctx.builder.getContext()); - auto rng = MDB.createRange( - Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX / sizeof(void*) - 1)); - cast(len)->setMetadata(LLVMContext::MD_range, rng); - } - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, len, retboxed, rt, unionall, static_rt); - } - else if (is_libjulia_func(jl_svec_isassigned) && - argv[1].typ == (jl_value_t*)jl_long_type) { - ++CCALL_STAT(jl_svec_isassigned); - assert(!isVa && !llvmcall && nccallargs == 2); - const jl_cgval_t &svecv = argv[0]; - const jl_cgval_t &idxv = argv[1]; - Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idxv, (jl_value_t*)jl_long_type); - idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)); - auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue); - Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, - decay_derived(ctx, ptr), idx); - LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, - Align(sizeof(void*))); - load->setAtomic(AtomicOrdering::Unordered); - // Only mark with TBAA if we are sure about the type. - // This could otherwise be in a dead branch - if (svecv.typ == (jl_value_t*)jl_simplevector_type) - tbaa_decorate(ctx.tbaa().tbaa_const, load); - Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(ctx.types().T_prjlvalue)), getInt8Ty(ctx.builder.getContext())); - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt); - } - else if (is_libjulia_func(jl_svec_ref) && argv[1].typ == (jl_value_t*)jl_long_type) { - ++CCALL_STAT(jl_svec_ref); - assert(lrt == ctx.types().T_prjlvalue); - assert(!isVa && !llvmcall && nccallargs == 2); - const jl_cgval_t &svecv = argv[0]; - const jl_cgval_t &idxv = argv[1]; - Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idxv, (jl_value_t*)jl_long_type); - idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)); - auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue); - Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, - decay_derived(ctx, ptr), idx); - LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, - Align(sizeof(void*))); - load->setAtomic(AtomicOrdering::Unordered); - // Only mark with TBAA if we are sure about the type. 
- // This could otherwise be in a dead branch - if (svecv.typ == (jl_value_t*)jl_simplevector_type) - tbaa_decorate(ctx.tbaa().tbaa_const, load); - null_pointer_check(ctx, load); - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, load, retboxed, rt, unionall, static_rt); - } - else if (is_libjulia_func(jl_array_isassigned) && - argv[1].typ == (jl_value_t*)jl_ulong_type) { - ++CCALL_STAT(jl_array_isassigned); - assert(!isVa && !llvmcall && nccallargs == 2); - jl_value_t *aryex = ccallarg(0); - const jl_cgval_t &aryv = argv[0]; - const jl_cgval_t &idxv = argv[1]; - jl_datatype_t *arydt = (jl_datatype_t*)jl_unwrap_unionall(aryv.typ); - if (jl_is_array_type(arydt)) { - jl_value_t *ety = jl_tparam0(arydt); - bool ptrarray = !jl_stored_inline(ety); - if (!ptrarray && !jl_type_hasptr(ety)) { - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1), - false, rt, unionall, static_rt); - } - else if (!jl_has_free_typevars(ety)) { - Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idxv, (jl_value_t*)jl_ulong_type); - Value *arrayptr = emit_bitcast(ctx, emit_arrayptr(ctx, aryv, aryex), ctx.types().T_pprjlvalue); - if (!ptrarray) { - size_t elsz = jl_datatype_size(ety); - unsigned align = jl_datatype_align(ety); - size_t stride = LLT_ALIGN(elsz, align) / sizeof(jl_value_t*); - if (stride != 1) - idx = ctx.builder.CreateMul(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), stride)); - idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ((jl_datatype_t*)ety)->layout->first_ptr)); - } - Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, arrayptr, idx); - LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*))); - load->setAtomic(AtomicOrdering::Unordered); - tbaa_decorate(ctx.tbaa().tbaa_ptrarraybuf, load); - Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(ctx.types().T_prjlvalue)), getInt32Ty(ctx.builder.getContext())); - JL_GC_POP(); - return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt); - } - } - } else if (is_libjulia_func(jl_string_ptr)) { ++CCALL_STAT(jl_string_ptr); - assert(lrt == getSizeTy(ctx.builder.getContext())); + assert(lrt == ctx.types().T_size); assert(!isVa && !llvmcall && nccallargs == 1); auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), ctx.types().T_pprjlvalue); // The inbounds gep makes it more clear to LLVM that the resulting value is not // a null pointer. auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1); - strp = ctx.builder.CreatePtrToInt(strp, getSizeTy(ctx.builder.getContext())); + strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size); + setName(ctx.emission_context, strp, "string_ptr"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } else if (is_libjulia_func(jl_symbol_name)) { ++CCALL_STAT(jl_symbol_name); - assert(lrt == getSizeTy(ctx.builder.getContext())); + assert(lrt == ctx.types().T_size); assert(!isVa && !llvmcall && nccallargs == 1); auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), ctx.types().T_pprjlvalue); @@ -1778,25 +1707,61 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) // a null pointer. 
auto strp = ctx.builder.CreateConstInBoundsGEP1_32( ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*)); - strp = ctx.builder.CreatePtrToInt(strp, getSizeTy(ctx.builder.getContext())); + strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size); + setName(ctx.emission_context, strp, "symbol_name"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } + else if (is_libjulia_func(jl_genericmemory_owner) || is_libjulia_func(ijl_genericmemory_owner)) { + ++CCALL_STAT(jl_genericmemory_owner); + assert(lrt == ctx.types().T_prjlvalue); + assert(!isVa && !llvmcall && nccallargs == 1); + Value *obj = emit_genericmemoryowner(ctx, boxed(ctx, argv[0])); + JL_GC_POP(); + return mark_julia_type(ctx, obj, true, jl_any_type); + } + else if (is_libjulia_func(jl_alloc_genericmemory)) { + ++CCALL_STAT(jl_alloc_genericmemory); + assert(lrt == ctx.types().T_prjlvalue); + assert(!isVa && !llvmcall && nccallargs == 2); + const jl_cgval_t &typ = argv[0]; + const jl_cgval_t &nel = argv[1]; + auto arg_typename = [&] JL_NOTSAFEPOINT { + auto istyp = argv[0].constant; + std::string type_str; + if (istyp && jl_is_datatype(istyp) && jl_is_genericmemory_type(istyp)){ + auto eltype = jl_tparam1(istyp); + if (jl_is_datatype(eltype)) + type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name); + else if (jl_is_uniontype(eltype)) + type_str = "Union"; + else + type_str = ""; + } + else + type_str = ""; + return "Memory{" + type_str + "}[]"; + }; + auto alloc = ctx.builder.CreateCall(prepare_call(jl_allocgenericmemory), { boxed(ctx,typ), emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_ulong_type)}); + setName(ctx.emission_context, alloc, arg_typename); + JL_GC_POP(); + return mark_julia_type(ctx, alloc, true, jl_any_type); + } else if (is_libjulia_func(memcpy) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) { ++CCALL_STAT(memcpy); const jl_cgval_t &dst = argv[0]; const jl_cgval_t &src = argv[1]; const jl_cgval_t &n = argv[2]; - Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type); + Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type); ctx.builder.CreateMemCpy( emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())), MaybeAlign(1), emit_inttoptr(ctx, - emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), src, (jl_value_t*)jl_voidpointer_type), + emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type), getInt8PtrTy(ctx.builder.getContext())), - MaybeAlign(0), - emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type), + MaybeAlign(1), + emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type), false); JL_GC_POP(); return rt == (jl_value_t*)jl_nothing_type ? 
ghostValue(ctx, jl_nothing_type) : @@ -1807,13 +1772,13 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) const jl_cgval_t &dst = argv[0]; const jl_cgval_t &val = argv[1]; const jl_cgval_t &n = argv[2]; - Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type); + Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type); Value *val32 = emit_unbox(ctx, getInt32Ty(ctx.builder.getContext()), val, (jl_value_t*)jl_uint32_type); Value *val8 = ctx.builder.CreateTrunc(val32, getInt8Ty(ctx.builder.getContext()), "memset_val"); ctx.builder.CreateMemSet( emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())), val8, - emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type), + emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type), MaybeAlign(1) ); JL_GC_POP(); @@ -1825,16 +1790,16 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) const jl_cgval_t &dst = argv[0]; const jl_cgval_t &src = argv[1]; const jl_cgval_t &n = argv[2]; - Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type); + Value *destp = emit_unbox(ctx, ctx.types().T_size, dst, (jl_value_t*)jl_voidpointer_type); ctx.builder.CreateMemMove( emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())), MaybeAlign(0), emit_inttoptr(ctx, - emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), src, (jl_value_t*)jl_voidpointer_type), + emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type), getInt8PtrTy(ctx.builder.getContext())), MaybeAlign(0), - emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type), + emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type), false); JL_GC_POP(); return rt == (jl_value_t*)jl_nothing_type ? 
ghostValue(ctx, jl_nothing_type) : @@ -1847,10 +1812,13 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) if (val.typ == (jl_value_t*)jl_symbol_type) { JL_GC_POP(); const int hash_offset = offsetof(jl_sym_t, hash); - Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), getSizePtrTy(ctx.builder.getContext())); - Value *ph2 = ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), ph1, ConstantInt::get(getSizeTy(ctx.builder.getContext()), hash_offset / sizeof(size_t))); - LoadInst *hashval = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ph2, Align(sizeof(size_t))); - tbaa_decorate(ctx.tbaa().tbaa_const, hashval); + Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), ctx.types().T_size->getPointerTo()); + Value *ph2 = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, ph1, ConstantInt::get(ctx.types().T_size, hash_offset / ctx.types().sizeof_ptr)); + setName(ctx.emission_context, ph2, "object_id_ptr"); + LoadInst *hashval = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ph2, ctx.types().alignof_ptr); + setName(ctx.emission_context, hashval, "object_id"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + ai.decorateInst(hashval); return mark_or_box_ccall_result(ctx, hashval, retboxed, rt, unionall, static_rt); } else if (!val.isboxed) { @@ -1860,13 +1828,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) if (!val.isghost && !val.ispointer()) val = value_to_pointer(ctx, val); Value *args[] = { - emit_typeof_boxed(ctx, val), + emit_typeof(ctx, val, false, true), val.isghost ? ConstantPointerNull::get(T_pint8_derived) : ctx.builder.CreateBitCast( decay_derived(ctx, data_pointer(ctx, val)), T_pint8_derived) }; - Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), makeArrayRef(args)); + Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), ArrayRef(args)); + setName(ctx.emission_context, ret, "object_id"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt); } @@ -1875,7 +1844,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) jl_cgval_t retval = sig.emit_a_ccall( ctx, symarg, - argv, + argv.data(), gc_uses, static_rt); JL_GC_POP(); @@ -1897,15 +1866,15 @@ jl_cgval_t function_sig_t::emit_a_ccall( FunctionType *functype = this->functype(ctx.builder.getContext()); - Value **argvals = (Value**) alloca((nccallargs + sret) * sizeof(Value*)); + SmallVector argvals(nccallargs + sret); for (size_t ai = 0; ai < nccallargs; ai++) { // Current C function parameter jl_cgval_t &arg = argv[ai]; jl_value_t *jargty = jl_svecref(at, ai); // Julia type of the current parameter - Type *largty = fargt.at(ai); // LLVM type of the current parameter - bool toboxed = fargt_isboxed.at(ai); - Type *pargty = fargt_sig.at(ai + sret); // LLVM coercion type - bool byRef = byRefList.at(ai); // Argument attributes + Type *largty = fargt[ai]; // LLVM type of the current parameter + bool toboxed = fargt_isboxed[ai]; + Type *pargty = fargt_sig[ai + sret]; // LLVM coercion type + bool byRef = byRefList[ai]; // Argument attributes // if we know the function sparams, try to fill those in now // so that the julia_to_native type checks are more likely to be doable (e.g. 
concrete types) at compile-time @@ -1914,7 +1883,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( jl_svec_len(ctx.linfo->sparam_vals) > 0) { jargty_in_env = jl_instantiate_type_in_env(jargty_in_env, unionall_env, jl_svec_data(ctx.linfo->sparam_vals)); if (jargty_in_env != jargty) - jl_add_method_root(ctx, jargty_in_env); + jargty_in_env = jl_ensure_rooted(ctx, jargty_in_env); } Value *v; @@ -1955,20 +1924,20 @@ jl_cgval_t function_sig_t::emit_a_ccall( assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid"); if (jl_is_pointerfree(rt)) { result = emit_static_alloca(ctx, lrt); + setName(ctx.emission_context, result, "ccall_sret"); sretty = lrt; - argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig.at(0)); + argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig[0]); } else { // XXX: result needs to be zero'd and given a GC root here // and has incorrect write barriers. // instead this code path should behave like `unsafe_load` - assert(jl_datatype_size(rt) > 0 && "sret shouldn't be a singleton instance"); - result = emit_allocobj(ctx, jl_datatype_size(rt), - literal_pointer_val(ctx, (jl_value_t*)rt)); + result = emit_allocobj(ctx, (jl_datatype_t*)rt, true); + setName(ctx.emission_context, result, "ccall_sret_box"); sretty = ctx.types().T_jlvalue; sretboxed = true; gc_uses.push_back(result); - argvals[0] = ctx.builder.CreateBitCast(emit_pointer_from_objref(ctx, result), fargt_sig.at(0)); + argvals[0] = ctx.builder.CreateBitCast(emit_pointer_from_objref(ctx, result), fargt_sig[0]); } } @@ -2036,11 +2005,20 @@ jl_cgval_t function_sig_t::emit_a_ccall( } else if (symarg.fptr != NULL) { ++LiteralCCalls; - Type *funcptype = PointerType::get(functype, 0); + Type *funcptype = functype->getPointerTo(0); llvmf = literal_static_pointer_val((void*)(uintptr_t)symarg.fptr, funcptype); - if (ctx.emission_context.imaging) + setName(ctx.emission_context, llvmf, "ccall_fptr"); + if (ctx.emission_context.imaging_mode) jl_printf(JL_STDERR,"WARNING: literal address used in ccall for %s; code cannot be statically compiled\n", symarg.f_name); } + else if (!ctx.params->use_jlplt) { + if ((symarg.f_lib && !((symarg.f_lib == JL_EXE_LIBNAME) || + (symarg.f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) || + (symarg.f_lib == JL_LIBJULIA_DL_LIBNAME))) || symarg.lib_expr) { + emit_error(ctx, "ccall: Had library expression, but symbol lookup was disabled"); + } + llvmf = jl_Module->getOrInsertFunction(symarg.f_name, functype).getCallee(); + } else { assert(symarg.f_name != NULL); PointerType *funcptype = PointerType::get(functype, 0); @@ -2048,7 +2026,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( ++DeferredCCallLookups; llvmf = runtime_sym_lookup(ctx, funcptype, NULL, symarg.lib_expr, symarg.f_name, ctx.f); } - else if (ctx.emission_context.imaging) { + else /*if (ctx.emission_context.imaging) */{ ++DeferredCCallLookups; // vararg requires musttail, // but musttail is incompatible with noreturn. @@ -2057,27 +2035,12 @@ jl_cgval_t function_sig_t::emit_a_ccall( else llvmf = emit_plt(ctx, functype, attributes, cc, symarg.f_lib, symarg.f_name); } - else { - void *symaddr; - void *libsym = jl_get_library_(symarg.f_lib, 0); - if (!libsym || !jl_dlsym(libsym, symarg.f_name, &symaddr, 0)) { - ++DeferredCCallLookups; - // either the library or the symbol could not be found, place a runtime - // lookup here instead. 
- llvmf = runtime_sym_lookup(ctx, funcptype, symarg.f_lib, NULL, symarg.f_name, ctx.f); - } else { - ++LiteralCCalls; - // since we aren't saving this code, there's no sense in - // putting anything complicated here: just JIT the function address - llvmf = literal_static_pointer_val(symaddr, funcptype); - } - } } OperandBundleDef OpBundle("jl_roots", gc_uses); // the actual call CallInst *ret = ctx.builder.CreateCall(functype, llvmf, - ArrayRef(&argvals[0], nccallargs + sret), + argvals, ArrayRef(&OpBundle, gc_uses.empty() ? 0 : 1)); ((CallInst*)ret)->setAttributes(attributes); @@ -2112,6 +2075,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( return mark_julia_slot(result, rt, NULL, ctx.tbaa().tbaa_stack); ++SRetCCalls; result = ctx.builder.CreateLoad(sretty, result); + setName(ctx.emission_context, result, "returned"); } } else { @@ -2125,22 +2089,23 @@ jl_cgval_t function_sig_t::emit_a_ccall( else if (jlretboxed && !retboxed) { assert(jl_is_datatype(rt)); if (static_rt) { - Value *runtime_bt = literal_pointer_val(ctx, rt); - size_t rtsz = jl_datatype_size(rt); - assert(rtsz > 0); - Value *strct = emit_allocobj(ctx, rtsz, runtime_bt); + Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt, true); + setName(ctx.emission_context, strct, "ccall_ret_box"); MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; int boxalign = julia_alignment(rt); // copy the data from the return value to the new struct const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); auto resultTy = result->getType(); + size_t rtsz = jl_datatype_size(rt); if (DL.getTypeStoreSize(resultTy) > rtsz) { // ARM and AArch64 can use a LLVM type larger than the julia type. // When this happens, cast through memory. auto slot = emit_static_alloca(ctx, resultTy); + setName(ctx.emission_context, slot, "type_pun_slot"); slot->setAlignment(Align(boxalign)); ctx.builder.CreateAlignedStore(result, slot, Align(boxalign)); - emit_memcpy(ctx, strct, tbaa, slot, tbaa, rtsz, boxalign); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign); } else { init_bits_value(ctx, strct, result, tbaa, boxalign); diff --git a/src/ccalllazybar.c b/src/ccalllazybar.c new file mode 100644 index 0000000000000..84bf9763fffa5 --- /dev/null +++ b/src/ccalllazybar.c @@ -0,0 +1,10 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "ccalltest_common.h" + +// We expect this to come from `libccalllazyfoo` +extern int foo(int); + +DLLEXPORT int bar(int a) { + return foo(a + 1); +} diff --git a/src/ccalllazyfoo.c b/src/ccalllazyfoo.c new file mode 100644 index 0000000000000..d68421adef67b --- /dev/null +++ b/src/ccalllazyfoo.c @@ -0,0 +1,7 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "ccalltest_common.h" + +DLLEXPORT int foo(int a) { + return a*2; +} diff --git a/src/ccalltest.c b/src/ccalltest.c index 64a6a3aabfb0b..0c7c85b328415 100644 --- a/src/ccalltest.c +++ b/src/ccalltest.c @@ -1,39 +1,10 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license -#include -#include -#include -#include -#include - -#include "../src/support/platform.h" -#include "../src/support/dtypes.h" - -// Borrow definition from `support/dtypes.h` -#ifdef _OS_WINDOWS_ -# define DLLEXPORT __declspec(dllexport) -#else -# if defined(_OS_LINUX_) -# define DLLEXPORT __attribute__ ((visibility("protected"))) -# else -# define DLLEXPORT __attribute__ ((visibility("default"))) -# endif -#endif - - -#ifdef _P64 -#define jint int64_t -#define PRIjint PRId64 -#else -#define jint int32_t -#define PRIjint PRId32 -#endif +#include "ccalltest_common.h" int verbose = 1; - int c_int = 0; - ////////////////////////////////// // Test for proper argument register truncation diff --git a/src/ccalltest_common.h b/src/ccalltest_common.h new file mode 100644 index 0000000000000..484cbde593369 --- /dev/null +++ b/src/ccalltest_common.h @@ -0,0 +1,30 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license +#include +#include +#include +#include +#include + +#include "../src/support/platform.h" +#include "../src/support/dtypes.h" + +// Borrow definition from `support/dtypes.h` +#ifdef _OS_WINDOWS_ +# define DLLEXPORT __declspec(dllexport) +#else +# if defined(_OS_LINUX_) && !defined(_COMPILER_CLANG_) +// Clang and ld disagree about the proper relocation for STV_PROTECTED, causing +// linker errors. +# define DLLEXPORT __attribute__ ((visibility("protected"))) +# else +# define DLLEXPORT __attribute__ ((visibility("default"))) +# endif +#endif + +#ifdef _P64 +#define jint int64_t +#define PRIjint PRId64 +#else +#define jint int32_t +#define PRIjint PRId32 +#endif diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp index 37e02b0efccbb..b627224e027a9 100644 --- a/src/cgmemmgr.cpp +++ b/src/cgmemmgr.cpp @@ -218,7 +218,12 @@ static _Atomic(size_t) map_offset{0}; // Hopefully no one will set a ulimit for this to be a problem... static constexpr size_t map_size_inc_default = 128 * 1024 * 1024; static size_t map_size = 0; -static uv_mutex_t shared_map_lock; +static struct _make_shared_map_lock { + uv_mutex_t mtx; + _make_shared_map_lock() { + uv_mutex_init(&mtx); + }; +} shared_map_lock; static size_t get_map_size_inc() { @@ -264,7 +269,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) *id = off; size_t map_size_inc = get_map_size_inc(); if (__unlikely(off + size > map_size)) { - uv_mutex_lock(&shared_map_lock); + uv_mutex_lock(&shared_map_lock.mtx); size_t old_size = map_size; while (off + size > map_size) map_size += map_size_inc; @@ -275,7 +280,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec) abort(); } } - uv_mutex_unlock(&shared_map_lock); + uv_mutex_unlock(&shared_map_lock.mtx); } return create_shared_map(size, off); } @@ -295,7 +300,7 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) // However, it seems possible to change this at kernel compile time. // pwrite doesn't support offset with sign bit set but lseek does. - // This is obviously not thread safe but none of the mem manager does anyway... + // This is obviously not thread-safe but none of the mem manager does anyway... // From the kernel code, `lseek` with `SEEK_SET` can't fail. // However, this can possibly confuse the glibc wrapper to think that // we have invalid input value. Use syscall directly to be sure. @@ -313,7 +318,6 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) // Use `get_self_mem_fd` which has a guard to call this only once. 
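Side note on the cgmemmgr.cpp hunk above: the bare `uv_mutex_t shared_map_lock` plus the late `uv_mutex_init` call inside `_init_self_mem` is replaced by a wrapper struct whose constructor initializes the mutex, so the lock is valid from static initialization onward and the explicit init call (dropped in the next hunk) is no longer needed. A minimal sketch of that idiom, with illustrative names that are not from the patch:

    #include <uv.h>

    // Constructor runs during static initialization, so the mutex is always usable.
    static struct guarded_lock {
        uv_mutex_t mtx;
        guarded_lock() { uv_mutex_init(&mtx); }
    } example_lock;

    static void with_example_lock(void (*body)(void)) {
        uv_mutex_lock(&example_lock.mtx);   // safe without any separate init step
        body();
        uv_mutex_unlock(&example_lock.mtx);
    }
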
static int _init_self_mem() { - uv_mutex_init(&shared_map_lock); struct utsname kernel; uname(&kernel); int major, minor; @@ -856,9 +860,14 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size, StringRef SectionName) { // allocating more than one code section can confuse libunwind. +#if !defined(_COMPILER_MSAN_ENABLED_) && !defined(_COMPILER_ASAN_ENABLED_) + // TODO: Figure out why msan and now asan too need this. assert(!code_allocated); code_allocated = true; +#endif total_allocated += Size; + jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); + jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size); if (exe_alloc) return (uint8_t*)exe_alloc->alloc(Size, Alignment); return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID, @@ -872,6 +881,8 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, bool isReadOnly) { total_allocated += Size; + jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); + jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size); if (!isReadOnly) return (uint8_t*)rw_alloc.alloc(Size, Alignment); if (ro_alloc) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index db3807de988b2..8c6880d7245cd 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -8,7 +8,6 @@ STATISTIC(EmittedPointerFromObjref, "Number of emitted pointer_from_objref calls"); STATISTIC(EmittedPointerBitcast, "Number of emitted pointer bitcasts"); -STATISTIC(EmittedNthPtrAddr, "Number of emitted nth pointer address instructions"); STATISTIC(EmittedTypeof, "Number of emitted typeof instructions"); STATISTIC(EmittedErrors, "Number of emitted errors"); STATISTIC(EmittedConditionalErrors, "Number of emitted conditional errors"); @@ -29,20 +28,14 @@ STATISTIC(EmittedGetfieldKnowns, "Number of known getfield calls emitted"); STATISTIC(EmittedSetfield, "Number of setfield calls emitted"); STATISTIC(EmittedUnionLoads, "Number of union loads emitted"); STATISTIC(EmittedVarargsLength, "Number of varargs length calls emitted"); -STATISTIC(EmittedArraysize, "Number of arraysize calls emitted"); -STATISTIC(EmittedArraylen, "Number of array length calls emitted"); -STATISTIC(EmittedArrayptr, "Number of array data pointer loads emitted"); -STATISTIC(EmittedArrayflags, "Number of arrayflags calls emitted"); -STATISTIC(EmittedArrayNDims, "Number of array ndims calls emitted"); +STATISTIC(EmittedArrayptr, "Number of array ptr calls emitted"); STATISTIC(EmittedArrayElsize, "Number of array elsize calls emitted"); -STATISTIC(EmittedArrayOffset, "Number of array offset calls emitted"); STATISTIC(EmittedArrayNdIndex, "Number of array nd index calls emitted"); STATISTIC(EmittedBoxes, "Number of box operations emitted"); STATISTIC(EmittedCPointerChecks, "Number of C pointer checks emitted"); STATISTIC(EmittedAllocObjs, "Number of object allocations emitted"); STATISTIC(EmittedWriteBarriers, "Number of write barriers emitted"); STATISTIC(EmittedNewStructs, "Number of new structs emitted"); -STATISTIC(EmittedSignalFences, "Number of signal fences emitted"); STATISTIC(EmittedDeferSignal, "Number of deferred signals emitted"); static Value *track_pjlvalue(jl_codectx_t &ctx, Value *V) @@ -113,15 +106,30 @@ AtomicOrdering get_llvm_atomic_order(enum jl_memory_order order) static Value *stringConstPtr( jl_codegen_params_t &emission_context, IRBuilder<> &irbuilder, - const std::string &txt) + const Twine &txt) { Module *M = jl_builderModule(irbuilder); - StringRef ctxt(txt.c_str(), txt.size() + 1); - Constant *Data = ConstantDataArray::get(irbuilder.getContext(), 
arrayRefFromStringRef(ctxt)); - GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, "_j_str", *M); + SmallVector ctxt; + txt.toVector(ctxt); + // null-terminate the string + ctxt.push_back(0); + Constant *Data = ConstantDataArray::get(irbuilder.getContext(), ctxt); + ctxt.pop_back(); + // We use this for the name of the gv, so cap its size to avoid memory blowout + if (ctxt.size() > 28) { + ctxt.resize(28); + ctxt[25] = ctxt[26] = ctxt[27] = '.'; + } + // Doesn't need to be aligned, we shouldn't operate on these like julia objects + GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, Align(1), "_j_str_" + StringRef(ctxt.data(), ctxt.size()), *M); Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0); Value *Args[] = { zero, zero }; - return irbuilder.CreateInBoundsGEP(gv->getValueType(), gv, Args); + auto gep = irbuilder.CreateInBoundsGEP(gv->getValueType(), + // AddrSpaceCast in case globals are in non-0 AS + irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)), + Args); + setName(emission_context, gep, "string_const_ptr"); + return gep; } @@ -152,6 +160,39 @@ Metadata *to_md_tree(jl_value_t *val, LLVMContext &ctxt) { // --- Debug info --- +static DICompileUnit *getOrCreateJuliaCU(Module &M, + DICompileUnit::DebugEmissionKind emissionKind, + DICompileUnit::DebugNameTableKind tableKind) +{ + // TODO: share debug objects globally in the context, instead of allocating a new one every time + // or figure out how to delete them afterwards? + // But at least share them a little bit here + auto CUs = M.debug_compile_units(); + for (DICompileUnit *CU : CUs) { + if (CU->getEmissionKind() == emissionKind && + CU->getNameTableKind() == tableKind) + return CU; + } + DIFile *topfile = DIFile::get(M.getContext(), "julia", "."); + DIBuilder dbuilder(M); + DICompileUnit *CU = + dbuilder.createCompileUnit(llvm::dwarf::DW_LANG_Julia + ,topfile // File + ,"julia" // Producer + ,true // isOptimized + ,"" // Flags + ,0 // RuntimeVersion + ,"" // SplitName + ,emissionKind // Kind + ,0 // DWOId + ,true // SplitDebugInlining + ,false // DebugInfoForProfiling + ,tableKind // NameTableKind + ); + dbuilder.finalize(); + return CU; +} + static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_debugcache_t &debuginfo, jl_value_t *jt, DIBuilder *dbuilder, bool isboxed) { jl_datatype_t *jdt = (jl_datatype_t*)jt; @@ -167,9 +208,9 @@ static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_debugcache_t &debu uint64_t SizeInBits = jl_datatype_nbits(jdt); ditype = dbuilder->createBasicType(tname, SizeInBits, llvm::dwarf::DW_ATE_unsigned); } - else if (jl_is_structtype(jt) && !jl_is_layout_opaque(jdt->layout)) { + else if (jl_is_structtype(jt) && !jl_is_layout_opaque(jdt->layout) && !jl_is_array_type(jdt)) { size_t ntypes = jl_datatype_nfields(jdt); - std::vector Elements(ntypes); + SmallVector Elements(ntypes); for (unsigned i = 0; i < ntypes; i++) { jl_value_t *el = jl_field_type_concrete(jdt, i); DIType *di; @@ -232,7 +273,7 @@ void jl_debugcache_t::initialize(Module *m) { __alignof__(jl_value_t*) * 8); SmallVector Elts; - std::vector diargs(0); + SmallVector diargs(0); Elts.push_back(jl_pvalue_dillvmt); dbuilder.replaceArrays(jl_value_dillvmt, dbuilder.getOrCreateArray(Elts)); @@ -268,19 +309,34 @@ static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V) return Call; } -static Value *get_gc_root_for(const jl_cgval_t &x) +static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt); +static void 
emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile=false); + +static Value *get_gc_root_for(jl_codectx_t &ctx, const jl_cgval_t &x) { - if (x.Vboxed) + if (x.constant || x.typ == jl_bottom_type) + return nullptr; + if (x.Vboxed) // superset of x.isboxed return x.Vboxed; - if (x.ispointer() && !x.constant) { + assert(!x.isboxed); +#ifndef NDEBUG + if (x.ispointer()) { assert(x.V); if (PointerType *T = dyn_cast(x.V->getType())) { - if (T->getAddressSpace() == AddressSpace::Tracked || - T->getAddressSpace() == AddressSpace::Derived) { - return x.V; + assert(T->getAddressSpace() != AddressSpace::Tracked); + if (T->getAddressSpace() == AddressSpace::Derived) { + // n.b. this IR would not be valid after LLVM-level inlining, + // since codegen does not have a way to determine the whether + // this argument value needs to be re-rooted } } } +#endif + if (jl_is_concrete_immutable(x.typ) && !jl_is_pointerfree(x.typ)) { + Type *T = julia_type_to_llvm(ctx, x.typ); + return emit_unbox(ctx, T, x, x.typ); + } + // nothing here to root, move along return nullptr; } @@ -289,17 +345,18 @@ static Value *get_gc_root_for(const jl_cgval_t &x) static inline Constant *literal_static_pointer_val(const void *p, Type *T); -static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) +static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) { // emit a GlobalVariable for a jl_value_t named "cname" // store the name given so we can reuse it (facilitating merging later) // so first see if there already is a GlobalVariable for this address - GlobalVariable* &gv = ctx.global_targets[addr]; + GlobalVariable* &gv = ctx.emission_context.global_targets[addr]; Module *M = jl_Module; StringRef localname; std::string gvname; if (!gv) { - raw_string_ostream(gvname) << cname << ctx.global_targets.size(); + uint64_t id = jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); // TODO: use ctx.emission_context.global_targets.size() + raw_string_ostream(gvname) << cname << id; localname = StringRef(gvname); } else { @@ -309,7 +366,7 @@ static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) } if (gv == nullptr) gv = new GlobalVariable(*M, ctx.types().T_pjlvalue, - false, GlobalVariable::PrivateLinkage, + false, GlobalVariable::ExternalLinkage, NULL, localname); // LLVM passes sometimes strip metadata when moving load around // since the load at the new location satisfy the same condition as the original one. 
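The julia_pgv hunk above also changes how cached globals are named: instead of numbering them by `ctx.global_targets.size()`, a process-wide counter is bumped with `jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1)`, so names stay unique even when several emission contexts create globals concurrently, and the globals now get ExternalLinkage so they can be merged later. A rough sketch of that naming scheme using standard C++ atomics rather than the patch's jl_atomic_* wrappers (names here are illustrative):

    #include <atomic>
    #include <cstdint>
    #include <string>

    static std::atomic<uint64_t> global_name_counter{0};

    // Produce a name like "jl_global#42" that no other thread can also hand out.
    static std::string unique_global_name(const char *prefix) {
        uint64_t id = global_name_counter.fetch_add(1, std::memory_order_relaxed);
        return std::string(prefix) + std::to_string(id);
    }
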
@@ -321,55 +378,57 @@ static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) return gv; } -static Value *julia_pgv(jl_codectx_t &ctx, const char *prefix, jl_sym_t *name, jl_module_t *mod, void *addr) +static Constant *julia_pgv(jl_codectx_t &ctx, const char *prefix, jl_sym_t *name, jl_module_t *mod, void *addr) { // emit a GlobalVariable for a jl_value_t, using the prefix, name, and module to - // to create a readable name of the form prefixModA.ModB.name - size_t len = strlen(jl_symbol_name(name)) + strlen(prefix) + 1; + // to create a readable name of the form prefixModA.ModB.name# + // reverse-of-reverse algorithm + std::string finalname; + StringRef name_str(jl_symbol_name(name)); + finalname.resize(name_str.size() + 1); + finalname[0] = '#'; + std::reverse_copy(name_str.begin(), name_str.end(), finalname.begin() + 1); jl_module_t *parent = mod, *prev = NULL; - while (parent != NULL && parent != prev) { - len += strlen(jl_symbol_name(parent->name))+1; + while (parent && parent != prev) { + size_t orig_end = finalname.size() + 1; + StringRef parent_name(jl_symbol_name(parent->name)); + finalname.resize(orig_end + parent_name.size()); + finalname[orig_end - 1] = '.'; + std::reverse_copy(parent_name.begin(), parent_name.end(), finalname.begin() + orig_end); prev = parent; parent = parent->parent; } - char *fullname = (char*)alloca(len); - strcpy(fullname, prefix); - len -= strlen(jl_symbol_name(name)) + 1; - strcpy(fullname + len, jl_symbol_name(name)); - parent = mod; - prev = NULL; - while (parent != NULL && parent != prev) { - size_t part = strlen(jl_symbol_name(parent->name)) + 1; - strcpy(fullname + len - part, jl_symbol_name(parent->name)); - fullname[len - 1] = '.'; - len -= part; - prev = parent; - parent = parent->parent; - } - return julia_pgv(ctx, fullname, addr); + size_t orig_end = finalname.size(); + StringRef prefix_name(prefix); + finalname.resize(orig_end + prefix_name.size()); + std::reverse_copy(prefix_name.begin(), prefix_name.end(), finalname.begin() + orig_end); + std::reverse(finalname.begin(), finalname.end()); + return julia_pgv(ctx, finalname.c_str(), addr); } static JuliaVariable *julia_const_gv(jl_value_t *val); -static Value *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p) +static Constant *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p) { // emit a pointer to a jl_value_t* which will allow it to be valid across reloading code // also, try to give it a nice name for gdb, for easy identification - if (!ctx.emission_context.imaging) { - // TODO: this is an optimization, but is it useful or premature - // (it'll block any attempt to cache these, but can be simply deleted) - Module *M = jl_Module; - GlobalVariable *gv = new GlobalVariable( - *M, ctx.types().T_pjlvalue, true, GlobalVariable::PrivateLinkage, - literal_static_pointer_val(p, ctx.types().T_pjlvalue)); - gv->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - return gv; - } if (JuliaVariable *gv = julia_const_gv(p)) { // if this is a known special object, use the existing GlobalValue return prepare_global_in(jl_Module, gv); } if (jl_is_datatype(p)) { jl_datatype_t *addr = (jl_datatype_t*)p; + if (addr->smalltag) { + // some common builtin datatypes have a special pool for accessing them by smalltag id + Constant *tag = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), addr->smalltag << 4); + Constant *smallp = ConstantExpr::getInBoundsGetElementPtr(getInt8Ty(ctx.builder.getContext()), prepare_global_in(jl_Module, jl_small_typeof_var), tag); + auto ty = 
ctx.types().T_ppjlvalue; + if (ty->getPointerAddressSpace() == smallp->getType()->getPointerAddressSpace()) + return ConstantExpr::getBitCast(smallp, ty); + else { + Constant *newsmallp = ConstantExpr::getAddrSpaceCast(smallp, ty); + return ConstantExpr::getBitCast(newsmallp, ty); + } + } // DataTypes are prefixed with a + return julia_pgv(ctx, "+", addr->name->name, addr->name->module, p); } @@ -395,11 +454,7 @@ static Value *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p) static size_t dereferenceable_size(jl_value_t *jt) { - if (jl_is_array_type(jt)) { - // Array has at least this much data - return sizeof(jl_array_t); - } - else if (jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)) { + if (jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)) { return jl_datatype_size(jt); } return 0; @@ -408,15 +463,12 @@ static size_t dereferenceable_size(jl_value_t *jt) // Return the min required / expected alignment of jltype (on the stack or heap) static unsigned julia_alignment(jl_value_t *jt) { - if (jl_is_array_type(jt)) { - // Array always has this alignment - return JL_SMALL_BYTE_ALIGNMENT; - } if (jt == (jl_value_t*)jl_datatype_type) { // types are never allocated in julia code/on the stack // and this is the guarantee we have for the GC bits return 16; } + assert(jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)); unsigned alignment = jl_datatype_align(jt); if (alignment > JL_HEAP_ALIGNMENT) @@ -428,7 +480,7 @@ static inline void maybe_mark_argument_dereferenceable(AttrBuilder &B, jl_value_ { B.addAttribute(Attribute::NonNull); B.addAttribute(Attribute::NoUndef); - // The `dereferencable` below does not imply `nonnull` for non addrspace(0) pointers. + // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers. size_t size = dereferenceable_size(jt); if (size) { B.addDereferenceableAttr(size); @@ -441,7 +493,7 @@ static inline Instruction *maybe_mark_load_dereferenceable(Instruction *LI, bool { if (isa(LI->getType())) { if (!can_be_null) - // The `dereferencable` below does not imply `nonnull` for non addrspace(0) pointers. + // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers. 
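For the literal_pointer_val_slot hunk above: datatypes with a nonzero `smalltag` live in a fixed pool, so their slot address can be folded to a constant GEP into `jl_small_typeof_var` instead of minting a new per-type global; `smalltag << 4` serves as a byte offset into that table. Roughly the address arithmetic the emitted constant expression performs, as a sketch (assuming `jl_small_typeof` is the runtime array that the later emit_typeof hunk also indexes):

    #include "julia.h"   // assumed available for jl_value_t / jl_datatype_t

    // Byte-offset view of the small-typeof pool lookup.
    static jl_value_t **smalltag_slot(jl_datatype_t *dt) {
        char *base = (char*)jl_small_typeof;                   // pool of well-known type pointers
        return (jl_value_t**)(base + ((size_t)dt->smalltag << 4));
    }
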
LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(LI->getContext(), None)); if (size) { Metadata *OP = ConstantAsMetadata::get(ConstantInt::get(getInt64Ty(LI->getContext()), size)); @@ -470,12 +522,13 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) { if (p == NULL) return Constant::getNullValue(ctx.types().T_pjlvalue); - if (!ctx.emission_context.imaging) - return literal_static_pointer_val(p, ctx.types().T_pjlvalue); Value *pgv = literal_pointer_val_slot(ctx, p); - return tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable( + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + auto load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))), false, jl_typeof(p))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } // Returns ctx.types().T_pjlvalue @@ -484,13 +537,15 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p) // emit a pointer to any jl_value_t which will be valid across reloading code if (p == NULL) return Constant::getNullValue(ctx.types().T_pjlvalue); - if (!ctx.emission_context.imaging) - return literal_static_pointer_val(p, ctx.types().T_pjlvalue); // bindings are prefixed with jl_bnd# - Value *pgv = julia_pgv(ctx, "jl_bnd#", p->name, p->owner, p); - return tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable( + jl_globalref_t *gr = p->globalref; + Value *pgv = gr ? julia_pgv(ctx, "jl_bnd#", gr->name, gr->mod, p) : julia_pgv(ctx, "jl_bnd#", p); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + auto load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))), false, sizeof(jl_binding_t), alignof(jl_binding_t))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } // bitcast a value, but preserve its address space when dealing with pointer types @@ -517,7 +572,7 @@ static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) { static Value *julia_binding_pvalue(jl_codectx_t &ctx, Value *bv) { bv = emit_bitcast(ctx, bv, ctx.types().T_pprjlvalue); - Value *offset = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_binding_t, value) / sizeof(size_t)); + Value *offset = ConstantInt::get(ctx.types().T_size, offsetof(jl_binding_t, value) / ctx.types().sizeof_ptr); return ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, bv, offset); } @@ -525,11 +580,12 @@ static Value *julia_binding_gv(jl_codectx_t &ctx, jl_binding_t *b) { // emit a literal_pointer_val to a jl_binding_t // binding->value are prefixed with * - if (ctx.emission_context.imaging) - return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, - julia_pgv(ctx, "*", b->name, b->owner, b), Align(sizeof(void*)))); - else - return literal_static_pointer_val(b, ctx.types().T_pjlvalue); + jl_globalref_t *gr = b->globalref; + Value *pgv = gr ? 
julia_pgv(ctx, "*", gr->name, gr->mod, b) : julia_pgv(ctx, "*jl_bnd#", b); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + auto load = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*)))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } // --- mapping between julia and llvm types --- @@ -588,7 +644,7 @@ static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed return _julia_type_to_llvm(&ctx.emission_context, ctx.builder.getContext(), jt, isboxed); } -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN Type *jl_type_to_llvm_impl(jl_value_t *jt, LLVMContextRef ctxt, bool *isboxed) { return _julia_type_to_llvm(NULL, *unwrap(ctxt), jt, isboxed); @@ -611,6 +667,8 @@ static Type *bitstype_to_llvm(jl_value_t *bt, LLVMContext &ctxt, bool llvmcall = return getFloatTy(ctxt); if (bt == (jl_value_t*)jl_float64_type) return getDoubleTy(ctxt); + if (bt == (jl_value_t*)jl_bfloat16_type) + return getBFloatTy(ctxt); if (jl_is_llvmpointer_type(bt)) { jl_value_t *as_param = jl_tparam1(bt); int as; @@ -639,6 +697,37 @@ static unsigned jl_field_align(jl_datatype_t *dt, size_t i) return std::min({al, (unsigned)jl_datatype_align(dt), (unsigned)JL_HEAP_ALIGNMENT}); } +static llvm::StructType* get_jlmemoryref(llvm::LLVMContext &C, unsigned AS) { + return llvm::StructType::get(C, { + llvm::PointerType::get(llvm::Type::getInt8Ty(C), AS), + JuliaType::get_prjlvalue_ty(C), + }); +} +static llvm::StructType* get_jlmemoryboxedref(llvm::LLVMContext &C, unsigned AS) { + return llvm::StructType::get(C, { + llvm::PointerType::get(JuliaType::get_prjlvalue_ty(C), AS), + JuliaType::get_prjlvalue_ty(C), + }); +} +static llvm::StructType* get_jlmemoryunionref(llvm::LLVMContext &C, llvm::Type *T_size) { + return llvm::StructType::get(C, { + T_size, // offset + JuliaType::get_prjlvalue_ty(C), + }); +} +static StructType *get_memoryref_type(LLVMContext &ctxt, Type *T_size, const jl_datatype_layout_t *layout, unsigned AS) +{ + // TODO: try to remove this slightly odd special case + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + bool isghost = layout->size == 0; + if (isboxed) + return get_jlmemoryboxedref(ctxt, AS); + if (isunion || isghost) + return get_jlmemoryunionref(ctxt, T_size); + return get_jlmemoryref(ctxt, AS); +} + static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall) { // this function converts a Julia Type into the equivalent LLVM struct @@ -650,7 +739,13 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, if (jl_is_primitivetype(jt)) return bitstype_to_llvm(jt, ctxt, llvmcall); jl_datatype_t *jst = (jl_datatype_t*)jt; - if (jl_is_structtype(jt) && !(jst->layout && jl_is_layout_opaque(jst->layout))) { + if (jl_is_structtype(jt) && !(jst->layout && jl_is_layout_opaque(jst->layout)) && !jl_is_array_type(jst) && !jl_is_genericmemory_type(jst)) { + if (jl_is_genericmemoryref_type(jst)) { + jl_value_t *mty_dt = jl_field_type_concrete(jst, 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + Type *T_size = bitstype_to_llvm((jl_value_t*)jl_long_type, ctxt); + return get_memoryref_type(ctxt, T_size, layout, 0); + } bool isTuple = jl_is_tuple_type(jt); jl_svec_t *ftypes = jl_get_fieldtypes(jst); size_t i, ntypes = jl_svec_len(ftypes); @@ -666,7 +761,7 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, 
LLVMContext &ctxt, Type *&struct_decl = (ctx && !llvmcall ? ctx->llvmtypes[jst] : _struct_decl); if (struct_decl) return struct_decl; - std::vector latypes(0); + SmallVector latypes(0); bool isarray = true; bool isvector = true; jl_value_t *jlasttype = NULL; @@ -688,17 +783,15 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, lty = JuliaType::get_prjlvalue_ty(ctxt); isvector = false; } - else if (ty == (jl_value_t*)jl_bool_type) { - lty = getInt8Ty(ctxt); - } else if (jl_is_uniontype(ty)) { // pick an Integer type size such that alignment will generally be correct, // and always end with an Int8 (selector byte). // We may need to insert padding first to get to the right offset size_t fsz = 0, al = 0; bool isptr = !jl_islayout_inline(ty, &fsz, &al); - assert(!isptr && fsz == jl_field_size(jst, i) - 1); (void)isptr; - if (fsz > 0) { + assert(!isptr && fsz < jl_field_size(jst, i)); (void)isptr; + size_t fsz1 = jl_field_size(jst, i) - 1; + if (fsz1 > 0) { if (al > MAX_ALIGN) { Type *AlignmentType; AlignmentType = ArrayType::get(FixedVectorType::get(getInt8Ty(ctxt), al), 0); @@ -706,8 +799,8 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, al = MAX_ALIGN; } Type *AlignmentType = IntegerType::get(ctxt, 8 * al); - unsigned NumATy = fsz / al; - unsigned remainder = fsz % al; + unsigned NumATy = fsz1 / al; + unsigned remainder = fsz1 % al; assert(al == 1 || NumATy > 0); while (NumATy--) latypes.push_back(AlignmentType); @@ -765,13 +858,13 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, // // pick an Integer type size such that alignment will be correct // // and always end with an Int8 (selector byte) // lty = ArrayType::get(IntegerType::get(lty->getContext(), 8 * al), fsz / al); - // std::vector Elements(2); + // SmallVector Elements(2); // Elements[0] = lty; // Elements[1] = getInt8Ty(ctxt); // unsigned remainder = fsz % al; // while (remainder--) // Elements.push_back(getInt8Ty(ctxt)); - // lty = StructType::get(lty->getContext(), makeArrayRef(Elements)); + // lty = StructType::get(lty->getContext(),ArrayRef(Elements)); // } if (isboxed) *isboxed = true; return JuliaType::get_prjlvalue_ty(ctxt); @@ -842,7 +935,8 @@ static bool is_uniontype_allunboxed(jl_value_t *typ) return for_each_uniontype_small([&](unsigned, jl_datatype_t*) {}, typ, counter); } -static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull=false); +static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool justtag, bool notag=false); +static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull=false, bool justtag=false); static unsigned get_box_tindex(jl_datatype_t *jt, jl_value_t *ut) { @@ -871,7 +965,7 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x) if (x.constant) { Constant *val = julia_const_to_llvm(ctx, x.constant); if (val) - data = get_pointer_to_constant(ctx.emission_context, val, "_j_const", *jl_Module); + data = get_pointer_to_constant(ctx.emission_context, val, Align(julia_alignment(jl_typeof(x.constant))), "_j_const", *jl_Module); else data = literal_pointer_val(ctx, x.constant); } @@ -885,27 +979,26 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x) return data; } -static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Value *src, MDNode *tbaa_src, - uint64_t sz, unsigned align, bool is_volatile) +static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, 
Value *src, + jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align_dst, unsigned align_src, bool is_volatile) { if (sz == 0) return; - assert(align && "align must be specified"); + assert(align_dst && "align must be specified"); // If the types are small and simple, use load and store directly. // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int // that interferes with other optimizations. -#ifndef JL_LLVM_OPAQUE_POINTERS // TODO: Restore this for opaque pointers? Needs extra type information from the caller. - if (sz <= 64) { + if (ctx.builder.getContext().supportsTypedPointers() && sz <= 64) { // The size limit is arbitrary but since we mainly care about floating points and // machine size vectors this should be enough. const DataLayout &DL = jl_Module->getDataLayout(); auto srcty = cast(src->getType()); //TODO unsafe nonopaque pointer - auto srcel = srcty->getPointerElementType(); + auto srcel = srcty->getNonOpaquePointerElementType(); auto dstty = cast(dst->getType()); //TODO unsafe nonopaque pointer - auto dstel = dstty->getPointerElementType(); + auto dstel = dstty->getNonOpaquePointerElementType(); while (srcel->isArrayTy() && srcel->getArrayNumElements() == 1) { src = ctx.builder.CreateConstInBoundsGEP2_32(srcel, src, 0, 0); srcel = srcel->getArrayElementType(); @@ -928,132 +1021,148 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Va src = emit_bitcast(ctx, src, dstty); } if (directel) { - auto val = tbaa_decorate(tbaa_src, ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile)); - tbaa_decorate(tbaa_dst, ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile)); + if (isa(src) && !src->hasName()) + setName(ctx.emission_context, src, "memcpy_refined_src"); + if (isa(dst) && !dst->hasName()) + setName(ctx.emission_context, dst, "memcpy_refined_dst"); + auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, MaybeAlign(align_src), is_volatile)); + dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align_dst), is_volatile)); ++SkippedMemcpys; return; } } -#endif + ++EmittedMemcpys; + // the memcpy intrinsic does not allow to specify different alias tags // for the load part (x.tbaa) and the store part (ctx.tbaa().tbaa_stack). // since the tbaa lattice has to be a tree we have unfortunately // x.tbaa ∪ ctx.tbaa().tbaa_stack = tbaa_root if x.tbaa != ctx.tbaa().tbaa_stack - ++EmittedMemcpys; - ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src)); + + // Now that we use scoped aliases to label disparate regions of memory, the TBAA + // metadata should be revisited so that it only represents memory layouts. Once + // that's done, we can expect that in most cases tbaa(src) == tbaa(dst) and the + // above problem won't be as serious. 
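The emit_memcpy_llvm rework above keeps the existing small-copy fast path (for simply-typed copies of at most 64 bytes it emits a plain load/store pair so SROA is not confused by bitcasts around llvm.memcpy), but now threads `jl_aliasinfo_t` through for both operands and honors separate source and destination alignments. A stripped-down sketch of that shape using the plain IRBuilder API (illustrative only, without the alias metadata the patch attaches):

    #include <cstdint>
    #include "llvm/IR/IRBuilder.h"

    // Copy `sz` bytes from src to dst: direct load/store for tiny known element types,
    // llvm.memcpy with both alignments specified otherwise.
    static void copy_bytes(llvm::IRBuilder<> &B, llvm::Type *elty, llvm::Value *dst,
                           llvm::Value *src, uint64_t sz,
                           unsigned align_dst, unsigned align_src) {
        if (elty && sz <= 64) {
            llvm::Value *v = B.CreateAlignedLoad(elty, src, llvm::MaybeAlign(align_src));
            B.CreateAlignedStore(v, dst, llvm::MaybeAlign(align_dst));
        }
        else {
            B.CreateMemCpy(dst, llvm::MaybeAlign(align_dst),
                           src, llvm::MaybeAlign(align_src), sz);
        }
    }
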
+ + auto merged_ai = dst_ai.merge(src_ai); + ctx.builder.CreateMemCpy(dst, Align(align_dst), src, Align(align_src), sz, is_volatile, + merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); } -static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Value *src, MDNode *tbaa_src, - Value *sz, unsigned align, bool is_volatile) +static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, + jl_aliasinfo_t const &src_ai, Value *sz, unsigned align_dst, unsigned align_src, bool is_volatile) { if (auto const_sz = dyn_cast(sz)) { - emit_memcpy_llvm(ctx, dst, tbaa_dst, src, tbaa_src, const_sz->getZExtValue(), align, is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align_dst, align_src, is_volatile); return; } ++EmittedMemcpys; - ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src)); -} -template -static void emit_memcpy(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Value *src, MDNode *tbaa_src, - T1 &&sz, unsigned align, bool is_volatile=false) -{ - emit_memcpy_llvm(ctx, dst, tbaa_dst, src, tbaa_src, sz, align, is_volatile); + auto merged_ai = dst_ai.merge(src_ai); + ctx.builder.CreateMemCpy(dst, MaybeAlign(align_dst), src, MaybeAlign(align_src), sz, is_volatile, + merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); } template -static void emit_memcpy(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, const jl_cgval_t &src, - T1 &&sz, unsigned align, bool is_volatile=false) -{ - emit_memcpy_llvm(ctx, dst, tbaa_dst, data_pointer(ctx, src), src.tbaa, sz, align, is_volatile); -} - -static Value *emit_nthptr_addr(jl_codectx_t &ctx, Value *v, ssize_t n, bool gctracked = true) +static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, + jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false) { - ++EmittedNthPtrAddr; - return ctx.builder.CreateInBoundsGEP( - ctx.types().T_prjlvalue, - emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), n)); + emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, align_src, is_volatile); } -static Value *emit_nthptr_addr(jl_codectx_t &ctx, Value *v, Value *idx) +template +static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src, + T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false) { - ++EmittedNthPtrAddr; - return ctx.builder.CreateInBoundsGEP( - ctx.types().T_prjlvalue, - emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue), - idx); + auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa); + emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src, is_volatile); } -static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type) +static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt) { - // p = (jl_value_t**)v; *(type*)&p[n] - Value *vptr = emit_nthptr_addr(ctx, v, idx); - return cast(tbaa_decorate(tbaa, ctx.builder.CreateLoad(type, - emit_bitcast(ctx, vptr, PointerType::get(type, 0))))); + if (dt->smalltag) + return ConstantInt::get(ctx.types().T_size, dt->smalltag << 4); + auto tag = ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, (jl_value_t*)dt), ctx.types().T_size); + setName(ctx.emission_context, 
tag, jl_symbol_name(dt->name->name)); + return tag; } -static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, ssize_t n, MDNode *tbaa, Type *type) -{ - // p = (jl_value_t**)v; *(type*)&p[n] - Value *vptr = emit_nthptr_addr(ctx, v, n); - return cast(tbaa_decorate(tbaa, ctx.builder.CreateLoad(type, - emit_bitcast(ctx, vptr, PointerType::get(type, 0))))); - } - -static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v); -static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull); - -static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) +// Returns justtag ? ctx.types.T_size : ctx.types().T_prjlvalue +static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull, bool justtag) { // given p, compute its type + jl_datatype_t *dt = NULL; if (p.constant) - return mark_julia_const(ctx, jl_typeof(p.constant)); - if (p.isboxed && !jl_is_concrete_type(p.typ)) { - if (jl_is_type_type(p.typ)) { - jl_value_t *tp = jl_tparam0(p.typ); - if (!jl_is_type(tp) || jl_is_concrete_type(tp)) { - // convert 1::Type{1} ==> typeof(1) ==> Int - return mark_julia_const(ctx, jl_typeof(tp)); - } + dt = (jl_datatype_t*)jl_typeof(p.constant); + else if (jl_is_concrete_type(p.typ)) + dt = (jl_datatype_t*)p.typ; + if (dt) { + if (justtag) + return emit_tagfrom(ctx, dt); + return track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)dt)); + } + auto notag = [justtag] (jl_value_t *typ) { + // compute if the tag is always a type (not a builtin tag) + // based on having no intersection with one of the special types + // this doesn't matter if the user just wants the tag value + if (justtag) + return false; + jl_value_t *uw = jl_unwrap_unionall(typ); + if (jl_is_datatype(uw)) { // quick path to catch common cases + jl_datatype_t *dt = (jl_datatype_t*)uw; + assert(!dt->smalltag); + if (!dt->name->abstract) + return true; + if (dt == jl_any_type) + return false; } - return mark_julia_type(ctx, emit_typeof(ctx, p.V, maybenull), true, jl_datatype_type); - } + if (jl_has_intersect_type_not_kind(typ)) + return false; + for (size_t i = 0; i < jl_tags_count; i++) { + jl_datatype_t *dt = jl_small_typeof[(i << 4) / sizeof(*jl_small_typeof)]; + if (dt && !jl_has_empty_intersection((jl_value_t*)dt, typ)) + return false; + } + return true; + }; + if (p.isboxed) + return emit_typeof(ctx, p.V, maybenull, justtag, notag(p.typ)); if (p.TIndex) { Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); bool allunboxed = is_uniontype_allunboxed(p.typ); - Value *datatype_or_p = ctx.emission_context.imaging ? Constant::getNullValue(ctx.types().T_ppjlvalue) : Constant::getNullValue(ctx.types().T_prjlvalue); + Type *expr_type = justtag ? 
ctx.types().T_size : ctx.types().T_pjlvalue; + Value *datatype_or_p = Constant::getNullValue(expr_type->getPointerTo()); unsigned counter = 0; for_each_uniontype_small( [&](unsigned idx, jl_datatype_t *jt) { Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx)); - Value *ptr; - if (ctx.emission_context.imaging) { - ptr = literal_pointer_val_slot(ctx, (jl_value_t*)jt); + Constant *ptr; + if (justtag && jt->smalltag) { + ptr = get_pointer_to_constant(ctx.emission_context, ConstantInt::get(expr_type, jt->smalltag << 4), Align(sizeof(jl_value_t*)), StringRef("_j_smalltag_") + jl_symbol_name(jt->name->name), *jl_Module); } else { - ptr = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jt)); + ptr = ConstantExpr::getBitCast(literal_pointer_val_slot(ctx, (jl_value_t*)jt), datatype_or_p->getType()); } datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p); + setName(ctx.emission_context, datatype_or_p, "typetag_ptr"); }, p.typ, counter); auto emit_unboxty = [&] () -> Value* { - if (ctx.emission_context.imaging) - return track_pjlvalue( - ctx, tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, datatype_or_p, Align(sizeof(void*))))); - return datatype_or_p; + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *datatype = ai.decorateInst(ctx.builder.CreateAlignedLoad(expr_type, datatype_or_p, Align(sizeof(void*)))); + setName(ctx.emission_context, datatype, "typetag"); + return justtag ? datatype : track_pjlvalue(ctx, datatype); }; Value *res; if (!allunboxed) { Value *isnull = ctx.builder.CreateIsNull(datatype_or_p); + setName(ctx.emission_context, isnull, "typetag_isnull"); BasicBlock *boxBB = BasicBlock::Create(ctx.builder.getContext(), "boxed", ctx.f); BasicBlock *unboxBB = BasicBlock::Create(ctx.builder.getContext(), "unboxed", ctx.f); BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge", ctx.f); ctx.builder.CreateCondBr(isnull, boxBB, unboxBB); ctx.builder.SetInsertPoint(boxBB); - auto boxTy = emit_typeof(ctx, p.Vboxed, maybenull); + auto boxTy = emit_typeof(ctx, p.Vboxed, maybenull, justtag, notag(p.typ)); ctx.builder.CreateBr(mergeBB); boxBB = ctx.builder.GetInsertBlock(); // could have changed ctx.builder.SetInsertPoint(unboxBB); @@ -1061,44 +1170,62 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybe ctx.builder.CreateBr(mergeBB); unboxBB = ctx.builder.GetInsertBlock(); // could have changed ctx.builder.SetInsertPoint(mergeBB); - auto phi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2); + auto phi = ctx.builder.CreatePHI(boxTy->getType(), 2); phi->addIncoming(boxTy, boxBB); phi->addIncoming(unboxTy, unboxBB); res = phi; + setName(ctx.emission_context, res, "typetag"); } else { res = emit_unboxty(); } - return mark_julia_type(ctx, res, true, jl_datatype_type); + return res; } - return mark_julia_const(ctx, p.typ); -} - -// Returns ctx.types().T_prjlvalue -static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull) -{ - return boxed(ctx, emit_typeof(ctx, p, maybenull)); + assert(0 && "what is this struct"); abort(); } static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt) { Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppjlvalue); - Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, types) / sizeof(void*)); - return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad( + Value *Idx 
= ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + auto types = ai.decorateInst(ctx.builder.CreateAlignedLoad( ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*)))); + setName(ctx.emission_context, types, "datatype_types"); + return types; } static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt) { - Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), getSizePtrTy(ctx.builder.getContext())); - return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), type_svec, Align(sizeof(void*)))); -} - -static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) -{ - Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), getInt32PtrTy(ctx.builder.getContext())); - Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, size) / sizeof(int)); - return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int32_t)))); + Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), ctx.types().T_size->getPointerTo()); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + auto nfields = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*)))); + setName(ctx.emission_context, nfields, "datatype_nfields"); + return nfields; +} + +// emit the size field from the layout of a dt +static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt, bool add_isunion=false) +{ + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), getInt32PtrTy(ctx.builder.getContext())->getPointerTo()); + Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, layout) / sizeof(int32_t*)); + Ptr = ctx.builder.CreateInBoundsGEP(getInt32PtrTy(ctx.builder.getContext()), Ptr, Idx); + Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32PtrTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*)))); + Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t)); + Value *SizePtr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx); + Value *Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), SizePtr, Align(sizeof(int32_t)))); + setName(ctx.emission_context, Size, "datatype_size"); + if (add_isunion) { + Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, flags) / sizeof(int16_t)); + Value *FlagPtr = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), emit_bitcast(ctx, Ptr, getInt16PtrTy(ctx.builder.getContext())), Idx); + Value *Flag = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), FlagPtr, Align(sizeof(int16_t)))); + Flag = ctx.builder.CreateLShr(Flag, 4); + Flag = ctx.builder.CreateAnd(Flag, ConstantInt::get(Flag->getType(), 1)); + Flag = ctx.builder.CreateZExt(Flag, Size->getType()); + Size = ctx.builder.CreateAdd(Size, Flag); + } + return Size; } /* this is valid code, it's simply unused @@ -1120,11 +1247,11 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) BasicBlock *dynloadBB = BasicBlock::Create(ctx.builder.getContext(), "dyn_sizeof", ctx.f); 
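A minimal C sketch of what the new emit_datatype_size above computes, assuming a simplified stand-in for jl_datatype_layout_t (the real field order and flag bits are defined in julia.h; only the size load and the LShr-4/And-1 flag test are taken from the hunk):

#include <stddef.h>
#include <stdint.h>
/* Simplified, illustrative stand-in: only the two fields the hunk reads. */
struct layout_sketch { uint32_t size; uint16_t flags; };
static size_t datatype_size_sketch(const struct layout_sketch *layout, int add_isunion) {
    size_t sz = layout->size;               /* aligned load of the layout's size field */
    if (add_isunion)
        sz += (layout->flags >> 4) & 1;     /* LShr 4, And 1: one extra byte for union element layouts (the type-tag selector) */
    return sz;
}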
BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_sizeof", ctx.f); Value *isboxed = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); ctx.builder.CreateCondBr(isboxed, dynloadBB, postBB); ctx.builder.SetInsertPoint(dynloadBB); - Value *datatype = emit_typeof(p.V); + Value *datatype = emit_typeof(ctx, p.V, false, false); Value *dyn_size = emit_datatype_size(ctx, datatype); ctx.builder.CreateBr(postBB); dynloadBB = ctx.builder.GetInsertBlock(); // could have changed @@ -1144,38 +1271,50 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p) return ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(p.typ)); } else { - Value *datatype = emit_typeof_boxed(ctx, p); + Value *datatype = emit_typeof(ctx, p, false, false); Value *dyn_size = emit_datatype_size(ctx, datatype); return dyn_size; } } -*/ static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt) { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppint8); - Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, name)); - Value *Nam = tbaa_decorate(ctx.tbaa().tbaa_const, + Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, name)); + Value *Nam = ai.decorateInst( ctx.builder.CreateAlignedLoad(getInt8PtrTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8PtrTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*)))); - Value *Idx2 = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized)); - Value *mutabl = tbaa_decorate(ctx.tbaa().tbaa_const, + Value *Idx2 = ConstantInt::get(ctx.types().T_size, offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized)); + Value *mutabl = ai.decorateInst( ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), Nam, Idx2), Align(1))); mutabl = ctx.builder.CreateLShr(mutabl, 1); return ctx.builder.CreateTrunc(mutabl, getInt1Ty(ctx.builder.getContext())); } +*/ -static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *dt) +static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ) { - Value *immut = ctx.builder.CreateNot(emit_datatype_mutabl(ctx, dt)); - Value *nofields = ctx.builder.CreateICmpEQ(emit_datatype_nfields(ctx, dt), Constant::getNullValue(getSizeTy(ctx.builder.getContext()))); - Value *sized = ctx.builder.CreateICmpSGT(emit_datatype_size(ctx, dt), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); - return ctx.builder.CreateAnd(immut, ctx.builder.CreateAnd(nofields, sized)); + Value *isprimitive; + isprimitive = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + isprimitive = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isprimitive, Align(1))); + isprimitive = ctx.builder.CreateLShr(isprimitive, 
7); + isprimitive = ctx.builder.CreateTrunc(isprimitive, getInt1Ty(ctx.builder.getContext())); + setName(ctx.emission_context, isprimitive, "datatype_isprimitive"); + return isprimitive; } static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) { - Value *vptr = emit_nthptr_addr(ctx, dt, (ssize_t)(offsetof(jl_datatype_t, name) / sizeof(char*))); - return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, vptr, Align(sizeof(void*)))); + unsigned n = offsetof(jl_datatype_t, name) / sizeof(char*); + Value *vptr = ctx.builder.CreateInBoundsGEP( + ctx.types().T_pjlvalue, + emit_bitcast(ctx, maybe_decay_tracked(ctx, dt), ctx.types().T_ppjlvalue), + ConstantInt::get(ctx.types().T_size, n)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + auto name = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*)))); + setName(ctx.emission_context, name, "datatype_name"); + return name; } // --- generating various error checks --- @@ -1183,13 +1322,13 @@ static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) // the error is always thrown. This may cause non dominated use // of SSA value error in the verifier. -static void just_emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt) +static void just_emit_error(jl_codectx_t &ctx, Function *F, const Twine &txt) { ++EmittedErrors; ctx.builder.CreateCall(F, stringConstPtr(ctx.emission_context, ctx.builder, txt)); } -static void emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt) +static void emit_error(jl_codectx_t &ctx, Function *F, const Twine &txt) { just_emit_error(ctx, F, txt); ctx.builder.CreateUnreachable(); @@ -1197,13 +1336,13 @@ static void emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt) ctx.builder.SetInsertPoint(cont); } -static void emit_error(jl_codectx_t &ctx, const std::string &txt) +static void emit_error(jl_codectx_t &ctx, const Twine &txt) { emit_error(ctx, prepare_call(jlerror_func), txt); } // DO NOT PASS IN A CONST CONDITION! 
-static void error_unless(jl_codectx_t &ctx, Value *cond, const std::string &msg) +static void error_unless(jl_codectx_t &ctx, Value *cond, const Twine &msg) { ++EmittedConditionalErrors; BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); @@ -1212,7 +1351,7 @@ static void error_unless(jl_codectx_t &ctx, Value *cond, const std::string &msg) ctx.builder.SetInsertPoint(failBB); just_emit_error(ctx, prepare_call(jlerror_func), msg); ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(passBB); + passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } @@ -1226,7 +1365,7 @@ static void raise_exception(jl_codectx_t &ctx, Value *exc, contBB = BasicBlock::Create(ctx.builder.getContext(), "after_throw", ctx.f); } else { - ctx.f->getBasicBlockList().push_back(contBB); + contBB->insertInto(ctx.f); } ctx.builder.SetInsertPoint(contBB); } @@ -1264,6 +1403,9 @@ static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck = template static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func) { + if (!ifnot) { + return func(); + } if (auto Cond = dyn_cast(ifnot)) { if (Cond->isZero()) return defval; @@ -1284,6 +1426,7 @@ static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2); phi->addIncoming(defval, currBB); phi->addIncoming(res, passBB); + setName(ctx.emission_context, phi, "guard_res"); return phi; } @@ -1320,29 +1463,56 @@ static Value *emit_nullcheck_guard2(jl_codectx_t &ctx, Value *nullcheck1, // Returns typeof(v), or null if v is a null pointer at run time and maybenull is true. // This is used when the value might have come from an undefined value (a PhiNode), -// yet we try to read its type to compute a union index when moving the value (a PiNode). +// yet jl_max_tags try to read its type to compute a union index when moving the value (a PiNode). // Returns a ctx.types().T_prjlvalue typed Value -static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull) +static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool justtag, bool notag) { ++EmittedTypeof; assert(v != NULL && !isa(v) && "expected a conditionally boxed value"); + Value *nonnull = maybenull ? null_pointer_cmp(ctx, v) : ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); Function *typeof = prepare_call(jl_typeof_func); - if (maybenull) - return emit_guarded_test(ctx, null_pointer_cmp(ctx, v), Constant::getNullValue(typeof->getReturnType()), [&] { - // e.g. emit_typeof(ctx, v) - return ctx.builder.CreateCall(typeof, {v}); + auto val = emit_guarded_test(ctx, nonnull, Constant::getNullValue(justtag ? ctx.types().T_size : typeof->getReturnType()), [&] { + // e.g. 
emit_typeof(ctx, v) + Value *typetag = ctx.builder.CreateCall(typeof, {v}); + if (notag) + return typetag; + Value *tag = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, typetag), ctx.types().T_size); + if (justtag) + return tag; + auto issmall = ctx.builder.CreateICmpULT(tag, ConstantInt::get(tag->getType(), (uintptr_t)jl_max_tags << 4)); + return emit_guarded_test(ctx, issmall, typetag, [&] { + // we lied a bit: this wasn't really an object (though it was valid for GC rooting) + // and we need to use it as an index to get the real object now + Module *M = jl_Module; + Value *smallp = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), prepare_global_in(M, jl_small_typeof_var), tag); + smallp = ctx.builder.CreateBitCast(smallp, typetag->getType()->getPointerTo(0)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + auto small = ctx.builder.CreateAlignedLoad(typetag->getType(), smallp, M->getDataLayout().getPointerABIAlignment(0)); + small->setMetadata(LLVMContext::MD_nonnull, MDNode::get(M->getContext(), None)); + return ai.decorateInst(small); }); - return ctx.builder.CreateCall(typeof, {v}); + }); + setName(ctx.emission_context, val, "typeof"); + return val; } +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false); -static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg) +static void just_emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const Twine &msg) { Value *msg_val = stringConstPtr(ctx.emission_context, ctx.builder, msg); ctx.builder.CreateCall(prepare_call(jltypeerror_func), { msg_val, maybe_decay_untracked(ctx, type), mark_callee_rooted(ctx, boxed(ctx, x))}); } +static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const Twine &msg) +{ + just_emit_type_error(ctx, x, type, msg); + ctx.builder.CreateUnreachable(); + BasicBlock *cont = BasicBlock::Create(ctx.builder.getContext(), "after_type_error", ctx.f); + ctx.builder.SetInsertPoint(cont); +} + // Should agree with `emit_isa` below static bool _can_optimize_isa(jl_value_t *type, int &counter) { @@ -1353,6 +1523,8 @@ static bool _can_optimize_isa(jl_value_t *type, int &counter) return (_can_optimize_isa(((jl_uniontype_t*)type)->a, counter) && _can_optimize_isa(((jl_uniontype_t*)type)->b, counter)); } + if (type == (jl_value_t*)jl_type_type) + return true; if (jl_is_type_type(type) && jl_pointer_egal(type)) return true; if (jl_has_intersect_type_not_kind(type)) @@ -1372,16 +1544,59 @@ static bool can_optimize_isa_union(jl_uniontype_t *type) } // a simple case of emit_isa that is obvious not to include a safe-point -static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_value_t *dt) -{ - assert(jl_is_concrete_type(dt)); - return ctx.builder.CreateICmpEQ( - emit_typeof_boxed(ctx, arg), - track_pjlvalue(ctx, literal_pointer_val(ctx, dt))); +static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_datatype_t *dt, bool could_be_null=false) +{ + assert(jl_is_concrete_type((jl_value_t*)dt)); + if (arg.TIndex) { + unsigned tindex = get_box_tindex(dt, arg.typ); + if (tindex > 0) { + // optimize more when we know that this is a split union-type where tindex = 0 is invalid + Value *xtindex = ctx.builder.CreateAnd(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), ~UNION_BOX_MARKER)); + auto isa = ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)); + 
setName(ctx.emission_context, isa, "exactly_isa"); + return isa; + } + else if (arg.Vboxed) { + // test for (arg.TIndex == UNION_BOX_MARKER && typeof(arg.V) == type) + Value *isboxed = ctx.builder.CreateICmpEQ(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); + if (could_be_null) { + isboxed = ctx.builder.CreateAnd(isboxed, + ctx.builder.CreateNot(null_pointer_cmp(ctx, arg.Vboxed))); + } + setName(ctx.emission_context, isboxed, "isboxed"); + BasicBlock *currBB = ctx.builder.GetInsertBlock(); + BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f); + BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f); + ctx.builder.CreateCondBr(isboxed, isaBB, postBB); + ctx.builder.SetInsertPoint(isaBB); + Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg.Vboxed, false, true), emit_tagfrom(ctx, dt)); + ctx.builder.CreateBr(postBB); + isaBB = ctx.builder.GetInsertBlock(); // could have changed + ctx.builder.SetInsertPoint(postBB); + PHINode *istype = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2); + istype->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), currBB); + istype->addIncoming(istype_boxed, isaBB); + setName(ctx.emission_context, istype, "exactly_isa"); + return istype; + } else { + // handle the case where we know that `arg` is unboxed (but of unknown type), but that concrete type `type` cannot be unboxed + return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); + } + } + Value *isnull = NULL; + if (could_be_null && arg.isboxed) { + isnull = null_pointer_cmp(ctx, arg.Vboxed); + } + Constant *Vfalse = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); + return emit_guarded_test(ctx, isnull, Vfalse, [&]{ + auto isa = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt)); + setName(ctx.emission_context, isa, "exactly_isa"); + return isa; + }); } static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, - jl_value_t *type, const std::string *msg); + jl_value_t *type, const Twine &msg); static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, SmallVectorImpl,Value*>> &bbs) @@ -1393,7 +1608,7 @@ static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t return; } BasicBlock *enter = ctx.builder.GetInsertBlock(); - Value *v = emit_isa(ctx, x, type, nullptr).first; + Value *v = emit_isa(ctx, x, type, Twine()).first; BasicBlock *exit = ctx.builder.GetInsertBlock(); bbs.emplace_back(std::make_pair(enter, exit), v); BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f); @@ -1401,7 +1616,7 @@ static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t } // Should agree with `_can_optimize_isa` above -static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string *msg) +static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const Twine &msg) { ++EmittedIsa; // TODO: The subtype check below suffers from incorrectness issues due to broken @@ -1421,11 +1636,8 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, known_isa = false; } if (known_isa) { - if (!*known_isa && msg) { - emit_type_error(ctx, x, literal_pointer_val(ctx, type), *msg); - ctx.builder.CreateUnreachable(); - BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); - ctx.builder.SetInsertPoint(failBB); + if (!*known_isa && 
!msg.isTriviallyEmpty()) { + emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg); } return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), *known_isa), true); } @@ -1436,11 +1648,28 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, auto ptr = track_pjlvalue(ctx, literal_pointer_val(ctx, jl_tparam0(intersected_type))); return {ctx.builder.CreateICmpEQ(boxed(ctx, x), ptr), false}; } + if (intersected_type == (jl_value_t*)jl_type_type) { + // Inline jl_is_kind(jl_typeof(x)) + // N.B. We do the comparison with untracked pointers, because that gives + // LLVM more optimization opportunities. That means it is possible for + // `typ` to get GC'ed, but we don't actually care, because we don't ever + // dereference it. + Value *typ = emit_typeof(ctx, x, false, true); + auto val = ctx.builder.CreateOr( + ctx.builder.CreateOr( + ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_uniontype_type)), + ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_datatype_type))), + ctx.builder.CreateOr( + ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_unionall_type)), + ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_typeofbottom_type)))); + setName(ctx.emission_context, val, "is_kind"); + return std::make_pair(val, false); + } // intersection with Type needs to be handled specially if (jl_has_intersect_type_not_kind(type) || jl_has_intersect_type_not_kind(intersected_type)) { Value *vx = boxed(ctx, x); Value *vtyp = track_pjlvalue(ctx, literal_pointer_val(ctx, type)); - if (msg && *msg == "typeassert") { + if (msg.isSingleStringRef() && msg.getSingleStringRef() == "typeassert") { ctx.builder.CreateCall(prepare_call(jltypeassert_func), { vx, vtyp }); return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1), true); } @@ -1450,36 +1679,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, } // tests for isa concretetype can be handled with pointer comparisons if (jl_is_concrete_type(intersected_type)) { - if (x.TIndex) { - unsigned tindex = get_box_tindex((jl_datatype_t*)intersected_type, x.typ); - if (tindex > 0) { - // optimize more when we know that this is a split union-type where tindex = 0 is invalid - Value *xtindex = ctx.builder.CreateAnd(x.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); - return std::make_pair(ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)), false); - } - else if (x.Vboxed) { - // test for (x.TIndex == 0x80 && typeof(x.V) == type) - Value *isboxed = ctx.builder.CreateICmpEQ(x.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); - BasicBlock *currBB = ctx.builder.GetInsertBlock(); - BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f); - BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f); - ctx.builder.CreateCondBr(isboxed, isaBB, postBB); - ctx.builder.SetInsertPoint(isaBB); - Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, x.Vboxed, false), - track_pjlvalue(ctx, literal_pointer_val(ctx, intersected_type))); - ctx.builder.CreateBr(postBB); - isaBB = ctx.builder.GetInsertBlock(); // could have changed - ctx.builder.SetInsertPoint(postBB); - PHINode *istype = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2); - istype->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), currBB); - istype->addIncoming(istype_boxed, isaBB); - return std::make_pair(istype, false); - } else { - // handle the case where we know 
that `x` is unboxed (but of unknown type), but that concrete type `type` cannot be unboxed - return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), false); - } - } - return std::make_pair(emit_exactly_isa(ctx, x, intersected_type), false); + return std::make_pair(emit_exactly_isa(ctx, x, (jl_datatype_t*)intersected_type), false); } jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(intersected_type); if (jl_is_datatype(dt) && !dt->name->abstract && jl_subtype(dt->name->wrapper, type)) { @@ -1487,8 +1687,8 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, // so the isa test reduces to a comparison of the typename by pointer return std::make_pair( ctx.builder.CreateICmpEQ( - mark_callee_rooted(ctx, emit_datatype_name(ctx, emit_typeof_boxed(ctx, x))), - mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)dt->name))), + emit_datatype_name(ctx, emit_typeof(ctx, x, false, false)), + literal_pointer_val(ctx, (jl_value_t*)dt->name)), false); } if (jl_is_uniontype(intersected_type) && @@ -1516,7 +1716,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, // everything else can be handled via subtype tests return std::make_pair(ctx.builder.CreateICmpNE( ctx.builder.CreateCall(prepare_call(jlsubtype_func), - { emit_typeof_boxed(ctx, x), + { emit_typeof(ctx, x, false, false), track_pjlvalue(ctx, literal_pointer_val(ctx, type)) }), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)), false); } @@ -1530,16 +1730,16 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, static Value *emit_isa_and_defined(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ) { return emit_nullcheck_guard(ctx, val.ispointer() ? val.V : nullptr, [&] { - return emit_isa(ctx, val, typ, nullptr).first; + return emit_isa(ctx, val, typ, Twine()).first; }); } -static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string &msg) +static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const Twine &msg) { Value *istype; bool handled_msg; - std::tie(istype, handled_msg) = emit_isa(ctx, x, type, &msg); + std::tie(istype, handled_msg) = emit_isa(ctx, x, type, msg); if (!handled_msg) { ++EmittedTypechecks; BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); @@ -1547,10 +1747,10 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t ctx.builder.CreateCondBr(istype, passBB, failBB); ctx.builder.SetInsertPoint(failBB); - emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg); + just_emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg); ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(passBB); + passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } } @@ -1559,13 +1759,15 @@ static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) { Value *isconcrete; isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); - isconcrete = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + isconcrete = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1))); isconcrete = 
ctx.builder.CreateLShr(isconcrete, 1); isconcrete = ctx.builder.CreateTrunc(isconcrete, getInt1Ty(ctx.builder.getContext())); + setName(ctx.emission_context, isconcrete, "isconcrete"); return isconcrete; } -static void emit_concretecheck(jl_codectx_t &ctx, Value *typ, const std::string &msg) +static void emit_concretecheck(jl_codectx_t &ctx, Value *typ, const Twine &msg) { ++EmittedConcretechecks; assert(typ->getType() == ctx.types().T_prjlvalue); @@ -1590,11 +1792,11 @@ static bool bounds_check_enabled(jl_codectx_t &ctx, jl_value_t *inbounds) { static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ty, Value *i, Value *len, jl_value_t *boundscheck) { - Value *im1 = ctx.builder.CreateSub(i, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)); -#if CHECK_BOUNDS==1 + Value *im1 = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1)); if (bounds_check_enabled(ctx, boundscheck)) { ++EmittedBoundschecks; Value *ok = ctx.builder.CreateICmpULT(im1, len); + setName(ctx.emission_context, ok, "boundscheck"); BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(ok, passBB, failBB); @@ -1613,6 +1815,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v else if (!ainfo.ispointer()) { // CreateAlloca is OK here since we are on an error branch Value *tempSpace = ctx.builder.CreateAlloca(a->getType()); + setName(ctx.emission_context, tempSpace, "errorbox"); ctx.builder.CreateStore(a, tempSpace); a = tempSpace; } @@ -1622,21 +1825,26 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v i }); } ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(passBB); + passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } -#endif return im1; } -static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt); -static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile=false); +static Value *CreateSimplifiedExtractValue(jl_codectx_t &ctx, Value *Agg, ArrayRef Idxs) +{ + // aka IRBuilder + SimplifyQuery SQ(jl_Module->getDataLayout()); // not actually used, but required by API + if (Value *Inst = simplifyExtractValueInst(Agg, Idxs, SQ)) + return Inst; + return ctx.builder.CreateExtractValue(Agg, Idxs); +} static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef); static void emit_write_barrier(jl_codectx_t&, Value*, Value*); static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*, jl_value_t*); -std::vector first_ptr(Type *T) +SmallVector first_ptr(Type *T) { if (isa(T) || isa(T) || isa(T)) { if (!isa(T)) { @@ -1645,12 +1853,8 @@ std::vector first_ptr(Type *T) num_elements = AT->getNumElements(); else { VectorType *VT = cast(T); -#if JL_LLVM_VERSION >= 120000 ElementCount EC = VT->getElementCount(); num_elements = EC.getKnownMinValue(); -#else - num_elements = VT->getNumElements(); -#endif } if (num_elements == 0) return {}; @@ -1658,7 +1862,7 @@ std::vector first_ptr(Type *T) unsigned i = 0; for (Type *ElTy : T->subtypes()) { if (isa(ElTy) && ElTy->getPointerAddressSpace() == AddressSpace::Tracked) { - return std::vector{i}; + return SmallVector{i}; } auto path = first_ptr(ElTy); if (!path.empty()) { @@ -1708,12 +1912,15 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j // note that nb == 
jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type AllocaInst *intcast = NULL; if (Order == AtomicOrdering::NotAtomic) { - if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) + if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "aggregate_load_box"); + } } else { if (!isboxed && !elty->isIntOrPtrTy()) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "atomic_load_box"); elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } } @@ -1737,18 +1944,37 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j else if (!alignment) alignment = julia_alignment(jltype); if (intcast && Order == AtomicOrdering::NotAtomic) { - emit_memcpy(ctx, intcast, ctx.tbaa().tbaa_stack, data, tbaa, nb, alignment); + emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment, intcast->getAlign().value()); } else { - LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false); - load->setOrdering(Order); - if (aliasscope) - load->setMetadata("alias.scope", aliasscope); - if (isboxed) - maybe_mark_load_dereferenceable(load, true, jltype); - if (tbaa) - tbaa_decorate(tbaa, load); - instr = load; + if (!isboxed && jl_is_genericmemoryref_type(jltype)) { + // load these FCA as individual fields, so LLVM does not need to split them later + Value *fld0 = ctx.builder.CreateStructGEP(elty, data, 0); + LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false); + load0->setOrdering(Order); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.scope = MDNode::concatenate(aliasscope, ai.scope); + ai.decorateInst(load0); + Value *fld1 = ctx.builder.CreateStructGEP(elty, data, 1); + LoadInst *load1 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(1), fld1, Align(alignment), false); + static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order"); + maybe_mark_load_dereferenceable(load1, true, sizeof(void*)*2, alignof(void*)); + load1->setOrdering(Order); + ai.decorateInst(load1); + instr = Constant::getNullValue(elty); + instr = ctx.builder.CreateInsertValue(instr, load0, 0); + instr = ctx.builder.CreateInsertValue(instr, load1, 1); + } + else { + LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false); + load->setOrdering(Order); + if (isboxed) + maybe_mark_load_dereferenceable(load, true, jltype); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.scope = MDNode::concatenate(aliasscope, ai.scope); + ai.decorateInst(load); + instr = load; + } if (elty != realelty) instr = ctx.builder.CreateTrunc(instr, realelty); if (intcast) { @@ -1786,7 +2012,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, Value *parent, // for the write barrier, NULL if no barrier needed bool isboxed, AtomicOrdering Order, AtomicOrdering FailOrder, unsigned alignment, bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, - bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const std::string &fname) + bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const Twine &fname) { auto newval = [&](const jl_cgval_t &lhs) { const jl_cgval_t argv[3] = { cmp, lhs, rhs }; @@ -1830,8 
+2056,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype); AllocaInst *intcast = nullptr; if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) { - if (!issetfield) + if (!issetfield) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "atomic_store_box"); + } elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } Type *realelty = elty; @@ -1869,42 +2097,34 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) { if (isswapfield) { auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); + setName(ctx.emission_context, load, "swapfield_load"); if (isboxed) load->setOrdering(AtomicOrdering::Unordered); - if (aliasscope) - load->setMetadata("noalias", aliasscope); - if (tbaa) - tbaa_decorate(tbaa, load); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); + ai.decorateInst(load); assert(realelty == elty); instr = load; } if (r) { StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment)); store->setOrdering(Order == AtomicOrdering::NotAtomic && isboxed ? AtomicOrdering::Release : Order); - if (aliasscope) - store->setMetadata("noalias", aliasscope); - if (tbaa) - tbaa_decorate(tbaa, store); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); + ai.decorateInst(store); } else { assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype); emit_unbox_store(ctx, rhs, ptr, tbaa, alignment); } } - else if (isswapfield && !isboxed) { + else if (isswapfield && isStrongerThanMonotonic(Order)) { assert(Order != AtomicOrdering::NotAtomic && r); - // we can't handle isboxed here as a workaround for really bad LLVM - // design issue: plain Xchg only works with integers -#if JL_LLVM_VERSION >= 130000 auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order); -#else - auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Order); - store->setAlignment(Align(alignment)); -#endif - if (aliasscope) - store->setMetadata("noalias", aliasscope); - if (tbaa) - tbaa_decorate(tbaa, store); + setName(ctx.emission_context, store, "swapfield_atomicrmw"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); + ai.decorateInst(store); instr = store; } else { @@ -1918,20 +2138,19 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, } else if (!isboxed) { assert(jl_is_concrete_type(jltype)); - needloop = ((jl_datatype_t*)jltype)->layout->haspadding; - Value *SameType = emit_isa(ctx, cmp, jltype, nullptr).first; + needloop = ((jl_datatype_t*)jltype)->layout->flags.haspadding; + Value *SameType = emit_isa(ctx, cmp, jltype, Twine()).first; if (SameType != ConstantInt::getTrue(ctx.builder.getContext())) { BasicBlock *SkipBB = BasicBlock::Create(ctx.builder.getContext(), "skip_xchg", ctx.f); BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "ok_xchg", ctx.f); ctx.builder.CreateCondBr(SameType, BB, SkipBB); ctx.builder.SetInsertPoint(SkipBB); LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); + setName(ctx.emission_context, load, "atomic_replacefield_initial"); load->setOrdering(FailOrder == AtomicOrdering::NotAtomic && isboxed ? 
AtomicOrdering::Monotonic : FailOrder); - if (aliasscope) - load->setMetadata("noalias", aliasscope); - if (tbaa) - tbaa_decorate(tbaa, load); - instr = load; + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); + instr = ai.decorateInst(load); ctx.builder.CreateBr(DoneBB); ctx.builder.SetInsertPoint(DoneBB); Succ = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2); @@ -1958,11 +2177,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, else { // swap or modify LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); Current->setOrdering(Order == AtomicOrdering::NotAtomic && !isboxed ? Order : AtomicOrdering::Monotonic); - if (aliasscope) - Current->setMetadata("noalias", aliasscope); - if (tbaa) - tbaa_decorate(tbaa, Current); - Compare = Current; + setName(ctx.emission_context, Current, "atomic_initial"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); + Compare = ai.decorateInst(Current); needloop = !isswapfield || Order != AtomicOrdering::NotAtomic; } BasicBlock *BB = NULL; @@ -2014,12 +2232,11 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, // modifyfield or replacefield assert(elty == realelty && !intcast); auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); + ai.decorateInst(load); if (isboxed) load->setOrdering(AtomicOrdering::Monotonic); - if (aliasscope) - load->setMetadata("noalias", aliasscope); - if (tbaa) - tbaa_decorate(tbaa, load); Value *first_ptr = nullptr; if (maybe_null_if_boxed && !ismodifyfield) first_ptr = isboxed ? 
load : extract_first_ptr(ctx, load); @@ -2035,10 +2252,9 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ctx.builder.SetInsertPoint(XchgBB); if (r) { auto *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment)); - if (aliasscope) - store->setMetadata("noalias", aliasscope); - if (tbaa) - tbaa_decorate(tbaa, store); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); + ai.decorateInst(store); } else { assert(!isboxed && rhs.typ == jltype); @@ -2057,16 +2273,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, FailOrder = AtomicOrdering::Monotonic; else if (FailOrder == AtomicOrdering::Unordered) FailOrder = AtomicOrdering::Monotonic; -#if JL_LLVM_VERSION >= 130000 auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Align(alignment), Order, FailOrder); -#else - auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Order, FailOrder); - store->setAlignment(Align(alignment)); -#endif - if (aliasscope) - store->setMetadata("noalias", aliasscope); - if (tbaa) - tbaa_decorate(tbaa, store); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); + ai.decorateInst(store); instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0)); Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1)); Done = Success; @@ -2113,7 +2323,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, emit_lockstate_value(ctx, parent, false); if (parent != NULL) { if (isreplacefield) { - // TOOD: avoid this branch if we aren't making a write barrier + // TODO: avoid this branch if we aren't making a write barrier BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f); DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f); ctx.builder.CreateCondBr(Success, BB, DoneBB); @@ -2170,13 +2380,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, // Returns ctx.types().T_pjlvalue static Value *julia_bool(jl_codectx_t &ctx, Value *cond) { - return ctx.builder.CreateSelect(cond, literal_pointer_val(ctx, jl_true), + auto boolean = ctx.builder.CreateSelect(cond, literal_pointer_val(ctx, jl_true), literal_pointer_val(ctx, jl_false)); + setName(ctx.emission_context, boolean, "bool"); + return boolean; } // --- accessing the representations of built-in data types --- -static void emit_atomic_error(jl_codectx_t &ctx, const std::string &msg) +static void emit_atomic_error(jl_codectx_t &ctx, const Twine &msg) { emit_error(ctx, prepare_call(jlatomicerror_func), msg); } @@ -2194,7 +2406,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, size_t nfields = jl_datatype_nfields(stt); bool maybe_null = (unsigned)stt->name->n_uninitialized != 0; auto idx0 = [&]() { - return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nfields), inbounds); + return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(ctx.types().T_size, nfields), inbounds); }; if (nfields == 0) { (void)idx0(); @@ -2250,13 +2462,14 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, assert((cast(strct.V->getType())->getElementType() == ctx.types().T_prjlvalue) == isboxed); Value *idx = idx0(); unsigned i = 0; - Value *fld = ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)); + Value *fld = ctx.builder.CreateExtractValue(strct.V, ArrayRef(i)); for (i = 1; i < nfields; i++) { fld = ctx.builder.CreateSelect( ctx.builder.CreateICmpEQ(idx, ConstantInt::get(idx->getType(), 
i)), - ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)), + ctx.builder.CreateExtractValue(strct.V, ArrayRef(i)), fld); } + setName(ctx.emission_context, fld, "getfield"); jl_value_t *jft = issame ? jl_svecref(types, 0) : (jl_value_t*)jl_any_type; if (isboxed && maybe_null) null_pointer_check(ctx, fld); @@ -2290,9 +2503,12 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, ctx.types().T_prjlvalue, emit_bitcast(ctx, data_pointer(ctx, strct), ctx.types().T_pprjlvalue), idx0()); + setName(ctx.emission_context, fldptr, "getfield_ptr"); LoadInst *fld = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fldptr, Align(sizeof(void*))); + setName(ctx.emission_context, fld, "getfield"); fld->setOrdering(AtomicOrdering::Unordered); - tbaa_decorate(strct.tbaa, fld); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, strct.tbaa); + ai.decorateInst(fld); maybe_mark_load_dereferenceable(fld, maybe_null, minimum_field_size, minimum_align); if (maybe_null) null_pointer_check(ctx, fld); @@ -2317,7 +2533,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, return true; } else if (strct.isboxed) { - idx = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)); + idx = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1)); Value *fld = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, strct), idx }); *ret = mark_julia_type(ctx, fld, true, jl_any_type); return true; @@ -2331,7 +2547,8 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, unsigned union_max, MDNode *tbaa_ptindex) { ++EmittedUnionLoads; - Instruction *tindex0 = tbaa_decorate(tbaa_ptindex, ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ptindex, Align(1))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_ptindex); + Instruction *tindex0 = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ptindex, Align(1))); tindex0->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), { ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), union_max)) })); @@ -2340,20 +2557,48 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, // move value to an immutable stack slot (excluding tindex) Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al); AllocaInst *lv = emit_static_alloca(ctx, AT); + setName(ctx.emission_context, lv, "immutable_union"); if (al > 1) lv->setAlignment(Align(al)); - emit_memcpy(ctx, lv, tbaa, addr, tbaa, fsz, al); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + emit_memcpy(ctx, lv, ai, addr, ai, fsz, al, al); addr = lv; } return mark_julia_slot(fsz > 0 ? 
addr : nullptr, jfty, tindex, tbaa); } +static MDNode *best_field_tbaa(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_datatype_t *jt, unsigned idx, size_t byte_offset) +{ + auto tbaa = strct.tbaa; + if (tbaa == ctx.tbaa().tbaa_datatype) + if (byte_offset != offsetof(jl_datatype_t, types)) + return ctx.tbaa().tbaa_const; + if (tbaa == ctx.tbaa().tbaa_array) { + if (jl_is_genericmemory_type(jt)) { + if (idx == 0) + return ctx.tbaa().tbaa_memorylen; + if (idx == 1) + return ctx.tbaa().tbaa_memoryptr; + } + else if (jl_is_array_type(jt)) { + if (idx == 0) + return ctx.tbaa().tbaa_arrayptr; + if (idx == 1) + return ctx.tbaa().tbaa_arraysize; + } + } + return tbaa; +} + // If `nullcheck` is not NULL and a pointer NULL check is necessary // store the pointer to be checked in `*nullcheck` instead of checking it static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &strct, unsigned idx, jl_datatype_t *jt, enum jl_memory_order order, Value **nullcheck) { + auto get_objname = [&]() { + return strct.V ? strct.V->getName() : StringRef(""); + }; jl_value_t *jfty = jl_field_type(jt, idx); bool isatomic = jl_field_isatomic(jt, idx); bool needlock = isatomic && !jl_field_isptr(jt, idx) && jl_datatype_size(jfty) > MAX_ATOMIC_SIZE; @@ -2377,10 +2622,8 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st size_t nfields = jl_datatype_nfields(jt); bool maybe_null = idx >= nfields - (unsigned)jt->name->n_uninitialized; size_t byte_offset = jl_field_offset(jt, idx); - auto tbaa = strct.tbaa; - if (tbaa == ctx.tbaa().tbaa_datatype && byte_offset != offsetof(jl_datatype_t, types)) - tbaa = ctx.tbaa().tbaa_const; if (strct.ispointer()) { + auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset); Value *staddr = data_pointer(ctx, strct); bool isboxed; Type *lt = julia_type_to_llvm(ctx, (jl_value_t*)jt, &isboxed); @@ -2393,7 +2636,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st addr = ctx.builder.CreateInBoundsGEP( getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), byte_offset)); + ConstantInt::get(ctx.types().T_size, byte_offset)); } else { addr = staddr; @@ -2407,12 +2650,18 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st addr = emit_struct_gep(ctx, lt, staddr, byte_offset); else addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx); + if (addr != staddr) { + setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr")); + } } if (jl_field_isptr(jt, idx)) { + setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr")); LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, maybe_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*))); + setNameWithField(ctx.emission_context, Load, get_objname, jt, idx, Twine()); Load->setOrdering(order <= jl_memory_order_notatomic ? 
AtomicOrdering::Unordered : get_llvm_atomic_order(order)); maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx)); - Value *fldv = tbaa_decorate(tbaa, Load); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + Value *fldv = ai.decorateInst(Load); if (maybe_null) null_pointer_check(ctx, fldv, nullcheck); return mark_julia_type(ctx, fldv, true, jfty); @@ -2421,16 +2670,21 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st size_t fsz = 0, al = 0; int union_max = jl_islayout_inline(jfty, &fsz, &al); bool isptr = (union_max == 0); - assert(!isptr && fsz == jl_field_size(jt, idx) - 1); (void)isptr; + assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr; + size_t fsz1 = jl_field_size(jt, idx) - 1; Value *ptindex; if (isboxed) { ptindex = ctx.builder.CreateConstInBoundsGEP1_32( - getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())), byte_offset + fsz); + getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())), byte_offset + fsz1); } else { - ptindex = emit_struct_gep(ctx, cast(lt), staddr, byte_offset + fsz); + ptindex = emit_struct_gep(ctx, cast(lt), staddr, byte_offset + fsz1); + } + auto val = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, ctx.tbaa().tbaa_unionselbyte); + if (val.V && val.V != addr) { + setNameWithField(ctx.emission_context, val.V, get_objname, jt, idx, Twine()); } - return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, ctx.tbaa().tbaa_unionselbyte); + return val; } assert(jl_is_concrete_type(jfty)); if (jl_field_isconst(jt, idx) && !(maybe_null && (jfty == (jl_value_t*)jl_bool_type || @@ -2444,6 +2698,9 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st jl_cgval_t ret = typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false, needlock ? 
AtomicOrdering::NotAtomic : get_llvm_atomic_order(order), maybe_null, align, nullcheck); + if (ret.V) { + setNameWithField(ctx.emission_context, ret.V, get_objname, jt, idx, Twine()); + } if (needlock) emit_lockstate_value(ctx, strct, false); return ret; @@ -2461,6 +2718,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st } else if (isa(T)) { fldv = ctx.builder.CreateExtractElement(obj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), idx)); + setNameWithField(ctx.emission_context, fldv, get_objname, jt, idx, Twine()); } else if (!jl_field_isptr(jt, idx) && jl_is_uniontype(jfty)) { int fsz = jl_field_size(jt, idx) - 1; @@ -2476,7 +2734,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st unsigned i = 0; for (; i < fsz / align; i++) { unsigned fld = st_idx + i; - Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(fld)); + Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef(fld)); Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); ctx.builder.CreateAlignedStore(fldv, fldp, Align(align)); } @@ -2485,14 +2743,16 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext())); for (; i < ptindex - st_idx; i++) { - Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx + i)); + Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef(st_idx + i)); Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i); ctx.builder.CreateAlignedStore(fldv, fldp, Align(1)); } } + setNameWithField(ctx.emission_context, lv, get_objname, jt, idx, Twine()); } - Value *tindex0 = ctx.builder.CreateExtractValue(obj, makeArrayRef(ptindex)); + Value *tindex0 = ctx.builder.CreateExtractValue(obj, ArrayRef(ptindex)); Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0); + setNameWithField(ctx.emission_context, tindex, get_objname, jt, idx, Twine(".tindex")); return mark_julia_slot(lv, jfty, tindex, ctx.tbaa().tbaa_stack); } else { @@ -2503,7 +2763,8 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st st_idx = convert_struct_offset(ctx, T, byte_offset); else llvm_unreachable("encountered incompatible type for a struct"); - fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx)); + fldv = ctx.builder.CreateExtractValue(obj, ArrayRef(st_idx)); + setNameWithField(ctx.emission_context, fldv, get_objname, jt, idx, Twine()); } if (maybe_null) { Value *first_ptr = jl_field_isptr(jt, idx) ? 
fldv : extract_first_ptr(ctx, fldv); @@ -2534,346 +2795,115 @@ static Value *emit_n_varargs(jl_codectx_t &ctx) #endif } -static bool arraytype_constdim(jl_value_t *ty, size_t *dim) +static Value *emit_genericmemoryelsize(jl_codectx_t &ctx, Value *v, jl_value_t *typ, bool add_isunion) { - if (jl_is_array_type(ty) && jl_is_long(jl_tparam1(ty))) { - *dim = jl_unbox_long(jl_tparam1(ty)); - return true; + ++EmittedArrayElsize; + jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(typ); + if (jl_is_datatype(sty) && !jl_has_free_typevars((jl_value_t*)sty) && sty->layout) { + if (jl_is_genericmemoryref_type(sty)) + sty = (jl_datatype_t*)jl_field_type_concrete(sty, 1); + size_t sz = sty->layout->size; + if (sty->layout->flags.arrayelem_isunion) + sz++; + return ConstantInt::get(ctx.types().T_size, sz); + } + else { + v = emit_bitcast(ctx, v, ctx.types().T_prjlvalue); + Value *t = emit_typeof(ctx, v, false, false, true); + Value *elsize = emit_datatype_size(ctx, t, add_isunion); + return ctx.builder.CreateZExt(elsize, ctx.types().T_size); } - return false; -} - -static bool arraytype_constshape(jl_value_t *ty) -{ - size_t dim; - if (!arraytype_constdim(ty, &dim)) - return false; - return dim != 1; } -static bool arraytype_constelsize(jl_datatype_t *ty, size_t *elsz) +static ssize_t genericmemoryype_constelsize(jl_value_t *typ) { - assert(jl_is_array_type(ty)); - jl_value_t *ety = jl_tparam0(ty); - if (jl_has_free_typevars(ety)) - return false; - // `jl_islayout_inline` requires `*elsz` and `al` to be initialized. - size_t al = 0; - *elsz = 0; - int union_max = jl_islayout_inline(ety, elsz, &al); - bool isboxed = (union_max == 0); - if (isboxed) { - *elsz = sizeof(void*); - } - else if (jl_is_primitivetype(ety)) { - // Primitive types should use the array element size, but - // this can be different from the type's size - *elsz = LLT_ALIGN(*elsz, al); + jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(typ); + if (jl_is_datatype(sty) && !jl_has_free_typevars((jl_value_t*)sty) && sty->layout) { + if (jl_is_array_type(sty)) + sty = (jl_datatype_t*)jl_field_type_concrete(sty, 0); + if (jl_is_genericmemoryref_type(sty)) + sty = (jl_datatype_t*)jl_field_type_concrete(sty, 1); + return sty->layout->size; } - return true; + return -1; } -static intptr_t arraytype_maxsize(jl_value_t *ty) +static intptr_t genericmemoryype_maxsize(jl_value_t *ty) // the maxsize is strictly less than the return value { - if (!jl_is_array_type(ty)) - return INTPTR_MAX; - size_t elsz; - if (arraytype_constelsize((jl_datatype_t*)ty, &elsz) || elsz == 0) + ssize_t elsz = genericmemoryype_constelsize(ty); + if (elsz <= 1) return INTPTR_MAX; return INTPTR_MAX / elsz; } -static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo); - -static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *dim) +static Value *emit_genericmemorylen(jl_codectx_t &ctx, Value *addr, jl_value_t *typ) { - size_t ndim; - MDNode *tbaa = ctx.tbaa().tbaa_arraysize; - if (arraytype_constdim(tinfo.typ, &ndim)) { - if (ndim == 0) - return ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1); - if (ndim == 1) { - if (auto d = dyn_cast(dim)) { - if (d->getZExtValue() == 1) { - return emit_arraylen(ctx, tinfo); - } - } - } - if (ndim > 1) { - if (tinfo.constant && isa(dim)) { - auto n = cast(dim)->getZExtValue() - 1; - return ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_array_dim(tinfo.constant, n)); - } - tbaa = ctx.tbaa().tbaa_const; - } - } - ++EmittedArraysize; - Value *t = boxed(ctx, tinfo); - int o = 
offsetof(jl_array_t, nrows) / sizeof(void*) - 1; - auto load = emit_nthptr_recast(ctx, - t, - ctx.builder.CreateAdd(dim, ConstantInt::get(dim->getType(), o)), - tbaa, getSizeTy(ctx.builder.getContext())); - MDBuilder MDB(ctx.builder.getContext()); - auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), arraytype_maxsize(tinfo.typ))); - load->setMetadata(LLVMContext::MD_range, rng); - return load; -} - -static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int dim) -{ - return emit_arraysize(ctx, tinfo, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), dim)); -} - -static Value *emit_vectormaxsize(jl_codectx_t &ctx, const jl_cgval_t &ary) -{ - return emit_arraysize(ctx, ary, 2); // maxsize aliases ncols in memory layout for vector -} - -static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo) -{ - size_t ndim; - jl_value_t *ty = tinfo.typ; - MDNode *tbaa = ctx.tbaa().tbaa_arraylen; - if (arraytype_constdim(ty, &ndim)) { - if (ndim == 0) - return ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1); - if (ndim != 1) { - if (tinfo.constant) - return ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_array_len(tinfo.constant)); - tbaa = ctx.tbaa().tbaa_const; - } - } - ++EmittedArraylen; - Value *t = boxed(ctx, tinfo); - Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, - emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), - 1); //index (not offset) of length field in ctx.types().T_pjlarray - LoadInst *len = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), addr, Align(sizeof(size_t))); - len->setOrdering(AtomicOrdering::NotAtomic); + addr = emit_bitcast(ctx, decay_derived(ctx, addr), ctx.types().T_jlgenericmemory->getPointerTo()), + addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 0); + LoadInst *LI = ctx.builder.CreateAlignedLoad(ctx.types().T_jlgenericmemory->getElementType(0), addr, Align(sizeof(size_t))); + jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen); + aliasinfo_mem.decorateInst(LI); MDBuilder MDB(ctx.builder.getContext()); - auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), arraytype_maxsize(tinfo.typ))); - len->setMetadata(LLVMContext::MD_range, rng); - return tbaa_decorate(tbaa, len); -} - -static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo) -{ - return emit_arraylen_prim(ctx, tinfo); + auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, genericmemoryype_maxsize(typ))); + LI->setMetadata(LLVMContext::MD_range, rng); + return LI; } -static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *t, unsigned AS, bool isboxed) +static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_datatype_layout_t *layout, unsigned AS) { ++EmittedArrayptr; - Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, - emit_bitcast(ctx, t, ctx.types().T_pjlarray), 0); - // Normally allocated array of 0 dimension always have a inline pointer. - // However, we can't rely on that here since arrays can also be constructed from C pointers. 
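For the GenericMemory helpers introduced a little above (emit_genericmemorylen, emit_genericmemoryelsize, emit_genericmemoryptr), a hedged C sketch of the object layout those loads assume; memory_sketch is an illustrative stand-in, the authoritative struct is jl_genericmemory_t in julia.h:

#include <stddef.h>
/* Illustrative stand-in: field 0 holds the element count and field 1 the data pointer,
   matching the two CreateStructGEP indices used in the helpers above. */
struct memory_sketch { size_t length; void *ptr; };
static size_t memorylen_sketch(const struct memory_sketch *m) {
    return m->length;   /* emit_genericmemorylen: GEP to field 0 + load, annotated with a [0, maxsize) range */
}
static void *memoryptr_sketch(const struct memory_sketch *m) {
    return m->ptr;      /* emit_genericmemoryptr: GEP to field 1 + load, marked nonnull */
}
/* emit_genericmemoryelsize adds one selector byte per element for union layouts, so a
   sketch that sized a buffer from these helpers would use length * (elsize + 1) bytes there. */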
- PointerType *PT = cast(addr->getType()); - PointerType *PPT = cast(ctx.types().T_jlarray->getElementType(0)); - PointerType *LoadT = PPT; - - if (isboxed) { - LoadT = PointerType::get(ctx.types().T_prjlvalue, AS); - } - else if (AS != PPT->getAddressSpace()) { - LoadT = PointerType::getWithSamePointeeType(PPT, AS); - } - if (LoadT != PPT) { - const auto Ty = PointerType::get(LoadT, PT->getAddressSpace()); - addr = ctx.builder.CreateBitCast(addr, Ty); - } - - LoadInst *LI = ctx.builder.CreateAlignedLoad(LoadT, addr, Align(sizeof(char *))); + PointerType *PT = cast(mem->getType()); + assert(PT == ctx.types().T_prjlvalue); + Value *addr = emit_bitcast(ctx, mem, ctx.types().T_jlgenericmemory->getPointerTo(PT->getAddressSpace())); + addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 1); + setName(ctx.emission_context, addr, ".data_ptr"); + PointerType *PPT = cast(ctx.types().T_jlgenericmemory->getElementType(1)); + LoadInst *LI = ctx.builder.CreateAlignedLoad(PPT, addr, Align(sizeof(char*))); LI->setOrdering(AtomicOrdering::NotAtomic); LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None)); - tbaa_decorate(arraytype_constshape(tinfo.typ) ? ctx.tbaa().tbaa_const : ctx.tbaa().tbaa_arrayptr, LI); - return LI; -} - -static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false) -{ - Value *t = boxed(ctx, tinfo); - return emit_arrayptr_internal(ctx, tinfo, decay_derived(ctx, t), AddressSpace::Loaded, isboxed); -} - -static Value *emit_unsafe_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, bool isboxed = false) -{ - Value *t = boxed(ctx, tinfo); - t = emit_pointer_from_objref(ctx, decay_derived(ctx, t)); - return emit_arrayptr_internal(ctx, tinfo, t, 0, isboxed); -} - -static Value *emit_arrayptr(jl_codectx_t &ctx, const jl_cgval_t &tinfo, jl_value_t *ex, bool isboxed = false) -{ - return emit_arrayptr(ctx, tinfo, isboxed); -} - -static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, jl_value_t *ex, int dim) -{ - return emit_arraysize(ctx, tinfo, dim); -} - -static Value *emit_arrayflags(jl_codectx_t &ctx, const jl_cgval_t &tinfo) -{ - ++EmittedArrayflags; - Value *t = boxed(ctx, tinfo); - int arrayflag_field = 2; - Value *addr = ctx.builder.CreateStructGEP( - ctx.types().T_jlarray, - emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), - arrayflag_field); - return tbaa_decorate(ctx.tbaa().tbaa_arrayflags, ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); -} - -static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary) -{ - ++EmittedArrayNDims; - Value *flags = emit_arrayflags(ctx, ary); - cast(flags)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(ctx.builder.getContext(), None)); - flags = ctx.builder.CreateLShr(flags, 2); - flags = ctx.builder.CreateAnd(flags, 0x1FF); // (1<<9) - 1 - return flags; -} - -static Value *emit_arrayelsize(jl_codectx_t &ctx, const jl_cgval_t &tinfo) -{ - ++EmittedArrayElsize; - Value *t = boxed(ctx, tinfo); - int elsize_field = 3; - Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, - emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), - elsize_field); - return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); -} - -static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int nd) -{ - ++EmittedArrayOffset; - if (nd != -1 && nd != 1) // only Vector 
can have an offset - return ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0); - Value *t = boxed(ctx, tinfo); - int offset_field = 4; - - Value *addr = ctx.builder.CreateStructGEP( - ctx.types().T_jlarray, - emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), - offset_field); - return tbaa_decorate(ctx.tbaa().tbaa_arrayoffset, ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), addr, Align(sizeof(int32_t)))); -} - -// Returns the size of the array represented by `tinfo` for the given dimension `dim` if -// `dim` is a valid dimension, otherwise returns constant one. -static Value *emit_arraysize_for_unsafe_dim(jl_codectx_t &ctx, - const jl_cgval_t &tinfo, jl_value_t *ex, size_t dim, size_t nd) -{ - return dim > nd ? ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1) : emit_arraysize(ctx, tinfo, ex, dim); + jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + aliasinfo.decorateInst(LI); + Value *ptr = LI; + if (AS) { + assert(AS == AddressSpace::Loaded); + ptr = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, ptr }); + } + if (!layout->flags.arrayelem_isboxed) + ptr = ctx.builder.CreateBitCast(ptr, PointerType::get(getInt8Ty(ctx.builder.getContext()), AS)); + return ptr; } -// `nd == -1` means the dimension is unknown. -static Value *emit_array_nd_index( - jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ex, ssize_t nd, - const jl_cgval_t *argv, size_t nidxs, jl_value_t *inbounds) +static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t) { - ++EmittedArrayNdIndex; - Value *a = boxed(ctx, ainfo); - Value *i = Constant::getNullValue(getSizeTy(ctx.builder.getContext())); - Value *stride = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1); -#if CHECK_BOUNDS==1 - bool bc = bounds_check_enabled(ctx, inbounds); - BasicBlock *failBB = NULL, *endBB = NULL; - if (bc) { - failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); - endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend"); - } -#endif - Value **idxs = (Value**)alloca(sizeof(Value*) * nidxs); - for (size_t k = 0; k < nidxs; k++) { - idxs[k] = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), argv[k], (jl_value_t*)jl_long_type); // type asserted by caller - } - Value *ii = NULL; - for (size_t k = 0; k < nidxs; k++) { - ii = ctx.builder.CreateSub(idxs[k], ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)); - i = ctx.builder.CreateAdd(i, ctx.builder.CreateMul(ii, stride)); - if (k < nidxs - 1) { - assert(nd >= 0); - Value *d = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k + 1, nd); -#if CHECK_BOUNDS==1 - if (bc) { - BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "ib"); - // if !(i < d) goto error - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(ii, d), okBB, failBB); - ctx.f->getBasicBlockList().push_back(okBB); - ctx.builder.SetInsertPoint(okBB); - } -#endif - stride = ctx.builder.CreateMul(stride, d); - } - } -#if CHECK_BOUNDS==1 - if (bc) { - // We have already emitted a bounds check for each index except for - // the last one which we therefore have to do here. 
- if (nidxs == 1) { - // Linear indexing: Check against the entire linear span of the array - Value *alen = emit_arraylen(ctx, ainfo); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(i, alen), endBB, failBB); - } else if (nidxs >= (size_t)nd){ - // No dimensions were omitted; just check the last remaining index - assert(nd >= 0); - Value *last_index = ii; - Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(last_index, last_dimension), endBB, failBB); - } else { - // There were fewer indices than dimensions; check the last remaining index - BasicBlock *checktrailingdimsBB = BasicBlock::Create(ctx.builder.getContext(), "dimsib"); - assert(nd >= 0); - Value *last_index = ii; - Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(last_index, last_dimension), checktrailingdimsBB, failBB); - ctx.f->getBasicBlockList().push_back(checktrailingdimsBB); - ctx.builder.SetInsertPoint(checktrailingdimsBB); - // And then also make sure that all dimensions that weren't explicitly - // indexed into have size 1 - for (size_t k = nidxs+1; k < (size_t)nd; k++) { - BasicBlock *dimsokBB = BasicBlock::Create(ctx.builder.getContext(), "dimsok"); - Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)), dimsokBB, failBB); - ctx.f->getBasicBlockList().push_back(dimsokBB); - ctx.builder.SetInsertPoint(dimsokBB); - } - Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nd, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)), endBB, failBB); - } - - ctx.f->getBasicBlockList().push_back(failBB); - ctx.builder.SetInsertPoint(failBB); - // CreateAlloca is OK here since we are on an error branch - Value *tmp = ctx.builder.CreateAlloca(getSizeTy(ctx.builder.getContext()), ConstantInt::get(getSizeTy(ctx.builder.getContext()), nidxs)); - for (size_t k = 0; k < nidxs; k++) { - ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), tmp, ConstantInt::get(getSizeTy(ctx.builder.getContext()), k)), Align(sizeof(size_t))); - } - ctx.builder.CreateCall(prepare_call(jlboundserrorv_func), - { mark_callee_rooted(ctx, a), tmp, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nidxs) }); - ctx.builder.CreateUnreachable(); - - ctx.f->getBasicBlockList().push_back(endBB); - ctx.builder.SetInsertPoint(endBB); - } -#endif - - return i; + Value *m = emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_jlgenericmemory->getPointerTo(0)); + Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, m, 1); + Type *T_data = ctx.types().T_jlgenericmemory->getElementType(1); + LoadInst *LI = ctx.builder.CreateAlignedLoad(T_data, addr, Align(sizeof(char*))); + LI->setOrdering(AtomicOrdering::NotAtomic); + LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None)); + jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryown); + aliasinfo_mem.decorateInst(LI); + addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, emit_bitcast(ctx, m, LI->getType()), JL_SMALL_BYTE_ALIGNMENT / sizeof(void*)); + Value *foreign = ctx.builder.CreateICmpNE(addr, decay_derived(ctx, LI)); + return emit_guarded_test(ctx, foreign, t, [&] { + addr = 
ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_jlgenericmemory, m, 1); + LoadInst *owner = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, emit_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + ai.decorateInst(owner); + return ctx.builder.CreateSelect(ctx.builder.CreateIsNull(owner), t, owner); + }); } // --- boxing --- -static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt); +static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_initialized); static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tbaa, unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); // newv should already be tagged - tbaa_decorate(tbaa, ctx.builder.CreateAlignedStore(v, emit_bitcast(ctx, newv, + ai.decorateInst(ctx.builder.CreateAlignedStore(v, emit_bitcast(ctx, newv, PointerType::get(v->getType(), 0)), Align(alignment))); } @@ -2881,7 +2911,8 @@ static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v, { // newv should already be tagged if (v.ispointer()) { - emit_memcpy(ctx, newv, tbaa, v, jl_datatype_size(v.typ), sizeof(void*)); + unsigned align = std::max(julia_alignment(v.typ), (unsigned)sizeof(void*)); + emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), align, julia_alignment(v.typ)); } else { init_bits_value(ctx, newv, v.V, tbaa); @@ -2929,18 +2960,14 @@ static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant if (const auto *CC = dyn_cast(constant)) nargs = CC->getNumOperands(); else if (const auto *CAZ = dyn_cast(constant)) { -#if JL_LLVM_VERSION >= 130000 - // SVE: Elsewhere we use `getMinKownValue` + // SVE: Elsewhere we use `getMinKnownValue` nargs = CAZ->getElementCount().getFixedValue(); -#else - nargs = CAZ->getNumElements(); -#endif } else if (const auto *CDS = dyn_cast(constant)) nargs = CDS->getNumElements(); else return NULL; - assert(nargs > 0 && jst->instance == NULL); + assert(nargs > 0 && !jl_is_datatype_singleton(jst)); if (nargs != jl_datatype_nfields(jst)) return NULL; @@ -2967,7 +2994,8 @@ static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant return obj; } -static Value *call_with_attrs(jl_codectx_t &ctx, JuliaFunction *intr, Value *v) +template +static Value *call_with_attrs(jl_codectx_t &ctx, JuliaFunction *intr, Value *v) { Function *F = prepare_call(intr); CallInst *Call = ctx.builder.CreateCall(F, v); @@ -2975,7 +3003,7 @@ static Value *call_with_attrs(jl_codectx_t &ctx, JuliaFunction *intr, Value *v) return Call; } -static void jl_add_method_root(jl_codectx_t &ctx, jl_value_t *val); +static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val); static Value *as_value(jl_codectx_t &ctx, Type *to, const jl_cgval_t &v) { @@ -2989,7 +3017,8 @@ static Value *load_i8box(jl_codectx_t &ctx, Value *v, jl_datatype_t *ty) GlobalVariable *gv = prepare_global_in(jl_Module, jvar); Value *idx[] = {ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0), ctx.builder.CreateZExt(v, getInt32Ty(ctx.builder.getContext()))}; auto slot = ctx.builder.CreateInBoundsGEP(gv->getValueType(), gv, idx); - return tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable( + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + return ai.decorateInst(maybe_mark_load_dereferenceable( 
ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, slot, Align(sizeof(void*))), false, (jl_value_t*)ty)); } @@ -3008,7 +3037,7 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t if (Constant *c = dyn_cast(vinfo.V)) { jl_value_t *s = static_constant_instance(jl_Module->getDataLayout(), c, jt); if (s) { - jl_add_method_root(ctx, s); + s = jl_ensure_rooted(ctx, s); return track_pjlvalue(ctx, literal_pointer_val(ctx, s)); } } @@ -3041,33 +3070,36 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t else if (jb == jl_char_type) box = call_with_attrs(ctx, box_char_func, as_value(ctx, t, vinfo)); else if (jb == jl_ssavalue_type) { - unsigned zero = 0; Value *v = as_value(ctx, t, vinfo); assert(v->getType() == ctx.emission_context.llvmtypes[jl_ssavalue_type]); - v = ctx.builder.CreateExtractValue(v, makeArrayRef(&zero, 1)); + v = ctx.builder.CreateExtractValue(v, 0); box = call_with_attrs(ctx, box_ssavalue_func, v); } else if (!jb->name->abstract && jl_datatype_nbits(jb) == 0) { // singleton - assert(jb->instance != NULL); + assert(jl_is_datatype_singleton(jb)); return track_pjlvalue(ctx, literal_pointer_val(ctx, jb->instance)); } + if (box) { + setName(ctx.emission_context, box, [&]() {return "box_" + std::string(jl_symbol_name(jb->name->name));}); + } return box; } -static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype, jl_value_t *supertype, jl_value_t *ut) +static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype_tag, jl_value_t *supertype, jl_value_t *ut) { Value *tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0); unsigned counter = 0; for_each_uniontype_small( [&](unsigned idx, jl_datatype_t *jt) { if (jl_subtype((jl_value_t*)jt, supertype)) { - Value *cmp = ctx.builder.CreateICmpEQ(track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jt)), datatype); + Value *cmp = ctx.builder.CreateICmpEQ(emit_tagfrom(ctx, jt), datatype_tag); tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx), tindex); } }, ut, counter); + setName(ctx.emission_context, tindex, datatype_tag->getName() + ".tindex"); return tindex; } @@ -3080,10 +3112,11 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j return ConstantInt::get(getInt8Ty(ctx.builder.getContext()), get_box_tindex((jl_datatype_t*)jl_typeof(val.constant), typ)); if (val.TIndex) return ctx.builder.CreateAnd(val.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); - Value *typof = emit_typeof_boxed(ctx, val, maybenull); + Value *typof = emit_typeof(ctx, val, maybenull, true); return compute_box_tindex(ctx, typof, val.typ, typ); } + static void union_alloca_type(jl_uniontype_t *ut, bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align) { @@ -3118,6 +3151,7 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, // try to pick an Integer type size such that SROA will emit reasonable code Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align); AllocaInst *lv = emit_static_alloca(ctx, AT); + setName(ctx.emission_context, lv, "unionalloca"); if (align > 1) lv->setAlignment(Align(align)); return lv; @@ -3130,7 +3164,7 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, * returning `Constant::getNullValue(ctx.types().T_pjlvalue)` in one of the skipped cases. 
If `skip` is not empty, * skip[0] (corresponding to unknown boxed) must always be set. In that * case, the calling code must separately deal with the case where - * `vinfo` is already an unknown boxed union (union tag 0x80). + * `vinfo` is already an unknown boxed union (union tag UNION_BOX_MARKER). */ // Returns ctx.types().T_prjlvalue static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip) @@ -3173,7 +3207,8 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB jl_cgval_t vinfo_r = jl_cgval_t(vinfo, (jl_value_t*)jt, NULL); box = _boxed_special(ctx, vinfo_r, t); if (!box) { - box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); + box = emit_allocobj(ctx, jt, true); + setName(ctx.emission_context, box, "unionbox"); init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); } } @@ -3204,6 +3239,44 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB return box_merge; } +static Function *mangleIntrinsic(IntrinsicInst *call) //mangling based on replaceIntrinsicUseWith +{ + Intrinsic::ID ID = call->getIntrinsicID(); + auto nargs = call->arg_size(); + SmallVector argTys(nargs); + auto oldfType = call->getFunctionType(); + for (unsigned i = 0; i < oldfType->getNumParams(); i++) { + auto argi = call->getArgOperand(i); + argTys[i] = argi->getType(); + } + + auto newfType = FunctionType::get( + oldfType->getReturnType(), + ArrayRef(argTys).slice(0, oldfType->getNumParams()), + oldfType->isVarArg()); + + // Accumulate an array of overloaded types for the given intrinsic + // and compute the new name mangling schema + SmallVector overloadTys; + { + SmallVector Table; + getIntrinsicInfoTableEntries(ID, Table); + ArrayRef TableRef = Table; + auto res = Intrinsic::matchIntrinsicSignature(newfType, TableRef, overloadTys); + assert(res == Intrinsic::MatchIntrinsicTypes_Match); + (void)res; + bool matchvararg = !Intrinsic::matchIntrinsicVarArg(newfType->isVarArg(), TableRef); + assert(matchvararg); + (void)matchvararg; + } + auto newF = Intrinsic::getDeclaration(call->getModule(), ID, overloadTys); + assert(newF->getFunctionType() == newfType); + newF->setCallingConv(call->getCallingConv()); + return newF; +} + + +//Used for allocation hoisting in *boxed static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsigned ToAS) { for (auto *User : Val->users()) { @@ -3213,21 +3286,14 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig recursively_adjust_ptr_type(Inst, FromAS, ToAS); } else if (isa(User)) { - IntrinsicInst *II = cast(User); - SmallVector ArgTys; - Intrinsic::getIntrinsicSignature(II->getCalledFunction(), ArgTys); - assert(ArgTys.size() <= II->arg_size()); - for (size_t i = 0; i < ArgTys.size(); ++i) - ArgTys[i] = II->getArgOperand(i)->getType(); - II->setCalledFunction(Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(), ArgTys)); - } -#ifndef JL_LLVM_OPAQUE_POINTERS + IntrinsicInst *call = cast(User); + call->setCalledFunction(mangleIntrinsic(call)); + } else if (isa(User)) { BitCastInst *Inst = cast(User); Inst->mutateType(PointerType::getWithSamePointeeType(cast(Inst->getType()), ToAS)); recursively_adjust_ptr_type(Inst, FromAS, ToAS); } -#endif } } @@ -3235,7 +3301,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig // dynamically-typed value is required (e.g. argument to unknown function). 
// if it's already a pointer it's left alone. // Returns ctx.types().T_prjlvalue -static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo) +static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotable) { jl_value_t *jt = vinfo.typ; if (jt == jl_bottom_type || jt == NULL) @@ -3265,24 +3331,27 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo) box = _boxed_special(ctx, vinfo, t); if (!box) { bool do_promote = vinfo.promotion_point; - if (do_promote) { + if (do_promote && is_promotable) { auto IP = ctx.builder.saveIP(); ctx.builder.SetInsertPoint(vinfo.promotion_point); - box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); + box = emit_allocobj(ctx, (jl_datatype_t*)jt, true); Value *decayed = decay_derived(ctx, box); AllocaInst *originalAlloca = cast(vinfo.V); -#ifndef JL_LLVM_OPAQUE_POINTERS - decayed = maybe_bitcast(ctx, decayed, PointerType::get(originalAlloca->getType()->getPointerElementType(), AddressSpace::Derived)); -#endif + box->takeName(originalAlloca); + decayed = maybe_bitcast(ctx, decayed, PointerType::getWithSamePointeeType(originalAlloca->getType(), AddressSpace::Derived)); // Warning: Very illegal IR here temporarily originalAlloca->mutateType(decayed->getType()); recursively_adjust_ptr_type(originalAlloca, 0, AddressSpace::Derived); originalAlloca->replaceAllUsesWith(decayed); // end illegal IR - cast(vinfo.V)->eraseFromParent(); + originalAlloca->eraseFromParent(); ctx.builder.restoreIP(IP); } else { - box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt)); + auto arg_typename = [&] JL_NOTSAFEPOINT { + return "box::" + std::string(jl_symbol_name(((jl_datatype_t*)(jt))->name->name)); + }; + box = emit_allocobj(ctx, (jl_datatype_t*)jt, true); + setName(ctx.emission_context, box, arg_typename); init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); } } @@ -3294,7 +3363,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo) static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, const jl_cgval_t &src, Value *skip, bool isVolatile=false) { if (AllocaInst *ai = dyn_cast(dest)) - // TODO: make this a lifetime_end & dereferencable annotation? + // TODO: make this a lifetime_end & dereferenceable annotation? ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlign()); if (jl_is_concrete_type(src.typ) || src.constant) { jl_value_t *typ = src.constant ? 
jl_typeof(src.constant) : src.typ; @@ -3311,7 +3380,8 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con // select copy dest -> dest to simulate an undef value / conditional copy // if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr); auto f = [&] { - (void)emit_memcpy(ctx, dest, tbaa_dst, src_ptr, src.tbaa, nb, alignment, isVolatile); + (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr, + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile); return nullptr; }; if (skip) @@ -3347,8 +3417,8 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con ctx.builder.CreateUnreachable(); return; } else { - emit_memcpy(ctx, dest, tbaa_dst, src_ptr, - src.tbaa, nb, alignment, isVolatile); + emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr, + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile); } } ctx.builder.CreateBr(postBB); @@ -3371,9 +3441,10 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con else { assert(src.isboxed && "expected boxed value for sizeof/alignment computation"); auto f = [&] { - Value *datatype = emit_typeof_boxed(ctx, src); + Value *datatype = emit_typeof(ctx, src, false, false); Value *copy_bytes = emit_datatype_size(ctx, datatype); - emit_memcpy(ctx, dest, tbaa_dst, src, copy_bytes, /*TODO: min-align*/1, isVolatile); + (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src), + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, 1, 1, isVolatile); return nullptr; }; if (skip) @@ -3384,39 +3455,53 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con } -static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std::string &msg) +static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const Twine &msg) { ++EmittedCPointerChecks; - Value *t = emit_typeof_boxed(ctx, x); - emit_typecheck(ctx, mark_julia_type(ctx, t, true, jl_any_type), (jl_value_t*)jl_datatype_type, msg); + Value *t = emit_typeof(ctx, x, false, false); Value *istype = - ctx.builder.CreateICmpEQ(mark_callee_rooted(ctx, emit_datatype_name(ctx, t)), - mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_typename))); - BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(),"fail",ctx.f); - BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(),"pass"); + ctx.builder.CreateICmpEQ(emit_datatype_name(ctx, t), + literal_pointer_val(ctx, (jl_value_t*)jl_pointer_typename)); + setName(ctx.emission_context, istype, "istype"); + BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); + BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(istype, passBB, failBB); ctx.builder.SetInsertPoint(failBB); - emit_type_error(ctx, x, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_type), msg); + just_emit_type_error(ctx, x, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_type), msg); ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(passBB); + passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } // allocation for known size object // returns a prjlvalue -static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt) +static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt, + bool fully_initialized, unsigned align) { ++EmittedAllocObjs; Value 
*current_task = get_current_task(ctx); Function *F = prepare_call(jl_alloc_obj_func); - auto call = ctx.builder.CreateCall(F, {current_task, ConstantInt::get(getSizeTy(ctx.builder.getContext()), static_size), maybe_decay_untracked(ctx, jt)}); + auto call = ctx.builder.CreateCall(F, {current_task, ConstantInt::get(ctx.types().T_size, static_size), maybe_decay_untracked(ctx, jt)}); call->setAttributes(F->getAttributes()); + if (static_size > 0) + call->addRetAttr(Attribute::getWithDereferenceableBytes(call->getContext(), static_size)); + call->addRetAttr(Attribute::getWithAlignment(call->getContext(), Align(align))); +#if JL_LLVM_VERSION >= 150000 + if (fully_initialized) + call->addFnAttr(Attribute::get(call->getContext(), Attribute::AllocKind, uint64_t(AllocFnKind::Alloc | AllocFnKind::Uninitialized))); +#endif return call; } +static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_initialized) +{ + return emit_allocobj(ctx, jl_datatype_size(jt), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jt), ctx.types().T_pjlvalue), + fully_initialized, julia_alignment((jl_value_t*)jt)); +} + // allocation for unknown object from an untracked pointer static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval) { @@ -3430,7 +3515,7 @@ static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval) // if ptr is NULL this emits a write barrier _back_ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr) { - emit_write_barrier(ctx, parent, makeArrayRef(ptr)); + emit_write_barrier(ctx, parent, ArrayRef(ptr)); } static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef ptrs) @@ -3447,15 +3532,7 @@ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef decay_ptrs; - decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, ctx.types().T_prjlvalue))); - decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, ptr, ctx.types().T_prjlvalue))); - ctx.builder.CreateCall(prepare_call(jl_write_barrier_binding_func), decay_ptrs); -} - -static void find_perm_offsets(jl_datatype_t *typ, SmallVector &res, unsigned offset) +static void find_perm_offsets(jl_datatype_t *typ, SmallVectorImpl &res, unsigned offset) { // This is a inlined field at `offset`. if (!typ->layout || typ->layout->npointers == 0) @@ -3493,31 +3570,38 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx, jl_cgval_t rhs, jl_cgval_t cmp, bool wb, AtomicOrdering Order, AtomicOrdering FailOrder, bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield, - const jl_cgval_t *modifyop, const std::string &fname) + const jl_cgval_t *modifyop, const Twine &fname) { + auto get_objname = [&]() { + return strct.V ? 
strct.V->getName() : StringRef(""); + }; ++EmittedSetfield; assert(strct.ispointer()); size_t byte_offset = jl_field_offset(sty, idx0); + auto tbaa = best_field_tbaa(ctx, strct, sty, idx0, byte_offset); Value *addr = data_pointer(ctx, strct); if (byte_offset > 0) { addr = ctx.builder.CreateInBoundsGEP( getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), byte_offset)); // TODO: use emit_struct_gep + ConstantInt::get(ctx.types().T_size, byte_offset)); // TODO: use emit_struct_gep + setNameWithField(ctx.emission_context, addr, get_objname, sty, idx0, Twine("_ptr")); } jl_value_t *jfty = jl_field_type(sty, idx0); if (!jl_field_isptr(sty, idx0) && jl_is_uniontype(jfty)) { size_t fsz = 0, al = 0; int union_max = jl_islayout_inline(jfty, &fsz, &al); bool isptr = (union_max == 0); - assert(!isptr && fsz == jl_field_size(sty, idx0) - 1); (void)isptr; + assert(!isptr && fsz < jl_field_size(sty, idx0)); (void)isptr; + size_t fsz1 = jl_field_size(sty, idx0) - 1; // compute tindex from rhs jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty); if (rhs_union.typ == jl_bottom_type) return jl_cgval_t(); Value *ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, addr, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), fsz)); + ConstantInt::get(ctx.types().T_size, fsz1)); + setNameWithField(ctx.emission_context, ptindex, get_objname, sty, idx0, Twine(".tindex_ptr")); if (needlock) emit_lockstate_value(ctx, strct, true); BasicBlock *ModifyBB = NULL; @@ -3528,7 +3612,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx, } jl_cgval_t oldval = rhs; if (!issetfield) - oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte); + oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte); Value *Success = NULL; BasicBlock *DoneBB = NULL; if (isreplacefield || ismodifyfield) { @@ -3551,7 +3635,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx, if (needlock) emit_lockstate_value(ctx, strct, true); cmp = oldval; - oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte); + oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte); } BasicBlock *XchgBB = BasicBlock::Create(ctx.builder.getContext(), "xchg", ctx.f); DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f); @@ -3561,10 +3645,11 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx, } Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty); tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1)); - tbaa_decorate(ctx.tbaa().tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); + ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1))); // copy data if (!rhs.isghost) { - emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr); + emit_unionmove(ctx, addr, tbaa, rhs, nullptr); } if (isreplacefield || ismodifyfield) { ctx.builder.CreateBr(DoneBB); @@ -3577,11 +3662,17 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx, jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)}; jl_datatype_t *rettyp = 
jl_apply_cmpswap_type(jfty); oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv); + if (oldval.V) { + setNameWithField(ctx.emission_context, oldval.V, get_objname, sty, idx0, Twine()); + } } else if (ismodifyfield) { jl_cgval_t argv[2] = {oldval, rhs}; jl_datatype_t *rettyp = jl_apply_modify_type(jfty); oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv); + if (oldval.V) { + setNameWithField(ctx.emission_context, oldval.V, get_objname, sty, idx0, Twine()); + } } return oldval; } @@ -3590,7 +3681,7 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx, bool isboxed = jl_field_isptr(sty, idx0); size_t nfields = jl_datatype_nfields(sty); bool maybe_null = idx0 >= nfields - (unsigned)sty->name->n_uninitialized; - return typed_store(ctx, addr, NULL, rhs, cmp, jfty, strct.tbaa, nullptr, + return typed_store(ctx, addr, NULL, rhs, cmp, jfty, tbaa, nullptr, wb ? boxed(ctx, strct) : nullptr, isboxed, Order, FailOrder, align, needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, modifyop, fname); @@ -3603,6 +3694,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg assert(jl_is_datatype(ty)); assert(jl_is_concrete_type(ty)); jl_datatype_t *sty = (jl_datatype_t*)ty; + auto arg_typename = [&] JL_NOTSAFEPOINT { + return "new::" + std::string(jl_symbol_name((sty)->name->name)); + }; size_t nf = jl_datatype_nfields(sty); if (nf > 0 || sty->name->mutabl) { if (deserves_stack(ty)) { @@ -3627,15 +3721,22 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg strct = NULL; } else if (init_as_value) { - if (tracked.count) + if (tracked.count) { strct = Constant::getNullValue(lt); - else + } + else { strct = UndefValue::get(lt); + if (nargs < nf) + strct = ctx.builder.CreateFreeze(strct); + } } else { strct = emit_static_alloca(ctx, lt); + setName(ctx.emission_context, strct, arg_typename); + if (nargs < nf) + promotion_point = ctx.builder.CreateStore(ctx.builder.CreateFreeze(UndefValue::get(lt)), strct); if (tracked.count) - undef_derived_strct(ctx.builder, strct, sty, ctx.tbaa().tbaa_stack); + undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack); } for (unsigned i = 0; i < na; i++) { @@ -3648,7 +3749,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (fval_info.typ == jl_bottom_type) return jl_cgval_t(); // TODO: Use (post-)domination instead. 
- bool field_promotable = !init_as_value && fval_info.promotion_ssa != -1 && + bool field_promotable = !jl_is_uniontype(jtype) && !init_as_value && fval_info.promotion_ssa != -1 && fval_info.promotion_point && fval_info.promotion_point->getParent() == ctx.builder.GetInsertBlock(); if (field_promotable) { savedIP = ctx.builder.saveIP(); @@ -3666,11 +3767,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg // avoid unboxing the argument explicitly // and use memcpy instead Instruction *inst; -#ifndef JL_LLVM_OPAQUE_POINTERS dest = inst = cast(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx)); -#else - dest = inst = cast(ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), strct, offs)); -#endif // Our promotion point needs to come before // A) All of our arguments' promotion points // B) Any instructions we insert at any of our arguments' promotion points @@ -3681,20 +3778,22 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg promotion_point = inst; promotion_ssa = fval_info.promotion_ssa; } - } else if (!promotion_point) { + } + else if (!promotion_point) { promotion_point = inst; } } Value *fval = NULL; if (jl_field_isptr(sty, i)) { - fval = boxed(ctx, fval_info); - if (!init_as_value) - cast(tbaa_decorate(ctx.tbaa().tbaa_stack, - ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i))))) - ->setOrdering(AtomicOrdering::Unordered); + fval = boxed(ctx, fval_info, field_promotable); + if (!init_as_value) { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + StoreInst *SI = cast(ai.decorateInst( + ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i))))); + SI->setOrdering(AtomicOrdering::Unordered); + } } else if (jl_is_uniontype(jtype)) { - assert(!field_promotable); // compute tindex from rhs jl_cgval_t rhs_union = convert_julia_type(ctx, fval_info, jtype); if (rhs_union.typ == jl_bottom_type) @@ -3703,25 +3802,28 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1)); size_t fsz = 0, al = 0; bool isptr = !jl_islayout_inline(jtype, &fsz, &al); - assert(!isptr && fsz == jl_field_size(sty, i) - 1); (void)isptr; + assert(!isptr && fsz < jl_field_size(sty, i)); (void)isptr; + size_t fsz1 = jl_field_size(sty, i) - 1; if (init_as_value) { // If you wanted to implement init_as_value, // would need to emit the union-move into temporary memory, // then load it and combine with the tindex. // But more efficient to just store it directly. 
- unsigned ptindex = convert_struct_offset(ctx, lt, offs + fsz); - if (fsz > 0 && !fval_info.isghost) { + unsigned ptindex = convert_struct_offset(ctx, lt, offs + fsz1); + if (fsz1 > 0 && !fval_info.isghost) { Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al); assert(lt->getStructElementType(llvm_idx) == ET); AllocaInst *lv = emit_static_alloca(ctx, ET); - lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + al - 1) / al)); + setName(ctx.emission_context, lv, "unioninit"); + lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz1 + al - 1) / al)); emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr); // emit all of the align-sized words unsigned i = 0; - for (; i < fsz / al; i++) { + for (; i < fsz1 / al; i++) { Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); - Value *fldv = tbaa_decorate(ctx.tbaa().tbaa_stack, ctx.builder.CreateAlignedLoad(ET, fldp, Align(al))); - strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ET, fldp, Align(al))); + strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef(llvm_idx + i)); } // emit remaining bytes up to tindex if (i < ptindex - llvm_idx) { @@ -3729,19 +3831,21 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext())); for (; i < ptindex - llvm_idx; i++) { Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i); - Value *fldv = tbaa_decorate(ctx.tbaa().tbaa_stack, ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), fldp, Align(1))); - strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), fldp, Align(1))); + strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef(llvm_idx + i)); } } } llvm_idx = ptindex; fval = tindex; if (jl_is_vecelement_type(ty)) - fval = ctx.builder.CreateInsertValue(strct, fval, makeArrayRef(llvm_idx)); + fval = ctx.builder.CreateInsertValue(strct, fval, ArrayRef(llvm_idx)); } else { - Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz); - tbaa_decorate(ctx.tbaa().tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1))); + Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz1); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); + ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1))); if (!rhs_union.isghost) emit_unionmove(ctx, dest, ctx.tbaa().tbaa_stack, fval_info, nullptr); } @@ -3763,7 +3867,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg else if (lt->isVectorTy()) strct = ctx.builder.CreateInsertElement(strct, fval, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), llvm_idx)); else if (lt->isAggregateType()) - strct = ctx.builder.CreateInsertValue(strct, fval, makeArrayRef(llvm_idx)); + strct = ctx.builder.CreateInsertValue(strct, fval, ArrayRef(llvm_idx)); else assert(false); } @@ -3777,12 +3881,14 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg int fsz = jl_field_size(sty, i) - 1; unsigned llvm_idx = convert_struct_offset(ctx, 
cast(lt), offs + fsz); if (init_as_value) - strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), makeArrayRef(llvm_idx)); - else - tbaa_decorate(ctx.tbaa().tbaa_unionselbyte, ctx.builder.CreateAlignedStore( + strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef(llvm_idx)); + else { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); + ai.decorateInst(ctx.builder.CreateAlignedStore( ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx), Align(1))); + } } } if (type_is_ghost(lt)) @@ -3798,17 +3904,18 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg return ret; } } - Value *strct = emit_allocobj(ctx, jl_datatype_size(sty), - literal_pointer_val(ctx, (jl_value_t*)ty)); + Value *strct = emit_allocobj(ctx, sty, nargs >= nf); + setName(ctx.emission_context, strct, arg_typename); jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty); strct = decay_derived(ctx, strct); - undef_derived_strct(ctx.builder, strct, sty, strctinfo.tbaa); + undef_derived_strct(ctx, strct, sty, strctinfo.tbaa); for (size_t i = nargs; i < nf; i++) { if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { - tbaa_decorate(ctx.tbaa().tbaa_unionselbyte, ctx.builder.CreateAlignedStore( + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); + ai.decorateInst(ctx.builder.CreateAlignedStore( ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, strct, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)), + ConstantInt::get(ctx.types().T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)), Align(1))); } } @@ -3830,20 +3937,20 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg return strctinfo; } else { - // 0 fields, ghost or bitstype + // 0 fields, ghost or primitive type if (jl_datatype_nbits(sty) == 0) return ghostValue(ctx, sty); + // n.b. 
this is not valid IR form to construct a primitive type (use bitcast for example) bool isboxed; Type *lt = julia_type_to_llvm(ctx, ty, &isboxed); assert(!isboxed); - return mark_julia_type(ctx, UndefValue::get(lt), false, ty); + return mark_julia_type(ctx, ctx.builder.CreateFreeze(UndefValue::get(lt)), false, ty); } } static void emit_signal_fence(jl_codectx_t &ctx) { - ++EmittedSignalFences; - ctx.builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread); + emit_signal_fence(ctx.builder); } static Value *emit_defer_signal(jl_codectx_t &ctx) @@ -3856,83 +3963,233 @@ static Value *emit_defer_signal(jl_codectx_t &ctx) return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef(offset), "jl_defer_signal"); } -static void emit_gc_safepoint(jl_codectx_t &ctx) -{ - ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); - emit_signal_fence(ctx); - ctx.builder.CreateLoad(getSizeTy(ctx.builder.getContext()), get_current_signal_page(ctx), true); - emit_signal_fence(ctx); -} - -static Value *emit_gc_state_set(jl_codectx_t &ctx, Value *state, Value *old_state) -{ - Type *T_int8 = state->getType(); - Value *ptls = emit_bitcast(ctx, get_current_ptls(ctx), getInt8PtrTy(ctx.builder.getContext())); - Constant *offset = ConstantInt::getSigned(getInt32Ty(ctx.builder.getContext()), offsetof(jl_tls_states_t, gc_state)); - Value *gc_state = ctx.builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef(offset), "gc_state"); - if (old_state == nullptr) { - old_state = ctx.builder.CreateLoad(T_int8, gc_state); - cast(old_state)->setOrdering(AtomicOrdering::Monotonic); - } - ctx.builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); - if (auto *C = dyn_cast(old_state)) - if (C->isZero()) - return old_state; - if (auto *C = dyn_cast(state)) - if (!C->isZero()) - return old_state; - BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "safepoint", ctx.f); - BasicBlock *exitBB = BasicBlock::Create(ctx.builder.getContext(), "after_safepoint", ctx.f); - Constant *zero8 = ConstantInt::get(T_int8, 0); - ctx.builder.CreateCondBr(ctx.builder.CreateAnd(ctx.builder.CreateICmpNE(old_state, zero8), // if (old_state && !state) - ctx.builder.CreateICmpEQ(state, zero8)), - passBB, exitBB); - ctx.builder.SetInsertPoint(passBB); - emit_gc_safepoint(ctx); - ctx.builder.CreateBr(exitBB); - ctx.builder.SetInsertPoint(exitBB); - return old_state; +#ifndef JL_NDEBUG +static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b) +{ + return + (a->track_allocations == b->track_allocations) && + (a->code_coverage == b->code_coverage) && + (a->prefer_specsig == b->prefer_specsig) && + (a->gnu_pubnames == b->gnu_pubnames) && + (a->debug_info_kind == b->debug_info_kind) && + (a->safepoint_on_entry == b->safepoint_on_entry) && + (a->gcstack_arg == b->gcstack_arg) && + (a->use_jlplt == b->use_jlplt) && + (a->lookup == b->lookup); } +#endif -static Value *emit_gc_unsafe_enter(jl_codectx_t &ctx) +static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, Value *mem, Value *data, const jl_datatype_layout_t *layout, jl_value_t *typ) { - Value *state = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0); - return emit_gc_state_set(ctx, state, nullptr); + //jl_cgval_t argv[] = { + // mark_julia_type(ctx, mem, true, jl_any_type), + // mark_julia_type(ctx, data, false, jl_voidpointer_type) + //}; + //return emit_new_struct(ctx, typ, 3, argv); + Value *ref = Constant::getNullValue(get_memoryref_type(ctx.builder.getContext(), 
ctx.types().T_size, layout, 0)); + ref = ctx.builder.CreateInsertValue(ref, data, 0); + ref = ctx.builder.CreateInsertValue(ref, mem, 1); + return mark_julia_type(ctx, ref, false, typ); } -static Value *emit_gc_unsafe_leave(jl_codectx_t &ctx, Value *state) +static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &mem, const jl_datatype_layout_t *layout, jl_value_t *typ) { - Value *old_state = ConstantInt::get(state->getType(), 0); - return emit_gc_state_set(ctx, state, old_state); + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + bool isghost = layout->size == 0; + Value *data = (!isboxed && isunion) || isghost ? ConstantInt::get(ctx.types().T_size, 0) : emit_genericmemoryptr(ctx, boxed(ctx, mem), layout, 0); + return _emit_memoryref(ctx, boxed(ctx, mem), data, layout, typ); } -//static Value *emit_gc_safe_enter(jl_codectx_t &ctx) -//{ -// Value *state = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), JL_GC_STATE_SAFE); -// return emit_gc_state_set(ctx, state, nullptr); -//} -// -//static Value *emit_gc_safe_leave(jl_codectx_t &ctx, Value *state) -//{ -// Value *old_state = ConstantInt::get(state->getType(), JL_GC_STATE_SAFE); -// return emit_gc_state_set(ctx, state, old_state); -//} - +static Value *emit_memoryref_FCA(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout) +{ + if (ref.ispointer()) { + LLVMContext &C = ctx.builder.getContext(); + Type *type = get_memoryref_type(C, ctx.types().T_size, layout, 0); + LoadInst *load = ctx.builder.CreateLoad(type, emit_bitcast(ctx, data_pointer(ctx, ref), PointerType::get(type, 0))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa); + ai.decorateInst(load); + return load; + } + else { + return ref.V; + } +} +static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cgval_t idx, jl_value_t *inbounds, const jl_datatype_layout_t *layout) +{ + ++EmittedArrayNdIndex; + emit_typecheck(ctx, idx, (jl_value_t*)jl_long_type, "memoryref"); + idx = update_julia_type(ctx, idx, (jl_value_t*)jl_long_type); + if (idx.typ == jl_bottom_type) + return jl_cgval_t(); + Value *V = emit_memoryref_FCA(ctx, ref, layout); + Value *data = CreateSimplifiedExtractValue(ctx, V, 0); + Value *mem = CreateSimplifiedExtractValue(ctx, V, 1); + Value *i = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type); + Value *offset = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1)); + Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false); + bool bc = bounds_check_enabled(ctx, inbounds); +#if 1 + Value *ovflw = nullptr; +#endif + Value *newdata; + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + bool isghost = layout->size == 0; + if ((!isboxed && isunion) || isghost) { + newdata = ctx.builder.CreateAdd(data, offset); + if (bc) { + BasicBlock *failBB, *endBB; + failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend"); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + Value *inbound = ctx.builder.CreateICmpULT(newdata, mlen); + ctx.builder.CreateCondBr(inbound, endBB, failBB); + ctx.f->getBasicBlockList().push_back(failBB); + ctx.builder.SetInsertPoint(failBB); + ctx.builder.CreateCall(prepare_call(jlboundserror_func), + { mark_callee_rooted(ctx, boxed(ctx, ref)), i }); + ctx.builder.CreateUnreachable(); + ctx.f->getBasicBlockList().push_back(endBB); + ctx.builder.SetInsertPoint(endBB); + } + 
} + else { + Value *boffset; +#if 0 + if (bc) { + auto *MulF = Intrinsic::getDeclaration(jl_Module, Intrinsic::smul_with_overflow, offset->getType()); + CallInst *Mul = ctx.builder.CreateCall(MulF, {offset, elsz}); + boffset = ctx.builder.CreateExtractValue(Mul, 0); + ovflw = ctx.builder.CreateExtractValue(Mul, 1); + } + else +#else + if (bc) { + // n.b. we could boundscheck that -len<=offset<=len instead of using smul.ovflw, + // since we know that len*elsz does not overflow, + // and we can further rearrange that as ovflw = !( offset+len < len+len ) as unsigned math + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + ovflw = ctx.builder.CreateICmpUGE(ctx.builder.CreateAdd(offset, mlen), ctx.builder.CreateNUWAdd(mlen, mlen)); + } +#endif + boffset = ctx.builder.CreateMul(offset, elsz); +#if 0 // TODO: if opaque-pointers? + newdata = emit_bitcast(ctx, data, getInt8PtrTy(ctx.builder.getContext())); + newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), newdata, boffset); +#else + Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jl_tparam1(ref.typ)); + newdata = emit_bitcast(ctx, data, elty->getPointerTo(0)); + newdata = ctx.builder.CreateInBoundsGEP(elty, newdata, offset); + (void)boffset; // LLVM is very bad at handling GEP with types different from the load +#endif + newdata = emit_bitcast(ctx, newdata, data->getType()); + if (bc) { + BasicBlock *failBB, *endBB; + failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend"); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + Value *mptr = emit_genericmemoryptr(ctx, mem, layout, 0); + mptr = emit_bitcast(ctx, mptr, newdata->getType()); +#if 0 + Value *mend = emit_bitcast(ctx, mptr, getInt8PtrTy(ctx.builder.getContext())); + Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true); + mend = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), mend, blen); + mend = emit_bitcast(ctx, mend, newdata->getType()); + Value *inbound = ctx.builder.CreateAnd( + ctx.builder.CreateICmpULE(mptr, newdata), + ctx.builder.CreateICmpULT(newdata, mend)); + inbound = ctx.builder.CreateAnd( + ctx.builder.CreateNot(ovflw), + inbound); +#elif 1 + Value *bidx0 = ctx.builder.CreateSub( + ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size), + ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size)); + Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true); + Value *inbound = ctx.builder.CreateICmpULT(bidx0, blen); + inbound = ctx.builder.CreateAnd(ctx.builder.CreateNot(ovflw), inbound); +#else + Value *idx0; // (newdata - mptr) / elsz + idx0 = ctx.builder.CreateSub( + ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size), + ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size)); + idx0 = ctx.builder.CreateExactUDiv(idx0, elsz); + Value *inbound = ctx.builder.CreateICmpULT(idx0, mlen); +#endif + ctx.builder.CreateCondBr(inbound, endBB, failBB); + ctx.f->getBasicBlockList().push_back(failBB); + ctx.builder.SetInsertPoint(failBB); + ctx.builder.CreateCall(prepare_call(jlboundserror_func), + { mark_callee_rooted(ctx, boxed(ctx, ref)), i }); + ctx.builder.CreateUnreachable(); + ctx.f->getBasicBlockList().push_back(endBB); + ctx.builder.SetInsertPoint(endBB); + } + } + return _emit_memoryref(ctx, mem, newdata, layout, ref.typ); +} -#ifndef JL_NDEBUG -static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b) +static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref, const 
jl_datatype_layout_t *layout)
 {
-    return
-        (a->track_allocations == b->track_allocations) &&
-        (a->code_coverage == b->code_coverage) &&
-        (a->prefer_specsig == b->prefer_specsig) &&
-        (a->gnu_pubnames == b->gnu_pubnames) &&
-        (a->debug_info_kind == b->debug_info_kind) &&
-        (a->lookup == b->lookup) &&
-        (a->generic_context == b->generic_context);
+    Value *offset;
+    Value *V = emit_memoryref_FCA(ctx, ref, layout);
+    Value *data = CreateSimplifiedExtractValue(ctx, V, 0);
+    if (layout->flags.arrayelem_isunion || layout->size == 0) {
+        offset = data;
+    }
+    else {
+        Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
+        Value *mptr = emit_genericmemoryptr(ctx, mem, layout, 0);
+        mptr = emit_bitcast(ctx, mptr, mem->getType());
+        // (data - mptr) / elsz
+        offset = ctx.builder.CreateSub(
+            ctx.builder.CreatePtrToInt(data, ctx.types().T_size),
+            ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
+        Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
+        offset = ctx.builder.CreateExactUDiv(offset, elsz);
+    }
+    offset = ctx.builder.CreateAdd(offset, ConstantInt::get(ctx.types().T_size, 1));
+    return mark_julia_type(ctx, offset, false, jl_long_type);
+}
+
+static Value *emit_memoryref_mem(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
+{
+    Value *V = emit_memoryref_FCA(ctx, ref, layout);
+    return CreateSimplifiedExtractValue(ctx, V, 1);
+}
+
+static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
+{
+    assert(!layout->flags.arrayelem_isunion && layout->size != 0);
+    Value *newref = emit_memoryref_FCA(ctx, ref, layout);
+    Value *data = CreateSimplifiedExtractValue(ctx, newref, 0);
+    unsigned AS = AddressSpace::Loaded;
+    Value *mem = CreateSimplifiedExtractValue(ctx, newref, 1);
+    // rebuild GEP on data, so that we manually hoist this gc_loaded_func call over it, back to the original load
+    // we should add this to llvm-julia-licm too, so we can attempt hoisting over PhiNodes too (which aren't defined yet here)
+    IRBuilder<>::InsertPointGuard resetIP(ctx.builder);
+    SmallVector<GetElementPtrInst*> GEPlist;
+    data = data->stripPointerCastsSameRepresentation();
+    while (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(data)) { // ignoring bitcast will not be required with opaque pointers
+        GEPlist.push_back(GEP);
+        data = GEP->getPointerOperand()->stripPointerCastsSameRepresentation();
+    }
+    data = ctx.builder.CreateBitCast(data, ctx.types().T_pprjlvalue);
+    data = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, data });
+    if (!GEPlist.empty()) {
+        for (auto &GEP : make_range(GEPlist.rbegin(), GEPlist.rend())) {
+            data = ctx.builder.CreateBitCast(data, PointerType::get(GEP->getSourceElementType(), AS));
+            Instruction *GEP2 = GEP->clone();
+            GEP2->mutateType(PointerType::get(GEP->getResultElementType(), AS));
+            GEP2->setOperand(GetElementPtrInst::getPointerOperandIndex(), data);
+            ctx.builder.Insert(GEP2);
+            data = GEP2;
+        }
+    }
+    return data;
 }
-#endif

 // Reset us back to codegen debug type
 #undef DEBUG_TYPE
diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp
index 38bd012ff46fc..8aa0ef009f4eb 100644
--- a/src/clangsa/GCChecker.cpp
+++ b/src/clangsa/GCChecker.cpp
@@ -37,6 +37,16 @@ static const Stmt *getStmtForDiagnostics(const ExplodedNode *N)
     return N->getStmtForDiagnostics();
 }

+static unsigned getStackFrameHeight(const LocationContext *stack)
+{
+    // TODO: or use getID ?
+ unsigned depth = 0; + while (stack) { + depth++; + stack = stack->getParent(); + } + return depth; +} class GCChecker : public Checker< @@ -53,8 +63,8 @@ class GCChecker check::Location> { mutable std::unique_ptr BT; template - void report_error(callback f, CheckerContext &C, const char *message) const; - void report_error(CheckerContext &C, const char *message) const { + void report_error(callback f, CheckerContext &C, StringRef message) const; + void report_error(CheckerContext &C, StringRef message) const { return report_error([](PathSensitiveBugReport *) {}, C, message); } void @@ -124,8 +134,8 @@ class GCChecker return ValueState(Rooted, Root, Depth); } static ValueState getForArgument(const FunctionDecl *FD, - const ParmVarDecl *PVD) { - bool isFunctionSafepoint = !isFDAnnotatedNotSafepoint(FD); + const ParmVarDecl *PVD, + bool isFunctionSafepoint) { bool maybeUnrooted = declHasAnnotation(PVD, "julia_maybe_unrooted"); if (!isFunctionSafepoint || maybeUnrooted) { ValueState VS = getAllocated(); @@ -181,15 +191,6 @@ class GCChecker } return f(TD->getName()); } - static bool isValueCollection(QualType QT) { - if (QT->isPointerType() || QT->isArrayType()) - return isValueCollection( - clang::QualType(QT->getPointeeOrArrayElementType(), 0)); - const TagDecl *TD = QT->getUnqualifiedDesugaredType()->getAsTagDecl(); - if (!TD) - return false; - return declHasAnnotation(TD, "julia_rooted_value_collection"); - } template static SymbolRef walkToRoot(callback f, const ProgramStateRef &State, const MemRegion *Region); @@ -198,9 +199,10 @@ class GCChecker static bool isGCTracked(const Expr *E); bool isGloballyRootedType(QualType Type) const; static void dumpState(const ProgramStateRef &State); - static bool declHasAnnotation(const clang::Decl *D, const char *which); - static bool isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD); - bool isSafepoint(const CallEvent &Call) const; + static const AnnotateAttr *declHasAnnotation(const clang::Decl *D, const char *which); + static bool isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const SourceManager &SM); + static const SourceManager &getSM(CheckerContext &C) { return C.getSourceManager(); } + bool isSafepoint(const CallEvent &Call, CheckerContext &C) const; bool processPotentialSafepoint(const CallEvent &Call, CheckerContext &C, ProgramStateRef &State) const; bool processAllocationOfResult(const CallEvent &Call, CheckerContext &C, @@ -214,7 +216,9 @@ class GCChecker const MemRegion *R, bool Debug = false); bool gcEnabledHere(CheckerContext &C) const; + bool gcEnabledHere(ProgramStateRef State) const; bool safepointEnabledHere(CheckerContext &C) const; + bool safepointEnabledHere(ProgramStateRef State) const; bool propagateArgumentRootedness(CheckerContext &C, ProgramStateRef &State) const; SymbolRef getSymbolForResult(const Expr *Result, const ValueState *OldValS, @@ -247,6 +251,18 @@ class GCChecker PDP VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) override; }; + class SafepointBugVisitor : public BugReporterVisitor { + public: + SafepointBugVisitor() {} + + void Profile(llvm::FoldingSetNodeID &ID) const override { + static int X = 0; + ID.AddPointer(&X); + } + + PDP VisitNode(const ExplodedNode *N, BugReporterContext &BRC, PathSensitiveBugReport &BR) override; + }; + class GCValueBugVisitor : public BugReporterVisitor { protected: SymbolRef Sym; @@ -360,6 +376,33 @@ PDP GCChecker::GCBugVisitor::VisitNode(const ExplodedNode *N, return nullptr; } +PDP 
GCChecker::SafepointBugVisitor::VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, PathSensitiveBugReport &BR) { + const ExplodedNode *PrevN = N->getFirstPred(); + unsigned NewSafepointDisabled = N->getState()->get(); + unsigned OldSafepointDisabled = PrevN->getState()->get(); + if (NewSafepointDisabled != OldSafepointDisabled) { + const Decl *D = &N->getCodeDecl(); + const AnnotateAttr *Ann = declHasAnnotation(D, "julia_not_safepoint"); + PathDiagnosticLocation Pos; + if (OldSafepointDisabled == (unsigned)-1) { + if (Ann) { + Pos = PathDiagnosticLocation{Ann->getLoc(), BRC.getSourceManager()}; + return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + } else { + PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( + N->getLocationContext(), BRC.getSourceManager()); + return MakePDP(Pos, "Tracking JL_NOT_SAFEPOINT annotation here."); + } + } else if (NewSafepointDisabled == (unsigned)-1) { + PathDiagnosticLocation Pos = PathDiagnosticLocation::createDeclBegin( + N->getLocationContext(), BRC.getSourceManager()); + return MakePDP(Pos, "Safepoints re-enabled here"); + } + } + return nullptr; +} + PDP GCChecker::GCValueBugVisitor::ExplainNoPropagationFromExpr( const clang::Expr *FromWhere, const ExplodedNode *N, PathDiagnosticLocation Pos, BugReporterContext &BRC, PathSensitiveBugReport &BR) { @@ -463,7 +506,7 @@ PDP GCChecker::GCValueBugVisitor::VisitNode(const ExplodedNode *N, } else { if (NewValueState->FD) { bool isFunctionSafepoint = - !isFDAnnotatedNotSafepoint(NewValueState->FD); + !isFDAnnotatedNotSafepoint(NewValueState->FD, BRC.getSourceManager()); bool maybeUnrooted = declHasAnnotation(NewValueState->PVD, "julia_maybe_unrooted"); assert(isFunctionSafepoint || maybeUnrooted); @@ -509,7 +552,7 @@ PDP GCChecker::GCValueBugVisitor::VisitNode(const ExplodedNode *N, template void GCChecker::report_error(callback f, CheckerContext &C, - const char *message) const { + StringRef message) const { // Generate an error node. 
ExplodedNode *N = C.generateErrorNode(); if (!N) @@ -544,12 +587,20 @@ void GCChecker::report_value_error(CheckerContext &C, SymbolRef Sym, } bool GCChecker::gcEnabledHere(CheckerContext &C) const { - unsigned disabledAt = C.getState()->get(); + return gcEnabledHere(C.getState()); +} + +bool GCChecker::gcEnabledHere(ProgramStateRef State) const { + unsigned disabledAt = State->get(); return disabledAt == (unsigned)-1; } bool GCChecker::safepointEnabledHere(CheckerContext &C) const { - unsigned disabledAt = C.getState()->get(); + return safepointEnabledHere(C.getState()); +} + +bool GCChecker::safepointEnabledHere(ProgramStateRef State) const { + unsigned disabledAt = State->get(); return disabledAt == (unsigned)-1; } @@ -617,8 +668,8 @@ void GCChecker::checkBeginFunction(CheckerContext &C) const { // otherwise const auto *LCtx = C.getLocationContext(); const auto *FD = dyn_cast(LCtx->getDecl()); - if (!FD) - return; + assert(FD); + unsigned CurrentHeight = getStackFrameHeight(C.getStackFrame()); ProgramStateRef State = C.getState(); bool Change = false; if (C.inTopFrame()) { @@ -626,15 +677,14 @@ void GCChecker::checkBeginFunction(CheckerContext &C) const { State = State->set((unsigned)-1); Change = true; } - if (State->get() == (unsigned)-1) { - if (declHasAnnotation(FD, "julia_gc_disabled")) { - State = State->set(C.getStackFrame()->getIndex()); - Change = true; - } + if (gcEnabledHere(State) && declHasAnnotation(FD, "julia_gc_disabled")) { + State = State->set(CurrentHeight); + Change = true; } - if (State->get() == (unsigned)-1 && - isFDAnnotatedNotSafepoint(FD)) { - State = State->set(C.getStackFrame()->getIndex()); + bool isFunctionSafepoint = !isFDAnnotatedNotSafepoint(FD, getSM(C)); + if (safepointEnabledHere(State) && + (!isFunctionSafepoint || declHasAnnotation(FD, "julia_notsafepoint_leave"))) { + State = State->set(CurrentHeight); Change = true; } if (!C.inTopFrame()) { @@ -654,7 +704,7 @@ void GCChecker::checkBeginFunction(CheckerContext &C) const { continue; assert(AssignedSym); State = State->set(AssignedSym, - ValueState::getForArgument(FD, P)); + ValueState::getForArgument(FD, P, isFunctionSafepoint)); Change = true; } } @@ -666,8 +716,10 @@ void GCChecker::checkBeginFunction(CheckerContext &C) const { void GCChecker::checkEndFunction(const clang::ReturnStmt *RS, CheckerContext &C) const { ProgramStateRef State = C.getState(); + const auto *LCtx = C.getLocationContext(); + const auto *FD = dyn_cast(LCtx->getDecl()); - if (RS && gcEnabledHere(C) && RS->getRetValue() && isGCTracked(RS->getRetValue())) { + if (RS && gcEnabledHere(State) && RS->getRetValue() && isGCTracked(RS->getRetValue())) { auto ResultVal = C.getSVal(RS->getRetValue()); SymbolRef Sym = ResultVal.getAsSymbol(true); const ValueState *ValS = Sym ? 
State->get(Sym) : nullptr; @@ -676,12 +728,16 @@ void GCChecker::checkEndFunction(const clang::ReturnStmt *RS, } } + unsigned CurrentHeight = getStackFrameHeight(C.getStackFrame()); bool Changed = false; - if (State->get() == C.getStackFrame()->getIndex()) { + if (State->get() == CurrentHeight) { State = State->set((unsigned)-1); Changed = true; } - if (State->get() == C.getStackFrame()->getIndex()) { + if (State->get() == CurrentHeight) { + if (!isFDAnnotatedNotSafepoint(FD, getSM(C)) && !(FD && declHasAnnotation(FD, "julia_notsafepoint_enter"))) { + report_error(C, "Safepoints disabled at end of function"); + } State = State->set((unsigned)-1); Changed = true; } @@ -689,20 +745,52 @@ void GCChecker::checkEndFunction(const clang::ReturnStmt *RS, C.addTransition(State); if (!C.inTopFrame()) return; - if (C.getState()->get() > 0) + unsigned CurrentDepth = C.getState()->get(); + if (CurrentDepth != 0) { report_error(C, "Non-popped GC frame present at end of function"); + } } -bool GCChecker::declHasAnnotation(const clang::Decl *D, const char *which) { +const AnnotateAttr *GCChecker::declHasAnnotation(const clang::Decl *D, const char *which) { for (const auto *Ann : D->specific_attrs()) { if (Ann->getAnnotation() == which) - return true; + return Ann; } + return nullptr; +} + +bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const SourceManager &SM) { + if (declHasAnnotation(FD, "julia_not_safepoint")) + return true; + SourceLocation Loc = FD->getLocation(); + StringRef Name = SM.getFilename(Loc); + Name = llvm::sys::path::filename(Name); + if (Name.startswith("llvm-")) + return true; return false; } -bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD) { - return declHasAnnotation(FD, "julia_not_safepoint"); +static bool isMutexLock(StringRef name) { + return name == "uv_mutex_lock" || + //name == "uv_mutex_trylock" || + name == "pthread_mutex_lock" || + //name == "pthread_mutex_trylock" || + name == "pthread_spin_lock" || + //name == "pthread_spin_trylock" || + name == "uv_rwlock_rdlock" || + //name == "uv_rwlock_tryrdlock" || + name == "uv_rwlock_wrlock" || + //name == "uv_rwlock_trywrlock" || + false; +} + +static bool isMutexUnlock(StringRef name) { + return name == "uv_mutex_unlock" || + name == "pthread_mutex_unlock" || + name == "pthread_spin_unlock" || + name == "uv_rwlock_rdunlock" || + name == "uv_rwlock_wrunlock" || + false; } #if LLVM_VERSION_MAJOR >= 13 @@ -710,8 +798,7 @@ bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD) { #endif bool GCChecker::isGCTrackedType(QualType QT) { - return isValueCollection(QT) || - isJuliaType( + return isJuliaType( [](StringRef Name) { if (Name.endswith_lower("jl_value_t") || Name.endswith_lower("jl_svec_t") || @@ -719,6 +806,8 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.endswith_lower("jl_expr_t") || Name.endswith_lower("jl_code_info_t") || Name.endswith_lower("jl_array_t") || + Name.endswith_lower("jl_genericmemory_t") || + //Name.endswith_lower("jl_genericmemoryref_t") || Name.endswith_lower("jl_method_t") || Name.endswith_lower("jl_method_instance_t") || Name.endswith_lower("jl_tupletype_t") || @@ -745,6 +834,7 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.endswith_lower("jl_method_match_t") || Name.endswith_lower("jl_vararg_t") || Name.endswith_lower("jl_opaque_closure_t") || + Name.endswith_lower("jl_globalref_t") || // Probably not technically true for these, but let's allow it Name.endswith_lower("typemap_intersection_env") || 
Name.endswith_lower("interpreter_state") || @@ -778,14 +868,20 @@ bool GCChecker::isGloballyRootedType(QualType QT) const { [](StringRef Name) { return Name.endswith("jl_sym_t"); }, QT); } -bool GCChecker::isSafepoint(const CallEvent &Call) const { +bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const { bool isCalleeSafepoint = true; if (Call.isInSystemHeader()) { // defined by -isystem per // https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-in-system-headers isCalleeSafepoint = false; } else { - auto *Decl = Call.getDecl(); + const clang::Decl *Decl = Call.getDecl(); // we might not have a simple call, or we might have an SVal + const clang::Expr *Callee = nullptr; + if (auto CE = dyn_cast_or_null(Call.getOriginExpr())) { + Callee = CE->getCallee(); + if (Decl == nullptr) + Decl = CE->getCalleeDecl(); // ignores dyn_cast, so it could also be a MemberDecl, etc. + } const DeclContext *DC = Decl ? Decl->getDeclContext() : nullptr; while (DC) { // Anything in llvm or std is not a safepoint @@ -796,9 +892,9 @@ bool GCChecker::isSafepoint(const CallEvent &Call) const { } const FunctionDecl *FD = Decl ? Decl->getAsFunction() : nullptr; if (!Decl || !FD) { - const clang::Expr *Callee = - dyn_cast(Call.getOriginExpr())->getCallee(); - if (const TypedefType *TDT = dyn_cast(Callee->getType())) { + if (Callee == nullptr) { + isCalleeSafepoint = true; + } else if (const TypedefType *TDT = dyn_cast(Callee->getType())) { isCalleeSafepoint = !declHasAnnotation(TDT->getDecl(), "julia_not_safepoint"); } else if (const CXXPseudoDestructorExpr *PDE = @@ -819,7 +915,7 @@ bool GCChecker::isSafepoint(const CallEvent &Call) const { FD->getName() != "uv_run") isCalleeSafepoint = false; else - isCalleeSafepoint = !isFDAnnotatedNotSafepoint(FD); + isCalleeSafepoint = !isFDAnnotatedNotSafepoint(FD, getSM(C)); } } return isCalleeSafepoint; @@ -828,7 +924,7 @@ bool GCChecker::isSafepoint(const CallEvent &Call) const { bool GCChecker::processPotentialSafepoint(const CallEvent &Call, CheckerContext &C, ProgramStateRef &State) const { - if (!isSafepoint(Call)) + if (!isSafepoint(Call, C)) return false; bool DidChange = false; if (!gcEnabledHere(C)) @@ -1112,8 +1208,9 @@ void GCChecker::checkDerivingExpr(const Expr *Result, const Expr *Parent, dyn_cast(C.getLocationContext()->getDecl()); if (FD) { inheritedState = true; + bool isFunctionSafepoint = !isFDAnnotatedNotSafepoint(FD, getSM(C)); Updated = - ValueState::getForArgument(FD, cast(VR->getDecl())); + ValueState::getForArgument(FD, cast(VR->getDecl()), isFunctionSafepoint); } } else { VR = Helpers::walk_back_to_global_VR(Region); @@ -1221,16 +1318,35 @@ void GCChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { return; unsigned NumArgs = Call.getNumArgs(); ProgramStateRef State = C.getState(); - bool isCalleeSafepoint = isSafepoint(Call); + bool isCalleeSafepoint = isSafepoint(Call, C); auto *Decl = Call.getDecl(); const FunctionDecl *FD = Decl ? Decl->getAsFunction() : nullptr; - if (!safepointEnabledHere(C) && isCalleeSafepoint) { + StringRef FDName = + FD && FD->getDeclName().isIdentifier() ? 
FD->getName() : ""; + if (isMutexUnlock(FDName) || (FD && declHasAnnotation(FD, "julia_notsafepoint_leave"))) { + const auto *LCtx = C.getLocationContext(); + const auto *FD = dyn_cast(LCtx->getDecl()); + if (State->get() == getStackFrameHeight(C.getStackFrame()) && + !isFDAnnotatedNotSafepoint(FD, getSM(C))) { + State = State->set((unsigned)-1); + C.addTransition(State); + } + } + if (!safepointEnabledHere(State) && isCalleeSafepoint) { // Suppress this warning if the function is noreturn. // We could separate out "not safepoint, except for noreturn functions", // but that seems like a lot of effort with little benefit. if (!FD || !FD->isNoReturn()) { - report_error(C, "Calling potential safepoint from function annotated " - "JL_NOTSAFEPOINT"); + report_error( + [&](PathSensitiveBugReport *Report) { + if (FD) + Report->addNote( + "Tried to call method defined here", + PathDiagnosticLocation::create(FD, C.getSourceManager())); + Report->addVisitor(make_unique()); + }, + C, ("Calling potential safepoint as " + + Call.getKindAsString() + " from function annotated JL_NOTSAFEPOINT").str()); return; } } @@ -1324,7 +1440,7 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { } else if (name == "JL_GC_PUSH1" || name == "JL_GC_PUSH2" || name == "JL_GC_PUSH3" || name == "JL_GC_PUSH4" || name == "JL_GC_PUSH5" || name == "JL_GC_PUSH6" || - name == "JL_GC_PUSH7") { + name == "JL_GC_PUSH7" || name == "JL_GC_PUSH8") { ProgramStateRef State = C.getState(); // Transform slots to roots, transform values to rooted unsigned NumArgs = CE->getNumArgs(); @@ -1440,7 +1556,7 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { } else { cast(Arg.getAsSymbol())->getStmt()->dump(); } - bool EnabledNow = State->get() == (unsigned)-1; + bool EnabledNow = gcEnabledHere(State); if (!EnabledAfter) { State = State->set((unsigned)-2); } else { @@ -1452,22 +1568,16 @@ bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { C.addTransition(State->BindExpr(CE, C.getLocationContext(), Result)); return true; } - else if (name == "uv_mutex_lock") { - ProgramStateRef State = C.getState(); - if (State->get() == (unsigned)-1) { - C.addTransition(State->set(C.getStackFrame()->getIndex())); - return true; - } - } - else if (name == "uv_mutex_unlock") { - ProgramStateRef State = C.getState(); - const auto *LCtx = C.getLocationContext(); - const auto *FD = dyn_cast(LCtx->getDecl()); - if (State->get() == (unsigned)C.getStackFrame()->getIndex() && - !isFDAnnotatedNotSafepoint(FD)) { - C.addTransition(State->set(-1)); - return true; - } + { + auto *Decl = Call.getDecl(); + const FunctionDecl *FD = Decl ? 
Decl->getAsFunction() : nullptr; + if (isMutexLock(name) || (FD && declHasAnnotation(FD, "julia_notsafepoint_enter"))) { + ProgramStateRef State = C.getState(); + if (State->get() == (unsigned)-1) { + C.addTransition(State->set(getStackFrameHeight(C.getStackFrame()))); + return true; + } + } } return false; } diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 1f209f36291a2..0ce3a9b188df2 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -12,12 +12,13 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len) UNAVAILABLE -JL_DLLEXPORT int32_t jl_get_llvm_gv_fallback(void *native_code, jl_value_t *p) UNAVAILABLE + ios_t *z, ios_t *s) UNAVAILABLE +JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE +JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_function_ir_fallback(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo) UNAVAILABLE JL_DLLEXPORT void jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE @@ -31,10 +32,10 @@ JL_DLLEXPORT int jl_getFunctionInfo_fallback(jl_frame_t **frames, uintptr_t poin return 0; } -JL_DLLEXPORT void jl_register_fptrs_fallback(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs, +JL_DLLEXPORT void jl_register_fptrs_fallback(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs, jl_method_instance_t **linfos, size_t n) { - (void)sysimage_base; (void)fptrs; (void)linfos; (void)n; + (void)image_base; (void)fptrs; (void)linfos; (void)n; } JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_fallback(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) @@ -47,6 +48,8 @@ JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call); } +JL_DLLEXPORT void jl_generate_fptr_for_oc_wrapper_fallback(jl_code_instance_t *unspec) UNAVAILABLE + JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_fallback(void) { return 0; @@ -57,7 +60,7 @@ JL_DLLEXPORT int jl_compile_extern_c_fallback(LLVMOrcThreadSafeModuleRef llvmmod return 0; } -JL_DLLEXPORT void jl_teardown_codegen_fallback(void) +JL_DLLEXPORT void jl_teardown_codegen_fallback(void) JL_NOTSAFEPOINT { } @@ -66,7 +69,7 @@ JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void) return 0; } -JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmctxt, const jl_cgparams_t *cgparams, int _policy) UNAVAILABLE +JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world) UNAVAILABLE JL_DLLEXPORT void jl_dump_compiles_fallback(void *s) { @@ -80,9 +83,9 @@ JL_DLLEXPORT void jl_dump_llvm_opt_fallback(void *s) { } 
-JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm_fallback(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
+JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm_fallback(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
-JL_DLLEXPORT jl_value_t *jl_dump_function_asm_fallback(jl_llvmf_dump_t* dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
+JL_DLLEXPORT jl_value_t *jl_dump_function_asm_fallback(jl_llvmf_dump_t* dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw) UNAVAILABLE
 JL_DLLEXPORT void jl_get_function_id_fallback(void *native_code, jl_code_instance_t *ncode, int32_t *func_idx, int32_t *specfunc_idx) UNAVAILABLE
@@ -104,34 +107,46 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr)
     return 0;
 }
-JL_DLLEXPORT void jl_add_optimization_passes_fallback(void *PM, int opt_level, int lower_intrinsics) UNAVAILABLE
+JL_DLLEXPORT void jl_build_newpm_pipeline_fallback(void *MPM, void *PB, int Speedup, int Size,
+                                                   int lower_intrinsics, int dump_native, int external_use, int llvm_only) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_fallback(void *PM) UNAVAILABLE
+JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { }
-JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_fallback(void *PM) UNAVAILABLE
+#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \
+    JL_DLLEXPORT void LLVMExtraMPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE
+#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \
+    JL_DLLEXPORT void LLVMExtraCGPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE
+#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \
+    JL_DLLEXPORT void LLVMExtraFPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE
+#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \
+    JL_DLLEXPORT void LLVMExtraLPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_fallback(void *PM) UNAVAILABLE
+#include "llvm-julia-passes.inc"
-JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass_fallback(void *PM) UNAVAILABLE
+#undef MODULE_PASS
+#undef CGSCC_PASS
+#undef FUNCTION_PASS
+#undef LOOP_PASS
-JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_fallback(void *PM) UNAVAILABLE
+//LLVM C api to the julia JIT
+JL_DLLEXPORT void* JLJITGetLLVMOrcExecutionSession_fallback(void* JIT) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_fallback(void *PM) UNAVAILABLE
+JL_DLLEXPORT void* JLJITGetJuliaOJIT_fallback(void) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_fallback(void *PM) UNAVAILABLE
+JL_DLLEXPORT void* JLJITGetExternalJITDylib_fallback(void* JIT) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_fallback(void *PM) UNAVAILABLE
+JL_DLLEXPORT void* JLJITAddObjectFile_fallback(void* JIT, void* JD, void* ObjBuffer) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraJuliaLICMPass_fallback(void *PM) UNAVAILABLE
+JL_DLLEXPORT void* JLJITAddLLVMIRModule_fallback(void* JIT, void* JD, void* TSM) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraAddAllocOptPass_fallback(void *PM) UNAVAILABLE
+JL_DLLEXPORT void* JLJITLookup_fallback(void* JIT, void* Result, const char *Name) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_fallback(void *PM, bool_t imaging_mode) UNAVAILABLE
+JL_DLLEXPORT void* JLJITMangleAndIntern_fallback(void* JIT, const char *Name) UNAVAILABLE
-JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_fallback(void *PM) UNAVAILABLE
+JL_DLLEXPORT const char
*JLJITGetTripleString_fallback(void* JIT) UNAVAILABLE -JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_fallback(void *PM, bool_t Strong) UNAVAILABLE +JL_DLLEXPORT const char JLJITGetGlobalPrefix_fallback(void* JIT) UNAVAILABLE -JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_fallback(void *PM) UNAVAILABLE +JL_DLLEXPORT const char *JLJITGetDataLayoutString_fallback(void* JIT) UNAVAILABLE -JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_fallback(void *PM) UNAVAILABLE +JL_DLLEXPORT void* JLJITGetIRCompileLayer_fallback(void* JIT) UNAVAILABLE diff --git a/src/codegen.cpp b/src/codegen.cpp index 8ac0cf6105601..ceef6486f8c31 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3,12 +3,6 @@ #undef DEBUG #include "llvm-version.h" #include "platform.h" -#if defined(_CPU_X86_) -#define JL_NEED_FLOATTEMP_VAR 1 -#endif -#if defined(_OS_WINDOWS_) || defined(_OS_FREEBSD_) -#define JL_DISABLE_FPO -#endif #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS @@ -26,11 +20,7 @@ // target machine computation #include -#if JL_LLVM_VERSION >= 140000 #include -#else -#include -#endif #include #include #include @@ -50,10 +40,10 @@ #include #include #include +#include // support #include -#include #include #include #include @@ -88,6 +78,27 @@ using namespace llvm; +static bool jl_fpo_disabled(const Triple &TT) { +#ifdef JL_DISABLE_FPO + return true; +#endif +#ifdef _COMPILER_MSAN_ENABLED_ + // MSAN doesn't support FPO + return true; +#endif + if (TT.isOSLinux() || TT.isOSWindows() || TT.isOSFreeBSD()) { + return true; + } + return false; +} + +static bool jl_floattemp_var_needed(const Triple &TT) { +#ifdef JL_NEED_FLOATTEMP_VAR + return true; +#endif + return TT.getArch() == Triple::x86; +} + //Drag some useful type functions into our namespace //to reduce verbosity of our code auto getInt1Ty(LLVMContext &ctxt) { @@ -114,6 +125,9 @@ auto getFloatTy(LLVMContext &ctxt) { auto getDoubleTy(LLVMContext &ctxt) { return Type::getDoubleTy(ctxt); } +auto getBFloatTy(LLVMContext &ctxt) { + return Type::getBFloatTy(ctxt); +} auto getFP128Ty(LLVMContext &ctxt) { return Type::getFP128Ty(ctxt); } @@ -141,13 +155,6 @@ auto getFloatPtrTy(LLVMContext &ctxt) { auto getDoublePtrTy(LLVMContext &ctxt) { return Type::getDoublePtrTy(ctxt); } -auto getSizePtrTy(LLVMContext &ctxt) { - if (sizeof(size_t) > sizeof(uint32_t)) { - return getInt64PtrTy(ctxt); - } else { - return getInt32PtrTy(ctxt); - } -} typedef Instruction TerminatorInst; @@ -155,16 +162,65 @@ typedef Instruction TerminatorInst; #define NOMINMAX #endif -#include "julia.h" -#include "julia_internal.h" #include "jitlayers.h" -#include "codegen_shared.h" #include "processor.h" #include "julia_assert.h" #undef DEBUG_TYPE //LLVM occasionally likes to set DEBUG_TYPE in a header... #define DEBUG_TYPE "julia_irgen_codegen" +void setName(jl_codegen_params_t ¶ms, Value *V, const Twine &Name) +{ + // we do the constant check again later, duplicating it here just makes sure the assertion + // fires on debug builds even if debug info is not enabled + // note that if this assertion fires then the implication is that the caller of setName + // is not checking that setName is only called for non-folded instructions (e.g. folded bitcasts + // and 0-byte geps), which can result in information loss on the renamed instruction. 
+ assert((isa(V) || isa(V)) && "Should only set names on instructions!"); + if (params.debug_level >= 2 && !isa(V)) { + V->setName(Name); + } +} + +void setName(jl_codegen_params_t ¶ms, Value *V, std::function GetName) +{ + assert((isa(V) || isa(V)) && "Should only set names on instructions!"); + if (params.debug_level >= 2 && !isa(V)) { + V->setName(Twine(GetName())); + } +} + +void setNameWithField(jl_codegen_params_t ¶ms, Value *V, std::function GetObjName, jl_datatype_t *jt, unsigned idx, const Twine &suffix) +{ + assert((isa(V) || isa(V)) && "Should only set names on instructions!"); + if (params.debug_level >= 2 && !isa(V)) { + if (jl_is_tuple_type(jt)){ + V->setName(Twine(GetObjName()) + "[" + Twine(idx + 1) + "]"+ suffix); + return; + } + + if (jl_is_namedtuple_type(jt)) { + auto names = jl_tparam0(jt); + assert(jl_is_tuple(names)); + if (idx < jl_nfields(names)) { + auto name = jl_fieldref(names, idx); + assert(jl_is_symbol(name)); + V->setName(Twine(GetObjName()) + "." + Twine(jl_symbol_name((jl_sym_t*)name)) + suffix); + return; + } + } else { + auto flds = jl_field_names(jt); + if (idx < jl_svec_len(flds)) { + auto name = jl_svecref(flds, idx); + assert(jl_is_symbol(name)); + V->setName(Twine(GetObjName()) + "." + Twine(jl_symbol_name((jl_sym_t*)name)) + suffix); + return; + } + } + V->setName(Twine(GetObjName()) + "." + Twine("unknown field") + suffix); + } +} + STATISTIC(EmittedAllocas, "Number of allocas emitted"); STATISTIC(EmittedIntToPtrs, "Number of inttoptrs emitted"); STATISTIC(ModulesCreated, "Number of LLVM Modules created"); @@ -187,10 +243,10 @@ STATISTIC(GeneratedCCallables, "Number of C-callable functions generated"); STATISTIC(GeneratedInvokeWrappers, "Number of invoke wrappers generated"); STATISTIC(EmittedFunctions, "Number of functions emitted"); -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN void jl_dump_emitted_mi_name_impl(void *s) { - **jl_ExecutionEngine->get_dump_emitted_mi_name_stream() = (JL_STREAM*)s; + **jl_ExecutionEngine->get_dump_emitted_mi_name_stream() = (ios_t*)s; } extern "C" { @@ -227,11 +283,13 @@ extern void _chkstk(void); // types struct jl_typecache_t { + Type *T_size; Type *T_jlvalue; Type *T_pjlvalue; Type *T_prjlvalue; Type *T_ppjlvalue; Type *T_pprjlvalue; + StructType *T_jlgenericmemory; StructType *T_jlarray; Type *T_pjlarray; FunctionType *T_jlfunc; @@ -240,22 +298,29 @@ struct jl_typecache_t { IntegerType *T_sigatomic; Type *T_ppint8; + unsigned sizeof_ptr; + Align alignof_ptr; bool initialized; jl_typecache_t() : T_jlvalue(nullptr), T_pjlvalue(nullptr), T_prjlvalue(nullptr), - T_ppjlvalue(nullptr), T_pprjlvalue(nullptr), T_jlarray(nullptr), - T_pjlarray(nullptr), T_jlfunc(nullptr), T_jlfuncparams(nullptr), - T_sigatomic(nullptr), T_ppint8(nullptr), initialized(false) {} + T_ppjlvalue(nullptr), T_pprjlvalue(nullptr), + T_jlgenericmemory(nullptr), T_jlarray(nullptr), T_pjlarray(nullptr), + T_jlfunc(nullptr), T_jlfuncparams(nullptr), T_sigatomic(nullptr), T_ppint8(nullptr), + initialized(false) {} - void initialize(LLVMContext &context) { + void initialize(LLVMContext &context, const DataLayout &DL) { if (initialized) { return; } initialized = true; T_ppint8 = PointerType::get(getInt8PtrTy(context), 0); T_sigatomic = Type::getIntNTy(context, sizeof(sig_atomic_t) * 8); + T_size = DL.getIntPtrType(context); + sizeof_ptr = DL.getPointerSize(); + // use pointer abi alignment for intptr_t + alignof_ptr = DL.getPointerABIAlignment(0); T_jlvalue = JuliaType::get_jlvalue_ty(context); T_pjlvalue = PointerType::get(T_jlvalue, 0); 
T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); @@ -267,15 +332,12 @@ struct jl_typecache_t { T_jlfuncparams = JuliaType::get_jlfuncparams_ty(context); assert(T_jlfuncparams != NULL); - Type *vaelts[] = {PointerType::get(getInt8Ty(context), AddressSpace::Loaded) - , getSizeTy(context) - , getInt16Ty(context) - , getInt16Ty(context) - , getInt32Ty(context) + T_jlgenericmemory = StructType::get(context, { T_size, T_pprjlvalue /* [, real-owner] */ }); + Type *vaelts[] = { PointerType::get(getInt8Ty(context), AddressSpace::Loaded), + PointerType::get(T_jlgenericmemory, AddressSpace::Tracked), + // dimsize[ndims] }; - static_assert(sizeof(jl_array_flags_t) == sizeof(int16_t), - "Size of jl_array_flags_t is not the same as int16_t"); - T_jlarray = StructType::get(context, makeArrayRef(vaelts)); + T_jlarray = StructType::get(context, ArrayRef(vaelts)); T_pjlarray = PointerType::get(T_jlarray, 0); } }; @@ -290,19 +352,19 @@ struct jl_tbaacache_t { MDNode *tbaa_unionselbyte; // a selector byte in isbits Union struct fields MDNode *tbaa_data; // Any user data that `pointerset/ref` are allowed to alias MDNode *tbaa_binding; // jl_binding_t::value - MDNode *tbaa_value; // jl_value_t, that is not jl_array_t + MDNode *tbaa_value; // jl_value_t, that is not jl_array_t or jl_genericmemory_t MDNode *tbaa_mutab; // mutable type MDNode *tbaa_datatype; // datatype MDNode *tbaa_immut; // immutable type MDNode *tbaa_ptrarraybuf; // Data in an array of boxed values MDNode *tbaa_arraybuf; // Data in an array of POD - MDNode *tbaa_array; // jl_array_t - MDNode *tbaa_arrayptr; // The pointer inside a jl_array_t + MDNode *tbaa_array; // jl_array_t or jl_genericmemory_t + MDNode *tbaa_arrayptr; // The pointer inside a jl_array_t (to memoryref) MDNode *tbaa_arraysize; // A size in a jl_array_t - MDNode *tbaa_arraylen; // The len in a jl_array_t - MDNode *tbaa_arrayflags; // The flags in a jl_array_t - MDNode *tbaa_arrayoffset; // The offset in a jl_array_t - MDNode *tbaa_arrayselbyte; // a selector byte in a isbits Union jl_array_t + MDNode *tbaa_arrayselbyte; // a selector byte in a isbits Union jl_genericmemory_t + MDNode *tbaa_memoryptr; // The pointer inside a jl_genericmemory_t + MDNode *tbaa_memorylen; // The length in a jl_genericmemory_t + MDNode *tbaa_memoryown; // The owner in a foreign jl_genericmemory_t MDNode *tbaa_const; // Memory that is immutable by the time LLVM can see it bool initialized; @@ -311,8 +373,8 @@ struct jl_tbaacache_t { tbaa_value(nullptr), tbaa_mutab(nullptr), tbaa_datatype(nullptr), tbaa_immut(nullptr), tbaa_ptrarraybuf(nullptr), tbaa_arraybuf(nullptr), tbaa_array(nullptr), tbaa_arrayptr(nullptr), tbaa_arraysize(nullptr), - tbaa_arraylen(nullptr), tbaa_arrayflags(nullptr), tbaa_arrayoffset(nullptr), - tbaa_arrayselbyte(nullptr), tbaa_const(nullptr), initialized(false) {} + tbaa_arrayselbyte(nullptr), tbaa_memoryptr(nullptr), tbaa_memorylen(nullptr), tbaa_memoryown(nullptr), + tbaa_const(nullptr), initialized(false) {} auto tbaa_make_child(MDBuilder &mbuilder, const char *name, MDNode *parent = nullptr, bool isConstant = false) { MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? 
parent : tbaa_root); @@ -350,11 +412,62 @@ struct jl_tbaacache_t { std::tie(tbaa_array, tbaa_array_scalar) = tbaa_make_child(mbuilder, "jtbaa_array"); tbaa_arrayptr = tbaa_make_child(mbuilder, "jtbaa_arrayptr", tbaa_array_scalar).first; tbaa_arraysize = tbaa_make_child(mbuilder, "jtbaa_arraysize", tbaa_array_scalar).first; - tbaa_arraylen = tbaa_make_child(mbuilder, "jtbaa_arraylen", tbaa_array_scalar).first; - tbaa_arrayflags = tbaa_make_child(mbuilder, "jtbaa_arrayflags", tbaa_array_scalar).first; - tbaa_arrayoffset = tbaa_make_child(mbuilder, "jtbaa_arrayoffset", tbaa_array_scalar).first; - tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first; tbaa_arrayselbyte = tbaa_make_child(mbuilder, "jtbaa_arrayselbyte", tbaa_array_scalar).first; + tbaa_memoryptr = tbaa_make_child(mbuilder, "jtbaa_memoryptr", tbaa_array_scalar, true).first; + tbaa_memorylen = tbaa_make_child(mbuilder, "jtbaa_memorylen", tbaa_array_scalar, true).first; + tbaa_memoryown = tbaa_make_child(mbuilder, "jtbaa_memoryown", tbaa_array_scalar, true).first; + tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first; + } +}; + +struct jl_noaliascache_t { + // Each domain operates completely independently. + // "No aliasing" is inferred if it is implied by any domain. + + // memory regions domain + struct jl_regions_t { + MDNode *gcframe; // GC frame + MDNode *stack; // Stack slot + MDNode *data; // Any user data that `pointerset/ref` are allowed to alias + MDNode *type_metadata; // Non-user-accessible type metadata incl. union selectors, etc. + MDNode *constant; // Memory that is immutable by the time LLVM can see it + + jl_regions_t(): gcframe(nullptr), stack(nullptr), data(nullptr), type_metadata(nullptr), constant(nullptr) {} + + void initialize(llvm::LLVMContext &context) { + MDBuilder mbuilder(context); + MDNode *domain = mbuilder.createAliasScopeDomain("jnoalias"); + + this->gcframe = mbuilder.createAliasScope("jnoalias_gcframe", domain); + this->stack = mbuilder.createAliasScope("jnoalias_stack", domain); + this->data = mbuilder.createAliasScope("jnoalias_data", domain); + this->type_metadata = mbuilder.createAliasScope("jnoalias_typemd", domain); + this->constant = mbuilder.createAliasScope("jnoalias_const", domain); + } + } regions; + + // `@aliasscope` domain + struct jl_aliasscope_t { + MDNode *current; + + jl_aliasscope_t(): current(nullptr) {} + + // No init required, this->current is only used to store the currently active aliasscope + void initialize(llvm::LLVMContext &context) {} + } aliasscope; + + bool initialized; + + jl_noaliascache_t(): regions(), aliasscope(), initialized(false) {} + + void initialize(llvm::LLVMContext &context) { + if (initialized) { + assert(®ions.constant->getContext() == &context); + return; + } + initialized = true; + regions.initialize(context); + aliasscope.initialize(context); } }; @@ -415,14 +528,15 @@ struct JuliaVariable { public: StringLiteral name; bool isconst; - Type *(*_type)(LLVMContext &C); + Type *(*_type)(Type *T_size); JuliaVariable(const JuliaVariable&) = delete; JuliaVariable(const JuliaVariable&&) = delete; GlobalVariable *realize(Module *m) { if (GlobalValue *V = m->getNamedValue(name)) return cast(V); - return new GlobalVariable(*m, _type(m->getContext()), + auto T_size = m->getDataLayout().getIntPtrType(m->getContext()); + return new GlobalVariable(*m, _type(T_size), isconst, GlobalVariable::ExternalLinkage, NULL, name); } @@ -433,10 +547,31 @@ static inline void add_named_global(JuliaVariable *name, void *addr) 
add_named_global(name->name, addr); } + +typedef FunctionType *(*TypeFnContextOnly)(LLVMContext &C); +typedef FunctionType *(*TypeFnContextAndSizeT)(LLVMContext &C, Type *T_size); +typedef FunctionType *(*TypeFnContextAndTriple)(LLVMContext &C, const Triple &triple); + +FunctionType *invoke_type(TypeFnContextOnly f, Module &M) +{ + return f(M.getContext()); +} + +FunctionType *invoke_type(TypeFnContextAndSizeT f, Module &M) +{ + return f(M.getContext(), M.getDataLayout().getIntPtrType(M.getContext())); +} + +FunctionType *invoke_type(TypeFnContextAndTriple f, Module &M) +{ + return f(M.getContext(), Triple(M.getTargetTriple())); +} + +template struct JuliaFunction { public: llvm::StringLiteral name; - llvm::FunctionType *(*_type)(llvm::LLVMContext &C); + TypeFn_t _type; llvm::AttributeList (*_attrs)(llvm::LLVMContext &C); JuliaFunction(const JuliaFunction&) = delete; @@ -444,7 +579,7 @@ struct JuliaFunction { llvm::Function *realize(llvm::Module *m) { if (llvm::GlobalValue *V = m->getNamedValue(name)) return llvm::cast(V); - llvm::Function *F = llvm::Function::Create(_type(m->getContext()), + llvm::Function *F = llvm::Function::Create(invoke_type(_type, *m), llvm::Function::ExternalLinkage, name, m); if (_attrs) @@ -453,8 +588,8 @@ struct JuliaFunction { } }; -template -static inline void add_named_global(JuliaFunction *name, T *addr) +template +static inline void add_named_global(JuliaFunction *name, T *addr) { // cast through integer to avoid c++ pedantic warning about casting between // data and code pointers @@ -468,18 +603,21 @@ static inline void add_named_global(StringRef name, T *addr) add_named_global(name, (void*)(uintptr_t)addr); } -AttributeSet Attributes(LLVMContext &C, std::initializer_list attrkinds) +AttributeSet Attributes(LLVMContext &C, std::initializer_list attrkinds, std::initializer_list extra={}) { - SmallVector attrs(attrkinds.size()); + SmallVector attrs(attrkinds.size() + extra.size()); for (size_t i = 0; i < attrkinds.size(); i++) attrs[i] = Attribute::get(C, attrkinds.begin()[i]); - return AttributeSet::get(C, makeArrayRef(attrs)); + for (size_t i = 0; i < extra.size(); i++) + attrs[attrkinds.size() + i] = extra.begin()[i]; + return AttributeSet::get(C, ArrayRef(attrs)); } static Type *get_pjlvalue(LLVMContext &C) { return JuliaType::get_pjlvalue_ty(C); } static FunctionType *get_func_sig(LLVMContext &C) { return JuliaType::get_jlfunc_ty(C); } static FunctionType *get_func2_sig(LLVMContext &C) { return JuliaType::get_jlfunc2_ty(C); } +static FunctionType *get_func3_sig(LLVMContext &C) { return JuliaType::get_jlfunc3_ty(C); } static FunctionType *get_donotdelete_sig(LLVMContext &C) { return FunctionType::get(getVoidTy(C), true); @@ -496,9 +634,16 @@ static AttributeList get_func_attrs(LLVMContext &C) static AttributeList get_donotdelete_func_attrs(LLVMContext &C) { - AttributeSet FnAttrs = Attributes(C, {Attribute::InaccessibleMemOnly, Attribute::WillReturn, Attribute::NoUnwind}); + AttrBuilder FnAttrs(C); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); +#else + FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); +#endif + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, - FnAttrs, + AttributeSet::get(C, FnAttrs), Attributes(C, {}), None); } @@ -519,20 +664,65 @@ static AttributeList get_attrs_basic(LLVMContext &C) None); } -static AttributeList get_attrs_sext(LLVMContext &C) +static AttributeList get_attrs_box_float(LLVMContext &C, unsigned 
nbytes) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); +#else + FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); +#endif + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addDereferenceableAttr(nbytes); + RetAttrs.addAlignmentAttr(Align(alignof(void*))); return AttributeList::get(C, - AttributeSet(), - Attributes(C, {Attribute::NonNull}), - {Attributes(C, {Attribute::SExt})}); + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + None); +} + +static AttributeList get_attrs_box_sext(LLVMContext &C, unsigned nbytes) +{ + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); +#else + FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); +#endif + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addAttribute(Attribute::getWithDereferenceableBytes(C, nbytes)); + RetAttrs.addDereferenceableAttr(nbytes); + RetAttrs.addAlignmentAttr(Align(alignof(void*))); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + AttributeSet::get(C, {Attribute::get(C, Attribute::SExt)})); } -static AttributeList get_attrs_zext(LLVMContext &C) +static AttributeList get_attrs_box_zext(LLVMContext &C, unsigned nbytes) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); +#else + FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); +#endif + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addDereferenceableAttr(nbytes); + RetAttrs.addAlignmentAttr(Align(alignof(void*))); return AttributeList::get(C, - AttributeSet(), - Attributes(C, {Attribute::NonNull}), - {Attributes(C, {Attribute::ZExt})}); + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + AttributeSet::get(C, {Attribute::get(C, Attribute::ZExt)})); } @@ -540,67 +730,75 @@ static AttributeList get_attrs_zext(LLVMContext &C) static const auto jlRTLD_DEFAULT_var = new JuliaVariable{ XSTR(jl_RTLD_DEFAULT_handle), true, - [](LLVMContext &C) { return static_cast(getInt8PtrTy(C)); }, + [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); }, }; -#ifdef _OS_WINDOWS_ static const auto jlexe_var = new JuliaVariable{ XSTR(jl_exe_handle), true, - [](LLVMContext &C) { return static_cast(getInt8PtrTy(C)); }, + [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); }, }; static const auto jldll_var = new JuliaVariable{ XSTR(jl_libjulia_handle), true, - [](LLVMContext &C) { return static_cast(getInt8PtrTy(C)); }, + [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); }, }; static const auto jldlli_var = new JuliaVariable{ XSTR(jl_libjulia_internal_handle), true, - [](LLVMContext &C) { return static_cast(getInt8PtrTy(C)); }, + [](Type *T_size) -> Type * { return getInt8PtrTy(T_size->getContext()); }, +}; +static const auto jl_small_typeof_var = new JuliaVariable{ + XSTR(jl_small_typeof), + true, + [](Type *T_size) -> Type * { return getInt8Ty(T_size->getContext()); }, }; -#endif //_OS_WINDOWS_ static const auto jlstack_chk_guard_var = 
new JuliaVariable{ XSTR(__stack_chk_guard), true, - get_pjlvalue, + [](Type *T_size) -> Type * { return get_pjlvalue(T_size->getContext()); }, }; static const auto jlgetworld_global = new JuliaVariable{ XSTR(jl_world_counter), false, - [](LLVMContext &C) { return (Type*)getSizeTy(C); }, + [](Type *T_size) -> Type * { return T_size; }, }; static const auto jlboxed_int8_cache = new JuliaVariable{ XSTR(jl_boxed_int8_cache), true, - [](LLVMContext &C) { return (Type*)ArrayType::get(get_pjlvalue(C), 256); }, + [](Type *T_size) -> Type * { return ArrayType::get(get_pjlvalue(T_size->getContext()), 256); }, }; static const auto jlboxed_uint8_cache = new JuliaVariable{ XSTR(jl_boxed_uint8_cache), true, - [](LLVMContext &C) { return (Type*)ArrayType::get(get_pjlvalue(C), 256); }, + [](Type *T_size) -> Type * { return ArrayType::get(get_pjlvalue(T_size->getContext()), 256); }, }; -static const auto jlpgcstack_func = new JuliaFunction{ +static const auto jlpgcstack_func = new JuliaFunction<>{ "julia.get_pgcstack", [](LLVMContext &C) { return FunctionType::get(PointerType::get(JuliaType::get_ppjlvalue_ty(C), 0), false); }, nullptr, }; +static const auto jladoptthread_func = new JuliaFunction<>{ + "julia.get_pgcstack_or_new", + jlpgcstack_func->_type, + jlpgcstack_func->_attrs, +}; // important functions // Symbols are not gc-tracked, but we'll treat them as callee rooted anyway, // because they may come from a gc-rooted location -static const auto jlnew_func = new JuliaFunction{ +static const auto jlnew_func = new JuliaFunction<>{ XSTR(jl_new_structv), get_func_sig, get_func_attrs, }; -static const auto jlsplatnew_func = new JuliaFunction{ +static const auto jlsplatnew_func = new JuliaFunction<>{ XSTR(jl_new_structt), [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); @@ -609,73 +807,87 @@ static const auto jlsplatnew_func = new JuliaFunction{ }, get_attrs_basic, }; -static const auto jlthrow_func = new JuliaFunction{ +static const auto jlthrow_func = new JuliaFunction<>{ XSTR(jl_throw), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, get_attrs_noreturn, }; -static const auto jlerror_func = new JuliaFunction{ +static const auto jlerror_func = new JuliaFunction<>{ XSTR(jl_error), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); }, get_attrs_noreturn, }; -static const auto jlatomicerror_func = new JuliaFunction{ +static const auto jlatomicerror_func = new JuliaFunction<>{ XSTR(jl_atomic_error), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); }, get_attrs_noreturn, }; -static const auto jltypeerror_func = new JuliaFunction{ +static const auto jltypeerror_func = new JuliaFunction<>{ XSTR(jl_type_error), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, get_attrs_noreturn, }; -static const auto jlundefvarerror_func = new JuliaFunction{ +static const auto jlundefvarerror_func = new JuliaFunction<>{ XSTR(jl_undefined_var_error), + [](LLVMContext &C) { + Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted); + return FunctionType::get(getVoidTy(C), {T, T}, false); + }, + get_attrs_noreturn, +}; +static const auto jlhasnofield_func = new JuliaFunction<>{ + XSTR(jl_has_no_field_error), [](LLVMContext &C) { return 
FunctionType::get(getVoidTy(C), - {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, + {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), + PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, get_attrs_noreturn, }; -static const auto jlboundserrorv_func = new JuliaFunction{ +static const auto jlboundserrorv_func = new JuliaFunction{ XSTR(jl_bounds_error_ints), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), getSizePtrTy(C), getSizeTy(C)}, false); }, + [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C), + {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), T_size->getPointerTo(), T_size}, false); }, get_attrs_noreturn, }; -static const auto jlboundserror_func = new JuliaFunction{ +static const auto jlboundserror_func = new JuliaFunction{ XSTR(jl_bounds_error_int), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), getSizeTy(C)}, false); }, + [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C), + {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), T_size}, false); }, get_attrs_noreturn, }; -static const auto jlvboundserror_func = new JuliaFunction{ +static const auto jlvboundserror_func = new JuliaFunction{ XSTR(jl_bounds_error_tuple_int), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {JuliaType::get_pprjlvalue_ty(C), getSizeTy(C), getSizeTy(C)}, false); }, + [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C), + {JuliaType::get_pprjlvalue_ty(C), T_size, T_size}, false); }, get_attrs_noreturn, }; -static const auto jluboundserror_func = new JuliaFunction{ +static const auto jluboundserror_func = new JuliaFunction{ XSTR(jl_bounds_error_unboxed_int), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {PointerType::get(getInt8Ty(C), AddressSpace::Derived), JuliaType::get_pjlvalue_ty(C), getSizeTy(C)}, false); }, + [](LLVMContext &C, Type *T_size) { + return FunctionType::get(getVoidTy(C), + {PointerType::get(getInt8Ty(C), AddressSpace::Derived), JuliaType::get_pjlvalue_ty(C), T_size}, false); }, get_attrs_noreturn, }; -static const auto jlcheckassign_func = new JuliaFunction{ +static const auto jlcheckassign_func = new JuliaFunction<>{ XSTR(jl_checked_assignment), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {JuliaType::get_pjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + return FunctionType::get(getVoidTy(C), + {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, nullptr, }; -static const auto jldeclareconst_func = new JuliaFunction{ +static const auto jldeclareconst_func = new JuliaFunction<>{ XSTR(jl_declare_constant), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {JuliaType::get_pjlvalue_ty(C)}, false); }, + [](LLVMContext &C) { + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); + return FunctionType::get(getVoidTy(C), + {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false); }, nullptr, }; -static const auto jlgetbindingorerror_func = new JuliaFunction{ +static const auto jlgetbindingorerror_func = new JuliaFunction<>{ 
XSTR(jl_get_binding_or_error), [](LLVMContext &C) { auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); @@ -684,8 +896,8 @@ static const auto jlgetbindingorerror_func = new JuliaFunction{ }, nullptr, }; -static const auto jlgetbindingwrorerror_func = new JuliaFunction{ - XSTR(jl_get_binding_wr_or_error), +static const auto jlgetbindingwrorerror_func = new JuliaFunction<>{ + XSTR(jl_get_binding_wr), [](LLVMContext &C) { auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); return FunctionType::get(T_pjlvalue, @@ -693,7 +905,7 @@ static const auto jlgetbindingwrorerror_func = new JuliaFunction{ }, nullptr, }; -static const auto jlboundp_func = new JuliaFunction{ +static const auto jlboundp_func = new JuliaFunction<>{ XSTR(jl_boundp), [](LLVMContext &C) { auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); @@ -702,7 +914,7 @@ static const auto jlboundp_func = new JuliaFunction{ }, nullptr, }; -static const auto jltopeval_func = new JuliaFunction{ +static const auto jltopeval_func = new JuliaFunction<>{ XSTR(jl_toplevel_eval), [](LLVMContext &C) { auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C); @@ -714,7 +926,7 @@ static const auto jltopeval_func = new JuliaFunction{ Attributes(C, {Attribute::NonNull}), None); }, }; -static const auto jlcopyast_func = new JuliaFunction{ +static const auto jlcopyast_func = new JuliaFunction<>{ XSTR(jl_copy_ast), [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); @@ -726,21 +938,12 @@ static const auto jlcopyast_func = new JuliaFunction{ Attributes(C, {Attribute::NonNull}), None); }, }; -//static const auto jlnsvec_func = new JuliaFunction{ -// XSTR(jl_svec), -// [](LLVMContext &C) { return FunctionType::get(T_prjlvalue, -// {getSizeTy(C)}, true); }, -// [](LLVMContext &C) { return AttributeList::get(C, -// AttributeSet(), -// Attributes(C, {Attribute::NonNull}), -// None); }, -//}; -static const auto jlapplygeneric_func = new JuliaFunction{ +static const auto jlapplygeneric_func = new JuliaFunction<>{ XSTR(jl_apply_generic), get_func_sig, get_func_attrs, }; -static const auto jlinvoke_func = new JuliaFunction{ +static const auto jlinvoke_func = new JuliaFunction<>{ XSTR(jl_invoke), get_func2_sig, [](LLVMContext &C) { return AttributeList::get(C, @@ -749,7 +952,7 @@ static const auto jlinvoke_func = new JuliaFunction{ {AttributeSet(), Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); }, }; -static const auto jlmethod_func = new JuliaFunction{ +static const auto jlmethod_func = new JuliaFunction<>{ XSTR(jl_method_def), [](LLVMContext &C) { auto T_jlvalue = JuliaType::get_jlvalue_ty(C); @@ -760,19 +963,18 @@ static const auto jlmethod_func = new JuliaFunction{ }, nullptr, }; -static const auto jlgenericfunction_func = new JuliaFunction{ +static const auto jlgenericfunction_func = new JuliaFunction<>{ XSTR(jl_generic_function_def), [](LLVMContext &C) { auto T_jlvalue = JuliaType::get_jlvalue_ty(C); auto T_pjlvalue = PointerType::get(T_jlvalue, 0); auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0); - return FunctionType::get(T_prjlvalue, - {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue, T_pjlvalue}, false); + return FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue}, false); }, nullptr, }; -static const auto jllockvalue_func = new JuliaFunction{ +static const auto jllockvalue_func = new JuliaFunction<>{ XSTR(jl_lock_value), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {PointerType::get(JuliaType::get_jlvalue_ty(C), 
AddressSpace::CalleeRooted)}, false); }, @@ -781,7 +983,7 @@ static const auto jllockvalue_func = new JuliaFunction{ AttributeSet(), {Attributes(C, {Attribute::NoCapture})}); }, }; -static const auto jlunlockvalue_func = new JuliaFunction{ +static const auto jlunlockvalue_func = new JuliaFunction<>{ XSTR(jl_unlock_value), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); }, @@ -790,59 +992,83 @@ static const auto jlunlockvalue_func = new JuliaFunction{ AttributeSet(), {Attributes(C, {Attribute::NoCapture})}); }, }; -static const auto jlenter_func = new JuliaFunction{ +static const auto jlenter_func = new JuliaFunction<>{ XSTR(jl_enter_handler), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {getInt8PtrTy(C)}, false); }, nullptr, }; -static const auto jl_current_exception_func = new JuliaFunction{ +static const auto jl_current_exception_func = new JuliaFunction<>{ XSTR(jl_current_exception), [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), false); }, nullptr, }; -static const auto jlleave_func = new JuliaFunction{ +static const auto jlleave_func = new JuliaFunction<>{ XSTR(jl_pop_handler), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {getInt32Ty(C)}, false); }, nullptr, }; -static const auto jl_restore_excstack_func = new JuliaFunction{ +static const auto jl_restore_excstack_func = new JuliaFunction{ XSTR(jl_restore_excstack), - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {getSizeTy(C)}, false); }, + [](LLVMContext &C, Type *T_size) { return FunctionType::get(getVoidTy(C), + {T_size}, false); }, nullptr, }; -static const auto jl_excstack_state_func = new JuliaFunction{ +static const auto jl_excstack_state_func = new JuliaFunction{ XSTR(jl_excstack_state), - [](LLVMContext &C) { return FunctionType::get(getSizeTy(C), false); }, + [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size, false); }, nullptr, }; -static const auto jlegalx_func = new JuliaFunction{ +static const auto jlegalx_func = new JuliaFunction{ XSTR(jl_egal__unboxed), - [](LLVMContext &C) { + [](LLVMContext &C, Type *T_size) { Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived); - return FunctionType::get(getInt32Ty(C), {T, T, JuliaType::get_prjlvalue_ty(C)}, false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}), - AttributeSet(), - None); }, + return FunctionType::get(getInt32Ty(C), {T, T, T_size}, false); }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly()); +#else + FnAttrs.addAttribute(Attribute::ReadOnly); + FnAttrs.addAttribute(Attribute::ArgMemOnly); +#endif + FnAttrs.addAttribute(Attribute::NoUnwind); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet(), + None); }, }; -static const auto jl_alloc_obj_func = new JuliaFunction{ +static const auto jl_alloc_obj_func = new JuliaFunction{ "julia.gc_alloc_obj", - [](LLVMContext &C) { + [](LLVMContext &C, Type *T_size) { auto T_jlvalue = JuliaType::get_jlvalue_ty(C); auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); auto T_ppjlvalue = PointerType::get(PointerType::get(T_jlvalue, 0), 0); return FunctionType::get(T_prjlvalue, - {T_ppjlvalue, getSizeTy(C), T_prjlvalue}, false); + {T_ppjlvalue, T_size, T_prjlvalue}, false); + }, + 
[](LLVMContext &C) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addAllocSizeAttr(1, None); // returns %1 bytes +#if JL_LLVM_VERSION >= 150000 + FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); +#endif +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | inaccessibleMemOnly(ModRefInfo::ModRef)); +#endif + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NoAlias); + RetAttrs.addAttribute(Attribute::NonNull); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + None); }, - [](LLVMContext &C) { return AttributeList::get(C, - AttributeSet::get(C, makeArrayRef({Attribute::getWithAllocSizeArgs(C, 1, None)})), // returns %1 bytes - Attributes(C, {Attribute::NoAlias, Attribute::NonNull}), - None); }, }; -static const auto jl_newbits_func = new JuliaFunction{ +static const auto jl_newbits_func = new JuliaFunction<>{ XSTR(jl_new_bits), [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); @@ -857,45 +1083,49 @@ static const auto jl_newbits_func = new JuliaFunction{ // `julia.typeof` does read memory, but it is effectively readnone before we lower // the allocation function. This is OK as long as we lower `julia.typeof` no later than // `julia.gc_alloc_obj`. -static const auto jl_typeof_func = new JuliaFunction{ +static const auto jl_typeof_func = new JuliaFunction<>{ "julia.typeof", [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); return FunctionType::get(T_prjlvalue, {T_prjlvalue}, false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadNone, Attribute::NoUnwind, Attribute::NoRecurse}), + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::none()); +#else + FnAttrs.addAttribute(Attribute::ReadNone); +#endif + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addAttribute(Attribute::NoRecurse); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), Attributes(C, {Attribute::NonNull}), None); }, }; -static const auto jl_loopinfo_marker_func = new JuliaFunction{ - "julia.loopinfo_marker", - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}), - AttributeSet(), - None); }, -}; -static const auto jl_write_barrier_func = new JuliaFunction{ + +static const auto jl_write_barrier_func = new JuliaFunction<>{ "julia.write_barrier", [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {JuliaType::get_prjlvalue_ty(C)}, true); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}), - AttributeSet(), - {Attributes(C, {Attribute::ReadOnly})}); }, -}; -static const auto jl_write_barrier_binding_func = new JuliaFunction{ - "julia.write_barrier_binding", - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), - {JuliaType::get_prjlvalue_ty(C)}, true); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}), + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); +#else + FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); 
+#endif + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addAttribute(Attribute::NoRecurse); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), AttributeSet(), - {Attributes(C, {Attribute::ReadOnly})}); }, + {Attributes(C, {Attribute::ReadOnly})}); + }, }; -static const auto jlisa_func = new JuliaFunction{ + +static const auto jlisa_func = new JuliaFunction<>{ XSTR(jl_isa), [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); @@ -905,7 +1135,7 @@ static const auto jlisa_func = new JuliaFunction{ nullptr, }; -static const auto jlsubtype_func = new JuliaFunction{ +static const auto jlsubtype_func = new JuliaFunction<>{ XSTR(jl_subtype), [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); @@ -914,7 +1144,7 @@ static const auto jlsubtype_func = new JuliaFunction{ }, nullptr, }; -static const auto jlapplytype_func = new JuliaFunction{ +static const auto jlapplytype_func = new JuliaFunction<>{ XSTR(jl_instantiate_type_in_env), [](LLVMContext &C) { auto T_jlvalue = JuliaType::get_jlvalue_ty(C); @@ -927,53 +1157,63 @@ static const auto jlapplytype_func = new JuliaFunction{ [](LLVMContext &C) { return AttributeList::get(C, AttributeSet(), - AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::NonNull), + AttributeSet::get(C, ArrayRef({Attribute::get(C, Attribute::NonNull), Attribute::getWithAlignment(C, Align(16))})), None); }, }; -static const auto jl_object_id__func = new JuliaFunction{ +static const auto jl_object_id__func = new JuliaFunction{ XSTR(jl_object_id_), - [](LLVMContext &C) { return FunctionType::get(getSizeTy(C), - {JuliaType::get_prjlvalue_ty(C), PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); }, + [](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size, + {T_size, PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); }, nullptr, }; -static const auto setjmp_func = new JuliaFunction{ +static const auto setjmp_func = new JuliaFunction{ jl_setjmp_name, - [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C), - {getInt8PtrTy(C), -#ifndef _OS_WINDOWS_ - getInt32Ty(C), -#endif - }, false); }, + [](LLVMContext &C, const Triple &T) { + if (T.isOSWindows()) + return FunctionType::get(getInt32Ty(C), + {getInt8PtrTy(C)}, false); + return FunctionType::get(getInt32Ty(C), + {getInt8PtrTy(C), getInt32Ty(C)}, false); + }, [](LLVMContext &C) { return AttributeList::get(C, Attributes(C, {Attribute::ReturnsTwice}), AttributeSet(), None); }, }; -static const auto memcmp_func = new JuliaFunction{ +static const auto memcmp_func = new JuliaFunction{ XSTR(memcmp), - [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C), - {getInt8PtrTy(C), getInt8PtrTy(C), getSizeTy(C)}, false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}), + [](LLVMContext &C, Type *T_size) { return FunctionType::get(getInt32Ty(C), + {getInt8PtrTy(C), getInt8PtrTy(C), T_size}, false); }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref)); +#else + FnAttrs.addAttribute(Attribute::ArgMemOnly); + FnAttrs.addAttribute(Attribute::ReadOnly); +#endif + FnAttrs.addAttribute(Attribute::NoUnwind); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), AttributeSet(), None); }, // TODO: inferLibFuncAttributes(*memcmp_func, TLI); }; -static const auto jldlsym_func = new JuliaFunction{ +static const auto jldlsym_func = new 
JuliaFunction<>{ XSTR(jl_load_and_lookup), [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C), {getInt8PtrTy(C), getInt8PtrTy(C), PointerType::get(getInt8PtrTy(C), 0)}, false); }, nullptr, }; -static const auto jllazydlsym_func = new JuliaFunction{ +static const auto jllazydlsym_func = new JuliaFunction<>{ XSTR(jl_lazy_load_and_lookup), [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C), {JuliaType::get_prjlvalue_ty(C), getInt8PtrTy(C)}, false); }, nullptr, }; -static const auto jltypeassert_func = new JuliaFunction{ +static const auto jltypeassert_func = new JuliaFunction<>{ XSTR(jl_typeassert), [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); @@ -982,19 +1222,52 @@ static const auto jltypeassert_func = new JuliaFunction{ }, nullptr, }; -static const auto jlgetnthfieldchecked_func = new JuliaFunction{ +static const auto jlgetnthfieldchecked_func = new JuliaFunction{ XSTR(jl_get_nth_field_checked), - [](LLVMContext &C) { + [](LLVMContext &C, Type *T_size) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); return FunctionType::get(T_prjlvalue, - {T_prjlvalue, getSizeTy(C)}, false); + {T_prjlvalue, T_size}, false); }, [](LLVMContext &C) { return AttributeList::get(C, AttributeSet(), Attributes(C, {Attribute::NonNull}), None); }, }; -static const auto jlgetcfunctiontrampoline_func = new JuliaFunction{ +static const auto jlfieldindex_func = new JuliaFunction<>{ + XSTR(jl_field_index), + [](LLVMContext &C) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(getInt32Ty(C), + {T_prjlvalue, T_prjlvalue, getInt32Ty(C)}, false); + }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::readOnly()); +#else + FnAttrs.addAttribute(Attribute::ReadOnly); +#endif + FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addAttribute(Attribute::WillReturn); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet(), + None); }, // This function can error if the third argument is 1 so don't do that. 
+}; +static const auto jlfieldisdefinedchecked_func = new JuliaFunction{ + XSTR(jl_field_isdefined_checked), + [](LLVMContext &C, Type *T_size) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(getInt32Ty(C), + {T_prjlvalue, T_size}, false); + }, + [](LLVMContext &C) { return AttributeList::get(C, + AttributeSet(), + Attributes(C, {}), + None); }, +}; +static const auto jlgetcfunctiontrampoline_func = new JuliaFunction<>{ XSTR(jl_get_cfunction_trampoline), [](LLVMContext &C) { auto T_jlvalue = JuliaType::get_jlvalue_ty(C); @@ -1018,18 +1291,41 @@ static const auto jlgetcfunctiontrampoline_func = new JuliaFunction{ Attributes(C, {Attribute::NonNull}), None); }, }; -static const auto diff_gc_total_bytes_func = new JuliaFunction{ +static const auto diff_gc_total_bytes_func = new JuliaFunction<>{ XSTR(jl_gc_diff_total_bytes), [](LLVMContext &C) { return FunctionType::get(getInt64Ty(C), false); }, nullptr, }; -static const auto sync_gc_total_bytes_func = new JuliaFunction{ +static const auto sync_gc_total_bytes_func = new JuliaFunction<>{ XSTR(jl_gc_sync_total_bytes), [](LLVMContext &C) { return FunctionType::get(getInt64Ty(C), {getInt64Ty(C)}, false); }, nullptr, }; -static const auto jlarray_data_owner_func = new JuliaFunction{ +static const auto jl_allocgenericmemory = new JuliaFunction{ + XSTR(jl_alloc_genericmemory), + [](LLVMContext &C, Type *T_Size) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(T_prjlvalue, // new Memory + {T_prjlvalue, // type + T_Size // nelements + }, false); }, + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); + AttrBuilder RetAttrs(C); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef) | argMemOnly(MemoryEffects::ModRefInfo::Ref)); +#endif + FnAttrs.addAttribute(Attribute::WillReturn); + RetAttrs.addAlignmentAttr(Align(16)); + RetAttrs.addAttribute(Attribute::NonNull); + RetAttrs.addDereferenceableAttr(16); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + None); }, +}; +static const auto jlarray_data_owner_func = new JuliaFunction<>{ XSTR(jl_array_data_owner), [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); @@ -1041,59 +1337,98 @@ static const auto jlarray_data_owner_func = new JuliaFunction{ Attributes(C, {Attribute::NonNull}), None); }, }; -#define BOX_FUNC(ct,at,attrs) \ -static const auto box_##ct##_func = new JuliaFunction{ \ +#define BOX_FUNC(ct,at,attrs,nbytes) \ +static const auto box_##ct##_func = new JuliaFunction<>{ \ XSTR(jl_box_##ct), \ [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C),\ {at}, false); }, \ - attrs, \ -} -BOX_FUNC(int16, getInt16Ty(C), get_attrs_sext); -BOX_FUNC(uint16, getInt16Ty(C), get_attrs_zext); -BOX_FUNC(int32, getInt32Ty(C), get_attrs_sext); -BOX_FUNC(uint32, getInt32Ty(C), get_attrs_zext); -BOX_FUNC(int64, getInt64Ty(C), get_attrs_sext); -BOX_FUNC(uint64, getInt64Ty(C), get_attrs_zext); -BOX_FUNC(char, getCharTy(C), get_attrs_zext); -BOX_FUNC(float32, getFloatTy(C), get_attrs_basic); -BOX_FUNC(float64, getDoubleTy(C), get_attrs_basic); -BOX_FUNC(ssavalue, getSizeTy(C), get_attrs_basic); + [](LLVMContext &C) { return attrs(C,nbytes); }, \ +} +BOX_FUNC(int16, getInt16Ty(C), get_attrs_box_sext, 2); +BOX_FUNC(uint16, getInt16Ty(C), get_attrs_box_zext, 2); +BOX_FUNC(int32, getInt32Ty(C), get_attrs_box_sext, 4); +BOX_FUNC(uint32, getInt32Ty(C), get_attrs_box_zext, 4); +BOX_FUNC(int64, getInt64Ty(C), 
get_attrs_box_sext, 8); +BOX_FUNC(uint64, getInt64Ty(C), get_attrs_box_zext, 8); +BOX_FUNC(char, getCharTy(C), get_attrs_box_zext, 1); +BOX_FUNC(float32, getFloatTy(C), get_attrs_box_float, 4); +BOX_FUNC(float64, getDoubleTy(C), get_attrs_box_float, 8); #undef BOX_FUNC +static const auto box_ssavalue_func = new JuliaFunction{ + XSTR(jl_box_ssavalue), + [](LLVMContext &C, Type *T_size) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + return FunctionType::get(T_prjlvalue, + {T_size}, false); + }, + get_attrs_basic, +}; +static const auto jlgetbuiltinfptr_func = new JuliaFunction<>{ + XSTR(jl_get_builtin_fptr), + [](LLVMContext &C) { return FunctionType::get(get_func_sig(C)->getPointerTo(), + {JuliaType::get_prjlvalue_ty(C)}, false); }, + nullptr, +}; + // placeholder functions -static const auto gcroot_flush_func = new JuliaFunction{ +static const auto gcroot_flush_func = new JuliaFunction<>{ "julia.gcroot_flush", [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); }, nullptr, }; -static const auto gc_preserve_begin_func = new JuliaFunction{ +static const auto gc_preserve_begin_func = new JuliaFunction<>{ "llvm.julia.gc_preserve_begin", [](LLVMContext &C) { return FunctionType::get(Type::getTokenTy(C), true); }, nullptr, }; -static const auto gc_preserve_end_func = new JuliaFunction { +static const auto gc_preserve_end_func = new JuliaFunction<> { "llvm.julia.gc_preserve_end", [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {Type::getTokenTy(C)}, false); }, nullptr, }; -static const auto except_enter_func = new JuliaFunction{ +static const auto except_enter_func = new JuliaFunction<>{ "julia.except_enter", [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C), false); }, [](LLVMContext &C) { return AttributeList::get(C, - AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::ReturnsTwice)})), + Attributes(C, {Attribute::ReturnsTwice}), AttributeSet(), None); }, }; -static const auto pointer_from_objref_func = new JuliaFunction{ +static const auto pointer_from_objref_func = new JuliaFunction<>{ "julia.pointer_from_objref", [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pjlvalue_ty(C), {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived)}, false); }, - [](LLVMContext &C) { return AttributeList::get(C, - AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::ReadNone), Attribute::get(C, Attribute::NoUnwind)})), + [](LLVMContext &C) { + AttrBuilder FnAttrs(C); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::none()); +#else + FnAttrs.addAttribute(Attribute::ReadNone); +#endif + FnAttrs.addAttribute(Attribute::NoUnwind); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), Attributes(C, {Attribute::NonNull}), None); }, }; +static const auto gc_loaded_func = new JuliaFunction<>{ + "julia.gc_loaded", + // # memory(none) nosync nounwind speculatable willreturn norecurse + // declare nonnull noundef ptr(Loaded) @"julia.gc_loaded"(ptr(Tracked) nocapture nonnull noundef readnone, ptr nonnull noundef readnone) + // top: + // %metadata GC base pointer is ptr(Tracked) + // ret addrspacecast ptr to ptr(Loaded) + [](LLVMContext &C) { return FunctionType::get(PointerType::get(JuliaType::get_prjlvalue_ty(C), AddressSpace::Loaded), + {JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_prjlvalue_ty(C), 0)}, false); }, + [](LLVMContext &C) { + AttributeSet FnAttrs = Attributes(C, {Attribute::ReadNone, Attribute::NoSync, Attribute::NoUnwind, Attribute::Speculatable, 
Attribute::WillReturn, Attribute::NoRecurse}); + AttributeSet RetAttrs = Attributes(C, {Attribute::NonNull, Attribute::NoUndef}); + return AttributeList::get(C, FnAttrs, RetAttrs, + { Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone, Attribute::NoCapture}), + Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone}) }); }, +}; // julia.call represents a call with julia calling convention, it is used as // @@ -1106,16 +1441,12 @@ static const auto pointer_from_objref_func = new JuliaFunction{ // with all the spelled out args appropriately moved into the argument stack buffer. // By representing it this way rather than allocating the stack buffer earlier, we // allow LLVM to make more aggressive optimizations on the call arguments. -static const auto julia_call = new JuliaFunction{ +static const auto julia_call = new JuliaFunction<>{ "julia.call", [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); return FunctionType::get(T_prjlvalue, -#ifdef JL_LLVM_OPAQUE_POINTERS - {PointerType::get(C, 0), -#else {get_func_sig(C)->getPointerTo(), -#endif T_prjlvalue}, // %f true); }, // %args get_attrs_basic, @@ -1123,77 +1454,98 @@ static const auto julia_call = new JuliaFunction{ // julia.call2 is like julia.call, except that %arg1 gets passed as a register // argument at the end of the argument list. -static const auto julia_call2 = new JuliaFunction{ +static const auto julia_call2 = new JuliaFunction<>{ "julia.call2", [](LLVMContext &C) { auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); return FunctionType::get(T_prjlvalue, -#ifdef JL_LLVM_OPAQUE_POINTERS - {PointerType::get(C, 0), -#else {get_func2_sig(C)->getPointerTo(), -#endif T_prjlvalue, // %arg1 T_prjlvalue}, // %f true); }, // %args get_attrs_basic, }; -static const auto jltuple_func = new JuliaFunction{XSTR(jl_f_tuple), get_func_sig, get_func_attrs}; +// julia.call3 is like julia.call, except that %fptr is derived rather than tracked +static const auto julia_call3 = new JuliaFunction<>{ + "julia.call3", + [](LLVMContext &C) { + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); + Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived); + return FunctionType::get(T_prjlvalue, + {get_func3_sig(C)->getPointerTo(), + T}, // %f + true); }, // %args + get_attrs_basic, +}; + + +static const auto jltuple_func = new JuliaFunction<>{XSTR(jl_f_tuple), get_func_sig, get_func_attrs}; +static const auto jlintrinsic_func = new JuliaFunction<>{XSTR(jl_f_intrinsic_call), get_func3_sig, get_func_attrs}; + static const auto &builtin_func_map() { - static std::map builtins = { - { jl_f_is_addr, new JuliaFunction{XSTR(jl_f_is), get_func_sig, get_func_attrs} }, - { jl_f_typeof_addr, new JuliaFunction{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} }, - { jl_f_sizeof_addr, new JuliaFunction{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} }, - { jl_f_issubtype_addr, new JuliaFunction{XSTR(jl_f_issubtype), get_func_sig, get_func_attrs} }, - { jl_f_isa_addr, new JuliaFunction{XSTR(jl_f_isa), get_func_sig, get_func_attrs} }, - { jl_f_typeassert_addr, new JuliaFunction{XSTR(jl_f_typeassert), get_func_sig, get_func_attrs} }, - { jl_f_ifelse_addr, new JuliaFunction{XSTR(jl_f_ifelse), get_func_sig, get_func_attrs} }, - { jl_f__apply_iterate_addr, new JuliaFunction{XSTR(jl_f__apply_iterate), get_func_sig, get_func_attrs} }, - { jl_f__apply_pure_addr, new JuliaFunction{XSTR(jl_f__apply_pure), get_func_sig, get_func_attrs} }, - { jl_f__call_latest_addr, new 
JuliaFunction{XSTR(jl_f__call_latest), get_func_sig, get_func_attrs} }, - { jl_f__call_in_world_addr, new JuliaFunction{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} }, - { jl_f__call_in_world_total_addr, new JuliaFunction{XSTR(jl_f__call_in_world_total), get_func_sig, get_func_attrs} }, - { jl_f_throw_addr, new JuliaFunction{XSTR(jl_f_throw), get_func_sig, get_func_attrs} }, + static std::map*> builtins = { + { jl_f_is_addr, new JuliaFunction<>{XSTR(jl_f_is), get_func_sig, get_func_attrs} }, + { jl_f_typeof_addr, new JuliaFunction<>{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} }, + { jl_f_sizeof_addr, new JuliaFunction<>{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} }, + { jl_f_issubtype_addr, new JuliaFunction<>{XSTR(jl_f_issubtype), get_func_sig, get_func_attrs} }, + { jl_f_isa_addr, new JuliaFunction<>{XSTR(jl_f_isa), get_func_sig, get_func_attrs} }, + { jl_f_typeassert_addr, new JuliaFunction<>{XSTR(jl_f_typeassert), get_func_sig, get_func_attrs} }, + { jl_f_ifelse_addr, new JuliaFunction<>{XSTR(jl_f_ifelse), get_func_sig, get_func_attrs} }, + { jl_f__apply_iterate_addr, new JuliaFunction<>{XSTR(jl_f__apply_iterate), get_func_sig, get_func_attrs} }, + { jl_f__apply_pure_addr, new JuliaFunction<>{XSTR(jl_f__apply_pure), get_func_sig, get_func_attrs} }, + { jl_f__call_latest_addr, new JuliaFunction<>{XSTR(jl_f__call_latest), get_func_sig, get_func_attrs} }, + { jl_f__call_in_world_addr, new JuliaFunction<>{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} }, + { jl_f__call_in_world_total_addr, new JuliaFunction<>{XSTR(jl_f__call_in_world_total), get_func_sig, get_func_attrs} }, + { jl_f_throw_addr, new JuliaFunction<>{XSTR(jl_f_throw), get_func_sig, get_func_attrs} }, { jl_f_tuple_addr, jltuple_func }, - { jl_f_svec_addr, new JuliaFunction{XSTR(jl_f_svec), get_func_sig, get_func_attrs} }, - { jl_f_applicable_addr, new JuliaFunction{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} }, - { jl_f_invoke_addr, new JuliaFunction{XSTR(jl_f_invoke), get_func_sig, get_func_attrs} }, - { jl_f_invoke_kwsorter_addr, new JuliaFunction{XSTR(jl_f_invoke_kwsorter), get_func_sig, get_func_attrs} }, - { jl_f_isdefined_addr, new JuliaFunction{XSTR(jl_f_isdefined), get_func_sig, get_func_attrs} }, - { jl_f_getfield_addr, new JuliaFunction{XSTR(jl_f_getfield), get_func_sig, get_func_attrs} }, - { jl_f_setfield_addr, new JuliaFunction{XSTR(jl_f_setfield), get_func_sig, get_func_attrs} }, - { jl_f_swapfield_addr, new JuliaFunction{XSTR(jl_f_swapfield), get_func_sig, get_func_attrs} }, - { jl_f_modifyfield_addr, new JuliaFunction{XSTR(jl_f_modifyfield), get_func_sig, get_func_attrs} }, - { jl_f_fieldtype_addr, new JuliaFunction{XSTR(jl_f_fieldtype), get_func_sig, get_func_attrs} }, - { jl_f_nfields_addr, new JuliaFunction{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} }, - { jl_f__expr_addr, new JuliaFunction{XSTR(jl_f__expr), get_func_sig, get_func_attrs} }, - { jl_f__typevar_addr, new JuliaFunction{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} }, - { jl_f_arrayref_addr, new JuliaFunction{XSTR(jl_f_arrayref), get_func_sig, get_func_attrs} }, - { jl_f_const_arrayref_addr, new JuliaFunction{XSTR(jl_f_const_arrayref), get_func_sig, get_func_attrs} }, - { jl_f_arrayset_addr, new JuliaFunction{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} }, - { jl_f_arraysize_addr, new JuliaFunction{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} }, - { jl_f_apply_type_addr, new JuliaFunction{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} }, - { jl_f_donotdelete_addr, new 
JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} }, - { jl_f_finalizer_addr, new JuliaFunction{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} } + { jl_f_svec_addr, new JuliaFunction<>{XSTR(jl_f_svec), get_func_sig, get_func_attrs} }, + { jl_f_applicable_addr, new JuliaFunction<>{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} }, + { jl_f_invoke_addr, new JuliaFunction<>{XSTR(jl_f_invoke), get_func_sig, get_func_attrs} }, + { jl_f_isdefined_addr, new JuliaFunction<>{XSTR(jl_f_isdefined), get_func_sig, get_func_attrs} }, + { jl_f_getfield_addr, new JuliaFunction<>{XSTR(jl_f_getfield), get_func_sig, get_func_attrs} }, + { jl_f_setfield_addr, new JuliaFunction<>{XSTR(jl_f_setfield), get_func_sig, get_func_attrs} }, + { jl_f_swapfield_addr, new JuliaFunction<>{XSTR(jl_f_swapfield), get_func_sig, get_func_attrs} }, + { jl_f_modifyfield_addr, new JuliaFunction<>{XSTR(jl_f_modifyfield), get_func_sig, get_func_attrs} }, + { jl_f_fieldtype_addr, new JuliaFunction<>{XSTR(jl_f_fieldtype), get_func_sig, get_func_attrs} }, + { jl_f_nfields_addr, new JuliaFunction<>{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} }, + { jl_f__expr_addr, new JuliaFunction<>{XSTR(jl_f__expr), get_func_sig, get_func_attrs} }, + { jl_f__typevar_addr, new JuliaFunction<>{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} }, + { jl_f_memoryref_addr, new JuliaFunction<>{XSTR(jl_f_memoryref), get_func_sig, get_func_attrs} }, + { jl_f_memoryrefoffset_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefoffset), get_func_sig, get_func_attrs} }, + { jl_f_memoryrefset_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefset), get_func_sig, get_func_attrs} }, + { jl_f_memoryref_isassigned_addr,new JuliaFunction<>{XSTR(jl_f_memoryref_isassigned), get_func_sig, get_func_attrs} }, + { jl_f_apply_type_addr, new JuliaFunction<>{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} }, + { jl_f_donotdelete_addr, new JuliaFunction<>{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} }, + { jl_f_compilerbarrier_addr, new JuliaFunction<>{XSTR(jl_f_compilerbarrier), get_func_sig, get_func_attrs} }, + { jl_f_finalizer_addr, new JuliaFunction<>{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} }, + { jl_f__svec_ref_addr, new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} } }; return builtins; } -static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; +static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; -static std::atomic globalUniqueGeneratedNames{0}; +static _Atomic(uint64_t) globalUniqueGeneratedNames{1}; // --- code generation --- + extern "C" { - jl_cgparams_t jl_default_cgparams = {1, 1, 0, + jl_cgparams_t jl_default_cgparams = { + /* track_allocations */ 1, + /* code_coverage */ 1, + /* prefer_specsig */ 0, #ifdef _OS_WINDOWS_ - 0, + /* gnu_pubnames */ 0, #else - 1, + /* gnu_pubnames */ 1, #endif - (int) DICompileUnit::DebugEmissionKind::FullDebug, - jl_rettype_inferred, NULL }; + /* debug_info_kind */ (int) DICompileUnit::DebugEmissionKind::FullDebug, + /* debug_line_info */ 1, + /* safepoint_on_entry */ 1, + /* gcstack_arg */ 1, + /* use_jlplt*/ 1, + /* lookup */ jl_rettype_inferred_addr }; } @@ -1206,6 +1558,8 @@ static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) { return tbaa_cache.tbaa_value; if (jl_is_abstracttype(jt)) return tbaa_cache.tbaa_value; + if 
(jl_is_genericmemory_type(jt) || jl_is_array_type(jt)) + return tbaa_cache.tbaa_array; // If we're here, we know all subtypes are (im)mutable, even if we // don't know what the exact type is return jl_is_mutable(jt) ? tbaa_cache.tbaa_mutab : tbaa_cache.tbaa_immut; @@ -1250,17 +1604,90 @@ static bool deserves_sret(jl_value_t *dt, Type *T) return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy() && !T->isVectorTy(); } +// Alias Analysis Info (analogous to llvm::AAMDNodes) +struct jl_aliasinfo_t { + MDNode *tbaa = nullptr; // '!tbaa': Struct-path TBAA. TBAA graph forms a tree (indexed by offset). + // Two pointers do not alias if they are not transitive parents + // (effectively, subfields) of each other or equal. + MDNode *tbaa_struct = nullptr; // '!tbaa.struct': Describes memory layout of struct. + MDNode *scope = nullptr; // '!alias.scope': Generic "noalias" memory access sets. + // If alias.scope(inst_a) ⊆ noalias(inst_b) (in any "domain") + // => inst_a, inst_b do not alias. + MDNode *noalias = nullptr; // '!noalias': See '!alias.scope' above. + + enum class Region { unknown, gcframe, stack, data, constant, type_metadata }; // See jl_regions_t + + explicit jl_aliasinfo_t() = default; + explicit jl_aliasinfo_t(jl_codectx_t &ctx, Region r, MDNode *tbaa); + explicit jl_aliasinfo_t(MDNode *tbaa, MDNode *tbaa_struct, MDNode *scope, MDNode *noalias) + : tbaa(tbaa), tbaa_struct(tbaa_struct), scope(scope), noalias(noalias) {} + jl_aliasinfo_t(const jl_aliasinfo_t &) = default; + + // Add !tbaa, !tbaa.struct, !alias.scope, !noalias annotations to an instruction. + // + // Also adds `invariant.load` to load instructions in the constant !noalias scope. + Instruction *decorateInst(Instruction *inst) const { + + if (this->tbaa) + inst->setMetadata(LLVMContext::MD_tbaa, this->tbaa); + if (this->tbaa_struct) + inst->setMetadata(LLVMContext::MD_tbaa_struct, this->tbaa_struct); + if (this->scope) + inst->setMetadata(LLVMContext::MD_alias_scope, this->scope); + if (this->noalias) + inst->setMetadata(LLVMContext::MD_noalias, this->noalias); + + if (this->scope && isa(inst)) { + // If this is in the read-only region, mark the load with "!invariant.load" + if (this->scope->getNumOperands() == 1) { + MDNode *operand = cast(this->scope->getOperand(0)); + auto scope_name = cast(operand->getOperand(0))->getString(); + if (scope_name == "jnoalias_const") + inst->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(inst->getContext(), None)); + } + } + + return inst; + } + + // Merge two sets of alias information. + jl_aliasinfo_t merge(const jl_aliasinfo_t &other) const { + jl_aliasinfo_t result; + result.tbaa = MDNode::getMostGenericTBAA(this->tbaa, other.tbaa); + result.tbaa_struct = nullptr; + result.scope = MDNode::getMostGenericAliasScope(this->scope, other.scope); + result.noalias = MDNode::intersect(this->noalias, other.noalias); + return result; + } + + // Create alias information based on the provided TBAA metadata. + // + // This function only exists to help transition to using !noalias to encode + // memory region non-aliasing. It should be deleted once the TBAA metadata + // is improved to encode only memory layout and *not* memory regions. 
+ static jl_aliasinfo_t fromTBAA(jl_codectx_t &ctx, MDNode *tbaa); +}; // metadata tracking for a llvm Value* during codegen +const uint8_t UNION_BOX_MARKER = 0x80; struct jl_cgval_t { Value *V; // may be of type T* or T, or set to NULL if ghost (or if the value has not been initialized yet, for a variable definition) // For unions, we may need to keep a reference to the boxed part individually. // If this is non-NULL, then, at runtime, we satisfy the invariant that (for the corresponding - // runtime values) if `(TIndex | 0x80) != 0`, then `Vboxed == V` (by value). + // runtime values) if `(TIndex | UNION_BOX_MARKER) != 0`, then `Vboxed == V` (by value). // For convenience, we also set this value of isboxed values, in which case // it is equal (at compile time) to V. - // If this is non-NULL, it is always of type `T_prjlvalue` + + // If this is non-NULL (at compile time), it is always of type `T_prjlvalue`. + // N.B.: In general we expect this to always be a dereferenceable pointer at runtime. + // However, there are situations where this value may be a runtime NULL + // (PhiNodes with undef predecessors or PhiC with undef UpsilonNode). + // The middle-end arranges appropriate error checks before any use + // of this value that may read a non-dereferenceable Vboxed, with two + // exceptions: PhiNode and UpsilonNode arguments which need special + // handling to account for the possibility that this may be NULL. Value *Vboxed; + Value *TIndex; // if `V` is an unboxed (tagged) Union described by `typ`, this gives the DataType index (1-based, small int) as an i8 jl_value_t *constant; // constant value (rooted in linfo.def.roots) jl_value_t *typ; // the original type of V, never NULL @@ -1401,32 +1828,31 @@ class jl_codectx_t { IRBuilder<> builder; jl_codegen_params_t &emission_context; llvm::MapVector call_targets; - std::map &global_targets; Function *f = NULL; + MDNode* LoopID = NULL; // local var info. globals are not in here. 
- std::vector slots; + SmallVector slots; std::map phic_slots; - std::vector SAvalues; - std::vector> PhiNodes; - std::vector ssavalue_assigned; - std::vector ssavalue_usecount; - std::vector oc_modules; + std::map > scope_restore; + SmallVector SAvalues; + SmallVector, 0> PhiNodes; + SmallVector ssavalue_assigned; + SmallVector ssavalue_usecount; jl_module_t *module = NULL; jl_typecache_t type_cache; jl_tbaacache_t tbaa_cache; + jl_noaliascache_t aliasscope_cache; jl_method_instance_t *linfo = NULL; jl_value_t *rettype = NULL; jl_code_info_t *source = NULL; jl_array_t *code = NULL; size_t world = 0; - jl_array_t *roots = NULL; const char *name = NULL; StringRef file{}; ssize_t *line = NULL; Value *spvals_ptr = NULL; Value *argArray = NULL; Value *argCount = NULL; - MDNode *aliasscope = NULL; std::string funcName; int vaSlot = -1; // name of vararg argument int nReqArgs = 0; @@ -1437,23 +1863,23 @@ class jl_codectx_t { Value *pgcstack = NULL; Instruction *topalloca = NULL; - bool debug_enabled = false; bool use_cache = false; + bool external_linkage = false; const jl_cgparams_t *params = NULL; - std::vector llvmcall_modules; + SmallVector, 0> llvmcall_modules; jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t ¶ms) : builder(llvmctx), emission_context(params), call_targets(), - global_targets(params.globals), world(params.world), use_cache(params.cache), + external_linkage(params.external_linkage), params(params.params) { } jl_typecache_t &types() { - type_cache.initialize(builder.getContext()); + type_cache.initialize(builder.getContext(), emission_context.DL); return type_cache; } @@ -1462,8 +1888,12 @@ class jl_codectx_t { return tbaa_cache; } + jl_noaliascache_t &noalias() { + aliasscope_cache.initialize(builder.getContext()); + return aliasscope_cache; + } + ~jl_codectx_t() { - assert(this->roots == NULL); // Transfer local delayed calls to the global queue for (auto call_target : call_targets) emission_context.workqueue.push_back(call_target); @@ -1474,39 +1904,108 @@ GlobalVariable *JuliaVariable::realize(jl_codectx_t &ctx) { return realize(jl_Module); } +jl_aliasinfo_t::jl_aliasinfo_t(jl_codectx_t &ctx, Region r, MDNode *tbaa): tbaa(tbaa), tbaa_struct(nullptr) { + MDNode *alias_scope = nullptr; + jl_noaliascache_t::jl_regions_t regions = ctx.noalias().regions; + switch (r) { + case Region::unknown: + alias_scope = nullptr; + break; + case Region::gcframe: + alias_scope = regions.gcframe; + break; + case Region::stack: + alias_scope = regions.stack; + break; + case Region::data: + alias_scope = regions.data; + break; + case Region::constant: + alias_scope = regions.constant; + break; + case Region::type_metadata: + alias_scope = regions.type_metadata; + break; + } + + MDNode *all_scopes[5] = { regions.gcframe, regions.stack, regions.data, regions.type_metadata, regions.constant }; + if (alias_scope) { + // The matching region is added to !alias.scope + // All other regions are added to !noalias + + int i = 0; + Metadata *scopes[1] = { alias_scope }; + Metadata *noaliases[4]; + for (auto const &scope: all_scopes) { + if (scope == alias_scope) continue; + noaliases[i++] = scope; + } + + this->scope = MDNode::get(ctx.builder.getContext(), ArrayRef(scopes)); + this->noalias = MDNode::get(ctx.builder.getContext(), ArrayRef(noaliases)); + } +} + +jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) { + auto cache = ctx.tbaa(); + + // Each top-level TBAA node has a corresponding !alias.scope scope + MDNode *tbaa_srcs[5] = { cache.tbaa_gcframe, cache.tbaa_stack, 
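// --- Editor's note: illustrative sketch, not part of the diff -----------------
// The free function `tbaa_decorate(MDNode*, Instruction*)` removed here is
// superseded by the `jl_aliasinfo_t` helper introduced above. Call sites now
// build an alias-info bundle from a TBAA node and let it attach !tbaa,
// !tbaa.struct, !alias.scope and !noalias in one place. A minimal before/after,
// using `ctx`, `tbaa` and a store as in the surrounding code:
//
//     // old:
//     tbaa_decorate(tbaa, ctx.builder.CreateStore(val, ptr));
//
//     // new:
//     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
//     ai.decorateInst(ctx.builder.CreateStore(val, ptr));
//
// fromTBAA() walks the TBAA node up to the child of the "jtbaa" root, matches
// it against the gcframe/stack/data/type_metadata/constant regions, sets the
// matching region as !alias.scope and the remaining regions as !noalias; nodes
// it cannot classify fall back to TBAA-only decoration. decorateInst() also
// tags loads in the constant region with !invariant.load.
// ------------------------------------------------------------------------------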
cache.tbaa_data, cache.tbaa_array, cache.tbaa_const }; + Region regions[5] = { Region::gcframe, Region::stack, Region::data, Region::type_metadata, Region::constant }; + + if (tbaa != nullptr) { + MDNode *node = cast(tbaa->getOperand(1)); + if (cast(node->getOperand(0))->getString() != "jtbaa") { + + // Climb up to node just before root + MDNode *parent_node = cast(node->getOperand(1)); + while (cast(parent_node->getOperand(0))->getString() != "jtbaa") { + node = parent_node; + parent_node = cast(node->getOperand(1)); + } + + // Find the matching node's index + for (int i = 0; i < 5; i++) { + if (cast(tbaa_srcs[i]->getOperand(1)) == node) + return jl_aliasinfo_t(ctx, regions[i], tbaa); + } + } + } + + return jl_aliasinfo_t(ctx, Region::unknown, tbaa); +} + static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL); -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure); +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments=nullptr, size_t *args_begin=nullptr); static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1); static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s, jl_binding_t **pbnd, bool assign); -static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa); +static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa); static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i); -static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg); -static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0); +static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg); static Value *get_current_task(jl_codectx_t &ctx); static Value *get_current_ptls(jl_codectx_t &ctx); static Value *get_last_age_field(jl_codectx_t &ctx); -static Value *get_current_signal_page(jl_codectx_t &ctx); static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true); -static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF, - const jl_cgval_t *args, size_t nargs, JuliaFunction *trampoline); -static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF, - const jl_cgval_t *args, size_t nargs, JuliaFunction *trampoline); +static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF, + const jl_cgval_t *args, size_t nargs, JuliaFunction<> *trampoline); +static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value *theF, + const jl_cgval_t *args, size_t nargs, JuliaFunction<> *trampoline); static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2, Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr); static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv, bool is_promotable=false); static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt); static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p); -static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G); -Instruction *tbaa_decorate(MDNode *md, Instruction *inst); 
+static unsigned julia_alignment(jl_value_t *jt); static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G) { return G->realize(M); } -static Function *prepare_call_in(Module *M, JuliaFunction *G) +template +static Function *prepare_call_in(Module *M, JuliaFunction *G) { return G->realize(M); } @@ -1519,8 +2018,11 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G) if (!local) { // Copy the GlobalVariable, but without the initializer, so it becomes a declaration GlobalVariable *proto = new GlobalVariable(*M, G->getValueType(), - G->isConstant(), GlobalVariable::ExternalLinkage, + G->isConstant(), G->getLinkage(), nullptr, G->getName(), nullptr, G->getThreadLocalMode()); + if (proto->hasLocalLinkage()) { + proto->setInitializer(G->getInitializer()); + } proto->copyAttributesFrom(G); // DLLImport only needs to be set for the shadow module // it just gets annoying in the JIT @@ -1533,31 +2035,32 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G) // --- convenience functions for tagging llvm values with julia types --- -static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_context, Constant *val, StringRef name, Module &M) +static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_context, Constant *val, Align align, const Twine &name, Module &M) { GlobalVariable *&gv = emission_context.mergedConstants[val]; - StringRef localname; - std::string ssno; - if (gv == nullptr) { - raw_string_ostream(ssno) << name << emission_context.mergedConstants.size(); - localname = StringRef(ssno); - } - else { - localname = gv->getName(); - if (gv->getParent() != &M) - gv = cast_or_null(M.getNamedValue(localname)); - } - if (gv == nullptr) { - gv = new GlobalVariable( + auto get_gv = [&](const Twine &name) { + auto gv = new GlobalVariable( M, val->getType(), true, GlobalVariable::PrivateLinkage, val, - localname); + name); gv->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + gv->setAlignment(align); + return gv; + }; + if (gv == nullptr) { + gv = get_gv(name + "#" + Twine(emission_context.mergedConstants.size())); + } + else if (gv->getParent() != &M) { + StringRef gvname = gv->getName(); + gv = M.getNamedGlobal(gvname); + if (!gv) { + gv = get_gv(gvname); + } } - assert(localname == gv->getName()); + assert(gv->getName().startswith(name.str())); assert(val == gv->getInitializer()); return gv; } @@ -1565,29 +2068,30 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty) { ++EmittedAllocas; - return new AllocaInst(lty, 0, "", /*InsertBefore=*/ctx.topalloca); + return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), "", /*InsertBefore=*/ctx.topalloca); } -static void undef_derived_strct(IRBuilder<> &irbuilder, Value *ptr, jl_datatype_t *sty, MDNode *tbaa) +static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *sty, MDNode *tbaa) { assert(ptr->getType()->getPointerAddressSpace() != AddressSpace::Tracked); size_t first_offset = sty->layout->nfields ? 
jl_field_offset(sty, 0) : 0; if (first_offset != 0) - irbuilder.CreateMemSet(ptr, ConstantInt::get(getInt8Ty(irbuilder.getContext()), 0), first_offset, MaybeAlign(0)); - size_t i, np = sty->layout->npointers; - if (np == 0) + ctx.builder.CreateMemSet(ptr, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), first_offset, MaybeAlign(0)); + if (sty->layout->first_ptr < 0) return; - auto T_prjlvalue = JuliaType::get_prjlvalue_ty(irbuilder.getContext()); - ptr = irbuilder.CreateBitCast(ptr, T_prjlvalue->getPointerTo(ptr->getType()->getPointerAddressSpace())); + size_t i, np = sty->layout->npointers; + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx.builder.getContext()); + ptr = ctx.builder.CreateBitCast(ptr, T_prjlvalue->getPointerTo(ptr->getType()->getPointerAddressSpace())); for (i = 0; i < np; i++) { - Value *fld = irbuilder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i)); - tbaa_decorate(tbaa, irbuilder.CreateStore(Constant::getNullValue(T_prjlvalue), fld)); + Value *fld = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(T_prjlvalue), fld)); } } static Value *emit_inttoptr(jl_codectx_t &ctx, Value *v, Type *ty) { - // Almost all of our inttoptr are generated due to representing `Ptr` with `getSizeTy(ctx.builder.getContext())` + // Almost all of our inttoptr are generated due to representing `Ptr` with `ctx.types().T_size` // in LLVM and most of these integers are generated from `ptrtoint` in the first place. if (auto I = dyn_cast(v)) { auto ptr = I->getOperand(0); @@ -1609,6 +2113,7 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ) typ = (jl_value_t*)jl_typeofbottom_type->super; } if (jl_is_type_type(typ)) { + assert(is_uniquerep_Type(typ)); // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); constant.constant = jl_tparam0(typ); @@ -1659,11 +2164,15 @@ static bool valid_as_globalinit(const Value *v) { return isa(v); } +static Value *zext_struct(jl_codectx_t &ctx, Value *V); + static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_t *typ, Value *tindex) { Value *loc; + v = zext_struct(ctx, v); if (valid_as_globalinit(v)) { // llvm can't handle all the things that could be inside a ConstantExpr - loc = get_pointer_to_constant(ctx.emission_context, cast(v), "_j_const", *jl_Module); + assert(jl_is_concrete_type(typ)); // not legal to have an unboxed abstract type + loc = get_pointer_to_constant(ctx.emission_context, cast(v), Align(julia_alignment(typ)), "_j_const", *jl_Module); } else { loc = emit_static_alloca(ctx, v->getType()); @@ -1680,16 +2189,14 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, const jl_cgval_t &v static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isboxed, jl_value_t *typ) { - if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { - // no need to explicitly load/store a constant/ghost value - return ghostValue(ctx, typ); - } if (jl_is_type_type(typ)) { - jl_value_t *tp0 = jl_tparam0(typ); - if (jl_is_concrete_type(tp0) || tp0 == jl_bottom_type) { + if (is_uniquerep_Type(typ)) { // replace T::Type{T} with T return ghostValue(ctx, typ); } + } else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { + // no need to explicitly load/store a constant/ghost value + 
return ghostValue(ctx, typ); } Type *T = julia_type_to_llvm(ctx, typ); if (type_is_ghost(T)) { @@ -1713,8 +2220,10 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox // see if it might be profitable (and cheap) to change the type of v to typ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ) { - if (v.typ == jl_bottom_type || v.constant || typ == (jl_value_t*)jl_any_type || jl_egal(v.typ, typ)) + if (v.typ == jl_bottom_type || typ == (jl_value_t*)jl_any_type || jl_egal(v.typ, typ)) return v; // fast-path + if (v.constant) + return jl_isa(v.constant, typ) ? v : jl_cgval_t(); if (jl_is_concrete_type(v.typ) && !jl_is_kind(v.typ)) { if (jl_is_concrete_type(typ) && !jl_is_kind(typ)) { // type mismatch: changing from one leaftype to another @@ -1749,6 +2258,11 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t & Type *T = julia_type_to_llvm(ctx, typ); if (type_is_ghost(T)) return ghostValue(ctx, typ); + else if (v.TIndex && v.V == NULL) { + // type mismatch (there weren't any non-ghost values in the union) + CreateTrap(ctx.builder); + return jl_cgval_t(); + } return jl_cgval_t(v, typ, NULL); } @@ -1773,6 +2287,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi) assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things"); if (vi.usedUndef) { vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext())); + setName(ctx.emission_context, vi.defFlag, "isdefined"); store_def_flag(ctx, vi, false); } } @@ -1780,17 +2295,6 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi) // --- utilities --- -static Constant *undef_value_for_type(Type *T) { - auto tracked = CountTrackedPointers(T); - Constant *undef; - if (tracked.count) - // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL - undef = Constant::getNullValue(T); - else - undef = UndefValue::get(T); - return undef; -} - static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block) { Function *f = irbuilder.GetInsertBlock()->getParent(); @@ -1833,7 +2337,7 @@ static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test) static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip) { // previous value was a split union, compute new index, or box - Value *new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80); + Value *new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER); SmallBitVector skip_box(1, true); Value *tindex = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); if (jl_is_uniontype(typ)) { @@ -1871,20 +2375,22 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & v.typ, counter); } + setName(ctx.emission_context, new_tindex, "tindex"); // some of the values are still unboxed if (!isa(new_tindex)) { Value *wasboxed = NULL; - // If the old value was boxed and unknown (type tag 0x80), + // If the old value was boxed and unknown (type tag UNION_BOX_MARKER), // it is possible that the tag was actually one of the types // that are now explicitly represented. To find out, we need // to compare typeof(v.Vboxed) (i.e. the type of the unknown // value) against all the types that are now explicitly // selected and select the appropriate one as our new tindex. 
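// --- Editor's note: illustrative sketch, not part of the diff -----------------
// Layout of the split-union tag byte (TIndex) manipulated below; the helper
// names are hypothetical, the constants mirror the surrounding code:
//
//     constexpr uint8_t UNION_BOX_MARKER = 0x80;  // boxed/unknown: real tag lives in Vboxed
//     inline uint8_t split_index(uint8_t t) { return t & 0x7f; } // 1-based slot in the union split, 0 = none
//     inline bool    is_union_boxed(uint8_t t) { return (t & UNION_BOX_MARKER) != 0; }
//
// convert_julia_type_union() rebuilds this byte for the target union type:
// values that remain unboxed get a fresh 1-based slot, and anything that
// cannot be placed keeps (or gains) UNION_BOX_MARKER and is boxed via
// box_union() further down.
// ------------------------------------------------------------------------------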
if (v.Vboxed) { - wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); new_tindex = ctx.builder.CreateOr(wasboxed, new_tindex); wasboxed = ctx.builder.CreateICmpNE(wasboxed, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); + setName(ctx.emission_context, wasboxed, "wasboxed"); BasicBlock *currBB = ctx.builder.GetInsertBlock(); @@ -1897,16 +2403,16 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & if (!union_isaBB) { union_isaBB = BasicBlock::Create(ctx.builder.getContext(), "union_isa", ctx.f); ctx.builder.SetInsertPoint(union_isaBB); - union_box_dt = emit_typeof(ctx, v.Vboxed, skip != NULL); + union_box_dt = emit_typeof(ctx, v.Vboxed, skip != NULL, true); post_union_isaBB = ctx.builder.GetInsertBlock(); } }; // If we don't find a match. The type remains unknown - // (0x80). We could use `v.Tindex`, here, since we know - // it has to be 0x80, but it seems likely the backend + // (UNION_BOX_MARKER). We could use `v.Tindex`, here, since we know + // it has to be UNION_BOX_MARKER, but it seems likely the backend // will like the explicit constant better. - Value *union_box_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80); + Value *union_box_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER); unsigned counter = 0; for_each_uniontype_small( // for each new union-split value @@ -1915,35 +2421,37 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & if (old_idx == 0) { // didn't handle this item before, select its new union index maybe_setup_union_isa(); - Value *cmp = ctx.builder.CreateICmpEQ(track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jt)), union_box_dt); - union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80 | idx), union_box_tindex); + Value *cmp = ctx.builder.CreateICmpEQ(emit_tagfrom(ctx, jt), union_box_dt); + union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER | idx), union_box_tindex); } }, typ, counter); + setName(ctx.emission_context, union_box_tindex, "union_box_tindex"); if (union_box_dt) { BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_isa", ctx.f); ctx.builder.CreateBr(postBB); ctx.builder.SetInsertPoint(currBB); - Value *wasunknown = ctx.builder.CreateICmpEQ(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + Value *wasunknown = ctx.builder.CreateICmpEQ(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); ctx.builder.CreateCondBr(wasunknown, union_isaBB, postBB); ctx.builder.SetInsertPoint(postBB); PHINode *tindex_phi = ctx.builder.CreatePHI(getInt8Ty(ctx.builder.getContext()), 2); tindex_phi->addIncoming(new_tindex, currBB); tindex_phi->addIncoming(union_box_tindex, post_union_isaBB); new_tindex = tindex_phi; + setName(ctx.emission_context, new_tindex, "tindex"); } } if (!skip_box.all()) { // some values weren't unboxed in the new union - // box them now (tindex above already selected 0x80 = box for them) + // box them now (tindex above already selected UNION_BOX_MARKER = box for them) Value *boxv = box_union(ctx, v, skip_box); if (v.Vboxed) { // If the value is boxed both before and after, we don't need // to touch it at all. 
Otherwise we're either transitioning // unboxed->boxed, or leaving an unboxed value in place. Value *isboxed = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(new_tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(new_tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); boxv = ctx.builder.CreateSelect( ctx.builder.CreateAnd(wasboxed, isboxed), v.Vboxed, boxv); @@ -1977,7 +2485,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & } // given a value marked with type `v.typ`, compute the mapping and/or boxing to return a value of type `typ` -// TODO: should this set TIndex when trivial (such as 0x80 or concrete types) ? +// TODO: should this set TIndex when trivial (such as UNION_BOX_MARKER or concrete types) ? static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip) { if (typ == (jl_value_t*)jl_typeofbottom_type) @@ -1989,29 +2497,28 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ return ghostValue(ctx, typ); Value *new_tindex = NULL; if (jl_is_concrete_type(typ)) { - if (v.TIndex && !jl_is_pointerfree(typ)) { - // discovered that this union-split type must actually be isboxed - if (v.Vboxed) { - return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); - } - else { - // type mismatch: there weren't any boxed values in the union - if (skip) - *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); - else - CreateTrap(ctx.builder); - return jl_cgval_t(); - } + if (jl_is_concrete_type(v.typ)) { + // type mismatch: changing from one leaftype to another + if (skip) + *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); + else + CreateTrap(ctx.builder); + return jl_cgval_t(); } - if (jl_is_concrete_type(v.typ) && !jl_is_kind(v.typ)) { - if (jl_is_concrete_type(typ) && !jl_is_kind(typ)) { - // type mismatch: changing from one leaftype to another - if (skip) - *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); - else - CreateTrap(ctx.builder); - return jl_cgval_t(); + bool mustbox_union = v.TIndex && !jl_is_pointerfree(typ); + if (v.Vboxed && (v.isboxed || mustbox_union)) { + if (skip) { + *skip = ctx.builder.CreateNot(emit_exactly_isa(ctx, v, (jl_datatype_t*)typ, true)); } + return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); + } + if (mustbox_union) { + // type mismatch: there weren't any boxed values in the union + if (skip) + *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); + else + CreateTrap(ctx.builder); + return jl_cgval_t(); } } else { @@ -2027,7 +2534,7 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), new_idx); if (v.V && !v.ispointer()) { // TODO: remove this branch once all consumers of v.TIndex understand how to handle a non-ispointer value - return value_to_pointer(ctx, v.V, typ, new_tindex); + return jl_cgval_t(value_to_pointer(ctx, v), typ, new_tindex); } } else if (jl_subtype(v.typ, typ)) { @@ -2055,22 +2562,13 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_ return jl_cgval_t(v, typ, new_tindex); } -orc::ThreadSafeModule jl_create_llvm_module(StringRef name, orc::ThreadSafeContext context, bool imaging_mode, const DataLayout &DL, const Triple &triple) +std::unique_ptr jl_create_llvm_module(StringRef name, 
LLVMContext &context, const DataLayout &DL, const Triple &triple) { ++ModulesCreated; - auto lock = context.getLock(); - Module *m = new Module(name, *context.getContext()); - orc::ThreadSafeModule TSM(std::unique_ptr(m), std::move(context)); - // Some linkers (*cough* OS X) don't understand DWARF v4, so we use v2 in - // imaging mode. The structure of v4 is slightly nicer for debugging JIT - // code. + auto m = std::make_unique(name, context); + // According to clang darwin above 10.10 supports dwarfv4 if (!m->getModuleFlag("Dwarf Version")) { - int dwarf_version = 4; -#ifdef _OS_DARWIN_ - if (imaging_mode) - dwarf_version = 2; -#endif - m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", dwarf_version); + m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 4); } if (!m->getModuleFlag("Debug Info Version")) m->addModuleFlag(llvm::Module::Warning, "Debug Info Version", @@ -2078,110 +2576,128 @@ orc::ThreadSafeModule jl_create_llvm_module(StringRef name, orc::ThreadSafeConte m->setDataLayout(DL); m->setTargetTriple(triple.str()); -#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) && JL_LLVM_VERSION >= 130000 - // tell Win32 to assume the stack is always 16-byte aligned, - // and to ensure that it is 16-byte aligned for out-going calls, - // to ensure compatibility with GCC codes - m->setOverrideStackAlignment(16); -#endif -#if defined(JL_DEBUG_BUILD) && JL_LLVM_VERSION >= 130000 + if (triple.isOSWindows() && triple.getArch() == Triple::x86) { + // tell Win32 to assume the stack is always 16-byte aligned, + // and to ensure that it is 16-byte aligned for out-going calls, + // to ensure compatibility with GCC codes + m->setOverrideStackAlignment(16); + } + +#if defined(JL_DEBUG_BUILD) m->setStackProtectorGuard("global"); #endif - return TSM; + return m; } -static void jl_init_function(Function *F) -{ - // set any attributes that *must* be set on all functions -#if JL_LLVM_VERSION >= 140000 - AttrBuilder attr(F->getContext()); -#else - AttrBuilder attr; -#endif -#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) - // tell Win32 to realign the stack to the next 16-byte boundary - // upon entry to any function. This achieves compatibility - // with both MinGW-GCC (which assumes an 16-byte-aligned stack) and - // i686 Windows (which uses a 4-byte-aligned stack) - attr.addStackAlignmentAttr(16); -#endif -#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) - attr.addAttribute(Attribute::UWTable); // force NeedsWinEH -#endif -#ifdef JL_DISABLE_FPO - attr.addAttribute("frame-pointer", "all"); -#endif -#if !defined(_COMPILER_ASAN_ENABLED_) && !defined(_OS_WINDOWS_) - // ASAN won't like us accessing undefined memory causing spurious issues, - // and Windows has platform-specific handling which causes it to mishandle - // this annotation. Other platforms should just ignore this if they don't - // implement it. 
- attr.addAttribute("probe-stack", "inline-asm"); - //attr.addAttribute("stack-probe-size", "4096"); // can use this to change the default -#endif -#if JL_LLVM_VERSION >= 140000 - F->addFnAttrs(attr); -#else - F->addAttributes(AttributeList::FunctionIndex, attr); -#endif +static void jl_name_jlfunc_args(jl_codegen_params_t ¶ms, Function *F) { + assert(F->arg_size() == 3); + F->getArg(0)->setName("function::Core.Function"); + F->getArg(1)->setName("args::Any[]"); + F->getArg(2)->setName("nargs::UInt32"); } -static std::pair uses_specsig(jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig) +static void jl_name_jlfuncparams_args(jl_codegen_params_t ¶ms, Function *F) { + assert(F->arg_size() == 4); + F->getArg(0)->setName("function::Core.Function"); + F->getArg(1)->setName("args::Any[]"); + F->getArg(2)->setName("nargs::UInt32"); + F->getArg(3)->setName("sparams::Any"); +} + +static void jl_init_function(Function *F, const Triple &TT) { - size_t nreq = jl_is_method(lam->def.method) ? lam->def.method->nargs : 0; - int va = 0; - if (nreq > 0 && lam->def.method->isva) { - nreq--; - va = 1; - } - jl_value_t *sig = lam->specTypes; - bool needsparams = false; - if (jl_is_method(lam->def.method)) { - if ((size_t)jl_subtype_env_size(lam->def.method->sig) != jl_svec_len(lam->sparam_vals)) - needsparams = true; - for (size_t i = 0; i < jl_svec_len(lam->sparam_vals); ++i) { - if (jl_is_typevar(jl_svecref(lam->sparam_vals, i))) - needsparams = true; - } + // set any attributes that *must* be set on all functions + AttrBuilder attr(F->getContext()); + if (TT.isOSWindows() && TT.getArch() == Triple::x86) { + // tell Win32 to assume the stack is always 16-byte aligned, + // and to ensure that it is 16-byte aligned for out-going calls, + // to ensure compatibility with GCC codes + attr.addStackAlignmentAttr(16); + } + if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { +#if JL_LLVM_VERSION < 150000 + attr.addAttribute(Attribute::UWTable); // force NeedsWinEH +#else + attr.addUWTableAttr(llvm::UWTableKind::Default); // force NeedsWinEH +#endif + } + if (jl_fpo_disabled(TT)) + attr.addAttribute("frame-pointer", "all"); + if (!TT.isOSWindows()) { +#if !defined(_COMPILER_ASAN_ENABLED_) + // ASAN won't like us accessing undefined memory causing spurious issues, + // and Windows has platform-specific handling which causes it to mishandle + // this annotation. Other platforms should just ignore this if they don't + // implement it. 
+ attr.addAttribute("probe-stack", "inline-asm"); + //attr.addAttribute("stack-probe-size", "4096"); // can use this to change the default +#endif } +#if defined(_COMPILER_ASAN_ENABLED_) + attr.addAttribute(Attribute::SanitizeAddress); +#endif +#if defined(_COMPILER_MSAN_ENABLED_) + attr.addAttribute(Attribute::SanitizeMemory); +#endif + F->addFnAttrs(attr); +} + +static bool uses_specsig(jl_value_t *sig, bool needsparams, bool va, jl_value_t *rettype, bool prefer_specsig) +{ if (needsparams) - return std::make_pair(false, true); + return false; if (sig == (jl_value_t*)jl_anytuple_type) - return std::make_pair(false, false); + return false; if (!jl_is_datatype(sig)) - return std::make_pair(false, false); + return false; if (jl_nparams(sig) == 0) - return std::make_pair(false, false); + return false; if (va) { if (jl_is_vararg(jl_tparam(sig, jl_nparams(sig) - 1))) - return std::make_pair(false, false); + return false; } // not invalid, consider if specialized signature is worthwhile if (prefer_specsig) - return std::make_pair(true, false); + return true; if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type) - return std::make_pair(true, false); + return true; if (jl_is_uniontype(rettype)) { bool allunbox; size_t nbytes, align, min_align; union_alloca_type((jl_uniontype_t*)rettype, allunbox, nbytes, align, min_align); if (nbytes > 0) - return std::make_pair(true, false); // some elements of the union could be returned unboxed avoiding allocation + return true; // some elements of the union could be returned unboxed avoiding allocation } if (jl_nparams(sig) <= 3) // few parameters == more efficient to pass directly - return std::make_pair(true, false); + return true; bool allSingleton = true; for (size_t i = 0; i < jl_nparams(sig); i++) { jl_value_t *sigt = jl_tparam(sig, i); bool issing = jl_is_datatype(sigt) && jl_is_datatype_singleton((jl_datatype_t*)sigt); allSingleton &= issing; if (!deserves_argbox(sigt) && !issing) { - return std::make_pair(true, false); + return true; } } if (allSingleton) - return std::make_pair(true, false); - return std::make_pair(false, false); // jlcall sig won't require any box allocations + return true; + return false; // jlcall sig won't require any box allocations +} + +static std::pair uses_specsig(jl_method_instance_t *lam, jl_value_t *rettype, bool prefer_specsig) +{ + int va = lam->def.method->isva; + jl_value_t *sig = lam->specTypes; + bool needsparams = false; + if (jl_is_method(lam->def.method)) { + if ((size_t)jl_subtype_env_size(lam->def.method->sig) != jl_svec_len(lam->sparam_vals)) + needsparams = true; + for (size_t i = 0; i < jl_svec_len(lam->sparam_vals); ++i) { + if (jl_is_typevar(jl_svecref(lam->sparam_vals, i))) + needsparams = true; + } + } + return std::make_pair(uses_specsig(sig, needsparams, va, rettype, prefer_specsig), needsparams); } @@ -2194,7 +2710,7 @@ JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line); static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const char *name) { Value *pv = ConstantExpr::getIntToPtr( - ConstantInt::get(getSizeTy(ctx.builder.getContext()), (uintptr_t)ptr), + ConstantInt::get(ctx.types().T_size, (uintptr_t)ptr), getInt64PtrTy(ctx.builder.getContext())); Value *v = ctx.builder.CreateLoad(getInt64Ty(ctx.builder.getContext()), pv, true, name); v = ctx.builder.CreateAdd(v, addend); @@ -2206,7 +2722,8 @@ static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const cha static void 
coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line) { - assert(!ctx.emission_context.imaging); + if (ctx.emission_context.imaging_mode) + return; // TODO if (filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; visitLine(ctx, jl_coverage_data_pointer(filename, line), ConstantInt::get(getInt64Ty(ctx.builder.getContext()), 1), "lcnt"); @@ -2216,7 +2733,8 @@ static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line) static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Value *sync) { - assert(!ctx.emission_context.imaging); + if (ctx.emission_context.imaging_mode) + return; // TODO if (filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; Value *addend = sync @@ -2232,9 +2750,9 @@ static void show_source_loc(jl_codectx_t &ctx, JL_STREAM *out) jl_printf(out, "in %s at %s", ctx.name, ctx.file.str().c_str()); } -static void cg_bdw(jl_codectx_t &ctx, jl_binding_t *b) +static void cg_bdw(jl_codectx_t &ctx, jl_sym_t *var, jl_binding_t *b) { - jl_binding_deprecation_warning(ctx.module, b); + jl_binding_deprecation_warning(ctx.module, var, b); if (b->deprecated == 1 && jl_options.depwarn) { show_source_loc(ctx, JL_STDERR); jl_printf(JL_STDERR, "\n"); @@ -2244,7 +2762,7 @@ static void cg_bdw(jl_codectx_t &ctx, jl_binding_t *b) static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, size_t nargs) { assert(nargs > 1); - jl_value_t **v = (jl_value_t**)alloca(sizeof(jl_value_t*) * nargs); + SmallVector v(nargs); for (size_t i = 0; i < nargs; i++) { if (!args[i].constant) return NULL; @@ -2256,7 +2774,7 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, jl_current_task->world_age = 1; jl_value_t *result; JL_TRY { - result = jl_apply(v, nargs); + result = jl_apply(v.data(), nargs); } JL_CATCH { result = NULL; @@ -2275,13 +2793,13 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) return jl_get_global(ctx.module, sym); return NULL; } - if (jl_is_slot(ex) || jl_is_argument(ex)) + if (jl_is_slotnumber(ex) || jl_is_argument(ex)) return NULL; if (jl_is_ssavalue(ex)) { ssize_t idx = ((jl_ssavalue_t*)ex)->id - 1; assert(idx >= 0); - if (ctx.ssavalue_assigned.at(idx)) { - return ctx.SAvalues.at(idx).constant; + if (ctx.ssavalue_assigned[idx]) { + return ctx.SAvalues[idx].constant; } return NULL; } @@ -2296,8 +2814,8 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) jl_binding_t *b = jl_get_binding(jl_globalref_mod(ex), s); if (b && b->constp) { if (b->deprecated) - cg_bdw(ctx, b); - return b->value; + cg_bdw(ctx, s, b); + return jl_atomic_load_relaxed(&b->value); } return NULL; } @@ -2318,8 +2836,8 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) jl_binding_t *b = jl_get_binding(m, s); if (b && b->constp) { if (b->deprecated) - cg_bdw(ctx, b); - return b->value; + cg_bdw(ctx, s, b); + return jl_atomic_load_relaxed(&b->value); } } } @@ -2369,7 +2887,7 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex) static bool slot_eq(jl_value_t *e, int sl) { - return (jl_is_slot(e) || jl_is_argument(e)) && jl_slot_number(e)-1 == sl; + return (jl_is_slotnumber(e) || jl_is_argument(e)) && jl_slot_number(e)-1 == sl; } // --- code gen for intrinsic functions --- @@ -2407,12 +2925,12 @@ static bool local_var_occurs(jl_value_t *e, int sl) static std::set assigned_in_try(jl_array_t *stmts, int s, long l) { std::set av; - for(int i=s; i <= l; i++) { + 
for(int i=s; i < l; i++) { jl_value_t *st = jl_array_ptr_ref(stmts,i); if (jl_is_expr(st)) { if (((jl_expr_t*)st)->head == jl_assign_sym) { jl_value_t *ar = jl_exprarg(st, 0); - if (jl_is_slot(ar)) { + if (jl_is_slotnumber(ar)) { av.insert(jl_slot_number(ar)-1); } } @@ -2421,23 +2939,23 @@ static std::set assigned_in_try(jl_array_t *stmts, int s, long l) return av; } -static void mark_volatile_vars(jl_array_t *stmts, std::vector &slots) +static void mark_volatile_vars(jl_array_t *stmts, SmallVectorImpl &slots) { size_t slength = jl_array_dim0(stmts); for (int i = 0; i < (int)slength; i++) { jl_value_t *st = jl_array_ptr_ref(stmts, i); - if (jl_is_expr(st)) { - if (((jl_expr_t*)st)->head == jl_enter_sym) { - int last = jl_unbox_long(jl_exprarg(st, 0)); - std::set as = assigned_in_try(stmts, i + 1, last); - for (int j = 0; j < (int)slength; j++) { - if (j < i || j > last) { - std::set::iterator it = as.begin(); - for (; it != as.end(); it++) { - if (local_var_occurs(jl_array_ptr_ref(stmts, j), *it)) { - jl_varinfo_t &vi = slots[*it]; - vi.isVolatile = true; - } + if (jl_is_enternode(st)) { + int last = jl_enternode_catch_dest(st); + if (last == 0) + continue; + std::set as = assigned_in_try(stmts, i + 1, last - 1); + for (int j = 0; j < (int)slength; j++) { + if (j < i || j > last) { + std::set::iterator it = as.begin(); + for (; it != as.end(); it++) { + if (local_var_occurs(jl_array_ptr_ref(stmts, j), *it)) { + jl_varinfo_t &vi = slots[*it]; + vi.isVolatile = true; } } } @@ -2495,7 +3013,7 @@ static void general_use_analysis(jl_codectx_t &ctx, jl_value_t *expr, callback & } else if (jl_is_phicnode(expr)) { jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 0); - size_t i, elen = jl_array_len(values); + size_t i, elen = jl_array_nrows(values); for (i = 0; i < elen; i++) { jl_value_t *v = jl_array_ptr_ref(values, i); general_use_analysis(ctx, v, f); @@ -2503,7 +3021,7 @@ static void general_use_analysis(jl_codectx_t &ctx, jl_value_t *expr, callback & } else if (jl_is_phinode(expr)) { jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 1); - size_t i, elen = jl_array_len(values); + size_t i, elen = jl_array_nrows(values); for (i = 0; i < elen; i++) { jl_value_t *v = jl_array_ptr_ref(values, i); if (v) @@ -2515,7 +3033,7 @@ static void general_use_analysis(jl_codectx_t &ctx, jl_value_t *expr, callback & static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr) { auto scan_slot_arg = [&](jl_value_t *expr) { - if (jl_is_slot(expr) || jl_is_argument(expr)) { + if (jl_is_slotnumber(expr) || jl_is_argument(expr)) { int i = jl_slot_number(expr) - 1; ctx.slots[i].used = true; return true; @@ -2529,27 +3047,27 @@ static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr) // ---- Get Element Pointer (GEP) instructions within the GC frame ---- -static void jl_add_method_root(jl_codectx_t &ctx, jl_value_t *val) +static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val) { - if (jl_is_concrete_type(val) || jl_is_bool(val) || jl_is_symbol(val) || val == jl_nothing || - val == (jl_value_t*)jl_any_type || val == (jl_value_t*)jl_bottom_type || val == (jl_value_t*)jl_core_module) - return; - JL_GC_PUSH1(&val); - if (ctx.roots == NULL) { - ctx.roots = jl_alloc_vec_any(1); - jl_array_ptr_set(ctx.roots, 0, val); - } - else { - size_t rlen = jl_array_dim0(ctx.roots); - for (size_t i = 0; i < rlen; i++) { - if (jl_array_ptr_ref(ctx.roots,i) == val) { - JL_GC_POP(); - return; + if (jl_is_globally_rooted(val)) + return val; + jl_method_t *m = ctx.linfo->def.method; + 
if (jl_is_method(m)) { + // the method might have a root for this already; use it if so + JL_LOCK(&m->writelock); + if (m->roots) { + size_t i, len = jl_array_dim0(m->roots); + for (i = 0; i < len; i++) { + jl_value_t *mval = jl_array_ptr_ref(m->roots, i); + if (mval == val || jl_egal(mval, val)) { + JL_UNLOCK(&m->writelock); + return mval; + } } } - jl_array_ptr_1d_push(ctx.roots, val); + JL_UNLOCK(&m->writelock); } - JL_GC_POP(); + return jl_as_global_root(val, 1); } // --- generating function calls --- @@ -2561,31 +3079,45 @@ static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t * if (bp == NULL) return jl_cgval_t(); bp = julia_binding_pvalue(ctx, bp); - if (bnd && bnd->value != NULL) { - if (bnd->constp) { - return mark_julia_const(ctx, bnd->value); + if (bnd) { + jl_value_t *v = jl_atomic_load_acquire(&bnd->value); // acquire value for ty + if (v != NULL) { + if (bnd->constp) + return mark_julia_const(ctx, v); + LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); + setName(ctx.emission_context, v, jl_symbol_name(name)); + v->setOrdering(order); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); + ai.decorateInst(v); + jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty); + return mark_julia_type(ctx, v, true, ty); } - LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); - v->setOrdering(order); - tbaa_decorate(ctx.tbaa().tbaa_binding, v); - return mark_julia_type(ctx, v, true, bnd->ty); } // todo: use type info to avoid undef check - return emit_checked_var(ctx, bp, name, false, ctx.tbaa().tbaa_binding); + return emit_checked_var(ctx, bp, name, (jl_value_t*)mod, false, ctx.tbaa().tbaa_binding); } -static void emit_globalset(jl_codectx_t &ctx, jl_binding_t *bnd, Value *bp, const jl_cgval_t &rval_info, AtomicOrdering Order) +static bool emit_globalset(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *sym, const jl_cgval_t &rval_info, AtomicOrdering Order) { + jl_binding_t *bnd = NULL; + Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true); + if (bp == NULL) + return false; Value *rval = boxed(ctx, rval_info); - if (bnd && !bnd->constp && bnd->ty && jl_subtype(rval_info.typ, bnd->ty)) { - StoreInst *v = ctx.builder.CreateAlignedStore(rval, julia_binding_pvalue(ctx, bp), Align(sizeof(void*))); - v->setOrdering(Order); - tbaa_decorate(ctx.tbaa().tbaa_binding, v); - emit_write_barrier_binding(ctx, bp, rval); - } - else { - ctx.builder.CreateCall(prepare_call(jlcheckassign_func), { bp, mark_callee_rooted(ctx, rval) }); + if (bnd && !bnd->constp) { + jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty); + if (ty && jl_subtype(rval_info.typ, ty)) { // TODO: use typeassert here instead + StoreInst *v = ctx.builder.CreateAlignedStore(rval, julia_binding_pvalue(ctx, bp), Align(sizeof(void*))); + v->setOrdering(Order); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); + ai.decorateInst(v); + emit_write_barrier(ctx, bp, rval); + return true; + } } + ctx.builder.CreateCall(prepare_call(jlcheckassign_func), + { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), mark_callee_rooted(ctx, rval) }); + return true; } static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2, @@ -2607,8 +3139,8 @@ static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const } Value *neq = ctx.builder.CreateICmpNE(varg1, varg2); return emit_guarded_test(ctx, neq, 
true, [&] { - Value *dtarg = emit_typeof_boxed(ctx, arg1); - Value *dt_eq = ctx.builder.CreateICmpEQ(dtarg, emit_typeof_boxed(ctx, arg2)); + Value *dtarg = emit_typeof(ctx, arg1, false, true); + Value *dt_eq = ctx.builder.CreateICmpEQ(dtarg, emit_typeof(ctx, arg2, false, true)); return emit_guarded_test(ctx, dt_eq, false, [&] { return ctx.builder.CreateTrunc(ctx.builder.CreateCall(prepare_call(jlegalx_func), {varg1, varg2, dtarg}), getInt1Ty(ctx.builder.getContext())); @@ -2628,6 +3160,7 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, Value *tindex2 = arg2.TIndex; tindex2 = ctx.builder.CreateAnd(tindex2, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); Value *typeeq = ctx.builder.CreateICmpEQ(tindex, tindex2); + setName(ctx.emission_context, typeeq, "typematch"); tindex = ctx.builder.CreateSelect(typeeq, tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x00)); BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), "unionbits_is_boxed", ctx.f); SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB); @@ -2659,6 +3192,7 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, ctx.builder.CreateCall(trap_func); ctx.builder.CreateUnreachable(); ctx.builder.SetInsertPoint(postBB); + setName(ctx.emission_context, phi, "unionbits_is"); return phi; } @@ -2673,7 +3207,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) { - Type *at_int = INTT(at); + Type *at_int = INTT(at, ctx.emission_context.DL); Value *varg1 = emit_unbox(ctx, at_int, arg1, arg1.typ); Value *varg2 = emit_unbox(ctx, at_int, arg2, arg2.typ); return ctx.builder.CreateICmpEQ(varg1, varg2); @@ -2700,7 +3234,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a if (at->isAggregateType()) { // Struct or Array jl_datatype_t *sty = (jl_datatype_t*)arg1.typ; size_t sz = jl_datatype_size(sty); - if (sz > 512 && !sty->layout->haspadding) { + if (sz > 512 && !sty->layout->flags.haspadding) { Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) : value_to_pointer(ctx, arg1).V; Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) : @@ -2709,28 +3243,35 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a varg2 = emit_pointer_from_objref(ctx, varg2); Value *gc_uses[2]; int nroots = 0; - if ((gc_uses[nroots] = get_gc_root_for(arg1))) + // these roots may seem a bit overkill, but we want to make sure + // that a!=b implies (a,)!=(b,) even if a and b are unused and + // therefore could be freed and then the memory for a reused for b + if ((gc_uses[nroots] = get_gc_root_for(ctx, arg1))) nroots++; - if ((gc_uses[nroots] = get_gc_root_for(arg2))) + if ((gc_uses[nroots] = get_gc_root_for(ctx, arg2))) nroots++; - OperandBundleDef OpBundle("jl_roots", makeArrayRef(gc_uses, nroots)); + OperandBundleDef OpBundle("jl_roots", ArrayRef(gc_uses, nroots)); auto answer = ctx.builder.CreateCall(prepare_call(memcmp_func), { ctx.builder.CreateBitCast(varg1, getInt8PtrTy(ctx.builder.getContext())), ctx.builder.CreateBitCast(varg2, getInt8PtrTy(ctx.builder.getContext())), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), sz) }, + ConstantInt::get(ctx.types().T_size, sz) }, ArrayRef(&OpBundle, nroots ? 
1 : 0)); - MDNode *tbaa = nullptr; - if (!arg1.tbaa) { - tbaa = arg2.tbaa; - } - else if (!arg2.tbaa) { - tbaa = arg1.tbaa; - } - else { - tbaa = MDNode::getMostGenericTBAA(arg1.tbaa, arg2.tbaa); + + if (arg1.tbaa || arg2.tbaa) { + jl_aliasinfo_t ai; + if (!arg1.tbaa) { + ai = jl_aliasinfo_t::fromTBAA(ctx, arg2.tbaa); + } + else if (!arg2.tbaa) { + ai = jl_aliasinfo_t::fromTBAA(ctx, arg1.tbaa); + } + else { + jl_aliasinfo_t arg1_ai = jl_aliasinfo_t::fromTBAA(ctx, arg1.tbaa); + jl_aliasinfo_t arg2_ai = jl_aliasinfo_t::fromTBAA(ctx, arg2.tbaa); + ai = arg1_ai.merge(arg2_ai); + } + ai.decorateInst(answer); } - if (tbaa) - tbaa_decorate(tbaa, answer); return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); } else { @@ -2789,11 +3330,11 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva // since it is normalized to `::Type{Union{}}` instead... if (arg1.TIndex) return emit_nullcheck_guard(ctx, nullcheck1, [&] { - return emit_exactly_isa(ctx, arg1, rt2); // rt2 is a singleton type + return emit_exactly_isa(ctx, arg1, (jl_datatype_t*)rt2); // rt2 is a singleton type }); if (arg2.TIndex) return emit_nullcheck_guard(ctx, nullcheck2, [&] { - return emit_exactly_isa(ctx, arg2, rt1); // rt1 is a singleton type + return emit_exactly_isa(ctx, arg2, (jl_datatype_t*)rt1); // rt1 is a singleton type }); if (!(arg1.isboxed || arg1.constant) || !(arg2.isboxed || arg2.constant)) // not TIndex && not boxed implies it is an unboxed value of a different type from this singleton @@ -2816,8 +3357,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva bool justbits2 = jl_is_concrete_immutable(rt2); if (justbits1 || justbits2) { // whether this type is unique'd by value return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] () -> Value* { - jl_value_t *typ = justbits1 ? rt1 : rt2; - if (typ == (jl_value_t*)jl_bool_type) { // aka jl_pointer_egal + jl_datatype_t *typ = (jl_datatype_t*)(justbits1 ? rt1 : rt2); + if (typ == jl_bool_type) { // aka jl_pointer_egal // some optimizations for bool, since pointer comparison may be better if ((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant)) { // aka have-fast-pointer Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue); @@ -2827,14 +3368,14 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva } if (rt1 == rt2) return emit_bits_compare(ctx, arg1, arg2); - Value *same_type = emit_exactly_isa(ctx, (typ == rt2 ? arg1 : arg2), typ); + Value *same_type = emit_exactly_isa(ctx, (justbits1 ? 
arg2 : arg1), typ); BasicBlock *currBB = ctx.builder.GetInsertBlock(); BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "is", ctx.f); BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_is", ctx.f); ctx.builder.CreateCondBr(same_type, isaBB, postBB); ctx.builder.SetInsertPoint(isaBB); - Value *bitcmp = emit_bits_compare(ctx, jl_cgval_t(arg1, typ, NULL), - jl_cgval_t(arg2, typ, NULL)); + Value *bitcmp = emit_bits_compare(ctx, jl_cgval_t(arg1, (jl_value_t*)typ, NULL), + jl_cgval_t(arg2, (jl_value_t*)typ, NULL)); isaBB = ctx.builder.GetInsertBlock(); // might have changed ctx.builder.CreateBr(postBB); ctx.builder.SetInsertPoint(postBB); @@ -2863,6 +3404,7 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, const jl_cgval_t &sym = argv[2]; const jl_cgval_t &val = argv[3]; enum jl_memory_order order = jl_memory_order_unspecified; + assert(f == jl_builtin_setglobal && modifyop == nullptr && "unimplemented"); if (nargs == 4) { const jl_cgval_t &arg4 = argv[4]; @@ -2872,7 +3414,7 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return false; } else - order = jl_memory_order_monotonic; + order = jl_memory_order_release; if (order == jl_memory_order_invalid || order == jl_memory_order_notatomic) { emit_atomic_error(ctx, order == jl_memory_order_invalid ? "invalid atomic ordering" : "setglobal!: module binding cannot be written non-atomically"); @@ -2883,15 +3425,10 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (sym.constant && jl_is_symbol(sym.constant)) { jl_sym_t *name = (jl_sym_t*)sym.constant; if (mod.constant && jl_is_module(mod.constant)) { - jl_binding_t *bnd = NULL; - Value *bp = global_binding_pointer(ctx, (jl_module_t*)mod.constant, name, &bnd, true); - if (bp) { - emit_globalset(ctx, bnd, bp, val, get_llvm_atomic_order(order)); + if (emit_globalset(ctx, (jl_module_t*)mod.constant, name, val, get_llvm_atomic_order(order))) *ret = val; - } - else { + else *ret = jl_cgval_t(); // unreachable - } return true; } } @@ -2987,7 +3524,7 @@ static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } else if (jl_field_isconst(uty, idx)) { std::string msg = fname + ": const field ." 
- + std::string(jl_symbol_name((jl_sym_t*)jl_svec_ref(jl_field_names(uty), idx))) + + std::string(jl_symbol_name((jl_sym_t*)jl_svecref(jl_field_names(uty), idx))) + " of type " + std::string(jl_symbol_name(uty->name->name)) + " cannot be changed"; @@ -3019,6 +3556,8 @@ static jl_llvm_functions_t jl_value_t *jlrettype, jl_codegen_params_t ¶ms); +static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *type, jl_cgval_t name); + static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt, jl_expr_t *ex, bool is_promotable) @@ -3028,12 +3567,18 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (f == jl_builtin_is && nargs == 2) { // emit comparison test Value *ans = emit_f_is(ctx, argv[1], argv[2]); - *ret = mark_julia_type(ctx, ctx.builder.CreateZExt(ans, getInt8Ty(ctx.builder.getContext())), false, jl_bool_type); + *ret = mark_julia_type(ctx, ans, false, jl_bool_type); return true; } else if (f == jl_builtin_typeof && nargs == 1) { - *ret = emit_typeof(ctx, argv[1], false); + const jl_cgval_t &p = argv[1]; + if (p.constant) + *ret = mark_julia_const(ctx, jl_typeof(p.constant)); + else if (jl_is_concrete_type(p.typ)) + *ret = mark_julia_const(ctx, p.typ); + else + *ret = mark_julia_type(ctx, emit_typeof(ctx, p, false, false), true, jl_datatype_type); return true; } @@ -3060,9 +3605,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, const jl_cgval_t &ty = argv[2]; if (jl_is_type_type(ty.typ) && !jl_has_free_typevars(ty.typ)) { jl_value_t *tp0 = jl_tparam0(ty.typ); - Value *isa_result = emit_isa(ctx, arg, tp0, NULL).first; - if (isa_result->getType() == getInt1Ty(ctx.builder.getContext())) - isa_result = ctx.builder.CreateZExt(isa_result, getInt8Ty(ctx.builder.getContext())); + Value *isa_result = emit_isa(ctx, arg, tp0, Twine()).first; *ret = mark_julia_type(ctx, isa_result, false, jl_bool_type); return true; } @@ -3088,7 +3631,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, #ifdef _P64 nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext())); #endif - Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs)); + Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)); Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); *ret = mark_julia_type(ctx, r, true, jl_any_type); return true; @@ -3114,246 +3657,282 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } - else if (f == jl_builtin_arraysize && nargs == 2) { - const jl_cgval_t &ary = argv[1]; - const jl_cgval_t &idx = argv[2]; - jl_value_t *aty = jl_unwrap_unionall(ary.typ); - if (jl_is_array_type(aty) && idx.typ == (jl_value_t*)jl_long_type) { - jl_value_t *ndp = jl_tparam1(aty); - if (jl_is_long(ndp)) { - size_t ndims = jl_unbox_long(ndp); - if (idx.constant) { - uint32_t idx_const = (uint32_t)jl_unbox_long(idx.constant); - if (idx_const > 0 && idx_const <= ndims) { - jl_value_t *ary_ex = jl_exprarg(ex, 1); - *ret = mark_julia_type(ctx, emit_arraysize(ctx, ary, ary_ex, idx_const), false, jl_long_type); - return true; - } - else if (idx_const > ndims) { - *ret = mark_julia_type(ctx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1), false, jl_long_type); 
- return true; - } + else if (f == jl_builtin_memoryref && nargs == 1) { + const jl_cgval_t &mem = argv[1]; + jl_datatype_t *mty_dt = (jl_datatype_t*)jl_unwrap_unionall(mem.typ); + if (jl_is_genericmemory_type(mty_dt) && jl_is_concrete_type((jl_value_t*)mty_dt)) { + jl_value_t *typ = jl_apply_type((jl_value_t*)jl_genericmemoryref_type, jl_svec_data(mty_dt->parameters), jl_svec_len(mty_dt->parameters)); + const jl_datatype_layout_t *layout = mty_dt->layout; + *ret = _emit_memoryref(ctx, mem, layout, typ); + return true; + } + } + + else if (f == jl_builtin_memoryref && (nargs == 2 || nargs == 3)) { + const jl_cgval_t &ref = argv[1]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + jl_value_t *boundscheck = nargs == 3 ? argv[3].constant : nullptr; + if (nargs == 3) + emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "memoryref"); + *ret = emit_memoryref(ctx, ref, argv[2], boundscheck, layout); + return true; + } + } + + else if (f == jl_builtin_memoryrefoffset && nargs == 1) { + const jl_cgval_t &ref = argv[1]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + *ret = emit_memoryref_offset(ctx, ref, layout); + return true; + } + } + + else if (f == jl_builtin_memoryrefget && nargs == 3) { + const jl_cgval_t &ref = argv[1]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + jl_value_t *isatomic = jl_tparam0(mty_dt); (void)isatomic; // TODO + jl_value_t *ety = jl_tparam1(mty_dt); + jl_value_t *addrspace = jl_tparam2(mty_dt); (void)addrspace; // TODO + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + jl_value_t *order = argv[2].constant; + if (order != (jl_value_t*)jl_not_atomic_sym) + return false; + jl_value_t *boundscheck = argv[3].constant; + emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "memoryref"); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + Value *mem = emit_memoryref_mem(ctx, ref, layout); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + if (bounds_check_enabled(ctx, boundscheck)) { + BasicBlock *failBB, *endBB; + failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "load"); + ctx.builder.CreateCondBr(ctx.builder.CreateIsNull(mlen), failBB, endBB); + ctx.f->getBasicBlockList().push_back(failBB); + ctx.builder.SetInsertPoint(failBB); + ctx.builder.CreateCall(prepare_call(jlboundserror_func), { mark_callee_rooted(ctx, mem), ConstantInt::get(ctx.types().T_size, 1) }); + ctx.builder.CreateUnreachable(); + ctx.f->getBasicBlockList().push_back(endBB); + ctx.builder.SetInsertPoint(endBB); + } + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + size_t elsz = layout->size; + size_t al = layout->alignment; + if (!isboxed && !isunion && elsz == 0) { + assert(jl_is_datatype(ety) && jl_is_datatype_singleton((jl_datatype_t*)ety)); + *ret = ghostValue(ctx, ety); + } + else if (isunion) { + Value *V = emit_memoryref_FCA(ctx, ref, layout); + Value *idx0 = CreateSimplifiedExtractValue(ctx, V, 0); + Value *mem = 
CreateSimplifiedExtractValue(ctx, V, 1); + Value *data = emit_genericmemoryptr(ctx, mem, layout, AddressSpace::Loaded); + Value *ptindex; + if (elsz == 0) { + ptindex = data; } else { - Value *idx_dyn = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idx, (jl_value_t*)jl_long_type); - error_unless(ctx, ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(getSizeTy(ctx.builder.getContext()))), - "arraysize: dimension out of range"); - BasicBlock *outBB = BasicBlock::Create(ctx.builder.getContext(), "outofrange", ctx.f); - BasicBlock *inBB = BasicBlock::Create(ctx.builder.getContext(), "inrange"); - BasicBlock *ansBB = BasicBlock::Create(ctx.builder.getContext(), "arraysize"); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpSLE(idx_dyn, - ConstantInt::get(getSizeTy(ctx.builder.getContext()), ndims)), - inBB, outBB); - ctx.builder.SetInsertPoint(outBB); - Value *v_one = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1); - ctx.builder.CreateBr(ansBB); - ctx.f->getBasicBlockList().push_back(inBB); - ctx.builder.SetInsertPoint(inBB); - Value *v_sz = emit_arraysize(ctx, ary, idx_dyn); - ctx.builder.CreateBr(ansBB); - inBB = ctx.builder.GetInsertBlock(); // could have changed - ctx.f->getBasicBlockList().push_back(ansBB); - ctx.builder.SetInsertPoint(ansBB); - PHINode *result = ctx.builder.CreatePHI(getSizeTy(ctx.builder.getContext()), 2); - result->addIncoming(v_one, outBB); - result->addIncoming(v_sz, inBB); - *ret = mark_julia_type(ctx, result, false, jl_long_type); - return true; + Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); + data = emit_bitcast(ctx, data, AT->getPointerTo()); + // isbits union selector bytes are stored after mem->length bytes + ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen); + data = ctx.builder.CreateInBoundsGEP(AT, data, idx0); } + ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); + ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx0); + size_t elsz_c = 0, al_c = 0; + int union_max = jl_islayout_inline(ety, &elsz_c, &al_c); + assert(union_max && LLT_ALIGN(elsz_c, al_c) == elsz && al_c == al); + *ret = emit_unionload(ctx, data, ptindex, ety, elsz_c, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte); + } + else { + *ret = typed_load(ctx, + emit_memoryref_ptr(ctx, ref, layout), + nullptr, ety, + isboxed ? 
ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf, + ctx.noalias().aliasscope.current, + isboxed, + AtomicOrdering::NotAtomic); } + return true; } } - else if ((f == jl_builtin_arrayref || f == jl_builtin_const_arrayref) && nargs >= 3) { - const jl_cgval_t &ary = argv[2]; - bool indices_ok = true; - for (size_t i = 3; i <= nargs; i++) { - if (argv[i].typ != (jl_value_t*)jl_long_type) { - indices_ok = false; - break; + else if (f == jl_builtin_memoryrefset && nargs == 4) { + const jl_cgval_t &ref = argv[1]; + jl_cgval_t val = argv[2]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + jl_value_t *isatomic = jl_tparam0(mty_dt); (void)isatomic; // TODO + jl_value_t *ety = jl_tparam1(mty_dt); + jl_value_t *addrspace = jl_tparam2(mty_dt); (void)addrspace; // TODO + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + jl_value_t *order = argv[3].constant; + if (order != (jl_value_t*)jl_not_atomic_sym) + return false; + jl_value_t *boundscheck = argv[4].constant; + emit_typecheck(ctx, argv[4], (jl_value_t*)jl_bool_type, "memoryset"); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + Value *mem = emit_memoryref_mem(ctx, ref, layout); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + if (bounds_check_enabled(ctx, boundscheck)) { + BasicBlock *failBB, *endBB; + failBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "load"); + ctx.builder.CreateCondBr(ctx.builder.CreateIsNull(mlen), failBB, endBB); + ctx.f->getBasicBlockList().push_back(failBB); + ctx.builder.SetInsertPoint(failBB); + ctx.builder.CreateCall(prepare_call(jlboundserror_func), { mark_callee_rooted(ctx, mem), ConstantInt::get(ctx.types().T_size, 1) }); + ctx.builder.CreateUnreachable(); + ctx.f->getBasicBlockList().push_back(endBB); + ctx.builder.SetInsertPoint(endBB); + } + emit_typecheck(ctx, val, ety, "memoryset"); + val = update_julia_type(ctx, val, ety); + if (val.typ == jl_bottom_type) + return true; + bool isboxed = layout->flags.arrayelem_isboxed; + bool isunion = layout->flags.arrayelem_isunion; + size_t elsz = layout->size; + size_t al = layout->alignment; + if (isboxed) + ety = (jl_value_t*)jl_any_type; + if (!isboxed && !isunion && elsz == 0) { + assert(jl_is_datatype(ety) && jl_datatype_size(ety) == 0); + // no-op } - } - jl_value_t *aty_dt = jl_unwrap_unionall(ary.typ); - if (jl_is_array_type(aty_dt) && indices_ok) { - jl_value_t *ety = jl_tparam0(aty_dt); - jl_value_t *ndp = jl_tparam1(aty_dt); - if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 3)) { - jl_value_t *ary_ex = jl_exprarg(ex, 2); - size_t elsz = 0, al = 0; - int union_max = jl_islayout_inline(ety, &elsz, &al); - bool isboxed = (union_max == 0); - if (isboxed) - ety = (jl_value_t*)jl_any_type; - ssize_t nd = jl_is_long(ndp) ? 
jl_unbox_long(ndp) : -1; - jl_value_t *boundscheck = argv[1].constant; - emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayref"); - Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[3], nargs - 2, boundscheck); - if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) { - assert(((jl_datatype_t*)ety)->instance != NULL); - *ret = ghostValue(ctx, ety); + else { + Value *V = emit_memoryref_FCA(ctx, ref, layout); + Value *data_owner = NULL; // owner object against which the write barrier must check + if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier + data_owner = emit_genericmemoryowner(ctx, CreateSimplifiedExtractValue(ctx, V, 1)); } - else if (!isboxed && jl_is_uniontype(ety)) { - Value *data = emit_arrayptr(ctx, ary, ary_ex); - Value *offset = emit_arrayoffset(ctx, ary, nd); + if (isunion) { + Value *idx0 = CreateSimplifiedExtractValue(ctx, V, 0); + Value *mem = CreateSimplifiedExtractValue(ctx, V, 1); + Value *data = emit_genericmemoryptr(ctx, mem, layout, AddressSpace::Loaded); + Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); + data = emit_bitcast(ctx, data, AT->getPointerTo()); + // compute tindex from val + jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety); + Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety); + tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1)); Value *ptindex; if (elsz == 0) { ptindex = data; } else { - Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); data = emit_bitcast(ctx, data, AT->getPointerTo()); - // isbits union selector bytes are stored after a->maxsize - Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd)); - Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1)); - Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, getSizeTy(ctx.builder.getContext()))); - Value *selidx_m = emit_arraylen(ctx, ary); - Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m); - ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx); - data = ctx.builder.CreateInBoundsGEP(AT, data, idx); + // isbits union selector bytes are stored after mem->length + ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen); + data = ctx.builder.CreateInBoundsGEP(AT, data, idx0); } ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset); - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx); - *ret = emit_unionload(ctx, data, ptindex, ety, elsz, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte); + ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx0); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayselbyte); + ai.decorateInst(ctx.builder.CreateStore(tindex, ptindex)); + if (elsz > 0 && (!jl_is_datatype(val.typ) || jl_datatype_size(val.typ) > 0)) { + // copy data (if any) + emit_unionmove(ctx, data, ctx.tbaa().tbaa_arraybuf, val, nullptr); + } } else { - MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? ctx.aliasscope : nullptr; - *ret = typed_load(ctx, - emit_arrayptr(ctx, ary, ary_ex), - idx, ety, - isboxed ? 
ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf, - aliasscope, - isboxed, - AtomicOrdering::NotAtomic); + typed_store(ctx, + emit_memoryref_ptr(ctx, ref, layout), + nullptr, val, jl_cgval_t(), ety, + isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf, + ctx.noalias().aliasscope.current, + data_owner, + isboxed, + isboxed ? AtomicOrdering::Release : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0 + /*FailOrder*/AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0 + 0, + false, + true, + false, + false, + false, + false, + nullptr, + ""); } - return true; } + *ret = ref; + return true; } } - else if (f == jl_builtin_arrayset && nargs >= 4) { - const jl_cgval_t &ary = argv[2]; - jl_cgval_t val = argv[3]; - bool indices_ok = true; - for (size_t i = 4; i <= nargs; i++) { - if (argv[i].typ != (jl_value_t*)jl_long_type) { - indices_ok = false; - break; - } - } - jl_value_t *aty_dt = jl_unwrap_unionall(ary.typ); - if (jl_is_array_type(aty_dt) && indices_ok) { - jl_value_t *ety = jl_tparam0(aty_dt); - jl_value_t *ndp = jl_tparam1(aty_dt); - if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 4)) { - emit_typecheck(ctx, val, ety, "arrayset"); - val = update_julia_type(ctx, val, ety); - if (val.typ == jl_bottom_type) - return true; - size_t elsz = 0, al = 0; - int union_max = jl_islayout_inline(ety, &elsz, &al); - bool isboxed = (union_max == 0); - if (isboxed) - ety = (jl_value_t*)jl_any_type; - jl_value_t *ary_ex = jl_exprarg(ex, 2); - ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1; - jl_value_t *boundscheck = argv[1].constant; - emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset"); - Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck); - if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) { - // no-op + else if (f == jl_builtin_memoryref_isassigned && nargs == 3) { + const jl_cgval_t &ref = argv[1]; + jl_value_t *mty_dt = jl_unwrap_unionall(ref.typ); + if (jl_is_genericmemoryref_type(mty_dt) && jl_is_concrete_type(mty_dt)) { + jl_value_t *isatomic = jl_tparam0(mty_dt); (void)isatomic; // TODO + mty_dt = jl_field_type_concrete((jl_datatype_t*)mty_dt, 1); + jl_value_t *order = argv[2].constant; + if (order != (jl_value_t*)jl_not_atomic_sym) + return false; + jl_value_t *boundscheck = argv[3].constant; + emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "memory_isassigned"); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout; + Value *mem = emit_memoryref_mem(ctx, ref, layout); + Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ); + Value *oob = bounds_check_enabled(ctx, boundscheck) ? 
ctx.builder.CreateIsNull(mlen) : nullptr; + bool isboxed = layout->flags.arrayelem_isboxed; + if (isboxed || layout->first_ptr >= 0) { + PHINode *result = nullptr; + if (oob) { + BasicBlock *passBB, *endBB, *fromBB; + passBB = BasicBlock::Create(ctx.builder.getContext(), "load"); + endBB = BasicBlock::Create(ctx.builder.getContext(), "oob"); + ctx.f->getBasicBlockList().push_back(passBB); + ctx.f->getBasicBlockList().push_back(endBB); + fromBB = ctx.builder.CreateCondBr(oob, endBB, passBB)->getParent(); + ctx.builder.SetInsertPoint(endBB); + result = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2); + result->addIncoming(ConstantInt::get(result->getType(), 0), fromBB); + setName(ctx.emission_context, result, "arraysize"); + ctx.builder.SetInsertPoint(passBB); } - else { - PHINode *data_owner = NULL; // owner object against which the write barrier must check - if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier - Value *aryv = boxed(ctx, ary); - Value *flags = emit_arrayflags(ctx, ary); - // the owner of the data is ary itself except if ary->how == 3 - flags = ctx.builder.CreateAnd(flags, 3); - Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 3)); - BasicBlock *curBB = ctx.builder.GetInsertBlock(); - BasicBlock *ownedBB = BasicBlock::Create(ctx.builder.getContext(), "array_owned", ctx.f); - BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge_own", ctx.f); - ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB); - ctx.builder.SetInsertPoint(ownedBB); - // load owner pointer - Instruction *own_ptr; - if (jl_is_long(ndp)) { - own_ptr = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, - emit_bitcast(ctx, decay_derived(ctx, aryv), ctx.types().T_pprjlvalue), - jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)), - Align(sizeof(void*))); - tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type)); - } - else { - own_ptr = ctx.builder.CreateCall( - prepare_call(jlarray_data_owner_func), - {aryv}); - } - ctx.builder.CreateBr(mergeBB); - ctx.builder.SetInsertPoint(mergeBB); - data_owner = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2); - data_owner->addIncoming(aryv, curBB); - data_owner->addIncoming(own_ptr, ownedBB); - } - if (!isboxed && jl_is_uniontype(ety)) { - Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); - Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo()); - Value *offset = emit_arrayoffset(ctx, ary, nd); - // compute tindex from val - jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety); - Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety); - tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1)); - Value *ptindex; - if (elsz == 0) { - ptindex = data; - } - else { - Value *ndims = (nd == -1 ? 
emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd)); - Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1)); - Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, getSizeTy(ctx.builder.getContext()))); - Value *selidx_m = emit_arraylen(ctx, ary); - Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m); - ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx); - data = ctx.builder.CreateInBoundsGEP(AT, data, idx); - } - ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset); - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx); - tbaa_decorate(ctx.tbaa().tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex)); - if (elsz > 0 && (!jl_is_datatype(val.typ) || jl_datatype_size(val.typ) > 0)) { - // copy data (if any) - emit_unionmove(ctx, data, ctx.tbaa().tbaa_arraybuf, val, nullptr); - } - } - else { - typed_store(ctx, - emit_arrayptr(ctx, ary, ary_ex, isboxed), - idx, val, jl_cgval_t(), ety, - isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf, - ctx.aliasscope, - data_owner, - isboxed, - isboxed ? AtomicOrdering::Release : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0 - /*FailOrder*/AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0 - 0, - false, - true, - false, - false, - false, - false, - nullptr, - ""); - } + Value *elem = emit_memoryref_ptr(ctx, ref, layout); + elem = emit_bitcast(ctx, elem, ctx.types().T_pprjlvalue); + if (!isboxed) + elem = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, elem, layout->first_ptr); + // emit this using the same type as jl_builtin_memoryrefget + // so that LLVM may be able to load-load forward them and fold the result + auto tbaa = isboxed ? 
ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf; + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + LoadInst *fldv = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, elem, ctx.types().alignof_ptr); + //fldv->setOrdering(AtomicOrdering::Unordered); + ai.decorateInst(fldv); + Value *isdef = ctx.builder.CreateIsNotNull(fldv); + setName(ctx.emission_context, isdef, "memoryref_isassigned"); + if (oob) { + assert(result); + result->addIncoming(isdef, ctx.builder.CreateBr(result->getParent())->getParent()); + ctx.builder.SetInsertPoint(result->getParent()); + isdef = result; } - *ret = ary; - return true; + *ret = mark_julia_type(ctx, isdef, false, jl_bool_type); + } + else if (oob) { + Value *isdef = ctx.builder.CreateNot(oob); + *ret = mark_julia_type(ctx, isdef, false, jl_bool_type); } + else { + *ret = mark_julia_const(ctx, jl_true); + } + return true; } } + else if (f == jl_builtin_getfield && (nargs == 2 || nargs == 3 || nargs == 4)) { const jl_cgval_t &obj = argv[1]; const jl_cgval_t &fld = argv[2]; @@ -3412,14 +3991,16 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { Value *valen = emit_n_varargs(ctx); jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check (it only checks the `.V` field) - ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs)), + ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)), NULL, NULL); - Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type); + Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type); idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck); - idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs)); + idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)); Instruction *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, idx), Align(sizeof(void*))); + setName(ctx.emission_context, v, "getfield"); // if we know the result type of this load, we will mark that information here too - tbaa_decorate(ctx.tbaa().tbaa_value, maybe_mark_load_dereferenceable(v, false, rt)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_value); + ai.decorateInst(maybe_mark_load_dereferenceable(v, false, rt)); *ret = mark_julia_type(ctx, v, /*boxed*/ true, rt); return true; } @@ -3440,12 +4021,13 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } else { // unknown index - Value *vidx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type); + Value *vidx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type); if (emit_getfield_unknownidx(ctx, ret, obj, vidx, utt, boundscheck, order)) { return true; } } } + Value *vidx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type); if (jl_is_tuple_type(utt) && is_tupletype_homogeneous(utt->parameters, true)) { // For tuples, we can emit code even if we don't know the exact // type (e.g. because we don't know the length). 
This is possible @@ -3456,19 +4038,19 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, *ret = jl_cgval_t(); // unreachable return true; } - // Determine which was the type that was homogenous + // Determine which was the type that was homogeneous jl_value_t *jt = jl_tparam0(utt); if (jl_is_vararg(jt)) jt = jl_unwrap_vararg(jt); assert(jl_is_datatype(jt)); - Value *vidx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type); // This is not necessary for correctness, but allows to omit // the extra code for getting the length of the tuple if (!bounds_check_enabled(ctx, boundscheck)) { - vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)); - } else { + vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1)); + } + else { vidx = emit_bounds_check(ctx, obj, (jl_value_t*)obj.typ, vidx, - emit_datatype_nfields(ctx, emit_typeof_boxed(ctx, obj)), + emit_datatype_nfields(ctx, emit_typeof(ctx, obj, false, false)), jl_true); } bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0); @@ -3479,9 +4061,36 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } } + + // Unknown object, but field known to be integer + vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1)); + Value *fld_val = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, obj), vidx }, "getfield"); + *ret = mark_julia_type(ctx, fld_val, true, jl_any_type); + return true; + } + } + else if (fld.typ == (jl_value_t*)jl_symbol_type) { // Known type but unknown symbol + if (jl_is_datatype(utt) && (utt != jl_module_type) && jl_struct_try_layout(utt)) { + if ((jl_datatype_nfields(utt) == 1 && !jl_is_namedtuple_type(utt) && !jl_is_tuple_type(utt))) { + jl_svec_t *fn = jl_field_names(utt); + assert(jl_svec_len(fn) == 1); + Value *typ_sym = literal_pointer_val(ctx, jl_svecref(fn, 0)); + Value *cond = ctx.builder.CreateICmpEQ(mark_callee_rooted(ctx, typ_sym), mark_callee_rooted(ctx, boxed(ctx, fld))); + emit_hasnofield_error_ifnot(ctx, cond, utt->name->name, fld); + *ret = emit_getfield_knownidx(ctx, obj, 0, utt, order); + return true; + } + else { + Value *index = ctx.builder.CreateCall(prepare_call(jlfieldindex_func), + {emit_typeof(ctx, obj, false, false), boxed(ctx, fld), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)}); + Value *cond = ctx.builder.CreateICmpNE(index, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), -1)); + emit_hasnofield_error_ifnot(ctx, cond, utt->name->name, fld); + Value *idx2 = ctx.builder.CreateAdd(ctx.builder.CreateIntCast(index, ctx.types().T_size, false), ConstantInt::get(ctx.types().T_size, 1)); // getfield_unknown is 1 based + if (emit_getfield_unknownidx(ctx, ret, obj, idx2, utt, jl_false, order)) + return true; + } } } - // TODO: generic getfield func with more efficient calling convention return false; } @@ -3553,9 +4162,9 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } Value *sz; if (nf != -1) - sz = ConstantInt::get(getSizeTy(ctx.builder.getContext()), nf); + sz = ConstantInt::get(ctx.types().T_size, nf); else - sz = emit_datatype_nfields(ctx, emit_typeof_boxed(ctx, obj)); + sz = emit_datatype_nfields(ctx, emit_typeof(ctx, obj, false, false)); *ret = mark_julia_type(ctx, sz, false, jl_long_type); return true; } @@ -3570,13 +4179,15 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, Value *tyv = 
boxed(ctx, typ); Value *types_svec = emit_datatype_types(ctx, tyv); Value *types_len = emit_datatype_nfields(ctx, tyv); - Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type); + Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type); jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true); if (nargs == 3) emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "fieldtype"); emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck); Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, ctx.types().T_pprjlvalue)), idx); - Value *fieldtyp = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fieldtyp_p, Align(sizeof(void*)))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *fieldtyp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fieldtyp_p, Align(sizeof(void*)))); + setName(ctx.emission_context, fieldtyp, "fieldtype"); *ret = mark_julia_type(ctx, fieldtyp, true, (jl_value_t*)jl_type_type); return true; } @@ -3596,37 +4207,33 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, else { sz = (1 + jl_svec_len(obj.constant)) * sizeof(void*); } - *ret = mark_julia_type(ctx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sz), false, jl_long_type); + *ret = mark_julia_type(ctx, ConstantInt::get(ctx.types().T_size, sz), false, jl_long_type); return true; } // String and SimpleVector's length fields have the same layout - auto ptr = emit_bitcast(ctx, boxed(ctx, obj), getSizePtrTy(ctx.builder.getContext())); - Value *len = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ptr, Align(sizeof(size_t)))); + auto ptr = emit_bitcast(ctx, boxed(ctx, obj), ctx.types().T_size->getPointerTo()); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *len = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr)); MDBuilder MDB(ctx.builder.getContext()); if (sty == jl_simplevector_type) { auto rng = MDB.createRange( - Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX / sizeof(void*) - 1)); + Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX / sizeof(void*) - 1)); cast(len)->setMetadata(LLVMContext::MD_range, rng); - len = ctx.builder.CreateMul(len, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(void*))); - len = ctx.builder.CreateAdd(len, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(void*))); + len = ctx.builder.CreateMul(len, ConstantInt::get(ctx.types().T_size, sizeof(void*))); + len = ctx.builder.CreateAdd(len, ConstantInt::get(ctx.types().T_size, sizeof(void*))); } else { - auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX)); + auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX)); cast(len)->setMetadata(LLVMContext::MD_range, rng); } + setName(ctx.emission_context, len, "sizeof"); *ret = mark_julia_type(ctx, len, false, jl_long_type); return true; } - else if (jl_is_array_type(sty)) { - auto len = emit_arraylen(ctx, obj); - Value *elsize; - size_t elsz; - if 
(arraytype_constelsize(sty, &elsz)) { - elsize = ConstantInt::get(getSizeTy(ctx.builder.getContext()), elsz); - } - else { - elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), getSizeTy(ctx.builder.getContext())); - } + else if (jl_is_genericmemory_type(sty)) { + Value *v = boxed(ctx, obj); + auto len = emit_genericmemorylen(ctx, v, (jl_value_t*)sty); + auto elsize = emit_genericmemoryelsize(ctx, v, obj.typ, true); *ret = mark_julia_type(ctx, ctx.builder.CreateMul(len, elsize), false, jl_long_type); return true; } @@ -3637,7 +4244,9 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, // don't bother codegen constant-folding for toplevel. jl_value_t *ty = static_apply_type(ctx, argv, nargs + 1); if (ty != NULL) { - jl_add_method_root(ctx, ty); + JL_GC_PUSH1(&ty); + ty = jl_ensure_rooted(ctx, ty); + JL_GC_POP(); *ret = mark_julia_const(ctx, ty); return true; } @@ -3648,6 +4257,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, const jl_cgval_t &obj = argv[1]; const jl_cgval_t &fld = argv[2]; jl_datatype_t *stt = (jl_datatype_t*)obj.typ; + ssize_t fieldidx = -1; if (jl_is_type_type((jl_value_t*)stt)) { // the representation type of Type{T} is either typeof(T), or unknown // TODO: could use `issingletontype` predicate here, providing better type knowledge @@ -3659,11 +4269,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } if (!jl_is_concrete_type((jl_value_t*)stt) || jl_is_array_type(stt) || stt == jl_module_type) { // TODO: use ->layout here instead of concrete_type - return false; + goto isdefined_unknown_idx; } assert(jl_is_datatype(stt)); - ssize_t fieldidx = -1; if (fld.constant && jl_is_symbol(fld.constant)) { jl_sym_t *sym = (jl_sym_t*)fld.constant; fieldidx = jl_field_index(stt, sym, 0); @@ -3672,7 +4281,15 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, fieldidx = jl_unbox_long(fld.constant) - 1; } else { - return false; +isdefined_unknown_idx: + if (nargs == 3 || fld.typ != (jl_value_t*)jl_long_type) + return false; + Value *vidx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type); + vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1)); + Value *isd = ctx.builder.CreateCall(prepare_call(jlfieldisdefinedchecked_func), { boxed(ctx, obj), vidx }); + isd = ctx.builder.CreateTrunc(isd, getInt8Ty(ctx.builder.getContext())); + *ret = mark_julia_type(ctx, isd, false, jl_bool_type); + return true; } enum jl_memory_order order = jl_memory_order_unspecified; if (nargs == 3) { @@ -3714,17 +4331,16 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) { Value *fldv; size_t offs = jl_field_offset(stt, fieldidx) / sizeof(jl_value_t*); - auto tbaa = obj.tbaa; - if (tbaa == ctx.tbaa().tbaa_datatype && offs != offsetof(jl_datatype_t, types)) - tbaa = ctx.tbaa().tbaa_const; if (obj.ispointer()) { + auto tbaa = best_field_tbaa(ctx, obj, stt, fieldidx, offs); if (!jl_field_isptr(stt, fieldidx)) offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr; Value *ptr = emit_bitcast(ctx, data_pointer(ctx, obj), ctx.types().T_pprjlvalue); Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, ptr, offs); // emit this using the same type as emit_getfield_knownidx // so that LLVM may be able to load-load forward them and fold the result - fldv = tbaa_decorate(tbaa, 
ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(size_t)))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, ctx.types().alignof_ptr)); cast(fldv)->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order)); } else { @@ -3735,6 +4351,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } } Value *isdef = ctx.builder.CreateIsNotNull(fldv); + setName(ctx.emission_context, isdef, "isdefined"); *ret = mark_julia_type(ctx, isdef, false, jl_bool_type); } else { @@ -3777,18 +4394,24 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } + else if (f == jl_builtin_compilerbarrier && (nargs == 2)) { + emit_typecheck(ctx, argv[1], (jl_value_t*)jl_symbol_type, "compilerbarrier"); + *ret = argv[2]; + return true; + } + return false; } // Returns ctx.types().T_prjlvalue -static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF, - const jl_cgval_t *argv, size_t nargs, JuliaFunction *trampoline) +static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF, + const jl_cgval_t *argv, size_t nargs, JuliaFunction<> *trampoline) { ++EmittedJLCalls; Function *TheTrampoline = prepare_call(trampoline); // emit arguments SmallVector theArgs; - theArgs.push_back(theFptr); + theArgs.push_back(theFptr.getCallee()); if (theF) theArgs.push_back(theF); for (size_t i = 0; i < nargs; i++) { @@ -3802,41 +4425,41 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF, } // Returns ctx.types().T_prjlvalue -static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF, - const jl_cgval_t *argv, size_t nargs, JuliaFunction *trampoline) +static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value *theF, + const jl_cgval_t *argv, size_t nargs, JuliaFunction<> *trampoline) { return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, trampoline); } - -static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject, +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal, const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty) { ++EmittedSpecfunCalls; // emit specialized call site - bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, specFunctionObject, mi->specTypes, jlretty, is_opaque_closure); - FunctionType *cft = returninfo.decl->getFunctionType(); + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg); + FunctionType *cft = returninfo.decl.getFunctionType(); *cc = returninfo.cc; *return_roots = returninfo.return_roots; size_t nfargs = cft->getNumParams(); - Value **argvals = (Value**)alloca(nfargs * sizeof(Value*)); + SmallVector argvals(nfargs); unsigned idx = 0; - AllocaInst *result; + AllocaInst *result = nullptr; switch (returninfo.cc) { case jl_returninfo_t::Boxed: case jl_returninfo_t::Register: case 
jl_returninfo_t::Ghosts: break; case jl_returninfo_t::SRet: - result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType()); + result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); argvals[idx] = result; idx++; break; case jl_returninfo_t::Union: result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes)); + setName(ctx.emission_context, result, "sret_box"); if (returninfo.union_align > 1) result->setAlignment(Align(returninfo.union_align)); argvals[idx] = result; @@ -3849,44 +4472,76 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_ argvals[idx] = return_roots; idx++; } - + if (gcstack_arg) { + argvals[idx] = ctx.pgcstack; + idx++; + } for (size_t i = 0; i < nargs; i++) { - jl_value_t *jt = (is_opaque_closure && i == 0) ? (jl_value_t*)jl_any_type : - jl_nth_slot_type(mi->specTypes, i); - if (is_uniquerep_Type(jt)) - continue; - bool isboxed = deserves_argbox(jt); - Type *et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); - if (type_is_ghost(et)) - continue; - assert(idx < nfargs); - Type *at = cft->getParamType(idx); + jl_value_t *jt = jl_nth_slot_type(specTypes, i); + // n.b.: specTypes is required to be a datatype by construction for specsig jl_cgval_t arg = argv[i]; - if (isboxed) { - assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue); - argvals[idx] = boxed(ctx, arg); - } - else if (et->isAggregateType()) { + if (is_opaque_closure && i == 0) { + Type *at = cft->getParamType(idx); + // Special optimization for opaque closures: We know that specsig opaque + // closures don't look at their type tag (they are fairly quickly discarded + // for their environments). Therefore, we can just pass these as a pointer, + // rather than a boxed value. arg = value_to_pointer(ctx, arg); - // can lazy load on demand, no copy needed - assert(at == PointerType::get(et, AddressSpace::Derived)); argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at)); } - else { - assert(at == et); - Value *val = emit_unbox(ctx, et, arg, jt); - if (!val) { - // There was a type mismatch of some sort - exit early - CreateTrap(ctx.builder); - return jl_cgval_t(); + else if (is_uniquerep_Type(jt)) { + continue; + } else { + bool isboxed = deserves_argbox(jt); + Type *et = isboxed ? 
ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); + if (type_is_ghost(et)) + continue; + assert(idx < nfargs); + Type *at = cft->getParamType(idx); + if (isboxed) { + assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue); + argvals[idx] = boxed(ctx, arg); + } + else if (et->isAggregateType()) { + arg = value_to_pointer(ctx, arg); + // can lazy load on demand, no copy needed + assert(at == PointerType::get(et, AddressSpace::Derived)); + argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx, data_pointer(ctx, arg), at)); + } + else { + assert(at == et); + Value *val = emit_unbox(ctx, et, arg, jt); + if (!val) { + // There was a type mismatch of some sort - exit early + CreateTrap(ctx.builder); + return jl_cgval_t(); + } + argvals[idx] = val; } - argvals[idx] = val; } idx++; } assert(idx == nfargs); - CallInst *call = ctx.builder.CreateCall(returninfo.decl, ArrayRef(&argvals[0], nfargs)); - call->setAttributes(returninfo.decl->getAttributes()); + Value *TheCallee = returninfo.decl.getCallee(); + if (fromexternal) { + std::string namep("p"); + namep += cast(returninfo.decl.getCallee())->getName(); + GlobalVariable *GV = cast_or_null(jl_Module->getNamedValue(namep)); + if (GV == nullptr) { + GV = new GlobalVariable(*jl_Module, TheCallee->getType(), false, + GlobalVariable::ExternalLinkage, + Constant::getNullValue(TheCallee->getType()), + namep); + ctx.emission_context.external_fns[std::make_tuple(fromexternal, true)] = GV; + } + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*)))); + setName(ctx.emission_context, TheCallee, namep); + } + CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals); + call->setAttributes(returninfo.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; switch (returninfo.cc) { @@ -3897,6 +4552,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_ retval = mark_julia_type(ctx, call, false, jlretty); break; case jl_returninfo_t::SRet: + assert(result); retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack); break; case jl_returninfo_t::Union: { @@ -3904,7 +4560,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_ Value *tindex = ctx.builder.CreateExtractValue(call, 1); Value *derived = ctx.builder.CreateSelect( ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), decay_derived(ctx, ctx.builder.CreateBitCast(argvals[0], ctx.types().T_pjlvalue)), decay_derived(ctx, box) @@ -3924,31 +4580,57 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_ return update_julia_type(ctx, retval, inferred_retty); } -static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject, +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal, + const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty) +{ + bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure; + return emit_call_specfun_other(ctx, 
is_opaque_closure, mi->specTypes, jlretty, NULL, + specFunctionObject, fromexternal, argv, nargs, cc, return_roots, inferred_retty); +} + +static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject, jl_code_instance_t *fromexternal, const jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty) { - auto theFptr = cast( - jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee()); - addRetAttr(theFptr, Attribute::NonNull); - Value *ret = emit_jlcall(ctx, theFptr, nullptr, argv, nargs, julia_call); + Value *theFptr; + if (fromexternal) { + std::string namep("p"); + namep += specFunctionObject; + GlobalVariable *GV = cast_or_null(jl_Module->getNamedValue(namep)); + Type *pfunc = ctx.types().T_jlfunc->getPointerTo(); + if (GV == nullptr) { + GV = new GlobalVariable(*jl_Module, pfunc, false, + GlobalVariable::ExternalLinkage, + Constant::getNullValue(pfunc), + namep); + ctx.emission_context.external_fns[std::make_tuple(fromexternal, false)] = GV; + } + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + theFptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(pfunc, GV, Align(sizeof(void*)))); + setName(ctx.emission_context, theFptr, specFunctionObject); + } + else { + theFptr = jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee(); + addRetAttr(cast(theFptr), Attribute::NonNull); + } + Value *ret = emit_jlcall(ctx, FunctionCallee(ctx.types().T_jlfunc, theFptr), nullptr, argv, nargs, julia_call); return update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty); } static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt) { - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); size_t arglen = jl_array_dim0(ex->args); size_t nargs = arglen - 1; assert(arglen >= 2); jl_cgval_t lival = emit_expr(ctx, args[0]); - jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i + 1]); if (argv[i].typ == jl_bottom_type) return jl_cgval_t(); } - return emit_invoke(ctx, lival, argv, nargs, rt); + return emit_invoke(ctx, lival, argv.data(), nargs, rt); } static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt) @@ -3965,20 +4647,20 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const FunctionType *ft = ctx.f->getFunctionType(); StringRef protoname = ctx.f->getName(); if (ft == ctx.types().T_jlfunc) { - result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, argv, nargs, rt); + result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, nullptr, argv, nargs, rt); handled = true; } else if (ft != ctx.types().T_jlfuncparams) { unsigned return_roots = 0; - result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, argv, nargs, &cc, &return_roots, rt); + result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, nullptr, argv, nargs, &cc, &return_roots, rt); handled = true; } } else { jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here - jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; if (ci != jl_nothing) { - auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; + auto invoke = jl_atomic_load_acquire(&codeinst->invoke); // 
check if we know how to handle this specptr if (invoke == jl_fptr_const_return_addr) { result = mark_julia_const(ctx, codeinst->rettype_const); @@ -3990,37 +4672,57 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const std::string name; StringRef protoname; bool need_to_emit = true; - if (ctx.use_cache) { + bool cache_valid = ctx.use_cache || ctx.external_linkage; + bool external = false; + + // Check if we already queued this up + auto it = ctx.call_targets.find(codeinst); + if (need_to_emit && it != ctx.call_targets.end()) { + protoname = it->second.decl->getName(); + need_to_emit = cache_valid = false; + } + + // Check if it is already compiled (either JIT or externally) + if (cache_valid) { // optimization: emit the correct name immediately, if we know it // TODO: use `emitted` map here too to try to consolidate names? - auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this. auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (fptr) { - if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args_addr) { + while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + invoke = jl_atomic_load_relaxed(&codeinst->invoke); + if (specsig ? jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1 : invoke == jl_fptr_args_addr) { protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); - need_to_emit = false; + if (ctx.external_linkage) { + // TODO: Add !specsig support to aotcompile.cpp + // Check that the codeinst is containing native code + if (specsig && jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b100) { + external = true; + need_to_emit = false; + } + } + else { // ctx.use_cache + need_to_emit = false; + } } } } - auto it = ctx.call_targets.find(codeinst); - if (need_to_emit && it != ctx.call_targets.end()) { - protoname = std::get<2>(it->second)->getName(); - need_to_emit = false; - } if (need_to_emit) { - raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << globalUniqueGeneratedNames++; + raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); protoname = StringRef(name); } jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; unsigned return_roots = 0; if (specsig) - result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, argv, nargs, &cc, &return_roots, rt); + result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, external ? codeinst : nullptr, argv, nargs, &cc, &return_roots, rt); else - result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, argv, nargs, rt); + result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, external ? 
codeinst : nullptr, argv, nargs, rt); handled = true; if (need_to_emit) { Function *trampoline_decl = cast(jl_Module->getNamedValue(protoname)); - ctx.call_targets[codeinst] = std::make_tuple(cc, return_roots, trampoline_decl, specsig); + ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, specsig}; } } } @@ -4038,12 +4740,12 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt) { ++EmittedInvokes; - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); size_t arglen = jl_array_dim0(ex->args); size_t nargs = arglen - 1; assert(arglen >= 2); jl_cgval_t lival = emit_expr(ctx, args[0]); - jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i + 1]); if (argv[i].typ == jl_bottom_type) @@ -4052,46 +4754,74 @@ static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_ const jl_cgval_t &f = argv[0]; jl_cgval_t ret; if (f.constant && f.constant == jl_builtin_modifyfield) { - if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv, nargs - 1, &lival)) + if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv.data(), nargs - 1, &lival)) return ret; auto it = builtin_func_map().find(jl_f_modifyfield_addr); assert(it != builtin_func_map().end()); Value *oldnew = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, julia_call); return mark_julia_type(ctx, oldnew, true, rt); } - if (f.constant && jl_typeis(f.constant, jl_intrinsic_type)) { + if (f.constant && jl_typetagis(f.constant, jl_intrinsic_type)) { JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant); if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1) - return emit_atomic_pointerop(ctx, fi, argv, nargs - 1, &lival); + return emit_atomic_pointerop(ctx, fi, argv.data(), nargs - 1, &lival); } // emit function and arguments - Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, julia_call); + Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv.data(), nargs, julia_call); return mark_julia_type(ctx, callval, true, rt); } +static jl_cgval_t emit_specsig_oc_call(jl_codectx_t &ctx, jl_value_t *oc_type, jl_value_t *sigtype, jl_cgval_t *argv, size_t nargs) +{ + jl_datatype_t *oc_argt = (jl_datatype_t *)jl_tparam0(oc_type); + jl_value_t *oc_rett = jl_tparam1(oc_type); + jl_svec_t *types = jl_get_fieldtypes((jl_datatype_t*)oc_argt); + size_t ntypes = jl_svec_len(types); + for (size_t i = 0; i < nargs-1; ++i) { + jl_value_t *typ = i >= ntypes ? 
jl_svecref(types, ntypes-1) : jl_svecref(types, i); + if (jl_is_vararg(typ)) + typ = jl_unwrap_vararg(typ); + emit_typecheck(ctx, argv[i+1], typ, "typeassert"); + argv[i+1] = update_julia_type(ctx, argv[i+1], typ); + if (argv[i+1].typ == jl_bottom_type) + return jl_cgval_t(); + } + jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed; + unsigned return_roots = 0; + + // Load specptr + jl_cgval_t &theArg = argv[0]; + jl_cgval_t closure_specptr = emit_getfield_knownidx(ctx, theArg, 4, (jl_datatype_t*)oc_type, jl_memory_order_notatomic); + Value *specptr = emit_unbox(ctx, ctx.types().T_size, closure_specptr, (jl_value_t*)jl_long_type); + JL_GC_PUSH1(&sigtype); + jl_cgval_t r = emit_call_specfun_other(ctx, true, sigtype, oc_rett, specptr, "", NULL, argv, nargs, + &cc, &return_roots, oc_rett); + JL_GC_POP(); + return r; +} + static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bool is_promotable) { ++EmittedCalls; - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); size_t nargs = jl_array_dim0(ex->args); assert(nargs >= 1); jl_cgval_t f = emit_expr(ctx, args[0]); + if (f.typ == jl_bottom_type) { + return jl_cgval_t(); + } - if (f.constant && jl_typeis(f.constant, jl_intrinsic_type)) { + if (f.constant && jl_typetagis(f.constant, jl_intrinsic_type)) { JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant); return emit_intrinsic(ctx, fi, args, nargs - 1); } - jl_value_t *context = ctx.params->generic_context == jl_nothing ? nullptr : ctx.params->generic_context; - size_t n_generic_args = nargs + (context ? 1 : 0); + size_t n_generic_args = nargs; + + SmallVector generic_argv(n_generic_args); + jl_cgval_t *argv = generic_argv.data(); - jl_cgval_t *generic_argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * n_generic_args); - jl_cgval_t *argv = generic_argv; - if (context) { - generic_argv[0] = mark_julia_const(ctx, context); - argv = &generic_argv[1]; - } argv[0] = f; for (size_t i = 1; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i]); @@ -4099,41 +4829,92 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo return jl_cgval_t(); // anything past here is unreachable } - if (f.constant && jl_isa(f.constant, (jl_value_t*)jl_builtin_type)) { - if (f.constant == jl_builtin_ifelse && nargs == 4) - return emit_ifelse(ctx, argv[1], argv[2], argv[3], rt); - jl_cgval_t result; - bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex, is_promotable); - if (handled) { - return result; + if (jl_subtype(f.typ, (jl_value_t*)jl_builtin_type)) { + if (f.constant) { + if (f.constant == jl_builtin_ifelse && nargs == 4) + return emit_ifelse(ctx, argv[1], argv[2], argv[3], rt); + jl_cgval_t result; + bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex, is_promotable); + if (handled) + return result; + + // special case for some known builtin not handled by emit_builtin_call + auto it = builtin_func_map().find(jl_get_builtin_fptr((jl_datatype_t*)jl_typeof(f.constant))); + if (it != builtin_func_map().end()) { + Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, julia_call); + setName(ctx.emission_context, ret, it->second->name + "_ret"); + return mark_julia_type(ctx, ret, true, rt); + } + } + FunctionCallee fptr; + Value *F; + JuliaFunction<> *cc; + if (f.typ == (jl_value_t*)jl_intrinsic_type) { + fptr = prepare_call(jlintrinsic_func); + F = 
f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V; + F = decay_derived(ctx, maybe_bitcast(ctx, F, ctx.types().T_pjlvalue)); + cc = julia_call3; } - - // special case for known builtin not handled by emit_builtin_call - auto it = builtin_func_map().find(jl_get_builtin_fptr(f.constant)); - if (it != builtin_func_map().end()) { - Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, julia_call); - return mark_julia_type(ctx, ret, true, rt); + else { + fptr = FunctionCallee(get_func_sig(ctx.builder.getContext()), ctx.builder.CreateCall(prepare_call(jlgetbuiltinfptr_func), {emit_typeof(ctx, f)})); + F = boxed(ctx, f); + cc = julia_call; + } + Value *ret = emit_jlcall(ctx, fptr, F, &argv[1], nargs - 1, cc); + setName(ctx.emission_context, ret, "Builtin_ret"); + return mark_julia_type(ctx, ret, true, rt); + } + + // handle calling an OpaqueClosure + if (jl_is_concrete_type(f.typ) && jl_subtype(f.typ, (jl_value_t*)jl_opaque_closure_type)) { + jl_value_t *oc_argt = jl_tparam0(f.typ); + jl_value_t *oc_rett = jl_tparam1(f.typ); + if (jl_is_datatype(oc_argt) && jl_tupletype_length_compat(oc_argt, nargs-1)) { + jl_value_t *sigtype = jl_argtype_with_function_type((jl_value_t*)f.typ, (jl_value_t*)oc_argt); + if (uses_specsig(sigtype, false, true, oc_rett, true)) { + JL_GC_PUSH1(&sigtype); + jl_cgval_t r = emit_specsig_oc_call(ctx, f.typ, sigtype, argv, nargs); + JL_GC_POP(); + return r; + } } } // emit function and arguments - Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, generic_argv, n_generic_args, julia_call); + Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, generic_argv.data(), n_generic_args, julia_call); return mark_julia_type(ctx, callval, true, rt); } // --- accessing and assigning variables --- -static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name) +static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name, jl_value_t *scope) +{ + ++EmittedUndefVarErrors; + BasicBlock *err = BasicBlock::Create(ctx.builder.getContext(), "err", ctx.f); + BasicBlock *ifok = BasicBlock::Create(ctx.builder.getContext(), "ok"); + ctx.builder.CreateCondBr(ok, ifok, err); + ctx.builder.SetInsertPoint(err); + ctx.builder.CreateCall(prepare_call(jlundefvarerror_func), { + mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)name)), + mark_callee_rooted(ctx, literal_pointer_val(ctx, scope))}); + ctx.builder.CreateUnreachable(); + ifok->insertInto(ctx.f); + ctx.builder.SetInsertPoint(ifok); +} + +static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *type, jl_cgval_t name) { ++EmittedUndefVarErrors; + assert(name.typ == (jl_value_t*)jl_symbol_type); BasicBlock *err = BasicBlock::Create(ctx.builder.getContext(), "err", ctx.f); BasicBlock *ifok = BasicBlock::Create(ctx.builder.getContext(), "ok"); ctx.builder.CreateCondBr(ok, ifok, err); ctx.builder.SetInsertPoint(err); - ctx.builder.CreateCall(prepare_call(jlundefvarerror_func), - mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)name))); + ctx.builder.CreateCall(prepare_call(jlhasnofield_func), + {mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)type)), + mark_callee_rooted(ctx, boxed(ctx, name))}); ctx.builder.CreateUnreachable(); - ctx.f->getBasicBlockList().push_back(ifok); + ifok->insertInto(ctx.f); ctx.builder.SetInsertPoint(ifok); } @@ -4143,63 +4924,78 @@ static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name) static Value 
*global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s, jl_binding_t **pbnd, bool assign) { - jl_binding_t *b = NULL; - if (assign) - b = jl_get_binding_wr(m, s, 0); - else - b = jl_get_binding(m, s); + jl_binding_t *b = jl_get_module_binding(m, s, 1); + if (assign) { + if (jl_atomic_load_relaxed(&b->owner) == NULL) + // not yet declared + b = NULL; + } + else { + b = jl_atomic_load_relaxed(&b->owner); + if (b == NULL) + // try to look this up now + b = jl_get_binding(m, s); + } if (b == NULL) { // var not found. switch to delayed lookup. Constant *initnul = Constant::getNullValue(ctx.types().T_pjlvalue); GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), ctx.types().T_pjlvalue, - false, GlobalVariable::PrivateLinkage, initnul); + false, GlobalVariable::PrivateLinkage, initnul, "jl_binding_ptr"); // LLVM has bugs with nameless globals LoadInst *cachedval = ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, bindinggv, Align(sizeof(void*))); + setName(ctx.emission_context, cachedval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".cached"); cachedval->setOrdering(AtomicOrdering::Unordered); BasicBlock *have_val = BasicBlock::Create(ctx.builder.getContext(), "found"); BasicBlock *not_found = BasicBlock::Create(ctx.builder.getContext(), "notfound"); BasicBlock *currentbb = ctx.builder.GetInsertBlock(); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpNE(cachedval, initnul), have_val, not_found); - ctx.f->getBasicBlockList().push_back(not_found); + auto iscached = ctx.builder.CreateICmpNE(cachedval, initnul); + setName(ctx.emission_context, iscached, "iscached"); + ctx.builder.CreateCondBr(iscached, have_val, not_found); + not_found->insertInto(ctx.f); ctx.builder.SetInsertPoint(not_found); Value *bval = ctx.builder.CreateCall(prepare_call(assign ? 
jlgetbindingwrorerror_func : jlgetbindingorerror_func), { literal_pointer_val(ctx, (jl_value_t*)m), literal_pointer_val(ctx, (jl_value_t*)s) }); + setName(ctx.emission_context, bval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".found"); ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); ctx.builder.CreateBr(have_val); - ctx.f->getBasicBlockList().push_back(have_val); + have_val->insertInto(ctx.f); ctx.builder.SetInsertPoint(have_val); PHINode *p = ctx.builder.CreatePHI(ctx.types().T_pjlvalue, 2); p->addIncoming(cachedval, currentbb); p->addIncoming(bval, not_found); + setName(ctx.emission_context, p, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s)); return p; } if (assign) { - if (b->owner != m) { - char *msg; - (void)asprintf(&msg, "cannot assign a value to imported variable %s.%s from module %s", - jl_symbol_name(b->owner->name), jl_symbol_name(s), jl_symbol_name(m->name)); - emit_error(ctx, msg); - free(msg); + if (jl_atomic_load_relaxed(&b->owner) != b) { + // this will fail at runtime, so defer to the runtime to create the error + ctx.builder.CreateCall(prepare_call(jlgetbindingwrorerror_func), + { literal_pointer_val(ctx, (jl_value_t*)m), + literal_pointer_val(ctx, (jl_value_t*)s) }); + CreateTrap(ctx.builder); return NULL; } } else { if (b->deprecated) - cg_bdw(ctx, b); + cg_bdw(ctx, s, b); } *pbnd = b; return julia_binding_gv(ctx, b); } -static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa) +static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa) { LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); + setName(ctx.emission_context, v, jl_symbol_name(name) + StringRef(".checked")); if (isvol) v->setVolatile(true); v->setOrdering(AtomicOrdering::Unordered); - if (tbaa) - tbaa_decorate(tbaa, v); - undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name); + if (tbaa) { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.decorateInst(v); + } + undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name, scope); return mark_julia_type(ctx, v, true, jl_any_type); } @@ -4216,42 +5012,23 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) ctx.types().T_prjlvalue, ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - Value *sp = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); - Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false), - track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); + setName(ctx.emission_context, sp, "sparam"); + Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type)); jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig; for (size_t j = 0; j < i; j++) { sparam = (jl_unionall_t*)sparam->body; assert(jl_is_unionall(sparam)); } - undef_var_error_ifnot(ctx, isnull, sparam->var->name); + undef_var_error_ifnot(ctx, isnull, sparam->var->name, (jl_value_t*)jl_static_parameter_sym); return mark_julia_type(ctx, sp, true, jl_any_type); } -static jl_cgval_t emit_global(jl_codectx_t &ctx, jl_sym_t *sym) -{ - jl_binding_t *jbp = 
NULL; - Value *bp = global_binding_pointer(ctx, ctx.module, sym, &jbp, false); - if (bp == NULL) - return jl_cgval_t(); - if (jbp && jbp->value != NULL) { - if (jbp->constp) - return mark_julia_const(ctx, jbp->value); - // double-check that a global variable is actually defined. this - // can be a problem in parallel when a definition is missing on - // one machine. - LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); - v->setOrdering(AtomicOrdering::Unordered); - tbaa_decorate(ctx.tbaa().tbaa_binding, v); - return mark_julia_type(ctx, v, true, jl_any_type); - } - return emit_checked_var(ctx, bp, sym, false, ctx.tbaa().tbaa_binding); -} - static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) { Value *isnull = NULL; - if (jl_is_slot(sym) || jl_is_argument(sym)) { + if (jl_is_slotnumber(sym) || jl_is_argument(sym)) { size_t sl = jl_slot_number(sym) - 1; jl_varinfo_t &vi = ctx.slots[sl]; if (!vi.usedUndef) @@ -4265,10 +5042,10 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) Value *box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue)); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value - // as indicated by testing (pTIndex & 0x80) + // as indicated by testing (pTIndex & UNION_BOX_MARKER) Value *tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(sizeof(void*)), vi.isVolatile); Value *load_unbox = ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull); } @@ -4291,9 +5068,9 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) ctx.types().T_prjlvalue, ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - Value *sp = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); - isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false), - track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); + isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type)); } else { jl_module_t *modu; @@ -4309,12 +5086,13 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) } jl_binding_t *bnd = jl_get_binding(modu, name); if (bnd) { - if (bnd->value != NULL) + if (jl_atomic_load_relaxed(&bnd->value) != NULL) return mark_julia_const(ctx, jl_true); Value *bp = julia_binding_gv(ctx, bnd); bp = julia_binding_pvalue(ctx, bp); LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); - tbaa_decorate(ctx.tbaa().tbaa_binding, v); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); + ai.decorateInst(v); v->setOrdering(AtomicOrdering::Unordered); isnull = ctx.builder.CreateICmpNE(v, Constant::getNullValue(ctx.types().T_prjlvalue)); } @@ -4329,8 +5107,8 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) return mark_julia_type(ctx, isnull, false, jl_bool_type); } -static jl_cgval_t 
emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname, jl_value_t *better_typ=NULL) { - jl_value_t *typ = better_typ ? better_typ : vi.value.typ; +static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname) { + jl_value_t *typ = vi.value.typ; jl_cgval_t v; Value *isnull = NULL; if (vi.boxroot == NULL || vi.pTIndex != NULL) { @@ -4342,9 +5120,11 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va else { // copy value to a non-mutable (non-volatile SSA) location AllocaInst *varslot = cast(vi.value.V); + setName(ctx.emission_context, varslot, jl_symbol_name(varname)); Type *T = varslot->getAllocatedType(); assert(!varslot->isArrayAllocation() && "variables not expected to be VLA"); AllocaInst *ssaslot = cast(varslot->clone()); + setName(ctx.emission_context, ssaslot, jl_symbol_name(varname) + StringRef(".ssa")); ssaslot->insertAfter(varslot); if (vi.isVolatile) { Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, @@ -4355,7 +5135,7 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va else { const DataLayout &DL = jl_Module->getDataLayout(); uint64_t sz = DL.getTypeStoreSize(T); - emit_memcpy(ctx, ssaslot, ctx.tbaa().tbaa_stack, vi.value, sz, ssaslot->getAlign().value()); + emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value(), varslot->getAlign().value()); } Value *tindex = NULL; if (vi.pTIndex) @@ -4377,9 +5157,9 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, typ); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value - // as indicated by testing (pTIndex & 0x80) + // as indicated by testing (pTIndex & UNION_BOX_MARKER) Value *load_unbox = ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); if (vi.usedUndef) isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull); @@ -4397,8 +5177,10 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va isnull = box_isnull; } } - if (isnull) - undef_var_error_ifnot(ctx, isnull, varname); + if (isnull) { + setName(ctx.emission_context, isnull, jl_symbol_name(varname) + StringRef("_is_null")); + undef_var_error_ifnot(ctx, isnull, varname, (jl_value_t*)jl_local_sym); + } return v; } @@ -4407,14 +5189,13 @@ static jl_cgval_t emit_local(jl_codectx_t &ctx, jl_value_t *slotload) size_t sl = jl_slot_number(slotload) - 1; jl_varinfo_t &vi = ctx.slots[sl]; jl_sym_t *sym = slot_symbol(ctx, sl); - jl_value_t *typ = NULL; - if (jl_typeis(slotload, jl_typedslot_type)) { - // use the better type from inference for this load - typ = jl_typedslot_get_type(slotload); - if (jl_is_typevar(typ)) - typ = ((jl_tvar_t*)typ)->ub; + if (sym == jl_unused_sym) { + // This shouldn't happen in well-formed input, but let's be robust, + // since we otherwise cause undefined behavior here. 
+ emit_error(ctx, "(INTERNAL ERROR): Tried to use `#undef#` argument."); + return jl_cgval_t(); } - return emit_varinfo(ctx, vi, sym, typ); + return emit_varinfo(ctx, vi, sym); } static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Value *isboxed, jl_cgval_t rval_info) @@ -4439,13 +5220,14 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu } else { Value *dest = vi.value.V; - if (vi.pTIndex) + if (vi.pTIndex) // TODO: use lifetime-end here instead ctx.builder.CreateStore(UndefValue::get(cast(vi.value.V)->getAllocatedType()), vi.value.V); Type *store_ty = julia_type_to_llvm(ctx, rval_info.constant ? jl_typeof(rval_info.constant) : rval_info.typ); Type *dest_ty = store_ty->getPointerTo(); if (dest_ty != dest->getType()) dest = emit_bitcast(ctx, dest, dest_ty); - tbaa_decorate(ctx.tbaa().tbaa_stack, ctx.builder.CreateStore( + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); + ai.decorateInst(ctx.builder.CreateStore( emit_unbox(ctx, store_ty, rval_info, rval_info.typ), dest, vi.isVolatile)); @@ -4461,8 +5243,8 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu // This check should probably mostly catch the relevant situations. if (vi.value.V != rval_info.V) { Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ)); - emit_memcpy(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, copy_bytes, - julia_alignment(rval_info.typ), vi.isVolatile); + emit_memcpy(ctx, vi.value.V, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), rval_info, copy_bytes, + julia_alignment(rval_info.typ), julia_alignment(rval_info.typ), vi.isVolatile); } } else { @@ -4503,14 +5285,14 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) if (dest) { Instruction *phi = dest->clone(); phi->insertAfter(dest); - PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_len(edges), "tindex_phi"); + PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi"); BB->getInstList().insert(InsertPt, Tindex_phi); - PHINode *ptr_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_len(edges), "ptr_phi"); + PHINode *ptr_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_nrows(edges), "ptr_phi"); BB->getInstList().insert(InsertPt, ptr_phi); Value *isboxed = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); - ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, MaybeAlign(0), nbytes, false); + ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, dest->getAlign(), nbytes, false); ctx.builder.CreateLifetimeEnd(dest); Value *ptr = ctx.builder.CreateSelect(isboxed, maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), getInt8PtrTy(ctx.builder.getContext())), @@ -4518,17 +5300,17 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); // XXX: this TBAA is wrong for ptr_phi val.Vboxed = ptr_phi; ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, r)); - ctx.SAvalues.at(idx) = val; - ctx.ssavalue_assigned.at(idx) = true; + ctx.SAvalues[idx] = val; + ctx.ssavalue_assigned[idx] = true; return; } else if 
(allunbox) { - PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_len(edges), "tindex_phi"); + PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi"); BB->getInstList().insert(InsertPt, Tindex_phi); jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)NULL, r)); - ctx.SAvalues.at(idx) = val; - ctx.ssavalue_assigned.at(idx) = true; + ctx.SAvalues[idx] = val; + ctx.ssavalue_assigned[idx] = true; return; } } @@ -4537,10 +5319,10 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) // The frontend should really not emit this, but we allow it // for convenience. if (type_is_ghost(vtype)) { - assert(jl_is_datatype(phiType) && ((jl_datatype_t*)phiType)->instance); + assert(jl_is_datatype(phiType) && jl_is_datatype_singleton((jl_datatype_t*)phiType)); // Skip adding it to the PhiNodes list, since we didn't create one. - ctx.SAvalues.at(idx) = mark_julia_const(ctx, ((jl_datatype_t*)phiType)->instance); - ctx.ssavalue_assigned.at(idx) = true; + ctx.SAvalues[idx] = mark_julia_const(ctx, ((jl_datatype_t*)phiType)->instance); + ctx.ssavalue_assigned[idx] = true; return; } jl_cgval_t slot; @@ -4550,26 +5332,26 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) // here it's moved into phi in the successor (from dest) dest = emit_static_alloca(ctx, vtype); Value *phi = emit_static_alloca(ctx, vtype); - ctx.builder.CreateMemCpy(phi, MaybeAlign(julia_alignment(phiType)), - dest, MaybeAlign(0), + ctx.builder.CreateMemCpy(phi, Align(julia_alignment(phiType)), + dest, dest->getAlign(), jl_datatype_size(phiType), false); ctx.builder.CreateLifetimeEnd(dest); slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack); } else { - value_phi = PHINode::Create(vtype, jl_array_len(edges), "value_phi"); + value_phi = PHINode::Create(vtype, jl_array_nrows(edges), "value_phi"); BB->getInstList().insert(InsertPt, value_phi); slot = mark_julia_type(ctx, value_phi, isboxed, phiType); } ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, r)); - ctx.SAvalues.at(idx) = slot; - ctx.ssavalue_assigned.at(idx) = true; + ctx.SAvalues[idx] = slot; + ctx.ssavalue_assigned[idx] = true; return; } static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_value_t *r) { - assert(!ctx.ssavalue_assigned.at(ssaidx_0based)); + assert(!ctx.ssavalue_assigned[ssaidx_0based]); if (jl_is_phinode(r)) { return emit_phinode_assign(ctx, ssaidx_0based, r); } @@ -4595,18 +5377,27 @@ static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_valu } } } - ctx.SAvalues.at(ssaidx_0based) = slot; // now SAvalues[ssaidx_0based] contains the SAvalue - ctx.ssavalue_assigned.at(ssaidx_0based) = true; + ctx.SAvalues[ssaidx_0based] = slot; // now SAvalues[ssaidx_0based] contains the SAvalue + ctx.ssavalue_assigned[ssaidx_0based] = true; } -static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t rval_info, jl_value_t *l=NULL) +static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t rval_info, jl_value_t *l=NULL, bool allow_mismatch=false) { if (!vi.used || vi.value.typ == jl_bottom_type) return; // convert rval-type to lval-type jl_value_t *slot_type = vi.value.typ; - rval_info = convert_julia_type(ctx, rval_info, slot_type); + // If allow_mismatch is set, type mismatches will not result in traps. 
+ // This is used for upsilon nodes, where the destination can have a narrower + // type than the store, if inference determines that the store is never read. + Value *skip = NULL; + rval_info = convert_julia_type(ctx, rval_info, slot_type, &skip); + if (!allow_mismatch && skip) { + CreateTrap(ctx.builder); + return; + } + if (rval_info.typ == jl_bottom_type) return; @@ -4616,13 +5407,13 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t if (rval_info.TIndex) { tindex = rval_info.TIndex; if (!vi.boxroot) - tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); + tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), ~UNION_BOX_MARKER)); } else { assert(rval_info.isboxed || rval_info.constant); tindex = compute_tindex_unboxed(ctx, rval_info, vi.value.typ); if (vi.boxroot) - tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); else rval_info.TIndex = tindex; } @@ -4636,7 +5427,7 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t if (vi.pTIndex && rval_info.TIndex) { ctx.builder.CreateStore(rval_info.TIndex, vi.pTIndex, vi.isVolatile); isboxed = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(rval_info.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(rval_info.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); rval = rval_info.Vboxed ? rval_info.Vboxed : Constant::getNullValue(ctx.types().T_prjlvalue); assert(rval->getType() == ctx.types().T_prjlvalue); @@ -4651,8 +5442,13 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t // store unboxed variables if (!vi.boxroot || (vi.pTIndex && rval_info.TIndex)) { - emit_vi_assignment_unboxed(ctx, vi, isboxed, rval_info); + emit_guarded_test(ctx, skip ? ctx.builder.CreateNot(skip) : nullptr, nullptr, [&]{ + emit_vi_assignment_unboxed(ctx, vi, isboxed, rval_info); + return nullptr; + }); } + + return; } static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssize_t ssaval) @@ -4660,27 +5456,28 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssi assert(!jl_is_ssavalue(l)); jl_cgval_t rval_info = emit_expr(ctx, r, ssaval); - if (jl_is_slot(l)) { + if (jl_is_slotnumber(l)) { int sl = jl_slot_number(l) - 1; // it's a local variable jl_varinfo_t &vi = ctx.slots[sl]; - return emit_varinfo_assign(ctx, vi, rval_info, l); + emit_varinfo_assign(ctx, vi, rval_info, l); + return; } - jl_binding_t *bnd = NULL; - Value *bp = NULL; - if (jl_is_symbol(l)) - bp = global_binding_pointer(ctx, ctx.module, (jl_sym_t*)l, &bnd, true); + jl_module_t *mod; + jl_sym_t *sym; + if (jl_is_symbol(l)) { + mod = ctx.module; + sym = (jl_sym_t*)l; + } else { assert(jl_is_globalref(l)); - bp = global_binding_pointer(ctx, jl_globalref_mod(l), jl_globalref_name(l), &bnd, true); + mod = jl_globalref_mod(l); + sym = jl_globalref_name(l); } - if (bp != NULL) { - emit_globalset(ctx, bnd, bp, rval_info, AtomicOrdering::Unordered); - // Global variable. Does not need debug info because the debugger knows about - // its memory location. - } - return; + emit_globalset(ctx, mod, sym, rval_info, AtomicOrdering::Release); + // Global variable. 
Does not need debug info because the debugger knows about + // its memory location. } static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val) @@ -4695,15 +5492,17 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val) // upsilon node is not dynamically observed. if (val) { jl_cgval_t rval_info = emit_expr(ctx, val); - if (rval_info.typ == jl_bottom_type) + if (rval_info.typ == jl_bottom_type) { // as a special case, PhiC nodes are allowed to use undefined // values, since they are just copy operations, so we need to // ignore the store (it will not by dynamically observed), while // normally, for any other operation result, we'd assume this store // was unreachable and dead val = NULL; - else - emit_varinfo_assign(ctx, vi, rval_info); + } + else { + emit_varinfo_assign(ctx, vi, rval_info, NULL, true); + } } if (!val) { if (vi.boxroot) { @@ -4715,7 +5514,7 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val) // does need to satisfy the union invariants (i.e. inbounds // tindex). ctx.builder.CreateAlignedStore( - vi.boxroot ? ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80) : + vi.boxroot ? ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER) : ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x01), vi.pTIndex, Align(1), true); } @@ -4734,7 +5533,7 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val) static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, const jl_cgval_t &fexpr, jl_value_t *rt, jl_svec_t *argt); -static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg) +static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg) { bool isbool = (condV.typ == (jl_value_t*)jl_bool_type); if (!isbool) { @@ -4745,19 +5544,18 @@ static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const s emit_typecheck(ctx, condV, (jl_value_t*)jl_bool_type, msg); } if (isbool) { - Value *cond = emit_unbox(ctx, getInt8Ty(ctx.builder.getContext()), condV, (jl_value_t*)jl_bool_type); - assert(cond->getType() == getInt8Ty(ctx.builder.getContext())); - return ctx.builder.CreateXor(ctx.builder.CreateTrunc(cond, getInt1Ty(ctx.builder.getContext())), ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1)); + Value *cond = emit_unbox(ctx, getInt1Ty(ctx.builder.getContext()), condV, (jl_value_t*)jl_bool_type); + return ctx.builder.CreateNot(cond); } if (condV.isboxed) { return ctx.builder.CreateICmpEQ(boxed(ctx, condV), track_pjlvalue(ctx, literal_pointer_val(ctx, jl_false))); } - // not a boolean - return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); // TODO: replace with Undef + // not a boolean (unreachable dead code) + return UndefValue::get(getInt1Ty(ctx.builder.getContext())); } -static Value *emit_condition(jl_codectx_t &ctx, jl_value_t *cond, const std::string &msg) +static Value *emit_condition(jl_codectx_t &ctx, jl_value_t *cond, const Twine &msg) { return emit_condition(ctx, emit_expr(ctx, cond), msg); } @@ -4766,7 +5564,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) { if (jl_is_ssavalue(expr) && ssaval_result == -1) return; // value not used, no point in attempting codegen for it - if (jl_is_slot(expr) && ssaval_result == -1) { + if (jl_is_slotnumber(expr) && ssaval_result == -1) { size_t sl = jl_slot_number(expr) - 1; jl_varinfo_t &vi = ctx.slots[sl]; if (vi.usedUndef) @@ -4778,7 +5576,7 @@ static void 
emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) } if (jl_is_newvarnode(expr)) { jl_value_t *var = jl_fieldref(expr, 0); - assert(jl_is_slot(var)); + assert(jl_is_slotnumber(var)); jl_varinfo_t &vi = ctx.slots[jl_slot_number(var)-1]; if (vi.usedUndef) { // create a new uninitialized variable @@ -4796,7 +5594,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) return; } jl_expr_t *ex = (jl_expr_t*)expr; - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); jl_sym_t *head = ex->head; if (head == jl_meta_sym || head == jl_inbounds_sym || head == jl_coverageeffect_sym || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) { @@ -4805,13 +5603,37 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) return; } else if (head == jl_leave_sym) { - assert(jl_is_long(args[0])); + int hand_n_leave = 0; + Value *scope_to_restore = nullptr; + Value *scope_ptr = nullptr; + for (size_t i = 0; i < jl_expr_nargs(ex); ++i) { + jl_value_t *arg = args[i]; + if (arg == jl_nothing) + continue; + assert(jl_is_ssavalue(arg)); + size_t enter_idx = ((jl_ssavalue_t*)arg)->id - 1; + jl_value_t *enter_stmt = jl_array_ptr_ref(ctx.code, enter_idx); + if (enter_stmt == jl_nothing) + continue; + if (ctx.scope_restore.count(enter_idx)) + std::tie(scope_to_restore, scope_ptr) = ctx.scope_restore[enter_idx]; + if (jl_enternode_catch_dest(enter_stmt)) { + // We're not actually setting up the exception frames for these, so + // we don't need to exit them. + hand_n_leave += 1; + } + } ctx.builder.CreateCall(prepare_call(jlleave_func), - ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_unbox_long(args[0]))); + ConstantInt::get(getInt32Ty(ctx.builder.getContext()), hand_n_leave)); + if (scope_to_restore) { + jl_aliasinfo_t scope_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + scope_ai.decorateInst( + ctx.builder.CreateAlignedStore(scope_to_restore, scope_ptr, ctx.types().alignof_ptr)); + } } else if (head == jl_pop_exception_sym) { jl_cgval_t excstack_state = emit_expr(ctx, jl_exprarg(expr, 0)); - assert(excstack_state.V && excstack_state.V->getType() == getSizeTy(ctx.builder.getContext())); + assert(excstack_state.V && excstack_state.V->getType() == ctx.types().T_size); ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), excstack_state.V); return; } @@ -4825,8 +5647,7 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met { jl_svec_t *sig_args = NULL; jl_value_t *sigtype = NULL; - jl_code_info_t *ir = NULL; - JL_GC_PUSH3(&sig_args, &sigtype, &ir); + JL_GC_PUSH2(&sig_args, &sigtype); size_t nsig = 1 + jl_svec_len(argt_typ->parameters); sig_args = jl_alloc_svec_uninit(nsig); @@ -4834,25 +5655,38 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met for (size_t i = 0; i < jl_svec_len(argt_typ->parameters); ++i) { jl_svecset(sig_args, 1+i, jl_svecref(argt_typ->parameters, i)); } - sigtype = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig); + sigtype = jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig); + + jl_method_instance_t *mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec); + jl_code_instance_t *ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.world, ctx.world); + + if (ci == NULL || (jl_value_t*)ci == jl_nothing) { + JL_GC_POP(); + return std::make_pair((Function*)NULL, (Function*)NULL); + } + auto inferred = 
jl_atomic_load_relaxed(&ci->inferred); + if (!inferred || inferred == jl_nothing) { + JL_GC_POP(); + return std::make_pair((Function*)NULL, (Function*)NULL); + } - jl_method_instance_t *mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec); - jl_code_instance_t *ci = (jl_code_instance_t*)jl_rettype_inferred(mi, ctx.world, ctx.world); + auto it = ctx.emission_context.compiled_functions.find(ci); - if (ci == NULL || (jl_value_t*)ci == jl_nothing || ci->inferred == NULL || ci->inferred == jl_nothing) { + if (it == ctx.emission_context.compiled_functions.end()) { + ++EmittedOpaqueClosureFunctions; + jl_code_info_t *ir = jl_uncompress_ir(closure_method, ci, (jl_value_t*)inferred); + JL_GC_PUSH1(&ir); + // TODO: Emit this inline and outline it late using LLVM's coroutine support. + orc::ThreadSafeModule closure_m = jl_create_ts_module( + name_from_method_instance(mi), ctx.emission_context.tsctx, + jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple())); + jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context); JL_GC_POP(); - return std::make_pair((Function*)NULL, (Function*)NULL); + it = ctx.emission_context.compiled_functions.insert(std::make_pair(ci, std::make_pair(std::move(closure_m), std::move(closure_decls)))).first; } - ++EmittedOpaqueClosureFunctions; - ir = jl_uncompress_ir(closure_method, ci, (jl_array_t*)ci->inferred); - - // TODO: Emit this inline and outline it late using LLVM's coroutine support. - orc::ThreadSafeModule closure_m = jl_create_llvm_module( - name_from_method_instance(mi), ctx.emission_context.tsctx, - ctx.emission_context.imaging, - jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple())); - jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context); + auto &closure_m = it->second.first; + auto &closure_decls = it->second.second; assert(closure_decls.functionObject != "jl_fptr_sparam"); bool isspecsig = closure_decls.functionObject != "jl_fptr_args"; @@ -4867,7 +5701,8 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met F = Function::Create(get_func_sig(ctx.builder.getContext()), Function::ExternalLinkage, fname, jl_Module); - jl_init_function(F); + jl_init_function(F, ctx.emission_context.TargetTriple); + jl_name_jlfunc_args(ctx.emission_context, F); F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()})); } Function *specF = NULL; @@ -4877,12 +5712,11 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met //emission context holds context lock so can get module specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); if (specF) { - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, - closure_decls.specFunctionObject, sigtype, rettype, true); - specF = returninfo.decl; + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL, + closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); + specF = cast(returninfo.decl.getCallee()); } } - ctx.oc_modules.push_back(std::move(closure_m)); JL_GC_POP(); return std::make_pair(F, specF); } @@ -4893,20 +5727,20 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ { if (jl_is_symbol(expr)) { jl_sym_t *sym = (jl_sym_t*)expr; - return emit_global(ctx, sym); + return emit_globalref(ctx, ctx.module, sym, AtomicOrdering::Unordered); } - if (jl_is_slot(expr) || jl_is_argument(expr)) { + if 
(jl_is_slotnumber(expr) || jl_is_argument(expr)) { return emit_local(ctx, expr); } if (jl_is_ssavalue(expr)) { ssize_t idx = ((jl_ssavalue_t*)expr)->id - 1; assert(idx >= 0); - if (!ctx.ssavalue_assigned.at(idx)) { - ctx.ssavalue_assigned.at(idx) = true; // (assignment, not comparison test) + if (!ctx.ssavalue_assigned[idx]) { + ctx.ssavalue_assigned[idx] = true; // (assignment, not comparison test) return jl_cgval_t(); // dead code branch } else { - return ctx.SAvalues.at(idx); // at this point, SAvalues[idx] actually contains the SAvalue + return ctx.SAvalues[idx]; // at this point, SAvalues[idx] actually contains the SAvalue } } if (jl_is_globalref(expr)) { @@ -4926,40 +5760,17 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ return convert_julia_type(ctx, emit_expr(ctx, jl_fieldref_noalloc(expr, 0)), jl_fieldref_noalloc(expr, 1), &skip); } if (!jl_is_expr(expr)) { - int needroot = true; - if (jl_is_quotenode(expr)) { - expr = jl_fieldref_noalloc(expr,0); - } - // numeric literals - if (jl_is_int32(expr)) { - int32_t val = jl_unbox_int32(expr); - if ((uint32_t)(val+512) < 1024) { - // this can be gotten from the box cache - needroot = false; - expr = jl_box_int32(val); - } - } - else if (jl_is_int64(expr)) { - uint64_t val = jl_unbox_uint64(expr); - if ((uint64_t)(val+512) < 1024) { - // this can be gotten from the box cache - needroot = false; - expr = jl_box_int64(val); - } - } - else if (jl_is_uint8(expr)) { - expr = jl_box_uint8(jl_unbox_uint8(expr)); - needroot = false; - } - if (needroot && jl_is_method(ctx.linfo->def.method)) { // toplevel exprs and some integers are already rooted - jl_add_method_root(ctx, expr); - } - return mark_julia_const(ctx, expr); + jl_value_t *val = expr; + if (jl_is_quotenode(expr)) + val = jl_fieldref_noalloc(expr, 0); + if (jl_is_method(ctx.linfo->def.method)) // toplevel exprs are already rooted + val = jl_ensure_rooted(ctx, val); + return mark_julia_const(ctx, val); } jl_expr_t *ex = (jl_expr_t*)expr; - jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); - size_t nargs = jl_array_len(ex->args); + jl_value_t **args = jl_array_data(ex->args, jl_value_t*); + size_t nargs = jl_array_nrows(ex->args); jl_sym_t *head = ex->head; // this is object-disoriented. // however, this is a good way to do it because it should *not* be easy @@ -4977,7 +5788,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ literal_pointer_val(ctx, jl_undefref_exception)); } else { - undef_var_error_ifnot(ctx, cond, var); + undef_var_error_ifnot(ctx, cond, var, (jl_value_t*)jl_local_sym); } return ghostValue(ctx, jl_nothing_type); } @@ -5006,7 +5817,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ jl_cgval_t res = emit_call(ctx, ex, expr_t, is_promotable); // some intrinsics (e.g. 
typeassert) can return a wider type // than what's actually possible - if (is_promotable && res.promotion_point) { + if (is_promotable && res.promotion_point && res.promotion_ssa == -1) { res.promotion_ssa = ssaidx_0based; } res = update_julia_type(ctx, res, expr_t); @@ -5035,9 +5846,9 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ else if (head == jl_method_sym) { if (nargs == 1) { jl_value_t *mn = args[0]; - assert(jl_is_symbol(mn) || jl_is_slot(mn)); + assert(jl_is_symbol(mn) || jl_is_slotnumber(mn)); - Value *bp = NULL, *name, *bp_owner = Constant::getNullValue(ctx.types().T_pjlvalue); + Value *bp = NULL, *name; jl_binding_t *bnd = NULL; bool issym = jl_is_symbol(mn); bool isglobalref = !issym && jl_is_globalref(mn); @@ -5055,29 +5866,28 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ } JL_CATCH { jl_value_t *e = jl_current_exception(); - // errors. boo. root it somehow :( - bnd = jl_get_binding_wr(ctx.module, (jl_sym_t*)jl_gensym(), 1); - bnd->value = e; - bnd->constp = 1; + // errors. boo. :( + JL_GC_PUSH1(&e); + e = jl_as_global_root(e, 1); + JL_GC_POP(); raise_exception(ctx, literal_pointer_val(ctx, e)); return ghostValue(ctx, jl_nothing_type); } bp = julia_binding_gv(ctx, bnd); bp = julia_binding_pvalue(ctx, bp); - bp_owner = literal_pointer_val(ctx, (jl_value_t*)mod); } - else if (jl_is_slot(mn) || jl_is_argument(mn)) { + else if (jl_is_slotnumber(mn) || jl_is_argument(mn)) { + // XXX: eval_methoddef does not have this code branch int sl = jl_slot_number(mn)-1; jl_varinfo_t &vi = ctx.slots[sl]; bp = vi.boxroot; name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl)); } if (bp) { - Value *mdargs[5] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp, - bp_owner, literal_pointer_val(ctx, bnd) }; + Value *mdargs[] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp, literal_pointer_val(ctx, bnd) }; jl_cgval_t gf = mark_julia_type( ctx, - ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), makeArrayRef(mdargs)), + ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), ArrayRef(mdargs)), true, jl_function_type); return gf; @@ -5096,7 +5906,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ }; jl_cgval_t meth = mark_julia_type( ctx, - ctx.builder.CreateCall(prepare_call(jlmethod_func), makeArrayRef(mdargs)), + ctx.builder.CreateCall(prepare_call(jlmethod_func), ArrayRef(mdargs)), true, jl_method_type); return meth; @@ -5113,7 +5923,8 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ jl_binding_t *bnd = NULL; Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true); if (bp) - ctx.builder.CreateCall(prepare_call(jldeclareconst_func), bp); + ctx.builder.CreateCall(prepare_call(jldeclareconst_func), + { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym) }); } } else if (head == jl_new_sym) { @@ -5122,7 +5933,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ is_promotable = ctx.ssavalue_usecount[ssaidx_0based] == 1; } assert(nargs > 0); - jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i]); } @@ -5131,12 +5942,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ jl_is_datatype(jl_tparam0(ty)) && jl_is_concrete_type(jl_tparam0(ty))) { assert(nargs <= jl_datatype_nfields(jl_tparam0(ty)) + 1); 
- jl_cgval_t res = emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, &argv[1], is_promotable); + jl_cgval_t res = emit_new_struct(ctx, jl_tparam0(ty), nargs - 1, argv.data() + 1, is_promotable); if (is_promotable && res.promotion_point && res.promotion_ssa==-1) res.promotion_ssa = ssaidx_0based; return res; } - Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv, nargs, julia_call); + Value *val = emit_jlcall(ctx, jlnew_func, nullptr, argv.data(), nargs, julia_call); // temporarily mark as `Any`, expecting `emit_ssaval_assign` to update // it to the inferred type. return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type); @@ -5166,7 +5977,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ if (source.constant == NULL) { // For now, we require non-constant source to be handled by using // eval. This should probably be a verifier error and an abort here. - emit_error(ctx, "(internal error) invalid IR: opaque closure source be constant"); + emit_error(ctx, "(internal error) invalid IR: opaque closure source must be constant"); return jl_cgval_t(); } bool can_optimize = argt.constant != NULL && lb.constant != NULL && ub.constant != NULL && @@ -5177,35 +5988,33 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ if (can_optimize) { jl_value_t *closure_t = NULL; - jl_tupletype_t *env_t = NULL; + jl_value_t *env_t = NULL; JL_GC_PUSH2(&closure_t, &env_t); - jl_value_t **env_component_ts = (jl_value_t**)alloca(sizeof(jl_value_t*) * (nargs-4)); + SmallVector env_component_ts(nargs-4); for (size_t i = 0; i < nargs - 4; ++i) { env_component_ts[i] = argv[4+i].typ; } - env_t = jl_apply_tuple_type_v(env_component_ts, nargs-4); + env_t = jl_apply_tuple_type_v(env_component_ts.data(), nargs-4); // we need to know the full env type to look up the right specialization - if (jl_is_concrete_type((jl_value_t*)env_t)) { + if (jl_is_concrete_type(env_t)) { jl_tupletype_t *argt_typ = (jl_tupletype_t*)argt.constant; Function *F, *specF; - std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, env_t, argt_typ, ub.constant); + std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, (jl_tupletype_t*)env_t, argt_typ, ub.constant); if (F) { jl_cgval_t jlcall_ptr = mark_julia_type(ctx, F, false, jl_voidpointer_type); - jl_cgval_t world_age = mark_julia_type(ctx, - tbaa_decorate(ctx.tbaa().tbaa_gcframe, - ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), get_last_age_field(ctx), Align(sizeof(size_t)))), - false, - jl_long_type); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + Instruction *I = ctx.builder.CreateAlignedLoad(ctx.types().T_size, get_last_age_field(ctx), ctx.types().alignof_ptr); + jl_cgval_t world_age = mark_julia_type(ctx, ai.decorateInst(I), false, jl_long_type); jl_cgval_t fptr; if (specF) fptr = mark_julia_type(ctx, specF, false, jl_voidpointer_type); else - fptr = mark_julia_type(ctx, (llvm::Value*)Constant::getNullValue(getSizeTy(ctx.builder.getContext())), false, jl_voidpointer_type); + fptr = mark_julia_type(ctx, Constant::getNullValue(ctx.types().T_size), false, jl_voidpointer_type); // TODO: Inline the env at the end of the opaque closure and generate a descriptor for GC - jl_cgval_t env = emit_new_struct(ctx, (jl_value_t*)env_t, nargs-4, &argv.data()[4]); + jl_cgval_t env = emit_new_struct(ctx, env_t, nargs-4, &argv.data()[4]); jl_cgval_t closure_fields[5] = { env, @@ -5248,43 +6057,43 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, 
jl_value_t *expr, ssize_t ssaidx_ } else if (head == jl_loopinfo_sym) { // parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4)) + // to LLVM LoopID SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. + TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None); + MDs.push_back(TempNode.get()); + for (int i = 0, ie = nargs; i < ie; ++i) { Metadata *MD = to_md_tree(args[i], ctx.builder.getContext()); if (MD) MDs.push_back(MD); } - MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs); - CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func)); - I->setMetadata("julia.loopinfo", MD); + ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs); + // Replace the temporary node with a self-reference. + ctx.LoopID->replaceOperandWith(0, ctx.LoopID); return jl_cgval_t(); } else if (head == jl_leave_sym || head == jl_coverageeffect_sym - || head == jl_pop_exception_sym || head == jl_enter_sym || head == jl_inbounds_sym + || head == jl_pop_exception_sym || head == jl_inbounds_sym || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) { jl_errorf("Expr(:%s) in value position", jl_symbol_name(head)); } else if (head == jl_boundscheck_sym) { - return mark_julia_const(ctx, bounds_check_enabled(ctx, jl_true) ? jl_true : jl_false); + jl_value_t *def = (nargs == 0) ? jl_true : args[0]; + return mark_julia_const(ctx, bounds_check_enabled(ctx, def) ? jl_true : jl_false); } else if (head == jl_gc_preserve_begin_sym) { - jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { argv[i] = emit_expr(ctx, args[i]); } - std::vector vals; + SmallVector vals; for (size_t i = 0; i < nargs; ++i) { - const jl_cgval_t &ai = argv[i]; - if (ai.constant || ai.typ == jl_bottom_type) - continue; - if (ai.isboxed) { - vals.push_back(ai.Vboxed); - } - else if (jl_is_concrete_immutable(ai.typ) && !jl_is_pointerfree(ai.typ)) { - Type *at = julia_type_to_llvm(ctx, ai.typ); - vals.push_back(emit_unbox(ctx, at, ai, ai.typ)); - } + Value *gc_root = get_gc_root_for(ctx, argv[i]); + if (gc_root) + vals.push_back(gc_root); } Value *token = vals.empty() ? (Value*)ConstantTokenNone::get(ctx.builder.getContext()) @@ -5327,27 +6136,24 @@ JL_GCC_IGNORE_STOP // --- generate function bodies --- // gc frame emission -static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0) +static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=false) { // allocate a placeholder gc instruction // this will require the runtime, but it gets deleted later if unused - ctx.topalloca = ctx.builder.CreateCall(prepare_call(jlpgcstack_func)); + ctx.topalloca = ctx.builder.CreateCall(prepare_call(or_new ? jladoptthread_func : jlpgcstack_func)); ctx.pgcstack = ctx.topalloca; + ctx.pgcstack->setName("pgcstack"); } static Value *get_current_task(jl_codectx_t &ctx) { - const int ptls_offset = offsetof(jl_task_t, gcstack); - return ctx.builder.CreateInBoundsGEP( - ctx.types().T_pjlvalue, emit_bitcast(ctx, ctx.pgcstack, ctx.types().T_ppjlvalue), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), -(ptls_offset / sizeof(void *))), - "current_task"); + return get_current_task_from_pgcstack(ctx.builder, ctx.types().T_size, ctx.pgcstack); } // Get PTLS through current task. 
static Value *get_current_ptls(jl_codectx_t &ctx) { - return get_current_ptls_from_task(ctx.builder, get_current_task(ctx), ctx.tbaa().tbaa_gcframe); + return get_current_ptls_from_task(ctx.builder, ctx.types().T_size, get_current_task(ctx), ctx.tbaa().tbaa_gcframe); } // Get the address of the world age of the current task @@ -5355,19 +6161,20 @@ static Value *get_last_age_field(jl_codectx_t &ctx) { Value *ct = get_current_task(ctx); return ctx.builder.CreateInBoundsGEP( - getSizeTy(ctx.builder.getContext()), - ctx.builder.CreateBitCast(ct, getSizePtrTy(ctx.builder.getContext())), - ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_task_t, world_age) / sizeof(size_t)), + ctx.types().T_size, + ctx.builder.CreateBitCast(ct, ctx.types().T_size->getPointerTo()), + ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, world_age) / ctx.types().sizeof_ptr), "world_age"); } -// Get signal page through current task. -static Value *get_current_signal_page(jl_codectx_t &ctx) +static Value *get_scope_field(jl_codectx_t &ctx) { - // return ctx.builder.CreateCall(prepare_call(reuse_signal_page_func)); - Value *ptls = get_current_ptls(ctx); - int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *); - return emit_nthptr_recast(ctx, ptls, nthfield, ctx.tbaa().tbaa_const, getSizePtrTy(ctx.builder.getContext())); + Value *ct = get_current_task(ctx); + return ctx.builder.CreateInBoundsGEP( + ctx.types().T_prjlvalue, + ctx.builder.CreateBitCast(ct, ctx.types().T_prjlvalue->getPointerTo()), + ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, scope) / ctx.types().sizeof_ptr), + "current_scope"); } static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_codegen_params_t &params) @@ -5375,11 +6182,12 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod ++EmittedToJLInvokes; jl_codectx_t ctx(M->getContext(), params); std::string name; - raw_string_ostream(name) << "tojlinvoke" << globalUniqueGeneratedNames++; + raw_string_ostream(name) << "tojlinvoke" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); Function *f = Function::Create(ctx.types().T_jlfunc, GlobalVariable::InternalLinkage, name, M); - jl_init_function(f); + jl_init_function(f, params.TargetTriple); + jl_name_jlfunc_args(params, f); //f->setAlwaysInline(); ctx.f = f; // for jl_Module BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", f); @@ -5387,7 +6195,9 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod Function *theFunc; Value *theFarg; auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); - if (params.cache && invoke != NULL) { + bool cache_valid = params.cache; + + if (cache_valid && invoke != NULL) { StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst); theFunc = cast( M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(ctx.builder.getContext())).getCallee()); @@ -5415,7 +6225,7 @@ static Type *get_unionbytes_type(LLVMContext &C, unsigned unionbytes) { static void emit_cfunc_invalidate( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, - jl_value_t *calltype, jl_value_t *rettype, + jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, jl_codegen_params_t &params, Function *target) @@ -5429,29 +6239,42 @@ static void emit_cfunc_invalidate( DebugLoc noDbg; ctx.builder.SetCurrentDebugLocation(noDbg); allocate_gc_frame(ctx, b0); Function::arg_iterator AI = gf_thunk->arg_begin(); - jl_cgval_t *myargs =
(jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); + SmallVector myargs(nargs); if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union) ++AI; if (return_roots) ++AI; + if (JL_FEAT_TEST(ctx,gcstack_arg)){ + ++AI; // gcstack_arg + } for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(calltype, i); - bool isboxed = deserves_argbox(jt); - Type *et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); + // n.b. specTypes is required to be a datatype by construction for specsig + bool isboxed = false; + Type *et; + if (i == 0 && is_for_opaque_closure) { + et = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); + } + else if (deserves_argbox(jt)) { + et = ctx.types().T_prjlvalue; + isboxed = true; + } + else { + et = julia_type_to_llvm(ctx, jt); + } if (is_uniquerep_Type(jt)) { myargs[i] = mark_julia_const(ctx, jl_tparam0(jt)); } else if (type_is_ghost(et)) { - assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->instance); + assert(jl_is_datatype(jt) && jl_is_datatype_singleton((jl_datatype_t*)jt)); myargs[i] = mark_julia_const(ctx, ((jl_datatype_t*)jt)->instance); } else { Value *arg_v = &*AI; ++AI; Type *at = arg_v->getType(); - if (!isboxed && et->isAggregateType()) { + if ((i == 0 && is_for_opaque_closure) || (!isboxed && et->isAggregateType())) { myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const); } else { @@ -5462,7 +6285,7 @@ static void emit_cfunc_invalidate( } } assert(AI == gf_thunk->arg_end()); - Value *gf_ret = emit_jlcall(ctx, target, nullptr, myargs, nargs, julia_call); + Value *gf_ret = emit_jlcall(ctx, target, nullptr, myargs.data(), nargs, julia_call); jl_cgval_t gf_retbox = mark_julia_type(ctx, gf_ret, true, jl_any_type); if (cc != jl_returninfo_t::Boxed) { emit_typecheck(ctx, gf_retbox, rettype, "cfunction"); @@ -5490,15 +6313,16 @@ static void emit_cfunc_invalidate( root1 = ctx.builder.CreateConstInBoundsGEP2_32(get_returnroots_type(ctx, return_roots), root1, 0, 0); ctx.builder.CreateStore(gf_ret, root1); } - emit_memcpy(ctx, &*gf_thunk->arg_begin(), nullptr, gf_ret, nullptr, jl_datatype_size(rettype), julia_alignment(rettype)); + emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret, + jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype), julia_alignment(rettype)); ctx.builder.CreateRetVoid(); break; } case jl_returninfo_t::Union: { Type *retty = gf_thunk->getReturnType(); Value *gf_retval = UndefValue::get(retty); - Value *tindex = compute_box_tindex(ctx, emit_typeof_boxed(ctx, gf_retbox), (jl_value_t*)jl_any_type, rettype); - tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + Value *tindex = compute_box_tindex(ctx, emit_typeof(ctx, gf_retbox, false, true), (jl_value_t*)jl_any_type, rettype); + tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); gf_retval = ctx.builder.CreateInsertValue(gf_retval, gf_ret, 0); gf_retval = ctx.builder.CreateInsertValue(gf_retval, tindex, 1); ctx.builder.CreateRet(gf_retval); @@ -5514,11 +6338,11 @@ static void emit_cfunc_invalidate( static void emit_cfunc_invalidate( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, - jl_value_t *calltype, jl_value_t *rettype, + jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, size_t nargs, jl_codegen_params_t &params) { - emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, nargs, params, +
emit_cfunc_invalidate(gf_thunk, cc, return_roots, calltype, rettype, is_for_opaque_closure, nargs, params, prepare_call_in(gf_thunk->getParent(), jlapplygeneric_func)); } @@ -5546,18 +6370,27 @@ static Function* gen_cfun_wrapper( if (lam && params.cache) { // TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here codeinst = jl_compile_method_internal(lam, world); - assert(codeinst->invoke); - if (codeinst->invoke == jl_fptr_args_addr) { - callptr = codeinst->specptr.fptr; + auto invoke = jl_atomic_load_acquire(&codeinst->invoke); + auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); + assert(invoke); + if (fptr) { + while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + invoke = jl_atomic_load_relaxed(&codeinst->invoke); + } + // WARNING: this invoke load is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this. + if (invoke == jl_fptr_args_addr) { + callptr = fptr; calltype = 1; } - else if (codeinst->invoke == jl_fptr_const_return_addr) { + else if (invoke == jl_fptr_const_return_addr) { // don't need the fptr callptr = (void*)codeinst->rettype_const; calltype = 2; } - else if (codeinst->isspecsig) { - callptr = codeinst->specptr.fptr; + else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) { + callptr = fptr; calltype = 3; } astrt = codeinst->rettype; @@ -5571,7 +6404,7 @@ static Function* gen_cfun_wrapper( } std::string funcName; - raw_string_ostream(funcName) << "jlcapi_" << name << "_" << globalUniqueGeneratedNames++; + raw_string_ostream(funcName) << "jlcapi_" << name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); Module *M = into; // Safe because ctx lock is held by params AttributeList attributes = sig.attributes; @@ -5579,21 +6412,16 @@ static Function* gen_cfun_wrapper( if (nest) { // add nest parameter (pointer to jl_value_t* data array) after sret arg assert(closure_types); - std::vector fargt_sig(sig.fargt_sig); + SmallVector fargt_sig(sig.fargt_sig.begin(), sig.fargt_sig.end()); fargt_sig.insert(fargt_sig.begin() + sig.sret, JuliaType::get_pprjlvalue_ty(M->getContext())); // Shift LLVM attributes for parameters one to the right, as // we are adding the extra nest parameter after sret arg. 
- std::vector> newAttributes; + SmallVector, 0> newAttributes; newAttributes.reserve(attributes.getNumAttrSets() + 1); -#if JL_LLVM_VERSION >= 140000 auto it = *attributes.indexes().begin(); const auto it_end = *attributes.indexes().end(); -#else - auto it = attributes.index_begin(); - const auto it_end = attributes.index_end(); -#endif // Skip past FunctionIndex if (it == AttributeList::AttrIndex::FunctionIndex) { @@ -5608,11 +6436,7 @@ static Function* gen_cfun_wrapper( } // Add the new nest attribute -#if JL_LLVM_VERSION >= 140000 AttrBuilder attrBuilder(M->getContext()); -#else - AttrBuilder attrBuilder; -#endif attrBuilder.addAttribute(Attribute::Nest); newAttributes.emplace_back(it, AttributeSet::get(M->getContext(), attrBuilder)); @@ -5641,7 +6465,7 @@ static Function* gen_cfun_wrapper( Function *cw = Function::Create(functype, GlobalVariable::ExternalLinkage, funcName, M); - jl_init_function(cw); + jl_init_function(cw, params.TargetTriple); cw->setAttributes(AttributeList::get(M->getContext(), {attributes, cw->getAttributes()})); jl_codectx_t ctx(M->getContext(), params); @@ -5654,39 +6478,27 @@ static Function* gen_cfun_wrapper( ctx.builder.SetInsertPoint(b0); DebugLoc noDbg; ctx.builder.SetCurrentDebugLocation(noDbg); - allocate_gc_frame(ctx, b0); + allocate_gc_frame(ctx, b0, true); - Value *dummy_world = ctx.builder.CreateAlloca(getSizeTy(ctx.builder.getContext())); - Value *have_tls = ctx.builder.CreateIsNotNull(ctx.pgcstack); - // TODO: in the future, initialize a full TLS context here Value *world_age_field = get_last_age_field(ctx); - world_age_field = ctx.builder.CreateSelect(have_tls, world_age_field, dummy_world); - Value *last_age = tbaa_decorate(ctx.tbaa().tbaa_gcframe, - ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), world_age_field, Align(sizeof(size_t)))); - Value *last_gc_state = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), JL_GC_STATE_SAFE); - last_gc_state = emit_guarded_test(ctx, have_tls, last_gc_state, [&] { - return emit_gc_unsafe_enter(ctx); - }); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + Value *last_age = ai.decorateInst( + ctx.builder.CreateAlignedLoad(ctx.types().T_size, world_age_field, ctx.types().alignof_ptr)); - Value *world_v = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), - prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t))); + Value *world_v = ctx.builder.CreateAlignedLoad(ctx.types().T_size, + prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr); cast(world_v)->setOrdering(AtomicOrdering::Acquire); Value *age_ok = NULL; if (calltype) { LoadInst *lam_max = ctx.builder.CreateAlignedLoad( - getSizeTy(ctx.builder.getContext()), + ctx.types().T_size, ctx.builder.CreateConstInBoundsGEP1_32( - getSizeTy(ctx.builder.getContext()), - emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), getSizePtrTy(ctx.builder.getContext())), - offsetof(jl_code_instance_t, max_world) / sizeof(size_t)), - Align(sizeof(size_t))); - // XXX: age is always OK if we don't have a TLS. This is a hack required due to `@threadcall` abuse. 
- // and adds quite a bit of complexity here, even though it's still wrong - // (anything that tries to interact with the runtime will fault) + ctx.types().T_size, + emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), ctx.types().T_size->getPointerTo()), + offsetof(jl_code_instance_t, max_world) / ctx.types().sizeof_ptr), + ctx.types().alignof_ptr); age_ok = ctx.builder.CreateICmpUGE(lam_max, world_v); - world_v = ctx.builder.CreateSelect(ctx.builder.CreateOr(have_tls, age_ok), world_v, lam_max); - age_ok = ctx.builder.CreateOr(ctx.builder.CreateNot(have_tls), age_ok); } ctx.builder.CreateStore(world_v, world_age_field); @@ -5694,7 +6506,7 @@ static Function* gen_cfun_wrapper( Function::arg_iterator AI = cw->arg_begin(); Value *sretPtr = sig.sret ? &*AI++ : NULL; Value *nestPtr = nest ? &*AI++ : NULL; - jl_cgval_t *inputargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * (nargs + 1)); + SmallVector inputargs(nargs + 1); if (ff) { // we need to pass the function object even if (even though) it is a singleton inputargs[0] = mark_julia_const(ctx, ff); @@ -5711,7 +6523,7 @@ static Function* gen_cfun_wrapper( for (size_t i = 0; i < nargs; ++i, ++AI) { // figure out how to unpack this argument type Value *val = &*AI; - assert(sig.fargt_sig.at(i + sig.sret) == val->getType()); + assert(sig.fargt_sig[i + sig.sret] == val->getType()); jl_cgval_t &inputarg = inputargs[i + 1]; jl_value_t *jargty = jl_svecref(sig.at, i); bool aref = jl_is_abstract_ref_type(jargty); @@ -5766,14 +6578,14 @@ static Function* gen_cfun_wrapper( *closure_types = jl_alloc_vec_any(0); jl_array_ptr_1d_push(*closure_types, jargty); Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_len(*closure_types)), + ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_nrows(*closure_types)), Align(sizeof(void*))); BasicBlock *boxedBB = BasicBlock::Create(ctx.builder.getContext(), "isboxed", cw); BasicBlock *loadBB = BasicBlock::Create(ctx.builder.getContext(), "need-load", cw); BasicBlock *unboxedBB = BasicBlock::Create(ctx.builder.getContext(), "maybe-unboxed", cw); BasicBlock *isanyBB = BasicBlock::Create(ctx.builder.getContext(), "any", cw); BasicBlock *afterBB = BasicBlock::Create(ctx.builder.getContext(), "after", cw); - Value *isrtboxed = ctx.builder.CreateIsNull(val); + Value *isrtboxed = ctx.builder.CreateIsNull(val); // XXX: this is the wrong condition and should be inspecting runtime_dt instead ctx.builder.CreateCondBr(isrtboxed, boxedBB, loadBB); ctx.builder.SetInsertPoint(boxedBB); Value *p1 = ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue); @@ -5800,7 +6612,7 @@ static Function* gen_cfun_wrapper( } } else { - bool argboxed = sig.fargt_isboxed.at(i); + bool argboxed = sig.fargt_isboxed[i]; if (argboxed) { // a jl_value_t*, even when represented as a struct inputarg = mark_julia_type(ctx, val, true, jargty_proper); @@ -5809,7 +6621,7 @@ static Function* gen_cfun_wrapper( // something of type T // undo whatever we might have done to this poor argument assert(jl_is_datatype(jargty)); - if (sig.byRefList.at(i)) { + if (sig.byRefList[i]) { val = ctx.builder.CreateAlignedLoad(sig.fargt[i], val, Align(1)); // unknown alignment from C } else { @@ -5833,7 +6645,7 @@ static Function* gen_cfun_wrapper( *closure_types = jl_alloc_vec_any(0); jl_array_ptr_1d_push(*closure_types, jargty); Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - 
ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_len(*closure_types)), + ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_nrows(*closure_types)), Align(sizeof(void*))); Value *strct = box_ccall_result(ctx, val, runtime_dt, jargty); inputarg = mark_julia_type(ctx, strct, true, jargty_proper); @@ -5849,6 +6661,7 @@ static Function* gen_cfun_wrapper( jl_cgval_t retval; if (calltype == 2) { nargs = 0; // arguments not needed -- TODO: not really true, should emit an age_ok test and jlcall + (void)nargs; // silence unused variable warning jlfunc_sret = false; retval = mark_julia_const(ctx, (jl_value_t*)callptr); } @@ -5862,7 +6675,8 @@ static Function* gen_cfun_wrapper( if (!theFptr) { theFptr = Function::Create(ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage, fname, jl_Module); - jl_init_function(theFptr); + jl_init_function(theFptr, ctx.emission_context.TargetTriple); + jl_name_jlfunc_args(ctx.emission_context, theFptr); addRetAttr(theFptr, Attribute::NonNull); } else { @@ -5885,7 +6699,7 @@ static Function* gen_cfun_wrapper( ctx.builder.CreateBr(b_after); ctx.builder.SetInsertPoint(b_generic); } - Value *ret = emit_jlcall(ctx, jlapplygeneric_func, NULL, inputargs, nargs + 1, julia_call); + Value *ret = emit_jlcall(ctx, jlapplygeneric_func, NULL, inputargs.data(), nargs + 1, julia_call); if (age_ok) { ctx.builder.CreateBr(b_after); ctx.builder.SetInsertPoint(b_after); @@ -5900,14 +6714,15 @@ static Function* gen_cfun_wrapper( bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; assert(calltype == 3); // emit a specsig call + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst); - jl_returninfo_t returninfo = get_specsig_function(ctx, M, protoname, lam->specTypes, astrt, is_opaque_closure); - FunctionType *cft = returninfo.decl->getFunctionType(); + jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg); + FunctionType *cft = returninfo.decl.getFunctionType(); jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet); // TODO: Can use use emit_call_specfun_other here? 
- std::vector args; - Value *result; + SmallVector args; + Value *result = nullptr; if (jlfunc_sret || returninfo.cc == jl_returninfo_t::Union) { // fuse the two sret together, or emit an alloca to hold it if (sig.sret && jlfunc_sret) { @@ -5915,10 +6730,12 @@ static Function* gen_cfun_wrapper( } else { if (jlfunc_sret) { - result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType()); + result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); + setName(ctx.emission_context, result, "sret"); assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); } else { result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes)); + setName(ctx.emission_context, result, "result_union"); assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); } } @@ -5926,14 +6743,18 @@ static Function* gen_cfun_wrapper( } if (returninfo.return_roots) { AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots)); + setName(ctx.emission_context, return_roots, "return_roots"); args.push_back(return_roots); } + if (gcstack_arg) + args.push_back(ctx.pgcstack); for (size_t i = 0; i < nargs + 1; i++) { // figure out how to repack the arguments jl_cgval_t &inputarg = inputargs[i]; Value *arg; jl_value_t *spect = (i == 0 && is_opaque_closure) ? (jl_value_t*)jl_any_type : jl_nth_slot_type(lam->specTypes, i); + // n.b. specTypes is required to be a datatype by construction for specsig bool isboxed = deserves_argbox(spect); Type *T = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, spect); if (is_uniquerep_Type(spect)) { @@ -5959,25 +6780,29 @@ static Function* gen_cfun_wrapper( // add to argument list args.push_back(arg); } - Value *theFptr = returninfo.decl; + Value *theFptr = returninfo.decl.getCallee(); assert(theFptr); if (age_ok) { funcName += "_gfthunk"; - Function *gf_thunk = Function::Create(returninfo.decl->getFunctionType(), + Function *gf_thunk = Function::Create(returninfo.decl.getFunctionType(), GlobalVariable::InternalLinkage, funcName, M); - jl_init_function(gf_thunk); - gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.decl->getAttributes(), gf_thunk->getAttributes()})); + jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); + gf_thunk->setAttributes(AttributeList::get(M->getContext(), {returninfo.attrs, gf_thunk->getAttributes()})); // build a specsig -> jl_apply_generic converter thunk // this builds a method that calls jl_apply_generic (as a closure over a singleton function pointer), // but which has the signature of a specsig - emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, nargs + 1, ctx.emission_context); + emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context); theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk); } - assert(cast(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl->getFunctionType())); + + assert(cast(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType())); CallInst *call = ctx.builder.CreateCall( - cast(returninfo.decl->getFunctionType()), + returninfo.decl.getFunctionType(), theFptr, ArrayRef(args)); - call->setAttributes(returninfo.decl->getAttributes()); + 
call->setAttributes(returninfo.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); + switch (returninfo.cc) { case jl_returninfo_t::Boxed: retval = mark_julia_type(ctx, call, true, astrt); @@ -5993,7 +6818,7 @@ static Function* gen_cfun_wrapper( Value *tindex = ctx.builder.CreateExtractValue(call, 1); Value *derived = ctx.builder.CreateSelect( ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), decay_derived(ctx, ctx.builder.CreateBitCast(result, ctx.types().T_pjlvalue)), decay_derived(ctx, box)); @@ -6028,8 +6853,6 @@ static Function* gen_cfun_wrapper( } else if (!type_is_ghost(sig.lrt)) { Type *prt = sig.prt; - if (sig.sret) - prt = sig.fargt_sig[0]->getContainedType(0); // sret is a PointerType bool issigned = jl_signed_type && jl_subtype(declrt, (jl_value_t*)jl_signed_type); Value *v = emit_unbox(ctx, sig.lrt, retval, retval.typ); r = llvm_type_rewrite(ctx, v, prt, issigned); @@ -6043,12 +6866,6 @@ static Function* gen_cfun_wrapper( } ctx.builder.CreateStore(last_age, world_age_field); - if (!sig.retboxed) { - emit_guarded_test(ctx, have_tls, nullptr, [&] { - emit_gc_unsafe_leave(ctx, last_gc_state); - return nullptr; - }); - } ctx.builder.CreateRet(r); ctx.builder.SetCurrentDebugLocation(noDbg); @@ -6065,7 +6882,9 @@ static Function* gen_cfun_wrapper( FunctionType::get(getInt8PtrTy(ctx.builder.getContext()), { getInt8PtrTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false), GlobalVariable::ExternalLinkage, funcName, M); - jl_init_function(cw_make); + jl_init_function(cw_make, ctx.emission_context.TargetTriple); + cw_make->getArg(0)->setName("wrapper"); + cw_make->getArg(1)->setName("newval"); BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", cw_make); IRBuilder<> cwbuilder(b0); Function::arg_iterator AI = cw_make->arg_begin(); @@ -6133,7 +6952,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con return jl_cgval_t(); } if (rt != declrt && rt != (jl_value_t*)jl_any_type) - jl_add_method_root(ctx, rt); + rt = jl_ensure_rooted(ctx, rt); function_sig_t sig("cfunction", lrt, rt, retboxed, argt, unionall_env, false, CallingConv::C, false, &ctx.emission_context); assert(sig.fargt.size() + sig.sret == sig.fargt_sig.size()); @@ -6170,19 +6989,19 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con sigt = NULL; } else { - sigt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)sigt); + sigt = jl_apply_tuple_type((jl_svec_t*)sigt, 1); } if (sigt && !(unionall_env && jl_has_typevar_from_unionall(rt, unionall_env))) { unionall_env = NULL; } bool nest = (!fexpr_rt.constant || unionall_env); -#if defined(_CPU_AARCH64_) || defined(_CPU_ARM_) || defined(_CPU_PPC64_) - if (nest) { - emit_error(ctx, "cfunction: closures are not supported on this platform"); - return jl_cgval_t(); + if (ctx.emission_context.TargetTriple.isAArch64() || ctx.emission_context.TargetTriple.isARM() || ctx.emission_context.TargetTriple.isPPC64()) { + if (nest) { + emit_error(ctx, "cfunction: closures are not supported on this platform"); + return jl_cgval_t(); + } } -#endif size_t world = jl_atomic_load_acquire(&jl_world_counter); size_t min_valid = 0; size_t max_valid = ~(size_t)0; @@ -6201,14 +7020,16 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con 
jl_svec_t *fill = jl_emptysvec; if (closure_types) { assert(ctx.spvals_ptr); - size_t n = jl_array_len(closure_types); - jl_svec_t *fill = jl_alloc_svec_uninit(n); + size_t n = jl_array_nrows(closure_types); + jl_svec_t *fill_i = jl_alloc_svec_uninit(n); for (size_t i = 0; i < n; i++) { - jl_svecset(fill, i, jl_array_ptr_ref(closure_types, i)); + jl_svecset(fill_i, i, jl_array_ptr_ref(closure_types, i)); } - jl_add_method_root(ctx, (jl_value_t*)fill); + JL_GC_PUSH1(&fill_i); + fill = (jl_svec_t*)jl_ensure_rooted(ctx, (jl_value_t*)fill_i); + JL_GC_POP(); } - Type *T_htable = ArrayType::get(getSizeTy(ctx.builder.getContext()), sizeof(htable_t) / sizeof(void*)); + Type *T_htable = ArrayType::get(ctx.types().T_size, sizeof(htable_t) / sizeof(void*)); Value *cache = new GlobalVariable(*jl_Module, T_htable, false, GlobalVariable::PrivateLinkage, ConstantAggregateZero::get(T_htable)); @@ -6224,22 +7045,23 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con outboxed = true; } else { - F = ctx.builder.CreatePtrToInt(F, getSizeTy(ctx.builder.getContext())); + F = ctx.builder.CreatePtrToInt(F, ctx.types().T_size); outboxed = (output_type != (jl_value_t*)jl_voidpointer_type); if (outboxed) { assert(jl_datatype_size(output_type) == sizeof(void*) * 4); - Value *strct = emit_allocobj(ctx, jl_datatype_size(output_type), - literal_pointer_val(ctx, (jl_value_t*)output_type)); - Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), getSizePtrTy(ctx.builder.getContext())); + Value *strct = emit_allocobj(ctx, (jl_datatype_t*)output_type, true); + setName(ctx.emission_context, strct, "cfun_result"); + Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), ctx.types().T_size->getPointerTo()); MDNode *tbaa = best_tbaa(ctx.tbaa(), output_type); - tbaa_decorate(tbaa, ctx.builder.CreateStore(F, derived_strct)); - tbaa_decorate(tbaa, ctx.builder.CreateStore( - ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, fexpr_rt.constant), getSizeTy(ctx.builder.getContext())), - ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 1))); - tbaa_decorate(tbaa, ctx.builder.CreateStore(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), - ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 2))); - tbaa_decorate(tbaa, ctx.builder.CreateStore(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), - ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 3))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.decorateInst(ctx.builder.CreateStore(F, derived_strct)); + ai.decorateInst(ctx.builder.CreateStore( + ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, fexpr_rt.constant), ctx.types().T_size), + ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, derived_strct, 1))); + ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(ctx.types().T_size), + ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, derived_strct, 2))); + ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(ctx.types().T_size), + ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, derived_strct, 3))); F = strct; } } @@ -6286,6 +7108,10 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi int found = jl_dlsym(sysimg_handle, name, &addr, 0); if (found) add_named_global(name, addr); + else { + err = jl_get_exceptionf(jl_errorexception_type, "%s not found in sysimg", name); + jl_throw(err); + } } else 
{ jl_method_instance_t *lam = jl_get_specialization1((jl_tupletype_t*)sigt, world, &min_valid, &max_valid, 0); @@ -6306,8 +7132,11 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret { ++GeneratedInvokeWrappers; Function *w = Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, funcName, M); - jl_init_function(w); + jl_init_function(w, params.TargetTriple); + jl_name_jlfunc_args(params, w); w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()})); + w->addFnAttr(Attribute::OptimizeNone); + w->addFnAttr(Attribute::NoInline); Function::arg_iterator AI = w->arg_begin(); Value *funcArg = &*AI++; Value *argArray = &*AI++; @@ -6328,9 +7157,9 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret allocate_gc_frame(ctx, b0); // TODO: replace this with emit_call_specfun_other? - FunctionType *ftype = f.decl->getFunctionType(); + FunctionType *ftype = const_cast(f.decl).getFunctionType(); size_t nfargs = ftype->getNumParams(); - Value **args = (Value**) alloca(nfargs * sizeof(Value*)); + SmallVector args(nfargs); unsigned idx = 0; AllocaInst *result = NULL; switch (f.cc) { @@ -6339,8 +7168,9 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret case jl_returninfo_t::Ghosts: break; case jl_returninfo_t::SRet: - assert(cast(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType())); - result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType()); + assert(cast(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType())); + result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()); + setName(ctx.emission_context, result, "sret"); args[idx] = result; idx++; break; @@ -6350,29 +7180,42 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret result->setAlignment(Align(f.union_align)); args[idx] = result; idx++; + setName(ctx.emission_context, result, "result_union"); break; } if (f.return_roots) { AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots)); + setName(ctx.emission_context, return_roots, "return_roots"); args[idx] = return_roots; idx++; } - + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + if (gcstack_arg) { + args[idx] = ctx.pgcstack; + idx++; + } bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) { jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type : jl_nth_slot_type(lam->specTypes, i); + // n.b. specTypes is required to be a datatype by construction for specsig bool isboxed = deserves_argbox(ty); Type *lty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, ty); if (type_is_ghost(lty) || is_uniquerep_Type(ty)) continue; Value *theArg; if (i == 0) { - theArg = funcArg; + // This function adapts from generic jlcall to OC specsig. Generic jlcall pointers + // come in as ::Tracked, but specsig expected ::Derived. 
+ if (is_opaque_closure) + theArg = decay_derived(ctx, funcArg); + else + theArg = funcArg; } else { Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1); - theArg = tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable( + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + theArg = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), false, ty)); @@ -6382,13 +7225,14 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers theArg = ctx.builder.CreateAlignedLoad(lty, theArg, Align(julia_alignment(ty))); } - assert(dyn_cast(theArg) == NULL); + assert(!isa(theArg)); args[idx] = theArg; idx++; } - CallInst *call = ctx.builder.CreateCall(f.decl, ArrayRef(&args[0], nfargs)); - call->setAttributes(f.decl->getAttributes()); - + CallInst *call = ctx.builder.CreateCall(f.decl, args); + call->setAttributes(f.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; if (retarg != -1) { Value *theArg; @@ -6427,14 +7271,14 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret } } ctx.builder.CreateRet(boxed(ctx, retval)); - assert(!ctx.roots); return w; } -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure) +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments, size_t *arg_offset) { jl_returninfo_t props = {}; SmallVector fsig; + SmallVector argnames; Type *rt = NULL; Type *srt = NULL; if (jlrettype == (jl_value_t*)jl_bottom_type) { @@ -6452,8 +7296,9 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String props.cc = jl_returninfo_t::Union; Type *AT = ArrayType::get(getInt8Ty(ctx.builder.getContext()), props.union_bytes); fsig.push_back(AT->getPointerTo()); + argnames.push_back("union_bytes_return"); Type *pair[] = { ctx.types().T_prjlvalue, getInt8Ty(ctx.builder.getContext()) }; - rt = StructType::get(ctx.builder.getContext(), makeArrayRef(pair)); + rt = StructType::get(ctx.builder.getContext(), ArrayRef(pair)); } else if (allunbox) { props.cc = jl_returninfo_t::Ghosts; @@ -6468,12 +7313,15 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String rt = julia_type_to_llvm(ctx, jlrettype, &retboxed); assert(!retboxed); if (rt != getVoidTy(ctx.builder.getContext()) && deserves_sret(jlrettype, rt)) { - auto tracked = CountTrackedPointers(rt); + auto tracked = CountTrackedPointers(rt, true); assert(!tracked.derived); if (tracked.count && !tracked.all) props.return_roots = tracked.count; props.cc = jl_returninfo_t::SRet; - fsig.push_back(rt->getPointerTo()); + // sret is always passed from alloca + assert(M); + fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace())); + argnames.push_back("sret_return"); srt = rt; rt = getVoidTy(ctx.builder.getContext()); } @@ -6488,11 +7336,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String SmallVector attrs; // function declaration attributes if (props.cc == jl_returninfo_t::SRet) { assert(srt); -#if JL_LLVM_VERSION >= 140000 AttrBuilder param(ctx.builder.getContext()); -#else - 
AttrBuilder param; -#endif param.addStructRetAttr(srt); param.addAttribute(Attribute::NoAlias); param.addAttribute(Attribute::NoCapture); @@ -6501,11 +7345,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String assert(fsig.size() == 1); } if (props.cc == jl_returninfo_t::Union) { -#if JL_LLVM_VERSION >= 140000 AttrBuilder param(ctx.builder.getContext()); -#else - AttrBuilder param; -#endif param.addAttribute(Attribute::NoAlias); param.addAttribute(Attribute::NoCapture); param.addAttribute(Attribute::NoUndef); @@ -6514,34 +7354,47 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String } if (props.return_roots) { -#if JL_LLVM_VERSION >= 140000 AttrBuilder param(ctx.builder.getContext()); -#else - AttrBuilder param; -#endif param.addAttribute(Attribute::NoAlias); param.addAttribute(Attribute::NoCapture); param.addAttribute(Attribute::NoUndef); attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0)); + argnames.push_back("return_roots"); } - for (size_t i = 0; i < jl_nparams(sig); i++) { + if (gcstack_arg){ + AttrBuilder param(ctx.builder.getContext()); + param.addAttribute(Attribute::SwiftSelf); + param.addAttribute(Attribute::NonNull); + attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); + fsig.push_back(PointerType::get(JuliaType::get_ppjlvalue_ty(ctx.builder.getContext()), 0)); + argnames.push_back("pgcstack_arg"); + } + + if (arg_offset) + *arg_offset = fsig.size(); + size_t nparams = jl_nparams(sig); + if (used_arguments) + used_arguments->resize(nparams); + + for (size_t i = 0; i < nparams; i++) { jl_value_t *jt = jl_tparam(sig, i); + bool isboxed = false; + Type *ty = NULL; if (i == 0 && is_opaque_closure) { - jt = (jl_value_t*)jl_any_type; + ty = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); + isboxed = true; // true-ish anyway - we might not have the type tag + } + else { + if (is_uniquerep_Type(jt)) + continue; + isboxed = deserves_argbox(jt); + ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); } - if (is_uniquerep_Type(jt)) - continue; - bool isboxed = deserves_argbox(jt); - Type *ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); if (type_is_ghost(ty)) continue; -#if JL_LLVM_VERSION >= 140000 AttrBuilder param(ctx.builder.getContext()); -#else - AttrBuilder param; -#endif if (ty->isAggregateType()) { // aggregate types are passed by pointer param.addAttribute(Attribute::NoCapture); param.addAttribute(Attribute::ReadOnly); @@ -6557,6 +7410,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String } attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); fsig.push_back(ty); + if (used_arguments) + used_arguments->set(i); } AttributeSet FnAttrs; @@ -6566,17 +7421,43 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String else if (rt == ctx.types().T_prjlvalue) RetAttrs = RetAttrs.addAttribute(ctx.builder.getContext(), Attribute::NonNull); AttributeList attributes = AttributeList::get(ctx.builder.getContext(), FnAttrs, RetAttrs, attrs); + FunctionType *ftype = FunctionType::get(rt, fsig, false); - Function *f = M ? 
cast_or_null(M->getNamedValue(name)) : NULL; - if (f == NULL) { - f = Function::Create(ftype, GlobalVariable::ExternalLinkage, name, M); - jl_init_function(f); - f->setAttributes(AttributeList::get(f->getContext(), {attributes, f->getAttributes()})); + if (fval == NULL) { + Function *f = M ? cast_or_null(M->getNamedValue(name)) : NULL; + if (f == NULL) { + f = Function::Create(ftype, GlobalVariable::ExternalLinkage, name, M); + jl_init_function(f, ctx.emission_context.TargetTriple); + if (ctx.emission_context.debug_level >= 2) { + ios_t sigbuf; + ios_mem(&sigbuf, 0); + jl_static_show_func_sig((JL_STREAM*) &sigbuf, sig); + f->setAttributes(AttributeList::get(f->getContext(), {attributes.addFnAttribute(ctx.builder.getContext(),"julia.fsig", StringRef(sigbuf.buf, sigbuf.size)), f->getAttributes()})); + ios_close(&sigbuf); + } else + f->setAttributes(AttributeList::get(f->getContext(), {attributes, f->getAttributes()})); + } + else { + assert(f->getFunctionType() == ftype); + } + fval = f; } else { - assert(f->getFunctionType() == ftype); + if (fval->getType()->isIntegerTy()) + fval = emit_inttoptr(ctx, fval, ftype->getPointerTo()); + else + fval = emit_bitcast(ctx, fval, ftype->getPointerTo()); + } + if (auto F = dyn_cast(fval)) { + if (gcstack_arg) + F->setCallingConv(CallingConv::Swift); + assert(F->arg_size() >= argnames.size()); + for (size_t i = 0; i < argnames.size(); i++) { + F->getArg(i)->setName(argnames[i]); + } } - props.decl = f; + props.decl = FunctionCallee(ftype, fval); + props.attrs = attributes; return props; } @@ -6592,7 +7473,7 @@ static DISubroutineType * get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl_value_t *sig, DIBuilder &dbuilder) { size_t nargs = jl_nparams(sig); // TODO: if this is a Varargs function, our debug info for the `...` var may be misleading - std::vector ditypes(nargs + 1); + SmallVector ditypes(nargs + 1); ditypes[0] = julia_type_to_di(ctx, debuginfo, rt, &dbuilder, false); for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_tparam(sig, i); @@ -6601,6 +7482,7 @@ get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl return dbuilder.createSubroutineType(dbuilder.getOrCreateTypeArray(ditypes)); } +/* aka Core.Compiler.tuple_tfunc */ static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq) { size_t nvargs = jl_nparams(lam->specTypes)-nreq; @@ -6608,13 +7490,38 @@ static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq) JL_GC_PUSH1(&tupargs); for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) { jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); + // n.b. 
specTypes is required to be a datatype by construction for specsig + if (is_uniquerep_Type(argType)) + argType = jl_typeof(jl_tparam0(argType)); + else if (jl_has_intersect_type_not_kind(argType)) { + jl_value_t *ts[2] = {argType, (jl_value_t*)jl_type_type}; + argType = jl_type_union(ts, 2); + } jl_svecset(tupargs, i-nreq, argType); } - jl_datatype_t *typ = jl_apply_tuple_type(tupargs); + jl_value_t *typ = jl_apply_tuple_type(tupargs, 1); JL_GC_POP(); - return typ; + return (jl_datatype_t*)typ; } +static std::string get_function_name(bool specsig, bool needsparams, const char *unadorned_name, const Triple &TargetTriple) +{ + std::string _funcName; + raw_string_ostream funcName(_funcName); + // try to avoid conflicts in the global symbol table + if (specsig) + funcName << "julia_"; // api 5 + else if (needsparams) + funcName << "japi3_"; + else + funcName << "japi1_"; + if (TargetTriple.isOSLinux()) { + if (unadorned_name[0] == '@') + unadorned_name++; + } + funcName << unadorned_name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); + return funcName.str(); +} // Compile to LLVM IR, using a specialized signature if applicable. static jl_llvm_functions_t @@ -6630,8 +7537,9 @@ static jl_llvm_functions_t jl_llvm_functions_t declarations; jl_codectx_t ctx(*params.tsctx.getContext(), params); jl_datatype_t *vatyp = NULL; - JL_GC_PUSH3(&ctx.code, &ctx.roots, &vatyp); + JL_GC_PUSH2(&ctx.code, &vatyp); ctx.code = src->code; + ctx.source = src; std::map labels; bool toplevel = false; @@ -6654,13 +7562,12 @@ static jl_llvm_functions_t } ctx.nReqArgs = nreq; if (va) { - jl_sym_t *vn = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, ctx.nargs - 1); + jl_sym_t *vn = slot_symbol(ctx, ctx.nargs-1); if (vn != jl_unused_sym) ctx.vaSlot = ctx.nargs - 1; } toplevel = !jl_is_method(lam->def.method); ctx.rettype = jlrettype; - ctx.source = src; ctx.funcName = ctx.name; ctx.spvals_ptr = NULL; jl_array_t *stmts = ctx.code; @@ -6680,7 +7587,7 @@ static jl_llvm_functions_t toplineno = lam->def.method->line; ctx.file = jl_symbol_name(lam->def.method->file); } - else if (jl_array_len(src->linetable) > 0) { + else if (jl_array_nrows(src->linetable) > 0) { jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, 0); ctx.file = jl_symbol_name((jl_sym_t*)jl_fieldref_noalloc(locinfo, 2)); toplineno = jl_unbox_int32(jl_fieldref(locinfo, 3)); @@ -6690,14 +7597,12 @@ static jl_llvm_functions_t // jl_printf(JL_STDERR, "\n*** compiling %s at %s:%d\n\n", // jl_symbol_name(ctx.name), ctx.file.str().c_str(), toplineno); - ctx.debug_enabled = true; + bool debug_enabled = ctx.emission_context.debug_level != 0; if (dbgFuncName.empty()) // Should never happen anymore? - ctx.debug_enabled = 0; - if (jl_options.debug_level == 0) - ctx.debug_enabled = 0; + debug_enabled = false; // step 2. process var-info lists to see what vars need boxing - int n_ssavalues = jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes); + int n_ssavalues = jl_is_long(src->ssavaluetypes) ? 
jl_unbox_long(src->ssavaluetypes) : jl_array_nrows(src->ssavaluetypes); size_t vinfoslen = jl_array_dim0(src->slotflags); ctx.slots.resize(vinfoslen, jl_varinfo_t(ctx.builder.getContext())); assert(lam->specTypes); // the specTypes field should always be assigned @@ -6718,14 +7623,16 @@ static jl_llvm_functions_t for (i = 0; i < nreq; i++) { jl_varinfo_t &varinfo = ctx.slots[i]; varinfo.isArgument = true; - jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i); + jl_sym_t *argname = slot_symbol(ctx, i); if (argname == jl_unused_sym) continue; jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i); + // TODO: jl_nth_slot_type should call jl_rewrap_unionall + // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise // OpaqueClosure implicitly loads the env if (i == 0 && ctx.is_opaque_closure) { if (jl_is_array(src->slottypes)) { - ty = jl_arrayref((jl_array_t*)src->slottypes, i); + ty = jl_array_ptr_ref((jl_array_t*)src->slottypes, i); } else { ty = (jl_value_t*)jl_any_type; @@ -6761,42 +7668,68 @@ static jl_llvm_functions_t if (!specsig) ctx.nReqArgs--; // function not part of argArray in jlcall - std::string _funcName; - raw_string_ostream funcName(_funcName); - // try to avoid conflicts in the global symbol table - if (specsig) - funcName << "julia_"; // api 5 - else if (needsparams) - funcName << "japi3_"; - else - funcName << "japi1_"; - const char* unadorned_name = ctx.name; -#if defined(_OS_LINUX_) - if (unadorned_name[0] == '@') - unadorned_name++; -#endif - funcName << unadorned_name << "_" << globalUniqueGeneratedNames++; - declarations.specFunctionObject = funcName.str(); + std::string _funcName = get_function_name(specsig, needsparams, ctx.name, ctx.emission_context.TargetTriple); + declarations.specFunctionObject = _funcName; // allocate Function declarations and wrapper objects //Safe because params holds ctx lock Module *M = TSM.getModuleUnlocked(); + M->addModuleFlag(Module::Warning, "julia.debug_level", ctx.emission_context.debug_level); jl_debugcache_t debuginfo; debuginfo.initialize(M); jl_returninfo_t returninfo = {}; Function *f = NULL; bool has_sret = false; if (specsig) { // assumes !va and !needsparams - returninfo = get_specsig_function(ctx, M, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure); - f = returninfo.decl; + BitVector used_args; + size_t args_begin; + returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, + jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg), &used_args, &args_begin); + f = cast(returninfo.decl.getCallee()); has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); - jl_init_function(f); + jl_init_function(f, ctx.emission_context.TargetTriple); + if (ctx.emission_context.debug_level >= 2) { + auto arg_typename = [&](size_t i) JL_NOTSAFEPOINT { + auto tp = jl_tparam(lam->specTypes, i); + return jl_is_datatype(tp) ? 
jl_symbol_name(((jl_datatype_t*)tp)->name->name) : ""; + }; + size_t nreal = 0; + for (size_t i = 0; i < std::min(nreq, static_cast(used_args.size())); i++) { + jl_sym_t *argname = slot_symbol(ctx, i); + if (argname == jl_unused_sym) + continue; + if (used_args.test(i)) { + auto &arg = *f->getArg(args_begin++); + nreal++; + auto name = jl_symbol_name(argname); + if (!name[0]) { + arg.setName(StringRef("#") + Twine(nreal) + StringRef("::") + arg_typename(i)); + } else { + arg.setName(name + StringRef("::") + arg_typename(i)); + } + } + } + if (va && ctx.vaSlot != -1) { + size_t vidx = 0; + for (size_t i = nreq; i < used_args.size(); i++) { + if (used_args.test(i)) { + auto &arg = *f->getArg(args_begin++); + auto type = arg_typename(i); + const char *name = jl_symbol_name(slot_symbol(ctx, ctx.vaSlot)); + if (!name[0]) + name = "..."; + vidx++; + arg.setName(name + StringRef("[") + Twine(vidx) + StringRef("]::") + type); + } + } + } + } // common pattern: see if all return statements are an argument in that // case the apply-generic call can re-use the original box for the return int retarg = [stmts, nreq]() { int retarg = -1; - for (size_t i = 0; i < jl_array_len(stmts); ++i) { + for (size_t i = 0; i < jl_array_nrows(stmts); ++i) { jl_value_t *stmt = jl_array_ptr_ref(stmts, i); if (jl_is_returnnode(stmt)) { stmt = jl_returnnode_value(stmt); @@ -6817,7 +7750,7 @@ static jl_llvm_functions_t }(); std::string wrapName; - raw_string_ostream(wrapName) << "jfptr_" << unadorned_name << "_" << globalUniqueGeneratedNames++; + raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1); declarations.functionObject = wrapName; (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) @@ -6828,22 +7761,27 @@ static jl_llvm_functions_t f = Function::Create(needsparams ? ctx.types().T_jlfuncparams : ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage, declarations.specFunctionObject, M); - jl_init_function(f); + jl_init_function(f, ctx.emission_context.TargetTriple); + if (needsparams) { + jl_name_jlfuncparams_args(ctx.emission_context, f); + } else { + jl_name_jlfunc_args(ctx.emission_context, f); + } f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()})); returninfo.decl = f; declarations.functionObject = needsparams ? 
"jl_fptr_sparam" : "jl_fptr_args"; } -#if JL_LLVM_VERSION >= 140000 + if (ctx.emission_context.debug_level >= 2 && lam->def.method && jl_is_method(lam->def.method) && lam->specTypes != (jl_value_t*)jl_emptytuple_type) { + ios_t sigbuf; + ios_mem(&sigbuf, 0); + jl_static_show_func_sig((JL_STREAM*) &sigbuf, (jl_value_t*)lam->specTypes); + f->addFnAttr("julia.fsig", StringRef(sigbuf.buf, sigbuf.size)); + ios_close(&sigbuf); + } + AttrBuilder FnAttrs(ctx.builder.getContext(), f->getAttributes().getFnAttrs()); -#else - AttrBuilder FnAttrs(f->getAttributes().getFnAttributes()); -#endif -#if JL_LLVM_VERSION >= 140000 AttrBuilder RetAttrs(ctx.builder.getContext(), f->getAttributes().getRetAttrs()); -#else - AttrBuilder RetAttrs(f->getAttributes().getRetAttributes()); -#endif if (jlrettype == (jl_value_t*)jl_bottom_type) FnAttrs.addAttribute(Attribute::NoReturn); @@ -6853,7 +7791,7 @@ static jl_llvm_functions_t FnAttrs.addAttribute(polly::PollySkipFnAttr); #endif - if (jl_has_meta(stmts, jl_noinline_sym)) + if (src->inlining == 2) FnAttrs.addAttribute(Attribute::NoInline); #ifdef JL_DEBUG_BUILD @@ -6876,47 +7814,26 @@ static jl_llvm_functions_t ctx.f = f; // Step 4b. determine debug info signature and other type info for locals - DIBuilder dbuilder(*M); + DICompileUnit::DebugEmissionKind emissionKind = (DICompileUnit::DebugEmissionKind) ctx.params->debug_info_kind; + DICompileUnit::DebugNameTableKind tableKind; + if (JL_FEAT_TEST(ctx, gnu_pubnames)) + tableKind = DICompileUnit::DebugNameTableKind::GNU; + else + tableKind = DICompileUnit::DebugNameTableKind::None; + DIBuilder dbuilder(*M, true, debug_enabled ? getOrCreateJuliaCU(*M, emissionKind, tableKind) : NULL); DIFile *topfile = NULL; DISubprogram *SP = NULL; DebugLoc noDbg, topdebugloc; - if (ctx.debug_enabled) { - DICompileUnit::DebugEmissionKind emissionKind = (DICompileUnit::DebugEmissionKind) ctx.params->debug_info_kind; - DICompileUnit::DebugNameTableKind tableKind; - - if (JL_FEAT_TEST(ctx, gnu_pubnames)) { - tableKind = DICompileUnit::DebugNameTableKind::GNU; - } - else { - tableKind = DICompileUnit::DebugNameTableKind::None; - } + if (debug_enabled) { topfile = dbuilder.createFile(ctx.file, "."); - DICompileUnit *CU = - dbuilder.createCompileUnit(llvm::dwarf::DW_LANG_Julia - ,topfile // File - ,"julia" // Producer - ,true // isOptimized - ,"" // Flags - ,0 // RuntimeVersion - ,"" // SplitName - ,emissionKind // Kind - ,0 // DWOId - ,true // SplitDebugInlining - ,false // DebugInfoForProfiling - ,tableKind // NameTableKind - ); - DISubroutineType *subrty; - if (jl_options.debug_level <= 1) { + if (ctx.emission_context.debug_level <= 1) subrty = debuginfo.jl_di_func_null_sig; - } - else if (!specsig) { + else if (!specsig) subrty = debuginfo.jl_di_func_sig; - } - else { + else subrty = get_specsig_di(ctx, debuginfo, jlrettype, lam->specTypes, dbuilder); - } - SP = dbuilder.createFunction(CU + SP = dbuilder.createFunction(nullptr ,dbgFuncName // Name ,f->getName() // LinkageName ,topfile // File @@ -6931,11 +7848,11 @@ static jl_llvm_functions_t ); topdebugloc = DILocation::get(ctx.builder.getContext(), toplineno, 0, SP, NULL); f->setSubprogram(SP); - if (jl_options.debug_level >= 2) { + if (ctx.emission_context.debug_level >= 2) { const bool AlwaysPreserve = true; // Go over all arguments and local variables and initialize their debug information for (i = 0; i < nreq; i++) { - jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i); + jl_sym_t *argname = slot_symbol(ctx, i); if (argname == jl_unused_sym) continue; jl_varinfo_t 
&varinfo = ctx.slots[i]; @@ -6962,7 +7879,7 @@ static jl_llvm_functions_t DINode::FlagZero); // Flags (TODO: Do we need any) } for (i = 0; i < vinfoslen; i++) { - jl_sym_t *s = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i); + jl_sym_t *s = slot_symbol(ctx, i); jl_varinfo_t &varinfo = ctx.slots[i]; if (varinfo.isArgument || s == jl_empty_sym || s == jl_unused_sym) continue; @@ -6993,6 +7910,7 @@ static jl_llvm_functions_t fArg = &*AI++; argArray = &*AI++; pargArray = ctx.builder.CreateAlloca(argArray->getType()); + setName(ctx.emission_context, pargArray, "stackargs"); ctx.builder.CreateStore(argArray, pargArray, true/*volatile store to prevent removal of this alloca*/); argCount = &*AI++; ctx.argArray = argArray; @@ -7001,14 +7919,14 @@ static jl_llvm_functions_t ctx.spvals_ptr = &*AI++; } } - // step 6. set up GC frame allocate_gc_frame(ctx, b0); Value *last_age = NULL; Value *world_age_field = get_last_age_field(ctx); if (toplevel || ctx.is_opaque_closure) { - last_age = tbaa_decorate(ctx.tbaa().tbaa_gcframe, ctx.builder.CreateAlignedLoad( - getSizeTy(ctx.builder.getContext()), world_age_field, Align(sizeof(size_t)))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad( + ctx.types().T_size, world_age_field, ctx.types().alignof_ptr)); } // step 7. allocate local variables slots @@ -7034,6 +7952,7 @@ static jl_llvm_functions_t lv->setName(jl_symbol_name(s)); varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + setName(ctx.emission_context, varinfo.pTIndex, "tindex"); } else if (allunbox) { // all ghost values just need a selector allocated @@ -7053,14 +7972,14 @@ static jl_llvm_functions_t Type *vtype = julia_type_to_llvm(ctx, jt, &isboxed); assert(!isboxed); assert(!type_is_ghost(vtype) && "constants should already be handled"); - Value *lv = new AllocaInst(vtype, 0, jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); + Value *lv = new AllocaInst(vtype, M->getDataLayout().getAllocaAddrSpace(), nullptr, Align(jl_datatype_align(jt)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); if (CountTrackedPointers(vtype).count) { StoreInst *SI = new StoreInst(Constant::getNullValue(vtype), lv, false, Align(sizeof(void*))); SI->insertAfter(ctx.topalloca); } varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); alloc_def_flag(ctx, varinfo); - if (ctx.debug_enabled && varinfo.dinfo) { + if (debug_enabled && varinfo.dinfo) { assert((Metadata*)varinfo.dinfo->getType() != debuginfo.jl_pvalue_dillvmt); dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(), topdebugloc, @@ -7072,12 +7991,12 @@ static jl_llvm_functions_t specsig || // for arguments, give them stack slots if they aren't in `argArray` (otherwise, will use that pointer) (va && (int)i == ctx.vaSlot) || // or it's the va arg tuple i == 0) { // or it is the first argument (which isn't in `argArray`) - AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, 0, - jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); + AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(), + nullptr, Align(sizeof(jl_value_t*)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca); StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*))); SI->insertAfter(ctx.topalloca); varinfo.boxroot = av; - if (ctx.debug_enabled && varinfo.dinfo) { + if (debug_enabled && 
varinfo.dinfo) { DIExpression *expr; if ((Metadata*)varinfo.dinfo->getType() == debuginfo.jl_pvalue_dillvmt) { expr = dbuilder.createExpression(); @@ -7113,12 +8032,21 @@ static jl_llvm_functions_t // yield to them. // Also count ssavalue uses. { - for (size_t i = 0; i < jl_array_len(stmts); ++i) { + for (size_t i = 0; i < jl_array_nrows(stmts); ++i) { jl_value_t *stmt = jl_array_ptr_ref(stmts, i); auto scan_ssavalue = [&](jl_value_t *val) { if (jl_is_ssavalue(val)) { - ctx.ssavalue_usecount[((jl_ssavalue_t*)val)->id-1] += 1; + size_t ssa_idx = ((jl_ssavalue_t*)val)->id-1; + /* + * We technically allow out of bounds SSAValues in dead IR, so make + * sure to bounds check this here. It's still not *good* to leave + * dead code in the IR, because this will conservatively overcount + * it, but let's at least make it not crash. + */ + if (ssa_idx < ctx.ssavalue_usecount.size()) { + ctx.ssavalue_usecount[ssa_idx] += 1; + } return true; } return false; @@ -7127,7 +8055,7 @@ static jl_llvm_functions_t if (jl_is_phicnode(stmt)) { jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(stmt, 0); - for (size_t j = 0; j < jl_array_len(values); ++j) { + for (size_t j = 0; j < jl_array_nrows(values); ++j) { jl_value_t *val = jl_array_ptr_ref(values, j); assert(jl_is_ssavalue(val)); upsilon_to_phic[((jl_ssavalue_t*)val)->id] = i; @@ -7145,7 +8073,7 @@ static jl_llvm_functions_t // step 8. move args into local variables Function::arg_iterator AI = f->arg_begin(); - std::vector attrs(f->arg_size()); // function declaration attributes + SmallVector attrs(f->arg_size()); // function declaration attributes auto get_specsig_arg = [&](jl_value_t *argType, Type *llvmArgType, bool isboxed) { if (type_is_ghost(llvmArgType)) { // this argument is not actually passed @@ -7156,11 +8084,7 @@ static jl_llvm_functions_t } Argument *Arg = &*AI; ++AI; -#if JL_LLVM_VERSION >= 140000 AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo())); -#else - AttrBuilder param(f->getAttributes().getParamAttributes(Arg->getArgNo())); -#endif jl_cgval_t theArg; if (llvmArgType->isAggregateType()) { maybe_mark_argument_dereferenceable(param, argType); @@ -7173,21 +8097,17 @@ static jl_llvm_functions_t if (theArg.tbaa == ctx.tbaa().tbaa_immut) theArg.tbaa = ctx.tbaa().tbaa_const; } - attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes + attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes return theArg; }; if (has_sret) { Argument *Arg = &*AI; ++AI; -#if JL_LLVM_VERSION >= 140000 AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo())); -#else - AttrBuilder param(f->getAttributes().getParamAttributes(Arg->getArgNo())); -#endif if (returninfo.cc == jl_returninfo_t::Union) { param.addAttribute(Attribute::NonNull); - // The `dereferencable` below does not imply `nonnull` for non addrspace(0) pointers. + // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers. param.addDereferenceableAttr(returninfo.union_bytes); param.addAlignmentAttr(returninfo.union_align); } @@ -7197,33 +8117,44 @@ static jl_llvm_functions_t TypeSize sz = DL.getTypeAllocSize(RT); Align al = DL.getPrefTypeAlign(RT); param.addAttribute(Attribute::NonNull); - // The `dereferencable` below does not imply `nonnull` for non addrspace(0) pointers. + // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers. 
param.addDereferenceableAttr(sz); param.addAlignmentAttr(al); } - attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes + attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes } if (returninfo.return_roots) { Argument *Arg = &*AI; ++AI; -#if JL_LLVM_VERSION >= 140000 AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo())); -#else - AttrBuilder param(f->getAttributes().getParamAttributes(Arg->getArgNo())); -#endif param.addAttribute(Attribute::NonNull); - // The `dereferencable` below does not imply `nonnull` for non addrspace(0) pointers. + // The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers. size_t size = returninfo.return_roots * sizeof(jl_value_t*); param.addDereferenceableAttr(size); param.addAlignmentAttr(Align(sizeof(jl_value_t*))); - attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes + attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes + } + if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){ + Argument *Arg = &*AI; + ++AI; + AttrBuilder param(ctx.builder.getContext()); + attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); } for (i = 0; i < nreq; i++) { - jl_sym_t *s = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i); - jl_value_t *argType = (i == 0 && ctx.is_opaque_closure) ? (jl_value_t*)jl_any_type : - jl_nth_slot_type(lam->specTypes, i); + jl_sym_t *s = slot_symbol(ctx, i); + jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); + // TODO: jl_nth_slot_type should call jl_rewrap_unionall? + // specTypes is required to be a datatype by construction for specsig, but maybe not otherwise bool isboxed = deserves_argbox(argType); - Type *llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); + Type *llvmArgType = NULL; + if (i == 0 && ctx.is_opaque_closure) { + isboxed = true; + llvmArgType = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); + argType = (jl_value_t*)jl_any_type; + } + else { + llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); + } if (s == jl_unused_sym) { if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) ++AI; @@ -7237,22 +8168,45 @@ static jl_llvm_functions_t ++AI; } else { - if (specsig) { + // If this is an opaque closure, implicitly load the env and switch + // the world age. 
+ if (i == 0 && ctx.is_opaque_closure) { + // Load closure world + Value *oc_this = decay_derived(ctx, &*AI++); + Value *argaddr = emit_bitcast(ctx, oc_this, getInt8PtrTy(ctx.builder.getContext())); + Value *worldaddr = ctx.builder.CreateInBoundsGEP( + getInt8Ty(ctx.builder.getContext()), argaddr, + ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, world))); + + jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, + nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); + emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr.value()); + + // Load closure env + Value *envaddr = ctx.builder.CreateInBoundsGEP( + getInt8Ty(ctx.builder.getContext()), argaddr, + ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, captures))); + + jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, + nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); + theArg = update_julia_type(ctx, closure_env, vi.value.typ); + } + else if (specsig) { theArg = get_specsig_arg(argType, llvmArgType, isboxed); } else { if (i == 0) { // first (function) arg is separate in jlcall - theArg = mark_julia_type(ctx, fArg, true, ctx.is_opaque_closure ? - argType : vi.value.typ); + theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); } else { Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1); - Value *load = tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable( + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), false, vi.value.typ)); theArg = mark_julia_type(ctx, load, true, vi.value.typ); - if (ctx.debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { + if (debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { SmallVector addr; addr.push_back(llvm::dwarf::DW_OP_deref); addr.push_back(llvm::dwarf::DW_OP_plus_uconst); @@ -7266,34 +8220,12 @@ static jl_llvm_functions_t } } - // If this is an opaque closure, implicitly load the env and switch - // the world age. 
- if (i == 0 && ctx.is_opaque_closure) { - // Load closure world - Value *argaddr = emit_bitcast(ctx, data_pointer(ctx, theArg), getInt8PtrTy(ctx.builder.getContext())); - Value *worldaddr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), argaddr, - ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_opaque_closure_t, world))); - - jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, - theArg.tbaa, nullptr, false, AtomicOrdering::NotAtomic, false, sizeof(size_t)); - emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, sizeof(size_t)); - - // Load closure env - Value *envaddr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), argaddr, - ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_opaque_closure_t, captures))); - - jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, - theArg.tbaa, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); - theArg = convert_julia_type(ctx, closure_env, vi.value.typ); - } if (vi.boxroot == NULL) { assert(vi.value.V == NULL && "unexpected variable slot created for argument"); // keep track of original (possibly boxed) value to avoid re-boxing or moving vi.value = theArg; - if (specsig && theArg.V && ctx.debug_enabled && vi.dinfo) { + if (specsig && theArg.V && debug_enabled && vi.dinfo) { SmallVector addr; Value *parg; if (theArg.ispointer()) { @@ -7326,20 +8258,21 @@ static jl_llvm_functions_t } else if (specsig) { ctx.nvargs = jl_nparams(lam->specTypes) - nreq; - jl_cgval_t *vargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * ctx.nvargs); + SmallVector vargs(ctx.nvargs); for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) { jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); + // n.b. specTypes is required to be a datatype by construction for specsig bool isboxed = deserves_argbox(argType); Type *llvmArgType = isboxed ? 
ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType); vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed); } if (jl_is_concrete_type(vi.value.typ)) { - jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs); + jl_cgval_t tuple = emit_new_struct(ctx, vi.value.typ, ctx.nvargs, vargs.data()); emit_varinfo_assign(ctx, vi, tuple); } else { restTuple = emit_jlcall(ctx, jltuple_func, Constant::getNullValue(ctx.types().T_prjlvalue), - vargs, ctx.nvargs, julia_call); + vargs.data(), ctx.nvargs, julia_call); jl_cgval_t tuple = mark_julia_type(ctx, restTuple, true, vi.value.typ); emit_varinfo_assign(ctx, vi, tuple); } @@ -7351,7 +8284,7 @@ static jl_llvm_functions_t ctx.builder.CreateCall(F, { Constant::getNullValue(ctx.types().T_prjlvalue), ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray, - ConstantInt::get(getSizeTy(ctx.builder.getContext()), nreq - 1)), + ConstantInt::get(ctx.types().T_size, nreq - 1)), ctx.builder.CreateSub(argCount, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq - 1)) }); restTuple->setAttributes(F->getAttributes()); @@ -7386,10 +8319,10 @@ static jl_llvm_functions_t return other.loc == loc && other.file == file && other.line == line && other.is_user_code == is_user_code && other.is_tracked == is_tracked && other.inlined_at == inlined_at; } }; - std::vector linetable; + SmallVector linetable; { // populate the linetable data format assert(jl_is_array(src->linetable)); - size_t nlocs = jl_array_len(src->linetable); + size_t nlocs = jl_array_nrows(src->linetable); std::map, DISubprogram*> subprograms; linetable.resize(nlocs + 1); DebugLineTable &topinfo = linetable[0]; @@ -7403,7 +8336,7 @@ static jl_llvm_functions_t // LineInfoNode(mod::Module, method::Any, file::Symbol, line::Int32, inlined_at::Int32) jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, i); DebugLineTable &info = linetable[i + 1]; - assert(jl_typeis(locinfo, jl_lineinfonode_type)); + assert(jl_typetagis(locinfo, jl_lineinfonode_type)); jl_module_t *module = (jl_module_t*)jl_fieldref_noalloc(locinfo, 0); jl_value_t *method = jl_fieldref_noalloc(locinfo, 1); jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 2); @@ -7418,7 +8351,7 @@ static jl_llvm_functions_t else info.is_user_code = in_user_mod(module); info.is_tracked = in_tracked_path(info.file); - if (ctx.debug_enabled) { + if (debug_enabled) { StringRef fname; if (jl_is_method_instance(method)) method = ((jl_method_instance_t*)method)->def.value; @@ -7449,19 +8382,19 @@ static jl_llvm_functions_t ,nullptr // ThrownTypes ); } - DebugLoc inl_loc = (info.inlined_at == 0) ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : linetable.at(info.inlined_at).loc; + DebugLoc inl_loc = (info.inlined_at == 0) ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : linetable[info.inlined_at].loc; info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, inl_SP, inl_loc); } } } } - std::vector aliasscopes; + SmallVector aliasscopes; MDNode* current_aliasscope = nullptr; - std::vector scope_stack; - std::vector scope_list_stack; + SmallVector scope_stack; + SmallVector scope_list_stack; { - size_t nstmts = jl_array_len(stmts); + size_t nstmts = jl_array_nrows(stmts); aliasscopes.resize(nstmts + 1, nullptr); MDBuilder mbuilder(ctx.builder.getContext()); MDNode *alias_domain = mbuilder.createAliasScopeDomain(ctx.name); @@ -7491,12 +8424,24 @@ static jl_llvm_functions_t Instruction &prologue_end = ctx.builder.GetInsertBlock()->back(); + // step 11a. 
For top-level code, load the world age + if (toplevel && !ctx.is_opaque_closure) { + LoadInst *world = ctx.builder.CreateAlignedLoad(ctx.types().T_size, + prepare_global_in(jl_Module, jlgetworld_global), ctx.types().alignof_ptr); + world->setOrdering(AtomicOrdering::Acquire); + ctx.builder.CreateAlignedStore(world, world_age_field, ctx.types().alignof_ptr); + } + + // step 11b. Emit the entry safepoint + if (JL_FEAT_TEST(ctx, safepoint_on_entry)) + emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const); - // step 11. Do codegen in control flow order - std::vector workstack; + // step 11c. Do codegen in control flow order + SmallVector workstack; std::map BB; std::map come_from_bb; int cursor = 0; + int current_label = 0; auto find_next_stmt = [&] (int seq_next) { // new style ir is always in dominance order, but frontend IR might not be // `seq_next` is the next statement we want to emit @@ -7513,6 +8458,7 @@ static jl_llvm_functions_t workstack.pop_back(); auto nextbb = BB.find(item + 1); if (nextbb == BB.end()) { + // Not a BB cursor = item; return; } @@ -7523,32 +8469,38 @@ static jl_llvm_functions_t seq_next = -1; // if this BB is non-empty, we've visited it before so skip it if (!nextbb->second->getTerminator()) { + // New BB ctx.builder.SetInsertPoint(nextbb->second); cursor = item; + current_label = item; return; } } cursor = -1; }; + // If a pkgimage or sysimage is being generated, disable tracking. + // This means sysimage build or pkgimage precompilation workloads aren't tracked. auto do_coverage = [&] (bool in_user_code, bool is_tracked) { - return (coverage_mode == JL_LOG_ALL || + return (jl_generating_output() == 0 && + (coverage_mode == JL_LOG_ALL || (in_user_code && coverage_mode == JL_LOG_USER) || - (is_tracked && coverage_mode == JL_LOG_PATH)); + (is_tracked && coverage_mode == JL_LOG_PATH))); }; auto do_malloc_log = [&] (bool in_user_code, bool is_tracked) { - return (malloc_log_mode == JL_LOG_ALL || + return (jl_generating_output() == 0 && + (malloc_log_mode == JL_LOG_ALL || (in_user_code && malloc_log_mode == JL_LOG_USER) || - (is_tracked && malloc_log_mode == JL_LOG_PATH)); + (is_tracked && malloc_log_mode == JL_LOG_PATH))); }; - std::vector current_lineinfo, new_lineinfo; + SmallVector current_lineinfo, new_lineinfo; auto coverageVisitStmt = [&] (size_t dbg) { if (dbg == 0 || dbg >= linetable.size()) return; // Compute inlining stack for current line, inner frame first while (dbg) { new_lineinfo.push_back(dbg); - dbg = linetable.at(dbg).inlined_at; + dbg = linetable[dbg].inlined_at; } // Visit frames which differ from previous statement as tracked in // current_lineinfo (tracked outer frame first). 
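(For reference, the gate added to do_coverage/do_malloc_log in the hunk above can be read as a standalone predicate. The following is a minimal illustrative sketch only, not part of the patch: it assumes the declarations of jl_generating_output(), coverage_mode and the JL_LOG_* constants already used by the surrounding code, and the helper name should_emit_coverage is hypothetical.)

    // Illustrative sketch (not part of the patch): coverage and allocation
    // logging are skipped entirely while producing a sysimage or pkgimage,
    // and otherwise follow the configured logging mode, mirroring the
    // condition introduced in the hunk above.
    static bool should_emit_coverage(bool in_user_code, bool is_tracked)
    {
        if (jl_generating_output() != 0)
            return false; // sysimage/pkgimage precompilation workloads are not tracked
        return coverage_mode == JL_LOG_ALL ||
               (in_user_code && coverage_mode == JL_LOG_USER) ||
               (is_tracked && coverage_mode == JL_LOG_PATH);
    }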
@@ -7557,7 +8509,7 @@ static jl_llvm_functions_t unsigned newdbg = new_lineinfo[new_lineinfo.size() - dbg - 1]; if (newdbg != current_lineinfo[dbg]) { current_lineinfo[dbg] = newdbg; - const auto &info = linetable.at(newdbg); + const auto &info = linetable[newdbg]; if (do_coverage(info.is_user_code, info.is_tracked)) coverageVisitLine(ctx, info.file, info.line); } @@ -7570,9 +8522,9 @@ static jl_llvm_functions_t ctx.builder.CreateCall(prepare_call(sync_gc_total_bytes_func), {sync}); return; } - while (linetable.at(dbg).inlined_at) - dbg = linetable.at(dbg).inlined_at; - mallocVisitLine(ctx, ctx.file, linetable.at(dbg).line, sync); + while (linetable[dbg].inlined_at) + dbg = linetable[dbg].inlined_at; + mallocVisitLine(ctx, ctx.file, linetable[dbg].line, sync); }; if (coverage_mode != JL_LOG_NONE) { // record all lines that could be covered @@ -7601,14 +8553,13 @@ static jl_llvm_functions_t // targets. if (i + 2 <= stmtslen) branch_targets.insert(i + 2); - } else if (jl_is_expr(stmt)) { - if (((jl_expr_t*)stmt)->head == jl_enter_sym) { - branch_targets.insert(i + 1); - if (i + 2 <= stmtslen) - branch_targets.insert(i + 2); - int dest = jl_unbox_long(jl_array_ptr_ref(((jl_expr_t*)stmt)->args, 0)); - branch_targets.insert(dest); - } + } else if (jl_is_enternode(stmt)) { + branch_targets.insert(i + 1); + if (i + 2 <= stmtslen) + branch_targets.insert(i + 2); + size_t catch_dest = jl_enternode_catch_dest(stmt); + if (catch_dest) + branch_targets.insert(catch_dest); } else if (jl_is_gotonode(stmt)) { int dest = jl_gotonode_label(stmt); branch_targets.insert(dest); @@ -7616,8 +8567,8 @@ static jl_llvm_functions_t branch_targets.insert(i + 2); } else if (jl_is_phinode(stmt)) { jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(stmt, 0); - for (size_t j = 0; j < jl_array_len(edges); ++j) { - size_t edge = ((int32_t*)jl_array_data(edges))[j]; + for (size_t j = 0; j < jl_array_nrows(edges); ++j) { + size_t edge = jl_array_data(edges, int32_t)[j]; if (edge == i) branch_targets.insert(i + 1); } @@ -7635,9 +8586,9 @@ static jl_llvm_functions_t if (do_malloc_log(true, mod_is_tracked)) sync_bytes = ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {}); { // coverage for the function definition line number - const auto &topinfo = linetable.at(0); + const auto &topinfo = linetable[0]; if (linetable.size() > 1) { - if (topinfo == linetable.at(1)) + if (topinfo == linetable[1]) current_lineinfo.push_back(1); } if (do_coverage(topinfo.is_user_code, topinfo.is_tracked)) @@ -7646,15 +8597,14 @@ static jl_llvm_functions_t find_next_stmt(0); while (cursor != -1) { - int32_t debuginfoloc = ((int32_t*)jl_array_data(src->codelocs))[cursor]; + int32_t debuginfoloc = jl_array_data(src->codelocs, int32_t)[cursor]; if (debuginfoloc > 0) { - if (ctx.debug_enabled) - ctx.builder.SetCurrentDebugLocation(linetable.at(debuginfoloc).loc); + if (debug_enabled) + ctx.builder.SetCurrentDebugLocation(linetable[debuginfoloc].loc); coverageVisitStmt(debuginfoloc); } - ctx.aliasscope = aliasscopes[cursor]; + ctx.noalias().aliasscope.current = aliasscopes[cursor]; jl_value_t *stmt = jl_array_ptr_ref(stmts, cursor); - jl_expr_t *expr = jl_is_expr(stmt) ? 
(jl_expr_t*)stmt : nullptr; if (jl_is_returnnode(stmt)) { jl_value_t *retexpr = jl_returnnode_value(stmt); if (retexpr == NULL) { @@ -7665,6 +8615,11 @@ static jl_llvm_functions_t // this is basically a copy of emit_assignment, // but where the assignment slot is the retval jl_cgval_t retvalinfo = emit_expr(ctx, retexpr); + + if (ctx.is_opaque_closure) { + emit_typecheck(ctx, retvalinfo, jlrettype, "OpaqueClosure"); + } + retvalinfo = convert_julia_type(ctx, retvalinfo, jlrettype); if (retvalinfo.typ == jl_bottom_type) { CreateTrap(ctx.builder, false); @@ -7702,7 +8657,7 @@ static jl_llvm_functions_t // also need to account for the possibility the return object is boxed // and avoid / skip copying it to the stack isboxed_union = ctx.builder.CreateICmpNE( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); data = ctx.builder.CreateSelect(isboxed_union, retvalinfo.Vboxed, data); } @@ -7711,7 +8666,7 @@ static jl_llvm_functions_t // treat this as a simple boxed returninfo //assert(retvalinfo.isboxed); tindex = compute_tindex_unboxed(ctx, retvalinfo, jlrettype); - tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); data = boxed(ctx, retvalinfo); sret = NULL; } @@ -7732,8 +8687,8 @@ static jl_llvm_functions_t } if (returninfo.cc == jl_returninfo_t::SRet) { assert(jl_is_concrete_type(jlrettype)); - emit_memcpy(ctx, sret, nullptr, retvalinfo, jl_datatype_size(jlrettype), - julia_alignment(jlrettype)); + emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo, + jl_datatype_size(jlrettype), julia_alignment(jlrettype), julia_alignment(jlrettype)); } else { // must be jl_returninfo_t::Union emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union); @@ -7766,7 +8721,12 @@ static jl_llvm_functions_t if (jl_is_gotonode(stmt)) { int lname = jl_gotonode_label(stmt); come_from_bb[cursor+1] = ctx.builder.GetInsertBlock(); - ctx.builder.CreateBr(BB[lname]); + auto br = ctx.builder.CreateBr(BB[lname]); + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(lname - 1); continue; } @@ -7784,44 +8744,69 @@ static jl_llvm_functions_t workstack.push_back(lname - 1); BasicBlock *ifnot = BB[lname]; BasicBlock *ifso = BB[cursor+2]; + Instruction *br; if (ifnot == ifso) - ctx.builder.CreateBr(ifnot); + br = ctx.builder.CreateBr(ifnot); else - ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(cursor + 1); continue; } - else if (expr && expr->head == jl_enter_sym) { - jl_value_t **args = (jl_value_t**)jl_array_data(expr->args); - - assert(jl_is_long(args[0])); - int lname = jl_unbox_long(args[0]); - // Save exception stack depth at enter for use in pop_exception - Value *excstack_state = - ctx.builder.CreateCall(prepare_call(jl_excstack_state_func)); - assert(!ctx.ssavalue_assigned.at(cursor)); - ctx.SAvalues.at(cursor) = jl_cgval_t(excstack_state, (jl_value_t*)jl_ulong_type, NULL); - 
ctx.ssavalue_assigned.at(cursor) = true; - CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func)); - // We need to mark this on the call site as well. See issue #6757 - sj->setCanReturnTwice(); - Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); - BasicBlock *tryblk = BasicBlock::Create(ctx.builder.getContext(), "try", f); - BasicBlock *handlr = NULL; - handlr = BB[lname]; - workstack.push_back(lname - 1); - come_from_bb[cursor + 1] = ctx.builder.GetInsertBlock(); - ctx.builder.CreateCondBr(isz, tryblk, handlr); - ctx.builder.SetInsertPoint(tryblk); + else if (jl_is_enternode(stmt)) { + // For the two-arg version of :enter, twiddle the scope + Value *scope_ptr = NULL; + Value *old_scope = NULL; + jl_aliasinfo_t scope_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + if (jl_enternode_scope(stmt)) { + jl_cgval_t new_scope = emit_expr(ctx, jl_enternode_scope(stmt)); + Value *new_scope_boxed = boxed(ctx, new_scope); + scope_ptr = get_scope_field(ctx); + old_scope = scope_ai.decorateInst( + ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, scope_ptr, ctx.types().alignof_ptr)); + scope_ai.decorateInst( + ctx.builder.CreateAlignedStore(new_scope_boxed, scope_ptr, ctx.types().alignof_ptr)); + ctx.scope_restore[cursor] = std::make_pair(old_scope, scope_ptr); + } + int lname = jl_enternode_catch_dest(stmt); + if (lname) { + // Save exception stack depth at enter for use in pop_exception + Value *excstack_state = + ctx.builder.CreateCall(prepare_call(jl_excstack_state_func)); + assert(!ctx.ssavalue_assigned[cursor]); + ctx.SAvalues[cursor] = jl_cgval_t(excstack_state, (jl_value_t*)jl_ulong_type, NULL); + ctx.ssavalue_assigned[cursor] = true; + // Actually enter the exception frame + CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func)); + // We need to mark this on the call site as well. 
See issue #6757 + sj->setCanReturnTwice(); + Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); + BasicBlock *tryblk = BasicBlock::Create(ctx.builder.getContext(), "try", f); + BasicBlock *catchpop = BasicBlock::Create(ctx.builder.getContext(), "catch_pop", f); + BasicBlock *handlr = NULL; + handlr = BB[lname]; + workstack.push_back(lname - 1); + come_from_bb[cursor + 1] = ctx.builder.GetInsertBlock(); + ctx.builder.CreateCondBr(isz, tryblk, catchpop); + ctx.builder.SetInsertPoint(catchpop); + { + ctx.builder.CreateCall(prepare_call(jlleave_func), + ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1)); + if (old_scope) { + scope_ai.decorateInst( + ctx.builder.CreateAlignedStore(old_scope, scope_ptr, ctx.types().alignof_ptr)); + } + ctx.builder.CreateBr(handlr); + } + ctx.builder.SetInsertPoint(tryblk); + } } else { - if (!jl_is_method(ctx.linfo->def.method) && !ctx.is_opaque_closure) { - // TODO: inference is invalid if this has any effect (which it often does) - LoadInst *world = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), - prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t))); - world->setOrdering(AtomicOrdering::Acquire); - ctx.builder.CreateAlignedStore(world, world_age_field, Align(sizeof(size_t))); - } emit_stmtpos(ctx, stmt, cursor); mallocVisitStmt(debuginfoloc, nullptr); } @@ -7839,7 +8824,7 @@ static jl_llvm_functions_t // Codegen Phi nodes std::map, BasicBlock*> BB_rewrite_map; - std::vector ToDelete; + SmallVector ToDelete; for (auto &tup : ctx.PhiNodes) { jl_cgval_t phi_result; PHINode *VN; @@ -7852,8 +8837,8 @@ static jl_llvm_functions_t jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(r, 1); PHINode *TindexN = cast_or_null(phi_result.TIndex); DenseSet preds; - for (size_t i = 0; i < jl_array_len(edges); ++i) { - size_t edge = ((int32_t*)jl_array_data(edges))[i]; + for (size_t i = 0; i < jl_array_nrows(edges); ++i) { + size_t edge = jl_array_data(edges, int32_t)[i]; jl_value_t *value = jl_array_ptr_ref(values, i); // This edge value is undef, handle it the same as if the edge wasn't listed at all if (!value) @@ -7872,7 +8857,7 @@ static jl_llvm_functions_t // Only codegen this branch once for each PHI (the expression must be the same on all branches) #ifndef NDEBUG for (size_t j = 0; j < i; ++j) { - size_t j_edge = ((int32_t*)jl_array_data(edges))[j]; + size_t j_edge = jl_array_data(edges, int32_t)[j]; if (j_edge == edge) { assert(jl_egal(value, jl_array_ptr_ref(values, j))); } @@ -7886,9 +8871,7 @@ static jl_llvm_functions_t // Can't use `llvm::SplitCriticalEdge` here because // we may have invalid phi nodes in the destination. BasicBlock *NewBB = BasicBlock::Create(terminator->getContext(), - FromBB->getName() + "." + PhiBB->getName() + "_crit_edge"); - Function::iterator FBBI = FromBB->getIterator(); - ctx.f->getBasicBlockList().insert(++FBBI, NewBB); // insert after existing block + FromBB->getName() + "." 
+ PhiBB->getName() + "_crit_edge", FromBB->getParent(), FromBB->getNextNode()); // insert after existing block terminator->replaceSuccessorWith(PhiBB, NewBB); DebugLoc Loc = terminator->getDebugLoc(); terminator = BranchInst::Create(PhiBB); @@ -7952,7 +8935,7 @@ static jl_llvm_functions_t if (tindex == 0) { if (VN) V = boxed(ctx, val); - RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80); + RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER); } else { if (VN) @@ -7974,7 +8957,7 @@ static jl_llvm_functions_t if (dest) { // If dest is not set, this is a ghost union, the recipient of which // is often not prepared to handle a boxed representation of the ghost. - RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)); } new_union.TIndex = RTindex; } @@ -7982,8 +8965,8 @@ static jl_llvm_functions_t V = new_union.Vboxed ? new_union.Vboxed : Constant::getNullValue(ctx.types().T_prjlvalue); if (dest) { // basically, if !ghost union if (new_union.Vboxed != nullptr) { - Value *isboxed = ctx.builder.CreateICmpNE( // if 0x80 is set, we won't select this slot anyways - ctx.builder.CreateAnd(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), + Value *isboxed = ctx.builder.CreateICmpNE( // if UNION_BOX_MARKER is set, we won't select this slot anyways + ctx.builder.CreateAnd(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); skip = skip ? ctx.builder.CreateOr(isboxed, skip) : isboxed; } @@ -8028,7 +9011,7 @@ static jl_llvm_functions_t Value *undef = undef_value_for_type(VN->getType()); VN->addIncoming(undef, FromBB); if (TindexN) // let the runtime / optimizer know this is unknown / boxed / null, so that it won't try to union_move / copy it later - RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80); + RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER); } if (TindexN) TindexN->addIncoming(RTindex, FromBB); @@ -8042,17 +9025,18 @@ static jl_llvm_functions_t } for (PHINode *PN : ToDelete) { + // This basic block is statically unreachable, thus so is this PHINode PN->replaceAllUsesWith(UndefValue::get(PN->getType())); PN->eraseFromParent(); } // step 12. 
Perform any delayed instantiations - if (ctx.debug_enabled) { - bool in_prologue = true; - for (auto &BB : *ctx.f) { - for (auto &I : BB) { - CallBase *call = dyn_cast(&I); - if (call && !I.getDebugLoc()) { + bool in_prologue = true; + for (auto &BB : *ctx.f) { + for (auto &I : BB) { + CallBase *call = dyn_cast(&I); + if (call) { + if (debug_enabled && !I.getDebugLoc()) { // LLVM Verifier: inlinable function call in a function with debug info must have a !dbg location // make sure that anything we attempt to call has some inlining info, just in case optimization messed up // (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram) @@ -8061,12 +9045,24 @@ static jl_llvm_functions_t I.setDebugLoc(topdebugloc); } } - if (&I == &prologue_end) - in_prologue = false; + if (toplevel && !ctx.is_opaque_closure && !in_prologue) { + // we're at toplevel; insert an atomic barrier between every instruction + // TODO: inference is invalid if this has any effect (which it often does) + LoadInst *world = new LoadInst(ctx.types().T_size, + prepare_global_in(jl_Module, jlgetworld_global), Twine(), + /*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I); + world->setOrdering(AtomicOrdering::Acquire); + StoreInst *store_world = new StoreInst(world, world_age_field, + /*isVolatile*/false, ctx.types().alignof_ptr, /*insertBefore*/&I); + (void)store_world; + } } + if (&I == &prologue_end) + in_prologue = false; } - dbuilder.finalize(); } + if (debug_enabled) + dbuilder.finalize(); if (ctx.vaSlot > 0) { // remove VA allocation if we never referenced it @@ -8106,56 +9102,17 @@ static jl_llvm_functions_t } } - // copy ctx.roots into m->roots - // if we created any new roots during codegen - if (ctx.roots) { - jl_method_t *m = lam->def.method; - JL_LOCK(&m->writelock); - if (m->roots == NULL) { - m->roots = ctx.roots; - jl_gc_wb(m, m->roots); - } - else { - size_t i, ilen = jl_array_dim0(ctx.roots); - size_t j, jlen = jl_array_dim0(m->roots); - for (i = 0; i < ilen; i++) { - jl_value_t *ival = jl_array_ptr_ref(ctx.roots, i); - for (j = 0; j < jlen; j++) { - jl_value_t *jval = jl_array_ptr_ref(m->roots, j); - if (ival == jval) - break; - } - if (j == jlen) // not found - add to array - jl_add_method_root(m, jl_precompile_toplevel_module, ival); - } - } - ctx.roots = NULL; - JL_UNLOCK(&m->writelock); - } - // link the dependent llvmcall modules, but switch their function's linkage to internal // so that they don't conflict when they show up in the execution engine. 
- for (auto &TSMod : ctx.llvmcall_modules) { - SmallVector Exports; - TSMod.withModuleDo([&](Module &Mod) { - for (const auto &F: Mod.functions()) - if (!F.isDeclaration()) - Exports.push_back(F.getName().str()); - }); - jl_merge_module(TSM, std::move(TSMod)); - for (auto FN: Exports) - jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage); - } - - // link in opaque closure modules - for (auto &TSMod : ctx.oc_modules) { + Linker L(*jl_Module); + for (auto &Mod : ctx.llvmcall_modules) { SmallVector Exports; - TSMod.withModuleDo([&](Module &Mod) { - for (const auto &F: Mod.functions()) - if (!F.isDeclaration()) - Exports.push_back(F.getName().str()); - }); - jl_merge_module(TSM, std::move(TSMod)); + for (const auto &F: Mod->functions()) + if (!F.isDeclaration()) + Exports.push_back(F.getName().str()); + bool error = L.linkInModule(std::move(Mod)); + assert(!error && "linking llvmcall modules failed"); + (void)error; for (auto FN: Exports) jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage); } @@ -8176,7 +9133,8 @@ jl_llvm_functions_t jl_emit_code( jl_value_t *jlrettype, jl_codegen_params_t ¶ms) { - JL_TIMING(CODEGEN); + JL_TIMING(CODEGEN, CODEGEN_LLVM); + jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK); // caller must hold codegen_lock jl_llvm_functions_t decls = {}; assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache || @@ -8199,11 +9157,11 @@ jl_llvm_functions_t jl_emit_code( JL_CATCH { // Something failed! This is very, very bad. // Try to pretend that it isn't and attempt to recover. - const char *mname = m.getModuleUnlocked()->getModuleIdentifier().data(); + std::string mname = m.getModuleUnlocked()->getModuleIdentifier(); m = orc::ThreadSafeModule(); decls.functionObject = ""; decls.specFunctionObject = ""; - jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname); + jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname.c_str()); jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO @@ -8212,19 +9170,54 @@ jl_llvm_functions_t jl_emit_code( return decls; } +static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codegen_params_t ¶ms, jl_method_instance_t *mi, jl_value_t *rettype) +{ + Module *M = m.getModuleUnlocked(); + jl_codectx_t ctx(M->getContext(), params); + ctx.name = M->getModuleIdentifier().data(); + std::string funcName = get_function_name(true, false, ctx.name, ctx.emission_context.TargetTriple); + jl_llvm_functions_t declarations; + declarations.functionObject = "jl_f_opaque_closure_call"; + if (uses_specsig(mi->specTypes, false, true, rettype, true)) { + jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); + Function *gf_thunk = cast(returninfo.decl.getCallee()); + jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); + size_t nrealargs = jl_nparams(mi->specTypes); + emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, mi->specTypes, rettype, true, nrealargs, ctx.emission_context); + declarations.specFunctionObject = funcName; + } + return declarations; +} + +static int effects_foldable(uint32_t effects) +{ + // N.B.: This needs to be kept in sync with Core.Compiler.is_foldable(effects, true) + return ((effects & 0x7) == 0) && 
// is_consistent(effects) + (((effects >> 10) & 0x03) == 0) && // is_noub(effects) + (((effects >> 3) & 0x03) == 0) && // is_effect_free(effects) + ((effects >> 6) & 0x01); // is_terminates(effects) +} + jl_llvm_functions_t jl_emit_codeinst( orc::ThreadSafeModule &m, jl_code_instance_t *codeinst, jl_code_info_t *src, jl_codegen_params_t ¶ms) { - JL_TIMING(CODEGEN); + JL_TIMING(CODEGEN, CODEGEN_Codeinst); + jl_timing_show_method_instance(codeinst->def, JL_TIMING_DEFAULT_BLOCK); JL_GC_PUSH1(&src); if (!src) { - src = (jl_code_info_t*)codeinst->inferred; + src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); jl_method_t *def = codeinst->def->def.method; + // Check if this is the generic method for opaque closure wrappers - + // if so, generate the specsig -> invoke converter. + if (def == jl_opaque_closure_method) { + JL_GC_POP(); + return jl_emit_oc_wrapper(m, params, codeinst->def, codeinst->rettype); + } if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def)) - src = jl_uncompress_ir(def, codeinst, (jl_array_t*)src); + src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src); if (!src || !jl_is_code_info(src)) { JL_GC_POP(); m = orc::ThreadSafeModule(); @@ -8252,36 +9245,38 @@ jl_llvm_functions_t jl_emit_codeinst( jl_add_code_in_flight(f, codeinst, DL); } - if (// don't alter `inferred` when the code is not directly being used - params.world && + if (params.world) {// don't alter `inferred` when the code is not directly being used + jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); // don't change inferred state - codeinst->inferred) { - jl_method_t *def = codeinst->def->def.method; - if (// keep code when keeping everything - !(JL_DELETE_NON_INLINEABLE) || - // aggressively keep code when debugging level >= 2 - jl_options.debug_level > 1) { - // update the stored code - if (codeinst->inferred != (jl_value_t*)src) { - if (jl_is_method(def)) { - src = (jl_code_info_t*)jl_compress_ir(def, src); - assert(jl_typeis(src, jl_array_uint8_type)); - codeinst->relocatability = ((uint8_t*)jl_array_data(src))[jl_array_len(src)-1]; + if (inferred) { + jl_method_t *def = codeinst->def->def.method; + if (// keep code when keeping everything + !(JL_DELETE_NON_INLINEABLE) || + // aggressively keep code when debugging level >= 2 + // note that this uses the global jl_options.debug_level, not the local emission_ctx.debug_level + jl_options.debug_level > 1) { + // update the stored code + if (inferred != (jl_value_t*)src) { + if (jl_is_method(def)) { + src = (jl_code_info_t*)jl_compress_ir(def, src); + assert(jl_is_string(src)); + codeinst->relocatability = jl_string_data(src)[jl_string_len(src)-1]; + } + jl_atomic_store_release(&codeinst->inferred, (jl_value_t*)src); + jl_gc_wb(codeinst, src); } - codeinst->inferred = (jl_value_t*)src; - jl_gc_wb(codeinst, src); } - } - else if (// don't delete toplevel code - jl_is_method(def) && - // and there is something to delete (test this before calling jl_ir_flag_inlineable) - codeinst->inferred != jl_nothing && - // don't delete inlineable code, unless it is constant - (codeinst->invoke == jl_fptr_const_return_addr || !jl_ir_flag_inlineable((jl_array_t*)codeinst->inferred)) && - // don't delete code when generating a precompile file - !(params.imaging || jl_options.incremental)) { - // if not inlineable, code won't be needed again - codeinst->inferred = jl_nothing; + // delete non-inlineable code, since it won't be needed again + // because we already emitted LLVM code from it and the native + // Julia-level optimization will 
never need to see it + else if (jl_is_method(def) && // don't delete toplevel code + inferred != jl_nothing && // and there is something to delete (test this before calling jl_ir_inlining_cost) + !effects_foldable(codeinst->ipo_purity_bits) && // don't delete code we may want for irinterp + ((jl_ir_inlining_cost(inferred) == UINT16_MAX) || // don't delete inlineable code + jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) && // unless it is constant + !(params.imaging_mode || jl_options.incremental)) { // don't delete code when generating a precompile file + jl_atomic_store_release(&codeinst->inferred, jl_nothing); + } } } } @@ -8291,97 +9286,94 @@ jl_llvm_functions_t jl_emit_codeinst( void jl_compile_workqueue( - jl_workqueue_t &emitted, - Module &original, - jl_codegen_params_t ¶ms, CompilationPolicy policy) + jl_codegen_params_t ¶ms, + CompilationPolicy policy) { - JL_TIMING(CODEGEN); + JL_TIMING(CODEGEN, CODEGEN_Workqueue); jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); while (!params.workqueue.empty()) { jl_code_instance_t *codeinst; - Function *protodecl; - jl_returninfo_t::CallingConv proto_cc; - bool proto_specsig; - unsigned proto_return_roots; auto it = params.workqueue.back(); codeinst = it.first; - std::tie(proto_cc, proto_return_roots, protodecl, proto_specsig) = it.second; + auto proto = it.second; params.workqueue.pop_back(); // try to emit code for this item from the workqueue assert(codeinst->min_world <= params.world && codeinst->max_world >= params.world && "invalid world for code-instance"); StringRef preal_decl = ""; bool preal_specsig = false; - auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); - if (params.cache && invoke != NULL) { + auto invoke = jl_atomic_load_acquire(&codeinst->invoke); + bool cache_valid = params.cache; + // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this. + if (cache_valid && invoke != NULL) { auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); + if (fptr) { + while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + // in case we are racing with another thread that is emitting this function + invoke = jl_atomic_load_relaxed(&codeinst->invoke); + } if (invoke == jl_fptr_args_addr) { preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); } - else if (codeinst->isspecsig) { + else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) { preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); preal_specsig = true; } } else { - auto &result = emitted[codeinst]; - jl_llvm_functions_t *decls = NULL; - if (std::get<0>(result)) { - decls = &std::get<1>(result); - } - else { + auto it = params.compiled_functions.find(codeinst); + if (it == params.compiled_functions.end()) { // Reinfer the function. The JIT came along and removed the inferred // method body. 
See #34993 if (policy != CompilationPolicy::Default && - codeinst->inferred && codeinst->inferred == jl_nothing) { + jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { src = jl_type_infer(codeinst->def, jl_atomic_load_acquire(&jl_world_counter), 0); if (src) { orc::ThreadSafeModule result_m = - jl_create_llvm_module(name_from_method_instance(codeinst->def), - params.tsctx, params.imaging, - original.getDataLayout(), Triple(original.getTargetTriple())); - result.second = jl_emit_code(result_m, codeinst->def, src, src->rettype, params); - result.first = std::move(result_m); + jl_create_ts_module(name_from_method_instance(codeinst->def), + params.tsctx, params.DL, params.TargetTriple); + auto decls = jl_emit_code(result_m, codeinst->def, src, src->rettype, params); + if (result_m) + it = params.compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first; } } else { orc::ThreadSafeModule result_m = - jl_create_llvm_module(name_from_method_instance(codeinst->def), - params.tsctx, params.imaging, - original.getDataLayout(), Triple(original.getTargetTriple())); - result.second = jl_emit_codeinst(result_m, codeinst, NULL, params); - result.first = std::move(result_m); + jl_create_ts_module(name_from_method_instance(codeinst->def), + params.tsctx, params.DL, params.TargetTriple); + auto decls = jl_emit_codeinst(result_m, codeinst, NULL, params); + if (result_m) + it = params.compiled_functions.insert(std::make_pair(codeinst, std::make_pair(std::move(result_m), std::move(decls)))).first; } - if (std::get<0>(result)) - decls = &std::get<1>(result); - else - emitted.erase(codeinst); // undo the insert above } - if (decls) { - if (decls->functionObject == "jl_fptr_args") { - preal_decl = decls->specFunctionObject; + if (it != params.compiled_functions.end()) { + auto &decls = it->second.second; + if (decls.functionObject == "jl_fptr_args") { + preal_decl = decls.specFunctionObject; } - else if (decls->functionObject != "jl_fptr_sparam") { - preal_decl = decls->specFunctionObject; + else if (decls.functionObject != "jl_fptr_sparam") { + preal_decl = decls.specFunctionObject; preal_specsig = true; } } } // patch up the prototype we emitted earlier - Module *mod = protodecl->getParent(); - assert(protodecl->isDeclaration()); - if (proto_specsig) { + Module *mod = proto.decl->getParent(); + assert(proto.decl->isDeclaration()); + if (proto.specsig) { // expected specsig if (!preal_specsig) { // emit specsig-to-(jl)invoke conversion Function *preal = emit_tojlinvoke(codeinst, mod, params); - protodecl->setLinkage(GlobalVariable::InternalLinkage); + proto.decl->setLinkage(GlobalVariable::InternalLinkage); //protodecl->setAlwaysInline(); - jl_init_function(protodecl); + jl_init_function(proto.decl, params.TargetTriple); size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed // TODO: maybe this can be cached in codeinst->specfptr? 
- emit_cfunc_invalidate(protodecl, proto_cc, proto_return_roots, codeinst->def->specTypes, codeinst->rettype, nrealargs, params, preal); + emit_cfunc_invalidate(proto.decl, proto.cc, proto.return_roots, codeinst->def->specTypes, codeinst->rettype, false, nrealargs, params, preal); preal_decl = ""; // no need to fixup the name } else { @@ -8398,11 +9390,11 @@ void jl_compile_workqueue( if (!preal_decl.empty()) { // merge and/or rename this prototype to the real function if (Value *specfun = mod->getNamedValue(preal_decl)) { - if (protodecl != specfun) - protodecl->replaceAllUsesWith(specfun); + if (proto.decl != specfun) + proto.decl->replaceAllUsesWith(specfun); } else { - protodecl->setName(preal_decl); + proto.decl->setName(preal_decl); } } } @@ -8411,7 +9403,7 @@ void jl_compile_workqueue( // --- initialization --- -std::vector> gv_for_global; +SmallVector, 0> gv_for_global; static void global_jlvalue_to_llvm(JuliaVariable *var, jl_value_t **addr) { gv_for_global.push_back(std::make_pair(addr, var)); @@ -8427,19 +9419,22 @@ static JuliaVariable *julia_const_gv(jl_value_t *val) static void init_jit_functions(void) { + add_named_global(jl_small_typeof_var, &jl_small_typeof); add_named_global(jlstack_chk_guard_var, &__stack_chk_guard); add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle); -#ifdef _OS_WINDOWS_ add_named_global(jlexe_var, &jl_exe_handle); add_named_global(jldll_var, &jl_libjulia_handle); add_named_global(jldlli_var, &jl_libjulia_internal_handle); -#endif - global_jlvalue_to_llvm(new JuliaVariable{"jl_true", true, get_pjlvalue}, &jl_true); - global_jlvalue_to_llvm(new JuliaVariable{"jl_false", true, get_pjlvalue}, &jl_false); - global_jlvalue_to_llvm(new JuliaVariable{"jl_emptysvec", true, get_pjlvalue}, (jl_value_t**)&jl_emptysvec); - global_jlvalue_to_llvm(new JuliaVariable{"jl_emptytuple", true, get_pjlvalue}, &jl_emptytuple); - global_jlvalue_to_llvm(new JuliaVariable{"jl_diverror_exception", true, get_pjlvalue}, &jl_diverror_exception); - global_jlvalue_to_llvm(new JuliaVariable{"jl_undefref_exception", true, get_pjlvalue}, &jl_undefref_exception); + auto size2pjlvalue = [](Type *T_size) -> Type * { + return get_pjlvalue(T_size->getContext()); + }; + global_jlvalue_to_llvm(new JuliaVariable{"jl_true", true, size2pjlvalue}, &jl_true); + global_jlvalue_to_llvm(new JuliaVariable{"jl_false", true, size2pjlvalue}, &jl_false); + global_jlvalue_to_llvm(new JuliaVariable{"jl_nothing", true, size2pjlvalue}, &jl_nothing); + global_jlvalue_to_llvm(new JuliaVariable{"jl_emptysvec", true, size2pjlvalue}, (jl_value_t**)&jl_emptysvec); + global_jlvalue_to_llvm(new JuliaVariable{"jl_emptytuple", true, size2pjlvalue}, &jl_emptytuple); + global_jlvalue_to_llvm(new JuliaVariable{"jl_diverror_exception", true, size2pjlvalue}, &jl_diverror_exception); + global_jlvalue_to_llvm(new JuliaVariable{"jl_undefref_exception", true, size2pjlvalue}, &jl_undefref_exception); add_named_global(jlgetworld_global, &jl_world_counter); add_named_global("__stack_chk_fail", &__stack_chk_fail); add_named_global(jlpgcstack_func, (void*)NULL); @@ -8447,6 +9442,7 @@ static void init_jit_functions(void) add_named_global(jlatomicerror_func, &jl_atomic_error); add_named_global(jlthrow_func, &jl_throw); add_named_global(jlundefvarerror_func, &jl_undefined_var_error); + add_named_global(jlhasnofield_func, &jl_has_no_field_error); add_named_global(jlboundserrorv_func, &jl_bounds_error_ints); add_named_global(jlboundserror_func, &jl_bounds_error_int); add_named_global(jlvboundserror_func, 
&jl_bounds_error_tuple_int); @@ -8459,10 +9455,12 @@ static void init_jit_functions(void) add_named_global(jlcheckassign_func, &jl_checked_assignment); add_named_global(jldeclareconst_func, &jl_declare_constant); add_named_global(jlgetbindingorerror_func, &jl_get_binding_or_error); - add_named_global(jlgetbindingwrorerror_func, &jl_get_binding_wr_or_error); + add_named_global(jlgetbindingwrorerror_func, &jl_get_binding_wr); add_named_global(jlboundp_func, &jl_boundp); for (auto it : builtin_func_map()) add_named_global(it.second, it.first); + add_named_global(jlintrinsic_func, &jl_f_intrinsic_call); + add_named_global(jlgetbuiltinfptr_func, &jl_get_builtin_fptr); add_named_global(jlapplygeneric_func, &jl_apply_generic); add_named_global(jlinvoke_func, &jl_invoke); add_named_global(jltopeval_func, &jl_toplevel_eval); @@ -8483,16 +9481,16 @@ static void init_jit_functions(void) add_named_global(jl_object_id__func, &jl_object_id_); add_named_global(jl_alloc_obj_func, (void*)NULL); add_named_global(jl_newbits_func, (void*)jl_new_bits); - add_named_global(jl_loopinfo_marker_func, (void*)NULL); add_named_global(jl_typeof_func, (void*)NULL); add_named_global(jl_write_barrier_func, (void*)NULL); - add_named_global(jl_write_barrier_binding_func, (void*)NULL); add_named_global(jldlsym_func, &jl_load_and_lookup); + add_named_global("jl_adopt_thread", &jl_adopt_thread); add_named_global(jlgetcfunctiontrampoline_func, &jl_get_cfunction_trampoline); add_named_global(jlgetnthfieldchecked_func, &jl_get_nth_field_checked); + add_named_global(jlfieldindex_func, &jl_field_index); add_named_global(diff_gc_total_bytes_func, &jl_gc_diff_total_bytes); add_named_global(sync_gc_total_bytes_func, &jl_gc_sync_total_bytes); - add_named_global(jlarray_data_owner_func, &jl_array_data_owner); + add_named_global(jl_allocgenericmemory, &jl_alloc_genericmemory); add_named_global(gcroot_flush_func, (void*)NULL); add_named_global(gc_preserve_begin_func, (void*)NULL); add_named_global(gc_preserve_end_func, (void*)NULL); @@ -8539,12 +9537,13 @@ char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile char jl_using_perf_jitevents = 0; #endif +int jl_is_timing_passes = 0; + extern "C" void jl_init_llvm(void) { jl_page_size = jl_getpagesize(); jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug; - jl_default_cgparams.generic_context = jl_nothing; - + jl_default_cgparams.debug_info_level = (int) jl_options.debug_level; InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetAsmParser(); @@ -8553,14 +9552,20 @@ extern "C" void jl_init_llvm(void) // Initialize passes PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeCore(Registry); +#if JL_LLVM_VERSION < 150000 initializeCoroutines(Registry); +#endif initializeScalarOpts(Registry); initializeVectorization(Registry); initializeAnalysis(Registry); initializeTransformUtils(Registry); initializeInstCombine(Registry); +#if JL_LLVM_VERSION >= 160000 + // TODO +#else initializeAggressiveInstCombine(Registry); initializeInstrumentation(Registry); +#endif initializeTarget(Registry); #ifdef USE_POLLY polly::initializePollyPasses(Registry); @@ -8576,24 +9581,38 @@ extern "C" void jl_init_llvm(void) clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! 
See #922 if (clopt->getNumOccurrences() == 0) cl::ProvidePositionalOption(clopt, "0", 1); + // For parity with LoopUnswitch + clopt = llvmopts.lookup("unswitch-threshold"); + if (clopt->getNumOccurrences() == 0) + cl::ProvidePositionalOption(clopt, "100", 1); // if the patch adding this option has been applied, lower its limit to provide // better DAGCombiner performance. clopt = llvmopts.lookup("combiner-store-merge-dependence-limit"); if (clopt && clopt->getNumOccurrences() == 0) cl::ProvidePositionalOption(clopt, "4", 1); + // we want the opaque-pointers to be opt-in, per LLVMContext, for this release + // so change the default value back to pre-14.x, without changing the NumOccurrences flag for it + clopt = llvmopts.lookup("opaque-pointers"); + if (clopt && clopt->getNumOccurrences() == 0) { + clopt->addOccurrence(1, clopt->ArgStr, "false", true); + } + + clopt = llvmopts.lookup("time-passes"); + if (clopt && clopt->getNumOccurrences() > 0) + jl_is_timing_passes = 1; + jl_ExecutionEngine = new JuliaOJIT(); bool jl_using_gdb_jitevents = false; // Register GDB event listener #if defined(JL_DEBUG_BUILD) jl_using_gdb_jitevents = true; -# else +#endif const char *jit_gdb = getenv("ENABLE_GDBLISTENER"); - if (jit_gdb && atoi(jit_gdb)) { - jl_using_gdb_jitevents = true; + if (jit_gdb) { + jl_using_gdb_jitevents = !!atoi(jit_gdb); } -#endif if (jl_using_gdb_jitevents) jl_ExecutionEngine->enableJITDebuggingSupport(); @@ -8601,7 +9620,7 @@ extern "C" void jl_init_llvm(void) defined(JL_USE_OPROFILE_JITEVENTS) || \ defined(JL_USE_PERF_JITEVENTS) #ifdef JL_USE_JITLINK -#error "JIT profiling support (JL_USE_*_JITEVENTS) not yet available on platforms that use JITLink" +#pragma message("JIT profiling support (JL_USE_*_JITEVENTS) not yet available on platforms that use JITLink") #else const char *jit_profiling = getenv("ENABLE_JITPROFILING"); @@ -8643,18 +9662,22 @@ extern "C" void jl_init_llvm(void) cl::PrintOptionValues(); } -extern "C" JL_DLLEXPORT void jl_init_codegen_impl(void) +extern "C" JL_DLLEXPORT_CODEGEN void jl_init_codegen_impl(void) { jl_init_llvm(); // Now that the execution engine exists, initialize all modules init_jit_functions(); } -extern "C" JL_DLLEXPORT void jl_teardown_codegen_impl() +extern "C" JL_DLLEXPORT_CODEGEN void jl_teardown_codegen_impl() JL_NOTSAFEPOINT { // output LLVM timings and statistics - reportAndResetTimings(); + // Guard against exits before we have initialized the ExecutionEngine + if (jl_ExecutionEngine) + jl_ExecutionEngine->printTimers(); PrintStatistics(); + JL_LOCK(&jl_codegen_lock); // TODO: If this lock gets removed reconsider + // LLVM global state/destructors (maybe a rwlock) } // the rest of this file are convenience functions @@ -8725,17 +9748,21 @@ extern void jl_write_bitcode_module(void *M, char *fname) { #include -extern "C" JL_DLLEXPORT jl_value_t *jl_get_libllvm_impl(void) JL_NOTSAFEPOINT +extern "C" JL_DLLEXPORT_CODEGEN jl_value_t *jl_get_libllvm_impl(void) JL_NOTSAFEPOINT { #if defined(_OS_WINDOWS_) HMODULE mod; if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, (LPCSTR)&llvm::DebugFlag, &mod)) return jl_nothing; - - char path[MAX_PATH]; - if (!GetModuleFileNameA(mod, path, sizeof(path))) + wchar_t path16[MAX_PATH]; + DWORD n16 = GetModuleFileNameW(mod, path16, MAX_PATH); + if (n16 <= 0) + return jl_nothing; + path16[n16++] = 0; + char path8[MAX_PATH * 3]; + if (!WideCharToMultiByte(CP_UTF8, 0, path16, n16, path8, MAX_PATH * 3, NULL, NULL)) return jl_nothing; - return (jl_value_t*) jl_symbol(path); + return (jl_value_t*) 
jl_symbol(path8); #else Dl_info dli; if (!dladdr((void*)LLVMContextCreate, &dli)) diff --git a/src/codegen_shared.h b/src/codegen_shared.h deleted file mode 100644 index 9bb81748e7c54..0000000000000 --- a/src/codegen_shared.h +++ /dev/null @@ -1,331 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -#include -#include -#include -#include -#include -#include -#include "julia.h" - -#define STR(csym) #csym -#define XSTR(csym) STR(csym) - -enum AddressSpace { - Generic = 0, - Tracked = 10, - Derived = 11, - CalleeRooted = 12, - Loaded = 13, - FirstSpecial = Tracked, - LastSpecial = Loaded, -}; - -static inline auto getSizeTy(llvm::LLVMContext &ctxt) { - if (sizeof(size_t) > sizeof(uint32_t)) { - return llvm::Type::getInt64Ty(ctxt); - } else { - return llvm::Type::getInt32Ty(ctxt); - } -} - -namespace JuliaType { - static inline llvm::StructType* get_jlvalue_ty(llvm::LLVMContext &C) { - return llvm::StructType::get(C); - } - - static inline llvm::PointerType* get_pjlvalue_ty(llvm::LLVMContext &C) { - return llvm::PointerType::get(get_jlvalue_ty(C), 0); - } - - static inline llvm::PointerType* get_prjlvalue_ty(llvm::LLVMContext &C) { - return llvm::PointerType::get(get_jlvalue_ty(C), AddressSpace::Tracked); - } - - static inline llvm::PointerType* get_ppjlvalue_ty(llvm::LLVMContext &C) { - return llvm::PointerType::get(get_pjlvalue_ty(C), 0); - } - - static inline llvm::PointerType* get_pprjlvalue_ty(llvm::LLVMContext &C) { - return llvm::PointerType::get(get_prjlvalue_ty(C), 0); - } - - static inline auto get_jlfunc_ty(llvm::LLVMContext &C) { - auto T_prjlvalue = get_prjlvalue_ty(C); - auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0); - return llvm::FunctionType::get(T_prjlvalue, { - T_prjlvalue, // function - T_pprjlvalue, // args[] - llvm::Type::getInt32Ty(C)}, // nargs - false); - } - - static inline auto get_jlfunc2_ty(llvm::LLVMContext &C) { - auto T_prjlvalue = get_prjlvalue_ty(C); - auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0); - return llvm::FunctionType::get(T_prjlvalue, { - T_prjlvalue, // function - T_pprjlvalue, // args[] - llvm::Type::getInt32Ty(C), - T_prjlvalue, // linfo - }, // nargs - false); - } - - static inline auto get_jlfuncparams_ty(llvm::LLVMContext &C) { - auto T_prjlvalue = get_prjlvalue_ty(C); - auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0); - return llvm::FunctionType::get(T_prjlvalue, { - T_prjlvalue, // function - T_pprjlvalue, // args[] - llvm::Type::getInt32Ty(C), - T_pprjlvalue, // linfo->sparam_vals - }, // nargs - false); - } - - static inline auto get_voidfunc_ty(llvm::LLVMContext &C) { - return llvm::FunctionType::get(llvm::Type::getVoidTy(C), /*isVarArg*/false); - } - - static inline auto get_pvoidfunc_ty(llvm::LLVMContext &C) { - return get_voidfunc_ty(C)->getPointerTo(); - } -} - -// return how many Tracked pointers are in T (count > 0), -// and if there is anything else in T (all == false) -struct CountTrackedPointers { - unsigned count = 0; - bool all = true; - bool derived = false; - CountTrackedPointers(llvm::Type *T); -}; - -unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder); -std::vector ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef perm_offsets={}); - -static inline void llvm_dump(llvm::Value *v) -{ - v->print(llvm::dbgs(), true); - llvm::dbgs() << "\n"; -} - -static inline void llvm_dump(llvm::Type *v) -{ - 
v->print(llvm::dbgs(), true); - llvm::dbgs() << "\n"; -} - -static inline void llvm_dump(llvm::Function *f) -{ - f->print(llvm::dbgs(), nullptr, false, true); -} - -static inline void llvm_dump(llvm::Module *m) -{ - m->print(llvm::dbgs(), nullptr); -} - -static inline void llvm_dump(llvm::Metadata *m) -{ - m->print(llvm::dbgs()); - llvm::dbgs() << "\n"; -} - -static inline void llvm_dump(llvm::DebugLoc *dbg) -{ - dbg->print(llvm::dbgs()); - llvm::dbgs() << "\n"; -} - -static inline std::pair tbaa_make_child_with_context(llvm::LLVMContext &ctxt, const char *name, llvm::MDNode *parent=nullptr, bool isConstant=false) -{ - llvm::MDBuilder mbuilder(ctxt); - llvm::MDNode *jtbaa = mbuilder.createTBAARoot("jtbaa"); - llvm::MDNode *tbaa_root = mbuilder.createTBAAScalarTypeNode("jtbaa", jtbaa); - llvm::MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? parent : tbaa_root); - llvm::MDNode *n = mbuilder.createTBAAStructTagNode(scalar, scalar, 0, isConstant); - return std::make_pair(n, scalar); -} - -static inline llvm::MDNode *get_tbaa_const(llvm::LLVMContext &ctxt) { - return tbaa_make_child_with_context(ctxt, "jtbaa_const", nullptr, true).first; -} - -static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instruction *inst) -{ - inst->setMetadata(llvm::LLVMContext::MD_tbaa, md); - if (llvm::isa(inst) && md && md == get_tbaa_const(md->getContext())) - inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), llvm::None)); - return inst; -} - -// bitcast a value, but preserve its address space when dealing with pointer types -static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder, llvm::Value *v, llvm::Type *jl_value) -{ - using namespace llvm; - if (isa(jl_value) && - v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) { - // Cast to the proper address space - Type *jl_value_addr = PointerType::getWithSamePointeeType(cast(jl_value), v->getType()->getPointerAddressSpace()); - return builder.CreateBitCast(v, jl_value_addr); - } - else { - return builder.CreateBitCast(v, jl_value); - } -} - -// Get PTLS through current task. -static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa) -{ - using namespace llvm; - auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext()); - auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); - auto T_size = builder.GetInsertBlock()->getModule()->getDataLayout().getIntPtrType(builder.getContext()); - const int ptls_offset = offsetof(jl_task_t, ptls); - llvm::Value *pptls = builder.CreateInBoundsGEP( - T_pjlvalue, current_task, - ConstantInt::get(T_size, ptls_offset / sizeof(void *)), - "ptls_field"); - LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue, - emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load"); - // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c. - tbaa_decorate(tbaa, ptls_load); - // Using `CastInst::Create` to get an `Instruction*` without explicit cast: - auto ptls = CastInst::Create(Instruction::BitCast, ptls_load, T_ppjlvalue, "ptls"); - builder.Insert(ptls); - return ptls; -} - -// Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14. -// -// Once we no longer support LLVM < 14, these can be mechanically removed by -// translating foo(Bar, …) into Bar->foo(…) resp. Bar.foo(…). 
-namespace { -using namespace llvm; - -inline void addFnAttr(CallInst *Target, Attribute::AttrKind Attr) -{ -#if JL_LLVM_VERSION >= 140000 - Target->addFnAttr(Attr); -#else - Target->addAttribute(AttributeList::FunctionIndex, Attr); -#endif -} - -template -inline void addRetAttr(T *Target, A Attr) -{ -#if JL_LLVM_VERSION >= 140000 - Target->addRetAttr(Attr); -#else - Target->addAttribute(AttributeList::ReturnIndex, Attr); -#endif -} - -inline void addAttributeAtIndex(Function *F, unsigned Index, Attribute Attr) -{ -#if JL_LLVM_VERSION >= 140000 - F->addAttributeAtIndex(Index, Attr); -#else - F->addAttribute(Index, Attr); -#endif -} - -inline AttributeSet getFnAttrs(const AttributeList &Attrs) -{ -#if JL_LLVM_VERSION >= 140000 - return Attrs.getFnAttrs(); -#else - return Attrs.getFnAttributes(); -#endif -} - -inline AttributeSet getRetAttrs(const AttributeList &Attrs) -{ -#if JL_LLVM_VERSION >= 140000 - return Attrs.getRetAttrs(); -#else - return Attrs.getRetAttributes(); -#endif -} - -inline bool hasFnAttr(const AttributeList &L, Attribute::AttrKind Kind) -{ -#if JL_LLVM_VERSION >= 140000 - return L.hasFnAttr(Kind); -#else - return L.hasAttribute(AttributeList::FunctionIndex, Kind); -#endif -} - -inline AttributeList addAttributeAtIndex(const AttributeList &L, LLVMContext &C, - unsigned Index, Attribute::AttrKind Kind) -{ -#if JL_LLVM_VERSION >= 140000 - return L.addAttributeAtIndex(C, Index, Kind); -#else - return L.addAttribute(C, Index, Kind); -#endif -} - -inline AttributeList addAttributeAtIndex(const AttributeList &L, LLVMContext &C, - unsigned Index, Attribute Attr) -{ -#if JL_LLVM_VERSION >= 140000 - return L.addAttributeAtIndex(C, Index, Attr); -#else - return L.addAttribute(C, Index, Attr); -#endif -} - -inline AttributeList addAttributesAtIndex(const AttributeList &L, LLVMContext &C, - unsigned Index, const AttrBuilder &Builder) -{ -#if JL_LLVM_VERSION >= 140000 - return L.addAttributesAtIndex(C, Index, Builder); -#else - return L.addAttributes(C, Index, Builder); -#endif -} - -inline AttributeList addFnAttribute(const AttributeList &L, LLVMContext &C, - Attribute::AttrKind Kind) -{ -#if JL_LLVM_VERSION >= 140000 - return L.addFnAttribute(C, Kind); -#else - return L.addAttribute(C, AttributeList::FunctionIndex, Kind); -#endif -} - -inline AttributeList addRetAttribute(const AttributeList &L, LLVMContext &C, - Attribute::AttrKind Kind) -{ -#if JL_LLVM_VERSION >= 140000 - return L.addRetAttribute(C, Kind); -#else - return L.addAttribute(C, AttributeList::ReturnIndex, Kind); -#endif -} - -inline bool hasAttributesAtIndex(const AttributeList &L, unsigned Index) -{ -#if JL_LLVM_VERSION >= 140000 - return L.hasAttributesAtIndex(Index); -#else - return L.hasAttributes(Index); -#endif -} - -inline Attribute getAttributeAtIndex(const AttributeList &L, unsigned Index, Attribute::AttrKind Kind) -{ -#if JL_LLVM_VERSION >= 140000 - return L.getAttributeAtIndex(Index, Kind); -#else - return L.getAttribute(Index, Kind); -#endif -} - -} diff --git a/src/common_symbols1.inc b/src/common_symbols1.inc index 7d445289e80fa..f54be52729a4f 100644 --- a/src/common_symbols1.inc +++ b/src/common_symbols1.inc @@ -1,99 +1,92 @@ jl_symbol("="), jl_symbol("getproperty"), -jl_symbol("apply_type"), jl_symbol("getfield"), +jl_symbol("apply_type"), +jl_symbol("==="), jl_symbol("getindex"), jl_symbol("convert"), -jl_symbol("==="), -jl_symbol("iterate"), jl_symbol("=="), jl_symbol("new"), -jl_symbol("foreigncall"), jl_symbol("int.jl"), -jl_symbol("throw"), -jl_symbol("nothing"), -jl_symbol("essentials.jl"), 
jl_symbol("+"), -jl_symbol("unsafe_convert"), +jl_symbol("boot.jl"), +jl_symbol("essentials.jl"), +jl_symbol("ccall"), +jl_symbol("foreigncall"), +jl_symbol("iterate"), jl_symbol("not_int"), +jl_symbol("Base.jl"), jl_symbol("-"), -jl_symbol("boot.jl"), -jl_symbol("number.jl"), +jl_symbol("throw"), +jl_symbol("promotion.jl"), jl_symbol("length"), jl_symbol("<"), -jl_symbol("cconvert"), -jl_symbol("Base.jl"), -jl_symbol("promotion.jl"), -jl_symbol("tuple.jl"), -jl_symbol("static_parameter"), -jl_symbol("isempty"), -jl_symbol("<="), -jl_symbol("array.jl"), +jl_symbol("isa"), jl_symbol("operators.jl"), -jl_symbol("NamedTuple"), +jl_symbol("number.jl"), +jl_symbol("unsafe_convert"), +jl_symbol("tuple.jl"), +jl_symbol("nothing"), jl_symbol("bitcast"), -jl_symbol("!"), +jl_symbol("NamedTuple"), jl_symbol("indexed_iterate"), -jl_symbol("sle_int"), jl_symbol("bool.jl"), -jl_symbol("Ptr"), -jl_symbol("size"), +jl_symbol("!"), +jl_symbol("isempty"), +jl_symbol("<="), +jl_symbol("cconvert"), jl_symbol("add_int"), +jl_symbol("static_parameter"), +jl_symbol("array.jl"), jl_symbol("slt_int"), -jl_symbol("*"), -jl_symbol("range.jl"), -jl_symbol("abstractarray.jl"), jl_symbol("!="), -jl_symbol("isa"), -jl_symbol("setindex!"), -jl_symbol("string"), -jl_symbol("ifelse"), -jl_symbol(":"), -jl_symbol(">"), -jl_symbol("_apply_iterate"), jl_symbol("UInt64"), +jl_symbol("range.jl"), +jl_symbol("sle_int"), +jl_symbol("size"), jl_symbol("&"), -jl_symbol("max"), +jl_symbol("abstractarray.jl"), jl_symbol("rem"), -jl_symbol("sub_int"), -jl_symbol(">="), -jl_symbol("UInt8"), -jl_symbol("iterators.jl"), +jl_symbol(">"), jl_symbol("Int64"), -jl_symbol("pairs"), +jl_symbol("sub_int"), +jl_symbol("*"), jl_symbol("and_int"), +jl_symbol("string"), +jl_symbol(">="), +jl_symbol("Ptr"), +jl_symbol("toInt64"), jl_symbol("last"), -jl_symbol("typeof"), -jl_symbol("arrayref"), jl_symbol("pointer.jl"), -jl_symbol("toInt64"), -jl_symbol("arraylen"), +jl_symbol("reinterpret"), +jl_symbol("first"), +jl_symbol("pairs"), +jl_symbol("_apply_iterate"), jl_symbol("typeassert"), +jl_symbol(":"), +jl_symbol("UInt8"), +jl_symbol("setindex!"), +jl_symbol("isdefined"), +jl_symbol("typeof"), +jl_symbol("promote"), +jl_symbol("kwcall"), +jl_symbol("unsigned"), +jl_symbol("_promote"), +jl_symbol("toUInt64"), jl_symbol("map"), -jl_symbol("kwfunc"), -jl_symbol("ArgumentError"), jl_symbol("lshr_int"), +jl_symbol("gc_preserve_begin"), +jl_symbol("gc_preserve_end"), +jl_symbol("trunc_int"), +jl_symbol("ArgumentError"), jl_symbol("axes"), -jl_symbol("reinterpret"), +jl_symbol("ult_int"), +jl_symbol("UInt"), +jl_symbol("zext_int"), +jl_symbol("strings/string.jl"), +jl_symbol("ifelse"), jl_symbol("Array"), -jl_symbol("first"), -jl_symbol("trunc_int"), -jl_symbol("OneTo"), -jl_symbol("haskey"), -jl_symbol("Int"), -jl_symbol("oneto"), jl_symbol("eq_int"), jl_symbol("throw_inexacterror"), -jl_symbol("toUInt64"), -jl_symbol("arraysize"), -jl_symbol("UInt"), +jl_symbol("|"), jl_symbol("setproperty!"), -jl_symbol("check_top_bit"), -jl_symbol("promote"), -jl_symbol("unsigned"), -jl_symbol("is_top_bit_set"), -jl_symbol("structdiff"), -jl_symbol("undef"), -jl_symbol("sizeof"), -jl_symbol("String"), -jl_symbol("namedtuple.jl"), diff --git a/src/common_symbols2.inc b/src/common_symbols2.inc index c9f4e41b83e33..a4583fe39f186 100644 --- a/src/common_symbols2.inc +++ b/src/common_symbols2.inc @@ -1,254 +1,248 @@ -jl_symbol("pop"), -jl_symbol("inbounds"), -jl_symbol("strings/string.jl"), -jl_symbol("Ref"), -jl_symbol("Vector"), -jl_symbol("kwerr"), 
-jl_symbol("_promote"), jl_symbol("sext_int"), -jl_symbol("pointer"), -jl_symbol("similar"), -jl_symbol("arrayset"), +jl_symbol("String"), +jl_symbol("Int"), +jl_symbol("iterators.jl"), +jl_symbol("Colon"), +jl_symbol("unchecked_oneto"), +jl_symbol("structdiff"), +jl_symbol("UnitRange"), +jl_symbol("unitrange_last"), +jl_symbol("sizeof"), +jl_symbol("check_top_bit"), +jl_symbol("is_top_bit_set"), +jl_symbol("data"), +jl_symbol("kwerr"), jl_symbol("axes1"), jl_symbol("eachindex"), -jl_symbol("|"), -jl_symbol("ult_int"), -jl_symbol("lastindex"), -jl_symbol("setfield!"), -jl_symbol("UnitRange"), -jl_symbol("push!"), +jl_symbol("or_int"), jl_symbol("Bool"), -jl_symbol("Colon"), +jl_symbol("setfield!"), jl_symbol("fieldtype"), -jl_symbol("unitrange_last"), -jl_symbol("bitarray.jl"), -jl_symbol("<<"), -jl_symbol("zext_int"), -jl_symbol("Tuple"), +jl_symbol("Ref"), +jl_symbol("pointer"), +jl_symbol("max"), +jl_symbol("push!"), +jl_symbol("lastindex"), jl_symbol("reflection.jl"), -jl_symbol("TypeError"), -jl_symbol("print"), -jl_symbol("eltype"), +jl_symbol("<<"), +jl_symbol("similar"), +jl_symbol("Vector"), +jl_symbol("UInt32"), jl_symbol(">>"), -jl_symbol("strings/basic.jl"), -jl_symbol("gc_preserve_begin"), -jl_symbol("require_one_based_indexing"), -jl_symbol("gc_preserve_end"), -jl_symbol("DimensionMismatch"), -jl_symbol("indices.jl"), -jl_symbol("Cvoid"), -jl_symbol("oftype"), -jl_symbol("zero"), -jl_symbol("float.jl"), -jl_symbol("Any"), -jl_symbol("checkbounds"), -jl_symbol("or_int"), -jl_symbol("isdefined"), jl_symbol("dict.jl"), +jl_symbol("checkbounds"), +jl_symbol("undef"), +jl_symbol("jl_string_ptr"), +jl_symbol("error"), jl_symbol("strings/io.jl"), -jl_symbol("shl_int"), -jl_symbol("copy"), -jl_symbol("macro expansion"), -jl_symbol("abstractdict.jl"), +jl_symbol("strings/substring.jl"), +jl_symbol("bitarray.jl"), +jl_symbol("strings/basic.jl"), +jl_symbol("merge"), +jl_symbol("TypeError"), +jl_symbol("keyword argument"), jl_symbol("in"), -jl_symbol("io.jl"), -jl_symbol("BlasInt"), -jl_symbol("Float64"), +jl_symbol("print"), +jl_symbol("macro expansion"), jl_symbol("mul_int"), -jl_symbol("UInt32"), +jl_symbol("shl_int"), jl_symbol("C_NULL"), +jl_symbol("oftype"), +jl_symbol("_growend!"), +jl_symbol("Any"), +jl_symbol("Tuple"), +jl_symbol("float.jl"), +jl_symbol("ncodeunits"), jl_symbol("Integer"), +jl_symbol("io.jl"), +jl_symbol("eltype"), +jl_symbol("name"), +jl_symbol("parent"), jl_symbol("!=="), -jl_symbol("merge"), -jl_symbol("BoundsError"), -jl_symbol("broadcasted"), -jl_symbol("Cint"), -jl_symbol("min"), -jl_symbol("libblastrampoline"), jl_symbol("iszero"), +jl_symbol("min"), +jl_symbol("DimensionMismatch"), jl_symbol("refvalue.jl"), -jl_symbol("stride"), -jl_symbol("error"), -jl_symbol("ncodeunits"), -jl_symbol("LinearIndices"), -jl_symbol("Clong"), -jl_symbol("pair.jl"), -jl_symbol("_growend!"), -jl_symbol("char.jl"), -jl_symbol("copyto!"), -jl_symbol("get"), -jl_symbol("tail"), -jl_symbol("real"), jl_symbol("Union"), -jl_symbol("multidimensional.jl"), -jl_symbol("enter"), -jl_symbol("leave"), +jl_symbol("BlasInt"), +jl_symbol("unsafe_load"), +jl_symbol("indices.jl"), +jl_symbol("x"), +jl_symbol("require_one_based_indexing"), +jl_symbol("namedtuple.jl"), +jl_symbol("tail"), +jl_symbol("Float64"), +jl_symbol("head"), +jl_symbol("Cvoid"), +jl_symbol("copy"), +jl_symbol("libblastrampoline"), +jl_symbol("get"), +jl_symbol("neg_int"), +jl_symbol("stop"), +jl_symbol("zero"), jl_symbol("add_ptr"), -jl_symbol("chkstride1"), +jl_symbol("toUInt32"), +jl_symbol("ptr"), 
+jl_symbol("char.jl"), +jl_symbol("trunc"), +jl_symbol("not_atomic"), +jl_symbol("enter"), +jl_symbol("Pair"), +jl_symbol("jl_value_ptr"), jl_symbol("Expr"), -jl_symbol("write"), -jl_symbol("broadcast.jl"), +jl_symbol("broadcasted"), +jl_symbol("pointerref"), +jl_symbol("multidimensional.jl"), +jl_symbol("Generator"), +jl_symbol("leave"), +jl_symbol("memoryref"), jl_symbol("show.jl"), +jl_symbol("pointer_from_objref"), +jl_symbol("memoryrefget"), +jl_symbol("reduce.jl"), +jl_symbol("stride"), +jl_symbol("pair.jl"), +jl_symbol("_string"), +jl_symbol("cmem.jl"), +jl_symbol("generator.jl"), +jl_symbol("broadcast.jl"), jl_symbol("none"), -jl_symbol("Generator"), +jl_symbol("copyto!"), +jl_symbol("chkstride1"), +jl_symbol("value"), +jl_symbol("write"), +jl_symbol("identity"), +jl_symbol("real"), +jl_symbol("start"), +jl_symbol("Cint"), +jl_symbol("fill!"), +jl_symbol("checkindex"), +jl_symbol("keys"), +jl_symbol("BoundsError"), +jl_symbol("vals"), +jl_symbol("Symbol"), +jl_symbol("strings/util.jl"), jl_symbol("Int32"), -jl_symbol("materialize"), +jl_symbol("ht_keyindex"), +jl_symbol("io"), +jl_symbol("~"), +jl_symbol("AssertionError"), +jl_symbol("abstractdict.jl"), jl_symbol("show"), -jl_symbol("lock"), -jl_symbol("unsafe_load"), -jl_symbol("gmp.jl"), jl_symbol("mpfr.jl"), -jl_symbol("Symbol"), -jl_symbol("Pair"), -jl_symbol("resize!"), -jl_symbol("neg_int"), -jl_symbol("strings/substring.jl"), -jl_symbol("AssertionError"), -jl_symbol("identity"), -jl_symbol("one"), -jl_symbol("reduce.jl"), -jl_symbol("libcholmod"), jl_symbol("isless"), +jl_symbol("args"), +jl_symbol("lock"), jl_symbol("reducedim.jl"), +jl_symbol("gmp.jl"), +jl_symbol("offset"), +jl_symbol("resize!"), +jl_symbol("throw_boundserror"), +jl_symbol("Clong"), +jl_symbol("_call_latest"), +jl_symbol("argtail"), +jl_symbol("compiler/ssair/ir.jl"), +jl_symbol("sub_ptr"), +jl_symbol("materialize"), jl_symbol("checksquare"), -jl_symbol("sort.jl"), -jl_symbol("generator.jl"), -jl_symbol("pointer_from_objref"), -jl_symbol("Float32"), -jl_symbol("chklapackerror"), -jl_symbol("parent"), -jl_symbol("task.jl"), +jl_symbol("LinearIndices"), +jl_symbol("ule_int"), +jl_symbol("dict"), jl_symbol("div"), -jl_symbol("cholmod_common"), -jl_symbol("ht_keyindex"), -jl_symbol("pop_exception"), -jl_symbol("c.jl"), -jl_symbol("firstindex"), -jl_symbol("some.jl"), -jl_symbol("iobuffer.jl"), -jl_symbol("sub_ptr"), -jl_symbol("vect"), -jl_symbol("unsafe_string"), -jl_symbol("llvmcall"), -jl_symbol("checkindex"), -jl_symbol("_call_latest"), +jl_symbol("chklapackerror"), +jl_symbol("count"), +jl_symbol("Float32"), +jl_symbol("genericmemory.jl"), +jl_symbol("print_to_string"), jl_symbol("rethrow"), -jl_symbol("pointerref"), +jl_symbol("sort.jl"), +jl_symbol("boundscheck"), jl_symbol("println"), -jl_symbol("keys"), -jl_symbol("RefValue"), +jl_symbol("loading.jl"), +jl_symbol("collect"), +jl_symbol("ashr_int"), jl_symbol("_expr"), -jl_symbol("toUInt32"), -jl_symbol("ismissing"), -jl_symbol("throw_boundserror"), -jl_symbol("IteratorSize"), -jl_symbol("iddict.jl"), +jl_symbol("iobuffer.jl"), +jl_symbol("DataType"), +jl_symbol("Dict"), +jl_symbol("unsafe_string"), +jl_symbol("RefValue"), +jl_symbol("step"), jl_symbol("to_shape"), -jl_symbol("Csize_t"), -jl_symbol("~"), -jl_symbol("argtail"), -jl_symbol("include"), -jl_symbol("set.jl"), -jl_symbol("isequal"), +jl_symbol("pop_exception"), +jl_symbol("Memory"), +jl_symbol("KeyError"), +jl_symbol("chunks"), jl_symbol("refpointer.jl"), -jl_symbol("=>"), -jl_symbol("Val"), -jl_symbol("Base"), +jl_symbol("llvmcall"), 
+jl_symbol("c.jl"), +jl_symbol("set.jl"), +jl_symbol("abs"), +jl_symbol("checked_trunc_uint"), +jl_symbol("Type"), jl_symbol("%"), -jl_symbol("collect"), -jl_symbol("Type##kw"), -jl_symbol("typemax"), -jl_symbol("fill!"), -jl_symbol("ule_int"), -jl_symbol("atomics.jl"), -jl_symbol("libgit2"), +jl_symbol("len"), jl_symbol("BigFloat"), -jl_symbol("ashr_int"), -jl_symbol("boundscheck"), -jl_symbol("abs"), -jl_symbol("^"), -jl_symbol("ensure_initialized"), -jl_symbol("_array_for"), -jl_symbol("strings/util.jl"), -jl_symbol("Dict"), +jl_symbol("isequal"), +jl_symbol("vect"), +jl_symbol("sprint"), +jl_symbol("mode"), +jl_symbol("expr.jl"), jl_symbol("Nothing"), -jl_symbol("compiler/ssair/ir.jl"), +jl_symbol("Val"), +jl_symbol("IteratorSize"), +jl_symbol("=>"), +jl_symbol("haskey"), +jl_symbol("iddict.jl"), jl_symbol("unsafe_write"), -jl_symbol("util.jl"), +jl_symbol("val"), +jl_symbol("flags"), +jl_symbol("task.jl"), +jl_symbol("UnionAll"), +jl_symbol("memset"), +jl_symbol("xor"), +jl_symbol("jl_alloc_genericmemory"), +jl_symbol("uplo"), jl_symbol("toInt32"), -jl_symbol("loading.jl"), -jl_symbol("value"), -jl_symbol("expr.jl"), -jl_symbol("print_to_string"), +jl_symbol("Base"), +jl_symbol("atomics.jl"), +jl_symbol("uuid"), +jl_symbol("one"), +jl_symbol("math.jl"), +jl_symbol("position"), +jl_symbol("typemax"), +jl_symbol("all"), +jl_symbol("error.jl"), +jl_symbol("path.jl"), +jl_symbol("^"), +jl_symbol("nextind"), +jl_symbol("include"), jl_symbol("the_exception"), -jl_symbol("nonzeros"), -jl_symbol("<:"), -jl_symbol("KeyError"), -jl_symbol("xor"), -jl_symbol("logging.jl"), +jl_symbol("ensure_initialized"), +jl_symbol("Const"), +jl_symbol("UInt128"), +jl_symbol("codeunit"), jl_symbol("stat.jl"), -jl_symbol("close"), -jl_symbol("adjoint"), -jl_symbol("meta"), -jl_symbol("path.jl"), -jl_symbol("round"), -jl_symbol("Cstring"), -jl_symbol("SizeUnknown"), -jl_symbol("esc"), -jl_symbol("missing.jl"), +jl_symbol("gcutils.jl"), +jl_symbol("UndefRefError"), +jl_symbol("diag"), jl_symbol("throw_undef_if_not"), -jl_symbol("error.jl"), -jl_symbol("Type"), -jl_symbol("mul!"), -jl_symbol("math.jl"), -jl_symbol("unsafe_trunc"), jl_symbol("missing"), -jl_symbol("subarray.jl"), -jl_symbol("noinline"), jl_symbol("isnan"), -jl_symbol("ldiv!"), -jl_symbol("DataType"), -jl_symbol("codeunit"), -jl_symbol("condition.jl"), -jl_symbol("step"), -jl_symbol("copyast"), -jl_symbol("bitset.jl"), -jl_symbol("float"), +jl_symbol("Enums.jl"), +jl_symbol("logging.jl"), +jl_symbol("_deleteend!"), +jl_symbol("indices"), +jl_symbol("compiler/utilities.jl"), +jl_symbol("Pairs"), +jl_symbol("<:"), +jl_symbol("compiler/tfuncs.jl"), +jl_symbol("close"), +jl_symbol("subarray.jl"), jl_symbol("fastmath.jl"), +jl_symbol("invokelatest"), +jl_symbol("jl_array_del_end"), jl_symbol("_mod64"), -jl_symbol("_div64"), -jl_symbol("all"), -jl_symbol("parse"), -jl_symbol("joinpath"), -jl_symbol("nextind"), +jl_symbol("parameters"), +jl_symbol("monotonic"), jl_symbol("regex.jl"), -jl_symbol("Enums.jl"), -jl_symbol("promote_type"), -jl_symbol("Cdouble"), -jl_symbol("ComplexF32"), -jl_symbol("read"), -jl_symbol("intfuncs.jl"), -jl_symbol("Complex"), -jl_symbol("_deleteend!"), -jl_symbol("stat"), -jl_symbol("UnionAll"), -jl_symbol("special/trig.jl"), -jl_symbol("UInt128"), -jl_symbol("_copyto_impl!"), -jl_symbol("stream.jl"), -jl_symbol("lmul!"), -jl_symbol("repr"), -jl_symbol("promote_rule"), -jl_symbol("xor_int"), -jl_symbol("complex.jl"), -jl_symbol("transpose"), -jl_symbol(">>>"), -jl_symbol("cholmod_sparse"), -jl_symbol("filemode"), 
-jl_symbol("ComplexF64"), -jl_symbol("SparseMatrixCSC"), -jl_symbol("view"), -jl_symbol("GitError"), -jl_symbol("zeros"), -jl_symbol("InexactError"), -jl_symbol("LogLevel"), diff --git a/src/coverage.cpp b/src/coverage.cpp index 46363a7e9ac01..100a4c66322bd 100644 --- a/src/coverage.cpp +++ b/src/coverage.cpp @@ -17,16 +17,16 @@ using namespace llvm; static int codegen_imaging_mode(void) { - return jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental); + return jl_options.image_codegen || (jl_generating_output() && jl_options.use_pkgimages); } // Logging for code coverage and memory allocation const int logdata_blocksize = 32; // target getting nearby lines in the same general cache area and reducing calls to malloc by chunking typedef uint64_t logdata_block[logdata_blocksize]; -typedef StringMap< std::vector > logdata_t; +typedef StringMap< SmallVector > logdata_t; -static uint64_t *allocLine(std::vector &vec, int line) +static uint64_t *allocLine(SmallVector &vec, int line) { unsigned block = line / logdata_blocksize; line = line % logdata_blocksize; @@ -63,7 +63,7 @@ extern "C" JL_DLLEXPORT void jl_coverage_visit_line(const char *filename_, size_ StringRef filename = StringRef(filename_, len_filename); if (codegen_imaging_mode() || filename == "" || filename == "none" || filename == "no file" || filename == "" || line < 0) return; - std::vector &vec = coverageData[filename]; + SmallVector &vec = coverageData[filename]; uint64_t *ptr = allocLine(vec, line); (*ptr)++; } @@ -82,8 +82,8 @@ extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void) { logdata_t::iterator it = mallocData.begin(); for (; it != mallocData.end(); it++) { - std::vector &bytes = (*it).second; - std::vector::iterator itb; + SmallVector &bytes = (*it).second; + SmallVector::iterator itb; for (itb = bytes.begin(); itb != bytes.end(); itb++) { if (*itb) { logdata_block &data = **itb; @@ -104,7 +104,7 @@ static void write_log_data(logdata_t &logData, const char *extension) logdata_t::iterator it = logData.begin(); for (; it != logData.end(); it++) { std::string filename(it->first()); - std::vector &values = it->second; + SmallVector &values = it->second; if (!values.empty()) { if (!jl_isabspath(filename.c_str())) filename = base + filename; @@ -160,7 +160,7 @@ static void write_lcov_data(logdata_t &logData, const std::string &outfile) logdata_t::iterator it = logData.begin(); for (; it != logData.end(); it++) { StringRef filename = it->first(); - const std::vector &values = it->second; + const SmallVector &values = it->second; if (!values.empty()) { outf << "SF:" << filename.str() << '\n'; size_t n_covered = 0; @@ -206,7 +206,7 @@ extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output) } } -extern "C" JL_DLLEXPORT void jl_write_malloc_log(void) +extern "C" void jl_write_malloc_log(void) { std::string stm; raw_string_ostream(stm) << "." 
<< uv_os_getpid() << ".mem"; diff --git a/src/crc32c.c b/src/crc32c.c index 4ca8db06459a1..b38f9d6c0e765 100644 --- a/src/crc32c.c +++ b/src/crc32c.c @@ -1,6 +1,6 @@ /* crc32c.c -- compute CRC-32C using software table or available hardware instructions - * Copyright (C) 2013 Mark Adler - * Version 1.1 1 Aug 2013 Mark Adler + * Copyright (C) 2013, 2021 Mark Adler + * Version 1.1 1 Aug 2013 Mark Adler, updates from Version 1.2 5 June 2021 * * Code retrieved in August 2016 from August 2013 post by Mark Adler on * http://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software @@ -10,6 +10,7 @@ * - architecture and compiler detection * - precompute crc32c tables and store in a generated .c file * - ARMv8 support + * Updated to incorporate upstream 2021 patch by Mark Adler to register constraints. */ /* @@ -39,6 +40,8 @@ /* Version history: 1.0 10 Feb 2013 First version 1.1 1 Aug 2013 Correct comments on why three crc instructions in parallel + 1.2 5 Jun 2021 Correct register constraints on assembly instructions + (+ other changes that were superfluous for us) */ #include "julia.h" @@ -53,14 +56,9 @@ #define POLY 0x82f63b78 /* Block sizes for three-way parallel crc computation. LONG and SHORT must - both be powers of two. The associated string constants must be set - accordingly, for use in constructing the assembler instructions. */ + both be powers of two. */ #define LONG 8192 -#define LONGx1 "8192" -#define LONGx2 "16384" #define SHORT 256 -#define SHORTx1 "256" -#define SHORTx2 "512" #ifndef GEN_CRC32C_TABLES #include "crc32c-tables.c" @@ -97,27 +95,27 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len) /* compute the crc for up to seven leading bytes to bring the data pointer to an eight-byte boundary */ while (len && ((uintptr_t)buf & 7) != 0) { - __asm__("crc32b\t" "(%1), %0" - : "=r"(crc0) - : "r"(buf), "0"(crc0)); + __asm__("crc32b\t" "%1, %0" + : "+r"(crc0) + : "m"(*buf)); buf++; len--; } - /* compute the crc on sets of LONG*3 bytes, executing three independent crc - instructions, each on LONG bytes -- this is optimized for the Nehalem, - Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a - throughput of one crc per cycle, but a latency of three cycles */ + /* compute the crc on sets of LONG*3 bytes, + making use of three ALUs in parallel on a single core. 
*/ while (len >= LONG * 3) { uintptr_t crc1 = 0; uintptr_t crc2 = 0; const char *end = buf + LONG; do { - __asm__(CRC32_PTR "\t" "(%3), %0\n\t" - CRC32_PTR "\t" LONGx1 "(%3), %1\n\t" - CRC32_PTR "\t" LONGx2 "(%3), %2" - : "=r"(crc0), "=r"(crc1), "=r"(crc2) - : "r"(buf), "0"(crc0), "1"(crc1), "2"(crc2)); + __asm__(CRC32_PTR "\t%3, %0\n\t" + CRC32_PTR "\t%4, %1\n\t" + CRC32_PTR "\t%5, %2" + : "+r"(crc0), "+r"(crc1), "+r"(crc2) + : "m"(* (const uintptr_t *) &buf[0]), + "m"(* (const uintptr_t *) &buf[LONG]), + "m"(* (const uintptr_t *) &buf[LONG*2])); buf += sizeof(void*); } while (buf < end); crc0 = crc32c_shift(crc32c_long, crc0) ^ crc1; @@ -133,11 +131,13 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len) uintptr_t crc2 = 0; const char *end = buf + SHORT; do { - __asm__(CRC32_PTR "\t" "(%3), %0\n\t" - CRC32_PTR "\t" SHORTx1 "(%3), %1\n\t" - CRC32_PTR "\t" SHORTx2 "(%3), %2" - : "=r"(crc0), "=r"(crc1), "=r"(crc2) - : "r"(buf), "0"(crc0), "1"(crc1), "2"(crc2)); + __asm__(CRC32_PTR "\t%3, %0\n\t" + CRC32_PTR "\t%4, %1\n\t" + CRC32_PTR "\t%5, %2" + : "+r"(crc0), "+r"(crc1), "+r"(crc2) + : "m"(* (const uintptr_t *) &buf[0]), + "m"(* (const uintptr_t *) &buf[SHORT]), + "m"(* (const uintptr_t *) &buf[SHORT*2])); buf += sizeof(void*); } while (buf < end); crc0 = crc32c_shift(crc32c_short, crc0) ^ crc1; @@ -150,18 +150,18 @@ static uint32_t crc32c_sse42(uint32_t crc, const char *buf, size_t len) block */ const char *end = buf + (len - (len & 7)); while (buf < end) { - __asm__(CRC32_PTR "\t" "(%1), %0" - : "=r"(crc0) - : "r"(buf), "0"(crc0)); + __asm__(CRC32_PTR "\t" "%1, %0" + : "+r"(crc0) + : "m"(* (const uintptr_t *) buf)); buf += sizeof(void*); } len &= 7; /* compute the crc for up to seven trailing bytes */ while (len) { - __asm__("crc32b\t" "(%1), %0" - : "=r"(crc0) - : "r"(buf), "0"(crc0)); + __asm__("crc32b\t" "%1, %0" + : "+r"(crc0) + : "m"(*buf)); buf++; len--; } diff --git a/src/datatype.c b/src/datatype.c index 593a2ededd169..7a04bfd7f2046 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -49,12 +49,11 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo mt->name = jl_demangle_typename(name); mt->module = module; jl_atomic_store_relaxed(&mt->defs, jl_nothing); - jl_atomic_store_relaxed(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_relaxed(&mt->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); jl_atomic_store_relaxed(&mt->cache, jl_nothing); - mt->max_args = 0; - mt->kwsorter = NULL; + jl_atomic_store_relaxed(&mt->max_args, 0); mt->backedges = NULL; - JL_MUTEX_INIT(&mt->writelock); + JL_MUTEX_INIT(&mt->writelock, "methodtable->writelock"); mt->offs = 0; mt->frozen = 0; return mt; @@ -69,11 +68,12 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu tn->name = name; tn->module = module; tn->wrapper = NULL; - jl_atomic_store_release(&tn->Typeofwrapper, NULL); + jl_atomic_store_relaxed(&tn->Typeofwrapper, NULL); jl_atomic_store_relaxed(&tn->cache, jl_emptysvec); jl_atomic_store_relaxed(&tn->linearcache, jl_emptysvec); tn->names = NULL; - tn->hash = bitmix(bitmix(module ? module->build_id : 0, name->hash), 0xa1ada1da); + tn->hash = bitmix(bitmix(module ? 
module->build_id.lo : 0, name->hash), 0xa1ada1da); + tn->_reserved = 0; tn->abstract = abstract; tn->mutabl = mutabl; tn->mayinlinealloc = 0; @@ -96,13 +96,18 @@ jl_datatype_t *jl_new_uninitialized_datatype(void) { jl_task_t *ct = jl_current_task; jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); + jl_set_typetagof(t, jl_datatype_tag, 0); t->hash = 0; t->hasfreetypevars = 0; t->isdispatchtuple = 0; t->isbitstype = 0; + t->isprimitivetype = 0; t->zeroinit = 0; t->has_concrete_subtype = 1; - t->cached_by_hash = 0; + t->maybe_subtype_of_cache = 1; + t->ismutationfree = 0; + t->isidentityfree = 0; + t->smalltag = 0; t->name = NULL; t->super = NULL; t->parameters = NULL; @@ -112,10 +117,69 @@ jl_datatype_t *jl_new_uninitialized_datatype(void) return t; } -static jl_datatype_layout_t *jl_get_layout(uint32_t nfields, +#include "support/htable.inc" + +static uint32_t _hash_djb2(uint32_t hash, const char *mem, size_t s) JL_NOTSAFEPOINT +{ + for (size_t i = 0; i < s; i++) + hash = ((hash << 5) + hash) + mem[i]; + return hash; +} + +static uint32_t _hash_layout_djb2(uintptr_t _layout, void *unused) JL_NOTSAFEPOINT +{ + (void)unused; + jl_datatype_layout_t* layout = (jl_datatype_layout_t *)_layout; + assert(layout); + size_t own_size = sizeof(jl_datatype_layout_t); + const char *fields = jl_dt_layout_fields(layout); + assert(fields); + size_t fields_size = layout->nfields * jl_fielddesc_size(layout->flags.fielddesc_type); + const char *pointers = jl_dt_layout_ptrs(layout); + assert(pointers); + size_t pointers_size = layout->first_ptr < 0 ? 0 : (layout->npointers << layout->flags.fielddesc_type); + + uint_t hash = 5381; + hash = _hash_djb2(hash, (char *)layout, own_size); + hash = _hash_djb2(hash, fields, fields_size); + hash = _hash_djb2(hash, pointers, pointers_size); + return hash; +} + +static int layout_eq(void *_l1, void *_l2, void *unused) JL_NOTSAFEPOINT +{ + (void)unused; + jl_datatype_layout_t *l1 = (jl_datatype_layout_t *)_l1; + jl_datatype_layout_t *l2 = (jl_datatype_layout_t *)_l2; + if (memcmp(l1, l2, sizeof(jl_datatype_layout_t))) + return 0; + const char *f1 = jl_dt_layout_fields(l1); + const char *f2 = jl_dt_layout_fields(l2); + size_t fields_size = l1->nfields * jl_fielddesc_size(l1->flags.fielddesc_type); + if (memcmp(f1, f2, fields_size)) + return 0; + const char *p1 = jl_dt_layout_ptrs(l1); + const char *p2 = jl_dt_layout_ptrs(l2); + size_t pointers_size = l1->first_ptr < 0 ? 
0 : (l1->npointers << l1->flags.fielddesc_type); + if (memcmp(p1, p2, pointers_size)) + return 0; + return 1; +} + +//HTPROT(layoutcache) +static void **layoutcache_lookup_bp_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; +static void **layoutcache_peek_bp_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; +HTPROT_R(layoutcache) +HTIMPL_R(layoutcache, _hash_layout_djb2, layout_eq) +static htable_t layoutcache; +static int layoutcache_initialized = 0; + +static jl_datatype_layout_t *jl_get_layout(uint32_t sz, + uint32_t nfields, uint32_t npointers, uint32_t alignment, int haspadding, + int arrayelem, jl_fielddesc32_t desc[], uint32_t pointers[]) JL_NOTSAFEPOINT { @@ -123,46 +187,55 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields, // compute the smallest fielddesc type that can hold the layout description int fielddesc_type = 0; + uint32_t max_size = 0; + uint32_t max_offset = 0; if (nfields > 0) { - uint32_t max_size = 0; - uint32_t max_offset = desc[nfields - 1].offset; - if (npointers > 0 && pointers[npointers - 1] > max_offset) - max_offset = pointers[npointers - 1]; + max_offset = desc[nfields - 1].offset; for (size_t i = 0; i < nfields; i++) { if (desc[i].size > max_size) max_size = desc[i].size; } - jl_fielddesc8_t maxdesc8 = { 0, max_size, max_offset }; - jl_fielddesc16_t maxdesc16 = { 0, max_size, max_offset }; - jl_fielddesc32_t maxdesc32 = { 0, max_size, max_offset }; - if (maxdesc8.size != max_size || maxdesc8.offset != max_offset) { - fielddesc_type = 1; - if (maxdesc16.size != max_size || maxdesc16.offset != max_offset) { - fielddesc_type = 2; - if (maxdesc32.size != max_size || maxdesc32.offset != max_offset) { - assert(0); // should have been verified by caller - } + } + if (npointers > 0 && pointers[npointers - 1] > max_offset) + max_offset = pointers[npointers - 1]; + jl_fielddesc8_t maxdesc8 = { 0, max_size, max_offset }; + jl_fielddesc16_t maxdesc16 = { 0, max_size, max_offset }; + jl_fielddesc32_t maxdesc32 = { 0, max_size, max_offset }; + if (maxdesc8.size != max_size || maxdesc8.offset != max_offset) { + fielddesc_type = 1; + if (maxdesc16.size != max_size || maxdesc16.offset != max_offset) { + fielddesc_type = 2; + if (maxdesc32.size != max_size || maxdesc32.offset != max_offset) { + assert(0); // should have been verified by caller } } } - - // allocate a new descriptor - // TODO: lots of these are the same--take advantage of the fact these are immutable to combine them - uint32_t fielddesc_size = jl_fielddesc_size(fielddesc_type); - jl_datatype_layout_t *flddesc = (jl_datatype_layout_t*)jl_gc_perm_alloc( - sizeof(jl_datatype_layout_t) + nfields * fielddesc_size + (npointers << fielddesc_type), - 0, 4, 0); + int32_t first_ptr = (npointers > 0 ? (int32_t)pointers[0] : -1); + + // allocate a new descriptor, on the stack if possible. + size_t fields_size = nfields * jl_fielddesc_size(fielddesc_type); + size_t pointers_size = first_ptr < 0 ? 0 : (npointers << fielddesc_type); + size_t flddesc_sz = sizeof(jl_datatype_layout_t) + fields_size + pointers_size; + int should_malloc = flddesc_sz >= jl_page_size; + jl_datatype_layout_t *mallocmem = (jl_datatype_layout_t *)(should_malloc ? malloc(flddesc_sz) : NULL); + jl_datatype_layout_t *allocamem = (jl_datatype_layout_t *)(should_malloc ? NULL : alloca(flddesc_sz)); + jl_datatype_layout_t *flddesc = should_malloc ? 
mallocmem : allocamem; + assert(flddesc); + flddesc->size = sz; flddesc->nfields = nfields; flddesc->alignment = alignment; - flddesc->haspadding = haspadding; - flddesc->fielddesc_type = fielddesc_type; + flddesc->flags.haspadding = haspadding; + flddesc->flags.fielddesc_type = fielddesc_type; + flddesc->flags.arrayelem_isboxed = arrayelem == 1; + flddesc->flags.arrayelem_isunion = arrayelem == 2; + flddesc->flags.padding = 0; flddesc->npointers = npointers; - flddesc->first_ptr = (npointers > 0 ? pointers[0] : -1); + flddesc->first_ptr = first_ptr; // fill out the fields of the new descriptor - jl_fielddesc8_t* desc8 = (jl_fielddesc8_t*)jl_dt_layout_fields(flddesc); - jl_fielddesc16_t* desc16 = (jl_fielddesc16_t*)jl_dt_layout_fields(flddesc); - jl_fielddesc32_t* desc32 = (jl_fielddesc32_t*)jl_dt_layout_fields(flddesc); + jl_fielddesc8_t *desc8 = (jl_fielddesc8_t *)jl_dt_layout_fields(flddesc); + jl_fielddesc16_t *desc16 = (jl_fielddesc16_t *)jl_dt_layout_fields(flddesc); + jl_fielddesc32_t *desc32 = (jl_fielddesc32_t *)jl_dt_layout_fields(flddesc); for (size_t i = 0; i < nfields; i++) { if (fielddesc_type == 0) { desc8[i].offset = desc[i].offset; @@ -180,21 +253,48 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields, desc32[i].isptr = desc[i].isptr; } } - uint8_t* ptrs8 = (uint8_t*)jl_dt_layout_ptrs(flddesc); - uint16_t* ptrs16 = (uint16_t*)jl_dt_layout_ptrs(flddesc); - uint32_t* ptrs32 = (uint32_t*)jl_dt_layout_ptrs(flddesc); - for (size_t i = 0; i < npointers; i++) { - if (fielddesc_type == 0) { - ptrs8[i] = pointers[i]; + if (first_ptr >= 0) { + uint8_t *ptrs8 = (uint8_t *)jl_dt_layout_ptrs(flddesc); + uint16_t *ptrs16 = (uint16_t *)jl_dt_layout_ptrs(flddesc); + uint32_t *ptrs32 = (uint32_t *)jl_dt_layout_ptrs(flddesc); + for (size_t i = 0; i < npointers; i++) { + if (fielddesc_type == 0) { + ptrs8[i] = pointers[i]; + } + else if (fielddesc_type == 1) { + ptrs16[i] = pointers[i]; + } + else { + ptrs32[i] = pointers[i]; + } } - else if (fielddesc_type == 1) { - ptrs16[i] = pointers[i]; + } + + if (__unlikely(!layoutcache_initialized)) { + htable_new(&layoutcache, 4096); + layoutcache_initialized = 1; + } + + // Check the cache to see if this object already exists. + // Add to cache if not present, free temp buffer, return. 
+ jl_datatype_layout_t *ret = + (jl_datatype_layout_t *)layoutcache_get_r(&layoutcache, flddesc, NULL); + if ((void*)ret == HT_NOTFOUND) { + if (!should_malloc) { + char *perm_mem = (char *)jl_gc_perm_alloc(flddesc_sz, 0, 4, 0); + assert(perm_mem); + ret = (jl_datatype_layout_t *)perm_mem; + memcpy(perm_mem, flddesc, flddesc_sz); } else { - ptrs32[i] = pointers[i]; + ret = mallocmem; } + layoutcache_put_r(&layoutcache, ret, ret, NULL); + return ret; } - return flddesc; + + if (should_malloc) free(flddesc); + return ret; } // Determine if homogeneous tuple with fields of type t will have @@ -225,23 +325,23 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t) STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT { - return (!d->name->abstract && jl_datatype_size(d) == 0 && d != jl_symbol_type && d->name != jl_array_typename && - d->isconcretetype && !d->name->mutabl); + return d->isconcretetype && jl_datatype_size(d) == 0 && d->layout->npointers == 0 && !d->name->mutabl; // implies jl_is_layout_opaque } STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st) JL_NOTSAFEPOINT { + // It's possible for st to already have an ->instance if it was redefined + if (st->instance) + return; if (jl_is_datatype_make_singleton(st)) { - // It's possible for st to already have an ->instance if it was redefined - if (!st->instance) - st->instance = jl_gc_permobj(0, st); + st->instance = jl_gc_permobj(0, st); } } // return whether all concrete subtypes of this type have the same layout int jl_struct_try_layout(jl_datatype_t *dt) { - if (dt->layout) + if (dt->layout || jl_is_genericmemory_type(dt)) return 1; else if (!jl_has_fixed_layout(dt)) return 0; @@ -259,7 +359,7 @@ int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree) return 0; if (ty->name->n_uninitialized != 0) return 0; - if (ty->layout->fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32) + if (ty->layout->flags.fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32) return 0; } return 1; @@ -357,6 +457,113 @@ static void throw_ovf(int should_malloc, void *desc, jl_datatype_t* st, int offs jl_errorf("type %s has field offset %d that exceeds the page size", jl_symbol_name(st->name->name), offset); } +static int is_type_mutationfree(jl_value_t *t) +{ + t = jl_unwrap_unionall(t); + if (jl_is_uniontype(t)) { + jl_uniontype_t *u = (jl_uniontype_t*)t; + return is_type_mutationfree(u->a) && is_type_mutationfree(u->b); + } + if (jl_is_datatype(t)) { + return ((jl_datatype_t*)t)->ismutationfree; + } + // Free tvars, etc. + return 0; +} + +static int is_type_identityfree(jl_value_t *t) +{ + t = jl_unwrap_unionall(t); + if (jl_is_uniontype(t)) { + jl_uniontype_t *u = (jl_uniontype_t*)t; + return is_type_identityfree(u->a) && is_type_identityfree(u->b); + } + if (jl_is_datatype(t)) { + return ((jl_datatype_t*)t)->isidentityfree; + } + // Free tvars, etc. 
+ return 0; +} + +// make a copy of the layout of st, but with nfields=0 +void jl_get_genericmemory_layout(jl_datatype_t *st) +{ + jl_value_t *isatomic = jl_tparam0(st); + jl_value_t *eltype = jl_tparam1(st); + jl_value_t *addrspace = jl_tparam2(st); + if (!jl_is_typevar(eltype) && !jl_is_type(eltype)) { + // this is expected to have a layout, but since it is not constructable, we don't care too much what it is + static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), {0}}; + st->layout = &opaque_ptr_layout; + st->has_concrete_subtype = 0; + return; + } + + size_t elsz = 0, al = 0; + int isunboxed = jl_islayout_inline(eltype, &elsz, &al); + int isunion = isunboxed && jl_is_uniontype(eltype); + int haspadding = 1; // we may want to eventually actually compute this + int nfields = 0; // aka jl_is_layout_opaque + int npointers = 1; + int zi; + uint32_t first_ptr = -1; + uint32_t *pointers = &first_ptr; + + if (isunboxed) { + elsz = LLT_ALIGN(elsz, al); + if (isunion) { + zi = 1; + } + else { + assert(jl_is_datatype(eltype)); + zi = ((jl_datatype_t*)eltype)->zeroinit; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)eltype)->layout; + if (layout->first_ptr >= 0) { + first_ptr = layout->first_ptr; + npointers = layout->npointers; + if (layout->flags.fielddesc_type == 2) { + pointers = (uint32_t*)jl_dt_layout_ptrs(layout); + } + else { + pointers = (uint32_t*)alloca(npointers * sizeof(uint32_t)); + for (int j = 0; j < npointers; j++) { + pointers[j] = jl_ptr_offset((jl_datatype_t*)eltype, j); + } + } + } + } + } + else { + elsz = sizeof(void*); + al = elsz; + zi = 1; + } + + int arrayelem; + if (!isunboxed) + arrayelem = 1; + else if (isunion) + arrayelem = 2; + else + arrayelem = 0; + assert(!st->layout); + st->layout = jl_get_layout(elsz, nfields, npointers, al, haspadding, arrayelem, NULL, pointers); + st->zeroinit = zi; + //st->has_concrete_subtype = 1; + //st->isbitstype = 0; + //st->ismutationfree = 0; + //st->isidentityfree = 0; + + if (isatomic == (jl_value_t*)jl_not_atomic_sym && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0) { + jl_genericmemory_t *zeroinst = (jl_genericmemory_t*)jl_gc_permobj(LLT_ALIGN(sizeof(jl_genericmemory_t), JL_SMALL_BYTE_ALIGNMENT) + (elsz ? elsz : isunion), st); + zeroinst->length = 0; + zeroinst->ptr = (char*)zeroinst + JL_SMALL_BYTE_ALIGNMENT; + memset(zeroinst->ptr, 0, elsz ? elsz : isunion); + assert(!st->instance); + st->instance = (jl_value_t*)zeroinst; + } +} + void jl_compute_field_offsets(jl_datatype_t *st) { const uint64_t max_offset = (((uint64_t)1) << 32) - 1; @@ -368,19 +575,23 @@ void jl_compute_field_offsets(jl_datatype_t *st) if (st == w && st->layout) { // this check allows us to force re-computation of the layout for some types during init st->layout = NULL; - st->size = 0; st->zeroinit = 0; st->has_concrete_subtype = 1; } + if (st->name == jl_genericmemory_typename) { + jl_get_genericmemory_layout(st); + return; + } int isbitstype = st->isconcretetype && st->name->mayinlinealloc; + int ismutationfree = !w->layout || !jl_is_layout_opaque(w->layout); + int isidentityfree = !st->name->mutabl; // If layout doesn't depend on type parameters, it's stored in st->name->wrapper // and reused by all subtypes. if (w->layout) { st->layout = w->layout; - st->size = w->size; st->zeroinit = w->zeroinit; st->has_concrete_subtype = w->has_concrete_subtype; - if (!jl_is_layout_opaque(st->layout)) { // e.g. jl_array_typename + if (!jl_is_layout_opaque(st->layout)) { // e.g. 
jl_simplevector_type st->isbitstype = isbitstype && st->layout->npointers == 0; jl_maybe_allocate_singleton_instance(st); } @@ -393,18 +604,18 @@ void jl_compute_field_offsets(jl_datatype_t *st) // if we have no fields, we can trivially skip the rest if (st == jl_symbol_type || st == jl_string_type) { // opaque layout - heap-allocated blob - static const jl_datatype_layout_t opaque_byte_layout = {0, 1, -1, 1, 0, 0}; + static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, {0}}; st->layout = &opaque_byte_layout; return; } - else if (st == jl_simplevector_type || st == jl_module_type || st->name == jl_array_typename) { - static const jl_datatype_layout_t opaque_ptr_layout = {0, 1, -1, sizeof(void*), 0, 0}; + else if (st == jl_simplevector_type || st == jl_module_type) { + static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), {0}}; st->layout = &opaque_ptr_layout; return; } else { // reuse the same layout for all singletons - static const jl_datatype_layout_t singleton_layout = {0, 0, -1, 1, 0, 0}; + static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, {0}}; st->layout = &singleton_layout; } } @@ -424,9 +635,11 @@ void jl_compute_field_offsets(jl_datatype_t *st) } } - for (i = 0; isbitstype && i < nfields; i++) { + for (i = 0; (isbitstype || isidentityfree || ismutationfree) && i < nfields; i++) { jl_value_t *fld = jl_field_type(st, i); - isbitstype = jl_isbits(fld); + isbitstype &= jl_isbits(fld); + ismutationfree &= (!st->name->mutabl || jl_field_isconst(st, i)) && is_type_mutationfree(fld); + isidentityfree &= is_type_identityfree(fld); } // if we didn't reuse the layout above, compute it now @@ -463,7 +676,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) } else { uint32_t fld_npointers = ((jl_datatype_t*)fld)->layout->npointers; - if (((jl_datatype_t*)fld)->layout->haspadding) + if (((jl_datatype_t*)fld)->layout->flags.haspadding) haspadding = 1; if (i >= nfields - st->name->n_uninitialized && fld_npointers && fld_npointers * sizeof(void*) != fsz) { @@ -525,9 +738,10 @@ void jl_compute_field_offsets(jl_datatype_t *st) if (al > alignm) alignm = al; } - st->size = LLT_ALIGN(sz, alignm); - if (st->size > sz) + if (LLT_ALIGN(sz, alignm) > sz) { haspadding = 1; + sz = LLT_ALIGN(sz, alignm); + } if (should_malloc && npointers) pointers = (uint32_t*)malloc_s(npointers * sizeof(uint32_t)); else @@ -546,7 +760,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) } } assert(ptr_i == npointers); - st->layout = jl_get_layout(nfields, npointers, alignm, haspadding, desc, pointers); + st->layout = jl_get_layout(sz, nfields, npointers, alignm, haspadding, 0, desc, pointers); if (should_malloc) { free(desc); if (npointers) @@ -557,6 +771,8 @@ void jl_compute_field_offsets(jl_datatype_t *st) // now finish deciding if this instantiation qualifies for special properties assert(!isbitstype || st->layout->npointers == 0); // the definition of isbits st->isbitstype = isbitstype; + st->ismutationfree = ismutationfree; + st->isidentityfree = isidentityfree; jl_maybe_allocate_singleton_instance(st); return; } @@ -584,7 +800,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype( jl_typename_t *tn = NULL; JL_GC_PUSH2(&t, &tn); - assert(parameters); + assert(parameters && fnames); // init enough before possibly calling jl_new_typename_in t = jl_new_uninitialized_datatype(); @@ -594,7 +810,6 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype( jl_gc_wb(t, t->parameters); t->types = ftypes; if (ftypes != NULL) jl_gc_wb(t, t->types); - t->size = 0; t->name = 
NULL; if (jl_is_typename(name)) { @@ -700,9 +915,14 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t * uint32_t alignm = next_power_of_two(nbytes); if (alignm > MAX_ALIGN) alignm = MAX_ALIGN; + // memoize isprimitivetype, since it is much easier than checking + // (dta->name->names == svec() && dta->layout && dta->layout->size != 0) + // and we easily have a free bit for it in the DataType flags + bt->isprimitivetype = 1; + bt->ismutationfree = 1; + bt->isidentityfree = 1; bt->isbitstype = (parameters == jl_emptysvec); - bt->size = nbytes; - bt->layout = jl_get_layout(0, 0, alignm, 0, NULL, NULL); + bt->layout = jl_get_layout(nbytes, 0, 0, alignm, 0, 0, NULL, NULL); bt->instance = NULL; return bt; } @@ -717,15 +937,18 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name, { jl_datatype_t *bt = jl_new_datatype(name, module, super, jl_emptysvec, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0); - bt->size = large ? GC_MAX_SZCLASS+1 : 0; jl_datatype_layout_t *layout = (jl_datatype_layout_t *) jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t), 0, 4, 0); + layout->size = large ? GC_MAX_SZCLASS+1 : 0; layout->nfields = 0; layout->alignment = sizeof(void *); - layout->haspadding = 1; layout->npointers = haspointers; - layout->fielddesc_type = 3; + layout->flags.haspadding = 1; + layout->flags.fielddesc_type = 3; + layout->flags.padding = 0; + layout->flags.arrayelem_isboxed = 0; + layout->flags.arrayelem_isunion = 0; jl_fielddescdyn_t * desc = (jl_fielddescdyn_t *) ((char *)layout + sizeof(*layout)); desc->markfunc = markfunc; @@ -735,11 +958,26 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name, return bt; } -JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) +JL_DLLEXPORT int jl_reinit_foreign_type(jl_datatype_t *dt, + jl_markfunc_t markfunc, + jl_sweepfunc_t sweepfunc) { - return jl_is_datatype(dt) && dt->layout && dt->layout->fielddesc_type == 3; + if (!jl_is_foreign_type(dt)) + return 0; + const jl_datatype_layout_t *layout = dt->layout; + jl_fielddescdyn_t * desc = + (jl_fielddescdyn_t *) ((char *)layout + sizeof(*layout)); + assert(!desc->markfunc); + assert(!desc->sweepfunc); + desc->markfunc = markfunc; + desc->sweepfunc = sweepfunc; + return 1; } +JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) +{ + return jl_is_datatype(dt) && dt->layout && dt->layout->flags.fielddesc_type == 3; +} // bits constructors ---------------------------------------------------------- @@ -819,6 +1057,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data) if (bt == jl_uint16_type) return jl_box_uint16(*(uint16_t*)data); if (bt == jl_char_type) return jl_box_char(*(uint32_t*)data); + assert(!bt->smalltag); jl_task_t *ct = jl_current_task; jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt); memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb); @@ -844,6 +1083,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data) if (bt == jl_uint16_type) return jl_box_uint16(jl_atomic_load((_Atomic(uint16_t)*)data)); if (bt == jl_char_type) return jl_box_char(jl_atomic_load((_Atomic(uint32_t)*)data)); + assert(!bt->smalltag); jl_task_t *ct = jl_current_task; jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt); // data is aligned to the power of two, @@ -911,6 +1151,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl if (bt == jl_uint16_type) return jl_box_uint16(jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src)); if (bt == jl_char_type) return 
jl_box_char(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src)); + assert(!bt->smalltag); jl_task_t *ct = jl_current_task; jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt); if (nb == 1) @@ -979,7 +1220,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t // n.b.: this does not spuriously fail if there are padding bits jl_task_t *ct = jl_current_task; int isptr = jl_field_isptr(rettyp, 0); - jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : rettyp->size, isptr ? dt : rettyp); + jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : jl_datatype_size(rettyp), isptr ? dt : rettyp); int success; jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected); if (nb == 0) { @@ -987,7 +1228,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t } else if (nb == 1) { uint8_t *y8 = (uint8_t*)y; - assert(!dt->layout->haspadding); + assert(!dt->layout->flags.haspadding); if (dt == et) { *y8 = *(uint8_t*)expected; uint8_t z8 = *(uint8_t*)src; @@ -1000,7 +1241,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t } else if (nb == 2) { uint16_t *y16 = (uint16_t*)y; - assert(!dt->layout->haspadding); + assert(!dt->layout->flags.haspadding); if (dt == et) { *y16 = *(uint16_t*)expected; uint16_t z16 = *(uint16_t*)src; @@ -1018,7 +1259,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t uint32_t z32 = zext_read32(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32); - if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) + if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt)) break; } } @@ -1035,7 +1276,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t uint64_t z64 = zext_read64(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64); - if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) + if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt)) break; } } @@ -1053,7 +1294,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t jl_uint128_t z128 = zext_read128(src, nb); while (1) { success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128); - if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) + if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt)) break; } } @@ -1068,7 +1309,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t } if (isptr) { JL_GC_PUSH1(&y); - jl_value_t *z = jl_gc_alloc(ct->ptls, rettyp->size, rettyp); + jl_value_t *z = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), rettyp); *(jl_value_t**)z = y; JL_GC_POP(); y = z; @@ -1078,34 +1319,18 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t return y; } - - -// used by boot.jl -JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt) -{ - uint64_t data = 0xffffffffffffffffULL; - jl_task_t *ct = jl_current_task; - jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(size_t), bt); - memcpy(v, &data, sizeof(size_t)); - return v; -} - -#define PERMBOXN_FUNC(nb,nw) \ - jl_value_t *jl_permbox##nb(jl_datatype_t *t, int##nb##_t x) \ - { /* NOTE: t must be a concrete isbits datatype */ \ - assert(jl_datatype_size(t) == sizeof(x)); \ - jl_value_t *v = jl_gc_permobj(nw * sizeof(void*), t); \ - *(int##nb##_t*)jl_data_ptr(v) = x; \ +#define PERMBOXN_FUNC(nb) \ + jl_value_t 
*jl_permbox##nb(jl_datatype_t *t, uintptr_t tag, uint##nb##_t x) \ + { /* n.b. t must be a concrete isbits datatype of the right size */ \ + jl_value_t *v = jl_gc_permobj(LLT_ALIGN(nb, sizeof(void*)), t); \ + if (tag) jl_set_typetagof(v, tag, GC_OLD_MARKED); \ + *(uint##nb##_t*)jl_data_ptr(v) = x; \ return v; \ } -PERMBOXN_FUNC(8, 1) -PERMBOXN_FUNC(16, 1) -PERMBOXN_FUNC(32, 1) -#ifdef _P64 -PERMBOXN_FUNC(64, 1) -#else -PERMBOXN_FUNC(64, 2) -#endif +PERMBOXN_FUNC(8) +PERMBOXN_FUNC(16) +PERMBOXN_FUNC(32) +PERMBOXN_FUNC(64) #define UNBOX_FUNC(j_type,c_type) \ JL_DLLEXPORT c_type jl_unbox_##j_type(jl_value_t *v) \ @@ -1128,27 +1353,27 @@ UNBOX_FUNC(float64, double) UNBOX_FUNC(voidpointer, void*) UNBOX_FUNC(uint8pointer, uint8_t*) -#define BOX_FUNC(typ,c_type,pfx,nw) \ +#define BOX_FUNC(typ,c_type,pfx) \ JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x) \ { \ jl_task_t *ct = jl_current_task; \ - jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*), \ + jl_value_t *v = jl_gc_alloc(ct->ptls, LLT_ALIGN(sizeof(x), sizeof(void*)), \ jl_##typ##_type); \ *(c_type*)jl_data_ptr(v) = x; \ return v; \ } -BOX_FUNC(float32, float, jl_box, 1) -BOX_FUNC(voidpointer, void*, jl_box, 1) -BOX_FUNC(uint8pointer, uint8_t*, jl_box, 1) -#ifdef _P64 -BOX_FUNC(float64, double, jl_box, 1) -#else -BOX_FUNC(float64, double, jl_box, 2) -#endif +BOX_FUNC(float32, float, jl_box) +BOX_FUNC(float64, double, jl_box) +BOX_FUNC(voidpointer, void*, jl_box) +BOX_FUNC(uint8pointer, uint8_t*, jl_box) #define NBOX_C 1024 -#define SIBOX_FUNC(typ,c_type,nw)\ +// some shims to support UIBOX_FUNC definition +#define jl_ssavalue_tag (((uintptr_t)jl_ssavalue_type) >> 4) +#define jl_slotnumber_tag (((uintptr_t)jl_slotnumber_type) >> 4) + +#define SIBOX_FUNC(typ,c_type) \ static jl_value_t *boxed_##typ##_cache[NBOX_C]; \ JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x) \ { \ @@ -1156,36 +1381,33 @@ BOX_FUNC(float64, double, jl_box, 2) c_type idx = x+NBOX_C/2; \ if ((u##c_type)idx < (u##c_type)NBOX_C) \ return boxed_##typ##_cache[idx]; \ - jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*), \ + jl_value_t *v = jl_gc_alloc(ct->ptls, LLT_ALIGN(sizeof(x), sizeof(void*)), \ jl_##typ##_type); \ + jl_set_typetagof(v, jl_##typ##_tag, 0); \ *(c_type*)jl_data_ptr(v) = x; \ return v; \ } -#define UIBOX_FUNC(typ,c_type,nw) \ +#define UIBOX_FUNC(typ,c_type) \ static jl_value_t *boxed_##typ##_cache[NBOX_C]; \ JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x) \ { \ jl_task_t *ct = jl_current_task; \ if (x < NBOX_C) \ return boxed_##typ##_cache[x]; \ - jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*), \ + jl_value_t *v = jl_gc_alloc(ct->ptls, LLT_ALIGN(sizeof(x), sizeof(void*)), \ jl_##typ##_type); \ + jl_set_typetagof(v, jl_##typ##_tag, 0); \ *(c_type*)jl_data_ptr(v) = x; \ return v; \ } -SIBOX_FUNC(int16, int16_t, 1) -SIBOX_FUNC(int32, int32_t, 1) -UIBOX_FUNC(uint16, uint16_t, 1) -UIBOX_FUNC(uint32, uint32_t, 1) -UIBOX_FUNC(ssavalue, size_t, 1) -UIBOX_FUNC(slotnumber, size_t, 1) -#ifdef _P64 -SIBOX_FUNC(int64, int64_t, 1) -UIBOX_FUNC(uint64, uint64_t, 1) -#else -SIBOX_FUNC(int64, int64_t, 2) -UIBOX_FUNC(uint64, uint64_t, 2) -#endif +SIBOX_FUNC(int16, int16_t) +SIBOX_FUNC(int32, int32_t) +UIBOX_FUNC(uint16, uint16_t) +UIBOX_FUNC(uint32, uint32_t) +UIBOX_FUNC(ssavalue, size_t) +UIBOX_FUNC(slotnumber, size_t) +SIBOX_FUNC(int64, int64_t) +UIBOX_FUNC(uint64, uint64_t) static jl_value_t *boxed_char_cache[128]; JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x) @@ -1195,6 +1417,7 @@ JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x) if (u < 128) return 
boxed_char_cache[(uint8_t)u]; jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(void*), jl_char_type); + jl_set_typetagof(v, jl_char_tag, 0); *(uint32_t*)jl_data_ptr(v) = x; return v; } @@ -1214,35 +1437,35 @@ void jl_init_int32_int64_cache(void) { int64_t i; for(i=0; i < NBOX_C; i++) { - boxed_int32_cache[i] = jl_permbox32(jl_int32_type, i-NBOX_C/2); - boxed_int64_cache[i] = jl_permbox64(jl_int64_type, i-NBOX_C/2); + boxed_int32_cache[i] = jl_permbox32(jl_int32_type, jl_int32_tag, i-NBOX_C/2); + boxed_int64_cache[i] = jl_permbox64(jl_int64_type, jl_int64_tag, i-NBOX_C/2); + boxed_uint16_cache[i] = jl_permbox16(jl_uint16_type, jl_uint16_tag, i); + boxed_uint64_cache[i] = jl_permbox64(jl_uint64_type, jl_uint64_tag, i); + boxed_uint32_cache[i] = jl_permbox32(jl_uint32_type, jl_uint32_tag, i); #ifdef _P64 - boxed_ssavalue_cache[i] = jl_permbox64(jl_ssavalue_type, i); - boxed_slotnumber_cache[i] = jl_permbox64(jl_slotnumber_type, i); + boxed_ssavalue_cache[i] = jl_permbox64(jl_ssavalue_type, 0, i); + boxed_slotnumber_cache[i] = jl_permbox64(jl_slotnumber_type, 0, i); #else - boxed_ssavalue_cache[i] = jl_permbox32(jl_ssavalue_type, i); - boxed_slotnumber_cache[i] = jl_permbox32(jl_slotnumber_type, i); + boxed_ssavalue_cache[i] = jl_permbox32(jl_ssavalue_type, 0, i); + boxed_slotnumber_cache[i] = jl_permbox32(jl_slotnumber_type, 0, i); #endif } for(i=0; i < 256; i++) { - jl_boxed_uint8_cache[i] = jl_permbox8(jl_uint8_type, i); + jl_boxed_uint8_cache[i] = jl_permbox8(jl_uint8_type, jl_uint8_tag, i); } } void jl_init_box_caches(void) { - int64_t i; - for(i=0; i < 128; i++) { - boxed_char_cache[i] = jl_permbox32(jl_char_type, i << 24); + uint32_t i; + for (i = 0; i < 128; i++) { + boxed_char_cache[i] = jl_permbox32(jl_char_type, jl_char_tag, i << 24); } - for(i=0; i < 256; i++) { - jl_boxed_int8_cache[i] = jl_permbox8(jl_int8_type, i); + for (i = 0; i < 256; i++) { + jl_boxed_int8_cache[i] = jl_permbox8(jl_int8_type, jl_int8_tag, i); } - for(i=0; i < NBOX_C; i++) { - boxed_int16_cache[i] = jl_permbox16(jl_int16_type, i-NBOX_C/2); - boxed_uint16_cache[i] = jl_permbox16(jl_uint16_type, i); - boxed_uint32_cache[i] = jl_permbox32(jl_uint32_type, i); - boxed_uint64_cache[i] = jl_permbox64(jl_uint64_type, i); + for (i = 0; i < NBOX_C; i++) { + boxed_int16_cache[i] = jl_permbox16(jl_int16_type, jl_int16_tag, i-NBOX_C/2); } } @@ -1258,11 +1481,17 @@ JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...) { jl_task_t *ct = jl_current_task; - if (type->instance != NULL) return type->instance; + if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) { + jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type); + } + if (type->instance != NULL) + return type->instance; va_list args; size_t i, nf = jl_datatype_nfields(type); va_start(args, type); jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); + if (type->smalltag) // TODO: move to callers? + jl_set_typetagof(jv, type->smalltag, 0); if (nf > 0 && jl_field_offset(type, 0) != 0) { memset(jv, 0, jl_field_offset(type, 0)); } @@ -1276,7 +1505,7 @@ JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...) 
JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na) { jl_task_t *ct = jl_current_task; - if (!jl_is_datatype(type) || type->layout == NULL) { + if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) { jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type); } size_t nf = jl_datatype_nfields(type); @@ -1290,6 +1519,8 @@ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, if (type->instance != NULL) return type->instance; jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type); + if (type->smalltag) // TODO: do we need this? + jl_set_typetagof(jv, type->smalltag, 0); if (jl_datatype_nfields(type) > 0) { if (jl_field_offset(type, 0) != 0) { memset(jl_data_ptr(jv), 0, jl_field_offset(type, 0)); @@ -1313,7 +1544,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) jl_task_t *ct = jl_current_task; if (!jl_is_tuple(tup)) jl_type_error("new", (jl_value_t*)jl_tuple_type, tup); - if (!jl_is_datatype(type) || type->layout == NULL) + if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) jl_type_error("new", (jl_value_t *)jl_datatype_type, (jl_value_t *)type); size_t nargs = jl_nfields(tup); size_t nf = jl_datatype_nfields(type); @@ -1331,6 +1562,8 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) } size_t size = jl_datatype_size(type); jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type); + if (type->smalltag) // TODO: do we need this? + jl_set_typetagof(jv, type->smalltag, 0); if (nf == 0) return jv; jl_value_t *fi = NULL; @@ -1358,9 +1591,15 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup) JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type) { jl_task_t *ct = jl_current_task; - if (type->instance != NULL) return type->instance; + if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) { + jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type); + } + if (type->instance != NULL) + return type->instance; size_t size = jl_datatype_size(type); jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type); + if (type->smalltag) // TODO: do we need this? 
+ jl_set_typetagof(jv, type->smalltag, 0); if (size > 0) memset(jl_data_ptr(jv), 0, size); return jv; @@ -1401,8 +1640,7 @@ JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err) } } if (err) - jl_errorf("type %s has no field %s", jl_symbol_name(t->name->name), - jl_symbol_name(fld)); + jl_has_no_field_error(t->name->name, fld); return -1; } @@ -1467,9 +1705,10 @@ static inline void memassign_safe(int hasptr, jl_value_t *parent, char *dst, con // assert that although dst might have some undefined bits, the src heap box should be okay with that assert(LLT_ALIGN(nb, sizeof(void*)) == LLT_ALIGN(jl_datatype_size(jl_typeof(src)), sizeof(void*))); size_t nptr = nb / sizeof(void*); - memmove_refs((void**)dst, (void**)src, nptr); + memmove_refs((_Atomic(void*)*)dst, (_Atomic(void*)*)src, nptr); jl_gc_multi_wb(parent, src); src = (jl_value_t*)((char*)src + nptr * sizeof(void*)); + dst = dst + nptr * sizeof(void*); nb -= nptr * sizeof(void*); } else { @@ -1565,6 +1804,7 @@ jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_ } else { hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0; + r = NULL; } size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE); @@ -1645,8 +1885,8 @@ jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_valu int success = memcmp((char*)v + offs, r, fsz) == 0; if (success) { if (isunion) { - size_t fsz = jl_field_size(st, i); - uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1]; + size_t fsz_i = jl_field_size(st, i); + uint8_t *psel = &((uint8_t*)v)[offs + fsz_i - 1]; success = (jl_typeof(r) == jl_nth_union_component(ty, *psel)); if (success) { unsigned nth = 0; @@ -1738,14 +1978,14 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val rty = jl_nth_union_component(rty, *psel); } assert(!jl_field_isptr(rettyp, 0)); - r = jl_gc_alloc(ct->ptls, rettyp->size, (jl_value_t*)rettyp); + r = jl_gc_alloc(ct->ptls, jl_datatype_size(rettyp), (jl_value_t*)rettyp); int success = (rty == jl_typeof(expected)); if (needlock) jl_lock_value(v); memcpy((char*)r, (char*)v + offs, fsz); // copy field, including union bits if (success) { size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy - if (((jl_datatype_t*)rty)->layout->haspadding) + if (((jl_datatype_t*)rty)->layout->flags.haspadding) success = jl_egal__bits(r, expected, (jl_datatype_t*)rty); else success = memcmp((char*)r, (char*)expected, fsz) == 0; @@ -1789,6 +2029,16 @@ JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT return fval != NULL ? 
1 : 0; } +JL_DLLEXPORT int jl_field_isdefined_checked(jl_value_t *v, size_t i) +{ + if (jl_is_module(v)) { + jl_type_error("isdefined", (jl_value_t*)jl_symbol_type, jl_box_long(i + 1)); + } + if (i >= jl_nfields(v)) + return 0; + return !!jl_field_isdefined(v, i); +} + JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field) { if (!jl_struct_try_layout(ty) || field > jl_datatype_nfields(ty) || field < 1) diff --git a/src/debug-registry.h b/src/debug-registry.h index b98c42f8de1fc..f30049eb5b210 100644 --- a/src/debug-registry.h +++ b/src/debug-registry.h @@ -2,8 +2,7 @@ #include #include -#include "julia_internal.h" -#include "processor.h" +#include "julia.h" #include #include @@ -80,17 +79,18 @@ class JITDebugInfoRegistry ~Locked() JL_NOTSAFEPOINT = default; }; - struct sysimg_info_t { - uint64_t jl_sysimage_base; - jl_sysimg_fptrs_t sysimg_fptrs; - jl_method_instance_t **sysimg_fvars_linfo; - size_t sysimg_fvars_n; + struct image_info_t { + uint64_t base; + jl_image_fptrs_t fptrs; + jl_method_instance_t **fvars_linfo; + size_t fvars_n; }; struct libc_frames_t { #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB) - std::atomic libc_register_frame_{nullptr}; - std::atomic libc_deregister_frame_{nullptr}; + typedef void (*frame_register_func)(void *) JL_NOTSAFEPOINT; + std::atomic libc_register_frame_{nullptr}; + std::atomic libc_deregister_frame_{nullptr}; void libc_register_frame(const char *Entry) JL_NOTSAFEPOINT; @@ -121,7 +121,7 @@ class JITDebugInfoRegistry // that it came from (providing name, type signature, file info, etc.) Locked> codeinst_in_flight{}; - Locked sysimg_info{}; + Locked> image_info{}; Locked objfilemap{}; @@ -138,9 +138,9 @@ class JITDebugInfoRegistry jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT; void registerJITObject(const llvm::object::ObjectFile &Object, std::function getLoadAddress, - std::function lookupWriteAddress) JL_NOTSAFEPOINT; + std::function lookupWriteAddress); objectmap_t& getObjectMap() JL_NOTSAFEPOINT; - void set_sysimg_info(sysimg_info_t info) JL_NOTSAFEPOINT; - Locked::ConstLockT get_sysimg_info() const JL_NOTSAFEPOINT; + void add_image_info(image_info_t info) JL_NOTSAFEPOINT; + bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT; Locked::LockT get_objfile_map() JL_NOTSAFEPOINT; }; diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index 2058125159b90..b2b4743962fc0 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -20,10 +20,13 @@ #include #include +#ifdef _OS_DARWIN_ +#include +#endif + using namespace llvm; -#include "julia.h" -#include "julia_internal.h" +#include "jitlayers.h" #include "debuginfo.h" #if defined(_OS_LINUX_) # include @@ -36,15 +39,12 @@ using namespace llvm; #include #include #include "julia_assert.h" +#include "debug-registry.h" -#ifdef _OS_DARWIN_ -#include -#endif - -#include "jitlayers.h" +static JITDebugInfoRegistry *DebugRegistry = new JITDebugInfoRegistry; static JITDebugInfoRegistry &getJITDebugRegistry() JL_NOTSAFEPOINT { - return jl_ExecutionEngine->getDebugInfoRegistry(); + return *DebugRegistry; } struct debug_link_info { @@ -53,8 +53,8 @@ struct debug_link_info { }; #if (defined(_OS_LINUX_) || defined(_OS_FREEBSD_) || (defined(_OS_DARWIN_) && defined(LLVM_SHLIB))) -extern "C" void __register_frame(void*); -extern "C" void __deregister_frame(void*); +extern "C" void __register_frame(void*) JL_NOTSAFEPOINT; +extern "C" void __deregister_frame(void*) JL_NOTSAFEPOINT; template static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback 
f) @@ -79,7 +79,7 @@ static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f) } #endif -std::string JITDebugInfoRegistry::mangle(StringRef Name, const DataLayout &DL) JL_NOTSAFEPOINT +std::string JITDebugInfoRegistry::mangle(StringRef Name, const DataLayout &DL) { std::string MangledName; { @@ -89,11 +89,11 @@ std::string JITDebugInfoRegistry::mangle(StringRef Name, const DataLayout &DL) J return MangledName; } -void JITDebugInfoRegistry::add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) JL_NOTSAFEPOINT { +void JITDebugInfoRegistry::add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) { (**codeinst_in_flight)[mangle(name, DL)] = codeinst; } -jl_method_instance_t *JITDebugInfoRegistry::lookupLinfo(size_t pointer) JL_NOTSAFEPOINT +jl_method_instance_t *JITDebugInfoRegistry::lookupLinfo(size_t pointer) { jl_lock_profile(); auto region = linfomap.lower_bound(pointer); @@ -106,26 +106,32 @@ jl_method_instance_t *JITDebugInfoRegistry::lookupLinfo(size_t pointer) JL_NOTSA //Protected by debuginfo_asyncsafe (profile) lock JITDebugInfoRegistry::objectmap_t & -JITDebugInfoRegistry::getObjectMap() JL_NOTSAFEPOINT +JITDebugInfoRegistry::getObjectMap() { return objectmap; } -void JITDebugInfoRegistry::set_sysimg_info(sysimg_info_t info) JL_NOTSAFEPOINT { - (**this->sysimg_info) = info; +void JITDebugInfoRegistry::add_image_info(image_info_t info) { + (**this->image_info)[info.base] = info; } -JITDebugInfoRegistry::Locked::ConstLockT -JITDebugInfoRegistry::get_sysimg_info() const JL_NOTSAFEPOINT { - return *this->sysimg_info; + +bool JITDebugInfoRegistry::get_image_info(uint64_t base, JITDebugInfoRegistry::image_info_t *info) const { + auto infos = *this->image_info; + auto it = infos->find(base); + if (it != infos->end()) { + *info = it->second; + return true; + } + return false; } JITDebugInfoRegistry::Locked::LockT -JITDebugInfoRegistry::get_objfile_map() JL_NOTSAFEPOINT { +JITDebugInfoRegistry::get_objfile_map() { return *this->objfilemap; } -JITDebugInfoRegistry::JITDebugInfoRegistry() JL_NOTSAFEPOINT { } +JITDebugInfoRegistry::JITDebugInfoRegistry() { } struct unw_table_entry { @@ -136,7 +142,7 @@ struct unw_table_entry // some actions aren't signal (especially profiler) safe so we acquire a lock // around them to establish a mutual exclusion with unwinding from a signal template -static void jl_profile_atomic(T f) +static void jl_profile_atomic(T f) JL_NOTSAFEPOINT { assert(0 == jl_lock_profile_rd_held()); jl_lock_profile_wr(); @@ -183,7 +189,7 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam if (mod_size && !SymLoadModuleEx(GetCurrentProcess(), NULL, NULL, NULL, (DWORD64)Section, mod_size, NULL, SLMFLAG_VIRTUAL)) { static int warned = 0; if (!warned) { - jl_printf(JL_STDERR, "WARNING: failed to insert module info for backtrace: %lu\n", GetLastError()); + jl_safe_printf("WARNING: failed to insert module info for backtrace: %lu\n", GetLastError()); warned = 1; } } @@ -196,17 +202,17 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam name[len-1] = 0; if (!SymAddSymbol(GetCurrentProcess(), (ULONG64)Section, name, (DWORD64)Code, (DWORD)Size, 0)) { - jl_printf(JL_STDERR, "WARNING: failed to insert function name %s into debug info: %lu\n", name, GetLastError()); + jl_safe_printf("WARNING: failed to insert function name %s into debug info: %lu\n", name, GetLastError()); } } uv_mutex_unlock(&jl_in_stackwalk); } #if defined(_CPU_X86_64_) - 
jl_profile_atomic([&]() { + jl_profile_atomic([&]() JL_NOTSAFEPOINT { if (!RtlAddFunctionTable(tbl, 1, (DWORD64)Section)) { static int warned = 0; if (!warned) { - jl_printf(JL_STDERR, "WARNING: failed to insert function stack unwind info: %lu\n", GetLastError()); + jl_safe_printf("WARNING: failed to insert function stack unwind info: %lu\n", GetLastError()); warned = 1; } } @@ -264,7 +270,7 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, di->u.rti.name_ptr = 0; di->u.rti.table_data = arm_exidx_addr; di->u.rti.table_len = arm_exidx_len; - jl_profile_atomic([&]() { + jl_profile_atomic([&]() JL_NOTSAFEPOINT { _U_dyn_register(di); }); break; @@ -366,9 +372,19 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object, codeinst_in_flight.erase(codeinst_it); } } - jl_profile_atomic([&]() { - if (codeinst) - linfomap[Addr] = std::make_pair(Size, codeinst->def); + jl_method_instance_t *mi = NULL; + if (codeinst) { + JL_GC_PROMISE_ROOTED(codeinst); + mi = codeinst->def; + // Non-opaque-closure MethodInstances are considered globally rooted + // through their methods, but for OC, we need to create a global root + // here. + if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) + mi = (jl_method_instance_t*)jl_as_global_root((jl_value_t*)mi, 1); + } + jl_profile_atomic([&]() JL_NOTSAFEPOINT { + if (mi) + linfomap[Addr] = std::make_pair(Size, mi); if (first) { objectmap[SectionLoadAddr] = {&Object, (size_t)SectionSize, @@ -497,7 +513,7 @@ static int lookup_pointer( std::size_t semi_pos = func_name.find(';'); if (semi_pos != std::string::npos) { func_name = func_name.substr(0, semi_pos); - frame->linfo = NULL; // TODO: if (new_frames[n_frames - 1].linfo) frame->linfo = lookup(func_name in linfo)? + frame->linfo = NULL; // Looked up on Julia side } } } @@ -534,26 +550,26 @@ static int lookup_pointer( #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB) void JITDebugInfoRegistry::libc_frames_t::libc_register_frame(const char *Entry) { - auto libc_register_frame_ = jl_atomic_load_relaxed(&this->libc_register_frame_); + frame_register_func libc_register_frame_ = jl_atomic_load_relaxed(&this->libc_register_frame_); if (!libc_register_frame_) { libc_register_frame_ = (void(*)(void*))dlsym(RTLD_NEXT, "__register_frame"); jl_atomic_store_release(&this->libc_register_frame_, libc_register_frame_); } assert(libc_register_frame_); - jl_profile_atomic([&]() { + jl_profile_atomic([&]() JL_NOTSAFEPOINT { libc_register_frame_(const_cast(Entry)); __register_frame(const_cast(Entry)); }); } void JITDebugInfoRegistry::libc_frames_t::libc_deregister_frame(const char *Entry) { - auto libc_deregister_frame_ = jl_atomic_load_relaxed(&this->libc_deregister_frame_); + frame_register_func libc_deregister_frame_ = jl_atomic_load_relaxed(&this->libc_deregister_frame_); if (!libc_deregister_frame_) { libc_deregister_frame_ = (void(*)(void*))dlsym(RTLD_NEXT, "__deregister_frame"); jl_atomic_store_release(&this->libc_deregister_frame_, libc_deregister_frame_); } assert(libc_deregister_frame_); - jl_profile_atomic([&]() { + jl_profile_atomic([&]() JL_NOTSAFEPOINT { libc_deregister_frame_(const_cast(Entry)); __deregister_frame(const_cast(Entry)); }); @@ -597,7 +613,7 @@ static debug_link_info getDebuglink(const object::ObjectFile &Obj) JL_NOTSAFEPOI * code or tables extracted from it, as desired without restriction. 
*/ static uint32_t -calc_gnu_debuglink_crc32(const void *buf, size_t size) +calc_gnu_debuglink_crc32(const void *buf, size_t size) JL_NOTSAFEPOINT { static const uint32_t g_crc32_tab[] = { @@ -655,7 +671,7 @@ calc_gnu_debuglink_crc32(const void *buf, size_t size) } static Expected> -openDebugInfo(StringRef debuginfopath, const debug_link_info &info) +openDebugInfo(StringRef debuginfopath, const debug_link_info &info) JL_NOTSAFEPOINT { auto SplitFile = MemoryBuffer::getFile(debuginfopath); if (std::error_code EC = SplitFile.getError()) { @@ -681,11 +697,11 @@ openDebugInfo(StringRef debuginfopath, const debug_link_info &info) std::move(error_splitobj.get()), std::move(SplitFile.get())); } -extern "C" JL_DLLEXPORT -void jl_register_fptrs_impl(uint64_t sysimage_base, const jl_sysimg_fptrs_t *fptrs, +extern "C" JL_DLLEXPORT_CODEGEN +void jl_register_fptrs_impl(uint64_t image_base, const jl_image_fptrs_t *fptrs, jl_method_instance_t **linfos, size_t n) { - getJITDebugRegistry().set_sysimg_info({(uintptr_t) sysimage_base, *fptrs, linfos, n}); + getJITDebugRegistry().add_image_info({(uintptr_t) image_base, *fptrs, linfos, n}); } template @@ -696,12 +712,9 @@ static inline void ignoreError(T &err) JL_NOTSAFEPOINT #endif } -static void get_function_name_and_base(llvm::object::SectionRef Section, size_t pointer, int64_t slide, bool insysimage, +static void get_function_name_and_base(llvm::object::SectionRef Section, size_t pointer, int64_t slide, bool inimage, void **saddr, char **name, bool untrusted_dladdr) JL_NOTSAFEPOINT { - // Assume we only need base address for sysimg for now - if (!insysimage || !getJITDebugRegistry().get_sysimg_info()->sysimg_fptrs.base) - saddr = nullptr; bool needs_saddr = saddr && (!*saddr || untrusted_dladdr); bool needs_name = name && (!*name || untrusted_dladdr); // Try platform specific methods first since they are usually faster @@ -782,7 +795,7 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t } #ifdef _OS_WINDOWS_ // For ntdll and msvcrt since we are currently only parsing DWARF debug info through LLVM - if (!insysimage && needs_name) { + if (!inimage && needs_name) { static char frame_info_func[ sizeof(SYMBOL_INFO) + MAX_SYM_NAME * sizeof(TCHAR)]; @@ -1014,7 +1027,7 @@ static object::SectionRef getModuleSectionForAddress(const object::ObjectFile *o bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *slide, llvm::DIContext **context, - bool onlySysImg, bool *isSysImg, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT + bool onlyImage, bool *isImage, uint64_t *_fbase, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT { *Section = object::SectionRef(); *context = NULL; @@ -1048,10 +1061,11 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * if (fname.empty()) // empirically, LoadedImageName might be missing fname = ModuleInfo.ImageName; DWORD64 fbase = ModuleInfo.BaseOfImage; - bool insysimage = (fbase == getJITDebugRegistry().get_sysimg_info()->jl_sysimage_base); - if (isSysImg) - *isSysImg = insysimage; - if (onlySysImg && !insysimage) + JITDebugInfoRegistry::image_info_t image_info; + bool inimage = getJITDebugRegistry().get_image_info(fbase, &image_info); + if (isImage) + *isImage = inimage; + if (onlyImage && !inimage) return false; // If we didn't find the filename before in the debug // info, use the dll name @@ -1059,6 +1073,8 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * jl_copy_str(filename, fname.data()); 
if (saddr) *saddr = NULL; + if (_fbase) + *_fbase = fbase; #else // ifdef _OS_WINDOWS_ Dl_info dlinfo; @@ -1067,6 +1083,15 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * #ifdef __GLIBC__ struct link_map *extra_info; dladdr_success = dladdr1((void*)pointer, &dlinfo, (void**)&extra_info, RTLD_DL_LINKMAP) != 0; + if (dladdr_success) { + msan_unpoison(&dlinfo, sizeof(dlinfo)); + if (dlinfo.dli_fname) + msan_unpoison_string(dlinfo.dli_fname); + if (dlinfo.dli_sname) + msan_unpoison_string(dlinfo.dli_sname); + msan_unpoison(&extra_info, sizeof(struct link_map*)); + msan_unpoison(extra_info, sizeof(struct link_map)); + } #else #ifdef _OS_DARWIN_ // On macOS 12, dladdr(-1, …) succeeds and returns the main executable image, @@ -1088,16 +1113,19 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * fbase = (uintptr_t)dlinfo.dli_fbase; #endif StringRef fname; - bool insysimage = (fbase == getJITDebugRegistry().get_sysimg_info()->jl_sysimage_base); - if (saddr && !(insysimage && untrusted_dladdr)) + JITDebugInfoRegistry::image_info_t image_info; + bool inimage = getJITDebugRegistry().get_image_info(fbase, &image_info); + if (saddr && !(inimage && untrusted_dladdr)) *saddr = dlinfo.dli_saddr; - if (isSysImg) - *isSysImg = insysimage; - if (onlySysImg && !insysimage) + if (isImage) + *isImage = inimage; + if (onlyImage && !inimage) return false; + if (_fbase) + *_fbase = fbase; // In case we fail with the debug info lookup, we at least still // have the function name, even if we don't have line numbers - if (name && !(insysimage && untrusted_dladdr)) + if (name && !(inimage && untrusted_dladdr)) jl_copy_str(name, dlinfo.dli_sname); if (filename) jl_copy_str(filename, dlinfo.dli_fname); @@ -1108,7 +1136,10 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t * *context = entry.ctx; if (entry.obj) *Section = getModuleSectionForAddress(entry.obj, pointer + entry.slide); - get_function_name_and_base(*Section, pointer, entry.slide, insysimage, saddr, name, untrusted_dladdr); + // Assume we only need base address for sysimg for now + if (!inimage || !image_info.fptrs.base) + saddr = nullptr; + get_function_name_and_base(*Section, pointer, entry.slide, inimage, saddr, name, untrusted_dladdr); return true; } @@ -1137,34 +1168,36 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip object::SectionRef Section; llvm::DIContext *context = NULL; int64_t slide; - bool isSysImg; + bool isImage; void *saddr; - if (!jl_dylib_DI_for_fptr(pointer, &Section, &slide, &context, skipC, &isSysImg, &saddr, &frame0->func_name, &frame0->file_name)) { + uint64_t fbase; + if (!jl_dylib_DI_for_fptr(pointer, &Section, &slide, &context, skipC, &isImage, &fbase, &saddr, &frame0->func_name, &frame0->file_name)) { frame0->fromC = 1; return 1; } - frame0->fromC = !isSysImg; + frame0->fromC = !isImage; { - auto sysimg_locked = getJITDebugRegistry().get_sysimg_info(); - if (isSysImg && sysimg_locked->sysimg_fptrs.base && saddr) { - intptr_t diff = (uintptr_t)saddr - (uintptr_t)sysimg_locked->sysimg_fptrs.base; - for (size_t i = 0; i < sysimg_locked->sysimg_fptrs.nclones; i++) { - if (diff == sysimg_locked->sysimg_fptrs.clone_offsets[i]) { - uint32_t idx = sysimg_locked->sysimg_fptrs.clone_idxs[i] & jl_sysimg_val_mask; - if (idx < sysimg_locked->sysimg_fvars_n) // items after this were cloned but not referenced directly by a method (such as our ccall PLT thunks) - frame0->linfo = 
sysimg_locked->sysimg_fvars_linfo[idx]; + JITDebugInfoRegistry::image_info_t image; + bool inimage = getJITDebugRegistry().get_image_info(fbase, &image); + if (isImage && saddr && inimage) { + intptr_t diff = (uintptr_t)saddr - (uintptr_t)image.fptrs.base; + for (size_t i = 0; i < image.fptrs.nclones; i++) { + if (diff == image.fptrs.clone_offsets[i]) { + uint32_t idx = image.fptrs.clone_idxs[i] & jl_sysimg_val_mask; + if (idx < image.fvars_n) // items after this were cloned but not referenced directly by a method (such as our ccall PLT thunks) + frame0->linfo = image.fvars_linfo[idx]; break; } } - for (size_t i = 0; i < sysimg_locked->sysimg_fvars_n; i++) { - if (diff == sysimg_locked->sysimg_fptrs.offsets[i]) { - frame0->linfo = sysimg_locked->sysimg_fvars_linfo[i]; + for (size_t i = 0; i < image.fvars_n; i++) { + if (diff == image.fptrs.offsets[i]) { + frame0->linfo = image.fvars_linfo[i]; break; } } } } - return lookup_pointer(Section, context, frames, pointer, slide, isSysImg, noInline); + return lookup_pointer(Section, context, frames, pointer, slide, isImage, noInline); } int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide, @@ -1194,7 +1227,7 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide, } // Set *name and *filename to either NULL or malloc'd string -extern "C" JL_DLLEXPORT int jl_getFunctionInfo_impl(jl_frame_t **frames_out, size_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT +extern "C" JL_DLLEXPORT_CODEGEN int jl_getFunctionInfo_impl(jl_frame_t **frames_out, size_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT { // This function is not allowed to reference any TLS variables if noInline // since it can be called from an unmanaged thread on OSX. @@ -1429,7 +1462,7 @@ static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End) void register_eh_frames(uint8_t *Addr, size_t Size) { // System unwinder - jl_profile_atomic([&]() { + jl_profile_atomic([&]() JL_NOTSAFEPOINT { __register_frame(Addr); }); @@ -1458,7 +1491,7 @@ void register_eh_frames(uint8_t *Addr, size_t Size) // While we're at it, also record the start_ip and size, // which we fill in the table unw_table_entry *table = new unw_table_entry[nentries]; - std::vector start_ips(nentries); + SmallVector start_ips(nentries); size_t cur_entry = 0; // Cache the previously parsed CIE entry so that we can support multiple // CIE's (may not happen) without parsing it every time. 
@@ -1557,14 +1590,14 @@ void register_eh_frames(uint8_t *Addr, size_t Size) di->start_ip = start_ip; di->end_ip = end_ip; - jl_profile_atomic([&]() { + jl_profile_atomic([&]() JL_NOTSAFEPOINT { _U_dyn_register(di); }); } void deregister_eh_frames(uint8_t *Addr, size_t Size) { - jl_profile_atomic([&]() { + jl_profile_atomic([&]() JL_NOTSAFEPOINT { __deregister_frame(Addr); }); // Deregistering with our unwinder (_U_dyn_cancel) requires a lookup table @@ -1584,7 +1617,7 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size) #endif -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr) { // Might be called from unmanaged thread diff --git a/src/debuginfo.h b/src/debuginfo.h index 5ea34350ac1fb..5b5cdcb82d534 100644 --- a/src/debuginfo.h +++ b/src/debuginfo.h @@ -6,7 +6,7 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide, llvm::object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT; bool jl_dylib_DI_for_fptr(size_t pointer, llvm::object::SectionRef *Section, int64_t *slide, llvm::DIContext **context, - bool onlySysImg, bool *isSysImg, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT; + bool onlyImage, bool *isImage, uint64_t* fbase, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT; static object::SectionedAddress makeAddress( llvm::object::SectionRef Section, uint64_t address) JL_NOTSAFEPOINT diff --git a/src/disasm.cpp b/src/disasm.cpp index 838934a6c5893..2e0cb22e43a1c 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -92,11 +92,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 140000 #include -#else -#include -#endif #include #include @@ -111,8 +107,6 @@ #include -#include "julia.h" -#include "julia_internal.h" #include "jitlayers.h" #include "processor.h" @@ -123,7 +117,7 @@ using namespace llvm; // helper class for tracking inlining context while printing debug info class DILineInfoPrinter { // internal state: - std::vector context; + SmallVector context; uint32_t inline_depth = 0; // configuration options: const char* LineStart = "; "; @@ -135,10 +129,11 @@ class DILineInfoPrinter { output_source = 1, } verbosity = output_source; public: - DILineInfoPrinter(const char *LineStart, bool bracket_outer) + DILineInfoPrinter(const char *LineStart, bool bracket_outer) JL_NOTSAFEPOINT : LineStart(LineStart), bracket_outer(bracket_outer) {}; - void SetVerbosity(const char *c) + ~DILineInfoPrinter() JL_NOTSAFEPOINT = default; + void SetVerbosity(const char *c) JL_NOTSAFEPOINT { if (StringRef("default") == c) { verbosity = output_source; @@ -151,14 +146,14 @@ class DILineInfoPrinter { } } - void emit_finish(raw_ostream &Out); - void emit_lineinfo(raw_ostream &Out, std::vector &DI); + void emit_finish(raw_ostream &Out) JL_NOTSAFEPOINT; + void emit_lineinfo(raw_ostream &Out, SmallVectorImpl &DI) JL_NOTSAFEPOINT; struct repeat { size_t times; const char *c; }; - struct repeat inlining_indent(const char *c) + struct repeat inlining_indent(const char *c) JL_NOTSAFEPOINT { return repeat{ std::max(inline_depth + bracket_outer, (uint32_t)1) - 1, @@ -166,37 +161,37 @@ class DILineInfoPrinter { } template - void emit_lineinfo(std::string &Out, T &DI) + void emit_lineinfo(std::string &Out, T &DI) JL_NOTSAFEPOINT { raw_string_ostream OS(Out); emit_lineinfo(OS, DI); } - void emit_lineinfo(raw_ostream &Out, DILineInfo &DI) + void emit_lineinfo(raw_ostream &Out, DILineInfo &DI) JL_NOTSAFEPOINT { - std::vector DIvec(1); + SmallVector DIvec(1); DIvec[0] = DI; emit_lineinfo(Out, DIvec); } - void 
emit_lineinfo(raw_ostream &Out, DIInliningInfo &DI) + void emit_lineinfo(raw_ostream &Out, DIInliningInfo &DI) JL_NOTSAFEPOINT { uint32_t nframes = DI.getNumberOfFrames(); - std::vector DIvec(nframes); + SmallVector DIvec(nframes); for (uint32_t i = 0; i < DI.getNumberOfFrames(); i++) { DIvec[i] = DI.getFrame(i); } emit_lineinfo(Out, DIvec); } - void emit_finish(std::string &Out) + void emit_finish(std::string &Out) JL_NOTSAFEPOINT { raw_string_ostream OS(Out); emit_finish(OS); } }; -static raw_ostream &operator<<(raw_ostream &Out, struct DILineInfoPrinter::repeat i) +static raw_ostream &operator<<(raw_ostream &Out, struct DILineInfoPrinter::repeat i) JL_NOTSAFEPOINT { while (i.times-- > 0) Out << i.c; @@ -212,7 +207,7 @@ void DILineInfoPrinter::emit_finish(raw_ostream &Out) this->inline_depth = 0; } -void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector &DI) +void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, SmallVectorImpl &DI) { if (verbosity == output_none) return; @@ -222,8 +217,8 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector // compute the size of the matching prefix in the inlining information stack uint32_t nctx; for (nctx = 0; nctx < context.size() && nctx < nframes; nctx++) { - const DILineInfo &CtxLine = context.at(nctx); - const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx); + const DILineInfo &CtxLine = context[nctx]; + const DILineInfo &FrameLine = DI[nframes - 1 - nctx]; if (CtxLine != FrameLine) { break; } @@ -235,27 +230,27 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector // if so, drop all existing calls to it from the top of the context // AND check if instead the context was previously printed that way // but now has removed the recursive frames - StringRef method = StringRef(context.at(nctx - 1).FunctionName).rtrim(';'); // last matching frame - if ((nctx < nframes && StringRef(DI.at(nframes - nctx - 1).FunctionName).rtrim(';') == method) || - (nctx < context.size() && StringRef(context.at(nctx).FunctionName).rtrim(';') == method)) { + StringRef method = StringRef(context[nctx - 1].FunctionName).rtrim(';'); // last matching frame + if ((nctx < nframes && StringRef(DI[nframes - nctx - 1].FunctionName).rtrim(';') == method) || + (nctx < context.size() && StringRef(context[nctx].FunctionName).rtrim(';') == method)) { update_line_only = true; // transform nctx to exclude the combined frames - while (nctx > 0 && StringRef(context.at(nctx - 1).FunctionName).rtrim(';') == method) + while (nctx > 0 && StringRef(context[nctx - 1].FunctionName).rtrim(';') == method) nctx -= 1; } } if (!update_line_only && nctx < context.size() && nctx < nframes) { // look at the first non-matching element to see if we are only changing the line number - const DILineInfo &CtxLine = context.at(nctx); - const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx); + const DILineInfo &CtxLine = context[nctx]; + const DILineInfo &FrameLine = DI[nframes - 1 - nctx]; if (StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';')) update_line_only = true; } } else if (nctx < context.size() && nctx < nframes) { // look at the first non-matching element to see if we are only changing the line number - const DILineInfo &CtxLine = context.at(nctx); - const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx); + const DILineInfo &CtxLine = context[nctx]; + const DILineInfo &FrameLine = DI[nframes - 1 - nctx]; if (CtxLine.FileName == FrameLine.FileName && StringRef(CtxLine.FunctionName).rtrim(';') == 
StringRef(FrameLine.FunctionName).rtrim(';')) { update_line_only = true; @@ -267,9 +262,9 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector uint32_t npops; if (collapse_recursive) { npops = 1; - StringRef Prev = StringRef(context.at(nctx).FunctionName).rtrim(';'); + StringRef Prev = StringRef(context[nctx].FunctionName).rtrim(';'); for (uint32_t i = nctx + 1; i < context.size(); i++) { - StringRef Next = StringRef(context.at(i).FunctionName).rtrim(';'); + StringRef Next = StringRef(context[i].FunctionName).rtrim(';'); if (Prev != Next) npops += 1; Prev = Next; @@ -287,7 +282,7 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector } // print the new frames while (nctx < nframes) { - const DILineInfo &frame = DI.at(nframes - 1 - nctx); + const DILineInfo &frame = DI[nframes - 1 - nctx]; Out << LineStart << inlining_indent("│"); nctx += 1; context.push_back(frame); @@ -306,7 +301,7 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector Out << " within `" << method << "`"; if (collapse_recursive) { while (nctx < nframes) { - const DILineInfo &frame = DI.at(nframes - 1 - nctx); + const DILineInfo &frame = DI[nframes - 1 - nctx]; if (StringRef(frame.FunctionName).rtrim(';') != method) break; nctx += 1; @@ -318,10 +313,10 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector Out << "\n"; } #ifndef JL_NDEBUG - StringRef Prev = StringRef(context.at(0).FunctionName).rtrim(';'); + StringRef Prev = StringRef(context[0].FunctionName).rtrim(';'); uint32_t depth2 = 1; for (uint32_t i = 1; i < nctx; i++) { - StringRef Next = StringRef(context.at(i).FunctionName).rtrim(';'); + StringRef Next = StringRef(context[i].FunctionName).rtrim(';'); if (!collapse_recursive || Prev != Next) depth2 += 1; Prev = Next; @@ -338,27 +333,28 @@ class LineNumberAnnotatedWriter : public AssemblyAnnotationWriter { DenseMap DebugLoc; DenseMap Subprogram; public: - LineNumberAnnotatedWriter(const char *LineStart, bool bracket_outer, const char *debuginfo) + LineNumberAnnotatedWriter(const char *LineStart, bool bracket_outer, const char *debuginfo) JL_NOTSAFEPOINT : LinePrinter(LineStart, bracket_outer) { LinePrinter.SetVerbosity(debuginfo); } - virtual void emitFunctionAnnot(const Function *, formatted_raw_ostream &); - virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &); - virtual void emitInstructionAnnot(const DILocation *, formatted_raw_ostream &); - virtual void emitBasicBlockEndAnnot(const BasicBlock *, formatted_raw_ostream &); - // virtual void printInfoComment(const Value &, formatted_raw_ostream &) {} - - void emitEnd(formatted_raw_ostream &Out) { + ~LineNumberAnnotatedWriter() JL_NOTSAFEPOINT = default; + virtual void emitFunctionAnnot(const Function *, formatted_raw_ostream &) JL_NOTSAFEPOINT; + virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &) JL_NOTSAFEPOINT; + virtual void emitInstructionAnnot(const DILocation *, formatted_raw_ostream &) JL_NOTSAFEPOINT; + virtual void emitBasicBlockEndAnnot(const BasicBlock *, formatted_raw_ostream &) JL_NOTSAFEPOINT; + // virtual void printInfoComment(const Value &, formatted_raw_ostream &) JL_NOTSAFEPOINT {} + + void emitEnd(formatted_raw_ostream &Out) JL_NOTSAFEPOINT { LinePrinter.emit_finish(Out); InstrLoc = nullptr; } - void addSubprogram(const Function *F, DISubprogram *SP) + void addSubprogram(const Function *F, DISubprogram *SP) JL_NOTSAFEPOINT { Subprogram[F] = SP; } - void addDebugLoc(const Instruction *I, DILocation *Loc) + void 
addDebugLoc(const Instruction *I, DILocation *Loc) JL_NOTSAFEPOINT { DebugLoc[I] = Loc; } @@ -367,6 +363,10 @@ class LineNumberAnnotatedWriter : public AssemblyAnnotationWriter { void LineNumberAnnotatedWriter::emitFunctionAnnot( const Function *F, formatted_raw_ostream &Out) { + if (F->hasFnAttribute("julia.fsig")) { + auto sig = F->getFnAttribute("julia.fsig").getValueAsString(); + Out << "; Function Signature: " << sig << "\n"; + } InstrLoc = nullptr; DISubprogram *FuncLoc = F->getSubprogram(); if (!FuncLoc) { @@ -375,7 +375,7 @@ void LineNumberAnnotatedWriter::emitFunctionAnnot( FuncLoc = SP->second; } if (FuncLoc) { - std::vector DIvec(1); + SmallVector DIvec(1); DILineInfo &DI = DIvec.back(); DI.FunctionName = FuncLoc->getName().str(); DI.FileName = FuncLoc->getFilename().str(); @@ -402,7 +402,7 @@ void LineNumberAnnotatedWriter::emitInstructionAnnot( { if (NewInstrLoc && NewInstrLoc != InstrLoc) { InstrLoc = NewInstrLoc; - std::vector DIvec; + SmallVector DIvec; do { DIvec.emplace_back(); DILineInfo &DI = DIvec.back(); @@ -424,7 +424,7 @@ void LineNumberAnnotatedWriter::emitBasicBlockEndAnnot( emitEnd(Out); } -static void jl_strip_llvm_debug(Module *m, bool all_meta, LineNumberAnnotatedWriter *AAW) +static void jl_strip_llvm_debug(Module *m, bool all_meta, LineNumberAnnotatedWriter *AAW) JL_NOTSAFEPOINT { // strip metadata from all instructions in all functions in the module Instruction *deletelast = nullptr; // can't actually delete until the iterator advances @@ -475,21 +475,21 @@ static void jl_strip_llvm_debug(Module *m, bool all_meta, LineNumberAnnotatedWri // m->eraseNamedMetadata(md); } -void jl_strip_llvm_debug(Module *m) +void jl_strip_llvm_debug(Module *m) JL_NOTSAFEPOINT { jl_strip_llvm_debug(m, false, NULL); } -void jl_strip_llvm_addrspaces(Module *m) +void jl_strip_llvm_addrspaces(Module *m) JL_NOTSAFEPOINT { - legacy::PassManager PM; - PM.add(createRemoveJuliaAddrspacesPass()); - PM.run(*m); + PassBuilder PB; + AnalysisManagers AM(PB); + RemoveJuliaAddrspacesPass().run(*m, AM.MAM); } // print an llvm IR acquired from jl_get_llvmf // warning: this takes ownership of, and destroys, dump->TSM -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo) { std::string code; @@ -543,7 +543,7 @@ static void jl_dump_asm_internal( raw_ostream &rstream, const char* asm_variant, const char* debuginfo, - bool binary); + bool binary) JL_NOTSAFEPOINT; // This isn't particularly fast, but neither is printing assembly, and they're only used for interactive mode static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset) @@ -578,8 +578,8 @@ static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset) } // print a native disassembly for the function starting at fptr -extern "C" JL_DLLEXPORT -jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) +extern "C" JL_DLLEXPORT_CODEGEN +jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary) { assert(fptr != 0); std::string code; @@ -592,7 +592,7 @@ jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_va llvm::DIContext *context = NULL; if (!jl_DI_for_fptr(fptr, &symsize, &slide, &Section, &context)) { if (!jl_dylib_DI_for_fptr(fptr, &Section, &slide, &context, - false, NULL, NULL, NULL, NULL)) { + false, NULL, NULL, NULL, 
NULL, NULL)) { jl_printf(JL_STDERR, "WARNING: Unable to find function pointer\n"); return jl_pchar_to_string("", 0); } @@ -604,7 +604,7 @@ jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_va return jl_pchar_to_string("", 0); } - if (raw_mc) { + if (emit_mc) { return (jl_value_t*)jl_pchar_to_array((char*)fptr, symsize); } @@ -642,20 +642,21 @@ class SymbolTable { uint64_t ip; // virtual instruction pointer of the current instruction int64_t slide; public: - SymbolTable(MCContext &Ctx, const object::ObjectFile *object, int64_t slide, const FuncMCView &MemObj): - Ctx(Ctx), MemObj(MemObj), object(object), ip(0), slide(slide) {} - const FuncMCView &getMemoryObject() const { return MemObj; } - void setPass(int Pass) { this->Pass = Pass; } - int getPass() const { return Pass; } - void insertAddress(uint64_t addr); + SymbolTable(MCContext &Ctx, const object::ObjectFile *object, int64_t slide, const FuncMCView &MemObj) JL_NOTSAFEPOINT + : Ctx(Ctx), MemObj(MemObj), object(object), ip(0), slide(slide) {} + ~SymbolTable() JL_NOTSAFEPOINT = default; + const FuncMCView &getMemoryObject() const JL_NOTSAFEPOINT { return MemObj; } + void setPass(int Pass) JL_NOTSAFEPOINT { this->Pass = Pass; } + int getPass() const JL_NOTSAFEPOINT { return Pass; } + void insertAddress(uint64_t addr) JL_NOTSAFEPOINT; // void createSymbol(const char *name, uint64_t addr); - void createSymbols(); - const char *lookupSymbolName(uint64_t addr); - MCSymbol *lookupSymbol(uint64_t addr); - StringRef getSymbolNameAt(uint64_t offset) const; - const char *lookupLocalPC(size_t addr); - void setIP(uint64_t addr); - uint64_t getIP() const; + void createSymbols() JL_NOTSAFEPOINT; + const char *lookupSymbolName(uint64_t addr) JL_NOTSAFEPOINT; + MCSymbol *lookupSymbol(uint64_t addr) JL_NOTSAFEPOINT; + StringRef getSymbolNameAt(uint64_t offset) const JL_NOTSAFEPOINT; + const char *lookupLocalPC(size_t addr) JL_NOTSAFEPOINT; + void setIP(uint64_t addr) JL_NOTSAFEPOINT; + uint64_t getIP() const JL_NOTSAFEPOINT; }; void SymbolTable::setIP(uint64_t addr) @@ -794,15 +795,21 @@ static const char *SymbolLookup(void *DisInfo, uint64_t ReferenceValue, uint64_t return NULL; } -static int OpInfoLookup(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t Size, +static int OpInfoLookup(void *DisInfo, uint64_t PC, + uint64_t Offset, +#if JL_LLVM_VERSION < 150000 + uint64_t Size, +#else + uint64_t OpSize, uint64_t InstSize, +#endif int TagType, void *TagBuf) { - SymbolTable *SymTab = (SymbolTable*)DisInfo; + // SymbolTable *SymTab = (SymbolTable*)DisInfo; LLVMOpInfo1 *info = (LLVMOpInfo1*)TagBuf; memset(info, 0, sizeof(*info)); if (TagType != 1) return 0; // Unknown data format - PC += SymTab->getIP() - (uint64_t)(uintptr_t)SymTab->getMemoryObject().data(); // add offset from MemoryObject base + // PC += SymTab->getIP() - (uint64_t)(uintptr_t)SymTab->getMemoryObject().data(); // add offset from MemoryObject base // TODO: see if we knew of a relocation applied at PC // info->AddSymbol.Present = 1; // info->AddSymbol.Name = name; @@ -876,16 +883,10 @@ static void jl_dump_asm_internal( TheTarget->createMCSubtargetInfo(TheTriple.str(), cpu, features)); assert(STI && "Unable to create subtarget info!"); -#if JL_LLVM_VERSION >= 130000 MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr); std::unique_ptr MOFI( TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false, /*LargeCodeModel=*/ false)); Ctx.setObjectFileInfo(MOFI.get()); -#else - std::unique_ptr MOFI(new MCObjectFileInfo()); - MCContext Ctx(MAI.get(), MRI.get(), 
MOFI.get(), &SrcMgr); - MOFI->InitMCObjectFileInfo(TheTriple, /* PIC */ false, Ctx); -#endif std::unique_ptr DisAsm(TheTarget->createMCDisassembler(*STI, Ctx)); if (!DisAsm) { @@ -1048,10 +1049,14 @@ static void jl_dump_asm_internal( MCInst Inst; MCDisassembler::DecodeStatus S; FuncMCView view = memoryObject.slice(Index); +#if JL_LLVM_VERSION < 150000 +#define getCommentOS() GetCommentOS() +#endif S = DisAsm->getInstruction(Inst, insSize, view, 0, - /*CStream*/ pass != 0 ? Streamer->GetCommentOS() : nulls()); - if (pass != 0 && Streamer->GetCommentOS().tell() > 0) - Streamer->GetCommentOS() << '\n'; + /*CStream*/ pass != 0 ? Streamer->getCommentOS () : nulls()); + if (pass != 0 && Streamer->getCommentOS ().tell() > 0) + Streamer->getCommentOS () << '\n'; +#undef GetCommentOS switch (S) { case MCDisassembler::Fail: if (insSize == 0) // skip illegible bytes @@ -1163,6 +1168,7 @@ class LineNumberPrinterHandler : public AsmPrinterHandler { LinePrinter("; ", true, debuginfo), RawStream(Buffer), Stream(RawStream) {} + ~LineNumberPrinterHandler() JL_NOTSAFEPOINT = default; void emitAndReset() { Stream.flush(); @@ -1200,8 +1206,8 @@ class LineNumberPrinterHandler : public AsmPrinterHandler { }; // get a native assembly for llvm::Function -extern "C" JL_DLLEXPORT -jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) +extern "C" JL_DLLEXPORT_CODEGEN +jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw) { // precise printing via IR assembler SmallVector ObjBufferSV; @@ -1215,12 +1221,15 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const if (f != &f2 && !f->isDeclaration()) f2.deleteBody(); } + // add a nounwind attribute to get rid of cfi instructions + if (!raw) + f->addFnAttr(Attribute::NoUnwind); }); auto TMBase = jl_ExecutionEngine->cloneTargetMachine(); LLVMTargetMachine *TM = static_cast(TMBase.get()); legacy::PassManager PM; addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - if (raw_mc) { + if (emit_mc) { raw_svector_ostream obj_OS(ObjBufferSV); if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr)) return jl_an_empty_string; @@ -1274,7 +1283,7 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const return jl_pchar_to_string(ObjBufferSV.data(), ObjBufferSV.size()); } -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN LLVMDisasmContextRef jl_LLVMCreateDisasm_impl( const char *TripleName, void *DisInfo, int TagType, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp) @@ -1282,8 +1291,8 @@ LLVMDisasmContextRef jl_LLVMCreateDisasm_impl( return LLVMCreateDisasm(TripleName, DisInfo, TagType, GetOpInfo, SymbolLookUp); } -extern "C" JL_DLLEXPORT -JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_impl( +extern "C" JL_DLLEXPORT_CODEGEN +size_t jl_LLVMDisasmInstruction_impl( LLVMDisasmContextRef DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize) { diff --git a/src/dlload.c b/src/dlload.c index 230a31ed3d695..484c36a228886 100644 --- a/src/dlload.c +++ b/src/dlload.c @@ -4,6 +4,9 @@ #include #include #include +#ifdef __GLIBC__ +#include +#endif #include "platform.h" #include "julia.h" @@ -67,10 +70,8 @@ const char *jl_crtdll_name = CRTDLL_BASENAME ".dll"; #define PATHBUF 4096 -#define JL_RTLD(flags, FLAG) (flags & JL_RTLD_ ## FLAG ? 
RTLD_ ## FLAG : 0) - #ifdef _OS_WINDOWS_ -static void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT +void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT { DWORD res; LPWSTR errmsg; @@ -97,19 +98,106 @@ static void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOI } #endif +#if defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) +struct link_map; +typedef void* (dlopen_prototype)(const char* filename, int flags); + +/* This function is copied from the memory sanitizer runtime. + Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + See https://llvm.org/LICENSE.txt for license information. + SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ +static inline uintptr_t RoundUpTo(uintptr_t size, uintptr_t boundary) { + return (size + boundary - 1) & ~(boundary - 1); +} +static inline uintptr_t RoundDownTo(uintptr_t x, uintptr_t boundary) { + return x & ~(boundary - 1); +} +void ForEachMappedRegion(struct link_map *map, void (*cb)(const volatile void *, uintptr_t)) { +#if !defined(_OS_FREEBSD_) + typedef ElfW(Phdr) Elf_Phdr; + typedef ElfW(Ehdr) Elf_Ehdr; +#endif + char *base = (char *)map->l_addr; + Elf_Ehdr *ehdr = (Elf_Ehdr *)base; + char *phdrs = base + ehdr->e_phoff; + char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize; + + // Find the segment with the minimum base so we can "relocate" the p_vaddr + // fields. Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC + // objects have a non-zero base. + uintptr_t preferred_base = (uintptr_t)-1; + for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) { + Elf_Phdr *phdr = (Elf_Phdr *)iter; + if (phdr->p_type == PT_LOAD && preferred_base > (uintptr_t)phdr->p_vaddr) + preferred_base = (uintptr_t)phdr->p_vaddr; + } + + // Compute the delta from the real base to get a relocation delta. + intptr_t delta = (uintptr_t)base - preferred_base; + // Now we can figure out what the loader really mapped. + for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) { + Elf_Phdr *phdr = (Elf_Phdr *)iter; + if (phdr->p_type == PT_LOAD) { + uintptr_t seg_start = phdr->p_vaddr + delta; + uintptr_t seg_end = seg_start + phdr->p_memsz; + // None of these values are aligned. We consider the ragged edges of the + // load command as defined, since they are mapped from the file. + seg_start = RoundDownTo(seg_start, jl_page_size); + seg_end = RoundUpTo(seg_end, jl_page_size); + cb((void *)seg_start, seg_end - seg_start); + } + } +} +#endif + +#if defined(_OS_WINDOWS_) JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOINT { -#if defined(_OS_WINDOWS_) size_t len = MultiByteToWideChar(CP_UTF8, 0, filename, -1, NULL, 0); if (!len) return NULL; WCHAR *wfilename = (WCHAR*)alloca(len * sizeof(WCHAR)); if (!MultiByteToWideChar(CP_UTF8, 0, filename, -1, wfilename, len)) return NULL; - HANDLE lib = LoadLibraryExW(wfilename, NULL, LOAD_WITH_ALTERED_SEARCH_PATH); - if (lib) - needsSymRefreshModuleList = 1; + HANDLE lib; + if (flags & JL_RTLD_NOLOAD) { + lib = GetModuleHandleW(wfilename); + } + else { + lib = LoadLibraryExW(wfilename, NULL, LOAD_WITH_ALTERED_SEARCH_PATH); + if (lib) + needsSymRefreshModuleList = 1; + } return lib; +} #else - return dlopen(filename, + +#define JL_RTLD(flags, FLAG) (flags & JL_RTLD_ ## FLAG ? 
RTLD_ ## FLAG : 0) + +JL_DLLEXPORT JL_NO_SANITIZE void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOINT +{ + /* The sanitizers break RUNPATH use in dlopen for annoying reasons that are + are hard to fix. Specifically, libc will use the return address of the + caller to determine certain paths and flags that affect .so location lookup. + To work around this, we need to avoid using the sanitizer's dlopen interposition, + instead using the real dlopen directly from the current shared library. + Of course, this does mean that we need to manually perform the work that + the sanitizers would otherwise do. */ +#if (defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_)) && __GLIBC__ + static dlopen_prototype *dlopen = NULL; + if (!dlopen) { + dlopen = (dlopen_prototype*)dlsym(RTLD_NEXT, "dlopen"); + if (!dlopen) + return NULL; + void *libdl_handle = dlopen("libdl.so.2", RTLD_NOW | RTLD_NOLOAD); + assert(libdl_handle); + dlopen = (dlopen_prototype*)dlsym(libdl_handle, "dlopen"); + dlclose(libdl_handle); + assert(dlopen); + } + // The real interceptors check the validity of the string here, but let's + // just skip that for the time being. +#endif + void *hnd = dlopen(filename, (flags & JL_RTLD_NOW ? RTLD_NOW : RTLD_LAZY) | JL_RTLD(flags, LOCAL) | JL_RTLD(flags, GLOBAL) @@ -126,8 +214,15 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI | JL_RTLD(flags, FIRST) #endif ); +#if defined(_COMPILER_MSAN_ENABLED_) && defined(__GLIBC__) + struct link_map *map = (struct link_map*)hnd; + if (filename && map) + ForEachMappedRegion(map, __msan_unpoison); #endif + return hnd; } +#endif + JL_DLLEXPORT int jl_dlclose(void *handle) JL_NOTSAFEPOINT { @@ -145,6 +240,25 @@ JL_DLLEXPORT int jl_dlclose(void *handle) JL_NOTSAFEPOINT #endif } +void *jl_find_dynamic_library_by_addr(void *symbol) { + void *handle; +#ifdef _OS_WINDOWS_ + if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCWSTR)symbol, + (HMODULE*)&handle)) { + jl_error("could not load base module"); + } +#else + Dl_info info; + if (!dladdr(symbol, &info) || !info.dli_fname) { + jl_error("could not load base module"); + } + handle = dlopen(info.dli_fname, RTLD_NOW | RTLD_NOLOAD | RTLD_LOCAL); + dlclose(handle); // Undo ref count increment from `dlopen` +#endif + return handle; +} + JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, int throw_err) { char path[PATHBUF], relocated[PATHBUF]; @@ -161,29 +275,17 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, int n_extensions = endswith_extension(modname) ? 
1 : N_EXTENSIONS; int ret; - /* - this branch returns handle of libjulia-internal - */ - if (modname == NULL) { -#ifdef _OS_WINDOWS_ - if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (LPCWSTR)(uintptr_t)(&jl_load_dynamic_library), - (HMODULE*)&handle)) { - jl_error("could not load base module"); - } -#else - Dl_info info; - if (!dladdr((void*)(uintptr_t)&jl_load_dynamic_library, &info) || !info.dli_fname) { - jl_error("could not load base module"); - } - handle = dlopen(info.dli_fname, RTLD_NOW); -#endif - goto done; - } + // modname == NULL is a sentinel value requesting the handle of libjulia-internal + if (modname == NULL) + return jl_find_dynamic_library_by_addr(&jl_load_dynamic_library); abspath = jl_isabspath(modname); is_atpath = 0; + JL_TIMING(DL_OPEN, DL_OPEN); + if (!(flags & JL_RTLD_NOLOAD)) + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, modname); + // Detect if our `modname` is something like `@rpath/libfoo.dylib` #ifdef _OS_DARWIN_ size_t nameLen = strlen(modname); @@ -206,11 +308,11 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, such as Windows, so we emulate them here. */ if (!abspath && !is_atpath && jl_base_module != NULL) { - jl_binding_t *b = jl_get_module_binding(jl_base_module, jl_symbol("DL_LOAD_PATH")); + jl_binding_t *b = jl_get_module_binding(jl_base_module, jl_symbol("DL_LOAD_PATH"), 0); jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_atomic_load_relaxed(&b->value) : NULL); if (DL_LOAD_PATH != NULL) { size_t j; - for (j = 0; j < jl_array_len(DL_LOAD_PATH); j++) { + for (j = 0; j < jl_array_nrows(DL_LOAD_PATH); j++) { char *dl_path = jl_string_data(jl_array_ptr_data(DL_LOAD_PATH)[j]); size_t len = strlen(dl_path); if (len == 0) @@ -239,8 +341,10 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, if (i == 0) { // LoadLibrary already tested the extensions, we just need to check the `stat` result #endif handle = jl_dlopen(path, flags); + if (handle && !(flags & JL_RTLD_NOLOAD)) + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, jl_pathname_for_handle(handle)); if (handle) - goto done; + return handle; #ifdef _OS_WINDOWS_ err = GetLastError(); } @@ -259,11 +363,17 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, path[0] = '\0'; snprintf(path, PATHBUF, "%s%s", modname, ext); handle = jl_dlopen(path, flags); + if (handle && !(flags & JL_RTLD_NOLOAD)) + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, jl_pathname_for_handle(handle)); if (handle) - goto done; + return handle; #ifdef _OS_WINDOWS_ err = GetLastError(); break; // LoadLibrary already tested the rest +#else + // bail out and show the error if file actually exists + if (jl_stat(path, (char*)&stbuf) == 0) + break; #endif } @@ -279,7 +389,6 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, } handle = NULL; -done: return handle; } @@ -318,26 +427,29 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t char err[256]; win32_formatmessage(GetLastError(), err, sizeof(err)); #endif -#ifndef __clang_gcanalyzer__ - // Hide the error throwing from the analyser since there isn't a way to express - // "safepoint only when throwing error" currently. 
jl_errorf("could not load symbol \"%s\":\n%s", symbol, err); -#endif } return symbol_found; } -#ifdef _OS_WINDOWS_ -//Look for symbols in win32 libraries -JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name) +// Look for symbols in internal libraries +JL_DLLEXPORT const char *jl_dlfind(const char *f_name) { - void * dummy; - if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0)) +#ifdef _OS_FREEBSD_ + // This is a workaround for FreeBSD <= 13.2 which do not have + // https://cgit.freebsd.org/src/commit/?id=21a52f99440c9bec7679f3b0c5c9d888901c3694 + // (See https://github.com/JuliaLang/julia/issues/50846) + if (strcmp(f_name, "dl_iterate_phdr") == 0) return JL_EXE_LIBNAME; +#endif + void * dummy; if (jl_dlsym(jl_libjulia_internal_handle, f_name, &dummy, 0)) return JL_LIBJULIA_INTERNAL_DL_LIBNAME; if (jl_dlsym(jl_libjulia_handle, f_name, &dummy, 0)) return JL_LIBJULIA_DL_LIBNAME; + if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0)) + return JL_EXE_LIBNAME; +#ifdef _OS_WINDOWS_ if (jl_dlsym(jl_kernel32_handle, f_name, &dummy, 0)) return "kernel32"; if (jl_dlsym(jl_crtdll_handle, f_name, &dummy, 0)) // Prefer crtdll over ntdll @@ -346,6 +458,7 @@ JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name) return "ntdll"; if (jl_dlsym(jl_winsock_handle, f_name, &dummy, 0)) return "ws2_32"; +#endif // additional common libraries (libc?) could be added here, but in general, // it is better to specify the library explicitly in the code. This exists // mainly to ease compatibility with linux, and for libraries that don't @@ -357,7 +470,6 @@ JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name) // which defaults to jl_libjulia_internal_handle, where we won't find it, and // will throw the appropriate error. } -#endif #ifdef __cplusplus } diff --git a/src/dump.c b/src/dump.c deleted file mode 100644 index 63c504d5813c7..0000000000000 --- a/src/dump.c +++ /dev/null @@ -1,3280 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -/* - saving and restoring precompiled modules (.ji files) -*/ -#include -#include - -#include "julia.h" -#include "julia_internal.h" -#include "julia_gcext.h" -#include "builtin_proto.h" -#include "serialize.h" - -#ifndef _OS_WINDOWS_ -#include -#endif - -#include "valgrind.h" -#include "julia_assert.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// This file, together with ircode.c, allows (de)serialization between -// modules and *.ji cache files. `jl_save_incremental` gets called as the final step -// during package precompilation, and `_jl_restore_incremental` by `using SomePkg` -// whenever `SomePkg` has not yet been loaded. - -// Types, methods, and method instances form a graph that may have cycles, so -// serialization has to break these cycles. This is handled via "backreferences," -// referring to already (de)serialized items by an index. It is critial to ensure -// that the indexes of these backreferences align precisely during serialization -// and deserialization, to ensure that these integer indexes mean the same thing -// under both circumstances. Consequently, if you are modifying this file, be -// careful to match the sequence, if necessary reserving space for something that will -// be updated later. - -// It is also necessary to save & restore references to externally-defined objects, -// e.g., for package methods that call methods defined in Base or elsewhere. 
-// Consequently during deserialization there's a distinction between "reference" -// types, methods, and method instances (essentially like a GlobalRef), -// and "recached" version that refer to the actual entity in the running session. -// We complete deserialization before beginning the process of recaching, -// because we need the backreferences during deserialization and the actual -// objects during recaching. - -// Finally, because our backedge graph is not bidirectional, special handling is -// required to identify backedges from external methods that call internal methods. -// These get set aside and restored at the end of deserialization. - -// Note that one should prioritize deserialization performance over serialization performance, -// since deserialization may be performed much more often than serialization. - - -// TODO: put WeakRefs on the weak_refs list during deserialization -// TODO: handle finalizers - -// type => tag hash for a few core types (e.g., Expr, PhiNode, etc) -static htable_t ser_tag; -// tag => type mapping, the reverse of ser_tag -static jl_value_t *deser_tag[256]; -// hash of some common symbols, encoded as CommonSym_tag plus 1 byte -static htable_t common_symbol_tag; -static jl_value_t *deser_symbols[256]; - -// table of all objects that have been deserialized, indexed by pos -// (the order in the serializer stream). the low -// bit is reserved for flagging certain entries and pos is -// left shift by 1 -static htable_t backref_table; -static int backref_table_numel; -static arraylist_t backref_list; -static htable_t new_code_instance_validate; - -// list of (jl_value_t **loc, size_t pos) entries -// for anything that was flagged by the deserializer for later -// type-rewriting of some sort. pos is the index in backref_list. -static arraylist_t flagref_list; -// ref => value hash for looking up the "real" entity from -// the deserialized ref. Used for entities that must be unique, -// like types, methods, and method instances -static htable_t uniquing_table; - -// list of (size_t pos, (void *f)(jl_value_t*)) entries -// for the serializer to mark values in need of rework by function f -// during deserialization later -static arraylist_t reinit_list; - -// list of stuff that is being serialized -// This is not quite globally rooted, but we take care to only -// ever assigned rooted values here. -static jl_array_t *serializer_worklist JL_GLOBALLY_ROOTED; -// external MethodInstances we want to serialize -static htable_t external_mis; -// Inference tracks newly-inferred MethodInstances during precompilation -// and registers them by calling jl_set_newly_inferred -static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED; - -// New roots to add to Methods. 
These can't be added until after -// recaching is complete, so we have to hold on to them separately -// Stored as method => (worklist_key, roots) -static htable_t queued_method_roots; - -// inverse of backedges graph (caller=>callees hash) -htable_t edges_map; - -// list of requested ccallable signatures -static arraylist_t ccallable_list; - -typedef struct { - ios_t *s; - jl_ptls_t ptls; - jl_array_t *loaded_modules_array; -} jl_serializer_state; - -static jl_value_t *jl_idtable_type = NULL; -static jl_typename_t *jl_idtable_typename = NULL; -static jl_value_t *jl_bigint_type = NULL; -static int gmp_limb_size = 0; - -static void write_uint64(ios_t *s, uint64_t i) JL_NOTSAFEPOINT -{ - ios_write(s, (char*)&i, 8); -} - -static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT -{ - write_uint64(s, *((uint64_t*)&x)); -} - -void *jl_lookup_ser_tag(jl_value_t *v) -{ - return ptrhash_get(&ser_tag, v); -} - -void *jl_lookup_common_symbol(jl_value_t *v) -{ - return ptrhash_get(&common_symbol_tag, v); -} - -jl_value_t *jl_deser_tag(uint8_t tag) -{ - return deser_tag[tag]; -} - -jl_value_t *jl_deser_symbol(uint8_t tag) -{ - return deser_symbols[tag]; -} - -uint64_t jl_worklist_key(jl_array_t *worklist) -{ - assert(jl_is_array(worklist)); - size_t len = jl_array_len(worklist); - if (len > 0) { - jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1); - assert(jl_is_module(topmod)); - return topmod->build_id; - } - return 0; -} - -// --- serialize --- - -#define jl_serialize_value(s, v) jl_serialize_value_((s), (jl_value_t*)(v), 0) -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED; - -static void jl_serialize_cnull(jl_serializer_state *s, jl_value_t *t) -{ - backref_table_numel++; - write_uint8(s->s, TAG_CNULL); - jl_serialize_value(s, t); -} - -static int module_in_worklist(jl_module_t *mod) JL_NOTSAFEPOINT -{ - int i, l = jl_array_len(serializer_worklist); - for (i = 0; i < l; i++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, i); - if (jl_is_module(workmod) && jl_is_submodule(mod, workmod)) - return 1; - } - return 0; -} - -static int method_instance_in_queue(jl_method_instance_t *mi) -{ - return ptrhash_get(&external_mis, mi) != HT_NOTFOUND; -} - -// compute whether a type references something internal to worklist -// and thus could not have existed before deserialize -// and thus does not need delayed unique-ing -static int type_in_worklist(jl_datatype_t *dt) JL_NOTSAFEPOINT -{ - if (module_in_worklist(dt->name->module)) - return 1; - int i, l = jl_svec_len(dt->parameters); - for (i = 0; i < l; i++) { - jl_value_t *p = jl_unwrap_unionall(jl_tparam(dt, i)); - // TODO: what about Union and TypeVar?? - if (type_in_worklist((jl_datatype_t*)(jl_is_datatype(p) ? 
p : jl_typeof(p)))) - return 1; - } - return 0; -} - -static int type_recursively_external(jl_datatype_t *dt); - -static int type_parameter_recursively_external(jl_value_t *p0) JL_NOTSAFEPOINT -{ - if (!jl_is_concrete_type(p0)) - return 0; - jl_datatype_t *p = (jl_datatype_t*)p0; - //while (jl_is_unionall(p)) { - // if (!type_parameter_recursively_external(((jl_unionall_t*)p)->var->lb)) - // return 0; - // if (!type_parameter_recursively_external(((jl_unionall_t*)p)->var->ub)) - // return 0; - // p = (jl_datatype_t*)((jl_unionall_t*)p)->body; - //} - if (module_in_worklist(p->name->module)) - return 0; - if (p->name->wrapper != (jl_value_t*)p0) { - if (!type_recursively_external(p)) - return 0; - } - return 1; -} - -// returns true if all of the parameters are tag 6 or 7 -static int type_recursively_external(jl_datatype_t *dt) JL_NOTSAFEPOINT -{ - if (!dt->isconcretetype) - return 0; - if (jl_svec_len(dt->parameters) == 0) - return 1; - - int i, l = jl_svec_len(dt->parameters); - for (i = 0; i < l; i++) { - if (!type_parameter_recursively_external(jl_tparam(dt, i))) - return 0; - } - return 1; -} - -// When we infer external method instances, ensure they link back to the -// package. Otherwise they might be, e.g., for external macros -static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited) -{ - void **bp = ptrhash_bp(visited, mi); - // HT_NOTFOUND: not yet analyzed - // HT_NOTFOUND + 1: doesn't link back - // HT_NOTFOUND + 2: does link back - if (*bp != HT_NOTFOUND) - return (char*)*bp - (char*)HT_NOTFOUND - 1; - *bp = (void*)((char*)HT_NOTFOUND + 1); // preliminarily mark as "not found" - jl_module_t *mod = mi->def.module; - if (jl_is_method(mod)) - mod = ((jl_method_t*)mod)->module; - assert(jl_is_module(mod)); - if (mi->precompiled || module_in_worklist(mod)) { - *bp = (void*)((char*)HT_NOTFOUND + 2); // found - return 1; - } - if (!mi->backedges) { - return 0; - } - size_t i, n = jl_array_len(mi->backedges); - for (i = 0; i < n; i++) { - jl_method_instance_t *be = (jl_method_instance_t*)jl_array_ptr_ref(mi->backedges, i); - if (has_backedge_to_worklist(be, visited)) { - bp = ptrhash_bp(visited, mi); // re-acquire since rehashing might change the location - *bp = (void*)((char*)HT_NOTFOUND + 2); // found - return 1; - } - } - return 0; -} - -// given the list of MethodInstances that were inferred during the -// build, select those that are external and have at least one -// relocatable CodeInstance. 
-static size_t queue_external_mis(jl_array_t *list) -{ - size_t i, n = 0; - htable_t visited; - if (list) { - assert(jl_is_array(list)); - size_t n0 = jl_array_len(list); - htable_new(&visited, n0); - for (i = 0; i < n0; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - assert(jl_is_method_instance(mi)); - if (jl_is_method(mi->def.value)) { - jl_method_t *m = mi->def.method; - if (!module_in_worklist(m->module)) { - jl_code_instance_t *ci = mi->cache; - int relocatable = 0; - while (ci) { - relocatable |= ci->relocatability; - ci = ci->next; - } - if (relocatable && ptrhash_get(&external_mis, mi) == HT_NOTFOUND) { - if (has_backedge_to_worklist(mi, &visited)) { - ptrhash_put(&external_mis, mi, mi); - n++; - } - } - } - } - } - htable_free(&visited); - } - return n; -} - -static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_GC_DISABLED -{ - int tag = 0; - int internal = module_in_worklist(dt->name->module); - if (!internal && jl_unwrap_unionall(dt->name->wrapper) == (jl_value_t*)dt) { - tag = 6; // external primary type - } - else if (jl_is_tuple_type(dt) ? !dt->isconcretetype : dt->hasfreetypevars) { - tag = 0; // normal struct - } - else if (internal) { - if (jl_unwrap_unionall(dt->name->wrapper) == (jl_value_t*)dt) // comes up often since functions create types - tag = 5; // internal, and not in the typename cache - else - tag = 10; // anything else that's internal (just may need recaching) - } - else if (type_recursively_external(dt)) { - tag = 7; // external type that can be immediately recreated (with apply_type) - } - else if (type_in_worklist(dt)) { - tag = 11; // external, but definitely new (still needs caching, but not full unique-ing) - } - else { - // this is eligible for (and possibly requires) unique-ing later, - // so flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, dt); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - tag = 12; - } - - char *dtname = jl_symbol_name(dt->name->name); - size_t dtnl = strlen(dtname); - if (dtnl > 4 && strcmp(&dtname[dtnl - 4], "##kw") == 0 && !internal && tag != 0) { - /* XXX: yuck, this is horrible, but the auto-generated kw types from the serializer isn't a real type, so we *must* be very careful */ - assert(tag == 6); // other struct types should never exist - tag = 9; - if (jl_type_type_mt->kwsorter != NULL && dt == (jl_datatype_t*)jl_typeof(jl_type_type_mt->kwsorter)) { - dt = jl_datatype_type; // any representative member with this MethodTable - } - else if (jl_nonfunction_mt->kwsorter != NULL && dt == (jl_datatype_t*)jl_typeof(jl_nonfunction_mt->kwsorter)) { - dt = jl_symbol_type; // any representative member with this MethodTable - } - else { - // search for the representative member of this MethodTable - jl_methtable_t *mt = dt->name->mt; - size_t l = strlen(jl_symbol_name(mt->name)); - char *prefixed; - prefixed = (char*)malloc_s(l + 2); - prefixed[0] = '#'; - strcpy(&prefixed[1], jl_symbol_name(mt->name)); - // remove ##kw suffix - prefixed[l-3] = 0; - jl_sym_t *tname = jl_symbol(prefixed); - free(prefixed); - jl_value_t *primarydt = jl_get_global(mt->module, tname); - if (!primarydt) - primarydt = jl_get_global(mt->module, mt->name); - primarydt = jl_unwrap_unionall(primarydt); - assert(jl_is_datatype(primarydt)); - assert(primarydt == (jl_value_t*)jl_any_type || jl_typeof(((jl_datatype_t*)primarydt)->name->mt->kwsorter) == (jl_value_t*)dt); - dt = (jl_datatype_t*)primarydt; - } - } - - write_uint8(s->s, 
TAG_DATATYPE); - write_uint8(s->s, tag); - if (tag == 6 || tag == 7) { - // for tag==6, copy its typevars in case there are references to them elsewhere - jl_serialize_value(s, dt->name); - jl_serialize_value(s, dt->parameters); - return; - } - if (tag == 9) { - jl_serialize_value(s, dt); - return; - } - - write_int32(s->s, dt->size); - int has_instance = (dt->instance != NULL); - int has_layout = (dt->layout != NULL); - write_uint8(s->s, has_layout | (has_instance << 1)); - write_uint8(s->s, dt->hasfreetypevars - | (dt->isconcretetype << 1) - | (dt->isdispatchtuple << 2) - | (dt->isbitstype << 3) - | (dt->zeroinit << 4) - | (dt->has_concrete_subtype << 5) - | (dt->cached_by_hash << 6)); - write_int32(s->s, dt->hash); - - if (has_layout) { - uint8_t layout = 0; - if (dt->layout == ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->layout) { - layout = 1; - } - else if (dt->layout == jl_nothing_type->layout) { - layout = 2; - } - else if (dt->layout == ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->layout) { - layout = 3; - } - write_uint8(s->s, layout); - if (layout == 0) { - uint32_t nf = dt->layout->nfields; - uint32_t np = dt->layout->npointers; - size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); - ios_write(s->s, (const char*)dt->layout, sizeof(*dt->layout)); - size_t fldsize = nf * fieldsize; - if (dt->layout->first_ptr != -1) - fldsize += np << dt->layout->fielddesc_type; - ios_write(s->s, (const char*)(dt->layout + 1), fldsize); - } - } - - if (has_instance) - jl_serialize_value(s, dt->instance); - jl_serialize_value(s, dt->name); - jl_serialize_value(s, dt->parameters); - jl_serialize_value(s, dt->super); - jl_serialize_value(s, dt->types); -} - -static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m) -{ - write_uint8(s->s, TAG_MODULE); - jl_serialize_value(s, m->name); - size_t i; - if (!module_in_worklist(m)) { - if (m == m->parent) { - // top-level module - write_int8(s->s, 2); - int j = 0; - for (i = 0; i < jl_array_len(s->loaded_modules_array); i++) { - jl_module_t *mi = (jl_module_t*)jl_array_ptr_ref(s->loaded_modules_array, i); - if (!module_in_worklist(mi)) { - if (m == mi) { - write_int32(s->s, j); - return; - } - j++; - } - } - assert(0 && "top level module not found in modules array"); - } - else { - write_int8(s->s, 1); - jl_serialize_value(s, m->parent); - } - return; - } - write_int8(s->s, 0); - jl_serialize_value(s, m->parent); - void **table = m->bindings.table; - for (i = 0; i < m->bindings.size; i += 2) { - if (table[i+1] != HT_NOTFOUND) { - jl_serialize_value(s, (jl_value_t*)table[i]); - jl_binding_t *b = (jl_binding_t*)table[i+1]; - jl_serialize_value(s, b->name); - jl_value_t *e = jl_atomic_load_relaxed(&b->value); - if (!b->constp && e && jl_is_cpointer(e) && jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref)); - jl_serialize_value(s, b->owner); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->ty)); - write_int8(s->s, (b->deprecated<<3) | (b->constp<<2) | (b->exportp<<1) | (b->imported)); - } - } - jl_serialize_value(s, NULL); - write_int32(s->s, m->usings.len); - for(i=0; i < m->usings.len; i++) { - jl_serialize_value(s, (jl_value_t*)m->usings.items[i]); - } - write_uint8(s->s, m->istopmod); - write_uint64(s->s, m->uuid.hi); - write_uint64(s->s, m->uuid.lo); - 
write_uint64(s->s, m->build_id); - write_int32(s->s, m->counter); - write_int32(s->s, m->nospecialize); - write_uint8(s->s, m->optlevel); - write_uint8(s->s, m->compile); - write_uint8(s->s, m->infer); - write_uint8(s->s, m->max_methods); -} - -static int jl_serialize_generic(jl_serializer_state *s, jl_value_t *v) JL_GC_DISABLED -{ - if (v == NULL) { - write_uint8(s->s, TAG_NULL); - return 1; - } - - void *tag = ptrhash_get(&ser_tag, v); - if (tag != HT_NOTFOUND) { - uint8_t t8 = (intptr_t)tag; - if (t8 <= LAST_TAG) - write_uint8(s->s, 0); - write_uint8(s->s, t8); - return 1; - } - - if (jl_is_symbol(v)) { - void *idx = ptrhash_get(&common_symbol_tag, v); - if (idx != HT_NOTFOUND) { - write_uint8(s->s, TAG_COMMONSYM); - write_uint8(s->s, (uint8_t)(size_t)idx); - return 1; - } - } - else if (v == (jl_value_t*)jl_core_module) { - write_uint8(s->s, TAG_CORE); - return 1; - } - else if (v == (jl_value_t*)jl_base_module) { - write_uint8(s->s, TAG_BASE); - return 1; - } - - if (jl_typeis(v, jl_string_type) && jl_string_len(v) == 0) { - jl_serialize_value(s, jl_an_empty_string); - return 1; - } - else if (!jl_is_uint8(v)) { - void **bp = ptrhash_bp(&backref_table, v); - if (*bp != HT_NOTFOUND) { - uintptr_t pos = (char*)*bp - (char*)HT_NOTFOUND - 1; - if (pos < 65536) { - write_uint8(s->s, TAG_SHORT_BACKREF); - write_uint16(s->s, pos); - } - else { - write_uint8(s->s, TAG_BACKREF); - write_int32(s->s, pos); - } - return 1; - } - intptr_t pos = backref_table_numel++; - if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) { - // will need to rehash this, later (after types are fully constructed) - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)1); - } - if (jl_is_module(v)) { - jl_module_t *m = (jl_module_t*)v; - if (module_in_worklist(m) && !module_in_worklist(m->parent)) { - // will need to reinsert this into parent bindings, later (in case of any errors during reinsert) - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)2); - } - } - // TypeMapLevels need to be rehashed - if (jl_is_mtable(v)) { - arraylist_push(&reinit_list, (void*)pos); - arraylist_push(&reinit_list, (void*)3); - } - pos <<= 1; - ptrhash_put(&backref_table, v, (char*)HT_NOTFOUND + pos + 1); - } - - return 0; -} - -static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_t *codeinst, int skip_partial_opaque, int internal) JL_GC_DISABLED -{ - if (internal > 2) { - while (codeinst && !codeinst->relocatability) - codeinst = codeinst->next; - } - if (jl_serialize_generic(s, (jl_value_t*)codeinst)) { - return; - } - assert(codeinst != NULL); // handle by jl_serialize_generic, but this makes clang-sa happy - - int validate = 0; - if (codeinst->max_world == ~(size_t)0) - validate = 1; // can check on deserialize if this cache entry is still valid - int flags = validate << 0; - if (codeinst->invoke == jl_fptr_const_return) - flags |= 1 << 2; - if (codeinst->precompile) - flags |= 1 << 3; - - // CodeInstances with PartialOpaque return type are currently not allowed - // to be cached. We skip them in serialization here, forcing them to - // be re-infered on reload. 
- int write_ret_type = validate || codeinst->min_world == 0; - if (write_ret_type && codeinst->rettype_const && - jl_typeis(codeinst->rettype_const, jl_partial_opaque_type)) { - if (skip_partial_opaque) { - jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, internal); - return; - } - else { - jl_error("Cannot serialize CodeInstance with PartialOpaque rettype"); - } - } - - write_uint8(s->s, TAG_CODE_INSTANCE); - write_uint8(s->s, flags); - write_uint32(s->s, codeinst->ipo_purity_bits); - write_uint32(s->s, codeinst->purity_bits); - jl_serialize_value(s, (jl_value_t*)codeinst->def); - if (write_ret_type) { - jl_serialize_value(s, codeinst->inferred); - jl_serialize_value(s, codeinst->rettype_const); - jl_serialize_value(s, codeinst->rettype); - jl_serialize_value(s, codeinst->argescapes); - } - else { - // skip storing useless data - jl_serialize_value(s, NULL); - jl_serialize_value(s, NULL); - jl_serialize_value(s, jl_any_type); - jl_serialize_value(s, jl_nothing); - } - write_uint8(s->s, codeinst->relocatability); - jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, internal); -} - -enum METHOD_SERIALIZATION_MODE { - METHOD_INTERNAL = 1, - METHOD_EXTERNAL_MT = 2, - METHOD_HAS_NEW_ROOTS = 4, -}; - -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED -{ - if (jl_serialize_generic(s, v)) { - return; - } - - size_t i; - if (jl_is_svec(v)) { - size_t l = jl_svec_len(v); - if (l <= 255) { - write_uint8(s->s, TAG_SVEC); - write_uint8(s->s, (uint8_t)l); - } - else { - write_uint8(s->s, TAG_LONG_SVEC); - write_int32(s->s, l); - } - for (i = 0; i < l; i++) { - jl_serialize_value(s, jl_svecref(v, i)); - } - } - else if (jl_is_symbol(v)) { - size_t l = strlen(jl_symbol_name((jl_sym_t*)v)); - if (l <= 255) { - write_uint8(s->s, TAG_SYMBOL); - write_uint8(s->s, (uint8_t)l); - } - else { - write_uint8(s->s, TAG_LONG_SYMBOL); - write_int32(s->s, l); - } - ios_write(s->s, jl_symbol_name((jl_sym_t*)v), l); - } - else if (jl_is_array(v)) { - jl_array_t *ar = (jl_array_t*)v; - jl_value_t *et = jl_tparam0(jl_typeof(ar)); - int isunion = jl_is_uniontype(et); - if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) { - write_uint8(s->s, TAG_ARRAY1D); - write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f)); - } - else { - write_uint8(s->s, TAG_ARRAY); - write_uint16(s->s, ar->flags.ndims); - write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff)); - } - for (i = 0; i < ar->flags.ndims; i++) - jl_serialize_value(s, jl_box_long(jl_array_dim(ar,i))); - jl_serialize_value(s, jl_typeof(ar)); - size_t l = jl_array_len(ar); - if (ar->flags.ptrarray) { - for (i = 0; i < l; i++) { - jl_value_t *e = jl_array_ptr_ref(v, i); - if (e && jl_is_cpointer(e) && jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr elements to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - } - } - else if (ar->flags.hasptr) { - const char *data = (const char*)jl_array_data(ar); - uint16_t elsz = ar->elsize; - size_t j, np = ((jl_datatype_t*)et)->layout->npointers; - for (i = 0; i < l; i++) { - const char *start = data; - for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset((jl_datatype_t*)et, j); - const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr]; - if ((const char*)fld != start) - ios_write(s->s, start, (const char*)fld - 
start); - JL_GC_PROMISE_ROOTED(*fld); - jl_serialize_value(s, *fld); - start = (const char*)&fld[1]; - } - data += elsz; - if (data != start) - ios_write(s->s, start, data - start); - } - } - else if (jl_is_cpointer_type(et)) { - // reset Ptr elements to C_NULL - const void **data = (const void**)jl_array_data(ar); - for (i = 0; i < l; i++) { - const void *e = data[i]; - if (e != (void*)-1) - e = NULL; - ios_write(s->s, (const char*)&e, sizeof(e)); - } - } - else { - ios_write(s->s, (char*)jl_array_data(ar), l * ar->elsize); - if (jl_array_isbitsunion(ar)) - ios_write(s->s, jl_array_typetagdata(ar), l); - } - } - else if (jl_is_datatype(v)) { - jl_serialize_datatype(s, (jl_datatype_t*)v); - } - else if (jl_is_unionall(v)) { - write_uint8(s->s, TAG_UNIONALL); - jl_datatype_t *d = (jl_datatype_t*)jl_unwrap_unionall(v); - if (jl_is_datatype(d) && d->name->wrapper == v && - !module_in_worklist(d->name->module)) { - write_uint8(s->s, 1); - jl_serialize_value(s, d->name->module); - jl_serialize_value(s, d->name->name); - } - else { - write_uint8(s->s, 0); - jl_serialize_value(s, ((jl_unionall_t*)v)->var); - jl_serialize_value(s, ((jl_unionall_t*)v)->body); - } - } - else if (jl_is_typevar(v)) { - write_uint8(s->s, TAG_TVAR); - jl_serialize_value(s, ((jl_tvar_t*)v)->name); - jl_serialize_value(s, ((jl_tvar_t*)v)->lb); - jl_serialize_value(s, ((jl_tvar_t*)v)->ub); - } - else if (jl_is_method(v)) { - write_uint8(s->s, TAG_METHOD); - jl_method_t *m = (jl_method_t*)v; - uint64_t key = 0; - int serialization_mode = 0, nwithkey = 0; - if (m->is_for_opaque_closure || module_in_worklist(m->module)) - serialization_mode |= METHOD_INTERNAL; - if (!(serialization_mode & METHOD_INTERNAL)) { - key = jl_worklist_key(serializer_worklist); - nwithkey = nroots_with_key(m, key); - if (nwithkey > 0) - serialization_mode |= METHOD_HAS_NEW_ROOTS; - } - if (!(serialization_mode & METHOD_INTERNAL)) { - // flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - jl_serialize_value(s, (jl_value_t*)m->sig); - jl_serialize_value(s, (jl_value_t*)m->module); - if (m->external_mt != NULL) { - assert(jl_typeis(m->external_mt, jl_methtable_type)); - jl_methtable_t *mt = (jl_methtable_t*)m->external_mt; - if (!module_in_worklist(mt->module)) { - serialization_mode |= METHOD_EXTERNAL_MT; - } - } - write_uint8(s->s, serialization_mode); - if (serialization_mode & METHOD_EXTERNAL_MT) { - // We reference this method table by module and binding - jl_methtable_t *mt = (jl_methtable_t*)m->external_mt; - jl_serialize_value(s, mt->module); - jl_serialize_value(s, mt->name); - } - else { - jl_serialize_value(s, (jl_value_t*)m->external_mt); - } - if (!(serialization_mode & METHOD_INTERNAL)) { - if (serialization_mode & METHOD_HAS_NEW_ROOTS) { - // Serialize the roots that belong to key - write_uint64(s->s, key); - write_int32(s->s, nwithkey); - rle_iter_state rootiter = rle_iter_init(0); - uint64_t *rletable = NULL; - size_t nblocks2 = 0, nroots = jl_array_len(m->roots); - if (m->root_blocks) { - rletable = (uint64_t*)jl_array_data(m->root_blocks); - nblocks2 = jl_array_len(m->root_blocks); - } - // this visits every item, if it becomes a bottlneck we could hop blocks - while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2)) - if (rootiter.key == key) - jl_serialize_value(s, jl_array_ptr_ref(m->roots, rootiter.i)); - } - return; - } - jl_serialize_value(s, m->specializations); - jl_serialize_value(s, 
jl_atomic_load_relaxed(&m->speckeyset)); - jl_serialize_value(s, (jl_value_t*)m->name); - jl_serialize_value(s, (jl_value_t*)m->file); - write_int32(s->s, m->line); - write_int32(s->s, m->called); - write_int32(s->s, m->nargs); - write_int32(s->s, m->nospecialize); - write_int32(s->s, m->nkw); - write_int8(s->s, m->isva); - write_int8(s->s, m->pure); - write_int8(s->s, m->is_for_opaque_closure); - write_int8(s->s, m->constprop); - write_uint8(s->s, m->purity.bits); - jl_serialize_value(s, (jl_value_t*)m->slot_syms); - jl_serialize_value(s, (jl_value_t*)m->roots); - jl_serialize_value(s, (jl_value_t*)m->root_blocks); - write_int32(s->s, m->nroots_sysimg); - jl_serialize_value(s, (jl_value_t*)m->ccallable); - jl_serialize_value(s, (jl_value_t*)m->source); - jl_serialize_value(s, (jl_value_t*)m->unspecialized); - jl_serialize_value(s, (jl_value_t*)m->generator); - jl_serialize_value(s, (jl_value_t*)m->invokes); - jl_serialize_value(s, (jl_value_t*)m->recursion_relation); - } - else if (jl_is_method_instance(v)) { - jl_method_instance_t *mi = (jl_method_instance_t*)v; - if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) { - jl_error("unimplemented: serialization of MethodInstances for OpaqueClosure"); - } - write_uint8(s->s, TAG_METHOD_INSTANCE); - int internal = 0; - if (!jl_is_method(mi->def.method)) - internal = 1; - else if (module_in_worklist(mi->def.method->module)) - internal = 2; - else if (ptrhash_get(&external_mis, (void*)mi) != HT_NOTFOUND) - internal = 3; - write_uint8(s->s, internal); - if (!internal) { - // also flag this in the backref table as special - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - if (internal == 1) - jl_serialize_value(s, (jl_value_t*)mi->uninferred); - jl_serialize_value(s, (jl_value_t*)mi->specTypes); - jl_serialize_value(s, mi->def.value); - if (!internal) - return; - jl_serialize_value(s, (jl_value_t*)mi->sparam_vals); - jl_array_t *backedges = mi->backedges; - if (backedges) { - // filter backedges to only contain pointers - // to items that we will actually store (internal >= 2) - size_t ins, i, l = jl_array_len(backedges); - jl_method_instance_t **b_edges = (jl_method_instance_t**)jl_array_data(backedges); - for (ins = i = 0; i < l; i++) { - jl_method_instance_t *backedge = b_edges[i]; - if (module_in_worklist(backedge->def.method->module) || method_instance_in_queue(backedge)) { - b_edges[ins++] = backedge; - } - } - if (ins != l) - jl_array_del_end(backedges, l - ins); - if (ins == 0) - backedges = NULL; - } - jl_serialize_value(s, (jl_value_t*)backedges); - jl_serialize_value(s, (jl_value_t*)NULL); //callbacks - jl_serialize_code_instance(s, mi->cache, 1, internal); - } - else if (jl_is_code_instance(v)) { - jl_serialize_code_instance(s, (jl_code_instance_t*)v, 0, 2); - } - else if (jl_typeis(v, jl_module_type)) { - jl_serialize_module(s, (jl_module_t*)v); - } - else if (jl_typeis(v, jl_task_type)) { - jl_error("Task cannot be serialized"); - } - else if (jl_typeis(v, jl_opaque_closure_type)) { - jl_error("Live opaque closures cannot be serialized"); - } - else if (jl_typeis(v, jl_string_type)) { - write_uint8(s->s, TAG_STRING); - write_int32(s->s, jl_string_len(v)); - ios_write(s->s, jl_string_data(v), jl_string_len(v)); - } - else if (jl_typeis(v, jl_int64_type)) { - void *data = jl_data_ptr(v); - if (*(int64_t*)data >= INT16_MIN && *(int64_t*)data <= INT16_MAX) { - write_uint8(s->s, TAG_SHORTER_INT64); - write_uint16(s->s, (uint16_t)*(int64_t*)data); - 
} - else if (*(int64_t*)data >= S32_MIN && *(int64_t*)data <= S32_MAX) { - write_uint8(s->s, TAG_SHORT_INT64); - write_int32(s->s, (int32_t)*(int64_t*)data); - } - else { - write_uint8(s->s, TAG_INT64); - write_int64(s->s, *(int64_t*)data); - } - } - else if (jl_typeis(v, jl_int32_type)) { - void *data = jl_data_ptr(v); - if (*(int32_t*)data >= INT16_MIN && *(int32_t*)data <= INT16_MAX) { - write_uint8(s->s, TAG_SHORT_INT32); - write_uint16(s->s, (uint16_t)*(int32_t*)data); - } - else { - write_uint8(s->s, TAG_INT32); - write_int32(s->s, *(int32_t*)data); - } - } - else if (jl_typeis(v, jl_uint8_type)) { - write_uint8(s->s, TAG_UINT8); - write_int8(s->s, *(int8_t*)jl_data_ptr(v)); - } - else if (jl_is_cpointer(v) && jl_unbox_voidpointer(v) == NULL) { - write_uint8(s->s, TAG_CNULL); - jl_serialize_value(s, jl_typeof(v)); - return; - } - else if (jl_bigint_type && jl_typeis(v, jl_bigint_type)) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, jl_datatype_size(jl_bigint_type)); - jl_serialize_value(s, jl_bigint_type); - jl_value_t *sizefield = jl_get_nth_field(v, 1); - jl_serialize_value(s, sizefield); - void *data = jl_unbox_voidpointer(jl_get_nth_field(v, 2)); - int32_t sz = jl_unbox_int32(sizefield); - size_t nb = (sz == 0 ? 1 : (sz < 0 ? -sz : sz)) * gmp_limb_size; - ios_write(s->s, (char*)data, nb); - } - else { - jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - if (v == t->instance) { - if (!type_in_worklist(t)) { - // also flag this in the backref table as special - // if it might not be unique (is external) - uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v); - assert(*bp != (uintptr_t)HT_NOTFOUND); - *bp |= 1; - } - write_uint8(s->s, TAG_SINGLETON); - jl_serialize_value(s, t); - return; - } - assert(!t->instance && "detected singleton construction corruption"); - - if (t == jl_typename_type) { - void *bttag = ptrhash_get(&ser_tag, ((jl_typename_t*)t)->wrapper); - if (bttag != HT_NOTFOUND) { - write_uint8(s->s, TAG_BITYPENAME); - write_uint8(s->s, (uint8_t)(intptr_t)bttag); - return; - } - } - if (t->size <= 255) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, t->size); - } - else { - write_uint8(s->s, TAG_GENERAL); - write_int32(s->s, t->size); - } - jl_serialize_value(s, t); - if (t == jl_typename_type) { - jl_typename_t *tn = (jl_typename_t*)v; - int internal = module_in_worklist(tn->module); - write_uint8(s->s, internal); - jl_serialize_value(s, tn->module); - jl_serialize_value(s, tn->name); - if (internal) { - jl_serialize_value(s, tn->names); - jl_serialize_value(s, tn->wrapper); - jl_serialize_value(s, tn->mt); - ios_write(s->s, (char*)&tn->hash, sizeof(tn->hash)); - write_uint8(s->s, tn->abstract | (tn->mutabl << 1) | (tn->mayinlinealloc << 2)); - write_uint8(s->s, tn->max_methods); - if (!tn->abstract) - write_uint16(s->s, tn->n_uninitialized); - size_t nb = tn->atomicfields ? (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0; - write_int32(s->s, nb); - if (nb) - ios_write(s->s, (char*)tn->atomicfields, nb); - nb = tn->constfields ? (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0; - write_int32(s->s, nb); - if (nb) - ios_write(s->s, (char*)tn->constfields, nb); - } - return; - } - - if (jl_is_foreign_type(t)) { - jl_error("Cannot serialize instances of foreign datatypes"); - } - - char *data = (char*)jl_data_ptr(v); - size_t i, j, np = t->layout->npointers; - uint32_t nf = t->layout->nfields; - char *last = data; - for (i = 0, j = 0; i < nf+1; i++) { - char *ptr = data + (i < nf ? 
jl_field_offset(t, i) : jl_datatype_size(t)); - if (j < np) { - char *prevptr = (char*)&((jl_value_t**)data)[jl_ptr_offset(t, j)]; - while (ptr > prevptr) { - // previous field contained pointers; write them and their interleaved data - if (prevptr > last) - ios_write(s->s, last, prevptr - last); - jl_value_t *e = *(jl_value_t**)prevptr; - JL_GC_PROMISE_ROOTED(e); - if (t->name->mutabl && e && jl_field_isptr(t, i - 1) && jl_is_cpointer(e) && - jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL) - // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) - jl_serialize_cnull(s, jl_typeof(e)); - else - jl_serialize_value(s, e); - last = prevptr + sizeof(jl_value_t*); - j++; - if (j < np) - prevptr = (char*)&((jl_value_t**)data)[jl_ptr_offset(t, j)]; - else - break; - } - } - if (i == nf) - break; - if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(void**)ptr != (void*)-1) { - if (ptr > last) - ios_write(s->s, last, ptr - last); - char *n = NULL; - ios_write(s->s, (char*)&n, sizeof(n)); - last = ptr + sizeof(n); - } - } - char *ptr = data + jl_datatype_size(t); - if (ptr > last) - ios_write(s->s, last, ptr - last); - } -} - -// Used to serialize the external method instances queued in queued_method_roots (from newly_inferred) -static void serialize_htable_keys(jl_serializer_state *s, htable_t *ht, int nitems) -{ - write_int32(s->s, nitems); - void **table = ht->table; - size_t i, n = 0, sz = ht->size; - (void)n; - for (i = 0; i < sz; i += 2) { - if (table[i+1] != HT_NOTFOUND) { - jl_serialize_value(s, (jl_value_t*)table[i]); - n += 1; - } - } - assert(n == nitems); -} - -// Create the forward-edge map (caller => callees) -// the intent of these functions is to invert the backedges tree -// for anything that points to a method not part of the worklist -// or method instances not in the queue -// -// from MethodTables -static void jl_collect_missing_backedges_to_mod(jl_methtable_t *mt) -{ - jl_array_t *backedges = mt->backedges; - if (backedges) { - size_t i, l = jl_array_len(backedges); - for (i = 1; i < l; i += 2) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); - jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1); // signature of abstract callee - jl_array_t **edges = (jl_array_t**)ptrhash_bp(&edges_map, (void*)caller); - if (*edges == HT_NOTFOUND) - *edges = jl_alloc_vec_any(0); - jl_array_ptr_1d_push(*edges, missing_callee); - } - } -} - -// from MethodInstances -static void collect_backedges(jl_method_instance_t *callee) JL_GC_DISABLED -{ - jl_array_t *backedges = callee->backedges; - if (backedges) { - size_t i, l = jl_array_len(backedges); - for (i = 0; i < l; i++) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); - jl_array_t **edges = (jl_array_t**)ptrhash_bp(&edges_map, caller); - if (*edges == HT_NOTFOUND) - *edges = jl_alloc_vec_any(0); - jl_array_ptr_1d_push(*edges, (jl_value_t*)callee); - } - } -} - - -// For functions owned by modules not on the worklist, call this on each method. -// - if the method is owned by a worklist module, add it to the list of things to be -// fully serialized -// - otherwise (i.e., if it's an external method), check all of its specializations. -// Collect backedges from those that are not being fully serialized. 
-static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) JL_GC_DISABLED -{ - jl_array_t *s = (jl_array_t*)closure; - jl_method_t *m = ml->func.method; - if (module_in_worklist(m->module)) { - jl_array_ptr_1d_push(s, (jl_value_t*)m); - jl_array_ptr_1d_push(s, (jl_value_t*)ml->simplesig); - } - else { - jl_svec_t *specializations = m->specializations; - size_t i, l = jl_svec_len(specializations); - for (i = 0; i < l; i++) { - jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i); - if ((jl_value_t*)callee != jl_nothing && !method_instance_in_queue(callee)) - collect_backedges(callee); - } - } - return 1; -} - -static void jl_collect_methtable_from_mod(jl_array_t *s, jl_methtable_t *mt) JL_GC_DISABLED -{ - jl_typemap_visitor(mt->defs, jl_collect_methcache_from_mod, (void*)s); -} - -// Collect methods of external functions defined by modules in the worklist -// "extext" = "extending external" -// Also collect relevant backedges -static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) JL_GC_DISABLED -{ - if (module_in_worklist(m)) - return; - size_t i; - void **table = m->bindings.table; - for (i = 1; i < m->bindings.size; i += 2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i]; - if (b->owner == m && b->value && b->constp) { - jl_value_t *bv = jl_unwrap_unionall(b->value); - if (jl_is_datatype(bv)) { - jl_typename_t *tn = ((jl_datatype_t*)bv)->name; - if (tn->module == m && tn->name == b->name && tn->wrapper == b->value) { - jl_methtable_t *mt = tn->mt; - if (mt != NULL && - (jl_value_t*)mt != jl_nothing && - (mt != jl_type_type_mt && mt != jl_nonfunction_mt)) { - jl_collect_methtable_from_mod(s, mt); - jl_collect_missing_backedges_to_mod(mt); - } - } - } - else if (jl_is_module(b->value)) { - jl_module_t *child = (jl_module_t*)b->value; - if (child != m && child->parent == m && child->name == b->name) { - // this is the original/primary binding for the submodule - jl_collect_extext_methods_from_mod(s, (jl_module_t*)b->value); - } - } - else if (jl_is_mtable(b->value)) { - jl_methtable_t *mt = (jl_methtable_t*)b->value; - if (mt->module == m && mt->name == b->name) { - // this is probably an external method table, so let's assume so - // as there is no way to precisely distinguish them, - // and the rest of this serializer does not bother - // to handle any method tables specially - jl_collect_methtable_from_mod(s, (jl_methtable_t*)bv); - } - } - } - } - } -} - -// flatten the backedge map reachable from caller into callees -static void jl_collect_backedges_to(jl_method_instance_t *caller, htable_t *all_callees) JL_GC_DISABLED -{ - jl_array_t **pcallees = (jl_array_t**)ptrhash_bp(&edges_map, (void*)caller), - *callees = *pcallees; - if (callees != HT_NOTFOUND) { - *pcallees = (jl_array_t*) HT_NOTFOUND; - size_t i, l = jl_array_len(callees); - for (i = 0; i < l; i++) { - jl_value_t *c = jl_array_ptr_ref(callees, i); - ptrhash_put(all_callees, c, c); - if (jl_is_method_instance(c)) { - jl_collect_backedges_to((jl_method_instance_t*)c, all_callees); - } - } - } -} - -// Extract `edges` and `ext_targets` from `edges_map` -// This identifies internal->external edges in the call graph, pulling them out for special treatment. -static void jl_collect_backedges( /* edges */ jl_array_t *s, /* ext_targets */ jl_array_t *t) -{ - htable_t all_targets; // target => tgtindex mapping - htable_t all_callees; // MIs called by worklist methods (eff. 
Set{MethodInstance}) - htable_new(&all_targets, 0); - htable_new(&all_callees, 0); - size_t i; - void **table = edges_map.table; // edges is caller => callees - for (i = 0; i < edges_map.size; i += 2) { - jl_method_instance_t *caller = (jl_method_instance_t*)table[i]; - jl_array_t *callees = (jl_array_t*)table[i + 1]; - if (callees != HT_NOTFOUND && (module_in_worklist(caller->def.method->module) || method_instance_in_queue(caller))) { - size_t i, l = jl_array_len(callees); - for (i = 0; i < l; i++) { - jl_value_t *c = jl_array_ptr_ref(callees, i); - ptrhash_put(&all_callees, c, c); - if (jl_is_method_instance(c)) { - jl_collect_backedges_to((jl_method_instance_t*)c, &all_callees); - } - } - callees = jl_alloc_array_1d(jl_array_int32_type, 0); - void **pc = all_callees.table; - size_t j; - int valid = 1; - for (j = 0; valid && j < all_callees.size; j += 2) { - if (pc[j + 1] != HT_NOTFOUND) { - jl_value_t *callee = (jl_value_t*)pc[j]; - void *target = ptrhash_get(&all_targets, (void*)callee); - if (target == HT_NOTFOUND) { - jl_method_instance_t *callee_mi = (jl_method_instance_t*)callee; - jl_value_t *sig; - if (jl_is_method_instance(callee)) { - sig = callee_mi->specTypes; - } - else { - sig = callee; - } - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - int ambig = 0; - jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_atomic_load_acquire(&jl_world_counter), &min_valid, &max_valid, &ambig); - if (matches == jl_false) { - valid = 0; - break; - } - size_t k; - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); - jl_array_ptr_set(matches, k, match->method); - } - jl_array_ptr_1d_push(t, callee); - jl_array_ptr_1d_push(t, matches); - target = (char*)HT_NOTFOUND + jl_array_len(t) / 2; - ptrhash_put(&all_targets, (void*)callee, target); - } - jl_array_grow_end(callees, 1); - ((int32_t*)jl_array_data(callees))[jl_array_len(callees) - 1] = (char*)target - (char*)HT_NOTFOUND - 1; - } - } - htable_reset(&all_callees, 100); - if (valid) { - jl_array_ptr_1d_push(s, (jl_value_t*)caller); - jl_array_ptr_1d_push(s, (jl_value_t*)callees); - } - } - } - htable_free(&all_targets); - htable_free(&all_callees); -} - -// serialize information about all loaded modules -static void write_mod_list(ios_t *s, jl_array_t *a) -{ - size_t i; - size_t len = jl_array_len(a); - for (i = 0; i < len; i++) { - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(a, i); - assert(jl_is_module(m)); - if (!module_in_worklist(m)) { - const char *modname = jl_symbol_name(m->name); - size_t l = strlen(modname); - write_int32(s, l); - ios_write(s, modname, l); - write_uint64(s, m->uuid.hi); - write_uint64(s, m->uuid.lo); - write_uint64(s, m->build_id); - } - } - write_int32(s, 0); -} - -// "magic" string and version header of .ji file -static const int JI_FORMAT_VERSION = 11; -static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature -static const uint16_t BOM = 0xFEFF; // byte-order marker -static void write_header(ios_t *s) -{ - ios_write(s, JI_MAGIC, strlen(JI_MAGIC)); - write_uint16(s, JI_FORMAT_VERSION); - ios_write(s, (char *) &BOM, 2); - write_uint8(s, sizeof(void*)); - ios_write(s, JL_BUILD_UNAME, strlen(JL_BUILD_UNAME)+1); - ios_write(s, JL_BUILD_ARCH, strlen(JL_BUILD_ARCH)+1); - ios_write(s, JULIA_VERSION_STRING, strlen(JULIA_VERSION_STRING)+1); - const char *branch = jl_git_branch(), *commit = jl_git_commit(); - ios_write(s, branch, strlen(branch)+1); - ios_write(s, commit, 
strlen(commit)+1); -} - -// serialize information about the result of deserializing this file -static void write_work_list(ios_t *s) -{ - int i, l = jl_array_len(serializer_worklist); - for (i = 0; i < l; i++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, i); - if (workmod->parent == jl_main_module || workmod->parent == workmod) { - size_t l = strlen(jl_symbol_name(workmod->name)); - write_int32(s, l); - ios_write(s, jl_symbol_name(workmod->name), l); - write_uint64(s, workmod->uuid.hi); - write_uint64(s, workmod->uuid.lo); - write_uint64(s, workmod->build_id); - } - } - write_int32(s, 0); -} - -static void write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT -{ - if (depmod->parent == jl_main_module || depmod->parent == depmod) - return; - const char *mname = jl_symbol_name(depmod->name); - size_t slen = strlen(mname); - write_module_path(s, depmod->parent); - write_int32(s, slen); - ios_write(s, mname, slen); -} - -// Cache file header -// Serialize the global Base._require_dependencies array of pathnames that -// are include dependencies. Also write Preferences and return -// the location of the srctext "pointer" in the header index. -static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp) -{ - int64_t initial_pos = 0; - int64_t pos = 0; - static jl_array_t *deps = NULL; - if (!deps) - deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies")); - - // unique(deps) to eliminate duplicates while preserving order: - // we preserve order so that the topmost included .jl file comes first - static jl_value_t *unique_func = NULL; - if (!unique_func) - unique_func = jl_get_global(jl_base_module, jl_symbol("unique")); - jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps}; - jl_task_t *ct = jl_current_task; - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL); - ct->world_age = last_age; - - // write a placeholder for total size so that we can quickly seek past all of the - // dependencies if we don't need them - initial_pos = ios_pos(s); - write_uint64(s, 0); - if (udeps) { - size_t i, l = jl_array_len(udeps); - for (i = 0; i < l; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - size_t slen = jl_string_len(dep); - write_int32(s, slen); - ios_write(s, jl_string_data(dep), slen); - write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime - jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module - jl_module_t *depmod_top = depmod; - while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) - depmod_top = depmod_top->parent; - unsigned provides = 0; - size_t j, lj = jl_array_len(serializer_worklist); - for (j = 0; j < lj; j++) { - jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(serializer_worklist, j); - if (workmod->parent == jl_main_module || workmod->parent == workmod) { - ++provides; - if (workmod == depmod_top) { - write_int32(s, provides); - write_module_path(s, depmod); - break; - } - } - } - write_int32(s, 0); - } - write_int32(s, 0); // terminator, for ease of reading - - // Calculate Preferences hash for current package. 
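// Conceptually: look up `get_compiletime_preferences` and `get_preferences_hash` in Base,
// call the former on `__toplevel__` to obtain the list of preference names read while
// compiling this package, then hash that list with the latter. Storing the hash in the
// header lets a later load reject this cache when the relevant preferences have changed.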
- jl_value_t *prefs_hash = NULL; - jl_value_t *prefs_list = NULL; - JL_GC_PUSH1(&prefs_list); - if (jl_base_module) { - // Toplevel module is the module we're currently compiling, use it to get our preferences hash - jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__")); - jl_value_t * prefs_hash_func = jl_get_global(jl_base_module, jl_symbol("get_preferences_hash")); - jl_value_t * get_compiletime_prefs_func = jl_get_global(jl_base_module, jl_symbol("get_compiletime_preferences")); - - if (toplevel && prefs_hash_func && get_compiletime_prefs_func) { - // Temporary invoke in newest world age - size_t last_age = ct->world_age; - ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - - // call get_compiletime_prefs(__toplevel__) - jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL}; - prefs_list = (jl_value_t*)jl_apply(args, 2); - - // Call get_preferences_hash(__toplevel__, prefs_list) - args[0] = prefs_hash_func; - args[2] = prefs_list; - prefs_hash = (jl_value_t*)jl_apply(args, 3); - - // Reset world age to normal - ct->world_age = last_age; - } - } - - // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file. - if (prefs_hash != NULL && prefs_list != NULL) { - size_t i, l = jl_array_len(prefs_list); - for (i = 0; i < l; i++) { - jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i); - size_t slen = jl_string_len(pref_name); - write_int32(s, slen); - ios_write(s, jl_string_data(pref_name), slen); - } - write_int32(s, 0); // terminator - write_uint64(s, jl_unbox_uint64(prefs_hash)); - } else { - // This is an error path, but let's at least generate a valid `.ji` file. - // We declare an empty list of preference names, followed by a zero-hash. - // The zero-hash is not what would be generated for an empty set of preferences, - // and so this `.ji` file will be invalidated by a future non-erroring pass - // through this function. 
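// Resulting on-disk layout of the preferences block (derived from the success path above
// and the error path below, not a separate format description):
//   for each compile-time preference name:  int32 length, then the name bytes
//   int32  0       list terminator
//   uint64 hash    preferences hash (written as 0 on the error path)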
- write_int32(s, 0); - write_uint64(s, 0); - } - JL_GC_POP(); // for prefs_list - - // write a dummy file position to indicate the beginning of the source-text - pos = ios_pos(s); - ios_seek(s, initial_pos); - write_uint64(s, pos - initial_pos); - ios_seek(s, pos); - write_int64(s, 0); - } - return pos; -} - -// --- deserialize --- - -static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED; - -static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_value_t **loc) JL_GC_DISABLED -{ - assert(pos == backref_list.len - 1 && "nothing should have been deserialized since assigning pos"); - int tag = read_uint8(s->s); - if (tag == 6 || tag == 7) { - jl_typename_t *name = (jl_typename_t*)jl_deserialize_value(s, NULL); - jl_value_t *dtv = name->wrapper; - jl_svec_t *parameters = (jl_svec_t*)jl_deserialize_value(s, NULL); - dtv = jl_apply_type(dtv, jl_svec_data(parameters), jl_svec_len(parameters)); - backref_list.items[pos] = dtv; - return dtv; - } - if (tag == 9) { - jl_datatype_t *primarydt = (jl_datatype_t*)jl_deserialize_value(s, NULL); - jl_value_t *dtv = jl_typeof(jl_get_kwsorter((jl_value_t*)primarydt)); - backref_list.items[pos] = dtv; - return dtv; - } - if (!(tag == 0 || tag == 5 || tag == 10 || tag == 11 || tag == 12)) { - assert(0 && "corrupt deserialization state"); - abort(); - } - jl_datatype_t *dt = jl_new_uninitialized_datatype(); - backref_list.items[pos] = dt; - if (loc != NULL && loc != HT_NOTFOUND) - *loc = (jl_value_t*)dt; - size_t size = read_int32(s->s); - uint8_t flags = read_uint8(s->s); - uint8_t memflags = read_uint8(s->s); - dt->size = size; - int has_layout = flags & 1; - int has_instance = (flags >> 1) & 1; - dt->hasfreetypevars = memflags & 1; - dt->isconcretetype = (memflags >> 1) & 1; - dt->isdispatchtuple = (memflags >> 2) & 1; - dt->isbitstype = (memflags >> 3) & 1; - dt->zeroinit = (memflags >> 4) & 1; - dt->has_concrete_subtype = (memflags >> 5) & 1; - dt->cached_by_hash = (memflags >> 6) & 1; - dt->hash = read_int32(s->s); - - if (has_layout) { - uint8_t layout = read_uint8(s->s); - if (layout == 1) { - dt->layout = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->layout; - } - else if (layout == 2) { - dt->layout = jl_nothing_type->layout; - } - else if (layout == 3) { - dt->layout = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->layout; - } - else { - assert(layout == 0); - jl_datatype_layout_t buffer; - ios_readall(s->s, (char*)&buffer, sizeof(buffer)); - uint32_t nf = buffer.nfields; - uint32_t np = buffer.npointers; - uint8_t fielddesc_type = buffer.fielddesc_type; - size_t fielddesc_size = nf > 0 ? jl_fielddesc_size(fielddesc_type) : 0; - size_t fldsize = nf * fielddesc_size; - if (buffer.first_ptr != -1) - fldsize += np << fielddesc_type; - jl_datatype_layout_t *layout = (jl_datatype_layout_t*)jl_gc_perm_alloc( - sizeof(jl_datatype_layout_t) + fldsize, - 0, 4, 0); - *layout = buffer; - ios_readall(s->s, (char*)(layout + 1), fldsize); - dt->layout = layout; - } - } - - if (tag == 10 || tag == 11 || tag == 12) { - assert(pos > 0); - arraylist_push(&flagref_list, loc == HT_NOTFOUND ? 
NULL : loc); - arraylist_push(&flagref_list, (void*)(uintptr_t)pos); - ptrhash_put(&uniquing_table, dt, NULL); - } - - if (has_instance) { - assert(dt->isconcretetype && "there shouldn't be an instance on an abstract type"); - dt->instance = jl_deserialize_value(s, &dt->instance); - jl_gc_wb(dt, dt->instance); - } - dt->name = (jl_typename_t*)jl_deserialize_value(s, (jl_value_t**)&dt->name); - jl_gc_wb(dt, dt->name); - dt->parameters = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->parameters); - jl_gc_wb(dt, dt->parameters); - dt->super = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)&dt->super); - jl_gc_wb(dt, dt->super); - dt->types = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->types); - if (dt->types) jl_gc_wb(dt, dt->types); - - return (jl_value_t*)dt; -} - -static jl_value_t *jl_deserialize_value_svec(jl_serializer_state *s, uint8_t tag, jl_value_t **loc) JL_GC_DISABLED -{ - size_t i, len; - if (tag == TAG_SVEC) - len = read_uint8(s->s); - else - len = read_int32(s->s); - jl_svec_t *sv = jl_alloc_svec(len); - if (loc != NULL) - *loc = (jl_value_t*)sv; - arraylist_push(&backref_list, (jl_value_t*)sv); - jl_value_t **data = jl_svec_data(sv); - for (i = 0; i < len; i++) { - data[i] = jl_deserialize_value(s, &data[i]); - } - return (jl_value_t*)sv; -} - -static jl_value_t *jl_deserialize_value_symbol(jl_serializer_state *s, uint8_t tag) JL_GC_DISABLED -{ - size_t len; - if (tag == TAG_SYMBOL) - len = read_uint8(s->s); - else - len = read_int32(s->s); - char *name = (char*)(len >= 256 ? malloc_s(len + 1) : alloca(len + 1)); - ios_readall(s->s, name, len); - name[len] = '\0'; - jl_value_t *sym = (jl_value_t*)jl_symbol(name); - if (len >= 256) - free(name); - arraylist_push(&backref_list, sym); - return sym; -} - -static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t tag) JL_GC_DISABLED -{ - int16_t i, ndims; - int isptr, isunion, hasptr, elsize; - if (tag == TAG_ARRAY1D) { - ndims = 1; - elsize = read_uint8(s->s); - isptr = (elsize >> 7) & 1; - hasptr = (elsize >> 6) & 1; - isunion = (elsize >> 5) & 1; - elsize = elsize & 0x1f; - } - else { - ndims = read_uint16(s->s); - elsize = read_uint16(s->s); - isptr = (elsize >> 15) & 1; - hasptr = (elsize >> 14) & 1; - isunion = (elsize >> 13) & 1; - elsize = elsize & 0x1fff; - } - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, NULL); - size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); - for (i = 0; i < ndims; i++) { - dims[i] = jl_unbox_long(jl_deserialize_value(s, NULL)); - } - jl_array_t *a = jl_new_array_for_deserialization( - (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize); - backref_list.items[pos] = a; - jl_value_t *aty = jl_deserialize_value(s, &jl_astaggedvalue(a)->type); - jl_set_typeof(a, aty); - if (a->flags.ptrarray) { - jl_value_t **data = (jl_value_t**)jl_array_data(a); - size_t i, numel = jl_array_len(a); - for (i = 0; i < numel; i++) { - data[i] = jl_deserialize_value(s, &data[i]); - //if (data[i]) // not needed because `a` is new (gc is disabled) - // jl_gc_wb(a, data[i]); - } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled - } - else if (a->flags.hasptr) { - size_t i, numel = jl_array_len(a); - char *data = (char*)jl_array_data(a); - uint16_t elsz = a->elsize; - jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(a)); - size_t j, np = et->layout->npointers; - for (i = 0; i < numel; i++) { - char *start = data; - for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset(et, j); - jl_value_t **fld = 
&((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_deserialize_value(s, fld); - //if (*fld) // not needed because `a` is new (gc is disabled) - // jl_gc_wb(a, *fld); - start = (char*)&fld[1]; - } - data += elsz; - if (data != start) - ios_readall(s->s, start, data - start); - } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled - } - else { - size_t extra = jl_array_isbitsunion(a) ? jl_array_len(a) : 0; - size_t tot = jl_array_len(a) * a->elsize + extra; - ios_readall(s->s, (char*)jl_array_data(a), tot); - } - return (jl_value_t*)a; -} - -static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_method_t *m = - (jl_method_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_t), - jl_method_type); - memset(m, 0, sizeof(jl_method_t)); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, m); - m->sig = (jl_value_t*)jl_deserialize_value(s, (jl_value_t**)&m->sig); - jl_gc_wb(m, m->sig); - m->module = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->module); - jl_gc_wb(m, m->module); - int serialization_mode = read_uint8(s->s); - if (serialization_mode & METHOD_EXTERNAL_MT) { - jl_module_t *mt_mod = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *mt_name = (jl_sym_t*)jl_deserialize_value(s, NULL); - m->external_mt = jl_get_global(mt_mod, mt_name); - jl_gc_wb(m, m->external_mt); - assert(jl_typeis(m->external_mt, jl_methtable_type)); - } - else { - m->external_mt = jl_deserialize_value(s, &m->external_mt); - jl_gc_wb(m, m->external_mt); - } - if (!(serialization_mode & METHOD_INTERNAL)) { - assert(loc != NULL && loc != HT_NOTFOUND); - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - if (serialization_mode & METHOD_HAS_NEW_ROOTS) { - uint64_t key = read_uint64(s->s); - int i, nnew = read_int32(s->s); - jl_array_t *newroots = jl_alloc_vec_any(nnew); - jl_value_t **data = (jl_value_t**)jl_array_data(newroots); - for (i = 0; i < nnew; i++) - data[i] = jl_deserialize_value(s, &(data[i])); - // Storing the new roots in `m->roots` risks losing them due to recaching - // (which replaces pointers to `m` with ones to the "live" method). - // Put them in separate storage so we can find them later. - assert(ptrhash_get(&queued_method_roots, m) == HT_NOTFOUND); - // In storing the key, on 32-bit platforms we need two slots. Might as well do this for all platforms. 
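// Concretely, slot 0 of the svec below holds the low 32 bits of the key, slot 1 the high
// 32 bits, and slot 2 the new roots array, so the 64-bit key can later be reassembled as
// (hi << 32) | lo regardless of pointer width.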
- jl_svec_t *qmrval = jl_alloc_svec_uninit(3); // GC is disabled - jl_svec_data(qmrval)[0] = (jl_value_t*)(uintptr_t)(key & ((((uint64_t)1) << 32) - 1)); // lo bits - jl_svec_data(qmrval)[1] = (jl_value_t*)(uintptr_t)((key >> 32) & ((((uint64_t)1) << 32) - 1)); // hi bits - jl_svec_data(qmrval)[2] = (jl_value_t*)newroots; - ptrhash_put(&queued_method_roots, m, qmrval); - } - return (jl_value_t*)m; - } - m->specializations = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->specializations); - jl_gc_wb(m, m->specializations); - jl_array_t *speckeyset = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->speckeyset); - jl_atomic_store_relaxed(&m->speckeyset, speckeyset); - jl_gc_wb(m, speckeyset); - m->name = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_gc_wb(m, m->name); - m->file = (jl_sym_t*)jl_deserialize_value(s, NULL); - m->line = read_int32(s->s); - m->primary_world = jl_atomic_load_acquire(&jl_world_counter); - m->deleted_world = ~(size_t)0; - m->called = read_int32(s->s); - m->nargs = read_int32(s->s); - m->nospecialize = read_int32(s->s); - m->nkw = read_int32(s->s); - m->isva = read_int8(s->s); - m->pure = read_int8(s->s); - m->is_for_opaque_closure = read_int8(s->s); - m->constprop = read_int8(s->s); - m->purity.bits = read_uint8(s->s); - m->slot_syms = jl_deserialize_value(s, (jl_value_t**)&m->slot_syms); - jl_gc_wb(m, m->slot_syms); - m->roots = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->roots); - if (m->roots) - jl_gc_wb(m, m->roots); - m->root_blocks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->root_blocks); - if (m->root_blocks) - jl_gc_wb(m, m->root_blocks); - m->nroots_sysimg = read_int32(s->s); - m->ccallable = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->ccallable); - if (m->ccallable) { - jl_gc_wb(m, m->ccallable); - arraylist_push(&ccallable_list, m->ccallable); - } - m->source = jl_deserialize_value(s, &m->source); - if (m->source) - jl_gc_wb(m, m->source); - m->unspecialized = (jl_method_instance_t*)jl_deserialize_value(s, (jl_value_t**)&m->unspecialized); - if (m->unspecialized) - jl_gc_wb(m, m->unspecialized); - m->generator = jl_deserialize_value(s, (jl_value_t**)&m->generator); - if (m->generator) - jl_gc_wb(m, m->generator); - m->invokes = jl_deserialize_value(s, (jl_value_t**)&m->invokes); - jl_gc_wb(m, m->invokes); - m->recursion_relation = jl_deserialize_value(s, (jl_value_t**)&m->recursion_relation); - if (m->recursion_relation) - jl_gc_wb(m, m->recursion_relation); - JL_MUTEX_INIT(&m->writelock); - return (jl_value_t*)m; -} - -static jl_value_t *jl_deserialize_value_method_instance(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_method_instance_t *mi = - (jl_method_instance_t*)jl_gc_alloc(s->ptls, sizeof(jl_method_instance_t), - jl_method_instance_type); - memset(mi, 0, sizeof(jl_method_instance_t)); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, mi); - int internal = read_uint8(s->s); - if (internal == 1) { - mi->uninferred = jl_deserialize_value(s, &mi->uninferred); - jl_gc_wb(mi, mi->uninferred); - } - mi->specTypes = (jl_value_t*)jl_deserialize_value(s, (jl_value_t**)&mi->specTypes); - jl_gc_wb(mi, mi->specTypes); - mi->def.value = jl_deserialize_value(s, &mi->def.value); - jl_gc_wb(mi, mi->def.value); - - if (!internal) { - assert(loc != NULL && loc != HT_NOTFOUND); - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - return (jl_value_t*)mi; - } - - mi->sparam_vals = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&mi->sparam_vals); - 
jl_gc_wb(mi, mi->sparam_vals); - mi->backedges = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&mi->backedges); - if (mi->backedges) - jl_gc_wb(mi, mi->backedges); - mi->callbacks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&mi->callbacks); - if (mi->callbacks) - jl_gc_wb(mi, mi->callbacks); - mi->cache = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&mi->cache); - if (mi->cache) - jl_gc_wb(mi, mi->cache); - return (jl_value_t*)mi; -} - -static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_code_instance_t *codeinst = - (jl_code_instance_t*)jl_gc_alloc(s->ptls, sizeof(jl_code_instance_t), jl_code_instance_type); - memset(codeinst, 0, sizeof(jl_code_instance_t)); - arraylist_push(&backref_list, codeinst); - int flags = read_uint8(s->s); - int validate = (flags >> 0) & 3; - int constret = (flags >> 2) & 1; - codeinst->ipo_purity_bits = read_uint32(s->s); - codeinst->purity_bits = read_uint32(s->s); - codeinst->def = (jl_method_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->def); - jl_gc_wb(codeinst, codeinst->def); - codeinst->inferred = jl_deserialize_value(s, &codeinst->inferred); - jl_gc_wb(codeinst, codeinst->inferred); - codeinst->rettype_const = jl_deserialize_value(s, &codeinst->rettype_const); - if (codeinst->rettype_const) - jl_gc_wb(codeinst, codeinst->rettype_const); - codeinst->rettype = jl_deserialize_value(s, &codeinst->rettype); - jl_gc_wb(codeinst, codeinst->rettype); - codeinst->argescapes = jl_deserialize_value(s, &codeinst->argescapes); - jl_gc_wb(codeinst, codeinst->argescapes); - if (constret) - codeinst->invoke = jl_fptr_const_return; - if ((flags >> 3) & 1) - codeinst->precompile = 1; - codeinst->relocatability = read_uint8(s->s); - assert(codeinst->relocatability <= 1); - codeinst->next = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->next); - jl_gc_wb(codeinst, codeinst->next); - if (validate) { - codeinst->min_world = jl_atomic_load_acquire(&jl_world_counter); - ptrhash_put(&new_code_instance_validate, codeinst, (void*)(~(uintptr_t)HT_NOTFOUND)); // "HT_FOUND" - } - return (jl_value_t*)codeinst; -} - -static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DISABLED -{ - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, NULL); - jl_sym_t *mname = (jl_sym_t*)jl_deserialize_value(s, NULL); - int ref_only = read_uint8(s->s); - if (ref_only) { - jl_value_t *m_ref; - if (ref_only == 1) - m_ref = jl_get_global((jl_module_t*)jl_deserialize_value(s, NULL), mname); - else - m_ref = jl_array_ptr_ref(s->loaded_modules_array, read_int32(s->s)); - backref_list.items[pos] = m_ref; - return m_ref; - } - jl_module_t *m = jl_new_module(mname); - backref_list.items[pos] = m; - m->parent = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->parent); - jl_gc_wb(m, m->parent); - - while (1) { - jl_sym_t *asname = (jl_sym_t*)jl_deserialize_value(s, NULL); - if (asname == NULL) - break; - jl_binding_t *b = jl_get_binding_wr(m, asname, 1); - b->name = (jl_sym_t*)jl_deserialize_value(s, (jl_value_t**)&b->name); - jl_value_t *bvalue = jl_deserialize_value(s, (jl_value_t**)&b->value); - *(jl_value_t**)&b->value = bvalue; - if (bvalue != NULL) jl_gc_wb(m, bvalue); - jl_value_t *bglobalref = jl_deserialize_value(s, (jl_value_t**)&b->globalref); - *(jl_value_t**)&b->globalref = bglobalref; - if (bglobalref != NULL) jl_gc_wb(m, bglobalref); - b->owner = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&b->owner); - if (b->owner 
!= NULL) jl_gc_wb(m, b->owner); - jl_value_t *bty = jl_deserialize_value(s, (jl_value_t**)&b->ty); - *(jl_value_t**)&b->ty = bty; - int8_t flags = read_int8(s->s); - b->deprecated = (flags>>3) & 1; - b->constp = (flags>>2) & 1; - b->exportp = (flags>>1) & 1; - b->imported = (flags) & 1; - } - size_t i = m->usings.len; - size_t ni = read_int32(s->s); - arraylist_grow(&m->usings, ni); - ni += i; - while (i < ni) { - m->usings.items[i] = jl_deserialize_value(s, (jl_value_t**)&m->usings.items[i]); - i++; - } - m->istopmod = read_uint8(s->s); - m->uuid.hi = read_uint64(s->s); - m->uuid.lo = read_uint64(s->s); - m->build_id = read_uint64(s->s); - m->counter = read_int32(s->s); - m->nospecialize = read_int32(s->s); - m->optlevel = read_int8(s->s); - m->compile = read_int8(s->s); - m->infer = read_int8(s->s); - m->max_methods = read_int8(s->s); - m->primary_world = jl_atomic_load_acquire(&jl_world_counter); - return (jl_value_t*)m; -} - -static jl_value_t *jl_deserialize_value_singleton(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - jl_value_t *v = (jl_value_t*)jl_gc_alloc(s->ptls, 0, NULL); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, (void*)v); - // TODO: optimize the case where the value can easily be obtained - // from an external module (tag == 6) as dt->instance - assert(loc != HT_NOTFOUND); - // if loc == NULL, then the caller can't provide the address where the instance will be - // stored. this happens if a field might store a 0-size value, but the field itself is - // not 0 size, e.g. `::Union{Int,Nothing}` - if (loc != NULL) { - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)pos); - } - jl_datatype_t *dt = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)HT_NOTFOUND); // no loc, since if dt is replaced, then dt->instance would be also - jl_set_typeof(v, dt); - if (dt->instance == NULL) - return v; - return dt->instance; -} - -static void jl_deserialize_struct(jl_serializer_state *s, jl_value_t *v) JL_GC_DISABLED -{ - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v); - char *data = (char*)jl_data_ptr(v); - size_t i, np = dt->layout->npointers; - char *start = data; - for (i = 0; i < np; i++) { - uint32_t ptr = jl_ptr_offset(dt, i); - jl_value_t **fld = &((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_deserialize_value(s, fld); - //if (*fld)// a is new (gc is disabled) - // jl_gc_wb(a, *fld); - start = (char*)&fld[1]; - } - data += jl_datatype_size(dt); - if (data != start) - ios_readall(s->s, start, data - start); - if (dt == jl_typemap_entry_type) { - jl_typemap_entry_t *entry = (jl_typemap_entry_t*)v; - if (entry->max_world == ~(size_t)0) { - if (entry->min_world > 1) { - // update world validity to reflect current state of the counter - entry->min_world = jl_atomic_load_acquire(&jl_world_counter); - } - } - else { - // garbage entry - delete it :( - entry->min_world = 1; - entry->max_world = 0; - } - } -} - -static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag, jl_value_t **loc) JL_GC_DISABLED -{ - int32_t sz = (tag == TAG_SHORT_GENERAL ? 
read_uint8(s->s) : read_int32(s->s)); - jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); - jl_set_typeof(v, (void*)(intptr_t)0x50); - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, v); - jl_datatype_t *dt = (jl_datatype_t*)jl_deserialize_value(s, &jl_astaggedvalue(v)->type); - assert(sz != 0 || loc); - if (dt == jl_typename_type) { - int internal = read_uint8(s->s); - jl_typename_t *tn; - if (internal) { - tn = (jl_typename_t*)jl_gc_alloc( - s->ptls, sizeof(jl_typename_t), jl_typename_type); - memset(tn, 0, sizeof(jl_typename_t)); - tn->cache = jl_emptysvec; // the cache is refilled later (tag 5) - tn->linearcache = jl_emptysvec; // the cache is refilled later (tag 5) - backref_list.items[pos] = tn; - } - jl_module_t *m = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *sym = (jl_sym_t*)jl_deserialize_value(s, NULL); - if (internal) { - tn->module = m; - tn->name = sym; - tn->names = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&tn->names); - jl_gc_wb(tn, tn->names); - tn->wrapper = jl_deserialize_value(s, &tn->wrapper); - jl_gc_wb(tn, tn->wrapper); - tn->Typeofwrapper = NULL; - tn->mt = (jl_methtable_t*)jl_deserialize_value(s, (jl_value_t**)&tn->mt); - jl_gc_wb(tn, tn->mt); - ios_read(s->s, (char*)&tn->hash, sizeof(tn->hash)); - int8_t flags = read_int8(s->s); - tn->abstract = flags & 1; - tn->mutabl = (flags>>1) & 1; - tn->mayinlinealloc = (flags>>2) & 1; - tn->max_methods = read_uint8(s->s); - if (tn->abstract) - tn->n_uninitialized = 0; - else - tn->n_uninitialized = read_uint16(s->s); - size_t nfields = read_int32(s->s); - if (nfields) { - tn->atomicfields = (uint32_t*)malloc(nfields); - ios_read(s->s, (char*)tn->atomicfields, nfields); - } - nfields = read_int32(s->s); - if (nfields) { - tn->constfields = (uint32_t*)malloc(nfields); - ios_read(s->s, (char*)tn->constfields, nfields); - } - } - else { - jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(jl_get_global(m, sym)); - assert(jl_is_datatype(dt)); - tn = dt->name; - backref_list.items[pos] = tn; - } - return (jl_value_t*)tn; - } - jl_set_typeof(v, dt); - if ((jl_value_t*)dt == jl_bigint_type) { - jl_value_t *sizefield = jl_deserialize_value(s, NULL); - int32_t sz = jl_unbox_int32(sizefield); - int32_t nw = (sz == 0 ? 1 : (sz < 0 ? -sz : sz)); - size_t nb = nw * gmp_limb_size; - void *buf = jl_gc_counted_malloc(nb); - if (buf == NULL) - jl_throw(jl_memory_exception); - ios_readall(s->s, (char*)buf, nb); - jl_set_nth_field(v, 0, jl_box_int32(nw)); - jl_set_nth_field(v, 1, sizefield); - jl_set_nth_field(v, 2, jl_box_voidpointer(buf)); - } - else { - jl_deserialize_struct(s, v); - } - return v; -} - -static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc) JL_GC_DISABLED -{ - assert(!ios_eof(s->s)); - jl_value_t *v; - size_t n; - uintptr_t pos; - uint8_t tag = read_uint8(s->s); - if (tag > LAST_TAG) - return deser_tag[tag]; - switch (tag) { - case TAG_NULL: return NULL; - case 0: - tag = read_uint8(s->s); - return deser_tag[tag]; - case TAG_BACKREF: JL_FALLTHROUGH; case TAG_SHORT_BACKREF: ; - uintptr_t offs = (tag == TAG_BACKREF) ? 
read_int32(s->s) : read_uint16(s->s); - int isflagref = 0; - isflagref = !!(offs & 1); - offs >>= 1; - // assert(offs >= 0); // offs is unsigned so this is always true - assert(offs < backref_list.len); - jl_value_t *bp = (jl_value_t*)backref_list.items[offs]; - assert(bp); - if (isflagref && loc != HT_NOTFOUND) { - if (loc != NULL) { - // as in jl_deserialize_value_singleton, the caller won't have a place to - // store this reference given a field type like Union{Int,Nothing} - arraylist_push(&flagref_list, loc); - arraylist_push(&flagref_list, (void*)(uintptr_t)-1); - } - } - return (jl_value_t*)bp; - case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC: - return jl_deserialize_value_svec(s, tag, loc); - case TAG_COMMONSYM: - return deser_symbols[read_uint8(s->s)]; - case TAG_SYMBOL: JL_FALLTHROUGH; case TAG_LONG_SYMBOL: - return jl_deserialize_value_symbol(s, tag); - case TAG_ARRAY: JL_FALLTHROUGH; case TAG_ARRAY1D: - return jl_deserialize_value_array(s, tag); - case TAG_UNIONALL: - pos = backref_list.len; - arraylist_push(&backref_list, NULL); - if (read_uint8(s->s)) { - jl_module_t *m = (jl_module_t*)jl_deserialize_value(s, NULL); - jl_sym_t *sym = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_value_t *v = jl_get_global(m, sym); - assert(jl_is_unionall(v)); - backref_list.items[pos] = v; - return v; - } - v = jl_gc_alloc(s->ptls, sizeof(jl_unionall_t), jl_unionall_type); - backref_list.items[pos] = v; - ((jl_unionall_t*)v)->var = (jl_tvar_t*)jl_deserialize_value(s, (jl_value_t**)&((jl_unionall_t*)v)->var); - jl_gc_wb(v, ((jl_unionall_t*)v)->var); - ((jl_unionall_t*)v)->body = jl_deserialize_value(s, &((jl_unionall_t*)v)->body); - jl_gc_wb(v, ((jl_unionall_t*)v)->body); - return v; - case TAG_TVAR: - v = jl_gc_alloc(s->ptls, sizeof(jl_tvar_t), jl_tvar_type); - jl_tvar_t *tv = (jl_tvar_t*)v; - arraylist_push(&backref_list, tv); - tv->name = (jl_sym_t*)jl_deserialize_value(s, NULL); - jl_gc_wb(tv, tv->name); - tv->lb = jl_deserialize_value(s, &tv->lb); - jl_gc_wb(tv, tv->lb); - tv->ub = jl_deserialize_value(s, &tv->ub); - jl_gc_wb(tv, tv->ub); - return (jl_value_t*)tv; - case TAG_METHOD: - return jl_deserialize_value_method(s, loc); - case TAG_METHOD_INSTANCE: - return jl_deserialize_value_method_instance(s, loc); - case TAG_CODE_INSTANCE: - return jl_deserialize_value_code_instance(s, loc); - case TAG_MODULE: - return jl_deserialize_value_module(s); - case TAG_SHORTER_INT64: - v = jl_box_int64((int16_t)read_uint16(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_SHORT_INT64: - v = jl_box_int64(read_int32(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_INT64: - v = jl_box_int64((int64_t)read_uint64(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_SHORT_INT32: - v = jl_box_int32((int16_t)read_uint16(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_INT32: - v = jl_box_int32(read_int32(s->s)); - arraylist_push(&backref_list, v); - return v; - case TAG_UINT8: - return jl_box_uint8(read_uint8(s->s)); - case TAG_SINGLETON: - return jl_deserialize_value_singleton(s, loc); - case TAG_CORE: - return (jl_value_t*)jl_core_module; - case TAG_BASE: - return (jl_value_t*)jl_base_module; - case TAG_CNULL: - v = jl_gc_alloc(s->ptls, sizeof(void*), NULL); - jl_set_typeof(v, (void*)(intptr_t)0x50); - *(void**)v = NULL; - uintptr_t pos = backref_list.len; - arraylist_push(&backref_list, v); - jl_set_typeof(v, jl_deserialize_value(s, &jl_astaggedvalue(v)->type)); - return v; - case TAG_BITYPENAME: - v = deser_tag[read_uint8(s->s)]; - 
return (jl_value_t*)((jl_datatype_t*)jl_unwrap_unionall(v))->name; - case TAG_STRING: - n = read_int32(s->s); - v = jl_alloc_string(n); - arraylist_push(&backref_list, v); - ios_readall(s->s, jl_string_data(v), n); - return v; - case TAG_DATATYPE: - pos = backref_list.len; - arraylist_push(&backref_list, NULL); - return jl_deserialize_datatype(s, pos, loc); - default: - assert(tag == TAG_GENERAL || tag == TAG_SHORT_GENERAL); - return jl_deserialize_value_any(s, tag, loc); - } -} - -// Add methods to external (non-worklist-owned) functions -static void jl_insert_methods(jl_array_t *list) -{ - size_t i, l = jl_array_len(list); - for (i = 0; i < l; i += 2) { - jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i); - assert(jl_is_method(meth)); - assert(!meth->is_for_opaque_closure); - jl_tupletype_t *simpletype = (jl_tupletype_t*)jl_array_ptr_ref(list, i + 1); - jl_methtable_t *mt = jl_method_get_table(meth); - assert((jl_value_t*)mt != jl_nothing); - jl_method_table_insert(mt, meth, simpletype); - } -} - -void remove_code_instance_from_validation(jl_code_instance_t *codeinst) -{ - ptrhash_remove(&new_code_instance_validate, codeinst); -} - -static void jl_insert_method_instances(jl_array_t *list) -{ - size_t i, l = jl_array_len(list); - // Validate the MethodInstances - jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l); - memset(jl_array_data(valids), 1, l); - size_t world = jl_atomic_load_acquire(&jl_world_counter); - for (i = 0; i < l; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - assert(jl_is_method_instance(mi)); - if (jl_is_method(mi->def.method)) { - // Is this still the method we'd be calling? - jl_methtable_t *mt = jl_method_table_for(mi->specTypes); - struct jl_typemap_assoc search = {(jl_value_t*)mi->specTypes, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/1); - if (entry) { - jl_value_t *mworld = entry->func.value; - if (jl_is_method(mworld) && mi->def.method != (jl_method_t*)mworld && jl_type_morespecific(((jl_method_t*)mworld)->sig, mi->def.method->sig)) { - jl_array_uint8_set(valids, i, 0); - invalidate_backedges(&remove_code_instance_from_validation, mi, world, "jl_insert_method_instance"); - // The codeinst of this mi haven't yet been removed - jl_code_instance_t *codeinst = mi->cache; - while (codeinst) { - remove_code_instance_from_validation(codeinst); - codeinst = codeinst->next; - } - if (_jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, mworld); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, jl_cstr_to_string("jl_method_table_insert")); // GC disabled - } - } - } - } - } - // While it's tempting to just remove the invalidated MIs altogether, - // this hurts the ability of SnoopCompile to diagnose problems. - for (i = 0; i < l; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - jl_method_instance_t *milive = jl_specializations_get_or_insert(mi); - ptrhash_put(&uniquing_table, mi, milive); // store the association for the 2nd pass - } - // We may need to fix up the backedges for the ones that didn't "go live" - for (i = 0; i < l; i++) { - jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - jl_method_instance_t *milive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, mi); - if (milive != mi) { - // A previously-loaded module compiled this method, so the one we deserialized will be dropped. 
- // But make sure the backedges are copied over. - if (mi->backedges) { - if (!milive->backedges) { - // Copy all the backedges (after looking up the live ones) - size_t j, n = jl_array_len(mi->backedges); - milive->backedges = jl_alloc_vec_any(n); - jl_gc_wb(milive, milive->backedges); - for (j = 0; j < n; j++) { - jl_method_instance_t *be = (jl_method_instance_t*)jl_array_ptr_ref(mi->backedges, j); - jl_method_instance_t *belive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, be); - if (belive == HT_NOTFOUND) - belive = be; - jl_array_ptr_set(milive->backedges, j, belive); - } - } else { - // Copy the missing backedges (this is an O(N^2) algorithm, but many methods have few MethodInstances) - size_t j, k, n = jl_array_len(mi->backedges), nlive = jl_array_len(milive->backedges); - for (j = 0; j < n; j++) { - jl_method_instance_t *be = (jl_method_instance_t*)jl_array_ptr_ref(mi->backedges, j); - jl_method_instance_t *belive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, be); - if (belive == HT_NOTFOUND) - belive = be; - int found = 0; - for (k = 0; k < nlive; k++) { - if (belive == (jl_method_instance_t*)jl_array_ptr_ref(milive->backedges, k)) { - found = 1; - break; - } - } - if (!found) - jl_array_ptr_1d_push(milive->backedges, (jl_value_t*)belive); - } - } - } - // Additionally, if we have CodeInstance(s) and the running CodeInstance is world-limited, transfer it - if (mi->cache && jl_array_uint8_ref(valids, i)) { - if (!milive->cache || milive->cache->max_world < ~(size_t)0) { - jl_code_instance_t *cilive = milive->cache, *ci; - milive->cache = mi->cache; - jl_gc_wb(milive, milive->cache); - ci = mi->cache; - ci->def = milive; - while (ci->next) { - ci = ci->next; - ci->def = milive; - } - ci->next = cilive; - jl_gc_wb(ci, ci->next); - } - } - } - } -} - -// verify that these edges intersect with the same methods as before -static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids) -{ - size_t i, l = jl_array_len(targets) / 2; - jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l); - memset(jl_array_data(valids), 1, l); - jl_value_t *loctag = NULL; - JL_GC_PUSH1(&loctag); - *pvalids = valids; - for (i = 0; i < l; i++) { - jl_value_t *callee = jl_array_ptr_ref(targets, i * 2); - jl_method_instance_t *callee_mi = (jl_method_instance_t*)callee; - jl_value_t *sig; - if (jl_is_method_instance(callee)) { - sig = callee_mi->specTypes; - } - else { - sig = callee; - } - jl_array_t *expected = (jl_array_t*)jl_array_ptr_ref(targets, i * 2 + 1); - assert(jl_is_array(expected)); - int valid = 1; - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - int ambig = 0; - // TODO: possibly need to included ambiguities too (for the optimizer correctness)? 
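// An edge is kept only if the current method matches for `sig` are exactly the `expected`
// set recorded when this cache file was written; a method added to or deleted from that
// intersection marks the edge invalid, and jl_insert_backedges below then leaves the
// dependent callers' CodeInstances unvalidated.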
- jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_atomic_load_acquire(&jl_world_counter), &min_valid, &max_valid, &ambig); - if (matches == jl_false || jl_array_len(matches) != jl_array_len(expected)) { - valid = 0; - } - else { - size_t j, k, l = jl_array_len(expected); - for (k = 0; k < jl_array_len(matches); k++) { - jl_method_match_t *match = (jl_method_match_t*)jl_array_ptr_ref(matches, k); - jl_method_t *m = match->method; - for (j = 0; j < l; j++) { - if (m == (jl_method_t*)jl_array_ptr_ref(expected, j)) - break; - } - if (j == l) { - // intersection has a new method or a method was - // deleted--this is now probably no good, just invalidate - // everything about it now - valid = 0; - break; - } - } - } - jl_array_uint8_set(valids, i, valid); - if (!valid && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee); - loctag = jl_cstr_to_string("insert_backedges_callee"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - } - } - JL_GC_POP(); -} - -// Restore backedges to external targets -// `targets` is [callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods. -// `list` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods. -static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets) -{ - // map(enable, ((list[i] => targets[list[i + 1] .* 2]) for i in 1:2:length(list) if all(valids[list[i + 1]]))) - size_t i, l = jl_array_len(list); - jl_array_t *valids = NULL; - jl_value_t *loctag = NULL; - JL_GC_PUSH2(&valids, &loctag); - jl_verify_edges(targets, &valids); - for (i = 0; i < l; i += 2) { - jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(list, i); - assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); - jl_array_t *idxs_array = (jl_array_t*)jl_array_ptr_ref(list, i + 1); - assert(jl_isa((jl_value_t*)idxs_array, jl_array_int32_type)); - int32_t *idxs = (int32_t*)jl_array_data(idxs_array); - int valid = 1; - size_t j; - for (j = 0; valid && j < jl_array_len(idxs_array); j++) { - int32_t idx = idxs[j]; - valid = jl_array_uint8_ref(valids, idx); - } - if (valid) { - // if this callee is still valid, add all the backedges - for (j = 0; j < jl_array_len(idxs_array); j++) { - int32_t idx = idxs[j]; - jl_value_t *callee = jl_array_ptr_ref(targets, idx * 2); - if (jl_is_method_instance(callee)) { - jl_method_instance_add_backedge((jl_method_instance_t*)callee, caller); - } - else { - jl_methtable_t *mt = jl_method_table_for(callee); - // FIXME: rarely, `callee` has an unexpected `Union` signature, - // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 - // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` - // This workaround exposes us to (rare) 265-violations. 
- if ((jl_value_t*)mt != jl_nothing) - jl_method_table_add_backedge(mt, callee, (jl_value_t*)caller); - } - } - // then enable it - jl_code_instance_t *codeinst = caller->cache; - while (codeinst) { - if (ptrhash_get(&new_code_instance_validate, codeinst) != HT_NOTFOUND && codeinst->min_world > 0) - codeinst->max_world = ~(size_t)0; - ptrhash_remove(&new_code_instance_validate, codeinst); // mark it as handled - codeinst = jl_atomic_load_relaxed(&codeinst->next); - } - } - else { - jl_code_instance_t *codeinst = caller->cache; - while (codeinst) { - ptrhash_remove(&new_code_instance_validate, codeinst); // should be left invalid - codeinst = jl_atomic_load_relaxed(&codeinst->next); - } - if (_jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); - loctag = jl_cstr_to_string("insert_backedges"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - } - } - } - JL_GC_POP(); -} - -static void validate_new_code_instances(void) -{ - size_t i; - for (i = 0; i < new_code_instance_validate.size; i += 2) { - if (new_code_instance_validate.table[i+1] != HT_NOTFOUND) { - ((jl_code_instance_t*)new_code_instance_validate.table[i])->max_world = ~(size_t)0; - } - } -} - -static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *mod_list) -{ - if (!jl_main_module->build_id) { - return jl_get_exceptionf(jl_errorexception_type, - "Main module uuid state is invalid for module deserialization."); - } - size_t i, l = jl_array_len(mod_list); - for (i = 0; ; i++) { - size_t len = read_int32(s); - if (len == 0 && i == l) - return NULL; // success - if (len == 0 || i == l) - return jl_get_exceptionf(jl_errorexception_type, "Wrong number of entries in module list."); - char *name = (char*)alloca(len + 1); - ios_readall(s, name, len); - name[len] = '\0'; - jl_uuid_t uuid; - uuid.hi = read_uint64(s); - uuid.lo = read_uint64(s); - uint64_t build_id = read_uint64(s); - jl_sym_t *sym = _jl_symbol(name, len); - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_list, i); - if (!m || !jl_is_module(m) || m->uuid.hi != uuid.hi || m->uuid.lo != uuid.lo || m->name != sym || m->build_id != build_id) { - return jl_get_exceptionf(jl_errorexception_type, - "Invalid input in module list: expected %s.", name); - } - } -} - -static int readstr_verify(ios_t *s, const char *str) -{ - size_t i, len = strlen(str); - for (i = 0; i < len; ++i) - if ((char)read_uint8(s) != str[i]) - return 0; - return 1; -} - -JL_DLLEXPORT int jl_read_verify_header(ios_t *s) -{ - uint16_t bom; - return (readstr_verify(s, JI_MAGIC) && - read_uint16(s) == JI_FORMAT_VERSION && - ios_read(s, (char *) &bom, 2) == 2 && bom == BOM && - read_uint8(s) == sizeof(void*) && - readstr_verify(s, JL_BUILD_UNAME) && !read_uint8(s) && - readstr_verify(s, JL_BUILD_ARCH) && !read_uint8(s) && - readstr_verify(s, JULIA_VERSION_STRING) && !read_uint8(s) && - readstr_verify(s, jl_git_branch()) && !read_uint8(s) && - readstr_verify(s, jl_git_commit()) && !read_uint8(s)); -} - -static void jl_finalize_serializer(jl_serializer_state *s) -{ - size_t i, l; - // save module initialization order - if (jl_module_init_order != NULL) { - l = jl_array_len(jl_module_init_order); - for (i = 0; i < l; i++) { - // verify that all these modules were saved - assert(ptrhash_get(&backref_table, jl_array_ptr_ref(jl_module_init_order, i)) != HT_NOTFOUND); - } - } - jl_serialize_value(s, jl_module_init_order); - - // record list of reinitialization functions - l = reinit_list.len; - for (i = 0; i < l; i += 2) { - write_int32(s->s, 
(int)((uintptr_t) reinit_list.items[i])); - write_int32(s->s, (int)((uintptr_t) reinit_list.items[i+1])); - } - write_int32(s->s, -1); -} - -static void jl_reinit_item(jl_value_t *v, int how, arraylist_t *tracee_list) -{ - JL_TRY { - switch (how) { - case 1: { // rehash IdDict - jl_array_t **a = (jl_array_t**)v; - // Assume *a don't need a write barrier - *a = jl_idtable_rehash(*a, jl_array_len(*a)); - jl_gc_wb(v, *a); - break; - } - case 2: { // reinsert module v into parent (const) - jl_module_t *mod = (jl_module_t*)v; - if (mod->parent == mod) // top level modules handled by loader - break; - jl_binding_t *b = jl_get_binding_wr(mod->parent, mod->name, 1); // this can throw - jl_declare_constant(b); // this can also throw - if (b->value != NULL) { - if (!jl_is_module(b->value)) { - jl_errorf("Invalid redefinition of constant %s.", - jl_symbol_name(mod->name)); // this also throws - } - if (jl_generating_output() && jl_options.incremental) { - jl_errorf("Cannot replace module %s during incremental precompile.", jl_symbol_name(mod->name)); - } - jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(mod->name)); - } - b->value = v; - jl_gc_wb_binding(b, v); - break; - } - case 3: { // rehash MethodTable - jl_methtable_t *mt = (jl_methtable_t*)v; - if (tracee_list) - arraylist_push(tracee_list, mt); - break; - } - default: - assert(0 && "corrupt deserialization state"); - abort(); - } - } - JL_CATCH { - jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: error while reinitializing value "); - jl_static_show((JL_STREAM*)STDERR_FILENO, v); - jl_printf((JL_STREAM*)STDERR_FILENO, ":\n"); - jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception()); - jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); - jlbacktrace(); // written to STDERR_FILENO - } -} - -static jl_array_t *jl_finalize_deserializer(jl_serializer_state *s, arraylist_t *tracee_list) -{ - jl_array_t *init_order = (jl_array_t*)jl_deserialize_value(s, NULL); - - // run reinitialization functions - int pos = read_int32(s->s); - while (pos != -1) { - jl_reinit_item((jl_value_t*)backref_list.items[pos], read_int32(s->s), tracee_list); - pos = read_int32(s->s); - } - return init_order; -} - -JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order) -{ - int i, l = jl_array_len(init_order); - for (i = 0; i < l; i++) { - jl_value_t *mod = jl_array_ptr_ref(init_order, i); - if (!jl_generating_output() || jl_options.incremental) { - jl_module_run_initializer((jl_module_t*)mod); - } - else { - if (jl_module_init_order == NULL) - jl_module_init_order = jl_alloc_vec_any(0); - jl_array_ptr_1d_push(jl_module_init_order, mod); - } - } -} - - -// --- entry points --- - -// Register all newly-inferred MethodInstances -// This gets called as the final step of Base.include_package_for_output -JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred) -{ - assert(_newly_inferred == NULL || jl_is_array(_newly_inferred)); - newly_inferred = (jl_array_t*) _newly_inferred; -} - -// Serialize the modules in `worklist` to file `fname` -JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist) -{ - JL_TIMING(SAVE_MODULE); - ios_t f; - jl_array_t *mod_array = NULL, *udeps = NULL; - if (ios_file(&f, fname, 1, 1, 1, 1) == NULL) { - jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", fname); - return 1; - } - JL_GC_PUSH2(&mod_array, &udeps); - mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array) - assert(jl_precompile_toplevel_module 
== NULL); - jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); - - serializer_worklist = worklist; - write_header(&f); - // write description of contents (name, uuid, buildid) - write_work_list(&f); - // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist - // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header. - // Also write Preferences. - int64_t srctextpos = write_dependency_list(&f, &udeps); // srctextpos: position of srctext entry in header index (update later) - // write description of requirements for loading (modules that must be pre-loaded if initialization is to succeed) - // this can return errors during deserialize, - // best to keep it early (before any actual initialization) - write_mod_list(&f, mod_array); - - arraylist_new(&reinit_list, 0); - htable_new(&edges_map, 0); - htable_new(&backref_table, 5000); - htable_new(&external_mis, newly_inferred ? jl_array_len(newly_inferred) : 0); - ptrhash_put(&backref_table, jl_main_module, (char*)HT_NOTFOUND + 1); - backref_table_numel = 1; - jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL; - jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL; - jl_bigint_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; - if (jl_bigint_type) { - gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), - jl_symbol("BITS_PER_LIMB"))) / 8; - } - - int en = jl_gc_enable(0); // edges map is not gc-safe - jl_array_t *extext_methods = jl_alloc_vec_any(0); // [method1, simplesig1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist - jl_array_t *ext_targets = jl_alloc_vec_any(0); // [callee1, matches1, ...] non-worklist callees of worklist-owned methods - jl_array_t *edges = jl_alloc_vec_any(0); // [caller1, ext_targets_indexes1, ...] for worklist-owned methods calling external methods - - int n_ext_mis = queue_external_mis(newly_inferred); - - size_t i; - size_t len = jl_array_len(mod_array); - for (i = 0; i < len; i++) { - jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); - assert(jl_is_module(m)); - if (m->parent == m) // some toplevel modules (really just Base) aren't actually - jl_collect_extext_methods_from_mod(extext_methods, m); - } - jl_collect_methtable_from_mod(extext_methods, jl_type_type_mt); - jl_collect_missing_backedges_to_mod(jl_type_type_mt); - jl_collect_methtable_from_mod(extext_methods, jl_nonfunction_mt); - jl_collect_missing_backedges_to_mod(jl_nonfunction_mt); - - // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges_to_mod accumulate data in edges_map. - // Process this to extract `edges` and `ext_targets`. 
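// Note on the encoding produced here: for each caller pushed into `edges`, the paired
// entry is an Int32 vector of zero-based pair indices into `ext_targets` (see the
// `(char*)target - (char*)HT_NOTFOUND - 1` arithmetic in jl_collect_backedges above), so
// on reload index i names the callee at ext_targets[2i] and its recorded matches at
// ext_targets[2i+1].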
- jl_collect_backedges(edges, ext_targets); - - jl_serializer_state s = { - &f, - jl_current_task->ptls, - mod_array - }; - jl_serialize_value(&s, worklist); // serialize module-owned items (those accessible from the bindings table) - jl_serialize_value(&s, extext_methods); // serialize new worklist-owned methods for external functions - serialize_htable_keys(&s, &external_mis, n_ext_mis); // serialize external MethodInstances - - // The next two allow us to restore backedges from external "unserialized" (stub-serialized) MethodInstances - // to the ones we serialize here - jl_serialize_value(&s, edges); - jl_serialize_value(&s, ext_targets); - jl_finalize_serializer(&s); - serializer_worklist = NULL; - - jl_gc_enable(en); - htable_reset(&edges_map, 0); - htable_reset(&backref_table, 0); - htable_reset(&external_mis, 0); - arraylist_free(&reinit_list); - - // Write the source-text for the dependent files - if (udeps) { - // Go back and update the source-text position to point to the current position - int64_t posfile = ios_pos(&f); - ios_seek(&f, srctextpos); - write_int64(&f, posfile); - ios_seek_end(&f); - // Each source-text file is written as - // int32: length of abspath - // char*: abspath - // uint64: length of src text - // char*: src text - // At the end we write int32(0) as a terminal sentinel. - len = jl_array_len(udeps); - ios_t srctext; - for (i = 0; i < len; i++) { - jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); - jl_value_t *depmod = jl_fieldref(deptuple, 0); // module - // Dependencies declared with `include_dependency` are excluded - // because these may not be Julia code (and could be huge) - if (depmod != (jl_value_t*)jl_main_module) { - jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath - const char *depstr = jl_string_data(dep); - if (!depstr[0]) - continue; - ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0); - if (!srctp) { - jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", - jl_string_data(dep)); - continue; - } - size_t slen = jl_string_len(dep); - write_int32(&f, slen); - ios_write(&f, depstr, slen); - posfile = ios_pos(&f); - write_uint64(&f, 0); // placeholder for length of this file in bytes - uint64_t filelen = (uint64_t) ios_copyall(&f, &srctext); - ios_close(&srctext); - ios_seek(&f, posfile); - write_uint64(&f, filelen); - ios_seek_end(&f); - } - } - } - write_int32(&f, 0); // mark the end of the source text - ios_close(&f); - JL_GC_POP(); - jl_precompile_toplevel_module = NULL; - - return 0; -} - -#ifndef JL_NDEBUG -// skip the performance optimizations of jl_types_equal and just use subtyping directly -// one of these types is invalid - that's why we're doing the recache type operation -static int jl_invalid_types_equal(jl_datatype_t *a, jl_datatype_t *b) -{ - return jl_subtype((jl_value_t*)a, (jl_value_t*)b) && jl_subtype((jl_value_t*)b, (jl_value_t*)a); -} -STATIC_INLINE jl_value_t *verify_type(jl_value_t *v) JL_NOTSAFEPOINT -{ - assert(v && jl_typeof(v) && jl_typeof(jl_typeof(v)) == (jl_value_t*)jl_datatype_type); - return v; -} -#endif - - -static jl_datatype_t *recache_datatype(jl_datatype_t *dt) JL_GC_DISABLED; - -static jl_value_t *recache_type(jl_value_t *p) JL_GC_DISABLED -{ - if (jl_is_datatype(p)) { - jl_datatype_t *pdt = (jl_datatype_t*)p; - if (ptrhash_get(&uniquing_table, p) != HT_NOTFOUND) { - p = (jl_value_t*)recache_datatype(pdt); - } - else { - jl_svec_t *tt = pdt->parameters; - // ensure all type parameters are recached - size_t i, l = jl_svec_len(tt); - for (i = 0; i < l; i++) - 
jl_svecset(tt, i, recache_type(jl_svecref(tt, i))); - ptrhash_put(&uniquing_table, p, p); // ensures this algorithm isn't too exponential - } - } - else if (jl_is_typevar(p)) { - jl_tvar_t *ptv = (jl_tvar_t*)p; - ptv->lb = recache_type(ptv->lb); - ptv->ub = recache_type(ptv->ub); - } - else if (jl_is_uniontype(p)) { - jl_uniontype_t *pu = (jl_uniontype_t*)p; - pu->a = recache_type(pu->a); - pu->b = recache_type(pu->b); - } - else if (jl_is_unionall(p)) { - jl_unionall_t *pa = (jl_unionall_t*)p; - pa->var = (jl_tvar_t*)recache_type((jl_value_t*)pa->var); - pa->body = recache_type(pa->body); - } - else { - jl_datatype_t *pt = (jl_datatype_t*)jl_typeof(p); - jl_datatype_t *cachep = recache_datatype(pt); - if (cachep->instance) - p = cachep->instance; - else if (pt != cachep) - jl_set_typeof(p, cachep); - } - return p; -} - -// Extract pre-existing datatypes from cache, and insert new types into cache -// insertions also update uniquing_table -static jl_datatype_t *recache_datatype(jl_datatype_t *dt) JL_GC_DISABLED -{ - jl_datatype_t *t; // the type after unique'ing - assert(verify_type((jl_value_t*)dt)); - t = (jl_datatype_t*)ptrhash_get(&uniquing_table, dt); - if (t == HT_NOTFOUND) - return dt; - if (t != NULL) - return t; - - jl_svec_t *tt = dt->parameters; - // recache all type parameters - size_t i, l = jl_svec_len(tt); - for (i = 0; i < l; i++) - jl_svecset(tt, i, recache_type(jl_svecref(tt, i))); - - // then recache the type itself - if (jl_svec_len(tt) == 0) { // jl_cache_type doesn't work if length(parameters) == 0 - t = dt; - } - else { - t = jl_lookup_cache_type_(dt); - if (t == NULL) { - jl_cache_type_(dt); - t = dt; - } - assert(t->hash == dt->hash); - assert(jl_invalid_types_equal(t, dt)); - } - ptrhash_put(&uniquing_table, dt, t); - return t; -} - -// Recache everything from flagref_list except methods and method instances -// Cleans out any handled items so that anything left in flagref_list still needs future processing -static void jl_recache_types(void) JL_GC_DISABLED -{ - size_t i; - // first rewrite all the unique'd objects - for (i = 0; i < flagref_list.len; i += 2) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? 
*loc : (jl_value_t*)backref_list.items[offs]; - if (!jl_is_method(o) && !jl_is_method_instance(o)) { - jl_datatype_t *dt; - jl_value_t *v; - if (jl_is_datatype(o)) { - dt = (jl_datatype_t*)o; - v = dt->instance; - } - else { - dt = (jl_datatype_t*)jl_typeof(o); - v = o; - } - jl_datatype_t *t = recache_datatype(dt); // get or create cached type (also updates uniquing_table) - if ((jl_value_t*)dt == o && t != dt) { - assert(!type_in_worklist(dt)); - if (loc) - *loc = (jl_value_t*)t; - if (offs > 0) - backref_list.items[offs] = t; - } - if (v == o && t->instance != v) { - assert(t->instance); - assert(loc); - *loc = t->instance; - if (offs > 0) - backref_list.items[offs] = t->instance; - } - } - } - // invalidate the old datatypes to help catch errors - for (i = 0; i < uniquing_table.size; i += 2) { - jl_datatype_t *o = (jl_datatype_t*)uniquing_table.table[i]; // deserialized ref - jl_datatype_t *t = (jl_datatype_t*)uniquing_table.table[i + 1]; // the real type - if (o != t) { - assert(t != NULL && jl_is_datatype(o)); - if (t->instance != o->instance) - jl_set_typeof(o->instance, (void*)(intptr_t)0x20); - jl_set_typeof(o, (void*)(intptr_t)0x10); - } - } - // then do a cleanup pass to drop these from future iterations of flagref_list - i = 0; - while (i < flagref_list.len) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? *loc : (jl_value_t*)backref_list.items[offs]; - if (jl_is_method(o) || jl_is_method_instance(o)) { - i += 2; - } - else { - // delete this item from the flagref list, so it won't be re-encountered later - flagref_list.len -= 2; - if (i >= flagref_list.len) - break; - flagref_list.items[i + 0] = flagref_list.items[flagref_list.len + 0]; // move end-of-list here (executes a `reverse()`) - flagref_list.items[i + 1] = flagref_list.items[flagref_list.len + 1]; - } - } -} - -// look up a method from a previously deserialized dependent module -static jl_method_t *jl_lookup_method(jl_methtable_t *mt, jl_datatype_t *sig, size_t world) -{ - if (world < jl_main_module->primary_world) - world = jl_main_module->primary_world; - struct jl_typemap_assoc search = {(jl_value_t*)sig, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/0); - return (jl_method_t*)entry->func.value; -} - -static jl_method_t *jl_recache_method(jl_method_t *m) -{ - assert(!m->is_for_opaque_closure); - assert(jl_is_method(m)); - jl_datatype_t *sig = (jl_datatype_t*)m->sig; - jl_methtable_t *mt = jl_method_get_table(m); - assert((jl_value_t*)mt != jl_nothing); - jl_set_typeof(m, (void*)(intptr_t)0x30); // invalidate the old value to help catch errors - return jl_lookup_method(mt, sig, m->module->primary_world); -} - -static jl_value_t *jl_recache_other_(jl_value_t *o); - -static jl_method_instance_t *jl_recache_method_instance(jl_method_instance_t *mi) -{ - jl_method_t *m = mi->def.method; - m = (jl_method_t*)jl_recache_other_((jl_value_t*)m); - assert(jl_is_method(m)); - jl_datatype_t *argtypes = (jl_datatype_t*)mi->specTypes; - jl_set_typeof(mi, (void*)(intptr_t)0x40); // invalidate the old value to help catch errors - jl_svec_t *env = jl_emptysvec; - jl_value_t *ti = jl_type_intersection_env((jl_value_t*)argtypes, (jl_value_t*)m->sig, &env); - //assert(ti != jl_bottom_type); (void)ti; - if (ti == jl_bottom_type) - env = jl_emptysvec; // the intersection may fail now if the type system had made an incorrect subtype env in the past - 
jl_method_instance_t *_new = jl_specializations_get_linfo(m, (jl_value_t*)argtypes, env); - return _new; -} - -static jl_value_t *jl_recache_other_(jl_value_t *o) -{ - jl_value_t *newo = (jl_value_t*)ptrhash_get(&uniquing_table, o); - if (newo != HT_NOTFOUND) - return newo; - if (jl_is_method(o)) { - // lookup the real Method based on the placeholder sig - newo = (jl_value_t*)jl_recache_method((jl_method_t*)o); - ptrhash_put(&uniquing_table, newo, newo); - } - else if (jl_is_method_instance(o)) { - // lookup the real MethodInstance based on the placeholder specTypes - newo = (jl_value_t*)jl_recache_method_instance((jl_method_instance_t*)o); - } - else { - abort(); - } - ptrhash_put(&uniquing_table, o, newo); - return newo; -} - -static void jl_recache_other(void) -{ - size_t i = 0; - while (i < flagref_list.len) { - jl_value_t **loc = (jl_value_t**)flagref_list.items[i + 0]; - int offs = (int)(intptr_t)flagref_list.items[i + 1]; - jl_value_t *o = loc ? *loc : (jl_value_t*)backref_list.items[offs]; - i += 2; - jl_value_t *newo = jl_recache_other_(o); - if (loc) - *loc = newo; - if (offs > 0) - backref_list.items[offs] = newo; - } - flagref_list.len = 0; -} - -// Wait to copy roots until recaching is done -// This is because recaching requires that all pointers to methods and methodinstances -// stay at their source location as recorded by flagref_list. Once recaching is complete, -// they can be safely copied over. -static void jl_copy_roots(void) -{ - size_t i, j, l; - for (i = 0; i < queued_method_roots.size; i+=2) { - jl_method_t *m = (jl_method_t*)queued_method_roots.table[i]; - m = (jl_method_t*)ptrhash_get(&uniquing_table, m); - jl_svec_t *keyroots = (jl_svec_t*)queued_method_roots.table[i+1]; - if (keyroots != HT_NOTFOUND) { - uint64_t key = (uint64_t)(uintptr_t)jl_svec_ref(keyroots, 0) | ((uint64_t)(uintptr_t)jl_svec_ref(keyroots, 1) << 32); - jl_array_t *roots = (jl_array_t*)jl_svec_ref(keyroots, 2); - assert(jl_is_array(roots)); - l = jl_array_len(roots); - for (j = 0; j < l; j++) { - jl_value_t *r = jl_array_ptr_ref(roots, j); - jl_value_t *newr = (jl_value_t*)ptrhash_get(&uniquing_table, r); - if (newr != HT_NOTFOUND) { - jl_array_ptr_set(roots, j, newr); - } - } - jl_append_method_roots(m, key, roots); - } - } -} - -static int trace_method(jl_typemap_entry_t *entry, void *closure) -{ - jl_call_tracer(jl_newmeth_tracer, (jl_value_t*)entry->func.method); - return 1; -} - -// Restore module(s) from a cache file f -static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array) -{ - JL_TIMING(LOAD_MODULE); - jl_task_t *ct = jl_current_task; - if (ios_eof(f) || !jl_read_verify_header(f)) { - ios_close(f); - return jl_get_exceptionf(jl_errorexception_type, - "Precompile file header verification checks failed."); - } - { // skip past the mod list - size_t len; - while ((len = read_int32(f))) - ios_skip(f, len + 3 * sizeof(uint64_t)); - } - { // skip past the dependency list - size_t deplen = read_uint64(f); - ios_skip(f, deplen); - } - - jl_bigint_type = jl_base_module ? 
jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; - if (jl_bigint_type) { - gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), - jl_symbol("BITS_PER_LIMB"))) / 8; - } - - // verify that the system state is valid - jl_value_t *verify_fail = read_verify_mod_list(f, mod_array); - if (verify_fail) { - ios_close(f); - return verify_fail; - } - - // prepare to deserialize - int en = jl_gc_enable(0); - jl_gc_enable_finalizers(ct, 0); - jl_atomic_fetch_add(&jl_world_counter, 1); // reserve a world age for the deserialization - - arraylist_new(&backref_list, 4000); - arraylist_push(&backref_list, jl_main_module); - arraylist_new(&flagref_list, 0); - htable_new(&queued_method_roots, 0); - htable_new(&new_code_instance_validate, 0); - arraylist_new(&ccallable_list, 0); - htable_new(&uniquing_table, 0); - - jl_serializer_state s = { - f, - ct->ptls, - mod_array - }; - jl_array_t *restored = (jl_array_t*)jl_deserialize_value(&s, (jl_value_t**)&restored); - serializer_worklist = restored; - assert(jl_isa((jl_value_t*)restored, jl_array_any_type)); - - // See explanation in jl_save_incremental for variables of the same names - jl_value_t *extext_methods = jl_deserialize_value(&s, &extext_methods); - int i, n_ext_mis = read_int32(s.s); - jl_array_t *mi_list = jl_alloc_vec_any(n_ext_mis); // reload MIs stored by serialize_htable_keys - jl_value_t **midata = (jl_value_t**)jl_array_data(mi_list); - for (i = 0; i < n_ext_mis; i++) - midata[i] = jl_deserialize_value(&s, &(midata[i])); - jl_value_t *edges = jl_deserialize_value(&s, &edges); - jl_value_t *ext_targets = jl_deserialize_value(&s, &ext_targets); - - arraylist_t *tracee_list = NULL; - if (jl_newmeth_tracer) // debugging - tracee_list = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); - - // at this point, the AST is fully reconstructed, but still completely disconnected - // now all of the interconnects will be created - jl_recache_types(); // make all of the types identities correct - jl_insert_methods((jl_array_t*)extext_methods); // hook up extension methods for external generic functions (needs to be after recache types) - jl_recache_other(); // make all of the other objects identities correct (needs to be after insert methods) - jl_copy_roots(); // copying new roots of external methods (must wait until recaching is complete) - // At this point, the novel specializations in mi_list reference the real method, but they haven't been cached in its specializations - jl_insert_method_instances(mi_list); // insert novel specializations - htable_free(&uniquing_table); - jl_array_t *init_order = jl_finalize_deserializer(&s, tracee_list); // done with f and s (needs to be after recache) - if (init_order == NULL) - init_order = (jl_array_t*)jl_an_empty_vec_any; - assert(jl_isa((jl_value_t*)init_order, jl_array_any_type)); - - JL_GC_PUSH4(&init_order, &restored, &edges, &ext_targets); - jl_gc_enable(en); // subtyping can allocate a lot, not valid before recache-other - - jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets); // restore external backedges (needs to be last) - - // check new CodeInstances and validate any that lack external backedges - validate_new_code_instances(); - - serializer_worklist = NULL; - htable_free(&new_code_instance_validate); - arraylist_free(&flagref_list); - arraylist_free(&backref_list); - htable_free(&queued_method_roots); - ios_close(f); - - jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before 
this point - if (tracee_list) { - jl_methtable_t *mt; - while ((mt = (jl_methtable_t*)arraylist_pop(tracee_list)) != NULL) { - JL_GC_PROMISE_ROOTED(mt); - jl_typemap_visitor(mt->defs, trace_method, NULL); - } - arraylist_free(tracee_list); - free(tracee_list); - } - for (int i = 0; i < ccallable_list.len; i++) { - jl_svec_t *item = (jl_svec_t*)ccallable_list.items[i]; - JL_GC_PROMISE_ROOTED(item); - int success = jl_compile_extern_c(NULL, NULL, NULL, jl_svecref(item, 0), jl_svecref(item, 1)); - if (!success) - jl_safe_printf("@ccallable was already defined for this method name\n"); - } - arraylist_free(&ccallable_list); - jl_value_t *ret = (jl_value_t*)jl_svec(2, restored, init_order); - JL_GC_POP(); - - return (jl_value_t*)ret; -} - -JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *mod_array) -{ - ios_t f; - ios_static_buffer(&f, (char*)buf, sz); - return _jl_restore_incremental(&f, mod_array); -} - -JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *mod_array) -{ - ios_t f; - if (ios_file(&f, fname, 1, 0, 0, 0) == NULL) { - return jl_get_exceptionf(jl_errorexception_type, - "Cache file \"%s\" not found.\n", fname); - } - return _jl_restore_incremental(&f, mod_array); -} - -// --- init --- - -void jl_init_serializer(void) -{ - jl_task_t *ct = jl_current_task; - htable_new(&ser_tag, 0); - htable_new(&common_symbol_tag, 0); - htable_new(&backref_table, 0); - - void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type, - jl_call_sym, jl_invoke_sym, jl_invoke_modify_sym, jl_goto_ifnot_sym, jl_return_sym, jl_symbol("tuple"), - jl_an_empty_string, jl_an_empty_vec_any, - - // empirical list of very common symbols - #include "common_symbols1.inc" - - jl_box_int32(0), jl_box_int32(1), jl_box_int32(2), - jl_box_int32(3), jl_box_int32(4), jl_box_int32(5), - jl_box_int32(6), jl_box_int32(7), jl_box_int32(8), - jl_box_int32(9), jl_box_int32(10), jl_box_int32(11), - jl_box_int32(12), jl_box_int32(13), jl_box_int32(14), - jl_box_int32(15), jl_box_int32(16), jl_box_int32(17), - jl_box_int32(18), jl_box_int32(19), jl_box_int32(20), - - jl_box_int64(0), jl_box_int64(1), jl_box_int64(2), - jl_box_int64(3), jl_box_int64(4), jl_box_int64(5), - jl_box_int64(6), jl_box_int64(7), jl_box_int64(8), - jl_box_int64(9), jl_box_int64(10), jl_box_int64(11), - jl_box_int64(12), jl_box_int64(13), jl_box_int64(14), - jl_box_int64(15), jl_box_int64(16), jl_box_int64(17), - jl_box_int64(18), jl_box_int64(19), jl_box_int64(20), - - jl_bool_type, jl_linenumbernode_type, jl_pinode_type, - jl_upsilonnode_type, jl_type_type, jl_bottom_type, jl_ref_type, - jl_pointer_type, jl_abstractarray_type, jl_nothing_type, - jl_vararg_type, - jl_densearray_type, jl_function_type, jl_typename_type, - jl_builtin_type, jl_task_type, jl_uniontype_type, - jl_array_any_type, jl_intrinsic_type, - jl_abstractslot_type, jl_methtable_type, jl_typemap_level_type, - jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type, - jl_array_symbol_type, jl_anytuple_type, jl_tparam0(jl_anytuple_type), - jl_emptytuple_type, jl_array_uint8_type, jl_code_info_type, - jl_typeofbottom_type, jl_typeofbottom_type->super, - jl_namedtuple_type, jl_array_int32_type, - jl_typedslot_type, jl_uint32_type, jl_uint64_type, - jl_type_type_mt, jl_nonfunction_mt, - jl_opaque_closure_type, - - ct->ptls->root_task, - - NULL }; - - // more common symbols, less common than those above. will get 2-byte encodings. 
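The `vals[]` table above and the `common_symbols` table that follows give a fixed set of very frequently serialized objects dedicated one-byte and two-byte encodings, so they can be written as a short tag instead of a full value; the reverse lookup goes through the `ser_tag` hash and the `deser_tag` array later in this hunk. A hedged, self-contained sketch of the one-byte direction (constants and names are invented for illustration; this is not the real tag machinery):

    #include <assert.h>
    #include <stddef.h>

    /* Sketch: map a NULL-terminated list of common objects to small integer
       tags so the serializer can emit one byte per occurrence.  The 256-entry
       limit mirrors the assert(LAST_TAG+1+i < 256) in the diff. */
    enum { FIRST_FREE_TAG = 64, N_TAGS = 256 };   /* illustrative constants */

    static const void *deser_table[N_TAGS];       /* tag -> object (decode side) */

    static void assign_tags(const void *const *vals)
    {
        size_t i = 0;
        while (vals[i] != NULL) {
            deser_table[FIRST_FREE_TAG + 1 + i] = vals[i];
            i++;
        }
        assert(FIRST_FREE_TAG + 1 + i < N_TAGS);  /* every tag must fit in one byte */
    }

    /* The encode side keeps the reverse object -> tag map (a ptrhash in the
       diff) and writes the single tag byte whenever one of these objects is met. */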
- void *common_symbols[] = { - #include "common_symbols2.inc" - NULL - }; - - deser_tag[TAG_SYMBOL] = (jl_value_t*)jl_symbol_type; - deser_tag[TAG_SSAVALUE] = (jl_value_t*)jl_ssavalue_type; - deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; - deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; - deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; - deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; - deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; - deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; - deser_tag[TAG_PHICNODE] = (jl_value_t*)jl_phicnode_type; - deser_tag[TAG_STRING] = (jl_value_t*)jl_string_type; - deser_tag[TAG_MODULE] = (jl_value_t*)jl_module_type; - deser_tag[TAG_TVAR] = (jl_value_t*)jl_tvar_type; - deser_tag[TAG_METHOD_INSTANCE] = (jl_value_t*)jl_method_instance_type; - deser_tag[TAG_METHOD] = (jl_value_t*)jl_method_type; - deser_tag[TAG_CODE_INSTANCE] = (jl_value_t*)jl_code_instance_type; - deser_tag[TAG_GLOBALREF] = (jl_value_t*)jl_globalref_type; - deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type; - deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type; - deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type; - deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type; - deser_tag[TAG_UNIONALL] = (jl_value_t*)jl_unionall_type; - deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type; - deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type; - deser_tag[TAG_GOTOIFNOT] = (jl_value_t*)jl_gotoifnot_type; - deser_tag[TAG_RETURNNODE] = (jl_value_t*)jl_returnnode_type; - deser_tag[TAG_ARGUMENT] = (jl_value_t*)jl_argument_type; - - intptr_t i = 0; - while (vals[i] != NULL) { - deser_tag[LAST_TAG+1+i] = (jl_value_t*)vals[i]; - i += 1; - } - assert(LAST_TAG+1+i < 256); - - for (i = 2; i < 256; i++) { - if (deser_tag[i]) - ptrhash_put(&ser_tag, deser_tag[i], (void*)i); - } - - i = 2; - while (common_symbols[i-2] != NULL) { - ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); - deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; - i += 1; - } - assert(i <= 256); -} - -#ifdef __cplusplus -} -#endif diff --git a/src/features_x86.h b/src/features_x86.h index 93cef3d8ce30e..08f979df546b7 100644 --- a/src/features_x86.h +++ b/src/features_x86.h @@ -74,11 +74,13 @@ JL_FEATURE_DEF(enqcmd, 32 * 3 + 29, 0) // EAX=7,ECX=0: EDX // JL_FEATURE_DEF(avx5124vnniw, 32 * 4 + 2, ?????) // JL_FEATURE_DEF(avx5124fmaps, 32 * 4 + 3, ?????) 
+JL_FEATURE_DEF(uintr, 32 * 4 + 5, 140000) JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0) JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000) JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000) JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0) JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16") +JL_FEATURE_DEF(avx512fp16, 32 * 4 + 23, 140000) JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile") JL_FEATURE_DEF_NAME(amx_int8, 32 * 4 + 25, 110000, "amx-int8") @@ -89,7 +91,7 @@ JL_FEATURE_DEF(lzcnt, 32 * 5 + 5, 0) JL_FEATURE_DEF(sse4a, 32 * 5 + 6, 0) JL_FEATURE_DEF(prfchw, 32 * 5 + 8, 0) JL_FEATURE_DEF(xop, 32 * 5 + 11, 0) -JL_FEATURE_DEF(lwp, 32 * 5 + 15, 0) +// JL_FEATURE_DEF(lwp, 32 * 5 + 15, 0) Deprecated JL_FEATURE_DEF(fma4, 32 * 5 + 16, 0) JL_FEATURE_DEF(tbm, 32 * 5 + 21, 0) JL_FEATURE_DEF(mwaitx, 32 * 5 + 29, 0) diff --git a/src/flisp/Makefile b/src/flisp/Makefile index 7a363b0ec13d7..17292d301115b 100644 --- a/src/flisp/Makefile +++ b/src/flisp/Makefile @@ -49,7 +49,7 @@ endif FLAGS := -I$(LLTSRCDIR) $(JCFLAGS) $(HFILEDIRS:%=-I%) \ -I$(LIBUV_INC) -I$(UTF8PROC_INC) -I$(build_includedir) $(LIBDIRS:%=-L%) \ - -DLIBRARY_EXPORTS -DUTF8PROC_EXPORTS + -DJL_LIBRARY_EXPORTS_INTERNAL -DUTF8PROC_EXPORTS ifneq ($(OS), emscripten) FLAGS += -DUSE_COMPUTED_GOTO endif @@ -95,6 +95,13 @@ $(BUILDDIR)/$(LIBTARGET).a: $(OBJS) | $(BUILDDIR) CCLD := $(CC) +# Override `-shared-libasan` from root Make.inc +ifeq ($(SANITIZE),1) +ifeq ($(SANITIZE_ADDRESS),1) +JLDFLAGS += -static-libsan +endif +endif + $(BUILDDIR)/$(EXENAME)-debug$(EXE): $(DOBJS) $(LIBFILES_debug) $(BUILDDIR)/$(LIBTARGET)-debug.a $(BUILDDIR)/flmain.dbg.obj | $(BUILDDIR)/flisp.boot @$(call PRINT_LINK, $(CCLD) $(DEBUGFLAGS) $(JLDFLAGS) $(DOBJS) $(BUILDDIR)/flmain.dbg.obj -o $@ $(BUILDDIR)/$(LIBTARGET)-debug.a $(LIBFILES_debug) $(LIBS) $(OSLIBS)) diff --git a/src/flisp/cvalues.c b/src/flisp/cvalues.c index 071a0b1642971..a5635c238ba3c 100644 --- a/src/flisp/cvalues.c +++ b/src/flisp/cvalues.c @@ -108,7 +108,7 @@ static value_t cprim(fl_context_t *fl_ctx, fltype_t *type, size_t sz) return tagptr(pcp, TAG_CPRIM); } -value_t cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz) +static value_t _cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz, int may_finalize) { cvalue_t *pcv; int str=0; @@ -127,7 +127,7 @@ value_t cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz) pcv = (cvalue_t*)alloc_words(fl_ctx, nw); pcv->type = type; pcv->data = &pcv->_space[0]; - if (type->vtable != NULL && type->vtable->finalize != NULL) + if (may_finalize && type->vtable != NULL && type->vtable->finalize != NULL) add_finalizer(fl_ctx, pcv); } else { @@ -148,6 +148,16 @@ value_t cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz) return tagptr(pcv, TAG_CVALUE); } +value_t cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz) +{ + return _cvalue(fl_ctx, type, sz, 1); +} + +value_t cvalue_no_finalizer(fl_context_t *fl_ctx, fltype_t *type, size_t sz) +{ + return _cvalue(fl_ctx, type, sz, 0); +} + value_t cvalue_from_data(fl_context_t *fl_ctx, fltype_t *type, void *data, size_t sz) { value_t cv; diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h index e77904a32d1f2..b031e456cd3fe 100644 --- a/src/flisp/flisp.h +++ b/src/flisp/flisp.h @@ -328,6 +328,7 @@ typedef float fl_float_t; typedef value_t (*builtin_t)(fl_context_t*, value_t*, uint32_t); value_t cvalue(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT; +value_t cvalue_no_finalizer(fl_context_t *fl_ctx, fltype_t *type, size_t sz) JL_NOTSAFEPOINT; void add_finalizer(fl_context_t *fl_ctx, 
cvalue_t *cv); void cv_autorelease(fl_context_t *fl_ctx, cvalue_t *cv); void cv_pin(fl_context_t *fl_ctx, cvalue_t *cv); diff --git a/src/flisp/flmain.c b/src/flisp/flmain.c index f3861eed9e8a2..401905cc7a7a8 100644 --- a/src/flisp/flmain.c +++ b/src/flisp/flmain.c @@ -8,14 +8,6 @@ extern "C" { #endif -#if defined(__has_feature) -#if __has_feature(address_sanitizer) -const char* __asan_default_options() { - return "detect_leaks=0"; -} -#endif -#endif - static value_t argv_list(fl_context_t *fl_ctx, int argc, char *argv[]) { int i; diff --git a/src/flisp/iostream.c b/src/flisp/iostream.c index b2b2477bb43c6..c1c6d965d2917 100644 --- a/src/flisp/iostream.c +++ b/src/flisp/iostream.c @@ -354,7 +354,7 @@ value_t fl_ioreaduntil(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) ios_setbuf(&dest, data, 80, 0); char delim = get_delim_arg(fl_ctx, args[1], "io.readuntil"); ios_t *src = toiostream(fl_ctx, args[0], "io.readuntil"); - size_t n = ios_copyuntil(&dest, src, delim); + size_t n = ios_copyuntil(&dest, src, delim, 1); cv->len = n; if (dest.buf != data) { // outgrew initial space @@ -376,7 +376,7 @@ value_t fl_iocopyuntil(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) ios_t *dest = toiostream(fl_ctx, args[0], "io.copyuntil"); ios_t *src = toiostream(fl_ctx, args[1], "io.copyuntil"); char delim = get_delim_arg(fl_ctx, args[2], "io.copyuntil"); - return size_wrap(fl_ctx, ios_copyuntil(dest, src, delim)); + return size_wrap(fl_ctx, ios_copyuntil(dest, src, delim, 1)); } value_t fl_iocopy(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) diff --git a/src/flisp/julia_charmap.h b/src/flisp/julia_charmap.h index 3c54eaf98f484..8471d1e3b3b91 100644 --- a/src/flisp/julia_charmap.h +++ b/src/flisp/julia_charmap.h @@ -10,4 +10,5 @@ static const uint32_t charmap[][2] = { { 0x00B7, 0x22C5 }, // middot char -> dot operator (#25098) { 0x0387, 0x22C5 }, // Greek interpunct -> dot operator (#25098) { 0x2212, 0x002D }, // minus -> hyphen-minus (#26193) + { 0x210F, 0x0127 }, // hbar -> small letter h with stroke (#48870) }; diff --git a/src/flisp/read.c b/src/flisp/read.c index 9a480e0536c7a..7a6039323a988 100644 --- a/src/flisp/read.c +++ b/src/flisp/read.c @@ -303,7 +303,7 @@ static uint32_t peek(fl_context_t *fl_ctx) fl_ctx->readtokval = fixnum(x); } else if (c == '!') { - // #! single line comment for shbang script support + // #! single line comment for shebang script support do { ch = ios_getc(readF(fl_ctx)); } while (ch != IOS_EOF && (char)ch != '\n'); diff --git a/src/flisp/table.c b/src/flisp/table.c index a24cdf3bc06e8..1d8aed358e88d 100644 --- a/src/flisp/table.c +++ b/src/flisp/table.c @@ -87,9 +87,7 @@ value_t fl_table(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) value_t nt; // prevent small tables from being added to finalizer list if (cnt <= HT_N_INLINE) { - fl_ctx->table_vtable.finalize = NULL; - nt = cvalue(fl_ctx, fl_ctx->tabletype, sizeof(htable_t)); - fl_ctx->table_vtable.finalize = free_htable; + nt = cvalue_no_finalizer(fl_ctx, fl_ctx->tabletype, sizeof(htable_t)); } else { nt = cvalue(fl_ctx, fl_ctx->tabletype, 2*sizeof(void*)); @@ -104,6 +102,12 @@ value_t fl_table(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) else k = arg; } + if (h->table != &h->_space[0]) { + // We expected to use the inline table, but we ended up outgrowing it. + // Make sure to register the finalizer. 
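The `fl_table` change above allocates small tables with `cvalue_no_finalizer`, since data that fits in the inline space needs no cleanup, and registers the finalizer only if the table outgrew the inline buffer during construction, which is what the `add_finalizer` call just below does. A rough sketch of that deferred-finalizer pattern in isolation (the struct, callback, and names are invented for illustration; this is not flisp's cvalue API):

    #include <stdlib.h>
    #include <string.h>

    /* Sketch: objects start with inline storage and no cleanup hook; the hook
       is registered only once the object actually owns heap memory. */
    typedef struct small_or_big {
        void (*finalize)(struct small_or_big *self);  /* NULL while inline */
        void  *heap_data;                             /* NULL while inline */
        char   inline_space[32];
    } small_or_big_t;

    static void free_heap_data(small_or_big_t *obj)
    {
        free(obj->heap_data);
    }

    static void grow(small_or_big_t *obj, size_t needed)
    {
        if (needed <= sizeof obj->inline_space || obj->heap_data != NULL)
            return;                          /* still inline, or already grown */
        void *p = malloc(needed);
        if (p == NULL)
            return;                          /* allocation failed; stay inline */
        memcpy(p, obj->inline_space, sizeof obj->inline_space);
        obj->heap_data = p;
        obj->finalize = free_heap_data;      /* late registration, as in fl_table */
    }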
+ add_finalizer(fl_ctx, (cvalue_t*)ptr(nt)); + ((cvalue_t*)ptr(nt))->len = 2*sizeof(void*); + } return nt; } diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp index 818d6e803c9df..c7ee32269138a 100644 --- a/src/gc-alloc-profiler.cpp +++ b/src/gc-alloc-profiler.cpp @@ -5,11 +5,12 @@ #include "julia_internal.h" #include "gc.h" +#include "llvm/ADT/SmallVector.h" + #include -#include using std::string; -using std::vector; +using llvm::SmallVector; struct jl_raw_backtrace_t { jl_bt_element_t *data; @@ -27,17 +28,17 @@ struct jl_raw_alloc_t { // == These structs define the global singleton profile buffer that will be used by // callbacks to store profile results. == struct jl_per_thread_alloc_profile_t { - vector allocs; + SmallVector allocs; }; struct jl_alloc_profile_t { double sample_rate; - vector per_thread_profiles; + SmallVector per_thread_profiles; }; struct jl_combined_results { - vector combined_allocs; + SmallVector combined_allocs; }; // == Global variables manipulated by callbacks == @@ -80,7 +81,8 @@ extern "C" { // Needed since these functions doesn't take any arguments. JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) { // We only need to do this once, the first time this is called. - while (g_alloc_profile.per_thread_profiles.size() < (size_t)jl_n_threads) { + size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); + while (g_alloc_profile.per_thread_profiles.size() < nthreads) { g_alloc_profile.per_thread_profiles.push_back(jl_per_thread_alloc_profile_t{}); } @@ -131,7 +133,10 @@ JL_DLLEXPORT void jl_free_alloc_profile() { void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *type) JL_NOTSAFEPOINT { auto& global_profile = g_alloc_profile; - auto thread_id = jl_atomic_load_relaxed(&jl_current_task->tid); + size_t thread_id = jl_atomic_load_relaxed(&jl_current_task->tid); + if (thread_id >= global_profile.per_thread_profiles.size()) + return; // ignore allocations on threads started after the alloc-profile started + auto& profile = global_profile.per_thread_profiles[thread_id]; auto sample_val = double(rand()) / double(RAND_MAX); diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h index 8be6fed21a899..fcd8e45caa2d8 100644 --- a/src/gc-alloc-profiler.h +++ b/src/gc-alloc-profiler.h @@ -14,7 +14,7 @@ extern "C" { // The public interface to call from Julia for allocations profiling // --------------------------------------------------------------------- -// Forward-declaration to avoid depenency in header file. +// Forward-declaration to avoid dependency in header file. struct jl_raw_alloc_t; // Defined in gc-alloc-profiler.cpp typedef struct { @@ -35,6 +35,7 @@ void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t extern int g_alloc_profile_enabled; +// This should only be used from _deprecated_ code paths. We shouldn't see UNKNOWN anymore. #define jl_gc_unknown_type_tag ((jl_datatype_t*)0xdeadaa03) static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT { diff --git a/src/gc-debug.c b/src/gc-debug.c index 7d6ca8ece2ecf..108983ad8992d 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,7 +1,10 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include "gc.h" +#include "julia.h" #include +#include +#include #include // re-include assert.h without NDEBUG, @@ -27,19 +30,16 @@ jl_gc_pagemeta_t *jl_gc_page_metadata(void *data) // the end of the page. 
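The `jl_gc_find_taggedvalue_pool` hunk that follows now checks `gc_alloc_map_is_set` and reads the page metadata directly instead of walking the old two-level allocmap bitmaps; the early outs shown there reject pointers outside the pool, pointers into the page header, and pages whose object size is zero. A self-contained sketch of the kind of offset arithmetic such a lookup performs (the page size and header offset below are made up, and this is not the exact body of the function):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: given an interior pointer into a pool page and the pool's object
       size, recover the start of the enclosing object slot. */
    #define SKETCH_PAGE_SZ     (16 * 1024)
    #define SKETCH_PAGE_OFFSET 256

    static char *object_start(char *p, size_t osize)
    {
        char *page  = (char *)((uintptr_t)p & ~(uintptr_t)(SKETCH_PAGE_SZ - 1));
        char *first = page + SKETCH_PAGE_OFFSET;   /* first object slot on the page */
        if (p < first)
            return NULL;                           /* pointer is into the page header */
        size_t ofs = (size_t)(p - first);
        return first + (ofs / osize) * osize;      /* round down to the slot start */
    }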
JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) { - if (!page_metadata(p)) + if (!gc_alloc_map_is_set(p)) // Not in the pool return NULL; - struct jl_gc_metadata_ext info = page_metadata_ext(p); + jl_gc_pagemeta_t *meta = page_metadata(p); char *page_begin = gc_page_data(p) + GC_PAGE_OFFSET; // In the page header if (p < page_begin) return NULL; size_t ofs = p - page_begin; - // Check if this is a free page - if (!(info.pagetable0->allocmap[info.pagetable0_i32] & (uint32_t)(1 << info.pagetable0_i))) - return NULL; - int osize = info.meta->osize; + int osize = meta->osize; // Shouldn't be needed, just in case if (osize == 0) return NULL; @@ -83,14 +83,13 @@ void add_lostval_parent(jl_value_t *parent) innocent looking functions which allocate (and thus trigger marking) only on special cases. If you can't find it, you can try the following : - - Ensure that should_timeout() is deterministic instead of clock based. - Once you have a completely deterministic program which crashes on gc_verify, the addresses should stay constant between different runs (with same binary, same environment ...). Do not forget to turn off ASLR (linux: echo 0 > /proc/sys/kernel/randomize_va_space). At this point you should be able to run under gdb and use a hw watch to look for writes at the exact addr of the slot (use something like watch *slot_addr if *slot_addr == val). - If it went well you are now stopped at the exact point the problem is happening. - Backtraces in JIT'd code wont work for me (but I'm not sure they should) so in that + Backtraces in JIT'd code won't work for me (but I'm not sure they should) so in that case you can try to jl_throw(something) from gdb. */ // this does not yet detect missing writes from marked to marked_noesc @@ -99,7 +98,7 @@ static arraylist_t bits_save[4]; static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits) { - jl_ptls_t ptls2 = jl_all_tls_states[pg->thread_n]; + jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n]; jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n]; jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize; @@ -111,44 +110,14 @@ static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits) } } -static void gc_clear_mark_pagetable0(pagetable0_t *pagetable0, int bits) +static void gc_clear_mark_outer(int bits) { - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_page(pagetable0->meta[pg_i * 32 + j], bits); - } - } - } - } -} - -static void gc_clear_mark_pagetable1(pagetable1_t *pagetable1, int bits) -{ - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_pagetable0(pagetable1->meta0[pg_i * 32 + j], bits); - } - } - } - } -} - -static void gc_clear_mark_pagetable(int bits) -{ - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_pagetable1(memory_map.meta1[pg_i * 32 + j], bits); - } - } + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom); + while (pg != NULL) { + gc_clear_mark_page(pg, bits); 
+ pg = pg->next; } } } @@ -164,8 +133,8 @@ static void clear_mark(int bits) } } bigval_t *v; - for (int i = 0;i < jl_n_threads;i++) { - v = jl_all_tls_states[i]->heap.big_objects; + for (int i = 0; i < gc_n_threads; i++) { + v = gc_all_tls_states[i]->heap.big_objects; while (v != NULL) { void *gcv = &v->header; if (!gc_verifying) @@ -184,7 +153,7 @@ static void clear_mark(int bits) v = v->next; } - gc_clear_mark_pagetable(bits); + gc_clear_mark_outer(bits); } static void restore(void) @@ -198,21 +167,32 @@ static void restore(void) static void gc_verify_track(jl_ptls_t ptls) { - jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; + // `gc_verify_track` is limited to single-threaded GC + if (jl_n_gcthreads != 0) + return; do { - jl_gc_mark_sp_t sp; - gc_mark_sp_init(gc_cache, &sp); + jl_gc_markqueue_t mq; + jl_gc_markqueue_t *mq2 = &ptls->mark_queue; + ws_queue_t *cq = &mq.chunk_queue; + ws_queue_t *q = &mq.ptr_queue; + jl_atomic_store_relaxed(&cq->top, 0); + jl_atomic_store_relaxed(&cq->bottom, 0); + jl_atomic_store_relaxed(&cq->array, jl_atomic_load_relaxed(&mq2->chunk_queue.array)); + jl_atomic_store_relaxed(&q->top, 0); + jl_atomic_store_relaxed(&q->bottom, 0); + jl_atomic_store_relaxed(&q->array, jl_atomic_load_relaxed(&mq2->ptr_queue.array)); + arraylist_new(&mq.reclaim_set, 32); arraylist_push(&lostval_parents_done, lostval); jl_safe_printf("Now looking for %p =======\n", lostval); clear_mark(GC_CLEAN); - gc_mark_queue_all_roots(ptls, &sp); - gc_mark_queue_finlist(gc_cache, &sp, &to_finalize, 0); - for (int i = 0;i < jl_n_threads;i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - gc_mark_queue_finlist(gc_cache, &sp, &ptls2->finalizers, 0); + gc_mark_queue_all_roots(ptls, &mq); + gc_mark_finlist(&mq, &to_finalize, 0); + for (int i = 0; i < gc_n_threads;i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + gc_mark_finlist(&mq, &ptls2->finalizers, 0); } - gc_mark_queue_finlist(gc_cache, &sp, &finalizer_list_marked, 0); - gc_mark_loop(ptls, sp); + gc_mark_finlist(&mq, &finalizer_list_marked, 0); + gc_mark_loop_serial_(ptls, &mq); if (lostval_parents.len == 0) { jl_safe_printf("Could not find the missing link. We missed a toplevel root. This is odd.\n"); break; @@ -246,22 +226,35 @@ static void gc_verify_track(jl_ptls_t ptls) void gc_verify(jl_ptls_t ptls) { - jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; - jl_gc_mark_sp_t sp; - gc_mark_sp_init(gc_cache, &sp); + // `gc_verify` is limited to single-threaded GC + if (jl_n_gcthreads != 0) { + jl_safe_printf("Warn. 
GC verify disabled in multi-threaded GC\n"); + return; + } + jl_gc_markqueue_t mq; + jl_gc_markqueue_t *mq2 = &ptls->mark_queue; + ws_queue_t *cq = &mq.chunk_queue; + ws_queue_t *q = &mq.ptr_queue; + jl_atomic_store_relaxed(&cq->top, 0); + jl_atomic_store_relaxed(&cq->bottom, 0); + jl_atomic_store_relaxed(&cq->array, jl_atomic_load_relaxed(&mq2->chunk_queue.array)); + jl_atomic_store_relaxed(&q->top, 0); + jl_atomic_store_relaxed(&q->bottom, 0); + jl_atomic_store_relaxed(&q->array, jl_atomic_load_relaxed(&mq2->ptr_queue.array)); + arraylist_new(&mq.reclaim_set, 32); lostval = NULL; lostval_parents.len = 0; lostval_parents_done.len = 0; clear_mark(GC_CLEAN); gc_verifying = 1; - gc_mark_queue_all_roots(ptls, &sp); - gc_mark_queue_finlist(gc_cache, &sp, &to_finalize, 0); - for (int i = 0;i < jl_n_threads;i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - gc_mark_queue_finlist(gc_cache, &sp, &ptls2->finalizers, 0); + gc_mark_queue_all_roots(ptls, &mq); + gc_mark_finlist(&mq, &to_finalize, 0); + for (int i = 0; i < gc_n_threads;i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + gc_mark_finlist(&mq, &ptls2->finalizers, 0); } - gc_mark_queue_finlist(gc_cache, &sp, &finalizer_list_marked, 0); - gc_mark_loop(ptls, sp); + gc_mark_finlist(&mq, &finalizer_list_marked, 0); + gc_mark_loop_serial_(ptls, &mq); int clean_len = bits_save[GC_CLEAN].len; for(int i = 0; i < clean_len + bits_save[GC_OLD].len; i++) { jl_taggedvalue_t *v = (jl_taggedvalue_t*)bits_save[i >= clean_len ? GC_OLD : GC_CLEAN].items[i >= clean_len ? i - clean_len : i]; @@ -297,7 +290,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg) // for all pages in use int p_n = pg->pool_n; int t_n = pg->thread_n; - jl_ptls_t ptls2 = jl_all_tls_states[t_n]; + jl_ptls_t ptls2 = gc_all_tls_states[t_n]; jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; int osize = pg->osize; char *data = pg->data; @@ -345,10 +338,10 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg) if (!in_freelist) { jl_value_t *dt = jl_typeof(jl_valueof(v)); if (dt != (jl_value_t*)jl_buff_tag && - // the following are used by the deserializer to invalidate objects - v->header != 0x10 && v->header != 0x20 && - v->header != 0x30 && v->header != 0x40 && - v->header != 0x50 && v->header != 0x60) { + // the following may be use (by the deserializer) to invalidate objects + v->header != 0xf10 && v->header != 0xf20 && + v->header != 0xf30 && v->header != 0xf40 && + v->header != 0xf50 && v->header != 0xf60) { assert(jl_typeof(dt) == (jl_value_t*)jl_datatype_type); } } @@ -401,8 +394,8 @@ static void gc_verify_tags_pagetable(void) void gc_verify_tags(void) { // verify the freelist chains look valid - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; for (int i = 0; i < JL_GC_N_POOLS; i++) { // for all pools, iterate its freelist jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; @@ -467,7 +460,7 @@ static void gc_debug_alloc_init(jl_alloc_num_t *num, const char *name) return; if (*env == 'r') { env++; - for (int i = 0;i < 3;i++) { + for (int i = 0; i < 3; i++) { while (num->random[i] == 0) { num->random[i] = jl_rand(); } @@ -500,7 +493,7 @@ int jl_gc_debug_check_other(void) return gc_debug_alloc_check(&jl_gc_debug_env.other); } -void jl_gc_debug_print_status(void) +void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT { uint64_t pool_count = jl_gc_debug_env.pool.num; uint64_t other_count = jl_gc_debug_env.other.num; @@ -509,7 +502,7 @@ void 
jl_gc_debug_print_status(void) pool_count + other_count, pool_count, other_count, gc_num.pause); } -void jl_gc_debug_critical_error(void) +void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT { jl_gc_debug_print_status(); if (!jl_gc_debug_env.wait_for_debugger) @@ -535,9 +528,8 @@ void gc_scrub_record_task(jl_task_t *t) arraylist_push(&jl_gc_debug_tasks, t); } -static void gc_scrub_range(char *low, char *high) +JL_NO_ASAN static void gc_scrub_range(char *low, char *high) { - jl_ptls_t ptls = jl_current_task->ptls; jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; if (jl_setjmp(buf, 0)) { @@ -556,14 +548,6 @@ static void gc_scrub_range(char *low, char *high) // Make sure the sweep rebuild the freelist pg->has_marked = 1; pg->has_young = 1; - // Find the age bit - char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET; - int obj_id = (((char*)tag) - page_begin) / osize; - uint8_t *ages = pg->ages + obj_id / 8; - // Force this to be a young object to save some memory - // (especially on 32bit where it's more likely to have pointer-like - // bit patterns) - *ages &= ~(1 << (obj_id % 8)); memset(tag, 0xff, osize); // set mark to GC_MARKED (young and marked) tag->bits.gc = GC_MARKED; @@ -577,7 +561,7 @@ static void gc_scrub_task(jl_task_t *ta) jl_ptls_t ptls = jl_current_task->ptls; jl_ptls_t ptls2 = NULL; if (tid != -1) - ptls2 = jl_all_tls_states[tid]; + ptls2 = gc_all_tls_states[tid]; char *low; char *high; @@ -631,8 +615,7 @@ void objprofile_count(void *ty, int old, int sz) ty = (void*)jl_buff_tag; } else if (ty != (void*)jl_buff_tag && ty != jl_malloc_tag && - jl_typeof(ty) == (jl_value_t*)jl_datatype_type && - ((jl_datatype_t*)ty)->instance) { + jl_is_datatype(ty) && jl_is_datatype_singleton((jl_datatype_t*)ty)) { ty = jl_singleton_tag; } void **bp = ptrhash_bp(&obj_counts[old], ty); @@ -762,45 +745,37 @@ void gc_final_pause_end(int64_t t0, int64_t tend) static void gc_stats_pagetable0(pagetable0_t *pagetable0, unsigned *p0) { - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i] | pagetable0->freemap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - (*p0)++; - } - } + for (int pg_i = 0; pg_i < REGION0_PG_COUNT; pg_i++) { + uint8_t meta = pagetable0->meta[pg_i]; + assert(meta == GC_PAGE_UNMAPPED || meta == GC_PAGE_ALLOCATED || + meta == GC_PAGE_LAZILY_FREED || meta == GC_PAGE_FREED); + if (meta != GC_PAGE_UNMAPPED) { + (*p0)++; } } } static void gc_stats_pagetable1(pagetable1_t *pagetable1, unsigned *p1, unsigned *p0) { - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i] | pagetable1->freemap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - (*p1)++; - gc_stats_pagetable0(pagetable1->meta0[pg_i * 32 + j], p0); - } - } + for (int pg_i = 0; pg_i < REGION1_PG_COUNT; pg_i++) { + pagetable0_t *pagetable0 = pagetable1->meta0[pg_i]; + if (pagetable0 == NULL) { + continue; } + (*p1)++; + gc_stats_pagetable0(pagetable0, p0); } } static void gc_stats_pagetable(unsigned *p2, unsigned *p1, unsigned *p0) { - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i] | memory_map.freemap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - (*p2)++; - gc_stats_pagetable1(memory_map.meta1[pg_i * 32 + j], p1, p0); - } - } + for (int pg_i = 0; pg_i < REGION2_PG_COUNT; pg_i++) { + pagetable1_t *pagetable1 = alloc_map.meta1[pg_i]; + if (pagetable1 == 
NULL) { + continue; } + (*p2)++; + gc_stats_pagetable1(pagetable1, p1, p0); } } @@ -809,12 +784,13 @@ void jl_print_gc_stats(JL_STREAM *s) #ifdef _OS_LINUX_ malloc_stats(); #endif - double ptime = jl_clock_now() - process_t0; - jl_safe_printf("exec time\t%.5f sec\n", ptime); + double ptime = jl_hrtime() - process_t0; + double exec_time = jl_ns2s(ptime); + jl_safe_printf("exec time\t%.5f sec\n", exec_time); if (gc_num.pause > 0) { jl_safe_printf("gc time \t%.5f sec (%2.1f%%) in %d (%d full) collections\n", jl_ns2s(gc_num.total_time), - jl_ns2s(gc_num.total_time) / ptime * 100, + jl_ns2s(gc_num.total_time) / exec_time * 100, gc_num.pause, gc_num.full_sweep); jl_safe_printf("gc pause \t%.2f ms avg\n\t\t%2.0f ms max\n", jl_ns2ms(gc_num.total_time) / gc_num.pause, @@ -873,11 +849,11 @@ void gc_time_pool_end(int sweep_full) double sweep_speed = sweep_gb / sweep_pool_sec; jl_safe_printf("GC sweep pools end %.2f ms at %.1f GB/s " "(skipped %.2f %% of %" PRId64 ", swept %" PRId64 " pgs, " - "%" PRId64 " freed with %" PRId64 " lazily) %s\n", + "%" PRId64 " freed) %s\n", sweep_pool_sec * 1000, sweep_speed, (total_pages ? ((double)skipped_pages * 100) / total_pages : 0), total_pages, total_pages - skipped_pages, - freed_pages, lazy_freed_pages, + freed_pages, sweep_full ? "full" : "quick"); } @@ -916,29 +892,29 @@ void gc_time_big_end(void) t_ms, big_freed, big_total, big_reset); } -static int64_t mallocd_array_total; -static int64_t mallocd_array_freed; -static int64_t mallocd_array_sweep_start; +static int64_t mallocd_memory_total; +static int64_t mallocd_memory_freed; +static int64_t mallocd_memory_sweep_start; -void gc_time_mallocd_array_start(void) +void gc_time_mallocd_memory_start(void) { - mallocd_array_total = 0; - mallocd_array_freed = 0; - mallocd_array_sweep_start = jl_hrtime(); + mallocd_memory_total = 0; + mallocd_memory_freed = 0; + mallocd_memory_sweep_start = jl_hrtime(); } -void gc_time_count_mallocd_array(int bits) +void gc_time_count_mallocd_memory(int bits) { - mallocd_array_total++; - mallocd_array_freed += !gc_marked(bits); + mallocd_memory_total++; + mallocd_memory_freed += !gc_marked(bits); } -void gc_time_mallocd_array_end(void) +void gc_time_mallocd_memory_end(void) { - double t_ms = jl_ns2ms(jl_hrtime() - mallocd_array_sweep_start); + double t_ms = jl_ns2ms(jl_hrtime() - mallocd_memory_sweep_start); jl_safe_printf("GC sweep arrays %.2f ms " "(freed %" PRId64 " / %" PRId64 ")\n", - t_ms, mallocd_array_freed, mallocd_array_total); + t_ms, mallocd_memory_freed, mallocd_memory_total); } void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes, @@ -946,8 +922,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes, { int64_t last_remset_len = 0; int64_t remset_nptr = 0; - for (int t_i = 0;t_i < jl_n_threads;t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; last_remset_len += ptls2->heap.last_remset->len; remset_nptr = ptls2->heap.remset_nptr; } @@ -969,12 +945,12 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd, jl_safe_printf("GC sweep pause %.2f ms live %" PRId64 " kB " "(freed %" PRId64 " kB EST %" PRId64 " kB " "[error %" PRId64 "] = %d%% of allocd b %" PRIu64 ") " - "(%.2f ms in post_mark) %s | next in %" PRId64 " kB\n", + "(%.2f ms in post_mark) %s\n", jl_ns2ms(sweep_pause), live_bytes / 1024, gc_num.freed / 1024, estimate_freed / 1024, - gc_num.freed - estimate_freed, pct, gc_num.since_sweep / 1024, + gc_num.freed - estimate_freed, pct, gc_num.allocd 
/ 1024, jl_ns2ms(gc_postmark_end - gc_premark_end), - sweep_full ? "full" : "quick", -gc_num.allocd / 1024); + sweep_full ? "full" : "quick"); } void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, @@ -994,11 +970,35 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, jl_safe_printf("TS: %" PRIu64 " Minor collection: estimate freed = %" PRIu64 " live = %" PRIu64 "m new interval = %" PRIu64 "m pause time = %" PRIu64 "ms ttsp = %" PRIu64 "us mark time = %" PRIu64 - "ms sweep time = %" PRIu64 "ms \n", + "ms sweep time = %" PRIu64 "ms\n", end, freed, live/1024/1024, interval/1024/1024, pause/1000000, ttsp, mark/1000000,sweep/1000000); } + +void gc_heuristics_summary( + uint64_t old_alloc_diff, uint64_t alloc_mem, + uint64_t old_mut_time, uint64_t alloc_time, + uint64_t old_freed_diff, uint64_t gc_mem, + uint64_t old_pause_time, uint64_t gc_time, + int thrash_counter, const char *reason, + uint64_t current_heap, uint64_t target_heap) +{ + jl_safe_printf("Estimates: alloc_diff=%" PRIu64 "kB (%" PRIu64 ")" + //" nongc_time=%" PRIu64 "ns (%" PRIu64 ")" + " mut_time=%" PRIu64 "ns (%" PRIu64 ")" + " freed_diff=%" PRIu64 "kB (%" PRIu64 ")" + " pause_time=%" PRIu64 "ns (%" PRIu64 ")" + " thrash_counter=%d%s" + " current_heap=%" PRIu64 " MB" + " target_heap=%" PRIu64 " MB\n", + old_alloc_diff/1024, alloc_mem/1024, + old_mut_time/1000, alloc_time/1000, + old_freed_diff/1024, gc_mem/1024, + old_pause_time/1000, gc_time/1000, + thrash_counter, reason, + current_heap/1024/1024, target_heap/1024/1024); +} #endif void jl_gc_debug_init(void) @@ -1023,14 +1023,14 @@ void jl_gc_debug_init(void) #endif #ifdef OBJPROFILE - for (int g = 0;g < 3;g++) { + for (int g = 0; g < 3; g++) { htable_new(&obj_counts[g], 0); htable_new(&obj_sizes[g], 0); } #endif #ifdef GC_FINAL_STATS - process_t0 = jl_clock_now(); + process_t0 = jl_hrtime(); #endif } @@ -1085,8 +1085,8 @@ void gc_stats_all_pool(void) { size_t nb=0, w, tw=0, no=0, tp=0, nold=0, noldbytes=0, np, nol; for (int i = 0; i < JL_GC_N_POOLS; i++) { - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol); nb += b; no += (b / ptls2->heap.norm_pools[i].osize); @@ -1110,8 +1110,8 @@ void gc_stats_all_pool(void) void gc_stats_big_obj(void) { size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0; - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; bigval_t *v = ptls2->heap.big_objects; while (v != NULL) { if (gc_marked(v->bits.gc)) { @@ -1133,7 +1133,7 @@ void gc_stats_big_obj(void) while (ma != NULL) { if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) { nused++; - nbytes += jl_array_nbytes(ma->a); + nbytes += jl_genericmemory_nbytes((jl_genericmemory_t*)ma->a); } ma = ma->next; } @@ -1152,7 +1152,7 @@ void gc_stats_big_obj(void) static int64_t poolobj_sizes[4]; static int64_t empty_pages; -static void gc_count_pool_page(jl_gc_pagemeta_t *pg) +static void gc_count_pool_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { int osize = pg->osize; char *data = pg->data; @@ -1171,44 +1171,16 @@ static void gc_count_pool_page(jl_gc_pagemeta_t *pg) } } -static void gc_count_pool_pagetable0(pagetable0_t *pagetable0) -{ - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - if (line) { 
- for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_page(pagetable0->meta[pg_i * 32 + j]); - } - } - } - } -} - -static void gc_count_pool_pagetable1(pagetable1_t *pagetable1) -{ - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_pagetable0(pagetable1->meta0[pg_i * 32 + j]); - } - } - } - } -} - static void gc_count_pool_pagetable(void) { - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_pagetable1(memory_map.meta1[pg_i * 32 + j]); - } + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom); + while (pg != NULL) { + if (gc_alloc_map_is_set(pg->data)) { + gc_count_pool_page(pg); } + pg = pg->next; } } } @@ -1219,7 +1191,7 @@ void gc_count_pool(void) empty_pages = 0; gc_count_pool_pagetable(); jl_safe_printf("****** Pool stat: ******\n"); - for (int i = 0;i < 4;i++) + for (int i = 0; i < 4; i++) jl_safe_printf("bits(%d): %" PRId64 "\n", i, poolobj_sizes[i]); // empty_pages is inaccurate after the sweep since young objects are // also GC_CLEAN @@ -1227,20 +1199,17 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot) +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT { - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); int nf = (int)jl_datatype_nfields(vt); - for (int i = 0; i < nf; i++) { - void *fieldaddr = (char*)obj + jl_field_offset(vt, i); - if (fieldaddr >= slot) { - return i; - } + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; } - return -1; + return nf - 1; } -int gc_slot_to_arrayidx(void *obj, void *_slot) +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT { char *slot = (char*)_slot; jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); @@ -1256,167 +1225,36 @@ int gc_slot_to_arrayidx(void *obj, void *_slot) start = (char*)jl_svec_data(obj); len = jl_svec_len(obj); } - else if (vt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)obj; - if (!a->flags.ptrarray) - return -1; - start = (char*)a->data; - len = jl_array_len(a); - elsize = a->elsize; - } if (slot < start || slot >= start + elsize * len) return -1; return (slot - start) / elsize; } -// Print a backtrace from the bottom (start) of the mark stack up to `sp` -// `pc_offset` will be added to `sp` for convenience in the debugger. -NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset) -{ - jl_jmp_buf *old_buf = jl_get_safe_restore(); - jl_jmp_buf buf; - jl_set_safe_restore(&buf); - if (jl_setjmp(buf, 0) != 0) { - jl_safe_printf("\n!!! ERROR when unwinding gc mark loop -- ABORTING !!!\n"); - jl_set_safe_restore(old_buf); - return; - } - void **top = sp.pc + pc_offset; - jl_gc_mark_data_t *data_top = sp.data; - sp.data = ptls->gc_cache.data_stack; - sp.pc = ptls->gc_cache.pc_stack; - int isroot = 1; - while (sp.pc < top) { - void *pc = *sp.pc; - const char *prefix = isroot ? 
"r--" : " `-"; - isroot = 0; - if (pc == gc_mark_label_addrs[GC_MARK_L_marked_obj]) { - gc_mark_marked_obj_t *data = gc_repush_markdata(&sp, gc_mark_marked_obj_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_safe_printf("%p: Root object: %p :: %p (bits: %d)\n of type ", - (void*)data, (void*)data->obj, (void*)data->tag, (int)data->bits); - jl_((void*)data->tag); - isroot = 1; - } - else if (pc == gc_mark_label_addrs[GC_MARK_L_scan_only]) { - gc_mark_marked_obj_t *data = gc_repush_markdata(&sp, gc_mark_marked_obj_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_safe_printf("%p: Queued root: %p :: %p (bits: %d)\n of type ", - (void*)data, (void*)data->obj, (void*)data->tag, (int)data->bits); - jl_((void*)data->tag); - isroot = 1; - } - else if (pc == gc_mark_label_addrs[GC_MARK_L_finlist]) { - gc_mark_finlist_t *data = gc_repush_markdata(&sp, gc_mark_finlist_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_safe_printf("%p: Finalizer list from %p to %p\n", - (void*)data, (void*)data->begin, (void*)data->end); - isroot = 1; - } - else if (pc == gc_mark_label_addrs[GC_MARK_L_objarray]) { - gc_mark_objarray_t *data = gc_repush_markdata(&sp, gc_mark_objarray_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_safe_printf("%p: %s Array in object %p :: %p -- [%p, %p)\n of type ", - (void*)data, prefix, (void*)data->parent, ((void**)data->parent)[-1], - (void*)data->begin, (void*)data->end); - jl_(jl_typeof(data->parent)); - } - else if (pc == gc_mark_label_addrs[GC_MARK_L_obj8]) { - gc_mark_obj8_t *data = gc_repush_markdata(&sp, gc_mark_obj8_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(data->parent); - uint8_t *desc = (uint8_t*)jl_dt_layout_ptrs(vt->layout); - jl_safe_printf("%p: %s Object (8bit) %p :: %p -- [%d, %d)\n of type ", - (void*)data, prefix, (void*)data->parent, ((void**)data->parent)[-1], - (int)(data->begin - desc), (int)(data->end - desc)); - jl_(jl_typeof(data->parent)); - } - else if (pc == gc_mark_label_addrs[GC_MARK_L_obj16]) { - gc_mark_obj16_t *data = gc_repush_markdata(&sp, gc_mark_obj16_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(data->parent); - uint16_t *desc = (uint16_t*)jl_dt_layout_ptrs(vt->layout); - jl_safe_printf("%p: %s Object (16bit) %p :: %p -- [%d, %d)\n of type ", - (void*)data, prefix, (void*)data->parent, ((void**)data->parent)[-1], - (int)(data->begin - desc), (int)(data->end - desc)); - jl_(jl_typeof(data->parent)); - } - else if (pc == gc_mark_label_addrs[GC_MARK_L_obj32]) { - gc_mark_obj32_t *data = gc_repush_markdata(&sp, gc_mark_obj32_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(data->parent); - uint32_t *desc = (uint32_t*)jl_dt_layout_ptrs(vt->layout); - jl_safe_printf("%p: %s Object (32bit) %p :: %p -- [%d, %d)\n of type ", - (void*)data, prefix, (void*)data->parent, ((void**)data->parent)[-1], - (int)(data->begin - desc), 
(int)(data->end - desc)); - jl_(jl_typeof(data->parent)); - } - else if (pc == gc_mark_label_addrs[GC_MARK_L_stack]) { - gc_mark_stackframe_t *data = gc_repush_markdata(&sp, gc_mark_stackframe_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_safe_printf("%p: %s Stack frame %p -- %d of %d (%s)\n", - (void*)data, prefix, (void*)data->s, (int)data->i, - (int)data->nroots >> 1, - (data->nroots & 1) ? "indirect" : "direct"); - } - else if (pc == gc_mark_label_addrs[GC_MARK_L_module_binding]) { - // module_binding - gc_mark_binding_t *data = gc_repush_markdata(&sp, gc_mark_binding_t); - if ((jl_gc_mark_data_t *)data > data_top) { - jl_safe_printf("Mark stack unwind overflow -- ABORTING !!!\n"); - break; - } - jl_safe_printf("%p: %s Module (bindings) %p (bits %d) -- [%p, %p)\n", - (void*)data, prefix, (void*)data->parent, (int)data->bits, - (void*)data->begin, (void*)data->end); - } - else { - jl_safe_printf("Unknown pc %p --- ABORTING !!!\n", pc); - break; - } - } - jl_set_safe_restore(old_buf); -} - static int gc_logging_enabled = 0; JL_DLLEXPORT void jl_enable_gc_logging(int enable) { gc_logging_enabled = enable; } -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT { +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; } - jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n", - pause/1e6, freed/1e6, + jl_safe_printf("\nGC: pause %.2fms. collected %fMB. %s %s\n", + pause/1e6, freed/(double)(1<<20), full ? "full" : "incr", recollect ? "recollect" : "" ); + + jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n", + jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20), + jl_atomic_load_relaxed(&gc_heap_stats.bytes_resident)/(double)(1<<20), + // live_bytes/(double)(1<<20), live byes tracking is not accurate. + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20), + jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20), + (double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size) + ); + // Should fragmentation use bytes_resident instead of heap_size? } #ifdef __cplusplus diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp new file mode 100644 index 0000000000000..1875a22f64ff6 --- /dev/null +++ b/src/gc-heap-snapshot.cpp @@ -0,0 +1,537 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include "gc-heap-snapshot.h" + +#include "julia_internal.h" +#include "gc.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/DenseMap.h" + +#include <string> +#include <sstream> +#include <utility> // NOTE: these three std headers are inferred from the using-declarations below + +using std::string; +using std::ostringstream; +using std::pair; +using std::make_pair; +using llvm::SmallVector; +using llvm::StringMap; +using llvm::DenseMap; +using llvm::StringRef; + +// https://stackoverflow.com/a/33799784/751061 +void print_str_escape_json(ios_t *stream, StringRef s) +{ + ios_putc('"', stream); + for (auto c = s.begin(); c != s.end(); c++) { + switch (*c) { + case '"': ios_write(stream, "\\\"", 2); break; + case '\\': ios_write(stream, "\\\\", 2); break; + case '\b': ios_write(stream, "\\b", 2); break; + case '\f': ios_write(stream, "\\f", 2); break; + case '\n': ios_write(stream, "\\n", 2); break; + case '\r': ios_write(stream, "\\r", 2); break; + case '\t': ios_write(stream, "\\t", 2); break; + default: + if (('\x00' <= *c) & (*c <= '\x1f')) { + ios_printf(stream, "\\u%04x", (int)*c); + } + else { + ios_putc(*c, stream); + } + } + } + ios_putc('"', stream); +} + + +// Edges +// "edge_fields": +// [ "type", "name_or_index", "to_node" ] +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 + +struct Edge { + size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t to_node; +}; + +// Nodes +// "node_fields": +// [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2568-L2575 + +const int k_node_number_of_fields = 7; +struct Node { + size_t type; // index into snapshot->node_types + size_t name; + size_t id; // This should be a globally-unique counter, but we use the memory address + size_t self_size; + size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. + // whether the from_node is attached or detached from the main application state + // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 + int detachedness; // 0 - unknown, 1 - attached, 2 - detached + SmallVector<Edge> edges; + + ~Node() JL_NOTSAFEPOINT = default; +}; + +struct StringTable { + StringMap<size_t> map; + SmallVector<StringRef> strings; + + size_t find_or_create_string_id(StringRef key) JL_NOTSAFEPOINT { + auto val = map.insert(make_pair(key, map.size())); + if (val.second) + strings.push_back(val.first->first()); + return val.first->second; + } + + void print_json_array(ios_t *stream, bool newlines) { + ios_printf(stream, "["); + bool first = true; + for (const auto &str : strings) { + if (first) { + first = false; + } + else { + ios_printf(stream, newlines ? ",\n" : ","); + } + print_str_escape_json(stream, str); + } + ios_printf(stream, "]"); + } +}; + +struct HeapSnapshot { + SmallVector<Node> nodes; + // edges are stored on each from_node + + StringTable names; + StringTable node_types; + StringTable edge_types; + DenseMap<void *, size_t> node_ptr_to_index_map; + + size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. +}; + +// global heap snapshot, mutated by garbage collector +// when snapshotting is on.
+int gc_heap_snapshot_enabled = 0; +HeapSnapshot *g_snapshot = nullptr; +extern jl_mutex_t heapsnapshot_lock; + +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one); +static inline void _record_gc_edge(const char *edge_type, + jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT; +void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; +void _add_internal_root(HeapSnapshot *snapshot); + + +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) +{ + HeapSnapshot snapshot; + _add_internal_root(&snapshot); + + jl_mutex_lock(&heapsnapshot_lock); + + // Enable snapshotting + g_snapshot = &snapshot; + gc_heap_snapshot_enabled = true; + + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` + jl_gc_collect(JL_GC_FULL); + + // Disable snapshotting + gc_heap_snapshot_enabled = false; + g_snapshot = nullptr; + + jl_mutex_unlock(&heapsnapshot_lock); + + // When we return, the snapshot is full + // Dump the snapshot + serialize_heap_snapshot((ios_t*)stream, snapshot, all_one); +} + +// adds a node at id 0 which is the "uber root": +// a synthetic node which points to all the GC roots. +void _add_internal_root(HeapSnapshot *snapshot) +{ + Node internal_root{ + snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.find_or_create_string_id(""), // name + 0, // id + 0, // size + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + SmallVector() // outgoing edges + }; + snapshot->nodes.push_back(internal_root); +} + +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 +// returns the index of the new node +size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT +{ + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + if (!val.second) { + return val.first->second; + } + + ios_t str_; + bool ios_need_close = 0; + + // Insert a new Node + size_t self_size = 0; + StringRef name = ""; + StringRef node_type = "object"; + + jl_datatype_t *type = (jl_datatype_t*)jl_typeof(a); + + if (jl_is_string(a)) { + node_type = "String"; + name = jl_string_data(a); + self_size = jl_string_len(a); + } + else if (jl_is_symbol(a)) { + node_type = "jl_sym_t"; + name = jl_symbol_name((jl_sym_t*)a); + self_size = name.size(); + } + else if (jl_is_simplevector(a)) { + node_type = "jl_svec_t"; + name = "SimpleVector"; + self_size = sizeof(jl_svec_t) + sizeof(void*) * jl_svec_len(a); + } + else if (jl_is_module(a)) { + node_type = "jl_module_t"; + name = jl_symbol_name_(((_jl_module_t*)a)->name); + self_size = sizeof(jl_module_t); + } + else if (jl_is_task(a)) { + node_type = "jl_task_t"; + name = "Task"; + self_size = sizeof(jl_task_t); + } + else if (jl_is_datatype(a)) { + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM* str = (JL_STREAM*)&str_; + jl_static_show(str, a); + name = StringRef((const char*)str_.buf, str_.size); + node_type = "jl_datatype_t"; + self_size = sizeof(jl_datatype_t); + } + else if (jl_is_array(a)){ + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM* str = (JL_STREAM*)&str_; + jl_static_show(str, (jl_value_t*)type); + name = StringRef((const char*)str_.buf, str_.size); + node_type = "jl_array_t"; + self_size = sizeof(jl_array_t); + } + else { + self_size = (size_t)jl_datatype_size(type); + // print full type into ios 
buffer and get StringRef to it. + // The ios is cleaned up below. + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM* str = (JL_STREAM*)&str_; + jl_static_show(str, (jl_value_t*)type); + + name = StringRef((const char*)str_.buf, str_.size); + } + + g_snapshot->nodes.push_back(Node{ + g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; + g_snapshot->names.find_or_create_string_id(name), // size_t name; + (size_t)a, // size_t id; + // We add 1 to self-size for the type tag that all heap-allocated objects have. + // Also because the Chrome Snapshot viewer ignores size-0 leaves! + sizeof(void*) + self_size, // size_t self_size; + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + SmallVector() // outgoing edges + }); + + if (ios_need_close) + ios_close(&str_); + + return val.first->second; +} + +static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef name) JL_NOTSAFEPOINT +{ + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + if (!val.second) { + return val.first->second; + } + + g_snapshot->nodes.push_back(Node{ + g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; + g_snapshot->names.find_or_create_string_id(name), // size_t name; + (size_t)a, // size_t id; + bytes, // size_t self_size; + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + SmallVector() // outgoing edges + }); + + return val.first->second; +} + +static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT +{ + string res; + jl_datatype_t *objtype = (jl_datatype_t*)jl_typeof(obj); + + while (1) { + int i = gc_slot_to_fieldidx(obj, slot, objtype); + + if (jl_is_tuple_type(objtype) || jl_is_namedtuple_type(objtype)) { + ostringstream ss; + ss << "[" << i << "]"; + res += ss.str(); + } + else { + jl_svec_t *field_names = jl_field_names(objtype); + jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); + res += jl_symbol_name(name); + } + + if (!jl_field_isptr(objtype, i)) { + // Tail recurse + res += "."; + obj = (void*)((char*)obj + jl_field_offset(objtype, i)); + objtype = (jl_datatype_t*)jl_field_type_concrete(objtype, i); + } + else { + return res; + } + } +} + + +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT +{ + record_node_to_gc_snapshot(root); + + auto &internal_root = g_snapshot->nodes.front(); + auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; + auto edge_label = g_snapshot->names.find_or_create_string_id(name); + + _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label); +} + +// Add a node to the heap snapshot representing a Julia stack frame. +// Each task points at a stack frame, which points at the stack frame of +// the function it's currently calling, forming a linked list. +// Stack frame nodes point at the objects they have as local variables. 
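+// For illustration (hypothetical example, not emitted verbatim by this code):
+// a task with two live frames, each holding one local, produces roughly
+//   Task --"stack"--> (stack frame) --"next frame"--> (stack frame)
+//   (stack frame) --"local var"--> <local object>
+// with every arrow recorded as an "internal" edge by the helpers below.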
+size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEPOINT +{ + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->nodes.size())); + if (!val.second) { + return val.first->second; + } + + snapshot->nodes.push_back(Node{ + snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.find_or_create_string_id("(stack frame)"), // name + (size_t)frame, // id + 1, // size + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + SmallVector() // outgoing edges + }); + + return val.first->second; +} + +void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + auto from_node_idx = _record_stack_frame_node(g_snapshot, (jl_gcframe_t*)from); + auto to_idx = record_node_to_gc_snapshot(to); + Node &from_node = g_snapshot->nodes[from_node_idx]; + + auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); + _record_gc_just_edge("internal", from_node, to_idx, name_idx); +} + +void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT +{ + auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); + auto to_node_idx = _record_stack_frame_node(g_snapshot, to); + Node &from_node = g_snapshot->nodes[from_node_idx]; + + auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); + _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); +} + +void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT +{ + auto from_node_idx = _record_stack_frame_node(g_snapshot, from); + auto to_node_idx = _record_stack_frame_node(g_snapshot, to); + Node &from_node = g_snapshot->nodes[from_node_idx]; + + auto name_idx = g_snapshot->names.find_or_create_string_id("next frame"); + _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); +} + +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT +{ + _record_gc_edge("element", from, to, index); +} + +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void *slot) JL_NOTSAFEPOINT +{ + string path = _fieldpath_for_slot(from, slot); + _record_gc_edge("property", from, to, + g_snapshot->names.find_or_create_string_id(path)); +} + +void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_binding_t *binding) JL_NOTSAFEPOINT +{ + jl_globalref_t *globalref = binding->globalref; + jl_sym_t *name = globalref->name; + auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)module); + auto to_node_idx = record_pointer_to_gc_snapshot(binding, sizeof(jl_binding_t), jl_symbol_name(name)); + + jl_value_t *value = jl_atomic_load_relaxed(&binding->value); + auto value_idx = value ? record_node_to_gc_snapshot(value) : 0; + jl_value_t *ty = jl_atomic_load_relaxed(&binding->ty); + auto ty_idx = ty ? 
record_node_to_gc_snapshot(ty) : 0; + auto globalref_idx = record_node_to_gc_snapshot((jl_value_t*)globalref); + + auto &from_node = g_snapshot->nodes[from_node_idx]; + auto &to_node = g_snapshot->nodes[to_node_idx]; + + _record_gc_just_edge("property", from_node, to_node_idx, g_snapshot->names.find_or_create_string_id("")); + if (value_idx) _record_gc_just_edge("internal", to_node, value_idx, g_snapshot->names.find_or_create_string_id("value")); + if (ty_idx) _record_gc_just_edge("internal", to_node, ty_idx, g_snapshot->names.find_or_create_string_id("ty")); + if (globalref_idx) _record_gc_just_edge("internal", to_node, globalref_idx, g_snapshot->names.find_or_create_string_id("globalref")); +} + +void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + _record_gc_edge("internal", from, to, + g_snapshot->names.find_or_create_string_id("")); +} + +void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT +{ + size_t name_or_idx = g_snapshot->names.find_or_create_string_id(""); + + auto from_node_idx = record_node_to_gc_snapshot(from); + const char *alloc_kind; + switch (alloc_type) + { + case 0: + alloc_kind = ""; + break; + case 1: + alloc_kind = ""; + break; + case 2: + alloc_kind = ""; + break; + default: + alloc_kind = ""; + break; + } + auto to_node_idx = record_pointer_to_gc_snapshot(to, bytes, alloc_kind); + auto &from_node = g_snapshot->nodes[from_node_idx]; + + _record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx); +} + +static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, + jl_value_t *b, size_t name_or_idx) JL_NOTSAFEPOINT +{ + auto from_node_idx = record_node_to_gc_snapshot(a); + auto to_node_idx = record_node_to_gc_snapshot(b); + + auto &from_node = g_snapshot->nodes[from_node_idx]; + + _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); +} + +void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT +{ + from_node.edges.push_back(Edge{ + g_snapshot->edge_types.find_or_create_string_id(edge_type), + name_or_idx, // edge label + to_idx // to + }); + + g_snapshot->num_edges += 1; +} + +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one) +{ + // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 + ios_printf(stream, "{\"snapshot\":{"); + ios_printf(stream, "\"meta\":{"); + ios_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + ios_printf(stream, "\"node_types\":["); + snapshot.node_types.print_json_array(stream, false); + ios_printf(stream, ","); + ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); + ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + ios_printf(stream, "\"edge_types\":["); + snapshot.edge_types.print_json_array(stream, false); + ios_printf(stream, ","); + ios_printf(stream, "\"string_or_number\",\"from_node\"]"); + ios_printf(stream, "},\n"); // end "meta" + ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); + ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); + ios_printf(stream, "},\n"); // end "snapshot" + + ios_printf(stream, "\"nodes\":["); + bool first_node = true; + for (const auto &from_node : snapshot.nodes) { + if (first_node) { + 
first_node = false; + } + else { + ios_printf(stream, ","); + } + // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + ios_printf(stream, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", + from_node.type, + from_node.name, + from_node.id, + all_one ? (size_t)1 : from_node.self_size, + from_node.edges.size(), + from_node.trace_node_id, + from_node.detachedness); + } + ios_printf(stream, "],\n"); + + ios_printf(stream, "\"edges\":["); + bool first_edge = true; + for (const auto &from_node : snapshot.nodes) { + for (const auto &edge : from_node.edges) { + if (first_edge) { + first_edge = false; + } + else { + ios_printf(stream, ","); + } + ios_printf(stream, "%zu,%zu,%zu\n", + edge.type, + edge.name_or_index, + edge.to_node * k_node_number_of_fields); + } + } + ios_printf(stream, "],\n"); // end "edges" + + ios_printf(stream, "\"strings\":"); + + snapshot.names.print_json_array(stream, true); + + ios_printf(stream, "}"); +} diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h new file mode 100644 index 0000000000000..8c3af5b86bec7 --- /dev/null +++ b/src/gc-heap-snapshot.h @@ -0,0 +1,108 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifndef JL_GC_HEAP_SNAPSHOT_H +#define JL_GC_HEAP_SNAPSHOT_H + +#include "julia.h" +#include "ios.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +// --------------------------------------------------------------------- +// Functions to call from GC when heap snapshot is enabled +// --------------------------------------------------------------------- +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT; +// Used for objects managed by GC, but which aren't exposed in the julia object, so have no +// field or index. i.e. they're not reachable from julia code, but we _will_ hit them in +// the GC mark phase (so we can check their type tag to get the size). +void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT; +// Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the +// size of the object, even though we're never going to mark that object. 
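+// (Typical example, for illustration: a buffer malloc'd in C but owned by a Julia
+// object; the buffer is not itself a Julia value, yet its size should still be
+// attributed in the snapshot via a "hidden" edge.)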
+void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT; + + +extern int gc_heap_snapshot_enabled; +extern int prev_sweep_full; + +int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT; +int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; + +static inline void gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_frame_to_object_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_task_to_frame_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_frame_to_frame_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_root(root, name); + } +} +static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t **to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_array_edge(from, *to, gc_slot_to_arrayidx(from, to)); + } +} +static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t **to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_object_edge(from, *to, to); + } +} + +static inline void gc_heap_snapshot_record_module_to_binding(jl_module_t* module, jl_binding_t* binding) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_module_to_binding(module, binding); + } +} + +static inline void gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_internal_array_edge(from, to); + } +} + +static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT +{ + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_hidden_edge(from, to, bytes, alloc_type); + } +} + +// --------------------------------------------------------------------- +// Functions to call from Julia to take heap snapshot +// --------------------------------------------------------------------- +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one); + + +#ifdef __cplusplus +} +#endif + + +#endif // JL_GC_HEAP_SNAPSHOT_H diff --git a/src/gc-pages.c b/src/gc-pages.c index a4ebe0315d71e..696f0831762be 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -19,7 +19,6 @@ extern "C" { #define MIN_BLOCK_PG_ALLOC (1) // 16 KB static int block_pg_cnt = DEFAULT_BLOCK_PG_ALLOC; -static size_t current_pg_count = 0; void jl_gc_init_page(void) { @@ -33,7 +32,7 @@ void jl_gc_init_page(void) // Try to allocate a memory block for multiple pages // Return `NULL` if allocation failed. Result is aligned to `GC_PAGE_SZ`. 
-static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT +char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT { size_t pages_sz = GC_PAGE_SZ * pg_cnt; #ifdef _OS_WINDOWS_ @@ -53,6 +52,8 @@ static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT // round data pointer up to the nearest gc_page_data-aligned // boundary if mmap didn't already do so. mem = (char*)gc_page_data(mem + GC_PAGE_SZ - 1); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mapped, pages_sz); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, pages_sz); return mem; } @@ -63,13 +64,12 @@ static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT // smaller `MIN_BLOCK_PG_ALLOC` a `jl_memory_exception` is thrown. // Assumes `gc_perm_lock` is acquired, the lock is released before the // exception is thrown. -static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT +char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT { - // try to allocate a large block of memory (or a small one) - unsigned pg, pg_cnt = block_pg_cnt; + unsigned pg_cnt = block_pg_cnt; char *mem = NULL; while (1) { - if (__likely((mem = jl_gc_try_alloc_pages(pg_cnt)))) + if (__likely((mem = jl_gc_try_alloc_pages_(pg_cnt)))) break; size_t min_block_pg_alloc = MIN_BLOCK_PG_ALLOC; if (GC_PAGE_SZ * min_block_pg_alloc < jl_page_size) @@ -86,204 +86,79 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT jl_throw(jl_memory_exception); } } - - // now need to insert these pages into the pagetable metadata - // if any allocation fails, this just stops recording more pages from that point - // and will free (munmap) the remainder - jl_gc_pagemeta_t *page_meta = - (jl_gc_pagemeta_t*)jl_gc_perm_alloc_nolock(pg_cnt * sizeof(jl_gc_pagemeta_t), 1, - sizeof(void*), 0); - pg = 0; - if (page_meta) { - for (; pg < pg_cnt; pg++) { - struct jl_gc_metadata_ext info; - uint32_t msk; - unsigned i; - pagetable1_t **ppagetable1; - pagetable0_t **ppagetable0; - jl_gc_pagemeta_t **pmeta; - - char *ptr = mem + (GC_PAGE_SZ * pg); - page_meta[pg].data = ptr; - - // create & store the level 2 / outermost info - i = REGION_INDEX(ptr); - info.pagetable_i = i % 32; - info.pagetable_i32 = i / 32; - msk = (1u << info.pagetable_i); - if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0) - memory_map.freemap1[info.pagetable_i32] |= msk; // has free - info.pagetable1 = *(ppagetable1 = &memory_map.meta1[i]); - if (!info.pagetable1) { - info.pagetable1 = (pagetable1_t*)jl_gc_perm_alloc_nolock(sizeof(pagetable1_t), 1, - sizeof(void*), 0); - *ppagetable1 = info.pagetable1; - if (!info.pagetable1) - break; - } - - // create & store the level 1 info - i = REGION1_INDEX(ptr); - info.pagetable1_i = i % 32; - info.pagetable1_i32 = i / 32; - msk = (1u << info.pagetable1_i); - if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0) - info.pagetable1->freemap0[info.pagetable1_i32] |= msk; // has free - info.pagetable0 = *(ppagetable0 = &info.pagetable1->meta0[i]); - if (!info.pagetable0) { - info.pagetable0 = (pagetable0_t*)jl_gc_perm_alloc_nolock(sizeof(pagetable0_t), 1, - sizeof(void*), 0); - *ppagetable0 = info.pagetable0; - if (!info.pagetable0) - break; - } - - // create & store the level 0 / page info - i = REGION0_INDEX(ptr); - info.pagetable0_i = i % 32; - info.pagetable0_i32 = i / 32; - msk = (1u << info.pagetable0_i); - info.pagetable0->freemap[info.pagetable0_i32] |= msk; // is free - pmeta = &info.pagetable0->meta[i]; - info.meta = (*pmeta = &page_meta[pg]); - } - } - - if (pg < pg_cnt) { -#ifndef _OS_WINDOWS_ - // Trim the 
allocation to only cover the region - // that we successfully created the metadata for. - // This is not supported by the Windows kernel, - // so we have to just skip it there and just lose these virtual addresses. - munmap(mem + LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size), - GC_PAGE_SZ * pg_cnt - LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size)); -#endif - if (pg == 0) { - uv_mutex_unlock(&gc_perm_lock); - jl_throw(jl_memory_exception); - } - } - return page_meta; + return mem; } // get a new page, either from the freemap // or from the kernel if none are available NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT { - struct jl_gc_metadata_ext info; - uv_mutex_lock(&gc_perm_lock); - int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); #endif - // scan over memory_map page-table for existing allocated but unused pages - for (info.pagetable_i32 = memory_map.lb; info.pagetable_i32 < (REGION2_PG_COUNT + 31) / 32; info.pagetable_i32++) { - uint32_t freemap1 = memory_map.freemap1[info.pagetable_i32]; - for (info.pagetable_i = 0; freemap1; info.pagetable_i++, freemap1 >>= 1) { - unsigned next = ffs_u32(freemap1); - info.pagetable_i += next; - freemap1 >>= next; - info.pagetable1 = memory_map.meta1[info.pagetable_i + info.pagetable_i32 * 32]; - // repeat over page-table level 1 - for (info.pagetable1_i32 = info.pagetable1->lb; info.pagetable1_i32 < REGION1_PG_COUNT / 32; info.pagetable1_i32++) { - uint32_t freemap0 = info.pagetable1->freemap0[info.pagetable1_i32]; - for (info.pagetable1_i = 0; freemap0; info.pagetable1_i++, freemap0 >>= 1) { - unsigned next = ffs_u32(freemap0); - info.pagetable1_i += next; - freemap0 >>= next; - info.pagetable0 = info.pagetable1->meta0[info.pagetable1_i + info.pagetable1_i32 * 32]; - // repeat over page-table level 0 - for (info.pagetable0_i32 = info.pagetable0->lb; info.pagetable0_i32 < REGION0_PG_COUNT / 32; info.pagetable0_i32++) { - uint32_t freemap = info.pagetable0->freemap[info.pagetable0_i32]; - if (freemap) { - info.pagetable0_i = ffs_u32(freemap); - info.meta = info.pagetable0->meta[info.pagetable0_i + info.pagetable0_i32 * 32]; - assert(info.meta->data); - // new pages available starting at min of lb and pagetable_i32 - if (memory_map.lb < info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb < info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb < info.pagetable0_i32) - info.pagetable0->lb = info.pagetable0_i32; - goto have_free_page; // break out of all of these loops - } - } - info.pagetable1->freemap0[info.pagetable1_i32] &= ~(uint32_t)(1u << info.pagetable1_i); // record that this was full - } - } - memory_map.freemap1[info.pagetable_i32] &= ~(uint32_t)(1u << info.pagetable_i); // record that this was full - } + jl_gc_pagemeta_t *meta = NULL; + + // try to get page from `pool_lazily_freed` + meta = pop_lf_back(&global_page_pool_lazily_freed); + if (meta != NULL) { + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); + // page is already mapped + return meta; } - // no existing pages found, allocate a new one - { - jl_gc_pagemeta_t *meta = jl_gc_alloc_new_page(); - info = page_metadata_ext(meta->data); - assert(meta == info.meta); - // new pages are now available starting at max of lb and pagetable_i32 - if (memory_map.lb > info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb > info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb > info.pagetable0_i32) - info.pagetable0->lb = 
info.pagetable0_i32; + // try to get page from `pool_clean` + meta = pop_lf_back(&global_page_pool_clean); + if (meta != NULL) { + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); + goto exit; } -have_free_page: - // in-use pages are now ending at min of ub and pagetable_i32 - if (memory_map.ub < info.pagetable_i32) - memory_map.ub = info.pagetable_i32; - if (info.pagetable1->ub < info.pagetable1_i32) - info.pagetable1->ub = info.pagetable1_i32; - if (info.pagetable0->ub < info.pagetable0_i32) - info.pagetable0->ub = info.pagetable0_i32; - - // mark this entry as in-use and not free - info.pagetable0->freemap[info.pagetable0_i32] &= ~(uint32_t)(1u << info.pagetable0_i); - info.pagetable0->allocmap[info.pagetable0_i32] |= (uint32_t)(1u << info.pagetable0_i); - info.pagetable1->allocmap0[info.pagetable1_i32] |= (uint32_t)(1u << info.pagetable1_i); - memory_map.allocmap1[info.pagetable_i32] |= (uint32_t)(1u << info.pagetable_i); + // try to get page from `pool_freed` + meta = pop_lf_back(&global_page_pool_freed); + if (meta != NULL) { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, GC_PAGE_SZ); + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); + goto exit; + } + uv_mutex_lock(&gc_perm_lock); + // another thread may have allocated a large block while we were waiting... + meta = pop_lf_back(&global_page_pool_clean); + if (meta != NULL) { + uv_mutex_unlock(&gc_perm_lock); + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); + goto exit; + } + // must map a new set of pages + char *data = jl_gc_try_alloc_pages(); + meta = (jl_gc_pagemeta_t*)malloc_s(block_pg_cnt * sizeof(jl_gc_pagemeta_t)); + for (int i = 0; i < block_pg_cnt; i++) { + jl_gc_pagemeta_t *pg = &meta[i]; + pg->data = data + GC_PAGE_SZ * i; + gc_alloc_map_maybe_create(pg->data); + if (i == 0) { + gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED); + } + else { + push_lf_back(&global_page_pool_clean, pg); + } + } + uv_mutex_unlock(&gc_perm_lock); +exit: #ifdef _OS_WINDOWS_ - VirtualAlloc(info.meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); -#endif -#ifdef _OS_WINDOWS_ + VirtualAlloc(meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); SetLastError(last_error); #endif errno = last_errno; - current_pg_count++; - gc_final_count_page(current_pg_count); - uv_mutex_unlock(&gc_perm_lock); - return info.meta; + return meta; } // return a page to the freemap allocator -void jl_gc_free_page(void *p) JL_NOTSAFEPOINT +void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { - // update the allocmap and freemap to indicate this contains a free entry - struct jl_gc_metadata_ext info = page_metadata_ext(p); - uint32_t msk; - msk = (uint32_t)(1u << info.pagetable0_i); - assert(!(info.pagetable0->freemap[info.pagetable0_i32] & msk)); - assert(info.pagetable0->allocmap[info.pagetable0_i32] & msk); - info.pagetable0->allocmap[info.pagetable0_i32] &= ~msk; - info.pagetable0->freemap[info.pagetable0_i32] |= msk; - - msk = (uint32_t)(1u << info.pagetable1_i); - assert(info.pagetable1->allocmap0[info.pagetable1_i32] & msk); - if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0) - info.pagetable1->freemap0[info.pagetable1_i32] |= msk; - - msk = (uint32_t)(1u << info.pagetable_i); - assert(memory_map.allocmap1[info.pagetable_i32] & msk); - if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0) - memory_map.freemap1[info.pagetable_i32] |= msk; - - free(info.meta->ages); - info.meta->ages = NULL; - + void *p = pg->data; + gc_alloc_map_set((char*)p, GC_PAGE_FREED); // tell the OS we don't need these pages right now size_t 
decommit_size = GC_PAGE_SZ; if (GC_PAGE_SZ < jl_page_size) { @@ -293,28 +168,30 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT void *otherp = (void*)((uintptr_t)p & ~(jl_page_size - 1)); // round down to the nearest physical page p = otherp; while (n_pages--) { - struct jl_gc_metadata_ext info = page_metadata_ext(otherp); - msk = (uint32_t)(1u << info.pagetable0_i); - if (info.pagetable0->allocmap[info.pagetable0_i32] & msk) - goto no_decommit; + if (gc_alloc_map_is_set((char*)otherp)) { + return; + } otherp = (void*)((char*)otherp + GC_PAGE_SZ); } } #ifdef _OS_WINDOWS_ VirtualFree(p, decommit_size, MEM_DECOMMIT); +#elif defined(MADV_FREE) + static int supports_madv_free = 1; + if (supports_madv_free) { + if (madvise(p, decommit_size, MADV_FREE) == -1) { + assert(errno == EINVAL); + supports_madv_free = 0; + } + } + if (!supports_madv_free) { + madvise(p, decommit_size, MADV_DONTNEED); + } #else madvise(p, decommit_size, MADV_DONTNEED); #endif - -no_decommit: - // new pages are now available starting at max of lb and pagetable_i32 - if (memory_map.lb > info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb > info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb > info.pagetable0_i32) - info.pagetable0->lb = info.pagetable0_i32; - current_pg_count--; + msan_unpoison(p, decommit_size); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, -decommit_size); } #ifdef __cplusplus diff --git a/src/gc-stacks.c b/src/gc-stacks.c index b7adf254026ca..0318162289f11 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -73,6 +73,10 @@ static void free_stack(void *stkbuf, size_t bufsz) } #endif +JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void) +{ + return jl_atomic_load_relaxed(&num_stack_mappings); +} const unsigned pool_sizes[] = { 128 * 1024, @@ -106,10 +110,13 @@ static unsigned select_pool(size_t nb) JL_NOTSAFEPOINT static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) { +#ifdef _COMPILER_ASAN_ENABLED_ + __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); +#endif if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf); return; } } @@ -135,7 +142,10 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { task->stkbuf = NULL; - arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf); +#ifdef _COMPILER_ASAN_ENABLED_ + __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); +#endif + small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf); } } } @@ -150,18 +160,20 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(ssize); ssize = pool_sizes[pool_id]; - arraylist_t *pool = &ptls->heap.free_stacks[pool_id]; + small_arraylist_t *pool = &ptls->heap.free_stacks[pool_id]; if (pool->len > 0) { - stk = arraylist_pop(pool); + stk = small_arraylist_pop(pool); } } else { ssize = LLT_ALIGN(ssize, jl_page_size); } if (stk == NULL) { - if (jl_atomic_load_relaxed(&num_stack_mappings) >= MAX_STACK_MAPPINGS) + if (jl_atomic_load_relaxed(&num_stack_mappings) >= MAX_STACK_MAPPINGS) { // we accept that this can go over by as much as nthreads since it's not a CAS + errno = ENOMEM; return NULL; + } // TODO: allocate blocks of stacks? 
but need to mprotect individually anyways stk = malloc_stack(ssize); if (stk == MAP_FAILED) @@ -169,8 +181,8 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO } *bufsz = ssize; if (owner) { - arraylist_t *live_tasks = &ptls->heap.live_tasks; - arraylist_push(live_tasks, owner); + small_arraylist_t *live_tasks = &ptls->heap.live_tasks; + mtarraylist_push(live_tasks, owner); } return stk; } @@ -188,14 +200,18 @@ void sweep_stack_pools(void) // bufsz = t->bufsz // if (stkbuf) // push(free_stacks[sz], stkbuf) - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; // free half of stacks that remain unused since last sweep for (int p = 0; p < JL_N_STACK_POOLS; p++) { - arraylist_t *al = &ptls2->heap.free_stacks[p]; + small_arraylist_t *al = &ptls2->heap.free_stacks[p]; size_t n_to_free; - if (al->len > MIN_STACK_MAPPINGS_PER_POOL) { + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + n_to_free = al->len; // not alive yet or dead, so it does not need these anymore + } + else if (al->len > MIN_STACK_MAPPINGS_PER_POOL) { n_to_free = al->len / 2; if (n_to_free > (al->len - MIN_STACK_MAPPINGS_PER_POOL)) n_to_free = al->len - MIN_STACK_MAPPINGS_PER_POOL; @@ -204,12 +220,12 @@ void sweep_stack_pools(void) n_to_free = 0; } for (int n = 0; n < n_to_free; n++) { - void *stk = arraylist_pop(al); + void *stk = small_arraylist_pop(al); free_stack(stk, pool_sizes[p]); } } - arraylist_t *live_tasks = &ptls2->heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->heap.live_tasks; size_t n = 0; size_t ndel = 0; size_t l = live_tasks->len; @@ -252,24 +268,52 @@ void sweep_stack_pools(void) JL_DLLEXPORT jl_array_t *jl_live_tasks(void) { - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - arraylist_t *live_tasks = &ptls->heap.live_tasks; - size_t i, j, l; - jl_array_t *a; - do { - l = live_tasks->len; - a = jl_alloc_vec_any(l + 1); // may gc, changing the number of tasks - } while (l + 1 < live_tasks->len); - l = live_tasks->len; - void **lst = live_tasks->items; - j = 0; - ((void**)jl_array_data(a))[j++] = ptls->root_task; - for (i = 0; i < l; i++) { - if (((jl_task_t*)lst[i])->stkbuf != NULL) - ((void**)jl_array_data(a))[j++] = lst[i]; + size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); + jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); + size_t l = 0; // l is not reset on restart, so we keep getting more aggressive at making a big enough list everything it fails +restart: + for (size_t i = 0; i < nthreads; i++) { + // skip GC threads since they don't have tasks + if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) { + continue; + } + jl_ptls_t ptls2 = allstates[i]; + if (ptls2 == NULL) + continue; + small_arraylist_t *live_tasks = &ptls2->heap.live_tasks; + size_t n = mtarraylist_length(live_tasks); + l += n + (ptls2->root_task->stkbuf != NULL); + } + l += l / 20; // add 5% for margin of estimation error + jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything + nthreads = jl_atomic_load_acquire(&jl_n_threads); + allstates = jl_atomic_load_relaxed(&jl_all_tls_states); + size_t j = 0; + for (size_t i = 0; i < nthreads; i++) { + // skip GC threads since they don't have tasks + if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) { + continue; + } + jl_ptls_t ptls2 = allstates[i]; + if (ptls2 == NULL) + continue; + 
jl_task_t *t = ptls2->root_task; + if (t->stkbuf != NULL) { + if (j == l) + goto restart; + jl_array_data(a,void*)[j++] = t; + } + small_arraylist_t *live_tasks = &ptls2->heap.live_tasks; + size_t n = mtarraylist_length(live_tasks); + for (size_t i = 0; i < n; i++) { + jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); + if (t->stkbuf != NULL) { + if (j == l) + goto restart; + jl_array_data(a,void*)[j++] = t; + } + } } - l = jl_array_len(a); if (j < l) { JL_GC_PUSH1(&a); jl_array_del_end(a, l - j); diff --git a/src/gc.c b/src/gc.c index 4221cb8e83f15..04004f267d01c 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1,6 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include "gc.h" +#include "julia.h" #include "julia_gcext.h" #include "julia_assert.h" #ifdef __GLIBC__ @@ -11,6 +12,28 @@ extern "C" { #endif +// Number of GC threads that may run parallel marking +int jl_n_markthreads; +// Number of GC threads that may run concurrent sweeping (0 or 1) +int jl_n_sweepthreads; +// Number of threads currently running the GC mark-loop +_Atomic(int) gc_n_threads_marking; +// Number of threads sweeping +_Atomic(int) gc_n_threads_sweeping; +// Temporary for the `ptls->page_metadata_allocd` used during parallel sweeping +_Atomic(jl_gc_page_stack_t *) gc_allocd_scratch; +// `tid` of mutator thread that triggered GC +_Atomic(int) gc_master_tid; +// `tid` of first GC thread +int gc_first_tid; +// Mutex/cond used to synchronize wakeup of GC threads on parallel marking +uv_mutex_t gc_threads_lock; +uv_cond_t gc_threads_cond; +// To indicate whether concurrent sweeping should run +uv_sem_t gc_sweep_assists_needed; +// Mutex used to coordinate entry of GC threads in the mark loop +uv_mutex_t gc_queue_observer_lock; + // Linked list of callback functions typedef void (*jl_gc_cb_func_t)(void); @@ -26,6 +49,8 @@ static jl_gc_callback_list_t *gc_cblist_pre_gc; static jl_gc_callback_list_t *gc_cblist_post_gc; static jl_gc_callback_list_t *gc_cblist_notify_external_alloc; static jl_gc_callback_list_t *gc_cblist_notify_external_free; +static jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; +typedef void (*jl_gc_cb_notify_gc_pressure_t)(void); #define gc_invoke_callbacks(ty, list, args) \ do { \ @@ -112,15 +137,12 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_fre jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb); } -// Save/restore local mark stack to/from thread-local storage. - -STATIC_INLINE void export_gc_state(jl_ptls_t ptls, jl_gc_mark_sp_t *sp) { - ptls->gc_mark_sp = *sp; -} - -STATIC_INLINE void import_gc_state(jl_ptls_t ptls, jl_gc_mark_sp_t *sp) { - // Has the stack been reallocated in the meantime? - *sp = ptls->gc_mark_sp; +JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable) +{ + if (enable) + jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb); + else + jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb); } // Protect all access to `finalizer_list_marked` and `to_finalize`. @@ -130,6 +152,9 @@ STATIC_INLINE void import_gc_state(jl_ptls_t ptls, jl_gc_mark_sp_t *sp) { static jl_mutex_t finalizers_lock; static uv_mutex_t gc_cache_lock; +// mutex for gc-heap-snapshot. +jl_mutex_t heapsnapshot_lock; + // Flag that tells us whether we need to support conservative marking // of objects. 
static _Atomic(int) support_conservative_marking = 0; @@ -138,18 +163,18 @@ static _Atomic(int) support_conservative_marking = 0; * Note about GC synchronization: * * When entering `jl_gc_collect()`, `jl_gc_running` is atomically changed from - * `0` to `1` to make sure that only one thread can be running the GC. Other - * threads that enters `jl_gc_collect()` at the same time (or later calling + * `0` to `1` to make sure that only one thread can be running `_jl_gc_collect`. Other + * mutator threads that enter `jl_gc_collect()` at the same time (or later calling * from unmanaged code) will wait in `jl_gc_collect()` until the GC is finished. * - * Before starting the mark phase the GC thread calls `jl_safepoint_gc_start()` + * Before starting the mark phase the GC thread calls `jl_safepoint_start_gc()` * and `jl_gc_wait_for_the_world()` * to make sure all the thread are in a safe state for the GC. The function * activates the safepoint and wait for all the threads to get ready for the * GC (`gc_state != 0`). It also acquires the `finalizers` lock so that no * other thread will access them when the GC is running. * - * During the mark and sweep phase of the GC, the threads that are not running + * During the mark and sweep phase of the GC, the mutator threads that are not running * the GC should either be running unmanaged code (or code section that does * not have a GC critical region mainly including storing to the stack or * another object) or paused at a safepoint and wait for the GC to finish. @@ -168,44 +193,36 @@ static _Atomic(int) support_conservative_marking = 0; jl_gc_num_t gc_num = {0}; static size_t last_long_collect_interval; - -pagetable_t memory_map; +int gc_n_threads; +jl_ptls_t* gc_all_tls_states; +gc_heapstatus_t gc_heap_stats = {0}; +int next_sweep_full = 0; +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) +{ + return jl_buff_tag; +} // List of marked big objects. Not per-thread. Accessed only by master thread. bigval_t *big_objects_marked = NULL; -// finalization +// -- Finalization -- // `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers. -// If an object pointer has the lowest bit set, the next pointer is an unboxed -// c function pointer. +// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer. +// If an object pointer has the second lowest bit set, the current pointer is a c object pointer. +// It must be aligned at least 4, and it is finalized immediately (at "quiescence"). // `to_finalize` should not have tagged pointers. arraylist_t finalizer_list_marked; arraylist_t to_finalize; JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0; + NOINLINE uintptr_t gc_get_stack_ptr(void) { return (uintptr_t)jl_get_frame_addr(); } -#define should_timeout() 0 - -static void jl_gc_wait_for_the_world(void) -{ - if (jl_n_threads > 1) - jl_wake_libuv(); - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - // This acquire load pairs with the release stores - // in the signal handler of safepoint so we are sure that - // all the stores on those threads are visible. - // We're currently also using atomic store release in mutator threads - // (in jl_gc_state_set), but we may want to use signals to flush the - // memory operations on those threads lazily instead.
- while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) - jl_cpu_pause(); // yield? - } -} +void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads); // malloc wrappers, aligned allocation @@ -267,17 +284,18 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1); } -static void run_finalizer(jl_task_t *ct, jl_value_t *o, jl_value_t *ff) +static void run_finalizer(jl_task_t *ct, void *o, void *ff) { - if (gc_ptr_tag(o, 1)) { - ((void (*)(void*))ff)(gc_ptr_clear_tag(o, 1)); + int ptr_finalizer = gc_ptr_tag(o, 1); + o = gc_ptr_clear_tag(o, 3); + if (ptr_finalizer) { + ((void (*)(void*))ff)((void*)o); return; } - jl_value_t *args[2] = {ff,o}; JL_TRY { size_t last_age = ct->world_age; ct->world_age = jl_atomic_load_acquire(&jl_world_counter); - jl_apply(args, 2); + jl_apply_generic((jl_value_t*)ff, (jl_value_t**)&o, 1); ct->world_age = last_age; } JL_CATCH { @@ -342,7 +360,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o, // The first two entries are assumed to be empty and the rest are assumed to // be pointers to `jl_value_t` objects -static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) +static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT { void **items = list->items; items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2); @@ -353,7 +371,7 @@ static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) // Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock // to be hold for the current thread and will release the lock when the // function returns. -static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) +static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE { // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring // of `finalizer`) in a finalizer: @@ -362,7 +380,7 @@ static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) arraylist_push(list, list->items[0]); arraylist_push(list, list->items[1]); jl_gc_push_arraylist(ct, list); - jl_value_t **items = (jl_value_t**)list->items; + void **items = list->items; size_t len = list->len; JL_UNLOCK_NOGC(&finalizers_lock); // run finalizers in reverse order they were added, so lower-level finalizers run last @@ -375,7 +393,16 @@ static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) ct->sticky = sticky; } -static void run_finalizers(jl_task_t *ct) +static uint64_t finalizer_rngState[JL_RNG_SIZE]; + +void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT; + +JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void) +{ + jl_rng_split(finalizer_rngState, jl_current_task->rngState); +} + +static void run_finalizers(jl_task_t *ct, int finalizers_thread) { // Racy fast path: // The race here should be OK since the race can only happen if @@ -396,9 +423,19 @@ static void run_finalizers(jl_task_t *ct) } jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0); arraylist_new(&to_finalize, 0); + + uint64_t save_rngState[JL_RNG_SIZE]; + memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState)); + jl_rng_split(ct->rngState, finalizer_rngState); + // This releases the finalizers lock. 
+ int8_t was_in_finalizer = ct->ptls->in_finalizer; + ct->ptls->in_finalizer = !finalizers_thread; jl_gc_run_finalizers_in_list(ct, &copied_list); + ct->ptls->in_finalizer = was_in_finalizer; arraylist_free(&copied_list); + + memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState)); } JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) @@ -407,9 +444,7 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct) ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) { - ptls->in_finalizer = 1; - run_finalizers(ct); - ptls->in_finalizer = 0; + run_finalizers(ct, 0); } } @@ -464,6 +499,11 @@ JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on) } } +JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void) +{ + return jl_current_task->ptls->in_finalizer; +} + static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT { void **items = flist->items; @@ -480,12 +520,22 @@ static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT void jl_gc_run_all_finalizers(jl_task_t *ct) { + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + // this is called from `jl_atexit_hook`; threads could still be running + // so we have to guard the finalizers' lists + JL_LOCK_NOGC(&finalizers_lock); schedule_all_finalizers(&finalizer_list_marked); - for (int i = 0;i < jl_n_threads;i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - schedule_all_finalizers(&ptls2->finalizers); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + schedule_all_finalizers(&ptls2->finalizers); } - run_finalizers(ct); + // unlock here because `run_finalizers` locks this + JL_UNLOCK_NOGC(&finalizers_lock); + run_finalizers(ct, 1); } void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT @@ -521,9 +571,16 @@ JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); } +// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads) +JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT +{ + assert(!gc_ptr_tag(v, 3)); + jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f); +} + JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT { - if (__unlikely(jl_typeis(f, jl_voidpointer_type))) { + if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) { jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); } else { @@ -541,9 +598,14 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o) arraylist_new(&copied_list, 0); // No need to check the to_finalize list since the user is apparently // still holding a reference to the object - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i); + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i); } 
finalize_object(&finalizer_list_marked, o, &copied_list, 0); if (copied_list.len > 0) { @@ -577,9 +639,11 @@ static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) static void gc_sweep_foreign_objs(void) { - for (int i = 0;i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - gc_sweep_foreign_objs_in_list(&ptls2->sweep_objs); + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + gc_sweep_foreign_objs_in_list(&ptls2->sweep_objs); } } @@ -590,21 +654,31 @@ static int64_t last_gc_total_bytes = 0; // under this limit, but we will go above it rather than halting. #ifdef _P64 typedef uint64_t memsize_t; -#define default_collect_interval (5600*1024*sizeof(void*)) -static size_t max_collect_interval = 1250000000UL; -// Eventually we can expose this to the user/ci. -memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; +static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); +static size_t total_mem; +// We expose this to the user/ci as jl_gc_set_max_memory +static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; #else typedef uint32_t memsize_t; -#define default_collect_interval (3200*1024*sizeof(void*)) -static size_t max_collect_interval = 500000000UL; +static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); // Work really hard to stay within 2GB // Alternative is to risk running out of address space // on 32 bit architectures. -memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024; +#define MAX32HEAP 1536 * 1024 * 1024 +static memsize_t max_total_memory = (memsize_t) MAX32HEAP; #endif - +// heuristic stuff for https://dl.acm.org/doi/10.1145/3563323 +// start with values that are in the target ranges to reduce transient hiccups at startup +static uint64_t old_pause_time = 1e7; // 10 ms +static uint64_t old_mut_time = 1e9; // 1 second +static uint64_t old_heap_size = 0; +static uint64_t old_alloc_diff = default_collect_interval; +static uint64_t old_freed_diff = default_collect_interval; +static uint64_t gc_end_time = 0; +static int thrash_counter = 0; +static int thrashing = 0; // global variables for GC stats +static uint64_t freed_in_runtime = 0; // Resetting the object to a young object, this is used when marking the // finalizer list to collect them the next time because the object is very @@ -619,7 +693,7 @@ static int mark_reset_age = 0; * * <-[(quick)sweep]- * | - * ----> GC_OLD <--[(quick)sweep && age>promotion]-- + * ----> GC_OLD <--[(quick)sweep]------------------- * | | | * | | GC_MARKED (in remset) | * | | ^ | | @@ -636,9 +710,9 @@ static int mark_reset_age = 0; * ========= above this line objects are old ========= | * | * ----[new]------> GC_CLEAN ------[mark]-----------> GC_MARKED - * | ^ | - * <-[(quick)sweep]--- | | - * --[(quick)sweep && age<=promotion]--- + * | + * <-[(quick)sweep]--- + * */ // A quick sweep is a sweep where `!sweep_full` @@ -652,18 +726,11 @@ static int mark_reset_age = 0; // When a write barrier triggers, the offending marked object is both queued, // so as not to trigger the barrier again, and put in the remset. 
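+// (Illustrative example, not new behavior: storing a young object into a field of an
+// already-marked old object -- e.g. jl_gc_wb(parent, child) after a field write -- is
+// what triggers the barrier above; the old parent is queued again and pushed onto the
+// remset so the next quick collection still scans it and finds the young child.)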
- -#define PROMOTE_AGE 1 -// this cannot be increased as is without changing : -// - sweep_page which is specialized for 1bit age -// - the size of the age storage in jl_gc_pagemeta_t - - static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking -static int prev_sweep_full = 1; - -#define inc_sat(v,s) v = (v) >= s ? s : (v)+1 +int prev_sweep_full = 1; +int current_sweep_full = 0; +int under_pressure = 0; // Full collection heuristics static int64_t live_bytes = 0; @@ -707,9 +774,11 @@ static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT // No other threads can be running marking at the same time static void gc_sync_all_caches_nolock(jl_ptls_t ptls) { - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; - gc_sync_cache_nolock(ptls, &ptls2->gc_cache); + assert(gc_n_threads); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) + gc_sync_cache_nolock(ptls, &ptls2->gc_cache); } } @@ -727,21 +796,13 @@ STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr, ptls->gc_cache.nbig_obj = nobj + 1; } -// `gc_setmark_tag` can be called concurrently on multiple threads. -// In all cases, the function atomically sets the mark bits and returns -// the GC bits set as well as if the tag was unchanged by this thread. -// All concurrent calls on the same object are guaranteed to be setting the -// bits to the same value. -// For normal objects, this is the bits with only `GC_MARKED` changed to `1` -// For buffers, this is the bits of the owner object. -// For `mark_reset_age`, this is `GC_MARKED` with `GC_OLD` cleared. -// The return value is `1` if the object was not marked before. -// Returning `0` can happen if another thread marked it in parallel. -STATIC_INLINE int gc_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode, - uintptr_t tag, uint8_t *bits) JL_NOTSAFEPOINT -{ - assert(!gc_marked(tag)); +// Atomically set the mark bit for object and return whether it was previously unmarked +FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT +{ assert(gc_marked(mark_mode)); + uintptr_t tag = o->header; + if (gc_marked(tag)) + return 0; if (mark_reset_age) { // Reset the object as if it was just allocated mark_mode = GC_MARKED; @@ -753,7 +814,10 @@ STATIC_INLINE int gc_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode, tag = tag | mark_mode; assert((tag & 0x3) == mark_mode); } - *bits = mark_mode; + // XXX: note that marking not only sets the GC bits but also updates the + // page metadata for pool allocated objects. 
+ // The second step is **not** idempotent, so we need a compare exchange here + // (instead of a pair of load&store) to avoid marking an object twice tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag); verify_val(jl_valueof(o)); return !gc_marked(tag); @@ -764,7 +828,7 @@ STATIC_INLINE int gc_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode, STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { - assert(!page_metadata(o)); + assert(!gc_alloc_map_is_set((char*)o)); bigval_t *hdr = bigval_header(o); if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += hdr->sz & ~3; @@ -775,9 +839,8 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, // We can't easily tell if the object is old or being promoted // from the gc bits but if the `age` is `0` then the object // must be already on a young list. - if (mark_reset_age && hdr->age) { + if (mark_reset_age) { // Reset the object as if it was just allocated - hdr->age = 0; gc_queue_big_marked(ptls, hdr, 1); } } @@ -788,13 +851,11 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, // This function should be called exactly once during marking for each pool // object being marked to update the page metadata. STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, - uint8_t mark_mode, - jl_gc_pagemeta_t *page) JL_NOTSAFEPOINT + uint8_t mark_mode, jl_gc_pagemeta_t *page) JL_NOTSAFEPOINT { #ifdef MEMDEBUG gc_setmark_big(ptls, o, mark_mode); #else - jl_assume(page); if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += page->osize; static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), ""); @@ -804,10 +865,6 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, ptls->gc_cache.scanned_bytes += page->osize; if (mark_reset_age) { page->has_young = 1; - char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET; - int obj_id = (((char*)o) - page_begin) / page->osize; - uint8_t *ages = page->ages + obj_id / 8; - jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8))); } } objprofile_count(jl_typeof(jl_valueof(o)), @@ -819,7 +876,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, STATIC_INLINE void gc_setmark_pool(jl_ptls_t ptls, jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { - gc_setmark_pool_(ptls, o, mark_mode, page_metadata(o)); + gc_setmark_pool_(ptls, o, mark_mode, page_metadata((char*)o)); } STATIC_INLINE void gc_setmark(jl_ptls_t ptls, jl_taggedvalue_t *o, @@ -836,19 +893,16 @@ STATIC_INLINE void gc_setmark(jl_ptls_t ptls, jl_taggedvalue_t *o, STATIC_INLINE void gc_setmark_buf_(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL_NOTSAFEPOINT { jl_taggedvalue_t *buf = jl_astaggedvalue(o); - uintptr_t tag = buf->header; - if (gc_marked(tag)) - return; - uint8_t bits; + uint8_t bits = (gc_old(buf->header) && !mark_reset_age) ? GC_OLD_MARKED : GC_MARKED;; // If the object is larger than the max pool size it can't be a pool object. // This should be accurate most of the time but there might be corner cases // where the size estimate is a little off so we do a pool lookup to make // sure. 
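The comment above is the central concurrency argument for gc_try_setmark_tag: several mark threads can race on one header, and only the thread that actually flips the mark bit may do the non-idempotent page-metadata accounting, so the claim has to be a single atomic read-modify-write rather than a load followed by a store. A standalone sketch of that claim step with C11 atomics (try_claim and the header_t layout are illustrative, not the real types):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define GC_MARK_BITS ((uintptr_t)0x3)
#define GC_MARKED    ((uintptr_t)0x1)

typedef struct {
    _Atomic(uintptr_t) header;   /* type tag in high bits, GC bits in low 2 */
} header_t;

/* Returns true only for the single thread that transitions the object from
 * unmarked to marked; racing threads see the already-set bit in the value
 * returned by the exchange and back off.  Only the winner may then perform
 * the non-idempotent bookkeeping (page nold / scanned_bytes updates). */
static bool try_claim(header_t *o, uintptr_t mark_mode)
{
    uintptr_t tag = atomic_load_explicit(&o->header, memory_order_relaxed);
    if (tag & GC_MARK_BITS)
        return false;                     /* already marked, cheap early out */
    uintptr_t newtag = (tag & ~GC_MARK_BITS) | mark_mode;
    uintptr_t old = atomic_exchange_explicit(&o->header, newtag,
                                             memory_order_relaxed);
    return (old & GC_MARK_BITS) == 0;     /* we won iff it was unmarked before */
}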
- if (__likely(gc_setmark_tag(buf, mark_mode, tag, &bits)) && !gc_verifying) { + if (__likely(gc_try_setmark_tag(buf, mark_mode)) && !gc_verifying) { if (minsz <= GC_MAX_SZCLASS) { - jl_gc_pagemeta_t *page = page_metadata(buf); - if (page) { - gc_setmark_pool_(ptls, buf, bits, page); + jl_gc_pagemeta_t *meta = page_metadata(buf); + if (meta != NULL) { + gc_setmark_pool_(ptls, buf, bits, meta); return; } } @@ -861,40 +915,9 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL gc_setmark_buf_(ptls, o, mark_mode, minsz); } -void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT -{ - jl_taggedvalue_t *o = jl_astaggedvalue(v); - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v); - size_t dtsz = jl_datatype_size(dt); - if (o->bits.gc == GC_OLD_MARKED) - return; - o->bits.gc = GC_OLD_MARKED; - if (dt == jl_simplevector_type) { - size_t l = jl_svec_len(v); - dtsz = l * sizeof(void*) + sizeof(jl_svec_t); - } - else if (dt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)v; - if (!a->flags.pooled) - dtsz = GC_MAX_SZCLASS + 1; - } - else if (dt == jl_module_type) { - dtsz = sizeof(jl_module_t); - } - else if (dt == jl_task_type) { - dtsz = sizeof(jl_task_t); - } - else if (dt == jl_symbol_type) { - return; - } - gc_setmark(ptls, o, GC_OLD_MARKED, dtsz); - if (dt->layout->npointers != 0) - jl_gc_queue_root(v); -} - -static inline void maybe_collect(jl_ptls_t ptls) +STATIC_INLINE void maybe_collect(jl_ptls_t ptls) { - if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { + if (jl_atomic_load_relaxed(&gc_heap_stats.heap_size) >= jl_atomic_load_relaxed(&gc_heap_stats.heap_target) || jl_gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { @@ -910,55 +933,77 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here - arraylist_push(&ptls->heap.weak_refs, wr); + small_arraylist_push(&ptls->heap.weak_refs, wr); return wr; } static void clear_weak_refs(void) { - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - size_t n, l = ptls2->heap.weak_refs.len; - void **lst = ptls2->heap.weak_refs.items; - for (n = 0; n < l; n++) { - jl_weakref_t *wr = (jl_weakref_t*)lst[n]; - if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) - wr->value = (jl_value_t*)jl_nothing; + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) { + size_t n, l = ptls2->heap.weak_refs.len; + void **lst = ptls2->heap.weak_refs.items; + for (n = 0; n < l; n++) { + jl_weakref_t *wr = (jl_weakref_t*)lst[n]; + if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) + wr->value = (jl_value_t*)jl_nothing; + } } } } static void sweep_weak_refs(void) { - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - size_t n = 0; - size_t ndel = 0; - size_t l = ptls2->heap.weak_refs.len; - void **lst = ptls2->heap.weak_refs.items; - if (l == 0) - continue; - while (1) { - jl_weakref_t *wr = (jl_weakref_t*)lst[n]; - if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) - n++; - else - ndel++; - if (n >= l - ndel) - break; - void *tmp = lst[n]; - lst[n] = lst[n + ndel]; - lst[n + ndel] = tmp; + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) { + size_t n = 0; + size_t ndel = 0; + size_t l = ptls2->heap.weak_refs.len; 
+ void **lst = ptls2->heap.weak_refs.items; + if (l == 0) + continue; + while (1) { + jl_weakref_t *wr = (jl_weakref_t*)lst[n]; + if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) + n++; + else + ndel++; + if (n >= l - ndel) + break; + void *tmp = lst[n]; + lst[n] = lst[n + ndel]; + lst[n + ndel] = tmp; + } + ptls2->heap.weak_refs.len -= ndel; } - ptls2->heap.weak_refs.len -= ndel; } } +STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc) + sz; + if (alloc_acc < 16*1024) + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, alloc_acc); + else { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + } +} + +STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT +{ + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_num.free_acc) + sz); +} + // big value list // Size includes the tag and the tag is not cleared!! -static inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) +STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) { maybe_collect(ptls); size_t offs = offsetof(bigval_t, header); @@ -977,23 +1022,29 @@ static inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); + jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); #endif v->sz = allocsz; - v->age = 0; gc_big_object_link(v, &ptls->heap.big_objects); return jl_valueof(&v->header); } -// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. +// Deprecated version, supported for legacy code. JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz) { jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz); - maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag); return val; } +// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_big_alloc_instrumented(jl_ptls_t ptls, size_t sz, jl_value_t *type) +{ + jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz); + maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type); + return val; +} // This wrapper exists only to prevent `jl_gc_big_alloc_inner` from being inlined into // its callers. 
We provide an external-facing interface for callers, and inline `jl_gc_big_alloc_inner` @@ -1013,16 +1064,8 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT int old_bits = bits; if (gc_marked(bits)) { pv = &v->next; - int age = v->age; - if (age >= PROMOTE_AGE || bits == GC_OLD_MARKED) { - if (sweep_full || bits == GC_MARKED) { - bits = GC_OLD; - } - } - else { - inc_sat(age, PROMOTE_AGE); - v->age = age; - bits = GC_CLEAN; + if (sweep_full || bits == GC_MARKED) { + bits = GC_OLD; } v->bits.gc = bits; } @@ -1032,6 +1075,8 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT if (nxt) nxt->prev = pv; gc_num.freed += v->sz&~3; + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, + jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - (v->sz&~3)); #ifdef MEMDEBUG memset(v, 0xbb, v->sz&~3); #endif @@ -1048,11 +1093,15 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT { gc_time_big_start(); - for (int i = 0;i < jl_n_threads;i++) - sweep_big_list(sweep_full, &jl_all_tls_states[i]->heap.big_objects); + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + sweep_big_list(sweep_full, &ptls2->heap.big_objects); + } if (sweep_full) { bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked); - // Move all survivors from big_objects_marked list to big_objects list. + // Move all survivors from big_objects_marked list to the big_objects list of this thread. if (ptls->heap.big_objects) ptls->heap.big_objects->prev = last_next; *last_next = ptls->heap.big_objects; @@ -1064,10 +1113,9 @@ static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Arrays with malloc'd storage +// tracking Memorys with malloc'd storage -void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT -{ +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ // This is **NOT** a GC safe point. 
mallocarray_t *ma; if (ptls->heap.mafreelist == NULL) { @@ -1077,158 +1125,187 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT ma = ptls->heap.mafreelist; ptls->heap.mafreelist = ma->next; } - ma->a = a; + ma->a = (jl_value_t*)((uintptr_t)m | !!isaligned); ma->next = ptls->heap.mallocarrays; ptls->heap.mallocarrays = ma; } + void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); + jl_batch_accum_heap_size(ptls, sz); } - -static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT +// Only safe to update the heap inside the GC +static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT { - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls = jl_all_tls_states[i]; + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); - dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + if (update_heap) { + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_num.free_acc); + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); + } } } } static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT { - for (int i = 0; i < jl_n_threads; i++) { - jl_ptls_t ptls = jl_all_tls_states[i]; - if (ptls) { - memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls != NULL) { + // don't reset `pool_live_bytes` here jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_num.free_acc, 0); } } } +static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT +{ + jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc); + return live_bytes += inc; +} + void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT { - combine_thread_gc_counts(&gc_num); - live_bytes += (gc_num.deferred_alloc + gc_num.allocd); + combine_thread_gc_counts(&gc_num, 0); + inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd); gc_num.allocd = 0; gc_num.deferred_alloc = 0; reset_thread_gc_counts(); } -size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT 
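jl_gc_count_allocd and jl_batch_accum_heap_size only touch the shared gc_heap_stats.heap_size counter once a thread has accumulated roughly 16 KiB locally, and combine_thread_gc_counts flushes whatever remains when a collection starts, which keeps the hot allocation path off a contended atomic. A minimal sketch of that batching pattern, assuming the 16 KiB cutoff from the code above and illustrative struct names:

#include <stdatomic.h>
#include <stdint.h>

#define FLUSH_THRESHOLD (16 * 1024)   /* same cutoff as jl_batch_accum_heap_size */

static _Atomic(uint64_t) global_heap_size;   /* shared, contended counter */

typedef struct {
    uint64_t alloc_acc;   /* thread-local bytes not yet published */
} tls_counters_t;

/* Called on every allocation: cheap thread-local add, occasional global add. */
static void accum_alloc(tls_counters_t *tls, uint64_t sz)
{
    uint64_t acc = tls->alloc_acc + sz;
    if (acc < FLUSH_THRESHOLD) {
        tls->alloc_acc = acc;                       /* stay local */
    }
    else {
        atomic_fetch_add_explicit(&global_heap_size, acc, memory_order_relaxed);
        tls->alloc_acc = 0;                         /* published, reset batch */
    }
}

/* Called with the world stopped (e.g. at the start of a collection):
 * publish whatever each thread still holds locally. */
static void flush_all(tls_counters_t *tls_list, int nthreads)
{
    for (int i = 0; i < nthreads; i++) {
        atomic_fetch_add_explicit(&global_heap_size, tls_list[i].alloc_acc,
                                  memory_order_relaxed);
        tls_list[i].alloc_acc = 0;
    }
}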
+size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT { - size_t sz = 0; - int isbitsunion = jl_array_isbitsunion(a); - if (jl_array_ndims(a) == 1) - sz = a->elsize * a->maxsize + ((a->elsize == 1 && !isbitsunion) ? 1 : 0); - else - sz = a->elsize * jl_array_len(a); - if (isbitsunion) + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) // account for isbits Union array selector bytes - sz += jl_array_len(a); + sz += m->length; return sz; } -static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT -{ - if (a->flags.how == 2) { - char *d = (char*)a->data - a->offset*a->elsize; - if (a->flags.isaligned) - jl_free_aligned(d); - else - free(d); - gc_num.freed += jl_array_nbytes(a); - gc_num.freecall++; - } -} -static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT +static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { - gc_time_mallocd_array_start(); - for (int t_i = 0;t_i < jl_n_threads;t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; - mallocarray_t *ma = ptls2->heap.mallocarrays; - mallocarray_t **pma = &ptls2->heap.mallocarrays; - while (ma != NULL) { - mallocarray_t *nxt = ma->next; - int bits = jl_astaggedvalue(ma->a)->bits.gc; - if (gc_marked(bits)) { - pma = &ma->next; - } - else { - *pma = nxt; - assert(ma->a->flags.how == 2); - jl_gc_free_array(ma->a); - ma->next = ptls2->heap.mafreelist; - ptls2->heap.mafreelist = ma; + assert(jl_is_genericmemory(v)); + jl_genericmemory_t *m = (jl_genericmemory_t*)v; + assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2); + char *d = (char*)m->ptr; + if (isaligned) + jl_free_aligned(d); + else + free(d); + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, + jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m)); + gc_num.freed += jl_genericmemory_nbytes(m); + gc_num.freecall++; + gc_num.freecall++; +} + +static void sweep_malloced_memory(void) JL_NOTSAFEPOINT +{ + gc_time_mallocd_memory_start(); + assert(gc_n_threads); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) { + mallocarray_t *ma = ptls2->heap.mallocarrays; + mallocarray_t **pma = &ptls2->heap.mallocarrays; + while (ma != NULL) { + mallocarray_t *nxt = ma->next; + jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1); + int bits = jl_astaggedvalue(a)->bits.gc; + if (gc_marked(bits)) { + pma = &ma->next; + } + else { + *pma = nxt; + int isaligned = (uintptr_t)ma->a & 1; + jl_gc_free_memory(a, isaligned); + ma->next = ptls2->heap.mafreelist; + ptls2->heap.mafreelist = ma; + } + gc_time_count_mallocd_memory(bits); + ma = nxt; } - gc_time_count_mallocd_array(bits); - ma = nxt; } } - gc_time_mallocd_array_end(); + gc_time_mallocd_memory_end(); } // pool allocation -static inline jl_taggedvalue_t *reset_page(const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t *fl) JL_NOTSAFEPOINT +STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { assert(GC_PAGE_OFFSET >= sizeof(void*)); pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize; - jl_ptls_t ptls2 = jl_all_tls_states[pg->thread_n]; pg->pool_n = p - ptls2->heap.norm_pools; - memset(pg->ages, 0, GC_PAGE_SZ / 8 / p->osize + 1); jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); - jl_taggedvalue_t *next = (jl_taggedvalue_t*)pg->data; - if (fl == NULL) { - next->next = NULL; - } - else { - // 
Insert free page after first page. - // This prevents unnecessary fragmentation from multiple pages - // being allocated from at the same time. Instead, objects will - // only ever be allocated from the first object in the list. - // This is specifically being relied on by the implementation - // of jl_gc_internal_obj_base_ptr() so that the function does - // not have to traverse the entire list. - jl_taggedvalue_t *flpage = (jl_taggedvalue_t *)gc_page_data(fl); - next->next = flpage->next; - flpage->next = beg; - beg = fl; - } pg->has_young = 0; pg->has_marked = 0; - pg->fl_begin_offset = -1; - pg->fl_end_offset = -1; + pg->prev_nold = 0; + pg->nold = 0; + pg->fl_begin_offset = UINT16_MAX; + pg->fl_end_offset = UINT16_MAX; return beg; } +jl_gc_page_stack_t global_page_pool_lazily_freed; +jl_gc_page_stack_t global_page_pool_clean; +jl_gc_page_stack_t global_page_pool_freed; +pagetable_t alloc_map; + // Add a new page to the pool. Discards any pages in `p->newpages` before. -static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT +static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT { // Do not pass in `ptls` as argument. This slows down the fast path // in pool_alloc significantly jl_ptls_t ptls = jl_current_task->ptls; - jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); + jl_gc_pagemeta_t *pg = pop_lf_back(&ptls->page_metadata_buffered); + if (pg != NULL) { + gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED); + } + else { + pg = jl_gc_alloc_page(); + } pg->osize = p->osize; - pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1); pg->thread_n = ptls->tid; - jl_taggedvalue_t *fl = reset_page(p, pg, NULL); + set_page_metadata(pg); + push_lf_back(&ptls->page_metadata_allocd, pg); + jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, GC_PAGE_SZ); p->newpages = fl; return fl; } // Size includes the tag and the tag is not cleared!! -static inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, +STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize) { // Use the pool offset instead of the pool address as the argument @@ -1242,21 +1319,24 @@ static inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset maybe_collect(ptls); jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_num.pool_live_bytes) + osize); jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; - if (v) { + if (v != NULL) { jl_taggedvalue_t *next = v->next; p->freelist = next; if (__unlikely(gc_page_data(v) != gc_page_data(next))) { // we only update pg's fields when the freelist changes page // since pg's metadata is likely not in cache - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(v)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(v)); assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; } + msan_allocated_memory(v, osize); return jl_valueof(v); } // if the freelist is empty we reuse empty but not freed pages @@ -1265,34 +1345,39 @@ static inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset // If there's no pages left or the current page is used up, // we need to use the slow path. 
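The pool allocator above tries the per-size-class freelist first and otherwise bumps through newpages, only taking the slow path (a fresh page) when the current page is exhausted. A compact standalone sketch of that freelist-then-bump structure, with a single pool, no page metadata, and illustrative names:

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#define PAGE_SZ (16 * 1024)

typedef struct cell { struct cell *next; } cell_t;

typedef struct {
    size_t osize;      /* object size for this pool */
    cell_t *freelist;  /* recycled cells from swept pages */
    char   *bump;      /* next free byte in the current fresh page */
    char   *bump_end;  /* end of the current fresh page */
} pool_t;

static void *pool_alloc(pool_t *p)
{
    /* 1. fast path: reuse a swept cell */
    cell_t *v = p->freelist;
    if (v != NULL) {
        p->freelist = v->next;
        return v;
    }
    /* 2. bump into the current fresh page */
    if (p->bump != NULL && p->bump + p->osize <= p->bump_end) {
        void *r = p->bump;
        p->bump += p->osize;
        return r;
    }
    /* 3. slow path: grab a whole new page and bump into it
     *    (the real code maps pages and tracks per-page metadata) */
    char *page = malloc(PAGE_SZ);
    if (page == NULL)
        return NULL;
    p->bump = page + p->osize;
    p->bump_end = page + PAGE_SZ;
    return page;
}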
char *cur_page = gc_page_data((char*)v - 1); - if (__unlikely(!v || cur_page + GC_PAGE_SZ < (char*)next)) { - if (v) { + if (__unlikely(v == NULL || cur_page + GC_PAGE_SZ < (char*)next)) { + if (v != NULL) { // like the freelist case, // but only update the page metadata when it is full - jl_gc_pagemeta_t *pg = jl_assume(page_metadata((char*)v - 1)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe((char*)v - 1)); assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; - v = *(jl_taggedvalue_t**)cur_page; } - // Not an else!! - if (!v) - v = add_page(p); + v = gc_add_page(p); next = (jl_taggedvalue_t*)((char*)v + osize); } p->newpages = next; + msan_allocated_memory(v, osize); return jl_valueof(v); } -// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code. +// Deprecated version, supported for legacy code. JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, int osize) { jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize); - maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag); return val; } +// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc_instrumented(jl_ptls_t ptls, int pool_offset, + int osize, jl_value_t* type) +{ + jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize); + maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type); + return val; +} // This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into // its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner` @@ -1313,49 +1398,73 @@ int jl_gc_classify_pools(size_t sz, int *osize) // sweep phase -int64_t lazy_freed_pages = 0; +gc_fragmentation_stat_t gc_page_fragmentation_stats[JL_GC_N_POOLS]; +JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS]; + +STATIC_INLINE void gc_update_page_fragmentation_data(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT +{ + gc_fragmentation_stat_t *stats = &gc_page_fragmentation_stats[pg->pool_n]; + jl_atomic_fetch_add(&stats->n_freed_objs, pg->nfree); + jl_atomic_fetch_add(&stats->n_pages_allocd, 1); +} + +STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT +{ + for (int i = 0; i < JL_GC_N_POOLS; i++) { + gc_fragmentation_stat_t *stats = &gc_page_fragmentation_stats[i]; + double utilization = 1.0; + size_t n_freed_objs = jl_atomic_load_relaxed(&stats->n_freed_objs); + size_t n_pages_allocd = jl_atomic_load_relaxed(&stats->n_pages_allocd); + if (n_pages_allocd != 0) { + utilization -= ((double)n_freed_objs * (double)jl_gc_sizeclasses[i]) / (double)n_pages_allocd / (double)GC_PAGE_SZ; + } + jl_gc_page_utilization_stats[i] = utilization; + jl_atomic_store_relaxed(&stats->n_freed_objs, 0); + jl_atomic_store_relaxed(&stats->n_pages_allocd, 0); + } +} + +int64_t buffered_pages = 0; // Returns pointer to terminal pointer of list rooted at *pfl. 
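gc_dump_page_utilization_data above reduces the per-size-class counters to a single fraction, utilization = 1 - freed_bytes / (pages * page_size). A tiny worked sketch of that computation; the page size and the numbers in main are illustrative:

#include <stddef.h>
#include <stdio.h>

#define GC_PAGE_SZ (16 * 1024)

/* utilization = 1 - (freed objects * object size) / (pages * page size) */
static double page_utilization(size_t n_freed_objs, size_t osize, size_t n_pages)
{
    if (n_pages == 0)
        return 1.0;   /* nothing allocated in this size class this cycle */
    return 1.0 - ((double)n_freed_objs * (double)osize)
                 / ((double)n_pages * (double)GC_PAGE_SZ);
}

int main(void)
{
    /* e.g. 100 pages of 64-byte objects with 5000 freed slots -> ~0.80 */
    printf("%.2f\n", page_utilization(5000, 64, 100));
    return 0;
}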
-static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT +static void gc_sweep_page(jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *buffered, + jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT { char *data = pg->data; - uint8_t *ages = pg->ages; jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET); - char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize; + char *lim = data + GC_PAGE_SZ - osize; + char *lim_newpages = data + GC_PAGE_SZ; + if (gc_page_data((char*)p->newpages - 1) == data) { + lim_newpages = (char*)p->newpages; + } size_t old_nfree = pg->nfree; size_t nfree; + int re_use_page = 1; + int keep_as_local_buffer = 0; int freedall = 1; int pg_skpd = 1; if (!pg->has_marked) { + re_use_page = 0; + #ifdef _P64 // TODO: re-enable on `_P32`? // lazy version: (empty) if the whole page was already unused, free it (return it to the pool) // eager version: (freedall) free page as soon as possible // the eager one uses less memory. // FIXME - need to do accounting on a per-thread basis // on quick sweeps, keep a few pages empty but allocated for performance - if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) { - jl_taggedvalue_t *begin = reset_page(p, pg, p->newpages); - p->newpages = begin; - begin->next = (jl_taggedvalue_t*)0; - lazy_freed_pages++; - } - else { - jl_gc_free_page(data); + if (!current_sweep_full && buffered_pages <= default_collect_interval / GC_PAGE_SZ) { + buffered_pages++; + keep_as_local_buffer = 1; } + #endif nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize; goto done; } // For quick sweep, we might be able to skip the page if the page doesn't // have any young live cell before marking. - if (!sweep_full && !pg->has_young) { + if (!current_sweep_full && !pg->has_young) { assert(!prev_sweep_full || pg->prev_nold >= pg->nold); if (!prev_sweep_full || pg->prev_nold == pg->nold) { - // the position of the freelist begin/end in this page - // is stored in its metadata - if (pg->fl_begin_offset != (uint16_t)-1) { - *pfl = page_pfl_beg(pg); - pfl = (jl_taggedvalue_t**)page_pfl_end(pg); - } freedall = 0; nfree = pg->nfree; goto done; @@ -1368,43 +1477,28 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t int has_young = 0; int16_t prev_nold = 0; int pg_nfree = 0; + jl_taggedvalue_t *fl = NULL; + jl_taggedvalue_t **pfl = &fl; jl_taggedvalue_t **pfl_begin = NULL; - uint8_t msk = 1; // mask for the age bit in the current age byte while ((char*)v <= lim) { int bits = v->bits.gc; - if (!gc_marked(bits)) { + // if an object is past `lim_newpages` then we can guarantee it's garbage + if (!gc_marked(bits) || (char*)v >= lim_newpages) { *pfl = v; pfl = &v->next; - pfl_begin = pfl_begin ? pfl_begin : pfl; + pfl_begin = (pfl_begin != NULL) ? 
pfl_begin : pfl; pg_nfree++; - *ages &= ~msk; } else { // marked young or old - if (*ages & msk || bits == GC_OLD_MARKED) { // old enough - // `!age && bits == GC_OLD_MARKED` is possible for - // non-first-class objects like `jl_binding_t` - if (sweep_full || bits == GC_MARKED) { - bits = v->bits.gc = GC_OLD; // promote - } - prev_nold++; - } - else { - assert(bits == GC_MARKED); - bits = v->bits.gc = GC_CLEAN; // unmark - has_young = 1; + if (current_sweep_full || bits == GC_MARKED) { // old enough + bits = v->bits.gc = GC_OLD; // promote } + prev_nold++; has_marked |= gc_marked(bits); - *ages |= msk; freedall = 0; } v = (jl_taggedvalue_t*)((char*)v + osize); - msk <<= 1; - if (!msk) { - msk = 1; - ages++; - } } - assert(!freedall); pg->has_marked = has_marked; pg->has_young = has_young; @@ -1413,12 +1507,12 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t pg->fl_end_offset = (char*)pfl - data; } else { - pg->fl_begin_offset = -1; - pg->fl_end_offset = -1; + pg->fl_begin_offset = UINT16_MAX; + pg->fl_end_offset = UINT16_MAX; } pg->nfree = pg_nfree; - if (sweep_full) { + if (current_sweep_full) { pg->nold = 0; pg->prev_nold = prev_nold; } @@ -1426,109 +1520,48 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t nfree = pg->nfree; done: + if (re_use_page) { + push_lf_back(allocd, pg); + } + else { + gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ); + if (keep_as_local_buffer) { + push_lf_back(buffered, pg); + } + else { + push_lf_back(&global_page_pool_lazily_freed, pg); + } + } + gc_update_page_fragmentation_data(pg); gc_time_count_page(freedall, pg_skpd); - gc_num.freed += (nfree - old_nfree) * osize; - return pfl; + jl_ptls_t ptls = gc_all_tls_states[pg->thread_n]; + jl_atomic_fetch_add(&ptls->gc_num.pool_live_bytes, GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); + jl_atomic_fetch_add((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } // the actual sweeping over all allocated pages in a memory pool -static inline void sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT +STATIC_INLINE void gc_sweep_pool_page(jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *lazily_freed, + jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { int p_n = pg->pool_n; int t_n = pg->thread_n; - jl_ptls_t ptls2 = jl_all_tls_states[t_n]; + jl_ptls_t ptls2 = gc_all_tls_states[t_n]; jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; int osize = pg->osize; - pfl[t_n * JL_GC_N_POOLS + p_n] = sweep_page(p, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); -} - -// sweep over a pagetable0 for all allocated pages -static inline int sweep_pool_pagetable0(jl_taggedvalue_t ***pfl, pagetable0_t *pagetable0, int sweep_full) JL_NOTSAFEPOINT -{ - unsigned ub = 0; - unsigned alloc = 0; - for (unsigned pg_i = 0; pg_i <= pagetable0->ub; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - unsigned j; - if (!line) - continue; - ub = pg_i; - alloc = 1; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - jl_gc_pagemeta_t *pg = pagetable0->meta[pg_i * 32 + j]; - sweep_pool_page(pfl, pg, sweep_full); - } - } - pagetable0->ub = ub; - return alloc; -} - -// sweep over pagetable1 for all pagetable0 that may contain allocated pages -static inline int sweep_pool_pagetable1(jl_taggedvalue_t ***pfl, pagetable1_t *pagetable1, int sweep_full) JL_NOTSAFEPOINT -{ - unsigned ub = 0; - unsigned alloc = 0; - for 
(unsigned pg_i = 0; pg_i <= pagetable1->ub; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - unsigned j; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - pagetable0_t *pagetable0 = pagetable1->meta0[pg_i * 32 + j]; - if (pagetable0 && !sweep_pool_pagetable0(pfl, pagetable0, sweep_full)) - pagetable1->allocmap0[pg_i] &= ~(1 << j); // no allocations found, remember that for next time - } - if (pagetable1->allocmap0[pg_i]) { - ub = pg_i; - alloc = 1; - } - } - pagetable1->ub = ub; - return alloc; -} - -// sweep over all memory for all pagetable1 that may contain allocated pages -static void sweep_pool_pagetable(jl_taggedvalue_t ***pfl, int sweep_full) JL_NOTSAFEPOINT -{ - if (REGION2_PG_COUNT == 1) { // compile-time optimization - pagetable1_t *pagetable1 = memory_map.meta1[0]; - if (pagetable1) - sweep_pool_pagetable1(pfl, pagetable1, sweep_full); - return; - } - unsigned ub = 0; - for (unsigned pg_i = 0; pg_i <= memory_map.ub; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - unsigned j; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - pagetable1_t *pagetable1 = memory_map.meta1[pg_i * 32 + j]; - if (pagetable1 && !sweep_pool_pagetable1(pfl, pagetable1, sweep_full)) - memory_map.allocmap1[pg_i] &= ~(1 << j); // no allocations found, remember that for next time - } - if (memory_map.allocmap1[pg_i]) { - ub = pg_i; - } - } - memory_map.ub = ub; + gc_sweep_page(p, allocd, lazily_freed, pg, osize); } // sweep over all memory that is being used and not in a pool static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT { - sweep_malloced_arrays(); + sweep_malloced_memory(); sweep_big(ptls, sweep_full); } static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT { - assert(pg->fl_begin_offset != (uint16_t)-1); + assert(pg->fl_begin_offset != UINT16_MAX); char *cur_pg = gc_page_data(last); // Fast path for page that has no allocation jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset); @@ -1542,15 +1575,74 @@ static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_ pg->nfree = nfree; } +void gc_sweep_wake_all(void) +{ + uv_mutex_lock(&gc_threads_lock); + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_atomic_fetch_add(&ptls2->gc_sweeps_requested, 1); + } + uv_cond_broadcast(&gc_threads_cond); + uv_mutex_unlock(&gc_threads_lock); +} + +void gc_sweep_pool_parallel(void) +{ + jl_atomic_fetch_add(&gc_n_threads_sweeping, 1); + jl_gc_page_stack_t *allocd_scratch = jl_atomic_load(&gc_allocd_scratch); + if (allocd_scratch != NULL) { + while (1) { + int found_pg = 0; + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + continue; + } + jl_gc_page_stack_t *allocd = &allocd_scratch[t_i]; + jl_gc_pagemeta_t *pg = pop_lf_back(&ptls2->page_metadata_allocd); + if (pg == NULL) { + continue; + } + gc_sweep_pool_page(allocd, &ptls2->page_metadata_buffered, pg); + found_pg = 1; + } + if (!found_pg) { + break; + } + } + } + jl_atomic_fetch_add(&gc_n_threads_sweeping, -1); +} + +void gc_sweep_wait_for_all(void) +{ + jl_atomic_store(&gc_allocd_scratch, NULL); + while (jl_atomic_load_relaxed(&gc_n_threads_sweeping) != 0) { + jl_cpu_pause(); + } +} + +void gc_free_pages(void) +{ + while (1) { + jl_gc_pagemeta_t *pg = pop_lf_back(&global_page_pool_lazily_freed); + if 
(pg == NULL) { + break; + } + jl_gc_free_page(pg); + push_lf_back(&global_page_pool_freed, pg); + } +} + // setup the data-structures for a sweep over all memory pools -static void gc_sweep_pool(int sweep_full) +static void gc_sweep_pool(void) { gc_time_pool_start(); - lazy_freed_pages = 0; + buffered_pages = 0; - // For the benfit of the analyzer, which doesn't know that jl_n_threads + // For the benefit of the analyzer, which doesn't know that gc_n_threads // doesn't change over the course of this function - size_t n_threads = jl_n_threads; + size_t n_threads = gc_n_threads; // allocate enough space to hold the end of the free list chain // for every thread and pool size @@ -1559,12 +1651,19 @@ static void gc_sweep_pool(int sweep_full) // update metadata of pages that were pointed to by freelist or newpages from a pool // i.e. pages being the current allocation target for (int t_i = 0; t_i < n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + for (int i = 0; i < JL_GC_N_POOLS; i++) { + pfl[t_i * JL_GC_N_POOLS + i] = NULL; + } + continue; + } + jl_atomic_store_relaxed(&ptls2->gc_num.pool_live_bytes, 0); for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; - if (last) { - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(last)); + if (last != NULL) { + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last)); gc_pool_sync_nfree(pg, last); pg->has_young = 1; } @@ -1572,28 +1671,84 @@ static void gc_sweep_pool(int sweep_full) pfl[t_i * JL_GC_N_POOLS + i] = &p->freelist; last = p->newpages; - if (last) { + if (last != NULL) { char *last_p = (char*)last; - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(last_p - 1)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last_p - 1)); assert(last_p - gc_page_data(last_p - 1) >= GC_PAGE_OFFSET); pg->nfree = (GC_PAGE_SZ - (last_p - gc_page_data(last_p - 1))) / p->osize; pg->has_young = 1; } - p->newpages = NULL; + } + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_buffered.bottom); + while (pg != NULL) { + jl_gc_pagemeta_t *pg2 = pg->next; + buffered_pages++; + pg = pg2; } } // the actual sweeping - sweep_pool_pagetable(pfl, sweep_full); + jl_gc_page_stack_t *tmp = (jl_gc_page_stack_t *)alloca(n_threads * sizeof(jl_gc_page_stack_t)); + memset(tmp, 0, n_threads * sizeof(jl_gc_page_stack_t)); + jl_atomic_store(&gc_allocd_scratch, tmp); + gc_sweep_wake_all(); + gc_sweep_pool_parallel(); + gc_sweep_wait_for_all(); + + for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) { + ptls2->page_metadata_allocd = tmp[t_i]; + for (int i = 0; i < JL_GC_N_POOLS; i++) { + jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; + p->newpages = NULL; + } + } + } + + // merge free lists + for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) { + continue; + } + jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom); + while (pg != NULL) { + jl_gc_pagemeta_t *pg2 = pg->next; + if (pg->fl_begin_offset != UINT16_MAX) { + char *cur_pg = pg->data; + jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset); + jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset); + *pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg; + pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next; + } + pg = pg2; + } + } // null out terminal pointers of free 
lists for (int t_i = 0; t_i < n_threads; t_i++) { - for (int i = 0; i < JL_GC_N_POOLS; i++) { - *pfl[t_i * JL_GC_N_POOLS + i] = NULL; + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) { + for (int i = 0; i < JL_GC_N_POOLS; i++) { + *pfl[t_i * JL_GC_N_POOLS + i] = NULL; + } } } - gc_time_pool_end(sweep_full); +#ifdef _P64 // only enable concurrent sweeping on 64bit + // wake thread up to sweep concurrently + if (jl_n_sweepthreads > 0) { + uv_sem_post(&gc_sweep_assists_needed); + } + else { + gc_free_pages(); + } +#else + gc_free_pages(); +#endif + gc_dump_page_utilization_data(); + gc_time_pool_end(current_sweep_full); } static void gc_sweep_perm_alloc(void) @@ -1609,20 +1764,21 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr) { jl_ptls_t ptls = jl_current_task->ptls; jl_taggedvalue_t *o = jl_astaggedvalue(ptr); - // The modification of the `gc_bits` is not atomic but it - // should be safe here since GC is not allowed to run here and we only - // write GC_OLD to the GC bits outside GC. This could cause - // duplicated objects in the remset but that shouldn't be a problem. - o->bits.gc = GC_MARKED; - arraylist_push(ptls->heap.remset, (jl_value_t*)ptr); - ptls->heap.remset_nptr++; // conservative + // The modification of the `gc_bits` needs to be atomic. + // We need to ensure that objects are in the remset at + // most once, since the mark phase may update page metadata, + // which is not idempotent. See comments in https://github.com/JuliaLang/julia/issues/50419 + uintptr_t header = jl_atomic_load_relaxed((_Atomic(uintptr_t) *)&o->header); + header &= ~GC_OLD; // clear the age bit + header = jl_atomic_exchange_relaxed((_Atomic(uintptr_t) *)&o->header, header); + if (header & GC_OLD) { // write barrier has not been triggered in this object yet + arraylist_push(ptls->heap.remset, (jl_value_t*)ptr); + ptls->heap.remset_nptr++; // conservative + } } -void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT +void jl_gc_queue_multiroot(const jl_value_t *parent, const void *ptr, jl_datatype_t *dt) JL_NOTSAFEPOINT { - // first check if this is really necessary - // TODO: should we store this info in one of the extra gc bits? 
- jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr); const jl_datatype_layout_t *ly = dt->layout; uint32_t npointers = ly->npointers; //if (npointers == 0) // this was checked by the caller @@ -1638,14 +1794,14 @@ void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_N const uint32_t *ptrs32 = (const uint32_t*)jl_dt_layout_ptrs(ly); for (size_t i = 1; i < npointers; i++) { uint32_t fld; - if (ly->fielddesc_type == 0) { + if (ly->flags.fielddesc_type == 0) { fld = ptrs8[i]; } - else if (ly->fielddesc_type == 1) { + else if (ly->flags.fielddesc_type == 1) { fld = ptrs16[i]; } else { - assert(ly->fielddesc_type == 2); + assert(ly->flags.fielddesc_type == 2); fld = ptrs32[i]; } jl_value_t *ptrf = ((jl_value_t**)ptr)[fld]; @@ -1657,31 +1813,15 @@ void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_N } } -JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) -{ - jl_ptls_t ptls = jl_current_task->ptls; - jl_taggedvalue_t *buf = jl_astaggedvalue(bnd); - buf->bits.gc = GC_MARKED; - arraylist_push(&ptls->heap.rem_bindings, bnd); -} - #ifdef JL_DEBUG_BUILD static void *volatile gc_findval; // for usage from gdb, for finding the gc-root for a value #endif -static void *sysimg_base; -static void *sysimg_end; -void jl_gc_set_permalloc_region(void *start, void *end) -{ - sysimg_base = start; - sysimg_end = end; -} - // Handle the case where the stack is only partially copied. STATIC_INLINE uintptr_t gc_get_stack_addr(void *_addr, uintptr_t offset, - uintptr_t lb, uintptr_t ub) + uintptr_t lb, uintptr_t ub) JL_NOTSAFEPOINT { uintptr_t addr = (uintptr_t)_addr; if (addr >= lb && addr < ub) @@ -1690,1184 +1830,1332 @@ STATIC_INLINE uintptr_t gc_get_stack_addr(void *_addr, uintptr_t offset, } STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset, - uintptr_t lb, uintptr_t ub) + uintptr_t lb, uintptr_t ub) JL_NOTSAFEPOINT { uintptr_t real_addr = gc_get_stack_addr(_addr, offset, lb, ub); return *(uintptr_t*)real_addr; } -JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt, - jl_gc_mark_sp_t sp) +STATIC_INLINE void gc_assert_parent_validity(jl_value_t *parent, jl_value_t *child) JL_NOTSAFEPOINT { - jl_safe_printf("GC error (probable corruption) :\n"); - jl_gc_debug_print_status(); - jl_(vt); - jl_gc_debug_critical_error(); - gc_mark_loop_unwind(ptls, sp, 0); - abort(); +#ifdef GC_ASSERT_PARENT_VALIDITY + jl_taggedvalue_t *child_astagged = jl_astaggedvalue(child); + jl_taggedvalue_t *child_vtag = (jl_taggedvalue_t *)(child_astagged->header & ~(uintptr_t)0xf); + uintptr_t child_vt = (uintptr_t)child_vtag; + if (child_vt == (jl_datatype_tag << 4) || + child_vt == (jl_unionall_tag << 4) || + child_vt == (jl_uniontype_tag << 4) || + child_vt == (jl_tvar_tag << 4) || + child_vt == (jl_vararg_tag << 4)) { + // Skip, since these wouldn't hit the object assert anyway + return; + } + else if (child_vt < jl_max_tags << 4) { + // Skip, since these wouldn't hit the object assert anyway + return; + } + if (__unlikely(!jl_is_datatype((jl_datatype_t *)child_vt) || ((jl_datatype_t *)child_vt)->smalltag)) { + jl_safe_printf("GC error (probable corruption)\n"); + jl_gc_debug_print_status(); + jl_safe_printf("Parent %p\n", (void *)parent); + jl_safe_printf("of type:\n"); + jl_(jl_typeof(parent)); + jl_safe_printf("While marking child at %p\n", (void *)child); + jl_safe_printf("of type:\n"); + jl_(child_vtag); + jl_gc_debug_critical_error(); + abort(); + } +#endif } -// This stores the label address in the mark loop function. 
-// We can't directly store that to a global array so we need some hack to get that. -// See the call to `gc_mark_loop` in init with a `NULL` `ptls`. -void *gc_mark_label_addrs[_GC_MARK_L_MAX]; - -// Double the local mark stack (both pc and data) -static void NOINLINE gc_mark_stack_resize(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) JL_NOTSAFEPOINT +// Check if `nptr` is tagged for `old + refyoung`, +// Push the object to the remset and update the `nptr` counter if necessary. +STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj, + uintptr_t nptr) JL_NOTSAFEPOINT { - jl_gc_mark_data_t *old_data = gc_cache->data_stack; - void **pc_stack = sp->pc_start; - size_t stack_size = (char*)sp->pc_end - (char*)pc_stack; - gc_cache->data_stack = (jl_gc_mark_data_t *)realloc_s(old_data, stack_size * 2 * sizeof(jl_gc_mark_data_t)); - sp->data = (jl_gc_mark_data_t *)(((char*)sp->data) + (((char*)gc_cache->data_stack) - ((char*)old_data))); - - sp->pc_start = gc_cache->pc_stack = (void**)realloc_s(pc_stack, stack_size * 2 * sizeof(void*)); - gc_cache->pc_stack_end = sp->pc_end = sp->pc_start + stack_size * 2; - sp->pc = sp->pc_start + (sp->pc - pc_stack); + if (__unlikely((nptr & 0x3) == 0x3)) { + ptls->heap.remset_nptr += nptr >> 2; + arraylist_t *remset = ptls->heap.remset; + size_t len = remset->len; + if (__unlikely(len >= remset->max)) { + arraylist_push(remset, obj); + } + else { + remset->len = len + 1; + remset->items[len] = obj; + } + } } -// Push a work item to the stack. The type of the work item is marked with `pc`. -// The data needed is in `data` and is of size `data_size`. -// If there isn't enough space on the stack, the stack will be resized with the stack -// lock held. The caller should invalidate any local cache of the stack addresses that's not -// in `gc_cache` or `sp` -// The `sp` will be updated on return if `inc` is true. -STATIC_INLINE void gc_mark_stack_push(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, - void *pc, void *data, size_t data_size, int inc) JL_NOTSAFEPOINT +// Push a work item to the queue +STATIC_INLINE void gc_ptr_queue_push(jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT { - assert(data_size <= sizeof(jl_gc_mark_data_t)); - if (__unlikely(sp->pc == sp->pc_end)) - gc_mark_stack_resize(gc_cache, sp); - *sp->pc = pc; - memcpy(sp->data, data, data_size); - if (inc) { - sp->data = (jl_gc_mark_data_t *)(((char*)sp->data) + data_size); - sp->pc++; - } +#ifdef JL_DEBUG_BUILD + if (obj == gc_findval) + jl_raise_debugger(); +#endif + ws_array_t *old_a = ws_queue_push(&mq->ptr_queue, &obj, sizeof(jl_value_t*)); + // Put `old_a` in `reclaim_set` to be freed after the mark phase + if (__unlikely(old_a != NULL)) + arraylist_push(&mq->reclaim_set, old_a); } -// Check if the reference is non-NULL and atomically set the mark bit. -// Update `*nptr`, which is the `nptr` field of the parent item, if the object is young. -// Return the tag (with GC bits cleared) and the GC bits in `*ptag` and `*pbits`. -// Return whether the object needs to be scanned / have metadata updated. 
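gc_mark_push_remset packs three facts into the single `nptr` word threaded through marking: bit 1 records that the parent is old, bit 0 records that at least one referenced child is young, and the remaining bits count pointer slots, which is why `(nptr & 0x3) == 0x3` triggers the remset push and `nptr >> 2` is added to remset_nptr. A small sketch of that encoding (the helper names are illustrative):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define NPTR_REFYOUNG ((uintptr_t)0x1)  /* some referenced child is young */
#define NPTR_OLD      ((uintptr_t)0x2)  /* the parent object itself is old */

/* start the counter for an object with `nslots` pointer fields */
static inline uintptr_t nptr_init(size_t nslots, bool parent_old)
{
    return ((uintptr_t)nslots << 2) | (parent_old ? NPTR_OLD : 0);
}

/* record that one child turned out to be young */
static inline uintptr_t nptr_saw_young(uintptr_t nptr)
{
    return nptr | NPTR_REFYOUNG;
}

/* after scanning: old parent + young child means the object belongs in the
 * remembered set, with the slot count recovered from the upper bits */
static inline bool nptr_needs_remset(uintptr_t nptr, size_t *nslots)
{
    *nslots = (size_t)(nptr >> 2);
    return (nptr & (NPTR_OLD | NPTR_REFYOUNG)) == (NPTR_OLD | NPTR_REFYOUNG);
}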
-STATIC_INLINE int gc_try_setmark(jl_value_t *obj, uintptr_t *nptr, - uintptr_t *ptag, uint8_t *pbits) JL_NOTSAFEPOINT +// Pop from the mark queue +STATIC_INLINE jl_value_t *gc_ptr_queue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT { - if (!obj) - return 0; - jl_taggedvalue_t *o = jl_astaggedvalue(obj); - uintptr_t tag = o->header; - if (!gc_marked(tag)) { - uint8_t bits; - int res = gc_setmark_tag(o, GC_MARKED, tag, &bits); - if (!gc_old(bits)) - *nptr = *nptr | 1; - *ptag = tag & ~(uintptr_t)0xf; - *pbits = bits; - return __likely(res); - } - else if (!gc_old(tag)) { - *nptr = *nptr | 1; - } - return 0; + jl_value_t *v = NULL; + ws_queue_pop(&mq->ptr_queue, &v, sizeof(jl_value_t*)); + return v; } -// Queue a finalizer list to be scanned in the mark loop. Start marking from index `start`. -void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, - arraylist_t *list, size_t start) +// Steal from `mq2` +STATIC_INLINE jl_value_t *gc_ptr_queue_steal_from(jl_gc_markqueue_t *mq2) JL_NOTSAFEPOINT { - size_t len = list->len; - if (len <= start) - return; - jl_value_t **items = (jl_value_t**)list->items; - gc_mark_finlist_t markdata = {items + start, items + len}; - gc_mark_stack_push(gc_cache, sp, gc_mark_label_addrs[GC_MARK_L_finlist], - &markdata, sizeof(markdata), 1); + jl_value_t *v = NULL; + ws_queue_steal_from(&mq2->ptr_queue, &v, sizeof(jl_value_t*)); + return v; } -// Queue a object to be scanned. The object should already be marked and the GC metadata -// should already be updated for it. Only scanning of the object should be performed. -STATIC_INLINE void gc_mark_queue_scan_obj(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, - jl_value_t *obj) +// Push chunk `*c` into chunk queue +STATIC_INLINE void gc_chunkqueue_push(jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT { - jl_taggedvalue_t *o = jl_astaggedvalue(obj); - uintptr_t tag = o->header; - uint8_t bits = tag & 0xf; - tag = tag & ~(uintptr_t)0xf; - gc_mark_marked_obj_t data = {obj, tag, bits}; - gc_mark_stack_push(gc_cache, sp, gc_mark_label_addrs[GC_MARK_L_scan_only], - &data, sizeof(data), 1); + ws_array_t *old_a = ws_queue_push(&mq->chunk_queue, c, sizeof(jl_gc_chunk_t)); + // Put `old_a` in `reclaim_set` to be freed after the mark phase + if (__unlikely(old_a != NULL)) + arraylist_push(&mq->reclaim_set, old_a); } -// Mark and queue a object to be scanned. -// The object will be marked atomically which can also happen concurrently. -// It will be queued if the object wasn't marked already (or concurrently by another thread) -// Returns whether the object is young. 
-STATIC_INLINE int gc_mark_queue_obj(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, void *_obj) JL_NOTSAFEPOINT +// Pop chunk from chunk queue +STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT { - jl_value_t *obj = (jl_value_t*)jl_assume(_obj); - uintptr_t nptr = 0; - uintptr_t tag = 0; - uint8_t bits = 0; - if (!gc_try_setmark(obj, &nptr, &tag, &bits)) - return (int)nptr; - gc_mark_marked_obj_t data = {obj, tag, bits}; - gc_mark_stack_push(gc_cache, sp, gc_mark_label_addrs[GC_MARK_L_marked_obj], - &data, sizeof(data), 1); - return (int)nptr; + jl_gc_chunk_t c = {.cid = GC_empty_chunk}; + ws_queue_pop(&mq->chunk_queue, &c, sizeof(jl_gc_chunk_t)); + return c; } -int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, jl_value_t *obj) +// Dump mark queue on critical error +JL_NORETURN NOINLINE void gc_dump_queue_and_abort(jl_ptls_t ptls, jl_datatype_t *vt) JL_NOTSAFEPOINT { - return gc_mark_queue_obj(gc_cache, sp, obj); + jl_safe_printf("GC error (probable corruption)\n"); + jl_gc_debug_print_status(); + jl_(vt); + jl_gc_debug_critical_error(); + if (jl_n_gcthreads == 0) { + jl_safe_printf("\n"); + jl_value_t *new_obj; + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_safe_printf("thread %d ptr queue:\n", ptls->tid); + jl_safe_printf("~~~~~~~~~~ ptr queue top ~~~~~~~~~~\n"); + while ((new_obj = gc_ptr_queue_steal_from(mq)) != NULL) { + jl_(new_obj); + jl_safe_printf("==========\n"); + } + jl_safe_printf("~~~~~~~~~~ ptr queue bottom ~~~~~~~~~~\n"); + } + abort(); } -JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) +// Steal chunk from `mq2` +STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_steal_from(jl_gc_markqueue_t *mq2) JL_NOTSAFEPOINT { - return gc_mark_queue_obj(&ptls->gc_cache, &ptls->gc_mark_sp, obj); + jl_gc_chunk_t c = {.cid = GC_empty_chunk}; + ws_queue_steal_from(&mq2->chunk_queue, &c, sizeof(jl_gc_chunk_t)); + return c; } -JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, - jl_value_t **objs, size_t nobjs) +// Enqueue an unmarked obj. last bit of `nptr` is set if `_obj` is young +STATIC_INLINE void gc_try_claim_and_push(jl_gc_markqueue_t *mq, void *_obj, + uintptr_t *nptr) JL_NOTSAFEPOINT { - gc_mark_objarray_t data = { parent, objs, objs + nobjs, 1, - jl_astaggedvalue(parent)->bits.gc & 2 }; - gc_mark_stack_push(&ptls->gc_cache, &ptls->gc_mark_sp, - gc_mark_label_addrs[GC_MARK_L_objarray], - &data, sizeof(data), 1); + if (_obj == NULL) + return; + jl_value_t *obj = (jl_value_t *)jl_assume(_obj); + jl_taggedvalue_t *o = jl_astaggedvalue(obj); + if (!gc_old(o->header) && nptr) + *nptr |= 1; + if (gc_try_setmark_tag(o, GC_MARKED)) + gc_ptr_queue_push(mq, obj); } - -// Check if `nptr` is tagged for `old + refyoung`, -// Push the object to the remset and update the `nptr` counter if necessary. 
-STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj, uintptr_t nptr) JL_NOTSAFEPOINT +// Mark object with 8bit field descriptors +STATIC_INLINE jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char *obj8_parent, uint8_t *obj8_begin, + uint8_t *obj8_end, uintptr_t nptr) JL_NOTSAFEPOINT { - if (__unlikely((nptr & 0x3) == 0x3)) { - ptls->heap.remset_nptr += nptr >> 2; - arraylist_t *remset = ptls->heap.remset; - size_t len = remset->len; - if (__unlikely(len >= remset->max)) { - arraylist_push(remset, obj); - } - else { - remset->len = len + 1; - remset->items[len] = obj; + (void)jl_assume(obj8_begin < obj8_end); + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t **slot = NULL; + jl_value_t *new_obj = NULL; + for (; obj8_begin < obj8_end; obj8_begin++) { + slot = &((jl_value_t**)obj8_parent)[*obj8_begin]; + new_obj = *slot; + if (new_obj != NULL) { + verify_parent2("object", obj8_parent, slot, "field(%d)", + gc_slot_to_fieldidx(obj8_parent, slot, (jl_datatype_t*)jl_typeof(obj8_parent))); + gc_assert_parent_validity((jl_value_t *)obj8_parent, new_obj); + if (obj8_begin + 1 != obj8_end) { + gc_try_claim_and_push(mq, new_obj, &nptr); + } + else { + // Unroll marking of last item to avoid pushing + // and popping it right away + jl_taggedvalue_t *o = jl_astaggedvalue(new_obj); + nptr |= !gc_old(o->header); + if (!gc_try_setmark_tag(o, GC_MARKED)) new_obj = NULL; + } + gc_heap_snapshot_record_object_edge((jl_value_t*)obj8_parent, slot); } } + gc_mark_push_remset(ptls, (jl_value_t *)obj8_parent, nptr); + return new_obj; } -// Scan a dense array of object references, see `gc_mark_objarray_t` -STATIC_INLINE int gc_mark_scan_objarray(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, - gc_mark_objarray_t *objary, - jl_value_t **begin, jl_value_t **end, - jl_value_t **pnew_obj, uintptr_t *ptag, uint8_t *pbits) +// Mark object with 16bit field descriptors +STATIC_INLINE jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint16_t *obj16_begin, + uint16_t *obj16_end, uintptr_t nptr) JL_NOTSAFEPOINT { - (void)jl_assume(objary == (gc_mark_objarray_t*)sp->data); - for (; begin < end; begin += objary->step) { - *pnew_obj = *begin; - if (*pnew_obj) - verify_parent2("obj array", objary->parent, begin, "elem(%d)", - gc_slot_to_arrayidx(objary->parent, begin)); - if (!gc_try_setmark(*pnew_obj, &objary->nptr, ptag, pbits)) - continue; - begin += objary->step; - // Found an object to mark - if (begin < end) { - // Haven't done with this one yet. Update the content and push it back - objary->begin = begin; - gc_repush_markdata(sp, gc_mark_objarray_t); - } - else { - // Finished scanning this one, finish up by checking the GC invariance - // and let the next item replacing the current one directly. 
- gc_mark_push_remset(ptls, objary->parent, objary->nptr); - } - return 1; - } - gc_mark_push_remset(ptls, objary->parent, objary->nptr); - return 0; -} - -// Scan a sparse array of object references, see `gc_mark_objarray_t` -STATIC_INLINE int gc_mark_scan_array8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, - gc_mark_array8_t *ary8, - jl_value_t **begin, jl_value_t **end, - uint8_t *elem_begin, uint8_t *elem_end, - jl_value_t **pnew_obj, uintptr_t *ptag, uint8_t *pbits) -{ - (void)jl_assume(ary8 == (gc_mark_array8_t*)sp->data); - size_t elsize = ((jl_array_t*)ary8->elem.parent)->elsize / sizeof(jl_value_t*); - for (; begin < end; begin += elsize) { - for (; elem_begin < elem_end; elem_begin++) { - jl_value_t **slot = &begin[*elem_begin]; - *pnew_obj = *slot; - if (*pnew_obj) - verify_parent2("array", ary8->elem.parent, slot, "elem(%d)", - gc_slot_to_arrayidx(ary8->elem.parent, begin)); - if (!gc_try_setmark(*pnew_obj, &ary8->elem.nptr, ptag, pbits)) - continue; - elem_begin++; - // Found an object to mark - if (elem_begin < elem_end) { - // Haven't done with this one yet. Update the content and push it back - ary8->elem.begin = elem_begin; - ary8->begin = begin; - gc_repush_markdata(sp, gc_mark_array8_t); - } - else { - begin += elsize; - if (begin < end) { - // Haven't done with this array yet. Reset the content and push it back - ary8->elem.begin = ary8->rebegin; - ary8->begin = begin; - gc_repush_markdata(sp, gc_mark_array8_t); - } - else { - // Finished scanning this one, finish up by checking the GC invariance - // and let the next item replacing the current one directly. - gc_mark_push_remset(ptls, ary8->elem.parent, ary8->elem.nptr); - } - } - return 1; - } - elem_begin = ary8->rebegin; - } - gc_mark_push_remset(ptls, ary8->elem.parent, ary8->elem.nptr); - return 0; -} - -// Scan a sparse array of object references, see `gc_mark_objarray_t` -STATIC_INLINE int gc_mark_scan_array16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, - gc_mark_array16_t *ary16, - jl_value_t **begin, jl_value_t **end, - uint16_t *elem_begin, uint16_t *elem_end, - jl_value_t **pnew_obj, uintptr_t *ptag, uint8_t *pbits) -{ - (void)jl_assume(ary16 == (gc_mark_array16_t*)sp->data); - size_t elsize = ((jl_array_t*)ary16->elem.parent)->elsize / sizeof(jl_value_t*); - for (; begin < end; begin += elsize) { - for (; elem_begin < elem_end; elem_begin++) { - jl_value_t **slot = &begin[*elem_begin]; - *pnew_obj = *slot; - if (*pnew_obj) - verify_parent2("array", ary16->elem.parent, slot, "elem(%d)", - gc_slot_to_arrayidx(ary16->elem.parent, begin)); - if (!gc_try_setmark(*pnew_obj, &ary16->elem.nptr, ptag, pbits)) - continue; - elem_begin++; - // Found an object to mark - if (elem_begin < elem_end) { - // Haven't done with this one yet. 
Update the content and push it back - ary16->elem.begin = elem_begin; - ary16->begin = begin; - gc_repush_markdata(sp, gc_mark_array16_t); + (void)jl_assume(obj16_begin < obj16_end); + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t **slot = NULL; + jl_value_t *new_obj = NULL; + for (; obj16_begin < obj16_end; obj16_begin++) { + slot = &((jl_value_t **)obj16_parent)[*obj16_begin]; + new_obj = *slot; + if (new_obj != NULL) { + verify_parent2("object", obj16_parent, slot, "field(%d)", + gc_slot_to_fieldidx(obj16_parent, slot, (jl_datatype_t*)jl_typeof(obj16_parent))); + gc_assert_parent_validity((jl_value_t *)obj16_parent, new_obj); + if (obj16_begin + 1 != obj16_end) { + gc_try_claim_and_push(mq, new_obj, &nptr); } else { - begin += elsize; - if (begin < end) { - // Haven't done with this array yet. Reset the content and push it back - ary16->elem.begin = ary16->rebegin; - ary16->begin = begin; - gc_repush_markdata(sp, gc_mark_array16_t); - } - else { - // Finished scanning this one, finish up by checking the GC invariance - // and let the next item replacing the current one directly. - gc_mark_push_remset(ptls, ary16->elem.parent, ary16->elem.nptr); - } + // Unroll marking of last item to avoid pushing + // and popping it right away + jl_taggedvalue_t *o = jl_astaggedvalue(new_obj); + nptr |= !gc_old(o->header); + if (!gc_try_setmark_tag(o, GC_MARKED)) new_obj = NULL; } - return 1; + gc_heap_snapshot_record_object_edge((jl_value_t*)obj16_parent, slot); } - elem_begin = ary16->rebegin; } - gc_mark_push_remset(ptls, ary16->elem.parent, ary16->elem.nptr); - return 0; + gc_mark_push_remset(ptls, (jl_value_t *)obj16_parent, nptr); + return new_obj; } - -// Scan an object with 8bits field descriptors. see `gc_mark_obj8_t` -STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark_obj8_t *obj8, - char *parent, uint8_t *begin, uint8_t *end, - jl_value_t **pnew_obj, uintptr_t *ptag, uint8_t *pbits) +// Mark object with 32bit field descriptors +STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint32_t *obj32_begin, + uint32_t *obj32_end, uintptr_t nptr) JL_NOTSAFEPOINT { - (void)jl_assume(obj8 == (gc_mark_obj8_t*)sp->data); - (void)jl_assume(begin < end); - for (; begin < end; begin++) { - jl_value_t **slot = &((jl_value_t**)parent)[*begin]; - *pnew_obj = *slot; - if (*pnew_obj) - verify_parent2("object", parent, slot, "field(%d)", - gc_slot_to_fieldidx(parent, slot)); - if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) - continue; - begin++; - // Found an object to mark - if (begin < end) { - // Haven't done with this one yet. Update the content and push it back - obj8->begin = begin; - gc_repush_markdata(sp, gc_mark_obj8_t); - } - else { - // Finished scanning this one, finish up by checking the GC invariance - // and let the next item replacing the current one directly. - gc_mark_push_remset(ptls, obj8->parent, obj8->nptr); - } - return 1; - } - gc_mark_push_remset(ptls, obj8->parent, obj8->nptr); - return 0; -} - -// Scan an object with 16bits field descriptors. 
see `gc_mark_obj16_t` -STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark_obj16_t *obj16, - char *parent, uint16_t *begin, uint16_t *end, - jl_value_t **pnew_obj, uintptr_t *ptag, uint8_t *pbits) JL_NOTSAFEPOINT -{ - (void)jl_assume(obj16 == (gc_mark_obj16_t*)sp->data); - (void)jl_assume(begin < end); - for (; begin < end; begin++) { - jl_value_t **slot = &((jl_value_t**)parent)[*begin]; - *pnew_obj = *slot; - if (*pnew_obj) - verify_parent2("object", parent, slot, "field(%d)", - gc_slot_to_fieldidx(parent, slot)); - if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) - continue; - begin++; - // Found an object to mark - if (begin < end) { - // Haven't done with this one yet. Update the content and push it back - obj16->begin = begin; - gc_repush_markdata(sp, gc_mark_obj16_t); - } - else { - // Finished scanning this one, finish up by checking the GC invariance - // and let the next item replacing the current one directly. - gc_mark_push_remset(ptls, obj16->parent, obj16->nptr); - } - return 1; - } - gc_mark_push_remset(ptls, obj16->parent, obj16->nptr); - return 0; -} - -// Scan an object with 32bits field descriptors. see `gc_mark_obj32_t` -STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark_obj32_t *obj32, - char *parent, uint32_t *begin, uint32_t *end, - jl_value_t **pnew_obj, uintptr_t *ptag, uint8_t *pbits) -{ - (void)jl_assume(obj32 == (gc_mark_obj32_t*)sp->data); - (void)jl_assume(begin < end); - for (; begin < end; begin++) { - jl_value_t **slot = &((jl_value_t**)parent)[*begin]; - *pnew_obj = *slot; - if (*pnew_obj) - verify_parent2("object", parent, slot, "field(%d)", - gc_slot_to_fieldidx(parent, slot)); - if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) - continue; - begin++; - // Found an object to mark - if (begin < end) { - // Haven't done with this one yet. Update the content and push it back - obj32->begin = begin; - gc_repush_markdata(sp, gc_mark_obj32_t); + (void)jl_assume(obj32_begin < obj32_end); + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t **slot = NULL; + jl_value_t *new_obj = NULL; + for (; obj32_begin < obj32_end; obj32_begin++) { + slot = &((jl_value_t **)obj32_parent)[*obj32_begin]; + new_obj = *slot; + if (new_obj != NULL) { + verify_parent2("object", obj32_parent, slot, "field(%d)", + gc_slot_to_fieldidx(obj32_parent, slot, (jl_datatype_t*)jl_typeof(obj32_parent))); + gc_assert_parent_validity((jl_value_t *)obj32_parent, new_obj); + if (obj32_begin + 1 != obj32_end) { + gc_try_claim_and_push(mq, new_obj, &nptr); + } + else { + // Unroll marking of last item to avoid pushing + // and popping it right away + jl_taggedvalue_t *o = jl_astaggedvalue(new_obj); + nptr |= !gc_old(o->header); + if (!gc_try_setmark_tag(o, GC_MARKED)) new_obj = NULL; + } + gc_heap_snapshot_record_object_edge((jl_value_t*)obj32_parent, slot); } - else { - // Finished scanning this one, finish up by checking the GC invariance - // and let the next item replacing the current one directly. 
- gc_mark_push_remset(ptls, obj32->parent, obj32->nptr); + } + gc_mark_push_remset(ptls, (jl_value_t *)obj32_parent, nptr); + return new_obj; +} + +// Mark object array +STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_value_t **obj_begin, + jl_value_t **obj_end, uint32_t step, uintptr_t nptr) JL_NOTSAFEPOINT +{ + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t *new_obj; + // Decide whether need to chunk objary + assert(step > 0); + (void)jl_assume(step > 0); + if ((nptr & 0x2) == 0x2) { + // pre-scan this object: most of this object should be old, so look for + // the first young object before starting this chunk + // (this also would be valid for young objects, but probably less beneficial) + for (; obj_begin < obj_end; obj_begin += step) { + jl_value_t **slot = obj_begin; + new_obj = *slot; + if (new_obj != NULL) { + verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)", + gc_slot_to_arrayidx(obj_parent, obj_begin)); + jl_taggedvalue_t *o = jl_astaggedvalue(new_obj); + if (!gc_old(o->header)) + nptr |= 1; + if (!gc_marked(o->header)) + break; + gc_heap_snapshot_record_array_edge(obj_parent, slot); + } } - return 1; } - gc_mark_push_remset(ptls, obj32->parent, obj32->nptr); - return 0; -} - -#if defined(__GNUC__) && !defined(_OS_EMSCRIPTEN_) -# define gc_mark_laddr(name) (&&name) -# define gc_mark_jmp(ptr) goto *(ptr) -#else -#define gc_mark_laddr(name) ((void*)(uintptr_t)GC_MARK_L_##name) -#define gc_mark_jmp(ptr) do { \ - switch ((int)(uintptr_t)ptr) { \ - case GC_MARK_L_marked_obj: \ - goto marked_obj; \ - case GC_MARK_L_scan_only: \ - goto scan_only; \ - case GC_MARK_L_finlist: \ - goto finlist; \ - case GC_MARK_L_objarray: \ - goto objarray; \ - case GC_MARK_L_array8: \ - goto array8; \ - case GC_MARK_L_array16: \ - goto array16; \ - case GC_MARK_L_obj8: \ - goto obj8; \ - case GC_MARK_L_obj16: \ - goto obj16; \ - case GC_MARK_L_obj32: \ - goto obj32; \ - case GC_MARK_L_stack: \ - goto stack; \ - case GC_MARK_L_excstack: \ - goto excstack; \ - case GC_MARK_L_module_binding: \ - goto module_binding; \ - default: \ - abort(); \ - } \ - } while (0) -#endif - -// This is the main marking loop. -// It uses an iterative (mostly) Depth-first search (DFS) to mark all the objects. -// Instead of using the native stack, two stacks are manually maintained, -// one (fixed-size) pc stack which stores the return address and one (variable-size) -// data stack which stores the local variables needed by the scanning code. -// Using a manually maintained stack has a few advantages -// -// 1. We can resize the stack as we go and never worry about stack overflow -// This is especitally useful when enters the GC in a deep call stack. -// It also removes the very deep GC call stack in a profile. -// 2. We can minimize the number of local variables to save on the stack. -// This includes minimizing the sizes of the stack frames and only saving variables -// that have been changed before making "function calls" (i.e. `goto mark;`) -// 3. We can perform end-of-loop tail-call optimization for common cases. -// 4. The marking can be interrupted more easily since all the states are maintained -// in a well-defined format already. -// This will be useful if we want to have incremental marking again. -// 5. The frames can be stolen by another thread more easily and it is not necessary -// to copy works to be stolen to another queue. Useful for parallel marking. -// (Will still require synchronization in stack popping of course.) -// 6. A flat function (i.e. 
no or very few function calls) also give the compiler -// opportunity to keep more states in registers that doesn't have to be spilled as often. -// -// We use two stacks so that the thief on another thread can steal the fixed sized pc stack -// and use that to figure out the size of the struct on the variable size data stack. -// -// The main disadvantages are that we bypass some stack-based CPU optimizations including the -// stack engine and return address prediction. -// Using two stacks also double the number of operations on the stack pointer -// though we still only need to use one of them (the pc stack pointer) for bounds check. -// In general, it seems that the reduction of stack memory ops and instructions count -// have a larger positive effect on the performance. =) - -// As a general guide we do not want to make non-inlined function calls in this function -// if possible since a large number of registers has to be spilled when that happens. -// This is especially true on on X86 which doesn't have many (any?) -// callee saved general purpose registers. -// (OTOH, the spill will likely make use of the stack engine which is otherwise idle so -// the performance impact is minimum as long as it's not in the hottest path) - -// There are three external entry points to the loop, corresponding to label -// `marked_obj`, `scan_only` and `finlist` (see the corresponding functions -// `gc_mark_queue_obj`, `gc_mark_queue_scan_obj` and `gc_mark_queue_finlist` above). -// The scanning of the object starts with `goto mark`, which updates the metadata and scans -// the object whose information is stored in `new_obj`, `tag` and `bits`. -// The branches in `mark` will dispatch the object to one of the scan "loop"s to be scanned -// as either a normal julia object or one of the special objects with specific storage format. -// Each of the scan "loop" will perform a DFS of the object in the following way -// -// 1. When encountering an pointer (julia object reference) slots, load, perform NULL check -// and atomically set the mark bits to determine if the object needs to be scanned. -// 2. If yes, it'll push itself back onto the mark stack (after updating fields that are changed) -// using `gc_repush_markdata` to increment the stack pointers. -// This step can also be replaced by a tail call by finishing up the marking of the current -// object when the end of the current object is reached. -// 3. Jump to `mark`. The marking of the current object will be resumed after the child is -// scanned by popping the stack frame back. -// -// Some of the special object scannings use BFS to simplify the code (Task and Module). - -// The jumps from the dispatch to the scan "loop"s are done by first pushing a frame -// to the stacks while only increment the data stack pointer before jumping to the loop -// This way the scan "loop" gets exactly what it expects after a stack pop. -// Additional optimizations are done for some of the common cases by skipping -// the unnecessary data stack pointer increment and the load from the stack -// (i.e. store to load forwaring). See `objary_loaded`, `obj8_loaded` and `obj16_loaded`. 
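The replacement introduced by this patch drops the manually managed pc/data stack pair entirely: each GC thread owns a work-stealing `ptr_queue` of claimed objects plus a `chunk_queue` of deferred large-array slices, and the loop below is superseded by draining those queues, either serially or with stealing. A brief usage sketch against functions introduced by this patch; it assumes a valid `jl_ptls_t ptls` from the runtime and a placeholder `root` object, so it is not a standalone program:

    if (root != NULL && gc_try_setmark_tag(jl_astaggedvalue(root), GC_MARKED))
        gc_ptr_queue_push(&ptls->mark_queue, root); /* what jl_gc_mark_queue_obj does */
    gc_mark_loop_serial(ptls);                      /* drain the ptr queue, then the chunk queue */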
-JL_EXTENSION NOINLINE void gc_mark_loop(jl_ptls_t ptls, jl_gc_mark_sp_t sp) -{ - if (__unlikely(ptls == NULL)) { - gc_mark_label_addrs[GC_MARK_L_marked_obj] = gc_mark_laddr(marked_obj); - gc_mark_label_addrs[GC_MARK_L_scan_only] = gc_mark_laddr(scan_only); - gc_mark_label_addrs[GC_MARK_L_finlist] = gc_mark_laddr(finlist); - gc_mark_label_addrs[GC_MARK_L_objarray] = gc_mark_laddr(objarray); - gc_mark_label_addrs[GC_MARK_L_array8] = gc_mark_laddr(array8); - gc_mark_label_addrs[GC_MARK_L_array16] = gc_mark_laddr(array16); - gc_mark_label_addrs[GC_MARK_L_obj8] = gc_mark_laddr(obj8); - gc_mark_label_addrs[GC_MARK_L_obj16] = gc_mark_laddr(obj16); - gc_mark_label_addrs[GC_MARK_L_obj32] = gc_mark_laddr(obj32); - gc_mark_label_addrs[GC_MARK_L_stack] = gc_mark_laddr(stack); - gc_mark_label_addrs[GC_MARK_L_excstack] = gc_mark_laddr(excstack); - gc_mark_label_addrs[GC_MARK_L_module_binding] = gc_mark_laddr(module_binding); - return; + size_t too_big = (obj_end - obj_begin) / GC_CHUNK_BATCH_SIZE > step; // use this order of operations to avoid idiv + jl_value_t **scan_end = obj_end; + int pushed_chunk = 0; + if (too_big) { + scan_end = obj_begin + step * GC_CHUNK_BATCH_SIZE; + // case 1: array owner is young, so we won't need to scan through all its elements + // to know that we will never need to push it to the remset. it's fine + // to create a chunk with "incorrect" `nptr` and push it to the chunk-queue + // ASAP in order to expose as much parallelism as possible + // case 2: lowest two bits of `nptr` are already set to 0x3, so won't change after + // scanning the array elements + if ((nptr & 0x2) != 0x2 || (nptr & 0x3) == 0x3) { + jl_gc_chunk_t c = {GC_objary_chunk, obj_parent, scan_end, obj_end, NULL, NULL, step, nptr}; + gc_chunkqueue_push(mq, &c); + pushed_chunk = 1; + } } - - jl_value_t *new_obj = NULL; - uintptr_t tag = 0; - uint8_t bits = 0; - int meta_updated = 0; - - gc_mark_objarray_t *objary; - jl_value_t **objary_begin; - jl_value_t **objary_end; - - gc_mark_array8_t *ary8; - gc_mark_array16_t *ary16; - - gc_mark_obj8_t *obj8; - char *obj8_parent; - uint8_t *obj8_begin; - uint8_t *obj8_end; - - gc_mark_obj16_t *obj16; - char *obj16_parent; - uint16_t *obj16_begin; - uint16_t *obj16_end; - -pop: - if (sp.pc == sp.pc_start) { - // TODO: stealing form another thread - return; + for (; obj_begin < scan_end; obj_begin += step) { + jl_value_t **slot = obj_begin; + new_obj = *obj_begin; + if (new_obj != NULL) { + verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)", + gc_slot_to_arrayidx(obj_parent, obj_begin)); + gc_assert_parent_validity(obj_parent, new_obj); + gc_try_claim_and_push(mq, new_obj, &nptr); + gc_heap_snapshot_record_array_edge(obj_parent, slot); + } } - sp.pc--; - gc_mark_jmp(*sp.pc); // computed goto - -marked_obj: { - // An object that has been marked and needs have metadata updated and scanned. - gc_mark_marked_obj_t *obj = gc_pop_markdata(&sp, gc_mark_marked_obj_t); - new_obj = obj->obj; - tag = obj->tag; - bits = obj->bits; - goto mark; - } - -scan_only: { - // An object that has been marked and needs to be scanned. 
- gc_mark_marked_obj_t *obj = gc_pop_markdata(&sp, gc_mark_marked_obj_t); - new_obj = obj->obj; - tag = obj->tag; - bits = obj->bits; - meta_updated = 1; - goto mark; - } - -objarray: - objary = gc_pop_markdata(&sp, gc_mark_objarray_t); - objary_begin = objary->begin; - objary_end = objary->end; -objarray_loaded: - if (gc_mark_scan_objarray(ptls, &sp, objary, objary_begin, objary_end, - &new_obj, &tag, &bits)) - goto mark; - goto pop; - -array8: - ary8 = gc_pop_markdata(&sp, gc_mark_array8_t); - objary_begin = ary8->begin; - objary_end = ary8->end; - obj8_begin = ary8->elem.begin; - obj8_end = ary8->elem.end; -array8_loaded: - if (gc_mark_scan_array8(ptls, &sp, ary8, objary_begin, objary_end, obj8_begin, obj8_end, - &new_obj, &tag, &bits)) - goto mark; - goto pop; - -array16: - ary16 = gc_pop_markdata(&sp, gc_mark_array16_t); - objary_begin = ary16->begin; - objary_end = ary16->end; - obj16_begin = ary16->elem.begin; - obj16_end = ary16->elem.end; -array16_loaded: - if (gc_mark_scan_array16(ptls, &sp, ary16, objary_begin, objary_end, obj16_begin, obj16_end, - &new_obj, &tag, &bits)) - goto mark; - goto pop; - -obj8: - obj8 = gc_pop_markdata(&sp, gc_mark_obj8_t); - obj8_parent = (char*)obj8->parent; - obj8_begin = obj8->begin; - obj8_end = obj8->end; -obj8_loaded: - if (gc_mark_scan_obj8(ptls, &sp, obj8, obj8_parent, obj8_begin, obj8_end, - &new_obj, &tag, &bits)) - goto mark; - goto pop; - -obj16: - obj16 = gc_pop_markdata(&sp, gc_mark_obj16_t); - obj16_parent = (char*)obj16->parent; - obj16_begin = obj16->begin; - obj16_end = obj16->end; -obj16_loaded: - if (gc_mark_scan_obj16(ptls, &sp, obj16, obj16_parent, obj16_begin, obj16_end, - &new_obj, &tag, &bits)) - goto mark; - goto pop; - -obj32: { - gc_mark_obj32_t *obj32 = gc_pop_markdata(&sp, gc_mark_obj32_t); - char *parent = (char*)obj32->parent; - uint32_t *begin = obj32->begin; - uint32_t *end = obj32->end; - if (gc_mark_scan_obj32(ptls, &sp, obj32, parent, begin, end, &new_obj, &tag, &bits)) - goto mark; - goto pop; + if (too_big) { + if (!pushed_chunk) { + jl_gc_chunk_t c = {GC_objary_chunk, obj_parent, scan_end, obj_end, NULL, NULL, step, nptr}; + gc_chunkqueue_push(mq, &c); + } } - -stack: { - // Scan the stack. see `gc_mark_stackframe_t` - // The task object this stack belongs to is being scanned separately as a normal - // 8bit field descriptor object. 
- gc_mark_stackframe_t *stack = gc_pop_markdata(&sp, gc_mark_stackframe_t); - jl_gcframe_t *s = stack->s; - uint32_t i = stack->i; - uint32_t nroots = stack->nroots; - uintptr_t offset = stack->offset; - uintptr_t lb = stack->lb; - uintptr_t ub = stack->ub; - uint32_t nr = nroots >> 2; - uintptr_t nptr = 0; - while (1) { - jl_value_t ***rts = (jl_value_t***)(((void**)s) + 2); - for (; i < nr; i++) { - if (nroots & 1) { - void **slot = (void**)gc_read_stack(&rts[i], offset, lb, ub); - new_obj = (jl_value_t*)gc_read_stack(slot, offset, lb, ub); - } - else { - new_obj = (jl_value_t*)gc_read_stack(&rts[i], offset, lb, ub); - if (gc_ptr_tag(new_obj, 1)) { - // handle tagged pointers in finalizer list - new_obj = gc_ptr_clear_tag(new_obj, 1); - i++; + else { + gc_mark_push_remset(ptls, obj_parent, nptr); + } +} + +// Mark array with 8bit field descriptors +STATIC_INLINE void gc_mark_memory8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_value_t **ary8_begin, + jl_value_t **ary8_end, uint8_t *elem_begin, uint8_t *elem_end, uintptr_t elsize, + uintptr_t nptr) JL_NOTSAFEPOINT +{ + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t *new_obj; + assert(elsize > 0); + (void)jl_assume(elsize > 0); + // Decide whether need to chunk objary + if ((nptr & 0x2) == 0x2) { + // pre-scan this object: most of this object should be old, so look for + // the first young object before starting this chunk + // (this also would be valid for young objects, but probably less beneficial) + for (; ary8_begin < ary8_end; ary8_begin += elsize) { + int early_end = 0; + for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) { + jl_value_t **slot = &ary8_begin[*pindex]; + new_obj = *slot; + if (new_obj != NULL) { + verify_parent2("array", ary8_parent, &new_obj, "elem(%d)", + gc_slot_to_arrayidx(ary8_parent, ary8_begin)); + jl_taggedvalue_t *o = jl_astaggedvalue(new_obj); + if (!gc_old(o->header)) + nptr |= 1; + if (!gc_marked(o->header)){ + early_end = 1; + break; } + gc_heap_snapshot_record_array_edge(ary8_parent, slot); } - if (!gc_try_setmark(new_obj, &nptr, &tag, &bits)) - continue; - i++; - if (i < nr) { - // Haven't done with this one yet. Update the content and push it back - stack->i = i; - gc_repush_markdata(&sp, gc_mark_stackframe_t); - } - else if ((s = (jl_gcframe_t*)gc_read_stack(&s->prev, offset, lb, ub))) { - stack->s = s; - stack->i = 0; - uintptr_t new_nroots = gc_read_stack(&s->nroots, offset, lb, ub); - assert(new_nroots <= UINT32_MAX); - stack->nroots = (uint32_t)new_nroots; - gc_repush_markdata(&sp, gc_mark_stackframe_t); - } - goto mark; } - s = (jl_gcframe_t*)gc_read_stack(&s->prev, offset, lb, ub); - if (s != 0) { - stack->s = s; - i = 0; - uintptr_t new_nroots = gc_read_stack(&s->nroots, offset, lb, ub); - assert(new_nroots <= UINT32_MAX); - nroots = stack->nroots = (uint32_t)new_nroots; - nr = nroots >> 2; - continue; + if (early_end) + break; + } + } + size_t too_big = (ary8_end - ary8_begin) / GC_CHUNK_BATCH_SIZE > elsize; // use this order of operations to avoid idiv + jl_value_t **scan_end = ary8_end; + int pushed_chunk = 0; + if (too_big) { + scan_end = ary8_begin + elsize * GC_CHUNK_BATCH_SIZE; + // case 1: array owner is young, so we won't need to scan through all its elements + // to know that we will never need to push it to the remset. 
it's fine + // to create a chunk with "incorrect" `nptr` and push it to the chunk-queue + // ASAP in order to expose as much parallelism as possible + // case 2: lowest two bits of `nptr` are already set to 0x3, so won't change after + // scanning the array elements + if ((nptr & 0x2) != 0x2 || (nptr & 0x3) == 0x3) { + jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, elsize, nptr}; + gc_chunkqueue_push(mq, &c); + pushed_chunk = 1; + } + } + for (; ary8_begin < ary8_end; ary8_begin += elsize) { + for (uint8_t *pindex = elem_begin; pindex < elem_end; pindex++) { + jl_value_t **slot = &ary8_begin[*pindex]; + new_obj = *slot; + if (new_obj != NULL) { + verify_parent2("array", ary8_parent, &new_obj, "elem(%d)", + gc_slot_to_arrayidx(ary8_parent, ary8_begin)); + gc_assert_parent_validity(ary8_parent, new_obj); + gc_try_claim_and_push(mq, new_obj, &nptr); + gc_heap_snapshot_record_array_edge(ary8_parent, slot); } - goto pop; } } - -excstack: { - // Scan an exception stack - gc_mark_excstack_t *stackitr = gc_pop_markdata(&sp, gc_mark_excstack_t); - jl_excstack_t *excstack = stackitr->s; - size_t itr = stackitr->itr; - size_t bt_index = stackitr->bt_index; - size_t jlval_index = stackitr->jlval_index; - while (itr > 0) { - size_t bt_size = jl_excstack_bt_size(excstack, itr); - jl_bt_element_t *bt_data = jl_excstack_bt_data(excstack, itr); - for (; bt_index < bt_size; bt_index += jl_bt_entry_size(bt_data + bt_index)) { - jl_bt_element_t *bt_entry = bt_data + bt_index; - if (jl_bt_is_native(bt_entry)) - continue; - // Found an extended backtrace entry: iterate over any - // GC-managed values inside. - size_t njlvals = jl_bt_num_jlvals(bt_entry); - while (jlval_index < njlvals) { - new_obj = jl_bt_entry_jlvalue(bt_entry, jlval_index); - uintptr_t nptr = 0; - jlval_index += 1; - if (gc_try_setmark(new_obj, &nptr, &tag, &bits)) { - stackitr->itr = itr; - stackitr->bt_index = bt_index; - stackitr->jlval_index = jlval_index; - gc_repush_markdata(&sp, gc_mark_excstack_t); - goto mark; + if (too_big) { + if (!pushed_chunk) { + jl_gc_chunk_t c = {GC_ary8_chunk, ary8_parent, scan_end, ary8_end, elem_begin, elem_end, elsize, nptr}; + gc_chunkqueue_push(mq, &c); + } + } + else { + gc_mark_push_remset(ptls, ary8_parent, nptr); + } +} + +// Mark array with 16bit field descriptors +STATIC_INLINE void gc_mark_memory16(jl_ptls_t ptls, jl_value_t *ary16_parent, jl_value_t **ary16_begin, + jl_value_t **ary16_end, uint16_t *elem_begin, uint16_t *elem_end, size_t elsize, + uintptr_t nptr) JL_NOTSAFEPOINT +{ + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t *new_obj; + assert(elsize > 0); + (void)jl_assume(elsize > 0); + // Decide whether need to chunk objary + if ((nptr & 0x2) == 0x2) { + // pre-scan this object: most of this object should be old, so look for + // the first young object before starting this chunk + // (this also would be valid for young objects, but probably less beneficial) + for (; ary16_begin < ary16_end; ary16_begin += elsize) { + int early_end = 0; + for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) { + jl_value_t **slot = &ary16_begin[*pindex]; + new_obj = *slot; + if (new_obj != NULL) { + verify_parent2("array", ary16_parent, &new_obj, "elem(%d)", + gc_slot_to_arrayidx(ary16_parent, ary16_begin)); + jl_taggedvalue_t *o = jl_astaggedvalue(new_obj); + if (!gc_old(o->header)) + nptr |= 1; + if (!gc_marked(o->header)){ + early_end = 1; + break; } + gc_heap_snapshot_record_array_edge(ary16_parent, slot); } - jlval_index = 0; } - // The 
exception comes last - mark it - new_obj = jl_excstack_exception(excstack, itr); - itr = jl_excstack_next(excstack, itr); - bt_index = 0; - jlval_index = 0; - uintptr_t nptr = 0; - if (gc_try_setmark(new_obj, &nptr, &tag, &bits)) { - stackitr->itr = itr; - stackitr->bt_index = bt_index; - stackitr->jlval_index = jlval_index; - gc_repush_markdata(&sp, gc_mark_excstack_t); - goto mark; + if (early_end) + break; + } + } + size_t too_big = (ary16_end - ary16_begin) / GC_CHUNK_BATCH_SIZE > elsize; // use this order of operations to avoid idiv + jl_value_t **scan_end = ary16_end; + int pushed_chunk = 0; + if (too_big) { + scan_end = ary16_begin + elsize * GC_CHUNK_BATCH_SIZE; + // case 1: array owner is young, so we won't need to scan through all its elements + // to know that we will never need to push it to the remset. it's fine + // to create a chunk with "incorrect" `nptr` and push it to the chunk-queue + // ASAP in order to expose as much parallelism as possible + // case 2: lowest two bits of `nptr` are already set to 0x3, so won't change after + // scanning the array elements + if ((nptr & 0x2) != 0x2 || (nptr & 0x3) == 0x3) { + jl_gc_chunk_t c = {GC_ary16_chunk, ary16_parent, scan_end, ary16_end, elem_begin, elem_end, elsize, nptr}; + gc_chunkqueue_push(mq, &c); + pushed_chunk = 1; + } + } + for (; ary16_begin < scan_end; ary16_begin += elsize) { + for (uint16_t *pindex = elem_begin; pindex < elem_end; pindex++) { + jl_value_t **slot = &ary16_begin[*pindex]; + new_obj = *slot; + if (new_obj != NULL) { + verify_parent2("array", ary16_parent, &new_obj, "elem(%d)", + gc_slot_to_arrayidx(ary16_parent, ary16_begin)); + gc_assert_parent_validity(ary16_parent, new_obj); + gc_try_claim_and_push(mq, new_obj, &nptr); + gc_heap_snapshot_record_array_edge(ary16_parent, slot); } } - goto pop; } + if (too_big) { + if (!pushed_chunk) { + jl_gc_chunk_t c = {GC_ary16_chunk, ary16_parent, scan_end, ary16_end, elem_begin, elem_end, elsize, nptr}; + gc_chunkqueue_push(mq, &c); + } + } + else { + gc_mark_push_remset(ptls, ary16_parent, nptr); + } +} -module_binding: { - // Scan a module. 
see `gc_mark_binding_t` - // Other fields of the module will be scanned after the bindings are scanned - gc_mark_binding_t *binding = gc_pop_markdata(&sp, gc_mark_binding_t); - jl_binding_t **begin = binding->begin; - jl_binding_t **end = binding->end; - uint8_t mbits = binding->bits; - for (; begin < end; begin += 2) { - jl_binding_t *b = *begin; - if (b == (jl_binding_t*)HT_NOTFOUND) - continue; - if ((void*)b >= sysimg_base && (void*)b < sysimg_end) { - jl_taggedvalue_t *buf = jl_astaggedvalue(b); - uintptr_t tag = buf->header; - uint8_t bits; - if (!gc_marked(tag)) - gc_setmark_tag(buf, GC_OLD_MARKED, tag, &bits); - } - else { - gc_setmark_buf_(ptls, b, mbits, sizeof(jl_binding_t)); - } - void *vb = jl_astaggedvalue(b); - verify_parent1("module", binding->parent, &vb, "binding_buff"); - (void)vb; - jl_value_t *value = jl_atomic_load_relaxed(&b->value); - jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref); - if (value) { - verify_parent2("module", binding->parent, - &b->value, "binding(%s)", jl_symbol_name(b->name)); - if (gc_try_setmark(value, &binding->nptr, &tag, &bits)) { - new_obj = value; - begin += 2; - binding->begin = begin; - gc_repush_markdata(&sp, gc_mark_binding_t); - uintptr_t gr_tag; - uint8_t gr_bits; - if (gc_try_setmark(globalref, &binding->nptr, &gr_tag, &gr_bits)) { - gc_mark_marked_obj_t data = {globalref, gr_tag, gr_bits}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(marked_obj), - &data, sizeof(data), 1); - } - goto mark; - } - } - if (gc_try_setmark(globalref, &binding->nptr, &tag, &bits)) { - begin += 2; - binding->begin = begin; - gc_repush_markdata(&sp, gc_mark_binding_t); - new_obj = globalref; - goto mark; - } +// Mark chunk of large array +STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_chunk_t *c) JL_NOTSAFEPOINT +{ + switch (c->cid) { + case GC_objary_chunk: { + jl_value_t *obj_parent = c->parent; + jl_value_t **obj_begin = c->begin; + jl_value_t **obj_end = c->end; + uint32_t step = c->step; + uintptr_t nptr = c->nptr; + gc_mark_objarray(ptls, obj_parent, obj_begin, obj_end, + step, nptr); + break; } - jl_module_t *m = binding->parent; - int scanparent = gc_try_setmark((jl_value_t*)m->parent, &binding->nptr, &tag, &bits); - size_t nusings = m->usings.len; - if (nusings) { - // this is only necessary because bindings for "using" modules - // are added only when accessed. therefore if a module is replaced - // after "using" it but before accessing it, this array might - // contain the only reference. 
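Oversized object arrays are not scanned in one pass: past `GC_CHUNK_BATCH_SIZE` elements the remainder is wrapped in a `jl_gc_chunk_t`, pushed on the chunk queue where idle mark threads can steal it, and later re-expanded by `gc_mark_chunk` into the matching `gc_mark_*` routine. The following is a small self-contained model of the splitting arithmetic only; `mark_in_chunks` and `CHUNK_BATCH` are illustrative names, and the patch pushes the whole remainder as a single chunk rather than looping as the model does.

    #include <stddef.h>
    #include <stdio.h>

    #define CHUNK_BATCH 4096 /* stand-in for GC_CHUNK_BATCH_SIZE */

    /* Walk [begin, end) with stride `step` (> 0), scanning at most CHUNK_BATCH
       elements at a time and deferring the rest, as gc_mark_objarray does. */
    static void mark_in_chunks(size_t begin, size_t end, size_t step)
    {
        while (begin < end) {
            size_t nelem = (end - begin + step - 1) / step; /* elements left */
            size_t batch = nelem > CHUNK_BATCH ? CHUNK_BATCH : nelem;
            size_t scan_end = begin + batch * step;
            if (scan_end > end)
                scan_end = end;
            printf("scan [%zu, %zu)\n", begin, scan_end);
            if (scan_end < end)
                printf("  defer [%zu, %zu) as a chunk\n", scan_end, end);
            begin = scan_end;
        }
    }

    int main(void)
    {
        mark_in_chunks(0, 10000, 1);
        return 0;
    }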
- objary_begin = (jl_value_t**)m->usings.items; - objary_end = objary_begin + nusings; - gc_mark_objarray_t data = {(jl_value_t*)m, objary_begin, objary_end, 1, binding->nptr}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray), - &data, sizeof(data), 0); - if (!scanparent) { - objary = (gc_mark_objarray_t*)sp.data; - goto objarray_loaded; - } - sp.data = (jl_gc_mark_data_t *)(((char*)sp.data) + sizeof(data)); - sp.pc++; + case GC_ary8_chunk: { + jl_value_t *ary8_parent = c->parent; + jl_value_t **ary8_begin = c->begin; + jl_value_t **ary8_end = c->end; + uint8_t *elem_begin = (uint8_t *)c->elem_begin; + uint8_t *elem_end = (uint8_t *)c->elem_end; + size_t elsize = c->step; + uintptr_t nptr = c->nptr; + gc_mark_memory8(ptls, ary8_parent, ary8_begin, ary8_end, elem_begin, elem_end, + elsize, nptr); + break; } - else { - gc_mark_push_remset(ptls, (jl_value_t*)m, binding->nptr); + case GC_ary16_chunk: { + jl_value_t *ary16_parent = c->parent; + jl_value_t **ary16_begin = c->begin; + jl_value_t **ary16_end = c->end; + uint16_t *elem_begin = (uint16_t *)c->elem_begin; + uint16_t *elem_end = (uint16_t *)c->elem_end; + size_t elsize = c->step; + uintptr_t nptr = c->nptr; + gc_mark_memory16(ptls, ary16_parent, ary16_begin, ary16_end, elem_begin, elem_end, + elsize, nptr); + break; } - if (scanparent) { - new_obj = (jl_value_t*)m->parent; - goto mark; + case GC_finlist_chunk: { + jl_value_t **fl_begin = c->begin; + jl_value_t **fl_end = c->end; + gc_mark_finlist_(mq, fl_begin, fl_end); + break; + } + default: { + // `empty-chunk` should be checked by caller + jl_safe_printf("GC internal error: chunk mismatch\n"); + abort(); } - goto pop; } +} -finlist: { - // Scan a finalizer (or format compatible) list. see `gc_mark_finlist_t` - gc_mark_finlist_t *finlist = gc_pop_markdata(&sp, gc_mark_finlist_t); - jl_value_t **begin = finlist->begin; - jl_value_t **end = finlist->end; - for (; begin < end; begin++) { - new_obj = *begin; - if (__unlikely(!new_obj)) - continue; - if (gc_ptr_tag(new_obj, 1)) { - new_obj = (jl_value_t*)gc_ptr_clear_tag(new_obj, 1); - begin++; - assert(begin < end); +// Mark gc frame +STATIC_INLINE void gc_mark_stack(jl_ptls_t ptls, jl_gcframe_t *s, uint32_t nroots, uintptr_t offset, + uintptr_t lb, uintptr_t ub) JL_NOTSAFEPOINT +{ + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t *new_obj; + uint32_t nr = nroots >> 2; + while (1) { + jl_value_t ***rts = (jl_value_t ***)(((void **)s) + 2); + for (uint32_t i = 0; i < nr; i++) { + if (nroots & 1) { + void **slot = (void **)gc_read_stack(&rts[i], offset, lb, ub); + new_obj = (jl_value_t *)gc_read_stack(slot, offset, lb, ub); + if (new_obj == NULL) + continue; + } + else { + new_obj = (jl_value_t *)gc_read_stack(&rts[i], offset, lb, ub); + if (gc_ptr_tag(new_obj, 1)) { + // handle tagged pointers in finalizer list + new_obj = (jl_value_t *)gc_ptr_clear_tag(new_obj, 1); + // skip over the finalizer fptr + i++; + } + if (gc_ptr_tag(new_obj, 2)) + continue; + // conservatively check for the presence of any smalltag type, instead of just NULL + // in the very unlikely event that codegen decides to root the result of julia.typeof + if (new_obj < (jl_value_t*)((uintptr_t)jl_max_tags << 4)) + continue; } - uintptr_t nptr = 0; - if (!gc_try_setmark(new_obj, &nptr, &tag, &bits)) + gc_try_claim_and_push(mq, new_obj, NULL); + gc_heap_snapshot_record_frame_to_object_edge(s, new_obj); + } + jl_gcframe_t *sprev = (jl_gcframe_t *)gc_read_stack(&s->prev, offset, lb, ub); + if (sprev == NULL) + break; + 
gc_heap_snapshot_record_frame_to_frame_edge(s, sprev); + s = sprev; + uintptr_t new_nroots = gc_read_stack(&s->nroots, offset, lb, ub); + assert(new_nroots <= UINT32_MAX); + nroots = (uint32_t)new_nroots; + nr = nroots >> 2; + } +} + +// Mark exception stack +STATIC_INLINE void gc_mark_excstack(jl_ptls_t ptls, jl_excstack_t *excstack, size_t itr) JL_NOTSAFEPOINT +{ + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t *new_obj; + while (itr > 0) { + size_t bt_size = jl_excstack_bt_size(excstack, itr); + jl_bt_element_t *bt_data = jl_excstack_bt_data(excstack, itr); + for (size_t bt_index = 0; bt_index < bt_size; + bt_index += jl_bt_entry_size(bt_data + bt_index)) { + jl_bt_element_t *bt_entry = bt_data + bt_index; + if (jl_bt_is_native(bt_entry)) continue; - begin++; - // Found an object to mark - if (begin < end) { - // Haven't done with this one yet. Update the content and push it back - finlist->begin = begin; - gc_repush_markdata(&sp, gc_mark_finlist_t); + // Found an extended backtrace entry: iterate over any + // GC-managed values inside. + size_t njlvals = jl_bt_num_jlvals(bt_entry); + for (size_t jlval_index = 0; jlval_index < njlvals; jlval_index++) { + new_obj = jl_bt_entry_jlvalue(bt_entry, jlval_index); + gc_try_claim_and_push(mq, new_obj, NULL); + gc_heap_snapshot_record_frame_to_object_edge(bt_entry, new_obj); } - goto mark; } - goto pop; + // The exception comes last - mark it + new_obj = jl_excstack_exception(excstack, itr); + itr = jl_excstack_next(excstack, itr); + gc_try_claim_and_push(mq, new_obj, NULL); + gc_heap_snapshot_record_frame_to_object_edge(excstack, new_obj); + } +} + +// Mark module binding +STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent, uintptr_t nptr, + uint8_t bits) JL_NOTSAFEPOINT +{ + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_value_t *bindings = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindings); + gc_assert_parent_validity((jl_value_t *)mb_parent, bindings); + gc_try_claim_and_push(mq, bindings, &nptr); + jl_value_t *bindingkeyset = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindingkeyset); + gc_assert_parent_validity((jl_value_t *)mb_parent, bindingkeyset); + gc_try_claim_and_push(mq, bindingkeyset, &nptr); + gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)mb_parent->parent); + gc_try_claim_and_push(mq, (jl_value_t *)mb_parent->parent, &nptr); + size_t nusings = mb_parent->usings.len; + if (nusings > 0) { + // this is only necessary because bindings for "using" modules + // are added only when accessed. therefore if a module is replaced + // after "using" it but before accessing it, this array might + // contain the only reference. + jl_value_t *obj_parent = (jl_value_t *)mb_parent; + jl_value_t **objary_begin = (jl_value_t **)mb_parent->usings.items; + jl_value_t **objary_end = objary_begin + nusings; + gc_mark_objarray(ptls, obj_parent, objary_begin, objary_end, 1, nptr); } + else { + gc_mark_push_remset(ptls, (jl_value_t *)mb_parent, nptr); + } +} -mark: { - // Generic scanning entry point. - // Expects `new_obj`, `tag` and `bits` to be set correctly. 
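`gc_mark_stack` above decodes each `jl_gcframe_t` the same way the old loop did: the root count is `nroots >> 2`, and the lowest bit distinguishes frames whose slots hold object pointers directly from frames whose slots hold addresses of stack locations that must be read through once more (the two `JL_GC_PUSH*`/`JL_GC_PUSHARGS` frame layouts). A tiny self-contained illustration of that decoding, with made-up values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uintptr_t nroots = (7u << 2) | 1u; /* 7 roots, stored indirectly */
        printf("roots=%u indirect=%d\n", (unsigned)(nroots >> 2), (int)(nroots & 1));
        return 0;
    }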
-#ifdef JL_DEBUG_BUILD - if (new_obj == gc_findval) - jl_raise_debugger(); -#endif +void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) +{ + jl_value_t *new_obj; + // Decide whether need to chunk finlist + size_t nrefs = (fl_end - fl_begin); + if (nrefs > GC_CHUNK_BATCH_SIZE) { + jl_gc_chunk_t c = {GC_finlist_chunk, NULL, fl_begin + GC_CHUNK_BATCH_SIZE, fl_end, 0, 0, 0, 0}; + gc_chunkqueue_push(mq, &c); + fl_end = fl_begin + GC_CHUNK_BATCH_SIZE; + } + for (; fl_begin < fl_end; fl_begin++) { + new_obj = *fl_begin; + if (__unlikely(new_obj == NULL)) + continue; + if (gc_ptr_tag(new_obj, 1)) { + new_obj = (jl_value_t *)gc_ptr_clear_tag(new_obj, 1); + fl_begin++; + assert(fl_begin < fl_end); + } + if (gc_ptr_tag(new_obj, 2)) + continue; + gc_try_claim_and_push(mq, new_obj, NULL); + } +} + +// Mark finalizer list (or list of objects following same format) +void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) +{ + size_t len = list->len; + if (len <= start) + return; + jl_value_t **fl_begin = (jl_value_t **)list->items + start; + jl_value_t **fl_end = (jl_value_t **)list->items + len; + gc_mark_finlist_(mq, fl_begin, fl_end); +} + +JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) +{ + int may_claim = gc_try_setmark_tag(jl_astaggedvalue(obj), GC_MARKED); + if (may_claim) + gc_ptr_queue_push(&ptls->mark_queue, obj); + return may_claim; +} + +JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, + jl_value_t **objs, size_t nobjs) +{ + uintptr_t nptr = (nobjs << 2) | (jl_astaggedvalue(parent)->bits.gc & 2); + gc_mark_objarray(ptls, parent, objs, objs + nobjs, 1, nptr); +} + +// Enqueue and mark all outgoing references from `new_obj` which have not been marked +// yet. `meta_updated` is mostly used to make sure we don't update metadata twice for +// objects which have been enqueued into the `remset` +FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj, + int meta_updated) +{ + jl_value_t *new_obj = (jl_value_t *)_new_obj; + mark_obj: { jl_taggedvalue_t *o = jl_astaggedvalue(new_obj); - jl_datatype_t *vt = (jl_datatype_t*)tag; - int foreign_alloc = 0; + uintptr_t vtag = o->header & ~(uintptr_t)0xf; + uint8_t bits = (gc_old(o->header) && !mark_reset_age) ? 
GC_OLD_MARKED : GC_MARKED; int update_meta = __likely(!meta_updated && !gc_verifying); - if (update_meta && (void*)o >= sysimg_base && (void*)o < sysimg_end) { + int foreign_alloc = 0; + if (update_meta && o->bits.in_image) { foreign_alloc = 1; update_meta = 0; } - meta_updated = 0; // Symbols are always marked - assert(vt != jl_symbol_type); - if (vt == jl_simplevector_type) { - size_t l = jl_svec_len(new_obj); - jl_value_t **data = jl_svec_data(new_obj); - size_t dtsz = l * sizeof(void*) + sizeof(jl_svec_t); - if (update_meta) - gc_setmark(ptls, o, bits, dtsz); - else if (foreign_alloc) - objprofile_count(vt, bits == GC_OLD_MARKED, dtsz); - uintptr_t nptr = (l << 2) | (bits & GC_OLD); - objary_begin = data; - objary_end = data + l; - gc_mark_objarray_t markdata = {new_obj, objary_begin, objary_end, 1, nptr}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray), - &markdata, sizeof(markdata), 0); - objary = (gc_mark_objarray_t*)sp.data; - goto objarray_loaded; - } - else if (vt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)new_obj; - jl_array_flags_t flags = a->flags; + assert(vtag != (uintptr_t)jl_symbol_type && vtag != jl_symbol_tag << 4); + if (vtag == (jl_datatype_tag << 4) || + vtag == (jl_unionall_tag << 4) || + vtag == (jl_uniontype_tag << 4) || + vtag == (jl_tvar_tag << 4) || + vtag == (jl_vararg_tag << 4)) { + // these objects have pointers in them, but no other special handling + // so we want these to fall through to the end + vtag = (uintptr_t)ijl_small_typeof[vtag / sizeof(*ijl_small_typeof)]; + } + else if (vtag < jl_max_tags << 4) { + // these objects either have specialing handling + if (vtag == jl_simplevector_tag << 4) { + size_t l = jl_svec_len(new_obj); + jl_value_t **data = jl_svec_data(new_obj); + size_t dtsz = l * sizeof(void *) + sizeof(jl_svec_t); + if (update_meta) + gc_setmark(ptls, o, bits, dtsz); + else if (foreign_alloc) + objprofile_count(jl_simplevector_type, bits == GC_OLD_MARKED, dtsz); + jl_value_t *objary_parent = new_obj; + jl_value_t **objary_begin = data; + jl_value_t **objary_end = data + l; + uint32_t step = 1; + uintptr_t nptr = (l << 2) | (bits & GC_OLD); + gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr); + } + else if (vtag == jl_module_tag << 4) { + if (update_meta) + gc_setmark(ptls, o, bits, sizeof(jl_module_t)); + else if (foreign_alloc) + objprofile_count(jl_module_type, bits == GC_OLD_MARKED, sizeof(jl_module_t)); + jl_module_t *mb_parent = (jl_module_t *)new_obj; + uintptr_t nptr = ((mb_parent->usings.len + 1) << 2) | (bits & GC_OLD); + gc_mark_module_binding(ptls, mb_parent, nptr, bits); + } + else if (vtag == jl_task_tag << 4) { + if (update_meta) + gc_setmark(ptls, o, bits, sizeof(jl_task_t)); + else if (foreign_alloc) + objprofile_count(jl_task_type, bits == GC_OLD_MARKED, sizeof(jl_task_t)); + jl_task_t *ta = (jl_task_t *)new_obj; + gc_scrub_record_task(ta); + if (gc_cblist_task_scanner) { + int16_t tid = jl_atomic_load_relaxed(&ta->tid); + gc_invoke_callbacks(jl_gc_cb_task_scanner_t, gc_cblist_task_scanner, + (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task)); + } + #ifdef COPY_STACKS + void *stkbuf = ta->stkbuf; + if (stkbuf && ta->copy_stack) { + gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz); + // For gc_heap_snapshot_record: + // TODO: attribute size of stack + // TODO: edge to stack data + // TODO: synthetic node for stack data (how big is it?) 
+ } + #endif + jl_gcframe_t *s = ta->gcstack; + size_t nroots; + uintptr_t offset = 0; + uintptr_t lb = 0; + uintptr_t ub = (uintptr_t)-1; + #ifdef COPY_STACKS + if (stkbuf && ta->copy_stack && !ta->ptls) { + int16_t tid = jl_atomic_load_relaxed(&ta->tid); + assert(tid >= 0); + jl_ptls_t ptls2 = gc_all_tls_states[tid]; + ub = (uintptr_t)ptls2->stackbase; + lb = ub - ta->copy_stack; + offset = (uintptr_t)stkbuf - lb; + } + #endif + if (s != NULL) { + nroots = gc_read_stack(&s->nroots, offset, lb, ub); + gc_heap_snapshot_record_task_to_frame_edge(ta, s); + assert(nroots <= UINT32_MAX); + gc_mark_stack(ptls, s, (uint32_t)nroots, offset, lb, ub); + } + if (ta->excstack) { + jl_excstack_t *excstack = ta->excstack; + gc_heap_snapshot_record_task_to_frame_edge(ta, excstack); + size_t itr = ta->excstack->top; + gc_setmark_buf_(ptls, excstack, bits, + sizeof(jl_excstack_t) + + sizeof(uintptr_t) * excstack->reserved_size); + gc_mark_excstack(ptls, excstack, itr); + } + const jl_datatype_layout_t *layout = jl_task_type->layout; + assert(layout->flags.fielddesc_type == 0); + assert(layout->nfields > 0); + uint32_t npointers = layout->npointers; + char *obj8_parent = (char *)ta; + uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout); + uint8_t *obj8_end = obj8_begin + npointers; + // assume tasks always reference young objects: set lowest bit + uintptr_t nptr = (npointers << 2) | 1 | bits; + new_obj = gc_mark_obj8(ptls, obj8_parent, obj8_begin, obj8_end, nptr); + if (new_obj != NULL) { + if (!meta_updated) + goto mark_obj; + else + gc_ptr_queue_push(mq, new_obj); + } + } + else if (vtag == jl_string_tag << 4) { + size_t dtsz = jl_string_len(new_obj) + sizeof(size_t) + 1; + if (update_meta) + gc_setmark(ptls, o, bits, dtsz); + else if (foreign_alloc) + objprofile_count(jl_string_type, bits == GC_OLD_MARKED, dtsz); + } + else { + jl_datatype_t *vt = ijl_small_typeof[vtag / sizeof(*ijl_small_typeof)]; + size_t dtsz = jl_datatype_size(vt); + if (update_meta) + gc_setmark(ptls, o, bits, dtsz); + else if (foreign_alloc) + objprofile_count(vt, bits == GC_OLD_MARKED, dtsz); + } + return; + } + else { + jl_datatype_t *vt = (jl_datatype_t *)vtag; + if (__unlikely(!jl_is_datatype(vt) || vt->smalltag)) + gc_dump_queue_and_abort(ptls, vt); + } + jl_datatype_t *vt = (jl_datatype_t *)vtag; + if (vt->name == jl_genericmemory_typename) { + jl_genericmemory_t *m = (jl_genericmemory_t*)new_obj; + int pooled = 1; // The jl_genericmemory_t itself is always pooled-size, even with data attached to it if (update_meta) { - if (flags.pooled) + if (pooled) gc_setmark_pool(ptls, o, bits); else gc_setmark_big(ptls, o, bits); } - else if (foreign_alloc) - objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_array_t)); - if (flags.how == 1) { - void *val_buf = jl_astaggedvalue((char*)a->data - a->offset * a->elsize); - verify_parent1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); - (void)val_buf; - gc_setmark_buf_(ptls, (char*)a->data - a->offset * a->elsize, - bits, jl_array_nbytes(a)); + else if (foreign_alloc) { + objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_genericmemory_t)); + } + int how = jl_genericmemory_how(m); + if (how == 0 || how == 2) { + gc_heap_snapshot_record_hidden_edge(new_obj, m->ptr, jl_genericmemory_nbytes(m), how == 0 ? 
2 : 0); } - else if (flags.how == 2) { + else if (how == 1) { if (update_meta || foreign_alloc) { objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED, - jl_array_nbytes(a)); + jl_genericmemory_nbytes(m)); + size_t nb = jl_genericmemory_nbytes(m); + gc_heap_snapshot_record_hidden_edge(new_obj, m->ptr, nb, 0); if (bits == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += jl_array_nbytes(a); + ptls->gc_cache.perm_scanned_bytes += nb; } else { - ptls->gc_cache.scanned_bytes += jl_array_nbytes(a); + ptls->gc_cache.scanned_bytes += nb; } } } - else if (flags.how == 3) { - jl_value_t *owner = jl_array_data_owner(a); + else if (how == 3) { + jl_value_t *owner = jl_genericmemory_data_owner_field(m); uintptr_t nptr = (1 << 2) | (bits & GC_OLD); - int markowner = gc_try_setmark(owner, &nptr, &tag, &bits); + gc_try_claim_and_push(mq, owner, &nptr); + gc_heap_snapshot_record_internal_array_edge(new_obj, owner); gc_mark_push_remset(ptls, new_obj, nptr); - if (markowner) { - new_obj = owner; - goto mark; - } - goto pop; + return; } - if (a->data == NULL || jl_array_len(a) == 0) - goto pop; - if (flags.ptrarray) { - if ((jl_datatype_t*)jl_tparam0(vt) == jl_symbol_type) - goto pop; - size_t l = jl_array_len(a); - uintptr_t nptr = (l << 2) | (bits & GC_OLD); - objary_begin = (jl_value_t**)a->data; - objary_end = objary_begin + l; - gc_mark_objarray_t markdata = {new_obj, objary_begin, objary_end, 1, nptr}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray), - &markdata, sizeof(markdata), 0); - objary = (gc_mark_objarray_t*)sp.data; - goto objarray_loaded; + if (m->length == 0) + return; + const jl_datatype_layout_t *layout = vt->layout; + if (layout->flags.arrayelem_isboxed) { + if ((jl_datatype_t*)jl_tparam1(vt) == jl_symbol_type) + return; + jl_value_t *objary_parent = new_obj; + jl_value_t **objary_begin = (jl_value_t **)m->ptr; + jl_value_t **objary_end = objary_begin + m->length; + uint32_t step = 1; + uintptr_t nptr = (m->length << 2) | (bits & GC_OLD); + gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr); } - else if (flags.hasptr) { - jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(vt); - const jl_datatype_layout_t *layout = et->layout; + else if (layout->first_ptr >= 0) { + const jl_datatype_layout_t *layout = vt->layout; unsigned npointers = layout->npointers; - unsigned elsize = a->elsize / sizeof(jl_value_t*); - size_t l = jl_array_len(a); + unsigned elsize = layout->size / sizeof(jl_value_t*); + size_t l = m->length; + jl_value_t *objary_parent = new_obj; + jl_value_t **objary_begin = (jl_value_t**)m->ptr; + jl_value_t **objary_end = objary_begin + l * elsize; + uint32_t step = elsize; uintptr_t nptr = ((l * npointers) << 2) | (bits & GC_OLD); - objary_begin = (jl_value_t**)a->data; - objary_end = objary_begin + l * elsize; if (npointers == 1) { // TODO: detect anytime time stride is uniform? 
objary_begin += layout->first_ptr; - gc_mark_objarray_t markdata = {new_obj, objary_begin, objary_end, elsize, nptr}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(objarray), - &markdata, sizeof(markdata), 0); - objary = (gc_mark_objarray_t*)sp.data; - goto objarray_loaded; + gc_mark_objarray(ptls, objary_parent, objary_begin, objary_end, step, nptr); } - else if (layout->fielddesc_type == 0) { - obj8_begin = (uint8_t*)jl_dt_layout_ptrs(layout); - obj8_end = obj8_begin + npointers; - gc_mark_array8_t markdata = {objary_begin, objary_end, obj8_begin, {new_obj, obj8_begin, obj8_end, nptr}}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(array8), - &markdata, sizeof(markdata), 0); - ary8 = (gc_mark_array8_t*)sp.data; - goto array8_loaded; + else if (layout->flags.fielddesc_type == 0) { + uint8_t *obj8_begin = (uint8_t*)jl_dt_layout_ptrs(layout); + uint8_t *obj8_end = obj8_begin + npointers; + gc_mark_memory8(ptls, objary_parent, objary_begin, objary_end, obj8_begin, obj8_end, + elsize, nptr); } - else if (layout->fielddesc_type == 1) { - obj16_begin = (uint16_t*)jl_dt_layout_ptrs(layout); - obj16_end = obj16_begin + npointers; - gc_mark_array16_t markdata = {objary_begin, objary_end, obj16_begin, {new_obj, obj16_begin, obj16_end, nptr}}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(array16), - &markdata, sizeof(markdata), 0); - ary16 = (gc_mark_array16_t*)sp.data; - goto array16_loaded; + else if (layout->flags.fielddesc_type == 1) { + uint16_t *obj16_begin = (uint16_t*)jl_dt_layout_ptrs(layout); + uint16_t *obj16_end = obj16_begin + npointers; + gc_mark_memory16(ptls, objary_parent, objary_begin, objary_end, obj16_begin, obj16_end, + elsize, nptr); } else { assert(0 && "unimplemented"); } } - goto pop; + return; } - else if (vt == jl_module_type) { - if (update_meta) - gc_setmark(ptls, o, bits, sizeof(jl_module_t)); - else if (foreign_alloc) - objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_module_t)); - jl_module_t *m = (jl_module_t*)new_obj; - jl_binding_t **table = (jl_binding_t**)m->bindings.table; - size_t bsize = m->bindings.size; - uintptr_t nptr = ((bsize + m->usings.len + 1) << 2) | (bits & GC_OLD); - gc_mark_binding_t markdata = {m, table + 1, table + bsize, nptr, bits}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(module_binding), - &markdata, sizeof(markdata), 0); - sp.data = (jl_gc_mark_data_t *)(((char*)sp.data) + sizeof(markdata)); - goto module_binding; - } - else if (vt == jl_task_type) { - if (update_meta) - gc_setmark(ptls, o, bits, sizeof(jl_task_t)); - else if (foreign_alloc) - objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t)); - jl_task_t *ta = (jl_task_t*)new_obj; - gc_scrub_record_task(ta); - if (gc_cblist_task_scanner) { - export_gc_state(ptls, &sp); - int16_t tid = jl_atomic_load_relaxed(&ta->tid); - gc_invoke_callbacks(jl_gc_cb_task_scanner_t, - gc_cblist_task_scanner, - (ta, tid != -1 && ta == jl_all_tls_states[tid]->root_task)); - import_gc_state(ptls, &sp); - } -#ifdef COPY_STACKS - void *stkbuf = ta->stkbuf; - if (stkbuf && ta->copy_stack) - gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz); -#endif - jl_gcframe_t *s = ta->gcstack; - size_t nroots; - uintptr_t offset = 0; - uintptr_t lb = 0; - uintptr_t ub = (uintptr_t)-1; -#ifdef COPY_STACKS - if (stkbuf && ta->copy_stack && ta->ptls == NULL) { - int16_t tid = jl_atomic_load_relaxed(&ta->tid); - assert(tid >= 0); - jl_ptls_t ptls2 = jl_all_tls_states[tid]; - ub = (uintptr_t)ptls2->stackbase; - lb = ub - ta->copy_stack; - offset = (uintptr_t)stkbuf - 
lb; + size_t dtsz = jl_datatype_size(vt); + if (update_meta) + gc_setmark(ptls, o, bits, dtsz); + else if (foreign_alloc) + objprofile_count(vt, bits == GC_OLD_MARKED, dtsz); + if (vt == jl_weakref_type) + return; + const jl_datatype_layout_t *layout = vt->layout; + uint32_t npointers = layout->npointers; + if (npointers == 0) + return; + uintptr_t nptr = (npointers << 2 | (bits & GC_OLD)); + assert((layout->nfields > 0 || layout->flags.fielddesc_type == 3) && + "opaque types should have been handled specially"); + if (layout->flags.fielddesc_type == 0) { + char *obj8_parent = (char *)new_obj; + uint8_t *obj8_begin = (uint8_t *)jl_dt_layout_ptrs(layout); + uint8_t *obj8_end = obj8_begin + npointers; + assert(obj8_begin < obj8_end); + new_obj = gc_mark_obj8(ptls, obj8_parent, obj8_begin, obj8_end, nptr); + if (new_obj != NULL) { + if (!meta_updated) + goto mark_obj; + else + gc_ptr_queue_push(mq, new_obj); } -#endif - if (s) { - nroots = gc_read_stack(&s->nroots, offset, lb, ub); - assert(nroots <= UINT32_MAX); - gc_mark_stackframe_t stackdata = {s, 0, (uint32_t)nroots, offset, lb, ub}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(stack), - &stackdata, sizeof(stackdata), 1); + } + else if (layout->flags.fielddesc_type == 1) { + char *obj16_parent = (char *)new_obj; + uint16_t *obj16_begin = (uint16_t *)jl_dt_layout_ptrs(layout); + uint16_t *obj16_end = obj16_begin + npointers; + assert(obj16_begin < obj16_end); + new_obj = gc_mark_obj16(ptls, obj16_parent, obj16_begin, obj16_end, nptr); + if (new_obj != NULL) { + if (!meta_updated) + goto mark_obj; + else + gc_ptr_queue_push(mq, new_obj); } - if (ta->excstack) { - gc_setmark_buf_(ptls, ta->excstack, bits, sizeof(jl_excstack_t) + - sizeof(uintptr_t)*ta->excstack->reserved_size); - gc_mark_excstack_t stackdata = {ta->excstack, ta->excstack->top, 0, 0}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(excstack), - &stackdata, sizeof(stackdata), 1); + } + else if (layout->flags.fielddesc_type == 2) { + // This is very uncommon + // Do not do store to load forwarding to save some code size + char *obj32_parent = (char *)new_obj; + uint32_t *obj32_begin = (uint32_t *)jl_dt_layout_ptrs(layout); + uint32_t *obj32_end = obj32_begin + npointers; + assert(obj32_begin < obj32_end); + new_obj = gc_mark_obj32(ptls, obj32_parent, obj32_begin, obj32_end, nptr); + if (new_obj != NULL) { + if (!meta_updated) + goto mark_obj; + else + gc_ptr_queue_push(mq, new_obj); } - const jl_datatype_layout_t *layout = jl_task_type->layout; - assert(layout->fielddesc_type == 0); - assert(layout->nfields > 0); - uint32_t npointers = layout->npointers; - obj8_begin = (uint8_t*)jl_dt_layout_ptrs(layout); - obj8_end = obj8_begin + npointers; - // assume tasks always reference young objects: set lowest bit - uintptr_t nptr = (npointers << 2) | 1 | bits; - gc_mark_obj8_t markdata = {new_obj, obj8_begin, obj8_end, nptr}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(obj8), - &markdata, sizeof(markdata), 0); - obj8 = (gc_mark_obj8_t*)sp.data; - obj8_parent = (char*)ta; - goto obj8_loaded; - } - else if (vt == jl_string_type) { - size_t dtsz = jl_string_len(new_obj) + sizeof(size_t) + 1; - if (update_meta) - gc_setmark(ptls, o, bits, dtsz); - else if (foreign_alloc) - objprofile_count(vt, bits == GC_OLD_MARKED, dtsz); - goto pop; } else { - if (__unlikely(!jl_is_datatype(vt))) - gc_assert_datatype_fail(ptls, vt, sp); - size_t dtsz = jl_datatype_size(vt); - if (update_meta) - gc_setmark(ptls, o, bits, dtsz); - else if (foreign_alloc) - 
objprofile_count(vt, bits == GC_OLD_MARKED, dtsz); - if (vt == jl_weakref_type) - goto pop; - const jl_datatype_layout_t *layout = vt->layout; - uint32_t npointers = layout->npointers; - if (npointers == 0) + assert(layout->flags.fielddesc_type == 3); + jl_fielddescdyn_t *desc = (jl_fielddescdyn_t *)jl_dt_layout_fields(layout); + int old = jl_astaggedvalue(new_obj)->bits.gc & 2; + uintptr_t young = desc->markfunc(ptls, new_obj); + if (old && young) + gc_mark_push_remset(ptls, new_obj, young * 4 + 3); + } + } +} + +// Used in gc-debug +void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq) +{ + while (1) { + void *new_obj = (void *)gc_ptr_queue_pop(&ptls->mark_queue); + // No more objects to mark + if (__unlikely(new_obj == NULL)) { + return; + } + gc_mark_outrefs(ptls, mq, new_obj, 0); + } +} + +// Drain items from worker's own chunkqueue +void gc_drain_own_chunkqueue(jl_ptls_t ptls, jl_gc_markqueue_t *mq) +{ + jl_gc_chunk_t c = {.cid = GC_empty_chunk}; + do { + c = gc_chunkqueue_pop(mq); + if (c.cid != GC_empty_chunk) { + gc_mark_chunk(ptls, mq, &c); + gc_mark_loop_serial_(ptls, mq); + } + } while (c.cid != GC_empty_chunk); +} + +// Main mark loop. Stack (allocated on the heap) of `jl_value_t *` +// is used to keep track of processed items. Maintaining this stack (instead of +// native one) avoids stack overflow when marking deep objects and +// makes it easier to implement parallel marking via work-stealing +JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls) +{ + gc_mark_loop_serial_(ptls, &ptls->mark_queue); + gc_drain_own_chunkqueue(ptls, &ptls->mark_queue); +} + +void gc_mark_and_steal(jl_ptls_t ptls) +{ + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_gc_markqueue_t *mq_master = NULL; + int master_tid = jl_atomic_load(&gc_master_tid); + if (master_tid == -1) { + return; + } + mq_master = &gc_all_tls_states[master_tid]->mark_queue; + void *new_obj; + jl_gc_chunk_t c; + pop : { + new_obj = gc_ptr_queue_pop(mq); + if (new_obj != NULL) { + goto mark; + } + c = gc_chunkqueue_pop(mq); + if (c.cid != GC_empty_chunk) { + gc_mark_chunk(ptls, mq, &c); + goto pop; + } + goto steal; + } + mark : { + gc_mark_outrefs(ptls, mq, new_obj, 0); + goto pop; + } + // Note that for the stealing heuristics, we try to + // steal chunks much more aggressively than pointers, + // since we know chunks will likely expand into a lot + // of work for the mark loop + steal : { + // Try to steal chunk from random GC thread + for (int i = 0; i < 4 * jl_n_markthreads; i++) { + uint32_t v = gc_first_tid + cong(jl_n_markthreads, &ptls->rngseed); + jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue; + c = gc_chunkqueue_steal_from(mq2); + if (c.cid != GC_empty_chunk) { + gc_mark_chunk(ptls, mq, &c); goto pop; - uintptr_t nptr = npointers << 2 | (bits & GC_OLD); - assert((layout->nfields > 0 || layout->fielddesc_type == 3) && "opaque types should have been handled specially"); - if (layout->fielddesc_type == 0) { - obj8_parent = (char*)new_obj; - obj8_begin = (uint8_t*)jl_dt_layout_ptrs(layout); - obj8_end = obj8_begin + npointers; - assert(obj8_begin < obj8_end); - gc_mark_obj8_t markdata = {new_obj, obj8_begin, obj8_end, nptr}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(obj8), - &markdata, sizeof(markdata), 0); - obj8 = (gc_mark_obj8_t*)sp.data; - goto obj8_loaded; } - else if (layout->fielddesc_type == 1) { - obj16_parent = (char*)new_obj; - obj16_begin = (uint16_t*)jl_dt_layout_ptrs(layout); - obj16_end = obj16_begin + npointers; - assert(obj16_begin < obj16_end); - 
gc_mark_obj16_t markdata = {new_obj, obj16_begin, obj16_end, nptr}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(obj16), - &markdata, sizeof(markdata), 0); - obj16 = (gc_mark_obj16_t*)sp.data; - goto obj16_loaded; - } - else if (layout->fielddesc_type == 2) { - // This is very uncommon - // Do not do store to load forwarding to save some code size - uint32_t *obj32_begin = (uint32_t*)jl_dt_layout_ptrs(layout); - uint32_t *obj32_end = obj32_begin + npointers; - gc_mark_obj32_t markdata = {new_obj, obj32_begin, obj32_end, nptr}; - gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(obj32), - &markdata, sizeof(markdata), 0); - sp.data = (jl_gc_mark_data_t *)(((char*)sp.data) + sizeof(markdata)); - goto obj32; + } + // Sequentially walk GC threads to try to steal chunk + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { + jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue; + c = gc_chunkqueue_steal_from(mq2); + if (c.cid != GC_empty_chunk) { + gc_mark_chunk(ptls, mq, &c); + goto pop; } - else { - assert(layout->fielddesc_type == 3); - jl_fielddescdyn_t *desc = (jl_fielddescdyn_t*)jl_dt_layout_fields(layout); - int old = jl_astaggedvalue(new_obj)->bits.gc & 2; - export_gc_state(ptls, &sp); - uintptr_t young = desc->markfunc(ptls, new_obj); - import_gc_state(ptls, &sp); - if (old && young) - gc_mark_push_remset(ptls, new_obj, young * 4 + 3); + } + // Try to steal chunk from master thread + if (mq_master != NULL) { + c = gc_chunkqueue_steal_from(mq_master); + if (c.cid != GC_empty_chunk) { + gc_mark_chunk(ptls, mq, &c); goto pop; } } + // Try to steal pointer from random GC thread + for (int i = 0; i < 4 * jl_n_markthreads; i++) { + uint32_t v = gc_first_tid + cong(jl_n_markthreads, &ptls->rngseed); + jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue; + new_obj = gc_ptr_queue_steal_from(mq2); + if (new_obj != NULL) + goto mark; + } + // Sequentially walk GC threads to try to steal pointer + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { + jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue; + new_obj = gc_ptr_queue_steal_from(mq2); + if (new_obj != NULL) + goto mark; + } + // Try to steal pointer from master thread + if (mq_master != NULL) { + new_obj = gc_ptr_queue_steal_from(mq_master); + if (new_obj != NULL) + goto mark; + } } } -static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, - jl_ptls_t ptls2) +size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT { - gc_mark_queue_obj(gc_cache, sp, jl_atomic_load_relaxed(&ptls2->current_task)); - gc_mark_queue_obj(gc_cache, sp, ptls2->root_task); - if (ptls2->next_task) - gc_mark_queue_obj(gc_cache, sp, ptls2->next_task); - if (ptls2->previous_task) // shouldn't be necessary, but no reason not to - gc_mark_queue_obj(gc_cache, sp, ptls2->previous_task); - if (ptls2->previous_exception) - gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception); + // assume each chunk is worth 256 units of work and each pointer + // is worth 1 unit of work + size_t work = 256 * (jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.bottom) - + jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.top)); + work += (jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.bottom) - + jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.top)); + return work; +} + +/** + * Correctness argument for the mark-loop termination protocol. 
+ * + * Safety properties: + * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes + * that `gc_n_threads_marking` is zero. + * + * - No work item shall be stolen from the master thread (i.e. mutator thread which started + * GC and which helped the `jl_n_markthreads` - 1 threads to mark) after + * `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is + * necessary because we call `gc_mark_loop_serial` after marking the finalizer list in + * `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there, + * and that no work is stolen from us at that point. + * + * Proof: + * - Suppose the master thread observes that `gc_n_threads_marking` is zero in + * `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point. + * Since threads try to steal from all threads' queues, this implies that all threads must + * have tried to steal from the queue which still has a work item left, but failed to do so, + * which violates the semantics of Chase-Lev's work-stealing queue. + * + * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event + * "master thread observes that `gc_n_threads_marking` is zero". Since we're using + * sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in + * `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must + * increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an + * event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed + * `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that + * the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1 + * and therefore won't enter the mark-loop. 
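 *
 * Illustrative timeline (editorial recap of the argument above, using the same
 * event names):
 *
 *   master:    E1: gc_master_tid := -1  ...  E2: reads gc_n_threads_marking == 0
 *   observer:                                E3: gc_n_threads_marking := 1   E4: reads gc_master_tid
 *
 * In the sequentially consistent total order E1 -> E2 -> E3 -> E4, so the read
 * in E4 can only observe -1 and the observer backs off instead of entering the
 * mark-loop after the barrier.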
+ */ + +int gc_should_mark(jl_ptls_t ptls) +{ + int should_mark = 0; + int n_threads_marking = jl_atomic_load(&gc_n_threads_marking); + // fast path + if (n_threads_marking == 0) { + return 0; + } + uv_mutex_lock(&gc_queue_observer_lock); + while (1) { + int tid = jl_atomic_load(&gc_master_tid); + // fast path + if (tid == -1) { + break; + } + n_threads_marking = jl_atomic_load(&gc_n_threads_marking); + // fast path + if (n_threads_marking == 0) { + break; + } + size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]); + for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) { + work += gc_count_work_in_queue(gc_all_tls_states[tid]); + } + // if there is a lot of work left, enter the mark loop + if (work >= 16 * n_threads_marking) { + jl_atomic_fetch_add(&gc_n_threads_marking, 1); + should_mark = 1; + break; + } + jl_cpu_pause(); + } + uv_mutex_unlock(&gc_queue_observer_lock); + return should_mark; +} + +void gc_wake_all_for_marking(jl_ptls_t ptls) +{ + jl_atomic_store(&gc_master_tid, ptls->tid); + uv_mutex_lock(&gc_threads_lock); + jl_atomic_fetch_add(&gc_n_threads_marking, 1); + uv_cond_broadcast(&gc_threads_cond); + uv_mutex_unlock(&gc_threads_lock); +} + +void gc_mark_loop_parallel(jl_ptls_t ptls, int master) +{ + if (master) { + gc_wake_all_for_marking(ptls); + gc_mark_and_steal(ptls); + jl_atomic_fetch_add(&gc_n_threads_marking, -1); + } + while (1) { + int should_mark = gc_should_mark(ptls); + if (!should_mark) { + break; + } + gc_mark_and_steal(ptls); + jl_atomic_fetch_add(&gc_n_threads_marking, -1); + } +} + +void gc_mark_loop(jl_ptls_t ptls) +{ + if (jl_n_markthreads == 0 || gc_heap_snapshot_enabled) { + gc_mark_loop_serial(ptls); + } + else { + gc_mark_loop_parallel(ptls, 1); + } +} + +void gc_mark_loop_barrier(void) +{ + jl_atomic_store(&gc_master_tid, -1); + while (jl_atomic_load(&gc_n_threads_marking) != 0) { + jl_cpu_pause(); + } +} + +void gc_mark_clean_reclaim_sets(void) +{ + // Clean up `reclaim-sets` + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + arraylist_t *reclaim_set2 = &ptls2->mark_queue.reclaim_set; + ws_array_t *a = NULL; + while ((a = (ws_array_t *)arraylist_pop(reclaim_set2)) != NULL) { + free(a->buffer); + free(a); + } + } +} + +static void gc_premark(jl_ptls_t ptls2) +{ + arraylist_t *remset = ptls2->heap.remset; + ptls2->heap.remset = ptls2->heap.last_remset; + ptls2->heap.last_remset = remset; + ptls2->heap.remset->len = 0; + ptls2->heap.remset_nptr = 0; + // avoid counting remembered objects + // in `perm_scanned_bytes` + size_t len = remset->len; + void **items = remset->items; + for (size_t i = 0; i < len; i++) { + jl_value_t *item = (jl_value_t *)items[i]; + objprofile_count(jl_typeof(item), 2, 0); + jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED; + } +} + +static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) +{ + jl_task_t *task; + task = ptls2->root_task; + if (task != NULL) { + gc_try_claim_and_push(mq, task, NULL); + gc_heap_snapshot_record_root((jl_value_t*)task, "root task"); + } + task = jl_atomic_load_relaxed(&ptls2->current_task); + if (task != NULL) { + gc_try_claim_and_push(mq, task, NULL); + gc_heap_snapshot_record_root((jl_value_t*)task, "current task"); + } + task = ptls2->next_task; + if (task != NULL) { + gc_try_claim_and_push(mq, task, NULL); + gc_heap_snapshot_record_root((jl_value_t*)task, "next task"); + } + task = ptls2->previous_task; + if (task != NULL) { + gc_try_claim_and_push(mq, task, NULL); + gc_heap_snapshot_record_root((jl_value_t*)task, 
"previous task"); + } + if (ptls2->previous_exception) { + gc_try_claim_and_push(mq, ptls2->previous_exception, NULL); + gc_heap_snapshot_record_root((jl_value_t*)ptls2->previous_exception, "previous exception"); + } +} + +static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) +{ + jl_bt_element_t *bt_data = ptls2->bt_data; + size_t bt_size = ptls2->bt_size; + for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { + jl_bt_element_t *bt_entry = bt_data + i; + if (jl_bt_is_native(bt_entry)) + continue; + size_t njlvals = jl_bt_num_jlvals(bt_entry); + for (size_t j = 0; j < njlvals; j++) + gc_try_claim_and_push(mq, jl_bt_entry_jlvalue(bt_entry, j), NULL); + } +} + +static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2) +{ + size_t len = ptls2->heap.last_remset->len; + void **items = ptls2->heap.last_remset->items; + for (size_t i = 0; i < len; i++) { + // Objects in the `remset` are already marked, + // so a `gc_try_claim_and_push` wouldn't work here + gc_mark_outrefs(ptls, &ptls->mark_queue, (jl_value_t *)items[i], 1); + } } extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED; +extern jl_task_t *wait_empty JL_GLOBALLY_ROOTED; // mark the initial root set -static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) +static void gc_mark_roots(jl_gc_markqueue_t *mq) { // modules - gc_mark_queue_obj(gc_cache, sp, jl_main_module); - + gc_try_claim_and_push(mq, jl_main_module, NULL); + gc_heap_snapshot_record_root((jl_value_t*)jl_main_module, "main_module"); // invisible builtin values - if (jl_an_empty_vec_any != NULL) - gc_mark_queue_obj(gc_cache, sp, jl_an_empty_vec_any); - if (jl_module_init_order != NULL) - gc_mark_queue_obj(gc_cache, sp, jl_module_init_order); + gc_try_claim_and_push(mq, jl_an_empty_vec_any, NULL); + gc_try_claim_and_push(mq, jl_module_init_order, NULL); for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { - gc_mark_queue_obj(gc_cache, sp, jl_current_modules.table[i]); + gc_try_claim_and_push(mq, jl_current_modules.table[i], NULL); + gc_heap_snapshot_record_root((jl_value_t*)jl_current_modules.table[i], "top level module"); } } - gc_mark_queue_obj(gc_cache, sp, jl_anytuple_type_type); + gc_try_claim_and_push(mq, jl_anytuple_type_type, NULL); for (size_t i = 0; i < N_CALL_CACHE; i++) { jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); - if (v != NULL) - gc_mark_queue_obj(gc_cache, sp, v); + gc_try_claim_and_push(mq, v, NULL); } - if (jl_all_methods != NULL) - gc_mark_queue_obj(gc_cache, sp, jl_all_methods); - if (_jl_debug_method_invalidation != NULL) - gc_mark_queue_obj(gc_cache, sp, _jl_debug_method_invalidation); - + gc_try_claim_and_push(mq, jl_all_methods, NULL); + gc_try_claim_and_push(mq, _jl_debug_method_invalidation, NULL); // constants - gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type); - if (cmpswap_names != NULL) - gc_mark_queue_obj(gc_cache, sp, cmpswap_names); + gc_try_claim_and_push(mq, jl_emptytuple_type, NULL); + gc_try_claim_and_push(mq, cmpswap_names, NULL); + gc_try_claim_and_push(mq, jl_global_roots_list, NULL); + gc_try_claim_and_push(mq, jl_global_roots_keyset, NULL); } // find unmarked objects that need to be finalized from the finalizer list "list". 
@@ -2879,17 +3167,25 @@ static void sweep_finalizer_list(arraylist_t *list) size_t j = 0; for (size_t i=0; i < len; i+=2) { void *v0 = items[i]; - void *v = gc_ptr_clear_tag(v0, 1); + void *v = gc_ptr_clear_tag(v0, 3); if (__unlikely(!v0)) { // remove from this list continue; } void *fin = items[i+1]; - int isfreed = !gc_marked(jl_astaggedvalue(v)->bits.gc); - int isold = (list != &finalizer_list_marked && + int isfreed; + int isold; + if (gc_ptr_tag(v0, 2)) { + isfreed = 1; + isold = 0; + } + else { + isfreed = !gc_marked(jl_astaggedvalue(v)->bits.gc); + isold = (list != &finalizer_list_marked && jl_astaggedvalue(v)->bits.gc == GC_OLD_MARKED && jl_astaggedvalue(fin)->bits.gc == GC_OLD_MARKED); + } if (isfreed || isold) { // remove from this list } @@ -2914,7 +3210,7 @@ static void sweep_finalizer_list(arraylist_t *list) } // collector entry point and control -static _Atomic(uint32_t) jl_gc_disable_counter = 1; +_Atomic(uint32_t) jl_gc_disable_counter = 1; JL_DLLEXPORT int jl_gc_enable(int on) { @@ -2946,7 +3242,7 @@ JL_DLLEXPORT int jl_gc_is_enabled(void) JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); + combine_thread_gc_counts(&num, 0); // Sync this logic with `base/util.jl:GC_Diff` *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } @@ -2959,7 +3255,7 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; - combine_thread_gc_counts(&num); + combine_thread_gc_counts(&num, 0); return num; } @@ -2989,70 +3285,56 @@ JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT return newtb - oldtb; } -JL_DLLEXPORT int64_t jl_gc_live_bytes(void) +JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) { - return live_bytes; + int n_threads = jl_atomic_load_acquire(&jl_n_threads); + jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + int64_t pool_live_bytes = 0; + for (int i = 0; i < n_threads; i++) { + jl_ptls_t ptls2 = all_tls_states[i]; + if (ptls2 != NULL) { + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_num.pool_live_bytes); + } + } + return pool_live_bytes; } -static void jl_gc_premark(jl_ptls_t ptls2) +JL_DLLEXPORT int64_t jl_gc_live_bytes(void) { - arraylist_t *remset = ptls2->heap.remset; - ptls2->heap.remset = ptls2->heap.last_remset; - ptls2->heap.last_remset = remset; - ptls2->heap.remset->len = 0; - ptls2->heap.remset_nptr = 0; - - // avoid counting remembered objects & bindings twice - // in `perm_scanned_bytes` - size_t len = remset->len; - void **items = remset->items; - for (size_t i = 0; i < len; i++) { - jl_value_t *item = (jl_value_t*)items[i]; - objprofile_count(jl_typeof(item), 2, 0); - jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED; - } - len = ptls2->heap.rem_bindings.len; - items = ptls2->heap.rem_bindings.items; - for (size_t i = 0; i < len; i++) { - void *ptr = items[i]; - jl_astaggedvalue(ptr)->bits.gc = GC_OLD_MARKED; - } + return live_bytes; } -static void jl_gc_queue_remset(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, jl_ptls_t ptls2) +uint64_t jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor) { - size_t len = ptls2->heap.last_remset->len; - void **items = ptls2->heap.last_remset->items; - for (size_t i = 0; i < len; i++) - gc_mark_queue_scan_obj(gc_cache, sp, (jl_value_t*)items[i]); - int n_bnd_refyoung = 0; - len = ptls2->heap.rem_bindings.len; - items = ptls2->heap.rem_bindings.items; - for (size_t i = 0; i < len; i++) { - jl_binding_t *ptr 
= (jl_binding_t*)items[i]; - // A null pointer can happen here when the binding is cleaned up - // as an exception is thrown after it was already queued (#10221) - jl_value_t *v = jl_atomic_load_relaxed(&ptr->value); - if (v != NULL && gc_mark_queue_obj(gc_cache, sp, v)) { - items[n_bnd_refyoung] = ptr; - n_bnd_refyoung++; - } - } - ptls2->heap.rem_bindings.len = n_bnd_refyoung; + double est = factor * old_val + (1 - factor) * new_val; + if (est <= 1) + return 1; // avoid issues with <= 0 + if (est > (uint64_t)2<<36) + return (uint64_t)2<<36; // avoid overflow + return est; } -static void jl_gc_queue_bt_buf(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, jl_ptls_t ptls2) +// an overallocation curve inspired by array allocations +// grows very fast initially, then much slower at large heaps +static uint64_t overallocation(uint64_t old_val, uint64_t val, uint64_t max_val) { - jl_bt_element_t *bt_data = ptls2->bt_data; - size_t bt_size = ptls2->bt_size; - for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { - jl_bt_element_t *bt_entry = bt_data + i; - if (jl_bt_is_native(bt_entry)) - continue; - size_t njlvals = jl_bt_num_jlvals(bt_entry); - for (size_t j = 0; j < njlvals; j++) - gc_mark_queue_obj(gc_cache, sp, jl_bt_entry_jlvalue(bt_entry, j)); - } + // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8 + // for small n, we grow much faster than O(n) + // for large n, we grow at O(n/8) + // and as we reach O(memory) for memory>>1MB, + // this means we end by adding about 10% of memory each time at most + int exp2 = sizeof(old_val) * 8 - +#ifdef _P64 + __builtin_clzll(old_val); +#else + __builtin_clz(old_val); +#endif + uint64_t inc = (uint64_t)((size_t)1 << (exp2 * 7 / 8)) * 4 + old_val / 8; + // once overallocation would exceed max_val, grow by no more than 5% of max_val + if (inc + val > max_val) + if (inc > max_val / 20) + return max_val / 20; + return inc; } size_t jl_maxrss(void); @@ -3060,42 +3342,100 @@ size_t jl_maxrss(void); // Only one thread should be running in this function static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) { - combine_thread_gc_counts(&gc_num); + combine_thread_gc_counts(&gc_num, 1); - jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; - jl_gc_mark_sp_t sp; - gc_mark_sp_init(gc_cache, &sp); + // We separate the update of the graph from the update of live_bytes here + // so that the sweep shows a downward trend in memory usage. + jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, gc_num.allocd); + + jl_gc_markqueue_t *mq = &ptls->mark_queue; uint64_t gc_start_time = jl_hrtime(); + uint64_t mutator_time = gc_end_time == 0 ? old_mut_time : gc_start_time - gc_end_time; + uint64_t before_free_heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); int64_t last_perm_scanned_bytes = perm_scanned_bytes; - JL_PROBE_GC_MARK_BEGIN(); uint64_t start_mark_time = jl_hrtime(); + JL_PROBE_GC_MARK_BEGIN(); + { + JL_TIMING(GC, GC_Mark); + + // 1. fix GC bits of objects in the remset. 
+ assert(gc_n_threads); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) + gc_premark(ptls2); + } + + assert(gc_n_threads); + int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + jl_ptls_t ptls_dest = ptls; + jl_gc_markqueue_t *mq_dest = mq; + if (!single_threaded_mark) { + ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_markthreads]; + mq_dest = &ptls_dest->mark_queue; + } + if (ptls2 != NULL) { + // 2.1. mark every thread local root + gc_queue_thread_local(mq_dest, ptls2); + // 2.2. mark any managed objects in the backtrace buffer + // TODO: treat these as roots for gc_heap_snapshot_record + gc_queue_bt_buf(mq_dest, ptls2); + // 2.3. mark every object in the `last_remsets` and `rem_binding` + gc_queue_remset(ptls_dest, ptls2); + } + } + + // 3. walk roots + gc_mark_roots(mq); + if (gc_cblist_root_scanner) { + gc_invoke_callbacks(jl_gc_cb_root_scanner_t, + gc_cblist_root_scanner, (collection)); + } + gc_mark_loop(ptls); + gc_mark_loop_barrier(); + gc_mark_clean_reclaim_sets(); + + // 4. check for objects to finalize + clear_weak_refs(); + // Record the length of the marked list since we need to + // mark the object moved to the marked list from the + // `finalizer_list` by `sweep_finalizer_list` + size_t orig_marked_len = finalizer_list_marked.len; + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + sweep_finalizer_list(&ptls2->finalizers); + } + if (prev_sweep_full) { + sweep_finalizer_list(&finalizer_list_marked); + orig_marked_len = 0; + } + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + gc_mark_finlist(mq, &ptls2->finalizers, 0); + } + gc_mark_finlist(mq, &finalizer_list_marked, orig_marked_len); + // "Flush" the mark stack before flipping the reset_age bit + // so that the objects are not incorrectly reset. + gc_mark_loop_serial(ptls); + // Conservative marking relies on age to tell allocated objects + // and freelist entries apart. + mark_reset_age = !jl_gc_conservative_gc_support_enabled(); + // Reset the age and old bit for any unmarked objects referenced by the + // `to_finalize` list. These objects are only reachable from this list + // and should not be referenced by any old objects so this won't break + // the GC invariant. + gc_mark_finlist(mq, &to_finalize, 0); + gc_mark_loop_serial(ptls); + mark_reset_age = 0; + } - // 1. fix GC bits of objects in the remset. - for (int t_i = 0; t_i < jl_n_threads; t_i++) - jl_gc_premark(jl_all_tls_states[t_i]); - - for (int t_i = 0; t_i < jl_n_threads; t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; - // 2.1. mark every object in the `last_remsets` and `rem_binding` - jl_gc_queue_remset(gc_cache, &sp, ptls2); - // 2.2. mark every thread local root - jl_gc_queue_thread_local(gc_cache, &sp, ptls2); - // 2.3. mark any managed objects in the backtrace buffer - jl_gc_queue_bt_buf(gc_cache, &sp, ptls2); - } - - // 3. 
walk roots - mark_roots(gc_cache, &sp); - if (gc_cblist_root_scanner) { - export_gc_state(ptls, &sp); - gc_invoke_callbacks(jl_gc_cb_root_scanner_t, - gc_cblist_root_scanner, (collection)); - import_gc_state(ptls, &sp); - } - gc_mark_loop(ptls, sp); - gc_mark_sp_init(gc_cache, &sp); - gc_num.since_sweep += gc_num.allocd; JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes); gc_settime_premark_end(); gc_time_mark_pause(gc_start_time, scanned_bytes, perm_scanned_bytes); @@ -3103,96 +3443,41 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t mark_time = end_mark_time - start_mark_time; gc_num.mark_time = mark_time; gc_num.total_mark_time += mark_time; - int64_t actual_allocd = gc_num.since_sweep; - // marking is over - - // 4. check for objects to finalize - clear_weak_refs(); - // Record the length of the marked list since we need to - // mark the object moved to the marked list from the - // `finalizer_list` by `sweep_finalizer_list` - size_t orig_marked_len = finalizer_list_marked.len; - for (int i = 0;i < jl_n_threads;i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - sweep_finalizer_list(&ptls2->finalizers); - } - if (prev_sweep_full) { - sweep_finalizer_list(&finalizer_list_marked); - orig_marked_len = 0; - } - for (int i = 0;i < jl_n_threads;i++) { - jl_ptls_t ptls2 = jl_all_tls_states[i]; - gc_mark_queue_finlist(gc_cache, &sp, &ptls2->finalizers, 0); - } - gc_mark_queue_finlist(gc_cache, &sp, &finalizer_list_marked, orig_marked_len); - // "Flush" the mark stack before flipping the reset_age bit - // so that the objects are not incorrectly reset. - gc_mark_loop(ptls, sp); - gc_mark_sp_init(gc_cache, &sp); - // Conservative marking relies on age to tell allocated objects - // and freelist entries apart. - mark_reset_age = !jl_gc_conservative_gc_support_enabled(); - // Reset the age and old bit for any unmarked objects referenced by the - // `to_finalize` list. These objects are only reachable from this list - // and should not be referenced by any old objects so this won't break - // the GC invariant. - gc_mark_queue_finlist(gc_cache, &sp, &to_finalize, 0); - gc_mark_loop(ptls, sp); - mark_reset_age = 0; gc_settime_postmark_end(); + // marking is over // Flush everything in mark cache gc_sync_all_caches_nolock(ptls); - int64_t live_sz_ub = live_bytes + actual_allocd; - int64_t live_sz_est = scanned_bytes + perm_scanned_bytes; - int64_t estimate_freed = live_sz_ub - live_sz_est; gc_verify(ptls); - gc_stats_all_pool(); gc_stats_big_obj(); objprofile_printall(); objprofile_reset(); - gc_num.total_allocd += gc_num.since_sweep; + gc_num.total_allocd += gc_num.allocd; if (!prev_sweep_full) promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes; // 5. 
next collection decision - int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10)); - int nptr = 0; - for (int i = 0;i < jl_n_threads;i++) - nptr += jl_all_tls_states[i]->heap.remset_nptr; - - // many pointers in the intergen frontier => "quick" mark is not quick - int large_frontier = nptr*sizeof(void*) >= default_collect_interval; - int sweep_full = 0; + int remset_nptr = 0; + int sweep_full = next_sweep_full; int recollect = 0; - - // update heuristics only if this GC was automatically triggered - if (collection == JL_GC_AUTO) { - if (not_freed_enough) { - gc_num.interval = gc_num.interval * 2; - } - if (large_frontier) { - sweep_full = 1; - } - if (gc_num.interval > max_collect_interval) { - sweep_full = 1; - gc_num.interval = max_collect_interval; - } + assert(gc_n_threads); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + remset_nptr += ptls2->heap.remset_nptr; } + (void)remset_nptr; //Use this information for something? // If the live data outgrows the suggested max_total_memory // we keep going with minimum intervals and full gcs until // we either free some space or get an OOM error. - if (live_bytes > max_total_memory) { - sweep_full = 1; - } if (gc_sweep_always_full) { sweep_full = 1; } - if (collection == JL_GC_FULL) { + if (collection == JL_GC_FULL && !prev_sweep_full) { sweep_full = 1; recollect = 1; } @@ -3203,43 +3488,157 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) promoted_bytes = 0; } scanned_bytes = 0; - // 5. start sweeping + // 6. start sweeping uint64_t start_sweep_time = jl_hrtime(); JL_PROBE_GC_SWEEP_BEGIN(sweep_full); - sweep_weak_refs(); - sweep_stack_pools(); - gc_sweep_foreign_objs(); - gc_sweep_other(ptls, sweep_full); - gc_scrub(); - gc_verify_tags(); - gc_sweep_pool(sweep_full); - if (sweep_full) - gc_sweep_perm_alloc(); + { + JL_TIMING_CREATE_BLOCK(incremental_timing_block, + GC, GC_IncrementalSweep); + JL_TIMING_CREATE_BLOCK(full_timing_block, + GC, GC_FullSweep); + jl_timing_block_start(sweep_full ? 
&full_timing_block : &incremental_timing_block); +#ifdef USE_TRACY + TracyCZoneColor(full_timing_block.tracy_ctx, 0xFFA500); +#endif + current_sweep_full = sweep_full; + sweep_weak_refs(); + sweep_stack_pools(); + gc_sweep_foreign_objs(); + gc_sweep_other(ptls, sweep_full); + gc_scrub(); + gc_verify_tags(); + gc_sweep_pool(); + if (sweep_full) + gc_sweep_perm_alloc(); + } + JL_PROBE_GC_SWEEP_END(); - uint64_t gc_end_time = jl_hrtime(); + gc_end_time = jl_hrtime(); uint64_t pause = gc_end_time - gc_start_time; uint64_t sweep_time = gc_end_time - start_sweep_time; gc_num.total_sweep_time += sweep_time; gc_num.sweep_time = sweep_time; + if (sweep_full) { + gc_num.last_full_sweep = gc_end_time; + } + else { + gc_num.last_incremental_sweep = gc_end_time; + } + + size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - freed_in_runtime; + jl_atomic_store_relaxed(&gc_heap_stats.heap_size, heap_size); + freed_in_runtime = 0; + uint64_t user_max = max_total_memory * 0.8; + uint64_t alloc_diff = before_free_heap_size - old_heap_size; + uint64_t freed_diff = before_free_heap_size - heap_size; + uint64_t target_heap; + const char *reason = ""; (void)reason; // for GC_TIME output stats + old_heap_size = heap_size; // TODO: Update these values dynamically instead of just during the GC + if (collection == JL_GC_AUTO) { + // update any heuristics only when the user does not force the GC + // but still update the timings, since GC was run and reset, even if it was too early + uint64_t target_allocs = 0.0; + double alloc_smooth_factor = 0.95; + double collect_smooth_factor = 0.5; + double tuning_factor = 2e4; + uint64_t alloc_mem = jl_gc_smooth(old_alloc_diff, alloc_diff, alloc_smooth_factor); + uint64_t alloc_time = jl_gc_smooth(old_mut_time, mutator_time, alloc_smooth_factor); // TODO: subtract estimated finalizer time? + uint64_t gc_mem = jl_gc_smooth(old_freed_diff, freed_diff, collect_smooth_factor); + uint64_t gc_time = jl_gc_smooth(old_pause_time, pause - sweep_time, collect_smooth_factor); + old_alloc_diff = alloc_mem; + old_mut_time = alloc_time; + old_freed_diff = gc_mem; + old_pause_time = gc_time; + // thrashing estimator: if GC time more than 50% of the runtime + if (pause > mutator_time && !(thrash_counter < 4)) + thrash_counter += 1; + else if (thrash_counter > 0) + thrash_counter -= 1; + if (alloc_mem != 0 && alloc_time != 0 && gc_mem != 0 && gc_time != 0) { + double alloc_rate = (double)alloc_mem/alloc_time; + double gc_rate = (double)gc_mem/gc_time; + target_allocs = sqrt((double)heap_size * alloc_rate / gc_rate) * tuning_factor; + } + + if (thrashing == 0 && thrash_counter >= 3) { + // require 3 consecutive thrashing cycles to force the default allocator rate + thrashing = 1; + // and require 4 default allocations to clear + thrash_counter = 6; + } + else if (thrashing == 1 && thrash_counter <= 2) { + thrashing = 0; // maybe we should report this to the user or error out? 
+ } + target_heap = target_allocs + heap_size; + // optionally smooth this: + // target_heap = jl_gc_smooth(jl_atomic_load_relaxed(&gc_heap_stats.heap_target), target_heap, alloc_smooth_factor); + + // compute some guardrails values + uint64_t min_target_allocs = heap_size / 20; // minimum 5% of current heap + if (min_target_allocs < default_collect_interval / 8) // unless the heap is small + min_target_allocs = default_collect_interval / 8; + uint64_t max_target_allocs = overallocation(before_free_heap_size, heap_size, user_max); + if (max_target_allocs < min_target_allocs) + max_target_allocs = min_target_allocs; + // respect max_total_memory first + if (target_heap > user_max) { + target_allocs = heap_size < user_max ? user_max - heap_size : 1; + reason = " user limit"; + } + // If we are thrashing use a default only (an average) for a couple collections + if (thrashing) { + uint64_t thrashing_allocs = sqrt((double)min_target_allocs * max_target_allocs); + if (target_allocs < thrashing_allocs) { + target_allocs = thrashing_allocs; + reason = " thrashing"; + } + } + // then add the guardrails for transient issues + if (target_allocs > max_target_allocs) { + target_allocs = max_target_allocs; + reason = " rate limit max"; + } + else if (target_allocs < min_target_allocs) { + target_allocs = min_target_allocs; + reason = " min limit"; + } + // and set the heap detection threshold + target_heap = target_allocs + heap_size; + if (target_heap < default_collect_interval) { + target_heap = default_collect_interval; + reason = " min heap"; + } + jl_atomic_store_relaxed(&gc_heap_stats.heap_target, target_heap); + } + else { + target_heap = jl_atomic_load_relaxed(&gc_heap_stats.heap_target); + } + + double old_ratio = (double)promoted_bytes/(double)heap_size; + if (heap_size > user_max || old_ratio > 0.15) + next_sweep_full = 1; + else + next_sweep_full = 0; + if (heap_size > user_max || thrashing) + under_pressure = 1; // sweeping is over - // 6. if it is a quick sweep, put back the remembered objects in queued state + // 7. if it is a quick sweep, put back the remembered objects in queued state // so that we don't trigger the barrier again on them. 
- for (int t_i = 0;t_i < jl_n_threads;t_i++) { - jl_ptls_t ptls2 = jl_all_tls_states[t_i]; + assert(gc_n_threads); + for (int t_i = 0; t_i < gc_n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 == NULL) + continue; if (!sweep_full) { for (int i = 0; i < ptls2->heap.remset->len; i++) { - jl_astaggedvalue(ptls2->heap.remset->items[i])->bits.gc = GC_MARKED; - } - for (int i = 0; i < ptls2->heap.rem_bindings.len; i++) { - void *ptr = ptls2->heap.rem_bindings.items[i]; + void *ptr = ptls2->heap.remset->items[i]; jl_astaggedvalue(ptr)->bits.gc = GC_MARKED; } } else { ptls2->heap.remset->len = 0; - ptls2->heap.rem_bindings.len = 0; } } @@ -3255,49 +3654,37 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) } #endif - - _report_gc_finished(pause, gc_num.freed, sweep_full, recollect); - - gc_final_pause_end(gc_start_time, gc_end_time); - gc_time_sweep_pause(gc_end_time, actual_allocd, live_bytes, - estimate_freed, sweep_full); - gc_num.full_sweep += sweep_full; + _report_gc_finished(pause, gc_num.freed, sweep_full, recollect, live_bytes); uint64_t max_memory = last_live_bytes + gc_num.allocd; if (max_memory > gc_num.max_memory) { gc_num.max_memory = max_memory; } - - gc_num.allocd = 0; + gc_final_pause_end(gc_start_time, gc_end_time); + gc_time_sweep_pause(gc_end_time, gc_num.allocd, live_bytes, + gc_num.freed, sweep_full); + gc_num.full_sweep += sweep_full; last_live_bytes = live_bytes; - live_bytes += -gc_num.freed + gc_num.since_sweep; - - if (collection == JL_GC_AUTO) { - // If the current interval is larger than half the live data decrease the interval - int64_t half = live_bytes/2; - if (gc_num.interval > half) gc_num.interval = half; - // But never go below default - if (gc_num.interval < default_collect_interval) gc_num.interval = default_collect_interval; - } - - if (gc_num.interval + live_bytes > max_total_memory) { - if (live_bytes < max_total_memory) { - gc_num.interval = max_total_memory - live_bytes; - } else { - // We can't stay under our goal so let's go back to - // the minimum interval and hope things get better - gc_num.interval = default_collect_interval; - } - } + live_bytes += -gc_num.freed + gc_num.allocd; + jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed); gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed, live_bytes, gc_num.interval, pause, gc_num.time_to_safepoint, gc_num.mark_time, gc_num.sweep_time); + if (collection == JL_GC_AUTO) { + gc_heuristics_summary( + old_alloc_diff, alloc_diff, + old_mut_time, mutator_time, + old_freed_diff, freed_diff, + old_pause_time, pause - sweep_time, + thrash_counter, reason, + heap_size, target_heap); + } prev_sweep_full = sweep_full; gc_num.pause += !recollect; gc_num.total_time += pause; - gc_num.since_sweep = 0; + gc_num.allocd = 0; gc_num.freed = 0; if (pause > gc_num.max_pause) { gc_num.max_pause = pause; @@ -3313,7 +3700,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); @@ -3324,24 +3711,35 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); 
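/* Illustrative usage of the disable counter checked above (an editorial
 * sketch, not part of the patch): embedders can hold off collection around a
 * critical region with the existing jl_gc_enable API, which adjusts
 * jl_gc_disable_counter:
 *
 *     int was_enabled = jl_gc_enable(0);   // disable, remember previous state
 *     // ... allocation-heavy region; jl_gc_collect() returns early here ...
 *     jl_gc_enable(was_enabled);           // restore
 */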
jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); - // `jl_safepoint_start_gc()` makes sure only one thread can - // run the GC. + // `jl_safepoint_start_gc()` makes sure only one thread can run the GC. uint64_t t0 = jl_hrtime(); if (!jl_safepoint_start_gc()) { - // Multithread only. See assertion in `safepoint.c` + // either another thread is running GC, or the GC got disabled just now. jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); + jl_safepoint_wait_thread_resume(); // block in thread-suspend now if requested, after clearing the gc_state return; } - JL_TIMING(GC); + + JL_TIMING_SUSPEND_TASK(GC, ct); + JL_TIMING(GC, GC); + int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); #endif // Now we are ready to wait for other threads to hit the safepoint, // we can do a few things that doesn't require synchronization. - // TODO (concurrently queue objects) - // no-op for non-threading - jl_gc_wait_for_the_world(); + // + // We must sync here with the tls_lock operations, so that we have a + // seq-cst order between these events now we know that either the new + // thread must run into our safepoint flag or we must observe the + // existence of the thread in the jl_n_threads count. + // + // TODO: concurrently queue objects + jl_fence(); + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + jl_gc_wait_for_the_world(gc_all_tls_states, gc_n_threads); JL_PROBE_GC_STOP_THE_WORLD(); uint64_t t1 = jl_hrtime(); @@ -3349,51 +3747,61 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) if (duration > gc_num.max_time_to_safepoint) gc_num.max_time_to_safepoint = duration; gc_num.time_to_safepoint = duration; + gc_num.total_time_to_safepoint += duration; gc_invoke_callbacks(jl_gc_cb_pre_gc_t, gc_cblist_pre_gc, (collection)); - if (!jl_atomic_load_relaxed(&jl_gc_disable_counter)) { - JL_LOCK_NOGC(&finalizers_lock); + if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) { + JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock +#ifndef __clang_gcanalyzer__ if (_jl_gc_collect(ptls, collection)) { // recollect int ret = _jl_gc_collect(ptls, JL_GC_AUTO); (void)ret; assert(!ret); } +#endif JL_UNLOCK_NOGC(&finalizers_lock); } - // no-op for non-threading + gc_n_threads = 0; + gc_all_tls_states = NULL; jl_safepoint_end_gc(); jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); JL_PROBE_GC_END(); + jl_safepoint_wait_thread_resume(); // block in thread-suspend now if requested, after clearing the gc_state // Only disable finalizers on current thread // Doing this on all threads is racy (it's impossible to check // or wait for finalizers on other threads without dead lock). 
if (!ptls->finalizers_inhibited && ptls->locks.len == 0) { - int8_t was_in_finalizer = ptls->in_finalizer; - ptls->in_finalizer = 1; - run_finalizers(ct); - ptls->in_finalizer = was_in_finalizer; + JL_TIMING(GC, GC_Finalizers); + run_finalizers(ct, 0); } JL_PROBE_GC_FINALIZER(); gc_invoke_callbacks(jl_gc_cb_post_gc_t, gc_cblist_post_gc, (collection)); + if (under_pressure) + gc_invoke_callbacks(jl_gc_cb_notify_gc_pressure_t, + gc_cblist_notify_gc_pressure, ()); + under_pressure = 0; #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif errno = last_errno; } -void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_mark_sp_t *sp) +void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) { - jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache; - for (size_t i = 0; i < jl_n_threads; i++) - jl_gc_queue_thread_local(gc_cache, sp, jl_all_tls_states[i]); - mark_roots(gc_cache, sp); + assert(gc_n_threads); + for (size_t i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 != NULL) + gc_queue_thread_local(mq, ptls2); + } + gc_mark_roots(mq); } // allocator entry points @@ -3406,8 +3814,6 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { - if (ptls->tid == 0) - ptls->disable_gc = 1; jl_thread_heap_t *heap = &ptls->heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { @@ -3415,12 +3821,13 @@ void jl_init_thread_heap(jl_ptls_t ptls) p[i].freelist = NULL; p[i].newpages = NULL; } - arraylist_new(&heap->weak_refs, 0); - arraylist_new(&heap->live_tasks, 0); + small_arraylist_new(&heap->weak_refs, 0); + small_arraylist_new(&heap->live_tasks, 0); + for (int i = 0; i < JL_N_STACK_POOLS; i++) + small_arraylist_new(&heap->free_stacks[i], 0); heap->mallocarrays = NULL; heap->mafreelist = NULL; heap->big_objects = NULL; - arraylist_new(&heap->rem_bindings, 0); heap->remset = &heap->_remset[0]; heap->last_remset = &heap->_remset[1]; arraylist_new(heap->remset, 0); @@ -3432,29 +3839,43 @@ void jl_init_thread_heap(jl_ptls_t ptls) gc_cache->perm_scanned_bytes = 0; gc_cache->scanned_bytes = 0; gc_cache->nbig_obj = 0; - size_t init_size = 1024; - gc_cache->pc_stack = (void**)malloc_s(init_size * sizeof(void*)); - gc_cache->pc_stack_end = gc_cache->pc_stack + init_size; - gc_cache->data_stack = (jl_gc_mark_data_t *)malloc_s(init_size * sizeof(jl_gc_mark_data_t)); + + // Initialize GC mark-queue + jl_gc_markqueue_t *mq = &ptls->mark_queue; + ws_queue_t *cq = &mq->chunk_queue; + ws_array_t *wsa = create_ws_array(GC_CHUNK_QUEUE_INIT_SIZE, sizeof(jl_gc_chunk_t)); + jl_atomic_store_relaxed(&cq->top, 0); + jl_atomic_store_relaxed(&cq->bottom, 0); + jl_atomic_store_relaxed(&cq->array, wsa); + ws_queue_t *q = &mq->ptr_queue; + ws_array_t *wsa2 = create_ws_array(GC_PTR_QUEUE_INIT_SIZE, sizeof(jl_value_t *)); + jl_atomic_store_relaxed(&q->top, 0); + jl_atomic_store_relaxed(&q->bottom, 0); + jl_atomic_store_relaxed(&q->array, wsa2); + arraylist_new(&mq->reclaim_set, 32); memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); - assert(gc_num.interval == default_collect_interval); jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); } // System-wide initializations void jl_gc_init(void) { - JL_MUTEX_INIT(&finalizers_lock); + JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock"); + JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); uv_mutex_init(&gc_cache_lock); uv_mutex_init(&gc_perm_lock); + uv_mutex_init(&gc_threads_lock); + 
uv_cond_init(&gc_threads_cond); + uv_sem_init(&gc_sweep_assists_needed, 0); + uv_mutex_init(&gc_queue_observer_lock); jl_gc_init_page(); jl_gc_debug_init(); arraylist_new(&finalizer_list_marked, 0); arraylist_new(&to_finalize, 0); - + jl_atomic_store_relaxed(&gc_heap_stats.heap_target, default_collect_interval); gc_num.interval = default_collect_interval; last_long_collect_interval = default_collect_interval; gc_num.allocd = 0; @@ -3462,35 +3883,34 @@ void jl_gc_init(void) gc_num.max_memory = 0; #ifdef _P64 - // on a big memory machine, set max_collect_interval to totalmem / nthreads / 2 - uint64_t total_mem = uv_get_total_memory(); + total_mem = uv_get_total_memory(); uint64_t constrained_mem = uv_get_constrained_memory(); if (constrained_mem > 0 && constrained_mem < total_mem) - total_mem = constrained_mem; - size_t maxmem = total_mem / jl_n_threads / 2; - if (maxmem > max_collect_interval) - max_collect_interval = maxmem; + jl_gc_set_max_memory(constrained_mem - 250*1024*1024); // LLVM + other libraries need some amount of memory #endif + if (jl_options.heap_size_hint) + jl_gc_set_max_memory(jl_options.heap_size_hint - 250*1024*1024); - // We allocate with abandon until we get close to the free memory on the machine. - uint64_t free_mem = uv_get_free_memory(); - uint64_t high_water_mark = free_mem / 10 * 7; // 70% high water mark - - if (high_water_mark < max_total_memory) - max_total_memory = high_water_mark; - - jl_gc_mark_sp_t sp = {NULL, NULL, NULL, NULL}; - gc_mark_loop(NULL, sp); t_start = jl_hrtime(); } -void jl_gc_set_max_memory(uint64_t max_mem) { +JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) +{ if (max_mem > 0 && max_mem < (uint64_t)1 << (sizeof(memsize_t) * 8 - 1)) { + #ifdef _P64 max_total_memory = max_mem; + #else + max_total_memory = max_mem < MAX32HEAP ? 
max_mem : MAX32HEAP; + #endif } } +JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void) +{ + return max_total_memory; +} + // callback for passing OOM errors from gmp JL_DLLEXPORT void jl_throw_out_of_memory_error(void) { @@ -3503,30 +3923,34 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; - if (pgcstack && ct->world_age) { + void *data = malloc(sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, sz); } - return malloc(sz); + return data; } JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; - if (pgcstack && ct->world_age) { + void *data = calloc(nm, sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); jl_atomic_store_relaxed(&ptls->gc_num.malloc, jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, sz * nm); } - return calloc(nm, sz); + return data; } JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) @@ -3534,12 +3958,8 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; free(p); - if (pgcstack && ct->world_age) { - jl_ptls_t ptls = ct->ptls; - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); - jl_atomic_store_relaxed(&ptls->gc_num.freecall, - jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); + if (pgcstack != NULL && ct->world_age) { + jl_batch_accum_free_size(ct->ptls, sz); } } @@ -3547,19 +3967,25 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size { jl_gcframe_t **pgcstack = jl_get_pgcstack(); jl_task_t *ct = jl_current_task; - if (pgcstack && ct->world_age) { + void *data = realloc(p, sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - if (sz < old) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); - else + if (!(sz < old)) jl_atomic_store_relaxed(&ptls->gc_num.allocd, jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); jl_atomic_store_relaxed(&ptls->gc_num.realloc, jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); + + int64_t diff = sz - old; + if (diff < 0) { + jl_batch_accum_free_size(ptls, -diff); + } + else { + jl_batch_accum_heap_size(ptls, diff); + } } - return realloc(p, sz); + return data; } // allocation wrappers that save the size of allocations, to allow using @@ -3628,10 +4054,7 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); @@ -3639,6 +4062,12 @@ JL_DLLEXPORT void 
*jl_gc_managed_malloc(size_t sz) void *b = malloc_cache_align(allocsz); if (b == NULL) jl_throw(jl_memory_exception); + + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); + jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif @@ -3653,24 +4082,11 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds { if (can_collect) maybe_collect(ptls); - + int is_old_marked = jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED; size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); - if (jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED) { - ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; - live_bytes += allocsz - oldsz; - } - else if (allocsz < oldsz) - jl_atomic_store_relaxed(&ptls->gc_num.freed, - jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); - else - jl_atomic_store_relaxed(&ptls->gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); - jl_atomic_store_relaxed(&ptls->gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); - int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); @@ -3686,7 +4102,27 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds SetLastError(last_error); #endif errno = last_errno; - maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag); + // gc_managed_realloc_ is currently used exclusively for resizing array buffers. + if (is_old_marked) { + ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; + inc_live_bytes(allocsz - oldsz); + } + else if (!(allocsz < oldsz)) + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); + jl_atomic_store_relaxed(&ptls->gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); + + int64_t diff = allocsz - oldsz; + if (diff < 0) { + jl_batch_accum_free_size(ptls, -diff); + } + else { + jl_batch_accum_heap_size(ptls, diff); + } + if (allocsz > oldsz) { + maybe_record_alloc_to_profile((jl_value_t*)b, allocsz - oldsz, (jl_datatype_t*)jl_buff_tag); + } return b; } @@ -3727,7 +4163,6 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) // old pointer. bigval_t *newbig = (bigval_t*)gc_managed_realloc_(ptls, hdr, allocsz, oldsz, 1, s, 0); newbig->sz = allocsz; - newbig->age = 0; gc_big_object_link(newbig, &ptls->heap.big_objects); jl_value_t *snew = jl_valueof(&newbig->header); *(size_t*)snew = sz; @@ -3754,17 +4189,17 @@ static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned o #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); #endif - uintptr_t base = (uintptr_t)(zero ? calloc(1, sz) : malloc(sz)); - if (base == 0) + void *base = zero ? 
calloc(1, sz) : malloc(sz); + if (base == NULL) jl_throw(jl_memory_exception); #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size,sz); errno = last_errno; jl_may_leak(base); assert(align > 0); - unsigned diff = (offset - base) % align; - return (void*)(base + diff); + return (void*)(LLT_ALIGN((uintptr_t)base + offset, (uintptr_t)align) - offset); } STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT @@ -3872,8 +4307,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void) JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { - static_assert(jl_buff_tag % GC_PAGE_SZ == 0, - "jl_buff_tag must be a multiple of GC_PAGE_SZ"); if (jl_is_initialized()) { int result = jl_atomic_fetch_or(&support_conservative_marking, 1); if (!result) { @@ -3900,7 +4333,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) { p = (char *) p - 1; jl_gc_pagemeta_t *meta = page_metadata(p); - if (meta && meta->ages) { + if (meta != NULL) { char *page = gc_page_data(p); // offset within page. size_t off = (char *)p - page; @@ -3909,6 +4342,8 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) // offset within object size_t off2 = (off - GC_PAGE_OFFSET); size_t osize = meta->osize; + if (osize == 0) + return NULL; off2 %= osize; if (off - off2 + osize > GC_PAGE_SZ) return NULL; @@ -3924,9 +4359,9 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) goto valid_object; } jl_gc_pool_t *pool = - jl_all_tls_states[meta->thread_n]->heap.norm_pools + + gc_all_tls_states[meta->thread_n]->heap.norm_pools + meta->pool_n; - if (meta->fl_begin_offset == (uint16_t) -1) { + if (meta->fl_begin_offset == UINT16_MAX) { // case 2: this is a page on the newpages list jl_taggedvalue_t *newpages = pool->newpages; // Check if the page is being allocated from via newpages @@ -3935,7 +4370,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) char *data = gc_page_data(newpages); if (data != meta->data) { // Pages on newpages form a linked list where only the - // first one is allocated from (see reset_page()). + // first one is allocated from (see gc_reset_page()). // All other pages are empty. return NULL; } @@ -3963,7 +4398,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) // entries and 1 for live objects. The above subcases arise // because allocating a cell will not update the age bit, so we // need extra logic for pages that have been allocated from. - unsigned obj_id = (off - off2) / osize; // We now distinguish between the second and third subcase. // Freelist entries are consumed in ascending order. Anything // before the freelist pointer was either live during the last @@ -3971,17 +4405,15 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) if (gc_page_data(cell) == gc_page_data(pool->freelist) && (char *)cell < (char *)pool->freelist) goto valid_object; - // We know now that the age bit reflects liveness status during - // the last sweep and that the cell has not been reused since. - if (!(meta->ages[obj_id / 8] & (1 << (obj_id % 8)))) { - return NULL; - } + // already skipped marked or old objects above, so here + // the age bits are 0, thus the object is on the freelist + return NULL; // Not a freelist entry, therefore a valid object. valid_object: // We have to treat objects with type `jl_buff_tag` differently, // as they must not be passed to the usual marking functions. 
- // Note that jl_buff_tag is a multiple of GC_PAGE_SZ, thus it - // cannot be a type reference. + // Note that jl_buff_tag is real pointer into libjulia, + // thus it cannot be a type reference. if ((cell->header & ~(uintptr_t) 3) == jl_buff_tag) return NULL; return jl_valueof(cell); diff --git a/src/gc.h b/src/gc.h index 00c3d48b52935..217b8050e40ac 100644 --- a/src/gc.h +++ b/src/gc.h @@ -9,6 +9,8 @@ #ifndef JL_GC_H #define JL_GC_H +#include +#include #include #include #include @@ -24,14 +26,19 @@ #endif #endif #include "julia_assert.h" +#include "gc-heap-snapshot.h" #include "gc-alloc-profiler.h" #ifdef __cplusplus extern "C" { #endif +#ifdef GC_SMALL_PAGE +#define GC_PAGE_LG2 12 // log2(size of a page) +#else #define GC_PAGE_LG2 14 // log2(size of a page) -#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k +#endif +#define GC_PAGE_SZ (1 << GC_PAGE_LG2) #define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT)) #define jl_malloc_tag ((void*)0xdeadaa01) @@ -41,7 +48,6 @@ extern "C" { typedef struct { uint64_t num; uint64_t next; - uint64_t min; uint64_t interv; uint64_t max; @@ -68,7 +74,6 @@ typedef struct { uint64_t freecall; uint64_t total_time; uint64_t total_allocd; - uint64_t since_sweep; size_t interval; int pause; int full_sweep; @@ -76,179 +81,49 @@ typedef struct { uint64_t max_memory; uint64_t time_to_safepoint; uint64_t max_time_to_safepoint; + uint64_t total_time_to_safepoint; uint64_t sweep_time; uint64_t mark_time; uint64_t total_sweep_time; uint64_t total_mark_time; + uint64_t last_full_sweep; + uint64_t last_incremental_sweep; } jl_gc_num_t; -enum { - GC_MARK_L_marked_obj, - GC_MARK_L_scan_only, - GC_MARK_L_finlist, - GC_MARK_L_objarray, - GC_MARK_L_array8, - GC_MARK_L_array16, - GC_MARK_L_obj8, - GC_MARK_L_obj16, - GC_MARK_L_obj32, - GC_MARK_L_stack, - GC_MARK_L_excstack, - GC_MARK_L_module_binding, - _GC_MARK_L_MAX -}; - -// The following structs (`gc_mark_*_t`) contain iterator state used for the -// scanning of various object types. -// -// The `nptr` member records the number of pointers slots referenced by -// an object to be used in the full collection heuristics as well as whether the object -// references young objects. -// `nptr >> 2` is the number of pointers fields referenced by the object. -// The lowest bit of `nptr` is set if the object references young object. -// The 2nd lowest bit of `nptr` is the GC old bits of the object after marking. -// A `0x3` in the low bits means that the object needs to be in the remset. - -// An generic object that's marked and needs to be scanned -// The metadata might need update too (depend on the PC) -typedef struct { - jl_value_t *obj; // The object - uintptr_t tag; // The tag with the GC bits masked out - uint8_t bits; // The GC bits after tagging (`bits & 1 == 1`) -} gc_mark_marked_obj_t; - -// An object array. This can come from an array, svec, or the using array or a module -typedef struct { - jl_value_t *parent; // The parent object to trigger write barrier on. - jl_value_t **begin; // The first slot to be scanned. - jl_value_t **end; // The end address (after the last slot to be scanned) - uint32_t step; // Number of pointers to jump between marks - uintptr_t nptr; // See notes about `nptr` above. -} gc_mark_objarray_t; - -// A normal object with 8bits field descriptors -typedef struct { - jl_value_t *parent; // The parent object to trigger write barrier on. - uint8_t *begin; // Current field descriptor. - uint8_t *end; // End of field descriptor. 
- uintptr_t nptr; // See notes about `nptr` above. -} gc_mark_obj8_t; - -// A normal object with 16bits field descriptors -typedef struct { - jl_value_t *parent; // The parent object to trigger write barrier on. - uint16_t *begin; // Current field descriptor. - uint16_t *end; // End of field descriptor. - uintptr_t nptr; // See notes about `nptr` above. -} gc_mark_obj16_t; - -// A normal object with 32bits field descriptors -typedef struct { - jl_value_t *parent; // The parent object to trigger write barrier on. - uint32_t *begin; // Current field descriptor. - uint32_t *end; // End of field descriptor. - uintptr_t nptr; // See notes about `nptr` above. -} gc_mark_obj32_t; - -typedef struct { - jl_value_t **begin; // The first slot to be scanned. - jl_value_t **end; // The end address (after the last slot to be scanned) - uint8_t *rebegin; - gc_mark_obj8_t elem; -} gc_mark_array8_t; - -typedef struct { - jl_value_t **begin; // The first slot to be scanned. - jl_value_t **end; // The end address (after the last slot to be scanned) - uint16_t *rebegin; - gc_mark_obj16_t elem; -} gc_mark_array16_t; - -// Stack frame -typedef struct { - jl_gcframe_t *s; // The current stack frame - uint32_t i; // The current slot index in the frame - uint32_t nroots; // `nroots` fields in the frame - // Parameters to mark the copy_stack range. - uintptr_t offset; - uintptr_t lb; - uintptr_t ub; -} gc_mark_stackframe_t; - -// Exception stack data -typedef struct { - jl_excstack_t *s; // Stack of exceptions - size_t itr; // Iterator into exception stack - size_t bt_index; // Current backtrace buffer entry index - size_t jlval_index; // Index into GC managed values for current bt entry -} gc_mark_excstack_t; - -// Module bindings. This is also the beginning of module scanning. -// The loop will start marking other references in a module after the bindings are marked -typedef struct { - jl_module_t *parent; // The parent module to trigger write barrier on. - jl_binding_t **begin; // The first slot to be scanned. - jl_binding_t **end; // The end address (after the last slot to be scanned) - uintptr_t nptr; // See notes about `nptr` above. - uint8_t bits; // GC bits of the module (the bits to mark the binding buffer with) -} gc_mark_binding_t; - -// Finalizer (or object) list -typedef struct { - jl_value_t **begin; - jl_value_t **end; -} gc_mark_finlist_t; - -// This is used to determine the max size of the data objects on the data stack. -// We'll use this size to determine the size of the data stack corresponding to a -// PC stack size. Since the data objects are not all of the same size, we'll waste -// some memory on the data stack this way but that size is unlikely going to be significant. -union _jl_gc_mark_data { - gc_mark_marked_obj_t marked; - gc_mark_objarray_t objarray; - gc_mark_array8_t array8; - gc_mark_array16_t array16; - gc_mark_obj8_t obj8; - gc_mark_obj16_t obj16; - gc_mark_obj32_t obj32; - gc_mark_stackframe_t stackframe; - gc_mark_excstack_t excstackframe; - gc_mark_binding_t binding; - gc_mark_finlist_t finlist; -}; - -// Pop a data struct from the mark data stack (i.e. decrease the stack pointer) -// This should be used after dispatch and therefore the pc stack pointer is already popped from -// the stack. 
-STATIC_INLINE void *gc_pop_markdata_(jl_gc_mark_sp_t *sp, size_t size) -{ - jl_gc_mark_data_t *data = (jl_gc_mark_data_t *)(((char*)sp->data) - size); - sp->data = data; - return data; -} -#define gc_pop_markdata(sp, type) ((type*)gc_pop_markdata_(sp, sizeof(type))) - -// Re-push a frame to the mark stack (both data and pc) -// The data and pc are expected to be on the stack (or updated in place) already. -// Mainly useful to pause the current scanning in order to scan an new object. -STATIC_INLINE void *gc_repush_markdata_(jl_gc_mark_sp_t *sp, size_t size) JL_NOTSAFEPOINT -{ - jl_gc_mark_data_t *data = sp->data; - sp->pc++; - sp->data = (jl_gc_mark_data_t *)(((char*)sp->data) + size); - return data; -} -#define gc_repush_markdata(sp, type) ((type*)gc_repush_markdata_(sp, sizeof(type))) +// Array chunks (work items representing suffixes of +// large arrays of pointers left to be marked) + +typedef enum { + GC_empty_chunk = 0, // for sentinel representing no items left in chunk queue + GC_objary_chunk, // for chunk of object array + GC_ary8_chunk, // for chunk of array with 8 bit field descriptors + GC_ary16_chunk, // for chunk of array with 16 bit field descriptors + GC_finlist_chunk, // for chunk of finalizer list +} gc_chunk_id_t; + +typedef struct _jl_gc_chunk_t { + gc_chunk_id_t cid; + struct _jl_value_t *parent; // array owner + struct _jl_value_t **begin; // pointer to first element that needs scanning + struct _jl_value_t **end; // pointer to last element that needs scanning + void *elem_begin; // used to scan pointers within objects when marking `ary8` or `ary16` + void *elem_end; // used to scan pointers within objects when marking `ary8` or `ary16` + uint32_t step; // step-size used when marking objarray + uintptr_t nptr; // (`nptr` & 0x1) if array has young element and (`nptr` & 0x2) if array owner is old +} jl_gc_chunk_t; + +#define GC_CHUNK_BATCH_SIZE (1 << 16) // maximum number of references that can be processed + // without creating a chunk + +#define GC_PTR_QUEUE_INIT_SIZE (1 << 18) // initial size of queue of `jl_value_t *` +#define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14) // initial size of chunk-queue // layout for big (>2k) objects JL_EXTENSION typedef struct _bigval_t { struct _bigval_t *next; struct _bigval_t **prev; // pointer to the next field of the prev entry - union { - size_t sz; - uintptr_t age : 2; - }; + size_t sz; #ifdef _P64 // Add padding so that the value is 64-byte aligned // (8 pointers of 8 bytes each) - (4 other pointers in struct) void *_padding[8 - 4]; @@ -266,15 +141,18 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd arrays. +// data structure for tracking malloc'd arrays and genericmemory. typedef struct _mallocarray_t { - jl_array_t *a; + jl_value_t *a; struct _mallocarray_t *next; } mallocarray_t; // pool page metadata -typedef struct { +typedef struct _jl_gc_pagemeta_t { + // next metadata structure in per-thread list + // or in one of the `jl_gc_global_page_pool_t` + struct _jl_gc_pagemeta_t *next; // index of pool that owns this page uint8_t pool_n; // Whether any cell in the page is marked @@ -301,36 +179,74 @@ typedef struct { // number of free objects in this page. // invalid if pool that owns this page is allocating objects from this page. 
uint16_t nfree; - uint16_t osize; // size of each object in this page + uint16_t osize; // size of each object in this page uint16_t fl_begin_offset; // offset of first free object in this page uint16_t fl_end_offset; // offset of last free object in this page uint16_t thread_n; // thread id of the heap that owns this page char *data; - uint8_t *ages; } jl_gc_pagemeta_t; -// Page layout: -// Newpage freelist: sizeof(void*) -// Padding: GC_PAGE_OFFSET - sizeof(void*) -// Blocks: osize * n -// Tag: sizeof(jl_taggedvalue_t) -// Data: <= osize - sizeof(jl_taggedvalue_t) +extern jl_gc_page_stack_t global_page_pool_lazily_freed; +extern jl_gc_page_stack_t global_page_pool_clean; +extern jl_gc_page_stack_t global_page_pool_freed; + +// Lock-free stack implementation taken +// from Herlihy's "The Art of Multiprocessor Programming" +// XXX: this is not a general-purpose lock-free stack. We can +// get away with just using a CAS and not implementing some ABA +// prevention mechanism since once a node is popped from the +// `jl_gc_page_stack_t`, it may only be pushed back to them +// in the sweeping phase, which also doesn't push a node into the +// same stack after it's popped + +STATIC_INLINE void push_lf_back(jl_gc_page_stack_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +{ + while (1) { + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); + elt->next = old_back; + if (jl_atomic_cmpswap(&pool->bottom, &old_back, elt)) { + break; + } + jl_cpu_pause(); + } +} + +STATIC_INLINE jl_gc_pagemeta_t *pop_lf_back(jl_gc_page_stack_t *pool) JL_NOTSAFEPOINT +{ + while (1) { + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->bottom); + if (old_back == NULL) { + return NULL; + } + if (jl_atomic_cmpswap(&pool->bottom, &old_back, old_back->next)) { + return old_back; + } + jl_cpu_pause(); + } +} -// Memory map: -// The complete address space is divided up into a multi-level page table. -// The three levels have similar but slightly different structures: -// - pagetable0_t: the bottom/leaf level (covers the contiguous addresses) -// - pagetable1_t: the middle level -// - pagetable2_t: the top/leaf level (covers the entire virtual address space) -// Corresponding to these similar structures is a large amount of repetitive -// code that is nearly the same but not identical. It could be made less -// repetitive with C macros, but only at the cost of debuggability. The specialized -// structure of this representation allows us to partially unroll and optimize -// various conditions at each level. - -// The following constants define the branching factors at each level. -// The constants and GC_PAGE_LG2 must therefore sum to sizeof(void*). -// They should all be multiples of 32 (sizeof(uint32_t)) except that REGION2_PG_COUNT may also be 1. 
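Illustrative sketch, not part of the patch: push_lf_back/pop_lf_back above are a Treiber-style stack specialized to jl_gc_pagemeta_t. A minimal standalone analogue using C11 atomics, with the same single-CAS shape and the same caveat quoted above (no ABA protection; it is safe only because sweeping never re-pushes a node onto the stack it was popped from):

#include <stdatomic.h>
#include <stddef.h>

typedef struct node { struct node *next; } node_t;
typedef struct { _Atomic(node_t *) bottom; } lf_stack_t;

static void lf_push(lf_stack_t *s, node_t *elt)
{
    node_t *old = atomic_load_explicit(&s->bottom, memory_order_relaxed);
    do {
        elt->next = old;           /* link before publishing */
    } while (!atomic_compare_exchange_weak(&s->bottom, &old, elt));
}

static node_t *lf_pop(lf_stack_t *s)
{
    node_t *old = atomic_load_explicit(&s->bottom, memory_order_relaxed);
    while (old != NULL &&
           !atomic_compare_exchange_weak(&s->bottom, &old, old->next))
        ;                          /* failed CAS reloads `old`; retry */
    return old;                    /* NULL when the stack is empty */
}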
+typedef struct { + _Atomic(size_t) n_freed_objs; + _Atomic(size_t) n_pages_allocd; +} gc_fragmentation_stat_t; + +#ifdef GC_SMALL_PAGE +#ifdef _P64 +#define REGION0_PG_COUNT (1 << 16) +#define REGION1_PG_COUNT (1 << 18) +#define REGION2_PG_COUNT (1 << 18) +#define REGION0_INDEX(p) (((uintptr_t)(p) >> 12) & 0xFFFF) // shift by GC_PAGE_LG2 +#define REGION1_INDEX(p) (((uintptr_t)(p) >> 28) & 0x3FFFF) +#define REGION_INDEX(p) (((uintptr_t)(p) >> 46) & 0x3FFFF) +#else +#define REGION0_PG_COUNT (1 << 10) +#define REGION1_PG_COUNT (1 << 10) +#define REGION2_PG_COUNT (1 << 0) +#define REGION0_INDEX(p) (((uintptr_t)(p) >> 12) & 0x3FF) // shift by GC_PAGE_LG2 +#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF) +#define REGION_INDEX(p) (0) +#endif +#else #ifdef _P64 #define REGION0_PG_COUNT (1 << 16) #define REGION1_PG_COUNT (1 << 16) @@ -346,39 +262,129 @@ typedef struct { #define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF) #define REGION_INDEX(p) (0) #endif +#endif // define the representation of the levels of the page-table (0 to 2) typedef struct { - jl_gc_pagemeta_t *meta[REGION0_PG_COUNT]; - uint32_t allocmap[REGION0_PG_COUNT / 32]; - uint32_t freemap[REGION0_PG_COUNT / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - int ub; + uint8_t meta[REGION0_PG_COUNT]; } pagetable0_t; typedef struct { pagetable0_t *meta0[REGION1_PG_COUNT]; - uint32_t allocmap0[REGION1_PG_COUNT / 32]; - uint32_t freemap0[REGION1_PG_COUNT / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - int ub; } pagetable1_t; typedef struct { pagetable1_t *meta1[REGION2_PG_COUNT]; - uint32_t allocmap1[(REGION2_PG_COUNT + 31) / 32]; - uint32_t freemap1[(REGION2_PG_COUNT + 31) / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - int ub; } pagetable_t; -#ifdef __clang_gcanalyzer__ +typedef struct { + _Atomic(size_t) bytes_mapped; + _Atomic(size_t) bytes_resident; + _Atomic(size_t) heap_size; + _Atomic(size_t) heap_target; +} gc_heapstatus_t; + +#define GC_PAGE_UNMAPPED 0 +#define GC_PAGE_ALLOCATED 1 +#define GC_PAGE_LAZILY_FREED 2 +#define GC_PAGE_FREED 3 + +extern pagetable_t alloc_map; + +STATIC_INLINE uint8_t gc_alloc_map_is_set(char *_data) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + if (r1 == NULL) + return 0; + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + if (r0 == NULL) + return 0; + i = REGION0_INDEX(data); + return (r0->meta[i] == GC_PAGE_ALLOCATED); +} + +STATIC_INLINE void gc_alloc_map_set(char *_data, uint8_t v) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + assert(r1 != NULL); + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + assert(r0 != NULL); + i = REGION0_INDEX(data); + r0->meta[i] = v; +} + +STATIC_INLINE void gc_alloc_map_maybe_create(char *_data) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + if (r1 == NULL) { + r1 = (pagetable1_t*)calloc_s(sizeof(pagetable1_t)); + alloc_map.meta1[i] = r1; + } + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + if (r0 == NULL) { + r0 = (pagetable0_t*)calloc_s(sizeof(pagetable0_t)); + r1->meta0[i] = r0; + } +} + +// Page 
layout: +// Metadata pointer: sizeof(jl_gc_pagemeta_t*) +// Padding: GC_PAGE_OFFSET - sizeof(jl_gc_pagemeta_t*) +// Blocks: osize * n +// Tag: sizeof(jl_taggedvalue_t) +// Data: <= osize - sizeof(jl_taggedvalue_t) + +STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT +{ + return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2); +} + +STATIC_INLINE jl_gc_pagemeta_t *page_metadata_unsafe(void *_data) JL_NOTSAFEPOINT +{ + return *(jl_gc_pagemeta_t**)(gc_page_data(_data)); +} + +STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT +{ + if (!gc_alloc_map_is_set((char*)_data)) { + return NULL; + } + return page_metadata_unsafe(_data); +} + +STATIC_INLINE void set_page_metadata(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT +{ + *(jl_gc_pagemeta_t**)(pg->data) = pg; +} + +STATIC_INLINE void push_page_metadata_back(jl_gc_pagemeta_t **ppg, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +{ + elt->next = *ppg; + *ppg = elt; +} + +STATIC_INLINE jl_gc_pagemeta_t *pop_page_metadata_back(jl_gc_pagemeta_t **ppg) JL_NOTSAFEPOINT +{ + jl_gc_pagemeta_t *v = *ppg; + if (*ppg != NULL) { + *ppg = (*ppg)->next; + } + return v; +} + +#ifdef __clang_gcanalyzer__ /* clang may not have __builtin_ffs */ unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT; #else STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) @@ -388,23 +394,20 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) #endif extern jl_gc_num_t gc_num; -extern pagetable_t memory_map; extern bigval_t *big_objects_marked; extern arraylist_t finalizer_list_marked; extern arraylist_t to_finalize; -extern int64_t lazy_freed_pages; +extern int64_t buffered_pages; +extern int gc_first_tid; +extern int gc_n_threads; +extern jl_ptls_t* gc_all_tls_states; +extern gc_heapstatus_t gc_heap_stats; STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT { return container_of(o, bigval_t, header); } -// round an address inside a gcpage's data to its beginning -STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT -{ - return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2); -} - STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT { return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset); @@ -442,52 +445,6 @@ STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT NOINLINE uintptr_t gc_get_stack_ptr(void); -STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT -{ - uintptr_t data = ((uintptr_t)_data); - unsigned i; - i = REGION_INDEX(data); - pagetable1_t *r1 = memory_map.meta1[i]; - if (!r1) - return NULL; - i = REGION1_INDEX(data); - pagetable0_t *r0 = r1->meta0[i]; - if (!r0) - return NULL; - i = REGION0_INDEX(data); - return r0->meta[i]; -} - -struct jl_gc_metadata_ext { - pagetable1_t *pagetable1; - pagetable0_t *pagetable0; - jl_gc_pagemeta_t *meta; - unsigned pagetable_i32, pagetable_i; - unsigned pagetable1_i32, pagetable1_i; - unsigned pagetable0_i32, pagetable0_i; -}; - -STATIC_INLINE struct jl_gc_metadata_ext page_metadata_ext(void *_data) JL_NOTSAFEPOINT -{ - uintptr_t data = (uintptr_t)_data; - struct jl_gc_metadata_ext info; - unsigned i; - i = REGION_INDEX(data); - info.pagetable_i = i % 32; - info.pagetable_i32 = i / 32; - info.pagetable1 = memory_map.meta1[i]; - i = REGION1_INDEX(data); - info.pagetable1_i = i % 32; - info.pagetable1_i32 = i / 32; - info.pagetable0 = info.pagetable1->meta0[i]; - i = REGION0_INDEX(data); - info.pagetable0_i = i % 32; - info.pagetable0_i32 = i / 32; - info.meta = info.pagetable0->meta[i]; - assert(info.meta); - 
return info; -} - STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT { *hdr->prev = hdr->next; @@ -505,28 +462,27 @@ STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFE *list = hdr; } -STATIC_INLINE void gc_mark_sp_init(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) -{ - sp->pc = gc_cache->pc_stack; - sp->data = gc_cache->data_stack; - sp->pc_start = gc_cache->pc_stack; - sp->pc_end = gc_cache->pc_stack_end; -} - -void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_mark_sp_t *sp); -void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp, - arraylist_t *list, size_t start); -void gc_mark_loop(jl_ptls_t ptls, jl_gc_mark_sp_t sp); +extern uv_mutex_t gc_threads_lock; +extern uv_cond_t gc_threads_cond; +extern uv_sem_t gc_sweep_assists_needed; +extern _Atomic(int) gc_n_threads_marking; +extern _Atomic(int) gc_n_threads_sweeping; +void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq); +void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT; +void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT; +void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq); +void gc_mark_loop_serial(jl_ptls_t ptls); +void gc_mark_loop_parallel(jl_ptls_t ptls, int master); +void gc_sweep_pool_parallel(void); +void gc_free_pages(void); void sweep_stack_pools(void); void jl_gc_debug_init(void); -extern void *gc_mark_label_addrs[_GC_MARK_L_MAX]; - // GC pages -void jl_gc_init_page(void); +void jl_gc_init_page(void) JL_NOTSAFEPOINT; NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT; -void jl_gc_free_page(void *p) JL_NOTSAFEPOINT; +void jl_gc_free_page(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT; // GC debug @@ -556,9 +512,9 @@ void gc_time_big_start(void) JL_NOTSAFEPOINT; void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT; void gc_time_big_end(void) JL_NOTSAFEPOINT; -void gc_time_mallocd_array_start(void) JL_NOTSAFEPOINT; -void gc_time_count_mallocd_array(int bits) JL_NOTSAFEPOINT; -void gc_time_mallocd_array_end(void) JL_NOTSAFEPOINT; +void gc_time_mallocd_memory_start(void) JL_NOTSAFEPOINT; +void gc_time_count_mallocd_memory(int bits) JL_NOTSAFEPOINT; +void gc_time_mallocd_memory_end(void) JL_NOTSAFEPOINT; void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes, int64_t perm_scanned_bytes); @@ -569,6 +525,13 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end, uint64_t freed, uint64_t live, uint64_t interval, uint64_t pause, uint64_t ttsp, uint64_t mark, uint64_t sweep); +void gc_heuristics_summary( + uint64_t old_alloc_diff, uint64_t alloc_mem, + uint64_t old_mut_time, uint64_t alloc_time, + uint64_t old_freed_diff, uint64_t gc_mem, + uint64_t old_pause_time, uint64_t gc_time, + int thrash_counter, const char *reason, + uint64_t current_heap, uint64_t target_heap); #else #define gc_time_pool_start() STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT @@ -585,17 +548,24 @@ STATIC_INLINE void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT (void)bits; } #define gc_time_big_end() -#define gc_time_mallocd_array_start() -STATIC_INLINE void gc_time_count_mallocd_array(int bits) JL_NOTSAFEPOINT +#define gc_time_mallocd_memory_start() +STATIC_INLINE void gc_time_count_mallocd_memory(int bits) JL_NOTSAFEPOINT { (void)bits; } -#define gc_time_mallocd_array_end() +#define gc_time_mallocd_memory_end() #define gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes) 
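Illustrative sketch, not part of the patch: with the new page layout, the first word of every pool page stores a pointer to its jl_gc_pagemeta_t (what set_page_metadata does above), so page_metadata() becomes a round-down plus one load instead of a three-level pagetable walk, and the pagetable is reduced to per-page allocation state. Note that with GC_SMALL_PAGE on 64-bit the three index widths shown above (16 + 18 + 18 bits) plus GC_PAGE_LG2 (12) cover the full 64-bit address space. A standalone analogue of the lookup; the metadata struct and field values below are simplified placeholders:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define GC_PAGE_LG2 14                 /* 12 when GC_SMALL_PAGE is defined */
#define GC_PAGE_SZ  (1 << GC_PAGE_LG2)

typedef struct { int osize; char *data; } meta_t;    /* simplified stand-in */

static char *page_data(void *x)        /* round down to the page start */
{
    return (char *)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2);
}

static meta_t *page_meta(void *x)      /* read the pointer stored at the page start */
{
    return *(meta_t **)page_data(x);
}

int main(void)
{
    char *page = aligned_alloc(GC_PAGE_SZ, GC_PAGE_SZ);  /* stand-in for jl_gc_alloc_page */
    meta_t meta = { 32, page };
    *(meta_t **)page = &meta;                            /* what set_page_metadata() does */
    assert(page_data(page + 1000) == page);
    assert(page_meta(page + 1000)->osize == 32);
    free(page);
    return 0;
}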
#define gc_time_sweep_pause(gc_end_t, actual_allocd, live_bytes, \ estimate_freed, sweep_full) #define gc_time_summary(sweep_full, start, end, freed, live, \ interval, pause, ttsp, mark, sweep) +#define gc_heuristics_summary( \ + old_alloc_diff, alloc_mem, \ + old_mut_time, alloc_time, \ + old_freed_diff, gc_mem, \ + old_pause_time, gc_time, \ + thrash_counter, reason, \ + current_heap, target_heap) #endif #ifdef MEMFENCE @@ -606,7 +576,6 @@ static inline void gc_verify_tags(void) } #endif - #ifdef GC_VERIFY extern jl_value_t *lostval; void gc_verify(jl_ptls_t ptls); @@ -646,9 +615,10 @@ extern int gc_verifying; #define verify_parent2(ty,obj,slot,arg1,arg2) do {} while (0) #define gc_verifying (0) #endif -int gc_slot_to_fieldidx(void *_obj, void *slot); -int gc_slot_to_arrayidx(void *_obj, void *begin); -NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); + +int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT; +int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; +NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int offset) JL_NOTSAFEPOINT; #ifdef GC_DEBUG_ENV JL_DLLEXPORT extern jl_gc_debug_env_t jl_gc_debug_env; @@ -709,10 +679,11 @@ void gc_stats_big_obj(void); // For debugging void gc_count_pool(void); -size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT; +size_t jl_genericmemory_nbytes(jl_genericmemory_t *a) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_enable_gc_logging(int enable); -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint32_t jl_get_num_stack_mappings(void); +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT; #ifdef __cplusplus } diff --git a/src/gen_sysimg_symtab.jl b/src/gen_sysimg_symtab.jl index 8f03cc1560767..a91f2f994194c 100644 --- a/src/gen_sysimg_symtab.jl +++ b/src/gen_sysimg_symtab.jl @@ -15,12 +15,6 @@ function _eachmethod(f, m::Module, visited, vmt) x = getfield(m, nm) if isa(x, Module) && !in(x, visited) _eachmethod(f, x, visited, vmt) - elseif isa(x, Function) - mt = typeof(x).name.mt - if !in(mt, vmt) - push!(vmt, mt) - Base.visit(f, mt) - end elseif isa(x, Type) x = Base.unwrap_unionall(x) if isa(x, DataType) && isdefined(x.name, :mt) @@ -69,5 +63,5 @@ function outputline(io, name) println(io, "jl_symbol(\"", name, "\"),") end -open(f->foreach(l->outputline(f,l), take(syms, 100)), "common_symbols1.inc", "w") -open(f->foreach(l->outputline(f,l), take(drop(syms, 100), 254)), "common_symbols2.inc", "w") +open(f->foreach(l->outputline(f,l), take(syms, 94)), "common_symbols1.inc", "w") +open(f->foreach(l->outputline(f,l), take(drop(syms, 94), 254)), "common_symbols2.inc", "w") diff --git a/src/genericmemory.c b/src/genericmemory.c new file mode 100644 index 0000000000000..24b2bac6b2ac1 --- /dev/null +++ b/src/genericmemory.c @@ -0,0 +1,620 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +/* + GenericMemory{kind, T} constructors and primitives +*/ +#include +#include +#ifdef _OS_WINDOWS_ +#include +#endif +#include "julia.h" +#include "julia_internal.h" +#include "julia_assert.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void genericmemoryassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src) JL_NOTSAFEPOINT +{ + size_t nb = jl_datatype_size(jl_typeof(src)); // make sure to shrink-wrap this copy + if (hasptr) { + size_t nptr = nb / sizeof(void*); + memmove_refs((_Atomic(void*)*)dst, (_Atomic(void*)*)src, nptr); + jl_gc_multi_wb(parent, src); + } + else { + // genericmemory can assume more alignment than a field would normally have + switch (nb) { + case 0: break; + case 1: *(uint8_t*)dst = *(uint8_t*)src; break; + case 2: *(uint16_t*)dst = *(uint16_t*)src; break; + case 4: *(uint32_t*)dst = *(uint32_t*)src; break; + case 8: *(uint64_t*)dst = *(uint64_t*)src; break; + case 16: + memcpy(jl_assume_aligned(dst, 16), jl_assume_aligned(src, 16), 16); + break; + default: memcpy(dst, src, nb); + } + } +} + +// genericmemory constructors --------------------------------------------------------- +JL_DLLEXPORT char *jl_genericmemory_typetagdata(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + assert(layout->flags.arrayelem_isunion); + return (char*)m->ptr + m->length * layout->size; +} + +#if defined(_P64) && defined(UINT128MAX) +typedef __uint128_t wideint_t; +#else +typedef uint64_t wideint_t; +#endif + +#define MAXINTVAL (((size_t)-1)>>1) + +jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz) +{ + jl_task_t *ct = jl_current_task; + char *data; + jl_genericmemory_t *m; + if (nel == 0) // zero-sized allocation optimization + return (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance; + wideint_t prod = (wideint_t)nel * elsz; + if (isunion) { + // an extra byte for each isbits union memory element, stored at m->ptr + m->length + prod += nel; + } + if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size"); + size_t tot = (size_t)prod + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT); + + int pooled = tot <= GC_MAX_SZCLASS; + if (!pooled) { + data = (char*)jl_gc_managed_malloc(prod); + tot = sizeof(jl_genericmemory_t) + sizeof(void*); + } + m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype); + if (pooled) { + data = (char*)m + JL_SMALL_BYTE_ALIGNMENT; + } + else { + int isaligned = 1; // jl_gc_managed_malloc is always aligned + jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned); + jl_genericmemory_data_owner_field(m) = (jl_value_t*)m; + } + m->length = nel; + m->ptr = data; + + if (zeroinit) + memset(data, 0, (size_t)prod); + return m; +} + +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_t nel) +{ + assert(jl_is_datatype(mtype)); + jl_genericmemory_t *m = (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout; + if (m == NULL) { + if (jl_tparam0((jl_datatype_t*)mtype) != (jl_value_t*)jl_not_atomic_sym) + jl_error("GenericMemory kind must be :not_atomic"); + jl_value_t *addrspace = jl_tparam2((jl_datatype_t*)mtype); + if (!jl_is_addrspacecore(addrspace) || jl_unbox_uint8(addrspace) != 0) + jl_error("GenericMemory addrspace must be Core.CPU"); + if 
(!((jl_datatype_t*)mtype)->has_concrete_subtype || layout == NULL) + jl_type_error_rt("GenericMemory", "element type", (jl_value_t*)jl_type_type, jl_tparam1(mtype)); + abort(); // this is checked already by jl_get_genericmemory_layout + } + assert(jl_tparam0((jl_datatype_t*)mtype) == (jl_value_t*)jl_not_atomic_sym); + assert(((jl_datatype_t*)mtype)->has_concrete_subtype && layout != NULL); + if (nel == 0) // zero-sized allocation optimization fast path + return m; + + size_t elsz = layout->size; + int isboxed = layout->flags.arrayelem_isboxed; + int isunion = layout->flags.arrayelem_isunion; + int zi = ((jl_datatype_t*)mtype)->zeroinit; + if (isboxed) + elsz = sizeof(void*); + return _new_genericmemory_(mtype, nel, isunion, zi, elsz); +} + +JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str) +{ + jl_task_t *ct = jl_current_task; + int tsz = sizeof(jl_genericmemory_t) + sizeof(void*); + jl_genericmemory_t *m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, jl_memory_uint8_type); + m->length = jl_string_len(str); + m->ptr = jl_string_data(str); + jl_genericmemory_data_owner_field(m) = str; + return m; +} + +// own_buffer != 0 iff GC should call free() on this pointer eventually +JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void *data, + size_t nel, int own_buffer) +{ + jl_task_t *ct = jl_current_task; + assert(jl_is_datatype(mtype)); + jl_genericmemory_t *m = (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout; + if (m == NULL) { + if (jl_tparam0((jl_datatype_t*)mtype) != (jl_value_t*)jl_not_atomic_sym) + jl_error("GenericMemory kind must be :not_atomic"); + jl_value_t *addrspace = jl_tparam2((jl_datatype_t*)mtype); + if (!jl_is_addrspacecore(addrspace) || jl_unbox_uint8(addrspace) != 0) + jl_error("GenericMemory addrspace must be Core.CPU"); + if (!((jl_datatype_t*)mtype)->has_concrete_subtype || layout == NULL) + jl_type_error_rt("GenericMemory", "element type", (jl_value_t*)jl_type_type, jl_tparam1(mtype)); + abort(); + } + assert(jl_tparam0((jl_datatype_t*)mtype) == (jl_value_t*)jl_not_atomic_sym); + assert(((jl_datatype_t*)mtype)->has_concrete_subtype && layout != NULL); + //if (nel == 0) {// zero-sized allocation optimization fast path + // if (own_buffer) + // free(data); + // return m; + //} + + size_t elsz = layout->size; + size_t align = layout->alignment; + int isboxed = layout->flags.arrayelem_isboxed; + int isunion = layout->flags.arrayelem_isunion; + if (isboxed) + elsz = sizeof(void*); + if (isunion) + jl_exceptionf(jl_argumenterror_type, + "unsafe_wrap: unspecified layout for union element type"); + if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? 
JL_HEAP_ALIGNMENT : align) - 1)) + jl_exceptionf(jl_argumenterror_type, + "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); + wideint_t prod = (wideint_t)nel * elsz; + if (isunion) { + // an extra byte for each isbits union memory element, stored at m->ptr + m->length + prod += nel; + } + if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size"); + int tsz = sizeof(jl_genericmemory_t) + sizeof(void*); + m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype); + m->ptr = data; + m->length = nel; + jl_genericmemory_data_owner_field(m) = NULL; + int isaligned = 0; // TODO: allow passing memalign'd buffers + if (own_buffer) { + jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned); + jl_gc_count_allocd(nel*elsz); + } + return m; +} + +JL_DLLEXPORT jl_genericmemory_t *jl_new_genericmemory(jl_value_t *mtype, jl_value_t *nel) +{ + return jl_alloc_genericmemory(mtype, jl_unbox_long(nel)); +} + +JL_DLLEXPORT jl_genericmemory_t *jl_pchar_to_genericmemory(const char *str, size_t len) +{ + jl_genericmemory_t *m = jl_alloc_genericmemory(jl_memory_uint8_type, len); + memcpy(m->ptr, str, len); + return m; +} + +JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_t len) +{ + assert(len <= m->length); + if (len == 0) { + // this may seem like purely an optimization (which it also is), but it + // also ensures that calling `String(m)` doesn't corrupt a previous + // string also created the same way, where `m = StringVector(_)`. + return jl_an_empty_string; + } + int how = jl_genericmemory_how(m); + size_t mlength = m->length; + m->length = 0; + if (how != 0) { + jl_value_t *o = jl_genericmemory_data_owner_field(m); + jl_genericmemory_data_owner_field(m) = NULL; + if (how == 3 && + ((mlength + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS))) { + if (jl_string_data(o)[len] != '\0') + jl_string_data(o)[len] = '\0'; + if (*(size_t*)o != len) + *(size_t*)o = len; + return o; + } + JL_GC_PUSH1(&o); + jl_value_t *str = jl_pchar_to_string((const char*)m->ptr, len); + JL_GC_POP(); + return str; + } + return jl_pchar_to_string((const char*)m->ptr, len); +} + +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n) +{ + return jl_alloc_genericmemory(jl_memory_any_type, n); +} + +JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_slice(jl_genericmemory_t *mem, void *data, size_t len) +{ + // Given a GenericMemoryRef represented as `jl_genericmemory_ref ref = {data, mem}`, + // return a new GenericMemory that only accesses the slice from the given GenericMemoryRef to + // the given length if this is possible to return. This allows us to make + // `length(Array)==length(Array.ref.mem)`, for simplification of this. 
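Illustrative sketch, not part of the patch: for an isbits-union element type, _new_genericmemory_ above reserves one extra byte per element (the `prod += nel`), and jl_genericmemory_typetagdata returns the run of selector bytes stored directly after the last element. A standalone picture of that layout; the struct is a simplified placeholder for jl_genericmemory_t:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef struct { size_t length; void *ptr; } mem_t;   /* simplified stand-in */

/* Buffer layout for an isbits-union Memory with element size `elsz`:
 *   [elem 0][elem 1]...[elem n-1][sel 0][sel 1]...[sel n-1]            */
static uint8_t *typetagdata(const mem_t *m, size_t elsz)
{
    return (uint8_t *)m->ptr + m->length * elsz;
}

int main(void)
{
    enum { N = 4, ELSZ = 8 };
    static char buf[N * ELSZ + N];     /* N extra bytes for the selectors */
    mem_t m = { N, buf };
    uint8_t *sel = typetagdata(&m, ELSZ);
    assert(sel == (uint8_t *)buf + N * ELSZ);
    sel[2] = 1;                        /* element 2 currently holds union component 1 */
    return 0;
}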
+ jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(mem); + const jl_datatype_layout_t *layout = dt->layout; + // repeated checks here ensure the values cannot overflow, since we know mem->length is a reasonable value + if (len > mem->length) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError + if (layout->flags.arrayelem_isunion) { + if (!((size_t)data == 0 && mem->length == len)) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // only exact slices are supported + data = mem->ptr; + } + else if (layout->size == 0) { + if ((size_t)data > mem->length || (size_t)data + len > mem->length) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError + data = mem->ptr; + } + else { + if (data < mem->ptr || (char*)data > (char*)mem->ptr + mem->length * layout->size || (char*)data + len * layout->size > (char*)mem->ptr + mem->length * layout->size) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError + } + jl_task_t *ct = jl_current_task; + jl_genericmemory_t *newmem = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, sizeof(jl_genericmemory_t) + sizeof(void*), dt); + newmem->length = len; + newmem->ptr = data; + jl_genericmemory_data_owner_field(newmem) = jl_genericmemory_owner(mem); + return newmem; +} + +JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata, + jl_genericmemory_t *src, char* srcdata, + size_t n) JL_NOTSAFEPOINT +{ + jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(dest); + if (dt != (jl_datatype_t*)jl_typetagof(src)) + jl_exceptionf(jl_argumenterror_type, "jl_genericmemory_copyto requires source and dest to have same type"); + const jl_datatype_layout_t *layout = dt->layout; + if (layout->flags.arrayelem_isboxed) { + _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata; + _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata; + jl_value_t *owner = jl_genericmemory_owner(dest); + if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { + jl_value_t *src_owner = jl_genericmemory_owner(src); + ssize_t done = 0; + if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { + if (dest_p < src_p || dest_p > src_p + n) { + for (; done < n; done++) { // copy forwards + void *val = jl_atomic_load_relaxed(src_p + done); + jl_atomic_store_release(dest_p + done, val); + // `val` is young or old-unmarked + if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { + jl_gc_queue_root(owner); + break; + } + } + src_p += done; + dest_p += done; + } else { + for (; done < n; done++) { // copy backwards + void *val = jl_atomic_load_relaxed(src_p + n - done - 1); + jl_atomic_store_release(dest_p + n - done - 1, val); + // `val` is young or old-unmarked + if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) { + jl_gc_queue_root(owner); + break; + } + } + } + n -= done; + } + } + return memmove_refs(dest_p, src_p, n); + } + size_t elsz = layout->size; + char *src_p = srcdata; + int isbitsunion = layout->flags.arrayelem_isunion; + if (isbitsunion) { + char *sourcetypetagdata = jl_genericmemory_typetagdata(src); + char *desttypetagdata = jl_genericmemory_typetagdata(dest); + memmove(desttypetagdata+(size_t)destdata, sourcetypetagdata+(size_t)srcdata, n); + srcdata = (char*)src->ptr + elsz*(size_t)srcdata; + destdata = (char*)dest->ptr + elsz*(size_t)destdata; + } + if (layout->first_ptr != -1) { + memmove_refs((_Atomic(void*)*)destdata, (_Atomic(void*)*)srcdata, n * elsz / sizeof(void*)); + jl_value_t *owner = 
jl_genericmemory_owner(dest); + if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) { + jl_value_t *src_owner = jl_genericmemory_owner(src); + if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) { + dt = (jl_datatype_t*)jl_tparam1(dt); + for (size_t done = 0; done < n; done++) { // copy forwards + char* s = (char*)src_p+done*elsz; + if (*((jl_value_t**)s+layout->first_ptr) != NULL) + jl_gc_queue_multiroot(owner, s, dt); + } + } + } + } + else { + memmove(destdata, srcdata, n * elsz); + } +} + + +// genericmemory primitives ----------------------------------------------------------- + +JL_DLLEXPORT jl_value_t *jl_ptrmemref(jl_genericmemory_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT +{ + assert(i < m->length); + assert(((jl_datatype_t*)jl_typetagof(m))->layout->flags.arrayelem_isboxed); + jl_value_t *elt = jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)m->ptr) + i); + if (elt == NULL) + jl_throw(jl_undefref_exception); + return elt; +} + +JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *m, size_t i) +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + if (layout->flags.arrayelem_isboxed) + return jl_ptrmemref(m, i); + assert(i < m->length); + jl_value_t *isatomic = jl_tparam0(jl_typetagof(m)); (void)isatomic; // TODO + jl_value_t *eltype = jl_tparam1(jl_typetagof(m)); + if (layout->flags.arrayelem_isunion) { + // isbits union selector bytes are always stored directly after the last memory element + uint8_t sel = jl_genericmemory_typetagdata(m)[i]; + eltype = jl_nth_union_component(eltype, sel); + if (jl_is_datatype_singleton((jl_datatype_t*)eltype)) + return ((jl_datatype_t*)eltype)->instance; + } + jl_value_t *r = undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, &((char*)m->ptr)[i * layout->size])); + if (__unlikely(r == NULL)) + jl_throw(jl_undefref_exception); + return r; +} + +JL_DLLEXPORT int jl_genericmemory_isassigned(jl_genericmemory_t *m, size_t i) +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + if (layout->flags.arrayelem_isboxed) { + return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)m->ptr) + i) != NULL; + } + else if (layout->first_ptr >= 0) { + jl_value_t **elem = (jl_value_t**)((char*)m->ptr + i * layout->size); + return elem[layout->first_ptr] != NULL; + } + return 1; +} + +JL_DLLEXPORT void jl_genericmemoryset(jl_genericmemory_t *m JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, size_t i) +{ + assert(i < m->length); + jl_value_t *isatomic = jl_tparam0(jl_typetagof(m)); (void)isatomic; // TODO + jl_value_t *eltype = jl_tparam1(jl_typetagof(m)); + if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) { + JL_GC_PUSH1(&rhs); + if (!jl_isa(rhs, eltype)) + jl_type_error("genericmemoryset", eltype, rhs); + JL_GC_POP(); + } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + if (layout->flags.arrayelem_isboxed) { + jl_atomic_store_release(((_Atomic(jl_value_t*)*)m->ptr) + i, rhs); + jl_gc_wb(jl_genericmemory_owner(m), rhs); + } + else { + int hasptr; + if (jl_is_uniontype(eltype)) { + uint8_t *psel = &((uint8_t*)jl_genericmemory_typetagdata(m))[i]; + unsigned nth = 0; + if (!jl_find_union_component(eltype, jl_typeof(rhs), &nth)) + assert(0 && "invalid genericmemoryset to isbits union"); + *psel = nth; + if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs))) + return; + hasptr = 0; + } + else { + hasptr = layout->first_ptr >= 0; + } + genericmemoryassign_safe(hasptr, 
jl_genericmemory_owner(m), &((char*)m->ptr)[i * layout->size], rhs); + } +} + +JL_DLLEXPORT void jl_genericmemoryunset(jl_genericmemory_t *m, size_t i) +{ + if (i >= m->length) + jl_bounds_error_int((jl_value_t*)m, i + 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + if (layout->flags.arrayelem_isboxed) + jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)m->ptr) + i, NULL); + else if (layout->first_ptr >= 0) { + size_t elsize = layout->size; + jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0); + memset((char*)m->ptr + elsize * i, 0, elsize); + } +} + +JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy_slice(jl_genericmemory_t *mem, void *data, size_t len) +{ + jl_value_t *mtype = (jl_value_t*)jl_typetagof(mem); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout; + size_t elsz = layout->size; + int isunion = layout->flags.arrayelem_isunion; + jl_genericmemory_t *new_mem = _new_genericmemory_(mtype, len, isunion, 0, elsz); + if (isunion) { + memcpy(new_mem->ptr, (char*)mem->ptr + (size_t)data * elsz, len * elsz); + memcpy(jl_genericmemory_typetagdata(new_mem), jl_genericmemory_typetagdata(mem) + (size_t)data, len); + } + else if (layout->first_ptr != -1) { + memmove_refs((_Atomic(void*)*)new_mem->ptr, (_Atomic(void*)*)data, len * elsz / sizeof(void*)); + } + else if (data != NULL) { + memcpy(new_mem->ptr, data, len * elsz); + } + return new_mem; +} + +JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy(jl_genericmemory_t *mem) +{ + jl_value_t *mtype = (jl_value_t*)jl_typetagof(mem); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout; + return jl_genericmemory_copy_slice(mem, layout->flags.arrayelem_isunion || layout->size == 0 ? (void*)0 : mem->ptr, mem->length); +} + +JL_DLLEXPORT jl_value_t *(jl_genericmemory_data_owner)(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + return jl_genericmemory_data_owner_field(m); +} + +jl_genericmemoryref_t *jl_new_memoryref(jl_value_t *typ, jl_genericmemory_t *mem, void *data) +{ + jl_task_t *ct = jl_current_task; + jl_genericmemoryref_t *m = (jl_genericmemoryref_t*)jl_gc_alloc(ct->ptls, sizeof(jl_genericmemoryref_t), typ); + m->mem = mem; + m->ptr_or_offset = data; + return m; +} + +// memoryref primitives +JL_DLLEXPORT jl_genericmemoryref_t jl_memoryrefindex(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, size_t idx) +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + if ((layout->flags.arrayelem_isboxed || !layout->flags.arrayelem_isunion) && layout->size != 0) { + m.ptr_or_offset = (void*)((char*)m.ptr_or_offset + idx * layout->size); + assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < layout->size * m.mem->length); + } + else { + m.ptr_or_offset = (void*)((size_t)m.ptr_or_offset + idx); + assert((size_t)m.ptr_or_offset < m.mem->length); + } + return m; +} + +JL_DLLEXPORT jl_value_t *jl_ptrmemrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length); + assert(((jl_datatype_t*)jl_typetagof(m.mem))->layout->flags.arrayelem_isboxed); + jl_value_t *elt = jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)m.ptr_or_offset); + if (elt == NULL) + jl_throw(jl_undefref_exception); + return elt; +} + +JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m) +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + if (layout->flags.arrayelem_isboxed) + return jl_ptrmemrefget(m); + jl_value_t 
*isatomic = jl_tparam0(jl_typetagof(m.mem)); (void)isatomic; // TODO + jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem)); + char *data = (char*)m.ptr_or_offset; + if (layout->flags.arrayelem_isunion) { + assert(jl_is_uniontype(eltype)); + size_t i = (size_t)data; + assert(i < m.mem->length); + // isbits union selector bytes are always stored directly after the last memory element + uint8_t sel = jl_genericmemory_typetagdata(m.mem)[i]; + eltype = jl_nth_union_component(eltype, sel); + data = (char*)m.mem->ptr + i * layout->size; + } + if (layout->size == 0) { + assert(jl_is_datatype_singleton((jl_datatype_t*)eltype)); + return ((jl_datatype_t*)eltype)->instance; + } + assert(data - (char*)m.mem->ptr < layout->size * m.mem->length); + jl_value_t *r = undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, data)); + if (__unlikely(r == NULL)) + jl_throw(jl_undefref_exception); + return r; +} + +static int _jl_memoryref_isassigned(jl_genericmemoryref_t m) +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + if (layout->flags.arrayelem_isboxed) { + return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)m.ptr_or_offset) != NULL; + } + else if (layout->first_ptr >= 0) { + jl_value_t **elem = (jl_value_t**)m.ptr_or_offset; + return elem[layout->first_ptr] != NULL; + } + return 1; +} + +JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m) +{ + return _jl_memoryref_isassigned(m) ? jl_true : jl_false; +} + +JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED) +{ + jl_value_t *isatomic = jl_tparam0(jl_typetagof(m.mem)); (void)isatomic; // TODO + jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem)); + if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) { + JL_GC_PUSH1(&rhs); + if (!jl_isa(rhs, eltype)) + jl_type_error("memoryrefset!", eltype, rhs); + JL_GC_POP(); + } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + if (layout->flags.arrayelem_isboxed) { + assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length); + jl_atomic_store_release((_Atomic(jl_value_t*)*)m.ptr_or_offset, rhs); + jl_gc_wb(jl_genericmemory_owner(m.mem), rhs); + } + else { + int hasptr; + char *data = (char*)m.ptr_or_offset; + if (layout->flags.arrayelem_isunion) { + assert(jl_is_uniontype(eltype)); + size_t i = (size_t)data; + assert(i < m.mem->length); + uint8_t *psel = (uint8_t*)jl_genericmemory_typetagdata(m.mem) + i; + unsigned nth = 0; + if (!jl_find_union_component(eltype, jl_typeof(rhs), &nth)) + assert(0 && "invalid genericmemoryset to isbits union"); + *psel = nth; + hasptr = 0; + data = (char*)m.mem->ptr + i * layout->size; + } + else { + hasptr = layout->first_ptr >= 0; + } + if (layout->size != 0) { + assert(data - (char*)m.mem->ptr < layout->size * m.mem->length); + genericmemoryassign_safe(hasptr, jl_genericmemory_owner(m.mem), data, rhs); + } + } +} + +JL_DLLEXPORT void jl_memoryrefunset(jl_genericmemoryref_t m) +{ + if (m.mem->length == 0) + jl_bounds_error_int((jl_value_t*)m.mem, 1); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout; + if (layout->flags.arrayelem_isboxed) { + jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)m.ptr_or_offset, NULL); + } + else if (layout->first_ptr >= 0) { + size_t elsize = layout->size; + jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0); + memset(m.ptr_or_offset, 0, elsize); + } +} + +JL_DLLEXPORT jl_value_t 
*ijl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + return jl_genericmemory_owner(m); +} +#ifdef __cplusplus +} +#endif diff --git a/src/gf.c b/src/gf.c index 1d36589a082f5..268927f60172e 100644 --- a/src/gf.c +++ b/src/gf.c @@ -27,6 +27,9 @@ extern "C" { JL_DLLEXPORT _Atomic(size_t) jl_world_counter = 1; // uses atomic acquire/release JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT { + jl_task_t *ct = jl_current_task; + if (ct->ptls->in_pure_callback) + return ~(size_t)0; return jl_atomic_load_acquire(&jl_world_counter); } @@ -35,6 +38,36 @@ JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT return jl_current_task->world_age; } +// Compute the maximum number of times to unroll Varargs{T}, based on +// m->max_varargs (if specified) or a heuristic based on the maximum +// number of non-varargs arguments in the provided method table. +// +// If provided, `may_increase` is set to 1 if the returned value is +// heuristic-based and has a chance of increasing in the future. +static size_t get_max_varargs( + jl_method_t *m, + jl_methtable_t *kwmt, + jl_methtable_t *mt, + uint8_t *may_increase) JL_NOTSAFEPOINT +{ + size_t max_varargs = 1; + if (may_increase != NULL) + *may_increase = 0; + + if (m->max_varargs != UINT8_MAX) + max_varargs = m->max_varargs; + else if (kwmt != NULL && kwmt != jl_type_type_mt && kwmt != jl_nonfunction_mt && kwmt != jl_kwcall_mt) { + if (may_increase != NULL) + *may_increase = 1; // `max_args` can increase as new methods are inserted + + max_varargs = jl_atomic_load_relaxed(&kwmt->max_args) + 2; + if (mt == jl_kwcall_mt) + max_varargs += 2; + max_varargs -= m->nargs; + } + return max_varargs; +} + /// ----- Handling for Julia callbacks ----- /// JL_DLLEXPORT int8_t jl_is_in_pure_context(void) @@ -77,7 +110,7 @@ static int8_t jl_cachearg_offset(jl_methtable_t *mt) /// ----- Insertion logic for special entries ----- /// -static uint_t speccache_hash(size_t idx, jl_svec_t *data) +static uint_t speccache_hash(size_t idx, jl_value_t *data) { jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx); jl_value_t *sig = ml->specTypes; @@ -86,7 +119,7 @@ static uint_t speccache_hash(size_t idx, jl_svec_t *data) return ((jl_datatype_t*)sig)->hash; } -static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv) +static int speccache_eq(size_t idx, const void *ty, jl_value_t *data, uint_t hv) { jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx); jl_value_t *sig = ml->specTypes; @@ -101,17 +134,37 @@ static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv) // get or create the MethodInstance for a specialization static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams, jl_method_instance_t *mi_insert) { - if (m->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&m->unspecialized) != NULL) + if (m->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&m->unspecialized) != NULL && m != jl_opaque_closure_method) return jl_atomic_load_relaxed(&m->unspecialized); // handle builtin methods jl_value_t *ut = jl_is_unionall(type) ? 
jl_unwrap_unionall(type) : type; JL_TYPECHK(specializations, datatype, ut); uint_t hv = ((jl_datatype_t*)ut)->hash; - for (int locked = 0; ; locked++) { - jl_array_t *speckeyset = jl_atomic_load_acquire(&m->speckeyset); - jl_svec_t *specializations = jl_atomic_load_relaxed(&m->specializations); - size_t i = -1, cl = jl_svec_len(specializations); + jl_genericmemory_t *speckeyset = NULL; + jl_value_t *specializations = NULL; + size_t i = -1, cl = 0, lastcl; + for (int locked = 0; locked < 2; locked++) { + if (locked) { + if (!sparams) // can't insert without knowing this + return NULL; + JL_LOCK(&m->writelock); + } + lastcl = cl; + speckeyset = jl_atomic_load_acquire(&m->speckeyset); + specializations = jl_atomic_load_relaxed(&m->specializations); + if (specializations == (jl_value_t*)jl_emptysvec) + continue; + if (!jl_is_svec(specializations)) { + jl_method_instance_t *mi = (jl_method_instance_t*)specializations; + if (jl_types_equal(mi->specTypes, type)) { + if (locked) + JL_UNLOCK(&m->writelock); + return mi; + } + continue; + } + cl = jl_svec_len(specializations); if (hv) { - ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, specializations, hv); + ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, specializations, hv, 0); if (idx != -1) { jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, idx); if (locked) @@ -122,8 +175,9 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO else { _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations); JL_GC_PUSH1(&specializations); // clang-sa doesn't realize this loop uses specializations - for (i = cl; i > 0; i--) { - jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i - 1]); + // the last lastcl-i-1 elements are already checked when locked, so start search with the new elements only + for (i += cl - lastcl; i > 0; i--) { + jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]); if ((jl_value_t*)mi == jl_nothing) break; if (jl_types_equal(mi->specTypes, type)) { @@ -133,55 +187,66 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO return mi; } } + // i points to the first unchecked element, or the place to insert JL_GC_POP(); } - if (!sparams) // can't insert without knowing this - return NULL; - if (!locked) { - JL_LOCK(&m->writelock); + } + jl_method_instance_t *mi = mi_insert ? mi_insert : jl_get_specialized(m, type, sparams); + if (specializations == (jl_value_t*)jl_emptysvec) { + jl_atomic_store_release(&m->specializations, (jl_value_t*)mi); + jl_gc_wb(m, mi); + } + else { + JL_GC_PUSH1(&mi); + if (!jl_is_svec(specializations)) { + jl_method_instance_t *mi = (jl_method_instance_t*)specializations; + jl_value_t *type = mi->specTypes; + jl_value_t *ut = jl_is_unionall(type) ? jl_unwrap_unionall(type) : type; + uint_t hv = ((jl_datatype_t*)ut)->hash; + cl = 7; + i = cl - 1; + specializations = (jl_value_t*)jl_svec_fill(cl, jl_nothing); + jl_svecset(specializations, hv ? 
0 : i--, mi); + jl_atomic_store_release(&m->specializations, specializations); + jl_gc_wb(m, specializations); + if (hv) + jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, 0, specializations); } - else { - if (hv) { - _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations); - for (i = 0; i < cl; i++) { - jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]); - if ((jl_value_t*)mi == jl_nothing) - break; - assert(!jl_types_equal(mi->specTypes, type)); - } - } - jl_method_instance_t *mi = mi_insert ? mi_insert : jl_get_specialized(m, type, sparams); - JL_GC_PUSH1(&mi); - if (hv ? (i + 1 >= cl || jl_svecref(specializations, i + 1) != jl_nothing) : (i <= 1 || jl_svecref(specializations, i - 2) != jl_nothing)) { - size_t ncl = cl < 8 ? 8 : (cl*3)>>1; - jl_svec_t *nc = jl_alloc_svec_uninit(ncl); - if (i > 0) - memcpy((char*)jl_svec_data(nc), jl_svec_data(specializations), sizeof(void*) * i); - for (int j = 0; j < ncl - cl; j++) - jl_svecset(nc, j+i, jl_nothing); - if (i < cl) - memcpy((char*)jl_svec_data(nc) + sizeof(void*) * (i + ncl - cl), - (char*)jl_svec_data(specializations) + sizeof(void*) * i, - sizeof(void*) * (cl - i)); - jl_atomic_store_release(&m->specializations, nc); - jl_gc_wb(m, nc); - specializations = nc; - if (!hv) - i += ncl - cl; + if (hv) { + _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations); + for (i = 0; i < cl; i++) { + jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]); + if ((jl_value_t*)mi == jl_nothing) + break; + assert(!jl_types_equal(mi->specTypes, type)); } + // i points at the place to insert + } + if (hv ? (i + 1 >= cl || jl_svecref(specializations, i + 1) != jl_nothing) : (i <= 1 || jl_svecref(specializations, i - 2) != jl_nothing)) { + size_t ncl = cl < 7 ? 7 : (cl*3)>>1; + jl_svec_t *nc = jl_alloc_svec_uninit(ncl); + if (i > 0) + memcpy((char*)jl_svec_data(nc), jl_svec_data(specializations), sizeof(void*) * i); + for (int j = 0; j < ncl - cl; j++) + jl_svecset(nc, j+i, jl_nothing); + if (i < cl) + memcpy((char*)jl_svec_data(nc) + sizeof(void*) * (i + ncl - cl), + (char*)jl_svec_data(specializations) + sizeof(void*) * i, + sizeof(void*) * (cl - i)); + specializations = (jl_value_t*)nc; + jl_atomic_store_release(&m->specializations, specializations); + jl_gc_wb(m, specializations); if (!hv) - i -= 1; - assert(jl_svecref(specializations, i) == jl_nothing); - jl_svecset(specializations, i, mi); // jl_atomic_store_release? - if (hv) { - // TODO: fuse lookup and insert steps? 
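Illustrative sketch, not part of the patch: the resize logic above grows the specializations svec to max(7, 1.5x its length), keeping hash-keyed entries packed at the front and linearly scanned entries packed at the back, so the newly created empty slots land in the middle. A standalone analogue of that copy pattern, using ints in place of method instances and 0 in place of jl_nothing:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Grow a cache of `cl` slots, preserving the first `i` slots at the front and
 * the remaining cl-i slots at the back; the gap of empty slots sits between. */
static int *grow_cache(const int *old, size_t cl, size_t i, size_t *nclp)
{
    size_t ncl = cl < 7 ? 7 : (cl * 3) >> 1;
    int *nc = malloc(ncl * sizeof(int));
    if (i > 0)
        memcpy(nc, old, i * sizeof(int));
    for (size_t j = 0; j < ncl - cl; j++)
        nc[j + i] = 0;                         /* 0 plays the role of jl_nothing */
    if (i < cl)
        memcpy(nc + i + (ncl - cl), old + i, (cl - i) * sizeof(int));
    *nclp = ncl;
    return nc;
}

int main(void)
{
    int old[4] = {11, 12, 23, 24};             /* two front entries, two back entries */
    size_t ncl;
    int *nc = grow_cache(old, 4, 2, &ncl);     /* ncl == 7: 11 12 0 0 0 23 24 */
    for (size_t j = 0; j < ncl; j++)
        printf("%d ", nc[j]);
    printf("\n");
    free(nc);
    return 0;
}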
- jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, specializations); - } - JL_UNLOCK(&m->writelock); - JL_GC_POP(); - return mi; + i += ncl - cl; } + assert(jl_svecref(specializations, i) == jl_nothing); + jl_svecset(specializations, i, mi); + if (hv) + jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, specializations); + JL_GC_POP(); } + JL_UNLOCK(&m->writelock); // may gc + return mi; } JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams) @@ -207,8 +272,11 @@ JL_DLLEXPORT jl_value_t *jl_specializations_lookup(jl_method_t *m, jl_value_t *t JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *type, size_t world) { + // TODO: this is sort of an odd lookup strategy (and the only user of + // jl_typemap_assoc_by_type with subtype=0), while normally jl_gf_invoke_lookup would be + // expected to be used instead struct jl_typemap_assoc search = {type, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, /*offs*/0, /*subtype*/0); + jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, jl_cachearg_offset(mt), /*subtype*/0); if (!sf) return jl_nothing; return sf->func.value; @@ -216,15 +284,6 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *typ // ----- MethodInstance specialization instantiation ----- // -JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst( - jl_method_instance_t *mi, jl_value_t *rettype, - jl_value_t *inferred_const, jl_value_t *inferred, - int32_t const_flags, size_t min_world, size_t max_world, - uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes, - uint8_t relocatability); -JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, - jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); - jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED { jl_sym_t *sname = jl_symbol(name); @@ -250,14 +309,14 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a newentry = jl_typemap_alloc(jl_anytuple_type, NULL, jl_emptysvec, (jl_value_t*)m, 1, ~(size_t)0); - jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0); + jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt)); jl_method_instance_t *mi = jl_get_specialized(m, (jl_value_t*)jl_anytuple_type, jl_emptysvec); jl_atomic_store_relaxed(&m->unspecialized, mi); jl_gc_wb(m, mi); jl_code_instance_t *codeinst = jl_new_codeinst(mi, - (jl_value_t*)jl_any_type, jl_nothing, jl_nothing, + (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, jl_nothing, jl_nothing, 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0); jl_mi_cache_insert(mi, codeinst); jl_atomic_store_relaxed(&codeinst->specptr.fptr1, fptr); @@ -278,25 +337,30 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a // if inference doesn't occur (or can't finish), returns NULL instead jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) { - JL_TIMING(INFERENCE); if (jl_typeinf_func == NULL) return NULL; - if (jl_is_method(mi->def.method) && jl_atomic_load_relaxed(&mi->def.method->unspecialized) == mi) - return NULL; // avoid inferring the unspecialized method - static int in_inference; - if (in_inference > 2) + jl_task_t *ct = jl_current_task; + if (ct->reentrant_timing & 0b1000) { + // We must avoid 
attempting to re-enter inference here + assert(0 && "attempted to enter inference while writing out image"); + abort(); + } + // In case we use higher bits later, mask them out + if ((ct->reentrant_timing & 0b1111) >= 0b110) return NULL; jl_code_info_t *src = NULL; #ifdef ENABLE_INFERENCE if (mi->inInference && !force) return NULL; - + JL_TIMING(INFERENCE, INFERENCE); jl_value_t **fargs; JL_GC_PUSHARGS(fargs, 3); fargs[0] = (jl_value_t*)jl_typeinf_func; fargs[1] = (jl_value_t*)mi; fargs[2] = jl_box_ulong(world); + + jl_timing_show_method_instance(mi, JL_TIMING_DEFAULT_BLOCK); #ifdef TRACE_INFERENCE if (mi->specTypes != (jl_value_t*)jl_emptytuple_type) { jl_printf(JL_STDERR,"inference on "); @@ -304,7 +368,6 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) jl_printf(JL_STDERR, "\n"); } #endif - jl_task_t *ct = jl_current_task; int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); @@ -312,7 +375,14 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) size_t last_age = ct->world_age; ct->world_age = jl_typeinf_world; mi->inInference = 1; - in_inference++; + // first bit is for reentrant timing, + // so adding 1 to the bit above performs + // inference reentrancy counter addition. + // Note that this is only safe because + // the counter varies from 0-3; if we + // increase that limit, we'll need to + // allocate another bit for the counter. + ct->reentrant_timing += 0b10; JL_TRY { src = (jl_code_info_t*)jl_apply(fargs, 3); } @@ -333,7 +403,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) src = NULL; } ct->world_age = last_age; - in_inference--; + ct->reentrant_timing -= 0b10; mi->inInference = 0; #ifdef _OS_WINDOWS_ SetLastError(last_error); @@ -345,6 +415,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) } JL_GC_POP(); #endif + return src; } @@ -363,14 +434,15 @@ JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *mi, size_t mi jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); while (codeinst) { if (codeinst->min_world <= min_world && max_world <= codeinst->max_world) { - jl_value_t *code = codeinst->inferred; - if (code && (code == jl_nothing || jl_ir_flag_inferred((jl_array_t*)code))) + jl_value_t *code = jl_atomic_load_relaxed(&codeinst->inferred); + if (code && (code == jl_nothing || jl_ir_flag_inferred(code))) return (jl_value_t*)codeinst; } codeinst = jl_atomic_load_relaxed(&codeinst->next); } return (jl_value_t*)jl_nothing; } +JL_DLLEXPORT jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi, size_t min_world, size_t max_world) JL_NOTSAFEPOINT = jl_rettype_inferred; JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( @@ -387,17 +459,27 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( codeinst = jl_atomic_load_relaxed(&codeinst->next); } codeinst = jl_new_codeinst( - mi, rettype, NULL, NULL, + mi, rettype, (jl_value_t*)jl_any_type, NULL, NULL, 0, min_world, max_world, 0, 0, jl_nothing, 0); jl_mi_cache_insert(mi, codeinst); return codeinst; } +JL_DLLEXPORT jl_code_instance_t *jl_get_codeinst_for_src( + jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_code_info_t *src) +{ + // TODO: copy backedges from src to mi + size_t max_world = src->max_world; + if (max_world >= jl_atomic_load_acquire(&jl_world_counter)) + max_world = ~(size_t)0; + return jl_get_method_inferred(mi, src->rettype, src->min_world, max_world); +} + JL_DLLEXPORT jl_code_instance_t 
*jl_new_codeinst( - jl_method_instance_t *mi, jl_value_t *rettype, + jl_method_instance_t *mi, jl_value_t *rettype, jl_value_t *exctype, jl_value_t *inferred_const, jl_value_t *inferred, int32_t const_flags, size_t min_world, size_t max_world, - uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes, + uint32_t ipo_effects, uint32_t effects, jl_value_t *analysis_results, uint8_t relocatability /*, jl_array_t *edges, int absolute_max*/) { @@ -409,23 +491,24 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( codeinst->min_world = min_world; codeinst->max_world = max_world; codeinst->rettype = rettype; - codeinst->inferred = inferred; + codeinst->exctype = exctype; + jl_atomic_store_release(&codeinst->inferred, inferred); //codeinst->edges = NULL; if ((const_flags & 2) == 0) inferred_const = NULL; codeinst->rettype_const = inferred_const; - jl_atomic_store_relaxed(&codeinst->invoke, NULL); jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL); + jl_atomic_store_relaxed(&codeinst->invoke, NULL); if ((const_flags & 1) != 0) { assert(const_flags & 2); jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_const_return); } - codeinst->isspecsig = 0; + jl_atomic_store_relaxed(&codeinst->specsigflags, 0); jl_atomic_store_relaxed(&codeinst->precompile, 0); jl_atomic_store_relaxed(&codeinst->next, NULL); codeinst->ipo_purity_bits = ipo_effects; - codeinst->purity_bits = effects; - codeinst->argescapes = argescapes; + jl_atomic_store_relaxed(&codeinst->purity_bits, effects); + codeinst->analysis_results = analysis_results; codeinst->relocatability = relocatability; return codeinst; } @@ -438,7 +521,8 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN JL_LOCK(&mi->def.method->writelock); jl_code_instance_t *oldci = jl_atomic_load_relaxed(&mi->cache); jl_atomic_store_relaxed(&ci->next, oldci); - jl_gc_wb(ci, oldci); // likely older, but just being careful + if (oldci) + jl_gc_wb(ci, oldci); jl_atomic_store_release(&mi->cache, ci); jl_gc_wb(mi, ci); if (jl_is_method(mi->def.method)) @@ -449,9 +533,19 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure) { - jl_svec_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations); - size_t i, l = jl_svec_len(specializations); size_t world = jl_atomic_load_acquire(&jl_world_counter); + jl_value_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations); + if (specializations == (jl_value_t*)jl_emptysvec) + return 1; + if (!jl_is_svec(specializations)) { + jl_method_instance_t *mi = (jl_method_instance_t*)specializations; + assert(jl_is_method_instance(mi)); + if (jl_rettype_inferred(mi, world, world) == jl_nothing) + jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); + return 1; + } + size_t i, l = jl_svec_len(specializations); + JL_GC_PUSH1(&specializations); for (i = 0; i < l; i++) { jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i); if ((jl_value_t*)mi != jl_nothing) { @@ -460,46 +554,57 @@ static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure) jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); } } + JL_GC_POP(); return 1; } -static int foreach_mtable_in_module( +int foreach_mtable_in_module( jl_module_t *m, int (*visit)(jl_methtable_t *mt, void *env), void *env) { - size_t i; - void **table = m->bindings.table; - for (i = 1; i < m->bindings.size; i += 2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b 
= (jl_binding_t*)table[i]; - JL_GC_PROMISE_ROOTED(b); - if (b->owner == m && b->constp) { - jl_value_t *v = jl_atomic_load_relaxed(&b->value); - if (v) { - jl_value_t *uw = jl_unwrap_unionall(v); - if (jl_is_datatype(uw)) { - jl_typename_t *tn = ((jl_datatype_t*)uw)->name; - if (tn->module == m && tn->name == b->name && tn->wrapper == v) { - // this is the original/primary binding for the type (name/wrapper) - jl_methtable_t *mt = tn->mt; - if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) { - if (!visit(mt, env)) - return 0; - } - } - } - else if (jl_is_module(v)) { - jl_module_t *child = (jl_module_t*)v; - if (child != m && child->parent == m && child->name == b->name) { - // this is the original/primary binding for the submodule - if (!foreach_mtable_in_module(child, visit, env)) + jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); + for (size_t i = 0; i < jl_svec_len(table); i++) { + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); + if ((void*)b == jl_nothing) + break; + jl_sym_t *name = b->globalref->name; + if (jl_atomic_load_relaxed(&b->owner) == b && b->constp) { + jl_value_t *v = jl_atomic_load_relaxed(&b->value); + if (v) { + jl_value_t *uw = jl_unwrap_unionall(v); + if (jl_is_datatype(uw)) { + jl_typename_t *tn = ((jl_datatype_t*)uw)->name; + if (tn->module == m && tn->name == name && tn->wrapper == v) { + // this is the original/primary binding for the type (name/wrapper) + jl_methtable_t *mt = tn->mt; + if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) { + assert(mt->module == m); + if (!visit(mt, env)) return 0; } } } + else if (jl_is_module(v)) { + jl_module_t *child = (jl_module_t*)v; + if (child != m && child->parent == m && child->name == name) { + // this is the original/primary binding for the submodule + if (!foreach_mtable_in_module(child, visit, env)) + return 0; + } + } + else if (jl_is_mtable(v)) { + jl_methtable_t *mt = (jl_methtable_t*)v; + if (mt->module == m && mt->name == name) { + // this is probably an external method table here, so let's + // assume so as there is no way to precisely distinguish them + if (!visit(mt, env)) + return 0; + } + } } } + table = jl_atomic_load_relaxed(&m->bindings); } return 1; } @@ -514,7 +619,7 @@ int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), voi if (mod_array) { JL_GC_PUSH1(&mod_array); int i; - for (i = 0; i < jl_array_len(mod_array); i++) { + for (i = 0; i < jl_array_nrows(mod_array); i++) { jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); assert(jl_is_module(m)); if (m->parent == m) // some toplevel modules (really just Base) aren't actually @@ -539,7 +644,7 @@ static int reset_mt_caches(jl_methtable_t *mt, void *env) // removes all method caches // this might not be entirely safe (GC or MT), thus we only do it very early in bootstrapping if (!mt->frozen) { // make sure not to reset builtin functions - jl_atomic_store_release(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_release(&mt->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); jl_atomic_store_release(&mt->cache, jl_nothing); } jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), get_method_unspec_list, env); @@ -547,7 +652,7 @@ static int reset_mt_caches(jl_methtable_t *mt, void *env) } -jl_function_t *jl_typeinf_func = NULL; +jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED = NULL; JL_DLLEXPORT size_t jl_typeinf_world = 1; JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f) @@ 
-563,7 +668,7 @@ JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f) JL_GC_PUSH1(&unspec); jl_foreach_reachable_mtable(reset_mt_caches, (void*)unspec); size_t i, l; - for (i = 0, l = jl_array_len(unspec); i < l; i++) { + for (i = 0, l = jl_array_nrows(unspec); i < l; i++) { jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(unspec, i); if (jl_rettype_inferred(mi, world, world) == jl_nothing) jl_type_infer(mi, world, 1); @@ -581,15 +686,12 @@ jl_value_t *jl_nth_slot_type(jl_value_t *sig, size_t i) JL_NOTSAFEPOINT { sig = jl_unwrap_unionall(sig); size_t len = jl_nparams(sig); - if (len == 0) - return NULL; if (i < len-1) return jl_tparam(sig, i); - if (jl_is_vararg(jl_tparam(sig, len-1))) - return jl_unwrap_vararg(jl_tparam(sig, len-1)); - if (i == len-1) - return jl_tparam(sig, i); - return NULL; + jl_value_t *p = jl_tparam(sig, len-1); + if (jl_is_vararg(p)) + p = jl_unwrap_vararg(p); + return p; } // if concrete_match returns false, the sig may specify `Type{T::DataType}`, while the `tt` contained DataType @@ -610,6 +712,45 @@ jl_value_t *jl_nth_slot_type(jl_value_t *sig, size_t i) JL_NOTSAFEPOINT // return 1; //} +static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams) +{ + jl_value_t *unw = jl_unwrap_unionall(decl); + jl_value_t *vm = jl_tparam(unw, jl_nparams(unw) - 1); + assert(jl_is_vararg(vm)); + int nsp = jl_svec_len(sparams); + if (nsp > 0 && jl_has_free_typevars(vm)) { + JL_GC_PUSH1(&vm); + assert(jl_subtype_env_size(decl) == nsp); + vm = jl_instantiate_type_in_env(vm, (jl_unionall_t*)decl, jl_svec_data(sparams)); + assert(jl_is_vararg(vm)); + // rewrap_unionall(lastdeclt, sparams) if any sparams isa TypeVar + // for example, `Tuple{Vararg{Union{Nothing,Int,Val{T}}}} where T` + // and the user called it with `Tuple{Vararg{Union{Nothing,Int},N}}`, then T is unbound + jl_value_t **sp = jl_svec_data(sparams); + while (jl_is_unionall(decl)) { + jl_tvar_t *v = (jl_tvar_t*)*sp; + if (jl_is_typevar(v)) { + // must unwrap and re-wrap Vararg object explicitly here since jl_type_unionall handles it differently + jl_value_t *T = ((jl_vararg_t*)vm)->T; + jl_value_t *N = ((jl_vararg_t*)vm)->N; + int T_has_tv = T && jl_has_typevar(T, v); + int N_has_tv = N && jl_has_typevar(N, v); // n.b. JL_VARARG_UNBOUND check means this should be false + assert(!N_has_tv || N == (jl_value_t*)v); + vm = T_has_tv ? jl_type_unionall(v, T) : T; + if (N_has_tv) + N = NULL; + vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1); // this cannot throw for these inputs + } + sp++; + decl = ((jl_unionall_t*)decl)->body; + nsp--; + } + assert(nsp == 0); + JL_GC_POP(); + } + return vm; +} + static jl_value_t *ml_matches(jl_methtable_t *mt, jl_tupletype_t *type, int lim, int include_ambiguous, int intersections, size_t world, int cache_result, @@ -617,37 +758,73 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, // get the compilation signature specialization for this method static void jl_compilation_sig( - jl_tupletype_t *const tt, // the original tupletype of the call : this is expected to be a relative simple type (no Varags, Union, UnionAll, etc.) 
+ jl_tupletype_t *const tt, // the original tupletype of the call (or DataType from precompile) jl_svec_t *sparams, jl_method_t *definition, - intptr_t nspec, + intptr_t max_varargs, // output: jl_svec_t **const newparams JL_REQUIRE_ROOTED_SLOT) { + assert(jl_is_tuple_type(tt)); + jl_value_t *decl = definition->sig; + size_t nargs = definition->nargs; // == jl_nparams(jl_unwrap_unionall(decl)); + size_t nspec = max_varargs + nargs; + if (definition->generator) { // staged functions aren't optimized // so assume the caller was intelligent about calling us return; } - if (definition->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&definition->unspecialized)) { + + if (decl == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&definition->unspecialized)) { *newparams = jl_anytuple_type->parameters; // handle builtin methods return; } - jl_value_t *decl = definition->sig; - assert(jl_is_tuple_type(tt)); + // some early sanity checks size_t i, np = jl_nparams(tt); - size_t nargs = definition->nargs; // == jl_nparams(jl_unwrap_unionall(decl)); + switch (jl_va_tuple_kind((jl_datatype_t*)decl)) { + case JL_VARARG_NONE: + if (jl_is_va_tuple(tt)) + // odd + return; + if (np != nargs) + // there are not enough input parameters to make this into a compilation sig + return; + break; + case JL_VARARG_INT: + case JL_VARARG_BOUND: + if (jl_is_va_tuple(tt)) + // the length needed is not known, but required for compilation + return; + if (np < nargs - 1) + // there are not enough input parameters to make this into a compilation sig + return; + break; + case JL_VARARG_UNBOUND: + if (np < nspec && jl_is_va_tuple(tt)) + // there are insufficient given parameters for jl_isa_compileable_sig now to like this type + // (there were probably fewer methods defined when we first selected this signature, or + // the max varargs limit was not reached indicating the type is already fully-specialized) + return; + break; + } + + jl_value_t *type_i = NULL; + JL_GC_PUSH1(&type_i); for (i = 0; i < np; i++) { jl_value_t *elt = jl_tparam(tt, i); + if (jl_is_vararg(elt)) + elt = jl_unwrap_vararg(elt); jl_value_t *decl_i = jl_nth_slot_type(decl, i); + type_i = jl_rewrap_unionall(decl_i, decl); size_t i_arg = (i < nargs - 1 ? 
i : nargs - 1); - if (jl_is_kind(decl_i)) { + if (jl_is_kind(type_i)) { // if we can prove the match was against the kind (not a Type) // we want to put that in the cache instead if (!*newparams) *newparams = jl_svec_copy(tt->parameters); - elt = decl_i; + elt = type_i; jl_svecset(*newparams, i, elt); } else if (jl_is_type_type(elt)) { @@ -656,7 +833,7 @@ static void jl_compilation_sig( // and the result of matching the type signature // needs to be restricted to the concrete type 'kind' jl_value_t *kind = jl_typeof(jl_tparam0(elt)); - if (jl_subtype(kind, decl_i) && !jl_subtype((jl_value_t*)jl_type_type, decl_i)) { + if (jl_subtype(kind, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) { // if we can prove the match was against the kind (not a Type) // it's simpler (and thus better) to put that cache instead if (!*newparams) *newparams = jl_svec_copy(tt->parameters); @@ -668,7 +845,7 @@ static void jl_compilation_sig( // not triggered for isdispatchtuple(tt), this attempts to handle // some cases of adapting a random signature into a compilation signature // if we get a kind, where we don't expect to accept one, widen it to something more expected (Type{T}) - if (!(jl_subtype(elt, decl_i) && !jl_subtype((jl_value_t*)jl_type_type, decl_i))) { + if (!(jl_subtype(elt, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i))) { if (!*newparams) *newparams = jl_svec_copy(tt->parameters); elt = (jl_value_t*)jl_type_type; jl_svecset(*newparams, i, elt); @@ -686,16 +863,14 @@ static void jl_compilation_sig( if (!jl_has_free_typevars(decl_i) && !jl_is_kind(decl_i)) { if (decl_i != elt) { if (!*newparams) *newparams = jl_svec_copy(tt->parameters); + // n.b. it is possible here that !(elt <: decl_i), if elt was something unusual from intersection + // so this might narrow the result slightly, though still being compatible with the declared signature jl_svecset(*newparams, i, (jl_value_t*)decl_i); } continue; } } - if (jl_is_vararg(elt)) { - continue; - } - if (jl_types_equal(elt, (jl_value_t*)jl_type_type)) { // elt == Type{T} where T // not triggered for isdispatchtuple(tt), this attempts to handle // some cases of adapting a random signature into a compilation signature @@ -707,7 +882,7 @@ static void jl_compilation_sig( jl_svecset(*newparams, i, jl_type_type); } else if (jl_is_type_type(elt)) { // elt isa Type{T} - if (very_general_type(decl_i)) { + if (!jl_has_free_typevars(decl_i) && very_general_type(type_i)) { /* Here's a fairly simple heuristic: if this argument slot's declared type is general (Type or Any), @@ -746,15 +921,13 @@ static void jl_compilation_sig( */ if (!*newparams) *newparams = jl_svec_copy(tt->parameters); if (i < nargs || !definition->isva) { - jl_value_t *di = jl_type_intersection(decl_i, (jl_value_t*)jl_type_type); + jl_value_t *di = jl_type_intersection(type_i, (jl_value_t*)jl_type_type); assert(di != (jl_value_t*)jl_bottom_type); // issue #11355: DataType has a UID and so would take precedence in the cache if (jl_is_kind(di)) jl_svecset(*newparams, i, (jl_value_t*)jl_type_type); else jl_svecset(*newparams, i, di); - // TODO: recompute static parameter values, so in extreme cases we - // can give `T=Type` instead of `T=Type{Type{Type{...`. 
/* make editors happy:}}} */ } else { jl_svecset(*newparams, i, (jl_value_t*)jl_type_type); @@ -763,14 +936,15 @@ static void jl_compilation_sig( } int notcalled_func = (i_arg > 0 && i_arg <= 8 && !(definition->called & (1 << (i_arg - 1))) && + !jl_has_free_typevars(decl_i) && jl_subtype(elt, (jl_value_t*)jl_function_type)); - if (notcalled_func && (decl_i == (jl_value_t*)jl_any_type || - decl_i == (jl_value_t*)jl_function_type || - (jl_is_uniontype(decl_i) && // Base.Callable - ((((jl_uniontype_t*)decl_i)->a == (jl_value_t*)jl_function_type && - ((jl_uniontype_t*)decl_i)->b == (jl_value_t*)jl_type_type) || - (((jl_uniontype_t*)decl_i)->b == (jl_value_t*)jl_function_type && - ((jl_uniontype_t*)decl_i)->a == (jl_value_t*)jl_type_type))))) { + if (notcalled_func && (type_i == (jl_value_t*)jl_any_type || + type_i == (jl_value_t*)jl_function_type || + (jl_is_uniontype(type_i) && // Base.Callable + ((((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_function_type && + ((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_type_type) || + (((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_function_type && + ((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_type_type))))) { // and attempt to despecialize types marked Function, Callable, or Any // when called with a subtype of Function but is not called if (!*newparams) *newparams = jl_svec_copy(tt->parameters); @@ -782,16 +956,16 @@ static void jl_compilation_sig( // in general, here we want to find the biggest type that's not a // supertype of any other method signatures. so far we are conservative // and the types we find should be bigger. - if (jl_nparams(tt) >= nspec && jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND) { - jl_svec_t *limited = jl_alloc_svec(nspec); - JL_GC_PUSH1(&limited); + if (np >= nspec && jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND) { if (!*newparams) *newparams = tt->parameters; - size_t i; - for (i = 0; i < nspec - 1; i++) { - jl_svecset(limited, i, jl_svecref(*newparams, i)); - } - jl_value_t *lasttype = jl_svecref(*newparams, i - 1); - // if all subsequent arguments are subtypes of lasttype, specialize + if (max_varargs > 0) { + type_i = jl_svecref(*newparams, nspec - 2); + } else { + // If max varargs is zero, always specialize to (Any...) since + // there is no preceding parameter to use for `type_i` + type_i = jl_bottom_type; + } + // if all subsequent arguments are subtypes of type_i, specialize // on that instead of decl. for example, if decl is // (Any...) // and type is @@ -799,53 +973,41 @@ static void jl_compilation_sig( // then specialize as (Symbol...), but if type is // (Symbol, Int32, Expr) // then specialize as (Any...) 
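// --- Editorial illustration (not part of this patch) ---
// A minimal, self-contained sketch of the decision implemented just below: the trailing
// call-site parameters are collapsed into a homogeneous `Vararg{T}` only when every one
// of them is a subtype of the chosen pivot type T (the last fixed slot selected above);
// otherwise the declared vararg, instantiated in the static-parameter environment, is
// kept. Everything here is hypothetical toy code, not Julia's C API; the real check
// uses jl_subtype on jl_value_t* parameters.
#include <stddef.h>
#include <stdio.h>

typedef int toytype_t;                                         /* toy stand-in for a type */
static int toy_subtype(toytype_t a, toytype_t b) { return a <= b; }  /* toy "a <: b" */

/* 1 when params[pivot+1 .. n-1] are all subtypes of params[pivot], i.e. when the tail
   may safely be specialized as Vararg{params[pivot]} */
static int can_widen_tail(const toytype_t *params, size_t n, size_t pivot)
{
    for (size_t j = pivot + 1; j < n; j++)
        if (!toy_subtype(params[j], params[pivot]))
            return 0;
    return 1;
}

int main(void)
{
    toytype_t symbols[] = {1, 1, 1, 1};  /* like (Symbol, Symbol, Symbol, Symbol) */
    toytype_t mixed[]   = {1, 5, 9};     /* like (Symbol, Int32, Expr) */
    printf("%d %d\n", can_widen_tail(symbols, 4, 0), can_widen_tail(mixed, 3, 0));
    return 0;
}
// --- End editorial illustration ---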
- size_t j = i; + size_t j = nspec - 1; int all_are_subtypes = 1; for (; j < jl_svec_len(*newparams); j++) { jl_value_t *paramj = jl_svecref(*newparams, j); if (jl_is_vararg(paramj)) paramj = jl_unwrap_vararg(paramj); - if (!jl_subtype(paramj, lasttype)) { + if (!jl_subtype(paramj, type_i)) { all_are_subtypes = 0; break; } } if (all_are_subtypes) { // avoid Vararg{Type{Type{...}}} - if (jl_is_type_type(lasttype) && jl_is_type_type(jl_tparam0(lasttype))) - lasttype = (jl_value_t*)jl_type_type; - jl_svecset(limited, i, jl_wrap_vararg(lasttype, (jl_value_t*)NULL)); + if (jl_is_type_type(type_i) && jl_is_type_type(jl_tparam0(type_i))) + type_i = (jl_value_t*)jl_type_type; + type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1); // this cannot throw for these inputs } else { - jl_value_t *unw = jl_unwrap_unionall(decl); - jl_value_t *lastdeclt = jl_tparam(unw, jl_nparams(unw) - 1); - assert(jl_is_vararg(lastdeclt)); - int nsp = jl_svec_len(sparams); - if (nsp > 0 && jl_has_free_typevars(lastdeclt)) { - assert(jl_subtype_env_size(decl) == nsp); - lastdeclt = jl_instantiate_type_in_env(lastdeclt, (jl_unionall_t*)decl, jl_svec_data(sparams)); - // TODO: rewrap_unionall(lastdeclt, sparams) if any sparams isa TypeVar??? - // TODO: if we made any replacements above, sparams may now be incorrect - } - jl_svecset(limited, i, lastdeclt); + type_i = inst_varargp_in_env(decl, sparams); + } + jl_svec_t *limited = jl_alloc_svec(nspec); + size_t i; + for (i = 0; i < nspec - 1; i++) { + jl_svecset(limited, i, jl_svecref(*newparams, i)); } + jl_svecset(limited, i, type_i); *newparams = limited; - // now there is a problem: the widened signature is more - // general than just the given arguments, so it might conflict - // with another definition that doesn't have cache instances yet. - // to fix this, we insert guard cache entries for all intersections - // of this signature and definitions. those guard entries will - // supersede this one in conflicted cases, alerting us that there - // should actually be a cache miss. - // TODO: the above analysis assumes that there will never - // be a call attempted that should throw a no-method error - JL_GC_POP(); } + JL_GC_POP(); } // compute whether this type signature is a possible return value from jl_compilation_sig given a concrete-type for `tt` JL_DLLEXPORT int jl_isa_compileable_sig( jl_tupletype_t *type, + jl_svec_t *sparams, jl_method_t *definition) { jl_value_t *decl = definition->sig; @@ -871,23 +1033,24 @@ JL_DLLEXPORT int jl_isa_compileable_sig( // supertype of any other method signatures. so far we are conservative // and the types we find should be bigger. if (definition->isva) { - unsigned nspec_min = nargs + 1; // min number of non-vararg values before vararg - unsigned nspec_max = INT32_MAX; // max number of non-vararg values before vararg + unsigned nspec_min = nargs + 1; // min number of arg values (including tail vararg) + unsigned nspec_max = INT32_MAX; // max number of arg values (including tail vararg) jl_methtable_t *mt = jl_method_table_for(decl); + jl_methtable_t *kwmt = mt == jl_kwcall_mt ? 
jl_kwmethod_table_for(decl) : mt; if ((jl_value_t*)mt != jl_nothing) { // try to refine estimate of min and max - if (mt != jl_type_type_mt && mt != jl_nonfunction_mt) - nspec_min = mt->max_args + 2; - else - nspec_max = nspec_min; + uint8_t heuristic_used = 0; + nspec_max = nspec_min = nargs + get_max_varargs(definition, kwmt, mt, &heuristic_used); + if (heuristic_used) + nspec_max = INT32_MAX; // new methods may be added, increasing nspec_min later } - int isbound = (jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND); + int isunbound = (jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND); if (jl_is_vararg(jl_tparam(type, np - 1))) { - if (!isbound || np < nspec_min || np > nspec_max) + if (!isunbound || np < nspec_min || np > nspec_max) return 0; } else { - if (np < nargs - 1 || (isbound && np >= nspec_max)) + if (np < nargs - 1 || (isunbound && np >= nspec_max)) return 0; } } @@ -895,74 +1058,87 @@ JL_DLLEXPORT int jl_isa_compileable_sig( return 0; } + jl_value_t *type_i = NULL; + JL_GC_PUSH1(&type_i); for (i = 0; i < np; i++) { jl_value_t *elt = jl_tparam(type, i); - jl_value_t *decl_i = jl_nth_slot_type((jl_value_t*)decl, i); size_t i_arg = (i < nargs - 1 ? i : nargs - 1); if (jl_is_vararg(elt)) { - elt = jl_unwrap_vararg(elt); - if (jl_has_free_typevars(decl_i)) { - // TODO: in this case, answer semi-conservatively that these varargs are always compilable - // we don't have the ability to get sparams, so deciding if elt - // is a potential result of jl_instantiate_type_in_env for decl_i - // for any sparams that is consistent with the rest of the arguments - // seems like it would be extremely difficult - // and hopefully the upstream code probably gave us something reasonable - continue; - } - else if (jl_egal(elt, decl_i)) { - continue; + type_i = inst_varargp_in_env(decl, sparams); + if (jl_has_free_typevars(type_i)) { + JL_GC_POP(); + return 0; // something went badly wrong? } - else if (jl_is_type_type(elt) && jl_is_type_type(jl_tparam0(elt))) { - return 0; + if (jl_egal(elt, type_i)) + continue; // elt could be chosen by inst_varargp_in_env for these sparams + elt = jl_unwrap_vararg(elt); + if (jl_is_type_type(elt) && jl_is_type_type(jl_tparam0(elt))) { + JL_GC_POP(); + return 0; // elt would be set equal to jl_type_type instead } - // else, it needs to meet the usual rules + // else, elt also needs to meet the usual rules } + jl_value_t *decl_i = jl_nth_slot_type(decl, i); + type_i = jl_rewrap_unionall(decl_i, decl); + if (i_arg > 0 && i_arg <= sizeof(definition->nospecialize) * 8 && (definition->nospecialize & (1 << (i_arg - 1)))) { if (!jl_has_free_typevars(decl_i) && !jl_is_kind(decl_i)) { if (jl_egal(elt, decl_i)) continue; + JL_GC_POP(); return 0; } } if (jl_is_kind(elt)) { // kind slots always get guard entries (checking for subtypes of Type) - if (jl_subtype(elt, decl_i) && !jl_subtype((jl_value_t*)jl_type_type, decl_i)) + if (jl_subtype(elt, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) continue; - // TODO: other code paths that could reach here + // TODO: other code paths that could reach here? 
+ JL_GC_POP(); return 0; } - else if (jl_is_kind(decl_i)) { + else if (jl_is_kind(type_i)) { + JL_GC_POP(); return 0; } if (jl_is_type_type(jl_unwrap_unionall(elt))) { - int iscalled = i_arg > 0 && i_arg <= 8 && (definition->called & (1 << (i_arg - 1))); + int iscalled = (i_arg > 0 && i_arg <= 8 && (definition->called & (1 << (i_arg - 1)))) || + jl_has_free_typevars(decl_i); if (jl_types_equal(elt, (jl_value_t*)jl_type_type)) { - if (!iscalled && very_general_type(decl_i)) + if (!iscalled && very_general_type(type_i)) continue; if (i >= nargs && definition->isva) continue; + JL_GC_POP(); return 0; } - if (!iscalled && very_general_type(decl_i)) + if (!iscalled && very_general_type(type_i)) { + JL_GC_POP(); return 0; - if (!jl_is_datatype(elt)) + } + if (!jl_is_datatype(elt)) { + JL_GC_POP(); return 0; + } // if the declared type was not Any or Union{Type, ...}, // then the match must been with kind, such as UnionAll or DataType, // and the result of matching the type signature // needs to be corrected to the concrete type 'kind' (and not to Type) jl_value_t *kind = jl_typeof(jl_tparam0(elt)); - if (kind == jl_bottom_type) + if (kind == jl_bottom_type) { + JL_GC_POP(); return 0; // Type{Union{}} gets normalized to typeof(Union{}) - if (jl_subtype(kind, decl_i) && !jl_subtype((jl_value_t*)jl_type_type, decl_i)) + } + if (jl_subtype(kind, type_i) && !jl_subtype((jl_value_t*)jl_type_type, type_i)) { + JL_GC_POP(); return 0; // gets turned into a kind + } else if (jl_is_type_type(jl_tparam0(elt)) && // give up on specializing static parameters for Type{Type{Type{...}}} @@ -975,20 +1151,20 @@ JL_DLLEXPORT int jl_isa_compileable_sig( this can be determined using a type intersection. */ if (i < nargs || !definition->isva) { - jl_value_t *di = jl_type_intersection(decl_i, (jl_value_t*)jl_type_type); - JL_GC_PUSH1(&di); - assert(di != (jl_value_t*)jl_bottom_type); - if (jl_is_kind(di)) { + type_i = jl_type_intersection(type_i, (jl_value_t*)jl_type_type); + assert(type_i != (jl_value_t*)jl_bottom_type); + if (jl_is_kind(type_i)) { JL_GC_POP(); return 0; } - else if (!jl_types_equal(di, elt)) { + else if (!jl_types_equal(type_i, elt)) { JL_GC_POP(); return 0; } - JL_GC_POP(); + continue; } else { + JL_GC_POP(); return 0; } } @@ -996,24 +1172,29 @@ JL_DLLEXPORT int jl_isa_compileable_sig( } int notcalled_func = (i_arg > 0 && i_arg <= 8 && !(definition->called & (1 << (i_arg - 1))) && + !jl_has_free_typevars(decl_i) && jl_subtype(elt, (jl_value_t*)jl_function_type)); - if (notcalled_func && (decl_i == (jl_value_t*)jl_any_type || - decl_i == (jl_value_t*)jl_function_type || - (jl_is_uniontype(decl_i) && // Base.Callable - ((((jl_uniontype_t*)decl_i)->a == (jl_value_t*)jl_function_type && - ((jl_uniontype_t*)decl_i)->b == (jl_value_t*)jl_type_type) || - (((jl_uniontype_t*)decl_i)->b == (jl_value_t*)jl_function_type && - ((jl_uniontype_t*)decl_i)->a == (jl_value_t*)jl_type_type))))) { + if (notcalled_func && (type_i == (jl_value_t*)jl_any_type || + type_i == (jl_value_t*)jl_function_type || + (jl_is_uniontype(type_i) && // Base.Callable + ((((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_function_type && + ((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_type_type) || + (((jl_uniontype_t*)type_i)->b == (jl_value_t*)jl_function_type && + ((jl_uniontype_t*)type_i)->a == (jl_value_t*)jl_type_type))))) { // and attempt to despecialize types marked Function, Callable, or Any // when called with a subtype of Function but is not called if (elt == (jl_value_t*)jl_function_type) continue; + JL_GC_POP(); return 0; } 
- if (!jl_is_concrete_type(elt)) + if (!jl_is_concrete_type(elt)) { + JL_GC_POP(); return 0; + } } + JL_GC_POP(); return 1; } @@ -1036,7 +1217,7 @@ static int concretesig_equal(jl_value_t *tt, jl_value_t *simplesig) JL_NOTSAFEPO return 1; } -static inline jl_typemap_entry_t *lookup_leafcache(jl_array_t *leafcache JL_PROPAGATES_ROOT, jl_value_t *tt, size_t world) JL_NOTSAFEPOINT +static inline jl_typemap_entry_t *lookup_leafcache(jl_genericmemory_t *leafcache JL_PROPAGATES_ROOT, jl_value_t *tt, size_t world) JL_NOTSAFEPOINT { jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_eqtable_get(leafcache, (jl_value_t*)tt, NULL); if (entry) { @@ -1063,7 +1244,7 @@ static jl_method_instance_t *cache_method( int8_t offs = mt ? jl_cachearg_offset(mt) : 1; { // scope block if (mt) { - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world); if (entry) return entry->func.linfo; @@ -1081,20 +1262,40 @@ static jl_method_instance_t *cache_method( jl_svec_t *newparams = NULL; JL_GC_PUSH5(&temp, &temp2, &temp3, &newmeth, &newparams); + // Consider if we can cache with the preferred compile signature + // so that we can minimize the number of required cache entries. int cache_with_orig = 1; jl_tupletype_t *compilationsig = tt; - intptr_t nspec = (mt == NULL || mt == jl_type_type_mt || mt == jl_nonfunction_mt ? definition->nargs + 1 : mt->max_args + 2); - jl_compilation_sig(tt, sparams, definition, nspec, &newparams); + jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(definition->sig) : mt; + intptr_t max_varargs = get_max_varargs(definition, kwmt, mt, NULL); + jl_compilation_sig(tt, sparams, definition, max_varargs, &newparams); if (newparams) { - compilationsig = jl_apply_tuple_type(newparams); - temp2 = (jl_value_t*)compilationsig; - // In most cases `!jl_isa_compileable_sig(tt, definition))`, + temp2 = jl_apply_tuple_type(newparams, 1); + // Now there may be a problem: the widened signature is more general + // than just the given arguments, so it might conflict with another + // definition that does not have cache instances yet. To fix this, we + // may insert guard cache entries for all intersections of this + // signature and definitions. Those guard entries will supersede this + // one in conflicted cases, alerting us that there should actually be a + // cache miss. Alternatively, we may use the original signature in the + // cache, but use this return for compilation. 
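// --- Editorial illustration (not part of this patch) ---
// A toy model of the guard-signature idea described in the comment above: an entry
// cached under a widened signature also records the signatures of other methods that
// intersect it, and a lookup that matches one of those guards is reported as a cache
// miss so dispatch falls back to the slow path. All names below are hypothetical; in
// the real code the guards are stored on the typemap cache entry and checked on lookup.
#include <stddef.h>
#include <stdio.h>

typedef int toysig_t;                                          /* toy stand-in for a tuple-type signature */
static int toy_matches(toysig_t q, toysig_t sig) { return q % sig == 0; }  /* toy "q <: sig" */

struct toy_entry {
    toysig_t widened_sig;       /* the widened signature the result is cached under */
    const toysig_t *guardsigs;  /* other method signatures intersecting widened_sig */
    size_t nguards;
    const char *result;         /* stand-in for the cached method instance */
};

/* return the cached result, or NULL for a (real or forced) cache miss */
static const char *toy_lookup(const struct toy_entry *e, toysig_t query)
{
    if (!toy_matches(query, e->widened_sig))
        return NULL;
    for (size_t i = 0; i < e->nguards; i++)
        if (toy_matches(query, e->guardsigs[i]))
            return NULL;        /* another method would win for this call: force a miss */
    return e->result;
}

int main(void)
{
    const toysig_t guards[] = {6};                       /* a competing, more specific method */
    struct toy_entry e = {2, guards, 1, "widened entry"};
    printf("%s\n", toy_lookup(&e, 4)  ? toy_lookup(&e, 4)  : "miss");  /* served by the widened entry */
    printf("%s\n", toy_lookup(&e, 12) ? toy_lookup(&e, 12) : "miss");  /* guarded: forced miss */
    return 0;
}
// --- End editorial illustration ---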
+ // + // In most cases `!jl_isa_compileable_sig(tt, sparams, definition)`, // although for some cases, (notably Varargs) // we might choose a replacement type that's preferable but not strictly better - cache_with_orig = !jl_subtype((jl_value_t*)compilationsig, definition->sig); + int issubty; + temp = jl_type_intersection_env_s(temp2, (jl_value_t*)definition->sig, &newparams, &issubty); + assert(temp != (jl_value_t*)jl_bottom_type); (void)temp; + if (jl_egal((jl_value_t*)newparams, (jl_value_t*)sparams)) { + cache_with_orig = !issubty; + compilationsig = (jl_datatype_t*)temp2; + } + newparams = NULL; } - // TODO: maybe assert(jl_isa_compileable_sig(compilationsig, definition)); + // TODO: maybe assert(jl_isa_compileable_sig(compilationsig, sparams, definition)); newmeth = jl_specializations_get_linfo(definition, (jl_value_t*)compilationsig, sparams); + if (newmeth->cache_with_orig) + cache_with_orig = 1; jl_tupletype_t *cachett = tt; jl_svec_t* guardsigs = jl_emptysvec; @@ -1104,14 +1305,16 @@ static jl_method_instance_t *cache_method( size_t max_valid2 = ~(size_t)0; temp = ml_matches(mt, compilationsig, MAX_UNSPECIALIZED_CONFLICTS, 1, 1, world, 0, &min_valid2, &max_valid2, NULL); int guards = 0; - if (temp == jl_false) { + if (temp == jl_nothing) { cache_with_orig = 1; } else { int unmatched_tvars = 0; - size_t i, l = jl_array_len(temp); + size_t i, l = jl_array_nrows(temp); for (i = 0; i < l; i++) { jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(temp, i); + if (matc->method == definition) + continue; jl_svec_t *env = matc->sparams; int k, l; for (k = 0, l = jl_svec_len(env); k < l; k++) { @@ -1130,9 +1333,7 @@ static jl_method_instance_t *cache_method( cache_with_orig = 1; break; } - if (matc->method != definition) { - guards++; - } + guards++; } } if (!cache_with_orig && guards > 0) { @@ -1142,7 +1343,7 @@ static jl_method_instance_t *cache_method( guardsigs = jl_alloc_svec(guards); temp3 = (jl_value_t*)guardsigs; guards = 0; - for (i = 0, l = jl_array_len(temp); i < l; i++) { + for (i = 0, l = jl_array_nrows(temp); i < l; i++) { jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(temp, i); jl_method_t *other = matc->method; if (other != definition) { @@ -1153,6 +1354,7 @@ static jl_method_instance_t *cache_method( // NULL, jl_emptysvec, /*guard*/NULL, jl_cachearg_offset(mt), other->min_world, other->max_world); } } + assert(guards == jl_svec_len(guardsigs)); } if (!cache_with_orig) { // determined above that there's no ambiguity in also using compilationsig as the cacheablesig @@ -1160,6 +1362,10 @@ static jl_method_instance_t *cache_method( max_valid = max_valid2; cachett = compilationsig; } + else { + // do not revisit this decision + newmeth->cache_with_orig = 1; + } } // now scan `cachett` and ensure that `Type{T}` in the cache will be matched exactly by `typeof(T)` @@ -1187,7 +1393,7 @@ static jl_method_instance_t *cache_method( } } if (newparams) { - simplett = jl_apply_tuple_type(newparams); + simplett = (jl_datatype_t*)jl_apply_tuple_type(newparams, 1); temp2 = (jl_value_t*)simplett; } @@ -1218,11 +1424,11 @@ static jl_method_instance_t *cache_method( jl_cache_type_(tt); JL_UNLOCK(&typecache_lock); // Might GC } - jl_array_t *oldcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *oldcache = jl_atomic_load_relaxed(&mt->leafcache); jl_typemap_entry_t *old = (jl_typemap_entry_t*)jl_eqtable_get(oldcache, (jl_value_t*)tt, jl_nothing); jl_atomic_store_relaxed(&newentry->next, old); jl_gc_wb(newentry, old); - jl_array_t *newcache = 
(jl_array_t*)jl_eqtable_put(jl_atomic_load_relaxed(&mt->leafcache), (jl_value_t*)tt, (jl_value_t*)newentry, NULL); + jl_genericmemory_t *newcache = jl_eqtable_put(jl_atomic_load_relaxed(&mt->leafcache), (jl_value_t*)tt, (jl_value_t*)newentry, NULL); if (newcache != oldcache) { jl_atomic_store_release(&mt->leafcache, newcache); jl_gc_wb(mt, newcache); @@ -1238,34 +1444,45 @@ static jl_method_instance_t *cache_method( static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid); -static jl_method_instance_t *jl_mt_assoc_by_type(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_datatype_t *tt, size_t world) +static jl_method_instance_t *jl_mt_assoc_by_type(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_datatype_t *tt JL_MAYBE_UNROOTED, size_t world) { - // caller must hold the mt->writelock + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world); + if (entry) + return entry->func.linfo; + JL_TIMING(METHOD_LOOKUP_SLOW, METHOD_LOOKUP_SLOW); + jl_method_match_t *matc = NULL; + JL_GC_PUSH2(&tt, &matc); + JL_LOCK(&mt->writelock); assert(tt->isdispatchtuple || tt->hasfreetypevars); + jl_method_instance_t *mi = NULL; if (tt->isdispatchtuple) { - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world); if (entry) - return entry->func.linfo; + mi = entry->func.linfo; } - struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1); - if (entry) - return entry->func.linfo; + if (!mi) { + struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0}; + jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1); + if (entry) + mi = entry->func.linfo; + } - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - jl_method_match_t *matc = _gf_invoke_lookup((jl_value_t*)tt, jl_nothing, world, &min_valid, &max_valid); - jl_method_instance_t *nf = NULL; - if (matc) { - JL_GC_PUSH1(&matc); - jl_method_t *m = matc->method; - jl_svec_t *env = matc->sparams; - nf = cache_method(mt, &mt->cache, (jl_value_t*)mt, tt, m, world, min_valid, max_valid, env); - JL_GC_POP(); + if (!mi) { + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + matc = _gf_invoke_lookup((jl_value_t*)tt, jl_nothing, world, &min_valid, &max_valid); + if (matc) { + jl_method_t *m = matc->method; + jl_svec_t *env = matc->sparams; + mi = cache_method(mt, &mt->cache, (jl_value_t*)mt, tt, m, world, min_valid, max_valid, env); + } } - return nf; + JL_UNLOCK(&mt->writelock); + JL_GC_POP(); + return mi; } @@ -1273,24 +1490,29 @@ struct matches_env { struct typemap_intersection_env match; jl_typemap_entry_t *newentry; jl_value_t *shadowed; + jl_typemap_entry_t *replaced; }; + static int get_intersect_visitor(jl_typemap_entry_t *oldentry, struct typemap_intersection_env *closure0) { struct matches_env *closure = container_of(closure0, struct matches_env, match); - if (oldentry == closure->newentry) - return 1; - if (oldentry->max_world < ~(size_t)0 || oldentry->min_world == closure->newentry->min_world) - // skip if no world has both active - // also be careful not to try 
to scan something from the current dump-reload though - return 1; + assert(oldentry != closure->newentry && "entry already added"); + assert(oldentry->min_world <= closure->newentry->min_world && "old method cannot be newer than new method"); + assert(oldentry->max_world != closure->newentry->min_world && "method cannot be added at the same time as method deleted"); + // don't need to consider other similar methods if this oldentry will always fully intersect with them and dominates all of them + typemap_slurp_search(oldentry, &closure->match); jl_method_t *oldmethod = oldentry->func.method; + if (closure->match.issubty // e.g. jl_subtype(closure->newentry.sig, oldentry->sig) + && jl_subtype(oldmethod->sig, (jl_value_t*)closure->newentry->sig)) { // e.g. jl_type_equal(closure->newentry->sig, oldentry->sig) + closure->replaced = oldentry; + } if (closure->shadowed == NULL) closure->shadowed = (jl_value_t*)jl_alloc_vec_any(0); jl_array_ptr_1d_push((jl_array_t*)closure->shadowed, (jl_value_t*)oldmethod); return 1; } -static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t *newentry) +static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t *newentry, jl_typemap_entry_t **replaced, int8_t offs, size_t world) { jl_tupletype_t *type = newentry->sig; jl_tupletype_t *ttypes = (jl_tupletype_t*)jl_unwrap_unionall((jl_value_t*)type); @@ -1303,11 +1525,16 @@ static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t else va = NULL; } - struct matches_env env = {{get_intersect_visitor, (jl_value_t*)type, va, + // search for all intersecting methods active in the previous world, to determine the changes needed to be made for the next world + struct matches_env env = {{get_intersect_visitor, (jl_value_t*)type, va, /* .search_slurp = */ 0, + /* .min_valid = */ world, /* .max_valid = */ world, /* .ti = */ NULL, /* .env = */ jl_emptysvec, /* .issubty = */ 0}, - /* .newentry = */ newentry, /* .shadowed */ NULL}; + /* .newentry = */ newentry, /* .shadowed */ NULL, /* .replaced */ NULL}; JL_GC_PUSH3(&env.match.env, &env.match.ti, &env.shadowed); - jl_typemap_intersection_visitor(defs, 0, &env.match); + jl_typemap_intersection_visitor(defs, offs, &env.match); + env.match.env = NULL; + env.match.ti = NULL; + *replaced = env.replaced; JL_GC_POP(); return env.shadowed; } @@ -1335,7 +1562,9 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue jl_method_t *method = (jl_method_t*)newentry->func.method; jl_module_t *newmod = method->module; jl_module_t *oldmod = oldvalue->module; - jl_datatype_t *dt = jl_first_argument_datatype(oldvalue->sig); + jl_datatype_t *dt = jl_nth_argument_datatype(oldvalue->sig, 1); + if (dt == (jl_datatype_t*)jl_typeof(jl_kwcall_func)) + dt = jl_nth_argument_datatype(oldvalue->sig, 3); int anon = dt && is_anonfn_typename(jl_symbol_name(dt->name->name)); if ((jl_options.warn_overwrite == JL_OPTIONS_WARN_OVERWRITE_ON) || (jl_options.incremental && jl_generating_output()) || anon) { @@ -1354,21 +1583,24 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue jl_printf(s, ".\n"); jl_uv_flush(s); } - if (jl_options.incremental && jl_generating_output()) - jl_printf(JL_STDERR, " ** incremental compilation may be fatally broken for this module **\n\n"); + if (jl_generating_output()) { + jl_printf(JL_STDERR, "ERROR: Method overwriting is not permitted during Module precompilation. 
Use `__precompile__(false)` to opt-out of precompilation.\n"); + jl_throw(jl_precompilable_error); + } } static void update_max_args(jl_methtable_t *mt, jl_value_t *type) { - if (mt == jl_type_type_mt || mt == jl_nonfunction_mt) + if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || mt == jl_kwcall_mt) return; type = jl_unwrap_unionall(type); assert(jl_is_datatype(type)); size_t na = jl_nparams(type); if (jl_va_tuple_kind((jl_datatype_t*)type) == JL_VARARG_UNBOUND) na--; - if (na > mt->max_args) - mt->max_args = na; + // update occurs inside mt->writelock + if (na > jl_atomic_load_relaxed(&mt->max_args)) + jl_atomic_store_relaxed(&mt->max_args, na); } jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED = NULL; @@ -1394,7 +1626,7 @@ static void invalidate_external(jl_method_instance_t *mi, size_t max_world) { // AbstractInterpreter allows for MethodInstances to be present in non-local caches // inform those caches about the invalidation. JL_TRY { - size_t i, l = jl_array_len(callbacks); + size_t i, l = jl_array_nrows(callbacks); jl_value_t **args; JL_GC_PUSHARGS(args, 3); // these arguments are constant per call @@ -1422,11 +1654,12 @@ static void invalidate_external(jl_method_instance_t *mi, size_t max_world) { } } -static void do_nothing_with_codeinst(jl_code_instance_t *ci) {} +static void _invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, int depth); // recursively invalidate cached methods that had an edge to a replaced method -static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced, size_t max_world, int depth) +static void invalidate_method_instance(jl_method_instance_t *replaced, size_t max_world, int depth) { + jl_timing_counter_inc(JL_TIMING_COUNTER_Invalidations, 1); if (_jl_debug_method_invalidation) { jl_value_t *boxeddepth = NULL; JL_GC_PUSH1(&boxeddepth); @@ -1435,6 +1668,7 @@ static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method jl_array_ptr_1d_push(_jl_debug_method_invalidation, boxeddepth); JL_GC_POP(); } + //jl_static_show(JL_STDERR, (jl_value_t*)replaced); if (!jl_is_method(replaced->def.method)) return; // shouldn't happen, but better to be safe JL_LOCK(&replaced->def.method->writelock); @@ -1445,36 +1679,37 @@ static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method codeinst->max_world = max_world; } assert(codeinst->max_world <= max_world); - (*f)(codeinst); codeinst = jl_atomic_load_relaxed(&codeinst->next); } + JL_GC_PUSH1(&replaced); + invalidate_external(replaced, max_world); // recurse to all backedges to update their valid range also - jl_array_t *backedges = replaced->backedges; - if (backedges) { - replaced->backedges = NULL; - size_t i, l = jl_array_len(backedges); - for (i = 0; i < l; i++) { - jl_method_instance_t *replaced = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); - invalidate_method_instance(f, replaced, max_world, depth + 1); - } - } + _invalidate_backedges(replaced, max_world, depth + 1); + JL_GC_POP(); JL_UNLOCK(&replaced->def.method->writelock); } -// invalidate cached methods that overlap this definition -void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why) -{ - JL_LOCK(&replaced_mi->def.method->writelock); +static void _invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, int depth) { jl_array_t *backedges = replaced_mi->backedges; if (backedges) { // invalidate callers (if any) replaced_mi->backedges = NULL; - 
size_t i, l = jl_array_len(backedges); - jl_method_instance_t **replaced = (jl_method_instance_t**)jl_array_ptr_data(backedges); - for (i = 0; i < l; i++) { - invalidate_method_instance(f, replaced[i], max_world, 1); + JL_GC_PUSH1(&backedges); + size_t i = 0, l = jl_array_nrows(backedges); + jl_method_instance_t *replaced; + while (i < l) { + i = get_next_edge(backedges, i, NULL, &replaced); + invalidate_method_instance(replaced, max_world, depth); } + JL_GC_POP(); } +} + +// invalidate cached methods that overlap this definition +static void invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, const char *why) +{ + JL_LOCK(&replaced_mi->def.method->writelock); + _invalidate_backedges(replaced_mi, max_world, 1); JL_UNLOCK(&replaced_mi->def.method->writelock); if (why && _jl_debug_method_invalidation) { jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced_mi); @@ -1486,25 +1721,37 @@ void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t * } // add a backedge from callee to caller -JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_method_instance_t *caller) +JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_method_instance_t *caller) { JL_LOCK(&callee->def.method->writelock); + if (invokesig == jl_nothing) + invokesig = NULL; // julia uses `nothing` but C uses NULL (#undef) + int found = 0; + // TODO: use jl_cache_type_(invokesig) like cache_method does to save memory if (!callee->backedges) { // lazy-init the backedges array - callee->backedges = jl_alloc_vec_any(1); + callee->backedges = jl_alloc_vec_any(0); jl_gc_wb(callee, callee->backedges); - jl_array_ptr_set(callee->backedges, 0, caller); } else { - size_t i, l = jl_array_len(callee->backedges); + size_t i = 0, l = jl_array_nrows(callee->backedges); for (i = 0; i < l; i++) { - if (jl_array_ptr_ref(callee->backedges, i) == (jl_value_t*)caller) + // optimized version of while (i < l) i = get_next_edge(callee->backedges, i, &invokeTypes, &mi); + jl_value_t *mi = jl_array_ptr_ref(callee->backedges, i); + if (mi != (jl_value_t*)caller) + continue; + jl_value_t *invokeTypes = i > 0 ? 
jl_array_ptr_ref(callee->backedges, i - 1) : NULL; + if (invokeTypes && jl_is_method_instance(invokeTypes)) + invokeTypes = NULL; + if ((invokesig == NULL && invokeTypes == NULL) || + (invokesig && invokeTypes && jl_types_equal(invokesig, invokeTypes))) { + found = 1; break; - } - if (i == l) { - jl_array_ptr_1d_push(callee->backedges, (jl_value_t*)caller); + } } } + if (!found) + push_edge(callee->backedges, invokesig, caller); JL_UNLOCK(&callee->def.method->writelock); } @@ -1520,7 +1767,8 @@ JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *t jl_array_ptr_set(mt->backedges, 1, caller); } else { - size_t i, l = jl_array_len(mt->backedges); + // TODO: use jl_cache_type_(tt) like cache_method does, instead of a linear scan + size_t i, l = jl_array_nrows(mt->backedges); for (i = 1; i < l; i += 2) { if (jl_types_equal(jl_array_ptr_ref(mt->backedges, i - 1), typ)) { if (jl_array_ptr_ref(mt->backedges, i) == caller) { @@ -1551,14 +1799,31 @@ static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0) jl_method_instance_t *mi = oldentry->func.linfo; int intersects = 0; jl_method_instance_t **d = (jl_method_instance_t**)jl_array_ptr_data(env->shadowed); - size_t i, n = jl_array_len(env->shadowed); + size_t i, n = jl_array_nrows(env->shadowed); for (i = 0; i < n; i++) { if (mi == d[i]) { intersects = 1; break; } } + if (intersects && (jl_value_t*)oldentry->sig != mi->specTypes) { + // the entry may point to a widened MethodInstance, in which case it is worthwhile to check if the new method + // actually has any meaningful intersection with the old one + intersects = !jl_has_empty_intersection((jl_value_t*)oldentry->sig, (jl_value_t*)env->newentry->sig); + } + if (intersects && oldentry->guardsigs != jl_emptysvec) { + // similarly, if it already matches an existing guardsigs, this is already safe to keep + size_t i, l; + for (i = 0, l = jl_svec_len(oldentry->guardsigs); i < l; i++) { + // see corresponding code in jl_typemap_entry_assoc_exact + if (jl_subtype((jl_value_t*)env->newentry->sig, jl_svecref(oldentry->guardsigs, i))) { + intersects = 0; + break; + } + } + } if (intersects) { + // TODO call invalidate_external here? 
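// --- Editorial illustration (not part of this patch) ---
// Toy version of the refinement just above: when a new method is added, a cache entry
// is treated as invalidated only if its (possibly widened) signature really intersects
// the new method's signature and none of the entry's guard signatures already covers
// the new signature. The predicates below are hypothetical stand-ins for the real
// jl_has_empty_intersection and jl_subtype checks.
#include <stddef.h>
#include <stdio.h>

typedef int toysig_t;                                                    /* toy stand-in for a signature */
static int toy_intersects(toysig_t a, toysig_t b) { return (a & b) != 0; }       /* toy type intersection */
static int toy_covers(toysig_t inner, toysig_t outer) { return (inner & ~outer) == 0; } /* toy "inner <: outer" */

static int toy_must_invalidate(toysig_t entry_sig, const toysig_t *guards, size_t nguards,
                               toysig_t new_method_sig)
{
    if (!toy_intersects(entry_sig, new_method_sig))
        return 0;               /* disjoint: the entry can never dispatch to the new method */
    for (size_t i = 0; i < nguards; i++)
        if (toy_covers(new_method_sig, guards[i]))
            return 0;           /* a guard already forces a miss for such calls */
    return 1;
}

int main(void)
{
    const toysig_t guards[] = {0x3};
    printf("%d %d\n",
           toy_must_invalidate(0x6, guards, 1, 0x2),   /* covered by a guard -> keep entry (0) */
           toy_must_invalidate(0x6, guards, 1, 0x4));  /* genuine new overlap -> invalidate (1) */
    return 0;
}
// --- End editorial illustration ---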
if (_jl_debug_method_invalidation) { jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi); jl_value_t *loctag = jl_cstr_to_string("invalidate_mt_cache"); @@ -1594,7 +1859,7 @@ static int typemap_search(jl_typemap_entry_t *entry, void *closure) static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT; -#ifndef __clang_gcanalyzer__ +#ifndef __clang_gcanalyzer__ /* in general, jl_typemap_visitor could be a safepoint, but not for typemap_search */ static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT { jl_value_t *closure = (jl_value_t*)(method); if (jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), typemap_search, &closure)) @@ -1603,8 +1868,11 @@ static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_RO } #endif -static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *methodentry, jl_method_t *method, size_t max_world) +static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *methodentry, size_t max_world) { + if (jl_options.incremental && jl_generating_output()) + jl_error("Method deletion is not possible during Module precompile."); + jl_method_t *method = methodentry->func.method; assert(!method->is_for_opaque_closure); method->deleted_world = methodentry->max_world = max_world; // drop this method from mt->cache @@ -1614,10 +1882,10 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m mt_cache_env.shadowed = NULL; mt_cache_env.invalidated = 0; jl_typemap_visitor(jl_atomic_load_relaxed(&mt->cache), disable_mt_cache, (void*)&mt_cache_env); - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); - size_t i, l = jl_array_len(leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + size_t i, l = leafcache->length; for (i = 1; i < l; i += 2) { - jl_typemap_entry_t *oldentry = (jl_typemap_entry_t*)jl_array_ptr_ref(leafcache, i); + jl_typemap_entry_t *oldentry = (jl_typemap_entry_t*)jl_genericmemory_ptr_ref(leafcache, i); if (oldentry) { while ((jl_value_t*)oldentry != jl_nothing) { if (oldentry->max_world == ~(size_t)0) @@ -1628,16 +1896,22 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m } // Invalidate the backedges int invalidated = 0; - jl_svec_t *specializations = jl_atomic_load_relaxed(&methodentry->func.method->specializations); + jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations); + JL_GC_PUSH1(&specializations); + if (!jl_is_svec(specializations)) + specializations = (jl_value_t*)jl_svec1(specializations); l = jl_svec_len(specializations); for (i = 0; i < l; i++) { jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i); if ((jl_value_t*)mi != jl_nothing) { invalidated = 1; - invalidate_external(mi, methodentry->max_world); - invalidate_backedges(&do_nothing_with_codeinst, mi, methodentry->max_world, "jl_method_table_disable"); + invalidate_external(mi, max_world); + invalidate_backedges(mi, max_world, "jl_method_table_disable"); } } + JL_GC_POP(); + // XXX: this might have resolved an ambiguity, for which we have not tracked the edge here, + // and thus now introduce a mistake into inference if (invalidated && _jl_debug_method_invalidation) { jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method); jl_value_t *loctag = jl_cstr_to_string("jl_method_table_disable"); @@ -1649,18 +1923,15 @@ static 
void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m JL_DLLEXPORT void jl_method_table_disable(jl_methtable_t *mt, jl_method_t *method) { - if (jl_options.incremental && jl_generating_output()) - jl_printf(JL_STDERR, "WARNING: method deletion during Module precompile may lead to undefined behavior" - "\n ** incremental compilation may be fatally broken for this module **\n\n"); jl_typemap_entry_t *methodentry = do_typemap_search(mt, method); JL_LOCK(&mt->writelock); // Narrow the world age on the method to make it uncallable size_t world = jl_atomic_fetch_add(&jl_world_counter, 1); - jl_method_table_invalidate(mt, methodentry, method, world); + jl_method_table_invalidate(mt, methodentry, world); JL_UNLOCK(&mt->writelock); } -static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **isect, jl_value_t **isect2) +static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **isect JL_REQUIRE_ROOTED_SLOT, jl_value_t **isect2 JL_REQUIRE_ROOTED_SLOT) { *isect2 = NULL; int is_subty = 0; @@ -1686,11 +1957,45 @@ static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **is return 1; } +enum morespec_options { + morespec_unknown, + morespec_isnot, + morespec_is +}; + +// check if `type` is replacing `m` with an ambiguity here, given other methods in `d` that already match it +static int is_replacing(char ambig, jl_value_t *type, jl_method_t *m, jl_method_t *const *d, size_t n, jl_value_t *isect, jl_value_t *isect2, char *morespec) +{ + size_t k; + for (k = 0; k < n; k++) { + jl_method_t *m2 = d[k]; + // see if m2 also fully covered this intersection + if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect2 && jl_subtype(isect2, m2->sig)))) + continue; + if (morespec[k] == (char)morespec_unknown) + morespec[k] = (char)(jl_type_morespecific(m2->sig, type) ? 
morespec_is : morespec_isnot); + if (morespec[k] == (char)morespec_is) + // not actually shadowing this--m2 will still be better + return 0; + // if type is not more specific than m (thus now dominating it) + // then there is a new ambiguity here, + // since m2 was also a previous match over isect, + // see if m was previously dominant over all m2 + // or if this was already ambiguous before + if (ambig != morespec_is && !jl_type_morespecific(m->sig, m2->sig)) { + // m and m2 were previously ambiguous over the full intersection of mi with type, and will still be ambiguous with addition of type + return 0; + } + } + return 1; +} + JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype) { - JL_TIMING(ADD_METHOD); + JL_TIMING(ADD_METHOD, ADD_METHOD); assert(jl_is_method(method)); assert(jl_is_mtable(mt)); + jl_timing_show_method(method, JL_TIMING_DEFAULT_BLOCK); jl_value_t *type = method->sig; jl_value_t *oldvalue = NULL; jl_array_t *oldmi = NULL; @@ -1704,23 +2009,22 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method jl_typemap_entry_t *newentry = NULL; JL_GC_PUSH7(&oldvalue, &oldmi, &newentry, &loctag, &isect, &isect2, &isect3); JL_LOCK(&mt->writelock); - // first find if we have an existing entry to delete - struct jl_typemap_assoc search = {(jl_value_t*)type, method->primary_world, NULL, 0, ~(size_t)0}; - jl_typemap_entry_t *oldentry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, /*offs*/0, /*subtype*/0); - // then add our new entry + // add our new entry newentry = jl_typemap_alloc((jl_tupletype_t*)type, simpletype, jl_emptysvec, (jl_value_t*)method, method->primary_world, method->deleted_world); - jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0); - if (oldentry) { - jl_method_t *m = oldentry->func.method; - method_overwrite(newentry, m); - jl_method_table_invalidate(mt, oldentry, m, max_world); + jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, jl_cachearg_offset(mt)); + jl_typemap_entry_t *replaced = NULL; + // then check what entries we replaced + oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry, &replaced, jl_cachearg_offset(mt), max_world); + int invalidated = 0; + if (replaced) { + oldvalue = (jl_value_t*)replaced; + invalidated = 1; + method_overwrite(newentry, replaced->func.method); + jl_method_table_invalidate(mt, replaced, max_world); } else { - oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry); - - int invalidated = 0; - jl_method_t **d; + jl_method_t *const *d; size_t j, n; if (oldvalue == NULL) { d = NULL; @@ -1729,11 +2033,11 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method else { assert(jl_is_array(oldvalue)); d = (jl_method_t**)jl_array_ptr_data(oldvalue); - n = jl_array_len(oldvalue); + n = jl_array_nrows(oldvalue); } if (mt->backedges) { jl_value_t **backedges = jl_array_ptr_data(mt->backedges); - size_t i, na = jl_array_len(mt->backedges); + size_t i, na = jl_array_nrows(mt->backedges); size_t ins = 0; for (i = 1; i < na; i += 2) { jl_value_t *backedgetyp = backedges[i - 1]; @@ -1749,6 +2053,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method // -> less specific or ambiguous with any one of them: can ignore the missing edge (not missing) // -> some may have been ambiguous: still are // -> some may have been called: they may be partly replaced (will be detected in the loop later) + // c.f. 
`is_replacing`, which is a similar query, but with an existing method match to compare against missing = 1; size_t j; for (j = 0; j < n; j++) { @@ -1765,8 +2070,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method } if (missing) { jl_method_instance_t *backedge = (jl_method_instance_t*)backedges[i]; - invalidate_external(backedge, max_world); - invalidate_method_instance(&do_nothing_with_codeinst, backedge, max_world, 0); + invalidate_method_instance(backedge, max_world, 0); invalidated = 1; if (_jl_debug_method_invalidation) jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)backedgetyp); @@ -1783,20 +2087,23 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method } if (oldvalue) { oldmi = jl_alloc_vec_any(0); - enum morespec_options { - morespec_unknown, - morespec_isnot, - morespec_is - }; char *morespec = (char*)alloca(n); memset(morespec, morespec_unknown, n); for (j = 0; j < n; j++) { jl_method_t *m = d[j]; if (morespec[j] == (char)morespec_is) continue; - jl_svec_t *specializations = jl_atomic_load_relaxed(&m->specializations); - _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations); - size_t i, l = jl_svec_len(specializations); + loctag = jl_atomic_load_relaxed(&m->specializations); // use loctag for a gcroot + _Atomic(jl_method_instance_t*) *data; + size_t i, l; + if (jl_is_svec(loctag)) { + data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(loctag); + l = jl_svec_len(loctag); + } + else { + data = (_Atomic(jl_method_instance_t*)*) &loctag; + l = 1; + } enum morespec_options ambig = morespec_unknown; for (i = 0; i < l; i++) { jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]); @@ -1804,6 +2111,11 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method continue; isect3 = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes); if (jl_type_intersection2(type, isect3, &isect, &isect2)) { + // TODO: this only checks pair-wise for ambiguities, but the ambiguities could arise from the interaction of multiple methods + // and thus might miss a case where we introduce an ambiguity between two existing methods + // We could instead work to sort this into 3 groups `morespecific .. ambiguous .. lesspecific`, with `type` in ambiguous, + // such that everything in `morespecific` dominates everything in `ambiguous`, and everything in `ambiguous` dominates everything in `lessspecific` + // And then compute where each isect falls, and whether it changed group--necessitating invalidation--or not. if (morespec[j] == (char)morespec_unknown) morespec[j] = (char)(jl_type_morespecific(m->sig, type) ? morespec_is : morespec_isnot); if (morespec[j] == (char)morespec_is) @@ -1812,38 +2124,52 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method if (ambig == morespec_unknown) ambig = jl_type_morespecific(type, m->sig) ? morespec_is : morespec_isnot; // replacing a method--see if this really was the selected method previously - // over the intersection - if (ambig == morespec_isnot) { - size_t k; - for (k = 0; k < n; k++) { - jl_method_t *m2 = d[k]; - if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect && jl_subtype(isect, m2->sig)))) - continue; - if (morespec[k] == (char)morespec_unknown) - morespec[k] = (char)(jl_type_morespecific(m2->sig, type) ? 
morespec_is : morespec_isnot); - if (morespec[k] == (char)morespec_is) - // not actually shadowing this--m2 will still be better - break; - // since m2 was also a previous match over isect, - // see if m was also previously dominant over all m2 - if (!jl_type_morespecific(m->sig, m2->sig)) - break; + // over the intersection (not ambiguous) and the new method will be selected now (morespec_is) + int replaced_dispatch = is_replacing(ambig, type, m, d, n, isect, isect2, morespec); + // found that this specialization dispatch got replaced by m + // call invalidate_backedges(mi, max_world, "jl_method_table_insert"); + // but ignore invoke-type edges + jl_array_t *backedges = mi->backedges; + if (backedges) { + size_t ib = 0, insb = 0, nb = jl_array_nrows(backedges); + jl_value_t *invokeTypes; + jl_method_instance_t *caller; + while (ib < nb) { + ib = get_next_edge(backedges, ib, &invokeTypes, &caller); + int replaced_edge; + if (invokeTypes) { + // n.b. normally we must have mi.specTypes <: invokeTypes <: m.sig (though it might not strictly hold), so we only need to check the other subtypes + if (jl_egal(invokeTypes, caller->def.method->sig)) + replaced_edge = 0; // if invokeTypes == m.sig, then the only way to change this invoke is to replace the method itself + else + replaced_edge = jl_subtype(invokeTypes, type) && is_replacing(ambig, type, m, d, n, invokeTypes, NULL, morespec); + } + else { + replaced_edge = replaced_dispatch; + } + if (replaced_edge) { + invalidate_method_instance(caller, max_world, 1); + invalidated = 1; + } + else { + insb = set_next_edge(backedges, insb, invokeTypes, caller); + } } - if (k != n) - continue; + jl_array_del_end(backedges, nb - insb); } jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi); invalidate_external(mi, max_world); - if (mi->backedges) { - invalidated = 1; - invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_insert"); + if (_jl_debug_method_invalidation && invalidated) { + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi); + loctag = jl_cstr_to_string("jl_method_table_insert"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); } } } } - if (jl_array_len(oldmi)) { + if (jl_array_nrows(oldmi)) { // search mt->cache and leafcache and drop anything that might overlap with the new method - // TODO: keep track of just the `mi` for which shadowing was true (to avoid recomputing that here) + // this is very cheap, so we don't mind being fairly conservative at over-approximating this struct invalidate_mt_env mt_cache_env; mt_cache_env.max_world = max_world; mt_cache_env.shadowed = oldmi; @@ -1851,10 +2177,10 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method mt_cache_env.invalidated = 0; jl_typemap_visitor(jl_atomic_load_relaxed(&mt->cache), invalidate_mt_cache, (void*)&mt_cache_env); - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); - size_t i, l = jl_array_len(leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + size_t i, l = leafcache->length; for (i = 1; i < l; i += 2) { - jl_value_t *entry = jl_array_ptr_ref(leafcache, i); + jl_value_t *entry = jl_genericmemory_ptr_ref(leafcache, i); if (entry) { while (entry != jl_nothing) { invalidate_mt_cache((jl_typemap_entry_t*)entry, (void*)&mt_cache_env); @@ -1864,13 +2190,13 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method } } } - if (invalidated && _jl_debug_method_invalidation) { - jl_array_ptr_1d_push(_jl_debug_method_invalidation, 
(jl_value_t*)method); - loctag = jl_cstr_to_string("jl_method_table_insert"); - jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); - } - update_max_args(mt, type); } + if (invalidated && _jl_debug_method_invalidation) { + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method); + loctag = jl_cstr_to_string("jl_method_table_insert"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + } + update_max_args(mt, type); JL_UNLOCK(&mt->writelock); JL_GC_POP(); } @@ -1894,7 +2220,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n"); jl_ptls_t ptls = jl_current_task->ptls; ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0); - jl_critical_error(0, NULL, jl_current_task); + jl_critical_error(0, 0, NULL, jl_current_task); abort(); } // not reached @@ -1918,7 +2244,19 @@ static jl_tupletype_t *lookup_arg_type_tuple(jl_value_t *arg1 JL_PROPAGATES_ROOT return jl_lookup_arg_tuple_type(arg1, args, nargs, 1); } -jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world) +JL_DLLEXPORT jl_method_instance_t *jl_method_lookup_by_tt(jl_tupletype_t *tt, size_t world, jl_value_t *_mt) +{ + jl_methtable_t *mt = NULL; + if (_mt == jl_nothing) + mt = jl_gf_ft_mtable(jl_tparam0(tt)); + else { + assert(jl_isa(_mt, (jl_value_t*)jl_methtable_type)); + mt = (jl_methtable_t*) _mt; + } + return jl_mt_assoc_by_type(mt, tt, world); +} + +JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world) { assert(nargs > 0 && "expected caller to handle this case"); jl_methtable_t *mt = jl_gf_mtable(args[0]); @@ -1927,16 +2265,7 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w if (entry) return entry->func.linfo; jl_tupletype_t *tt = arg_type_tuple(args[0], &args[1], nargs); - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); - entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world); - if (entry) - return entry->func.linfo; - JL_GC_PUSH1(&tt); - JL_LOCK(&mt->writelock); - jl_method_instance_t *sf = jl_mt_assoc_by_type(mt, tt, world); - JL_UNLOCK(&mt->writelock); - JL_GC_POP(); - return sf; + return jl_mt_assoc_by_type(mt, tt, world); } // return a Vector{Any} of svecs, each describing a method match: @@ -1947,15 +2276,16 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w // // lim is the max # of methods to return. if there are more, returns jl_false. // Negative values stand for no limit. 
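Editor's aside, not part of the patch: the jl_method_lookup_by_tt entry point added above takes a fully-formed Tuple type rather than a buffer of argument values, which is convenient from embedding code. A hypothetical use under those assumptions (initialized runtime, the exported symbol declared manually if julia.h does not expose it; the helper name is ours):

#include <julia.h>

/* Find the MethodInstance that sin(1.0) would dispatch to in the current world.
 * Passing jl_nothing for the method-table argument lets the callee derive the
 * table from the function type in the first tuple parameter. */
static void lookup_sin_float64(void)
{
    jl_value_t *tt = jl_eval_string("Tuple{typeof(sin), Float64}");
    if (tt == NULL || jl_exception_occurred())
        return;
    JL_GC_PUSH1(&tt);
    jl_method_instance_t *mi =
        jl_method_lookup_by_tt((jl_tupletype_t*)tt, jl_get_world_counter(), jl_nothing);
    JL_GC_POP();
    (void)mi;   /* NULL when no method matches in this world */
}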
-// Unless lim == -1, remove matches that are unambiguously covered by earler ones +// Unless lim == -1, remove matches that are unambiguously covered by earlier ones JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous, size_t world, size_t *min_valid, size_t *max_valid, int *ambig) { - JL_TIMING(METHOD_MATCH); if (ambig != NULL) *ambig = 0; jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types); - if (jl_is_tuple_type(unw) && jl_tparam0(unw) == jl_bottom_type) + if (!jl_is_tuple_type(unw)) + return (jl_value_t*)jl_an_empty_vec_any; + if (unw == (jl_value_t*)jl_emptytuple_type || jl_tparam0(unw) == jl_bottom_type) return (jl_value_t*)jl_an_empty_vec_any; if (mt == jl_nothing) mt = (jl_value_t*)jl_method_table_for(unw); @@ -2010,29 +2340,33 @@ jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi, size_t world) } jl_mutex_t precomp_statement_out_lock; +ios_t f_precompile; +JL_STREAM* s_precompile = NULL; + +static void init_precompile_output(void) +{ + const char *t = jl_options.trace_compile; + if (!strncmp(t, "stderr", 6)) { + s_precompile = JL_STDERR; + } + else { + if (ios_file(&f_precompile, t, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open precompile statement file \"%s\" for writing", t); + s_precompile = (JL_STREAM*) &f_precompile; + } +} static void record_precompile_statement(jl_method_instance_t *mi) { - static ios_t f_precompile; - static JL_STREAM* s_precompile = NULL; jl_method_t *def = mi->def.method; if (jl_options.trace_compile == NULL) return; if (!jl_is_method(def)) return; - if (jl_n_threads > 1) - JL_LOCK(&precomp_statement_out_lock); + JL_LOCK(&precomp_statement_out_lock); if (s_precompile == NULL) { - const char *t = jl_options.trace_compile; - if (!strncmp(t, "stderr", 6)) { - s_precompile = JL_STDERR; - } - else { - if (ios_file(&f_precompile, t, 1, 1, 1, 1) == NULL) - jl_errorf("cannot open precompile statement file \"%s\" for writing", t); - s_precompile = (JL_STREAM*) &f_precompile; - } + init_precompile_output(); } if (!jl_has_free_typevars(mi->specTypes)) { jl_printf(s_precompile, "precompile("); @@ -2041,15 +2375,74 @@ static void record_precompile_statement(jl_method_instance_t *mi) if (s_precompile != JL_STDERR) ios_flush(&f_precompile); } - if (jl_n_threads > 1) - JL_UNLOCK(&precomp_statement_out_lock); + JL_UNLOCK(&precomp_statement_out_lock); +} + +JL_DLLEXPORT void jl_write_precompile_statement(char* statement) +{ + if (jl_options.trace_compile == NULL) + return; + JL_LOCK(&precomp_statement_out_lock); + if (s_precompile == NULL) { + init_precompile_output(); + } + jl_printf(s_precompile, "%s\n", statement); + if (s_precompile != JL_STDERR) + ios_flush(&f_precompile); + JL_UNLOCK(&precomp_statement_out_lock); } +jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT); + jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world) { + // quick check if we already have a compiled result jl_code_instance_t *codeinst = jl_method_compiled(mi, world); if (codeinst) return codeinst; + + // if mi has a better (wider) signature preferred for compilation use that + // instead and just copy it here for caching + jl_method_instance_t *mi2 = jl_normalize_to_compilable_mi(mi); + if (mi2 != mi) { + jl_code_instance_t *codeinst2 = jl_compile_method_internal(mi2, world); + jl_code_instance_t *codeinst = jl_get_method_inferred( + mi, codeinst2->rettype, + codeinst2->min_world, codeinst2->max_world); + if 
(jl_atomic_load_relaxed(&codeinst->invoke) == NULL) { + codeinst->rettype_const = codeinst2->rettype_const; + uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags); + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke); + void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr); + if (fptr != NULL) { + while (!(specsigflags & 0b10)) { + jl_cpu_pause(); + specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags); + } + invoke = jl_atomic_load_relaxed(&codeinst2->invoke); + void *prev_fptr = NULL; + // see jitlayers.cpp for the ordering restrictions here + if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) { + jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1); + jl_atomic_store_release(&codeinst->invoke, invoke); + // unspec is probably not specsig, but might be using specptr + jl_atomic_store_release(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag + } else { + // someone else already compiled it + while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + // codeinst is now set up fully, safe to return + } + } else { + jl_callptr_t prev = NULL; + jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke); + } + } + // don't call record_precompile_statement here, since we already compiled it as mi2 which is better + return codeinst; + } + int compile_option = jl_options.compile_enabled; jl_method_t *def = mi->def.method; // disabling compilation per-module can override global setting @@ -2069,14 +2462,22 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized); if (unspecmi) { jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache); - if (unspec && jl_atomic_load_acquire(&unspec->invoke)) { + jl_callptr_t unspec_invoke = NULL; + if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) { jl_code_instance_t *codeinst = jl_new_codeinst(mi, - (jl_value_t*)jl_any_type, NULL, NULL, + (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL, 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0); - codeinst->isspecsig = 0; - codeinst->specptr = unspec->specptr; + void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr); + if (unspec_fptr) { + // wait until invoke and specsigflags are properly set + while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke); + } + jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr); codeinst->rettype_const = unspec->rettype_const; - jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&unspec->invoke)); + jl_atomic_store_release(&codeinst->invoke, unspec_invoke); jl_mi_cache_insert(mi, codeinst); record_precompile_statement(mi); return codeinst; @@ -2084,21 +2485,22 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t } } } + // if that didn't work and compilation is off, try running in the interpreter if (compile_option == JL_OPTIONS_COMPILE_OFF || compile_option == JL_OPTIONS_COMPILE_MIN) { - jl_code_info_t *src = jl_code_for_interpreter(mi); - if (!jl_code_requires_compiler(src)) { + jl_code_info_t *src = jl_code_for_interpreter(mi, world); + if (!jl_code_requires_compiler(src, 0)) { jl_code_instance_t *codeinst = jl_new_codeinst(mi, - (jl_value_t*)jl_any_type, NULL, NULL, + (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL, 0, 1, ~(size_t)0, 0, 0, 
jl_nothing, 0); - jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_interpret_call); + jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call); jl_mi_cache_insert(mi, codeinst); record_precompile_statement(mi); return codeinst; } if (compile_option == JL_OPTIONS_COMPILE_OFF) { - jl_printf(JL_STDERR, "code missing for "); + jl_printf(JL_STDERR, "No compiled code available for "); jl_static_show(JL_STDERR, (jl_value_t*)mi); jl_printf(JL_STDERR, " : sysimg may not have been built with --compile=all\n"); } @@ -2109,28 +2511,36 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi); jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0); // ask codegen to make the fptr for unspec - if (jl_atomic_load_acquire(&ucache->invoke) == NULL) { - if (def->source == jl_nothing && (ucache->def->uninferred == jl_nothing || - ucache->def->uninferred == NULL)) { - jl_printf(JL_STDERR, "source not available for "); - jl_static_show(JL_STDERR, (jl_value_t*)mi); - jl_printf(JL_STDERR, "\n"); - jl_error("source missing for method that needs to be compiled"); + jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke); + if (ucache_invoke == NULL) { + if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing || + jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) { + jl_throw(jl_new_struct(jl_missingcodeerror_type, (jl_value_t*)mi)); } jl_generate_fptr_for_unspecialized(ucache); + ucache_invoke = jl_atomic_load_acquire(&ucache->invoke); } - assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL); - if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam && - jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) { + assert(ucache_invoke != NULL); + if (ucache_invoke != jl_fptr_sparam && + ucache_invoke != jl_fptr_interpret_call) { // only these care about the exact specTypes, otherwise we can use it directly return ucache; } - codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL, + codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL, 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0); - codeinst->isspecsig = 0; - codeinst->specptr = ucache->specptr; + void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr); + if (unspec_fptr) { + // wait until invoke and specsigflags are properly set + while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke); + } + // unspec is always not specsig, but might use specptr + jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10); + jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr); codeinst->rettype_const = ucache->rettype_const; - jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&ucache->invoke)); + jl_atomic_store_release(&codeinst->invoke, ucache_invoke); jl_mi_cache_insert(mi, codeinst); } else { @@ -2140,7 +2550,6 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t return codeinst; } - jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m) { return m->rettype_const; @@ -2148,29 +2557,27 @@ jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t narg jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, 
jl_code_instance_t *m) { - while (1) { - jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1); - if (invoke) - return invoke(f, args, nargs); - } + jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1); + assert(invoke && "Forgot to set specptr for jl_fptr_args!"); + return invoke(f, args, nargs); } jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m) { jl_svec_t *sparams = m->def->sparam_vals; assert(sparams != jl_emptysvec); - while (1) { - jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3); - if (invoke) - return invoke(f, args, nargs, sparams); - } + jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3); + assert(invoke && "Forgot to set specptr for jl_fptr_sparam!"); + return invoke(f, args, nargs, sparams); } -JL_DLLEXPORT jl_callptr_t jl_fptr_args_addr = &jl_fptr_args; +JL_DLLEXPORT const jl_callptr_t jl_fptr_args_addr = &jl_fptr_args; -JL_DLLEXPORT jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return; +JL_DLLEXPORT const jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return; -JL_DLLEXPORT jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam; +JL_DLLEXPORT const jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam; + +JL_DLLEXPORT const jl_callptr_t jl_f_opaque_closure_call_addr = (jl_callptr_t)&jl_f_opaque_closure_call; // Return the index of the invoke api, if known JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst) @@ -2189,18 +2596,89 @@ JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst) return -1; } -JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m) +JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m, + int return_if_compileable) { jl_tupletype_t *tt = NULL; jl_svec_t *newparams = NULL; JL_GC_PUSH2(&tt, &newparams); - intptr_t nspec = (mt == jl_type_type_mt || mt == jl_nonfunction_mt ? m->nargs + 1 : mt->max_args + 2); - jl_compilation_sig(ti, env, m, nspec, &newparams); - tt = (newparams ? jl_apply_tuple_type(newparams) : ti); - int is_compileable = ((jl_datatype_t*)ti)->isdispatchtuple || - jl_isa_compileable_sig(tt, m); + jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(m->sig) : mt; + intptr_t max_varargs = get_max_varargs(m, kwmt, mt, NULL); + jl_compilation_sig(ti, env, m, max_varargs, &newparams); + int is_compileable = ((jl_datatype_t*)ti)->isdispatchtuple; + if (newparams) { + tt = (jl_datatype_t*)jl_apply_tuple_type(newparams, 1); + if (!is_compileable) { + // compute new env, if used below + jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &newparams); + assert(ti != jl_bottom_type); (void)ti; + env = newparams; + } + } + else { + tt = ti; + } + if (!is_compileable) + is_compileable = jl_isa_compileable_sig(tt, env, m); JL_GC_POP(); - return is_compileable ? (jl_value_t*)tt : jl_nothing; + return (!return_if_compileable || is_compileable) ? 
(jl_value_t*)tt : jl_nothing; +} + +jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT) +{ + jl_method_t *def = mi->def.method; + if (!jl_is_method(def) || !jl_is_datatype(mi->specTypes)) + return mi; + jl_methtable_t *mt = jl_method_get_table(def); + if ((jl_value_t*)mt == jl_nothing) + return mi; + jl_value_t *compilationsig = jl_normalize_to_compilable_sig(mt, (jl_datatype_t*)mi->specTypes, mi->sparam_vals, def, 1); + if (compilationsig == jl_nothing || jl_egal(compilationsig, mi->specTypes)) + return mi; + jl_svec_t *env = NULL; + JL_GC_PUSH2(&compilationsig, &env); + jl_value_t *ti = jl_type_intersection_env((jl_value_t*)compilationsig, (jl_value_t*)def->sig, &env); + assert(ti != jl_bottom_type); (void)ti; + mi = jl_specializations_get_linfo(def, (jl_value_t*)compilationsig, env); + JL_GC_POP(); + return mi; +} + +// return a MethodInstance for a compileable method_match +jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache) +{ + jl_method_t *m = match->method; + jl_svec_t *env = match->sparams; + jl_tupletype_t *ti = match->spec_types; + jl_method_instance_t *mi = NULL; + if (jl_is_datatype(ti)) { + jl_methtable_t *mt = jl_method_get_table(m); + if ((jl_value_t*)mt != jl_nothing) { + // get the specialization, possibly also caching it + if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) { + // Since we also use this presence in the cache + // to trigger compilation when producing `.ji` files, + // inject it there now if we think it will be + // used via dispatch later (e.g. because it was hinted via a call to `precompile`) + JL_LOCK(&mt->writelock); + mi = cache_method(mt, &mt->cache, (jl_value_t*)mt, ti, m, world, min_valid, max_valid, env); + JL_UNLOCK(&mt->writelock); + } + else { + jl_value_t *tt = jl_normalize_to_compilable_sig(mt, ti, env, m, 1); + if (tt != jl_nothing) { + JL_GC_PUSH2(&tt, &env); + if (!jl_egal(tt, (jl_value_t*)ti)) { + jl_value_t *ti = jl_type_intersection_env((jl_value_t*)tt, (jl_value_t*)m->sig, &env); + assert(ti != jl_bottom_type); (void)ti; + } + mi = jl_specializations_get_linfo(m, (jl_value_t*)tt, env); + JL_GC_POP(); + } + } + } + } + return mi; } // compile-time method lookup @@ -2220,38 +2698,80 @@ jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES *min_valid = min_valid2; if (*max_valid > max_valid2) *max_valid = max_valid2; - if (matches == jl_false || jl_array_len(matches) != 1 || ambig) + if (matches == jl_nothing || jl_array_nrows(matches) != 1 || ambig) return NULL; - jl_value_t *tt = NULL; - JL_GC_PUSH2(&matches, &tt); + JL_GC_PUSH1(&matches); jl_method_match_t *match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); - jl_method_t *m = match->method; - jl_svec_t *env = match->sparams; - jl_tupletype_t *ti = match->spec_types; - jl_method_instance_t *nf = NULL; - if (jl_is_datatype(ti)) { - jl_methtable_t *mt = jl_method_table_for((jl_value_t*)ti); - if ((jl_value_t*)mt != jl_nothing) { - // get the specialization without caching it - if (mt_cache && ((jl_datatype_t*)ti)->isdispatchtuple) { - // Since we also use this presence in the cache - // to trigger compilation when producing `.ji` files, - // inject it there now if we think it will be - // used via dispatch later (e.g. 
because it was hinted via a call to `precompile`) - JL_LOCK(&mt->writelock); - nf = cache_method(mt, &mt->cache, (jl_value_t*)mt, ti, m, world, min_valid2, max_valid2, env); - JL_UNLOCK(&mt->writelock); - } - else { - tt = jl_normalize_to_compilable_sig(mt, ti, env, m); - if (tt != jl_nothing) { - nf = jl_specializations_get_linfo(m, (jl_value_t*)tt, env); + jl_method_instance_t *mi = jl_method_match_to_mi(match, world, min_valid2, max_valid2, mt_cache); + JL_GC_POP(); + return mi; +} + +// Get a MethodInstance for a precompile() call. This uses a special kind of lookup that +// tries to find a method for which the requested signature is compileable. +static jl_method_instance_t *jl_get_compile_hint_specialization(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache) +{ + if (jl_has_free_typevars((jl_value_t*)types)) + return NULL; // don't poison the cache due to a malformed query + if (!jl_has_concrete_subtype((jl_value_t*)types)) + return NULL; + + size_t min_valid2 = 1; + size_t max_valid2 = ~(size_t)0; + int ambig = 0; + jl_value_t *matches = jl_matching_methods(types, jl_nothing, -1, 0, world, &min_valid2, &max_valid2, &ambig); + if (*min_valid < min_valid2) + *min_valid = min_valid2; + if (*max_valid > max_valid2) + *max_valid = max_valid2; + size_t i, n = jl_array_nrows(matches); + if (n == 0) + return NULL; + JL_GC_PUSH1(&matches); + jl_method_match_t *match = NULL; + if (n == 1) { + match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); + } + else if (jl_is_datatype(types)) { + // first, select methods for which `types` is compileable + size_t count = 0; + for (i = 0; i < n; i++) { + jl_method_match_t *match1 = (jl_method_match_t*)jl_array_ptr_ref(matches, i); + if (jl_isa_compileable_sig(types, match1->sparams, match1->method)) + jl_array_ptr_set(matches, count++, (jl_value_t*)match1); + } + jl_array_del_end((jl_array_t*)matches, n - count); + n = count; + // now remove methods that are more specific than others in the list. + // this is because the intent of precompiling e.g. f(::DataType) is to + // compile that exact method if it exists, and not lots of f(::Type{X}) methods + int exclude; + count = 0; + for (i = 0; i < n; i++) { + jl_method_match_t *match1 = (jl_method_match_t*)jl_array_ptr_ref(matches, i); + exclude = 0; + for (size_t j = n-1; j > i; j--) { // more general methods maybe more likely to be at end + jl_method_match_t *match2 = (jl_method_match_t*)jl_array_ptr_ref(matches, j); + if (jl_type_morespecific(match1->method->sig, match2->method->sig)) { + exclude = 1; + break; } } + if (!exclude) + jl_array_ptr_set(matches, count++, (jl_value_t*)match1); + if (count > 1) + break; } + // at this point if there are 0 matches left we found nothing, or if there are + // more than one the request is ambiguous and we ignore it. 
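Editor's aside, not part of the patch: both filtering passes in jl_get_compile_hint_specialization above use the same compaction idiom, writing each surviving match forward over the array with a count cursor and then truncating the dead tail (jl_array_del_end). The same pattern on a plain C array, with an arbitrary keep-predicate standing in for the compileability and specificity tests:

#include <stddef.h>

/* Keep only elements satisfying `keep`, preserving order; return the new length. */
static size_t compact_in_place(int *items, size_t n, int (*keep)(int))
{
    size_t count = 0;
    for (size_t i = 0; i < n; i++) {
        if (keep(items[i]))
            items[count++] = items[i];   /* survivors slide forward */
    }
    /* the caller conceptually "deletes the end": n - count slots are now unused */
    return count;
}

static int is_even(int x) { return (x & 1) == 0; }
/* usage: size_t m = compact_in_place(buf, n, is_even); */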
+ if (count == 1) + match = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); } + jl_method_instance_t *mi = NULL; + if (match != NULL) + mi = jl_method_match_to_mi(match, world, min_valid2, max_valid2, mt_cache); JL_GC_POP(); - return nf; + return mi; } static void _generate_from_hint(jl_method_instance_t *mi, size_t world) @@ -2279,17 +2799,10 @@ static void jl_compile_now(jl_method_instance_t *mi) } } -JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) +JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world) { - size_t world = jl_atomic_load_acquire(&jl_world_counter); size_t tworld = jl_typeinf_world; - size_t min_valid = 0; - size_t max_valid = ~(size_t)0; - jl_method_instance_t *mi = jl_get_specialization1(types, world, &min_valid, &max_valid, 1); - if (mi == NULL) - return 0; - JL_GC_PROMISE_ROOTED(mi); - mi->precompiled = 1; + jl_atomic_store_relaxed(&mi->precompiled, 1); if (jl_generating_output()) { jl_compile_now(mi); // In addition to full compilation of the compilation-signature, if `types` is more specific (e.g. due to nospecialize), @@ -2297,19 +2810,19 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) // additional useful methods that should be compiled //ALT: if (jl_is_datatype(types) && ((jl_datatype_t*)types)->isdispatchtuple && !jl_egal(mi->specTypes, types)) //ALT: if (jl_subtype(types, mi->specTypes)) - if (!jl_subtype(mi->specTypes, (jl_value_t*)types)) { + if (types && !jl_subtype(mi->specTypes, (jl_value_t*)types)) { jl_svec_t *tpenv2 = jl_emptysvec; jl_value_t *types2 = NULL; JL_GC_PUSH2(&tpenv2, &types2); types2 = jl_type_intersection_env((jl_value_t*)types, (jl_value_t*)mi->def.method->sig, &tpenv2); - jl_method_instance_t *li2 = jl_specializations_get_linfo(mi->def.method, (jl_value_t*)types2, tpenv2); + jl_method_instance_t *mi2 = jl_specializations_get_linfo(mi->def.method, (jl_value_t*)types2, tpenv2); JL_GC_POP(); - li2->precompiled = 1; - if (jl_rettype_inferred(li2, world, world) == jl_nothing) - (void)jl_type_infer(li2, world, 1); + jl_atomic_store_relaxed(&mi2->precompiled, 1); + if (jl_rettype_inferred(mi2, world, world) == jl_nothing) + (void)jl_type_infer(mi2, world, 1); if (jl_typeinf_func && mi->def.method->primary_world <= tworld) { - if (jl_rettype_inferred(li2, tworld, tworld) == jl_nothing) - (void)jl_type_infer(li2, tworld, 1); + if (jl_rettype_inferred(mi2, tworld, tworld) == jl_nothing) + (void)jl_type_infer(mi2, tworld, 1); } } } @@ -2318,25 +2831,39 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) // we should generate the native code immediately in preparation for use. (void)jl_compile_method_internal(mi, world); } +} + +JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types) +{ + size_t world = jl_atomic_load_acquire(&jl_world_counter); + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + jl_method_instance_t *mi = jl_get_compile_hint_specialization(types, world, &min_valid, &max_valid, 1); + if (mi == NULL) + return 0; + JL_GC_PROMISE_ROOTED(mi); + jl_compile_method_instance(mi, types, world); return 1; } // add type of `f` to front of argument tuple type -static jl_value_t *jl_argtype_with_function(jl_function_t *f, jl_value_t *types0) +jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0) +{ + return jl_argtype_with_function_type(jl_is_type(f) ? 
(jl_value_t*)jl_wrap_Type(f) : jl_typeof(f), types0); +} + +jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0) { jl_value_t *types = jl_unwrap_unionall(types0); size_t l = jl_nparams(types); - jl_value_t *tt = (jl_value_t*)jl_alloc_svec(1+l); - size_t i; - JL_GC_PUSH1(&tt); - if (jl_is_type(f)) - jl_svecset(tt, 0, jl_wrap_Type(f)); - else - jl_svecset(tt, 0, jl_typeof(f)); - for(i=0; i < l; i++) + jl_value_t *tt = NULL; + JL_GC_PUSH2(&tt, &ft); + tt = (jl_value_t*)jl_alloc_svec(1+l); + jl_svecset(tt, 0, ft); + for (size_t i = 0; i < l; i++) jl_svecset(tt, i+1, jl_tparam(types,i)); - tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt); - tt = jl_rewrap_unionall(tt, types0); + tt = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)tt, 1); + tt = jl_rewrap_unionall_(tt, types0); JL_GC_POP(); return tt; } @@ -2369,7 +2896,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mfunc->cache); while (codeinst) { if (codeinst->min_world <= world && world <= codeinst->max_world) { - jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke); + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); if (invoke != NULL) { jl_value_t *res = invoke(F, args, nargs, codeinst); return verify_type(res); @@ -2389,7 +2916,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t errno = last_errno; if (jl_options.malloc_log) jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation - jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke); + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); jl_value_t *res = invoke(F, args, nargs, codeinst); return verify_type(res); } @@ -2496,15 +3023,15 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t #undef LOOP_BODY i = 4; jl_tupletype_t *tt = NULL; - int64_t last_alloc; + int64_t last_alloc = 0; if (i == 4) { // if no method was found in the associative cache, check the full cache - JL_TIMING(METHOD_LOOKUP_FAST); + JL_TIMING(METHOD_LOOKUP_FAST, METHOD_LOOKUP_FAST); mt = jl_gf_mtable(F); - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); entry = NULL; - if (leafcache != (jl_array_t*)jl_an_empty_vec_any && - jl_typeis(jl_atomic_load_relaxed(&mt->cache), jl_typemap_level_type)) { + if (leafcache != (jl_genericmemory_t*)jl_an_empty_memory_any && + jl_typetagis(jl_atomic_load_relaxed(&mt->cache), jl_typemap_level_type)) { // hashing args is expensive, but looking at mt->cache is probably even more expensive tt = lookup_arg_type_tuple(F, args, nargs); if (tt != NULL) @@ -2538,14 +3065,9 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t mfunc = entry->func.linfo; } else { - JL_GC_PUSH1(&tt); assert(tt); - JL_LOCK(&mt->writelock); // cache miss case - JL_TIMING(METHOD_LOOKUP_SLOW); mfunc = jl_mt_assoc_by_type(mt, tt, world); - JL_UNLOCK(&mt->writelock); - JL_GC_POP(); if (jl_options.malloc_log) jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation if (mfunc == NULL) { @@ -2578,14 +3100,16 @@ JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid) { jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types); - if 
(jl_is_tuple_type(unw) && jl_tparam0(unw) == jl_bottom_type) + if (!jl_is_tuple_type(unw)) + return NULL; + if (jl_tparam0(unw) == jl_bottom_type) return NULL; if (mt == jl_nothing) mt = (jl_value_t*)jl_method_table_for(unw); if (mt == jl_nothing) mt = NULL; jl_value_t *matches = ml_matches((jl_methtable_t*)mt, (jl_tupletype_t*)types, 1, 0, 0, world, 1, min_valid, max_valid, NULL); - if (matches == jl_false || jl_array_len(matches) != 1) + if (matches == jl_nothing || jl_array_nrows(matches) != 1) return NULL; jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(matches, 0); return matc; @@ -2625,7 +3149,7 @@ jl_value_t *jl_gf_invoke(jl_value_t *types0, jl_value_t *gf, jl_value_t **args, size_t world = jl_current_task->world_age; jl_value_t *types = NULL; JL_GC_PUSH1(&types); - types = jl_argtype_with_function(gf, types0); + types = jl_argtype_with_function((jl_value_t*)gf, types0); jl_method_t *method = (jl_method_t*)jl_gf_invoke_lookup(types, jl_nothing, world); JL_GC_PROMISE_ROOTED(method); @@ -2700,44 +3224,12 @@ jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_ jl_gc_wb(ftype->name->mt, name); jl_set_const(module, tname, (jl_value_t*)ftype); jl_value_t *f = jl_new_struct(ftype); - ftype->instance = f; jl_gc_wb(ftype, f); + ftype->instance = f; + jl_gc_wb(ftype, f); JL_GC_POP(); return (jl_function_t*)f; } -JL_DLLEXPORT jl_function_t *jl_get_kwsorter(jl_value_t *ty) -{ - jl_methtable_t *mt = jl_argument_method_table(ty); - if ((jl_value_t*)mt == jl_nothing) - jl_error("cannot get keyword sorter for abstract type"); - if (!mt->kwsorter) { - JL_LOCK(&mt->writelock); - if (!mt->kwsorter) { - char *name; - if (mt == jl_nonfunction_mt) { - name = jl_symbol_name(mt->name); - } - else { - jl_datatype_t *dt = (jl_datatype_t*)jl_argument_datatype(ty); - assert(jl_is_datatype(dt)); - name = jl_symbol_name(dt->name->name); - if (name[0] == '#') - name++; - } - size_t l = strlen(name); - char *suffixed = (char*)malloc_s(l+5); - strcpy(&suffixed[0], name); - strcpy(&suffixed[l], "##kw"); - jl_sym_t *fname = jl_symbol(suffixed); - free(suffixed); - mt->kwsorter = jl_new_generic_function_with_supertype(fname, mt->module, jl_function_type); - jl_gc_wb(mt, mt->kwsorter); - } - JL_UNLOCK(&mt->writelock); - } - return mt->kwsorter; -} - jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module) { return jl_new_generic_function_with_supertype(name, module, jl_function_type); @@ -2749,11 +3241,9 @@ struct ml_matches_env { int intersections; size_t world; int lim; + int include_ambiguous; // results: jl_value_t *t; // array of method matches - size_t min_valid; - size_t max_valid; - // temporary: jl_method_match_t *matc; // current working method match }; @@ -2781,22 +3271,22 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio return 1; if (closure->world < ml->min_world) { // ignore method table entries that are part of a later world - if (closure->max_valid >= ml->min_world) - closure->max_valid = ml->min_world - 1; + if (closure->match.max_valid >= ml->min_world) + closure->match.max_valid = ml->min_world - 1; return 1; } else if (closure->world > ml->max_world) { // ignore method table entries that have been replaced in the current world - if (closure->min_valid <= ml->max_world) - closure->min_valid = ml->max_world + 1; + if (closure->match.min_valid <= ml->max_world) + closure->match.min_valid = ml->max_world + 1; return 1; } else { - // intersect the env valid range with method's valid range - if 
(closure->min_valid < ml->min_world) - closure->min_valid = ml->min_world; - if (closure->max_valid > ml->max_world) - closure->max_valid = ml->max_world; + // intersect the env valid range with method's inclusive valid range + if (closure->match.min_valid < ml->min_world) + closure->match.min_valid = ml->min_world; + if (closure->match.max_valid > ml->max_world) + closure->match.max_valid = ml->max_world; } jl_method_t *meth = ml->func.method; if (closure->lim >= 0 && jl_is_dispatch_tupletype(meth->sig)) { @@ -2804,10 +3294,13 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio return 0; closure->lim--; } + // don't need to consider other similar methods if this ml will always fully intersect with them and dominates all of them + if (!closure->include_ambiguous || closure->lim != -1) + typemap_slurp_search(ml, &closure->match); closure->matc = make_method_match((jl_tupletype_t*)closure->match.ti, closure->match.env, meth, closure->match.issubty ? FULLY_COVERS : NOT_FULLY_COVERS); - size_t len = jl_array_len(closure->t); + size_t len = jl_array_nrows(closure->t); if (len == 0) { closure->t = (jl_value_t*)jl_alloc_vec_any(1); jl_array_ptr_set(closure->t, 0, (jl_value_t*)closure->matc); @@ -2818,11 +3311,285 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio return 1; } -static int ml_mtable_visitor(jl_methtable_t *mt, void *env) -{ - return jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, (struct typemap_intersection_env*)env); +static int ml_mtable_visitor(jl_methtable_t *mt, void *closure0) +{ + struct typemap_intersection_env* env = (struct typemap_intersection_env*)closure0; + return jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), jl_cachearg_offset(mt), env); +} + + +// Visit the candidate methods, starting from t[idx], to determine a possible valid sort ordering, +// where every morespecific method appears before any method which it has a common +// intersection with but is not partly ambiguous with (ambiguity is transitive, particularly +// if lim==-1, although morespecific is not transitive). +// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable +// Inputs: +// * `t`: the array of vertexes (method matches) +// * `idx`: the next vertex to add to the output +// * `visited`: the state of the algorithm for each vertex in `t`: either 1 if we visited it already or 1+depth if we are visiting it now +// * `stack`: the state of the algorithm for the current vertex (up to length equal to `t`): the list of all vertexes currently in the depth-first path or in the current SCC +// * `result`: the output of the algorithm, a sorted list of vertexes (up to length `lim`) +// * `allambig`: a list of all vertexes with an ambiguity (up to length equal to `t`), discovered while running the rest of the algorithm +// * `lim`: either -1 for unlimited matches, or the maximum length for `result` before returning failure (return -1). +// If specified as -1, this will return extra matches that would have been elided from the list because they were already covered by an earlier match. +// This gives a sort of maximal set of matching methods (up to the first minmax method). +// If specified as -1, the sorting will also include all "weak" edges (every ambiguous pair) which will create much larger ambiguity cycles, +// resulting in a less accurate sort order and much less accurate `*has_ambiguity` result. 
+// * `include_ambiguous`: whether to filter out fully ambiguous matches from `result` +// * `*has_ambiguity`: whether the algorithm does not need to compute if there is an unresolved ambiguity +// * `*found_minmax`: whether there is a minmax method already found, so future fully_covers matches should be ignored +// Outputs: +// * `*has_ambiguity`: whether the caller should check if there remains an unresolved ambiguity (in `allambig`) +// Returns: +// * -1: too many matches for lim, other outputs are undefined +// * 0: the child(ren) have been added to the output +// * 1+: the children are part of this SCC (up to this depth) +// TODO: convert this function into an iterative call, rather than recursive +static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, arraylist_t *stack, arraylist_t *result, arraylist_t *allambig, int lim, int include_ambiguous, int *has_ambiguity, int *found_minmax) +{ + size_t cycle = (size_t)visited->items[idx]; + if (cycle != 0) + return cycle - 1; // depth remaining + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, idx); + jl_method_t *m = matc->method; + jl_value_t *ti = (jl_value_t*)matc->spec_types; + int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) + // first check if this new method is actually already fully covered by an + // existing match and we can just ignore this entry quickly + size_t result_len = 0; + if (subt) { + if (*found_minmax == 2) + visited->items[idx] = (void*)1; + } + else if (lim != -1) { + for (; result_len < result->len; result_len++) { + size_t idx2 = (size_t)result->items[result_len]; + jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); + jl_method_t *m2 = matc2->method; + if (jl_subtype(ti, m2->sig)) { + if (include_ambiguous) { + if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) + continue; + } + visited->items[idx] = (void*)1; + break; + } + } + } + if ((size_t)visited->items[idx] == 1) + return 0; + arraylist_push(stack, (void*)idx); + size_t depth = stack->len; + visited->items[idx] = (void*)(1 + depth); + cycle = depth; + int addambig = 0; + int mayexclude = 0; + // First visit all "strong" edges where the child is definitely better. + // This likely won't hit any cycles, but might (because morespecific is not transitive). + // Along the way, record if we hit any ambiguities-we may need to track those later. 
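Editor's aside, not part of the patch: the visited/stack/result bookkeeping described in the comment block above is easier to see on a toy graph. Below is a standalone sketch of the same depth-based simplification of Tarjan's SCC search (hypothetical names, no Julia types, and none of the ambiguity or minmax filtering), where prereq[v][w] != 0 means w must be emitted before v:

#include <stddef.h>

#define NV 8
static int    prereq[NV][NV];      /* prereq[v][w]: w has to precede v in the output */
static size_t visited[NV];         /* 0 = unseen, 1 = emitted, 1+depth = on the stack */
static size_t stack_[NV], stack_len;
static size_t result[NV], result_len;

/* Returns 0 once v (and its whole SCC) has been emitted; otherwise returns the
 * shallowest stack depth this subtree reached, meaning v belongs to a cycle that
 * an ancestor will emit. */
static size_t visit(size_t v)
{
    size_t cycle = visited[v];
    if (cycle != 0)
        return cycle - 1;                  /* 0 if already emitted, else its depth */
    stack_[stack_len++] = v;
    size_t depth = stack_len;              /* 1-based position on the stack */
    visited[v] = 1 + depth;
    cycle = depth;
    for (size_t w = 0; w < NV; w++) {
        if (w == v || !prereq[v][w])
            continue;
        size_t child = visit(w);
        if (child && child < cycle)
            cycle = child;                 /* the cycle resolves at a shallower depth */
    }
    if (cycle != depth)
        return cycle;                      /* still inside a larger SCC */
    /* v closes its SCC: emit everything from position depth to the top of the stack */
    while (stack_len >= depth) {
        size_t w = stack_[--stack_len];
        visited[w] = 1;
        result[result_len++] = w;
    }
    return 0;
}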
+ for (size_t childidx = 0; childidx < jl_array_nrows(t); childidx++) { + if (childidx == idx) + continue; + int child_cycle = (size_t)visited->items[childidx]; + if (child_cycle == 1) + continue; // already handled + if (child_cycle != 0 && child_cycle - 1 >= cycle) + continue; // already part of this cycle + jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); + jl_method_t *m2 = matc2->method; + int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) + // TODO: we could change this to jl_has_empty_intersection(ti, (jl_value_t*)matc2->spec_types); + // since we only care about sorting of the intersections the user asked us about + if (!subt2 && jl_has_empty_intersection(m2->sig, m->sig)) + continue; + int msp = jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig); + int msp2 = !msp && jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig); + if (!msp) { + if (subt || !include_ambiguous || (lim != -1 && msp2)) { + if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) { + // this may be filtered out as fully intersected, if applicable later + mayexclude = 1; + } + } + if (!msp2) { + addambig = 1; // record there is a least one previously-undetected ambiguity that may need to be investigated later (between m and m2) + } + } + if (lim == -1 ? msp : !msp2) // include only strong or also weak edges, depending on whether the result size is limited + continue; + // m2 is (lim!=-1 ? better : not-worse), so attempt to visit it first + // if limited, then we want to visit only better edges, because that results in finding k best matches quickest + // if not limited, then we want to visit all edges, since that results in finding the largest SCC cycles, which requires doing the fewest intersections + child_cycle = sort_mlmatches(t, childidx, visited, stack, result, allambig, lim, include_ambiguous, has_ambiguity, found_minmax); + if (child_cycle == -1) + return -1; + if (child_cycle && child_cycle < cycle) { + // record the cycle will resolve at depth "cycle" + cycle = child_cycle; + } + if (stack->len == depth) { + // if this child resolved without hitting a cycle, then there is + // some probability that this method is already fully covered now + // (same check as before), and we can delete this vertex now without + // anyone noticing (too much) + if (subt) { + if (*found_minmax == 2) + visited->items[idx] = (void*)1; + } + else if (lim != -1) { + for (; result_len < result->len; result_len++) { + size_t idx2 = (size_t)result->items[result_len]; + jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); + jl_method_t *m2 = matc2->method; + if (jl_subtype(ti, m2->sig)) { + if (include_ambiguous) { + if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) + continue; + } + visited->items[idx] = (void*)1; + break; + } + } + } + if ((size_t)visited->items[idx] == 1) { + // n.b. cycle might be < depth, if we had a cycle with a child + // idx, but since we are on the top of the stack, nobody + // observed that and so we are content to ignore this + size_t childidx = (size_t)arraylist_pop(stack); + assert(childidx == idx); (void)childidx; + assert(!subt || *found_minmax == 2); + return 0; + } + } + } + if (matc->fully_covers == NOT_FULLY_COVERS && addambig) + arraylist_push(allambig, (void*)idx); + if (cycle != depth) + return cycle; + result_len = result->len; + if (stack->len == depth) { + // Found one "best" method to add right now. 
But we might exclude it if + // we determined earlier that we had that option. + if (mayexclude) { + if (!subt || *found_minmax == 2) + visited->items[idx] = (void*)1; + } + } + else { + // We have a set of ambiguous methods. Record that. + // This is greatly over-approximated for lim==-1 + *has_ambiguity = 1; + // If we followed weak edges above, then this also fully closed the ambiguity cycle + if (lim == -1) + addambig = 0; + // If we're only returning possible matches, now filter out this method + // if its intersection is fully ambiguous in this SCC group. + // This is a repeat of the "first check", now that we have completed the cycle analysis + for (size_t i = depth - 1; i < stack->len; i++) { + size_t childidx = (size_t)stack->items[i]; + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); + jl_value_t *ti = (jl_value_t*)matc->spec_types; + int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) + if ((size_t)visited->items[childidx] == 1) { + assert(subt); + continue; + } + assert(visited->items[childidx] == (void*)(2 + i)); + // if we only followed strong edges before above + // check also if this set has an unresolved ambiguity missing from it + if (lim != -1 && !addambig) { + for (size_t j = 0; j < allambig->len; j++) { + if ((size_t)allambig->items[j] == childidx) { + addambig = 1; + break; + } + } + } + // always remove fully_covers matches after the first minmax ambiguity group is handled + if (subt) { + if (*found_minmax) + visited->items[childidx] = (void*)1; + continue; + } + else if (lim != -1) { + // when limited, don't include this match if it was covered by an earlier one + for (size_t result_len = 0; result_len < result->len; result_len++) { + size_t idx2 = (size_t)result->items[result_len]; + jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); + jl_method_t *m2 = matc2->method; + if (jl_subtype(ti, m2->sig)) { + if (include_ambiguous) { + if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) + continue; + } + visited->items[childidx] = (void*)1; + break; + } + } + } + } + if (!include_ambiguous && lim == -1) { + for (size_t i = depth - 1; i < stack->len; i++) { + size_t childidx = (size_t)stack->items[i]; + if ((size_t)visited->items[childidx] == 1) + continue; + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); + jl_method_t *m = matc->method; + jl_value_t *ti = (jl_value_t*)matc->spec_types; + for (size_t j = depth - 1; j < stack->len; j++) { + if (i == j) + continue; + size_t idx2 = (size_t)stack->items[j]; + jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); + jl_method_t *m2 = matc2->method; + int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) + // if their intersection contributes to the ambiguity cycle + // and the contribution of m is fully ambiguous with the portion of the cycle from m2 + if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) { + // but they aren't themselves simply ordered (here + // we don't consider that a third method might be + // disrupting that ordering and just consider them + // pairwise to keep this simple). 
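Editor's aside, not part of the patch: the test applied in this loop is the usual pairwise notion of ambiguity, two signatures whose intersection is non-empty but where neither is more specific than the other; the comment above is explicit that a possible third method is deliberately ignored to keep it simple. Expressed as a small helper over the existing jl_type_morespecific predicate (the helper name is ours):

#include <julia.h>

/* Two method signatures are pairwise-ambiguous when neither beats the other under
 * specificity; callers are expected to already know their intersection is non-empty. */
static int pairwise_ambiguous(jl_value_t *sig1, jl_value_t *sig2)
{
    return !jl_type_morespecific(sig1, sig2) &&
           !jl_type_morespecific(sig2, sig1);
}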
+ if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) && + !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) { + visited->items[childidx] = (void*)-1; + break; + } + } + } + } + } + } + // copy this cycle into the results + for (size_t i = depth - 1; i < stack->len; i++) { + size_t childidx = (size_t)stack->items[i]; + if ((size_t)visited->items[childidx] == 1) + continue; + if ((size_t)visited->items[childidx] != -1) { + assert(visited->items[childidx] == (void*)(2 + i)); + visited->items[childidx] = (void*)-1; + if (lim == -1 || result->len < lim) + arraylist_push(result, (void*)childidx); + else + return -1; + } + } + // now finally cleanup the stack + while (stack->len >= depth) { + size_t childidx = (size_t)arraylist_pop(stack); + // always remove fully_covers matches after the first minmax ambiguity group is handled + //jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); + if (matc->fully_covers != NOT_FULLY_COVERS && !addambig) + *found_minmax = 2; + if (visited->items[childidx] != (void*)-1) + continue; + visited->items[childidx] = (void*)1; + } + return 0; } + + // This is the collect form of calling jl_typemap_intersection_visitor // with optimizations to skip fully shadowed methods. // @@ -2838,6 +3605,9 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int intersections, size_t world, int cache_result, size_t *min_valid, size_t *max_valid, int *ambig) { + if (world > jl_atomic_load_acquire(&jl_world_counter)) + return jl_nothing; // the future is not enumerable + JL_TIMING(METHOD_MATCH, METHOD_MATCH); int has_ambiguity = 0; jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)type); assert(jl_is_datatype(unw)); @@ -2850,10 +3620,11 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, else va = NULL; } - struct ml_matches_env env = {{ml_matches_visitor, (jl_value_t*)type, va, + struct ml_matches_env env = {{ml_matches_visitor, (jl_value_t*)type, va, /* .search_slurp = */ 0, + /* .min_valid = */ *min_valid, /* .max_valid = */ *max_valid, /* .ti = */ NULL, /* .env = */ jl_emptysvec, /* .issubty = */ 0}, - intersections, world, lim, /* .t = */ jl_an_empty_vec_any, - /* .min_valid = */ *min_valid, /* .max_valid = */ *max_valid, /* .matc = */ NULL}; + intersections, world, lim, include_ambiguous, /* .t = */ jl_an_empty_vec_any, + /* .matc = */ NULL}; struct jl_typemap_assoc search = {(jl_value_t*)type, world, jl_emptysvec, 1, ~(size_t)0}; jl_value_t *isect2 = NULL; JL_GC_PUSH6(&env.t, &env.matc, &env.match.env, &search.env, &env.match.ti, &isect2); @@ -2861,7 +3632,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, if (mt) { // check the leaf cache if this type can be in there if (((jl_datatype_t*)unw)->isdispatchtuple) { - jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache); jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)type, world); if (entry) { jl_method_instance_t *mi = entry->func.linfo; @@ -2916,23 +3687,23 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, return env.t; } } - if (!jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, &env.match)) { + if (!ml_mtable_visitor(mt, &env.match)) { JL_GC_POP(); - return jl_false; + return jl_nothing; } } else { // else: scan everything if (!jl_foreach_reachable_mtable(ml_mtable_visitor, &env.match)) { JL_GC_POP(); - return jl_false; + return jl_nothing; } } - *min_valid = env.min_valid; - *max_valid = env.max_valid; + *min_valid = 
env.match.min_valid; + *max_valid = env.match.max_valid; // done with many of these values now env.match.ti = NULL; env.matc = NULL; env.match.env = NULL; search.env = NULL; - size_t i, j, len = jl_array_len(env.t); + size_t i, j, len = jl_array_nrows(env.t); jl_method_match_t *minmax = NULL; int minmax_ambig = 0; int all_subtypes = 1; @@ -3007,7 +3778,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, } else if (lim == 1) { JL_GC_POP(); - return jl_false; + return jl_nothing; } } else { @@ -3017,273 +3788,151 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, len = 1; } } + if (minmax && lim == 0) { + // protect some later algorithms from underflow + JL_GC_POP(); + return jl_nothing; + } } if (len > 1) { - // need to partially domsort the graph now into a list - // (this is an insertion sort attempt) - // if we have a minmax method, we ignore anything less specific - // we'll clean that up next - for (i = 1; i < len; i++) { - env.matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); - jl_method_t *m = env.matc->method; - int subt = env.matc->fully_covers != NOT_FULLY_COVERS; - if ((minmax != NULL || (minmax_ambig && !include_ambiguous)) && subt) { - continue; // already the biggest (skip will filter others) - } - for (j = 0; j < i; j++) { - jl_method_match_t *matc2 = (jl_method_match_t *)jl_array_ptr_ref(env.t, i - j - 1); - jl_method_t *m2 = matc2->method; - int subt2 = matc2->fully_covers != NOT_FULLY_COVERS; - if (!subt2 && subt) - break; - if (subt == subt2) { - if (lim != -1) { - if (subt || !jl_has_empty_intersection(m->sig, m2->sig)) - if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig)) - break; - } - else { - // if unlimited, use approximate sorting, with the only - // main downside being that it may be overly- - // conservative at reporting existence of ambiguities - if (jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) - break; - } - } - jl_array_ptr_set(env.t, i - j, matc2); - } - jl_array_ptr_set(env.t, i - j, env.matc); - } - char *skip = (char*)alloca(len); - memset(skip, 0, len); + arraylist_t stack, visited, result, allambig; + arraylist_new(&result, lim != -1 && lim < len ? lim : len); + arraylist_new(&stack, 0); + arraylist_new(&visited, len); + arraylist_new(&allambig, len); + arraylist_grow(&visited, len); + memset(visited.items, 0, len * sizeof(size_t)); // if we had a minmax method (any subtypes), now may now be able to - // quickly cleanup some of our sort result - if (minmax != NULL || (minmax_ambig && !include_ambiguous)) { - for (i = 0; i < len; i++) { - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); - if (minmax != matc && matc->fully_covers != NOT_FULLY_COVERS) { - skip[i] = 1; - } - } - } - if (include_ambiguous && lim == -1 && ambig == NULL && !minmax_ambig) { - // in this case, we don't actually need to compute the ambiguity - // information at all as the user doesn't need us to filter them - // out or report them - } - else { - // now that the results are (mostly) sorted, assign group numbers to each ambiguity - // by computing the specificity-ambiguity matrix covering this query - uint32_t *ambig_groupid = (uint32_t*)alloca(len * sizeof(uint32_t)); - for (i = 0; i < len; i++) - ambig_groupid[i] = i; - // as we go, keep a rough count of how many methods are disjoint, which - // gives us a lower bound on how many methods we will be returning - // and lets us stop early if we reach our limit - int ndisjoint = minmax ? 
1 : 0; - for (i = 0; i < len; i++) { - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); - if (skip[i]) { - // if there was a minmax method, we can just pretend the rest are all in the same group: - // they're all together but unsorted in the list, since we'll drop them all later anyways - assert(matc->fully_covers != NOT_FULLY_COVERS); - if (ambig_groupid[len - 1] > i) - ambig_groupid[len - 1] = i; // ambiguity covering range [i:len) - break; - } - jl_method_t *m = matc->method; - int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) - int rsubt = jl_egal((jl_value_t*)matc->spec_types, m->sig); - int disjoint = 1; - for (j = len; j > i; j--) { - if (ambig_groupid[j - 1] < i) { - disjoint = 0; - break; - } - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j - 1); - // can't use skip[j - 1] here, since we still need to make sure the minmax dominates - jl_method_t *m2 = matc2->method; - int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) - int rsubt2 = jl_egal((jl_value_t*)matc2->spec_types, m2->sig); - jl_value_t *ti; - if (!subt && !subt2 && rsubt && rsubt2 && lim == -1 && ambig == NULL) - // these would only be filtered out of the list as - // ambiguous if they are also type-equal, as we - // aren't skipping matches and the user doesn't - // care if we report any ambiguities - continue; - if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig)) - continue; - if (subt) { - ti = (jl_value_t*)matc2->spec_types; - isect2 = NULL; - } - else if (subt2) { - ti = (jl_value_t*)matc->spec_types; - isect2 = NULL; - } - else { - jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &env.match.ti, &isect2); - ti = env.match.ti; - } - if (ti != jl_bottom_type) { - disjoint = 0; - // m and m2 are ambiguous, but let's see if we can find another method (m3) - // that dominates their intersection, and means we can ignore this - size_t k; - for (k = i; k > 0; k--) { - jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, k - 1); - jl_method_t *m3 = matc3->method; - if ((jl_subtype(ti, m3->sig) || (isect2 && jl_subtype(isect2, m3->sig))) - && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m->sig) - && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m2->sig)) - break; - } - if (k == 0) { - ambig_groupid[j - 1] = i; // ambiguity covering range [i:j) - isect2 = NULL; - break; - } - } - isect2 = NULL; - } - if (disjoint && lim >= 0) { - ndisjoint += 1; - if (ndisjoint > lim) { - JL_GC_POP(); - return jl_false; - } - } - } - // then we'll merge those numbers to assign each item in the group the same number - uint32_t groupid = 0; - uint32_t grouphi = 0; - for (i = 0; i < len; i++) { - j = len - i - 1; - uint32_t agid = ambig_groupid[j]; - if (agid != j) { // thus agid < j - if (grouphi == 0) { - groupid = agid; - grouphi = j; - } - else if (agid < groupid) { - groupid = agid; - } - } - if (grouphi && j == groupid) { - do { - ambig_groupid[grouphi--] = groupid; - } while (grouphi > j); - ambig_groupid[j] = groupid; - groupid = 0; - grouphi = 0; - } + // quickly cleanup some of methods + int found_minmax = 0; + if (minmax != NULL) + found_minmax = 2; + else if (minmax_ambig && !include_ambiguous) + found_minmax = 1; + if (ambig == NULL) // if we don't care about the result, set it now so we won't bother attempting to compute it accurately later + has_ambiguity = 1; + for (i = 0; i < len; i++) { + 
assert(visited.items[i] == (void*)0 || visited.items[i] == (void*)1); + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); + if (matc->fully_covers != NOT_FULLY_COVERS && found_minmax) { + // this was already handled above and below, so we won't learn anything new + // by visiting it and it might be a bit costly + continue; } - // always remove matches after the first subtype, now that we've sorted the list for ambiguities - for (i = 0; i < len; i++) { - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); - if (matc->fully_covers == FULLY_COVERS) { // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) - uint32_t agid = ambig_groupid[i]; - while (i < len && agid == ambig_groupid[i]) - i++; // keep ambiguous ones - for (; i < len; i++) - skip[i] = 1; // drop the rest - } + int child_cycle = sort_mlmatches((jl_array_t*)env.t, i, &visited, &stack, &result, &allambig, lim == -1 || minmax == NULL ? lim : lim - 1, include_ambiguous, &has_ambiguity, &found_minmax); + if (child_cycle == -1) { + arraylist_free(&allambig); + arraylist_free(&visited); + arraylist_free(&stack); + arraylist_free(&result); + JL_GC_POP(); + return jl_nothing; } - // when limited, skip matches that are covered by earlier ones (and aren't perhaps ambiguous with them) - if (lim != -1) { - for (i = 0; i < len; i++) { - if (skip[i]) - continue; - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); - jl_method_t *m = matc->method; - jl_tupletype_t *ti = matc->spec_types; - if (matc->fully_covers == FULLY_COVERS) - break; // remaining matches are ambiguous or already skipped - for (j = 0; j < i; j++) { - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j); - jl_method_t *m2 = matc2->method; - if (jl_subtype((jl_value_t*)ti, m2->sig)) { - if (ambig_groupid[i] != ambig_groupid[j]) { - skip[i] = 1; - break; - } - else if (!include_ambiguous) { - if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig)) { - skip[i] = 1; - break; - } - } - } - } - } + assert(child_cycle == 0); (void)child_cycle; + assert(stack.len == 0); + assert(visited.items[i] == (void*)1); + } + // now compute whether there were ambiguities left in this cycle + if (has_ambiguity == 0 && allambig.len > 0) { + if (lim == -1) { + // lim is over-approximated, so has_ambiguities is too + has_ambiguity = 1; } - // Compute whether anything could be ambiguous by seeing if any two - // remaining methods in the result are in the same ambiguity group. - assert(len > 0); - uint32_t agid = ambig_groupid[0]; - for (i = 1; i < len; i++) { - if (!skip[i]) { - if (agid == ambig_groupid[i]) { - has_ambiguity = 1; - break; - } - agid = ambig_groupid[i]; - } - } - // If we're only returning possible matches, now filter out any method - // whose intersection is fully ambiguous with the group it is in. 
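/* Illustrative sketch (editorial aside, not part of the patch): sort_mlmatches
 * above reads as a Tarjan-style strongly-connected-components pass in which the
 * visited array doubles as the on-stack marker: 0 = not yet visited,
 * 1 = finished/removed, and 2 plus its stack position while the entry is still
 * on the work stack; the return value propagates the shallowest stack position
 * reachable from a vertex, much like Tarjan's lowlink.  A minimal standalone
 * analogue over a plain adjacency matrix, with hypothetical names and assuming
 * this reading of the code: */
#include <stdio.h>
#include <stddef.h>

#define NV 4
static int adj[NV][NV];         /* adj[a][b] != 0 means an edge a -> b          */
static size_t visited[NV];      /* 0 unvisited, 1 done, 2+pos while on stack    */
static size_t stack[NV], stack_len;

static size_t scc_visit(size_t v)
{
    size_t depth = stack_len + 1;       /* 1-based position of v on the stack   */
    visited[v] = depth + 1;             /* i.e. 2 + 0-based stack index         */
    stack[stack_len++] = v;
    size_t cycle = depth;               /* shallowest position reachable so far */
    for (size_t w = 0; w < NV; w++) {
        if (!adj[v][w] || w == v)
            continue;
        size_t seen = visited[w];
        if (seen == 1)
            continue;                   /* neighbor already finished            */
        size_t child = seen ? seen - 1 : scc_visit(w);
        if (child < cycle)
            cycle = child;              /* v reaches an entry shallower on the stack */
    }
    if (cycle == depth) {
        /* v is the root of its component: everything at or above its position
         * on the stack belongs to the same SCC, so pop it and mark it finished. */
        printf("component:");
        while (stack_len >= depth) {
            size_t w = stack[--stack_len];
            visited[w] = 1;
            printf(" %zu", w);
        }
        printf("\n");
    }
    return cycle;
}

int main(void)
{
    adj[0][1] = adj[1][2] = adj[2][0] = 1;   /* 0 -> 1 -> 2 -> 0 form one SCC   */
    adj[2][3] = 1;                           /* 3 is a component of its own     */
    for (size_t v = 0; v < NV; v++)
        if (visited[v] == 0)
            scc_visit(v);
    return 0;
}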
- if (!include_ambiguous) { - for (i = 0; i < len; i++) { - if (skip[i]) - continue; - uint32_t agid = ambig_groupid[i]; - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); + else { + // go back and find the additional ambiguous methods and temporary add them to the stack + // (potentially duplicating them from lower on the stack to here) + jl_value_t *ti = NULL; + jl_value_t *isect2 = NULL; + JL_GC_PUSH2(&ti, &isect2); + for (size_t i = 0; i < allambig.len; i++) { + size_t idx = (size_t)allambig.items[i]; + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx); jl_method_t *m = matc->method; - jl_tupletype_t *ti = matc->spec_types; int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) - char ambig1 = 0; - for (j = agid; j < len && ambig_groupid[j] == agid; j++) { - if (j == i) + for (size_t idx2 = 0; idx2 < jl_array_nrows(env.t); idx2++) { + if (idx2 == idx) continue; - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j); + // laborious test, checking for existence and coverage of another method (m3) + // outside of the ambiguity group that dominates any ambiguous methods, + // and means we can ignore this for has_ambiguity + // (has_ambiguity is overestimated for lim==-1, since we don't compute skipped matches either) + // n.b. even if we skipped them earlier, they still might + // contribute to the ambiguities (due to lock of transitivity of + // morespecific over subtyping) + // TODO: we could improve this result by checking if the removal of some + // edge earlier means that this subgraph is now well-ordered and then be + // allowed to ignore these vertexes entirely here + jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx2); jl_method_t *m2 = matc2->method; int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) + if (subt) { + ti = (jl_value_t*)matc2->spec_types; + isect2 = NULL; + } + else if (subt2) { + ti = (jl_value_t*)matc->spec_types; + isect2 = NULL; + } + else { + jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &ti, &isect2); + } // if their intersection contributes to the ambiguity cycle - if (subt || subt2 || !jl_has_empty_intersection((jl_value_t*)ti, m2->sig)) { - // and the contribution of m is ambiguous with the portion of the cycle from m2 - if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) { - // but they aren't themselves simply ordered (here - // we don't consider that a third method might be - // disrupting that ordering and just consider them - // pairwise to keep this simple). 
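/* Illustrative sketch (editorial aside, not part of the patch): the rewritten
 * has_ambiguity pass above, roughly speaking, only reports a pair (m, m2) as
 * ambiguous when their intersection is non-empty, neither signature is more
 * specific than the other, and no third returned match m3 both covers that
 * intersection and is more specific than both (in which case calls in the
 * overlap dispatch to m3 anyway).  Using closed integer intervals as stand-in
 * "signatures" (hypothetical; strict subset = more specific), the predicate
 * can be sketched as: */
#include <stdio.h>

typedef struct { int lo, hi; } sig_t;            /* toy method signature        */

static int empty(sig_t s)
{
    return s.lo > s.hi;
}

static sig_t meet(sig_t a, sig_t b)              /* toy type intersection       */
{
    sig_t r = { a.lo > b.lo ? a.lo : b.lo, a.hi < b.hi ? a.hi : b.hi };
    return r;
}

static int covers(sig_t outer, sig_t inner)      /* inner is a subtype of outer */
{
    return outer.lo <= inner.lo && inner.hi <= outer.hi;
}

static int morespecific(sig_t a, sig_t b)        /* strict subset               */
{
    return covers(b, a) && !covers(a, b);
}

static int pair_is_ambiguous(sig_t m, sig_t m2, const sig_t *matches, int n)
{
    sig_t ti = meet(m, m2);
    if (empty(ti))
        return 0;                                /* no shared inputs at all     */
    if (morespecific(m, m2) || morespecific(m2, m))
        return 0;                                /* simply ordered              */
    for (int k = 0; k < n; k++)                  /* look for a dominating m3    */
        if (covers(matches[k], ti) &&
            morespecific(matches[k], m) && morespecific(matches[k], m2))
            return 0;                            /* m3 resolves the overlap     */
    return 1;
}

int main(void)
{
    sig_t matches[] = { { 0, 10 }, { 5, 15 }, { 5, 10 } };  /* m, m2, m3        */
    /* m = [0,10] and m2 = [5,15] overlap on [5,10]; m3 = [5,10] covers exactly
     * that overlap and is more specific than both, so it resolves the pair.   */
    printf("pair alone:    ambiguous = %d\n",
           pair_is_ambiguous(matches[0], matches[1], matches, 2));
    printf("with m3 added: ambiguous = %d\n",
           pair_is_ambiguous(matches[0], matches[1], matches, 3));
    return 0;
}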
- if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) && - !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) { - ambig1 = 1; - } + if (ti == jl_bottom_type) + continue; + // and they aren't themselves simply ordered + if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) || + jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) + continue; + // now look for a third method m3 that dominated these and that fully covered this intersection already + size_t k; + for (k = 0; k < result.len; k++) { + size_t idx3 = (size_t)result.items[k]; + if (idx3 == idx || idx3 == idx2) { + has_ambiguity = 1; + break; } - else { - // otherwise some aspect of m is not ambiguous - ambig1 = 0; + jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx3); + jl_method_t *m3 = matc3->method; + if ((jl_subtype(ti, m3->sig) || (isect2 && jl_subtype(isect2, m3->sig))) + && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m->sig) + && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m2->sig)) { + //if (jl_subtype(matc->spec_types, ti) || jl_subtype(matc->spec_types, matc3->m3->sig)) + // // check if it covered not only this intersection, but all intersections with matc + // // if so, we do not need to check all of them separately + // j = len; break; } } + if (k == result.len) + has_ambiguity = 1; + isect2 = NULL; + ti = NULL; + if (has_ambiguity) + break; } - if (ambig1) - skip[i] = 1; + if (has_ambiguity) + break; } + JL_GC_POP(); } } - // cleanup array to remove skipped entries - for (i = 0, j = 0; i < len; i++) { + arraylist_free(&allambig); + arraylist_free(&visited); + arraylist_free(&stack); + for (j = 0; j < result.len; j++) { + i = (size_t)result.items[j]; jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); - if (!skip[i]) { - jl_array_ptr_set(env.t, j++, matc); - // remove our sentinel entry markers - if (matc->fully_covers == SENTINEL) - matc->fully_covers = NOT_FULLY_COVERS; - } + // remove our sentinel entry markers + if (matc->fully_covers == SENTINEL) + matc->fully_covers = NOT_FULLY_COVERS; + result.items[j] = (void*)matc; + } + if (minmax) { + arraylist_push(&result, minmax); + j++; } + memcpy(jl_array_data(env.t, jl_method_match_t*), result.items, j * sizeof(jl_method_match_t*)); + arraylist_free(&result); if (j != len) jl_array_del_end((jl_array_t*)env.t, len - j); len = j; @@ -3294,7 +3943,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_method_t *meth = env.matc->method; jl_svec_t *tpenv = env.matc->sparams; JL_LOCK(&mt->writelock); - cache_method(mt, &mt->cache, (jl_value_t*)mt, (jl_tupletype_t*)unw, meth, world, env.min_valid, env.max_valid, tpenv); + cache_method(mt, &mt->cache, (jl_value_t*)mt, (jl_tupletype_t*)unw, meth, world, env.match.min_valid, env.match.max_valid, tpenv); JL_UNLOCK(&mt->writelock); } } @@ -3302,7 +3951,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, *ambig = has_ambiguity; JL_GC_POP(); if (lim >= 0 && len > lim) - return jl_false; + return jl_nothing; return env.t; } @@ -3321,32 +3970,35 @@ int jl_has_concrete_subtype(jl_value_t *typ) return ((jl_datatype_t*)typ)->has_concrete_subtype; } -// TODO: separate the codegen and typeinf locks -// currently using a coarser lock seems like -// the best way to avoid acquisition priority -// ordering violations -//static jl_mutex_t typeinf_lock; -#define typeinf_lock jl_codegen_lock - -static uint64_t inference_start_time = 0; -static uint8_t inference_is_measuring_compile_time = 0; +JL_DLLEXPORT uint64_t 
jl_typeinf_timing_begin(void) +{ + jl_task_t *ct = jl_current_task; + if (ct->reentrant_timing & 1) + return 0; + ct->reentrant_timing |= 1; + return jl_hrtime(); +} -JL_DLLEXPORT void jl_typeinf_begin(void) +JL_DLLEXPORT void jl_typeinf_timing_end(uint64_t start) { - JL_LOCK(&typeinf_lock); + if (!start) + return; + jl_task_t *ct = jl_current_task; + ct->reentrant_timing &= ~1u; if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) { - inference_start_time = jl_hrtime(); - inference_is_measuring_compile_time = 1; + uint64_t inftime = jl_hrtime() - start; + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime); } } -JL_DLLEXPORT void jl_typeinf_end(void) +JL_DLLEXPORT void jl_typeinf_lock_begin(void) { - if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) { - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time)); - inference_is_measuring_compile_time = 0; - } - JL_UNLOCK(&typeinf_lock); + JL_LOCK(&jl_codegen_lock); +} + +JL_DLLEXPORT void jl_typeinf_lock_end(void) +{ + JL_UNLOCK(&jl_codegen_lock); } #ifdef __cplusplus diff --git a/src/iddict.c b/src/iddict.c index da2c36d97d2e4..0a0895d048c32 100644 --- a/src/iddict.c +++ b/src/iddict.c @@ -1,49 +1,48 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -#define hash_size(h) (jl_array_len(h) / 2) +#define hash_size(h) (h->length / 2) // compute empirical max-probe for a given size #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6) -#define keyhash(k) jl_object_id_(jl_typeof(k), k) +#define keyhash(k) jl_object_id_(jl_typetagof(k), k) #define h2index(hv, sz) (size_t)(((hv) & ((sz)-1)) * 2) -static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val); +static inline int jl_table_assign_bp(jl_genericmemory_t **pa, jl_value_t *key, jl_value_t *val); -JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz) +JL_DLLEXPORT jl_genericmemory_t *jl_idtable_rehash(jl_genericmemory_t *a, size_t newsz) { - size_t sz = jl_array_len(a); + size_t sz = a->length; size_t i; - jl_value_t **ol = (jl_value_t **)a->data; - jl_array_t *newa = jl_alloc_vec_any(newsz); - // keep the original array in the original slot since we need `ol` + jl_value_t **ol = (jl_value_t **) a->ptr; + jl_genericmemory_t *newa = NULL; + // keep the original memory in the original slot since we need `ol` // to be valid in the loop below. 
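/* Illustrative sketch (editorial aside, not part of the patch): the IdDict
 * storage keeps the same shape after the jl_array_t -> jl_genericmemory_t
 * switch seen above: one flat buffer of 2*size slots holding (key, value)
 * pairs, where hash_size() is length/2, h2index() masks the hash into the
 * power-of-two table and doubles it, and lookups probe pair-by-pair up to a
 * max-probe bound before the table is grown and rehashed.  A standalone toy
 * version with integer keys (0 marks an empty slot), an identity hash, and
 * hypothetical names throughout: */
#include <stdio.h>
#include <stddef.h>

#define TABLE_LEN 16                       /* 8 (key, value) pairs, power of two */
static unsigned tab[TABLE_LEN];            /* tab[2i] = key, tab[2i+1] = value   */

#define HASH_SIZE   (TABLE_LEN / 2)
#define MAX_PROBE   4
#define H2INDEX(hv) ((size_t)(((hv) & (HASH_SIZE - 1)) * 2))

static int toy_put(unsigned key, unsigned val)
{
    size_t index = H2INDEX(key);
    for (size_t iter = 0; iter < MAX_PROBE; iter++) {
        if (tab[index] == 0 || tab[index] == key) {   /* empty slot or same key */
            tab[index] = key;
            tab[index + 1] = val;
            return 1;
        }
        index = (index + 2) & (TABLE_LEN - 1);        /* step to the next pair  */
    }
    return 0;                      /* probe budget exhausted: grow and rehash   */
}

static int toy_get(unsigned key, unsigned *val)
{
    size_t index = H2INDEX(key);
    for (size_t iter = 0; iter < MAX_PROBE; iter++) {
        if (tab[index] == key) {
            *val = tab[index + 1];
            return 1;
        }
        if (tab[index] == 0)
            return 0;              /* hit an empty slot: key is not present     */
        index = (index + 2) & (TABLE_LEN - 1);
    }
    return 0;
}

int main(void)
{
    unsigned v;
    toy_put(3, 30);
    toy_put(11, 110);              /* 11 & 7 == 3, so this probes past key 3    */
    if (toy_get(11, &v))
        printf("11 -> %u\n", v);   /* prints 110                                */
    if (!toy_get(5, &v))
        printf("5 not present\n");
    return 0;
}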
JL_GC_PUSH2(&newa, &a); + newa = jl_alloc_memory_any(newsz); for (i = 0; i < sz; i += 2) { if (ol[i + 1] != NULL) { jl_table_assign_bp(&newa, ol[i], ol[i + 1]); - // it is however necessary here because allocation - // can (and will) occur in a recursive call inside table_lookup_bp } } JL_GC_POP(); return newa; } -static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val) +static inline int jl_table_assign_bp(jl_genericmemory_t **pa, jl_value_t *key, jl_value_t *val) { // pa points to a **un**rooted address uint_t hv; - jl_array_t *a = *pa; + jl_genericmemory_t *a = *pa; size_t orig, index, iter, empty_slot; size_t newsz, sz = hash_size(a); if (sz == 0) { - a = jl_alloc_vec_any(HT_N_INLINE); + a = jl_alloc_memory_any(HT_N_INLINE); sz = hash_size(a); *pa = a; } size_t maxprobe = max_probe(sz); - _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data; + _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*) a->ptr; hv = keyhash(key); while (1) { @@ -92,7 +91,7 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_ /* quadruple size, rehash, retry the insert */ /* it's important to grow the table really fast; otherwise we waste */ /* lots of time rehashing all the keys over and over. */ - sz = jl_array_len(a); + sz = a -> length; if (sz < HT_N_INLINE) newsz = HT_N_INLINE; else if (sz >= (1 << 19) || (sz <= (1 << 8))) @@ -102,20 +101,20 @@ static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_ *pa = jl_idtable_rehash(*pa, newsz); a = *pa; - tab = (_Atomic(jl_value_t*)*)a->data; + tab = (_Atomic(jl_value_t*)*) a->ptr; sz = hash_size(a); maxprobe = max_probe(sz); } } /* returns bp if key is in hash, otherwise NULL */ -inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT +inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_genericmemory_t *a, jl_value_t *key) JL_NOTSAFEPOINT { size_t sz = hash_size(a); if (sz == 0) return NULL; size_t maxprobe = max_probe(sz); - _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data; + _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*) a->ptr; uint_t hv = keyhash(key); size_t index = h2index(hv, sz); sz *= 2; @@ -142,7 +141,7 @@ inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL } JL_DLLEXPORT -jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int *p_inserted) +jl_genericmemory_t *jl_eqtable_put(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *val, int *p_inserted) { int inserted = jl_table_assign_bp(&h, key, val); if (p_inserted) @@ -153,14 +152,20 @@ jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int // Note: lookup in the IdDict is permitted concurrently, if you avoid deletions, // and assuming you do use an external lock around all insertions JL_DLLEXPORT -jl_value_t *jl_eqtable_get(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT +jl_value_t *jl_eqtable_get(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT { _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key); return (bp == NULL) ? deflt : jl_atomic_load_relaxed(bp); } +jl_value_t *jl_eqtable_getkey(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT +{ + _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key); + return (bp == NULL) ? 
deflt : jl_atomic_load_relaxed(bp - 1); +} + JL_DLLEXPORT -jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, int *found) +jl_value_t *jl_eqtable_pop(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt, int *found) { _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key); if (found) @@ -174,12 +179,12 @@ jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, in } JL_DLLEXPORT -size_t jl_eqtable_nextind(jl_array_t *t, size_t i) +size_t jl_eqtable_nextind(jl_genericmemory_t *t, size_t i) { if (i & 1) i++; - size_t alen = jl_array_dim0(t); - while (i < alen && ((void **)t->data)[i + 1] == NULL) + size_t alen = t->length; + while (i < alen && ((void**) t->ptr)[i + 1] == NULL) i += 2; if (i >= alen) return (size_t)-1; @@ -188,3 +193,4 @@ size_t jl_eqtable_nextind(jl_array_t *t, size_t i) #undef hash_size #undef max_probe +#undef h2index diff --git a/src/idset.c b/src/idset.c new file mode 100644 index 0000000000000..b9711ee17f021 --- /dev/null +++ b/src/idset.c @@ -0,0 +1,118 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + + +static uint_t idset_hash(size_t idx, jl_value_t *data) +{ + jl_value_t *x = jl_genericmemory_ptr_ref(data, idx); + // x should not be NULL, unless there was concurrent corruption + return x == NULL ? 0 : jl_object_id(x); +} + +static int idset_eq(size_t idx, const void *y, jl_value_t *data, uint_t hv) +{ + jl_value_t *x = jl_genericmemory_ptr_ref(data, idx); + // x should not be NULL, unless there was concurrent corruption + return x == NULL ? 0 : jl_egal(x, (jl_value_t*)y); +} + +jl_genericmemory_t *jl_idset_rehash(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, size_t newsz) +{ + if (newsz == 0) + return idxs; + newsz = next_power_of_two(newsz); + //if (idxs->length == newsz) + // jl_idset_put_idx(keys, idxs, -newsz+1); + //else + return smallintset_rehash(idxs, idset_hash, (jl_value_t*)keys, newsz, 0); +} + +// Return idx if key is in hash, otherwise -1 +// Note: lookup in the IdSet is permitted concurrently, if you avoid deletions, +// and assuming you do use an external lock around all insertions +ssize_t jl_idset_peek_bp(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT +{ + uintptr_t hv = jl_object_id(key); + return jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, hv, 0); +} + +jl_value_t *jl_idset_get(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT +{ + ssize_t idx = jl_idset_peek_bp(keys, idxs, key); + if (idx == -1) + return NULL; + return jl_genericmemory_ptr_ref(keys, idx); +} + + +static ssize_t idset_compact(jl_genericmemory_t *keys) +{ + // compact keys before rehashing idxs + ssize_t i, j; + ssize_t rehash = 0; + for (i = j = 0; i < keys->length; i++) { + jl_value_t *k = jl_genericmemory_ptr_ref(keys, i); + if (k != NULL) { + if (i != j) { + rehash = 1; + jl_genericmemory_ptr_set(keys, j, k); + jl_genericmemory_ptr_set(keys, i, NULL); + } + j++; + } + } + return rehash ? -j : j; +} + +jl_genericmemory_t *jl_idset_put_key(jl_genericmemory_t *keys, jl_value_t *key, ssize_t *newidx) +{ + ssize_t l = keys->length; + ssize_t i = l; + while (i > 0 && jl_genericmemory_ptr_ref(keys, i - 1) == NULL) + i--; + // i points to the place to insert + *newidx = i; + if (i == l) { + i = idset_compact(keys); + if (i < 0) { + *newidx = i - 1; + i = -i; + } + if (i >= l / 3 * 2) { + size_t nl = l < 4 ? 
4 : (l * 3) >> 1; // grow space by 50% if less than 33% free after compacting + jl_genericmemory_t *nk = jl_alloc_genericmemory(jl_memory_any_type, nl); + if (i > 0) + memcpy(nk->ptr, keys->ptr, sizeof(void*) * i); + keys = nk; + } + } + assert(jl_genericmemory_ptr_ref(keys, i) == NULL); + jl_genericmemory_ptr_set(keys, i, key); + return keys; +} + +jl_genericmemory_t *jl_idset_put_idx(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, ssize_t idx) +{ + _Atomic(jl_genericmemory_t*) newidxs = idxs; + JL_GC_PUSH1(&newidxs); + if (idx < 0) { // full rehash + smallintset_empty(idxs); + for (ssize_t i = 0; i < -idx; i++) + if (jl_genericmemory_ptr_ref(keys, i) != NULL) + jl_smallintset_insert(&newidxs, NULL, idset_hash, i, (jl_value_t*)keys); + } + else { + jl_smallintset_insert(&newidxs, NULL, idset_hash, idx, (jl_value_t*)keys); + } + JL_GC_POP(); + return jl_atomic_load_relaxed(&newidxs); +} + +/* returns idx if key is in hash, otherwise -1 */ +ssize_t jl_idset_pop(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT +{ + uintptr_t hv = jl_object_id(key); + ssize_t idx = jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, hv, 1); + if (idx != -1) + jl_genericmemory_ptr_set(keys, idx, NULL); + return idx; +} diff --git a/src/init.c b/src/init.c index a5632fc66a45a..d4128c8ae9e40 100644 --- a/src/init.c +++ b/src/init.c @@ -76,7 +76,7 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi) #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" #endif - *stack_hi = (void*)&stacksize; + *stack_hi = (void*)__builtin_frame_address(0); #pragma GCC diagnostic pop return; # elif defined(_OS_DARWIN_) @@ -84,9 +84,8 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi) extern size_t pthread_get_stacksize_np(pthread_t thread); pthread_t thread = pthread_self(); void *stackaddr = pthread_get_stackaddr_np(thread); - size_t stacksize = pthread_get_stacksize_np(thread); *stack_lo = (void*)stackaddr; - *stack_hi = (void*)&stacksize; + *stack_hi = (void*)__builtin_frame_address(0); return; # elif defined(_OS_FREEBSD_) pthread_attr_t attr; @@ -97,7 +96,7 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi) pthread_attr_getstack(&attr, &stackaddr, &stacksize); pthread_attr_destroy(&attr); *stack_lo = (void*)stackaddr; - *stack_hi = (void*)&stacksize; + *stack_hi = (void*)__builtin_frame_address(0); return; # else # warning "Getting precise stack size for thread is not supported." @@ -106,19 +105,8 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi) struct rlimit rl; getrlimit(RLIMIT_STACK, &rl); size_t stacksize = rl.rlim_cur; -// We intentionally leak a stack address here core.StackAddressEscape -# ifndef __clang_analyzer__ - *stack_hi = (void*)&stacksize; -#pragma GCC diagnostic push -#if defined(_COMPILER_GCC_) && __GNUC__ >= 12 -#pragma GCC diagnostic ignored "-Wdangling-pointer" -#endif + *stack_hi = __builtin_frame_address(0); *stack_lo = (void*)((char*)*stack_hi - stacksize); -#pragma GCC diagnostic pop -# else - *stack_hi = 0; - *stack_lo = 0; -# endif #endif } @@ -209,27 +197,78 @@ static void jl_close_item_atexit(uv_handle_t *handle) } } -JL_DLLEXPORT void jl_atexit_hook(int exitcode) +// This prevents `ct` from returning via error handlers or other unintentional +// means by destroying some old state before we start destroying that state in atexit hooks. 
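/* Illustrative sketch (editorial aside, not part of the patch): the init.c
 * hunks above replace the old "address of a local variable" estimate with the
 * GCC/Clang builtin __builtin_frame_address(0) when computing the high end of
 * the current stack.  The generic fallback pattern shown there (frame address
 * as stack_hi, RLIMIT_STACK as the extent, assuming a downward-growing stack)
 * can be written standalone roughly as below; POSIX-only, names hypothetical: */
#include <stdio.h>
#include <stddef.h>
#include <sys/resource.h>

static void estimate_stack_bounds(void **stack_lo, void **stack_hi)
{
    struct rlimit rl;
    getrlimit(RLIMIT_STACK, &rl);          /* rlim_cur may be RLIM_INFINITY;
                                              a real implementation would cap it */
    size_t stacksize = (size_t)rl.rlim_cur;
    /* __builtin_frame_address(0) points into the current frame, i.e. near the
     * in-use top of the stack, without taking the address of a local object. */
    *stack_hi = __builtin_frame_address(0);
    *stack_lo = (void*)((char*)*stack_hi - stacksize);
}

int main(void)
{
    void *lo, *hi;
    estimate_stack_bounds(&lo, &hi);
    printf("approximate stack range: [%p, %p)\n", lo, hi);
    return 0;
}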
+void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT; + +// cause this process to exit with WEXITSTATUS(signo), after waiting to finish all julia, C, and C++ cleanup +JL_DLLEXPORT void jl_exit(int exitcode) +{ + jl_atexit_hook(exitcode); + exit(exitcode); +} + +// cause this process to exit with WTERMSIG(signo), +// fairly aggressively (flushing stderr a bit, and doing a little bit of other +// external cleanup, but no internal cleanup) +JL_DLLEXPORT void jl_raise(int signo) +{ + uv_tty_reset_mode(); + fflush(NULL); +#ifdef _OS_WINDOWS_ + if (signo == SIGABRT) { + signal(signo, SIG_DFL); + abort(); + } + // the exit status could also potentially be set to an NTSTATUS value + // corresponding to a signal number, but this seems somewhat is uncommon on Windows + TerminateProcess(GetCurrentProcess(), 3); // aka _exit + abort(); // prior call does not return, because we passed GetCurrentProcess() +#else + signal(signo, SIG_DFL); + sigset_t sset; + sigemptyset(&sset); + sigaddset(&sset, signo); + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); + raise(signo); // aka pthread_kill(pthread_self(), signo); + if (signo == SIGABRT) + abort(); + _exit(128 + signo); +#endif +} + +JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER { - if (jl_all_tls_states == NULL) + uv_tty_reset_mode(); + + if (jl_atomic_load_relaxed(&jl_all_tls_states) == NULL) return; - jl_task_t *ct = jl_current_task; + jl_task_t *ct = jl_get_current_task(); + + if (ct == NULL && jl_base_module) { + ct = container_of(jl_adopt_thread(), jl_task_t, gcstack); + } + else if (ct != NULL) { + // we are about to start tearing everything down, so lets try not to get + // upset by the local mess of things when we run the user's _atexit hooks + // this also forces us into a GC-unsafe region without a safepoint + jl_task_frame_noreturn(ct); + jl_gc_safepoint_(ct->ptls); + } - if (exitcode == 0) - jl_write_compiler_output(); - jl_print_gc_stats(JL_STDERR); - if (jl_options.code_coverage) - jl_write_coverage_data(jl_options.output_code_coverage); - if (jl_options.malloc_log) - jl_write_malloc_log(); if (jl_base_module) { jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_atexit")); if (f != NULL) { + jl_value_t **fargs; + JL_GC_PUSHARGS(fargs, 2); + fargs[0] = f; + fargs[1] = jl_box_int32(exitcode); JL_TRY { + assert(ct); size_t last_age = ct->world_age; ct->world_age = jl_get_world_counter(); - jl_apply(&f, 1); + jl_apply(fargs, 2); ct->world_age = last_age; } JL_CATCH { @@ -238,15 +277,26 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO } + JL_GC_POP(); } } + if (ct && exitcode == 0) + jl_write_compiler_output(); + + jl_print_gc_stats(JL_STDERR); + if (jl_options.code_coverage) + jl_write_coverage_data(jl_options.output_code_coverage); + if (jl_options.malloc_log) + jl_write_malloc_log(); + // replace standard output streams with something that we can still print to // after the finalizers from base/stream.jl close the TTY JL_STDOUT = (uv_stream_t*) STDOUT_FILENO; JL_STDERR = (uv_stream_t*) STDERR_FILENO; - jl_gc_run_all_finalizers(ct); + if (ct) + jl_gc_run_all_finalizers(ct); uv_loop_t *loop = jl_global_event_loop(); if (loop != NULL) { @@ -254,7 +304,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_UV_LOCK(); uv_walk(loop, jl_uv_exitcleanup_walk, &queue); struct uv_shutdown_queue_item *item = queue.first; - if (ct != NULL) { + if (ct) { while (item) { JL_TRY { while (item) { @@ -284,22 +334,30 @@ JL_DLLEXPORT void 
jl_atexit_hook(int exitcode) // force libuv to spin until everything has finished closing loop->stop_flag = 0; while (uv_run(loop, UV_RUN_DEFAULT)) { } - JL_UV_UNLOCK(); + jl_wake_libuv(); // set the async pending flag, so that future calls are immediate no-ops on other threads + // we would like to guarantee this, but cannot currently, so there is still a small race window + // that needs to be fixed in libuv + } + if (ct) + (void)jl_gc_safe_enter(ct->ptls); // park in gc-safe + if (loop != NULL) { + // TODO: consider uv_loop_close(loop) here, before shutdown? + uv_library_shutdown(); + // no JL_UV_UNLOCK(), since it is now torn down } - // TODO: Destroy threads + // TODO: Destroy threads? - jl_destroy_timing(); -#ifdef ENABLE_TIMINGS + jl_destroy_timing(); // cleans up the current timing_stack for noreturn +#ifdef USE_TIMING_COUNTS jl_print_timings(); #endif - - jl_teardown_codegen(); + jl_teardown_codegen(); // prints stats } JL_DLLEXPORT void jl_postoutput_hook(void) { - if (jl_all_tls_states == NULL) + if (jl_atomic_load_relaxed(&jl_all_tls_states) == NULL) return; if (jl_base_module) { @@ -323,7 +381,8 @@ JL_DLLEXPORT void jl_postoutput_hook(void) return; } -static void post_boot_hooks(void); +void post_boot_hooks(void); +void post_image_load_hooks(void); JL_DLLEXPORT void *jl_libjulia_internal_handle; JL_DLLEXPORT void *jl_libjulia_handle; @@ -516,6 +575,14 @@ static char *abspath(const char *in, int nprefix) } } #else + // GetFullPathName intentionally errors if given an empty string so manually insert `.` to invoke cwd + char *in2 = (char*)malloc_s(JL_PATH_MAX); + if (strlen(in) - nprefix == 0) { + memcpy(in2, in, nprefix); + in2[nprefix] = '.'; + in2[nprefix+1] = '\0'; + in = in2; + } DWORD n = GetFullPathName(in + nprefix, 0, NULL, NULL); if (n <= 0) { jl_error("fatal error: jl_options.image_file path too long or GetFullPathName failed"); @@ -526,6 +593,7 @@ static char *abspath(const char *in, int nprefix) jl_error("fatal error: jl_options.image_file path too long or GetFullPathName failed"); } memcpy(out, in, nprefix); + free(in2); #endif return out; } @@ -561,7 +629,8 @@ static const char *absformat(const char *in) } static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel) -{ // this function resolves the paths in jl_options to absolute file locations as needed +{ + // this function resolves the paths in jl_options to absolute file locations as needed // and it replaces the pointers to `julia_bindir`, `julia_bin`, `image_file`, and output file paths // it may fail, print an error, and exit(1) if any of these paths are longer than JL_PATH_MAX // @@ -620,7 +689,7 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel) if (jl_options.output_code_coverage) jl_options.output_code_coverage = absformat(jl_options.output_code_coverage); if (jl_options.tracked_path) - jl_options.tracked_path = absformat(jl_options.tracked_path); + jl_options.tracked_path = abspath(jl_options.tracked_path, 0); const char **cmdp = jl_options.cmds; if (cmdp) { @@ -647,6 +716,10 @@ static void jl_set_io_wait(int v) } extern jl_mutex_t jl_modules_mutex; +extern jl_mutex_t precomp_statement_out_lock; +extern jl_mutex_t newly_inferred_mutex; +extern jl_mutex_t global_roots_lock; +extern jl_mutex_t profile_show_peek_cond_lock; static void restore_fp_env(void) { @@ -659,17 +732,46 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ JL_DLLEXPORT int jl_default_debug_info_kind; +static void init_global_mutexes(void) { + JL_MUTEX_INIT(&jl_modules_mutex, 
"jl_modules_mutex"); + JL_MUTEX_INIT(&precomp_statement_out_lock, "precomp_statement_out_lock"); + JL_MUTEX_INIT(&newly_inferred_mutex, "newly_inferred_mutex"); + JL_MUTEX_INIT(&global_roots_lock, "global_roots_lock"); + JL_MUTEX_INIT(&jl_codegen_lock, "jl_codegen_lock"); + JL_MUTEX_INIT(&typecache_lock, "typecache_lock"); + JL_MUTEX_INIT(&profile_show_peek_cond_lock, "profile_show_peek_cond_lock"); +} + JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) { - jl_default_debug_info_kind = 0; - + // initialize many things, in no particular order + // but generally running from simple platform things to optional + // configuration features jl_init_timing(); // Make sure we finalize the tls callback before starting any threads. (void)jl_get_pgcstack(); - jl_safepoint_init(); + + // initialize backtraces + jl_init_profile_lock(); +#ifdef _OS_WINDOWS_ + uv_mutex_init(&jl_in_stackwalk); + SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES | SYMOPT_IGNORE_CVREC); + if (!SymInitialize(GetCurrentProcess(), "", 1)) { + jl_safe_printf("WARNING: failed to initialize stack walk info\n"); + } + needsSymRefreshModuleList = 0; +#else + // nongnu libunwind initialization is only threadsafe on architecture where the + // author could access TSAN, per https://github.com/libunwind/libunwind/pull/109 + // so we need to do this once early (before threads) + rec_backtrace(NULL, 0, 0); +#endif + libsupport_init(); + jl_safepoint_init(); + jl_page_size = jl_getpagesize(); htable_new(&jl_current_modules, 0); - JL_MUTEX_INIT(&jl_modules_mutex); + init_global_mutexes(); jl_precompile_toplevel_module = NULL; ios_set_io_wait_func = jl_set_io_wait; jl_io_loop = uv_default_loop(); // this loop will internal events (spawning process etc.), @@ -677,34 +779,30 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_init_uv(); init_stdio(); restore_fp_env(); - restore_signals(); + if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON) + restore_signals(); + jl_init_intrinsic_properties(); - jl_page_size = jl_getpagesize(); + // Important offset for external codegen. 
+ jl_task_gcstack_offset = offsetof(jl_task_t, gcstack); + jl_task_ptls_offset = offsetof(jl_task_t, ptls); + jl_prep_sanitizers(); void *stack_lo, *stack_hi; jl_init_stack_limits(1, &stack_lo, &stack_hi); - jl_libjulia_internal_handle = jl_load_dynamic_library(NULL, JL_RTLD_DEFAULT, 1); + jl_libjulia_internal_handle = jl_find_dynamic_library_by_addr(&jl_load_dynamic_library); + jl_libjulia_handle = jl_find_dynamic_library_by_addr(&jl_any_type); #ifdef _OS_WINDOWS_ jl_exe_handle = GetModuleHandleA(NULL); jl_RTLD_DEFAULT_handle = jl_libjulia_internal_handle; - if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (LPCWSTR)&jl_any_type, - (HMODULE*)&jl_libjulia_handle)) { - jl_error("could not load base module"); - } - jl_ntdll_handle = jl_dlopen("ntdll.dll", 0); // bypass julia's pathchecking for system dlls - jl_kernel32_handle = jl_dlopen("kernel32.dll", 0); - jl_crtdll_handle = jl_dlopen(jl_crtdll_name, 0); - jl_winsock_handle = jl_dlopen("ws2_32.dll", 0); - uv_mutex_init(&jl_in_stackwalk); - SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES | SYMOPT_IGNORE_CVREC); - if (!SymInitialize(GetCurrentProcess(), "", 1)) { - jl_printf(JL_STDERR, "WARNING: failed to initialize stack walk info\n"); - } + jl_ntdll_handle = jl_dlopen("ntdll.dll", JL_RTLD_NOLOAD); // bypass julia's pathchecking for system dlls + jl_kernel32_handle = jl_dlopen("kernel32.dll", JL_RTLD_NOLOAD); + jl_crtdll_handle = jl_dlopen(jl_crtdll_name, JL_RTLD_NOLOAD); + jl_winsock_handle = jl_dlopen("ws2_32.dll", JL_RTLD_NOLOAD); + HMODULE jl_dbghelp = (HMODULE) jl_dlopen("dbghelp.dll", JL_RTLD_NOLOAD); needsSymRefreshModuleList = 0; - HMODULE jl_dbghelp = (HMODULE) jl_dlopen("dbghelp.dll", 0); if (jl_dbghelp) jl_dlsym(jl_dbghelp, "SymRefreshModuleList", (void **)&hSymRefreshModuleList, 1); #else @@ -716,13 +814,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) #endif #endif - if ((jl_options.outputo || jl_options.outputbc || jl_options.outputasm) && - (jl_options.code_coverage || jl_options.malloc_log)) { - jl_error("cannot generate code-coverage or track allocation information while generating a .o, .bc, or .s output file"); - } - jl_init_rand(); - jl_init_profile_lock(); jl_init_runtime_ccall(); jl_init_tasks(); jl_init_threading(); @@ -731,6 +823,14 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_install_default_signal_handlers(); jl_gc_init(); + + arraylist_new(&jl_linkage_blobs, 0); + arraylist_new(&jl_image_relocs, 0); + arraylist_new(&eytzinger_image_tree, 0); + arraylist_new(&eytzinger_idxs, 0); + arraylist_push(&eytzinger_idxs, (void*)0); + arraylist_push(&eytzinger_image_tree, (void*)1); // outside image + jl_ptls_t ptls = jl_init_threadtls(0); #pragma GCC diagnostic push #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 @@ -745,28 +845,30 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct) { + JL_TIMING(JULIA_INIT, JULIA_INIT); jl_resolve_sysimg_location(rel); // loads sysimg if available, and conditionally sets jl_options.cpu_target - if (jl_options.image_file) + if (rel == JL_IMAGE_IN_MEMORY) + jl_set_sysimg_so(jl_exe_handle); + else if (jl_options.image_file) jl_preload_sysimg_so(jl_options.image_file); if (jl_options.cpu_target == NULL) jl_options.cpu_target = "native"; + jl_init_codegen(); + jl_init_common_symbols(); if (jl_options.image_file) { jl_restore_system_image(jl_options.image_file); } else { jl_init_types(); - 
jl_init_codegen(); + jl_global_roots_list = (jl_genericmemory_t*)jl_an_empty_memory_any; + jl_global_roots_keyset = (jl_genericmemory_t*)jl_an_empty_memory_any; } - jl_init_common_symbols(); jl_init_flisp(); jl_init_serializer(); if (!jl_options.image_file) { - jl_core_module = jl_new_module(jl_symbol("Core")); - jl_core_module->parent = jl_core_module; - jl_type_typename->mt->module = jl_core_module; jl_top_module = jl_core_module; jl_init_intrinsic_functions(); jl_init_primitives(); @@ -777,7 +879,14 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ if (jl_base_module == NULL) { // nthreads > 1 requires code in Base - jl_n_threads = 1; + jl_atomic_store_relaxed(&jl_n_threads, 1); + jl_n_markthreads = 0; + jl_n_sweepthreads = 0; + jl_n_gcthreads = 0; + jl_n_threads_per_pool[0] = 1; + jl_n_threads_per_pool[1] = 0; + } else { + post_image_load_hooks(); } jl_start_threads(); @@ -787,7 +896,7 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ jl_array_t *init_order = jl_module_init_order; JL_GC_PUSH1(&init_order); jl_module_init_order = NULL; - int i, l = jl_array_len(init_order); + int i, l = jl_array_nrows(init_order); for (i = 0; i < l; i++) { jl_value_t *mod = jl_array_ptr_ref(init_order, i); jl_module_run_initializer((jl_module_t*)mod); @@ -799,80 +908,6 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ jl_install_sigint_handler(); } -static jl_value_t *core(const char *name) -{ - return jl_get_global(jl_core_module, jl_symbol(name)); -} - -// fetch references to things defined in boot.jl -static void post_boot_hooks(void) -{ - jl_char_type = (jl_datatype_t*)core("Char"); - jl_int8_type = (jl_datatype_t*)core("Int8"); - jl_int16_type = (jl_datatype_t*)core("Int16"); - jl_float16_type = (jl_datatype_t*)core("Float16"); - jl_float32_type = (jl_datatype_t*)core("Float32"); - jl_float64_type = (jl_datatype_t*)core("Float64"); - jl_floatingpoint_type = (jl_datatype_t*)core("AbstractFloat"); - jl_number_type = (jl_datatype_t*)core("Number"); - jl_signed_type = (jl_datatype_t*)core("Signed"); - jl_datatype_t *jl_unsigned_type = (jl_datatype_t*)core("Unsigned"); - jl_datatype_t *jl_integer_type = (jl_datatype_t*)core("Integer"); - - jl_bool_type->super = jl_integer_type; - jl_uint8_type->super = jl_unsigned_type; - jl_uint16_type->super = jl_unsigned_type; - jl_uint32_type->super = jl_unsigned_type; - jl_uint64_type->super = jl_unsigned_type; - jl_int32_type->super = jl_signed_type; - jl_int64_type->super = jl_signed_type; - - jl_errorexception_type = (jl_datatype_t*)core("ErrorException"); - jl_stackovf_exception = jl_new_struct_uninit((jl_datatype_t*)core("StackOverflowError")); - jl_diverror_exception = jl_new_struct_uninit((jl_datatype_t*)core("DivideError")); - jl_undefref_exception = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError")); - jl_undefvarerror_type = (jl_datatype_t*)core("UndefVarError"); - jl_atomicerror_type = (jl_datatype_t*)core("ConcurrencyViolationError"); - jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException")); - jl_boundserror_type = (jl_datatype_t*)core("BoundsError"); - jl_memory_exception = jl_new_struct_uninit((jl_datatype_t*)core("OutOfMemoryError")); - jl_readonlymemory_exception = jl_new_struct_uninit((jl_datatype_t*)core("ReadOnlyMemoryError")); - jl_typeerror_type = (jl_datatype_t*)core("TypeError"); -#ifdef SEGV_EXCEPTION - jl_segv_exception = jl_new_struct_uninit((jl_datatype_t*)core("SegmentationFault")); -#endif - 
jl_argumenterror_type = (jl_datatype_t*)core("ArgumentError"); - jl_methoderror_type = (jl_datatype_t*)core("MethodError"); - jl_loaderror_type = (jl_datatype_t*)core("LoadError"); - jl_initerror_type = (jl_datatype_t*)core("InitError"); - jl_pair_type = core("Pair"); - - jl_weakref_type = (jl_datatype_t*)core("WeakRef"); - jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name; - - jl_init_box_caches(); - - // set module field of primitive types - int i; - void **table = jl_core_module->bindings.table; - for (i = 1; i < jl_core_module->bindings.size; i += 2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i]; - jl_value_t *v = jl_atomic_load_relaxed(&b->value); - if (v) { - if (jl_is_unionall(v)) - v = jl_unwrap_unionall(v); - if (jl_is_datatype(v)) { - jl_datatype_t *tt = (jl_datatype_t*)v; - tt->name->module = jl_core_module; - if (tt->name->mt) - tt->name->mt->module = jl_core_module; - } - } - } - } -} - #ifdef __cplusplus } #endif diff --git a/src/interpreter.c b/src/interpreter.c index 60bd4a6e1ce7e..5102d1417c939 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -65,7 +65,8 @@ extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT; // we define this separately so that we can populate the frame before we add it to the backtrace // it's recommended to mark the containing function with NOINLINE, though not essential #define JL_GC_ENABLEFRAME(frame) \ - ((void**)&frame[1])[0] = __builtin_frame_address(0); + jl_signal_fence(); \ + ((void**)&frame[1])[0] = __builtin_frame_address(0); #endif @@ -91,10 +92,9 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s) if (!jl_is_symbol(fname)) { jl_error("method: invalid declaration"); } - jl_value_t *bp_owner = (jl_value_t*)modu; jl_binding_t *b = jl_get_binding_for_method_def(modu, fname); _Atomic(jl_value_t*) *bp = &b->value; - jl_value_t *gf = jl_generic_function_def(b->name, b->owner, bp, bp_owner, b); + jl_value_t *gf = jl_generic_function_def(fname, modu, bp, b); return gf; } @@ -103,7 +103,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s) fname = eval_value(args[0], s); jl_methtable_t *mt = NULL; - if (jl_typeis(fname, jl_methtable_type)) { + if (jl_typetagis(fname, jl_methtable_type)) { mt = (jl_methtable_t*)fname; } atypes = eval_value(args[1], s); @@ -147,18 +147,26 @@ jl_value_t *jl_eval_global_var(jl_module_t *m, jl_sym_t *e) { jl_value_t *v = jl_get_global(m, e); if (v == NULL) - jl_undefined_var_error(e); + jl_undefined_var_error(e, (jl_value_t*)m); + return v; +} + +jl_value_t *jl_eval_globalref(jl_globalref_t *g) +{ + jl_value_t *v = jl_get_globalref_value(g); + if (v == NULL) + jl_undefined_var_error(g->name, (jl_value_t*)g->mod); return v; } static int jl_source_nslots(jl_code_info_t *src) JL_NOTSAFEPOINT { - return jl_array_len(src->slotflags); + return jl_array_nrows(src->slotflags); } static int jl_source_nssavalues(jl_code_info_t *src) JL_NOTSAFEPOINT { - return jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes); + return jl_is_long(src->ssavaluetypes) ? 
jl_unbox_long(src->ssavaluetypes) : jl_array_nrows(src->ssavaluetypes); } static void eval_stmt_value(jl_value_t *stmt, interpreter_state *s) @@ -177,20 +185,20 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) else return s->locals[jl_source_nslots(src) + id]; } - if (jl_is_slot(e) || jl_is_argument(e)) { + if (jl_is_slotnumber(e) || jl_is_argument(e)) { ssize_t n = jl_slot_number(e); if (src == NULL || n > jl_source_nslots(src) || n < 1 || s->locals == NULL) jl_error("access to invalid slot number"); jl_value_t *v = s->locals[n - 1]; if (v == NULL) - jl_undefined_var_error((jl_sym_t*)jl_array_ptr_ref(src->slotnames, n - 1)); + jl_undefined_var_error((jl_sym_t*)jl_array_ptr_ref(src->slotnames, n - 1), (jl_value_t*)jl_local_sym); return v; } if (jl_is_quotenode(e)) { return jl_quotenode_value(e); } if (jl_is_globalref(e)) { - return jl_eval_global_var(jl_globalref_mod(e), jl_globalref_name(e)); + return jl_eval_globalref((jl_globalref_t*)e); } if (jl_is_symbol(e)) { // bare symbols appear in toplevel exprs not wrapped in `thunk` return jl_eval_global_var(s->module, (jl_sym_t*)e); @@ -209,7 +217,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) return e; jl_expr_t *ex = (jl_expr_t*)e; jl_value_t **args = jl_array_ptr_data(ex->args); - size_t nargs = jl_array_len(ex->args); + size_t nargs = jl_array_nrows(ex->args); jl_sym_t *head = ex->head; if (head == jl_call_sym) { return do_call(args, nargs, s); @@ -223,7 +231,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) else if (head == jl_isdefined_sym) { jl_value_t *sym = args[0]; int defined = 0; - if (jl_is_slot(sym) || jl_is_argument(sym)) { + if (jl_is_slotnumber(sym) || jl_is_argument(sym)) { ssize_t n = jl_slot_number(sym); if (src == NULL || n > jl_source_nslots(src) || n < 1 || s->locals == NULL) jl_error("access to invalid slot number"); @@ -260,7 +268,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) if (var == jl_getfield_undefref_sym) jl_throw(jl_undefref_exception); else - jl_undefined_var_error(var); + jl_undefined_var_error(var, (jl_value_t*)jl_local_sym); } return jl_nothing; } @@ -289,7 +297,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) argv[i] = eval_value(args[i], s); JL_NARGSV(new_opaque_closure, 4); jl_value_t *ret = (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)argv[0], argv[1], argv[2], - argv[3], argv+4, nargs-4); + argv[3], argv+4, nargs-4, 1); JL_GC_POP(); return ret; } @@ -299,7 +307,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) if (s->sparam_vals && n <= jl_svec_len(s->sparam_vals)) { jl_value_t *sp = jl_svecref(s->sparam_vals, n - 1); if (jl_is_typevar(sp) && !s->preevaluation) - jl_undefined_var_error(((jl_tvar_t*)sp)->name); + jl_undefined_var_error(((jl_tvar_t*)sp)->name, (jl_value_t*)jl_static_parameter_sym); return sp; } // static parameter val unknown needs to be an error for ccall @@ -342,20 +350,42 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ { size_t from = s->ip; size_t ip = to; - unsigned nphi = 0; + unsigned nphiblockstmts = 0; + unsigned last_phi = 0; for (ip = to; ip < ns; ip++) { jl_value_t *e = jl_array_ptr_ref(stmts, ip); - if (!jl_is_phinode(e)) - break; - nphi += 1; - } - if (nphi) { + if (!jl_is_phinode(e)) { + if (jl_is_expr(e) || jl_is_returnnode(e) || jl_is_gotoifnot(e) || + jl_is_gotonode(e) || jl_is_phicnode(e) || jl_is_upsilonnode(e) || + jl_is_ssavalue(e)) { + break; + } + // Everything else is allowed in the 
phi-block for implementation + // convenience - fall through. + } else { + last_phi = nphiblockstmts + 1; + } + nphiblockstmts += 1; + } + // Cut off the phi block at the last phi node. For global refs that are not + // actually in the phi block, we want to evaluate them in the regular interpreter + // loop instead to make sure exception state is set up properly in case they throw. + nphiblockstmts = last_phi; + ip = to + last_phi; + if (nphiblockstmts) { jl_value_t **dest = &s->locals[jl_source_nslots(s->src) + to]; - jl_value_t **phis; // = (jl_value_t**)alloca(sizeof(jl_value_t*) * nphi); - JL_GC_PUSHARGS(phis, nphi); - for (unsigned i = 0; i < nphi; i++) { + jl_value_t **phis; // = (jl_value_t**)alloca(sizeof(jl_value_t*) * nphiblockstmts); + JL_GC_PUSHARGS(phis, nphiblockstmts); + for (unsigned i = 0; i < nphiblockstmts; i++) { jl_value_t *e = jl_array_ptr_ref(stmts, to + i); - assert(jl_is_phinode(e)); + if (!jl_is_phinode(e)) { + // IR verification guarantees that the only thing that gets + // evaluated here are constants, so it doesn't matter if we + // update the locals or the phis, but let's be consistent + // for simplicity. + phis[i] = eval_value(e, s); + continue; + } jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(e, 0); ssize_t edge = -1; size_t closest = to; // implicit edge has `to <= edge - 1 < to + i` @@ -364,8 +394,8 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ // %2 = phi ... // %3 = phi (1)[1 => %a], (2)[2 => %b] // from = 1, to = closest = 2, i = 1 --> edge = 2, edge_from = 2, from = 2 - for (unsigned j = 0; j < jl_array_len(edges); ++j) { - size_t edge_from = ((int32_t*)jl_array_data(edges))[j]; // 1-indexed + for (unsigned j = 0; j < jl_array_nrows(edges); ++j) { + size_t edge_from = jl_array_data(edges, int32_t)[j]; // 1-indexed if (edge_from == from + 1) { if (edge == -1) edge = j; @@ -398,7 +428,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ i -= n_oldphi; dest += n_oldphi; to += n_oldphi; - nphi -= n_oldphi; + nphiblockstmts -= n_oldphi; } if (edge != -1) { // if edges list doesn't contain last branch, or the value is explicitly undefined @@ -411,7 +441,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ phis[i] = val; } // now move all phi values to their position in edges - for (unsigned j = 0; j < nphi; j++) { + for (unsigned j = 0; j < nphiblockstmts; j++) { dest[j] = phis[j]; } JL_GC_POP(); @@ -422,7 +452,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, int toplevel) { jl_handler_t __eh; - size_t ns = jl_array_len(stmts); + size_t ns = jl_array_nrows(stmts); jl_task_t *ct = jl_current_task; while (1) { @@ -459,13 +489,76 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, ssize_t id = ((jl_ssavalue_t*)phic)->id - 1; s->locals[jl_source_nslots(s->src) + id] = val; } + else if (jl_is_enternode(stmt)) { + jl_enter_handler(&__eh); + // This is a bit tricky, but supports the implementation of PhiC nodes. + // They are conceptually slots, but the slot to store to doesn't get explicitly + // mentioned in the store (aka the "UpsilonNode") (this makes them integrate more + // nicely with the rest of the SSA representation). In a compiler, we would figure + // out which slot to store to at compile time when we encounter the statement. 
We + // can't quite do that here, but we do something similar: We scan the catch entry + // block (the only place where PhiC nodes may occur) to find all the Upsilons we + // can possibly encounter. Then, we remember which slot they store to (we abuse the + // SSA value result array for this purpose). TODO: We could do this only the first + // time we encounter a given enter. + size_t catch_ip = jl_enternode_catch_dest(stmt); + if (catch_ip) { + catch_ip -= 1; + while (catch_ip < ns) { + jl_value_t *phicnode = jl_array_ptr_ref(stmts, catch_ip); + if (!jl_is_phicnode(phicnode)) + break; + jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(phicnode, 0); + for (size_t i = 0; i < jl_array_nrows(values); ++i) { + jl_value_t *val = jl_array_ptr_ref(values, i); + assert(jl_is_ssavalue(val)); + size_t upsilon = ((jl_ssavalue_t*)val)->id - 1; + assert(jl_is_upsilonnode(jl_array_ptr_ref(stmts, upsilon))); + s->locals[jl_source_nslots(s->src) + upsilon] = jl_box_ssavalue(catch_ip + 1); + } + s->locals[jl_source_nslots(s->src) + catch_ip] = NULL; + catch_ip += 1; + } + // store current top of exception stack for restore in pop_exception. + } + s->locals[jl_source_nslots(s->src) + ip] = jl_box_ulong(jl_excstack_state()); + if (jl_enternode_scope(stmt)) { + jl_value_t *old_scope = ct->scope; + JL_GC_PUSH1(&old_scope); + jl_value_t *new_scope = eval_value(jl_enternode_scope(stmt), s); + ct->scope = new_scope; + if (!jl_setjmp(__eh.eh_ctx, 1)) { + eval_body(stmts, s, next_ip, toplevel); + jl_unreachable(); + } + ct->scope = old_scope; + JL_GC_POP(); + } + else { + if (!jl_setjmp(__eh.eh_ctx, 1)) { + eval_body(stmts, s, next_ip, toplevel); + jl_unreachable(); + } + } + jl_eh_restore_state(&__eh); + if (s->continue_at) { // means we reached a :leave expression + ip = s->continue_at; + s->continue_at = 0; + continue; + } + else { // a real exception + ip = catch_ip; + assert(jl_enternode_catch_dest(stmt) != 0); + continue; + } + } else if (jl_is_expr(stmt)) { // Most exprs are allowed to end a BB by fall through jl_sym_t *head = ((jl_expr_t*)stmt)->head; if (head == jl_assign_sym) { jl_value_t *lhs = jl_exprarg(stmt, 0); jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s); - if (jl_is_slot(lhs)) { + if (jl_is_slotnumber(lhs)) { ssize_t n = jl_slot_number(lhs); assert(n <= jl_source_nslots(s->src) && n > 0); s->locals[n - 1] = rhs; @@ -483,67 +576,37 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, sym = (jl_sym_t*)lhs; } JL_GC_PUSH1(&rhs); - jl_binding_t *b = jl_get_binding_wr_or_error(modu, sym); - jl_checked_assignment(b, rhs); + jl_binding_t *b = jl_get_binding_wr(modu, sym); + jl_checked_assignment(b, modu, sym, rhs); JL_GC_POP(); } } - else if (head == jl_enter_sym) { - jl_enter_handler(&__eh); - // This is a bit tricky, but supports the implementation of PhiC nodes. - // They are conceptually slots, but the slot to store to doesn't get explicitly - // mentioned in the store (aka the "UpsilonNode") (this makes them integrate more - // nicely with the rest of the SSA representation). In a compiler, we would figure - // out which slot to store to at compile time when we encounter the statement. We - // can't quite do that here, but we do something similar: We scan the catch entry - // block (the only place where PhiC nodes may occur) to find all the Upsilons we - // can possibly encounter. Then, we remember which slot they store to (we abuse the - // SSA value result array for this purpose). TODO: We could do this only the first - // time we encounter a given enter. 
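// A minimal standalone sketch of the control-flow pattern the EnterNode branch
// above relies on: every `enter` gets its own native eval_body frame guarded by
// a setjmp, and both a normal `:leave` and a thrown exception unwind back to it
// with a longjmp (the interpreter then resumes either at s->continue_at or at
// the catch destination). The names below (run_protected, handler_stack, ...)
// are illustrative only and are not part of the runtime API.

#include <setjmp.h>
#include <stdio.h>

static jmp_buf handler_stack[8];   // one buffer per dynamically nested "enter"
static int handler_top = -1;

static void protected_body(int should_throw)
{
    // ":leave" and "throw" both unwind by longjmp; only the code after the
    // setjmp decides whether that was normal control flow or an exception.
    longjmp(handler_stack[handler_top], should_throw ? 1 : 2);
}

static void run_protected(int should_throw)
{
    handler_top++;                                // "enter"
    switch (setjmp(handler_stack[handler_top])) {
    case 0:                                       // first return: run the body
        protected_body(should_throw);             // (the nested eval_body call)
        break;                                    // not reached; the body longjmps
    case 2:                                       // unwound by a ":leave"
        handler_top--;                            // cf. jl_eh_restore_state
        printf("left normally\n");
        break;
    default:                                      // unwound by a throw
        handler_top--;
        printf("caught exception\n");
        break;
    }
}

int main(void)
{
    run_protected(0);   // prints "left normally"
    run_protected(1);   // prints "caught exception"
    return 0;
}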
- size_t catch_ip = jl_unbox_long(jl_exprarg(stmt, 0)) - 1; - while (catch_ip < ns) { - jl_value_t *phicnode = jl_array_ptr_ref(stmts, catch_ip); - if (!jl_is_phicnode(phicnode)) - break; - jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(phicnode, 0); - for (size_t i = 0; i < jl_array_len(values); ++i) { - jl_value_t *val = jl_array_ptr_ref(values, i); - assert(jl_is_ssavalue(val)); - size_t upsilon = ((jl_ssavalue_t*)val)->id - 1; - assert(jl_is_upsilonnode(jl_array_ptr_ref(stmts, upsilon))); - s->locals[jl_source_nslots(s->src) + upsilon] = jl_box_ssavalue(catch_ip + 1); - } - s->locals[jl_source_nslots(s->src) + catch_ip] = NULL; - catch_ip += 1; - } - // store current top of exception stack for restore in pop_exception. - s->locals[jl_source_nslots(s->src) + ip] = jl_box_ulong(jl_excstack_state()); - if (!jl_setjmp(__eh.eh_ctx, 1)) { - return eval_body(stmts, s, next_ip, toplevel); - } - else if (s->continue_at) { // means we reached a :leave expression - ip = s->continue_at; - s->continue_at = 0; - continue; + else if (head == jl_leave_sym) { + int hand_n_leave = 0; + for (int i = 0; i < jl_expr_nargs(stmt); ++i) { + jl_value_t *arg = jl_exprarg(stmt, i); + if (arg == jl_nothing) + continue; + assert(jl_is_ssavalue(arg)); + jl_value_t *enter_stmt = jl_array_ptr_ref(stmts, ((jl_ssavalue_t*)arg)->id - 1); + if (enter_stmt == jl_nothing) + continue; + hand_n_leave += 1; } - else { // a real exception - ip = catch_ip; - continue; + if (hand_n_leave > 0) { + assert(hand_n_leave > 0); + // equivalent to jl_pop_handler(hand_n_leave), longjmping + // to the :enter code above instead, which handles cleanup + jl_handler_t *eh = ct->eh; + while (--hand_n_leave > 0) + eh = eh->prev; + // leave happens during normal control flow, but we must + // longjmp to pop the eval_body call for each enter. + s->continue_at = next_ip; + asan_unpoison_task_stack(ct, &eh->eh_ctx); + jl_longjmp(eh->eh_ctx, 1); } } - else if (head == jl_leave_sym) { - int hand_n_leave = jl_unbox_long(jl_exprarg(stmt, 0)); - assert(hand_n_leave > 0); - // equivalent to jl_pop_handler(hand_n_leave), but retaining eh for longjmp: - jl_handler_t *eh = ct->eh; - while (--hand_n_leave > 0) - eh = eh->prev; - jl_eh_restore_state(eh); - // leave happens during normal control flow, but we must - // longjmp to pop the eval_body call for each enter. 
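// In the new encoding handled above, each argument of the :leave expression is
// either `nothing` or an SSAValue pointing at the corresponding :enter
// statement; arguments (or referenced statements) that have been turned into
// `nothing` are skipped, so the handler count is recomputed by counting what
// survives, whereas the old branch being removed here read a raw count with
// jl_unbox_long. A small sketch of the resulting handler-chain walk, using the
// same fields as the code above (the helper name is illustrative only):

static jl_handler_t *target_enter_handler(jl_task_t *ct, int hand_n_leave)
{
    // walk `hand_n_leave - 1` links up the task's handler chain; the handler
    // returned is the one whose setjmp frame the :leave longjmps back to
    jl_handler_t *eh = ct->eh;
    while (--hand_n_leave > 0)
        eh = eh->prev;
    return eh;
}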
- s->continue_at = next_ip; - jl_longjmp(eh->eh_ctx, 1); - } else if (head == jl_pop_exception_sym) { size_t prev_state = jl_unbox_ulong(eval_value(jl_exprarg(stmt, 0), s)); jl_restore_excstack(prev_state); @@ -600,7 +663,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, } else if (jl_is_newvarnode(stmt)) { jl_value_t *var = jl_fieldref(stmt, 0); - assert(jl_is_slot(var)); + assert(jl_is_slotnumber(var)); ssize_t n = jl_slot_number(var); assert(n <= jl_source_nslots(s->src) && n > 0); s->locals[n - 1] = NULL; @@ -618,9 +681,9 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, // preparing method IR for interpreter -jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi) +jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi, size_t world) { - jl_code_info_t *src = (jl_code_info_t*)mi->uninferred; + jl_code_info_t *src = (jl_code_info_t*)jl_atomic_load_relaxed(&mi->uninferred); if (jl_is_method(mi->def.value)) { if (!src || (jl_value_t*)src == jl_nothing) { if (mi->def.method->source) { @@ -628,19 +691,19 @@ jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *mi) } else { assert(mi->def.method->generator); - src = jl_code_for_staged(mi); + src = jl_code_for_staged(mi, world); } } if (src && (jl_value_t*)src != jl_nothing) { JL_GC_PUSH1(&src); - src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src); - mi->uninferred = (jl_value_t*)src; + src = jl_uncompress_ir(mi->def.method, NULL, (jl_value_t*)src); + jl_atomic_store_release(&mi->uninferred, (jl_value_t*)src); jl_gc_wb(mi, src); JL_GC_POP(); } } if (!src || !jl_is_code_info(src)) { - jl_error("source missing for method called in interpreter"); + jl_throw(jl_new_struct(jl_missingcodeerror_type, (jl_value_t*)mi)); } return src; } @@ -651,9 +714,11 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui { interpreter_state *s; jl_method_instance_t *mi = codeinst->def; - jl_code_info_t *src = jl_code_for_interpreter(mi); + jl_task_t *ct = jl_current_task; + size_t world = ct->world_age; + jl_code_info_t *src = jl_code_for_interpreter(mi, world); jl_array_t *stmts = src->code; - assert(jl_typeis(stmts, jl_array_any_type)); + assert(jl_typetagis(stmts, jl_array_any_type)); unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src) + 2; jl_value_t **locals = NULL; JL_GC_PUSHFRAME(s, locals, nroots); @@ -688,12 +753,12 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui return r; } -JL_DLLEXPORT jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret_call; +JL_DLLEXPORT const jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret_call; jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs) { jl_method_t *source = oc->source; - jl_code_info_t *code = jl_uncompress_ir(source, NULL, (jl_array_t*)source->source); + jl_code_info_t *code = jl_uncompress_ir(source, NULL, (jl_value_t*)source->source); interpreter_state *s; unsigned nroots = jl_source_nslots(code) + jl_source_nssavalues(code) + 2; jl_task_t *ct = jl_current_task; @@ -726,8 +791,8 @@ jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **ar jl_value_t *r = eval_body(code->code, s, 0, 0); locals[0] = r; // GC root JL_GC_PROMISE_ROOTED(r); - jl_typeassert(r, jl_tparam1(jl_typeof(oc))); ct->world_age = last_age; + jl_typeassert(r, jl_tparam1(jl_typeof(oc))); JL_GC_POP(); return r; } @@ -738,7 +803,7 @@ jl_value_t *NOINLINE 
jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src); JL_GC_PUSHFRAME(s, s->locals, nroots); jl_array_t *stmts = src->code; - assert(jl_typeis(stmts, jl_array_any_type)); + assert(jl_typetagis(stmts, jl_array_any_type)); s->src = src; s->module = m; s->sparam_vals = jl_emptysvec; diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 117971a083db8..c784727c4f598 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -15,7 +15,6 @@ STATISTIC(EmittedCoercedUnboxes, "Number of unbox coercions emitted"); STATISTIC(EmittedUnboxes, "Number of unboxes emitted"); STATISTIC(EmittedRuntimeCalls, "Number of runtime intrinsic calls emitted"); STATISTIC(EmittedIntrinsics, "Number of intrinsic calls emitted"); -STATISTIC(Emitted_arraylen, "Number of arraylen calls emitted"); STATISTIC(Emitted_pointerref, "Number of pointerref calls emitted"); STATISTIC(Emitted_pointerset, "Number of pointerset calls emitted"); STATISTIC(Emitted_atomic_fence, "Number of atomic_fence calls emitted"); @@ -45,10 +44,10 @@ FunctionType *get_intr_args5(LLVMContext &C) { return FunctionType::get(JuliaTyp const auto &runtime_func() { static struct runtime_funcs_t { - std::array runtime_func; + std::array *, num_intrinsics> runtime_func; runtime_funcs_t() : runtime_func{ -#define ADD_I(name, nargs) new JuliaFunction{XSTR(jl_##name), get_intr_args##nargs, nullptr}, +#define ADD_I(name, nargs) new JuliaFunction<>{XSTR(jl_##name), get_intr_args##nargs, nullptr}, #define ADD_HIDDEN ADD_I #define ALIAS(alias, base) nullptr, INTRINSICS @@ -79,12 +78,10 @@ const auto &float_func() { float_func[sub_float] = true; float_func[mul_float] = true; float_func[div_float] = true; - float_func[rem_float] = true; float_func[add_float_fast] = true; float_func[sub_float_fast] = true; float_func[mul_float_fast] = true; float_func[div_float_fast] = true; - float_func[rem_float_fast] = true; float_func[fma_float] = true; float_func[muladd_float] = true; float_func[eq_float] = true; @@ -110,8 +107,8 @@ const auto &float_func() { return float_funcs.float_func; } -extern "C" -JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_impl(void) +extern "C" JL_DLLEXPORT_CODEGEN +uint32_t jl_get_LLVM_VERSION_impl(void) { return 10000 * LLVM_VERSION_MAJOR + 100 * LLVM_VERSION_MINOR #ifdef LLVM_VERSION_PATCH @@ -156,18 +153,18 @@ static Type *FLOATT(Type *t) } // convert an llvm type to same-size int type -static Type *INTT(Type *t) +static Type *INTT(Type *t, const DataLayout &DL) { auto &ctxt = t->getContext(); if (t->isIntegerTy()) return t; if (t->isPointerTy()) - return getSizeTy(ctxt); + return DL.getIntPtrType(t); if (t == getDoubleTy(ctxt)) return getInt64Ty(ctxt); if (t == getFloatTy(ctxt)) return getInt32Ty(ctxt); - if (t == getHalfTy(ctxt)) + if (t == getHalfTy(ctxt) || t == getBFloatTy(ctxt)) return getInt16Ty(ctxt); unsigned nb = t->getPrimitiveSizeInBits(); assert(t != getVoidTy(ctxt) && nb > 0); @@ -176,12 +173,7 @@ static Type *INTT(Type *t) static Value *uint_cnvt(jl_codectx_t &ctx, Type *to, Value *x) { - Type *t = x->getType(); - if (t == to) - return x; - if (to->getPrimitiveSizeInBits() < x->getType()->getPrimitiveSizeInBits()) - return ctx.builder.CreateTrunc(x, to); - return ctx.builder.CreateZExt(x, to); + return ctx.builder.CreateZExtOrTrunc(x, to); } static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_datatype_t *bt) @@ -231,7 +223,7 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data } size_t nf = 
jl_datatype_nfields(bt); - std::vector fields(0); + SmallVector fields(0); for (size_t i = 0; i < nf; i++) { size_t offs = jl_field_offset(bt, i); jl_value_t *ft = jl_field_type(bt, i); @@ -247,8 +239,8 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data if (jl_is_uniontype(ft)) { // compute the same type layout as julia_struct_to_llvm size_t fsz = 0, al = 0; - (void)jl_islayout_inline(ft, &fsz, &al); - fsz = jl_field_size(bt, i); + (void)jl_islayout_inline(ft, &fsz, &al); // compute al + fsz = jl_field_size(bt, i); // get LLT_ALIGN(fsz+1,al) uint8_t sel = ((const uint8_t*)ptr)[offs + fsz - 1]; jl_value_t *active_ty = jl_nth_union_component(ft, sel); size_t active_sz = jl_datatype_size(active_ty); @@ -320,50 +312,115 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, jl_value_t *e) return julia_const_to_llvm(ctx, e, (jl_datatype_t*)bt); } +static Constant *undef_value_for_type(Type *T) { + auto tracked = CountTrackedPointers(T); + Constant *undef; + if (tracked.count) + // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL + undef = Constant::getNullValue(T); + else + undef = UndefValue::get(T); + return undef; +} + +// rebuild a struct type with any i1 Bool (e.g. the llvmcall type) widened to i8 (the native size for memcpy) +static Type *zext_struct_type(Type *T) +{ + if (auto *AT = dyn_cast(T)) { + return ArrayType::get(AT->getElementType(), AT->getNumElements()); + } + else if (auto *ST = dyn_cast(T)) { + SmallVector Elements(ST->element_begin(), ST->element_end()); + for (size_t i = 0; i < Elements.size(); i++) { + Elements[i] = zext_struct_type(Elements[i]); + } + return StructType::get(ST->getContext(), Elements, ST->isPacked()); + } + else if (auto *VT = dyn_cast(T)) { + return VectorType::get(zext_struct_type(VT->getElementType()), VT); + } + else if (auto *IT = dyn_cast(T)) { + unsigned BitWidth = IT->getBitWidth(); + if (alignTo(BitWidth, 8) != BitWidth) + return IntegerType::get(IT->getContext(), alignTo(BitWidth, 8)); + } + return T; +} + +// rebuild a struct with any i1 Bool (e.g. 
the llvmcall type) widened to i8 (the native size for memcpy) +static Value *zext_struct_helper(jl_codectx_t &ctx, Value *V, Type *T2) +{ + Type *T = V->getType(); + if (T == T2) + return V; + if (auto *AT = dyn_cast(T2)) { + Value *V2 = undef_value_for_type(AT); + for (size_t i = 0; i < AT->getNumElements(); i++) { + Value *E = zext_struct_helper(ctx, ctx.builder.CreateExtractValue(V, i), AT->getElementType()); + V2 = ctx.builder.CreateInsertValue(V2, E, i); + } + return V2; + } + else if (auto *ST = dyn_cast(T2)) { + Value *V2 = undef_value_for_type(ST); + for (size_t i = 0; i < ST->getNumElements(); i++) { + Value *E = zext_struct_helper(ctx, ctx.builder.CreateExtractValue(V, i), ST->getElementType(i)); + V2 = ctx.builder.CreateInsertValue(V2, E, i); + } + return V2; + } + else if (T2->isIntegerTy() || T2->isVectorTy()) { + return ctx.builder.CreateZExt(V, T2); + } + return V; +} + +static Value *zext_struct(jl_codectx_t &ctx, Value *V) +{ + return zext_struct_helper(ctx, V, zext_struct_type(V->getType())); +} + static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) { + if (unboxed->getType() == to) + return unboxed; + if (CastInst::castIsValid(Instruction::Trunc, unboxed, to)) + return ctx.builder.CreateTrunc(unboxed, to); + unboxed = zext_struct(ctx, unboxed); Type *ty = unboxed->getType(); if (ty == to) return unboxed; bool frompointer = ty->isPointerTy(); bool topointer = to->isPointerTy(); const DataLayout &DL = jl_Module->getDataLayout(); - if (ty->isIntegerTy(1) && to->isIntegerTy(8)) { - // bools may be stored internally as int8 - unboxed = ctx.builder.CreateZExt(unboxed, to); - } - else if (ty->isIntegerTy(8) && to->isIntegerTy(1)) { - // bools may be stored internally as int8 - unboxed = ctx.builder.CreateTrunc(unboxed, to); - } - else if (ty->isVoidTy() || DL.getTypeSizeInBits(ty) != DL.getTypeSizeInBits(to)) { + if (ty->isVoidTy() || DL.getTypeSizeInBits(ty) != DL.getTypeSizeInBits(to)) { // this can happen in dead code - //emit_unreachable(ctx); + CreateTrap(ctx.builder); return UndefValue::get(to); } if (frompointer && topointer) { unboxed = emit_bitcast(ctx, unboxed, to); } else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) { -#ifndef JL_NDEBUG - const DataLayout &DL = jl_Module->getDataLayout(); -#endif assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to)); AllocaInst *cast = ctx.builder.CreateAlloca(ty); + setName(ctx.emission_context, cast, "coercion"); ctx.builder.CreateStore(unboxed, cast); unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo())); } else if (frompointer) { - Type *INTT_to = INTT(to); + Type *INTT_to = INTT(to, DL); unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to); + setName(ctx.emission_context, unboxed, "coercion"); if (INTT_to != to) unboxed = ctx.builder.CreateBitCast(unboxed, to); } else if (topointer) { - Type *INTT_to = INTT(to); + Type *INTT_to = INTT(to, DL); if (to != INTT_to) unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to); unboxed = emit_inttoptr(ctx, unboxed, to); + setName(ctx.emission_context, unboxed, "coercion"); } else { unboxed = ctx.builder.CreateBitCast(unboxed, to); @@ -383,7 +440,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va if (type_is_ghost(to)) { return NULL; } - //emit_unreachable(ctx); + CreateTrap(ctx.builder); return UndefValue::get(to); // type mismatch error } @@ -397,7 +454,9 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va Value *p = x.constant ? 
literal_pointer_val(ctx, x.constant) : x.V; if (jt == (jl_value_t*)jl_bool_type || to->isIntegerTy(1)) { - Instruction *unbox_load = tbaa_decorate(x.tbaa, ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), maybe_bitcast(ctx, p, getInt8PtrTy(ctx.builder.getContext())))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), maybe_bitcast(ctx, p, getInt8PtrTy(ctx.builder.getContext())))); + setName(ctx.emission_context, unbox_load, p->getName() + ".unbox"); if (jt == (jl_value_t*)jl_bool_type) unbox_load->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), { ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), @@ -425,12 +484,16 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) && DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) { Instruction *load = ctx.builder.CreateAlignedLoad(AllocType, p, Align(alignment)); - return emit_unboxed_coercion(ctx, to, tbaa_decorate(x.tbaa, load)); + setName(ctx.emission_context, load, p->getName() + ".unbox"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + return emit_unboxed_coercion(ctx, to, ai.decorateInst(load)); } } p = maybe_bitcast(ctx, p, ptype); Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment)); - return tbaa_decorate(x.tbaa, load); + setName(ctx.emission_context, load, p->getName() + ".unbox"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); + return ai.decorateInst(load); } // emit code to store a raw value into a destination @@ -443,37 +506,31 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest return; } - Value *unboxed = nullptr; - if (!x.ispointer()) { // already unboxed, but sometimes need conversion - unboxed = x.V; - assert(unboxed); - } - - // bools stored as int8, but can be narrowed to int1 often - if (x.typ == (jl_value_t*)jl_bool_type) - unboxed = emit_unbox(ctx, getInt8Ty(ctx.builder.getContext()), x, (jl_value_t*)jl_bool_type); - - if (unboxed) { + if (!x.ispointer()) { // already unboxed, but sometimes need conversion (e.g. 
f32 -> i32) + assert(x.V); + Value *unboxed = zext_struct(ctx, x.V); Type *dest_ty = unboxed->getType()->getPointerTo(); if (dest->getType() != dest_ty) dest = emit_bitcast(ctx, dest, dest_ty); StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, Align(alignment)); store->setVolatile(isVolatile); - tbaa_decorate(tbaa_dest, store); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest); + ai.decorateInst(store); return; } Value *src = data_pointer(ctx, x); - emit_memcpy(ctx, dest, tbaa_dest, src, x.tbaa, jl_datatype_size(x.typ), alignment, isVolatile); + emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, julia_alignment(x.typ), isVolatile); } -static jl_value_t *staticeval_bitstype(const jl_cgval_t &targ) +static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ) { // evaluate an argument at compile time to determine what type it is - if (jl_is_type_type(targ.typ)) { - jl_value_t *bt = jl_tparam0(targ.typ); + jl_value_t *unw = jl_unwrap_unionall(targ.typ); + if (jl_is_type_type(unw)) { + jl_value_t *bt = jl_tparam0(unw); if (jl_is_primitivetype(bt)) - return bt; + return (jl_datatype_t*)bt; } return NULL; } @@ -481,11 +538,11 @@ static jl_value_t *staticeval_bitstype(const jl_cgval_t &targ) static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs) { Function *func = prepare_call(runtime_func()[f]); - Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs); + SmallVector argvalues(nargs); for (size_t i = 0; i < nargs; ++i) { argvalues[i] = boxed(ctx, argv[i]); } - Value *r = ctx.builder.CreateCall(func, makeArrayRef(argvalues, nargs)); + Value *r = ctx.builder.CreateCall(func, argvalues); return mark_julia_type(ctx, r, true, (jl_value_t*)jl_any_type); } @@ -495,43 +552,48 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) // Give the arguments names // const jl_cgval_t &bt_value = argv[0]; const jl_cgval_t &v = argv[1]; - jl_value_t *bt = staticeval_bitstype(bt_value); + jl_datatype_t *bt = staticeval_bitstype(bt_value); // it's easier to throw a good error from C than llvm if (!bt) return emit_runtime_call(ctx, bitcast, argv, 2); - Type *llvmt = bitstype_to_llvm(bt, ctx.builder.getContext(), true); - int nb = jl_datatype_size(bt); + Type *llvmt = bitstype_to_llvm((jl_value_t*)bt, ctx.builder.getContext(), true); + uint32_t nb = jl_datatype_size(bt); + + Value *bt_value_rt = NULL; + if (!jl_is_concrete_type((jl_value_t*)bt)) { + bt_value_rt = boxed(ctx, bt_value); + emit_concretecheck(ctx, bt_value_rt, "bitcast: target type not a leaf primitive type"); + } // Examine the second argument // bool isboxed; Type *vxt = julia_type_to_llvm(ctx, v.typ, &isboxed); - if (!jl_is_primitivetype(v.typ) || jl_datatype_size(v.typ) != nb) { - Value *typ = emit_typeof_boxed(ctx, v); + Value *typ = emit_typeof(ctx, v, false, false); if (!jl_is_primitivetype(v.typ)) { - if (isboxed) { - Value *isprimitive = emit_datatype_isprimitivetype(ctx, typ); - error_unless(ctx, isprimitive, "bitcast: expected primitive type value for second argument"); - } - else { - emit_error(ctx, "bitcast: expected primitive type value for second argument"); + if (jl_is_datatype(v.typ) && !jl_is_abstracttype(v.typ)) { + emit_error(ctx, "bitcast: value not a primitive type"); return jl_cgval_t(); } - } - if (!jl_is_datatype(v.typ) || jl_datatype_size(v.typ) != nb) { - if (isboxed) { - Value *size = emit_datatype_size(ctx, typ); - 
error_unless(ctx, - ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nb)), - "bitcast: argument size does not match size of target type"); - } else { - emit_error(ctx, "bitcast: argument size does not match size of target type"); - return jl_cgval_t(); + Value *isprimitive = emit_datatype_isprimitivetype(ctx, typ); + error_unless(ctx, isprimitive, "bitcast: value not a primitive type"); } } + if (jl_is_datatype(v.typ) && !jl_is_abstracttype(v.typ)) { + emit_error(ctx, "bitcast: argument size does not match size of target type"); + return jl_cgval_t(); + } + else { + Value *size = emit_datatype_size(ctx, typ); + auto sizecheck = ctx.builder.CreateICmpEQ(size, ConstantInt::get(size->getType(), nb)); + setName(ctx.emission_context, sizecheck, "sizecheck"); + error_unless(ctx, + sizecheck, + "bitcast: argument size does not match size of target type"); + } } assert(!v.isghost); @@ -547,33 +609,41 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) if (isboxed) vxt = llvmt; auto storage_type = vxt->isIntegerTy(1) ? getInt8Ty(ctx.builder.getContext()) : vxt; - vx = tbaa_decorate(v.tbaa, ctx.builder.CreateLoad( + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, v.tbaa); + vx = ai.decorateInst(ctx.builder.CreateLoad( storage_type, emit_bitcast(ctx, data_pointer(ctx, v), storage_type->getPointerTo()))); + setName(ctx.emission_context, vx, "bitcast"); } vxt = vx->getType(); if (vxt != llvmt) { - if (llvmt->isIntegerTy(1)) + if (llvmt->isIntegerTy(1)) { vx = ctx.builder.CreateTrunc(vx, llvmt); - else if (vxt->isIntegerTy(1) && llvmt->isIntegerTy(8)) + } else if (vxt->isIntegerTy(1) && llvmt->isIntegerTy(8)) { vx = ctx.builder.CreateZExt(vx, llvmt); - else if (vxt->isPointerTy() && !llvmt->isPointerTy()) + } else if (vxt->isPointerTy() && !llvmt->isPointerTy()) { vx = ctx.builder.CreatePtrToInt(vx, llvmt); - else if (!vxt->isPointerTy() && llvmt->isPointerTy()) + setName(ctx.emission_context, vx, "bitcast_coercion"); + } else if (!vxt->isPointerTy() && llvmt->isPointerTy()) { vx = emit_inttoptr(ctx, vx, llvmt); - else + setName(ctx.emission_context, vx, "bitcast_coercion"); + } else { vx = emit_bitcast(ctx, vx, llvmt); + setName(ctx.emission_context, vx, "bitcast_coercion"); + } } - if (jl_is_concrete_type(bt)) { + if (jl_is_concrete_type((jl_value_t*)bt)) { return mark_julia_type(ctx, vx, false, bt); } else { - Value *box = emit_allocobj(ctx, nb, boxed(ctx, bt_value)); + unsigned align = sizeof(void*); // Allocations are at least pointer aligned + Value *box = emit_allocobj(ctx, nb, bt_value_rt, true, align); + setName(ctx.emission_context, box, "bitcast_box"); init_bits_value(ctx, box, vx, ctx.tbaa().tbaa_immut); - return mark_julia_type(ctx, box, true, bt); + return mark_julia_type(ctx, box, true, bt->name->wrapper); } } @@ -582,19 +652,22 @@ static jl_cgval_t generic_cast( intrinsic f, Instruction::CastOps Op, const jl_cgval_t *argv, bool toint, bool fromint) { + auto &TT = ctx.emission_context.TargetTriple; + auto &DL = ctx.emission_context.DL; const jl_cgval_t &targ = argv[0]; const jl_cgval_t &v = argv[1]; - jl_value_t *jlto = staticeval_bitstype(targ); + jl_datatype_t *jlto = staticeval_bitstype(targ); if (!jlto || !jl_is_primitivetype(v.typ)) return emit_runtime_call(ctx, f, argv, 2); - Type *to = bitstype_to_llvm(jlto, ctx.builder.getContext(), true); + uint32_t nb = jl_datatype_size(jlto); + Type *to = bitstype_to_llvm((jl_value_t*)jlto, ctx.builder.getContext(), true); Type *vt = bitstype_to_llvm(v.typ, 
ctx.builder.getContext(), true); if (toint) - to = INTT(to); + to = INTT(to, DL); else to = FLOATT(to); if (fromint) - vt = INTT(vt); + vt = INTT(vt, DL); else vt = FLOATT(vt); if (!to || !vt) @@ -603,22 +676,35 @@ static jl_cgval_t generic_cast( if (!CastInst::castIsValid(Op, from, to)) return emit_runtime_call(ctx, f, argv, 2); if (Op == Instruction::FPExt) { -#ifdef JL_NEED_FLOATTEMP_VAR - // Target platform might carry extra precision. - // Force rounding to single precision first. The reason is that it's - // fine to keep working in extended precision as long as it's - // understood that everything is implicitly rounded to 23 bits, - // but if we start looking at more bits we need to actually do the - // rounding first instead of carrying around incorrect low bits. - Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType()); - ctx.builder.CreateStore(from, jlfloattemp_var); - from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true); -#endif + if (jl_floattemp_var_needed(TT)) { + // Target platform might carry extra precision. + // Force rounding to single precision first. The reason is that it's + // fine to keep working in extended precision as long as it's + // understood that everything is implicitly rounded to 23 bits, + // but if we start looking at more bits we need to actually do the + // rounding first instead of carrying around incorrect low bits. + Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType()); + setName(ctx.emission_context, jlfloattemp_var, "rounding_slot"); + ctx.builder.CreateStore(from, jlfloattemp_var); + from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true); + setName(ctx.emission_context, from, "rounded"); + } } Value *ans = ctx.builder.CreateCast(Op, from, to); if (f == fptosi || f == fptoui) ans = ctx.builder.CreateFreeze(ans); - return mark_julia_type(ctx, ans, false, jlto); + if (jl_is_concrete_type((jl_value_t*)jlto)) { + return mark_julia_type(ctx, ans, false, jlto); + } + else { + Value *targ_rt = boxed(ctx, targ); + emit_concretecheck(ctx, targ_rt, std::string(jl_intrinsic_name(f)) + ": target type not a leaf primitive type"); + unsigned align = sizeof(void*); // Allocations are at least pointer aligned + Value *box = emit_allocobj(ctx, nb, targ_rt, true, align); + setName(ctx.emission_context, box, "cast_box"); + init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut); + return mark_julia_type(ctx, box, true, jlto->name->wrapper); + } } static jl_cgval_t emit_runtime_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) @@ -649,26 +735,32 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) return jl_cgval_t(); } - Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), i, (jl_value_t*)jl_long_type); - Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)); + Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type); + Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, im1, "pointerref_idx"); if (ety == (jl_value_t*)jl_any_type) { Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ); + setName(ctx.emission_context, thePtr, "unbox_any_ptr"); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, thePtr, im1), Align(align_nb)); - tbaa_decorate(ctx.tbaa().tbaa_data, load); + 
setName(ctx.emission_context, load, "any_unbox"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); + ai.decorateInst(load); return mark_julia_type(ctx, load, true, ety); } - else if (!jl_isbits(ety)) { + else if (!deserves_stack(ety)) { assert(jl_is_datatype(ety)); uint64_t size = jl_datatype_size(ety); - Value *strct = emit_allocobj(ctx, size, - literal_pointer_val(ctx, ety)); - im1 = ctx.builder.CreateMul(im1, ConstantInt::get(getSizeTy(ctx.builder.getContext()), + Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety, true); + setName(ctx.emission_context, strct, "pointerref_box"); + im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); + setName(ctx.emission_context, im1, "pointerref_offset"); Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1); + setName(ctx.emission_context, thePtr, "pointerref_src"); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); - emit_memcpy(ctx, strct, tbaa, thePtr, nullptr, size, 1); + emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, sizeof(jl_value_t*), align_nb); return mark_julia_type(ctx, strct, true, ety); } else { @@ -677,7 +769,9 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - return typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, true, align_nb); + auto load = typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, false, align_nb); + setName(ctx.emission_context, load.V, "pointerref"); + return load; } else { return ghostValue(ctx, ety); @@ -718,24 +812,31 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) } emit_typecheck(ctx, x, ety, "pointerset"); - Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), i, (jl_value_t*)jl_long_type); - Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)); + Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type); + Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, im1, "pointerset_idx"); Value *thePtr; if (ety == (jl_value_t*)jl_any_type) { // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots. 
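// For the inline (non-stack) element paths in the pointerref/pointerset hunks
// here, the emitted index arithmetic reduces to the following plain-C
// addressing (a sketch only, ignoring TBAA, write barriers and the atomic
// variants; pointerref/pointerset indices are 1-based):

static char *pointer_elt_addr(char *base, size_t i, jl_value_t *ety)
{
    // elements are spaced by the padded element size, i.e. exactly the
    // LLT_ALIGN(size, align) factor multiplied into `im1` above
    size_t elsz = LLT_ALIGN(jl_datatype_size(ety), jl_datatype_align(ety));
    return base + (i - 1) * elsz;
}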
- thePtr = emit_unbox(ctx, getSizePtrTy(ctx.builder.getContext()), e, e.typ); - Instruction *store = ctx.builder.CreateAlignedStore( - ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), getSizeTy(ctx.builder.getContext())), - ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), thePtr, im1), Align(align_nb)); - tbaa_decorate(ctx.tbaa().tbaa_data, store); - } - else if (!jl_isbits(ety)) { + thePtr = emit_unbox(ctx, ctx.types().T_size->getPointerTo(), e, e.typ); + auto gep = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1); + setName(ctx.emission_context, gep, "pointerset_ptr"); + auto val = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size); + setName(ctx.emission_context, val, "pointerset_val"); + Instruction *store = ctx.builder.CreateAlignedStore(val, gep, Align(align_nb)); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); + ai.decorateInst(store); + } + else if (x.ispointer()) { thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); uint64_t size = jl_datatype_size(ety); - im1 = ctx.builder.CreateMul(im1, ConstantInt::get(getSizeTy(ctx.builder.getContext()), + im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); - emit_memcpy(ctx, ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1), nullptr, x, size, align_nb); + setName(ctx.emission_context, im1, "pointerset_offset"); + auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1); + setName(ctx.emission_context, gep, "pointerset_ptr"); + emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb, julia_alignment(ety)); } else { bool isboxed; @@ -786,7 +887,9 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) if (ety == (jl_value_t*)jl_any_type) { Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, thePtr, Align(sizeof(jl_value_t*))); - tbaa_decorate(ctx.tbaa().tbaa_data, load); + setName(ctx.emission_context, load, "atomic_pointerref"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); + ai.decorateInst(load); load->setOrdering(llvm_order); return mark_julia_type(ctx, load, true, ety); } @@ -802,21 +905,22 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) return jl_cgval_t(); } - if (!jl_isbits(ety)) { + if (!deserves_stack(ety)) { assert(jl_is_datatype(ety)); - uint64_t size = jl_datatype_size(ety); - Value *strct = emit_allocobj(ctx, size, - literal_pointer_val(ctx, ety)); + Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety, true); + setName(ctx.emission_context, strct, "atomic_pointerref_box"); Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); Type *loadT = Type::getIntNTy(ctx.builder.getContext(), nb * 8); thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo()); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb)); - tbaa_decorate(tbaa, load); + setName(ctx.emission_context, load, "atomic_pointerref"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); + ai.decorateInst(load); load->setOrdering(llvm_order); thePtr = emit_bitcast(ctx, strct, thePtr->getType()); StoreInst *store = ctx.builder.CreateAlignedStore(load, thePtr, Align(julia_alignment(ety))); - tbaa_decorate(tbaa, 
store); + ai.decorateInst(store); return mark_julia_type(ctx, strct, true, ety); } else { @@ -825,7 +929,9 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - return typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, true, nb); + auto load = typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, false, nb); + setName(ctx.emission_context, load.V, "atomic_pointerref"); + return load; } else { if (order > jl_memory_order_monotonic) @@ -901,6 +1007,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl } if (!jl_isbits(ety)) { + //if (!deserves_stack(ety)) //Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); //uint64_t size = jl_datatype_size(ety); return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations @@ -909,7 +1016,11 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl bool isboxed; Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed); assert(!isboxed); - Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); + Value *thePtr; + if (!type_is_ghost(ptrty)) + thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); + else + thePtr = nullptr; // could use any value here, since typed_store will not use it jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify"); if (issetfield) @@ -921,15 +1032,18 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den) { Type *t = den->getType(); + auto ndivby0 = ctx.builder.CreateICmpNE(den, ConstantInt::get(t, 0)); + setName(ctx.emission_context, ndivby0, "ndivby0"); raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(den, ConstantInt::get(t, 0)), + ndivby0, literal_pointer_val(ctx, jl_diverror_exception)); BasicBlock *m1BB = BasicBlock::Create(ctx.builder.getContext(), "minus1", ctx.f); BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "oksrem", ctx.f); BasicBlock *cont = BasicBlock::Create(ctx.builder.getContext(), "after_srem", ctx.f); PHINode *ret = PHINode::Create(t, 2); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(den ,ConstantInt::get(t, -1, true)), - m1BB, okBB); + auto divbym1 = ctx.builder.CreateICmpEQ(den, ConstantInt::get(t, -1, true)); + setName(ctx.emission_context, divbym1, "divbym1"); + ctx.builder.CreateCondBr(divbym1, m1BB, okBB); ctx.builder.SetInsertPoint(m1BB); ctx.builder.CreateBr(cont); ctx.builder.SetInsertPoint(okBB); @@ -939,6 +1053,7 @@ static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den) ret->addIncoming(// rem(typemin, -1) is undefined ConstantInt::get(t, 0), m1BB); ret->addIncoming(sremval, okBB); + setName(ctx.emission_context, ret, "checked_srem"); ctx.builder.Insert(ret); return ret; } @@ -974,6 +1089,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_cgval_t y, jl_value_t *rt_hint) { Value *isfalse = emit_condition(ctx, c, "ifelse"); + setName(ctx.emission_context, isfalse, "ifelse_cond"); jl_value_t *t1 = x.typ; jl_value_t *t2 = y.typ; // 
handle cases where the condition is irrelevant based on type info @@ -1046,6 +1162,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ if (x_ptr->getType() != y_ptr->getType()) y_ptr = ctx.builder.CreateBitCast(y_ptr, x_ptr->getType()); ifelse_result = ctx.builder.CreateSelect(isfalse, y_ptr, x_ptr); + setName(ctx.emission_context, ifelse_result, "ifelse_result"); ifelse_tbaa = MDNode::getMostGenericTBAA(x.tbaa, y.tbaa); if (ifelse_tbaa == NULL) { // LLVM won't return a TBAA result for the root, but mark_julia_struct requires it: make it now @@ -1090,6 +1207,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ ctx.builder.SetInsertPoint(post); ctx.builder.Insert(ret); tindex = ret; + setName(ctx.emission_context, tindex, "ifelse_tindex"); } jl_cgval_t ret = mark_julia_slot(ifelse_result, rt_hint, tindex, ifelse_tbaa); if (x_vboxed || y_vboxed) { @@ -1098,6 +1216,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ if (!y_vboxed) y_vboxed = ConstantPointerNull::get(cast(x_vboxed->getType())); ret.Vboxed = ctx.builder.CreateSelect(isfalse, y_vboxed, x_vboxed); + setName(ctx.emission_context, ret.Vboxed, "ifelse_vboxed"); assert(ret.Vboxed->getType() == ctx.types().T_prjlvalue); } return ret; @@ -1105,6 +1224,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ ifelse_result = ctx.builder.CreateSelect(isfalse, boxed(ctx, y), boxed(ctx, x)); + setName(ctx.emission_context, ifelse_result, "ifelse_result"); } jl_value_t *jt = (t1 == t2 ? t1 : rt_hint); return mark_julia_type(ctx, ifelse_result, isboxed, jt); @@ -1112,6 +1232,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **args, size_t nargs) { + auto &DL = ctx.emission_context.DL; assert(f < num_intrinsics); if (f == cglobal && nargs == 1) f = cglobal_auto; @@ -1125,94 +1246,90 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar if (f == cglobal_auto || f == cglobal) return emit_cglobal(ctx, args, nargs); - jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); + SmallVector argv(nargs); for (size_t i = 0; i < nargs; ++i) { - argv[i] = emit_expr(ctx, args[i + 1]); + jl_cgval_t arg = emit_expr(ctx, args[i + 1]); + if (arg.typ == jl_bottom_type) { + // intrinsics generally don't handle bottom values, so bail out early + return jl_cgval_t(); + } + argv[i] = arg; } // this forces everything to use runtime-intrinsics (e.g. 
for testing) // return emit_runtime_call(ctx, f, argv, nargs); switch (f) { - case arraylen: { - ++Emitted_arraylen; - assert(nargs == 1); - const jl_cgval_t &x = argv[0]; - jl_value_t *typ = jl_unwrap_unionall(x.typ); - if (!jl_is_datatype(typ) || ((jl_datatype_t*)typ)->name != jl_array_typename) - return emit_runtime_call(ctx, f, argv, nargs); - return mark_julia_type(ctx, emit_arraylen(ctx, x), false, jl_long_type); - } case pointerref: ++Emitted_pointerref; assert(nargs == 3); - return emit_pointerref(ctx, argv); + return emit_pointerref(ctx, argv.data()); case pointerset: ++Emitted_pointerset; assert(nargs == 4); - return emit_pointerset(ctx, argv); + return emit_pointerset(ctx, argv.data()); case atomic_fence: ++Emitted_atomic_fence; assert(nargs == 1); - return emit_atomicfence(ctx, argv); + return emit_atomicfence(ctx, argv.data()); case atomic_pointerref: ++Emitted_atomic_pointerref; assert(nargs == 2); - return emit_atomic_pointerref(ctx, argv); + return emit_atomic_pointerref(ctx, argv.data()); case atomic_pointerset: case atomic_pointerswap: case atomic_pointermodify: case atomic_pointerreplace: ++Emitted_atomic_pointerop; - return emit_atomic_pointerop(ctx, f, argv, nargs, nullptr); + return emit_atomic_pointerop(ctx, f, argv.data(), nargs, nullptr); case bitcast: ++Emitted_bitcast; assert(nargs == 2); - return generic_bitcast(ctx, argv); + return generic_bitcast(ctx, argv.data()); case trunc_int: ++Emitted_trunc_int; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::Trunc, argv, true, true); + return generic_cast(ctx, f, Instruction::Trunc, argv.data(), true, true); case sext_int: ++Emitted_sext_int; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::SExt, argv, true, true); + return generic_cast(ctx, f, Instruction::SExt, argv.data(), true, true); case zext_int: ++Emitted_zext_int; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::ZExt, argv, true, true); + return generic_cast(ctx, f, Instruction::ZExt, argv.data(), true, true); case uitofp: ++Emitted_uitofp; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::UIToFP, argv, false, true); + return generic_cast(ctx, f, Instruction::UIToFP, argv.data(), false, true); case sitofp: ++Emitted_sitofp; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::SIToFP, argv, false, true); + return generic_cast(ctx, f, Instruction::SIToFP, argv.data(), false, true); case fptoui: ++Emitted_fptoui; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::FPToUI, argv, true, false); + return generic_cast(ctx, f, Instruction::FPToUI, argv.data(), true, false); case fptosi: ++Emitted_fptosi; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::FPToSI, argv, true, false); + return generic_cast(ctx, f, Instruction::FPToSI, argv.data(), true, false); case fptrunc: ++Emitted_fptrunc; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::FPTrunc, argv, false, false); + return generic_cast(ctx, f, Instruction::FPTrunc, argv.data(), false, false); case fpext: ++Emitted_fpext; assert(nargs == 2); - return generic_cast(ctx, f, Instruction::FPExt, argv, false, false); + return generic_cast(ctx, f, Instruction::FPExt, argv.data(), false, false); case not_int: { ++Emitted_not_int; assert(nargs == 1); const jl_cgval_t &x = argv[0]; if (!jl_is_primitivetype(x.typ)) - return emit_runtime_call(ctx, f, argv, nargs); - Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true)); + return emit_runtime_call(ctx, f, argv.data(), nargs); + Type *xt = 
INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext(), true), DL); Value *from = emit_unbox(ctx, xt, x, x.typ); Value *ans = ctx.builder.CreateNot(from); return mark_julia_type(ctx, ans, false, x.typ); @@ -1223,7 +1340,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar assert(nargs == 1); const jl_cgval_t &x = argv[0]; if (!x.constant || !jl_is_datatype(x.constant)) - return emit_runtime_call(ctx, f, argv, nargs); + return emit_runtime_call(ctx, f, argv.data(), nargs); jl_datatype_t *dt = (jl_datatype_t*) x.constant; // select the appropriated overloaded intrinsic @@ -1233,7 +1350,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar else if (dt == jl_float64_type) intr_name += "f64"; else - return emit_runtime_call(ctx, f, argv, nargs); + return emit_runtime_call(ctx, f, argv.data(), nargs); FunctionCallee intr = jl_Module->getOrInsertFunction(intr_name, getInt1Ty(ctx.builder.getContext())); auto ret = ctx.builder.CreateCall(intr); @@ -1246,14 +1363,14 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar // verify argument types if (!jl_is_primitivetype(xinfo.typ)) - return emit_runtime_call(ctx, f, argv, nargs); + return emit_runtime_call(ctx, f, argv.data(), nargs); Type *xtyp = bitstype_to_llvm(xinfo.typ, ctx.builder.getContext(), true); if (float_func()[f]) xtyp = FLOATT(xtyp); else - xtyp = INTT(xtyp); + xtyp = INTT(xtyp, DL); if (!xtyp) - return emit_runtime_call(ctx, f, argv, nargs); + return emit_runtime_call(ctx, f, argv.data(), nargs); ////Bool are required to be in the range [0,1] ////so while they are represented as i8, ////the operations need to be done in mod 1 @@ -1264,31 +1381,31 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar //if (xtyp == (jl_value_t*)jl_bool_type) // r = getInt1Ty(ctx.builder.getContext()); - Type **argt = (Type**)alloca(sizeof(Type*) * nargs); + SmallVector argt(nargs); argt[0] = xtyp; if (f == shl_int || f == lshr_int || f == ashr_int) { if (!jl_is_primitivetype(argv[1].typ)) - return emit_runtime_call(ctx, f, argv, nargs); - argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true)); + return emit_runtime_call(ctx, f, argv.data(), nargs); + argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext(), true), DL); } else { for (size_t i = 1; i < nargs; ++i) { if (xinfo.typ != argv[i].typ) - return emit_runtime_call(ctx, f, argv, nargs); + return emit_runtime_call(ctx, f, argv.data(), nargs); argt[i] = xtyp; } } // unbox the arguments - Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs); + SmallVector argvalues(nargs); for (size_t i = 0; i < nargs; ++i) { argvalues[i] = emit_unbox(ctx, argt[i], argv[i], argv[i].typ); } // call the intrinsic jl_value_t *newtyp = xinfo.typ; - Value *r = emit_untyped_intrinsic(ctx, f, argvalues, nargs, (jl_datatype_t**)&newtyp, xinfo.typ); + Value *r = emit_untyped_intrinsic(ctx, f, argvalues.data(), nargs, (jl_datatype_t**)&newtyp, xinfo.typ); // Turn Bool operations into mod 1 now, if needed if (newtyp == (jl_value_t*)jl_bool_type && !r->getType()->isIntegerTy(1)) r = ctx.builder.CreateTrunc(r, getInt1Ty(ctx.builder.getContext())); @@ -1344,20 +1461,18 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg case sub_float: return math_builder(ctx)().CreateFSub(x, y); case mul_float: return math_builder(ctx)().CreateFMul(x, y); case div_float: return math_builder(ctx)().CreateFDiv(x, y); - case rem_float: return 
math_builder(ctx)().CreateFRem(x, y); case add_float_fast: return math_builder(ctx, true)().CreateFAdd(x, y); case sub_float_fast: return math_builder(ctx, true)().CreateFSub(x, y); case mul_float_fast: return math_builder(ctx, true)().CreateFMul(x, y); case div_float_fast: return math_builder(ctx, true)().CreateFDiv(x, y); - case rem_float_fast: return math_builder(ctx, true)().CreateFRem(x, y); case fma_float: { assert(y->getType() == x->getType()); assert(z->getType() == y->getType()); - FunctionCallee fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, makeArrayRef(t)); + FunctionCallee fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, ArrayRef(t)); return ctx.builder.CreateCall(fmaintr, {x, y, z}); } case muladd_float: { - // LLVM 5.0 can create FMA in the backend for contractable fmul and fadd + // LLVM 5.0 can create FMA in the backend for contractible fmul and fadd // Emitting fmul and fadd here since they are easier for other LLVM passes to // optimize. auto mathb = math_builder(ctx, false, true); @@ -1383,51 +1498,45 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg (f == checked_smul_int ? Intrinsic::smul_with_overflow : Intrinsic::umul_with_overflow))))); - FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, intr_id, makeArrayRef(t)); - Value *res = ctx.builder.CreateCall(intr, {x, y}); - Value *val = ctx.builder.CreateExtractValue(res, ArrayRef(0)); - Value *obit = ctx.builder.CreateExtractValue(res, ArrayRef(1)); - Value *obyte = ctx.builder.CreateZExt(obit, getInt8Ty(ctx.builder.getContext())); + FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, intr_id, ArrayRef(t)); + Value *tupval = ctx.builder.CreateCall(intr, {x, y}); jl_value_t *params[2]; params[0] = xtyp; params[1] = (jl_value_t*)jl_bool_type; - jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2); + jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2); *newtyp = tuptyp; - Value *tupval; - tupval = UndefValue::get(julia_type_to_llvm(ctx, (jl_value_t*)tuptyp)); - tupval = ctx.builder.CreateInsertValue(tupval, val, ArrayRef(0)); - tupval = ctx.builder.CreateInsertValue(tupval, obyte, ArrayRef(1)); return tupval; } case checked_sdiv_int: { Value *typemin = ctx.builder.CreateShl(ConstantInt::get(t, 1), t->getPrimitiveSizeInBits() - 1); - raise_exception_unless(ctx, - ctx.builder.CreateAnd( - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - ctx.builder.CreateOr( - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, -1, true)), - ctx.builder.CreateICmpNE(x, typemin))), - literal_pointer_val(ctx, jl_diverror_exception)); + auto cond = ctx.builder.CreateAnd( + ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), + ctx.builder.CreateOr( + ctx.builder.CreateICmpNE(y, ConstantInt::get(t, -1, true)), + ctx.builder.CreateICmpNE(x, typemin))); + setName(ctx.emission_context, cond, "divisor_valid"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateSDiv(x, y); } - case checked_udiv_int: - raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - literal_pointer_val(ctx, jl_diverror_exception)); + case checked_udiv_int: { + auto cond = ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)); + setName(ctx.emission_context, cond, "ndivby0"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateUDiv(x, y); - + } case checked_srem_int: return emit_checked_srem_int(ctx, x, y); - case 
checked_urem_int: - raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - literal_pointer_val(ctx, jl_diverror_exception)); + case checked_urem_int: { + auto cond = ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)); + setName(ctx.emission_context, cond, "ndivby0"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateURem(x, y); + } case eq_int: *newtyp = jl_bool_type; return ctx.builder.CreateICmpEQ(x, y); case ne_int: *newtyp = jl_bool_type; return ctx.builder.CreateICmpNE(x, y); @@ -1448,7 +1557,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg case fpiseq: { *newtyp = jl_bool_type; - Type *it = INTT(t); + Type *it = INTT(t, ctx.emission_context.DL); Value *xi = ctx.builder.CreateBitCast(x, it); Value *yi = ctx.builder.CreateBitCast(y, it); return ctx.builder.CreateOr(ctx.builder.CreateAnd(ctx.builder.CreateFCmpUNO(x, x), @@ -1500,30 +1609,30 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg } } case bswap_int: { - FunctionCallee bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, makeArrayRef(t)); - return ctx.builder.CreateCall(bswapintr, x); + FunctionCallee bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, ArrayRef(t)); //TODO: Move to deduction guides + return ctx.builder.CreateCall(bswapintr, x); // when we drop LLVM 15 } case ctpop_int: { - FunctionCallee ctpopintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, makeArrayRef(t)); + FunctionCallee ctpopintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, ArrayRef(t)); return ctx.builder.CreateCall(ctpopintr, x); } case ctlz_int: { - FunctionCallee ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, makeArrayRef(t)); + FunctionCallee ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, ArrayRef(t)); y = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); return ctx.builder.CreateCall(ctlz, {x, y}); } case cttz_int: { - FunctionCallee cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, makeArrayRef(t)); + FunctionCallee cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, ArrayRef(t)); y = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); return ctx.builder.CreateCall(cttz, {x, y}); } case abs_float: { - FunctionCallee absintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, makeArrayRef(t)); + FunctionCallee absintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, ArrayRef(t)); return ctx.builder.CreateCall(absintr, x); } case copysign_float: { - FunctionCallee copyintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::copysign, makeArrayRef(t)); + FunctionCallee copyintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::copysign, ArrayRef(t)); return ctx.builder.CreateCall(copyintr, {x, y}); } case flipsign_int: { @@ -1542,27 +1651,27 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg return ctx.builder.CreateXor(ctx.builder.CreateAdd(x, tmp), tmp); } case ceil_llvm: { - FunctionCallee ceilintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, makeArrayRef(t)); + FunctionCallee ceilintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, ArrayRef(t)); return ctx.builder.CreateCall(ceilintr, x); } case floor_llvm: { - FunctionCallee floorintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, makeArrayRef(t)); + FunctionCallee floorintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, ArrayRef(t)); return 
ctx.builder.CreateCall(floorintr, x); } case trunc_llvm: { - FunctionCallee truncintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, makeArrayRef(t)); + FunctionCallee truncintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, ArrayRef(t)); return ctx.builder.CreateCall(truncintr, x); } case rint_llvm: { - FunctionCallee rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, makeArrayRef(t)); + FunctionCallee rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, ArrayRef(t)); return ctx.builder.CreateCall(rintintr, x); } case sqrt_llvm: { - FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t)); + FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, ArrayRef(t)); return ctx.builder.CreateCall(sqrtintr, x); } case sqrt_llvm_fast: { - FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t)); + FunctionCallee sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, ArrayRef(t)); return math_builder(ctx, true)().CreateCall(sqrtintr, x); } diff --git a/src/intrinsics.h b/src/intrinsics.h index bb67460bbb31f..805aa914b5366 100644 --- a/src/intrinsics.h +++ b/src/intrinsics.h @@ -19,7 +19,6 @@ ADD_I(sub_float, 2) \ ADD_I(mul_float, 2) \ ADD_I(div_float, 2) \ - ADD_I(rem_float, 2) \ ADD_I(fma_float, 3) \ ADD_I(muladd_float, 3) \ /* fast arithmetic */ \ @@ -28,7 +27,6 @@ ALIAS(sub_float_fast, sub_float) \ ALIAS(mul_float_fast, mul_float) \ ALIAS(div_float_fast, div_float) \ - ALIAS(rem_float_fast, rem_float) \ /* same-type comparisons */ \ ADD_I(eq_int, 2) \ ADD_I(ne_int, 2) \ @@ -101,8 +99,6 @@ /* c interface */ \ ADD_I(cglobal, 2) \ ALIAS(llvmcall, llvmcall) \ - /* object access */ \ - ADD_I(arraylen, 1) \ /* cpu feature tests */ \ ADD_I(have_fma, 1) \ /* hidden intrinsics */ \ diff --git a/src/ircode.c b/src/ircode.c index c3fe174db3206..b238e79a239ff 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -29,8 +29,37 @@ typedef struct { uint8_t relocatability; } jl_ircode_state; +// type => tag hash for a few core types (e.g., Expr, PhiNode, etc) +static htable_t ser_tag; +// tag => type mapping, the reverse of ser_tag +static jl_value_t *deser_tag[256]; +// hash of some common symbols, encoded as CommonSym_tag plus 1 byte +static htable_t common_symbol_tag; +static jl_value_t *deser_symbols[256]; + +void *jl_lookup_ser_tag(jl_value_t *v) +{ + return ptrhash_get(&ser_tag, v); +} + +void *jl_lookup_common_symbol(jl_value_t *v) +{ + return ptrhash_get(&common_symbol_tag, v); +} + +jl_value_t *jl_deser_tag(uint8_t tag) +{ + return deser_tag[tag]; +} + +jl_value_t *jl_deser_symbol(uint8_t tag) +{ + return deser_symbols[tag]; +} + // --- encoding --- +static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED; #define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0) static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i) @@ -42,7 +71,7 @@ static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i) static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED { jl_array_t *rs = s->method->roots; - int i, l = jl_array_len(rs); + int i, l = jl_array_nrows(rs); if (jl_is_symbol(v) || jl_is_concrete_type(v)) { for (i = 0; i < l; i++) { if (jl_array_ptr_ref(rs, i) == v) @@ -56,7 +85,7 @@ static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) } } jl_add_method_root(s->method, jl_precompile_toplevel_module, v); - return 
tagged_root(rr, s, jl_array_len(rs) - 1); + return tagged_root(rr, s, jl_array_nrows(rs) - 1); } static void jl_encode_int32(jl_ircode_state *s, int32_t x) @@ -71,10 +100,71 @@ static void jl_encode_int32(jl_ircode_state *s, int32_t x) } } +static void jl_encode_as_indexed_root(jl_ircode_state *s, jl_value_t *v) +{ + rle_reference rr; + + if (jl_is_string(v)) + v = jl_as_global_root(v, 1); + literal_val_id(&rr, s, v); + int id = rr.index; + assert(id >= 0); + if (rr.key) { + write_uint8(s->s, TAG_RELOC_METHODROOT); + write_uint64(s->s, rr.key); + } + if (id <= UINT8_MAX) { + write_uint8(s->s, TAG_METHODROOT); + write_uint8(s->s, id); + } + else { + assert(id <= UINT32_MAX); + write_uint8(s->s, TAG_LONG_METHODROOT); + write_uint32(s->s, id); + } +} + +static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, size_t offset, size_t len) JL_GC_DISABLED +{ + jl_datatype_t *t = (jl_datatype_t*)jl_typetagof(mem); + size_t i; + const jl_datatype_layout_t *layout = t->layout; + if (layout->flags.arrayelem_isboxed) { + for (i = 0; i < len; i++) { + jl_value_t *e = jl_genericmemory_ptr_ref(mem, offset + i); + jl_encode_value(s, e); + } + } + else if (layout->first_ptr >= 0) { + uint16_t elsz = layout->size; + size_t j, np = layout->npointers; + const char *data = (const char*)mem->ptr + offset * elsz; + for (i = 0; i < len; i++) { + const char *start = data; + for (j = 0; j < np; j++) { + uint32_t ptr = jl_ptr_offset(t, j); + const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr]; + if ((const char*)fld != start) + ios_write(s->s, start, (const char*)fld - start); + JL_GC_PROMISE_ROOTED(*fld); + jl_encode_value(s, *fld); + start = (const char*)&fld[1]; + } + data += elsz; + if (data != start) + ios_write(s->s, start, data - start); + } + } + else { + ios_write(s->s, (char*)mem->ptr + offset * layout->size, len * layout->size); + if (layout->flags.arrayelem_isunion) + ios_write(s->s, jl_genericmemory_typetagdata(mem) + offset, len); + } +} + static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED { size_t i; - rle_reference rr; if (v == NULL) { write_uint8(s->s, TAG_NULL); @@ -98,7 +188,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) else if (v == (jl_value_t*)jl_base_module) { write_uint8(s->s, TAG_BASE); } - else if (jl_typeis(v, jl_string_type) && jl_string_len(v) == 0) { + else if (jl_typetagis(v, jl_string_tag << 4) && jl_string_len(v) == 0) { jl_encode_value(s, jl_an_empty_string); } else if (v == (jl_value_t*)s->method->module) { @@ -148,13 +238,13 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) write_uint8(s->s, TAG_LONG_SSAVALUE); write_uint16(s->s, ((jl_ssavalue_t*)v)->id); } - else if (jl_typeis(v, jl_slotnumber_type) && jl_slot_number(v) <= UINT16_MAX && jl_slot_number(v) >= 0) { + else if (jl_typetagis(v, jl_slotnumber_type) && jl_slot_number(v) <= UINT16_MAX && jl_slot_number(v) >= 0) { write_uint8(s->s, TAG_SLOTNUMBER); write_uint16(s->s, jl_slot_number(v)); } else if (jl_is_expr(v)) { jl_expr_t *e = (jl_expr_t*)v; - size_t l = jl_array_len(e->args); + size_t l = jl_array_nrows(e->args); if (e->head == jl_call_sym) { if (l == 2) { write_uint8(s->s, TAG_CALL1); @@ -186,31 +276,31 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) else if (jl_is_phinode(v)) { jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(v, 0); jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(v, 1); - size_t l = jl_array_len(edges); 
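For orientation while reading jl_encode_as_indexed_root above and the matching TAG_*_METHODROOT cases in jl_decode_value further down: the indexed-root framing is one tag byte, an optional 8-byte relocation key, and a 1- or 4-byte index into the method's root table. A minimal reader-side sketch, not part of the patch (it reuses read_uint8/read_uint32/read_uint64 and lookup_root from the surrounding code; sketch_read_method_root itself is hypothetical):

/* sketch only: the byte layout written by jl_encode_as_indexed_root */
static jl_value_t *sketch_read_method_root(jl_ircode_state *s, jl_method_t *m)
{
    uint64_t key = 0;
    uint8_t tag = read_uint8(s->s);
    if (tag == TAG_RELOC_METHODROOT) {   /* relocatable root: 8-byte key, then the real tag */
        key = read_uint64(s->s);
        tag = read_uint8(s->s);
    }
    if (tag == TAG_METHODROOT)           /* index <= UINT8_MAX: a single byte follows */
        return lookup_root(m, key, read_uint8(s->s));
    assert(tag == TAG_LONG_METHODROOT);  /* otherwise a 4-byte index follows */
    return lookup_root(m, key, read_uint32(s->s));
}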
- if (l <= 255 && jl_array_len(values) == l) { + size_t l = jl_array_nrows(edges); + if (l <= 255 && jl_array_nrows(values) == l) { write_uint8(s->s, TAG_PHINODE); write_uint8(s->s, (uint8_t)l); } else { write_uint8(s->s, TAG_LONG_PHINODE); write_int32(s->s, l); - write_int32(s->s, jl_array_len(values)); + write_int32(s->s, jl_array_nrows(values)); } for (i = 0; i < l; i++) { - int32_t e = ((int32_t*)jl_array_data(edges))[i]; + int32_t e = jl_array_data(edges, int32_t)[i]; if (e <= 20) jl_encode_value(s, jl_box_int32(e)); else jl_encode_int32(s, e); } - l = jl_array_len(values); + l = jl_array_nrows(values); for (i = 0; i < l; i++) { jl_encode_value(s, jl_array_ptr_ref(values, i)); } } else if (jl_is_phicnode(v)) { jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(v, 0); - size_t l = jl_array_len(values); + size_t l = jl_array_nrows(values); if (l <= 255) { write_uint8(s->s, TAG_PHICNODE); write_uint8(s->s, (uint8_t)l); @@ -240,7 +330,17 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) write_uint8(s->s, TAG_RETURNNODE); jl_encode_value(s, jl_get_nth_field(v, 0)); } - else if (jl_typeis(v, jl_int64_type)) { + else if (jl_is_quotenode(v)) { + write_uint8(s->s, TAG_QUOTENODE); + jl_value_t *inner = jl_quotenode_value(v); + // we might need to return this exact object at run time, therefore codegen might + // need to reference it as well, so it is more likely useful to give it a root + if (jl_is_expr(inner) || jl_is_phinode(inner) || jl_is_phicnode(inner)) + jl_encode_as_indexed_root(s, inner); + else + jl_encode_value(s, inner); + } + else if (jl_typetagis(v, jl_int64_tag << 4)) { void *data = jl_data_ptr(v); if (*(int64_t*)data >= INT16_MIN && *(int64_t*)data <= INT16_MAX) { write_uint8(s->s, TAG_SHORTER_INT64); @@ -252,17 +352,17 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } else { write_uint8(s->s, TAG_INT64); - write_int64(s->s, *(int64_t*)data); + write_uint64(s->s, *(int64_t*)data); } } - else if (jl_typeis(v, jl_int32_type)) { + else if (jl_typetagis(v, jl_int32_tag << 4)) { jl_encode_int32(s, *(int32_t*)jl_data_ptr(v)); } - else if (jl_typeis(v, jl_uint8_type)) { + else if (jl_typetagis(v, jl_uint8_tag << 4)) { write_uint8(s->s, TAG_UINT8); write_int8(s->s, *(int8_t*)jl_data_ptr(v)); } - else if (jl_typeis(v, jl_lineinfonode_type)) { + else if (jl_typetagis(v, jl_lineinfonode_type)) { write_uint8(s->s, TAG_LINEINFO); for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++) jl_encode_value(s, jl_get_nth_field(v, i)); @@ -271,90 +371,54 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) write_uint8(s->s, TAG_SINGLETON); jl_encode_value(s, jl_typeof(v)); } - else if (as_literal && jl_typeis(v, jl_string_type)) { + else if (as_literal && jl_typetagis(v, jl_string_tag << 4)) { write_uint8(s->s, TAG_STRING); write_int32(s->s, jl_string_len(v)); ios_write(s->s, jl_string_data(v), jl_string_len(v)); } else if (as_literal && jl_is_array(v)) { jl_array_t *ar = (jl_array_t*)v; - jl_value_t *et = jl_tparam0(jl_typeof(ar)); - int isunion = jl_is_uniontype(et); - if (ar->flags.ndims == 1 && ar->elsize <= 0x1f) { + if (jl_array_ndims(ar) == 1) { write_uint8(s->s, TAG_ARRAY1D); - write_uint8(s->s, (ar->flags.ptrarray << 7) | (ar->flags.hasptr << 6) | (isunion << 5) | (ar->elsize & 0x1f)); } else { write_uint8(s->s, TAG_ARRAY); - write_uint16(s->s, ar->flags.ndims); - write_uint16(s->s, (ar->flags.ptrarray << 15) | (ar->flags.hasptr << 14) | (isunion << 13) | (ar->elsize & 0x1fff)); + 
write_uint16(s->s, jl_array_ndims(ar)); } - for (i = 0; i < ar->flags.ndims; i++) - jl_encode_value(s, jl_box_long(jl_array_dim(ar,i))); + for (i = 0; i < jl_array_ndims(ar); i++) + jl_encode_value(s, jl_box_long(jl_array_dim(ar, i))); jl_encode_value(s, jl_typeof(ar)); size_t l = jl_array_len(ar); - if (ar->flags.ptrarray) { - for (i = 0; i < l; i++) { - jl_value_t *e = jl_array_ptr_ref(v, i); - jl_encode_value(s, e); - } - } - else if (ar->flags.hasptr) { - const char *data = (const char*)jl_array_data(ar); - uint16_t elsz = ar->elsize; - size_t j, np = ((jl_datatype_t*)et)->layout->npointers; - for (i = 0; i < l; i++) { - const char *start = data; - for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset((jl_datatype_t*)et, j); - const jl_value_t *const *fld = &((const jl_value_t *const *)data)[ptr]; - if ((const char*)fld != start) - ios_write(s->s, start, (const char*)fld - start); - JL_GC_PROMISE_ROOTED(*fld); - jl_encode_value(s, *fld); - start = (const char*)&fld[1]; - } - data += elsz; - if (data != start) - ios_write(s->s, start, data - start); - } - } - else { - ios_write(s->s, (char*)jl_array_data(ar), l * ar->elsize); - if (jl_array_isbitsunion(ar)) - ios_write(s->s, jl_array_typetagdata(ar), l); - } - } - else { - if (!as_literal && !(jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_tuple(v) || - jl_is_linenode(v) || jl_is_upsilonnode(v) || jl_is_pinode(v) || - jl_is_slot(v) || jl_is_ssavalue(v))) { - literal_val_id(&rr, s, v); - int id = rr.index; - assert(id >= 0); - if (rr.key) { - write_uint8(s->s, TAG_RELOC_METHODROOT); - write_int64(s->s, rr.key); - } - if (id < 256) { - write_uint8(s->s, TAG_METHODROOT); - write_uint8(s->s, id); - } - else { - assert(id <= UINT16_MAX); - write_uint8(s->s, TAG_LONG_METHODROOT); - write_uint16(s->s, id); - } - return; - } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(ar->ref.mem))->layout; + size_t offset; + if (layout->flags.arrayelem_isunion || layout->size == 0) + offset = (uintptr_t)ar->ref.ptr_or_offset; + else + offset = ((char*)ar->ref.ptr_or_offset - (char*)ar->ref.mem->ptr) / layout->size; + jl_encode_memory_slice(s, ar->ref.mem, offset, l); + } + else if (as_literal && jl_is_genericmemory(v)) { + jl_genericmemory_t* m = (jl_genericmemory_t*)v; + write_uint8(s->s, TAG_MEMORYT); + jl_encode_value(s, (jl_datatype_t*)jl_typetagof(v)); + jl_encode_value(s, jl_box_long(m->length)); + jl_encode_memory_slice(s, m, 0, m->length); + } + else if (as_literal && jl_is_layout_opaque(((jl_datatype_t*)jl_typeof(v))->layout)) { + assert(0 && "not legal to store this as literal"); + } + else if (as_literal || jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_linenode(v) || + jl_is_upsilonnode(v) || jl_is_pinode(v) || jl_is_slotnumber(v) || jl_is_ssavalue(v) || + (jl_isbits(jl_typeof(v)) && jl_datatype_size(jl_typeof(v)) <= 64)) { jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - if (t->size <= 255) { + size_t tsz = jl_datatype_size(t); + if (tsz <= 255) { write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, t->size); + write_uint8(s->s, tsz); } else { write_uint8(s->s, TAG_GENERAL); - write_int32(s->s, t->size); + write_int32(s->s, tsz); } jl_encode_value(s, t); @@ -388,15 +452,20 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) if (ptr > last) ios_write(s->s, last, ptr - last); } + else { + jl_encode_as_indexed_root(s, v); + } } -static jl_code_info_flags_t code_info_flags(uint8_t pure, uint8_t propagate_inbounds, uint8_t inlineable, uint8_t inferred, uint8_t constprop) +static 
jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds, uint8_t has_fcall, + uint8_t nospecializeinfer, uint8_t inlining, uint8_t constprop) { jl_code_info_flags_t flags; - flags.bits.pure = pure; - flags.bits.propagate_inbounds = propagate_inbounds; - flags.bits.inlineable = inlineable; flags.bits.inferred = inferred; + flags.bits.propagate_inbounds = propagate_inbounds; + flags.bits.has_fcall = has_fcall; + flags.bits.nospecializeinfer = nospecializeinfer; + flags.bits.inlining = inlining; flags.bits.constprop = constprop; return flags; } @@ -420,52 +489,27 @@ static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) JL_GC_D return (jl_value_t*)sv; } -static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, size_t nel) JL_GC_DISABLED { - int16_t i, ndims; - int isptr, isunion, hasptr, elsize; - if (tag == TAG_ARRAY1D) { - ndims = 1; - elsize = read_uint8(s->s); - isptr = (elsize >> 7) & 1; - hasptr = (elsize >> 6) & 1; - isunion = (elsize >> 5) & 1; - elsize = elsize & 0x1f; - } - else { - ndims = read_uint16(s->s); - elsize = read_uint16(s->s); - isptr = (elsize >> 15) & 1; - hasptr = (elsize >> 14) & 1; - isunion = (elsize >> 13) & 1; - elsize = elsize & 0x1fff; - } - size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); - for (i = 0; i < ndims; i++) { - dims[i] = jl_unbox_long(jl_decode_value(s)); - } - jl_array_t *a = jl_new_array_for_deserialization( - (jl_value_t*)NULL, ndims, dims, !isptr, hasptr, isunion, elsize); - jl_value_t *aty = jl_decode_value(s); - jl_set_typeof(a, aty); - if (a->flags.ptrarray) { - jl_value_t **data = (jl_value_t**)jl_array_data(a); - size_t i, numel = jl_array_len(a); + jl_genericmemory_t *m = jl_alloc_genericmemory(mty, nel); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty)->layout; + if (layout->flags.arrayelem_isboxed) { + jl_value_t **data = (jl_value_t**)m->ptr; + size_t i, numel = m->length; for (i = 0; i < numel; i++) { data[i] = jl_decode_value(s); } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled + assert(jl_astaggedvalue(m)->bits.gc == GC_CLEAN); // gc is disabled } - else if (a->flags.hasptr) { - size_t i, numel = jl_array_len(a); - char *data = (char*)jl_array_data(a); - uint16_t elsz = a->elsize; - jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(a)); - size_t j, np = et->layout->npointers; + else if (layout->first_ptr >= 0) { + size_t i, numel = m->length; + char *data = (char*)m->ptr; + uint16_t elsz = layout->size; + size_t j, np = layout->npointers; for (i = 0; i < numel; i++) { char *start = data; for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset(et, j); + uint32_t ptr = jl_ptr_offset((jl_datatype_t*)mty, j); jl_value_t **fld = &((jl_value_t**)data)[ptr]; if ((char*)fld != start) ios_readall(s->s, start, (const char*)fld - start); @@ -476,13 +520,39 @@ static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_ if (data != start) ios_readall(s->s, start, data - start); } - assert(jl_astaggedvalue(a)->bits.gc == GC_CLEAN); // gc is disabled + assert(jl_astaggedvalue(m)->bits.gc == GC_CLEAN); // gc is disabled } else { - size_t extra = jl_array_isbitsunion(a) ? jl_array_len(a) : 0; - size_t tot = jl_array_len(a) * a->elsize + extra; - ios_readall(s->s, (char*)jl_array_data(a), tot); + size_t extra = jl_genericmemory_isbitsunion(m) ? 
m->length : 0; + size_t tot = m->length * layout->size + extra; + ios_readall(s->s, (char*)m->ptr, tot); + } + return (jl_value_t*)m; +} + +JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims); + +static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +{ + int16_t i, ndims; + if (tag == TAG_ARRAY1D) + ndims = 1; + else + ndims = read_uint16(s->s); + size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); + size_t len = 1; + for (i = 0; i < ndims; i++) { + dims[i] = jl_unbox_long(jl_decode_value(s)); + len *= dims[i]; } + jl_value_t *aty = jl_decode_value(s); + jl_array_t *a = jl_alloc_array_nd(aty, dims, ndims); + a->ref.mem = (jl_genericmemory_t*)jl_decode_value_memory(s, jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)aty, 0), 1), len); + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout; + if (layout->flags.arrayelem_isunion || layout->size == 0) + a->ref.ptr_or_offset = (void*)0; + else + a->ref.ptr_or_offset = a->ref.mem->ptr; return (jl_value_t*)a; } @@ -507,7 +577,7 @@ static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_D if (head == NULL) head = (jl_sym_t*)jl_decode_value(s); jl_expr_t *e = jl_exprn(head, len); - jl_value_t **data = (jl_value_t**)(e->args->data); + jl_value_t **data = jl_array_ptr_data(e->args); for (i = 0; i < len; i++) { data[i] = jl_decode_value(s); } @@ -527,11 +597,11 @@ static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DI jl_array_t *e = jl_alloc_array_1d(jl_array_int32_type, len_e); jl_array_t *v = jl_alloc_vec_any(len_v); jl_value_t *phi = jl_new_struct(jl_phinode_type, e, v); - int32_t *data_e = (int32_t*)(e->data); + int32_t *data_e = jl_array_data(e, int32_t); for (i = 0; i < len_e; i++) { data_e[i] = jl_unbox_int32(jl_decode_value(s)); } - jl_value_t **data_v = (jl_value_t**)(v->data); + jl_value_t **data_v = jl_array_ptr_data(v); for (i = 0; i < len_v; i++) { data_v[i] = jl_decode_value(s); } @@ -547,7 +617,7 @@ static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_D len = read_int32(s->s); jl_array_t *v = jl_alloc_vec_any(len); jl_value_t *phic = jl_new_struct(jl_phicnode_type, v); - jl_value_t **data = (jl_value_t**)(v->data); + jl_value_t **data = jl_array_ptr_data(v); for (i = 0; i < len; i++) { data[i] = jl_decode_value(s); } @@ -565,9 +635,12 @@ static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DI { int32_t sz = (tag == TAG_SHORT_GENERAL ? read_uint8(s->s) : read_int32(s->s)); jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); - jl_set_typeof(v, (void*)(intptr_t)0x50); + jl_set_typeof(v, (void*)(intptr_t)0xf50); jl_datatype_t *dt = (jl_datatype_t*)jl_decode_value(s); - jl_set_typeof(v, dt); + if (dt->smalltag) + jl_set_typetagof(v, dt->smalltag, 0); + else + jl_set_typeof(v, dt); char *data = (char*)jl_data_ptr(v); size_t i, np = dt->layout->npointers; char *start = data; @@ -603,11 +676,17 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED key = read_uint64(s->s); tag = read_uint8(s->s); assert(tag == TAG_METHODROOT || tag == TAG_LONG_METHODROOT); - return lookup_root(s->method, key, tag == TAG_METHODROOT ? 
read_uint8(s->s) : read_uint16(s->s)); + int index = -1; + if (tag == TAG_METHODROOT) + index = read_uint8(s->s); + else if (tag == TAG_LONG_METHODROOT) + index = read_uint32(s->s); + assert(index >= 0); + return lookup_root(s->method, key, index); case TAG_METHODROOT: return lookup_root(s->method, 0, read_uint8(s->s)); case TAG_LONG_METHODROOT: - return lookup_root(s->method, 0, read_uint16(s->s)); + return lookup_root(s->method, 0, read_uint32(s->s)); case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC: return jl_decode_value_svec(s, tag); case TAG_COMMONSYM: @@ -623,6 +702,8 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED return v; case TAG_ARRAY: JL_FALLTHROUGH; case TAG_ARRAY1D: return jl_decode_value_array(s, tag); + case TAG_MEMORYT: + return jl_decode_value_memory(s, jl_decode_value(s), jl_unbox_long(jl_decode_value(s))); case TAG_EXPR: JL_FALLTHROUGH; case TAG_LONG_EXPR: JL_FALLTHROUGH; case TAG_CALL1: JL_FALLTHROUGH; @@ -707,9 +788,23 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED // --- entry points --- -JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) +typedef jl_value_t jl_string_t; // for local expressibility + +#define IR_DATASIZE_FLAGS sizeof(uint8_t) +#define IR_DATASIZE_PURITY sizeof(uint16_t) +#define IR_DATASIZE_INLINING_COST sizeof(uint16_t) +#define IR_DATASIZE_NSLOTS sizeof(int32_t) +typedef enum { + ir_offset_flags = 0, + ir_offset_purity = 0 + IR_DATASIZE_FLAGS, + ir_offset_inlining_cost = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY, + ir_offset_nslots = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY + IR_DATASIZE_INLINING_COST, + ir_offset_slotflags = 0 + IR_DATASIZE_FLAGS + IR_DATASIZE_PURITY + IR_DATASIZE_INLINING_COST + IR_DATASIZE_NSLOTS +} ir_offset; + +JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) { - JL_TIMING(AST_COMPRESS); + JL_TIMING(AST_COMPRESS, AST_COMPRESS); JL_LOCK(&m->writelock); // protect the roots array (Might GC) assert(jl_is_method(m)); assert(jl_is_code_info(code)); @@ -729,14 +824,20 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) 1 }; - jl_code_info_flags_t flags = code_info_flags(code->pure, code->propagate_inbounds, code->inlineable, code->inferred, code->constprop); + jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds, code->has_fcall, + code->nospecializeinfer, code->inlining, code->constprop); write_uint8(s.s, flags.packed); - write_uint8(s.s, code->purity.bits); + static_assert(sizeof(flags.packed) == IR_DATASIZE_FLAGS, "ir_datasize_flags is mismatched with the actual size"); + write_uint16(s.s, code->purity.bits); + static_assert(sizeof(code->purity.bits) == IR_DATASIZE_PURITY, "ir_datasize_purity is mismatched with the actual size"); + write_uint16(s.s, code->inlining_cost); + static_assert(sizeof(code->inlining_cost) == IR_DATASIZE_INLINING_COST, "ir_datasize_inlining_cost is mismatched with the actual size"); - size_t nslots = jl_array_len(code->slotflags); + int32_t nslots = jl_array_nrows(code->slotflags); assert(nslots >= m->nargs && nslots < INT32_MAX); // required by generated functions write_int32(s.s, nslots); - ios_write(s.s, (char*)jl_array_data(code->slotflags), nslots); + static_assert(sizeof(nslots) == IR_DATASIZE_NSLOTS, "ir_datasize_nslots is mismatched with the actual size"); + ios_write(s.s, jl_array_data(code->slotflags, const char), nslots); // N.B.: The layout of everything before this point is explicitly referenced // by the various jl_ir_ 
accessors. Make sure to adjust those if you change @@ -766,50 +867,51 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) else jl_encode_value(&s, jl_nothing); - size_t nstmt = jl_array_len(code->code); - assert(nstmt == jl_array_len(code->codelocs)); - if (jl_array_len(code->linetable) < 256) { + size_t nstmt = jl_array_nrows(code->code); + assert(nstmt == jl_array_nrows(code->codelocs)); + if (jl_array_nrows(code->linetable) < 256) { for (i = 0; i < nstmt; i++) { - write_uint8(s.s, ((int32_t*)jl_array_data(code->codelocs))[i]); + write_uint8(s.s, jl_array_data(code->codelocs, int32_t)[i]); } } - else if (jl_array_len(code->linetable) < 65536) { + else if (jl_array_nrows(code->linetable) < 65536) { for (i = 0; i < nstmt; i++) { - write_uint16(s.s, ((int32_t*)jl_array_data(code->codelocs))[i]); + write_uint16(s.s, jl_array_data(code->codelocs, int32_t)[i]); } } else { - ios_write(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t)); + ios_write(s.s, (char*)jl_array_data(code->codelocs, int32_t), nstmt * sizeof(int32_t)); } write_uint8(s.s, s.relocatability); ios_flush(s.s); - jl_array_t *v = jl_take_buffer(&dest); + jl_string_t *v = jl_pchar_to_string(s.s->buf, s.s->size); ios_close(s.s); - if (jl_array_len(m->roots) == 0) { + if (jl_array_nrows(m->roots) == 0) { m->roots = NULL; } JL_GC_PUSH1(&v); jl_gc_enable(en); JL_UNLOCK(&m->writelock); // Might GC JL_GC_POP(); + return v; } -JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_array_t *data) +JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_string_t *data) { if (jl_is_code_info(data)) return (jl_code_info_t*)data; - JL_TIMING(AST_UNCOMPRESS); + JL_TIMING(AST_UNCOMPRESS, AST_UNCOMPRESS); JL_LOCK(&m->writelock); // protect the roots array (Might GC) assert(jl_is_method(m)); - assert(jl_typeis(data, jl_array_uint8_type)); + assert(jl_is_string(data)); size_t i; ios_t src; ios_mem(&src, 0); - ios_setbuf(&src, (char*)data->data, jl_array_len(data), 0); - src.size = jl_array_len(data); + ios_setbuf(&src, (char*)jl_string_data(data), jl_string_len(data), 0); + src.size = jl_string_len(data); int en = jl_gc_enable(0); // Might GC jl_ircode_state s = { &src, @@ -821,16 +923,18 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t jl_code_info_t *code = jl_new_code_info_uninit(); jl_code_info_flags_t flags; flags.packed = read_uint8(s.s); + code->inlining = flags.bits.inlining; code->constprop = flags.bits.constprop; code->inferred = flags.bits.inferred; - code->inlineable = flags.bits.inlineable; code->propagate_inbounds = flags.bits.propagate_inbounds; - code->pure = flags.bits.pure; - code->purity.bits = read_uint8(s.s); + code->has_fcall = flags.bits.has_fcall; + code->nospecializeinfer = flags.bits.nospecializeinfer; + code->purity.bits = read_uint16(s.s); + code->inlining_cost = read_uint16(s.s); size_t nslots = read_int32(&src); code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots); - ios_readall(s.s, (char*)jl_array_data(code->slotflags), nslots); + ios_readall(s.s, jl_array_data(code->slotflags, char), nslots); for (i = 0; i < 6; i++) { if (i == 1) // skip codelocs @@ -847,20 +951,20 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t slotnames = m->slot_syms; code->slotnames = jl_uncompress_argnames(slotnames); - size_t nstmt = jl_array_len(code->code); + size_t nstmt = jl_array_nrows(code->code); code->codelocs = 
(jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nstmt); - if (jl_array_len(code->linetable) < 256) { + if (jl_array_nrows(code->linetable) < 256) { for (i = 0; i < nstmt; i++) { - ((int32_t*)jl_array_data(code->codelocs))[i] = read_uint8(s.s); + jl_array_data(code->codelocs, int32_t)[i] = read_uint8(s.s); } } - else if (jl_array_len(code->linetable) < 65536) { + else if (jl_array_nrows(code->linetable) < 65536) { for (i = 0; i < nstmt; i++) { - ((int32_t*)jl_array_data(code->codelocs))[i] = read_uint16(s.s); + jl_array_data(code->codelocs, int32_t)[i] = read_uint16(s.s); } } else { - ios_readall(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t)); + ios_readall(s.s, (char*)jl_array_data(code->codelocs, int32_t), nstmt * sizeof(int32_t)); } (void) read_uint8(s.s); // relocatability @@ -877,42 +981,52 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->rettype = metadata->rettype; code->parent = metadata->def; } + return code; } -JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_array_t *data) +JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_string_t *data) { if (jl_is_code_info(data)) return ((jl_code_info_t*)data)->inferred; - assert(jl_typeis(data, jl_array_uint8_type)); + assert(jl_is_string(data)); jl_code_info_flags_t flags; - flags.packed = ((uint8_t*)data->data)[0]; + flags.packed = jl_string_data(data)[ir_offset_flags]; return flags.bits.inferred; } -JL_DLLEXPORT uint8_t jl_ir_flag_inlineable(jl_array_t *data) +JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_string_t *data) { if (jl_is_code_info(data)) - return ((jl_code_info_t*)data)->inlineable; - assert(jl_typeis(data, jl_array_uint8_type)); + return ((jl_code_info_t*)data)->inlining; + assert(jl_is_string(data)); jl_code_info_flags_t flags; - flags.packed = ((uint8_t*)data->data)[0]; - return flags.bits.inlineable; + flags.packed = jl_string_data(data)[ir_offset_flags]; + return flags.bits.inlining; } -JL_DLLEXPORT uint8_t jl_ir_flag_pure(jl_array_t *data) +JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_string_t *data) { if (jl_is_code_info(data)) - return ((jl_code_info_t*)data)->pure; - assert(jl_typeis(data, jl_array_uint8_type)); + return ((jl_code_info_t*)data)->has_fcall; + assert(jl_is_string(data)); jl_code_info_flags_t flags; - flags.packed = ((uint8_t*)data->data)[0]; - return flags.bits.pure; + flags.packed = jl_string_data(data)[ir_offset_flags]; + return flags.bits.has_fcall; +} + +JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_string_t *data) +{ + if (jl_is_code_info(data)) + return ((jl_code_info_t*)data)->inlining_cost; + assert(jl_is_string(data)); + uint16_t res = jl_load_unaligned_i16(jl_string_data(data) + ir_offset_inlining_cost); + return res; } JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms) { - size_t nsyms = jl_array_len(syms); + size_t nsyms = jl_array_nrows(syms); size_t i, len = 0; for (i = 0; i < nsyms; i++) { jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(syms, i); @@ -936,26 +1050,28 @@ JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms) return str; } -JL_DLLEXPORT ssize_t jl_ir_nslots(jl_array_t *data) +JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data) { if (jl_is_code_info(data)) { jl_code_info_t *func = (jl_code_info_t*)data; - return jl_array_len(func->slotnames); + return jl_array_nrows(func->slotnames); } else { - assert(jl_typeis(data, jl_array_uint8_type)); - int nslots = jl_load_unaligned_i32((char*)data->data + 2); + assert(jl_is_string(data)); + int nslots = jl_load_unaligned_i32(jl_string_data(data) + ir_offset_nslots); 
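The jl_ir_* accessors here read only the fixed-size header that jl_compress_ir emits in front of the statement stream, so flags, purity, inlining cost and slot count can be answered without decompressing the body. A minimal sketch of that header, with byte offsets derived from the IR_DATASIZE_*/ir_offset_* definitions above (the sk_* names are hypothetical):

/* sketch only: fixed header at the start of a compressed-IR string */
enum {
    sk_off_flags         = 0,  /* 1 byte : jl_code_info_flags_t (inferred, inlining, ...) */
    sk_off_purity        = 1,  /* 2 bytes: code->purity.bits */
    sk_off_inlining_cost = 3,  /* 2 bytes: code->inlining_cost */
    sk_off_nslots        = 5,  /* 4 bytes: number of slots */
    sk_off_slotflags     = 9   /* nslots bytes of per-slot flags, then the encoded statements */
};

static uint16_t sketch_ir_inlining_cost(jl_string_t *data)
{
    /* the header is packed, so fields may be unaligned; hence the unaligned load */
    return jl_load_unaligned_i16(jl_string_data(data) + sk_off_inlining_cost);
}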
return nslots; } } -JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_array_t *data, size_t i) +JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_string_t *data, size_t i) { assert(i < jl_ir_nslots(data)); - if (jl_is_code_info(data)) - return ((uint8_t*)((jl_code_info_t*)data)->slotflags->data)[i]; - assert(jl_typeis(data, jl_array_uint8_type)); - return ((uint8_t*)data->data)[2 + sizeof(int32_t) + i]; + if (jl_is_code_info(data)) { + jl_array_t *slotflags = ((jl_code_info_t*)data)->slotflags; + return jl_array_data(slotflags, uint8_t)[i]; + } + assert(jl_is_string(data)); + return jl_string_data(data)[ir_offset_slotflags + i]; } JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms) @@ -1001,6 +1117,113 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i) return jl_nothing; } +void jl_init_serializer(void) +{ + jl_task_t *ct = jl_current_task; + htable_new(&ser_tag, 0); + htable_new(&common_symbol_tag, 0); + + void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type, + jl_call_sym, jl_invoke_sym, jl_invoke_modify_sym, jl_goto_ifnot_sym, jl_return_sym, jl_symbol("tuple"), + jl_an_empty_string, jl_an_empty_vec_any, + + // empirical list of very common symbols + #include "common_symbols1.inc" + + jl_box_int32(0), jl_box_int32(1), jl_box_int32(2), + jl_box_int32(3), jl_box_int32(4), jl_box_int32(5), + jl_box_int32(6), jl_box_int32(7), jl_box_int32(8), + jl_box_int32(9), jl_box_int32(10), jl_box_int32(11), + jl_box_int32(12), jl_box_int32(13), jl_box_int32(14), + jl_box_int32(15), jl_box_int32(16), jl_box_int32(17), + jl_box_int32(18), jl_box_int32(19), jl_box_int32(20), + + jl_box_int64(0), jl_box_int64(1), jl_box_int64(2), + jl_box_int64(3), jl_box_int64(4), jl_box_int64(5), + jl_box_int64(6), jl_box_int64(7), jl_box_int64(8), + jl_box_int64(9), jl_box_int64(10), jl_box_int64(11), + jl_box_int64(12), jl_box_int64(13), jl_box_int64(14), + jl_box_int64(15), jl_box_int64(16), jl_box_int64(17), + jl_box_int64(18), jl_box_int64(19), jl_box_int64(20), + + jl_bool_type, jl_linenumbernode_type, jl_pinode_type, + jl_upsilonnode_type, jl_type_type, jl_bottom_type, jl_ref_type, + jl_pointer_type, jl_abstractarray_type, jl_nothing_type, + jl_vararg_type, + jl_densearray_type, jl_function_type, jl_typename_type, + jl_builtin_type, jl_task_type, jl_uniontype_type, + jl_array_any_type, jl_intrinsic_type, + jl_methtable_type, jl_typemap_level_type, + jl_voidpointer_type, jl_newvarnode_type, jl_abstractstring_type, + jl_array_symbol_type, jl_anytuple_type, jl_tparam0(jl_anytuple_type), + jl_emptytuple_type, jl_array_uint8_type, jl_array_uint32_type, jl_code_info_type, + jl_typeofbottom_type, jl_typeofbottom_type->super, + jl_namedtuple_type, jl_array_int32_type, + jl_uint32_type, jl_uint64_type, + jl_type_type_mt, jl_nonfunction_mt, + jl_opaque_closure_type, + jl_memory_any_type, + jl_memory_uint8_type, + + ct->ptls->root_task, + + NULL }; + + // more common symbols, less common than those above. will get 2-byte encodings. 
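The tables filled in here by jl_init_serializer are what give the serializer its compact encodings: deser_tag maps a tag byte back to a value or type, ser_tag is the reverse pointer hash consulted while encoding, and the symbols pulled in from common_symbols2.inc (declared just below) get the two-byte form noted earlier, a common-symbol tag followed by one index byte into deser_symbols. A minimal lookup sketch, not part of the patch (HT_NOTFOUND is assumed to be the miss sentinel of the ptrhash helpers; the sketch_* names are hypothetical):

/* sketch only: round-tripping a value through the serializer tag tables */
static uint8_t sketch_tag_for(jl_value_t *v)
{
    void *tag = jl_lookup_ser_tag(v);      /* ptrhash_get(&ser_tag, v) */
    return tag == HT_NOTFOUND ? 0 : (uint8_t)(uintptr_t)tag;
}

static jl_value_t *sketch_value_for(uint8_t tag)
{
    return jl_deser_tag(tag);              /* direct index into deser_tag[256] */
}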
+ void *common_symbols[] = { + #include "common_symbols2.inc" + NULL + }; + + deser_tag[TAG_SYMBOL] = (jl_value_t*)jl_symbol_type; + deser_tag[TAG_SSAVALUE] = (jl_value_t*)jl_ssavalue_type; + deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; + deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; + deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; + deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; + deser_tag[TAG_MEMORYT] = (jl_value_t*)jl_genericmemory_type; + deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; + deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; + deser_tag[TAG_PHICNODE] = (jl_value_t*)jl_phicnode_type; + deser_tag[TAG_STRING] = (jl_value_t*)jl_string_type; + deser_tag[TAG_MODULE] = (jl_value_t*)jl_module_type; + deser_tag[TAG_TVAR] = (jl_value_t*)jl_tvar_type; + deser_tag[TAG_METHOD_INSTANCE] = (jl_value_t*)jl_method_instance_type; + deser_tag[TAG_METHOD] = (jl_value_t*)jl_method_type; + deser_tag[TAG_CODE_INSTANCE] = (jl_value_t*)jl_code_instance_type; + deser_tag[TAG_GLOBALREF] = (jl_value_t*)jl_globalref_type; + deser_tag[TAG_INT32] = (jl_value_t*)jl_int32_type; + deser_tag[TAG_INT64] = (jl_value_t*)jl_int64_type; + deser_tag[TAG_UINT8] = (jl_value_t*)jl_uint8_type; + deser_tag[TAG_LINEINFO] = (jl_value_t*)jl_lineinfonode_type; + deser_tag[TAG_UNIONALL] = (jl_value_t*)jl_unionall_type; + deser_tag[TAG_GOTONODE] = (jl_value_t*)jl_gotonode_type; + deser_tag[TAG_QUOTENODE] = (jl_value_t*)jl_quotenode_type; + deser_tag[TAG_GOTOIFNOT] = (jl_value_t*)jl_gotoifnot_type; + deser_tag[TAG_RETURNNODE] = (jl_value_t*)jl_returnnode_type; + deser_tag[TAG_ARGUMENT] = (jl_value_t*)jl_argument_type; + + intptr_t i = 0; + while (vals[i] != NULL) { + deser_tag[LAST_TAG+1+i] = (jl_value_t*)vals[i]; + i += 1; + } + assert(LAST_TAG+1+i < 256); + + for (i = 2; i < 256; i++) { + if (deser_tag[i]) + ptrhash_put(&ser_tag, deser_tag[i], (void*)i); + } + + i = 2; + while (common_symbols[i-2] != NULL) { + ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); + deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; + i += 1; + } + assert(i <= 256); +} + #ifdef __cplusplus } #endif diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index eda841cbadb73..3ed86c688f6dd 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -3,16 +3,19 @@ #include "llvm-version.h" #include "platform.h" #include +#include #include "llvm/IR/Mangler.h" +#include #include #include #include #include #include -#if JL_LLVM_VERSION >= 130000 +#include +#include #include -#endif +#include #include #include #include @@ -23,11 +26,7 @@ // target machine computation #include -#if JL_LLVM_VERSION >= 140000 #include -#else -#include -#endif #include #include #include @@ -35,65 +34,151 @@ using namespace llvm; -#include "julia.h" -#include "julia_internal.h" -#include "codegen_shared.h" #include "jitlayers.h" #include "julia_assert.h" #include "processor.h" -#ifdef JL_USE_JITLINK -# if JL_LLVM_VERSION >= 140000 -# include -# endif +# include # include # include -#else +# if JL_LLVM_VERSION >= 150000 +# include +# endif # include -#endif -#define DEBUG_TYPE "jitlayers" +#define DEBUG_TYPE "julia_jitlayers" + +STATISTIC(LinkedGlobals, "Number of globals linked"); +STATISTIC(CompiledCodeinsts, "Number of codeinsts compiled directly"); +STATISTIC(MaxWorkqueueSize, "Maximum number of elements in the workqueue"); +STATISTIC(IndirectCodeinsts, "Number of dependent codeinsts compiled"); +STATISTIC(SpecFPtrCount, "Number of specialized function pointers compiled"); +STATISTIC(UnspecFPtrCount, "Number of 
specialized function pointers compiled"); +STATISTIC(ModulesAdded, "Number of modules added to the JIT"); +STATISTIC(ModulesOptimized, "Number of modules optimized by the JIT"); +STATISTIC(OptO0, "Number of modules optimized at level -O0"); +STATISTIC(OptO1, "Number of modules optimized at level -O1"); +STATISTIC(OptO2, "Number of modules optimized at level -O2"); +STATISTIC(OptO3, "Number of modules optimized at level -O3"); +STATISTIC(ModulesMerged, "Number of modules merged"); +STATISTIC(InternedGlobals, "Number of global constants interned in the string pool"); + +#ifdef _COMPILER_MSAN_ENABLED_ +// TODO: This should not be necessary on ELF x86_64, but LLVM's implementation +// of the TLS relocations is currently broken, so enable this unconditionally. +#define MSAN_EMUTLS_WORKAROUND 1 + +// See https://github.com/google/sanitizers/wiki/MemorySanitizerJIT +namespace msan_workaround { + +extern "C" { + extern __thread unsigned long long __msan_param_tls[]; + extern __thread unsigned int __msan_param_origin_tls[]; + extern __thread unsigned long long __msan_retval_tls[]; + extern __thread unsigned int __msan_retval_origin_tls; + extern __thread unsigned long long __msan_va_arg_tls[]; + extern __thread unsigned int __msan_va_arg_origin_tls[]; + extern __thread unsigned long long __msan_va_arg_overflow_size_tls; + extern __thread unsigned int __msan_origin_tls; +} + +enum class MSanTLS +{ + param = 1, // __msan_param_tls + param_origin, //__msan_param_origin_tls + retval, // __msan_retval_tls + retval_origin, //__msan_retval_origin_tls + va_arg, // __msan_va_arg_tls + va_arg_origin, // __msan_va_arg_origin_tls + va_arg_overflow_size, // __msan_va_arg_overflow_size_tls + origin, //__msan_origin_tls +}; + +static void *getTLSAddress(void *control) +{ + auto tlsIndex = static_cast(reinterpret_cast(control)); + switch(tlsIndex) + { + case MSanTLS::param: return reinterpret_cast(&__msan_param_tls); + case MSanTLS::param_origin: return reinterpret_cast(&__msan_param_origin_tls); + case MSanTLS::retval: return reinterpret_cast(&__msan_retval_tls); + case MSanTLS::retval_origin: return reinterpret_cast(&__msan_retval_origin_tls); + case MSanTLS::va_arg: return reinterpret_cast(&__msan_va_arg_tls); + case MSanTLS::va_arg_origin: return reinterpret_cast(&__msan_va_arg_origin_tls); + case MSanTLS::va_arg_overflow_size: return reinterpret_cast(&__msan_va_arg_overflow_size_tls); + case MSanTLS::origin: return reinterpret_cast(&__msan_origin_tls); + default: + assert(false && "BAD MSAN TLS INDEX"); + return nullptr; + } +} +} +#endif // Snooping on which functions are being compiled, and how long it takes -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN void jl_dump_compiles_impl(void *s) { - **jl_ExecutionEngine->get_dump_compiles_stream() = (JL_STREAM*)s; + **jl_ExecutionEngine->get_dump_compiles_stream() = (ios_t*)s; } -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN void jl_dump_llvm_opt_impl(void *s) { - **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (JL_STREAM*)s; + **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s; } -static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap &NewExports); -static void jl_decorate_module(Module &M); -static uint64_t getAddressForFunction(StringRef fname); +static int jl_add_to_ee( + orc::ThreadSafeModule &M, + const StringMap &NewExports, + DenseMap &Queued, + SmallVectorImpl &Stack) JL_NOTSAFEPOINT; +static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT; +static uint64_t getAddressForFunction(StringRef fname) 
JL_NOTSAFEPOINT; -void jl_link_global(GlobalVariable *GV, void *addr) +void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT { + ++LinkedGlobals; Constant *P = literal_static_pointer_val(addr, GV->getValueType()); GV->setInitializer(P); + GV->setDSOLocal(true); if (jl_options.image_codegen) { // If we are forcing imaging mode codegen for debugging, // emit external non-const symbol to avoid LLVM optimizing the code // similar to non-imaging mode. - GV->setLinkage(GlobalValue::ExternalLinkage); + assert(GV->hasExternalLinkage()); } else { GV->setConstant(true); GV->setLinkage(GlobalValue::PrivateLinkage); + GV->setVisibility(GlobalValue::DefaultVisibility); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); } } -void jl_jit_globals(std::map &globals) +void jl_jit_globals(std::map &globals) JL_NOTSAFEPOINT { for (auto &global : globals) { jl_link_global(global.second, global.first); } } +// used for image_codegen, where we keep all the gvs external +// so we can't jit them directly into each module +static orc::ThreadSafeModule jl_get_globals_module(orc::ThreadSafeContext &ctx, const DataLayout &DL, const Triple &T, std::map &globals) JL_NOTSAFEPOINT +{ + auto lock = ctx.getLock(); + auto GTSM = jl_create_ts_module("globals", ctx, DL, T); + auto GM = GTSM.getModuleUnlocked(); + for (auto &global : globals) { + auto GV = global.second; + auto GV2 = new GlobalVariable(*GM, GV->getValueType(), GV->isConstant(), GlobalValue::ExternalLinkage, literal_static_pointer_val(global.first, GV->getValueType()), GV->getName(), nullptr, GV->getThreadLocalMode(), GV->getAddressSpace(), false); + GV2->copyAttributesFrom(GV); + GV2->setDSOLocal(true); + GV2->setAlignment(GV->getAlign()); + } + return GTSM; +} + // this generates llvm code for the lambda info // and adds the result to the jitlayers // (and the shadow module), @@ -102,7 +187,8 @@ static jl_callptr_t _jl_compile_codeinst( jl_code_instance_t *codeinst, jl_code_info_t *src, size_t world, - orc::ThreadSafeContext context) + orc::ThreadSafeContext context, + bool is_recompile) { // caller must hold codegen_lock // and have disabled finalizers @@ -114,59 +200,93 @@ static jl_callptr_t _jl_compile_codeinst( assert(jl_is_code_instance(codeinst)); assert(codeinst->min_world <= world && (codeinst->max_world >= world || codeinst->max_world == 0) && "invalid world for method-instance"); - assert(src && jl_is_code_info(src)); + JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); +#ifdef USE_TRACY + if (is_recompile) { + TracyCZoneColor(JL_TIMING_DEFAULT_BLOCK->tracy_ctx, 0xFFA500); + } +#endif jl_callptr_t fptr = NULL; // emit the code in LLVM IR form - jl_codegen_params_t params(std::move(context)); // Locks the context + jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context params.cache = true; params.world = world; - jl_workqueue_t emitted; + params.imaging_mode = imaging_default(); + params.debug_level = jl_options.debug_level; { orc::ThreadSafeModule result_m = - jl_create_llvm_module(name_from_method_instance(codeinst->def), params.tsctx, params.imaging); + jl_create_ts_module(name_from_method_instance(codeinst->def), params.tsctx, params.DL, params.TargetTriple); jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); if (result_m) - emitted[codeinst] = {std::move(result_m), std::move(decls)}; - { - auto temp_module = jl_create_llvm_module(name_from_method_instance(codeinst->def), params.tsctx, params.imaging); - 
jl_compile_workqueue(emitted, *temp_module.getModuleUnlocked(), params, CompilationPolicy::Default); + params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + jl_compile_workqueue(params, CompilationPolicy::Default); + + if (params._shared_module) { + jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); + jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); } - if (params._shared_module) - jl_ExecutionEngine->addModule(std::move(params._shared_module)); - StringMap NewExports; - StringMap NewGlobals; - for (auto &global : params.globals) { - NewGlobals[global.second->getName()] = global.first; + // In imaging mode, we can't inline global variable initializers in order to preserve + // the fiction that we don't know what loads from the global will return. Thus, we + // need to emit a separate module for the globals before any functions are compiled, + // to ensure that the globals are defined when they are compiled. + if (params.imaging_mode) { + // Won't contain any PLT/dlsym calls, so no need to optimize those + jl_ExecutionEngine->addModule(jl_get_globals_module(params.tsctx, params.DL, params.TargetTriple, params.global_targets)); + } else { + StringMap NewGlobals; + for (auto &global : params.global_targets) { + NewGlobals[global.second->getName()] = global.first; + } + for (auto &def : params.compiled_functions) { + auto M = std::get<0>(def.second).getModuleUnlocked(); + for (auto &GV : M->globals()) { + auto InitValue = NewGlobals.find(GV.getName()); + if (InitValue != NewGlobals.end()) { + jl_link_global(&GV, InitValue->second); + } + } + } } - for (auto &def : emitted) { + + // Collect the exported functions from the params.compiled_functions modules, + // which form dependencies on which functions need to be + // compiled first. Cycles of functions are compiled together. 
+ // (essentially we compile a DAG of SCCs in reverse topological order, + // if we treat declarations of external functions as edges from declaration + // to definition) + StringMap NewExports; + for (auto &def : params.compiled_functions) { orc::ThreadSafeModule &TSM = std::get<0>(def.second); //The underlying context object is still locked because params is not destroyed yet auto M = TSM.getModuleUnlocked(); + jl_ExecutionEngine->optimizeDLSyms(*M); for (auto &F : M->global_objects()) { if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { NewExports[F.getName()] = &TSM; } } - // Let's link all globals here also (for now) - for (auto &GV : M->globals()) { - auto InitValue = NewGlobals.find(GV.getName()); - if (InitValue != NewGlobals.end()) { - jl_link_global(&GV, InitValue->second); - } - } } - for (auto &def : emitted) { + DenseMap Queued; + SmallVector Stack; + for (auto &def : params.compiled_functions) { // Add the results to the execution engine now orc::ThreadSafeModule &M = std::get<0>(def.second); - jl_add_to_ee(M, NewExports); + jl_add_to_ee(M, NewExports, Queued, Stack); + assert(Queued.empty() && Stack.empty() && !M); } + ++CompiledCodeinsts; + MaxWorkqueueSize.updateMax(params.compiled_functions.size()); + IndirectCodeinsts += params.compiled_functions.size() - 1; } - JL_TIMING(LLVM_MODULE_FINISH); - for (auto &def : emitted) { + size_t i = 0; + for (auto &def : params.compiled_functions) { jl_code_instance_t *this_code = def.first; + if (i < jl_timing_print_limit) + jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK); + jl_llvm_functions_t decls = std::get<1>(def.second); jl_callptr_t addr; bool isspecsig = false; @@ -176,26 +296,45 @@ static jl_callptr_t _jl_compile_codeinst( else if (decls.functionObject == "jl_fptr_sparam") { addr = jl_fptr_sparam_addr; } + else if (decls.functionObject == "jl_f_opaque_closure_call") { + addr = jl_f_opaque_closure_call_addr; + } else { addr = (jl_callptr_t)getAddressForFunction(decls.functionObject); isspecsig = true; } - if (jl_atomic_load_relaxed(&this_code->invoke) == NULL) { - // once set, don't change invoke-ptr, as that leads to race conditions - // with the (not) simultaneous updates to invoke and specptr - if (!decls.specFunctionObject.empty()) { - jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject)); - this_code->isspecsig = isspecsig; + if (!decls.specFunctionObject.empty()) { + void *prev_specptr = NULL; + auto spec = (void*)getAddressForFunction(decls.specFunctionObject); + if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) { + // only set specsig and invoke if we were the first to set specptr + jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig); + // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr + // either assumes that specptr was null, doesn't care about specptr, + // or will wait until specsigflags has 0b10 set before reloading invoke + jl_atomic_store_release(&this_code->invoke, addr); + jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig)); + } else { + //someone else beat us, don't commit any results + while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) { + jl_cpu_pause(); + } + addr = jl_atomic_load_relaxed(&this_code->invoke); + } + } else { + jl_callptr_t prev_invoke = NULL; + if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) { + addr = prev_invoke; + //TODO do we 
want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other + //known lesser function) } - jl_atomic_store_release(&this_code->invoke, addr); - } - else if (jl_atomic_load_relaxed(&this_code->invoke) == jl_fptr_const_return_addr && !decls.specFunctionObject.empty()) { - // hack to export this pointer value to jl_dump_method_disasm - jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject)); } - if (this_code== codeinst) + if (this_code == codeinst) fptr = addr; + i++; } + if (i > jl_timing_print_limit) + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "... <%d methods truncated>", i - 10); uint64_t end_time = 0; if (timed) @@ -207,9 +346,9 @@ static jl_callptr_t _jl_compile_codeinst( if (jl_is_method(mi->def.method)) { auto stream = *jl_ExecutionEngine->get_dump_compiles_stream(); if (stream) { - jl_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); - jl_static_show(stream, mi->specTypes); - jl_printf(stream, "\"\n"); + ios_printf(stream, "%" PRIu64 "\t\"", end_time - start_time); + jl_static_show((JL_STREAM*)stream, mi->specTypes); + ios_printf(stream, "\"\n"); } } return fptr; @@ -218,10 +357,13 @@ static jl_callptr_t _jl_compile_codeinst( const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t ¶ms); // compile a C-callable alias -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt) { - JL_LOCK(&jl_codegen_lock); + auto ct = jl_current_task; + bool timed = (ct->reentrant_timing & 1) == 0; + if (timed) + ct->reentrant_timing |= 1; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -234,10 +376,16 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * if (!pparams) { ctx = jl_ExecutionEngine->acquireContext(); } - backing = jl_create_llvm_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->imaging : imaging_default()); + backing = jl_create_ts_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->DL : jl_ExecutionEngine->getDataLayout(), pparams ? 
pparams->TargetTriple : jl_ExecutionEngine->getTargetTriple()); into = &backing; } - jl_codegen_params_t params(into->getContext()); + JL_LOCK(&jl_codegen_lock); + auto target_info = into->withModuleDo([&](Module &M) { + return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); + }); + jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second)); + params.imaging_mode = imaging_default(); + params.debug_level = jl_options.debug_level; if (pparams == NULL) pparams = ¶ms; assert(pparams->tsctx.getContext() == into->getContext().getContext()); @@ -248,25 +396,36 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * success = false; } if (success && p == NULL) { - jl_jit_globals(params.globals); + jl_jit_globals(params.global_targets); assert(params.workqueue.empty()); - if (params._shared_module) - jl_ExecutionEngine->addModule(std::move(params._shared_module)); + if (params._shared_module) { + jl_ExecutionEngine->optimizeDLSyms(*params._shared_module); + jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); + } } - if (success && llvmmod == NULL) + if (success && llvmmod == NULL) { + into->withModuleDo([&](Module &M) { + jl_ExecutionEngine->optimizeDLSyms(M); + }); jl_ExecutionEngine->addModule(std::move(*into)); + } + } + JL_UNLOCK(&jl_codegen_lock); + if (timed) { + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } + ct->reentrant_timing &= ~1ull; } - if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); if (ctx.getContext()) { jl_ExecutionEngine->releaseContext(std::move(ctx)); } - JL_UNLOCK(&jl_codegen_lock); return success; } // declare a C-callable entry point; called during code loading from the toplevel -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt) { // validate arguments. 
try to do as many checks as possible here to avoid @@ -276,7 +435,7 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt) jl_type_error("@ccallable", (jl_value_t*)jl_anytuple_type_type, (jl_value_t*)sigt); // check that f is a guaranteed singleton type jl_datatype_t *ft = (jl_datatype_t*)jl_tparam0(sigt); - if (!jl_is_datatype(ft) || ft->instance == NULL) + if (!jl_is_datatype(ft) || !jl_is_datatype_singleton(ft)) jl_error("@ccallable: function object must be a singleton"); // compute / validate return type @@ -309,12 +468,13 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt) } // this compiles li and emits fptr -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) { - JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion - auto ctx = jl_ExecutionEngine->getContext(); - auto &context = *ctx; + auto ct = jl_current_task; + bool timed = (ct->reentrant_timing & 1) == 0; + if (timed) + ct->reentrant_timing |= 1; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); bool is_recompile = false; @@ -322,15 +482,18 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES compiler_start_time = jl_hrtime(); // if we don't have any decls already, try to generate it now jl_code_info_t *src = NULL; - JL_GC_PUSH1(&src); - jl_value_t *ci = jl_rettype_inferred(mi, world, world); - jl_code_instance_t *codeinst = (ci == jl_nothing ? NULL : (jl_code_instance_t*)ci); + jl_code_instance_t *codeinst = NULL; + JL_GC_PUSH2(&src, &codeinst); + JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion + jl_value_t *ci = jl_rettype_inferred_addr(mi, world, world); + if (ci != jl_nothing) + codeinst = (jl_code_instance_t*)ci; if (codeinst) { - src = (jl_code_info_t*)codeinst->inferred; + src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); if ((jl_value_t*)src == jl_nothing) src = NULL; else if (jl_is_method(mi->def.method)) - src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src); + src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src); } else { // identify whether this is an invalidated method that is being recompiled @@ -343,6 +506,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES // see if it is inferred, or try to infer it for ourself. 
// (but don't bother with typeinf on macros or toplevel thunks) src = jl_type_infer(mi, world, 0); + codeinst = nullptr; } } jl_code_instance_t *compiled = jl_method_compiled(mi, world); @@ -351,96 +515,122 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES } else if (src && jl_is_code_info(src)) { if (!codeinst) { - codeinst = jl_get_method_inferred(mi, src->rettype, src->min_world, src->max_world); - if (src->inferred && !codeinst->inferred) - codeinst->inferred = jl_nothing; + codeinst = jl_get_codeinst_for_src(mi, src); + if (src->inferred) { + jl_value_t *null = nullptr; + jl_atomic_cmpswap_relaxed(&codeinst->inferred, &null, jl_nothing); + } } - _jl_compile_codeinst(codeinst, src, world, context); + ++SpecFPtrCount; + _jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext(), is_recompile); if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) codeinst = NULL; } else { codeinst = NULL; } - if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) { - uint64_t t_comp = jl_hrtime() - compiler_start_time; - if (is_recompile) - jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp); - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp); - } JL_UNLOCK(&jl_codegen_lock); + if (timed) { + if (measure_compile_time_enabled) { + uint64_t t_comp = jl_hrtime() - compiler_start_time; + if (is_recompile) { + jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp); + } + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp); + } + ct->reentrant_timing &= ~1ull; + } JL_GC_POP(); return codeinst; } -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN +void jl_generate_fptr_for_oc_wrapper_impl(jl_code_instance_t *oc_wrap) +{ + if (jl_atomic_load_relaxed(&oc_wrap->invoke) != NULL) { + return; + } + JL_LOCK(&jl_codegen_lock); + if (jl_atomic_load_relaxed(&oc_wrap->invoke) == NULL) { + _jl_compile_codeinst(oc_wrap, NULL, 1, *jl_ExecutionEngine->getContext(), 0); + } + JL_UNLOCK(&jl_codegen_lock); // Might GC +} + +extern "C" JL_DLLEXPORT_CODEGEN void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) { if (jl_atomic_load_relaxed(&unspec->invoke) != NULL) { return; } - JL_LOCK(&jl_codegen_lock); - auto ctx = jl_ExecutionEngine->getContext(); - auto &context = *ctx; + auto ct = jl_current_task; + bool timed = (ct->reentrant_timing & 1) == 0; + if (timed) + ct->reentrant_timing |= 1; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); + JL_LOCK(&jl_codegen_lock); if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); jl_method_t *def = unspec->def->def.method; if (jl_is_method(def)) { src = (jl_code_info_t*)def->source; - if (src == NULL) { - // TODO: this is wrong - assert(def->generator); - // TODO: jl_code_for_staged can throw - src = jl_code_for_staged(unspec->def); - } if (src && (jl_value_t*)src != jl_nothing) - src = jl_uncompress_ir(def, NULL, (jl_array_t*)src); + src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); } else { - src = (jl_code_info_t*)unspec->def->uninferred; + src = (jl_code_info_t*)jl_atomic_load_relaxed(&unspec->def->uninferred); + assert(src); } - assert(src && jl_is_code_info(src)); - _jl_compile_codeinst(unspec, src, unspec->min_world, context); - if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) { - // if we hit a codegen bug (or ran into a broken generated 
function or llvmcall), fall back to the interpreter as a last resort - jl_atomic_store_release(&unspec->invoke, jl_fptr_interpret_call_addr); + if (src) { + assert(jl_is_code_info(src)); + ++UnspecFPtrCount; + _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext(), 0); } + jl_callptr_t null = nullptr; + // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort + jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); JL_GC_POP(); } - if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&jl_codegen_lock); // Might GC + if (timed) { + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } + ct->reentrant_timing &= ~1ull; + } } // get a native disassembly for a compiled method -extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) { // printing via disassembly jl_code_instance_t *codeinst = jl_generate_fptr(mi, world); if (codeinst) { - uintptr_t fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke); + uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke); if (getwrapper) - return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_fptr_asm(fptr, emit_mc, asm_variant, debuginfo, binary); uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) { // normally we prevent native code from being generated for these functions, // (using sentinel value `1` instead) // so create an exception here so we can print pretty our lies - JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion - auto ctx = jl_ExecutionEngine->getContext(); - auto &context = *ctx; + auto ct = jl_current_task; + bool timed = (ct->reentrant_timing & 1) == 0; + if (timed) + ct->reentrant_timing |= 1; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); + JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (specfptr == 0) { jl_code_info_t *src = jl_type_infer(mi, world, 0); @@ -449,27 +639,33 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, if (jl_is_method(def)) { if (!src) { // TODO: jl_code_for_staged can throw - src = def->generator ? jl_code_for_staged(mi) : (jl_code_info_t*)def->source; + src = def->generator ? 
jl_code_for_staged(mi, world) : (jl_code_info_t*)def->source; } if (src && (jl_value_t*)src != jl_nothing) - src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src); + src = jl_uncompress_ir(mi->def.method, codeinst, (jl_value_t*)src); } - fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke); + fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke); specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (src && jl_is_code_info(src)) { if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) { - fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world, context); + fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world, *jl_ExecutionEngine->getContext(), 0); + (void)fptr; // silence unused variable warning specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); } } JL_GC_POP(); } - if (measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&jl_codegen_lock); + if (timed) { + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } + ct->reentrant_timing &= ~1ull; + } } if (specfptr != 0) - return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_fptr_asm(specfptr, emit_mc, asm_variant, debuginfo, binary); } // whatever, that didn't work - use the assembler output instead @@ -477,7 +673,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, jl_get_llvmf_defn(&llvmf_dump, mi, world, getwrapper, true, jl_default_cgparams); if (!llvmf_dump.F) return jl_an_empty_string; - return jl_dump_function_asm(&llvmf_dump, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_function_asm(&llvmf_dump, emit_mc, asm_variant, debuginfo, binary, false); } CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) @@ -491,44 +687,101 @@ CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) #endif } -static auto countBasicBlocks(const Function &F) +static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT { return std::distance(F.begin(), F.end()); } -void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr R, orc::ThreadSafeModule TSM) { - size_t optlevel = SIZE_MAX; - TSM.withModuleDo([&](Module &M) { - if (jl_generating_output()) { - optlevel = 0; +static constexpr size_t N_optlevels = 4; + +static Expected validateExternRelocations(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { +#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK) + auto isIntrinsicFunction = [](GlobalObject &GO) JL_NOTSAFEPOINT { + auto F = dyn_cast(&GO); + if (!F) + return false; + return F->isIntrinsic() || F->getName().startswith("julia."); + }; + // validate the relocations for M (only for RuntimeDyld, JITLink performs its own symbol validation) + auto Err = TSM.withModuleDo([isIntrinsicFunction](Module &M) JL_NOTSAFEPOINT { + Error Err = Error::success(); + for (auto &GO : make_early_inc_range(M.global_objects())) { + if (!GO.isDeclarationForLinker()) + continue; + if (GO.use_empty()) { + GO.eraseFromParent(); + continue; + } + if (isIntrinsicFunction(GO)) + continue; + auto sym = jl_ExecutionEngine->findUnmangledSymbol(GO.getName()); + if (sym) + continue; + // TODO have we ever run into this check? 
It's been guaranteed to not + // fire in an assert build, since previously LLVM would abort due to + // not handling the error if we didn't find the unmangled symbol + if (SectionMemoryManager::getSymbolAddressInProcess( + jl_ExecutionEngine->getMangledName(GO.getName()))) { + consumeError(sym.takeError()); + continue; + } + Err = joinErrors(std::move(Err), sym.takeError()); } - else { - optlevel = std::max(static_cast(jl_options.opt_level), 0); - size_t optlevel_min = std::max(static_cast(jl_options.opt_level_min), 0); - for (auto &F : M.functions()) { - if (!F.getBasicBlockList().empty()) { + return Err; + }); + if (Err) { + return std::move(Err); + } +#endif + return std::move(TSM); +} + +static Expected selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { + TSM.withModuleDo([](Module &M) { + size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); + do { + if (jl_generating_output()) { + opt_level = 0; + break; + } + size_t opt_level_min = std::max(static_cast(jl_options.opt_level_min), 0); + for (auto &F : M) { + if (!F.isDeclaration()) { Attribute attr = F.getFnAttribute("julia-optimization-level"); StringRef val = attr.getValueAsString(); if (val != "") { size_t ol = (size_t)val[0] - '0'; - if (ol < optlevel) - optlevel = ol; + if (ol < opt_level) + opt_level = ol; } } } - optlevel = std::min(std::max(optlevel, optlevel_min), this->count); - } + if (opt_level < opt_level_min) + opt_level = opt_level_min; + } while (0); + // currently -O3 is max + opt_level = std::min(opt_level, N_optlevels - 1); + M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level); + }); + return std::move(TSM); +} + +static void recordDebugTSM(orc::MaterializationResponsibility &, orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { + auto ptr = TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT { + auto md = M.getModuleFlag("julia.__jit_debug_tsm_addr"); + if (!md) + return static_cast(nullptr); + return reinterpret_cast(cast(cast(md)->getValue())->getZExtValue()); }); - assert(optlevel != SIZE_MAX && "Failed to select a valid optimization level!"); - this->optimizers[optlevel]->OptimizeLayer.emit(std::move(R), std::move(TSM)); + if (ptr) { + *ptr = std::move(TSM); + } } void jl_register_jit_object(const object::ObjectFile &debugObj, std::function getLoadAddress, std::function lookupWriteAddress); -#ifdef JL_USE_JITLINK - namespace { using namespace llvm::orc; @@ -540,10 +793,11 @@ struct JITObjectInfo { }; class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { + std::mutex PluginMutex; std::map> PendingObjs; - // Resources from distinct MaterializationResponsibilitys can get merged + // Resources from distinct `MaterializationResponsibility`s can get merged // after emission, so we can have multiple debug objects per resource key. 
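For orientation, the opt-level selection that `selectOptLevel` performs above boils down to: precompilation output forces -O0; otherwise any function carrying a `julia-optimization-level` attribute can lower the module's level, the result is clamped to `--min-optlevel`, capped at -O3, and recorded in the `julia.optlevel` module flag for the later layers to read. A minimal sketch of that computation, with plain containers standing in for the LLVM types:

#include <algorithm>
#include <string>
#include <vector>

constexpr size_t N_optlevels = 4;

size_t select_opt_level(size_t global_opt, size_t opt_min, bool generating_output,
                        const std::vector<std::string> &per_function_levels) {
    size_t level = global_opt;
    if (generating_output) {
        level = 0;                               // precompilation always uses -O0
    } else {
        for (const std::string &val : per_function_levels) {
            if (!val.empty()) {
                size_t ol = size_t(val[0] - '0');
                level = std::min(level, ol);     // any function can lower the level
            }
        }
        level = std::max(level, opt_min);        // but never below --min-optlevel
    }
    return std::min(level, N_optlevels - 1);     // -O3 is the maximum pool index
}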
- std::map>> RegisteredObjs; + std::map, 0>> RegisteredObjs; public: void notifyMaterializing(MaterializationResponsibility &MR, jitlink::LinkGraph &G, @@ -560,33 +814,40 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { auto NewObj = cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef())); - assert(PendingObjs.count(&MR) == 0); - PendingObjs[&MR] = std::unique_ptr( - new JITObjectInfo{std::move(NewBuffer), std::move(NewObj), {}}); + { + std::lock_guard lock(PluginMutex); + assert(PendingObjs.count(&MR) == 0); + PendingObjs[&MR] = std::unique_ptr( + new JITObjectInfo{std::move(NewBuffer), std::move(NewObj), {}}); + } } Error notifyEmitted(MaterializationResponsibility &MR) override { - auto It = PendingObjs.find(&MR); - if (It == PendingObjs.end()) - return Error::success(); - - auto NewInfo = PendingObjs[&MR].get(); - auto getLoadAddress = [NewInfo](const StringRef &Name) -> uint64_t { - auto result = NewInfo->SectionLoadAddresses.find(Name); - if (result == NewInfo->SectionLoadAddresses.end()) { - LLVM_DEBUG({ - dbgs() << "JLDebuginfoPlugin: No load address found for section '" - << Name << "'\n"; - }); - return 0; - } - return result->second; - }; + { + std::lock_guard lock(PluginMutex); + auto It = PendingObjs.find(&MR); + if (It == PendingObjs.end()) + return Error::success(); + + auto NewInfo = PendingObjs[&MR].get(); + auto getLoadAddress = [NewInfo](const StringRef &Name) -> uint64_t { + auto result = NewInfo->SectionLoadAddresses.find(Name); + if (result == NewInfo->SectionLoadAddresses.end()) { + LLVM_DEBUG({ + dbgs() << "JLDebuginfoPlugin: No load address found for section '" + << Name << "'\n"; + }); + return 0; + } + return result->second; + }; - jl_register_jit_object(*NewInfo->Object, getLoadAddress, nullptr); + jl_register_jit_object(*NewInfo->Object, getLoadAddress, nullptr); + } cantFail(MR.withResourceKeyDo([&](ResourceKey K) { + std::lock_guard lock(PluginMutex); RegisteredObjs[K].push_back(std::move(PendingObjs[&MR])); PendingObjs.erase(&MR); })); @@ -596,19 +857,29 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { Error notifyFailed(MaterializationResponsibility &MR) override { + std::lock_guard lock(PluginMutex); PendingObjs.erase(&MR); return Error::success(); } - +#if JL_LLVM_VERSION >= 160000 + Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override +#else Error notifyRemovingResources(ResourceKey K) override +#endif { + std::lock_guard lock(PluginMutex); RegisteredObjs.erase(K); // TODO: If we ever unload code, need to notify debuginfo registry. 
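The plugin changes above thread a single `PluginMutex` around every touch of `PendingObjs`/`RegisteredObjs`, so concurrent materializations cannot race on the maps. A simplified stand-in (not the real ORC types) of that locking scheme:

#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <utility>
#include <vector>

struct ObjectInfo { /* buffer, parsed object file, section load addresses ... */ };

class DebugInfoRegistry {
    std::mutex PluginMutex;
    std::map<const void *, std::unique_ptr<ObjectInfo>> PendingObjs;
    std::map<uint64_t, std::vector<std::unique_ptr<ObjectInfo>>> RegisteredObjs;

public:
    void notifyMaterializing(const void *MR, std::unique_ptr<ObjectInfo> info) {
        std::lock_guard<std::mutex> lock(PluginMutex);
        PendingObjs[MR] = std::move(info);
    }
    void notifyEmitted(const void *MR, uint64_t resource_key) {
        std::lock_guard<std::mutex> lock(PluginMutex);
        auto it = PendingObjs.find(MR);
        if (it == PendingObjs.end())
            return;
        RegisteredObjs[resource_key].push_back(std::move(it->second));
        PendingObjs.erase(it);
    }
};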
return Error::success(); } +#if JL_LLVM_VERSION >= 160000 + void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, ResourceKey SrcKey) override +#else void notifyTransferringResources(ResourceKey DstKey, ResourceKey SrcKey) override +#endif { + std::lock_guard lock(PluginMutex); auto SrcIt = RegisteredObjs.find(SrcKey); if (SrcIt != RegisteredObjs.end()) { for (std::unique_ptr &Info : SrcIt->second) @@ -620,13 +891,16 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &, jitlink::PassConfiguration &PassConfig) override { + std::lock_guard lock(PluginMutex); auto It = PendingObjs.find(&MR); if (It == PendingObjs.end()) return; JITObjectInfo &Info = *It->second; - PassConfig.PostAllocationPasses.push_back([&Info](jitlink::LinkGraph &G) -> Error { + PassConfig.PostAllocationPasses.push_back([&Info, this](jitlink::LinkGraph &G) -> Error { + std::lock_guard lock(PluginMutex); for (const jitlink::Section &Sec : G.sections()) { +#if defined(_OS_DARWIN_) // Canonical JITLink section names have the segment name included, e.g. // "__TEXT,__text" or "__DWARF,__debug_str". There are some special internal // sections without a comma separator, which we can just ignore. @@ -639,50 +913,126 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { continue; } auto SecName = Sec.getName().substr(SepPos + 1); +#else + auto SecName = Sec.getName(); +#endif // https://github.com/llvm/llvm-project/commit/118e953b18ff07d00b8f822dfbf2991e41d6d791 -#if JL_LLVM_VERSION >= 140000 Info.SectionLoadAddresses[SecName] = jitlink::SectionRange(Sec).getStart().getValue(); + } + return Error::success(); + }); + } +}; + +class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { +private: + std::atomic &total_size; + +public: + + JLMemoryUsagePlugin(std::atomic &total_size) + : total_size(total_size) {} + + Error notifyFailed(orc::MaterializationResponsibility &MR) override { + return Error::success(); + } +#if JL_LLVM_VERSION >= 160000 + Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override +#else + Error notifyRemovingResources(orc::ResourceKey K) override +#endif + { + return Error::success(); + } +#if JL_LLVM_VERSION >= 160000 + void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, + orc::ResourceKey SrcKey) override {} +#else + void notifyTransferringResources(orc::ResourceKey DstKey, + orc::ResourceKey SrcKey) override {} +#endif + + void modifyPassConfig(orc::MaterializationResponsibility &, + jitlink::LinkGraph &, + jitlink::PassConfiguration &Config) override { + Config.PostAllocationPasses.push_back([this](jitlink::LinkGraph &G) { + size_t graph_size = 0; + size_t code_size = 0; + size_t data_size = 0; + for (auto block : G.blocks()) { + graph_size += block->getSize(); + } + for (auto §ion : G.sections()) { + size_t secsize = 0; + for (auto block : section.blocks()) { + secsize += block->getSize(); + } +#if JL_LLVM_VERSION >= 160000 + if ((section.getMemProt() & orc::MemProt::Exec) == orc::MemProt::None) { #else - Info.SectionLoadAddresses[SecName] = jitlink::SectionRange(Sec).getStart(); + if ((section.getMemProt() & jitlink::MemProt::Exec) == jitlink::MemProt::None) { #endif + data_size += secsize; + } else { + code_size += secsize; + } + graph_size += secsize; } + (void) code_size; + (void) data_size; + this->total_size.fetch_add(graph_size, std::memory_order_relaxed); + jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size); + 
jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size); + jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, data_size); return Error::success(); }); } }; + +// replace with [[maybe_unused]] when we get to C++17 +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + +// TODO: Port our memory management optimisations to JITLink instead of using the +// default InProcessMemoryManager. +std::unique_ptr createJITLinkMemoryManager() { +#if JL_LLVM_VERSION < 150000 + return cantFail(jitlink::InProcessMemoryManager::Create()); +#elif JL_LLVM_VERSION < 160000 + return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); +#else + return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper(/*Reservation Granularity*/ 16 * 1024 * 1024)); +#endif } -# ifdef LLVM_SHLIB +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic pop +#endif -# if JL_LLVM_VERSION >= 140000 -# define EHFRAME_RANGE(name) orc::ExecutorAddrRange name -# define UNPACK_EHFRAME_RANGE(name) \ - name.Start.toPtr(), \ - static_cast(name.size()) -# else -# define EHFRAME_RANGE(name) JITTargetAddress name##Addr, size_t name##Size -# define UNPACK_EHFRAME_RANGE(name) \ - jitTargetAddressToPointer(name##Addr), \ - name##Size -# endif +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif +} class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { public: - Error registerEHFrames(EHFRAME_RANGE(EHFrameSection)) override { - register_eh_frames( - UNPACK_EHFRAME_RANGE(EHFrameSection)); + Error registerEHFrames(orc::ExecutorAddrRange EHFrameSection) override { + register_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); return Error::success(); } - Error deregisterEHFrames(EHFRAME_RANGE(EHFrameSection)) override { - deregister_eh_frames( - UNPACK_EHFRAME_RANGE(EHFrameSection)); + Error deregisterEHFrames(orc::ExecutorAddrRange EHFrameSection) override { + deregister_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); return Error::success(); } }; -# endif - -#else // !JL_USE_JITLINK RTDyldMemoryManager* createRTDyldMemoryManager(void); @@ -705,6 +1055,13 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { bool IsReadOnly) override { return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, IsReadOnly); } +#if JL_LLVM_VERSION >= 160000 + virtual void reserveAllocationSpace(uintptr_t CodeSize, Align CodeAlign, + uintptr_t RODataSize, Align RODataAlign, + uintptr_t RWDataSize, Align RWDataAlign) override { + return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); + } +#else virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, uintptr_t RODataSize, uint32_t RODataAlign, @@ -712,6 +1069,7 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { uint32_t RWDataAlign) override { return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); } +#endif virtual bool needsToReserveAllocationSpace() override { return MemMgr->needsToReserveAllocationSpace(); } @@ -782,31 +1140,24 @@ void registerRTDyldJITObject(const object::ObjectFile &Object, #endif ); } -#endif namespace { - std::unique_ptr createTargetMachine() { - + static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = 
TargetOptions(); -#if defined(_OS_WINDOWS_) + + Triple TheTriple(sys::getProcessTriple()); // use ELF because RuntimeDyld COFF i686 support didn't exist // use ELF because RuntimeDyld COFF X86_64 doesn't seem to work (fails to generate function pointers)? -#define FORCE_ELF + bool force_elf = TheTriple.isOSWindows(); +#ifdef FORCE_ELF + force_elf = true; #endif + if (force_elf) { + TheTriple.setObjectFormat(Triple::ELF); + } //options.PrintMachineCode = true; //Print machine code produced during JIT compiling -#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) && JL_LLVM_VERSION < 130000 - // tell Win32 to assume the stack is always 16-byte aligned, - // and to ensure that it is 16-byte aligned for out-going calls, - // to ensure compatibility with GCC codes - // In LLVM 13 and onwards this has turned into a module option - options.StackAlignmentOverride = 16; -#endif -#if defined(JL_DEBUG_BUILD) && JL_LLVM_VERSION < 130000 - // LLVM defaults to tls stack guard, which causes issues with Julia's tls implementation - options.StackProtectorGuard = StackProtectorGuards::Global; -#endif - Triple TheTriple(sys::getProcessTriple()); -#if defined(FORCE_ELF) - TheTriple.setObjectFormat(Triple::ELF); +#if defined(MSAN_EMUTLS_WORKAROUND) + options.EmulatedTLS = true; + options.ExplicitEmulatedTLS = true; #endif uint32_t target_flags = 0; auto target = jl_get_llvm_target(imaging_default(), target_flags); @@ -814,13 +1165,17 @@ namespace { SmallVector targetFeatures(target.second.begin(), target.second.end()); std::string errorstr; const Target *TheTarget = TargetRegistry::lookupTarget("", TheTriple, errorstr); - if (!TheTarget) - jl_errorf("%s", errorstr.c_str()); + if (!TheTarget) { + jl_errorf("Internal problem with process triple %s lookup: %s", TheTriple.str().c_str(), errorstr.c_str()); + return nullptr; + } if (jl_processor_print_help || (target_flags & JL_TARGET_UNKNOWN_NAME)) { std::unique_ptr MSTI( TheTarget->createMCSubtargetInfo(TheTriple.str(), "", "")); - if (!MSTI->isCPUStringValid(TheCPU)) + if (!MSTI->isCPUStringValid(TheCPU)) { jl_errorf("Invalid CPU name \"%s\".", TheCPU.c_str()); + return nullptr; + } if (jl_processor_print_help) { // This is the only way I can find to print the help message once. // It'll be nice if we can iterate through the features and print our own help @@ -845,6 +1200,8 @@ namespace { #else None; #endif + if (TheTriple.isAArch64()) + codemodel = CodeModel::Small; auto optlevel = CodeGenOptLevelFor(jl_options.opt_level); auto TM = TheTarget->createTargetMachine( TheTriple.getTriple(), TheCPU, FeaturesStr, @@ -855,139 +1212,459 @@ namespace { true // JIT ); assert(TM && "Failed to select target machine -" - " Is the LLVM backend for this CPU enabled?"); - #if (!defined(_CPU_ARM_) && !defined(_CPU_PPC64_)) - // FastISel seems to be buggy for ARM. 
Ref #13321 - if (jl_options.opt_level < 2) - TM->setFastISel(true); - #endif + " Is the LLVM backend for this CPU enabled?"); + fixupTM(*TM); return std::unique_ptr(TM); } } // namespace namespace { - typedef legacy::PassManager PassManager; + typedef NewPM PassManager; - orc::JITTargetMachineBuilder createJTMBFromTM(TargetMachine &TM, int optlevel) { + orc::JITTargetMachineBuilder createJTMBFromTM(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT { return orc::JITTargetMachineBuilder(TM.getTargetTriple()) - .setCPU(TM.getTargetCPU().str()) - .setFeatures(TM.getTargetFeatureString()) - .setOptions(TM.Options) - .setRelocationModel(Reloc::Static) - .setCodeModel(TM.getCodeModel()) - .setCodeGenOptLevel(CodeGenOptLevelFor(optlevel)); + .setCPU(TM.getTargetCPU().str()) + .setFeatures(TM.getTargetFeatureString()) + .setOptions(TM.Options) + .setRelocationModel(Reloc::Static) + .setCodeModel(TM.getCodeModel()) + .setCodeGenOptLevel(CodeGenOptLevelFor(optlevel)); } struct TMCreator { orc::JITTargetMachineBuilder JTMB; - TMCreator(TargetMachine &TM, int optlevel) : JTMB(createJTMBFromTM(TM, optlevel)) {} + TMCreator(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT + : JTMB(createJTMBFromTM(TM, optlevel)) {} - std::unique_ptr operator()() { - return cantFail(JTMB.createTargetMachine()); + std::unique_ptr operator()() JL_NOTSAFEPOINT { + auto TM = cantFail(JTMB.createTargetMachine()); + fixupTM(*TM); + return TM; } }; struct PMCreator { - std::unique_ptr TM; - int optlevel; - PMCreator(TargetMachine &TM, int optlevel) : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {} - PMCreator(const PMCreator &other) : PMCreator(*other.TM, other.optlevel) {} - PMCreator(PMCreator &&other) : TM(std::move(other.TM)), optlevel(other.optlevel) {} - friend void swap(PMCreator &self, PMCreator &other) { - using std::swap; - swap(self.TM, other.TM); - swap(self.optlevel, other.optlevel); - } - PMCreator &operator=(PMCreator other) { - swap(*this, other); - return *this; - } - std::unique_ptr operator()() { - auto PM = std::make_unique(); - addTargetPasses(PM.get(), TM->getTargetTriple(), TM->getTargetIRAnalysis()); - addOptimizationPasses(PM.get(), optlevel); - addMachinePasses(PM.get(), optlevel); - return PM; + orc::JITTargetMachineBuilder JTMB; + OptimizationLevel O; + SmallVector, 0> &printers; + std::mutex &llvm_printing_mutex; + PMCreator(TargetMachine &TM, int optlevel, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT + : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers), llvm_printing_mutex(llvm_printing_mutex) {} + + auto operator()() JL_NOTSAFEPOINT { + auto TM = cantFail(JTMB.createTargetMachine()); + fixupTM(*TM); + auto NPM = std::make_unique(std::move(TM), O); + // TODO this needs to be locked, as different resource pools may add to the printer vector at the same time + { + std::lock_guard lock(llvm_printing_mutex); + printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT { + NPM->printTimers(); + }); + } + return NPM; } }; + template struct OptimizerT { - OptimizerT(TargetMachine &TM, int optlevel) : optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {} + OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT { + for (size_t i = 0; i < N; i++) { + PMs[i] = std::make_unique>>(PMCreator(TM, i, printers, llvm_printing_mutex)); + } + } + + OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + 
TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + auto PoolIdx = cast(cast(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue(); + assert(PoolIdx < N && "Invalid optimization pool index"); - OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { - TSM.withModuleDo([&](Module &M) { uint64_t start_time = 0; + + struct Stat { + std::string name; + uint64_t insts; + uint64_t bbs; + + void dump(ios_t *stream) JL_NOTSAFEPOINT { + ios_printf(stream, " \"%s\":\n", name.c_str()); + ios_printf(stream, " instructions: %u\n", insts); + ios_printf(stream, " basicblocks: %zd\n", bbs); + } + + Stat(Function &F) JL_NOTSAFEPOINT : name(F.getName().str()), insts(F.getInstructionCount()), bbs(countBasicBlocks(F)) {} + + ~Stat() JL_NOTSAFEPOINT = default; + }; + SmallVector before_stats; { - auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream(); - if (stream) { - // Print LLVM function statistics _before_ optimization - // Print all the information about this invocation as a YAML object - jl_printf(stream, "- \n"); - // We print the name and some statistics for each function in the module, both - // before optimization and again afterwards. - jl_printf(stream, " before: \n"); + if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) { for (auto &F : M.functions()) { if (F.isDeclaration() || F.getName().startswith("jfptr_")) { continue; } // Each function is printed as a YAML object with several attributes - jl_printf(stream, " \"%s\":\n", F.getName().str().c_str()); - jl_printf(stream, " instructions: %u\n", F.getInstructionCount()); - jl_printf(stream, " basicblocks: %zd\n", countBasicBlocks(F)); + before_stats.emplace_back(F); } start_time = jl_hrtime(); } } - JL_TIMING(LLVM_OPT); - - //Run the optimization - (***PMs).run(M); + { + JL_TIMING(LLVM_JIT, JIT_Opt); + //Run the optimization + (****PMs[PoolIdx]).run(M); + assert(!verifyLLVMIR(M)); + } - uint64_t end_time = 0; { - auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream(); - if (stream) { - end_time = jl_hrtime(); - jl_printf(stream, " time_ns: %" PRIu64 "\n", end_time - start_time); - jl_printf(stream, " optlevel: %d\n", optlevel); + // Print optimization statistics as a YAML object + // Looks like: + // - + // before: + // "foo": + // instructions: uint64 + // basicblocks: uint64 + // "bar": + // instructions: uint64 + // basicblocks: uint64 + // time_ns: uint64 + // optlevel: int + // after: + // "foo": + // instructions: uint64 + // basicblocks: uint64 + // "bar": + // instructions: uint64 + // basicblocks: uint64 + if (auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream()) { + uint64_t end_time = jl_hrtime(); + ios_printf(stream, "- \n"); + + // Print LLVM function statistic _before_ optimization + ios_printf(stream, " before: \n"); + for (auto &s : before_stats) { + s.dump(stream); + } + ios_printf(stream, " time_ns: %" PRIu64 "\n", end_time - start_time); + ios_printf(stream, " optlevel: %d\n", PoolIdx); // Print LLVM function statistics _after_ optimization - jl_printf(stream, " after: \n"); + ios_printf(stream, " after: \n"); for (auto &F : M.functions()) { if (F.isDeclaration() || F.getName().startswith("jfptr_")) { continue; } - jl_printf(stream, " \"%s\":\n", F.getName().str().c_str()); - jl_printf(stream, " instructions: %u\n", F.getInstructionCount()); - jl_printf(stream, " basicblocks: %zd\n", countBasicBlocks(F)); + Stat(F).dump(stream); } } } + ++ModulesOptimized; + switch (PoolIdx) { + case 0: + ++OptO0; + break; + case 1: + ++OptO1; + break; + case 2: + 
++OptO2; + break; + case 3: + ++OptO3; + break; + default: + // Change this if we ever gain other optlevels + llvm_unreachable("optlevel is between 0 and 3!"); + } }); return Expected{std::move(TSM)}; } private: - int optlevel; - JuliaOJIT::ResourcePool> PMs; + std::array>>, N> PMs; }; + template struct CompilerT : orc::IRCompileLayer::IRCompiler { - CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM, int optlevel) - : orc::IRCompileLayer::IRCompiler(MO), TMs(TMCreator(TM, optlevel)) {} + CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM) JL_NOTSAFEPOINT + : orc::IRCompileLayer::IRCompiler(MO) { + for (size_t i = 0; i < N; ++i) { + TMs[i] = std::make_unique>>(TMCreator(TM, i)); + } + } Expected> operator()(Module &M) override { - return orc::SimpleCompiler(***TMs)(M); + JL_TIMING(LLVM_JIT, JIT_Compile); + size_t PoolIdx; + if (auto opt_level = M.getModuleFlag("julia.optlevel")) { + PoolIdx = cast(cast(opt_level)->getValue())->getZExtValue(); + } else { + PoolIdx = jl_options.opt_level; + } + assert(PoolIdx < N && "Invalid optimization level for compiler!"); + return orc::SimpleCompiler(****TMs[PoolIdx])(M); + } + + std::array>>, N> TMs; + }; + + struct JITPointersT { + + JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT + : SharedBytes(SharedBytes), Lock(Lock) {} + + Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + std::lock_guard locked(Lock); + for (auto &GV : make_early_inc_range(M.globals())) { + if (auto *Shared = getSharedBytes(GV)) { + ++InternedGlobals; + GV.replaceAllUsesWith(Shared); + GV.eraseFromParent(); + } + } + + // Windows needs some inline asm to help + // build unwind tables + jl_decorate_module(M); + }); + return std::move(TSM); + } + + private: + // optimize memory by turning long strings into memoized copies, instead of + // making a copy per object file of output. + // we memoize them using a StringSet with a custom-alignment allocator + // to ensure they are properly aligned + Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT { + // We could probably technically get away with + // interning even external linkage globals, + // as long as they have global unnamedaddr, + // but currently we shouldn't be emitting those + // except in imaging mode, and we don't want to + // do this optimization there. 
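The checks that follow implement the interning policy described in the comment above; distilled into a standalone sketch (a plain `std::set` stands in for the aligned `StringSet`, and the alignment test is omitted):

#include <set>
#include <string>

struct ConstantBytes {
    std::string data;
    bool private_linkage;
    bool unnamed_addr;
    bool constant;
};

static std::set<std::string> shared_bytes;

// Returns a pointer to the interned copy, or nullptr if the global should be
// kept as-is (mirroring the early "return nullptr" exits in getSharedBytes).
const char *intern_bytes(const ConstantBytes &gv) {
    if (!gv.private_linkage || !gv.unnamed_addr || !gv.constant)
        return nullptr;
    if (gv.data.size() < 16)          // cutoff: don't intern small strings
        return nullptr;
    return shared_bytes.insert(gv.data).first->c_str();
}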
+ if (GV.hasExternalLinkage() || !GV.hasGlobalUnnamedAddr()) { + return nullptr; + } + if (!GV.hasInitializer()) { + return nullptr; + } + if (!GV.isConstant()) { + return nullptr; + } + auto CDS = dyn_cast(GV.getInitializer()); + if (!CDS) { + return nullptr; + } + StringRef Data = CDS->getRawDataValues(); + if (Data.size() < 16) { + // Cutoff, since we don't want to intern small strings + return nullptr; + } + Align Required = GV.getAlign().valueOrOne(); + Align Preferred = MaxAlignedAlloc::alignment(Data.size()); + if (Required > Preferred) + return nullptr; + StringRef Interned = SharedBytes.insert(Data).first->getKey(); + assert(llvm::isAddrAligned(Preferred, Interned.data())); + return literal_static_pointer_val(Interned.data(), GV.getType()); } - JuliaOJIT::ResourcePool> TMs; + SharedBytesT &SharedBytes; + std::mutex &Lock; }; } + +struct JuliaOJIT::DLSymOptimizer { + DLSymOptimizer(bool named) JL_NOTSAFEPOINT { + this->named = named; +#define INIT_RUNTIME_LIBRARY(libname, handle) \ + do { \ + auto libidx = (uintptr_t) libname; \ + if (libidx >= runtime_symbols.size()) { \ + runtime_symbols.resize(libidx + 1); \ + } \ + runtime_symbols[libidx].first = handle; \ + } while (0) + + INIT_RUNTIME_LIBRARY(NULL, jl_RTLD_DEFAULT_handle); + INIT_RUNTIME_LIBRARY(JL_EXE_LIBNAME, jl_exe_handle); + INIT_RUNTIME_LIBRARY(JL_LIBJULIA_INTERNAL_DL_LIBNAME, jl_libjulia_internal_handle); + INIT_RUNTIME_LIBRARY(JL_LIBJULIA_DL_LIBNAME, jl_libjulia_handle); + +#undef INIT_RUNTIME_LIBRARY + } + + void *lookup_symbol(void *libhandle, const char *fname) { + void *addr; + jl_dlsym(libhandle, fname, &addr, 0); + return addr; + } + + void *lookup(const char *libname, const char *fname) { + StringRef lib(libname); + StringRef f(fname); + std::lock_guard lock(symbols_mutex); + auto uit = user_symbols.find(lib); + if (uit == user_symbols.end()) { + void *handle = jl_get_library_(libname, 0); + if (!handle) + return nullptr; + uit = user_symbols.insert(std::make_pair(lib, std::make_pair(handle, StringMap()))).first; + } + auto &symmap = uit->second.second; + auto it = symmap.find(f); + if (it != symmap.end()) { + return it->second; + } + void *handle = lookup_symbol(uit->second.first, fname); + symmap[f] = handle; + return handle; + } + + void *lookup(uintptr_t libidx, const char *fname) { + std::lock_guard lock(symbols_mutex); + runtime_symbols.resize(std::max(runtime_symbols.size(), libidx + 1)); + auto it = runtime_symbols[libidx].second.find(fname); + if (it != runtime_symbols[libidx].second.end()) { + return it->second; + } + auto handle = lookup_symbol(runtime_symbols[libidx].first, fname); + runtime_symbols[libidx].second[fname] = handle; + return handle; + } + + void operator()(Module &M) { + for (auto &GV : M.globals()) { + auto Name = GV.getName(); + if (Name.startswith("jlplt") && Name.endswith("got")) { + auto fname = GV.getAttribute("julia.fname").getValueAsString().str(); + void *addr; + if (GV.hasAttribute("julia.libname")) { + auto libname = GV.getAttribute("julia.libname").getValueAsString().str(); + addr = lookup(libname.data(), fname.data()); + } else { + assert(GV.hasAttribute("julia.libidx") && "PLT entry should have either libname or libidx attribute!"); + auto libidx = (uintptr_t)std::stoull(GV.getAttribute("julia.libidx").getValueAsString().str()); + addr = lookup(libidx, fname.data()); + } + if (addr) { + Function *Thunk = nullptr; + if (!GV.isDeclaration()) { + Thunk = cast(GV.getInitializer()->stripPointerCasts()); + assert(++Thunk->uses().begin() == Thunk->uses().end() && "Thunk should 
only have one use in PLT initializer!"); + assert(Thunk->hasLocalLinkage() && "Thunk should not have non-local linkage!"); + } else { + GV.setLinkage(GlobalValue::PrivateLinkage); + } + auto init = ConstantExpr::getIntToPtr(ConstantInt::get(M.getDataLayout().getIntPtrType(M.getContext()), (uintptr_t)addr), GV.getValueType()); + if (named) { + auto T = GV.getValueType(); + assert(T->isPointerTy()); + if (!T->isOpaquePointerTy()) { + T = T->getNonOpaquePointerElementType(); + } + init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, GV.getName() + ".jit", init, &M); + } + GV.setInitializer(init); + GV.setConstant(true); + GV.setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + if (Thunk) { + Thunk->eraseFromParent(); + } + } + } + } + + for (auto &F : M) { + for (auto &BB : F) { + SmallVector to_delete; + for (auto &I : make_early_inc_range(BB)) { + auto CI = dyn_cast(&I); + if (!CI) + continue; + auto Callee = CI->getCalledFunction(); + if (!Callee || Callee->getName() != XSTR(jl_load_and_lookup)) + continue; + // Long-winded way of extracting fname without needing a second copy in an attribute + auto fname = cast(cast(CI->getArgOperand(1)->stripPointerCasts())->getInitializer())->getAsCString(); + auto libarg = CI->getArgOperand(0)->stripPointerCasts(); + // Should only use in store and phi node + // Note that this uses the raw output of codegen, + // which is why we can assume this + assert(++++CI->use_begin() == CI->use_end()); + void *addr; + if (auto GV = dyn_cast(libarg)) { + // Can happen if the library is the empty string, just give up when that happens + if (isa(GV->getInitializer())) + continue; + auto libname = cast(GV->getInitializer())->getAsCString(); + addr = lookup(libname.data(), fname.data()); + } else { + // Can happen if we fail the compile time dlfind i.e when we try a symbol that doesn't exist in libc + if (dyn_cast(libarg)) + continue; + assert(cast(libarg)->getOpcode() == Instruction::IntToPtr && "libarg should be either a global variable or a integer index!"); + libarg = cast(libarg)->getOperand(0); + auto libidx = cast(libarg)->getZExtValue(); + addr = lookup(libidx, fname.data()); + } + if (addr) { + auto init = ConstantExpr::getIntToPtr(ConstantInt::get(M.getDataLayout().getIntPtrType(M.getContext()), (uintptr_t)addr), CI->getType()); + if (named) { + auto T = CI->getType(); + assert(T->isPointerTy()); + if (!T->isOpaquePointerTy()) { + T = T->getNonOpaquePointerElementType(); + } + init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, CI->getName() + ".jit", init, &M); + } + // DCE and SimplifyCFG will kill the branching structure around + // the call, so we don't need to worry about removing everything + for (auto user : make_early_inc_range(CI->users())) { + if (auto SI = dyn_cast(user)) { + to_delete.push_back(SI); + } else { + auto PHI = cast(user); + PHI->replaceAllUsesWith(init); + to_delete.push_back(PHI); + } + } + to_delete.push_back(CI); + } + } + for (auto I : to_delete) { + I->eraseFromParent(); + } + } + } + } + + std::mutex symbols_mutex; + StringMap>> user_symbols; + SmallVector>, 0> runtime_symbols; + bool named; +}; + +void optimizeDLSyms(Module &M) { + JuliaOJIT::DLSymOptimizer(true)(M); +} + +void fixupTM(TargetMachine &TM){ + auto TheTriple = TM.getTargetTriple(); + if (jl_options.opt_level < 2) { + if (!TheTriple.isARM() && !TheTriple.isPPC64() && !TheTriple.isAArch64()) + TM.setFastISel(true); + else // FastISel seems to be buggy Ref #13321 + TM.setFastISel(false); + } +} + llvm::DataLayout jl_create_datalayout(TargetMachine 
&TM) { // Mark our address spaces as non-integral auto jl_data_layout = TM.createDataLayout(); @@ -995,36 +1672,31 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { return jl_data_layout; } -JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel) -: CompileLayer(BaseLayer.getExecutionSession(), BaseLayer, - std::make_unique(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)), - OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer, OptimizerT(TM, optlevel)) {} +#ifdef _COMPILER_ASAN_ENABLED_ +int64_t ___asan_globals_registered; +#endif JuliaOJIT::JuliaOJIT() : TM(createTargetMachine()), DL(jl_create_datalayout(*TM)), -#if JL_LLVM_VERSION >= 130000 ES(cantFail(orc::SelfExecutorProcessControl::Create())), -#else - ES(), -#endif GlobalJD(ES.createBareJITDylib("JuliaGlobals")), JD(ES.createBareJITDylib("JuliaOJIT")), + ExternalJD(ES.createBareJITDylib("JuliaExternal")), + DLSymOpt(std::make_unique(false)), ContextPool([](){ auto ctx = std::make_unique(); -#ifdef JL_LLVM_OPAQUE_POINTERS - ctx->enableOpaquePointers(); + if (!ctx->hasSetOpaquePointersValue()) +#ifndef JL_LLVM_OPAQUE_POINTERS + ctx->setOpaquePointers(false); +#else + ctx->setOpaquePointers(true); #endif return orc::ThreadSafeContext(std::move(ctx)); }), #ifdef JL_USE_JITLINK - // TODO: Port our memory management optimisations to JITLink instead of using the - // default InProcessMemoryManager. -# if JL_LLVM_VERSION < 140000 - ObjectLayer(ES, std::make_unique()), -# else - ObjectLayer(ES, cantFail(jitlink::InProcessMemoryManager::Create())), -# endif + MemMgr(createJITLinkMemoryManager()), + ObjectLayer(ES, *MemMgr), #else MemMgr(createRTDyldMemoryManager()), ObjectLayer( @@ -1035,16 +1707,17 @@ JuliaOJIT::JuliaOJIT() } ), #endif - Pipelines{ - std::make_unique(ObjectLayer, *TM, 0), - std::make_unique(ObjectLayer, *TM, 1), - std::make_unique(ObjectLayer, *TM, 2), - std::make_unique(ObjectLayer, *TM, 3), - }, - OptSelLayer(Pipelines) + LockLayer(ObjectLayer), + CompileLayer(ES, LockLayer, std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)), + JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))), + OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers, llvm_printing_mutex))), + OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)), + DepsVerifyLayer(ES, OptSelLayer, orc::IRTransformLayer::TransformFunction(validateExternRelocations)), + ExternalCompileLayer(ES, LockLayer, + std::make_unique>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)) { #ifdef JL_USE_JITLINK -# if defined(_OS_DARWIN_) && defined(LLVM_SHLIB) +# if defined(LLVM_SHLIB) // When dynamically linking against LLVM, use our custom EH frame registration code // also used with RTDyld to inform both our and the libc copy of libunwind. 
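My reading of the constructor wiring above, for orientation: a module added to the JIT flows through DepsVerifyLayer (relocation validation), OptSelLayer (stamping `julia.optlevel`), OptimizeLayer, JITPointersLayer, and CompileLayer before reaching LockLayer/ObjectLayer. A plain-C++ sketch of that transform-and-forward chaining (stand-in types, not the ORC API):

#include <functional>
#include <string>
#include <utility>
#include <vector>

struct ModuleIR { std::string ir; };   // stand-in for orc::ThreadSafeModule

using Transform = std::function<ModuleIR(ModuleIR)>;

ModuleIR run_pipeline(ModuleIR m, const std::vector<Transform> &stages) {
    for (const Transform &stage : stages)
        m = stage(std::move(m));       // each layer transforms and forwards the module
    return m;
}

// Usage sketch: order matters -- verification first, then optlevel selection,
// optimization, pointer fixups, and finally machine-code emission.
ModuleIR compile(ModuleIR m) {
    std::vector<Transform> stages = {
        [](ModuleIR m) { /* validate external relocations   */ return m; },
        [](ModuleIR m) { /* stamp the "julia.optlevel" flag  */ return m; },
        [](ModuleIR m) { /* run the matching NewPM pipeline  */ return m; },
        [](ModuleIR m) { /* intern shared bytes, decorate    */ return m; },
        [](ModuleIR m) { /* emit an object file              */ return m; },
    };
    return run_pipeline(std::move(m), stages);
}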
auto ehRegistrar = std::make_unique(); @@ -1055,6 +1728,7 @@ JuliaOJIT::JuliaOJIT() ES, std::move(ehRegistrar))); ObjectLayer.addPlugin(std::make_unique()); + ObjectLayer.addPlugin(std::make_unique(total_size)); #else ObjectLayer.setNotifyLoaded( [this](orc::MaterializationResponsibility &MR, @@ -1063,28 +1737,41 @@ JuliaOJIT::JuliaOJIT() registerRTDyldJITObject(Object, LO, MemMgr); }); #endif + CompileLayer.setNotifyCompiled(recordDebugTSM); + + std::string ErrorStr; + + // Make sure that libjulia-internal is loaded and placed first in the + // DynamicLibrary order so that calls to runtime intrinsics are resolved + // to the correct library when multiple libjulia-*'s have been loaded + // (e.g. when we `ccall` into a PackageCompiler.jl-created shared library) + sys::DynamicLibrary libjulia_internal_dylib = sys::DynamicLibrary::addPermanentLibrary( + jl_libjulia_internal_handle, &ErrorStr); + if(!ErrorStr.empty()) + report_fatal_error(llvm::Twine("FATAL: unable to dlopen libjulia-internal\n") + ErrorStr); // Make sure SectionMemoryManager::getSymbolAddressInProcess can resolve // symbols in the program as well. The nullptr argument to the function // tells DynamicLibrary to load the program, not a library. - std::string ErrorStr; if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, &ErrorStr)) report_fatal_error(llvm::Twine("FATAL: unable to dlopen self\n") + ErrorStr); + GlobalJD.addGenerator( + std::make_unique( + libjulia_internal_dylib, + DL.getGlobalPrefix(), + orc::DynamicLibrarySearchGenerator::SymbolPredicate())); + GlobalJD.addGenerator( cantFail(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( DL.getGlobalPrefix()))); // Resolve non-lock free atomic functions in the libatomic1 library. // This is the library that provides support for c11/c++11 atomic operations. - const char *const libatomic = -#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) - "libatomic.so.1"; -#elif defined(_OS_WINDOWS_) - "libatomic-1.dll"; -#else - NULL; -#endif + auto TT = getTargetTriple(); + const char *const libatomic = TT.isOSLinux() || TT.isOSFreeBSD() ? + "libatomic.so.1" : TT.isOSWindows() ? 
+ "libatomic-1.dll" : nullptr; if (libatomic) { static void *atomic_hdl = jl_load_dynamic_library(libatomic, JL_RTLD_LOCAL, 0); if (atomic_hdl != NULL) { @@ -1100,60 +1787,141 @@ JuliaOJIT::JuliaOJIT() } JD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + JD.addToLinkOrder(ExternalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + ExternalJD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + ExternalJD.addToLinkOrder(JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + + orc::SymbolAliasMap jl_crt = { + // Float16 conversion routines + { mangle("__gnu_h2f_ieee"), { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } }, + { mangle("__extendhfsf2"), { mangle("julia__gnu_h2f_ieee"), JITSymbolFlags::Exported } }, + { mangle("__gnu_f2h_ieee"), { mangle("julia__gnu_f2h_ieee"), JITSymbolFlags::Exported } }, + { mangle("__truncsfhf2"), { mangle("julia__gnu_f2h_ieee"), JITSymbolFlags::Exported } }, + { mangle("__truncdfhf2"), { mangle("julia__truncdfhf2"), JITSymbolFlags::Exported } }, + + // BFloat16 conversion routines + { mangle("__truncsfbf2"), { mangle("julia__truncsfbf2"), JITSymbolFlags::Exported } }, + { mangle("__truncdfbf2"), { mangle("julia__truncdfbf2"), JITSymbolFlags::Exported } }, + }; + cantFail(GlobalJD.define(orc::symbolAliases(jl_crt))); + +#ifdef MSAN_EMUTLS_WORKAROUND + orc::SymbolMap msan_crt; + msan_crt[mangle("__emutls_get_address")] = JITEvaluatedSymbol::fromPointer(msan_workaround::getTLSAddress, JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_v.__msan_param_tls")] = JITEvaluatedSymbol::fromPointer( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::param)), JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_v.__msan_param_origin_tls")] = JITEvaluatedSymbol::fromPointer( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::param_origin)), JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_v.__msan_retval_tls")] = JITEvaluatedSymbol::fromPointer( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval)), JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_v.__msan_retval_origin_tls")] = JITEvaluatedSymbol::fromPointer( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval_origin)), JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_v.__msan_va_arg_tls")] = JITEvaluatedSymbol::fromPointer( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg)), JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_v.__msan_va_arg_origin_tls")] = JITEvaluatedSymbol::fromPointer( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_origin)), JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_v.__msan_va_arg_overflow_size_tls")] = JITEvaluatedSymbol::fromPointer( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_overflow_size)), JITSymbolFlags::Exported); + msan_crt[mangle("__emutls_v.__msan_origin_tls")] = JITEvaluatedSymbol::fromPointer( + reinterpret_cast(static_cast(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported); + cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt))); +#endif +#ifdef _COMPILER_ASAN_ENABLED_ + orc::SymbolMap asan_crt; + asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported); + cantFail(JD.define(orc::absoluteSymbols(asan_crt))); +#endif } -void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr) +JuliaOJIT::~JuliaOJIT() = default; + +orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name) { std::string 
MangleName = getMangledName(Name); - cantFail(JD.define(orc::absoluteSymbols({{ES.intern(MangleName), JITEvaluatedSymbol::fromPointer((void*)Addr)}}))); + return ES.intern(MangleName); +} + +void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr) +{ + cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), JITEvaluatedSymbol::fromPointer((void*)Addr)}}))); } void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) { - JL_TIMING(LLVM_MODULE_FINISH); - std::vector NewExports; - TSM.withModuleDo([&](Module &M) { - jl_decorate_module(M); - shareStrings(M); + JL_TIMING(LLVM_JIT, JIT_Total); + ++ModulesAdded; + orc::SymbolLookupSet NewExports; + orc::ThreadSafeModule CurrentlyCompiling; + TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { for (auto &F : M.global_values()) { if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - NewExports.push_back(getMangledName(F.getName())); + auto Name = ES.intern(getMangledName(F.getName())); + NewExports.add(std::move(Name)); } } -#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK) - // validate the relocations for M (not implemented for the JITLink memory manager yet) - for (Module::global_object_iterator I = M.global_objects().begin(), E = M.global_objects().end(); I != E; ) { - GlobalObject *F = &*I; - ++I; - if (F->isDeclaration()) { - if (F->use_empty()) - F->eraseFromParent(); - else if (!((isa(F) && isIntrinsicFunction(cast(F))) || - findUnmangledSymbol(F->getName()) || - SectionMemoryManager::getSymbolAddressInProcess( - getMangledName(F->getName())))) { - llvm::errs() << "FATAL ERROR: " - << "Symbol \"" << F->getName().str() << "\"" - << "not found"; - abort(); - } - } - } -#endif + assert(!verifyLLVMIR(M)); + auto jit_debug_tsm_addr = ConstantInt::get(Type::getIntNTy(M.getContext(), sizeof(void*) * CHAR_BIT), (uintptr_t) &CurrentlyCompiling); + M.addModuleFlag(Module::Error, "julia.__jit_debug_tsm_addr", jit_debug_tsm_addr); }); + // TODO: what is the performance characteristics of this? 
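The `jl_crt` alias table defined a little earlier redirects the standard compiler-rt Float16/BFloat16 helpers to Julia's own implementations at symbol-resolution time; note that `__gnu_h2f_ieee`/`__extendhfsf2` and `__gnu_f2h_ieee`/`__truncsfhf2` each collapse onto a single Julia function. A plain map restating that table (illustration only; the real code registers it through `orc::symbolAliases`):

#include <map>
#include <string>

std::map<std::string, std::string> float16_aliases() {
    return {
        {"__gnu_h2f_ieee", "julia__gnu_h2f_ieee"},
        {"__extendhfsf2",  "julia__gnu_h2f_ieee"},
        {"__gnu_f2h_ieee", "julia__gnu_f2h_ieee"},
        {"__truncsfhf2",   "julia__gnu_f2h_ieee"},
        {"__truncdfhf2",   "julia__truncdfhf2"},
        // BFloat16 conversions follow the same pattern
        {"__truncsfbf2",   "julia__truncsfbf2"},
        {"__truncdfbf2",   "julia__truncdfbf2"},
    };
}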
- cantFail(OptSelLayer.add(JD, std::move(TSM))); + auto Err = DepsVerifyLayer.add(JD, std::move(TSM)); + if (Err) { + ES.reportError(std::move(Err)); + errs() << "Failed to add module to JIT!\n"; + if (CurrentlyCompiling) { + CurrentlyCompiling.withModuleDo([](Module &M) JL_NOTSAFEPOINT { errs() << "Dumping failing module\n" << M << "\n"; }); + } else { + errs() << "Module unavailable to be printed\n"; + } + abort(); + } // force eager compilation (for now), due to memory management specifics // (can't handle compilation recursion) - for (auto Name : NewExports) - cantFail(ES.lookup({&JD}, Name)); + auto Lookups = ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports); + if (!Lookups) { + ES.reportError(Lookups.takeError()); + errs() << "Failed to lookup symbols in module!"; + if (CurrentlyCompiling) { + CurrentlyCompiling.withModuleDo([](Module &M) JL_NOTSAFEPOINT { errs() << "Dumping failing module\n" << M << "\n"; }); + } else { + errs() << "Module unavailable to be printed\n"; + } + } + for (auto &Sym : *Lookups) { + assert(Sym.second); + (void) Sym; + } +} + +Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize) +{ + if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error + { + if (M.getDataLayout().isDefault()) + M.setDataLayout(DL); + if (M.getDataLayout() != DL) + return make_error( + "Added modules have incompatible data layouts: " + + M.getDataLayout().getStringRepresentation() + " (module) vs " + + DL.getStringRepresentation() + " (jit)", + inconvertibleErrorCode()); + + return Error::success(); + })) + return Err; + return ExternalCompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM)); +} +Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr Obj) { + assert(Obj && "Can not add null object"); + return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); } JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly) { - orc::JITDylib* SearchOrders[2] = {&GlobalJD, &JD}; - ArrayRef SearchOrder = makeArrayRef(&SearchOrders[ExportedSymbolsOnly ? 0 : 1], ExportedSymbolsOnly ? 2 : 1); + orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; + ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1); auto Sym = ES.lookup(SearchOrder, Name); if (Sym) return *Sym; @@ -1165,6 +1933,14 @@ JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name) return findSymbol(getMangledName(Name), true); } +Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) +{ + orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD}; + ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExternalJDOnly ? 
1 : 3); + auto Sym = ES.lookup(SearchOrder, getMangledName(Name)); + return Sym; +} + uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name) { auto addr = findSymbol(getMangledName(Name), false); @@ -1216,24 +1992,23 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod #ifdef JL_USE_JITLINK -# if JL_LLVM_VERSION < 140000 -# pragma message("JIT debugging (GDB integration) not available on LLVM < 14.0 (for JITLink)") -void JuliaOJIT::enableJITDebuggingSupport() {} -# else extern "C" orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size); void JuliaOJIT::enableJITDebuggingSupport() { - // We do not use GDBJITDebugInfoRegistrationPlugin::Create, as the runtime name - // lookup is unnecessarily involved/fragile for our in-process JIT use case - // (with the llvm_orc_registerJITLoaderGDBAllocAction symbol being in either - // libjulia-codegen or yet another shared library for LLVM depending on the build - // flags, etc.). - const auto Addr = ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBAllocAction); - ObjectLayer.addPlugin(std::make_unique(Addr)); + orc::SymbolMap GDBFunctions; + GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBAllocAction, JITSymbolFlags::Exported | JITSymbolFlags::Callable); + GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBWrapper, JITSymbolFlags::Exported | JITSymbolFlags::Callable); + cantFail(JD.define(orc::absoluteSymbols(GDBFunctions))); + if (TM->getTargetTriple().isOSBinFormatMachO()) + ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple()))); +#ifndef _COMPILER_ASAN_ENABLED_ // TODO: Fix duplicated sections spam #51794 + else if (TM->getTargetTriple().isOSBinFormatELF()) + //EPCDebugObjectRegistrar doesn't take a JITDylib, so we have to directly provide the call address + ObjectLayer.addPlugin(std::make_unique(ES, std::make_unique(ES, orc::ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper)))); +#endif } -# endif #else void JuliaOJIT::enableJITDebuggingSupport() { @@ -1268,11 +2043,10 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV) #ifdef JL_USE_JITLINK size_t JuliaOJIT::getTotalBytes() const { - // TODO: Implement in future custom JITLink memory manager. 
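The data-layout guard in `addExternalModule` above accepts a module with the default (empty) layout by adopting the JIT's layout, and otherwise requires an exact match. A small sketch of that check, with strings standing in for `llvm::DataLayout`:

#include <optional>
#include <string>

// Returns an error message on mismatch, or nullopt on success.
std::optional<std::string> check_data_layout(std::string module_dl,
                                             const std::string &jit_dl) {
    if (module_dl.empty())            // "default" layout: adopt the JIT's
        module_dl = jit_dl;
    if (module_dl != jit_dl)
        return "Added modules have incompatible data layouts: " + module_dl +
               " (module) vs " + jit_dl + " (jit)";
    return std::nullopt;
}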
- return 0; + return total_size.load(std::memory_order_relaxed); } #else -size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm); +size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT; size_t JuliaOJIT::getTotalBytes() const { @@ -1280,6 +2054,18 @@ size_t JuliaOJIT::getTotalBytes() const } #endif +void JuliaOJIT::printTimers() +{ + for (auto &printer : PrintLLVMTimers) { + printer(); + } + reportAndResetTimings(); +} + +void JuliaOJIT::optimizeDLSyms(Module &M) { + (*DLSymOpt)(M); +} + JuliaOJIT *jl_ExecutionEngine; // destructively move the contents of src into dest @@ -1289,28 +2075,30 @@ JuliaOJIT *jl_ExecutionEngine; // Comdat is also removed, since the JIT doesn't need it void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTSM) { - destTSM.withModuleDo([&](Module &dest) { - srcTSM.withModuleDo([&](Module &src) { + ++ModulesMerged; + destTSM.withModuleDo([&](Module &dest) JL_NOTSAFEPOINT { + srcTSM.withModuleDo([&](Module &src) JL_NOTSAFEPOINT { assert(&dest != &src && "Cannot merge module with itself!"); assert(&dest.getContext() == &src.getContext() && "Cannot merge modules with different contexts!"); assert(dest.getDataLayout() == src.getDataLayout() && "Cannot merge modules with different data layouts!"); assert(dest.getTargetTriple() == src.getTargetTriple() && "Cannot merge modules with different target triples!"); - for (Module::global_iterator I = src.global_begin(), E = src.global_end(); I != E;) { - GlobalVariable *sG = &*I; - GlobalVariable *dG = cast_or_null(dest.getNamedValue(sG->getName())); - ++I; + for (auto &SG : make_early_inc_range(src.globals())) { + GlobalVariable *dG = cast_or_null(dest.getNamedValue(SG.getName())); + if (SG.hasLocalLinkage()) { + dG = nullptr; + } // Replace a declaration with the definition: - if (dG) { - if (sG->isDeclaration()) { - sG->replaceAllUsesWith(dG); - sG->eraseFromParent(); + if (dG && !dG->hasLocalLinkage()) { + if (SG.isDeclaration()) { + SG.replaceAllUsesWith(dG); + SG.eraseFromParent(); continue; } //// If we start using llvm.used, we need to enable and test this - //else if (!dG->isDeclaration() && dG->hasAppendingLinkage() && sG->hasAppendingLinkage()) { + //else if (!dG->isDeclaration() && dG->hasAppendingLinkage() && SG.hasAppendingLinkage()) { // auto *dCA = cast(dG->getInitializer()); - // auto *sCA = cast(sG->getInitializer()); + // auto *sCA = cast(SG.getInitializer()); // SmallVector Init; // for (auto &Op : dCA->operands()) // Init.push_back(cast_or_null(Op)); @@ -1322,67 +2110,69 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS // GlobalValue::AppendingLinkage, ConstantArray::get(ATy, Init), "", // dG->getThreadLocalMode(), dG->getType()->getAddressSpace()); // GV->copyAttributesFrom(dG); - // sG->replaceAllUsesWith(GV); + // SG.replaceAllUsesWith(GV); // dG->replaceAllUsesWith(GV); - // GV->takeName(sG); - // sG->eraseFromParent(); + // GV->takeName(SG); + // SG.eraseFromParent(); // dG->eraseFromParent(); // continue; //} else { - assert(dG->isDeclaration() || dG->getInitializer() == sG->getInitializer()); - dG->replaceAllUsesWith(sG); + assert(dG->isDeclaration() || dG->getInitializer() == SG.getInitializer()); + dG->replaceAllUsesWith(&SG); dG->eraseFromParent(); } } // Reparent the global variable: - sG->removeFromParent(); - dest.getGlobalList().push_back(sG); + SG.removeFromParent(); + dest.getGlobalList().push_back(&SG); // Comdat is owned by the Module - sG->setComdat(nullptr); + SG.setComdat(nullptr); } - for 
(Module::iterator I = src.begin(), E = src.end(); I != E;) { - Function *sG = &*I; - Function *dG = cast_or_null(dest.getNamedValue(sG->getName())); - ++I; + for (auto &SG : make_early_inc_range(src)) { + Function *dG = cast_or_null(dest.getNamedValue(SG.getName())); + if (SG.hasLocalLinkage()) { + dG = nullptr; + } // Replace a declaration with the definition: - if (dG) { - if (sG->isDeclaration()) { - sG->replaceAllUsesWith(dG); - sG->eraseFromParent(); + if (dG && !dG->hasLocalLinkage()) { + if (SG.isDeclaration()) { + SG.replaceAllUsesWith(dG); + SG.eraseFromParent(); continue; } else { assert(dG->isDeclaration()); - dG->replaceAllUsesWith(sG); + dG->replaceAllUsesWith(&SG); dG->eraseFromParent(); } } // Reparent the global variable: - sG->removeFromParent(); - dest.getFunctionList().push_back(sG); + SG.removeFromParent(); + dest.getFunctionList().push_back(&SG); // Comdat is owned by the Module - sG->setComdat(nullptr); + SG.setComdat(nullptr); } - for (Module::alias_iterator I = src.alias_begin(), E = src.alias_end(); I != E;) { - GlobalAlias *sG = &*I; - GlobalAlias *dG = cast_or_null(dest.getNamedValue(sG->getName())); - ++I; - if (dG) { + for (auto &SG : make_early_inc_range(src.aliases())) { + GlobalAlias *dG = cast_or_null(dest.getNamedValue(SG.getName())); + if (SG.hasLocalLinkage()) { + dG = nullptr; + } + if (dG && !dG->hasLocalLinkage()) { if (!dG->isDeclaration()) { // aliases are always definitions, so this test is reversed from the above two - sG->replaceAllUsesWith(dG); - sG->eraseFromParent(); + SG.replaceAllUsesWith(dG); + SG.eraseFromParent(); continue; } else { - dG->replaceAllUsesWith(sG); + dG->replaceAllUsesWith(&SG); dG->eraseFromParent(); } } - sG->removeFromParent(); - dest.getAliasList().push_back(sG); + SG.removeFromParent(); + dest.getAliasList().push_back(&SG); } // metadata nodes need to be explicitly merged not just copied @@ -1390,44 +2180,19 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS NamedMDNode *sNMD = src.getNamedMetadata("llvm.dbg.cu"); if (sNMD) { NamedMDNode *dNMD = dest.getOrInsertNamedMetadata("llvm.dbg.cu"); - for (NamedMDNode::op_iterator I = sNMD->op_begin(), E = sNMD->op_end(); I != E; ++I) { - dNMD->addOperand(*I); + for (MDNode *I : sNMD->operands()) { + dNMD->addOperand(I); } } }); }); } -// optimize memory by turning long strings into memoized copies, instead of -// making a copy per object file of output. 
-void JuliaOJIT::shareStrings(Module &M) -{ - std::vector erase; - for (auto &GV : M.globals()) { - if (!GV.hasInitializer() || !GV.isConstant()) - continue; - ConstantDataSequential *CDS = dyn_cast(GV.getInitializer()); - if (CDS == nullptr) - continue; - StringRef data = CDS->getRawDataValues(); - if (data.size() > 16) { // only for long strings: keep short ones as values - Type *T_size = Type::getIntNTy(GV.getContext(), sizeof(void*) * 8); - Constant *v = ConstantExpr::getIntToPtr( - ConstantInt::get(T_size, (uintptr_t)(*ES.intern(data)).data()), - GV.getType()); - GV.replaceAllUsesWith(v); - erase.push_back(&GV); - } - } - for (auto GV : erase) - GV->eraseFromParent(); -} - //TargetMachine pass-through methods std::unique_ptr JuliaOJIT::cloneTargetMachine() const { - return std::unique_ptr(getTarget() + auto NewTM = std::unique_ptr(getTarget() .createTargetMachine( getTargetTriple().str(), getTargetCPU(), @@ -1436,6 +2201,8 @@ std::unique_ptr JuliaOJIT::cloneTargetMachine() const TM->getRelocationModel(), TM->getCodeModel(), TM->getOptLevel())); + fixupTM(*NewTM); + return NewTM; } const Triple& JuliaOJIT::getTargetTriple() const { @@ -1458,11 +2225,12 @@ TargetIRAnalysis JuliaOJIT::getTargetIRAnalysis() const { } static void jl_decorate_module(Module &M) { -#if defined(_CPU_X86_64_) && defined(_OS_WINDOWS_) - // Add special values used by debuginfo to build the UnwindData table registration for Win64 - // This used to be GV, but with https://reviews.llvm.org/D100944 we no longer can emit GV into `.text` - // TODO: The data is set in debuginfo.cpp but it should be okay to actually emit it here. - M.appendModuleInlineAsm("\ + auto TT = Triple(M.getTargetTriple()); + if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { + // Add special values used by debuginfo to build the UnwindData table registration for Win64 + // This used to be GV, but with https://reviews.llvm.org/D100944 we no longer can emit GV into `.text` + // TODO: The data is set in debuginfo.cpp but it should be okay to actually emit it here. 
+ M.appendModuleInlineAsm("\ .section .text \n\ .type __UnwindData,@object \n\ .p2align 2, 0x90 \n\ @@ -1475,79 +2243,75 @@ static void jl_decorate_module(Module &M) { __catchjmp: \n\ .zero 12 \n\ .size __catchjmp, 12"); -#endif + } } +// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable static int jl_add_to_ee( orc::ThreadSafeModule &M, - StringMap &NewExports, + const StringMap &NewExports, DenseMap &Queued, - std::vector> &ToMerge, - int depth) + SmallVectorImpl &Stack) { - // DAG-sort (post-dominator) the compile to compute the minimum - // merge-module sets for linkage + // First check if the TSM is empty (already compiled) if (!M) return 0; - // First check and record if it's on the stack somewhere + // Next check and record if it is on the stack somewhere { - auto &Cycle = Queued[&M]; - if (Cycle) - return Cycle; - ToMerge.push_back({}); - Cycle = depth; + auto &Id = Queued[&M]; + if (Id) + return Id; + Stack.push_back(&M); + Id = Stack.size(); } + // Finally work out the SCC + int depth = Stack.size(); int MergeUp = depth; - // Compute the cycle-id - M.withModuleDo([&](Module &m) { + SmallVector Children; + M.withModuleDo([&](Module &m) JL_NOTSAFEPOINT { for (auto &F : m.global_objects()) { if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { auto Callee = NewExports.find(F.getName()); if (Callee != NewExports.end()) { - auto &CM = Callee->second; - int Down = jl_add_to_ee(*CM, NewExports, Queued, ToMerge, depth + 1); - assert(Down <= depth); - if (Down && Down < MergeUp) - MergeUp = Down; + auto *CM = Callee->second; + if (*CM && CM != &M) { + auto Down = Queued.find(CM); + if (Down != Queued.end()) + MergeUp = std::min(MergeUp, Down->second); + else + Children.push_back(CM); + } } } } }); - if (MergeUp == depth) { - // Not in a cycle (or at the top of it) - Queued.erase(&M); - for (auto &CM : ToMerge.at(depth - 1)) { - assert(Queued.find(CM)->second == depth); - Queued.erase(CM); - jl_merge_module(M, std::move(*CM)); - } - jl_ExecutionEngine->addModule(std::move(M)); - MergeUp = 0; + assert(MergeUp > 0); + for (auto *CM : Children) { + int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack); + assert(Down <= (int)Stack.size()); + if (Down) + MergeUp = std::min(MergeUp, Down); } - else { - // Add our frame(s) to the top of the cycle - Queued[&M] = MergeUp; - auto &Top = ToMerge.at(MergeUp - 1); - Top.push_back(&M); - for (auto &CM : ToMerge.at(depth - 1)) { - assert(Queued.find(CM)->second == depth); - Queued[CM] = MergeUp; - Top.push_back(CM); + if (MergeUp < depth) + return MergeUp; + while (1) { + // Not in a cycle (or at the top of it) + // remove SCC state and merge every CM from the cycle into M + orc::ThreadSafeModule *CM = Stack.back(); + auto it = Queued.find(CM); + assert(it->second == (int)Stack.size()); + Queued.erase(it); + Stack.pop_back(); + if ((int)Stack.size() < depth) { + assert(&M == CM); + break; } + jl_merge_module(M, std::move(*CM)); } - ToMerge.pop_back(); - return MergeUp; -} - -static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap &NewExports) -{ - DenseMap Queued; - std::vector> ToMerge; - jl_add_to_ee(M, NewExports, Queued, ToMerge, 1); - assert(!M); + jl_ExecutionEngine->addModule(std::move(M)); + return 0; } - static uint64_t getAddressForFunction(StringRef fname) { auto addr = jl_ExecutionEngine->getFunctionAddress(fname); @@ -1561,7 +2325,7 @@ void add_named_global(StringRef name, void *addr) jl_ExecutionEngine->addGlobalMapping(name, (uint64_t)(uintptr_t)addr); } 
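Aside (illustration only, not part of the patch): the reworked jl_add_to_ee above is a Tarjan-style SCC walk — modules are pushed onto a stack, mutually-dependent modules are merged into the frame that roots their cycle, and each group is emitted to the JIT exactly once. The sketch below shows the same control flow on a toy string-keyed dependency graph; all names (addToEE, Graph, the "emit module group" step) are invented for the example, and the real code operates on orc::ThreadSafeModule, NewExports, and jl_ExecutionEngine instead.

// Minimal sketch of the simplified SCC merge, assuming a toy graph of
// module names. Compile as a standalone C++17 program.
#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

using Graph = std::map<std::string, std::vector<std::string>>;

// Returns 0 if `node` was emitted (merged with any cycle it roots), otherwise
// the stack position of the highest frame this node's SCC reaches.
static int addToEE(const std::string &node, const Graph &deps,
                   std::map<std::string, int> &queued,
                   std::vector<std::string> &stack)
{
    // Already on the stack somewhere: report that position upward.
    if (auto it = queued.find(node); it != queued.end())
        return it->second;
    stack.push_back(node);
    queued[node] = (int)stack.size();

    int depth = (int)stack.size();
    int mergeUp = depth;
    if (auto it = deps.find(node); it != deps.end()) {
        for (const std::string &callee : it->second) {
            int down = addToEE(callee, deps, queued, stack);
            if (down) // callee is (or reaches) a frame still on the stack
                mergeUp = std::min(mergeUp, down);
        }
    }
    if (mergeUp < depth)
        return mergeUp; // part of a cycle rooted higher up; stay on the stack

    // This node roots its SCC: pop and "merge" everything above it, then emit.
    std::string merged;
    while ((int)stack.size() >= depth) {
        std::string top = stack.back();
        queued.erase(top);
        stack.pop_back();
        merged = merged.empty() ? top : top + "+" + merged;
    }
    std::printf("emit module group: %s\n", merged.c_str());
    return 0;
}

int main()
{
    // a -> b -> c -> b (b and c form a cycle), c -> d (standalone leaf)
    Graph deps = {{"a", {"b"}}, {"b", {"c"}}, {"c", {"b", "d"}}, {"d", {}}};
    std::map<std::string, int> queued;
    std::vector<std::string> stack;
    addToEE("a", deps, queued, stack);
    // prints: "emit module group: d", then "b+c", then "a"
    return 0;
}

In the toy version a node already emitted would not be found again via NewExports (the real code checks for an empty ThreadSafeModule), so the example only covers the push/merge/emit ordering, not re-lookup of compiled modules.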
-extern "C" JL_DLLEXPORT +extern "C" JL_DLLEXPORT_CODEGEN size_t jl_jit_total_bytes_impl(void) { return jl_ExecutionEngine->getTotalBytes(); diff --git a/src/jitlayers.h b/src/jitlayers.h index ee3d0c14b3751..622fe9131861d 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -1,6 +1,8 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include +#include +#include #include #include @@ -8,18 +10,26 @@ #include #include #include +#include #include #include #include +#include +#include +#include + #include #include "julia_assert.h" -#include "debug-registry.h" - +#include "julia.h" +#include "julia_internal.h" +#include "platform.h" +#include "llvm-codegen-shared.h" #include #include + // As of LLVM 13, there are two runtime JIT linker implementations, the older // RuntimeDyld (used via orc::RTDyldObjectLinkingLayer) and the newer JITLink // (used via orc::ObjectLinkingLayer). @@ -35,20 +45,29 @@ // However, JITLink is a relatively young library and lags behind in platform // and feature support (e.g. Windows, JITEventListeners for various profilers, // etc.). Thus, we currently only use JITLink where absolutely required, that is, -// for Mac/aarch64. -#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) -# if JL_LLVM_VERSION < 130000 -# pragma message("On aarch64-darwin, LLVM version >= 13 is required for JITLink; fallback suffers from occasional segfaults") -# endif +// for Mac/aarch64 and Linux/aarch64. +// #define JL_FORCE_JITLINK + +#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) +# define HAS_SANITIZER +#endif +// The sanitizers don't play well with our memory manager + +#if defined(JL_FORCE_JITLINK) || JL_LLVM_VERSION >= 150000 && defined(HAS_SANITIZER) # define JL_USE_JITLINK +#else +# if defined(_CPU_AARCH64_) +# if defined(_OS_LINUX_) && JL_LLVM_VERSION < 150000 +# pragma message("On aarch64-gnu-linux, LLVM version >= 15 is required for JITLink; fallback suffers from occasional segfaults") +# else +# define JL_USE_JITLINK +# endif +# endif #endif -#ifdef JL_USE_JITLINK # include -#else # include # include -#endif using namespace llvm; @@ -57,42 +76,91 @@ extern "C" jl_cgparams_t jl_default_cgparams; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef) -void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis); -void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool external_use=false); -void addMachinePasses(legacy::PassManagerBase *PM, int optlevel); -void jl_finalize_module(orc::ThreadSafeModule m); -void jl_merge_module(orc::ThreadSafeModule &dest, orc::ThreadSafeModule src); -GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M); -DataLayout jl_create_datalayout(TargetMachine &TM); +void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) JL_NOTSAFEPOINT; +void jl_merge_module(orc::ThreadSafeModule &dest, orc::ThreadSafeModule src) JL_NOTSAFEPOINT; +GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M) JL_NOTSAFEPOINT; +DataLayout jl_create_datalayout(TargetMachine &TM) JL_NOTSAFEPOINT; -static inline bool imaging_default() { - return jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental); +static inline bool imaging_default() JL_NOTSAFEPOINT { + return jl_options.image_codegen || 
(jl_generating_output() && (!jl_options.incremental || jl_options.use_pkgimages)); } +struct OptimizationOptions { + bool lower_intrinsics; + bool dump_native; + bool external_use; + bool llvm_only; + + static constexpr OptimizationOptions defaults( + bool lower_intrinsics=true, + bool dump_native=false, + bool external_use=false, + bool llvm_only=false) { + return {lower_intrinsics, dump_native, external_use, llvm_only}; + } +}; + +struct NewPM { + std::unique_ptr TM; + OptimizationLevel O; + OptimizationOptions options; + TimePassesHandler TimePasses; + NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options = OptimizationOptions::defaults()) JL_NOTSAFEPOINT; + ~NewPM() JL_NOTSAFEPOINT; + + void run(Module &M) JL_NOTSAFEPOINT; + + void printTimers() JL_NOTSAFEPOINT; +}; + +struct AnalysisManagers { + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + + AnalysisManagers(PassBuilder &PB) JL_NOTSAFEPOINT; + AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O) JL_NOTSAFEPOINT; + ~AnalysisManagers() JL_NOTSAFEPOINT; +}; + +OptimizationLevel getOptLevel(int optlevel) JL_NOTSAFEPOINT; + struct jl_locked_stream { - JL_STREAM *stream = nullptr; + ios_t *stream = nullptr; std::mutex mutex; struct lock { std::unique_lock lck; - JL_STREAM *&stream; + ios_t *&stream; - lock(std::mutex &mutex, JL_STREAM *&stream) : lck(mutex), stream(stream) {} + lock(std::mutex &mutex, ios_t *&stream) JL_NOTSAFEPOINT + : lck(mutex), stream(stream) {} + lock(lock&) = delete; + lock(lock&&) JL_NOTSAFEPOINT = default; + ~lock() JL_NOTSAFEPOINT = default; - JL_STREAM *&operator*() { + ios_t *&operator*() JL_NOTSAFEPOINT { return stream; } - explicit operator bool() { + explicit operator bool() JL_NOTSAFEPOINT { return !!stream; } - operator JL_STREAM *() { + operator ios_t *() JL_NOTSAFEPOINT { return stream; } + + operator JL_STREAM *() JL_NOTSAFEPOINT { + return (JL_STREAM*)stream; + } }; - lock operator*() { + jl_locked_stream() JL_NOTSAFEPOINT = default; + ~jl_locked_stream() JL_NOTSAFEPOINT = default; + + lock operator*() JL_NOTSAFEPOINT { return lock(mutex, stream); } }; @@ -103,7 +171,8 @@ typedef struct _jl_llvm_functions_t { } jl_llvm_functions_t; struct jl_returninfo_t { - llvm::Function *decl; + llvm::FunctionCallee decl; + llvm::AttributeList attrs; enum CallingConv { Boxed = 0, Register, @@ -117,27 +186,43 @@ struct jl_returninfo_t { unsigned return_roots; }; -typedef std::tuple jl_codegen_call_target_t; +struct jl_codegen_call_target_t { + jl_returninfo_t::CallingConv cc; + unsigned return_roots; + llvm::Function *decl; + bool specsig; +}; + +typedef SmallVector, 0> jl_workqueue_t; +// TODO DenseMap? +typedef std::map> jl_compiled_functions_t; -typedef struct _jl_codegen_params_t { +struct jl_codegen_params_t { orc::ThreadSafeContext tsctx; orc::ThreadSafeContext::Lock tsctx_lock; + DataLayout DL; + Triple TargetTriple; + + inline LLVMContext &getContext() { + return *tsctx.getContext(); + } typedef StringMap SymMapGV; // outputs - std::vector> workqueue; - std::map globals; + jl_workqueue_t workqueue; + jl_compiled_functions_t compiled_functions; + std::map global_targets; + std::map, GlobalVariable*> external_fns; std::map ditypes; std::map llvmtypes; DenseMap mergedConstants; // Map from symbol name (in a certain library) to its GV in sysimg and the // DL handle address in the current session. 
StringMap> libMapGV; -#ifdef _OS_WINDOWS_ + SymMapGV symMapDefault; + // These symMaps are Windows-only SymMapGV symMapExe; SymMapGV symMapDll; SymMapGV symMapDlli; -#endif - SymMapGV symMapDefault; // Map from distinct callee's to its GOT entry. // In principle the attribute, function type and calling convention // don't need to be part of the key but it seems impossible to forward @@ -147,15 +232,19 @@ typedef struct _jl_codegen_params_t { DenseMap, GlobalVariable*>> allPltMap; - orc::ThreadSafeModule _shared_module; - inline orc::ThreadSafeModule &shared_module(Module &from); + std::unique_ptr _shared_module; + inline Module &shared_module(); // inputs size_t world = 0; const jl_cgparams_t *params = &jl_default_cgparams; bool cache = false; - bool imaging; - _jl_codegen_params_t(orc::ThreadSafeContext ctx) : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()), imaging(imaging_default()) {} -} jl_codegen_params_t; + bool external_linkage = false; + bool imaging_mode; + int debug_level; + jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) + : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()), + DL(std::move(DL)), TargetTriple(std::move(triple)), imaging_mode(imaging_default()) {} +}; jl_llvm_functions_t jl_emit_code( orc::ThreadSafeModule &M, @@ -173,23 +262,18 @@ jl_llvm_functions_t jl_emit_codeinst( enum CompilationPolicy { Default = 0, Extern = 1, - ImagingMode = 2 }; -typedef std::map> jl_workqueue_t; - void jl_compile_workqueue( - jl_workqueue_t &emitted, - Module &original, jl_codegen_params_t ¶ms, CompilationPolicy policy); Function *jl_cfunction_object(jl_function_t *f, jl_value_t *rt, jl_tupletype_t *argt, jl_codegen_params_t ¶ms); -void add_named_global(StringRef name, void *addr); +void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT; -static inline Constant *literal_static_pointer_val(const void *p, Type *T) +static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT { // this function will emit a static pointer into the generated code // the generated code will only be valid during the current session, @@ -206,6 +290,46 @@ static const inline char *name_from_method_instance(jl_method_instance_t *li) JL return jl_is_method(li->def.method) ? jl_symbol_name(li->def.method->name) : "top-level scope"; } +template +class MaxAlignedAllocImpl + : public AllocatorBase> { + +public: + MaxAlignedAllocImpl() JL_NOTSAFEPOINT = default; + + static Align alignment(size_t Size) JL_NOTSAFEPOINT { + // Define the maximum alignment we expect to require, from offset bytes off + // the returned pointer, this is >= alignof(std::max_align_t), which is too + // small often to actually use. + const size_t MaxAlignment = JL_CACHE_BYTE_ALIGNMENT; + if (Size <= offset) + return Align(1); + return Align(std::min((size_t)llvm::PowerOf2Ceil(Size - offset), MaxAlignment)); + } + + LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) { + Align MaxAlign = alignment(Size); + assert(Alignment < MaxAlign); (void)Alignment; + return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset); + } + + inline LLVM_ATTRIBUTE_RETURNS_NONNULL + void * Allocate(size_t Size, size_t Alignment) { + return Allocate(Size, Align(Alignment)); + } + + // Pull in base class overloads. + using AllocatorBase::Allocate; + + void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) { abort(); } + + // Pull in base class overloads. 
+ using AllocatorBase::Deallocate; + +private: +}; +using MaxAlignedAlloc = MaxAlignedAllocImpl<>; + typedef JITSymbol JL_JITSymbol; // The type that is similar to SymbolInfo on LLVM 4.0 is actually // `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol @@ -214,6 +338,7 @@ typedef JITSymbol JL_SymbolInfo; using CompilerResultT = Expected>; using OptimizerResultT = Expected; +using SharedBytesT = StringSet::MapEntryTy)>>; class JuliaOJIT { public: @@ -222,66 +347,91 @@ class JuliaOJIT { #else typedef orc::RTDyldObjectLinkingLayer ObjLayerT; #endif + struct LockLayerT : public orc::ObjectLayer { + + LockLayerT(orc::ObjectLayer &BaseLayer) JL_NOTSAFEPOINT : orc::ObjectLayer(BaseLayer.getExecutionSession()), BaseLayer(BaseLayer) {} + ~LockLayerT() JL_NOTSAFEPOINT = default; + + void emit(std::unique_ptr R, + std::unique_ptr O) override { + JL_TIMING(LLVM_JIT, JIT_Link); +#ifndef JL_USE_JITLINK + std::lock_guard lock(EmissionMutex); +#endif + BaseLayer.emit(std::move(R), std::move(O)); + } + private: + orc::ObjectLayer &BaseLayer; + std::mutex EmissionMutex; + }; typedef orc::IRCompileLayer CompileLayerT; + typedef orc::IRTransformLayer JITPointersLayerT; typedef orc::IRTransformLayer OptimizeLayerT; + typedef orc::IRTransformLayer OptSelLayerT; + typedef orc::IRTransformLayer DepsVerifyLayerT; typedef object::OwningBinary OwningObj; template , + SmallVector, SmallVector > > > struct ResourcePool { public: - ResourcePool(std::function creator) : creator(std::move(creator)), mutex(std::make_unique()) {} + ResourcePool(std::function creator) JL_NOTSAFEPOINT : creator(std::move(creator)), mutex(std::make_unique()) {} + ResourcePool(ResourcePool&) = delete; + ResourcePool(ResourcePool&&) JL_NOTSAFEPOINT = default; + ~ResourcePool() JL_NOTSAFEPOINT = default; class OwningResource { public: - OwningResource(ResourcePool &pool, ResourceT resource) : pool(pool), resource(std::move(resource)) {} + OwningResource(ResourcePool &pool, ResourceT resource) JL_NOTSAFEPOINT // _ENTER + : pool(pool), resource(std::move(resource)) {} OwningResource(const OwningResource &) = delete; OwningResource &operator=(const OwningResource &) = delete; - OwningResource(OwningResource &&) = default; - OwningResource &operator=(OwningResource &&) = default; - ~OwningResource() { - if (resource) pool.release(std::move(*resource)); + OwningResource(OwningResource &&) JL_NOTSAFEPOINT = default; + OwningResource &operator=(OwningResource &&) JL_NOTSAFEPOINT = default; + ~OwningResource() JL_NOTSAFEPOINT { // _LEAVE + if (resource) + pool.release(std::move(*resource)); } - ResourceT release() { + ResourceT release() JL_NOTSAFEPOINT { ResourceT res(std::move(*resource)); resource.reset(); return res; } - void reset(ResourceT res) { + void reset(ResourceT res) JL_NOTSAFEPOINT { *resource = std::move(res); } - ResourceT &operator*() { + ResourceT &operator*() JL_NOTSAFEPOINT { return *resource; } - ResourceT *operator->() { + ResourceT *operator->() JL_NOTSAFEPOINT { return get(); } - ResourceT *get() { + ResourceT *get() JL_NOTSAFEPOINT { return resource.getPointer(); } - const ResourceT &operator*() const { + const ResourceT &operator*() const JL_NOTSAFEPOINT { return *resource; } - const ResourceT *operator->() const { + const ResourceT *operator->() const JL_NOTSAFEPOINT { return get(); } - const ResourceT *get() const { + const ResourceT *get() const JL_NOTSAFEPOINT { return resource.getPointer(); } - explicit operator bool() const { + explicit operator bool() const JL_NOTSAFEPOINT { return resource; } private: 
ResourcePool &pool; - llvm::Optional resource; + Optional resource; }; - OwningResource operator*() { + OwningResource operator*() JL_NOTSAFEPOINT { return OwningResource(*this, acquire()); } @@ -289,7 +439,7 @@ class JuliaOJIT { return **this; } - ResourceT acquire() { + ResourceT acquire() JL_NOTSAFEPOINT { // _ENTER std::unique_lock lock(mutex->mutex); if (!pool.empty()) { return pop(pool); @@ -302,20 +452,20 @@ class JuliaOJIT { assert(!pool.empty() && "Expected resource pool to have a value!"); return pop(pool); } - void release(ResourceT &&resource) { + void release(ResourceT &&resource) JL_NOTSAFEPOINT { // _LEAVE std::lock_guard lock(mutex->mutex); pool.push(std::move(resource)); mutex->empty.notify_one(); } private: template - static ResourceT pop(std::queue &pool) { + static ResourceT pop(std::queue &pool) JL_NOTSAFEPOINT { ResourceT top = std::move(pool.front()); pool.pop(); return top; } template - static ResourceT pop(PoolT &pool) { + static ResourceT pop(PoolT &pool) JL_NOTSAFEPOINT { ResourceT top = std::move(pool.top()); pool.pop(); return top; @@ -330,29 +480,8 @@ class JuliaOJIT { std::unique_ptr mutex; }; - struct PipelineT { - PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel); - CompileLayerT CompileLayer; - OptimizeLayerT OptimizeLayer; - }; - struct OptSelLayerT : orc::IRLayer { - - template - OptSelLayerT(const std::array, N> &optimizers) - : orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(), - optimizers[0]->OptimizeLayer.getManglingOptions()), - optimizers(optimizers.data()), - count(N) { - static_assert(N > 0, "Expected array with at least one optimizer!"); - } - - void emit(std::unique_ptr R, orc::ThreadSafeModule TSM) override; - - private: - const std::unique_ptr * const optimizers; - size_t count; - }; + struct DLSymOptimizer; private: // Custom object emission notification handler for the JuliaOJIT @@ -361,47 +490,56 @@ class JuliaOJIT { public: - JuliaOJIT(); + JuliaOJIT() JL_NOTSAFEPOINT; + ~JuliaOJIT() JL_NOTSAFEPOINT; - void enableJITDebuggingSupport(); + void enableJITDebuggingSupport() JL_NOTSAFEPOINT; #ifndef JL_USE_JITLINK // JITLink doesn't support old JITEventListeners (yet). 
- void RegisterJITEventListener(JITEventListener *L); + void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT; #endif - void addGlobalMapping(StringRef Name, uint64_t Addr); - void addModule(orc::ThreadSafeModule M); - - JL_JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly); - JL_JITSymbol findUnmangledSymbol(StringRef Name); - uint64_t getGlobalValueAddress(StringRef Name); - uint64_t getFunctionAddress(StringRef Name); - StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst); - auto getContext() { + orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT; + void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT; + void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT; + + //Methods for the C API + Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, + bool ShouldOptimize = false) JL_NOTSAFEPOINT; + Error addObjectFile(orc::JITDylib &JD, + std::unique_ptr Obj) JL_NOTSAFEPOINT; + Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; + orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return ExternalCompileLayer; }; + orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; } + orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; } + + JL_JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT; + JL_JITSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; + uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; + uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT; + StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; + auto getContext() JL_NOTSAFEPOINT { return *ContextPool; } - orc::ThreadSafeContext acquireContext() { + orc::ThreadSafeContext acquireContext() { // JL_NOTSAFEPOINT_ENTER? return ContextPool.acquire(); } - void releaseContext(orc::ThreadSafeContext &&ctx) { + void releaseContext(orc::ThreadSafeContext &&ctx) { // JL_NOTSAFEPOINT_LEAVE? 
ContextPool.release(std::move(ctx)); } - const DataLayout& getDataLayout() const; + const DataLayout& getDataLayout() const JL_NOTSAFEPOINT; // TargetMachine pass-through methods - std::unique_ptr cloneTargetMachine() const; - const Triple& getTargetTriple() const; - StringRef getTargetFeatureString() const; - StringRef getTargetCPU() const; - const TargetOptions &getTargetOptions() const; - const Target &getTarget() const; - TargetIRAnalysis getTargetIRAnalysis() const; - - size_t getTotalBytes() const; - - JITDebugInfoRegistry &getDebugInfoRegistry() JL_NOTSAFEPOINT { - return DebugRegistry; - } + std::unique_ptr cloneTargetMachine() const JL_NOTSAFEPOINT; + const Triple& getTargetTriple() const JL_NOTSAFEPOINT; + StringRef getTargetFeatureString() const JL_NOTSAFEPOINT; + StringRef getTargetCPU() const JL_NOTSAFEPOINT; + const TargetOptions &getTargetOptions() const JL_NOTSAFEPOINT; + const Target &getTarget() const JL_NOTSAFEPOINT; + TargetIRAnalysis getTargetIRAnalysis() const JL_NOTSAFEPOINT; + + size_t getTotalBytes() const JL_NOTSAFEPOINT; + void printTimers() JL_NOTSAFEPOINT; jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT { return dump_emitted_mi_name_stream; @@ -412,10 +550,13 @@ class JuliaOJIT { jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT { return dump_llvm_opt_stream; } + std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT; + std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT; + + // Note that this is a safepoint due to jl_get_library_ and jl_dlsym calls + void optimizeDLSyms(Module &M); + private: - std::string getMangledName(StringRef Name); - std::string getMangledName(const GlobalValue *GV); - void shareStrings(Module &M); const std::unique_ptr TM; const DataLayout DL; @@ -423,66 +564,58 @@ class JuliaOJIT { orc::ExecutionSession ES; orc::JITDylib &GlobalJD; orc::JITDylib &JD; - - JITDebugInfoRegistry DebugRegistry; - + orc::JITDylib &ExternalJD; //Map and inc are guarded by RLST_mutex std::mutex RLST_mutex{}; int RLST_inc = 0; DenseMap ReverseLocalSymbolTable; + SharedBytesT SharedBytes; + + std::unique_ptr DLSymOpt; //Compilation streams jl_locked_stream dump_emitted_mi_name_stream; jl_locked_stream dump_compiles_stream; jl_locked_stream dump_llvm_opt_stream; + std::mutex llvm_printing_mutex{}; + SmallVector, 0> PrintLLVMTimers; + ResourcePool> ContextPool; #ifndef JL_USE_JITLINK const std::shared_ptr MemMgr; +#else + std::atomic total_size{0}; + const std::unique_ptr MemMgr; #endif ObjLayerT ObjectLayer; - const std::array, 4> Pipelines; + LockLayerT LockLayer; + CompileLayerT CompileLayer; + JITPointersLayerT JITPointersLayer; + OptimizeLayerT OptimizeLayer; OptSelLayerT OptSelLayer; + DepsVerifyLayerT DepsVerifyLayer; + CompileLayerT ExternalCompileLayer; }; extern JuliaOJIT *jl_ExecutionEngine; -orc::ThreadSafeModule jl_create_llvm_module(StringRef name, orc::ThreadSafeContext ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()); +std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &ctx, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT; +inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafeContext ctx, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT { + auto lock = ctx.getLock(); + return 
orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), DL, triple), ctx); +} -orc::ThreadSafeModule &jl_codegen_params_t::shared_module(Module &from) { +Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT { if (!_shared_module) { - _shared_module = jl_create_llvm_module("globals", tsctx, imaging, from.getDataLayout(), Triple(from.getTargetTriple())); - assert(&from.getContext() == tsctx.getContext() && "Module context differs from codegen_params context!"); - } else { - assert(&from.getContext() == _shared_module.getContext().getContext() && "Module context differs from shared module context!"); - assert(from.getDataLayout() == _shared_module.getModuleUnlocked()->getDataLayout() && "Module data layout differs from shared module data layout!"); - assert(from.getTargetTriple() == _shared_module.getModuleUnlocked()->getTargetTriple() && "Module target triple differs from shared module target triple!"); + _shared_module = jl_create_llvm_module("globals", getContext(), DL, TargetTriple); } - return _shared_module; + return *_shared_module; } +void fixupTM(TargetMachine &TM) JL_NOTSAFEPOINT; -Pass *createLowerPTLSPass(bool imaging_mode); -Pass *createCombineMulAddPass(); -Pass *createFinalLowerGCPass(); -Pass *createLateLowerGCFramePass(); -Pass *createLowerExcHandlersPass(); -Pass *createGCInvariantVerifierPass(bool Strong); -Pass *createPropagateJuliaAddrspaces(); -Pass *createRemoveJuliaAddrspacesPass(); -Pass *createRemoveNIPass(); -Pass *createJuliaLICMPass(); -Pass *createMultiVersioningPass(bool external_use); -Pass *createAllocOptPass(); -Pass *createDemoteFloat16Pass(); -Pass *createCPUFeaturesPass(); -Pass *createLowerSimdLoopPass(); +void optimizeDLSyms(Module &M); // NewPM #include "passes.h" -// Whether the Function is an llvm or julia intrinsic. 
-static inline bool isIntrinsicFunction(Function *F) -{ - return F->isIntrinsic() || F->getName().startswith("julia."); -} - -CodeGenOpt::Level CodeGenOptLevelFor(int optlevel); +CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT; diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc index 28c9c61c51452..f40b252180a65 100644 --- a/src/jl_exported_data.inc +++ b/src/jl_exported_data.inc @@ -3,10 +3,13 @@ // Pointers that are exposed through the public libjulia #define JL_EXPORTED_DATA_POINTERS(XX) \ XX(jl_abstractarray_type) \ - XX(jl_abstractslot_type) \ XX(jl_abstractstring_type) \ + XX(jl_addrspace_type) \ + XX(jl_addrspace_typename) \ + XX(jl_addrspacecore_type) \ XX(jl_an_empty_string) \ XX(jl_an_empty_vec_any) \ + XX(jl_an_empty_memory_any) \ XX(jl_anytuple_type) \ XX(jl_anytuple_type_type) \ XX(jl_any_type) \ @@ -18,6 +21,7 @@ XX(jl_array_type) \ XX(jl_array_typename) \ XX(jl_array_uint8_type) \ + XX(jl_array_uint32_type) \ XX(jl_array_uint64_type) \ XX(jl_atomicerror_type) \ XX(jl_base_module) \ @@ -42,30 +46,47 @@ XX(jl_float16_type) \ XX(jl_float32_type) \ XX(jl_float64_type) \ + XX(jl_bfloat16_type) \ XX(jl_floatingpoint_type) \ XX(jl_function_type) \ + XX(jl_binding_type) \ XX(jl_globalref_type) \ XX(jl_gotoifnot_type) \ + XX(jl_enternode_type) \ XX(jl_gotonode_type) \ XX(jl_initerror_type) \ XX(jl_int16_type) \ XX(jl_int32_type) \ XX(jl_int64_type) \ XX(jl_int8_type) \ + XX(jl_interconditional_type) \ XX(jl_interrupt_exception) \ XX(jl_intrinsic_type) \ + XX(jl_kwcall_func) \ XX(jl_lineinfonode_type) \ XX(jl_linenumbernode_type) \ XX(jl_llvmpointer_type) \ XX(jl_llvmpointer_typename) \ XX(jl_loaderror_type) \ XX(jl_main_module) \ + XX(jl_memory_any_type) \ XX(jl_memory_exception) \ + XX(jl_genericmemory_type) \ + XX(jl_genericmemory_typename) \ + XX(jl_memory_uint8_type) \ + XX(jl_memory_uint16_type) \ + XX(jl_memory_uint32_type) \ + XX(jl_memory_uint64_type) \ + XX(jl_memoryref_any_type) \ + XX(jl_genericmemoryref_type) \ + XX(jl_genericmemoryref_typename) \ + XX(jl_memoryref_uint8_type) \ XX(jl_methoderror_type) \ XX(jl_method_instance_type) \ XX(jl_method_match_type) \ XX(jl_method_type) \ XX(jl_methtable_type) \ + XX(jl_missingcodeerror_type) \ XX(jl_module_type) \ XX(jl_n_threads_per_pool) \ XX(jl_namedtuple_type) \ @@ -101,7 +122,6 @@ XX(jl_true) \ XX(jl_tuple_typename) \ XX(jl_tvar_type) \ - XX(jl_typedslot_type) \ XX(jl_typeerror_type) \ XX(jl_typemap_entry_type) \ XX(jl_typemap_level_type) \ @@ -125,11 +145,17 @@ XX(jl_voidpointer_type) \ XX(jl_void_type) \ XX(jl_weakref_type) \ + XX(jl_libdl_module) \ + XX(jl_libdl_dlopen_func) \ + XX(jl_precompilable_error) \ // Data symbols that are defined inside the public libjulia #define JL_EXPORTED_DATA_SYMBOLS(XX) \ XX(jl_n_threadpools, int) \ - XX(jl_n_threads, int) \ + XX(jl_n_threads, _Atomic(int)) \ + XX(jl_n_gcthreads, int) \ XX(jl_options, jl_options_t) \ + XX(jl_task_gcstack_offset, int) \ + XX(jl_task_ptls_offset, int) \ // end of file diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 72d385329ce49..8e850b47f2fe7 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -3,10 +3,12 @@ #define JL_RUNTIME_EXPORTED_FUNCS(XX) \ XX(jl_active_task_stack) \ XX(jl_add_standard_imports) \ + XX(jl_adopt_thread) \ XX(jl_alignment) \ XX(jl_alloc_array_1d) \ XX(jl_alloc_array_2d) \ XX(jl_alloc_array_3d) \ + XX(jl_alloc_array_nd) \ XX(jl_alloc_string) \ XX(jl_alloc_svec) \ XX(jl_alloc_svec_uninit) \ @@ -20,31 +22,19 @@ XX(jl_apply_type1) \ XX(jl_apply_type2) \ 
XX(jl_argument_datatype) \ - XX(jl_argument_method_table) \ - XX(jl_arraylen) \ - XX(jl_arrayref) \ - XX(jl_arrayset) \ - XX(jl_arrayunset) \ - XX(jl_array_cconvert_cstring) \ - XX(jl_array_copy) \ - XX(jl_array_del_at) \ - XX(jl_array_del_beg) \ XX(jl_array_del_end) \ XX(jl_array_eltype) \ - XX(jl_array_grow_at) \ - XX(jl_array_grow_beg) \ XX(jl_array_grow_end) \ - XX(jl_array_isassigned) \ XX(jl_array_ptr) \ XX(jl_array_ptr_1d_append) \ XX(jl_array_ptr_1d_push) \ - XX(jl_array_ptr_copy) \ + XX(jl_genericmemory_isassigned) \ + XX(jl_genericmemory_owner) \ + XX(jl_genericmemoryref) \ + XX(jl_genericmemoryset) \ + XX(jl_genericmemoryunset) \ XX(jl_array_rank) \ - XX(jl_array_size) \ - XX(jl_array_sizehint) \ XX(jl_array_to_string) \ - XX(jl_array_typetagdata) \ - XX(jl_array_validate_dims) \ XX(jl_atexit_hook) \ XX(jl_atomic_bool_cmpswap_bits) \ XX(jl_atomic_cmpswap_bits) \ @@ -54,7 +44,6 @@ XX(jl_atomic_swap_bits) \ XX(jl_backtrace_from_here) \ XX(jl_base_relative_to) \ - XX(jl_binding_owner) \ XX(jl_binding_resolved_p) \ XX(jl_bitcast) \ XX(jl_boundp) \ @@ -97,6 +86,7 @@ XX(jl_close_uv) \ XX(jl_code_for_staged) \ XX(jl_compile_hint) \ + XX(jl_compile_method_instance) \ XX(jl_compress_argnames) \ XX(jl_compress_ir) \ XX(jl_compute_fieldtypes) \ @@ -116,16 +106,16 @@ XX(jl_dlopen) \ XX(jl_dlsym) \ XX(jl_dump_host_cpu) \ + XX(jl_check_pkgimage_clones) \ XX(jl_egal) \ XX(jl_egal__bits) \ - XX(jl_egal__special) \ + XX(jl_egal__bitstag) \ XX(jl_eh_restore_state) \ XX(jl_enter_handler) \ XX(jl_enter_threaded_region) \ XX(jl_environ) \ XX(jl_eof_error) \ XX(jl_eqtable_get) \ - XX(jl_eqtable_nextind) \ XX(jl_eqtable_pop) \ XX(jl_eqtable_put) \ XX(jl_errno) \ @@ -150,6 +140,7 @@ XX(jl_gc_add_finalizer) \ XX(jl_gc_add_finalizer_th) \ XX(jl_gc_add_ptr_finalizer) \ + XX(jl_gc_add_quiescent) \ XX(jl_gc_allocobj) \ XX(jl_gc_alloc_0w) \ XX(jl_gc_alloc_1w) \ @@ -157,6 +148,7 @@ XX(jl_gc_alloc_3w) \ XX(jl_gc_alloc_typed) \ XX(jl_gc_big_alloc) \ + XX(jl_gc_big_alloc_instrumented) \ XX(jl_gc_collect) \ XX(jl_gc_conservative_gc_support_enabled) \ XX(jl_gc_counted_calloc) \ @@ -170,8 +162,11 @@ XX(jl_gc_external_obj_hdr_size) \ XX(jl_gc_find_taggedvalue_pool) \ XX(jl_gc_get_total_bytes) \ + XX(jl_gc_get_max_memory) \ XX(jl_gc_internal_obj_base_ptr) \ XX(jl_gc_is_enabled) \ + XX(jl_gc_is_in_finalizer) \ + XX(jl_gc_pool_live_bytes) \ XX(jl_gc_live_bytes) \ XX(jl_gc_managed_malloc) \ XX(jl_gc_managed_realloc) \ @@ -182,16 +177,19 @@ XX(jl_gc_new_weakref_th) \ XX(jl_gc_num) \ XX(jl_gc_pool_alloc) \ + XX(jl_gc_pool_alloc_instrumented) \ XX(jl_gc_queue_multiroot) \ XX(jl_gc_queue_root) \ XX(jl_gc_safepoint) \ XX(jl_gc_schedule_foreign_sweepfunc) \ XX(jl_gc_set_cb_notify_external_alloc) \ XX(jl_gc_set_cb_notify_external_free) \ + XX(jl_gc_set_cb_notify_gc_pressure) \ XX(jl_gc_set_cb_post_gc) \ XX(jl_gc_set_cb_pre_gc) \ XX(jl_gc_set_cb_root_scanner) \ XX(jl_gc_set_cb_task_scanner) \ + XX(jl_gc_set_max_memory) \ XX(jl_gc_sync_total_bytes) \ XX(jl_gc_total_hrtime) \ XX(jl_gdblookup) \ @@ -199,7 +197,6 @@ XX(jl_generic_function_def) \ XX(jl_gensym) \ XX(jl_getallocationgranularity) \ - XX(jl_getch) \ XX(jl_getnameinfo) \ XX(jl_getpagesize) \ XX(jl_get_ARCH) \ @@ -207,9 +204,10 @@ XX(jl_get_binding) \ XX(jl_get_binding_for_method_def) \ XX(jl_get_binding_or_error) \ - XX(jl_get_binding_wr_or_error) \ XX(jl_get_binding_wr) \ XX(jl_get_cpu_name) \ + XX(jl_get_cpu_features) \ + XX(jl_cpu_has_fma) \ XX(jl_get_current_task) \ XX(jl_get_default_sysimg_path) \ XX(jl_get_excstack) \ @@ -220,10 +218,6 @@ XX(jl_get_JIT) \ 
XX(jl_get_julia_bin) \ XX(jl_get_julia_bindir) \ - XX(jl_get_keyword_sorter) \ - XX(jl_get_kwsorter) \ - XX(jl_get_method_inferred) \ - XX(jl_get_module_binding) \ XX(jl_get_module_compile) \ XX(jl_get_module_infer) \ XX(jl_get_module_of_binding) \ @@ -244,6 +238,8 @@ XX(jl_get_world_counter) \ XX(jl_get_zero_subnormals) \ XX(jl_gf_invoke_lookup) \ + XX(jl_method_lookup_by_tt) \ + XX(jl_method_lookup) \ XX(jl_gf_invoke_lookup_worlds) \ XX(jl_git_branch) \ XX(jl_git_commit) \ @@ -258,7 +254,7 @@ XX(jl_infer_thunk) \ XX(jl_init) \ XX(jl_init_options) \ - XX(jl_init_restored_modules) \ + XX(jl_init_restored_module) \ XX(jl_init_with_image) \ XX(jl_init_with_image__threading) \ XX(jl_init__threading) \ @@ -276,8 +272,9 @@ XX(jl_ios_fd) \ XX(jl_ios_get_nbyte_int) \ XX(jl_ir_flag_inferred) \ - XX(jl_ir_flag_inlineable) \ - XX(jl_ir_flag_pure) \ + XX(jl_ir_flag_has_fcall) \ + XX(jl_ir_flag_inlining) \ + XX(jl_ir_inlining_cost) \ XX(jl_ir_nslots) \ XX(jl_ir_slotflag) \ XX(jl_isa) \ @@ -288,6 +285,7 @@ XX(jl_is_char_signed) \ XX(jl_is_const) \ XX(jl_is_debugbuild) \ + XX(jl_is_foreign_type) \ XX(jl_is_identifier) \ XX(jl_is_imported) \ XX(jl_is_initialized) \ @@ -323,25 +321,26 @@ XX(jl_methtable_lookup) \ XX(jl_mi_cache_insert) \ XX(jl_module_build_id) \ - XX(jl_module_export) \ XX(jl_module_exports_p) \ XX(jl_module_globalref) \ XX(jl_module_import) \ XX(jl_module_name) \ XX(jl_module_names) \ XX(jl_module_parent) \ + XX(jl_module_public) \ + XX(jl_module_public_p) \ XX(jl_module_use) \ XX(jl_module_using) \ XX(jl_module_usings) \ XX(jl_module_uuid) \ XX(jl_native_alignment) \ XX(jl_nb_available) \ - XX(jl_new_array) \ XX(jl_new_bits) \ XX(jl_new_codeinst) \ XX(jl_new_code_info_uninit) \ XX(jl_new_datatype) \ XX(jl_new_foreign_type) \ + XX(jl_reinit_foreign_type) \ XX(jl_new_method_instance_uninit) \ XX(jl_new_method_table) \ XX(jl_new_method_uninit) \ @@ -356,6 +355,7 @@ XX(jl_new_typevar) \ XX(jl_next_from_addrinfo) \ XX(jl_normalize_to_compilable_sig) \ + XX(jl_write_precompile_statement) \ XX(jl_no_exc_handler) \ XX(jl_object_id) \ XX(jl_object_id_) \ @@ -386,33 +386,33 @@ XX(jl_profile_maxlen_data) \ XX(jl_profile_start_timer) \ XX(jl_profile_stop_timer) \ - XX(jl_ptrarrayref) \ XX(jl_ptr_to_array) \ XX(jl_ptr_to_array_1d) \ XX(jl_queue_work) \ XX(jl_raise_debugger) \ XX(jl_readuntil) \ + XX(jl_cache_flags) \ + XX(jl_match_cache_flags) \ XX(jl_read_verify_header) \ XX(jl_realloc) \ XX(jl_register_newmeth_tracer) \ - XX(jl_reshape_array) \ XX(jl_resolve_globals_in_ir) \ XX(jl_restore_excstack) \ XX(jl_restore_incremental) \ - XX(jl_restore_incremental_from_buf) \ + XX(jl_restore_package_image_from_file) \ XX(jl_restore_system_image) \ XX(jl_restore_system_image_data) \ XX(jl_rethrow) \ XX(jl_rethrow_other) \ - XX(jl_rettype_inferred) \ XX(jl_running_on_valgrind) \ XX(jl_safe_printf) \ - XX(jl_save_incremental) \ - XX(jl_save_system_image) \ + XX(jl_safepoint_suspend_thread) \ + XX(jl_safepoint_resume_thread) \ XX(jl_SC_CLK_TCK) \ XX(jl_set_ARGS) \ XX(jl_set_const) \ XX(jl_set_errno) \ + XX(jl_set_global) \ XX(jl_set_istopmod) \ XX(jl_set_module_compile) \ XX(jl_set_module_infer) \ @@ -451,8 +451,6 @@ XX(jl_svec2) \ XX(jl_svec_copy) \ XX(jl_svec_fill) \ - XX(jl_svec_isassigned) \ - XX(jl_svec_ref) \ XX(jl_switch) \ XX(jl_switchto) \ XX(jl_symbol) \ @@ -462,6 +460,7 @@ XX(jl_take_buffer) \ XX(jl_task_get_next) \ XX(jl_task_stack_buffer) \ + XX(jl_termios_size) \ XX(jl_test_cpu_feature) \ XX(jl_threadid) \ XX(jl_threadpoolid) \ @@ -474,14 +473,14 @@ XX(jl_try_substrtod) \ XX(jl_try_substrtof) \ 
XX(jl_tty_set_mode) \ - XX(jl_tupletype_fill) \ XX(jl_typeassert) \ - XX(jl_typeinf_begin) \ - XX(jl_typeinf_end) \ + XX(jl_typeinf_lock_begin) \ + XX(jl_typeinf_lock_end) \ + XX(jl_typeinf_timing_begin) \ + XX(jl_typeinf_timing_end) \ XX(jl_typename_str) \ XX(jl_typeof_str) \ XX(jl_types_equal) \ - XX(jl_type_equality_is_identity) \ XX(jl_type_error) \ XX(jl_type_error_rt) \ XX(jl_type_intersection) \ @@ -507,6 +506,8 @@ XX(jl_uncompress_argname_n) \ XX(jl_uncompress_ir) \ XX(jl_undefined_var_error) \ + XX(jl_unwrap_unionall) \ + XX(jl_has_no_field_error) \ XX(jl_value_ptr) \ XX(jl_ver_is_release) \ XX(jl_ver_major) \ @@ -516,6 +517,7 @@ XX(jl_vexceptionf) \ XX(jl_vprintf) \ XX(jl_wakeup_thread) \ + XX(jl_write_compiler_output) \ XX(jl_yield) \ #define JL_RUNTIME_EXPORTED_FUNCS_WIN(XX) \ @@ -531,7 +533,8 @@ YY(jl_get_llvm_module) \ YY(jl_get_LLVM_VERSION) \ YY(jl_dump_native) \ - YY(jl_get_llvm_gv) \ + YY(jl_get_llvm_gvs) \ + YY(jl_get_llvm_external_fns) \ YY(jl_dump_function_asm) \ YY(jl_LLVMCreateDisasm) \ YY(jl_LLVMDisasmInstruction) \ @@ -540,6 +543,7 @@ YY(jl_register_fptrs) \ YY(jl_generate_fptr) \ YY(jl_generate_fptr_for_unspecialized) \ + YY(jl_generate_fptr_for_oc_wrapper) \ YY(jl_compile_extern_c) \ YY(jl_teardown_codegen) \ YY(jl_jit_total_bytes) \ @@ -552,21 +556,35 @@ YY(jl_type_to_llvm) \ YY(jl_getUnwindInfo) \ YY(jl_get_libllvm) \ - YY(jl_add_optimization_passes) \ - YY(LLVMExtraAddLowerSimdLoopPass) \ - YY(LLVMExtraAddFinalLowerGCPass) \ - YY(LLVMExtraAddPropagateJuliaAddrspaces) \ - YY(LLVMExtraAddRemoveJuliaAddrspacesPass) \ - YY(LLVMExtraAddCombineMulAddPass) \ - YY(LLVMExtraAddMultiVersioningPass) \ - YY(LLVMExtraAddLowerExcHandlersPass) \ - YY(LLVMExtraAddLateLowerGCFramePass) \ - YY(LLVMExtraJuliaLICMPass) \ - YY(LLVMExtraAddAllocOptPass) \ - YY(LLVMExtraAddLowerPTLSPass) \ - YY(LLVMExtraAddRemoveNIPass) \ - YY(LLVMExtraAddGCInvariantVerifierPass) \ - YY(LLVMExtraAddDemoteFloat16Pass) \ - YY(LLVMExtraAddCPUFeaturesPass) \ + YY(jl_build_newpm_pipeline) \ + YY(jl_register_passbuilder_callbacks) \ + YY(LLVMExtraMPMAddCPUFeaturesPass) \ + YY(LLVMExtraMPMAddRemoveNIPass) \ + YY(LLVMExtraMPMAddMultiVersioningPass) \ + YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ + YY(LLVMExtraMPMAddRemoveAddrspacesPass) \ + YY(LLVMExtraMPMAddLowerPTLSPass) \ + YY(LLVMExtraFPMAddDemoteFloat16Pass) \ + YY(LLVMExtraFPMAddCombineMulAddPass) \ + YY(LLVMExtraFPMAddLateLowerGCPass) \ + YY(LLVMExtraFPMAddAllocOptPass) \ + YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \ + YY(LLVMExtraFPMAddLowerExcHandlersPass) \ + YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ + YY(LLVMExtraFPMAddFinalLowerGCPass) \ + YY(LLVMExtraLPMAddJuliaLICMPass) \ + YY(LLVMExtraLPMAddLowerSIMDLoopPass) \ + YY(JLJITGetLLVMOrcExecutionSession) \ + YY(JLJITGetJuliaOJIT) \ + YY(JLJITGetExternalJITDylib) \ + YY(JLJITAddObjectFile) \ + YY(JLJITAddLLVMIRModule) \ + YY(JLJITLookup) \ + YY(JLJITMangleAndIntern) \ + YY(JLJITGetTripleString) \ + YY(JLJITGetGlobalPrefix) \ + YY(JLJITGetDataLayoutString) \ + YY(JLJITGetIRCompileLayer) \ + // end of file diff --git a/src/jl_uv.c b/src/jl_uv.c index ab13056b7601f..32adf9f1ba8f2 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ -30,6 +30,74 @@ extern "C" { #endif static uv_async_t signal_async; +static uv_timer_t wait_empty_worker; + +static void walk_print_cb(uv_handle_t *h, void *arg) +{ + if (!uv_is_active(h) || !uv_has_ref(h)) + return; + const char *type = uv_handle_type_name(h->type); + if (!type) + type = ""; + uv_os_fd_t fd; + if (h->type == UV_PROCESS) + fd = 
uv_process_get_pid((uv_process_t*)h); + else if (uv_fileno(h, &fd)) + fd = (uv_os_fd_t)-1; + const char *pad = " "; // 16 spaces + int npad = fd == -1 ? 0 : snprintf(NULL, 0, "%zd", (size_t)fd); + if (npad < 0) + npad = 0; + npad += strlen(type); + pad += npad < strlen(pad) ? npad : strlen(pad); + if (fd == -1) + jl_safe_printf(" %s %s%p->%p\n", type, pad, (void*)h, (void*)h->data); + else + jl_safe_printf(" %s[%zd] %s%p->%p\n", type, (size_t)fd, pad, (void*)h, (void*)h->data); +} + +static void wait_empty_func(uv_timer_t *t) +{ + // make sure this is hidden now, since we would auto-unref it later + uv_unref((uv_handle_t*)&signal_async); + if (!uv_loop_alive(t->loop)) + return; + jl_safe_printf("\n[pid %zd] waiting for IO to finish:\n" + " Handle type uv_handle_t->data\n", + (size_t)uv_os_getpid()); + uv_walk(jl_io_loop, walk_print_cb, NULL); + if (jl_generating_output() && jl_options.incremental) { + jl_safe_printf("This means that a package has started a background task or event source that has not finished running. For precompilation to complete successfully, the event source needs to be closed explicitly. See the developer documentation on fixing precompilation hangs for more help.\n"); + } + jl_gc_collect(JL_GC_FULL); +} + +void jl_wait_empty_begin(void) +{ + JL_UV_LOCK(); + if (jl_io_loop) { + if (wait_empty_worker.type != UV_TIMER) { + // try to purge anything that is just waiting for cleanup + jl_io_loop->stop_flag = 0; + uv_run(jl_io_loop, UV_RUN_NOWAIT); + uv_timer_init(jl_io_loop, &wait_empty_worker); + uv_unref((uv_handle_t*)&wait_empty_worker); + } + // make sure this is running + uv_update_time(jl_io_loop); + uv_timer_start(&wait_empty_worker, wait_empty_func, 10, 15000); + } + JL_UV_UNLOCK(); +} +void jl_wait_empty_end(void) +{ + // n.b. 
caller must be holding jl_uv_mutex + if (wait_empty_worker.type == UV_TIMER) + // make sure this timer is stopped, but not destroyed in case the user calls jl_wait_empty_begin again + uv_timer_stop(&wait_empty_worker); +} + + static void jl_signal_async_cb(uv_async_t *hdl) { @@ -49,7 +117,8 @@ jl_mutex_t jl_uv_mutex; void jl_init_uv(void) { uv_async_init(jl_io_loop, &signal_async, jl_signal_async_cb); - JL_MUTEX_INIT(&jl_uv_mutex); // a file-scope initializer can be used instead + uv_unref((uv_handle_t*)&signal_async); + JL_MUTEX_INIT(&jl_uv_mutex, "jl_uv_mutex"); // a file-scope initializer can be used instead } _Atomic(int) jl_uv_n_waiters = 0; @@ -60,6 +129,7 @@ void JL_UV_LOCK(void) } else { jl_atomic_fetch_add_relaxed(&jl_uv_n_waiters, 1); + jl_fence(); // [^store_buffering_2] jl_wake_libuv(); JL_LOCK(&jl_uv_mutex); jl_atomic_fetch_add_relaxed(&jl_uv_n_waiters, -1); @@ -109,9 +179,12 @@ static void jl_uv_closeHandle(uv_handle_t *handle) ct->world_age = last_age; return; } - if (handle == (uv_handle_t*)&signal_async) + if (handle == (uv_handle_t*)&wait_empty_worker) + handle->type = UV_UNKNOWN_HANDLE; + else if (handle == (uv_handle_t*)&signal_async) return; - free(handle); + else + free(handle); } static void jl_uv_flush_close_callback(uv_write_t *req, int status) @@ -212,7 +285,9 @@ JL_DLLEXPORT int jl_process_events(void) if (jl_atomic_load_relaxed(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) { JL_PROBE_RT_START_PROCESS_EVENTS(ct); loop->stop_flag = 0; + uv_ref((uv_handle_t*)&signal_async); // force the loop alive int r = uv_run(loop, UV_RUN_NOWAIT); + uv_unref((uv_handle_t*)&signal_async); JL_PROBE_RT_FINISH_PROCESS_EVENTS(ct); JL_UV_UNLOCK(); return r; @@ -633,13 +708,6 @@ JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...) errno = last_errno; } -JL_DLLEXPORT void jl_exit(int exitcode) -{ - uv_tty_reset_mode(); - jl_atexit_hook(exitcode); - exit(exitcode); -} - typedef union { struct sockaddr in; struct sockaddr_in v4; diff --git a/src/jlapi.c b/src/jlapi.c index d1fb1e5aacf25..6003aa5cee7d4 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -15,6 +15,10 @@ #include "julia_assert.h" #include "julia_internal.h" +#ifdef USE_TRACY +#include "tracy/TracyC.h" +#endif + #ifdef __cplusplus #include extern "C" { @@ -37,12 +41,12 @@ JL_DLLEXPORT void jl_set_ARGS(int argc, char **argv) jl_set_const(jl_core_module, jl_symbol("ARGS"), (jl_value_t*)args); JL_GC_POP(); } - assert(jl_array_len(args) == 0); + assert(jl_array_nrows(args) == 0); jl_array_grow_end(args, argc); int i; for (i = 0; i < argc; i++) { jl_value_t *s = (jl_value_t*)jl_cstr_to_string(argv[i]); - jl_arrayset(args, s, i); + jl_array_ptr_set(args, i, s); } } } @@ -96,9 +100,15 @@ JL_DLLEXPORT void jl_init_with_image__threading(const char *julia_bindir, jl_init_with_image(julia_bindir, image_relative_path); } +static void _jl_exception_clear(jl_task_t *ct) JL_NOTSAFEPOINT +{ + ct->ptls->previous_exception = NULL; +} + JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str) { jl_value_t *r; + jl_task_t *ct = jl_current_task; JL_TRY { const char filename[] = "none"; jl_value_t *ast = jl_parse_all(str, strlen(str), @@ -106,10 +116,10 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str) JL_GC_PUSH1(&ast); r = jl_toplevel_eval_in(jl_main_module, ast); JL_GC_POP(); - jl_exception_clear(); + _jl_exception_clear(ct); } JL_CATCH { - jl_current_task->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(); r = NULL; } return r; @@ -128,7 +138,7 @@ JL_DLLEXPORT 
jl_value_t *jl_exception_occurred(void) JL_DLLEXPORT void jl_exception_clear(void) { - jl_current_task->ptls->previous_exception = NULL; + _jl_exception_clear(jl_current_task); } // get the name of a type as a string @@ -155,8 +165,11 @@ JL_DLLEXPORT int jl_array_rank(jl_value_t *a) return jl_array_ndims(a); } -JL_DLLEXPORT size_t jl_array_size(jl_value_t *a, int d) +JL_DLLEXPORT size_t jl_array_size(jl_array_t *a, int d) { + // n.b this functions only use was to violate the vector abstraction, so we have to continue to emulate that + if (d >= jl_array_ndims(a)) + return a->ref.mem->length; return jl_array_dim(a, d); } @@ -181,7 +194,7 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, uint32_t n v = jl_apply(argv, nargs); ct->world_age = last_age; JL_GC_POP(); - jl_exception_clear(); + _jl_exception_clear(ct); } JL_CATCH { ct->ptls->previous_exception = jl_current_exception(); @@ -201,7 +214,7 @@ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f) v = jl_apply_generic(f, NULL, 0); ct->world_age = last_age; JL_GC_POP(); - jl_exception_clear(); + _jl_exception_clear(ct); } JL_CATCH { ct->ptls->previous_exception = jl_current_exception(); @@ -224,7 +237,7 @@ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a) v = jl_apply(argv, 2); ct->world_age = last_age; JL_GC_POP(); - jl_exception_clear(); + _jl_exception_clear(ct); } JL_CATCH { ct->ptls->previous_exception = jl_current_exception(); @@ -248,7 +261,7 @@ JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b v = jl_apply(argv, 3); ct->world_age = last_age; JL_GC_POP(); - jl_exception_clear(); + _jl_exception_clear(ct); } JL_CATCH { ct->ptls->previous_exception = jl_current_exception(); @@ -261,6 +274,7 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a, jl_value_t *b, jl_value_t *c) { jl_value_t *v; + jl_task_t *ct = jl_current_task; JL_TRY { jl_value_t **argv; JL_GC_PUSHARGS(argv, 4); @@ -268,16 +282,15 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a, argv[1] = a; argv[2] = b; argv[3] = c; - jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_get_world_counter(); v = jl_apply(argv, 4); ct->world_age = last_age; JL_GC_POP(); - jl_exception_clear(); + _jl_exception_clear(ct); } JL_CATCH { - jl_current_task->ptls->previous_exception = jl_current_exception(); + ct->ptls->previous_exception = jl_current_exception(); v = NULL; } return v; @@ -467,6 +480,11 @@ JL_DLLEXPORT void (jl_cpu_pause)(void) jl_cpu_pause(); } +JL_DLLEXPORT void (jl_cpu_suspend)(void) +{ + jl_cpu_suspend(); +} + JL_DLLEXPORT void (jl_cpu_wake)(void) { jl_cpu_wake(); @@ -560,17 +578,21 @@ static NOINLINE int true_main(int argc, char *argv[]) (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("_start")) : NULL; if (start_client) { + jl_task_t *ct = jl_current_task; + int ret = 1; JL_TRY { - jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_get_world_counter(); - jl_apply(&start_client, 1); + jl_value_t *r = jl_apply(&start_client, 1); + if (jl_typeof(r) != (jl_value_t*)jl_int32_type) + jl_type_error("typeassert", (jl_value_t*)jl_int32_type, r); + ret = jl_unbox_int32(r); ct->world_age = last_age; } JL_CATCH { - jl_no_exc_handler(jl_current_exception()); + jl_no_exc_handler(jl_current_exception(), ct); } - return 0; + return ret; } // run program if specified, otherwise enter REPL @@ -674,6 +696,11 @@ static void rr_detach_teleport(void) { JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[]) { 
+#ifdef USE_TRACY + if (getenv("JULIA_WAIT_FOR_TRACY")) + while (!TracyCIsConnected) jl_cpu_pause(); // Wait for connection +#endif + // no-op on Windows, note that the caller must have already converted // from `wchar_t` to `UTF-8` already if we're running on Windows. uv_setup_args(argc, argv); diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index f72c79f281480..b46663c560346 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -31,6 +31,7 @@ ;; this is overwritten when we run in actual julia (define (defined-julia-global v) #f) +(define (nothrow-julia-global v) #f) (define (julia-current-file) 'none) (define (julia-current-line) 0) @@ -93,18 +94,38 @@ ;; lowering entry points +; find the first line number in this expression, before we might eliminate them +(define (first-lineno blk) + (cond ((not (pair? blk)) #f) + ((eq? (car blk) 'line) blk) + ((and (eq? (car blk) 'hygienic-scope) (pair? (cdddr blk)) (pair? (cadddr blk)) (eq? (car (cadddr blk)) 'line)) + (cadddr blk)) + ((memq (car blk) '(escape hygienic-scope)) + (first-lineno (cadr blk))) + ((memq (car blk) '(toplevel block)) + (let loop ((xs (cdr blk))) + (and (pair? xs) + (let ((elt (first-lineno (car xs)))) + (or elt (loop (cdr xs))))))) + (else #f))) + ;; return a lambda expression representing a thunk for a top-level expression ;; note: expansion of stuff inside module is delayed, so the contents obey ;; toplevel expansion order (don't expand until stuff before is evaluated). (define (expand-toplevel-expr-- e file line) - (let ((ex0 (julia-expand-macroscope e))) + (let ((lno (first-lineno e)) + (ex0 (julia-expand-macroscope e))) + (if (and lno (or (not (length= lno 3)) (not (atom? (caddr lno))))) (set! lno #f)) (if (toplevel-only-expr? ex0) - ex0 - (let* ((ex (julia-expand0 ex0 file line)) + (if (and (pair? e) (memq (car ex0) '(error incomplete))) + ex0 + (if lno `(toplevel ,lno ,ex0) ex0)) + (let* ((linenode (if (and lno (or (= line 0) (eq? file 'none))) lno `(line ,line ,file))) + (ex (julia-expand0 ex0 linenode)) (th (julia-expand1 `(lambda () () (scope-block - ,(blockify ex))) + ,(blockify ex lno))) file line))) (if (and (null? (cdadr (caddr th))) (and (length= (lam:body th) 2) @@ -120,7 +141,7 @@ (define (toplevel-only-expr? e) (and (pair? e) - (or (memq (car e) '(toplevel line module import using export + (or (memq (car e) '(toplevel line module import using export public error incomplete)) (and (memq (car e) '(global const)) (every symbol? (cdr e)))))) @@ -129,7 +150,7 @@ (define (expand-toplevel-expr e file line) (cond ((or (atom? e) (toplevel-only-expr? e)) (if (underscore-symbol? e) - (error "all-underscore identifier used as rvalue")) + (error "all-underscore identifiers are write-only and their values cannot be used in expressions")) e) (else (let ((last *in-expand*)) @@ -179,14 +200,9 @@ ;; construct default definitions of `eval` for non-bare modules ;; called by jl_eval_module_expr -(define (module-default-defs e) +(define (module-default-defs name file line) (jl-expand-to-thunk - (let* ((name (caddr e)) - (body (cadddr e)) - (loc (if (null? (cdr body)) () (cadr body))) - (loc (if (and (pair? loc) (eq? (car loc) 'line)) - (list loc) - '())) + (let* ((loc (if (and (eq? file 'none) (eq? line 0)) '() `((line ,line ,file)))) (x (if (eq? name 'x) 'y 'x)) (mex (if (eq? name 'mapexpr) 'map_expr 'mapexpr))) `(block @@ -202,7 +218,7 @@ (block ,@loc (call (core _call_latest) (top include) ,mex ,name ,x))))) - 'none 0)) + file line)) ; run whole frontend on a string. useful for testing. 
(define (fe str) diff --git a/src/jloptions.c b/src/jloptions.c index ef5d192322c64..5d627687a38be 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -40,6 +40,8 @@ JL_DLLEXPORT void jl_init_options(void) NULL, // cpu_target ("native", "core2", etc...) 0, // nthreadpools 0, // nthreads + 0, // nmarkthreads + 0, // nsweepthreads NULL, // nthreads_per_pool 0, // nprocs NULL, // machine_file @@ -71,6 +73,7 @@ JL_DLLEXPORT void jl_init_options(void) JL_OPTIONS_HANDLE_SIGNALS_ON, JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES, JL_OPTIONS_USE_COMPILED_MODULES_YES, + JL_OPTIONS_USE_PKGIMAGES_YES, NULL, // bind-to NULL, // output-bc NULL, // output-unopt-bc @@ -85,6 +88,7 @@ JL_DLLEXPORT void jl_init_options(void) 0, // rr-detach 0, // strip-metadata 0, // strip-ir + 0, // permalloc_pkgimg 0, // heap-size-hint }; jl_options_initialized = 1; @@ -92,13 +96,13 @@ JL_DLLEXPORT void jl_init_options(void) static const char usage[] = "\n julia [switches] -- [programfile] [args...]\n\n"; static const char opts[] = - "Switches (a '*' marks the default value, if applicable):\n\n" + "Switches (a '*' marks the default value, if applicable; settings marked '($)' may trigger package precompilation):\n\n" " -v, --version Display version information\n" " -h, --help Print this message (--help-hidden for more)\n" " --help-hidden Uncommon options not shown by `-h`\n\n" // startup options - " --project[={|@.}] Set as the home project/environment\n" + " --project[={|@.}] Set as the active project/environment\n" " -J, --sysimage Start up with the given system image file\n" " -H, --home Set location of `julia` executable\n" " --startup-file={yes*|no} Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH`\n" @@ -106,8 +110,10 @@ static const char opts[] = " --handle-signals={yes*|no} Enable or disable Julia's default signal handlers\n" " --sysimage-native-code={yes*|no}\n" " Use native code from system image if available\n" - " --compiled-modules={yes*|no}\n" - " Enable or disable incremental precompilation of modules\n\n" + " --compiled-modules={yes*|no|existing}\n" + " Enable or disable incremental precompilation of modules\n" + " --pkgimages={yes*|no|existing}\n" + " Enable or disable usage of native code caching in the form of pkgimages ($)\n\n" // actions " -e, --eval Evaluate \n" @@ -125,14 +131,17 @@ static const char opts[] = " interface if supported (Linux and Windows) or to the number of CPU\n" " threads if not supported (MacOS) or if process affinity is not\n" " configured, and sets M to 1.\n" + " --gcthreads=N[,M] Use N threads for the mark phase of GC and M (0 or 1) threads for the concurrent sweeping phase of GC.\n" + " N is set to half of the number of compute threads and M is set to 0 if unspecified.\n" " -p, --procs {N|auto} Integer value N launches N additional local worker processes\n" " \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n" " --machine-file Run processes on hosts listed in \n\n" // interactive options - " -i Interactive mode; REPL runs and `isinteractive()` is true\n" + " -i, --interactive Interactive mode; REPL runs and `isinteractive()` is true\n" " -q, --quiet Quiet startup: no banner, suppress REPL warnings\n" - " --banner={yes|no|auto*} Enable or disable startup banner\n" + " --banner={yes|no|short|auto*}\n" + " Enable or disable startup banner\n" " --color={yes|no|auto*} Enable or disable color text\n" " --history-file={yes*|no} Load or save history\n\n" @@ -143,16 +152,16 @@ static const char opts[] = // code generation options " -C, 
--cpu-target Limit usage of CPU features up to ; set to `help` to see the available options\n" - " -O, --optimize={0,1,2*,3} Set the optimization level (level 3 if `-O` is used without a level)\n" + " -O, --optimize={0,1,2*,3} Set the optimization level (level 3 if `-O` is used without a level) ($)\n" " --min-optlevel={0*,1,2,3} Set a lower bound on the optimization level\n" #ifdef JL_DEBUG_BUILD - " -g [{0,1,2*}] Set the level of debug info generation in the julia-debug build\n" + " -g, --debug-info=[{0,1,2*}] Set the level of debug info generation in the julia-debug build ($)\n" #else - " -g [{0,1*,2}] Set the level of debug info generation (level 2 if `-g` is used without a level)\n" + " -g, --debug-info=[{0,1*,2}] Set the level of debug info generation (level 2 if `-g` is used without a level) ($)\n" #endif " --inline={yes*|no} Control whether inlining is permitted, including overriding @inline declarations\n" " --check-bounds={yes|no|auto*}\n" - " Emit bounds checks always, never, or respect @inbounds declarations\n" + " Emit bounds checks always, never, or respect @inbounds declarations ($)\n" #ifdef USE_POLLY " --polly={yes*|no} Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)\n" #endif @@ -204,6 +213,7 @@ static const char opts_hidden[] = " --trace-compile={stderr,name}\n" " Print precompile statements for methods compiled during execution or save to a path\n" " --image-codegen Force generate code in imaging mode\n" + " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n" ; JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) @@ -239,6 +249,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_banner, opt_sysimage_native_code, opt_compiled_modules, + opt_pkgimages, opt_machine_file, opt_project, opt_bug_report, @@ -247,6 +258,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_strip_metadata, opt_strip_ir, opt_heap_size_hint, + opt_gc_threads, + opt_permalloc_pkgimg }; static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:"; static const struct option longopts[] = { @@ -256,6 +269,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "version", no_argument, 0, 'v' }, { "help", no_argument, 0, 'h' }, { "help-hidden", no_argument, 0, opt_help_hidden }, + { "interactive", no_argument, 0, 'i' }, { "quiet", no_argument, 0, 'q' }, { "banner", required_argument, 0, opt_banner }, { "home", required_argument, 0, 'H' }, @@ -266,9 +280,11 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "sysimage", required_argument, 0, 'J' }, { "sysimage-native-code", required_argument, 0, opt_sysimage_native_code }, { "compiled-modules",required_argument, 0, opt_compiled_modules }, + { "pkgimages", required_argument, 0, opt_pkgimages }, { "cpu-target", required_argument, 0, 'C' }, { "procs", required_argument, 0, 'p' }, { "threads", required_argument, 0, 't' }, + { "gcthreads", required_argument, 0, opt_gc_threads }, { "machine-file", required_argument, 0, opt_machine_file }, { "project", optional_argument, 0, opt_project }, { "color", required_argument, 0, opt_color }, @@ -279,6 +295,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "track-allocation",optional_argument, 0, opt_track_allocation }, { "optimize", optional_argument, 0, 'O' }, { "min-optlevel", optional_argument, 0, opt_optlevel_min }, + { "debug-info", optional_argument, 0, 'g' }, { "check-bounds", required_argument, 0, opt_check_bounds }, { "output-bc", required_argument, 0, opt_output_bc }, { 
"output-unopt-bc", required_argument, 0, opt_output_unopt_bc }, @@ -302,6 +319,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "rr-detach", no_argument, 0, opt_rr_detach }, { "strip-metadata", no_argument, 0, opt_strip_metadata }, { "strip-ir", no_argument, 0, opt_strip_ir }, + { "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg }, { "heap-size-hint", required_argument, 0, opt_heap_size_hint }, { 0, 0, 0, 0 } }; @@ -343,11 +361,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) c = o->val; goto restart_switch; } - else if (o->val <= 0xff && strchr(shortopts, o->val)) { - jl_errorf("option `-%c/--%s` is missing an argument", o->val, o->name); - } else { - jl_errorf("option `--%s` is missing an argument", o->name); + const char *problem = o->has_arg ? "is missing an argument" : "does not accept an argument"; + if (o->val <= 0xff && strchr(shortopts, o->val)) { + jl_errorf("option `-%c/--%s` %s", o->val, o->name, problem); + } + else { + jl_errorf("option `--%s` %s", o->name, problem); + } } } } @@ -423,8 +444,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_options.banner = 0; else if (!strcmp(optarg, "auto")) jl_options.banner = -1; + else if (!strcmp(optarg, "short")) + jl_options.banner = 2; else - jl_errorf("julia: invalid argument to --banner={yes|no|auto} (%s)", optarg); + jl_errorf("julia: invalid argument to --banner={yes|no|auto|short} (%s)", optarg); break; case opt_sysimage_native_code: if (!strcmp(optarg,"yes")) @@ -439,8 +462,20 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_YES; else if (!strcmp(optarg,"no")) jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_NO; + else if (!strcmp(optarg,"existing")) + jl_options.use_compiled_modules = JL_OPTIONS_USE_COMPILED_MODULES_EXISTING; + else + jl_errorf("julia: invalid argument to --compiled-modules={yes|no|existing} (%s)", optarg); + break; + case opt_pkgimages: + if (!strcmp(optarg,"yes")) + jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_YES; + else if (!strcmp(optarg,"no")) + jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_NO; + else if (!strcmp(optarg,"existing")) + jl_options.use_pkgimages = JL_OPTIONS_USE_PKGIMAGES_EXISTING; else - jl_errorf("julia: invalid argument to --compiled-modules={yes|no} (%s)", optarg); + jl_errorf("julia: invalid argument to --pkgimages={yes|no} (%s)", optarg); break; case 'C': // cpu-target jl_options.cpu_target = strdup(optarg); @@ -790,14 +825,36 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) break; } jl_options.heap_size_hint = (uint64_t)(value * multiplier); - - jl_gc_set_max_memory(jl_options.heap_size_hint); } } if (jl_options.heap_size_hint == 0) jl_errorf("julia: invalid argument to --heap-size-hint without memory size specified"); break; + case opt_gc_threads: + errno = 0; + long nmarkthreads = strtol(optarg, &endptr, 10); + if (errno != 0 || optarg == endptr || nmarkthreads < 1 || nmarkthreads >= INT16_MAX) { + jl_errorf("julia: --gcthreads=[,]; n must be an integer >= 1"); + } + jl_options.nmarkthreads = (int16_t)nmarkthreads; + if (*endptr == ',') { + errno = 0; + char *endptri; + long nsweepthreads = strtol(&endptr[1], &endptri, 10); + if (errno != 0 || endptri == &endptr[1] || *endptri != 0 || nsweepthreads < 0 || nsweepthreads > 1) + jl_errorf("julia: --gcthreads=,; m must be 0 or 1"); + jl_options.nsweepthreads = (int8_t)nsweepthreads; + } + break; + case opt_permalloc_pkgimg: + if (!strcmp(optarg,"yes")) 
+ jl_options.permalloc_pkgimg = 1; + else if (!strcmp(optarg,"no")) + jl_options.permalloc_pkgimg = 0; + else + jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg); + break; default: jl_errorf("julia: unhandled option -- %c\n" "This is a bug, please report it.", c); diff --git a/src/jloptions.h b/src/jloptions.h index d7be95348f01f..8649c405112d7 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -15,6 +15,8 @@ typedef struct { const char *cpu_target; int8_t nthreadpools; int16_t nthreads; + int16_t nmarkthreads; + int8_t nsweepthreads; const int16_t *nthreads_per_pool; int32_t nprocs; const char *machine_file; @@ -42,6 +44,7 @@ typedef struct { int8_t handle_signals; int8_t use_sysimage_native_code; int8_t use_compiled_modules; + int8_t use_pkgimages; const char *bindto; const char *outputbc; const char *outputunoptbc; @@ -56,6 +59,7 @@ typedef struct { int8_t rr_detach; int8_t strip_metadata; int8_t strip_ir; + int8_t permalloc_pkgimg; uint64_t heap_size_hint; } jl_options_t; diff --git a/src/jltypes.c b/src/jltypes.c index 553f07e0d5481..f0f3b36951a2b 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -20,6 +20,7 @@ extern "C" { #endif _Atomic(jl_value_t*) cmpswap_names JL_GLOBALLY_ROOTED; +jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(*ijl_small_typeof)]; // 16-bit aligned, like the GC // compute empirical max-probe for a given size #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6) @@ -37,85 +38,121 @@ static int typeenv_has(jl_typeenv_t *env, jl_tvar_t *v) JL_NOTSAFEPOINT return 0; } -static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env) +static int typeenv_has_ne(jl_typeenv_t *env, jl_tvar_t *v) JL_NOTSAFEPOINT { - if (jl_typeis(v, jl_tvar_type)) - return !typeenv_has(env, (jl_tvar_t*)v); - if (jl_is_uniontype(v)) - return layout_uses_free_typevars(((jl_uniontype_t*)v)->a, env) || - layout_uses_free_typevars(((jl_uniontype_t*)v)->b, env); - if (jl_is_vararg(v)) { - jl_vararg_t *vm = (jl_vararg_t*)v; - if (vm->T && layout_uses_free_typevars(vm->T, env)) - return 1; - if (vm->N && layout_uses_free_typevars(vm->N, env)) - return 1; - return 0; - } - if (jl_is_unionall(v)) { - jl_unionall_t *ua = (jl_unionall_t*)v; - jl_typeenv_t newenv = { ua->var, NULL, env }; - return layout_uses_free_typevars(ua->body, &newenv); + while (env != NULL) { + if (env->var == v) + return env->val != (jl_value_t*)v; // consider it actually not present if it is bound to itself unchanging + env = env->prev; } - if (jl_is_datatype(v)) { - jl_datatype_t *dt = (jl_datatype_t*)v; - if (dt->layout || dt->isconcretetype || !dt->name->mayinlinealloc) - return 0; - if (dt->name == jl_namedtuple_typename) - return layout_uses_free_typevars(jl_tparam0(dt), env) || layout_uses_free_typevars(jl_tparam1(dt), env); - if (dt->name == jl_tuple_typename) - // conservative, since we don't want to inline an abstract tuple, - // and we currently declare !has_fixed_layout for these, but that - // means we also won't be able to inline a tuple which is concrete - // except for the use of free type-vars - return 1; - jl_svec_t *types = jl_get_fieldtypes(dt); - size_t i, l = jl_svec_len(types); - for (i = 0; i < l; i++) { - jl_value_t *ft = jl_svecref(types, i); - if (layout_uses_free_typevars(ft, env)) { - // This might be inline-alloc, but we don't know the layout + return 0; +} + + +static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env) +{ + while (1) { + if (jl_is_typevar(v)) + return !typeenv_has(env, (jl_tvar_t*)v); + while 
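/* Standalone sketch of the "N[,M]" shape accepted by the new --gcthreads
 * option parsed in jl_parse_opts above and stored in the nmarkthreads /
 * nsweepthreads fields: N mark threads, optional M in {0,1} sweep threads.
 * The helper name and its strictness about trailing characters are this
 * sketch's own choices, not the patch's code. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_gcthreads(const char *arg, int16_t *nmark, int8_t *nsweep)
{
    char *end;
    errno = 0;
    long n = strtol(arg, &end, 10);
    if (errno != 0 || end == arg || n < 1 || n >= INT16_MAX)
        return -1;                       /* N must be an integer >= 1 */
    *nmark = (int16_t)n;
    *nsweep = 0;
    if (*end == ',') {
        char *end2;
        errno = 0;
        long m = strtol(end + 1, &end2, 10);
        if (errno != 0 || end2 == end + 1 || *end2 != '\0' || m < 0 || m > 1)
            return -1;                   /* M must be 0 or 1 */
        *nsweep = (int8_t)m;
    }
    else if (*end != '\0') {
        return -1;
    }
    return 0;
}

int main(void)
{
    int16_t nmark; int8_t nsweep;
    if (parse_gcthreads("4,1", &nmark, &nsweep) == 0)
        printf("mark=%d sweep=%d\n", nmark, nsweep);   /* mark=4 sweep=1 */
    return 0;
}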
(jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t)); + newenv->var = ua->var; + newenv->val = NULL; + newenv->prev = env; + env = newenv; + v = ua->body; + } + if (jl_is_datatype(v)) { + jl_datatype_t *dt = (jl_datatype_t*)v; + if (dt->isconcretetype) + return 0; + if (dt->layout || !dt->name->mayinlinealloc) + return 0; + if (dt->name == jl_namedtuple_typename) + return layout_uses_free_typevars(jl_tparam0(dt), env) || layout_uses_free_typevars(jl_tparam1(dt), env); + if (dt->name == jl_tuple_typename) + // conservative, since we don't want to inline an abstract tuple, + // and we currently declare !has_fixed_layout for these, but that + // means we also won't be able to inline a tuple which is concrete + // except for the use of free type-vars return 1; + jl_svec_t *types = jl_get_fieldtypes(dt); + size_t i, l = jl_svec_len(types); + for (i = 0; i < l; i++) { + jl_value_t *ft = jl_svecref(types, i); + if (layout_uses_free_typevars(ft, env)) + // This might be inline-alloc, but we don't know the layout + return 1; } + return 0; + } + else if (jl_is_uniontype(v)) { + if (layout_uses_free_typevars(((jl_uniontype_t*)v)->a, env)) + return 1; + v = ((jl_uniontype_t*)v)->b; + } + else if (jl_is_vararg(v)) { + jl_vararg_t *vm = (jl_vararg_t*)v; + if (!vm->T) + return 0; + if (vm->N && layout_uses_free_typevars(vm->N, env)) + return 1; + v = vm->T; + } + else { + return 0; } } - return 0; } static int has_free_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT { - if (jl_typeis(v, jl_tvar_type)) { - return !typeenv_has(env, (jl_tvar_t*)v); - } - if (jl_is_uniontype(v)) - return has_free_typevars(((jl_uniontype_t*)v)->a, env) || - has_free_typevars(((jl_uniontype_t*)v)->b, env); - if (jl_is_vararg(v)) { - jl_vararg_t *vm = (jl_vararg_t*)v; - if (vm->T) { - if (has_free_typevars(vm->T, env)) - return 1; - return vm->N && has_free_typevars(vm->N, env); + while (1) { + if (jl_is_typevar(v)) { + return !typeenv_has(env, (jl_tvar_t*)v); } - } - if (jl_is_unionall(v)) { - jl_unionall_t *ua = (jl_unionall_t*)v; - jl_typeenv_t newenv = { ua->var, NULL, env }; - return has_free_typevars(ua->var->lb, env) || has_free_typevars(ua->var->ub, env) || - has_free_typevars(ua->body, &newenv); - } - if (jl_is_datatype(v)) { - int expect = ((jl_datatype_t*)v)->hasfreetypevars; - if (expect == 0 || env == NULL) - return expect; - size_t i; - for (i = 0; i < jl_nparams(v); i++) { - if (has_free_typevars(jl_tparam(v, i), env)) { + while (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + if (ua->var->lb != jl_bottom_type && has_free_typevars(ua->var->lb, env)) + return 1; + if (ua->var->ub != (jl_value_t*)jl_any_type && has_free_typevars(ua->var->ub, env)) return 1; + jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t)); + newenv->var = ua->var; + newenv->val = NULL; + newenv->prev = env; + env = newenv; + v = ua->body; + } + if (jl_is_datatype(v)) { + int expect = ((jl_datatype_t*)v)->hasfreetypevars; + if (expect == 0 || env == NULL) + return expect; + size_t i; + for (i = 0; i < jl_nparams(v); i++) { + if (has_free_typevars(jl_tparam(v, i), env)) + return 1; } + return 0; + } + else if (jl_is_uniontype(v)) { + if (has_free_typevars(((jl_uniontype_t*)v)->a, env)) + return 1; + v = ((jl_uniontype_t*)v)->b; + } + else if (jl_is_vararg(v)) { + jl_vararg_t *vm = (jl_vararg_t*)v; + if (!vm->T) + return 0; + if (vm->N && has_free_typevars(vm->N, env)) + return 1; + v = vm->T; + } + else { + return 0; } } - 
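/* Toy sketch of the recursion-to-iteration rewrite used by the type-var
 * walkers above: each binder pushes an alloca'd environment link and the
 * loop descends into the body, so right-nested binders no longer consume a
 * C call frame each. The `expr`/`env` structs are invented for this
 * illustration; the patch does the same thing with jl_typeenv_t and
 * UnionAll bodies. Assumes <alloca.h> is available. */
#include <alloca.h>
#include <stdio.h>

struct env  { int var; struct env *prev; };
struct expr { int is_binder; int var; struct expr *body; };

static int is_bound(const struct env *env, int var)
{
    for (; env != NULL; env = env->prev)
        if (env->var == var)
            return 1;
    return 0;
}

static int has_free_var(const struct expr *e, struct env *env)
{
    while (1) {
        if (!e->is_binder)
            return !is_bound(env, e->var);     /* a use is free if no binder matched */
        struct env *newenv = alloca(sizeof(*newenv));
        newenv->var = e->var;                  /* push the binder without recursing */
        newenv->prev = env;
        env = newenv;
        e = e->body;
    }
}

int main(void)
{
    struct expr use   = {0, 7, NULL};
    struct expr bound = {1, 7, &use};          /* binds var 7 around its use */
    printf("%d %d\n", has_free_var(&use, NULL), has_free_var(&bound, NULL));  /* 1 0 */
    return 0;
}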
return 0; } JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v) JL_NOTSAFEPOINT @@ -125,36 +162,48 @@ JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v) JL_NOTSAFEPOINT static void find_free_typevars(jl_value_t *v, jl_typeenv_t *env, jl_array_t *out) { - if (jl_typeis(v, jl_tvar_type)) { - if (!typeenv_has(env, (jl_tvar_t*)v)) - jl_array_ptr_1d_push(out, v); - } - else if (jl_is_uniontype(v)) { - find_free_typevars(((jl_uniontype_t*)v)->a, env, out); - find_free_typevars(((jl_uniontype_t*)v)->b, env, out); - } - else if (jl_is_vararg(v)) { - jl_vararg_t *vm = (jl_vararg_t *)v; - if (vm->T) { - find_free_typevars(vm->T, env, out); - if (vm->N) { + while (1) { + if (jl_is_typevar(v)) { + if (!typeenv_has(env, (jl_tvar_t*)v)) + jl_array_ptr_1d_push(out, v); + return; + } + while (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + if (ua->var->lb != jl_bottom_type) + find_free_typevars(ua->var->lb, env, out); + if (ua->var->ub != (jl_value_t*)jl_any_type) + find_free_typevars(ua->var->ub, env, out); + jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t)); + newenv->var = ua->var; + newenv->val = NULL; + newenv->prev = env; + env = newenv; + v = ua->body; + } + if (jl_is_datatype(v)) { + if (!((jl_datatype_t*)v)->hasfreetypevars) + return; + size_t i; + for (i = 0; i < jl_nparams(v); i++) + find_free_typevars(jl_tparam(v, i), env, out); + return; + } + else if (jl_is_uniontype(v)) { + find_free_typevars(((jl_uniontype_t*)v)->a, env, out); + v = ((jl_uniontype_t*)v)->b; + } + else if (jl_is_vararg(v)) { + jl_vararg_t *vm = (jl_vararg_t *)v; + if (!vm->T) + return; + if (vm->N) // this swap the visited order, but we don't mind it find_free_typevars(vm->N, env, out); - } + v = vm->T; } - } - else if (jl_is_unionall(v)) { - jl_unionall_t *ua = (jl_unionall_t*)v; - jl_typeenv_t newenv = { ua->var, NULL, env }; - find_free_typevars(ua->var->lb, env, out); - find_free_typevars(ua->var->ub, env, out); - find_free_typevars(ua->body, &newenv, out); - } - else if (jl_is_datatype(v)) { - if (!((jl_datatype_t*)v)->hasfreetypevars) + else { return; - size_t i; - for (i=0; i < jl_nparams(v); i++) - find_free_typevars(jl_tparam(v,i), env, out); + } } } @@ -170,41 +219,55 @@ JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v) // test whether a type has vars bound by the given environment static int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT { - if (jl_typeis(v, jl_tvar_type)) - return typeenv_has(env, (jl_tvar_t*)v); - if (jl_is_uniontype(v)) - return jl_has_bound_typevars(((jl_uniontype_t*)v)->a, env) || - jl_has_bound_typevars(((jl_uniontype_t*)v)->b, env); - if (jl_is_vararg(v)) { - jl_vararg_t *vm = (jl_vararg_t *)v; - return vm->T && (jl_has_bound_typevars(vm->T, env) || - (vm->N && jl_has_bound_typevars(vm->N, env))); - } - if (jl_is_unionall(v)) { - jl_unionall_t *ua = (jl_unionall_t*)v; - if (jl_has_bound_typevars(ua->var->lb, env) || jl_has_bound_typevars(ua->var->ub, env)) - return 1; - jl_typeenv_t *te = env; - while (te != NULL) { - if (te->var == ua->var) - break; - te = te->prev; + while (1) { + if (jl_is_typevar(v)) { + return typeenv_has_ne(env, (jl_tvar_t*)v); } - if (te) te->var = NULL; // temporarily remove this var from env - int ans = jl_has_bound_typevars(ua->body, env); - if (te) te->var = ua->var; - return ans; - } - if (jl_is_datatype(v)) { - if (!((jl_datatype_t*)v)->hasfreetypevars) + while (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + if (ua->var->lb != jl_bottom_type && 
jl_has_bound_typevars(ua->var->lb, env)) + return 1; + if (ua->var->ub != (jl_value_t*)jl_any_type && jl_has_bound_typevars(ua->var->ub, env)) + return 1; + // Temporarily remove this var from env if necessary + // Note that te might be bound more than once in the env, so + // we remove it by setting it to itself in a new env. + if (typeenv_has_ne(env, ua->var)) { + jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t)); + newenv->var = ua->var; + newenv->val = (jl_value_t*)ua->var; + newenv->prev = env; + env = newenv; + } + v = ua->body; + } + if (jl_is_datatype(v)) { + if (!((jl_datatype_t*)v)->hasfreetypevars) + return 0; + size_t i; + for (i = 0; i < jl_nparams(v); i++) { + if (jl_has_bound_typevars(jl_tparam(v, i), env)) + return 1; + } return 0; - size_t i; - for (i=0; i < jl_nparams(v); i++) { - if (jl_has_bound_typevars(jl_tparam(v,i), env)) + } + else if (jl_is_uniontype(v)) { + if (jl_has_bound_typevars(((jl_uniontype_t*)v)->a, env)) return 1; + v = ((jl_uniontype_t*)v)->b; + } + else if (jl_is_vararg(v)) { + jl_vararg_t *vm = (jl_vararg_t *)v; + if (!vm->T) + return 0; + if (vm->N && jl_has_bound_typevars(vm->N, env)) + return 1; + v = vm->T; + } + else { + return 0; } } - return 0; } JL_DLLEXPORT int jl_has_typevar(jl_value_t *t, jl_tvar_t *v) JL_NOTSAFEPOINT @@ -229,7 +292,13 @@ JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua) int jl_has_fixed_layout(jl_datatype_t *dt) { - if (dt->layout || dt->isconcretetype) + if (dt->isconcretetype) + return 1; + if (jl_is_genericmemory_type(dt)) { // GenericMemory{kind,addrspace,T} uses T for final layout, which is a parameter not a field however + // optionally: return !layout_uses_free_typevars(jl_tparam1(dt), env); + return 0; + } + if (dt->layout) return 1; if (dt->name->abstract) return 0; @@ -252,15 +321,15 @@ int jl_has_fixed_layout(jl_datatype_t *dt) int jl_type_mappable_to_c(jl_value_t *ty) { assert(!jl_is_typevar(ty) && jl_is_type(ty)); + if (jl_is_array_type(ty) || jl_is_genericmemory_type(ty) || + (jl_is_datatype(ty) && ((jl_datatype_t*)ty)->layout != NULL && + jl_is_layout_opaque(((jl_datatype_t*)ty)->layout))) + return 1; // as boxed if (jl_is_structtype(ty)) return jl_has_fixed_layout((jl_datatype_t*)ty) && ((jl_datatype_t*)ty)->name->atomicfields == NULL; if (jl_is_primitivetype(ty)) - return 1; - if (ty == (jl_value_t*)jl_any_type || ty == (jl_value_t*)jl_bottom_type) - return 1; // as boxed - if (jl_is_abstract_ref_type(ty) || jl_is_array_type(ty) || - (jl_is_datatype(ty) && ((jl_datatype_t*)ty)->layout != NULL && - jl_is_layout_opaque(((jl_datatype_t*)ty)->layout))) + return 1; // as isbits + if (ty == (jl_value_t*)jl_any_type || ty == (jl_value_t*)jl_bottom_type || jl_is_abstract_ref_type(ty)) return 1; // as boxed return 0; // refuse to map Union and UnionAll to C } @@ -272,7 +341,7 @@ JL_DLLEXPORT int jl_get_size(jl_value_t *val, size_t *pnt) if (jl_is_long(val)) { ssize_t slen = jl_unbox_long(val); if (slen < 0) - jl_errorf("size or dimension is negative: %d", slen); + jl_errorf("size or dimension is negative: %zd", slen); *pnt = slen; return 1; } @@ -281,28 +350,15 @@ JL_DLLEXPORT int jl_get_size(jl_value_t *val, size_t *pnt) // --- type union --- -static int count_union_components(jl_value_t **types, size_t n) -{ - size_t i, c=0; - for(i=0; i < n; i++) { - jl_value_t *e = types[i]; - if (jl_is_uniontype(e)) { - jl_uniontype_t *u = (jl_uniontype_t*)e; - c += count_union_components(&u->a, 1); - c += count_union_components(&u->b, 1); - } - else { - c++; - } - } - return c; 
-} - int jl_count_union_components(jl_value_t *v) { - if (!jl_is_uniontype(v)) return 1; - jl_uniontype_t *u = (jl_uniontype_t*)v; - return jl_count_union_components(u->a) + jl_count_union_components(u->b); + size_t c = 0; + while (jl_is_uniontype(v)) { + jl_uniontype_t *u = (jl_uniontype_t*)v; + c += jl_count_union_components(u->a); + v = u->b; + } + return c + 1; } // Return the `*pi`th element of a nested type union, according to a @@ -310,16 +366,16 @@ int jl_count_union_components(jl_value_t *v) // considered an "element". `*pi` is destroyed in the process. static jl_value_t *nth_union_component(jl_value_t *v, int *pi) JL_NOTSAFEPOINT { - if (!jl_is_uniontype(v)) { - if (*pi == 0) - return v; - (*pi)--; - return NULL; + while (jl_is_uniontype(v)) { + jl_uniontype_t *u = (jl_uniontype_t*)v; + jl_value_t *a = nth_union_component(u->a, pi); + if (a) return a; + v = u->b; } - jl_uniontype_t *u = (jl_uniontype_t*)v; - jl_value_t *a = nth_union_component(u->a, pi); - if (a) return a; - return nth_union_component(u->b, pi); + if (*pi == 0) + return v; + (*pi)--; + return NULL; } jl_value_t *jl_nth_union_component(jl_value_t *v, int i) JL_NOTSAFEPOINT @@ -330,12 +386,11 @@ jl_value_t *jl_nth_union_component(jl_value_t *v, int i) JL_NOTSAFEPOINT // inverse of jl_nth_union_component int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT { - if (jl_is_uniontype(haystack)) { - if (jl_find_union_component(((jl_uniontype_t*)haystack)->a, needle, nth)) + while (jl_is_uniontype(haystack)) { + jl_uniontype_t *u = (jl_uniontype_t*)haystack; + if (jl_find_union_component(u->a, needle, nth)) return 1; - if (jl_find_union_component(((jl_uniontype_t*)haystack)->b, needle, nth)) - return 1; - return 0; + haystack = u->b; } if (needle == haystack) return 1; @@ -343,23 +398,6 @@ int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned * return 0; } -static void flatten_type_union(jl_value_t **types, size_t n, jl_value_t **out, size_t *idx) JL_NOTSAFEPOINT -{ - size_t i; - for(i=0; i < n; i++) { - jl_value_t *e = types[i]; - if (jl_is_uniontype(e)) { - jl_uniontype_t *u = (jl_uniontype_t*)e; - flatten_type_union(&u->a, 1, out, idx); - flatten_type_union(&u->b, 1, out, idx); - } - else { - out[*idx] = e; - (*idx)++; - } - } -} - STATIC_INLINE const char *datatype_module_name(jl_value_t *t) JL_NOTSAFEPOINT { if (((jl_datatype_t*)t)->name->module == NULL) @@ -420,10 +458,8 @@ static int datatype_name_cmp(jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT // sort singletons first, then DataTypes, then UnionAlls, // ties broken alphabetically including module name & type parameters -static int union_sort_cmp(const void *ap, const void *bp) JL_NOTSAFEPOINT +static int union_sort_cmp(jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT { - jl_value_t *a = *(jl_value_t**)ap; - jl_value_t *b = *(jl_value_t**)bp; if (a == NULL) return b == NULL ? 
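/* Toy illustration of the traversal shape used by the rewritten union
 * helpers above (jl_count_union_components, nth_union_component): recurse
 * into the `a` branch, iterate down the `b` spine. `node` is a stand-in for
 * jl_uniontype_t invented for this sketch; a node whose `a` is NULL plays
 * the role of a non-union leaf component. */
#include <stdio.h>

struct node { struct node *a, *b; };

static int count_components(const struct node *v)
{
    int c = 0;
    while (v->a != NULL) {            /* inner node: acts like a Union */
        c += count_components(v->a);  /* left branch recurses */
        v = v->b;                     /* right branch becomes the loop */
    }
    return c + 1;                     /* the trailing leaf */
}

int main(void)
{
    struct node leaf  = {NULL, NULL};
    struct node inner = {&leaf, &leaf};
    struct node root  = {&inner, &leaf};   /* Union{Union{A,B}, C} analogue */
    printf("%d components\n", count_components(&root));   /* prints 3 */
    return 0;
}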
0 : 1; if (b == NULL) @@ -458,27 +494,91 @@ static int union_sort_cmp(const void *ap, const void *bp) JL_NOTSAFEPOINT } } +static int count_union_components(jl_value_t **types, size_t n, int widen) +{ + size_t i, c = 0; + for (i = 0; i < n; i++) { + jl_value_t *e = types[i]; + while (jl_is_uniontype(e)) { + jl_uniontype_t *u = (jl_uniontype_t*)e; + c += count_union_components(&u->a, 1, widen); + e = u->b; + } + if (widen && jl_is_unionall(e) && jl_is_uniontype(jl_unwrap_unionall(e))) { + jl_uniontype_t *u = (jl_uniontype_t*)jl_unwrap_unionall(e); + c += count_union_components(&u->a, 2, widen); + } + else { + c++; + } + } + return c; +} + +static void flatten_type_union(jl_value_t **types, size_t n, jl_value_t **out, size_t *idx, int widen) +{ + size_t i; + for (i = 0; i < n; i++) { + jl_value_t *e = types[i]; + while (jl_is_uniontype(e)) { + jl_uniontype_t *u = (jl_uniontype_t*)e; + flatten_type_union(&u->a, 1, out, idx, widen); + e = u->b; + } + if (widen && jl_is_unionall(e) && jl_is_uniontype(jl_unwrap_unionall(e))) { + // flatten this UnionAll into place by switching the union and unionall + jl_uniontype_t *u = (jl_uniontype_t*)jl_unwrap_unionall(e); + size_t old_idx = 0; + flatten_type_union(&u->a, 2, out, idx, widen); + for (; old_idx < *idx; old_idx++) + out[old_idx] = jl_rewrap_unionall(out[old_idx], e); + } + else { + out[*idx] = e; + (*idx)++; + } + } +} + + +static void isort_union(jl_value_t **a, size_t len) JL_NOTSAFEPOINT +{ + size_t i, j; + for (i = 1; i < len; i++) { + jl_value_t *x = a[i]; + for (j = i; j > 0; j--) { + jl_value_t *y = a[j - 1]; + if (!(union_sort_cmp(x, y) < 0)) + break; + a[j] = y; + } + a[j] = x; + } +} + JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n) { - if (n == 0) return (jl_value_t*)jl_bottom_type; + if (n == 0) + return (jl_value_t*)jl_bottom_type; size_t i; - for(i=0; i < n; i++) { + for (i = 0; i < n; i++) { jl_value_t *pi = ts[i]; if (!(jl_is_type(pi) || jl_is_typevar(pi))) jl_type_error("Union", (jl_value_t*)jl_type_type, pi); } - if (n == 1) return ts[0]; + if (n == 1) + return ts[0]; - size_t nt = count_union_components(ts, n); + size_t nt = count_union_components(ts, n, 1); jl_value_t **temp; JL_GC_PUSHARGS(temp, nt+1); size_t count = 0; - flatten_type_union(ts, n, temp, &count); + flatten_type_union(ts, n, temp, &count, 1); assert(count == nt); size_t j; - for(i=0; i < nt; i++) { - int has_free = temp[i]!=NULL && jl_has_free_typevars(temp[i]); - for(j=0; j < nt; j++) { + for (i = 0; i < nt; i++) { + int has_free = temp[i] != NULL && jl_has_free_typevars(temp[i]); + for (j = 0; j < nt; j++) { if (j != i && temp[i] && temp[j]) { if (temp[i] == jl_bottom_type || temp[j] == (jl_value_t*)jl_any_type || @@ -490,7 +590,7 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n) } } } - qsort(temp, nt, sizeof(jl_value_t*), union_sort_cmp); + isort_union(temp, nt); jl_value_t **ptu = &temp[nt]; *ptu = jl_bottom_type; int k; @@ -508,6 +608,231 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n) return tu; } +// note: this is turned off as `Union` doesn't do such normalization. 
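/* Minimal sketch of the insertion-sort shape that isort_union introduces
 * above as a qsort replacement: stable, suited to the short component
 * arrays involved, and its comparator takes element values directly rather
 * than pointers-to-pointers. Plain ints stand in for jl_value_t* here. */
#include <stddef.h>
#include <stdio.h>

static int cmp(int a, int b) { return (a > b) - (a < b); }

static void isort(int *a, size_t len)
{
    for (size_t i = 1; i < len; i++) {
        int x = a[i];
        size_t j = i;
        for (; j > 0; j--) {
            int y = a[j - 1];
            if (!(cmp(x, y) < 0))
                break;
            a[j] = y;              /* shift larger elements right */
        }
        a[j] = x;                  /* drop x into its slot */
    }
}

int main(void)
{
    int v[] = {3, 1, 2, 2};
    isort(v, 4);
    printf("%d %d %d %d\n", v[0], v[1], v[2], v[3]);   /* 1 2 2 3 */
    return 0;
}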
+// static int simple_subtype(jl_value_t *a, jl_value_t *b) +// { +// if (jl_is_kind(b) && jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b) +// return 1; +// if (jl_is_typevar(b) && obviously_egal(a, ((jl_tvar_t*)b)->lb)) +// return 1; +// return 0; +// } + +static int simple_subtype2(jl_value_t *a, jl_value_t *b, int hasfree) +{ + int subab = 0, subba = 0; + if (jl_egal(a, b)) { + subab = subba = 1; + } + else if (a == jl_bottom_type || b == (jl_value_t*)jl_any_type) { + subab = 1; + } + else if (b == jl_bottom_type || a == (jl_value_t*)jl_any_type) { + subba = 1; + } + else if (hasfree) { + // subab = simple_subtype(a, b); + // subba = simple_subtype(b, a); + } + else if (jl_is_type_type(a) && jl_is_type_type(b) && + jl_typeof(jl_tparam0(a)) != jl_typeof(jl_tparam0(b))) { + // issue #24521: don't merge Type{T} where typeof(T) varies + } + else if (jl_typeof(a) == jl_typeof(b) && jl_types_egal(a, b)) { + subab = subba = 1; + } + else { + subab = jl_subtype(a, b); + subba = jl_subtype(b, a); + } + return subab | (subba<<1); +} + +jl_value_t *simple_union(jl_value_t *a, jl_value_t *b) +{ + size_t nta = count_union_components(&a, 1, 1); + size_t ntb = count_union_components(&b, 1, 1); + size_t nt = nta + ntb; + jl_value_t **temp; + JL_GC_PUSHARGS(temp, nt+1); + size_t count = 0; + flatten_type_union(&a, 1, temp, &count, 1); + flatten_type_union(&b, 1, temp, &count, 1); + assert(count == nt); + size_t i, j; + size_t ra = nta, rb = ntb; + // first remove cross-redundancy and check if `a >: b` or `a <: b`. + for (i = 0; i < nta; i++) { + if (temp[i] == NULL) continue; + int hasfree = jl_has_free_typevars(temp[i]); + for (j = nta; j < nt; j++) { + if (temp[j] == NULL) continue; + int subs = simple_subtype2(temp[i], temp[j], hasfree || jl_has_free_typevars(temp[j])); + int subab = subs & 1, subba = subs >> 1; + if (subab) { + temp[i] = NULL; + if (!subba) ra = 0; + count--; + break; + } + else if (subba) { + temp[j] = NULL; + rb = 0; + count--; + } + } + } + if (count == ra) { + JL_GC_POP(); + return a; + } + if (count == rb) { + JL_GC_POP(); + return b; + } + // then remove self-redundancy + for (i = 0; i < nt; i++) { + int has_free = temp[i] != NULL && jl_has_free_typevars(temp[i]); + size_t jmin = i < nta ? 0 : nta; + size_t jmax = i < nta ? nta : nt; + for (j = jmin; j < jmax; j++) { + if (j != i && temp[i] && temp[j]) { + if (temp[i] == jl_bottom_type || + temp[j] == (jl_value_t*)jl_any_type || + jl_egal(temp[i], temp[j]) || + (!has_free && !jl_has_free_typevars(temp[j]) && + // issue #24521: don't merge Type{T} where typeof(T) varies + !(jl_is_type_type(temp[i]) && jl_is_type_type(temp[j]) && jl_typeof(jl_tparam0(temp[i])) != jl_typeof(jl_tparam0(temp[j]))) && + jl_subtype(temp[i], temp[j]))) { + temp[i] = NULL; + } + } + } + } + isort_union(temp, nt); + temp[nt] = jl_bottom_type; + size_t k; + for (k = nt; k-- > 0; ) { + if (temp[k] != NULL) { + if (temp[nt] == jl_bottom_type) + temp[nt] = temp[k]; + else + temp[nt] = jl_new_struct(jl_uniontype_type, temp[k], temp[nt]); + } + } + assert(temp[nt] != NULL); + jl_value_t *tu = temp[nt]; + JL_GC_POP(); + return tu; +} + +int obviously_disjoint(jl_value_t *a, jl_value_t *b, int specificity); + +jl_value_t *simple_intersect(jl_value_t *a, jl_value_t *b, int overesi) +{ + // Unlike `Union`, we don't unwrap `UnionAll` here to avoid possible widening. 
+ size_t nta = count_union_components(&a, 1, 0); + size_t ntb = count_union_components(&b, 1, 0); + size_t nt = nta + ntb; + jl_value_t **temp; + JL_GC_PUSHARGS(temp, nt+1); + size_t count = 0; + flatten_type_union(&a, 1, temp, &count, 0); + flatten_type_union(&b, 1, temp, &count, 0); + assert(count == nt); + size_t i, j; + int8_t *stemp = (int8_t *)alloca(count); + // first remove disjoint elements. + memset(stemp, 0, count); + for (i = 0; i < nta; i++) { + int hasfree = jl_has_free_typevars(temp[i]); + for (j = nta; j < nt; j++) { + if (!stemp[i] || !stemp[j]) { + int intersect = !hasfree && !jl_has_free_typevars(temp[j]); + if (!(intersect ? jl_has_empty_intersection(temp[i], temp[j]) : obviously_disjoint(temp[i], temp[j], 0))) + stemp[i] = stemp[j] = 1; + } + } + } + for (i = 0; i < nt; i++) { + temp[i] = stemp[i] ? temp[i] : NULL; + } + // then check subtyping. + // stemp[k] == -1 : ∃i temp[k] >:ₛ temp[i] + // stemp[k] == 1 : ∃i temp[k] == temp[i] + // stemp[k] == 2 : ∃i temp[k] <:ₛ temp[i] + memset(stemp, 0, count); + int all_disjoint = 1, subs[2] = {1, 1}, rs[2] = {1, 1}; + for (i = 0; i < nta; i++) { + if (temp[i] == NULL) continue; + all_disjoint = 0; + int hasfree = jl_has_free_typevars(temp[i]); + for (j = nta; j < nt; j++) { + if (temp[j] == NULL) continue; + int subs = simple_subtype2(temp[i], temp[j], hasfree || jl_has_free_typevars(temp[j])); + int subab = subs & 1, subba = subs >> 1; + if (subba && !subab) { + stemp[i] = -1; + if (stemp[j] >= 0) stemp[j] = 2; + } + else if (subab && !subba) { + stemp[j] = -1; + if (stemp[i] >= 0) stemp[i] = 2; + } + else if (subs) { + if (stemp[i] == 0) stemp[i] = 1; + if (stemp[j] == 0) stemp[j] = 1; + } + } + } + if (!all_disjoint) { + for (i = 0; i < nt; i++) { + subs[i >= nta] &= (temp[i] == NULL || stemp[i] > 0); + rs[i >= nta] &= (temp[i] != NULL && stemp[i] > 0); + } + // return a(b) if a(b) <: b(a) + if (rs[0]) { + JL_GC_POP(); + return a; + } + if (rs[1]) { + JL_GC_POP(); + return b; + } + } + // return `Union{}` for `merge_env` if we can't prove `<:` or `>:` + if (all_disjoint || (!overesi && !subs[0] && !subs[1])) { + JL_GC_POP(); + return jl_bottom_type; + } + nt = subs[0] ? nta : subs[1] ? nt : nt; + i = subs[0] ? 0 : subs[1] ? nta : 0; + count = nt - i; + if (!subs[0] && !subs[1]) { + // prepare for over estimation + // only preserve `a` with strict <:, but preserve `b` without strict >: + for (j = 0; j < nt; j++) { + if (stemp[j] < (j < nta ? 
2 : 0)) + temp[j] = NULL; + } + } + isort_union(&temp[i], count); + temp[nt] = jl_bottom_type; + size_t k; + for (k = nt; k-- > i; ) { + if (temp[k] != NULL) { + if (temp[nt] == jl_bottom_type) + temp[nt] = temp[k]; + else + temp[nt] = jl_new_struct(jl_uniontype_type, temp[k], temp[nt]); + } + } + assert(temp[nt] != NULL); + jl_value_t *tu = temp[nt]; + JL_GC_POP(); + return tu; +} + // unionall types ------------------------------------------------------------- JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body) @@ -515,8 +840,8 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body) if (jl_is_vararg(body)) { if (jl_options.depwarn) { if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR) - jl_error("Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead)."); - jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\n"); + jl_error("Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`."); + jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\nYou may need to write `f(x::Vararg{T})` rather than `f(x::Vararg{<:T})` or `f(x::Vararg{T}) where T` instead of `f(x::Vararg{T} where T)`.\n"); } jl_vararg_t *vm = (jl_vararg_t*)body; int T_has_tv = vm->T && jl_has_typevar(vm->T, v); @@ -530,14 +855,14 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body) if (T_has_tv) { jl_value_t *wrapped = jl_type_unionall(v, vm->T); JL_GC_PUSH1(&wrapped); - wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N); + wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1); JL_GC_POP(); return wrapped; } else { assert(N_has_tv); assert(vm->N == (jl_value_t*)v); - return (jl_value_t*)jl_wrap_vararg(vm->T, NULL); + return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1); } } if (!jl_is_type(body) && !jl_is_typevar(body)) @@ -558,7 +883,7 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body) static int typekey_eq(jl_datatype_t *tt, jl_value_t **key, size_t n) { size_t j; - // TOOD: This shouldn't be necessary + // TODO: This shouldn't be necessary JL_GC_PROMISE_ROOTED(tt); size_t tnp = jl_nparams(tt); if (n != tnp) @@ -566,8 +891,8 @@ static int typekey_eq(jl_datatype_t *tt, jl_value_t **key, size_t n) if (tt->name == jl_type_typename) { // for Type{T}, require `typeof(T)` to match also, to avoid incorrect // dispatch from changing the type of something. - // this should work because `Type`s don't have uids, and aren't the - // direct tags of values so we don't rely on pointer equality. + // this should work because `Type`s don't need unique pointers, and aren't the + // direct tags of values (concrete) so we don't rely on pointer equality. jl_value_t *kj = key[0]; jl_value_t *tj = jl_tparam0(tt); return (kj == tj || (jl_typeof(tj) == jl_typeof(kj) && jl_types_equal(tj, kj))); @@ -576,11 +901,14 @@ static int typekey_eq(jl_datatype_t *tt, jl_value_t **key, size_t n) jl_value_t *kj = key[j]; jl_value_t *tj = jl_svecref(tt->parameters, j); if (tj != kj) { - // require exact same Type{T}. see e.g. 
issue #22842 - if (jl_is_type_type(tj) || jl_is_type_type(kj)) - return 0; - if ((jl_is_concrete_type(tj) || jl_is_concrete_type(kj)) && - jl_type_equality_is_identity(tj, kj)) + if (tt->name == jl_tuple_typename) { + // require exact same Type{T} in covariant context. see e.g. issue #22842 + // this should work because `Tuple{Type}`s don't need unique pointers, and aren't the + // direct tags of values (concrete) so we don't rely on pointer equality. + if (jl_is_type_type(tj) || jl_is_type_type(kj)) + return 0; + } + if (jl_type_equality_is_identity(tj, kj)) return 0; if (!jl_types_equal(tj, kj)) return 0; @@ -594,7 +922,7 @@ static int typekey_eq(jl_datatype_t *tt, jl_value_t **key, size_t n) static int typekeyvalue_eq(jl_datatype_t *tt, jl_value_t *key1, jl_value_t **key, size_t n, int leaf) { size_t j; - // TOOD: This shouldn't be necessary + // TODO: This shouldn't be necessary JL_GC_PROMISE_ROOTED(tt); size_t tnp = jl_nparams(tt); if (n != tnp) @@ -642,9 +970,9 @@ static jl_datatype_t *lookup_type_set(jl_svec_t *cache, jl_value_t **key, size_t size_t iter = 0; do { jl_datatype_t *val = jl_atomic_load_relaxed(&tab[index]); - if (val == NULL) + if ((jl_value_t*)val == jl_nothing) return NULL; - if ((jl_value_t*)val != jl_nothing && val->hash == hv && typekey_eq(val, key, n)) + if (val->hash == hv && typekey_eq(val, key, n)) return val; index = (index + 1) & (sz - 1); iter++; @@ -665,9 +993,9 @@ static jl_datatype_t *lookup_type_setvalue(jl_svec_t *cache, jl_value_t *key1, j size_t iter = 0; do { jl_datatype_t *val = jl_atomic_load_relaxed(&tab[index]); - if (val == NULL) + if ((jl_value_t*)val == jl_nothing) return NULL; - if ((jl_value_t*)val != jl_nothing && val->hash == hv && typekeyvalue_eq(val, key1, key, n, leaf)) + if (val->hash == hv && typekeyvalue_eq(val, key1, key, n, leaf)) return val; index = (index + 1) & (sz - 1); iter++; @@ -687,7 +1015,7 @@ static ssize_t lookup_type_idx_linear(jl_svec_t *cache, jl_value_t **key, size_t ssize_t i; for (i = 0; i < cl; i++) { jl_datatype_t *tt = jl_atomic_load_relaxed(&data[i]); - if (tt == NULL) + if ((jl_value_t*)tt == jl_nothing) return ~i; if (typekey_eq(tt, key, n)) return i; @@ -704,7 +1032,7 @@ static ssize_t lookup_type_idx_linearvalue(jl_svec_t *cache, jl_value_t *key1, j ssize_t i; for (i = 0; i < cl; i++) { jl_datatype_t *tt = jl_atomic_load_relaxed(&data[i]); - if (tt == NULL) + if ((jl_value_t*)tt == jl_nothing) return ~i; if (typekeyvalue_eq(tt, key1, key, n, 1)) return i; @@ -714,7 +1042,7 @@ static ssize_t lookup_type_idx_linearvalue(jl_svec_t *cache, jl_value_t *key1, j static jl_value_t *lookup_type(jl_typename_t *tn JL_PROPAGATES_ROOT, jl_value_t **key, size_t n) { - JL_TIMING(TYPE_CACHE_LOOKUP); + JL_TIMING(TYPE_CACHE_LOOKUP, TYPE_CACHE_LOOKUP); if (tn == jl_type_typename) { assert(n == 1); jl_value_t *uw = jl_unwrap_unionall(key[0]); @@ -735,7 +1063,7 @@ static jl_value_t *lookup_type(jl_typename_t *tn JL_PROPAGATES_ROOT, jl_value_t static jl_value_t *lookup_typevalue(jl_typename_t *tn, jl_value_t *key1, jl_value_t **key, size_t n, int leaf) { - JL_TIMING(TYPE_CACHE_LOOKUP); + JL_TIMING(TYPE_CACHE_LOOKUP, TYPE_CACHE_LOOKUP); unsigned hv = typekeyvalue_hash(tn, key1, key, n, leaf); if (hv) { jl_svec_t *cache = jl_atomic_load_relaxed(&tn->cache); @@ -762,7 +1090,7 @@ static int cache_insert_type_set_(jl_svec_t *a, jl_datatype_t *val, uint_t hv, i size_t maxprobe = max_probe(sz); do { jl_value_t *tab_i = jl_atomic_load_relaxed(&tab[index]); - if (tab_i == NULL || tab_i == jl_nothing) { + if (tab_i == jl_nothing) { if 
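/* Sketch of the open-addressing probe loop used by lookup_type_set and
 * friends above, where the patch switches the "empty slot" marker from NULL
 * to jl_nothing. Plain ints and a -1 sentinel stand in for jl_datatype_t*
 * and jl_nothing; the table size is a power of two, as in the runtime.
 * Illustration only, not the patch's code. */
#include <stddef.h>
#include <stdio.h>

#define EMPTY (-1)                 /* stand-in for jl_nothing */

static int contains(const int *tab, size_t sz, int key)
{
    size_t index = (size_t)key & (sz - 1);
    size_t iter = 0, maxprobe = sz;
    do {
        int val = tab[index];
        if (val == EMPTY)
            return 0;              /* an empty slot ends the probe: not present */
        if (val == key)
            return 1;
        index = (index + 1) & (sz - 1);   /* linear probing */
        iter++;
    } while (iter <= maxprobe);
    return 0;
}

int main(void)
{
    int tab[8] = {EMPTY, 9, EMPTY, 3, EMPTY, EMPTY, EMPTY, EMPTY};
    printf("%d %d\n", contains(tab, 8, 3), contains(tab, 8, 4));   /* 1 0 */
    return 0;
}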
(atomic) jl_atomic_store_release(&tab[index], (jl_value_t*)val); else @@ -777,8 +1105,6 @@ static int cache_insert_type_set_(jl_svec_t *a, jl_datatype_t *val, uint_t hv, i return 0; } -static jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz); - static void cache_insert_type_set(jl_datatype_t *val, uint_t hv) { jl_svec_t *a = jl_atomic_load_relaxed(&val->name->cache); @@ -805,17 +1131,17 @@ static void cache_insert_type_set(jl_datatype_t *val, uint_t hv) } } -static jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz) +jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz) { jl_value_t **ol = jl_svec_data(a); size_t sz = jl_svec_len(a); while (1) { size_t i; - jl_svec_t *newa = jl_alloc_svec(newsz); + jl_svec_t *newa = jl_svec_fill(newsz, jl_nothing); JL_GC_PUSH1(&newa); for (i = 0; i < sz; i += 1) { jl_value_t *val = ol[i]; - if (val != NULL && val != jl_nothing) { + if (val != jl_nothing) { uint_t hv = ((jl_datatype_t*)val)->hash; if (!cache_insert_type_set_(newa, (jl_datatype_t*)val, hv, 0)) { break; @@ -834,15 +1160,14 @@ static void cache_insert_type_linear(jl_datatype_t *type, ssize_t insert_at) jl_svec_t *cache = jl_atomic_load_relaxed(&type->name->linearcache); assert(jl_is_svec(cache)); size_t n = jl_svec_len(cache); - if (n == 0 || jl_svecref(cache, n - 1) != NULL) { - jl_svec_t *nc = jl_alloc_svec(n < 8 ? 8 : (n*3)>>1); + if (n == 0 || jl_svecref(cache, n - 1) != jl_nothing) { + jl_svec_t *nc = jl_svec_fill(n < 4 ? 4 : n * 2, jl_nothing); memcpy(jl_svec_data(nc), jl_svec_data(cache), sizeof(void*) * n); jl_atomic_store_release(&type->name->linearcache, nc); jl_gc_wb(type->name, nc); cache = nc; - n = jl_svec_len(nc); } - assert(jl_svecref(cache, insert_at) == NULL); + assert(jl_svecref(cache, insert_at) == jl_nothing); jl_svecset(cache, insert_at, (jl_value_t*)type); // todo: make this an atomic-store } @@ -858,7 +1183,7 @@ static int is_cacheable(jl_datatype_t *type) void jl_cache_type_(jl_datatype_t *type) { - JL_TIMING(TYPE_CACHE_INSERT); + JL_TIMING(TYPE_CACHE_INSERT, TYPE_CACHE_INSERT); assert(is_cacheable(type)); jl_value_t **key = jl_svec_data(type->parameters); int n = jl_svec_len(type->parameters); @@ -892,16 +1217,88 @@ jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type) return (jl_datatype_t*)lookup_type(type->name, key, n); } -JL_DLLEXPORT int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) +// compute whether kj might actually be a subtype of something in the cache +// (which otherwise would normally be comparable with pointer-egal) +static int maybe_subtype_of_cache(jl_value_t *kj, int covariant) JL_NOTSAFEPOINT { - if (t1 == t2) + jl_value_t *uw = jl_is_unionall(kj) ? 
jl_unwrap_unionall(kj) : kj; + if (jl_is_datatype(uw)) { + jl_datatype_t *dt = (jl_datatype_t*)uw; + return dt->maybe_subtype_of_cache; + } + else if (jl_is_uniontype(uw)) { + int ca = maybe_subtype_of_cache(((jl_uniontype_t*)uw)->a, covariant); + int cb = maybe_subtype_of_cache(((jl_uniontype_t*)uw)->b, covariant); + return ca && cb; + } + else if (uw == jl_bottom_type) { return 1; - if (!jl_is_datatype(t1) || !jl_is_datatype(t2)) - return 0; - jl_datatype_t *dt1 = (jl_datatype_t *) t1; - jl_datatype_t *dt2 = (jl_datatype_t *) t2; + } + else if (jl_is_typevar(uw) && !covariant) { // assume Tuple's bounds are always degenerate + // TODO: improve this bound if we can prove that typeintersect(lb,ub) is a leaftype + jl_tvar_t *tv = (jl_tvar_t*)uw; + return tv->lb == tv->ub || + tv->lb != jl_bottom_type; + } + return 1; +} + +// compute whether kj might have a supertype which is actually concrete +static int has_concrete_supertype(jl_value_t *kj) JL_NOTSAFEPOINT +{ + jl_value_t *uw = jl_is_unionall(kj) ? jl_unwrap_unionall(kj) : kj; + if (jl_is_datatype(uw)) { + jl_datatype_t *dt = (jl_datatype_t*)uw; + if (dt->name->abstract && dt->name != jl_type_typename) + return 0; + if (!dt->maybe_subtype_of_cache) + return 0; + if (dt->name == jl_tuple_typename) { + // check tuple parameters recursively for has_concrete_supertype + size_t i, n = jl_nparams(dt); + for (i = 0; i < n; i++) { + jl_value_t *p = jl_tparam(dt, i); + if (jl_is_vararg(p)) + p = jl_unwrap_vararg(p); + if (!has_concrete_supertype(p)) + return 0; + } + } + return 1; + } + else if (jl_is_uniontype(uw)) { + int ca = has_concrete_supertype(((jl_uniontype_t*)uw)->a); + int cb = has_concrete_supertype(((jl_uniontype_t*)uw)->b); + return ca && cb; + } + else if (uw == jl_bottom_type) { + return 1; + } + else if (jl_is_typevar(uw)) { + jl_tvar_t *tv = (jl_tvar_t*)uw; + return has_concrete_supertype(tv->ub); + } + return 0; +} - return dt1->cached_by_hash == dt2->cached_by_hash; +int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT +{ + int c1 = jl_is_concrete_type(t1); + int c2 = jl_is_concrete_type(t2); + if (c1 && c2) { + if (((jl_datatype_t*)t1)->name != jl_tuple_typename) + return 1; + if (((jl_datatype_t*)t2)->name != jl_tuple_typename) + return 1; + if (((jl_datatype_t*)t1)->has_concrete_subtype && ((jl_datatype_t*)t2)->has_concrete_subtype) + return 1; + // e.g. Tuple{Union{}} and Tuple{Int} are both concrete! + } + if (c1 && !has_concrete_supertype(t2)) + return 1; + if (c2 && !has_concrete_supertype(t1)) + return 1; + return 0; } // type instantiation @@ -927,7 +1324,7 @@ struct _jl_typestack_t; typedef struct _jl_typestack_t jl_typestack_t; static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp, - jl_typestack_t *stack, jl_typeenv_t *env); + jl_typestack_t *stack, jl_typeenv_t *env, int check); // Build an environment mapping a TypeName's parameters to parameter values. // This is the environment needed for instantiating a type's supertype and field types. 
@@ -935,7 +1332,7 @@ static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t ** jl_typestack_t *stack, jl_typeenv_t *env, int c) { if (jl_is_datatype(dt)) - return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env); + return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env, 1); assert(jl_is_unionall(dt)); jl_unionall_t *ua = (jl_unionall_t*)dt; jl_typeenv_t e = { ua->var, iparams[c], env }; @@ -945,7 +1342,7 @@ static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t ** jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n) { if (tc == (jl_value_t*)jl_anytuple_type) - return (jl_value_t*)jl_apply_tuple_type_v(params, n); + return jl_apply_tuple_type_v(params, n); if (tc == (jl_value_t*)jl_uniontype_type) return (jl_value_t*)jl_type_union(params, n); size_t i; @@ -961,8 +1358,12 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n) JL_GC_PUSH1(&tc); jl_value_t *tc0 = tc; for (i=0; i < n; i++) { - if (!jl_is_unionall(tc0)) - jl_error("too many parameters for type"); + if (!jl_is_unionall(tc0)){ + char *typ = ""; + if (jl_is_datatype(tc0)) + typ = jl_symbol_name_(((jl_datatype_t*)tc0)->name->name); + jl_errorf("too many parameters for type %s", typ); + } jl_value_t *pi = params[i]; tc0 = ((jl_unionall_t*)tc0)->body; @@ -1014,6 +1415,15 @@ JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value return jl_apply_type(tc, args, 2); } +JL_DLLEXPORT jl_value_t *jl_apply_type3(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2, jl_value_t *p3) +{ + jl_value_t *args[3]; + args[0] = p1; + args[1] = p2; + args[2] = p3; + return jl_apply_type(tc, args, 3); +} + jl_datatype_t *jl_apply_modify_type(jl_value_t *dt) { jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2(jl_pair_type, dt, dt); @@ -1021,7 +1431,7 @@ jl_datatype_t *jl_apply_modify_type(jl_value_t *dt) return rettyp; } -jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt) +jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *ty) { jl_value_t *params[2]; jl_value_t *names = jl_atomic_load_relaxed(&cmpswap_names); @@ -1032,24 +1442,13 @@ jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt) if (jl_atomic_cmpswap(&cmpswap_names, &names, lnames)) names = jl_atomic_load_relaxed(&cmpswap_names); // == lnames } - params[0] = dt; + params[0] = ty; params[1] = (jl_value_t*)jl_bool_type; - jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2); - JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE) - jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, (jl_value_t*)tuptyp); - JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) - return rettyp; -} - -JL_DLLEXPORT jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v) -{ - // TODO: replace with just using NTuple - jl_value_t *p = NULL; - JL_GC_PUSH1(&p); - p = (jl_value_t*)jl_svec_fill(n, v); - p = (jl_value_t*)jl_apply_tuple_type((jl_svec_t*)p); + jl_value_t *tuptyp = jl_apply_tuple_type_v(params, 2); + JL_GC_PUSH1(&tuptyp); + jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, tuptyp); JL_GC_POP(); - return p; + return rettyp; } JL_EXTENSION struct _jl_typestack_t { @@ -1058,7 +1457,7 @@ JL_EXTENSION struct _jl_typestack_t { }; static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t *stack, int check); -static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack); +static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, 
jl_typestack_t *stack, int cacheable); JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p) { @@ -1066,8 +1465,22 @@ JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p return inst_type_w_(u->body, &env, NULL, 1); } +jl_unionall_t *jl_rename_unionall(jl_unionall_t *u) +{ + jl_tvar_t *v = jl_new_typevar(u->var->name, u->var->lb, u->var->ub); + jl_value_t *t = NULL; + JL_GC_PUSH2(&v, &t); + jl_typeenv_t env = { u->var, (jl_value_t *)v, NULL }; + t = inst_type_w_(u->body, &env, NULL, 0); + t = jl_new_struct(jl_unionall_type, v, t); + JL_GC_POP(); + return (jl_unionall_t*)t; +} + jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val) { + if (val == (jl_value_t*)var) + return t; jl_typeenv_t env = { var, val, NULL }; return inst_type_w_(t, &env, NULL, 1); } @@ -1080,12 +1493,36 @@ jl_value_t *jl_unwrap_unionall(jl_value_t *v) } // wrap `t` in the same unionalls that surround `u` +// where `t` is derived from `u`, so the error checks in jl_type_unionall are unnecessary jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u) { if (!jl_is_unionall(u)) return t; - JL_GC_PUSH1(&t); t = jl_rewrap_unionall(t, ((jl_unionall_t*)u)->body); + jl_tvar_t *v = ((jl_unionall_t*)u)->var; + // normalize `T where T<:S` => S + if (t == (jl_value_t*)v) + return v->ub; + // where var doesn't occur in body just return body + if (!jl_has_typevar(t, v)) + return t; + JL_GC_PUSH1(&t); + //if (v->lb == v->ub) // TODO maybe + // t = jl_substitute_var(body, v, v->ub); + //else + t = jl_new_struct(jl_unionall_type, v, t); + JL_GC_POP(); + return t; +} + +// wrap `t` in the same unionalls that surround `u` +// where `t` is extended from `u`, so the checks in jl_rewrap_unionall are unnecessary +jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u) +{ + if (!jl_is_unionall(u)) + return t; + t = jl_rewrap_unionall_(t, ((jl_unionall_t*)u)->body); + JL_GC_PUSH1(&t); t = jl_new_struct(jl_unionall_type, ((jl_unionall_t*)u)->var, t); JL_GC_POP(); return t; @@ -1110,7 +1547,7 @@ static jl_value_t *lookup_type_stack(jl_typestack_t *stack, jl_datatype_t *tt, s } // stable numbering for types--starts with name->hash, then falls back to objectid -// sets failed if the hash value isn't stable (if not set on entry) +// sets *failed if the hash value isn't stable (if this param not set on entry) static unsigned type_hash(jl_value_t *kj, int *failed) JL_NOTSAFEPOINT { jl_value_t *uw = jl_is_unionall(kj) ? jl_unwrap_unionall(kj) : kj; @@ -1122,32 +1559,21 @@ static unsigned type_hash(jl_value_t *kj, int *failed) JL_NOTSAFEPOINT *failed = 1; return 0; } + // compute a hash now, only for the parent object we are putting in the cache hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), jl_svec_len(dt->parameters), *failed); } return hash; } else if (jl_is_typevar(uw)) { - if (!*failed) { - *failed = 1; - return 0; - } // ignore var and lb, since those might get normalized out in equality testing return type_hash(((jl_tvar_t*)uw)->ub, failed); } - else if (jl_is_vararg(uw)) { - if (!*failed) { - *failed = 1; - return 0; - } - jl_vararg_t *vm = (jl_vararg_t *)uw; - // 0x064eeaab is just a randomly chosen constant - return bitmix(type_hash(vm->T ? vm->T : (jl_value_t*)jl_any_type, failed), vm->N ? 
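/* Illustrative sketch of the Vararg handling that typekey_hash gains above:
 * a Vararg{T,N} with a literal N contributes T's hash N times, while a
 * Vararg with unknown length also mixes in the fixed constant 0x064eeaab.
 * The `mix` function below is an arbitrary stand-in for the runtime's
 * bitmix, and the inputs are made up for the example. */
#include <stddef.h>
#include <stdio.h>

static unsigned mix(unsigned a, unsigned h)
{
    return (a * 2654435761u) ^ (h + 0x9e3779b9u + (h << 6));   /* stand-in for bitmix */
}

static unsigned hash_params(const unsigned *phash, const long *repeats, size_t n)
{
    unsigned hash = 3;
    for (size_t j = 0; j < n; j++) {
        long r = repeats[j];
        if (r < 0) {                    /* Vararg with unknown length */
            hash = mix(0x064eeaab, hash);
            r = 1;
        }
        while (r-- > 0)
            hash = mix(phash[j], hash); /* literal N: repeat the element hash */
    }
    return hash ? hash : 1;
}

int main(void)
{
    unsigned p[]   = {17u, 42u};
    long repeats[] = {1, 3};            /* e.g. Tuple{A, Vararg{B, 3}} */
    printf("%u\n", hash_params(p, repeats, 2));
    return 0;
}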
type_hash(vm->N, failed) : 0x064eeaab); - } else if (jl_is_uniontype(uw)) { if (!*failed) { *failed = 1; return 0; } + // compute a hash now, only for the parent object we are putting in the cache unsigned hasha = type_hash(((jl_uniontype_t*)uw)->a, failed); unsigned hashb = type_hash(((jl_uniontype_t*)uw)->b, failed); // use a associative mixing function, with well-defined overflow @@ -1159,6 +1585,14 @@ static unsigned type_hash(jl_value_t *kj, int *failed) JL_NOTSAFEPOINT } } +JL_DLLEXPORT uintptr_t jl_type_hash(jl_value_t *v) JL_NOTSAFEPOINT +{ + // NOTE: The value of `failed` is purposefully ignored here. The parameter is relevant + // for other parts of the internal algorithm but not for exposing to the Julia side. + int failed = 0; + return type_hash(v, &failed); +} + static unsigned typekey_hash(jl_typename_t *tn, jl_value_t **key, size_t n, int nofail) JL_NOTSAFEPOINT { if (tn == jl_type_typename && key[0] == jl_bottom_type) @@ -1167,9 +1601,21 @@ static unsigned typekey_hash(jl_typename_t *tn, jl_value_t **key, size_t n, int unsigned hash = 3; int failed = nofail; for (j = 0; j < n; j++) { - hash = bitmix(type_hash(key[j], &failed), hash); + jl_value_t *p = key[j]; + size_t repeats = 1; + if (jl_is_vararg(p)) { + jl_vararg_t *vm = (jl_vararg_t*)p; + if (vm->N && jl_is_long(vm->N)) + repeats = jl_unbox_long(vm->N); + else + hash = bitmix(0x064eeaab, hash); // 0x064eeaab is just a randomly chosen constant + p = vm->T ? vm->T : (jl_value_t*)jl_any_type; + } + unsigned hashp = type_hash(p, &failed); if (failed && !nofail) return 0; + while (repeats--) + hash = bitmix(hashp, hash); } hash = bitmix(~tn->hash, hash); return hash ? hash : 1; @@ -1200,6 +1646,7 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable) { int istuple = (dt->name == jl_tuple_typename); dt->hasfreetypevars = 0; + dt->maybe_subtype_of_cache = 1; dt->isconcretetype = !dt->name->abstract; dt->isdispatchtuple = istuple; size_t i, l = jl_nparams(dt); @@ -1210,30 +1657,41 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable) if (dt->hasfreetypevars) dt->isconcretetype = 0; } - if (istuple && dt->isconcretetype) - dt->isconcretetype = (jl_is_datatype(p) && ((jl_datatype_t*)p)->isconcretetype) || p == jl_bottom_type; - if (dt->isdispatchtuple) { - dt->isdispatchtuple = jl_is_datatype(p) && - ((!jl_is_kind(p) && ((jl_datatype_t*)p)->isconcretetype) || - (p == (jl_value_t*)jl_typeofbottom_type) || // == Type{Union{}}, so needs to be consistent - (((jl_datatype_t*)p)->name == jl_type_typename && !((jl_datatype_t*)p)->hasfreetypevars)); + if (istuple) { + if (dt->isconcretetype) + dt->isconcretetype = (jl_is_datatype(p) && ((jl_datatype_t*)p)->isconcretetype) || p == jl_bottom_type; + if (dt->isdispatchtuple) { + dt->isdispatchtuple = jl_is_datatype(p) && + ((!jl_is_kind(p) && ((jl_datatype_t*)p)->isconcretetype) || + (p == (jl_value_t*)jl_typeofbottom_type) || // == Type{Union{}}, so needs to be consistent + (((jl_datatype_t*)p)->name == jl_type_typename && !((jl_datatype_t*)p)->hasfreetypevars)); + } } + if (jl_is_vararg(p)) + p = ((jl_vararg_t*)p)->T; if (istuple && dt->has_concrete_subtype) { - if (jl_is_vararg(p)) - p = ((jl_vararg_t*)p)->T; - // tuple types like Tuple{:x} cannot have instances + // tuple types like Tuple{:x} and Tuple{Union{}} cannot have instances if (p && !jl_is_type(p) && !jl_is_typevar(p)) dt->has_concrete_subtype = 0; + if (p == jl_bottom_type) + dt->has_concrete_subtype = 0; + } + if (dt->maybe_subtype_of_cache) { + dt->maybe_subtype_of_cache = !p || 
maybe_subtype_of_cache(p, istuple) || !jl_has_free_typevars(p); } } + assert(dt->isconcretetype || dt->isdispatchtuple ? dt->maybe_subtype_of_cache : 1); if (dt->name == jl_type_typename) { - cacheable = 0; // the cache for Type ignores parameter normalization, so it can't be used as a regular hash jl_value_t *p = jl_tparam(dt, 0); if (!jl_is_type(p) && !jl_is_typevar(p)) // Type{v} has no subtypes, if v is not a Type dt->has_concrete_subtype = 0; + dt->maybe_subtype_of_cache = 1; + jl_value_t *uw = jl_unwrap_unionall(p); + // n.b. the cache for Type ignores parameter normalization except for Typeofwrapper, so it can't be used to make a stable hash value + if (!jl_is_datatype(uw) || ((jl_datatype_t*)uw)->name->wrapper != p) + cacheable = 0; } dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable); - dt->cached_by_hash = cacheable ? (typekey_hash(dt->name, jl_svec_data(dt->parameters), l, 0) != 0) : (dt->hash != 0); } static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, size_t np) @@ -1271,7 +1729,7 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si JL_GC_POP(); } -static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY_ROOTED +static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT JL_GLOBALLY_ROOTED { t = jl_unwrap_unionall(t); if (jl_is_datatype(t)) @@ -1286,47 +1744,56 @@ static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY return NULL; } -int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT +static int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT { - if (v == (jl_value_t*)var) { - if (inside_inv) { - return 0; + while (1) { + if (v == (jl_value_t*)var) { + if (inside_inv) { + return 0; + } + else { + (*cov_count)++; + return *cov_count <= 1 || jl_is_concrete_type(var->ub); + } } - else { - (*cov_count)++; - return *cov_count <= 1 || jl_is_concrete_type(var->ub); + while (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + if (ua->var == var) + return 1; + if (ua->var->lb != jl_bottom_type && !_may_substitute_ub(ua->var->lb, var, inside_inv, cov_count)) + return 0; + if (ua->var->ub != (jl_value_t*)jl_any_type && !_may_substitute_ub(ua->var->ub, var, inside_inv, cov_count)) + return 0; + v = ua->body; } - } - else if (jl_is_uniontype(v)) { - return _may_substitute_ub(((jl_uniontype_t*)v)->a, var, inside_inv, cov_count) && - _may_substitute_ub(((jl_uniontype_t*)v)->b, var, inside_inv, cov_count); - } - else if (jl_is_unionall(v)) { - jl_unionall_t *ua = (jl_unionall_t*)v; - if (ua->var == var) + if (jl_is_datatype(v)) { + int invar = inside_inv || !jl_is_tuple_type(v); + for (size_t i = 0; i < jl_nparams(v); i++) { + if (!_may_substitute_ub(jl_tparam(v, i), var, invar, cov_count)) + return 0; + } return 1; - return _may_substitute_ub(ua->var->lb, var, inside_inv, cov_count) && - _may_substitute_ub(ua->var->ub, var, inside_inv, cov_count) && - _may_substitute_ub(ua->body, var, inside_inv, cov_count); - } - else if (jl_is_datatype(v)) { - int invar = inside_inv || !jl_is_tuple_type(v); - for (size_t i = 0; i < jl_nparams(v); i++) { - if (!_may_substitute_ub(jl_tparam(v,i), var, invar, cov_count)) + } + else if (jl_is_uniontype(v)) { + // TODO: is !inside_inv, these don't have to share the changes to cov_count + if (!_may_substitute_ub(((jl_uniontype_t*)v)->a, var, inside_inv, cov_count)) return 0; + v = 
((jl_uniontype_t*)v)->b; + } + else if (jl_is_vararg(v)) { + jl_vararg_t *va = (jl_vararg_t*)v; + if (!va->T) + return 1; + if (va->N && !_may_substitute_ub(va->N, var, 1, cov_count)) + return 0; + if (!jl_is_concrete_type(var->ub)) + inside_inv = 1; // treat as invariant inside vararg, for the sake of this algorithm + v = va->T; + } + else { + return 1; } } - else if (jl_is_vararg(v)) { - jl_vararg_t *va = (jl_vararg_t*)v; - int old_count = *cov_count; - if (va->T && !_may_substitute_ub(va->T, var, inside_inv, cov_count)) - return 0; - if (*cov_count > old_count && !jl_is_concrete_type(var->ub)) - return 0; - if (va->N && !_may_substitute_ub(va->N, var, 1, cov_count)) - return 0; - } - return 1; } // Check whether `var` may be replaced with its upper bound `ub` in `v where var<:ub` @@ -1334,15 +1801,14 @@ int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_c // * `var` does not appear in invariant position // * `var` appears at most once (in covariant position) and not in a `Vararg` // unless the upper bound is concrete (diagonal rule) -int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT +static int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT { int cov_count = 0; return _may_substitute_ub(v, var, 0, &cov_count); } -jl_value_t *normalize_unionalls(jl_value_t *t) +static jl_value_t *normalize_unionalls(jl_value_t *t) { - JL_GC_PUSH1(&t); if (jl_is_uniontype(t)) { jl_uniontype_t *u = (jl_uniontype_t*)t; jl_value_t *a = NULL; @@ -1358,14 +1824,14 @@ jl_value_t *normalize_unionalls(jl_value_t *t) else if (jl_is_unionall(t)) { jl_unionall_t *u = (jl_unionall_t*)t; jl_value_t *body = normalize_unionalls(u->body); + JL_GC_PUSH1(&body); if (body != u->body) { - JL_GC_PUSH1(&body); t = jl_new_struct(jl_unionall_type, u->var, body); - JL_GC_POP(); u = (jl_unionall_t*)t; } if (u->var->lb == u->var->ub || may_substitute_ub(body, u->var)) { + body = (jl_value_t*)u; JL_TRY { t = jl_instantiate_unionall(u, u->var->ub); } @@ -1374,33 +1840,75 @@ jl_value_t *normalize_unionalls(jl_value_t *t) // (may happen for bounds inconsistent with the wrapper's bounds) } } + JL_GC_POP(); } - JL_GC_POP(); return t; } +// used to expand an NTuple to a flat representation +static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *t, int check) +{ + jl_value_t *p = NULL; + JL_GC_PUSH1(&p); + if (check) { + // Since we are skipping making the Vararg and skipping checks later, + // we inline the checks from jl_wrap_vararg here now + if (!jl_valid_type_param(t)) + jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + // jl_wrap_vararg sometimes simplifies the type, so we only do this 1 time, instead of for each n later + t = normalize_unionalls(t); + p = t; + jl_value_t *tw = extract_wrapper(t); + if (tw && t != tw && jl_types_equal(t, tw)) + t = tw; + p = t; + check = 0; // remember that checks are already done now + } + p = (jl_value_t*)jl_svec_fill(n, t); + p = jl_apply_tuple_type((jl_svec_t*)p, check); + JL_GC_POP(); + return p; +} + static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack); static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp, - jl_typestack_t *stack, jl_typeenv_t *env) + jl_typestack_t *stack, jl_typeenv_t *env, int check) { jl_typestack_t top; jl_typename_t *tn = dt->name; int istuple = (tn == jl_tuple_typename); int isnamedtuple = (tn == jl_namedtuple_typename); - if (tn != 
jl_type_typename) { - size_t i; - for (i = 0; i < ntp; i++) - iparams[i] = normalize_unionalls(iparams[i]); - } - // check type cache, if applicable + // check if type cache will be applicable int cacheable = 1; if (istuple) { size_t i; - for (i = 0; cacheable && i < ntp; i++) - if (!jl_is_concrete_type(iparams[i]) && iparams[i] != jl_bottom_type) + for (i = 0; i < ntp; i++) { + jl_value_t *pi = iparams[i]; + if (jl_is_vararg(pi) && jl_unwrap_vararg(pi) == jl_bottom_type) { + jl_value_t *va1 = jl_unwrap_vararg_num(pi); + if (va1 && jl_is_long(va1)) { + ssize_t nt = jl_unbox_long(va1); + if (nt == 0) + va1 = NULL; + else + pi = jl_bottom_type; // trigger errorf below + } + // This imposes an implicit constraint that va1==0, + // so we keep the Vararg if it has a TypeVar + if (va1 == NULL) { + p = NULL; + ntp -= 1; + assert(i == ntp); + break; + } + } + if (pi == jl_bottom_type) + jl_errorf("Tuple field type cannot be Union{}"); + if (cacheable && !jl_is_concrete_type(pi)) cacheable = 0; + } } else { size_t i; @@ -1408,7 +1916,15 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (jl_has_free_typevars(iparams[i])) cacheable = 0; } + // if applicable, check the cache first for a match if (cacheable) { + jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); + if (lkup != NULL) + return lkup; + } + // if some normalization might be needed, do that now + // it is probably okay to mutate iparams, and we only store globally rooted objects here + if (check) { size_t i; for (i = 0; i < ntp; i++) { jl_value_t *pi = iparams[i]; @@ -1416,18 +1932,15 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value continue; if (jl_is_datatype(pi)) continue; - if (jl_is_vararg(pi)) { - pi = jl_unwrap_vararg(pi); - if (jl_has_free_typevars(pi)) - continue; - } - // normalize types equal to wrappers (prepare for wrapper_id) + if (jl_is_vararg(pi)) + // This is already handled in jl_wrap_vararg instead + continue; + if (!cacheable && jl_has_free_typevars(pi)) + continue; + // normalize types equal to wrappers (prepare for Typeofwrapper) jl_value_t *tw = extract_wrapper(pi); if (tw && tw != pi && (tn != jl_type_typename || jl_typeof(pi) == jl_typeof(tw)) && jl_types_equal(pi, tw)) { - // This would require some special handling, but is never used at - // the moment. 
- assert(!jl_is_vararg(iparams[i])); iparams[i] = tw; if (p) jl_gc_wb(p, tw); } @@ -1437,6 +1950,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value // normalize Type{Type{Union{}}} to Type{TypeofBottom} iparams[0] = (jl_value_t*)jl_typeofbottom_type; } + } + // then check the cache again, if applicable + if (cacheable) { jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); if (lkup != NULL) return lkup; @@ -1445,19 +1961,22 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (stack_lkup) return stack_lkup; - if (!istuple) { - // check parameters against bounds in type definition + // check parameters against bounds in type definition + // for whether this is even valid + if (check && !istuple) { + assert(ntp > 0); check_datatype_parameters(tn, iparams, ntp); } else if (ntp == 0 && jl_emptytuple_type != NULL) { // empty tuple type case + assert(istuple); return (jl_value_t*)jl_emptytuple_type; } jl_datatype_t *ndt = NULL; - jl_value_t *last = iparams[ntp - 1]; - JL_GC_PUSH3(&p, &ndt, &last); + JL_GC_PUSH2(&p, &ndt); + jl_value_t *last = iparams[ntp - 1]; if (istuple && ntp > 0 && jl_is_vararg(last)) { // normalize Tuple{..., Vararg{Int, 3}} to Tuple{..., Int, Int, Int} jl_value_t *va = jl_unwrap_unionall(last); @@ -1473,7 +1992,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (nt == 0 || !jl_has_free_typevars(va0)) { if (ntp == 1) { JL_GC_POP(); - return jl_tupletype_fill(nt, va0); + return jl_tupletype_fill(nt, va0, 0); } size_t i, l; p = jl_alloc_svec(ntp - 1 + nt); @@ -1482,26 +2001,83 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value l = ntp - 1 + nt; for (; i < l; i++) jl_svecset(p, i, va0); - jl_value_t *ndt = (jl_value_t*)jl_apply_tuple_type(p); + jl_value_t *ndt = jl_apply_tuple_type(p, check); JL_GC_POP(); return ndt; } } } + // try to simplify some type parameters + if (check && tn != jl_type_typename) { + int changed = 0; + if (istuple) // normalization might change Tuple's, but not other types's, cacheable status + cacheable = 1; + size_t i; + for (i = 0; i < ntp; i++) { + jl_value_t *pi = iparams[i]; + jl_value_t *newp = normalize_unionalls(pi); + if (newp != pi) { + iparams[i] = newp; + if (p) jl_gc_wb(p, newp); + changed = 1; + } + if (istuple && cacheable && !jl_is_concrete_type(newp)) + cacheable = 0; + } + if (changed) { + // If this changed something, we need to check the cache again, in + // case we missed the match earlier before the normalizations + // + // e.g. 
return inst_datatype_inner(dt, p, iparams, ntp, stack, env, 0); + if (cacheable) { + jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); + if (lkup != NULL) { + JL_GC_POP(); + return lkup; + } + } + jl_value_t *stack_lkup = lookup_type_stack(stack, dt, ntp, iparams); + if (stack_lkup) { + JL_GC_POP(); + return stack_lkup; + } + } + } + + // try to reduce duplication in objects (if the caller didn't already check) by + // comparing them against a list of objects already known to be globally rooted and + // swapping them as possible + if (check && jl_global_roots_list != NULL) { + for (size_t i = 0; i < ntp; i++) { + jl_value_t *pi = iparams[i]; + if (cacheable || !jl_has_free_typevars(pi)) { + pi = jl_as_global_root(pi, cacheable); + if (pi != NULL) { + iparams[i] = pi; + if (p) jl_gc_wb(p, pi); + } + } + } + } + // move array of instantiated parameters to heap; we need to keep it if (p == NULL) { p = jl_alloc_svec_uninit(ntp); - for (size_t i = 0; i < ntp; i++) + for (size_t i = 0; i < ntp; i++) { jl_svecset(p, i, iparams[i]); + } } + ndt = jl_new_uninitialized_datatype(); + + // now that most allocations are done // acquire the write lock now that we know we need a new object // since we're going to immediately leak it globally via the instantiation stack if (cacheable) { JL_LOCK(&typecache_lock); // Might GC jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); - if (lkup != NULL) { + if (lkup) { JL_UNLOCK(&typecache_lock); // Might GC JL_GC_POP(); return lkup; @@ -1509,7 +2085,10 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value } // create and initialize new type - ndt = jl_new_uninitialized_datatype(); + ndt->isprimitivetype = dt->isprimitivetype; + // Usually dt won't have ismutationfree set at this point, but it is + // overridden for `Type`, which we handle here. + ndt->ismutationfree = dt->ismutationfree; // associate these parameters with the new type on // the stack, in case one of its field types references it. 
top.tt = (jl_datatype_t*)ndt; @@ -1551,10 +2130,17 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value ndt->types = jl_emptysvec; // XXX: this is essentially always false } } + else if (tn == jl_genericmemoryref_typename || tn == jl_genericmemory_typename) { + jl_value_t *isatomic = jl_svecref(p, 0); + if (!jl_is_typevar(isatomic) && !jl_is_symbol(isatomic)) + jl_type_error_rt("GenericMemory", "isatomic parameter", (jl_value_t*)jl_symbol_type, isatomic); + jl_value_t *addrspace = jl_svecref(p, 2); + if (!jl_is_typevar(addrspace) && !jl_is_addrspace(addrspace)) + jl_type_error_rt("GenericMemory", "addrspace parameter", (jl_value_t*)jl_addrspace_type, addrspace); + } jl_datatype_t *primarydt = ((jl_datatype_t*)jl_unwrap_unionall(tn->wrapper)); jl_precompute_memoized_dt(ndt, cacheable); - ndt->size = 0; if (primarydt->layout) jl_compute_field_offsets(ndt); @@ -1562,7 +2148,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value ndt->super = jl_any_type; } else if (dt->super) { - ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)dt->super, env, stack, 1); + ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)dt->super, env, stack, check); jl_gc_wb(ndt, ndt->super); } jl_svec_t *ftypes = dt->types; @@ -1586,9 +2172,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value else if (cacheable) { // recursively instantiate the types of the fields if (dt->types == NULL) - ndt->types = jl_compute_fieldtypes(ndt, stack); + ndt->types = jl_compute_fieldtypes(ndt, stack, cacheable); else - ndt->types = inst_ftypes(ftypes, env, stack); + ndt->types = inst_ftypes(ftypes, env, stack, cacheable); jl_gc_wb(ndt, ndt->types); } } @@ -1608,19 +2194,19 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value return (jl_value_t*)ndt; } -static jl_tupletype_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params) +static jl_value_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params, int check) { - return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL); + return inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL, check); } -JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params) +JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params, int check) { - return jl_apply_tuple_type_v_(jl_svec_data(params), jl_svec_len(params), params); + return jl_apply_tuple_type_v_(jl_svec_data(params), jl_svec_len(params), params, check); } -JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np) +JL_DLLEXPORT jl_value_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np) { - return jl_apply_tuple_type_v_(p, np, NULL); + return jl_apply_tuple_type_v_(p, np, NULL, 1); } jl_tupletype_t *jl_lookup_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf) @@ -1649,20 +2235,21 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size } jl_svecset(params, i, ai); } - tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL); + tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL, 1); JL_GC_POP(); } return tt; } -static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack) +static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *stack, int cacheable) { size_t i; size_t lp = jl_svec_len(p); jl_svec_t *np = 
jl_alloc_svec(lp); - JL_GC_PUSH1(&np); + jl_value_t *pi = NULL; + JL_GC_PUSH2(&np, &pi); for (i = 0; i < lp; i++) { - jl_value_t *pi = jl_svecref(p, i); + pi = jl_svecref(p, i); JL_TRY { pi = inst_type_w_(pi, env, stack, 1); if (!jl_is_type(pi) && !jl_is_typevar(pi)) { @@ -1672,7 +2259,8 @@ static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *s JL_CATCH { pi = jl_bottom_type; } - jl_svecset(np, i, pi); + jl_value_t *globalpi = jl_as_global_root(pi, cacheable); + jl_svecset(np, i, globalpi ? globalpi : pi); } JL_GC_POP(); return np; @@ -1683,13 +2271,15 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_ jl_datatype_t *tt = (jl_datatype_t*)t; jl_svec_t *tp = tt->parameters; size_t ntp = jl_svec_len(tp); - // Instantiate NTuple{3,Int} + // Instantiate Tuple{Vararg{T,N}} where T is fixed and N is known, such as Dims{3} + // And avoiding allocating the intermediate steps // Note this does not instantiate Tuple{Vararg{Int,3}}; that's done in inst_datatype_inner + // Note this does not instantiate NTuple{N,T}, since it is unnecessary and inefficient to expand that now if (jl_is_va_tuple(tt) && ntp == 1) { - // If this is a Tuple{Vararg{T,N}} with known N, expand it to + // If this is a Tuple{Vararg{T,N}} with known N and T, expand it to // a fixed-length tuple jl_value_t *T=NULL, *N=NULL; - jl_value_t *va = jl_unwrap_unionall(jl_tparam0(tt)); + jl_value_t *va = jl_tparam0(tt); jl_value_t *ttT = jl_unwrap_vararg(va); jl_value_t *ttN = jl_unwrap_vararg_num(va); jl_typeenv_t *e = env; @@ -1700,11 +2290,12 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_ N = e->val; e = e->prev; } - if (T != NULL && N != NULL && jl_is_long(N)) { + if (T != NULL && N != NULL && jl_is_long(N)) { // TODO: && !jl_has_free_typevars(T) to match inst_datatype_inner, or even && jl_is_concrete_type(T) + // Since this is skipping jl_wrap_vararg, we inline the checks from it here ssize_t nt = jl_unbox_long(N); if (nt < 0) - jl_errorf("size or dimension is negative: %zd", nt); - return (jl_value_t*)jl_tupletype_fill(nt, T); + jl_errorf("Vararg length is negative: %zd", nt); + return jl_tupletype_fill(nt, T, check); } } jl_value_t **iparams; @@ -1720,14 +2311,14 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_ int i; for (i = 0; i < ntp; i++) { jl_value_t *elt = jl_svecref(tp, i); - jl_value_t *pi = inst_type_w_(elt, env, stack, 0); + jl_value_t *pi = inst_type_w_(elt, env, stack, check); iparams[i] = pi; if (ip_heap) jl_gc_wb(ip_heap, pi); bound |= (pi != elt); } if (bound) - t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env); + t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env, check); JL_GC_POP(); return t; } @@ -1780,8 +2371,14 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t JL_GC_PUSH2(&a, &b); b = inst_type_w_(u->b, env, stack, check); if (a != u->a || b != u->b) { - jl_value_t *uargs[2] = {a, b}; - t = jl_type_union(uargs, 2); + if (check) { + jl_value_t *uargs[2] = {a, b}; + t = jl_type_union(uargs, 2); + } + else { + // fast path for `jl_rename_unionall`. 
+ t = jl_new_struct(jl_uniontype_type, a, b); + } } JL_GC_POP(); return t; @@ -1797,7 +2394,7 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t N = inst_type_w_(v->N, env, stack, check); } if (T != v->T || N != v->N) { - t = (jl_value_t*)jl_wrap_vararg(T, N); + t = (jl_value_t*)jl_wrap_vararg(T, N, check); } JL_GC_POP(); return t; @@ -1823,7 +2420,7 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t } // if t's parameters are not bound in the environment, return it uncopied (#9378) if (bound) - t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env); + t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env, check); JL_GC_POP(); return t; } @@ -1870,39 +2467,47 @@ jl_datatype_t *jl_wrap_Type(jl_value_t *t) return (jl_datatype_t*)jl_instantiate_unionall(jl_type_type, t); } -jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n) +jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check) { - if (n) { - if (jl_is_typevar(n)) { - // TODO: this is disabled due to #39698; it is also inconsistent - // with other similar checks, where we usually only check substituted - // values and not the bounds of variables. - /* - jl_tvar_t *N = (jl_tvar_t*)n; - if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) - jl_error("TypeVar in Vararg length must have bounds Union{} and Any"); - */ - } - else if (!jl_is_long(n)) { - jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n); - } - else if (jl_unbox_long(n) < 0) { - jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n)); + jl_task_t *ct = jl_current_task; + JL_GC_PUSH1(&t); + if (check) { + if (n) { + if (jl_is_typevar(n) || jl_is_uniontype(jl_unwrap_unionall(n))) { + // TODO: this is disabled due to #39698; it is also inconsistent + // with other similar checks, where we usually only check substituted + // values and not the bounds of variables. 
+ /* + jl_tvar_t *N = (jl_tvar_t*)n; + if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) + jl_error("TypeVar in Vararg length must have bounds Union{} and Any"); + */ + } + else if (!jl_is_long(n)) { + jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n); + } + else if (jl_unbox_long(n) < 0) { + jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n)); + } } - } - if (t) { - if (!jl_valid_type_param(t)) { - jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + if (t) { + if (!jl_valid_type_param(t)) + jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + t = normalize_unionalls(t); + jl_value_t *tw = extract_wrapper(t); + if (tw && t != tw && jl_types_equal(t, tw)) + t = tw; } } - jl_task_t *ct = jl_current_task; jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type); + jl_set_typetagof(vm, jl_vararg_tag, 0); vm->T = t; vm->N = n; + JL_GC_POP(); return vm; } -JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack) +JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack, int cacheable) { assert(st->name != jl_namedtuple_typename && st->name != jl_tuple_typename); jl_datatype_t *wt = (jl_datatype_t*)jl_unwrap_unionall(st->name->wrapper); @@ -1922,7 +2527,7 @@ JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_RO jl_typestack_t top; top.tt = st; top.prev = (jl_typestack_t*)stack; - st->types = inst_ftypes(wt->types, &env[n - 1], &top); + st->types = inst_ftypes(wt->types, &env[n - 1], &top, cacheable); jl_gc_wb(st, st->types); return st->types; } @@ -1939,7 +2544,7 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw! if (partial == NULL) return; if (n == 0) { - assert(jl_array_len(partial) == 0); + assert(jl_array_nrows(partial) == 0); return; } @@ -1950,8 +2555,10 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw! env[i].prev = i == 0 ? NULL : &env[i - 1]; } - for (j = 0; j < jl_array_len(partial); j++) { + for (j = 0; j < jl_array_nrows(partial); j++) { jl_datatype_t *ndt = (jl_datatype_t*)jl_array_ptr_ref(partial, j); + if (ndt == NULL) + continue; assert(jl_unwrap_unionall(ndt->name->wrapper) == (jl_value_t*)t); for (i = 0; i < n; i++) env[i].val = jl_svecref(ndt->parameters, i); @@ -1961,17 +2568,21 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw! 
} if (t->types != jl_emptysvec) { - for (j = 0; j < jl_array_len(partial); j++) { + for (j = 0; j < jl_array_nrows(partial); j++) { jl_datatype_t *ndt = (jl_datatype_t*)jl_array_ptr_ref(partial, j); + if (ndt == NULL) + continue; for (i = 0; i < n; i++) env[i].val = jl_svecref(ndt->parameters, i); assert(ndt->types == NULL); - ndt->types = inst_ftypes(t->types, &env[n - 1], &top); + ndt->types = inst_ftypes(t->types, &env[n - 1], &top, 1); jl_gc_wb(ndt, ndt->types); if (ndt->isconcretetype) { // cacheable jl_compute_field_offsets(ndt); } + jl_array_ptr_set(partial, j, NULL); } + t->name->partial = NULL; } else { assert(jl_field_names(t) == jl_emptysvec); @@ -1986,19 +2597,30 @@ static jl_tvar_t *tvar(const char *name) (jl_value_t*)jl_any_type); } +void export_jl_small_typeof(void) +{ + memcpy(&jl_small_typeof, &ijl_small_typeof, sizeof(jl_small_typeof)); +} + +#define XX(name) \ + ijl_small_typeof[(jl_##name##_tag << 4) / sizeof(*ijl_small_typeof)] = jl_##name##_type; \ + jl_##name##_type->smalltag = jl_##name##_tag; void jl_init_types(void) JL_GC_DISABLED { jl_module_t *core = NULL; // will need to be assigned later // create base objects jl_datatype_type = jl_new_uninitialized_datatype(); - jl_set_typeof(jl_datatype_type, jl_datatype_type); + XX(datatype); jl_typename_type = jl_new_uninitialized_datatype(); jl_symbol_type = jl_new_uninitialized_datatype(); + XX(symbol); jl_simplevector_type = jl_new_uninitialized_datatype(); + XX(simplevector); jl_methtable_type = jl_new_uninitialized_datatype(); jl_emptysvec = (jl_svec_t*)jl_gc_permobj(sizeof(void*), jl_simplevector_type); + jl_set_typetagof(jl_emptysvec, jl_simplevector_tag, GC_OLD_MARKED); jl_svec_set_len_unsafe(jl_emptysvec, 0); jl_any_type = (jl_datatype_t*)jl_new_abstracttype((jl_value_t*)jl_symbol("Any"), core, NULL, jl_emptysvec); @@ -2006,8 +2628,9 @@ void jl_init_types(void) JL_GC_DISABLED jl_nonfunction_mt = jl_any_type->name->mt; jl_any_type->name->mt = NULL; - jl_type_type = (jl_unionall_t*)jl_new_abstracttype((jl_value_t*)jl_symbol("Type"), core, jl_any_type, jl_emptysvec); - jl_type_typename = ((jl_datatype_t*)jl_type_type)->name; + jl_datatype_t *type_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Type"), core, jl_any_type, jl_emptysvec); + jl_type_type = (jl_unionall_t*)type_type; + jl_type_typename = type_type->name; jl_type_type_mt = jl_new_method_table(jl_type_typename->name, core); jl_type_typename->mt = jl_type_type_mt; @@ -2015,20 +2638,19 @@ void jl_init_types(void) JL_GC_DISABLED // NOTE: types are not actually mutable, but we want to ensure they are heap-allocated with stable addresses jl_datatype_type->name = jl_new_typename_in(jl_symbol("DataType"), core, 0, 1); jl_datatype_type->name->wrapper = (jl_value_t*)jl_datatype_type; - jl_datatype_type->super = (jl_datatype_t*)jl_type_type; + jl_datatype_type->super = type_type; jl_datatype_type->parameters = jl_emptysvec; - jl_datatype_type->name->n_uninitialized = 9 - 3; - jl_datatype_type->name->names = jl_perm_symsvec(9, + jl_datatype_type->name->n_uninitialized = 8 - 3; + jl_datatype_type->name->names = jl_perm_symsvec(8, "name", "super", "parameters", "types", "instance", "layout", - "size", "hash", - "flags"); // "hasfreetypevars", "isconcretetype", "isdispatchtuple", "isbitstype", "zeroinit", "has_concrete_subtype", "cached_by_hash" - jl_datatype_type->types = jl_svec(9, + "flags"); // "hasfreetypevars", "isconcretetype", "isdispatchtuple", "isbitstype", "zeroinit", "has_concrete_subtype", "maybe_subtype_of_cache" + jl_datatype_type->types = jl_svec(8, 
jl_typename_type, jl_datatype_type, jl_simplevector_type, @@ -2036,10 +2658,11 @@ void jl_init_types(void) JL_GC_DISABLED jl_any_type, // instance jl_any_type /*jl_voidpointer_type*/, jl_any_type /*jl_int32_type*/, - jl_any_type /*jl_int32_type*/, - jl_any_type /*jl_uint8_type*/); - const static uint32_t datatype_constfields[1] = { 0x00000097 }; // (1<<0)|(1<<1)|(1<<2)|(1<<4)|(1<<7) + jl_any_type /*jl_uint16_type*/); + const static uint32_t datatype_constfields[1] = { 0x00000057 }; // (1<<0)|(1<<1)|(1<<2)|(1<<4)|(1<<6) + const static uint32_t datatype_atomicfields[1] = { 0x00000028 }; // (1<<3)|(1<<5) jl_datatype_type->name->constfields = datatype_constfields; + jl_datatype_type->name->atomicfields = datatype_atomicfields; jl_precompute_memoized_dt(jl_datatype_type, 1); jl_typename_type->name = jl_new_typename_in(jl_symbol("TypeName"), core, 0, 1); @@ -2055,6 +2678,11 @@ void jl_init_types(void) JL_GC_DISABLED "hash", "n_uninitialized", "flags", // "abstract", "mutable", "mayinlinealloc", "max_methods"); + const static uint32_t typename_constfields[1] = { 0x00003a27 }; // (1<<0)|(1<<1)|(1<<2)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13) ; TODO: put back (1<<3)|(1<<4) in this list + const static uint32_t typename_atomicfields[1] = { 0x00000180 }; // (1<<7)|(1<<8) + jl_typename_type->name->constfields = typename_constfields; + jl_typename_type->name->atomicfields = typename_atomicfields; + jl_precompute_memoized_dt(jl_typename_type, 1); jl_typename_type->types = jl_svec(15, jl_symbol_type, jl_any_type /*jl_module_type*/, jl_simplevector_type, jl_any_type/*jl_voidpointer_type*/, jl_any_type/*jl_voidpointer_type*/, jl_type_type, jl_type_type, jl_simplevector_type, jl_simplevector_type, @@ -2062,28 +2690,27 @@ void jl_init_types(void) JL_GC_DISABLED jl_any_type /*jl_long_type*/, jl_any_type /*jl_int32_type*/, jl_any_type /*jl_uint8_type*/, jl_any_type /*jl_uint8_type*/); - const static uint32_t typename_constfields[1] = { 0x00003a3f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13) - jl_typename_type->name->constfields = typename_constfields; - jl_precompute_memoized_dt(jl_typename_type, 1); jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core, 0, 1); jl_methtable_type->name->wrapper = (jl_value_t*)jl_methtable_type; jl_methtable_type->name->mt = jl_nonfunction_mt; jl_methtable_type->super = jl_any_type; jl_methtable_type->parameters = jl_emptysvec; - jl_methtable_type->name->n_uninitialized = 12 - 5; - jl_methtable_type->name->names = jl_perm_symsvec(12, "name", "defs", + jl_methtable_type->name->n_uninitialized = 11 - 6; + jl_methtable_type->name->names = jl_perm_symsvec(11, "name", "defs", "leafcache", "cache", "max_args", - "kwsorter", "module", - "backedges", "", "", "offs", ""); - jl_methtable_type->types = jl_svec(12, jl_symbol_type, jl_any_type, jl_any_type, - jl_any_type, jl_any_type/*jl_long*/, - jl_any_type, jl_any_type/*module*/, - jl_any_type/*any vector*/, jl_any_type/*voidpointer*/, jl_any_type/*int32*/, - jl_any_type/*uint8*/, jl_any_type/*uint8*/); - const static uint32_t methtable_constfields[1] = { 0x00000040 }; // (1<<6); + "module", "backedges", + "", "", "offs", ""); + const static uint32_t methtable_constfields[1] = { 0x00000020 }; // (1<<5); + const static uint32_t methtable_atomicfields[1] = { 0x0000001e }; // (1<<1)|(1<<2)|(1<<3)|(1<<4); jl_methtable_type->name->constfields = methtable_constfields; + jl_methtable_type->name->atomicfields = methtable_atomicfields; jl_precompute_memoized_dt(jl_methtable_type, 1); + 
jl_methtable_type->types = jl_svec(11, jl_symbol_type, jl_any_type, jl_any_type, + jl_any_type, jl_any_type/*jl_long*/, + jl_any_type/*module*/, jl_any_type/*any vector*/, + jl_any_type/*voidpointer*/, jl_any_type/*int32*/, + jl_any_type/*uint8*/, jl_any_type/*uint8*/); jl_symbol_type->name = jl_new_typename_in(jl_symbol("Symbol"), core, 0, 1); jl_symbol_type->name->wrapper = (jl_value_t*)jl_symbol_type; @@ -2093,7 +2720,6 @@ void jl_init_types(void) JL_GC_DISABLED jl_symbol_type->name->n_uninitialized = 0; jl_symbol_type->name->names = jl_emptysvec; jl_symbol_type->types = jl_emptysvec; - jl_symbol_type->size = 0; jl_precompute_memoized_dt(jl_symbol_type, 1); jl_simplevector_type->name = jl_new_typename_in(jl_symbol("SimpleVector"), core, 0, 1); @@ -2113,88 +2739,98 @@ void jl_init_types(void) JL_GC_DISABLED jl_astaggedvalue(jl_nothing)->header = ((uintptr_t)jl_nothing_type) | GC_OLD_MARKED; jl_nothing_type->instance = jl_nothing; - jl_datatype_t *type_type = (jl_datatype_t*)jl_type_type; - jl_typeofbottom_type = jl_new_datatype(jl_symbol("TypeofBottom"), core, type_type, jl_emptysvec, - jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0); - jl_bottom_type = jl_new_struct(jl_typeofbottom_type); - jl_typeofbottom_type->instance = jl_bottom_type; - - jl_uniontype_type = jl_new_datatype(jl_symbol("Union"), core, type_type, jl_emptysvec, - jl_perm_symsvec(2, "a", "b"), - jl_svec(2, jl_any_type, jl_any_type), - jl_emptysvec, 0, 0, 2); - jl_tvar_type = jl_new_datatype(jl_symbol("TypeVar"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(3, "name", "lb", "ub"), jl_svec(3, jl_symbol_type, jl_any_type, jl_any_type), jl_emptysvec, 0, 1, 3); + XX(tvar); + const static uint32_t tvar_constfields[1] = { 0x00000007 }; // all fields are constant, even though TypeVar itself has identity + jl_tvar_type->name->constfields = tvar_constfields; + + jl_typeofbottom_type = jl_new_datatype(jl_symbol("TypeofBottom"), core, type_type, jl_emptysvec, + jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0); + XX(typeofbottom); + jl_bottom_type = jl_gc_permobj(0, jl_typeofbottom_type); + jl_set_typetagof(jl_bottom_type, jl_typeofbottom_tag, GC_OLD_MARKED); + jl_typeofbottom_type->instance = jl_bottom_type; jl_unionall_type = jl_new_datatype(jl_symbol("UnionAll"), core, type_type, jl_emptysvec, jl_perm_symsvec(2, "var", "body"), jl_svec(2, jl_tvar_type, jl_any_type), jl_emptysvec, 0, 0, 2); + XX(unionall); + // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist + jl_unionall_type->name->mayinlinealloc = 0; + + jl_uniontype_type = jl_new_datatype(jl_symbol("Union"), core, type_type, jl_emptysvec, + jl_perm_symsvec(2, "a", "b"), + jl_svec(2, jl_any_type, jl_any_type), + jl_emptysvec, 0, 0, 2); + XX(uniontype); + // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist + jl_uniontype_type->name->mayinlinealloc = 0; + + jl_tvar_t *tttvar = tvar("T"); + type_type->parameters = jl_svec(1, tttvar); + jl_precompute_memoized_dt(type_type, 0); // update the hash value ASAP + type_type->hasfreetypevars = 1; + type_type->ismutationfree = 1; + jl_type_typename->wrapper = jl_new_struct(jl_unionall_type, tttvar, (jl_value_t*)jl_type_type); + jl_type_type = (jl_unionall_t*)jl_type_typename->wrapper; jl_vararg_type = jl_new_datatype(jl_symbol("TypeofVararg"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(2, "T", "N"), jl_svec(2, jl_any_type, jl_any_type), jl_emptysvec, 0, 0, 0); + XX(vararg); + // It 
seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist + jl_vararg_type->name->mayinlinealloc = 0; - jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL)); + jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0)); jl_anytuple_type = jl_new_datatype(jl_symbol("Tuple"), core, jl_any_type, anytuple_params, jl_emptysvec, anytuple_params, jl_emptysvec, 0, 0, 0); jl_tuple_typename = jl_anytuple_type->name; // fix some miscomputed values, since we didn't know this was going to be a Tuple in jl_precompute_memoized_dt jl_tuple_typename->wrapper = (jl_value_t*)jl_anytuple_type; // remove UnionAll wrappers jl_anytuple_type->isconcretetype = 0; + jl_anytuple_type->maybe_subtype_of_cache = 0; jl_anytuple_type->layout = NULL; - jl_anytuple_type->size = 0; - jl_anytuple_type->cached_by_hash = 0; - - jl_tvar_t *tttvar = tvar("T"); - ((jl_datatype_t*)jl_type_type)->parameters = jl_svec(1, tttvar); - ((jl_datatype_t*)jl_type_type)->hasfreetypevars = 1; - ((jl_datatype_t*)jl_type_type)->cached_by_hash = 0; - jl_type_typename->wrapper = jl_new_struct(jl_unionall_type, tttvar, (jl_value_t*)jl_type_type); - jl_type_type = (jl_unionall_t*)jl_type_typename->wrapper; jl_typeofbottom_type->super = jl_wrap_Type(jl_bottom_type); - - jl_emptytuple_type = jl_apply_tuple_type(jl_emptysvec); + jl_emptytuple_type = (jl_datatype_t*)jl_apply_tuple_type(jl_emptysvec, 0); jl_emptytuple = jl_gc_permobj(0, jl_emptytuple_type); jl_emptytuple_type->instance = jl_emptytuple; // non-primitive definitions follow jl_int32_type = jl_new_primitivetype((jl_value_t*)jl_symbol("Int32"), core, jl_any_type, jl_emptysvec, 32); + XX(int32); jl_int64_type = jl_new_primitivetype((jl_value_t*)jl_symbol("Int64"), core, jl_any_type, jl_emptysvec, 64); + XX(int64); jl_uint32_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt32"), core, jl_any_type, jl_emptysvec, 32); + XX(uint32); jl_uint64_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt64"), core, jl_any_type, jl_emptysvec, 64); + XX(uint64); jl_uint8_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt8"), core, jl_any_type, jl_emptysvec, 8); + XX(uint8); jl_uint16_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt16"), core, jl_any_type, jl_emptysvec, 16); + XX(uint16); jl_ssavalue_type = jl_new_datatype(jl_symbol("SSAValue"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(1, "id"), jl_svec1(jl_long_type), jl_emptysvec, 0, 0, 1); - jl_abstractslot_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Slot"), core, jl_any_type, - jl_emptysvec); - - jl_slotnumber_type = jl_new_datatype(jl_symbol("SlotNumber"), core, jl_abstractslot_type, jl_emptysvec, + jl_slotnumber_type = jl_new_datatype(jl_symbol("SlotNumber"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(1, "id"), jl_svec1(jl_long_type), jl_emptysvec, 0, 0, 1); - jl_typedslot_type = jl_new_datatype(jl_symbol("TypedSlot"), core, jl_abstractslot_type, jl_emptysvec, - jl_perm_symsvec(2, "id", "typ"), - jl_svec(2, jl_long_type, jl_any_type), - jl_emptysvec, 0, 0, 2); - jl_argument_type = jl_new_datatype(jl_symbol("Argument"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(1, "n"), jl_svec1(jl_long_type), @@ -2205,14 +2841,16 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type = NULL; jl_bool_type = jl_new_primitivetype((jl_value_t*)jl_symbol("Bool"), core, jl_any_type, jl_emptysvec, 8); - jl_false = jl_permbox8(jl_bool_type, 0); - jl_true = 
jl_permbox8(jl_bool_type, 1); + XX(bool); + jl_false = jl_permbox8(jl_bool_type, jl_bool_tag, 0); + jl_true = jl_permbox8(jl_bool_type, jl_bool_tag, 1); jl_abstractstring_type = jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractString"), core, jl_any_type, jl_emptysvec); jl_string_type = jl_new_datatype(jl_symbol("String"), core, jl_abstractstring_type, jl_emptysvec, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0); + XX(string); jl_string_type->instance = NULL; - jl_compute_field_offsets(jl_string_type); + jl_compute_field_offsets(jl_string_type); // re-compute now that we assigned jl_string_type jl_an_empty_string = jl_pchar_to_string("\0", 1); *(size_t*)jl_an_empty_string = 0; @@ -2234,6 +2872,8 @@ void jl_init_types(void) JL_GC_DISABLED jl_any_type), jl_emptysvec, 0, 1, 6); + const static uint32_t typemap_level_atomicfields[1] = { 0x0000003f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5) + jl_typemap_level_type->name->atomicfields = typemap_level_atomicfields; jl_typemap_entry_type = jl_new_datatype(jl_symbol("TypeMapEntry"), core, jl_any_type, jl_emptysvec, @@ -2261,15 +2901,69 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type), jl_emptysvec, 0, 1, 4); - const static uint32_t typemap_entry_constfields[1] = { 0x000003fe }; // (1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9); + const static uint32_t typemap_entry_constfields[1] = { 0x000003fe }; // (1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9) + const static uint32_t typemap_entry_atomicfields[1] = { 0x00000001 }; // (1<<0) jl_typemap_entry_type->name->constfields = typemap_entry_constfields; + jl_typemap_entry_type->name->atomicfields = typemap_entry_atomicfields; jl_function_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Function"), core, jl_any_type, jl_emptysvec); jl_builtin_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Builtin"), core, jl_function_type, jl_emptysvec); jl_function_type->name->mt = NULL; // subtypes of Function have independent method tables jl_builtin_type->name->mt = NULL; // so they don't share the Any type table - jl_svec_t *tv = jl_svec2(tvar("T"), tvar("N")); + jl_svec_t *tv; + + jl_module_type = + jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec, + jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0); + XX(module); + assert(jl_module_type->instance == NULL); + jl_compute_field_offsets(jl_module_type); + + jl_binding_type = + jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(5, "value", "globalref", "owner", "ty", "flags"), + jl_svec(5, jl_any_type, jl_any_type/*jl_globalref_type*/, jl_any_type/*jl_binding_type*/, jl_type_type, jl_uint8_type), + jl_emptysvec, 0, 1, 0); + const static uint32_t binding_atomicfields[] = { 0x0015 }; // Set fields 1, 3, 4 as atomic + jl_binding_type->name->atomicfields = binding_atomicfields; + const static uint32_t binding_constfields[] = { 0x0002 }; // Set fields 2 as constant + jl_binding_type->name->constfields = binding_constfields; + + jl_globalref_type = + jl_new_datatype(jl_symbol("GlobalRef"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(3, "mod", "name", "binding"), + jl_svec(3, jl_module_type, jl_symbol_type, jl_binding_type), + jl_emptysvec, 0, 0, 3); + + core = jl_new_module(jl_symbol("Core"), NULL); + core->parent = core; + jl_type_typename->mt->module = core; + jl_core_module = core; + core = NULL; // not ready yet to use + + tv = jl_svec1(tvar("Backend")); + jl_addrspace_typename = + jl_new_primitivetype((jl_value_t*)jl_symbol("AddrSpace"), core, 
jl_any_type, tv, 8)->name; + jl_addrspace_type = (jl_unionall_t*)jl_addrspace_typename->wrapper; + jl_addrspacecore_type = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_addrspace_type, (jl_value_t*)jl_core_module); + jl_value_t *cpumem = jl_permbox8(jl_addrspacecore_type, 0, 0); + + tv = jl_svec1(tvar("T")); + jl_ref_type = (jl_unionall_t*) + jl_new_abstracttype((jl_value_t*)jl_symbol("Ref"), core, jl_any_type, tv)->name->wrapper; + + tv = jl_svec1(tvar("T")); + jl_pointer_typename = + jl_new_primitivetype((jl_value_t*)jl_symbol("Ptr"), core, + (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_ref_type, jl_svec_data(tv), 1), + tv, + sizeof(void*) * 8)->name; + jl_pointer_type = (jl_unionall_t*)jl_pointer_typename->wrapper; + jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type); + jl_voidpointer_type = (jl_datatype_t*)pointer_void; + + tv = jl_svec2(tvar("T"), tvar("N")); jl_abstractarray_type = (jl_unionall_t*) jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractArray"), core, jl_any_type, tv)->name->wrapper; @@ -2280,22 +2974,67 @@ void jl_init_types(void) JL_GC_DISABLED (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_abstractarray_type, jl_svec_data(tv), 2), tv)->name->wrapper; + tv = jl_svec(3, tvar("isatomic"), tvar("T"), tvar("addrspace")); + jl_datatype_t *jl_memory_supertype = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_densearray_type, jl_svecref(tv, 1), jl_box_long(1)); + jl_datatype_t *memory_datatype = + jl_new_datatype(jl_symbol("GenericMemory"), core, jl_memory_supertype, tv, + jl_perm_symsvec(2, "length", "ptr"), + jl_svec(2, jl_long_type, pointer_void), + jl_emptysvec, 0, 1, 2); + jl_genericmemory_typename = memory_datatype->name; + jl_genericmemory_type = (jl_unionall_t*)jl_genericmemory_typename->wrapper; + const static uint32_t memory_constfields[1] = { 0x00000003 }; // (1<<1)|(1<<0) + memory_datatype->name->constfields = memory_constfields; + memory_datatype->ismutationfree = 0; + + jl_datatype_t *jl_memoryref_supertype = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_ref_type, jl_svecref(tv, 1)); + jl_datatype_t *memoryref_datatype = + jl_new_datatype(jl_symbol("GenericMemoryRef"), core, jl_memoryref_supertype, tv, + jl_perm_symsvec(2, "ptr_or_offset", "mem"), + jl_svec(2, pointer_void, memory_datatype), + jl_emptysvec, 0, 0, 2); + jl_genericmemoryref_typename = memoryref_datatype->name; + jl_genericmemoryref_type = (jl_unionall_t*)jl_genericmemoryref_typename->wrapper; + memoryref_datatype->ismutationfree = 0; + + jl_memory_any_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_any_type, cpumem); + jl_memory_uint8_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint8_type, cpumem); + jl_memory_uint16_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint16_type, cpumem); + jl_memory_uint32_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint32_type, cpumem); + jl_memory_uint64_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint64_type, cpumem); + jl_memoryref_any_type = jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_any_type, cpumem); + jl_memoryref_uint8_type = jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint8_type, cpumem); + tv 
= jl_svec2(tvar("T"), tvar("N")); - jl_array_type = (jl_unionall_t*) - jl_new_datatype(jl_symbol("Array"), core, + jl_array_typename = jl_new_datatype(jl_symbol("Array"), core, (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_densearray_type, jl_svec_data(tv), 2), - tv, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0)->name->wrapper; - jl_array_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->name; - jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type)); + tv, + jl_perm_symsvec(2, "ref", "size"), + jl_svec(2, + jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, jl_svecref(tv, 0), cpumem), + jl_apply_type1((jl_value_t*)jl_tuple_type, (jl_value_t*)jl_wrap_vararg((jl_value_t*)jl_long_type, jl_svecref(tv, 1), 0))), + jl_emptysvec, 0, 1, 2)->name; + jl_array_type = (jl_unionall_t*)jl_array_typename->wrapper; jl_array_any_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_any_type, jl_box_long(1)); jl_array_symbol_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_symbol_type, jl_box_long(1)); jl_array_uint8_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint8_type, jl_box_long(1)); + jl_array_uint32_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint32_type, jl_box_long(1)); jl_array_int32_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_int32_type, jl_box_long(1)); jl_array_uint64_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint64_type, jl_box_long(1)); jl_an_empty_vec_any = (jl_value_t*)jl_alloc_vec_any(0); // used internally - jl_atomic_store_relaxed(&jl_nonfunction_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any); - jl_atomic_store_relaxed(&jl_type_type_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any); + jl_an_empty_memory_any = (jl_value_t*)jl_alloc_memory_any(0); // used internally + jl_atomic_store_relaxed(&jl_nonfunction_mt->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); + jl_atomic_store_relaxed(&jl_type_type_mt->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); + + // finish initializing module Core + core = jl_core_module; + jl_atomic_store_relaxed(&core->bindingkeyset, (jl_genericmemory_t*)jl_an_empty_memory_any); + // export own name, so "using Foo" makes "Foo" itself visible + jl_set_const(core, core->name, (jl_value_t*)core); + jl_module_public(core, core->name, 1); + jl_set_const(core, jl_symbol("CPU"), (jl_value_t*)cpumem); + core = NULL; jl_expr_type = jl_new_datatype(jl_symbol("Expr"), core, @@ -2304,12 +3043,6 @@ void jl_init_types(void) JL_GC_DISABLED jl_svec(2, jl_symbol_type, jl_array_any_type), jl_emptysvec, 0, 1, 2); - jl_module_type = - jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec, - jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0); - jl_module_type->instance = NULL; - jl_compute_field_offsets(jl_module_type); - jl_value_t *symornothing[2] = { (jl_value_t*)jl_symbol_type, (jl_value_t*)jl_void_type }; jl_linenumbernode_type = jl_new_datatype(jl_symbol("LineNumberNode"), core, jl_any_type, jl_emptysvec, @@ -2335,6 +3068,12 @@ void jl_init_types(void) JL_GC_DISABLED jl_svec(2, jl_any_type, jl_long_type), jl_emptysvec, 0, 0, 2); + jl_enternode_type = + jl_new_datatype(jl_symbol("EnterNode"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(2, "catch_dest", "scope"), + jl_svec(2, jl_long_type, jl_any_type), + jl_emptysvec, 0, 0, 1); + jl_returnnode_type = jl_new_datatype(jl_symbol("ReturnNode"), core, jl_any_type, jl_emptysvec, 
jl_perm_symsvec(1, "val"), @@ -2377,16 +3116,10 @@ void jl_init_types(void) JL_GC_DISABLED jl_svec(1, jl_slotnumber_type), jl_emptysvec, 0, 0, 1); - jl_globalref_type = - jl_new_datatype(jl_symbol("GlobalRef"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(2, "mod", "name"), - jl_svec(2, jl_module_type, jl_symbol_type), - jl_emptysvec, 0, 0, 2); - jl_code_info_type = jl_new_datatype(jl_symbol("CodeInfo"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(20, + jl_perm_symsvec(22, "code", "codelocs", "ssavaluetypes", @@ -2402,16 +3135,18 @@ void jl_init_types(void) JL_GC_DISABLED "min_world", "max_world", "inferred", - "inlineable", "propagate_inbounds", - "pure", + "has_fcall", + "nospecializeinfer", + "inlining", "constprop", - "purity"), - jl_svec(20, + "purity", + "inlining_cost"), + jl_svec(22, jl_array_any_type, jl_array_int32_type, jl_any_type, - jl_array_uint8_type, + jl_array_uint32_type, jl_any_type, jl_any_type, jl_array_symbol_type, @@ -2427,14 +3162,16 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type, jl_bool_type, jl_uint8_type, - jl_uint8_type), + jl_uint8_type, + jl_uint16_type, + jl_uint16_type), jl_emptysvec, - 0, 1, 20); + 0, 1, 22); jl_method_type = jl_new_datatype(jl_symbol("Method"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(29, + jl_perm_symsvec(30, "name", "module", "file", @@ -2460,11 +3197,12 @@ void jl_init_types(void) JL_GC_DISABLED "nospecialize", "nkw", "isva", - "pure", "is_for_opaque_closure", + "nospecializeinfer", "constprop", + "max_varargs", "purity"), - jl_svec(29, + jl_svec(30, jl_symbol_type, jl_module_type, jl_symbol_type, @@ -2472,8 +3210,8 @@ void jl_init_types(void) JL_GC_DISABLED jl_ulong_type, jl_ulong_type, jl_type_type, - jl_simplevector_type, - jl_array_type, + jl_any_type, // union(jl_simplevector_type, jl_method_instance_type), + jl_genericmemory_type, // union(jl_memory_uint8_type, jl_memory_uint16_type, jl_memory_uint32_type, jl_memory_uint64_type, jl_memory_any_type) jl_string_type, jl_any_type, jl_any_type, @@ -2493,7 +3231,8 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type, jl_bool_type, jl_uint8_type, - jl_uint8_type), + jl_uint8_type, + jl_uint16_type), jl_emptysvec, 0, 1, 10); //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25); @@ -2502,7 +3241,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_method_instance_type = jl_new_datatype(jl_symbol("MethodInstance"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(9, + jl_perm_symsvec(10, "def", "specTypes", "sparam_vals", @@ -2511,40 +3250,47 @@ void jl_init_types(void) JL_GC_DISABLED "callbacks", "cache", "inInference", + "cache_with_orig", "precompiled"), - jl_svec(9, + jl_svec(10, jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type), jl_any_type, jl_simplevector_type, jl_any_type, - jl_any_type, + jl_array_any_type, jl_any_type, jl_any_type, jl_bool_type, + jl_bool_type, jl_bool_type), jl_emptysvec, 0, 1, 3); + // These fields should be constant, but Serialization wants to mutate them in initialization //const static uint32_t method_instance_constfields[1] = { 0x00000007 }; // (1<<0)|(1<<1)|(1<<2); + const static uint32_t method_instance_atomicfields[1] = { 0x00000248 }; // (1<<3)|(1<<6)|(1<<9); + //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe. 
TODO: except inInference //jl_method_instance_type->name->constfields = method_instance_constfields; + jl_method_instance_type->name->atomicfields = method_instance_atomicfields; jl_code_instance_type = jl_new_datatype(jl_symbol("CodeInstance"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(15, + jl_perm_symsvec(16, "def", "next", "min_world", "max_world", "rettype", + "exctype", "rettype_const", "inferred", //"edges", //"absolute_max", "ipo_purity_bits", "purity_bits", - "argescapes", - "isspecsig", "precompile", "invoke", "specptr", // function object decls - "relocatability"), - jl_svec(15, + "analysis_results", + "isspecsig", "precompile", "relocatability", + "invoke", "specptr"), // function object decls + jl_svec(16, jl_method_instance_type, jl_any_type, jl_ulong_type, @@ -2552,19 +3298,24 @@ void jl_init_types(void) JL_GC_DISABLED jl_any_type, jl_any_type, jl_any_type, + jl_any_type, //jl_any_type, //jl_bool_type, jl_uint32_type, jl_uint32_type, jl_any_type, jl_bool_type, jl_bool_type, - jl_any_type, jl_any_type, // fptrs - jl_uint8_type), + jl_uint8_type, + jl_any_type, jl_any_type), // fptrs jl_emptysvec, 0, 1, 1); jl_svecset(jl_code_instance_type->types, 1, jl_code_instance_type); - const static uint32_t code_instance_constfields[1] = { 0x00000001 }; // (1<<1); + const static uint32_t code_instance_constfields[1] = { 0b0000010101110001 }; // Set fields 1, 5-7, 9, 11 as const + const static uint32_t code_instance_atomicfields[1] = { 0b1101001010000010 }; // Set fields 2, 8, 10, 13, 15-16 as atomic + //Fields 3-4 are only operated on by construction and deserialization, so are const at runtime + //Fields 11 and 15 must be protected by locks, and thus all operations on jl_code_instance_t are threadsafe jl_code_instance_type->name->constfields = code_instance_constfields; + jl_code_instance_type->name->atomicfields = code_instance_atomicfields; jl_const_type = jl_new_datatype(jl_symbol("Const"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(1, "val"), @@ -2573,9 +3324,14 @@ void jl_init_types(void) JL_GC_DISABLED jl_partial_struct_type = jl_new_datatype(jl_symbol("PartialStruct"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(2, "typ", "fields"), - jl_svec2(jl_datatype_type, jl_array_any_type), + jl_svec2(jl_any_type, jl_array_any_type), jl_emptysvec, 0, 0, 2); + jl_interconditional_type = jl_new_datatype(jl_symbol("InterConditional"), core, jl_any_type, jl_emptysvec, + jl_perm_symsvec(3, "slot", "thentype", "elsetype"), + jl_svec(3, jl_long_type, jl_any_type, jl_any_type), + jl_emptysvec, 0, 0, 3); + jl_method_match_type = jl_new_datatype(jl_symbol("MethodMatch"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(4, "spec_types", "sparams", "method", "fully_covers"), jl_svec(4, jl_type_type, jl_simplevector_type, jl_method_type, jl_bool_type), @@ -2588,20 +3344,10 @@ void jl_init_types(void) JL_GC_DISABLED jl_intrinsic_type = jl_new_primitivetype((jl_value_t*)jl_symbol("IntrinsicFunction"), core, jl_builtin_type, jl_emptysvec, 32); - tv = jl_svec1(tvar("T")); - jl_ref_type = (jl_unionall_t*) - jl_new_abstracttype((jl_value_t*)jl_symbol("Ref"), core, jl_any_type, tv)->name->wrapper; - - tv = jl_svec1(tvar("T")); - jl_pointer_type = (jl_unionall_t*) - jl_new_primitivetype((jl_value_t*)jl_symbol("Ptr"), core, - (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_ref_type, jl_svec_data(tv), 1), tv, - sizeof(void*)*8)->name->wrapper; - jl_pointer_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_pointer_type))->name; - // LLVMPtr{T, AS} where {T, AS} - tv = 
jl_svec2(tvar("T"), tvar("AS")); - jl_svec_t *tv_base = jl_svec1(tvar("T")); + jl_tvar_t *elvar = tvar("T"); + tv = jl_svec2(elvar, tvar("AS")); + jl_svec_t *tv_base = jl_svec1(elvar); jl_llvmpointer_type = (jl_unionall_t*) jl_new_primitivetype((jl_value_t*)jl_symbol("LLVMPtr"), core, (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_ref_type, jl_svec_data(tv_base), 1), tv, @@ -2629,23 +3375,24 @@ void jl_init_types(void) JL_GC_DISABLED NULL, jl_any_type, jl_emptysvec, - jl_perm_symsvec(15, + jl_perm_symsvec(16, "next", "queue", "storage", "donenotify", "result", - "logstate", + "scope", "code", "rngState0", "rngState1", "rngState2", "rngState3", + "rngState4", "_state", "sticky", "_isexception", "priority"), - jl_svec(15, + jl_svec(16, jl_any_type, jl_any_type, jl_any_type, @@ -2657,20 +3404,21 @@ void jl_init_types(void) JL_GC_DISABLED jl_uint64_type, jl_uint64_type, jl_uint64_type, + jl_uint64_type, jl_uint8_type, jl_bool_type, jl_bool_type, jl_uint16_type), jl_emptysvec, 0, 1, 6); + XX(task); jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type); jl_svecset(jl_task_type->types, 0, listt); - jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header; - - jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type); tv = jl_svec2(tvar("A"), tvar("R")); jl_opaque_closure_type = (jl_unionall_t*)jl_new_datatype(jl_symbol("OpaqueClosure"), core, jl_function_type, tv, + // N.B.: OpaqueClosure call code relies on specptr being field 5. + // Update that code if you change this. jl_perm_symsvec(5, "captures", "world", "source", "invoke", "specptr"), jl_svec(5, jl_any_type, jl_long_type, jl_any_type, pointer_void, pointer_void), jl_emptysvec, 0, 0, 5)->name->wrapper; @@ -2679,16 +3427,14 @@ void jl_init_types(void) JL_GC_DISABLED jl_partial_opaque_type = jl_new_datatype(jl_symbol("PartialOpaque"), core, jl_any_type, jl_emptysvec, jl_perm_symsvec(4, "typ", "env", "parent", "source"), - jl_svec(4, jl_type_type, jl_any_type, jl_method_instance_type, jl_method_type), + jl_svec(4, jl_type_type, jl_any_type, jl_method_instance_type, jl_any_type), jl_emptysvec, 0, 0, 4); // complete builtin type metadata - jl_voidpointer_type = (jl_datatype_t*)pointer_void; jl_uint8pointer_type = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_uint8_type); jl_svecset(jl_datatype_type->types, 5, jl_voidpointer_type); jl_svecset(jl_datatype_type->types, 6, jl_int32_type); - jl_svecset(jl_datatype_type->types, 7, jl_int32_type); - jl_svecset(jl_datatype_type->types, 8, jl_uint8_type); + jl_svecset(jl_datatype_type->types, 7, jl_uint16_type); jl_svecset(jl_typename_type->types, 1, jl_module_type); jl_svecset(jl_typename_type->types, 3, jl_voidpointer_type); jl_svecset(jl_typename_type->types, 4, jl_voidpointer_type); @@ -2699,35 +3445,150 @@ void jl_init_types(void) JL_GC_DISABLED jl_svecset(jl_typename_type->types, 13, jl_uint8_type); jl_svecset(jl_typename_type->types, 14, jl_uint8_type); jl_svecset(jl_methtable_type->types, 4, jl_long_type); - jl_svecset(jl_methtable_type->types, 6, jl_module_type); - jl_svecset(jl_methtable_type->types, 7, jl_array_any_type); - jl_svecset(jl_methtable_type->types, 8, jl_long_type); // voidpointer - jl_svecset(jl_methtable_type->types, 9, jl_long_type); // uint32_t plus alignment + jl_svecset(jl_methtable_type->types, 5, jl_module_type); + jl_svecset(jl_methtable_type->types, 6, jl_array_any_type); + jl_svecset(jl_methtable_type->types, 7, 
jl_long_type); // voidpointer + jl_svecset(jl_methtable_type->types, 8, jl_long_type); // uint32_t plus alignment + jl_svecset(jl_methtable_type->types, 9, jl_uint8_type); jl_svecset(jl_methtable_type->types, 10, jl_uint8_type); - jl_svecset(jl_methtable_type->types, 11, jl_uint8_type); jl_svecset(jl_method_type->types, 12, jl_method_instance_type); jl_svecset(jl_method_instance_type->types, 6, jl_code_instance_type); - jl_svecset(jl_code_instance_type->types, 12, jl_voidpointer_type); - jl_svecset(jl_code_instance_type->types, 13, jl_voidpointer_type); + jl_svecset(jl_code_instance_type->types, 14, jl_voidpointer_type); + jl_svecset(jl_code_instance_type->types, 15, jl_voidpointer_type); + jl_svecset(jl_binding_type->types, 1, jl_globalref_type); + jl_svecset(jl_binding_type->types, 2, jl_binding_type); jl_compute_field_offsets(jl_datatype_type); jl_compute_field_offsets(jl_typename_type); jl_compute_field_offsets(jl_uniontype_type); jl_compute_field_offsets(jl_tvar_type); jl_compute_field_offsets(jl_methtable_type); - jl_compute_field_offsets(jl_module_type); jl_compute_field_offsets(jl_method_instance_type); jl_compute_field_offsets(jl_code_instance_type); jl_compute_field_offsets(jl_unionall_type); jl_compute_field_offsets(jl_simplevector_type); jl_compute_field_offsets(jl_symbol_type); + // override ismutationfree for builtin types that are mutable for identity + jl_string_type->ismutationfree = jl_string_type->isidentityfree = 1; + jl_symbol_type->ismutationfree = jl_symbol_type->isidentityfree = 1; + jl_simplevector_type->ismutationfree = jl_simplevector_type->isidentityfree = 1; + jl_datatype_type->ismutationfree = 1; + assert(((jl_datatype_t*)jl_array_any_type)->ismutationfree == 0); + assert(((jl_datatype_t*)jl_array_uint8_type)->ismutationfree == 0); + + // Technically not ismutationfree, but there's a separate system to deal + // with mutations for global state. + jl_module_type->ismutationfree = 1; + // Module object identity is determined by its name and parent name. + jl_module_type->isidentityfree = 1; + // override the preferred layout for a couple types jl_lineinfonode_type->name->mayinlinealloc = 0; // FIXME: assumed to be a pointer by codegen - // It seems like we probably usually end up needing the box for kinds (used in an Any context)--but is that true? 
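Illustrative note (not part of the patch): the `ismutationfree`/`isidentityfree` overrides above mark types whose values can be treated as immutable data after construction, while the asserts keep Array types excluded. A minimal Julia-level sketch of that distinction, restating only what the hunk itself asserts:

    # String is flagged mutation-free: no public API rewrites its bytes in place.
    s = "abc"
    # Arrays remain mutable containers, which is why the asserts above require
    # jl_array_any_type / jl_array_uint8_type to keep ismutationfree == 0.
    v = Any["abc"]
    v[1] = "xyz"      # in-place mutation is observable, unlike for String/Symbol
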
- jl_uniontype_type->name->mayinlinealloc = 0; - jl_unionall_type->name->mayinlinealloc = 0; + + export_jl_small_typeof(); +} + +static jl_value_t *core(const char *name) +{ + return jl_get_global(jl_core_module, jl_symbol(name)); +} + +// fetch references to things defined in boot.jl +void post_boot_hooks(void) +{ + jl_char_type = (jl_datatype_t*)core("Char"); + XX(char); + jl_int8_type = (jl_datatype_t*)core("Int8"); + XX(int8); + jl_int16_type = (jl_datatype_t*)core("Int16"); + XX(int16); + jl_float16_type = (jl_datatype_t*)core("Float16"); + //XX(float16); + jl_float32_type = (jl_datatype_t*)core("Float32"); + //XX(float32); + jl_float64_type = (jl_datatype_t*)core("Float64"); + //XX(float64); + jl_bfloat16_type = (jl_datatype_t*)core("BFloat16"); + //XX(bfloat16); + jl_floatingpoint_type = (jl_datatype_t*)core("AbstractFloat"); + jl_number_type = (jl_datatype_t*)core("Number"); + jl_signed_type = (jl_datatype_t*)core("Signed"); + jl_datatype_t *jl_unsigned_type = (jl_datatype_t*)core("Unsigned"); + jl_datatype_t *jl_integer_type = (jl_datatype_t*)core("Integer"); + + jl_bool_type->super = jl_integer_type; + jl_uint8_type->super = jl_unsigned_type; + jl_uint16_type->super = jl_unsigned_type; + jl_uint32_type->super = jl_unsigned_type; + jl_uint64_type->super = jl_unsigned_type; + jl_int32_type->super = jl_signed_type; + jl_int64_type->super = jl_signed_type; + + jl_errorexception_type = (jl_datatype_t*)core("ErrorException"); + jl_stackovf_exception = jl_new_struct_uninit((jl_datatype_t*)core("StackOverflowError")); + jl_diverror_exception = jl_new_struct_uninit((jl_datatype_t*)core("DivideError")); + jl_undefref_exception = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError")); + jl_undefvarerror_type = (jl_datatype_t*)core("UndefVarError"); + jl_atomicerror_type = (jl_datatype_t*)core("ConcurrencyViolationError"); + jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException")); + jl_boundserror_type = (jl_datatype_t*)core("BoundsError"); + jl_memory_exception = jl_new_struct_uninit((jl_datatype_t*)core("OutOfMemoryError")); + jl_readonlymemory_exception = jl_new_struct_uninit((jl_datatype_t*)core("ReadOnlyMemoryError")); + jl_typeerror_type = (jl_datatype_t*)core("TypeError"); + jl_argumenterror_type = (jl_datatype_t*)core("ArgumentError"); + jl_methoderror_type = (jl_datatype_t*)core("MethodError"); + jl_loaderror_type = (jl_datatype_t*)core("LoadError"); + jl_initerror_type = (jl_datatype_t*)core("InitError"); + jl_missingcodeerror_type = (jl_datatype_t*)core("MissingCodeError"); + jl_precompilable_error = jl_new_struct_uninit((jl_datatype_t*)core("PrecompilableError")); + jl_pair_type = core("Pair"); + jl_kwcall_func = core("kwcall"); + jl_kwcall_mt = ((jl_datatype_t*)jl_typeof(jl_kwcall_func))->name->mt; + jl_atomic_store_relaxed(&jl_kwcall_mt->max_args, 0); + + jl_weakref_type = (jl_datatype_t*)core("WeakRef"); + jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name; + + jl_init_box_caches(); + + // set module field of primitive types + jl_svec_t *bindings = jl_atomic_load_relaxed(&jl_core_module->bindings); + jl_value_t **table = jl_svec_data(bindings); + for (size_t i = 0; i < jl_svec_len(bindings); i++) { + if (table[i] != jl_nothing) { + jl_binding_t *b = (jl_binding_t*)table[i]; + jl_value_t *v = jl_atomic_load_relaxed(&b->value); + if (v) { + if (jl_is_unionall(v)) + v = jl_unwrap_unionall(v); + if (jl_is_datatype(v)) { + jl_datatype_t *tt = (jl_datatype_t*)v; + tt->name->module = jl_core_module; + if 
(tt->name->mt) + tt->name->mt->module = jl_core_module; + } + } + } + } + + export_jl_small_typeof(); +} + +void post_image_load_hooks(void) { + // Ensure that `Base` has been loaded. + assert(jl_base_module != NULL); + + jl_libdl_module = (jl_module_t *)jl_get_global( + ((jl_module_t *)jl_get_global(jl_base_module, jl_symbol("Libc"))), + jl_symbol("Libdl") + ); + jl_libdl_dlopen_func = jl_get_global( + jl_libdl_module, + jl_symbol("dlopen") + ); } +#undef XX #ifdef __cplusplus } diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 38969faf5caf4..fb20717add65d 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -10,7 +10,7 @@ ;; comma - higher than assignment outside parentheses, lower when inside (define prec-pair (add-dots '(=>))) (define prec-conditional '(?)) -(define prec-arrow (add-dots '(← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->))) +(define prec-arrow (add-dots '(← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⥷ ⭄ ⥺ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->))) (define prec-lazy-or (add-dots '(|\|\||))) (define prec-lazy-and (add-dots '(&&))) (define prec-comparison @@ -20,7 +20,7 @@ (define prec-pipe> '(|.\|>| |\|>|)) (define prec-colon (append! '(: |..|) (add-dots '(… ⁝ ⋮ ⋱ ⋰ ⋯)))) (define prec-plus (append! '($) - (add-dots '(+ - − ¦ |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣)))) + (add-dots '(+ - − ¦ |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⟇ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣)))) (define prec-times (add-dots '(* / ⌿ ÷ % & · · ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟))) (define prec-rational (add-dots '(//))) (define prec-bitshift (add-dots '(<< >> >>>))) @@ -49,7 +49,7 @@ ((not (length> l 8)) (eval `(lambda (x) (not (not (,(if (every symbol? l) 'memq 'memv) x (quote ,l))))))) - ((and (every symbol? l) (not (length> l 20))) + ((and (not (length> l 20)) (every symbol? l)) (eval `(lambda (x) (not (not (memq x (quote ,l))))))) (else @@ -716,7 +716,7 @@ ;; ";" at the top level produces a sequence of top level expressions (define (parse-stmts s) - (let ((ex (parse-Nary s (lambda (s) (parse-docstring s parse-eq)) + (let ((ex (parse-Nary s (lambda (s) (parse-public s parse-eq)) '(#\;) 'toplevel (lambda (x) (eqv? x #\newline)) #f))) ;; check for unparsed junk after an expression (let ((t (peek-token s))) @@ -1352,7 +1352,7 @@ (define (rewrap-where x w) (if (and (pair? w) (eq? (car w) 'where)) - (list 'where (rewrap-where x (cadr w)) (caddr w)) + (list* 'where (rewrap-where x (cadr w)) (cddr w)) x)) (define (parse-struct-field s) @@ -1608,18 +1608,18 @@ ((module baremodule) (let* ((name (parse-unary-prefix s)) (loc (line-number-node s)) - (body (parse-block s (lambda (s) (parse-docstring s parse-eq))))) + (body (parse-block s (lambda (s) (parse-public s parse-eq))))) (if (reserved-word? name) (error (string "invalid module name \"" name "\""))) (expect-end s word) (list 'module (if (eq? 
word 'module) '(true) '(false)) name `(block ,loc ,@(cdr body))))) - ((export) + ((export public) (let ((es (map macrocall-to-atsym (parse-comma-separated s parse-unary-prefix)))) (if (not (every symbol-or-interpolate? es)) - (error "invalid \"export\" statement")) - `(export ,@es))) + (error (string "invalid \"" word "\" statement"))) + `(,word ,@es))) ((import using) (parse-imports s word)) ((do) @@ -2664,6 +2664,17 @@ ;; string interpolation (eq? (car e) 'string)))) +(define (parse-public s production) + (if (eq? (peek-token s) 'public) + (let ((spc (ts:space? s))) + (take-token s) + (if (memv (peek-token s) '(#\( = #\[)) + (begin ;; TODO: deprecation warning here + (ts:put-back! s 'public spc) + (parse-docstring s production)) + (parse-resword s 'public))) + (parse-docstring s production))) + (define (parse-docstring s production) (let ((startloc (line-number-node s)) ; be sure to use the line number from the head of the docstring (ex (production s))) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 4024d25c2e9ec..e7899688453c7 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -175,7 +175,7 @@ ,(if (equal? rett '(core Any)) body (let ((meta (take-while (lambda (x) (and (pair? x) - (memq (car x) '(line meta)))) + (memq (car x) '(lineinfo line meta)))) (cdr body))) (R (make-ssavalue))) `(,(car body) ,@meta @@ -187,7 +187,7 @@ ;; a bound is #f if not specified (define (analyze-typevar e) (define (check-sym s) - (if (symbol? s) + (if (symbol? (unescape s)) ; unescape for macroexpand.scm use s (error (string "invalid type parameter name \"" (deparse s) "\"")))) (cond ((atom? e) (list (check-sym e) #f #f)) @@ -296,7 +296,8 @@ (if (eq? n '|#self#|) (gensy) n)) arg-names)))) (let ((body (insert-after-meta body ;; don't specialize on generator arguments - `((meta nospecialize ,@arg-names))))) + ;; arg-names slots start at 2 (after name) + `((meta nospecialize ,@(map (lambda (idx) `(slot ,(+ idx 2))) (iota (length arg-names)))))))) `(block (global ,name) (function (call ,name ,@arg-names) ,body))))) @@ -366,9 +367,9 @@ (if (has-dups unused_anames) (error (string "function argument name not unique: \"" (car (has-dups unused_anames)) "\""))) (if (has-dups names) - (error "function static parameter names not unique")) + (error (string "function static parameter name not unique: \"" (car (has-dups names)) "\""))) (if (any (lambda (x) (and (not (eq? x UNUSED)) (memq x names))) anames) - (error "function argument and static parameter names must be distinct")) + (error (string "function argument and static parameter name not distinct: \"" (car (intersect names unused_anames)) "\""))) (if (or (and name (not (sym-ref-or-overlay? name))) (not (valid-name? name))) (error (string "invalid function name \"" (deparse name) "\""))) (let* ((loc (maybe-remove-functionloc! body)) @@ -382,13 +383,8 @@ `((meta generated (new (core GeneratedFunctionStub) ,gname - ,(cons 'list anames) - ,(if (null? sparams) - 'nothing - (cons 'list (map car sparams))) - ,(cadr loc) - (inert ,(caddr loc)) - (false)))))) + (call (core svec) ,@(map quotify anames)) + (call (core svec) ,@(map quotify names))))))) (list gf)) '())) (types (llist-types argl)) @@ -512,7 +508,8 @@ positional-sparams))) sparams)) (kw (gensy)) - (rkw (if (null? restkw) (make-ssavalue) (symbol (string (car restkw) "...")))) + (kwdecl `(|::| ,kw (core NamedTuple))) + (rkw (if (null? 
restkw) '() (symbol (string (car restkw) "...")))) (restkw (map (lambda (v) `(|::| ,v (call (top pairs) (core NamedTuple)))) restkw)) (mangled (let ((und (and name (undot-name name)))) (symbol (string (if (and name (= (string.char (string name) 0) #\#)) @@ -559,9 +556,12 @@ name positional-sparams `((|::| ;; if there are optional positional args, we need to be able to reference the function name - ,(if (any kwarg? pargl) (gensy) UNUSED) - (call (core kwftype) ,ftype)) ,kw ,@pargl ,@vararg) + ,(if (any kwarg? `(,@pargl ,@vararg)) (gensy) UNUSED) + (call (core kwftype) ,ftype)) ,kwdecl ,@pargl ,@vararg) `(block + ;; propagate method metadata to keyword sorter + ,@(map propagate-method-meta (filter meta? prologue)) + ,@(filter argwide-nospecialize-meta? prologue) ,@(let ((lnns (filter linenum? prologue))) (if (pair? lnns) (list (car lnns)) @@ -608,16 +608,18 @@ ,tempslot))) vars vals) `(block - (= ,rkw (call (top pairs) - ,(if (null? keynames) - kw - `(call (top structdiff) ,kw (curly (core NamedTuple) - (tuple ,@(map quotify keynames))))))) - ,@(if (null? restkw) - `((if (call (top isempty) ,rkw) + ,(if (null? restkw) + `(if (call (top isempty) + (call (top diff_names) + (call (top keys) ,kw) + (tuple ,@(map quotify keynames)))) (null) - (call (top kwerr) ,kw ,@(map arg-name pargl) ,@splatted-vararg))) - '()) + (call (top kwerr) ,kw ,@(map arg-name pargl) ,@splatted-vararg)) + `(= ,rkw (call (top pairs) + ,(if (null? keynames) + kw + `(call (top structdiff) ,kw (curly (core NamedTuple) + (tuple ,@(map quotify keynames)))))))) (return (call ,mangled ;; finally, call the core function ,@keyvars ,@(if (null? restkw) '() (list rkw)) @@ -663,7 +665,10 @@ (vals (list-tail dfl n)) (absent (list-tail opt n)) ;; absent arguments (body - (if (any (lambda (defaultv) + (if (any vararg? (butlast vals)) + ;; Forbid splat in all but the final default value + (error "invalid \"...\" in non-final positional argument default value") + (if (any (lambda (defaultv) ;; does any default val expression... (contains (lambda (e) ;; contain "e" such that... @@ -682,7 +687,7 @@ ;; otherwise add all `(block ,@prologue - (call ,(arg-name (car req)) ,@(map arg-name (cdr passed)) ,@vals))))) + (call ,(arg-name (car req)) ,@(map arg-name (cdr passed)) ,@vals)))))) (method-def-expr- name sp passed body))) (iota (length opt))) ,(method-def-expr- name sparams overall-argl body rett)))) @@ -753,8 +758,18 @@ (define (default-inner-ctors name field-names field-types params bounds locs) (let* ((field-names (safe-field-names field-names field-types)) - (any-ctor + (all-ctor (if (null? params) + ;; definition with exact types for all arguments + `(function (call ,name + ,@(map make-decl field-names field-types)) + (block + ,@locs + (new (outerref ,name) ,@field-names))) + #f)) + (any-ctor (if (or (not all-ctor) (any (lambda (t) (not (equal? t '(core Any)))) + field-types)) ;; definition with Any for all arguments + ;; only if any field type is not Any, checked at runtime `(function (call (|::| |#ctor-self#| ,(with-wheres `(curly (core Type) ,(if (pair? params) @@ -764,23 +779,18 @@ ,@field-names) (block ,@locs - (call new ,@field-names))))) - (if (and (null? params) (any (lambda (t) (not (equal? 
t '(core Any)))) - field-types)) - (list - ;; definition with field types for all arguments - ;; only if any field type is not Any, checked at runtime - `(if ,(foldl (lambda (t u) - `(&& ,u (call (core ===) (core Any) ,t))) - `(call (core ===) (core Any) ,(car field-types)) - (cdr field-types)) - (block) - (function (call ,name - ,@(map make-decl field-names field-types)) - (block - ,@locs - (new (outerref ,name) ,@field-names)))) - any-ctor) + (call new ,@field-names))) ; this will add convert calls later + #f))) + (if all-ctor + (if any-ctor + (list all-ctor + `(if ,(foldl (lambda (t u) + `(&& ,u (call (core ===) (core Any) ,t))) + `(call (core ===) (core Any) ,(car field-types)) + (cdr field-types)) + '(block) + ,any-ctor)) + (list all-ctor)) (list any-ctor)))) (define (default-outer-ctor name field-names field-types params bounds locs) @@ -790,7 +800,7 @@ (map (lambda (b) (cons 'var-bounds b)) bounds)) (block ,@locs - (call (curly ,name ,@params) ,@field-names))))) + (new (curly ,name ,@params) ,@field-names))))) (define (num-non-varargs args) (count (lambda (a) (not (vararg? a))) args)) @@ -815,12 +825,14 @@ (field-convert (lambda (fld fty val) (if (equal? fty '(core Any)) val - `(call (top convert) - ,(if (and (not selftype?) (equal? type-params params) (memq fty params) (memq fty sparams)) - fty ; the field type is a simple parameter, the usage here is of a - ; local variable (currently just handles sparam) for the bijection of params to type-params - `(call (core fieldtype) ,tn ,(+ fld 1))) - ,val))))) + (convert-for-type-decl val + ; for ty, usually use the fieldtype, not the fty expression + (if (and (not selftype?) (equal? type-params params) (memq fty params) (memq fty sparams)) + fty ; the field type is a simple parameter, the usage here is of a + ; local variable (currently just handles sparam) for the bijection of params to type-params + `(call (core fieldtype) ,tn ,(+ fld 1))) + #f + #f))))) (cond ((> (num-non-varargs args) (length field-names)) `(call (core throw) (call (top ArgumentError) ,(string "new: too many arguments (expected " (length field-names) ")")))) @@ -1091,7 +1103,7 @@ ;; insert calls to convert() in ccall, and pull out expressions that might ;; need to be rooted before conversion. -(define (lower-ccall name RT atypes args cconv) +(define (lower-ccall name RT atypes args cconv nreq) (let loop ((F atypes) ;; formals (A args) ;; actuals (stmts '()) ;; initializers @@ -1108,13 +1120,15 @@ (if (null? A) `(block ,.(reverse! stmts) - (foreigncall ,name ,RT (call (core svec) ,@(reverse! T)) - ,(if isseq (- (length atypes) 1) 0) ; 0 or number of arguments before ... in definition + (foreigncall ,(expand-forms name) ,(expand-forms RT) (call (core svec) ,@(reverse! T)) + ;; 0 or number of arguments before ... in definition + ,(or nreq + (if isseq (- (length atypes) 1) 0)) ',cconv ,.(reverse! C) ,@GC)) ; GC root ordering is arbitrary - (let* ((a (car A)) - (ty (if isseq (cadar F) (car F)))) + (let* ((a (expand-forms (car A))) + (ty (expand-forms (if isseq (cadar F) (car F))))) (if (and isseq (not (null? (cdr F)))) (error "only the trailing ccall argument type should have \"...\"")) (if (eq? ty 'Any) (loop (if isseq F (cdr F)) (cdr A) stmts (list* '(core Any) T) (list* a C) GC) @@ -1412,7 +1426,7 @@ (scope-block ,finalb))))) ((length> e 3) (and (length> e 6) (error "invalid \"try\" form")) - (let ((elseb (if (length= e 6) (cdddddr e) '()))) + (let ((elseb (if (length= e 6) `((scope-block ,@(cdddddr e))) '()))) (expand-forms `(,(if (null? 
elseb) 'trycatch 'trycatchelse) (scope-block ,tryb) @@ -1659,7 +1673,7 @@ (define (kwcall-unless-empty f pa kw-container-test kw-container) `(if (call (top isempty) ,kw-container-test) (call ,f ,@pa) - (call (call (core kwfunc) ,f) ,kw-container ,f ,@pa))) + (call (core kwcall) ,kw-container ,f ,@pa))) (let ((f (if (sym-ref? fexpr) fexpr (make-ssavalue))) (kw-container (make-ssavalue))) @@ -1673,7 +1687,7 @@ #t)) ,(if (every vararg? kw) (kwcall-unless-empty f pa kw-container kw-container) - `(call (call (core kwfunc) ,f) ,kw-container ,f ,@pa))))) + `(call (core kwcall) ,kw-container ,f ,@pa))))) ;; convert `a+=b` to `a=a+b` (define (expand-update-operator- op op= lhs rhs declT) @@ -2261,7 +2275,7 @@ ;; `n`: total nr of lhs args ;; `end`: car collects statements to be executed afterwards. ;; In general, actual assignments should only happen after -;; the whole iterater is desctructured (https://github.com/JuliaLang/julia/issues/40574) +;; the whole iterator is desctructured (https://github.com/JuliaLang/julia/issues/40574) (define (destructure- i lhss xx n st end) (if (null? lhss) '() @@ -2352,7 +2366,7 @@ `(= ,lhs ,rhs))) (define (expand-forms e) - (if (or (atom? e) (memq (car e) '(quote inert top core globalref outerref module toplevel ssavalue null true false meta using import export thismodule toplevel-only))) + (if (or (atom? e) (memq (car e) '(quote inert top core globalref outerref module toplevel ssavalue null true false meta using import export public thismodule toplevel-only))) e (let ((ex (get expand-table (car e) #f))) (if ex @@ -2610,7 +2624,9 @@ ((eq? f 'ccall) (if (not (length> e 4)) (error "too few arguments to ccall")) (let* ((cconv (cadddr e)) - (have-cconv (memq cconv '(cdecl stdcall fastcall thiscall llvmcall))) + (have-cconv-expr (and (pair? cconv) (eq? (car cconv) 'cconv))) + (have-cconv (or have-cconv-expr + (memq cconv '(cdecl stdcall fastcall thiscall llvmcall)))) (after-cconv (if have-cconv (cddddr e) (cdddr e))) (name (caddr e)) (RT (car after-cconv)) @@ -2623,9 +2639,13 @@ (eq? (car RT) 'tuple)) (error "ccall argument types must be a tuple; try \"(T,)\" and check if you specified a correct return type") (error "ccall argument types must be a tuple; try \"(T,)\""))) - (expand-forms - (lower-ccall name RT (cdr argtypes) args - (if have-cconv cconv 'ccall)))))) + (lower-ccall name RT (cdr argtypes) args + (if have-cconv + (if have-cconv-expr + (cadr cconv) + cconv) + 'ccall) + (and have-cconv-expr (caddr cconv)))))) ((any kwarg? (cddr e)) ;; f(..., a=b, ...) (expand-forms (lower-kw-call f (cddr e)))) ((has-parameters? (cddr e)) ;; f(...; ...) @@ -2907,18 +2927,17 @@ ,(construct-loops (reverse itrs) (reverse iv)) ,result))))) -(define (lhs-vars e) - (cond ((symdecl? e) (list (decl-var e))) - ((and (pair? e) (eq? (car e) 'tuple)) - (apply append (map lhs-vars (cdr e)))) - (else '()))) - (define (lhs-decls e) (cond ((symdecl? e) (list e)) - ((and (pair? e) (eq? (car e) 'tuple)) + ((and (pair? e) + (or (eq? (car e) 'tuple) + (eq? (car e) 'parameters))) (apply append (map lhs-decls (cdr e)))) (else '()))) +(define (lhs-vars e) + (map decl-var (lhs-decls e))) + (define (all-decl-vars e) ;; map decl-var over every level of an assignment LHS (cond ((eventually-call? e) e) ((decl? e) (decl-var e)) @@ -3251,8 +3270,9 @@ ((and (pair? e) (eq? (car e) 'with-static-parameters)) (free-vars- (cadr e) tab)) ((or (atom? e) (quoted? e)) tab) ((eq? (car e) 'lambda) - (let ((bound (lambda-all-vars e))) - (for-each (lambda (v) (if (not (memq v bound)) (put! 
tab v #t))) + (let ((bound (table))) + (for-each (lambda (b) (put! bound b #t)) (lambda-all-vars e)) + (for-each (lambda (v) (if (not (has? bound v)) (put! tab v #t))) (free-vars (lam:body e)))) tab) (else @@ -3345,9 +3365,9 @@ (let ((vi (get tab (cadr e) #f))) (if vi (vinfo:set-called! vi #t)) - ;; calls to functions with keyword args go through `kwfunc` first - (if (and (length= e 3) (equal? (cadr e) '(core kwfunc))) - (let ((vi2 (get tab (caddr e) #f))) + ;; calls to functions with keyword args have head of `kwcall` first + (if (and (length> e 3) (equal? (cadr e) '(core kwcall))) + (let ((vi2 (get tab (cadddr e) #f))) (if vi2 (vinfo:set-called! vi2 #t)))) (for-each (lambda (x) (analyze-vars x env captvars sp tab)) @@ -3463,20 +3483,20 @@ f(x) = yt(x) (define (vinfo:not-capt vi) - (list (car vi) (cadr vi) (logand (caddr vi) (lognot 5)))) + (list (car vi) (cadr vi) (logand (caddr vi) (lognot 1)))) (define (clear-capture-bits vinfos) (map vinfo:not-capt vinfos)) (define (convert-lambda lam fname interp capt-sp opaq) (let ((body (add-box-inits-to-body - lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq)))) + lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq (table) (vinfo-to-table (car (lam:vinfo lam))))))) `(lambda ,(lam:args lam) (,(clear-capture-bits (car (lam:vinfo lam))) () ,(caddr (lam:vinfo lam)) ,(delete-duplicates (append (lam:sp lam) capt-sp))) - ,body))) + ,body))) ;; renumber ssavalues assigned in an expr, allowing it to be repeated (define (renumber-assigned-ssavalues e) @@ -3496,26 +3516,34 @@ f(x) = yt(x) (cons (car x) (map do-replace (cdr x)))))))))) -(define (convert-for-type-decl rhs t) +(define (convert-for-type-decl rhs t assert lam) (if (equal? t '(core Any)) rhs - (let* ((temp (if (or (atom? t) (ssavalue? t) (quoted? t)) + (let* ((new-mutable-var + (lambda () (let ((g (gensy))) + (if lam (set-car! (lam:vinfo lam) (append (car (lam:vinfo lam)) `((,g Any 10))))) + g))) + (left (if (or (atom? t) (ssavalue? t) (quoted? t)) #f (make-ssavalue))) - (ty (or temp t)) - (ex `(call (core typeassert) - (call (top convert) ,ty ,rhs) - ,ty))) - (if temp - `(block (= ,temp ,(renumber-assigned-ssavalues t)) ,ex) - ex)))) + (temp (new-mutable-var)) ; use a slot to permit union-splitting this in inference + (ty (or left t)) + (ex `(call (top convert) ,ty ,temp)) + (ex (if assert `(call (core typeassert) ,ex ,ty) ex)) + (ex `(= ,temp ,ex)) + (ex `(if (call (core isa) ,temp ,ty) (null) ,ex)) + (t (if left (renumber-assigned-ssavalues t) t)) + (ex `((= ,temp ,rhs) ,ex ,temp)) + (ex (if left (cons `(= ,left ,t) ex) ex)) + (ex (if lam ex (cons `(local-def ,temp) ex)))) + (cons 'block ex)))) (define (capt-var-access var fname opaq) (if opaq `(call (core getfield) ,fname ,(get opaq var)) `(call (core getfield) ,fname (inert ,var)))) -(define (convert-global-assignment var rhs0 globals) +(define (convert-global-assignment var rhs0 globals lam) (let* ((rhs1 (if (or (simple-atom? rhs0) (equal? rhs0 '(the_exception))) rhs0 @@ -3523,7 +3551,7 @@ f(x) = yt(x) (ref (binding-to-globalref var)) (ty `(call (core get_binding_type) ,(cadr ref) (inert ,(caddr ref)))) (rhs (if (get globals ref #t) ;; no type declaration for constants - (convert-for-type-decl rhs1 ty) + (convert-for-type-decl rhs1 ty #f lam) rhs1)) (ex `(= ,var ,rhs))) (if (eq? rhs1 rhs0) @@ -3537,10 +3565,10 @@ f(x) = yt(x) ;; declared types. ;; when doing this, the original value needs to be preserved, to ;; ensure the expression `a=b` always returns exactly `b`. 
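A short Julia-level illustration of the invariant stated in the comment above (a sketch, not part of the patch): assignment to a variable with a declared type converts the stored value, but the assignment expression itself still evaluates to exactly the original right-hand side.

    function f()
        local x::Int8
        y = (x = 1)   # x is stored as Int8(1) after conversion...
        (x, y)        # ...but the assignment expression returned the original 1::Int
    end
    f() == (Int8(1), 1)   # true; y keeps the unconverted RHS
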
-(define (convert-assignment var rhs0 fname lam interp opaq globals) +(define (convert-assignment var rhs0 fname lam interp opaq globals locals) (cond ((symbol? var) - (let* ((vi (assq var (car (lam:vinfo lam)))) + (let* ((vi (get locals var #f)) (cv (assq var (cadr (lam:vinfo lam)))) (vt (or (and vi (vinfo:type vi)) (and cv (vinfo:type cv)) @@ -3550,14 +3578,12 @@ f(x) = yt(x) (if (and (not closed) (not capt) (equal? vt '(core Any))) (if (or (local-in? var lam) (underscore-symbol? var)) `(= ,var ,rhs0) - (convert-global-assignment var rhs0 globals)) + (convert-global-assignment var rhs0 globals lam)) (let* ((rhs1 (if (or (simple-atom? rhs0) (equal? rhs0 '(the_exception))) rhs0 (make-ssavalue))) - (rhs (if (equal? vt '(core Any)) - rhs1 - (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq)))) + (rhs (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq (table) locals) #t lam)) (ex (cond (closed `(call (core setfield!) ,(if interp `($ ,var) @@ -3572,7 +3598,7 @@ f(x) = yt(x) ,ex ,rhs1)))))) ((or (outerref? var) (globalref? var)) - (convert-global-assignment var rhs0 globals)) + (convert-global-assignment var rhs0 globals lam)) ((ssavalue? var) `(= ,var ,rhs0)) (else @@ -3632,12 +3658,22 @@ f(x) = yt(x) (loop (cdr xs)) elt))))) +; try to ignore some metadata expressions for implicit return sometimes +(define (only-meta? blk) + (let loop ((xs blk)) + (if (null? xs) + #t + (let ((elt (car xs))) + (if (and (pair? elt) (memq (car elt) '(lineinfo line loopinfo))) + (loop (cdr xs)) + #f))))) + ;; return `body` with `stmts` inserted after any meta nodes (define (insert-after-meta body stmts) (if (null? stmts) body (let ((meta (take-while (lambda (x) (and (pair? x) - (memq (car x) '(line meta)))) + (memq (car x) '(lineinfo line meta)))) (cdr body)))) `(,(car body) ,@meta @@ -3669,15 +3705,16 @@ f(x) = yt(x) (lambda (x) (and (pair? x) (not (eq? (car x) 'lambda))))))) (define lambda-opt-ignored-exprs - (Set '(quote top core line inert local-def unnecessary copyast + (Set '(quote top core lineinfo line inert local-def unnecessary copyast meta inbounds boundscheck loopinfo decl aliasscope popaliasscope thunk with-static-parameters toplevel-only global globalref outerref const-if-global thismodule const atomic null true false ssavalue isdefined toplevel module lambda - error gc_preserve_begin gc_preserve_end import using export inline noinline))) + error gc_preserve_begin gc_preserve_end import using export public inline noinline purity))) -(define (local-in? s lam) - (or (assq s (car (lam:vinfo lam))) +(define (local-in? s lam (tab #f)) + (or (and tab (has? tab s)) + (assq s (car (lam:vinfo lam))) (assq s (cadr (lam:vinfo lam))))) ;; Try to identify never-undef variables, and then clear the `captured` flag for single-assigned, @@ -3777,8 +3814,6 @@ f(x) = yt(x) (let ((prev (table.clone live)) (decl- (table.clone decl))) (let ((result (eager-any visit (cdr e)))) - (if (eq? (car e) '_while) - (kill)) ;; body might not have run (leave-loop! decl-) (if result #t @@ -3832,17 +3867,17 @@ f(x) = yt(x) (define (toplevel-preserving? e) (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse)))) -(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table))) +(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table))) (if toplevel (map (lambda (x) (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined (and toplevel (toplevel-preserving? 
x)) - interp opaq globals)))) + interp opaq globals locals)))) (if (null? (cdr tl)) (car tl) `(block ,@(cdr tl) ,(car tl))))) exprs) - (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals)) exprs))) + (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals locals)) exprs))) (define (prepare-lambda! lam) ;; mark all non-arguments as assigned, since locals that are never assigned @@ -3851,11 +3886,11 @@ f(x) = yt(x) (list-tail (car (lam:vinfo lam)) (length (lam:args lam)))) (lambda-optimize-vars! lam)) -(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table))) +(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table))) (if (and (not lam) (not (and (pair? e) (memq (car e) '(lambda method macro opaque_closure))))) (if (atom? e) e - (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals))) + (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals))) (cond ((symbol? e) (define (new-undef-var name) @@ -3874,12 +3909,12 @@ f(x) = yt(x) (val (if (equal? typ '(core Any)) val `(call (core typeassert) ,val - ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals))))) + ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals locals))))) `(block ,@(if (eq? box access) '() `((= ,access ,box))) ,undefcheck ,val))) - (let ((vi (assq e (car (lam:vinfo lam)))) + (let ((vi (get locals e #f)) (cv (assq e (cadr (lam:vinfo lam))))) (cond ((eq? e fname) e) ((memq e (lam:sp lam)) e) @@ -3898,7 +3933,7 @@ f(x) = yt(x) ((atom? e) e) (else (case (car e) - ((quote top core globalref outerref thismodule line break inert module toplevel null true false meta) e) + ((quote top core globalref outerref thismodule lineinfo line break inert module toplevel null true false meta) e) ((toplevel-only) ;; hack to avoid generating a (method x) expr for struct types (if (eq? (cadr e) 'struct) @@ -3906,15 +3941,15 @@ f(x) = yt(x) e) ((=) (let ((var (cadr e)) - (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals))) - (convert-assignment var rhs fname lam interp opaq globals))) + (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals locals))) + (convert-assignment var rhs fname lam interp opaq globals locals))) ((local-def) ;; make new Box for local declaration of defined variable - (let ((vi (assq (cadr e) (car (lam:vinfo lam))))) + (let ((vi (get locals (cadr e) #f))) (if (and vi (vinfo:asgn vi) (vinfo:capt vi)) `(= ,(cadr e) (call (core Box))) '(null)))) ((local) ;; convert local declarations to newvar statements - (let ((vi (assq (cadr e) (car (lam:vinfo lam))))) + (let ((vi (get locals (cadr e) #f))) (if (and vi (vinfo:asgn vi) (vinfo:capt vi)) `(= ,(cadr e) (call (core Box))) (if (vinfo:never-undef vi) @@ -3925,12 +3960,12 @@ f(x) = yt(x) e) ((atomic) e) ((const-if-global) - (if (local-in? (cadr e) lam) + (if (local-in? (cadr e) lam locals) '(null) `(const ,(cadr e)))) ((isdefined) ;; convert isdefined expr to function for closure converted variables (let* ((sym (cadr e)) - (vi (and (symbol? sym) (assq sym (car (lam:vinfo lam))))) + (vi (and (symbol? sym) (get locals sym #f))) (cv (and (symbol? sym) (assq sym (cadr (lam:vinfo lam)))))) (cond ((eq? 
sym fname) e) ((memq sym (lam:sp lam)) e) @@ -3970,13 +4005,13 @@ f(x) = yt(x) (lam2 (if short #f (cadddr e))) (vis (if short '(() () ()) (lam:vinfo lam2))) (cvs (map car (cadr vis))) - (local? (lambda (s) (and lam (symbol? s) (local-in? s lam)))) + (local? (lambda (s) (and lam (symbol? s) (local-in? s lam locals)))) (local (and (not (outerref? (cadr e))) (local? name))) (sig (and (not short) (caddr e))) (sp-inits (if (or short (not (eq? (car sig) 'block))) '() (map-cl-convert (butlast (cdr sig)) - fname lam namemap defined toplevel interp opaq globals))) + fname lam namemap defined toplevel interp opaq globals locals))) (sig (and sig (if (eq? (car sig) 'block) (last sig) sig)))) @@ -4003,10 +4038,11 @@ f(x) = yt(x) ;; anonymous functions with keyword args generate global ;; functions that refer to the type of a local function (rename-sig-types sig namemap) - fname lam namemap defined toplevel interp opaq globals) + fname lam namemap defined toplevel interp opaq globals locals) ,(let ((body (add-box-inits-to-body lam2 - (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq)))) + (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq (table) + (vinfo-to-table (car (lam:vinfo lam2))))))) `(lambda ,(cadr lam2) (,(clear-capture-bits (car vis)) ,@(cdr vis)) @@ -4017,7 +4053,7 @@ f(x) = yt(x) (newlam (compact-and-renumber (linearize (car exprs)) 'none 0))) `(toplevel-butfirst (block ,@sp-inits - (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals) + (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals locals) ,(julia-bq-macro newlam))) ,@top-stmts)))) @@ -4120,7 +4156,7 @@ f(x) = yt(x) (append (map (lambda (gs tvar) (make-assignment gs `(call (core TypeVar) ',tvar (core Any)))) closure-param-syms closure-param-names) - `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals) + `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals locals) ,(convert-lambda lam2 (if iskw (caddr (lam:args lam2)) @@ -4159,7 +4195,7 @@ f(x) = yt(x) (begin (put! defined name #t) `(toplevel-butfirst - ,(convert-assignment name mk-closure fname lam interp opaq globals) + ,(convert-assignment name mk-closure fname lam interp opaq globals locals) ,@typedef ,@(map (lambda (v) `(moved-local ,v)) moved-vars) ,@sp-inits @@ -4167,42 +4203,43 @@ f(x) = yt(x) ((lambda) ;; happens inside (thunk ...) and generated function bodies (for-each (lambda (vi) (vinfo:set-asgn! vi #t)) (list-tail (car (lam:vinfo e)) (length (lam:args e)))) + (lambda-optimize-vars! e) (let ((body (map-cl-convert (cdr (lam:body e)) 'anon - (lambda-optimize-vars! e) + e (table) (table) (null? (cadr e)) ;; only toplevel thunks have 0 args - interp opaq globals))) + interp opaq globals (vinfo-to-table (car (lam:vinfo e)))))) `(lambda ,(cadr e) (,(clear-capture-bits (car (lam:vinfo e))) () ,@(cddr (lam:vinfo e))) (block ,@body)))) ;; remaining `::` expressions are type assertions ((|::|) - (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals)) + (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals locals)) ;; remaining `decl` expressions are only type assertions if the ;; argument is global or a non-symbol. ((decl) (cond ((and (symbol? (cadr e)) - (local-in? (cadr e) lam)) + (local-in? 
(cadr e) lam locals)) '(null)) (else (cl-convert - (let ((ref (binding-to-globalref (cadr e)))) - (if ref - (begin - (put! globals ref #t) - `(block - (toplevel-only set_binding_type! ,(cadr e)) - (call (core set_binding_type!) ,(cadr ref) (inert ,(caddr ref)) ,(caddr e)))) - `(call (core typeassert) ,@(cdr e)))) - fname lam namemap defined toplevel interp opaq globals)))) + (let ((ref (binding-to-globalref (cadr e)))) + (if ref + (begin + (put! globals ref #t) + `(block + (toplevel-only set_binding_type! ,(cadr e)) + (call (core set_binding_type!) ,(cadr ref) (inert ,(caddr ref)) ,(caddr e)))) + `(call (core typeassert) ,@(cdr e)))) + fname lam namemap defined toplevel interp opaq globals locals)))) ;; `with-static-parameters` expressions can be removed now; used only by analyze-vars ((with-static-parameters) - (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals)) + (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals locals)) (else (cons (car e) - (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals)))))))) + (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals)))))))) (define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f)) @@ -4217,17 +4254,21 @@ f(x) = yt(x) (else (for-each linearize (cdr e)))) e) +;; N.B.: This assumes that resolve-scopes has run, so outerref is equivalent to +;; a global in the current scope. (define (valid-ir-argument? e) - (or (simple-atom? e) (symbol? e) + (or (simple-atom? e) + (and (outerref? e) (nothrow-julia-global (cadr e))) + (and (globalref? e) (nothrow-julia-global (cadr e) (caddr e))) (and (pair? e) - (memq (car e) '(quote inert top core globalref outerref - slot static_parameter boundscheck))))) + (memq (car e) '(quote inert top core + slot static_parameter))))) (define (valid-ir-rvalue? lhs e) (or (ssavalue? lhs) (valid-ir-argument? e) (and (symbol? lhs) (pair? e) - (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure))))) + (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure globalref outerref))))) (define (valid-ir-return? e) ;; returning lambda directly is needed for @generated @@ -4246,6 +4287,7 @@ f(x) = yt(x) (current-loc #f) (rett #f) (global-const-error #f) + (vinfo-table (vinfo-to-table (car (lam:vinfo lam)))) (arg-map #f) ;; map arguments to new names if they are assigned (label-counter 0) ;; counter for generating label addresses (label-map (table)) ;; maps label names to generated addresses @@ -4255,9 +4297,10 @@ f(x) = yt(x) ;; be emitted at the exit of the block. Code ;; should enter the finally block via `enter-finally-block`. (handler-goto-fixups '()) ;; `goto`s that might need `leave` exprs added - (handler-level 0) ;; exception handler nesting depth + (handler-token-stack '()) ;; tokens identifying handler stack while active (catch-token-stack '())) ;; tokens identifying handler enter for current catch blocks (define (emit c) + (or c (raise "missing value in IR")) (set! 
code (cons c code)) c) (define (make-label) @@ -4285,12 +4328,13 @@ f(x) = yt(x) (emit `(= ,(car finally-handler) ,tag)) (if need-goto (let ((label (cadr finally-handler)) - (dest-handler-level (cadddr finally-handler)) - (dest-tokens (caddddr finally-handler))) + (dest-handler-tokens (cadddr finally-handler)) + (dest-catch-tokens (caddddr finally-handler))) ;; Leave current exception handling scope and jump to finally block - (let ((pexc (pop-exc-expr catch-token-stack dest-tokens))) + (let ((pexc (pop-exc-expr catch-token-stack dest-catch-tokens))) (if pexc (emit pexc))) - (emit `(leave ,(+ 1 (- handler-level dest-handler-level)))) + (let ((plist (pop-handler-list handler-token-stack (cdr dest-handler-tokens) '()))) + (emit `(leave ,@plist))) (emit `(goto ,label)))) tag)) (define (pop-exc-expr src-tokens dest-tokens) @@ -4303,6 +4347,18 @@ f(x) = yt(x) (car s) (loop (cdr s)))))) `(pop_exception ,restore-token)))) + (define (pop-handler-list src-tokens dest-tokens lab) + (if (eq? src-tokens dest-tokens) + #f + (let loop ((s src-tokens) + (l '())) + (if (not (pair? s)) + (if (null? lab) + (error "Attempt to jump into catch block") + (error (string "cannot goto label \"" lab "\" inside try/catch block")))) + (if (eq? (cdr s) dest-tokens) + (cons (car s) l) + (loop (cdr s) (cons (car s) l)))))) (define (emit-return x) (define (emit- x) (let* ((tmp (if ((if (null? catch-token-stack) valid-ir-return? simple-atom?) x) @@ -4313,34 +4369,34 @@ f(x) = yt(x) x))) (define (actually-return x) (let* ((x (if rett - (compile (convert-for-type-decl (emit- x) rett) '() #t #f) + (compile (convert-for-type-decl (emit- x) rett #t lam) '() #t #f) x)) (x (emit- x))) (let ((pexc (pop-exc-expr catch-token-stack '()))) (if pexc (emit pexc))) (emit `(return ,x)))) (if x - (if (> handler-level 0) + (if (null? handler-token-stack) + (actually-return x) (let ((tmp (cond ((and (simple-atom? x) (or (not (ssavalue? x)) (not finally-handler))) #f) (finally-handler (new-mutable-var)) (else (make-ssavalue))))) (if tmp (emit `(= ,tmp ,x))) (if finally-handler (enter-finally-block `(return ,(or tmp x))) - (begin (emit `(leave ,handler-level)) + (begin (emit `(leave ,@handler-token-stack)) (actually-return (or tmp x)))) - (or tmp x)) - (actually-return x)))) + (or tmp x))))) (define (emit-break labl) - (let ((lvl (caddr labl)) + (let ((dest-handler-tokens (caddr labl)) (dest-tokens (cadddr labl))) - (if (and finally-handler (> (cadddr finally-handler) lvl)) + (if (and finally-handler (> (length (cadddr finally-handler)) (length dest-handler-tokens))) (enter-finally-block `(break ,labl)) (begin (let ((pexc (pop-exc-expr catch-token-stack dest-tokens))) (if pexc (emit pexc))) - (if (> handler-level lvl) - (emit `(leave ,(- handler-level lvl)))) + (let ((plist (pop-handler-list handler-token-stack dest-handler-tokens '()))) + (if plist (emit `(leave ,@plist)))) (emit `(goto ,(cadr labl))))))) (define (new-mutable-var . name) (let ((g (if (null? name) (gensy) (named-gensy (car name))))) @@ -4358,48 +4414,64 @@ f(x) = yt(x) (else (string "\"" h "\" expression")))) (if (not (null? (cadr lam))) (error (string (head-to-text (car e)) " not at top level")))) + (define (valid-body-ir-argument? aval) + (or (valid-ir-argument? aval) + (and (symbol? aval) ; Arguments are always defined slots + (or (memq aval (lam:args lam)) + (let ((vi (get vinfo-table aval #f))) + (and vi (vinfo:never-undef vi))))))) + (define (single-assign-var? aval) + (and (symbol? 
aval) ; Arguments are always sa + (or (memq aval (lam:args lam)) + (let ((vi (get vinfo-table aval #f))) + (and vi (vinfo:sa vi)))))) + ;; TODO: We could also allow const globals here + (define (const-read-arg? x) + ;; Even if we have side effects, we know that singly-assigned + ;; locals cannot be affected them, so we can inline them anyway. + (or (simple-atom? x) (single-assign-var? x) + (and (pair? x) + (memq (car x) '(quote inert top core))))) ;; evaluate the arguments of a call, creating temporary locations as needed (define (compile-args lst break-labels) (if (null? lst) '() - (let ((simple? (every (lambda (x) (or (simple-atom? x) (symbol? x) - (and (pair? x) - (memq (car x) '(quote inert top core globalref outerref boundscheck))))) - lst))) - (let loop ((lst lst) - (vals '())) - (if (null? lst) - (reverse! vals) - (let* ((arg (car lst)) - (aval (or (compile arg break-labels #t #f) - ;; TODO: argument exprs that don't yield a value? - '(null)))) - (loop (cdr lst) - (cons (if (and (not simple?) - (not (simple-atom? arg)) - (not (simple-atom? aval)) - (not (and (pair? arg) - (memq (car arg) '(quote inert top core globalref outerref boundscheck)))) - (not (and (symbol? aval) ;; function args are immutable and always assigned - (memq aval (lam:args lam)))) - (not (and (symbol? arg) - (or (null? (cdr lst)) - (null? vals))))) - (let ((tmp (make-ssavalue))) - (emit `(= ,tmp ,aval)) - tmp) - aval) - vals)))))))) + ;; First check if all the arguments as simple (and therefore side-effect free). + ;; Otherwise, we need to use ssa values for all arguments to ensure proper + ;; left-to-right evaluation semantics. + (let ((simple? (every (lambda (x) (or (simple-atom? x) (symbol? x) + (and (pair? x) + (memq (car x) '(quote inert top core globalref outerref))))) + lst))) + (let loop ((lst lst) + (vals '())) + (if (null? lst) + (reverse! vals) + (let* ((arg (car lst)) + (aval (or (compile arg break-labels #t #f) + ;; TODO: argument exprs that don't yield a value? + '(null)))) + (loop (cdr lst) + (cons (if (and + (or simple? (const-read-arg? aval)) + (valid-body-ir-argument? aval)) + aval + (let ((tmp (make-ssavalue))) + (emit `(= ,tmp ,aval)) + tmp)) + vals)))))))) (define (compile-cond ex break-labels) (let ((cnd (or (compile ex break-labels #t #f) ;; TODO: condition exprs that don't yield a value? '(null)))) - (if (not (valid-ir-argument? cnd)) + (if (valid-body-ir-argument? cnd) cnd (let ((tmp (make-ssavalue))) (emit `(= ,tmp ,cnd)) - tmp) - cnd))) + tmp)))) (define (emit-cond cnd break-labels endl) (let* ((cnd (if (and (pair? cnd) (eq? (car cnd) 'block)) + (flatten-ex 'block cnd) + cnd)) + (cnd (if (and (pair? cnd) (eq? (car cnd) 'block)) (begin (if (length> cnd 2) (compile (butlast cnd) break-labels #f #f)) (last cnd)) cnd)) @@ -4427,7 +4499,8 @@ f(x) = yt(x) (emit `(= ,lhs ,rhs)) (let ((rr (make-ssavalue))) (emit `(= ,rr ,rhs)) - (emit `(= ,lhs ,rr))))) + (emit `(= ,lhs ,rr)))) + (emit `(= ,lhs (null)))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved #f) ;; the interpreter loop. `break-labels` keeps track of the labels to jump to ;; for all currently closing break-blocks. @@ -4445,7 +4518,7 @@ f(x) = yt(x) (and (pair? e) (or (eq? (car e) 'outerref) (eq? (car e) 'globalref)) (underscore-symbol? 
(cadr e))))) - (error (string "all-underscore identifier used as rvalue" (format-loc current-loc)))) + (error (string "all-underscore identifiers are write-only and their values cannot be used in expressions" (format-loc current-loc)))) (cond (tail (emit-return e1)) (value e1) ((symbol? e1) (emit e1) #f) ;; keep symbols for undefined-var checking @@ -4522,12 +4595,14 @@ f(x) = yt(x) (file-diff (not (eq? fname last-fname))) ;; don't need a filename node for start of function (need-meta (and file-diff last-fname - (not (eq? e (lam:body lam)))))) + (not (eq? e (lam:body lam))))) + (emit-final-meta (lambda ()))) (if file-diff (set! filename fname)) (if need-meta (emit `(meta push_loc ,fname))) (let ((v (let loop ((xs (cdr e))) - (if (null? (cdr xs)) - (compile (car xs) break-labels value tail) + (if (only-meta? (cdr xs)) + (begin (set! emit-final-meta (lambda () (map (lambda (v) (compile v break-labels #f #f)) (cdr xs)))) + (compile (car xs) break-labels value tail)) (begin (compile (car xs) break-labels #f #f) (loop (cdr xs))))))) (if need-meta @@ -4543,16 +4618,20 @@ f(x) = yt(x) (let ((tmp (make-ssavalue))) (emit `(= ,tmp ,retv)) (set! retv tmp))) + (emit-final-meta) (emit '(meta pop_loc)) (emit `(return ,retv))) (emit '(meta pop_loc)))) - ((and value (not (simple-atom? v))) + ((and v value (not (simple-atom? v))) (let ((tmp (make-ssavalue))) (emit `(= ,tmp ,v)) (set! v tmp) + (emit-final-meta) (emit `(meta pop_loc)))) (else - (emit `(meta pop_loc))))) + (emit-final-meta) + (emit `(meta pop_loc)))) + (emit-final-meta)) (if file-diff (set! filename last-fname)) v))) ((return) @@ -4605,7 +4684,7 @@ f(x) = yt(x) ((break-block) (let ((endl (make-label))) (compile (caddr e) - (cons (list (cadr e) endl handler-level catch-token-stack) + (cons (list (cadr e) endl handler-token-stack catch-token-stack) break-labels) #f #f) (mark-label endl)) @@ -4619,7 +4698,7 @@ f(x) = yt(x) (if (eq? (car e) 'symboliclabel) (if (has? label-nesting (cadr e)) (error (string "label \"" (cadr e) "\" defined multiple times")) - (put! label-nesting (cadr e) (list handler-level catch-token-stack)))) + (put! label-nesting (cadr e) (list handler-token-stack catch-token-stack)))) (let ((m (get label-map (cadr e) #f))) (if m (emit `(label ,m)) @@ -4635,11 +4714,13 @@ f(x) = yt(x) (emit `(null)) ;; save space for `leave` that might be needed (emit `(goto ,m)) (set! handler-goto-fixups - (cons (list code handler-level catch-token-stack (cadr e)) handler-goto-fixups)) + (cons (list code handler-token-stack catch-token-stack (cadr e)) handler-goto-fixups)) #f)) ;; exception handlers are lowered using - ;; (= tok (enter L)) - push handler with catch block at label L, yielding token + ;; (= tok (enter L scope)) + ;; push handler with catch block at label L and scope `scope`, yielding token + ;; `scope` is only recognized for tryfinally and may be omitted in the lowering ;; (leave n) - pop N exception handlers ;; (pop_exception tok) - pop exception stack back to state of associated enter ((trycatch tryfinally trycatchelse) @@ -4649,11 +4730,12 @@ f(x) = yt(x) (endl (make-label)) (last-finally-handler finally-handler) (finally (if (eq? (car e) 'tryfinally) (new-mutable-var) #f)) + (scope (if (eq? (car e) 'tryfinally) (cdddr e) '())) (my-finally-handler #f)) ;; handler block entry - (emit `(= ,handler-token (enter ,catch))) - (set! handler-level (+ handler-level 1)) - (if finally (begin (set! 
my-finally-handler (list finally endl '() handler-level catch-token-stack)) + (emit `(= ,handler-token (enter ,catch ,@(compile-args scope break-labels)))) + (set! handler-token-stack (cons handler-token handler-token-stack)) + (if finally (begin (set! my-finally-handler (list finally endl '() handler-token-stack catch-token-stack)) (set! finally-handler my-finally-handler) (emit `(= ,finally -1)))) (let* ((v1 (compile (cadr e) break-labels value #f)) ;; emit try block code @@ -4664,21 +4746,21 @@ f(x) = yt(x) (if tail (begin (if els (begin (if (and (not val) v1) (emit v1)) - (emit '(leave 1))) + (emit `(leave ,handler-token))) (if v1 (emit-return v1))) (if (not finally) (set! endl #f))) - (begin (emit '(leave 1)) + (begin (emit `(leave ,handler-token)) (emit `(goto ,(or els endl))))) - (set! handler-level (- handler-level 1)) + (set! handler-token-stack (cdr handler-token-stack)) ;; emit else block (if els (begin (mark-label els) (let ((v3 (compile (cadddr e) break-labels value tail))) ;; emit else block code (if val (emit-assignment val v3))) - (emit `(goto ,endl)))) - ;; emit either catch or finally block + (if endl (emit `(goto ,endl))))) + ;; emit either catch or finally block. A combined try/catch/finally block was split into + ;; separate trycatch and tryfinally blocks earlier. (mark-label catch) - (emit `(leave 1)) (if finally (begin (enter-finally-block '(call (top rethrow)) #f) ;; enter block via exception (mark-label endl) ;; non-exceptional control flow enters here @@ -4716,7 +4798,7 @@ f(x) = yt(x) ;; avoid duplicate newvar nodes (if (and (not (and (pair? code) (equal? (car code) e))) ;; exclude deleted vars - (assq (cadr e) (car (lam:vinfo lam)))) + (has? vinfo-table (cadr e))) (emit e) #f)) ((global) ; keep global declarations as statements @@ -4785,7 +4867,7 @@ f(x) = yt(x) val)) ;; other top level expressions - ((import using export) + ((import using export public) (check-top-level e) (emit e) (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return)))) @@ -4798,7 +4880,7 @@ f(x) = yt(x) (cons (car e) args))) ;; metadata expressions - ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline) + ((lineinfo line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline purity) (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return)))) (cond ((eq? (car e) 'line) (set! current-loc e) @@ -4836,21 +4918,18 @@ f(x) = yt(x) (compile e '() #t #t) (for-each (lambda (x) (let ((point (car x)) - (hl (cadr x)) - (src-tokens (caddr x)) + (src-handler-tokens (cadr x)) + (src-catch-tokens (caddr x)) (lab (cadddr x))) (let ((target-nesting (get label-nesting lab #f))) (if (not target-nesting) (error (string "label \"" lab "\" referenced but not defined"))) - (let ((target-level (car target-nesting))) - (cond ((> target-level hl) - (error (string "cannot goto label \"" lab "\" inside try/catch block"))) - ((= target-level hl) - (set-cdr! point (cddr point))) ;; remove empty slot - (else - (set-car! (cdr point) `(leave ,(- hl target-level)))))) - (let ((pexc (pop-exc-expr src-tokens (cadr target-nesting)))) - (if pexc (set-cdr! point (cons pexc (cdr point)))))))) + (let ((target-handler-tokens (car target-nesting)) + (target-catch-tokens (cadr target-nesting))) + (let ((plist (pop-handler-list src-handler-tokens target-handler-tokens lab))) + (if plist (set-car! (cdr point) `(leave ,@plist)))) + (let ((pexc (pop-exc-expr src-catch-tokens target-catch-tokens))) + (if pexc (set-cdr! 
point (cons pexc (cdr point))))))))) handler-goto-fixups) (if global-const-error (error (string "`global const` declaration not allowed inside function" (format-loc global-const-error)))) @@ -4929,24 +5008,24 @@ f(x) = yt(x) (let ((code '(block)) (locs '(list)) (linetable '(list)) + (linetablelen 0) (labltable (table)) (ssavtable (table)) - (reachable #t) (current-loc 0) (current-file file) (current-line line) (locstack '()) (i 1)) (define (emit e) + (or e (raise "missing value in IR")) (if (and (null? (cdr linetable)) (not (and (pair? e) (eq? (car e) 'meta)))) (begin (set! linetable (cons (make-lineinfo name file line) linetable)) + (set! linetablelen (+ linetablelen 1)) (set! current-loc 1))) - (if (or reachable - (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope inline noinline)))) - (begin (set! code (cons e code)) - (set! i (+ i 1)) - (set! locs (cons current-loc locs))))) + (set! code (cons e code)) + (set! i (+ i 1)) + (set! locs (cons current-loc locs))) (let loop ((stmts (cdr body))) (if (pair? stmts) (let ((e (car stmts))) @@ -4964,13 +5043,15 @@ f(x) = yt(x) (make-lineinfo name current-file current-line) (make-lineinfo name current-file current-line (caar locstack))) linetable)) - (set! current-loc (- (length linetable) 1))))) + (set! linetablelen (+ linetablelen 1)) + (set! current-loc linetablelen)))) ((and (length> e 2) (eq? (car e) 'meta) (eq? (cadr e) 'push_loc)) (set! locstack (cons (list current-loc current-line current-file) locstack)) (set! current-file (caddr e)) (set! current-line 0) (set! linetable (cons (make-lineinfo name current-file current-line current-loc) linetable)) - (set! current-loc (- (length linetable) 1))) + (set! linetablelen (+ linetablelen 1)) + (set! current-loc linetablelen)) ((and (length= e 2) (eq? (car e) 'meta) (eq? (cadr e) 'pop_loc)) (let ((l (car locstack))) (set! locstack (cdr locstack)) @@ -4978,7 +5059,6 @@ f(x) = yt(x) (set! current-line (cadr l)) (set! current-file (caddr l)))) ((eq? (car e) 'label) - (set! reachable #t) (put! labltable (cadr e) i)) ((and (assignment? e) (ssavalue? (cadr e))) (let ((idx (and (ssavalue? (caddr e)) (get ssavtable (cadr (caddr e)) #f)))) @@ -4989,9 +5069,7 @@ f(x) = yt(x) (put! ssavtable (cadr (cadr e)) i) (emit (caddr e)))))) (else - (emit e) - (if (or (eq? (car e) 'goto) (eq? (car e) 'return)) - (set! reachable #f)))) + (emit e))) (loop (cdr stmts))))) (vector (reverse code) (reverse locs) (reverse linetable) ssavtable labltable))) @@ -5009,7 +5087,8 @@ f(x) = yt(x) (define slot-table (symbol-to-idx-map (map car (car (lam:vinfo lam))))) (define sp-table (symbol-to-idx-map (lam:sp lam))) (define (renumber-stuff e) - (cond ((symbol? e) + (cond ((eq? e UNUSED) (error "Attempted to use slot marked unused")) + ((symbol? e) (let ((idx (get slot-table e #f))) (if idx `(slot ,idx) @@ -5025,11 +5104,13 @@ f(x) = yt(x) ((or (atom? e) (quoted? e) (eq? (car e) 'global)) e) ((ssavalue? e) - (let ((idx (or (get ssavalue-table (cadr e) #f) - (error "ssavalue with no def")))) + (let ((idx (get ssavalue-table (cadr e) #f))) + (if (not idx) (begin (prn e) (prn lam) (error "ssavalue with no def"))) `(ssavalue ,idx))) - ((memq (car e) '(goto enter)) - (list* (car e) (get label-table (cadr e)) (cddr e))) + ((eq? (car e) 'goto) + `(goto ,(get label-table (cadr e)))) + ((eq? (car e) 'enter) + `(enter ,(get label-table (cadr e)) ,@(map renumber-stuff (cddr e)))) ((eq? (car e) 'gotoifnot) `(gotoifnot ,(renumber-stuff (cadr e)) ,(get label-table (caddr e)))) ((eq? 
(car e) 'lambda) @@ -5072,8 +5153,8 @@ f(x) = yt(x) (define *current-desugar-loc* #f) -(define (julia-expand0 ex file line) - (with-bindings ((*current-desugar-loc* `(line ,line ,file))) +(define (julia-expand0 ex lno) + (with-bindings ((*current-desugar-loc* lno)) (trycatch (expand-forms ex) (lambda (e) (if (and (pair? e) (eq? (car e) 'error)) @@ -5088,4 +5169,4 @@ f(x) = yt(x) (define (julia-expand ex (file 'none) (line 0)) (julia-expand1 (julia-expand0 - (julia-expand-macroscope ex) file line) file line)) + (julia-expand-macroscope ex) `(line ,line ,file)) file line)) diff --git a/src/julia.expmap b/src/julia.expmap.in similarity index 73% rename from src/julia.expmap rename to src/julia.expmap.in index 4b4a792de52b9..213d087fdc2ad 100644 --- a/src/julia.expmap +++ b/src/julia.expmap.in @@ -1,10 +1,11 @@ -{ +@JULIA_SHLIB_SYMBOL_VERSION@ { global: pthread*; __stack_chk_guard; asprintf; bitvector_*; ios_*; + arraylist_grow; small_arraylist_grow; jl_*; ijl_*; @@ -17,10 +18,7 @@ memhash32; memhash32_seed; memhash_seed; - restore_arg_area_loc; restore_signals; - rl_clear_input; - save_arg_area_loc; u8_*; uv_*; add_library_mapping; @@ -31,19 +29,21 @@ _Z24jl_coverage_data_pointerN4llvm9StringRefEi; _Z22jl_coverage_alloc_lineN4llvm9StringRefEi; _Z22jl_malloc_data_pointerN4llvm9StringRefEi; + _jl_timing_*; LLVMExtra*; + JLJIT*; llvmGetPassPluginInfo; /* freebsd */ environ; __progname; - /* compiler run-time intrinsics */ - __gnu_h2f_ieee; - __extendhfsf2; - __gnu_f2h_ieee; - __truncdfhf2; - local: *; }; + +@LLVM_SHLIB_SYMBOL_VERSION@ { + global: + /* Make visible so that linker will merge duplicate definitions across DSO boundaries */ + _ZN4llvm3Any6TypeId*; +}; diff --git a/src/julia.h b/src/julia.h index 1dfd6ea239d77..44dc913209c6b 100644 --- a/src/julia.h +++ b/src/julia.h @@ -3,11 +3,17 @@ #ifndef JULIA_H #define JULIA_H -#ifdef LIBRARY_EXPORTS -#include "jl_internal_funcs.inc" +#if defined(JL_LIBRARY_EXPORTS_INTERNAL) || defined(JL_LIBRARY_EXPORTS_CODEGEN) +#define JL_LIBRARY_EXPORTS +#endif +#ifdef JL_LIBRARY_EXPORTS +// Generated file, needs to be searched in include paths so that the builddir +// retains priority +#include <jl_internal_funcs.inc> #undef jl_setjmp #undef jl_longjmp #undef jl_egal +#undef jl_genericmemory_owner #endif #include "julia_fasttls.h" @@ -68,7 +74,7 @@ typedef struct _jl_taggedvalue_t jl_taggedvalue_t; typedef struct _jl_tls_states_t *jl_ptls_t; -#ifdef LIBRARY_EXPORTS +#ifdef JL_LIBRARY_EXPORTS #include "uv.h" #endif #include "julia_atomics.h" @@ -89,6 +95,13 @@ typedef struct _jl_value_t jl_value_t; struct _jl_taggedvalue_bits { uintptr_t gc:2; + uintptr_t in_image:1; + uintptr_t unused:1; +#ifdef _P64 + uintptr_t tag:60; +#else + uintptr_t tag:28; +#endif }; JL_EXTENSION struct _jl_taggedvalue_t { @@ -101,20 +114,24 @@ JL_EXTENSION struct _jl_taggedvalue_t { // jl_value_t value; }; +static inline jl_value_t *jl_to_typeof(uintptr_t t) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; #ifdef __clang_gcanalyzer__ JL_DLLEXPORT jl_taggedvalue_t *_jl_astaggedvalue(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; #define jl_astaggedvalue(v) _jl_astaggedvalue((jl_value_t*)(v)) jl_value_t *_jl_valueof(jl_taggedvalue_t *tv JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; #define jl_valueof(v) _jl_valueof((jl_taggedvalue_t*)(v)) JL_DLLEXPORT jl_value_t *_jl_typeof(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; -#define jl_typeof(v) _jl_typeof((jl_value_t*)(v)) +#define jl_typeof(v) (_jl_typeof((jl_value_t*)(v))) +#define jl_typetagof(v) ((uintptr_t)_jl_typeof((jl_value_t*)(v))) #else #define jl_astaggedvalue(v)
\ ((jl_taggedvalue_t*)((char*)(v) - sizeof(jl_taggedvalue_t))) -#define jl_valueof(v) \ +#define jl_valueof(v) \ ((jl_value_t*)((char*)(v) + sizeof(jl_taggedvalue_t))) #define jl_typeof(v) \ - ((jl_value_t*)(jl_astaggedvalue(v)->header & ~(uintptr_t)15)) + jl_to_typeof(jl_typetagof(v)) +#define jl_typetagof(v) \ + ((jl_astaggedvalue(v)->header) & ~(uintptr_t)15) #endif static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT { @@ -123,6 +140,8 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)&tag->type, (jl_value_t*)t); } #define jl_typeis(v,t) (jl_typeof(v)==(jl_value_t*)(t)) +#define jl_typetagis(v,t) (jl_typetagof(v)==(uintptr_t)(t)) +#define jl_set_typetagof(v,t,gc) (jl_set_typeof((v), (void*)(((uintptr_t)(t) << 4) | (gc)))) // Symbols are interned strings (hash-consed) stored as an invasive binary tree. // The string data is nul-terminated and hangs off the end of the struct. @@ -150,50 +169,42 @@ typedef struct { // jl_value_t *data[]; } jl_svec_t; -typedef struct { - /* - how - allocation style - 0 = data is inlined, or a foreign pointer we don't manage - 1 = julia-allocated buffer that needs to be marked - 2 = malloc-allocated pointer this array object manages - 3 = has a pointer to the object that owns the data - */ - uint16_t how:2; - uint16_t ndims:9; - uint16_t pooled:1; - uint16_t ptrarray:1; // representation is pointer array - uint16_t hasptr:1; // representation has embedded pointers - uint16_t isshared:1; // data is shared by multiple Arrays - uint16_t isaligned:1; // data allocated with memalign -} jl_array_flags_t; - JL_EXTENSION typedef struct { JL_DATA_TYPE - void *data; size_t length; - jl_array_flags_t flags; - uint16_t elsize; // element size including alignment (dim 1 memory stride) - uint32_t offset; // for 1-d only. does not need to get big. - size_t nrows; - union { - // 1d - size_t maxsize; - // Nd - size_t ncols; - }; - // other dim sizes go here for ndims > 2 + void *ptr; + // followed by padding and inline data, or owner pointer +#ifdef _P64 + // union { + // jl_value_t *owner; + // T inl[]; + // }; +#else + // + // jl_value_t *owner; + // size_t padding[1]; + // T inl[]; +#endif +} jl_genericmemory_t; - // followed by alignment padding and inline data, or owner pointer +JL_EXTENSION typedef struct { + JL_DATA_TYPE + void *ptr_or_offset; + jl_genericmemory_t *mem; +} jl_genericmemoryref_t; + +JL_EXTENSION typedef struct { + JL_DATA_TYPE + jl_genericmemoryref_t ref; + size_t dimsize[]; // length for 1-D, otherwise length is mem->length } jl_array_t; -// compute # of extra words needed to store dimensions -STATIC_INLINE int jl_array_ndimwords(uint32_t ndims) JL_NOTSAFEPOINT -{ - return (ndims < 3 ? 0 : ndims-2); -} typedef struct _jl_datatype_t jl_tupletype_t; struct _jl_code_instance_t; +typedef struct _jl_method_instance_t jl_method_instance_t; +typedef struct _jl_globalref_t jl_globalref_t; + // TypeMap is an implicitly defined type // that can consist of any of the following nodes: @@ -211,60 +222,71 @@ typedef jl_call_t *jl_callptr_t; // "speccall" calling convention signatures. // This describes some of the special ABI used by compiled julia functions. 
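[Editorial note, not part of the patch: the comment above introduces the "speccall" calling conventions. As a hedged illustration only, the sketch below shows what an entry point with the jl_fptr_args_t shape (declared just below) looks like; the function name identity_or_nothing is hypothetical and exists only for this example.]

/* Editorial sketch, assuming only the jl_fptr_args_t typedef declared below:
 * the callee receives the function object, a vector of boxed arguments, and
 * the argument count, and returns a boxed result. */
#include <stdint.h>
#include "julia.h"

static jl_value_t *identity_or_nothing(jl_value_t *f, jl_value_t **args, uint32_t nargs)
{
    (void)f;                // the function object itself is passed as the first parameter
    if (nargs >= 1)
        return args[0];     // every argument arrives boxed as a jl_value_t*
    return jl_nothing;      // jl_nothing is one of the rooted globals declared in this header
}

Roughly, the jl_fptr_args wrapper declared below is what dispatches to compiled pointers of this shape when a code instance uses the boxed-arguments convention.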
extern jl_call_t jl_fptr_args; -JL_DLLEXPORT extern jl_callptr_t jl_fptr_args_addr; +JL_DLLEXPORT extern const jl_callptr_t jl_fptr_args_addr; typedef jl_value_t *(*jl_fptr_args_t)(jl_value_t*, jl_value_t**, uint32_t); extern jl_call_t jl_fptr_const_return; -JL_DLLEXPORT extern jl_callptr_t jl_fptr_const_return_addr; +JL_DLLEXPORT extern const jl_callptr_t jl_fptr_const_return_addr; extern jl_call_t jl_fptr_sparam; -JL_DLLEXPORT extern jl_callptr_t jl_fptr_sparam_addr; +JL_DLLEXPORT extern const jl_callptr_t jl_fptr_sparam_addr; typedef jl_value_t *(*jl_fptr_sparam_t)(jl_value_t*, jl_value_t**, uint32_t, jl_svec_t*); extern jl_call_t jl_fptr_interpret_call; -JL_DLLEXPORT extern jl_callptr_t jl_fptr_interpret_call_addr; +JL_DLLEXPORT extern const jl_callptr_t jl_fptr_interpret_call_addr; -typedef struct _jl_method_instance_t jl_method_instance_t; +JL_DLLEXPORT extern const jl_callptr_t jl_f_opaque_closure_call_addr; typedef struct _jl_line_info_node_t { struct _jl_module_t *module; - jl_value_t *method; + jl_value_t *method; // may contain a jl_symbol, jl_method_t, or jl_method_instance_t jl_sym_t *file; - intptr_t line; - intptr_t inlined_at; + int32_t line; + int32_t inlined_at; } jl_line_info_node_t; -// the following mirrors `struct EffectsOverride` in `base/compiler/types.jl` +// the following mirrors `struct EffectsOverride` in `base/compiler/effects.jl` typedef union __jl_purity_overrides_t { struct { - uint8_t ipo_consistent : 1; - uint8_t ipo_effect_free : 1; - uint8_t ipo_nothrow : 1; - uint8_t ipo_terminates : 1; + uint16_t ipo_consistent : 1; + uint16_t ipo_effect_free : 1; + uint16_t ipo_nothrow : 1; + uint16_t ipo_terminates_globally : 1; // Weaker form of `terminates` that asserts // that any control flow syntactically in the method // is guaranteed to terminate, but does not make // assertions about any called functions. 
- uint8_t ipo_terminates_locally : 1; - uint8_t ipo_notaskstate : 1; + uint16_t ipo_terminates_locally : 1; + uint16_t ipo_notaskstate : 1; + uint16_t ipo_inaccessiblememonly : 1; + uint16_t ipo_noub : 1; + uint16_t ipo_noub_if_noinbounds : 1; } overrides; - uint8_t bits; + uint16_t bits; } _jl_purity_overrides_t; +#define NUM_EFFECTS_OVERRIDES 9 +#define NUM_IR_FLAGS 11 + // This type describes a single function body typedef struct _jl_code_info_t { // ssavalue-indexed arrays of properties: jl_array_t *code; // Any array of statements jl_value_t *codelocs; // Int32 array of indices into the line table jl_value_t *ssavaluetypes; // types of ssa values (or count of them) - jl_array_t *ssaflags; // flags associated with each statement: - // 0 = inbounds - // 1 = inline - // 2 = noinline - // 3 = strict-ieee (strictfp) - // 4 = effect-free (may be deleted if unused) - // 5-6 = - // 7 = has out-of-band info + jl_array_t *ssaflags; // 32 bits flags associated with each statement: + // 1 << 0 = inbounds region + // 1 << 1 = callsite inline region + // 1 << 2 = callsite noinline region + // 1 << 3 = throw block + // 1 << 4 = :effect_free + // 1 << 5 = :nothrow + // 1 << 6 = :consistent + // 1 << 7 = :refined + // 1 << 8 = :noub + // 1 << 9 = :effect_free_if_inaccessiblememonly + // 1 << 10 = :inaccessiblemem_or_argmemonly + // 1 << 11-18 = callsite effects overrides // miscellaneous data: jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference jl_value_t *linetable; // Table of locations [TODO: make this volatile like slotnames] @@ -279,12 +301,15 @@ typedef struct _jl_code_info_t { size_t max_world; // various boolean properties: uint8_t inferred; - uint8_t inlineable; uint8_t propagate_inbounds; - uint8_t pure; + uint8_t has_fcall; + uint8_t nospecializeinfer; // uint8 settings + uint8_t inlining; // 0 = default; 1 = @inline; 2 = @noinline uint8_t constprop; // 0 = use heuristic; 1 = aggressive; 2 = none _jl_purity_overrides_t purity; + // uint16 settings + uint16_t inlining_cost; } jl_code_info_t; // This type describes a single method definition, and stores data @@ -302,18 +327,18 @@ typedef struct _jl_method_t { jl_value_t *sig; // table of all jl_method_instance_t specializations we have - _Atomic(jl_svec_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....] - _Atomic(jl_array_t*) speckeyset; // index lookup by hash into specializations + _Atomic(jl_value_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....], or a single item + _Atomic(jl_genericmemory_t*) speckeyset; // index lookup by hash into specializations jl_value_t *slot_syms; // compacted list of slot names (String) jl_value_t *external_mt; // reference to the method table this method is part of, null if part of the internal table jl_value_t *source; // original code template (jl_code_info_t, but may be compressed), null for builtins - _Atomic(struct _jl_method_instance_t*) unspecialized; // unspecialized executable method instance, or null + _Atomic(jl_method_instance_t*) unspecialized; // unspecialized executable method instance, or null jl_value_t *generator; // executable code-generating function if available jl_array_t *roots; // pointers in generated code (shared to reduce memory), or null // Identify roots by module-of-origin. We only track the module for roots added during incremental compilation. 
// May be NULL if no external roots have been added, otherwise it's a Vector{UInt64} - jl_array_t *root_blocks; // RLE (build_id, offset) pairs (even/odd indexing) + jl_array_t *root_blocks; // RLE (build_id.lo, offset) pairs (even/odd indexing) int32_t nroots_sysimg; // # of roots stored in the system image jl_svec_t *ccallable; // svec(rettype, sig) if a ccallable entry point is requested for this @@ -325,7 +350,7 @@ typedef struct _jl_method_t { // A function that compares two specializations of this method, returning // `true` if the first signature is to be considered "smaller" than the // second for purposes of recursion analysis. Set to NULL to use - // the default recusion relation. + // the default recursion relation. jl_value_t *recursion_relation; uint32_t nargs; @@ -333,11 +358,14 @@ typedef struct _jl_method_t { uint32_t nospecialize; // bit flags: which arguments should not be specialized uint32_t nkw; // # of leading arguments that are actually keyword arguments // of another method. + // various boolean properties uint8_t isva; - uint8_t pure; uint8_t is_for_opaque_closure; + uint8_t nospecializeinfer; // uint8 settings - uint8_t constprop; // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none + uint8_t constprop; // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none + uint8_t max_varargs; // 0xFF = use heuristic; otherwise, max # of args to expand + // varargs when specializing. // Override the conclusions of inter-procedural effect analysis, // forcing the conclusion to always true. @@ -359,17 +387,18 @@ struct _jl_method_instance_t { jl_method_t *method; // method this is specialized from } def; // pointer back to the context for this code jl_value_t *specTypes; // argument types this was specialized for - jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sparam_syms - jl_value_t *uninferred; // cached uncompressed code, for generated functions, top-level thunks, or the interpreter - jl_array_t *backedges; // list of method-instances which contain a call into this method-instance + jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sig + _Atomic(jl_value_t*) uninferred; // cached uncompressed code, for generated functions, top-level thunks, or the interpreter + jl_array_t *backedges; // list of method-instances which call this method-instance; `invoke` records (invokesig, caller) pairs jl_array_t *callbacks; // list of callback functions to inform external caches about invalidations _Atomic(struct _jl_code_instance_t*) cache; uint8_t inInference; // flags to tell if inference is running on this object - uint8_t precompiled; // true if this instance was generated by an explicit `precompile(...)` call + uint8_t cache_with_orig; // !cache_with_specTypes + _Atomic(uint8_t) precompiled; // true if this instance was generated by an explicit `precompile(...)` call }; // OpaqueClosure -typedef struct jl_opaque_closure_t { +typedef struct _jl_opaque_closure_t { JL_DATA_TYPE jl_value_t *captures; size_t world; @@ -390,43 +419,40 @@ typedef struct _jl_code_instance_t { // inference state cache jl_value_t *rettype; // return type for fptr + jl_value_t *exctype; // thrown type for fptr jl_value_t *rettype_const; // inferred constant return value, or null - jl_value_t *inferred; // inferred jl_code_info_t, or jl_nothing, or null + _Atomic(jl_value_t *) inferred; // inferred jl_code_info_t (may be compressed), or jl_nothing, or null //TODO: jl_array_t *edges; // stored information about edges from this object //TODO: uint8_t 
absolute_max; // whether true max world is unknown // purity results -#ifdef JL_USE_ANON_UNIONS_FOR_PURITY_FLAGS - // see also encode_effects() and decode_effects() in `base/compiler/types.jl`, - union { - uint32_t ipo_purity_bits; - struct { - uint8_t ipo_consistent:2; - uint8_t ipo_effect_free:2; - uint8_t ipo_nothrow:2; - uint8_t ipo_terminates:2; - uint8_t ipo_nonoverlayed:1; - } ipo_purity_flags; - }; - union { - uint32_t purity_bits; - struct { - uint8_t consistent:2; - uint8_t effect_free:2; - uint8_t nothrow:2; - uint8_t terminates:2; - uint8_t nonoverlayed:1; - } purity_flags; - }; -#else + // see also encode_effects() and decode_effects() in `base/compiler/effects.jl`, uint32_t ipo_purity_bits; - uint32_t purity_bits; -#endif - jl_value_t *argescapes; // escape information of call arguments + // ipo_purity_flags: + // uint8_t ipo_consistent : 2; + // uint8_t ipo_effect_free : 2; + // uint8_t ipo_nothrow : 2; + // uint8_t ipo_terminates : 2; + // uint8_t ipo_nonoverlayed : 1; + // uint8_t ipo_notaskstate : 2; + // uint8_t ipo_inaccessiblememonly : 2; + _Atomic(uint32_t) purity_bits; + // purity_flags: + // uint8_t consistent : 2; + // uint8_t effect_free : 2; + // uint8_t nothrow : 2; + // uint8_t terminates : 2; + // uint8_t nonoverlayed : 1; + // uint8_t notaskstate : 2; + // uint8_t inaccessiblememonly : 2; + jl_value_t *analysis_results; // Analysis results about this code (IPO-safe) // compilation state cache - uint8_t isspecsig; // if specptr is a specialized function signature for specTypes->rettype + _Atomic(uint8_t) specsigflags; // & 0b001 == specptr is a specialized function signature for specTypes->rettype + // & 0b010 == invokeptr matches specptr + // & 0b100 == From image _Atomic(uint8_t) precompile; // if set, this will be added to the output system image + uint8_t relocatability; // nonzero if all roots are built into sysimg or tagged by module key _Atomic(jl_callptr_t) invoke; // jlcall entry point union _jl_generic_specptr_t { _Atomic(void*) fptr; @@ -435,7 +461,6 @@ typedef struct _jl_code_instance_t { _Atomic(jl_fptr_sparam_t) fptr3; // 4 interpreter } specptr; // private data for `jlcall entry point - uint8_t relocatability; // nonzero if all roots are built into sysimg or tagged by module key } jl_code_instance_t; // all values are callable as Functions @@ -480,6 +505,7 @@ typedef struct { uint8_t abstract:1; uint8_t mutabl:1; uint8_t mayinlinealloc:1; + uint8_t _reserved:5; uint8_t max_methods; // override for inference's max_methods setting (0 = no additional limit or relaxation) } jl_typename_t; @@ -509,12 +535,19 @@ typedef struct { } jl_fielddesc32_t; typedef struct { + uint32_t size; uint32_t nfields; uint32_t npointers; // number of pointers embedded inside int32_t first_ptr; // index of the first pointer (or -1) uint16_t alignment; // strictest alignment over all fields - uint16_t haspadding : 1; // has internal undefined bytes - uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32, 3 -> foreign type + struct { // combine these fields into a struct so that we can take addressof them + uint16_t haspadding : 1; // has internal undefined bytes + uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32, 3 -> foreign type + // metadata bit only for GenericMemory eltype layout + uint16_t arrayelem_isboxed : 1; + uint16_t arrayelem_isunion : 1; + uint16_t padding : 11; + } flags; // union { // jl_fielddesc8_t field8[nfields]; // jl_fielddesc16_t field16[nfields]; @@ -535,16 +568,19 @@ typedef struct _jl_datatype_t { jl_svec_t *types; jl_value_t 
*instance; // for singletons const jl_datatype_layout_t *layout; - int32_t size; // TODO: move to _jl_datatype_layout_t - // memoized properties + // memoized properties (set on construction) uint32_t hash; - uint8_t hasfreetypevars:1; // majority part of isconcrete computation - uint8_t isconcretetype:1; // whether this type can have instances - uint8_t isdispatchtuple:1; // aka isleaftupletype - uint8_t isbitstype:1; // relevant query for C-api and type-parameters - uint8_t zeroinit:1; // if one or more fields requires zero-initialization - uint8_t has_concrete_subtype:1; // If clear, no value will have this datatype - uint8_t cached_by_hash:1; // stored in hash-based set cache (instead of linear cache) + uint16_t hasfreetypevars:1; // majority part of isconcrete computation + uint16_t isconcretetype:1; // whether this type can have instances + uint16_t isdispatchtuple:1; // aka isleaftupletype + uint16_t isbitstype:1; // relevant query for C-api and type-parameters + uint16_t zeroinit:1; // if one or more fields requires zero-initialization + uint16_t has_concrete_subtype:1; // If clear, no value will have this datatype + uint16_t maybe_subtype_of_cache:1; // Computational bit for has_concrete_supertype. See description in jltypes.c. + uint16_t isprimitivetype:1; // whether this is declared with 'primitive type' keyword (sized, no fields, and immutable) + uint16_t ismutationfree:1; // whether any mutable memory is reachable through this type (in the type or via fields) + uint16_t isidentityfree:1; // whether this type or any object reachable through its fields has non-content-based identity + uint16_t smalltag:6; // whether this type has a small-tag optimization } jl_datatype_t; typedef struct _jl_vararg_t { @@ -553,22 +589,24 @@ typedef struct _jl_vararg_t { jl_value_t *N; } jl_vararg_t; -typedef struct { +typedef struct _jl_weakref_t { JL_DATA_TYPE jl_value_t *value; } jl_weakref_t; -typedef struct { - // not first-class - jl_sym_t *name; +typedef struct _jl_binding_t { + JL_DATA_TYPE _Atomic(jl_value_t*) value; - _Atomic(jl_value_t*) globalref; // cached GlobalRef for this binding - struct _jl_module_t* owner; // for individual imported bindings -- TODO: make _Atomic + jl_globalref_t *globalref; // cached GlobalRef for this binding + _Atomic(struct _jl_binding_t*) owner; // for individual imported bindings (NULL until 'resolved') _Atomic(jl_value_t*) ty; // binding type uint8_t constp:1; - uint8_t exportp:1; + uint8_t exportp:1; // `public foo` sets `publicp`, `export foo` sets both `publicp` and `exportp` + uint8_t publicp:1; // exportp without publicp is not allowed. 
uint8_t imported:1; + uint8_t usingfailed:1; uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package + uint8_t padding:1; } jl_binding_t; typedef struct { @@ -580,10 +618,11 @@ typedef struct _jl_module_t { JL_DATA_TYPE jl_sym_t *name; struct _jl_module_t *parent; + _Atomic(jl_svec_t*) bindings; + _Atomic(jl_genericmemory_t*) bindingkeyset; // index lookup by name into bindings // hidden fields: - htable_t bindings; arraylist_t usings; // modules with all bindings potentially imported - uint64_t build_id; + jl_uuid_t build_id; jl_uuid_t uuid; size_t primary_world; _Atomic(uint32_t) counter; @@ -594,8 +633,15 @@ typedef struct _jl_module_t { uint8_t istopmod; int8_t max_methods; jl_mutex_t lock; + intptr_t hash; } jl_module_t; +typedef struct _jl_globalref_t { + jl_module_t *mod; + jl_sym_t *name; + jl_binding_t *binding; +} jl_globalref_t; + // one Type-to-Value entry typedef struct _jl_typemap_entry_t { JL_DATA_TYPE @@ -624,10 +670,10 @@ typedef struct _jl_typemap_level_t { // next split may be on Type{T} as LeafTypes then TypeName's parents up to Any // next split may be on LeafType // next split may be on TypeName - _Atomic(jl_array_t*) arg1; // contains LeafType - _Atomic(jl_array_t*) targ; // contains Type{LeafType} - _Atomic(jl_array_t*) name1; // contains non-abstract TypeName, for parents up to (excluding) Any - _Atomic(jl_array_t*) tname; // contains a dict of Type{TypeName}, for parents up to Any + _Atomic(jl_genericmemory_t*) arg1; // contains LeafType (in a map of non-abstract TypeName) + _Atomic(jl_genericmemory_t*) targ; // contains Type{LeafType} (in a map of non-abstract TypeName) + _Atomic(jl_genericmemory_t*) name1; // a map for a map for TypeName, for parents up to (excluding) Any + _Atomic(jl_genericmemory_t*) tname; // a map for Type{TypeName}, for parents up to (including) Any // next a linear list of things too complicated at this level for analysis (no more levels) _Atomic(jl_typemap_entry_t*) linear; // finally, start a new level if the type at offs is Any @@ -637,14 +683,13 @@ typedef struct _jl_typemap_level_t { // contains the TypeMap for one Type typedef struct _jl_methtable_t { JL_DATA_TYPE - jl_sym_t *name; // sometimes a hack used by serialization to handle kwsorter + jl_sym_t *name; // sometimes used for debug printing _Atomic(jl_typemap_t*) defs; - _Atomic(jl_array_t*) leafcache; + _Atomic(jl_genericmemory_t*) leafcache; _Atomic(jl_typemap_t*) cache; - intptr_t max_args; // max # of non-vararg arguments in a signature - jl_value_t *kwsorter; // keyword argument sorter function - jl_module_t *module; // used for incremental serialization to locate original binding - jl_array_t *backedges; + _Atomic(intptr_t) max_args; // max # of non-vararg arguments in a signature + jl_module_t *module; // sometimes used for debug printing + jl_array_t *backedges; // (sig, caller::MethodInstance) pairs jl_mutex_t writelock; uint8_t offs; // 0, or 1 to skip splitting typemap on first (function) argument uint8_t frozen; // whether this accepts adding new methods @@ -668,6 +713,69 @@ typedef struct { // constants and type objects ------------------------------------------------- +#define JL_SMALL_TYPEOF(XX) \ + /* kinds */ \ + XX(typeofbottom) \ + XX(datatype) \ + XX(unionall) \ + XX(uniontype) \ + /* type parameter objects */ \ + XX(vararg) \ + XX(tvar) \ + XX(symbol) \ + XX(module) \ + /* special GC objects */ \ + XX(simplevector) \ + XX(string) \ + XX(task) \ + /* bits types with special allocators */ \ + XX(bool) \ + XX(char) \ + /*XX(float16)*/ \ 
+ /*XX(float32)*/ \ + /*XX(float64)*/ \ + XX(int16) \ + XX(int32) \ + XX(int64) \ + XX(int8) \ + XX(uint16) \ + XX(uint32) \ + XX(uint64) \ + XX(uint8) \ + /* AST objects */ \ + /* XX(argument) */ \ + /* XX(newvarnode) */ \ + /* XX(slotnumber) */ \ + /* XX(ssavalue) */ \ + /* end of JL_SMALL_TYPEOF */ +enum jl_small_typeof_tags { + jl_null_tag = 0, +#define XX(name) jl_##name##_tag, + JL_SMALL_TYPEOF(XX) +#undef XX + jl_tags_count, + jl_bitstags_first = jl_char_tag, // n.b. bool is not considered a bitstype, since it can be compared by pointer + jl_max_tags = 64 +}; +extern JL_DLLIMPORT jl_datatype_t *jl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +#ifndef JL_LIBRARY_EXPORTS_INTERNAL +static inline jl_value_t *jl_to_typeof(uintptr_t t) +{ + if (t < (jl_max_tags << 4)) + return (jl_value_t*)jl_small_typeof[t / sizeof(*jl_small_typeof)]; + return (jl_value_t*)t; +} +#else +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +static inline jl_value_t *jl_to_typeof(uintptr_t t) +{ + if (t < (jl_max_tags << 4)) + return (jl_value_t*)ijl_small_typeof[t / sizeof(*ijl_small_typeof)]; + return (jl_value_t*)t; +} +#endif + + // kinds extern JL_DLLIMPORT jl_datatype_t *jl_typeofbottom_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_datatype_type JL_GLOBALLY_ROOTED; @@ -681,13 +789,12 @@ extern JL_DLLIMPORT jl_datatype_t *jl_typename_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_typename_t *jl_type_typename JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_symbol_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_ssavalue_type JL_GLOBALLY_ROOTED; -extern JL_DLLIMPORT jl_datatype_t *jl_abstractslot_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_slotnumber_type JL_GLOBALLY_ROOTED; -extern JL_DLLIMPORT jl_datatype_t *jl_typedslot_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_argument_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_const_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_partial_struct_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_partial_opaque_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_interconditional_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_method_match_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_simplevector_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_typename_t *jl_tuple_typename JL_GLOBALLY_ROOTED; @@ -708,10 +815,17 @@ extern JL_DLLIMPORT jl_datatype_t *jl_code_instance_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_code_info_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_method_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_module_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_unionall_t *jl_addrspace_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_typename_t *jl_addrspace_typename JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_addrspacecore_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_unionall_t *jl_abstractarray_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_unionall_t *jl_densearray_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_unionall_t *jl_array_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_typename_t *jl_array_typename JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_unionall_t *jl_genericmemory_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_typename_t *jl_genericmemory_typename JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_unionall_t *jl_genericmemoryref_type 
JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_typename_t *jl_genericmemoryref_typename JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_weakref_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_abstractstring_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_string_type JL_GLOBALLY_ROOTED; @@ -723,6 +837,7 @@ extern JL_DLLIMPORT jl_datatype_t *jl_typeerror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_methoderror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_undefvarerror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_atomicerror_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_missingcodeerror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_lineinfonode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_stackovf_exception JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_memory_exception JL_GLOBALLY_ROOTED; @@ -730,8 +845,10 @@ extern JL_DLLIMPORT jl_value_t *jl_readonlymemory_exception JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_diverror_exception JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_undefref_exception JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_interrupt_exception JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_precompilable_error JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_boundserror_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_an_empty_vec_any JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_an_empty_memory_any JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_an_empty_string JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_bool_type JL_GLOBALLY_ROOTED; @@ -747,6 +864,7 @@ extern JL_DLLIMPORT jl_datatype_t *jl_uint64_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_float16_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_float32_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_float64_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_bfloat16_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_floatingpoint_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_number_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_void_type JL_GLOBALLY_ROOTED; // deprecated @@ -768,12 +886,22 @@ extern JL_DLLIMPORT jl_value_t *jl_array_uint8_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_array_any_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_array_symbol_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_array_int32_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_array_uint32_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_array_uint64_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_uint8_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_uint16_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_uint32_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_uint64_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memory_any_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memoryref_uint8_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_memoryref_any_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_expr_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_binding_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_globalref_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t 
*jl_linenumbernode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_gotonode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_gotoifnot_type JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_datatype_t *jl_enternode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_returnnode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_phinode_type JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_datatype_t *jl_pinode_type JL_GLOBALLY_ROOTED; @@ -791,6 +919,9 @@ extern JL_DLLIMPORT jl_value_t *jl_emptytuple JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_true JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_false JL_GLOBALLY_ROOTED; extern JL_DLLIMPORT jl_value_t *jl_nothing JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_value_t *jl_kwcall_func JL_GLOBALLY_ROOTED; + +extern JL_DLLIMPORT jl_value_t *jl_libdl_dlopen_func JL_GLOBALLY_ROOTED; // gc ------------------------------------------------------------------------- @@ -824,6 +955,7 @@ extern void JL_GC_PUSH3(void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH4(void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH5(void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void JL_GC_PUSH7(void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; +extern void JL_GC_PUSH8(void *, void *, void *, void *, void *, void *, void *, void *) JL_NOTSAFEPOINT; extern void _JL_GC_PUSHARGS(jl_value_t **, size_t) JL_NOTSAFEPOINT; // This is necessary, because otherwise the analyzer considers this undefined // behavior and terminates the exploration @@ -863,6 +995,9 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT; #define JL_GC_PUSH7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(7), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7}; \ jl_pgcstack = (jl_gcframe_t*)__gc_stkf; +#define JL_GC_PUSH8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(8), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8}; \ + jl_pgcstack = (jl_gcframe_t*)__gc_stkf; #define JL_GC_PUSHARGS(rts_var,n) \ @@ -889,6 +1024,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t); JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_finalize(jl_value_t *o); JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value); JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void); @@ -899,12 +1035,15 @@ JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz); JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, struct _jl_task_t *owner) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz); JL_DLLEXPORT void jl_gc_use(jl_value_t *a); +// Set GC memory trigger in bytes for greedy memory collecting +JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); +JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void); JL_DLLEXPORT void jl_clear_malloc_data(void); // GC write barriers JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t *stored) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const void *stored, jl_datatype_t *dt) JL_NOTSAFEPOINT; STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { 
@@ -924,6 +1063,7 @@ STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { + // 3 == GC_OLD_MARKED // ptr is an immutable object if (__likely(jl_astaggedvalue(parent)->bits.gc != 3)) return; // parent is young or in remset @@ -932,13 +1072,20 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr); const jl_datatype_layout_t *ly = dt->layout; if (ly->npointers) - jl_gc_queue_multiroot((jl_value_t*)parent, ptr); + jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt); } JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned, jl_value_t *owner); JL_DLLEXPORT void jl_gc_safepoint(void); +JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate); +JL_DLLEXPORT int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT; + +void *mtarraylist_get(small_arraylist_t *_a, size_t idx) JL_NOTSAFEPOINT; +size_t mtarraylist_length(small_arraylist_t *_a) JL_NOTSAFEPOINT; +void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx) JL_NOTSAFEPOINT; +void mtarraylist_push(small_arraylist_t *_a, void *elt) JL_NOTSAFEPOINT; // object accessors ----------------------------------------------------------- @@ -954,7 +1101,7 @@ STATIC_INLINE jl_value_t *jl_svecset( #else STATIC_INLINE jl_value_t *jl_svecref(void *t JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT { - assert(jl_typeis(t,jl_simplevector_type)); + assert(jl_typetagis(t,jl_simplevector_tag << 4)); assert(i < jl_svec_len(t)); // while svec is supposedly immutable, in practice we sometimes publish it first // and set the values lazily @@ -964,26 +1111,112 @@ STATIC_INLINE jl_value_t *jl_svecset( void *t JL_ROOTING_ARGUMENT JL_PROPAGATES_ROOT, size_t i, void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT { - assert(jl_typeis(t,jl_simplevector_type)); + assert(jl_typetagis(t,jl_simplevector_tag << 4)); assert(i < jl_svec_len(t)); - // TODO: while svec is supposedly immutable, in practice we sometimes publish it first - // and set the values lazily. Those users should be using jl_atomic_store_release here. - jl_svec_data(t)[i] = (jl_value_t*)x; + // while svec is supposedly immutable, in practice we sometimes publish it + // first and set the values lazily. Those users occasionally might need to + // instead use jl_atomic_store_release here. 
+ jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)jl_svec_data(t) + i, (jl_value_t*)x); jl_gc_wb(t, x); return (jl_value_t*)x; } #endif -#define jl_array_len(a) (((jl_array_t*)(a))->length) -#define jl_array_data(a) ((void*)((jl_array_t*)(a))->data) -#define jl_array_dim(a,i) ((&((jl_array_t*)(a))->nrows)[i]) -#define jl_array_dim0(a) (((jl_array_t*)(a))->nrows) -#define jl_array_nrows(a) (((jl_array_t*)(a))->nrows) -#define jl_array_ndims(a) ((int32_t)(((jl_array_t*)a)->flags.ndims)) -#define jl_array_data_owner_offset(ndims) (offsetof(jl_array_t,ncols) + sizeof(size_t)*(1+jl_array_ndimwords(ndims))) // in bytes -#define jl_array_data_owner(a) (*((jl_value_t**)((char*)a + jl_array_data_owner_offset(jl_array_ndims(a))))) +#define jl_genericmemory_data_owner_field(a) (*(jl_value_t**)((jl_genericmemory_t*)(a) + 1)) -JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT; +#define jl_nparams(t) jl_svec_len(((jl_datatype_t*)(t))->parameters) +#define jl_tparam0(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 0) +#define jl_tparam1(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 1) +#define jl_tparam2(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 2) +#define jl_tparam(t,i) jl_svecref(((jl_datatype_t*)(t))->parameters, i) +#define jl_array_data(a,t) ((t*)((jl_array_t*)(a))->ref.ptr_or_offset) +#define jl_array_data_(a) ((void*)((jl_array_t*)(a))->ref.ptr_or_offset) +#define jl_array_dim(a,i) (((jl_array_t*)(a))->dimsize[i]) +#define jl_array_dim0(a) (((jl_array_t*)(a))->dimsize[0]) +#define jl_array_nrows(a) (((jl_array_t*)(a))->dimsize[0]) +#define jl_array_ndims(a) (*(size_t*)jl_tparam1(jl_typetagof(a))) +#define jl_array_maxsize(a) (((jl_array_t*)(a))->ref.mem->length) +#define jl_array_len(a) (jl_array_ndims(a) == 1 ? jl_array_nrows(a) : jl_array_maxsize(a)) + +/* + how - allocation style + 0 = data is inlined + 1 = owns the gc-managed data, exclusively + 2 = malloc-allocated pointer (may or may not own it) + 3 = has a pointer to the object that owns the data pointer +*/ +STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + if (m->ptr == (void*)((char*)m + 16)) // JL_SMALL_BYTE_ALIGNMENT (from julia_internal.h) + return 0; + jl_value_t *owner = jl_genericmemory_data_owner_field(m); + if (owner == (jl_value_t*)m) + return 1; + if (owner == NULL) + return 2; + return 3; +} + +STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + if (jl_genericmemory_how(m) == 3) + return jl_genericmemory_data_owner_field(m); + return (jl_value_t*)m; +} + +JL_DLLEXPORT char *jl_genericmemory_typetagdata(jl_genericmemory_t *m) JL_NOTSAFEPOINT; + +#ifdef __clang_gcanalyzer__ +jl_value_t **jl_genericmemory_ptr_data(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +STATIC_INLINE jl_value_t *jl_genericmemory_ptr_ref(void *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; +STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set( + void *m JL_ROOTING_ARGUMENT, size_t i, + void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT; +#else +#define jl_genericmemory_ptr_data(a) ((jl_value_t**)((jl_genericmemory_t*)(a))->ptr) +STATIC_INLINE jl_value_t *jl_genericmemory_ptr_ref(void *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT +{ + jl_genericmemory_t *m_ = (jl_genericmemory_t*)m; + assert(((jl_datatype_t*)jl_typetagof(m_))->layout->flags.arrayelem_isboxed); + assert(i < m_->length); + return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(m_->ptr)) + i); +} +STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set( + void *m 
JL_ROOTING_ARGUMENT, size_t i, + void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT +{ + jl_genericmemory_t *m_ = (jl_genericmemory_t*)m; + assert(((jl_datatype_t*)jl_typetagof(m_))->layout->flags.arrayelem_isboxed); + assert(i < m_->length); + jl_atomic_store_release(((_Atomic(jl_value_t*)*)(m_->ptr)) + i, (jl_value_t*)x); + if (x) { + if (jl_genericmemory_how(m_) == 3) + m = (void*)jl_genericmemory_data_owner_field(m_); + jl_gc_wb(m, x); + } + return (jl_value_t*)x; +} +#endif + +STATIC_INLINE uint8_t jl_memory_uint8_ref(void *m, size_t i) JL_NOTSAFEPOINT +{ + jl_genericmemory_t *m_ = (jl_genericmemory_t*)m; + assert(jl_typetagis(m_, jl_memory_uint8_type)); + assert(i < m_->length); + return ((uint8_t*)m_->ptr)[i]; +} +STATIC_INLINE void jl_memory_uint8_set(void *m, size_t i, uint8_t x) JL_NOTSAFEPOINT +{ + jl_genericmemory_t *m_ = (jl_genericmemory_t*)m; + assert(jl_typetagis(m_, jl_memory_uint8_type)); + assert(i < m_->length); + ((uint8_t*)m_->ptr)[i] = x; +} + +STATIC_INLINE jl_value_t *jl_array_owner(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +{ + return jl_genericmemory_owner(a->ref.mem); +} #ifdef __clang_gcanalyzer__ jl_value_t **jl_array_ptr_data(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; @@ -992,25 +1225,22 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set( void *a JL_ROOTING_ARGUMENT, size_t i, void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT; #else -#define jl_array_ptr_data(a) ((jl_value_t**)((jl_array_t*)(a))->data) +#define jl_array_ptr_data(a) (jl_array_data(a, jl_value_t*)) STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT { - assert(((jl_array_t*)a)->flags.ptrarray); + assert(((jl_datatype_t*)jl_typetagof(((jl_array_t*)a)->ref.mem))->layout->flags.arrayelem_isboxed); assert(i < jl_array_len(a)); - return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i); + return jl_atomic_load_relaxed(jl_array_data(a, _Atomic(jl_value_t*)) + i); } STATIC_INLINE jl_value_t *jl_array_ptr_set( void *a JL_ROOTING_ARGUMENT, size_t i, void *x JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT { - assert(((jl_array_t*)a)->flags.ptrarray); + assert(((jl_datatype_t*)jl_typetagof(((jl_array_t*)a)->ref.mem))->layout->flags.arrayelem_isboxed); assert(i < jl_array_len(a)); - jl_atomic_store_release(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i, (jl_value_t*)x); + jl_atomic_store_release(jl_array_data(a, _Atomic(jl_value_t*)) + i, (jl_value_t*)x); if (x) { - if (((jl_array_t*)a)->flags.how == 3) { - a = jl_array_data_owner(a); - } - jl_gc_wb(a, x); + jl_gc_wb(jl_array_owner((jl_array_t*)a), x); } return (jl_value_t*)x; } @@ -1018,20 +1248,26 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set( STATIC_INLINE uint8_t jl_array_uint8_ref(void *a, size_t i) JL_NOTSAFEPOINT { + assert(jl_typetagis(a, jl_array_uint8_type)); assert(i < jl_array_len(a)); - assert(jl_typeis(a, jl_array_uint8_type)); - return ((uint8_t*)(jl_array_data(a)))[i]; + return jl_array_data(a, uint8_t)[i]; } STATIC_INLINE void jl_array_uint8_set(void *a, size_t i, uint8_t x) JL_NOTSAFEPOINT { + assert(jl_typetagis(a, jl_array_uint8_type)); assert(i < jl_array_len(a)); - assert(jl_typeis(a, jl_array_uint8_type)); - ((uint8_t*)(jl_array_data(a)))[i] = x; + jl_array_data(a, uint8_t)[i] = x; +} +STATIC_INLINE void jl_array_uint32_set(void *a, size_t i, uint32_t x) JL_NOTSAFEPOINT +{ + assert(i < jl_array_len(a)); + assert(jl_typetagis(a, jl_array_uint32_type) || jl_typetagis(a, jl_array_int32_type)); + jl_array_data(a, uint32_t)[i] = x; } #define jl_exprarg(e,n) 
jl_array_ptr_ref(((jl_expr_t*)(e))->args, n) #define jl_exprargset(e, n, v) jl_array_ptr_set(((jl_expr_t*)(e))->args, n, v) -#define jl_expr_nargs(e) jl_array_len(((jl_expr_t*)(e))->args) +#define jl_expr_nargs(e) jl_array_nrows(((jl_expr_t*)(e))->args) #define jl_fieldref(s,i) jl_get_nth_field(((jl_value_t*)(s)),i) #define jl_fieldref_noalloc(s,i) jl_get_nth_field_noalloc(((jl_value_t*)(s)),i) @@ -1045,28 +1281,26 @@ STATIC_INLINE void jl_array_uint8_set(void *a, size_t i, uint8_t x) JL_NOTSAFEPO #define jl_gotonode_label(x) (((intptr_t*)(x))[0]) #define jl_gotoifnot_cond(x) (((jl_value_t**)(x))[0]) #define jl_gotoifnot_label(x) (((intptr_t*)(x))[1]) +#define jl_enternode_catch_dest(x) (((intptr_t*)(x))[0]) +#define jl_enternode_scope(x) (((jl_value_t**)(x))[1]) #define jl_globalref_mod(s) (*(jl_module_t**)(s)) #define jl_globalref_name(s) (((jl_sym_t**)(s))[1]) #define jl_quotenode_value(x) (((jl_value_t**)x)[0]) #define jl_returnnode_value(x) (((jl_value_t**)x)[0]) -#define jl_nparams(t) jl_svec_len(((jl_datatype_t*)(t))->parameters) -#define jl_tparam0(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 0) -#define jl_tparam1(t) jl_svecref(((jl_datatype_t*)(t))->parameters, 1) -#define jl_tparam(t,i) jl_svecref(((jl_datatype_t*)(t))->parameters, i) - // get a pointer to the data in a datatype #define jl_data_ptr(v) ((jl_value_t**)v) #define jl_string_data(s) ((char*)s + sizeof(void*)) #define jl_string_len(s) (*(size_t*)s) -#define jl_gf_mtable(f) (((jl_datatype_t*)jl_typeof(f))->name->mt) +#define jl_gf_ft_mtable(ft) (((jl_datatype_t*)ft)->name->mt) +#define jl_gf_mtable(f) (jl_gf_ft_mtable(jl_typeof(f))) #define jl_gf_name(f) (jl_gf_mtable(f)->name) // struct type info -JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack); -#define jl_get_fieldtypes(st) ((st)->types ? (st)->types : jl_compute_fieldtypes((st), NULL)) +JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack, int cacheable); +#define jl_get_fieldtypes(st) ((st)->types ? (st)->types : jl_compute_fieldtypes((st), NULL, 0)) STATIC_INLINE jl_svec_t *jl_field_names(jl_datatype_t *st) JL_NOTSAFEPOINT { return st->name->names; @@ -1081,10 +1315,24 @@ STATIC_INLINE jl_value_t *jl_field_type_concrete(jl_datatype_t *st JL_PROPAGATES return jl_svecref(st->types, i); } -#define jl_datatype_size(t) (((jl_datatype_t*)t)->size) -#define jl_datatype_align(t) (((jl_datatype_t*)t)->layout->alignment) -#define jl_datatype_nbits(t) ((((jl_datatype_t*)t)->size)*8) -#define jl_datatype_nfields(t) (((jl_datatype_t*)(t))->layout->nfields) +STATIC_INLINE int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT +{ + return l->nfields == 0 && l->npointers > 0; +} + +JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; + +#define jl_inlinedatatype_layout(t) (((jl_datatype_t*)t)->layout) +STATIC_INLINE const jl_datatype_layout_t *jl_datatype_layout(jl_datatype_t *t) JL_NOTSAFEPOINT +{ + if (jl_is_layout_opaque(t->layout)) // e.g. 
GenericMemory + t = (jl_datatype_t*)jl_unwrap_unionall(t->name->wrapper); + return t->layout; +} +#define jl_datatype_size(t) (jl_datatype_layout((jl_datatype_t*)(t))->size) +#define jl_datatype_align(t) (jl_datatype_layout((jl_datatype_t*)(t))->alignment) +#define jl_datatype_nbits(t) ((jl_datatype_layout((jl_datatype_t*)(t))->size)*8) +#define jl_datatype_nfields(t) (jl_datatype_layout((jl_datatype_t*)(t))->nfields) JL_DLLEXPORT void *jl_symbol_name(jl_sym_t *s); // inline version with strong type check to detect typos in a `->name` chain @@ -1112,23 +1360,23 @@ static inline uint32_t jl_fielddesc_size(int8_t fielddesc_type) JL_NOTSAFEPOINT #define jl_dt_layout_fields(d) ((const char*)(d) + sizeof(jl_datatype_layout_t)) static inline const char *jl_dt_layout_ptrs(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT { - return jl_dt_layout_fields(l) + jl_fielddesc_size(l->fielddesc_type) * l->nfields; + return jl_dt_layout_fields(l) + jl_fielddesc_size(l->flags.fielddesc_type) * l->nfields; } #define DEFINE_FIELD_ACCESSORS(f) \ static inline uint32_t jl_field_##f(jl_datatype_t *st, \ int i) JL_NOTSAFEPOINT \ { \ - const jl_datatype_layout_t *ly = st->layout; \ + const jl_datatype_layout_t *ly = jl_datatype_layout(st); \ assert(i >= 0 && (size_t)i < ly->nfields); \ - if (ly->fielddesc_type == 0) { \ + if (ly->flags.fielddesc_type == 0) { \ return ((const jl_fielddesc8_t*)jl_dt_layout_fields(ly))[i].f; \ } \ - else if (ly->fielddesc_type == 1) { \ + else if (ly->flags.fielddesc_type == 1) { \ return ((const jl_fielddesc16_t*)jl_dt_layout_fields(ly))[i].f; \ } \ else { \ - assert(ly->fielddesc_type == 2); \ + assert(ly->flags.fielddesc_type == 2); \ return ((const jl_fielddesc32_t*)jl_dt_layout_fields(ly))[i].f; \ } \ } \ @@ -1139,24 +1387,24 @@ DEFINE_FIELD_ACCESSORS(size) static inline int jl_field_isptr(jl_datatype_t *st, int i) JL_NOTSAFEPOINT { - const jl_datatype_layout_t *ly = st->layout; + const jl_datatype_layout_t *ly = jl_datatype_layout(st); assert(i >= 0 && (size_t)i < ly->nfields); - return ((const jl_fielddesc8_t*)(jl_dt_layout_fields(ly) + jl_fielddesc_size(ly->fielddesc_type) * i))->isptr; + return ((const jl_fielddesc8_t*)(jl_dt_layout_fields(ly) + jl_fielddesc_size(ly->flags.fielddesc_type) * i))->isptr; } static inline uint32_t jl_ptr_offset(jl_datatype_t *st, int i) JL_NOTSAFEPOINT { - const jl_datatype_layout_t *ly = st->layout; + const jl_datatype_layout_t *ly = st->layout; // NOT jl_datatype_layout(st) assert(i >= 0 && (size_t)i < ly->npointers); const void *ptrs = jl_dt_layout_ptrs(ly); - if (ly->fielddesc_type == 0) { + if (ly->flags.fielddesc_type == 0) { return ((const uint8_t*)ptrs)[i]; } - else if (ly->fielddesc_type == 1) { + else if (ly->flags.fielddesc_type == 1) { return ((const uint16_t*)ptrs)[i]; } else { - assert(ly->fielddesc_type == 2); + assert(ly->flags.fielddesc_type == 2); return ((const uint32_t*)ptrs)[i]; } } @@ -1185,65 +1433,64 @@ static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT } -static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT -{ - return l->nfields == 0 && l->npointers > 0; -} - // basic predicates ----------------------------------------------------------- #define jl_is_nothing(v) (((jl_value_t*)(v)) == ((jl_value_t*)jl_nothing)) #define jl_is_tuple(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_tuple_typename) #define jl_is_namedtuple(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_namedtuple_typename) -#define jl_is_svec(v) jl_typeis(v,jl_simplevector_type) +#define jl_is_svec(v) 
jl_typetagis(v,jl_simplevector_tag<<4) #define jl_is_simplevector(v) jl_is_svec(v) -#define jl_is_datatype(v) jl_typeis(v,jl_datatype_type) +#define jl_is_datatype(v) jl_typetagis(v,jl_datatype_tag<<4) #define jl_is_mutable(t) (((jl_datatype_t*)t)->name->mutabl) #define jl_is_mutable_datatype(t) (jl_is_datatype(t) && (((jl_datatype_t*)t)->name->mutabl)) #define jl_is_immutable(t) (!((jl_datatype_t*)t)->name->mutabl) #define jl_is_immutable_datatype(t) (jl_is_datatype(t) && (!((jl_datatype_t*)t)->name->mutabl)) -#define jl_is_uniontype(v) jl_typeis(v,jl_uniontype_type) -#define jl_is_typevar(v) jl_typeis(v,jl_tvar_type) -#define jl_is_unionall(v) jl_typeis(v,jl_unionall_type) -#define jl_is_typename(v) jl_typeis(v,jl_typename_type) -#define jl_is_int8(v) jl_typeis(v,jl_int8_type) -#define jl_is_int16(v) jl_typeis(v,jl_int16_type) -#define jl_is_int32(v) jl_typeis(v,jl_int32_type) -#define jl_is_int64(v) jl_typeis(v,jl_int64_type) -#define jl_is_uint8(v) jl_typeis(v,jl_uint8_type) -#define jl_is_uint16(v) jl_typeis(v,jl_uint16_type) -#define jl_is_uint32(v) jl_typeis(v,jl_uint32_type) -#define jl_is_uint64(v) jl_typeis(v,jl_uint64_type) -#define jl_is_bool(v) jl_typeis(v,jl_bool_type) -#define jl_is_symbol(v) jl_typeis(v,jl_symbol_type) -#define jl_is_ssavalue(v) jl_typeis(v,jl_ssavalue_type) -#define jl_is_slot(v) (jl_typeis(v,jl_slotnumber_type) || jl_typeis(v,jl_typedslot_type)) -#define jl_is_expr(v) jl_typeis(v,jl_expr_type) -#define jl_is_globalref(v) jl_typeis(v,jl_globalref_type) -#define jl_is_gotonode(v) jl_typeis(v,jl_gotonode_type) -#define jl_is_gotoifnot(v) jl_typeis(v,jl_gotoifnot_type) -#define jl_is_returnnode(v) jl_typeis(v,jl_returnnode_type) -#define jl_is_argument(v) jl_typeis(v,jl_argument_type) -#define jl_is_pinode(v) jl_typeis(v,jl_pinode_type) -#define jl_is_phinode(v) jl_typeis(v,jl_phinode_type) -#define jl_is_phicnode(v) jl_typeis(v,jl_phicnode_type) -#define jl_is_upsilonnode(v) jl_typeis(v,jl_upsilonnode_type) -#define jl_is_quotenode(v) jl_typeis(v,jl_quotenode_type) -#define jl_is_newvarnode(v) jl_typeis(v,jl_newvarnode_type) -#define jl_is_linenode(v) jl_typeis(v,jl_linenumbernode_type) -#define jl_is_method_instance(v) jl_typeis(v,jl_method_instance_type) -#define jl_is_code_instance(v) jl_typeis(v,jl_code_instance_type) -#define jl_is_code_info(v) jl_typeis(v,jl_code_info_type) -#define jl_is_method(v) jl_typeis(v,jl_method_type) -#define jl_is_module(v) jl_typeis(v,jl_module_type) -#define jl_is_mtable(v) jl_typeis(v,jl_methtable_type) -#define jl_is_task(v) jl_typeis(v,jl_task_type) -#define jl_is_string(v) jl_typeis(v,jl_string_type) +#define jl_is_uniontype(v) jl_typetagis(v,jl_uniontype_tag<<4) +#define jl_is_typevar(v) jl_typetagis(v,jl_tvar_tag<<4) +#define jl_is_unionall(v) jl_typetagis(v,jl_unionall_tag<<4) +#define jl_is_vararg(v) jl_typetagis(v,jl_vararg_tag<<4) +#define jl_is_typename(v) jl_typetagis(v,jl_typename_type) +#define jl_is_int8(v) jl_typetagis(v,jl_int8_tag<<4) +#define jl_is_int16(v) jl_typetagis(v,jl_int16_tag<<4) +#define jl_is_int32(v) jl_typetagis(v,jl_int32_tag<<4) +#define jl_is_int64(v) jl_typetagis(v,jl_int64_tag<<4) +#define jl_is_uint8(v) jl_typetagis(v,jl_uint8_tag<<4) +#define jl_is_uint16(v) jl_typetagis(v,jl_uint16_tag<<4) +#define jl_is_uint32(v) jl_typetagis(v,jl_uint32_tag<<4) +#define jl_is_uint64(v) jl_typetagis(v,jl_uint64_tag<<4) +#define jl_is_bool(v) jl_typetagis(v,jl_bool_tag<<4) +#define jl_is_symbol(v) jl_typetagis(v,jl_symbol_tag<<4) +#define jl_is_ssavalue(v) jl_typetagis(v,jl_ssavalue_type) +#define 
jl_is_slotnumber(v) jl_typetagis(v,jl_slotnumber_type) +#define jl_is_expr(v) jl_typetagis(v,jl_expr_type) +#define jl_is_binding(v) jl_typetagis(v,jl_binding_type) +#define jl_is_globalref(v) jl_typetagis(v,jl_globalref_type) +#define jl_is_gotonode(v) jl_typetagis(v,jl_gotonode_type) +#define jl_is_gotoifnot(v) jl_typetagis(v,jl_gotoifnot_type) +#define jl_is_returnnode(v) jl_typetagis(v,jl_returnnode_type) +#define jl_is_enternode(v) jl_typetagis(v,jl_enternode_type) +#define jl_is_argument(v) jl_typetagis(v,jl_argument_type) +#define jl_is_pinode(v) jl_typetagis(v,jl_pinode_type) +#define jl_is_phinode(v) jl_typetagis(v,jl_phinode_type) +#define jl_is_phicnode(v) jl_typetagis(v,jl_phicnode_type) +#define jl_is_upsilonnode(v) jl_typetagis(v,jl_upsilonnode_type) +#define jl_is_quotenode(v) jl_typetagis(v,jl_quotenode_type) +#define jl_is_newvarnode(v) jl_typetagis(v,jl_newvarnode_type) +#define jl_is_linenode(v) jl_typetagis(v,jl_linenumbernode_type) +#define jl_is_method_instance(v) jl_typetagis(v,jl_method_instance_type) +#define jl_is_code_instance(v) jl_typetagis(v,jl_code_instance_type) +#define jl_is_code_info(v) jl_typetagis(v,jl_code_info_type) +#define jl_is_method(v) jl_typetagis(v,jl_method_type) +#define jl_is_module(v) jl_typetagis(v,jl_module_tag<<4) +#define jl_is_mtable(v) jl_typetagis(v,jl_methtable_type) +#define jl_is_task(v) jl_typetagis(v,jl_task_tag<<4) +#define jl_is_string(v) jl_typetagis(v,jl_string_tag<<4) #define jl_is_cpointer(v) jl_is_cpointer_type(jl_typeof(v)) #define jl_is_pointer(v) jl_is_cpointer_type(jl_typeof(v)) -#define jl_is_uint8pointer(v)jl_typeis(v,jl_uint8pointer_type) +#define jl_is_uint8pointer(v)jl_typetagis(v,jl_uint8pointer_type) #define jl_is_llvmpointer(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_llvmpointer_typename) -#define jl_is_intrinsic(v) jl_typeis(v,jl_intrinsic_type) -#define jl_array_isbitsunion(a) (!(((jl_array_t*)(a))->flags.ptrarray) && jl_is_uniontype(jl_tparam0(jl_typeof(a)))) +#define jl_is_intrinsic(v) jl_typetagis(v,jl_intrinsic_type) +#define jl_is_addrspacecore(v) jl_typetagis(v,jl_addrspacecore_type) +#define jl_genericmemory_isbitsunion(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isunion) JL_DLLEXPORT int jl_subtype(jl_value_t *a, jl_value_t *b); @@ -1253,45 +1500,49 @@ STATIC_INLINE int jl_is_kind(jl_value_t *v) JL_NOTSAFEPOINT v==(jl_value_t*)jl_unionall_type || v==(jl_value_t*)jl_typeofbottom_type); } +STATIC_INLINE int jl_is_kindtag(uintptr_t t) JL_NOTSAFEPOINT +{ + t >>= 4; + return (t==(uintptr_t)jl_uniontype_tag || t==(uintptr_t)jl_datatype_tag || + t==(uintptr_t)jl_unionall_tag || t==(uintptr_t)jl_typeofbottom_tag); +} + STATIC_INLINE int jl_is_type(jl_value_t *v) JL_NOTSAFEPOINT { - return jl_is_kind(jl_typeof(v)); + return jl_is_kindtag(jl_typetagof(v)); } STATIC_INLINE int jl_is_primitivetype(void *v) JL_NOTSAFEPOINT { - return (jl_is_datatype(v) && jl_is_immutable(v) && - ((jl_datatype_t*)(v))->layout && - jl_datatype_nfields(v) == 0 && - jl_datatype_size(v) > 0); + return (jl_is_datatype(v) && ((jl_datatype_t*)(v))->isprimitivetype); } STATIC_INLINE int jl_is_structtype(void *v) JL_NOTSAFEPOINT { return (jl_is_datatype(v) && !((jl_datatype_t*)(v))->name->abstract && - !jl_is_primitivetype(v)); + !((jl_datatype_t*)(v))->isprimitivetype); } -STATIC_INLINE int jl_isbits(void *t) JL_NOTSAFEPOINT // corresponding to isbits() in julia +STATIC_INLINE int jl_isbits(void *t) JL_NOTSAFEPOINT // corresponding to isbitstype() in julia { - return (jl_is_datatype(t) && 
((jl_datatype_t*)t)->isbitstype); + return jl_is_datatype(t) && ((jl_datatype_t*)t)->isbitstype; } STATIC_INLINE int jl_is_datatype_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT { - return (d->instance != NULL); + return d->instance != NULL && d->layout->size == 0 && d->layout->npointers == 0; } STATIC_INLINE int jl_is_abstracttype(void *v) JL_NOTSAFEPOINT { - return (jl_is_datatype(v) && ((jl_datatype_t*)(v))->name->abstract); + return jl_is_datatype(v) && ((jl_datatype_t*)(v))->name->abstract; } STATIC_INLINE int jl_is_array_type(void *t) JL_NOTSAFEPOINT { - return (jl_is_datatype(t) && - ((jl_datatype_t*)(t))->name == jl_array_typename); + return jl_is_datatype(t) && + ((jl_datatype_t*)(t))->name == jl_array_typename; } STATIC_INLINE int jl_is_array(void *v) JL_NOTSAFEPOINT @@ -1300,6 +1551,42 @@ STATIC_INLINE int jl_is_array(void *v) JL_NOTSAFEPOINT return jl_is_array_type(t); } +STATIC_INLINE int jl_is_genericmemory_type(void *t) JL_NOTSAFEPOINT +{ + return (jl_is_datatype(t) && + ((jl_datatype_t*)(t))->name == jl_genericmemory_typename); +} + +STATIC_INLINE int jl_is_genericmemory(void *v) JL_NOTSAFEPOINT +{ + jl_value_t *t = jl_typeof(v); + return jl_is_genericmemory_type(t); +} + +STATIC_INLINE int jl_is_genericmemoryref_type(void *t) JL_NOTSAFEPOINT +{ + return (jl_is_datatype(t) && + ((jl_datatype_t*)(t))->name == jl_genericmemoryref_typename); +} + +STATIC_INLINE int jl_is_genericmemoryref(void *v) JL_NOTSAFEPOINT +{ + jl_value_t *t = jl_typeof(v); + return jl_is_genericmemoryref_type(t); +} + +STATIC_INLINE int jl_is_addrspace_type(void *t) JL_NOTSAFEPOINT +{ + return (jl_is_datatype(t) && + ((jl_datatype_t*)(t))->name == jl_addrspace_typename); +} + +STATIC_INLINE int jl_is_addrspace(void *v) JL_NOTSAFEPOINT +{ + jl_value_t *t = jl_typeof(v); + return jl_is_addrspace_type(t); +} + STATIC_INLINE int jl_is_opaque_closure_type(void *t) JL_NOTSAFEPOINT { @@ -1355,48 +1642,46 @@ STATIC_INLINE int jl_is_type_type(jl_value_t *v) JL_NOTSAFEPOINT ((jl_datatype_t*)(v))->name == ((jl_datatype_t*)jl_type_type->body)->name); } -STATIC_INLINE int jl_is_array_zeroinit(jl_array_t *a) JL_NOTSAFEPOINT +STATIC_INLINE int jl_is_genericmemory_zeroinit(jl_genericmemory_t *m) JL_NOTSAFEPOINT { - if (a->flags.ptrarray || a->flags.hasptr) - return 1; - jl_value_t *elty = jl_tparam0(jl_typeof(a)); - return jl_is_datatype(elty) && ((jl_datatype_t*)elty)->zeroinit; + return ((jl_datatype_t*)jl_typeof(m))->zeroinit; } // object identity JL_DLLEXPORT int jl_egal(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT; -JL_DLLEXPORT int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT; -JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT; +JL_DLLEXPORT int jl_egal__bitstag(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT; +JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT; JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT; +JL_DLLEXPORT uintptr_t jl_type_hash(jl_value_t *v) JL_NOTSAFEPOINT; -STATIC_INLINE int jl_egal__unboxed_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b 
JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT +STATIC_INLINE int jl_egal__unboxed_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, uintptr_t dtag) JL_NOTSAFEPOINT { - if (dt->name->mutabl) { - if (dt == jl_simplevector_type || dt == jl_string_type || dt == jl_datatype_type) - return jl_egal__special(a, b, dt); - return 0; + if (dtag < jl_max_tags << 4) { + if (dtag == jl_symbol_tag << 4 || dtag == jl_bool_tag << 4) + return 0; } - return jl_egal__bits(a, b, dt); + else if (((jl_datatype_t*)dtag)->name->mutabl) + return 0; + return jl_egal__bitstag(a, b, dtag); } STATIC_INLINE int jl_egal_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT { if (a == b) return 1; - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a); - if (dt != (jl_datatype_t*)jl_typeof(b)) + uintptr_t dtag = jl_typetagof(a); + if (dtag != jl_typetagof(b)) return 0; - return jl_egal__unboxed_(a, b, dt); + return jl_egal__unboxed_(a, b, dtag); } #define jl_egal(a, b) jl_egal_((a), (b)) // type predicates and basic operations -JL_DLLEXPORT int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_has_typevar(jl_value_t *t, jl_tvar_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua); -JL_DLLEXPORT int jl_subtype_env_size(jl_value_t *t); +JL_DLLEXPORT int jl_subtype_env_size(jl_value_t *t) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_subtype_env(jl_value_t *x, jl_value_t *y, jl_value_t **env, int envsz); JL_DLLEXPORT int jl_isa(jl_value_t *a, jl_value_t *t); JL_DLLEXPORT int jl_types_equal(jl_value_t *a, jl_value_t *b); @@ -1419,7 +1704,7 @@ STATIC_INLINE int jl_is_concrete_type(jl_value_t *v) JL_NOTSAFEPOINT return jl_is_datatype(v) && ((jl_datatype_t*)v)->isconcretetype; } -JL_DLLEXPORT int jl_isa_compileable_sig(jl_tupletype_t *type, jl_method_t *definition); +JL_DLLEXPORT int jl_isa_compileable_sig(jl_tupletype_t *type, jl_svec_t *sparams, jl_method_t *definition); // type constructors JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *inmodule, int abstract, int mutabl); @@ -1428,10 +1713,11 @@ JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p JL_DLLEXPORT jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n); JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1); JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2); +JL_DLLEXPORT jl_value_t *jl_apply_type3(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2, jl_value_t *p3); JL_DLLEXPORT jl_datatype_t *jl_apply_modify_type(jl_value_t *dt); JL_DLLEXPORT jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt); -JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params); -JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np); +JL_DLLEXPORT jl_value_t *jl_apply_tuple_type(jl_svec_t *params, int check); // if uncertain, set check=1 +JL_DLLEXPORT jl_value_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np); JL_DLLEXPORT jl_datatype_t *jl_new_datatype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *super, @@ -1465,7 +1751,6 @@ JL_DLLEXPORT jl_svec_t *jl_alloc_svec(size_t n); JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n); JL_DLLEXPORT jl_svec_t *jl_svec_copy(jl_svec_t *a); JL_DLLEXPORT jl_svec_t *jl_svec_fill(size_t n, jl_value_t *x); -JL_DLLEXPORT jl_value_t *jl_tupletype_fill(size_t 
n, jl_value_t *v); JL_DLLEXPORT jl_sym_t *jl_symbol(const char *str) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_sym_t *jl_symbol_lookup(const char *str) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len) JL_NOTSAFEPOINT; @@ -1474,13 +1759,12 @@ JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len); JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void); JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name, jl_module_t *module, - _Atomic(jl_value_t*) *bp, jl_value_t *bp_owner, + _Atomic(jl_value_t*) *bp, jl_binding_t *bnd); JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module); -JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo); +JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world); JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src); JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_function_t *jl_get_kwsorter(jl_value_t *ty); JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_box_int8(int8_t x) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_box_uint8(uint8_t x) JL_NOTSAFEPOINT; @@ -1539,62 +1823,71 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i); // Like jl_get_nth_field above, but asserts if it needs to allocate JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i); -JL_DLLEXPORT void jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *rhs); JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT; +JL_DLLEXPORT int jl_field_isdefined_checked(jl_value_t *v, size_t i); JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld); JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a); int jl_uniontype_size(jl_value_t *ty, size_t *sz); JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al); // arrays -JL_DLLEXPORT jl_array_t *jl_new_array(jl_value_t *atype, jl_value_t *dims); -JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, - jl_value_t *dims); JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, size_t nel, int own_buffer); JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, jl_value_t *dims, int own_buffer); JL_DLLEXPORT jl_array_t *jl_alloc_array_1d(jl_value_t *atype, size_t nr); -JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, - size_t nc); -JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, - size_t nc, size_t z); +JL_DLLEXPORT jl_array_t *jl_alloc_array_2d(jl_value_t *atype, size_t nr, size_t nc); +JL_DLLEXPORT jl_array_t *jl_alloc_array_3d(jl_value_t *atype, size_t nr, size_t nc, size_t z); +JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims); JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len); JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len); JL_DLLEXPORT jl_value_t *jl_cstr_to_string(const char *str); JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len); JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a); JL_DLLEXPORT jl_array_t *jl_alloc_vec_any(size_t n); -JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i); // 
0-indexed -JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; // 0-indexed -JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *v JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, size_t i); // 0-indexed -JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i); // 0-indexed -JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i); // 0-indexed JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc); JL_DLLEXPORT void jl_array_del_end(jl_array_t *a, size_t dec); -JL_DLLEXPORT void jl_array_grow_beg(jl_array_t *a, size_t inc); -JL_DLLEXPORT void jl_array_del_beg(jl_array_t *a, size_t dec); -JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz); JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item); JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2); JL_DLLEXPORT jl_value_t *jl_apply_array_type(jl_value_t *type, size_t dim); -JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, size_t *tot, uint32_t ndims, size_t *dims, size_t elsz); // property access JL_DLLEXPORT void *jl_array_ptr(jl_array_t *a); JL_DLLEXPORT void *jl_array_eltype(jl_value_t *a); JL_DLLEXPORT int jl_array_rank(jl_value_t *a); -JL_DLLEXPORT size_t jl_array_size(jl_value_t *a, int d); + +// genericmemory +JL_DLLEXPORT jl_genericmemory_t *jl_new_genericmemory(jl_value_t *mtype, jl_value_t *dim); +JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void *data, + size_t nel, int own_buffer); +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_t nel); +JL_DLLEXPORT jl_genericmemory_t *jl_pchar_to_memory(const char *str, size_t len); +JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_t len); +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n); +JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *m, size_t i); // 0-indexed +JL_DLLEXPORT void jl_genericmemoryset(jl_genericmemory_t *m JL_ROOTING_ARGUMENT, jl_value_t *v JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, size_t i); // 0-indexed +JL_DLLEXPORT void jl_genericmemoryunset(jl_genericmemory_t *m, size_t i); // 0-indexed +JL_DLLEXPORT int jl_genericmemory_isassigned(jl_genericmemory_t *m, size_t i); // 0-indexed + +JL_DLLEXPORT jl_genericmemoryref_t *jl_new_memoryref(jl_value_t *typ, jl_genericmemory_t *mem, void *data); +JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_value_t *jl_ptrmemoryrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m) JL_GLOBALLY_ROOTED; +JL_DLLEXPORT jl_genericmemoryref_t jl_memoryrefindex(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, size_t idx) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *v JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); +JL_DLLEXPORT void jl_memoryrefunset(jl_genericmemoryref_t m); // strings JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s); // modules and global variables -extern JL_DLLEXPORT jl_module_t *jl_main_module JL_GLOBALLY_ROOTED; -extern JL_DLLEXPORT jl_module_t *jl_core_module JL_GLOBALLY_ROOTED; -extern JL_DLLEXPORT jl_module_t *jl_base_module JL_GLOBALLY_ROOTED; -extern JL_DLLEXPORT jl_module_t *jl_top_module JL_GLOBALLY_ROOTED; -JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name); +extern JL_DLLIMPORT jl_module_t *jl_main_module JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_module_t *jl_core_module 
JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_module_t *jl_base_module JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_module_t *jl_top_module JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_module_t *jl_libdl_module JL_GLOBALLY_ROOTED; +JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name, jl_module_t *parent); JL_DLLEXPORT void jl_set_module_nospecialize(jl_module_t *self, int on); JL_DLLEXPORT void jl_set_module_optlevel(jl_module_t *self, int lvl); JL_DLLEXPORT int jl_get_module_optlevel(jl_module_t *m); @@ -1608,25 +1901,28 @@ JL_DLLEXPORT int jl_get_module_max_methods(jl_module_t *m); JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var); -JL_DLLEXPORT jl_value_t *jl_binding_type(jl_module_t *m, jl_sym_t *var); +JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var); // get binding for assignment -JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc); -JL_DLLEXPORT jl_binding_t *jl_get_binding_wr_or_error(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); +JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var); +JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr); +JL_DLLEXPORT int jl_globalref_boundp(jl_globalref_t *gr); +JL_DLLEXPORT jl_value_t *jl_get_globalref_value(jl_globalref_t *gr); JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); +JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT); JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT); -JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs JL_MAYBE_UNROOTED); -JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b); +JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED); +JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var); JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from); JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s); JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname); JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from, jl_sym_t *s); JL_DLLEXPORT void jl_module_import_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname); -JL_DLLEXPORT void jl_module_export(jl_module_t *from, jl_sym_t *s); +JL_DLLEXPORT void jl_module_public(jl_module_t *from, jl_sym_t *s, int exported); JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *s); JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var); JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m); @@ -1636,8 +1932,10 @@ STATIC_INLINE jl_function_t *jl_get_function(jl_module_t *m, const char *name) } // eq hash tables -JL_DLLEXPORT jl_array_t 
*jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int *inserted); -JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_genericmemory_t *jl_eqtable_put(jl_genericmemory_t *h JL_ROOTING_ARGUMENT, jl_value_t *key, jl_value_t *val JL_ROOTED_ARGUMENT, int *inserted); +JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_genericmemory_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_eqtable_pop(jl_genericmemory_t *h, jl_value_t *key, jl_value_t *deflt, int *found); +jl_value_t *jl_eqtable_getkey(jl_genericmemory_t *h JL_PROPAGATES_ROOT, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT; // system information JL_DLLEXPORT int jl_errno(void) JL_NOTSAFEPOINT; @@ -1650,9 +1948,12 @@ JL_DLLEXPORT long jl_getallocationgranularity(void) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_is_debugbuild(void) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_sym_t *jl_get_UNAME(void) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT; +JL_DLLIMPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT; extern JL_DLLIMPORT int jl_n_threadpools; -extern JL_DLLIMPORT int jl_n_threads; +extern JL_DLLIMPORT _Atomic(int) jl_n_threads; +extern JL_DLLIMPORT int jl_n_gcthreads; +extern int jl_n_markthreads; +extern int jl_n_sweepthreads; extern JL_DLLIMPORT int *jl_n_threads_per_pool; // environment entries @@ -1674,7 +1975,8 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error_rt(const char *fname, const char *context, jl_value_t *ty JL_MAYBE_UNROOTED, jl_value_t *got JL_MAYBE_UNROOTED); -JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var); +JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *scope JL_MAYBE_UNROOTED); +JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_sym_t *type_name, jl_sym_t *var); JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str); JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v JL_MAYBE_UNROOTED, jl_value_t *t JL_MAYBE_UNROOTED); @@ -1720,10 +2022,10 @@ JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT; typedef enum { JL_IMAGE_CWD = 0, JL_IMAGE_JULIA_HOME = 1, - //JL_IMAGE_LIBJULIA = 2, + JL_IMAGE_IN_MEMORY = 2 } JL_IMAGE_SEARCH; -JL_DLLEXPORT const char *jl_get_libdir(void); +JL_DLLIMPORT const char *jl_get_libdir(void); JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel); JL_DLLEXPORT void jl_init(void); JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir, @@ -1731,21 +2033,24 @@ JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir, JL_DLLEXPORT const char *jl_get_default_sysimg_path(void); JL_DLLEXPORT int jl_is_initialized(void); JL_DLLEXPORT void jl_atexit_hook(int status); +JL_DLLEXPORT void jl_task_wait_empty(void); JL_DLLEXPORT void jl_postoutput_hook(void); JL_DLLEXPORT void JL_NORETURN jl_exit(int status); +JL_DLLEXPORT void JL_NORETURN jl_raise(int signo); JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle); +JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void); JL_DLLEXPORT int jl_deserialize_verify_header(ios_t *s); JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname); JL_DLLEXPORT void jl_set_sysimg_so(void *handle); -JL_DLLEXPORT ios_t *jl_create_system_image(void *); -JL_DLLEXPORT void jl_save_system_image(const char *fname); +JL_DLLEXPORT void jl_create_system_image(void **, jl_array_t *worklist, bool_t emit_split, ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos); JL_DLLEXPORT 
void jl_restore_system_image(const char *fname); JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len); +JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int complete, const char *pkgimage); + JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred); -JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist); -JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods); -JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *depmods); +JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *ci); +JL_DLLEXPORT void jl_write_compiler_output(void); // parsing JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len, @@ -1807,17 +2112,19 @@ JL_DLLEXPORT void jl_register_newmeth_tracer(void (*callback)(jl_method_t *trace JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr JL_MAYBE_UNROOTED); // IR representation -JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code); -JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_array_t *data); -JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_array_t *data) JL_NOTSAFEPOINT; -JL_DLLEXPORT uint8_t jl_ir_flag_inlineable(jl_array_t *data) JL_NOTSAFEPOINT; -JL_DLLEXPORT uint8_t jl_ir_flag_pure(jl_array_t *data) JL_NOTSAFEPOINT; -JL_DLLEXPORT ssize_t jl_ir_nslots(jl_array_t *data) JL_NOTSAFEPOINT; -JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_array_t *data, size_t i) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code); +JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t *metadata, jl_value_t *data); +JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_value_t *data) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint8_t jl_ir_flag_inlining(jl_value_t *data) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint8_t jl_ir_flag_has_fcall(jl_value_t *data) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_value_t *data) JL_NOTSAFEPOINT; +JL_DLLEXPORT ssize_t jl_ir_nslots(jl_value_t *data) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_value_t *data, size_t i) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms); JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms); JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i); + JL_DLLEXPORT int jl_is_operator(char *sym); JL_DLLEXPORT int jl_is_unary_operator(char *sym); JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym); @@ -1864,6 +2171,7 @@ JL_DLLEXPORT void jl_sigatomic_end(void); // tasks and exceptions ------------------------------------------------------- typedef struct _jl_timing_block_t jl_timing_block_t; +typedef struct _jl_timing_event_t jl_timing_event_t; typedef struct _jl_excstack_t jl_excstack_t; // info describing an exception handler @@ -1878,6 +2186,8 @@ typedef struct _jl_handler_t { size_t world_age; } jl_handler_t; +#define JL_RNG_SIZE 5 // xoshiro 4 + splitmix 1 + typedef struct _jl_task_t { JL_DATA_TYPE jl_value_t *next; // invasive linked list for scheduler @@ -1885,25 +2195,41 @@ typedef struct _jl_task_t { jl_value_t *tls; jl_value_t *donenotify; jl_value_t *result; - jl_value_t *logstate; + jl_value_t *scope; jl_function_t *start; - uint64_t rngState[4]; + // 4 byte padding on 32-bit systems + // uint32_t padding0; + uint64_t rngState[JL_RNG_SIZE]; _Atomic(uint8_t) _state; uint8_t sticky; // record whether this Task can be migrated to a new 
thread _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with + // 1 byte padding + // uint8_t padding1; // multiqueue priority uint16_t priority; // hidden state: + // id of owning thread - does not need to be defined until the task runs _Atomic(int16_t) tid; // threadpool id int8_t threadpoolid; + // Reentrancy bits + // Bit 0: 1 if we are currently running inference/codegen + // Bit 1-2: 0-3 counter of how many times we've reentered inference + // Bit 3: 1 if we are writing the image and inference is illegal + uint8_t reentrant_timing; + // 2 bytes of padding on 32-bit, 6 bytes on 64-bit + // uint16_t padding2_32; + // uint48_t padding2_64; // saved gc stack top for context switches jl_gcframe_t *gcstack; size_t world_age; // quick lookup for current ptls jl_ptls_t ptls; // == jl_all_tls_states[tid] +#ifdef USE_TRACY + const char *name; +#endif // saved exception stack jl_excstack_t *excstack; // current exception handler @@ -1928,10 +2254,13 @@ JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_rethrow(void); JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void); JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED); -JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e); +JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct); JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT; #define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack)) +extern JL_DLLIMPORT int jl_task_gcstack_offset; +extern JL_DLLIMPORT int jl_task_ptls_offset; + #include "julia_locks.h" // requires jl_task_t definition JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh); @@ -1952,7 +2281,7 @@ void (jl_longjmp)(jmp_buf _Buf, int _Value); JL_DLLEXPORT int (ijl_setjmp)(jmp_buf _Buf); void (ijl_longjmp)(jmp_buf _Buf, int _Value); #endif -#ifdef LIBRARY_EXPORTS +#ifdef JL_LIBRARY_EXPORTS #define jl_setjmp_f ijl_setjmp #define jl_setjmp_name "ijl_setjmp" #define jl_setjmp(a,b) ijl_setjmp(a) @@ -1978,8 +2307,13 @@ void (ijl_longjmp)(jmp_buf _Buf, int _Value); #define jl_setjmp_name "sigsetjmp" #endif #define jl_setjmp(a,b) sigsetjmp(a,b) +#if defined(_COMPILER_ASAN_ENABLED_) && defined(__GLIBC__) +extern void (*real_siglongjmp)(jmp_buf _Buf, int _Value); +#define jl_longjmp(a,b) real_siglongjmp(a,b) +#else #define jl_longjmp(a,b) siglongjmp(a,b) #endif +#endif #ifdef __clang_gcanalyzer__ @@ -2022,7 +2356,7 @@ typedef int jl_uv_os_fd_t; JL_DLLEXPORT int jl_process_events(void); -JL_DLLEXPORT struct uv_loop_s *jl_global_event_loop(void); +JL_DLLEXPORT struct uv_loop_s *jl_global_event_loop(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_close_uv(struct uv_handle_s *handle); @@ -2057,7 +2391,7 @@ extern JL_DLLEXPORT JL_STREAM *JL_STDERR; JL_DLLEXPORT JL_STREAM *jl_stdout_stream(void); JL_DLLEXPORT JL_STREAM *jl_stdin_stream(void); JL_DLLEXPORT JL_STREAM *jl_stderr_stream(void); -JL_DLLEXPORT int jl_getch(void); +JL_DLLEXPORT int jl_termios_size(void); // showing and std streams JL_DLLEXPORT void jl_flush_cstdio(void) JL_NOTSAFEPOINT; @@ -2146,11 +2480,16 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT; #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES 1 #define JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO 0 +#define JL_OPTIONS_USE_COMPILED_MODULES_EXISTING 2 #define JL_OPTIONS_USE_COMPILED_MODULES_YES 1 #define JL_OPTIONS_USE_COMPILED_MODULES_NO 0 +#define JL_OPTIONS_USE_PKGIMAGES_EXISTING 2 +#define 
JL_OPTIONS_USE_PKGIMAGES_YES 1 +#define JL_OPTIONS_USE_PKGIMAGES_NO 0 + // Version information -#include "julia_version.h" +#include // Generated file JL_DLLEXPORT extern int jl_ver_major(void); JL_DLLEXPORT extern int jl_ver_minor(void); @@ -2191,18 +2530,22 @@ typedef struct { // controls the emission of debug-info. mirrors the clang options int gnu_pubnames; // can we emit the gnu pubnames debuginfo - int debug_info_kind; // Enum for line-table-only, line-directives-only, + int debug_info_kind; // Enum for line-table-only, line-directives-only, // limited, standalone + int debug_info_level; // equivalent to the -g level from the cli + int safepoint_on_entry; // Emit a safepoint on entry to each function + int gcstack_arg; // Pass the ptls value as an argument with swiftself + int use_jlplt; // Whether to use the Julia PLT mechanism or emit symbols directly // Cache access. Default: jl_rettype_inferred. jl_codeinstance_lookup_t lookup; - - // If not `nothing`, rewrite all generic calls to call - // generic_context(f, args...) instead of f(args...). - jl_value_t *generic_context; } jl_cgparams_t; extern JL_DLLEXPORT int jl_default_debug_info_kind; +typedef struct { + int emit_metadata; +} jl_emission_params_t; + #ifdef __cplusplus } #endif diff --git a/src/julia_assert.h b/src/julia_assert.h index 4b120fd9e845b..13cbdbcd36f5b 100644 --- a/src/julia_assert.h +++ b/src/julia_assert.h @@ -10,6 +10,7 @@ // Files that need `assert` should include this file after all other includes. // All files should also check `JL_NDEBUG` instead of `NDEBUG`. +#pragma GCC visibility push(default) #ifdef NDEBUG # ifndef JL_NDEBUG # undef NDEBUG @@ -28,3 +29,4 @@ # include # endif #endif +#pragma GCC visibility pop diff --git a/src/julia_atomics.h b/src/julia_atomics.h index cb14e535cd010..c4488f774c987 100644 --- a/src/julia_atomics.h +++ b/src/julia_atomics.h @@ -73,7 +73,18 @@ enum jl_memory_order { * are). We also need to access these atomic variables from the LLVM JIT code * which is very hard unless the layout of the object is fully specified. */ -#define jl_fence() atomic_thread_fence(memory_order_seq_cst) + +/** + * On modern Intel and AMD platforms `lock orq` on the SP is faster than + * `mfence`. GCC 11 did switch to this representation. 
See #48123 + */ +#if defined(_CPU_X86_64_) && \ + ((defined(__GNUC__) && __GNUC__ < 11) || \ + (defined(__clang__))) + #define jl_fence() __asm__ volatile("lock orq $0 , (%rsp)") +#else + #define jl_fence() atomic_thread_fence(memory_order_seq_cst) +#endif #define jl_fence_release() atomic_thread_fence(memory_order_release) #define jl_signal_fence() atomic_signal_fence(memory_order_seq_cst) @@ -149,6 +160,11 @@ bool jl_atomic_cmpswap_explicit(std::atomic *ptr, T *expected, S val, std::me { return std::atomic_compare_exchange_strong_explicit(ptr, expected, val, order, order); } +template +bool jl_atomic_cmpswap_acqrel(std::atomic *ptr, T *expected, S val) +{ + return std::atomic_compare_exchange_strong_explicit(ptr, expected, val, memory_order_acq_rel, memory_order_acquire); +} #define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed) template T jl_atomic_exchange(std::atomic *ptr, S desired) @@ -180,6 +196,8 @@ extern "C" { atomic_compare_exchange_strong(obj, expected, desired) # define jl_atomic_cmpswap_relaxed(obj, expected, desired) \ atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed) +#define jl_atomic_cmpswap_acqrel(obj, expected, desired) \ + atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_acq_rel, memory_order_acquire) // TODO: Maybe add jl_atomic_cmpswap_weak for spin lock # define jl_atomic_exchange(obj, desired) \ atomic_exchange(obj, desired) @@ -240,6 +258,7 @@ extern "C" { #define jl_atomic_exchange_relaxed jl_atomic_exchange #undef jl_atomic_cmpswap +#undef jl_atomic_cmpswap_acqrel #undef jl_atomic_cmpswap_relaxed #define jl_atomic_cmpswap(obj, expected, desired) \ (__extension__({ \ @@ -253,6 +272,7 @@ extern "C" { *x__analyzer__ = temp__analyzer__; \ eq__analyzer__; \ })) +#define jl_atomic_cmpswap_acqrel jl_atomic_cmpswap #define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap #undef jl_atomic_store diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h index 08f465badf8d3..1c0929717b293 100644 --- a/src/julia_fasttls.h +++ b/src/julia_fasttls.h @@ -3,6 +3,13 @@ #ifndef JL_FASTTLS_H #define JL_FASTTLS_H +#ifdef __cplusplus +#include +#define _Atomic(T) std::atomic +#else +#include +#endif + // Thread-local storage access #ifdef __cplusplus @@ -25,6 +32,7 @@ typedef jl_gcframe_t **(jl_get_pgcstack_func)(void); #if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_) #define JULIA_DEFINE_FAST_TLS \ static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec; \ +JL_DLLEXPORT _Atomic(char) jl_pgcstack_static_semaphore; \ JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack_static(void) \ { \ return jl_pgcstack_localexec; \ diff --git a/src/julia_gcext.h b/src/julia_gcext.h index 6787dafb4b7ee..27f0a6b5ec11c 100644 --- a/src/julia_gcext.h +++ b/src/julia_gcext.h @@ -49,6 +49,13 @@ JL_DLLEXPORT jl_datatype_t *jl_new_foreign_type( int haspointers, int large); + +#define HAVE_JL_REINIT_FOREIGN_TYPE 1 +JL_DLLEXPORT int jl_reinit_foreign_type( + jl_datatype_t *dt, + jl_markfunc_t markfunc, + jl_sweepfunc_t sweepfunc); + JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt); JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void); @@ -76,10 +83,10 @@ JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, // Sweep functions will not automatically be called for objects of // foreign types, as that may not always be desired. 
Only calling // jl_gc_schedule_foreign_sweepfunc() on an object of a foreign type -// will result in the custome sweep function actually being called. +// will result in the custom sweep function actually being called. // This must be done at most once per object and should usually be // done right after allocating the object. -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t * bj); +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *bj); // The following functions enable support for conservative marking. This // functionality allows the user to determine if a machine word can be @@ -120,6 +127,8 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void); // external allocations may not all be valid objects and that for those, // the user *must* validate that they have a proper type, i.e. that // jl_typeof(obj) is an actual type object. +// +// NOTE: Only valid to call from within a GC context. JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p); // Return a non-null pointer to the start of the stack area if the task diff --git a/src/julia_internal.h b/src/julia_internal.h index 60583f2240aea..4f92c3eb500e3 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -4,6 +4,7 @@ #define JL_INTERNAL_H #include "options.h" +#include "julia_assert.h" #include "julia_locks.h" #include "julia_threads.h" #include "support/utils.h" @@ -12,6 +13,7 @@ #include "support/strtod.h" #include "gc-alloc-profiler.h" #include "support/rle.h" +#include #include #include #include @@ -20,19 +22,80 @@ #else #define sleep(x) Sleep(1000*x) #endif +#if defined(_CPU_ARM_) +#include +#endif + +// pragma visibility is more useful than -fvisibility +#pragma GCC visibility push(hidden) #ifdef __cplusplus extern "C" { #endif #ifdef _COMPILER_ASAN_ENABLED_ -void __sanitizer_start_switch_fiber(void**, const void*, size_t); -void __sanitizer_finish_switch_fiber(void*, const void**, size_t*); +#if defined(__GLIBC__) && defined(_CPU_X86_64_) +/* TODO: This is terrible - we're reaching deep into glibc internals here. + We should probably just switch to our own setjmp/longjmp implementation. */ +#define JB_RSP 6 +static inline uintptr_t demangle_ptr(uintptr_t var) +{ + asm ("ror $17, %0\n\t" + "xor %%fs:0x30, %0\n\t" + : "=r" (var) + : "0" (var)); + return var; +} +static inline uintptr_t jmpbuf_sp(jl_jmp_buf *buf) +{ + return demangle_ptr((uintptr_t)(*buf)[0].__jmpbuf[JB_RSP]); +} +#else +#error Need to implement jmpbuf_sp for this architecture +#endif +JL_DLLIMPORT void __sanitizer_start_switch_fiber(void**, const void*, size_t); +JL_DLLIMPORT void __sanitizer_finish_switch_fiber(void*, const void**, size_t*); +JL_DLLIMPORT void __asan_unpoison_stack_memory(uintptr_t addr, size_t size); +static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf) +{ + if (!ct) + return; + /* Unpoison everything from the base of the stack allocation to the address + that we're resetting to. The idea is to remove the poison from the frames + that we're skipping over, since they won't be unwound. 
*/ + uintptr_t top = jmpbuf_sp(buf); + uintptr_t bottom = (uintptr_t)ct->stkbuf; + __asan_unpoison_stack_memory(bottom, top - bottom); +} +static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) { + __asan_unpoison_stack_memory(addr, size); +} +#else +static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf) JL_NOTSAFEPOINT {} +static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) JL_NOTSAFEPOINT {} +#endif +#ifdef _COMPILER_MSAN_ENABLED_ +JL_DLLIMPORT void __msan_unpoison(const volatile void *a, size_t size) JL_NOTSAFEPOINT; +JL_DLLIMPORT void __msan_allocated_memory(const volatile void *a, size_t size) JL_NOTSAFEPOINT; +JL_DLLIMPORT void __msan_unpoison_string(const volatile char *a) JL_NOTSAFEPOINT; +static inline void msan_allocated_memory(const volatile void *a, size_t size) JL_NOTSAFEPOINT { + __msan_allocated_memory(a, size); +} +static inline void msan_unpoison(const volatile void *a, size_t size) JL_NOTSAFEPOINT { + __msan_unpoison(a, size); +} +static inline void msan_unpoison_string(const volatile char *a) JL_NOTSAFEPOINT { + __msan_unpoison_string(a); +} +#else +static inline void msan_unpoison(const volatile void *a, size_t size) JL_NOTSAFEPOINT {} +static inline void msan_allocated_memory(const volatile void *a, size_t size) JL_NOTSAFEPOINT {} +static inline void msan_unpoison_string(const volatile char *a) JL_NOTSAFEPOINT {} #endif #ifdef _COMPILER_TSAN_ENABLED_ -void *__tsan_create_fiber(unsigned flags); -void *__tsan_get_current_fiber(void); -void __tsan_destroy_fiber(void *fiber); -void __tsan_switch_to_fiber(void *fiber, unsigned flags); +JL_DLLIMPORT void *__tsan_create_fiber(unsigned flags); +JL_DLLIMPORT void *__tsan_get_current_fiber(void); +JL_DLLIMPORT void __tsan_destroy_fiber(void *fiber); +JL_DLLIMPORT void __tsan_switch_to_fiber(void *fiber, unsigned flags); #endif #ifdef __cplusplus } @@ -131,23 +194,33 @@ int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT; // Returns time in nanosec JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_set_peek_cond(uintptr_t); JL_DLLEXPORT double jl_get_profile_peek_duration(void); JL_DLLEXPORT void jl_set_profile_peek_duration(double); JL_DLLEXPORT void jl_init_profile_lock(void); JL_DLLEXPORT uintptr_t jl_lock_profile_rd_held(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; +JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; +JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; +JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; +int jl_lock_stackwalk(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; +void jl_unlock_stackwalk(int lockret) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; // number of cycles since power-on static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT { #if defined(_CPU_X86_64_) + // This is nopl 0(%rax, %rax, 1), but assembler are inconsistent about whether + // they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use + // the 5 byte one. 
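  // [editor's note, not part of the upstream patch; assumes the standard Intel multi-byte
  // NOP encodings] 0x0f 0x1f 0x44 0x00 0x00 is the 5-byte "nopl 0x0(%rax,%rax,1)" form,
  // while 0x0f 0x1f 0x40 0x00 is the 4-byte form an assembler may otherwise pick, which
  // is why the byte sequence is hard-coded in NOP5_OVERRIDE_NOP below.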
+#define NOP5_OVERRIDE_NOP ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t" uint64_t low, high; - __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); + // This instruction sequence is promised by rr to be patchable. rr can usually + // also patch `rdtsc` in regular code, but without the preceding nop, there could + // be an interfering branch into the middle of rr's patch region. Using this + // sequence prevents a massive rr-induced slowdown if the compiler happens to emit + // an unlucky pattern. See https://github.com/rr-debugger/rr/pull/3580. + __asm__ volatile(NOP5_OVERRIDE_NOP "rdtsc" : "=a"(low), "=d"(high)); return (high << 32) | low; #elif defined(_CPU_X86_) int64_t ret; @@ -161,6 +234,26 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT int64_t virtual_timer_value; __asm__ volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); return virtual_timer_value; +#elif defined(_CPU_ARM_) + // V6 is the earliest arch that has a standard cyclecount +#if (__ARM_ARCH >= 6) + uint32_t pmccntr; + uint32_t pmuseren; + uint32_t pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + // The counter is set up to count every 64th cycle + return (int64_t)(pmccntr) * 64; // Should optimize to << 6 + } + } +#endif + struct timeval tv; + gettimeofday(&tv, NULL); + return (int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(_CPU_PPC64_) // This returns a time-base, which is not always precisely a cycle-count. // https://reviews.llvm.org/D78084 @@ -200,19 +293,17 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a) // without risk of creating pointers out of thin air // TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32 // aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit) -static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT +static inline void memmove_refs(_Atomic(void*) *dstp, _Atomic(void*) *srcp, size_t n) JL_NOTSAFEPOINT { size_t i; - _Atomic(void*) *srcpa = (_Atomic(void*)*)srcp; - _Atomic(void*) *dstpa = (_Atomic(void*)*)dstp; if (dstp < srcp || dstp > srcp + n) { for (i = 0; i < n; i++) { - jl_atomic_store_release(dstpa + i, jl_atomic_load_relaxed(srcpa + i)); + jl_atomic_store_release(dstp + i, jl_atomic_load_relaxed(srcp + i)); } } else { for (i = 0; i < n; i++) { - jl_atomic_store_release(dstpa + n - i - 1, jl_atomic_load_relaxed(srcpa + n - i - 1)); + jl_atomic_store_release(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1)); } } } @@ -223,10 +314,13 @@ static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOT #define GC_MARKED 1 // reachable and young #define GC_OLD 2 // if it is reachable it will be marked as old #define GC_OLD_MARKED (GC_OLD | GC_MARKED) // reachable and old +#define GC_IN_IMAGE 4 // useful constants -extern jl_methtable_t *jl_type_type_mt JL_GLOBALLY_ROOTED; -extern jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_methtable_t *jl_type_type_mt JL_GLOBALLY_ROOTED; +extern JL_DLLIMPORT jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED; +extern jl_methtable_t *jl_kwcall_mt JL_GLOBALLY_ROOTED; +extern JL_DLLEXPORT jl_method_t *jl_opaque_closure_method JL_GLOBALLY_ROOTED; extern 
JL_DLLEXPORT _Atomic(size_t) jl_world_counter; typedef void (*tracer_cb)(jl_value_t *tracee); @@ -234,10 +328,13 @@ extern tracer_cb jl_newmeth_tracer; void jl_call_tracer(tracer_cb callback, jl_value_t *tracee); void print_func_loc(JL_STREAM *s, jl_method_t *m); extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED; -void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why); +JL_DLLEXPORT extern arraylist_t jl_linkage_blobs; // external linkage: sysimg/pkgimages +JL_DLLEXPORT extern arraylist_t jl_image_relocs; // external linkage: sysimg/pkgimages +extern arraylist_t eytzinger_image_tree; +extern arraylist_t eytzinger_idxs; extern JL_DLLEXPORT size_t jl_page_size; -extern jl_function_t *jl_typeinf_func; +extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; @@ -248,13 +345,12 @@ JL_DLLEXPORT extern const char *jl_filename; jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize); jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); -JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize); +JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; -void *jl_gc_perm_alloc(size_t sz, int zero, +JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; -void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v); void gc_sweep_sysimg(void); @@ -277,27 +373,51 @@ static const int jl_gc_sizeclasses[] = { 144, 160, 176, 192, 208, 224, 240, 256, // the following tables are computed for maximum packing efficiency via the formula: - // pg = 2^14 + // pg = GC_SMALL_PAGE ? 
2^12 : 2^14 // sz = (div.(pg-8, rng).÷16)*16; hcat(sz, (pg-8).÷sz, pg .- (pg-8).÷sz.*sz)' +#ifdef GC_SMALL_PAGE + // rng = 15:-1:2 (14 pools) + 272, 288, 304, 336, 368, 400, 448, 496, 576, 672, 816, 1008, 1360, 2032 +// 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, /pool +// 16, 64, 144, 64, 48, 96, 64, 128, 64, 64, 16, 64, 16, 32, bytes lost +#else // rng = 60:-4:32 (8 pools) 272, 288, 304, 336, 368, 400, 448, 496, -// 60, 56, 53, 48, 44, 40, 36, 33, /pool -// 64, 256, 272, 256, 192, 384, 256, 16, bytes lost +// 60, 56, 53, 48, 44, 40, 36, 33, /pool +// 64, 256, 272, 256, 192, 384, 256, 16, bytes lost // rng = 30:-2:16 (8 pools) 544, 576, 624, 672, 736, 816, 896, 1008, -// 30, 28, 26, 24, 22, 20, 18, 16, /pool -// 64, 256, 160, 256, 192, 64, 256, 256, bytes lost +// 30, 28, 26, 24, 22, 20, 18, 16, /pool +// 64, 256, 160, 256, 192, 64, 256, 256, bytes lost // rng = 15:-1:8 (8 pools) 1088, 1168, 1248, 1360, 1488, 1632, 1808, 2032 -// 15, 14, 13, 12, 11, 10, 9, 8, /pool -// 64, 32, 160, 64, 16, 64, 112, 128, bytes lost +// 15, 14, 13, 12, 11, 10, 9, 8, /pool +// 64, 32, 160, 64, 16, 64, 112, 128, bytes lost +#endif }; +#ifdef GC_SMALL_PAGE +#ifdef _P64 +# define JL_GC_N_POOLS 39 +#elif MAX_ALIGN == 8 +# define JL_GC_N_POOLS 40 +#else +# define JL_GC_N_POOLS 41 +#endif +#else +#ifdef _P64 +# define JL_GC_N_POOLS 49 +#elif MAX_ALIGN == 8 +# define JL_GC_N_POOLS 50 +#else +# define JL_GC_N_POOLS 51 +#endif +#endif static_assert(sizeof(jl_gc_sizeclasses) / sizeof(jl_gc_sizeclasses[0]) == JL_GC_N_POOLS, ""); -STATIC_INLINE int jl_gc_alignment(size_t sz) +STATIC_INLINE int jl_gc_alignment(size_t sz) JL_NOTSAFEPOINT { if (sz == 0) return sizeof(void*); @@ -317,14 +437,19 @@ STATIC_INLINE int jl_gc_alignment(size_t sz) return 16; #endif } -JL_DLLEXPORT int jl_alignment(size_t sz); +JL_DLLEXPORT int jl_alignment(size_t sz) JL_NOTSAFEPOINT; // the following table is computed as: // [searchsortedfirst(jl_gc_sizeclasses, i) - 1 for i = 0:16:jl_gc_sizeclasses[end]] -static const uint8_t szclass_table[] = {0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48}; +static const uint8_t szclass_table[] = +#ifdef GC_SMALL_PAGE + {0,1,3,5,7,9,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,28,29,29,30,30,31,31,31,32,32,32,33,33,33,33,33,34,34,34,34,34,34,35,35,35,35,35,35,35,35,35,36,36,36,36,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38}; +#else + {0,1,3,5,7,9,11,13,15,17,18,19,20,21,22,23,24,25,26,27,28,28,29,29,30,30,31,31,31,32,32,32,33,33,33,34,34,35,35,35,36,36,36,37,37,37,37,38,38,38,38,38,39,39,39,39,39,40,40,40,40,40,40,40,41,41,41,41,41,42,42,42,42,42,43,43,43,43,43,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,46,46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,47,47,47,48,48,48,48,48,48,48,48,48,48,48,48,48,48}; +#endif static_assert(sizeof(szclass_table) == 128, ""); -STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) +STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) 
JL_NOTSAFEPOINT { assert(sz <= 2032); #ifdef _P64 @@ -344,7 +469,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) return klass + N; } -STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) +STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFEPOINT { if (sz >= 16 && sz <= 152) { #ifdef _P64 @@ -405,9 +530,12 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); # define jl_gc_alloc(ptls, sz, ty) jl_gc_alloc_(ptls, sz, ty) #endif -// jl_buff_tag must be a multiple of GC_PAGE_SZ so that it can't be -// confused for an actual type reference. -#define jl_buff_tag ((uintptr_t)0x4eadc000) +// jl_buff_tag must be an actual pointer here, so it cannot be confused for an actual type reference. +// defined as uint64_t[3] so that we can get the right alignment of this and a "type tag" on it +const extern uint64_t _jl_buff_tag[3]; +#define jl_buff_tag ((uintptr_t)LLT_ALIGN((uintptr_t)&_jl_buff_tag[1],16)) +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void); + typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz) { @@ -425,10 +553,8 @@ STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT o->header = tag | GC_OLD_MARKED; return jl_valueof(o); } -jl_value_t *jl_permbox8(jl_datatype_t *t, int8_t x); -jl_value_t *jl_permbox16(jl_datatype_t *t, int16_t x); -jl_value_t *jl_permbox32(jl_datatype_t *t, int32_t x); -jl_value_t *jl_permbox64(jl_datatype_t *t, int64_t x); +jl_value_t *jl_permbox8(jl_datatype_t *t, uintptr_t tag, uint8_t x); +jl_value_t *jl_permbox32(jl_datatype_t *t, uintptr_t tag, uint32_t x); jl_svec_t *jl_perm_symsvec(size_t n, ...); // this sizeof(__VA_ARGS__) trick can't be computed until C11, but that only matters to Clang in some situations @@ -471,40 +597,22 @@ JL_DLLEXPORT void JL_NORETURN jl_throw_out_of_memory_error(void); JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT; JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT; void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT; +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT; void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT; void jl_gc_run_all_finalizers(jl_task_t *ct); void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task); void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT; -// Set GC memory trigger in bytes for greedy memory collecting -void jl_gc_set_max_memory(uint64_t max_mem); - -JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT; -void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT; - -STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* -{ - if (__unlikely(jl_astaggedvalue(bnd)->bits.gc == 3 && - (jl_astaggedvalue(val)->bits.gc & 1) == 0)) - jl_gc_queue_binding(bnd); -} - -STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* -{ - // if parent is marked and buf is not - if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) { - jl_task_t *ct = jl_current_task; - gc_setmark_buf(ct->ptls, bufptr, 3, minsz); - } -} - -void jl_gc_debug_print_status(void); -JL_DLLEXPORT void jl_gc_debug_critical_error(void); +void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT; +JL_DLLEXPORT void 
jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT; void jl_print_gc_stats(JL_STREAM *s); void jl_gc_reset_alloc_count(void); uint32_t jl_get_gs_ctr(void); void jl_set_gs_ctr(uint32_t ctr); +typedef struct _jl_static_show_config_t { uint8_t quiet; } jl_static_show_config_t; +size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_config_t ctx) JL_NOTSAFEPOINT; + STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NOTSAFEPOINT { if (dt->layout->first_ptr >= 0) { @@ -518,10 +626,11 @@ STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NO // -- helper types -- // typedef struct { - uint8_t pure:1; - uint8_t propagate_inbounds:1; - uint8_t inlineable:1; uint8_t inferred:1; + uint8_t propagate_inbounds:1; + uint8_t has_fcall:1; + uint8_t nospecializeinfer:1; + uint8_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none } jl_code_info_flags_bitfield_t; @@ -532,30 +641,41 @@ typedef union { // -- functions -- // -// jl_code_info_flag_t code_info_flags(uint8_t pure, uint8_t propagate_inbounds, uint8_t inlineable, uint8_t inferred, uint8_t constprop); JL_DLLEXPORT jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force); JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world); -jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world); -void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec); JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred( jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype, size_t min_world, size_t max_world); +JL_DLLEXPORT jl_code_instance_t *jl_get_codeinst_for_src( + jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_code_info_t *src); jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT); jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT); +JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst( + jl_method_instance_t *mi, jl_value_t *rettype, jl_value_t *exctype, + jl_value_t *inferred_const, jl_value_t *inferred, + int32_t const_flags, size_t min_world, size_t max_world, + uint32_t ipo_effects, uint32_t effects, jl_value_t *analysis_results, + uint8_t relocatability); + +JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world); JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types); -jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT); -int jl_code_requires_compiler(jl_code_info_t *src); +jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world); +int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile); jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ast); JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void); -void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, - int binding_effects); +JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, + int binding_effects); + +int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_method_instance_t **caller) JL_NOTSAFEPOINT; +int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_instance_t *caller); +void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *caller); 
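
As a side note for readers of this hunk: the jl_code_info_flags_bitfield_t change a few lines above packs six per-CodeInfo flags, including the new 2-bit inlining and constprop modes, into a single byte. The standalone C sketch below (not part of the patch; the demo_ names are hypothetical) shows the same packing idea, assuming the companion flags union simply overlays the bitfield on a packed uint8_t.

#include <stdint.h>
#include <stdio.h>

/* Same packing idea as jl_code_info_flags_bitfield_t above: six fields in one byte. */
typedef struct {
    uint8_t inferred:1;
    uint8_t propagate_inbounds:1;
    uint8_t has_fcall:1;
    uint8_t nospecializeinfer:1;
    uint8_t inlining:2;  /* 0 = use heuristic; 1 = aggressive; 2 = none */
    uint8_t constprop:2; /* 0 = use heuristic; 1 = aggressive; 2 = none */
} demo_code_info_flags_bitfield_t;

typedef union {
    demo_code_info_flags_bitfield_t bits;
    uint8_t packed; /* whole-byte view, convenient for serialization */
} demo_code_info_flags_t;

int main(void)
{
    demo_code_info_flags_t flags = {{0}};
    flags.bits.inferred = 1;
    flags.bits.inlining = 1;  /* aggressive inlining */
    flags.bits.constprop = 2; /* constant propagation disabled */
    printf("packed = 0x%02x, size = %zu byte(s)\n", flags.packed, sizeof(flags));
    return 0;
}
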
JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root); void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots); -int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i); -jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index); -int nroots_with_key(jl_method_t *m, uint64_t key); +int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i) JL_NOTSAFEPOINT; +jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index) JL_NOTSAFEPOINT; +int nroots_with_key(jl_method_t *m, uint64_t key) JL_NOTSAFEPOINT; int jl_valid_type_param(jl_value_t *v); @@ -564,7 +684,6 @@ JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world); JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...); -JL_DLLEXPORT jl_value_t *jl_get_keyword_sorter(jl_value_t *f); JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t); #define JL_CALLABLE(name) \ @@ -577,10 +696,10 @@ void jl_install_default_signal_handlers(void); void restore_signals(void); void jl_install_thread_signal_handler(jl_ptls_t ptls); -JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b); +JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_datatype_t *dt); extern uv_loop_t *jl_io_loop; -void jl_uv_flush(uv_stream_t *stream); +JL_DLLEXPORT void jl_uv_flush(uv_stream_t *stream); typedef struct jl_typeenv_t { jl_tvar_t *var; @@ -607,14 +726,16 @@ jl_svec_t *jl_outer_unionall_vars(jl_value_t *u); jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, int *issubty); jl_value_t *jl_type_intersection_env(jl_value_t *a, jl_value_t *b, jl_svec_t **penv); int jl_subtype_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv); -JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b); +JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT; // specificity comparison assuming !(a <: b) and !(b <: a) JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b); jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n); JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals); jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val); +jl_unionall_t *jl_rename_unionall(jl_unionall_t *u); JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u); +JL_DLLEXPORT jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u); int jl_count_union_components(jl_value_t *v); JL_DLLEXPORT jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT; int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT; @@ -623,10 +744,11 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_datatype_t *jl_new_uninitialized_datatype(void); void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable); JL_DLLEXPORT jl_datatype_t *jl_wrap_Type(jl_value_t *t); // x -> Type{x} -jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n); +jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check); void jl_reinstantiate_inner_types(jl_datatype_t *t); jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type); void jl_cache_type_(jl_datatype_t *type); 
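
A quick illustration of the union helpers declared just above (jl_count_union_components and jl_nth_union_component): the sketch below walks the leaf components of a possibly nested Union type and prints each one. It is illustrative only, assumes an initialized runtime, and uses jl_static_show/jl_printf from the public header for output.

#include "julia.h"
#include "julia_internal.h"

/* Print every leaf component of `ty`. For a non-Union type this prints just `ty`
 * itself, since jl_count_union_components reports a single component. */
static void print_union_leaves(jl_value_t *ty)
{
    int n = jl_count_union_components(ty);
    for (int i = 0; i < n; i++) {
        jl_value_t *leaf = jl_nth_union_component(ty, i);
        jl_static_show(JL_STDOUT, leaf);
        jl_printf(JL_STDOUT, "\n");
    }
}
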
+jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz); void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT; jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic); jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic); @@ -635,10 +757,12 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n); jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module); jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st); int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), void *env); +int foreach_mtable_in_module(jl_module_t *m, int (*visit)(jl_methtable_t *mt, void *env), void *env); void jl_init_main_module(void); JL_DLLEXPORT int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT; jl_array_t *jl_get_loaded_modules(void); JL_DLLEXPORT int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree); +int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT; void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type); jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded); @@ -650,38 +774,45 @@ jl_value_t *jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e, jl_code_info_t *src, jl_svec_t *sparam_vals); JL_DLLEXPORT int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT; -jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule); +jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, + jl_module_t *inmodule, const char *file, int line); -jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world); +JL_DLLEXPORT jl_method_instance_t *jl_method_lookup_by_tt(jl_tupletype_t *tt, size_t world, jl_value_t *_mt); +JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world); jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs); jl_value_t *jl_gf_invoke(jl_value_t *types, jl_value_t *f, jl_value_t **args, size_t nargs); +JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, jl_value_t *mt, size_t world, size_t *min_world, size_t *max_world); JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous, size_t world, size_t *min_valid, size_t *max_valid, int *ambig); +JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, jl_value_t *mt, size_t world, size_t *min_world, size_t *max_world); + -JL_DLLEXPORT jl_datatype_t *jl_first_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +jl_datatype_t *jl_nth_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_methtable_t *jl_method_table_for( jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; +jl_methtable_t *jl_kwmethod_table_for( + jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_methtable_t *jl_method_get_table( - jl_method_t *method) JL_NOTSAFEPOINT; -jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT); + jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_pointer_egal(jl_value_t *t); 
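
The lookup helpers above all operate on call-signature tuple types. As a rough illustration of how jl_method_table_for is meant to be consumed, here is a sketch that builds a signature and asks which method table owns it; this assumes an initialized runtime reached via the embedding API, and is not an endorsed entry point.

#include "julia.h"
#include "julia_internal.h"

/* Sketch: find the method table that would service a call matching `sig_str`,
 * where `sig_str` names a call-signature tuple type. */
static jl_methtable_t *mtable_for_signature(const char *sig_str)
{
    jl_value_t *sig = jl_eval_string(sig_str); /* e.g. "Tuple{typeof(sin), Float64}" */
    if (sig == NULL || !jl_is_type(sig))
        return NULL;
    return jl_method_table_for(sig); /* NULL when no single owning table exists */
}
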
JL_DLLEXPORT jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; void jl_compute_field_offsets(jl_datatype_t *st); -jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims, - int isunboxed, int hasptr, int isunion, int elsz); void jl_module_run_initializer(jl_module_t *m); -jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var); -JL_DLLEXPORT void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b); +JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc); +JL_DLLEXPORT void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *sym, jl_binding_t *b); extern jl_array_t *jl_module_init_order JL_GLOBALLY_ROOTED; extern htable_t jl_current_modules JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module JL_GLOBALLY_ROOTED; -int jl_compile_extern_c(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt); +extern jl_genericmemory_t *jl_global_roots_list JL_GLOBALLY_ROOTED; +extern jl_genericmemory_t *jl_global_roots_keyset JL_GLOBALLY_ROOTED; +JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) JL_GLOBALLY_ROOTED; jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, - jl_value_t *source, jl_value_t **env, size_t nenv); + jl_value_t *source, jl_value_t **env, size_t nenv, int do_compile); JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source); // Each tuple can exist in one of 4 Vararg states: @@ -696,11 +827,6 @@ typedef enum { JL_VARARG_UNBOUND = 3 } jl_vararg_kind_t; -STATIC_INLINE int jl_is_vararg(jl_value_t *v) JL_NOTSAFEPOINT -{ - return jl_typeof(v) == (jl_value_t*)jl_vararg_type; -} - STATIC_INLINE jl_value_t *jl_unwrap_vararg(jl_vararg_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { assert(jl_is_vararg((jl_value_t*)v)); @@ -761,38 +887,35 @@ void jl_init_flisp(void); void jl_init_common_symbols(void); void jl_init_primitives(void) JL_GC_DISABLED; void jl_init_llvm(void); -void jl_init_codegen(void); void jl_init_runtime_ccall(void); void jl_init_intrinsic_functions(void); void jl_init_intrinsic_properties(void); void jl_init_tasks(void) JL_GC_DISABLED; -void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo); +void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo) JL_NOTSAFEPOINT; jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi); void jl_init_serializer(void); void jl_gc_init(void); void jl_init_uv(void); -void jl_init_thread_heap(jl_ptls_t ptls); +void jl_init_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT; void jl_init_int32_int64_cache(void); JL_DLLEXPORT void jl_init_options(void); -void jl_teardown_codegen(void); - void jl_set_base_ctx(char *__stk); extern JL_DLLEXPORT ssize_t jl_tls_offset; extern JL_DLLEXPORT const int jl_tls_elf_support; void jl_init_threading(void); void jl_start_threads(void); -int jl_effective_threads(void); // Whether the GC is running extern char *jl_safepoint_pages; STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) { uintptr_t safepoint_addr = (uintptr_t)jl_safepoint_pages; - return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3; + return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; 
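
The jl_addr_is_safepoint change above widens the recognized safepoint region from three to four pages. The underlying trick is that every thread periodically loads from a page the runtime can mprotect to PROT_NONE, so requesting a stop turns the next poll into a fault whose address identifies it as a safepoint rather than a crash. The standalone POSIX sketch below demonstrates that mechanism in miniature; it is not Julia's actual handler and the names are illustrative.

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static volatile size_t *safepoint_page;
static size_t page_size;

static void segv_handler(int sig, siginfo_t *info, void *ctx)
{
    (void)sig; (void)ctx;
    uintptr_t addr = (uintptr_t)info->si_addr;
    uintptr_t base = (uintptr_t)safepoint_page;
    if (addr >= base && addr < base + page_size) {
        /* Same shape of test as jl_addr_is_safepoint: the fault is on our page,
         * so it is a safepoint poll. "Resume" by making the page readable again;
         * returning from the handler retries the faulting load, which now succeeds. */
        mprotect((void*)base, page_size, PROT_READ);
        return;
    }
    _exit(1); /* a genuine segfault, not a safepoint */
}

int main(void)
{
    page_size = (size_t)sysconf(_SC_PAGESIZE);
    safepoint_page = (volatile size_t*)mmap(NULL, page_size, PROT_READ,
                                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (safepoint_page == MAP_FAILED)
        return 1;

    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = segv_handler;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGSEGV, &sa, NULL);

    mprotect((void*)safepoint_page, page_size, PROT_NONE); /* "stop the world" */
    size_t poll = *safepoint_page; /* faults, handler re-enables, load retries */
    (void)poll;
    puts("resumed past the safepoint");
    return 0;
}
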
+extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. @@ -815,7 +938,8 @@ void jl_safepoint_end_gc(void); // Wait for the GC to finish // This function does **NOT** modify the `gc_state` to inform the GC thread // The caller should set it **BEFORE** calling this function. -void jl_safepoint_wait_gc(void); +void jl_safepoint_wait_gc(void) JL_NOTSAFEPOINT; +void jl_safepoint_wait_thread_resume(void) JL_NOTSAFEPOINT; // Set pending sigint and enable the mechanisms to deliver the sigint. void jl_safepoint_enable_sigint(void); @@ -827,7 +951,7 @@ void jl_safepoint_defer_sigint(void); // Return `1` if the sigint should be delivered and `0` if there's no sigint // to be delivered. int jl_safepoint_consume_sigint(void); -void jl_wake_libuv(void); +void jl_wake_libuv(void) JL_NOTSAFEPOINT; void jl_set_pgcstack(jl_gcframe_t **) JL_NOTSAFEPOINT; #if defined(_OS_DARWIN_) @@ -837,50 +961,28 @@ typedef DWORD jl_pgcstack_key_t; #else typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT; #endif -JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k); +JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) JL_NOTSAFEPOINT; #if !defined(_OS_WINDOWS_) && !defined(__APPLE__) && !defined(JL_DISABLE_LIBUNWIND) extern pthread_mutex_t in_signal_lock; #endif -#if !defined(__clang_gcanalyzer__) && !defined(_OS_DARWIN_) -static inline void jl_set_gc_and_wait(void) +void jl_set_gc_and_wait(void); // n.b. not used on _OS_DARWIN_ + +// Query if a Julia object is in a permalloc region (i.e. part of a sys- or pkg-image) +STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT { - jl_task_t *ct = jl_current_task; - // reading own gc state doesn't need atomic ops since no one else - // should store to it.
- int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); - jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); - jl_safepoint_wait_gc(); - jl_atomic_store_release(&ct->ptls->gc_state, state); + return jl_image_relocs.len; } -#endif -void jl_gc_set_permalloc_region(void *start, void *end); -typedef struct { - LLVMOrcThreadSafeModuleRef TSM; - LLVMValueRef F; -} jl_llvmf_dump_t; +size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary); -JL_DLLEXPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params); -JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); -JL_DLLEXPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo); -JL_DLLEXPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); - -void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy); -void jl_dump_native(void *native_code, - const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len); -int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode, - int32_t *func_idx, int32_t *specfunc_idx); +uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT; // the first argument to jl_idtable_rehash is used to return a value // make sure it is rooted if it is used after the function returns -JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz); -_Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_genericmemory_t *jl_idtable_rehash(jl_genericmemory_t *a, size_t newsz); +_Atomic(jl_value_t*) *jl_table_peek_bp(jl_genericmemory_t *a, jl_value_t *key) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t*); @@ -888,21 +990,24 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo JL_DLLEXPORT jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache); jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_svec_t *sp); JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t min_world, size_t max_world); -JL_DLLEXPORT jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world); -JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *type, size_t world); +JL_DLLEXPORT jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_value_t *type, size_t world); JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo( jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams); jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi_ins); -JL_DLLEXPORT void 
jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_method_instance_t *caller); +JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_value_t *invokesig, jl_method_instance_t *caller); JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller); +JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, + jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); +JL_DLLEXPORT extern jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t min_world, size_t max_world) JL_NOTSAFEPOINT; uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT; jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs); JL_DLLEXPORT int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT; -jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename, - size_t lineno, size_t offset, jl_value_t *options); +JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename, + size_t lineno, size_t offset, jl_value_t *options); //-------------------------------------------------- // Backtraces @@ -928,7 +1033,7 @@ jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename, // 2. An "extended entry": a mixture of raw data and pointers to julia objects // which must be treated as GC roots. // -// A single extended entry is seralized using multiple elements from the raw +// A single extended entry is serialized using multiple elements from the raw // buffer; if `e` is the pointer to the first slot we have: // // e[0] JL_BT_NON_PTR_ENTRY - Special marker to distinguish extended entries @@ -1019,8 +1124,6 @@ typedef struct { int inlined; } jl_frame_t; -// Might be called from unmanaged thread -uint64_t jl_getUnwindInfo(uint64_t dwBase); #ifdef _OS_WINDOWS_ #include JL_DLLEXPORT EXCEPTION_DISPOSITION NTAPI __julia_personality( @@ -1039,7 +1142,9 @@ extern JL_DLLEXPORT uv_mutex_t jl_in_stackwalk; #elif !defined(JL_DISABLE_LIBUNWIND) // This gives unwind only local unwinding options ==> faster code # define UNW_LOCAL_ONLY +#pragma GCC visibility push(default) # include +#pragma GCC visibility pop typedef unw_context_t bt_context_t; typedef unw_cursor_t bt_cursor_t; # if (!defined(SYSTEM_LIBUNWIND) || UNW_VERSION_MAJOR > 1 || \ @@ -1062,15 +1167,17 @@ size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT; #endif JL_DLLEXPORT jl_value_t *jl_get_backtrace(void); -void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct); -JL_DLLEXPORT void jl_raise_debugger(void); -int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT; +void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *ct); +JL_DLLEXPORT void jl_raise_debugger(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT; void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT; void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_data) JL_NOTSAFEPOINT; #ifdef _OS_WINDOWS_ JL_DLLEXPORT void jl_refresh_dbg_module_list(void); #endif +int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) JL_NOTSAFEPOINT; +void jl_thread_resume(int tid) JL_NOTSAFEPOINT; + // *to is NULL or malloc'd pointer, from is allowed to be NULL STATIC_INLINE char 
*jl_copy_str(char **to, const char *from) JL_NOTSAFEPOINT { @@ -1127,40 +1234,48 @@ STATIC_INLINE size_t jl_excstack_next(jl_excstack_t *stack, size_t itr) JL_NOTSA return itr-2 - jl_excstack_bt_size(stack, itr); } // Exception stack manipulation -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size); //-------------------------------------------------- // congruential random number generator // for a small amount of thread-local randomness -STATIC_INLINE void unbias_cong(uint64_t max, uint64_t *unbias) JL_NOTSAFEPOINT -{ - *unbias = UINT64_MAX - ((UINT64_MAX % max) + 1); -} -STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed) JL_NOTSAFEPOINT + +STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT { - while ((*seed = 69069 * (*seed) + 362437) > unbias) - ; - return *seed % max; + if (max == 0) + return 0; + uint64_t mask = ~(uint64_t)0; + --max; + mask >>= __builtin_clzll(max|1); + uint64_t x; + do { + *seed = 69069 * (*seed) + 362437; + x = *seed & mask; + } while (x > max); + return x; } JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_init_rand(void); +JL_DLLEXPORT extern void *jl_exe_handle; +JL_DLLEXPORT extern void *jl_libjulia_handle; JL_DLLEXPORT extern void *jl_libjulia_internal_handle; JL_DLLEXPORT extern void *jl_RTLD_DEFAULT_handle; + #if defined(_OS_WINDOWS_) -JL_DLLEXPORT extern void *jl_exe_handle; -JL_DLLEXPORT extern void *jl_libjulia_handle; JL_DLLEXPORT extern const char *jl_crtdll_basename; extern void *jl_ntdll_handle; extern void *jl_kernel32_handle; extern void *jl_crtdll_handle; extern void *jl_winsock_handle; +void win32_formatmessage(DWORD code, char *reason, int len) JL_NOTSAFEPOINT; #endif JL_DLLEXPORT void *jl_get_library_(const char *f_lib, int throw_err); +void *jl_find_dynamic_library_by_addr(void *symbol); #define jl_get_library(f_lib) jl_get_library_(f_lib, 1) JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd); JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name); @@ -1170,26 +1285,22 @@ JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline( jl_unionall_t *env, jl_value_t **vals); -// Windows only +// Special filenames used to refer to internal julia libraries #define JL_EXE_LIBNAME ((const char*)1) #define JL_LIBJULIA_DL_LIBNAME ((const char*)2) #define JL_LIBJULIA_INTERNAL_DL_LIBNAME ((const char*)3) -JL_DLLEXPORT const char *jl_dlfind_win32(const char *name); +JL_DLLEXPORT const char *jl_dlfind(const char *name); // libuv wrappers: JL_DLLEXPORT int jl_fs_rename(const char *src_path, const char *dst_path); -#ifdef SEGV_EXCEPTION -extern JL_DLLEXPORT jl_value_t *jl_segv_exception; -#endif - // -- Runtime intrinsics -- // JL_DLLEXPORT const char *jl_intrinsic_name(int f) JL_NOTSAFEPOINT; JL_DLLEXPORT unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT; STATIC_INLINE int is_valid_intrinsic_elptr(jl_value_t *ety) { - return ety == (jl_value_t*)jl_any_type || (jl_is_concrete_type(ety) && !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout)); + return ety == (jl_value_t*)jl_any_type || (jl_is_concrete_type(ety) && !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout) && !jl_is_array_type(ety)); } JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, 
jl_value_t *v); JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t *align); @@ -1220,7 +1331,6 @@ JL_DLLEXPORT jl_value_t *jl_add_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_sub_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_mul_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_div_float(jl_value_t *a, jl_value_t *b); -JL_DLLEXPORT jl_value_t *jl_rem_float(jl_value_t *a, jl_value_t *b); JL_DLLEXPORT jl_value_t *jl_fma_float(jl_value_t *a, jl_value_t *b, jl_value_t *c); JL_DLLEXPORT jl_value_t *jl_muladd_float(jl_value_t *a, jl_value_t *b, jl_value_t *c); @@ -1285,10 +1395,9 @@ JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a); JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *a); JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type); JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a); -JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i); JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary); -JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT; +JL_DLLEXPORT uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT; // -- synchronization utilities -- // @@ -1297,15 +1406,25 @@ extern jl_mutex_t typecache_lock; extern JL_DLLEXPORT jl_mutex_t jl_codegen_lock; #if defined(__APPLE__) -void jl_mach_gc_end(void); +void jl_mach_gc_end(void) JL_NOTSAFEPOINT; +void jl_safepoint_resume_thread_mach(jl_ptls_t ptls2, int16_t tid2) JL_NOTSAFEPOINT; #endif // -- smallintset.c -- // -typedef uint_t (*smallintset_hash)(size_t val, jl_svec_t *data); -typedef int (*smallintset_eq)(size_t val, const void *key, jl_svec_t *data, uint_t hv); -ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *key, jl_svec_t *data, uint_t hv); -void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data); +typedef uint_t (*smallintset_hash)(size_t val, jl_value_t *data); +typedef int (*smallintset_eq)(size_t val, const void *key, jl_value_t *data, uint_t hv); +ssize_t jl_smallintset_lookup(jl_genericmemory_t *cache, smallintset_eq eq, const void *key, jl_value_t *data, uint_t hv, int pop); +void jl_smallintset_insert(_Atomic(jl_genericmemory_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_value_t *data); +jl_genericmemory_t* smallintset_rehash(jl_genericmemory_t* a, smallintset_hash hash, jl_value_t *data, size_t newsz, size_t np); +void smallintset_empty(const jl_genericmemory_t *a) JL_NOTSAFEPOINT; + +JL_DLLEXPORT jl_genericmemory_t *jl_idset_rehash(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, size_t newsz); +JL_DLLEXPORT ssize_t jl_idset_peek_bp(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT; +jl_value_t *jl_idset_get(jl_genericmemory_t *keys JL_PROPAGATES_ROOT, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_genericmemory_t *jl_idset_put_key(jl_genericmemory_t *keys, jl_value_t *key, ssize_t *newidx); +JL_DLLEXPORT jl_genericmemory_t *jl_idset_put_idx(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, ssize_t idx); +JL_DLLEXPORT ssize_t jl_idset_pop(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT; // -- typemap.c -- // @@ -1357,20 +1476,25 @@ struct typemap_intersection_env { jl_typemap_intersection_visitor_fptr const fptr; // fptr to call on a match jl_value_t *const type; // type 
to match jl_value_t *const va; // the tparam0 for the vararg in type, if applicable (or NULL) + size_t search_slurp; // output values + size_t min_valid; + size_t max_valid; jl_value_t *ti; // intersection type jl_svec_t *env; // intersection env (initialize to null to perform intersection without an environment) int issubty; // if `a <: b` is true in `intersect(a,b)` }; int jl_typemap_intersection_visitor(jl_typemap_t *a, int offs, struct typemap_intersection_env *closure); +void typemap_slurp_search(jl_typemap_entry_t *ml, struct typemap_intersection_env *closure); // -- simplevector.c -- // -// For codegen only. -JL_DLLEXPORT size_t (jl_svec_len)(jl_svec_t *t) JL_NOTSAFEPOINT; -JL_DLLEXPORT int8_t jl_svec_isassigned(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i) JL_NOTSAFEPOINT; -JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i); +// check whether the specified number of arguments is compatible with the +// specified number of parameters of the tuple type +JL_DLLEXPORT int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0); +JL_DLLEXPORT jl_value_t *jl_argtype_with_function_type(jl_value_t *ft JL_MAYBE_UNROOTED, jl_value_t *types0); JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type); @@ -1389,8 +1513,6 @@ STATIC_INLINE void *jl_get_frame_addr(void) #endif } -JL_DLLEXPORT jl_array_t *jl_array_cconvert_cstring(jl_array_t *a); - // Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the // julia side. If any of module, group, id, file or line are NULL, these will // be passed to the julia side as `nothing`. If `kwargs` is NULL an empty set @@ -1409,6 +1531,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_top_sym; extern JL_DLLEXPORT jl_sym_t *jl_module_sym; extern JL_DLLEXPORT jl_sym_t *jl_slot_sym; extern JL_DLLEXPORT jl_sym_t *jl_export_sym; +extern JL_DLLEXPORT jl_sym_t *jl_public_sym; extern JL_DLLEXPORT jl_sym_t *jl_import_sym; extern JL_DLLEXPORT jl_sym_t *jl_toplevel_sym; extern JL_DLLEXPORT jl_sym_t *jl_quote_sym; @@ -1420,6 +1543,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_return_sym; extern JL_DLLEXPORT jl_sym_t *jl_lineinfo_sym; extern JL_DLLEXPORT jl_sym_t *jl_lambda_sym; extern JL_DLLEXPORT jl_sym_t *jl_assign_sym; +extern JL_DLLEXPORT jl_sym_t *jl_binding_sym; extern JL_DLLEXPORT jl_sym_t *jl_globalref_sym; extern JL_DLLEXPORT jl_sym_t *jl_do_sym; extern JL_DLLEXPORT jl_sym_t *jl_method_sym; @@ -1440,6 +1564,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_thunk_sym; extern JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym; extern JL_DLLEXPORT jl_sym_t *jl_as_sym; extern JL_DLLEXPORT jl_sym_t *jl_global_sym; +extern JL_DLLEXPORT jl_sym_t *jl_local_sym; extern JL_DLLEXPORT jl_sym_t *jl_list_sym; extern JL_DLLEXPORT jl_sym_t *jl_dot_sym; extern JL_DLLEXPORT jl_sym_t *jl_newvar_sym; @@ -1447,7 +1572,6 @@ extern JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym; extern JL_DLLEXPORT jl_sym_t *jl_inbounds_sym; extern JL_DLLEXPORT jl_sym_t *jl_copyast_sym; extern JL_DLLEXPORT jl_sym_t *jl_cfunction_sym; -extern JL_DLLEXPORT jl_sym_t *jl_pure_sym; extern JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym; extern JL_DLLEXPORT jl_sym_t *jl_meta_sym; extern JL_DLLEXPORT jl_sym_t *jl_inert_sym; @@ -1465,6 +1589,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym; extern JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym; extern JL_DLLEXPORT jl_sym_t *jl_purity_sym; extern JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym; +extern JL_DLLEXPORT jl_sym_t 
*jl_nospecializeinfer_sym; extern JL_DLLEXPORT jl_sym_t *jl_macrocall_sym; extern JL_DLLEXPORT jl_sym_t *jl_colon_sym; extern JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym; @@ -1478,6 +1603,8 @@ extern JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym; extern JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym; extern JL_DLLEXPORT jl_sym_t *jl_optlevel_sym; extern JL_DLLEXPORT jl_sym_t *jl_thismodule_sym; +extern JL_DLLEXPORT jl_sym_t *jl_eval_sym; +extern JL_DLLEXPORT jl_sym_t *jl_include_sym; extern JL_DLLEXPORT jl_sym_t *jl_atom_sym; extern JL_DLLEXPORT jl_sym_t *jl_statement_sym; extern JL_DLLEXPORT jl_sym_t *jl_all_sym; @@ -1497,13 +1624,10 @@ extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym; JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing); JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing); -struct _jl_sysimg_fptrs_t; +struct _jl_image_fptrs_t; -void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs, - jl_method_instance_t **linfos, size_t n); -void jl_write_coverage_data(const char*); +JL_DLLEXPORT void jl_write_coverage_data(const char*); void jl_write_malloc_log(void); -void jl_write_compiler_output(void); #if jl_has_builtin(__builtin_unreachable) || defined(_COMPILER_GCC_) || defined(_COMPILER_INTEL_) # define jl_unreachable() __builtin_unreachable() @@ -1534,7 +1658,7 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT; #endif // _COMPILER_GCC_ #ifdef __clang_gcanalyzer__ - // Not a safepoint (so it dosn't free other values), but an artificial use. + // Not a safepoint (so it doesn't free other values), but an artificial use. // Usually this is unnecessary because the analyzer can see all real uses, // but sometimes real uses are harder for the analyzer to see, or it may // give up before it sees it, so this can be helpful to be explicit. @@ -1543,15 +1667,75 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT; #define JL_GC_ASSERT_LIVE(x) (void)(x) #endif -float __gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT; -uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT; +#ifdef _OS_WINDOWS_ +// On Windows, weak symbols do not default to 0 due to a GCC bug +// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90826), use symbol +// aliases with a known value instead. 
+#define JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(sym) __attribute__((weak,alias(#sym))) +#define JL_WEAK_SYMBOL_DEFAULT(sym) &sym +#else +#define JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(sym) __attribute__((weak)) +#define JL_WEAK_SYMBOL_DEFAULT(sym) NULL +#endif + +//JL_DLLEXPORT float julia__gnu_h2f_ieee(half param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT half julia__gnu_f2h_ieee(float param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT half julia__truncdfhf2(double param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT float julia__truncsfbf2(float param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT float julia__truncdfbf2(double param) JL_NOTSAFEPOINT; +//JL_DLLEXPORT double julia__extendhfdf2(half n) JL_NOTSAFEPOINT; + +JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len); + +// -- exports from codegen -- // + +#define IR_FLAG_INBOUNDS 0x01 + +JL_DLLIMPORT jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world); +JL_DLLIMPORT void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec); +JL_DLLIMPORT void jl_generate_fptr_for_oc_wrapper(jl_code_instance_t *unspec); +JL_DLLIMPORT int jl_compile_extern_c(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt); + +typedef struct { + LLVMOrcThreadSafeModuleRef TSM; + LLVMValueRef F; +} jl_llvmf_dump_t; + +JL_DLLIMPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world, + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary); +JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params); +JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary); +JL_DLLIMPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo); +JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw); + +JL_DLLIMPORT void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache, size_t world); +JL_DLLIMPORT void jl_dump_native(void *native_code, + const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, + ios_t *z, ios_t *s, jl_emission_params_t *params); +JL_DLLIMPORT void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs); +JL_DLLIMPORT void jl_get_llvm_external_fns(void *native_code, arraylist_t *gvs); +JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode, + int32_t *func_idx, int32_t *specfunc_idx); +JL_DLLIMPORT void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs, + jl_method_instance_t **linfos, size_t n); + +JL_DLLIMPORT void jl_init_codegen(void); +JL_DLLIMPORT void jl_teardown_codegen(void) JL_NOTSAFEPOINT; +JL_DLLIMPORT int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT; +// n.b. 
this might be called from unmanaged thread: +JL_DLLIMPORT uint64_t jl_getUnwindInfo(uint64_t dwBase); #ifdef __cplusplus } #endif +#pragma GCC visibility pop + + #ifdef USE_DTRACE -#include "uprobes.h.gen" +// Generated file, needs to be searched in include paths so that the builddir +// retains priority +#include // uprobes.h.gen on systems with DTrace, is auto-generated to include // `JL_PROBE_{PROBE}` and `JL_PROBE_{PROBE}_ENABLED()` macros for every probe diff --git a/src/julia_locks.h b/src/julia_locks.h index 234ff1fa8c0db..47e258f69aab2 100644 --- a/src/julia_locks.h +++ b/src/julia_locks.h @@ -3,8 +3,6 @@ #ifndef JL_LOCKS_H #define JL_LOCKS_H -#include "julia_assert.h" - #ifdef __cplusplus extern "C" { #endif @@ -17,6 +15,7 @@ extern "C" { // The JL_LOCK* and JL_UNLOCK* macros are no-op for non-threading build // while the jl_mutex_* functions are always locking and unlocking the locks. +JL_DLLEXPORT void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT; JL_DLLEXPORT void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint); JL_DLLEXPORT void _jl_mutex_lock(jl_task_t *self, jl_mutex_t *lock); JL_DLLEXPORT int _jl_mutex_trylock_nogc(jl_task_t *self, jl_mutex_t *lock) JL_NOTSAFEPOINT; @@ -29,7 +28,7 @@ static inline void jl_mutex_wait(jl_mutex_t *lock, int safepoint) _jl_mutex_wait(jl_current_task, lock, safepoint); } -static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT +static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { #ifndef __clang_gcanalyzer__ // Hide this body from the analyzer, otherwise it complains that we're calling @@ -66,7 +65,7 @@ static inline void jl_mutex_lock(jl_mutex_t *lock) _jl_mutex_lock(jl_current_task, lock); } -static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT +static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER { return _jl_mutex_trylock_nogc(jl_current_task, lock); } @@ -81,18 +80,17 @@ static inline void jl_mutex_unlock(jl_mutex_t *lock) _jl_mutex_unlock(jl_current_task, lock); } -static inline void jl_mutex_unlock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT +static inline void jl_mutex_unlock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE { _jl_mutex_unlock_nogc(lock); } -static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT +static inline void jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT { - jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL); - lock->count = 0; + _jl_mutex_init(lock, name); } -#define JL_MUTEX_INIT(m) jl_mutex_init(m) +#define JL_MUTEX_INIT(m, name) jl_mutex_init(m, name) #define JL_LOCK(m) jl_mutex_lock(m) #define JL_UNLOCK(m) jl_mutex_unlock(m) #define JL_LOCK_NOGC(m) jl_mutex_lock_nogc(m) diff --git a/src/julia_threads.h b/src/julia_threads.h index 6f1c4e50d4e95..292c11f61d60d 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -5,6 +5,7 @@ #define JL_THREADS_H #include "julia_atomics.h" +#include "work-stealing-queue.h" #ifndef _OS_WINDOWS_ #include "pthread.h" #endif @@ -79,6 +80,7 @@ typedef struct { void *stacktop; } _jl_ucontext_t; #endif +#pragma GCC visibility push(default) #if defined(JL_HAVE_UNW_CONTEXT) #define UNW_LOCAL_ONLY #include @@ -88,6 +90,7 @@ typedef unw_context_t _jl_ucontext_t; #include typedef ucontext_t _jl_ucontext_t; #endif +#pragma GCC visibility pop #endif typedef struct { @@ -98,12 +101,15 @@ typedef struct { #if defined(_COMPILER_TSAN_ENABLED_) void *tsan_state; #endif +#if 
defined(_COMPILER_ASAN_ENABLED_) + void *asan_fake_stack; +#endif } jl_ucontext_t; // handle to reference an OS thread #ifdef _OS_WINDOWS_ -typedef DWORD jl_thread_t; +typedef HANDLE jl_thread_t; #else typedef pthread_t jl_thread_t; #endif @@ -124,20 +130,21 @@ typedef struct { typedef struct { _Atomic(int64_t) allocd; - _Atomic(int64_t) freed; + _Atomic(int64_t) pool_live_bytes; _Atomic(uint64_t) malloc; _Atomic(uint64_t) realloc; _Atomic(uint64_t) poolalloc; _Atomic(uint64_t) bigalloc; - _Atomic(uint64_t) freecall; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; } jl_thread_gc_num_t; typedef struct { // variable for tracking weak references - arraylist_t weak_refs; + small_arraylist_t weak_refs; // live tasks started on this thread // that are holding onto a stack from the pool - arraylist_t live_tasks; + small_arraylist_t live_tasks; // variables for tracking malloc'd arrays struct _mallocarray_t *mallocarrays; @@ -147,7 +154,6 @@ typedef struct { struct _bigval_t *big_objects; // variables for tracking "remembered set" - arraylist_t rem_bindings; arraylist_t _remset[2]; // contains jl_value_t* // lower bound of the number of pointers inside remembered values int remset_nptr; @@ -155,29 +161,18 @@ typedef struct { arraylist_t *last_remset; // variables for allocating objects from pools -#ifdef _P64 -# define JL_GC_N_POOLS 49 -#elif MAX_ALIGN == 8 -# define JL_GC_N_POOLS 50 -#else -# define JL_GC_N_POOLS 51 -#endif - jl_gc_pool_t norm_pools[JL_GC_N_POOLS]; +#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h` + jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS]; #define JL_N_STACK_POOLS 16 - arraylist_t free_stacks[JL_N_STACK_POOLS]; + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; } jl_thread_heap_t; -// Cache of thread local change to global metadata during GC -// This is sync'd after marking. -typedef union _jl_gc_mark_data jl_gc_mark_data_t; - typedef struct { - void **pc; // Current stack address for the pc (up growing) - jl_gc_mark_data_t *data; // Current stack address for the data (up growing) - void **pc_start; // Cached value of `gc_cache->pc_stack` - void **pc_end; // Cached value of `gc_cache->pc_stack_end` -} jl_gc_mark_sp_t; + ws_queue_t chunk_queue; + ws_queue_t ptr_queue; + arraylist_t reclaim_set; +} jl_gc_markqueue_t; typedef struct { // thread local increment of `perm_scanned_bytes` @@ -195,12 +190,14 @@ typedef struct { // this makes sure that a single objects can only appear once in // the lists (the mark bit cannot be flipped to `0` without sweeping) void *big_obj[1024]; - void **pc_stack; - void **pc_stack_end; - jl_gc_mark_data_t *data_stack; } jl_gc_mark_cache_t; struct _jl_bt_element_t; +struct _jl_gc_pagemeta_t; + +typedef struct { + _Atomic(struct _jl_gc_pagemeta_t *) bottom; +} jl_gc_page_stack_t; // This includes all the thread local states we care about for a thread. // Changes to TLS field types must be reflected in codegen. @@ -209,7 +206,7 @@ typedef struct _jl_tls_states_t { int16_t tid; int8_t threadpoolid; uint64_t rngseed; - volatile size_t *safepoint; + _Atomic(volatile size_t *) safepoint; // may be changed to the suspend page by any thread _Atomic(int8_t) sleep_check_state; // read/write from foreign threads // Whether it is safe to execute GC at the same time. 
#define JL_GC_STATE_WAITING 1 @@ -223,9 +220,9 @@ typedef struct _jl_tls_states_t { // statements is prohibited from certain // callbacks (such as generated functions) // as it may make compilation undecidable - int8_t in_pure_callback; - int8_t in_finalizer; - int8_t disable_gc; + int16_t in_pure_callback; + int16_t in_finalizer; + int16_t disable_gc; // Counter to disable finalizer **on the current thread** int finalizers_inhibited; jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen @@ -260,12 +257,17 @@ typedef struct _jl_tls_states_t { int needs_resetstkoflw; #else void *signal_stack; + size_t signal_stack_size; #endif jl_thread_t system_id; + _Atomic(int16_t) suspend_count; arraylist_t finalizers; + jl_gc_page_stack_t page_metadata_allocd; + jl_gc_page_stack_t page_metadata_buffered; + jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; arraylist_t sweep_objs; - jl_gc_mark_sp_t gc_mark_sp; + _Atomic(int64_t) gc_sweeps_requested; // Saved exception for previous *external* API call or NULL if cleared. // Access via jl_exception_occurred(). struct _jl_value_t *previous_exception; @@ -281,13 +283,13 @@ typedef struct _jl_tls_states_t { ) // some hidden state (usually just because we don't have the type's size declaration) -#ifdef LIBRARY_EXPORTS +#ifdef JL_LIBRARY_EXPORTS uv_mutex_t sleep_lock; uv_cond_t wake_signal; #endif } jl_tls_states_t; -#ifndef LIBRARY_EXPORTS +#ifndef JL_LIBRARY_EXPORTS // deprecated (only for external consumers) JL_DLLEXPORT void *jl_get_ptls_states(void); #endif @@ -295,23 +297,28 @@ JL_DLLEXPORT void *jl_get_ptls_states(void); // Update codegen version in `ccall.cpp` after changing either `pause` or `wake` #ifdef __MIC__ # define jl_cpu_pause() _mm_delay_64(100) +# define jl_cpu_suspend() _mm_delay_64(100) # define jl_cpu_wake() ((void)0) # define JL_CPU_WAKE_NOOP 1 #elif defined(_CPU_X86_64_) || defined(_CPU_X86_) /* !__MIC__ */ # define jl_cpu_pause() _mm_pause() +# define jl_cpu_suspend() _mm_pause() # define jl_cpu_wake() ((void)0) # define JL_CPU_WAKE_NOOP 1 #elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7) -# define jl_cpu_pause() __asm__ volatile ("wfe" ::: "memory") +# define jl_cpu_pause() __asm__ volatile ("isb" ::: "memory") +# define jl_cpu_suspend() __asm__ volatile ("wfe" ::: "memory") # define jl_cpu_wake() __asm__ volatile ("sev" ::: "memory") # define JL_CPU_WAKE_NOOP 0 #else # define jl_cpu_pause() ((void)0) +# define jl_cpu_suspend() ((void)0) # define jl_cpu_wake() ((void)0) # define JL_CPU_WAKE_NOOP 1 #endif JL_DLLEXPORT void (jl_cpu_pause)(void); +JL_DLLEXPORT void (jl_cpu_suspend)(void); JL_DLLEXPORT void (jl_cpu_wake)(void); #ifdef __clang_gcanalyzer__ @@ -323,26 +330,26 @@ void jl_sigint_safepoint(jl_ptls_t tls); // This triggers a SegFault when we are in GC // Assign it to a variable to make sure the compiler emit the load // and to avoid Clang warning for -Wunused-volatile-lvalue -#define jl_gc_safepoint_(ptls) do { \ - jl_signal_fence(); \ - size_t safepoint_load = *ptls->safepoint; \ - jl_signal_fence(); \ - (void)safepoint_load; \ +#define jl_gc_safepoint_(ptls) do { \ + jl_signal_fence(); \ + size_t safepoint_load = jl_atomic_load_relaxed(&ptls->safepoint)[0]; \ + jl_signal_fence(); \ + (void)safepoint_load; \ } while (0) -#define jl_sigint_safepoint(ptls) do { \ - jl_signal_fence(); \ - size_t safepoint_load = ptls->safepoint[-1]; \ - jl_signal_fence(); \ - (void)safepoint_load; \ +#define jl_sigint_safepoint(ptls) do { \ + jl_signal_fence(); \ + size_t safepoint_load = 
jl_atomic_load_relaxed(&ptls->safepoint)[-1]; \ + jl_signal_fence(); \ + (void)safepoint_load; \ } while (0) #endif STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state, int8_t old_state) { jl_atomic_store_release(&ptls->gc_state, state); - // A safe point is required if we transition from GC-safe region to - // non GC-safe region. - if (old_state && !state) + if (state == JL_GC_STATE_SAFE && old_state == 0) + jl_gc_safepoint_(ptls); + if (state == 0 && old_state == JL_GC_STATE_SAFE) jl_gc_safepoint_(ptls); return old_state; } @@ -352,23 +359,23 @@ STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls, return jl_gc_state_set(ptls, state, jl_atomic_load_relaxed(&ptls->gc_state)); } #ifdef __clang_gcanalyzer__ -int8_t jl_gc_unsafe_enter(jl_ptls_t ptls); // Can be a safepoint -int8_t jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT; -int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT; -int8_t jl_gc_safe_leave(jl_ptls_t ptls, int8_t state); // Can be a safepoint +int8_t jl_gc_unsafe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; // this could be a safepoint, but we will assume it is not +void jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; +int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; +void jl_gc_safe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_LEAVE; // this might not be a safepoint, but we have to assume it could be (statically) #else #define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, 0) #define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), 0)) #define jl_gc_safe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_SAFE) #define jl_gc_safe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_SAFE)) #endif -JL_DLLEXPORT void (jl_gc_safepoint)(void); JL_DLLEXPORT void jl_gc_enable_finalizers(struct _jl_task_t *ct, int on); -JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void); +JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void); JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct); extern JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers; +JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_wakeup_thread(int16_t tid); diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index b2aded025c0d1..953ecc1830142 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -2,11 +2,13 @@ #include "llvm-version.h" #include "llvm-alloc-helpers.h" -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #include "julia_assert.h" #include +#define DEBUG_TYPE "escape-analysis" + using namespace llvm; using namespace jl_alloc; @@ -110,42 +112,69 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset, return true; } -JL_USED_FUNC void AllocUseInfo::dump() +JL_USED_FUNC void AllocUseInfo::dump(llvm::raw_ostream &OS) { - jl_safe_printf("escaped: %d\n", escaped); - jl_safe_printf("addrescaped: %d\n", addrescaped); - jl_safe_printf("returned: %d\n", returned); - jl_safe_printf("haserror: %d\n", haserror); - jl_safe_printf("hasload: %d\n", hasload); - jl_safe_printf("haspreserve: %d\n", haspreserve); - jl_safe_printf("hasunknownmem: %d\n", hasunknownmem); - jl_safe_printf("hastypeof: %d\n", hastypeof); - jl_safe_printf("refload: %d\n", refload); - jl_safe_printf("refstore: %d\n", refstore); - jl_safe_printf("Uses: %d\n", 
(unsigned)uses.size()); + OS << "AllocUseInfo:\n"; + OS << "escaped: " << escaped << '\n'; + OS << "addrescaped: " << addrescaped << '\n'; + OS << "returned: " << returned << '\n'; + OS << "haserror: " << haserror << '\n'; + OS << "hasload: " << hasload << '\n'; + OS << "haspreserve: " << haspreserve << '\n'; + OS << "hasunknownmem: " << hasunknownmem << '\n'; + OS << "hastypeof: " << hastypeof << '\n'; + OS << "refload: " << refload << '\n'; + OS << "refstore: " << refstore << '\n'; + OS << "allockind:"; + if ((allockind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown) + OS << " uninitialized"; + if ((allockind & AllocFnKind::Zeroed) != AllocFnKind::Unknown) + OS << " zeroed"; + OS << '\n'; + OS << "Uses: " << uses.size() << '\n'; for (auto inst: uses) - llvm_dump(inst); + inst->print(OS); if (!preserves.empty()) { - jl_safe_printf("Preserves: %d\n", (unsigned)preserves.size()); - for (auto inst: preserves) { - llvm_dump(inst); - } + OS << "Preserves: " << preserves.size() << '\n'; + for (auto inst: preserves) + inst->print(OS); } - if (!memops.empty()) { - jl_safe_printf("Memops: %d\n", (unsigned)memops.size()); - for (auto &field: memops) { - jl_safe_printf(" Field %d @ %d\n", field.second.size, field.first); - jl_safe_printf(" Accesses:\n"); - for (auto memop: field.second.accesses) { - jl_safe_printf(" "); - llvm_dump(memop.inst); - } + OS << "MemOps: " << memops.size() << '\n'; + for (auto &field: memops) { + OS << " offset: " << field.first << '\n'; + OS << " size: " << field.second.size << '\n'; + OS << " hasobjref: " << field.second.hasobjref << '\n'; + OS << " hasload: " << field.second.hasload << '\n'; + OS << " hasaggr: " << field.second.hasaggr << '\n'; + OS << " accesses: " << field.second.accesses.size() << '\n'; + for (auto &memop: field.second.accesses) { + OS << " "; + memop.inst->print(OS); + OS << '\n'; + OS << " " << (memop.isaggr ? "aggr" : "scalar") << '\n'; + OS << " " << (memop.isobjref ? 
"objref" : "bits") << '\n'; + OS << " " << memop.offset << '\n'; + OS << " " << memop.size << '\n'; } } } -void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) { +JL_USED_FUNC void AllocUseInfo::dump() +{ + dump(dbgs()); +} + +#ifndef __clang_gcanalyzer__ +#define REMARK(remark) if (options.ORE) options.ORE->emit(remark) +#else +#define REMARK(remark) +#endif + +void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) { required.use_info.reset(); + Attribute allockind = I->getFnAttr(Attribute::AllocKind); + if (allockind.isValid()) + required.use_info.allockind = allockind.getAllocKind(); if (I->use_empty()) return; CheckInst::Frame cur{I, 0, I->use_begin(), I->use_end()}; @@ -161,9 +190,16 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg }; auto check_inst = [&] (Instruction *inst, Use *use) { + LLVM_DEBUG(dbgs() << "Checking: " << *inst << "\n"); if (isa(inst)) { required.use_info.hasload = true; - if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, 0, cur.offset, + if (cur.offset == UINT32_MAX) { + LLVM_DEBUG(dbgs() << "Load inst has unknown offset\n"); + auto elty = inst->getType(); + required.use_info.has_unknown_objref |= hasObjref(elty); + required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); + required.use_info.hasunknownmem = true; + } else if (!required.use_info.addMemOp(inst, 0, cur.offset, inst->getType(), false, required.DL)) required.use_info.hasunknownmem = true; @@ -181,13 +217,16 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg !isa(call->getArgOperand(2)) || !isa(call->getArgOperand(1)) || (cast(call->getArgOperand(2))->getLimitedValue() >= - UINT32_MAX - cur.offset)) + UINT32_MAX - cur.offset)) { + LLVM_DEBUG(dbgs() << "Memset inst has unknown offset\n"); required.use_info.hasunknownmem = true; + } return true; } if (id == Intrinsic::lifetime_start || id == Intrinsic::lifetime_end || isa(II)) return true; + LLVM_DEBUG(dbgs() << "Unknown intrinsic, marking addrescape\n"); required.use_info.addrescaped = true; return true; } @@ -208,31 +247,50 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg assert(use->get() == I); return true; } - if (required.pass.write_barrier_func == callee || - required.pass.write_barrier_binding_func == callee) + if (required.pass.write_barrier_func == callee) return true; auto opno = use->getOperandNo(); // Uses in `jl_roots` operand bundle are not counted as escaping, everything else is. 
if (!call->isBundleOperand(opno) || call->getOperandBundleForOperand(opno).getTagName() != "jl_roots") { if (isa(call->getParent()->getTerminator())) { + LLVM_DEBUG(dbgs() << "Detected use of allocation in block terminating with unreachable, likely error function\n"); required.use_info.haserror = true; return true; } + LLVM_DEBUG(dbgs() << "Unknown call, marking escape\n"); + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnknownCall", + inst) + << "Unknown call, marking escape (" << ore::NV("Call", inst) << ")"; + }); required.use_info.escaped = true; return false; } + LLVM_DEBUG(dbgs() << "Call is in jl_roots bundle, marking haspreserve\n"); required.use_info.haspreserve = true; return true; } if (auto store = dyn_cast(inst)) { // Only store value count if (use->getOperandNo() != StoreInst::getPointerOperandIndex()) { + LLVM_DEBUG(dbgs() << "Object address is stored somewhere, marking escape\n"); + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "StoreObjAddr", + inst) + << "Object address is stored somewhere, marking escape (" << ore::NV("Store", inst) << ")"; + }); required.use_info.escaped = true; return false; } auto storev = store->getValueOperand(); - if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(), + if (cur.offset == UINT32_MAX) { + LLVM_DEBUG(dbgs() << "Store inst has unknown offset\n"); + auto elty = storev->getType(); + required.use_info.has_unknown_objref |= hasObjref(elty); + required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); + required.use_info.hasunknownmem = true; + } else if (!required.use_info.addMemOp(inst, use->getOperandNo(), cur.offset, storev->getType(), true, required.DL)) required.use_info.hasunknownmem = true; @@ -241,6 +299,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg if (isa(inst) || isa(inst)) { // Only store value count if (use->getOperandNo() != isa(inst) ? AtomicCmpXchgInst::getPointerOperandIndex() : AtomicRMWInst::getPointerOperandIndex()) { + LLVM_DEBUG(dbgs() << "Object address is cmpxchg/rmw-ed somewhere, marking escape\n"); + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "StoreObjAddr", + inst) + << "Object address is cmpxchg/rmw-ed somewhere, marking escape (" << ore::NV("Store", inst) << ")"; + }); required.use_info.escaped = true; return false; } @@ -248,8 +312,10 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg auto storev = isa(inst) ? 
cast(inst)->getNewValOperand() : cast(inst)->getValOperand(); if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(), cur.offset, storev->getType(), - true, required.DL)) + true, required.DL)) { + LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n"); required.use_info.hasunknownmem = true; + } required.use_info.refload = true; return true; } @@ -263,10 +329,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg APInt apoffset(sizeof(void*) * 8, cur.offset, true); if (!gep->accumulateConstantOffset(required.DL, apoffset) || apoffset.isNegative()) { next_offset = UINT32_MAX; + LLVM_DEBUG(dbgs() << "GEP inst has unknown offset\n"); } else { next_offset = apoffset.getLimitedValue(); if (next_offset > UINT32_MAX) { + LLVM_DEBUG(dbgs() << "GEP inst exceeds 32-bit offset\n"); next_offset = UINT32_MAX; } } @@ -276,9 +344,16 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg return true; } if (isa(inst)) { + LLVM_DEBUG(dbgs() << "Allocation is returned\n"); required.use_info.returned = true; return true; } + LLVM_DEBUG(dbgs() << "Unknown instruction, marking escape\n"); + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnknownInst", + inst) + << "Unknown instruction, marking escape (" << ore::NV("Inst", inst) << ")"; + }); required.use_info.escaped = true; return false; }; diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h index 7238d71de973f..49c3b15332a56 100644 --- a/src/llvm-alloc-helpers.h +++ b/src/llvm-alloc-helpers.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -86,6 +87,13 @@ namespace jl_alloc { bool returned:1; // The object is used in an error function bool haserror:1; + // For checking attributes of "uninitialized" or "zeroed" or unknown + llvm::AllocFnKind allockind; + + // The alloc has a Julia object reference not in an explicit field. + bool has_unknown_objref:1; + // The alloc has an aggregate Julia object reference not in an explicit field. + bool has_unknown_objrefaggr:1; void reset() { @@ -99,10 +107,14 @@ namespace jl_alloc { hasunknownmem = false; returned = false; haserror = false; + allockind = llvm::AllocFnKind::Unknown; + has_unknown_objref = false; + has_unknown_objrefaggr = false; uses.clear(); preserves.clear(); memops.clear(); } + void dump(llvm::raw_ostream &OS); void dump(); bool addMemOp(llvm::Instruction *inst, unsigned opno, uint32_t offset, llvm::Type *elty, bool isstore, const llvm::DataLayout &DL); @@ -129,6 +141,7 @@ namespace jl_alloc { //will not be considered. 
Defaults to nullptr, which means all uses of the allocation //are considered const llvm::SmallPtrSetImpl *valid_set; + llvm::OptimizationRemarkEmitter *ORE = nullptr; EscapeAnalysisOptionalArgs() = default; @@ -136,9 +149,14 @@ namespace jl_alloc { this->valid_set = valid_set; return *this; } + + EscapeAnalysisOptionalArgs &with_optimization_remark_emitter(decltype(ORE) ORE) { + this->ORE = ORE; + return *this; + } }; - void runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options=EscapeAnalysisOptionalArgs()); + void runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options=EscapeAnalysisOptionalArgs()); } diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index bdca20e124481..5df4f52aca425 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -27,17 +27,17 @@ #include -#include "codegen_shared.h" +#include "passes.h" +#include "llvm-codegen-shared.h" #include "julia.h" #include "julia_internal.h" #include "llvm-pass-helpers.h" #include "llvm-alloc-helpers.h" -#include "passes.h" #include #include -#define DEBUG_TYPE "alloc_opt" +#define DEBUG_TYPE "alloc-opt" #include "julia_assert.h" using namespace llvm; @@ -58,6 +58,7 @@ static void removeGCPreserve(CallInst *call, Instruction *val) ++RemovedGCPreserve; auto replace = Constant::getNullValue(val->getType()); call->replaceUsesOfWith(val, replace); + call->setAttributes(AttributeList()); for (auto &arg: call->args()) { if (!isa(arg.get())) { return; @@ -78,6 +79,7 @@ static void removeGCPreserve(CallInst *call, Instruction *val) * * * load * * `pointer_from_objref` + * * `gc_loaded` * * Any real llvm intrinsics * * gc preserve intrinsics * * `ccall` gcroot array (`jl_roots` operand bundle) @@ -93,10 +95,14 @@ static void removeGCPreserve(CallInst *call, Instruction *val) * TODO: * * Return twice * * Handle phi node. - * * Look through `pointer_from_objref`. * * Handle jl_box* */ +#ifndef __clang_gcanalyzer__ +#define REMARK(remark) ORE.emit(remark) +#else +#define REMARK(remark) (void) 0; +#endif struct AllocOpt : public JuliaPassContext { const DataLayout *DL; @@ -111,6 +117,7 @@ struct AllocOpt : public JuliaPassContext { struct Optimizer { Optimizer(Function &F, AllocOpt &pass, function_ref GetDT) : F(F), + ORE(&F), pass(pass), GetDT(std::move(GetDT)) {} @@ -128,16 +135,18 @@ struct Optimizer { // insert llvm.lifetime.* calls for `ptr` with size `sz` based on the use of `orig`. 
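With the optional-argument struct extended in llvm-alloc-helpers.h above, a caller opts into remarks through the small builder method rather than a new positional parameter. The sketch below shows roughly how that is wired up (it mirrors Optimizer::checkInst further down in this file; alloc_call, use_info, check_stack and pass stand for the caller's own state and are assumptions here):

    // Run the escape analysis for one GC allocation call with remarks enabled.
    llvm::OptimizationRemarkEmitter ORE(&F);
    jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, pass,
                                                  F.getParent()->getDataLayout()};
    jl_alloc::runEscapeAnalysis(
        alloc_call, required,
        jl_alloc::EscapeAnalysisOptionalArgs().with_optimization_remark_emitter(&ORE));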
void insertLifetime(Value *ptr, Constant *sz, Instruction *orig); - void checkInst(Instruction *I); + void checkInst(CallInst *I); void replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID, Instruction *orig_i, Instruction *new_i); void removeAlloc(CallInst *orig_inst); - void moveToStack(CallInst *orig_inst, size_t sz, bool has_ref); + void moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocFnKind allockind); + void initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff, AllocFnKind allockind); void splitOnStack(CallInst *orig_inst); void optimizeTag(CallInst *orig_inst); Function &F; + OptimizationRemarkEmitter ORE; AllocOpt &pass; DominatorTree *_DT = nullptr; function_ref GetDT; @@ -214,25 +223,37 @@ void Optimizer::optimizeAll() size_t sz = item.second; checkInst(orig); if (use_info.escaped) { + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) + << "GC allocation escaped " << ore::NV("GC Allocation", orig); + }); if (use_info.hastypeof) optimizeTag(orig); continue; } if (use_info.haserror || use_info.returned) { + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) + << "GC allocation has error or was returned " << ore::NV("GC Allocation", orig); + }); if (use_info.hastypeof) optimizeTag(orig); continue; } if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve || !use_info.refstore)) { + REMARK([&]() { + return OptimizationRemark(DEBUG_TYPE, "Dead Allocation", orig) + << "GC allocation removed " << ore::NV("GC Allocation", orig); + }); // No one took the address, no one reads anything and there's no meaningful // preserve of fields (either no preserve/ccall or no object reference fields) // We can just delete all the uses. removeAlloc(orig); continue; } - bool has_ref = false; - bool has_refaggr = false; + bool has_ref = use_info.has_unknown_objref; + bool has_refaggr = use_info.has_unknown_objrefaggr; for (auto memop: use_info.memops) { auto &field = memop.second; if (field.hasobjref) { @@ -245,18 +266,30 @@ void Optimizer::optimizeAll() } } } - if (!use_info.hasunknownmem && !use_info.addrescaped && !has_refaggr) { - // No one actually care about the memory layout of this object, split it. - splitOnStack(orig); - continue; - } if (has_refaggr) { + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) + << "GC allocation has unusual object reference, unable to move to stack " << ore::NV("GC Allocation", orig); + }); if (use_info.hastypeof) optimizeTag(orig); continue; } + if (!use_info.hasunknownmem && !use_info.addrescaped) { + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Stack Split Allocation", orig) + << "GC allocation split on stack " << ore::NV("GC Allocation", orig); + }); + // No one actually care about the memory layout of this object, split it. 
+ splitOnStack(orig); + continue; + } + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig) + << "GC allocation moved to stack " << ore::NV("GC Allocation", orig); + }); // The object has no fields with mix reference access - moveToStack(orig, sz, has_ref); + moveToStack(orig, sz, has_ref, use_info.allockind); } } @@ -278,7 +311,9 @@ bool Optimizer::isSafepoint(Instruction *inst) return false; if (auto callee = call->getCalledFunction()) { // Known functions emitted in codegen that are not safepoints - if (callee == pass.pointer_from_objref_func || callee->getName() == "memcmp") { + if (callee == pass.pointer_from_objref_func + || callee == pass.gc_loaded_func + || callee->getName() == "memcmp") { return false; } } @@ -312,16 +347,26 @@ ssize_t Optimizer::getGCAllocSize(Instruction *I) if (call->getCalledOperand() != pass.alloc_obj_func) return -1; assert(call->arg_size() == 3); - size_t sz = (size_t)cast(call->getArgOperand(1))->getZExtValue(); + auto CI = dyn_cast(call->getArgOperand(1)); + if (!CI) + return -1; + size_t sz = (size_t)CI->getZExtValue(); if (sz < IntegerType::MAX_INT_BITS / 8 && sz < INT32_MAX) return sz; return -1; } -void Optimizer::checkInst(Instruction *I) +void Optimizer::checkInst(CallInst *I) { + LLVM_DEBUG(dbgs() << "Running escape analysis on " << *I << "\n"); jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, pass, *pass.DL}; - jl_alloc::runEscapeAnalysis(I, required); + jl_alloc::runEscapeAnalysis(I, required, jl_alloc::EscapeAnalysisOptionalArgs().with_optimization_remark_emitter(&ORE)); + REMARK([&](){ + std::string suse_info; + llvm::raw_string_ostream osuse_info(suse_info); + use_info.dump(osuse_info); + return OptimizationRemarkAnalysis(DEBUG_TYPE, "EscapeAnalysis", I) << "escape analysis for " << ore::NV("GC Allocation", I) << "\n" << ore::NV("UseInfo", osuse_info.str()); + }); } void Optimizer::insertLifetimeEnd(Value *ptr, Constant *sz, Instruction *insert) @@ -390,41 +435,50 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig) abort(); } #endif - // Record extra BBs that contain invisible uses. + + // Record extra BBs that contain invisible uses with gc_preserve_{begin,end}. + // We traverse the dominator tree starting at each `gc_preserve_begin` and marking blocks + // as users until a corresponding `gc_preserve_end` is found. Blocks containing + // the `gc_preserve_end` have already been marked in the previous step. SmallSet extra_use; SmallVector*, 8> dominated; for (auto preserve: use_info.preserves) { - for (auto RN = DT.getNode(preserve->getParent()); RN; - RN = dominated.empty() ? 
nullptr : dominated.pop_back_val()) { - for (auto N: *RN) { - auto bb = N->getBlock(); - if (extra_use.count(bb)) - continue; - bool ended = false; - for (auto end: preserve->users()) { - auto end_bb = cast(end)->getParent(); - auto end_node = DT.getNode(end_bb); - if (end_bb == bb || (end_node && DT.dominates(end_node, N))) { - ended = true; - break; - } + assert(dominated.empty()); + dominated.push_back(DT.getNode(preserve->getParent())); + while (!dominated.empty()) { + auto N = dominated.pop_back_val(); + if (!N) { + dominated.clear(); + break; + } + auto bb = N->getBlock(); + if (extra_use.count(bb)) + continue; + bool ended = false; + for (auto end: preserve->users()) { + auto end_bb = cast(end)->getParent(); + auto end_node = DT.getNode(end_bb); + if (end_bb == bb || (end_node && DT.dominates(end_node, N))) { + ended = true; + break; } - if (ended) - continue; - bbs.insert(bb); - extra_use.insert(bb); - dominated.push_back(N); } + if (ended) + continue; + bbs.insert(bb); + extra_use.insert(bb); + dominated.append(N->begin(), N->end()); } - assert(dominated.empty()); } + // For each BB, find the first instruction(s) where the allocation is possibly dead. // If all successors are live, then there isn't one. + // If the BB has "invisible" uses, then there isn't one. // If all successors are dead, then it's the first instruction after the last use // within the BB. // If some successors are live and others are dead, it's the first instruction in // the successors that are dead. - std::vector first_dead; + SmallVector first_dead; for (auto bb: bbs) { bool has_use = false; for (auto succ: successors(bb)) { @@ -515,7 +569,7 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID, auto oldfType = call->getFunctionType(); auto newfType = FunctionType::get( oldfType->getReturnType(), - makeArrayRef(argTys).slice(0, oldfType->getNumParams()), + ArrayRef(argTys).slice(0, oldfType->getNumParams()), oldfType->isVarArg()); // Accumulate an array of overloaded types for the given intrinsic @@ -545,9 +599,25 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID, call->eraseFromParent(); } +void Optimizer::initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff, AllocFnKind allockind) +{ + if ((allockind & AllocFnKind::Uninitialized) != AllocFnKind::Unknown) + return; + assert(!buff->isArrayAllocation()); + Type *T = buff->getAllocatedType(); + Value *Init = UndefValue::get(T); + if ((allockind & AllocFnKind::Zeroed) != AllocFnKind::Unknown) + Init = Constant::getNullValue(T); // zero, as described + else if (allockind == AllocFnKind::Unknown) + Init = Constant::getNullValue(T); // assume zeroed since we didn't find the attribute + else + Init = prolog_builder.CreateFreeze(UndefValue::get(T)); // assume freeze, since LLVM does not natively support this case + prolog_builder.CreateStore(Init, buff); +} + // This function should not erase any safepoint so that the lifetime marker can find and cache // all the original safepoints. -void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) +void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocFnKind allockind) { ++RemovedAllocs; ++StackAllocs; @@ -573,7 +643,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) // treat this as a non-mem2reg'd alloca // The ccall root and GC preserve handling below makes sure that // the alloca isn't optimized out. 
- buff = prolog_builder.CreateAlloca(pass.T_prjlvalue); + const DataLayout &DL = F.getParent()->getDataLayout(); + auto asize = ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz / DL.getTypeAllocSize(pass.T_prjlvalue)); + buff = prolog_builder.CreateAlloca(pass.T_prjlvalue, asize); buff->setAlignment(Align(align)); ptr = cast(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext()))); } @@ -585,10 +657,14 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) buffty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), sz); buff = prolog_builder.CreateAlloca(buffty); buff->setAlignment(Align(align)); - ptr = cast(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext()))); + ptr = cast(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext(), buff->getType()->getPointerAddressSpace()))); } insertLifetime(ptr, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz), orig_inst); - auto new_inst = cast(prolog_builder.CreateBitCast(ptr, JuliaType::get_pjlvalue_ty(prolog_builder.getContext()))); + if (sz != 0 && !has_ref) { // TODO: fix has_ref case too + IRBuilder<> builder(orig_inst); + initializeAlloca(builder, buff, allockind); + } + Instruction *new_inst = cast(prolog_builder.CreateBitCast(ptr, JuliaType::get_pjlvalue_ty(prolog_builder.getContext(), buff->getType()->getPointerAddressSpace()))); new_inst->takeName(orig_inst); auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) { @@ -607,8 +683,10 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) } return false; }; - if (simple_replace(orig_inst, new_inst)) + if (simple_replace(orig_inst, new_inst)) { + LLVM_DEBUG(dbgs() << "Simple replace of allocation was successful in stack move\n"); return; + } assert(replace_stack.empty()); ReplaceUses::Frame cur{orig_inst, new_inst}; auto finish_cur = [&] () { @@ -634,10 +712,15 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) else if (auto call = dyn_cast(user)) { auto callee = call->getCalledOperand(); if (pass.pointer_from_objref_func == callee) { - call->replaceAllUsesWith(new_i); + call->replaceAllUsesWith(prolog_builder.CreateAddrSpaceCast(new_i, call->getCalledFunction()->getReturnType())); call->eraseFromParent(); return; } + //if (pass.gc_loaded_func == callee) { + // call->replaceAllUsesWith(new_i); + // call->eraseFromParent(); + // return; + //} if (pass.typeof_func == callee) { ++RemovedTypeofs; call->replaceAllUsesWith(tag); @@ -654,8 +737,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) } return; } - if (pass.write_barrier_func == callee || - pass.write_barrier_binding_func == callee) { + if (pass.write_barrier_func == callee) { ++RemovedWriteBarriers; call->eraseFromParent(); return; @@ -671,10 +753,12 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) user->replaceUsesOfWith(orig_i, replace); } else if (isa(user) || isa(user)) { - auto cast_t = PointerType::getWithSamePointeeType(cast(user->getType()), AddressSpace::Generic); + auto cast_t = PointerType::getWithSamePointeeType(cast(user->getType()), new_i->getType()->getPointerAddressSpace()); auto replace_i = new_i; Type *new_t = new_i->getType(); if (cast_t != new_t) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(cast_t->getContext().supportsTypedPointers()); replace_i = new BitCastInst(replace_i, cast_t, "", user); 
replace_i->setDebugLoc(user->getDebugLoc()); replace_i->takeName(user); @@ -724,8 +808,10 @@ void Optimizer::removeAlloc(CallInst *orig_inst) } return false; }; - if (simple_remove(orig_inst)) + if (simple_remove(orig_inst)) { + LLVM_DEBUG(dbgs() << "Simple remove of allocation was successful in removeAlloc\n"); return; + } assert(replace_stack.empty()); ReplaceUses::Frame cur{orig_inst, nullptr}; auto finish_cur = [&] () { @@ -763,8 +849,7 @@ void Optimizer::removeAlloc(CallInst *orig_inst) call->eraseFromParent(); return; } - if (pass.write_barrier_func == callee || - pass.write_barrier_binding_func == callee) { + if (pass.write_barrier_func == callee) { ++RemovedWriteBarriers; call->eraseFromParent(); return; @@ -812,6 +897,10 @@ void Optimizer::optimizeTag(CallInst *orig_inst) auto callee = call->getCalledOperand(); if (pass.typeof_func == callee) { ++RemovedTypeofs; + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "typeof", call) + << "removed typeof call for GC allocation " << ore::NV("Alloc", orig_inst); + }); call->replaceAllUsesWith(tag); // Push to the removed instructions to trigger `finalize` to // return the correct result. @@ -859,8 +948,10 @@ void Optimizer::splitOnStack(CallInst *orig_inst) allocty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), field.size); } slot.slot = prolog_builder.CreateAlloca(allocty); + IRBuilder<> builder(orig_inst); insertLifetime(prolog_builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(prolog_builder.getContext())), ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst); + initializeAlloca(builder, slot.slot, use_info.allockind); slots.push_back(std::move(slot)); } const auto nslots = slots.size(); @@ -888,8 +979,10 @@ void Optimizer::splitOnStack(CallInst *orig_inst) } return false; }; - if (simple_replace(orig_inst)) + if (simple_replace(orig_inst)) { + LLVM_DEBUG(dbgs() << "Simple replace of allocation was successful in stack split\n"); return; + } assert(replace_stack.empty()); ReplaceUses::Frame cur{orig_inst, uint32_t(0)}; auto finish_cur = [&] () { @@ -969,7 +1062,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) assert(slot.offset == offset); auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); if (!isa(store_ty)) { - store_val = builder.CreateBitCast(store_val, getSizeTy(builder.getContext())); + store_val = builder.CreateBitCast(store_val, pass.DL->getIntPtrType(builder.getContext(), T_pjlvalue->getAddressSpace())); store_val = builder.CreateIntToPtr(store_val, T_pjlvalue); store_ty = T_pjlvalue; } @@ -1032,7 +1125,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) else { uint64_t intval; memset(&intval, val, 8); - Constant *val = ConstantInt::get(getSizeTy(builder.getContext()), intval); + Constant *val = ConstantInt::get(pass.DL->getIntPtrType(builder.getContext(), pass.T_prjlvalue->getAddressSpace()), intval); val = ConstantExpr::getIntToPtr(val, JuliaType::get_pjlvalue_ty(builder.getContext())); ptr = ConstantExpr::getAddrSpaceCast(val, pass.T_prjlvalue); } @@ -1062,8 +1155,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) call->eraseFromParent(); return; } - if (pass.write_barrier_func == callee || - pass.write_barrier_binding_func == callee) { + if (pass.write_barrier_func == callee) { ++RemovedWriteBarriers; call->eraseFromParent(); return; @@ -1084,10 +1176,12 @@ void Optimizer::splitOnStack(CallInst *orig_inst) ref->setOrdering(AtomicOrdering::NotAtomic); operands.push_back(ref); } +#ifndef __clang_analyzer__ + // FIXME: SA finds "Called C++ object 
pointer is null" inside the LLVM code. auto new_call = builder.CreateCall(pass.gc_preserve_begin_func, operands); new_call->takeName(call); - new_call->setAttributes(call->getAttributes()); call->replaceAllUsesWith(new_call); +#endif call->eraseFromParent(); return; } @@ -1100,7 +1194,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) for (auto &bundle: bundles) { if (bundle.getTag() != "jl_roots") continue; - std::vector operands; + SmallVector operands; for (auto op: bundle.inputs()) { if (op == orig_i || isa(op)) continue; @@ -1163,57 +1257,30 @@ bool AllocOpt::doInitialization(Module &M) DL = &M.getDataLayout(); - lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { Type::getInt8PtrTy(M.getContext()) }); - lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { Type::getInt8PtrTy(M.getContext()) }); + lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { Type::getInt8PtrTy(M.getContext(), DL->getAllocaAddrSpace()) }); + lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { Type::getInt8PtrTy(M.getContext(), DL->getAllocaAddrSpace()) }); return true; } bool AllocOpt::runOnFunction(Function &F, function_ref GetDT) { - if (!alloc_obj_func) + if (!alloc_obj_func) { + LLVM_DEBUG(dbgs() << "AllocOpt: no alloc_obj function found, skipping pass\n"); return false; + } Optimizer optimizer(F, *this, std::move(GetDT)); optimizer.initialize(); optimizer.optimizeAll(); bool modified = optimizer.finalize(); - assert(!verifyFunction(F, &errs())); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(F)); +#endif return modified; } -struct AllocOptLegacy : public FunctionPass { - static char ID; - AllocOpt opt; - AllocOptLegacy() : FunctionPass(ID) { - llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); - } - bool doInitialization(Module &m) override { - return opt.doInitialization(m); - } - bool runOnFunction(Function &F) override { - return opt.runOnFunction(F, [this]() -> DominatorTree & {return getAnalysis().getDomTree();}); - } - void getAnalysisUsage(AnalysisUsage &AU) const override - { - FunctionPass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } -}; - -char AllocOptLegacy::ID = 0; -static RegisterPass X("AllocOpt", "Promote heap allocation to stack", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -} - -Pass *createAllocOptPass() -{ - return new AllocOptLegacy(); -} +} // anonymous namespace PreservedAnalyses AllocOptPass::run(Function &F, FunctionAnalysisManager &AM) { AllocOpt opt; bool modified = opt.doInitialization(*F.getParent()); @@ -1228,8 +1295,3 @@ PreservedAnalyses AllocOptPass::run(Function &F, FunctionAnalysisManager &AM) { return PreservedAnalyses::all(); } } - -extern "C" JL_DLLEXPORT void LLVMExtraAddAllocOptPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createAllocOptPass()); -} diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h new file mode 100644 index 0000000000000..a4f77bc1b3b38 --- /dev/null +++ b/src/llvm-codegen-shared.h @@ -0,0 +1,575 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include +#include +#include +#include +#include +#include +#include +#include + +#if JL_LLVM_VERSION >= 160000 +#include +#endif + +#include "julia.h" + +#define STR(csym) #csym +#define XSTR(csym) STR(csym) + +#if JL_LLVM_VERSION >= 160000 + +#include + +template +using Optional = std::optional; +static constexpr std::nullopt_t None = std::nullopt; + +#else + +#include + +#endif + +enum AddressSpace { + Generic = 0, + Tracked = 10, + Derived = 11, + CalleeRooted = 12, + Loaded = 13, + FirstSpecial = Tracked, + LastSpecial = Loaded, +}; + +namespace JuliaType { + static inline llvm::StructType* get_jlvalue_ty(llvm::LLVMContext &C) { + return llvm::StructType::get(C); + } + + static inline llvm::PointerType* get_pjlvalue_ty(llvm::LLVMContext &C, unsigned addressSpace=0) { + return llvm::PointerType::get(get_jlvalue_ty(C), addressSpace); + } + + static inline llvm::PointerType* get_prjlvalue_ty(llvm::LLVMContext &C) { + return llvm::PointerType::get(get_jlvalue_ty(C), AddressSpace::Tracked); + } + + static inline llvm::PointerType* get_ppjlvalue_ty(llvm::LLVMContext &C) { + return llvm::PointerType::get(get_pjlvalue_ty(C), 0); + } + + static inline llvm::PointerType* get_pprjlvalue_ty(llvm::LLVMContext &C) { + return llvm::PointerType::get(get_prjlvalue_ty(C), 0); + } + + static inline auto get_jlfunc_ty(llvm::LLVMContext &C) { + auto T_prjlvalue = get_prjlvalue_ty(C); + auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0); + return llvm::FunctionType::get(T_prjlvalue, { + T_prjlvalue, // function + T_pprjlvalue, // args[] + llvm::Type::getInt32Ty(C)}, // nargs + false); + } + + static inline auto get_jlfunc2_ty(llvm::LLVMContext &C) { + auto T_prjlvalue = get_prjlvalue_ty(C); + auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0); + return llvm::FunctionType::get(T_prjlvalue, { + T_prjlvalue, // function + T_pprjlvalue, // args[] + llvm::Type::getInt32Ty(C), // nargs + T_prjlvalue}, // linfo + false); + } + + static inline auto get_jlfunc3_ty(llvm::LLVMContext &C) { + auto T_prjlvalue = get_prjlvalue_ty(C); + auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0); + auto T = get_pjlvalue_ty(C, Derived); + return llvm::FunctionType::get(T_prjlvalue, { + T, // function + T_pprjlvalue, // args[] + llvm::Type::getInt32Ty(C)}, // nargs + false); + } + + static inline auto get_jlfuncparams_ty(llvm::LLVMContext &C) { + auto T_prjlvalue = get_prjlvalue_ty(C); + auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0); + return llvm::FunctionType::get(T_prjlvalue, { + T_prjlvalue, // function + T_pprjlvalue, // args[] + llvm::Type::getInt32Ty(C), + T_pprjlvalue, // linfo->sparam_vals + }, // nargs + false); + } + + static inline auto get_voidfunc_ty(llvm::LLVMContext &C) { + return llvm::FunctionType::get(llvm::Type::getVoidTy(C), /*isVarArg*/false); + } + + static inline auto get_pvoidfunc_ty(llvm::LLVMContext &C) { + return get_voidfunc_ty(C)->getPointerTo(); + } +} + +// return how many Tracked pointers are in T (count > 0), +// and if there is anything else in T (all == false) +struct CountTrackedPointers { + unsigned count = 0; + bool all = true; + bool derived = false; + CountTrackedPointers(llvm::Type *T, bool ignore_loaded=false); +}; + +unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder); +llvm::SmallVector ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef perm_offsets={}); + 
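The helpers above encode Julia's pointer address-space conventions (Tracked = 10 for GC-tracked jl_value_t pointers, Derived = 11, CalleeRooted = 12, Loaded = 13, per the enum). A tiny illustrative check of what the type helpers return, assuming this header is included:

    #include <llvm/IR/LLVMContext.h>
    #include <cassert>

    void check_julia_pointer_types() {
        llvm::LLVMContext C;
        auto *T_prjlvalue = JuliaType::get_prjlvalue_ty(C); // jl_value_t* in addrspace(10)
        assert(T_prjlvalue->getAddressSpace() == AddressSpace::Tracked);
        auto *T_pjlvalue = JuliaType::get_pjlvalue_ty(C);   // plain addrspace(0) jl_value_t*
        assert(T_pjlvalue->getAddressSpace() == AddressSpace::Generic);
    }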
+static inline void llvm_dump(llvm::Value *v) +{ + v->print(llvm::dbgs(), true); + llvm::dbgs() << "\n"; +} + +static inline void llvm_dump(llvm::Type *v) +{ + v->print(llvm::dbgs(), true); + llvm::dbgs() << "\n"; +} + +static inline void llvm_dump(llvm::Function *f) +{ + f->print(llvm::dbgs(), nullptr, false, true); +} + +static inline void llvm_dump(llvm::Module *m) +{ + m->print(llvm::dbgs(), nullptr); +} + +static inline void llvm_dump(llvm::Metadata *m) +{ + m->print(llvm::dbgs()); + llvm::dbgs() << "\n"; +} + +static inline void llvm_dump(llvm::DebugLoc *dbg) +{ + dbg->print(llvm::dbgs()); + llvm::dbgs() << "\n"; +} + +static inline std::pair tbaa_make_child_with_context(llvm::LLVMContext &ctxt, const char *name, llvm::MDNode *parent=nullptr, bool isConstant=false) +{ + llvm::MDBuilder mbuilder(ctxt); + llvm::MDNode *jtbaa = mbuilder.createTBAARoot("jtbaa"); + llvm::MDNode *tbaa_root = mbuilder.createTBAAScalarTypeNode("jtbaa", jtbaa); + llvm::MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? parent : tbaa_root); + llvm::MDNode *n = mbuilder.createTBAAStructTagNode(scalar, scalar, 0, isConstant); + return std::make_pair(n, scalar); +} + +static inline llvm::MDNode *get_tbaa_const(llvm::LLVMContext &ctxt) { + return tbaa_make_child_with_context(ctxt, "jtbaa_const", nullptr, true).first; +} + +static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instruction *inst) +{ + using namespace llvm; + inst->setMetadata(llvm::LLVMContext::MD_tbaa, md); + if (llvm::isa(inst) && md && md == get_tbaa_const(md->getContext())) { + inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), None)); + } + return inst; +} + +// bitcast a value, but preserve its address space when dealing with pointer types +static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder, llvm::Value *v, llvm::Type *jl_value) +{ + using namespace llvm; + if (isa(jl_value) && + v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) { + // Cast to the proper address space + Type *jl_value_addr = PointerType::getWithSamePointeeType(cast(jl_value), v->getType()->getPointerAddressSpace()); + return builder.CreateBitCast(v, jl_value_addr); + } + else { + return builder.CreateBitCast(v, jl_value); + } +} + +// Get PTLS through current task. +static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *pgcstack) +{ + using namespace llvm; + auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext()); + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); + const int pgcstack_offset = offsetof(jl_task_t, gcstack); + return builder.CreateInBoundsGEP( + T_pjlvalue, emit_bitcast_with_builder(builder, pgcstack, T_ppjlvalue), + ConstantInt::get(T_size, -(pgcstack_offset / sizeof(void *))), + "current_task"); +} + +// Get PTLS through current task. 
+static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *current_task, llvm::MDNode *tbaa) +{ + using namespace llvm; + auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext()); + auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); + const int ptls_offset = offsetof(jl_task_t, ptls); + llvm::Value *pptls = builder.CreateInBoundsGEP( + T_pjlvalue, current_task, + ConstantInt::get(T_size, ptls_offset / sizeof(void *)), + "ptls_field"); + LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue, + emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load"); + // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c. + tbaa_decorate(tbaa, ptls_load); + return builder.CreateBitCast(ptls_load, T_ppjlvalue, "ptls"); +} + +// Get signal page through current task. +static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa) +{ + using namespace llvm; + // return builder.CreateCall(prepare_call(reuse_signal_page_func)); + auto T_psize = T_size->getPointerTo(); + auto T_ppsize = T_psize->getPointerTo(); + int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *); + ptls = emit_bitcast_with_builder(builder, ptls, T_ppsize); + llvm::Value *psafepoint = builder.CreateInBoundsGEP( + T_psize, ptls, ConstantInt::get(T_size, nthfield)); + LoadInst *ptls_load = builder.CreateAlignedLoad( + T_psize, psafepoint, Align(sizeof(void *)), "safepoint"); + tbaa_decorate(tbaa, ptls_load); + return ptls_load; +} + +static inline void emit_signal_fence(llvm::IRBuilder<> &builder) +{ + using namespace llvm; + builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread); +} + +static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false) +{ + using namespace llvm; + llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, T_size, ptls, tbaa); + emit_signal_fence(builder); + Module *M = builder.GetInsertBlock()->getModule(); + LLVMContext &C = builder.getContext(); + // inline jlsafepoint_func->realize(M) + if (final) { + builder.CreateLoad(T_size, signal_page, true); + } + else { + Function *F = M->getFunction("julia.safepoint"); + if (!F) { + auto T_psize = T_size->getPointerTo(); + FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false); + F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M); +#if JL_LLVM_VERSION >= 160000 + F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else + F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif + } + builder.CreateCall(F, {signal_page}); + } + emit_signal_fence(builder); +} + +static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final) +{ + using namespace llvm; + Type *T_int8 = state->getType(); + llvm::Value *ptls_i8 = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy()); + Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state)); + Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls_i8, ArrayRef(offset), "gc_state"); + if (old_state == nullptr) { + old_state = builder.CreateLoad(T_int8, gc_state); + cast(old_state)->setOrdering(AtomicOrdering::Monotonic); + } + 
builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); + if (auto *C = dyn_cast(old_state)) + if (C->isZero()) + return old_state; + if (auto *C = dyn_cast(state)) + if (!C->isZero()) + return old_state; + BasicBlock *passBB = BasicBlock::Create(builder.getContext(), "safepoint", builder.GetInsertBlock()->getParent()); + BasicBlock *exitBB = BasicBlock::Create(builder.getContext(), "after_safepoint", builder.GetInsertBlock()->getParent()); + Constant *zero8 = ConstantInt::get(T_int8, 0); + builder.CreateCondBr(builder.CreateOr(builder.CreateICmpEQ(old_state, zero8), // if (!old_state || !state) + builder.CreateICmpEQ(state, zero8)), + passBB, exitBB); + builder.SetInsertPoint(passBB); + MDNode *tbaa = get_tbaa_const(builder.getContext()); + emit_gc_safepoint(builder, T_size, ptls, tbaa, final); + builder.CreateBr(exitBB); + builder.SetInsertPoint(exitBB); + return old_state; +} + +static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final) +{ + using namespace llvm; + Value *state = builder.getInt8(0); + return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final); +} + +static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) +{ + using namespace llvm; + Value *old_state = builder.getInt8(0); + return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); +} + +static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final) +{ + using namespace llvm; + Value *state = builder.getInt8(JL_GC_STATE_SAFE); + return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final); +} + +static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) +{ + using namespace llvm; + Value *old_state = builder.getInt8(JL_GC_STATE_SAFE); + return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); +} + +// Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14. +// +// Once we no longer support LLVM < 14, these can be mechanically removed by +// translating foo(Bar, …) into Bar->foo(…) resp. Bar.foo(…). 
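To make the "mechanical" rewrite described in that comment concrete, here is the intended one-to-one translation for one of the shims declared just below, using a hypothetical call instruction CI:

    // Today, through the compatibility shim:
    addFnAttr(CI, llvm::Attribute::NoUnwind);
    // After dropping LLVM < 14, foo(Bar, ...) becomes Bar->foo(...):
    CI->addFnAttr(llvm::Attribute::NoUnwind);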
+namespace { +using namespace llvm; + +inline void addFnAttr(CallInst *Target, Attribute::AttrKind Attr) +{ + Target->addFnAttr(Attr); +} + +template +inline void addRetAttr(T *Target, A Attr) +{ + Target->addRetAttr(Attr); +} + +inline void addAttributeAtIndex(Function *F, unsigned Index, Attribute Attr) +{ + F->addAttributeAtIndex(Index, Attr); +} + +inline AttributeSet getFnAttrs(const AttributeList &Attrs) +{ + return Attrs.getFnAttrs(); +} + +inline AttributeSet getRetAttrs(const AttributeList &Attrs) +{ + return Attrs.getRetAttrs(); +} + +inline bool hasFnAttr(const AttributeList &L, Attribute::AttrKind Kind) +{ + return L.hasFnAttr(Kind); +} + +inline AttributeList addAttributeAtIndex(const AttributeList &L, LLVMContext &C, + unsigned Index, Attribute::AttrKind Kind) +{ + return L.addAttributeAtIndex(C, Index, Kind); +} + +inline AttributeList addAttributeAtIndex(const AttributeList &L, LLVMContext &C, + unsigned Index, Attribute Attr) +{ + return L.addAttributeAtIndex(C, Index, Attr); +} + +inline AttributeList addAttributesAtIndex(const AttributeList &L, LLVMContext &C, + unsigned Index, const AttrBuilder &Builder) +{ + return L.addAttributesAtIndex(C, Index, Builder); +} + +inline AttributeList addFnAttribute(const AttributeList &L, LLVMContext &C, + Attribute::AttrKind Kind) +{ + return L.addFnAttribute(C, Kind); +} + +inline AttributeList addRetAttribute(const AttributeList &L, LLVMContext &C, + Attribute::AttrKind Kind) +{ + return L.addRetAttribute(C, Kind); +} + +inline bool hasAttributesAtIndex(const AttributeList &L, unsigned Index) +{ + return L.hasAttributesAtIndex(Index); +} + +inline Attribute getAttributeAtIndex(const AttributeList &L, unsigned Index, Attribute::AttrKind Kind) +{ + return L.getAttributeAtIndex(Index, Kind); +} + +// Iterate through uses of a particular type. +// Recursively scan through `ConstantExpr` and `ConstantAggregate` use. +template +struct ConstantUses { + template + struct Info { + llvm::Use *use; + T *val; + // If `samebits == true`, the offset the original value appears in the constant. + size_t offset; + // This specify whether the original value appears in the current value in exactly + // the same bit pattern (with possibly an offset determined by `offset`). + bool samebits; + Info(llvm::Use *use, T *val, size_t offset, bool samebits) : + use(use), + val(val), + offset(offset), + samebits(samebits) + { + } + Info(llvm::Use *use, size_t offset, bool samebits) : + use(use), + val(cast(use->getUser())), + offset(offset), + samebits(samebits) + { + } + }; + using UseInfo = Info; + struct Frame : Info { + template + Frame(Args &&... args) : + Info(std::forward(args)...), + cur(this->val->use_empty() ? nullptr : &*this->val->use_begin()), + _next(cur ? 
cur->getNext() : nullptr) + { + } + private: + void next() + { + cur = _next; + if (!cur) + return; + _next = cur->getNext(); + } + llvm::Use *cur; + llvm::Use *_next; + friend struct ConstantUses; + }; + ConstantUses(llvm::Constant *c, llvm::Module &M) + : stack{Frame(nullptr, c, 0u, true)}, + M(M) + { + forward(); + } + UseInfo get_info() const + { + auto &top = stack.back(); + return UseInfo(top.cur, top.offset, top.samebits); + } + const auto &get_stack() const + { + return stack; + } + void next() + { + stack.back().next(); + forward(); + } + bool done() + { + return stack.empty(); + } +private: + void forward(); + llvm::SmallVector stack; + llvm::Module &M; +}; + +template +void ConstantUses::forward() +{ + assert(!stack.empty()); + auto frame = &stack.back(); + const auto &DL = M.getDataLayout(); + auto pop = [&] { + stack.pop_back(); + if (stack.empty()) { + return false; + } + frame = &stack.back(); + return true; + }; + auto push = [&] (llvm::Use *use, llvm::Constant *c, size_t offset, bool samebits) { + stack.emplace_back(use, c, offset, samebits); + frame = &stack.back(); + }; + auto handle_constaggr = [&] (llvm::Use *use, llvm::ConstantAggregate *aggr) { + if (!frame->samebits) { + push(use, aggr, 0, false); + return; + } + if (auto strct = dyn_cast(aggr)) { + auto layout = DL.getStructLayout(strct->getType()); + push(use, strct, frame->offset + layout->getElementOffset(use->getOperandNo()), true); + } + else if (auto ary = dyn_cast(aggr)) { + auto elty = ary->getType()->getElementType(); + push(use, ary, frame->offset + DL.getTypeAllocSize(elty) * use->getOperandNo(), true); + } + else if (auto vec = dyn_cast(aggr)) { + auto elty = vec->getType()->getElementType(); + push(use, vec, frame->offset + DL.getTypeAllocSize(elty) * use->getOperandNo(), true); + } + else { + abort(); + } + }; + auto handle_constexpr = [&] (llvm::Use *use, llvm::ConstantExpr *expr) { + if (!frame->samebits) { + push(use, expr, 0, false); + return; + } + auto opcode = expr->getOpcode(); + if (opcode == llvm::Instruction::PtrToInt || opcode == llvm::Instruction::IntToPtr || + opcode == llvm::Instruction::AddrSpaceCast || opcode == llvm::Instruction::BitCast) { + push(use, expr, frame->offset, true); + } + else { + push(use, expr, 0, false); + } + }; + while (true) { + auto use = frame->cur; + if (!use) { + if (!pop()) + return; + continue; + } + auto user = use->getUser(); + if (isa(user)) + return; + frame->next(); + if (auto aggr = dyn_cast(user)) { + handle_constaggr(use, aggr); + } + else if (auto expr = dyn_cast(user)) { + handle_constexpr(use, expr); + } + } +} +} diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 75ac96e1b3060..2539c5cd2e37c 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -21,12 +21,10 @@ #include #include #include -#include #include #include #include -#include "julia.h" #include "jitlayers.h" #define DEBUG_TYPE "cpufeatures" @@ -39,22 +37,20 @@ STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false"); extern JuliaOJIT *jl_ExecutionEngine; // whether this platform unconditionally (i.e. 
without needing multiversioning) supports FMA -Optional always_have_fma(Function &intr) { - auto intr_name = intr.getName(); - auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); - -#if defined(_CPU_AARCH64_) - return typ == "f32" || typ == "f64"; -#else - (void)typ; - return {}; -#endif +Optional always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT { + if (TT.isAArch64()) { + auto intr_name = intr.getName(); + auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); + return typ == "f32" || typ == "f64"; + } else { + return None; + } } -bool have_fma(Function &intr, Function &caller) { - auto unconditional = always_have_fma(intr); - if (unconditional.hasValue()) - return unconditional.getValue(); +static bool have_fma(Function &intr, Function &caller, const Triple &TT) JL_NOTSAFEPOINT { + auto unconditional = always_have_fma(intr, TT); + if (unconditional) + return *unconditional; auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); @@ -63,24 +59,24 @@ bool have_fma(Function &intr, Function &caller) { StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString(); - SmallVector Features; + SmallVector Features; FS.split(Features, ','); for (StringRef Feature : Features) -#if defined _CPU_ARM_ + if (TT.isARM()) { if (Feature == "+vfp4") return typ == "f32" || typ == "f64"; else if (Feature == "+vfp4sp") return typ == "f32"; -#else + } else if (TT.isX86()) { if (Feature == "+fma" || Feature == "+fma4") return typ == "f32" || typ == "f64"; -#endif + } return false; } -void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) { - if (have_fma(intr, caller)) { +void lowerHaveFMA(Function &intr, Function &caller, const Triple &TT, CallInst *I) JL_NOTSAFEPOINT { + if (have_fma(intr, caller, TT)) { ++LoweredWithFMA; I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1)); } else { @@ -90,8 +86,9 @@ void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) { return; } -bool lowerCPUFeatures(Module &M) +bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT { + auto TT = Triple(M.getTargetTriple()); SmallVector Materialized; for (auto &F: M.functions()) { @@ -101,7 +98,7 @@ bool lowerCPUFeatures(Module &M) for (Use &U: F.uses()) { User *RU = U.getUser(); CallInst *I = cast(RU); - lowerHaveFMA(F, *I->getParent()->getParent(), I); + lowerHaveFMA(F, *I->getParent()->getParent(), TT, I); Materialized.push_back(I); } } @@ -111,46 +108,19 @@ bool lowerCPUFeatures(Module &M) for (auto I: Materialized) { I->eraseFromParent(); } - assert(!verifyModule(M)); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(M)); +#endif return true; } else { return false; } } -PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses CPUFeaturesPass::run(Module &M, ModuleAnalysisManager &AM) { if (lowerCPUFeatures(M)) { return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } - -namespace { -struct CPUFeaturesLegacy : public ModulePass { - static char ID; - CPUFeaturesLegacy() : ModulePass(ID) {}; - - bool runOnModule(Module &M) - { - return lowerCPUFeatures(M); - } -}; - -char CPUFeaturesLegacy::ID = 0; -static RegisterPass - Y("CPUFeatures", - "Lower calls to CPU feature testing intrinsics.", - false, - false); -} - -Pass *createCPUFeaturesPass() -{ - return new CPUFeaturesLegacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createCPUFeaturesPass()); -} diff --git 
a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp index 054ec46162160..5d0d9f5d37c40 100644 --- a/src/llvm-demote-float16.cpp +++ b/src/llvm-demote-float16.cpp @@ -1,8 +1,9 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license -// This pass finds floating-point operations on 16-bit (half precision) values, and replaces -// them by equivalent operations on 32-bit (single precision) values surrounded by a fpext -// and fptrunc. This ensures that the exact semantics of IEEE floating-point are preserved. +// This pass finds floating-point operations on 16-bit values (half precision and bfloat), +// and replaces them by equivalent operations on 32-bit (single precision) values surrounded +// by a fpext and fptrunc. This ensures that the exact semantics of IEEE floating-point are +// preserved. // // Without this pass, back-ends that do not natively support half-precision (e.g. x86_64) // similarly pattern-match half-precision operations with single-precision equivalents, but @@ -20,11 +21,12 @@ #include #include #include -#include #include #include #include #include +#include "julia.h" +#include "jitlayers.h" #define DEBUG_TYPE "demote_float16" @@ -43,17 +45,69 @@ INST_STATISTIC(FRem); INST_STATISTIC(FCmp); #undef INST_STATISTIC +extern JuliaOJIT *jl_ExecutionEngine; + namespace { +static bool have_fp16(Function &caller, const Triple &TT) { + Attribute FSAttr = caller.getFnAttribute("target-features"); + StringRef FS = ""; + if (FSAttr.isValid()) + FS = FSAttr.getValueAsString(); + else if (jl_ExecutionEngine) + FS = jl_ExecutionEngine->getTargetFeatureString(); + // else probably called from opt, just do nothing + if (TT.isAArch64()) { + if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){ + return true; + } + } else if (TT.getArch() == Triple::x86_64) { + if (FS.find("+avx512fp16") != llvm::StringRef::npos){ + return true; + } + } + if (caller.hasFnAttribute("julia.hasfp16")) { + return true; + } + return false; +} + +static bool have_bf16(Function &caller, const Triple &TT) { + if (caller.hasFnAttribute("julia.hasbf16")) { + return true; + } + + // there's no targets that fully support bfloat yet;, + // AVX512BF16 only provides conversion and dot product instructions. + return false; +} + static bool demoteFloat16(Function &F) { + auto TT = Triple(F.getParent()->getTargetTriple()); + auto has_fp16 = have_fp16(F, TT); + auto has_bf16 = have_bf16(F, TT); + if (has_fp16 && has_bf16) + return false; + auto &ctx = F.getContext(); - auto T_float16 = Type::getHalfTy(ctx); auto T_float32 = Type::getFloatTy(ctx); - SmallVector erase; for (auto &BB : F) { for (auto &I : BB) { + // check whether there's any 16-bit floating point operands to extend + bool Float16 = I.getType()->getScalarType()->isHalfTy(); + bool BFloat16 = I.getType()->getScalarType()->isBFloatTy(); + for (size_t i = 0; !BFloat16 && !Float16 && i < I.getNumOperands(); i++) { + Value *Op = I.getOperand(i); + if (!has_fp16 && Op->getType()->getScalarType()->isHalfTy()) + Float16 = true; + else if (!has_bf16 && Op->getType()->getScalarType()->isBFloatTy()) + BFloat16 = true; + } + if (!Float16 && !BFloat16) + continue; + switch (I.getOpcode()) { case Instruction::FNeg: case Instruction::FAdd: @@ -64,6 +118,7 @@ static bool demoteFloat16(Function &F) case Instruction::FCmp: break; default: + // TODO: Do calls to llvm.fma.f16 may need to go to f64 to be correct? 
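As the pass header comment describes, each 16-bit floating-point operation is rebuilt over Float32 and the result truncated back to the original type. The net effect on a single fadd is roughly the IRBuilder sequence below (a sketch with made-up value names, not code from this pass):

    // c = a + b on half/bfloat operands, on a target without native 16-bit FP:
    llvm::Value *x = builder.CreateFPExt(a, T_float32);      // fpext operand to float
    llvm::Value *y = builder.CreateFPExt(b, T_float32);
    llvm::Value *s = builder.CreateFAdd(x, y);               // do the arithmetic in Float32
    llvm::Value *c = builder.CreateFPTrunc(s, a->getType()); // fptrunc back to the 16-bit type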
continue; } @@ -74,80 +129,83 @@ static bool demoteFloat16(Function &F) IRBuilder<> builder(&I); - // extend Float16 operands to Float32 - bool OperandsChanged = false; + // extend 16-bit floating point operands SmallVector Operands(I.getNumOperands()); for (size_t i = 0; i < I.getNumOperands(); i++) { Value *Op = I.getOperand(i); - if (Op->getType() == T_float16) { + if (!has_fp16 && Op->getType()->getScalarType()->isHalfTy()) { + // extend Float16 to Float32 ++TotalExt; - Op = builder.CreateFPExt(Op, T_float32); - OperandsChanged = true; + Op = builder.CreateFPExt(Op, Op->getType()->getWithNewType(T_float32)); + } else if (!has_bf16 && Op->getType()->getScalarType()->isBFloatTy()) { + // extend BFloat16 to Float32 + ++TotalExt; + Op = builder.CreateFPExt(Op, Op->getType()->getWithNewType(T_float32)); } - Operands[i] = (Op); + Operands[i] = Op; } // recreate the instruction if any operands changed, - // truncating the result back to Float16 - if (OperandsChanged) { - Value *NewI; - ++TotalChanged; - switch (I.getOpcode()) { - case Instruction::FNeg: - assert(Operands.size() == 1); - ++FNegChanged; - NewI = builder.CreateFNeg(Operands[0]); - break; - case Instruction::FAdd: - assert(Operands.size() == 2); - ++FAddChanged; - NewI = builder.CreateFAdd(Operands[0], Operands[1]); - break; - case Instruction::FSub: - assert(Operands.size() == 2); - ++FSubChanged; - NewI = builder.CreateFSub(Operands[0], Operands[1]); - break; - case Instruction::FMul: - assert(Operands.size() == 2); - ++FMulChanged; - NewI = builder.CreateFMul(Operands[0], Operands[1]); - break; - case Instruction::FDiv: - assert(Operands.size() == 2); - ++FDivChanged; - NewI = builder.CreateFDiv(Operands[0], Operands[1]); - break; - case Instruction::FRem: - assert(Operands.size() == 2); - ++FRemChanged; - NewI = builder.CreateFRem(Operands[0], Operands[1]); - break; - case Instruction::FCmp: - assert(Operands.size() == 2); - ++FCmpChanged; - NewI = builder.CreateFCmp(cast(&I)->getPredicate(), - Operands[0], Operands[1]); - break; - default: - abort(); - } - cast(NewI)->copyMetadata(I); - cast(NewI)->copyFastMathFlags(&I); - if (NewI->getType() != I.getType()) { - ++TotalTrunc; - NewI = builder.CreateFPTrunc(NewI, I.getType()); - } - I.replaceAllUsesWith(NewI); - erase.push_back(&I); + // truncating the result back to the original type + Value *NewI; + ++TotalChanged; + switch (I.getOpcode()) { + case Instruction::FNeg: + assert(Operands.size() == 1); + ++FNegChanged; + NewI = builder.CreateFNeg(Operands[0]); + break; + case Instruction::FAdd: + assert(Operands.size() == 2); + ++FAddChanged; + NewI = builder.CreateFAdd(Operands[0], Operands[1]); + break; + case Instruction::FSub: + assert(Operands.size() == 2); + ++FSubChanged; + NewI = builder.CreateFSub(Operands[0], Operands[1]); + break; + case Instruction::FMul: + assert(Operands.size() == 2); + ++FMulChanged; + NewI = builder.CreateFMul(Operands[0], Operands[1]); + break; + case Instruction::FDiv: + assert(Operands.size() == 2); + ++FDivChanged; + NewI = builder.CreateFDiv(Operands[0], Operands[1]); + break; + case Instruction::FRem: + assert(Operands.size() == 2); + ++FRemChanged; + NewI = builder.CreateFRem(Operands[0], Operands[1]); + break; + case Instruction::FCmp: + assert(Operands.size() == 2); + ++FCmpChanged; + NewI = builder.CreateFCmp(cast(&I)->getPredicate(), + Operands[0], Operands[1]); + break; + default: + abort(); } + cast(NewI)->copyMetadata(I); + cast(NewI)->copyFastMathFlags(&I); + if (NewI->getType() != I.getType()) { + ++TotalTrunc; + NewI = 
builder.CreateFPTrunc(NewI, I.getType()); + } + I.replaceAllUsesWith(NewI); + erase.push_back(&I); } } if (erase.size() > 0) { for (auto V : erase) V->eraseFromParent(); - assert(!verifyFunction(F, &errs())); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(F)); +#endif return true; } else @@ -156,40 +214,10 @@ static bool demoteFloat16(Function &F) } // end anonymous namespace -PreservedAnalyses DemoteFloat16::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses DemoteFloat16Pass::run(Function &F, FunctionAnalysisManager &AM) { if (demoteFloat16(F)) { return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } - -namespace { - -struct DemoteFloat16Legacy : public FunctionPass { - static char ID; - DemoteFloat16Legacy() : FunctionPass(ID){}; - -private: - bool runOnFunction(Function &F) override { - return demoteFloat16(F); - } -}; - -char DemoteFloat16Legacy::ID = 0; -static RegisterPass - Y("DemoteFloat16", - "Demote Float16 operations to Float32 equivalents.", - false, - false); -} // end anonymous namespac - -Pass *createDemoteFloat16Pass() -{ - return new DemoteFloat16Legacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createDemoteFloat16Pass()); -} diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index b542d478fc68c..5a53ce4d8e510 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -3,21 +3,29 @@ #include "llvm-version.h" #include "passes.h" -#include +#include #include #include #include #include +#include #include #include #include -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #include "julia.h" #include "julia_internal.h" #include "llvm-pass-helpers.h" #define DEBUG_TYPE "final_gc_lowering" +STATISTIC(NewGCFrameCount, "Number of lowered newGCFrameFunc intrinsics"); +STATISTIC(PushGCFrameCount, "Number of lowered pushGCFrameFunc intrinsics"); +STATISTIC(PopGCFrameCount, "Number of lowered popGCFrameFunc intrinsics"); +STATISTIC(GetGCFrameSlotCount, "Number of lowered getGCFrameSlotFunc intrinsics"); +STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics"); +STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics"); +STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics"); using namespace llvm; @@ -32,18 +40,17 @@ using namespace llvm; struct FinalLowerGC: private JuliaPassContext { bool runOnFunction(Function &F); - bool doInitialization(Module &M); - bool doFinalization(Module &M); private: Function *queueRootFunc; - Function *queueBindingFunc; Function *poolAllocFunc; Function *bigAllocFunc; + Function *allocTypedFunc; Instruction *pgcstack; + Type *T_size; // Lowers a `julia.new_gc_frame` intrinsic. - Value *lowerNewGCFrame(CallInst *target, Function &F); + void lowerNewGCFrame(CallInst *target, Function &F); // Lowers a `julia.push_gc_frame` intrinsic. void lowerPushGCFrame(CallInst *target, Function &F); @@ -52,89 +59,80 @@ struct FinalLowerGC: private JuliaPassContext { void lowerPopGCFrame(CallInst *target, Function &F); // Lowers a `julia.get_gc_frame_slot` intrinsic. - Value *lowerGetGCFrameSlot(CallInst *target, Function &F); + void lowerGetGCFrameSlot(CallInst *target, Function &F); // Lowers a `julia.gc_alloc_bytes` intrinsic. - Value *lowerGCAllocBytes(CallInst *target, Function &F); + void lowerGCAllocBytes(CallInst *target, Function &F); // Lowers a `julia.queue_gc_root` intrinsic. 
- Value *lowerQueueGCRoot(CallInst *target, Function &F); + void lowerQueueGCRoot(CallInst *target, Function &F); - // Lowers a `julia.queue_gc_binding` intrinsic. - Value *lowerQueueGCBinding(CallInst *target, Function &F); + // Lowers a `julia.safepoint` intrinsic. + void lowerSafepoint(CallInst *target, Function &F); }; -Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) +void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) { + ++NewGCFrameCount; assert(target->arg_size() == 1); unsigned nRoots = cast(target->getArgOperand(0))->getLimitedValue(INT_MAX); // Create the GC frame. - AllocaInst *gcframe = new AllocaInst( - T_prjlvalue, - 0, - ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2), - Align(16)); - gcframe->insertAfter(target); + IRBuilder<> builder(target); + auto gcframe_alloca = builder.CreateAlloca(T_prjlvalue, ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2)); + gcframe_alloca->setAlignment(Align(16)); + // addrspacecast as needed for non-0 alloca addrspace + auto gcframe = cast(builder.CreateAddrSpaceCast(gcframe_alloca, T_prjlvalue->getPointerTo(0))); gcframe->takeName(target); // Zero out the GC frame. - BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F.getContext()), ""); - tempSlot_i8->insertAfter(gcframe); - Type *argsT[2] = {tempSlot_i8->getType(), Type::getInt32Ty(F.getContext())}; - Function *memset = Intrinsic::getDeclaration(F.getParent(), Intrinsic::memset, makeArrayRef(argsT)); - Value *args[4] = { - tempSlot_i8, // dest - ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val - ConstantInt::get(Type::getInt32Ty(F.getContext()), sizeof(jl_value_t*) * (nRoots + 2)), // len - ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile - CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args)); - cast(zeroing)->setDestAlignment(16); - zeroing->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); - zeroing->insertAfter(tempSlot_i8); - - return gcframe; + auto ptrsize = F.getParent()->getDataLayout().getPointerSize(); + builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe); + + target->replaceAllUsesWith(gcframe); + target->eraseFromParent(); } void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) { + ++PushGCFrameCount; assert(target->arg_size() == 2); auto gcframe = target->getArgOperand(0); unsigned nRoots = cast(target->getArgOperand(1))->getLimitedValue(INT_MAX); - IRBuilder<> builder(target->getContext()); - builder.SetInsertPoint(&*(++BasicBlock::iterator(target))); + IRBuilder<> builder(target); StoreInst *inst = builder.CreateAlignedStore( - ConstantInt::get(getSizeTy(F.getContext()), JL_GC_ENCODE_PUSHARGS(nRoots)), + ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)), builder.CreateBitCast( - builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0), - getSizeTy(F.getContext())->getPointerTo()), + builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0, "frame.nroots"), + T_size->getPointerTo(), "frame.nroots"), // GEP of 0 becomes a noop and eats the name Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(F.getContext()); inst = builder.CreateAlignedStore( - builder.CreateAlignedLoad(T_ppjlvalue, pgcstack, Align(sizeof(void*))), + builder.CreateAlignedLoad(T_ppjlvalue, pgcstack, Align(sizeof(void*)), "task.gcstack"), builder.CreatePointerCast( - 
builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1), + builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1, "frame.prev"), PointerType::get(T_ppjlvalue, 0)), Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); - inst = builder.CreateAlignedStore( + builder.CreateAlignedStore( gcframe, builder.CreateBitCast(pgcstack, PointerType::get(PointerType::get(T_prjlvalue, 0), 0)), Align(sizeof(void*))); + target->eraseFromParent(); } void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) { + ++PopGCFrameCount; assert(target->arg_size() == 1); auto gcframe = target->getArgOperand(0); - IRBuilder<> builder(target->getContext()); - builder.SetInsertPoint(target); + IRBuilder<> builder(target); Instruction *gcpop = cast(builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1)); - Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*))); + Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*)), "frame.prev"); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); inst = builder.CreateAlignedStore( inst, @@ -142,17 +140,18 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F) PointerType::get(T_prjlvalue, 0)), Align(sizeof(void*))); inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); + target->eraseFromParent(); } -Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) +void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) { + ++GetGCFrameSlotCount; assert(target->arg_size() == 2); auto gcframe = target->getArgOperand(0); auto index = target->getArgOperand(1); // Initialize an IR builder. - IRBuilder<> builder(target->getContext()); - builder.SetInsertPoint(target); + IRBuilder<> builder(target); // The first two slots are reserved, so we'll add two to the index. index = builder.CreateAdd(index, ConstantInt::get(Type::getInt32Ty(F.getContext()), 2)); @@ -160,242 +159,133 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F) // Lower the intrinsic as a GEP. 
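The lowered frame produced by lowerNewGCFrame/lowerPushGCFrame has a fixed shape: nRoots + 2 pointer slots, with slot 0 holding the encoded root count, slot 1 the previous frame, and the actual roots starting at index 2 (which is why lowerGetGCFrameSlot adds two to the index). Here is a minimal stand-alone sketch of that layout and of the push/pop linking into the task's gcstack; the "nroots << 2" encoding and the Task/Value names are assumptions made only for illustration.

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-ins; the real code works with jl_value_t* roots.
using Value = void;

struct Task {
    Value **gcstack = nullptr;   // top-most GC frame of this task
};

// Frame layout mirrored from the lowering: frame[0] = encoded nroots,
// frame[1] = previous frame, frame[2..] = root slots.
constexpr size_t encode_pushargs(size_t nroots) { return nroots << 2; } // assumption: low bits are flags

Value **new_gc_frame(std::vector<Value*> &storage, size_t nroots) {
    storage.assign(nroots + 2, nullptr);          // zero-initialized, like the emitted memset
    return storage.data();
}

void push_gc_frame(Task &t, Value **frame, size_t nroots) {
    frame[0] = reinterpret_cast<Value*>(encode_pushargs(nroots));
    frame[1] = reinterpret_cast<Value*>(t.gcstack); // link to previous frame
    t.gcstack = frame;
}

Value **get_gc_frame_slot(Value **frame, size_t index) {
    return &frame[index + 2];                     // first two slots are reserved
}

void pop_gc_frame(Task &t, Value **frame) {
    t.gcstack = reinterpret_cast<Value**>(frame[1]);
}

int main() {
    Task t;
    std::vector<Value*> storage;
    Value **frame = new_gc_frame(storage, 3);
    push_gc_frame(t, frame, 3);
    int dummy = 0;
    *get_gc_frame_slot(frame, 0) = &dummy;        // root something in slot 0
    assert(t.gcstack == frame);
    pop_gc_frame(t, frame);
    assert(t.gcstack == nullptr);
}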
auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index); gep->takeName(target); - return gep; + target->replaceAllUsesWith(gep); + target->eraseFromParent(); } -Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F) +void FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F) { + ++QueueGCRootCount; assert(target->arg_size() == 1); target->setCalledFunction(queueRootFunc); - return target; } -Value *FinalLowerGC::lowerQueueGCBinding(CallInst *target, Function &F) +void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) { + ++SafepointCount; assert(target->arg_size() == 1); - target->setCalledFunction(queueBindingFunc); - return target; + IRBuilder<> builder(target); + Value* signal_page = target->getOperand(0); + builder.CreateLoad(T_size, signal_page, true); + target->eraseFromParent(); } -Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) +void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { - assert(target->arg_size() == 2); - auto sz = (size_t)cast(target->getArgOperand(1))->getZExtValue(); - // This is strongly architecture and OS dependent - int osize; - int offset = jl_gc_classify_pools(sz, &osize); + ++GCAllocBytesCount; + assert(target->arg_size() == 3); + CallInst *newI; + IRBuilder<> builder(target); - builder.SetCurrentDebugLocation(target->getDebugLoc()); auto ptls = target->getArgOperand(0); - CallInst *newI; - if (offset < 0) { - newI = builder.CreateCall( - bigAllocFunc, - { ptls, ConstantInt::get(getSizeTy(F.getContext()), sz + sizeof(void*)) }); - } - else { - auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset); - auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); - newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize }); + auto type = target->getArgOperand(2); + uint64_t derefBytes = 0; + if (auto CI = dyn_cast(target->getArgOperand(1))) { + size_t sz = (size_t)CI->getZExtValue(); + // This is strongly architecture and OS dependent + int osize; + int offset = jl_gc_classify_pools(sz, &osize); + if (offset < 0) { + newI = builder.CreateCall( + bigAllocFunc, + { ptls, ConstantInt::get(T_size, sz + sizeof(void*)), type }); + if (sz > 0) + derefBytes = sz; + } + else { + auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset); + auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); + newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize, type }); + if (sz > 0) + derefBytes = sz; + } + } else { + auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size); + size = builder.CreateAdd(size, ConstantInt::get(T_size, sizeof(void*))); + newI = builder.CreateCall(allocTypedFunc, { ptls, size, type }); + derefBytes = sizeof(void*); } newI->setAttributes(newI->getCalledFunction()->getAttributes()); + unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*)); + newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align))); + if (derefBytes > 0) + newI->addDereferenceableRetAttr(derefBytes); newI->takeName(target); - return newI; -} - -bool FinalLowerGC::doInitialization(Module &M) { - // Initialize platform-agnostic references. - initAll(M); - - // Initialize platform-specific references. 
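lowerGCAllocBytes now picks among three runtime entry points: constant sizes that fit a pool go to the pool allocator with an (offset, osize) classification, constant sizes that do not fit go to the big-object allocator with sizeof(void*) added, and non-constant sizes fall back to the typed allocator. A sketch of that dispatch follows; the pool cutoff and the 16-byte size classes are invented, since the real jl_gc_classify_pools is strongly platform-dependent.

#include <cstddef>
#include <cstdio>

// Hypothetical pool classification -- the 2032-byte cutoff and 16-byte
// granularity are invented for illustration only.
int classify_pools(size_t sz, int *osize) {
    if (sz > 2032)
        return -1;                     // too big for any pool
    *osize = (int)((sz + 15) / 16) * 16;
    return *osize / 16;                // pretend pool index == size class
}

enum class AllocPath { Pool, Big, Typed };

// Decide which runtime allocator a julia.gc_alloc_bytes call is routed to.
AllocPath pick_alloc_path(bool size_is_constant, size_t sz) {
    if (!size_is_constant)
        return AllocPath::Typed;       // dynamic size: generic typed allocation
    int osize = 0;
    return classify_pools(sz, &osize) < 0 ? AllocPath::Big : AllocPath::Pool;
}

int main() {
    std::printf("%d\n", (int)pick_alloc_path(true, 64));       // Pool
    std::printf("%d\n", (int)pick_alloc_path(true, 1 << 20));  // Big
    std::printf("%d\n", (int)pick_alloc_path(false, 0));       // Typed
}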
- queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); - queueBindingFunc = getOrDeclare(jl_well_known::GCQueueBinding); - poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc); - bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc); - - GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc}; - unsigned j = 0; - for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) { - if (!functionList[i]) - continue; - if (i != j) - functionList[j] = functionList[i]; - j++; - } - if (j != 0) - appendToCompilerUsed(M, ArrayRef(functionList, j)); - return true; -} - -bool FinalLowerGC::doFinalization(Module &M) -{ - GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc}; - queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = nullptr; - auto used = M.getGlobalVariable("llvm.compiler.used"); - if (!used) - return false; - SmallPtrSet InitAsSet( - functionList, - functionList + sizeof(functionList) / sizeof(void*)); - bool changed = false; - SmallVector init; - ConstantArray *CA = cast(used->getInitializer()); - for (auto &Op : CA->operands()) { - Constant *C = cast_or_null(Op); - if (InitAsSet.count(C->stripPointerCasts())) { - changed = true; - continue; - } - init.push_back(C); - } - if (!changed) - return false; - used->eraseFromParent(); - if (init.empty()) - return true; - ArrayType *ATy = ArrayType::get(Type::getInt8PtrTy(M.getContext()), init.size()); - used = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, - ConstantArray::get(ATy, init), "llvm.compiler.used"); - used->setSection("llvm.metadata"); - return true; -} - -template -static void replaceInstruction( - Instruction *oldInstruction, - Value *newInstruction, - TIterator &it) -{ - if (newInstruction != oldInstruction) { - oldInstruction->replaceAllUsesWith(newInstruction); - it = oldInstruction->eraseFromParent(); - } - else { - ++it; - } + target->replaceAllUsesWith(newI); + target->eraseFromParent(); } bool FinalLowerGC::runOnFunction(Function &F) { - LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); - // Check availability of functions again since they might have been deleted. - initFunctions(*F.getParent()); - if (!pgcstack_getter) + initAll(*F.getParent()); + if (!pgcstack_getter && !adoptthread_func) { + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n"); return false; + } // Look for a call to 'julia.get_pgcstack'. pgcstack = getPGCstack(F); - if (!pgcstack) + if (!pgcstack) { + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n"); return false; - - // Acquire intrinsic functions. 
- auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame); - auto pushGCFrameFunc = getOrNull(jl_intrinsics::pushGCFrame); - auto popGCFrameFunc = getOrNull(jl_intrinsics::popGCFrame); - auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot); - auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes); - auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot); - auto queueGCBindingFunc = getOrNull(jl_intrinsics::queueGCBinding); + } + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); + queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); + poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc); + bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc); + allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped); + T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); // Lower all calls to supported intrinsics. - for (BasicBlock &BB : F) { - for (auto it = BB.begin(); it != BB.end();) { - auto *CI = dyn_cast(&*it); - if (!CI) { - ++it; + for (auto &BB : F) { + for (auto &I : make_early_inc_range(BB)) { + auto *CI = dyn_cast(&I); + if (!CI) continue; - } Value *callee = CI->getCalledOperand(); - - if (callee == newGCFrameFunc) { - replaceInstruction(CI, lowerNewGCFrame(CI, F), it); - } - else if (callee == pushGCFrameFunc) { - lowerPushGCFrame(CI, F); - it = CI->eraseFromParent(); - } - else if (callee == popGCFrameFunc) { - lowerPopGCFrame(CI, F); - it = CI->eraseFromParent(); - } - else if (callee == getGCFrameSlotFunc) { - replaceInstruction(CI, lowerGetGCFrameSlot(CI, F), it); - } - else if (callee == GCAllocBytesFunc) { - replaceInstruction(CI, lowerGCAllocBytes(CI, F), it); - } - else if (callee == queueGCRootFunc) { - replaceInstruction(CI, lowerQueueGCRoot(CI, F), it); - } - else if (callee == queueGCBindingFunc) { - replaceInstruction(CI, lowerQueueGCBinding(CI, F), it); - } - else { - ++it; - } + assert(callee); + +#define LOWER_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \ + do { \ + auto intrinsic = getOrNull(jl_intrinsics::INTRINSIC); \ + if (intrinsic == callee) { \ + LOWER_INTRINSIC_FUNC(CI, F); \ + } \ + } while (0) + + LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame); + LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame); + LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame); + LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot); + LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes); + LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot); + LOWER_INTRINSIC(safepoint, lowerSafepoint); + +#undef LOWER_INTRINSIC } } return true; } -struct FinalLowerGCLegacy: public FunctionPass { - static char ID; - FinalLowerGCLegacy() : FunctionPass(ID), finalLowerGC(FinalLowerGC()) {} - -protected: - void getAnalysisUsage(AnalysisUsage &AU) const override { - FunctionPass::getAnalysisUsage(AU); - } - -private: - bool runOnFunction(Function &F) override; - bool doInitialization(Module &M) override; - bool doFinalization(Module &M) override; - - FinalLowerGC finalLowerGC; -}; - -bool FinalLowerGCLegacy::runOnFunction(Function &F) { - return finalLowerGC.runOnFunction(F); -} - -bool FinalLowerGCLegacy::doInitialization(Module &M) { - return finalLowerGC.doInitialization(M); -} - -bool FinalLowerGCLegacy::doFinalization(Module &M) { - return finalLowerGC.doFinalization(M); -} - - -PreservedAnalyses FinalLowerGCPass::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) { - auto finalLowerGC = FinalLowerGC(); - bool modified = false; - modified |= 
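The LOWER_INTRINSIC macro is essentially a dispatch: the call's callee is compared against each known GC intrinsic declaration and the matching lowering routine runs, while unrelated calls are left alone. The same structure written as a name-keyed handler table in plain C++ (hypothetical Call type, log-only handlers):

#include <cstdio>
#include <functional>
#include <map>
#include <string>

// Stand-in for a call site awaiting lowering.
struct Call { std::string callee; };

using Lowering = std::function<void(Call &)>;

int main() {
    // One handler per GC intrinsic; the bodies just log for illustration.
    std::map<std::string, Lowering> lowerings = {
        {"julia.new_gc_frame",      [](Call &) { std::puts("lower new_gc_frame"); }},
        {"julia.push_gc_frame",     [](Call &) { std::puts("lower push_gc_frame"); }},
        {"julia.pop_gc_frame",      [](Call &) { std::puts("lower pop_gc_frame"); }},
        {"julia.get_gc_frame_slot", [](Call &) { std::puts("lower get_gc_frame_slot"); }},
        {"julia.gc_alloc_bytes",    [](Call &) { std::puts("lower gc_alloc_bytes"); }},
        {"julia.queue_gc_root",     [](Call &) { std::puts("lower queue_gc_root"); }},
        {"julia.safepoint",         [](Call &) { std::puts("lower safepoint"); }},
    };

    Call calls[] = {{"julia.safepoint"}, {"llvm.memcpy"}, {"julia.gc_alloc_bytes"}};
    for (Call &c : calls) {
        auto it = lowerings.find(c.callee);
        if (it != lowerings.end())
            it->second(c);            // matched a GC intrinsic: lower it
        // unmatched calls are left untouched, as in the pass
    }
}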
finalLowerGC.doInitialization(M); - for (auto &F : M.functions()) { - if (F.isDeclaration()) - continue; - modified |= finalLowerGC.runOnFunction(F); - } - modified |= finalLowerGC.doFinalization(M); - if (modified) { + if (FinalLowerGC().runOnFunction(F)) { +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(F)); +#endif return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } - -char FinalLowerGCLegacy::ID = 0; -static RegisterPass X("FinalLowerGC", "Final GC intrinsic lowering pass", false, false); - -Pass *createFinalLowerGCPass() -{ - return new FinalLowerGCLegacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createFinalLowerGCPass()); -} diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp index f2dd821c9551b..8c8e2e4546d04 100644 --- a/src/llvm-gc-invariant-verifier.cpp +++ b/src/llvm-gc-invariant-verifier.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -26,7 +25,7 @@ #include #include -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #include "julia.h" #define DEBUG_TYPE "verify_gc_invariants" @@ -162,13 +161,17 @@ void GCInvariantVerifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { void GCInvariantVerifier::visitCallInst(CallInst &CI) { Function *Callee = CI.getCalledFunction(); if (Callee && (Callee->getName() == "julia.call" || - Callee->getName() == "julia.call2")) { - bool First = true; + Callee->getName() == "julia.call2" || + Callee->getName() == "julia.call3")) { + unsigned Fixed = CI.getFunctionType()->getNumParams(); for (Value *Arg : CI.args()) { + if (Fixed) { + Fixed--; + continue; + } Type *Ty = Arg->getType(); - Check(Ty->isPointerTy() && cast(Ty)->getAddressSpace() == (First ? 
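The verifier change adds julia.call3 and, instead of special-casing only the first argument, skips however many leading arguments belong to the callee's function type and requires every remaining jlcall argument to live in the Tracked address space. A sketch of that rule over a hypothetical argument list; the numeric address-space tags below are placeholders, not Julia's actual constants.

#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical argument model: just an address-space tag per argument.
enum AddressSpace { Generic = 0, Tracked = 10, Derived = 11 };

// Mirror of the verifier rule: the first `fixed` arguments belong to the
// callee's function type and are exempt; everything after must be Tracked.
bool jlcall_args_ok(const std::vector<int> &arg_addrspaces, unsigned fixed) {
    for (size_t i = 0; i < arg_addrspaces.size(); i++) {
        if (i < fixed)
            continue;                  // fixed parameter, not a GC-rooted argument
        if (arg_addrspaces[i] != Tracked)
            return false;              // derived/untracked pointer in a jlcall
    }
    return true;
}

int main() {
    // e.g. a jlcall whose first (fixed) argument is a raw function pointer.
    assert(jlcall_args_ok({Generic, Tracked, Tracked}, 1));
    assert(!jlcall_args_ok({Generic, Derived, Tracked}, 1));
}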
0 : AddressSpace::Tracked), + Check(Ty->isPointerTy() && cast(Ty)->getAddressSpace() == AddressSpace::Tracked, "Invalid derived pointer in jlcall", &CI); - First = false; } } } @@ -193,36 +196,3 @@ PreservedAnalyses GCInvariantVerifierPass::run(Function &F, FunctionAnalysisMana } return PreservedAnalyses::all(); } - -struct GCInvariantVerifierLegacy : public FunctionPass { - static char ID; - bool Strong; - GCInvariantVerifierLegacy(bool Strong=false) : FunctionPass(ID), Strong(Strong) {} - -public: - void getAnalysisUsage(AnalysisUsage &AU) const override { - FunctionPass::getAnalysisUsage(AU); - AU.setPreservesAll(); - } - - bool runOnFunction(Function &F) override { - GCInvariantVerifier GIV(Strong); - GIV.visit(F); - if (GIV.Broken) { - abort(); - } - return false; - } -}; - -char GCInvariantVerifierLegacy::ID = 0; -static RegisterPass X("GCInvariantVerifier", "GC Invariant Verification Pass", false, false); - -Pass *createGCInvariantVerifierPass(bool Strong) { - return new GCInvariantVerifierLegacy(Strong); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_impl(LLVMPassManagerRef PM, LLVMBool Strong) -{ - unwrap(PM)->add(createGCInvariantVerifierPass(Strong)); -} diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp index ad941adf2155d..e76beaa3df44f 100644 --- a/src/llvm-julia-licm.cpp +++ b/src/llvm-julia-licm.cpp @@ -8,18 +8,18 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include "llvm-pass-helpers.h" #include "julia.h" #include "llvm-alloc-helpers.h" -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #define DEBUG_TYPE "julia-licm" @@ -38,6 +38,12 @@ STATISTIC(HoistedAllocation, "Number of allocations hoisted out of a loop"); * loop context as well but it is inside a loop where they matter the most. */ +#ifndef __clang_gcanalyzer__ +#define REMARK(remark) ORE.emit(remark) +#else +#define REMARK(remark) (void) 0; +#endif + namespace { //Stolen and modified from LICM.cpp @@ -51,13 +57,13 @@ static void eraseInstruction(Instruction &I, //Stolen and modified from LICM.cpp static void moveInstructionBefore(Instruction &I, Instruction &Dest, MemorySSAUpdater &MSSAU, - ScalarEvolution *SE) { + ScalarEvolution *SE, + MemorySSA::InsertionPlace Place = MemorySSA::BeforeTerminator) { I.moveBefore(&Dest); if (MSSAU.getMemorySSA()) if (MemoryUseOrDef *OldMemAcc = cast_or_null( MSSAU.getMemorySSA()->getMemoryAccess(&I))) - MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), - MemorySSA::BeforeTerminator); + MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), Place); if (SE) SE->forgetValue(&I); } @@ -116,18 +122,6 @@ static bool makeLoopInvariant(Loop *L, Value *V, bool &Changed, Instruction *Ins return true; // All non-instructions are loop-invariant. } -struct JuliaLICMPassLegacy : public LoopPass { - static char ID; - JuliaLICMPassLegacy() : LoopPass(ID) {}; - - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - - protected: - void getAnalysisUsage(AnalysisUsage &AU) const override { - getLoopAnalysisUsage(AU); - } -}; - struct JuliaLICM : public JuliaPassContext { function_ref GetDT; function_ref GetLI; @@ -142,7 +136,7 @@ struct JuliaLICM : public JuliaPassContext { GetMSSA(GetMSSA), GetSE(GetSE) {} - bool runOnLoop(Loop *L) + bool runOnLoop(Loop *L, OptimizationRemarkEmitter &ORE) { // Get the preheader block to move instructions into, // required to run this pass. 
@@ -156,9 +150,11 @@ struct JuliaLICM : public JuliaPassContext { // `gc_preserve_end_func` is optional since the input to // `gc_preserve_end_func` must be from `gc_preserve_begin_func`. // We also hoist write barriers here, so we don't exit if write_barrier_func exists - if (!gc_preserve_begin_func && !write_barrier_func && !write_barrier_binding_func && - !alloc_obj_func) + if (!gc_preserve_begin_func && !write_barrier_func && + !alloc_obj_func) { + LLVM_DEBUG(dbgs() << "No gc_preserve_begin_func or write_barrier_func or alloc_obj_func found, skipping JuliaLICM\n"); return false; + } auto LI = &GetLI(); auto DT = &GetDT(); auto MSSA = GetMSSA(); @@ -168,7 +164,7 @@ struct JuliaLICM : public JuliaPassContext { // Lazy initialization of exit blocks insertion points. bool exit_pts_init = false; SmallVector _exit_pts; - auto get_exit_pts = [&] () -> ArrayRef { + auto get_exit_pts = [&] () -> MutableArrayRef { if (!exit_pts_init) { exit_pts_init = true; SmallVector exit_bbs; @@ -214,6 +210,11 @@ struct JuliaLICM : public JuliaPassContext { continue; ++HoistedPreserveBegin; moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE); + LLVM_DEBUG(dbgs() << "Hoisted gc_preserve_begin: " << *call << "\n"); + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Hoisted", call) + << "hoisting preserve begin " << ore::NV("PreserveBegin", call); + }); changed = true; } else if (callee == gc_preserve_end_func) { @@ -228,93 +229,139 @@ struct JuliaLICM : public JuliaPassContext { continue; } ++SunkPreserveEnd; - moveInstructionBefore(*call, *exit_pts[0], MSSAU, SE); + moveInstructionBefore(*call, *exit_pts[0], MSSAU, SE, MemorySSA::Beginning); + exit_pts[0] = call; + LLVM_DEBUG(dbgs() << "Sunk gc_preserve_end: " << *call << "\n"); + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Sunk", call) + << "sinking preserve end " << ore::NV("PreserveEnd", call); + }); for (unsigned i = 1; i < exit_pts.size(); i++) { // Clone exit auto CI = CallInst::Create(call, {}, exit_pts[i]); + exit_pts[i] = CI; createNewInstruction(CI, call, MSSAU); + LLVM_DEBUG(dbgs() << "Cloned and sunk gc_preserve_end: " << *CI << "\n"); + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Sunk", call) + << "cloning and sinking preserve end" << ore::NV("PreserveEnd", call); + }); } } - else if (callee == write_barrier_func || - callee == write_barrier_binding_func) { + else if (callee == write_barrier_func) { bool valid = true; for (std::size_t i = 0; i < call->arg_size(); i++) { if (!makeLoopInvariant(L, call->getArgOperand(i), changed, preheader->getTerminator(), MSSAU, SE)) { valid = false; + LLVM_DEBUG(dbgs() << "Failed to hoist write barrier argument: " << *call->getArgOperand(i) << "\n"); break; } } - if (valid) { - ++HoistedWriteBarrier; - moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE); - changed = true; + if (!valid) { + LLVM_DEBUG(dbgs() << "Failed to hoist write barrier: " << *call << "\n"); + continue; } + ++HoistedWriteBarrier; + moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE); + changed = true; + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Hoist", call) + << "hoisting write barrier " << ore::NV("GC Write Barrier", call); + }); } else if (callee == alloc_obj_func) { - jl_alloc::AllocUseInfo use_info; - jl_alloc::CheckInst::Stack check_stack; - jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, *this, DL}; - jl_alloc::runEscapeAnalysis(call, required, jl_alloc::EscapeAnalysisOptionalArgs().with_valid_set(&L->getBlocksSet())); - if 
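Shape of the preserve-intrinsic transformation in JuliaLICM: gc_preserve_begin is hoisted into the preheader, and gc_preserve_end is sunk to the first exit and cloned into every other exit so the preserved range still covers the whole loop. A toy-block sketch follows; Block and the instruction strings are invented for illustration and carry none of the MemorySSA bookkeeping the pass performs.

#include <cstdio>
#include <string>
#include <vector>

// Toy blocks: each is just a list of instruction names.
struct Block { std::string name; std::vector<std::string> insts; };

// Hoist gc_preserve_begin into the preheader and sink/clone gc_preserve_end
// into every loop exit -- the overall shape of the transformation above.
void hoist_preserve(Block &preheader, Block &body, std::vector<Block> &exits) {
    std::vector<std::string> kept;
    for (auto &inst : body.insts) {
        if (inst == "gc_preserve_begin") {
            preheader.insts.push_back(inst);          // hoisted above the loop
        } else if (inst == "gc_preserve_end") {
            for (auto &exit : exits)                  // one copy per exit block
                exit.insts.insert(exit.insts.begin(), inst);
        } else {
            kept.push_back(inst);
        }
    }
    body.insts = kept;
}

int main() {
    Block pre{"preheader", {}}, body{"body", {"gc_preserve_begin", "work", "gc_preserve_end"}};
    std::vector<Block> exits = {{"exit1", {"ret"}}, {"exit2", {"ret"}}};
    hoist_preserve(pre, body, exits);
    std::printf("preheader: %zu insts, body: %zu insts, exit1 front: %s\n",
                pre.insts.size(), body.insts.size(), exits[0].insts.front().c_str());
}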
(use_info.escaped || use_info.addrescaped) { - continue; - } bool valid = true; for (std::size_t i = 0; i < call->arg_size(); i++) { if (!makeLoopInvariant(L, call->getArgOperand(i), changed, preheader->getTerminator(), MSSAU, SE)) { valid = false; + LLVM_DEBUG(dbgs() << "Failed to hoist alloc_obj argument: " << *call->getArgOperand(i) << "\n"); break; } } + if (!valid) { + LLVM_DEBUG(dbgs() << "Failed to hoist alloc_obj: " << *call << "\n"); + continue; + } + LLVM_DEBUG(dbgs() << "Running escape analysis for " << *call << "\n"); + jl_alloc::AllocUseInfo use_info; + jl_alloc::CheckInst::Stack check_stack; + jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, *this, DL}; + jl_alloc::runEscapeAnalysis(call, required, jl_alloc::EscapeAnalysisOptionalArgs().with_valid_set(&L->getBlocksSet()).with_optimization_remark_emitter(&ORE)); + REMARK([&](){ + std::string suse_info; + llvm::raw_string_ostream osuse_info(suse_info); + use_info.dump(osuse_info); + return OptimizationRemarkAnalysis(DEBUG_TYPE, "EscapeAnalysis", call) << "escape analysis for " << ore::NV("GC Allocation", call) << "\n" << ore::NV("UseInfo", osuse_info.str()); + }); + if (use_info.escaped) { + REMARK([&](){ + return OptimizationRemarkMissed(DEBUG_TYPE, "Escape", call) + << "not hoisting gc allocation " << ore::NV("GC Allocation", call) + << " because it may escape"; + }); + continue; + } + if (use_info.addrescaped) { + REMARK([&](){ + return OptimizationRemarkMissed(DEBUG_TYPE, "Escape", call) + << "not hoisting gc allocation " << ore::NV("GC Allocation", call) + << " because its address may escape"; + }); + continue; + } if (use_info.refstore) { // We need to add write barriers to any stores // that may start crossing generations + REMARK([&](){ + return OptimizationRemarkMissed(DEBUG_TYPE, "Escape", call) + << "not hoisting gc allocation " << ore::NV("GC Allocation", call) + << " because it may have an object stored to it"; + }); continue; } - if (valid) { - ++HoistedAllocation; - moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE); - changed = true; + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Hoist", call) + << "hoisting gc allocation " << ore::NV("GC Allocation", call); + }); + ++HoistedAllocation; + moveInstructionBefore(*call, *preheader->getTerminator(), MSSAU, SE); + IRBuilder<> builder(preheader->getTerminator()); + builder.SetCurrentDebugLocation(call->getDebugLoc()); + auto obj_i8 = builder.CreateBitCast(call, Type::getInt8PtrTy(call->getContext(), call->getType()->getPointerAddressSpace())); + // Note that this alignment is assuming the GC allocates at least pointer-aligned memory + auto align = Align(DL.getPointerSize(0)); + auto clear_obj = builder.CreateMemSet(obj_i8, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align); + if (MSSAU.getMemorySSA()) { + auto clear_mdef = MSSAU.createMemoryAccessInBB(clear_obj, nullptr, clear_obj->getParent(), MemorySSA::BeforeTerminator); + MSSAU.insertDef(cast(clear_mdef), true); } + changed = true; } } } if (changed && SE) { +#if JL_LLVM_VERSION >= 160000 + SE->forgetLoopDispositions(); +#else SE->forgetLoopDispositions(L); +#endif } - assert(!verifyFunction(*L->getHeader()->getParent(), &errs())); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(*L)); +#endif return changed; } }; -bool JuliaLICMPassLegacy::runOnLoop(Loop *L, LPPassManager &LPM) { - auto GetDT = [this]() -> DominatorTree & { - return getAnalysis().getDomTree(); - }; - auto GetLI = [this]() -> LoopInfo & { - return 
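The allocation-hoisting branch above only fires when every argument can be made loop-invariant and escape analysis reports that the object does not escape, its address does not escape, and no references are stored into it; each failed condition now produces a missed-optimization remark. A sketch of just that gating decision, where UseInfo is a stand-in for jl_alloc::AllocUseInfo:

#include <cstdio>

// Summary of the escape-analysis facts consulted above (hypothetical struct).
struct UseInfo {
    bool escaped = false;        // the object itself may escape the loop
    bool addrescaped = false;    // its address may escape
    bool refstore = false;       // other GC references may be stored into it
};

// The hoisting decision mirrored from the code above: every reason to bail
// out is checked before the allocation is moved to the preheader.
bool can_hoist_allocation(bool args_loop_invariant, const UseInfo &u) {
    if (!args_loop_invariant) return false;
    if (u.escaped)            return false;
    if (u.addrescaped)        return false;
    if (u.refstore)           return false;   // stores could start crossing generations
    return true;
}

int main() {
    std::printf("%d\n", can_hoist_allocation(true, {}));                    // 1: hoistable
    std::printf("%d\n", can_hoist_allocation(true, {false, false, true}));  // 0: stores refs
}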
getAnalysis().getLoopInfo(); - }; - auto GetMSSA = []() { - return nullptr; - }; - auto GetSE = []() { - return nullptr; - }; - auto juliaLICM = JuliaLICM(GetDT, GetLI, GetMSSA, GetSE); - return juliaLICM.runOnLoop(L); -} - -char JuliaLICMPassLegacy::ID = 0; -static RegisterPass - Y("JuliaLICM", "LICM for julia specific intrinsics.", - false, false); } //namespace PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { + OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); auto GetDT = [&AR]() -> DominatorTree & { return AR.DT; }; @@ -328,7 +375,11 @@ PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM, return &AR.SE; }; auto juliaLICM = JuliaLICM(GetDT, GetLI, GetMSSA, GetSE); - if (juliaLICM.runOnLoop(&L)) { + if (juliaLICM.runOnLoop(&L, ORE)) { +#ifdef JL_DEBUG_BUILD + if (AR.MSSA) + AR.MSSA->verifyMemorySSA(); +#endif auto preserved = getLoopPassPreservedAnalyses(); preserved.preserveSet(); preserved.preserve(); @@ -336,13 +387,3 @@ PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM, } return PreservedAnalyses::all(); } - -Pass *createJuliaLICMPass() -{ - return new JuliaLICMPassLegacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraJuliaLICMPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createJuliaLICMPass()); -} diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc new file mode 100644 index 0000000000000..bd89c01c6fdfe --- /dev/null +++ b/src/llvm-julia-passes.inc @@ -0,0 +1,27 @@ +//Module passes +#ifdef MODULE_PASS +MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) +MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) +MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) +MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass()) +MODULE_PASS("LowerPTLSPass", LowerPTLSPass, LowerPTLSPass()) +#endif + +//Function passes +#ifdef FUNCTION_PASS +FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass, DemoteFloat16Pass()) +FUNCTION_PASS("CombineMulAdd", CombineMulAddPass, CombineMulAddPass()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass, LateLowerGCPass()) +FUNCTION_PASS("AllocOpt", AllocOptPass, AllocOptPass()) +FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass, PropagateJuliaAddrspacesPass()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass, LowerExcHandlersPass()) +FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass, GCInvariantVerifierPass()) +FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) +#endif + +//Loop passes +#ifdef LOOP_PASS +LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) +LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) +#endif diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 36507e011f620..edb3aad8f2328 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -7,9 +7,11 @@ #include #include +#include #include #include #include +#include #include "llvm/Analysis/CFG.h" #include #include @@ -17,9 +19,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -29,12 +31,13 @@ #include -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #include "julia.h" #include "julia_internal.h" #include "julia_assert.h" #include "llvm-pass-helpers.h" #include +#include #define DEBUG_TYPE "late_lower_gcroot" @@ -137,7 +140,7 
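llvm-julia-passes.inc is an X-macro table: each consumer defines MODULE_PASS/FUNCTION_PASS/LOOP_PASS to whatever expansion it needs (registration, name parsing, printing) and includes the file. Below is a self-contained sketch of the same pattern with an inline table; the subset of pass names is just for illustration and the Pass/factory types are invented.

#include <cstdio>
#include <functional>
#include <map>
#include <string>

struct Pass { std::string name; };

// The pass table, expanded more than once with different macro definitions --
// the same shape as the MODULE_PASS/FUNCTION_PASS/LOOP_PASS blocks above.
#define JULIA_FUNCTION_PASSES(X) \
    X("DemoteFloat16")           \
    X("LateLowerGCFrame")        \
    X("FinalLowerGC")

int main() {
    // Expansion 1: emit the registered pass names.
#define PRINT_NAME(NAME) std::printf("registered: %s\n", NAME);
    JULIA_FUNCTION_PASSES(PRINT_NAME)
#undef PRINT_NAME

    // Expansion 2: build a name -> factory map, as a pass builder would.
    std::map<std::string, std::function<Pass()>> factories;
#define ADD_FACTORY(NAME) factories[NAME] = [] { return Pass{NAME}; };
    JULIA_FUNCTION_PASSES(ADD_FACTORY)
#undef ADD_FACTORY
    std::printf("factories registered: %zu\n", factories.size());
}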
@@ using namespace llvm; not sunk into the gc frame. Nevertheless performing such sinking can still be profitable. Since all arguments to a jlcall are guaranteed to be live at that call in some gc slot, we can attempt to rearrange the slots within - the gc-frame, or re-use slots not assigned at that particular location + the gc-frame, or reuse slots not assigned at that particular location for the gcframe. However, even without this optimization, stack frames are at most two times larger than optimal (because regular stack coloring can merge the jlcall allocas). @@ -224,16 +227,21 @@ using namespace llvm; simply sink the alloca into the GCFrame. */ +// 4096 bits == 64 words (64 bit words). Larger bit numbers are faster and doing something +// substantially smaller here doesn't actually save much memory because of malloc overhead. +// Too large is bad also though - 4096 was found to be a reasonable middle ground. +using LargeSparseBitVector = SparseBitVector<4096>; + struct BBState { // Uses in this BB // These do not get updated after local analysis - BitVector Defs; - BitVector PhiOuts; - BitVector UpExposedUses; + LargeSparseBitVector Defs; + LargeSparseBitVector PhiOuts; + LargeSparseBitVector UpExposedUses; // These get updated during dataflow - BitVector LiveIn; - BitVector LiveOut; - std::vector Safepoints; + LargeSparseBitVector LiveIn; + LargeSparseBitVector LiveOut; + SmallVector Safepoints; int TopmostSafepoint = -1; bool HasSafepoint = false; // Have we gone through this basic block in our local scan yet? @@ -251,14 +259,14 @@ struct State { // Cache of numbers assigned to IR values. This includes caching of numbers // for derived values std::map AllPtrNumbering; - std::map> AllCompositeNumbering; + std::map> AllCompositeNumbering; // The reverse of the previous maps std::map ReversePtrNumbering; // Neighbors in the coloring interference graph. I.e. for each value, the // indices of other values that are used simultaneously at some safe point. - std::vector> Neighbors; + SmallVector Neighbors; // The result of the local analysis - std::map BBStates; + std::map BBStates; // Refinement map. If all of the values are rooted // (-1 means an externally rooted value and -2 means a globally/permanently rooted value), @@ -272,55 +280,38 @@ struct State { // GC preserves map. All safepoints dominated by the map key, but not any // of its uses need to preserve the values listed in the map value. - std::map> GCPreserves; + std::map> GCPreserves; // The assignment of numbers to safepoints. The indices in the map // are indices into the next three maps which store safepoint properties std::map SafepointNumbering; // Reverse mapping index -> safepoint - std::vector ReverseSafepointNumbering; + SmallVector ReverseSafepointNumbering; // Instructions that can return twice. For now, all values live at these // instructions will get their own, dedicated GC frame slots, because they // have unobservable control flow, so we can't be sure where they're // actually live. All of these are also considered safepoints. - std::vector ReturnsTwice; + SmallVector ReturnsTwice; // The set of values live at a particular safepoint - std::vector LiveSets; + SmallVector< LargeSparseBitVector , 0> LiveSets; // Those values that - if live out from our parent basic block - are live // at this safepoint. - std::vector> LiveIfLiveOut; + SmallVector> LiveIfLiveOut; // The set of values that are kept alive by the callee. 
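The switch from BitVector to SparseBitVector<4096> trades a dense bit array for a list of large chunks, so very sparse liveness sets over many pointer numbers stay cheap while per-node overhead is amortized, which is exactly the trade-off the 4096-bit comment weighs. Here is a minimal chunked bitset illustrating the idea; it is not LLVM's implementation, and the 64-bit chunk size is chosen only to keep the sketch short.

#include <cassert>
#include <cstdint>
#include <map>

// Bits are grouped into fixed-size chunks and only chunks containing set bits
// are materialized; a larger chunk means fewer nodes to allocate and walk, at
// the cost of some wasted bits per node.
class SparseBits {
    static constexpr unsigned ChunkBits = 64;
    std::map<unsigned, uint64_t> chunks;    // chunk index -> bit mask
public:
    void set(unsigned i)   { chunks[i / ChunkBits] |= uint64_t(1) << (i % ChunkBits); }
    void reset(unsigned i) {
        auto it = chunks.find(i / ChunkBits);
        if (it != chunks.end()) it->second &= ~(uint64_t(1) << (i % ChunkBits));
    }
    bool test(unsigned i) const {
        auto it = chunks.find(i / ChunkBits);
        return it != chunks.end() && ((it->second >> (i % ChunkBits)) & 1) != 0;
    }
    // Union-in another set; returns true if anything changed (handy for dataflow).
    bool unite(const SparseBits &other) {
        bool changed = false;
        for (auto &kv : other.chunks) {
            uint64_t before = chunks[kv.first];
            chunks[kv.first] = before | kv.second;
            changed = changed || chunks[kv.first] != before;
        }
        return changed;
    }
};

int main() {
    SparseBits live;
    live.set(3); live.set(70000);            // far-apart bits stay cheap
    assert(live.test(3) && live.test(70000) && !live.test(4));
    SparseBits uses; uses.set(4);
    assert(live.unite(uses) && live.test(4));
    live.reset(3);
    assert(!live.test(3));
}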
- std::vector> CalleeRoots; + SmallVector> CalleeRoots; // We don't bother doing liveness on Allocas that were not mem2reg'ed. // they just get directly sunk into the root array. - std::vector Allocas; + SmallVector Allocas; DenseMap ArrayAllocas; DenseMap ShadowAllocas; - std::vector> TrackedStores; + SmallVector, 0> TrackedStores; State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {} }; - -struct LateLowerGCFrameLegacy: public FunctionPass { - static char ID; - LateLowerGCFrameLegacy() : FunctionPass(ID) {} - -protected: - void getAnalysisUsage(AnalysisUsage &AU) const override { - FunctionPass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } - -private: - bool runOnFunction(Function &F) override; -}; - struct LateLowerGCFrame: private JuliaPassContext { function_ref GetDT; LateLowerGCFrame(function_ref GetDT) : GetDT(GetDT) {} @@ -331,8 +322,9 @@ struct LateLowerGCFrame: private JuliaPassContext { private: CallInst *pgcstack; - void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector &SafepointsSoFar, SmallVector &&RefinedPtr = SmallVector()); - void NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses); + void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef &SafepointsSoFar, + SmallVector &&RefinedPtr = SmallVector()); + void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses); void NoteUse(State &S, BBState &BBS, Value *V) { NoteUse(S, BBS, V, BBS.UpExposedUses); } @@ -340,13 +332,13 @@ struct LateLowerGCFrame: private JuliaPassContext { void LiftPhi(State &S, PHINode *Phi); void LiftSelect(State &S, SelectInst *SI); Value *MaybeExtractScalar(State &S, std::pair ValExpr, Instruction *InsertBefore); - std::vector MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore); + SmallVector MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore); Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore); int Number(State &S, Value *V); int NumberBase(State &S, Value *Base); - std::vector NumberAll(State &S, Value *V); - std::vector NumberAllBase(State &S, Value *Base); + SmallVector NumberAll(State &S, Value *V); + SmallVector NumberAllBase(State &S, Value *Base); void NoteOperandUses(State &S, BBState &BBS, User &UI); void MaybeTrackDst(State &S, MemTransferInst *MI); @@ -354,17 +346,17 @@ struct LateLowerGCFrame: private JuliaPassContext { State LocalScan(Function &F); void ComputeLiveness(State &S); void ComputeLiveSets(State &S); - std::vector ColorRoots(const State &S); - void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector &Colors, Value *GCFrame, Instruction *InsertBefore); - void PlaceGCFrameStores(State &S, unsigned MinColorRoot, const std::vector &Colors, Value *GCFrame); - void PlaceRootsAndUpdateCalls(std::vector &Colors, State &S, std::map>); + SmallVector ColorRoots(const State &S); + void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, ArrayRef Colors, Value *GCFrame, Instruction *InsertBefore); + void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef Colors, Value *GCFrame); + void PlaceRootsAndUpdateCalls(SmallVectorImpl &Colors, State &S, std::map>); bool CleanupIR(Function &F, State *S, bool *CFGModified); void NoteUseChain(State &S, BBState &BBS, User *TheUser); SmallVector GetPHIRefinements(PHINode *phi, State &S); void FixUpRefinements(ArrayRef PHINumbers, State &S); - void RefineLiveSet(BitVector &LS, State &S, const std::vector 
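These per-block sets feed a backward liveness solve; ComputeLiveness itself is not shown in this hunk, and it additionally threads phi edges and safepoints through the same sets, so the following is only the textbook core under that assumption: a value is live into a block if it is up-exposed there or live out and not defined, and live out is the union of the successors' live-in.

#include <cstdio>
#include <set>
#include <vector>

using Set = std::set<int>;

struct BB {
    Set Defs, UpExposedUses;       // filled in by the local scan
    Set LiveIn, LiveOut;           // computed by the dataflow below
    std::vector<int> succs;        // successor block indices
};

static Set unite(const Set &a, const Set &b) { Set r = a; r.insert(b.begin(), b.end()); return r; }
static Set minus(const Set &a, const Set &b) { Set r; for (int x : a) if (!b.count(x)) r.insert(x); return r; }

// Iterate to a fixed point:
//   LiveOut = union of LiveIn(succ);   LiveIn = UpExposedUses u (LiveOut \ Defs)
void compute_liveness(std::vector<BB> &blocks) {
    bool changed = true;
    while (changed) {
        changed = false;
        for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
            BB &bb = *it;
            Set out;
            for (int s : bb.succs)
                out = unite(out, blocks[s].LiveIn);
            Set in = unite(bb.UpExposedUses, minus(out, bb.Defs));
            if (in != bb.LiveIn || out != bb.LiveOut) {
                bb.LiveIn = in; bb.LiveOut = out;
                changed = true;
            }
        }
    }
}

int main() {
    // Block 0 defines value 1, block 1 uses it: value 1 must be live out of block 0.
    std::vector<BB> blocks(2);
    blocks[0].Defs = {1};           blocks[0].succs = {1};
    blocks[1].UpExposedUses = {1};  blocks[1].succs = {};
    compute_liveness(blocks);
    std::printf("live out of block 0: %zu value(s)\n", blocks[0].LiveOut.size());
}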
&CalleeRoots); - Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V); - Value *EmitLoadTag(IRBuilder<> &builder, Value *V); + void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef CalleeRoots); + Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V); + Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V); }; static unsigned getValueAddrSpace(Value *V) { @@ -386,16 +378,18 @@ static bool isSpecialPtr(Type *Ty) { // return how many Special pointers are in T (count > 0), // and if there is anything else in T (all == false) -CountTrackedPointers::CountTrackedPointers(Type *T) { +CountTrackedPointers::CountTrackedPointers(Type *T, bool ignore_loaded) { if (isa(T)) { if (isSpecialPtr(T)) { + if (ignore_loaded && T->getPointerAddressSpace() == AddressSpace::Loaded) + return; count++; if (T->getPointerAddressSpace() != AddressSpace::Tracked) derived = true; } } else if (isa(T) || isa(T) || isa(T)) { for (Type *ElT : T->subtypes()) { - auto sub = CountTrackedPointers(ElT); + auto sub = CountTrackedPointers(ElT, ignore_loaded); count += sub.count; all &= sub.all; derived |= sub.derived; @@ -411,6 +405,20 @@ CountTrackedPointers::CountTrackedPointers(Type *T) { all = false; } +bool hasLoadedTy(Type *T) { + if (isa(T)) { + if (T->getPointerAddressSpace() == AddressSpace::Loaded) + return true; + } else if (isa(T) || isa(T) || isa(T)) { + for (Type *ElT : T->subtypes()) { + if (hasLoadedTy(ElT)) + return true; + } + } + return false; +} + + unsigned getCompositeNumElements(Type *T) { if (auto *ST = dyn_cast(T)) return ST->getNumElements(); @@ -423,9 +431,9 @@ unsigned getCompositeNumElements(Type *T) { } // Walk through a Type, and record the element path to every tracked value inside -void TrackCompositeType(Type *T, std::vector &Idxs, std::vector> &Numberings) { +void TrackCompositeType(Type *T, SmallVector &Idxs, SmallVector, 0> &Numberings) { if (isa(T)) { - if (T->getPointerAddressSpace() == AddressSpace::Tracked) + if (isSpecialPtr(T)) Numberings.push_back(Idxs); } else if (isa(T) || isa(T) || isa(T)) { @@ -439,9 +447,9 @@ void TrackCompositeType(Type *T, std::vector &Idxs, std::vector> TrackCompositeType(Type *T) { - std::vector Idxs; - std::vector> Numberings; +SmallVector, 0> TrackCompositeType(Type *T) { + SmallVector Idxs; + SmallVector, 0> Numberings; TrackCompositeType(T, Idxs, Numberings); return Numberings; } @@ -492,18 +500,19 @@ static std::pair FindBaseValue(const State &S, Value *V, bool UseCac CurrentV = EEI->getVectorOperand(); } else if (auto LI = dyn_cast(CurrentV)) { - if (auto PtrT = dyn_cast(LI->getType()->getScalarType())) { - if (PtrT->getAddressSpace() == AddressSpace::Loaded) { - CurrentV = LI->getPointerOperand(); - fld_idx = -1; - if (!isSpecialPtr(CurrentV->getType())) { - // This could really be anything, but it's not loaded - // from a tracked pointer, so it doesn't matter what - // it is--just pick something simple. - CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext())); - } - continue; + if (hasLoadedTy(LI->getType())) { + // This is the old (now deprecated) implementation for loaded. + // New code should use the gc_loaded intrinsic to ensure that + // the load is paired with the correct Tracked value. + CurrentV = LI->getPointerOperand(); + fld_idx = -1; + if (!isSpecialPtr(CurrentV->getType())) { + // This could really be anything, but it's not loaded + // from a tracked pointer, so it doesn't matter what + // it is--just pick something simple. 
+ CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext())); } + continue; } // In general a load terminates a walk break; @@ -525,36 +534,42 @@ static std::pair FindBaseValue(const State &S, Value *V, bool UseCac if (II->getIntrinsicID() == Intrinsic::masked_load || II->getIntrinsicID() == Intrinsic::masked_gather) { if (auto VTy = dyn_cast(II->getType())) { - if (auto PtrT = dyn_cast(VTy->getElementType())) { - if (PtrT->getAddressSpace() == AddressSpace::Loaded) { - Value *Mask = II->getOperand(2); - Value *Passthrough = II->getOperand(3); - if (!isa(Mask) || !cast(Mask)->isAllOnesValue()) { - assert(isa(Passthrough) && "unimplemented"); - (void)Passthrough; + if (hasLoadedTy(VTy->getElementType())) { + Value *Mask = II->getOperand(2); + Value *Passthrough = II->getOperand(3); + if (!isa(Mask) || !cast(Mask)->isAllOnesValue()) { + assert(isa(Passthrough) && "unimplemented"); + (void)Passthrough; + } + CurrentV = II->getOperand(0); + if (II->getIntrinsicID() == Intrinsic::masked_load) { + fld_idx = -1; + if (!isSpecialPtr(CurrentV->getType())) { + CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext())); } - CurrentV = II->getOperand(0); - if (II->getIntrinsicID() == Intrinsic::masked_load) { - fld_idx = -1; - if (!isSpecialPtr(CurrentV->getType())) { + } else { + if (auto VTy2 = dyn_cast(CurrentV->getType())) { + if (!isSpecialPtr(VTy2->getElementType())) { CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext())); - } - } else { - if (auto VTy2 = dyn_cast(CurrentV->getType())) { - if (!isSpecialPtr(VTy2->getElementType())) { - CurrentV = ConstantPointerNull::get(Type::getInt8PtrTy(V->getContext())); - fld_idx = -1; - } + fld_idx = -1; } } - continue; } + continue; } } // In general a load terminates a walk break; } } + else if (auto CI = dyn_cast(CurrentV)) { + auto callee = CI->getCalledFunction(); + if (callee && callee->getName() == "julia.gc_loaded") { + CurrentV = CI->getArgOperand(0); + continue; + } + break; + } else { break; } @@ -585,7 +600,7 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair ValE } else if (ValExpr.second != -1) { auto Tracked = TrackCompositeType(V->getType()); - auto Idxs = makeArrayRef(Tracked.at(ValExpr.second)); + auto Idxs = ArrayRef(Tracked[ValExpr.second]); auto IdxsNotVec = Idxs.slice(0, Idxs.size() - 1); Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec); bool IsVector = isa(FinalT); @@ -594,7 +609,7 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair ValE if (T->getAddressSpace() != AddressSpace::Tracked) { // if V isn't tracked, get the shadow def auto Numbers = NumberAllBase(S, V); - int BaseNumber = Numbers.at(ValExpr.second); + int BaseNumber = Numbers[ValExpr.second]; if (BaseNumber >= 0) V = GetPtrForNumber(S, BaseNumber, InsertBefore); else @@ -611,9 +626,9 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair ValE return V; } -std::vector LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore) { +SmallVector LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore) { auto Numbers = NumberAllBase(S, BaseVec); - std::vector V{Numbers.size()}; + SmallVector V{Numbers.size()}; Value *V_rnull = ConstantPointerNull::get(cast(T_prjlvalue)); for (unsigned i = 0; i < V.size(); ++i) { if (Numbers[i] >= 0) // ignores undef and poison values @@ -629,7 +644,7 @@ Value *LateLowerGCFrame::GetPtrForNumber(State &S, unsigned Num, Instruction *In Value *Val = 
S.ReversePtrNumbering[Num]; unsigned Idx = -1; if (!isa(Val->getType())) { - const std::vector &AllNums = S.AllCompositeNumbering[Val]; + const SmallVector &AllNums = S.AllCompositeNumbering[Val]; for (Idx = 0; Idx < AllNums.size(); ++Idx) { if ((unsigned)AllNums[Idx] == Num) break; @@ -646,20 +661,17 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) { // already visited here--nothing to do return; } - std::vector Numbers; - unsigned NumRoots = 1; - if (auto VTy = dyn_cast(SI->getType())) { - ElementCount EC = VTy->getElementCount(); - Numbers.resize(EC.getKnownMinValue(), -1); - } - else - assert(isa(SI->getType()) && "unimplemented"); assert(!isTrackedValue(SI)); + SmallVector Numbers; + unsigned NumRoots = 1; + Type *STy = SI->getType(); + if (!isa(STy)) + Numbers.resize(CountTrackedPointers(STy).count, -1); // find the base root for the arguments Value *TrueBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getTrueValue(), false), SI); Value *FalseBase = MaybeExtractScalar(S, FindBaseValue(S, SI->getFalseValue(), false), SI); - std::vector TrueBases; - std::vector FalseBases; + SmallVector TrueBases; + SmallVector FalseBases; if (!isa(TrueBase->getType())) { TrueBases = MaybeExtractVector(S, TrueBase, SI); assert(TrueBases.size() == Numbers.size()); @@ -694,8 +706,11 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) { ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i), "", SI); } - if (FalseElem->getType() != TrueElem->getType()) + if (FalseElem->getType() != TrueElem->getType()) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(FalseElem->getContext().supportsTypedPointers()); FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI); + } SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI); int Number = ++S.MaxPtrNumber; S.AllPtrNumbering[SelectBase] = Number; @@ -726,22 +741,19 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { return; // need to handle each element (may just be one scalar) SmallVector lifted; - std::vector Numbers; + SmallVector Numbers; unsigned NumRoots = 1; - if (auto VTy = dyn_cast(Phi->getType())) { - NumRoots = VTy->getNumElements(); + Type *PTy = Phi->getType(); + if (!isa(PTy)) { + NumRoots = CountTrackedPointers(PTy).count; Numbers.resize(NumRoots); } - else { - // TODO: SVE - assert(isa(Phi->getType()) && "unimplemented"); - } for (unsigned i = 0; i < NumRoots; ++i) { PHINode *lift = PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi); int Number = ++S.MaxPtrNumber; S.AllPtrNumbering[lift] = Number; S.ReversePtrNumbering[Number] = lift; - if (!isa(Phi->getType())) + if (isa(PTy)) S.AllPtrNumbering[Phi] = Number; else Numbers[i] = Number; @@ -749,12 +761,13 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { } if (!isa(Phi->getType())) S.AllCompositeNumbering[Phi] = Numbers; + SmallVector, 4> CastedRoots(NumRoots); for (unsigned i = 0; i < Phi->getNumIncomingValues(); ++i) { Value *Incoming = Phi->getIncomingValue(i); BasicBlock *IncomingBB = Phi->getIncomingBlock(i); Instruction *Terminator = IncomingBB->getTerminator(); Value *Base = MaybeExtractScalar(S, FindBaseValue(S, Incoming, false), Terminator); - std::vector IncomingBases; + SmallVector IncomingBases; if (!isa(Base->getType())) { IncomingBases = MaybeExtractVector(S, Base, Terminator); assert(IncomingBases.size() == NumRoots); @@ -766,8 +779,29 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { BaseElem = Base; else BaseElem = 
IncomingBases[i]; - if (BaseElem->getType() != T_prjlvalue) - BaseElem = new BitCastInst(BaseElem, T_prjlvalue, "", Terminator); + if (BaseElem->getType() != T_prjlvalue) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(BaseElem->getContext().supportsTypedPointers()); + auto &remap = CastedRoots[i][BaseElem]; + if (!remap) { + if (auto constant = dyn_cast(BaseElem)) { + remap = ConstantExpr::getBitCast(constant, T_prjlvalue, ""); + } else { + Instruction *InsertBefore; + if (auto arg = dyn_cast(BaseElem)) { + InsertBefore = &*arg->getParent()->getEntryBlock().getFirstInsertionPt(); + } else { + assert(isa(BaseElem) && "Unknown value type detected!"); + InsertBefore = cast(BaseElem)->getNextNonDebugInstruction(); + } + while (isa(InsertBefore)) { + InsertBefore = InsertBefore->getNextNonDebugInstruction(); + } + remap = new BitCastInst(BaseElem, T_prjlvalue, "", InsertBefore); + } + } + BaseElem = remap; + } lift->addIncoming(BaseElem, IncomingBB); } } @@ -793,11 +827,11 @@ int LateLowerGCFrame::NumberBase(State &S, Value *CurrentV) Number = -1; } else if (isa(CurrentV) && !isTrackedValue(CurrentV)) { LiftSelect(S, cast(CurrentV)); - Number = S.AllPtrNumbering.at(CurrentV); + Number = S.AllPtrNumbering[CurrentV]; return Number; } else if (isa(CurrentV) && !isTrackedValue(CurrentV)) { LiftPhi(S, cast(CurrentV)); - Number = S.AllPtrNumbering.at(CurrentV); + Number = S.AllPtrNumbering[CurrentV]; return Number; } else if (isa(CurrentV)) { auto Numbers = NumberAllBase(S, CurrentV); @@ -820,7 +854,7 @@ int LateLowerGCFrame::Number(State &S, Value *V) { Number = NumberBase(S, CurrentV.first); } else { auto Numbers = NumberAllBase(S, CurrentV.first); - Number = Numbers.at(CurrentV.second); + Number = Numbers[CurrentV.second]; } if (V != CurrentV.first) S.AllPtrNumbering[V] = Number; @@ -828,18 +862,18 @@ int LateLowerGCFrame::Number(State &S, Value *V) { } // assign pointer numbers to a def instruction -std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { +SmallVector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { if (isa(CurrentV->getType())) { auto it = S.AllPtrNumbering.find(CurrentV); if (it != S.AllPtrNumbering.end()) - return std::vector({it->second}); + return SmallVector({it->second}); } else { auto it = S.AllCompositeNumbering.find(CurrentV); if (it != S.AllCompositeNumbering.end()) return it->second; } - std::vector Numbers; + SmallVector Numbers; auto tracked = CountTrackedPointers(CurrentV->getType()); if (tracked.count == 0) return Numbers; @@ -848,16 +882,16 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { Numbers.resize(tracked.count, -1); } else if (auto *SVI = dyn_cast(CurrentV)) { - std::vector Numbers1 = NumberAll(S, SVI->getOperand(0)); - std::vector Numbers2 = NumberAll(S, SVI->getOperand(1)); + SmallVector Numbers1 = NumberAll(S, SVI->getOperand(0)); + SmallVector Numbers2 = NumberAll(S, SVI->getOperand(1)); auto Mask = SVI->getShuffleMask(); for (auto idx : Mask) { if (idx == -1) { Numbers.push_back(-1); } else if ((unsigned)idx < Numbers1.size()) { - Numbers.push_back(Numbers1.at(idx)); + Numbers.push_back(Numbers1[idx]); } else { - Numbers.push_back(Numbers2.at(idx - Numbers1.size())); + Numbers.push_back(Numbers2[idx - Numbers1.size()]); } } } else if (auto *IEI = dyn_cast(CurrentV)) { @@ -870,11 +904,11 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { Numbers = NumberAll(S, IVI->getAggregateOperand()); auto Tracked = 
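For shufflevector, NumberAllBase merges the two operands' per-lane pointer numbers through the mask: a -1 (undef) lane stays -1, mask indices below the first operand's lane count select from Numbers1, and the rest select from Numbers2. The same selection rule as a stand-alone function, with invented lane numbers in the example:

#include <cassert>
#include <vector>

// Combine per-lane pointer numbers of a shufflevector's two operands using its
// mask; -1 marks undef/poison lanes, as in the code above.
std::vector<int> shuffle_numbers(const std::vector<int> &n1,
                                 const std::vector<int> &n2,
                                 const std::vector<int> &mask) {
    std::vector<int> out;
    for (int idx : mask) {
        if (idx == -1)
            out.push_back(-1);                          // undef/poison lane
        else if ((unsigned)idx < n1.size())
            out.push_back(n1[idx]);                     // lane from the first operand
        else
            out.push_back(n2[idx - (int)n1.size()]);    // lane from the second operand
    }
    return out;
}

int main() {
    // Two 2-lane operands numbered {5,6} and {7,8}; the mask picks lanes 0, 3 and an undef.
    auto out = shuffle_numbers({5, 6}, {7, 8}, {0, 3, -1});
    assert((out == std::vector<int>{5, 8, -1}));
}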
TrackCompositeType(IVI->getType()); assert(Tracked.size() == Numbers.size()); - std::vector InsertNumbers = NumberAll(S, IVI->getInsertedValueOperand()); + SmallVector InsertNumbers = NumberAll(S, IVI->getInsertedValueOperand()); auto Idxs = IVI->getIndices(); unsigned j = 0; for (unsigned i = 0; i < Tracked.size(); ++i) { - auto Elem = makeArrayRef(Tracked[i]); + auto Elem = ArrayRef(Tracked[i]); if (Elem.size() < Idxs.size()) continue; if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs) @@ -887,7 +921,7 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { assert(Tracked.size() == BaseNumbers.size()); auto Idxs = EVI->getIndices(); for (unsigned i = 0; i < Tracked.size(); ++i) { - auto Elem = makeArrayRef(Tracked[i]); + auto Elem = ArrayRef(Tracked[i]); if (Elem.size() < Idxs.size()) continue; if (Idxs.equals(Elem.slice(0, Idxs.size()))) // Tracked.startswith(Idxs) @@ -905,10 +939,10 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { llvm_unreachable("Unexpected generating operation for derived values"); } if (isa(CurrentV->getType())) { - auto Number = S.AllPtrNumbering.at(CurrentV); + auto Number = S.AllPtrNumbering[CurrentV]; Numbers.resize(1, Number); } else { - Numbers = S.AllCompositeNumbering.at(CurrentV); + Numbers = S.AllCompositeNumbering[CurrentV]; } } else { assert((isa(CurrentV) || isa(CurrentV) || isa(CurrentV) || isa(CurrentV) || @@ -931,17 +965,17 @@ std::vector LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) { } // gets the pointer number for every gc tracked value inside V -std::vector LateLowerGCFrame::NumberAll(State &S, Value *V) { +SmallVector LateLowerGCFrame::NumberAll(State &S, Value *V) { if (isa(V->getType())) { auto it = S.AllPtrNumbering.find(V); if (it != S.AllPtrNumbering.end()) - return std::vector({it->second}); + return SmallVector({it->second}); } else { auto it = S.AllCompositeNumbering.find(V); if (it != S.AllCompositeNumbering.end()) return it->second; } - std::vector Numbers; + SmallVector Numbers; auto tracked = CountTrackedPointers(V->getType()); if (tracked.count == 0) return Numbers; @@ -975,23 +1009,29 @@ std::vector LateLowerGCFrame::NumberAll(State &S, Value *V) { static void MaybeResize(BBState &BBS, unsigned Idx) { + /* if (BBS.Defs.size() <= Idx) { BBS.Defs.resize(Idx + 1); BBS.UpExposedUses.resize(Idx + 1); BBS.PhiOuts.resize(Idx + 1); } + */ +} + +static bool HasBitSet(const LargeSparseBitVector &BV, unsigned Bit) { + return BV.test(Bit); } static bool HasBitSet(const BitVector &BV, unsigned Bit) { return Bit < BV.size() && BV[Bit]; } -static void NoteDef(State &S, BBState &BBS, int Num, const std::vector &SafepointsSoFar) { +static void NoteDef(State &S, BBState &BBS, int Num, const ArrayRef &SafepointsSoFar) { assert(Num >= 0); MaybeResize(BBS, Num); - assert(BBS.Defs[Num] == 0 && "SSA Violation or misnumbering?"); - BBS.Defs[Num] = 1; - BBS.UpExposedUses[Num] = 0; + assert(!BBS.Defs.test(Num) && "SSA Violation or misnumbering?"); + BBS.Defs.set(Num); + BBS.UpExposedUses.reset(Num); // This value could potentially be live at any following safe point // if it ends up live out, so add it to the LiveIfLiveOut lists for all // following safepoints. 
@@ -1000,7 +1040,9 @@ static void NoteDef(State &S, BBState &BBS, int Num, const std::vector &Saf } } -void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector &SafepointsSoFar, SmallVector &&RefinedPtr) { +void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, + const ArrayRef &SafepointsSoFar, + SmallVector &&RefinedPtr) { Type *RT = Def->getType(); if (isa(RT)) { if (!isSpecialPtr(RT)) @@ -1012,7 +1054,7 @@ void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const st S.Refinements[Num] = std::move(RefinedPtr); } else { - std::vector Nums = NumberAll(S, Def); + SmallVector Nums = NumberAll(S, Def); for (int Num : Nums) { NoteDef(S, BBS, Num, SafepointsSoFar); if (!RefinedPtr.empty()) @@ -1021,7 +1063,7 @@ void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const st } } -static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector CalleeRoots) { +static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, SmallVectorImpl &CalleeRoots) { int Number = ++S.MaxSafepointNumber; S.SafepointNumbering[CI] = Number; S.ReverseSafepointNumbering.push_back(CI); @@ -1030,12 +1072,12 @@ static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, std::vector // in this BB (i.e. even when they don't participate in the dataflow // computation) S.LiveSets.push_back(BBS.UpExposedUses); - S.LiveIfLiveOut.push_back(std::vector{}); + S.LiveIfLiveOut.push_back(SmallVector{}); S.CalleeRoots.push_back(std::move(CalleeRoots)); return Number; } -void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses) { +void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses) { // Short circuit to avoid having to deal with vectors of constants, etc. 
if (isa(V)) return; @@ -1045,15 +1087,15 @@ void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses if (Num < 0) return; MaybeResize(BBS, Num); - Uses[Num] = 1; + Uses.set(Num); } } else { - std::vector Nums = NumberAll(S, V); + SmallVector Nums = NumberAll(S, V); for (int Num : Nums) { if (Num < 0) continue; MaybeResize(BBS, Num); - Uses[Num] = 1; + Uses.set(Num); } } } @@ -1086,31 +1128,44 @@ void RecursivelyVisit(callback f, Value *V) { } } -static void dumpBitVectorValues(State &S, BitVector &BV) { +static void dumpBitVectorValues(State &S, LargeSparseBitVector &BV, ModuleSlotTracker &MST) { bool first = true; - for (int Idx = BV.find_first(); Idx >= 0; Idx = BV.find_next(Idx)) { + for (auto Idx : BV) { if (!first) dbgs() << ", "; first = false; - S.ReversePtrNumbering[Idx]->printAsOperand(dbgs()); + S.ReversePtrNumbering[Idx]->printAsOperand(dbgs(), false, MST); } } +static void dumpBBState(const BasicBlock &BB, State &S, ModuleSlotTracker &MST) +{ + dbgs() << "Liveness analysis for BB " << BB.getName(); + dbgs() << "\n\tDefs: "; + dumpBitVectorValues(S, S.BBStates[&BB].Defs, MST); + dbgs() << "\n\tPhiOuts: "; + dumpBitVectorValues(S, S.BBStates[&BB].PhiOuts, MST); + dbgs() << "\n\tUpExposedUses: "; + dumpBitVectorValues(S, S.BBStates[&BB].UpExposedUses, MST); + dbgs() << "\n\tLiveIn: "; + dumpBitVectorValues(S, S.BBStates[&BB].LiveIn, MST); + dbgs() << "\n\tLiveOut: "; + dumpBitVectorValues(S, S.BBStates[&BB].LiveOut, MST); + dbgs() << "\n"; +} + +JL_USED_FUNC static void dumpBBState(const BasicBlock &BB, State &S) +{ + ModuleSlotTracker MST(BB.getParent()->getParent()); + dumpBBState(BB, S, MST); +} + + /* Debugging utility to dump liveness information */ JL_USED_FUNC static void dumpLivenessState(Function &F, State &S) { + ModuleSlotTracker MST(F.getParent()); for (auto &BB : F) { - dbgs() << "Liveness analysis for BB " << BB.getName(); - dbgs() << "\n\tDefs: "; - dumpBitVectorValues(S, S.BBStates[&BB].Defs); - dbgs() << "\n\tPhiOuts: "; - dumpBitVectorValues(S, S.BBStates[&BB].PhiOuts); - dbgs() << "\n\tUpExposedUses: "; - dumpBitVectorValues(S, S.BBStates[&BB].UpExposedUses); - dbgs() << "\n\tLiveIn: "; - dumpBitVectorValues(S, S.BBStates[&BB].LiveIn); - dbgs() << "\n\tLiveOut: "; - dumpBitVectorValues(S, S.BBStates[&BB].LiveOut); - dbgs() << "\n"; + return dumpBBState(BB, S, MST); } } @@ -1138,7 +1193,7 @@ static bool isLoadFromImmut(LoadInst *LI) if (LI->getMetadata(LLVMContext::MD_invariant_load)) return true; MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa); - if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) + if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype", "jtbaa_memoryptr", "jtbaa_memorylen", "jtbaa_memoryown"})) return true; return false; } @@ -1193,6 +1248,10 @@ static bool isLoadFromConstGV(Value *v, bool &task_local, PhiSet *seen = nullptr task_local = true; return true; } + if (callee && callee->getName() == "julia.gc_loaded") { + return isLoadFromConstGV(call->getArgOperand(0), task_local, seen) && + isLoadFromConstGV(call->getArgOperand(1), task_local, seen); + } } if (isa(v)) { task_local = true; @@ -1215,9 +1274,9 @@ static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen) // We only emit single slot GV in codegen // but LLVM global merging can change the pointer operands to GEPs/bitcasts auto load_base = LI->getPointerOperand()->stripInBoundsOffsets(); + assert(load_base); // Static analyzer auto gv = dyn_cast(load_base); - if (isTBAA(LI->getMetadata(LLVMContext::MD_tbaa), - 
{"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) { + if (isLoadFromImmut(LI)) { if (gv) return true; return isLoadFromConstGV(load_base, task_local, seen); @@ -1288,7 +1347,7 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef PHINumbers, State &S) // value of -1 or -2 in the refinement map), or may be externally rooted by refinement to other // values. Thus a value is not externally rooted if it either: // either: - // - Has no refinements (all obiviously externally rooted values are annotated by -1/-2 in the + // - Has no refinements (all obviously externally rooted values are annotated by -1/-2 in the // refinement map). // - Recursively reaches a not-externally rooted value through its refinements // @@ -1423,6 +1482,8 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef PHINumbers, State &S) // This should have been handled by the first loop above. assert(j != 0 && j <= RefinedPtr.size()); RefinedPtr.resize(j); + } else { + S.Refinements.erase(Num); } visited.reset(); } @@ -1450,20 +1511,18 @@ State LateLowerGCFrame::LocalScan(Function &F) { if (II->getIntrinsicID() == Intrinsic::masked_load || II->getIntrinsicID() == Intrinsic::masked_gather) { if (auto VTy = dyn_cast(II->getType())) { - if (auto PtrT = dyn_cast(VTy->getElementType())) { - if (isSpecialPtr(PtrT)) { - // LLVM sometimes tries to materialize these operations with undefined pointers in our non-integral address space. - // Hopefully LLVM didn't already propagate that information and poison our users. Set those to NULL now. - Value *passthru = II->getArgOperand(3); - if (isa(passthru)) { - II->setArgOperand(3, Constant::getNullValue(passthru->getType())); - } - if (PtrT->getAddressSpace() == AddressSpace::Loaded) { - // These are not real defs - continue; - } + if (CountTrackedPointers(VTy->getElementType()).count) { + // LLVM sometimes tries to materialize these operations with undefined pointers in our non-integral address space. + // Hopefully LLVM didn't already propagate that information and poison our users. Set those to NULL now. 
+ Value *passthru = II->getArgOperand(3); + if (isa(passthru)) { + II->setArgOperand(3, Constant::getNullValue(passthru->getType())); } } + if (hasLoadedTy(VTy->getElementType())) { + // These are not real defs + continue; + } } } } @@ -1471,13 +1530,16 @@ State LateLowerGCFrame::LocalScan(Function &F) { if (callee && callee == typeof_func) { MaybeNoteDef(S, BBS, CI, BBS.Safepoints, SmallVector{-2}); } + else if (callee && callee->getName() == "julia.gc_loaded") { + continue; + } else { MaybeNoteDef(S, BBS, CI, BBS.Safepoints); } if (CI->hasStructRetAttr()) { Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType(); assert(cast(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType())); - auto tracked = CountTrackedPointers(ElT); + auto tracked = CountTrackedPointers(ElT, true); if (tracked.count) { AllocaInst *SRet = dyn_cast((CI->arg_begin()[0])->stripInBoundsOffsets()); assert(SRet); @@ -1530,7 +1592,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { } if (callee) { if (callee == gc_preserve_begin_func) { - std::vector args; + SmallVector args; for (Use &U : CI->args()) { Value *V = U; if (isa(V)) @@ -1542,7 +1604,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { args.push_back(Num); } } else { - std::vector Nums = NumberAll(S, V); + SmallVector Nums = NumberAll(S, V); for (int Num : Nums) { if (Num < 0) continue; @@ -1558,25 +1620,43 @@ State LateLowerGCFrame::LocalScan(Function &F) { callee == gc_preserve_end_func || callee == typeof_func || callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) || callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) || - callee == write_barrier_func || callee == write_barrier_binding_func || + callee == write_barrier_func || callee == gc_loaded_func || callee->getName() == "memcmp") { continue; } +#if JL_LLVM_VERSION >= 160000 + if (callee->getMemoryEffects().onlyReadsMemory() || + callee->getMemoryEffects().onlyAccessesArgPointees()) { + continue; + } +#else if (callee->hasFnAttribute(Attribute::ReadNone) || callee->hasFnAttribute(Attribute::ReadOnly) || callee->hasFnAttribute(Attribute::ArgMemOnly)) { continue; } +#endif if (MemTransferInst *MI = dyn_cast(CI)) { MaybeTrackDst(S, MI); } } - if (isa(CI) || CI->hasFnAttr(Attribute::ArgMemOnly) || - CI->hasFnAttr(Attribute::ReadNone) || CI->hasFnAttr(Attribute::ReadOnly)) { +#if JL_LLVM_VERSION >= 160000 + if (isa(CI) || + CI->getMemoryEffects().onlyAccessesArgPointees() || + CI->getMemoryEffects().onlyReadsMemory()) { // Intrinsics are never safepoints. continue; } - std::vector CalleeRoots; +#else + if (isa(CI) || + CI->hasFnAttr(Attribute::ArgMemOnly) || + CI->hasFnAttr(Attribute::ReadNone) || + CI->hasFnAttr(Attribute::ReadOnly)) { + // Intrinsics are never safepoints. + continue; + } +#endif + SmallVector CalleeRoots; for (Use &U : CI->args()) { // Find all callee rooted arguments. 
// Record them instead of simply remove them from live values here @@ -1594,7 +1674,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { continue; CalleeRoots.push_back(Num); } - int SafepointNumber = NoteSafepoint(S, BBS, CI, std::move(CalleeRoots)); + int SafepointNumber = NoteSafepoint(S, BBS, CI, CalleeRoots); BBS.HasSafepoint = true; BBS.TopmostSafepoint = SafepointNumber; BBS.Safepoints.push_back(SafepointNumber); @@ -1622,9 +1702,8 @@ State LateLowerGCFrame::LocalScan(Function &F) { // task but we do need to issue write barriers for when the current task dies. RefinedPtr.push_back(task_local ? -1 : -2); } - if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) { + if (!hasLoadedTy(Ty)) MaybeNoteDef(S, BBS, LI, BBS.Safepoints, std::move(RefinedPtr)); - } NoteOperandUses(S, BBS, I); } else if (auto *LI = dyn_cast(&I)) { Type *Ty = LI->getNewValOperand()->getType()->getScalarType(); @@ -1670,7 +1749,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { if (isa(Phi->getType())) { PHINumbers.push_back(Number(S, Phi)); } else { - std::vector Nums = NumberAll(S, Phi); + SmallVector Nums = NumberAll(S, Phi); for (int Num : Nums) PHINumbers.push_back(Num); } @@ -1718,7 +1797,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef Idxs, IRBuilder<> &irbuilder) { Type *T_int32 = Type::getInt32Ty(V->getContext()); if (isptr) { - std::vector IdxList{Idxs.size() + 1}; + SmallVector IdxList{Idxs.size() + 1}; IdxList[0] = ConstantInt::get(T_int32, 0); for (unsigned j = 0; j < Idxs.size(); ++j) { IdxList[j + 1] = ConstantInt::get(T_int32, Idxs[j]); @@ -1758,9 +1837,9 @@ static unsigned getFieldOffset(const DataLayout &DL, Type *STy, ArrayRef ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef perm_offsets) { +SmallVector ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef perm_offsets) { auto Tracked = TrackCompositeType(STy); - std::vector Ptrs; + SmallVector Ptrs; unsigned perm_idx = 0; auto ignore_field = [&] (ArrayRef Idxs) { if (perm_idx >= perm_offsets.size()) @@ -1782,11 +1861,12 @@ std::vector ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBu return false; }; for (unsigned i = 0; i < Tracked.size(); ++i) { - auto Idxs = makeArrayRef(Tracked[i]); + auto Idxs = ArrayRef(Tracked[i]); if (ignore_field(Idxs)) continue; Value *Elem = ExtractScalar(Src, STy, isptr, Idxs, irbuilder); - Ptrs.push_back(Elem); + if (isTrackedValue(Elem)) // ignore addrspace Loaded when it appears + Ptrs.push_back(Elem); } return Ptrs; } @@ -1877,14 +1957,11 @@ void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) { */ void LateLowerGCFrame::ComputeLiveness(State &S) { bool Converged = false; - /* Liveness is a reverse problem, so RPOT is a good way to - * perform this iteration. - */ - ReversePostOrderTraversal RPOT(S.F); - BitVector NewLive; + /* Liveness is a reverse problem, so post-order is a good way to perform this iteration. */ + LargeSparseBitVector NewLive; while (!Converged) { bool AnyChanged = false; - for (BasicBlock *BB : RPOT) { + for (BasicBlock *BB : post_order(S.F)) { // This could all be done more efficiently, by only updating what // changed - Let's get it working first though. 
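            // Sketch of the fixed point this loop computes (roughly; PhiOuts handling aside):
            //   LiveOut(BB) = union of LiveIn(Succ) over the successors Succ of BB
            //   LiveIn(BB)  = (LiveOut(BB) \ Defs(BB)) | UpExposedUses(BB)
            // which is the intersectWithComplement / |= pair applied to NewLive below; visiting
            // blocks in post order lets most LiveOut sets be built from already-updated successors.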
BBState &BBS = S.BBStates[BB]; @@ -1894,14 +1971,13 @@ void LateLowerGCFrame::ComputeLiveness(State &S) { } if (NewLive != BBS.LiveOut) { AnyChanged = true; - BBS.LiveOut = NewLive; - MaybeResize(BBS, BBS.LiveOut.size() - 1); + BBS.LiveOut = NewLive; } - NewLive.reset(BBS.Defs); + NewLive.intersectWithComplement(BBS.Defs); NewLive |= BBS.UpExposedUses; if (NewLive != BBS.LiveIn) { AnyChanged = true; - std::swap(BBS.LiveIn, NewLive); + std::swap(BBS.LiveIn, NewLive); } } Converged = !AnyChanged; @@ -1914,8 +1990,8 @@ JL_USED_FUNC static void dumpSafepointsForBBName(Function &F, State &S, const ch for (auto it : S.SafepointNumbering) { if (it.first->getParent()->getName() == BBName) { dbgs() << "Live at " << *it.first << "\n"; - BitVector &LS = S.LiveSets[it.second]; - for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) { + LargeSparseBitVector &LS = S.LiveSets[it.second]; + for (auto Idx : LS) { dbgs() << "\t"; S.ReversePtrNumbering[Idx]->printAsOperand(dbgs()); dbgs() << "\n"; @@ -1924,63 +2000,70 @@ JL_USED_FUNC static void dumpSafepointsForBBName(Function &F, State &S, const ch } } -void LateLowerGCFrame::RefineLiveSet(BitVector &LS, State &S, const std::vector &CalleeRoots) +static bool IsIndirectlyRooted(const State &S, LargeSparseBitVector &Visited, LargeSparseBitVector &IndirectlyRootedLS, const LargeSparseBitVector &LS, int RefPtr) { + if (HasBitSet(IndirectlyRootedLS, RefPtr)) + return true; + if (HasBitSet(Visited, RefPtr)) + return false; + const auto it = S.Refinements.find(RefPtr); + if (it == S.Refinements.end()) { + Visited.set(RefPtr); + return false; + } + const auto &RefinedPtr = it->second; + assert(!RefinedPtr.empty()); + bool rooted = true; + for (auto NRefPtr: RefinedPtr) { + if (NRefPtr < 0 || IsIndirectlyRooted(S, Visited, IndirectlyRootedLS, LS, NRefPtr)) { + continue; + } + // Not indirectly rooted, but in LS - can be used to establish a root + if (HasBitSet(LS, NRefPtr)) + continue; + rooted = false; + break; + } + if (rooted) + IndirectlyRootedLS.set(RefPtr); + Visited.set(RefPtr); + return rooted; +} + +void LateLowerGCFrame::RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef CalleeRoots) { - BitVector FullLS(S.MaxPtrNumber + 1, false); - FullLS |= LS; - // First expand the live set according to the refinement map - // so that we can see all the values that are effectively live. + // It is possible that a value is not directly rooted by the refinements in the live set, but rather + // indirectly by following the edges of the refinement graph to all the values that root it. + // For example, suppose we have: + // LS: 1 4 5 + // Refinements: 1 -> {2,3} + // 2 -> 4 + // 3 -> 5 + // Even though {2,3} is not in the LiveSet, we can still refine, because we can follow the edges to + // the roots {4, 5} which are in the live set. The two bit vectors here cache the lookup for efficiency. + LargeSparseBitVector Visited; + LargeSparseBitVector IndirectlyRootedLS; for (auto Num: CalleeRoots) { // For callee rooted values, they are all kept alive at the safepoint. // Make sure they are marked (even though they probably are already) // so that other values can be refined to them. - FullLS[Num] = 1; + IndirectlyRootedLS.set(Num); + // Now unmark all values that are rooted by the callee after + // refining other values to them. 
+ LS.reset(Num); } - bool changed; - do { - changed = false; - for (auto &kv: S.Refinements) { - int Num = kv.first; - if (Num < 0 || HasBitSet(FullLS, Num) || kv.second.empty()) - continue; - bool live = true; - for (auto &refine: kv.second) { - if (refine < 0 || HasBitSet(FullLS, refine)) - continue; - live = false; - break; - } - if (live) { - changed = true; - FullLS[Num] = 1; - } - } - } while (changed); + // Now remove all values from the LiveSet that's kept alive by other objects // This loop only mutate `LS` which isn't read from in the loop body so // a single pass is enough. - for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) { - if (!S.Refinements.count(Idx)) - continue; - const auto &RefinedPtr = S.Refinements[Idx]; - if (RefinedPtr.empty()) - continue; - bool rooted = true; - for (auto RefPtr: RefinedPtr) { - if (RefPtr < 0 || HasBitSet(FullLS, RefPtr)) - continue; - rooted = false; - break; - } + auto it = LS.begin(); + while (it != LS.end()) { + int Idx = *it; + bool rooted = IsIndirectlyRooted(S, Visited, IndirectlyRootedLS, LS, Idx); + ++it; if (rooted) { - LS[Idx] = 0; + LS.reset(Idx); } } - for (auto Num: CalleeRoots) { - // Now unmark all values that are rooted by the callee after - // refining other values to them. - LS[Num] = 0; - } } void LateLowerGCFrame::ComputeLiveSets(State &S) { @@ -1991,13 +2074,13 @@ void LateLowerGCFrame::ComputeLiveSets(State &S) { Instruction *Safepoint = it.first; BasicBlock *BB = Safepoint->getParent(); BBState &BBS = S.BBStates[BB]; - BitVector LiveAcross = BBS.LiveIn; + LargeSparseBitVector LiveAcross = BBS.LiveIn; LiveAcross &= BBS.LiveOut; - BitVector &LS = S.LiveSets[idx]; + LargeSparseBitVector &LS = S.LiveSets[idx]; LS |= LiveAcross; for (int Live : S.LiveIfLiveOut[idx]) { if (HasBitSet(BBS.LiveOut, Live)) - LS[Live] = 1; + LS.set(Live); } RefineLiveSet(LS, S, S.CalleeRoots[idx]); // If the function has GC preserves, figure out whether we need to @@ -2021,30 +2104,18 @@ void LateLowerGCFrame::ComputeLiveSets(State &S) { if (OutsideRange) continue; for (unsigned Num : it2.second) { - if (Num >= LS.size()) - LS.resize(Num + 1); - LS[Num] = 1; + LS.set(Num); } } } } // Compute the interference graph - for (int i = 0; i <= S.MaxPtrNumber; ++i) { - SetVector Neighbors; - BitVector NeighborBits(S.MaxPtrNumber); - for (auto it : S.SafepointNumbering) { - const BitVector &LS = S.LiveSets[it.second]; - if ((unsigned)i >= LS.size() || !LS[i]) - continue; - NeighborBits |= LS; - } - for (int Idx = NeighborBits.find_first(); Idx >= 0; Idx = NeighborBits.find_next(Idx)) { - // We explicitly let i be a neighbor of itself, to distinguish - // between being the only value live at a safepoint, vs not - // being live at any safepoint. 
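    // Sketch of the interference construction that replaces the per-value scan being removed here:
    // two GC values interfere when some safepoint live set contains both, so it suffices to OR each
    // live set LS into Neighbors[idx] for every idx in LS. Since idx is itself in LS, each live value
    // stays its own neighbor, preserving the old distinction between "live at some safepoint" and
    // "never live at any safepoint".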
- Neighbors.insert(Idx); + S.Neighbors.resize(S.MaxPtrNumber+1); + for (auto it : S.SafepointNumbering) { + const LargeSparseBitVector &LS = S.LiveSets[it.second]; + for (int idx : LS) { + S.Neighbors[idx] |= LS; } - S.Neighbors.push_back(Neighbors); } } @@ -2058,12 +2129,12 @@ struct PEOIterator { unsigned weight; unsigned pos; }; - std::vector Elements; - std::vector> Levels; - const std::vector> &Neighbors; - PEOIterator(const std::vector> &Neighbors) : Neighbors(Neighbors) { + SmallVector Elements; + SmallVector> Levels; + const SmallVector &Neighbors; + PEOIterator(const SmallVector &Neighbors) : Neighbors(Neighbors) { // Initialize State - std::vector FirstLevel; + SmallVector FirstLevel; for (unsigned i = 0; i < Neighbors.size(); ++i) { FirstLevel.push_back(i); Element E{0, i}; @@ -2075,7 +2146,7 @@ struct PEOIterator { // Find the element in the highest bucket int NextElement = -1; while (NextElement == -1 && !Levels.empty()) { - std::vector &LastLevel = Levels.back(); + SmallVector &LastLevel = Levels.back(); while (NextElement == -1 && !LastLevel.empty()) { NextElement = LastLevel.back(); LastLevel.pop_back(); @@ -2085,7 +2156,7 @@ struct PEOIterator { } if (NextElement == -1) return NextElement; - // Make sure not to try to re-use this later. + // Make sure not to try to reuse this later. Elements[NextElement].weight = (unsigned)-1; // Raise neighbors for (int Neighbor : Neighbors[NextElement]) { @@ -2100,7 +2171,7 @@ struct PEOIterator { // Raise the neighbor to the next level. NElement.weight += 1; if (NElement.weight >= Levels.size()) - Levels.push_back(std::vector{}); + Levels.push_back(SmallVector{}); Levels[NElement.weight].push_back(Neighbor); NElement.pos = Levels[NElement.weight].size()-1; } @@ -2110,7 +2181,7 @@ struct PEOIterator { } }; -JL_USED_FUNC static void dumpColorAssignments(const State &S, std::vector &Colors) +JL_USED_FUNC static void dumpColorAssignments(const State &S, const ArrayRef &Colors) { for (unsigned i = 0; i < Colors.size(); ++i) { if (Colors[i] == -1) @@ -2121,8 +2192,8 @@ JL_USED_FUNC static void dumpColorAssignments(const State &S, std::vector & } } -std::vector LateLowerGCFrame::ColorRoots(const State &S) { - std::vector Colors; +SmallVector LateLowerGCFrame::ColorRoots(const State &S) { + SmallVector Colors; Colors.resize(S.MaxPtrNumber + 1, -1); PEOIterator Ordering(S.Neighbors); int PreAssignedColors = 0; @@ -2130,8 +2201,8 @@ std::vector LateLowerGCFrame::ColorRoots(const State &S) { to returns_twice */ for (auto it : S.ReturnsTwice) { int Num = S.SafepointNumbering.at(it); - const BitVector &LS = S.LiveSets[Num]; - for (int Idx = LS.find_first(); Idx >= 0; Idx = LS.find_next(Idx)) { + const LargeSparseBitVector &LS = S.LiveSets[Num]; + for (int Idx : LS) { if (Colors[Idx] == -1) Colors[Idx] = PreAssignedColors++; } @@ -2167,28 +2238,27 @@ std::vector LateLowerGCFrame::ColorRoots(const State &S) { } // Size of T is assumed to be `sizeof(void*)` -Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V) +Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V) { - auto T_size = getSizeTy(T->getContext()); assert(T == T_size || isa(T)); auto TV = cast(V->getType()); auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace())); - return builder.CreateInBoundsGEP(T, cast, ConstantInt::get(T_size, -1)); + return builder.CreateInBoundsGEP(T, cast, ConstantInt::get(T_size, -1), V->getName() + ".tag_addr"); } -Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Value *V) 
+Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V) { - auto T_size = getSizeTy(builder.getContext()); - auto addr = EmitTagPtr(builder, T_size, V); - LoadInst *load = builder.CreateAlignedLoad(T_size, addr, Align(sizeof(size_t))); + auto addr = EmitTagPtr(builder, T_size, T_size, V); + auto &M = *builder.GetInsertBlock()->getModule(); + LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0), V->getName() + ".tag"); load->setOrdering(AtomicOrdering::Unordered); load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); MDBuilder MDB(load->getContext()); auto *NullInt = ConstantInt::get(T_size, 0); - // We can be sure that the tag is larger than page size. + // We can be sure that the tag is at least 16 (1<<4) // Hopefully this is enough to convince LLVM that the value is still not NULL // after masking off the tag bits - auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(T_size, 4096)); + auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(T_size, 16)); load->setMetadata(LLVMContext::MD_range, MDB.createRange(NonNullInt, NullInt)); return load; } @@ -2236,10 +2306,9 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) { return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag); } - bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { auto T_int32 = Type::getInt32Ty(F.getContext()); - auto T_size = getSizeTy(F.getContext()); + auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext()); bool ChangesMade = false; // We create one alloca for all the jlcall frames that haven't been processed // yet. LLVM would merge them anyway later, so might as well save it a bit @@ -2248,12 +2317,13 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { Instruction *StartOff = &*(F.getEntryBlock().begin()); PointerType *T_pprjlvalue = nullptr; AllocaInst *Frame = nullptr; + unsigned allocaAddressSpace = F.getParent()->getDataLayout().getAllocaAddrSpace(); if (T_prjlvalue) { T_pprjlvalue = T_prjlvalue->getPointerTo(); - Frame = new AllocaInst(T_prjlvalue, 0, - ConstantInt::get(T_int32, maxframeargs), "", StartOff); + Frame = new AllocaInst(T_prjlvalue, allocaAddressSpace, + ConstantInt::get(T_int32, maxframeargs), "jlcallframe", StartOff); } - std::vector write_barriers; + SmallVector write_barriers; for (BasicBlock &BB : F) { for (auto it = BB.begin(); it != BB.end();) { Instruction *I = &*it; @@ -2263,7 +2333,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { if (I->getMetadata(LLVMContext::MD_invariant_load)) I->setMetadata(LLVMContext::MD_invariant_load, NULL); if (MDNode *TBAA = I->getMetadata(LLVMContext::MD_tbaa)) { - if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const"})) { + if (TBAA->getNumOperands() == 4 && isTBAA(TBAA, {"jtbaa_const", "jtbaa_memoryptr", "jtbaa_memorylen", "tbaa_memoryown"})) { MDNode *MutableTBAA = createMutableTBAAAccessTag(TBAA); if (MutableTBAA != TBAA) I->setMetadata(LLVMContext::MD_tbaa, MutableTBAA); @@ -2292,7 +2362,13 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { /* No replacement */ } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *obj = CI->getOperand(0); - auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), "", CI); + auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI); + ASCI->takeName(CI); + CI->replaceAllUsesWith(ASCI); + 
UpdatePtrNumbering(CI, ASCI, S); + } else if (gc_loaded_func != nullptr && callee == gc_loaded_func) { + auto *obj = CI->getOperand(1); + auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI); ASCI->takeName(CI); CI->replaceAllUsesWith(ASCI); UpdatePtrNumbering(CI, ASCI, S); @@ -2303,22 +2379,6 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { IRBuilder<> builder(CI); builder.SetCurrentDebugLocation(CI->getDebugLoc()); - // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like - // `julia.gc_alloc_obj` except it doesn't set the tag. - auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes); - auto ptlsLoad = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe); - auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext())); - auto newI = builder.CreateCall( - allocBytesIntrinsic, - { - ptls, - builder.CreateIntCast( - CI->getArgOperand(1), - allocBytesIntrinsic->getFunctionType()->getParamType(1), - false) - }); - newI->takeName(CI); - // LLVM alignment/bit check is not happy about addrspacecast and refuse // to remove write barrier because of it. // We pretty much only load using `T_size` so try our best to strip @@ -2340,8 +2400,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { if (isLoadFromConstGV(LI, task_local) && getLoadValueAlign(LI) < 16) { Type *T_int64 = Type::getInt64Ty(LI->getContext()); auto op = ConstantAsMetadata::get(ConstantInt::get(T_int64, 16)); - LI->setMetadata(LLVMContext::MD_align, - MDNode::get(LI->getContext(), { op })); + LI->setMetadata(LLVMContext::MD_align, MDNode::get(LI->getContext(), { op })); } } // As a last resort, if we didn't manage to strip down the tag @@ -2357,9 +2416,39 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { builder.CreateAlignmentAssumption(DL, tag, 16); } } - // Set the tag. + + // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like + // `julia.gc_alloc_obj` except it specializes the call based on the constant + // size of the object to allocate, to save one indirection, and doesn't set + // the type tag. (Note that if the size is not a constant, it will call + // gc_alloc_obj, and will redundantly set the tag.) + auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes); + auto ptlsLoad = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe); + auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext())); + auto newI = builder.CreateCall( + allocBytesIntrinsic, + { + ptls, + builder.CreateIntCast( + CI->getArgOperand(1), + allocBytesIntrinsic->getFunctionType()->getParamType(1), + false), + builder.CreatePtrToInt(tag, T_size), + }); + newI->setAttributes(allocBytesIntrinsic->getAttributes()); + newI->addDereferenceableRetAttr(CI->getRetDereferenceableBytes()); + newI->takeName(CI); + // Now, finally, set the tag. We do this in IR instead of in the C alloc + // function, to provide possible optimization opportunities. (I think? TBH + // the most recent editor of this code is not entirely clear on why we + // prefer to set the tag in the generated code. Providing optimization + // opportunities is the most likely reason; the tradeoff is slightly + // larger code size and increased compilation time, compiling this + // instruction at every allocation site, rather than once in the C alloc + // function.) 
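                // Hedged sketch of the code emitted for a typical `julia.gc_alloc_obj(%task, %sz, %tag)`
                // call after this lowering (IR shape only; the exact types, casts and attributes come
                // from the builder calls above and below):
                //   %ptls     = <current ptls loaded from %task>
                //   %obj      = call @julia.gc_alloc_bytes(i8* %ptls, <size> %sz, <size> ptrtoint %tag)
                //   %tag_addr = getelementptr inbounds <size>, %obj, -1   ; EmitTagPtr: word before object
                //   store atomic %tag, %tag_addr unordered, !tbaa !jtbaa_tag   ; the tag store below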
+ auto &M = *builder.GetInsertBlock()->getModule(); StoreInst *store = builder.CreateAlignedStore( - tag, EmitTagPtr(builder, tag_type, newI), Align(sizeof(size_t))); + tag, EmitTagPtr(builder, tag_type, T_size, newI), M.getDataLayout().getPointerABIAlignment(0)); store->setOrdering(AtomicOrdering::Unordered); store->setMetadata(LLVMContext::MD_tbaa, tbaa_tag); @@ -2373,15 +2462,14 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { assert(CI->arg_size() == 1); IRBuilder<> builder(CI); builder.SetCurrentDebugLocation(CI->getDebugLoc()); - auto tag = EmitLoadTag(builder, CI->getArgOperand(0)); + auto tag = EmitLoadTag(builder, T_size, CI->getArgOperand(0)); auto masked = builder.CreateAnd(tag, ConstantInt::get(T_size, ~(uintptr_t)15)); auto typ = builder.CreateAddrSpaceCast(builder.CreateIntToPtr(masked, JuliaType::get_pjlvalue_ty(masked->getContext())), T_prjlvalue); typ->takeName(CI); CI->replaceAllUsesWith(typ); UpdatePtrNumbering(CI, typ, S); - } else if ((write_barrier_func && callee == write_barrier_func) || - (write_barrier_binding_func && callee == write_barrier_binding_func)) { + } else if (write_barrier_func && callee == write_barrier_func) { // The replacement for this requires creating new BasicBlocks // which messes up the loop. Queue all of them to be replaced later. assert(CI->arg_size() >= 1); @@ -2390,14 +2478,15 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { ++it; continue; } else if ((call_func && callee == call_func) || - (call2_func && callee == call2_func)) { + (call2_func && callee == call2_func) || + (call3_func && callee == call3_func)) { assert(T_prjlvalue); size_t nargs = CI->arg_size(); size_t nframeargs = nargs-1; - if (callee == call_func) - nframeargs -= 1; - else if (callee == call2_func) + if (callee == call2_func) nframeargs -= 2; + else + nframeargs -= 1; SmallVector ReplacementArgs; auto arg_it = CI->arg_begin(); assert(arg_it != CI->arg_end()); @@ -2422,7 +2511,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { } ReplacementArgs.push_back(nframeargs == 0 ? (llvm::Value*)ConstantPointerNull::get(T_pprjlvalue) : - (llvm::Value*)Frame); + Builder.CreateAddrSpaceCast(Frame, T_prjlvalue->getPointerTo(0))); ReplacementArgs.push_back(ConstantInt::get(T_int32, nframeargs)); if (callee == call2_func) { // move trailing arg to the end now @@ -2430,7 +2519,9 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { ReplacementArgs.erase(ReplacementArgs.begin()); ReplacementArgs.push_back(front); } - FunctionType *FTy = callee == call2_func ? JuliaType::get_jlfunc2_ty(CI->getContext()) : JuliaType::get_jlfunc_ty(CI->getContext()); + FunctionType *FTy = callee == call3_func ? JuliaType::get_jlfunc3_ty(CI->getContext()) : + callee == call2_func ? 
JuliaType::get_jlfunc2_ty(CI->getContext()) : + JuliaType::get_jlfunc_ty(CI->getContext()); CallInst *NewCall = CallInst::Create(FTy, new_callee, ReplacementArgs, "", CI); NewCall->setTailCallKind(CI->getTailCallKind()); auto callattrs = CI->getAttributes(); @@ -2471,17 +2562,28 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { if (CFGModified) { *CFGModified = true; } + auto DebugInfoMeta = F.getParent()->getModuleFlag("julia.debug_level"); + int debug_info = 1; + if (DebugInfoMeta != nullptr) { + debug_info = cast(cast(DebugInfoMeta)->getValue())->getZExtValue(); + } + IRBuilder<> builder(CI); builder.SetCurrentDebugLocation(CI->getDebugLoc()); - auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3); - auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); + auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED); + setName(parBits, "parent_bits", debug_info); + auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED)); + setName(parOldMarked, "parent_old_marked", debug_info); auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); builder.SetInsertPoint(mayTrigTerm); + setName(mayTrigTerm->getParent(), "may_trigger_wb", debug_info); Value *anyChldNotMarked = NULL; for (unsigned i = 1; i < CI->arg_size(); i++) { Value *child = CI->getArgOperand(i); - Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1); - Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0)); + Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED); + setName(chldBit, "child_bit", debug_info); + Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0),"child_not_marked"); + setName(chldNotMarked, "child_not_marked", debug_info); anyChldNotMarked = anyChldNotMarked ? 
builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; } assert(anyChldNotMarked); // handled by all_of test above @@ -2489,13 +2591,11 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { SmallVector Weights{1, 9}; auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, MDB.createBranchWeights(Weights)); + setName(trigTerm->getParent(), "trigger_wb", debug_info); builder.SetInsertPoint(trigTerm); if (CI->getCalledOperand() == write_barrier_func) { builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); } - else if (CI->getCalledOperand() == write_barrier_binding_func) { - builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCBinding), parent); - } else { assert(false); } @@ -2510,18 +2610,18 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { return ChangesMade; } -static void AddInPredLiveOuts(BasicBlock *BB, BitVector &LiveIn, State &S) +static void AddInPredLiveOuts(BasicBlock *BB, LargeSparseBitVector &LiveIn, State &S) { bool First = true; std::set Visited; - std::vector WorkList; + SmallVector WorkList; WorkList.push_back(BB); while (!WorkList.empty()) { BB = &*WorkList.back(); WorkList.pop_back(); // Nothing is live at function entry if (BB == &S.F->getEntryBlock()) { - LiveIn.reset(); + LiveIn.clear(); return; } for (BasicBlock *Pred : predecessors(BB)) { @@ -2546,38 +2646,41 @@ static void AddInPredLiveOuts(BasicBlock *BB, BitVector &LiveIn, State &S) } void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, - const std::vector &Colors, Value *GCFrame, + ArrayRef Colors, Value *GCFrame, Instruction *InsertBefore) { // Get the slot address. auto slotAddress = CallInst::Create( getOrDeclare(jl_intrinsics::getGCFrameSlot), {GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)}, - "", InsertBefore); + "gc_slot_addr_" + StringRef(std::to_string(Colors[R] + MinColorRoot)), InsertBefore); Value *Val = GetPtrForNumber(S, R, InsertBefore); // Pointee types don't have semantics, so the optimizer is // free to rewrite them if convenient. We need to change // it back here for the store. 
- if (Val->getType() != T_prjlvalue) + if (Val->getType() != T_prjlvalue) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(Val->getContext().supportsTypedPointers()); Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore); + } new StoreInst(Val, slotAddress, InsertBefore); } void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot, - const std::vector &Colors, Value *GCFrame) + ArrayRef Colors, Value *GCFrame) { for (auto &BB : *S.F) { const BBState &BBS = S.BBStates[&BB]; if (!BBS.HasSafepoint) { continue; } - BitVector LiveIn; + LargeSparseBitVector LiveIn; AddInPredLiveOuts(&BB, LiveIn, S); - const BitVector *LastLive = &LiveIn; + const LargeSparseBitVector *LastLive = &LiveIn; for(auto rit = BBS.Safepoints.rbegin(); rit != BBS.Safepoints.rend(); ++rit ) { - const BitVector &NowLive = S.LiveSets[*rit]; - for (int Idx = NowLive.find_first(); Idx >= 0; Idx = NowLive.find_next(Idx)) { + const LargeSparseBitVector &NowLive = S.LiveSets[*rit]; + for (int Idx : NowLive) { if (!HasBitSet(*LastLive, Idx)) { PlaceGCFrameStore(S, Idx, MinColorRoot, Colors, GCFrame, S.ReverseSafepointNumbering[*rit]); @@ -2588,7 +2691,8 @@ void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot, } } -void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State &S, std::map>) { +void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl &Colors, State &S, + std::map>) { auto F = S.F; auto T_int32 = Type::getInt32Ty(F->getContext()); int MaxColor = -1; @@ -2614,19 +2718,20 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State unsigned AllocaSlot = 2; // first two words are metadata auto replace_alloca = [this, gcframe, &AllocaSlot, T_int32](AllocaInst *&AI) { // Pick a slot for the alloca. 
- unsigned align = AI->getAlignment() / sizeof(void*); // TODO: use DataLayout pointer size + AI->getAlign(); + unsigned align = AI->getAlign().value() / sizeof(void*); // TODO: use DataLayout pointer size assert(align <= 16 / sizeof(void*) && "Alignment exceeds llvm-final-gc-lowering abilities"); if (align > 1) AllocaSlot = LLT_ALIGN(AllocaSlot, align); Instruction *slotAddress = CallInst::Create( getOrDeclare(jl_intrinsics::getGCFrameSlot), - {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}); + {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}, "gc_slot_addr" + StringRef(std::to_string(AllocaSlot - 2))); slotAddress->insertAfter(gcframe); slotAddress->takeName(AI); // Check for lifetime intrinsics on this alloca, we can't keep them // because we're changing the semantics - std::vector ToDelete; + SmallVector ToDelete; RecursivelyVisit([&](Use &VU) { IntrinsicInst *II = cast(VU.getUser()); if ((II->getIntrinsicID() != Intrinsic::lifetime_start && @@ -2639,6 +2744,8 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State } if (slotAddress->getType() != AI->getType()) { // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(slotAddress->getContext().supportsTypedPointers()); auto BCI = new BitCastInst(slotAddress, AI->getType()); BCI->insertAfter(slotAddress); slotAddress = BCI; @@ -2663,13 +2770,16 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State for (unsigned i = 0; i < Store.second; ++i) { auto slotAddress = CallInst::Create( getOrDeclare(jl_intrinsics::getGCFrameSlot), - {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}); + {gcframe, ConstantInt::get(T_int32, AllocaSlot - 2)}, "gc_slot_addr" + StringRef(std::to_string(AllocaSlot - 2))); slotAddress->insertAfter(gcframe); auto ValExpr = std::make_pair(Base, isa(Base->getType()) ? 
-1 : i); auto Elem = MaybeExtractScalar(S, ValExpr, SI); - if (Elem->getType() != T_prjlvalue) + if (Elem->getType() != T_prjlvalue) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(Elem->getContext().supportsTypedPointers()); Elem = new BitCastInst(Elem, T_prjlvalue, "", SI); - //auto Idxs = makeArrayRef(Tracked[i]); + } + //auto Idxs = ArrayRef(Tracked[i]); //Value *Elem = ExtractScalar(Base, true, Idxs, SI); Value *shadowStore = new StoreInst(Elem, slotAddress, SI); (void)shadowStore; @@ -2684,12 +2794,12 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State // Insert GC frame stores PlaceGCFrameStores(S, AllocaSlot - 2, Colors, gcframe); // Insert GCFrame pops - for(Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - if (isa(I->getTerminator())) { + for (auto &BB : *F) { + if (isa(BB.getTerminator())) { auto popGcframe = CallInst::Create( getOrDeclare(jl_intrinsics::popGCFrame), {gcframe}); - popGcframe->insertBefore(I->getTerminator()); + popGcframe->insertBefore(BB.getTerminator()); } } } @@ -2698,7 +2808,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { initAll(*F.getParent()); LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n"); - if (!pgcstack_getter) + if (!pgcstack_getter && !adoptthread_func) return CleanupIR(F, nullptr, CFGModified); pgcstack = getPGCstack(F); @@ -2707,29 +2817,25 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { State S = LocalScan(F); ComputeLiveness(S); - std::vector Colors = ColorRoots(S); + SmallVector Colors = ColorRoots(S); std::map> CallFrames; // = OptimizeCallFrames(S, Ordering); PlaceRootsAndUpdateCalls(Colors, S, CallFrames); CleanupIR(F, &S, CFGModified); return true; } -bool LateLowerGCFrameLegacy::runOnFunction(Function &F) { - auto GetDT = [this]() -> DominatorTree & { - return getAnalysis().getDomTree(); - }; - auto lateLowerGCFrame = LateLowerGCFrame(GetDT); - return lateLowerGCFrame.runOnFunction(F); -} - -PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) { auto GetDT = [&AM, &F]() -> DominatorTree & { return AM.getResult(F); }; auto lateLowerGCFrame = LateLowerGCFrame(GetDT); bool CFGModified = false; - if (lateLowerGCFrame.runOnFunction(F, &CFGModified)) { + bool modified = lateLowerGCFrame.runOnFunction(F, &CFGModified); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(F)); +#endif + if (modified) { if (CFGModified) { return PreservedAnalyses::none(); } else { @@ -2738,16 +2844,3 @@ PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM) } return PreservedAnalyses::all(); } - - -char LateLowerGCFrameLegacy::ID = 0; -static RegisterPass X("LateLowerGCFrame", "Late Lower GCFrame Pass", false, false); - -Pass *createLateLowerGCFramePass() { - return new LateLowerGCFrameLegacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createLateLowerGCFramePass()); -} diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp index 747066e731892..15866d0855fc1 100644 --- a/src/llvm-lower-handlers.cpp +++ b/src/llvm-lower-handlers.cpp @@ -7,6 +7,8 @@ #include #include +#include +#include #include #include #include @@ -15,18 +17,20 @@ #include #include #include -#include +#include #include #include #include 
#include "julia.h" #include "julia_assert.h" -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #include #define DEBUG_TYPE "lower_handlers" #undef DEBUG +STATISTIC(MaxExceptionHandlerDepth, "Maximum nesting of exception handlers"); +STATISTIC(ExceptionHandlerBuffers, "Number of exception handler buffers inserted"); using namespace llvm; @@ -56,7 +60,7 @@ using namespace llvm; * \ / * br i1 %cond, %left2, %right2 * / \ - * jl_pop_hander ret + * jl_pop_handler ret * ret * * The frontend doesn't emit structures like this. However, the optimizer @@ -77,24 +81,24 @@ namespace { * If the module doesn't have declarations for the jl_enter_handler and setjmp * functions, insert them. */ -static void ensure_enter_function(Module &M) +static void ensure_enter_function(Module &M, const Triple &TT) { auto T_int8 = Type::getInt8Ty(M.getContext()); auto T_pint8 = PointerType::get(T_int8, 0); auto T_void = Type::getVoidTy(M.getContext()); auto T_int32 = Type::getInt32Ty(M.getContext()); if (!M.getNamedValue(XSTR(jl_enter_handler))) { - std::vector ehargs(0); + SmallVector ehargs(0); ehargs.push_back(T_pint8); Function::Create(FunctionType::get(T_void, ehargs, false), Function::ExternalLinkage, XSTR(jl_enter_handler), &M); } if (!M.getNamedValue(jl_setjmp_name)) { - std::vector args2(0); + SmallVector args2(0); args2.push_back(T_pint8); -#ifndef _OS_WINDOWS_ - args2.push_back(T_int32); -#endif + if (!TT.isOSWindows()) { + args2.push_back(T_int32); + } Function::Create(FunctionType::get(T_int32, args2, false), Function::ExternalLinkage, jl_setjmp_name, &M) ->addFnAttr(Attribute::ReturnsTwice); @@ -103,10 +107,11 @@ static void ensure_enter_function(Module &M) static bool lowerExcHandlers(Function &F) { Module &M = *F.getParent(); + Triple TT(M.getTargetTriple()); Function *except_enter_func = M.getFunction("julia.except_enter"); if (!except_enter_func) return false; // No EH frames in this module - ensure_enter_function(M); + ensure_enter_function(M, TT); Function *leave_func = M.getFunction(XSTR(jl_pop_handler)); Function *jlenter_func = M.getFunction(XSTR(jl_enter_handler)); Function *setjmp_func = M.getFunction(jl_setjmp_name); @@ -156,6 +161,8 @@ static bool lowerExcHandlers(Function &F) { /* Remember the depth at the BB boundary */ ExitDepth[BB] = Depth; } + MaxExceptionHandlerDepth.updateMax(MaxDepth); + ExceptionHandlerBuffers += MaxDepth; /* Step 2: EH Frame lowering */ // Allocate stack space for each handler. 
We allocate these as separate @@ -166,17 +173,24 @@ static bool lowerExcHandlers(Function &F) { Value *handler_sz64 = ConstantInt::get(Type::getInt64Ty(F.getContext()), sizeof(jl_handler_t)); Instruction *firstInst = &F.getEntryBlock().front(); - std::vector buffs; + SmallVector buffs; + unsigned allocaAddressSpace = F.getParent()->getDataLayout().getAllocaAddrSpace(); for (int i = 0; i < MaxDepth; ++i) { - auto *buff = new AllocaInst(Type::getInt8Ty(F.getContext()), 0, + auto *buff = new AllocaInst(Type::getInt8Ty(F.getContext()), allocaAddressSpace, handler_sz, Align(16), "", firstInst); - buffs.push_back(buff); + if (allocaAddressSpace) { + AddrSpaceCastInst *buff_casted = new AddrSpaceCastInst(buff, Type::getInt8PtrTy(F.getContext(), AddressSpace::Generic)); + buff_casted->insertAfter(buff); + buffs.push_back(buff_casted); + } else { + buffs.push_back(buff); + } } // Lower enter funcs for (auto it : EnterDepth) { assert(it.second >= 0); - AllocaInst *buff = buffs[it.second]; + Instruction *buff = buffs[it.second]; CallInst *enter = it.first; auto new_enter = CallInst::Create(jlenter_func, buff, "", enter); Value *lifetime_args[] = { @@ -184,14 +198,15 @@ static bool lowerExcHandlers(Function &F) { buff }; CallInst::Create(lifetime_start, lifetime_args, "", new_enter); -#ifndef _OS_WINDOWS_ - // For LLVM 3.3 compatibility - Value *args[] = {buff, - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; - auto sj = CallInst::Create(setjmp_func, args, "", enter); -#else - auto sj = CallInst::Create(setjmp_func, buff, "", enter); -#endif + CallInst *sj; + if (!TT.isOSWindows()) { + // For LLVM 3.3 compatibility + Value *args[] = {buff, + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; + sj = CallInst::Create(setjmp_func, args, "", enter); + } else { + sj = CallInst::Create(setjmp_func, buff, "", enter); + } // We need to mark this on the call site as well. 
See issue #6757 sj->setCanReturnTwice(); if (auto dbg = enter->getMetadata(LLVMContext::MD_dbg)) { @@ -220,35 +235,14 @@ static bool lowerExcHandlers(Function &F) { } // anonymous namespace -PreservedAnalyses LowerExcHandlers::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses LowerExcHandlersPass::run(Function &F, FunctionAnalysisManager &AM) { - if (lowerExcHandlers(F)) { + bool modified = lowerExcHandlers(F); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(F)); +#endif + if (modified) { return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } - - -struct LowerExcHandlersLegacy : public FunctionPass { - static char ID; - LowerExcHandlersLegacy() : FunctionPass(ID) - {} - bool runOnFunction(Function &F) { - return lowerExcHandlers(F); - } -}; - -char LowerExcHandlersLegacy::ID = 0; -static RegisterPass X("LowerExcHandlers", "Lower Julia Exception Handlers", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -Pass *createLowerExcHandlersPass() -{ - return new LowerExcHandlersLegacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createLowerExcHandlersPass()); -} diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp index 148d1ca158c61..12f1c8ad765d9 100644 --- a/src/llvm-muladd.cpp +++ b/src/llvm-muladd.cpp @@ -7,8 +7,8 @@ #include #include +#include #include -#include #include #include #include @@ -23,34 +23,51 @@ #include "julia.h" #include "julia_assert.h" -#define DEBUG_TYPE "combine_muladd" +#define DEBUG_TYPE "combine-muladd" #undef DEBUG using namespace llvm; STATISTIC(TotalContracted, "Total number of multiplies marked for FMA"); +#ifndef __clang_gcanalyzer__ +#define REMARK(remark) ORE.emit(remark) +#else +#define REMARK(remark) (void) 0; +#endif + /** * Combine * ``` * %v0 = fmul ... %a, %b - * %v = fadd fast ... %v0, %c + * %v = fadd contract ... %v0, %c * ``` * to - * `%v = call fast @llvm.fmuladd.<...>(... %a, ... %b, ... %c)` + * `%v = call contract @llvm.fmuladd.<...>(... %a, ... %b, ... %c)` * when `%v0` has no other use */ // Return true if we changed the mulOp -static bool checkCombine(Value *maybeMul) +static bool checkCombine(Value *maybeMul, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT { auto mulOp = dyn_cast(maybeMul); if (!mulOp || mulOp->getOpcode() != Instruction::FMul) return false; - if (!mulOp->hasOneUse()) + if (!mulOp->hasOneUse()) { + LLVM_DEBUG(dbgs() << "mulOp has multiple uses: " << *maybeMul << "\n"); + REMARK([&](){ + return OptimizationRemarkMissed(DEBUG_TYPE, "Multiuse FMul", mulOp) + << "fmul had multiple uses " << ore::NV("fmul", mulOp); + }); return false; + } // On 5.0+ we only need to mark the mulOp as contract and the backend will do the work for us. 
auto fmf = mulOp->getFastMathFlags(); if (!fmf.allowContract()) { + LLVM_DEBUG(dbgs() << "Marking mulOp for FMA: " << *maybeMul << "\n"); + REMARK([&](){ + return OptimizationRemark(DEBUG_TYPE, "Marked for FMA", mulOp) + << "marked for fma " << ore::NV("fmul", mulOp); + }); ++TotalContracted; fmf.setAllowContract(true); mulOp->copyFastMathFlags(fmf); @@ -59,8 +76,9 @@ static bool checkCombine(Value *maybeMul) return false; } -static bool combineMulAdd(Function &F) +static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT { + OptimizationRemarkEmitter ORE(&F); bool modified = false; for (auto &BB: F) { for (auto it = BB.begin(); it != BB.end();) { @@ -68,15 +86,15 @@ static bool combineMulAdd(Function &F) it++; switch (I.getOpcode()) { case Instruction::FAdd: { - if (!I.isFast()) + if (!I.hasAllowContract()) continue; - modified |= checkCombine(I.getOperand(0)) || checkCombine(I.getOperand(1)); + modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); break; } case Instruction::FSub: { - if (!I.isFast()) + if (!I.hasAllowContract()) continue; - modified |= checkCombine(I.getOperand(0)) || checkCombine(I.getOperand(1)); + modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); break; } default: @@ -84,41 +102,16 @@ static bool combineMulAdd(Function &F) } } } - assert(!verifyFunction(F, &errs())); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(F)); +#endif return modified; } -PreservedAnalyses CombineMulAdd::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses CombineMulAddPass::run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT { if (combineMulAdd(F)) { return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } - - -struct CombineMulAddLegacy : public FunctionPass { - static char ID; - CombineMulAddLegacy() : FunctionPass(ID) - {} - -private: - bool runOnFunction(Function &F) override { - return combineMulAdd(F); - } -}; - -char CombineMulAddLegacy::ID = 0; -static RegisterPass X("CombineMulAdd", "Combine mul and add to muladd", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -Pass *createCombineMulAddPass() -{ - return new CombineMulAddLegacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createCombineMulAddPass()); -} diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 4badf555bcdbe..cf6afaf153fb1 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -3,6 +3,8 @@ // Function multi-versioning // LLVM pass to clone function for different archs +//see src/processor.h for documentation of the relevant globals inserted here + #include "llvm-version.h" #include "passes.h" @@ -10,16 +12,17 @@ #include #include +#include #include +#include #include -#include #include #include #include +#include #include #include #include -#include #include #include #include @@ -35,7 +38,7 @@ #include #include -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #include "julia_assert.h" #define DEBUG_TYPE "julia_multiversioning" @@ -43,11 +46,11 @@ using namespace llvm; -extern Optional always_have_fma(Function&); +extern Optional always_have_fma(Function&, const Triple &TT); namespace { constexpr uint32_t clone_mask = - JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU; + JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16 | JL_TARGET_CLONE_BFLOAT16; // Treat identical 
mapping as missing and return `def` in that case. // We mainly need this to identify cloned function using value map after LLVM cloning @@ -61,181 +64,287 @@ Value *map_get(T &&vmap, Value *key, Value *def=nullptr) return val; } -// Iterate through uses of a particular type. -// Recursively scan through `ConstantExpr` and `ConstantAggregate` use. -template -struct ConstantUses { - template - struct Info { - Use *use; - T *val; - // If `samebits == true`, the offset the original value appears in the constant. - size_t offset; - // This specify whether the original value appears in the current value in exactly - // the same bit pattern (with possibly an offset determined by `offset`). - bool samebits; - Info(Use *use, T *val, size_t offset, bool samebits) : - use(use), - val(val), - offset(offset), - samebits(samebits) - { - } - Info(Use *use, size_t offset, bool samebits) : - use(use), - val(cast(use->getUser())), - offset(offset), - samebits(samebits) - { - } - }; - using UseInfo = Info; - struct Frame : Info { - template - Frame(Args &&... args) : - Info(std::forward(args)...), - cur(this->val->use_empty() ? nullptr : &*this->val->use_begin()), - _next(cur ? cur->getNext() : nullptr) - { - } - private: - void next() - { - cur = _next; - if (!cur) - return; - _next = cur->getNext(); +static bool is_vector(FunctionType *ty) +{ + if (ty->getReturnType()->isVectorTy()) + return true; + for (auto arg: ty->params()) { + if (arg->isVectorTy()) { + return true; } - Use *cur; - Use *_next; - friend struct ConstantUses; - }; - ConstantUses(Constant *c, Module &M) - : stack{Frame(nullptr, c, 0u, true)}, - M(M) - { - forward(); } - UseInfo get_info() const - { - auto &top = stack.back(); - return UseInfo(top.cur, top.offset, top.samebits); + return false; +} + +static uint32_t collect_func_info(Function &F, const Triple &TT, bool &has_veccall) +{ + DominatorTree DT(F); + LoopInfo LI(DT); + uint32_t flag = 0; + if (!LI.empty()) + flag |= JL_TARGET_CLONE_LOOP; + if (is_vector(F.getFunctionType())) { + flag |= JL_TARGET_CLONE_SIMD; + has_veccall = true; } - const SmallVector &get_stack() const - { - return stack; + for (auto &bb: F) { + for (auto &I: bb) { + if (auto call = dyn_cast(&I)) { + if (is_vector(call->getFunctionType())) { + has_veccall = true; + flag |= JL_TARGET_CLONE_SIMD; + } + if (auto callee = call->getCalledFunction()) { + auto name = callee->getName(); + if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) { + flag |= JL_TARGET_CLONE_MATH; + } + else if (name.startswith("julia.cpu.")) { + if (name.startswith("julia.cpu.have_fma.")) { + // for some platforms we know they always do (or don't) support + // FMA. in those cases we don't need to clone the function. 
+ // always_have_fma returns an optional + if (!always_have_fma(*callee, TT)) + flag |= JL_TARGET_CLONE_CPU; + } else { + flag |= JL_TARGET_CLONE_CPU; + } + } + } + } + else if (auto store = dyn_cast(&I)) { + if (store->getValueOperand()->getType()->isVectorTy()) { + flag |= JL_TARGET_CLONE_SIMD; + } + } + else if (I.getType()->isVectorTy()) { + flag |= JL_TARGET_CLONE_SIMD; + } + if (auto mathOp = dyn_cast(&I)) { + if (mathOp->getFastMathFlags().any()) { + flag |= JL_TARGET_CLONE_MATH; + } + } + + for (size_t i = 0; i < I.getNumOperands(); i++) { + if(I.getOperand(i)->getType()->isHalfTy()) { + flag |= JL_TARGET_CLONE_FLOAT16; + } + if(I.getOperand(i)->getType()->isBFloatTy()) { + flag |= JL_TARGET_CLONE_BFLOAT16; + } + } + uint32_t veccall_flags = JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU | JL_TARGET_CLONE_FLOAT16 | JL_TARGET_CLONE_BFLOAT16; + if (has_veccall && (flag & veccall_flags) == veccall_flags) { + return flag; + } + } } - void next() - { - stack.back().next(); - forward(); + return flag; +} + +struct TargetSpec { + std::string cpu_name; + std::string cpu_features; + uint32_t base; + uint32_t flags; + + TargetSpec() = default; + + static TargetSpec fromSpec(jl_target_spec_t &spec) { + TargetSpec out; + out.cpu_name = spec.cpu_name; + out.cpu_features = spec.cpu_features; + out.base = spec.base; + out.flags = spec.flags; + return out; } - bool done() - { - return stack.empty(); + + static TargetSpec fromMD(MDTuple *tup) { + TargetSpec out; + assert(tup->getNumOperands() == 4); + out.cpu_name = cast(tup->getOperand(0))->getString().str(); + out.cpu_features = cast(tup->getOperand(1))->getString().str(); + out.base = cast(cast(tup->getOperand(2))->getValue())->getZExtValue(); + out.flags = cast(cast(tup->getOperand(3))->getValue())->getZExtValue(); + return out; + } + + MDNode *toMD(LLVMContext &ctx) const { + return MDTuple::get(ctx, { + MDString::get(ctx, cpu_name), + MDString::get(ctx, cpu_features), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(ctx), base)), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(ctx), flags)) + }); } -private: - void forward(); - SmallVector stack; - Module &M; }; -template -void ConstantUses::forward() -{ - assert(!stack.empty()); - auto frame = &stack.back(); - const DataLayout &DL = M.getDataLayout(); - auto pop = [&] { - stack.pop_back(); - if (stack.empty()) { - return false; - } - frame = &stack.back(); - return true; - }; - auto push = [&] (Use *use, Constant *c, size_t offset, bool samebits) { - stack.emplace_back(use, c, offset, samebits); - frame = &stack.back(); - }; - auto handle_constaggr = [&] (Use *use, ConstantAggregate *aggr) { - if (!frame->samebits) { - push(use, aggr, 0, false); - return; - } - if (auto strct = dyn_cast(aggr)) { - auto layout = DL.getStructLayout(strct->getType()); - push(use, strct, frame->offset + layout->getElementOffset(use->getOperandNo()), true); - } - else if (auto ary = dyn_cast(aggr)) { - auto elty = ary->getType()->getElementType(); - push(use, ary, frame->offset + DL.getTypeAllocSize(elty) * use->getOperandNo(), true); - } - else if (auto vec = dyn_cast(aggr)) { - auto elty = vec->getType()->getElementType(); - push(use, vec, frame->offset + DL.getTypeAllocSize(elty) * use->getOperandNo(), true); - } - else { - jl_safe_printf("Unknown ConstantAggregate:\n"); - llvm_dump(aggr); - abort(); - } - }; - auto handle_constexpr = [&] (Use *use, ConstantExpr *expr) { - if (!frame->samebits) { - push(use, expr, 0, false); - return; - } - auto opcode = 
expr->getOpcode(); - if (opcode == Instruction::PtrToInt || opcode == Instruction::IntToPtr || - opcode == Instruction::AddrSpaceCast || opcode == Instruction::BitCast) { - push(use, expr, frame->offset, true); - } - else { - push(use, expr, 0, false); - } - }; - while (true) { - auto use = frame->cur; - if (!use) { - if (!pop()) - return; +static Optional> get_target_specs(Module &M) { + auto md = M.getModuleFlag("julia.mv.specs"); + if (!md) + return None; + auto tup = cast(md); + SmallVector out(tup->getNumOperands()); + for (unsigned i = 0; i < tup->getNumOperands(); i++) { + out[i] = TargetSpec::fromMD(cast(tup->getOperand(i).get())); + } + return out; +} + +static void set_target_specs(Module &M, ArrayRef specs) { + SmallVector md; + md.reserve(specs.size()); + for (auto &spec: specs) { + md.push_back(spec.toMD(M.getContext())); + } + M.addModuleFlag(Module::Error, "julia.mv.specs", MDTuple::get(M.getContext(), md)); +} + +static void annotate_module_clones(Module &M) { + auto TT = Triple(M.getTargetTriple()); + CallGraph CG(M); + SmallVector orig_funcs; + for (auto &F: M) { + if (F.isDeclaration()) continue; + orig_funcs.push_back(&F); + } + bool has_veccall = false; + SmallVector specs; + if (auto maybe_specs = get_target_specs(M)) { + specs = std::move(*maybe_specs); + } else { + auto full_specs = jl_get_llvm_clone_targets(); + specs.reserve(full_specs.size()); + for (auto &spec: full_specs) { + specs.push_back(TargetSpec::fromSpec(spec)); } - auto user = use->getUser(); - if (isa(user)) - return; - frame->next(); - if (auto aggr = dyn_cast(user)) { - handle_constaggr(use, aggr); + set_target_specs(M, specs); + } + SmallVector clones(orig_funcs.size(), APInt(specs.size(), 0)); + BitVector subtarget_cloned(orig_funcs.size()); + + SmallVector func_infos(orig_funcs.size()); + for (unsigned i = 0; i < orig_funcs.size(); i++) { + func_infos[i] = collect_func_info(*orig_funcs[i], TT, has_veccall); + } + for (unsigned i = 1; i < specs.size(); i++) { + if (specs[i].flags & JL_TARGET_CLONE_ALL) { + for (unsigned j = 0; j < orig_funcs.size(); j++) { + clones[j].setBit(i); + } + } else { + unsigned flag = specs[i].flags & clone_mask; + std::set sets[2]; + for (unsigned j = 0; j < orig_funcs.size(); j++) { + if (!(func_infos[j] & flag)) { + continue; + } + sets[0].insert(orig_funcs[j]); + } + std::set all_origs(sets[0]); + auto *cur_set = &sets[0]; + auto *next_set = &sets[1]; + // Reduce dispatch by expand the cloning set to functions that are directly called by + // and calling cloned functions. 
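// Editorial sketch (not part of the patch): the expansion step described in the comment
// above can be pictured on a plain adjacency-list call graph, independent of LLVM.
// Starting from the functions already marked for cloning, a callee of a cloned function
// is pulled in when it also calls back into the cloned set, and this repeats until a
// fixed point. `CallGraphSketch` and `expand_clone_set` are made-up names for this
// illustration only.
#include <map>
#include <set>
#include <vector>

using CallGraphSketch = std::map<int, std::vector<int>>; // function id -> ids it calls

static std::set<int> expand_clone_set(const CallGraphSketch &cg, std::set<int> cloned)
{
    bool changed = true;
    while (changed) {
        changed = false;
        std::vector<int> snapshot(cloned.begin(), cloned.end());
        for (int f : snapshot) {                        // every function currently marked for cloning
            auto it = cg.find(f);
            if (it == cg.end())
                continue;
            for (int callee : it->second) {             // functions it calls directly
                if (cloned.count(callee))
                    continue;                           // already in the cloned set
                auto cit = cg.find(callee);
                if (cit == cg.end())
                    continue;
                for (int callee2 : cit->second) {       // does the callee call into the cloned set?
                    if (cloned.count(callee2)) {
                        cloned.insert(callee);          // then clone it too, to avoid a dispatch
                        changed = true;
                        break;
                    }
                }
            }
        }
    }
    return cloned;
}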
+ while (!cur_set->empty()) { + for (auto orig_f: *cur_set) { + // Use the uncloned function since it's already in the call graph + auto node = CG[orig_f]; + for (const auto &I: *node) { + auto child_node = I.second; + auto orig_child_f = child_node->getFunction(); + if (!orig_child_f) + continue; + // Already cloned + if (all_origs.count(orig_child_f)) + continue; + bool calling_clone = false; + for (const auto &I2: *child_node) { + auto orig_child_f2 = I2.second->getFunction(); + if (!orig_child_f2) + continue; + if (all_origs.count(orig_child_f2)) { + calling_clone = true; + break; + } + } + if (!calling_clone) + continue; + next_set->insert(orig_child_f); + all_origs.insert(orig_child_f); + } + } + std::swap(cur_set, next_set); + next_set->clear(); + } + for (unsigned j = 0; j < orig_funcs.size(); j++) { + if (all_origs.count(orig_funcs[j])) { + clones[j].setBit(i); + subtarget_cloned.set(j); + } + } + } + } + // if there's only one target, we won't need any relocation slots + // but even if there is one clone_all and one non-clone_all, we still need + // to check for relocation slots because we must fixup instruction uses to + // point at the right function. + if (specs.size() > 1) { + for (unsigned i = 0; i < orig_funcs.size(); i++) { + auto &F = *orig_funcs[i]; + if (subtarget_cloned[i] && !ConstantUses(orig_funcs[i], M).done()) { + F.addFnAttr("julia.mv.reloc", ""); + } else { + auto uses = ConstantUses(orig_funcs[i], M); + if (!uses.done()) { + bool slot = false; + for (; !uses.done(); uses.next()) { + if (isa(uses.get_info().val)) { + slot = true; + break; + } + } + if (slot) { + F.addFnAttr("julia.mv.reloc", ""); + } else { + F.addFnAttr("julia.mv.fvar", ""); + } + } + } } - else if (auto expr = dyn_cast(user)) { - handle_constexpr(use, expr); + } + SmallString<128> cloneset; + for (unsigned i = 0; i < orig_funcs.size(); i++) { + if (!clones[i].isZero()) { + auto &F = *orig_funcs[i]; + cloneset.clear(); + clones[i].toStringUnsigned(cloneset, 16); + F.addFnAttr("julia.mv.clones", cloneset); } } + if (has_veccall) { + M.addModuleFlag(Module::Max, "julia.mv.veccall", 1); + } + M.addModuleFlag(Module::Error, "julia.mv.annotated", 1); } struct CloneCtx { struct Target { int idx; - uint32_t flags; std::unique_ptr vmap; // ValueToValueMapTy is not movable.... 
- // function ids that needs relocation to be initialized - std::set relocs{}; - Target(int idx, const jl_target_spec_t &spec) : + explicit Target(int idx) : idx(idx), - flags(spec.flags), vmap(new ValueToValueMapTy) { } }; struct Group : Target { - std::vector clones; - std::set clone_fs; - Group(int base, const jl_target_spec_t &spec) : - Target(base, spec), - clones{}, - clone_fs{} + SmallVector clones; + explicit Group(int base) : + Target(base), + clones{} {} Function *base_func(Function *orig_f) const { @@ -243,125 +352,128 @@ struct CloneCtx { return orig_f; return cast(vmap->lookup(orig_f)); } + + bool has_subtarget_clone(Function *orig_f) const + { + auto base = base_func(orig_f); + for (auto &clone: clones) { + if (map_get(*clone.vmap, base)) + return true; + } + return false; + } }; - CloneCtx(Module &M, function_ref GetLI, function_ref GetCG, bool allow_bad_fvars); - void clone_bases(); - void collect_func_infos(); - void clone_all_partials(); + CloneCtx(Module &M, bool allow_bad_fvars); + void prepare_slots(); + void clone_decls(); + void clone_bodies(); void fix_gv_uses(); + void finalize_orig_clone_attr(); void fix_inst_uses(); + void finalize_orig_features(); void emit_metadata(); private: void prepare_vmap(ValueToValueMapTy &vmap); - bool is_vector(FunctionType *ty) const; - void clone_function(Function *F, Function *new_f, ValueToValueMapTy &vmap); - uint32_t collect_func_info(Function &F); - void check_partial(Group &grp, Target &tgt); void clone_partial(Group &grp, Target &tgt); - void add_features(Function *F, StringRef name, StringRef features, uint32_t flags) const; - template - T *add_comdat(T *G) const; - uint32_t get_func_id(Function *F); - template - Constant *rewrite_gv_init(const Stack& stack); - template - Value *rewrite_inst_use(const Stack& stack, Value *replace, Instruction *insert_before); - std::pair get_reloc_slot(Function *F); - Constant *get_ptrdiff32(Constant *ptr, Constant *base) const; - template - Constant *emit_offset_table(const std::vector &vars, StringRef name) const; + uint32_t get_func_id(Function *F) const; + std::pair get_reloc_slot(Function *F) const; void rewrite_alias(GlobalAlias *alias, Function* F); MDNode *tbaa_const; - std::vector specs; - std::vector groups{}; - std::vector fvars; - std::vector gvars; + SmallVector specs; + SmallVector groups{}; + SmallVector linearized; + SmallVector fvars; + SmallVector gvars; Module &M; - function_ref GetLI; - function_ref GetCG; + Type *T_size; + Triple TT; - // Map from original functiton to one based index in `fvars` + // Map from original function to one based index in `fvars` std::map func_ids{}; - std::vector orig_funcs{}; - std::vector func_infos{}; - std::set cloned{}; + SmallVector orig_funcs{}; // GV addresses and their corresponding function id (i.e. 0-based index in `fvars`) - std::vector> gv_relocs{}; + SmallVector, 0> gv_relocs{}; // Mapping from function id (i.e. 0-based index in `fvars`) to GVs to be initialized. std::map const_relocs; - // Functions that were referred to by a global alias, and might not have other uses. - std::set alias_relocs; - bool has_veccall{false}; - bool has_cloneall{false}; + std::map extern_relocs; bool allow_bad_fvars{false}; }; template -static inline std::vector consume_gv(Module &M, const char *name, bool allow_bad_fvars) +static inline SmallVector consume_gv(Module &M, const char *name, bool allow_bad_fvars) { // Get information about sysimg export functions from the two global variables. 
// Strip them from the Module so that it's easier to handle the uses. GlobalVariable *gv = M.getGlobalVariable(name); assert(gv && gv->hasInitializer()); - auto *ary = cast(gv->getInitializer()); - unsigned nele = ary->getNumOperands(); - std::vector res(nele); - unsigned i = 0; - while (i < nele) { - llvm::Value *val = ary->getOperand(i)->stripPointerCasts(); - if (allow_bad_fvars && (!isa(val) || (isa(val) && cast(val)->isDeclaration()))) { - // Shouldn't happen in regular use, but can happen in bugpoint. - nele--; - continue; + ArrayType *Ty = cast(gv->getInitializer()->getType()); + unsigned nele = Ty->getArrayNumElements(); + SmallVector res(nele); + ConstantArray *ary = nullptr; + if (gv->getInitializer()->isNullValue()) { + for (unsigned i = 0; i < nele; ++i) + res[i] = cast(Constant::getNullValue(Ty->getArrayElementType())); + } + else { + ary = cast(gv->getInitializer()); + unsigned i = 0; + while (i < nele) { + llvm::Value *val = ary->getOperand(i)->stripPointerCasts(); + if (allow_bad_fvars && (!isa(val) || (isa(val) && cast(val)->isDeclaration()))) { + // Shouldn't happen in regular use, but can happen in bugpoint. + nele--; + continue; + } + res[i++] = cast(val); } - res[i++] = cast(val); + res.resize(nele); } - res.resize(nele); assert(gv->use_empty()); gv->eraseFromParent(); - if (ary->use_empty()) + if (ary && ary->use_empty()) ary->destroyConstant(); return res; } // Collect basic information about targets and functions. -CloneCtx::CloneCtx(Module &M, function_ref GetLI, function_ref GetCG, bool allow_bad_fvars) +CloneCtx::CloneCtx(Module &M, bool allow_bad_fvars) : tbaa_const(tbaa_make_child_with_context(M.getContext(), "jtbaa_const", nullptr, true).first), - specs(jl_get_llvm_clone_targets()), - fvars(consume_gv(M, "jl_sysimg_fvars", allow_bad_fvars)), - gvars(consume_gv(M, "jl_sysimg_gvars", false)), + specs(*get_target_specs(M)), + fvars(consume_gv(M, "jl_fvars", allow_bad_fvars)), + gvars(consume_gv(M, "jl_gvars", false)), M(M), - GetLI(GetLI), - GetCG(GetCG), + T_size(M.getDataLayout().getIntPtrType(M.getContext())), + TT(M.getTargetTriple()), allow_bad_fvars(allow_bad_fvars) { - groups.emplace_back(0, specs[0]); + groups.emplace_back(0); + linearized.resize(specs.size()); + linearized[0] = &groups[0]; + SmallVector group_ids(specs.size(), 0); uint32_t ntargets = specs.size(); for (uint32_t i = 1; i < ntargets; i++) { auto &spec = specs[i]; if (spec.flags & JL_TARGET_CLONE_ALL) { - has_cloneall = true; - groups.emplace_back(i, spec); + group_ids[i] = groups.size(); + groups.emplace_back(i); } else { - auto base = spec.base; - bool found = false; - for (auto &grp: groups) { - if (grp.idx == base) { - found = true; - grp.clones.emplace_back(i, spec); - break; - } - } - (void)found; + assert(0 <= spec.base && (unsigned) spec.base < i); + group_ids[i] = group_ids[spec.base]; + groups[group_ids[i]].clones.emplace_back(i); } } + for (auto &grp: groups) { + for (auto &tgt: grp.clones) + linearized[tgt.idx] = &tgt; + linearized[grp.idx] = &grp; + } uint32_t nfvars = fvars.size(); for (uint32_t i = 0; i < nfvars; i++) func_ids[fvars[i]] = i + 1; for (auto &F: M) { - if (F.empty()) + if (F.empty() && !F.hasFnAttribute("julia.mv.clones")) continue; orig_funcs.push_back(&F); } @@ -381,288 +493,132 @@ void CloneCtx::prepare_vmap(ValueToValueMapTy &vmap) } } -void CloneCtx::clone_function(Function *F, Function *new_f, ValueToValueMapTy &vmap) +void CloneCtx::prepare_slots() { - Function::arg_iterator DestI = new_f->arg_begin(); - for (Function::const_arg_iterator J = 
F->arg_begin(); J != F->arg_end(); ++J) { - DestI->setName(J->getName()); - vmap[&*J] = &*DestI++; - } - SmallVector Returns; -#if JL_LLVM_VERSION >= 130000 - // We are cloning into the same module - CloneFunctionInto(new_f, F, vmap, CloneFunctionChangeType::GlobalChanges, Returns); -#else - CloneFunctionInto(new_f, F, vmap, true, Returns); -#endif -} - -// Clone all clone_all targets. Makes sure that the base targets are all available. -void CloneCtx::clone_bases() -{ - if (!has_cloneall) - return; - uint32_t ngrps = groups.size(); - for (uint32_t gid = 1; gid < ngrps; gid++) { - auto &grp = groups[gid]; - auto suffix = ".clone_" + std::to_string(grp.idx); - auto &vmap = *grp.vmap; - // Fill in old->new mapping. We need to do this before cloning the function so that - // the intra target calls are automatically fixed up on cloning. - for (auto F: orig_funcs) { - Function *new_f = Function::Create(F->getFunctionType(), F->getLinkage(), - F->getName() + suffix, &M); - new_f->copyAttributesFrom(F); - vmap[F] = new_f; - } - prepare_vmap(vmap); - for (auto F: orig_funcs) { - clone_function(F, cast(vmap.lookup(F)), vmap); - } - } -} - -bool CloneCtx::is_vector(FunctionType *ty) const -{ - if (ty->getReturnType()->isVectorTy()) - return true; - for (auto arg: ty->params()) { - if (arg->isVectorTy()) { - return true; - } - } - return false; -} - -uint32_t CloneCtx::collect_func_info(Function &F) -{ - uint32_t flag = 0; - if (!GetLI(F).empty()) - flag |= JL_TARGET_CLONE_LOOP; - if (is_vector(F.getFunctionType())) { - flag |= JL_TARGET_CLONE_SIMD; - has_veccall = true; - } - for (auto &bb: F) { - for (auto &I: bb) { - if (auto call = dyn_cast(&I)) { - if (is_vector(call->getFunctionType())) { - has_veccall = true; - flag |= JL_TARGET_CLONE_SIMD; - } - if (auto callee = call->getCalledFunction()) { - auto name = callee->getName(); - if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) { - flag |= JL_TARGET_CLONE_MATH; - } - else if (name.startswith("julia.cpu.")) { - if (name.startswith("julia.cpu.have_fma.")) { - // for some platforms we know they always do (or don't) support - // FMA. in those cases we don't need to clone the function. 
- if (!always_have_fma(*callee).hasValue()) - flag |= JL_TARGET_CLONE_CPU; - } else { - flag |= JL_TARGET_CLONE_CPU; - } - } - } - } - else if (auto store = dyn_cast(&I)) { - if (store->getValueOperand()->getType()->isVectorTy()) { - flag |= JL_TARGET_CLONE_SIMD; - } - } - else if (I.getType()->isVectorTy()) { - flag |= JL_TARGET_CLONE_SIMD; - } - if (auto mathOp = dyn_cast(&I)) { - if (mathOp->getFastMathFlags().any()) { - flag |= JL_TARGET_CLONE_MATH; - } + for (auto &F : orig_funcs) { + if (F->hasFnAttribute("julia.mv.reloc")) { + assert(F->hasFnAttribute("julia.mv.clones")); + GlobalVariable *GV = new GlobalVariable(M, F->getType(), false, GlobalValue::ExternalLinkage, nullptr, F->getName() + ".reloc_slot"); + GV->setVisibility(GlobalValue::HiddenVisibility); + GV->setDSOLocal(true); + if (F->isDeclaration()) { + extern_relocs[F] = GV; } - if (has_veccall && (flag & JL_TARGET_CLONE_SIMD) && (flag & JL_TARGET_CLONE_MATH)) { - return flag; + else { + auto id = get_func_id(F); + const_relocs[id] = GV; + GV->setInitializer(Constant::getNullValue(F->getType())); } } } - return flag; -} - -void CloneCtx::collect_func_infos() -{ - uint32_t nfuncs = orig_funcs.size(); - func_infos.resize(nfuncs); - for (uint32_t i = 0; i < nfuncs; i++) { - func_infos[i] = collect_func_info(*orig_funcs[i]); - } } -void CloneCtx::clone_all_partials() +void CloneCtx::clone_decls() { - // First decide what to clone - // Do this before actually cloning the functions - // so that the call graph is easier to understand - for (auto &grp: groups) { - for (auto &tgt: grp.clones) { - check_partial(grp, tgt); - } + SmallVector suffixes(specs.size()); + for (unsigned i = 1; i < specs.size(); i++) { + suffixes[i] = "." + std::to_string(i); } - for (auto &grp: groups) { - for (auto &tgt: grp.clones) - clone_partial(grp, tgt); - // Also set feature strings for base target functions - // now that all the actual cloning is done. - auto &base_spec = specs[grp.idx]; - for (auto orig_f: orig_funcs) { - add_features(grp.base_func(orig_f), base_spec.cpu_name, - base_spec.cpu_features, base_spec.flags); - } - } - func_infos.clear(); // We don't need this anymore -} - -void CloneCtx::check_partial(Group &grp, Target &tgt) -{ - auto flag = specs[tgt.idx].flags & clone_mask; - auto suffix = ".clone_" + std::to_string(tgt.idx); - auto &vmap = *tgt.vmap; - uint32_t nfuncs = func_infos.size(); - - std::set all_origs; - // Use a simple heuristic to decide which function we need to clone. - for (uint32_t i = 0; i < nfuncs; i++) { - if (!(func_infos[i] & flag)) + for (auto &F : orig_funcs) { + if (!F->hasFnAttribute("julia.mv.clones")) continue; - auto orig_f = orig_funcs[i]; - // Fill in old->new mapping. We need to do this before cloning the function so that - // the intra target calls are automatically fixed up on cloning. - auto F = grp.base_func(orig_f); - Function *new_f = Function::Create(F->getFunctionType(), F->getLinkage(), - F->getName() + suffix, &M); - new_f->copyAttributesFrom(F); - vmap[F] = new_f; - if (!has_cloneall) - cloned.insert(orig_f); - grp.clone_fs.insert(i); - all_origs.insert(orig_f); - } - std::set sets[2]{all_origs, std::set{}}; - auto *cur_set = &sets[0]; - auto *next_set = &sets[1]; - // Reduce dispatch by expand the cloning set to functions that are directly called by - // and calling cloned functions. 
- auto &graph = GetCG(); - while (!cur_set->empty()) { - for (auto orig_f: *cur_set) { - // Use the uncloned function since it's already in the call graph - auto node = graph[orig_f]; - for (const auto &I: *node) { - auto child_node = I.second; - auto orig_child_f = child_node->getFunction(); - if (!orig_child_f) - continue; - // Already cloned - if (all_origs.count(orig_child_f)) - continue; - bool calling_clone = false; - for (const auto &I2: *child_node) { - auto orig_child_f2 = I2.second->getFunction(); - if (!orig_child_f2) - continue; - if (all_origs.count(orig_child_f2)) { - calling_clone = true; - break; - } - } - if (!calling_clone) - continue; - next_set->insert(orig_child_f); - all_origs.insert(orig_child_f); - auto child_f = grp.base_func(orig_child_f); - Function *new_f = Function::Create(child_f->getFunctionType(), - child_f->getLinkage(), - child_f->getName() + suffix, &M); - new_f->copyAttributesFrom(child_f); - vmap[child_f] = new_f; + APInt clones(specs.size(), F->getFnAttribute("julia.mv.clones").getValueAsString(), 16); + for (unsigned i = 1; i < specs.size(); i++) { + if (!clones[i]) { + continue; } - } - std::swap(cur_set, next_set); - next_set->clear(); - } - for (uint32_t i = 0; i < nfuncs; i++) { - // Only need to handle expanded functions - if (func_infos[i] & flag) - continue; - auto orig_f = orig_funcs[i]; - if (all_origs.count(orig_f)) { - if (!has_cloneall) - cloned.insert(orig_f); - grp.clone_fs.insert(i); + auto new_F = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName() + suffixes[i], &M); + new_F->copyAttributesFrom(F); + new_F->setVisibility(F->getVisibility()); + new_F->setDSOLocal(true); + auto base_func = F; + if (!(specs[i].flags & JL_TARGET_CLONE_ALL)) + base_func = static_cast(linearized[specs[i].base])->base_func(F); + (*linearized[i]->vmap)[base_func] = new_F; } } } -void CloneCtx::clone_partial(Group &grp, Target &tgt) +static void clone_function(Function *F, Function *new_f, ValueToValueMapTy &vmap) { - auto &spec = specs[tgt.idx]; - auto &vmap = *tgt.vmap; - uint32_t nfuncs = orig_funcs.size(); - prepare_vmap(vmap); - for (uint32_t i = 0; i < nfuncs; i++) { - auto orig_f = orig_funcs[i]; - auto F = grp.base_func(orig_f); - if (auto new_v = map_get(vmap, F)) { - auto new_f = cast(new_v); - assert(new_f != F); - clone_function(F, new_f, vmap); - // We can set the feature strings now since no one is going to - // clone these functions again. 
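// Editorial sketch (not part of the patch): the "julia.mv.clones" string attribute parsed
// by clone_decls above is an APInt bitmask (one bit per target spec) printed in base 16
// by annotate_module_clones. A minimal round trip of that encoding, with made-up helper
// names (`encode_clone_mask`, `is_cloned_for`):
#include <initializer_list>
#include <llvm/ADT/APInt.h>
#include <llvm/ADT/SmallString.h>
#include <llvm/ADT/StringRef.h>

static llvm::SmallString<16> encode_clone_mask(unsigned ntargets, std::initializer_list<unsigned> cloned_targets)
{
    llvm::APInt mask(ntargets, 0);
    for (unsigned t : cloned_targets)
        mask.setBit(t);                      // one bit per target that receives a clone
    llvm::SmallString<16> out;
    mask.toStringUnsigned(out, 16);          // same base-16 form stored in "julia.mv.clones"
    return out;
}

static bool is_cloned_for(llvm::StringRef attr_value, unsigned ntargets, unsigned target)
{
    llvm::APInt mask(ntargets, attr_value, 16); // same parse as in clone_decls
    return mask[target];                        // bit test, as in `if (!clones[i]) continue;`
}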
- add_features(new_f, spec.cpu_name, spec.cpu_features, spec.flags); - } + Function::arg_iterator DestI = new_f->arg_begin(); + for (Function::const_arg_iterator J = F->arg_begin(); J != F->arg_end(); ++J) { + DestI->setName(J->getName()); + vmap[&*J] = &*DestI++; } + SmallVector Returns; + // We are cloning into the same module + CloneFunctionInto(new_f, F, vmap, CloneFunctionChangeType::GlobalChanges, Returns); } -void CloneCtx::add_features(Function *F, StringRef name, StringRef features, uint32_t flags) const +static void add_features(Function *F, TargetSpec &spec) { auto attr = F->getFnAttribute("target-features"); if (attr.isStringAttribute()) { std::string new_features(attr.getValueAsString()); new_features += ","; - new_features += features; + new_features += spec.cpu_features; F->addFnAttr("target-features", new_features); } else { - F->addFnAttr("target-features", features); + F->addFnAttr("target-features", spec.cpu_features); } - F->addFnAttr("target-cpu", name); + F->addFnAttr("target-cpu", spec.cpu_name); if (!F->hasFnAttribute(Attribute::OptimizeNone)) { - if (flags & JL_TARGET_OPTSIZE) { + if (spec.flags & JL_TARGET_OPTSIZE) { F->addFnAttr(Attribute::OptimizeForSize); } - else if (flags & JL_TARGET_MINSIZE) { + else if (spec.flags & JL_TARGET_MINSIZE) { F->addFnAttr(Attribute::MinSize); } } } -uint32_t CloneCtx::get_func_id(Function *F) +void CloneCtx::clone_bodies() { - auto &ref = func_ids[F]; - if (!ref) { - if (allow_bad_fvars && F->isDeclaration()) { - // This should never happen in regular use, but can happen if - // bugpoint deletes the function. Just do something here to - // allow bugpoint to proceed. - return (uint32_t)-1; + for (auto F : orig_funcs) { + for (unsigned i = 0; i < groups.size(); i++) { + Function *group_F = F; + if (i != 0) { + group_F = groups[i].base_func(F); + if (!F->isDeclaration()) { + clone_function(F, group_F, *groups[i].vmap); + } + } + for (auto &target : groups[i].clones) { + prepare_vmap(*target.vmap); + auto target_F = cast_or_null(map_get(*target.vmap, group_F)); + if (target_F) { + if (!F->isDeclaration()) { + clone_function(group_F, target_F, *target.vmap); + } + add_features(target_F, specs[target.idx]); + target_F->addFnAttr("julia.mv.clone", std::to_string(target.idx)); + } + } + // don't set the original function's features yet, + // since we may clone it for later groups + if (i != 0) { + add_features(group_F, specs[groups[i].idx]); + group_F->addFnAttr("julia.mv.clone", std::to_string(groups[i].idx)); + } } - fvars.push_back(F); - ref = fvars.size(); + // still don't set the original function's features yet, + // since we'll copy function attributes if we need to rewrite + // the alias, and target specific attributes are illegal on + // alias trampolines unless the user explicitly specifies them } - return ref - 1; +} + +uint32_t CloneCtx::get_func_id(Function *F) const +{ + auto ref = func_ids.find(F); + assert(ref != func_ids.end() && "Requesting id of non-fvar!"); + return ref->second - 1; } template -Constant *CloneCtx::rewrite_gv_init(const Stack& stack) +static Constant *rewrite_gv_init(const Stack& stack) { // Null initialize so that LLVM put it in the correct section. 
SmallVector args; @@ -713,18 +669,17 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F) Function::Create(F->getFunctionType(), alias->getLinkage(), "", &M); trampoline->copyAttributesFrom(F); trampoline->takeName(alias); + trampoline->setVisibility(alias->getVisibility()); + trampoline->setDSOLocal(alias->isDSOLocal()); + // drop multiversioning attributes, add alias attribute for testing purposes + trampoline->removeFnAttr("julia.mv.reloc"); + trampoline->removeFnAttr("julia.mv.clones"); + trampoline->addFnAttr("julia.mv.alias"); alias->eraseFromParent(); uint32_t id; GlobalVariable *slot; std::tie(id, slot) = get_reloc_slot(F); - for (auto &grp: groups) { - grp.relocs.insert(id); - for (auto &tgt: grp.clones) { - tgt.relocs.insert(id); - } - } - alias_relocs.insert(id); auto BB = BasicBlock::Create(F->getContext(), "top", trampoline); IRBuilder<> irbuilder(BB); @@ -733,18 +688,16 @@ void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F) ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(F->getContext(), None)); - std::vector Args; + SmallVector Args; for (auto &arg : trampoline->args()) Args.push_back(&arg); - auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, makeArrayRef(Args)); - if (F->isVarArg()) -#if (defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_PPC64_)) - abort(); // musttail support is very bad on ARM, PPC, PPC64 (as of LLVM 3.9) -#else + auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, ArrayRef(Args)); + if (F->isVarArg()) { + assert(!TT.isARM() && !TT.isPPC() && "musttail not supported on ARM/PPC!"); call->setTailCallKind(CallInst::TCK_MustTail); -#endif - else + } else { call->setTailCallKind(CallInst::TCK_Tail); + } if (F->getReturnType() == Type::getVoidTy(F->getContext())) irbuilder.CreateRetVoid(); @@ -773,36 +726,48 @@ void CloneCtx::fix_gv_uses() assert(info.use->getOperandNo() == 0); assert(!val->isConstant()); auto fid = get_func_id(orig_f); - auto addr = ConstantExpr::getPtrToInt(val, getSizeTy(val->getContext())); + auto addr = ConstantExpr::getPtrToInt(val, T_size); if (info.offset) - addr = ConstantExpr::getAdd(addr, ConstantInt::get(getSizeTy(val->getContext()), info.offset)); + addr = ConstantExpr::getAdd(addr, ConstantInt::get(T_size, info.offset)); gv_relocs.emplace_back(addr, fid); val->setInitializer(rewrite_gv_init(stack)); } return changed; }; for (auto orig_f: orig_funcs) { - if (!has_cloneall && !cloned.count(orig_f)) + if (!orig_f->hasFnAttribute("julia.mv.clones")) continue; while (single_pass(orig_f)) { } } } -std::pair CloneCtx::get_reloc_slot(Function *F) +void CloneCtx::finalize_orig_clone_attr() { - // Null initialize so that LLVM put it in the correct section. 
- auto id = get_func_id(F); - auto &slot = const_relocs[id]; - if (!slot) - slot = new GlobalVariable(M, F->getType(), false, GlobalVariable::InternalLinkage, - ConstantPointerNull::get(F->getType()), - F->getName() + ".reloc_slot"); - return std::make_pair(id, slot); + for (auto orig_f: orig_funcs) { + if (!orig_f->hasFnAttribute("julia.mv.clones")) + continue; + orig_f->addFnAttr("julia.mv.clone", "0"); + } +} + +std::pair CloneCtx::get_reloc_slot(Function *F) const +{ + if (F->isDeclaration()) { + auto extern_decl = extern_relocs.find(F); + assert(extern_decl != extern_relocs.end() && "Missing extern relocation slot!"); + return {(uint32_t)-1, extern_decl->second}; + } + else { + auto id = get_func_id(F); + auto slot = const_relocs.find(id); + assert(slot != const_relocs.end() && "Missing relocation slot!"); + return {id, slot->second}; + } } template -Value *CloneCtx::rewrite_inst_use(const Stack& stack, Value *replace, Instruction *insert_before) +static Value *rewrite_inst_use(const Stack& stack, Type *T_size, Value *replace, Instruction *insert_before) { SmallVector args; uint32_t nlevel = stack.size(); @@ -818,6 +783,7 @@ Value *CloneCtx::rewrite_inst_use(const Stack& stack, Value *replace, Instructio replace = inst; continue; } + assert(val); unsigned nargs = val->getNumOperands(); args.resize(nargs); for (unsigned j = 0; j < nargs; j++) { @@ -839,7 +805,7 @@ Value *CloneCtx::rewrite_inst_use(const Stack& stack, Value *replace, Instructio } else if (isa(val)) { replace = InsertElementInst::Create(ConstantVector::get(args), replace, - ConstantInt::get(getSizeTy(insert_before->getContext()), idx), "", + ConstantInt::get(T_size, idx), "", insert_before); } else { @@ -851,96 +817,103 @@ Value *CloneCtx::rewrite_inst_use(const Stack& stack, Value *replace, Instructio return replace; } +template +static void replaceUsesWithLoad(Function &F, Type *T_size, I2GV should_replace, MDNode *tbaa_const) { + bool changed; + do { + changed = false; + for (auto uses = ConstantUses(&F, *F.getParent()); !uses.done(); uses.next()) { + auto info = uses.get_info(); + auto use_i = info.val; + GlobalVariable *slot = should_replace(*use_i); + if (!slot) + continue; + Instruction *insert_before = use_i; + if (auto phi = dyn_cast(use_i)) + insert_before = phi->getIncomingBlock(*info.use)->getTerminator(); + Instruction *ptr = new LoadInst(F.getType(), slot, "", false, insert_before); + ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); + ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ptr->getContext(), None)); + use_i->setOperand(info.use->getOperandNo(), + rewrite_inst_use(uses.get_stack(), T_size, ptr, + insert_before)); + changed = true; + } + } while (changed); +} + void CloneCtx::fix_inst_uses() { uint32_t nfuncs = orig_funcs.size(); for (auto &grp: groups) { - auto suffix = ".clone_" + std::to_string(grp.idx); for (uint32_t i = 0; i < nfuncs; i++) { - if (!grp.clone_fs.count(i)) - continue; auto orig_f = orig_funcs[i]; + if (!grp.has_subtarget_clone(orig_f)) + continue; auto F = grp.base_func(orig_f); - bool changed; - do { - changed = false; - for (auto uses = ConstantUses(F, M); !uses.done(); uses.next()) { - auto info = uses.get_info(); - auto use_i = info.val; - auto use_f = use_i->getFunction(); - if (!use_f->getName().endswith(suffix)) - continue; - Instruction *insert_before = use_i; - if (auto phi = dyn_cast(use_i)) - insert_before = phi->getIncomingBlock(*info.use)->getTerminator(); - uint32_t id; - GlobalVariable *slot; - std::tie(id, slot) = get_reloc_slot(orig_f); 
- Instruction *ptr = new LoadInst(orig_f->getType(), slot, "", false, insert_before); - ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); - ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ptr->getContext(), None)); - use_i->setOperand(info.use->getOperandNo(), - rewrite_inst_use(uses.get_stack(), ptr, - insert_before)); - - grp.relocs.insert(id); - for (auto &tgt: grp.clones) { - // The enclosing function of the use is cloned, - // no need to deal with this use on this target. - if (map_get(*tgt.vmap, use_f)) - continue; - tgt.relocs.insert(id); - } - - changed = true; - } - } while (changed); + auto grpidx = std::to_string(grp.idx); + replaceUsesWithLoad(*F, T_size, [&](Instruction &I) -> GlobalVariable * { + uint32_t id; + GlobalVariable *slot; + auto use_f = I.getFunction(); + if (!use_f->hasFnAttribute("julia.mv.clone") || use_f->getFnAttribute("julia.mv.clone").getValueAsString() != grpidx) + return nullptr; + std::tie(id, slot) = get_reloc_slot(orig_f); + return slot; + }, tbaa_const); } } } -template -inline T *CloneCtx::add_comdat(T *G) const -{ -#if defined(_OS_WINDOWS_) - // add __declspec(dllexport) to everything marked for export - if (G->getLinkage() == GlobalValue::ExternalLinkage) - G->setDLLStorageClass(GlobalValue::DLLExportStorageClass); - else - G->setDLLStorageClass(GlobalValue::DefaultStorageClass); -#endif - return G; +void CloneCtx::finalize_orig_features() { + for (auto F : orig_funcs) { + add_features(F, specs[0]); + } } -Constant *CloneCtx::get_ptrdiff32(Constant *ptr, Constant *base) const +static Constant *get_ptrdiff32(Type *T_size, Constant *ptr, Constant *base) { if (ptr->getType()->isPointerTy()) - ptr = ConstantExpr::getPtrToInt(ptr, getSizeTy(ptr->getContext())); + ptr = ConstantExpr::getPtrToInt(ptr, T_size); auto ptrdiff = ConstantExpr::getSub(ptr, base); return sizeof(void*) == 8 ? 
ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff; } template -Constant *CloneCtx::emit_offset_table(const std::vector &vars, StringRef name) const +static Constant *emit_offset_table(Module &M, Type *T_size, const SmallVectorImpl &vars, + StringRef name, StringRef suffix) { auto T_int32 = Type::getInt32Ty(M.getContext()); - auto T_size = getSizeTy(M.getContext()); - assert(!vars.empty()); - add_comdat(GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage, - name + "_base", - ConstantExpr::getBitCast(vars[0], T_size->getPointerTo()), &M)); - auto vbase = ConstantExpr::getPtrToInt(vars[0], T_size); uint32_t nvars = vars.size(); - std::vector offsets(nvars + 1); + Constant *base = nullptr; + if (nvars > 0) { + base = ConstantExpr::getBitCast(vars[0], T_size->getPointerTo()); + auto ga = GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage, + name + "_base" + suffix, + base, &M); + ga->setVisibility(GlobalValue::HiddenVisibility); + ga->setDSOLocal(true); + } else { + auto gv = new GlobalVariable(M, T_size, true, GlobalValue::ExternalLinkage, Constant::getNullValue(T_size), name + "_base" + suffix); + gv->setVisibility(GlobalValue::HiddenVisibility); + gv->setDSOLocal(true); + base = gv; + } + auto vbase = ConstantExpr::getPtrToInt(base, T_size); + SmallVector offsets(nvars + 1); offsets[0] = ConstantInt::get(T_int32, nvars); - offsets[1] = ConstantInt::get(T_int32, 0); - for (uint32_t i = 1; i < nvars; i++) - offsets[i + 1] = get_ptrdiff32(vars[i], vbase); + if (nvars > 0) { + offsets[1] = ConstantInt::get(T_int32, 0); + for (uint32_t i = 1; i < nvars; i++) + offsets[i + 1] = get_ptrdiff32(T_size, vars[i], vbase); + } ArrayType *vars_type = ArrayType::get(T_int32, nvars + 1); - add_comdat(new GlobalVariable(M, vars_type, true, + auto gv = new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, ConstantArray::get(vars_type, offsets), - name + "_offsets")); + name + "_offsets" + suffix); + gv->setVisibility(GlobalValue::HiddenVisibility); + gv->setDSOLocal(true); return vbase; } @@ -952,50 +925,30 @@ void CloneCtx::emit_metadata() return; } + StringRef suffix; + if (auto suffix_md = M.getModuleFlag("julia.mv.suffix")) { + suffix = cast(suffix_md)->getString(); + } + // Store back the information about exported functions. - auto fbase = emit_offset_table(fvars, "jl_sysimg_fvars"); - auto gbase = emit_offset_table(gvars, "jl_sysimg_gvars"); + auto fbase = emit_offset_table(M, T_size, fvars, "jl_fvar", suffix); + auto gbase = emit_offset_table(M, T_size, gvars, "jl_gvar", suffix); - uint32_t ntargets = specs.size(); - SmallVector targets(ntargets); - for (auto &grp: groups) { - targets[grp.idx] = &grp; - for (auto &tgt: grp.clones) { - targets[tgt.idx] = &tgt; - } - } + M.getGlobalVariable("jl_fvar_idxs")->setName("jl_fvar_idxs" + suffix); + M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs" + suffix); - // Generate `jl_dispatch_target_ids` - { - const uint32_t base_flags = has_veccall ? 
JL_TARGET_VEC_CALL : 0; - std::vector data; - auto push_i32 = [&] (uint32_t v) { - uint8_t buff[4]; - memcpy(buff, &v, 4); - data.insert(data.end(), buff, buff + 4); - }; - push_i32(ntargets); - for (uint32_t i = 0; i < ntargets; i++) { - push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); - auto &specdata = specs[i].data; - data.insert(data.end(), specdata.begin(), specdata.end()); - } - auto value = ConstantDataArray::get(M.getContext(), data); - add_comdat(new GlobalVariable(M, value->getType(), true, - GlobalVariable::ExternalLinkage, - value, "jl_dispatch_target_ids")); - } + uint32_t ntargets = specs.size(); // Generate `jl_dispatch_reloc_slots` std::set shared_relocs; { auto T_int32 = Type::getInt32Ty(M.getContext()); - std::stable_sort(gv_relocs.begin(), gv_relocs.end(), + std::sort(gv_relocs.begin(), gv_relocs.end(), [] (const std::pair &lhs, const std::pair &rhs) { return lhs.second < rhs.second; }); - std::vector values{nullptr}; + SmallVector values{nullptr}; uint32_t gv_reloc_idx = 0; uint32_t ngv_relocs = gv_relocs.size(); for (uint32_t id = 0; id < nfvars; id++) { @@ -1007,30 +960,30 @@ void CloneCtx::emit_metadata() gv_reloc_idx++) { shared_relocs.insert(id); values.push_back(id_v); - values.push_back(get_ptrdiff32(gv_relocs[gv_reloc_idx].first, gbase)); + values.push_back(get_ptrdiff32(T_size, gv_relocs[gv_reloc_idx].first, gbase)); } auto it = const_relocs.find(id); if (it != const_relocs.end()) { - values.push_back(id_v); - values.push_back(get_ptrdiff32(it->second, gbase)); - } - if (alias_relocs.find(id) != alias_relocs.end()) { shared_relocs.insert(id); + values.push_back(id_v); + values.push_back(get_ptrdiff32(T_size, it->second, gbase)); } } values[0] = ConstantInt::get(T_int32, values.size() / 2); ArrayType *vars_type = ArrayType::get(T_int32, values.size()); - add_comdat(new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, + auto gv = new GlobalVariable(M, vars_type, true, GlobalVariable::ExternalLinkage, ConstantArray::get(vars_type, values), - "jl_dispatch_reloc_slots")); + "jl_clone_slots" + suffix); + gv->setVisibility(GlobalValue::HiddenVisibility); + gv->setDSOLocal(true); } // Generate `jl_dispatch_fvars_idxs` and `jl_dispatch_fvars_offsets` { - std::vector idxs; - std::vector offsets; + SmallVector idxs; + SmallVector offsets; for (uint32_t i = 0; i < ntargets; i++) { - auto tgt = targets[i]; + auto tgt = linearized[i]; auto &spec = specs[i]; uint32_t len_idx = idxs.size(); idxs.push_back(0); // We will fill in the real value later. 
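// Editorial sketch (not part of the patch): the jl_fvar_offsets / jl_gvar_offsets tables
// emitted by emit_offset_table above are "count first, then 32-bit deltas from the first
// entry" arrays, and the per-target idxs table built here reserves slot 0 for a count that
// is filled in later in the same way. `build_offset_table` is a made-up name showing that
// encoding on plain host addresses instead of LLVM constants.
#include <cstdint>
#include <vector>

static std::vector<int32_t> build_offset_table(const std::vector<uintptr_t> &addrs)
{
    std::vector<int32_t> table;
    table.push_back((int32_t)addrs.size());            // element 0: number of entries
    if (!addrs.empty()) {
        table.push_back(0);                             // entry 0 is the base, so its delta is 0
        for (size_t i = 1; i < addrs.size(); i++)       // remaining entries: byte delta to the base
            table.push_back((int32_t)(addrs[i] - addrs[0]));
    }
    return table;
}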
@@ -1039,49 +992,53 @@ void CloneCtx::emit_metadata() auto grp = static_cast(tgt); count = jl_sysimg_tag_mask; for (uint32_t j = 0; j < nfvars; j++) { - if (shared_relocs.count(j) || tgt->relocs.count(j)) { + if (shared_relocs.count(j)) { count++; idxs.push_back(j); } if (i != 0) { - offsets.push_back(get_ptrdiff32(grp->base_func(fvars[j]), fbase)); + offsets.push_back(get_ptrdiff32(T_size, grp->base_func(fvars[j]), fbase)); } } } else { auto baseidx = spec.base; - auto grp = static_cast(targets[baseidx]); + auto grp = static_cast(linearized[baseidx]); idxs.push_back(baseidx); for (uint32_t j = 0; j < nfvars; j++) { auto base_f = grp->base_func(fvars[j]); - if (shared_relocs.count(j) || tgt->relocs.count(j)) { + if (shared_relocs.count(j)) { count++; idxs.push_back(jl_sysimg_tag_mask | j); auto f = map_get(*tgt->vmap, base_f, base_f); - offsets.push_back(get_ptrdiff32(cast(f), fbase)); + offsets.push_back(get_ptrdiff32(T_size, cast(f), fbase)); } else if (auto f = map_get(*tgt->vmap, base_f)) { count++; idxs.push_back(j); - offsets.push_back(get_ptrdiff32(cast(f), fbase)); + offsets.push_back(get_ptrdiff32(T_size, cast(f), fbase)); } } } idxs[len_idx] = count; } auto idxval = ConstantDataArray::get(M.getContext(), idxs); - add_comdat(new GlobalVariable(M, idxval->getType(), true, + auto gv1 = new GlobalVariable(M, idxval->getType(), true, GlobalVariable::ExternalLinkage, - idxval, "jl_dispatch_fvars_idxs")); + idxval, "jl_clone_idxs" + suffix); + gv1->setVisibility(GlobalValue::HiddenVisibility); + gv1->setDSOLocal(true); ArrayType *offsets_type = ArrayType::get(Type::getInt32Ty(M.getContext()), offsets.size()); - add_comdat(new GlobalVariable(M, offsets_type, true, + auto gv2 = new GlobalVariable(M, offsets_type, true, GlobalVariable::ExternalLinkage, ConstantArray::get(offsets_type, offsets), - "jl_dispatch_fvars_offsets")); + "jl_clone_offsets" + suffix); + gv2->setVisibility(GlobalValue::HiddenVisibility); + gv2->setDSOLocal(true); } } -static bool runMultiVersioning(Module &M, function_ref GetLI, function_ref GetCG, bool allow_bad_fvars) +static bool runMultiVersioning(Module &M, bool allow_bad_fvars) { // Group targets and identify cloning bases. // Also initialize function info maps (we'll update these maps as we go) @@ -1091,28 +1048,35 @@ static bool runMultiVersioning(Module &M, function_ref Get // * Cloned function -> Original function (add as we clone functions) // * Original function -> Base function (target specific and updated by LLVM) // * ID -> relocation slots (const). 
- if (M.getName() == "sysimage") + if (!M.getModuleFlag("julia.mv.enable")) { return false; + } - GlobalVariable *fvars = M.getGlobalVariable("jl_sysimg_fvars"); - GlobalVariable *gvars = M.getGlobalVariable("jl_sysimg_gvars"); + // for opt testing purposes + bool annotated = !!M.getModuleFlag("julia.mv.annotated"); + if (!annotated) { + annotate_module_clones(M); + } + + // also for opt testing purposes + if (M.getModuleFlag("julia.mv.skipcloning")) { + assert(!annotated && "Multiversioning was enabled and annotations were added, but cloning was skipped!"); + return true; + } + + GlobalVariable *fvars = M.getGlobalVariable("jl_fvars"); + GlobalVariable *gvars = M.getGlobalVariable("jl_gvars"); if (allow_bad_fvars && (!fvars || !fvars->hasInitializer() || !isa(fvars->getInitializer()) || !gvars || !gvars->hasInitializer() || !isa(gvars->getInitializer()))) return false; - CloneCtx clone(M, GetLI, GetCG, allow_bad_fvars); + CloneCtx clone(M, allow_bad_fvars); - // Collect a list of original functions and clone base functions - clone.clone_bases(); + clone.prepare_slots(); - // Collect function info (type of instruction used) - clone.collect_func_infos(); + clone.clone_decls(); - // If any partially cloned target exist decide which functions to clone for these targets. - // Clone functions for each group and collect a list of them. - // We can also add feature strings for cloned functions - // now that no additional cloning needs to be done. - clone.clone_all_partials(); + clone.clone_bodies(); // Scan **ALL** cloned functions (including full cloning for base target) // for global variables initialization use. @@ -1120,6 +1084,10 @@ static bool runMultiVersioning(Module &M, function_ref Get // These relocations must be initialized for **ALL** targets. clone.fix_gv_uses(); + // Now we have all the cloned functions, we can set the original functions' + // clone attribute to be 0 + clone.finalize_orig_clone_attr(); + // For each group, scan all functions cloned by **PARTIALLY** cloned targets for // instruction use. // A function needs a const relocation slot if it is cloned and is called by a @@ -1130,75 +1098,34 @@ static bool runMultiVersioning(Module &M, function_ref Get // A target needs a slot to be initialized iff at least one caller is not initialized. clone.fix_inst_uses(); + //Now set the original functions' target-specific attributes, since nobody will look at those again + clone.finalize_orig_features(); + // Store back sysimg information with the correct format. // At this point, we should have fixed up all the uses of the cloned functions // and collected all the shared/target-specific relocations. 
clone.emit_metadata(); - - assert(!verifyModule(M)); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(M)); +#endif return true; } -struct MultiVersioningLegacy: public ModulePass { - static char ID; - MultiVersioningLegacy(bool allow_bad_fvars=false) - : ModulePass(ID), allow_bad_fvars(allow_bad_fvars) - {} - -private: - bool runOnModule(Module &M) override; - void getAnalysisUsage(AnalysisUsage &AU) const override - { - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - } - bool allow_bad_fvars; -}; +} // anonymous namespace -bool MultiVersioningLegacy::runOnModule(Module &M) +void multiversioning_preannotate(Module &M) { - auto GetLI = [this](Function &F) -> LoopInfo & { - return getAnalysis(F).getLoopInfo(); - }; - auto GetCG = [this]() -> CallGraph & { - return getAnalysis().getCallGraph(); - }; - return runMultiVersioning(M, GetLI, GetCG, allow_bad_fvars); + annotate_module_clones(M); + M.addModuleFlag(Module::ModFlagBehavior::Error, "julia.mv.enable", 1); } - -char MultiVersioningLegacy::ID = 0; -static RegisterPass X("JuliaMultiVersioning", "JuliaMultiVersioning Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -} // anonymous namespace - -PreservedAnalyses MultiVersioning::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses MultiVersioningPass::run(Module &M, ModuleAnalysisManager &AM) { - auto &FAM = AM.getResult(M).getManager(); - auto GetLI = [&](Function &F) -> LoopInfo & { - return FAM.getResult(F); - }; - auto GetCG = [&]() -> CallGraph & { - return AM.getResult(M); - }; - if (runMultiVersioning(M, GetLI, GetCG, external_use)) { + if (runMultiVersioning(M, external_use)) { auto preserved = PreservedAnalyses::allInSet(); preserved.preserve(); return preserved; } return PreservedAnalyses::all(); } - -Pass *createMultiVersioningPass(bool allow_bad_fvars) -{ - return new MultiVersioningLegacy(allow_bad_fvars); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createMultiVersioningPass(false)); -} diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index f0c0c6ee77b44..d17ce3105135c 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -7,12 +7,13 @@ #include "llvm-version.h" +#include "llvm/IR/Attributes.h" #include #include #include #include -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #include "julia_assert.h" #include "llvm-pass-helpers.h" @@ -23,12 +24,11 @@ JuliaPassContext::JuliaPassContext() tbaa_gcframe(nullptr), tbaa_tag(nullptr), - pgcstack_getter(nullptr), gc_flush_func(nullptr), + pgcstack_getter(nullptr), adoptthread_func(nullptr), gc_flush_func(nullptr), gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr), - pointer_from_objref_func(nullptr), alloc_obj_func(nullptr), + pointer_from_objref_func(nullptr), gc_loaded_func(nullptr), alloc_obj_func(nullptr), typeof_func(nullptr), write_barrier_func(nullptr), - write_barrier_binding_func(nullptr), call_func(nullptr), - call2_func(nullptr), module(nullptr) + call_func(nullptr), call2_func(nullptr), call3_func(nullptr), module(nullptr) { } @@ -44,16 +44,18 @@ void JuliaPassContext::initFunctions(Module &M) tbaa_tag = tbaa_make_child_with_context(llvmctx, "jtbaa_tag", tbaa_data_scalar).first; pgcstack_getter = M.getFunction("julia.get_pgcstack"); + adoptthread_func = M.getFunction("julia.get_pgcstack_or_new"); gc_flush_func = M.getFunction("julia.gcroot_flush"); gc_preserve_begin_func = M.getFunction("llvm.julia.gc_preserve_begin"); gc_preserve_end_func = 
M.getFunction("llvm.julia.gc_preserve_end"); pointer_from_objref_func = M.getFunction("julia.pointer_from_objref"); + gc_loaded_func = M.getFunction("julia.gc_loaded"); typeof_func = M.getFunction("julia.typeof"); write_barrier_func = M.getFunction("julia.write_barrier"); - write_barrier_binding_func = M.getFunction("julia.write_barrier_binding"); alloc_obj_func = M.getFunction("julia.gc_alloc_obj"); call_func = M.getFunction("julia.call"); call2_func = M.getFunction("julia.call2"); + call3_func = M.getFunction("julia.call3"); } void JuliaPassContext::initAll(Module &M) @@ -70,10 +72,13 @@ void JuliaPassContext::initAll(Module &M) llvm::CallInst *JuliaPassContext::getPGCstack(llvm::Function &F) const { - for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); - pgcstack_getter && I != E; ++I) { - if (CallInst *callInst = dyn_cast(&*I)) { - if (callInst->getCalledOperand() == pgcstack_getter) { + if (!pgcstack_getter && !adoptthread_func) + return nullptr; + for (auto &I : F.getEntryBlock()) { + if (CallInst *callInst = dyn_cast(&I)) { + Value *callee = callInst->getCalledOperand(); + if ((pgcstack_getter && callee == pgcstack_getter) || + (adoptthread_func && callee == adoptthread_func)) { return callInst; } } @@ -99,7 +104,8 @@ llvm::Function *JuliaPassContext::getOrDeclare( else { // Otherwise, we'll declare it and add it to the module. // Declare the function. - auto func = desc.declare(*this); + auto T_size = module->getDataLayout().getIntPtrType(module->getContext()); + auto func = desc.declare(T_size); // Add it to the function list. module->getFunctionList().push_back(func); // Return the newly created function. @@ -114,26 +120,34 @@ namespace jl_intrinsics { static const char *PUSH_GC_FRAME_NAME = "julia.push_gc_frame"; static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame"; static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root"; - static const char *QUEUE_GC_BINDING_NAME = "julia.queue_gc_binding"; + static const char *SAFEPOINT_NAME = "julia.safepoint"; // Annotates a function with attributes suitable for GC allocation // functions. Specifically, the return value is marked noalias and nonnull. - // The allocation size is set to the first argument. 
- static Function *addGCAllocAttributes(Function *target, LLVMContext &context) + static Function *addGCAllocAttributes(Function *target) { + auto FnAttrs = AttrBuilder(target->getContext()); +#if JL_LLVM_VERSION >= 160000 + FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | inaccessibleMemOnly(ModRefInfo::ModRef)); +#endif + FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + target->addFnAttrs(FnAttrs); addRetAttr(target, Attribute::NoAlias); addRetAttr(target, Attribute::NonNull); - target->addFnAttr(Attribute::getWithAllocSizeArgs(context, 1, None)); // returns %1 bytes return target; } const IntrinsicDescription getGCFrameSlot( GET_GC_FRAME_SLOT_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_pprjlvalue = JuliaType::get_pprjlvalue_ty(ctx); return Function::Create( FunctionType::get( - PointerType::get(context.T_prjlvalue, 0), - {PointerType::get(context.T_prjlvalue, 0), Type::getInt32Ty(context.getLLVMContext())}, + T_pprjlvalue, + {T_pprjlvalue, Type::getInt32Ty(ctx)}, false), Function::ExternalLinkage, GET_GC_FRAME_SLOT_NAME); @@ -141,26 +155,29 @@ namespace jl_intrinsics { const IntrinsicDescription GCAllocBytes( GC_ALLOC_BYTES_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto intrinsic = Function::Create( FunctionType::get( - context.T_prjlvalue, - { Type::getInt8PtrTy(context.getLLVMContext()), - sizeof(size_t) == sizeof(uint32_t) ? - Type::getInt32Ty(context.getLLVMContext()) : - Type::getInt64Ty(context.getLLVMContext()) }, + T_prjlvalue, + { Type::getInt8PtrTy(ctx), + T_size, + T_size }, // type false), Function::ExternalLinkage, GC_ALLOC_BYTES_NAME); - - return addGCAllocAttributes(intrinsic, context.getLLVMContext()); + intrinsic->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 1, None)); + return addGCAllocAttributes(intrinsic); }); const IntrinsicDescription newGCFrame( NEW_GC_FRAME_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_pprjlvalue = JuliaType::get_pprjlvalue_ty(ctx); auto intrinsic = Function::Create( - FunctionType::get(PointerType::get(context.T_prjlvalue, 0), {Type::getInt32Ty(context.getLLVMContext())}, false), + FunctionType::get(T_pprjlvalue, {Type::getInt32Ty(ctx)}, false), Function::ExternalLinkage, NEW_GC_FRAME_NAME); addRetAttr(intrinsic, Attribute::NoAlias); @@ -171,11 +188,13 @@ namespace jl_intrinsics { const IntrinsicDescription pushGCFrame( PUSH_GC_FRAME_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_pprjlvalue = JuliaType::get_pprjlvalue_ty(ctx); return Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - {PointerType::get(context.T_prjlvalue, 0), Type::getInt32Ty(context.getLLVMContext())}, + Type::getVoidTy(ctx), + {T_pprjlvalue, Type::getInt32Ty(ctx)}, false), Function::ExternalLinkage, PUSH_GC_FRAME_NAME); @@ -183,11 +202,13 @@ namespace jl_intrinsics { const IntrinsicDescription popGCFrame( POP_GC_FRAME_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_pprjlvalue = JuliaType::get_pprjlvalue_ty(ctx); return Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - {PointerType::get(context.T_prjlvalue, 0)}, + 
Type::getVoidTy(ctx), + {T_pprjlvalue}, false), Function::ExternalLinkage, POP_GC_FRAME_NAME); @@ -195,97 +216,127 @@ namespace jl_intrinsics { const IntrinsicDescription queueGCRoot( QUEUE_GC_ROOT_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto intrinsic = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue }, false), Function::ExternalLinkage, QUEUE_GC_ROOT_NAME); +#if JL_LLVM_VERSION >= 160000 + intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return intrinsic; }); - const IntrinsicDescription queueGCBinding( - QUEUE_GC_BINDING_NAME, - [](const JuliaPassContext &context) { + const IntrinsicDescription safepoint( + SAFEPOINT_NAME, + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_psize = T_size->getPointerTo(); auto intrinsic = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue }, + Type::getVoidTy(ctx), + {T_psize}, false), Function::ExternalLinkage, - QUEUE_GC_BINDING_NAME); + SAFEPOINT_NAME); +#if JL_LLVM_VERSION >= 160000 + intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return intrinsic; }); } namespace jl_well_known { - static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc); - static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc); + static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc_instrumented); + static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc_instrumented); static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); - static const char *GC_QUEUE_BINDING_NAME = XSTR(jl_gc_queue_binding); + static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed); using jl_intrinsics::addGCAllocAttributes; const WellKnownFunctionDescription GCBigAlloc( GC_BIG_ALLOC_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto bigAllocFunc = Function::Create( FunctionType::get( - context.T_prjlvalue, - { Type::getInt8PtrTy(context.getLLVMContext()), - sizeof(size_t) == sizeof(uint32_t) ? 
- Type::getInt32Ty(context.getLLVMContext()) : - Type::getInt64Ty(context.getLLVMContext()) }, + T_prjlvalue, + { Type::getInt8PtrTy(ctx), T_size , T_size}, false), Function::ExternalLinkage, GC_BIG_ALLOC_NAME); - - return addGCAllocAttributes(bigAllocFunc, context.getLLVMContext()); + bigAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 1, None)); + return addGCAllocAttributes(bigAllocFunc); }); const WellKnownFunctionDescription GCPoolAlloc( GC_POOL_ALLOC_NAME, - [](const JuliaPassContext &context) { + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto poolAllocFunc = Function::Create( FunctionType::get( - context.T_prjlvalue, - { Type::getInt8PtrTy(context.getLLVMContext()), Type::getInt32Ty(context.getLLVMContext()), Type::getInt32Ty(context.getLLVMContext()) }, + T_prjlvalue, + { Type::getInt8PtrTy(ctx), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx), T_size }, false), Function::ExternalLinkage, GC_POOL_ALLOC_NAME); - - return addGCAllocAttributes(poolAllocFunc, context.getLLVMContext()); + poolAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None)); + return addGCAllocAttributes(poolAllocFunc); }); - const WellKnownFunctionDescription GCQueueBinding( - GC_QUEUE_BINDING_NAME, - [](const JuliaPassContext &context) { + const WellKnownFunctionDescription GCQueueRoot( + GC_QUEUE_ROOT_NAME, + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); auto func = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue }, + Type::getVoidTy(ctx), + { T_prjlvalue }, false), Function::ExternalLinkage, - GC_QUEUE_BINDING_NAME); + GC_QUEUE_ROOT_NAME); +#if JL_LLVM_VERSION >= 160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif return func; }); - const WellKnownFunctionDescription GCQueueRoot( - GC_QUEUE_ROOT_NAME, - [](const JuliaPassContext &context) { - auto func = Function::Create( + const WellKnownFunctionDescription GCAllocTyped( + GC_ALLOC_TYPED_NAME, + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx); + auto allocTypedFunc = Function::Create( FunctionType::get( - Type::getVoidTy(context.getLLVMContext()), - { context.T_prjlvalue }, + T_prjlvalue, + { Type::getInt8PtrTy(ctx), + T_size, + T_size }, // type false), Function::ExternalLinkage, - GC_QUEUE_ROOT_NAME); - func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - return func; + GC_ALLOC_TYPED_NAME); + allocTypedFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 1, None)); + return addGCAllocAttributes(allocTypedFunc); }); } + +void setName(llvm::Value *V, const llvm::Twine &Name, int debug_info) +{ + if (debug_info >= 2 && !llvm::isa(V)) { + V->setName(Name); + } +} diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 64d5dc00e2c5b..346500df51ca1 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -10,6 +10,7 @@ #include #include #include +#include "analyzer_annotations.h" struct JuliaPassContext; @@ -19,7 +20,7 @@ namespace jl_intrinsics { // intrinsics and declare new intrinsics if necessary. struct IntrinsicDescription final { // The type of function that declares an intrinsic. 
- typedef llvm::Function *(*DeclarationFunction)(const JuliaPassContext&); + typedef llvm::Function *(*DeclarationFunction)(llvm::Type *T_size) JL_NOTSAFEPOINT; // Creates an intrinsic description with a particular // name and declaration function. @@ -50,16 +51,18 @@ struct JuliaPassContext { // Intrinsics. llvm::Function *pgcstack_getter; + llvm::Function *adoptthread_func; llvm::Function *gc_flush_func; llvm::Function *gc_preserve_begin_func; llvm::Function *gc_preserve_end_func; llvm::Function *pointer_from_objref_func; + llvm::Function *gc_loaded_func; llvm::Function *alloc_obj_func; llvm::Function *typeof_func; llvm::Function *write_barrier_func; - llvm::Function *write_barrier_binding_func; llvm::Function *call_func; llvm::Function *call2_func; + llvm::Function *call3_func; // Creates a pass context. Type and function pointers // are set to `nullptr`. Metadata nodes are initialized. @@ -126,8 +129,8 @@ namespace jl_intrinsics { // `julia.queue_gc_root`: an intrinsic that queues a GC root. extern const IntrinsicDescription queueGCRoot; - // `julia.queue_gc_binding`: an intrinsic that queues a binding for GC. - extern const IntrinsicDescription queueGCBinding; + // `julia.safepoint`: an intrinsic that triggers a GC safepoint. + extern const IntrinsicDescription safepoint; } // A namespace for well-known Julia runtime function descriptions. @@ -149,8 +152,10 @@ namespace jl_well_known { // `jl_gc_queue_root`: queues a GC root. extern const WellKnownFunctionDescription GCQueueRoot; - // `jl_gc_queue_binding`: queues a binding for GC. - extern const WellKnownFunctionDescription GCQueueBinding; + // `jl_gc_alloc_typed`: allocates bytes. + extern const WellKnownFunctionDescription GCAllocTyped; } +void setName(llvm::Value *V, const llvm::Twine &Name, int debug_info); + #endif diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp index 8da0e108c94d5..cc7dace28b24e 100644 --- a/src/llvm-propagate-addrspaces.cpp +++ b/src/llvm-propagate-addrspaces.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -22,9 +21,8 @@ #include #include -#include "codegen_shared.h" -#include "julia.h" #include "passes.h" +#include "llvm-codegen-shared.h" #define DEBUG_TYPE "propagate_julia_addrspaces" @@ -44,11 +42,11 @@ using namespace llvm; struct PropagateJuliaAddrspacesVisitor : public InstVisitor { DenseMap LiftingMap; SmallPtrSet Visited; - std::vector ToDelete; - std::vector> ToInsert; + SmallVector ToDelete; + SmallVector, 0> ToInsert; public: - Value *LiftPointer(Value *V, Instruction *InsertPt=nullptr); + Value *LiftPointer(Module *M, Value *V, Instruction *InsertPt=nullptr); void visitMemop(Instruction &I, Type *T, unsigned OpIndex); void visitLoadInst(LoadInst &LI); void visitStoreInst(StoreInst &SI); @@ -58,7 +56,7 @@ struct PropagateJuliaAddrspacesVisitor : public InstVisitor &Worklist); + void PoisonValues(SmallVectorImpl &Worklist); }; static unsigned getValueAddrSpace(Value *V) { @@ -69,7 +67,7 @@ static bool isSpecialAS(unsigned AS) { return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial; } -void PropagateJuliaAddrspacesVisitor::PoisonValues(std::vector &Worklist) { +void PropagateJuliaAddrspacesVisitor::PoisonValues(SmallVectorImpl &Worklist) { while (!Worklist.empty()) { Value *CurrentV = Worklist.back(); Worklist.pop_back(); @@ -82,10 +80,11 @@ void PropagateJuliaAddrspacesVisitor::PoisonValues(std::vector &Worklis } } -Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Value *V, Instruction *InsertPt) { 
+Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruction *InsertPt) { SmallVector Stack; - std::vector Worklist; + SmallVector Worklist; std::set LocalVisited; + unsigned allocaAddressSpace = M->getDataLayout().getAllocaAddrSpace(); Worklist.push_back(V); // Follow pointer casts back, see if we're based on a pointer in // an untracked address space, in which case we're allowed to drop @@ -106,7 +105,6 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Value *V, Instruction *Inser } else if (auto *GEP = dyn_cast(CurrentV)) { if (LiftingMap.count(GEP)) { - CurrentV = LiftingMap[GEP]; break; } else if (Visited.count(GEP)) { return nullptr; @@ -157,7 +155,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Value *V, Instruction *Inser } // Go through and insert lifted versions of all instructions on the list. - std::vector ToRevisit; + SmallVector ToRevisit; for (Value *V : Stack) { if (LiftingMap.count(V)) continue; @@ -165,15 +163,14 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Value *V, Instruction *Inser Instruction *InstV = cast(V); Instruction *NewV = InstV->clone(); ToInsert.push_back(std::make_pair(NewV, InstV)); - Type *NewRetTy = PointerType::getWithSamePointeeType(cast(InstV->getType()), AddressSpace::Generic); + Type *NewRetTy = PointerType::getWithSamePointeeType(cast(InstV->getType()), allocaAddressSpace); NewV->mutateType(NewRetTy); LiftingMap[InstV] = NewV; ToRevisit.push_back(NewV); } } - auto CollapseCastsAndLift = [&](Value *CurrentV, Instruction *InsertPt) -> Value * { - PointerType *TargetType = PointerType::getWithSamePointeeType(cast(CurrentV->getType()), AddressSpace::Generic); + PointerType *TargetType = PointerType::getWithSamePointeeType(cast(CurrentV->getType()), allocaAddressSpace); while (!LiftingMap.count(CurrentV)) { if (isa(CurrentV)) CurrentV = cast(CurrentV)->getOperand(0); @@ -188,6 +185,8 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Value *V, Instruction *Inser if (LiftingMap.count(CurrentV)) CurrentV = LiftingMap[CurrentV]; if (CurrentV->getType() != TargetType) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(CurrentV->getContext().supportsTypedPointers()); auto *BCI = new BitCastInst(CurrentV, TargetType); ToInsert.push_back(std::make_pair(BCI, InsertPt)); CurrentV = BCI; @@ -222,7 +221,7 @@ void PropagateJuliaAddrspacesVisitor::visitMemop(Instruction &I, Type *T, unsign unsigned AS = Original->getType()->getPointerAddressSpace(); if (!isSpecialAS(AS)) return; - Value *Replacement = LiftPointer(Original, &I); + Value *Replacement = LiftPointer(I.getModule(), Original, &I); if (!Replacement) return; I.setOperand(OpIndex, Replacement); @@ -248,7 +247,7 @@ void PropagateJuliaAddrspacesVisitor::visitMemSetInst(MemSetInst &MI) { unsigned AS = MI.getDestAddressSpace(); if (!isSpecialAS(AS)) return; - Value *Replacement = LiftPointer(MI.getRawDest()); + Value *Replacement = LiftPointer(MI.getModule(), MI.getRawDest()); if (!Replacement) return; Function *TheFn = Intrinsic::getDeclaration(MI.getModule(), Intrinsic::memset, @@ -264,13 +263,13 @@ void PropagateJuliaAddrspacesVisitor::visitMemTransferInst(MemTransferInst &MTI) return; Value *Dest = MTI.getRawDest(); if (isSpecialAS(DestAS)) { - Value *Replacement = LiftPointer(Dest, &MTI); + Value *Replacement = LiftPointer(MTI.getModule(), Dest, &MTI); if (Replacement) Dest = Replacement; } Value *Src = MTI.getRawSource(); if (isSpecialAS(SrcAS)) { - Value *Replacement = LiftPointer(Src, &MTI); + Value 
*Replacement = LiftPointer(MTI.getModule(), Src, &MTI); if (Replacement) Src = Replacement; } @@ -297,32 +296,15 @@ bool propagateJuliaAddrspaces(Function &F) { visitor.Visited.clear(); return true; } - -struct PropagateJuliaAddrspacesLegacy : FunctionPass { - static char ID; - - PropagateJuliaAddrspacesLegacy() : FunctionPass(ID) {} - bool runOnFunction(Function &F) override { - return propagateJuliaAddrspaces(F); - } -}; - -char PropagateJuliaAddrspacesLegacy::ID = 0; -static RegisterPass X("PropagateJuliaAddrspaces", "Propagate (non-)rootedness information", false, false); - -Pass *createPropagateJuliaAddrspaces() { - return new PropagateJuliaAddrspacesLegacy(); -} - PreservedAnalyses PropagateJuliaAddrspacesPass::run(Function &F, FunctionAnalysisManager &AM) { - if (propagateJuliaAddrspaces(F)) { + bool modified = propagateJuliaAddrspaces(F); + +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(F)); +#endif + if (modified) { return PreservedAnalyses::allInSet(); } else { return PreservedAnalyses::all(); } } - -extern "C" JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createPropagateJuliaAddrspaces()); -} diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index e948e1c1a10bc..d2650d6875cd4 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -9,20 +9,21 @@ #include #include +#include #include -#include #include #include #include #include #include +#include #include #include #include "julia.h" #include "julia_internal.h" -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" #include "julia_assert.h" #define DEBUG_TYPE "lower_ptls" @@ -35,148 +36,187 @@ typedef Instruction TerminatorInst; namespace { struct LowerPTLS { - LowerPTLS(bool imaging_mode=false) - : imaging_mode(imaging_mode) + LowerPTLS(Module &M, bool imaging_mode=false) + : imaging_mode(imaging_mode), M(&M), TargetTriple(M.getTargetTriple()) {} - bool runOnModule(Module &M, bool *CFGModified); + bool run(bool *CFGModified); private: const bool imaging_mode; Module *M; - Function *pgcstack_getter; - MDNode *tbaa_const; - FunctionType *FT_pgcstack_getter; - PointerType *T_pgcstack_getter; - PointerType *T_pppjlvalue; + Triple TargetTriple; + MDNode *tbaa_const{nullptr}; + MDNode *tbaa_gcframe{nullptr}; + FunctionType *FT_pgcstack_getter{nullptr}; + PointerType *T_pgcstack_getter{nullptr}; + PointerType *T_pppjlvalue{nullptr}; + Type *T_size{nullptr}; GlobalVariable *pgcstack_func_slot{nullptr}; GlobalVariable *pgcstack_key_slot{nullptr}; GlobalVariable *pgcstack_offset{nullptr}; void set_pgcstack_attrs(CallInst *pgcstack) const; Instruction *emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const; template T *add_comdat(T *G) const; - GlobalVariable *create_aliased_global(Type *T, StringRef name) const; - void fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified); + GlobalVariable *create_hidden_global(Type *T, StringRef name) const; + void fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, bool or_new, bool *CFGModified); }; void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const { +#if JL_LLVM_VERSION >= 160000 + pgcstack->addFnAttr(Attribute::getWithMemoryEffects(pgcstack->getContext(), MemoryEffects::none())); +#else addFnAttr(pgcstack, Attribute::ReadNone); +#endif addFnAttr(pgcstack, Attribute::NoUnwind); } Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const { + IRBuilder<> builder(insertBefore); Value *tls; -#if defined(_CPU_X86_64_) || defined(_CPU_X86_) - if 
(insertBefore->getFunction()->callsFunctionThatReturnsTwice()) { + if (TargetTriple.isX86() && insertBefore->getFunction()->callsFunctionThatReturnsTwice()) { // Workaround LLVM bug by hiding the offset computation // (and therefore the optimization opportunity) from LLVM. // Ref https://github.com/JuliaLang/julia/issues/17288 - static const std::string const_asm_str = [&] () { - std::string stm; -# if defined(_CPU_X86_64_) - raw_string_ostream(stm) << "movq %fs:0, $0;\naddq $$" << jl_tls_offset << ", $0"; -# else - raw_string_ostream(stm) << "movl %gs:0, $0;\naddl $$" << jl_tls_offset << ", $0"; -# endif - return stm; - }(); -# if defined(_CPU_X86_64_) - const char *dyn_asm_str = "movq %fs:0, $0;\naddq $1, $0"; -# else - const char *dyn_asm_str = "movl %gs:0, $0;\naddl $1, $0"; -# endif + std::string const_asm_str; + raw_string_ostream(const_asm_str) << (TargetTriple.getArch() == Triple::x86_64 ? + "movq %fs:0, $0;\naddq $$" : "movl %gs:0, $0;\naddl $$") + << jl_tls_offset << ", $0"; + const char *dyn_asm_str = TargetTriple.getArch() == Triple::x86_64 ? + "movq %fs:0, $0;\naddq $1, $0" : + "movl %gs:0, $0;\naddl $1, $0"; // The add instruction clobbers flags if (offset) { - std::vector args(0); + SmallVector args(0); args.push_back(offset->getType()); - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), args, false), + auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), args, false), dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false); - tls = CallInst::Create(tp, offset, "pgcstack_i8", insertBefore); + tls = builder.CreateCall(tp, {offset}, "pgcstack"); } else { auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}", false); - tls = CallInst::Create(tp, "pgcstack_i8", insertBefore); + tls = builder.CreateCall(tp, {}, "tls_pgcstack"); } - } - else -#endif - { + } else { // AArch64/ARM doesn't seem to have this issue. // (Possibly because there are many more registers and the offset is // positive and small) // It's also harder to emit the offset in a generic way on ARM/AArch64 // (need to generate one or two `add` with shift) so let llvm emit // the add for now. 
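// [Editorial sketch, not part of the patch] The generic path here reads the thread pointer
// with a one-instruction inline asm, then GEPs by the static TLS offset and loads the
// per-thread pgcstack slot. On x86-64 that is roughly equivalent to the following C
// (the helper name is hypothetical; `jl_tls_offset` and the fs:0 thread-pointer read are
// the same ones used by the asm strings in this function):

static jl_value_t ***get_pgcstack_static_tls(void)
{
    char *tp;
    __asm__("movq %%fs:0, %0" : "=r"(tp));          // thread pointer, as in the asm above
    return *(jl_value_t ****)(tp + jl_tls_offset);  // load the per-thread pgcstack slot
}

// The pass builds the GEP and load with IRBuilder so LLVM can still fold a constant offset,
// and only falls back to the fused asm sequence above when the function calls a
// returns-twice (setjmp-like) function.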
-#if defined(_CPU_AARCH64_) - const char *asm_str = "mrs $0, tpidr_el0"; -#elif defined(__ARM_ARCH) && __ARM_ARCH >= 7 - const char *asm_str = "mrc p15, 0, $0, c13, c0, 3"; -#elif defined(_CPU_X86_64_) - const char *asm_str = "movq %fs:0, $0"; -#elif defined(_CPU_X86_) - const char *asm_str = "movl %gs:0, $0"; -#else - const char *asm_str = nullptr; - assert(0 && "Cannot emit thread pointer for this architecture."); -#endif + const char *asm_str; + if (TargetTriple.isAArch64()) { + asm_str = "mrs $0, tpidr_el0"; + } else if (TargetTriple.isARM()) { + asm_str = "mrc p15, 0, $0, c13, c0, 3"; + } else if (TargetTriple.getArch() == Triple::x86_64) { + asm_str = "movq %fs:0, $0"; + } else if (TargetTriple.getArch() == Triple::x86) { + asm_str = "movl %gs:0, $0"; + } else { + llvm_unreachable("Cannot emit thread pointer for this architecture."); + } if (!offset) - offset = ConstantInt::getSigned(getSizeTy(insertBefore->getContext()), jl_tls_offset); - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), asm_str, "=r", false); - tls = CallInst::Create(tp, "thread_ptr", insertBefore); - tls = GetElementPtrInst::Create(Type::getInt8Ty(insertBefore->getContext()), tls, {offset}, "ppgcstack_i8", insertBefore); + offset = ConstantInt::getSigned(T_size, jl_tls_offset); + auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), false), asm_str, "=r", false); + tls = builder.CreateCall(tp, {}, "thread_ptr"); + tls = builder.CreateGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack"); } - tls = new BitCastInst(tls, T_pppjlvalue->getPointerTo(), "ppgcstack", insertBefore); - return new LoadInst(T_pppjlvalue, tls, "pgcstack", false, insertBefore); + tls = builder.CreateBitCast(tls, T_pppjlvalue->getPointerTo()); + return builder.CreateLoad(T_pppjlvalue, tls, "tls_pgcstack"); } -GlobalVariable *LowerPTLS::create_aliased_global(Type *T, StringRef name) const +GlobalVariable *LowerPTLS::create_hidden_global(Type *T, StringRef name) const { - // Create a static global variable and points a global alias to it so that - // the address is visible externally but LLVM can still assume that the - // address of this variable doesn't need dynamic relocation - // (can be accessed with a single PC-rel load). 
- auto GV = new GlobalVariable(*M, T, false, GlobalVariable::InternalLinkage, - Constant::getNullValue(T), name + ".real"); - add_comdat(GlobalAlias::create(T, 0, GlobalVariable::ExternalLinkage, - name, GV, M)); + auto GV = new GlobalVariable(*M, T, false, GlobalVariable::ExternalLinkage, + nullptr, name); + GV->setVisibility(GlobalValue::HiddenVisibility); + GV->setDSOLocal(true); return GV; } -template -inline T *LowerPTLS::add_comdat(T *G) const -{ -#if defined(_OS_WINDOWS_) - // add __declspec(dllexport) to everything marked for export - if (G->getLinkage() == GlobalValue::ExternalLinkage) - G->setDLLStorageClass(GlobalValue::DLLExportStorageClass); - else - G->setDLLStorageClass(GlobalValue::DefaultStorageClass); -#endif - return G; -} - -void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified) +void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, bool or_new, bool *CFGModified) { if (pgcstack->use_empty()) { pgcstack->eraseFromParent(); return; } + if (or_new) { + // pgcstack(); + // if (pgcstack != nullptr) + // last_gc_state = emit_gc_unsafe_enter(ctx); + // phi = pgcstack; // fast + // else + // last_gc_state = gc_safe; + // phi = adopt(); // slow + // use phi; + // if (!retboxed) + // foreach(retinst) + // emit_gc_unsafe_leave(ctx, last_gc_state); + IRBuilder<> builder(pgcstack->getNextNode()); + auto phi = builder.CreatePHI(pgcstack->getType(), 2, "pgcstack"); + pgcstack->replaceAllUsesWith(phi); + MDBuilder MDB(pgcstack->getContext()); + SmallVector Weights{9, 1}; + TerminatorInst *fastTerm; + TerminatorInst *slowTerm; + assert(pgcstack->getType()); // Static analyzer + builder.SetInsertPoint(phi); + auto cmp = builder.CreateICmpNE(pgcstack, Constant::getNullValue(pgcstack->getType())); + SplitBlockAndInsertIfThenElse(cmp, phi, &fastTerm, &slowTerm, + MDB.createBranchWeights(Weights)); + if (CFGModified) + *CFGModified = true; + // emit slow branch code + CallInst *adopt = cast(pgcstack->clone()); + Function *adoptFunc = M->getFunction(XSTR(jl_adopt_thread)); + if (adoptFunc == NULL) { + adoptFunc = Function::Create(pgcstack_getter->getFunctionType(), + pgcstack_getter->getLinkage(), pgcstack_getter->getAddressSpace(), + XSTR(jl_adopt_thread), M); + adoptFunc->copyAttributesFrom(pgcstack_getter); + adoptFunc->copyMetadata(pgcstack_getter, 0); + } + adopt->setCalledFunction(adoptFunc); + adopt->insertBefore(slowTerm); + phi->addIncoming(adopt, slowTerm->getParent()); + // emit fast branch code + builder.SetInsertPoint(fastTerm->getParent()); + fastTerm->removeFromParent(); + MDNode *tbaa = tbaa_gcframe; + Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true); + builder.Insert(fastTerm); + phi->addIncoming(pgcstack, fastTerm->getParent()); + // emit pre-return cleanup + if (CountTrackedPointers(pgcstack->getParent()->getParent()->getReturnType()).count == 0) { + auto last_gc_state = PHINode::Create(Type::getInt8Ty(pgcstack->getContext()), 2, "", phi); + // if we called jl_adopt_thread, we must end this cfunction back in the safe-state + last_gc_state->addIncoming(ConstantInt::get(Type::getInt8Ty(M->getContext()), JL_GC_STATE_SAFE), slowTerm->getParent()); + last_gc_state->addIncoming(prior, fastTerm->getParent()); + for (auto &BB : *pgcstack->getParent()->getParent()) { + if (isa(BB.getTerminator())) { + builder.SetInsertPoint(BB.getTerminator()); + emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, 
T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true); + } + } + } + } if (imaging_mode) { + IRBuilder<> builder(pgcstack); if (jl_tls_elf_support) { // if (offset != 0) - // pgcstack = tp + offset; + // pgcstack = tp + offset; // fast // else - // pgcstack = getter(); - auto offset = new LoadInst(getSizeTy(pgcstack->getContext()), pgcstack_offset, "", false, pgcstack); + // pgcstack = getter(); // slow + auto offset = builder.CreateLoad(T_size, pgcstack_offset); offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); - auto cmp = new ICmpInst(pgcstack, CmpInst::ICMP_NE, offset, - Constant::getNullValue(offset->getType())); + auto cmp = builder.CreateICmpNE(offset, Constant::getNullValue(offset->getType())); MDBuilder MDB(pgcstack->getContext()); SmallVector Weights{9, 1}; TerminatorInst *fastTerm; @@ -184,13 +224,17 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified) SplitBlockAndInsertIfThenElse(cmp, pgcstack, &fastTerm, &slowTerm, MDB.createBranchWeights(Weights)); if (CFGModified) - *CFGModified = true; + *CFGModified = true; auto fastTLS = emit_pgcstack_tp(offset, fastTerm); - auto phi = PHINode::Create(T_pppjlvalue, 2, "", pgcstack); + // refresh the basic block in the builder + builder.SetInsertPoint(pgcstack); + auto phi = builder.CreatePHI(T_pppjlvalue, 2, "pgcstack"); pgcstack->replaceAllUsesWith(phi); pgcstack->moveBefore(slowTerm); - auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack); + // refresh the basic block in the builder + builder.SetInsertPoint(pgcstack); + auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot); getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter); @@ -205,21 +249,21 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified) // variable to be filled (in `staticdata.c`) at initialization time of the sysimg. // This way we can bypass the extra indirection in `jl_get_pgcstack` // since we may not know which getter function to use ahead of time. 
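// [Editorial sketch, not part of the patch] The slot-based lookup built below behaves like
// calling through a function pointer that the runtime fills in when the system image is
// loaded (the `staticdata.c` note above). In C terms, roughly (the helper name is
// hypothetical; `jl_pgcstack_func_slot` and `jl_get_pgcstack_func` are the names this pass
// already uses):

extern jl_get_pgcstack_func *jl_pgcstack_func_slot;  // hidden global, filled at sysimg load

static jl_gcframe_t **get_pgcstack_via_slot(void)
{
    return jl_pgcstack_func_slot();  // one indirect call, no dispatch inside jl_get_pgcstack
}

// On Darwin the getter additionally takes the pthread key loaded from
// `jl_pgcstack_key_slot`, which is why the code below rebuilds the call with that extra
// argument instead of just swapping the callee.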
- auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack); + auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot); getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); -#if defined(_OS_DARWIN_) - auto key = new LoadInst(getSizeTy(pgcstack->getContext()), pgcstack_key_slot, "", false, pgcstack); - key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); - key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); - auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack); - new_pgcstack->takeName(pgcstack); - pgcstack->replaceAllUsesWith(new_pgcstack); - pgcstack->eraseFromParent(); - pgcstack = new_pgcstack; -#else - pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter); -#endif + if (TargetTriple.isOSDarwin()) { + auto key = builder.CreateLoad(T_size, pgcstack_key_slot); + key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); + key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); + auto new_pgcstack = builder.CreateCall(FT_pgcstack_getter, getter, {key}); + new_pgcstack->takeName(pgcstack); + pgcstack->replaceAllUsesWith(new_pgcstack); + pgcstack->eraseFromParent(); + pgcstack = new_pgcstack; + } else { + pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter); + } set_pgcstack_attrs(pgcstack); } else if (jl_tls_offset != -1) { @@ -231,82 +275,87 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified) jl_get_pgcstack_func *f; jl_pgcstack_key_t k; jl_pgcstack_getkey(&f, &k); - Constant *val = ConstantInt::get(getSizeTy(pgcstack->getContext()), (uintptr_t)f); + Constant *val = ConstantInt::get(T_size, (uintptr_t)f); val = ConstantExpr::getIntToPtr(val, T_pgcstack_getter); -#if defined(_OS_DARWIN_) - assert(sizeof(k) == sizeof(uintptr_t)); - Constant *key = ConstantInt::get(getSizeTy(pgcstack->getContext()), (uintptr_t)k); - auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack); - new_pgcstack->takeName(pgcstack); - pgcstack->replaceAllUsesWith(new_pgcstack); - pgcstack->eraseFromParent(); - pgcstack = new_pgcstack; -#else - pgcstack->setCalledFunction(pgcstack->getFunctionType(), val); -#endif + if (TargetTriple.isOSDarwin()) { + assert(sizeof(k) == sizeof(uintptr_t)); + Constant *key = ConstantInt::get(T_size, (uintptr_t)k); + auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack); + new_pgcstack->takeName(pgcstack); + pgcstack->replaceAllUsesWith(new_pgcstack); + pgcstack->eraseFromParent(); + pgcstack = new_pgcstack; + } else { + pgcstack->setCalledFunction(pgcstack->getFunctionType(), val); + } set_pgcstack_attrs(pgcstack); } } -bool LowerPTLS::runOnModule(Module &_M, bool *CFGModified) +bool LowerPTLS::run(bool *CFGModified) { - M = &_M; - pgcstack_getter = M->getFunction("julia.get_pgcstack"); - if (!pgcstack_getter) - return false; - - tbaa_const = tbaa_make_child_with_context(_M.getContext(), "jtbaa_const", nullptr, true).first; - - FT_pgcstack_getter = pgcstack_getter->getFunctionType(); -#if defined(_OS_DARWIN_) - assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t)); - FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {getSizeTy(_M.getContext())}, false); -#endif - T_pgcstack_getter = FT_pgcstack_getter->getPointerTo(); - T_pppjlvalue = cast(FT_pgcstack_getter->getReturnType()); - if 
(imaging_mode) { - pgcstack_func_slot = create_aliased_global(T_pgcstack_getter, "jl_pgcstack_func_slot"); - pgcstack_key_slot = create_aliased_global(getSizeTy(_M.getContext()), "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t) - pgcstack_offset = create_aliased_global(getSizeTy(_M.getContext()), "jl_tls_offset"); - } + bool need_init = true; + auto runOnGetter = [&](bool or_new) { + Function *pgcstack_getter = M->getFunction(or_new ? "julia.get_pgcstack_or_new" : "julia.get_pgcstack"); + if (!pgcstack_getter) + return false; + + if (need_init) { + tbaa_const = tbaa_make_child_with_context(M->getContext(), "jtbaa_const", nullptr, true).first; + tbaa_gcframe = tbaa_make_child_with_context(M->getContext(), "jtbaa_gcframe").first; + T_size = M->getDataLayout().getIntPtrType(M->getContext()); + + FT_pgcstack_getter = pgcstack_getter->getFunctionType(); + if (TargetTriple.isOSDarwin()) { + assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t)); + FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {T_size}, false); + } + T_pgcstack_getter = FT_pgcstack_getter->getPointerTo(); + T_pppjlvalue = cast(FT_pgcstack_getter->getReturnType()); + if (imaging_mode) { + pgcstack_func_slot = create_hidden_global(T_pgcstack_getter, "jl_pgcstack_func_slot"); + pgcstack_key_slot = create_hidden_global(T_size, "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t) + pgcstack_offset = create_hidden_global(T_size, "jl_tls_offset"); + } + need_init = false; + } - for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { - auto call = cast(*it); - ++it; - assert(call->getCalledOperand() == pgcstack_getter); - fix_pgcstack_use(call, CFGModified); - } - assert(pgcstack_getter->use_empty()); - pgcstack_getter->eraseFromParent(); - return true; + for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { + auto call = cast(*it); + ++it; + auto f = call->getCaller(); + Value *pgcstack = NULL; + for (Function::arg_iterator arg = f->arg_begin(); arg != f->arg_end();++arg) { + if (arg->hasSwiftSelfAttr()){ + pgcstack = &*arg; + break; + } + } + if (pgcstack) { + pgcstack->takeName(call); + call->replaceAllUsesWith(pgcstack); + call->eraseFromParent(); + continue; + } + assert(call->getCalledOperand() == pgcstack_getter); + fix_pgcstack_use(call, pgcstack_getter, or_new, CFGModified); + } + assert(pgcstack_getter->use_empty()); + pgcstack_getter->eraseFromParent(); + return true; + }; + return runOnGetter(false) + runOnGetter(true); } - -struct LowerPTLSLegacy: public ModulePass { - static char ID; - LowerPTLSLegacy(bool imaging_mode=false) - : ModulePass(ID), - imaging_mode(imaging_mode) - {} - - bool imaging_mode; - bool runOnModule(Module &M) override { - LowerPTLS lower(imaging_mode); - return lower.runOnModule(M, nullptr); - } -}; - -char LowerPTLSLegacy::ID = 0; - -static RegisterPass X("LowerPTLS", "LowerPTLS Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - } // anonymous namespace PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) { - LowerPTLS lower(imaging_mode); + LowerPTLS lower(M, imaging_mode); bool CFGModified = false; - if (lower.runOnModule(M, &CFGModified)) { + bool modified = lower.run(&CFGModified); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(M)); +#endif + if (modified) { if (CFGModified) { return PreservedAnalyses::none(); } else { @@ -315,13 +364,3 @@ PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) { } return PreservedAnalyses::all(); } - 
-Pass *createLowerPTLSPass(bool imaging_mode) -{ - return new LowerPTLSLegacy(imaging_mode); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_impl(LLVMPassManagerRef PM, LLVMBool imaging_mode) -{ - unwrap(PM)->add(createLowerPTLSPass(imaging_mode)); -} diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp index a3f3cbb1fee72..e291e47b59b0e 100644 --- a/src/llvm-remove-addrspaces.cpp +++ b/src/llvm-remove-addrspaces.cpp @@ -7,14 +7,13 @@ #include #include #include -#include +#include #include #include #include -#include "codegen_shared.h" -#include "julia.h" #include "passes.h" +#include "llvm-codegen-shared.h" #define DEBUG_TYPE "remove_addrspaces" @@ -51,7 +50,7 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper { else { //Remove once opaque pointer transition is complete DstTy = PointerType::get( - remapType(Ty->getPointerElementType()), + remapType(Ty->getNonOpaquePointerElementType()), ASRemapper(Ty->getAddressSpace())); } } @@ -161,7 +160,7 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer { auto ptrty = cast(Src->getType()->getScalarType()); //Remove once opaque pointer transition is complete if (!ptrty->isOpaque()) { - Type *SrcTy = remapType(ptrty->getPointerElementType()); + Type *SrcTy = remapType(ptrty->getNonOpaquePointerElementType()); DstV = CE->getWithOperands(Ops, Ty, false, SrcTy); } } @@ -336,18 +335,14 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) GlobalVariable *NGV = cast(VMap[GV]); if (GV->hasInitializer()) - NGV->setInitializer(MapValue(GV->getInitializer(), VMap)); + NGV->setInitializer(MapValue(GV->getInitializer(), VMap, RF_None, &TypeRemapper, &Materializer)); SmallVector, 1> MDs; GV->getAllMetadata(MDs); for (auto MD : MDs) NGV->addMetadata( MD.first, -#if JL_LLVM_VERSION >= 130000 *MapMetadata(MD.second, VMap)); -#else - *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs)); -#endif copyComdat(NGV, GV); @@ -356,11 +351,9 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) // Similarly, copy over and rewrite function bodies for (Function *F : Functions) { - if (F->isDeclaration()) - continue; - Function *NF = cast(VMap[F]); LLVM_DEBUG(dbgs() << "Processing function " << NF->getName() << "\n"); + // we also need this to run for declarations, or attributes won't be copied Function::arg_iterator DestI = NF->arg_begin(); for (Function::const_arg_iterator I = F->arg_begin(); I != F->arg_end(); @@ -374,46 +367,29 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) NF, F, VMap, -#if JL_LLVM_VERSION >= 130000 CloneFunctionChangeType::GlobalChanges, -#else - /*ModuleLevelChanges=*/true, -#endif Returns, "", nullptr, &TypeRemapper, &Materializer); - // CloneFunctionInto unconditionally copies the attributes from F to NF, - // without considering e.g. the byval attribute type. 
+ // Update function attributes that contain types AttributeList Attrs = F->getAttributes(); LLVMContext &C = F->getContext(); for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) { for (Attribute::AttrKind TypedAttr : {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) { -#if JL_LLVM_VERSION >= 140000 auto Attr = Attrs.getAttributeAtIndex(i, TypedAttr); -#else - auto Attr = Attrs.getAttribute(i, TypedAttr); -#endif if (Type *Ty = Attr.getValueAsType()) { -#if JL_LLVM_VERSION >= 140000 Attrs = Attrs.replaceAttributeTypeAtIndex( C, i, TypedAttr, TypeRemapper.remapType(Ty)); -#else - Attrs = Attrs.replaceAttributeType( - C, i, TypedAttr, TypeRemapper.remapType(Ty)); -#endif break; } } } NF->setAttributes(Attrs); - if (F->hasPersonalityFn()) - NF->setPersonalityFn(MapValue(F->getPersonalityFn(), VMap)); - copyComdat(NF, F); RemoveNoopAddrSpaceCasts(NF); @@ -424,7 +400,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) for (GlobalAlias *GA : Aliases) { GlobalAlias *NGA = cast(VMap[GA]); if (const Constant *C = GA->getAliasee()) - NGA->setAliasee(MapValue(C, VMap)); + NGA->setAliasee(MapValue(C, VMap, RF_None, &TypeRemapper, &Materializer)); GA->setAliasee(nullptr); } @@ -447,7 +423,7 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE;) { Function *F = &*FI++; if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) { - F->replaceAllUsesWith(Remangled.getValue()); + F->replaceAllUsesWith(*Remangled); F->eraseFromParent(); } } @@ -456,36 +432,14 @@ bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper) } -struct RemoveAddrspacesPassLegacy : public ModulePass { - static char ID; - AddrspaceRemapFunction ASRemapper; - RemoveAddrspacesPassLegacy( - AddrspaceRemapFunction ASRemapper = removeAllAddrspaces) - : ModulePass(ID), ASRemapper(ASRemapper){}; - -public: - bool runOnModule(Module &M) override { - return removeAddrspaces(M, ASRemapper); - } -}; - -char RemoveAddrspacesPassLegacy::ID = 0; -static RegisterPass - X("RemoveAddrspaces", - "Remove IR address space information.", - false, - false); - -Pass *createRemoveAddrspacesPass( - AddrspaceRemapFunction ASRemapper = removeAllAddrspaces) -{ - return new RemoveAddrspacesPassLegacy(ASRemapper); -} - RemoveAddrspacesPass::RemoveAddrspacesPass() : RemoveAddrspacesPass(removeAllAddrspaces) {} PreservedAnalyses RemoveAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) { - if (removeAddrspaces(M, ASRemapper)) { + bool modified = removeAddrspaces(M, ASRemapper); +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(M)); +#endif + if (modified) { return PreservedAnalyses::allInSet(); } else { return PreservedAnalyses::all(); @@ -505,31 +459,7 @@ unsigned removeJuliaAddrspaces(unsigned AS) return AS; } -struct RemoveJuliaAddrspacesPassLegacy : public ModulePass { - static char ID; - RemoveAddrspacesPassLegacy Pass; - RemoveJuliaAddrspacesPassLegacy() : ModulePass(ID), Pass(removeJuliaAddrspaces){}; - - bool runOnModule(Module &M) override { return Pass.runOnModule(M); } -}; - -char RemoveJuliaAddrspacesPassLegacy::ID = 0; -static RegisterPass - Y("RemoveJuliaAddrspaces", - "Remove IR address space information.", - false, - false); - -Pass *createRemoveJuliaAddrspacesPass() -{ - return new RemoveJuliaAddrspacesPassLegacy(); -} PreservedAnalyses RemoveJuliaAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) { return RemoveAddrspacesPass(removeJuliaAddrspaces).run(M, AM); } - -extern "C" JL_DLLEXPORT void 
LLVMExtraAddRemoveJuliaAddrspacesPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createRemoveJuliaAddrspacesPass()); -} diff --git a/src/llvm-remove-ni.cpp b/src/llvm-remove-ni.cpp index c252905dc75f9..85275dddb101e 100644 --- a/src/llvm-remove-ni.cpp +++ b/src/llvm-remove-ni.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "julia.h" @@ -17,7 +16,7 @@ using namespace llvm; namespace { -static bool removeNI(Module &M) +static bool removeNI(Module &M) JL_NOTSAFEPOINT { auto dlstr = M.getDataLayoutStr(); auto nistart = dlstr.find("-ni:"); @@ -36,39 +35,10 @@ static bool removeNI(Module &M) } } -PreservedAnalyses RemoveNI::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses RemoveNIPass::run(Module &M, ModuleAnalysisManager &AM) { if (removeNI(M)) { return PreservedAnalyses::allInSet(); } return PreservedAnalyses::all(); } - -namespace { -struct RemoveNILegacy : public ModulePass { - static char ID; - RemoveNILegacy() : ModulePass(ID) {}; - - bool runOnModule(Module &M) - { - return removeNI(M); - } -}; - -char RemoveNILegacy::ID = 0; -static RegisterPass - Y("RemoveNI", - "Remove non-integral address space.", - false, - false); -} - -Pass *createRemoveNIPass() -{ - return new RemoveNILegacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createRemoveNIPass()); -} diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 15ae3492927ff..f29802b438e1e 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -11,7 +11,6 @@ // as independent of each other. // // The pass hinges on a call to a marker function that has metadata attached to it. -// To construct the pass call `createLowerSimdLoopPass`. #include "support/dtypes.h" @@ -20,7 +19,8 @@ #include #include -#include +#include +#include #include #include #include @@ -38,12 +38,18 @@ STATISTIC(SimdLoops, "Number of loops with SIMD instructions"); STATISTIC(IVDepInstructions, "Number of instructions marked ivdep"); STATISTIC(ReductionChains, "Number of reduction chains folded"); STATISTIC(ReductionChainLength, "Total sum of instructions folded from reduction chain"); +STATISTIC(MaxChainLength, "Max length of reduction chain"); STATISTIC(AddChains, "Addition reduction chains"); STATISTIC(MulChains, "Multiply reduction chains"); +#ifndef __clang_gcanalyzer__ +#define REMARK(remark) ORE.emit(remark) +#else +#define REMARK(remark) (void) 0; +#endif namespace { -static unsigned getReduceOpcode(Instruction *J, Instruction *operand) +static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFEPOINT { switch (J->getOpcode()) { case Instruction::FSub: @@ -66,7 +72,7 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand) /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv, /// mark the ops as permitting reassociation/commuting. /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer -static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) +static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { typedef SmallVector chainVector; chainVector chain; @@ -77,9 +83,13 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) // Find the user of instruction I that is within loop L. 
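// [Editorial illustration, not part of the patch] The walk below is looking for the
// single-use cycle a floating-point reduction forms through the loop-header phi, i.e. the
// pattern produced by source along the lines of
//
//     double acc = 0.0;
//     for (...)
//         acc += x[i];   // the fadd is the phi's only in-loop user and feeds back into it
//
// Every FAdd/FSub/FMul/FDiv collected on that cycle later gets the reassoc and contract
// fast-math flags set, which is what allows the vectorizer to reorder the reduction.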
for (User *UI : I->users()) { /*}*/ Instruction *U = cast(UI); - if (L->contains(U)) { + if (L.contains(U)) { if (J) { LLVM_DEBUG(dbgs() << "LSL: not a reduction var because op has two internal uses: " << *I << "\n"); + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotReductionVar", U) + << "not a reduction variable because operation has two internal uses"; + }); return; } J = U; @@ -87,6 +97,10 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) } if (!J) { LLVM_DEBUG(dbgs() << "LSL: chain prematurely terminated at " << *I << "\n"); + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "ChainPrematurelyTerminated", I) + << "chain prematurely terminated at " << ore::NV("Instruction", I); + }); return; } if (J == Phi) { @@ -97,6 +111,10 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) // Check that arithmetic op matches prior arithmetic ops in the chain. if (getReduceOpcode(J, I) != opcode) { LLVM_DEBUG(dbgs() << "LSL: chain broke at " << *J << " because of wrong opcode\n"); + REMARK([&](){ + return OptimizationRemarkMissed(DEBUG_TYPE, "ChainBroke", J) + << "chain broke at " << ore::NV("Instruction", J) << " because of wrong opcode"; + }); return; } } @@ -105,6 +123,10 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) opcode = getReduceOpcode(J, I); if (!opcode) { LLVM_DEBUG(dbgs() << "LSL: first arithmetic op in chain is uninteresting" << *J << "\n"); + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "FirstArithmeticOpInChainIsUninteresting", J) + << "first arithmetic op in chain is uninteresting"; + }); return; } } @@ -119,118 +141,107 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) break; } ++ReductionChains; + int length = 0; for (chainVector::const_iterator K=chain.begin(); K!=chain.end(); ++K) { LLVM_DEBUG(dbgs() << "LSL: marking " << **K << "\n"); - (*K)->setFast(true); - ++ReductionChainLength; + REMARK([&]() { + return OptimizationRemark(DEBUG_TYPE, "MarkedUnsafeAlgebra", *K) + << "marked unsafe algebra on " << ore::NV("Instruction", *K); + }); + (*K)->setHasAllowReassoc(true); + (*K)->setHasAllowContract(true); + if (SE) + SE->forgetValue(*K); + ++length; } + ReductionChainLength += length; + MaxChainLength.updateMax(length); } -static bool markLoopInfo(Module &M, Function *marker, function_ref GetLI) +static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { - bool Changed = false; - std::vector ToDelete; - for (User *U : marker->users()) { - ++TotalMarkedLoops; - Instruction *I = cast(U); - ToDelete.push_back(I); - - LoopInfo &LI = GetLI(*I->getParent()->getParent()); - Loop *L = LI.getLoopFor(I->getParent()); - I->removeFromParent(); - if (!L) - continue; - - LLVM_DEBUG(dbgs() << "LSL: loopinfo marker found\n"); - bool simd = false; - bool ivdep = false; - SmallVector MDs; - - BasicBlock *Lh = L->getHeader(); - LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); - - // Reserve first location for self reference to the LoopID metadata node. 
- TempMDTuple TempNode = MDNode::getTemporary(Lh->getContext(), None); - MDs.push_back(TempNode.get()); - - // Walk `julia.loopinfo` metadata and filter out `julia.simdloop` and `julia.ivdep` - if (I->hasMetadataOtherThanDebugLoc()) { - MDNode *JLMD= I->getMetadata("julia.loopinfo"); - if (JLMD) { - LLVM_DEBUG(dbgs() << "LSL: has julia.loopinfo metadata with " << JLMD->getNumOperands() <<" operands\n"); - for (unsigned i = 0, ie = JLMD->getNumOperands(); i < ie; ++i) { - Metadata *Op = JLMD->getOperand(i); - const MDString *S = dyn_cast(Op); - if (S) { - LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); - if (S->getString().startswith("julia")) { - if (S->getString().equals("julia.simdloop")) - simd = true; - if (S->getString().equals("julia.ivdep")) - ivdep = true; - continue; - } - } - MDs.push_back(Op); - } + MDNode *LoopID = L.getLoopID(); + if (!LoopID) + return false; + bool simd = false; + bool ivdep = false; + + BasicBlock *Lh = L.getHeader(); + LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); + + SmallVector MDs(1); + // First Operand is self-reference + // Drop `julia.` prefixes + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + Metadata *Op = LoopID->getOperand(i); + const MDString *S = dyn_cast(Op); + if (S) { + LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); + if (S->getString().startswith("julia")) { + if (S->getString().equals("julia.simdloop")) + simd = true; + if (S->getString().equals("julia.ivdep")) + ivdep = true; + continue; } } + MDs.push_back(Op); + } - LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); - - MDNode *n = L->getLoopID(); - if (n) { - // Loop already has a LoopID so copy over Metadata - // original loop id is operand 0 - for (unsigned i = 1, ie = n->getNumOperands(); i < ie; ++i) { - Metadata *Op = n->getOperand(i); - MDs.push_back(Op); - } - } - MDNode *LoopID = MDNode::getDistinct(Lh->getContext(), MDs); - // Replace the temporary node with a self-reference. - LoopID->replaceOperandWith(0, LoopID); - L->setLoopID(LoopID); - assert(L->getLoopID()); - + LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); + if (!simd && !ivdep) + return false; + ++TotalMarkedLoops; + LLVMContext &Context = L.getHeader()->getContext(); + LoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself + LoopID->replaceOperandWith(0, LoopID); + L.setLoopID(LoopID); + + REMARK([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getStartLoc(), L.getHeader()) + << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; + }); + + // If ivdep is true we assume that there is no memory dependency between loop iterations + // This is a fairly strong assumption and does often not hold true for generic code. + if (ivdep) { + ++IVDepLoops; MDNode *m = MDNode::get(Lh->getContext(), ArrayRef(LoopID)); - - // If ivdep is true we assume that there is no memory dependency between loop iterations - // This is a fairly strong assumption and does often not hold true for generic code. - if (ivdep) { - ++IVDepLoops; - // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. 
- for (BasicBlock *BB : L->blocks()) { - for (Instruction &I : *BB) { - if (I.mayReadOrWriteMemory()) { - ++IVDepInstructions; - I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); - } - } + // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + if (I.mayReadOrWriteMemory()) { + ++IVDepInstructions; + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); + } } - assert(L->isAnnotatedParallel()); } + assert(L.isAnnotatedParallel()); + } - if (simd) { - ++SimdLoops; - // Mark floating-point reductions as okay to reassociate/commute. - for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { - if (PHINode *Phi = dyn_cast(I)) - enableUnsafeAlgebraIfReduction(Phi, L); - else - break; - } + if (simd) { + ++SimdLoops; + // Mark floating-point reductions as okay to reassociate/commute. + for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { + if (PHINode *Phi = dyn_cast(I)) + enableUnsafeAlgebraIfReduction(Phi, L, ORE, SE); + else + break; } - Changed = true; + if (SE) +#if JL_LLVM_VERSION >= 160000 + SE->forgetLoopDispositions(); +#else + SE->forgetLoopDispositions(&L); +#endif } - for (Instruction *I : ToDelete) - I->deleteValue(); - marker->eraseFromParent(); - - assert(!verifyModule(M)); - return Changed; +#ifdef JL_VERIFY_PASSES + assert(!verifyLLVMIR(L)); +#endif + return true; } } // end anonymous namespace @@ -241,77 +252,21 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref(M).getManager(); +PreservedAnalyses LowerSIMDLoopPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) - auto GetLI = [&FAM](Function &F) -> LoopInfo & { - return FAM.getResult(F); - }; - - if (markLoopInfo(M, loopinfo_marker, GetLI)) { - auto preserved = PreservedAnalyses::allInSet(); - preserved.preserve(); +{ + OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); + if (processLoop(L, ORE, &AR.SE)) { +#ifdef JL_DEBUG_BUILD + if (AR.MSSA) + AR.MSSA->verifyMemorySSA(); +#endif + auto preserved = getLoopPassPreservedAnalyses(); + preserved.preserveSet(); + preserved.preserve(); return preserved; } return PreservedAnalyses::all(); } - -namespace { -class LowerSIMDLoopLegacy : public ModulePass { - //LowerSIMDLoop Impl; - -public: - static char ID; - - LowerSIMDLoopLegacy() : ModulePass(ID) { - } - - bool runOnModule(Module &M) override { - bool Changed = false; - - Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker"); - - auto GetLI = [this](Function &F) -> LoopInfo & { - return getAnalysis(F).getLoopInfo(); - }; - - if (loopinfo_marker) - Changed |= markLoopInfo(M, loopinfo_marker, GetLI); - - return Changed; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override - { - ModulePass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } -}; - -} // end anonymous namespace - -char LowerSIMDLoopLegacy::ID = 0; - -static RegisterPass X("LowerSIMDLoop", "LowerSIMDLoop Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -JL_DLLEXPORT Pass *createLowerSimdLoopPass() -{ - return new LowerSIMDLoopLegacy(); -} - -extern "C" JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_impl(LLVMPassManagerRef PM) -{ - unwrap(PM)->add(createLowerSimdLoopPass()); -} diff --git a/src/llvm-version.h b/src/llvm-version.h index 4e15e787b7de8..7b8dfbbae92d6 100644 --- a/src/llvm-version.h +++ b/src/llvm-version.h @@ -2,6 +2,7 @@ #include #include 
"julia_assert.h" +#include "platform.h" // The LLVM version used, JL_LLVM_VERSION, is represented as a 5-digit integer // of the form ABBCC, where A is the major version, B is minor, and C is patch. @@ -9,11 +10,11 @@ #define JL_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 \ + LLVM_VERSION_PATCH) -#if JL_LLVM_VERSION < 120000 - #error Only LLVM versions >= 12.0.0 are supported by Julia +#if JL_LLVM_VERSION < 140000 + #error Only LLVM versions >= 14.0.0 are supported by Julia #endif -#if JL_LLVM_VERSION >= 150000 +#if JL_LLVM_VERSION >= 160000 #define JL_LLVM_OPAQUE_POINTERS 1 #endif diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp new file mode 100644 index 0000000000000..d56fb3a0497fa --- /dev/null +++ b/src/llvm_api.cpp @@ -0,0 +1,166 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#undef DEBUG +#include "llvm-version.h" +#include "platform.h" + +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#define __STDC_CONSTANT_MACROS +#endif + +#include "jitlayers.h" +#include "passes.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { +namespace orc { +class OrcV2CAPIHelper { +public: + using PoolEntry = orc::SymbolStringPtr::PoolEntry; + using PoolEntryPtr = orc::SymbolStringPtr::PoolEntryPtr; + + // Move from SymbolStringPtr to PoolEntryPtr (no change in ref count). + static PoolEntryPtr moveFromSymbolStringPtr(SymbolStringPtr S) + { + PoolEntryPtr Result = nullptr; + std::swap(Result, S.S); + return Result; + } +}; +} // namespace orc +} // namespace llvm + + +typedef struct JLOpaqueJuliaOJIT *JuliaOJITRef; +typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef; + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JuliaOJIT, JuliaOJITRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::JITDylib, LLVMOrcJITDylibRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ExecutionSession, LLVMOrcExecutionSessionRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::OrcV2CAPIHelper::PoolEntry, + LLVMOrcSymbolStringPoolEntryRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility, + LLVMOrcMaterializationResponsibilityRef) + +typedef struct LLVMOpaqueModulePassManager *LLVMModulePassManagerRef; +typedef struct LLVMOpaqueFunctionPassManager *LLVMFunctionPassManagerRef; +typedef struct LLVMOpaqueLoopPassManager *LLVMLoopPassManagerRef; + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::ModulePassManager, LLVMModulePassManagerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::FunctionPassManager, LLVMFunctionPassManagerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::LoopPassManager, LLVMLoopPassManagerRef) + +extern "C" { + +JL_DLLEXPORT_CODEGEN JuliaOJITRef JLJITGetJuliaOJIT_impl(void) +{ + return wrap(jl_ExecutionEngine); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcExecutionSessionRef +JLJITGetLLVMOrcExecutionSession_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getExecutionSession()); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcJITDylibRef +JLJITGetExternalJITDylib_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getExternalJITDylib()); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef JLJITAddObjectFile_impl( + JuliaOJITRef JIT, LLVMOrcJITDylibRef JD, LLVMMemoryBufferRef ObjBuffer) +{ + return wrap(unwrap(JIT)->addObjectFile( + *unwrap(JD), std::unique_ptr(unwrap(ObjBuffer)))); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef JLJITAddLLVMIRModule_impl( + JuliaOJITRef JIT, LLVMOrcJITDylibRef JD, LLVMOrcThreadSafeModuleRef TSM) +{ + 
std::unique_ptr TmpTSM(unwrap(TSM)); + return wrap(unwrap(JIT)->addExternalModule(*unwrap(JD), std::move(*TmpTSM))); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef +JLJITLookup_impl(JuliaOJITRef JIT, LLVMOrcExecutorAddress *Result, + const char *Name, int ExternalJDOnly) +{ + auto Sym = unwrap(JIT)->findExternalJDSymbol(Name, ExternalJDOnly); + if (Sym) { + auto addr = Sym->getAddress(); + *Result = orc::ExecutorAddr(addr).getValue(); + return LLVMErrorSuccess; + } + else { + *Result = 0; + return wrap(Sym.takeError()); + } +} + +JL_DLLEXPORT_CODEGEN LLVMOrcSymbolStringPoolEntryRef +JLJITMangleAndIntern_impl(JuliaOJITRef JIT, + const char *Name) +{ + return wrap(orc::OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(JIT)->mangle(Name))); +} + +JL_DLLEXPORT_CODEGEN const char * +JLJITGetTripleString_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getTargetTriple().str().c_str(); +} + +JL_DLLEXPORT_CODEGEN const char +JLJITGetGlobalPrefix_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getDataLayout().getGlobalPrefix(); +} + +JL_DLLEXPORT_CODEGEN const char * +JLJITGetDataLayoutString_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getDataLayout().getStringRepresentation().c_str(); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcIRCompileLayerRef +JLJITGetIRCompileLayer_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getIRCompileLayer()); +} + +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraMPMAdd##CLASS##_impl(LLVMModulePassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraFPMAdd##CLASS##_impl(LLVMFunctionPassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraLPMAdd##CLASS##_impl(LLVMLoopPassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } + +#include "llvm-julia-passes.inc" + +#undef MODULE_PASS +#undef CGSCC_PASS +#undef FUNCTION_PASS +#undef LOOP_PASS + +} // extern "C" diff --git a/src/llvmcalltest.cpp b/src/llvmcalltest.cpp index b225111520c39..93c442445d79a 100644 --- a/src/llvmcalltest.cpp +++ b/src/llvmcalltest.cpp @@ -9,7 +9,7 @@ #include #include "julia.h" -#include "codegen_shared.h" +#include "llvm-codegen-shared.h" using namespace llvm; @@ -17,11 +17,7 @@ using namespace llvm; #ifdef _OS_WINDOWS_ # define DLLEXPORT __declspec(dllexport) #else -# if defined(_OS_LINUX_) -# define DLLEXPORT __attribute__ ((visibility("protected"))) -# else # define DLLEXPORT __attribute__ ((visibility("default"))) -# endif #endif extern "C" { diff --git a/src/mach_dyld_atfork.tbd b/src/mach_dyld_atfork.tbd deleted file mode 100644 index 9a5d18099dbcf..0000000000000 --- a/src/mach_dyld_atfork.tbd +++ /dev/null @@ -1,25 +0,0 @@ ---- !tapi-tbd -# copied from XCode's libSystem.tbd (current-version: 1311) -# to provide weak-linkage info for new symbols on old systems -tbd-version: 4 -targets: [ x86_64-macos, x86_64-maccatalyst, arm64-macos, arm64-maccatalyst, - arm64e-macos, arm64e-maccatalyst ] -uuids: - - target: x86_64-macos - value: AFE6C76A-B47A-35F5-91D0-4E9FC439E90D - - target: x86_64-maccatalyst - value: AFE6C76A-B47A-35F5-91D0-4E9FC439E90D - - target: arm64-macos - value: 2EA09BDB-811B-33AA-BB58-4B53AA2DB522 - - target: arm64-maccatalyst - value: 2EA09BDB-811B-33AA-BB58-4B53AA2DB522 - - target: arm64e-macos - value: 09AB3723-C26D-3762-93BA-98E9C38B89C1 - - target: arm64e-maccatalyst - value: 09AB3723-C26D-3762-93BA-98E9C38B89C1 -install-name: 
'/usr/lib/libSystem.B.dylib' -exports: - - targets: [ arm64-macos, arm64e-macos, x86_64-macos, x86_64-maccatalyst, - arm64-maccatalyst, arm64e-maccatalyst ] - symbols: [ __dyld_atfork_parent, __dyld_atfork_prepare ] -... diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 516dd9b29f354..424e921a35713 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -99,31 +99,32 @@ (vars '())) (if (null? binds) (cons 'varlist vars) - (cond - ((or (symbol? (car binds)) (decl? (car binds))) - ;; just symbol -> add local - (loop (cdr binds) - (cons (decl-var (car binds)) vars))) - ((and (length= (car binds) 3) - (eq? (caar binds) '=)) - ;; some kind of assignment - (cond - ((or (symbol? (cadar binds)) - (decl? (cadar binds))) - ;; a=b -> add argument - (loop (cdr binds) - (cons (decl-var (cadar binds)) vars))) - ((eventually-call? (cadar binds)) - ;; f()=c - (let ((asgn (cadr (julia-expand0 (car binds) 'none 0)))) - (loop (cdr binds) - (cons (cadr asgn) vars)))) - ((and (pair? (cadar binds)) - (eq? (caadar binds) 'tuple)) - (loop (cdr binds) - (append (map decl-var (lhs-vars (cadar binds))) vars))) - (else '()))) - (else '()))))) + (let ((ux (unescape (car binds)))) + (cond + ((or (symbol? ux) (decl? ux)) + ;; just symbol -> add local + (loop (cdr binds) + (cons (let-decl-var ux) vars))) + ((and (length= (car binds) 3) + (eq? (caar binds) '=)) + (set! ux (unescape (cadar binds))) + ;; some kind of assignment + (cond + ((or (symbol? ux) (decl? ux)) + ;; a=b -> add argument + (loop (cdr binds) + (cons (let-decl-var ux) vars))) + ((eventually-call? (cadar binds)) + ;; f()=c + (let ((name (assigned-name (cadar binds)))) + (loop (cdr binds) + (cons name vars)))) + ((and (pair? (cadar binds)) + (eq? (caadar binds) 'tuple)) + (loop (cdr binds) + (append (map let-decl-var (lhs-vars (cadar binds))) vars))) + (else '()))) + (else '())))))) ;; macro definition (pattern-lambda (macro (call name . argl) body) @@ -180,9 +181,22 @@ (define (unescape e) (if (and (pair? e) (eq? (car e) 'escape)) - (cadr e) + (unescape (cadr e)) e)) +(define (unescape-global-lhs e env m parent-scope inarg) + (cond ((not (pair? e)) e) + ((eq? (car e) 'escape) (unescape-global-lhs (cadr e) env m parent-scope inarg)) + ((memq (car e) '(parameters tuple)) + (list* (car e) (map (lambda (e) + (unescape-global-lhs e env m parent-scope inarg)) + (cdr e)))) + ((and (memq (car e) '(|::| kw)) (length= e 3)) + (list (car e) (unescape-global-lhs (cadr e) env m parent-scope inarg) + (resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))) + (else + (resolve-expansion-vars-with-new-env e env m parent-scope inarg)))) + (define (typedef-expr-name e) (cond ((atom? e) e) ((or (eq? (car e) 'curly) (eq? (car e) '<:)) (typedef-expr-name (cadr e))) @@ -194,7 +208,7 @@ ((eq? (car e) 'curly) (cddr e)) (else '()))) -(define (typevar-expr-name e) (car (analyze-typevar e))) +(define (typevar-expr-name e) (unescape (car (analyze-typevar e)))) ;; get the list of names from a list of `where` variable expressions (define (typevar-names lst) @@ -263,13 +277,13 @@ (list (cadr name)) '())) -;; resolve-expansion-vars-with-new-env, but turn on `inarg` once we get inside -;; the formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`, +;; resolve-expansion-vars-with-new-env, but turn on `inarg` if we get inside +;; a formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`, ;; and we want `inarg` to be true for the `(x)` part. 
-(define (resolve-in-function-lhs e env m parent-scope inarg) - (define (recur x) (resolve-in-function-lhs x env m parent-scope inarg)) +(define (resolve-in-lhs e env m parent-scope inarg) + (define (recur x) (resolve-in-lhs x env m parent-scope inarg)) (define (other x) (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) - (case (car e) + (case (and (pair? e) (car e)) ((where) `(where ,(recur (cadr e)) ,@(map other (cddr e)))) ((|::|) `(|::| ,(recur (cadr e)) ,(other (caddr e)))) ((call) `(call ,(other (cadr e)) @@ -324,6 +338,25 @@ (new-expansion-env-for x env outermost)) m parent-scope inarg)) +(define (reescape ux x) + (if (and (pair? x) (eq? (car x) 'escape)) + (reescape `(escape ,ux) (cadr x)) + ux)) + +;; type has special behavior: identifiers inside are +;; field names, not expressions. +(define (resolve-struct-field-expansion x env m parent-scope inarg) + (let ((ux (unescape x))) + (cond + ((atom? ux) ux) + ((and (pair? ux) (eq? (car ux) '|::|)) + `(|::| ,(unescape (cadr ux)) + ,(resolve-expansion-vars- (reescape (caddr ux) x) env m parent-scope inarg))) + ((and (pair? ux) (memq (car ux) '(const atomic))) + `(,(car ux) ,(resolve-struct-field-expansion (reescape (cadr ux) x) env m parent-scope inarg))) + (else + (resolve-expansion-vars-with-new-env x env m parent-scope inarg))))) + (define (resolve-expansion-vars- e env m parent-scope inarg) (cond ((or (eq? e 'begin) (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal) (underscore-symbol? e)) e) @@ -344,53 +377,44 @@ (m (cadr scope)) (parent-scope (cdr parent-scope))) (resolve-expansion-vars-with-new-env (cadr e) env m parent-scope inarg)))) - ((global) (let ((arg (cadr e))) - (cond ((symbol? arg) e) - ((assignment? arg) - `(global - (= ,(unescape (cadr arg)) - ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg)))) - (else - `(global ,(resolve-expansion-vars-with-new-env arg env m parent-scope inarg)))))) - ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e)) + ((global) + `(global + ,@(map (lambda (arg) + (if (assignment? arg) + `(= ,(unescape-global-lhs (cadr arg) env m parent-scope inarg) + ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg)) + (unescape-global-lhs arg env m parent-scope inarg))) + (cdr e)))) + ((using import export meta line inbounds boundscheck loopinfo inline noinline purity) (map unescape e)) ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted. ((symboliclabel) e) ((symbolicgoto) e) ((struct) `(struct ,(cadr e) ,(resolve-expansion-vars- (caddr e) env m parent-scope inarg) - ;; type has special behavior: identifiers inside are - ;; field names, not expressions. ,(map (lambda (x) - (cond ((atom? x) x) - ((and (pair? x) (eq? (car x) '|::|)) - `(|::| ,(cadr x) - ,(resolve-expansion-vars- (caddr x) env m parent-scope inarg))) - (else - (resolve-expansion-vars-with-new-env x env m parent-scope inarg)))) + (resolve-struct-field-expansion x env m parent-scope inarg)) (cadddr e)))) ((parameters) (cons 'parameters (map (lambda (x) ;; `x` by itself after ; means `x=x` - (let ((x (if (and (not inarg) (symbol? x)) - `(kw ,x ,x) - x))) + (let* ((ux (unescape x)) + (x (if (and (not inarg) (symbol? 
ux)) + `(kw ,ux ,x) + x))) (resolve-expansion-vars- x env m parent-scope #f))) (cdr e)))) ((->) - `(-> ,(resolve-in-function-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg) + `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg) ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))) ((= function) - (if (and (pair? (cadr e)) (function-def? e) (length> e 2)) - ;; in (kw x 1) inside an arglist, the x isn't actually a kwarg - `(,(car e) ,(resolve-in-function-lhs (cadr e) env m parent-scope inarg) - ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)) - `(,(car e) ,@(map (lambda (x) - (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) - (cdr e))))) + `(,(car e) ,(resolve-in-lhs (cadr e) env m parent-scope inarg) + ,@(map (lambda (x) + (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) + (cddr e)))) ((kw) (cond @@ -429,13 +453,14 @@ newenv m parent-scope inarg)) ;; expand initial values in old env (resolve-expansion-vars- (caddr bind) env m parent-scope inarg)) - bind)) + (resolve-expansion-vars- bind newenv m parent-scope inarg))) binds)) ,body))) ((hygienic-scope) ; TODO: move this lowering to resolve-scopes, instead of reimplementing it here badly (let ((parent-scope (cons (list env m) parent-scope)) (body (cadr e)) - (m (caddr e))) + (m (caddr e)) + (lno (cdddr e))) (resolve-expansion-vars-with-new-env body env m parent-scope inarg #t))) ((tuple) (cons (car e) @@ -457,13 +482,14 @@ (define (decl-var* e) (if (pair? e) (case (car e) + ((hygienic-scope) '()) ((escape) '()) ((call) (decl-var* (cadr e))) ((=) (decl-var* (cadr e))) ((curly) (decl-var* (cadr e))) ((|::|) (if (length= e 2) '() (decl-var* (cadr e)))) ((where) (decl-var* (cadr e))) - (else (decl-var e))) + (else e)) e)) (define (decl-vars* e) @@ -471,6 +497,17 @@ (apply append (map decl-vars* (cdr e))) (list (decl-var* e)))) +;; decl-var that can sort of handle scope hygiene, but very badly +(define (let-decl-var e) + (if (pair? e) + (case (car e) + ((hygienic-scope) (let-decl-var (cadr e))) + ((escape) (let-decl-var (cadr e))) + ((|::|) (if (length= e 2) '() (let-decl-var (cadr e)))) + (else e)) + e)) + + ;; count hygienic / escape pairs ;; and fold together a list resulting from applying the function to ;; any block at the same hygienic scope @@ -561,7 +598,8 @@ ((eq? (car e) 'module) e) ((eq? (car e) 'hygienic-scope) (let ((form (cadr e)) ;; form is the expression returned from expand-macros - (modu (caddr e))) ;; m is the macro's def module + (modu (caddr e)) ;; m is the macro's def module + (lno (cdddr e))) ;; lno is (optionally) the line number node (resolve-expansion-vars form modu))) (else (map julia-expand-macroscopes- e)))) @@ -572,8 +610,9 @@ ((eq? (car e) 'hygienic-scope) (let ((parent-scope (list relabels parent-scope)) (body (cadr e)) - (m (caddr e))) - `(hygienic-scope ,(rename-symbolic-labels- (cadr e) (table) parent-scope) ,m))) + (m (caddr e)) + (lno (cdddr e))) + `(hygienic-scope ,(rename-symbolic-labels- (cadr e) (table) parent-scope) ,m ,@lno))) ((and (eq? (car e) 'escape) (not (null? parent-scope))) `(escape ,(apply rename-symbolic-labels- (cadr e) parent-scope))) ((or (eq? (car e) 'symbolicgoto) (eq? (car e) 'symboliclabel)) @@ -598,9 +637,5 @@ (rename-symbolic-labels (julia-expand-quotes e)))) -(define (contains-macrocall e) - (and (pair? e) - (contains (lambda (e) (and (pair? e) (eq? 
(car e) 'macrocall))) e))) - (define (julia-bq-macro x) (julia-bq-expand x 0)) diff --git a/src/method.c b/src/method.c index 33abedcfdb62e..30bf9c5774f11 100644 --- a/src/method.c +++ b/src/method.c @@ -17,6 +17,8 @@ extern "C" { extern jl_value_t *jl_builtin_getfield; extern jl_value_t *jl_builtin_tuple; +jl_methtable_t *jl_kwcall_mt; +jl_method_t *jl_opaque_closure_method; jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name, int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva); @@ -62,6 +64,21 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve } return expr; } + else if (jl_is_enternode(expr)) { + jl_value_t *scope = jl_enternode_scope(expr); + if (scope) { + jl_value_t *val = resolve_globals(scope, module, sparam_vals, binding_effects, eager_resolve); + if (val != scope) { + intptr_t catch_dest = jl_enternode_catch_dest(expr); + JL_GC_PUSH1(&val); + expr = jl_new_struct_uninit(jl_enternode_type); + jl_enternode_catch_dest(expr) = catch_dest; + jl_enternode_scope(expr) = val; + JL_GC_POP(); + } + } + return expr; + } else if (jl_is_gotoifnot(expr)) { jl_value_t *cond = resolve_globals(jl_gotoifnot_cond(expr), module, sparam_vals, binding_effects, eager_resolve); if (cond != jl_gotoifnot_cond(expr)) { @@ -92,20 +109,22 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve // ignore these } else { - size_t i = 0, nargs = jl_array_len(e->args); + size_t i = 0, nargs = jl_array_nrows(e->args); if (e->head == jl_opaque_closure_method_sym) { if (nargs != 5) { jl_error("opaque_closure_method: invalid syntax"); } jl_value_t *name = jl_exprarg(e, 0); - jl_value_t *nargs = jl_exprarg(e, 1); + jl_value_t *oc_nargs = jl_exprarg(e, 1); int isva = jl_exprarg(e, 2) == jl_true; jl_value_t *functionloc = jl_exprarg(e, 3); jl_value_t *ci = jl_exprarg(e, 4); if (!jl_is_code_info(ci)) { jl_error("opaque_closure_method: lambda should be a CodeInfo"); + } else if (!jl_is_long(oc_nargs)) { + jl_type_error("opaque_closure_method", (jl_value_t*)jl_long_type, oc_nargs); } - jl_method_t *m = jl_make_opaque_closure_method(module, name, jl_unbox_long(nargs), functionloc, (jl_code_info_t*)ci, isva); + jl_method_t *m = jl_make_opaque_closure_method(module, name, jl_unbox_long(oc_nargs), functionloc, (jl_code_info_t*)ci, isva); return (jl_value_t*)m; } if (e->head == jl_cfunction_sym) { @@ -126,7 +145,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals); } JL_CATCH { - if (jl_typeis(jl_current_exception(), jl_errorexception_type)) + if (jl_typetagis(jl_current_exception(), jl_errorexception_type)) jl_error("could not evaluate cfunction return type (it might depend on a local variable)"); else jl_rethrow(); @@ -138,7 +157,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals); } JL_CATCH { - if (jl_typeis(jl_current_exception(), jl_errorexception_type)) + if (jl_typetagis(jl_current_exception(), jl_errorexception_type)) jl_error("could not evaluate cfunction argument type (it might depend on a local variable)"); else jl_rethrow(); @@ -159,7 +178,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals); } JL_CATCH { - if (jl_typeis(jl_current_exception(), jl_errorexception_type)) + if (jl_typetagis(jl_current_exception(), 
jl_errorexception_type)) jl_error("could not evaluate ccall return type (it might depend on a local variable)"); else jl_rethrow(); @@ -171,7 +190,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals); } JL_CATCH { - if (jl_typeis(jl_current_exception(), jl_errorexception_type)) + if (jl_typetagis(jl_current_exception(), jl_errorexception_type)) jl_error("could not evaluate ccall argument type (it might depend on a local variable)"); else jl_rethrow(); @@ -188,7 +207,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve jl_error("In ccall calling convention, expected two argument tuple or symbol."); } JL_TYPECHK(ccall method definition, symbol, jl_get_nth_field(cc, 0)); - JL_TYPECHK(ccall method definition, uint8, jl_get_nth_field(cc, 1)); + JL_TYPECHK(ccall method definition, uint16, jl_get_nth_field(cc, 1)); } jl_exprargset(e, 0, resolve_globals(jl_exprarg(e, 0), module, sparam_vals, binding_effects, 1)); i++; @@ -262,7 +281,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals, int binding_effects) { - size_t i, l = jl_array_len(stmts); + size_t i, l = jl_array_nrows(stmts); for (i = 0; i < l; i++) { jl_value_t *stmt = jl_array_ptr_ref(stmts, i); jl_array_ptr_set(stmts, i, resolve_globals(stmt, m, sparam_vals, binding_effects, 0)); @@ -282,26 +301,26 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) jl_expr_t *bodyex = (jl_expr_t*)jl_exprarg(ir, 2); jl_value_t *codelocs = jl_exprarg(ir, 3); li->linetable = jl_exprarg(ir, 4); - size_t nlocs = jl_array_len(codelocs); + size_t nlocs = jl_array_nrows(codelocs); li->codelocs = (jl_value_t*)jl_alloc_array_1d(jl_array_int32_type, nlocs); size_t j; for (j = 0; j < nlocs; j++) { - jl_arrayset((jl_array_t*)li->codelocs, jl_box_int32(jl_unbox_long(jl_arrayref((jl_array_t*)codelocs, j))), - j); + jl_array_uint32_set((jl_array_t*)li->codelocs, j, jl_unbox_long(jl_array_ptr_ref((jl_array_t*)codelocs, j))); } assert(jl_is_expr(bodyex)); jl_array_t *body = bodyex->args; li->code = body; jl_gc_wb(li, li->code); - size_t n = jl_array_len(body); + size_t n = jl_array_nrows(body); jl_value_t **bd = (jl_value_t**)jl_array_ptr_data((jl_array_t*)li->code); - li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, n); + li->ssaflags = jl_alloc_array_1d(jl_array_uint32_type, n); jl_gc_wb(li, li->ssaflags); int inbounds_depth = 0; // number of stacked inbounds - // isempty(inline_flags): no user annotation - // last(inline_flags) == 1: inline region - // last(inline_flags) == 0: noinline region + // isempty(inline_flags): no user callsite inline annotation + // last(inline_flags) == 1: callsite inline region + // last(inline_flags) == 0: callsite noinline region arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); + arraylist_t *purity_exprs = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); for (j = 0; j < n; j++) { jl_value_t *st = bd[j]; int is_flag_stmt = 0; @@ -311,24 +330,29 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) jl_array_t *meta = ((jl_expr_t*)st)->args; for (k = 0; k < na; k++) { jl_value_t *ma = jl_array_ptr_ref(meta, k); - if (ma == (jl_value_t*)jl_pure_sym) - li->pure = 1; - else if (ma == (jl_value_t*)jl_inline_sym) - li->inlineable = 1; + if (ma == (jl_value_t*)jl_inline_sym) + li->inlining = 
1; + else if (ma == (jl_value_t*)jl_noinline_sym) + li->inlining = 2; else if (ma == (jl_value_t*)jl_propagate_inbounds_sym) li->propagate_inbounds = 1; + else if (ma == (jl_value_t*)jl_nospecializeinfer_sym) + li->nospecializeinfer = 1; else if (ma == (jl_value_t*)jl_aggressive_constprop_sym) li->constprop = 1; else if (ma == (jl_value_t*)jl_no_constprop_sym) li->constprop = 2; else if (jl_is_expr(ma) && ((jl_expr_t*)ma)->head == jl_purity_sym) { - if (jl_expr_nargs(ma) == 6) { + if (jl_expr_nargs(ma) == NUM_EFFECTS_OVERRIDES) { li->purity.overrides.ipo_consistent = jl_unbox_bool(jl_exprarg(ma, 0)); li->purity.overrides.ipo_effect_free = jl_unbox_bool(jl_exprarg(ma, 1)); li->purity.overrides.ipo_nothrow = jl_unbox_bool(jl_exprarg(ma, 2)); - li->purity.overrides.ipo_terminates = jl_unbox_bool(jl_exprarg(ma, 3)); + li->purity.overrides.ipo_terminates_globally = jl_unbox_bool(jl_exprarg(ma, 3)); li->purity.overrides.ipo_terminates_locally = jl_unbox_bool(jl_exprarg(ma, 4)); li->purity.overrides.ipo_notaskstate = jl_unbox_bool(jl_exprarg(ma, 5)); + li->purity.overrides.ipo_inaccessiblememonly = jl_unbox_bool(jl_exprarg(ma, 6)); + li->purity.overrides.ipo_noub = jl_unbox_bool(jl_exprarg(ma, 7)); + li->purity.overrides.ipo_noub_if_noinbounds = jl_unbox_bool(jl_exprarg(ma, 8)); } } else @@ -373,29 +397,53 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) } bd[j] = jl_nothing; } - else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_return_sym) { - jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0))); + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_purity_sym) { + is_flag_stmt = 1; + size_t na = jl_expr_nargs(st); + if (na == NUM_EFFECTS_OVERRIDES) + arraylist_push(purity_exprs, (void*)st); + else { + assert(na == 0); + arraylist_pop(purity_exprs); + } + bd[j] = jl_nothing; } - + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_boundscheck_sym) + // Don't set IR_FLAG_INBOUNDS on boundscheck at the same level + is_flag_stmt = 1; + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_return_sym) + jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0))); + else if (jl_is_expr(st) && (((jl_expr_t*)st)->head == jl_foreigncall_sym || ((jl_expr_t*)st)->head == jl_cfunction_sym)) + li->has_fcall = 1; if (is_flag_stmt) - jl_array_uint8_set(li->ssaflags, j, 0); + jl_array_uint32_set(li->ssaflags, j, 0); else { - uint8_t flag = 0; + uint32_t flag = 0; if (inbounds_depth > 0) - flag |= 1 << 0; + flag |= IR_FLAG_INBOUNDS; if (inline_flags->len > 0) { - void* inline_flag = inline_flags->items[inline_flags->len - 1]; + void* inline_flag = inline_flags->items[inline_flags->len-1]; flag |= 1 << (inline_flag ? 1 : 2); } - jl_array_uint8_set(li->ssaflags, j, flag); + int n_purity_exprs = purity_exprs->len; + if (n_purity_exprs > 0) { + // apply all purity overrides + for (int i = 0; i < n_purity_exprs; i++) { + void* purity_expr = purity_exprs->items[i]; + for (int j = 0; j < NUM_EFFECTS_OVERRIDES; j++) { + flag |= jl_unbox_bool(jl_exprarg((jl_value_t*)purity_expr, j)) ? 
(1 << (NUM_IR_FLAGS+j)) : 0; + } + } + } + jl_array_uint32_set(li->ssaflags, j, flag); } } - assert(inline_flags->len == 0); // malformed otherwise - arraylist_free(inline_flags); - free(inline_flags); + assert(inline_flags->len == 0 && purity_exprs->len == 0); // malformed otherwise + arraylist_free(inline_flags); arraylist_free(purity_exprs); + free(inline_flags); free(purity_exprs); jl_array_t *vinfo = (jl_array_t*)jl_exprarg(ir, 1); jl_array_t *vis = (jl_array_t*)jl_array_ptr_ref(vinfo, 0); - size_t nslots = jl_array_len(vis); + size_t nslots = jl_array_nrows(vis); jl_value_t *ssavalue_types = jl_array_ptr_ref(vinfo, 2); assert(jl_is_long(ssavalue_types)); size_t nssavalue = jl_unbox_long(ssavalue_types); @@ -432,19 +480,20 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) { jl_task_t *ct = jl_current_task; - jl_method_instance_t *li = + jl_method_instance_t *mi = (jl_method_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_instance_t), jl_method_instance_type); - li->def.value = NULL; - li->specTypes = NULL; - li->sparam_vals = jl_emptysvec; - li->uninferred = NULL; - li->backedges = NULL; - li->callbacks = NULL; - jl_atomic_store_relaxed(&li->cache, NULL); - li->inInference = 0; - li->precompiled = 0; - return li; + mi->def.value = NULL; + mi->specTypes = NULL; + mi->sparam_vals = jl_emptysvec; + jl_atomic_store_relaxed(&mi->uninferred, NULL); + mi->backedges = NULL; + mi->callbacks = NULL; + jl_atomic_store_relaxed(&mi->cache, NULL); + mi->inInference = 0; + mi->cache_with_orig = 0; + jl_atomic_store_relaxed(&mi->precompiled, 0); + return mi; } JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void) @@ -467,12 +516,14 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void) src->min_world = 1; src->max_world = ~(size_t)0; src->inferred = 0; - src->inlineable = 0; src->propagate_inbounds = 0; - src->pure = 0; + src->has_fcall = 0; + src->nospecializeinfer = 0; src->edges = jl_nothing; src->constprop = 0; + src->inlining = 0; src->purity.bits = 0; + src->inlining_cost = UINT16_MAX; return src; } @@ -486,42 +537,46 @@ jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir) return src; } -void jl_add_function_name_to_lineinfo(jl_code_info_t *ci, jl_value_t *name) +void jl_add_function_to_lineinfo(jl_code_info_t *ci, jl_value_t *func) { + // func may contain jl_symbol (function name), jl_method_t, or jl_method_instance_t jl_array_t *li = (jl_array_t*)ci->linetable; - size_t i, n = jl_array_len(li); + size_t i, n = jl_array_nrows(li); jl_value_t *rt = NULL, *lno = NULL, *inl = NULL; JL_GC_PUSH3(&rt, &lno, &inl); for (i = 0; i < n; i++) { jl_value_t *ln = jl_array_ptr_ref(li, i); - assert(jl_typeis(ln, jl_lineinfonode_type)); + assert(jl_typetagis(ln, jl_lineinfonode_type)); jl_value_t *mod = jl_fieldref_noalloc(ln, 0); jl_value_t *file = jl_fieldref_noalloc(ln, 2); lno = jl_fieldref(ln, 3); inl = jl_fieldref(ln, 4); - jl_value_t *ln_name = (jl_is_int32(inl) && jl_unbox_int32(inl) == 0) ? 
name : jl_fieldref_noalloc(ln, 1); - rt = jl_new_struct(jl_lineinfonode_type, mod, ln_name, file, lno, inl); + // respect a given linetable if available + jl_value_t *ln_func = jl_fieldref_noalloc(ln, 1); + if (jl_is_symbol(ln_func) && (jl_sym_t*)ln_func == jl_symbol("none") && jl_is_int32(inl) && jl_unbox_int32(inl) == 0) + ln_func = func; + rt = jl_new_struct(jl_lineinfonode_type, mod, ln_func, file, lno, inl); jl_array_ptr_set(li, i, rt); } JL_GC_POP(); } // invoke (compiling if necessary) the jlcall function pointer for a method template -STATIC_INLINE jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator, jl_svec_t *sparam_vals, - jl_value_t **args, uint32_t nargs) +static jl_value_t *jl_call_staged(jl_method_t *def, jl_value_t *generator, + size_t world, jl_svec_t *sparam_vals, jl_value_t **args, uint32_t nargs) { size_t n_sparams = jl_svec_len(sparam_vals); jl_value_t **gargs; - size_t totargs = 1 + n_sparams + nargs + def->isva; + size_t totargs = 2 + n_sparams + def->nargs; JL_GC_PUSHARGS(gargs, totargs); - gargs[0] = generator; - memcpy(&gargs[1], jl_svec_data(sparam_vals), n_sparams * sizeof(void*)); - memcpy(&gargs[1 + n_sparams], args, nargs * sizeof(void*)); - if (def->isva) { - gargs[totargs-1] = jl_f_tuple(NULL, &gargs[1 + n_sparams + def->nargs - 1], nargs - (def->nargs - 1)); - gargs[1 + n_sparams + def->nargs - 1] = gargs[totargs - 1]; - } - jl_value_t *code = jl_apply(gargs, 1 + n_sparams + def->nargs); + gargs[0] = jl_box_ulong(world); + gargs[1] = jl_box_long(def->line); + gargs[1] = jl_new_struct(jl_linenumbernode_type, gargs[1], def->file); + memcpy(&gargs[2], jl_svec_data(sparam_vals), n_sparams * sizeof(void*)); + memcpy(&gargs[2 + n_sparams], args, (def->nargs - def->isva) * sizeof(void*)); + if (def->isva) + gargs[totargs - 1] = jl_f_tuple(NULL, &args[def->nargs - 1], nargs - def->nargs + 1); + jl_value_t *code = jl_apply_generic(generator, gargs, totargs); JL_GC_POP(); return code; } @@ -545,15 +600,18 @@ JL_DLLEXPORT jl_code_info_t *jl_expand_and_resolve(jl_value_t *ex, jl_module_t * // Return a newly allocated CodeInfo for the function signature // effectively described by the tuple (specTypes, env, Method) inside linfo -JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) +JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world) { - if (linfo->uninferred) { - return (jl_code_info_t*)jl_copy_ast((jl_value_t*)linfo->uninferred); + jl_value_t *uninferred = jl_atomic_load_relaxed(&linfo->uninferred); + if (uninferred) { + assert(jl_is_code_info(uninferred)); // make sure this did not get `nothing` put here + return (jl_code_info_t*)jl_copy_ast((jl_value_t*)uninferred); } - JL_TIMING(STAGED_FUNCTION); + JL_TIMING(STAGED_FUNCTION, STAGED_FUNCTION); jl_value_t *tt = linfo->specTypes; jl_method_t *def = linfo->def.method; + jl_timing_show_method_instance(linfo, JL_TIMING_DEFAULT_BLOCK); jl_value_t *generator = def->generator; assert(generator != NULL); assert(jl_is_method(def)); @@ -567,13 +625,13 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) JL_TRY { ct->ptls->in_pure_callback = 1; - // and the right world ct->world_age = def->primary_world; // invoke code generator jl_tupletype_t *ttdt = (jl_tupletype_t*)jl_unwrap_unionall(tt); - ex = jl_call_staged(def, generator, linfo->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt)); + ex = jl_call_staged(def, generator, world, linfo->sparam_vals, jl_svec_data(ttdt->parameters), jl_nparams(ttdt)); + // do some 
post-processing if (jl_is_code_info(ex)) { func = (jl_code_info_t*)ex; jl_array_t *stmts = (jl_array_t*)func->code; @@ -582,7 +640,6 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) else { // Lower the user's expression and resolve references to the type parameters func = jl_expand_and_resolve(ex, def->module, linfo->sparam_vals); - if (!jl_is_code_info(func)) { if (jl_is_expr(func) && ((jl_expr_t*)func)->head == jl_error_sym) { ct->ptls->in_pure_callback = 0; @@ -591,14 +648,24 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) jl_error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure, a comprehension or a generator."); } } + jl_add_function_to_lineinfo(func, (jl_value_t*)def->name); // If this generated function has an opaque closure, cache it for // correctness of method identity - for (int i = 0; i < jl_array_len(func->code); ++i) { + for (int i = 0; i < jl_array_nrows(func->code); ++i) { jl_value_t *stmt = jl_array_ptr_ref(func->code, i); if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_new_opaque_closure_sym) { - linfo->uninferred = jl_copy_ast((jl_value_t*)func); - jl_gc_wb(linfo, linfo->uninferred); + if (jl_options.incremental && jl_generating_output()) + jl_error("Impossible to correctly handle OpaqueClosure inside @generated returned during precompile process."); + jl_value_t *uninferred = jl_copy_ast((jl_value_t*)func); + jl_value_t *old = NULL; + if (jl_atomic_cmpswap(&linfo->uninferred, &old, uninferred)) { + jl_gc_wb(linfo, uninferred); + } + else { + assert(jl_is_code_info(old)); + func = (jl_code_info_t*)old; + } break; } } @@ -606,7 +673,6 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) ct->ptls->in_pure_callback = last_in; jl_lineno = last_lineno; ct->world_age = last_age; - jl_add_function_name_to_lineinfo(func, (jl_value_t*)def->name); } JL_CATCH { ct->ptls->in_pure_callback = last_in; @@ -638,7 +704,7 @@ jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_s return new_linfo; } -static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) +JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) { uint8_t j; uint8_t called = 0; @@ -658,17 +724,17 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) } } m->called = called; - m->pure = src->pure; + m->nospecializeinfer = src->nospecializeinfer; m->constprop = src->constprop; m->purity.bits = src->purity.bits; - jl_add_function_name_to_lineinfo(src, (jl_value_t*)m->name); + jl_add_function_to_lineinfo(src, (jl_value_t*)m->name); jl_array_t *copy = NULL; jl_svec_t *sparam_vars = jl_outer_unionall_vars(m->sig); JL_GC_PUSH3(©, &sparam_vars, &src); - assert(jl_typeis(src->code, jl_array_any_type)); + assert(jl_typetagis(src->code, jl_array_any_type)); jl_array_t *stmts = (jl_array_t*)src->code; - size_t i, n = jl_array_len(stmts); + size_t i, n = jl_array_nrows(stmts); copy = jl_alloc_vec_any(n); for (i = 0; i < n; i++) { jl_value_t *st = jl_array_ptr_ref(stmts, i); @@ -680,7 +746,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) size_t j; for (j = 1; j < nargs; j++) { jl_value_t *aj = jl_exprarg(st, j); - if (!jl_is_slot(aj) && !jl_is_argument(aj)) + if (!jl_is_slotnumber(aj) && !jl_is_argument(aj)) continue; int sn = (int)jl_slot_number(aj) - 2; if (sn < 0) // @nospecialize on self is valid but currently ignored @@ -701,23 +767,33 @@ static void 
jl_method_set_source(jl_method_t *m, jl_code_info_t *src) else if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_specialize_sym) { if (nargs == 1) // bare `@specialize` is special: it causes specialization on all args m->nospecialize = 0; + for (j = 1; j < nargs; j++) { + jl_value_t *aj = jl_exprarg(st, j); + if (!jl_is_slotnumber(aj) && !jl_is_argument(aj)) + continue; + int sn = (int)jl_slot_number(aj) - 2; + if (sn < 0) // @specialize on self is valid but currently ignored + continue; + if (sn > (m->nargs - 2)) { + jl_error("@specialize annotation applied to a non-argument"); + } + if (sn >= sizeof(m->nospecialize) * 8) { + jl_printf(JL_STDERR, + "WARNING: @specialize annotation only supported on the first %d arguments.\n", + (int)(sizeof(m->nospecialize) * 8)); + continue; + } + m->nospecialize &= ~(1 << sn); + } st = jl_nothing; } else if (nargs == 2 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_sym) { - m->generator = NULL; + if (m->generator != NULL) + jl_error("duplicate @generated function body"); jl_value_t *gexpr = jl_exprarg(st, 1); - if (jl_expr_nargs(gexpr) == 7) { - // expects (new (core GeneratedFunctionStub) funcname argnames sp line file expandearly) - jl_value_t *funcname = jl_exprarg(gexpr, 1); - assert(jl_is_symbol(funcname)); - if (jl_get_global(m->module, (jl_sym_t*)funcname) != NULL) { - m->generator = jl_toplevel_eval(m->module, gexpr); - jl_gc_wb(m, m->generator); - } - } - if (m->generator == NULL) { - jl_error("invalid @generated function; try placing it in global scope"); - } + // the frontend would put (new (core GeneratedFunctionStub) funcname argnames sp) here, for example + m->generator = jl_toplevel_eval(m->module, gexpr); + jl_gc_wb(m, m->generator); st = jl_nothing; } else if (nargs == 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_only_sym) { @@ -752,8 +828,8 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) jl_task_t *ct = jl_current_task; jl_method_t *m = (jl_method_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_t), jl_method_type); - jl_atomic_store_relaxed(&m->specializations, jl_emptysvec); - jl_atomic_store_relaxed(&m->speckeyset, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_relaxed(&m->specializations, (jl_value_t*)jl_emptysvec); + jl_atomic_store_relaxed(&m->speckeyset, (jl_genericmemory_t*)jl_an_empty_memory_any); m->sig = NULL; m->slot_syms = NULL; m->roots = NULL; @@ -778,11 +854,57 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->primary_world = 1; m->deleted_world = ~(size_t)0; m->is_for_opaque_closure = 0; + m->nospecializeinfer = 0; m->constprop = 0; - JL_MUTEX_INIT(&m->writelock); + m->purity.bits = 0; + m->max_varargs = UINT8_MAX; + JL_MUTEX_INIT(&m->writelock, "method->writelock"); return m; } +// backedges ------------------------------------------------------------------ + +// Use this in a `while` loop to iterate over the backedges in a MethodInstance. +// `*invokesig` will be NULL if the call was made by ordinary dispatch, otherwise +// it will be the signature supplied in an `invoke` call. +// If you don't need `invokesig`, you can set it to NULL on input. +// Initialize iteration with `i = 0`. Returns `i` for the next backedge to be extracted. 
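[Editor's note] The comment above spells out an iteration contract for backedge lists; the following is a minimal sketch of that loop, assuming only a non-NULL backedge array taken from some jl_method_instance_t (e.g. mi->backedges) and the functions introduced in this patch. It is illustrative, not part of the patch:

    jl_array_t *backedges = mi->backedges;       /* assumed non-NULL here */
    size_t n = jl_array_nrows(backedges);
    int i = 0;
    while (i < (int)n) {
        jl_value_t *invokesig = NULL;            /* stays NULL for ordinary dispatch */
        jl_method_instance_t *caller = NULL;
        i = get_next_edge(backedges, i, &invokesig, &caller);
        /* ... process `caller`, keyed on `invokesig` when it is an `invoke` edge ... */
    }
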
+int get_next_edge(jl_array_t *list, int i, jl_value_t** invokesig, jl_method_instance_t **caller) JL_NOTSAFEPOINT +{ + jl_value_t *item = jl_array_ptr_ref(list, i); + if (jl_is_method_instance(item)) { + // Not an `invoke` call, it's just the MethodInstance + if (invokesig != NULL) + *invokesig = NULL; + *caller = (jl_method_instance_t*)item; + return i + 1; + } + assert(jl_is_type(item)); + // An `invoke` call, it's a (sig, MethodInstance) pair + if (invokesig != NULL) + *invokesig = item; + *caller = (jl_method_instance_t*)jl_array_ptr_ref(list, i + 1); + if (*caller) + assert(jl_is_method_instance(*caller)); + return i + 2; +} + +int set_next_edge(jl_array_t *list, int i, jl_value_t *invokesig, jl_method_instance_t *caller) +{ + if (invokesig) + jl_array_ptr_set(list, i++, invokesig); + jl_array_ptr_set(list, i++, caller); + return i; +} + +void push_edge(jl_array_t *list, jl_value_t *invokesig, jl_method_instance_t *caller) +{ + if (invokesig) + jl_array_ptr_1d_push(list, invokesig); + jl_array_ptr_1d_push(list, (jl_value_t*)caller); + return; +} + // method definition ---------------------------------------------------------- jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name, @@ -814,53 +936,52 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name, jl_module_t *module, _Atomic(jl_value_t*) *bp, - jl_value_t *bp_owner, jl_binding_t *bnd) { jl_value_t *gf = NULL; assert(name && bp); if (bnd && jl_atomic_load_relaxed(&bnd->value) != NULL && !bnd->constp) - jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(bnd->name)); + jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name)); gf = jl_atomic_load_relaxed(bp); if (gf != NULL) { if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf)) jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name)); } if (bnd) - bnd->constp = 1; + bnd->constp = 1; // XXX: use jl_declare_constant and jl_checked_assignment if (gf == NULL) { gf = (jl_value_t*)jl_new_generic_function(name, module); jl_atomic_store(bp, gf); // TODO: fix constp assignment data race - if (bp_owner) jl_gc_wb(bp_owner, gf); + if (bnd) jl_gc_wb(bnd, gf); } return gf; } -static jl_methtable_t *first_methtable(jl_value_t *a JL_PROPAGATES_ROOT, int got_tuple1) JL_NOTSAFEPOINT +static jl_methtable_t *nth_methtable(jl_value_t *a JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT { if (jl_is_datatype(a)) { - if (got_tuple1) { + if (n == 0) { jl_methtable_t *mt = ((jl_datatype_t*)a)->name->mt; if (mt != NULL) return mt; } - if (jl_is_tuple_type(a)) { - if (jl_nparams(a) >= 1) - return first_methtable(jl_tparam0(a), 1); + else if (jl_is_tuple_type(a)) { + if (jl_nparams(a) >= n) + return nth_methtable(jl_tparam(a, n - 1), 0); } } else if (jl_is_typevar(a)) { - return first_methtable(((jl_tvar_t*)a)->ub, got_tuple1); + return nth_methtable(((jl_tvar_t*)a)->ub, n); } else if (jl_is_unionall(a)) { - return first_methtable(((jl_unionall_t*)a)->body, got_tuple1); + return nth_methtable(((jl_unionall_t*)a)->body, n); } else if (jl_is_uniontype(a)) { jl_uniontype_t *u = (jl_uniontype_t*)a; - jl_methtable_t *m1 = first_methtable(u->a, got_tuple1); + jl_methtable_t *m1 = nth_methtable(u->a, n); if ((jl_value_t*)m1 != jl_nothing) { - jl_methtable_t *m2 = first_methtable(u->b, got_tuple1); + jl_methtable_t *m2 = nth_methtable(u->b, n); if (m1 == m2) return m1; } @@ -871,18 +992,20 @@ 
static jl_methtable_t *first_methtable(jl_value_t *a JL_PROPAGATES_ROOT, int got // get the MethodTable for dispatch, or `nothing` if cannot be determined JL_DLLEXPORT jl_methtable_t *jl_method_table_for(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { - return first_methtable(argtypes, 0); + return nth_methtable(argtypes, 1); } -JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +jl_methtable_t *jl_kwmethod_table_for(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { - return method->external_mt ? (jl_methtable_t*)method->external_mt : jl_method_table_for(method->sig); + jl_methtable_t *kwmt = nth_methtable(argtypes, 3); + if ((jl_value_t*)kwmt == jl_nothing) + return NULL; + return kwmt; } -// get the MethodTable implied by a single given type, or `nothing` -JL_DLLEXPORT jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { - return first_methtable(argt, 1); + return method->external_mt ? (jl_methtable_t*)method->external_mt : jl_method_table_for(method->sig); } jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED; @@ -896,11 +1019,11 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, jl_svec_t *atypes = (jl_svec_t*)jl_svecref(argdata, 0); jl_svec_t *tvars = (jl_svec_t*)jl_svecref(argdata, 1); jl_value_t *functionloc = jl_svecref(argdata, 2); - size_t nargs = jl_svec_len(atypes); - int isva = jl_is_vararg(jl_svecref(atypes, nargs - 1)); assert(jl_is_svec(atypes)); - assert(nargs > 0); assert(jl_is_svec(tvars)); + size_t nargs = jl_svec_len(atypes); + assert(nargs > 0); + int isva = jl_is_vararg(jl_svecref(atypes, nargs - 1)); if (!jl_is_type(jl_svecref(atypes, 0)) || (isva && nargs == 1)) jl_error("function type in method definition is not a type"); jl_sym_t *name; @@ -909,13 +1032,9 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, JL_GC_PUSH3(&f, &m, &argtype); size_t i, na = jl_svec_len(atypes); - argtype = (jl_value_t*)jl_apply_tuple_type(atypes); - for (i = jl_svec_len(tvars); i > 0; i--) { - jl_value_t *tv = jl_svecref(tvars, i - 1); - if (!jl_is_typevar(tv)) - jl_type_error("method signature", (jl_value_t*)jl_tvar_type, tv); - argtype = jl_new_struct(jl_unionall_type, tv, argtype); - } + argtype = jl_apply_tuple_type(atypes, 1); + if (!jl_is_datatype(argtype)) + jl_error("invalid type in method definition (Union{})"); jl_methtable_t *external_mt = mt; if (!mt) @@ -925,12 +1044,20 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, if (mt->frozen) jl_error("cannot add methods to a builtin function"); + assert(jl_is_linenode(functionloc)); + jl_sym_t *file = (jl_sym_t*)jl_linenode_file(functionloc); + if (!jl_is_symbol(file)) + file = jl_empty_sym; + int32_t line = jl_linenode_line(functionloc); + // TODO: derive our debug name from the syntax instead of the type - name = mt->name; - if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || external_mt) { + jl_methtable_t *kwmt = mt == jl_kwcall_mt ? jl_kwmethod_table_for(argtype) : mt; + // if we have a kwcall, try to derive the name from the callee argument method table + name = (kwmt ? 
kwmt : mt)->name; + if (kwmt == jl_type_type_mt || kwmt == jl_nonfunction_mt || external_mt) { // our value for `name` is bad, try to guess what the syntax might have had, // like `jl_static_show_func_sig` might have come up with - jl_datatype_t *dt = jl_first_argument_datatype(argtype); + jl_datatype_t *dt = jl_nth_argument_datatype(argtype, mt == jl_kwcall_mt ? 3 : 1); if (dt != NULL) { name = dt->name->name; if (jl_is_type_type((jl_value_t*)dt)) { @@ -941,67 +1068,83 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, } } } + if (!jl_is_code_info(f)) { // this occurs when there is a closure being added to an out-of-scope function // the user should only do this at the toplevel // the result is that the closure variables get interpolated directly into the IR f = jl_new_code_info_from_ir((jl_expr_t*)f); } - m = jl_new_method_uninit(module); - m->external_mt = (jl_value_t*)external_mt; - if (external_mt) - jl_gc_wb(m, external_mt); - m->sig = argtype; - m->name = name; - m->isva = isva; - m->nargs = nargs; - assert(jl_is_linenode(functionloc)); - jl_value_t *file = jl_linenode_file(functionloc); - m->file = jl_is_symbol(file) ? (jl_sym_t*)file : jl_empty_sym; - m->line = jl_linenode_line(functionloc); - jl_method_set_source(m, f); - - if (jl_has_free_typevars(argtype)) { - jl_exceptionf(jl_argumenterror_type, - "method definition for %s at %s:%d has free type variables", - jl_symbol_name(name), - jl_symbol_name(m->file), - m->line); - } for (i = 0; i < na; i++) { jl_value_t *elt = jl_svecref(atypes, i); - if (!jl_is_type(elt) && !jl_is_typevar(elt) && !jl_is_vararg(elt)) { + if (jl_is_vararg(elt)) { + if (i < na-1) + jl_exceptionf(jl_argumenterror_type, + "Vararg on non-final argument in method definition for %s at %s:%d", + jl_symbol_name(name), + jl_symbol_name(file), + line); + elt = jl_unwrap_vararg(elt); + } + int isvalid = (jl_is_type(elt) || jl_is_typevar(elt) || jl_is_vararg(elt)) && elt != jl_bottom_type; + if (!isvalid) { jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(f->slotnames, i); if (argname == jl_unused_sym) jl_exceptionf(jl_argumenterror_type, "invalid type for argument number %d in method definition for %s at %s:%d", i, jl_symbol_name(name), - jl_symbol_name(m->file), - m->line); + jl_symbol_name(file), + line); else jl_exceptionf(jl_argumenterror_type, "invalid type for argument %s in method definition for %s at %s:%d", jl_symbol_name(argname), jl_symbol_name(name), - jl_symbol_name(m->file), - m->line); + jl_symbol_name(file), + line); } - if (jl_is_vararg(elt) && i < na-1) - jl_exceptionf(jl_argumenterror_type, - "Vararg on non-final argument in method definition for %s at %s:%d", - jl_symbol_name(name), - jl_symbol_name(m->file), - m->line); + } + for (i = jl_svec_len(tvars); i > 0; i--) { + jl_value_t *tv = jl_svecref(tvars, i - 1); + if (!jl_is_typevar(tv)) + jl_type_error("method signature", (jl_value_t*)jl_tvar_type, tv); + if (!jl_has_typevar(argtype, (jl_tvar_t*)tv)) // deprecate this to an error in v2 + jl_printf(JL_STDERR, + "WARNING: method definition for %s at %s:%d declares type variable %s but does not use it.\n", + jl_symbol_name(name), + jl_symbol_name(file), + line, + jl_symbol_name(((jl_tvar_t*)tv)->name)); + argtype = jl_new_struct(jl_unionall_type, tv, argtype); + } + if (jl_has_free_typevars(argtype)) { + jl_exceptionf(jl_argumenterror_type, + "method definition for %s at %s:%d has free type variables", + jl_symbol_name(name), + jl_symbol_name(file), + line); } + m = jl_new_method_uninit(module); + m->external_mt = 
(jl_value_t*)external_mt; + if (external_mt) + jl_gc_wb(m, external_mt); + m->sig = argtype; + m->name = name; + m->isva = isva; + m->nargs = nargs; + m->file = file; + m->line = line; + jl_method_set_source(m, f); + #ifdef RECORD_METHOD_ORDER if (jl_all_methods == NULL) jl_all_methods = jl_alloc_vec_any(0); #endif if (jl_all_methods != NULL) { - while (jl_array_len(jl_all_methods) < m->primary_world) + while (jl_array_nrows(jl_all_methods) < m->primary_world) jl_array_ptr_1d_push(jl_all_methods, NULL); jl_array_ptr_1d_push(jl_all_methods, (jl_value_t*)m); } @@ -1016,28 +1159,70 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, // root blocks +// This section handles method roots. Roots are GC-preserved items needed to +// represent lowered, type-inferred, and/or compiled code. These items are +// stored in a flat list (`m.roots`), and during serialization and +// deserialization of code we replace C-pointers to these items with a +// relocatable reference. We use a bipartite reference, `(key, index)` pair, +// where `key` identifies the module that added the root and `index` numbers +// just those roots with the same `key`. +// +// During precompilation (serialization), we save roots that were added to +// methods that are tagged with this package's module-key, even for "external" +// methods not owned by a module currently being precompiled. During +// deserialization, we load the new roots and append them to the method. When +// code is deserialized (see ircode.c), we replace the bipartite reference with +// the pointer to the memory address in the current session. The bipartite +// reference allows us to cache both roots and references in precompilation .ji +// files using a naming scheme that is independent of which packages are loaded +// in arbitrary order. +// +// To track the module-of-origin for each root, methods also have a +// `root_blocks` field that uses run-length encoding (RLE) storing `key` and the +// (absolute) integer index within `roots` at which a block of roots with that +// key begins. This makes it possible to look up an individual `(key, index)` +// pair fairly efficiently. A given `key` may possess more than one block; the +// `index` continues to increment regardless of block boundaries. +// +// Roots with `key = 0` are considered to be of unknown origin, and +// CodeInstances referencing such roots will remain unserializable unless all +// such roots were added at the time of system image creation. To track this +// additional data, we use two fields: +// +// - methods have an `nroots_sysimg` field to count the number of roots defined +// at the time of writing the system image (such occur first in the list of +// roots). These are the cases with `key = 0` that do not prevent +// serialization. +// - CodeInstances have a `relocatability` field which when 1 indicates that +// every root is "safe," meaning it was either added at sysimg creation or is +// tagged with a non-zero `key`. Even a single unsafe root will cause this to +// have value 0. 
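[Editor's note] A small worked illustration of the bipartite (key, index) reference and the RLE `root_blocks` table described above; the numbers are invented and the exact offset arithmetic lives in rle.c, so treat this as a sketch only:

    /*  m->roots       = [r0, r1, r2, r3, r4, r5, r6]   (7 roots in total)
     *  m->root_blocks = [0xA, 2,  0xB, 5]              (key, block-start) pairs
     *
     *  Roots 0-1 carry key 0 (no block covers them); roots 2-4 were added under
     *  module key 0xA; roots 5-6 under key 0xB. The root at flat position 6 is
     *  the second root tagged 0xB, so it is serialized as the reference
     *  (key = 0xB, index = 1) -- the index counts only roots with that key --
     *  and lookup_root(m, 0xB, 1) resolves that reference back to position 6
     *  in the current session.
     */
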
+ +// Get the key of the current (final) block of roots static uint64_t current_root_id(jl_array_t *root_blocks) { if (!root_blocks) return 0; assert(jl_is_array(root_blocks)); - size_t nx2 = jl_array_len(root_blocks); + size_t nx2 = jl_array_nrows(root_blocks); if (nx2 == 0) return 0; - uint64_t *blocks = (uint64_t*)jl_array_data(root_blocks); + uint64_t *blocks = jl_array_data(root_blocks, uint64_t); return blocks[nx2-2]; } +// Add a new block of `len` roots with key `modid` (module id) static void add_root_block(jl_array_t *root_blocks, uint64_t modid, size_t len) { assert(jl_is_array(root_blocks)); jl_array_grow_end(root_blocks, 2); - uint64_t *blocks = (uint64_t*)jl_array_data(root_blocks); - int nx2 = jl_array_len(root_blocks); + uint64_t *blocks = jl_array_data(root_blocks, uint64_t); + int nx2 = jl_array_nrows(root_blocks); blocks[nx2-2] = modid; blocks[nx2-1] = len; } +// Allocate storage for roots static void prepare_method_for_roots(jl_method_t *m, uint64_t modid) { if (!m->roots) { @@ -1050,29 +1235,31 @@ static void prepare_method_for_roots(jl_method_t *m, uint64_t modid) } } +// Add a single root with owner `mod` to a method JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root) { JL_GC_PUSH2(&m, &root); uint64_t modid = 0; if (mod) { assert(jl_is_module(mod)); - modid = mod->build_id; + modid = mod->build_id.lo; } assert(jl_is_method(m)); prepare_method_for_roots(m, modid); if (current_root_id(m->root_blocks) != modid) - add_root_block(m->root_blocks, modid, jl_array_len(m->roots)); + add_root_block(m->root_blocks, modid, jl_array_nrows(m->roots)); jl_array_ptr_1d_push(m->roots, root); JL_GC_POP(); } +// Add a list of roots with key `modid` to a method void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots) { JL_GC_PUSH2(&m, &roots); assert(jl_is_method(m)); assert(jl_is_array(roots)); prepare_method_for_roots(m, modid); - add_root_block(m->root_blocks, modid, jl_array_len(m->roots)); + add_root_block(m->root_blocks, modid, jl_array_nrows(m->roots)); jl_array_ptr_1d_append(m->roots, roots); JL_GC_POP(); } @@ -1086,7 +1273,7 @@ int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i) rr->index = i; return i < m->nroots_sysimg; } - rle_index_to_reference(rr, i, (uint64_t*)jl_array_data(m->root_blocks), jl_array_len(m->root_blocks), 0); + rle_index_to_reference(rr, i, jl_array_data(m->root_blocks, uint64_t), jl_array_nrows(m->root_blocks), 0); if (rr->key) return 1; return i < m->nroots_sysimg; @@ -1101,19 +1288,20 @@ jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index) return jl_array_ptr_ref(m->roots, index); } rle_reference rr = {key, index}; - size_t i = rle_reference_to_index(&rr, (uint64_t*)jl_array_data(m->root_blocks), jl_array_len(m->root_blocks), 0); + size_t i = rle_reference_to_index(&rr, jl_array_data(m->root_blocks, uint64_t), jl_array_nrows(m->root_blocks), 0); return jl_array_ptr_ref(m->roots, i); } +// Count the number of roots added by module with id `key` int nroots_with_key(jl_method_t *m, uint64_t key) { size_t nroots = 0; if (m->roots) - nroots = jl_array_len(m->roots); + nroots = jl_array_nrows(m->roots); if (!m->root_blocks) return key == 0 ? 
nroots : 0; - uint64_t *rletable = (uint64_t*)jl_array_data(m->root_blocks); - size_t j, nblocks2 = jl_array_len(m->root_blocks); + uint64_t *rletable = jl_array_data(m->root_blocks, uint64_t); + size_t j, nblocks2 = jl_array_nrows(m->root_blocks); int nwithkey = 0; for (j = 0; j < nblocks2; j+=2) { if (rletable[j] == key) diff --git a/src/module.c b/src/module.c index 50f3abaa36871..db1d86aa4a3ce 100644 --- a/src/module.c +++ b/src/module.c @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +// /* modules and top-level bindings @@ -11,47 +12,50 @@ extern "C" { #endif -JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, uint8_t default_names) +JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, uint8_t default_names) { jl_task_t *ct = jl_current_task; const jl_uuid_t uuid_zero = {0, 0}; jl_module_t *m = (jl_module_t*)jl_gc_alloc(ct->ptls, sizeof(jl_module_t), jl_module_type); + jl_set_typetagof(m, jl_module_tag, 0); assert(jl_is_symbol(name)); m->name = name; - m->parent = NULL; + m->parent = parent; m->istopmod = 0; m->uuid = uuid_zero; static unsigned int mcounter; // simple counter backup, in case hrtime is not incrementing - m->build_id = jl_hrtime() + (++mcounter); - if (!m->build_id) - m->build_id++; // build id 0 is invalid + m->build_id.lo = jl_hrtime() + (++mcounter); + if (!m->build_id.lo) + m->build_id.lo++; // build id 0 is invalid + m->build_id.hi = ~(uint64_t)0; m->primary_world = 0; - m->counter = 1; + jl_atomic_store_relaxed(&m->counter, 1); m->nospecialize = 0; m->optlevel = -1; m->compile = -1; m->infer = -1; m->max_methods = -1; - JL_MUTEX_INIT(&m->lock); - htable_new(&m->bindings, 0); + m->hash = parent == NULL ? bitmix(name->hash, jl_module_type->hash) : + bitmix(name->hash, parent->hash); + JL_MUTEX_INIT(&m->lock, "module->lock"); + jl_atomic_store_relaxed(&m->bindings, jl_emptysvec); + jl_atomic_store_relaxed(&m->bindingkeyset, (jl_genericmemory_t*)jl_an_empty_memory_any); arraylist_new(&m->usings, 0); - JL_GC_PUSH1(&m); if (jl_core_module && default_names) { + JL_GC_PUSH1(&m); jl_module_using(m, jl_core_module); - } - // export own name, so "using Foo" makes "Foo" itself visible - if (default_names) { + // export own name, so "using Foo" makes "Foo" itself visible jl_set_const(m, name, (jl_value_t*)m); + jl_module_public(m, name, 1); + JL_GC_POP(); } - jl_module_export(m, name); - JL_GC_POP(); return m; } -JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name) +JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name, jl_module_t *parent) { - return jl_new_module_(name, 1); + return jl_new_module_(name, parent, 1); } uint32_t jl_module_next_counter(jl_module_t *m) @@ -62,10 +66,9 @@ uint32_t jl_module_next_counter(jl_module_t *m) JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports, uint8_t default_names) { // TODO: should we prohibit this during incremental compilation? 
- jl_module_t *m = jl_new_module_(name, default_names); + // TODO: the parent module is a lie + jl_module_t *m = jl_new_module_(name, jl_main_module, default_names); JL_GC_PUSH1(&m); - m->parent = jl_main_module; // TODO: this is a lie - jl_gc_wb(m, m->parent); if (std_imports) jl_add_standard_imports(m); JL_GC_POP(); @@ -154,69 +157,88 @@ JL_DLLEXPORT uint8_t jl_istopmod(jl_module_t *mod) return mod->istopmod; } -static jl_binding_t *new_binding(jl_sym_t *name) +static jl_globalref_t *jl_new_globalref(jl_module_t *mod, jl_sym_t *name, jl_binding_t *b) { jl_task_t *ct = jl_current_task; - assert(jl_is_symbol(name)); - jl_binding_t *b = (jl_binding_t*)jl_gc_alloc_buf(ct->ptls, sizeof(jl_binding_t)); - b->name = name; - b->value = NULL; - b->owner = NULL; - b->ty = NULL; + jl_globalref_t *g = (jl_globalref_t*)jl_gc_alloc(ct->ptls, sizeof(jl_globalref_t), jl_globalref_type); + g->mod = mod; + jl_gc_wb(g, g->mod); + g->name = name; + g->binding = b; + return g; +} + +static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name) +{ + jl_task_t *ct = jl_current_task; + assert(jl_is_module(mod) && jl_is_symbol(name)); + jl_binding_t *b = (jl_binding_t*)jl_gc_alloc(ct->ptls, sizeof(jl_binding_t), jl_binding_type); + jl_atomic_store_relaxed(&b->value, NULL); + jl_atomic_store_relaxed(&b->owner, NULL); + jl_atomic_store_relaxed(&b->ty, NULL); b->globalref = NULL; b->constp = 0; b->exportp = 0; + b->publicp = 0; b->imported = 0; b->deprecated = 0; + b->usingfailed = 0; + b->padding = 0; + JL_GC_PUSH1(&b); + b->globalref = jl_new_globalref(mod, name, b); + JL_GC_POP(); return b; } -// get binding for assignment -JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc) -{ - JL_LOCK(&m->lock); - jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&m->bindings, var); - jl_binding_t *b = *bp; +extern jl_mutex_t jl_modules_mutex; - if (b != HT_NOTFOUND) { - if (b->owner != m) { - if (b->owner == NULL) { - b->owner = m; - } - else if (alloc) { - JL_UNLOCK(&m->lock); - jl_errorf("cannot assign a value to imported variable %s.%s from module %s", - jl_symbol_name(b->owner->name), jl_symbol_name(var), jl_symbol_name(m->name)); +static void check_safe_newbinding(jl_module_t *m, jl_sym_t *var) +{ + if (jl_current_task->ptls->in_pure_callback) + jl_errorf("new globals cannot be created in a generated function"); + if (jl_options.incremental && jl_generating_output()) { + JL_LOCK(&jl_modules_mutex); + int open = ptrhash_has(&jl_current_modules, (void*)m); + if (!open && jl_module_init_order != NULL) { + size_t i, l = jl_array_len(jl_module_init_order); + for (i = 0; i < l; i++) { + if (m == (jl_module_t*)jl_array_ptr_ref(jl_module_init_order, i)) { + open = 1; + break; + } } } + JL_UNLOCK(&jl_modules_mutex); + if (!open) { + jl_errorf("Creating a new global in closed module `%s` (`%s`) breaks incremental compilation " + "because the side effects will not be permanent.", + jl_symbol_name(m->name), jl_symbol_name(var)); + } } - else if (alloc) { - b = new_binding(var); - b->owner = m; - *bp = b; - JL_GC_PROMISE_ROOTED(b); - jl_gc_wb_buf(m, b, sizeof(jl_binding_t)); - } - else { - b = NULL; - } - - JL_UNLOCK(&m->lock); - return b; } -// Hash tables don't generically root their contents, but they do for bindings. -// Express this to the analyzer. -// NOTE: Must hold m->lock while calling these. 
-#ifdef __clang_gcanalyzer__ -jl_binding_t *_jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT; -#else -static inline jl_binding_t *_jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT -{ - return (jl_binding_t*)ptrhash_get(&m->bindings, var); -} -#endif +static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var) JL_GLOBALLY_ROOTED; +// get binding for assignment +JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) +{ + jl_binding_t *b = jl_get_module_binding(m, var, 1); + jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); + if (b2 != b) { + if (b2 == NULL) + check_safe_newbinding(m, var); + if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) { + jl_module_t *from = jl_binding_dbgmodule(b, m, var); + if (from == m) + jl_errorf("cannot assign a value to imported variable %s.%s", + jl_symbol_name(from->name), jl_symbol_name(var)); + else + jl_errorf("cannot assign a value to imported variable %s.%s from module %s", + jl_symbol_name(from->name), jl_symbol_name(var), jl_symbol_name(m->name)); + } + } + return b; +} // return module of binding JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var) @@ -224,60 +246,45 @@ JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var jl_binding_t *b = jl_get_binding(m, var); if (b == NULL) return NULL; - return b->owner; + return b->globalref->mod; // TODO: deprecate this? } // get binding for adding a method -// like jl_get_binding_wr, but has different error paths +// like jl_get_binding_wr, but has different error paths and messages JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var) { - JL_LOCK(&m->lock); - jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&m->bindings, var); - jl_binding_t *b = *bp; - JL_GC_PROMISE_ROOTED(b); - - if (b != HT_NOTFOUND) { - if (b->owner != m) { - if (b->owner == NULL) { - b->owner = m; + jl_binding_t *b = jl_get_module_binding(m, var, 1); + jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); + if (b2 != b) { + if (b2 == NULL) + check_safe_newbinding(m, var); + if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) { + jl_value_t *f = jl_atomic_load_relaxed(&b2->value); + jl_module_t *from = jl_binding_dbgmodule(b, m, var); + if (f == NULL) { + // we must have implicitly imported this with using, so call jl_binding_dbgmodule to try to get the name of the module we got this from + jl_errorf("invalid method definition in %s: exported function %s.%s does not exist", + jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); } - else { - JL_UNLOCK(&m->lock); - jl_binding_t *b2 = jl_get_binding(b->owner, b->name); - if (b2 == NULL || b2->value == NULL) - jl_errorf("invalid method definition: imported function %s.%s does not exist", - jl_symbol_name(b->owner->name), jl_symbol_name(b->name)); - // TODO: we might want to require explicitly importing types to add constructors - if (!b->imported && !jl_is_type(b2->value)) { - jl_errorf("error in method definition: function %s.%s must be explicitly imported to be extended", - jl_symbol_name(b->owner->name), jl_symbol_name(b->name)); - } - return b2; + // TODO: we might want to require explicitly importing types to add constructors + // or we might want to drop this error entirely + if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) { + jl_errorf("invalid 
method definition in %s: function %s.%s must be explicitly imported to be extended", + jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); } + return b2; } } - else { - b = new_binding(var); - b->owner = m; - *bp = b; - JL_GC_PROMISE_ROOTED(b); - jl_gc_wb_buf(m, b, sizeof(jl_binding_t)); - } - - JL_UNLOCK(&m->lock); return b; } -static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname, - int explici); - typedef struct _modstack_t { jl_module_t *m; jl_sym_t *var; struct _modstack_t *prev; } modstack_t; -static jl_binding_t *jl_get_binding_(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st); +static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st); static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT; @@ -289,36 +296,49 @@ static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROO } #endif +static int eq_bindings(jl_binding_t *owner, jl_binding_t *alias) +{ + assert(owner == jl_atomic_load_relaxed(&owner->owner)); + if (owner == alias) + return 1; + alias = jl_atomic_load_relaxed(&alias->owner); + if (owner == alias) + return 1; + if (owner->constp && alias->constp && jl_atomic_load_relaxed(&owner->value) && jl_atomic_load_relaxed(&alias->value) == jl_atomic_load_relaxed(&owner->value)) + return 1; + return 0; +} + // find a binding from a module's `usings` list -// called while holding m->lock -static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st, int warn) +static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, jl_module_t **from, modstack_t *st, int warn) { jl_binding_t *b = NULL; jl_module_t *owner = NULL; - for(int i=(int)m->usings.len-1; i >= 0; --i) { + JL_LOCK(&m->lock); + int i = (int)m->usings.len - 1; + JL_UNLOCK(&m->lock); + for (; i >= 0; --i) { + JL_LOCK(&m->lock); jl_module_t *imp = module_usings_getidx(m, i); - // TODO: make sure this can't deadlock - JL_LOCK(&imp->lock); - jl_binding_t *tempb = _jl_get_module_binding(imp, var); - JL_UNLOCK(&imp->lock); - if (tempb != HT_NOTFOUND && tempb->exportp) { - tempb = jl_get_binding_(imp, var, st); - if (tempb == NULL || tempb->owner == NULL) + JL_UNLOCK(&m->lock); + jl_binding_t *tempb = jl_get_module_binding(imp, var, 0); + if (tempb != NULL && tempb->exportp) { + tempb = jl_resolve_owner(NULL, imp, var, st); // find the owner for tempb + if (tempb == NULL) // couldn't resolve; try next using (see issue #6105) continue; - if (owner != NULL && tempb->owner != b->owner && - !tempb->deprecated && !b->deprecated && - !(tempb->constp && tempb->value && b->constp && b->value == tempb->value)) { + assert(jl_atomic_load_relaxed(&tempb->owner) == tempb); + if (b != NULL && !tempb->deprecated && !b->deprecated && !eq_bindings(tempb, b)) { if (warn) { - // mark this binding resolved (by creating it or setting the owner), to avoid repeating the warning - (void)jl_get_binding_wr(m, var, 1); - JL_UNLOCK(&m->lock); + // set usingfailed=1 to avoid repeating this warning + // the owner will still be NULL, so it can be later imported or defined + tempb = jl_get_module_binding(m, var, 1); + tempb->usingfailed = 1; jl_printf(JL_STDERR, "WARNING: both %s and %s export \"%s\"; uses of it in module %s must be qualified\n", jl_symbol_name(owner->name), jl_symbol_name(imp->name), jl_symbol_name(var), jl_symbol_name(m->name)); - 
JL_LOCK(&m->lock); } return NULL; } @@ -328,128 +348,196 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl } } } + *from = owner; return b; } +// for error message printing: look up the module that exported a binding to m as var +// this might not be the same as the owner of the binding, since the binding itself may itself have been imported from elsewhere +static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var) +{ + jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); + if (b2 != b && !b->imported) { + // for implicitly imported globals, try to re-resolve it to find the module we got it from most directly + jl_module_t *from = NULL; + b = using_resolve_binding(m, var, &from, NULL, 0); + if (b) { + if (b2 == NULL || jl_atomic_load_relaxed(&b->owner) == jl_atomic_load_relaxed(&b2->owner)) + return from; + // if we did not find it (or accidentally found a different one), ignore this + } + } + return m; +} + +static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t *b); + // get binding for reading. might return NULL for unbound. -static jl_binding_t *jl_get_binding_(jl_module_t *m, jl_sym_t *var, modstack_t *st) +static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m, jl_sym_t *var, modstack_t *st) { - modstack_t top = { m, var, st }; - modstack_t *tmp = st; - while (tmp != NULL) { - if (tmp->m == m && tmp->var == var) { - // import cycle without finding actual location + if (b == NULL) + b = jl_get_module_binding(m, var, 1); + jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); + if (b2 == NULL) { + if (b->usingfailed) return NULL; + modstack_t top = { m, var, st }; + modstack_t *tmp = st; + for (; tmp != NULL; tmp = tmp->prev) { + if (tmp->m == m && tmp->var == var) { + // import cycle without finding actual location + return NULL; + } } - tmp = tmp->prev; - } - JL_LOCK(&m->lock); - jl_binding_t *b = _jl_get_module_binding(m, var); - if (b == HT_NOTFOUND || b->owner == NULL) { - b = using_resolve_binding(m, var, &top, 1); - JL_UNLOCK(&m->lock); - if (b != NULL) { - // do a full import to prevent the result of this lookup - // from changing, for example if this var is assigned to - // later. - module_import_(m, b->owner, b->name, var, 0); - return b; + jl_module_t *from = NULL; // for error message printing + b2 = using_resolve_binding(m, var, &from, &top, 1); + if (b2 == NULL) + return NULL; + assert(from); + JL_GC_PROMISE_ROOTED(from); // gc-analysis does not understand output parameters + if (b2->deprecated) { + if (jl_atomic_load_relaxed(&b2->value) == jl_nothing) { + // silently skip importing deprecated values assigned to nothing (to allow later mutation) + return NULL; + } + } + // do a full import to prevent the result of this lookup from + // changing, for example if this var is assigned to later. 
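
/*
 * Aside (illustration only, not part of the patch): under the new scheme a
 * binding's `owner` field is an atomic pointer to another binding rather
 * than a module.  NULL means the name has not been resolved yet, a pointer
 * to the binding itself means the module owns the name, and a pointer to a
 * different binding means it was imported (explicitly or via `using`).
 * A minimal sketch of classifying a name without forcing resolution; the
 * helper name and the enum are hypothetical:
 */
enum { BINDING_UNRESOLVED, BINDING_OWNED, BINDING_IMPORTED };

static int binding_kind(jl_module_t *m, jl_sym_t *var)
{
    jl_binding_t *b = jl_get_module_binding(m, var, 0); // alloc=0: look up only
    if (b == NULL)
        return BINDING_UNRESOLVED;
    jl_binding_t *owner = jl_atomic_load_relaxed(&b->owner);
    if (owner == NULL)
        return BINDING_UNRESOLVED;   // may still be defined or imported later
    return owner == b ? BINDING_OWNED : BINDING_IMPORTED;
}
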
+ jl_binding_t *owner = NULL; + if (!jl_atomic_cmpswap(&b->owner, &owner, b2)) { + // concurrent import + return owner; + } + if (b2->deprecated) { + b->deprecated = 1; // we will warn about this below, but we might want to warn at the use sites too + if (m != jl_main_module && m != jl_base_module && + jl_options.depwarn != JL_OPTIONS_DEPWARN_OFF) { + /* with #22763, external packages wanting to replace + deprecated Base bindings should simply export the new + binding */ + jl_printf(JL_STDERR, + "WARNING: using deprecated binding %s.%s in %s.\n", + jl_symbol_name(from->name), jl_symbol_name(var), + jl_symbol_name(m->name)); + jl_binding_dep_message(from, var, b2); + } } - return NULL; } - JL_UNLOCK(&m->lock); - if (b->owner != m || b->name != var) - return jl_get_binding_(b->owner, b->name, &top); - return b; + assert(jl_atomic_load_relaxed(&b2->owner) == b2); + return b2; } -// get owner of binding when accessing m.var, without resolving the binding -JL_DLLEXPORT jl_value_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var) +// get the current likely owner of binding when accessing m.var, without resolving the binding (it may change later) +JL_DLLEXPORT jl_binding_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var) { - JL_LOCK(&m->lock); - jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, var); - if (b == HT_NOTFOUND || b->owner == NULL) - b = using_resolve_binding(m, var, NULL, 0); - JL_UNLOCK(&m->lock); - if (b == NULL || b->owner == NULL) - return jl_nothing; - return (jl_value_t*)b->owner; + jl_binding_t *b = jl_get_module_binding(m, var, 0); + jl_module_t *from = m; + if (b == NULL || (!b->usingfailed && jl_atomic_load_relaxed(&b->owner) == NULL)) + b = using_resolve_binding(m, var, &from, NULL, 0); + else + b = jl_atomic_load_relaxed(&b->owner); + return b; } // get type of binding m.var, without resolving the binding -JL_DLLEXPORT jl_value_t *jl_binding_type(jl_module_t *m, jl_sym_t *var) +JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var) { - JL_LOCK(&m->lock); - jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, var); - if (b == HT_NOTFOUND || b->owner == NULL) - b = using_resolve_binding(m, var, NULL, 0); - JL_UNLOCK(&m->lock); + jl_binding_t *b = jl_get_module_binding(m, var, 0); + if (b == NULL) + return jl_nothing; + b = jl_atomic_load_relaxed(&b->owner); if (b == NULL) return jl_nothing; jl_value_t *ty = jl_atomic_load_relaxed(&b->ty); return ty ? 
ty : jl_nothing; } -JL_DLLEXPORT jl_binding_t *jl_get_binding_wr_or_error(jl_module_t *m, jl_sym_t *var) -{ - return jl_get_binding_wr(m, var, 1); -} - JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m, jl_sym_t *var) { - return jl_get_binding_(m, var, NULL); + return jl_resolve_owner(NULL, m, var, NULL); } JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_binding(m, var); if (b == NULL) - jl_undefined_var_error(var); + jl_undefined_var_error(var, (jl_value_t*)m); + // XXX: this only considers if the original is deprecated, not the binding in m if (b->deprecated) - jl_binding_deprecation_warning(m, b); + jl_binding_deprecation_warning(m, var, b); return b; } JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var) { - JL_LOCK(&m->lock); - jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, var); - if (b == HT_NOTFOUND) { - JL_UNLOCK(&m->lock); - return jl_new_struct(jl_globalref_type, m, var); - } - jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref); - if (globalref == NULL) { - jl_value_t *newref = jl_new_struct(jl_globalref_type, m, var); - if (jl_atomic_cmpswap_relaxed(&b->globalref, &globalref, newref)) { - JL_GC_PROMISE_ROOTED(newref); - globalref = newref; - jl_gc_wb(m, globalref); - } - } - JL_UNLOCK(&m->lock); // may GC - return globalref; + jl_binding_t *b = jl_get_module_binding(m, var, 1); + jl_globalref_t *globalref = b->globalref; + assert(globalref != NULL); + return (jl_value_t*)globalref; } -static int eq_bindings(jl_binding_t *a, jl_binding_t *b) +// does module m explicitly import s? +JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *var) { - if (a==b) return 1; - if (a->name == b->name && a->owner == b->owner) return 1; - if (a->constp && a->value && b->constp && b->value == a->value) return 1; - return 0; + jl_binding_t *b = jl_get_module_binding(m, var, 0); + return b && b->imported; } -// does module m explicitly import s? 
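
/*
 * Aside (illustration only, not part of the patch): with owners stored as
 * binding pointers, "the same binding" now means either literally the same
 * owner binding, or two const bindings carrying the identical value (the
 * rule encoded by the new eq_bindings above).  A hypothetical helper that
 * asks whether `to.var` already refers to the same thing as `from.var`:
 */
static int refers_to_same_binding(jl_module_t *to, jl_module_t *from, jl_sym_t *var)
{
    jl_binding_t *bt = jl_get_module_binding(to, var, 0);
    jl_binding_t *bf = jl_get_module_binding(from, var, 0);
    if (bt == NULL || bf == NULL)
        return 0;
    jl_binding_t *ot = jl_atomic_load_relaxed(&bt->owner);
    jl_binding_t *of = jl_atomic_load_relaxed(&bf->owner);
    if (ot == NULL || of == NULL)
        return 0;                    // at least one side is still unresolved
    if (ot == of)
        return 1;                    // resolved to the very same owner binding
    jl_value_t *vt = jl_atomic_load_relaxed(&ot->value);
    jl_value_t *vf = jl_atomic_load_relaxed(&of->value);
    return ot->constp && of->constp && vt != NULL && vt == vf;
}
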
-JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *s) +extern const char *jl_filename; +extern int jl_lineno; + +static char const dep_message_prefix[] = "_dep_message_"; + +static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t *b) { - JL_LOCK(&m->lock); - jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, s); - JL_UNLOCK(&m->lock); - return (b != HT_NOTFOUND && b->imported); + size_t prefix_len = strlen(dep_message_prefix); + size_t name_len = strlen(jl_symbol_name(name)); + char *dep_binding_name = (char*)alloca(prefix_len+name_len+1); + memcpy(dep_binding_name, dep_message_prefix, prefix_len); + memcpy(dep_binding_name + prefix_len, jl_symbol_name(name), name_len); + dep_binding_name[prefix_len+name_len] = '\0'; + jl_binding_t *dep_message_binding = jl_get_binding(m, jl_symbol(dep_binding_name)); + jl_value_t *dep_message = NULL; + if (dep_message_binding != NULL) + dep_message = jl_atomic_load_relaxed(&dep_message_binding->value); + JL_GC_PUSH1(&dep_message); + if (dep_message != NULL) { + if (jl_is_string(dep_message)) { + jl_uv_puts(JL_STDERR, jl_string_data(dep_message), jl_string_len(dep_message)); + } + else { + jl_static_show(JL_STDERR, dep_message); + } + } + else { + jl_value_t *v = jl_atomic_load_relaxed(&b->value); + dep_message = v; // use as gc-root + if (v) { + if (jl_is_type(v) || jl_is_module(v)) { + jl_printf(JL_STDERR, ", use "); + jl_static_show(JL_STDERR, v); + jl_printf(JL_STDERR, " instead."); + } + else { + jl_methtable_t *mt = jl_gf_mtable(v); + if (mt != NULL) { + jl_printf(JL_STDERR, ", use "); + if (mt->module != jl_core_module) { + jl_static_show(JL_STDERR, (jl_value_t*)mt->module); + jl_printf(JL_STDERR, "."); + } + jl_printf(JL_STDERR, "%s", jl_symbol_name(mt->name)); + jl_printf(JL_STDERR, " instead."); + } + } + } + } + jl_printf(JL_STDERR, "\n"); + JL_GC_POP(); } // NOTE: we use explici since explicit is a C++ keyword -static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname, int explici) +static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *asname, jl_sym_t *s, int explici) { jl_binding_t *b = jl_get_binding(from, s); if (b == NULL) { @@ -459,8 +547,10 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_s jl_symbol_name(to->name)); } else { + assert(jl_atomic_load_relaxed(&b->owner) == b); if (b->deprecated) { - if (b->value == jl_nothing) { + if (jl_atomic_load_relaxed(&b->value) == jl_nothing) { + // silently skip importing deprecated values assigned to nothing (to allow later mutation) return; } else if (to != jl_main_module && to != jl_base_module && @@ -469,78 +559,45 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_s deprecated Base bindings should simply export the new binding */ jl_printf(JL_STDERR, - "WARNING: importing deprecated binding %s.%s into %s.\n", + "WARNING: importing deprecated binding %s.%s into %s%s%s.\n", jl_symbol_name(from->name), jl_symbol_name(s), - jl_symbol_name(to->name)); + jl_symbol_name(to->name), + asname == s ? "" : " as ", + asname == s ? "" : jl_symbol_name(asname)); + jl_binding_dep_message(from, s, b); } } - JL_LOCK(&to->lock); - jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&to->bindings, asname); - jl_binding_t *bto = *bp; - if (bto != HT_NOTFOUND) { - if (bto == b) { - // importing a binding on top of itself. harmless. + jl_binding_t *bto = jl_get_module_binding(to, asname, 1); + if (bto == b) { + // importing a binding on top of itself. harmless. 
+ return; + } + jl_binding_t *ownerto = NULL; + if (jl_atomic_cmpswap(&bto->owner, &ownerto, b)) { + bto->imported |= (explici != 0); + bto->deprecated |= b->deprecated; // we already warned about this above, but we might want to warn at the use sites too + } + else { + if (eq_bindings(b, bto)) { + // already imported + bto->imported |= (explici != 0); } - else if (bto->name != s) { - JL_UNLOCK(&to->lock); + else if (ownerto != bto) { + // already imported from somewhere else jl_printf(JL_STDERR, "WARNING: ignoring conflicting import of %s.%s into %s\n", jl_symbol_name(from->name), jl_symbol_name(s), jl_symbol_name(to->name)); - return; - } - else if (bto->owner == b->owner) { - // already imported - bto->imported = (explici!=0); - } - else if (bto->owner != to && bto->owner != NULL) { - // already imported from somewhere else - jl_binding_t *bval = jl_get_binding(to, asname); - if (bval->constp && bval->value && b->constp && b->value == bval->value) { - // equivalent binding - bto->imported = (explici!=0); - JL_UNLOCK(&to->lock); - } - else { - JL_UNLOCK(&to->lock); - jl_printf(JL_STDERR, - "WARNING: ignoring conflicting import of %s.%s into %s\n", - jl_symbol_name(from->name), jl_symbol_name(s), - jl_symbol_name(to->name)); - } - return; - } - else if (bto->constp || bto->value) { - // conflict with name owned by destination module - assert(bto->owner == to); - if (bto->constp && bto->value && b->constp && b->value == bto->value) { - // equivalent binding - JL_UNLOCK(&to->lock); - } - else { - JL_UNLOCK(&to->lock); - jl_printf(JL_STDERR, - "WARNING: import of %s.%s into %s conflicts with an existing identifier; ignored.\n", - jl_symbol_name(from->name), jl_symbol_name(s), - jl_symbol_name(to->name)); - } - return; } else { - bto->owner = b->owner; - bto->imported = (explici!=0); + // conflict with name owned by destination module + jl_printf(JL_STDERR, + "WARNING: import of %s.%s into %s conflicts with an existing identifier; ignored.\n", + jl_symbol_name(from->name), jl_symbol_name(s), + jl_symbol_name(to->name)); } } - else { - jl_binding_t *nb = new_binding(b->name); - nb->owner = b->owner; - nb->imported = (explici!=0); - nb->deprecated = b->deprecated; - *bp = nb; - jl_gc_wb_buf(to, nb, sizeof(jl_binding_t)); - } - JL_UNLOCK(&to->lock); } } @@ -551,7 +608,7 @@ JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from, jl_sym_t JL_DLLEXPORT void jl_module_import_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname) { - module_import_(to, from, s, asname, 1); + module_import_(to, from, asname, s, 1); } JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s) @@ -561,133 +618,210 @@ JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s) JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname) { - module_import_(to, from, s, asname, 0); + module_import_(to, from, asname, s, 0); } + JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from) { if (to == from) return; JL_LOCK(&to->lock); - for(size_t i=0; i < to->usings.len; i++) { + for (size_t i = 0; i < to->usings.len; i++) { if (from == to->usings.items[i]) { JL_UNLOCK(&to->lock); return; } } - // TODO: make sure this can't deadlock - JL_LOCK(&from->lock); + arraylist_push(&to->usings, from); + jl_gc_wb(to, from); + JL_UNLOCK(&to->lock); + // print a warning if something visible via this "using" conflicts with // an existing identifier. 
note that an identifier added later may still // silently override a "using" name. see issue #2054. - void **table = from->bindings.table; - for(size_t i=1; i < from->bindings.size; i+=2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i]; - if (b->exportp && (b->owner==from || b->imported)) { - jl_sym_t *var = (jl_sym_t*)table[i-1]; - jl_binding_t **tobp = (jl_binding_t**)ptrhash_bp(&to->bindings, var); - if (*tobp != HT_NOTFOUND && (*tobp)->owner != NULL && - // don't warn for conflicts with the module name itself. - // see issue #4715 - var != to->name && - !eq_bindings(jl_get_binding(to,var), b)) { - // TODO: not ideal to print this while holding module locks - jl_printf(JL_STDERR, - "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n", - jl_symbol_name(from->name), jl_symbol_name(var), - jl_symbol_name(to->name)); - } + jl_svec_t *table = jl_atomic_load_relaxed(&from->bindings); + for (size_t i = 0; i < jl_svec_len(table); i++) { + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); + if ((void*)b == jl_nothing) + break; + if (b->exportp && (jl_atomic_load_relaxed(&b->owner) == b || b->imported)) { + jl_sym_t *var = b->globalref->name; + jl_binding_t *tob = jl_get_module_binding(to, var, 0); + if (tob && jl_atomic_load_relaxed(&tob->owner) != NULL && + // don't warn for conflicts with the module name itself. + // see issue #4715 + var != to->name && + !eq_bindings(jl_atomic_load_relaxed(&tob->owner), b)) { + jl_printf(JL_STDERR, + "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n", + jl_symbol_name(from->name), jl_symbol_name(var), + jl_symbol_name(to->name)); } } + table = jl_atomic_load_relaxed(&from->bindings); } - JL_UNLOCK(&from->lock); - - arraylist_push(&to->usings, from); - jl_gc_wb(to, from); - JL_UNLOCK(&to->lock); } -JL_DLLEXPORT void jl_module_export(jl_module_t *from, jl_sym_t *s) +JL_DLLEXPORT void jl_module_public(jl_module_t *from, jl_sym_t *s, int exported) { - JL_LOCK(&from->lock); - jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&from->bindings, s); - if (*bp == HT_NOTFOUND) { - jl_binding_t *b = new_binding(s); - // don't yet know who the owner is - b->owner = NULL; - *bp = b; - jl_gc_wb_buf(from, b, sizeof(jl_binding_t)); - } - assert(*bp != HT_NOTFOUND); - (*bp)->exportp = 1; - JL_UNLOCK(&from->lock); + jl_binding_t *b = jl_get_module_binding(from, s, 1); + b->publicp = 1; + b->exportp = exported; } JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_binding(m, var); - return b && (b->value != NULL); + return b && (jl_atomic_load_relaxed(&b->value) != NULL); } JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var) { - JL_LOCK(&m->lock); - jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, var); - JL_UNLOCK(&m->lock); - return b != HT_NOTFOUND && (b->exportp || b->owner==m); + jl_binding_t *b = jl_get_module_binding(m, var, 0); + return b && (b->exportp || jl_atomic_load_relaxed(&b->owner) == b); } JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var) { - JL_LOCK(&m->lock); - jl_binding_t *b = _jl_get_module_binding(m, var); - JL_UNLOCK(&m->lock); - return b != HT_NOTFOUND && b->exportp; + jl_binding_t *b = jl_get_module_binding(m, var, 0); + return b && b->exportp; +} + +JL_DLLEXPORT int jl_module_public_p(jl_module_t *m, jl_sym_t *var) +{ + jl_binding_t *b = jl_get_module_binding(m, var, 0); + return b && b->publicp; } JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) { - JL_LOCK(&m->lock); - 
jl_binding_t *b = _jl_get_module_binding(m, var); - JL_UNLOCK(&m->lock); - return b != HT_NOTFOUND && b->owner != NULL; + jl_binding_t *b = jl_get_module_binding(m, var, 0); + return b && jl_atomic_load_relaxed(&b->owner) != NULL; } -JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) +static uint_t bindingkey_hash(size_t idx, jl_value_t *data) { - JL_LOCK(&m->lock); - jl_binding_t *b = _jl_get_module_binding(m, var); - JL_UNLOCK(&m->lock); - return b == HT_NOTFOUND ? NULL : b; + jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx); + jl_sym_t *var = b->globalref->name; + return var->hash; +} + +static int bindingkey_eq(size_t idx, const void *var, jl_value_t *data, uint_t hv) +{ + jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx); + jl_sym_t *name = b->globalref->name; + return var == name; +} + +JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m, jl_sym_t *var, int alloc) +{ + uint_t hv = var->hash; + for (int locked = 0; ; locked++) { + jl_genericmemory_t *bindingkeyset = jl_atomic_load_acquire(&m->bindingkeyset); + jl_svec_t *bindings = jl_atomic_load_relaxed(&m->bindings); + ssize_t idx = jl_smallintset_lookup(bindingkeyset, bindingkey_eq, var, (jl_value_t*)bindings, hv, 0); // acquire + if (idx != -1) { + jl_binding_t *b = (jl_binding_t*)jl_svecref(bindings, idx); // relaxed + if (locked) + JL_UNLOCK(&m->lock); + return b; + } + if (!alloc) { + return NULL; + } + else if (!locked) { + JL_LOCK(&m->lock); + } + else { + size_t i, cl = jl_svec_len(bindings); + for (i = cl; i > 0; i--) { + jl_value_t *b = jl_svecref(bindings, i - 1); + if (b != jl_nothing) + break; + } + if (i == cl) { + size_t ncl = cl < 8 ? 8 : (cl*3)>>1; // grow 50% + jl_svec_t *nc = jl_alloc_svec_uninit(ncl); + if (i > 0) + memcpy((char*)jl_svec_data(nc), jl_svec_data(bindings), sizeof(void*) * i); + for (size_t j = i; j < ncl; j++) + jl_svec_data(nc)[j] = jl_nothing; + jl_atomic_store_release(&m->bindings, nc); + jl_gc_wb(m, nc); + bindings = nc; + } + jl_binding_t *b = new_binding(m, var); + assert(jl_svecref(bindings, i) == jl_nothing); + jl_svecset(bindings, i, b); // relaxed + jl_smallintset_insert(&m->bindingkeyset, (jl_value_t*)m, bindingkey_hash, i, (jl_value_t*)bindings); // release + JL_UNLOCK(&m->lock); + return b; + } + } +} + + +JL_DLLEXPORT jl_value_t *jl_get_globalref_value(jl_globalref_t *gr) +{ + jl_binding_t *b = gr->binding; + b = jl_resolve_owner(b, gr->mod, gr->name, NULL); + // ignores b->deprecated + return b == NULL ? 
NULL : jl_atomic_load_relaxed(&b->value); } JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_binding(m, var); - if (b == NULL) return NULL; - if (b->deprecated) jl_binding_deprecation_warning(m, b); - return b->value; + if (b == NULL) + return NULL; + // XXX: this only considers if the original is deprecated, not the binding in m + if (b->deprecated) + jl_binding_deprecation_warning(m, var, b); + return jl_atomic_load_relaxed(&b->value); +} + +JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT) +{ + jl_binding_t *bp = jl_get_binding_wr(m, var); + jl_checked_assignment(bp, m, var, val); } JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT) { - jl_binding_t *bp = jl_get_binding_wr(m, var, 1); - if (bp->value == NULL) { + // this function is mostly only used during initialization, so the data races here are not too important to us + jl_binding_t *bp = jl_get_module_binding(m, var, 1); + jl_binding_t *b2 = NULL; + if (!jl_atomic_cmpswap(&bp->owner, &b2, bp) && b2 != bp) + jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var)); + if (jl_atomic_load_relaxed(&bp->value) == NULL) { + jl_value_t *old_ty = NULL; + jl_atomic_cmpswap_relaxed(&bp->ty, &old_ty, (jl_value_t*)jl_any_type); uint8_t constp = 0; // if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) { if (constp = bp->constp, bp->constp = 1, constp == 0) { jl_value_t *old = NULL; if (jl_atomic_cmpswap(&bp->value, &old, val)) { - jl_gc_wb_binding(bp, val); + jl_gc_wb(bp, val); return; } } - jl_value_t *old_ty = NULL; - jl_atomic_cmpswap_relaxed(&bp->ty, &old_ty, (jl_value_t*)jl_any_type); } - jl_errorf("invalid redefinition of constant %s", - jl_symbol_name(bp->name)); + jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var)); +} + +JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr) +{ + jl_binding_t *b = gr->binding; + b = jl_resolve_owner(b, gr->mod, gr->name, NULL); + return b && b->constp; +} + +JL_DLLEXPORT int jl_globalref_boundp(jl_globalref_t *gr) +{ + jl_binding_t *b = gr->binding; + b = jl_resolve_owner(b, gr->mod, gr->name, NULL); + return b && jl_atomic_load_relaxed(&b->value) != NULL; } JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var) @@ -700,6 +834,7 @@ JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var) // 0=not deprecated, 1=renamed, 2=moved to another package JL_DLLEXPORT void jl_deprecate_binding(jl_module_t *m, jl_sym_t *var, int flag) { + // XXX: this deprecates the original value, which might be imported from elsewhere jl_binding_t *b = jl_get_binding(m, var); if (b) b->deprecated = flag; } @@ -707,29 +842,14 @@ JL_DLLEXPORT void jl_deprecate_binding(jl_module_t *m, jl_sym_t *var, int flag) JL_DLLEXPORT int jl_is_binding_deprecated(jl_module_t *m, jl_sym_t *var) { if (jl_binding_resolved_p(m, var)) { + // XXX: this only considers if the original is deprecated, not this precise binding jl_binding_t *b = jl_get_binding(m, var); return b && b->deprecated; } return 0; } -extern const char *jl_filename; -extern int jl_lineno; - -static char const dep_message_prefix[] = "_dep_message_"; - -static jl_binding_t *jl_get_dep_message_binding(jl_module_t *m, jl_binding_t *deprecated_binding) -{ - size_t prefix_len = strlen(dep_message_prefix); - size_t name_len = strlen(jl_symbol_name(deprecated_binding->name)); - char *dep_binding_name = (char*)alloca(prefix_len+name_len+1); - memcpy(dep_binding_name, 
dep_message_prefix, prefix_len); - memcpy(dep_binding_name + prefix_len, jl_symbol_name(deprecated_binding->name), name_len); - dep_binding_name[prefix_len+name_len] = '\0'; - return jl_get_binding(m, jl_symbol(dep_binding_name)); -} - -void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b) +void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b) { // Only print a warning for deprecated == 1 (renamed). // For deprecated == 2 (moved to a package) the binding is to a function @@ -737,123 +857,79 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b) if (b->deprecated == 1 && jl_options.depwarn) { if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR) jl_printf(JL_STDERR, "WARNING: "); - jl_binding_t *dep_message_binding = NULL; - if (b->owner) { - jl_printf(JL_STDERR, "%s.%s is deprecated", - jl_symbol_name(b->owner->name), jl_symbol_name(b->name)); - dep_message_binding = jl_get_dep_message_binding(b->owner, b); - } - else { - jl_printf(JL_STDERR, "%s is deprecated", jl_symbol_name(b->name)); - } - - if (dep_message_binding && dep_message_binding->value) { - if (jl_isa(dep_message_binding->value, (jl_value_t*)jl_string_type)) { - jl_uv_puts(JL_STDERR, jl_string_data(dep_message_binding->value), - jl_string_len(dep_message_binding->value)); - } - else { - jl_static_show(JL_STDERR, dep_message_binding->value); - } - } - else { - jl_value_t *v = b->value; - if (v) { - if (jl_is_type(v) || jl_is_module(v)) { - jl_printf(JL_STDERR, ", use "); - jl_static_show(JL_STDERR, v); - jl_printf(JL_STDERR, " instead."); - } - else { - jl_methtable_t *mt = jl_gf_mtable(v); - if (mt != NULL && (mt->defs != jl_nothing || - jl_isa(v, (jl_value_t*)jl_builtin_type))) { - jl_printf(JL_STDERR, ", use "); - if (mt->module != jl_core_module) { - jl_static_show(JL_STDERR, (jl_value_t*)mt->module); - jl_printf(JL_STDERR, "."); - } - jl_printf(JL_STDERR, "%s", jl_symbol_name(mt->name)); - jl_printf(JL_STDERR, " instead."); - } - } - } - } - jl_printf(JL_STDERR, "\n"); + assert(jl_atomic_load_relaxed(&b->owner) == b); + jl_printf(JL_STDERR, "%s.%s is deprecated", + jl_symbol_name(m->name), jl_symbol_name(s)); + jl_binding_dep_message(m, s, b); if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR) { - if (jl_lineno == 0) { - jl_printf(JL_STDERR, " in module %s\n", jl_symbol_name(m->name)); - } - else { + if (jl_lineno != 0) { jl_printf(JL_STDERR, " likely near %s:%d\n", jl_filename, jl_lineno); } } if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR) { - if (b->owner) - jl_errorf("deprecated binding: %s.%s", - jl_symbol_name(b->owner->name), - jl_symbol_name(b->name)); - else - jl_errorf("deprecated binding: %s", jl_symbol_name(b->name)); + jl_errorf("use of deprecated variable: %s.%s", + jl_symbol_name(m->name), + jl_symbol_name(s)); } } } -JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs) +JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs) { jl_value_t *old_ty = NULL; if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type)) { if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) { - JL_GC_PUSH1(&rhs); + JL_GC_PUSH1(&rhs); // callee-rooted if (!jl_isa(rhs, old_ty)) - jl_errorf("cannot assign an incompatible value to the global %s.", - jl_symbol_name(b->name)); + jl_errorf("cannot assign an incompatible value to the global %s.%s.", + jl_symbol_name(mod->name), jl_symbol_name(var)); JL_GC_POP(); } } if (b->constp) { jl_value_t *old = NULL; if 
(jl_atomic_cmpswap(&b->value, &old, rhs)) { - jl_gc_wb_binding(b, rhs); + jl_gc_wb(b, rhs); return; } if (jl_egal(rhs, old)) return; if (jl_typeof(rhs) != jl_typeof(old) || jl_is_type(rhs) || jl_is_module(rhs)) { -#ifndef __clang_gcanalyzer__ - jl_errorf("invalid redefinition of constant %s", - jl_symbol_name(b->name)); -#endif + jl_errorf("invalid redefinition of constant %s.%s", + jl_symbol_name(mod->name), jl_symbol_name(var)); + } - jl_safe_printf("WARNING: redefinition of constant %s. This may fail, cause incorrect answers, or produce other errors.\n", - jl_symbol_name(b->name)); + jl_safe_printf("WARNING: redefinition of constant %s.%s. This may fail, cause incorrect answers, or produce other errors.\n", + jl_symbol_name(mod->name), jl_symbol_name(var)); } jl_atomic_store_release(&b->value, rhs); - jl_gc_wb_binding(b, rhs); + jl_gc_wb(b, rhs); } -JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b) +JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var) { - if (b->value != NULL && !b->constp) { - jl_errorf("cannot declare %s constant; it already has a value", - jl_symbol_name(b->name)); + // n.b. jl_get_binding_wr should have ensured b->owner == b as mod.var + if (jl_atomic_load_relaxed(&b->owner) != b || (jl_atomic_load_relaxed(&b->value) != NULL && !b->constp)) { + jl_errorf("cannot declare %s.%s constant; it already has a value", + jl_symbol_name(mod->name), jl_symbol_name(var)); } b->constp = 1; } JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m) { - jl_array_t *a = jl_alloc_array_1d(jl_array_any_type, 0); - JL_GC_PUSH1(&a); JL_LOCK(&m->lock); - for(int i=(int)m->usings.len-1; i >= 0; --i) { - jl_array_grow_end(a, 1); + int j = m->usings.len; + jl_array_t *a = jl_alloc_array_1d(jl_array_any_type, j); + JL_GC_PUSH1(&a); + for (int i = 0; j > 0; i++) { + j--; jl_module_t *imp = (jl_module_t*)m->usings.items[i]; - jl_array_ptr_set(a,jl_array_dim0(a)-1, (jl_value_t*)imp); + jl_array_ptr_set(a, j, (jl_value_t*)imp); } - JL_UNLOCK(&m->lock); + JL_UNLOCK(&m->lock); // may gc JL_GC_POP(); return (jl_value_t*)a; } @@ -862,32 +938,40 @@ JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported) { jl_array_t *a = jl_alloc_array_1d(jl_array_symbol_type, 0); JL_GC_PUSH1(&a); - size_t i; - JL_LOCK(&m->lock); - void **table = m->bindings.table; - for (i = 1; i < m->bindings.size; i+=2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i]; - int hidden = jl_symbol_name(b->name)[0]=='#'; - if ((b->exportp || - (imported && b->imported) || - (b->owner == m && !b->imported && (all || m == jl_main_module))) && - (all || (!b->deprecated && !hidden))) { - jl_sym_t *in_module_name = (jl_sym_t*)table[i-1]; // the name in the module may not be b->name, use the httable key instead - jl_array_grow_end(a, 1); - //XXX: change to jl_arrayset if array storage allocation for Array{Symbols,1} changes: - jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)in_module_name); - } + jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); + for (size_t i = 0; i < jl_svec_len(table); i++) { + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); + if ((void*)b == jl_nothing) + break; + jl_sym_t *asname = b->globalref->name; + int hidden = jl_symbol_name(asname)[0]=='#'; + int main_public = (m == jl_main_module && !(asname == jl_eval_sym || asname == jl_include_sym)); + if ((b->publicp || + (imported && b->imported) || + (jl_atomic_load_relaxed(&b->owner) == b && !b->imported && (all || main_public))) && + (all || (!b->deprecated 
&& !hidden))) { + jl_array_grow_end(a, 1); + // n.b. change to jl_arrayset if array storage allocation for Array{Symbols,1} changes: + jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)asname); } + table = jl_atomic_load_relaxed(&m->bindings); } - JL_UNLOCK(&m->lock); JL_GC_POP(); return (jl_value_t*)a; } JL_DLLEXPORT jl_sym_t *jl_module_name(jl_module_t *m) { return m->name; } JL_DLLEXPORT jl_module_t *jl_module_parent(jl_module_t *m) { return m->parent; } -JL_DLLEXPORT uint64_t jl_module_build_id(jl_module_t *m) { return m->build_id; } +jl_module_t *jl_module_root(jl_module_t *m) +{ + while (1) { + if (m->parent == NULL || m->parent == m) + return m; + m = m->parent; + } +} + +JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; } JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; } // TODO: make this part of the module constructor and read-only? @@ -910,19 +994,30 @@ int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT // is to leave `Main` as empty as possible in the default system image. JL_DLLEXPORT void jl_clear_implicit_imports(jl_module_t *m) { - size_t i; JL_LOCK(&m->lock); - void **table = m->bindings.table; - for (i = 1; i < m->bindings.size; i+=2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i]; - if (b->owner != m && !b->imported) - table[i] = HT_NOTFOUND; - } + jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings); + for (size_t i = 0; i < jl_svec_len(table); i++) { + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); + if ((void*)b == jl_nothing) + break; + if (jl_atomic_load_relaxed(&b->owner) && jl_atomic_load_relaxed(&b->owner) != b && !b->imported) + jl_atomic_store_relaxed(&b->owner, NULL); } JL_UNLOCK(&m->lock); } +JL_DLLEXPORT void jl_init_restored_module(jl_value_t *mod) +{ + if (!jl_generating_output() || jl_options.incremental) { + jl_module_run_initializer((jl_module_t*)mod); + } + else { + if (jl_module_init_order == NULL) + jl_module_init_order = jl_alloc_vec_any(0); + jl_array_ptr_1d_push(jl_module_init_order, mod); + } +} + #ifdef __cplusplus } #endif diff --git a/src/mtarraylist.c b/src/mtarraylist.c new file mode 100644 index 0000000000000..8bad44797dab4 --- /dev/null +++ b/src/mtarraylist.c @@ -0,0 +1,81 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include "julia.h" +#include "julia_internal.h" +#include "julia_assert.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// this file provides some alternate API functions for small_arraylist (push and add) +// which can be safely observed from other threads concurrently +// there is only permitted to be a single writer thread (or a mutex) +// but there can be any number of observers + +typedef struct { + _Atomic(uint32_t) len; + uint32_t max; + _Atomic(_Atomic(void*)*) items; + _Atomic(void*) _space[SMALL_AL_N_INLINE]; +} small_mtarraylist_t; + +// change capacity to at least newlen +static void mtarraylist_resizeto(small_mtarraylist_t *a, size_t len, size_t newlen) JL_NOTSAFEPOINT +{ + size_t max = a->max; + if (newlen > max) { + size_t nm = max * 2; + if (nm == 0) + nm = 1; + while (newlen > nm) + nm *= 2; + void *olditems = (void*)jl_atomic_load_relaxed(&a->items); + void *p = calloc_s(nm * sizeof(void*)); + memcpy(p, olditems, len * sizeof(void*)); + jl_atomic_store_release(&a->items, (_Atomic(void*)*)p); + a->max = nm; + if (olditems != (void*)&a->_space[0]) { + jl_task_t *ct = jl_current_task; + jl_gc_add_quiescent(ct->ptls, (void**)olditems, free); + } + } +} + +// single-threaded +void mtarraylist_push(small_arraylist_t *_a, void *elt) +{ + small_mtarraylist_t *a = (small_mtarraylist_t*)_a; + size_t len = jl_atomic_load_relaxed(&a->len); + mtarraylist_resizeto(a, len, len + 1); + jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[len], elt); + jl_atomic_store_release(&a->len, len + 1); +} + +// single-threaded +void mtarraylist_add(small_arraylist_t *_a, void *elt, size_t idx) +{ + small_mtarraylist_t *a = (small_mtarraylist_t*)_a; + size_t len = jl_atomic_load_relaxed(&a->len); + mtarraylist_resizeto(a, len, idx + 1); + jl_atomic_store_release(&jl_atomic_load_relaxed(&a->items)[idx], elt); + if (jl_atomic_load_relaxed(&a->len) < idx + 1) + jl_atomic_store_release(&a->len, idx + 1); +} + +// concurrent-safe +size_t mtarraylist_length(small_arraylist_t *_a) +{ + small_mtarraylist_t *a = (small_mtarraylist_t*)_a; + return jl_atomic_load_relaxed(&a->len); +} + +// concurrent-safe +void *mtarraylist_get(small_arraylist_t *_a, size_t idx) +{ + small_mtarraylist_t *a = (small_mtarraylist_t*)_a; + size_t len = jl_atomic_load_acquire(&a->len); + if (idx >= len) + return NULL; + return jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&a->items)[idx]); +} diff --git a/src/opaque_closure.c b/src/opaque_closure.c index d34989181b7ad..7fd6d5a0f8666 100644 --- a/src/opaque_closure.c +++ b/src/opaque_closure.c @@ -8,6 +8,11 @@ jl_value_t *jl_fptr_const_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **a return oc->captures; } +jl_value_t *jl_fptr_const_opaque_closure_typeerror(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs) +{ + jl_type_error("OpaqueClosure", jl_tparam1(jl_typeof(oc)), oc->captures); +} + // determine whether `argt` is a valid argument type tuple for the given opaque closure method JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source) { @@ -22,23 +27,8 @@ JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *sourc return 1; } -static jl_value_t *prepend_type(jl_value_t *t0, jl_tupletype_t *t) -{ - jl_svec_t *sig_args = NULL; - JL_GC_PUSH1(&sig_args); - size_t nsig = 1 + jl_svec_len(t->parameters); - sig_args = jl_alloc_svec_uninit(nsig); - jl_svecset(sig_args, 0, t0); - for (size_t i = 0; i < nsig-1; ++i) { - jl_svecset(sig_args, 1+i, jl_tparam(t, i)); - } - 
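
/*
 * Aside (illustration only, not part of the patch): the mtarraylist API above
 * is meant for one writer (or writers serialized by a lock) plus any number
 * of concurrent readers.  Readers should go through mtarraylist_length and
 * mtarraylist_get only; the length snapshot may be stale, but mtarraylist_get
 * re-checks it with an acquire load, so a racing reader observes NULL rather
 * than a torn element.  A hypothetical observer:
 */
static size_t count_live_items(small_arraylist_t *list)
{
    size_t n = mtarraylist_length(list);       // relaxed snapshot of the length
    size_t live = 0;
    for (size_t i = 0; i < n; i++) {
        void *item = mtarraylist_get(list, i); // safe even if `n` is stale
        if (item != NULL)
            live++;
    }
    return live;
}
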
jl_value_t *sigtype = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig); - JL_GC_POP(); - return sigtype; -} - static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, - jl_value_t *source_, jl_value_t *captures) + jl_value_t *source_, jl_value_t *captures, int do_compile) { if (!jl_is_tuple_type((jl_value_t*)argt)) { jl_error("OpaqueClosure argument tuple must be a tuple type"); @@ -56,46 +46,87 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t if (jl_nparams(argt) + 1 - jl_is_va_tuple(argt) < source->nargs - source->isva) jl_error("Argument type tuple has too few required arguments for method"); jl_value_t *sigtype = NULL; - JL_GC_PUSH1(&sigtype); - sigtype = prepend_type(jl_typeof(captures), argt); + jl_value_t *selected_rt = rt_ub; + JL_GC_PUSH2(&sigtype, &selected_rt); + sigtype = jl_argtype_with_function(captures, (jl_value_t*)argt); + + jl_method_instance_t *mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec); + jl_task_t *ct = jl_current_task; + size_t world = ct->world_age; + jl_code_instance_t *ci = NULL; + if (do_compile) { + ci = jl_compile_method_internal(mi, world); + } + + jl_fptr_args_t invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; + void *specptr = NULL; + + if (ci) { + invoke = (jl_fptr_args_t)jl_atomic_load_relaxed(&ci->invoke); + specptr = jl_atomic_load_relaxed(&ci->specptr.fptr); + + selected_rt = ci->rettype; + // If we're not allowed to generate a specsig with this, rt, fall + // back to the invoke wrapper. We could instead generate a specsig->specsig + // wrapper, but lets leave that for later. + if (!jl_subtype(rt_lb, selected_rt)) { + // TODO: It would be better to try to get a specialization with the + // correct rt check here (or we could codegen a wrapper). + specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; + jl_value_t *ts[2] = {rt_lb, (jl_value_t*)ci->rettype}; + selected_rt = jl_type_union(ts, 2); + } + if (!jl_subtype(ci->rettype, rt_ub)) { + // TODO: It would be better to try to get a specialization with the + // correct rt check here (or we could codegen a wrapper). + specptr = NULL; invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; + selected_rt = jl_type_intersection(rt_ub, selected_rt); + } + + if (invoke == (jl_fptr_args_t) jl_fptr_interpret_call) { + invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; + } + else if (invoke == (jl_fptr_args_t)jl_fptr_args && specptr) { + invoke = (jl_fptr_args_t)specptr; + } + else if (invoke == (jl_fptr_args_t)jl_fptr_const_return) { + invoke = jl_isa(ci->rettype_const, selected_rt) ? 
+ (jl_fptr_args_t)jl_fptr_const_opaque_closure : + (jl_fptr_args_t)jl_fptr_const_opaque_closure_typeerror; + captures = ci->rettype_const; + } + } - jl_value_t *oc_type JL_ALWAYS_LEAFTYPE; - oc_type = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, rt_ub); + jl_value_t *oc_type JL_ALWAYS_LEAFTYPE = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, selected_rt); JL_GC_PROMISE_ROOTED(oc_type); - jl_method_instance_t *mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec); - size_t world = jl_atomic_load_acquire(&jl_world_counter); - jl_code_instance_t *ci = jl_compile_method_internal(mi, world); + if (!specptr) { + sigtype = jl_argtype_with_function_type((jl_value_t*)oc_type, (jl_value_t*)argt); + jl_method_instance_t *mi_generic = jl_specializations_get_linfo(jl_opaque_closure_method, sigtype, jl_emptysvec); - jl_task_t *ct = jl_current_task; + // OC wrapper methods are not world dependent + ci = jl_get_method_inferred(mi_generic, selected_rt, 1, ~(size_t)0); + if (!jl_atomic_load_acquire(&ci->invoke)) + jl_generate_fptr_for_oc_wrapper(ci); + specptr = jl_atomic_load_relaxed(&ci->specptr.fptr); + } jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ct->ptls, sizeof(jl_opaque_closure_t), oc_type); - JL_GC_POP(); oc->source = source; oc->captures = captures; - oc->specptr = NULL; - if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_interpret_call) { - oc->invoke = (jl_fptr_args_t)jl_interpret_opaque_closure; - } - else if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_args) { - oc->invoke = jl_atomic_load_relaxed(&ci->specptr.fptr1); - } - else if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { - oc->invoke = (jl_fptr_args_t)jl_fptr_const_opaque_closure; - oc->captures = ci->rettype_const; - } - else { - oc->invoke = (jl_fptr_args_t)jl_atomic_load_relaxed(&ci->invoke); - } oc->world = world; + oc->invoke = invoke; + oc->specptr = specptr; + + JL_GC_POP(); return oc; } jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, - jl_value_t *source_, jl_value_t **env, size_t nenv) + jl_value_t *source_, jl_value_t **env, size_t nenv, int do_compile) { jl_value_t *captures = jl_f_tuple(NULL, env, nenv); JL_GC_PUSH1(&captures); - jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures); + jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures, do_compile); JL_GC_POP(); return oc; } @@ -103,18 +134,8 @@ jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name, int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva); -JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst( - jl_method_instance_t *mi, jl_value_t *rettype, - jl_value_t *inferred_const, jl_value_t *inferred, - int32_t const_flags, size_t min_world, size_t max_world, - uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes, - uint8_t relocatability); - -JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT, - jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED); - JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub, - jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env) + jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env, int 
do_compile) { if (!ci->inferred) jl_error("CodeInfo must already be inferred"); @@ -123,15 +144,17 @@ JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tuplet JL_GC_PUSH3(&root, &sigtype, &inst); root = jl_box_long(lineno); root = jl_new_struct(jl_linenumbernode_type, root, file); - root = (jl_value_t*)jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva); + jl_method_t *meth = jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva); + root = (jl_value_t*)meth; + meth->primary_world = jl_current_task->world_age; - sigtype = prepend_type(jl_typeof(env), argt); + sigtype = jl_argtype_with_function(env, (jl_value_t*)argt); jl_method_instance_t *mi = jl_specializations_get_linfo((jl_method_t*)root, sigtype, jl_emptysvec); - inst = jl_new_codeinst(mi, rt_ub, NULL, (jl_value_t*)ci, - 0, ((jl_method_t*)root)->primary_world, -1, 0, 0, jl_nothing, 0); + inst = jl_new_codeinst(mi, rt_ub, (jl_value_t*)jl_any_type, NULL, (jl_value_t*)ci, + 0, meth->primary_world, -1, 0, 0, jl_nothing, 0); jl_mi_cache_insert(mi, inst); - jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env); + jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env, do_compile); JL_GC_POP(); return oc; } @@ -141,13 +164,12 @@ JL_CALLABLE(jl_new_opaque_closure_jlcall) if (nargs < 4) jl_error("new_opaque_closure: Not enough arguments"); return (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)args[0], - args[1], args[2], args[3], &args[4], nargs-4); + args[1], args[2], args[3], &args[4], nargs-4, 1); } - // check whether the specified number of arguments is compatible with the // specified number of parameters of the tuple type -STATIC_INLINE int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTSAFEPOINT +int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) { v = jl_unwrap_unionall(v); assert(jl_is_tuple_type(v)); diff --git a/src/options.h b/src/options.h index 5a1700708d9e7..685dea7b981db 100644 --- a/src/options.h +++ b/src/options.h @@ -64,11 +64,6 @@ #endif #endif -// SEGV_EXCEPTION turns segmentation faults into catchable julia exceptions. 
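
/*
 * Aside (illustration only, not part of the patch): the return-type selection
 * in new_opaque_closure above can be summarized as: start from the inferred
 * return type, but only trust it if it lies between the declared bounds;
 * otherwise widen/narrow it and fall back to the interpreted entry point.
 * `pick_oc_rettype` is a hypothetical restatement that ignores GC rooting:
 */
static jl_value_t *pick_oc_rettype(jl_value_t *rt_lb, jl_value_t *rt_ub,
                                   jl_value_t *inferred_rt, int *use_interp)
{
    jl_value_t *rt = inferred_rt;
    *use_interp = 0;
    if (!jl_subtype(rt_lb, rt)) {          // inferred type narrower than the lower bound
        jl_value_t *ts[2] = {rt_lb, rt};
        rt = jl_type_union(ts, 2);
        *use_interp = 1;                   // can't use the compiled specialization
    }
    if (!jl_subtype(inferred_rt, rt_ub)) { // inferred type wider than the upper bound
        rt = jl_type_intersection(rt_ub, rt);
        *use_interp = 1;
    }
    return rt;
}
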
-// This is not recommended, as the memory state after such an exception should -// be considered untrusted, but can be helpful during development -// #define SEGV_EXCEPTION - // profiling options // GC_FINAL_STATS prints total GC stats at exit @@ -83,8 +78,10 @@ // OBJPROFILE counts objects by type // #define OBJPROFILE -// Automatic Instrumenting Profiler -//#define ENABLE_TIMINGS +// pool allocator configuration options + +// GC_SMALL_PAGE allocates objects in 4k pages +// #define GC_SMALL_PAGE // method dispatch profiling -------------------------------------------------- @@ -113,7 +110,9 @@ // When not using COPY_STACKS the task-system is less memory efficient so // you probably want to choose a smaller default stack size (factor of 8-10) -#ifdef _P64 +#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) +#define JL_STACK_SIZE (64*1024*1024) +#elif defined(_P64) #define JL_STACK_SIZE (4*1024*1024) #else #define JL_STACK_SIZE (2*1024*1024) @@ -126,7 +125,7 @@ // controls for when threads sleep #define THREAD_SLEEP_THRESHOLD_NAME "JULIA_THREAD_SLEEP_THRESHOLD" -#define DEFAULT_THREAD_SLEEP_THRESHOLD 16*1000 // nanoseconds (16us) +#define DEFAULT_THREAD_SLEEP_THRESHOLD 100*1000 // nanoseconds (100us) // defaults for # threads #define NUM_THREADS_NAME "JULIA_NUM_THREADS" @@ -137,35 +136,22 @@ // threadpools specification #define THREADPOOLS_NAME "JULIA_THREADPOOLS" +// GC threads +#define NUM_GC_THREADS_NAME "JULIA_NUM_GC_THREADS" + // affinitization behavior #define MACHINE_EXCLUSIVE_NAME "JULIA_EXCLUSIVE" #define DEFAULT_MACHINE_EXCLUSIVE 0 -// partr -- parallel tasks runtime options ------------------------------------ - -// multiq - // number of heaps = MULTIQ_HEAP_C * nthreads -#define MULTIQ_HEAP_C 4 - // how many in each heap -#define MULTIQ_TASKS_PER_HEAP 129 - -// parfor - // tasks = niters / (GRAIN_K * nthreads) -#define GRAIN_K 4 - -// synchronization - // narrivers = ((GRAIN_K * nthreads) ^ ARRIVERS_P) + 1 - // limit for number of recursive parfors -#define ARRIVERS_P 2 - // nreducers = narrivers * REDUCERS_FRAC -#define REDUCERS_FRAC 1 - - // sanitizer defaults --------------------------------------------------------- // Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers -#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) +#if defined(_COMPILER_ASAN_ENABLED_) +// No MEMDEBUG for msan - we just poison allocated memory directly. 
#define MEMDEBUG +#endif + +#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) #define KEEP_BODIES #endif diff --git a/src/passes.h b/src/passes.h index 82922a95db565..9c3b0421670b5 100644 --- a/src/passes.h +++ b/src/passes.h @@ -3,100 +3,151 @@ #ifndef JL_PASSES_H #define JL_PASSES_H +#include "analyzer_annotations.h" #include #include using namespace llvm; // Function Passes -struct DemoteFloat16 : PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +struct DemoteFloat16Pass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct CombineMulAdd : PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +struct CombineMulAddPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; }; -struct LateLowerGC : PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +struct LateLowerGCPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; struct AllocOptPass : PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; }; struct PropagateJuliaAddrspacesPass : PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct LowerExcHandlers : PassInfoMixin { - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +struct LowerExcHandlersPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; struct GCInvariantVerifierPass : PassInfoMixin { bool Strong; - GCInvariantVerifierPass(bool Strong = false) : Strong(Strong) {} + GCInvariantVerifierPass(bool Strong = false) JL_NOTSAFEPOINT : Strong(Strong) {} - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -// Module Passes -struct CPUFeatures : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +struct FinalLowerGCPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct RemoveNI : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); -}; - -struct LowerSIMDLoop : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +// Module Passes +struct CPUFeaturesPass : PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; + static bool isRequired() { return true; } }; -struct FinalLowerGCPass : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +struct RemoveNIPass : PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct MultiVersioning : PassInfoMixin { +struct MultiVersioningPass : PassInfoMixin { bool external_use; - MultiVersioning(bool external_use = false) : external_use(external_use) {} - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + MultiVersioningPass(bool 
external_use = false) : external_use(external_use) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; struct RemoveJuliaAddrspacesPass : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; struct RemoveAddrspacesPass : PassInfoMixin { std::function ASRemapper; - RemoveAddrspacesPass(); - RemoveAddrspacesPass(std::function ASRemapper) : ASRemapper(std::move(ASRemapper)) {} + RemoveAddrspacesPass() JL_NOTSAFEPOINT; + RemoveAddrspacesPass(std::function ASRemapper) JL_NOTSAFEPOINT : ASRemapper(std::move(ASRemapper)) {} + ~RemoveAddrspacesPass() JL_NOTSAFEPOINT = default; - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; struct LowerPTLSPass : PassInfoMixin { bool imaging_mode; - LowerPTLSPass(bool imaging_mode=false) : imaging_mode(imaging_mode) {} + LowerPTLSPass(bool imaging_mode=false) JL_NOTSAFEPOINT : imaging_mode(imaging_mode) {} - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; // Loop Passes struct JuliaLICMPass : PassInfoMixin { PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, LPMUpdater &U); + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; }; +struct LowerSIMDLoopPass : PassInfoMixin { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; +}; + +#define MODULE_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \ + static bool isRequired() { return true; } \ + }; + +#define FUNCTION_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \ + static bool isRequired() { return true; } \ + }; + +#define LOOP_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, \ + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT { \ + return PreservedAnalyses::all(); \ + } \ + static bool isRequired() { return true; } \ + }; + +// These are useful for debugging with --print-before/after +MODULE_MARKER_PASS(BeforeOptimization) +MODULE_MARKER_PASS(BeforeEarlySimplification) +MODULE_MARKER_PASS(AfterEarlySimplification) +MODULE_MARKER_PASS(BeforeEarlyOptimization) +MODULE_MARKER_PASS(AfterEarlyOptimization) +FUNCTION_MARKER_PASS(BeforeLoopOptimization) +LOOP_MARKER_PASS(BeforeLICM) +LOOP_MARKER_PASS(AfterLICM) +LOOP_MARKER_PASS(BeforeLoopSimplification) +LOOP_MARKER_PASS(AfterLoopSimplification) +FUNCTION_MARKER_PASS(AfterLoopOptimization) +FUNCTION_MARKER_PASS(BeforeScalarOptimization) +FUNCTION_MARKER_PASS(AfterScalarOptimization) +FUNCTION_MARKER_PASS(BeforeVectorization) +FUNCTION_MARKER_PASS(AfterVectorization) +MODULE_MARKER_PASS(BeforeIntrinsicLowering) +MODULE_MARKER_PASS(AfterIntrinsicLowering) +MODULE_MARKER_PASS(BeforeCleanup) +MODULE_MARKER_PASS(AfterCleanup) +MODULE_MARKER_PASS(AfterOptimization) + +bool verifyLLVMIR(const Module &M) 
JL_NOTSAFEPOINT; +bool verifyLLVMIR(const Function &F) JL_NOTSAFEPOINT; +bool verifyLLVMIR(const Loop &L) JL_NOTSAFEPOINT; + #endif diff --git a/src/pipeline.cpp b/src/pipeline.cpp new file mode 100644 index 0000000000000..4b099521d33f9 --- /dev/null +++ b/src/pipeline.cpp @@ -0,0 +1,980 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include +#include "platform.h" + +//We don't care about uninitialized variables in LLVM; that's LLVM's problem +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + +// analysis passes +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// NewPM needs to manually include all the pass headers +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif + +#include + +#include "julia.h" +#include "julia_internal.h" +#include "jitlayers.h" +#include "julia_assert.h" +#include "passes.h" + +using namespace llvm; + +namespace { + //Shamelessly stolen from Clang's approach to sanitizers + //TODO do we want to enable other sanitizers? + static void addSanitizerPasses(ModulePassManager &MPM, OptimizationLevel O) JL_NOTSAFEPOINT { + // Coverage sanitizer + // if (CodeGenOpts.hasSanitizeCoverage()) { + // auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); + // MPM.addPass(ModuleSanitizerCoveragePass( + // SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles, + // CodeGenOpts.SanitizeCoverageIgnorelistFiles)); + // } + + #ifdef _COMPILER_MSAN_ENABLED_ + auto MSanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) JL_NOTSAFEPOINT { + // if (LangOpts.Sanitize.has(Mask)) { + // int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins; + // bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); + + // MemorySanitizerOptions options(TrackOrigins, Recover, CompileKernel,{ + // CodeGenOpts.SanitizeMemoryParamRetval); + MemorySanitizerOptions options; + MPM.addPass(ModuleMemorySanitizerPass(options)); + FunctionPassManager FPM; + FPM.addPass(MemorySanitizerPass(options)); + if (O != OptimizationLevel::O0) { + // MemorySanitizer inserts complex instrumentation that mostly + // follows the logic of the original code, but operates on + // "shadow" values. It can benefit from re-running some + // general purpose optimization passes. + FPM.addPass(EarlyCSEPass()); + // TODO: Consider add more passes like in + // addGeneralOptsForMemorySanitizer. EarlyCSEPass makes visible + // difference on size. It's not clear if the rest is still + // useful. InstCombinePass breaks + // compiler-rt/test/msan/select_origin.cpp. 
+ } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + // } + }; + MSanPass(/*SanitizerKind::Memory, */false); + // MSanPass(SanitizerKind::KernelMemory, true); + #endif + + #ifdef _COMPILER_TSAN_ENABLED_ + // if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + MPM.addPass(ModuleThreadSanitizerPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); + // } + #endif + + + #ifdef _COMPILER_ASAN_ENABLED_ + auto ASanPass = [&](/*SanitizerMask Mask, */bool CompileKernel) JL_NOTSAFEPOINT { + // if (LangOpts.Sanitize.has(Mask)) { + // bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + // bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; + // llvm::AsanDtorKind DestructorKind = + // CodeGenOpts.getSanitizeAddressDtor(); + // AddressSanitizerOptions Opts; + // Opts.CompileKernel = CompileKernel; + // Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask); + // Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; + // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); + // MPM.addPass(RequireAnalysisPass()); + // MPM.addPass(ModuleAddressSanitizerPass( + // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); + //Let's assume the defaults are actually fine for our purposes + MPM.addPass(ModuleAddressSanitizerPass(AddressSanitizerOptions())); + // } + }; + ASanPass(/*SanitizerKind::Address, */false); + // ASanPass(SanitizerKind::KernelAddress, true); + #endif + + // auto HWASanPass = [&](SanitizerMask Mask, bool CompileKernel) { + // if (LangOpts.Sanitize.has(Mask)) { + // bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); + // MPM.addPass(HWAddressSanitizerPass( + // {CompileKernel, Recover, + // /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0})); + // } + // }; + // HWASanPass(/*SanitizerKind::HWAddress, */false); + // // HWASanPass(SanitizerKind::KernelHWAddress, true); + + // if (LangOpts.Sanitize.has(SanitizerKind::DataFlow)) { + // MPM.addPass(DataFlowSanitizerPass(LangOpts.NoSanitizeFiles)); + // } + } + +#ifdef JL_DEBUG_BUILD + static inline void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT { + if (!llvm_only) + MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass())); + MPM.addPass(VerifierPass()); + } +#endif + + auto basicSimplifyCFGOptions() JL_NOTSAFEPOINT { + return SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) + .convertSwitchToLookupTable(true) + .forwardSwitchCondToPhi(true); + } + + auto aggressiveSimplifyCFGOptions() JL_NOTSAFEPOINT { + return SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) + .convertSwitchToLookupTable(true) + .forwardSwitchCondToPhi(true) + //These mess with loop rotation, so only do them after that + .hoistCommonInsts(true) + // Causes an SRET assertion error in late-gc-lowering + // .sinkCommonInsts(true) + ; + } +#if JL_LLVM_VERSION < 150000 +#define LICMOptions() +#endif + +// At any given time exactly one of each pair of overloads is strictly unused +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + + // Version check for our patch to allow invoking pipeline callbacks + // won't work if built with our LLVM but linked with system LLVM + template std::true_type hasInvokeCallbacks_helper(decltype(&PB::invokePipelineStartEPCallbacks)) JL_NOTSAFEPOINT; + std::false_type 
hasInvokeCallbacks_helper(...) JL_NOTSAFEPOINT; + + // static constexpr bool hasInvokeCallbacks = decltype(hasInvokeCallbacks_helper(nullptr))::value; + + //If PB is a nullptr, don't invoke anything (this happens when running julia from opt) + template + std::enable_if_t(nullptr))::value, void> invokePipelineStartCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokePipelineStartEPCallbacks(MPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokePeepholeEPCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokePeepholeEPCallbacks(FPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeEarlySimplificationCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokePipelineEarlySimplificationEPCallbacks(MPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeCGSCCCallbacks(CGSCCPassManager &CGPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeCGSCCOptimizerLateEPCallbacks(CGPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeOptimizerEarlyCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeOptimizerEarlyEPCallbacks(MPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeLateLoopOptimizationCallbacks(LoopPassManager &LPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeLateLoopOptimizationsEPCallbacks(LPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeLoopOptimizerEndCallbacks(LoopPassManager &LPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeLoopOptimizerEndEPCallbacks(LPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeScalarOptimizerCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeScalarOptimizerLateEPCallbacks(FPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeVectorizerCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeVectorizerStartEPCallbacks(FPM, O); + } + template + std::enable_if_t(nullptr))::value, void> invokeOptimizerLastCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeOptimizerLastEPCallbacks(MPM, O); + } + + // Fallbacks + void invokePipelineStartCallbacks(...) {} + void invokePeepholeEPCallbacks(...) {} + void invokeEarlySimplificationCallbacks(...) {} + void invokeCGSCCCallbacks(...) 
{} + void invokeOptimizerEarlyCallbacks(...) {} + void invokeLateLoopOptimizationCallbacks(...) {} + void invokeLoopOptimizerEndCallbacks(...) {} + void invokeScalarOptimizerCallbacks(...) {} + void invokeVectorizerCallbacks(...) {} + void invokeOptimizerLastCallbacks(...) {} + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic pop +#endif + +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif +} + +//The actual pipelines +//TODO Things we might want to consider: +//* For vectorization +//? loop unroll/jam after loop vectorization +//? optimization remarks pass +//? cse/cvp/instcombine/bdce/sccp/licm/unswitch after loop vectorization ( +// cleanup as much as possible before trying to slp vectorize) +//* For optimization +//? loop sink pass +//? hot-cold splitting pass + +#define JULIA_PASS(ADD_PASS) if (!options.llvm_only) { ADD_PASS; } else do { } while (0) + +static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeEarlySimplificationMarkerPass()); +#ifdef JL_DEBUG_BUILD + addVerificationPasses(MPM, options.llvm_only); +#endif + // Place after verification in case we want to force it anyways + MPM.addPass(ForceFunctionAttrsPass()); + invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(Annotation2MetadataPass()); + MPM.addPass(ConstantMergePass()); + { + FunctionPassManager FPM; + FPM.addPass(LowerExpectIntrinsicPass()); + if (O.getSpeedupLevel() >= 2) { + JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass())); + } + // DCE must come before simplifycfg + // codegen can generate unused statements when generating builtin calls, + // and those dead statements can alter how simplifycfg optimizes the CFG + FPM.addPass(DCEPass()); + FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); + if (O.getSpeedupLevel() >= 1) { +#if JL_LLVM_VERSION >= 160000 + // TODO check the LLVM 15 default. + FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); +#else + FPM.addPass(SROAPass()); +#endif + } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeEarlySimplificationCallbacks(MPM, PB, O); + MPM.addPass(AfterEarlySimplificationMarkerPass()); +} + +static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeEarlyOptimizationMarkerPass()); + invokeOptimizerEarlyCallbacks(MPM, PB, O); + { + CGSCCPassManager CGPM; + invokeCGSCCCallbacks(CGPM, PB, O); + if (O.getSpeedupLevel() >= 2) { + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(AllocOptPass())); + FPM.addPass(Float2IntPass()); + FPM.addPass(LowerConstantIntrinsicsPass()); + CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + } + if (O.getSpeedupLevel() >= 2) { + MPM.addPass(RequireAnalysisPass()); + } + // MPM.addPass(createModuleToFunctionPassAdaptor(InvalidateAnalysisPass())); + if (options.dump_native) { + MPM.addPass(StripDeadPrototypesPass()); + JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use))); + } + JULIA_PASS(MPM.addPass(CPUFeaturesPass())); + if (O.getSpeedupLevel() >= 1) { + FunctionPassManager FPM; + if (O.getSpeedupLevel() >= 2) { +#if JL_LLVM_VERSION >= 160000 + // TODO check the LLVM 15 default. 
+ FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); +#else + FPM.addPass(SROAPass()); +#endif + // SROA can duplicate PHI nodes which can block LowerSIMD + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(ReassociatePass()); + FPM.addPass(EarlyCSEPass()); + JULIA_PASS(FPM.addPass(AllocOptPass())); + } else { // if (O.getSpeedupLevel() >= 1) (exactly) + FPM.addPass(InstCombinePass()); + FPM.addPass(EarlyCSEPass()); + } + invokePeepholeEPCallbacks(FPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(GlobalDCEPass()); + MPM.addPass(AfterEarlyOptimizationMarkerPass()); +} + +static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeLoopOptimizationMarkerPass()); + { + LoopPassManager LPM; + LPM.addPass(LowerSIMDLoopPass()); + if (O.getSpeedupLevel() >= 2) { + LPM.addPass(LoopRotatePass()); + } + invokeLateLoopOptimizationCallbacks(LPM, PB, O); + //We don't know if the loop callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } + if (O.getSpeedupLevel() >= 2) { + LoopPassManager LPM; + LPM.addPass(BeforeLICMMarkerPass()); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(AfterLICMMarkerPass()); + //LICM needs MemorySSA now, so we must use it + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); + } + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(IRCEPass()); + } + { + LoopPassManager LPM; + LPM.addPass(BeforeLoopSimplificationMarkerPass()); + if (O.getSpeedupLevel() >= 2) { + LPM.addPass(LoopInstSimplifyPass()); + LPM.addPass(LoopIdiomRecognizePass()); + LPM.addPass(IndVarSimplifyPass()); + LPM.addPass(LoopDeletionPass()); + // This unroll will only unroll loops when the trip count is known and small, + // so that no loop remains + LPM.addPass(LoopFullUnrollPass()); + } + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + LPM.addPass(AfterLoopSimplificationMarkerPass()); + //We don't know if the loop end callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } + FPM.addPass(AfterLoopOptimizationMarkerPass()); +} + +static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeScalarOptimizationMarkerPass()); + if (O.getSpeedupLevel() >= 2) { + JULIA_PASS(FPM.addPass(AllocOptPass())); +#if JL_LLVM_VERSION >= 160000 + // TODO check the LLVM 15 default. 
+ FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); +#else + FPM.addPass(SROAPass()); +#endif + FPM.addPass(InstSimplifyPass()); + FPM.addPass(GVNPass()); + FPM.addPass(MemCpyOptPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DCEPass()); + FPM.addPass(IRCEPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + } + if (O.getSpeedupLevel() >= 3) { + FPM.addPass(GVNPass()); + } + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(DSEPass()); + invokePeepholeEPCallbacks(FPM, PB, O); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + JULIA_PASS(FPM.addPass(AllocOptPass())); + { + LoopPassManager LPM; + LPM.addPass(LoopDeletionPass()); + LPM.addPass(LoopInstSimplifyPass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + } + FPM.addPass(LoopDistributePass()); + } + invokeScalarOptimizerCallbacks(FPM, PB, O); + FPM.addPass(AfterScalarOptimizationMarkerPass()); +} + +static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeVectorizationMarkerPass()); + //TODO look into loop vectorize options + FPM.addPass(InjectTLIMappings()); + FPM.addPass(LoopVectorizePass()); + FPM.addPass(LoopLoadEliminationPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(SLPVectorizerPass()); + invokeVectorizerCallbacks(FPM, PB, O); + FPM.addPass(VectorCombinePass()); + FPM.addPass(ADCEPass()); + //TODO add BDCEPass here? + // This unroll will unroll vectorized loops + // as well as loops that we tried but failed to vectorize + FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + FPM.addPass(AfterVectorizationMarkerPass()); +} + +static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeIntrinsicLoweringMarkerPass()); + if (options.lower_intrinsics) { + //TODO barrier pass? + { + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(LowerExcHandlersPass())); + JULIA_PASS(FPM.addPass(GCInvariantVerifierPass(false))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + // Needed **before** LateLowerGCFrame on LLVM < 12 + // due to bug in `CreateAlignmentAssumption`. 
+ JULIA_PASS(MPM.addPass(RemoveNIPass())); + { + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(LateLowerGCPass())); + JULIA_PASS(FPM.addPass(FinalLowerGCPass())); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(GVNPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(DCEPass()); + } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + JULIA_PASS(MPM.addPass(LowerPTLSPass(options.dump_native))); + if (O.getSpeedupLevel() >= 1) { + FunctionPassManager FPM; + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + } else { + JULIA_PASS(MPM.addPass(RemoveNIPass())); + } + MPM.addPass(AfterIntrinsicLoweringMarkerPass()); +} + +static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeCleanupMarkerPass()); + if (O.getSpeedupLevel() >= 2) { + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(CombineMulAddPass())); + FPM.addPass(DivRemPairsPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeOptimizerLastCallbacks(MPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); + addSanitizerPasses(MPM, O); + { + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(DemoteFloat16Pass())); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(GVNPass()); + } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + MPM.addPass(AfterCleanupMarkerPass()); +} + +static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeOptimizationMarkerPass()); + buildEarlySimplificationPipeline(MPM, PB, O, options); + MPM.addPass(AlwaysInlinerPass()); + buildEarlyOptimizerPipeline(MPM, PB, O, options); + { + FunctionPassManager FPM; + buildLoopOptimizerPipeline(FPM, PB, O, options); + buildScalarOptimizerPipeline(FPM, PB, O, options); + if (O.getSpeedupLevel() >= 2) { + buildVectorPipeline(FPM, PB, O, options); + } + FPM.addPass(WarnMissedTransformationsPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + buildIntrinsicLoweringPipeline(MPM, PB, O, options); + buildCleanupPipeline(MPM, PB, O, options); + MPM.addPass(AfterOptimizationMarkerPass()); +} + +extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, int Speedup, int Size, + int lower_intrinsics, int dump_native, int external_use, int llvm_only) JL_NOTSAFEPOINT +{ + OptimizationLevel O; + switch (Size) { + case 1: + O = OptimizationLevel::Os; + break; + default: + O = OptimizationLevel::Oz; + break; + case 0: + switch (Speedup) { + case 0: + O = OptimizationLevel::O0; + break; + case 1: + O = OptimizationLevel::O1; + break; + case 2: + O = OptimizationLevel::O2; + break; + default: + O = OptimizationLevel::O3; + break; + } + } + buildPipeline(*reinterpret_cast(MPM), reinterpret_cast(PB), O, + OptimizationOptions{!!lower_intrinsics, !!dump_native, !!external_use, !!llvm_only}); +} + +#undef JULIA_PASS + +namespace { + + void adjustPIC(PassInstrumentationCallbacks &PIC) JL_NOTSAFEPOINT { +//Borrowed from LLVM PassBuilder.cpp:386 +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC.addClassToPassName(CLASS, NAME); +#define MODULE_ANALYSIS(NAME, CREATE_PASS) \ 
+PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC.addClassToPassName(CLASS, NAME); +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOPNEST_PASS(NAME, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC.addClassToPassName(CLASS, NAME); +#define LOOP_ANALYSIS(NAME, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ +PIC.addClassToPassName(CLASS, NAME); +#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); + +#include "llvm-julia-passes.inc" + +#undef MODULE_PASS +#undef MODULE_PASS_WITH_PARAMS +#undef MODULE_ANALYSIS +#undef FUNCTION_PASS +#undef FUNCTION_PASS_WITH_PARAMS +#undef FUNCTION_ANALYSIS +#undef LOOPNEST_PASS +#undef LOOP_PASS +#undef LOOP_PASS_WITH_PARAMS +#undef LOOP_ANALYSIS +#undef CGSCC_PASS +#undef CGSCC_PASS_WITH_PARAMS +#undef CGSCC_ANALYSIS + // Marker passes are set separately so that we don't export them by accident + PIC.addClassToPassName("BeforeOptimizationMarkerPass", "BeforeOptimization"); + PIC.addClassToPassName("BeforeEarlySimplificationMarkerPass", "BeforeEarlySimplification"); + PIC.addClassToPassName("AfterEarlySimplificationMarkerPass", "AfterEarlySimplification"); + PIC.addClassToPassName("BeforeEarlyOptimizationMarkerPass", "BeforeEarlyOptimization"); + PIC.addClassToPassName("AfterEarlyOptimizationMarkerPass", "AfterEarlyOptimization"); + PIC.addClassToPassName("BeforeLoopOptimizationMarkerPass", "BeforeLoopOptimization"); + PIC.addClassToPassName("BeforeLICMMarkerPass", "BeforeLICM"); + PIC.addClassToPassName("AfterLICMMarkerPass", "AfterLICM"); + PIC.addClassToPassName("BeforeLoopSimplificationMarkerPass", "BeforeLoopSimplification"); + PIC.addClassToPassName("AfterLoopSimplificationMarkerPass", "AfterLoopSimplification"); + PIC.addClassToPassName("AfterLoopOptimizationMarkerPass", "AfterLoopOptimization"); + PIC.addClassToPassName("BeforeScalarOptimizationMarkerPass", "BeforeScalarOptimization"); + PIC.addClassToPassName("AfterScalarOptimizationMarkerPass", "AfterScalarOptimization"); + PIC.addClassToPassName("BeforeVectorizationMarkerPass", "BeforeVectorization"); + PIC.addClassToPassName("AfterVectorizationMarkerPass", "AfterVectorization"); + PIC.addClassToPassName("BeforeIntrinsicLoweringMarkerPass", "BeforeIntrinsicLowering"); + PIC.addClassToPassName("AfterIntrinsicLoweringMarkerPass", "AfterIntrinsicLowering"); + PIC.addClassToPassName("BeforeCleanupMarkerPass", "BeforeCleanup"); + PIC.addClassToPassName("AfterCleanupMarkerPass", "AfterCleanup"); + PIC.addClassToPassName("AfterOptimizationMarkerPass", "AfterOptimization"); + } + + FunctionAnalysisManager createFAM(OptimizationLevel O, TargetMachine &TM) JL_NOTSAFEPOINT { + + FunctionAnalysisManager FAM; + // Register the AA manager first so that our version is the one used. 
+ FAM.registerPass([&] JL_NOTSAFEPOINT { + AAManager AA; + if (O.getSpeedupLevel() >= 2) { + AA.registerFunctionAnalysis(); + AA.registerFunctionAnalysis(); + AA.registerFunctionAnalysis(); + } + TM.registerDefaultAliasAnalyses(AA); + return AA; + }); + // Register our TargetLibraryInfoImpl. + FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetIRAnalysis(TM.getTargetIRAnalysis()); }); + FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(TM.getTargetTriple())); }); + return FAM; + } + + ModulePassManager createMPM(PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT { + ModulePassManager MPM; + buildPipeline(MPM, &PB, O, options); + return MPM; + } +} + +NewPM::NewPM(std::unique_ptr TM, OptimizationLevel O, OptimizationOptions options) : + TM(std::move(TM)), O(O), options(options), TimePasses() {} + + +NewPM::~NewPM() = default; + +AnalysisManagers::AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O) : LAM(), FAM(createFAM(O, TM)), CGAM(), MAM() { + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); +} + +AnalysisManagers::AnalysisManagers(PassBuilder &PB) : LAM(), FAM(), CGAM(), MAM() { + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); +} + +AnalysisManagers::~AnalysisManagers() = default; + +void NewPM::run(Module &M) { + //We must recreate the analysis managers every time + //so that analyses from previous runs of the pass manager + //do not hang around for the next run +#if JL_LLVM_VERSION >= 160000 + StandardInstrumentations SI(M.getContext(),false); +#else + StandardInstrumentations SI(false); +#endif + FunctionAnalysisManager FAM(createFAM(O, *TM.get())); + PassInstrumentationCallbacks PIC; + adjustPIC(PIC); + TimePasses.registerCallbacks(PIC); + SI.registerCallbacks(PIC, &FAM); + SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this + LoopAnalysisManager LAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + PassBuilder PB(TM.get(), PipelineTuningOptions(), None, &PIC); + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + ModulePassManager MPM = createMPM(PB, O, options); +#ifndef __clang_gcanalyzer__ /* the analyzer cannot prove we have not added instrumentation callbacks with safepoints */ + MPM.run(M, MAM); +#endif +} + +void NewPM::printTimers() { + TimePasses.print(); +} + +OptimizationLevel getOptLevel(int optlevel) { + switch (std::min(std::max(optlevel, 0), 3)) { + case 0: + return OptimizationLevel::O0; + case 1: + return OptimizationLevel::O1; + case 2: + return OptimizationLevel::O2; + case 3: + return OptimizationLevel::O3; + } + llvm_unreachable("cannot get here!"); +} + +//This part is also basically stolen from LLVM's PassBuilder.cpp file +static Optional> parseJuliaPipelineOptions(StringRef name) { + if (name.consume_front("julia")) { + auto O = OptimizationLevel::O2; + auto options = OptimizationOptions::defaults(); + if (!name.empty() && (!name.consume_front("<") || !name.consume_back(">"))) { + assert(false && "Expected pass options to be enclosed in <>!"); + } + std::map option_pointers = { +#define OPTION(name) {#name, 
&options.name} + OPTION(lower_intrinsics), + OPTION(dump_native), + OPTION(external_use), + OPTION(llvm_only) +#undef OPTION + }; + while (!name.empty()) { + StringRef option; + std::tie(option, name) = name.split(';'); + bool enable = !option.consume_front("no_"); + auto it = option_pointers.find(option); + if (it == option_pointers.end()) { + if (option.consume_front("level=")) { + int level = 2; + if (option.getAsInteger(0, level)) { + assert(false && "Non-integer passed to julia level!"); + } + switch (std::min(std::max(level, 0), 3)) { + case 0: + O = OptimizationLevel::O0; + break; + case 1: + O = OptimizationLevel::O1; + break; + case 2: + O = OptimizationLevel::O2; + break; + case 3: + O = OptimizationLevel::O3; + break; + } + } else { + errs() << "Unable to find julia option '" << option << "'!"; + assert(false && "Invalid option passed to julia pass!"); + } + } else { + *it->second = enable; + } + } + return {{O, options}}; + } + return None; +} + +bool verifyLLVMIR(const Module &M) JL_NOTSAFEPOINT { + JL_TIMING(VERIFY_IR, VERIFY_Module); + if (verifyModule(M, &errs())) { + errs() << "Failed to verify module '" << M.getModuleIdentifier() << "', dumping entire module!\n\n"; + errs() << M << "\n"; + return true; + } + return false; +} + +bool verifyLLVMIR(const Function &F) JL_NOTSAFEPOINT { + JL_TIMING(VERIFY_IR, VERIFY_Function); + if (verifyFunction(F, &errs())) { + errs() << "Failed to verify function '" << F.getName() << "', dumping entire module!\n\n"; + errs() << *F.getParent() << "\n"; + return true; + } + return false; +} + +bool verifyLLVMIR(const Loop &L) JL_NOTSAFEPOINT { + JL_TIMING(VERIFY_IR, VERIFY_Loop); + if (verifyFunction(*L.getHeader()->getParent(), &errs())) { + errs() << "Failed to verify loop '" << L << "', dumping entire module!\n\n"; + errs() << *L.getHeader()->getModule() << "\n"; + return true; + } + return false; +} + +// new pass manager plugin + +// NOTE: Instead of exporting all the constructors in passes.h we could +// forward the callbacks to the respective passes. LLVM seems to prefer this, +// and when we add the full pass builder having them directly will be helpful. 
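The `julia<...>` option string parsed above is what makes the full pipeline reachable from outside the JIT (for example from `opt`) once the parsing callbacks defined below are registered. The following is a minimal sketch, not part of the patch, of driving that pipeline from a standalone C++ tool; it assumes the tool is linked against the library that exports `jl_register_passbuilder_callbacks_impl` (normally `opt` would instead load that library with `-load-pass-plugin`), and the option string used here is only an example.

#include <llvm/IR/Module.h>
#include <llvm/IR/PassManager.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/raw_ostream.h>

using namespace llvm;

// Exported by the Julia codegen library (declared below in this file).
extern "C" void jl_register_passbuilder_callbacks_impl(void *PB);

static void runJuliaPipeline(Module &M) {
    PassBuilder PB;
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
    // Installs the "julia<...>" pipeline parser and the individual Julia passes.
    jl_register_passbuilder_callbacks_impl(&PB);

    ModulePassManager MPM;
    // Same spelling opt would accept, e.g. -passes='julia<level=2;no_lower_intrinsics>'.
    // The marker passes above additionally give stable names for -print-before/-print-after.
    if (Error Err = PB.parsePassPipeline(MPM, "julia<level=2;no_lower_intrinsics>")) {
        errs() << toString(std::move(Err)) << "\n";
        return;
    }
    MPM.run(M, MAM);
}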
+static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { + auto PIC = PB.getPassInstrumentationCallbacks(); + if (PIC) { + adjustPIC(*PIC); + } + PB.registerPipelineParsingCallback( + [](StringRef Name, FunctionPassManager &PM, + ArrayRef InnerPipeline) { +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#include "llvm-julia-passes.inc" +#undef FUNCTION_PASS + if (Name.consume_front("GCInvariantVerifier")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool strong = true; + if (Name.consume_front("no-")) { + strong = false; + } + if (Name == "strong") { + PM.addPass(GCInvariantVerifierPass(strong)); + return true; + } + } + return false; + } + return false; + }); + + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &PM, + ArrayRef InnerPipeline) { +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#include "llvm-julia-passes.inc" +#undef MODULE_PASS + if (Name.consume_front("LowerPTLSPass")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool imaging_mode = true; + if (Name.consume_front("no-")) { + imaging_mode = false; + } + if (Name == "imaging") { + PM.addPass(LowerPTLSPass(imaging_mode)); + return true; + } + } + return false; + } + if (Name.consume_front("JuliaMultiVersioning")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool external_use = true; + if (Name.consume_front("no-")) { + external_use = false; + } + if (Name == "external") { + PM.addPass(MultiVersioningPass(external_use)); + return true; + } + } + return false; + } + //Add full pipelines here + auto julia_options = parseJuliaPipelineOptions(Name); + if (julia_options) { + ModulePassManager pipeline; + buildPipeline(pipeline, nullptr, julia_options->first, julia_options->second); + PM.addPass(std::move(pipeline)); + return true; + } + return false; + }); + + PB.registerPipelineParsingCallback( + [](StringRef Name, LoopPassManager &PM, + ArrayRef InnerPipeline) { +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#include "llvm-julia-passes.inc" +#undef LOOP_PASS + return false; + }); +} + +extern "C" JL_DLLEXPORT_CODEGEN +void jl_register_passbuilder_callbacks_impl(void *PB) JL_NOTSAFEPOINT { + registerCallbacks(*static_cast(PB)); +} + +extern "C" JL_DLLEXPORT_CODEGEN +::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT { + return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks}; +} diff --git a/src/precompile.c b/src/precompile.c index 7713a312f2a4c..c40e867ea699e 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -10,6 +10,7 @@ #include "julia.h" #include "julia_internal.h" #include "julia_assert.h" +#include "serialize.h" #ifdef __cplusplus extern "C" { @@ -20,17 +21,84 @@ JL_DLLEXPORT int jl_generating_output(void) return jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputji || jl_options.outputasm; } -static void *jl_precompile(int all); +void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) { + // Write the source-text for the dependent files + if (udeps) { + // Go back and update the source-text position to point to the current position + int64_t posfile = ios_pos(f); + ios_seek(f, srctextpos); + write_uint64(f, posfile); + ios_seek_end(f); + // Each source-text file is written as + // int32: length of abspath + // char*: abspath + // uint64: length of src text + // char*: src 
text + // At the end we write int32(0) as a terminal sentinel. + size_t len = jl_array_nrows(udeps); + static jl_value_t *replace_depot_func = NULL; + if (!replace_depot_func) + replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path")); + ios_t srctext; + jl_value_t *deptuple = NULL; + JL_GC_PUSH2(&deptuple, &udeps); + for (size_t i = 0; i < len; i++) { + deptuple = jl_array_ptr_ref(udeps, i); + jl_value_t *depmod = jl_fieldref(deptuple, 0); // module + // Dependencies declared with `include_dependency` are excluded + // because these may not be Julia code (and could be huge) + if (depmod != (jl_value_t*)jl_main_module) { + jl_value_t *abspath = jl_fieldref(deptuple, 1); // file abspath + const char *abspathstr = jl_string_data(abspath); + if (!abspathstr[0]) + continue; + ios_t *srctp = ios_file(&srctext, abspathstr, 1, 0, 0, 0); + if (!srctp) { + jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n", + abspathstr); + continue; + } + + jl_value_t **replace_depot_args; + JL_GC_PUSHARGS(replace_depot_args, 2); + replace_depot_args[0] = replace_depot_func; + replace_depot_args[1] = abspath; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 2); + ct->world_age = last_age; + JL_GC_POP(); + + size_t slen = jl_string_len(depalias); + write_int32(f, slen); + ios_write(f, jl_string_data(depalias), slen); + posfile = ios_pos(f); + write_uint64(f, 0); // placeholder for length of this file in bytes + uint64_t filelen = (uint64_t) ios_copyall(f, &srctext); + ios_close(&srctext); + ios_seek(f, posfile); + write_uint64(f, filelen); + ios_seek_end(f); + } + } + JL_GC_POP(); + } + write_int32(f, 0); // mark the end of the source text +} -void jl_write_compiler_output(void) +JL_DLLEXPORT void jl_write_compiler_output(void) { if (!jl_generating_output()) { return; } - void *native_code = NULL; - if (!jl_options.incremental) - native_code = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); + jl_task_wait_empty(); // wait for most work to finish (except possibly finalizers) + jl_gc_collect(JL_GC_FULL); + jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers + jl_task_t *ct = jl_current_task; + jl_gc_enable_finalizers(ct, 0); // now disable finalizers, as they could schedule more work or make other unexpected changes to reachability + jl_task_wait_empty(); // then make sure we are the only thread alive that could be running user code past here if (!jl_module_init_order) { jl_printf(JL_STDERR, "WARNING: --output requested, but no modules defined during run\n"); @@ -38,11 +106,12 @@ void jl_write_compiler_output(void) } jl_array_t *worklist = jl_module_init_order; - JL_GC_PUSH1(&worklist); + jl_array_t *udeps = NULL; + JL_GC_PUSH2(&worklist, &udeps); jl_module_init_order = jl_alloc_vec_any(0); - int i, l = jl_array_len(worklist); + int i, l = jl_array_nrows(worklist); for (i = 0; i < l; i++) { - jl_value_t *m = jl_ptrarrayref(worklist, i); + jl_value_t *m = jl_array_ptr_ref(worklist, i); jl_value_t *f = jl_get_global((jl_module_t*)m, jl_symbol("__init__")); if (f) { jl_array_ptr_1d_push(jl_module_init_order, m); @@ -53,367 +122,74 @@ void jl_write_compiler_output(void) // since it's a slightly duplication of effort jl_value_t *tt = jl_is_type(f) ? 
(jl_value_t*)jl_wrap_Type(f) : jl_typeof(f); JL_GC_PUSH1(&tt); - tt = (jl_value_t*)jl_apply_tuple_type_v(&tt, 1); + tt = jl_apply_tuple_type_v(&tt, 1); jl_compile_hint((jl_tupletype_t*)tt); JL_GC_POP(); } } } - if (jl_options.incremental) { - if (jl_options.outputji) - if (jl_save_incremental(jl_options.outputji, worklist)) - jl_exit(1); - if (jl_options.outputbc || jl_options.outputunoptbc) - jl_printf(JL_STDERR, "WARNING: incremental output to a .bc file is not implemented\n"); - if (jl_options.outputo) - jl_printf(JL_STDERR, "WARNING: incremental output to a .o file is not implemented\n"); - if (jl_options.outputasm) - jl_printf(JL_STDERR, "WARNING: incremental output to a .s file is not implemented\n"); - } - else { - ios_t *s = NULL; - if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) - s = jl_create_system_image(native_code); - - if (jl_options.outputji) { - if (s == NULL) { - jl_save_system_image(jl_options.outputji); - } - else { - ios_t f; - if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) - jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); - ios_write(&f, (const char*)s->buf, (size_t)s->size); - ios_close(&f); - } - } - - if (jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm) { - assert(s); - jl_dump_native(native_code, - jl_options.outputbc, - jl_options.outputunoptbc, - jl_options.outputo, - jl_options.outputasm, - (const char*)s->buf, (size_t)s->size); - jl_postoutput_hook(); - } - } - for (size_t i = 0; i < jl_current_modules.size; i += 2) { - if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { - jl_printf(JL_STDERR, "\nWARNING: detected unclosed module: "); - jl_static_show(JL_STDERR, (jl_value_t*)jl_current_modules.table[i]); - jl_printf(JL_STDERR, "\n ** incremental compilation may be broken for this module **\n\n"); - } - } - JL_GC_POP(); -} - -// f{<:Union{...}}(...) is a common pattern -// and expanding the Union may give a leaf function -static void _compile_all_tvar_union(jl_value_t *methsig) -{ - if (!jl_is_unionall(methsig) && jl_is_dispatch_tupletype(methsig)) { - // usually can create a specialized version of the function, - // if the signature is already a dispatch type - if (jl_compile_hint((jl_tupletype_t*)methsig)) - return; - } - - int tvarslen = jl_subtype_env_size(methsig); - jl_value_t *sigbody = methsig; - jl_value_t **roots; - JL_GC_PUSHARGS(roots, 1 + 2 * tvarslen); - jl_value_t **env = roots + 1; - int *idx = (int*)alloca(sizeof(int) * tvarslen); - int i; - for (i = 0; i < tvarslen; i++) { - assert(jl_is_unionall(sigbody)); - idx[i] = 0; - env[2 * i] = (jl_value_t*)((jl_unionall_t*)sigbody)->var; - env[2 * i + 1] = jl_bottom_type; // initialize the list with Union{}, since T<:Union{} is always a valid option - sigbody = ((jl_unionall_t*)sigbody)->body; - } - - for (i = 0; i < tvarslen; /* incremented by inner loop */) { - jl_value_t **sig = &roots[0]; - JL_TRY { - // TODO: wrap in UnionAll for each tvar in env[2*i + 1] ? - // currently doesn't matter much, since jl_compile_hint doesn't work on abstract types - *sig = (jl_value_t*)jl_instantiate_type_with(sigbody, env, tvarslen); - } - JL_CATCH { - goto getnext; // sigh, we found an invalid type signature. should we warn the user? 
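As a companion to `write_srctext` earlier in this file: the section it emits is a simple length-prefixed stream (int32 path length, path bytes, uint64 source length, source bytes, repeated, terminated by an int32 zero), so it can be walked back with a few lines of plain C. The sketch below is illustrative only: it uses stdio rather than the `ios_t` layer, assumes the integers are stored in native byte order as `write_int32`/`write_uint64` produce, and the function name is invented.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* `off` is the file position of the source-text section, i.e. the value that
 * write_srctext stored back at `srctextpos`. */
static void dump_srctext_section(FILE *f, uint64_t off)
{
    if (fseek(f, (long)off, SEEK_SET) != 0)
        return;
    for (;;) {
        int32_t pathlen = 0;
        if (fread(&pathlen, sizeof(pathlen), 1, f) != 1 || pathlen == 0)
            break; /* int32(0) is the terminal sentinel */
        char *path = (char*)malloc((size_t)pathlen + 1);
        uint64_t srclen = 0;
        if (fread(path, 1, (size_t)pathlen, f) != (size_t)pathlen ||
            fread(&srclen, sizeof(srclen), 1, f) != 1) {
            free(path);
            break;
        }
        path[pathlen] = '\0';
        printf("%s: %llu bytes of source text\n", path, (unsigned long long)srclen);
        fseek(f, (long)srclen, SEEK_CUR); /* skip over the source text itself */
        free(path);
    }
}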
- } - if (!jl_has_concrete_subtype(*sig)) - goto getnext; // signature wouldn't be callable / is invalid -- skip it - if (jl_is_concrete_type(*sig)) { - if (jl_compile_hint((jl_tupletype_t *)*sig)) - goto getnext; // success - } - - getnext: - for (i = 0; i < tvarslen; i++) { - jl_tvar_t *tv = (jl_tvar_t*)env[2 * i]; - if (jl_is_uniontype(tv->ub)) { - size_t l = jl_count_union_components(tv->ub); - size_t j = idx[i]; - if (j == l) { - env[2 * i + 1] = jl_bottom_type; - idx[i] = 0; - } - else { - jl_value_t *ty = jl_nth_union_component(tv->ub, j); - if (!jl_is_concrete_type(ty)) - ty = (jl_value_t*)jl_new_typevar(tv->name, tv->lb, ty); - env[2 * i + 1] = ty; - idx[i] = j + 1; - break; - } - } - else { - env[2 * i + 1] = (jl_value_t*)tv; - } - } - } - JL_GC_POP(); -} - -// f(::Union{...}, ...) is a common pattern -// and expanding the Union may give a leaf function -static void _compile_all_union(jl_value_t *sig) -{ - jl_tupletype_t *sigbody = (jl_tupletype_t*)jl_unwrap_unionall(sig); - size_t count_unions = 0; - size_t i, l = jl_svec_len(sigbody->parameters); - jl_svec_t *p = NULL; - jl_value_t *methsig = NULL; - - for (i = 0; i < l; i++) { - jl_value_t *ty = jl_svecref(sigbody->parameters, i); - if (jl_is_uniontype(ty)) - ++count_unions; - else if (ty == jl_bottom_type) - return; // why does this method exist? - else if (jl_is_datatype(ty) && !jl_has_free_typevars(ty) && - ((!jl_is_kind(ty) && ((jl_datatype_t*)ty)->isconcretetype) || - ((jl_datatype_t*)ty)->name == jl_type_typename)) - return; // no amount of union splitting will make this a leaftype signature - } - - if (count_unions == 0 || count_unions >= 6) { - _compile_all_tvar_union(sig); - return; - } - - int *idx = (int*)alloca(sizeof(int) * count_unions); - for (i = 0; i < count_unions; i++) { - idx[i] = 0; - } - - JL_GC_PUSH2(&p, &methsig); - int idx_ctr = 0, incr = 0; - while (!incr) { - p = jl_alloc_svec_uninit(l); - for (i = 0, idx_ctr = 0, incr = 1; i < l; i++) { - jl_value_t *ty = jl_svecref(sigbody->parameters, i); - if (jl_is_uniontype(ty)) { - assert(idx_ctr < count_unions); - size_t l = jl_count_union_components(ty); - size_t j = idx[idx_ctr]; - jl_svecset(p, i, jl_nth_union_component(ty, j)); - ++j; - if (incr) { - if (j == l) { - idx[idx_ctr] = 0; - } - else { - idx[idx_ctr] = j; - incr = 0; - } - } - ++idx_ctr; - } - else { - jl_svecset(p, i, ty); - } - } - methsig = (jl_value_t*)jl_apply_tuple_type(p); - methsig = jl_rewrap_unionall(methsig, sig); - _compile_all_tvar_union(methsig); - } + assert(jl_precompile_toplevel_module == NULL); + void *native_code = NULL; - JL_GC_POP(); -} + bool_t emit_native = jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm; -static void _compile_all_deq(jl_array_t *found) -{ - int found_i, found_l = jl_array_len(found); - jl_printf(JL_STDERR, "found %d uncompiled methods for compile-all\n", (int)found_l); - jl_method_instance_t *mi = NULL; - jl_value_t *src = NULL; - JL_GC_PUSH2(&mi, &src); - for (found_i = 0; found_i < found_l; found_i++) { - if (found_i % (1 + found_l / 300) == 0 || found_i == found_l - 1) // show 300 progress steps, to show progress without overwhelming log files - jl_printf(JL_STDERR, " %d / %d\r", found_i + 1, found_l); - jl_typemap_entry_t *ml = (jl_typemap_entry_t*)jl_array_ptr_ref(found, found_i); - jl_method_t *m = ml->func.method; - if (m->source == NULL) // TODO: generic implementations of generated functions - continue; - mi = jl_get_unspecialized(m); - assert(mi == jl_atomic_load_relaxed(&m->unspecialized)); // make 
sure we didn't get tricked by a generated function, since we can't handle those - jl_code_instance_t *ucache = jl_get_method_inferred(mi, (jl_value_t*)jl_any_type, 1, ~(size_t)0); - if (jl_atomic_load_relaxed(&ucache->invoke) != NULL) - continue; - src = m->source; - assert(src); - // TODO: we could now enable storing inferred function pointers in the `unspecialized` cache - //src = jl_type_infer(mi, jl_atomic_load_acquire(&jl_world_counter), 1); - //if (jl_atomic_load_relaxed(&ucache->invoke) != NULL) - // continue; + const char *outputji = jl_options.outputji; - // first try to create leaf signatures from the signature declaration and compile those - _compile_all_union((jl_value_t*)ml->sig); - // then also compile the generic fallback - jl_generate_fptr_for_unspecialized(ucache); - } - JL_GC_POP(); - jl_printf(JL_STDERR, "\n"); -} + bool_t emit_split = outputji && emit_native; -static int compile_all_enq__(jl_typemap_entry_t *ml, void *env) -{ - jl_array_t *found = (jl_array_t*)env; - // method definition -- compile template field - jl_method_t *m = ml->func.method; - if (m->source) { - // found a method to compile - jl_array_ptr_1d_push(found, (jl_value_t*)ml); - } - return 1; -} + ios_t *s = NULL; + ios_t *z = NULL; + int64_t srctextpos = 0 ; + jl_create_system_image(emit_native ? &native_code : NULL, + jl_options.incremental ? worklist : NULL, + emit_split, &s, &z, &udeps, &srctextpos); + if (!emit_split) + z = s; -static int compile_all_enq_(jl_methtable_t *mt, void *env) -{ - jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), compile_all_enq__, env); - return 1; -} + ios_t f; -static void jl_compile_all_defs(void) -{ - // this "found" array will contain - // TypeMapEntries for Methods and MethodInstances that need to be compiled - jl_array_t *m = jl_alloc_vec_any(0); - JL_GC_PUSH1(&m); - int _changes = -1; - int attempts = 0; - while (1) { - jl_foreach_reachable_mtable(compile_all_enq_, m); - size_t changes = jl_array_len(m); - if (!changes) - break; - if (changes == _changes) { - if (++attempts > 5) { - jl_printf(JL_STDERR, "unable to compile %d methods for compile-all\n", (int)changes); - break; - } - } else { - attempts = 0; - } - _compile_all_deq(m); - jl_array_del_end(m, changes); - _changes = changes; + if (outputji) { + if (ios_file(&f, outputji, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open system image file \"%s\" for writing", outputji); + ios_write(&f, (const char *)s->buf, (size_t)s->size); + ios_close(s); + free(s); } - JL_GC_POP(); -} -static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closure) -{ - assert(jl_is_method_instance(mi)); - jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); - while (codeinst) { - int do_compile = 0; - if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) { - if (codeinst->inferred && codeinst->inferred != jl_nothing && - jl_ir_flag_inferred((jl_array_t*)codeinst->inferred) && - !jl_ir_flag_inlineable((jl_array_t*)codeinst->inferred)) { - do_compile = 1; - } - else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) { - do_compile = 1; - } - } - if (do_compile) { - jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); - return 1; - } - codeinst = jl_atomic_load_relaxed(&codeinst->next); + // jl_dump_native writes the clone_targets into `s` + // We need to postpone the srctext writing after that. + if (native_code) { + ios_t *targets = outputji ? 
&f : NULL; + // jl_dump_native will close and free z when appropriate + // this is a horrible abstraction, but + // this helps reduce live memory significantly + jl_dump_native(native_code, + jl_options.outputbc, + jl_options.outputunoptbc, + jl_options.outputo, + jl_options.outputasm, + z, targets, NULL); + jl_postoutput_hook(); } - return 1; -} -static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *closure) -{ - jl_method_t *m = def->func.method; - if ((m->name == jl_symbol("__init__") || m->ccallable) && jl_is_dispatch_tupletype(m->sig)) { - // ensure `__init__()` and @ccallables get strongly-hinted, specialized, and compiled - jl_method_instance_t *mi = jl_specializations_get_linfo(m, m->sig, jl_emptysvec); - jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); - } - else { - jl_svec_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations); - size_t i, l = jl_svec_len(specializations); - for (i = 0; i < l; i++) { - jl_value_t *mi = jl_svecref(specializations, i); - if (mi != jl_nothing) - precompile_enq_specialization_((jl_method_instance_t*)mi, closure); + if (outputji) { + if (jl_options.incremental) { + write_srctext(&f, udeps, srctextpos); } + ios_close(&f); } - if (m->ccallable) - jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)m->ccallable); - return 1; -} - -static int precompile_enq_all_specializations_(jl_methtable_t *mt, void *env) -{ - return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), precompile_enq_all_specializations__, env); -} -static void *jl_precompile(int all) -{ - if (all) - jl_compile_all_defs(); - // this "found" array will contain function - // type signatures that were inferred but haven't been compiled - jl_array_t *m = jl_alloc_vec_any(0); - jl_array_t *m2 = NULL; - jl_method_instance_t *mi = NULL; - JL_GC_PUSH3(&m, &m2, &mi); - jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); - m2 = jl_alloc_vec_any(0); - for (size_t i = 0; i < jl_array_len(m); i++) { - jl_value_t *item = jl_array_ptr_ref(m, i); - if (jl_is_method_instance(item)) { - mi = (jl_method_instance_t*)item; - size_t min_world = 0; - size_t max_world = ~(size_t)0; - if (!jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->def.method)) - mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0); - if (mi) - jl_array_ptr_1d_push(m2, (jl_value_t*)mi); - } - else { - assert(jl_is_simplevector(item)); - assert(jl_svec_len(item) == 2); - jl_array_ptr_1d_push(m2, item); + for (size_t i = 0; i < jl_current_modules.size; i += 2) { + if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { + jl_printf(JL_STDERR, "\nWARNING: detected unclosed module: "); + jl_static_show(JL_STDERR, (jl_value_t*)jl_current_modules.table[i]); + jl_printf(JL_STDERR, "\n ** incremental compilation may be broken for this module **\n\n"); } } - m = NULL; - void *native_code = jl_create_native(m2, NULL, NULL, 0); JL_GC_POP(); - return native_code; + jl_gc_enable_finalizers(ct, 1); } #ifdef __cplusplus diff --git a/src/precompile_utils.c b/src/precompile_utils.c new file mode 100644 index 0000000000000..0203569f33c37 --- /dev/null +++ b/src/precompile_utils.c @@ -0,0 +1,321 @@ +// f{<:Union{...}}(...) 
is a common pattern +// and expanding the Union may give a leaf function +static void _compile_all_tvar_union(jl_value_t *methsig) +{ + int tvarslen = jl_subtype_env_size(methsig); + jl_value_t *sigbody = methsig; + jl_value_t **roots; + JL_GC_PUSHARGS(roots, 1 + 2 * tvarslen); + jl_value_t **env = roots + 1; + int *idx = (int*)alloca(sizeof(int) * tvarslen); + int i; + for (i = 0; i < tvarslen; i++) { + assert(jl_is_unionall(sigbody)); + idx[i] = 0; + env[2 * i] = (jl_value_t*)((jl_unionall_t*)sigbody)->var; + env[2 * i + 1] = jl_bottom_type; // initialize the list with Union{}, since T<:Union{} is always a valid option + sigbody = ((jl_unionall_t*)sigbody)->body; + } + + for (i = 0; i < tvarslen; /* incremented by inner loop */) { + jl_value_t **sig = &roots[0]; + JL_TRY { + // TODO: wrap in UnionAll for each tvar in env[2*i + 1] ? + // currently doesn't matter much, since jl_compile_hint doesn't work on abstract types + *sig = (jl_value_t*)jl_instantiate_type_with(sigbody, env, tvarslen); + } + JL_CATCH { + goto getnext; // sigh, we found an invalid type signature. should we warn the user? + } + if (!jl_has_concrete_subtype(*sig)) + goto getnext; // signature wouldn't be callable / is invalid -- skip it + if (jl_is_concrete_type(*sig)) { + if (jl_compile_hint((jl_tupletype_t *)*sig)) + goto getnext; // success + } + + getnext: + for (i = 0; i < tvarslen; i++) { + jl_tvar_t *tv = (jl_tvar_t*)env[2 * i]; + if (jl_is_uniontype(tv->ub)) { + size_t l = jl_count_union_components(tv->ub); + size_t j = idx[i]; + if (j == l) { + env[2 * i + 1] = jl_bottom_type; + idx[i] = 0; + } + else { + jl_value_t *ty = jl_nth_union_component(tv->ub, j); + if (!jl_is_concrete_type(ty)) + ty = (jl_value_t*)jl_new_typevar(tv->name, tv->lb, ty); + env[2 * i + 1] = ty; + idx[i] = j + 1; + break; + } + } + else { + env[2 * i + 1] = (jl_value_t*)tv; + } + } + } + JL_GC_POP(); +} + +// f(::Union{...}, ...) is a common pattern +// and expanding the Union may give a leaf function +static void _compile_all_union(jl_value_t *sig) +{ + jl_tupletype_t *sigbody = (jl_tupletype_t*)jl_unwrap_unionall(sig); + size_t count_unions = 0; + size_t i, l = jl_svec_len(sigbody->parameters); + jl_svec_t *p = NULL; + jl_value_t *methsig = NULL; + + for (i = 0; i < l; i++) { + jl_value_t *ty = jl_svecref(sigbody->parameters, i); + if (jl_is_uniontype(ty)) + ++count_unions; + else if (ty == jl_bottom_type) + return; // why does this method exist? 
+ else if (jl_is_datatype(ty) && !jl_has_free_typevars(ty) && + ((!jl_is_kind(ty) && ((jl_datatype_t*)ty)->isconcretetype) || + ((jl_datatype_t*)ty)->name == jl_type_typename)) + return; // no amount of union splitting will make this a leaftype signature + } + + if (count_unions == 0 || count_unions >= 6) { + _compile_all_tvar_union(sig); + return; + } + + int *idx = (int*)alloca(sizeof(int) * count_unions); + for (i = 0; i < count_unions; i++) { + idx[i] = 0; + } + + JL_GC_PUSH2(&p, &methsig); + int idx_ctr = 0, incr = 0; + while (!incr) { + p = jl_alloc_svec_uninit(l); + for (i = 0, idx_ctr = 0, incr = 1; i < l; i++) { + jl_value_t *ty = jl_svecref(sigbody->parameters, i); + if (jl_is_uniontype(ty)) { + assert(idx_ctr < count_unions); + size_t l = jl_count_union_components(ty); + size_t j = idx[idx_ctr]; + jl_svecset(p, i, jl_nth_union_component(ty, j)); + ++j; + if (incr) { + if (j == l) { + idx[idx_ctr] = 0; + } + else { + idx[idx_ctr] = j; + incr = 0; + } + } + ++idx_ctr; + } + else { + jl_svecset(p, i, ty); + } + } + methsig = jl_apply_tuple_type(p, 1); + methsig = jl_rewrap_unionall(methsig, sig); + _compile_all_tvar_union(methsig); + } + + JL_GC_POP(); +} + +static int compile_all_collect__(jl_typemap_entry_t *ml, void *env) +{ + jl_array_t *allmeths = (jl_array_t*)env; + jl_method_t *m = ml->func.method; + if (m->external_mt) + return 1; + if (m->source) { + // method has a non-generated definition; can be compiled generically + jl_array_ptr_1d_push(allmeths, (jl_value_t*)m); + } + return 1; +} + +static int compile_all_collect_(jl_methtable_t *mt, void *env) +{ + jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), compile_all_collect__, env); + return 1; +} + +static void jl_compile_all_defs(jl_array_t *mis) +{ + jl_array_t *allmeths = jl_alloc_vec_any(0); + JL_GC_PUSH1(&allmeths); + + jl_foreach_reachable_mtable(compile_all_collect_, allmeths); + + size_t i, l = jl_array_nrows(allmeths); + for (i = 0; i < l; i++) { + jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(allmeths, i); + if (jl_is_datatype(m->sig) && jl_isa_compileable_sig((jl_tupletype_t*)m->sig, jl_emptysvec, m)) { + // method has a single compilable specialization, e.g. its definition + // signature is concrete. in this case we can just hint it. 
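The `idx[]`/`incr` bookkeeping in `_compile_all_union` above is a mixed-radix odometer over the Union positions: each pass through the `while (!incr)` loop emits one concrete combination, and the loop terminates once every position has wrapped around. Below is a standalone sketch of the same counter with the emit and advance steps separated for clarity; the element names and sizes are invented for illustration and the snippet is not part of the patch.

#include <stdio.h>

int main(void)
{
    const char *slot0[] = {"Int64", "Float64"};      /* stands in for Union{Int64,Float64} */
    const char *slot1[] = {"Int8", "UInt8", "Bool"}; /* stands in for Union{Int8,UInt8,Bool} */
    size_t sizes[] = {2, 3};
    size_t idx[] = {0, 0};
    int incr = 0;
    while (!incr) {
        incr = 1;
        printf("f(::%s, ::%s)\n", slot0[idx[0]], slot1[idx[1]]);
        /* advance the odometer, least-significant position first */
        for (size_t i = 0; i < 2; i++) {
            size_t j = idx[i] + 1;
            if (j == sizes[i]) {
                idx[i] = 0;  /* wrapped: carry into the next position */
            }
            else {
                idx[i] = j;
                incr = 0;    /* no carry: more combinations remain */
                break;
            }
        }
    }
    return 0; /* prints 2*3 = 6 concrete signatures */
}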
+ jl_compile_hint((jl_tupletype_t*)m->sig); + } + else { + // first try to create leaf signatures from the signature declaration and compile those + _compile_all_union(m->sig); + + // finally, compile a fully generic fallback that can work for all arguments + jl_method_instance_t *unspec = jl_get_unspecialized(m); + if (unspec) + jl_array_ptr_1d_push(mis, (jl_value_t*)unspec); + } + } + + JL_GC_POP(); +} + +static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closure) +{ + assert(jl_is_method_instance(mi)); + jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); + while (codeinst) { + int do_compile = 0; + if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) { + jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); + if (inferred && + inferred != jl_nothing && + jl_ir_flag_inferred(inferred) && + (jl_ir_inlining_cost(inferred) == UINT16_MAX)) { + do_compile = 1; + } + else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) { + do_compile = 1; + } + } + if (do_compile) { + jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); + return 1; + } + codeinst = jl_atomic_load_relaxed(&codeinst->next); + } + return 1; +} + +static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *closure) +{ + jl_method_t *m = def->func.method; + if (m->external_mt) + return 1; + if ((m->name == jl_symbol("__init__") || m->ccallable) && jl_is_dispatch_tupletype(m->sig)) { + // ensure `__init__()` and @ccallables get strongly-hinted, specialized, and compiled + jl_method_instance_t *mi = jl_specializations_get_linfo(m, m->sig, jl_emptysvec); + jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi); + } + else { + jl_value_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations); + if (!jl_is_svec(specializations)) { + precompile_enq_specialization_((jl_method_instance_t*)specializations, closure); + } + else { + size_t i, l = jl_svec_len(specializations); + for (i = 0; i < l; i++) { + jl_value_t *mi = jl_svecref(specializations, i); + if (mi != jl_nothing) + precompile_enq_specialization_((jl_method_instance_t*)mi, closure); + } + } + } + if (m->ccallable) + jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)m->ccallable); + return 1; +} + +static int precompile_enq_all_specializations_(jl_methtable_t *mt, void *env) +{ + return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), precompile_enq_all_specializations__, env); +} + +static void *jl_precompile_(jl_array_t *m, int external_linkage) +{ + jl_array_t *m2 = NULL; + jl_method_instance_t *mi = NULL; + JL_GC_PUSH2(&m2, &mi); + m2 = jl_alloc_vec_any(0); + for (size_t i = 0; i < jl_array_nrows(m); i++) { + jl_value_t *item = jl_array_ptr_ref(m, i); + if (jl_is_method_instance(item)) { + mi = (jl_method_instance_t*)item; + size_t min_world = 0; + size_t max_world = ~(size_t)0; + if (mi != jl_atomic_load_relaxed(&mi->def.method->unspecialized) && !jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->sparam_vals, mi->def.method)) + mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0); + if (mi) + jl_array_ptr_1d_push(m2, (jl_value_t*)mi); + } + else { + assert(jl_is_simplevector(item)); + assert(jl_svec_len(item) == 2); + jl_array_ptr_1d_push(m2, item); + } + } + void *native_code = jl_create_native(m2, NULL, NULL, 0, 1, external_linkage, + jl_atomic_load_acquire(&jl_world_counter)); + JL_GC_POP(); + 
return native_code; +} + +static void *jl_precompile(int all) +{ + // array of MethodInstances and ccallable aliases to include in the output + jl_array_t *m = jl_alloc_vec_any(0); + JL_GC_PUSH1(&m); + if (all) + jl_compile_all_defs(m); + jl_foreach_reachable_mtable(precompile_enq_all_specializations_, m); + void *native_code = jl_precompile_(m, 0); + JL_GC_POP(); + return native_code; +} + +static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_methods, jl_array_t *new_specializations) +{ + if (!worklist) + return NULL; + // this "found" array will contain function + // type signatures that were inferred but haven't been compiled + jl_array_t *m = jl_alloc_vec_any(0); + JL_GC_PUSH1(&m); + size_t i, n = jl_array_nrows(worklist); + for (i = 0; i < n; i++) { + jl_module_t *mod = (jl_module_t*)jl_array_ptr_ref(worklist, i); + assert(jl_is_module(mod)); + foreach_mtable_in_module(mod, precompile_enq_all_specializations_, m); + } + n = jl_array_nrows(extext_methods); + for (i = 0; i < n; i++) { + jl_method_t *method = (jl_method_t*)jl_array_ptr_ref(extext_methods, i); + assert(jl_is_method(method)); + jl_value_t *specializations = jl_atomic_load_relaxed(&method->specializations); + if (!jl_is_svec(specializations)) { + precompile_enq_specialization_((jl_method_instance_t*)specializations, m); + } + else { + size_t j, l = jl_svec_len(specializations); + for (j = 0; j < l; j++) { + jl_value_t *mi = jl_svecref(specializations, j); + if (mi != jl_nothing) + precompile_enq_specialization_((jl_method_instance_t*)mi, m); + } + } + } + n = jl_array_nrows(new_specializations); + for (i = 0; i < n; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i); + precompile_enq_specialization_(ci->def, m); + } + void *native_code = jl_precompile_(m, 1); + JL_GC_POP(); + return native_code; +} diff --git a/src/processor.cpp b/src/processor.cpp index b9dfc2b7f0b4e..ee521bd05c8b2 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -4,6 +4,8 @@ #include "llvm-version.h" #include +#include +#include #include #include @@ -17,6 +19,12 @@ #include "julia_assert.h" +#ifndef _OS_WINDOWS_ +#include +#endif + +#include + // CPU target string is a list of strings separated by `;` each string starts with a CPU // or architecture name and followed by an optional list of features separated by `,`. // A "generic" or empty CPU name means the basic required feature set of the target ISA @@ -101,13 +109,13 @@ static inline bool test_nbit(const T1 &bits, T2 _bitidx) } template -static inline void unset_bits(T &bits) +static inline void unset_bits(T &bits) JL_NOTSAFEPOINT { (void)bits; } template -static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest) +static inline void unset_bits(T &bits, T1 _bitidx, Rest... 
rest) JL_NOTSAFEPOINT { auto bitidx = static_cast(_bitidx); auto u32idx = bitidx / 32; @@ -136,7 +144,7 @@ static inline void set_bit(T &bits, T1 _bitidx, bool val) template struct FeatureList { uint32_t eles[n]; - uint32_t &operator[](size_t pos) + uint32_t &operator[](size_t pos) JL_NOTSAFEPOINT { return eles[pos]; } @@ -249,7 +257,7 @@ static inline void mask_features(const FeatureList masks, uint32_t *features) } // Turn feature list to a string the LLVM accept -static inline std::string join_feature_strs(const std::vector &strs) +static inline std::string join_feature_strs(const llvm::ArrayRef &strs) { size_t nstr = strs.size(); if (!nstr) @@ -269,7 +277,7 @@ static inline void append_ext_features(std::string &features, const std::string features.append(ext_features); } -static inline void append_ext_features(std::vector &features, +static inline void append_ext_features(llvm::SmallVectorImpl &features, const std::string &ext_features) { if (ext_features.empty()) @@ -291,12 +299,6 @@ static inline void append_ext_features(std::vector &features, * Target specific type/constant definitions, always enable. */ -struct FeatureName { - const char *name; - uint32_t bit; // bit index into a `uint32_t` array; - uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support -}; - template struct CPUSpec { const char *name; @@ -393,13 +395,11 @@ JL_UNUSED static uint32_t find_feature_bit(const FeatureName *features, size_t n // 1. CPU ID is less stable (they are not bound to hardware/OS API) // 2. We need to support CPU names that are not recognized by us and therefore doesn't have an ID // 3. CPU name is trivial to parse -static inline std::vector serialize_target_data(llvm::StringRef name, - uint32_t nfeature, - const uint32_t *features_en, - const uint32_t *features_dis, - llvm::StringRef ext_features) +static inline llvm::SmallVector +serialize_target_data(llvm::StringRef name, uint32_t nfeature, const uint32_t *features_en, + const uint32_t *features_dis, llvm::StringRef ext_features) { - std::vector res; + llvm::SmallVector res; auto add_data = [&] (const void *data, size_t sz) { if (sz == 0) return; @@ -420,10 +420,9 @@ static inline std::vector serialize_target_data(llvm::StringRef name, } template -static inline std::vector serialize_target_data(llvm::StringRef name, - const FeatureList &features_en, - const FeatureList &features_dis, - llvm::StringRef ext_features) +static inline llvm::SmallVector +serialize_target_data(llvm::StringRef name, const FeatureList &features_en, + const FeatureList &features_dis, llvm::StringRef ext_features) { return serialize_target_data(name, n, &features_en[0], &features_dis[0], ext_features); } @@ -442,7 +441,7 @@ struct TargetData { // In addition to the serialized data, the first `uint32_t` gives the number of targets saved // and each target has a `uint32_t` flag before the serialized target data. template -static inline std::vector> deserialize_target_data(const uint8_t *data) +static inline llvm::SmallVector, 0> deserialize_target_data(const uint8_t *data) { auto load_data = [&] (void *dest, size_t sz) { memcpy(dest, data, sz); @@ -457,7 +456,7 @@ static inline std::vector> deserialize_target_data(const uint8_t * }; uint32_t ntarget; load_data(&ntarget, 4); - std::vector> res(ntarget); + llvm::SmallVector, 0> res(ntarget); for (uint32_t i = 0; i < ntarget; i++) { auto &target = res[i]; load_data(&target.en.flags, 4); @@ -499,12 +498,12 @@ static inline int get_clone_base(const char *start, const char *end) // Parse cmdline string. 
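parse_cmdline, introduced just below this comment, splits the --cpu-target option on ';' into per-target entries and each entry on ',' into a CPU name plus feature toggles. A rough stand-alone tokenizer for that outer shape, with clone_all/base handling omitted and all names invented for the sketch:

#include <cstdio>
#include <string>
#include <vector>

struct TargetArg {
    std::string cpu;
    std::vector<std::string> features;
};

// Split "name,feat1,-feat2;name2,..." the way the option string is laid out.
static std::vector<TargetArg> split_targets(const std::string &opt)
{
    std::vector<TargetArg> out;
    size_t pos = 0;
    while (pos <= opt.size()) {
        size_t semi = opt.find(';', pos);
        if (semi == std::string::npos)
            semi = opt.size();
        std::string entry = opt.substr(pos, semi - pos);
        TargetArg t;
        size_t comma = entry.find(',');
        t.cpu = entry.substr(0, comma);
        while (comma != std::string::npos) {
            size_t next = entry.find(',', comma + 1);
            t.features.push_back(entry.substr(comma + 1,
                next == std::string::npos ? std::string::npos : next - comma - 1));
            comma = next;
        }
        out.push_back(t);
        pos = semi + 1;
    }
    return out;
}

int main()
{
    for (auto &t : split_targets("generic;skylake,clone_all,-rdrnd"))
        std::printf("cpu=%s nfeatures=%zu\n", t.cpu.c_str(), t.features.size());
    return 0;
}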
This handles `clone_all` and `base` special features. // Other feature names will be passed to `feature_cb` for target dependent parsing. template -static inline std::vector> +static inline llvm::SmallVector, 0> parse_cmdline(const char *option, F &&feature_cb) { if (!option) option = "native"; - std::vector> res; + llvm::SmallVector, 0> res; TargetData arg{}; auto reset_arg = [&] { res.push_back(arg); @@ -611,118 +610,220 @@ parse_cmdline(const char *option, F &&feature_cb) // Cached version of command line parsing template -static inline std::vector> &get_cmdline_targets(F &&feature_cb) +static inline llvm::SmallVector, 0> &get_cmdline_targets(F &&feature_cb) { - static std::vector> targets = + static llvm::SmallVector, 0> targets = parse_cmdline(jl_options.cpu_target, std::forward(feature_cb)); return targets; } +extern "C" { +void *image_pointers_unavailable; +extern void * JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(image_pointers_unavailable) jl_image_pointers; +} + // Load sysimg, use the `callback` for dispatch and perform all relocations // for the selected target. template -static inline jl_sysimg_fptrs_t parse_sysimg(void *hdl, F &&callback) -{ - jl_sysimg_fptrs_t res = {nullptr, 0, nullptr, 0, nullptr, nullptr}; - - // .data base - char *data_base; - jl_dlsym(hdl, "jl_sysimg_gvars_base", (void**)&data_base, 1); - // .text base - char *text_base; - jl_dlsym(hdl, "jl_sysimg_fvars_base", (void**)&text_base, 1); - res.base = text_base; - - int32_t *offsets; - jl_dlsym(hdl, "jl_sysimg_fvars_offsets", (void**)&offsets, 1); - uint32_t nfunc = offsets[0]; - res.offsets = offsets + 1; - - void *ids; - jl_dlsym(hdl, "jl_dispatch_target_ids", &ids, 1); - uint32_t target_idx = callback(ids); - - int32_t *reloc_slots; - jl_dlsym(hdl, "jl_dispatch_reloc_slots", (void **)&reloc_slots, 1); - const uint32_t nreloc = reloc_slots[0]; - reloc_slots += 1; - uint32_t *clone_idxs; - int32_t *clone_offsets; - jl_dlsym(hdl, "jl_dispatch_fvars_idxs", (void**)&clone_idxs, 1); - jl_dlsym(hdl, "jl_dispatch_fvars_offsets", (void**)&clone_offsets, 1); - uint32_t tag_len = clone_idxs[0]; - clone_idxs += 1; - - assert(tag_len & jl_sysimg_tag_mask); - std::vector base_offsets = {res.offsets}; - // Find target - for (uint32_t i = 0;i < target_idx;i++) { - uint32_t len = jl_sysimg_val_mask & tag_len; - if (jl_sysimg_tag_mask & tag_len) { - if (i != 0) - clone_offsets += nfunc; - clone_idxs += len + 1; - } - else { - clone_offsets += len; - clone_idxs += len + 2; - } - tag_len = clone_idxs[-1]; - base_offsets.push_back(tag_len & jl_sysimg_tag_mask ? 
clone_offsets : nullptr); +static inline jl_image_t parse_sysimg(void *hdl, F &&callback) +{ + JL_TIMING(LOAD_IMAGE, LOAD_Processor); + jl_image_t res{}; + + const jl_image_pointers_t *pointers; + if (hdl == jl_exe_handle && &jl_image_pointers != JL_WEAK_SYMBOL_DEFAULT(image_pointers_unavailable)) + pointers = (const jl_image_pointers_t *)&jl_image_pointers; + else + jl_dlsym(hdl, "jl_image_pointers", (void**)&pointers, 1); + + const void *ids = pointers->target_data; + jl_value_t* rejection_reason = nullptr; + JL_GC_PUSH1(&rejection_reason); + uint32_t target_idx = callback(ids, &rejection_reason); + if (target_idx == (uint32_t)-1) { + jl_error(jl_string_ptr(rejection_reason)); } + JL_GC_POP(); - bool clone_all = (tag_len & jl_sysimg_tag_mask) != 0; - // Fill in return value - if (clone_all) { - // clone_all - if (target_idx != 0) { - res.offsets = clone_offsets; - } + if (pointers->header->version != 1) { + jl_error("Image file is not compatible with this version of Julia"); } - else { - uint32_t base_idx = clone_idxs[0]; - assert(base_idx < target_idx); - if (target_idx != 0) { - res.offsets = base_offsets[base_idx]; - assert(res.offsets); + + llvm::SmallVector fvars(pointers->header->nfvars); + llvm::SmallVector gvars(pointers->header->ngvars); + + llvm::SmallVector, 0> clones; + + for (unsigned i = 0; i < pointers->header->nshards; i++) { + auto shard = pointers->shards[i]; + + // .data base + char *data_base = (char *)shard.gvar_base; + + // .text base + const char *text_base = shard.fvar_base; + + const int32_t *offsets = shard.fvar_offsets; + uint32_t nfunc = offsets[0]; + assert(nfunc <= pointers->header->nfvars); + offsets++; + const int32_t *reloc_slots = shard.clone_slots; + const uint32_t nreloc = reloc_slots[0]; + reloc_slots += 1; + const uint32_t *clone_idxs = shard.clone_idxs; + const int32_t *clone_offsets = shard.clone_offsets; + uint32_t tag_len = clone_idxs[0]; + clone_idxs += 1; + + assert(tag_len & jl_sysimg_tag_mask); + llvm::SmallVector base_offsets = {offsets}; + // Find target + for (uint32_t i = 0;i < target_idx;i++) { + uint32_t len = jl_sysimg_val_mask & tag_len; + if (jl_sysimg_tag_mask & tag_len) { + if (i != 0) + clone_offsets += nfunc; + clone_idxs += len + 1; + } + else { + clone_offsets += len; + clone_idxs += len + 2; + } + tag_len = clone_idxs[-1]; + base_offsets.push_back(tag_len & jl_sysimg_tag_mask ? 
clone_offsets : nullptr); } - clone_idxs++; - res.nclones = tag_len; - res.clone_offsets = clone_offsets; - res.clone_idxs = clone_idxs; - } - // Do relocation - uint32_t reloc_i = 0; - uint32_t len = jl_sysimg_val_mask & tag_len; - for (uint32_t i = 0; i < len; i++) { - uint32_t idx = clone_idxs[i]; - int32_t offset; + + bool clone_all = (tag_len & jl_sysimg_tag_mask) != 0; + // Fill in return value if (clone_all) { - offset = res.offsets[idx]; - } - else if (idx & jl_sysimg_tag_mask) { - idx = idx & jl_sysimg_val_mask; - offset = clone_offsets[i]; + // clone_all + if (target_idx != 0) { + offsets = clone_offsets; + } } else { - continue; + uint32_t base_idx = clone_idxs[0]; + assert(base_idx < target_idx); + if (target_idx != 0) { + offsets = base_offsets[base_idx]; + assert(offsets); + } + clone_idxs++; + unsigned start = clones.size(); + clones.resize(start + tag_len); + auto idxs = shard.fvar_idxs; + for (unsigned i = 0; i < tag_len; i++) { + clones[start + i] = {(clone_idxs[i] & ~jl_sysimg_val_mask) | idxs[clone_idxs[i] & jl_sysimg_val_mask], clone_offsets[i] + text_base}; + } } - bool found = false; - for (; reloc_i < nreloc; reloc_i++) { - auto reloc_idx = ((const uint32_t*)reloc_slots)[reloc_i * 2]; - if (reloc_idx == idx) { - found = true; - auto slot = (const void**)(data_base + reloc_slots[reloc_i * 2 + 1]); - *slot = offset + res.base; + // Do relocation + uint32_t reloc_i = 0; + uint32_t len = jl_sysimg_val_mask & tag_len; + for (uint32_t i = 0; i < len; i++) { + uint32_t idx = clone_idxs[i]; + int32_t offset; + if (clone_all) { + offset = offsets[idx]; } - else if (reloc_idx > idx) { - break; + else if (idx & jl_sysimg_tag_mask) { + idx = idx & jl_sysimg_val_mask; + offset = clone_offsets[i]; } + else { + continue; + } + bool found = false; + for (; reloc_i < nreloc; reloc_i++) { + auto reloc_idx = ((const uint32_t*)reloc_slots)[reloc_i * 2]; + if (reloc_idx == idx) { + found = true; + auto slot = (const void**)(data_base + reloc_slots[reloc_i * 2 + 1]); + assert(slot); + *slot = offset + text_base; + } + else if (reloc_idx > idx) { + break; + } + } + assert(found && "Cannot find GOT entry for cloned function."); + (void)found; + } + + auto fidxs = shard.fvar_idxs; + for (uint32_t i = 0; i < nfunc; i++) { + fvars[fidxs[i]] = text_base + offsets[i]; } - assert(found && "Cannot find GOT entry for cloned function."); - (void)found; + + auto gidxs = shard.gvar_idxs; + unsigned ngvars = shard.gvar_offsets[0]; + assert(ngvars <= pointers->header->ngvars); + for (uint32_t i = 0; i < ngvars; i++) { + gvars[gidxs[i]] = data_base + shard.gvar_offsets[i+1]; + } + } + + if (!fvars.empty()) { + auto offsets = (int32_t *) malloc(sizeof(int32_t) * fvars.size()); + res.fptrs.base = fvars[0]; + for (size_t i = 0; i < fvars.size(); i++) { + assert(fvars[i] && "Missing function pointer!"); + offsets[i] = fvars[i] - res.fptrs.base; + } + res.fptrs.offsets = offsets; + res.fptrs.noffsets = fvars.size(); } + if (!gvars.empty()) { + auto offsets = (int32_t *) malloc(sizeof(int32_t) * gvars.size()); + res.gvars_base = (uintptr_t *)gvars[0]; + for (size_t i = 0; i < gvars.size(); i++) { + assert(gvars[i] && "Missing global variable pointer!"); + offsets[i] = gvars[i] - (const char *)res.gvars_base; + } + res.gvars_offsets = offsets; + res.ngvars = gvars.size(); + } + + if (!clones.empty()) { + assert(!fvars.empty()); + std::sort(clones.begin(), clones.end(), + [](const std::pair &a, const std::pair &b) { + return (a.first & jl_sysimg_val_mask) < (b.first & jl_sysimg_val_mask); + }); + auto 
clone_offsets = (int32_t *) malloc(sizeof(int32_t) * clones.size()); + auto clone_idxs = (uint32_t *) malloc(sizeof(uint32_t) * clones.size()); + for (size_t i = 0; i < clones.size(); i++) { + clone_idxs[i] = clones[i].first; + clone_offsets[i] = clones[i].second - res.fptrs.base; + } + res.fptrs.clone_idxs = clone_idxs; + res.fptrs.clone_offsets = clone_offsets; + res.fptrs.nclones = clones.size(); + } + +#ifdef _OS_WINDOWS_ + res.base = (intptr_t)hdl; +#else + Dl_info dlinfo; + if (dladdr((void*)pointers, &dlinfo) != 0) { + res.base = (intptr_t)dlinfo.dli_fbase; + } + else { + res.base = 0; + } +#endif + + { + void *pgcstack_func_slot = pointers->ptls->pgcstack_func_slot; + void *pgcstack_key_slot = pointers->ptls->pgcstack_key_slot; + jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot); + + size_t *tls_offset_idx = pointers->ptls->tls_offset; + *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset); + } + + res.jl_small_typeof = pointers->jl_small_typeof; + return res; } @@ -734,20 +835,24 @@ static inline void check_cmdline(T &&cmdline, bool imaging) // sysimg means. Make it an error for now. if (!imaging) { if (cmdline.size() > 1) { - jl_error("More than one command line CPU targets specified " - "without a `--output-` flag specified"); + jl_safe_printf("More than one command line CPU targets specified " + "without a `--output-` flag specified"); + exit(1); } if (cmdline[0].en.flags & JL_TARGET_CLONE_ALL) { - jl_error("\"clone_all\" feature specified " - "without a `--output-` flag specified"); + jl_safe_printf("\"clone_all\" feature specified " + "without a `--output-` flag specified"); + exit(1); } if (cmdline[0].en.flags & JL_TARGET_OPTSIZE) { - jl_error("\"opt_size\" feature specified " - "without a `--output-` flag specified"); + jl_safe_printf("\"opt_size\" feature specified " + "without a `--output-` flag specified"); + exit(1); } if (cmdline[0].en.flags & JL_TARGET_MINSIZE) { - jl_error("\"min_size\" feature specified " - "without a `--output-` flag specified"); + jl_safe_printf("\"min_size\" feature specified " + "without a `--output-` flag specified"); + exit(1); } } } @@ -760,17 +865,20 @@ struct SysimgMatch { // Find the best match in the sysimg. // Select the best one based on the largest vector register and largest compatible feature set. template -static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size) +static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size, jl_value_t **rejection_reason) { SysimgMatch match; bool match_name = false; int feature_size = 0; + llvm::SmallVector rejection_reasons; + rejection_reasons.reserve(sysimg.size()); for (uint32_t i = 0; i < sysimg.size(); i++) { auto &imgt = sysimg[i]; if (!(imgt.en.features & target.dis.features).empty()) { // Check sysimg enabled features against runtime disabled features // This is valid (and all what we can do) // even if one or both of the targets are unknown. 
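The blocks above fold the per-shard pointer tables back into the compact form the runtime keeps around: one base pointer plus an array of signed 32-bit offsets. A small sketch of that encode/decode round trip, with a plain char array standing in for the image's text section (assumes every pointer lands within +/-2 GB of the base):

#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
    static char text[1024];                       // pretend .text segment
    std::vector<const char*> fvars = {text + 16, text + 400, text + 7};

    // Encode: the first pointer becomes the base, the rest become int32_t deltas.
    const char *base = fvars[0];
    std::vector<int32_t> offsets(fvars.size());
    for (size_t i = 0; i < fvars.size(); i++)
        offsets[i] = (int32_t)(fvars[i] - base);

    // Decode: what a consumer of the offsets table would do.
    for (size_t i = 0; i < offsets.size(); i++)
        assert(base + offsets[i] == fvars[i]);
    return 0;
}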
+ rejection_reasons.push_back("Rejecting this target due to use of runtime-disabled features\n"); continue; } if (imgt.name == target.name) { @@ -781,25 +889,44 @@ static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_v } } else if (match_name) { + rejection_reasons.push_back("Rejecting this target since another target has a cpu name match\n"); continue; } int new_vsz = max_vector_size(imgt.en.features); - if (match.vreg_size > new_vsz) + if (match.vreg_size > new_vsz) { + rejection_reasons.push_back("Rejecting this target since another target has a larger vector register size\n"); continue; + } int new_feature_size = imgt.en.features.nbits(); if (match.vreg_size < new_vsz) { match.best_idx = i; match.vreg_size = new_vsz; feature_size = new_feature_size; + rejection_reasons.push_back("Updating best match to this target due to larger vector register size\n"); continue; } - if (new_feature_size < feature_size) + if (new_feature_size < feature_size) { + rejection_reasons.push_back("Rejecting this target since another target has a larger feature set\n"); continue; + } match.best_idx = i; feature_size = new_feature_size; + rejection_reasons.push_back("Updating best match to this target\n"); + } + if (match.best_idx == (uint32_t)-1) { + // Construct a nice error message for debugging purposes + std::string error_msg = "Unable to find compatible target in cached code image.\n"; + for (size_t i = 0; i < rejection_reasons.size(); i++) { + error_msg += "Target "; + error_msg += std::to_string(i); + error_msg += " ("; + error_msg += sysimg[i].name; + error_msg += "): "; + error_msg += rejection_reasons[i]; + } + if (rejection_reason) + *rejection_reason = jl_pchar_to_string(error_msg.data(), error_msg.size()); } - if (match.best_idx == (uint32_t)-1) - jl_error("Unable to find compatible target in system image."); return match; } @@ -851,3 +978,30 @@ static inline void dump_cpu_spec(uint32_t cpu, const FeatureList &features, #include "processor_fallback.cpp" #endif + +extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets() { + auto specs = jl_get_llvm_clone_targets(); + const uint32_t base_flags = 0; + llvm::SmallVector data; + auto push_i32 = [&] (uint32_t v) { + uint8_t buff[4]; + memcpy(buff, &v, 4); + data.insert(data.end(), buff, buff + 4); + }; + push_i32(specs.size()); + for (uint32_t i = 0; i < specs.size(); i++) { + push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); + auto &specdata = specs[i].data; + data.insert(data.end(), specdata.begin(), specdata.end()); + } + + jl_value_t *arr = (jl_value_t*)jl_alloc_array_1d(jl_array_uint8_type, data.size()); + uint8_t *out = jl_array_data(arr, uint8_t); + memcpy(out, data.data(), data.size()); + return arr; +} + +extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **fnames, size_t *nf) { + *fnames = feature_names; + *nf = nfeature_names; +} diff --git a/src/processor.h b/src/processor.h index f3b571cf9b937..a4c8deb9a8796 100644 --- a/src/processor.h +++ b/src/processor.h @@ -14,82 +14,9 @@ extern "C" { #endif -/** - * Related sysimg exported symbols - * - * In the following text, function refers to an abstract entity. - * It corresponds to a `Function` that we emit in the codegen, and there might be multiple copies - * of it in the system image. Only one of those copies will be used in a given session. - * Function pointers refer to a real piece of code in the system image. 
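match_sysimg_targets, earlier in this hunk, settles on one stored target by filtering out anything that enables a runtime-disabled feature, preferring an exact CPU-name match, then the widest vector registers, then the largest feature count, and it now records a reason string for every candidate it passes over. A compressed sketch of that ordering over plain structs; the types and the 64-bit feature mask are stand-ins, not the real FeatureList:

#include <bitset>
#include <cstdint>
#include <string>
#include <vector>

struct Candidate {
    std::string name;
    uint64_t features;   // stand-in for the FeatureList bit set
    int vector_size;     // stand-in for max_vector_size(features)
};

// Same priority order as match_sysimg_targets. Returns -1 if nothing fits.
static int pick_target(const std::vector<Candidate> &stored, const Candidate &host,
                       uint64_t disabled)
{
    int best = -1, best_vsz = 0, best_nfeat = 0;
    bool name_matched = false;
    for (size_t i = 0; i < stored.size(); i++) {
        const Candidate &c = stored[i];
        if (c.features & disabled)
            continue;                              // needs something the host turned off
        bool is_name_match = (c.name == host.name);
        if (is_name_match && !name_matched) {
            name_matched = true;
            best_vsz = best_nfeat = 0;             // a name match trumps earlier picks
        }
        else if (name_matched && !is_name_match) {
            continue;                              // once a name matched, ignore the rest
        }
        int nfeat = (int)std::bitset<64>(c.features).count();
        if (c.vector_size < best_vsz)
            continue;
        if (c.vector_size == best_vsz && nfeat < best_nfeat)
            continue;
        best = (int)i;
        best_vsz = c.vector_size;
        best_nfeat = nfeat;
    }
    return best;
}

int main()
{
    std::vector<Candidate> stored = {{"generic", 0x1, 16}, {"skylake", 0x7, 32}};
    Candidate host{"skylake", 0xf, 32};
    return pick_target(stored, host, /*disabled=*/0x8) == 1 ? 0 : 1;
}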
- * Each function might have multiple function pointers in the system image - * and each function pointer will correspond to only one function. - * - * # Global function and base pointers - * `jl_sysimg_gvars_base`: - * The address of this symbol is the base data pointer - * (all other data pointers are stored as offsets to this address) - * `jl_sysimg_fvars_base`: - * The address of this symbol is the base function pointer - * (all other function pointers are stored as offsets to this address) - * `jl_sysimg_fvars_offsets`: [static data] - * The array of function pointer offsets (`int32_t`) from the base pointer. - * This includes all julia functions in sysimg as well as all other functions that are cloned. - * The default function pointer is used if the function is cloned. - * The first element is the size of the array, which should **NOT** be used as the number - * of julia functions in the sysimg. - * Each entry in this array uniquely identifies a function we are interested in - * (the function may have multiple function pointers corresponding to different versions). - * In other sysimg info, all references to functions are stored as their `uint32_t` index - * in this array. - * - * # Target data and dispatch slots (Only needed by runtime during loading) - * `jl_dispatch_target_ids`: [static data] serialize target data. - * This contains the number of targets which is needed to decode `jl_dispatch_fvars_idxs` - * in addition to the name and feature set of each target. - * `jl_dispatch_reloc_slots`: [static data] location and index of relocation slots. - * Stored as pairs of function indices and `int32_t` offsets from `jl_sysimg_gvars_base`. - * The first element is an `uint32_t` giving the number of relocations. - * This is needed for functions whose address is used in a way that requires dispatch. - * We currently only support one type of relocation (i.e. absolute pointer) which is enough - * for all use in functions as well as GOT slot (for "PLT" callback). - * Note that not all functions being cloned are assigned a slot. - * This array is sorted by the function indices. - * There can be more than one slot per-function, - * i.e. there can be duplicated function indices. - * - * # Target functions - * `jl_dispatch_fvars_idxs`: [static data] Target-specific function indices. - * For each target, this includes a tagged `uint32_t` length, an optional `uint32_t` index - * of the base target followed by an array of tagged function indices. - * The base target index is required to be smaller than the index of the current target - * and must be the default (`0`) or a `clone_all` target. - * If it's not `0`, the function pointer array for the `clone_all` target will be used as - * the base function pointer offsets instead. - * The tag bits for both the length and the indices are the top bit. - * A tagged length indicates that all of the functions are cloned and the indices follows - * are the ones that requires relocation. The base target index is omitted in this case. - * Otherwise, the length is the total number of functions that we are interested in - * for this target, which includes all cloned julia functions and - * all other cloned functions that requires relocation. - * A tagged index means that the function pointer should be filled into the GOT slots - * identified by `jl_dispatch_reloc_slots`. There could be more than one slot per function. 
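The relocation walk that consumes these slot tables (parse_sysimg, earlier in this diff) pairs each cloned function index with one or more pointer-sized slots in the data section and patches the selected function pointer into them, advancing through the index-sorted slot list as it goes. A reduced sketch of that patching step with ordinary arrays in place of the image sections; all offsets and indices here are made up for illustration:

#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    // Pretend image sections: a data area with two pointer slots, and two
    // "functions" living at fixed offsets from a text base.
    char data[2 * sizeof(void*)] = {0};
    static const char text[256] = {0};

    // (function index, byte offset of the slot inside `data`), sorted by index.
    const uint32_t reloc[][2] = {{3, 0}, {7, sizeof(void*)}};
    const size_t nreloc = 2;

    // Offsets of the selected (possibly cloned) copies of functions 3 and 7.
    const uint32_t chosen_idx[] = {3, 7};
    const int32_t chosen_offset[] = {32, 160};

    size_t r = 0;
    for (size_t i = 0; i < 2; i++) {
        // advance through the sorted slot list until we reach this function index
        for (; r < nreloc && reloc[r][0] < chosen_idx[i]; r++) {}
        for (; r < nreloc && reloc[r][0] == chosen_idx[i]; r++) {
            const void *p = text + chosen_offset[i];
            std::memcpy(data + reloc[r][1], &p, sizeof(p));  // patch the slot
        }
    }
    const void *slot0;
    std::memcpy(&slot0, data, sizeof(slot0));
    assert(slot0 == text + 32);
    return 0;
}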
- * (Note that a tagged index could corresponds to a functions pointer that's the same as - * the base one since this is the only way we currently represent relocations.) - * A tagged length implicitly tags all the indices and the indices will not have the tag bit - * set. The lengths in this variable is needed to decode `jl_dispatch_fvars_offsets`. - * `jl_dispatch_fvars_offsets`: [static data] Target-specific function pointer offsets. - * This contains all the cloned functions that we are interested in and it needs to be decoded - * and used along with `jl_dispatch_fvars_idxs`. - * For the default target, there's no entries in this variable, if there's any relocations - * needed for the default target, the function pointers are taken from the global offset - * arrays directly. - * For a `clone_all` target (i.e. with the length in `jl_dispatch_fvars_idxs` tagged), this - * variable contains an offset array of the same length as the global one. Only the indices - * appearing in `jl_dispatch_fvars_idxs` need relocation and the dispatch code should return - * this array as the original/base function offsets. - * For other targets, this variable contains an offset array with the length defined in - * `jl_dispatch_fvars_idxs`. Tagged indices need relocations. - */ +// Image metadata +// Every image exports a `jl_image_pointers_t` as a global symbol `jl_image_pointers`. +// This symbol acts as a root for all other code-related symbols in the image. enum { JL_TARGET_VEC_CALL = 1 << 0, @@ -112,6 +39,10 @@ enum { JL_TARGET_MINSIZE = 1 << 7, // Clone when the function queries CPU features JL_TARGET_CLONE_CPU = 1 << 8, + // Clone when the function uses fp16 + JL_TARGET_CLONE_FLOAT16 = 1 << 9, + // Clone when the function uses bf16 + JL_TARGET_CLONE_BFLOAT16 = 1 << 10, }; #define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) JL_FEATURE_DEF(name, bit, llvmver) @@ -133,7 +64,7 @@ JL_DLLEXPORT int jl_test_cpu_feature(jl_cpu_feature_t feature); static const uint32_t jl_sysimg_tag_mask = 0x80000000u; static const uint32_t jl_sysimg_val_mask = ~((uint32_t)0x80000000u); -typedef struct _jl_sysimg_fptrs_t { +typedef struct _jl_image_fptrs_t { // base function pointer const char *base; // number of functions @@ -151,7 +82,131 @@ typedef struct _jl_sysimg_fptrs_t { const int32_t *clone_offsets; // sorted indices of the cloned functions (including the tag bit) const uint32_t *clone_idxs; -} jl_sysimg_fptrs_t; +} jl_image_fptrs_t; + +typedef struct { + uint64_t base; + uintptr_t *gvars_base; + const int32_t *gvars_offsets; + uint32_t ngvars; + jl_image_fptrs_t fptrs; + void **jl_small_typeof; +} jl_image_t; + +// The header for each image +// Details important counts about the image +typedef struct { + // The version of the image format + // Most up-to-date version is 1 + uint32_t version; + // The number of shards in this image + uint32_t nshards; + // The total number of fvars in this image among all shards + uint32_t nfvars; + // The total number of gvars in this image among all shards + uint32_t ngvars; +} jl_image_header_t; + +// Per-shard data for image shards. Each image contains header->nshards of these. +typedef struct { + + // This is the base function pointer + // (all other function pointers are stored as offsets to this address) + const char *fvar_base; + // The array of function pointer offsets (`int32_t`) from the base pointer. + // This includes all julia functions in sysimg as well as all other functions that are cloned. + // The default function pointer is used if the function is cloned. 
+ // The first element is the size of the array, which should **NOT** be used as the number + // of julia functions in the sysimg. + // Each entry in this array uniquely identifies a function we are interested in + // (the function may have multiple function pointers corresponding to different versions). + // In other sysimg info, all references to functions are stored as their `uint32_t` index + // in this array. + const int32_t *fvar_offsets; + // This is the mapping of shard function index -> global function index + // staticdata.c relies on the same order of functions in the global function array being + // the same as what it saw when serializing the global function array. However, partitioning + // into multiple shards will cause functions to be reordered. This array is used to map + // back to the original function array for loading. + const uint32_t *fvar_idxs; + // This is the base data pointer + // (all other data pointers in this shard are stored as offsets to this address) + uintptr_t *gvar_base; + // This array of global variable offsets (`int32_t`) from the base pointer. + // Similar to fvar_offsets, but for gvars + const int32_t *gvar_offsets; + // This is the mapping of shard global variable index -> global global variable index + // Similar to fvar_idxs, but for gvars + const uint32_t *gvar_idxs; + // location and index of relocation slots. + // Stored as pairs of function indices and `int32_t` offsets from `jl_sysimg_gvars_base`. + // The first element is an `uint32_t` giving the number of relocations. + // This is needed for functions whose address is used in a way that requires dispatch. + // We currently only support one type of relocation (i.e. absolute pointer) which is enough + // for all use in functions as well as GOT slot (for "PLT" callback). + // Note that not all functions being cloned are assigned a slot. + // This array is sorted by the function indices. + // There can be more than one slot per-function, + // i.e. there can be duplicated function indices. + const int32_t *clone_slots; + // Target-specific function pointer offsets. + // This contains all the cloned functions that we are interested in and it needs to be decoded + // and used along with `jl_dispatch_fvars_idxs`. + // For the default target, there's no entries in this variable, if there's any relocations + // needed for the default target, the function pointers are taken from the global offset + // arrays directly. + // For a `clone_all` target (i.e. with the length in `jl_dispatch_fvars_idxs` tagged), this + // variable contains an offset array of the same length as the global one. Only the indices + // appearing in `jl_dispatch_fvars_idxs` need relocation and the dispatch code should return + // this array as the original/base function offsets. + // For other targets, this variable contains an offset array with the length defined in + // `jl_dispatch_fvars_idxs`. Tagged indices need relocations. + const int32_t *clone_offsets; + // Target-specific function indices. + // For each target, this includes a tagged `uint32_t` length, an optional `uint32_t` index + // of the base target followed by an array of tagged function indices. + // The base target index is required to be smaller than the index of the current target + // and must be the default (`0`) or a `clone_all` target. + // If it's not `0`, the function pointer array for the `clone_all` target will be used as + // the base function pointer offsets instead. + // The tag bits for both the length and the indices are the top bit. 
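Concretely, with jl_sysimg_tag_mask being the top bit (0x80000000) and jl_sysimg_val_mask its complement, pulling a tagged entry apart is just two masks. A tiny hedged illustration, not new API:

#include <cassert>
#include <cstdint>

int main()
{
    const uint32_t tag_mask = 0x80000000u, val_mask = ~tag_mask;
    uint32_t entry = 0x80000005u;           // a tagged entry for index 5
    bool tagged = (entry & tag_mask) != 0;  // top bit: "needs a GOT slot" / "clone_all length"
    uint32_t value = entry & val_mask;      // the actual index or length
    assert(tagged && value == 5);
    return 0;
}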
+ // A tagged length indicates that all of the functions are cloned and the indices follows + // are the ones that requires relocation. The base target index is omitted in this case. + // Otherwise, the length is the total number of functions that we are interested in + // for this target, which includes all cloned julia functions and + // all other cloned functions that requires relocation. + // A tagged index means that the function pointer should be filled into the GOT slots + // identified by `jl_dispatch_reloc_slots`. There could be more than one slot per function. + // (Note that a tagged index could corresponds to a functions pointer that's the same as + // the base one since this is the only way we currently represent relocations.) + // A tagged length implicitly tags all the indices and the indices will not have the tag bit + // set. The lengths in this variable is needed to decode `jl_dispatch_fvars_offsets`. + const uint32_t *clone_idxs; +} jl_image_shard_t; + +// The TLS data for each image +typedef struct { + void *pgcstack_func_slot; + void *pgcstack_key_slot; + size_t *tls_offset; +} jl_image_ptls_t; + +//The root struct for images, points to all the other globals +typedef struct { + // The image header, contains numerical global data + const jl_image_header_t *header; + // The shard table, contains per-shard data + const jl_image_shard_t *shards; // points to header->nshards length array + // The TLS data pointer + const jl_image_ptls_t *ptls; + // A copy of jl_small_typeof[] + void **jl_small_typeof; + + // serialized target data + // This contains the number of targets + // in addition to the name and feature set of each target. + const void *target_data; +} jl_image_pointers_t; /** * Initialize the processor dispatch system with sysimg `hdl` (also initialize the sysimg itself). @@ -163,13 +218,19 @@ typedef struct _jl_sysimg_fptrs_t { * * Return the data about the function pointers selected. */ -jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl); +jl_image_t jl_init_processor_sysimg(void *hdl); +jl_image_t jl_init_processor_pkgimg(void *hdl); // Return the name of the host CPU as a julia string. JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void); +// Return the features of the host CPU as a julia string. +JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void); // Dump the name and feature set of the host CPU +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits); +// Check if the CPU has native FMA instructions; // For debugging only JL_DLLEXPORT void jl_dump_host_cpu(void); +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data); JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero); JL_DLLEXPORT int32_t jl_get_zero_subnormals(void); @@ -190,14 +251,14 @@ extern JL_DLLEXPORT bool jl_processor_print_help; * If the detected/specified CPU name is not available on the LLVM version specified, * a fallback CPU name will be used. Unsupported features will be ignored. */ -extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags); +extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) JL_NOTSAFEPOINT; /** * Returns the CPU name and feature string to be used by LLVM disassembler. * * This will return a generic CPU name and a full feature string. 
*/ -extern "C" JL_DLLEXPORT const std::pair &jl_get_llvm_disasm_target(void); +extern "C" JL_DLLEXPORT const std::pair &jl_get_llvm_disasm_target(void) JL_NOTSAFEPOINT; struct jl_target_spec_t { // LLVM target name @@ -205,7 +266,7 @@ struct jl_target_spec_t { // LLVM feature string std::string cpu_features; // serialized identification data - std::vector data; + llvm::SmallVector data; // Clone condition. uint32_t flags; // Base target index. @@ -214,9 +275,18 @@ struct jl_target_spec_t { /** * Return the list of targets to clone */ -extern "C" JL_DLLEXPORT std::vector jl_get_llvm_clone_targets(void); -std::string jl_get_cpu_name_llvm(void); -std::string jl_get_cpu_features_llvm(void); +extern "C" JL_DLLEXPORT llvm::SmallVector jl_get_llvm_clone_targets(void) JL_NOTSAFEPOINT; +std::string jl_get_cpu_name_llvm(void) JL_NOTSAFEPOINT; +std::string jl_get_cpu_features_llvm(void) JL_NOTSAFEPOINT; + +struct FeatureName { + const char *name; + uint32_t bit; // bit index into a `uint32_t` array; + uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support +}; + +extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets(); +extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **feature_names, size_t *nfeatures); #endif #endif diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index ea8dddf629d62..6a09e00cf23e7 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1195,7 +1195,7 @@ static bool check_cpu_arch_ver(uint32_t cpu, arm_arch arch) return true; } -static void shrink_big_little(std::vector> &list, +static void shrink_big_little(llvm::SmallVectorImpl> &list, const CPU *cpus, uint32_t ncpu) { auto find = [&] (uint32_t name) { @@ -1260,7 +1260,7 @@ static NOINLINE std::pair> _get_host_cpu() #endif std::set cpus; - std::vector> list; + llvm::SmallVector, 0> list; // Ideally the feature detection above should be enough. // However depending on the kernel version not all features are available // and it's also impossible to detect the ISA version which contains @@ -1481,7 +1481,7 @@ static inline void disable_depends(FeatureList &features) ::disable_depends(features, Feature::deps, sizeof(Feature::deps) / sizeof(FeatureDep)); } -static const std::vector> &get_cmdline_targets(void) +static const llvm::SmallVector, 0> &get_cmdline_targets(void) { auto feature_cb = [] (const char *str, size_t len, FeatureList &list) { #ifdef _CPU_AARCH64_ @@ -1507,7 +1507,7 @@ static const std::vector> &get_cmdline_targets(void) return targets; } -static std::vector> jit_targets; +static llvm::SmallVector, 0> jit_targets; static TargetData arg_target_data(const TargetData &arg, bool require_host) { @@ -1561,7 +1561,7 @@ static int max_vector_size(const FeatureList &features) #endif } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -1573,7 +1573,9 @@ static uint32_t sysimg_init_cb(const void *id) t.name = nname; } } - auto match = match_sysimg_targets(sysimg, target, max_vector_size); + auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason); + if (match.best_idx == -1) + return match.best_idx; // Now we've decided on which sysimg version to use. // Make sure the JIT target is compatible with it and save the JIT target. 
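The callback contract threaded through parse_sysimg and the new pkgimg entry points is: return the selected target index, or (uint32_t)-1 together with an out-parameter describing why nothing matched, and let the caller decide whether to raise or report. A stand-alone sketch of that shape, with std::string standing in for the rooted Julia string and all names invented for the example:

#include <cstdint>
#include <cstdio>
#include <string>

// Returns the selected target index, or UINT32_MAX and fills *why on failure.
static uint32_t select_target(int ntargets, std::string *why)
{
    if (ntargets == 0) {
        if (why)
            *why = "Unable to find compatible target in cached code image.\n";
        return UINT32_MAX;
    }
    return 0;   // pretend the first stored target is acceptable
}

int main()
{
    std::string reason;
    uint32_t idx = select_target(0, &reason);
    if (idx == UINT32_MAX)
        std::fprintf(stderr, "%s", reason.c_str());   // caller chooses: error out or just report
    return 0;
}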
if (match.vreg_size != max_vector_size(target.en.features) && @@ -1586,6 +1588,19 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason JL_REQUIRE_ROOTED_SLOT) +{ + TargetData target = jit_targets.front(); + auto pkgimg = deserialize_target_data((const uint8_t*)id); + for (auto &t: pkgimg) { + if (auto nname = normalize_cpu_name(t.name)) { + t.name = nname; + } + } + auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason); + return match.best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -1602,12 +1617,19 @@ static void ensure_jit_target(bool imaging) auto &t = jit_targets[i]; if (t.en.flags & JL_TARGET_CLONE_ALL) continue; + auto &features0 = jit_targets[t.base].en.features; // Always clone when code checks CPU features t.en.flags |= JL_TARGET_CLONE_CPU; + static constexpr uint32_t clone_fp16[] = {Feature::fp16fml,Feature::fullfp16}; + for (auto fe: clone_fp16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_FLOAT16; + break; + } + } // The most useful one in general... t.en.flags |= JL_TARGET_CLONE_LOOP; #ifdef _CPU_ARM_ - auto &features0 = jit_targets[t.base].en.features; static constexpr uint32_t clone_math[] = {Feature::vfp3, Feature::vfp4, Feature::neon}; for (auto fe: clone_math) { if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { @@ -1626,7 +1648,7 @@ static void ensure_jit_target(bool imaging) } } -static std::pair> +static std::pair> get_llvm_target_noext(const TargetData &data) { std::string name = data.name; @@ -1649,7 +1671,7 @@ get_llvm_target_noext(const TargetData &data) if (name == "apple-a7") name = "cyclone"; #endif - std::vector feature_strs; + llvm::SmallVector feature_strs; for (auto &fename: feature_names) { if (fename.llvmver > JL_LLVM_VERSION) continue; @@ -1717,7 +1739,7 @@ get_llvm_target_noext(const TargetData &data) return std::make_pair(std::move(name), std::move(feature_strs)); } -static std::pair> +static std::pair> get_llvm_target_vec(const TargetData &data) { auto res0 = get_llvm_target_noext(data); @@ -1781,14 +1803,55 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) return jl_cstr_to_string(host_cpu_name().c_str()); } -jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) +JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) +{ + return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); +} + +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ +#ifdef _CPU_AARCH64_ + return jl_true; +#else + TargetData target = jit_targets.front(); + FeatureList features = target.en.features; + if (bits == 32 && test_nbit(features, Feature::vfp4sp)) + return jl_true; + else if ((bits == 64 || bits == 32) && test_nbit(features, Feature::vfp4)) + return jl_true; + else + return jl_false; +#endif +} + +jl_image_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) jl_error("JIT targets already initialized"); return parse_sysimg(hdl, sysimg_init_cb); } -std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) +jl_image_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) +{ + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = 
pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == (uint32_t)-1) + return rejection_reason; + return jl_nothing; +} + +std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); flags = jit_targets[0].en.flags; @@ -1808,11 +1871,11 @@ const std::pair &jl_get_llvm_disasm_target(void) return res; } -std::vector jl_get_llvm_clone_targets(void) +llvm::SmallVector jl_get_llvm_clone_targets(void) { if (jit_targets.empty()) jl_error("JIT targets not initialized"); - std::vector res; + llvm::SmallVector res; for (auto &target: jit_targets) { auto features_en = target.en.features; auto features_dis = target.dis.features; diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index 1f314eb460f0f..399d31af05ebf 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -2,6 +2,9 @@ // Fallback processor detection and dispatch +static constexpr FeatureName *feature_names = nullptr; +static constexpr uint32_t nfeature_names = 0; + namespace Fallback { static inline const std::string &host_cpu_name() @@ -10,7 +13,7 @@ static inline const std::string &host_cpu_name() return name; } -static const std::vector> &get_cmdline_targets(void) +static const llvm::SmallVector, 0> &get_cmdline_targets(void) { auto feature_cb = [] (const char*, size_t, FeatureList<1>&) { return false; @@ -18,7 +21,7 @@ static const std::vector> &get_cmdline_targets(void) return ::get_cmdline_targets<1>(feature_cb); } -static std::vector> jit_targets; +static llvm::SmallVector, 0> jit_targets; static TargetData<1> arg_target_data(const TargetData<1> &arg, bool require_host) { @@ -33,7 +36,7 @@ static TargetData<1> arg_target_data(const TargetData<1> &arg, bool require_host return res; } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -51,6 +54,22 @@ static uint32_t sysimg_init_cb(const void *id) return best_idx; } +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason) +{ + TargetData<1> target = jit_targets.front(); + // Find the last name match or use the default one. 
+ uint32_t best_idx = 0; + auto pkgimg = deserialize_target_data<1>((const uint8_t*)id); + for (uint32_t i = 0; i < pkgimg.size(); i++) { + auto &imgt = pkgimg[i]; + if (imgt.name == target.name) { + best_idx = i; + } + } + + return best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -69,13 +88,13 @@ static void ensure_jit_target(bool imaging) } } -static std::pair> +static std::pair> get_llvm_target_noext(const TargetData<1> &data) { - return std::make_pair(data.name, std::vector{}); + return std::make_pair(data.name, llvm::SmallVector{}); } -static std::pair> +static std::pair> get_llvm_target_vec(const TargetData<1> &data) { auto res0 = get_llvm_target_noext(data); @@ -96,14 +115,23 @@ get_llvm_target_str(const TargetData<1> &data) using namespace Fallback; -jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) +jl_image_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) jl_error("JIT targets already initialized"); return parse_sysimg(hdl, sysimg_init_cb); } -std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) +jl_image_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + +std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); flags = jit_targets[0].en.flags; @@ -117,11 +145,11 @@ const std::pair &jl_get_llvm_disasm_target(void) return res; } -extern "C" std::vector jl_get_llvm_clone_targets(void) +extern "C" llvm::SmallVector jl_get_llvm_clone_targets(void) { if (jit_targets.empty()) jl_error("JIT targets not initialized"); - std::vector res; + llvm::SmallVector res; for (auto &target: jit_targets) { jl_target_spec_t ele; std::tie(ele.cpu_name, ele.cpu_features) = get_llvm_target_str(target); @@ -139,12 +167,33 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) return jl_cstr_to_string(host_cpu_name().c_str()); } +JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) +{ + return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); +} + +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ + return jl_false; // Match behaviour of have_fma in src/llvm-cpufeatures.cpp (assume false) +} + JL_DLLEXPORT void jl_dump_host_cpu(void) { jl_safe_printf("CPU: %s\n", host_cpu_name().c_str()); jl_safe_printf("Features: %s\n", jl_get_cpu_features_llvm().c_str()); } +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) +{ + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == (uint32_t)-1) + return rejection_reason; + return jl_nothing; +} + extern "C" int jl_test_cpu_feature(jl_cpu_feature_t) { return 0; diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index 77ee5afaf5e85..3f8c62acaed11 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -4,6 +4,7 @@ // CPUID +#include "julia.h" extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType) { asm volatile ( @@ -154,6 +155,9 @@ static constexpr FeatureDep deps[] = { {avx512vnni, avx512f}, {avx512vp2intersect, avx512f}, {avx512vpopcntdq, avx512f}, + {avx512fp16, avx512bw}, + {avx512fp16, avx512dq}, + {avx512fp16, avx512vl}, {amx_int8, amx_tile}, {amx_bf16, amx_tile}, {sse4a, sse3}, @@ -208,8 +212,8 @@ constexpr auto tigerlake = icelake | get_feature_masks(avx512vp2intersect, movdi constexpr auto alderlake = 
skylake | get_feature_masks(clwb, sha, waitpkg, shstk, gfni, vaes, vpclmulqdq, pconfig, rdpid, movdiri, pku, movdir64b, serialize, ptwrite, avxvnni); constexpr auto sapphirerapids = icelake_server | - get_feature_masks(amx_tile, amx_int8, amx_bf16, avx512bf16, serialize, cldemote, waitpkg, - ptwrite, tsxldtrk, enqcmd, shstk, avx512vp2intersect, movdiri, movdir64b); + get_feature_masks(amx_tile, amx_int8, amx_bf16, avx512bf16, avx512fp16, serialize, cldemote, waitpkg, + avxvnni, uintr, ptwrite, tsxldtrk, enqcmd, shstk, avx512vp2intersect, movdiri, movdir64b); constexpr auto k8_sse3 = get_feature_masks(sse3, cx16); constexpr auto amdfam10 = k8_sse3 | get_feature_masks(sse4a, lzcnt, popcnt, sahf); @@ -219,13 +223,16 @@ constexpr auto btver2 = btver1 | get_feature_masks(sse41, sse42, avx, aes, pclmu movbe, xsave, xsaveopt); constexpr auto bdver1 = amdfam10 | get_feature_masks(xop, fma4, avx, ssse3, sse41, sse42, aes, - prfchw, pclmul, xsave, lwp); + prfchw, pclmul, xsave); constexpr auto bdver2 = bdver1 | get_feature_masks(f16c, bmi, tbm, fma); constexpr auto bdver3 = bdver2 | get_feature_masks(xsaveopt, fsgsbase); constexpr auto bdver4 = bdver3 | get_feature_masks(avx2, bmi2, mwaitx, movbe, rdrnd); +// technically xsaves is part of znver1, znver2, and znver3 +// Disabled due to Erratum 1386 +// See: https://github.com/JuliaLang/julia/issues/50102 constexpr auto znver1 = haswell | get_feature_masks(adx, aes, clflushopt, clzero, mwaitx, prfchw, - rdseed, sha, sse4a, xsavec, xsaves); + rdseed, sha, sse4a, xsavec); constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd); constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq); @@ -765,7 +772,7 @@ static inline void disable_depends(FeatureList &features) ::disable_depends(features, Feature::deps, sizeof(Feature::deps) / sizeof(FeatureDep)); } -static const std::vector> &get_cmdline_targets(void) +static const llvm::SmallVector, 0> &get_cmdline_targets(void) { auto feature_cb = [] (const char *str, size_t len, FeatureList &list) { auto fbit = find_feature_bit(feature_names, nfeature_names, str, len); @@ -783,7 +790,7 @@ static const std::vector> &get_cmdline_targets(void) return targets; } -static std::vector> jit_targets; +static llvm::SmallVector, 0> jit_targets; static TargetData arg_target_data(const TargetData &arg, bool require_host) { @@ -834,7 +841,7 @@ static int max_vector_size(const FeatureList &features) return 16; } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t** rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -862,7 +869,9 @@ static uint32_t sysimg_init_cb(const void *id) "virtualized environment. Please read " "https://docs.julialang.org/en/v1/devdocs/sysimg/ for more."); } - auto match = match_sysimg_targets(sysimg, target, max_vector_size); + auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason); + if (match.best_idx == (uint32_t)-1) + return match.best_idx; // Now we've decided on which sysimg version to use. // Make sure the JIT target is compatible with it and save the JIT target. 
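The per-CPU feature sets above are built cumulatively: each newer microarchitecture ORs extra bits onto its predecessor's mask, and dependency pairs such as {avx512fp16, avx512bw} let the loader clear a feature whose prerequisite is unavailable. A toy version of both ideas using a single 64-bit mask; the bit positions below are invented for the sketch and do not match the real feature tables:

#include <cassert>
#include <cstdint>

enum : uint64_t {                  // invented bit assignments, for illustration only
    sse2       = 1ull << 0,
    avx        = 1ull << 1,
    avx512bw   = 1ull << 2,
    avx512fp16 = 1ull << 3,
};

struct Dep { uint64_t feature, needs; };

int main()
{
    // Cumulative masks: newer CPU = older CPU | extra features.
    const uint64_t older = sse2 | avx;
    const uint64_t newer = older | avx512bw | avx512fp16;
    assert((newer & older) == older);

    // Dependency enforcement: a feature whose prerequisite is gone is dropped too.
    uint64_t enabled = newer & ~(uint64_t)avx512bw;   // pretend avx512bw was disabled
    const Dep deps[] = {{avx512fp16, avx512bw}};
    for (const Dep &d : deps)
        if (!(enabled & d.needs))
            enabled &= ~d.feature;
    assert(!(enabled & avx512fp16));
    return 0;
}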
if (match.vreg_size != max_vector_size(target.en.features) && @@ -878,6 +887,19 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason) +{ + TargetData target = jit_targets.front(); + auto pkgimg = deserialize_target_data((const uint8_t*)id); + for (auto &t: pkgimg) { + if (auto nname = normalize_cpu_name(t.name)) { + t.name = nname; + } + } + auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason); + return match.best_idx; +} + static void ensure_jit_target(bool imaging) { auto &cmdline = get_cmdline_targets(); @@ -917,10 +939,10 @@ static void ensure_jit_target(bool imaging) Feature::avx512pf, Feature::avx512er, Feature::avx512cd, Feature::avx512bw, Feature::avx512vl, Feature::avx512vbmi, - Feature::avx512vpopcntdq, + Feature::avx512vpopcntdq, Feature::avxvnni, Feature::avx512vbmi2, Feature::avx512vnni, Feature::avx512bitalg, Feature::avx512bf16, - Feature::avx512vp2intersect}; + Feature::avx512vp2intersect, Feature::avx512fp16}; for (auto fe: clone_math) { if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { t.en.flags |= JL_TARGET_CLONE_MATH; @@ -933,10 +955,24 @@ static void ensure_jit_target(bool imaging) break; } } + static constexpr uint32_t clone_fp16[] = {Feature::avx512fp16}; + for (auto fe: clone_fp16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_FLOAT16; + break; + } + } + static constexpr uint32_t clone_bf16[] = {Feature::avx512bf16}; + for (auto fe: clone_bf16) { + if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) { + t.en.flags |= JL_TARGET_CLONE_BFLOAT16; + break; + } + } } } -static std::pair> +static std::pair> get_llvm_target_noext(const TargetData &data) { std::string name = data.name; @@ -955,7 +991,7 @@ get_llvm_target_noext(const TargetData &data) name = "x86-64"; #endif } - std::vector features; + llvm::SmallVector features; for (auto &fename: feature_names) { if (fename.llvmver > JL_LLVM_VERSION) continue; @@ -979,7 +1015,7 @@ get_llvm_target_noext(const TargetData &data) return std::make_pair(std::move(name), std::move(features)); } -static std::pair> +static std::pair> get_llvm_target_vec(const TargetData &data) { auto res0 = get_llvm_target_noext(data); @@ -1006,19 +1042,54 @@ JL_DLLEXPORT void jl_dump_host_cpu(void) cpus, ncpu_names); } +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) +{ + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == (uint32_t)-1) + return rejection_reason; + return jl_nothing; +} + JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void) { return jl_cstr_to_string(host_cpu_name().c_str()); } -jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl) +JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void) +{ + return jl_cstr_to_string(jl_get_cpu_features_llvm().c_str()); +} + +JL_DLLEXPORT jl_value_t *jl_cpu_has_fma(int bits) +{ + TargetData target = jit_targets.front(); + FeatureList features = target.en.features; + if ((bits == 32 || bits == 64) && (test_nbit(features, Feature::fma) || test_nbit(features, Feature::fma4))) + return jl_true; + else + return jl_false; +} + +jl_image_t jl_init_processor_sysimg(void *hdl) { if (!jit_targets.empty()) jl_error("JIT targets already initialized"); return parse_sysimg(hdl, sysimg_init_cb); } -extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) 
+jl_image_t jl_init_processor_pkgimg(void *hdl) +{ + if (jit_targets.empty()) + jl_error("JIT targets not initialized"); + if (jit_targets.size() > 1) + jl_error("Expected only one JIT target"); + return parse_sysimg(hdl, pkgimg_init_cb); +} + +extern "C" JL_DLLEXPORT std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) { ensure_jit_target(imaging); flags = jit_targets[0].en.flags; @@ -1032,11 +1103,11 @@ extern "C" JL_DLLEXPORT const std::pair &jl_get_llvm_di return res; } -extern "C" JL_DLLEXPORT std::vector jl_get_llvm_clone_targets(void) +extern "C" JL_DLLEXPORT llvm::SmallVector jl_get_llvm_clone_targets(void) { if (jit_targets.empty()) jl_error("JIT targets not initialized"); - std::vector res; + llvm::SmallVector res; for (auto &target: jit_targets) { auto features_en = target.en.features; auto features_dis = target.dis.features; diff --git a/src/rtutils.c b/src/rtutils.c index f3a2e745ed651..74f66ae0b1769 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -127,9 +127,34 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error(const char *fname, jl_type_error_rt(fname, "", expected, got); } -JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var) +JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *scope) { - jl_throw(jl_new_struct(jl_undefvarerror_type, var)); + if (!jl_undefvarerror_type) { + const char *s1 = ""; + const char *s2 = ""; + if (scope) { + if (jl_is_symbol(scope)) { + s1 = ", :"; + s2 = jl_symbol_name((jl_sym_t*)scope); + } + else if (jl_is_module(scope)) { + s1 = ", module "; + s2 = jl_symbol_name(((jl_module_t*)scope)->name); + } + else { + s1 = ", "; + s2 = "unknown scope"; + } + } + jl_errorf("UndefVarError(%s%s%s)", jl_symbol_name(var), s1, s2); + } + JL_GC_PUSH1(&scope); + jl_throw(jl_new_struct(jl_undefvarerror_type, var, scope)); +} + +JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_sym_t *type_name, jl_sym_t *var) +{ + jl_errorf("type %s has no field %s", jl_symbol_name(type_name), jl_symbol_name(var)); } JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str) // == jl_exceptionf(jl_atomicerror_type, "%s", str) @@ -202,12 +227,6 @@ JL_DLLEXPORT void JL_NORETURN jl_eof_error(void) jl_throw(jl_new_struct(eof_error)); } -// get kwsorter field, with appropriate error check and message -JL_DLLEXPORT jl_value_t *jl_get_keyword_sorter(jl_value_t *f) -{ - return jl_get_kwsorter(jl_typeof(f)); -} - JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t) { if (!jl_isa(x,t)) @@ -262,7 +281,6 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) // This function should **NOT** have any safepoint before the ones at the // end. 
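jl_undefined_var_error above now threads a scope (a symbol or a module) into UndefVarError and, during early bootstrap when the exception type does not exist yet, falls back to a plainly formatted message. A small sketch of just that formatting branch, with ordinary strings standing in for jl_sym_t/jl_module_t:

#include <cstdio>
#include <string>

// Build the bootstrap-time fallback text for an undefined variable,
// mirroring the s1/s2 selection in jl_undefined_var_error.
static std::string undef_var_message(const std::string &var,
                                     const std::string *scope_symbol,
                                     const std::string *scope_module)
{
    std::string s1, s2;
    if (scope_symbol)      { s1 = ", :";       s2 = *scope_symbol; }
    else if (scope_module) { s1 = ", module "; s2 = *scope_module; }
    char buf[256];
    std::snprintf(buf, sizeof(buf), "UndefVarError(%s%s%s)", var.c_str(), s1.c_str(), s2.c_str());
    return buf;
}

int main()
{
    std::string mod = "Base";
    // Prints: UndefVarError(foo, module Base)
    std::printf("%s\n", undef_var_message("foo", nullptr, &mod).c_str());
    return 0;
}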
sig_atomic_t old_defer_signal = ct->ptls->defer_signal; - int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state); ct->eh = eh->prev; ct->gcstack = eh->gcstack; small_arraylist_t *locks = &ct->ptls->locks; @@ -274,15 +292,13 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) } ct->world_age = eh->world_age; ct->ptls->defer_signal = eh->defer_signal; - if (old_gc_state != eh->gc_state) { + int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + if (old_gc_state != eh->gc_state) jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state); - if (old_gc_state) { - jl_gc_safepoint_(ct->ptls); - } - } - if (old_defer_signal && !eh->defer_signal) { + if (!old_gc_state || !eh->gc_state) // it was or is unsafe now + jl_gc_safepoint_(ct->ptls); + if (old_defer_signal && !eh->defer_signal) jl_sigint_safepoint(ct->ptls); - } if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers) && unlocks && eh->locks_len == 0) { jl_gc_run_pending_finalizers(ct); @@ -324,7 +340,7 @@ static void jl_copy_excstack(jl_excstack_t *dest, jl_excstack_t *src) JL_NOTSAFE dest->top = src->top; } -static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, +static void jl_reserve_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, size_t reserved_size) { jl_excstack_t *s = *stack; @@ -338,13 +354,14 @@ static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, if (s) jl_copy_excstack(new_s, s); *stack = new_s; + jl_gc_wb(task, new_s); } -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size) { - jl_reserve_excstack(stack, (*stack ? (*stack)->top : 0) + bt_size + 2); + jl_reserve_excstack(task, stack, (*stack ? (*stack)->top : 0) + bt_size + 2); jl_excstack_t *s = *stack; jl_bt_element_t *rawstack = jl_excstack_raw(s); memcpy(rawstack + s->top, bt_data, sizeof(jl_bt_element_t)*bt_size); @@ -363,7 +380,10 @@ JL_DLLEXPORT void *(jl_symbol_name)(jl_sym_t *s) // WARNING: THIS FUNCTION IS NEVER CALLED BUT INLINE BY CCALL JL_DLLEXPORT void *jl_array_ptr(jl_array_t *a) { - return a->data; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout; + if (layout->flags.arrayelem_isunion || layout->size == 0) + return (char*)a->ref.mem->ptr + (size_t)jl_array_data_(a); + return jl_array_data_(a); } JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a) { @@ -530,7 +550,7 @@ JL_DLLEXPORT jl_value_t *jl_stdout_obj(void) JL_NOTSAFEPOINT { if (jl_base_module == NULL) return NULL; - jl_binding_t *stdout_obj = jl_get_module_binding(jl_base_module, jl_symbol("stdout")); + jl_binding_t *stdout_obj = jl_get_module_binding(jl_base_module, jl_symbol("stdout"), 0); return stdout_obj ? jl_atomic_load_relaxed(&stdout_obj->value) : NULL; } @@ -538,20 +558,29 @@ JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT { if (jl_base_module == NULL) return NULL; - jl_binding_t *stderr_obj = jl_get_module_binding(jl_base_module, jl_symbol("stderr")); + jl_binding_t *stderr_obj = jl_get_module_binding(jl_base_module, jl_symbol("stderr"), 0); return stderr_obj ? 
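A conceptual sketch of why jl_reserve_excstack now takes the owning task and calls jl_gc_wb(task, new_s): with a generational collector, storing a pointer to a freshly allocated object into a field of an object that may already be old must record the parent so the next minor collection rescans it. Everything below is illustrative and is not Julia's barrier implementation:

typedef struct node { struct node *field; int is_old; } node_t;

static void remember_old_to_young(node_t *parent)
{
    (void)parent;   /* a real GC would push `parent` onto a remembered set here */
}

static void store_with_write_barrier(node_t *parent, node_t *young_child)
{
    parent->field = young_child;        /* the store, analogous to `*stack = new_s`        */
    if (parent->is_old)                 /* barrier condition, analogous to jl_gc_wb(...)   */
        remember_old_to_young(parent);
}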
jl_atomic_load_relaxed(&stderr_obj->value) : NULL; } // toys for debugging --------------------------------------------------------- -static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const char *opn, const char *cls) JL_NOTSAFEPOINT +struct recur_list { + struct recur_list *prev; + jl_value_t *v; +}; + +static size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT; +static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT; +static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *prev, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT; + +static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const char *opn, const char *cls, jl_static_show_config_t ctx) JL_NOTSAFEPOINT { size_t i, n=0, len = jl_svec_len(t); n += jl_printf(out, "%s", head); n += jl_printf(out, "%s", opn); for (i = 0; i < len; i++) { jl_value_t *v = jl_svecref(t,i); - n += jl_static_show(out, v); + n += jl_static_show_x(out, v, 0, ctx); if (i != len-1) n += jl_printf(out, ", "); } @@ -559,14 +588,6 @@ static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const return n; } -struct recur_list { - struct recur_list *prev; - jl_value_t *v; -}; - -static size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, struct recur_list *depth) JL_NOTSAFEPOINT; -static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *prev, struct recur_list *depth) JL_NOTSAFEPOINT; - JL_DLLEXPORT int jl_id_start_char(uint32_t wc) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_id_char(uint32_t wc) JL_NOTSAFEPOINT; @@ -583,29 +604,29 @@ JL_DLLEXPORT int jl_is_identifier(char *str) JL_NOTSAFEPOINT return 1; } -static jl_datatype_t *first_arg_datatype(jl_value_t *a JL_PROPAGATES_ROOT, int got_tuple1) JL_NOTSAFEPOINT +static jl_datatype_t *nth_arg_datatype(jl_value_t *a JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT { if (jl_is_datatype(a)) { - if (got_tuple1) + if (n == 0) return (jl_datatype_t*)a; if (jl_is_tuple_type(a)) { - if (jl_nparams(a) < 1) + if (jl_nparams(a) < n) return NULL; - return first_arg_datatype(jl_tparam0(a), 1); + return nth_arg_datatype(jl_tparam(a, n - 1), 0); } return NULL; } else if (jl_is_typevar(a)) { - return first_arg_datatype(((jl_tvar_t*)a)->ub, got_tuple1); + return nth_arg_datatype(((jl_tvar_t*)a)->ub, n); } else if (jl_is_unionall(a)) { - return first_arg_datatype(((jl_unionall_t*)a)->body, got_tuple1); + return nth_arg_datatype(((jl_unionall_t*)a)->body, n); } else if (jl_is_uniontype(a)) { jl_uniontype_t *u = (jl_uniontype_t*)a; - jl_datatype_t *d1 = first_arg_datatype(u->a, got_tuple1); + jl_datatype_t *d1 = nth_arg_datatype(u->a, n); if (d1 == NULL) return NULL; - jl_datatype_t *d2 = first_arg_datatype(u->b, got_tuple1); + jl_datatype_t *d2 = nth_arg_datatype(u->b, n); if (d2 == NULL || d1->name != d2->name) return NULL; return d1; @@ -614,15 +635,15 @@ static jl_datatype_t *first_arg_datatype(jl_value_t *a JL_PROPAGATES_ROOT, int g } // get DataType of first tuple element (if present), or NULL if cannot be determined -JL_DLLEXPORT jl_datatype_t *jl_first_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT +jl_datatype_t *jl_nth_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT, int n) JL_NOTSAFEPOINT { - return first_arg_datatype(argtypes, 0); + return nth_arg_datatype(argtypes, n); } // get DataType implied by a 
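jl_is_identifier above feeds the printers further down, which choose between the plain `Mod.name` form and the quoted `Mod.:(name)` form. A simplified, ASCII-only sketch of that decision (the real check goes through jl_id_start_char / jl_id_char and is Unicode-aware):

#include <ctype.h>
#include <stdio.h>

static int is_ident(const char *s)
{
    if (!(isalpha((unsigned char)s[0]) || s[0] == '_'))
        return 0;
    for (const char *p = s + 1; *p; p++)
        if (!(isalnum((unsigned char)*p) || *p == '_' || *p == '!'))
            return 0;
    return 1;
}

static void print_qualified(const char *mod, const char *name)
{
    printf(is_ident(name) ? "%s.%s\n" : "%s.:(%s)\n", mod, name);   /* e.g. Base.:(+) */
}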
single given type, or `nothing` JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { - jl_datatype_t *dt = first_arg_datatype(argt, 1); + jl_datatype_t *dt = nth_arg_datatype(argt, 0); if (dt == NULL) return jl_nothing; return (jl_value_t*)dt; @@ -631,9 +652,9 @@ JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROO static int is_globname_binding(jl_value_t *v, jl_datatype_t *dv) JL_NOTSAFEPOINT { jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL; - if (globname && dv->name->module && jl_binding_resolved_p(dv->name->module, globname)) { - jl_binding_t *b = jl_get_module_binding(dv->name->module, globname); - if (b && b->constp) { + if (globname && dv->name->module) { + jl_binding_t *b = jl_get_module_binding(dv->name->module, globname, 0); + if (b && jl_atomic_load_relaxed(&b->owner) && b->constp) { jl_value_t *bv = jl_atomic_load_relaxed(&b->value); // The `||` makes this function work for both function instances and function types. if (bv == v || jl_typeof(bv) == v) @@ -703,7 +724,7 @@ static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT { // This is necessary to make sure that this function doesn't allocate any // memory through the Julia GC static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt, - struct recur_list *depth) JL_NOTSAFEPOINT + struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT { size_t n = 0; if ((uintptr_t)vt < 4096U) { @@ -711,9 +732,15 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } else if ((uintptr_t)v < 4096U) { n += jl_printf(out, ""); } + else if (vt == (jl_datatype_t*)jl_buff_tag) { + n += jl_printf(out, "", (void*)v); + } + else if (vt == (jl_datatype_t*)(uintptr_t)(0xbabababababababaull & ~15)) { + n += jl_printf(out, "", (void*)v); + } // These need to be special cased because they // exist only by pointer identity in early startup else if (v == (jl_value_t*)jl_simplevector_type) { @@ -746,17 +773,17 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_static_show_func_sig(out, li->def.method->sig); } else { - n += jl_static_show_x(out, (jl_value_t*)li->def.module, depth); + n += jl_static_show_x(out, (jl_value_t*)li->def.module, depth, ctx); n += jl_printf(out, ". -> "); - n += jl_static_show_x(out, li->uninferred, depth); + n += jl_static_show_x(out, jl_atomic_load_relaxed(&li->uninferred), depth, ctx); } } else if (vt == jl_typename_type) { - n += jl_static_show_x(out, jl_unwrap_unionall(((jl_typename_t*)v)->wrapper), depth); + n += jl_static_show_x(out, jl_unwrap_unionall(((jl_typename_t*)v)->wrapper), depth, ctx); n += jl_printf(out, ".name"); } else if (vt == jl_simplevector_type) { - n += jl_show_svec(out, (jl_svec_t*)v, "svec", "(", ")"); + n += jl_show_svec(out, (jl_svec_t*)v, "svec", "(", ")", ctx); } else if (v == (jl_value_t*)jl_unionall_type) { // avoid printing `typeof(Type)` for `UnionAll`. 
@@ -767,10 +794,10 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_printf(out, "Vararg"); if (vm->T) { n += jl_printf(out, "{"); - n += jl_static_show_x(out, vm->T, depth); + n += jl_static_show_x(out, vm->T, depth, ctx); if (vm->N) { n += jl_printf(out, ", "); - n += jl_static_show_x(out, vm->N, depth); + n += jl_static_show_x(out, vm->N, depth, ctx); } n += jl_printf(out, "}"); } @@ -797,7 +824,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } if (taillen == tlen && taillen > 3) { n += jl_printf(out, "NTuple{%d, ", tlen); - n += jl_static_show_x(out, jl_tparam0(dv), depth); + n += jl_static_show_x(out, jl_tparam0(dv), depth, ctx); n += jl_printf(out, "}"); } else { @@ -805,22 +832,25 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt for (i = 0; i < (taillen > 3 ? tlen-taillen : tlen); i++) { if (i > 0) n += jl_printf(out, ", "); - n += jl_static_show_x(out, jl_tparam(dv, i), depth); + n += jl_static_show_x(out, jl_tparam(dv, i), depth, ctx); } if (taillen > 3) { n += jl_printf(out, ", Vararg{"); - n += jl_static_show_x(out, jl_tparam(dv, tlen-1), depth); + n += jl_static_show_x(out, jl_tparam(dv, tlen-1), depth, ctx); n += jl_printf(out, ", %d}", taillen); } n += jl_printf(out, "}"); } return n; } + if (ctx.quiet) { + return jl_printf(out, "%s", jl_symbol_name(dv->name->name)); + } if (globfunc) { n += jl_printf(out, "typeof("); } if (jl_core_module && (dv->name->module != jl_core_module || !jl_module_exports_p(jl_core_module, sym))) { - n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth); + n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth, ctx); n += jl_printf(out, "."); size_t i = 0; if (globfunc && !jl_id_start_char(u8_nextchar(sn, &i))) { @@ -841,7 +871,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_printf(out, "{"); for (j = 0; j < tlen; j++) { jl_value_t *p = jl_tparam(dv,j); - n += jl_static_show_x(out, p, depth); + n += jl_static_show_x(out, p, depth, ctx); if (j != tlen-1) n += jl_printf(out, ", "); } @@ -908,22 +938,22 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_printf(out, "Union{"); while (jl_is_uniontype(v)) { // tail-recurse on b to flatten the printing of the Union structure in the common case - n += jl_static_show_x(out, ((jl_uniontype_t*)v)->a, depth); + n += jl_static_show_x(out, ((jl_uniontype_t*)v)->a, depth, ctx); n += jl_printf(out, ", "); v = ((jl_uniontype_t*)v)->b; } - n += jl_static_show_x(out, v, depth); + n += jl_static_show_x(out, v, depth, ctx); n += jl_printf(out, "}"); } else if (vt == jl_unionall_type) { jl_unionall_t *ua = (jl_unionall_t*)v; - n += jl_static_show_x(out, ua->body, depth); + n += jl_static_show_x(out, ua->body, depth, ctx); n += jl_printf(out, " where "); - n += jl_static_show_x(out, (jl_value_t*)ua->var, depth->prev); + n += jl_static_show_x(out, (jl_value_t*)ua->var, depth->prev, ctx); } else if (vt == jl_typename_type) { n += jl_printf(out, "typename("); - n += jl_static_show_x(out, jl_unwrap_unionall(((jl_typename_t*)v)->wrapper), depth); + n += jl_static_show_x(out, jl_unwrap_unionall(((jl_typename_t*)v)->wrapper), depth, ctx); n += jl_printf(out, ")"); } else if (vt == jl_tvar_type) { @@ -943,7 +973,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt int ua = jl_is_unionall(lb); if (ua) n += jl_printf(out, "("); - n += jl_static_show_x(out, lb, depth); + n += 
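The jl_static_show_config_t parameter threaded through every helper above is a small struct passed by value, so options like the quiet flag need no global state. A minimal sketch of the same pattern; the struct layout and names here are assumptions, only the quiet field is implied by this diff:

#include <stdio.h>

typedef struct { int quiet; } show_config_t;

static size_t show_node(FILE *out, const char *name, int depth, show_config_t ctx)
{
    if (ctx.quiet)                                    /* short form: just the type name */
        return (size_t)fprintf(out, "%s", name);
    size_t n = (size_t)fprintf(out, "%*s%s\n", depth * 2, "", name);
    /* ...recurse into children, forwarding `ctx` unchanged by value... */
    return n;
}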
jl_static_show_x(out, lb, depth, ctx); if (ua) n += jl_printf(out, ")"); n += jl_printf(out, "<:"); @@ -955,7 +985,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_printf(out, "<:"); if (ua) n += jl_printf(out, "("); - n += jl_static_show_x(out, ub, depth); + n += jl_static_show_x(out, ub, depth, ctx); if (ua) n += jl_printf(out, ")"); } @@ -963,7 +993,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt else if (vt == jl_module_type) { jl_module_t *m = (jl_module_t*)v; if (m->parent != m && m->parent != jl_main_module) { - n += jl_static_show_x(out, (jl_value_t*)m->parent, depth); + n += jl_static_show_x(out, (jl_value_t*)m->parent, depth, ctx); n += jl_printf(out, "."); } n += jl_printf(out, "%s", jl_symbol_name(m->name)); @@ -984,7 +1014,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt (uintptr_t)((jl_ssavalue_t*)v)->id); } else if (vt == jl_globalref_type) { - n += jl_static_show_x(out, (jl_value_t*)jl_globalref_mod(v), depth); + n += jl_static_show_x(out, (jl_value_t*)jl_globalref_mod(v), depth, ctx); char *name = jl_symbol_name(jl_globalref_name(v)); n += jl_printf(out, jl_is_identifier(name) ? ".%s" : ".:(%s)", name); } @@ -999,7 +1029,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt else { n += jl_printf(out, ":("); } - n += jl_static_show_x(out, qv, depth); + n += jl_static_show_x(out, qv, depth, ctx); if (!jl_is_symbol(qv)) { n += jl_printf(out, " end"); } @@ -1009,52 +1039,91 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } else if (vt == jl_newvarnode_type) { n += jl_printf(out, ""); } else if (vt == jl_linenumbernode_type) { n += jl_printf(out, "#= "); - n += jl_static_show_x(out, jl_linenode_file(v), depth); + n += jl_static_show_x(out, jl_linenode_file(v), depth, ctx); n += jl_printf(out, ":%" PRIuPTR " =#", jl_linenode_line(v)); } else if (vt == jl_expr_type) { jl_expr_t *e = (jl_expr_t*)v; - if (e->head == jl_assign_sym && jl_array_len(e->args) == 2) { - n += jl_static_show_x(out, jl_exprarg(e,0), depth); + if (e->head == jl_assign_sym && jl_array_nrows(e->args) == 2) { + n += jl_static_show_x(out, jl_exprarg(e,0), depth, ctx); n += jl_printf(out, " = "); - n += jl_static_show_x(out, jl_exprarg(e,1), depth); + n += jl_static_show_x(out, jl_exprarg(e,1), depth, ctx); } else { char sep = ' '; n += jl_printf(out, "Expr(:%s", jl_symbol_name(e->head)); - size_t i, len = jl_array_len(e->args); + size_t i, len = jl_array_nrows(e->args); for (i = 0; i < len; i++) { n += jl_printf(out, ",%c", sep); - n += jl_static_show_x(out, jl_exprarg(e,i), depth); + n += jl_static_show_x(out, jl_exprarg(e,i), depth, ctx); } n += jl_printf(out, ")"); } } else if (jl_array_type && jl_is_array_type(vt)) { n += jl_printf(out, "Array{"); - n += jl_static_show_x(out, (jl_value_t*)jl_tparam0(vt), depth); - n += jl_printf(out, ", ("); + jl_value_t *el_type = jl_tparam0(vt); + n += jl_static_show_x(out, el_type, depth, ctx); + jl_array_t *av = (jl_array_t*)v; size_t i, ndims = jl_array_ndims(v); + n += jl_printf(out, ", %" PRIdPTR "}(dims=(", ndims); if (ndims == 1) n += jl_printf(out, "%" PRIdPTR ",", jl_array_dim0(v)); else for (i = 0; i < ndims; i++) n += jl_printf(out, (i > 0 ? 
", %" PRIdPTR : "%" PRIdPTR), jl_array_dim(v, i)); - n += jl_printf(out, ")}["); - size_t j, tlen = jl_array_len(v); - jl_array_t *av = (jl_array_t*)v; - jl_value_t *el_type = jl_tparam0(vt); - char *typetagdata = (!av->flags.ptrarray && jl_is_uniontype(el_type)) ? jl_array_typetagdata(av) : NULL; + n += jl_printf(out, "), mem="); + n += jl_static_show_x(out, (jl_value_t*)av->ref.mem, depth, ctx); + n += jl_printf(out, ")"); + } + else if (jl_genericmemoryref_type && jl_is_genericmemoryref_type(vt)) { + jl_genericmemoryref_t *ref = (jl_genericmemoryref_t*)v; + n += jl_printf(out, "MemoryRef(offset="); + size_t offset = (size_t)ref->ptr_or_offset; + if (ref->mem) { + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typeof(ref->mem))->layout; + if (layout->size != 0 && !layout->flags.arrayelem_isunion) + offset = ((char*)offset - (char*)ref->mem->ptr) / layout->size; + } + n += jl_printf(out, "%" PRIdPTR, offset); + n += jl_printf(out, ", ptr_or_offset=%p, mem=", ref->ptr_or_offset); + n += jl_static_show_x(out, (jl_value_t*)ref->mem, depth, ctx); + } + else if (jl_genericmemory_type && jl_is_genericmemory_type(vt)) { + jl_genericmemory_t *m = (jl_genericmemory_t*)v; + jl_value_t *isatomic = jl_tparam0(vt); + jl_value_t *addrspace = jl_tparam2(vt); + if (isatomic == (jl_value_t*)jl_not_atomic_sym && jl_is_addrspacecore(addrspace) && jl_unbox_uint8(addrspace) == 0) { + n += jl_printf(out, "Memory{"); + } + else { + n += jl_printf(out, "GenericMemory{"); + n += jl_static_show_x(out, isatomic, depth, ctx); + n += jl_printf(out, ", "); + n += jl_static_show_x(out, addrspace, depth, ctx); + n += jl_printf(out, ", "); + } + jl_value_t *el_type = jl_tparam1(vt); + n += jl_static_show_x(out, el_type, depth, ctx); + size_t j, tlen = m->length; + n += jl_printf(out, "}(%" PRIdPTR ", %p)[", tlen, m->ptr); +//#ifdef _P64 +// n += jl_printf(out, "0x%016" PRIx64, tlen); +//#else +// n += jl_printf(out, "0x%08" PRIx32, tlen); +//#endif + const jl_datatype_layout_t *layout = vt->layout; int nlsep = 0; - if (av->flags.ptrarray) { + const char *typetagdata = NULL; + if (layout->flags.arrayelem_isboxed) { // print arrays with newlines, unless the elements are probably small for (j = 0; j < tlen; j++) { - jl_value_t **ptr = ((jl_value_t**)av->data) + j; + jl_value_t **ptr = ((jl_value_t**)m->ptr) + j; jl_value_t *p = *ptr; if (p != NULL && (uintptr_t)p >= 4096U) { jl_value_t *p_ty = jl_typeof(p); @@ -1067,36 +1136,45 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt } } } - if (nlsep && tlen > 1) - n += jl_printf(out, "\n "); - for (j = 0; j < tlen; j++) { - if (av->flags.ptrarray) { - jl_value_t **ptr = ((jl_value_t**)av->data) + j; - n += jl_static_show_x(out, *ptr, depth); - } - else { - char *ptr = ((char*)av->data) + j * av->elsize; - n += jl_static_show_x_(out, (jl_value_t*)ptr, - typetagdata ? 
(jl_datatype_t*)jl_nth_union_component(el_type, typetagdata[j]) : (jl_datatype_t*)el_type, - depth); + else if (layout->flags.arrayelem_isunion) { + typetagdata = jl_genericmemory_typetagdata(m); + } + if (layout->size == 0 && tlen >= 3) { + n += jl_static_show_x_(out, (jl_value_t*)m->ptr, (jl_datatype_t*)el_type, depth, ctx); + n += jl_printf(out, ", ..."); + } + else { + if (nlsep && tlen > 1) + n += jl_printf(out, "\n "); + for (size_t j = 0; j < tlen; j++) { + if (layout->flags.arrayelem_isboxed) { + jl_value_t **ptr = ((jl_value_t**)m->ptr) + j; + n += jl_static_show_x(out, *ptr, depth, ctx); + } + else { + char *ptr = ((char*)m->ptr) + j * layout->size; + n += jl_static_show_x_(out, (jl_value_t*)ptr, + (jl_datatype_t*)(typetagdata ? jl_nth_union_component(el_type, typetagdata[j]) : el_type), + depth, ctx); + } + if (j != tlen - 1) + n += jl_printf(out, nlsep ? ",\n " : ", "); } - if (j != tlen - 1) - n += jl_printf(out, nlsep ? ",\n " : ", "); } n += jl_printf(out, "]"); } else if (vt == jl_loaderror_type) { n += jl_printf(out, "LoadError(at "); - n += jl_static_show_x(out, *(jl_value_t**)v, depth); + n += jl_static_show_x(out, *(jl_value_t**)v, depth, ctx); // Access the field directly to avoid allocation n += jl_printf(out, " line %" PRIdPTR, ((intptr_t*)v)[1]); n += jl_printf(out, ": "); - n += jl_static_show_x(out, ((jl_value_t**)v)[2], depth); + n += jl_static_show_x(out, ((jl_value_t**)v)[2], depth, ctx); n += jl_printf(out, ")"); } else if (vt == jl_errorexception_type) { n += jl_printf(out, "ErrorException("); - n += jl_static_show_x(out, *(jl_value_t**)v, depth); + n += jl_static_show_x(out, *(jl_value_t**)v, depth, ctx); n += jl_printf(out, ")"); } else if (jl_static_is_function_(vt) && is_globname_binding(v, (jl_datatype_t*)vt)) { @@ -1106,7 +1184,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt int globfunc = is_globfunction(v, dv, &sym); int quote = 0; if (jl_core_module && (dv->name->module != jl_core_module || !jl_module_exports_p(jl_core_module, sym))) { - n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth); + n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth, ctx); n += jl_printf(out, "."); size_t i = 0; @@ -1136,7 +1214,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt n += jl_printf(out, "NamedTuple"); } else if (!istuple) { - n += jl_static_show_x(out, (jl_value_t*)vt, depth); + n += jl_static_show_x(out, (jl_value_t*)vt, depth, ctx); } n += jl_printf(out, "("); size_t nb = jl_datatype_size(vt); @@ -1159,7 +1237,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt size_t offs = jl_field_offset(vt, i); char *fld_ptr = (char*)v + offs; if (jl_field_isptr(vt, i)) { - n += jl_static_show_x(out, *(jl_value_t**)fld_ptr, depth); + n += jl_static_show_x(out, *(jl_value_t**)fld_ptr, depth, ctx); } else { jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(vt, i); @@ -1167,7 +1245,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt uint8_t sel = ((uint8_t*)fld_ptr)[jl_field_size(vt, i) - 1]; ft = (jl_datatype_t*)jl_nth_union_component((jl_value_t*)ft, sel); } - n += jl_static_show_x_(out, (jl_value_t*)fld_ptr, ft, depth); + n += jl_static_show_x_(out, (jl_value_t*)fld_ptr, ft, depth, ctx); } if ((istuple || isnamedtuple) && tlen == 1) n += jl_printf(out, ","); @@ -1177,26 +1255,26 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt if (vt == jl_typemap_entry_type) { n += 
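The branch above for isbits-union elements reads a parallel byte array of type tags to decide which union component each unboxed slot holds. A self-contained sketch of that layout; the struct and sizes are invented, while the real code uses jl_genericmemory_typetagdata and jl_nth_union_component:

#include <stdint.h>
#include <string.h>

typedef struct { size_t size; } elem_type_t;   /* stand-in for a concrete jl_datatype_t */

static void read_union_elem(const char *data, const uint8_t *typetags, size_t slot_size,
                            const elem_type_t *components, size_t i, void *out)
{
    const elem_type_t *ty = &components[typetags[i]];   /* the tag selects the union component */
    memcpy(out, data + i * slot_size, ty->size);        /* slots are padded to a common size   */
}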
jl_printf(out, ", next=↩︎\n "); jl_value_t *next = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)v)->next); - n += jl_static_show_next_(out, next, v, depth); + n += jl_static_show_next_(out, next, v, depth, ctx); } } n += jl_printf(out, ")"); } else { n += jl_printf(out, ""); } return n; } -static size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, struct recur_list *depth) JL_NOTSAFEPOINT +static size_t jl_static_show_x(JL_STREAM *out, jl_value_t *v, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT { // show values without calling a julia method or allocating through the GC - return jl_static_show_next_(out, v, NULL, depth); + return jl_static_show_next_(out, v, NULL, depth, ctx); } -static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *prev, struct recur_list *depth) JL_NOTSAFEPOINT +static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *prev, struct recur_list *depth, jl_static_show_config_t ctx) JL_NOTSAFEPOINT { // helper for showing a typemap list by following the next pointers // while being careful about avoiding any recursion due to malformed (circular) references @@ -1211,13 +1289,13 @@ static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *pr *newdepth = &this_item, *p = depth; while (p) { - if (jl_typeis(v, jl_typemap_entry_type) && newdepth == &this_item) { + if (jl_typetagis(v, jl_typemap_entry_type) && newdepth == &this_item) { jl_value_t *m = p->v; unsigned nid = 1; - while (m && jl_typeis(m, jl_typemap_entry_type)) { + while (m && jl_typetagis(m, jl_typemap_entry_type)) { if (m == v) { return jl_printf(out, "sig, depth) + + jl_static_show_x(out, (jl_value_t*)((jl_typemap_entry_t*)m)->sig, depth, ctx) + jl_printf(out, ">"); } if (m == prev) { @@ -1230,7 +1308,7 @@ static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *pr jl_value_t *m2 = p->v; if (m2 == mnext) break; - while (m2 && jl_typeis(m2, jl_typemap_entry_type)) { + while (m2 && jl_typetagis(m2, jl_typemap_entry_type)) { jl_value_t *mnext2 = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)m2)->next); if (mnext2 == mnext) { if (m2 != m) @@ -1248,19 +1326,26 @@ static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *pr dist++; p = p->prev; } - return jl_static_show_x_(out, v, (jl_datatype_t*)jl_typeof(v), newdepth); + return jl_static_show_x_(out, v, (jl_datatype_t*)jl_typeof(v), newdepth, ctx); } JL_DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) JL_NOTSAFEPOINT { - return jl_static_show_x(out, v, 0); + jl_static_show_config_t ctx = { /* quiet */ 0 }; + return jl_static_show_x(out, v, 0, ctx); } JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_NOTSAFEPOINT +{ + jl_static_show_config_t ctx = { /* quiet */ 0 }; + return jl_static_show_func_sig_(s, type, ctx); +} + +size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_config_t ctx) JL_NOTSAFEPOINT { size_t n = 0; size_t i; - jl_value_t *ftype = (jl_value_t*)jl_first_argument_datatype(type); + jl_value_t *ftype = (jl_value_t*)jl_nth_argument_datatype(type, 1); if (ftype == NULL) return jl_static_show(s, type); jl_unionall_t *tvars = (jl_unionall_t*)type; @@ -1279,12 +1364,14 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N n += jl_static_show(s, type); return n; } - if (jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) { + if ((jl_nparams(ftype) == 0 || ftype == 
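jl_static_show_next_ above guards the typemap-entry walk against malformed circular `next` chains by keeping a chain of already-visited nodes on the C stack. A toy version of only that guard (names are invented; the real code also prints a compact back-reference instead of recursing):

#include <stdio.h>

struct entry { struct entry *next; int id; };
struct seen  { const struct entry *e; struct seen *prev; };

static void show_chain(const struct entry *e, struct seen *visited)
{
    for (struct seen *p = visited; p; p = p->prev) {
        if (p->e == e) {                      /* already printed: emit a back-reference */
            printf("(back to #%d)", e->id);
            return;
        }
    }
    printf("#%d", e->id);
    if (e->next) {
        printf(" -> ");
        struct seen here = { e, visited };    /* stack-allocated link, like recur_list */
        show_chain(e->next, &here);
    }
}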
((jl_datatype_t*)ftype)->name->wrapper) && + ((jl_datatype_t*)ftype)->name->mt != jl_type_type_mt && + ((jl_datatype_t*)ftype)->name->mt != jl_nonfunction_mt) { n += jl_printf(s, "%s", jl_symbol_name(((jl_datatype_t*)ftype)->name->mt->name)); } else { n += jl_printf(s, "(::"); - n += jl_static_show_x(s, ftype, depth); + n += jl_static_show_x(s, ftype, depth, ctx); n += jl_printf(s, ")"); } size_t tl = jl_nparams(type); @@ -1292,7 +1379,7 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N for (i = 1; i < tl; i++) { jl_value_t *tp = jl_tparam(type, i); if (i != tl - 1) { - n += jl_static_show_x(s, tp, depth); + n += jl_static_show_x(s, tp, depth, ctx); n += jl_printf(s, ", "); } else { @@ -1300,13 +1387,13 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N tp = jl_unwrap_vararg(tp); if (jl_is_unionall(tp)) n += jl_printf(s, "("); - n += jl_static_show_x(s, tp, depth); + n += jl_static_show_x(s, tp, depth, ctx); if (jl_is_unionall(tp)) n += jl_printf(s, ")"); n += jl_printf(s, "..."); } else { - n += jl_static_show_x(s, tp, depth); + n += jl_static_show_x(s, tp, depth, ctx); } } } @@ -1318,7 +1405,7 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N while (jl_is_unionall(tvars)) { if (!first) n += jl_printf(s, ", "); - n += jl_static_show_x(s, (jl_value_t*)tvars->var, first ? NULL : depth); + n += jl_static_show_x(s, (jl_value_t*)tvars->var, first ? NULL : depth, ctx); tvars = (jl_unionall_t*)tvars->body; if (!first) depth += 1; diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index e3543c9f62656..652dbedc7ebf3 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -31,14 +31,12 @@ void *jl_get_library_(const char *f_lib, int throw_err) { if (f_lib == NULL) return jl_RTLD_DEFAULT_handle; -#ifdef _OS_WINDOWS_ if (f_lib == JL_EXE_LIBNAME) return jl_exe_handle; if (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) return jl_libjulia_internal_handle; if (f_lib == JL_LIBJULIA_DL_LIBNAME) return jl_libjulia_handle; -#endif JL_LOCK(&libmap_lock); // This is the only operation we do on the map, which doesn't invalidate // any references or iterators. @@ -68,16 +66,20 @@ void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) * extern "C" JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name) { - char *f_lib; + void *lib_ptr; if (jl_is_symbol(lib_val)) - f_lib = jl_symbol_name((jl_sym_t*)lib_val); + lib_ptr = jl_get_library(jl_symbol_name((jl_sym_t*)lib_val)); else if (jl_is_string(lib_val)) - f_lib = jl_string_data(lib_val); - else + lib_ptr = jl_get_library(jl_string_data(lib_val)); + else if (jl_libdl_dlopen_func != NULL) { + // Call `dlopen(lib_val)`; this is the correct path for the `LazyLibrary` case, + // but it also takes any other value, and so we define `dlopen(x::Any) = throw(TypeError(...))`. 
+ lib_ptr = jl_unbox_voidpointer(jl_apply_generic(jl_libdl_dlopen_func, &lib_val, 1)); + } else jl_type_error("ccall", (jl_value_t*)jl_symbol_type, lib_val); void *ptr; - jl_dlsym(jl_get_library(f_lib), f_name, &ptr, 1); + jl_dlsym(lib_ptr, f_name, &ptr, 1); return ptr; } @@ -353,7 +355,7 @@ jl_value_t *jl_get_cfunction_trampoline( uv_mutex_lock(&trampoline_lock); tramp = trampoline_alloc(); ((void**)result)[0] = tramp; - tramp = init_trampoline(tramp, nval); + init_trampoline(tramp, nval); ptrhash_put(cache, (void*)fobj, result); uv_mutex_unlock(&trampoline_lock); return result; @@ -362,6 +364,6 @@ JL_GCC_IGNORE_STOP void jl_init_runtime_ccall(void) { - JL_MUTEX_INIT(&libmap_lock); + JL_MUTEX_INIT(&libmap_lock, "libmap_lock"); uv_mutex_init(&trampoline_lock); } diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 89c9449e55920..4569c2081ae88 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -5,8 +5,6 @@ // // this file assumes a little-endian processor, although that isn't too hard to fix // it also assumes two's complement negative numbers, which might be a bit harder to fix -// -// TODO: add half-float support #include "APInt-C.h" #include "julia.h" @@ -14,10 +12,7 @@ const unsigned int host_char_bit = 8; -// float16 intrinsics -// TODO: use LLVM's compiler-rt on all platforms (Xcode already links compiler-rt) - -#if !defined(_OS_DARWIN_) +// float16 conversion helpers static inline float half_to_float(uint16_t ival) JL_NOTSAFEPOINT { @@ -188,45 +183,207 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT return h; } -JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param) +static inline uint16_t double_to_half(double param) JL_NOTSAFEPOINT { + float temp = (float)param; + uint32_t tempi; + memcpy(&tempi, &temp, sizeof(temp)); + + // if Float16(res) is subnormal + if ((tempi&0x7fffffffu) < 0x38800000u) { + // shift so that the mantissa lines up where it would for normal Float16 + uint32_t shift = 113u-((tempi & 0x7f800000u)>>23u); + if (shift<23u) { + tempi |= 0x00800000; // set implicit bit + tempi >>= shift; + } + } + + // if we are halfway between 2 Float16 values + if ((tempi & 0x1fffu) == 0x1000u) { + memcpy(&tempi, &temp, sizeof(temp)); + // adjust the value by 1 ULP in the direction that will make Float16(temp) give the right answer + tempi += (fabs(temp) < fabs(param)) - (fabs(param) < fabs(temp)); + memcpy(&temp, &tempi, sizeof(temp)); + } + + return float_to_half(temp); +} + +// x86-specific helpers for emulating the (B)Float16 ABI +#if defined(_CPU_X86_) || defined(_CPU_X86_64_) +#include +static inline __m128 return_in_xmm(uint16_t input) JL_NOTSAFEPOINT { + __m128 xmm_output; + asm ( + "movd %[input], %%xmm0\n\t" + "movss %%xmm0, %[xmm_output]\n\t" + : [xmm_output] "=x" (xmm_output) + : [input] "r" ((uint32_t)input) + : "xmm0" + ); + return xmm_output; +} +static inline uint16_t take_from_xmm(__m128 xmm_input) JL_NOTSAFEPOINT { + uint32_t output; + asm ( + "movss %[xmm_input], %%xmm0\n\t" + "movd %%xmm0, %[output]\n\t" + : [output] "=r" (output) + : [xmm_input] "x" (xmm_input) + : "xmm0" + ); + return (uint16_t)output; +} +#endif + +// float16 conversion API + +// for use in APInt (without the ABI shenanigans from below) +uint16_t julia_float_to_half(float param) { + return float_to_half(param); +} +float julia_half_to_float(uint16_t param) { return half_to_float(param); } -JL_DLLEXPORT float __extendhfsf2(uint16_t param) +// starting with GCC 12 and Clang 15, we have _Float16 on most platforms +// (but not on Windows; this 
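The halfway adjustment in double_to_half above exists because double -> float -> half rounds twice, and when the intermediate float lands exactly between two Float16 values the second rounding can go the wrong way. The sketch below isolates just that nudge; the 0x1fff/0x1000 mask covers the 13 mantissa bits a float has beyond a half, and the subnormal handling from the code above is omitted:

#include <math.h>
#include <stdint.h>
#include <string.h>

static float nudge_toward_true_value(double param)
{
    float temp = (float)param;
    uint32_t bits;
    memcpy(&bits, &temp, sizeof bits);
    if ((bits & 0x1fffu) == 0x1000u) {                       /* exactly halfway between two halves */
        bits += (fabs(temp) < fabs(param)) - (fabs(param) < fabs(temp));
        memcpy(&temp, &bits, sizeof temp);                   /* break the tie toward `param` */
    }
    return temp;                                             /* now safe to convert to Float16 */
}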
may be a bug in the MSYS2 GCC compilers) +#if ((defined(__GNUC__) && __GNUC__ > 11) || \ + (defined(__clang__) && __clang_major__ > 14)) && \ + !defined(_CPU_PPC64_) && !defined(_CPU_PPC_) && \ + !defined(_OS_WINDOWS_) + #define FLOAT16_TYPE _Float16 + #define FLOAT16_TO_UINT16(x) (*(uint16_t*)&(x)) + #define FLOAT16_FROM_UINT16(x) (*(_Float16*)&(x)) +// on older compilers, we need to emulate the platform-specific ABI +#elif defined(_CPU_X86_) || (defined(_CPU_X86_64_) && !defined(_OS_WINDOWS_)) + // on x86, we can use __m128; except on Windows where x64 calling + // conventions expect to pass __m128 by reference. + #define FLOAT16_TYPE __m128 + #define FLOAT16_TO_UINT16(x) take_from_xmm(x) + #define FLOAT16_FROM_UINT16(x) return_in_xmm(x) +#elif defined(_CPU_PPC64_) || defined(_CPU_PPC_) + // on PPC, pass Float16 as if it were an integer, similar to the old x86 ABI + // before _Float16 + #define FLOAT16_TYPE uint16_t + #define FLOAT16_TO_UINT16(x) (x) + #define FLOAT16_FROM_UINT16(x) (x) +#else + // otherwise, pass using floating-point calling conventions + #define FLOAT16_TYPE float + #define FLOAT16_TO_UINT16(x) ((uint16_t)*(uint32_t*)&(x)) + #define FLOAT16_FROM_UINT16(x) ({ uint32_t tmp = (uint32_t)(x); *(float*)&tmp; }) +#endif + +JL_DLLEXPORT float julia__gnu_h2f_ieee(FLOAT16_TYPE param) { - return half_to_float(param); + uint16_t param16 = FLOAT16_TO_UINT16(param); + return half_to_float(param16); } -JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param) +JL_DLLEXPORT FLOAT16_TYPE julia__gnu_f2h_ieee(float param) { - return float_to_half(param); + uint16_t res = float_to_half(param); + return FLOAT16_FROM_UINT16(res); } -JL_DLLEXPORT uint16_t __truncdfhf2(double param) +JL_DLLEXPORT FLOAT16_TYPE julia__truncdfhf2(double param) { - float res = (float)param; - uint32_t resi; - memcpy(&resi, &res, sizeof(res)); - if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal - // shift so that the mantissa lines up where it would for normal Float16 - uint32_t shift = 113u-((resi & 0x7f800000u)>>23u); - if (shift<23u) { - resi |= 0x00800000; // set implicit bit - resi >>= shift; - } - } - if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values - memcpy(&resi, &res, sizeof(res)); - // adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer - resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res)); - memcpy(&res, &resi, sizeof(res)); + uint16_t res = double_to_half(param); + return FLOAT16_FROM_UINT16(res); +} + + +// bfloat16 conversion helpers + +static inline uint16_t float_to_bfloat(float param) JL_NOTSAFEPOINT +{ + if (isnan(param)) + return 0x7fc0; + + uint32_t bits = *((uint32_t*) ¶m); + + // round to nearest even + bits += 0x7fff + ((bits >> 16) & 1); + return (uint16_t)(bits >> 16); +} + +static inline uint16_t double_to_bfloat(double param) JL_NOTSAFEPOINT +{ + float temp = (float)param; + uint32_t tempi; + memcpy(&tempi, &temp, sizeof(temp)); + + // bfloat16 uses the same exponent as float32, so we don't need special handling + // for subnormals when truncating float64 to bfloat16. 
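float_to_bfloat above rounds to nearest-even by adding 0x7fff plus the lowest kept bit before dropping the low 16 bits of the float representation. A standalone version of the same trick for reference, with NaN canonicalized to 0x7fc0 as in the code above:

#include <stdint.h>
#include <string.h>

static uint16_t f32_to_bf16(float x)
{
    uint32_t bits;
    memcpy(&bits, &x, sizeof bits);
    if ((bits & 0x7fffffffu) > 0x7f800000u)     /* NaN: return a canonical quiet NaN */
        return 0x7fc0;
    bits += 0x7fffu + ((bits >> 16) & 1u);      /* round to nearest, ties to even */
    return (uint16_t)(bits >> 16);
}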
+ + // if we are halfway between 2 bfloat16 values + if ((tempi & 0x1ffu) == 0x100u) { + // adjust the value by 1 ULP in the direction that will make bfloat16(temp) give the right answer + tempi += (fabs(temp) < fabs(param)) - (fabs(param) < fabs(temp)); + memcpy(&temp, &tempi, sizeof(temp)); } - return float_to_half(res); + + return float_to_bfloat(temp); } +static inline float bfloat_to_float(uint16_t param) JL_NOTSAFEPOINT +{ + uint32_t bits = ((uint32_t)param) << 16; + float result; + memcpy(&result, &bits, sizeof(result)); + return result; +} + +// bfloat16 conversion API + +// for use in APInt (without the ABI shenanigans from below) +uint16_t julia_float_to_bfloat(float param) { + return float_to_bfloat(param); +} +float julia_bfloat_to_float(uint16_t param) { + return bfloat_to_float(param); +} + +// starting with GCC 13 and Clang 17, we have __bf16 on most platforms +// (but not on Windows; this may be a bug in the MSYS2 GCC compilers) +#if ((defined(__GNUC__) && __GNUC__ > 12) || \ + (defined(__clang__) && __clang_major__ > 16)) && \ + !defined(_CPU_PPC64_) && !defined(_CPU_PPC_) && \ + !defined(_OS_WINDOWS_) + #define BFLOAT16_TYPE __bf16 + #define BFLOAT16_TO_UINT16(x) (*(uint16_t*)&(x)) + #define BFLOAT16_FROM_UINT16(x) (*(__bf16*)&(x)) +// on older compilers, we need to emulate the platform-specific ABI. +// for more details, see similar code above that deals with Float16. +#elif defined(_CPU_X86_) || (defined(_CPU_X86_64_) && !defined(_OS_WINDOWS_)) + #define BFLOAT16_TYPE __m128 + #define BFLOAT16_TO_UINT16(x) take_from_xmm(x) + #define BFLOAT16_FROM_UINT16(x) return_in_xmm(x) +#elif defined(_CPU_PPC64_) || defined(_CPU_PPC_) + #define BFLOAT16_TYPE uint16_t + #define BFLOAT16_TO_UINT16(x) (x) + #define BFLOAT16_FROM_UINT16(x) (x) +#else + #define BFLOAT16_TYPE float + #define BFLOAT16_TO_UINT16(x) ((uint16_t)*(uint32_t*)&(x)) + #define BFLOAT16_FROM_UINT16(x) ({ uint32_t tmp = (uint32_t)(x); *(float*)&tmp; }) #endif +JL_DLLEXPORT BFLOAT16_TYPE julia__truncsfbf2(float param) JL_NOTSAFEPOINT +{ + uint16_t res = float_to_bfloat(param); + return BFLOAT16_FROM_UINT16(res); +} + +JL_DLLEXPORT BFLOAT16_TYPE julia__truncdfbf2(double param) JL_NOTSAFEPOINT +{ + uint16_t res = double_to_bfloat(param); + return BFLOAT16_FROM_UINT16(res); +} + + // run time version of bitcast intrinsic JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v) { @@ -419,6 +576,8 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp jl_atomic_error("atomic_pointerreplace: invalid atomic ordering"); // TODO: filter other invalid orderings jl_value_t *ety = jl_tparam0(jl_typeof(p)); + if (!is_valid_intrinsic_elptr(ety)) + jl_error("atomic_pointerreplace: invalid pointer"); char *pp = (char*)jl_unbox_long(p); jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety); JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) @@ -437,8 +596,6 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp return result; } else { - if (!is_valid_intrinsic_elptr(ety)) - jl_error("atomic_pointerreplace: invalid pointer"); if (jl_typeof(x) != ety) jl_type_error("atomic_pointerreplace", ety, x); size_t nb = jl_datatype_size(ety); @@ -480,14 +637,14 @@ JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty) char *f_lib = NULL; if (jl_is_tuple(v) && jl_nfields(v) > 1) { - jl_value_t *t1 = jl_fieldref_noalloc(v, 1); - v = jl_fieldref(v, 0); + jl_value_t *t1 = jl_fieldref(v, 1); if (jl_is_symbol(t1)) f_lib = jl_symbol_name((jl_sym_t*)t1); else if 
(jl_is_string(t1)) f_lib = jl_string_data(t1); else JL_TYPECHK(cglobal, symbol, t1) + v = jl_fieldref(v, 0); } char *f_name = NULL; @@ -498,10 +655,8 @@ JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty) else JL_TYPECHK(cglobal, symbol, v) -#ifdef _OS_WINDOWS_ if (!f_lib) - f_lib = (char*)jl_dlfind_win32(f_name); -#endif + f_lib = (char*)jl_dlfind(f_name); void *ptr; jl_dlsym(jl_get_library(f_lib), f_name, &ptr, 1); @@ -551,9 +706,9 @@ static inline unsigned select_by_size(unsigned sz) JL_NOTSAFEPOINT } #define fp_select(a, func) \ - sizeof(a) == sizeof(float) ? func##f((float)a) : func(a) + sizeof(a) <= sizeof(float) ? func##f((float)a) : func(a) #define fp_select2(a, b, func) \ - sizeof(a) == sizeof(float) ? func##f(a, b) : func(a, b) + sizeof(a) <= sizeof(float) ? func##f(a, b) : func(a, b) // fast-function generators // @@ -587,25 +742,39 @@ static inline unsigned jl_##name##nbits(unsigned runtime_nbits, void *pa) JL_NOT // nbits::number of bits in the *input* // c_type::c_type corresponding to nbits #define un_fintrinsic_ctype(OP, name, c_type) \ -static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \ +static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ { \ c_type a = *(c_type*)pa; \ - OP((c_type*)pr, a); \ + OP(ty, (c_type*)pr, a); \ } #define un_fintrinsic_half(OP, name) \ -static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \ +static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ { \ uint16_t a = *(uint16_t*)pa; \ - float A = __gnu_h2f_ieee(a); \ + float A = half_to_float(a); \ if (osize == 16) { \ float R; \ - OP(&R, A); \ - *(uint16_t*)pr = __gnu_f2h_ieee(R); \ + OP(ty, &R, A); \ + *(uint16_t*)pr = float_to_half(R); \ } else { \ - OP((uint16_t*)pr, A); \ + OP(ty, (uint16_t*)pr, A); \ } \ - } +} + +#define un_fintrinsic_bfloat(OP, name) \ +static inline void name(unsigned osize, jl_value_t *ty, void *pa, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + float A = bfloat_to_float(a); \ + if (osize == 16) { \ + float R; \ + OP(ty, &R, A); \ + *(uint16_t*)pr = float_to_bfloat(R); \ + } else { \ + OP(ty, (uint16_t*)pr, A); \ + } \ +} // float or integer inputs // OP::Function macro(inputa, inputb) @@ -625,11 +794,24 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pr) { \ uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ - float A = __gnu_h2f_ieee(a); \ - float B = __gnu_h2f_ieee(b); \ + float A = half_to_float(a); \ + float B = half_to_float(b); \ runtime_nbits = 16; \ float R = OP(A, B); \ - *(uint16_t*)pr = __gnu_f2h_ieee(R); \ + *(uint16_t*)pr = float_to_half(R); \ + *(uint16_t*)pr = float_to_half(R); \ +} + +#define bi_intrinsic_bfloat(OP, name) \ +static void jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + float A = bfloat_to_float(a); \ + float B = bfloat_to_float(b); \ + runtime_nbits = 16; \ + float R = OP(A, B); \ + *(uint16_t*)pr = float_to_bfloat(R); \ } // float or integer inputs, bool output @@ -650,8 +832,19 @@ static int jl_##name##16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEP { \ uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ - float A = __gnu_h2f_ieee(a); \ - float B = __gnu_h2f_ieee(b); \ + float A = half_to_float(a); \ + float B = half_to_float(b); \ + runtime_nbits = 16; \ + return OP(A, B); \ +} + +#define 
bool_intrinsic_bfloat(OP, name) \ +static int jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + float A = bfloat_to_float(a); \ + float B = bfloat_to_float(b); \ runtime_nbits = 16; \ return OP(A, B); \ } @@ -691,12 +884,27 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pc, uint16_t a = *(uint16_t*)pa; \ uint16_t b = *(uint16_t*)pb; \ uint16_t c = *(uint16_t*)pc; \ - float A = __gnu_h2f_ieee(a); \ - float B = __gnu_h2f_ieee(b); \ - float C = __gnu_h2f_ieee(c); \ + float A = half_to_float(a); \ + float B = half_to_float(b); \ + float C = half_to_float(c); \ + runtime_nbits = 16; \ + float R = OP(A, B, C); \ + *(uint16_t*)pr = float_to_half(R); \ + *(uint16_t*)pr = float_to_half(R); \ +} + +#define ter_intrinsic_bfloat(OP, name) \ +static void jl_##name##bf16(unsigned runtime_nbits, void *pa, void *pb, void *pc, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + uint16_t c = *(uint16_t*)pc; \ + float A = bfloat_to_float(a); \ + float B = bfloat_to_float(b); \ + float C = bfloat_to_float(c); \ runtime_nbits = 16; \ float R = OP(A, B, C); \ - *(uint16_t*)pr = __gnu_f2h_ieee(R); \ + *(uint16_t*)pr = float_to_bfloat(R); \ } @@ -707,7 +915,7 @@ SELECTOR_FUNC(intrinsic_1) #define un_iintrinsic(name, u) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \ { \ - return jl_iintrinsic_1(jl_typeof(a), a, #name, u##signbitbyte, jl_intrinsiclambda_ty1, name##_list); \ + return jl_iintrinsic_1(a, #name, u##signbitbyte, jl_intrinsiclambda_ty1, name##_list); \ } #define un_iintrinsic_fast(LLVMOP, OP, name, u) \ un_iintrinsic_ctype(OP, name, 8, u##int##8_t) \ @@ -733,7 +941,7 @@ SELECTOR_FUNC(intrinsic_u1) #define uu_iintrinsic(name, u) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \ { \ - return jl_iintrinsic_1(jl_typeof(a), a, #name, u##signbitbyte, jl_intrinsiclambda_u1, name##_list); \ + return jl_iintrinsic_1(a, #name, u##signbitbyte, jl_intrinsiclambda_u1, name##_list); \ } #define uu_iintrinsic_fast(LLVMOP, OP, name, u) \ uu_iintrinsic_ctype(OP, name, 8, u##int##8_t) \ @@ -755,14 +963,13 @@ static const select_intrinsic_u1_t name##_list = { \ uu_iintrinsic(name, u) static inline -jl_value_t *jl_iintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, +jl_value_t *jl_iintrinsic_1(jl_value_t *a, const char *name, char (*getsign)(void*, unsigned), jl_value_t *(*lambda1)(jl_value_t*, void*, unsigned, unsigned, const void*), const void *list) { - if (!jl_is_primitivetype(jl_typeof(a))) - jl_errorf("%s: value is not a primitive type", name); + jl_value_t *ty = jl_typeof(a); if (!jl_is_primitivetype(ty)) - jl_errorf("%s: type is not a primitive type", name); + jl_errorf("%s: value is not a primitive type", name); void *pa = jl_data_ptr(a); unsigned isize = jl_datatype_size(jl_typeof(a)); unsigned isize2 = next_power_of_two(isize); @@ -813,7 +1020,7 @@ static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsign // conversion operator -typedef void (*intrinsic_cvt_t)(unsigned, void*, unsigned, void*); +typedef void (*intrinsic_cvt_t)(jl_datatype_t*, void*, jl_datatype_t*, void*); typedef unsigned (*intrinsic_cvt_check_t)(unsigned, unsigned, void*); #define cvt_iintrinsic(LLVMOP, name) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \ @@ -823,30 +1030,29 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \ static inline jl_value_t *jl_intrinsic_cvt(jl_value_t 
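All of the 16-bit paths in the macros above share one shape: unbox a uint16_t, widen it to float, do the arithmetic in float, and narrow the result back. A hand-expanded sketch of that shape for a unary op, with the widen/narrow callbacks standing in for half_to_float / float_to_half (or the bfloat pair):

#include <stdint.h>

static void un_op_16(const void *pa, void *pr,
                     float (*widen)(uint16_t), uint16_t (*narrow)(float),
                     float (*op)(float))
{
    uint16_t a = *(const uint16_t *)pa;
    float A = widen(a);              /* promote: Float16/BFloat16 -> float */
    float R = op(A);                 /* compute in float, the only supported width */
    *(uint16_t *)pr = narrow(R);     /* demote back to 16-bit storage */
}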
*ty, jl_value_t *a, const char *name, intrinsic_cvt_t op) { + JL_TYPECHKS(name, datatype, ty); + if (!jl_is_concrete_type(ty) || !jl_is_primitivetype(ty)) + jl_errorf("%s: target type not a leaf primitive type", name); jl_value_t *aty = jl_typeof(a); if (!jl_is_primitivetype(aty)) jl_errorf("%s: value is not a primitive type", name); - if (!jl_is_primitivetype(ty)) - jl_errorf("%s: type is not a primitive type", name); void *pa = jl_data_ptr(a); - unsigned isize = jl_datatype_size(aty); unsigned osize = jl_datatype_size(ty); void *pr = alloca(osize); - unsigned isize_bits = isize * host_char_bit; - unsigned osize_bits = osize * host_char_bit; - op(isize_bits, pa, osize_bits, pr); + op((jl_datatype_t*)aty, pa, (jl_datatype_t*)ty, pr); return jl_new_bits(ty, pr); } // floating point #define un_fintrinsic_withtype(OP, name) \ +un_fintrinsic_bfloat(OP, jl_##name##bf16) \ un_fintrinsic_half(OP, jl_##name##16) \ un_fintrinsic_ctype(OP, jl_##name##32, float) \ un_fintrinsic_ctype(OP, jl_##name##64, double) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \ { \ - return jl_fintrinsic_1(ty, a, #name, jl_##name##16, jl_##name##32, jl_##name##64); \ + return jl_fintrinsic_1(ty, a, #name, jl_##name##bf16, jl_##name##16, jl_##name##32, jl_##name##64); \ } #define un_fintrinsic(OP, name) \ @@ -856,9 +1062,9 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \ return jl_##name##_withtype(jl_typeof(a), a); \ } -typedef void (fintrinsic_op1)(unsigned, void*, void*); +typedef void (fintrinsic_op1)(unsigned, jl_value_t*, void*, void*); -static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop) +static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *bfloatop, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop) { jl_task_t *ct = jl_current_task; if (!jl_is_primitivetype(jl_typeof(a))) @@ -872,13 +1078,16 @@ static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const c switch (sz) { /* choose the right size c-type operation based on the input */ case 2: - halfop(sz2 * host_char_bit, pa, pr); + if (jl_typeof(a) == (jl_value_t*)jl_float16_type) + halfop(sz2 * host_char_bit, ty, pa, pr); + else /*if (jl_typeof(a) == (jl_value_t*)jl_bfloat16_type)*/ + bfloatop(sz2 * host_char_bit, ty, pa, pr); break; case 4: - floatop(sz2 * host_char_bit, pa, pr); + floatop(sz2 * host_char_bit, ty, pa, pr); break; case 8: - doubleop(sz2 * host_char_bit, pa, pr); + doubleop(sz2 * host_char_bit, ty, pa, pr); break; default: jl_errorf("%s: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64", name); @@ -1025,10 +1234,10 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v jl_value_t *params[2]; params[0] = ty; params[1] = (jl_value_t*)jl_bool_type; - jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2); + jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2); JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE) jl_task_t *ct = jl_current_task; - jl_value_t *newv = jl_gc_alloc(ct->ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp); + jl_value_t *newv = jl_gc_alloc(ct->ptls, jl_datatype_size(tuptyp), tuptyp); intrinsic_checked_t op = select_intrinsic_checked(sz2, (const intrinsic_checked_t*)voidlist); int ovflw = op(sz * host_char_bit, pa, pb, jl_data_ptr(newv)); @@ -1050,6 +1259,7 @@ static inline jl_value_t 
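jl_fintrinsic_1 above now needs the value's type as well as its size, because a 2-byte primitive may be either a Float16 or a BFloat16. A minimal sketch of that dispatch; the enum and function names are illustrative:

#include <stddef.h>

enum small_float { F16, BF16 };
typedef void (*fp_op_t)(const void *, void *);

static void dispatch_unop(size_t size, enum small_float kind, const void *pa, void *pr,
                          fp_op_t op16, fp_op_t opbf16, fp_op_t op32, fp_op_t op64)
{
    switch (size) {
    case 2: (kind == F16 ? op16 : opbf16)(pa, pr); break;   /* Float16 vs BFloat16 */
    case 4: op32(pa, pr); break;
    case 8: op64(pa, pr); break;
    default: break;                                          /* other sizes are rejected at runtime */
    }
}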
*jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa // floating point #define bi_fintrinsic(OP, name) \ + bi_intrinsic_bfloat(OP, name) \ bi_intrinsic_half(OP, name) \ bi_intrinsic_ctype(OP, name, 32, float) \ bi_intrinsic_ctype(OP, name, 64, double) \ @@ -1067,7 +1277,10 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ switch (sz) { \ /* choose the right size c-type operation */ \ case 2: \ - jl_##name##16(16, pa, pb, pr); \ + if ((jl_datatype_t*)ty == jl_float16_type) \ + jl_##name##16(16, pa, pb, pr); \ + else /*if ((jl_datatype_t*)ty == jl_bfloat16_type)*/ \ + jl_##name##bf16(16, pa, pb, pr); \ break; \ case 4: \ jl_##name##32(32, pa, pb, pr); \ @@ -1082,6 +1295,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ } #define bool_fintrinsic(OP, name) \ + bool_intrinsic_bfloat(OP, name) \ bool_intrinsic_half(OP, name) \ bool_intrinsic_ctype(OP, name, 32, float) \ bool_intrinsic_ctype(OP, name, 64, double) \ @@ -1098,7 +1312,10 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ switch (sz) { \ /* choose the right size c-type operation */ \ case 2: \ - cmp = jl_##name##16(16, pa, pb); \ + if ((jl_datatype_t*)ty == jl_float16_type) \ + cmp = jl_##name##16(16, pa, pb); \ + else /*if ((jl_datatype_t*)ty == jl_bfloat16_type)*/ \ + cmp = jl_##name##bf16(16, pa, pb); \ break; \ case 4: \ cmp = jl_##name##32(32, pa, pb); \ @@ -1113,6 +1330,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ } #define ter_fintrinsic(OP, name) \ + ter_intrinsic_bfloat(OP, name) \ ter_intrinsic_half(OP, name) \ ter_intrinsic_ctype(OP, name, 32, float) \ ter_intrinsic_ctype(OP, name, 64, double) \ @@ -1130,7 +1348,10 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) switch (sz) { \ /* choose the right size c-type operation */ \ case 2: \ - jl_##name##16(16, pa, pb, pc, pr); \ + if ((jl_datatype_t*)ty == jl_float16_type) \ + jl_##name##16(16, pa, pb, pc, pr); \ + else /*if ((jl_datatype_t*)ty == jl_bfloat16_type)*/ \ + jl_##name##bf16(16, pa, pb, pc, pr); \ break; \ case 4: \ jl_##name##32(32, pa, pb, pc, pr); \ @@ -1146,7 +1367,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) // arithmetic #define neg(a) -a -#define neg_float(pr, a) *pr = -a +#define neg_float(ty, pr, a) *pr = -a un_iintrinsic_fast(LLVMNeg, neg, neg_int, u) #define add(a,b) a + b bi_iintrinsic_fast(LLVMAdd, add, add_int, u) @@ -1172,7 +1393,6 @@ bi_fintrinsic(add,add_float) bi_fintrinsic(sub,sub_float) bi_fintrinsic(mul,mul_float) bi_fintrinsic(div,div_float) -bi_fintrinsic(frem,rem_float) // ternary operators // // runtime fma is broken on windows, define julia_fma(f) ourself with fma_emulated as reference. @@ -1318,7 +1538,7 @@ static inline int fpiseq##nbits(c_type a, c_type b) JL_NOTSAFEPOINT { \ fpiseq_n(float, 32) fpiseq_n(double, 64) #define fpiseq(a,b) \ - sizeof(a) == sizeof(float) ? fpiseq32(a, b) : fpiseq64(a, b) + sizeof(a) <= sizeof(float) ? 
fpiseq32(a, b) : fpiseq64(a, b) bool_fintrinsic(eq,eq_float) bool_fintrinsic(ne,ne_float) @@ -1363,18 +1583,22 @@ cvt_iintrinsic(LLVMUItoFP, uitofp) cvt_iintrinsic(LLVMFPtoSI, fptosi) cvt_iintrinsic(LLVMFPtoUI, fptoui) -#define fptrunc(pr, a) \ +#define fptrunc(tr, pr, a) \ if (!(osize < 8 * sizeof(a))) \ jl_error("fptrunc: output bitsize must be < input bitsize"); \ - else if (osize == 16) \ - *(uint16_t*)pr = __gnu_f2h_ieee(a); \ + else if (osize == 16) { \ + if ((jl_datatype_t*)tr == jl_float16_type) \ + *(uint16_t*)pr = float_to_half(a); \ + else /*if ((jl_datatype_t*)tr == jl_bfloat16_type)*/ \ + *(uint16_t*)pr = float_to_bfloat(a); \ + } \ else if (osize == 32) \ *(float*)pr = a; \ else if (osize == 64) \ *(double*)pr = a; \ else \ jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); -#define fpext(pr, a) \ +#define fpext(tr, pr, a) \ if (!(osize >= 8 * sizeof(a))) \ jl_error("fpext: output bitsize must be >= input bitsize"); \ if (osize == 32) \ @@ -1431,12 +1655,12 @@ checked_iintrinsic_div(LLVMRem_uov, checked_urem_int, u) #define flipsign(a, b) \ (b >= 0) ? a : -a bi_iintrinsic_fast(jl_LLVMFlipSign, flipsign, flipsign_int, ) -#define abs_float(pr, a) *pr = fp_select(a, fabs) -#define ceil_float(pr, a) *pr = fp_select(a, ceil) -#define floor_float(pr, a) *pr = fp_select(a, floor) -#define trunc_float(pr, a) *pr = fp_select(a, trunc) -#define rint_float(pr, a) *pr = fp_select(a, rint) -#define sqrt_float(pr, a) *pr = fp_select(a, sqrt) +#define abs_float(ty, pr, a) *pr = fp_select(a, fabs) +#define ceil_float(ty, pr, a) *pr = fp_select(a, ceil) +#define floor_float(ty, pr, a) *pr = fp_select(a, floor) +#define trunc_float(ty, pr, a) *pr = fp_select(a, trunc) +#define rint_float(ty, pr, a) *pr = fp_select(a, rint) +#define sqrt_float(ty, pr, a) *pr = fp_select(a, sqrt) #define copysign_float(a, b) fp_select2(a, b, copysign) un_fintrinsic(abs_float,abs_float) @@ -1447,16 +1671,15 @@ un_fintrinsic(trunc_float,trunc_llvm) un_fintrinsic(rint_float,rint_llvm) un_fintrinsic(sqrt_float,sqrt_llvm) un_fintrinsic(sqrt_float,sqrt_llvm_fast) - -JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a) -{ - JL_TYPECHK(arraylen, array, a); - return jl_box_long(jl_array_len((jl_array_t*)a)); -} +jl_value_t *jl_cpu_has_fma(int bits); JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ) { - JL_TYPECHK(have_fma, datatype, typ); - // TODO: run-time feature check? - return jl_false; + JL_TYPECHK(have_fma, datatype, typ); // TODO what about float16/bfloat16? + if (typ == (jl_value_t*)jl_float32_type) + return jl_cpu_has_fma(32); + else if (typ == (jl_value_t*)jl_float64_type) + return jl_cpu_has_fma(64); + else + return jl_false; } diff --git a/src/safepoint.c b/src/safepoint.c index b2feccf74e068..a05d37e492813 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -30,7 +30,8 @@ char *jl_safepoint_pages = NULL; // so that both safepoint load and pending signal load falls in this page. // The initialization of the `safepoint` pointer is done `ti_initthread` // in `threading.c`. -uint8_t jl_safepoint_enable_cnt[3] = {0, 0, 0}; +// The fourth page is the count of suspended threads +uint16_t jl_safepoint_enable_cnt[4] = {0, 0, 0, 0}; // This lock should be acquired before enabling/disabling the safepoint // or accessing one of the following variables: @@ -43,17 +44,18 @@ uint8_t jl_safepoint_enable_cnt[3] = {0, 0, 0}; // load/store so that threads waiting for the GC doesn't have to also // fight on the safepoint lock... 
uv_mutex_t safepoint_lock; -uv_cond_t safepoint_cond; +uv_cond_t safepoint_cond_begin; +uv_cond_t safepoint_cond_end; static void jl_safepoint_enable(int idx) JL_NOTSAFEPOINT { // safepoint_lock should be held - assert(0 <= idx && idx < 3); + assert(0 <= idx && idx <= 3); if (jl_safepoint_enable_cnt[idx]++ != 0) { // We expect this to be enabled at most twice // one for the GC, one for SIGINT. // Update this if this is not the case anymore in the future. - assert(jl_safepoint_enable_cnt[idx] <= 2); + assert(jl_safepoint_enable_cnt[idx] <= (idx == 3 ? INT16_MAX : 2)); return; } // Now that we are requested to mprotect the page and it wasn't already. @@ -62,14 +64,15 @@ static void jl_safepoint_enable(int idx) JL_NOTSAFEPOINT DWORD old_prot; VirtualProtect(pageaddr, jl_page_size, PAGE_NOACCESS, &old_prot); #else - mprotect(pageaddr, jl_page_size, PROT_NONE); + int r = mprotect(pageaddr, jl_page_size, PROT_NONE); + (void)r; //if (r) perror("mprotect"); #endif } static void jl_safepoint_disable(int idx) JL_NOTSAFEPOINT { // safepoint_lock should be held - assert(0 <= idx && idx < 3); + assert(0 <= idx && idx <= 3); if (--jl_safepoint_enable_cnt[idx] != 0) { assert(jl_safepoint_enable_cnt[idx] > 0); return; @@ -81,20 +84,22 @@ static void jl_safepoint_disable(int idx) JL_NOTSAFEPOINT DWORD old_prot; VirtualProtect(pageaddr, jl_page_size, PAGE_READONLY, &old_prot); #else - mprotect(pageaddr, jl_page_size, PROT_READ); + int r = mprotect(pageaddr, jl_page_size, PROT_READ); + (void)r; //if (r) perror("mprotect"); #endif } void jl_safepoint_init(void) { uv_mutex_init(&safepoint_lock); - uv_cond_init(&safepoint_cond); + uv_cond_init(&safepoint_cond_begin); + uv_cond_init(&safepoint_cond_end); // jl_page_size isn't available yet. size_t pgsz = jl_getpagesize(); #ifdef _OS_WINDOWS_ - char *addr = (char*)VirtualAlloc(NULL, pgsz * 3, MEM_COMMIT, PAGE_READONLY); + char *addr = (char*)VirtualAlloc(NULL, pgsz * 4, MEM_COMMIT, PAGE_READONLY); #else - char *addr = (char*)mmap(0, pgsz * 3, PROT_READ, + char *addr = (char*)mmap(0, pgsz * 4, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) addr = NULL; @@ -104,20 +109,68 @@ void jl_safepoint_init(void) jl_gc_debug_critical_error(); abort(); } +// // If we able to skip past the faulting safepoint instruction conditionally, +// // then we can make this safepoint page unconditional. But otherwise we +// // only enable this page when required, though it gives us less +// // fine-grained control over individual resume. +// char *pageaddr = addr + pgsz * 3; +//#ifdef _OS_WINDOWS_ +// DWORD old_prot; +// VirtualProtect(pageaddr, pgsz, PAGE_NOACCESS, &old_prot); +//#else +// int r = mprotect(pageaddr, pgsz, PROT_NONE); +// (void)r; //if (r) perror("mprotect"); +//#endif // The signal page is for the gc safepoint. // The page before it is the sigint pending flag. 
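For readers new to this file, the reason "enabling" a safepoint in `jl_safepoint_enable`/`jl_safepoint_disable` above is an `mprotect`/`VirtualProtect` call is that a safepoint poll is nothing more than a load from one of these pages: while the page is readable the poll is essentially free, and once it is made inaccessible every polling thread faults into the signal handler, where the runtime parks it. A self-contained sketch of just that mechanism (invented names, POSIX only, none of the runtime's counting or locking):

#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static void *example_safepoint_page;

static void example_safepoint_init(void)
{
    size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
    example_safepoint_page = mmap(NULL, pgsz, PROT_READ,
                                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (example_safepoint_page == MAP_FAILED)
        abort();
}

static void example_safepoint_arm(int arm)
{
    size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
    // PROT_NONE makes every subsequent poll fault; PROT_READ makes it cheap again
    mprotect(example_safepoint_page, pgsz, arm ? PROT_NONE : PROT_READ);
}

static void example_safepoint_poll(void)
{
    // the poll itself is a single read; in the real runtime the SIGSEGV/EXC_BAD_ACCESS
    // handler recognizes the page address and suspends the thread instead of crashing
    (void)*(volatile uintptr_t *)example_safepoint_page;
}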
 jl_safepoint_pages = addr;
 }

-int jl_safepoint_start_gc(void)
+void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
 {
-    if (jl_n_threads == 1) {
-        jl_atomic_store_relaxed(&jl_gc_running, 1);
-        return 1;
+    JL_TIMING(GC, GC_Stop);
+#ifdef USE_TRACY
+    TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx;
+    TracyCZoneColor(ctx, 0x696969);
+#endif
+    assert(gc_n_threads);
+    if (gc_n_threads > 1)
+        jl_wake_libuv();
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL) {
+            // This acquire load pairs with the release stores
+            // in the signal handler of safepoint so we are sure that
+            // all the stores on those threads are visible.
+            // We're currently also using atomic store release in mutator threads
+            // (in jl_gc_state_set), but we may want to use signals to flush the
+            // memory operations on those threads lazily instead.
+            while (!jl_atomic_load_relaxed(&ptls2->gc_state) || !jl_atomic_load_acquire(&ptls2->gc_state)) {
+                // Use system mutexes rather than spin locking to minimize wasted CPU time
+                // while we wait for other threads to reach a safepoint.
+                // This is particularly important when run under rr.
+                uv_mutex_lock(&safepoint_lock);
+                if (!jl_atomic_load_relaxed(&ptls2->gc_state))
+                    uv_cond_wait(&safepoint_cond_begin, &safepoint_lock);
+                uv_mutex_unlock(&safepoint_lock);
+            }
+        }
     }
-    // The thread should have set this already
+}
+
+int jl_safepoint_start_gc(void)
+{
+    // The thread should have just set this before entry
     assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) == JL_GC_STATE_WAITING);
     uv_mutex_lock(&safepoint_lock);
+    uv_cond_broadcast(&safepoint_cond_begin);
+    // make sure we are permitted to run GC now (we might be required to stop instead)
+    jl_task_t *ct = jl_current_task;
+    while (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) {
+        uv_mutex_unlock(&safepoint_lock);
+        jl_safepoint_wait_thread_resume();
+        uv_mutex_lock(&safepoint_lock);
+    }
     // In case multiple threads enter the GC at the same time, only allow
     // one of them to actually run the collection. We can't just let the
     // master thread do the GC since it might be running unmanaged code
@@ -128,6 +181,14 @@ int jl_safepoint_start_gc(void)
         jl_safepoint_wait_gc();
         return 0;
     }
+    // Foreign thread adoption disables the GC and waits for it to finish; however, that may
+    // introduce a race between it and this thread checking if the GC is enabled and only
+    // then setting jl_gc_running. To avoid that, check again now that we won that race.
+    if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
+        jl_atomic_store_release(&jl_gc_running, 0);
+        uv_mutex_unlock(&safepoint_lock);
+        return 0;
+    }
     jl_safepoint_enable(1);
     jl_safepoint_enable(2);
     uv_mutex_unlock(&safepoint_lock);
@@ -137,10 +198,6 @@ int jl_safepoint_start_gc(void)
 void jl_safepoint_end_gc(void)
 {
     assert(jl_atomic_load_relaxed(&jl_gc_running));
-    if (jl_n_threads == 1) {
-        jl_atomic_store_relaxed(&jl_gc_running, 0);
-        return;
-    }
     uv_mutex_lock(&safepoint_lock);
     // Need to reset the page protection before resetting the flag since
     // the thread will trigger a segfault immediately after returning from
@@ -148,18 +205,36 @@ void jl_safepoint_end_gc(void)
     jl_safepoint_disable(2);
     jl_safepoint_disable(1);
     jl_atomic_store_release(&jl_gc_running, 0);
-# ifdef __APPLE__
+# ifdef _OS_DARWIN_
     // This wakes up other threads on mac.
jl_mach_gc_end(); # endif uv_mutex_unlock(&safepoint_lock); - uv_cond_broadcast(&safepoint_cond); + uv_cond_broadcast(&safepoint_cond_end); +} + +void jl_set_gc_and_wait(void) // n.b. not used on _OS_DARWIN_ +{ + jl_task_t *ct = jl_current_task; + // reading own gc state doesn't need atomic ops since no one else + // should store to it. + int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); + uv_mutex_lock(&safepoint_lock); + uv_cond_broadcast(&safepoint_cond_begin); + uv_mutex_unlock(&safepoint_lock); + jl_safepoint_wait_gc(); + jl_atomic_store_release(&ct->ptls->gc_state, state); + jl_safepoint_wait_thread_resume(); // block in thread-suspend now if requested, after clearing the gc_state } -void jl_safepoint_wait_gc(void) +// this is the core of jl_set_gc_and_wait +void jl_safepoint_wait_gc(void) JL_NOTSAFEPOINT { + jl_task_t *ct = jl_current_task; (void)ct; + JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct); // The thread should have set this is already - assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) != 0); + assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0); // Use normal volatile load in the loop for speed until GC finishes. // Then use an acquire load to make sure the GC result is visible on this thread. while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) { @@ -168,9 +243,110 @@ void jl_safepoint_wait_gc(void) // This is particularly important when run under rr. uv_mutex_lock(&safepoint_lock); if (jl_atomic_load_relaxed(&jl_gc_running)) - uv_cond_wait(&safepoint_cond, &safepoint_lock); + uv_cond_wait(&safepoint_cond_end, &safepoint_lock); + uv_mutex_unlock(&safepoint_lock); + } +} + +// equivalent to jl_set_gc_and_wait, but waiting on resume-thread lock instead +void jl_safepoint_wait_thread_resume(void) +{ + jl_task_t *ct = jl_current_task; + // n.b. we do not permit a fast-path here that skips the lock acquire since + // we otherwise have no synchronization point to ensure that this thread + // will observe the change to the safepoint, even though the other thread + // might have already observed our gc_state. + // if (!jl_atomic_load_relaxed(&ct->ptls->suspend_count)) return; + int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); + uv_mutex_lock(&ct->ptls->sleep_lock); + if (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) { + // defer this broadcast until we determine whether uv_cond_wait is really going to be needed + uv_mutex_unlock(&ct->ptls->sleep_lock); + uv_mutex_lock(&safepoint_lock); + uv_cond_broadcast(&safepoint_cond_begin); uv_mutex_unlock(&safepoint_lock); + uv_mutex_lock(&ct->ptls->sleep_lock); + } + while (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) + uv_cond_wait(&ct->ptls->wake_signal, &ct->ptls->sleep_lock); + // must while still holding the mutex_unlock, so we know other threads in + // jl_safepoint_suspend_thread will observe this thread in the correct GC + // state, and not still stuck in JL_GC_STATE_WAITING + jl_atomic_store_release(&ct->ptls->gc_state, state); + uv_mutex_unlock(&ct->ptls->sleep_lock); +} + +// n.b. 
suspended threads may still run in the GC or GC safe regions +// but shouldn't be observable, depending on which enum the user picks (only 1 and 2 are typically recommended here) +// waitstate = 0 : do not wait for suspend to finish +// waitstate = 1 : wait for gc_state != 0 (JL_GC_STATE_WAITING or JL_GC_STATE_SAFE) +// waitstate = 2 : wait for gc_state != 0 (JL_GC_STATE_WAITING or JL_GC_STATE_SAFE) and that GC is not running on that thread +// waitstate = 3 : wait for full suspend (gc_state == JL_GC_STATE_WAITING) -- this may never happen if thread is sleeping currently +// if another thread comes along and calls jl_safepoint_resume, we also return early +// return new suspend count on success, 0 on failure +int jl_safepoint_suspend_thread(int tid, int waitstate) +{ + if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads)) + return 0; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + uv_mutex_lock(&ptls2->sleep_lock); + int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count) + 1; + jl_atomic_store_relaxed(&ptls2->suspend_count, suspend_count); + if (suspend_count == 1) { // first to suspend + jl_safepoint_enable(3); + jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 3 + sizeof(void*))); } + uv_mutex_unlock(&ptls2->sleep_lock); + if (waitstate) { + // wait for suspend (or another thread to call resume) + if (waitstate >= 2) { + // We currently cannot distinguish if a thread is helping run GC or + // not, so assume it is running GC and wait for GC to finish first. + // It will be unable to reenter helping with GC because we have + // changed its safepoint page. + jl_set_gc_and_wait(); + } + while (jl_atomic_load_acquire(&ptls2->suspend_count) != 0) { + int8_t state2 = jl_atomic_load_acquire(&ptls2->gc_state); + if (waitstate <= 2 && state2 != 0) + break; + if (waitstate == 3 && state2 == JL_GC_STATE_WAITING) + break; + jl_cpu_pause(); // yield (wait for safepoint_cond_begin, for example)? + } + } + return suspend_count; +} + +// return old suspend count on success, 0 on failure +// n.b. threads often do not resume until after all suspended threads have been resumed! +int jl_safepoint_resume_thread(int tid) JL_NOTSAFEPOINT +{ + if (0 > tid || tid >= jl_atomic_load_acquire(&jl_n_threads)) + return 0; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + uv_mutex_lock(&safepoint_lock); + uv_mutex_lock(&ptls2->sleep_lock); + int16_t suspend_count = jl_atomic_load_relaxed(&ptls2->suspend_count); + if (suspend_count == 1) { // last to unsuspend + if (tid == 0) + jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size)); + else + jl_atomic_store_relaxed(&ptls2->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 2 + sizeof(void*))); + uv_cond_signal(&ptls2->wake_signal); +#ifdef _OS_DARWIN_ + jl_safepoint_resume_thread_mach(ptls2, tid); +#endif + } + if (suspend_count != 0) { + jl_atomic_store_relaxed(&ptls2->suspend_count, suspend_count - 1); + if (suspend_count == 1) + jl_safepoint_disable(3); + } + uv_mutex_unlock(&ptls2->sleep_lock); + uv_mutex_unlock(&safepoint_lock); + return suspend_count; } void jl_safepoint_enable_sigint(void) diff --git a/src/partr.c b/src/scheduler.c similarity index 53% rename from src/partr.c rename to src/scheduler.c index e511fb8727bfb..50e15b286a8eb 100644 --- a/src/partr.c +++ b/src/scheduler.c @@ -26,6 +26,14 @@ static const int16_t not_sleeping = 0; // it is acceptable for the thread to be sleeping. 
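Returning to the suspension API added in safepoint.c above: `jl_safepoint_suspend_thread` and `jl_safepoint_resume_thread` are counted, so every successful suspend must eventually be matched by a resume. A sketch of how a caller inside the runtime might pair them, assuming only the signatures and waitstate meanings documented in this patch (the wrapper and its callback are invented for illustration):

// waitstate 2: wait until the target reports a non-zero gc_state and is not
// helping with a collection, per the comment on jl_safepoint_suspend_thread.
static int example_with_thread_suspended(int tid, void (*inspect)(int))
{
    if (jl_safepoint_suspend_thread(tid, 2) == 0)
        return 0;                        // invalid tid; nothing was suspended
    inspect(tid);                        // e.g. sample or unwind that thread's stack
    return jl_safepoint_resume_thread(tid) != 0;  // returns the old count, 0 on failure
}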
static const int16_t sleeping = 1; +// this thread is dead. +static const int16_t sleeping_like_the_dead JL_UNUSED = 2; + +// a running count of how many threads are currently not_sleeping +// plus a running count of the number of in-flight wake-ups +// n.b. this may temporarily exceed jl_n_threads +static _Atomic(int) nrunning = 0; + // invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending). // invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping. // invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it. @@ -38,7 +46,14 @@ static const int16_t sleeping = 1; // * Enqueuer: // * 2: `jl_atomic_load_relaxed(&ptls->sleep_check_state)` in `jl_wakeup_thread` returns `not_sleeping` // i.e., the dequeuer misses the enqueue and enqueuer misses the sleep state transition. - +// [^store_buffering_2]: and also +// * Enqueuer: +// * 1a: `jl_atomic_store_relaxed(jl_uv_n_waiters, 1)` in `JL_UV_LOCK` +// * 1b: "cheap read" of `handle->pending` in `uv_async_send` (via `JL_UV_LOCK`) loads `0` +// * Dequeuer: +// * 2a: store `2` to `handle->pending` in `uv_async_send` (via `JL_UV_LOCK` in `jl_task_get_next`) +// * 2b: `jl_atomic_load_relaxed(jl_uv_n_waiters)` in `jl_task_get_next` returns `0` +// i.e., the dequeuer misses the `n_waiters` is set and enqueuer misses the `uv_stop` flag (in `signal_async`) transition to cleared JULIA_DEBUG_SLEEPWAKE( uint64_t wakeup_enter; @@ -54,13 +69,13 @@ JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT if (was == tid) return 1; if (was == -1) - return jl_atomic_cmpswap(&task->tid, &was, tid); + return jl_atomic_cmpswap(&task->tid, &was, tid) || was == tid; return 0; } JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT { - if (tpid < 0 || tpid >= jl_n_threadpools) + if (tpid < -1 || tpid >= jl_n_threadpools) return 0; task->threadpoolid = tpid; return 1; @@ -68,16 +83,15 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_mark_sp_t *sp, jl_value_t *obj) JL_NOTSAFEPOINT; + jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; // parallel task runtime // --- -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max, uint32_t unbias) +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) { jl_ptls_t ptls = jl_current_task->ptls; - // one-extend unbias back to 64-bits - return cong(max, -(uint64_t)-unbias, &ptls->rngseed); + return cong(max, &ptls->rngseed); } // initialize the threading infrastructure @@ -98,7 +112,69 @@ void jl_init_threadinginfra(void) void JL_NORETURN jl_finish_task(jl_task_t *t); -// thread function: used by all except the main thread +static inline int may_mark(void) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&gc_n_threads_marking) > 0); +} + +static inline int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&ptls->gc_sweeps_requested) > 0); +} + +// parallel gc thread function +void jl_parallel_gc_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid and create heap) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + + // wait for all threads + jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + while (1) { + uv_mutex_lock(&gc_threads_lock); + 
while (!may_mark() && !may_sweep(ptls)) { + uv_cond_wait(&gc_threads_cond, &gc_threads_lock); + } + uv_mutex_unlock(&gc_threads_lock); + if (may_mark()) { + gc_mark_loop_parallel(ptls, 0); + } + if (may_sweep(ptls)) { // not an else! + gc_sweep_pool_parallel(); + jl_atomic_fetch_add(&ptls->gc_sweeps_requested, -1); + } + } +} + +// concurrent gc thread function +void jl_concurrent_gc_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid and create heap) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + + // wait for all threads + jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + while (1) { + uv_sem_wait(&gc_sweep_assists_needed); + gc_free_pages(); + } +} + +// thread function: used by all mutator threads except the main thread void jl_threadfun(void *arg) { jl_threadarg_t *targ = (jl_threadarg_t*)arg; @@ -109,6 +185,8 @@ void jl_threadfun(void *arg) jl_init_stack_limits(0, &stack_lo, &stack_hi); // warning: this changes `jl_current_task`, so be careful not to call that from this function jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); + int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1); + assert(wasrunning); (void)wasrunning; JL_GC_PROMISE_ROOTED(ct); // wait for all threads @@ -123,6 +201,20 @@ void jl_threadfun(void *arg) } + +void jl_init_thread_scheduler(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + uv_mutex_init(&ptls->sleep_lock); + uv_cond_init(&ptls->wake_signal); + // record that there is now another thread that may be used to schedule work + // we will decrement this again in scheduler_delete_thread, only slightly + // in advance of pthread_join (which hopefully itself also had been + // adopted by now and is included in nrunning too) + (void)jl_atomic_fetch_add_relaxed(&nrunning, 1); + // n.b. 
this is the only point in the code where we ignore the invariants on the ordering of nrunning + // since we are being initialized from foreign code, we could not necessarily have expected or predicted that to happen +} + int jl_running_under_rr(int recheck) { #ifdef _OS_LINUX_ @@ -149,7 +241,7 @@ int jl_running_under_rr(int recheck) // sleep_check_after_threshold() -- if sleep_threshold ns have passed, return 1 -static int sleep_check_after_threshold(uint64_t *start_cycles) +static int sleep_check_after_threshold(uint64_t *start_cycles) JL_NOTSAFEPOINT { JULIA_DEBUG_SLEEPWAKE( return 1 ); // hammer on the sleep/wake logic much harder /** @@ -172,18 +264,31 @@ static int sleep_check_after_threshold(uint64_t *start_cycles) return 0; } +static int set_not_sleeping(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { + if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) { + return 1; + } + } + int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1); // consume in-flight wakeup + assert(wasrunning > 1); (void)wasrunning; + return 0; +} -static int wake_thread(int16_t tid) +static int wake_thread(int16_t tid) JL_NOTSAFEPOINT { - jl_ptls_t other = jl_all_tls_states[tid]; - int8_t state = sleeping; - - if (jl_atomic_load_relaxed(&other->sleep_check_state) == sleeping) { - if (jl_atomic_cmpswap_relaxed(&other->sleep_check_state, &state, not_sleeping)) { - JL_PROBE_RT_SLEEP_CHECK_WAKE(other, state); - uv_mutex_lock(&other->sleep_lock); - uv_cond_signal(&other->wake_signal); - uv_mutex_unlock(&other->sleep_lock); + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + + if (jl_atomic_load_relaxed(&ptls2->sleep_check_state) != not_sleeping) { + int8_t state = sleeping; + if (jl_atomic_cmpswap_relaxed(&ptls2->sleep_check_state, &state, not_sleeping)) { + int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1); // increment in-flight wakeup count + assert(wasrunning); (void)wasrunning; + JL_PROBE_RT_SLEEP_CHECK_WAKE(ptls2, state); + uv_mutex_lock(&ptls2->sleep_lock); + uv_cond_signal(&ptls2->wake_signal); + uv_mutex_unlock(&ptls2->sleep_lock); return 1; } } @@ -191,7 +296,7 @@ static int wake_thread(int16_t tid) } -static void wake_libuv(void) +static void wake_libuv(void) JL_NOTSAFEPOINT { JULIA_DEBUG_SLEEPWAKE( io_wakeup_enter = cycleclock() ); jl_wake_libuv(); @@ -199,7 +304,7 @@ static void wake_libuv(void) } /* ensure thread tid is awake if necessary */ -JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) +JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; int16_t self = jl_atomic_load_relaxed(&ct->tid); @@ -209,25 +314,29 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() ); if (tid == self || tid == -1) { // we're already awake, but make sure we'll exit uv_run + // and that nrunning is updated if this is now considered in-flight jl_ptls_t ptls = ct->ptls; - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); - JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls); + if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { + if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) { + int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1); + assert(wasrunning); (void)wasrunning; + JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls); + } } if (uvlock == ct) uv_stop(jl_global_event_loop()); } 
else { // something added to the sticky-queue: notify that thread - if (wake_thread(tid)) { + if (wake_thread(tid) && uvlock != ct) { // check if we need to notify uv_run too jl_fence(); - jl_ptls_t other = jl_all_tls_states[tid]; + jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; jl_task_t *tid_task = jl_atomic_load_relaxed(&other->current_task); // now that we have changed the thread to not-sleeping, ensure that // either it has not yet acquired the libuv lock, or that it will // observe the change of state to not_sleeping - if (uvlock != ct && jl_atomic_load_relaxed(&jl_uv_mutex.owner) == tid_task) + if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == tid_task) wake_libuv(); } } @@ -237,7 +346,8 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) // in the future, we might want to instead wake some fraction of threads, // and let each of those wake additional threads if they find work int anysleep = 0; - for (tid = 0; tid < jl_n_threads; tid++) { + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + for (tid = 0; tid < nthreads; tid++) { if (tid != self) anysleep |= wake_thread(tid); } @@ -257,7 +367,7 @@ static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q) { jl_gc_safepoint(); jl_task_t *task = (jl_task_t*)jl_apply_generic(trypoptask, &q, 1); - if (jl_typeis(task, jl_task_type)) { + if (jl_is_task(task)) { int self = jl_atomic_load_relaxed(&jl_current_task->tid); jl_set_task_tid(task, self); return task; @@ -270,16 +380,45 @@ static int check_empty(jl_value_t *checkempty) return jl_apply_generic(checkempty, NULL, 0) == jl_true; } +jl_task_t *wait_empty JL_GLOBALLY_ROOTED; +void jl_wait_empty_begin(void); +void jl_wait_empty_end(void); + +void jl_task_wait_empty(void) +{ + jl_task_t *ct = jl_current_task; + if (jl_atomic_load_relaxed(&ct->tid) == 0 && jl_base_module) { + jl_wait_empty_begin(); + jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("wait")); + wait_empty = ct; + size_t lastage = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + if (f) + jl_apply_generic(f, NULL, 0); + // we are back from jl_task_get_next now + ct->world_age = lastage; + wait_empty = NULL; + // TODO: move this lock acquire to before the wait_empty return and the + // unlock to the caller, so that we ensure new work (from uv_unref + // objects) didn't unexpectedly get scheduled and start running behind + // our back during the function return + JL_UV_LOCK(); + jl_wait_empty_end(); + JL_UV_UNLOCK(); + } +} + static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT { // sleep_check_state is only transitioned from not_sleeping to sleeping // by the thread itself. As a result, if this returns false, it will // continue returning false. If it returns true, we know the total // modification order of the fences. 
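The `jl_fence()` right below, tagged [^store_buffering_1] and now also [^store_buffering_2], is one half of the store-buffering (Dekker-style) pattern described in the comment block near the top of this file: each side publishes its flag, issues a full fence, and only then reads the other side's flag, which rules out the interleaving where the enqueuer sees no sleeper and the dequeuer sees no work (a lost wakeup). A self-contained C11 sketch of that invariant for a single enqueue/sleep round, with illustrative names rather than the runtime's:

#include <stdatomic.h>

static _Atomic int queued_work = 0;     // enqueuer: "I published work"
static _Atomic int going_to_sleep = 0;  // dequeuer: "I am about to sleep"

static int enqueuer_needs_wakeup(void)
{
    atomic_store_explicit(&queued_work, 1, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst);
    return atomic_load_explicit(&going_to_sleep, memory_order_relaxed);
}

static int dequeuer_may_sleep(void)
{
    atomic_store_explicit(&going_to_sleep, 1, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst);
    return !atomic_load_explicit(&queued_work, memory_order_relaxed);
}

// With the two seq_cst fences it cannot happen that enqueuer_needs_wakeup()
// returns 0 while dequeuer_may_sleep() returns 1 in the same round: at least
// one of the two loads must observe the other side's store.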
- jl_fence(); // [^store_buffering_1] + jl_fence(); // [^store_buffering_1] [^store_buffering_2] return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping; } + extern _Atomic(unsigned) _threadedregion; JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, jl_value_t *checkempty) @@ -301,14 +440,13 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, jl_cpu_pause(); jl_ptls_t ptls = ct->ptls; - if (sleep_check_after_threshold(&start_cycles) || (!jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == 0)) { + if (sleep_check_after_threshold(&start_cycles) || (ptls->tid == 0 && (!jl_atomic_load_relaxed(&_threadedregion) || wait_empty))) { // acquire sleep-check lock jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping); jl_fence(); // [^store_buffering_1] JL_PROBE_RT_SLEEP_CHECK_SLEEP(ptls); if (!check_empty(checkempty)) { // uses relaxed loads - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us + if (set_not_sleeping(ptls)) { JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE(ptls); } continue; @@ -317,8 +455,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, if (ptls != ct->ptls) { // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over ptls = ct->ptls; - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us + if (set_not_sleeping(ptls)) { JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); } if (task) @@ -326,14 +463,12 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, continue; } if (task) { - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us + if (set_not_sleeping(ptls)) { JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); } return task; } - // IO is always permitted, but outside a threaded region, only // thread 0 will process messages. // Inside a threaded region, any thread can listen for IO messages, @@ -354,17 +489,26 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, } else if (ptls->tid == 0) { uvlock = 1; - JL_UV_LOCK(); // jl_mutex_lock(&jl_uv_mutex); + JL_UV_LOCK(); + } + else { + // Since we might have started some IO work, we might need + // to ensure tid = 0 will go watch that new event source. + // If trylock would have succeeded, that may have been our + // responsibility, so need to make sure thread 0 will take care + // of us. 
+ if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == NULL) // aka trylock + jl_wakeup_thread(0); } if (uvlock) { - int active = 1; - // otherwise, we block until someone asks us for the lock - uv_loop_t *loop = jl_global_event_loop(); - while (active && may_sleep(ptls)) { - if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0) - // but if we won the race against someone who actually needs - // the lock to do real work, we need to let them have it instead - break; + int enter_eventloop = may_sleep(ptls); + int active = 0; + if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0) + // if we won the race against someone who actually needs + // the lock to do real work, we need to let them have it instead + enter_eventloop = 0; + if (enter_eventloop) { + uv_loop_t *loop = jl_global_event_loop(); loop->stop_flag = 0; JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() ); active = uv_run(loop, UV_RUN_ONCE); @@ -377,15 +521,17 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, // that just wanted to steal libuv from us. We will just go // right back to sleep on the individual wake signal to let // them take it from us without conflict. - if (!may_sleep(ptls)) { + if (active || !may_sleep(ptls)) { + if (set_not_sleeping(ptls)) { + JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls); + } start_cycles = 0; continue; } - if (!jl_atomic_load_relaxed(&_threadedregion) && active && ptls->tid == 0) { + if (!enter_eventloop && !jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == 0) { // thread 0 is the only thread permitted to run the event loop // so it needs to stay alive, just spin-looping if necessary - if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) { - jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us + if (set_not_sleeping(ptls)) { JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls); } start_cycles = 0; @@ -393,19 +539,52 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, } } + // any thread which wants us running again will have to observe + // sleep_check_state==sleeping and increment nrunning for us + int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1); + assert(wasrunning); + if (wasrunning == 1) { + // This was the last running thread, and there is no thread with !may_sleep + // so make sure tid 0 is notified to check wait_empty + // TODO: this also might be a good time to check again that + // libuv's queue is truly empty, instead of during delete_thread + if (ptls->tid != 0) { + uv_mutex_lock(&ptls->sleep_lock); + uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock); + uv_mutex_unlock(&ptls->sleep_lock); + } + } + // the other threads will just wait for an individual wake signal to resume JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() ); int8_t gc_state = jl_gc_safe_enter(ptls); uv_mutex_lock(&ptls->sleep_lock); while (may_sleep(ptls)) { + task = wait_empty; + if (ptls->tid == 0 && task && jl_atomic_load_relaxed(&nrunning) == 0) { + wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1); + assert(!wasrunning); + wasrunning = !set_not_sleeping(ptls); + assert(!wasrunning); + JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls); + if (!ptls->finalizers_inhibited) + ptls->finalizers_inhibited++; // this annoyingly is rather sticky (we should like to reset it at the end of jl_task_wait_empty) + break; + } + // else should we warn the user of certain deadlock here if tid == 0 && nrunning == 0? 
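The `uv_cond_wait` on the next line follows the usual condition-variable discipline used throughout this function: the predicate (`may_sleep`) is re-checked in a loop while the mutex is held, because wakeups may be spurious or may race with another state change. A minimal self-contained pthread sketch of that shape (names invented here; the runtime uses the libuv equivalents `uv_mutex_t`/`uv_cond_t`):

#include <pthread.h>

static pthread_mutex_t example_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t example_wake = PTHREAD_COND_INITIALIZER;
static int example_should_sleep = 1;

static void example_sleep_until_woken(void)
{
    pthread_mutex_lock(&example_lock);
    while (example_should_sleep)                          // re-check after every wakeup
        pthread_cond_wait(&example_wake, &example_lock);  // atomically unlocks and blocks
    pthread_mutex_unlock(&example_lock);
}

static void example_wake_sleeper(void)
{
    pthread_mutex_lock(&example_lock);
    example_should_sleep = 0;          // change the predicate while holding the lock
    pthread_cond_signal(&example_wake);
    pthread_mutex_unlock(&example_lock);
}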
uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock); - // TODO: help with gc work here, if applicable } assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping); + assert(jl_atomic_load_relaxed(&nrunning)); + start_cycles = 0; uv_mutex_unlock(&ptls->sleep_lock); JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() ); jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint - start_cycles = 0; + if (task) { + assert(task == wait_empty); + wait_empty = NULL; + return task; + } } else { // maybe check the kernel for new messages too @@ -414,6 +593,27 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, } } +void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + int notsleeping = jl_atomic_exchange_relaxed(&ptls->sleep_check_state, sleeping_like_the_dead) == not_sleeping; + jl_fence(); + if (notsleeping) { + if (jl_atomic_load_relaxed(&nrunning) == 1) { + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; + // This was the last running thread, and there is no thread with !may_sleep + // so make sure tid 0 is notified to check wait_empty + uv_mutex_lock(&ptls2->sleep_lock); + uv_cond_signal(&ptls2->wake_signal); + uv_mutex_unlock(&ptls2->sleep_lock); + } + } + else { + jl_atomic_fetch_add_relaxed(&nrunning, 1); + } + jl_wakeup_thread(0); // force thread 0 to see that we do not have the IO lock (and am dead) + jl_atomic_fetch_add_relaxed(&nrunning, -1); +} + #ifdef __cplusplus } #endif diff --git a/src/serialize.h b/src/serialize.h index 69aaeb4c39787..1bd29e9cc5911 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -63,8 +63,10 @@ extern "C" { #define TAG_RETURNNODE 55 #define TAG_ARGUMENT 56 #define TAG_RELOC_METHODROOT 57 +#define TAG_BINDING 58 +#define TAG_MEMORYT 59 -#define LAST_TAG 57 +#define LAST_TAG 59 #define write_uint8(s, n) ios_putc((n), (s)) #define read_uint8(s) ((uint8_t)ios_getc((s))) @@ -92,7 +94,7 @@ static inline uint64_t read_uint64(ios_t *s) JL_NOTSAFEPOINT return x; } -static inline void write_int64(ios_t *s, int64_t i) JL_NOTSAFEPOINT +static inline void write_uint64(ios_t *s, uint64_t i) JL_NOTSAFEPOINT { ios_write(s, (char*)&i, 8); } @@ -121,6 +123,19 @@ static inline uint32_t read_uint32(ios_t *s) JL_NOTSAFEPOINT return x; } +#ifdef _P64 +#define write_uint(s, i) write_uint64(s, i) +#else +#define write_uint(s, i) write_uint32(s, i) +#endif + +#ifdef _P64 +#define read_uint(s) read_uint64(s) +#else +#define read_uint(s) read_uint32(s) +#endif + + void *jl_lookup_ser_tag(jl_value_t *v); void *jl_lookup_common_symbol(jl_value_t *v); jl_value_t *jl_deser_tag(uint8_t tag); diff --git a/src/signal-handling.c b/src/signal-handling.c index fda1c9947c1b5..abe63ba6d2d7f 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -6,8 +6,8 @@ #include #include "julia.h" #include "julia_internal.h" -#ifndef _OS_WINDOWS_ #include +#ifndef _OS_WINDOWS_ #include #endif @@ -29,6 +29,52 @@ static const uint64_t GIGA = 1000000000ULL; JL_DLLEXPORT void jl_profile_stop_timer(void); JL_DLLEXPORT int jl_profile_start_timer(void); +/////////////////////// +// Utility functions // +/////////////////////// +JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec) +{ + bt_size_max = maxsize; + nsecprof = delay_nsec; + if (bt_data_prof != NULL) + free((void*)bt_data_prof); + bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t)); + if (bt_data_prof == NULL && maxsize > 0) + return -1; + bt_size_cur = 0; + return 0; +} + +JL_DLLEXPORT uint8_t *jl_profile_get_data(void) +{ + 
return (uint8_t*) bt_data_prof; +} + +JL_DLLEXPORT size_t jl_profile_len_data(void) +{ + return bt_size_cur; +} + +JL_DLLEXPORT size_t jl_profile_maxlen_data(void) +{ + return bt_size_max; +} + +JL_DLLEXPORT uint64_t jl_profile_delay_nsec(void) +{ + return nsecprof; +} + +JL_DLLEXPORT void jl_profile_clear_data(void) +{ + bt_size_cur = 0; +} + +JL_DLLEXPORT int jl_profile_is_running(void) +{ + return running; +} + // Any function that acquires this lock must be either a unmanaged thread // or in the GC safe region and must NOT allocate anything through the GC // while holding this lock. @@ -109,8 +155,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed) // The "modern Fisher–Yates shuffle" - O(n) algorithm // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm for (int i = size; i-- > 1; ) { - uint64_t unbias = UINT64_MAX; // slightly biased, but i is very small - size_t j = cong(i, unbias, seed); + size_t j = cong(i, seed); uint64_t tmp = carray[j]; carray[j] = carray[i]; carray[i] = tmp; @@ -136,14 +181,10 @@ static int *profile_get_randperm(int size) JL_DLLEXPORT int jl_profile_is_buffer_full(void) { - // declare buffer full if there isn't enough room to take samples across all threads - #if defined(_OS_WINDOWS_) - uint64_t nthreads = 1; // windows only profiles the main thread - #else - uint64_t nthreads = jl_n_threads; - #endif - // the `+ 6` is for the two block terminators `0` plus 4 metadata entries - return bt_size_cur + (((JL_BT_MAX_ENTRY_SIZE + 1) + 6) * nthreads) > bt_size_max; + // Declare buffer full if there isn't enough room to sample even just the + // thread metadata and one max-sized frame. The `+ 6` is for the two block + // terminator `0`'s plus the 4 metadata entries. + return bt_size_cur + ((JL_BT_MAX_ENTRY_SIZE + 1) + 6) > bt_size_max; } static uint64_t jl_last_sigint_trigger = 0; @@ -244,21 +285,27 @@ void jl_set_profile_peek_duration(double t) profile_peek_duration = t; } -uintptr_t profile_show_peek_cond_loc; -JL_DLLEXPORT void jl_set_peek_cond(uintptr_t cond) +jl_mutex_t profile_show_peek_cond_lock; +static uv_async_t *profile_show_peek_cond_loc; +JL_DLLEXPORT void jl_set_peek_cond(uv_async_t *cond) { + JL_LOCK_NOGC(&profile_show_peek_cond_lock); profile_show_peek_cond_loc = cond; + JL_UNLOCK_NOGC(&profile_show_peek_cond_lock); } static void jl_check_profile_autostop(void) { - if ((profile_autostop_time != -1.0) && (jl_hrtime() > profile_autostop_time)) { + if (profile_show_peek_cond_loc != NULL && profile_autostop_time != -1.0 && jl_hrtime() > profile_autostop_time) { profile_autostop_time = -1.0; jl_profile_stop_timer(); jl_safe_printf("\n==============================================================\n"); jl_safe_printf("Profile collected. 
A report will print at the next yield point\n");
         jl_safe_printf("==============================================================\n\n");
-        uv_async_send((uv_async_t*)profile_show_peek_cond_loc);
+        JL_LOCK_NOGC(&profile_show_peek_cond_lock);
+        if (profile_show_peek_cond_loc != NULL)
+            uv_async_send(profile_show_peek_cond_loc);
+        JL_UNLOCK_NOGC(&profile_show_peek_cond_lock);
     }
 }
@@ -373,24 +420,41 @@ void jl_show_sigill(void *_ctx)
 #endif
 }
+// make it invalid for a task to return from this point to its stack
+// this is generally quite a foolish operation, but does free you up to do
+// arbitrary things on this stack now without worrying about corrupt state that
+// existed already on it
+void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT
+{
+    jl_set_safe_restore(NULL);
+    if (ct) {
+        ct->gcstack = NULL;
+        ct->eh = NULL;
+        ct->world_age = 1;
+        // Force all locks to drop. Is this a good idea? Of course not. But the alternative would probably deadlock instead of crashing.
+        small_arraylist_t *locks = &ct->ptls->locks;
+        for (size_t i = locks->len; i > 0; i--)
+            jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
+        locks->len = 0;
+        ct->ptls->in_pure_callback = 0;
+        ct->ptls->in_finalizer = 0;
+        ct->ptls->defer_signal = 0;
+        // forcibly exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint
+        jl_atomic_store_release(&ct->ptls->gc_state, 0);
+        // allow continuing to use a Task that should have already died--unsafe necromancy!
+        jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE);
+    }
+}
+
 // what to do on a critical error on a thread
-void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct)
+void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *ct)
 {
     jl_bt_element_t *bt_data = ct ? ct->ptls->bt_data : NULL;
     size_t *bt_size = ct ? &ct->ptls->bt_size : NULL;
     size_t i, n = ct ? 
*bt_size : 0; if (sig) { // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit) - jl_set_safe_restore(NULL); - if (ct) { - ct->gcstack = NULL; - ct->eh = NULL; - ct->excstack = NULL; - ct->ptls->locks.len = 0; - ct->ptls->in_pure_callback = 0; - ct->ptls->in_finalizer = 1; - ct->world_age = 1; - } + jl_task_frame_noreturn(ct); #ifndef _OS_WINDOWS_ sigset_t sset; sigemptyset(&sset); @@ -411,7 +475,10 @@ void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct) sigaddset(&sset, sig); pthread_sigmask(SIG_UNBLOCK, &sset, NULL); #endif - jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); + if (si_code) + jl_safe_printf("\n[%d] signal %d (%d): %s\n", getpid(), sig, si_code, strsignal(sig)); + else + jl_safe_printf("\n[%d] signal %d: %s\n", getpid(), sig, strsignal(sig)); } jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno); if (context && ct) { @@ -426,52 +493,6 @@ void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct) jl_gc_debug_critical_error(); } -/////////////////////// -// Utility functions // -/////////////////////// -JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec) -{ - bt_size_max = maxsize; - nsecprof = delay_nsec; - if (bt_data_prof != NULL) - free((void*)bt_data_prof); - bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t)); - if (bt_data_prof == NULL && maxsize > 0) - return -1; - bt_size_cur = 0; - return 0; -} - -JL_DLLEXPORT uint8_t *jl_profile_get_data(void) -{ - return (uint8_t*) bt_data_prof; -} - -JL_DLLEXPORT size_t jl_profile_len_data(void) -{ - return bt_size_cur; -} - -JL_DLLEXPORT size_t jl_profile_maxlen_data(void) -{ - return bt_size_max; -} - -JL_DLLEXPORT uint64_t jl_profile_delay_nsec(void) -{ - return nsecprof; -} - -JL_DLLEXPORT void jl_profile_clear_data(void) -{ - bt_size_cur = 0; -} - -JL_DLLEXPORT int jl_profile_is_running(void) -{ - return running; -} - #ifdef __cplusplus } #endif diff --git a/src/signals-mach.c b/src/signals-mach.c index ff1cc8f0a72f8..87a2eb4758d4a 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -36,56 +36,95 @@ extern int _keymgr_set_lockmode_processwide_ptr(unsigned int key, unsigned int m extern void _dyld_atfork_prepare(void) __attribute__((weak_import)); extern void _dyld_atfork_parent(void) __attribute__((weak_import)); //extern void _dyld_fork_child(void) __attribute__((weak_import)); +extern void _dyld_dlopen_atfork_prepare(void) __attribute__((weak_import)); +extern void _dyld_dlopen_atfork_parent(void) __attribute__((weak_import)); +//extern void _dyld_dlopen_atfork_child(void) __attribute__((weak_import)); static void attach_exception_port(thread_port_t thread, int segv_only); // low 16 bits are the thread id, the next 8 bits are the original gc_state static arraylist_t suspended_threads; extern uv_mutex_t safepoint_lock; -extern uv_cond_t safepoint_cond; -void jl_mach_gc_end(void) +extern uv_cond_t safepoint_cond_begin; + +// see jl_safepoint_wait_thread_resume +void jl_safepoint_resume_thread_mach(jl_ptls_t ptls2, int16_t tid2) { - // Requires the safepoint lock to be held + // must be called with uv_mutex_lock(&safepoint_lock) and uv_mutex_lock(&ptls2->sleep_lock) held (in that order) for (size_t i = 0; i < suspended_threads.len; i++) { uintptr_t item = (uintptr_t)suspended_threads.items[i]; int16_t tid = (int16_t)item; int8_t gc_state = (int8_t)(item >> 8); - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + if (tid != tid2) + continue; 
jl_atomic_store_release(&ptls2->gc_state, gc_state); thread_resume(pthread_mach_thread_np(ptls2->system_id)); + suspended_threads.items[i] = suspended_threads.items[--suspended_threads.len]; + break; } - suspended_threads.len = 0; + // thread hadn't actually reached a jl_mach_gc_wait call where we suspended it } -// Suspend the thread and return `1` if the GC is running. -// Otherwise return `0` -static int jl_mach_gc_wait(jl_ptls_t ptls2, - mach_port_t thread, int16_t tid) +void jl_mach_gc_end(void) { + // must be called with uv_mutex_lock(&safepoint_lock) held + size_t j = 0; + for (size_t i = 0; i < suspended_threads.len; i++) { + uintptr_t item = (uintptr_t)suspended_threads.items[i]; + int16_t tid = (int16_t)item; + int8_t gc_state = (int8_t)(item >> 8); + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + uv_mutex_lock(&ptls2->sleep_lock); + if (jl_atomic_load_relaxed(&ptls2->suspend_count) == 0) { + jl_atomic_store_release(&ptls2->gc_state, gc_state); + thread_resume(pthread_mach_thread_np(ptls2->system_id)); + } + else { + // this is the check for jl_safepoint_wait_thread_resume + suspended_threads.items[j++] = (void*)item; + } + uv_mutex_unlock(&ptls2->sleep_lock); + } + suspended_threads.len = j; +} + +// implement jl_set_gc_and_wait from a different thread +static void jl_mach_gc_wait(jl_ptls_t ptls2, mach_port_t thread, int16_t tid) +{ + // relaxed, since we don't mind missing one--we will hit another soon (immediately probably) uv_mutex_lock(&safepoint_lock); - if (!jl_atomic_load_relaxed(&jl_gc_running)) { - // relaxed, since gets set to zero only while the safepoint_lock was held - // this means we can tell if GC is done before we got the message or - // the safepoint was enabled for SIGINT. - uv_mutex_unlock(&safepoint_lock); - return 0; + // Since this gets set to zero only while the safepoint_lock was held this + // means we can tell for sure if GC is done before we got the message or + // the safepoint was enabled for SIGINT instead. + int doing_gc = jl_atomic_load_relaxed(&jl_gc_running); + int do_suspend = doing_gc; + int relaxed_suspend_count = !doing_gc && jl_atomic_load_relaxed(&ptls2->suspend_count) != 0; + if (relaxed_suspend_count) { + uv_mutex_lock(&ptls2->sleep_lock); + do_suspend = jl_atomic_load_relaxed(&ptls2->suspend_count) != 0; + // only do_suspend while holding the sleep_lock, otherwise we might miss a resume } - // Otherwise, set the gc state of the thread, suspend and record it - // TODO: TSAN will complain that it never saw the faulting task do an - // atomic release (it was in the kernel). And our attempt here does - // nothing, since we are a different thread, and it is not transitive). - // - // This also means we are not making this thread available for GC work. - // Eventually, we should probably release this signal to the original - // thread, (return KERN_FAILURE instead of KERN_SUCCESS) so that it - // triggers a SIGSEGV and gets handled by the usual codepath for unix. - int8_t gc_state = ptls2->gc_state; - jl_atomic_store_release(&ptls2->gc_state, JL_GC_STATE_WAITING); - uintptr_t item = tid | (((uintptr_t)gc_state) << 16); - arraylist_push(&suspended_threads, (void*)item); - thread_suspend(thread); + if (do_suspend) { + // Set the gc state of the thread, suspend and record it + // + // TODO: TSAN will complain that it never saw the faulting task do an + // atomic release (it was in the kernel). And our attempt here does + // nothing, since we are a different thread, and it is not transitive). 
+ // + // This also means we are not making this thread available for GC work. + // Eventually, we should probably release this signal to the original + // thread, (return KERN_FAILURE instead of KERN_SUCCESS) so that it + // triggers a SIGSEGV and gets handled by the usual codepath for unix. + int8_t gc_state = ptls2->gc_state; + jl_atomic_store_release(&ptls2->gc_state, JL_GC_STATE_WAITING); + uintptr_t item = tid | (((uintptr_t)gc_state) << 16); + arraylist_push(&suspended_threads, (void*)item); + thread_suspend(thread); + } + if (relaxed_suspend_count) + uv_mutex_unlock(&ptls2->sleep_lock); + uv_cond_broadcast(&safepoint_cond_begin); uv_mutex_unlock(&safepoint_lock); - return 1; } static mach_port_t segv_port = 0; @@ -93,20 +132,18 @@ static mach_port_t segv_port = 0; #define STR(x) #x #define XSTR(x) STR(x) #define HANDLE_MACH_ERROR(msg, retval) \ - if (retval != KERN_SUCCESS) { mach_error(msg XSTR(: __FILE__:__LINE__:), (retval)); jl_exit(1); } + if (retval != KERN_SUCCESS) { mach_error(msg XSTR(: __FILE__:__LINE__:), (retval)); abort(); } void *mach_segv_listener(void *arg) { (void)arg; - while (1) { - int ret = mach_msg_server(mach_exc_server, 2048, segv_port, MACH_MSG_TIMEOUT_NONE); - jl_safe_printf("mach_msg_server: %s\n", mach_error_string(ret)); - jl_exit(128 + SIGSEGV); - } + int ret = mach_msg_server(mach_exc_server, 2048, segv_port, MACH_MSG_TIMEOUT_NONE); + mach_error("mach_msg_server" XSTR(: __FILE__:__LINE__:), ret); + abort(); } -static void allocate_mach_handler() +static void allocate_mach_handler(void) { // ensure KEYMGR_GCC3_DW2_OBJ_LIST is initialized, as this requires malloc // and thus can deadlock when used without first initializing it. @@ -119,7 +156,8 @@ static void allocate_mach_handler() if (_keymgr_set_lockmode_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, NM_ALLOW_RECURSION)) jl_error("_keymgr_set_lockmode_processwide_ptr failed"); - arraylist_new(&suspended_threads, jl_n_threads); + int16_t nthreads = jl_atomic_load_acquire(&jl_n_threads); + arraylist_new(&suspended_threads, nthreads); // we will resize later (inside safepoint_lock), if needed pthread_t thread; pthread_attr_t attr; kern_return_t ret; @@ -177,13 +215,14 @@ static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state, #else #error "julia: throw-in-context not supported on this platform" #endif - if (ptls2 == NULL || ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) { + if (ptls2 == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) { rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment } else { - rsp = (uintptr_t)ptls2->signal_stack + sig_stack_size; + rsp = (uintptr_t)ptls2->signal_stack + (ptls2->signal_stack_size ? 
ptls2->signal_stack_size : sig_stack_size); } assert(rsp % 16 == 0); + rsp -= 16; #ifdef _CPU_X86_64_ rsp -= sizeof(void*); @@ -213,14 +252,13 @@ int is_write_fault(host_exception_state_t exc_state) { } #endif -static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exception) +static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t *exception) { unsigned int count = MACH_THREAD_STATE_COUNT; host_thread_state_t state; kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count); HANDLE_MACH_ERROR("thread_get_state", ret); - jl_ptls_t ptls2 = jl_all_tls_states[tid]; - if (!jl_get_safe_restore()) { + if (1) { // XXX: !jl_has_safe_restore(ptls2) assert(exception); ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t *)&state, @@ -245,7 +283,19 @@ static void segv_handler(int sig, siginfo_t *info, void *context) } } -//mach_exc_server expects us to define this symbol locally +// n.b. mach_exc_server expects us to define this symbol locally +/* The documentation for catch_exception_raise says: A return value of + * KERN_SUCCESS indicates that the thread is to continue from the point of + * exception. A return value of MIG_NO_REPLY indicates that the exception was + * handled directly and the thread was restarted or terminated by the exception + * handler. A return value of MIG_DESTROY_REQUEST causes the kernel to try + * another exception handler (or terminate the thread). Any other value will + * cause mach_msg_server to remove the task and thread port references. + * + * However MIG_DESTROY_REQUEST does not exist, not does it appear the source + * code for mach_msg_server ever destroy those references (only the message + * itself). + */ kern_return_t catch_mach_exception_raise( mach_port_t exception_port, mach_port_t thread, @@ -263,35 +313,45 @@ kern_return_t catch_mach_exception_raise( #endif int16_t tid; jl_ptls_t ptls2 = NULL; - for (tid = 0; tid < jl_n_threads; tid++) { - jl_ptls_t _ptls2 = jl_all_tls_states[tid]; + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + for (tid = 0; tid < nthreads; tid++) { + jl_ptls_t _ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + if (jl_atomic_load_relaxed(&_ptls2->current_task) == NULL) { + // this thread is dead + continue; + } if (pthread_mach_thread_np(_ptls2->system_id) == thread) { ptls2 = _ptls2; break; } } - if (!ptls2 || ptls2->current_task == NULL) { + if (!ptls2) { // We don't know about this thread, let the kernel try another handler // instead. This shouldn't actually happen since we only register the // handler for the threads we know about. 
jl_safe_printf("ERROR: Exception handler triggered on unmanaged thread.\n"); return KERN_INVALID_ARGUMENT; } + // XXX: jl_throw_in_thread or segv_handler will eventually check this, but + // we would like to avoid some of this work if we could detect this earlier + // if (jl_has_safe_restore(ptls2)) { + // jl_throw_in_thread(ptls2, thread, jl_stackovf_exception); + // return KERN_SUCCESS; + // } + if (ptls2->gc_state == JL_GC_STATE_WAITING) + return KERN_FAILURE; if (exception == EXC_ARITHMETIC) { - jl_throw_in_thread(tid, thread, jl_diverror_exception); + jl_throw_in_thread(ptls2, thread, jl_diverror_exception); return KERN_SUCCESS; } - assert(exception == EXC_BAD_ACCESS); + assert(exception == EXC_BAD_ACCESS); // SIGSEGV or SIGBUS + if (codeCnt < 2 || code[0] != KERN_PROTECTION_FAILURE) // SEGV_ACCERR or BUS_ADRERR or BUS_ADRALN + return KERN_FAILURE; + uint64_t fault_addr = code[1]; kern_return_t ret = thread_get_state(thread, HOST_EXCEPTION_STATE, (thread_state_t)&exc_state, &exc_count); HANDLE_MACH_ERROR("thread_get_state", ret); -#ifdef _CPU_X86_64_ - uint64_t fault_addr = exc_state.__faultvaddr; -#else - uint64_t fault_addr = exc_state.__far; -#endif - if (jl_addr_is_safepoint(fault_addr)) { - if (jl_mach_gc_wait(ptls2, thread, tid)) - return KERN_SUCCESS; + if (jl_addr_is_safepoint(fault_addr) && !is_write_fault(exc_state)) { + jl_mach_gc_wait(ptls2, thread, tid); if (ptls2->tid != 0) return KERN_SUCCESS; if (ptls2->defer_signal) { @@ -299,43 +359,22 @@ kern_return_t catch_mach_exception_raise( } else if (jl_safepoint_consume_sigint()) { jl_clear_force_sigint(); - jl_throw_in_thread(tid, thread, jl_interrupt_exception); - } - return KERN_SUCCESS; - } - if (jl_get_safe_restore()) { - jl_throw_in_thread(tid, thread, jl_stackovf_exception); - return KERN_SUCCESS; - } -#ifdef SEGV_EXCEPTION - if (1) { -#else - if (msync((void*)(fault_addr & ~(jl_page_size - 1)), 1, MS_ASYNC) == 0) { // check if this was a valid address -#endif - jl_value_t *excpt; - if (is_addr_on_stack(jl_atomic_load_relaxed(&ptls2->current_task), (void*)fault_addr)) { - excpt = jl_stackovf_exception; - } -#ifdef SEGV_EXCEPTION - else if (msync((void*)(fault_addr & ~(jl_page_size - 1)), 1, MS_ASYNC) != 0) { - // no page mapped at this address - excpt = jl_segv_exception; + jl_throw_in_thread(ptls2, thread, jl_interrupt_exception); } -#endif - else { - if (!is_write_fault(exc_state)) - return KERN_INVALID_ARGUMENT; - excpt = jl_readonlymemory_exception; - } - jl_throw_in_thread(tid, thread, excpt); - return KERN_SUCCESS; } - else { - thread0_exit_count++; - jl_exit_thread0(128 + SIGSEGV, NULL, 0); - return KERN_SUCCESS; + if (ptls2->current_task->eh == NULL) + return KERN_FAILURE; + jl_value_t *excpt; + if (is_addr_on_stack(jl_atomic_load_relaxed(&ptls2->current_task), (void*)fault_addr)) { + excpt = jl_stackovf_exception; } + else if (is_write_fault(exc_state)) // false for alignment errors + excpt = jl_readonlymemory_exception; + else + return KERN_FAILURE; + jl_throw_in_thread(ptls2, thread, excpt); + return KERN_SUCCESS; } //mach_exc_server expects us to define this symbol locally @@ -381,9 +420,15 @@ static void attach_exception_port(thread_port_t thread, int segv_only) HANDLE_MACH_ERROR("thread_set_exception_ports", ret); } -static void jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) +static int jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) JL_NOTSAFEPOINT { - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + 
if (ptls2 == NULL) // this thread is not alive + return 0; + jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task); + if (ct2 == NULL) // this thread is already dead + return 0; + mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); kern_return_t ret = thread_suspend(thread); @@ -395,18 +440,23 @@ static void jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx) // Get the state of the suspended thread ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)ctx, &count); + return 1; } -static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) +int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) { - static host_thread_state_t state; - jl_thread_suspend_and_get_state2(tid, &state); - *ctx = (unw_context_t*)&state; + (void)timeout; + host_thread_state_t state; + if (!jl_thread_suspend_and_get_state2(tid, &state)) { + return 0; + } + *ctx = *(unw_context_t*)&state; + return 1; } -static void jl_thread_resume(int tid, int sig) +void jl_thread_resume(int tid) { - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); kern_return_t ret = thread_resume(thread); HANDLE_MACH_ERROR("thread_resume", ret); @@ -416,7 +466,7 @@ static void jl_thread_resume(int tid, int sig) // or if SIGINT happens too often. static void jl_try_deliver_sigint(void) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); kern_return_t ret = thread_suspend(thread); @@ -433,7 +483,7 @@ static void jl_try_deliver_sigint(void) if (force) jl_safe_printf("WARNING: Force throwing a SIGINT\n"); jl_clear_force_sigint(); - jl_throw_in_thread(0, thread, jl_interrupt_exception); + jl_throw_in_thread(ptls2, thread, jl_interrupt_exception); } else { jl_wake_libuv(); @@ -443,56 +493,41 @@ static void jl_try_deliver_sigint(void) HANDLE_MACH_ERROR("thread_resume", ret); } -static void JL_NORETURN jl_exit_thread0_cb(int exitstate) +static void JL_NORETURN jl_exit_thread0_cb(int signo) { CFI_NORETURN - jl_critical_error(exitstate - 128, NULL, jl_current_task); - jl_exit(exitstate); + jl_critical_error(signo, 0, NULL, jl_current_task); + jl_atexit_hook(128); + jl_raise(signo); } -static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size) +static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; mach_port_t thread = pthread_mach_thread_np(ptls2->system_id); host_thread_state_t state; - jl_thread_suspend_and_get_state2(0, &state); - unw_context_t *uc = (unw_context_t*)&state; + if (!jl_thread_suspend_and_get_state2(0, &state)) { + // thread 0 is gone? just do the signal ourself + jl_raise(signo); + } // This aborts `sleep` and other syscalls. kern_return_t ret = thread_abort(thread); HANDLE_MACH_ERROR("thread_abort", ret); - if (bt_data == NULL) { - // Must avoid extended backtrace frames here unless we're sure bt_data - // is properly rooted. 
- ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, uc, NULL); - } - else { - ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE - memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0])); - } - - void (*exit_func)(int) = &_exit; - if (thread0_exit_count <= 1) { - exit_func = &jl_exit_thread0_cb; - } - else if (thread0_exit_count == 2) { - exit_func = &exit; - } - else { - exit_func = &_exit; - } + ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE + memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0])); #ifdef _CPU_X86_64_ // First integer argument. Not portable but good enough =) - state.__rdi = exitstate; + state.__rdi = signo; #elif defined(_CPU_AARCH64_) - state.__x[0] = exitstate; + state.__x[0] = signo; #else #error Fill in first integer argument here #endif - jl_call_in_state(ptls2, &state, (void (*)(void))exit_func); + jl_call_in_state(ptls2, &state, (void (*)(void))&jl_exit_thread0_cb); unsigned int count = MACH_THREAD_STATE_COUNT; ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count); HANDLE_MACH_ERROR("thread_set_state", ret); @@ -524,7 +559,7 @@ static kern_return_t profiler_segv_handler( // Not currently unwinding. Raise regular segfault if (forceDwarf == -2) - return KERN_INVALID_ARGUMENT; + return KERN_FAILURE; if (forceDwarf == 0) forceDwarf = 1; @@ -572,7 +607,12 @@ static int jl_lock_profile_mach(int dlsymlock) // workaround for old keymgr bugs void *unused = NULL; int keymgr_locked = _keymgr_get_and_lock_processwide_ptr_2(KEYMGR_GCC3_DW2_OBJ_LIST, &unused) == 0; - // workaround for new dlsym4 bugs (API and bugs introduced in macOS 12.1) + // workaround for new dlsym4 bugs in the workaround for dlsym bugs: _dyld_atfork_prepare + acquires its locks in the wrong order, but fortunately we happen to be able to guard it + with this call to force it to prevent that TSAN violation from causing a deadlock + if (dlsymlock && _dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL) + _dyld_dlopen_atfork_prepare(); + // workaround for new dlsym4 bugs (API and bugs introduced circa macOS 12.1) if (dlsymlock && _dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) + _dyld_atfork_prepare(); return keymgr_locked; @@ -580,15 +620,24 @@ static int jl_lock_profile_mach(int dlsymlock) static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked) { - if (dlsymlock && _dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) \ - _dyld_atfork_parent(); \ + if (dlsymlock && _dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) + _dyld_atfork_parent(); + if (dlsymlock && _dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL) + _dyld_dlopen_atfork_parent(); if (keymgr_locked) _keymgr_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); jl_unlock_profile(); } -#define jl_lock_profile() int keymgr_locked = jl_lock_profile_mach(1) -#define jl_unlock_profile() jl_unlock_profile_mach(1, keymgr_locked) +int jl_lock_stackwalk(void) +{ + return jl_lock_profile_mach(1); +} + +void jl_unlock_stackwalk(int lockret) +{ + jl_unlock_profile_mach(1, lockret); +} void *mach_profile_listener(void *arg) { @@ -608,8 +657,9 @@ void *mach_profile_listener(void *arg) // (so that thread zero gets notified last) int keymgr_locked = jl_lock_profile_mach(0); - int *randperm = profile_get_randperm(jl_n_threads); - for (int idx = jl_n_threads; idx-- > 0; ) { + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + int *randperm = profile_get_randperm(nthreads); + for (int idx = nthreads; idx-- > 0; )
{ // Stop the threads in the random or reverse round-robin order. int i = randperm[idx]; // if there is no space left, break early @@ -618,14 +668,19 @@ void *mach_profile_listener(void *arg) break; } + if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL) + _dyld_dlopen_atfork_prepare(); if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) _dyld_atfork_prepare(); // briefly acquire the dlsym lock host_thread_state_t state; - jl_thread_suspend_and_get_state2(i, &state); + int valid_thread = jl_thread_suspend_and_get_state2(i, &state); unw_context_t *uc = (unw_context_t*)&state; if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) _dyld_atfork_parent(); // quickly release the dlsym lock - + if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL) + _dyld_dlopen_atfork_parent(); + if (!valid_thread) + continue; if (running) { #ifdef LLVMLIBUNWIND /* @@ -660,12 +715,12 @@ void *mach_profile_listener(void *arg) #else bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL); #endif - jl_ptls_t ptls = jl_all_tls_states[i]; + jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[i]; // store threadid but add 1 as 0 is preserved to indicate end of block bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; - // store task id + // store task id (never null) bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); // store cpu cycle clock @@ -679,7 +734,7 @@ void *mach_profile_listener(void *arg) bt_data_prof[bt_size_cur++].uintptr = 0; } // We're done! Resume the thread. - jl_thread_resume(i, 0); + jl_thread_resume(i); } jl_unlock_profile_mach(0, keymgr_locked); if (running) { diff --git a/src/signals-unix.c b/src/signals-unix.c index 34a77fc6fad6e..eb51a5fccfaba 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -27,9 +27,7 @@ #ifdef __APPLE__ // Darwin's mach ports allow signal-free thread management #define HAVE_MACH #define HAVE_KEVENT -#elif defined(__FreeBSD__) // generic bsd -#define HAVE_ITIMER -#else // generic linux +#else // generic Linux or BSD #define HAVE_TIMER #endif @@ -37,10 +35,8 @@ #include #endif -// 8M signal stack, same as default stack size and enough -// for reasonable finalizers. -// Should also be enough for parallel GC when we have it =) -#define sig_stack_size (8 * 1024 * 1024) +// 8M signal stack, same as default stack size (though we barely use this) +static const size_t sig_stack_size = 8 * 1024 * 1024; #include "julia_assert.h" @@ -62,7 +58,7 @@ bt_context_t *jl_to_bt_context(void *sigctx) } static int thread0_exit_count = 0; -static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size); +static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size); static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *_ctx) { @@ -96,15 +92,16 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void * static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) { // One guard page for signal_stack. - return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size || - (char*)ptr > (char*)ptls->signal_stack + sig_stack_size); + return ptls->signal_stack == NULL || + ((char*)ptr >= (char*)ptls->signal_stack - jl_page_size && + (char*)ptr <= (char*)ptls->signal_stack + (ptls->signal_stack_size ? 
ptls->signal_stack_size : sig_stack_size)); } // Modify signal context `_ctx` so that `fptr` will execute when the signal // returns. `fptr` will execute on the signal stack, and must not return. // jl_call_in_ctx is also currently executing on that signal stack, // so be careful not to smash it -static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx) +JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx) { // Modifying the ucontext should work but there is concern that // sigreturn oriented programming mitigation can work against us @@ -113,22 +110,21 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c // checks that the syscall is made in the signal handler and that // the ucontext address is valid. Hopefully the value of the ucontext // will not be part of the validation... - if (!ptls || !ptls->signal_stack) { + if (!ptls) { sigset_t sset; sigemptyset(&sset); sigaddset(&sset, sig); - sigprocmask(SIG_UNBLOCK, &sset, NULL); + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); fptr(); return; } uintptr_t rsp = jl_get_rsp_from_ctx(_ctx); - if (is_addr_on_sigstack(ptls, (void*)rsp)) { + if (is_addr_on_sigstack(ptls, (void*)rsp)) rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment - } - else { - rsp = (uintptr_t)ptls->signal_stack + sig_stack_size; - } + else + rsp = (uintptr_t)ptls->signal_stack + (ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size); assert(rsp % 16 == 0); + rsp -= 16; #if defined(_OS_LINUX_) && defined(_CPU_X86_64_) ucontext_t *ctx = (ucontext_t*)_ctx; rsp -= sizeof(void*); @@ -195,7 +191,7 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c sigset_t sset; sigemptyset(&sset); sigaddset(&sset, sig); - sigprocmask(SIG_UNBLOCK, &sset, NULL); + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); fptr(); #endif } @@ -231,20 +227,41 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context) uv_tty_reset_mode(); if (sig == SIGILL) jl_show_sigill(context); - jl_critical_error(sig, jl_to_bt_context(context), jl_get_current_task()); - if (sig != SIGSEGV && - sig != SIGBUS && - sig != SIGILL) { + jl_task_t *ct = jl_get_current_task(); + jl_critical_error(sig, info->si_code, jl_to_bt_context(context), ct); + if (ct) + jl_atomic_store_relaxed(&ct->ptls->safepoint, (size_t*)NULL + 1); + if (info->si_code == 0 || + info->si_code == SI_USER || +#ifdef SI_KERNEL + info->si_code == SI_KERNEL || +#endif + info->si_code == SI_QUEUE || + info->si_code == SI_MESGQ || + info->si_code == SI_ASYNCIO || +#ifdef SI_SIGIO + info->si_code == SI_SIGIO || +#endif +#ifdef SI_TKILL + info->si_code == SI_TKILL || +#endif + info->si_code == SI_TIMER) raise(sig); - } - // fall-through return to re-execute faulting statement (but without the error handler) + else if (sig != SIGSEGV && + sig != SIGBUS && + sig != SIGILL && + sig != SIGFPE && + sig != SIGTRAP) + raise(sig); + // fall-through return to re-execute faulting statement (but without the + // error handler and the pgcstack having been destroyed) } #if defined(_CPU_X86_64_) || defined(_CPU_X86_) enum x86_trap_flags { USER_MODE = 0x4, WRITE_FAULT = 0x2, - PAGE_PRESENT = 0x1 + PAGE_PRESENT = 0x1 // whether this page is currently mapped into memory }; int exc_reg_is_write_fault(uintptr_t err) { @@ -254,11 +271,21 @@ int exc_reg_is_write_fault(uintptr_t err) { enum aarch64_esr_layout { EC_MASK = ((uint32_t)0b111111) << 26, EC_DATA_ABORT = ((uint32_t)0b100100) << 26, + DFSC_MASK = ((uint32_t)0b111111) << 
0, ISR_DA_WnR = ((uint32_t)1) << 6 }; int exc_reg_is_write_fault(uintptr_t esr) { - return (esr & EC_MASK) == EC_DATA_ABORT && (esr & ISR_DA_WnR); + // n.b. we check that DFSC is either a permission fault (page in memory but not writable) or a translation fault (page not in memory) + // but because of info->si_code == SEGV_ACCERR, we know the kernel could have brought the page into memory. + // Access faults happen when trying to write to code or secure memory, which is a more severe violation, so we ignore those. + // AArch64 appears to leave it up to a given implementer whether atomic update errors are reported as read or write faults. + return (esr & EC_MASK) == EC_DATA_ABORT && + (((esr & DFSC_MASK) >= 0b000100 && // Translation flag fault, level 0. + (esr & DFSC_MASK) <= 0b000111) || // Translation fault, level 3. + ((esr & DFSC_MASK) >= 0b001100 && // Permission flag fault, level 0. + (esr & DFSC_MASK) <= 0b001111)) && // Permission fault, level 3. + (esr & ISR_DA_WnR); // Attempted write } #endif @@ -266,6 +293,18 @@ int exc_reg_is_write_fault(uintptr_t esr) { #include "signals-mach.c" #else +int jl_lock_stackwalk(void) +{ + jl_lock_profile(); + return 0; +} + +void jl_unlock_stackwalk(int lockret) +{ + (void)lockret; + jl_unlock_profile(); +} + #if defined(_OS_LINUX_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_)) int is_write_fault(void *context) { @@ -274,8 +313,8 @@ int is_write_fault(void *context) { } #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_) struct linux_aarch64_ctx_header { - uint32_t magic; - uint32_t size; + uint32_t magic; + uint32_t size; }; const uint32_t linux_esr_magic = 0x45535201; @@ -306,27 +345,34 @@ int is_write_fault(void *context) { static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) { - return (is_addr_on_sigstack(ptls, ptr) && + return (ptls->signal_stack != NULL && + is_addr_on_sigstack(ptls, ptr) && is_addr_on_sigstack(ptls, (void*)jl_get_rsp_from_ctx(context))); } -static void segv_handler(int sig, siginfo_t *info, void *context) +JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context) { + assert(sig == SIGSEGV || sig == SIGBUS); if (jl_get_safe_restore()) { // restarting jl_ or profile jl_call_in_ctx(NULL, &jl_sig_throw, sig, context); return; } jl_task_t *ct = jl_get_current_task(); - if (ct == NULL) { + if (ct == NULL || ct->ptls == NULL || jl_atomic_load_relaxed(&ct->ptls->gc_state) == JL_GC_STATE_WAITING) { sigdie_handler(sig, info, context); return; } - assert(sig == SIGSEGV || sig == SIGBUS); - if (jl_addr_is_safepoint((uintptr_t)info->si_addr)) { + if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && jl_addr_is_safepoint((uintptr_t)info->si_addr) && !is_write_fault(context)) { jl_set_gc_and_wait(); // Do not raise sigint on worker thread if (jl_atomic_load_relaxed(&ct->tid) != 0) return; + // n.b. if the user might have seen that we were in a state where it + // was safe to run GC concurrently, we might briefly enter a state + // where our execution is not consistent with the gc_state of this + // thread. That will quickly be rectified when we rerun the faulting + // instruction and end up right back here, or we start to run the + // exception handler and immediately hit the safepoint there.
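Aside (not part of the patch): the segv_handler hunk above only treats a fault as a safepoint when it is a SEGV_ACCERR read of the safepoint page that is not a write. The toy sketch below illustrates the general mechanism that branch services, assuming the usual "safepoint page" trick: a coordinator flips the page to PROT_NONE, the next poll (a plain load) faults, and the SIGSEGV handler can park the thread. This is not Julia's implementation; the demo_* names are invented and async-signal-safety concerns are ignored.

#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static void *demo_safepoint_page;

static void demo_segv(int sig, siginfo_t *info, void *uctx)
{
    (void)sig; (void)uctx;
    uintptr_t addr = (uintptr_t)info->si_addr;
    uintptr_t page = (uintptr_t)demo_safepoint_page;
    if (addr >= page && addr < page + (size_t)getpagesize()) {
        // Safepoint hit: a real runtime would block here until GC finishes.
        // Re-enabling the page lets the faulting load re-execute and succeed.
        mprotect(demo_safepoint_page, getpagesize(), PROT_READ);
        return;
    }
    _exit(1); // a genuine segfault in this toy just terminates
}

static void demo_install(void)
{
    demo_safepoint_page = mmap(NULL, getpagesize(), PROT_READ,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = demo_segv;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGSEGV, &sa, NULL);
}

static void demo_safepoint_poll(void)
{
    // The poll is just a read; it costs nothing until a coordinator calls
    //   mprotect(demo_safepoint_page, getpagesize(), PROT_NONE);
    *(volatile char *)demo_safepoint_page;
}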
if (ct->ptls->defer_signal) { jl_safepoint_defer_sigint(); } @@ -336,7 +382,9 @@ static void segv_handler(int sig, siginfo_t *info, void *context) } return; } - if (is_addr_on_stack(ct, info->si_addr)) { // stack overflow + if (ct->eh == NULL) + sigdie_handler(sig, info, context); + if ((sig != SIGBUS || info->si_code == BUS_ADRERR) && is_addr_on_stack(ct, info->si_addr)) { // stack overflow and not a BUS_ADRALN (alignment error) jl_throw_in_ctx(ct, jl_stackovf_exception, sig, context); } else if (jl_is_on_sigstack(ct->ptls, info->si_addr, context)) { @@ -346,33 +394,35 @@ static void segv_handler(int sig, siginfo_t *info, void *context) // (we are already corrupting that stack running this function) // so just call `_exit` to terminate immediately. jl_safe_printf("ERROR: Signal stack overflow, exit\n"); - _exit(sig + 128); + jl_raise(sig); } else if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && is_write_fault(context)) { // writing to read-only memory (e.g., mmap) jl_throw_in_ctx(ct, jl_readonlymemory_exception, sig, context); } else { -#ifdef SEGV_EXCEPTION - jl_throw_in_ctx(ct, jl_segv_exception, sig, context); -#else sigdie_handler(sig, info, context); -#endif } } #if !defined(JL_DISABLE_LIBUNWIND) -static unw_context_t *signal_context; +static bt_context_t *signal_context; pthread_mutex_t in_signal_lock; static pthread_cond_t exit_signal_cond; static pthread_cond_t signal_caught_cond; -static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) +int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); - ts.tv_sec += 1; + ts.tv_sec += timeout; pthread_mutex_lock(&in_signal_lock); - jl_ptls_t ptls2 = jl_all_tls_states[tid]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL; + if (ct2 == NULL) { + // this thread is not alive or already dead + pthread_mutex_unlock(&in_signal_lock); + return 0; + } jl_atomic_store_release(&ptls2->signal_request, 1); pthread_kill(ptls2->system_id, SIGUSR2); // wait for thread to acknowledge @@ -380,9 +430,8 @@ static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) if (err == ETIMEDOUT) { sig_atomic_t request = 1; if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) { - *ctx = NULL; pthread_mutex_unlock(&in_signal_lock); - return; + return 0; } // Request is either now 0 (meaning the other thread is waiting for // exit_signal_cond already), @@ -399,15 +448,16 @@ static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx) // checking it is 0, and add an acquire barrier for good measure) int request = jl_atomic_load_acquire(&ptls2->signal_request); assert(request == 0); (void) request; - *ctx = signal_context; + jl_atomic_store_release(&ptls2->signal_request, 1); // prepare to resume normally + *ctx = *signal_context; + return 1; } -static void jl_thread_resume(int tid, int sig) +void jl_thread_resume(int tid) { - jl_ptls_t ptls2 = jl_all_tls_states[tid]; - jl_atomic_store_release(&ptls2->signal_request, sig == -1 ? 
3 : 1); + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; pthread_cond_broadcast(&exit_signal_cond); - pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge + pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge (so that signal_request doesn't get mixed up) // The other thread is waiting to leave exit_signal_cond (verify that here by // checking it is 0, and add an acquire barrier for good measure) int request = jl_atomic_load_acquire(&ptls2->signal_request); @@ -420,7 +470,7 @@ static void jl_thread_resume(int tid, int sig) // or if SIGINT happens too often. static void jl_try_deliver_sigint(void) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; jl_safepoint_enable_sigint(); jl_wake_libuv(); jl_atomic_store_release(&ptls2->signal_request, 2); @@ -430,43 +480,31 @@ static void jl_try_deliver_sigint(void) // Write only by signal handling thread, read only by main thread // no sync necessary. -static int thread0_exit_state = 0; +static int thread0_exit_signo = 0; static void JL_NORETURN jl_exit_thread0_cb(void) { CFI_NORETURN - // This can get stuck if it happens at an unfortunate spot - // (unavoidable due to its async nature). - // Try harder to exit each time if we get multiple exit requests. - if (thread0_exit_count <= 1) { - jl_critical_error(thread0_exit_state - 128, NULL, jl_current_task); - jl_exit(thread0_exit_state); - } - else if (thread0_exit_count == 2) { - exit(thread0_exit_state); - } - else { - _exit(thread0_exit_state); - } + jl_critical_error(thread0_exit_signo, 0, NULL, jl_current_task); + jl_atexit_hook(128); + jl_raise(thread0_exit_signo); } -static void jl_exit_thread0(int state, jl_bt_element_t *bt_data, size_t bt_size) +static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; - if (thread0_exit_count <= 1) { - unw_context_t *signal_context; - jl_thread_suspend_and_get_state(0, &signal_context); - if (signal_context != NULL) { - thread0_exit_state = state; - ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE - memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0])); - jl_thread_resume(0, -1); - return; - } - } - thread0_exit_state = state; - jl_atomic_store_release(&ptls2->signal_request, 3); + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; + bt_context_t signal_context; // This also makes sure `sleep` is aborted. - pthread_kill(ptls2->system_id, SIGUSR2); + if (jl_thread_suspend_and_get_state(0, 30, &signal_context)) { + thread0_exit_signo = signo; + ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE + memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0])); + jl_atomic_store_release(&ptls2->signal_request, 3); + jl_thread_resume(0); // resume with message 3 (call jl_exit_thread0_cb) + } + else { + // thread 0 is gone? 
just do the exit ourself + jl_raise(signo); + } } // request: @@ -475,7 +513,7 @@ static void jl_exit_thread0(int state, jl_bt_element_t *bt_data, size_t bt_size) // 1: get state // 2: throw sigint if `!defer_signal && io_wait` or if force throw threshold // is reached -// 3: exit with `thread0_exit_state` +// 3: raise `thread0_exit_signo` and try to exit void usr2_handler(int sig, siginfo_t *info, void *ctx) { jl_task_t *ct = jl_get_current_task(); @@ -577,37 +615,6 @@ JL_DLLEXPORT void jl_profile_stop_timer(void) } } -#elif defined(HAVE_ITIMER) -// BSD-style timers -#include -#include -struct itimerval timerprof; - -JL_DLLEXPORT int jl_profile_start_timer(void) -{ - timerprof.it_interval.tv_sec = 0; - timerprof.it_interval.tv_usec = 0; - timerprof.it_value.tv_sec = nsecprof / GIGA; - timerprof.it_value.tv_usec = ((nsecprof % GIGA) + 999) / 1000; - // Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer - running = 1; - if (setitimer(ITIMER_PROF, &timerprof, NULL) == -1) { - running = 0; - return -3; - } - return 0; -} - -JL_DLLEXPORT void jl_profile_stop_timer(void) -{ - if (running) { - memset(&timerprof, 0, sizeof(timerprof)); - setitimer(ITIMER_PROF, &timerprof, NULL); - last_timer_delete_time = jl_hrtime(); - running = 0; - } -} - #else #error no profile tools available @@ -631,49 +638,56 @@ static void allocate_segv_handler(void) } } -static void *alloc_sigstack(size_t *ssize) -{ - void *stk = jl_malloc_stack(ssize, NULL); - if (stk == MAP_FAILED) - jl_errorf("fatal error allocating signal stack: mmap: %s", strerror(errno)); - return stk; -} - void jl_install_thread_signal_handler(jl_ptls_t ptls) { - size_t ssize = sig_stack_size; - void *signal_stack = alloc_sigstack(&ssize); - ptls->signal_stack = signal_stack; - stack_t ss; - ss.ss_flags = 0; - ss.ss_size = ssize - 16; - ss.ss_sp = signal_stack; - if (sigaltstack(&ss, NULL) < 0) { - jl_errorf("fatal error: sigaltstack: %s", strerror(errno)); - } - #ifdef HAVE_MACH attach_exception_port(pthread_mach_thread_np(ptls->system_id), 0); #endif + stack_t ss; + if (sigaltstack(NULL, &ss) < 0) + jl_errorf("fatal error: sigaltstack: %s", strerror(errno)); + if ((ss.ss_flags & SS_DISABLE) != SS_DISABLE) + return; // someone else appears to have already set this up, so just use that + size_t ssize = sig_stack_size; + void *signal_stack = jl_malloc_stack(&ssize, NULL); + ss.ss_flags = 0; + ss.ss_size = ssize; + assert(ssize != 0); + if (signal_stack == NULL) { + signal_stack = malloc(ssize); + ssize = 0; + if (signal_stack == NULL) + jl_safe_printf("\nwarning: julia signal alt stack could not be allocated (StackOverflowError will be fatal on this thread).\n"); + else + jl_safe_printf("\nwarning: julia signal stack allocated without guard page (launch foreign threads earlier to avoid this warning).\n"); + } + if (signal_stack != NULL) { + ss.ss_sp = signal_stack; + if (sigaltstack(&ss, NULL) < 0) + jl_errorf("fatal error: sigaltstack: %s", strerror(errno)); + ptls->signal_stack = signal_stack; + ptls->signal_stack_size = ssize; + } } -static void jl_sigsetset(sigset_t *sset) -{ - sigemptyset(sset); - sigaddset(sset, SIGINT); - sigaddset(sset, SIGTERM); - sigaddset(sset, SIGABRT); - sigaddset(sset, SIGQUIT); +const static int sigwait_sigs[] = { + SIGINT, SIGTERM, SIGQUIT, #ifdef SIGINFO - sigaddset(sset, SIGINFO); + SIGINFO, #else - sigaddset(sset, SIGUSR1); + SIGUSR1, #endif #if defined(HAVE_TIMER) - sigaddset(sset, SIGUSR1); -#elif defined(HAVE_ITIMER) - sigaddset(sset, SIGPROF); + 
SIGUSR1, #endif + 0 +}; + +static void jl_sigsetset(sigset_t *sset) +{ + sigemptyset(sset); + for (const int *sig = sigwait_sigs; *sig; sig++) + sigaddset(sset, *sig); } #ifdef HAVE_KEVENT @@ -688,6 +702,7 @@ static void kqueue_signal(int *sigqueue, struct kevent *ev, int sig) *sigqueue = -1; } else { + // kqueue gets signals before SIG_IGN, but does not remove them from pending (unlike sigwait) signal(sig, SIG_IGN); } } @@ -698,6 +713,14 @@ void trigger_profile_peek(void) jl_safe_printf("\n======================================================================================\n"); jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration); jl_safe_printf("======================================================================================\n"); + if (bt_size_max == 0){ + // If the buffer hasn't been initialized, initialize with default size + // Keep these values synchronized with Profile.default_init() + if (jl_profile_init(10000000, 1000000) == -1) { + jl_safe_printf("ERROR: could not initialize the profile buffer"); + return; + } + } bt_size_cur = 0; // clear profile buffer if (jl_profile_start_timer() < 0) jl_safe_printf("ERROR: Could not start profile timer\n"); @@ -722,20 +745,13 @@ static void *signal_listener(void *arg) perror("signal kqueue"); } else { - kqueue_signal(&sigqueue, &ev, SIGINT); - kqueue_signal(&sigqueue, &ev, SIGTERM); - kqueue_signal(&sigqueue, &ev, SIGABRT); - kqueue_signal(&sigqueue, &ev, SIGQUIT); -#ifdef SIGINFO - kqueue_signal(&sigqueue, &ev, SIGINFO); -#else - kqueue_signal(&sigqueue, &ev, SIGUSR1); -#endif -#if defined(HAVE_TIMER) - kqueue_signal(&sigqueue, &ev, SIGUSR1); -#elif defined(HAVE_ITIMER) - kqueue_signal(&sigqueue, &ev, SIGPROF); -#endif + for (const int *sig = sigwait_sigs; *sig; sig++) + kqueue_signal(&sigqueue, &ev, *sig); + if (sigqueue == -1) { + // re-enable sigwait for these + for (const int *sig = sigwait_sigs; *sig; sig++) + signal(*sig, SIG_DFL); + } } #endif while (1) { @@ -752,6 +768,8 @@ static void *signal_listener(void *arg) if (nevents != 1) { close(sigqueue); sigqueue = -1; + for (const int *sig = sigwait_sigs; *sig; sig++) + signal(*sig, SIG_DFL); continue; } sig = ev.ident; @@ -775,11 +793,9 @@ static void *signal_listener(void *arg) profile = (sig == SIGUSR1); #if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L if (profile && !(info.si_code == SI_TIMER && - info.si_value.sival_ptr == &timerprof)) + info.si_value.sival_ptr == &timerprof)) profile = 0; #endif -#elif defined(HAVE_ITIMER) - profile = (sig == SIGPROF); #endif #endif @@ -822,31 +838,55 @@ static void *signal_listener(void *arg) doexit = 0; } #endif + if (doexit) { + // The exit can get stuck if it happens at an unfortunate spot in thread 0 + // (unavoidable due to its async nature). + // Try much harder to exit next time, if we get multiple exit requests. + // 1. unblock the signal, so this thread can be killed by it + // 2. reset the tty next, because we might die before we get another chance to do that + // 3. attempt a graceful cleanup of julia, followed by an abrupt end to the C runtime (except for fflush) + // 4. 
kill this thread with `raise`, to preserve the signo / exit code / and coredump configuration + // Similar to jl_raise, but a slightly different order of operations + sigset_t sset; + sigemptyset(&sset); + sigaddset(&sset, sig); + pthread_sigmask(SIG_UNBLOCK, &sset, NULL); +#ifdef HAVE_KEVENT + signal(sig, SIG_DFL); +#endif + uv_tty_reset_mode(); + thread0_exit_count++; + fflush(NULL); + if (thread0_exit_count > 1) { + raise(sig); // very unlikely to return + _exit(128 + sig); + } + } + int nthreads = jl_atomic_load_acquire(&jl_n_threads); bt_size = 0; #if !defined(JL_DISABLE_LIBUNWIND) - unw_context_t *signal_context; + bt_context_t signal_context; // sample each thread, round-robin style in reverse order // (so that thread zero gets notified last) if (critical || profile) { - jl_lock_profile(); + int lockret = jl_lock_stackwalk(); int *randperm; if (profile) - randperm = profile_get_randperm(jl_n_threads); - for (int idx = jl_n_threads; idx-- > 0; ) { + randperm = profile_get_randperm(nthreads); + for (int idx = nthreads; idx-- > 0; ) { // Stop the threads in the random or reverse round-robin order. int i = profile ? randperm[idx] : idx; // notify thread to stop - jl_thread_suspend_and_get_state(i, &signal_context); - if (signal_context == NULL) + if (!jl_thread_suspend_and_get_state(i, 1, &signal_context)) continue; // do backtrace on thread contexts for critical signals // this part must be signal-handler safe if (critical) { bt_size += rec_backtrace_ctx(bt_data + bt_size, - JL_MAX_BT_SIZE / jl_n_threads - 1, - signal_context, NULL); + JL_MAX_BT_SIZE / nthreads - 1, + &signal_context, NULL); bt_data[bt_size++].uintptr = 0; } @@ -868,16 +908,16 @@ static void *signal_listener(void *arg) } else { // Get backtrace data bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, - bt_size_max - bt_size_cur - 1, signal_context, NULL); + bt_size_max - bt_size_cur - 1, &signal_context, NULL); } jl_set_safe_restore(old_buf); - jl_ptls_t ptls2 = jl_all_tls_states[i]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[i]; // store threadid but add 1 as 0 is preserved to indicate end of block bt_data_prof[bt_size_cur++].uintptr = ptls2->tid + 1; - // store task id + // store task id (never null) bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task); // store cpu cycle clock @@ -893,17 +933,15 @@ static void *signal_listener(void *arg) } // notify thread to resume - jl_thread_resume(i, sig); + jl_thread_resume(i); } - jl_unlock_profile(); + jl_unlock_stackwalk(lockret); } #ifndef HAVE_MACH if (profile && running) { jl_check_profile_autostop(); #if defined(HAVE_TIMER) timer_settime(timerprof, 0, &itsprof, NULL); -#elif defined(HAVE_ITIMER) - setitimer(ITIMER_PROF, &timerprof, NULL); #endif } #endif @@ -911,26 +949,29 @@ static void *signal_listener(void *arg) // this part is async with the running of the rest of the program // and must be thread-safe, but not necessarily signal-handler safe - if (critical) { - if (doexit) { - thread0_exit_count++; - jl_exit_thread0(128 + sig, bt_data, bt_size); + if (doexit) { +// // this is probably always SI_USER (0x10001 / 65537), so we suppress it +// int si_code = 0; +//#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L && !HAVE_KEVENT +// si_code = info.si_code; +//#endif + jl_exit_thread0(sig, bt_data, bt_size); + } + else if (critical) { + // critical in this case actually means SIGINFO request +#ifndef SIGINFO // SIGINFO already prints something similar automatically + int 
nrunning = 0; + for (int idx = nthreads; idx-- > 0; ) { + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[idx]; + nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state); } - else { -#ifndef SIGINFO // SIGINFO already prints this automatically - int nrunning = 0; - for (int idx = jl_n_threads; idx-- > 0; ) { - jl_ptls_t ptls2 = jl_all_tls_states[idx]; - nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state); - } - jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, jl_n_threads); + jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, nthreads); #endif - jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); - size_t i; - for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { - jl_print_bt_entry_codeloc(bt_data + i); - } + jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig)); + size_t i; + for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { + jl_print_bt_entry_codeloc(bt_data + i); } } } @@ -944,7 +985,7 @@ void restore_signals(void) sigset_t sset; jl_sigsetset(&sset); - sigprocmask(SIG_SETMASK, &sset, 0); + pthread_sigmask(SIG_SETMASK, &sset, 0); #if !defined(HAVE_MACH) && !defined(JL_DISABLE_LIBUNWIND) if (pthread_mutex_init(&in_signal_lock, NULL) != 0 || @@ -967,7 +1008,7 @@ static void fpe_handler(int sig, siginfo_t *info, void *context) return; } jl_task_t *ct = jl_get_current_task(); - if (ct == NULL) // exception on foreign thread is fatal + if (ct == NULL || ct->eh == NULL) // exception on foreign thread is fatal sigdie_handler(sig, info, context); else jl_throw_in_ctx(ct, jl_diverror_exception, sig, context); @@ -978,16 +1019,43 @@ static void sigint_handler(int sig) jl_sigint_passed = 1; } +#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) +static void sigtrap_handler(int sig, siginfo_t *info, void *context) +{ + uintptr_t pc = ((ucontext_t*)context)->uc_mcontext->__ss.__pc; // TODO: Do this in linux as well + uint32_t* code = (uint32_t*)(pc); // https://gcc.gnu.org/legacy-ml/gcc-patches/2013-11/msg02228.html + if (*code == 0xd4200020) { // brk #0x1 which is what LLVM defines as trap + signal(sig, SIG_DFL); + sig = SIGILL; // redefine this as an "unreachable reached" error message + sigdie_handler(sig, info, context); + } +} +#endif + void jl_install_default_signal_handlers(void) { struct sigaction actf; memset(&actf, 0, sizeof(struct sigaction)); sigemptyset(&actf.sa_mask); actf.sa_sigaction = fpe_handler; - actf.sa_flags = SA_ONSTACK | SA_SIGINFO; + actf.sa_flags = SA_SIGINFO; if (sigaction(SIGFPE, &actf, NULL) < 0) { jl_errorf("fatal error: sigaction: %s", strerror(errno)); } +#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) + struct sigaction acttrap; + memset(&acttrap, 0, sizeof(struct sigaction)); + sigemptyset(&acttrap.sa_mask); + acttrap.sa_sigaction = sigtrap_handler; + acttrap.sa_flags = SA_SIGINFO; + if (sigaction(SIGTRAP, &acttrap, NULL) < 0) { + jl_errorf("fatal error: sigaction: %s", strerror(errno)); + } +#else + if (signal(SIGTRAP, SIG_IGN) == SIG_ERR) { + jl_error("fatal error: Couldn't set SIGTRAP"); + } +#endif struct sigaction actint; memset(&actint, 0, sizeof(struct sigaction)); sigemptyset(&actint.sa_mask); @@ -999,9 +1067,6 @@ void jl_install_default_signal_handlers(void) if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) { jl_error("fatal error: Couldn't set SIGPIPE"); } - if (signal(SIGTRAP, SIG_IGN) == SIG_ERR) { - jl_error("fatal error: Couldn't
set SIGTRAP"); - } #if defined(HAVE_MACH) allocate_mach_handler(); @@ -1034,11 +1099,6 @@ void jl_install_default_signal_handlers(void) } // need to ensure the following signals are not SIG_IGN, even though they will be blocked act_die.sa_flags = SA_SIGINFO | SA_RESTART | SA_RESETHAND; -#if defined(HAVE_ITIMER) - if (sigaction(SIGPROF, &act_die, NULL) < 0) { - jl_errorf("fatal error: sigaction: %s", strerror(errno)); - } -#endif #ifdef SIGINFO if (sigaction(SIGINFO, &act_die, NULL) < 0) { jl_errorf("fatal error: sigaction: %s", strerror(errno)); diff --git a/src/signals-win.c b/src/signals-win.c index 178a7463b8d50..f763b71e1cf32 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -4,7 +4,7 @@ // Note that this file is `#include`d by "signal-handling.c" #include // hidden by LEAN_AND_MEAN -#define sig_stack_size 131072 // 128k reserved for SEGV handling +static const size_t sig_stack_size = 131072; // 128k reserved for backtrace_fiber for stack overflow handling // Copied from MINGW_FLOAT_H which may not be found due to a collision with the builtin gcc float.h // eventually we can probably integrate this into OpenLibm. @@ -85,14 +85,16 @@ void __cdecl crt_sig_handler(int sig, int num) jl_try_throw_sigint(); } break; - default: // SIGSEGV, (SSIGTERM, IGILL) - if (jl_get_safe_restore()) - jl_rethrow(); + default: // SIGSEGV, SIGTERM, SIGILL, SIGABRT + if (sig == SIGSEGV && jl_get_safe_restore()) { + signal(sig, (void (__cdecl *)(int))crt_sig_handler); + jl_sig_throw(); + } memset(&Context, 0, sizeof(Context)); RtlCaptureContext(&Context); if (sig == SIGILL) jl_show_sigill(&Context); - jl_critical_error(sig, &Context, jl_get_current_task()); + jl_critical_error(sig, 0, &Context, jl_get_current_task()); raise(sig); } } @@ -122,10 +124,8 @@ void restore_signals(void) SetConsoleCtrlHandler(NULL, 0); } -void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread) +void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *excpt, PCONTEXT ctxThread) { - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; #if defined(_CPU_X86_64_) DWORD64 Rsp = (ctxThread->Rsp & (DWORD64)-16) - 8; #elif defined(_CPU_X86_) @@ -133,8 +133,9 @@ void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread) #else #error WIN16 not supported :P #endif - if (!jl_get_safe_restore()) { + if (ct && !jl_get_safe_restore()) { assert(excpt != NULL); + jl_ptls_t ptls = ct->ptls; ptls->bt_size = 0; if (excpt != jl_stackovf_exception) { ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, @@ -165,7 +166,7 @@ HANDLE hMainThread = INVALID_HANDLE_VALUE; // Try to throw the exception in the master thread. 
static void jl_try_deliver_sigint(void) { - jl_ptls_t ptls2 = jl_all_tls_states[0]; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; jl_lock_profile(); jl_safepoint_enable_sigint(); jl_wake_libuv(); @@ -191,7 +192,8 @@ static void jl_try_deliver_sigint(void) jl_safe_printf("error: GetThreadContext failed\n"); return; } - jl_throw_in_ctx(jl_interrupt_exception, &ctxThread); + jl_task_t *ct = jl_atomic_load_relaxed(&ptls2->current_task); + jl_throw_in_ctx(ct, jl_interrupt_exception, &ctxThread); ctxThread.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; if (!SetThreadContext(hMainThread, &ctxThread)) { jl_safe_printf("error: SetThreadContext failed\n"); @@ -226,102 +228,113 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo) { - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) { + if (ExceptionInfo->ExceptionRecord->ExceptionFlags != 0) + return EXCEPTION_CONTINUE_SEARCH; + jl_task_t *ct = jl_get_current_task(); + if (ct != NULL && ct->ptls != NULL && ct->ptls->gc_state != JL_GC_STATE_WAITING) { + jl_ptls_t ptls = ct->ptls; switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { - case EXCEPTION_INT_DIVIDE_BY_ZERO: + case EXCEPTION_INT_DIVIDE_BY_ZERO: + if (ct->eh != NULL) { fpreset(); - jl_throw_in_ctx(jl_diverror_exception, ExceptionInfo->ContextRecord); + jl_throw_in_ctx(ct, jl_diverror_exception, ExceptionInfo->ContextRecord); return EXCEPTION_CONTINUE_EXECUTION; - case EXCEPTION_STACK_OVERFLOW: + } + break; + case EXCEPTION_STACK_OVERFLOW: + if (ct->eh != NULL) { ptls->needs_resetstkoflw = 1; - jl_throw_in_ctx(jl_stackovf_exception, ExceptionInfo->ContextRecord); + jl_throw_in_ctx(ct, jl_stackovf_exception, ExceptionInfo->ContextRecord); return EXCEPTION_CONTINUE_EXECUTION; - case EXCEPTION_ACCESS_VIOLATION: - if (jl_addr_is_safepoint(ExceptionInfo->ExceptionRecord->ExceptionInformation[1])) { - jl_set_gc_and_wait(); - // Do not raise sigint on worker thread - if (ptls->tid != 0) - return EXCEPTION_CONTINUE_EXECUTION; - if (ptls->defer_signal) { - jl_safepoint_defer_sigint(); - } - else if (jl_safepoint_consume_sigint()) { - jl_clear_force_sigint(); - jl_throw_in_ctx(jl_interrupt_exception, ExceptionInfo->ContextRecord); - } + } + break; + case EXCEPTION_ACCESS_VIOLATION: + if (jl_addr_is_safepoint(ExceptionInfo->ExceptionRecord->ExceptionInformation[1])) { + jl_set_gc_and_wait(); + // Do not raise sigint on worker thread + if (ptls->tid != 0) return EXCEPTION_CONTINUE_EXECUTION; + if (ptls->defer_signal) { + jl_safepoint_defer_sigint(); } - if (jl_get_safe_restore()) { - jl_throw_in_ctx(NULL, ExceptionInfo->ContextRecord); - return EXCEPTION_CONTINUE_EXECUTION; + else if (jl_safepoint_consume_sigint()) { + jl_clear_force_sigint(); + jl_throw_in_ctx(ct, jl_interrupt_exception, ExceptionInfo->ContextRecord); } + return EXCEPTION_CONTINUE_EXECUTION; + } + if (jl_get_safe_restore()) { + jl_throw_in_ctx(NULL, NULL, ExceptionInfo->ContextRecord); + return EXCEPTION_CONTINUE_EXECUTION; + } + if (ct->eh != NULL) { if (ExceptionInfo->ExceptionRecord->ExceptionInformation[0] == 1) { // writing to read-only memory (e.g. 
mmap) - jl_throw_in_ctx(jl_readonlymemory_exception, ExceptionInfo->ContextRecord); + jl_throw_in_ctx(ct, jl_readonlymemory_exception, ExceptionInfo->ContextRecord); return EXCEPTION_CONTINUE_EXECUTION; } + } + default: + break; } - if (ExceptionInfo->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) { - jl_safe_printf("\n"); - jl_show_sigill(ExceptionInfo->ContextRecord); - } - jl_safe_printf("\nPlease submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). Thanks.\nException: "); - switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { - case EXCEPTION_ACCESS_VIOLATION: - jl_safe_printf("EXCEPTION_ACCESS_VIOLATION"); break; - case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: - jl_safe_printf("EXCEPTION_ARRAY_BOUNDS_EXCEEDED"); break; - case EXCEPTION_BREAKPOINT: - jl_safe_printf("EXCEPTION_BREAKPOINT"); break; - case EXCEPTION_DATATYPE_MISALIGNMENT: - jl_safe_printf("EXCEPTION_DATATYPE_MISALIGNMENT"); break; - case EXCEPTION_FLT_DENORMAL_OPERAND: - jl_safe_printf("EXCEPTION_FLT_DENORMAL_OPERAND"); break; - case EXCEPTION_FLT_DIVIDE_BY_ZERO: - jl_safe_printf("EXCEPTION_FLT_DIVIDE_BY_ZERO"); break; - case EXCEPTION_FLT_INEXACT_RESULT: - jl_safe_printf("EXCEPTION_FLT_INEXACT_RESULT"); break; - case EXCEPTION_FLT_INVALID_OPERATION: - jl_safe_printf("EXCEPTION_FLT_INVALID_OPERATION"); break; - case EXCEPTION_FLT_OVERFLOW: - jl_safe_printf("EXCEPTION_FLT_OVERFLOW"); break; - case EXCEPTION_FLT_STACK_CHECK: - jl_safe_printf("EXCEPTION_FLT_STACK_CHECK"); break; - case EXCEPTION_FLT_UNDERFLOW: - jl_safe_printf("EXCEPTION_FLT_UNDERFLOW"); break; - case EXCEPTION_ILLEGAL_INSTRUCTION: - jl_safe_printf("EXCEPTION_ILLEGAL_INSTRUCTION"); break; - case EXCEPTION_IN_PAGE_ERROR: - jl_safe_printf("EXCEPTION_IN_PAGE_ERROR"); break; - case EXCEPTION_INT_DIVIDE_BY_ZERO: - jl_safe_printf("EXCEPTION_INT_DIVIDE_BY_ZERO"); break; - case EXCEPTION_INT_OVERFLOW: - jl_safe_printf("EXCEPTION_INT_OVERFLOW"); break; - case EXCEPTION_INVALID_DISPOSITION: - jl_safe_printf("EXCEPTION_INVALID_DISPOSITION"); break; - case EXCEPTION_NONCONTINUABLE_EXCEPTION: - jl_safe_printf("EXCEPTION_NONCONTINUABLE_EXCEPTION"); break; - case EXCEPTION_PRIV_INSTRUCTION: - jl_safe_printf("EXCEPTION_PRIV_INSTRUCTION"); break; - case EXCEPTION_SINGLE_STEP: - jl_safe_printf("EXCEPTION_SINGLE_STEP"); break; - case EXCEPTION_STACK_OVERFLOW: - jl_safe_printf("EXCEPTION_STACK_OVERFLOW"); break; - default: - jl_safe_printf("UNKNOWN"); break; - } - jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); - jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); - - jl_critical_error(0, ExceptionInfo->ContextRecord, ct); - static int recursion = 0; - if (recursion++) - exit(1); - else - jl_exit(1); } - return EXCEPTION_CONTINUE_SEARCH; + if (ExceptionInfo->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) { + jl_safe_printf("\n"); + jl_show_sigill(ExceptionInfo->ContextRecord); + } + jl_safe_printf("\nPlease submit a bug report with steps to reproduce this fault, and any error messages that follow (in their entirety). 
Thanks.\nException: "); + switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { + case EXCEPTION_ACCESS_VIOLATION: + jl_safe_printf("EXCEPTION_ACCESS_VIOLATION"); break; + case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: + jl_safe_printf("EXCEPTION_ARRAY_BOUNDS_EXCEEDED"); break; + case EXCEPTION_BREAKPOINT: + jl_safe_printf("EXCEPTION_BREAKPOINT"); break; + case EXCEPTION_DATATYPE_MISALIGNMENT: + jl_safe_printf("EXCEPTION_DATATYPE_MISALIGNMENT"); break; + case EXCEPTION_FLT_DENORMAL_OPERAND: + jl_safe_printf("EXCEPTION_FLT_DENORMAL_OPERAND"); break; + case EXCEPTION_FLT_DIVIDE_BY_ZERO: + jl_safe_printf("EXCEPTION_FLT_DIVIDE_BY_ZERO"); break; + case EXCEPTION_FLT_INEXACT_RESULT: + jl_safe_printf("EXCEPTION_FLT_INEXACT_RESULT"); break; + case EXCEPTION_FLT_INVALID_OPERATION: + jl_safe_printf("EXCEPTION_FLT_INVALID_OPERATION"); break; + case EXCEPTION_FLT_OVERFLOW: + jl_safe_printf("EXCEPTION_FLT_OVERFLOW"); break; + case EXCEPTION_FLT_STACK_CHECK: + jl_safe_printf("EXCEPTION_FLT_STACK_CHECK"); break; + case EXCEPTION_FLT_UNDERFLOW: + jl_safe_printf("EXCEPTION_FLT_UNDERFLOW"); break; + case EXCEPTION_ILLEGAL_INSTRUCTION: + jl_safe_printf("EXCEPTION_ILLEGAL_INSTRUCTION"); break; + case EXCEPTION_IN_PAGE_ERROR: + jl_safe_printf("EXCEPTION_IN_PAGE_ERROR"); break; + case EXCEPTION_INT_DIVIDE_BY_ZERO: + jl_safe_printf("EXCEPTION_INT_DIVIDE_BY_ZERO"); break; + case EXCEPTION_INT_OVERFLOW: + jl_safe_printf("EXCEPTION_INT_OVERFLOW"); break; + case EXCEPTION_INVALID_DISPOSITION: + jl_safe_printf("EXCEPTION_INVALID_DISPOSITION"); break; + case EXCEPTION_NONCONTINUABLE_EXCEPTION: + jl_safe_printf("EXCEPTION_NONCONTINUABLE_EXCEPTION"); break; + case EXCEPTION_PRIV_INSTRUCTION: + jl_safe_printf("EXCEPTION_PRIV_INSTRUCTION"); break; + case EXCEPTION_SINGLE_STEP: + jl_safe_printf("EXCEPTION_SINGLE_STEP"); break; + case EXCEPTION_STACK_OVERFLOW: + jl_safe_printf("EXCEPTION_STACK_OVERFLOW"); break; + default: + jl_safe_printf("UNKNOWN"); break; + } + jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); + jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress); + + jl_critical_error(0, 0, ExceptionInfo->ContextRecord, ct); + static int recursion = 0; + if (recursion++) + exit(1); + else + jl_exit(1); } JL_DLLEXPORT void jl_install_sigint_handler(void) @@ -331,6 +344,54 @@ JL_DLLEXPORT void jl_install_sigint_handler(void) static volatile HANDLE hBtThread = 0; +int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) +{ + (void)timeout; + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + if (ptls2 == NULL) // this thread is not alive + return 0; + jl_task_t *ct2 = jl_atomic_load_relaxed(&ptls2->current_task); + if (ct2 == NULL) // this thread is already dead + return 0; + HANDLE hThread = ptls2->system_id; + if ((DWORD)-1 == SuspendThread(hThread)) + return 0; + assert(sizeof(*ctx) == sizeof(CONTEXT)); + memset(ctx, 0, sizeof(CONTEXT)); + ctx->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + if (!GetThreadContext(hThread, ctx)) { + if ((DWORD)-1 == ResumeThread(hThread)) + abort(); + return 0; + } + return 1; +} + +void jl_thread_resume(int tid) +{ + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid]; + HANDLE hThread = ptls2->system_id; + if ((DWORD)-1 == ResumeThread(hThread)) { + fputs("failed to resume main thread! 
aborting.", stderr); + abort(); + } +} + +int jl_lock_stackwalk(void) +{ + uv_mutex_lock(&jl_in_stackwalk); + jl_lock_profile(); + return 0; +} + +void jl_unlock_stackwalk(int lockret) +{ + (void)lockret; + jl_unlock_profile(); + uv_mutex_unlock(&jl_in_stackwalk); +} + + static DWORD WINAPI profile_bt( LPVOID lparam ) { // Note: illegal to use jl_* functions from this thread except for profiling-specific functions @@ -344,58 +405,45 @@ static DWORD WINAPI profile_bt( LPVOID lparam ) continue; } else { - uv_mutex_lock(&jl_in_stackwalk); - jl_lock_profile(); - if ((DWORD)-1 == SuspendThread(hMainThread)) { - fputs("failed to suspend main thread. aborting profiling.", stderr); - break; - } + // TODO: bring this up to parity with other OS by adding loop over tid here + int lockret = jl_lock_stackwalk(); CONTEXT ctxThread; - memset(&ctxThread, 0, sizeof(CONTEXT)); - ctxThread.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; - if (!GetThreadContext(hMainThread, &ctxThread)) { - fputs("failed to get context from main thread. aborting profiling.", stderr); + if (!jl_thread_suspend_and_get_state(0, 0, &ctxThread)) { + jl_unlock_stackwalk(lockret); + fputs("failed to suspend main thread. aborting profiling.", stderr); jl_profile_stop_timer(); + break; } - else { - // Get backtrace data - bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, - bt_size_max - bt_size_cur - 1, &ctxThread, NULL); + // Get backtrace data + bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, + bt_size_max - bt_size_cur - 1, &ctxThread, NULL); - jl_ptls_t ptls = jl_all_tls_states[0]; // given only profiling hMainThread + jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread - // store threadid but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; + // store threadid but add 1 as 0 is preserved to indicate end of block + bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1; - // store task id - bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); + // store task id (never null) + bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task); - // store cpu cycle clock - bt_data_prof[bt_size_cur++].uintptr = cycleclock(); + // store cpu cycle clock + bt_data_prof[bt_size_cur++].uintptr = cycleclock(); - // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block - bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1; + // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block + bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1; - // Mark the end of this block with two 0's - bt_data_prof[bt_size_cur++].uintptr = 0; - bt_data_prof[bt_size_cur++].uintptr = 0; - } - jl_unlock_profile(); - uv_mutex_unlock(&jl_in_stackwalk); - if ((DWORD)-1 == ResumeThread(hMainThread)) { - jl_profile_stop_timer(); - fputs("failed to resume main thread! 
aborting.", stderr); - jl_gc_debug_critical_error(); - abort(); - } + // Mark the end of this block with two 0's + bt_data_prof[bt_size_cur++].uintptr = 0; + bt_data_prof[bt_size_cur++].uintptr = 0; + jl_unlock_stackwalk(lockret); + jl_thread_resume(0); jl_check_profile_autostop(); } } } - jl_unlock_profile(); uv_mutex_unlock(&jl_in_stackwalk); jl_profile_stop_timer(); - hBtThread = 0; + hBtThread = NULL; return 0; } @@ -470,11 +518,15 @@ void jl_install_default_signal_handlers(void) void jl_install_thread_signal_handler(jl_ptls_t ptls) { - size_t ssize = sig_stack_size; - void *stk = jl_malloc_stack(&ssize, NULL); - collect_backtrace_fiber.uc_stack.ss_sp = (void*)stk; - collect_backtrace_fiber.uc_stack.ss_size = ssize; - jl_makecontext(&collect_backtrace_fiber, start_backtrace_fiber); - uv_mutex_init(&backtrace_lock); - have_backtrace_fiber = 1; + if (!have_backtrace_fiber) { + size_t ssize = sig_stack_size; + void *stk = jl_malloc_stack(&ssize, NULL); + if (stk == NULL) + jl_errorf("fatal error allocating signal stack: mmap: %s", strerror(errno)); + collect_backtrace_fiber.uc_stack.ss_sp = (void*)stk; + collect_backtrace_fiber.uc_stack.ss_size = ssize; + jl_makecontext(&collect_backtrace_fiber, start_backtrace_fiber); + uv_mutex_init(&backtrace_lock); + have_backtrace_fiber = 1; + } } diff --git a/src/simplevector.c b/src/simplevector.c index 988cf18ccc9b6..5f1fd744abd0c 100644 --- a/src/simplevector.c +++ b/src/simplevector.c @@ -23,6 +23,7 @@ jl_svec_t *(jl_perm_symsvec)(size_t n, ...) { if (n == 0) return jl_emptysvec; jl_svec_t *jv = (jl_svec_t*)jl_gc_permobj((n + 1) * sizeof(void*), jl_simplevector_type); + jl_set_typetagof(jv, jl_simplevector_tag, jl_astaggedvalue(jv)->bits.gc); jl_svec_set_len_unsafe(jv, n); va_list args; va_start(args, n); @@ -37,6 +38,7 @@ JL_DLLEXPORT jl_svec_t *jl_svec1(void *a) jl_task_t *ct = jl_current_task; jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 2, jl_simplevector_type); + jl_set_typetagof(v, jl_simplevector_tag, 0); jl_svec_set_len_unsafe(v, 1); jl_svec_data(v)[0] = (jl_value_t*)a; return v; @@ -47,6 +49,7 @@ JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b) jl_task_t *ct = jl_current_task; jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 3, jl_simplevector_type); + jl_set_typetagof(v, jl_simplevector_tag, 0); jl_svec_set_len_unsafe(v, 2); jl_svec_data(v)[0] = (jl_value_t*)a; jl_svec_data(v)[1] = (jl_value_t*)b; @@ -59,6 +62,7 @@ JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n) if (n == 0) return jl_emptysvec; jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ct->ptls, (n + 1) * sizeof(void*), jl_simplevector_type); + jl_set_typetagof(jv, jl_simplevector_tag, 0); jl_svec_set_len_unsafe(jv, n); return jv; } @@ -75,7 +79,7 @@ JL_DLLEXPORT jl_svec_t *jl_svec_copy(jl_svec_t *a) { size_t n = jl_svec_len(a); jl_svec_t *c = jl_alloc_svec_uninit(n); - memmove_refs((void**)jl_svec_data(c), (void**)jl_svec_data(a), n); + memmove_refs((_Atomic(void*)*)jl_svec_data(c), (_Atomic(void*)*)jl_svec_data(a), n); return c; } @@ -92,16 +96,3 @@ JL_DLLEXPORT size_t (jl_svec_len)(jl_svec_t *t) JL_NOTSAFEPOINT { return jl_svec_len(t); } - -JL_DLLEXPORT int8_t jl_svec_isassigned(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i) JL_NOTSAFEPOINT -{ - return jl_svecref(t, (size_t)i) != NULL; -} - -JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i) -{ - jl_value_t *v = jl_svecref(t, (size_t)i); - if (__unlikely(v == NULL)) - jl_throw(jl_undefref_exception); - return v; -} diff --git a/src/smallintset.c 
b/src/smallintset.c index 54fdad616a758..df67239f79fb5 100644 --- a/src/smallintset.c +++ b/src/smallintset.c @@ -13,78 +13,114 @@ #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6) #define h2index(hv, sz) (size_t)((hv) & ((sz)-1)) +// a set of small positive integers representing the indices into another set +// (or dict) where the hash is derived from the keys in the set via the lambdas +// `hash` and `eq` supports concurrent calls to jl_smallintset_lookup (giving +// acquire ordering), provided that a lock is held over calls to +// smallintset_rehash, and the elements of `data` support release-consume +// atomics. + #ifdef __cplusplus extern "C" { #endif -static inline size_t jl_intref(const jl_array_t *arr, size_t idx) JL_NOTSAFEPOINT +static inline size_t ignore_tombstone(size_t val, size_t tombstone) JL_NOTSAFEPOINT +{ + return val == tombstone ? 0 : val; +} +static inline size_t jl_intref(const jl_genericmemory_t *arr, size_t idx) JL_NOTSAFEPOINT +{ + jl_value_t *el = (jl_value_t*)jl_typetagof(arr); + if (el == jl_memory_uint8_type) + return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint8_t)*)arr->ptr)[idx]), (uint8_t)-1); + else if (el == jl_memory_uint16_type) + return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint16_t)*)arr->ptr)[idx]), (uint16_t)-1); + else if (el == jl_memory_uint32_type) + return ignore_tombstone(jl_atomic_load_relaxed(&((_Atomic(uint32_t)*)arr->ptr)[idx]), (uint32_t)-1); + else + abort(); +} + +static inline size_t acquire_tombstone(size_t val, size_t tombstone) JL_NOTSAFEPOINT +{ + return val == tombstone ? (size_t)-1 : val; +} +static inline size_t jl_intref_acquire(const jl_genericmemory_t *arr, size_t idx) JL_NOTSAFEPOINT { - jl_value_t *el = jl_tparam0(jl_typeof(arr)); - if (el == (jl_value_t*)jl_uint8_type) - return ((uint8_t*)jl_array_data(arr))[idx]; - else if (el == (jl_value_t*)jl_uint16_type) - return ((uint16_t*)jl_array_data(arr))[idx]; - else if (el == (jl_value_t*)jl_uint32_type) - return ((uint32_t*)jl_array_data(arr))[idx]; + jl_value_t *el = (jl_value_t*)jl_typetagof(arr); + if (el == jl_memory_uint8_type) + return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint8_t)*)arr->ptr)[idx]), (uint8_t)-1); + else if (el == jl_memory_uint16_type) + return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint16_t)*)arr->ptr)[idx]), (uint16_t)-1); + else if (el == jl_memory_uint32_type) + return acquire_tombstone(jl_atomic_load_acquire(&((_Atomic(uint32_t)*)arr->ptr)[idx]), (uint32_t)-1); else abort(); } -static inline void jl_intset(const jl_array_t *arr, size_t idx, size_t val) JL_NOTSAFEPOINT +static inline void jl_intset_release(const jl_genericmemory_t *arr, size_t idx, size_t val) JL_NOTSAFEPOINT { - jl_value_t *el = jl_tparam0(jl_typeof(arr)); - if (el == (jl_value_t*)jl_uint8_type) - ((uint8_t*)jl_array_data(arr))[idx] = val; - else if (el == (jl_value_t*)jl_uint16_type) - ((uint16_t*)jl_array_data(arr))[idx] = val; - else if (el == (jl_value_t*)jl_uint32_type) - ((uint32_t*)jl_array_data(arr))[idx] = val; + jl_value_t *el = (jl_value_t*)jl_typetagof(arr); + if (el == jl_memory_uint8_type) + jl_atomic_store_release(&((_Atomic(uint8_t)*)arr->ptr)[idx], val); + else if (el == jl_memory_uint16_type) + jl_atomic_store_release(&((_Atomic(uint16_t)*)arr->ptr)[idx], val); + else if (el == jl_memory_uint32_type) + jl_atomic_store_release(&((_Atomic(uint32_t)*)arr->ptr)[idx], val); else abort(); } -static inline size_t jl_max_int(const jl_array_t *arr) +static inline size_t jl_max_int(const jl_genericmemory_t *arr) 
JL_NOTSAFEPOINT { - jl_value_t *el = jl_tparam0(jl_typeof(arr)); - if (el == (jl_value_t*)jl_uint8_type) + jl_value_t *el = (jl_value_t*)jl_typetagof(arr); + if (el == jl_memory_uint8_type) return 0xFF; - else if (el == (jl_value_t*)jl_uint16_type) + else if (el == jl_memory_uint16_type) return 0xFFFF; - else if (el == (jl_value_t*)jl_uint32_type) + else if (el == jl_memory_uint32_type) return 0xFFFFFFFF; - else if (el == (jl_value_t*)jl_any_type) + else if (el == jl_memory_any_type) return 0; else abort(); } -static jl_array_t *jl_alloc_int_1d(size_t np, size_t len) +void smallintset_empty(const jl_genericmemory_t *a) JL_NOTSAFEPOINT +{ + size_t elsize; + jl_value_t *el = (jl_value_t*)jl_typetagof(a); + if (el == jl_memory_uint8_type) + elsize = sizeof(uint8_t); + else if (el == jl_memory_uint16_type) + elsize = sizeof(uint16_t); + else if (el == jl_memory_uint32_type) + elsize = sizeof(uint32_t); + else if (el == jl_memory_any_type) + elsize = 0; + else + abort(); + memset(a->ptr, 0, a->length * elsize); +} + +static jl_genericmemory_t *jl_alloc_int_1d(size_t np, size_t len) { jl_value_t *ty; - if (np < 0xFF) { - ty = jl_array_uint8_type; - } - else if (np < 0xFFFF) { - static jl_value_t *int16 JL_ALWAYS_LEAFTYPE = NULL; - if (int16 == NULL) - int16 = jl_apply_array_type((jl_value_t*)jl_uint16_type, 1); - ty = int16; - } - else { - assert(np < 0x7FFFFFFF); - static jl_value_t *int32 JL_ALWAYS_LEAFTYPE = NULL; - if (int32 == NULL) - int32 = jl_apply_array_type((jl_value_t*)jl_uint32_type, 1); - ty = int32; - } - jl_array_t *a = jl_alloc_array_1d(ty, len); - memset(a->data, 0, len * a->elsize); + if (np < 0xFF) + ty = jl_memory_uint8_type; + else if (np < 0xFFFF) + ty = jl_memory_uint16_type; + else + ty = jl_memory_uint32_type; + assert(np < 0x7FFFFFFF); + jl_genericmemory_t *a = jl_alloc_genericmemory(ty, len); + smallintset_empty(a); return a; } -ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *key, jl_svec_t *data, uint_t hv) +ssize_t jl_smallintset_lookup(jl_genericmemory_t *cache, smallintset_eq eq, const void *key, jl_value_t *data, uint_t hv, int pop) { - size_t sz = jl_array_len(cache); + size_t sz = cache->length; if (sz == 0) return -1; JL_GC_PUSH1(&cache); @@ -93,13 +129,15 @@ ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void * size_t orig = index; size_t iter = 0; do { - size_t val1 = jl_intref(cache, index); + size_t val1 = jl_intref_acquire(cache, index); if (val1 == 0) { JL_GC_POP(); return -1; } - if (eq(val1 - 1, key, data, hv)) { + if (val1 != -1 && eq(val1 - 1, key, data, hv)) { JL_GC_POP(); + if (pop) + jl_intset_release(cache, index, (size_t)-1); // replace with tombstone return val1 - 1; } index = (index + 1) & (sz - 1); @@ -109,9 +147,9 @@ ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void * return -1; } -static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1) +static int smallintset_insert_(jl_genericmemory_t *a, uint_t hv, size_t val1) JL_NOTSAFEPOINT { - size_t sz = jl_array_len(a); + size_t sz = a->length; if (sz <= 1) return 0; size_t orig, index, iter; @@ -121,7 +159,7 @@ static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1) size_t maxprobe = max_probe(sz); do { if (jl_intref(a, index) == 0) { - jl_intset(a, index, val1); + jl_intset_release(a, index, val1); return 1; } index = (index + 1) & (sz - 1); @@ -129,16 +167,17 @@ static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1) } while (iter <= maxprobe && index != orig); return 
0; } +//} -static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np); - -void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data) +void jl_smallintset_insert(_Atomic(jl_genericmemory_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_value_t *data) { - jl_array_t *a = jl_atomic_load_relaxed(pcache); - if (val + 1 > jl_max_int(a)) - smallintset_rehash(pcache, parent, hash, data, jl_array_len(a), val + 1); + jl_genericmemory_t *a = jl_atomic_load_relaxed(pcache); + if (val + 1 >= jl_max_int(a)) { + a = smallintset_rehash(a, hash, data, a->length, val + 1); + jl_atomic_store_release(pcache, a); + if (parent) jl_gc_wb(parent, a); + } while (1) { - a = jl_atomic_load_relaxed(pcache); if (smallintset_insert_(a, hash(val, data), val + 1)) return; @@ -148,21 +187,22 @@ void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, sma /* lots of time rehashing all the keys over and over. */ size_t newsz; a = jl_atomic_load_relaxed(pcache); - size_t sz = jl_array_len(a); + size_t sz = a->length; if (sz < HT_N_INLINE) newsz = HT_N_INLINE; else if (sz >= (1 << 19) || (sz <= (1 << 8))) newsz = sz << 1; else newsz = sz << 2; - smallintset_rehash(pcache, parent, hash, data, newsz, 0); + a = smallintset_rehash(a, hash, data, newsz, 0); + jl_atomic_store_release(pcache, a); + if (parent) jl_gc_wb(parent, a); } } -static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np) +jl_genericmemory_t* smallintset_rehash(jl_genericmemory_t* a, smallintset_hash hash, jl_value_t *data, size_t newsz, size_t np) { - jl_array_t *a = jl_atomic_load_relaxed(pcache); - size_t sz = jl_array_len(a); + size_t sz = a->length; size_t i; for (i = 0; i < sz; i += 1) { size_t val = jl_intref(a, i); @@ -170,7 +210,7 @@ static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, np = val; } while (1) { - jl_array_t *newa = jl_alloc_int_1d(np, newsz); + jl_genericmemory_t *newa = jl_alloc_int_1d(np + 1, newsz); JL_GC_PUSH1(&newa); for (i = 0; i < sz; i += 1) { size_t val1 = jl_intref(a, i); @@ -181,16 +221,12 @@ static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, } } JL_GC_POP(); - if (i == sz) { - jl_atomic_store_release(pcache, newa); - jl_gc_wb(parent, newa); - return; - } + if (i == sz) + return newa; newsz <<= 1; } } - #ifdef __cplusplus } #endif diff --git a/src/stackwalk.c b/src/stackwalk.c index a6ca5f3d73493..6efb177927637 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -214,10 +214,10 @@ NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip int r = jl_unw_get(&context); if (r < 0) return 0; - jl_gcframe_t *pgcstack = jl_pgcstack; bt_cursor_t cursor; - if (!jl_unw_init(&cursor, &context)) + if (!jl_unw_init(&cursor, &context) || maxsize == 0) return 0; + jl_gcframe_t *pgcstack = jl_pgcstack; size_t bt_size = 0; jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, skip + 1, &pgcstack, 0); return bt_size; @@ -260,21 +260,21 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip) uintptr_t *sp_ptr = NULL; if (returnsp) { jl_array_grow_end(sp, maxincr); - sp_ptr = (uintptr_t*)jl_array_data(sp) + offset; + sp_ptr = jl_array_data(sp, uintptr_t) + offset; } size_t size_incr = 0; - have_more_frames = jl_unw_stepn(&cursor, 
(jl_bt_element_t*)jl_array_data(ip) + offset, + have_more_frames = jl_unw_stepn(&cursor, jl_array_data(ip, jl_bt_element_t) + offset, &size_incr, sp_ptr, maxincr, skip, &pgcstack, 0); skip = 0; offset += size_incr; } - jl_array_del_end(ip, jl_array_len(ip) - offset); + jl_array_del_end(ip, jl_array_nrows(ip) - offset); if (returnsp) - jl_array_del_end(sp, jl_array_len(sp) - offset); + jl_array_del_end(sp, jl_array_nrows(sp) - offset); size_t n = 0; - jl_bt_element_t *bt_data = (jl_bt_element_t*)jl_array_data(ip); - while (n < jl_array_len(ip)) { + jl_bt_element_t *bt_data = jl_array_data(ip, jl_bt_element_t); + while (n < jl_array_nrows(ip)) { jl_bt_element_t *bt_entry = bt_data + n; if (!jl_bt_is_native(bt_entry)) { size_t njlvals = jl_bt_num_jlvals(bt_entry); @@ -303,7 +303,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size, bt = *btout = jl_alloc_array_1d(array_ptr_void_type, bt_size); static_assert(sizeof(jl_bt_element_t) == sizeof(void*), "jl_bt_element_t is presented as Ptr{Cvoid} on julia side"); - memcpy(bt->data, bt_data, bt_size * sizeof(jl_bt_element_t)); + memcpy(jl_array_data(bt, jl_bt_element_t), bt_data, bt_size * sizeof(jl_bt_element_t)); bt2 = *bt2out = jl_alloc_array_1d(jl_array_any_type, 0); // Scan the backtrace buffer for any gc-managed values for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) { @@ -321,6 +321,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size, JL_DLLEXPORT jl_value_t *jl_get_backtrace(void) { + JL_TIMING(STACKWALK, STACKWALK_Backtrace); jl_excstack_t *s = jl_current_task->excstack; jl_bt_element_t *bt_data = NULL; size_t bt_size = 0; @@ -343,6 +344,7 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void) JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int max_entries) { JL_TYPECHK(current_exceptions, task, (jl_value_t*)task); + JL_TIMING(STACKWALK, STACKWALK_Excstack); jl_task_t *ct = jl_current_task; if (task != ct && jl_atomic_load_relaxed(&task->_state) == JL_TASK_STATE_RUNNABLE) { jl_error("Inspecting the exception stack of a task which might " @@ -661,17 +663,17 @@ void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT jl_value_t *code = jl_bt_entry_jlvalue(bt_entry, 0); if (jl_is_method_instance(code)) { // When interpreting a method instance, need to unwrap to find the code info - code = ((jl_method_instance_t*)code)->uninferred; + code = jl_atomic_load_relaxed(&((jl_method_instance_t*)code)->uninferred); } if (jl_is_code_info(code)) { jl_code_info_t *src = (jl_code_info_t*)code; // See also the debug info handling in codegen.cpp. // NB: debuginfoloc is 1-based! 
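The accessor churn throughout this file follows one pattern: the element type moves from a cast at the call site into the `jl_array_data` macro itself, and element counts switch from `jl_array_len` to `jl_array_nrows`. A minimal sketch of the two call shapes, based only on the forms visible in these hunks (it assumes a hypothetical `jl_array_t *a` holding `int32_t` elements and does not show the macro definitions):

/* old style: untyped data pointer, cast at each use */
int32_t *p_old = (int32_t*)jl_array_data(a);
size_t   n_old = jl_array_len(a);

/* new style: the element type is an argument to the accessor */
int32_t *p_new = jl_array_data(a, int32_t);
size_t   n_new = jl_array_nrows(a);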
- intptr_t debuginfoloc = ((int32_t*)jl_array_data(src->codelocs))[ip]; + intptr_t debuginfoloc = jl_array_data(src->codelocs, int32_t)[ip]; while (debuginfoloc != 0) { jl_line_info_node_t *locinfo = (jl_line_info_node_t*) jl_array_ptr_ref(src->linetable, debuginfoloc - 1); - assert(jl_typeis(locinfo, jl_lineinfonode_type)); + assert(jl_typetagis(locinfo, jl_lineinfonode_type)); const char *func_name = "Unknown"; jl_value_t *method = locinfo->method; if (jl_is_method_instance(method)) @@ -722,7 +724,7 @@ static void JuliaInitializeLongjmpXorKey(void) } #endif -JL_UNUSED static uintptr_t ptr_demangle(uintptr_t p) +JL_UNUSED static uintptr_t ptr_demangle(uintptr_t p) JL_NOTSAFEPOINT { #if defined(__GLIBC__) #if defined(_CPU_X86_) @@ -789,7 +791,7 @@ _os_tsd_get_direct(unsigned long slot) // Unconditionally defined ptrauth_strip (instead of using the ptrauth.h header) // since libsystem will likely be compiled with -mbranch-protection, and we currently are not. // code from https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h -static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) { +static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) JL_NOTSAFEPOINT { // On the stack the link register is protected with Pointer // Authentication Code when compiled with -mbranch-protection. // Let's strip the PAC unconditionally because xpaclri is in the NOP space, @@ -807,7 +809,7 @@ static inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) { __attribute__((always_inline, pure)) static __inline__ void** -_os_tsd_get_base(void) +_os_tsd_get_base(void) JL_NOTSAFEPOINT { #if defined(__arm__) uintptr_t tsd; @@ -829,7 +831,7 @@ _os_tsd_get_base(void) #ifdef _os_tsd_get_base __attribute__((always_inline)) static __inline__ void* -_os_tsd_get_direct(unsigned long slot) +_os_tsd_get_direct(unsigned long slot) JL_NOTSAFEPOINT { return _os_tsd_get_base()[slot]; } @@ -837,14 +839,14 @@ _os_tsd_get_direct(unsigned long slot) __attribute__((always_inline, pure)) static __inline__ uintptr_t -_os_ptr_munge_token(void) +_os_ptr_munge_token(void) JL_NOTSAFEPOINT { return (uintptr_t)_os_tsd_get_direct(__TSD_PTR_MUNGE); } __attribute__((always_inline, pure)) JL_UNUSED static __inline__ uintptr_t -_os_ptr_munge(uintptr_t ptr) +_os_ptr_munge(uintptr_t ptr) JL_NOTSAFEPOINT { return ptr ^ _os_ptr_munge_token(); } @@ -854,7 +856,7 @@ _os_ptr_munge(uintptr_t ptr) extern bt_context_t *jl_to_bt_context(void *sigctx); -void jl_rec_backtrace(jl_task_t *t) +static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; @@ -863,209 +865,242 @@ void jl_rec_backtrace(jl_task_t *t) ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0); return; } - if (t->copy_stack || !t->started || t->stkbuf == NULL) - return; - int16_t old = -1; - if (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) - return; bt_context_t *context = NULL; -#if defined(_OS_WINDOWS_) bt_context_t c; - memset(&c, 0, sizeof(c)); - _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext; + int16_t old = -1; + while (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) { + int lockret = jl_lock_stackwalk(); + // if this task is already running somewhere, we need to stop the thread it is running on and query its state + if (!jl_thread_suspend_and_get_state(old, 0, &c)) { + jl_unlock_stackwalk(lockret); + return; + } + 
jl_unlock_stackwalk(lockret); + if (jl_atomic_load_relaxed(&t->tid) == old) { + jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[old]; + if (ptls2->previous_task == t || // we might print the wrong stack here, since we can't know whether we executed the swapcontext yet or not, but it at least avoids trying to access the state inside uc_mcontext which might not be set yet + (ptls2->previous_task == NULL && jl_atomic_load_relaxed(&ptls2->current_task) == t)) { // this case should be always accurate + // use the thread context for the unwind state + context = &c; + } + break; + } + // got the wrong thread stopped, try again + jl_thread_resume(old); + } + if (context == NULL && (!t->copy_stack && t->started && t->stkbuf != NULL)) { + // need to read the context from the task stored state +#if defined(_OS_WINDOWS_) + memset(&c, 0, sizeof(c)); + _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext; #if defined(_CPU_X86_64_) - c.Rbx = mctx->Rbx; - c.Rsp = mctx->Rsp; - c.Rbp = mctx->Rbp; - c.Rsi = mctx->Rsi; - c.Rdi = mctx->Rdi; - c.R12 = mctx->R12; - c.R13 = mctx->R13; - c.R14 = mctx->R14; - c.R15 = mctx->R15; - c.Rip = mctx->Rip; - memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15 + c.Rbx = mctx->Rbx; + c.Rsp = mctx->Rsp; + c.Rbp = mctx->Rbp; + c.Rsi = mctx->Rsi; + c.Rdi = mctx->Rdi; + c.R12 = mctx->R12; + c.R13 = mctx->R13; + c.R14 = mctx->R14; + c.R15 = mctx->R15; + c.Rip = mctx->Rip; + memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15 #else - c.Eip = mctx->Eip; - c.Esp = mctx->Esp; - c.Ebp = mctx->Ebp; + c.Eip = mctx->Eip; + c.Esp = mctx->Esp; + c.Ebp = mctx->Ebp; #endif - context = &c; + context = &c; #elif defined(JL_HAVE_UNW_CONTEXT) - context = &t->ctx.ctx; + context = &t->ctx.ctx; #elif defined(JL_HAVE_UCONTEXT) - context = jl_to_bt_context(&t->ctx.ctx); + context = jl_to_bt_context(&t->ctx.ctx); #elif defined(JL_HAVE_ASM) - bt_context_t c; - memset(&c, 0, sizeof(c)); - #if defined(_OS_LINUX_) && defined(__GLIBC__) - __jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf; - mcontext_t *mc = &c.uc_mcontext; - #if defined(_CPU_X86_) - // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S - // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h - // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s - mc->gregs[REG_EBX] = (*mctx)[0]; - mc->gregs[REG_ESI] = (*mctx)[1]; - mc->gregs[REG_EDI] = (*mctx)[2]; - mc->gregs[REG_EBP] = (*mctx)[3]; - mc->gregs[REG_ESP] = (*mctx)[4]; - mc->gregs[REG_EIP] = (*mctx)[5]; - // ifdef PTR_DEMANGLE ? - mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]); - mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]); - context = &c; - #elif defined(_CPU_X86_64_) - // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S - // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h - // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s - mc->gregs[REG_RBX] = (*mctx)[0]; - mc->gregs[REG_RBP] = (*mctx)[1]; - mc->gregs[REG_R12] = (*mctx)[2]; - mc->gregs[REG_R13] = (*mctx)[3]; - mc->gregs[REG_R14] = (*mctx)[4]; - mc->gregs[REG_R15] = (*mctx)[5]; - mc->gregs[REG_RSP] = (*mctx)[6]; - mc->gregs[REG_RIP] = (*mctx)[7]; - // ifdef PTR_DEMANGLE ? 
- mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]); - mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]); - mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]); - context = &c; - #elif defined(_CPU_ARM_) - // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S - // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h - // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S - mc->arm_sp = (*mctx)[0]; - mc->arm_lr = (*mctx)[1]; - mc->arm_r4 = (*mctx)[2]; // aka v1 - mc->arm_r5 = (*mctx)[3]; // aka v2 - mc->arm_r6 = (*mctx)[4]; // aka v3 - mc->arm_r7 = (*mctx)[5]; // aka v4 - mc->arm_r8 = (*mctx)[6]; // aka v5 - mc->arm_r9 = (*mctx)[7]; // aka v6 aka sb - mc->arm_r10 = (*mctx)[8]; // aka v7 aka sl - mc->arm_fp = (*mctx)[10]; // aka v8 aka r11 - // ifdef PTR_DEMANGLE ? - mc->arm_sp = ptr_demangle(mc->arm_sp); - mc->arm_lr = ptr_demangle(mc->arm_lr); - mc->arm_pc = mc->arm_lr; - context = &c; - #elif defined(_CPU_AARCH64_) - // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S - // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h - // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s - // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62 - unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved; - mc->regs[19] = (*mctx)[0]; - mc->regs[20] = (*mctx)[1]; - mc->regs[21] = (*mctx)[2]; - mc->regs[22] = (*mctx)[3]; - mc->regs[23] = (*mctx)[4]; - mc->regs[24] = (*mctx)[5]; - mc->regs[25] = (*mctx)[6]; - mc->regs[26] = (*mctx)[7]; - mc->regs[27] = (*mctx)[8]; - mc->regs[28] = (*mctx)[9]; - mc->regs[29] = (*mctx)[10]; // aka fp - mc->regs[30] = (*mctx)[11]; // aka lr - // Yes, they did skip 12 why writing the code originally; and, no, I do not know why. - mc->sp = (*mctx)[13]; - mcfp->vregs[7] = (*mctx)[14]; // aka d8 - mcfp->vregs[8] = (*mctx)[15]; // aka d9 - mcfp->vregs[9] = (*mctx)[16]; // aka d10 - mcfp->vregs[10] = (*mctx)[17]; // aka d11 - mcfp->vregs[11] = (*mctx)[18]; // aka d12 - mcfp->vregs[12] = (*mctx)[19]; // aka d13 - mcfp->vregs[13] = (*mctx)[20]; // aka d14 - mcfp->vregs[14] = (*mctx)[21]; // aka d15 - // ifdef PTR_DEMANGLE ? 
- mc->sp = ptr_demangle(mc->sp); - mc->regs[30] = ptr_demangle(mc->regs[30]); - mc->pc = mc->regs[30]; - context = &c; - #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux") - (void)mc; - (void)c; - (void)mctx; - #endif - #elif defined(_OS_DARWIN_) - sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext; - #if defined(_CPU_X86_64_) - // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s - x86_thread_state64_t *mc = (x86_thread_state64_t*)&c; - mc->__rbx = ((uint64_t*)mctx)[0]; - mc->__rbp = ((uint64_t*)mctx)[1]; - mc->__rsp = ((uint64_t*)mctx)[2]; - mc->__r12 = ((uint64_t*)mctx)[3]; - mc->__r13 = ((uint64_t*)mctx)[4]; - mc->__r14 = ((uint64_t*)mctx)[5]; - mc->__r15 = ((uint64_t*)mctx)[6]; - mc->__rip = ((uint64_t*)mctx)[7]; - // added in libsystem_plaform 177.200.16 (macOS Mojave 10.14.3) - // prior to that _os_ptr_munge_token was (hopefully) typically 0, - // so x ^ 0 == x and this is a no-op - mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp); - mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp); - mc->__rip = _OS_PTR_UNMUNGE(mc->__rip); - context = &c; - #elif defined(_CPU_AARCH64_) - // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s - // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h - // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64 - arm_thread_state64_t *mc = (arm_thread_state64_t*)&c; - mc->__x[19] = ((uint64_t*)mctx)[0]; - mc->__x[20] = ((uint64_t*)mctx)[1]; - mc->__x[21] = ((uint64_t*)mctx)[2]; - mc->__x[22] = ((uint64_t*)mctx)[3]; - mc->__x[23] = ((uint64_t*)mctx)[4]; - mc->__x[24] = ((uint64_t*)mctx)[5]; - mc->__x[25] = ((uint64_t*)mctx)[6]; - mc->__x[26] = ((uint64_t*)mctx)[7]; - mc->__x[27] = ((uint64_t*)mctx)[8]; - mc->__x[28] = ((uint64_t*)mctx)[9]; - mc->__x[10] = ((uint64_t*)mctx)[10]; - mc->__x[11] = ((uint64_t*)mctx)[11]; - mc->__x[12] = ((uint64_t*)mctx)[12]; - // 13 is reserved/unused - double *mcfp = (double*)&mc[1]; - mcfp[7] = ((uint64_t*)mctx)[14]; // aka d8 - mcfp[8] = ((uint64_t*)mctx)[15]; // aka d9 - mcfp[9] = ((uint64_t*)mctx)[16]; // aka d10 - mcfp[10] = ((uint64_t*)mctx)[17]; // aka d11 - mcfp[11] = ((uint64_t*)mctx)[18]; // aka d12 - mcfp[12] = ((uint64_t*)mctx)[19]; // aka d13 - mcfp[13] = ((uint64_t*)mctx)[20]; // aka d14 - mcfp[14] = ((uint64_t*)mctx)[21]; // aka d15 - mc->__fp = _OS_PTR_UNMUNGE(mc->__x[10]); - mc->__lr = _OS_PTR_UNMUNGE(mc->__x[11]); - mc->__x[12] = _OS_PTR_UNMUNGE(mc->__x[12]); - mc->__sp = mc->__x[12]; - // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either - mc->__pc = ptrauth_strip(mc->__lr, 0); - mc->__pad = 0; // aka __ra_sign_state = not signed - context = &c; - #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin") - (void)mctx; - (void)c; - #endif - #else - #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system") - (void)c; - #endif + memset(&c, 0, sizeof(c)); + #if defined(_OS_LINUX_) && defined(__GLIBC__) + __jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf; + mcontext_t *mc = &c.uc_mcontext; + #if defined(_CPU_X86_) + // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S + // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h + // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s + mc->gregs[REG_EBX] = (*mctx)[0]; + mc->gregs[REG_ESI] = (*mctx)[1]; + 
mc->gregs[REG_EDI] = (*mctx)[2]; + mc->gregs[REG_EBP] = (*mctx)[3]; + mc->gregs[REG_ESP] = (*mctx)[4]; + mc->gregs[REG_EIP] = (*mctx)[5]; + // ifdef PTR_DEMANGLE ? + mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]); + mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]); + context = &c; + #elif defined(_CPU_X86_64_) + // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S + // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h + // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s + mc->gregs[REG_RBX] = (*mctx)[0]; + mc->gregs[REG_RBP] = (*mctx)[1]; + mc->gregs[REG_R12] = (*mctx)[2]; + mc->gregs[REG_R13] = (*mctx)[3]; + mc->gregs[REG_R14] = (*mctx)[4]; + mc->gregs[REG_R15] = (*mctx)[5]; + mc->gregs[REG_RSP] = (*mctx)[6]; + mc->gregs[REG_RIP] = (*mctx)[7]; + // ifdef PTR_DEMANGLE ? + mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]); + mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]); + mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]); + context = &c; + #elif defined(_CPU_ARM_) + // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S + // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h + // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S + mc->arm_sp = (*mctx)[0]; + mc->arm_lr = (*mctx)[1]; + mc->arm_r4 = (*mctx)[2]; // aka v1 + mc->arm_r5 = (*mctx)[3]; // aka v2 + mc->arm_r6 = (*mctx)[4]; // aka v3 + mc->arm_r7 = (*mctx)[5]; // aka v4 + mc->arm_r8 = (*mctx)[6]; // aka v5 + mc->arm_r9 = (*mctx)[7]; // aka v6 aka sb + mc->arm_r10 = (*mctx)[8]; // aka v7 aka sl + mc->arm_fp = (*mctx)[10]; // aka v8 aka r11 + // ifdef PTR_DEMANGLE ? + mc->arm_sp = ptr_demangle(mc->arm_sp); + mc->arm_lr = ptr_demangle(mc->arm_lr); + mc->arm_pc = mc->arm_lr; + context = &c; + #elif defined(_CPU_AARCH64_) + // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S + // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h + // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s + // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62 + unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved; + mc->regs[19] = (*mctx)[0]; + mc->regs[20] = (*mctx)[1]; + mc->regs[21] = (*mctx)[2]; + mc->regs[22] = (*mctx)[3]; + mc->regs[23] = (*mctx)[4]; + mc->regs[24] = (*mctx)[5]; + mc->regs[25] = (*mctx)[6]; + mc->regs[26] = (*mctx)[7]; + mc->regs[27] = (*mctx)[8]; + mc->regs[28] = (*mctx)[9]; + mc->regs[29] = (*mctx)[10]; // aka fp + mc->regs[30] = (*mctx)[11]; // aka lr + // Yes, they did skip 12 why writing the code originally; and, no, I do not know why. + mc->sp = (*mctx)[13]; + mcfp->vregs[7] = (*mctx)[14]; // aka d8 + mcfp->vregs[8] = (*mctx)[15]; // aka d9 + mcfp->vregs[9] = (*mctx)[16]; // aka d10 + mcfp->vregs[10] = (*mctx)[17]; // aka d11 + mcfp->vregs[11] = (*mctx)[18]; // aka d12 + mcfp->vregs[12] = (*mctx)[19]; // aka d13 + mcfp->vregs[13] = (*mctx)[20]; // aka d14 + mcfp->vregs[14] = (*mctx)[21]; // aka d15 + // ifdef PTR_DEMANGLE ? 
+ mc->sp = ptr_demangle(mc->sp); + mc->regs[30] = ptr_demangle(mc->regs[30]); + mc->pc = mc->regs[30]; + context = &c; + #else + #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux") + (void)mc; + (void)c; + (void)mctx; + #endif + #elif defined(_OS_DARWIN_) + sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext; + #if defined(_CPU_X86_64_) + // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s + x86_thread_state64_t *mc = (x86_thread_state64_t*)&c; + mc->__rbx = ((uint64_t*)mctx)[0]; + mc->__rbp = ((uint64_t*)mctx)[1]; + mc->__rsp = ((uint64_t*)mctx)[2]; + mc->__r12 = ((uint64_t*)mctx)[3]; + mc->__r13 = ((uint64_t*)mctx)[4]; + mc->__r14 = ((uint64_t*)mctx)[5]; + mc->__r15 = ((uint64_t*)mctx)[6]; + mc->__rip = ((uint64_t*)mctx)[7]; + // added in libsystem_platform 177.200.16 (macOS Mojave 10.14.3) + // prior to that _os_ptr_munge_token was (hopefully) typically 0, + // so x ^ 0 == x and this is a no-op + mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp); + mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp); + mc->__rip = _OS_PTR_UNMUNGE(mc->__rip); + context = &c; + #elif defined(_CPU_AARCH64_) + // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s + // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h + // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64 + arm_thread_state64_t *mc = (arm_thread_state64_t*)&c; + mc->__x[19] = ((uint64_t*)mctx)[0]; + mc->__x[20] = ((uint64_t*)mctx)[1]; + mc->__x[21] = ((uint64_t*)mctx)[2]; + mc->__x[22] = ((uint64_t*)mctx)[3]; + mc->__x[23] = ((uint64_t*)mctx)[4]; + mc->__x[24] = ((uint64_t*)mctx)[5]; + mc->__x[25] = ((uint64_t*)mctx)[6]; + mc->__x[26] = ((uint64_t*)mctx)[7]; + mc->__x[27] = ((uint64_t*)mctx)[8]; + mc->__x[28] = ((uint64_t*)mctx)[9]; + mc->__x[10] = ((uint64_t*)mctx)[10]; + mc->__x[11] = ((uint64_t*)mctx)[11]; + mc->__x[12] = ((uint64_t*)mctx)[12]; + // 13 is reserved/unused + double *mcfp = (double*)&mc[1]; + mcfp[7] = ((uint64_t*)mctx)[14]; // aka d8 + mcfp[8] = ((uint64_t*)mctx)[15]; // aka d9 + mcfp[9] = ((uint64_t*)mctx)[16]; // aka d10 + mcfp[10] = ((uint64_t*)mctx)[17]; // aka d11 + mcfp[11] = ((uint64_t*)mctx)[18]; // aka d12 + mcfp[12] = ((uint64_t*)mctx)[19]; // aka d13 + mcfp[13] = ((uint64_t*)mctx)[20]; // aka d14 + mcfp[14] = ((uint64_t*)mctx)[21]; // aka d15 + mc->__fp = _OS_PTR_UNMUNGE(mc->__x[10]); + mc->__lr = _OS_PTR_UNMUNGE(mc->__x[11]); + mc->__x[12] = _OS_PTR_UNMUNGE(mc->__x[12]); + mc->__sp = mc->__x[12]; + // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either + mc->__pc = ptrauth_strip(mc->__lr, 0); + mc->__pad = 0; // aka __ra_sign_state = not signed + context = &c; + #else + #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin") + (void)mctx; + (void)c; + #endif + #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_) + sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext; + mcontext_t *mc = &c.uc_mcontext; + // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S + mc->mc_rip = ((long*)mctx)[0]; + mc->mc_rbx = ((long*)mctx)[1]; + mc->mc_rsp = ((long*)mctx)[2]; + mc->mc_rbp = ((long*)mctx)[3]; + mc->mc_r12 = ((long*)mctx)[4]; + mc->mc_r13 = ((long*)mctx)[5]; + mc->mc_r14 = ((long*)mctx)[6]; + mc->mc_r15 = ((long*)mctx)[7]; + context = &c; + #else + #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system") + (void)c; + #endif 
#elif defined(JL_HAVE_ASYNCIFY) - #pragma message("jl_rec_backtrace not defined for ASYNCIFY") + #pragma message("jl_rec_backtrace not defined for ASYNCIFY") #elif defined(JL_HAVE_SIGALTSTACK) - #pragma message("jl_rec_backtrace not defined for SIGALTSTACK") + #pragma message("jl_rec_backtrace not defined for SIGALTSTACK") #else - #pragma message("jl_rec_backtrace not defined for unknown task system") + #pragma message("jl_rec_backtrace not defined for unknown task system") #endif + } if (context) - ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack); + ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack); if (old == -1) jl_atomic_store_relaxed(&t->tid, old); + else if (old != ptls->tid) + jl_thread_resume(old); } //-------------------------------------------------- @@ -1091,7 +1126,9 @@ JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT jl_print_bt_entry_codeloc(bt_data + i); } } -JL_DLLEXPORT void jlbacktracet(jl_task_t *t) + +// Print backtrace for specified task to jl_safe_printf stderr +JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; @@ -1108,6 +1145,69 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT jlbacktrace(); } +extern int gc_first_tid; + +// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr +JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT +{ + size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); + jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); + for (size_t i = 0; i < nthreads; i++) { + // skip GC threads since they don't have tasks + if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) { + continue; + } + jl_ptls_t ptls2 = allstates[i]; + if (ptls2 == NULL) { + continue; + } + small_arraylist_t *live_tasks = &ptls2->heap.live_tasks; + size_t n = mtarraylist_length(live_tasks); + int t_state = JL_TASK_STATE_DONE; + jl_task_t *t = ptls2->root_task; + if (t != NULL) + t_state = jl_atomic_load_relaxed(&t->_state); + jl_safe_printf("==== Thread %d created %zu live tasks\n", + ptls2->tid + 1, n + (t_state != JL_TASK_STATE_DONE)); + if (show_done || t_state != JL_TASK_STATE_DONE) { + jl_safe_printf(" ---- Root task (%p)\n", ptls2->root_task); + if (t != NULL) { + jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", + t->sticky, t->started, t_state, + jl_atomic_load_relaxed(&t->tid) + 1); + if (t->stkbuf != NULL) { + jlbacktracet(t); + } + else { + jl_safe_printf(" no stack\n"); + } + } + jl_safe_printf(" ---- End root task\n"); + } + + for (size_t j = 0; j < n; j++) { + jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, j); + if (t == NULL) + continue; + int t_state = jl_atomic_load_relaxed(&t->_state); + if (!show_done && t_state == JL_TASK_STATE_DONE) + continue; + jl_safe_printf(" ---- Task %zu (%p)\n", j + 1, t); + // n.b. this information might not be consistent with the stack printing after it, since it could start running or change tid, etc. 
+ jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", + t->sticky, t->started, t_state, + jl_atomic_load_relaxed(&t->tid) + 1); + if (t->stkbuf != NULL) + jlbacktracet(t); + else + jl_safe_printf(" no stack\n"); + jl_safe_printf(" ---- End task %zu\n", j + 1); + } + jl_safe_printf("==== End thread %d\n", ptls2->tid + 1); + } + jl_safe_printf("==== Done\n"); +} + #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index 87bd3aa48b1db..8244c5a4373ce 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3,33 +3,24 @@ /* saving and restoring system images - This performs serialization and deserialization of in-memory data. The dump.c file is similar, but has less complete coverage: - dump.c has no knowledge of native code (and simply discards it), whereas this supports native code caching in .o files. - Duplication is avoided by elevating the .o-serialized versions of global variables and native-compiled functions to become - the authoritative source for such entities in the system image, with references to these objects appropriately inserted into - the (de)serialized version of Julia's internal data. This makes deserialization simple and fast: we only need to deal with - pointer relocation, registering with the garbage collector, and making note of special internal types. During serialization, - we also need to pay special attention to things like builtin functions, C-implemented types (those in jltypes.c), the metadata - for documentation, optimal layouts, integration with native system image generation, and preparing other preprocessing - directives. - - dump.c has capabilities missing from this serializer, most notably the ability to handle external references. This is not needed - for system images as they are self-contained. However, it would be needed to support incremental compilation of packages. + This performs serialization and deserialization of system and package images. It creates and saves a compact binary + blob, making deserialization "simple" and fast: we "only" need to deal with uniquing, pointer relocation, + method root insertion, registering with the garbage collector, making note of special internal types, and + backedges/invalidation. Special objects include things like builtin functions, C-implemented types (those in jltypes.c), + the metadata for documentation, optimal layouts, integration with native system image generation, and preparing other + preprocessing directives. During serialization, the flow has several steps: - - step 1 inserts relevant items into `backref_table`, an `obj` => `id::Int` mapping. `id` is assigned by - order of insertion. This is effectively a recursive traversal, singling out items like pointers and symbols - that need restoration when the system image is loaded. This stage is implemented by `jl_serialize_value` - and its callees; while it would be simplest to use recursion, this risks stack overflow, so recursion is mimicked + - step 1 inserts relevant items into `serialization_order`, an `obj` => `id::Int` mapping. `id` is assigned by + order of insertion. This stage is implemented by `jl_queue_for_serialization` and its callees; + while it would be simplest to use recursion, this risks stack overflow, so recursion is mimicked using a work-queue managed by `jl_serialize_reachable`. - It's worth emphasizing that despite the name `jl_serialize_value`, the only goal of this stage is to - insert objects into `backref_table`. 
The entire system gets inserted, either directly or indirectly via - fields of other objects. Objects requiring pointer relocation or gc registration must be inserted directly. - In later stages, such objects get referenced by their `id`. + It's worth emphasizing that the only goal of this stage is to insert objects into `serialization_order`. + In later stages, such objects get written in order of `id`. - - step 2 (the biggest of four steps) takes all items in `backref_table` and actually serializes them ordered + - step 2 (the biggest of four steps) takes all items in `serialization_order` and actually serializes them ordered by `id`. The system is serialized into several distinct streams (see `jl_serializer_state`), a "main stream" (the `s` field) as well as parallel streams for writing specific categories of additional internal data (e.g., global data invisible to codegen, as well as deserialization "touch-up" tables, see below). These different streams @@ -46,22 +37,46 @@ one of the corresponding categorical list, then `index = t << RELOC_TAG_OFFSET + i`. The simplest source for the details of this encoding can be found in the pair of functions `get_reloc_for_item` and `get_item_for_reloc`. + `uniquing` also holds the serialized location of external DataTypes, MethodInstances, and singletons + in the serialized blob (i.e., new-at-the-time-of-serialization specializations). + Most of step 2 is handled by `jl_write_values`, followed by special handling of the dedicated parallel streams. - step 3 combines the different sections (fields of `jl_serializer_state`) into one - - step 4 writes the values of the hard-coded tagged items and `reinit_list`/`ccallable_list` - -The tables written to the serializer stream make deserialization fairly straightforward. Much of the "real work" is -done by `get_item_for_reloc`. + - step 4 writes the values of the hard-coded tagged items and `ccallable_list` + +Much of the "real work" during deserialization is done by `get_item_for_reloc`. But a few items require specific +attention: +- uniquing: during deserialization, the target item (an "external" type or MethodInstance) must be checked against + the running system to see whether such an object already exists (i.e., whether some other previously-loaded package + or workload has created such types/MethodInstances previously) or whether it needs to be created de-novo. + In either case, all references at `location` must be updated to the one in the running system. + `new_dt_objs` is a hash set of newly allocated datatype-reachable objects +- method root insertion: when new specializations generate new roots, these roots must be inserted into + method root tables +- backedges & invalidation: external edges have to be checked against the running system and any invalidations executed. 
+ +Encoding of a pointer: +- in the location of the pointer, we initially write zero padding +- for both relocs_list and gctags_list, we write loc/backrefid (for gctags_list this is handled by the caller of write_gctaggedfield, + for relocs_list it's handled by write_pointerfield) +- when writing to disk, both call get_reloc_for_item, and its return value (subject to modification by gc bits) + ends up being written into the data stream (s->s), and the data stream's position written to s->relocs + +External links: +- location holds the offset +- loc/0 in relocs_list */ #include #include #include // printf +#include // PRIxPTR #include "julia.h" #include "julia_internal.h" +#include "julia_gcext.h" #include "builtin_proto.h" #include "processor.h" #include "serialize.h" @@ -73,6 +88,9 @@ done by `get_item_for_reloc`. #include "valgrind.h" #include "julia_assert.h" +#include "staticdata_utils.c" +#include "precompile_utils.c" + #ifdef __cplusplus extern "C" { #endif @@ -80,7 +98,7 @@ extern "C" { // TODO: put WeakRefs on the weak_refs list during deserialization // TODO: handle finalizers -#define NUM_TAGS 154 +#define NUM_TAGS 179 // An array of references that need to be restored from the sysimg // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C. @@ -100,8 +118,8 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_slotnumber_type); INSERT_TAG(jl_simplevector_type); INSERT_TAG(jl_array_type); - INSERT_TAG(jl_typedslot_type); INSERT_TAG(jl_expr_type); + INSERT_TAG(jl_binding_type); INSERT_TAG(jl_globalref_type); INSERT_TAG(jl_string_type); INSERT_TAG(jl_module_type); @@ -114,11 +132,13 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_gotonode_type); INSERT_TAG(jl_quotenode_type); INSERT_TAG(jl_gotoifnot_type); + INSERT_TAG(jl_enternode_type); INSERT_TAG(jl_argument_type); INSERT_TAG(jl_returnnode_type); INSERT_TAG(jl_const_type); INSERT_TAG(jl_partial_struct_type); INSERT_TAG(jl_partial_opaque_type); + INSERT_TAG(jl_interconditional_type); INSERT_TAG(jl_method_match_type); INSERT_TAG(jl_pinode_type); INSERT_TAG(jl_phinode_type); @@ -145,7 +165,6 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_abstractstring_type); INSERT_TAG(jl_array_any_type); INSERT_TAG(jl_intrinsic_type); - INSERT_TAG(jl_abstractslot_type); INSERT_TAG(jl_methtable_type); INSERT_TAG(jl_typemap_level_type); INSERT_TAG(jl_typemap_entry_type); @@ -158,6 +177,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_emptytuple_type); INSERT_TAG(jl_array_symbol_type); INSERT_TAG(jl_array_uint8_type); + INSERT_TAG(jl_array_uint32_type); INSERT_TAG(jl_array_int32_type); INSERT_TAG(jl_array_uint64_type); INSERT_TAG(jl_int32_type); @@ -174,10 +194,23 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_float16_type); INSERT_TAG(jl_float32_type); INSERT_TAG(jl_float64_type); + INSERT_TAG(jl_bfloat16_type); INSERT_TAG(jl_floatingpoint_type); INSERT_TAG(jl_number_type); INSERT_TAG(jl_signed_type); INSERT_TAG(jl_pair_type); + INSERT_TAG(jl_genericmemory_type); + INSERT_TAG(jl_memory_any_type); + INSERT_TAG(jl_memory_uint8_type); + INSERT_TAG(jl_memory_uint16_type); + INSERT_TAG(jl_memory_uint32_type); + INSERT_TAG(jl_memory_uint64_type); + INSERT_TAG(jl_genericmemoryref_type); + INSERT_TAG(jl_memoryref_any_type); + INSERT_TAG(jl_memoryref_uint8_type); + INSERT_TAG(jl_addrspace_type); + INSERT_TAG(jl_addrspace_typename); + INSERT_TAG(jl_addrspacecore_type); // special typenames INSERT_TAG(jl_tuple_typename); @@ -188,6 +221,8 @@ 
jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_namedtuple_typename); INSERT_TAG(jl_vecelement_typename); INSERT_TAG(jl_opaque_closure_typename); + INSERT_TAG(jl_genericmemory_typename); + INSERT_TAG(jl_genericmemoryref_typename); // special exceptions INSERT_TAG(jl_errorexception_type); @@ -205,6 +240,8 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_undefref_exception); INSERT_TAG(jl_readonlymemory_exception); INSERT_TAG(jl_atomicerror_type); + INSERT_TAG(jl_missingcodeerror_type); + INSERT_TAG(jl_precompilable_error); // other special values INSERT_TAG(jl_emptysvec); @@ -213,6 +250,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_true); INSERT_TAG(jl_an_empty_string); INSERT_TAG(jl_an_empty_vec_any); + INSERT_TAG(jl_an_empty_memory_any); INSERT_TAG(jl_module_init_order); INSERT_TAG(jl_core_module); INSERT_TAG(jl_base_module); @@ -221,6 +259,9 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_typeinf_func); INSERT_TAG(jl_type_type_mt); INSERT_TAG(jl_nonfunction_mt); + INSERT_TAG(jl_kwcall_mt); + INSERT_TAG(jl_kwcall_func); + INSERT_TAG(jl_opaque_closure_method); // some Core.Builtin Functions that we want to be able to reference: INSERT_TAG(jl_builtin_throw); @@ -241,10 +282,11 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_builtin_modifyfield); INSERT_TAG(jl_builtin_replacefield); INSERT_TAG(jl_builtin_fieldtype); - INSERT_TAG(jl_builtin_arrayref); - INSERT_TAG(jl_builtin_const_arrayref); - INSERT_TAG(jl_builtin_arrayset); - INSERT_TAG(jl_builtin_arraysize); + INSERT_TAG(jl_builtin_memoryref); + INSERT_TAG(jl_builtin_memoryrefoffset); + INSERT_TAG(jl_builtin_memoryrefget); + INSERT_TAG(jl_builtin_memoryrefset); + INSERT_TAG(jl_builtin_memoryref_isassigned); INSERT_TAG(jl_builtin_apply_type); INSERT_TAG(jl_builtin_applicable); INSERT_TAG(jl_builtin_invoke); @@ -252,17 +294,12 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_builtin_ifelse); INSERT_TAG(jl_builtin__typebody); INSERT_TAG(jl_builtin_donotdelete); + INSERT_TAG(jl_builtin_compilerbarrier); INSERT_TAG(jl_builtin_getglobal); INSERT_TAG(jl_builtin_setglobal); // n.b. must update NUM_TAGS when you add something here - - // All optional tags must be placed at the end, so that we - // don't accidentally have a `NULL` in the middle -#ifdef SEGV_EXCEPTION - INSERT_TAG(jl_segv_exception); -#endif #undef INSERT_TAG - assert(i >= (NUM_TAGS-2) && i < NUM_TAGS); + assert(i == NUM_TAGS - 1); } return (jl_value_t**const*const) _tags; } @@ -273,23 +310,142 @@ static uintptr_t nsym_tag; // array of definitions for the predefined tagged object types // (reverse of symbol_table) static arraylist_t deser_sym; - -// table of all objects that are serialized -static htable_t backref_table; -static int backref_table_numel; -static arraylist_t layout_table; // cache of `position(s)` for each `id` in `backref_table` +// Predefined tags that do not have special handling in `externally_linked` +static htable_t external_objects; + +static htable_t serialization_order; // to break cycles, mark all objects that are serialized +static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them. +static htable_t nullptrs; +// FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization +// must be "toplevel" in this queue. 
For types, parameters and field types must appear +// before the "wrapper" type so they can be properly recached against the running system. +static arraylist_t serialization_queue; +static arraylist_t layout_table; // cache of `position(s)` for each `id` in `serialization_order` static arraylist_t object_worklist; // used to mimic recursion by jl_serialize_reachable -// Both `reinit_list` and `ccallable_list` are lists of (size_t pos, code) entries -// for the serializer to mark values in need of rework during deserialization -// codes: -// 1: typename (reinit_list) -// 2: module (reinit_list) -// 3: method (ccallable_list) -static arraylist_t reinit_list; +// Permanent list of void* (begin, end+1) pairs of system/package images we've loaded previously +// together with their module build_ids (used for external linkage) +// jl_linkage_blobs.items[2i:2i+1] correspond to build_ids[i] (0-offset indexing) +arraylist_t jl_linkage_blobs; +arraylist_t jl_image_relocs; + +// Eytzinger tree of images. Used for very fast jl_object_in_image queries +// See https://algorithmica.org/en/eytzinger +arraylist_t eytzinger_image_tree; +arraylist_t eytzinger_idxs; +static uintptr_t img_min; +static uintptr_t img_max; + +static int ptr_cmp(const void *l, const void *r) +{ + uintptr_t left = *(const uintptr_t*)l; + uintptr_t right = *(const uintptr_t*)r; + return (left > right) - (left < right); +} + +// Build an eytzinger tree from a sorted array +static int eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n) +{ + if (k <= n) { + i = eytzinger(src, dest, i, 2 * k, n); + dest[k-1] = src[i]; + i++; + i = eytzinger(src, dest, i, 2 * k + 1, n); + } + return i; +} + +static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT +{ + size_t n = eytzinger_image_tree.len - 1; + if (n == 0) + return n; + assert(n % 2 == 0 && "Eytzinger tree not even length!"); + uintptr_t cmp = (uintptr_t) obj; + if (cmp <= img_min || cmp > img_max) + return n; + uintptr_t *tree = (uintptr_t*)eytzinger_image_tree.items; + size_t k = 1; + // note that k preserves the history of how we got to the current node + while (k <= n) { + int greater = (cmp > tree[k - 1]); + k <<= 1; + k |= greater; + } + // Free to assume k is nonzero, since we start with k = 1 + // and cmp > gc_img_min + // This shift does a fast revert of the path until we get + // to a node that evaluated less than cmp. + k >>= (__builtin_ctzll(k) + 1); + assert(k != 0); + assert(k <= n && "Eytzinger tree index out of bounds!"); + assert(tree[k - 1] < cmp && "Failed to find lower bound for object!"); + return k - 1; +} + +//used in staticdata.c after we add an image +void rebuild_image_blob_tree(void) +{ + size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len; + assert(eytzinger_idxs.len == eytzinger_image_tree.len); + assert(eytzinger_idxs.max == eytzinger_image_tree.max); + arraylist_grow(&eytzinger_idxs, inc); + arraylist_grow(&eytzinger_image_tree, inc); + eytzinger_idxs.items[eytzinger_idxs.len - 1] = (void*)jl_linkage_blobs.len; + eytzinger_image_tree.items[eytzinger_image_tree.len - 1] = (void*)1; // outside image + for (size_t i = 0; i < jl_linkage_blobs.len; i++) { + assert((uintptr_t) jl_linkage_blobs.items[i] % 4 == 0 && "Linkage blob not 4-byte aligned!"); + // We abuse the pointer here a little so that a couple of properties are true: + // 1. a start and an end are never the same value. This simplifies the binary search. + // 2. ends are always after starts. This also simplifies the binary search. 
+ // We assume that there exist no 0-size blobs, but that's a safe assumption + // since it means nothing could be there anyways + uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i]; + eytzinger_idxs.items[i] = (void*)(val + (i & 1)); + } + qsort(eytzinger_idxs.items, eytzinger_idxs.len - 1, sizeof(void*), ptr_cmp); + img_min = (uintptr_t) eytzinger_idxs.items[0]; + img_max = (uintptr_t) eytzinger_idxs.items[eytzinger_idxs.len - 2] + 1; + eytzinger((uintptr_t*)eytzinger_idxs.items, (uintptr_t*)eytzinger_image_tree.items, 0, 1, eytzinger_idxs.len - 1); + // Reuse the scratch memory to store the indices + // Still O(nlogn) because binary search + for (size_t i = 0; i < jl_linkage_blobs.len; i ++) { + uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i]; + // This is the same computation as in the prior for loop + uintptr_t eyt_val = val + (i & 1); + size_t eyt_idx = eyt_obj_idx((jl_value_t*)(eyt_val + 1)); assert(eyt_idx < eytzinger_idxs.len - 1); + assert(eytzinger_image_tree.items[eyt_idx] == (void*)eyt_val && "Eytzinger tree failed to find object!"); + if (i & 1) + eytzinger_idxs.items[eyt_idx] = (void*)n_linkage_blobs(); + else + eytzinger_idxs.items[eyt_idx] = (void*)(i / 2); + } +} + +static int eyt_obj_in_img(jl_value_t *obj) JL_NOTSAFEPOINT +{ + assert((uintptr_t) obj % 4 == 0 && "Object not 4-byte aligned!"); + int idx = eyt_obj_idx(obj); + // Now we use a tiny trick: tree[idx] & 1 is whether or not tree[idx] is a + // start (0) or an end (1) of a blob. If it's a start, then the object is + // in the image, otherwise it is not. + int in_image = ((uintptr_t)eytzinger_image_tree.items[idx] & 1) == 0; + return in_image; +} + +size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT +{ + assert((uintptr_t) v % 4 == 0 && "Object not 4-byte aligned!"); + int eyt_idx = eyt_obj_idx(v); + // We fill the invalid slots with the length, so we can just return that + size_t idx = (size_t) eytzinger_idxs.items[eyt_idx]; + return idx; +} -// @ccallable entry points to install -static arraylist_t ccallable_list; +uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT +{ + return eyt_obj_in_img(obj); +} // hash of definitions for predefined function pointers static htable_t fptr_to_id; @@ -305,46 +461,75 @@ static const jl_fptr_args_t id_to_fptrs[] = { &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa, &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure, &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined, - &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, &jl_f_invoke_kwsorter, + &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, &jl_f_getfield, &jl_f_setfield, &jl_f_swapfield, &jl_f_modifyfield, - &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields, - &jl_f_arrayref, &jl_f_const_arrayref, &jl_f_arrayset, &jl_f_arraysize, &jl_f_apply_type, + &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields, &jl_f_apply_type, + &jl_f_memoryref, &jl_f_memoryrefoffset, &jl_f_memoryrefget, &jl_f_memoryrefset, &jl_f_memoryref_isassigned, &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar, &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype, &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type, - &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete, - &jl_f_getglobal, &jl_f_setglobal, &jl_f_finalizer, + &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete, &jl_f_compilerbarrier, + &jl_f_getglobal, &jl_f_setglobal, &jl_f_finalizer, &jl_f__compute_sparams, 
&jl_f__svec_ref, + &jl_f_current_scope, NULL }; typedef struct { ios_t *s; // the main stream - ios_t *const_data; // codegen-invisible internal data (e.g., datatype layouts, list-like typename fields, foreign types, internal arrays) + ios_t *const_data; // GC-invisible internal data (e.g., datatype layouts, list-like typename fields, foreign types, internal arrays) ios_t *symbols; // names (char*) of symbols (some may be referenced by pointer in generated code) ios_t *relocs; // for (de)serializing relocs_list and gctags_list ios_t *gvar_record; // serialized array mapping gvid => spos ios_t *fptr_record; // serialized array mapping fptrid => spos + arraylist_t memowner_list; // a list of memory locations that have shared owners + arraylist_t memref_list; // a list of memoryref locations arraylist_t relocs_list; // a list of (location, target) pairs, see description at top arraylist_t gctags_list; // " + arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated + arraylist_t uniquing_objs; // a list of locations that reference non-types that must be de-duplicated + arraylist_t fixup_types; // a list of locations of types requiring (re)caching + arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching + arraylist_t ccallable_list; // @ccallable entry points to install + // mapping from a buildid_idx to a depmods_idx + jl_array_t *buildid_depmods_idxs; + // record of build_ids for all external linkages, in order of serialization for the current sysimg/pkgimg + // conceptually, the base pointer for the jth externally-linked item is determined from + // i = findfirst(==(link_ids[j]), build_ids) + // blob_base = jl_linkage_blobs.items[2i] # 0-offset indexing + // We need separate lists since they are intermingled at creation but split when written. + jl_array_t *link_ids_relocs; + jl_array_t *link_ids_gctags; + jl_array_t *link_ids_gvars; + jl_array_t *link_ids_external_fnvars; jl_ptls_t ptls; + htable_t callers_with_edges; + jl_image_t *image; + int8_t incremental; } jl_serializer_state; -static jl_value_t *jl_idtable_type = NULL; -static jl_typename_t *jl_idtable_typename = NULL; static jl_value_t *jl_bigint_type = NULL; static int gmp_limb_size = 0; - static jl_sym_t *jl_docmeta_sym = NULL; +#ifdef _P64 +#define RELOC_TAG_OFFSET 61 +#define DEPS_IDX_OFFSET 40 // only on 64-bit can we encode the dependency-index as part of the tagged reloc +#else +// this supports up to 8 RefTags, 512MB of pointer data, and 4/2 (64/32-bit) GB of constant data. +#define RELOC_TAG_OFFSET 29 +#define DEPS_IDX_OFFSET RELOC_TAG_OFFSET +#endif + + // Tags of category `t` are located at offsets `t << RELOC_TAG_OFFSET` // Consequently there is room for 2^RELOC_TAG_OFFSET pointers, etc enum RefTags { - DataRef, // mutable data - ConstDataRef, // constant data (e.g., layouts) - TagRef, // items serialized via their tags - SymbolRef, // symbols - BindingRef, // module bindings - FunctionRef, // generic functions - BuiltinFunctionRef // builtin functions + DataRef, // mutable data + ConstDataRef, // constant data (e.g., layouts) + TagRef, // items serialized via their tags + SymbolRef, // symbols + FunctionRef, // functions + SysimageLinkage, // reference to the sysimage (from pkgimage) + ExternalLinkage // reference to some other pkgimage }; // calling conventions for internal entry points. 
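The `RefTags` categories above combine with `RELOC_TAG_OFFSET` (defined in the next hunk) to form the tagged relocation words described in the file's header comment: the top bits carry the category tag, the low `RELOC_TAG_OFFSET` bits carry the index into that category's list. A hedged sketch of that packing, assuming the `(tag << RELOC_TAG_OFFSET) | offset` layout the comment describes; the helper names here are illustrative, while the real logic lives in `get_reloc_for_item` / `get_item_for_reloc`:

/* Illustrative only: pack/unpack a tagged relocation word. */
static uintptr_t example_pack_reloc(enum RefTags tag, uintptr_t offset)
{
    /* the offset must fit below the tag bits */
    assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET));
    return ((uintptr_t)tag << RELOC_TAG_OFFSET) | offset;
}

static enum RefTags example_reloc_tag(uintptr_t reloc)
{
    return (enum RefTags)(reloc >> RELOC_TAG_OFFSET);
}

static uintptr_t example_reloc_offset(uintptr_t reloc)
{
    return reloc & (((uintptr_t)1 << RELOC_TAG_OFFSET) - 1);
}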
@@ -359,22 +544,44 @@ typedef enum { JL_API_MAX } jl_callingconv_t; +// Sub-divisions of some RefTags +const uintptr_t BuiltinFunctionTag = ((uintptr_t)1 << (RELOC_TAG_OFFSET - 1)); -// this supports up to 8 RefTags, 512MB of pointer data, and 4/2 (64/32-bit) GB of constant data. -// if a larger size is required, will need to add support for writing larger relocations in many cases below -#define RELOC_TAG_OFFSET 29 -// --- Static Compile --- +#if RELOC_TAG_OFFSET <= 32 +typedef uint32_t reloc_t; +#else +typedef uint64_t reloc_t; +#endif +static void write_reloc_t(ios_t *s, uintptr_t reloc_id) JL_NOTSAFEPOINT +{ + if (sizeof(reloc_t) <= sizeof(uint32_t)) { + assert(reloc_id < UINT32_MAX); + write_uint32(s, reloc_id); + } + else { + write_uint64(s, reloc_id); + } +} + +// Reporting to PkgCacheInspector +typedef struct { + size_t sysdata; + size_t isbitsdata; + size_t symboldata; + size_t tagslist; + size_t reloclist; + size_t gvarlist; + size_t fptrlist; +} pkgcachesizes; +// --- Static Compile --- static void *jl_sysimg_handle = NULL; -static uint64_t sysimage_base = 0; -static uintptr_t *sysimg_gvars_base = NULL; -static const int32_t *sysimg_gvars_offsets = NULL; -static jl_sysimg_fptrs_t sysimg_fptrs; +static jl_image_t sysimage; -static inline uintptr_t *sysimg_gvars(uintptr_t *base, size_t idx) +static inline uintptr_t *sysimg_gvars(uintptr_t *base, const int32_t *offsets, size_t idx) { - return base + sysimg_gvars_offsets[idx] / sizeof(base[0]); + return base + offsets[idx] / sizeof(base[0]); } JL_DLLEXPORT int jl_running_on_valgrind(void) @@ -382,212 +589,375 @@ JL_DLLEXPORT int jl_running_on_valgrind(void) return RUNNING_ON_VALGRIND; } +void *system_image_data_unavailable; +extern void * JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(system_image_data_unavailable) jl_system_image_data; +extern void * JL_WEAK_SYMBOL_OR_ALIAS_DEFAULT(system_image_data_unavailable) jl_system_image_size; static void jl_load_sysimg_so(void) { int imaging_mode = jl_generating_output() && !jl_options.incremental; // in --build mode only use sysimg data, not precompiled native code if (!imaging_mode && jl_options.use_sysimage_native_code==JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) { - jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_base", (void **)&sysimg_gvars_base, 1); - jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 1); - sysimg_gvars_offsets += 1; - assert(sysimg_fptrs.base); - - void *pgcstack_func_slot; - jl_dlsym(jl_sysimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 1); - void *pgcstack_key_slot; - jl_dlsym(jl_sysimg_handle, "jl_pgcstack_key_slot", &pgcstack_key_slot, 1); - jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot); - - size_t *tls_offset_idx; - jl_dlsym(jl_sysimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1); - *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 
0 : jl_tls_offset); - -#ifdef _OS_WINDOWS_ - sysimage_base = (intptr_t)jl_sysimg_handle; -#else - Dl_info dlinfo; - if (dladdr((void*)sysimg_gvars_base, &dlinfo) != 0) { - sysimage_base = (intptr_t)dlinfo.dli_fbase; - } - else { - sysimage_base = 0; - } -#endif + assert(sysimage.fptrs.base); } else { - memset(&sysimg_fptrs, 0, sizeof(sysimg_fptrs)); + memset(&sysimage.fptrs, 0, sizeof(sysimage.fptrs)); } const char *sysimg_data; - jl_dlsym(jl_sysimg_handle, "jl_system_image_data", (void **)&sysimg_data, 1); + if (jl_sysimg_handle == jl_exe_handle && + &jl_system_image_data != JL_WEAK_SYMBOL_DEFAULT(system_image_data_unavailable)) + sysimg_data = (const char*)&jl_system_image_data; + else + jl_dlsym(jl_sysimg_handle, "jl_system_image_data", (void **)&sysimg_data, 1); size_t *plen; - jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + if (jl_sysimg_handle == jl_exe_handle && + &jl_system_image_size != JL_WEAK_SYMBOL_DEFAULT(system_image_data_unavailable)) + plen = (size_t *)&jl_system_image_size; + else + jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); jl_restore_system_image_data(sysimg_data, *plen); } // --- serializer --- -static uintptr_t jl_fptr_id(void *fptr) +#define NBOX_C 1024 + +static int jl_needs_serialization(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPOINT { - void **pbp = ptrhash_bp(&fptr_to_id, fptr); - if (*pbp == HT_NOTFOUND || fptr == NULL) + // ignore items that are given a special relocation representation + if (s->incremental && jl_object_in_image(v)) return 0; - else - return *(uintptr_t*)pbp; + + if (v == NULL || jl_is_symbol(v) || v == jl_nothing) { + return 0; + } + else if (jl_typetagis(v, jl_int64_tag << 4)) { + int64_t i64 = *(int64_t*)v + NBOX_C / 2; + if ((uint64_t)i64 < NBOX_C) + return 0; + } + else if (jl_typetagis(v, jl_int32_tag << 4)) { + int32_t i32 = *(int32_t*)v + NBOX_C / 2; + if ((uint32_t)i32 < NBOX_C) + return 0; + } + else if (jl_typetagis(v, jl_uint8_tag << 4)) { + return 0; + } + else if (v == (jl_value_t*)s->ptls->root_task) { + return 0; + } + + return 1; } -#define jl_serialize_value(s, v) jl_serialize_value_(s,(jl_value_t*)(v),1) -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive); +static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT +{ + if (jl_is_method_instance(v)) { + jl_method_instance_t *mi = (jl_method_instance_t*)v; + jl_value_t *m = mi->def.value; + if (jl_is_method(m) && jl_object_in_image(m)) + return 1 + type_in_worklist(mi->specTypes); + } + if (jl_is_datatype(v)) { + jl_datatype_t *dt = (jl_datatype_t*)v; + if (jl_is_tuple_type(dt) ? 
!dt->isconcretetype : dt->hasfreetypevars) + return 0; // aka !is_cacheable from jltypes.c + if (jl_object_in_image((jl_value_t*)dt->name)) + return 1 + type_in_worklist(v); + } + jl_value_t *dtv = jl_typeof(v); + if (jl_is_datatype_singleton((jl_datatype_t*)dtv)) { + return 1 - type_in_worklist(dtv); // these are already recached in the datatype in the image + } + return 0; +} -static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m) +static int needs_recaching(jl_value_t *v) JL_NOTSAFEPOINT { - jl_serialize_value(s, m->name); - jl_serialize_value(s, m->parent); - size_t i; - void **table = m->bindings.table; - for (i = 0; i < m->bindings.size; i += 2) { - if (table[i+1] != HT_NOTFOUND) { - jl_serialize_value(s, (jl_value_t*)table[i]); - jl_binding_t *b = (jl_binding_t*)table[i+1]; - jl_serialize_value(s, b->name); - if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata) - jl_serialize_value(s, jl_nothing); - else - jl_serialize_value(s, jl_atomic_load_relaxed(&b->value)); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref)); - jl_serialize_value(s, b->owner); - jl_serialize_value(s, jl_atomic_load_relaxed(&b->ty)); - } - } + return caching_tag(v) == 2; +} - for (i = 0; i < m->usings.len; i++) { - jl_serialize_value(s, (jl_value_t*)m->usings.items[i]); - } +static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT +{ + assert(!jl_object_in_image(v)); + return caching_tag(v) == 1; +} + +static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT +{ + ptrhash_put(&field_replace, (void*)addr, newval); } -static jl_value_t *get_replaceable_field(jl_value_t **addr) +static jl_value_t *get_replaceable_field(jl_value_t **addr, int mutabl) JL_GC_DISABLED { jl_value_t *fld = (jl_value_t*)ptrhash_get(&field_replace, addr); - if (fld == HT_NOTFOUND) - return *addr; + if (fld == HT_NOTFOUND) { + fld = *addr; + if (mutabl && fld && jl_is_cpointer_type(jl_typeof(fld)) && jl_unbox_voidpointer(fld) != NULL && jl_unbox_voidpointer(fld) != (void*)(uintptr_t)-1) { + void **nullval = ptrhash_bp(&nullptrs, (void*)jl_typeof(fld)); + if (*nullval == HT_NOTFOUND) { + void *C_NULL = NULL; + *nullval = (void*)jl_new_bits(jl_typeof(fld), &C_NULL); + } + fld = (jl_value_t*)*nullval; + } + return fld; + } return fld; } -#define NBOX_C 1024 +static uintptr_t jl_fptr_id(void *fptr) +{ + void **pbp = ptrhash_bp(&fptr_to_id, fptr); + if (*pbp == HT_NOTFOUND || fptr == NULL) + return 0; + else + return *(uintptr_t*)pbp; +} + +// `jl_queue_for_serialization` adds items to `serialization_order` +#define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0) +static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED; + -static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive) +static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) JL_GC_DISABLED { - // ignore items that are given a special representation - if (v == NULL || jl_is_symbol(v) || v == jl_nothing) { - return; - } - else if (jl_typeis(v, jl_task_type)) { - if (v == (jl_value_t*)s->ptls->root_task) { - jl_serialize_value(s, ((jl_task_t*)v)->tls); - return; + jl_queue_for_serialization(s, m->name); + jl_queue_for_serialization(s, m->parent); + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings)); + jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindingkeyset)); + if (jl_options.strip_metadata) { + jl_svec_t 
*table = jl_atomic_load_relaxed(&m->bindings); + for (size_t i = 0; i < jl_svec_len(table); i++) { + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); + if ((void*)b == jl_nothing) + break; + jl_sym_t *name = b->globalref->name; + if (name == jl_docmeta_sym && jl_atomic_load_relaxed(&b->value)) + record_field_change((jl_value_t**)&b->value, jl_nothing); } } - else if (jl_typeis(v, jl_int64_type)) { - int64_t i64 = *(int64_t*)v + NBOX_C / 2; - if ((uint64_t)i64 < NBOX_C) - return; - } - else if (jl_typeis(v, jl_int32_type)) { - int32_t i32 = *(int32_t*)v + NBOX_C / 2; - if ((uint32_t)i32 < NBOX_C) - return; - } - else if (jl_typeis(v, jl_uint8_type)) { - return; + + for (size_t i = 0; i < m->usings.len; i++) { + jl_queue_for_serialization(s, (jl_value_t*)m->usings.items[i]); } - arraylist_push(&object_worklist, (void*)((uintptr_t)v | recursive)); } -static void jl_serialize_value__(jl_serializer_state *s, jl_value_t *v, int recursive) +// Anything that requires uniquing or fixing during deserialization needs to be "toplevel" +// in serialization (i.e., have its own entry in `serialization_order`). Consequently, +// objects that act as containers for other potentially-"problematic" objects must add such "children" +// to the queue. +// Most objects use preorder traversal. But things that need uniquing require postorder: +// you want to handle uniquing of `Dict{String,Float64}` before you tackle `Vector{Dict{String,Float64}}`. +// Uniquing is done in `serialization_order`, so the very first mention of such an object must +// be the "source" rather than merely a cross-reference. +static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED { - void **bp = ptrhash_bp(&backref_table, v); - if (*bp != HT_NOTFOUND) { - return; + jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); + jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate); + + if (!recursive) + goto done_fields; + + if (s->incremental && jl_is_datatype(v) && immediate) { + jl_datatype_t *dt = (jl_datatype_t*)v; + // ensure super is queued (though possibly not yet handled, since it may have cycles) + jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1); + // ensure all type parameters are recached + jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1); + if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance)) { + assert(jl_needs_serialization(s, dt->instance)); // should be true, since we visited dt + // do not visit dt->instance for our template object as it leads to unwanted cycles here + // (it may get serialized from elsewhere though) + record_field_change(&dt->instance, jl_nothing); + } + immediate = 0; // do not handle remaining fields immediately (just field types remains) + } + if (s->incremental && jl_is_method_instance(v)) { + jl_method_instance_t *mi = (jl_method_instance_t*)v; + jl_value_t *def = mi->def.value; + if (needs_uniquing(v)) { + // we only need 3 specific fields of this (the rest are not used) + jl_queue_for_serialization(s, mi->def.value); + jl_queue_for_serialization(s, mi->specTypes); + jl_queue_for_serialization(s, (jl_value_t*)mi->sparam_vals); + goto done_fields; + } + else if (jl_is_method(def) && jl_object_in_image(def)) { + // we only need 3 specific fields of this (the rest are restored afterward, if valid) + // in particular, cache is repopulated by jl_mi_cache_insert for all foreign function, + // so must not be present here + record_field_change((jl_value_t**)&mi->uninferred, NULL); + 
record_field_change((jl_value_t**)&mi->backedges, NULL); + record_field_change((jl_value_t**)&mi->callbacks, NULL); + record_field_change((jl_value_t**)&mi->cache, NULL); + } + else { + assert(!needs_recaching(v)); + } + // n.b. opaque closures cannot be inspected and relied upon like a + // normal method since they can get improperly introduced by generated + // functions, so if they appeared at all, we will probably serialize + // them wrong and segfault. The jl_code_for_staged function should + // prevent this from happening, so we do not need to detect that user + // error now. + } + if (s->incremental && jl_is_globalref(v)) { + jl_globalref_t *gr = (jl_globalref_t*)v; + if (jl_object_in_image((jl_value_t*)gr->mod)) { + record_field_change((jl_value_t**)&gr->binding, NULL); + } + } + if (jl_is_typename(v)) { + jl_typename_t *tn = (jl_typename_t*)v; + // don't recurse into several fields (yet) + jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&tn->cache), 0, 1); + jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&tn->linearcache), 0, 1); + if (s->incremental) { + assert(!jl_object_in_image((jl_value_t*)tn->module)); + assert(!jl_object_in_image((jl_value_t*)tn->wrapper)); + } + } + if (s->incremental && jl_is_code_instance(v)) { + jl_code_instance_t *ci = (jl_code_instance_t*)v; + // make sure we don't serialize other reachable cache entries of foreign methods + if (jl_object_in_image((jl_value_t*)ci->def->def.value)) { + // TODO: if (ci in ci->defs->cache) + record_field_change((jl_value_t**)&ci->next, NULL); + } } - size_t item = ++backref_table_numel; - assert(item < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize"); - char *pos = (char*)HT_NOTFOUND + item; - *bp = (void*)pos; - // some values have special representations - jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - jl_serialize_value(s, t); + if (immediate) // must be things that can be recursively handled, and valid as type parameters + assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v)); - if (t->layout->npointers == 0) { - // skip it + const jl_datatype_layout_t *layout = t->layout; + if (layout->npointers == 0) { + // bitstypes do not require recursion } else if (jl_is_svec(v)) { - if (!recursive) - return; size_t i, l = jl_svec_len(v); jl_value_t **data = jl_svec_data(v); for (i = 0; i < l; i++) { - jl_serialize_value(s, data[i]); + jl_queue_for_serialization_(s, data[i], 1, immediate); } } else if (jl_is_array(v)) { jl_array_t *ar = (jl_array_t*)v; - jl_serialize_value(s, jl_typeof(ar)); - if (ar->flags.ptrarray) { - size_t i, l = jl_array_len(ar); + jl_value_t *mem = get_replaceable_field((jl_value_t**)&ar->ref.mem, 1); + jl_queue_for_serialization_(s, mem, 1, immediate); + } + else if (jl_is_genericmemory(v)) { + jl_genericmemory_t *m = (jl_genericmemory_t*)v; + const char *data = (const char*)m->ptr; + if (jl_genericmemory_how(m) == 3) { + jl_queue_for_serialization_(s, jl_genericmemory_data_owner_field(v), 1, immediate); + } + else if (layout->flags.arrayelem_isboxed) { + size_t i, l = m->length; for (i = 0; i < l; i++) { - jl_serialize_value(s, jl_array_ptr_ref(ar, i)); + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[i], 1); + jl_queue_for_serialization_(s, fld, 1, immediate); } } - else if (ar->flags.hasptr) { - const char *data = (const char*)jl_array_data(ar); - uint16_t elsz = ar->elsize; - size_t i, l = jl_array_len(ar); - jl_datatype_t *et = (jl_datatype_t*)jl_tparam0(jl_typeof(ar)); - size_t j, np = 
et->layout->npointers; + else if (layout->first_ptr >= 0) { + uint16_t elsz = layout->size; + size_t i, l = m->length; + size_t j, np = layout->npointers; for (i = 0; i < l; i++) { for (j = 0; j < np; j++) { - uint32_t ptr = jl_ptr_offset(et, j); - jl_value_t *fld = ((jl_value_t**)data)[ptr]; - JL_GC_PROMISE_ROOTED(fld); - jl_serialize_value(s, fld); + uint32_t ptr = jl_ptr_offset(t, j); + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], 1); + jl_queue_for_serialization_(s, fld, 1, immediate); } data += elsz; } } } - else if (jl_typeis(v, jl_module_type)) { - jl_serialize_module(s, (jl_module_t*)v); + else if (jl_typetagis(v, jl_module_tag << 4)) { + jl_queue_module_for_serialization(s, (jl_module_t*)v); } - else if (jl_is_typename(v)) { - jl_typename_t *tn = (jl_typename_t*)v; - jl_serialize_value(s, tn->name); - jl_serialize_value(s, tn->module); - jl_serialize_value(s, tn->names); - jl_serialize_value(s, tn->wrapper); - jl_serialize_value(s, tn->Typeofwrapper); - jl_serialize_value_(s, (jl_value_t*)tn->cache, 0); - jl_serialize_value_(s, (jl_value_t*)tn->linearcache, 0); - jl_serialize_value(s, tn->mt); - jl_serialize_value(s, tn->partial); - } - else if (t->layout->nfields > 0) { + else if (layout->nfields > 0) { char *data = (char*)jl_data_ptr(v); - size_t i, np = t->layout->npointers; + size_t i, np = layout->npointers; for (i = 0; i < np; i++) { uint32_t ptr = jl_ptr_offset(t, i); - jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr]); - jl_serialize_value(s, fld); + int mutabl = t->name->mutabl; + if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field + mutabl = 0; + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl); + jl_queue_for_serialization_(s, fld, 1, immediate); + } + } + +done_fields: ; + + // We've encountered an item we need to cache + void **bp = ptrhash_bp(&serialization_order, v); + assert(*bp != (void*)(uintptr_t)-1); + if (s->incremental) { + void **bp2 = ptrhash_bp(&unique_ready, v); + if (*bp2 == HT_NOTFOUND) + assert(*bp == (void*)(uintptr_t)-2); + else if (*bp != (void*)(uintptr_t)-2) + return; + } + else { + assert(*bp == (void*)(uintptr_t)-2); + } + arraylist_push(&serialization_queue, (void*) v); + size_t idx = serialization_queue.len - 1; + assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize"); + + *bp = (void*)((char*)HT_NOTFOUND + 1 + idx); +} + +static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED +{ + if (!jl_needs_serialization(s, v)) + return; + + jl_value_t *t = jl_typeof(v); + // Items that require postorder traversal must visit their children prior to insertion into + // the worklist/serialization_order (and also before their first use) + if (s->incremental && !immediate) { + if (jl_is_datatype(t) && needs_uniquing(v)) + immediate = 1; + if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v)) + immediate = 1; + } + + void **bp = ptrhash_bp(&serialization_order, v); + if (*bp == HT_NOTFOUND) { + *bp = (void*)(uintptr_t)(immediate ? 
-2 : -1); + } + else { + if (!s->incremental || !immediate || !recursive) + return; + void **bp2 = ptrhash_bp(&unique_ready, v); + if (*bp2 == HT_NOTFOUND) + *bp2 = v; // now is unique_ready + else { + assert(*bp != (void*)(uintptr_t)-1); + return; // already was unique_ready } + assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then + if (*bp == (void*)(uintptr_t)-1) + *bp = (void*)(uintptr_t)-2; // now immediate } + + if (immediate) + jl_insert_into_serialization_queue(s, v, recursive, immediate); + else + arraylist_push(&object_worklist, (void*)v); } // Do a pre-order traversal of the to-serialize worklist, in the identical order -// to the calls to jl_serialize_value would occur in a purely recursive +// to the calls to jl_queue_for_serialization would occur in a purely recursive // implementation, but without potentially running out of stack. -static void jl_serialize_reachable(jl_serializer_state *s) +static void jl_serialize_reachable(jl_serializer_state *s) JL_GC_DISABLED { size_t i, prevlen = 0; while (object_worklist.len) { @@ -600,10 +970,16 @@ static void jl_serialize_reachable(jl_serializer_state *s) object_worklist.items[j] = tmp; } prevlen = --object_worklist.len; - uintptr_t v = (uintptr_t)object_worklist.items[prevlen]; - int recursive = v & 1; - v &= ~(uintptr_t)1; // untag v - jl_serialize_value__(s, (jl_value_t*)v, recursive); + jl_value_t *v = (jl_value_t*)object_worklist.items[prevlen]; + void **bp = ptrhash_bp(&serialization_order, (void*)v); + assert(*bp != HT_NOTFOUND && *bp != (void*)(uintptr_t)-2); + if (*bp == (void*)(uintptr_t)-1) { // might have been eagerly handled for post-order while in the lazy pre-order queue + *bp = (void*)(uintptr_t)-2; + jl_insert_into_serialization_queue(s, v, 1, 0); + } + else { + assert(s->incremental); + } } } @@ -617,20 +993,6 @@ static void ios_ensureroom(ios_t *s, size_t newsize) JL_NOTSAFEPOINT } } -// Maybe encode a global variable. `gid` is the LLVM index, 0 if the object is not serialized -// in the generated code (and thus not a gvar from that standpoint, maybe only stored in the internal-data sysimg). -// `reloc_id` is the RefTags-encoded `target`. 
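
jl_serialize_reachable above drains object_worklist in the same order a purely recursive walk would visit objects, so deep object graphs cannot overflow the C stack; the -1/-2 sentinels in serialization_order additionally distinguish lazily queued items from ones that must be handled immediately for uniquing. A stand-alone sketch of just the order-preserving worklist part (illustrative only, not part of the patch; the node type and every name here are invented):

    #include <stdio.h>
    #include <stddef.h>

    typedef struct demo_node {
        int id;
        struct demo_node *kids[4];
        size_t nkids;
        int visited;
    } demo_node_t;

    /* Visit every reachable node in recursive pre-order, but drive the walk
     * from an explicit worklist so deep graphs cannot overflow the C stack. */
    static void visit_reachable(demo_node_t *root)
    {
        demo_node_t *worklist[256];   /* a real implementation grows this array */
        size_t len = 0;
        worklist[len++] = root;
        while (len) {
            demo_node_t *v = worklist[--len];
            if (v == NULL || v->visited)
                continue;             /* already handled elsewhere */
            v->visited = 1;
            printf("serialize node %d\n", v->id);
            size_t prevlen = len;
            for (size_t i = 0; i < v->nkids; i++)
                worklist[len++] = v->kids[i];
            /* Reverse the freshly pushed children so popping yields them in
             * the same order a recursive call would have visited them. */
            for (size_t i = prevlen, j = len; i + 1 < j; i++, j--) {
                demo_node_t *tmp = worklist[i];
                worklist[i] = worklist[j - 1];
                worklist[j - 1] = tmp;
            }
        }
    }

The patch's version additionally consults serialization_order while popping, so objects that were already handled eagerly for post-order uniquing are skipped rather than visited twice.
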
-static void record_gvar(jl_serializer_state *s, int gid, uintptr_t reloc_id) JL_NOTSAFEPOINT -{ - if (gid == 0) - return; - ios_ensureroom(s->gvar_record, gid * sizeof(uint32_t)); - ios_seek(s->gvar_record, (gid - 1) * sizeof(uint32_t)); - assert(reloc_id < UINT32_MAX); - write_uint32(s->gvar_record, reloc_id); -} - - static void write_padding(ios_t *s, size_t nb) JL_NOTSAFEPOINT { static const char zeros[16] = {0}; @@ -642,18 +1004,42 @@ static void write_padding(ios_t *s, size_t nb) JL_NOTSAFEPOINT ios_write(s, zeros, nb); } - static void write_pointer(ios_t *s) JL_NOTSAFEPOINT { assert((ios_pos(s) & (sizeof(void*) - 1)) == 0 && "stream misaligned for writing a word-sized value"); - write_padding(s, sizeof(void*)); + write_uint(s, 0); +} + +// Records the buildid holding `v` and returns the tagged offset within the corresponding image +static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) { + size_t i = external_blob_index(v); + if (i < n_linkage_blobs()) { + // We found the sysimg/pkg that this item links against + // Compute the relocation code + size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i]; + offset /= sizeof(void*); + assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to external image too large"); + assert(n_linkage_blobs() == jl_array_nrows(s->buildid_depmods_idxs)); + size_t depsidx = jl_array_data(s->buildid_depmods_idxs, uint32_t)[i]; // map from build_id_idx -> deps_idx + assert(depsidx < INT32_MAX); + if (depsidx < ((uintptr_t)1 << (RELOC_TAG_OFFSET - DEPS_IDX_OFFSET)) && offset < ((uintptr_t)1 << DEPS_IDX_OFFSET)) + // if it fits in a SysimageLinkage type, use that representation + return ((uintptr_t)SysimageLinkage << RELOC_TAG_OFFSET) + ((uintptr_t)depsidx << DEPS_IDX_OFFSET) + offset; + // otherwise, we store the image key in `link_ids` + assert(link_ids && jl_is_array(link_ids)); + jl_array_grow_end(link_ids, 1); + uint32_t *link_id_data = jl_array_data(link_ids, uint32_t); // wait until after the `grow` + link_id_data[jl_array_nrows(link_ids) - 1] = depsidx; + return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset; + } + return 0; } -// Return the integer `id` for `v`. Generically this is looked up in `backref_table`, +// Return the integer `id` for `v`. Generically this is looked up in `serialization_order`, // but symbols, small integers, and a couple of special items (`nothing` and the root Task) // have special handling. 
-#define backref_id(s, v) _backref_id(s, (jl_value_t*)(v)) -static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPOINT +#define backref_id(s, v, link_ids) _backref_id(s, (jl_value_t*)(v), link_ids) +static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT { assert(v != NULL && "cannot get backref to NULL object"); void *idx = HT_NOTFOUND; @@ -676,100 +1062,97 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPO else if (v == jl_nothing) { return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + 1; } - else if (jl_typeis(v, jl_int64_type)) { + else if (jl_typetagis(v, jl_int64_tag << 4)) { int64_t i64 = *(int64_t*)v + NBOX_C / 2; if ((uint64_t)i64 < NBOX_C) return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i64 + 2; } - else if (jl_typeis(v, jl_int32_type)) { + else if (jl_typetagis(v, jl_int32_tag << 4)) { int32_t i32 = *(int32_t*)v + NBOX_C / 2; if ((uint32_t)i32 < NBOX_C) return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i32 + 2 + NBOX_C; } - else if (jl_typeis(v, jl_uint8_type)) { + else if (jl_typetagis(v, jl_uint8_tag << 4)) { uint8_t u8 = *(uint8_t*)v; return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 2 + NBOX_C + NBOX_C; } + if (s->incremental && jl_object_in_image(v)) { + assert(link_ids); + uintptr_t item = add_external_linkage(s, v, link_ids); + assert(item && "no external linkage identified"); + return item; + } if (idx == HT_NOTFOUND) { - idx = ptrhash_get(&backref_table, v); - assert(idx != HT_NOTFOUND && "object missed during jl_serialize_value pass"); + idx = ptrhash_get(&serialization_order, v); + if (idx == HT_NOTFOUND) { + jl_(jl_typeof(v)); + jl_(v); + } + assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass"); + assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass"); + assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass"); } return (char*)idx - 1 - (char*)HT_NOTFOUND; } +static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t offset) JL_NOTSAFEPOINT +{ + if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) { + if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld))) + arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset); + else + arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset); + } +} + // Save blank space in stream `s` for a pointer `fld`, storing both location and target // in `relocs_list`. static void write_pointerfield(jl_serializer_state *s, jl_value_t *fld) JL_NOTSAFEPOINT { if (fld != NULL) { arraylist_push(&s->relocs_list, (void*)(uintptr_t)ios_pos(s->s)); - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); + record_uniquing(s, fld, ios_pos(s->s)); } write_pointer(s->s); } // Save blank space in stream `s` for a pointer `fld`, storing both location and target // in `gctags_list`. 
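
Every id produced by backref_id, and every target pushed onto relocs_list or gctags_list, is a single word that packs an enum RefTags value above RELOC_TAG_OFFSET and a tag-specific payload below it; small Int64/Int32/UInt8 boxes are folded straight into the TagRef payload so they never occupy stream space. A compact sketch of that packing (illustrative only, not part of the patch; the macros, tag values, and helper names are invented stand-ins):

    #include <stdint.h>
    #include <assert.h>

    #define DEMO_TAG_OFFSET 29                                 /* stand-in for RELOC_TAG_OFFSET */
    #define DEMO_PAYLOAD_MASK (((uintptr_t)1 << DEMO_TAG_OFFSET) - 1)
    #define DEMO_NBOX 1024                                     /* stand-in for NBOX_C */

    enum demo_tag { DemoDataRef = 1, DemoTagRef = 5 };         /* invented tag values */

    static uintptr_t demo_pack(enum demo_tag tag, uintptr_t payload)
    {
        assert(payload <= DEMO_PAYLOAD_MASK && "payload must fit below the tag bits");
        return ((uintptr_t)tag << DEMO_TAG_OFFSET) | payload;
    }

    static enum demo_tag demo_tag_of(uintptr_t word) { return (enum demo_tag)(word >> DEMO_TAG_OFFSET); }
    static uintptr_t demo_payload_of(uintptr_t word) { return word & DEMO_PAYLOAD_MASK; }

    /* Small boxed integers never reach the stream: the (biased) value itself
     * becomes the TagRef payload.  Slots 0 and 1 are reserved for the root
     * task and `nothing`, hence the "+ 2". */
    static int demo_encode_small_int64(int64_t v, uintptr_t *out)
    {
        uint64_t biased = (uint64_t)(v + DEMO_NBOX / 2);
        if (biased >= DEMO_NBOX)
            return 0;                            /* too large: serialize normally */
        *out = demo_pack(DemoTagRef, (uintptr_t)biased + 2);
        return 1;
    }

The new SysimageLinkage tag applies the same idea one level deeper: its payload is split at DEPS_IDX_OFFSET into a dependency-image index and a word offset into that image, with ExternalLinkage plus the link_ids side table as the fallback when either part does not fit.
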
-static void write_gctaggedfield(jl_serializer_state *s, uintptr_t ref) JL_NOTSAFEPOINT +static void write_gctaggedfield(jl_serializer_state *s, jl_datatype_t *ref) JL_NOTSAFEPOINT { + // jl_printf(JL_STDOUT, "gctaggedfield: position %p, value 0x%lx\n", (void*)(uintptr_t)ios_pos(s->s), ref); arraylist_push(&s->gctags_list, (void*)(uintptr_t)ios_pos(s->s)); - arraylist_push(&s->gctags_list, (void*)ref); + arraylist_push(&s->gctags_list, (void*)backref_id(s, ref, s->link_ids_gctags)); write_pointer(s->s); } + // Special handling from `jl_write_values` for modules -static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t *m) +static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t *m) JL_GC_DISABLED { size_t reloc_offset = ios_pos(s->s); size_t tot = sizeof(jl_module_t); ios_write(s->s, (char*)m, tot); // raw memory dump of the `jl_module_t` structure + // will need to recreate the binding table for this + arraylist_push(&s->fixup_objs, (void*)reloc_offset); // Handle the fields requiring special attention jl_module_t *newm = (jl_module_t*)&s->s->buf[reloc_offset]; newm->name = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, name))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->name)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->name, s->link_ids_relocs)); newm->parent = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, parent))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent)); - newm->primary_world = jl_atomic_load_acquire(&jl_world_counter); - - // write out the bindings table as a list - // immediately after jl_module_t - // (the ptrhash will need to be recreated on load) - size_t count = 0; - size_t i; - void **table = m->bindings.table; - for (i = 0; i < m->bindings.size; i += 2) { - if (table[i+1] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i+1]; - write_pointerfield(s, (jl_value_t*)table[i]); - tot += sizeof(void*); - write_gctaggedfield(s, (uintptr_t)BindingRef << RELOC_TAG_OFFSET); - tot += sizeof(void*); - size_t binding_reloc_offset = ios_pos(s->s); - record_gvar(s, jl_get_llvm_gv(native_functions, (jl_value_t*)b), - ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset); - write_pointerfield(s, (jl_value_t*)b->name); - if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata) - write_pointerfield(s, jl_nothing); - else - write_pointerfield(s, jl_atomic_load_relaxed(&b->value)); - write_pointerfield(s, jl_atomic_load_relaxed(&b->globalref)); - write_pointerfield(s, (jl_value_t*)b->owner); - write_pointerfield(s, jl_atomic_load_relaxed(&b->ty)); - size_t flag_offset = offsetof(jl_binding_t, ty) + sizeof(b->ty); - ios_write(s->s, (char*)b + flag_offset, sizeof(*b) - flag_offset); - tot += sizeof(jl_binding_t); - count += 1; - } - } - assert(ios_pos(s->s) - reloc_offset == tot); - newm = (jl_module_t*)&s->s->buf[reloc_offset]; // buf might have been reallocated - newm->bindings.size = count; // stash the count in newm->size - newm->bindings.table = NULL; - memset(&newm->bindings._space, 0, sizeof(newm->bindings._space)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent, s->link_ids_relocs)); + jl_atomic_store_relaxed(&newm->bindings, NULL); + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindings))); + arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindings), s->link_ids_relocs)); + 
jl_atomic_store_relaxed(&newm->bindingkeyset, NULL); + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindingkeyset))); + arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindingkeyset), s->link_ids_relocs)); + newm->primary_world = ~(size_t)0; // write out the usings list memset(&newm->usings._space, 0, sizeof(newm->usings._space)); @@ -780,7 +1163,7 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t size_t i; for (i = 0; i < m->usings.len; i++) { arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings._space[i]))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings._space[i])); + arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings._space[i], s->link_ids_relocs)); } } else { @@ -797,224 +1180,320 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t tot += sizeof(void*); } } + assert(ios_pos(s->s) - reloc_offset == tot); +} + +static void record_memoryref(jl_serializer_state *s, size_t reloc_offset, jl_genericmemoryref_t ref) { + ios_t *f = s->s; + // make some header modifications in-place + jl_genericmemoryref_t *newref = (jl_genericmemoryref_t*)&f->buf[reloc_offset]; + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(ref.mem))->layout; + if (!layout->flags.arrayelem_isunion && layout->size != 0) { + newref->ptr_or_offset = (void*)((char*)ref.ptr_or_offset - (char*)ref.mem->ptr); // relocation offset (bytes) + arraylist_push(&s->memref_list, (void*)reloc_offset); // relocation location + arraylist_push(&s->memref_list, NULL); // relocation target (ignored) + } +} + +static void record_memoryrefs_inside(jl_serializer_state *s, jl_datatype_t *t, size_t reloc_offset, const char *data) +{ + assert(jl_is_datatype(t)); + size_t i, nf = jl_datatype_nfields(t); + for (i = 0; i < nf; i++) { + size_t offset = jl_field_offset(t, i); + if (jl_field_isptr(t, i)) + continue; + jl_value_t *ft = jl_field_type_concrete(t, i); + if (jl_is_uniontype(ft)) + continue; + if (jl_is_genericmemoryref_type(ft)) + record_memoryref(s, reloc_offset + offset, *(jl_genericmemoryref_t*)(data + offset)); + else + record_memoryrefs_inside(s, (jl_datatype_t*)ft, reloc_offset + offset, data + offset); + } } -#if 0 -static size_t jl_sort_size(jl_datatype_t *dt) +static void record_gvars(jl_serializer_state *s, arraylist_t *globals) JL_NOTSAFEPOINT { - if (dt == jl_simplevector_type) - return SIZE_MAX - 5; - if (dt == jl_string_type) - return SIZE_MAX - 4; - if (dt->name == jl_array_typename) - return SIZE_MAX - 3; - if (dt == jl_datatype_type) - return SIZE_MAX - 2; - if (dt == jl_module_type) - return SIZE_MAX - 1; - return jl_datatype_size(dt); + for (size_t i = 0; i < globals->len; i++) + jl_queue_for_serialization(s, globals->items[i]); } -#endif -// Used by `qsort` to order `backref_table` by `id` -static int sysimg_sort_order(const void *pa, const void *pb) -{ - uintptr_t sa = ((uintptr_t*)pa)[1]; - uintptr_t sb = ((uintptr_t*)pb)[1]; - return (sa > sb ? 1 : (sa < sb ? -1 : 0)); -#if 0 - jl_value_t *a = *(jl_value_t**)pa; - jl_datatype_t *tya = (jl_datatype_t*)jl_typeof(a); - size_t sa = jl_sort_size(tya); - jl_value_t *b = *(jl_value_t**)pb; - jl_datatype_t *tyb = (jl_datatype_t*)jl_typeof(b); - size_t sb = jl_sort_size(tyb); - if (sa == sb) { - sa = tya->uid; - sb = tyb->uid; - } - return (sa > sb ? 1 : (sa < sb ? 
-1 : 0)); +static void record_external_fns(jl_serializer_state *s, arraylist_t *external_fns) JL_NOTSAFEPOINT +{ + if (!s->incremental) { + assert(external_fns->len == 0); + (void) external_fns; + return; + } + + // We could call jl_queue_for_serialization here, but that should + // always be a no-op. +#ifndef JL_NDEBUG + for (size_t i = 0; i < external_fns->len; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)external_fns->items[i]; + assert(jl_atomic_load_relaxed(&ci->specsigflags) & 0b100); + } #endif } jl_value_t *jl_find_ptr = NULL; -// The main function for serializing all the items queued in `backref_table` -static void jl_write_values(jl_serializer_state *s) +// The main function for serializing all the items queued in `serialization_order` +// (They are also stored in `serialization_queue` which is order-preserving, unlike the hash table used +// for `serialization_order`). +static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED { - arraylist_t objects_list; - arraylist_new(&objects_list, backref_table_numel * 2); + size_t l = serialization_queue.len; arraylist_new(&layout_table, 0); - arraylist_grow(&layout_table, backref_table_numel); - memset(layout_table.items, 0, backref_table_numel * sizeof(void*)); - - // Order `backref_table` by `id` - size_t i, len = backref_table.size; - void **p = backref_table.table; - for (i = 0; i < len; i += 2) { - char *reloc_id = (char*)p[i + 1]; - if (reloc_id != HT_NOTFOUND) { - jl_value_t *v = (jl_value_t*)p[i]; - uintptr_t item = reloc_id - 1 - (char*)HT_NOTFOUND; - objects_list.items[objects_list.len++] = (void*)v; - objects_list.items[objects_list.len++] = (void*)item; - } - } - assert(backref_table_numel * 2 == objects_list.len); - qsort(objects_list.items, backref_table_numel, sizeof(void*) * 2, sysimg_sort_order); + arraylist_grow(&layout_table, l * 2); + memset(layout_table.items, 0, l * 2 * sizeof(void*)); // Serialize all entries - for (i = 0, len = backref_table_numel * 2; i < len; i += 2) { - jl_value_t *v = (jl_value_t*)objects_list.items[i]; // the object + for (size_t item = 0; item < l; item++) { + jl_value_t *v = (jl_value_t*)serialization_queue.items[item]; // the object JL_GC_PROMISE_ROOTED(v); - uintptr_t item = (uintptr_t)objects_list.items[i + 1]; // the id + assert(!(s->incremental && jl_object_in_image(v))); jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - assert((t->instance == NULL || t->instance == v) && "detected singleton construction corruption"); - // realign stream to expected gc alignment (16 bytes) - uintptr_t skip_header_pos = ios_pos(s->s) + sizeof(jl_taggedvalue_t); - write_padding(s->s, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos); + assert((!jl_is_datatype_singleton(t) || t->instance == v) && "detected singleton construction corruption"); + int mutabl = t->name->mutabl; + ios_t *f = s->s; + if (t->smalltag) { + if (t->layout->npointers == 0 || t == jl_string_type) { + if (jl_datatype_nfields(t) == 0 || mutabl == 0 || t == jl_string_type) { + f = s->const_data; + } + } + } + + // realign stream to expected gc alignment (16 bytes) after tag + uintptr_t skip_header_pos = ios_pos(f) + sizeof(jl_taggedvalue_t); + uintptr_t object_id_expected = mutabl && + t != jl_datatype_type && + t != jl_typename_type && + t != jl_string_type && + t != jl_simplevector_type && + t != jl_module_type; + if (object_id_expected) + skip_header_pos += sizeof(size_t); + write_padding(f, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos); + // write header - write_gctaggedfield(s, backref_id(s, t)); - size_t 
reloc_offset = ios_pos(s->s); + if (object_id_expected) + write_uint(f, jl_object_id(v)); + if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t)) + arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(ios_pos(f)|1)); + if (f == s->const_data) + write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED | GC_IN_IMAGE); + else + write_gctaggedfield(s, t); + size_t reloc_offset = ios_pos(f); assert(item < layout_table.len && layout_table.items[item] == NULL); - layout_table.items[item] = (void*)reloc_offset; // store the inverse mapping of `backref_table` (`id` => object) - record_gvar(s, jl_get_llvm_gv(native_functions, v), ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + reloc_offset); + layout_table.items[item] = (void*)(reloc_offset | (f == s->const_data)); // store the inverse mapping of `serialization_order` (`id` => object-as-streampos) + + if (s->incremental) { + if (needs_uniquing(v)) { + if (jl_is_method_instance(v)) { + assert(f == s->s); + jl_method_instance_t *mi = (jl_method_instance_t*)v; + write_pointerfield(s, mi->def.value); + write_pointerfield(s, mi->specTypes); + write_pointerfield(s, (jl_value_t*)mi->sparam_vals); + continue; + } + else if (!jl_is_datatype(v)) { + assert(jl_is_datatype_singleton(t) && "unreachable"); + } + } + else if (needs_recaching(v)) { + arraylist_push(jl_is_datatype(v) ? &s->fixup_types : &s->fixup_objs, (void*)reloc_offset); + } + else if (jl_typetagis(v, jl_binding_type)) { + jl_binding_t *b = (jl_binding_t*)v; + if (b->globalref == NULL || jl_object_in_image((jl_value_t*)b->globalref->mod)) + jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity + // Assign type Any to any owned bindings that don't have a type. + // We don't want these accidentally managing to diverge later in different compilation units. 
+ if (jl_atomic_load_relaxed(&b->owner) == b) { + jl_value_t *old_ty = NULL; + jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type); + } + } + } // write data - if (jl_is_cpointer(v)) { - write_pointer(s->s); - } - else if (jl_is_array(v)) { + if (jl_is_array(v)) { + assert(f == s->s); // Internal data for types in julia.h with `jl_array_t` field(s) -#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes) jl_array_t *ar = (jl_array_t*)v; - jl_value_t *et = jl_tparam0(jl_typeof(v)); - size_t alen = jl_array_len(ar); - size_t datasize = alen * ar->elsize; - size_t tot = datasize; - int isbitsunion = jl_array_isbitsunion(ar); - if (isbitsunion) - tot += alen; - else if (ar->elsize == 1) - tot += 1; - int ndimwords = jl_array_ndimwords(ar->flags.ndims); - size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t); // copy header - ios_write(s->s, (char*)v, headersize); - size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT; - if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD) - alignment_amt = JL_CACHE_BYTE_ALIGNMENT; + size_t headersize = sizeof(jl_array_t) + jl_array_ndims(ar)*sizeof(size_t); + ios_write(f, (char*)v, headersize); // make some header modifications in-place - jl_array_t *newa = (jl_array_t*)&s->s->buf[reloc_offset]; - if (newa->flags.ndims == 1) - newa->maxsize = alen; - newa->offset = 0; - newa->flags.how = 0; - newa->flags.pooled = 0; - newa->flags.isshared = 0; - - // write data - if (!ar->flags.ptrarray && !ar->flags.hasptr) { - // Non-pointer eltypes get encoded in the const_data section - uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt); - write_padding(s->const_data, data - ios_pos(s->const_data)); - // write data and relocations - newa->data = NULL; // relocation offset - data /= sizeof(void*); - assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large"); - arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location - arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target - if (jl_is_cpointer_type(et)) { - // reset Ptr elements to C_NULL - size_t i; - for (i = 0; i < alen; i++) - write_pointer(s->const_data); - } - else { - if (isbitsunion) { - ios_write(s->const_data, (char*)jl_array_data(ar), datasize); - ios_write(s->const_data, jl_array_typetagdata(ar), alen); + jl_array_t *newa = (jl_array_t*)&f->buf[reloc_offset]; + newa->ref.mem = NULL; // relocation offset + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, ref.mem))); // relocation location + jl_value_t *mem = get_replaceable_field((jl_value_t**)&ar->ref.mem, 1); + arraylist_push(&s->relocs_list, (void*)backref_id(s, mem, s->link_ids_relocs)); // relocation target + record_memoryref(s, reloc_offset + offsetof(jl_array_t, ref), ar->ref); + } + else if (jl_is_genericmemory(v)) { + assert(f == s->s); + // Internal data for types in julia.h with `jl_genericmemory_t` field(s) + jl_genericmemory_t *m = (jl_genericmemory_t*)v; + const jl_datatype_layout_t *layout = t->layout; + size_t len = m->length; + if (jl_genericmemory_how(m) == 3 && jl_is_genericmemory(jl_genericmemory_data_owner_field(m))) { + jl_genericmemory_t *owner = (jl_genericmemory_t*)jl_genericmemory_data_owner_field(m); + size_t data = ((char*)m->ptr - (char*)owner->ptr); // relocation offset (bytes) + write_uint(f, len); + write_uint(f, data); + write_pointerfield(s, (jl_value_t*)owner); + // similar to record_memoryref, but the field is always an 
(offset) pointer + arraylist_push(&s->memowner_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location + arraylist_push(&s->memowner_list, NULL); // relocation target (ignored) + } + // else if (jl_genericmemory_how(m) == 3) { + // jl_value_t *owner = jl_genericmemory_data_owner_field(m); + // write_uint(f, len); + // write_pointerfield(s, owner); + // write_pointerfield(s, owner); + // jl_genericmemory_t *new_mem = (jl_genericmemory_t*)&f->buf[reloc_offset]; + // assert(new_mem->ptr == NULL); + // new_mem->ptr = (void*)((char*)m->ptr - (char*)owner); // relocation offset + // } + else { + size_t datasize = len * layout->size; + size_t tot = datasize; + int isbitsunion = layout->flags.arrayelem_isunion; + if (isbitsunion) + tot += len; + size_t headersize = sizeof(jl_genericmemory_t); + // copy header + ios_write(f, (char*)v, headersize); + // write data + if (!layout->flags.arrayelem_isboxed && layout->first_ptr < 0) { + // set owner to NULL + write_pointer(f); + // Non-pointer eltypes get encoded in the const_data section + size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT; + if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD) + alignment_amt = JL_CACHE_BYTE_ALIGNMENT; + uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt); + write_padding(s->const_data, data - ios_pos(s->const_data)); + // write data and relocations + jl_genericmemory_t *new_mem = (jl_genericmemory_t*)&f->buf[reloc_offset]; + new_mem->ptr = NULL; // relocation offset + data /= sizeof(void*); + assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large"); + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location + arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target + jl_value_t *et = jl_tparam1(t); + if (jl_is_cpointer_type(et)) { + // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) + const intptr_t *data = (const intptr_t*)m->ptr; + size_t i; + for (i = 0; i < len; i++) { + if (data[i] != -1) + write_pointer(s->const_data); + else + ios_write(s->const_data, (char*)&data[i], sizeof(data[i])); + } } else { - ios_write(s->const_data, (char*)jl_array_data(ar), tot); + if (isbitsunion) { + ios_write(s->const_data, (char*)m->ptr, datasize); + ios_write(s->const_data, jl_genericmemory_typetagdata(m), len); + } + else { + ios_write(s->const_data, (char*)m->ptr, tot); + } } + if (len == 0) // TODO: should we have a zero-page, instead of writing each type's fragment separately? + write_padding(s->const_data, layout->size ? 
layout->size : isbitsunion); + else if (jl_genericmemory_how(m) == 3 && jl_is_string(jl_genericmemory_data_owner_field(m))) + write_padding(s->const_data, 1); } - } - else { - // Pointer eltypes are encoded in the mutable data section - size_t data = LLT_ALIGN(ios_pos(s->s), alignment_amt); - size_t padding_amt = data - ios_pos(s->s); - write_padding(s->s, padding_amt); - headersize += padding_amt; - newa->data = (void*)headersize; // relocation offset - arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location - arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target - if (ar->flags.hasptr) { - // copy all of the data first - const char *data = (const char*)jl_array_data(ar); - ios_write(s->s, data, datasize); - // the rewrite all of the embedded pointers to null+relocation - uint16_t elsz = ar->elsize; - size_t j, np = ((jl_datatype_t*)et)->layout->npointers; - size_t i; - for (i = 0; i < alen; i++) { - for (j = 0; j < np; j++) { - size_t offset = i * elsz + jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*); - jl_value_t *fld = *(jl_value_t**)&data[offset]; - if (fld != NULL) { - arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + headersize + offset)); // relocation location - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target - memset(&s->s->buf[reloc_offset + headersize + offset], 0, sizeof(fld)); // relocation offset (none) - } - else { - assert(*(jl_value_t**)&s->s->buf[reloc_offset + headersize + offset] == NULL); + else { + // Pointer eltypes are encoded in the mutable data section + headersize = LLT_ALIGN(headersize, JL_SMALL_BYTE_ALIGNMENT); + size_t data = LLT_ALIGN(ios_pos(f), JL_SMALL_BYTE_ALIGNMENT); + write_padding(f, data - ios_pos(f)); + assert(reloc_offset + headersize == ios_pos(f)); + jl_genericmemory_t *new_mem = (jl_genericmemory_t*)&f->buf[reloc_offset]; + new_mem->ptr = (void*)headersize; // relocation offset + arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location + arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target + if (!layout->flags.arrayelem_isboxed) { + // copy all of the data first + const char *data = (const char*)m->ptr; + ios_write(f, data, datasize); + // the rewrite all of the embedded pointers to null+relocation + uint16_t elsz = layout->size; + size_t j, np = layout->first_ptr < 0 ? 
0 : layout->npointers; + size_t i; + for (i = 0; i < len; i++) { + for (j = 0; j < np; j++) { + size_t offset = i * elsz + jl_ptr_offset(t, j) * sizeof(jl_value_t*); + jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], 1); + size_t fld_pos = reloc_offset + headersize + offset; + if (fld != NULL) { + arraylist_push(&s->relocs_list, (void*)(uintptr_t)fld_pos); // relocation location + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target + record_uniquing(s, fld, fld_pos); + } + memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } } } - } - else { - size_t i; - for (i = 0; i < alen; i++) { - jl_value_t *e = jl_array_ptr_ref(v, i); - write_pointerfield(s, e); + else { + jl_value_t **data = (jl_value_t**)m->ptr; + size_t i; + for (i = 0; i < len; i++) { + jl_value_t *e = get_replaceable_field(&data[i], 1); + write_pointerfield(s, e); + } } } } } else if (jl_typeis(v, jl_module_type)) { + assert(f == s->s); jl_write_module(s, item, (jl_module_t*)v); - // will need to recreate the binding table for this - arraylist_push(&reinit_list, (void*)item); - arraylist_push(&reinit_list, (void*)2); } - else if (jl_typeis(v, jl_task_type)) { + else if (jl_typetagis(v, jl_task_tag << 4)) { jl_error("Task cannot be serialized"); } else if (jl_is_svec(v)) { - ios_write(s->s, (char*)v, sizeof(void*)); - size_t i, l = jl_svec_len(v); + assert(f == s->s); + ios_write(f, (char*)v, sizeof(void*)); + size_t ii, l = jl_svec_len(v); assert(l > 0 || (jl_svec_t*)v == jl_emptysvec); - for (i = 0; i < l; i++) { - write_pointerfield(s, jl_svecref(v, i)); + for (ii = 0; ii < l; ii++) { + write_pointerfield(s, jl_svecref(v, ii)); } } else if (jl_is_string(v)) { - ios_write(s->s, (char*)v, sizeof(void*) + jl_string_len(v)); - write_uint8(s->s, '\0'); // null-terminated strings for easier C-compatibility + ios_write(f, (char*)v, sizeof(void*) + jl_string_len(v)); + write_uint8(f, '\0'); // null-terminated strings for easier C-compatibility + } + else if (jl_is_foreign_type(t) == 1) { + jl_error("Cannot serialize instances of foreign datatypes"); } else if (jl_datatype_nfields(t) == 0) { + // The object has no fields, so we just snapshot its byte representation assert(t->layout->npointers == 0); - if (t->size > 0) - ios_write(s->s, (char*)v, t->size); + ios_write(f, (char*)v, jl_datatype_size(t)); } - else if (jl_bigint_type && jl_typeis(v, jl_bigint_type)) { + else if (jl_bigint_type && jl_typetagis(v, jl_bigint_type)) { // foreign types require special handling + assert(f == s->s); jl_value_t *sizefield = jl_get_nth_field(v, 1); int32_t sz = jl_unbox_int32(sizefield); int32_t nw = (sz == 0 ? 1 : (sz < 0 ? 
-sz : sz)); size_t nb = nw * gmp_limb_size; - ios_write(s->s, (char*)&nw, sizeof(int32_t)); - ios_write(s->s, (char*)&sz, sizeof(int32_t)); + ios_write(f, (char*)&nw, sizeof(int32_t)); + ios_write(f, (char*)&sz, sizeof(int32_t)); uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 8); write_padding(s->const_data, data - ios_pos(s->const_data)); data /= sizeof(void*); @@ -1023,7 +1502,7 @@ static void jl_write_values(jl_serializer_state *s) arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target void *pdata = jl_unbox_voidpointer(jl_get_nth_field(v, 2)); ios_write(s->const_data, (char*)pdata, nb); - write_pointer(s->s); + write_pointer(f); } else { // Generic object::DataType serialization by field @@ -1033,16 +1512,16 @@ static void jl_write_values(jl_serializer_state *s) for (i = 0; i < nf; i++) { size_t offset = jl_field_offset(t, i); const char *slot = data + offset; - write_padding(s->s, offset - tot); + write_padding(f, offset - tot); tot = offset; size_t fsz = jl_field_size(t, i); - if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) { - // reset Ptr fields to C_NULL + if (t->name->mutabl && jl_is_cpointer_type(jl_field_type_concrete(t, i)) && *(intptr_t*)slot != -1) { + // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE) assert(!jl_field_isptr(t, i)); - write_pointer(s->s); + write_pointer(f); } else if (fsz > 0) { - ios_write(s->s, slot, fsz); + ios_write(f, slot, fsz); } tot += fsz; } @@ -1050,38 +1529,93 @@ static void jl_write_values(jl_serializer_state *s) size_t np = t->layout->npointers; for (i = 0; i < np; i++) { size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*); - jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset]); + int mutabl = t->name->mutabl; + if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field + mutabl = 0; + jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], mutabl); + size_t fld_pos = offset + reloc_offset; if (fld != NULL) { - arraylist_push(&s->relocs_list, (void*)(uintptr_t)(offset + reloc_offset)); // relocation location - arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target - memset(&s->s->buf[offset + reloc_offset], 0, sizeof(fld)); // relocation offset (none) + arraylist_push(&s->relocs_list, (void*)(uintptr_t)(fld_pos)); // relocation location + arraylist_push(&s->relocs_list, (void*)backref_id(s, fld, s->link_ids_relocs)); // relocation target + record_uniquing(s, fld, fld_pos); } + memset(&f->buf[fld_pos], 0, sizeof(fld)); // relocation offset (none) } + // Need do a tricky fieldtype walk an record all memoryref we find inlined in this value + record_memoryrefs_inside(s, t, reloc_offset, data); + // A few objects need additional handling beyond the generic serialization above - if (jl_is_method(v)) { - write_padding(s->s, sizeof(jl_method_t) - tot); - if (((jl_method_t*)v)->ccallable) { - arraylist_push(&ccallable_list, (void*)item); - arraylist_push(&ccallable_list, (void*)3); + if (s->incremental && jl_typetagis(v, jl_typemap_entry_type)) { + assert(f == s->s); + jl_typemap_entry_t *newentry = (jl_typemap_entry_t*)&s->s->buf[reloc_offset]; + if (newentry->max_world == ~(size_t)0) { + if (newentry->min_world > 1) { + newentry->min_world = ~(size_t)0; + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + } + } + else { + // garbage newentry - delete it :( + newentry->min_world = 1; + newentry->max_world = 0; + } + } + else if (jl_is_method(v)) { 
+ assert(f == s->s); + write_padding(f, sizeof(jl_method_t) - tot); // hidden fields + jl_method_t *m = (jl_method_t*)v; + jl_method_t *newm = (jl_method_t*)&f->buf[reloc_offset]; + if (s->incremental) { + if (newm->deleted_world != ~(size_t)0) + newm->deleted_world = 1; + else + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + newm->primary_world = ~(size_t)0; + } else { + newm->nroots_sysimg = m->roots ? jl_array_len(m->roots) : 0; } + if (m->ccallable) + arraylist_push(&s->ccallable_list, (void*)reloc_offset); + } + else if (jl_is_method_instance(v)) { + assert(f == s->s); + jl_method_instance_t *newmi = (jl_method_instance_t*)&f->buf[reloc_offset]; + jl_atomic_store_relaxed(&newmi->precompiled, 0); } else if (jl_is_code_instance(v)) { + assert(f == s->s); // Handle the native-code pointers + assert(f == s->s); jl_code_instance_t *m = (jl_code_instance_t*)v; - jl_code_instance_t *newm = (jl_code_instance_t*)&s->s->buf[reloc_offset]; - - newm->invoke = NULL; - newm->isspecsig = 0; - newm->specptr.fptr = NULL; + jl_code_instance_t *newm = (jl_code_instance_t*)&f->buf[reloc_offset]; + + if (s->incremental) { + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + if (m->min_world > 1) + newm->min_world = ~(size_t)0; // checks that we reprocess this upon deserialization + if (m->max_world != ~(size_t)0) + newm->max_world = 0; + else { + if (jl_atomic_load_relaxed(&m->inferred) && ptrhash_has(&s->callers_with_edges, m->def)) + newm->max_world = 1; // sentinel value indicating this will need validation + if (m->min_world > 0 && jl_atomic_load_relaxed(&m->inferred) ) { + // TODO: also check if this object is part of the codeinst cache + // will check on deserialize if this cache entry is still valid + } + } + } + jl_atomic_store_relaxed(&newm->invoke, NULL); + jl_atomic_store_relaxed(&newm->specsigflags, 0); + jl_atomic_store_relaxed(&newm->specptr.fptr, NULL); int8_t fptr_id = JL_API_NULL; int8_t builtin_id = 0; - if (m->invoke == jl_fptr_const_return) { + if (jl_atomic_load_relaxed(&m->invoke) == jl_fptr_const_return) { fptr_id = JL_API_CONST; } else { if (jl_is_method(m->def->def.method)) { - builtin_id = jl_fptr_id(m->specptr.fptr); + builtin_id = jl_fptr_id(jl_atomic_load_relaxed(&m->specptr.fptr)); if (builtin_id) { // found in the table of builtins assert(builtin_id >= 2); fptr_id = JL_API_BUILTIN; @@ -1101,45 +1635,54 @@ static void jl_write_values(jl_serializer_state *s) assert(invokeptr_id > 0); ios_ensureroom(s->fptr_record, invokeptr_id * sizeof(void*)); ios_seek(s->fptr_record, (invokeptr_id - 1) * sizeof(void*)); - write_uint32(s->fptr_record, ~reloc_offset); + write_reloc_t(s->fptr_record, (reloc_t)~reloc_offset); #ifdef _P64 - write_padding(s->fptr_record, 4); + if (sizeof(reloc_t) < 8) + write_padding(s->fptr_record, 8 - sizeof(reloc_t)); #endif } if (specfptr_id) { assert(specfptr_id > invokeptr_id && specfptr_id > 0); ios_ensureroom(s->fptr_record, specfptr_id * sizeof(void*)); ios_seek(s->fptr_record, (specfptr_id - 1) * sizeof(void*)); - write_uint32(s->fptr_record, reloc_offset); + write_reloc_t(s->fptr_record, reloc_offset); #ifdef _P64 - write_padding(s->fptr_record, 4); + if (sizeof(reloc_t) < 8) + write_padding(s->fptr_record, 8 - sizeof(reloc_t)); #endif } } } } } - newm->invoke = NULL; // relocation offset + jl_atomic_store_relaxed(&newm->invoke, NULL); // relocation offset if (fptr_id != JL_API_NULL) { + assert(fptr_id < BuiltinFunctionTag && "too many functions to serialize"); arraylist_push(&s->relocs_list, (void*)(reloc_offset + 
offsetof(jl_code_instance_t, invoke))); // relocation location arraylist_push(&s->relocs_list, (void*)(((uintptr_t)FunctionRef << RELOC_TAG_OFFSET) + fptr_id)); // relocation target } if (builtin_id >= 2) { arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_code_instance_t, specptr.fptr))); // relocation location - arraylist_push(&s->relocs_list, (void*)(((uintptr_t)BuiltinFunctionRef << RELOC_TAG_OFFSET) + builtin_id - 2)); // relocation target + arraylist_push(&s->relocs_list, (void*)(((uintptr_t)FunctionRef << RELOC_TAG_OFFSET) + BuiltinFunctionTag + builtin_id - 2)); // relocation target } } else if (jl_is_datatype(v)) { + assert(f == s->s); jl_datatype_t *dt = (jl_datatype_t*)v; - jl_datatype_t *newdt = (jl_datatype_t*)&s->s->buf[reloc_offset]; + jl_datatype_t *newdt = (jl_datatype_t*)&f->buf[reloc_offset]; + if (dt->layout != NULL) { size_t nf = dt->layout->nfields; size_t np = dt->layout->npointers; - size_t fieldsize = jl_fielddesc_size(dt->layout->fielddesc_type); + size_t fieldsize = 0; + uint8_t is_foreign_type = dt->layout->flags.fielddesc_type == 3; + if (!is_foreign_type) { + fieldsize = jl_fielddesc_size(dt->layout->flags.fielddesc_type); + } char *flddesc = (char*)dt->layout; size_t fldsize = sizeof(jl_datatype_layout_t) + nf * fieldsize; - if (dt->layout->first_ptr != -1) - fldsize += np << dt->layout->fielddesc_type; + if (!is_foreign_type && dt->layout->first_ptr != -1) + fldsize += np << dt->layout->flags.fielddesc_type; uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*)); write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream newdt->layout = NULL; // relocation offset @@ -1147,11 +1690,19 @@ static void jl_write_values(jl_serializer_state *s) arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_datatype_t, layout))); // relocation location arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target ios_write(s->const_data, flddesc, fldsize); + if (is_foreign_type) { + // make sure we have space for the extra hidden pointers + // zero them since they will need to be re-initialized externally + assert(fldsize == sizeof(jl_datatype_layout_t)); + jl_fielddescdyn_t dyn = {0, 0}; + ios_write(s->const_data, (char*)&dyn, sizeof(jl_fielddescdyn_t)); + } } } else if (jl_is_typename(v)) { + assert(f == s->s); jl_typename_t *tn = (jl_typename_t*)v; - jl_typename_t *newtn = (jl_typename_t*)&s->s->buf[reloc_offset]; + jl_typename_t *newtn = (jl_typename_t*)&f->buf[reloc_offset]; if (tn->atomicfields != NULL) { size_t nb = (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t); uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*)); @@ -1173,85 +1724,35 @@ static void jl_write_values(jl_serializer_state *s) ios_write(s->const_data, (char*)tn->constfields, nb); } } - else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) { - // will need to rehash this, later (after types are fully constructed) - arraylist_push(&reinit_list, (void*)item); - arraylist_push(&reinit_list, (void*)1); + else if (jl_is_globalref(v)) { + assert(f == s->s); + jl_globalref_t *gr = (jl_globalref_t*)v; + if (s->incremental && jl_object_in_image((jl_value_t*)gr->mod)) { + // will need to populate the binding field later + arraylist_push(&s->fixup_objs, (void*)reloc_offset); + } + } + else if (jl_is_genericmemoryref(v)) { + assert(f == s->s); + record_memoryref(s, reloc_offset, *(jl_genericmemoryref_t*)v); } else { - write_padding(s->s, t->size - tot); + 
write_padding(f, jl_datatype_size(t) - tot); } } } } - -// Record all symbols that get referenced by the generated code -// and queue them for pointer relocation -static void jl_write_gv_syms(jl_serializer_state *s, jl_sym_t *v) -{ - // since symbols are static, they might not have had a - // reference anywhere in the code image other than here - int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v); - if (gv != 0) { - uintptr_t item = backref_id(s, v); - assert(item >> RELOC_TAG_OFFSET == SymbolRef); - record_gvar(s, gv, item); - } - if (v->left) - jl_write_gv_syms(s, v->left); - if (v->right) - jl_write_gv_syms(s, v->right); -} - -// Record all hardcoded-tagged items that get referenced by -// the generated code and queue them for pointer relocation -static void jl_write_gv_tagref(jl_serializer_state *s, jl_value_t *v) -{ - int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v); - if (gv != 0) { - uintptr_t item = backref_id(s, v); - assert(item >> RELOC_TAG_OFFSET == TagRef); - record_gvar(s, gv, item); - } -} -static void jl_write_gv_tagrefs(jl_serializer_state *s) -{ - // this also ensures all objects referenced in the code have - // references in the system image to their global variable - // since codegen knows that some integer boxes are static, - // they might not have had a reference anywhere in the code - // image other than here - size_t i; - jl_write_gv_tagref(s, (jl_value_t*)s->ptls->root_task); - jl_write_gv_tagref(s, s->ptls->root_task->tls); - jl_write_gv_tagref(s, jl_nothing); - for (i = 0; i < NBOX_C; i++) { - jl_write_gv_tagref(s, jl_box_int32((int32_t)i - NBOX_C / 2)); - jl_write_gv_tagref(s, jl_box_int64((int64_t)i - NBOX_C / 2)); - } - for (i = 0; i < 256; i++) { - jl_write_gv_tagref(s, jl_box_uint8(i)); - } -} - -static inline uint32_t load_uint32(uintptr_t *base) -{ - uint32_t v = jl_load_unaligned_i32((void*)*base); - *base += 4; - return v; -} - - // In deserialization, create Symbols and set up the // index for backreferencing static void jl_read_symbols(jl_serializer_state *s) { - assert(deser_sym.len == nsym_tag); + assert(deser_sym.len == 0); uintptr_t base = (uintptr_t)&s->symbols->buf[0]; uintptr_t end = base + s->symbols->size; while (base < end) { - uint32_t len = load_uint32(&base); + uint32_t len = jl_load_unaligned_i32((void*)base); + base += 4; const char *str = (const char*)base; base += len + 1; //printf("symbol %3d: %s\n", len, str); @@ -1272,8 +1773,16 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) assert(reloc_item < layout_table.len); uintptr_t reloc_base = (uintptr_t)layout_table.items[reloc_item]; assert(reloc_base != 0 && "layout offset missing for relocation item"); + if (reloc_base & 1) { + // convert to a ConstDataRef + tag = ConstDataRef; + reloc_base &= ~(uintptr_t)1; + assert(LLT_ALIGN(reloc_base, sizeof(void*)) == reloc_base); + reloc_base /= sizeof(void*); + assert(reloc_offset == 0); + } // write reloc_offset into s->s at pos - return reloc_base + reloc_offset; + return ((uintptr_t)tag << RELOC_TAG_OFFSET) + reloc_base + reloc_offset; } else { // just write the item reloc_id directly @@ -1289,16 +1798,19 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) case TagRef: assert(offset < 2 * NBOX_C + 258 && "corrupt relocation item id"); break; - case BindingRef: - assert(offset == 0 && "corrupt relocation offset"); + case FunctionRef: + if (offset & BuiltinFunctionTag) { + offset &= ~BuiltinFunctionTag; + assert(offset < sizeof(id_to_fptrs) / 
sizeof(*id_to_fptrs) && "unknown function pointer id"); + } + else { + assert(offset < JL_API_MAX && "unknown function pointer id"); + } break; - case BuiltinFunctionRef: - assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer id"); + case SysimageLinkage: break; - case FunctionRef: - assert(offset < JL_API_MAX && "unknown function pointer id"); + case ExternalLinkage: break; - case DataRef: default: assert(0 && "corrupt relocation item id"); abort(); @@ -1309,21 +1821,21 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset) } // Compute target location at deserialization -static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uint32_t reloc_id) +static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, uintptr_t reloc_id, jl_array_t *link_ids, int *link_index) JL_NOTSAFEPOINT { enum RefTags tag = (enum RefTags)(reloc_id >> RELOC_TAG_OFFSET); size_t offset = (reloc_id & (((uintptr_t)1 << RELOC_TAG_OFFSET) - 1)); switch (tag) { case DataRef: - assert(offset <= size); - return base + offset; + assert(offset <= s->s->size); + return (uintptr_t)base + offset; case ConstDataRef: - return (uintptr_t)s->const_data->buf + (offset * sizeof(void*)); + offset *= sizeof(void*); + assert(offset <= s->const_data->size); + return (uintptr_t)s->const_data->buf + offset; case SymbolRef: assert(offset < deser_sym.len && deser_sym.items[offset] && "corrupt relocation item id"); return (uintptr_t)deser_sym.items[offset]; - case BindingRef: - return jl_buff_tag | GC_OLD_MARKED; case TagRef: if (offset == 0) return (uintptr_t)s->ptls->root_task; @@ -1341,17 +1853,19 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas // offset -= 256; assert(0 && "corrupt relocation item id"); jl_unreachable(); // terminate control flow if assertion is disabled. 
- case BuiltinFunctionRef: - assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer ID"); - return (uintptr_t)id_to_fptrs[offset]; case FunctionRef: + if (offset & BuiltinFunctionTag) { + offset &= ~BuiltinFunctionTag; + assert(offset < sizeof(id_to_fptrs) / sizeof(*id_to_fptrs) && "unknown function pointer ID"); + return (uintptr_t)id_to_fptrs[offset]; + } switch ((jl_callingconv_t)offset) { case JL_API_BOXED: - if (sysimg_fptrs.base) + if (s->image->fptrs.base) return (uintptr_t)jl_fptr_args; JL_FALLTHROUGH; case JL_API_WITH_PARAMETERS: - if (sysimg_fptrs.base) + if (s->image->fptrs.base) return (uintptr_t)jl_fptr_sparam; return (uintptr_t)NULL; case JL_API_CONST: @@ -1365,128 +1879,274 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas //default: assert("corrupt relocation item id"); } + case SysimageLinkage: { +#ifdef _P64 + size_t depsidx = offset >> DEPS_IDX_OFFSET; + offset &= ((size_t)1 << DEPS_IDX_OFFSET) - 1; +#else + size_t depsidx = 0; +#endif + assert(s->buildid_depmods_idxs && depsidx < jl_array_len(s->buildid_depmods_idxs)); + size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx]; + assert(2*i < jl_linkage_blobs.len); + return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*); + } + case ExternalLinkage: { + assert(link_ids); + assert(link_index); + assert(0 <= *link_index && *link_index < jl_array_len(link_ids)); + uint32_t depsidx = jl_array_data(link_ids, uint32_t)[*link_index]; + *link_index += 1; + assert(depsidx < jl_array_len(s->buildid_depmods_idxs)); + size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx]; + assert(2*i < jl_linkage_blobs.len); + return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*); + } } abort(); } -static void jl_write_skiplist(ios_t *s, char *base, size_t size, arraylist_t *list) +static void jl_finish_relocs(char *base, size_t size, arraylist_t *list) { - size_t i; - for (i = 0; i < list->len; i += 2) { + for (size_t i = 0; i < list->len; i += 2) { size_t pos = (size_t)list->items[i]; - size_t item = (size_t)list->items[i + 1]; + size_t item = (size_t)list->items[i + 1]; // item is tagref-encoded uintptr_t *pv = (uintptr_t*)(base + pos); assert(pos < size && pos != 0); *pv = get_reloc_for_item(item, *pv); - // record pos in relocations list - // TODO: save space by using delta-compression - assert(pos < UINT32_MAX); - write_uint32(s, pos); } - write_uint32(s, 0); } - -static void jl_write_relocations(jl_serializer_state *s) +static void jl_write_offsetlist(ios_t *s, size_t size, arraylist_t *list) { - char *base = &s->s->buf[0]; - jl_write_skiplist(s->relocs, base, s->s->size, &s->gctags_list); - jl_write_skiplist(s->relocs, base, s->s->size, &s->relocs_list); + for (size_t i = 0; i < list->len; i += 2) { + size_t last_pos = i ? (size_t)list->items[i - 2] : 0; + size_t pos = (size_t)list->items[i]; + assert(pos < size && pos != 0); + // write pos as compressed difference. 
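            // For illustration: pos_diff is emitted below as a little-endian base-128
            // varint, 7 data bits per byte with the high bit set on every byte except
            // the last. E.g. a pos_diff of 300 (binary 1 0010 1100) becomes the bytes
            // 0xAC 0x02, a pos_diff of 127 fits in the single byte 0x7F, and the 0 byte
            // written after this loop terminates the list on the read side.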
+ size_t pos_diff = pos - last_pos; + while (pos_diff) { + assert(pos_diff >= 0); + if (pos_diff <= 127) { + write_int8(s, pos_diff); + break; + } + else { + // Extract the next 7 bits + int8_t ns = pos_diff & (int8_t)0x7F; + pos_diff >>= 7; + // Set the high bit if there's still more + ns |= (!!pos_diff) << 7; + write_int8(s, ns); + } + } + } + write_int8(s, 0); } -static void jl_read_relocations(jl_serializer_state *s, uint8_t bits) +static void jl_write_arraylist(ios_t *s, arraylist_t *list) { - uintptr_t base = (uintptr_t)&s->s->buf[0]; - size_t size = s->s->size; + write_uint(s, list->len); + ios_write(s, (const char*)list->items, list->len * sizeof(void*)); +} + +static void jl_read_reloclist(jl_serializer_state *s, jl_array_t *link_ids, uint8_t bits) +{ + uintptr_t base = (uintptr_t)s->s->buf; + uintptr_t last_pos = 0; + uint8_t *current = (uint8_t *)(s->relocs->buf + s->relocs->bpos); + int link_index = 0; while (1) { - uintptr_t val = (uintptr_t)&s->relocs->buf[s->relocs->bpos]; - uint32_t offset = load_uint32(&val); - s->relocs->bpos += sizeof(uint32_t); - if (offset == 0) + // Read the offset of the next object + size_t pos_diff = 0; + size_t cnt = 0; + while (1) { + assert(s->relocs->bpos <= s->relocs->size); + assert((char *)current <= (char *)(s->relocs->buf + s->relocs->size)); + int8_t c = *current++; + s->relocs->bpos += 1; + + pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); + if ((c >> 7) == 0) + break; + } + if (pos_diff == 0) break; - uintptr_t *pv = (uintptr_t*)(base + offset); + + uintptr_t pos = last_pos + pos_diff; + last_pos = pos; + uintptr_t *pv = (uintptr_t *)(base + pos); uintptr_t v = *pv; - v = get_item_for_reloc(s, base, size, v); + v = get_item_for_reloc(s, base, v, link_ids, &link_index); + if (bits && v && ((jl_datatype_t*)v)->smalltag) + v = (uintptr_t)((jl_datatype_t*)v)->smalltag << 4; // TODO: should we have a representation that supports sweep without a relocation step? 
*pv = v | bits; } + assert(!link_ids || link_index == jl_array_len(link_ids)); } -static char* sysimg_base; -static char* sysimg_relocs; -void gc_sweep_sysimg(void) +static void jl_read_memreflist(jl_serializer_state *s) { - uintptr_t base = (uintptr_t)sysimg_base; - uintptr_t relocs = (uintptr_t)sysimg_relocs; - if (relocs == 0) - return; + uintptr_t base = (uintptr_t)s->s->buf; + uintptr_t last_pos = 0; + uint8_t *current = (uint8_t *)(s->relocs->buf + s->relocs->bpos); while (1) { - uint32_t offset = load_uint32(&relocs); - if (offset == 0) + // Read the offset of the next object + size_t pos_diff = 0; + size_t cnt = 0; + while (1) { + assert(s->relocs->bpos <= s->relocs->size); + assert((char *)current <= (char *)(s->relocs->buf + s->relocs->size)); + int8_t c = *current++; + s->relocs->bpos += 1; + + pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); + if ((c >> 7) == 0) + break; + } + if (pos_diff == 0) break; - jl_taggedvalue_t *o = (jl_taggedvalue_t*)(base + offset); - o->bits.gc = GC_OLD; + + uintptr_t pos = last_pos + pos_diff; + last_pos = pos; + jl_genericmemoryref_t *pv = (jl_genericmemoryref_t*)(base + pos); + size_t offset = (size_t)pv->ptr_or_offset; + pv->ptr_or_offset = (void*)((char*)pv->mem->ptr + offset); } } + +static void jl_read_arraylist(ios_t *s, arraylist_t *list) +{ + size_t list_len = read_uint(s); + arraylist_new(list, 0); + arraylist_grow(list, list_len); + ios_read(s, (char*)list->items, list_len * sizeof(void*)); +} + +void gc_sweep_sysimg(void) +{ + size_t nblobs = n_linkage_blobs(); + if (nblobs == 0) + return; + assert(jl_linkage_blobs.len == 2*nblobs); + assert(jl_image_relocs.len == nblobs); + for (size_t i = 0; i < 2*nblobs; i+=2) { + reloc_t *relocs = (reloc_t*)jl_image_relocs.items[i>>1]; + if (!relocs) + continue; + uintptr_t base = (uintptr_t)jl_linkage_blobs.items[i]; + uintptr_t last_pos = 0; + uint8_t *current = (uint8_t *)relocs; + while (1) { + // Read the offset of the next object + size_t pos_diff = 0; + size_t cnt = 0; + while (1) { + int8_t c = *current++; + pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++); + if ((c >> 7) == 0) + break; + } + if (pos_diff == 0) + break; + + uintptr_t pos = last_pos + pos_diff; + last_pos = pos; + jl_taggedvalue_t *o = (jl_taggedvalue_t *)(base + pos); + o->bits.gc = GC_OLD; + assert(o->bits.in_image == 1); + } + } +} + +// jl_write_value and jl_read_value are used for storing Julia objects that are adjuncts to +// the image proper. For example, new methods added to external callables require +// insertion into the appropriate method table. 
#define jl_write_value(s, v) _jl_write_value((s), (jl_value_t*)(v)) static void _jl_write_value(jl_serializer_state *s, jl_value_t *v) { if (v == NULL) { - write_uint32(s->s, 0); + write_reloc_t(s->s, 0); return; } - uintptr_t item = backref_id(s, v); + uintptr_t item = backref_id(s, v, NULL); uintptr_t reloc = get_reloc_for_item(item, 0); - assert(reloc < UINT32_MAX); - write_uint32(s->s, reloc); + write_reloc_t(s->s, reloc); } - static jl_value_t *jl_read_value(jl_serializer_state *s) { - uintptr_t base = (uintptr_t)&s->s->buf[0]; - size_t size = s->s->size; - uintptr_t val = base + s->s->bpos; - uint32_t offset = load_uint32(&val); - s->s->bpos += sizeof(uint32_t); + uintptr_t base = (uintptr_t)s->s->buf; + uintptr_t offset = *(reloc_t*)(base + (uintptr_t)s->s->bpos); + s->s->bpos += sizeof(reloc_t); if (offset == 0) return NULL; - return (jl_value_t*)get_item_for_reloc(s, base, size, offset); + return (jl_value_t*)get_item_for_reloc(s, base, offset, NULL, NULL); +} + +// The next two, `jl_read_offset` and `jl_delayed_reloc`, are essentially a split version +// of `jl_read_value` that allows usage of the relocation data rather than passing NULL +// to `get_item_for_reloc`. +// This works around what would otherwise be an order-dependency conundrum: objects +// that may require relocation data have to be inserted into `serialization_order`, +// and that may include some of the adjunct data that gets serialized via +// `jl_write_value`. But we can't interpret them properly until we read the relocation +// data, and that happens after we pull items out of the serialization stream. +static uintptr_t jl_read_offset(jl_serializer_state *s) +{ + uintptr_t base = (uintptr_t)&s->s->buf[0]; + uintptr_t offset = *(reloc_t*)(base + (uintptr_t)s->s->bpos); + s->s->bpos += sizeof(reloc_t); + return offset; +} + +static jl_value_t *jl_delayed_reloc(jl_serializer_state *s, uintptr_t offset) JL_GC_DISABLED +{ + if (!offset) + return NULL; + uintptr_t base = (uintptr_t)s->s->buf; + int link_index = 0; + jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, offset, s->link_ids_relocs, &link_index); + assert(!s->link_ids_relocs || link_index < jl_array_len(s->link_ids_relocs)); + return ret; } -static void jl_update_all_fptrs(jl_serializer_state *s) +static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image) { - jl_sysimg_fptrs_t fvars = sysimg_fptrs; + jl_image_fptrs_t fvars = image->fptrs; // make these NULL now so we skip trying to restore GlobalVariable pointers later - sysimg_gvars_base = NULL; - sysimg_fptrs.base = NULL; + image->gvars_base = NULL; + image->fptrs.base = NULL; if (fvars.base == NULL) return; - int sysimg_fvars_max = s->fptr_record->size / sizeof(void*); + + memcpy(image->jl_small_typeof, &jl_small_typeof, sizeof(jl_small_typeof)); + + int img_fvars_max = s->fptr_record->size / sizeof(void*); size_t i; uintptr_t base = (uintptr_t)&s->s->buf[0]; // These will become MethodInstance references, but they start out as a list of // offsets into `s` for CodeInstances jl_method_instance_t **linfos = (jl_method_instance_t**)&s->fptr_record->buf[0]; uint32_t clone_idx = 0; - for (i = 0; i < sysimg_fvars_max; i++) { - uintptr_t val = (uintptr_t)&linfos[i]; - uint32_t offset = load_uint32(&val); + for (i = 0; i < img_fvars_max; i++) { + reloc_t offset = *(reloc_t*)&linfos[i]; linfos[i] = NULL; if (offset != 0) { int specfunc = 1; - if (offset & ((uintptr_t)1 << (8 * sizeof(uint32_t) - 1))) { + if (offset & ((uintptr_t)1 << (8 * sizeof(reloc_t) - 1))) { // if high bit is set, 
this is the func wrapper, not the specfunc specfunc = 0; offset = ~offset; } jl_code_instance_t *codeinst = (jl_code_instance_t*)(base + offset); uintptr_t base = (uintptr_t)fvars.base; - assert(jl_is_method(codeinst->def->def.method) && codeinst->invoke != jl_fptr_const_return); - assert(specfunc ? codeinst->invoke != NULL : codeinst->invoke == NULL); + assert(jl_is_method(codeinst->def->def.method) && jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return); + assert(specfunc ? jl_atomic_load_relaxed(&codeinst->invoke) != NULL : jl_atomic_load_relaxed(&codeinst->invoke) == NULL); linfos[i] = codeinst->def; // now it's a MethodInstance int32_t offset = fvars.offsets[i]; for (; clone_idx < fvars.nclones; clone_idx++) { @@ -1499,150 +2159,134 @@ static void jl_update_all_fptrs(jl_serializer_state *s) } void *fptr = (void*)(base + offset); if (specfunc) { - codeinst->specptr.fptr = fptr; - codeinst->isspecsig = 1; // TODO: set only if confirmed to be true + jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr); + jl_atomic_store_relaxed(&codeinst->specsigflags, 0b111); // TODO: set only if confirmed to be true } else { - codeinst->invoke = (jl_callptr_t)fptr; + jl_atomic_store_relaxed(&codeinst->invoke,(jl_callptr_t)fptr); } } } // Tell LLVM about the native code - jl_register_fptrs(sysimage_base, &fvars, linfos, sysimg_fvars_max); + jl_register_fptrs(image->base, &fvars, linfos, img_fvars_max); } +static uint32_t write_gvars(jl_serializer_state *s, arraylist_t *globals, arraylist_t *external_fns) JL_NOTSAFEPOINT +{ + size_t len = globals->len + external_fns->len; + ios_ensureroom(s->gvar_record, len * sizeof(reloc_t)); + for (size_t i = 0; i < globals->len; i++) { + void *g = globals->items[i]; + uintptr_t item = backref_id(s, g, s->link_ids_gvars); + uintptr_t reloc = get_reloc_for_item(item, 0); + write_reloc_t(s->gvar_record, reloc); + record_uniquing(s, (jl_value_t*)g, ((i << 2) | 2)); // mark as gvar && !tag + } + for (size_t i = 0; i < external_fns->len; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)external_fns->items[i]; + assert(ci && (jl_atomic_load_relaxed(&ci->specsigflags) & 0b001)); + uintptr_t item = backref_id(s, (void*)ci, s->link_ids_external_fnvars); + uintptr_t reloc = get_reloc_for_item(item, 0); + write_reloc_t(s->gvar_record, reloc); + } + return globals->len; +} // Pointer relocation for native-code referenced global variables -static void jl_update_all_gvars(jl_serializer_state *s) +static void jl_update_all_gvars(jl_serializer_state *s, jl_image_t *image, uint32_t external_fns_begin) { - if (sysimg_gvars_base == NULL) + if (image->gvars_base == NULL) return; - size_t gvname_index = 0; - uintptr_t base = (uintptr_t)&s->s->buf[0]; - size_t size = s->s->size; - uintptr_t gvars = (uintptr_t)&s->gvar_record->buf[0]; - uintptr_t end = gvars + s->gvar_record->size; - while (gvars < end) { - uint32_t offset = load_uint32(&gvars); - if (offset) { - uintptr_t v = get_item_for_reloc(s, base, size, offset); - *sysimg_gvars(sysimg_gvars_base, gvname_index) = v; + uintptr_t base = (uintptr_t)s->s->buf; + size_t i = 0; + size_t l = s->gvar_record->size / sizeof(reloc_t); + reloc_t *gvars = (reloc_t*)&s->gvar_record->buf[0]; + int gvar_link_index = 0; + int external_fns_link_index = 0; + assert(l == image->ngvars); + for (i = 0; i < l; i++) { + uintptr_t offset = gvars[i]; + uintptr_t v = 0; + if (i < external_fns_begin) { + v = get_item_for_reloc(s, base, offset, s->link_ids_gvars, &gvar_link_index); } - gvname_index += 1; - } -} - - -// Reinitialization 
-static void jl_finalize_serializer(jl_serializer_state *s, arraylist_t *list) -{ - size_t i, l; - - // record list of reinitialization functions - l = list->len; - for (i = 0; i < l; i += 2) { - size_t item = (size_t)list->items[i]; - size_t reloc_offset = (size_t)layout_table.items[item]; - assert(reloc_offset != 0); - write_uint32(s->s, (uint32_t)reloc_offset); - write_uint32(s->s, (uint32_t)((uintptr_t)list->items[i + 1])); + else { + v = get_item_for_reloc(s, base, offset, s->link_ids_external_fnvars, &external_fns_link_index); + } + uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i); + *gv = v; } - write_uint32(s->s, 0); + assert(!s->link_ids_gvars || gvar_link_index == jl_array_len(s->link_ids_gvars)); + assert(!s->link_ids_external_fnvars || external_fns_link_index == jl_array_len(s->link_ids_external_fnvars)); } - -static void jl_reinit_item(jl_value_t *v, int how) JL_GC_DISABLED +static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image, uint32_t external_fns_begin) { - switch (how) { - case 1: { // rehash IdDict - jl_array_t **a = (jl_array_t**)v; - assert(jl_is_array(*a)); - // Assume *a don't need a write barrier - *a = jl_idtable_rehash(*a, jl_array_len(*a)); - jl_gc_wb(v, *a); - break; - } - case 2: { // rebuild the binding table for module v - jl_module_t *mod = (jl_module_t*)v; - assert(jl_is_module(mod)); - size_t nbindings = mod->bindings.size; - htable_new(&mod->bindings, nbindings); - struct binding { - jl_sym_t *asname; - uintptr_t tag; - jl_binding_t b; - } *b; - b = (struct binding*)&mod[1]; - while (nbindings > 0) { - ptrhash_put(&mod->bindings, b->asname, &b->b); - b += 1; - nbindings -= 1; - } - if (mod->usings.items != &mod->usings._space[0]) { - void **newitems = (void**)malloc_s(mod->usings.max * sizeof(void*)); - memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*)); - mod->usings.items = newitems; - } - break; + if (image->gvars_base == NULL) + return; + size_t i = 0; + size_t l = s->gvar_record->size / sizeof(reloc_t); + for (i = 0; i < l; i++) { + uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i); + uintptr_t v = *gv; + if (i < external_fns_begin) { + if (!jl_is_binding(v)) + v = (uintptr_t)jl_as_global_root((jl_value_t*)v, 1); } - case 3: { // install ccallable entry point in JIT - jl_svec_t *sv = ((jl_method_t*)v)->ccallable; - int success = jl_compile_extern_c(NULL, NULL, jl_sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1)); - assert(success); (void)success; - break; + else { + jl_code_instance_t *codeinst = (jl_code_instance_t*) v; + assert(codeinst && (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b01) && jl_atomic_load_relaxed(&codeinst->specptr.fptr)); + v = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); } - default: - assert(0 && "corrupt deserialization state"); - abort(); + *gv = v; } } -static void jl_finalize_deserializer(jl_serializer_state *s) JL_GC_DISABLED +static void jl_compile_extern(jl_method_t *m, void *sysimg_handle) JL_GC_DISABLED { - // run reinitialization functions - uintptr_t base = (uintptr_t)&s->s->buf[0]; - while (1) { - size_t offset = read_uint32(s->s); - if (offset == 0) - break; - jl_value_t *v = (jl_value_t*)(base + offset); - jl_reinit_item(v, read_uint32(s->s)); - } + // install ccallable entry point in JIT + assert(m); // makes clang-sa happy + jl_svec_t *sv = m->ccallable; + int success = jl_compile_extern_c(NULL, NULL, sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1)); + if (!success) + jl_safe_printf("WARNING: 
@ccallable was already defined for this method name\n"); // enjoy a very bad time + assert(success || !sysimg_handle); } - -// Code below helps slim down the images -static void jl_scan_type_cache_gv(jl_serializer_state *s, jl_svec_t *cache) +static void jl_reinit_ccallable(arraylist_t *ccallable_list, char *base, void *sysimg_handle) { - size_t l = jl_svec_len(cache), i; - for (i = 0; i < l; i++) { - jl_value_t *ti = jl_svecref(cache, i); - if (ti == NULL || ti == jl_nothing) - continue; - if (jl_get_llvm_gv(native_functions, ti)) { - jl_serialize_value(s, ti); - } - else if (jl_is_datatype(ti)) { - jl_value_t *singleton = ((jl_datatype_t*)ti)->instance; - if (singleton && jl_get_llvm_gv(native_functions, singleton)) - jl_serialize_value(s, ti); - } + for (size_t i = 0; i < ccallable_list->len; i++) { + uintptr_t item = (uintptr_t)ccallable_list->items[i]; + jl_method_t *m = (jl_method_t*)(base + item); + jl_compile_extern(m, sysimg_handle); } } -// remove cached types not referenced in the stream -static void jl_prune_type_cache_hash(jl_svec_t *cache) + +// Code below helps slim down the images by +// removing cached types not referenced in the stream +static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED { size_t l = jl_svec_len(cache), i; + if (l == 0) + return cache; for (i = 0; i < l; i++) { jl_value_t *ti = jl_svecref(cache, i); - if (ti == NULL || ti == jl_nothing) + if (ti == jl_nothing) continue; - if (ptrhash_get(&backref_table, ti) == HT_NOTFOUND) + if (ptrhash_get(&serialization_order, ti) == HT_NOTFOUND) jl_svecset(cache, i, jl_nothing); } + void *idx = ptrhash_get(&serialization_order, cache); + assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1); + assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == cache); + cache = cache_rehash_set(cache, l); + // redirect all references to the old cache to relocate to the new cache object + ptrhash_put(&serialization_order, cache, idx); + serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = cache; + return cache; } static void jl_prune_type_cache_linear(jl_svec_t *cache) @@ -1650,14 +2294,13 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache) size_t l = jl_svec_len(cache), ins = 0, i; for (i = 0; i < l; i++) { jl_value_t *ti = jl_svecref(cache, i); - if (ti == NULL) + if (ti == jl_nothing) break; - if (ptrhash_get(&backref_table, ti) != HT_NOTFOUND) + if (ptrhash_get(&serialization_order, ti) != HT_NOTFOUND) jl_svecset(cache, ins++, ti); } - if (i > ins) { - memset(&jl_svec_data(cache)[ins], 0, (i - ins) * sizeof(jl_value_t*)); - } + while (ins < l) + jl_svecset(cache, ins++, jl_nothing); } static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig) @@ -1667,13 +2310,13 @@ static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig int compressed = 0; if (!jl_is_code_info(ci_)) { compressed = 1; - ci = jl_uncompress_ir(m, NULL, (jl_array_t*)ci_); + ci = jl_uncompress_ir(m, NULL, (jl_value_t*)ci_); } else { ci = (jl_code_info_t*)ci_; } // leave codelocs length the same so the compiler can assume that; just zero it - memset(jl_array_data(ci->codelocs), 0, jl_array_len(ci->codelocs)*sizeof(int32_t)); + memset(jl_array_data(ci->codelocs, int32_t), 0, jl_array_len(ci->codelocs)*sizeof(int32_t)); // empty linetable if (jl_is_array(ci->linetable)) jl_array_del_end((jl_array_t*)ci->linetable, jl_array_len(ci->linetable)); @@ -1696,29 +2339,29 @@ static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig return 
ret; } -static void record_field_change(jl_value_t **addr, jl_value_t *newval) -{ - ptrhash_put(&field_replace, (void*)addr, newval); -} - static void strip_specializations_(jl_method_instance_t *mi) { assert(jl_is_method_instance(mi)); - jl_code_instance_t *codeinst = mi->cache; + jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); while (codeinst) { - if (codeinst->inferred && codeinst->inferred != jl_nothing) { + jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); + if (inferred && inferred != jl_nothing) { if (jl_options.strip_ir) { - record_field_change(&codeinst->inferred, jl_nothing); + record_field_change((jl_value_t**)&codeinst->inferred, jl_nothing); } else if (jl_options.strip_metadata) { - codeinst->inferred = strip_codeinfo_meta(mi->def.method, codeinst->inferred, 0); - jl_gc_wb(codeinst, codeinst->inferred); + jl_value_t *stripped = strip_codeinfo_meta(mi->def.method, inferred, 0); + if (jl_atomic_cmpswap_relaxed(&codeinst->inferred, &inferred, stripped)) { + jl_gc_wb(codeinst, stripped); + } } } codeinst = jl_atomic_load_relaxed(&codeinst->next); } if (jl_options.strip_ir) { - record_field_change(&mi->uninferred, NULL); + record_field_change((jl_value_t**)&mi->uninferred, NULL); + record_field_change((jl_value_t**)&mi->backedges, NULL); + record_field_change((jl_value_t**)&mi->callbacks, NULL); } } @@ -1728,8 +2371,8 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env) if (m->source) { int stripped_ir = 0; if (jl_options.strip_ir) { - if (m->unspecialized) { - jl_code_instance_t *unspec = jl_atomic_load_relaxed(&m->unspecialized->cache); + if (jl_atomic_load_relaxed(&m->unspecialized)) { + jl_code_instance_t *unspec = jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&m->unspecialized)->cache); if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) { // we have a generic compiled version, so can remove the IR record_field_change(&m->source, jl_nothing); @@ -1750,21 +2393,30 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env) jl_gc_wb(m, m->source); } } - jl_svec_t *specializations = m->specializations; - size_t i, l = jl_svec_len(specializations); - for (i = 0; i < l; i++) { - jl_value_t *mi = jl_svecref(specializations, i); - if (mi != jl_nothing) - strip_specializations_((jl_method_instance_t*)mi); + jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations); + if (!jl_is_svec(specializations)) { + strip_specializations_((jl_method_instance_t*)specializations); + } + else { + size_t i, l = jl_svec_len(specializations); + for (i = 0; i < l; i++) { + jl_value_t *mi = jl_svecref(specializations, i); + if (mi != jl_nothing) + strip_specializations_((jl_method_instance_t*)mi); + } } - if (m->unspecialized) - strip_specializations_(m->unspecialized); + if (jl_atomic_load_relaxed(&m->unspecialized)) + strip_specializations_(jl_atomic_load_relaxed(&m->unspecialized)); + if (jl_options.strip_ir && m->root_blocks) + record_field_change((jl_value_t**)&m->root_blocks, NULL); return 1; } static int strip_all_codeinfos_(jl_methtable_t *mt, void *_env) { - return jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL); + if (jl_options.strip_ir && mt->backedges) + record_field_change((jl_value_t**)&mt->backedges, NULL); + return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), strip_all_codeinfos__, NULL); } static void jl_strip_all_codeinfos(void) @@ -1772,59 +2424,157 @@ static void jl_strip_all_codeinfos(void) jl_foreach_reachable_mtable(strip_all_codeinfos_, NULL); } -// Method roots created 
during sysimg construction are exempted from -// triggering non-relocatability of compressed CodeInfos. -// Set the number of such roots in each method when the sysimg is -// serialized. -static int set_nroots_sysimg__(jl_typemap_entry_t *def, void *_env) +// --- entry points --- + +jl_genericmemory_t *jl_global_roots_list; +jl_genericmemory_t *jl_global_roots_keyset; +jl_mutex_t global_roots_lock; + +JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT { - jl_method_t *m = def->func.method; - m->nroots_sysimg = m->roots ? jl_array_len(m->roots) : 0; - return 1; + if (jl_is_datatype(val)) { + jl_datatype_t *dt = (jl_datatype_t*)val; + if (jl_unwrap_unionall(dt->name->wrapper) == val) + return 1; + return (jl_is_tuple_type(val) ? dt->isconcretetype : !dt->hasfreetypevars); // aka is_cacheable from jltypes.c + } + if (jl_is_bool(val) || jl_is_symbol(val) || + val == (jl_value_t*)jl_any_type || val == (jl_value_t*)jl_bottom_type || val == (jl_value_t*)jl_core_module) + return 1; + if (val == ((jl_datatype_t*)jl_typeof(val))->instance) + return 1; + return 0; } -static int set_nroots_sysimg_(jl_methtable_t *mt, void *_env) +static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT JL_GLOBALLY_ROOTED { - return jl_typemap_visitor(mt->defs, set_nroots_sysimg__, NULL); + t = jl_unwrap_unionall(t); + if (jl_is_datatype(t)) + return ((jl_datatype_t*)t)->name->wrapper; + return NULL; } -static void jl_set_nroots_sysimg(void) +JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) { - jl_foreach_reachable_mtable(set_nroots_sysimg_, NULL); + if (jl_is_globally_rooted(val)) + return val; + jl_value_t *tw = extract_wrapper(val); + if (tw && (val == tw || jl_types_egal(val, tw))) + return tw; + if (jl_is_uint8(val)) + return jl_box_uint8(jl_unbox_uint8(val)); + if (jl_is_int32(val)) { + int32_t n = jl_unbox_int32(val); + if ((uint32_t)(n+512) < 1024) + return jl_box_int32(n); + } + else if (jl_is_int64(val)) { + uint64_t n = jl_unbox_uint64(val); + if ((uint64_t)(n+512) < 1024) + return jl_box_int64(n); + } + // check table before acquiring lock to reduce writer contention + jl_value_t *rval = jl_idset_get(jl_global_roots_list, jl_global_roots_keyset, val); + if (rval) + return rval; + JL_LOCK(&global_roots_lock); + rval = jl_idset_get(jl_global_roots_list, jl_global_roots_keyset, val); + if (rval) { + val = rval; + } + else if (insert) { + ssize_t idx; + jl_global_roots_list = jl_idset_put_key(jl_global_roots_list, val, &idx); + jl_global_roots_keyset = jl_idset_put_idx(jl_global_roots_list, jl_global_roots_keyset, idx); + } + else { + val = NULL; + } + JL_UNLOCK(&global_roots_lock); + return val; } -// --- entry points --- +static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, uint64_t worklist_key, + /* outputs */ jl_array_t **extext_methods, jl_array_t **new_specializations, + jl_array_t **method_roots_list, jl_array_t **ext_targets, jl_array_t **edges) +{ + // extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist + // ext_targets: [invokesig1, callee1, matches1, ...] non-worklist callees of worklist-owned methods + // ordinary dispatch: invokesig=NULL, callee is MethodInstance + // `invoke` dispatch: invokesig is signature, callee is MethodInstance + // abstract call: callee is signature + // edges: [caller1, ext_targets_indexes1, ...] 
for worklist-owned methods calling external methods + assert(edges_map == NULL); + + // Save the inferred code from newly inferred, external methods + *new_specializations = queue_external_cis(newly_inferred); + + // Collect method extensions and edges data + JL_GC_PUSH1(&edges_map); + if (edges) + edges_map = jl_alloc_memory_any(0); + *extext_methods = jl_alloc_vec_any(0); + jl_collect_methtable_from_mod(jl_type_type_mt, *extext_methods); + jl_collect_methtable_from_mod(jl_nonfunction_mt, *extext_methods); + size_t i, len = jl_array_len(mod_array); + for (i = 0; i < len; i++) { + jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i); + assert(jl_is_module(m)); + if (m->parent == m) // some toplevel modules (really just Base) aren't actually + jl_collect_extext_methods_from_mod(*extext_methods, m); + } + + if (edges) { + size_t world = jl_atomic_load_acquire(&jl_world_counter); + jl_collect_missing_backedges(jl_type_type_mt); + jl_collect_missing_backedges(jl_nonfunction_mt); + // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges also accumulate data in callers_with_edges. + // Process this to extract `edges` and `ext_targets`. + *ext_targets = jl_alloc_vec_any(0); + *edges = jl_alloc_vec_any(0); + *method_roots_list = jl_alloc_vec_any(0); + // Collect the new method roots + jl_collect_new_roots(*method_roots_list, *new_specializations, worklist_key); + jl_collect_edges(*edges, *ext_targets, *new_specializations, world); + } + assert(edges_map == NULL); // jl_collect_edges clears this when done -static void jl_init_serializer2(int); -static void jl_cleanup_serializer2(void); + JL_GC_POP(); +} -static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED +// In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage +static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, + jl_array_t *worklist, jl_array_t *extext_methods, + jl_array_t *new_specializations, jl_array_t *method_roots_list, + jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED { - jl_gc_collect(JL_GC_FULL); - jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers - JL_TIMING(SYSIMG_DUMP); - - htable_new(&field_replace, 10000); + htable_new(&field_replace, 0); // strip metadata and IR when requested if (jl_options.strip_metadata || jl_options.strip_ir) jl_strip_all_codeinfos(); - jl_set_nroots_sysimg(); int en = jl_gc_enable(0); - jl_init_serializer2(1); - htable_reset(&backref_table, 250000); - arraylist_new(&reinit_list, 0); - arraylist_new(&ccallable_list, 0); + nsym_tag = 0; + htable_new(&symbol_table, 0); + htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); + uintptr_t i; + for (i = 0; id_to_fptrs[i] != NULL; i++) { + ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); + } + htable_new(&serialization_order, 25000); + htable_new(&unique_ready, 0); + htable_new(&nullptrs, 0); arraylist_new(&object_worklist, 0); - backref_table_numel = 0; + arraylist_new(&serialization_queue, 0); ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record; - ios_mem(&sysimg, 1000000); - ios_mem(&const_data, 100000); - ios_mem(&symbols, 100000); - ios_mem(&relocs, 100000); - ios_mem(&gvar_record, 100000); - ios_mem(&fptr_record, 100000); - jl_serializer_state s; + ios_mem(&sysimg, 0); + ios_mem(&const_data, 0); + ios_mem(&symbols, 0); + ios_mem(&relocs, 0); + ios_mem(&gvar_record, 0); + ios_mem(&fptr_record, 0); + jl_serializer_state s = {0}; + s.incremental = !(worklist == 
NULL); s.s = &sysimg; s.const_data = &const_data; s.symbols = &symbols; @@ -1832,20 +2582,41 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED s.gvar_record = &gvar_record; s.fptr_record = &fptr_record; s.ptls = jl_current_task->ptls; + arraylist_new(&s.memowner_list, 0); + arraylist_new(&s.memref_list, 0); arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); - jl_value_t **const*const tags = get_tags(); - - // empty!(Core.ARGS) - if (jl_core_module != NULL) { - jl_array_t *args = (jl_array_t*)jl_get_global(jl_core_module, jl_symbol("ARGS")); - if (args != NULL) { - jl_array_del_end(args, jl_array_len(args)); + arraylist_new(&s.uniquing_types, 0); + arraylist_new(&s.uniquing_objs, 0); + arraylist_new(&s.fixup_types, 0); + arraylist_new(&s.fixup_objs, 0); + arraylist_new(&s.ccallable_list, 0); + s.buildid_depmods_idxs = image_to_depmodidx(mod_array); + s.link_ids_relocs = jl_alloc_array_1d(jl_array_int32_type, 0); + s.link_ids_gctags = jl_alloc_array_1d(jl_array_int32_type, 0); + s.link_ids_gvars = jl_alloc_array_1d(jl_array_int32_type, 0); + s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_int32_type, 0); + htable_new(&s.callers_with_edges, 0); + jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL; + + arraylist_t gvars; + arraylist_t external_fns; + arraylist_new(&gvars, 0); + arraylist_new(&external_fns, 0); + if (native_functions) { + jl_get_llvm_gvs(native_functions, &gvars); + jl_get_llvm_external_fns(native_functions, &external_fns); + } + + if (worklist == NULL) { + // empty!(Core.ARGS) + if (jl_core_module != NULL) { + jl_array_t *args = (jl_array_t*)jl_get_global(jl_core_module, jl_symbol("ARGS")); + if (args != NULL) { + jl_array_del_end(args, jl_array_len(args)); + } } } - - jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL; - jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL; jl_bigint_type = jl_base_module ? 
jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL; if (jl_bigint_type) { gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")), @@ -1857,136 +2628,369 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED jl_docmeta_sym = (jl_sym_t*)jl_get_global((jl_module_t*)docs, jl_symbol("META")); } } + jl_genericmemory_t *global_roots_list = NULL; + jl_genericmemory_t *global_roots_keyset = NULL; { // step 1: record values (recursively) that need to go in the image size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t *tag = *tags[i]; - jl_serialize_value(&s, tag); + if (worklist == NULL) { + for (i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + jl_queue_for_serialization(&s, tag); + } + jl_queue_for_serialization(&s, s.ptls->root_task->tls); } - jl_serialize_reachable(&s); - // step 1.1: check for values only found in the generated code - arraylist_t typenames; - arraylist_new(&typenames, 0); - for (i = 0; i < backref_table.size; i += 2) { - jl_typename_t *tn = (jl_typename_t*)backref_table.table[i]; - if (tn == HT_NOTFOUND || !jl_is_typename(tn)) - continue; - arraylist_push(&typenames, tn); + else { + // To ensure we don't have to manually update the list, go through all tags and queue any that are not otherwise + // judged to be externally-linked + htable_new(&external_objects, NUM_TAGS); + for (size_t i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + ptrhash_put(&external_objects, tag, tag); + } + // Queue the worklist itself as the first item we serialize + jl_queue_for_serialization(&s, worklist); + jl_queue_for_serialization(&s, jl_module_init_order); + // Classify the CodeInstances with respect to their need for validation + classify_callers(&s.callers_with_edges, edges); } - for (i = 0; i < typenames.len; i++) { - jl_typename_t *tn = (jl_typename_t*)typenames.items[i]; - jl_scan_type_cache_gv(&s, tn->cache); - jl_scan_type_cache_gv(&s, tn->linearcache); + // step 1.1: as needed, serialize the data needed for insertion into the running system + if (extext_methods) { + assert(ext_targets); + assert(edges); + // Queue method extensions + jl_queue_for_serialization(&s, extext_methods); + // Queue the new specializations + jl_queue_for_serialization(&s, new_specializations); + // Queue the new roots + jl_queue_for_serialization(&s, method_roots_list); + // Queue the edges + jl_queue_for_serialization(&s, ext_targets); + jl_queue_for_serialization(&s, edges); } jl_serialize_reachable(&s); - // step 1.2: prune (garbage collect) some special weak references from - // built-in type caches - for (i = 0; i < typenames.len; i++) { - jl_typename_t *tn = (jl_typename_t*)typenames.items[i]; - jl_prune_type_cache_hash(tn->cache); - jl_prune_type_cache_linear(tn->linearcache); + // step 1.2: ensure all gvars are part of the sysimage too + record_gvars(&s, &gvars); + record_external_fns(&s, &external_fns); + jl_serialize_reachable(&s); + // step 1.3: prune (garbage collect) special weak references from the jl_global_roots_list + if (worklist == NULL) { + global_roots_list = jl_alloc_memory_any(0); + global_roots_keyset = jl_alloc_memory_any(0); + for (size_t i = 0; i < jl_global_roots_list->length; i++) { + jl_value_t *val = jl_genericmemory_ptr_ref(jl_global_roots_list, i); + if (val && ptrhash_get(&serialization_order, val) != HT_NOTFOUND) { + ssize_t idx; + global_roots_list = jl_idset_put_key(global_roots_list, val, &idx); + global_roots_keyset = jl_idset_put_idx(global_roots_list, 
global_roots_keyset, idx); + } + } + jl_queue_for_serialization(&s, global_roots_list); + jl_queue_for_serialization(&s, global_roots_keyset); + jl_serialize_reachable(&s); + } + // step 1.4: prune (garbage collect) some special weak references from + // built-in type caches too + for (i = 0; i < serialization_queue.len; i++) { + jl_typename_t *tn = (jl_typename_t*)serialization_queue.items[i]; + if (jl_is_typename(tn)) { + jl_atomic_store_relaxed(&tn->cache, + jl_prune_type_cache_hash(jl_atomic_load_relaxed(&tn->cache))); + jl_gc_wb(tn, jl_atomic_load_relaxed(&tn->cache)); + jl_prune_type_cache_linear(jl_atomic_load_relaxed(&tn->linearcache)); + } } - arraylist_free(&typenames); } + uint32_t external_fns_begin = 0; { // step 2: build all the sysimg sections - write_padding(&sysimg, sizeof(uint32_t)); + write_padding(&sysimg, sizeof(uintptr_t)); jl_write_values(&s); - jl_write_relocations(&s); - jl_write_gv_syms(&s, jl_get_root_symbol()); - jl_write_gv_tagrefs(&s); + external_fns_begin = write_gvars(&s, &gvars, &external_fns); } - if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET) || - const_data.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)*sizeof(void*)) { - jl_printf(JL_STDERR, "ERROR: system image too large\n"); + // This ensures that we can use the low bit of addresses for + // identifying end pointers in gc's eytzinger search. + write_padding(&sysimg, 4 - (sysimg.size % 4)); + write_padding(&const_data, 4 - (const_data.size % 4)); + + if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) { + jl_printf( + JL_STDERR, + "ERROR: system image too large: sysimg.size is 0x%" PRIxPTR " but the limit is 0x%" PRIxPTR "\n", + (uintptr_t)sysimg.size, + ((uintptr_t)1 << RELOC_TAG_OFFSET) + ); + jl_exit(1); + } + if (const_data.size / sizeof(void*) > ((uintptr_t)1 << RELOC_TAG_OFFSET)) { + jl_printf( + JL_STDERR, + "ERROR: system image too large: const_data.size is 0x%" PRIxPTR " but the limit is 0x%" PRIxPTR "\n", + (uintptr_t)const_data.size, + ((uintptr_t)1 << RELOC_TAG_OFFSET)*sizeof(void*) + ); jl_exit(1); } + htable_free(&s.callers_with_edges); // step 3: combine all of the sections into one file - write_uint32(f, sysimg.size - sizeof(uint32_t)); - ios_seek(&sysimg, sizeof(uint32_t)); + assert(ios_pos(f) % JL_CACHE_BYTE_ALIGNMENT == 0); + ssize_t sysimg_offset = ios_pos(f); + write_uint(f, sysimg.size - sizeof(uintptr_t)); + ios_seek(&sysimg, sizeof(uintptr_t)); ios_copyall(f, &sysimg); + size_t sysimg_size = s.s->size; + assert(ios_pos(f) - sysimg_offset == sysimg_size); ios_close(&sysimg); - write_uint32(f, const_data.size); + write_uint(f, const_data.size); // realign stream to max-alignment for data - write_padding(f, LLT_ALIGN(ios_pos(f), 16) - ios_pos(f)); + write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f)); ios_seek(&const_data, 0); ios_copyall(f, &const_data); ios_close(&const_data); - write_uint32(f, symbols.size); + write_uint(f, symbols.size); + write_padding(f, LLT_ALIGN(ios_pos(f), 8) - ios_pos(f)); ios_seek(&symbols, 0); ios_copyall(f, &symbols); ios_close(&symbols); - write_uint32(f, relocs.size); + // Prepare and write the relocations sections, now that the rest of the image is laid out + char *base = &f->buf[0]; + jl_finish_relocs(base + sysimg_offset, sysimg_size, &s.gctags_list); + jl_finish_relocs(base + sysimg_offset, sysimg_size, &s.relocs_list); + jl_write_offsetlist(s.relocs, sysimg_size, &s.gctags_list); + jl_write_offsetlist(s.relocs, sysimg_size, &s.relocs_list); + jl_write_offsetlist(s.relocs, sysimg_size, &s.memowner_list); + 
jl_write_offsetlist(s.relocs, sysimg_size, &s.memref_list); + if (s.incremental) { + jl_write_arraylist(s.relocs, &s.uniquing_types); + jl_write_arraylist(s.relocs, &s.uniquing_objs); + jl_write_arraylist(s.relocs, &s.fixup_types); + } + jl_write_arraylist(s.relocs, &s.fixup_objs); + write_uint(f, relocs.size); + write_padding(f, LLT_ALIGN(ios_pos(f), 8) - ios_pos(f)); ios_seek(&relocs, 0); ios_copyall(f, &relocs); ios_close(&relocs); - write_uint32(f, gvar_record.size); + write_uint(f, gvar_record.size); + write_padding(f, LLT_ALIGN(ios_pos(f), 8) - ios_pos(f)); ios_seek(&gvar_record, 0); ios_copyall(f, &gvar_record); ios_close(&gvar_record); - write_uint32(f, fptr_record.size); + write_uint(f, fptr_record.size); + write_padding(f, LLT_ALIGN(ios_pos(f), 8) - ios_pos(f)); ios_seek(&fptr_record, 0); ios_copyall(f, &fptr_record); ios_close(&fptr_record); { // step 4: record locations of special roots + write_padding(f, LLT_ALIGN(ios_pos(f), 8) - ios_pos(f)); s.s = f; - size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t *tag = *tags[i]; - jl_write_value(&s, tag); + if (worklist == NULL) { + size_t i; + for (i = 0; tags[i] != NULL; i++) { + jl_value_t *tag = *tags[i]; + jl_write_value(&s, tag); + } + jl_write_value(&s, global_roots_list); + jl_write_value(&s, global_roots_keyset); + jl_write_value(&s, s.ptls->root_task->tls); + write_uint32(f, jl_get_gs_ctr()); + write_uint(f, jl_atomic_load_acquire(&jl_world_counter)); + write_uint(f, jl_typeinf_world); + } + else { + jl_write_value(&s, worklist); + // save module initialization order + if (jl_module_init_order != NULL) { + size_t i, l = jl_array_len(jl_module_init_order); + for (i = 0; i < l; i++) { + // verify that all these modules were saved + assert(ptrhash_get(&serialization_order, jl_array_ptr_ref(jl_module_init_order, i)) != HT_NOTFOUND); + } + } + jl_write_value(&s, jl_module_init_order); + jl_write_value(&s, extext_methods); + jl_write_value(&s, new_specializations); + jl_write_value(&s, method_roots_list); + jl_write_value(&s, ext_targets); + jl_write_value(&s, edges); } - jl_write_value(&s, s.ptls->root_task->tls); - write_uint32(f, jl_get_gs_ctr()); - write_uint32(f, jl_atomic_load_acquire(&jl_world_counter)); - write_uint32(f, jl_typeinf_world); - jl_finalize_serializer(&s, &reinit_list); - jl_finalize_serializer(&s, &ccallable_list); + write_uint32(f, jl_array_len(s.link_ids_gctags)); + ios_write(f, (char*)jl_array_data(s.link_ids_gctags, uint32_t), jl_array_len(s.link_ids_gctags) * sizeof(uint32_t)); + write_uint32(f, jl_array_len(s.link_ids_relocs)); + ios_write(f, (char*)jl_array_data(s.link_ids_relocs, uint32_t), jl_array_len(s.link_ids_relocs) * sizeof(uint32_t)); + write_uint32(f, jl_array_len(s.link_ids_gvars)); + ios_write(f, (char*)jl_array_data(s.link_ids_gvars, uint32_t), jl_array_len(s.link_ids_gvars) * sizeof(uint32_t)); + write_uint32(f, jl_array_len(s.link_ids_external_fnvars)); + ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars, uint32_t), jl_array_len(s.link_ids_external_fnvars) * sizeof(uint32_t)); + write_uint32(f, external_fns_begin); + jl_write_arraylist(s.s, &s.ccallable_list); } assert(object_worklist.len == 0); arraylist_free(&object_worklist); + arraylist_free(&serialization_queue); arraylist_free(&layout_table); - arraylist_free(&reinit_list); - arraylist_free(&ccallable_list); + arraylist_free(&s.ccallable_list); + arraylist_free(&s.memowner_list); + arraylist_free(&s.memref_list); arraylist_free(&s.relocs_list); arraylist_free(&s.gctags_list); + arraylist_free(&gvars); + 
arraylist_free(&external_fns); htable_free(&field_replace); - jl_cleanup_serializer2(); + if (worklist) + htable_free(&external_objects); + htable_free(&serialization_order); + htable_free(&unique_ready); + htable_free(&nullptrs); + htable_free(&symbol_table); + htable_free(&fptr_to_id); + nsym_tag = 0; jl_gc_enable(en); } -JL_DLLEXPORT ios_t *jl_create_system_image(void *_native_data) +static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_array_t *mod_array, jl_array_t **udeps, int64_t *srctextpos, int64_t *checksumpos) { - ios_t *f = (ios_t*)malloc_s(sizeof(ios_t)); - ios_mem(f, 0); - native_functions = _native_data; - jl_save_system_image_to_stream(f); - return f; + assert(jl_precompile_toplevel_module == NULL); + jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); + + *checksumpos = write_header(f, 0); + write_uint8(f, jl_cache_flags()); + // write description of contents (name, uuid, buildid) + write_worklist_for_header(f, worklist); + // Determine unique (module, abspath, fsize, hash, mtime) dependencies for the files defining modules in the worklist + // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header + // (abspath will be converted to a relocateable @depot path before writing, cf. Base.replace_depot_path). + // Also write Preferences. + // last word of the dependency list is the end of the data / start of the srctextpos + *srctextpos = write_dependency_list(f, worklist, udeps); // srctextpos: position of srctext entry in header index (update later) + // write description of requirements for loading (modules that must be pre-loaded if initialization is to succeed) + // this can return errors during deserialize, + // best to keep it early (before any actual initialization) + write_mod_list(f, mod_array); } -JL_DLLEXPORT size_t ios_write_direct(ios_t *dest, ios_t *src); -JL_DLLEXPORT void jl_save_system_image(const char *fname) +JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *worklist, bool_t emit_split, + ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos) { - ios_t f; - if (ios_file(&f, fname, 1, 1, 1, 1) == NULL) { - jl_errorf("cannot open system image file \"%s\" for writing", fname); + jl_gc_collect(JL_GC_FULL); + jl_gc_collect(JL_GC_INCREMENTAL); // sweep finalizers + JL_TIMING(SYSIMG_DUMP, SYSIMG_DUMP); + + // iff emit_split + // write header and src_text to one file f/s + // write systemimg to a second file ff/z + jl_task_t *ct = jl_current_task; + ios_t *f = (ios_t*)malloc_s(sizeof(ios_t)); + ios_mem(f, 0); + + ios_t *ff = NULL; + if (emit_split) { + ff = (ios_t*)malloc_s(sizeof(ios_t)); + ios_mem(ff, 0); + } else { + ff = f; + } + + jl_array_t *mod_array = NULL, *extext_methods = NULL, *new_specializations = NULL; + jl_array_t *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; + int64_t checksumpos = 0; + int64_t checksumpos_ff = 0; + int64_t datastartpos = 0; + JL_GC_PUSH6(&mod_array, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); + + if (worklist) { + mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array) + // Generate _native_data` + if (_native_data != NULL) { + jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist), + &extext_methods, &new_specializations, NULL, NULL, NULL); + jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); + 
*_native_data = jl_precompile_worklist(worklist, extext_methods, new_specializations); + jl_precompile_toplevel_module = NULL; + extext_methods = NULL; + new_specializations = NULL; + } + jl_write_header_for_incremental(f, worklist, mod_array, udeps, srctextpos, &checksumpos); + if (emit_split) { + checksumpos_ff = write_header(ff, 1); + write_uint8(ff, jl_cache_flags()); + write_mod_list(ff, mod_array); + } + else { + checksumpos_ff = checksumpos; + } } - JL_SIGATOMIC_BEGIN(); - jl_save_system_image_to_stream(&f); - ios_close(&f); - JL_SIGATOMIC_END(); + else if (_native_data != NULL) { + *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL); + } + + // Make sure we don't run any Julia code concurrently after this point + // since it will invalidate our serialization preparations + jl_gc_enable_finalizers(ct, 0); + assert((ct->reentrant_timing & 0b1110) == 0); + ct->reentrant_timing |= 0b1000; + if (worklist) { + jl_prepare_serialization_data(mod_array, newly_inferred, jl_worklist_key(worklist), + &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges); + if (!emit_split) { + write_int32(f, 0); // No clone_targets + write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f)); + } + else { + write_padding(ff, LLT_ALIGN(ios_pos(ff), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(ff)); + } + datastartpos = ios_pos(ff); + } + if (_native_data != NULL) + native_functions = *_native_data; + jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges); + if (_native_data != NULL) + native_functions = NULL; + // make sure we don't run any Julia code concurrently before this point + // Re-enable running julia code for postoutput hooks, atexit, etc. 
+ jl_gc_enable_finalizers(ct, 1); + ct->reentrant_timing &= ~0b1000u; + jl_precompile_toplevel_module = NULL; + + if (worklist) { + // Go back and update the checksum in the header + int64_t dataendpos = ios_pos(ff); + uint32_t checksum = jl_crc32c(0, &ff->buf[datastartpos], dataendpos - datastartpos); + ios_seek(ff, checksumpos_ff); + write_uint64(ff, checksum | ((uint64_t)0xfafbfcfd << 32)); + write_uint64(ff, datastartpos); + write_uint64(ff, dataendpos); + ios_seek(ff, dataendpos); + + // Write the checksum to the split header if necessary + if (emit_split) { + int64_t cur = ios_pos(f); + ios_seek(f, checksumpos); + write_uint64(f, checksum | ((uint64_t)0xfafbfcfd << 32)); + ios_seek(f, cur); + // Next we will write the clone_targets and afterwards the srctext + } + } + + JL_GC_POP(); + *s = f; + if (emit_split) + *z = ff; + return; } +JL_DLLEXPORT size_t ios_write_direct(ios_t *dest, ios_t *src); + // Takes in a path of the form "usr/lib/julia/sys.so" (jl_restore_system_image should be passed the same string) JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname) { @@ -2005,22 +3009,41 @@ JL_DLLEXPORT void jl_preload_sysimg_so(const char *fname) JL_DLLEXPORT void jl_set_sysimg_so(void *handle) { void* *jl_RTLD_DEFAULT_handle_pointer; - int symbol_found = jl_dlsym(handle, "jl_RTLD_DEFAULT_handle_pointer", (void **)&jl_RTLD_DEFAULT_handle_pointer, 0); - if (!symbol_found || (void*)&jl_RTLD_DEFAULT_handle != *jl_RTLD_DEFAULT_handle_pointer) - jl_error("System image file failed consistency check: maybe opened the wrong version?"); + if (handle != jl_RTLD_DEFAULT_handle) { + int symbol_found = jl_dlsym(handle, "jl_RTLD_DEFAULT_handle_pointer", (void **)&jl_RTLD_DEFAULT_handle_pointer, 0); + if (!symbol_found || (void*)&jl_RTLD_DEFAULT_handle != *jl_RTLD_DEFAULT_handle_pointer) + jl_error("System image file failed consistency check: maybe opened the wrong version?"); + } if (jl_options.cpu_target == NULL) jl_options.cpu_target = "native"; jl_sysimg_handle = handle; - sysimg_fptrs = jl_init_processor_sysimg(handle); + sysimage = jl_init_processor_sysimg(handle); } -static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED +#ifndef JL_NDEBUG +// skip the performance optimizations of jl_types_equal and just use subtyping directly +// one of these types is invalid - that's why we're doing the recache type operation +// static int jl_invalid_types_equal(jl_datatype_t *a, jl_datatype_t *b) +// { +// return jl_subtype((jl_value_t*)a, (jl_value_t*)b) && jl_subtype((jl_value_t*)b, (jl_value_t*)a); +// } +#endif + +extern void rebuild_image_blob_tree(void); +extern void export_jl_small_typeof(void); + +static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum, + /* outputs */ jl_array_t **restored, jl_array_t **init_order, + jl_array_t **extext_methods, + jl_array_t **new_specializations, jl_array_t **method_roots_list, + jl_array_t **ext_targets, jl_array_t **edges, + char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED { - JL_TIMING(SYSIMG_LOAD); int en = jl_gc_enable(0); - jl_init_serializer2(0); ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record; - jl_serializer_state s; + jl_serializer_state s = {0}; + s.incremental = restored != NULL; // jl_linkage_blobs.len > 0; + s.image = image; s.s = NULL; s.const_data = &const_data; s.symbols = &symbols; @@ -2028,60 +3051,124 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED s.gvar_record = &gvar_record; 
s.fptr_record = &fptr_record; s.ptls = jl_current_task->ptls; - arraylist_new(&s.relocs_list, 0); - arraylist_new(&s.gctags_list, 0); jl_value_t **const*const tags = get_tags(); + htable_t new_dt_objs; + htable_new(&new_dt_objs, 0); + arraylist_new(&deser_sym, 0); // step 1: read section map assert(ios_pos(f) == 0 && f->bm == bm_mem); - size_t sizeof_sysimg = read_uint32(f); - ios_static_buffer(&sysimg, f->buf, sizeof_sysimg + sizeof(uint32_t)); - ios_skip(f, sizeof_sysimg); + size_t sizeof_sysdata = read_uint(f); + ios_static_buffer(&sysimg, f->buf, sizeof_sysdata + sizeof(uintptr_t)); + ios_skip(f, sizeof_sysdata); - size_t sizeof_constdata = read_uint32(f); + size_t sizeof_constdata = read_uint(f); // realign stream to max-alignment for data - ios_seek(f, LLT_ALIGN(ios_pos(f), 16)); + ios_seek(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT)); ios_static_buffer(&const_data, f->buf + f->bpos, sizeof_constdata); ios_skip(f, sizeof_constdata); - size_t sizeof_symbols = read_uint32(f); + size_t sizeof_sysimg = f->bpos; + + size_t sizeof_symbols = read_uint(f); + ios_seek(f, LLT_ALIGN(ios_pos(f), 8)); ios_static_buffer(&symbols, f->buf + f->bpos, sizeof_symbols); ios_skip(f, sizeof_symbols); - size_t sizeof_relocations = read_uint32(f); + size_t sizeof_relocations = read_uint(f); + ios_seek(f, LLT_ALIGN(ios_pos(f), 8)); assert(!ios_eof(f)); ios_static_buffer(&relocs, f->buf + f->bpos, sizeof_relocations); ios_skip(f, sizeof_relocations); - size_t sizeof_gvar_record = read_uint32(f); + size_t sizeof_gvar_record = read_uint(f); + ios_seek(f, LLT_ALIGN(ios_pos(f), 8)); assert(!ios_eof(f)); ios_static_buffer(&gvar_record, f->buf + f->bpos, sizeof_gvar_record); ios_skip(f, sizeof_gvar_record); - size_t sizeof_fptr_record = read_uint32(f); + size_t sizeof_fptr_record = read_uint(f); + ios_seek(f, LLT_ALIGN(ios_pos(f), 8)); assert(!ios_eof(f)); ios_static_buffer(&fptr_record, f->buf + f->bpos, sizeof_fptr_record); ios_skip(f, sizeof_fptr_record); // step 2: get references to special values + ios_seek(f, LLT_ALIGN(ios_pos(f), 8)); + assert(!ios_eof(f)); s.s = f; - size_t i; - for (i = 0; tags[i] != NULL; i++) { - jl_value_t **tag = tags[i]; - *tag = jl_read_value(&s); - } - // set typeof extra-special values now that we have the type set by tags above - jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header; - jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header; - s.ptls->root_task->tls = jl_read_value(&s); - jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls); - jl_init_int32_int64_cache(); - jl_init_box_caches(); - - uint32_t gs_ctr = read_uint32(f); - jl_atomic_store_release(&jl_world_counter, read_uint32(f)); - jl_typeinf_world = read_uint32(f); - jl_set_gs_ctr(gs_ctr); + uintptr_t offset_restored = 0, offset_init_order = 0, offset_extext_methods = 0, offset_new_specializations = 0, offset_method_roots_list = 0; + uintptr_t offset_ext_targets = 0, offset_edges = 0; + if (!s.incremental) { + size_t i; + for (i = 0; tags[i] != NULL; i++) { + jl_value_t **tag = tags[i]; + *tag = jl_read_value(&s); + } +#define XX(name) \ + ijl_small_typeof[(jl_##name##_tag << 4) / sizeof(*ijl_small_typeof)] = jl_##name##_type; + JL_SMALL_TYPEOF(XX) +#undef XX + export_jl_small_typeof(); + jl_global_roots_list = (jl_genericmemory_t*)jl_read_value(&s); + jl_global_roots_keyset = (jl_genericmemory_t*)jl_read_value(&s); + // set typeof extra-special values now that we have the type set by tags above + 
jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header; + s.ptls->root_task->tls = jl_read_value(&s); + jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls); + jl_init_int32_int64_cache(); + jl_init_box_caches(); + + uint32_t gs_ctr = read_uint32(f); + jl_atomic_store_release(&jl_world_counter, read_uint(f)); + jl_typeinf_world = read_uint(f); + jl_set_gs_ctr(gs_ctr); + } + else { + jl_atomic_fetch_add(&jl_world_counter, 1); + offset_restored = jl_read_offset(&s); + offset_init_order = jl_read_offset(&s); + offset_extext_methods = jl_read_offset(&s); + offset_new_specializations = jl_read_offset(&s); + offset_method_roots_list = jl_read_offset(&s); + offset_ext_targets = jl_read_offset(&s); + offset_edges = jl_read_offset(&s); + } + s.buildid_depmods_idxs = depmod_to_imageidx(depmods); + size_t nlinks_gctags = read_uint32(f); + if (nlinks_gctags > 0) { + s.link_ids_gctags = jl_alloc_array_1d(jl_array_int32_type, nlinks_gctags); + ios_read(f, (char*)jl_array_data(s.link_ids_gctags, uint32_t), nlinks_gctags * sizeof(uint32_t)); + } + size_t nlinks_relocs = read_uint32(f); + if (nlinks_relocs > 0) { + s.link_ids_relocs = jl_alloc_array_1d(jl_array_int32_type, nlinks_relocs); + ios_read(f, (char*)jl_array_data(s.link_ids_relocs, uint32_t), nlinks_relocs * sizeof(uint32_t)); + } + size_t nlinks_gvars = read_uint32(f); + if (nlinks_gvars > 0) { + s.link_ids_gvars = jl_alloc_array_1d(jl_array_int32_type, nlinks_gvars); + ios_read(f, (char*)jl_array_data(s.link_ids_gvars, uint32_t), nlinks_gvars * sizeof(uint32_t)); + } + size_t nlinks_external_fnvars = read_uint32(f); + if (nlinks_external_fnvars > 0) { + s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_int32_type, nlinks_external_fnvars); + ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars, uint32_t), nlinks_external_fnvars * sizeof(uint32_t)); + } + uint32_t external_fns_begin = read_uint32(f); + jl_read_arraylist(s.s, ccallable_list ? 
ccallable_list : &s.ccallable_list); + if (s.incremental) { + assert(restored && init_order && extext_methods && new_specializations && method_roots_list && ext_targets && edges); + *restored = (jl_array_t*)jl_delayed_reloc(&s, offset_restored); + *init_order = (jl_array_t*)jl_delayed_reloc(&s, offset_init_order); + *extext_methods = (jl_array_t*)jl_delayed_reloc(&s, offset_extext_methods); + *new_specializations = (jl_array_t*)jl_delayed_reloc(&s, offset_new_specializations); + *method_roots_list = (jl_array_t*)jl_delayed_reloc(&s, offset_method_roots_list); + *ext_targets = (jl_array_t*)jl_delayed_reloc(&s, offset_ext_targets); + *edges = (jl_array_t*)jl_delayed_reloc(&s, offset_edges); + if (!*new_specializations) + *new_specializations = jl_alloc_vec_any(0); + } s.s = NULL; // step 3: apply relocations @@ -2089,24 +3176,307 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED jl_read_symbols(&s); ios_close(&symbols); - sysimg_base = &sysimg.buf[0]; - sysimg_relocs = &relocs.buf[0]; - jl_gc_set_permalloc_region((void*)sysimg_base, (void*)(sysimg_base + sysimg.size)); + char *image_base = (char*)&sysimg.buf[0]; + reloc_t *relocs_base = (reloc_t*)&relocs.buf[0]; + if (base) + *base = image_base; s.s = &sysimg; - jl_read_relocations(&s, GC_OLD_MARKED); // gctags + jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD | GC_IN_IMAGE); // gctags size_t sizeof_tags = ios_pos(&relocs); (void)sizeof_tags; - jl_read_relocations(&s, 0); // general relocs + jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs + jl_read_memreflist(&s); // memowner_list relocs (must come before memref_list reads the pointers and after general relocs computes the pointers) + jl_read_memreflist(&s); // memref_list relocs + // s.link_ids_gvars will be processed in `jl_update_all_gvars` + // s.link_ids_external_fns will be processed in `jl_update_all_gvars` + jl_update_all_gvars(&s, image, external_fns_begin); // gvars relocs + if (s.incremental) { + jl_read_arraylist(s.relocs, &s.uniquing_types); + jl_read_arraylist(s.relocs, &s.uniquing_objs); + jl_read_arraylist(s.relocs, &s.fixup_types); + } + else { + arraylist_new(&s.uniquing_types, 0); + arraylist_new(&s.uniquing_objs, 0); + arraylist_new(&s.fixup_types, 0); + } + jl_read_arraylist(s.relocs, &s.fixup_objs); + // Perform the uniquing of objects that we don't "own" and consequently can't promise + // weren't created by some other package before this one got loaded: + // - iterate through all objects that need to be uniqued. The first encounter has to be the + // "reconstructable blob". We either look up the object (if something has created it previously) + // or construct it for the first time, crucially outside the pointer range of any pkgimage. + // This ensures it stays unique-worthy. + // - after we've stored the address of the "real" object (which for convenience we do among the data + // written to allow lookup/reconstruction), then we have to update references to that "reconstructable blob": + // instead of performing the relocation within the package image, we instead (re)direct all references + // to the external object. 
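The uniquing pass that follows packs two flag bits into the low bits of each pending item: bit 0 marks an entry that patches an object's type tag rather than an ordinary field, and bit 1 marks a gvar-table index rather than an offset into the image. A small sketch of that encoding (helper names are invented); in the patch itself a gvar payload is stored pre-shifted, so it is recovered with item >>= 2 after the flags are masked off:

#include <assert.h>
#include <stdint.h>

#define ITEM_IS_TYPETAG  ((uintptr_t)1)   /* bit 0: patch the type tag, not a field */
#define ITEM_IS_GVAR     ((uintptr_t)2)   /* bit 1: value is a gvar index, not an image offset */

static uintptr_t encode_item(uintptr_t payload, int is_typetag, int is_gvar)
{
    /* the payload must leave the two low bits free */
    assert((payload & 3) == 0);
    return payload | (is_typetag ? ITEM_IS_TYPETAG : 0) | (is_gvar ? ITEM_IS_GVAR : 0);
}

static void decode_item(uintptr_t item, uintptr_t *payload, int *is_typetag, int *is_gvar)
{
    *is_typetag = (item & ITEM_IS_TYPETAG) != 0;
    *is_gvar = (item & ITEM_IS_GVAR) != 0;
    *payload = item & ~(uintptr_t)3;
}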
+ arraylist_t cleanup_list; + arraylist_new(&cleanup_list, 0); + arraylist_t delay_list; + arraylist_new(&delay_list, 0); + for (size_t i = 0; i < s.uniquing_types.len; i++) { + uintptr_t item = (uintptr_t)s.uniquing_types.items[i]; + // check whether we are operating on the typetag + // (needing to ignore GC bits) or a regular field + int tag = (item & 1) == 1; + // check whether this is a gvar index + int gvar = (item & 2) == 2; + item &= ~(uintptr_t)3; + uintptr_t *pfld; + jl_value_t **obj, *newobj; + if (gvar) { + if (image->gvars_base == NULL) + continue; + item >>= 2; + assert(item < s.gvar_record->size / sizeof(reloc_t)); + pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item); + obj = *(jl_value_t***)pfld; + assert(tag == 0); + } + else { + pfld = (uintptr_t*)(image_base + item); + if (tag) + obj = (jl_value_t**)jl_typeof(jl_valueof(pfld)); + else + obj = *(jl_value_t***)pfld; + if ((char*)obj > (char*)pfld) { + assert(tag == 0); + arraylist_push(&delay_list, pfld); + arraylist_push(&delay_list, obj); + ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid + *pfld = (uintptr_t)NULL; + continue; + } + } + uintptr_t otyp = jl_typetagof(obj); // the original type of the object that was written here + assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg); + if (otyp == jl_datatype_tag << 4) { + jl_datatype_t *dt = (jl_datatype_t*)obj[0], *newdt; + if (jl_is_datatype(dt)) { + newdt = dt; // already done + } + else { + dt = (jl_datatype_t*)obj; + arraylist_push(&cleanup_list, (void*)obj); + ptrhash_remove(&new_dt_objs, (void*)obj); // unmark obj as invalid before must_be_new_dt + if (must_be_new_dt((jl_value_t*)dt, &new_dt_objs, image_base, sizeof_sysimg)) + newdt = NULL; + else + newdt = jl_lookup_cache_type_(dt); + if (newdt == NULL) { + // make a non-owned copy of obj so we don't accidentally + // assume this is the unique copy later + newdt = jl_new_uninitialized_datatype(); + jl_astaggedvalue(newdt)->bits.gc = GC_OLD; + // leave most fields undefined for now, but we may need instance later, + // and we overwrite the name field (field 0) now so preserve it too + if (dt->instance) { + if (dt->instance == jl_nothing) + dt->instance = jl_gc_permobj(0, newdt); + newdt->instance = dt->instance; + } + static_assert(offsetof(jl_datatype_t, name) == 0, ""); + newdt->name = dt->name; + ptrhash_put(&new_dt_objs, (void*)newdt, dt); + } + else { + assert(newdt->hash == dt->hash); + } + obj[0] = (jl_value_t*)newdt; + } + newobj = (jl_value_t*)newdt; + } + else { + assert(!(image_base < (char*)otyp && (char*)otyp <= image_base + sizeof_sysimg)); + newobj = ((jl_datatype_t*)otyp)->instance; + assert(newobj && newobj != jl_nothing); + arraylist_push(&cleanup_list, (void*)obj); + } + if (tag) + *pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE; + else + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg)); + assert(jl_typetagis(obj, otyp)); + } + // A few fields (reached via super) might be self-recursive. This is rare, but handle them now. 
+ // They cannot be instances though, since the type must fully exist before the singleton field can be allocated + for (size_t i = 0; i < delay_list.len; ) { + uintptr_t *pfld = (uintptr_t*)delay_list.items[i++]; + jl_value_t **obj = (jl_value_t **)delay_list.items[i++]; + assert(jl_is_datatype(obj)); + jl_datatype_t *dt = (jl_datatype_t*)obj[0]; + assert(jl_is_datatype(dt)); + jl_value_t *newobj = (jl_value_t*)dt; + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg)); + } + arraylist_free(&delay_list); + // now that all the fields of dt are assigned and unique, copy them into + // their final newdt memory location: this ensures we do not accidentally + // think this pkg image has the singular unique copy of it + void **table = new_dt_objs.table; + for (size_t i = 0; i < new_dt_objs.size; i += 2) { + void *dt = table[i + 1]; + if (dt != HT_NOTFOUND) { + jl_datatype_t *newdt = (jl_datatype_t*)table[i]; + jl_typename_t *name = newdt->name; + static_assert(offsetof(jl_datatype_t, name) == 0, ""); + assert(*(void**)dt == (void*)newdt); + *newdt = *(jl_datatype_t*)dt; // copy the datatype fields (except field 1, which we corrupt above) + newdt->name = name; + } + } + // we should never see these pointers again, so scramble their memory, so any attempt to look at them crashes + for (size_t i = 0; i < cleanup_list.len; i++) { + void *item = cleanup_list.items[i]; + jl_taggedvalue_t *o = jl_astaggedvalue(item); + jl_value_t *t = jl_typeof(item); // n.b. might be 0xbabababa already + if (t == (jl_value_t*)jl_datatype_type) + memset(o, 0xba, sizeof(jl_value_t*) + sizeof(jl_datatype_t)); + else + memset(o, 0xba, sizeof(jl_value_t*) + 0); // singleton + o->bits.in_image = 1; + } + arraylist_grow(&cleanup_list, -cleanup_list.len); + // finally cache all our new types now + for (size_t i = 0; i < new_dt_objs.size; i += 2) { + void *dt = table[i + 1]; + if (dt != HT_NOTFOUND) { + jl_datatype_t *newdt = (jl_datatype_t*)table[i]; + jl_cache_type_(newdt); + } + } + for (size_t i = 0; i < s.fixup_types.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_types.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + assert(jl_is_datatype(obj)); + jl_cache_type_((jl_datatype_t*)obj); + } + // Perform fixups: things like updating world ages, inserting methods & specializations, etc. 
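The cleanup loops above retire the redundant in-image copies by overwriting them with the byte 0xba, so any stale reference shows up as a recognizable 0xbabababa... pattern instead of silently reading garbage. A tiny illustration of that poisoning idiom; the struct here is a stand-in, not a Julia object layout:

#include <string.h>

typedef struct { void *a; void *b; } stale_t;

/* Overwrite a retired object with a recognizable poison pattern; a later
 * dereference then stands out in a debugger or crash dump. */
static void poison_object(stale_t *obj)
{
    memset(obj, 0xba, sizeof *obj);
}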
+ size_t world = jl_atomic_load_acquire(&jl_world_counter); + for (size_t i = 0; i < s.uniquing_objs.len; i++) { + uintptr_t item = (uintptr_t)s.uniquing_objs.items[i]; + // check whether this is a gvar index + int gvar = (item & 2) == 2; + item &= ~(uintptr_t)3; + uintptr_t *pfld; + jl_value_t **obj, *newobj; + if (gvar) { + if (image->gvars_base == NULL) + continue; + item >>= 2; + assert(item < s.gvar_record->size / sizeof(reloc_t)); + pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item); + obj = *(jl_value_t***)pfld; + } + else { + pfld = (uintptr_t*)(image_base + item); + obj = *(jl_value_t***)pfld; + } + uintptr_t otyp = jl_typetagof(obj); // the original type of the object that was written here + if (otyp == (uintptr_t)jl_method_instance_type) { + assert(image_base < (char*)obj && (char*)obj <= image_base + sizeof_sysimg); + jl_value_t *m = obj[0]; + if (jl_is_method_instance(m)) { + newobj = m; // already done + } + else { + arraylist_push(&cleanup_list, (void*)obj); + jl_value_t *specTypes = obj[1]; + jl_value_t *sparams = obj[2]; + newobj = (jl_value_t*)jl_specializations_get_linfo((jl_method_t*)m, specTypes, (jl_svec_t*)sparams); + obj[0] = newobj; + } + } + else { + abort(); // should be unreachable + } + *pfld = (uintptr_t)newobj; + assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg)); + assert(jl_typetagis(obj, otyp)); + } + arraylist_free(&s.uniquing_types); + arraylist_free(&s.uniquing_objs); + for (size_t i = 0; i < cleanup_list.len; i++) { + void *item = cleanup_list.items[i]; + jl_taggedvalue_t *o = jl_astaggedvalue(item); + jl_value_t *t = jl_typeof(item); + if (t == (jl_value_t*)jl_method_instance_type) + memset(o, 0xba, sizeof(jl_value_t*) * 3); // only specTypes and sparams fields stored + o->bits.in_image = 1; + } + arraylist_free(&cleanup_list); + for (size_t i = 0; i < s.fixup_objs.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_objs.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + if (jl_typetagis(obj, jl_typemap_entry_type)) { + jl_typemap_entry_t *entry = (jl_typemap_entry_t*)obj; + entry->min_world = world; + } + else if (jl_is_method(obj)) { + jl_method_t *m = (jl_method_t*)obj; + m->primary_world = world; + } + else if (jl_is_method_instance(obj)) { + jl_method_instance_t *newobj = jl_specializations_get_or_insert((jl_method_instance_t*)obj); + assert(newobj == (jl_method_instance_t*)obj); // strict insertion expected + (void)newobj; + } + else if (jl_is_code_instance(obj)) { + jl_code_instance_t *ci = (jl_code_instance_t*)obj; + assert(s.incremental); + ci->min_world = world; + if (ci->max_world != 0) + jl_array_ptr_1d_push(*new_specializations, (jl_value_t*)ci); + } + else if (jl_is_globalref(obj)) { + continue; // wait until all the module binding tables have been initialized + } + else if (jl_is_module(obj)) { + // rebuild the binding table for module v + // TODO: maybe want to hold the lock on `v`, but that only strongly matters for async / thread safety + // and we are already bad at that + jl_module_t *mod = (jl_module_t*)obj; + mod->build_id.hi = checksum; + mod->primary_world = world; + if (mod->usings.items != &mod->usings._space[0]) { + // arraylist_t assumes we called malloc to get this memory, so make that true now + void **newitems = (void**)malloc_s(mod->usings.max * sizeof(void*)); + memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*)); + mod->usings.items = newitems; + } + } + else { + abort(); + } + } + // Now pick up the globalref binding pointer field + for 
(size_t i = 0; i < s.fixup_objs.len; i++) { + uintptr_t item = (uintptr_t)s.fixup_objs.items[i]; + jl_value_t *obj = (jl_value_t*)(image_base + item); + if (jl_is_globalref(obj)) { + jl_globalref_t *r = (jl_globalref_t*)obj; + if (r->binding == NULL) { + jl_globalref_t *gr = (jl_globalref_t*)jl_module_globalref(r->mod, r->name); + r->binding = gr->binding; + jl_gc_wb(r, gr->binding); + } + } + } + arraylist_free(&s.fixup_types); + arraylist_free(&s.fixup_objs); + + if (s.incremental) + jl_root_new_gvars(&s, image, external_fns_begin); ios_close(&relocs); ios_close(&const_data); - jl_update_all_gvars(&s); // gvars relocs ios_close(&gvar_record); - s.s = NULL; - s.s = f; - // reinit items except ccallables - jl_finalize_deserializer(&s); + htable_free(&new_dt_objs); + s.s = NULL; if (0) { @@ -2118,7 +3488,7 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED " reloc list: %8u\n" " gvar list: %8u\n" " fptr list: %8u\n", - (unsigned)sizeof_sysimg, + (unsigned)sizeof_sysdata, (unsigned)sizeof_constdata, (unsigned)sizeof_symbols, (unsigned)sizeof_tags, @@ -2126,21 +3496,177 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED (unsigned)sizeof_gvar_record, (unsigned)sizeof_fptr_record); } + if (cachesizes) { + cachesizes->sysdata = sizeof_sysdata; + cachesizes->isbitsdata = sizeof_constdata; + cachesizes->symboldata = sizeof_symbols; + cachesizes->tagslist = sizeof_tags; + cachesizes->reloclist = sizeof_relocations - sizeof_tags; + cachesizes->gvarlist = sizeof_gvar_record; + cachesizes->fptrlist = sizeof_fptr_record; + } s.s = &sysimg; - jl_init_codegen(); - jl_update_all_fptrs(&s); // fptr relocs and registration - // reinit ccallables, which require codegen to be initialized - s.s = f; - jl_finalize_deserializer(&s); + jl_update_all_fptrs(&s, image); // fptr relocs and registration + if (!ccallable_list) { + // TODO: jl_sysimg_handle or img_handle? 
+ jl_reinit_ccallable(&s.ccallable_list, image_base, jl_sysimg_handle); + arraylist_free(&s.ccallable_list); + } + s.s = NULL; ios_close(&fptr_record); ios_close(&sysimg); - s.s = NULL; - jl_gc_reset_alloc_count(); + if (!s.incremental) + jl_gc_reset_alloc_count(); + arraylist_free(&deser_sym); + + // Prepare for later external linkage against the sysimg + // Also sets up images for protection against garbage collection + arraylist_push(&jl_linkage_blobs, (void*)image_base); + arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg)); + arraylist_push(&jl_image_relocs, (void*)relocs_base); + jl_timing_counter_inc(JL_TIMING_COUNTER_ImageSize, sizeof_sysimg + sizeof(uintptr_t)); + rebuild_image_blob_tree(); + + // jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1); jl_gc_enable(en); - jl_cleanup_serializer2(); +} + +static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_t *checksum, int64_t *dataendpos, int64_t *datastartpos) +{ + uint8_t pkgimage = 0; + if (ios_eof(f) || 0 == (*checksum = jl_read_verify_header(f, &pkgimage, dataendpos, datastartpos)) || (*checksum >> 32 != 0xfafbfcfd)) { + return jl_get_exceptionf(jl_errorexception_type, + "Precompile file header verification checks failed."); + } + uint8_t flags = read_uint8(f); + if (pkgimage && !jl_match_cache_flags(flags)) { + return jl_get_exceptionf(jl_errorexception_type, "Pkgimage flags mismatch"); + } + if (!pkgimage) { + // skip past the worklist + size_t len; + while ((len = read_int32(f))) + ios_skip(f, len + 3 * sizeof(uint64_t)); + // skip past the dependency list + size_t deplen = read_uint64(f); + ios_skip(f, deplen - sizeof(uint64_t)); + read_uint64(f); // where is this write coming from? + } + + // verify that the system state is valid + return read_verify_mod_list(f, depmods); +} + +// TODO?: refactor to make it easier to create the "package inspector" +static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc) +{ + JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg); + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, pkgname); + uint64_t checksum = 0; + int64_t dataendpos = 0; + int64_t datastartpos = 0; + jl_value_t *verify_fail = jl_validate_cache_file(f, depmods, &checksum, &dataendpos, &datastartpos); + + if (verify_fail) + return verify_fail; + + assert(datastartpos > 0 && datastartpos < dataendpos); + needs_permalloc = jl_options.permalloc_pkgimg || needs_permalloc; + jl_value_t *restored = NULL; + jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; + jl_svec_t *cachesizes_sv = NULL; + char *base; + arraylist_t ccallable_list; + JL_GC_PUSH8(&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &cachesizes_sv); + + { // make a permanent in-memory copy of f (excluding the header) + ios_bufmode(f, bm_none); + JL_SIGATOMIC_BEGIN(); + size_t len = dataendpos - datastartpos; + char *sysimg; + int success = !needs_permalloc; + ios_seek(f, datastartpos); + if (needs_permalloc) + sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + else + sysimg = &f->buf[f->bpos]; + if (needs_permalloc) + success = ios_readall(f, sysimg, len) == len; + if (!success || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { + restored = jl_get_exceptionf(jl_errorexception_type, "Error reading package image file."); + 
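The load path just above re-verifies the copied payload with jl_crc32c against the low 32 bits of the stored checksum word, whose high 32 bits carry the 0xfafbfcfd marker checked during header validation. Assuming jl_crc32c computes a standard CRC-32C (Castagnoli), a plain bitwise reference version of that check looks like this; the real code uses an optimized implementation:

#include <stddef.h>
#include <stdint.h>

/* Bitwise CRC-32C (polynomial 0x1EDC6F41, reflected form 0x82F63B78). */
static uint32_t crc32c_ref(const void *data, size_t len)
{
    const uint8_t *p = (const uint8_t*)data;
    uint32_t crc = 0xFFFFFFFFu;
    for (size_t i = 0; i < len; i++) {
        crc ^= p[i];
        for (int k = 0; k < 8; k++)
            crc = (crc >> 1) ^ (0x82F63B78u & (uint32_t)-(int32_t)(crc & 1));
    }
    return ~crc;
}

/* header_word layout as written by the serializer: marker in the high half,
 * content checksum in the low half. */
static int payload_matches(uint64_t header_word, const void *payload, size_t len)
{
    if ((header_word >> 32) != 0xfafbfcfdu)
        return 0;  /* not a valid package cache header */
    return crc32c_ref(payload, len) == (uint32_t)header_word;
}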
JL_SIGATOMIC_END(); + } + else { + if (needs_permalloc) + ios_close(f); + ios_static_buffer(f, sysimg, len); + pkgcachesizes cachesizes; + jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes); + JL_SIGATOMIC_END(); + + // Insert method extensions + jl_insert_methods(extext_methods); + // No special processing of `new_specializations` is required because recaching handled it + // Add roots to methods + jl_copy_roots(method_roots_list, jl_worklist_key((jl_array_t*)restored)); + // Handle edges + size_t world = jl_atomic_load_acquire(&jl_world_counter); + jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets, (jl_array_t*)new_specializations, world); // restore external backedges (needs to be last) + // reinit ccallables + jl_reinit_ccallable(&ccallable_list, base, pkgimage_handle); + arraylist_free(&ccallable_list); + + if (completeinfo) { + cachesizes_sv = jl_alloc_svec(7); + jl_svecset(cachesizes_sv, 0, jl_box_long(cachesizes.sysdata)); + jl_svecset(cachesizes_sv, 1, jl_box_long(cachesizes.isbitsdata)); + jl_svecset(cachesizes_sv, 2, jl_box_long(cachesizes.symboldata)); + jl_svecset(cachesizes_sv, 3, jl_box_long(cachesizes.tagslist)); + jl_svecset(cachesizes_sv, 4, jl_box_long(cachesizes.reloclist)); + jl_svecset(cachesizes_sv, 5, jl_box_long(cachesizes.gvarlist)); + jl_svecset(cachesizes_sv, 6, jl_box_long(cachesizes.fptrlist)); + restored = (jl_value_t*)jl_svec(8, restored, init_order, extext_methods, new_specializations, method_roots_list, + ext_targets, edges, cachesizes_sv); + } + else { + restored = (jl_value_t*)jl_svec(2, restored, init_order); + } + } + } + + JL_GC_POP(); + return restored; +} + +static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uint32_t checksum) +{ + JL_TIMING(LOAD_IMAGE, LOAD_Sysimg); + jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +} + +JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(void* pkgimage_handle, const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, int needs_permalloc) +{ + ios_t f; + ios_static_buffer(&f, (char*)buf, sz); + jl_value_t *ret = jl_restore_package_image_from_stream(pkgimage_handle, &f, image, depmods, completeinfo, pkgname, needs_permalloc); + ios_close(&f); + return ret; +} + +JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods, int completeinfo, const char *pkgname) +{ + ios_t f; + if (ios_file(&f, fname, 1, 0, 0, 0) == NULL) { + return jl_get_exceptionf(jl_errorexception_type, + "Cache file \"%s\" not found.\n", fname); + } + jl_image_t pkgimage = {}; + jl_value_t *ret = jl_restore_package_image_from_stream(NULL, &f, &pkgimage, depmods, completeinfo, pkgname, 1); + ios_close(&f); + return ret; } // TODO: need to enforce that the alignment of the buffer is suitable for vectors @@ -2169,8 +3695,9 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname) if (ios_readall(&f, sysimg, len) != len) jl_errorf("Error reading system image file."); ios_close(&f); + uint32_t checksum = jl_crc32c(0, sysimg, len); ios_static_buffer(&f, sysimg, len); - jl_restore_system_image_from_stream(&f); + jl_restore_system_image_from_stream(&f, &sysimage, checksum); ios_close(&f); JL_SIGATOMIC_END(); } @@ -2181,36 +3708,40 @@ JL_DLLEXPORT void 
jl_restore_system_image_data(const char *buf, size_t len) ios_t f; JL_SIGATOMIC_BEGIN(); ios_static_buffer(&f, (char*)buf, len); - jl_restore_system_image_from_stream(&f); + uint32_t checksum = jl_crc32c(0, buf, len); + jl_restore_system_image_from_stream(&f, &sysimage, checksum); ios_close(&f); JL_SIGATOMIC_END(); } -// --- init --- - -static void jl_init_serializer2(int for_serialize) +JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, jl_array_t *depmods, int completeinfo, const char *pkgname, int ignore_native) { - if (for_serialize) { - htable_new(&symbol_table, 0); - htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs)); - htable_new(&backref_table, 0); - uintptr_t i; - for (i = 0; id_to_fptrs[i] != NULL; i++) { - ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); - } + void *pkgimg_handle = jl_dlopen(fname, JL_RTLD_LAZY); + if (!pkgimg_handle) { +#ifdef _OS_WINDOWS_ + int err; + char reason[256]; + err = GetLastError(); + win32_formatmessage(err, reason, sizeof(reason)); +#else + const char *reason = dlerror(); +#endif + jl_errorf("Error opening package file %s: %s\n", fname, reason); } - else { - arraylist_new(&deser_sym, 0); + const char *pkgimg_data; + jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); + size_t *plen; + jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + + jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); + + if (ignore_native){ + memset(&pkgimage.fptrs, 0, sizeof(pkgimage.fptrs)); } - nsym_tag = 0; -} -static void jl_cleanup_serializer2(void) -{ - htable_reset(&symbol_table, 0); - htable_reset(&fptr_to_id, 0); - htable_reset(&backref_table, 0); - arraylist_free(&deser_sym); + jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_handle, pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, 0); + + return mod; } #ifdef __cplusplus diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c new file mode 100644 index 0000000000000..35233cf0795f0 --- /dev/null +++ b/src/staticdata_utils.c @@ -0,0 +1,1296 @@ +// inverse of backedges graph (caller=>callees hash) +jl_genericmemory_t *edges_map JL_GLOBALLY_ROOTED = NULL; // rooted for the duration of our uses of this + +static void write_float64(ios_t *s, double x) JL_NOTSAFEPOINT +{ + write_uint64(s, *((uint64_t*)&x)); +} + +// Decide if `t` must be new, because it points to something new. 
+// If it is new, the object (in particular, the super field) might not be entirely +// valid for the cache, so we want to finish transforming it before attempting +// to look in the cache for it +int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeof_sysimg) +{ + //if (jl_object_in_image(t)) + // return 0; // fast-path for rejection + assert(ptrhash_get(news, (void*)t) != (void*)t); + if (ptrhash_has(news, (void*)t) || ptrhash_has(news, (void*)jl_typeof(t))) + return 1; + if (!(image_base < (char*)t && (char*)t <= image_base + sizeof_sysimg)) + return 0; // fast-path for rejection + if (jl_is_uniontype(t)) { + jl_uniontype_t *u = (jl_uniontype_t*)t; + return must_be_new_dt(u->a, news, image_base, sizeof_sysimg) || + must_be_new_dt(u->b, news, image_base, sizeof_sysimg); + } + else if (jl_is_unionall(t)) { + jl_unionall_t *ua = (jl_unionall_t*)t; + return must_be_new_dt((jl_value_t*)ua->var, news, image_base, sizeof_sysimg) || + must_be_new_dt(ua->body, news, image_base, sizeof_sysimg); + } + else if (jl_is_typevar(t)) { + jl_tvar_t *tv = (jl_tvar_t*)t; + return must_be_new_dt(tv->lb, news, image_base, sizeof_sysimg) || + must_be_new_dt(tv->ub, news, image_base, sizeof_sysimg); + } + else if (jl_is_vararg(t)) { + jl_vararg_t *tv = (jl_vararg_t*)t; + if (tv->T && must_be_new_dt(tv->T, news, image_base, sizeof_sysimg)) + return 1; + if (tv->N && must_be_new_dt(tv->N, news, image_base, sizeof_sysimg)) + return 1; + } + else if (jl_is_datatype(t)) { + jl_datatype_t *dt = (jl_datatype_t*)t; + assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?"); + jl_datatype_t *super = dt->super; + // check if super is news, since then we must be new also + // (it is also possible that super is indeterminate now, wait for `t` + // to be resolved, then will be determined later and fixed up by the + // delay_list, for this and any other references to it). 
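Several of the tests in this file reduce to the same question: does a pointer land inside the image currently being loaded? A one-line helper that mirrors the recurring image_base < p && p <= image_base + sizeof_sysimg comparison (the name is invented):

#include <stddef.h>

/* Matches the test used throughout this file: strictly greater than the base,
 * less than or equal to base + size. */
static int ptr_in_image(const void *p, const char *image_base, size_t sizeof_image)
{
    const char *c = (const char*)p;
    return image_base < c && c <= image_base + sizeof_image;
}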
+ while (super != jl_any_type) { + assert(super); + if (ptrhash_has(news, (void*)super)) + return 1; + if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg)) + break; // fast-path for rejection of super + // otherwise super might be something that was not cached even though a later supertype might be + // for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}` + super = super->super; + } + jl_svec_t *tt = dt->parameters; + size_t i, l = jl_svec_len(tt); + for (i = 0; i < l; i++) + if (must_be_new_dt(jl_tparam(dt, i), news, image_base, sizeof_sysimg)) + return 1; + } + else { + return must_be_new_dt(jl_typeof(t), news, image_base, sizeof_sysimg); + } + return 0; +} + +static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT +{ + assert(jl_is_array(worklist)); + size_t len = jl_array_nrows(worklist); + if (len > 0) { + jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1); + assert(jl_is_module(topmod)); + return topmod->build_id.lo; + } + return 0; +} + +static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/; +// Mutex for newly_inferred +jl_mutex_t newly_inferred_mutex; + +// Register array of newly-inferred MethodInstances +// This gets called as the first step of Base.include_package_for_output +JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred) +{ + assert(_newly_inferred == NULL || jl_is_array(_newly_inferred)); + newly_inferred = (jl_array_t*) _newly_inferred; +} + +JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* ci) +{ + JL_LOCK(&newly_inferred_mutex); + size_t end = jl_array_nrows(newly_inferred); + jl_array_grow_end(newly_inferred, 1); + jl_array_ptr_set(newly_inferred, end, ci); + JL_UNLOCK(&newly_inferred_mutex); +} + + +// compute whether a type references something internal to worklist +// and thus could not have existed before deserialize +// and thus does not need delayed unique-ing +static int type_in_worklist(jl_value_t *v) JL_NOTSAFEPOINT +{ + if (jl_object_in_image(v)) + return 0; // fast-path for rejection + if (jl_is_uniontype(v)) { + jl_uniontype_t *u = (jl_uniontype_t*)v; + return type_in_worklist(u->a) || + type_in_worklist(u->b); + } + else if (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + return type_in_worklist((jl_value_t*)ua->var) || + type_in_worklist(ua->body); + } + else if (jl_is_typevar(v)) { + jl_tvar_t *tv = (jl_tvar_t*)v; + return type_in_worklist(tv->lb) || + type_in_worklist(tv->ub); + } + else if (jl_is_vararg(v)) { + jl_vararg_t *tv = (jl_vararg_t*)v; + if (tv->T && type_in_worklist(tv->T)) + return 1; + if (tv->N && type_in_worklist(tv->N)) + return 1; + } + else if (jl_is_datatype(v)) { + jl_datatype_t *dt = (jl_datatype_t*)v; + if (!jl_object_in_image((jl_value_t*)dt->name)) + return 1; + jl_svec_t *tt = dt->parameters; + size_t i, l = jl_svec_len(tt); + for (i = 0; i < l; i++) + if (type_in_worklist(jl_tparam(dt, i))) + return 1; + } + else { + return type_in_worklist(jl_typeof(v)); + } + return 0; +} + +// When we infer external method instances, ensure they link back to the +// package. Otherwise they might be, e.g., for external macros. 
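jl_push_newly_inferred above is the usual lock, append, unlock pattern around a shared growable list. A self-contained pthread sketch of the same idea; the list type is a stand-in rather than jl_array_t, and error handling is omitted:

#include <pthread.h>
#include <stdlib.h>

typedef struct {
    void **items;
    size_t len, cap;
    pthread_mutex_t lock;
} locked_list_t;

/* Append one element while holding the lock, growing the backing array as needed. */
static void locked_list_push(locked_list_t *l, void *item)
{
    pthread_mutex_lock(&l->lock);
    if (l->len == l->cap) {
        size_t newcap = l->cap ? l->cap * 2 : 8;
        l->items = (void**)realloc(l->items, newcap * sizeof(void*));
        l->cap = newcap;
    }
    l->items[l->len++] = item;
    pthread_mutex_unlock(&l->lock);
}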
+// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable +static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, arraylist_t *stack) +{ + jl_module_t *mod = mi->def.module; + if (jl_is_method(mod)) + mod = ((jl_method_t*)mod)->module; + assert(jl_is_module(mod)); + if (jl_atomic_load_relaxed(&mi->precompiled) || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) { + return 1; + } + if (!mi->backedges) { + return 0; + } + void **bp = ptrhash_bp(visited, mi); + // HT_NOTFOUND: not yet analyzed + // HT_NOTFOUND + 1: no link back + // HT_NOTFOUND + 2: does link back + // HT_NOTFOUND + 3: does link back, and included in new_specializations already + // HT_NOTFOUND + 4 + depth: in-progress + int found = (char*)*bp - (char*)HT_NOTFOUND; + if (found) + return found - 1; + arraylist_push(stack, (void*)mi); + int depth = stack->len; + *bp = (void*)((char*)HT_NOTFOUND + 4 + depth); // preliminarily mark as in-progress + size_t i = 0, n = jl_array_nrows(mi->backedges); + int cycle = depth; + while (i < n) { + jl_method_instance_t *be; + i = get_next_edge(mi->backedges, i, NULL, &be); + int child_found = has_backedge_to_worklist(be, visited, stack); + if (child_found == 1 || child_found == 2) { + // found what we were looking for, so terminate early + found = 1; + break; + } + else if (child_found >= 3 && child_found - 3 < cycle) { + // record that the cycle will resolve at depth "cycle" + cycle = child_found - 3; + assert(cycle); + } + } + if (!found && cycle != depth) + return cycle + 3; + // If we are the top of the current cycle, now mark all other parts of + // our cycle with what we found. + // Or if we found a backedge, also mark all of the other parts of the + // cycle as having a backedge. + while (stack->len >= depth) { + void *mi = arraylist_pop(stack); + bp = ptrhash_bp(visited, mi); + assert((char*)*bp - (char*)HT_NOTFOUND == 5 + stack->len); + *bp = (void*)((char*)HT_NOTFOUND + 1 + found); + } + return found; +} + +// Given the list of CodeInstances that were inferred during the build, select +// those that are (1) external, (2) still valid, (3) are inferred to be called +// from the worklist or explicitly added by a `precompile` statement, and +// (4) are the most recently computed result for that method. +// These will be preserved in the image.
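has_backedge_to_worklist keeps its per-node DFS state directly in the hash-table slot, encoded as HT_NOTFOUND plus a small offset: unvisited, no link back, links back, already queued, or in-progress at a given stack depth. A sketch of that encoding with an explicit sentinel; the constant and helper names are invented, and it relies on the same out-of-range pointer arithmetic the original performs on HT_NOTFOUND:

#include <stddef.h>

/* A distinguished sentinel plays the role of HT_NOTFOUND; states are encoded
 * as sentinel + small offsets so a single pointer-sized slot holds everything. */
static char state_sentinel;
#define STATE_BASE        ((char*)&state_sentinel)
#define STATE_UNVISITED   (STATE_BASE + 0)
#define STATE_NO_LINK     (STATE_BASE + 1)
#define STATE_LINKS_BACK  (STATE_BASE + 2)
#define STATE_QUEUED      (STATE_BASE + 3)
/* in-progress at DFS depth d is encoded as STATE_BASE + 4 + d */

static void  *encode_in_progress(size_t depth) { return STATE_BASE + 4 + depth; }
static int    is_in_progress(void *state)      { return (char*)state >= STATE_BASE + 4; }
static size_t in_progress_depth(void *state)   { return (size_t)((char*)state - (STATE_BASE + 4)); }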
+static jl_array_t *queue_external_cis(jl_array_t *list) +{ + if (list == NULL) + return NULL; + size_t i; + htable_t visited; + arraylist_t stack; + assert(jl_is_array(list)); + size_t n0 = jl_array_nrows(list); + htable_new(&visited, n0); + arraylist_new(&stack, 0); + jl_array_t *new_specializations = jl_alloc_vec_any(0); + JL_GC_PUSH1(&new_specializations); + for (i = n0; i-- > 0; ) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(list, i); + assert(jl_is_code_instance(ci)); + if (!ci->relocatability) + continue; + jl_method_instance_t *mi = ci->def; + jl_method_t *m = mi->def.method; + if (jl_atomic_load_relaxed(&ci->inferred) && jl_is_method(m) && jl_object_in_image((jl_value_t*)m->module)) { + int found = has_backedge_to_worklist(mi, &visited, &stack); + assert(found == 0 || found == 1 || found == 2); + assert(stack.len == 0); + if (found == 1 && ci->max_world == ~(size_t)0) { + void **bp = ptrhash_bp(&visited, mi); + if (*bp != (void*)((char*)HT_NOTFOUND + 3)) { + *bp = (void*)((char*)HT_NOTFOUND + 3); + jl_array_ptr_1d_push(new_specializations, (jl_value_t*)ci); + } + } + } + } + htable_free(&visited); + arraylist_free(&stack); + JL_GC_POP(); + // reverse new_specializations + n0 = jl_array_nrows(new_specializations); + jl_value_t **news = jl_array_data(new_specializations, jl_value_t*); + for (i = 0; i < n0; i++) { + jl_value_t *temp = news[i]; + news[i] = news[n0 - i - 1]; + news[n0 - i - 1] = temp; + } + return new_specializations; +} + +// New roots for external methods +static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_specializations, uint64_t key) +{ + htable_t mset; + htable_new(&mset, 0); + size_t l = new_specializations ? jl_array_nrows(new_specializations) : 0; + for (size_t i = 0; i < l; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(new_specializations, i); + assert(jl_is_code_instance(ci)); + jl_method_t *m = ci->def->def.method; + assert(jl_is_method(m)); + ptrhash_put(&mset, (void*)m, (void*)m); + } + int nwithkey; + void *const *table = mset.table; + jl_array_t *newroots = NULL; + JL_GC_PUSH1(&newroots); + for (size_t i = 0; i < mset.size; i += 2) { + if (table[i+1] != HT_NOTFOUND) { + jl_method_t *m = (jl_method_t*)table[i]; + assert(jl_is_method(m)); + nwithkey = nroots_with_key(m, key); + if (nwithkey) { + jl_array_ptr_1d_push(roots, (jl_value_t*)m); + newroots = jl_alloc_vec_any(nwithkey); + jl_array_ptr_1d_push(roots, (jl_value_t*)newroots); + rle_iter_state rootiter = rle_iter_init(0); + uint64_t *rletable = NULL; + size_t nblocks2 = 0, nroots = jl_array_nrows(m->roots), k = 0; + if (m->root_blocks) { + rletable = jl_array_data(m->root_blocks, uint64_t); + nblocks2 = jl_array_nrows(m->root_blocks); + } + while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2)) + if (rootiter.key == key) + jl_array_ptr_set(newroots, k++, jl_array_ptr_ref(m->roots, rootiter.i)); + assert(k == nwithkey); + } + } + } + JL_GC_POP(); + htable_free(&mset); +} + +// Create the forward-edge map (caller => callees) +// the intent of these functions is to invert the backedges tree +// for anything that points to a method not part of the worklist +// +// from MethodTables +static void jl_collect_missing_backedges(jl_methtable_t *mt) +{ + jl_array_t *backedges = mt->backedges; + if (backedges) { + size_t i, l = jl_array_nrows(backedges); + for (i = 1; i < l; i += 2) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i); + jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1); 
// signature of abstract callee + jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); + if (edges == NULL) { + edges = jl_alloc_vec_any(0); + JL_GC_PUSH1(&edges); + edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); + JL_GC_POP(); + } + jl_array_ptr_1d_push(edges, NULL); + jl_array_ptr_1d_push(edges, missing_callee); + } + } +} + + +// from MethodInstances +static void collect_backedges(jl_method_instance_t *callee, int internal) +{ + jl_array_t *backedges = callee->backedges; + if (backedges) { + size_t i = 0, l = jl_array_nrows(backedges); + while (i < l) { + jl_value_t *invokeTypes; + jl_method_instance_t *caller; + i = get_next_edge(backedges, i, &invokeTypes, &caller); + jl_array_t *edges = (jl_array_t*)jl_eqtable_get(edges_map, (jl_value_t*)caller, NULL); + if (edges == NULL) { + edges = jl_alloc_vec_any(0); + JL_GC_PUSH1(&edges); + edges_map = jl_eqtable_put(edges_map, (jl_value_t*)caller, (jl_value_t*)edges, NULL); + JL_GC_POP(); + } + jl_array_ptr_1d_push(edges, invokeTypes); + jl_array_ptr_1d_push(edges, (jl_value_t*)callee); + } + } +} + + +// For functions owned by modules not on the worklist, call this on each method. +// - if the method is owned by a worklist module, add it to the list of things to be +// fully serialized +// - Collect all backedges (may be needed later when we invert this list). +static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) +{ + jl_array_t *s = (jl_array_t*)closure; + jl_method_t *m = ml->func.method; + if (s && !jl_object_in_image((jl_value_t*)m->module)) { + jl_array_ptr_1d_push(s, (jl_value_t*)m); + } + if (edges_map == NULL) + return 1; + jl_value_t *specializations = jl_atomic_load_relaxed(&m->specializations); + if (!jl_is_svec(specializations)) { + jl_method_instance_t *callee = (jl_method_instance_t*)specializations; + collect_backedges(callee, !s); + } + else { + size_t i, l = jl_svec_len(specializations); + for (i = 0; i < l; i++) { + jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i); + if ((jl_value_t*)callee != jl_nothing) + collect_backedges(callee, !s); + } + } + return 1; +} + +static int jl_collect_methtable_from_mod(jl_methtable_t *mt, void *env) +{ + if (!jl_object_in_image((jl_value_t*)mt)) + env = NULL; // do not collect any methods from here + jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), jl_collect_methcache_from_mod, env); + if (env && edges_map) + jl_collect_missing_backedges(mt); + return 1; +} + +// Collect methods of external functions defined by modules in the worklist +// "extext" = "extending external" +// Also collect relevant backedges +static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) +{ + foreach_mtable_in_module(m, jl_collect_methtable_from_mod, s); +} + +static void jl_record_edges(jl_method_instance_t *caller, arraylist_t *wq, jl_array_t *edges) +{ + jl_array_t *callees = NULL; + JL_GC_PUSH2(&caller, &callees); + callees = (jl_array_t*)jl_eqtable_pop(edges_map, (jl_value_t*)caller, NULL, NULL); + if (callees != NULL) { + jl_array_ptr_1d_push(edges, (jl_value_t*)caller); + jl_array_ptr_1d_push(edges, (jl_value_t*)callees); + size_t i, l = jl_array_nrows(callees); + for (i = 1; i < l; i += 2) { + jl_method_instance_t *c = (jl_method_instance_t*)jl_array_ptr_ref(callees, i); + if (c && jl_is_method_instance(c)) { + arraylist_push(wq, c); + } + } + } + JL_GC_POP(); +} + + +// Extract `edges` and `ext_targets` from `edges_map` +// `edges` = [caller1, 
targets_indexes1, ...], the list of methods and their edges +// `ext_targets` is [invokesig1, callee1, matches1, ...], the edges for each target +static void jl_collect_edges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *external_cis, size_t world) +{ + htable_t external_mis; + htable_new(&external_mis, 0); + if (external_cis) { + for (size_t i = 0; i < jl_array_nrows(external_cis); i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(external_cis, i); + jl_method_instance_t *mi = ci->def; + ptrhash_put(&external_mis, (void*)mi, (void*)mi); + } + } + arraylist_t wq; + arraylist_new(&wq, 0); + void **table = (void**) edges_map->ptr; // edges_map is caller => callees + size_t table_size = edges_map->length; + for (size_t i = 0; i < table_size; i += 2) { + assert(table == edges_map->ptr && table_size == edges_map->length && + "edges_map changed during iteration"); + jl_method_instance_t *caller = (jl_method_instance_t*)table[i]; + jl_array_t *callees = (jl_array_t*)table[i + 1]; + if (callees == NULL) + continue; + assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + if (!jl_object_in_image((jl_value_t*)caller->def.method->module) || + ptrhash_get(&external_mis, caller) != HT_NOTFOUND) { + jl_record_edges(caller, &wq, edges); + } + } + htable_free(&external_mis); + while (wq.len) { + jl_method_instance_t *caller = (jl_method_instance_t*)arraylist_pop(&wq); + jl_record_edges(caller, &wq, edges); + } + arraylist_free(&wq); + edges_map = NULL; + htable_t edges_map2; + htable_new(&edges_map2, 0); + htable_t edges_ids; + size_t l = edges ? jl_array_nrows(edges) : 0; + htable_new(&edges_ids, l); + for (size_t i = 0; i < l / 2; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, i * 2); + void *target = (void*)((char*)HT_NOTFOUND + i + 1); + ptrhash_put(&edges_ids, (void*)caller, target); + } + // process target list to turn it into a memoized validity table + // and compute the old methods list, ready for serialization + jl_value_t *matches = NULL; + jl_array_t *callee_ids = NULL; + jl_value_t *sig = NULL; + JL_GC_PUSH3(&matches, &callee_ids, &sig); + for (size_t i = 0; i < l; i += 2) { + jl_array_t *callees = (jl_array_t*)jl_array_ptr_ref(edges, i + 1); + size_t l = jl_array_nrows(callees); + callee_ids = jl_alloc_array_1d(jl_array_int32_type, l + 1); + int32_t *idxs = jl_array_data(callee_ids, int32_t); + idxs[0] = 0; + size_t nt = 0; + for (size_t j = 0; j < l; j += 2) { + jl_value_t *invokeTypes = jl_array_ptr_ref(callees, j); + jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); + assert(callee && "unsupported edge"); + + if (jl_is_method_instance(callee)) { + jl_methtable_t *mt = jl_method_get_table(((jl_method_instance_t*)callee)->def.method); + if (!jl_object_in_image((jl_value_t*)mt)) + continue; + } + + // (nullptr, c) => call + // (invokeTypes, c) => invoke + // (nullptr, invokeTypes) => missing call + // (invokeTypes, nullptr) => missing invoke (unused--inferred as Any) + void *target = ptrhash_get(&edges_map2, invokeTypes ? 
(void*)invokeTypes : (void*)callee); + if (target == HT_NOTFOUND) { + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + if (invokeTypes) { + assert(jl_is_method_instance(callee)); + jl_method_t *m = ((jl_method_instance_t*)callee)->def.method; + matches = (jl_value_t*)m; // valid because there is no method replacement permitted +#ifndef NDEBUG + jl_methtable_t *mt = jl_method_get_table(m); + if ((jl_value_t*)mt != jl_nothing) { + jl_value_t *matches = jl_gf_invoke_lookup_worlds(invokeTypes, (jl_value_t*)mt, world, &min_valid, &max_valid); + if (matches != jl_nothing) { + assert(m == ((jl_method_match_t*)matches)->method); + } + } +#endif + } + else { + if (jl_is_method_instance(callee)) { + jl_method_instance_t *mi = (jl_method_instance_t*)callee; + sig = jl_type_intersection(mi->def.method->sig, (jl_value_t*)mi->specTypes); + } + else { + sig = callee; + } + int ambig = 0; + matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, + INT32_MAX, 0, world, &min_valid, &max_valid, &ambig); + sig = NULL; + if (matches == jl_nothing) { + callee_ids = NULL; // invalid + break; + } + size_t k; + for (k = 0; k < jl_array_nrows(matches); k++) { + jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k); + jl_array_ptr_set(matches, k, match->method); + } + } + jl_array_ptr_1d_push(ext_targets, invokeTypes); + jl_array_ptr_1d_push(ext_targets, callee); + jl_array_ptr_1d_push(ext_targets, matches); + target = (void*)((char*)HT_NOTFOUND + jl_array_nrows(ext_targets) / 3); + ptrhash_put(&edges_map2, (void*)callee, target); + } + idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; + } + jl_array_ptr_set(edges, i + 1, callee_ids); // swap callees for ids + if (!callee_ids) + continue; + idxs[0] = nt; + // record place of every method in edges + // add method edges to the callee_ids list + for (size_t j = 0; j < l; j += 2) { + jl_value_t *callee = jl_array_ptr_ref(callees, j + 1); + if (callee && jl_is_method_instance(callee)) { + void *target = ptrhash_get(&edges_ids, (void*)callee); + if (target != HT_NOTFOUND) { + idxs[++nt] = (char*)target - (char*)HT_NOTFOUND - 1; + } + } + } + jl_array_del_end(callee_ids, l - nt); + } + JL_GC_POP(); + htable_free(&edges_map2); +} + +// Headers + +// serialize information about all loaded modules +static void write_mod_list(ios_t *s, jl_array_t *a) +{ + size_t i; + size_t len = jl_array_nrows(a); + for (i = 0; i < len; i++) { + jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(a, i); + assert(jl_is_module(m)); + if (jl_object_in_image((jl_value_t*)m)) { + const char *modname = jl_symbol_name(m->name); + size_t l = strlen(modname); + write_int32(s, l); + ios_write(s, modname, l); + write_uint64(s, m->uuid.hi); + write_uint64(s, m->uuid.lo); + write_uint64(s, m->build_id.hi); + write_uint64(s, m->build_id.lo); + } + } + write_int32(s, 0); +} + +// OPT_LEVEL should always be the upper bits +#define OPT_LEVEL 6 + +JL_DLLEXPORT uint8_t jl_cache_flags(void) +{ + // OOICCDDP + uint8_t flags = 0; + flags |= (jl_options.use_pkgimages & 1); // 0-bit + flags |= (jl_options.debug_level & 3) << 1; // 1-2 bit + flags |= (jl_options.check_bounds & 3) << 3; // 3-4 bit + flags |= (jl_options.can_inline & 1) << 5; // 5-bit + flags |= (jl_options.opt_level & 3) << OPT_LEVEL; // 6-7 bit + return flags; +} + +JL_DLLEXPORT uint8_t jl_match_cache_flags(uint8_t flags) +{ + // 1. 
Check which flags are relevant + uint8_t current_flags = jl_cache_flags(); + uint8_t supports_pkgimage = (current_flags & 1); + uint8_t is_pkgimage = (flags & 1); + + // For .ji packages ignore other flags + if (!supports_pkgimage && !is_pkgimage) { + return 1; + } + + // If package images are optional, ignore that bit (it will be unset in current_flags) + if (jl_options.use_pkgimages == JL_OPTIONS_USE_PKGIMAGES_EXISTING) { + flags &= ~1; + } + + // 2. Check that all flags except the opt level match exactly + uint8_t mask = (1 << OPT_LEVEL)-1; + if ((flags & mask) != (current_flags & mask)) + return 0; + // 3. Allow for higher optimization flags in the cache + flags >>= OPT_LEVEL; + current_flags >>= OPT_LEVEL; + return flags >= current_flags; +} + +// "magic" string and version header of .ji file +static const int JI_FORMAT_VERSION = 12; +static const char JI_MAGIC[] = "\373jli\r\n\032\n"; // based on PNG signature +static const uint16_t BOM = 0xFEFF; // byte-order marker +static int64_t write_header(ios_t *s, uint8_t pkgimage) +{ + ios_write(s, JI_MAGIC, strlen(JI_MAGIC)); + write_uint16(s, JI_FORMAT_VERSION); + ios_write(s, (char *) &BOM, 2); + write_uint8(s, sizeof(void*)); + ios_write(s, JL_BUILD_UNAME, strlen(JL_BUILD_UNAME)+1); + ios_write(s, JL_BUILD_ARCH, strlen(JL_BUILD_ARCH)+1); + ios_write(s, JULIA_VERSION_STRING, strlen(JULIA_VERSION_STRING)+1); + const char *branch = jl_git_branch(), *commit = jl_git_commit(); + ios_write(s, branch, strlen(branch)+1); + ios_write(s, commit, strlen(commit)+1); + write_uint8(s, pkgimage); + int64_t checksumpos = ios_pos(s); + write_uint64(s, 0); // eventually will hold checksum for the content portion of this (build_id.hi) + write_uint64(s, 0); // eventually will hold dataendpos + write_uint64(s, 0); // eventually will hold datastartpos + return checksumpos; +} + +// serialize information about the result of deserializing this file +static void write_worklist_for_header(ios_t *s, jl_array_t *worklist) +{ + int i, l = jl_array_nrows(worklist); + for (i = 0; i < l; i++) { + jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, i); + if (workmod->parent == jl_main_module || workmod->parent == workmod) { + size_t l = strlen(jl_symbol_name(workmod->name)); + write_int32(s, l); + ios_write(s, jl_symbol_name(workmod->name), l); + write_uint64(s, workmod->uuid.hi); + write_uint64(s, workmod->uuid.lo); + write_uint64(s, workmod->build_id.lo); + } + } + write_int32(s, 0); +} + +static void write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT +{ + if (depmod->parent == jl_main_module || depmod->parent == depmod) + return; + const char *mname = jl_symbol_name(depmod->name); + size_t slen = strlen(mname); + write_module_path(s, depmod->parent); + write_int32(s, slen); + ios_write(s, mname, slen); +} + +// Cache file header +// Serialize the global Base._require_dependencies array of pathnames that +// are include dependencies. Also write Preferences and return +// the location of the srctext "pointer" in the header index.
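jl_cache_flags packs the options that must agree between the compiling and the loading session into one byte, keeping the opt level in the top bits so that jl_match_cache_flags can require an exact match below OPT_LEVEL while treating the opt level as a floor. A standalone sketch of the same packing; the parameter names are local to the example:

#include <stdint.h>

/* Bit layout used above ("OOICCDDP"): bit 0 pkgimage use, bits 1-2 debug level,
 * bits 3-4 check-bounds mode, bit 5 can_inline, bits 6-7 opt level. */
#define FLAGS_OPT_SHIFT 6

static uint8_t pack_cache_flags(int use_pkgimages, int debug_level, int check_bounds,
                                int can_inline, int opt_level)
{
    uint8_t flags = 0;
    flags |= (uint8_t)(use_pkgimages & 1);
    flags |= (uint8_t)((debug_level & 3) << 1);
    flags |= (uint8_t)((check_bounds & 3) << 3);
    flags |= (uint8_t)((can_inline & 1) << 5);
    flags |= (uint8_t)((opt_level & 3) << FLAGS_OPT_SHIFT);
    return flags;
}

/* Everything below the opt level must match exactly; a cache built at a higher
 * opt level is still acceptable. */
static int flags_compatible(uint8_t cache, uint8_t current)
{
    uint8_t mask = (1u << FLAGS_OPT_SHIFT) - 1;
    if ((cache & mask) != (current & mask))
        return 0;
    return (cache >> FLAGS_OPT_SHIFT) >= (current >> FLAGS_OPT_SHIFT);
}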
+static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t **udepsp) +{ + int64_t initial_pos = 0; + int64_t pos = 0; + static jl_array_t *deps = NULL; + if (!deps) + deps = (jl_array_t*)jl_get_global(jl_base_module, jl_symbol("_require_dependencies")); + + // unique(deps) to eliminate duplicates while preserving order: + // we preserve order so that the topmost included .jl file comes first + static jl_value_t *unique_func = NULL; + if (!unique_func) + unique_func = jl_get_global(jl_base_module, jl_symbol("unique")); + jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps}; + jl_task_t *ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL); + ct->world_age = last_age; + + static jl_value_t *replace_depot_func = NULL; + if (!replace_depot_func) + replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path")); + + // write a placeholder for total size so that we can quickly seek past all of the + // dependencies if we don't need them + initial_pos = ios_pos(s); + write_uint64(s, 0); + size_t i, l = udeps ? jl_array_nrows(udeps) : 0; + for (i = 0; i < l; i++) { + jl_value_t *deptuple = jl_array_ptr_ref(udeps, i); + jl_value_t *deppath = jl_fieldref(deptuple, 1); + + if (replace_depot_func) { + jl_value_t **replace_depot_args; + JL_GC_PUSHARGS(replace_depot_args, 2); + replace_depot_args[0] = replace_depot_func; + replace_depot_args[1] = deppath; + ct = jl_current_task; + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + deppath = (jl_value_t*)jl_apply(replace_depot_args, 2); + ct->world_age = last_age; + JL_GC_POP(); + } + + size_t slen = jl_string_len(deppath); + write_int32(s, slen); + ios_write(s, jl_string_data(deppath), slen); + write_uint64(s, jl_unbox_uint64(jl_fieldref(deptuple, 2))); // fsize + write_uint32(s, jl_unbox_uint32(jl_fieldref(deptuple, 3))); // hash + write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 4))); // mtime + jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module + jl_module_t *depmod_top = depmod; + while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top) + depmod_top = depmod_top->parent; + unsigned provides = 0; + size_t j, lj = jl_array_nrows(worklist); + for (j = 0; j < lj; j++) { + jl_module_t *workmod = (jl_module_t*)jl_array_ptr_ref(worklist, j); + if (workmod->parent == jl_main_module || workmod->parent == workmod) { + ++provides; + if (workmod == depmod_top) { + write_int32(s, provides); + write_module_path(s, depmod); + break; + } + } + } + write_int32(s, 0); + } + write_int32(s, 0); // terminator, for ease of reading + + // Calculate Preferences hash for current package. 
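write_dependency_list above reserves a uint64 placeholder for the section size, emits one record per dependency (an int32-length-prefixed path, the file size, a content hash, and the mtime stored as raw float64 bits), terminates with an int32 zero, and later seeks back to patch the placeholder. A minimal writer for a record shaped like that; it omits the providing-module path that the real record also carries:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void put_u32(FILE *f, uint32_t v) { fwrite(&v, sizeof v, 1, f); }
static void put_u64(FILE *f, uint64_t v) { fwrite(&v, sizeof v, 1, f); }

/* One dependency record: [int32 pathlen][path bytes][uint64 fsize][uint32 hash][float64 mtime] */
static void write_dep_record(FILE *f, const char *path, uint64_t fsize, uint32_t hash, double mtime)
{
    uint32_t len = (uint32_t)strlen(path);
    uint64_t mtime_bits;
    memcpy(&mtime_bits, &mtime, sizeof mtime_bits);  /* write_float64 above stores the raw bits */
    put_u32(f, len);
    fwrite(path, 1, len, f);
    put_u64(f, fsize);
    put_u32(f, hash);
    put_u64(f, mtime_bits);
}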
+ jl_value_t *prefs_hash = NULL; + jl_value_t *prefs_list = NULL; + JL_GC_PUSH1(&prefs_list); + if (jl_base_module) { + // Toplevel module is the module we're currently compiling, use it to get our preferences hash + jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__")); + jl_value_t * prefs_hash_func = jl_get_global(jl_base_module, jl_symbol("get_preferences_hash")); + jl_value_t * get_compiletime_prefs_func = jl_get_global(jl_base_module, jl_symbol("get_compiletime_preferences")); + + if (toplevel && prefs_hash_func && get_compiletime_prefs_func) { + // Temporary invoke in newest world age + size_t last_age = ct->world_age; + ct->world_age = jl_atomic_load_acquire(&jl_world_counter); + + // call get_compiletime_prefs(__toplevel__) + jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL}; + prefs_list = (jl_value_t*)jl_apply(args, 2); + + // Call get_preferences_hash(__toplevel__, prefs_list) + args[0] = prefs_hash_func; + args[2] = prefs_list; + prefs_hash = (jl_value_t*)jl_apply(args, 3); + + // Reset world age to normal + ct->world_age = last_age; + } + } + + // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file. + if (prefs_hash != NULL && prefs_list != NULL) { + size_t i, l = jl_array_nrows(prefs_list); + for (i = 0; i < l; i++) { + jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i); + size_t slen = jl_string_len(pref_name); + write_int32(s, slen); + ios_write(s, jl_string_data(pref_name), slen); + } + write_int32(s, 0); // terminator + write_uint64(s, jl_unbox_uint64(prefs_hash)); + } + else { + // This is an error path, but let's at least generate a valid `.ji` file. + // We declare an empty list of preference names, followed by a zero-hash. + // The zero-hash is not what would be generated for an empty set of preferences, + // and so this `.ji` file will be invalidated by a future non-erroring pass + // through this function. 
+ write_int32(s, 0); + write_uint64(s, 0); + } + JL_GC_POP(); // for prefs_list + + // write a dummy file position to indicate the beginning of the source-text + pos = ios_pos(s); + ios_seek(s, initial_pos); + write_uint64(s, pos - initial_pos); + ios_seek(s, pos); + write_uint64(s, 0); + return pos; +} + + +// Deserialization + +// Add methods to external (non-worklist-owned) functions +static void jl_insert_methods(jl_array_t *list) +{ + size_t i, l = jl_array_nrows(list); + for (i = 0; i < l; i++) { + jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i); + assert(jl_is_method(meth)); + assert(!meth->is_for_opaque_closure); + jl_methtable_t *mt = jl_method_get_table(meth); + assert((jl_value_t*)mt != jl_nothing); + jl_method_table_insert(mt, meth, NULL); + } +} + +static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key) +{ + size_t i, l = jl_array_nrows(method_roots_list); + for (i = 0; i < l; i+=2) { + jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(method_roots_list, i); + jl_array_t *roots = (jl_array_t*)jl_array_ptr_ref(method_roots_list, i+1); + if (roots) { + assert(jl_is_array(roots)); + jl_append_method_roots(m, key, roots); + } + } +} + + +// verify that these edges intersect with the same methods as before +static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld) +{ + JL_TIMING(VERIFY_IMAGE, VERIFY_Edges); + size_t i, l = jl_array_nrows(targets) / 3; + static jl_value_t *ulong_array JL_ALWAYS_LEAFTYPE = NULL; + if (ulong_array == NULL) + ulong_array = jl_apply_array_type((jl_value_t*)jl_ulong_type, 1); + jl_array_t *maxvalids = jl_alloc_array_1d(ulong_array, l); + memset(jl_array_data(maxvalids, size_t), 0, l * sizeof(size_t)); + jl_value_t *loctag = NULL; + jl_value_t *matches = NULL; + jl_value_t *sig = NULL; + JL_GC_PUSH4(&maxvalids, &matches, &sig, &loctag); + for (i = 0; i < l; i++) { + jl_value_t *invokesig = jl_array_ptr_ref(targets, i * 3); + jl_value_t *callee = jl_array_ptr_ref(targets, i * 3 + 1); + jl_value_t *expected = jl_array_ptr_ref(targets, i * 3 + 2); + size_t min_valid = 0; + size_t max_valid = ~(size_t)0; + if (invokesig) { + assert(callee && "unsupported edge"); + jl_method_t *m = ((jl_method_instance_t*)callee)->def.method; + if (jl_egal(invokesig, m->sig)) { + // the invoke match is `m` for `m->sig`, unless `m` is invalid + if (m->deleted_world < max_valid) + max_valid = 0; + } + else { + jl_methtable_t *mt = jl_method_get_table(m); + if ((jl_value_t*)mt == jl_nothing) { + max_valid = 0; + } + else { + matches = jl_gf_invoke_lookup_worlds(invokesig, (jl_value_t*)mt, minworld, &min_valid, &max_valid); + if (matches == jl_nothing) { + max_valid = 0; + } + else { + matches = (jl_value_t*)((jl_method_match_t*)matches)->method; + if (matches != expected) { + max_valid = 0; + } + } + } + } + } + else { + if (jl_is_method_instance(callee)) { + jl_method_instance_t *mi = (jl_method_instance_t*)callee; + sig = jl_type_intersection(mi->def.method->sig, (jl_value_t*)mi->specTypes); + } + else { + sig = callee; + } + assert(jl_is_array(expected)); + int ambig = 0; + // TODO: possibly need to included ambiguities too (for the optimizer correctness)? + // len + 1 is to allow us to log causes of invalidation (SnoopCompile's @snoopr) + matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, + _jl_debug_method_invalidation ? 
INT32_MAX : jl_array_nrows(expected), + 0, minworld, &min_valid, &max_valid, &ambig); + sig = NULL; + if (matches == jl_nothing) { + max_valid = 0; + } + else { + // setdiff!(matches, expected) + size_t j, k, ins = 0; + if (jl_array_nrows(matches) != jl_array_nrows(expected)) { + max_valid = 0; + } + for (k = 0; k < jl_array_nrows(matches); k++) { + jl_method_t *match = ((jl_method_match_t*)jl_array_ptr_ref(matches, k))->method; + size_t l = jl_array_nrows(expected); + for (j = 0; j < l; j++) + if (match == (jl_method_t*)jl_array_ptr_ref(expected, j)) + break; + if (j == l) { + // intersection has a new method or a method was + // deleted--this is now probably no good, just invalidate + // everything about it now + max_valid = 0; + if (!_jl_debug_method_invalidation) + break; + jl_array_ptr_set(matches, ins++, match); + } + } + if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) + jl_array_del_end((jl_array_t*)matches, jl_array_nrows(matches) - ins); + } + } + jl_array_data(maxvalids, size_t)[i] = max_valid; + if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) { + jl_array_ptr_1d_push(_jl_debug_method_invalidation, invokesig ? (jl_value_t*)invokesig : callee); + loctag = jl_cstr_to_string("insert_backedges_callee"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + loctag = jl_box_int32((int32_t)i); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, matches); + } + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)invokesig); + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)callee); + //ios_puts(valid ? "valid\n" : "INVALID\n", ios_stderr); + } + JL_GC_POP(); + return maxvalids; +} + +// Combine all edges relevant to a method to initialize the maxvalids list +static jl_array_t *jl_verify_methods(jl_array_t *edges, jl_array_t *maxvalids) +{ + JL_TIMING(VERIFY_IMAGE, VERIFY_Methods); + jl_value_t *loctag = NULL; + jl_array_t *maxvalids2 = NULL; + JL_GC_PUSH2(&loctag, &maxvalids2); + size_t i, l = jl_array_nrows(edges) / 2; + maxvalids2 = jl_alloc_array_1d(jl_typeof(maxvalids), l); + size_t *maxvalids2_data = jl_array_data(maxvalids2, size_t); + memset(maxvalids2_data, 0, l * sizeof(size_t)); + for (i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method)); + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); + assert(jl_typetagis((jl_value_t*)callee_ids, jl_array_int32_type)); + if (callee_ids == NULL) { + // serializing the edges had failed + maxvalids2_data[i] = 0; + } + else { + int32_t *idxs = jl_array_data(callee_ids, int32_t); + size_t j; + maxvalids2_data[i] = ~(size_t)0; + for (j = 0; j < idxs[0]; j++) { + int32_t idx = idxs[j + 1]; + size_t max_valid = jl_array_data(maxvalids, size_t)[idx]; + if (max_valid != ~(size_t)0 && _jl_debug_method_invalidation) { + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + loctag = jl_box_int32((int32_t)idx); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + } + if (max_valid < maxvalids2_data[i]) + maxvalids2_data[i] = max_valid; + if (max_valid == 0) + break; + } + } + //jl_static_show((JL_STREAM*)ios_stderr, (jl_value_t*)caller); + //ios_puts(maxvalid2_data[i] == ~(size_t)0 ? 
"valid\n" : "INVALID\n", ios_stderr); + } + JL_GC_POP(); + return maxvalids2; +} + + +// Visit the entire call graph, starting from edges[idx] to determine if that method is valid +// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable +// and slightly modified with an early termination option once the computation reaches its minimum +static int jl_verify_graph_edge(size_t *maxvalids2_data, jl_array_t *edges, size_t idx, arraylist_t *visited, arraylist_t *stack) +{ + if (maxvalids2_data[idx] == 0) { + visited->items[idx] = (void*)1; + return 0; + } + size_t cycle = (size_t)visited->items[idx]; + if (cycle != 0) + return cycle - 1; // depth remaining + jl_value_t *cause = NULL; + arraylist_push(stack, (void*)idx); + size_t depth = stack->len; + visited->items[idx] = (void*)(1 + depth); + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, idx * 2 + 1); + assert(jl_typetagis((jl_value_t*)callee_ids, jl_array_int32_type)); + int32_t *idxs = jl_array_data(callee_ids, int32_t); + size_t i, n = jl_array_nrows(callee_ids); + cycle = depth; + for (i = idxs[0] + 1; i < n; i++) { + int32_t childidx = idxs[i]; + int child_cycle = jl_verify_graph_edge(maxvalids2_data, edges, childidx, visited, stack); + size_t child_max_valid = maxvalids2_data[childidx]; + if (child_max_valid < maxvalids2_data[idx]) { + maxvalids2_data[idx] = child_max_valid; + cause = jl_array_ptr_ref(edges, childidx * 2); + } + if (child_max_valid == 0) { + // found what we were looking for, so terminate early + break; + } + else if (child_cycle && child_cycle < cycle) { + // record the cycle will resolve at depth "cycle" + cycle = child_cycle; + } + } + size_t max_valid = maxvalids2_data[idx]; + if (max_valid != 0 && cycle != depth) + return cycle; + // If we are the top of the current cycle, now mark all other parts of + // our cycle with what we found. + // Or if we found a failed edge, also mark all of the other parts of the + // cycle as also having an failed edge. 
+ while (stack->len >= depth) { + size_t childidx = (size_t)arraylist_pop(stack); + assert(visited->items[childidx] == (void*)(2 + stack->len)); + if (idx != childidx) { + if (max_valid < maxvalids2_data[childidx]) + maxvalids2_data[childidx] = max_valid; + } + visited->items[childidx] = (void*)1; + if (_jl_debug_method_invalidation && max_valid != ~(size_t)0) { + jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(edges, childidx * 2); + jl_value_t *loctag = NULL; + JL_GC_PUSH1(&loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi); + loctag = jl_cstr_to_string("verify_methods"); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag); + jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)cause); + JL_GC_POP(); + } + } + return 0; +} + +// Visit all entries in edges, verify if they are valid +static void jl_verify_graph(jl_array_t *edges, jl_array_t *maxvalids2) +{ + JL_TIMING(VERIFY_IMAGE, VERIFY_Graph); + arraylist_t stack, visited; + arraylist_new(&stack, 0); + size_t i, n = jl_array_nrows(edges) / 2; + arraylist_new(&visited, n); + memset(visited.items, 0, n * sizeof(size_t)); + size_t *maxvalids2_data = jl_array_data(maxvalids2, size_t); + for (i = 0; i < n; i++) { + assert(visited.items[i] == (void*)0 || visited.items[i] == (void*)1); + int child_cycle = jl_verify_graph_edge(maxvalids2_data, edges, i, &visited, &stack); + assert(child_cycle == 0); (void)child_cycle; + assert(stack.len == 0); + assert(visited.items[i] == (void*)1); + } + arraylist_free(&stack); + arraylist_free(&visited); +} + +// Restore backedges to external targets +// `edges` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods. +// `ext_targets` is [invokesig1, callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods. 
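The two flattened tables described in the comment above are indexed with fixed strides: `edges` as pairs (caller, callee-index list) and `ext_targets` as triples (invokesig, callee, expected matches), with the first entry of each index list holding its count. The sketch below only illustrates that indexing, using the same accessors as the functions that follow; the helper name `walk_edge_tables` is made up for the example and is not part of the patch.

static void walk_edge_tables(jl_array_t *edges, jl_array_t *ext_targets)
{
    size_t ncallers = jl_array_nrows(edges) / 2;
    for (size_t i = 0; i < ncallers; i++) {
        // pair layout: [caller1, callee_ids1, caller2, callee_ids2, ...]
        jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i);
        jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1);
        int32_t *idxs = jl_array_data(callee_ids, int32_t);
        for (int32_t j = 0; j < idxs[0]; j++) {     // idxs[0] is the number of targets
            int32_t t = idxs[j + 1];                // index of a triple in ext_targets
            jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, t * 3);
            jl_value_t *callee    = jl_array_ptr_ref(ext_targets, t * 3 + 1);
            jl_value_t *expected  = jl_array_ptr_ref(ext_targets, t * 3 + 2);
            (void)caller; (void)invokesig; (void)callee; (void)expected;
        }
    }
}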
+static void jl_insert_backedges(jl_array_t *edges, jl_array_t *ext_targets, jl_array_t *ci_list, size_t minworld) +{ + // determine which CodeInstance objects are still valid in our image + jl_array_t *valids = jl_verify_edges(ext_targets, minworld); + JL_GC_PUSH1(&valids); + valids = jl_verify_methods(edges, valids); // consumes edges valids, initializes methods valids + jl_verify_graph(edges, valids); // propagates methods valids for each edge + size_t i, l; + + // next build a map from external MethodInstances to their CodeInstance for insertion + l = jl_array_nrows(ci_list); + htable_t visited; + htable_new(&visited, l); + for (i = 0; i < l; i++) { + jl_code_instance_t *ci = (jl_code_instance_t*)jl_array_ptr_ref(ci_list, i); + assert(ci->min_world == minworld); + if (ci->max_world == 1) { // sentinel value: has edges to external callables + ptrhash_put(&visited, (void*)ci->def, (void*)ci); + } + else { + assert(ci->max_world == ~(size_t)0); + jl_method_instance_t *caller = ci->def; + if (jl_atomic_load_relaxed(&ci->inferred) && jl_rettype_inferred(caller, minworld, ~(size_t)0) == jl_nothing) { + jl_mi_cache_insert(caller, ci); + } + //jl_static_show((jl_stream*)ios_stderr, (jl_value_t*)caller); + //ios_puts("free\n", ios_stderr); + } + } + + // next enable any applicable new codes + l = jl_array_nrows(edges) / 2; + for (i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + size_t maxvalid = jl_array_data(valids, size_t)[i]; + if (maxvalid == ~(size_t)0) { + // if this callee is still valid, add all the backedges + jl_array_t *callee_ids = (jl_array_t*)jl_array_ptr_ref(edges, 2 * i + 1); + int32_t *idxs = jl_array_data(callee_ids, int32_t); + for (size_t j = 0; j < idxs[0]; j++) { + int32_t idx = idxs[j + 1]; + jl_value_t *invokesig = jl_array_ptr_ref(ext_targets, idx * 3); + jl_value_t *callee = jl_array_ptr_ref(ext_targets, idx * 3 + 1); + if (callee && jl_is_method_instance(callee)) { + jl_method_instance_add_backedge((jl_method_instance_t*)callee, invokesig, caller); + } + else { + jl_value_t *sig = callee == NULL ? invokesig : callee; + jl_methtable_t *mt = jl_method_table_for(sig); + // FIXME: rarely, `callee` has an unexpected `Union` signature, + // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344 + // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)` + // This workaround exposes us to (rare) 265-violations. + if ((jl_value_t*)mt != jl_nothing) + jl_method_table_add_backedge(mt, sig, (jl_value_t*)caller); + } + } + } + // then enable any methods associated with it + void *ci = ptrhash_get(&visited, (void*)caller); + //assert(ci != HT_NOTFOUND); + if (ci != HT_NOTFOUND) { + // have some new external code to use + assert(jl_is_code_instance(ci)); + jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; + assert(codeinst->min_world == minworld && jl_atomic_load_relaxed(&codeinst->inferred) ); + codeinst->max_world = maxvalid; + if (jl_rettype_inferred(caller, minworld, maxvalid) == jl_nothing) { + jl_mi_cache_insert(caller, codeinst); + } + } + } + + htable_free(&visited); + JL_GC_POP(); +} + +static void classify_callers(htable_t *callers_with_edges, jl_array_t *edges) +{ + size_t l = edges ? 
jl_array_nrows(edges) / 2 : 0; + for (size_t i = 0; i < l; i++) { + jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(edges, 2 * i); + ptrhash_put(callers_with_edges, (void*)caller, (void*)caller); + } +} + +static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *depmods) +{ + if (!jl_main_module->build_id.lo) { + return jl_get_exceptionf(jl_errorexception_type, + "Main module uuid state is invalid for module deserialization."); + } + size_t i, l = jl_array_nrows(depmods); + for (i = 0; ; i++) { + size_t len = read_int32(s); + if (len == 0 && i == l) + return NULL; // success + if (len == 0 || i == l) + return jl_get_exceptionf(jl_errorexception_type, "Wrong number of entries in module list."); + char *name = (char*)alloca(len + 1); + ios_readall(s, name, len); + name[len] = '\0'; + jl_uuid_t uuid; + uuid.hi = read_uint64(s); + uuid.lo = read_uint64(s); + jl_uuid_t build_id; + build_id.hi = read_uint64(s); + build_id.lo = read_uint64(s); + jl_sym_t *sym = _jl_symbol(name, len); + jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(depmods, i); + if (!m || !jl_is_module(m) || m->uuid.hi != uuid.hi || m->uuid.lo != uuid.lo || m->name != sym || + m->build_id.hi != build_id.hi || m->build_id.lo != build_id.lo) { + return jl_get_exceptionf(jl_errorexception_type, + "Invalid input in module list: expected %s.", name); + } + } +} + +static int readstr_verify(ios_t *s, const char *str, int include_null) +{ + size_t i, len = strlen(str) + include_null; + for (i = 0; i < len; ++i) + if ((char)read_uint8(s) != str[i]) + return 0; + return 1; +} + +JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s, uint8_t *pkgimage, int64_t *dataendpos, int64_t *datastartpos) +{ + uint16_t bom; + uint64_t checksum = 0; + if (readstr_verify(s, JI_MAGIC, 0) && + read_uint16(s) == JI_FORMAT_VERSION && + ios_read(s, (char *) &bom, 2) == 2 && bom == BOM && + read_uint8(s) == sizeof(void*) && + readstr_verify(s, JL_BUILD_UNAME, 1) && + readstr_verify(s, JL_BUILD_ARCH, 1) && + readstr_verify(s, JULIA_VERSION_STRING, 1) && + readstr_verify(s, jl_git_branch(), 1) && + readstr_verify(s, jl_git_commit(), 1)) + { + *pkgimage = read_uint8(s); + checksum = read_uint64(s); + *datastartpos = (int64_t)read_uint64(s); + *dataendpos = (int64_t)read_uint64(s); + } + return checksum; +} + +// Returns `depmodidxs` where `j = depmodidxs[i]` corresponds to the blob `depmods[j]` in `write_mod_list` +static jl_array_t *image_to_depmodidx(jl_array_t *depmods) +{ + if (!depmods) + return NULL; + assert(jl_array_nrows(depmods) < INT32_MAX && "too many dependencies to serialize"); + size_t lbids = n_linkage_blobs(); + size_t ldeps = jl_array_nrows(depmods); + jl_array_t *depmodidxs = jl_alloc_array_1d(jl_array_int32_type, lbids); + int32_t *dmidxs = jl_array_data(depmodidxs, int32_t); + memset(dmidxs, -1, lbids * sizeof(int32_t)); + dmidxs[0] = 0; // the sysimg can also be found at idx 0, by construction + for (size_t i = 0, j = 0; i < ldeps; i++) { + jl_value_t *depmod = jl_array_ptr_ref(depmods, i); + size_t idx = external_blob_index(depmod); + if (idx < lbids) { // jl_object_in_image + j++; + if (dmidxs[idx] == -1) + dmidxs[idx] = j; + } + } + return depmodidxs; +} + +// Returns `imageidxs` where `j = imageidxs[i]` is the blob corresponding to `depmods[j]` +static jl_array_t *depmod_to_imageidx(jl_array_t *depmods) +{ + if (!depmods) + return NULL; + size_t ldeps = jl_array_nrows(depmods); + jl_array_t *imageidxs = jl_alloc_array_1d(jl_array_int32_type, ldeps + 1); + int32_t *imgidxs = jl_array_data(imageidxs, 
int32_t); + imgidxs[0] = 0; + for (size_t i = 0; i < ldeps; i++) { + jl_value_t *depmod = jl_array_ptr_ref(depmods, i); + size_t j = external_blob_index(depmod); + assert(j < INT32_MAX); + imgidxs[i + 1] = (int32_t)j; + } + return imageidxs; +} diff --git a/src/subtype.c b/src/subtype.c index aea5b80a5cadf..17a6bf7041e3b 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -65,21 +65,22 @@ typedef struct jl_varbinding_t { jl_value_t *lb; jl_value_t *ub; int8_t right; // whether this variable came from the right side of `A <: B` + int8_t occurs; // occurs in any position int8_t occurs_inv; // occurs in invariant position int8_t occurs_cov; // # of occurrences in covariant position int8_t concrete; // 1 if another variable has a constraint forcing this one to be concrete + int8_t max_offset; // record the maximum positive offset of the variable (up to 32) + // max_offset < 0 if this variable occurs outside VarargNum. // constraintkind: in covariant position, we try three different ways to compute var ∩ type: // let ub = var.ub ∩ type // 0 - var.ub <: type ? var : ub // 1 - var.ub = ub; return var - // 2 - either (var.ub = ub; return var), or return ub + // 2 - var.lb = lb; return ub int8_t constraintkind; - int8_t intvalued; // must be integer-valued; i.e. occurs as N in Vararg{_,N} + int8_t intvalued; // intvalued: must be integer-valued; i.e. occurs as N in Vararg{_,N} int8_t limited; + int8_t intersected; // whether this variable has been intersected int16_t depth0; // # of invariant constructors nested around the UnionAll type for this var - // when this variable's integer value is compared to that of another, - // it equals `other + offset`. used by vararg length parameters. - int16_t offset; // array of typevars that our bounds depend on, whose UnionAlls need to be // moved outside ours. jl_array_t *innervars; @@ -96,12 +97,14 @@ typedef struct jl_stenv_t { jl_value_t **envout; // for passing caller the computed bounds of right-side variables int envsz; // length of envout int envidx; // current index in envout - int invdepth; // # of invariant constructors we're nested in on the left - int Rinvdepth; // # of invariant constructors we're nested in on the right + int invdepth; // current number of invariant constructors we're nested in int ignore_free; // treat free vars as black boxes; used during intersection int intersection; // true iff subtype is being called from intersection int emptiness_only; // true iff intersection only needs to test for emptiness int triangular; // when intersecting Ref{X} with Ref{<:Y} + // Used to represent the length difference between 2 vararg. 
+ // intersect(X, Y) ==> X = Y + Loffset + int Loffset; } jl_stenv_t; // state manipulation utilities @@ -154,71 +157,158 @@ static void statestack_set(jl_unionstate_t *st, int i, int val) JL_NOTSAFEPOINT memcpy(&(dst)->stack, (saved)->stack, ((saved)->used+7)/8); \ } while (0); +static int current_env_length(jl_stenv_t *e) +{ + jl_varbinding_t *v = e->vars; + int len = 0; + while (v) { + len++; + v = v->prev; + } + return len; +} + typedef struct { int8_t *buf; int rdepth; - int8_t _space[16]; + int8_t _space[32]; // == 8 * 4 + jl_gcframe_t gcframe; + jl_value_t *roots[24]; // == 8 * 3 } jl_savedenv_t; -static void save_env(jl_stenv_t *e, jl_value_t **root, jl_savedenv_t *se) +static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root) { - jl_varbinding_t *v = e->vars; - int len=0; - while (v != NULL) { - len++; - v = v->prev; + jl_value_t **roots = NULL; + int nroots = 0; + if (root) { + if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) { + jl_svec_t *sv = (jl_svec_t*)se->roots[0]; + assert(jl_is_svec(sv)); + roots = jl_svec_data(sv); + nroots = jl_svec_len(sv); + } + else { + roots = se->roots; + nroots = se->gcframe.nroots >> 2; + } } - if (root) - *root = (jl_value_t*)jl_alloc_svec(len * 3); - se->buf = (int8_t*)(len > 8 ? malloc_s(len * 2) : &se->_space); -#ifdef __clang_gcanalyzer__ - memset(se->buf, 0, len * 2); -#endif - int i=0, j=0; v = e->vars; + jl_varbinding_t *v = e->vars; + int i = 0, j = 0; while (v != NULL) { if (root) { - jl_svecset(*root, i++, v->lb); - jl_svecset(*root, i++, v->ub); - jl_svecset(*root, i++, (jl_value_t*)v->innervars); + roots[i++] = v->lb; + roots[i++] = v->ub; + roots[i++] = (jl_value_t*)v->innervars; } + se->buf[j++] = v->occurs; se->buf[j++] = v->occurs_inv; se->buf[j++] = v->occurs_cov; + se->buf[j++] = v->max_offset; v = v->prev; } + assert(i == nroots); (void)nroots; se->rdepth = e->Runions.depth; } +static void alloc_env(jl_stenv_t *e, jl_savedenv_t *se, int root) +{ + jl_task_t *ct = jl_current_task; + int len = current_env_length(e); + se->gcframe.nroots = 0; + se->gcframe.prev = NULL; + se->roots[0] = NULL; + if (len > 8) { + if (root) { + se->gcframe.nroots = JL_GC_ENCODE_PUSHARGS(1); + se->gcframe.prev = ct->gcstack; + ct->gcstack = &se->gcframe; + jl_svec_t *sv = jl_alloc_svec(len * 3); + se->roots[0] = (jl_value_t*)sv; + } + } + else { + if (root && len) { + for (int i = 0; i < len * 3; i++) + se->roots[i] = NULL; + se->gcframe.nroots = JL_GC_ENCODE_PUSHARGS(len * 3); + se->gcframe.prev = ct->gcstack; + ct->gcstack = &se->gcframe; + } + } + se->buf = (len > 8 ? 
(int8_t*)malloc_s(len * 4) : se->_space); +#ifdef __clang_gcanalyzer__ + memset(se->buf, 0, len * 3); +#endif +} + +static void save_env(jl_stenv_t *e, jl_savedenv_t *se, int root) +{ + alloc_env(e, se, root); + re_save_env(e, se, root); +} + static void free_env(jl_savedenv_t *se) JL_NOTSAFEPOINT { + if (se->gcframe.nroots) { + assert(jl_current_task->gcstack == &se->gcframe); + JL_GC_POP(); + } if (se->buf != se->_space) free(se->buf); se->buf = NULL; } -static void restore_env(jl_stenv_t *e, jl_value_t *root, jl_savedenv_t *se) JL_NOTSAFEPOINT +static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPOINT { + jl_value_t **roots = NULL; + int nroots = 0; + if (root) { + if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) { + jl_svec_t *sv = (jl_svec_t*)se->roots[0]; + assert(jl_is_svec(sv)); + roots = jl_svec_data(sv); + nroots = jl_svec_len(sv); + } + else { + roots = se->roots; + nroots = se->gcframe.nroots >> 2; + } + } jl_varbinding_t *v = e->vars; int i = 0, j = 0; while (v != NULL) { - if (root) v->lb = jl_svecref(root, i); - i++; - if (root) v->ub = jl_svecref(root, i); - i++; - if (root) v->innervars = (jl_array_t*)jl_svecref(root, i); - i++; + if (root) { + v->lb = roots[i++]; + v->ub = roots[i++]; + v->innervars = (jl_array_t*)roots[i++]; + } + v->occurs = se->buf[j++]; v->occurs_inv = se->buf[j++]; v->occurs_cov = se->buf[j++]; + v->max_offset = se->buf[j++]; v = v->prev; } + assert(i == nroots); (void)nroots; e->Runions.depth = se->rdepth; if (e->envout && e->envidx < e->envsz) memset(&e->envout[e->envidx], 0, (e->envsz - e->envidx)*sizeof(void*)); } +static void clean_occurs(jl_stenv_t *e) +{ + jl_varbinding_t *v = e->vars; + while (v) { + v->occurs = 0; + v = v->prev; + } +} + +#define flip_offset(e) ((e)->Loffset *= -1) + // type utilities // quickly test that two types are identical -static int obviously_egal(jl_value_t *a, jl_value_t *b) +static int obviously_egal(jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT { if (a == (jl_value_t*)jl_typeofbottom_type->super) a = (jl_value_t*)jl_typeofbottom_type; // supertype(typeof(Union{})) is equal to, although distinct from, itself @@ -282,11 +372,8 @@ static int obviously_unequal(jl_value_t *a, jl_value_t *b) if (ad->name != bd->name) return 1; int istuple = (ad->name == jl_tuple_typename); - if ((jl_is_concrete_type(a) || jl_is_concrete_type(b)) && - jl_type_equality_is_identity(a, b)) { - if (!istuple && ad->name != jl_type_typename) // HACK: can't properly normalize Tuple{Float64} == Tuple{<:Float64} like types or Type{T} types - return 1; - } + if (jl_type_equality_is_identity(a, b)) + return 1; size_t i, np; if (istuple) { size_t na = jl_nparams(ad), nb = jl_nparams(bd); @@ -343,19 +430,44 @@ static int in_union(jl_value_t *u, jl_value_t *x) JL_NOTSAFEPOINT return in_union(((jl_uniontype_t*)u)->a, x) || in_union(((jl_uniontype_t*)u)->b, x); } -static int obviously_disjoint(jl_value_t *a, jl_value_t *b, int specificity) +static int obviously_in_union(jl_value_t *u, jl_value_t *x) +{ + jl_value_t *a = NULL, *b = NULL; + if (jl_is_uniontype(x)) { + a = ((jl_uniontype_t*)x)->a; + b = ((jl_uniontype_t*)x)->b; + JL_GC_PUSH2(&a, &b); + int res = obviously_in_union(u, a) && obviously_in_union(u, b); + JL_GC_POP(); + return res; + } + if (jl_is_uniontype(u)) { + a = ((jl_uniontype_t*)u)->a; + b = ((jl_uniontype_t*)u)->b; + JL_GC_PUSH2(&a, &b); + int res = obviously_in_union(a, x) || obviously_in_union(b, x); + JL_GC_POP(); + return res; + } + return obviously_egal(u, x); +} + +int obviously_disjoint(jl_value_t 
*a, jl_value_t *b, int specificity) { if (a == b || a == (jl_value_t*)jl_any_type || b == (jl_value_t*)jl_any_type) return 0; if (specificity && a == (jl_value_t*)jl_typeofbottom_type) return 0; - if (jl_is_concrete_type(a) && jl_is_concrete_type(b) && - jl_type_equality_is_identity(a, b) && - (((jl_datatype_t*)a)->name != jl_tuple_typename || - ((jl_datatype_t*)b)->name != jl_tuple_typename)) + if (jl_is_concrete_type(a) && jl_is_concrete_type(b) && jl_type_equality_is_identity(a, b)) return 1; if (jl_is_unionall(a)) a = jl_unwrap_unionall(a); if (jl_is_unionall(b)) b = jl_unwrap_unionall(b); + if (jl_is_uniontype(a)) + return obviously_disjoint(((jl_uniontype_t *)a)->a, b, specificity) && + obviously_disjoint(((jl_uniontype_t *)a)->b, b, specificity); + if (jl_is_uniontype(b)) + return obviously_disjoint(a, ((jl_uniontype_t *)b)->a, specificity) && + obviously_disjoint(a, ((jl_uniontype_t *)b)->b, specificity); if (jl_is_datatype(a) && jl_is_datatype(b)) { jl_datatype_t *ad = (jl_datatype_t*)a, *bd = (jl_datatype_t*)b; if (ad->name != bd->name) { @@ -431,19 +543,16 @@ static int obviously_disjoint(jl_value_t *a, jl_value_t *b, int specificity) return 0; } +jl_value_t *simple_union(jl_value_t *a, jl_value_t *b); // compute a least upper bound of `a` and `b` static jl_value_t *simple_join(jl_value_t *a, jl_value_t *b) { - if (a == jl_bottom_type || b == (jl_value_t*)jl_any_type || obviously_egal(a,b)) + if (a == jl_bottom_type || b == (jl_value_t*)jl_any_type || obviously_egal(a, b)) return b; if (b == jl_bottom_type || a == (jl_value_t*)jl_any_type) return a; if (!(jl_is_type(a) || jl_is_typevar(a)) || !(jl_is_type(b) || jl_is_typevar(b))) return (jl_value_t*)jl_any_type; - if (jl_is_uniontype(a) && in_union(a, b)) - return a; - if (jl_is_uniontype(b) && in_union(b, a)) - return b; if (jl_is_kind(a) && jl_is_type_type(b) && jl_typeof(jl_tparam0(b)) == a) return a; if (jl_is_kind(b) && jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b) @@ -452,18 +561,14 @@ static jl_value_t *simple_join(jl_value_t *a, jl_value_t *b) return a; if (jl_is_typevar(b) && obviously_egal(a, ((jl_tvar_t*)b)->lb)) return b; - if (!jl_has_free_typevars(a) && !jl_has_free_typevars(b) && - // issue #24521: don't merge Type{T} where typeof(T) varies - !(jl_is_type_type(a) && jl_is_type_type(b) && jl_typeof(jl_tparam0(a)) != jl_typeof(jl_tparam0(b)))) { - if (jl_subtype(a, b)) return b; - if (jl_subtype(b, a)) return a; - } - return jl_new_struct(jl_uniontype_type, a, b); + return simple_union(a, b); } -// compute a greatest lower bound of `a` and `b` -// in many cases, we need to over-estimate this by returning `b`. -static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b) +jl_value_t *simple_intersect(jl_value_t *a, jl_value_t *b, int overesi); +// Compute a greatest lower bound of `a` and `b` +// For the subtype path, we need to over-estimate this by returning `b` in many cases. 
+// But for `merge_env`, we'd better under-estimate and return a `Union{}` +static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b, int overesi) { if (a == (jl_value_t*)jl_any_type || b == jl_bottom_type || obviously_egal(a,b)) return b; @@ -471,10 +576,6 @@ static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b) return a; if (!(jl_is_type(a) || jl_is_typevar(a)) || !(jl_is_type(b) || jl_is_typevar(b))) return jl_bottom_type; - if (jl_is_uniontype(a) && in_union(a, b)) - return b; - if (jl_is_uniontype(b) && in_union(b, a)) - return a; if (jl_is_kind(a) && jl_is_type_type(b) && jl_typeof(jl_tparam0(b)) == a) return b; if (jl_is_kind(b) && jl_is_type_type(a) && jl_typeof(jl_tparam0(a)) == b) @@ -483,56 +584,56 @@ static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b) return a; if (jl_is_typevar(b) && obviously_egal(a, ((jl_tvar_t*)b)->ub)) return b; - if (obviously_disjoint(a, b, 0)) - return jl_bottom_type; - if (!jl_has_free_typevars(a) && !jl_has_free_typevars(b)) { - if (jl_subtype(a, b)) return a; - if (jl_subtype(b, a)) return b; - } - return b; -} - -static jl_unionall_t *rename_unionall(jl_unionall_t *u) -{ - jl_tvar_t *v = jl_new_typevar(u->var->name, u->var->lb, u->var->ub); - jl_value_t *t = NULL; - JL_GC_PUSH2(&v, &t); - t = jl_instantiate_unionall(u, (jl_value_t*)v); - t = jl_new_struct(jl_unionall_type, v, t); - JL_GC_POP(); - return (jl_unionall_t*)t; + return simple_intersect(a, b, overesi); } // main subtyping algorithm static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param); -static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT +static int next_union_state(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT +{ + jl_unionstate_t *state = R ? &e->Runions : &e->Lunions; + if (state->more == 0) + return 0; + // reset `used` and let `pick_union_decision` clean the stack. + state->used = state->more; + statestack_set(state, state->used - 1, 1); + return 1; +} + +static int pick_union_decision(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT { jl_unionstate_t *state = R ? &e->Runions : &e->Lunions; + if (state->depth >= state->used) { + statestack_set(state, state->used, 0); + state->used++; + } + int ui = statestack_get(state, state->depth); + state->depth++; + if (ui == 0) + state->more = state->depth; // memorize that this was the deepest available choice + return ui; +} + +static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT +{ do { - if (state->depth >= state->used) { - statestack_set(state, state->used, 0); - state->used++; - } - int ui = statestack_get(state, state->depth); - state->depth++; - if (ui == 0) { - state->more = state->depth; // memorize that this was the deepest available choice - u = ((jl_uniontype_t*)u)->a; - } - else { + if (pick_union_decision(e, R)) u = ((jl_uniontype_t*)u)->b; - } + else + u = ((jl_uniontype_t*)u)->a; } while (jl_is_uniontype(u)); return u; } -static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param); +static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int limit_slow); // subtype for variable bounds consistency check. needs its own forall/exists environment. 
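A condensed view of how `pick_union_decision` and `next_union_state` (added above) cooperate: each Union encountered consumes one bit of the state stack, and a driver loop re-runs the check for every combination of bits until a check fails or the choices are exhausted. This is the same ∀-side loop that reappears later in `local_forall_exists_subtype`; the wrapper name below is illustrative only, not part of the patch.

static int forall_left_unions(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
{
    int sub;
    e->Lunions.used = 0;
    while (1) {
        e->Lunions.depth = 0;
        e->Lunions.more = 0;
        sub = subtype(x, y, e, param);      // uses the current choice bits for left-side Unions
        if (!sub || !next_union_state(e, 0))
            break;                          // a failure, or no remaining choice to flip
    }
    return sub;                             // 1 only if every combination succeeded
}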
static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) { + if (jl_is_long(x) && jl_is_long(y)) + return jl_unbox_long(x) == jl_unbox_long(y) + e->Loffset; if (x == y) return 1; if (x == jl_bottom_type && jl_is_type(y)) @@ -544,24 +645,16 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) if (x == (jl_value_t*)jl_any_type && jl_is_datatype(y)) return 0; jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions); - jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions); - int sub; - e->Lunions.used = e->Runions.used = 0; - e->Runions.depth = 0; - e->Runions.more = 0; - e->Lunions.depth = 0; - e->Lunions.more = 0; - - sub = forall_exists_subtype(x, y, e, 0); - - pop_unionstate(&e->Runions, &oldRunions); + int sub = local_forall_exists_subtype(x, y, e, 0, 1); pop_unionstate(&e->Lunions, &oldLunions); return sub; } static int subtype_left_var(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) { - if (x == y) + if (jl_is_long(x) && jl_is_long(y)) + return jl_unbox_long(x) == jl_unbox_long(y) + e->Loffset; + if (x == y && !(jl_is_unionall(y))) return 1; if (x == jl_bottom_type && jl_is_type(y)) return 1; @@ -578,15 +671,21 @@ static int subtype_left_var(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int par // of determining whether the variable is concrete. static void record_var_occurrence(jl_varbinding_t *vb, jl_stenv_t *e, int param) JL_NOTSAFEPOINT { + if (vb != NULL) + vb->occurs = 1; if (vb != NULL && param) { // saturate counters at 2; we don't need values bigger than that - if (param == 2 && (vb->right ? e->Rinvdepth : e->invdepth) > vb->depth0) { + if (param == 2 && e->invdepth > vb->depth0) { if (vb->occurs_inv < 2) vb->occurs_inv++; } else if (vb->occurs_cov < 2) { vb->occurs_cov++; } + // Always set `max_offset` to `-1` during the 1st round intersection. + // Would be recovered in `intersect_varargs`/`subtype_tuple_varargs` if needed. + if (!vb->intersected) + vb->max_offset = -1; } } @@ -602,7 +701,9 @@ static int var_outside(jl_stenv_t *e, jl_tvar_t *x, jl_tvar_t *y) return 0; } -static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int R, int d); +static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int depth); + +static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e); // check that type var `b` is <: `a`, and update b's upper bound. static int var_lt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param) @@ -611,6 +712,10 @@ static int var_lt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param) if (bb == NULL) return e->ignore_free || subtype_left_var(b->ub, a, e, param); record_var_occurrence(bb, e, param); + assert(!jl_is_long(a) || e->Loffset == 0); + if (e->Loffset != 0 && !jl_is_typevar(a) && + a != jl_bottom_type && a != (jl_value_t *)jl_any_type) + return 0; if (!bb->right) // check ∀b . b<:a return subtype_left_var(bb->ub, a, e, param); if (bb->ub == a) @@ -620,12 +725,14 @@ static int var_lt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param) // for this to work we need to compute issub(left,right) before issub(right,left), // since otherwise the issub(a, bb.ub) check in var_gt becomes vacuous. 
if (e->intersection) { - jl_value_t *ub = intersect_aside(bb->ub, a, e, 0, bb->depth0); - if (ub != (jl_value_t*)b) + jl_value_t *ub = intersect_aside(a, bb->ub, e, bb->depth0); + JL_GC_PUSH1(&ub); + if (ub != (jl_value_t*)b && (!jl_is_typevar(ub) || !reachable_var(ub, b, e))) bb->ub = ub; + JL_GC_POP(); } else { - bb->ub = simple_meet(bb->ub, a); + bb->ub = simple_meet(bb->ub, a, 1); } assert(bb->ub != (jl_value_t*)b); if (jl_is_typevar(a)) { @@ -639,8 +746,6 @@ static int var_lt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param) return 1; } -static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT; - // check that type var `b` is >: `a`, and update b's lower bound. static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param) { @@ -648,19 +753,21 @@ static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param) if (bb == NULL) return e->ignore_free || subtype_left_var(a, b->lb, e, param); record_var_occurrence(bb, e, param); + assert(!jl_is_long(a) || e->Loffset == 0); + if (e->Loffset != 0 && !jl_is_typevar(a) && + a != jl_bottom_type && a != (jl_value_t *)jl_any_type) + return 0; if (!bb->right) // check ∀b . b>:a return subtype_left_var(a, bb->lb, e, param); - if (bb->lb == bb->ub) { - if (jl_is_typevar(bb->lb) && !jl_is_type(a) && !jl_is_typevar(a)) - return var_gt((jl_tvar_t*)bb->lb, a, e, param); - if (jl_is_typevar(a) && !jl_is_type(bb->lb) && !jl_is_typevar(bb->lb)) - return var_lt((jl_tvar_t*)a, bb->lb, e, param); - } + if (bb->lb == a) + return 1; if (!((bb->ub == (jl_value_t*)jl_any_type && !jl_is_type(a) && !jl_is_typevar(a)) || subtype_ccheck(a, bb->ub, e))) return 0; jl_value_t *lb = simple_join(bb->lb, a); - if (!e->intersection || !subtype_by_bounds(lb, (jl_value_t*)b, e)) + JL_GC_PUSH1(&lb); + if (!e->intersection || !jl_is_typevar(lb) || !reachable_var(lb, b, e)) bb->lb = lb; + JL_GC_POP(); // this bound should not be directly circular assert(bb->lb != (jl_value_t*)b); if (jl_is_typevar(a)) { @@ -671,6 +778,30 @@ static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param) return 1; } +static int subtype_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int R, int param) +{ + if (e->intersection) { + jl_varbinding_t *bb = lookup(e, (jl_tvar_t*)b); + jl_value_t *bub = bb ? bb->ub : ((jl_tvar_t*)b)->ub; + jl_value_t *blb = bb ? bb->lb : ((jl_tvar_t*)b)->lb; + if (bub == blb && jl_is_typevar(bub)) { + int sub = subtype_var((jl_tvar_t *)bub, a, e, R, param); + return sub; + } + } + if (e->Loffset != 0 && jl_is_long(a)) { + int old_offset = R ? -e->Loffset : e->Loffset; + jl_value_t *na = jl_box_long(jl_unbox_long(a) + old_offset); + JL_GC_PUSH1(&na); + e->Loffset = 0; + int sub = R ? var_gt(b, na, e, param) : var_lt(b, na, e, param); + e->Loffset = R ? -old_offset : old_offset; + JL_GC_POP(); + return sub; + } + return R ? var_gt(b, a, e, param) : var_lt(b, a, e, param); +} + // check that a type is concrete or quasi-concrete (Type{T}). // this is used to check concrete typevars: // issubtype is false if the lower bound of a concrete type var is not concrete. @@ -716,13 +847,15 @@ static jl_value_t *widen_Type(jl_value_t *t JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT // when a static parameter value is not known exactly. 
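For the `Loffset` handling in `subtype_var` above: when a literal Vararg length meets a bound length variable, the literal is adjusted by the difference in the number of tuple elements already consumed on the two sides before the variable is constrained. A hedged, embedding-style illustration follows, assuming a process that has already called `jl_init`; it uses only the public `jl_eval_string`/`jl_subtype` entry points, and the function name is made up for the example.

#include <julia.h>
#include <assert.h>

static void demo_vararg_length_offset(void)
{
    jl_value_t *x = jl_eval_string("Tuple{Int, Vararg{Int, 2}}");
    jl_value_t *y = jl_eval_string("Tuple{Vararg{Int, N}} where N");
    JL_GC_PUSH2(&x, &y);
    // The fixed leading Int on the left is absorbed by the right-hand Vararg,
    // so the length variable is solved as N == 2 + 1 under the offset bookkeeping.
    assert(jl_subtype(x, y));
    JL_GC_POP();
}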
static jl_value_t *fix_inferred_var_bound(jl_tvar_t *var, jl_value_t *ty JL_MAYBE_UNROOTED) { + if (ty == NULL) // may happen if the user is intersecting with an incomplete type + return (jl_value_t*)var; if (!jl_is_typevar(ty) && jl_has_free_typevars(ty)) { jl_value_t *ans = ty; jl_array_t *vs = NULL; JL_GC_PUSH2(&ans, &vs); vs = jl_find_free_typevars(ty); int i; - for (i = 0; i < jl_array_len(vs); i++) { + for (i = 0; i < jl_array_nrows(vs); i++) { ans = jl_type_unionall((jl_tvar_t*)jl_array_ptr_ref(vs, i), ans); } ans = (jl_value_t*)jl_new_typevar(var->name, jl_bottom_type, ans); @@ -736,7 +869,7 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want typedef int (*tvar_callback)(void*, int8_t, jl_stenv_t *, int); -static int var_occurs_invariant(jl_value_t *v, jl_tvar_t *var, int inv) JL_NOTSAFEPOINT +static int var_occurs_invariant(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT { return var_occurs_inside(v, var, 0, 1); } @@ -752,7 +885,7 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e) // outer var can only refer to inner var if bounds changed (btemp->lb != btemp->var->lb && jl_has_typevar(btemp->lb, u->var)) || (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var))) { - u = rename_unionall(u); + u = jl_rename_unionall(u); break; } btemp = btemp->prev; @@ -764,8 +897,8 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e) static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param) { u = unalias_unionall(u, e); - jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, - R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars }; + jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0, 0, + e->invdepth, NULL, e->vars }; JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars); e->vars = &vb; int ans; @@ -776,44 +909,16 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 // widen Type{x} to typeof(x) in argument position if (!vb.occurs_inv) vb.lb = widen_Type(vb.lb); - // fill variable values into `envout` up to `envsz` - if (e->envidx < e->envsz) { - jl_value_t *val; - if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type) - val = (jl_value_t*)jl_wrap_vararg(NULL, NULL); - else if (!vb.occurs_inv && vb.lb != jl_bottom_type) - val = is_leaf_bound(vb.lb) ? vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb); - else if (vb.lb == vb.ub) - val = vb.lb; - else if (vb.lb != jl_bottom_type) - // TODO: for now return the least solution, which is what - // method parameters expect. - val = vb.lb; - else if (vb.lb == u->var->lb && vb.ub == u->var->ub) - val = (jl_value_t*)u->var; - else - val = (jl_value_t*)jl_new_typevar(u->var->name, vb.lb, vb.ub); - jl_value_t *oldval = e->envout[e->envidx]; - // if we try to assign different variable values (due to checking - // multiple union members), consider the value unknown. - if (oldval && !jl_egal(oldval, val)) - e->envout[e->envidx] = (jl_value_t*)u->var; - else - e->envout[e->envidx] = fix_inferred_var_bound(u->var, val); - // TODO: substitute the value (if any) of this variable into previous envout entries } - } - else { - ans = R ? subtype(t, u->body, e, param) : - subtype(u->body, t, e, param); - } + else + ans = subtype(u->body, t, e, param); // handle the "diagonal dispatch" rule, which says that a type var occurring more // than once, and only in covariant position, is constrained to concrete types. E.g. 
// ( Tuple{Int, Int} <: Tuple{T, T} where T) but // !( Tuple{Int, String} <: Tuple{T, T} where T) // Then check concreteness by checking that the lower bound is not an abstract type. - int diagonal = vb.occurs_cov > 1 && !var_occurs_invariant(u->body, u->var, 0); + int diagonal = vb.occurs_cov > 1 && !var_occurs_invariant(u->body, u->var); if (ans && (vb.concrete || (diagonal && is_leaf_typevar(u->var)))) { if (vb.concrete && !diagonal && !is_leaf_bound(vb.ub)) { // a non-diagonal var can only be a subtype of a diagonal var if its @@ -845,14 +950,41 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 jl_value_t *vl = btemp->lb; // TODO: this takes a significant amount of time if (btemp->depth0 != vb.depth0 && - ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_inside(vu, vb.var, 0, 1)) || - (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_inside(vl, vb.var, 0, 1)))) { + ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_inside(vu, vb.var, 0, 0)) || + (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_inside(vl, vb.var, 0, 0)))) { ans = 0; break; } btemp = btemp->prev; } } + // fill variable values into `envout` up to `envsz` + if (R && ans && e->envidx < e->envsz) { + jl_value_t *val; + if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type) + val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0); // special token result that represents N::Int in the envout + else if (!vb.occurs_inv && vb.lb != jl_bottom_type) + val = is_leaf_bound(vb.lb) ? vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb); + else if (vb.lb == vb.ub) + val = vb.lb; + else if (vb.lb != jl_bottom_type) + // TODO: for now return the least solution, which is what + // method parameters expect. + val = vb.lb; + else if (vb.lb == u->var->lb && vb.ub == u->var->ub) + val = (jl_value_t*)u->var; + else + val = (jl_value_t*)jl_new_typevar(u->var->name, vb.lb, vb.ub); + jl_value_t *oldval = e->envout[e->envidx]; + // if we try to assign different variable values (due to checking + // multiple union members), consider the value unknown. 
+ if (oldval && !jl_egal(oldval, val)) + e->envout[e->envidx] = (jl_value_t*)u->var; + else + e->envout[e->envidx] = val; + // TODO: substitute the value (if any) of this variable into previous envout entries + } + JL_GC_POP(); return ans; } @@ -866,10 +998,8 @@ static int check_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e) jl_value_t *nn = jl_box_long(n); JL_GC_PUSH1(&nn); e->invdepth++; - e->Rinvdepth++; int ans = subtype(nn, N, e, 2) && subtype(N, nn, e, 0); e->invdepth--; - e->Rinvdepth--; JL_GC_POP(); if (!ans) return 0; @@ -879,17 +1009,6 @@ static int check_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e) static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e); -struct subtype_tuple_env { - jl_datatype_t *xd, *yd; - jl_value_t *lastx, *lasty; - size_t lx, ly; - size_t i, j; - int vx, vy; - jl_value_t *vtx; - jl_value_t *vty; - jl_vararg_kind_t vvx, vvy; -} JL_ROOTED_VALUE_COLLECTION; - static int subtype_tuple_varargs( jl_vararg_t *vtx, jl_vararg_t *vty, size_t vx, size_t vy, @@ -898,39 +1017,30 @@ static int subtype_tuple_varargs( jl_value_t *xp0 = jl_unwrap_vararg(vtx); jl_value_t *xp1 = jl_unwrap_vararg_num(vtx); jl_value_t *yp0 = jl_unwrap_vararg(vty); jl_value_t *yp1 = jl_unwrap_vararg_num(vty); + jl_varbinding_t *xlv = NULL, *ylv = NULL; + if (xp1 && jl_is_typevar(xp1)) + xlv = lookup(e, (jl_tvar_t*)xp1); + if (yp1 && jl_is_typevar(yp1)) + ylv = lookup(e, (jl_tvar_t*)yp1); + + int8_t max_offsetx = xlv ? xlv->max_offset : 0; + int8_t max_offsety = ylv ? ylv->max_offset : 0; + + jl_value_t *xl = xlv ? xlv->lb : xp1; + jl_value_t *yl = ylv ? ylv->lb : yp1; + if (!xp1) { - jl_value_t *yl = yp1; - if (yl) { - // Unconstrained on the left, constrained on the right - if (jl_is_typevar(yl)) { - jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl); - if (ylv) - yl = ylv->lb; - } - if (jl_is_long(yl)) { - return 0; - } - } + // Unconstrained on the left, constrained on the right + if (yl && jl_is_long(yl)) + return 0; } else { - jl_value_t *xl = jl_unwrap_vararg_num(vtx); - if (jl_is_typevar(xl)) { - jl_varbinding_t *xlv = lookup(e, (jl_tvar_t*)xl); - if (xlv) - xl = xlv->lb; - } if (jl_is_long(xl)) { if (jl_unbox_long(xl) + 1 == vx) { // LHS is exhausted. We're a subtype if the RHS is either // exhausted as well or unbounded (in which case we need to // set it to 0). - jl_value_t *yl = jl_unwrap_vararg_num(vty); if (yl) { - if (jl_is_typevar(yl)) { - jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl); - if (ylv) - yl = ylv->lb; - } if (jl_is_long(yl)) { return jl_unbox_long(yl) + 1 == vy; } @@ -970,32 +1080,63 @@ static int subtype_tuple_varargs( } if (ylv) { - if (ylv->depth0 != e->invdepth || ylv->occurs_inv) + if (ylv->depth0 != e->invdepth || + ylv->lb != jl_bottom_type || + ylv->ub != (jl_value_t *)jl_any_type) return 0; ylv->intvalued = 1; } // set lb to Any. Since `intvalued` is set, we'll interpret that // appropriately. e->invdepth++; - e->Rinvdepth++; int ans = subtype((jl_value_t*)jl_any_type, yp1, e, 2); + if (ylv && !ylv->intersected) + ylv->max_offset = max_offsety; e->invdepth--; - e->Rinvdepth--; return ans; } // Vararg{T,N} <: Vararg{T2,N2}; equate N and N2 e->invdepth++; - e->Rinvdepth++; JL_GC_PUSH2(&xp1, &yp1); - if (xp1 && jl_is_long(xp1) && vx != 1) - xp1 = jl_box_long(jl_unbox_long(xp1) - vx + 1); - if (jl_is_long(yp1) && vy != 1) - yp1 = jl_box_long(jl_unbox_long(yp1) - vy + 1); - int ans = forall_exists_equal(xp1, yp1, e); + int ans; + jl_varbinding_t *bxp1 = jl_is_typevar(xp1) ? 
lookup(e, (jl_tvar_t *)xp1) : NULL; + jl_varbinding_t *byp1 = jl_is_typevar(yp1) ? lookup(e, (jl_tvar_t *)yp1) : NULL; + if (bxp1) { + if (bxp1->intvalued == 0) + bxp1->intvalued = 1; + if (jl_is_long(bxp1->lb)) + xp1 = bxp1->lb; + } + if (byp1) { + if (byp1->intvalued == 0) + byp1->intvalued = 1; + if (jl_is_long(byp1->lb)) + yp1 = byp1->lb; + } + if (jl_is_long(xp1) && jl_is_long(yp1)) + ans = jl_unbox_long(xp1) - vx == jl_unbox_long(yp1) - vy; + else { + if (jl_is_long(xp1) && vx != vy) { + xp1 = jl_box_long(jl_unbox_long(xp1) + vy - vx); + vx = vy; + } + if (jl_is_long(yp1) && vy != vx) { + yp1 = jl_box_long(jl_unbox_long(yp1) + vx - vy); + vy = vx; + } + assert(e->Loffset == 0); + e->Loffset = vx - vy; + ans = forall_exists_equal(xp1, yp1, e); + assert(e->Loffset == vx - vy); + e->Loffset = 0; + } JL_GC_POP(); + if (ylv && !ylv->intersected) + ylv->max_offset = max_offsety; + if (xlv && !xlv->intersected) + xlv->max_offset = max_offsetx; e->invdepth--; - e->Rinvdepth--; return ans; } @@ -1033,7 +1174,8 @@ static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl xi = jl_tparam(xd, lx-1); if (jl_is_vararg(xi)) { all_varargs = 1; - vy += lx - i; + // count up to lx-2 rather than lx-1. + vy += lx - i - 1; vx = 1; } else { break; @@ -1053,25 +1195,21 @@ static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl return !!vx; xi = vx ? jl_unwrap_vararg(xi) : xi; - int x_same = lastx && jl_egal(xi, lastx); - if (vy) { - yi = jl_unwrap_vararg(yi); - // keep track of number of consecutive identical types compared to Vararg - if (x_same) - x_reps++; - else - x_reps = 1; - } + yi = vy ? jl_unwrap_vararg(yi) : yi; + int x_same = vx > 1 || (lastx && obviously_egal(xi, lastx)); + int y_same = vy > 1 || (lasty && obviously_egal(yi, lasty)); + // keep track of number of consecutive identical subtyping + x_reps = y_same && x_same ? x_reps + 1 : 1; if (x_reps > 2) { - // an identical type on the left doesn't need to be compared to a Vararg + // an identical type on the left doesn't need to be compared to the same // element type on the right more than twice. } else if (x_same && e->Runions.depth == 0 && - ((yi == lasty && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) || + ((y_same && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) || (yi == lastx && !vx && vy && jl_is_concrete_type(xi)))) { // fast path for repeated elements } - else if (e->Runions.depth == 0 && e->Lunions.depth == 0 && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) { + else if (e->Runions.depth == 0 && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) { // fast path for separable sub-formulas if (!jl_subtype(xi, yi)) return 0; @@ -1180,15 +1318,9 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) // of unions and vars: if matching `typevar <: union`, first try to match the whole // union against the variable before trying to take it apart to see if there are any // variables lurking inside. - jl_unionstate_t *state = &e->Runions; - if (state->depth >= state->used) { - statestack_set(state, state->used, 0); - state->used++; - } - ui = statestack_get(state, state->depth); - state->depth++; - if (ui == 0) - state->more = state->depth; // memorize that this was the deepest available choice + // note: for forall var, there's no need to split y if it has no free typevars. 
+ jl_varbinding_t *xx = lookup(e, (jl_tvar_t *)x); + ui = ((xx && xx->right) || jl_has_free_typevars(y)) && pick_union_decision(e, 1); } if (ui == 1) y = pick_union_element(y, e, 1); @@ -1228,10 +1360,20 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) // to other left-side variables, so using || here is safe. return subtype(xub, y, e, param) || subtype(x, ylb, e, param); } - return var_lt((jl_tvar_t*)x, y, e, param); + if (jl_is_unionall(y)) { + jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)x); + if (xb == NULL ? !e->ignore_free : !xb->right) { + // We'd better unwrap `y::UnionAll` eagerly if `x` isa ∀-var. + // This makes sure the following cases work correct: + // 1) `∀T <: Union{∃S, SomeType{P}} where {P}`: `S == Any` ==> `S >: T` + // 2) `∀T <: Union{∀T, SomeType{P}} where {P}`: + return subtype_unionall(x, (jl_unionall_t*)y, e, 1, param); + } + } + return subtype_var((jl_tvar_t*)x, y, e, 0, param); } if (jl_is_typevar(y)) - return var_gt((jl_tvar_t*)y, x, e, param); + return subtype_var((jl_tvar_t*)y, x, e, 1, param); if (y == (jl_value_t*)jl_any_type && !jl_has_free_typevars(x)) return 1; if (x == jl_bottom_type && !jl_has_free_typevars(y)) @@ -1282,15 +1424,14 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) // The answer is true iff `T` has full bounds (as in `Type`), but this needs to // be checked at the same depth where `Type{T}` occurs --- the depth of the LHS // doesn't matter because it (e.g. `DataType`) doesn't actually contain the variable. - int saved = e->invdepth; - e->invdepth = e->Rinvdepth; int issub = subtype((jl_value_t*)jl_type_type, y, e, param); - e->invdepth = saved; return issub; } while (xd != jl_any_type && xd->name != yd->name) { - if (xd->super == NULL) + if (xd->super == NULL) { + assert(xd->parameters && jl_is_typename(xd->name)); jl_errorf("circular type parameter constraint in definition of %s", jl_symbol_name(xd->name->name)); + } xd = xd->super; } if (xd == jl_any_type) return 0; @@ -1299,7 +1440,6 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) size_t i, np = jl_nparams(xd); int ans = 1; e->invdepth++; - e->Rinvdepth++; for (i=0; i < np; i++) { jl_value_t *xi = jl_tparam(xd, i), *yi = jl_tparam(yd, i); if (!(xi == yi || forall_exists_equal(xi, yi, e))) { @@ -1307,11 +1447,12 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) } } e->invdepth--; - e->Rinvdepth--; return ans; } if (jl_is_type(y)) return x == jl_bottom_type; + if (jl_is_long(x) && jl_is_long(y)) + return jl_unbox_long(x) == jl_unbox_long(y) + e->Loffset; return jl_egal(x, y); } @@ -1338,54 +1479,126 @@ static int is_definite_length_tuple_type(jl_value_t *x) return k == JL_VARARG_NONE || k == JL_VARARG_INT; } -static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) -{ - if (obviously_egal(x, y)) return 1; +static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int *count, int *noRmore); - if ((is_indefinite_length_tuple_type(x) && is_definite_length_tuple_type(y)) || - (is_definite_length_tuple_type(x) && is_indefinite_length_tuple_type(y))) +static int may_contain_union_decision(jl_value_t *x, jl_stenv_t *e, jl_typeenv_t *log) JL_NOTSAFEPOINT +{ + if (x == NULL || x == (jl_value_t*)jl_any_type || x == jl_bottom_type) return 0; + if (jl_is_unionall(x)) + return may_contain_union_decision(((jl_unionall_t *)x)->body, e, log); + if (jl_is_datatype(x)) { + jl_datatype_t *xd = (jl_datatype_t *)x; + for (int i = 0; i < 
jl_nparams(xd); i++) { + jl_value_t *param = jl_tparam(xd, i); + if (jl_is_vararg(param)) + param = jl_unwrap_vararg(param); + if (may_contain_union_decision(param, e, log)) + return 1; + } + return 0; + } + if (!jl_is_typevar(x)) + return jl_is_type(x); + jl_typeenv_t *t = log; + while (t != NULL) { + if (x == (jl_value_t *)t->var) + return 1; + t = t->prev; + } + jl_typeenv_t newlog = { (jl_tvar_t*)x, NULL, log }; + jl_varbinding_t *xb = lookup(e, (jl_tvar_t *)x); + return may_contain_union_decision(xb ? xb->lb : ((jl_tvar_t *)x)->lb, e, &newlog) || + may_contain_union_decision(xb ? xb->ub : ((jl_tvar_t *)x)->ub, e, &newlog); +} - jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions); - e->Lunions.used = 0; +static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int limit_slow) +{ + int16_t oldRmore = e->Runions.more; int sub; - - if (!jl_has_free_typevars(x) || !jl_has_free_typevars(y)) { + // fast-path for #49857 + if (obviously_in_union(y, x)) + return 1; + int kindx = !jl_has_free_typevars(x); + int kindy = !jl_has_free_typevars(y); + if (kindx && kindy) + return jl_subtype(x, y); + if (may_contain_union_decision(y, e, NULL) && pick_union_decision(e, 1) == 0) { jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions); - e->Runions.used = 0; - e->Runions.depth = 0; - e->Runions.more = 0; - e->Lunions.depth = 0; - e->Lunions.more = 0; - - sub = forall_exists_subtype(x, y, e, 2); - + e->Lunions.used = e->Runions.used = 0; + e->Lunions.depth = e->Runions.depth = 0; + e->Lunions.more = e->Runions.more = 0; + int count = 0, noRmore = 0; + sub = _forall_exists_subtype(x, y, e, param, &count, &noRmore); pop_unionstate(&e->Runions, &oldRunions); + // we should not try the slow path if `forall_exists_subtype` has tested all cases; + // Once limit_slow == 1, also skip it if + // 1) `forall_exists_subtype` return false + // 2) the left `Union` looks big + if (limit_slow == -1) + limit_slow = kindx || kindy; + if (noRmore || (limit_slow && (count > 3 || !sub))) + e->Runions.more = oldRmore; } else { - int lastset = 0; + // slow path + e->Lunions.used = 0; while (1) { e->Lunions.more = 0; e->Lunions.depth = 0; - sub = subtype(x, y, e, 2); - int set = e->Lunions.more; - if (!sub || !set) + sub = subtype(x, y, e, param); + if (!sub || !next_union_state(e, 0)) break; - for (int i = set; i <= lastset; i++) - statestack_set(&e->Lunions, i, 0); - lastset = set - 1; - statestack_set(&e->Lunions, lastset, 1); } } + return sub; +} + +static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) +{ + if (obviously_egal(x, y)) return 1; + + if ((is_indefinite_length_tuple_type(x) && is_definite_length_tuple_type(y)) || + (is_definite_length_tuple_type(x) && is_indefinite_length_tuple_type(y))) + return 0; + + if (jl_is_datatype(x) && jl_is_datatype(y)) { + // Fastpath for nested constructor. Skip the unneeded `>:` check. + // Note: since there is no changes to the environment or union stack implied by `x` or `y`, this will simply forward to calling + // `forall_exists_equal(xi, yi, e)` on each parameter `(xi, yi)` of `(x, y)`, + // which means this subtype call will give the same result for `subtype(x, y)` and `subtype(y, x)`. 
+ jl_datatype_t *xd = (jl_datatype_t*)x, *yd = (jl_datatype_t*)y; + if (xd->name != yd->name) + return 0; + if (xd->name != jl_tuple_typename) + return subtype(x, y, e, 2); + } + + if ((jl_is_uniontype(x) && jl_is_uniontype(y))) { + // For 2 unions, first try a more efficient greedy algorithm that compares the unions + // componentwise. If failed, `exists_subtype` would memorize that this branch should be skipped. + // Note: this is valid because the normal path checks `>:` locally. + if (pick_union_decision(e, 1) == 0) { + return forall_exists_equal(((jl_uniontype_t *)x)->a, ((jl_uniontype_t *)y)->a, e) && + forall_exists_equal(((jl_uniontype_t *)x)->b, ((jl_uniontype_t *)y)->b, e); + } + } + + jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions); + int sub = local_forall_exists_subtype(x, y, e, 2, -1); + if (sub) { + flip_offset(e); + sub = local_forall_exists_subtype(y, x, e, 0, 0); + flip_offset(e); + } pop_unionstate(&e->Lunions, &oldLunions); - return sub && subtype(y, x, e, 0); + return sub; } -static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_value_t *saved, jl_savedenv_t *se, int param) +static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_savedenv_t *se, int param) { e->Runions.used = 0; - int lastset = 0; while (1) { e->Runions.depth = 0; e->Runions.more = 0; @@ -1393,63 +1606,68 @@ static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_value_ e->Lunions.more = 0; if (subtype(x, y, e, param)) return 1; - restore_env(e, saved, se); - int set = e->Runions.more; - if (!set) + if (next_union_state(e, 1)) { + // We preserve `envout` here as `subtype_unionall` needs previous assigned env values. + int oldidx = e->envidx; + e->envidx = e->envsz; + restore_env(e, se, 1); + e->envidx = oldidx; + } + else { + restore_env(e, se, 1); return 0; - for (int i = set; i <= lastset; i++) - statestack_set(&e->Runions, i, 0); - lastset = set - 1; - statestack_set(&e->Runions, lastset, 1); + } } } -static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) +static int _forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param, int *count, int *noRmore) { // The depth recursion has the following shape, after simplification: // ∀₁ // ∃₁ assert(e->Runions.depth == 0); assert(e->Lunions.depth == 0); - jl_value_t *saved=NULL; jl_savedenv_t se; - JL_GC_PUSH1(&saved); - save_env(e, &saved, &se); + jl_savedenv_t se; + save_env(e, &se, 1); e->Lunions.used = 0; - int lastset = 0; int sub; + if (count) *count = 0; + if (noRmore) *noRmore = 1; while (1) { - sub = exists_subtype(x, y, e, saved, &se, param); - int set = e->Lunions.more; - if (!sub || !set) + sub = exists_subtype(x, y, e, &se, param); + if (count) *count = (*count < 4) ? 
*count + 1 : 4; + if (noRmore) *noRmore = *noRmore && e->Runions.more == 0; + if (!sub || !next_union_state(e, 0)) break; - free_env(&se); - save_env(e, &saved, &se); - for (int i = set; i <= lastset; i++) - statestack_set(&e->Lunions, i, 0); - lastset = set - 1; - statestack_set(&e->Lunions, lastset, 1); + re_save_env(e, &se, 1); } free_env(&se); - JL_GC_POP(); return sub; } +static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param) +{ + return _forall_exists_subtype(x, y, e, param, NULL, NULL); +} + static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz) { e->vars = NULL; - assert(env != NULL || envsz == 0); e->envsz = envsz; e->envout = env; - if (envsz) + if (envsz) { + assert(env != NULL); memset(env, 0, envsz*sizeof(void*)); + } e->envidx = 0; - e->invdepth = e->Rinvdepth = 0; + e->invdepth = 0; e->ignore_free = 0; e->intersection = 0; e->emptiness_only = 0; e->triangular = 0; + e->Loffset = 0; e->Lunions.depth = 0; e->Runions.depth = 0; e->Lunions.more = 0; e->Runions.more = 0; e->Lunions.used = 0; e->Runions.used = 0; @@ -1632,7 +1850,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su if (jl_is_datatype(y)) { int istuple = (((jl_datatype_t*)y)->name == jl_tuple_typename); int iscov = istuple; - // TODO: this would be a nice fast-path to have, unfortuanately, + // TODO: this would be a nice fast-path to have, unfortunately, // datatype allocation fails to correctly hash-cons them // and the subtyping tests include tests for this case //if (!iscov && ((jl_datatype_t*)y)->isconcretetype && !jl_is_type_type(x)) { @@ -1776,7 +1994,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su jl_value_t *body = find_var_body(y0, (jl_tvar_t*)b); if (body == NULL) body = y0; - if (var_occurs_invariant(body, (jl_tvar_t*)b, 0)) + if (var_occurs_invariant(body, (jl_tvar_t*)b)) return 0; } if (nparams_expanded_x > npy && jl_is_typevar(b) && concrete_min(a1) > 1) { @@ -1887,34 +2105,34 @@ JL_DLLEXPORT int jl_subtype_env(jl_value_t *x, jl_value_t *y, jl_value_t **env, if (obvious_subtype == 0 || (obvious_subtype == 1 && envsz == 0)) subtype = obvious_subtype; // this ensures that running in a debugger doesn't change the result #endif + if (env) { + jl_unionall_t *ub = (jl_unionall_t*)y; + int i; + for (i = 0; i < envsz; i++) { + assert(jl_is_unionall(ub)); + jl_tvar_t *var = ub->var; + env[i] = fix_inferred_var_bound(var, env[i]); + ub = (jl_unionall_t*)ub->body; + } + } return subtype; } -static int subtype_in_env_(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int invdepth, int Rinvdepth) +static int subtype_in_env(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) { jl_stenv_t e2; init_stenv(&e2, NULL, 0); e2.vars = e->vars; e2.intersection = e->intersection; e2.ignore_free = e->ignore_free; - e2.invdepth = invdepth; - e2.Rinvdepth = Rinvdepth; + e2.invdepth = e->invdepth; e2.envsz = e->envsz; e2.envout = e->envout; e2.envidx = e->envidx; + e2.Loffset = e->Loffset; return forall_exists_subtype(x, y, &e2, 0); } -static int subtype_in_env(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) -{ - return subtype_in_env_(x, y, e, e->invdepth, e->Rinvdepth); -} - -static int subtype_bounds_in_env(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int R, int d) -{ - return subtype_in_env_(x, y, e, R ? e->invdepth : d, R ? 
d : e->Rinvdepth); -} - JL_DLLEXPORT int jl_subtype(jl_value_t *x, jl_value_t *y) { return jl_subtype_env(x, y, NULL, 0); @@ -2048,23 +2266,39 @@ int jl_has_intersect_type_not_kind(jl_value_t *t) t = jl_unwrap_unionall(t); if (t == (jl_value_t*)jl_any_type) return 1; - if (jl_is_uniontype(t)) { + assert(!jl_is_vararg(t)); + if (jl_is_uniontype(t)) return jl_has_intersect_type_not_kind(((jl_uniontype_t*)t)->a) || jl_has_intersect_type_not_kind(((jl_uniontype_t*)t)->b); - } - if (jl_is_typevar(t)) { + if (jl_is_typevar(t)) return jl_has_intersect_type_not_kind(((jl_tvar_t*)t)->ub); - } - if (jl_is_datatype(t)) { + if (jl_is_datatype(t)) if (((jl_datatype_t*)t)->name == jl_type_typename) return 1; - } return 0; } +// compute if DataType<:t || Union<:t || UnionAll<:t etc. +int jl_has_intersect_kind_not_type(jl_value_t *t) +{ + t = jl_unwrap_unionall(t); + if (t == (jl_value_t*)jl_any_type || jl_is_kind(t)) + return 1; + assert(!jl_is_vararg(t)); + if (jl_is_uniontype(t)) + return jl_has_intersect_kind_not_type(((jl_uniontype_t*)t)->a) || + jl_has_intersect_kind_not_type(((jl_uniontype_t*)t)->b); + if (jl_is_typevar(t)) + return jl_has_intersect_kind_not_type(((jl_tvar_t*)t)->ub); + return 0; +} + + JL_DLLEXPORT int jl_isa(jl_value_t *x, jl_value_t *t) { - if (jl_typeis(x,t) || t == (jl_value_t*)jl_any_type) + if (t == (jl_value_t*)jl_any_type || jl_typetagis(x,t)) + return 1; + if (jl_typetagof(x) < (jl_max_tags << 4) && jl_is_datatype(t) && jl_typetagis(x,((jl_datatype_t*)t)->smalltag << 4)) return 1; if (jl_is_type(x)) { if (t == (jl_value_t*)jl_type_type) @@ -2109,7 +2343,7 @@ JL_DLLEXPORT int jl_isa(jl_value_t *x, jl_value_t *t) return 0; } } - if (jl_is_concrete_type(t) && jl_type_equality_is_identity(jl_typeof(x), t)) + if (jl_is_concrete_type(t)) return 0; return jl_subtype(jl_typeof(x), t); } @@ -2121,24 +2355,23 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e); // intersect in nested union environment, similar to subtype_ccheck -static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int R, int d) +static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int depth) { // band-aid for #30335 if (x == (jl_value_t*)jl_any_type && !jl_is_typevar(y)) return y; if (y == (jl_value_t*)jl_any_type && !jl_is_typevar(x)) return x; + // band-aid for #46736 + if (obviously_egal(x, y)) + return x; jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions); - int savedepth = e->invdepth, Rsavedepth = e->Rinvdepth; - // TODO: this doesn't quite make sense - e->invdepth = e->Rinvdepth = d; - + int savedepth = e->invdepth; + e->invdepth = depth; jl_value_t *res = intersect_all(x, y, e); - - pop_unionstate(&e->Runions, &oldRunions); e->invdepth = savedepth; - e->Rinvdepth = Rsavedepth; + pop_unionstate(&e->Runions, &oldRunions); return res; } @@ -2161,20 +2394,28 @@ static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t } // set a variable to a non-type constant -static jl_value_t *set_var_to_const(jl_varbinding_t *bb, jl_value_t *v JL_MAYBE_UNROOTED, jl_varbinding_t *othervar) +static jl_value_t *set_var_to_const(jl_varbinding_t *bb, jl_value_t *v JL_MAYBE_UNROOTED, jl_stenv_t *e, int R) { - int offset = bb->offset; - if (othervar && offset == 0) - offset = -othervar->offset; - assert(!othervar || othervar->offset == -offset); + int offset = R ? 
-e->Loffset : e->Loffset; if (bb->lb == jl_bottom_type && bb->ub == (jl_value_t*)jl_any_type) { - if (jl_is_long(v)) - v = jl_box_long(jl_unbox_long(v) + offset); - bb->lb = bb->ub = v; + if (offset == 0) + bb->lb = bb->ub = v; + else if (jl_is_long(v)) { + size_t iv = jl_unbox_long(v); + v = jl_box_long(iv + offset); + bb->lb = bb->ub = v; + // Here we always return the shorter `Vararg`'s length. + if (offset > 0) + return jl_box_long(iv); + } + else + return jl_bottom_type; } else if (jl_is_long(v) && jl_is_long(bb->lb)) { - if (jl_unbox_long(v) != jl_unbox_long(bb->lb)) + if (jl_unbox_long(v) + offset != jl_unbox_long(bb->lb)) return jl_bottom_type; + // Here we always return the shorter `Vararg`'s length. + if (offset < 0) return bb->lb; } else if (!jl_egal(v, bb->lb)) { return jl_bottom_type; @@ -2182,31 +2423,70 @@ static jl_value_t *set_var_to_const(jl_varbinding_t *bb, jl_value_t *v JL_MAYBE_ return v; } -static jl_value_t *bound_var_below(jl_tvar_t *tv, jl_varbinding_t *bb, jl_stenv_t *e) { +static jl_value_t *bound_var_below(jl_tvar_t *tv, jl_varbinding_t *bb, jl_stenv_t *e, int R) { if (!bb) return (jl_value_t*)tv; if (bb->depth0 != e->invdepth) return jl_bottom_type; + e->invdepth++; record_var_occurrence(bb, e, 2); + e->invdepth--; + int offset = R ? -e->Loffset : e->Loffset; if (jl_is_long(bb->lb)) { - if (bb->offset == 0) - return bb->lb; - if (jl_unbox_long(bb->lb) < bb->offset) + ssize_t blb = jl_unbox_long(bb->lb); + if (blb < offset || blb < 0) return jl_bottom_type; - return jl_box_long(jl_unbox_long(bb->lb) - bb->offset); + // Here we always return the shorter `Vararg`'s length. + if (offset <= 0) + return bb->lb; + return jl_box_long(blb - offset); + } + if (offset > 0) { + if (bb->innervars == NULL) + bb->innervars = jl_alloc_array_1d(jl_array_any_type, 0); + jl_value_t *ntv = NULL; + JL_GC_PUSH1(&ntv); + ntv = (jl_value_t *)jl_new_typevar(tv->name, jl_bottom_type, (jl_value_t *)jl_any_type); + jl_array_ptr_1d_push(bb->innervars, ntv); + JL_GC_POP(); + return ntv; } return (jl_value_t*)tv; } -static int try_subtype_in_env(jl_value_t *a, jl_value_t *b, jl_stenv_t *e, int R, int d) +static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT; + +// similar to `subtype_by_bounds`, used to avoid stack-overflow caused by circulation constraints. 
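// ------------------------------------------------------------------------
// Illustrative sketch (hypothetical program, existing exports only): the
// Vararg-length bookkeeping that the `e->Loffset` handling in
// set_var_to_const/bound_var_below above performs. Matching
// Tuple{Int, Vararg{Int, N}} against Tuple{Vararg{Int, 3}} pins N to 2,
// so the intersection behaves like Tuple{Int, Int, Int}. The result is
// checked by mutual subtyping rather than object identity, since the
// returned representation is an implementation detail.
#include <julia.h>
#include <stdio.h>

int main(void)
{
    jl_init();
    jl_value_t *a = NULL, *b = NULL, *ab = NULL, *expect = NULL;
    JL_GC_PUSH4(&a, &b, &ab, &expect);
    a      = jl_eval_string("Tuple{Int, Vararg{Int, N}} where N");
    b      = jl_eval_string("Tuple{Vararg{Int, 3}}");
    expect = jl_eval_string("Tuple{Int, Int, Int}");
    ab = jl_type_intersection(a, b);
    printf("nonempty: %d\n", ab != jl_bottom_type);            // expected 1
    printf("equivalent to Tuple{Int,Int,Int}: %d\n",
           jl_subtype(ab, expect) && jl_subtype(expect, ab));  // expected 1
    JL_GC_POP();
    jl_atexit_hook(0);
    return 0;
}
// ------------------------------------------------------------------------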
+static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e) { - jl_value_t *root=NULL; jl_savedenv_t se; - JL_GC_PUSH1(&root); - save_env(e, &root, &se); - int ret = subtype_bounds_in_env(a, b, e, R, d); - restore_env(e, root, &se); - free_env(&se); - JL_GC_POP(); + if (jl_is_uniontype(a)) + return try_subtype_by_bounds(((jl_uniontype_t *)a)->a, b, e) && + try_subtype_by_bounds(((jl_uniontype_t *)a)->b, b, e); + else if (jl_is_uniontype(b)) + return try_subtype_by_bounds(a, ((jl_uniontype_t *)b)->a, e) || + try_subtype_by_bounds(a, ((jl_uniontype_t *)b)->b, e); + else if (jl_egal(a, b)) + return 1; + else if (!jl_is_typevar(b)) + return 0; + jl_varbinding_t *vb = e->vars; + while (vb != NULL) { + if (subtype_by_bounds(b, (jl_value_t *)vb->var, e) && obviously_in_union(a, vb->ub)) + return 1; + vb = vb->prev; + } + return 0; +} + +static int try_subtype_in_env(jl_value_t *a, jl_value_t *b, jl_stenv_t *e) +{ + if (a == jl_bottom_type || b == (jl_value_t *)jl_any_type || try_subtype_by_bounds(a, b, e)) + return 1; + jl_savedenv_t se; + save_env(e, &se, 1); + int ret = subtype_in_env(a, b, e); + restore_env(e, &se, 1); + free_env(&se); return ret; } @@ -2225,7 +2505,7 @@ static void set_bound(jl_value_t **bound, jl_value_t *val, jl_tvar_t *v, jl_sten } // subtype, treating all vars as existential -static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int R, int d) +static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) { jl_varbinding_t *v = e->vars; int len = 0; @@ -2244,7 +2524,7 @@ static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t * v->right = 1; v = v->prev; } - int issub = subtype_bounds_in_env(x, y, e, R, d); + int issub = subtype_in_env(x, y, e); n = 0; v = e->vars; while (n < len) { assert(v != NULL); @@ -2256,16 +2536,31 @@ static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t * } // See if var y is reachable from x via bounds; used to avoid cycles. -static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e) +static int _reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e, jl_typeenv_t *log) { if (in_union(x, (jl_value_t*)y)) return 1; + if (jl_is_uniontype(x)) + return _reachable_var(((jl_uniontype_t *)x)->a, y, e, log) || + _reachable_var(((jl_uniontype_t *)x)->b, y, e, log); if (!jl_is_typevar(x)) return 0; + jl_typeenv_t *t = log; + while (t != NULL) { + if (x == (jl_value_t *)t->var) + return 0; + t = t->prev; + } jl_varbinding_t *xv = lookup(e, (jl_tvar_t*)x); - if (xv == NULL) - return 0; - return reachable_var(xv->ub, y, e) || reachable_var(xv->lb, y, e); + jl_value_t *lb = xv == NULL ? ((jl_tvar_t*)x)->lb : xv->lb; + jl_value_t *ub = xv == NULL ? ((jl_tvar_t*)x)->ub : xv->ub; + jl_typeenv_t newlog = { (jl_tvar_t*)x, NULL, log }; + return _reachable_var(ub, y, e, &newlog) || _reachable_var(lb, y, e, &newlog); +} + +static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e) +{ + return _reachable_var(x, y, e, NULL); } // check whether setting v == t implies v == SomeType{v}, which is unsatisfiable. @@ -2283,27 +2578,30 @@ static int check_unsat_bound(jl_value_t *t, jl_tvar_t *v, jl_stenv_t *e) JL_NOTS return 0; } + static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int8_t R, int param) { jl_varbinding_t *bb = lookup(e, b); if (bb == NULL) - return R ? intersect_aside(a, b->ub, e, 1, 0) : intersect_aside(b->ub, a, e, 0, 0); + return R ? 
intersect_aside(a, b->ub, e, 0) : intersect_aside(b->ub, a, e, 0); if (reachable_var(bb->lb, b, e) || reachable_var(bb->ub, b, e)) return a; - if (bb->lb == bb->ub && jl_is_typevar(bb->lb)) { - return intersect(a, bb->lb, e, param); - } + if (bb->lb == bb->ub && jl_is_typevar(bb->lb)) + return R ? intersect(a, bb->lb, e, param) : intersect(bb->lb, a, e, param); if (!jl_is_type(a) && !jl_is_typevar(a)) - return set_var_to_const(bb, a, NULL); - int d = bb->depth0; - jl_value_t *root=NULL; jl_savedenv_t se; + return set_var_to_const(bb, a, e, R); + jl_savedenv_t se; if (param == 2) { jl_value_t *ub = NULL; - JL_GC_PUSH2(&ub, &root); + JL_GC_PUSH1(&ub); if (!jl_has_free_typevars(a)) { - save_env(e, &root, &se); - int issub = subtype_in_env_existential(bb->lb, a, e, 0, d) && subtype_in_env_existential(a, bb->ub, e, 1, d); - restore_env(e, root, &se); + save_env(e, &se, 1); + int issub = subtype_in_env_existential(bb->lb, a, e); + restore_env(e, &se, 1); + if (issub) { + issub = subtype_in_env_existential(a, bb->ub, e); + restore_env(e, &se, 1); + } free_env(&se); if (!issub) { JL_GC_POP(); @@ -2313,11 +2611,11 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int } else { e->triangular++; - ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d); + ub = R ? intersect_aside(a, bb->ub, e, bb->depth0) : intersect_aside(bb->ub, a, e, bb->depth0); e->triangular--; - save_env(e, &root, &se); - int issub = subtype_in_env_existential(bb->lb, ub, e, 0, d); - restore_env(e, root, &se); + save_env(e, &se, 1); + int issub = subtype_in_env_existential(bb->lb, ub, e); + restore_env(e, &se, 1); free_env(&se); if (!issub) { JL_GC_POP(); @@ -2332,12 +2630,16 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int } } bb->ub = ub; - bb->lb = ub; + if ((jl_is_uniontype(ub) && !jl_is_uniontype(a)) || + (jl_is_unionall(ub) && !jl_is_unionall(a))) + ub = (jl_value_t*)b; + else + bb->lb = ub; } JL_GC_POP(); return ub; } - jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d); + jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, bb->depth0) : intersect_aside(bb->ub, a, e, bb->depth0); if (ub == jl_bottom_type) return jl_bottom_type; if (bb->constraintkind == 1 || e->triangular) { @@ -2348,7 +2650,7 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int } else if (bb->constraintkind == 0) { JL_GC_PUSH1(&ub); - if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e, 0, d)) { + if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e)) { JL_GC_POP(); return (jl_value_t*)b; } @@ -2356,14 +2658,24 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int return ub; } assert(bb->constraintkind == 2); - if (!jl_is_typevar(a)) { - if (ub == a && bb->lb != jl_bottom_type) - return ub; - else if (jl_egal(bb->ub, bb->lb)) - return ub; - set_bound(&bb->ub, ub, b, e); - } - return (jl_value_t*)b; + if (ub == a && bb->lb != jl_bottom_type) + return ub; + if (jl_egal(bb->ub, bb->lb)) + return ub; + if (is_leaf_bound(ub)) + set_bound(&bb->lb, ub, b, e); + // TODO: can we improve this bound by pushing a new variable into the environment + // and adding that to the lower bound of our variable? 
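// ------------------------------------------------------------------------
// Illustrative sketch (hypothetical program, existing exports only): the
// constraint intersect_var maintains -- a variable's assignment must stay
// inside its declared bounds, which is why an invariant parameter outside
// the bound makes the whole intersection empty while one inside it is kept.
#include <julia.h>
#include <stdio.h>

int main(void)
{
    jl_init();
    jl_value_t *a = NULL, *b1 = NULL, *b2 = NULL, *ok = NULL, *bad = NULL;
    JL_GC_PUSH5(&a, &b1, &b2, &ok, &bad);
    a  = jl_eval_string("Ref{T} where T<:Integer");
    b1 = jl_eval_string("Ref{Int}");
    b2 = jl_eval_string("Ref{String}");
    ok  = jl_type_intersection(a, b1);
    bad = jl_type_intersection(a, b2);
    printf("Ref{Int} case nonempty:    %d\n", ok != jl_bottom_type);   // expected 1
    printf("Ref{String} case is empty: %d\n", bad == jl_bottom_type);  // expected 1
    JL_GC_POP();
    jl_atexit_hook(0);
    return 0;
}
// ------------------------------------------------------------------------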
+ //jl_value_t *ntv = NULL; + //JL_GC_PUSH2(&ntv, &ub); + //if (bb->innervars == NULL) + // bb->innervars = jl_alloc_array_1d(jl_array_any_type, 0); + //ntv = (jl_value_t*)jl_new_typevar(b->name, bb->lb, ub); + //jl_array_ptr_1d_push(bb->innervars, ntv); + //jl_value_t *lb = simple_join(b->lb, ntv); + //JL_GC_POP(); + //bb->lb = lb; + return ub; } // test whether `var` occurs inside constructors. `want_inv` tests only inside @@ -2406,12 +2718,68 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want return 0; } +static jl_value_t *omit_bad_union(jl_value_t *u, jl_tvar_t *t) +{ + if (!jl_has_typevar(u, t)) + return u; // return u if possible as many checks use `==`. + jl_value_t *res = NULL; + if (jl_is_unionall(u)) { + jl_tvar_t *var = ((jl_unionall_t *)u)->var; + jl_value_t *ub = var->ub, *body = ((jl_unionall_t *)u)->body; + assert(var != t); + JL_GC_PUSH3(&ub, &body, &var); + body = omit_bad_union(body, t); + if (!jl_has_typevar(body, var)) { + res = body; + } + else if (jl_has_typevar(var->lb, t)) { + res = jl_bottom_type; + } + else { + ub = omit_bad_union(ub, t); + if (ub == jl_bottom_type && var->lb != ub) { + res = jl_bottom_type; + } + else if (obviously_egal(var->lb, ub)) { + JL_TRY { + res = jl_substitute_var(body, var, ub); + } + JL_CATCH { + res = jl_bottom_type; + } + } + else { + if (ub != var->ub) { + var = jl_new_typevar(var->name, var->lb, ub); + body = jl_substitute_var(body, ((jl_unionall_t *)u)->var, (jl_value_t *)var); + } + res = jl_new_struct(jl_unionall_type, var, body); + } + } + JL_GC_POP(); + } + else if (jl_is_uniontype(u)) { + jl_value_t *a = ((jl_uniontype_t *)u)->a; + jl_value_t *b = ((jl_uniontype_t *)u)->b; + JL_GC_PUSH2(&a, &b); + a = omit_bad_union(a, t); + b = omit_bad_union(b, t); + res = simple_join(a, b); + JL_GC_POP(); + } + else { + res = jl_bottom_type; + } + assert(res != NULL); + return res; +} + // Caller might not have rooted `res` static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_unionall_t *u, jl_stenv_t *e) { - jl_value_t *varval = NULL; + jl_value_t *varval = NULL, *ilb = NULL, *iub = NULL, *nivar = NULL; jl_tvar_t *newvar = vb->var; - JL_GC_PUSH2(&res, &newvar); + JL_GC_PUSH5(&res, &newvar, &ilb, &iub, &nivar); // try to reduce var to a single value if (jl_is_long(vb->ub) && jl_is_typevar(vb->lb)) { varval = vb->ub; @@ -2420,10 +2788,9 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind // given x<:T<:x, substitute x for T varval = vb->ub; } - // TODO: `vb.occurs_cov == 1` here allows substituting Tuple{<:X} => Tuple{X}, - // which is valid but changes some ambiguity errors so we don't need to do it yet. - else if ((/*vb->occurs_cov == 1 || */is_leaf_bound(vb->ub)) && - !var_occurs_invariant(u->body, u->var, 0)) { + // TODO: `vb.occurs_cov == 1`, we could also substitute Tuple{<:X} => Tuple{X}, + // but it may change some ambiguity errors so we don't need to do it yet. + else if (vb->occurs_cov && is_leaf_bound(vb->ub) && !jl_has_free_typevars(vb->ub)) { // replace T<:x with x in covariant position when possible varval = vb->ub; } @@ -2446,63 +2813,160 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub); // remove/replace/rewrap free occurrences of this var in the environment - jl_varbinding_t *btemp = e->vars; - int wrap = 1; - while (btemp != NULL) { - if (jl_has_typevar(btemp->lb, vb->var)) { + + // I. 
Handle indirect innervars (make them behave like direct innervars). + // 1) record if btemp->lb/ub has indirect innervars. + // 2) substitute `vb->var` with `varval`/`varval` + // note: We only store the innervar in the outmost `varbinding`, + // thus we must check all inner env to ensure the recording/substitution + // is complete + int len = current_env_length(e); + int8_t *blinding_has_innerdep = (int8_t *)alloca(len); + memset(blinding_has_innerdep, 0, len); + for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) { + if (btemp->innervars != NULL) { + for (size_t i = 0; i < jl_array_len(btemp->innervars); i++) { + jl_tvar_t *ivar = (jl_tvar_t*)jl_array_ptr_ref(btemp->innervars, i); + ilb = ivar->lb; iub = ivar->ub; + int has_innerdep = 0; + if (jl_has_typevar(ilb, vb->var)) { + has_innerdep = 1; + if (varval) { + JL_TRY { + ilb = jl_substitute_var(ilb, vb->var, varval); + } + JL_CATCH { + res = jl_bottom_type; + } + } + else if (newvar != vb->var) { + ilb = jl_substitute_var(ilb, vb->var, (jl_value_t*)newvar); + } + } + if (jl_has_typevar(iub, vb->var)) { + has_innerdep = 1; + if (varval) { + JL_TRY { + iub = jl_substitute_var(iub, vb->var, varval); + } + JL_CATCH { + res = jl_bottom_type; + } + } + else if (newvar != vb->var) { + iub = jl_substitute_var(iub, vb->var, (jl_value_t*)newvar); + } + } + if (!has_innerdep) continue; + int need_substitution = 0; + if (ilb != ivar->lb || iub != ivar->ub) { + need_substitution = 1; + nivar = (jl_value_t *)jl_new_typevar(ivar->name, ilb, iub); + jl_array_ptr_set(btemp->innervars, i, nivar); + if (jl_has_typevar(res, ivar)) + res = jl_substitute_var(res, ivar, nivar); + } + int envind = 0; + for (jl_varbinding_t *btemp2 = e->vars; btemp2 != btemp->prev; btemp2 = btemp2->prev) { + if (jl_has_typevar(btemp2->lb, ivar)) { + if (need_substitution) + btemp2->lb = jl_substitute_var(btemp2->lb, ivar, nivar); + blinding_has_innerdep[envind] |= 1; + } + if (jl_has_typevar(btemp2->ub, ivar)) { + if (need_substitution) + btemp2->ub = jl_substitute_var(btemp2->ub, ivar, nivar); + blinding_has_innerdep[envind] |= 2; + } + envind++; + } + } + } + } + // II. Handle direct innervars. 
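// ------------------------------------------------------------------------
// Illustrative sketch (hypothetical program, existing exports only): the
// "reduce the variable to a single value" path of finish_unionall. When
// the body intersection pins T on one slot, the UnionAll is eliminated and
// the value is substituted through the rest of the result and environment.
#include <julia.h>
#include <stdio.h>

int main(void)
{
    jl_init();
    jl_value_t *a = NULL, *b = NULL, *ab = NULL, *expect = NULL;
    JL_GC_PUSH4(&a, &b, &ab, &expect);
    a      = jl_eval_string("Tuple{T, T} where T");
    b      = jl_eval_string("Tuple{Int, Any}");
    expect = jl_eval_string("Tuple{Int, Int}");
    ab = jl_type_intersection(a, b);
    printf("equivalent to Tuple{Int, Int}: %d\n",
           jl_subtype(ab, expect) && jl_subtype(expect, ab));  // expected 1
    JL_GC_POP();
    jl_atexit_hook(0);
    return 0;
}
// ------------------------------------------------------------------------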
+ jl_varbinding_t *wrap = NULL; + int envind = 0; + for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) { + int has_innerdep = blinding_has_innerdep[envind++]; + int lb_has_innerdep = has_innerdep & 1; + int ub_has_innerdep = has_innerdep & 2; + assert(!has_innerdep || btemp->depth0 == vb->depth0); + int lb_has_dep = jl_has_typevar(btemp->lb, vb->var); + int ub_has_dep = jl_has_typevar(btemp->ub, vb->var); + if (lb_has_innerdep || lb_has_dep) { if (vb->lb == (jl_value_t*)btemp->var) { JL_GC_POP(); return jl_bottom_type; } if (varval) { - JL_TRY { - btemp->lb = jl_substitute_var(btemp->lb, vb->var, varval); - } - JL_CATCH { - res = jl_bottom_type; + if (lb_has_dep) { // inner substitution has been handled + JL_TRY { + btemp->lb = jl_substitute_var(btemp->lb, vb->var, varval); + } + JL_CATCH { + res = jl_bottom_type; + } } } - else if (btemp->lb == (jl_value_t*)vb->var) + else if (btemp->lb == (jl_value_t*)vb->var) { btemp->lb = vb->lb; - else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && - !jl_has_typevar(vb->ub, btemp->var) && jl_has_typevar(btemp->ub, vb->var)) { + } + else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) { // if our variable is T, and some outer variable has constraint S = Ref{T}, // move the `where T` outside `where S` instead of putting it here. issue #21243. - if (btemp->innervars == NULL) - btemp->innervars = jl_alloc_array_1d(jl_array_any_type, 0); - if (newvar != vb->var) { + if (newvar != vb->var && lb_has_dep) // inner substitution has been handled btemp->lb = jl_substitute_var(btemp->lb, vb->var, (jl_value_t*)newvar); - btemp->ub = jl_substitute_var(btemp->ub, vb->var, (jl_value_t*)newvar); - } - jl_array_ptr_1d_push(btemp->innervars, (jl_value_t*)newvar); - wrap = 0; - btemp = btemp->prev; - continue; + wrap = btemp; } - else + else { btemp->lb = jl_new_struct(jl_unionall_type, vb->var, btemp->lb); + } assert((jl_value_t*)btemp->var != btemp->lb); } - if (jl_has_typevar(btemp->ub, vb->var)) { + if (ub_has_innerdep || ub_has_dep) { if (vb->ub == (jl_value_t*)btemp->var) { - JL_GC_POP(); - return jl_bottom_type; + // TODO: handle `omit_bad_union` correctly if `ub_has_innerdep` + btemp->ub = omit_bad_union(btemp->ub, vb->var); + if (btemp->ub == jl_bottom_type && btemp->ub != btemp->lb) { + JL_GC_POP(); + return jl_bottom_type; + } } if (varval) { - JL_TRY { - btemp->ub = jl_substitute_var(btemp->ub, vb->var, varval); - } - JL_CATCH { - res = jl_bottom_type; + if (ub_has_dep) { // inner substitution has been handled + JL_TRY { + btemp->ub = jl_substitute_var(btemp->ub, vb->var, varval); + } + JL_CATCH { + res = jl_bottom_type; + } } } - else if (btemp->ub == (jl_value_t*)vb->var) + else if (btemp->ub == (jl_value_t*)vb->var) { + // TODO: this loses some constraints, such as in this test, where we replace T4<:S3 (e.g. 
T4==S3 since T4 only appears covariantly once) with T4<:Any + // a = Tuple{Float64,T3,T4} where T4 where T3 + // b = Tuple{S2,Tuple{S3},S3} where S2 where S3 + // Tuple{Float64, T3, T4} where {S3, T3<:Tuple{S3}, T4<:S3} btemp->ub = vb->ub; + } + else if (btemp->depth0 == vb->depth0 && !jl_has_typevar(vb->lb, btemp->var) && !jl_has_typevar(vb->ub, btemp->var)) { + if (newvar != vb->var && ub_has_dep) // inner substitution has been handled + btemp->ub = jl_substitute_var(btemp->ub, vb->var, (jl_value_t*)newvar); + wrap = btemp; + } else btemp->ub = jl_new_struct(jl_unionall_type, vb->var, btemp->ub); assert((jl_value_t*)btemp->var != btemp->ub); } - btemp = btemp->prev; + } + + if (wrap) { + // We only assign the newvar with the outmost var. + // This make sure we never create a UnionAll with 2 identical vars. + if (wrap->innervars == NULL) + wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0); + jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)newvar); + // TODO: should we move all the innervars here too? } // if `v` still occurs, re-wrap body in `UnionAll v` or eliminate the UnionAll @@ -2525,17 +2989,32 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind if (newvar != vb->var) res = jl_substitute_var(res, vb->var, (jl_value_t*)newvar); varval = (jl_value_t*)newvar; - if (wrap) + if (!wrap) res = jl_type_unionall((jl_tvar_t*)newvar, res); } } - if (res != jl_bottom_type && vb->innervars != NULL) { - int i; - for(i=0; i < jl_array_len(vb->innervars); i++) { + if (vb->innervars != NULL) { + for (size_t i = 0; i < jl_array_nrows(vb->innervars); i++) { jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb->innervars, i); - if (jl_has_typevar(res, var)) - res = jl_type_unionall((jl_tvar_t*)var, res); + // the `btemp->prev` walk is only giving a sort of post-order guarantee (since we are + // iterating 2 trees at once), so once we set `wrap`, there might remain other branches + // of the type walk that now still may have incomplete bounds: finish those now too + jl_varbinding_t *wrap = NULL; + for (jl_varbinding_t *btemp = e->vars; btemp != NULL; btemp = btemp->prev) { + if (btemp->depth0 == vb->depth0 && (jl_has_typevar(btemp->lb, var) || jl_has_typevar(btemp->ub, var))) { + wrap = btemp; + } + } + if (wrap) { + if (wrap->innervars == NULL) + wrap->innervars = jl_alloc_array_1d(jl_array_any_type, 0); + jl_array_ptr_1d_push(wrap->innervars, (jl_value_t*)var); + } + else if (res != jl_bottom_type) { + if (jl_has_typevar(res, var)) + res = jl_type_unionall((jl_tvar_t*)var, res); + } } } @@ -2544,7 +3023,7 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind if (!varval || (!is_leaf_bound(varval) && !vb->occurs_inv)) e->envout[e->envidx] = (jl_value_t*)vb->var; else if (!(oldval && jl_is_typevar(oldval) && jl_is_long(varval))) - e->envout[e->envidx] = fix_inferred_var_bound(vb->var, varval); + e->envout[e->envidx] = varval; } JL_GC_POP(); @@ -2566,7 +3045,7 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv } if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var || btemp->ub == (jl_value_t*)u->var) { - u = rename_unionall(u); + u = jl_rename_unionall(u); break; } btemp = btemp->prev; @@ -2584,7 +3063,7 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv res = intersect(u->body, t, e, param); } vb->concrete |= (vb->occurs_cov > 1 && is_leaf_typevar(u->var) && - !var_occurs_invariant(u->body, u->var, 0)); + !var_occurs_invariant(u->body, u->var)); // handle the 
"diagonal dispatch" rule, which says that a type var occurring more // than once, and only in covariant position, is constrained to concrete types. E.g. @@ -2606,10 +3085,17 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv // T=Bottom in covariant position res = jl_bottom_type; } - else if (jl_has_typevar(vb->lb, u->var) || jl_has_typevar(vb->ub, u->var)) { + else if (jl_has_typevar(vb->lb, u->var)) { // fail on circular constraints res = jl_bottom_type; } + else { + JL_GC_PUSH1(&res); + vb->ub = omit_bad_union(vb->ub, u->var); + JL_GC_POP(); + if (vb->ub == jl_bottom_type && vb->ub != vb->lb) + res = jl_bottom_type; + } } if (res != jl_bottom_type) // res is rooted by callee @@ -2618,31 +3104,72 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv return res; } +static int always_occurs_cov(jl_value_t *v, jl_tvar_t *var, int param) JL_NOTSAFEPOINT +{ + if (param > 1) { + return 0; + } + else if (v == (jl_value_t*)var) { + return param == 1; + } + else if (jl_is_uniontype(v)) { + return always_occurs_cov(((jl_uniontype_t*)v)->a, var, param) && + always_occurs_cov(((jl_uniontype_t*)v)->b, var, param); + } + else if (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + return ua->var != var && ( + always_occurs_cov(ua->var->ub, var, 0) || + always_occurs_cov(ua->body, var, param)); + } + else if (jl_is_vararg(v)) { + jl_vararg_t *vm = (jl_vararg_t*)v; + return vm->T && always_occurs_cov(vm->T, var, param); + } + else if (jl_is_datatype(v)) { + int nparam = jl_is_tuple_type(v) ? 1 : param; + for (size_t i = 0; i < jl_nparams(v); i++) { + if (always_occurs_cov(jl_tparam(v, i), var, nparam)) + return 1; + } + } + return 0; +} + static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param) { - jl_value_t *res=NULL, *save=NULL; + jl_value_t *res = NULL; jl_savedenv_t se; - jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, - R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars }; - JL_GC_PUSH5(&res, &vb.lb, &vb.ub, &save, &vb.innervars); - save_env(e, &save, &se); + jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0, 0, + e->invdepth, NULL, e->vars }; + JL_GC_PUSH4(&res, &vb.lb, &vb.ub, &vb.innervars); + save_env(e, &se, 1); + int noinv = !var_occurs_invariant(u->body, u->var); + if (is_leaf_typevar(u->var) && noinv && always_occurs_cov(u->body, u->var, param)) + vb.constraintkind = 1; res = intersect_unionall_(t, u, e, R, param, &vb); + vb.intersected = 1; if (vb.limited) { // if the environment got too big, avoid tree recursion and propagate the flag if (e->vars) e->vars->limited = 1; } else if (res != jl_bottom_type) { - if (vb.concrete || vb.occurs_inv>1 || u->var->lb != jl_bottom_type || (vb.occurs_inv && vb.occurs_cov)) { - restore_env(e, NULL, &se); - vb.occurs_cov = vb.occurs_inv = 0; + int constraint1 = vb.constraintkind; + if (vb.concrete || vb.occurs_inv>1 || (vb.occurs_inv && vb.occurs_cov)) vb.constraintkind = vb.concrete ? 
1 : 2; - res = intersect_unionall_(t, u, e, R, param, &vb); - } - else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0)) { - restore_env(e, save, &se); - vb.occurs_cov = vb.occurs_inv = 0; + else if (u->var->lb != jl_bottom_type) + vb.constraintkind = 2; + else if (vb.occurs_cov && noinv) vb.constraintkind = 1; + int reintersection = constraint1 != vb.constraintkind || vb.concrete; + if (reintersection) { + if (constraint1 == 1) { + vb.lb = vb.var->lb; + vb.ub = vb.var->ub; + } + restore_env(e, &se, vb.constraintkind == 1 ? 1 : 0); + vb.occurs = vb.occurs_cov = vb.occurs_inv = 0; res = intersect_unionall_(t, u, e, R, param, &vb); } } @@ -2651,6 +3178,8 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_ return res; } +static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t *e); + // check n = (length of vararg type v) static int intersect_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e, int8_t R) { @@ -2659,16 +3188,15 @@ static int intersect_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e, int8 if (N && jl_is_typevar(N)) { jl_value_t *len = jl_box_long(n); JL_GC_PUSH1(&len); - jl_value_t *il = R ? intersect(len, N, e, 2) : intersect(N, len, e, 2); + jl_value_t *il = R ? intersect_invariant(len, N, e) : intersect_invariant(N, len, e); JL_GC_POP(); - if (il == jl_bottom_type) + if (il == NULL || il == jl_bottom_type) return 0; } return 1; } -static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t *e); -static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, jl_stenv_t *e, int param) +static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t offset, jl_stenv_t *e, int param) { // Vararg: covariant in first parameter, invariant in second jl_value_t *xp1=jl_unwrap_vararg(vmx), *xp2=jl_unwrap_vararg_num(vmx), @@ -2679,26 +3207,40 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, jl_sten if (intersect(xp1, yp1, e, param==0 ? 
1 : param) == jl_bottom_type) return jl_bottom_type; jl_value_t *i2=NULL, *ii = intersect(xp1, yp1, e, 1); - if (ii == jl_bottom_type) return jl_bottom_type; - JL_GC_PUSH2(&ii, &i2); + if (ii == jl_bottom_type) + return jl_bottom_type; if (!xp2 && !yp2) { - ii = (jl_value_t*)jl_wrap_vararg(ii, NULL); - JL_GC_POP(); + if (obviously_egal(xp1, ii)) + ii = (jl_value_t*)vmx; + else if (obviously_egal(yp1, ii)) + ii = (jl_value_t*)vmy; + else { + JL_GC_PUSH1(&ii); + ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1); + JL_GC_POP(); + } return ii; } - if (xp2 && jl_is_typevar(xp2)) { - jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)xp2); + JL_GC_PUSH2(&ii, &i2); + assert(e->Loffset == 0); + e->Loffset = offset; + jl_varbinding_t *xb = NULL, *yb = NULL; + int8_t max_offsetx = 0, max_offsety = 0; + if (xp2) { + assert(jl_is_typevar(xp2)); + xb = lookup(e, (jl_tvar_t*)xp2); if (xb) xb->intvalued = 1; - if (!yp2) { - i2 = bound_var_below((jl_tvar_t*)xp2, xb, e); - } + if (xb) max_offsetx = xb->max_offset; + if (!yp2) + i2 = bound_var_below((jl_tvar_t*)xp2, xb, e, 0); } - if (yp2 && jl_is_typevar(yp2)) { - jl_varbinding_t *yb = lookup(e, (jl_tvar_t*)yp2); + if (yp2) { + assert(jl_is_typevar(yp2)); + yb = lookup(e, (jl_tvar_t*)yp2); if (yb) yb->intvalued = 1; - if (!xp2) { - i2 = bound_var_below((jl_tvar_t*)yp2, yb, e); - } + if (yb) max_offsety = yb->max_offset; + if (!xp2) + i2 = bound_var_below((jl_tvar_t*)yp2, yb, e, 1); } if (xp2 && yp2) { // Vararg{T,N} <: Vararg{T2,N2}; equate N and N2 @@ -2709,7 +3251,29 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, jl_sten i2 = jl_bottom_type; } } - ii = i2 == jl_bottom_type ? (jl_value_t*)jl_bottom_type : (jl_value_t*)jl_wrap_vararg(ii, i2); + assert(e->Loffset == offset); + e->Loffset = 0; + if (i2 == jl_bottom_type) { + ii = (jl_value_t*)jl_bottom_type; + } + else { + if (xb && !xb->intersected) { + xb->max_offset = max_offsetx; + if (offset > xb->max_offset && xb->max_offset >= 0) + xb->max_offset = offset > 32 ? 32 : offset; + } + if (yb && !yb->intersected) { + yb->max_offset = max_offsety; + if (-offset > yb->max_offset && yb->max_offset >= 0) + yb->max_offset = -offset > 32 ? 32 : -offset; + } + if (xp2 && obviously_egal(xp1, ii) && obviously_egal(xp2, i2)) + ii = (jl_value_t*)vmx; + else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2)) + ii = (jl_value_t*)vmy; + else + ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1); + } JL_GC_POP(); return ii; } @@ -2718,77 +3282,117 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, jl_sten static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, int param) { size_t lx = jl_nparams(xd), ly = jl_nparams(yd); + size_t llx = lx, lly = ly; if (lx == 0 && ly == 0) return (jl_value_t*)yd; - int vx=0, vy=0, vvx = (lx > 0 && jl_is_vararg(jl_tparam(xd, lx-1))); - int vvy = (ly > 0 && jl_is_vararg(jl_tparam(yd, ly-1))); - if (!vvx && !vvy && lx != ly) - return jl_bottom_type; - jl_svec_t *params = jl_alloc_svec(lx > ly ? lx : ly); - jl_value_t *res=NULL; - JL_GC_PUSH1(¶ms); + int vx=0, vy=0; + jl_vararg_kind_t vvx = lx > 0 ? jl_vararg_kind(jl_tparam(xd, lx-1)) : JL_VARARG_NONE; + jl_vararg_kind_t vvy = ly > 0 ? 
jl_vararg_kind(jl_tparam(yd, ly-1)) : JL_VARARG_NONE; + if (vvx == JL_VARARG_INT) + llx += jl_unbox_long(jl_unwrap_vararg_num((jl_vararg_t *)jl_tparam(xd, lx-1))) - 1; + if (vvy == JL_VARARG_INT) + lly += jl_unbox_long(jl_unwrap_vararg_num((jl_vararg_t *)jl_tparam(yd, ly-1))) - 1; + if (vvx == JL_VARARG_BOUND && (vvy == JL_VARARG_BOUND || vvy == JL_VARARG_UNBOUND)) { + jl_value_t *xlen = jl_unwrap_vararg_num((jl_vararg_t*)jl_tparam(xd, lx-1)); + assert(xlen && jl_is_typevar(xlen)); + jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)xlen); + if (xb && xb->intersected && xb->max_offset > 0) { + assert(xb->max_offset <= 32); + llx += xb->max_offset; + } + } + if (vvy == JL_VARARG_BOUND && (vvx == JL_VARARG_BOUND || vvx == JL_VARARG_UNBOUND)) { + jl_value_t *ylen = jl_unwrap_vararg_num((jl_vararg_t*)jl_tparam(yd, ly-1)); + assert(ylen && jl_is_typevar(ylen)); + jl_varbinding_t *yb = lookup(e, (jl_tvar_t*)ylen); + if (yb && yb->intersected && yb->max_offset > 0) { + assert(yb->max_offset <= 32); + lly += yb->max_offset; + } + } + + if ((vvx == JL_VARARG_NONE || vvx == JL_VARARG_INT) && + (vvy == JL_VARARG_NONE || vvy == JL_VARARG_INT)) { + if (llx != lly) + return jl_bottom_type; + } + + size_t np = llx > lly ? llx : lly; + jl_value_t *res = NULL; + jl_svec_t *p = NULL; + jl_value_t **params; + jl_value_t **roots; + JL_GC_PUSHARGS(roots, np < 64 ? np : 1); + if (np < 64) { + params = roots; + } + else { + p = jl_alloc_svec(np); + roots[0] = (jl_value_t*)p; + params = jl_svec_data(p); + } size_t i=0, j=0; jl_value_t *xi, *yi; + int isx = 1, isy = 1; // try to reuse the object x or y as res whenever we can (e.g. when it is the supertype) instead of allocating a copy while (1) { vx = vy = 0; - xi = i < lx ? jl_tparam(xd, i) : NULL; - yi = j < ly ? jl_tparam(yd, j) : NULL; + xi = i < llx ? jl_tparam(xd, i < lx ? i : lx - 1) : NULL; + yi = j < lly ? jl_tparam(yd, j < ly ? 
j : ly - 1) : NULL; if (xi == NULL && yi == NULL) { - assert(i == j && i == jl_svec_len(params)); + assert(i == j && i == np); break; } - if (xi && jl_is_vararg(xi)) vx = 1; - if (yi && jl_is_vararg(yi)) vy = 1; + if (xi && jl_is_vararg(xi)) vx = vvx == JL_VARARG_UNBOUND || (vvx == JL_VARARG_BOUND && i == llx - 1); + if (yi && jl_is_vararg(yi)) vy = vvy == JL_VARARG_UNBOUND || (vvy == JL_VARARG_BOUND && j == lly - 1); if (xi == NULL || yi == NULL) { - res = jl_bottom_type; - if (vx && intersect_vararg_length(xi, ly+1-lx, e, 0)) - res = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(params), j); - if (vy && intersect_vararg_length(yi, lx+1-ly, e, 1)) - res = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(params), i); + if (vx && intersect_vararg_length(xi, lly+1-llx, e, 0)) { + np = j; + p = NULL; + } + else if (vy && intersect_vararg_length(yi, llx+1-lly, e, 1)) { + np = i; + p = NULL; + } + else { + res = jl_bottom_type; + } break; } - jl_varbinding_t *xb=NULL, *yb=NULL; jl_value_t *ii = NULL; if (vx && vy) { - // {A^n...,Vararg{T,N}} ∩ {Vararg{S,M}} = {(A∩S)^n...,Vararg{T∩S,N}} plus N = M-n - jl_value_t *xlen = jl_unwrap_vararg_num(xi); - if (xlen && jl_is_typevar(xlen)) { - xb = lookup(e, (jl_tvar_t*)xlen); - if (xb) - xb->offset = ly-lx; - } - jl_value_t *ylen = jl_unwrap_vararg_num(yi); - if (ylen && jl_is_typevar(ylen)) { - yb = lookup(e, (jl_tvar_t*)ylen); - if (yb) - yb->offset = lx-ly; - } ii = intersect_varargs((jl_vararg_t*)xi, (jl_vararg_t*)yi, - e, param); - if (xb) xb->offset = 0; - if (yb) yb->offset = 0; - } else { - if (vx) - xi = jl_unwrap_vararg(xi); - if (vy) - yi = jl_unwrap_vararg(yi); - ii = intersect(xi, yi, e, param == 0 ? 1 : param); + lly - llx, // xi's offset: {A^n...,Vararg{T,N}} ∩ {Vararg{S,M}} + // {(A∩S)^n...,Vararg{T∩S,N}} plus N = M-n + e, + param); + } + else { + ii = intersect(jl_is_vararg(xi) ? jl_unwrap_vararg(xi) : xi, + jl_is_vararg(yi) ? jl_unwrap_vararg(yi) : yi, + e, + param == 0 ? 1 : param); } if (ii == jl_bottom_type) { if (vx && vy) { + jl_varbinding_t *xb=NULL, *yb=NULL; + jl_value_t *xlen = jl_unwrap_vararg_num(xi); + assert(xlen == NULL || jl_is_typevar(xlen)); + if (xlen) xb = lookup(e, (jl_tvar_t*)xlen); + jl_value_t *ylen = jl_unwrap_vararg_num(yi); + assert(ylen == NULL || jl_is_typevar(ylen)); + if (ylen) yb = lookup(e, (jl_tvar_t*)ylen); int len = i > j ? i : j; - if ((xb && jl_is_long(xb->lb) && lx-1+jl_unbox_long(xb->lb) != len) || - (yb && jl_is_long(yb->lb) && ly-1+jl_unbox_long(yb->lb) != len)) { - res = jl_bottom_type; - } - else if (param == 2 && jl_is_unionall(xi) != jl_is_unionall(yi)) { + if ((xb && jl_is_long(xb->lb) && llx-1+jl_unbox_long(xb->lb) != len) || + (yb && jl_is_long(yb->lb) && lly-1+jl_unbox_long(yb->lb) != len)) { res = jl_bottom_type; } else { - if (xb) set_var_to_const(xb, jl_box_long(len-lx+1), yb); - if (yb) set_var_to_const(yb, jl_box_long(len-ly+1), xb); - res = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(params), len); + assert(e->Loffset == 0); + if (xb) set_var_to_const(xb, jl_box_long(len-llx+1), e, 0); + if (yb) set_var_to_const(yb, jl_box_long(len-lly+1), e, 1); + np = len; + p = NULL; } } else { @@ -2796,15 +3400,44 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten } break; } - jl_svecset(params, (i > j ? i : j), ii); + isx = isx && ii == xi; + isy = isy && ii == yi; + if (p) + jl_svecset(p, (i > j ? i : j), ii); + else + params[i > j ? 
i : j] = ii; if (vx && vy) break; - if (i < lx-1 || !vx) i++; - if (j < ly-1 || !vy) j++; + if (!vx) i++; + if (!vy) j++; } // TODO: handle Vararg with explicit integer length parameter - if (res == NULL) - res = (jl_value_t*)jl_apply_tuple_type(params); + if (res == NULL) { + assert(!p || np == jl_svec_len(p)); + isx = isx && lx == np; + isy = isy && ly == np; + if (!isx && !isy) { + // do a more careful check now for equivalence + if (lx == np) { + isx = 1; + for (i = 0; i < np; i++) + isx = isx && obviously_egal(params[i], jl_tparam(xd, i)); + } + if (!isx && ly == np) { + isy = 1; + for (i = 0; i < np; i++) + isy = isy && obviously_egal(params[i], jl_tparam(yd, i)); + } + } + if (isx) + res = (jl_value_t*)xd; + else if (isy) + res = (jl_value_t*)yd; + else if (p) + res = jl_apply_tuple_type(p, 1); + else + res = jl_apply_tuple_type_v(params, np); + } JL_GC_POP(); return res; } @@ -2821,85 +3454,51 @@ static void flip_vars(jl_stenv_t *e) // intersection where xd nominally inherits from yd static jl_value_t *intersect_sub_datatype(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, int R, int param) { + // attempt to populate additional constraints into `e` + // if that attempt fails, then return bottom + // otherwise return xd (finish_unionall will later handle propagating those constraints) + assert(e->Loffset == 0); jl_value_t *isuper = R ? intersect((jl_value_t*)yd, (jl_value_t*)xd->super, e, param) : intersect((jl_value_t*)xd->super, (jl_value_t*)yd, e, param); - if (isuper == jl_bottom_type) return jl_bottom_type; - if (jl_nparams(xd) == 0 || jl_nparams(xd->super) == 0 || !jl_has_free_typevars((jl_value_t*)xd)) - return (jl_value_t*)xd; - jl_value_t *super_pattern=NULL; - JL_GC_PUSH2(&isuper, &super_pattern); - jl_value_t *wrapper = xd->name->wrapper; - super_pattern = jl_rewrap_unionall((jl_value_t*)((jl_datatype_t*)jl_unwrap_unionall(wrapper))->super, - wrapper); - int envsz = jl_subtype_env_size(super_pattern); - jl_value_t *ii = jl_bottom_type; - { - jl_value_t **env; - JL_GC_PUSHARGS(env, envsz); - jl_stenv_t tempe; - init_stenv(&tempe, env, envsz); - tempe.ignore_free = 1; - if (subtype_in_env(isuper, super_pattern, &tempe)) { - jl_value_t *wr = wrapper; - int i; - for(i=0; ivar || - (jl_is_typevar(ei) && lookup(e, (jl_tvar_t*)ei) == NULL)) - env[i] = jl_tparam(xd,i); - wr = ((jl_unionall_t*)wr)->body; - } - JL_TRY { - ii = jl_apply_type(wrapper, env, envsz); - } - JL_CATCH { - ii = jl_bottom_type; - } - } - JL_GC_POP(); - } - JL_GC_POP(); - return ii; + if (isuper == jl_bottom_type) + return jl_bottom_type; + return (jl_value_t*)xd; } static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) { - if (!jl_has_free_typevars(x) && !jl_has_free_typevars(y)) { + if (e->Loffset == 0 && !jl_has_free_typevars(x) && !jl_has_free_typevars(y)) { return (jl_subtype(x,y) && jl_subtype(y,x)) ? y : NULL; } e->invdepth++; - e->Rinvdepth++; jl_value_t *ii = intersect(x, y, e, 2); e->invdepth--; - e->Rinvdepth--; - if (jl_is_typevar(x) && jl_is_typevar(y) && (jl_is_typevar(ii) || !jl_is_type(ii))) - return ii; + if (jl_is_typevar(x) && jl_is_typevar(y) && jl_is_typevar(ii)) + return ii; // skip the following check due to possible circular constraints. 
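// ------------------------------------------------------------------------
// Illustrative sketch (hypothetical program, existing exports only):
// invariance of datatype parameters, which is what intersect_invariant
// below enforces -- Vector{Int} and Vector{Integer} neither subtype each
// other nor intersect, while a bounded variable in that slot can be solved.
#include <julia.h>
#include <stdio.h>

int main(void)
{
    jl_init();
    jl_value_t *vi = NULL, *vinteger = NULL, *bounded = NULL, *ii = NULL;
    JL_GC_PUSH4(&vi, &vinteger, &bounded, &ii);
    vi       = jl_eval_string("Vector{Int}");
    vinteger = jl_eval_string("Vector{Integer}");
    bounded  = jl_eval_string("Vector{T} where T<:Integer");
    printf("Vector{Int} <: Vector{Integer}: %d\n", jl_subtype(vi, vinteger)); // expected 0
    ii = jl_type_intersection(vi, vinteger);
    printf("their intersection is empty:    %d\n", ii == jl_bottom_type);     // expected 1
    ii = jl_type_intersection(bounded, vi);
    printf("bounded-var case nonempty:      %d\n", ii != jl_bottom_type);     // expected 1
    JL_GC_POP();
    jl_atexit_hook(0);
    return 0;
}
// ------------------------------------------------------------------------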
if (ii == jl_bottom_type) { if (!subtype_in_env(x, jl_bottom_type, e)) return NULL; - flip_vars(e); + flip_vars(e); flip_offset(e); if (!subtype_in_env(y, jl_bottom_type, e)) { - flip_vars(e); + flip_vars(e); flip_offset(e); return NULL; } - flip_vars(e); + flip_vars(e); flip_offset(e); return jl_bottom_type; } - jl_value_t *root=NULL; jl_savedenv_t se; - JL_GC_PUSH2(&ii, &root); - save_env(e, &root, &se); - if (!subtype_in_env_existential(x, y, e, 0, e->invdepth)) { + JL_GC_PUSH1(&ii); + save_env(e, &se, 1); + if (!subtype_in_env_existential(x, y, e)) ii = NULL; - } else { - if (!subtype_in_env_existential(y, x, e, 0, e->invdepth)) + restore_env(e, &se, 1); + flip_offset(e); + if (!subtype_in_env_existential(y, x, e)) ii = NULL; + flip_offset(e); } - restore_env(e, root, &se); + restore_env(e, &se, 1); free_env(&se); JL_GC_POP(); return ii; @@ -2908,6 +3507,7 @@ static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t // intersection where x == Type{...} and y is not static jl_value_t *intersect_type_type(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int8_t R) { + assert(e->Loffset == 0); jl_value_t *p0 = jl_tparam0(x); if (!jl_is_typevar(p0)) return (jl_typeof(p0) == y) ? x : jl_bottom_type; @@ -2956,6 +3556,23 @@ static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOT return compareto_var(x, (jl_tvar_t*)y, e, -1) || compareto_var(y, (jl_tvar_t*)x, e, 1); } +static int has_typevar_via_env(jl_value_t *x, jl_tvar_t *t, jl_stenv_t *e) +{ + if (e->Loffset == 0) { + jl_varbinding_t *temp = e->vars; + while (temp != NULL) { + if (temp->var == t) + break; + if (temp->lb == temp->ub && + temp->lb == (jl_value_t *)t && + jl_has_typevar(x, temp->var)) + return 1; + temp = temp->prev; + } + } + return jl_has_typevar(x, t); +} + // `param` means we are currently looking at a parameter of a type constructor // (as opposed to being outside any type constructor, or comparing variable bounds). // this is used to record the positions where type variables occur for the @@ -2982,72 +3599,139 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa jl_value_t *xub = xx ? xx->ub : ((jl_tvar_t*)x)->ub; jl_value_t *ylb = yy ? yy->lb : ((jl_tvar_t*)y)->lb; jl_value_t *yub = yy ? yy->ub : ((jl_tvar_t*)y)->ub; - record_var_occurrence(xx, e, param); if (xx && yy && xx->depth0 != yy->depth0) { + record_var_occurrence(xx, e, param); record_var_occurrence(yy, e, param); return subtype_in_env(yy->ub, yy->lb, e) ? 
y : jl_bottom_type; } if (xub == xlb && jl_is_typevar(xub)) { + record_var_occurrence(xx, e, param); if (y == xub) { record_var_occurrence(yy, e, param); return y; } - return intersect(y, xub, e, param); + if (R) flip_offset(e); + jl_value_t *res = intersect(xub, y, e, param); + if (R) flip_offset(e); + return res; } + if (yub == ylb && jl_is_typevar(yub)) { + record_var_occurrence(yy, e, param); + if (R) flip_offset(e); + jl_value_t *res = intersect(x, yub, e, param); + if (R) flip_offset(e); + return res; + } + if (xub == xlb && jl_is_typevar(xub)) { + record_var_occurrence(xx, e, param); + if (y == xub) { + record_var_occurrence(yy, e, param); + return y; + } + if (R) flip_offset(e); + jl_value_t *res = intersect(xub, y, e, param); + if (R) flip_offset(e); + return res; + } + if (yub == ylb && jl_is_typevar(yub)) { + record_var_occurrence(yy, e, param); + if (R) flip_offset(e); + jl_value_t *res = intersect(x, yub, e, param); + if (R) flip_offset(e); + return res; + } + record_var_occurrence(xx, e, param); record_var_occurrence(yy, e, param); + int xoffset = R ? -e->Loffset : e->Loffset; if (!jl_is_type(ylb) && !jl_is_typevar(ylb)) { if (xx) - return set_var_to_const(xx, ylb, yy); - if ((xlb == jl_bottom_type && xub == (jl_value_t*)jl_any_type) || jl_egal(xlb, ylb)) - return ylb; + return set_var_to_const(xx, ylb, e, R); + if ((xlb == jl_bottom_type && xub == (jl_value_t*)jl_any_type) || jl_egal(xlb, ylb)) { + if (xoffset == 0) + return ylb; + else if (jl_is_long(ylb)) { + if (xoffset > 0) + return ylb; + else + return jl_box_long(jl_unbox_long(ylb) + xoffset); + } + } return jl_bottom_type; } if (!jl_is_type(xlb) && !jl_is_typevar(xlb)) { if (yy) - return set_var_to_const(yy, xlb, xx); - if (ylb == jl_bottom_type && yub == (jl_value_t*)jl_any_type) - return xlb; + return set_var_to_const(yy, xlb, e, !R); + if (ylb == jl_bottom_type && yub == (jl_value_t*)jl_any_type) { + if (xoffset == 0) + return xlb; + else if (jl_is_long(xlb)) { + if (xoffset < 0) + return xlb; + else + return jl_box_long(jl_unbox_long(ylb) - xoffset); + } + } return jl_bottom_type; } int ccheck; - if (yub == xub || + if (R) flip_offset(e); + if (xlb == xub && ylb == yub && + jl_has_typevar(xlb, (jl_tvar_t *)y) && + jl_has_typevar(ylb, (jl_tvar_t *)x)) { + // special case for e.g. + // 1) Val{Y}<:X<:Val{Y} && Val{X}<:Y<:Val{X} + // 2) Y<:X<:Y && Val{X}<:Y<:Val{X} => Val{Y}<:Y<:Val{Y} + ccheck = 0; + } + else if (yub == xub || (subtype_by_bounds(xlb, yub, e) && subtype_by_bounds(ylb, xub, e))) { ccheck = 1; } else { if (R) flip_vars(e); - ccheck = subtype_in_env(xlb, yub, e) && subtype_in_env(ylb, xub, e); + ccheck = subtype_in_env(xlb, yub, e); + if (ccheck) { + flip_offset(e); + ccheck = subtype_in_env(ylb, xub, e); + flip_offset(e); + } if (R) flip_vars(e); } + if (R) flip_offset(e); if (!ccheck) return jl_bottom_type; - if (var_occurs_inside(xub, (jl_tvar_t*)y, 0, 0) && var_occurs_inside(yub, (jl_tvar_t*)x, 0, 0)) { + if ((has_typevar_via_env(xub, (jl_tvar_t*)y, e) || has_typevar_via_env(xub, (jl_tvar_t*)x, e)) && + (has_typevar_via_env(yub, (jl_tvar_t*)x, e) || has_typevar_via_env(yub, (jl_tvar_t*)y, e))) { + // TODO: This doesn't make much sense. // circular constraint. the result will be Bottom, but in the meantime // we need to avoid computing intersect(xub, yub) since it won't terminate. return y; } jl_value_t *ub=NULL, *lb=NULL; JL_GC_PUSH2(&lb, &ub); - ub = intersect_aside(xub, yub, e, 0, xx ? xx->depth0 : 0); + int d = xx ? xx->depth0 : yy ? yy->depth0 : 0; + ub = R ? 
intersect_aside(yub, xub, e, d) : intersect_aside(xub, yub, e, d); if (reachable_var(xlb, (jl_tvar_t*)y, e)) lb = ylb; else lb = simple_join(xlb, ylb); - if (yy) { + if (yy && xoffset == 0) { yy->lb = lb; if (!reachable_var(ub, (jl_tvar_t*)y, e)) yy->ub = ub; assert(yy->ub != y); assert(yy->lb != y); } - if (xx && !reachable_var(y, (jl_tvar_t*)x, e)) { + if (xx && xoffset == 0 && !reachable_var(y, (jl_tvar_t*)x, e)) { xx->lb = y; xx->ub = y; assert(xx->ub != x); } JL_GC_POP(); - return y; + // Here we always return the shorter `Vararg`'s length. + return xoffset < 0 ? x : y; } + assert(e->Loffset == 0); record_var_occurrence(xx, e, param); record_var_occurrence(yy, e, param); if (xx && yy && xx->concrete && !yy->concrete) { @@ -3062,7 +3746,7 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa record_var_occurrence(lookup(e, (jl_tvar_t*)y), e, param); return intersect_var((jl_tvar_t*)y, x, e, 1, param); } - if (!jl_has_free_typevars(x) && !jl_has_free_typevars(y)) { + if (e->Loffset == 0 && !jl_has_free_typevars(x) && !jl_has_free_typevars(y)) { if (jl_subtype(x, y)) return x; if (jl_subtype(y, x)) return y; } @@ -3083,9 +3767,9 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa if (jl_is_unionall(x)) { if (jl_is_unionall(y)) { jl_value_t *a=NULL, *b=jl_bottom_type, *res=NULL; - JL_GC_PUSH2(&a,&b); + JL_GC_PUSH2(&a, &b); jl_savedenv_t se; - save_env(e, NULL, &se); + save_env(e, &se, 0); a = intersect_unionall(y, (jl_unionall_t*)x, e, 0, param); if (jl_is_unionall(a)) { jl_unionall_t *ua = (jl_unionall_t*)a; @@ -3093,7 +3777,7 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa jl_unionall_t *ub = (jl_unionall_t*)ua->body; if (jl_has_typevar(ub->var->ub, ua->var) || jl_has_typevar(ub->var->lb, ua->var)) { - restore_env(e, NULL, &se); // restore counts + restore_env(e, &se, 0); // restore counts b = intersect_unionall(x, (jl_unionall_t*)y, e, 1, param); } } @@ -3130,20 +3814,40 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa size_t i, np = jl_nparams(xd); jl_value_t **newparams; JL_GC_PUSHARGS(newparams, np); - for (i=0; i < np; i++) { + int isx = 1, isy = 1; // try to reuse the object x or y as res whenever we can (e.g. 
when it is the supertype) instead of allocating a copy + for (i = 0; i < np; i++) { jl_value_t *xi = jl_tparam(xd, i), *yi = jl_tparam(yd, i); jl_value_t *ii = intersect_invariant(xi, yi, e); if (ii == NULL) break; + isx = isx && ii == xi; + isy = isy && ii == yi; newparams[i] = ii; } jl_value_t *res = jl_bottom_type; - if (i >= np) { - JL_TRY { - res = jl_apply_type(xd->name->wrapper, newparams, np); + if (i == np) { + if (!isx && !isy) { + // do a more careful check now for equivalence + isx = 1; + for (i = 0; i < np; i++) + isx = isx && obviously_egal(newparams[i], jl_tparam(xd, i)); + if (!isx) { + isy = 1; + for (i = 0; i < np; i++) + isy = isy && obviously_egal(newparams[i], jl_tparam(yd, i)); + } } - JL_CATCH { - res = jl_bottom_type; + if (isx) + res = x; + else if (isy) + res = y; + else { + JL_TRY { + res = jl_apply_type(xd->name->wrapper, newparams, np); + } + JL_CATCH { + res = jl_bottom_type; + } } } JL_GC_POP(); @@ -3166,64 +3870,191 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa return jl_bottom_type; } +static int merge_env(jl_stenv_t *e, jl_savedenv_t *se, int count) +{ + if (count == 0) + alloc_env(e, se, 1); + jl_value_t **roots = NULL; + int nroots = 0; + if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) { + jl_svec_t *sv = (jl_svec_t*)se->roots[0]; + assert(jl_is_svec(sv)); + roots = jl_svec_data(sv); + nroots = jl_svec_len(sv); + } + else { + roots = se->roots; + nroots = se->gcframe.nroots >> 2; + } + int m = 0, n = 0; + jl_varbinding_t *v = e->vars; + while (v != NULL) { + if (count == 0) { + // need to initialize this + se->buf[m] = 0; + se->buf[m+1] = 0; + se->buf[m+2] = 0; + se->buf[m+3] = v->max_offset; + } + if (v->occurs) { + // only merge lb/ub/innervars if this var occurs. + jl_value_t *b1, *b2; + b1 = roots[n]; + JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame + b2 = v->lb; + JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots + roots[n] = b1 ? simple_meet(b1, b2, 0) : b2; + b1 = roots[n+1]; + JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame + b2 = v->ub; + JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots + roots[n+1] = b1 ? simple_join(b1, b2) : b2; + b1 = roots[n+2]; + JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame + b2 = (jl_value_t*)v->innervars; + JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots + if (b2 && b1 != b2) { + if (b1) + jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2); + else + roots[n+2] = b2; + } + // record the meeted vars. + se->buf[m] = 1; + } + // always merge occurs_inv/cov by max (never decrease) + if (v->occurs_inv > se->buf[m+1]) + se->buf[m+1] = v->occurs_inv; + if (v->occurs_cov > se->buf[m+2]) + se->buf[m+2] = v->occurs_cov; + // always merge max_offset by min + if (!v->intersected && v->max_offset < se->buf[m+3]) + se->buf[m+3] = v->max_offset; + m = m + 4; + n = n + 3; + v = v->prev; + } + assert(n == nroots); (void)nroots; + return count + 1; +} + +// merge untouched vars' info. 
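// ------------------------------------------------------------------------
// Illustrative sketch (hypothetical program, existing exports only): what
// the per-branch exploration plus merge_env/final_merge_env below amount
// to when the inputs contain Unions -- failing branches contribute nothing,
// the surviving branches have their variable bounds merged and their
// results unioned (the jl_type_union call in intersect_all). Expected
// results are stated in comments and checked by mutual subtyping.
#include <julia.h>
#include <stdio.h>

int main(void)
{
    jl_init();
    jl_value_t *a = NULL, *b = NULL, *ab = NULL, *expect = NULL;
    JL_GC_PUSH4(&a, &b, &ab, &expect);
    // Only the T == Int branch of the Union survives the Ref{Int} slot.
    a      = jl_eval_string("Tuple{T, Ref{T}} where T");
    b      = jl_eval_string("Tuple{Union{Int, String}, Ref{Int}}");
    expect = jl_eval_string("Tuple{Int, Ref{Int}}");
    ab = jl_type_intersection(a, b);
    printf("equivalent to Tuple{Int, Ref{Int}}: %d\n",
           jl_subtype(ab, expect) && jl_subtype(expect, ab));   // expected 1
    // Two overlapping unions: only the common branch remains.
    a      = jl_eval_string("Union{Vector{Int}, Vector{String}}");
    b      = jl_eval_string("Union{Vector{String}, Vector{Float64}}");
    expect = jl_eval_string("Vector{String}");
    ab = jl_type_intersection(a, b);
    printf("equivalent to Vector{String}:       %d\n",
           jl_subtype(ab, expect) && jl_subtype(expect, ab));   // expected 1
    JL_GC_POP();
    jl_atexit_hook(0);
    return 0;
}
// ------------------------------------------------------------------------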
+static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se) +{ + jl_value_t **merged = NULL; + jl_value_t **saved = NULL; + int nroots = 0; + assert(se->gcframe.nroots == me->gcframe.nroots); + if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) { + jl_svec_t *sv = (jl_svec_t*)se->roots[0]; + assert(jl_is_svec(sv)); + saved = jl_svec_data(sv); + nroots = jl_svec_len(sv); + sv = (jl_svec_t*)me->roots[0]; + assert(jl_is_svec(sv)); + merged = jl_svec_data(sv); + assert(nroots == jl_svec_len(sv)); + } + else { + saved = se->roots; + merged = me->roots; + nroots = se->gcframe.nroots >> 2; + } + assert(nroots == current_env_length(e) * 3); + assert(nroots % 3 == 0); + for (int n = 0, m = 0; n < nroots; n += 3, m += 4) { + if (merged[n] == NULL) + merged[n] = saved[n]; + if (merged[n+1] == NULL) + merged[n+1] = saved[n+1]; + jl_value_t *b1, *b2; + b1 = merged[n+2]; + JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame + b2 = saved[n+2]; + JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know this came from our GC frame + if (b2 && b1 != b2) { + if (b1) + jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2); + else + merged[n+2] = b2; + } + me->buf[m] |= se->buf[m]; + } +} + +static void expand_local_env(jl_stenv_t *e, jl_value_t *res) +{ + jl_varbinding_t *v = e->vars; + // Here we pull in some typevar missed in fastpath. + while (v != NULL) { + v->occurs = v->occurs || jl_has_typevar(res, v->var); + assert(v->occurs == 0 || v->occurs == 1); + v = v->prev; + } + v = e->vars; + while (v != NULL) { + if (v->occurs == 1) { + jl_varbinding_t *v2 = e->vars; + while (v2 != NULL) { + if (v2 != v && v2->occurs == 0) + v2->occurs = -(jl_has_typevar(v->lb, v2->var) || jl_has_typevar(v->ub, v2->var)); + v2 = v2->prev; + } + } + v = v->prev; + } +} + static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) { e->Runions.depth = 0; e->Runions.more = 0; e->Runions.used = 0; jl_value_t **is; - JL_GC_PUSHARGS(is, 3); - jl_value_t **saved = &is[2]; - jl_savedenv_t se; - save_env(e, saved, &se); - int lastset = 0, niter = 0, total_iter = 0; - jl_value_t *ii = intersect(x, y, e, 0); - is[0] = ii; // root - if (ii == jl_bottom_type) { - restore_env(e, *saved, &se); - } - else { - free_env(&se); - save_env(e, saved, &se); - } - while (e->Runions.more) { - if (e->emptiness_only && ii != jl_bottom_type) + JL_GC_PUSHARGS(is, 2); + jl_savedenv_t se, me; + save_env(e, &se, 1); + int niter = 0, total_iter = 0; + clean_occurs(e); + is[0] = intersect(x, y, e, 0); // root + if (is[0] != jl_bottom_type) { + expand_local_env(e, is[0]); + niter = merge_env(e, &me, niter); + } + restore_env(e, &se, 1); + while (next_union_state(e, 1)) { + if (e->emptiness_only && is[0] != jl_bottom_type) break; e->Runions.depth = 0; - int set = e->Runions.more - 1; e->Runions.more = 0; - statestack_set(&e->Runions, set, 1); - for (int i = set + 1; i <= lastset; i++) - statestack_set(&e->Runions, i, 0); - lastset = set; - is[0] = ii; + clean_occurs(e); is[1] = intersect(x, y, e, 0); - if (is[1] == jl_bottom_type) { - restore_env(e, *saved, &se); - } - else { - free_env(&se); - save_env(e, saved, &se); + if (is[1] != jl_bottom_type) { + expand_local_env(e, is[1]); + niter = merge_env(e, &me, niter); } + restore_env(e, &se, 1); if (is[0] == jl_bottom_type) - ii = is[1]; - else if (is[1] == jl_bottom_type) - ii = is[0]; - else { + is[0] = is[1]; + else if (is[1] != jl_bottom_type) { // TODO: the repeated subtype checks in here can get expensive - ii = jl_type_union(is, 2); - 
niter++; + is[0] = jl_type_union(is, 2); } total_iter++; - if (niter > 3 || total_iter > 400000) { - ii = y; + if (niter > 4 || total_iter > 400000) { + is[0] = y; break; } } + if (niter) { + final_merge_env(e, &me, &se); + restore_env(e, &me, 1); + free_env(&me); + } free_env(&se); JL_GC_POP(); - return ii; + return is[0]; } // type intersection entry points @@ -3344,13 +4175,14 @@ static jl_value_t *switch_union_tuple(jl_value_t *a, jl_value_t *b) ts[1] = jl_tparam(b, i); jl_svecset(vec, i, jl_type_union(ts, 2)); } - jl_value_t *ans = (jl_value_t*)jl_apply_tuple_type(vec); + jl_value_t *ans = jl_apply_tuple_type(vec, 1); JL_GC_POP(); return ans; } // `a` might have a non-empty intersection with some concrete type b even if !(a<:b) and !(b<:a) // For example a=`Tuple{Type{<:Vector}}` and b=`Tuple{DataType}` +// TODO: this query is partly available memoized as jl_type_equality_is_identity static int might_intersect_concrete(jl_value_t *a) { if (jl_is_unionall(a)) @@ -3400,9 +4232,9 @@ jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t * *ans = a; sz = szb; if (issubty) *issubty = 1; } - else if (lta && ltb) { - goto bot; - } + // else if (lta && ltb) { // !jl_type_equality_is_identity known in this case because obviously_disjoint returned false + // goto bot; + // } else if (jl_subtype(b, a)) { *ans = b; } @@ -3432,7 +4264,7 @@ jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t * if (jl_is_uniontype(ans_unwrapped)) { ans_unwrapped = switch_union_tuple(((jl_uniontype_t*)ans_unwrapped)->a, ((jl_uniontype_t*)ans_unwrapped)->b); if (ans_unwrapped != NULL) { - *ans = jl_rewrap_unionall(ans_unwrapped, *ans); + *ans = jl_rewrap_unionall_(ans_unwrapped, *ans); } } JL_GC_POP(); @@ -3453,17 +4285,20 @@ jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t * } } if (sz == 0 && szb > 0) { - while (jl_is_unionall(b)) { - env[i++] = (jl_value_t*)((jl_unionall_t*)b)->var; - b = ((jl_unionall_t*)b)->body; + jl_unionall_t *ub = (jl_unionall_t*)b; + while (jl_is_unionall(ub)) { + env[i++] = (jl_value_t*)ub->var; + ub = (jl_unionall_t*)ub->body; } sz = szb; } if (penv) { jl_svec_t *e = jl_alloc_svec(sz); - *penv = e; - for(i=0; i < sz; i++) + for (i = 0; i < sz; i++) { + assert(env[i]); jl_svecset(e, i, env[i]); + } + *penv = e; } bot: JL_GC_POP(); @@ -3504,50 +4339,358 @@ int jl_subtype_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv) // copy env to svec for return int i = 0; jl_svec_t *e = jl_alloc_svec(szb); - *penv = e; - for (i = 0; i < szb; i++) + for (i = 0; i < szb; i++) { + assert(env[i]); jl_svecset(e, i, env[i]); + } + *penv = e; } JL_GC_POP(); return sub; } +// type utils +static void check_diagonal(jl_value_t *t, jl_varbinding_t *troot, int param) +{ + if (jl_is_uniontype(t)) { + int i, len = 0; + jl_varbinding_t *v; + for (v = troot; v != NULL; v = v->prev) + len++; + int8_t *occurs = (int8_t *)alloca(len); + for (v = troot, i = 0; v != NULL; v = v->prev, i++) + occurs[i] = v->occurs_inv | (v->occurs_cov << 2); + check_diagonal(((jl_uniontype_t *)t)->a, troot, param); + for (v = troot, i = 0; v != NULL; v = v->prev, i++) { + int8_t occurs_inv = occurs[i] & 3; + int8_t occurs_cov = occurs[i] >> 2; + occurs[i] = v->occurs_inv | (v->occurs_cov << 2); + v->occurs_inv = occurs_inv; + v->occurs_cov = occurs_cov; + } + check_diagonal(((jl_uniontype_t *)t)->b, troot, param); + for (v = troot, i = 0; v != NULL; v = v->prev, i++) { + if (v->occurs_inv < (occurs[i] & 3)) + v->occurs_inv = occurs[i] & 3; + if 
(v->occurs_cov < (occurs[i] >> 2)) + v->occurs_cov = occurs[i] >> 2; + } + } + else if (jl_is_unionall(t)) { + assert(troot != NULL); + jl_varbinding_t *v1 = troot, *v2 = troot->prev; + while (v2 != NULL) { + if (v2->var == ((jl_unionall_t *)t)->var) { + v1->prev = v2->prev; + break; + } + v1 = v2; + v2 = v2->prev; + } + check_diagonal(((jl_unionall_t *)t)->body, troot, param); + v1->prev = v2; + } + else if (jl_is_datatype(t)) { + int nparam = jl_is_tuple_type(t) ? 1 : 2; + if (nparam < param) nparam = param; + for (size_t i = 0; i < jl_nparams(t); i++) { + check_diagonal(jl_tparam(t, i), troot, nparam); + } + } + else if (jl_is_vararg(t)) { + jl_value_t *T = jl_unwrap_vararg(t); + jl_value_t *N = jl_unwrap_vararg_num(t); + int n = (N && jl_is_long(N)) ? jl_unbox_long(N) : 2; + if (T && n > 0) check_diagonal(T, troot, param); + if (T && n > 1) check_diagonal(T, troot, param); + if (N) check_diagonal(N, troot, 2); + } + else if (jl_is_typevar(t)) { + jl_varbinding_t *v = troot; + for (; v != NULL; v = v->prev) { + if (v->var == (jl_tvar_t *)t) { + if (param == 1 && v->occurs_cov < 2) v->occurs_cov++; + if (param == 2 && v->occurs_inv < 2) v->occurs_inv++; + break; + } + } + if (v == NULL) + check_diagonal(((jl_tvar_t *)t)->ub, troot, 0); + } +} + +static jl_value_t *insert_nondiagonal(jl_value_t *type, jl_varbinding_t *troot, int widen2ub) +{ + if (jl_is_typevar(type)) { + int concretekind = widen2ub > 1 ? 0 : 1; + jl_varbinding_t *v = troot; + for (; v != NULL; v = v->prev) { + if (v->occurs_inv == 0 && + v->occurs_cov > concretekind && + v->var == (jl_tvar_t *)type) + break; + } + if (v != NULL) { + if (widen2ub) { + type = insert_nondiagonal(((jl_tvar_t *)type)->ub, troot, 2); + } + else { + // we must replace each covariant occurrence of newvar with a different newvar2<:newvar (diagonal rule) + if (v->innervars == NULL) + v->innervars = jl_alloc_array_1d(jl_array_any_type, 0); + jl_value_t *newvar = NULL, *lb = v->var->lb, *ub = (jl_value_t *)v->var; + jl_array_t *innervars = v->innervars; + JL_GC_PUSH4(&newvar, &lb, &ub, &innervars); + newvar = (jl_value_t *)jl_new_typevar(v->var->name, lb, ub); + jl_array_ptr_1d_push(innervars, newvar); + JL_GC_POP(); + type = newvar; + } + } + } + else if (jl_is_unionall(type)) { + jl_value_t *body = ((jl_unionall_t*)type)->body; + jl_tvar_t *var = ((jl_unionall_t*)type)->var; + jl_varbinding_t *v = troot; + for (; v != NULL; v = v->prev) { + if (v->var == var) + break; + } + if (v) v->var = NULL; // Temporarily remove `type->var` from binding list. + jl_value_t *newbody = insert_nondiagonal(body, troot, widen2ub); + if (v) v->var = var; // And restore it after inner insertation. + jl_value_t *newvar = NULL; + JL_GC_PUSH2(&newbody, &newvar); + if (body == newbody || jl_has_typevar(newbody, var)) { + if (body != newbody) + newbody = jl_new_struct(jl_unionall_type, var, newbody); + // n.b. 
we do not widen lb, since that would be the wrong direction + newvar = insert_nondiagonal(var->ub, troot, widen2ub); + if (newvar != var->ub) { + newvar = (jl_value_t*)jl_new_typevar(var->name, var->lb, newvar); + newbody = jl_apply_type1(newbody, newvar); + newbody = jl_type_unionall((jl_tvar_t*)newvar, newbody); + } + } + type = newbody; + JL_GC_POP(); + } + else if (jl_is_uniontype(type)) { + jl_value_t *a = ((jl_uniontype_t*)type)->a; + jl_value_t *b = ((jl_uniontype_t*)type)->b; + jl_value_t *newa = NULL; + jl_value_t *newb = NULL; + JL_GC_PUSH2(&newa, &newb); + newa = insert_nondiagonal(a, troot, widen2ub); + newb = insert_nondiagonal(b, troot, widen2ub); + if (newa != a || newb != b) + type = simple_union(newa, newb); + JL_GC_POP(); + } + else if (jl_is_vararg(type)) { + // As for Vararg we'd better widen it's var to ub as otherwise they are still diagonal + jl_value_t *t = jl_unwrap_vararg(type); + jl_value_t *n = jl_unwrap_vararg_num(type); + if (widen2ub == 0) + widen2ub = !(n && jl_is_long(n)) || jl_unbox_long(n) > 1; + jl_value_t *newt; + JL_GC_PUSH2(&newt, &n); + newt = insert_nondiagonal(t, troot, widen2ub); + if (t != newt) + type = (jl_value_t *)jl_wrap_vararg(newt, n, 0); + JL_GC_POP(); + } + else if (jl_is_datatype(type)) { + if (jl_is_tuple_type(type)) { + jl_svec_t *newparams = NULL; + jl_value_t *newelt = NULL; + JL_GC_PUSH2(&newparams, &newelt); + for (size_t i = 0; i < jl_nparams(type); i++) { + jl_value_t *elt = jl_tparam(type, i); + newelt = insert_nondiagonal(elt, troot, widen2ub); + if (elt != newelt) { + if (!newparams) + newparams = jl_svec_copy(((jl_datatype_t*)type)->parameters); + jl_svecset(newparams, i, newelt); + } + } + if (newparams) + type = (jl_value_t*)jl_apply_tuple_type(newparams, 1); + JL_GC_POP(); + } + } + return type; +} + +static jl_value_t *_widen_diagonal(jl_value_t *t, jl_varbinding_t *troot) { + check_diagonal(t, troot, 0); + int any_concrete = 0; + for (jl_varbinding_t *v = troot; v != NULL; v = v->prev) + any_concrete |= v->occurs_cov > 1 && v->occurs_inv == 0; + if (!any_concrete) + return t; // no diagonal + return insert_nondiagonal(t, troot, 0); +} + +static jl_value_t *widen_diagonal(jl_value_t *t, jl_unionall_t *u, jl_varbinding_t *troot) +{ + jl_varbinding_t vb = { u->var, NULL, NULL, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, troot }; + jl_value_t *nt; + JL_GC_PUSH2(&vb.innervars, &nt); + if (jl_is_unionall(u->body)) + nt = widen_diagonal(t, (jl_unionall_t *)u->body, &vb); + else + nt = _widen_diagonal(t, &vb); + if (vb.innervars != NULL) { + for (size_t i = 0; i < jl_array_nrows(vb.innervars); i++) { + jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb.innervars, i); + nt = jl_type_unionall(var, nt); + } + } + JL_GC_POP(); + return nt; +} + +JL_DLLEXPORT jl_value_t *jl_widen_diagonal(jl_value_t *t, jl_unionall_t *ua) +{ + return widen_diagonal(t, ua, NULL); +} // specificity comparison -static int eq_msp(jl_value_t *a, jl_value_t *b, jl_typeenv_t *env) +static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, jl_typeenv_t *env) { if (!(jl_is_type(a) || jl_is_typevar(a)) || !(jl_is_type(b) || jl_is_typevar(b))) return jl_egal(a, b); + if (a == b) // assume the TypeVar env is the same?? 
+ return 1; + if (jl_typeof(a) == jl_typeof(b) && jl_types_egal(a, b)) + return 1; + if (obviously_unequal(a, b)) + return 0; + // the following is an interleaved version of: + // return jl_type_equal(a, b) + // where we try to do the fast checks before the expensive ones + if (jl_is_datatype(a) && !jl_is_concrete_type(b)) { + // if one type looks simpler, check it on the right + // first in order to reject more quickly. + jl_value_t *temp = a; + a = b; + b = temp; + } + // first check if a <: b has an obvious answer + int subtype_ab = 2; + if (b == (jl_value_t*)jl_any_type || a == jl_bottom_type) { + subtype_ab = 1; + } + else if (obvious_subtype(a, b, b0, &subtype_ab)) { +#ifdef NDEBUG + if (subtype_ab == 0) + return 0; +#endif + } + else { + subtype_ab = 3; + } + // next check if b <: a has an obvious answer + int subtype_ba = 2; + if (a == (jl_value_t*)jl_any_type || b == jl_bottom_type) { + subtype_ba = 1; + } + else if (obvious_subtype(b, a, a0, &subtype_ba)) { +#ifdef NDEBUG + if (subtype_ba == 0) + return 0; +#endif + } + else { + subtype_ba = 3; + } + // finally, do full subtyping for any inconclusive test JL_GC_PUSH2(&a, &b); - jl_typeenv_t *e = env; - while (e != NULL) { - a = jl_type_unionall(e->var, a); - b = jl_type_unionall(e->var, b); - e = e->prev; + jl_typeenv_t *env2 = env; + while (env2 != NULL) { + a = jl_type_unionall(env2->var, a); + b = jl_type_unionall(env2->var, b); + env2 = env2->prev; + } + jl_stenv_t e; +#ifdef NDEBUG + if (subtype_ab != 1) +#endif + { + init_stenv(&e, NULL, 0); + int subtype = forall_exists_subtype(a, b, &e, 0); + assert(subtype_ab == 3 || subtype_ab == subtype || jl_has_free_typevars(a) || jl_has_free_typevars(b)); +#ifndef NDEBUG + if (subtype_ab != 0 && subtype_ab != 1) // ensures that running in a debugger doesn't change the result +#endif + subtype_ab = subtype; +#ifdef NDEBUG + if (subtype_ab == 0) { + JL_GC_POP(); + return 0; + } +#endif + } +#ifdef NDEBUG + if (subtype_ba != 1) +#endif + { + init_stenv(&e, NULL, 0); + int subtype = forall_exists_subtype(b, a, &e, 0); + assert(subtype_ba == 3 || subtype_ba == subtype || jl_has_free_typevars(a) || jl_has_free_typevars(b)); +#ifndef NDEBUG + if (subtype_ba != 0 && subtype_ba != 1) // ensures that running in a debugger doesn't change the result +#endif + subtype_ba = subtype; } - int eq = jl_types_equal(a, b); JL_GC_POP(); - return eq; + // all tests successful + return subtype_ab && subtype_ba; } -static int sub_msp(jl_value_t *a, jl_value_t *b, jl_typeenv_t *env) +static int sub_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, jl_typeenv_t *env) { - JL_GC_PUSH2(&a, &b); + jl_stenv_t e; + if (y == (jl_value_t*)jl_any_type || x == jl_bottom_type) + return 1; + if (x == y || + (jl_typeof(x) == jl_typeof(y) && + (jl_is_unionall(y) || jl_is_uniontype(y)) && + jl_types_egal(x, y))) { + return 1; + } + int obvious_sub = 2; + if (obvious_subtype(x, y, y0, &obvious_sub)) { +#ifdef NDEBUG + return obvious_sub; +#endif + } + else { + obvious_sub = 3; + } + JL_GC_PUSH2(&x, &y); while (env != NULL) { - if (jl_is_type(a) || jl_is_typevar(a)) - a = jl_type_unionall(env->var, a); - if (jl_is_type(b) || jl_is_typevar(b)) - b = jl_type_unionall(env->var, b); + if (jl_is_type(x) || jl_is_typevar(x)) + x = jl_type_unionall(env->var, x); + if (jl_is_type(y) || jl_is_typevar(y)) + y = jl_type_unionall(env->var, y); env = env->prev; } - int sub = jl_subtype(a, b); + init_stenv(&e, NULL, 0); + int subtype = forall_exists_subtype(x, y, &e, 0); + assert(obvious_sub == 3 || obvious_sub == subtype || 
jl_has_free_typevars(x) || jl_has_free_typevars(y)); +#ifndef NDEBUG + if (obvious_sub == 0 || obvious_sub == 1) + subtype = obvious_sub; // this ensures that running in a debugger doesn't change the result +#endif JL_GC_POP(); - return sub; + return subtype; } -static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_typeenv_t *env); +static int type_morespecific_(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, int invariant, jl_typeenv_t *env); static int num_occurs(jl_tvar_t *v, jl_typeenv_t *env); @@ -3570,7 +4713,7 @@ static jl_value_t *nth_tuple_elt(jl_datatype_t *t JL_PROPAGATES_ROOT, size_t i) return NULL; } -static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invariant, jl_typeenv_t *env) +static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, jl_value_t *c0, jl_value_t *p0, int invariant, jl_typeenv_t *env) { size_t plen = jl_nparams(pdt); if (plen == 0) return 0; @@ -3600,8 +4743,8 @@ static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invari break; } - if (type_morespecific_(pe, ce, invariant, env)) { - assert(!type_morespecific_(ce, pe, invariant, env)); + if (type_morespecific_(pe, ce, p0, c0, invariant, env)) { + assert(!type_morespecific_(ce, pe, c0, p0, invariant, env)); return 0; } @@ -3614,9 +4757,9 @@ static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invari if (cva && pva && i >= clen-1 && i >= plen-1 && (some_morespecific || (cdiag && !pdiag))) return 1; - int cms = type_morespecific_(ce, pe, invariant, env); + int cms = type_morespecific_(ce, pe, c0, p0, invariant, env); - if (!cms && !sub_msp(ce, pe, env)) { + if (!cms && !sub_msp(ce, pe, p0, env)) { /* A bound vararg tuple can be more specific despite disjoint elements in order to preserve transitivity. For example in @@ -3629,7 +4772,7 @@ static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invari } // Tuple{..., T} not more specific than Tuple{..., Vararg{S}} if S is diagonal - if (!cms && i == clen-1 && clen == plen && !cva && pva && eq_msp(ce, pe, env) && + if (!cms && i == clen-1 && clen == plen && !cva && pva && eq_msp(ce, pe, c0, p0, env) && jl_is_typevar(ce) && jl_is_typevar(pe) && !cdiag && pdiag) return 0; @@ -3658,7 +4801,7 @@ static size_t tuple_full_length(jl_value_t *t) // Called when a is a bound-vararg and b is not a vararg. Sets the vararg length // in a to match b, as long as this makes some earlier argument more specific. 
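// For example (illustrative): with a = Tuple{Vector{T}, Vararg{T, N}} where {T, N}
// and b = Tuple{Vector{Int}, Int}, the bound vararg length N can be pinned to 1 so
// that a is compared against b as a fixed-length tuple; the substituted copy only
// counts if doing so makes an earlier argument (here Vector{T}) more specific.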
-static int args_morespecific_fix1(jl_value_t *a, jl_value_t *b, int swap, jl_typeenv_t *env) +static int args_morespecific_fix1(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, int swap, jl_typeenv_t *env) { size_t n = jl_nparams(a); int taillen = tuple_full_length(b)-n+1; @@ -3678,12 +4821,12 @@ static int args_morespecific_fix1(jl_value_t *a, jl_value_t *b, int swap, jl_typ } int ret = -1; if (changed) { - if (eq_msp(b, (jl_value_t*)new_a, env)) + if (eq_msp(b, (jl_value_t*)new_a, b0, a0, env)) ret = swap; else if (swap) - ret = type_morespecific_(b, (jl_value_t*)new_a, 0, env); + ret = type_morespecific_(b, (jl_value_t*)new_a, b0, a0, 0, env); else - ret = type_morespecific_((jl_value_t*)new_a, b, 0, env); + ret = type_morespecific_((jl_value_t*)new_a, b, a0, b0, 0, env); } JL_GC_POP(); return ret; @@ -3728,38 +4871,61 @@ static int num_occurs(jl_tvar_t *v, jl_typeenv_t *env) return 0; } +int tuple_cmp_typeofbottom(jl_datatype_t *a, jl_datatype_t *b) +{ + size_t i, la = jl_nparams(a), lb = jl_nparams(b); + for (i = 0; i < la || i < lb; i++) { + jl_value_t *pa = i < la ? jl_tparam(a, i) : NULL; + jl_value_t *pb = i < lb ? jl_tparam(b, i) : NULL; + assert(jl_typeofbottom_type); // for clang-sa + int xa = pa == (jl_value_t*)jl_typeofbottom_type || pa == (jl_value_t*)jl_typeofbottom_type->super; + int xb = pb == (jl_value_t*)jl_typeofbottom_type || pb == (jl_value_t*)jl_typeofbottom_type->super; + if (xa != xb) + return xa - xb; + } + return 0; +} + + #define HANDLE_UNIONALL_A \ jl_unionall_t *ua = (jl_unionall_t*)a; \ jl_typeenv_t newenv = { ua->var, 0x0, env }; \ newenv.val = (jl_value_t*)(intptr_t)count_occurs(ua->body, ua->var); \ - return type_morespecific_(ua->body, b, invariant, &newenv) + return type_morespecific_(ua->body, b, a0, b0, invariant, &newenv) #define HANDLE_UNIONALL_B \ jl_unionall_t *ub = (jl_unionall_t*)b; \ jl_typeenv_t newenv = { ub->var, 0x0, env }; \ newenv.val = (jl_value_t*)(intptr_t)count_occurs(ub->body, ub->var); \ - return type_morespecific_(a, ub->body, invariant, &newenv) + return type_morespecific_(a, ub->body, a0, b0, invariant, &newenv) -static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_typeenv_t *env) +static int type_morespecific_(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, int invariant, jl_typeenv_t *env) { if (a == b) return 0; if (jl_is_tuple_type(a) && jl_is_tuple_type(b)) { + // compare whether a and b have Type{Union{}} included, + // which makes them instantly the most specific, regardless of all else, + // for whichever is left most (the left-to-right behavior here ensures + // we do not need to keep track of conflicts with multiple methods). 
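// For example (illustrative; `f` is a stand-in function): a signature
// Tuple{typeof(f), Type{Union{}}} sorts ahead of Tuple{typeof(f), Type{T}} where T,
// because only the former carries Type{Union{}} (or typeof(Union{})) in a parameter
// slot; tuple_cmp_typeofbottom below scans the slots left to right, and the first
// position where exactly one side carries that marker decides the comparison.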
+ int msp = tuple_cmp_typeofbottom((jl_datatype_t*)a, (jl_datatype_t*)b); + if (msp) + return msp > 0; // When one is JL_VARARG_BOUND and the other has fixed length, // allow the argument length to fix the tvar jl_vararg_kind_t akind = jl_va_tuple_kind((jl_datatype_t*)a); jl_vararg_kind_t bkind = jl_va_tuple_kind((jl_datatype_t*)b); int ans = -1; if (akind == JL_VARARG_BOUND && bkind < JL_VARARG_BOUND) { - ans = args_morespecific_fix1(a, b, 0, env); + ans = args_morespecific_fix1(a, b, a0, b0, 0, env); if (ans == 1) return 1; } if (bkind == JL_VARARG_BOUND && akind < JL_VARARG_BOUND) { - ans = args_morespecific_fix1(b, a, 1, env); + ans = args_morespecific_fix1(b, a, b0, a0, 1, env); if (ans == 0) return 0; } - return tuple_morespecific((jl_datatype_t*)a, (jl_datatype_t*)b, invariant, env); + return tuple_morespecific((jl_datatype_t*)a, (jl_datatype_t*)b, a0, b0, invariant, env); } if (!invariant) { @@ -3773,13 +4939,13 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty } // Union a is more specific than b if some element of a is more specific than b, but // not vice-versa. - if (sub_msp(b, a, env)) + if (sub_msp(b, a, a0, env)) return 0; jl_uniontype_t *u = (jl_uniontype_t*)a; - if (type_morespecific_(u->a, b, invariant, env) || type_morespecific_(u->b, b, invariant, env)) { + if (type_morespecific_(u->a, b, a0, b0, invariant, env) || type_morespecific_(u->b, b, a0, b0, invariant, env)) { if (jl_is_uniontype(b)) { jl_uniontype_t *v = (jl_uniontype_t*)b; - if (type_morespecific_(v->a, a, invariant, env) || type_morespecific_(v->b, a, invariant, env)) + if (type_morespecific_(v->a, a, b0, a0, invariant, env) || type_morespecific_(v->b, a, b0, a0, invariant, env)) return 0; } return 1; @@ -3793,11 +4959,11 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty jl_value_t *tp0a = jl_tparam0(a); if (jl_is_typevar(tp0a)) { jl_value_t *ub = ((jl_tvar_t*)tp0a)->ub; - if (jl_is_kind(b) && !sub_msp((jl_value_t*)jl_any_type, ub, env)) + if (jl_is_kind(b) && !sub_msp((jl_value_t*)jl_any_type, ub, b0, env)) return 1; } else if (tp0a == jl_bottom_type) { - if (sub_msp(b, (jl_value_t*)jl_type_type, env)) + if (sub_msp(b, (jl_value_t*)jl_type_type, (jl_value_t*)jl_type_type, env)) return 1; } else if (b == (jl_value_t*)jl_datatype_type || b == (jl_value_t*)jl_unionall_type || @@ -3811,8 +4977,8 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty HANDLE_UNIONALL_A; } jl_uniontype_t *u = (jl_uniontype_t*)b; - if (type_morespecific_(a, u->a, invariant, env) || type_morespecific_(a, u->b, invariant, env)) - return !type_morespecific_(b, a, invariant, env); + if (type_morespecific_(a, u->a, a0, b0, invariant, env) || type_morespecific_(a, u->b, a0, b0, invariant, env)) + return !type_morespecific_(b, a, b0, a0, invariant, env); return 0; } @@ -3828,7 +4994,7 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty if (tta->name != jl_type_typename) return 1; jl_value_t *tp0 = jl_tparam0(b); if (jl_is_typevar(tp0)) { - if (sub_msp((jl_value_t*)jl_any_type, ((jl_tvar_t*)tp0)->ub, env)) + if (sub_msp((jl_value_t*)jl_any_type, ((jl_tvar_t*)tp0)->ub, b0, env)) return 1; } } @@ -3841,11 +5007,11 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty int bfree = jl_has_free_typevars(bpara); if (!afree && !bfree && !jl_types_equal(apara, bpara)) return 0; - if (type_morespecific_(apara, bpara, 1, env) && (jl_is_typevar(apara) || !afree || bfree)) + if 
(type_morespecific_(apara, bpara, a0, b0, 1, env) && (jl_is_typevar(apara) || !afree || bfree)) ascore += 1; - else if (type_morespecific_(bpara, apara, 1, env) && (jl_is_typevar(bpara) || !bfree || afree)) + else if (type_morespecific_(bpara, apara, b0, a0, 1, env) && (jl_is_typevar(bpara) || !bfree || afree)) bscore += 1; - else if (eq_msp(apara, bpara, env)) { + else if (eq_msp(apara, bpara, a0, b0, env)) { if (!afree && bfree) ascore += 1; else if (afree && !bfree) @@ -3884,13 +5050,13 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty if (jl_is_typevar(a)) { if (jl_is_typevar(b)) { return (( type_morespecific_((jl_value_t*)((jl_tvar_t*)a)->ub, - (jl_value_t*)((jl_tvar_t*)b)->ub, 0, env) && + (jl_value_t*)((jl_tvar_t*)b)->ub, a0, b0, 0, env) && !type_morespecific_((jl_value_t*)((jl_tvar_t*)a)->lb, - (jl_value_t*)((jl_tvar_t*)b)->lb, 0, env)) || + (jl_value_t*)((jl_tvar_t*)b)->lb, a0, b0, 0, env)) || ( type_morespecific_((jl_value_t*)((jl_tvar_t*)b)->lb, - (jl_value_t*)((jl_tvar_t*)a)->lb, 0, env) && + (jl_value_t*)((jl_tvar_t*)a)->lb, b0, a0, 0, env) && !type_morespecific_((jl_value_t*)((jl_tvar_t*)b)->ub, - (jl_value_t*)((jl_tvar_t*)a)->ub, 0, env))); + (jl_value_t*)((jl_tvar_t*)a)->ub, b0, a0, 0, env))); } if (!jl_is_type(b)) return 0; @@ -3899,7 +5065,7 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty return 1; if (!jl_has_free_typevars(b)) return 0; - if (eq_msp(((jl_tvar_t*)a)->ub, b, env)) + if (eq_msp(((jl_tvar_t*)a)->ub, b, a0, b0, env)) return num_occurs((jl_tvar_t*)a, env) >= 2; } else { @@ -3908,7 +5074,7 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty num_occurs((jl_tvar_t*)a, env) >= 2) return 1; } - return type_morespecific_(((jl_tvar_t*)a)->ub, b, 0, env); + return type_morespecific_(((jl_tvar_t*)a)->ub, b, a0, b0, 0, env); } if (jl_is_typevar(b)) { if (!jl_is_type(a)) @@ -3917,21 +5083,21 @@ static int type_morespecific_(jl_value_t *a, jl_value_t *b, int invariant, jl_ty if (((jl_tvar_t*)b)->ub == jl_bottom_type) return 0; if (jl_has_free_typevars(a)) { - if (type_morespecific_(a, ((jl_tvar_t*)b)->ub, 0, env)) + if (type_morespecific_(a, ((jl_tvar_t*)b)->ub, a0, b0, 0, env)) return 1; - if (eq_msp(a, ((jl_tvar_t*)b)->ub, env)) + if (eq_msp(a, ((jl_tvar_t*)b)->ub, a0, b0, env)) return num_occurs((jl_tvar_t*)b, env) < 2; return 0; } else { if (obviously_disjoint(a, ((jl_tvar_t*)b)->ub, 1)) return 0; - if (type_morespecific_(((jl_tvar_t*)b)->ub, a, 0, env)) + if (type_morespecific_(((jl_tvar_t*)b)->ub, a, b0, a0, 0, env)) return 0; return 1; } } - return type_morespecific_(a, ((jl_tvar_t*)b)->ub, 0, env); + return type_morespecific_(a, ((jl_tvar_t*)b)->ub, a0, b0, 0, env); } if (jl_is_unionall(a)) { @@ -3954,12 +5120,12 @@ JL_DLLEXPORT int jl_type_morespecific(jl_value_t *a, jl_value_t *b) return 0; if (jl_subtype(a, b)) return 1; - return type_morespecific_(a, b, 0, NULL); + return type_morespecific_(a, b, a, b, 0, NULL); } JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b) { - return type_morespecific_(a, b, 0, NULL); + return type_morespecific_(a, b, a, b, 0, NULL); } #ifdef __cplusplus diff --git a/src/support/Makefile b/src/support/Makefile index a884aa5fd47e0..1ee98a4eabdee 100644 --- a/src/support/Makefile +++ b/src/support/Makefile @@ -24,7 +24,7 @@ HEADERS := $(wildcard *.h) $(LIBUV_INC)/uv.h OBJS := $(SRCS:%=$(BUILDDIR)/%.o) DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) -FLAGS := $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) 
-DLIBRARY_EXPORTS -DUTF8PROC_EXPORTS +FLAGS := $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DJL_LIBRARY_EXPORTS_INTERNAL -DUTF8PROC_EXPORTS FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat diff --git a/src/support/MurmurHash3.c b/src/support/MurmurHash3.c index fce7351f90ffe..7eaded17c379f 100644 --- a/src/support/MurmurHash3.c +++ b/src/support/MurmurHash3.c @@ -8,12 +8,11 @@ // non-native version will be less than optimal. #include "MurmurHash3.h" +#include "dtypes.h" //----------------------------------------------------------------------------- // Platform-specific functions and macros -#define FORCE_INLINE inline __attribute__((always_inline)) - static inline uint32_t rotl32 ( uint32_t x, int8_t r ) { return (x << r) | (x >> (32 - r)); @@ -58,11 +57,11 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k ) //----------------------------------------------------------------------------- -void MurmurHash3_x86_32 ( const void * key, int len, +void MurmurHash3_x86_32 ( const void * key, size_t len, uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 4; + const size_t nblocks = len / 4; uint32_t h1 = seed; @@ -74,7 +73,7 @@ void MurmurHash3_x86_32 ( const void * key, int len, const uint8_t * tail = data + nblocks*4; - for(int i = -nblocks; i; i++) + for(size_t i = -nblocks; i; i++) { uint32_t k1 = jl_load_unaligned_i32(tail + sizeof(uint32_t)*i); @@ -112,11 +111,11 @@ void MurmurHash3_x86_32 ( const void * key, int len, //----------------------------------------------------------------------------- -void MurmurHash3_x86_128 ( const void * key, const int len, +void MurmurHash3_x86_128 ( const void * key, const size_t len, uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; + const size_t nblocks = len / 16; uint32_t h1 = seed; uint32_t h2 = seed; @@ -133,7 +132,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len, const uint8_t *tail = data + nblocks*16; - for(int i = -nblocks; i; i++) + for(size_t i = -nblocks; i; i++) { uint32_t k1 = jl_load_unaligned_i32(tail + sizeof(uint32_t)*(i*4 + 0)); uint32_t k2 = jl_load_unaligned_i32(tail + sizeof(uint32_t)*(i*4 + 1)); @@ -218,11 +217,11 @@ void MurmurHash3_x86_128 ( const void * key, const int len, //----------------------------------------------------------------------------- -void MurmurHash3_x64_128 ( const void * key, const int len, +void MurmurHash3_x64_128 ( const void * key, const size_t len, const uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; + const size_t nblocks = len / 16; uint64_t h1 = seed; uint64_t h2 = seed; @@ -233,7 +232,7 @@ void MurmurHash3_x64_128 ( const void * key, const int len, //---------- // body - for(int i = 0; i < nblocks; i++) + for(size_t i = 0; i < nblocks; i++) { uint64_t k1 = jl_load_unaligned_i64(data + sizeof(uint64_t)*(i*2 + 0)); uint64_t k2 = jl_load_unaligned_i64(data + sizeof(uint64_t)*(i*2 + 1)); diff --git a/src/support/MurmurHash3.h b/src/support/MurmurHash3.h index e3e7da9df62fa..6137098d6828c 100644 --- a/src/support/MurmurHash3.h +++ b/src/support/MurmurHash3.h @@ -8,14 +8,14 @@ //----------------------------------------------------------------------------- // Platform-specific functions and macros #include - +#include //----------------------------------------------------------------------------- -void MurmurHash3_x86_32 ( const 
void * key, int len, uint32_t seed, void * out ); +void MurmurHash3_x86_32 ( const void * key, size_t len, uint32_t seed, void * out ); -void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); +void MurmurHash3_x86_128 ( const void * key, size_t len, uint32_t seed, void * out ); -void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); +void MurmurHash3_x64_128 ( const void * key, size_t len, uint32_t seed, void * out ); //----------------------------------------------------------------------------- diff --git a/src/support/analyzer_annotations.h b/src/support/analyzer_annotations.h index 70b5a273953f1..69827e4d77f37 100644 --- a/src/support/analyzer_annotations.h +++ b/src/support/analyzer_annotations.h @@ -12,6 +12,8 @@ #define JL_PROPAGATES_ROOT __attribute__((annotate("julia_propagates_root"))) #define JL_NOTSAFEPOINT __attribute__((annotate("julia_not_safepoint"))) +#define JL_NOTSAFEPOINT_ENTER __attribute__((annotate("julia_notsafepoint_enter"))) +#define JL_NOTSAFEPOINT_LEAVE __attribute__((annotate("julia_notsafepoint_leave"))) #define JL_MAYBE_UNROOTED __attribute__((annotate("julia_maybe_unrooted"))) #define JL_GLOBALLY_ROOTED __attribute__((annotate("julia_globally_rooted"))) #define JL_ROOTING_ARGUMENT __attribute__((annotate("julia_rooting_argument"))) @@ -20,12 +22,11 @@ #define JL_ALWAYS_LEAFTYPE JL_GLOBALLY_ROOTED #define JL_ROOTS_TEMPORARILY __attribute__((annotate("julia_temporarily_roots"))) #define JL_REQUIRE_ROOTED_SLOT __attribute__((annotate("julia_require_rooted_slot"))) -#define JL_ROOTED_VALUE_COLLECTION __attribute__((annotate("julia_rooted_value_collection"))) #ifdef __cplusplus extern "C" { #endif void JL_GC_PROMISE_ROOTED(void *v) JL_NOTSAFEPOINT; - void jl_may_leak(uintptr_t) JL_NOTSAFEPOINT; + void jl_may_leak(void *v) JL_NOTSAFEPOINT; #ifdef __cplusplus } #endif @@ -34,6 +35,8 @@ extern "C" { #define JL_PROPAGATES_ROOT #define JL_NOTSAFEPOINT +#define JL_NOTSAFEPOINT_ENTER +#define JL_NOTSAFEPOINT_LEAVE #define JL_MAYBE_UNROOTED #define JL_GLOBALLY_ROOTED #define JL_ROOTING_ARGUMENT @@ -42,7 +45,6 @@ extern "C" { #define JL_ALWAYS_LEAFTYPE #define JL_ROOTS_TEMPORARILY #define JL_REQUIRE_ROOTED_SLOT -#define JL_ROOTED_VALUE_COLLECTION #define JL_GC_PROMISE_ROOTED(x) (void)(x) #define jl_may_leak(x) (void)(x) diff --git a/src/support/arraylist.h b/src/support/arraylist.h index 03bfd45f8f525..6ad2f0e2f28c9 100644 --- a/src/support/arraylist.h +++ b/src/support/arraylist.h @@ -25,7 +25,7 @@ void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT; void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT; void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT; -void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT; +JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT; typedef struct { uint32_t len; diff --git a/src/support/dtypes.h b/src/support/dtypes.h index d49ae0b22b5f9..4e9868ed45c23 100644 --- a/src/support/dtypes.h +++ b/src/support/dtypes.h @@ -72,23 +72,30 @@ typedef intptr_t ssize_t; #ifdef _OS_WINDOWS_ #define STDCALL __stdcall -# ifdef LIBRARY_EXPORTS +# ifdef JL_LIBRARY_EXPORTS_INTERNAL # define JL_DLLEXPORT __declspec(dllexport) -# else -# define JL_DLLEXPORT __declspec(dllimport) # endif +# ifdef JL_LIBRARY_EXPORTS_CODEGEN +# define JL_DLLEXPORT_CODEGEN __declspec(dllexport) +# endif +#define JL_HIDDEN #define JL_DLLIMPORT __declspec(dllimport) #else #define STDCALL -# define JL_DLLEXPORT __attribute__ ((visibility("default"))) -#define JL_DLLIMPORT +#define 
JL_DLLIMPORT __attribute__ ((visibility("default"))) +#define JL_HIDDEN __attribute__ ((visibility("hidden"))) +#endif +#ifndef JL_DLLEXPORT +# define JL_DLLEXPORT JL_DLLIMPORT +#endif +#ifndef JL_DLLEXPORT_CODEGEN +# define JL_DLLEXPORT_CODEGEN JL_DLLIMPORT #endif #ifdef _OS_LINUX_ #include #define LITTLE_ENDIAN __LITTLE_ENDIAN #define BIG_ENDIAN __BIG_ENDIAN -#define PDP_ENDIAN __PDP_ENDIAN #define BYTE_ORDER __BYTE_ORDER #endif @@ -96,19 +103,16 @@ typedef intptr_t ssize_t; #include #define __LITTLE_ENDIAN LITTLE_ENDIAN #define __BIG_ENDIAN BIG_ENDIAN -#define __PDP_ENDIAN PDP_ENDIAN #define __BYTE_ORDER BYTE_ORDER #endif #ifdef _OS_WINDOWS_ #define __LITTLE_ENDIAN 1234 #define __BIG_ENDIAN 4321 -#define __PDP_ENDIAN 3412 #define __BYTE_ORDER __LITTLE_ENDIAN #define __FLOAT_WORD_ORDER __LITTLE_ENDIAN #define LITTLE_ENDIAN __LITTLE_ENDIAN #define BIG_ENDIAN __BIG_ENDIAN -#define PDP_ENDIAN __PDP_ENDIAN #define BYTE_ORDER __BYTE_ORDER #endif @@ -117,6 +121,7 @@ typedef intptr_t ssize_t; #define LLT_FREE(x) free(x) #define STATIC_INLINE static inline +#define FORCE_INLINE static inline __attribute__((always_inline)) #if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_) # define NOINLINE __declspec(noinline) @@ -331,6 +336,23 @@ STATIC_INLINE void jl_store_unaligned_i16(void *ptr, uint16_t val) JL_NOTSAFEPOI memcpy(ptr, &val, 2); } +STATIC_INLINE void *calloc_s(size_t sz) JL_NOTSAFEPOINT { + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + void *p = calloc(sz == 0 ? 1 : sz, 1); + if (p == NULL) { + perror("(julia) calloc"); + abort(); + } +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + return p; +} + STATIC_INLINE void *malloc_s(size_t sz) JL_NOTSAFEPOINT { int last_errno = errno; #ifdef _OS_WINDOWS_ diff --git a/src/support/htable.h b/src/support/htable.h index 0b5196374e2b6..4f821493beee8 100644 --- a/src/support/htable.h +++ b/src/support/htable.h @@ -47,13 +47,13 @@ int HTNAME##_has(htable_t *h, void *key) JL_NOTSAFEPOINT; \ int HTNAME##_remove(htable_t *h, void *key) JL_NOTSAFEPOINT; \ void **HTNAME##_bp(htable_t *h, void *key) JL_NOTSAFEPOINT; -#define HTPROT_R(HTNAME) \ -void *HTNAME##_get_r(htable_t *h, void *key, void *ctx); \ -void HTNAME##_put_r(htable_t *h, void *key, void *val, void *ctx); \ -void HTNAME##_adjoin_r(htable_t *h, void *key, void *val, void *ctx); \ -int HTNAME##_has_r(htable_t *h, void *key, void *ctx); \ -int HTNAME##_remove_r(htable_t *h, void *key, void *ctx); \ -void **HTNAME##_bp_r(htable_t *h, void *key, void *ctx); +#define HTPROT_R(HTNAME) \ +void *HTNAME##_get_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; \ +void HTNAME##_put_r(htable_t *h, void *key, void *val, void *ctx) JL_NOTSAFEPOINT; \ +void HTNAME##_adjoin_r(htable_t *h, void *key, void *val, void *ctx) JL_NOTSAFEPOINT; \ +int HTNAME##_has_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; \ +int HTNAME##_remove_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; \ +void **HTNAME##_bp_r(htable_t *h, void *key, void *ctx) JL_NOTSAFEPOINT; #ifdef __cplusplus } diff --git a/src/support/ios.c b/src/support/ios.c index c0f1c92572b78..7f70112c82cc0 100644 --- a/src/support/ios.c +++ b/src/support/ios.c @@ -196,6 +196,9 @@ static char *_buf_realloc(ios_t *s, size_t sz) if (sz <= s->maxsize) return s->buf; + if (!s->growable) + return NULL; + if (s->ownbuf && s->buf != &s->local[0]) { // if we own the buffer we're free to resize it temp = (char*)LLT_REALLOC(s->buf, sz); @@ -207,8 +210,10 @@ static char 
*_buf_realloc(ios_t *s, size_t sz) if (temp == NULL) return NULL; s->ownbuf = 1; - if (s->size > 0) + if (s->size > 0) { + assert(s->buf != NULL); memcpy(temp, s->buf, (size_t)s->size); + } } s->buf = temp; @@ -597,12 +602,12 @@ int ios_eof(ios_t *s) { if (s->state == bst_rd && s->bpos < s->size) return 0; + if (s->_eof) + return 1; if (s->bm == bm_mem) - return (s->_eof ? 1 : 0); + return 0; if (s->fd == -1) return 1; - if (s->_eof) - return 1; return 0; /* if (_fd_available(s->fd)) @@ -612,6 +617,12 @@ int ios_eof(ios_t *s) */ } +void ios_reseteof(ios_t *s) +{ + if (s->bm != bm_mem && s->fd != -1) + s->_eof = 0; +} + int ios_eof_blocking(ios_t *s) { if (s->state == bst_rd && s->bpos < s->size) @@ -718,8 +729,10 @@ char *ios_take_buffer(ios_t *s, size_t *psize) buf = (char*)LLT_ALLOC((size_t)s->size + 1); if (buf == NULL) return NULL; - if (s->size) + if (s->size) { + assert(s->buf != NULL); memcpy(buf, s->buf, (size_t)s->size); + } } else if (s->size == s->maxsize) { buf = (char*)LLT_REALLOC(s->buf, (size_t)s->size + 1); @@ -829,7 +842,7 @@ size_t ios_copyall(ios_t *to, ios_t *from) #define LINE_CHUNK_SIZE 160 -size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) +size_t ios_copyuntil(ios_t *to, ios_t *from, char delim, int keep) { size_t total = 0, avail = (size_t)(from->size - from->bpos); while (!ios_eof(from)) { @@ -847,9 +860,9 @@ size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) avail = 0; } else { - size_t ntowrite = pd - (from->buf+from->bpos) + 1; + size_t ntowrite = pd - (from->buf+from->bpos) + (keep != 0); written = ios_write(to, from->buf+from->bpos, ntowrite); - from->bpos += ntowrite; + from->bpos += ntowrite + (keep == 0); total += written; return total; } @@ -892,6 +905,7 @@ static void _ios_init(ios_t *s) s->readable = 1; s->writable = 1; s->rereadable = 0; + s->growable = 1; } /* stream object initializers. we do no allocation. */ @@ -935,9 +949,11 @@ ios_t *ios_file(ios_t *s, const char *fname, int rd, int wr, int create, int tru { int flags; int fd; - if (!(rd || wr)) + if (!(rd || wr)) { // must specify read and/or write + errno = EINVAL; goto open_file_err; + } flags = wr ? (rd ? O_RDWR : O_WRONLY) : O_RDONLY; if (create) flags |= O_CREAT; if (trunc) flags |= O_TRUNC; @@ -1078,7 +1094,7 @@ int ios_putc(int c, ios_t *s) int ios_getc(ios_t *s) { - char ch; + char ch = 0; if (s->state == bst_rd && s->bpos < s->size) { ch = s->buf[s->bpos++]; } @@ -1211,9 +1227,11 @@ char *ios_readline(ios_t *s) { ios_t dest; ios_mem(&dest, 0); - ios_copyuntil(&dest, s, '\n'); + ios_copyuntil(&dest, s, '\n', 1); size_t n; - return ios_take_buffer(&dest, &n); + char * ret = ios_take_buffer(&dest, &n); + ios_close(&dest); + return ret; } extern int vasprintf(char **strp, const char *fmt, va_list ap); diff --git a/src/support/ios.h b/src/support/ios.h index 9d0f42d6d1bc4..6eab9e21c45b6 100644 --- a/src/support/ios.h +++ b/src/support/ios.h @@ -19,13 +19,13 @@ extern "C" { typedef enum { bm_none=1000, bm_line, bm_block, bm_mem } bufmode_t; typedef enum { bst_none, bst_rd, bst_wr } bufstate_t; -#define IOS_INLSIZE 54 +#define IOS_INLSIZE 83 #define IOS_BUFSIZE 32768 #ifdef _P64 -#define ON_P64(x) x +#define IF_P64(x,y) x #else -#define ON_P64(x) +#define IF_P64(x,y) y #endif // We allow ios_t as a cvalue in flisp, which only guarantees pointer @@ -36,10 +36,8 @@ JL_ATTRIBUTE_ALIGN_PTRSIZE(typedef struct { // in general, you can do any operation in any state. 
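// Note: IF_P64(x, y) (defined above) expands to its first argument on 64-bit
// builds and to its second on 32-bit builds, which lets the int64_t `userdata`
// and int `errcode` fields trade places below so that `bm` stays at the same
// offset as the `type` field of uv_stream_s for either pointer size.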
char *buf; // start of buffer - int errcode; - - ON_P64(int _pad_bm;) // put bm at same offset as type field of uv_stream_s - bufmode_t bm; // + IF_P64(int64_t userdata;, int errcode;) + bufmode_t bm; // bm must be at same offset as type field of uv_stream_s bufstate_t state; int64_t maxsize; // space allocated to buffer @@ -51,6 +49,8 @@ JL_ATTRIBUTE_ALIGN_PTRSIZE(typedef struct { size_t lineno; // current line number size_t u_colno; // current column number (in Unicode charwidths) + IF_P64(int errcode;, int64_t userdata;) + // pointer-size integer to support platforms where it might have // to be a pointer long fd; @@ -74,11 +74,14 @@ JL_ATTRIBUTE_ALIGN_PTRSIZE(typedef struct { // request durable writes (fsync) // unsigned char durable:1; - int64_t userdata; + // this declares that the buffer should not be (re-)alloc'd when + // attempting to write beyond its current maxsize. + unsigned char growable:1; + char local[IOS_INLSIZE]; } ios_t); -#undef ON_P64 +#undef IF_P64 extern void (*ios_set_io_wait_func)(int); /* low-level interface functions */ @@ -105,7 +108,7 @@ JL_DLLEXPORT int ios_get_writable(ios_t *s); JL_DLLEXPORT void ios_set_readonly(ios_t *s); JL_DLLEXPORT size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes); JL_DLLEXPORT size_t ios_copyall(ios_t *to, ios_t *from); -JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) JL_NOTSAFEPOINT; +JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim, int keep) JL_NOTSAFEPOINT; JL_DLLEXPORT size_t ios_nchomp(ios_t *from, size_t ntowrite); // ensure at least n bytes are buffered if possible. returns # available. JL_DLLEXPORT size_t ios_readprep(ios_t *from, size_t n); @@ -128,8 +131,8 @@ void ios_init_stdstreams(void); /* high-level functions - output */ JL_DLLEXPORT int ios_pututf8(ios_t *s, uint32_t wc); -JL_DLLEXPORT int ios_printf(ios_t *s, const char *format, ...); -JL_DLLEXPORT int ios_vprintf(ios_t *s, const char *format, va_list args); +JL_DLLEXPORT int ios_printf(ios_t *s, const char *format, ...) 
JL_NOTSAFEPOINT; +JL_DLLEXPORT int ios_vprintf(ios_t *s, const char *format, va_list args) JL_NOTSAFEPOINT; /* high-level stream functions - input */ JL_DLLEXPORT int ios_getutf8(ios_t *s, uint32_t *pwc); diff --git a/src/support/platform.h b/src/support/platform.h index cf65fa01423fe..56f8cafbc89fa 100644 --- a/src/support/platform.h +++ b/src/support/platform.h @@ -43,24 +43,37 @@ #error Unsupported compiler #endif + +#define JL_NO_ASAN +#define JL_NO_MSAN +#define JL_NO_TSAN #if defined(__has_feature) // Clang flavor #if __has_feature(address_sanitizer) #define _COMPILER_ASAN_ENABLED_ +#undef JL_NO_ASAN +#define JL_NO_ASAN __attribute__((no_sanitize("address"))) #endif #if __has_feature(memory_sanitizer) #define _COMPILER_MSAN_ENABLED_ +#undef JL_NO_MSAN +#define JL_NO_MSAN __attribute__((no_sanitize("memory"))) #endif #if __has_feature(thread_sanitizer) #if __clang_major__ < 11 #error Thread sanitizer runtime libraries in clang < 11 leak memory and cannot be used #endif #define _COMPILER_TSAN_ENABLED_ +#undef JL_NO_TSAN +#define JL_NO_TSAN __attribute__((no_sanitize("thread"))) #endif #else // GCC flavor #if defined(__SANITIZE_ADDRESS__) #define _COMPILER_ASAN_ENABLED_ +#undef JL_NO_ASAN +#define JL_NO_ASAN __attribute__((no_sanitize("address"))) #endif #endif // __has_feature +#define JL_NO_SANITIZE JL_NO_ASAN JL_NO_MSAN JL_NO_TSAN /******************************************************************************* * OS * diff --git a/src/support/rle.h b/src/support/rle.h index f85d9f35c4b80..bd2fdafc0f79f 100644 --- a/src/support/rle.h +++ b/src/support/rle.h @@ -10,6 +10,7 @@ extern "C" { #include #include #include +#include "analyzer_annotations.h" /* Run-length encoding (RLE) utilities */ /* In the RLE table, even indexes encode the key (the item classification), odd indexes encode the item index */ @@ -28,8 +29,8 @@ typedef struct _rle_iter_state_t { uint64_t key; // current identifier } rle_iter_state; -rle_iter_state rle_iter_init(/* implicit value of key for indexes prior to first explicit rle pair */ uint64_t key0); -int rle_iter_increment(rle_iter_state *state, /* number of items */ size_t len, uint64_t *rletable, /*length of rletable */ size_t npairs); +rle_iter_state rle_iter_init(/* implicit value of key for indexes prior to first explicit rle pair */ uint64_t key0) JL_NOTSAFEPOINT; +int rle_iter_increment(rle_iter_state *state, /* number of items */ size_t len, uint64_t *rletable, /*length of rletable */ size_t npairs) JL_NOTSAFEPOINT; /* indexing */ typedef struct { @@ -37,8 +38,8 @@ typedef struct { int index; // number of preceding items in the list with the same key } rle_reference; -void rle_index_to_reference(rle_reference *rr, /* item index */ size_t i, uint64_t *rletable, size_t npairs, uint64_t key0); -size_t rle_reference_to_index(rle_reference *rr, uint64_t *rletable, size_t npairs, uint64_t key0); +void rle_index_to_reference(rle_reference *rr, /* item index */ size_t i, uint64_t *rletable, size_t npairs, uint64_t key0) JL_NOTSAFEPOINT; +size_t rle_reference_to_index(rle_reference *rr, uint64_t *rletable, size_t npairs, uint64_t key0) JL_NOTSAFEPOINT; #ifdef __cplusplus diff --git a/src/symbol.c b/src/symbol.c index 14606c82b9778..c9c0c0e533924 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -35,12 +35,10 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT { jl_sym_t *sym; size_t nb = symbol_nbytes(len); - assert(jl_symbol_type && "not initialized"); - jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc_nolock(nb, 0, sizeof(void*), 
0); sym = (jl_sym_t*)jl_valueof(tag); // set to old marked so that we won't look at it in the GC or write barrier. - tag->header = ((uintptr_t)jl_symbol_type) | GC_OLD_MARKED; + jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); jl_atomic_store_relaxed(&sym->left, NULL); jl_atomic_store_relaxed(&sym->right, NULL); sym->hash = hash_symbol(str, len); diff --git a/src/sys.c b/src/sys.c index 2f512888c1873..7407da4e3514e 100644 --- a/src/sys.c +++ b/src/sys.c @@ -280,15 +280,16 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint return str; } a = jl_alloc_array_1d(jl_array_uint8_type, n - nchomp); - memcpy(jl_array_data(a), s->buf + s->bpos, n - nchomp); + memcpy(jl_array_data(a, uint8_t), s->buf + s->bpos, n - nchomp); s->bpos += n; } else { a = jl_alloc_array_1d(jl_array_uint8_type, 80); ios_t dest; ios_mem(&dest, 0); - ios_setbuf(&dest, (char*)a->data, 80, 0); - size_t n = ios_copyuntil(&dest, s, delim); + char *mem = jl_array_data(a, char); + ios_setbuf(&dest, (char*)mem, 80, 0); + size_t n = ios_copyuntil(&dest, s, delim, 1); if (chomp && n > 0 && dest.buf[n - 1] == delim) { n--; if (chomp == 2 && n > 0 && dest.buf[n - 1] == '\r') { @@ -298,13 +299,11 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint assert(truncret == 0); (void)truncret; // ensure the variable is used to avoid warnings } - if (dest.buf != a->data) { + if (dest.buf != mem) { a = jl_take_buffer(&dest); } else { - a->length = n; - a->nrows = n; - ((char*)a->data)[n] = '\0'; + a->dimsize[0] = n; } if (str) { JL_GC_PUSH1(&a); @@ -316,6 +315,50 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint return (jl_value_t*)a; } +// read up to buflen bytes, including delim, into buf. returns number of bytes read. 
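// A minimal usage sketch (illustrative only; `s` is assumed to be an open ios_t
// and `process_line` is a hypothetical consumer, neither is part of this patch):
//
//     uint8_t line[256];
//     size_t n;
//     while ((n = jl_readuntil_buf(s, '\n', line, sizeof(line))) > 0) {
//         // up to `n` bytes are in `line`, including the '\n' delimiter when it was found
//         process_line(line, n);
//     }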
+JL_DLLEXPORT size_t jl_readuntil_buf(ios_t *s, uint8_t delim, uint8_t *buf, size_t buflen) +{ + // manually inlined common case + size_t avail = (size_t)(s->size - s->bpos); + if (avail > buflen) avail = buflen; + char *pd = (char*)memchr(s->buf + s->bpos, delim, avail); + if (pd) { + size_t n = pd - (s->buf + s->bpos) + 1; + memcpy(buf, s->buf + s->bpos, n); + s->bpos += n; + return n; + } + else { + size_t total = avail; + memcpy(buf, s->buf + s->bpos, avail); + s->bpos += avail; + if (avail == buflen) return total; + + // code derived from ios_copyuntil + while (!ios_eof(s)) { + avail = ios_readprep(s, 160); // read LINE_CHUNK_SIZE + if (avail == 0) break; + if (total+avail > buflen) avail = buflen-total; + char *pd = (char*)memchr(s->buf+s->bpos, delim, avail); + if (pd == NULL) { + memcpy(buf+total, s->buf+s->bpos, avail); + s->bpos += avail; + total += avail; + if (buflen == total) return total; + } + else { + size_t ntowrite = pd - (s->buf+s->bpos) + 1; + memcpy(buf+total, s->buf+s->bpos, ntowrite); + s->bpos += ntowrite; + total += ntowrite; + return total; + } + } + s->_eof = 1; + return total; + } +} + JL_DLLEXPORT int jl_ios_buffer_n(ios_t *s, const size_t n) { size_t space, ret; @@ -517,28 +560,11 @@ JL_DLLEXPORT JL_STREAM *jl_stdin_stream(void) { return JL_STDIN; } JL_DLLEXPORT JL_STREAM *jl_stdout_stream(void) { return JL_STDOUT; } JL_DLLEXPORT JL_STREAM *jl_stderr_stream(void) { return JL_STDERR; } -// terminal workarounds -JL_DLLEXPORT int jl_getch(void) JL_NOTSAFEPOINT -{ +JL_DLLEXPORT int jl_termios_size(void) { #if defined(_OS_WINDOWS_) - // Windows has an actual `_getch()`, use that: - return _getch(); + return 0; #else - // On all other platforms, we do the POSIX terminal manipulation dance - char c; - int r; - struct termios old_termios = {0}; - struct termios new_termios = {0}; - if (tcgetattr(0, &old_termios) != 0) - return -1; - new_termios = old_termios; - cfmakeraw(&new_termios); - if (tcsetattr(0, TCSADRAIN, &new_termios) != 0) - return -1; - r = read(0, &c, 1); - if (tcsetattr(0, TCSADRAIN, &old_termios) != 0) - return -1; - return r == 1 ? c : -1; + return sizeof(struct termios); #endif } @@ -749,13 +775,12 @@ static _Atomic(uint64_t) g_rngseed; JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT { uint64_t max = UINT64_MAX; - uint64_t unbias = UINT64_MAX; uint64_t rngseed0 = jl_atomic_load_relaxed(&g_rngseed); uint64_t rngseed; uint64_t rnd; do { rngseed = rngseed0; - rnd = cong(max, unbias, &rngseed); + rnd = cong(max, &rngseed); } while (!jl_atomic_cmpswap_relaxed(&g_rngseed, &rngseed0, rngseed)); return rnd; } diff --git a/src/task.c b/src/task.c index c12cb5a522099..8905b49e87f8e 100644 --- a/src/task.c +++ b/src/task.c @@ -42,15 +42,34 @@ extern "C" { #endif #if defined(_COMPILER_ASAN_ENABLED_) -static inline void sanitizer_start_switch_fiber(const void* bottom, size_t size) { - __sanitizer_start_switch_fiber(NULL, bottom, size); +#if __GLIBC__ +#include +// Bypass the ASAN longjmp wrapper - we are unpoisoning the stack ourselves, +// since ASAN normally unpoisons far too much. +// c.f. 
interceptor in jl_dlopen as well +void (*real_siglongjmp)(jmp_buf _Buf, int _Value) = NULL; +#endif +static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) { + if (to->copy_stack) + __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize); + else + __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, to->stkbuf, to->bufsz); +} +static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) { + if (to->copy_stack) + __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize); + else + __sanitizer_start_switch_fiber(NULL, to->stkbuf, to->bufsz); } -static inline void sanitizer_finish_switch_fiber(void) { - __sanitizer_finish_switch_fiber(NULL, NULL, NULL); +static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) { + __sanitizer_finish_switch_fiber(current->ctx.asan_fake_stack, NULL, NULL); + //(const void**)&last->stkbuf, + //&last->bufsz); } #else -static inline void sanitizer_start_switch_fiber(const void* bottom, size_t size) {} -static inline void sanitizer_finish_switch_fiber(void) {} +static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT {} +static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) JL_NOTSAFEPOINT {} +static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) JL_NOTSAFEPOINT {} #endif #if defined(_COMPILER_TSAN_ENABLED_) @@ -109,7 +128,11 @@ static inline void sanitizer_finish_switch_fiber(void) {} #define MINSTKSZ 131072 #endif +#ifdef _COMPILER_ASAN_ENABLED_ +#define ROOT_TASK_STACK_ADJUSTMENT 0 +#else #define ROOT_TASK_STACK_ADJUSTMENT 3000000 +#endif #ifdef JL_HAVE_ASYNCIFY // Switching logic is implemented in JavaScript @@ -133,13 +156,62 @@ static int always_copy_stacks = 1; static int always_copy_stacks = 0; #endif +#if defined(_COMPILER_ASAN_ENABLED_) +extern void __asan_get_shadow_mapping(size_t *shadow_scale, size_t *shadow_offset); + +JL_NO_ASAN void *memcpy_noasan(void *dest, const void *src, size_t n) { + char *d = (char*)dest; + const char *s = (const char *)src; + for (size_t i = 0; i < n; ++i) + d[i] = s[i]; + return dest; +} + +JL_NO_ASAN void *memcpy_a16_noasan(uint64_t *dest, const uint64_t *src, size_t nb) { + uint64_t *end = (uint64_t*)((char*)src + nb); + while (src < end) + *(dest++) = *(src++); + return dest; +} + +/* Copy stack are allocated as regular bigval objects and do no go through free_stack, + which would otherwise unpoison it before returning to the GC pool */ +static void asan_free_copy_stack(void *stkbuf, size_t bufsz) { + __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); +} +#else +static void asan_free_copy_stack(void *stkbuf, size_t bufsz) {} +#endif + #ifdef COPY_STACKS -static void memcpy_a16(uint64_t *to, uint64_t *from, size_t nb) +static void JL_NO_ASAN JL_NO_MSAN memcpy_stack_a16(uint64_t *to, uint64_t *from, size_t nb) { +#if defined(_COMPILER_ASAN_ENABLED_) + /* Asan keeps shadow memory for everything on the stack. However, in general, + this function may touch invalid portions of the stack, since it just moves + the stack around. To keep ASAN's stack tracking capability intact, we need + to move the shadow memory along with the stack memory itself. 
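   For illustration: with ASan's default shadow mapping (shadow_scale = 3, one
   shadow byte per 8 application bytes), an nb = 8192-byte stack copy also moves
   nb >> 3 = 1024 shadow bytes, located at ((uintptr_t)from >> 3) + shadow_offset,
   which is exactly the address computation performed below.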
*/ + size_t shadow_offset; + size_t shadow_scale; + __asan_get_shadow_mapping(&shadow_scale, &shadow_offset); + uintptr_t from_addr = (((uintptr_t)from) >> shadow_scale) + shadow_offset; + uintptr_t to_addr = (((uintptr_t)to) >> shadow_scale) + shadow_offset; + // Make sure that the shadow scale is compatible with the alignment, so + // we can copy whole bytes. + assert(shadow_scale <= 4); + size_t shadow_nb = nb >> shadow_scale; + // Copy over the shadow memory + memcpy_noasan((char*)to_addr, (char*)from_addr, shadow_nb); + memcpy_a16_noasan(jl_assume_aligned(to, 16), jl_assume_aligned(from, 16), nb); +#elif defined(_COMPILER_MSAN_ENABLED_) +# warning This function is incompletely implemented for MSAN (TODO). + memcpy((char*)jl_assume_aligned(to, 16), (char*)jl_assume_aligned(from, 16), nb); +#else memcpy((char*)jl_assume_aligned(to, 16), (char*)jl_assume_aligned(from, 16), nb); //uint64_t *end = (uint64_t*)((char*)from + nb); //while (from < end) // *(to++) = *(from++); +#endif } static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt) @@ -150,6 +222,7 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt size_t nb = stackbase - frame_addr; void *buf; if (lastt->bufsz < nb) { + asan_free_copy_stack(lastt->stkbuf, lastt->bufsz); buf = (void*)jl_gc_alloc_buf(ptls, nb); lastt->stkbuf = buf; lastt->bufsz = nb; @@ -160,14 +233,14 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt *pt = NULL; // clear the gc-root for the target task before copying the stack for saving lastt->copy_stack = nb; lastt->sticky = 1; - memcpy_a16((uint64_t*)buf, (uint64_t*)frame_addr, nb); + memcpy_stack_a16((uint64_t*)buf, (uint64_t*)frame_addr, nb); // this task's stack could have been modified after // it was marked by an incremental collection // move the barrier back instead of walking it again here jl_gc_wb_back(lastt); } -static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, char *p) +JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, char *p) { size_t nb = t->copy_stack; char *_x = (char*)ptls->stackbase - nb; @@ -181,9 +254,8 @@ static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, cha } void *_y = t->stkbuf; assert(_x != NULL && _y != NULL); - memcpy_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe + memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe - sanitizer_start_switch_fiber(t->stkbuf, t->bufsz); #if defined(_OS_WINDOWS_) jl_setcontext(&t->ctx.copy_ctx); #else @@ -192,14 +264,14 @@ static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, cha abort(); // unreachable } -static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt) +JL_NO_ASAN static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt) { assert(t->copy_stack && !lastt->copy_stack); size_t nb = t->copy_stack; char *_x = (char*)ptls->stackbase - nb; void *_y = t->stkbuf; assert(_x != NULL && _y != NULL); - memcpy_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe + memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe #if defined(JL_HAVE_UNW_CONTEXT) volatile int returns = 0; int r = unw_getcontext(&lastt->ctx.ctx); @@ -213,7 +285,6 @@ static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt) #else #error COPY_STACKS is incompatible with this platform #endif -
sanitizer_start_switch_fiber(t->stkbuf, t->bufsz); tsan_switch_to_copyctx(&t->ctx); #if defined(_OS_WINDOWS_) jl_setcontext(&t->ctx.copy_ctx); @@ -235,8 +306,10 @@ void JL_NORETURN jl_finish_task(jl_task_t *t) jl_atomic_store_release(&t->_state, JL_TASK_STATE_FAILED); else jl_atomic_store_release(&t->_state, JL_TASK_STATE_DONE); - if (t->copy_stack) // early free of stkbuf + if (t->copy_stack) { // early free of stkbuf + asan_free_copy_stack(t->stkbuf, t->bufsz); t->stkbuf = NULL; + } // ensure that state is cleared ct->ptls->in_finalizer = 0; ct->ptls->in_pure_callback = 0; @@ -254,7 +327,7 @@ void JL_NORETURN jl_finish_task(jl_task_t *t) jl_apply(args, 2); } JL_CATCH { - jl_no_exc_handler(jl_current_exception()); + jl_no_exc_handler(jl_current_exception(), ct); } } jl_gc_debug_critical_error(); @@ -265,7 +338,8 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid { size_t off = 0; #ifndef _OS_WINDOWS_ - if (jl_all_tls_states[0]->root_task == task) { + jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; + if (ptls0->root_task == task) { // See jl_init_root_task(). The root task of the main thread // has its buffer enlarged by an artificial 3000000 bytes, but // that means that the start of the buffer usually points to @@ -306,7 +380,8 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, else if (task->stkbuf) { *total_start = *active_start = (char*)task->stkbuf; #ifndef _OS_WINDOWS_ - if (jl_all_tls_states[0]->root_task == task) { + jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; + if (ptls0->root_task == task) { // See jl_init_root_task(). The root task of the main thread // has its buffer enlarged by an artificial 3000000 bytes, but // that means that the start of the buffer usually points to @@ -362,7 +437,7 @@ JL_DLLEXPORT jl_task_t *jl_get_next_task(void) JL_NOTSAFEPOINT const char tsan_state_corruption[] = "TSAN state corrupted. Exiting HARD!\n"; #endif -static void ctx_switch(jl_task_t *lastt) +JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) { jl_ptls_t ptls = lastt->ptls; jl_task_t **pt = &ptls->next_task; @@ -406,6 +481,7 @@ static void ctx_switch(jl_task_t *lastt) if (killed) { *pt = NULL; // can't fail after here: clear the gc-root for the target task now lastt->gcstack = NULL; + lastt->eh = NULL; if (!lastt->copy_stack && lastt->stkbuf) { // early free of stkbuf back to the pool jl_release_task_stack(ptls, lastt); @@ -416,7 +492,7 @@ static void ctx_switch(jl_task_t *lastt) if (lastt->copy_stack) { // save the old copy-stack save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail) if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) { - sanitizer_finish_switch_fiber(); + sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task)); // TODO: mutex unlock the thread we just switched from return; } @@ -441,12 +517,25 @@ static void ctx_switch(jl_task_t *lastt) if (t->started) { #ifdef COPY_STACKS if (t->copy_stack) { - if (!killed && !lastt->copy_stack) + if (lastt->copy_stack) { + // Switching from copystack to copystack. Clear any shadow stack + // memory above the saved shadow stack. 
+ uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->copy_stack; + uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); + if (stackbottom < stacktop) + asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + } + if (!killed && !lastt->copy_stack) { + sanitizer_start_switch_fiber(ptls, lastt, t); restore_stack2(t, ptls, lastt); - else { + } else { tsan_switch_to_copyctx(&t->ctx); - if (killed) + if (killed) { + sanitizer_start_switch_fiber_killed(ptls, t); tsan_destroy_copyctx(ptls, &lastt->ctx); + } else { + sanitizer_start_switch_fiber(ptls, lastt, t); + } if (lastt->copy_stack) { restore_stack(t, ptls, NULL); // (doesn't return) @@ -459,14 +548,26 @@ static void ctx_switch(jl_task_t *lastt) else #endif { - sanitizer_start_switch_fiber(t->stkbuf, t->bufsz); + if (lastt->copy_stack) { + // Switching away from a copystack to a non-copystack. Clear + // the whole shadow stack now, because otherwise we won't know + // how much stack memory to clear the next time we switch to + // a copystack. + uintptr_t stacktop = (uintptr_t)ptls->stackbase; + uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); + // We're not restoring the stack, but we still need to unpoison the + // stack, so it starts with a pristine stack. + asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + } if (killed) { + sanitizer_start_switch_fiber_killed(ptls, t); tsan_switch_to_ctx(&t->ctx); tsan_destroy_ctx(ptls, &lastt->ctx); jl_set_fiber(&t->ctx); // (doesn't return) abort(); // unreachable } else { + sanitizer_start_switch_fiber(ptls, lastt, t); if (lastt->copy_stack) { // Resume at the jl_setjmp earlier in this function, // don't do a full task swap @@ -480,11 +581,20 @@ static void ctx_switch(jl_task_t *lastt) } } else { - sanitizer_start_switch_fiber(t->stkbuf, t->bufsz); + if (lastt->copy_stack) { + uintptr_t stacktop = (uintptr_t)ptls->stackbase; + uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15); + // We're not restoring the stack, but we still need to unpoison the + // stack, so it starts with a pristine stack. 
+ asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom); + } if (t->copy_stack && always_copy_stacks) { tsan_switch_to_ctx(&t->ctx); if (killed) { + sanitizer_start_switch_fiber_killed(ptls, t); tsan_destroy_ctx(ptls, &lastt->ctx); + } else { + sanitizer_start_switch_fiber(ptls, lastt, t); } #ifdef COPY_STACKS #if defined(_OS_WINDOWS_) @@ -497,12 +607,14 @@ static void ctx_switch(jl_task_t *lastt) } else { if (killed) { + sanitizer_start_switch_fiber_killed(ptls, t); tsan_switch_to_ctx(&t->ctx); tsan_destroy_ctx(ptls, &lastt->ctx); jl_start_fiber_set(&t->ctx); // (doesn't return) abort(); } - else if (lastt->copy_stack) { + sanitizer_start_switch_fiber(ptls, lastt, t); + if (lastt->copy_stack) { // Resume at the jl_setjmp earlier in this function tsan_switch_to_ctx(&t->ctx); jl_start_fiber_set(&t->ctx); // (doesn't return) @@ -513,10 +625,10 @@ static void ctx_switch(jl_task_t *lastt) } } } - sanitizer_finish_switch_fiber(); + sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task)); } -JL_DLLEXPORT void jl_switch(void) +JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; @@ -524,6 +636,7 @@ JL_DLLEXPORT void jl_switch(void) if (t == ct) { return; } + int8_t gc_state = jl_gc_unsafe_enter(ptls); if (t->started && t->stkbuf == NULL) jl_error("attempt to switch to exited task"); if (ptls->in_finalizer) @@ -537,17 +650,10 @@ JL_DLLEXPORT void jl_switch(void) // Store old values on the stack and reset sig_atomic_t defer_signal = ptls->defer_signal; - int8_t gc_state = jl_gc_unsafe_enter(ptls); int finalizers_inhibited = ptls->finalizers_inhibited; ptls->finalizers_inhibited = 0; -#ifdef ENABLE_TIMINGS - jl_timing_block_t *blk = ptls->timing_stack; - if (blk) - jl_timing_block_stop(blk); - ptls->timing_stack = NULL; -#endif - + jl_timing_block_t *blk = jl_timing_block_task_exit(ct, ptls); ctx_switch(ct); #ifdef MIGRATE_TASKS @@ -567,32 +673,24 @@ JL_DLLEXPORT void jl_switch(void) 0 != ct->ptls && 0 == ptls->finalizers_inhibited); ptls->finalizers_inhibited = finalizers_inhibited; + jl_timing_block_task_enter(ct, ptls, blk); (void)blk; -#ifdef ENABLE_TIMINGS - assert(ptls->timing_stack == NULL); - ptls->timing_stack = blk; - if (blk) - jl_timing_block_start(blk); -#else - (void)ct; -#endif - - jl_gc_unsafe_leave(ptls, gc_state); sig_atomic_t other_defer_signal = ptls->defer_signal; ptls->defer_signal = defer_signal; if (other_defer_signal && !defer_signal) jl_sigint_safepoint(ptls); JL_PROBE_RT_RUN_TASK(ct); + jl_gc_unsafe_leave(ptls, gc_state); } -JL_DLLEXPORT void jl_switchto(jl_task_t **pt) +JL_DLLEXPORT void jl_switchto(jl_task_t **pt) JL_NOTSAFEPOINT_ENTER // n.b. this does not actually enter a safepoint { jl_set_next_task(*pt); jl_switch(); } -JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e) +JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct) { // NULL exception objects are used when rethrowing. we don't have a handler to process // the exception stack, so at least report the exception at the top of the stack. 
@@ -603,41 +701,67 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e) jl_static_show((JL_STREAM*)STDERR_FILENO, e); jl_printf((JL_STREAM*)STDERR_FILENO, "\n"); jlbacktrace(); // written to STDERR_FILENO + if (ct == NULL) + jl_raise(6); jl_exit(1); } -// yield to exception handler -static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED) -{ - assert(!jl_get_safe_restore()); - jl_ptls_t ptls = ct->ptls; - ptls->io_wait = 0; - JL_GC_PUSH1(&exception); - jl_gc_unsafe_enter(ptls); - if (exception) { - // The temporary ptls->bt_data is rooted by special purpose code in the - // GC. This exists only for the purpose of preserving bt_data until we - // set ptls->bt_size=0 below. - jl_push_excstack(&ct->excstack, exception, - ptls->bt_data, ptls->bt_size); - ptls->bt_size = 0; - } - assert(ct->excstack && ct->excstack->top); - jl_handler_t *eh = ct->eh; - if (eh != NULL) { +/* throw_internal - yield to exception handler */ + #ifdef ENABLE_TIMINGS - jl_timing_block_t *cur_block = ptls->timing_stack; - while (cur_block && eh->timing_stack != cur_block) { - cur_block = jl_pop_timing_block(cur_block); - } +#define pop_timings_stack() \ + jl_timing_block_t *cur_block = ptls->timing_stack; \ + while (cur_block && eh->timing_stack != cur_block) { \ + cur_block = jl_timing_block_pop(cur_block); \ + } \ assert(cur_block == eh->timing_stack); +#else +#define pop_timings_stack() /* Nothing */ #endif - jl_longjmp(eh->eh_ctx, 1); - } - else { - jl_no_exc_handler(exception); - } + +#define throw_internal_body(altstack) \ + assert(!jl_get_safe_restore()); \ + jl_ptls_t ptls = ct->ptls; \ + ptls->io_wait = 0; \ + jl_gc_unsafe_enter(ptls); \ + if (exception) { \ + /* The temporary ptls->bt_data is rooted by special purpose code in the\ + GC. This exists only for the purpose of preserving bt_data until we \ + set ptls->bt_size=0 below. */ \ + jl_push_excstack(ct, &ct->excstack, exception, \ + ptls->bt_data, ptls->bt_size); \ + ptls->bt_size = 0; \ + } \ + assert(ct->excstack && ct->excstack->top); \ + jl_handler_t *eh = ct->eh; \ + if (eh != NULL) { \ + if (altstack) ptls->sig_exception = NULL; \ + pop_timings_stack() \ + asan_unpoison_task_stack(ct, &eh->eh_ctx); \ + jl_longjmp(eh->eh_ctx, 1); \ + } \ + else { \ + jl_no_exc_handler(exception, ct); \ + } \ assert(0); + +static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED) +{ +CFI_NORETURN + JL_GC_PUSH1(&exception); + throw_internal_body(0); + jl_unreachable(); +} + +/* On the signal stack, we don't want to create any asan frames, but we do on the + normal, stack, so we split this function in two, depending on which context + we're calling it in. This also lets us avoid making a GC frame on the altstack, + which might end up getting corrupted if we recur here through another signal. 
*/ +JL_NO_ASAN static void JL_NORETURN throw_internal_altstack(jl_task_t *ct, jl_value_t *exception) +{ +CFI_NORETURN + throw_internal_body(1); + jl_unreachable(); } // record backtrace and raise an error @@ -645,11 +769,13 @@ JL_DLLEXPORT void jl_throw(jl_value_t *e JL_MAYBE_UNROOTED) { assert(e != NULL); jl_jmp_buf *safe_restore = jl_get_safe_restore(); - if (safe_restore) - jl_longjmp(*safe_restore, 1); jl_task_t *ct = jl_get_current_task(); - if (ct == NULL) // During startup - jl_no_exc_handler(e); + if (safe_restore) { + asan_unpoison_task_stack(ct, safe_restore); + jl_longjmp(*safe_restore, 1); + } + if (ct == NULL) // During startup, or on other threads + jl_no_exc_handler(e, ct); record_backtrace(ct->ptls, 1); throw_internal(ct, e); } @@ -667,17 +793,19 @@ JL_DLLEXPORT void jl_rethrow(void) // Special case throw for errors detected inside signal handlers. This is not // (cannot be) called directly in the signal handler itself, but is returned to // after the signal handler exits. -JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void) +JL_DLLEXPORT JL_NO_ASAN void JL_NORETURN jl_sig_throw(void) { CFI_NORETURN jl_jmp_buf *safe_restore = jl_get_safe_restore(); - if (safe_restore) - jl_longjmp(*safe_restore, 1); jl_task_t *ct = jl_current_task; + if (safe_restore) { + asan_unpoison_task_stack(ct, safe_restore); + jl_longjmp(*safe_restore, 1); + } jl_ptls_t ptls = ct->ptls; jl_value_t *e = ptls->sig_exception; - ptls->sig_exception = NULL; - throw_internal(ct, e); + JL_GC_PROMISE_ROOTED(e); + throw_internal_altstack(ct, e); } JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED) @@ -703,7 +831,7 @@ JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED) ACM Trans. Math. Softw., 2021. There is a pure Julia implementation in stdlib that tends to be faster when used from - within Julia, due to inlining and more agressive architecture-specific optimizations. + within Julia, due to inlining and more aggressive architecture-specific optimizations. */ uint64_t jl_genrandom(uint64_t rngState[4]) JL_NOTSAFEPOINT { @@ -729,34 +857,244 @@ uint64_t jl_genrandom(uint64_t rngState[4]) JL_NOTSAFEPOINT return res; } -static void rng_split(jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT +/* +The jl_rng_split function forks a task's RNG state in a way that is essentially +guaranteed to avoid collisions between the RNG streams of all tasks. The main +RNG is the xoshiro256++ RNG whose state is stored in rngState[0..3]. There is +also a small internal RNG used for task forking stored in rngState[4]. This +state is used to iterate a linear congruential generator (LCG), which is then +put through four different variations of the strongest PCG output function, +referred to as PCG-RXS-M-XS-64 [1]. This output function is invertible: it maps +a 64-bit state to 64-bit output. This is one of the reasons it's not recommended +for general purpose RNGs unless space is at an absolute premium, but in our +usage invertibility is actually a benefit (as is explained below) and adding as +little additional memory overhead to each task object as possible is preferred. + +The goal of jl_rng_split is to perturb the state of each child task's RNG in +such a way each that for an entire tree of tasks spawned starting with a given +state in a root task, no two tasks have the same RNG state. Moreover, we want to +do this in a way that is deterministic and repeatable based on (1) the root +task's seed, (2) how many random numbers are generated, and (3) the task tree +structure. 
The RNG state of a parent task is allowed to affect the initial RNG +state of a child task, but the mere fact that a child was spawned should not +alter the RNG output of the parent. This second requirement rules out using the +main RNG to seed children: if we use the main RNG, we either advance it, which +affects the parent's RNG stream or, if we don't advance it, then every child +would have an identical RNG stream. Therefore some separate state must be +maintained and changed upon forking a child task while leaving the main RNG +state unchanged. + +The basic approach is that used by the DotMix [2] and SplitMix [3] RNG systems: +each task is uniquely identified by a sequence of "pedigree" numbers, indicating +where in the task tree it was spawned. This vector of pedigree coordinates is +then reduced to a single value by computing a dot product with a shared vector +of random weights. The weights are common but each pedigree of each task is +distinct, so the dot product of each task is unlikely to be the same. The DotMix +paper provides a proof that this dot product hash value (referred to as a +"compression function") is collision resistant in the sense the the pairwise +collision probability of two distinct tasks is 1/N where N is the number of +possible weight values. Both DotMix and SplitMix use a prime value of N because +the proof requires that the difference between two distinct pedigree coordinates +have a multiplicative inverse, which is guaranteed by N being prime since all +values are invertible then. We take a somewhat different approach: instead of +assigning n-ary pedigree coordinates, we assign binary tree coordinates to +tasks, which means that our pedigree vectors have only 0/1 and differences +between them can only be -1, 0 or 1. Since the only possible non-zero coordinate +differences are ±1 which are invertible regardless of the modulus, we can use a +modulus of 2^64, which is far easier and more efficient then using a prime +modulus. It also means that when accumulating the dot product incrementally, as +described in SplitMix, we don't need to multiply weights by anything, we simply +add the random weight for the current task tree depth to the parent's dot +product to derive the child's dot product. + +we instead limit pedigree coordinates to being binary, guaranteeing +invertibility regardless of modulus. When a task spawns a child, the parent and +child share the parent's previous pedigree prefix and the parent appends a zero +to its coordinates, which doesn't affect the task's dot product value, while the +child appends a one, which does produce a new dot product. In this manner a +binary pedigree vector uniquely identifies each task and since the coordinates +are binary, the difference between coordinates is always invertible: 1 and -1 +are their own multiplicative inverses regardless of the modulus. + +How does our assignment of pedigree coordinates to tasks differ from DotMix and +SplitMix? In DotMix and SplitMix, each task has a fixed pedigree vector that +never changes. The root tasks's pedigree is `()`, its first child's pedigree is +`(0,)`, its second child's pedigree is `(2,)` and so on. The length of a task's +pedigree tuple corresponds to how many ancestors tasks it has. Our approach +instead appends 0 to the parent's pedigree when it forks a child and appends 1 +to the child's pedigree at the same time. 
The root task starts with a pedigree +of `()` as before, but when it spawns a child, we update its pedigree to `(0,)` +and give its child a pedigree of `(1,)`. When the root task then spawns a second +child, we update its pedigree to `(0,0)` and give it's second child a pedigree +of `(0,1)`. If the first child spawns a grandchild, the child's pedigree is +changed from `(1,)` to `(1,0)` and the grandchild is assigned a pedigree of +`(1,1)`. In other words, DotMix and SplitMix build an n-ary tree where every +node is a task: parent nodes are higher up the tree and child tasks are children +in the pedigree tree. Our approach is to build a binary tree where only leaves +are tasks and each task spawn replaces a leaf in the tree with two leaves: the +parent moves to the left/zero leaf while the child is the right/one leaf. Since +the tree is binary, the pedigree coordinates are binary. + +It may seem odd for a task's pedigree coordinates to change, but note that we +only ever append zeros to a task's pedigree, which does not change its dot +product. So while the pedigree changes, the dot product is fixed. What purpose +does appending zeros like this serve if the task's dot product doesn't change? +Changing the pedigree length (which is also the binary tree depth) ensures that +the next child spawned by that task will have new and different dot product from +the previous child since it will have a different pseudo-random weight added to +the parent's dot product value. Whereas the pedigree length in DotMix and +SplitMix is unchanging and corresponds to how many ancestors a task has, in our +scheme the pedigree length corresponds to the number of ancestors *plus* +children a task has, which increases every time it spawns another child. + +We use the LCG in rngState[4] to generate pseudorandom weights for the dot +product. Each time a child is forked, we update the LCG in both parent and child +tasks. In the parent, that's all we have to do -- the main RNG state remains +unchanged. (Recall that spawning a child should *not* affect subsequent RNG +draws in the parent). The next time the parent forks a child, the dot product +weight used will be different, corresponding to being a level deeper in the +pedigree tree. In the child, we use the LCG state to generate four pseudorandom +64-bit weights (more below) and add each weight to one of the xoshiro256 state +registers, rngState[0..3]. If we assume the main RNG remains unused in all +tasks, then each register rngState[0..3] accumulates a different dot product +hash as additional child tasks are spawned. Each one is collision resistant with +a pairwise collision chance of only 1/2^64. Assuming that the four pseudorandom +64-bit weight streams are sufficiently independent, the pairwise collision +probability for distinct tasks is 1/2^256. If we somehow managed to spawn a +trillion tasks, the probability of a collision would be on the order of 1/10^54. +In other words, practically impossible. Put another way, this is the same as the +probability of two SHA256 hash values accidentally colliding, which we generally +consider so unlikely as not to be worth worrying about. + +What about the random "junk" that's in the xoshiro256 state registers from +normal use of the RNG? For a tree of tasks spawned with no intervening samples +taken from the main RNG, all tasks start with the same junk which doesn't affect +the chance of collision. 
The Dot/SplitMix papers even suggest adding a random +base value to the dot product, so we can consider whatever happens to be in the +xoshiro256 registers to be that. What if the main RNG gets used between task +forks? In that case, the initial state registers will be different. The DotMix +collision resistance proof doesn't apply without modification, but we can +generalize the setup by adding a different base constant to each compression +function and observe that we still have a 1/N chance of the weight value +matching that exact difference. This proves collision resistance even between +tasks whose dot product hashes are computed with arbitrary offsets. We can +conclude that this scheme provides collision resistance even in the face of +different starting states of the main RNG. Does this seem too good to be true? +Perhaps another way of thinking about it will help. Suppose we seeded each task +completely randomly. Then there would also be a 1/2^256 chance of collision, +just as the DotMix proof gives. Essentially what the proof is telling us is that +if the weights are chosen uniformly and uncorrelated with the rest of the +compression function, then the dot product construction is a good enough way to +pseudorandomly seed each task based on its parent's RNG state and where in the +task tree it lives. From that perspective, all we need to believe is that the +dot product construction is random enough (assuming the weights are), and it +becomes easier to believe that adding an arbitrary constant to each dot product +value doesn't make its randomness any worse. + +This leaves us with the question of how to generate four pseudorandom weights to +add to the rngState[0..3] registers at each depth of the task tree. The scheme +used here is that a single 64-bit LCG state is iterated in both parent and child +at each task fork, and four different variations of the PCG-RXS-M-XS-64 output +function are applied to that state to generate four different pseudorandom +weights. Another obvious way to generate four weights would be to iterate the +LCG four times per task split. There are two main reasons we've chosen to use +four output variants instead: + +1. Advancing four times per fork reduces the set of possible weights that each + register can be perturbed by from 2^64 to 2^60. Since collision resistance is + proportional to the number of possible weight values, that would reduce + collision resistance. While it would still be strong engough, why reduce it? + +2. It's easier to compute four PCG output variants in parallel. Iterating the + LCG is inherently sequential. PCG variants can be computed independently. All + four can even be computed at once with SIMD vector instructions. The C + compiler doesn't currently choose to do that transformation, but it could. + +A key question is whether the approach of using four variations of PCG-RXS-M-XS +is sufficiently random both within and between streams to provide the collision +resistance we expect. We obviously can't test that with 256 bits, but we have +tested it with a reduced state analogue using four PCG-RXS-M-XS-8 output +variations applied to a common 8-bit LCG. Test results do indicate sufficient +independence: a single register has collisions at 2^5 while four registers only +start having collisions at 2^20. This is actually better scaling of collision +resistance than we theoretically expect. 
In theory, with one byte of resistance +we have a 50% chance of some collision at 20 tasks, which matches what we see, +but four bytes should give a 50% chance of collision at 2^17 tasks and our +reduced size analogue construction remains collision free at 2^19 tasks. This +may be due to the next observation, which is that the way we generate +pseudorandom weights actually guarantees collision avoidance in many common +situations rather than merely providing collision resistance and thus is better +than true randomness. + +In the specific case where a parent task spawns a sequence of child tasks with +no intervening usage of its main RNG, the parent and child tasks are actually +_guaranteed_ to have different RNG states. This is true because the four PCG +streams each produce every possible 2^64 bit output exactly once in the full +2^64 period of the LCG generator. This is considered a weakness of PCG-RXS-M-XS +when used as a general purpose RNG, but is quite beneficial in this application. +Since each of up to 2^64 children will be perturbed by different weights, they +cannot have hash collisions. What about parent colliding with child? That can +only happen if all four main RNG registers are perturbed by exactly zero. This +seems unlikely, but could it occur? Consider the core of the output function: + + p ^= p >> ((p >> 59) + 5); + p *= m[i]; + p ^= p >> 43 + +It's easy to check that this maps zero to zero. An unchanged parent RNG can only +happen if all four `p` values are zero at the end of this, which implies that +they were all zero at the beginning. However, that is impossible since the four +`p` values differ from `x` by different additive constants, so they cannot all +be zero. Stated more generally, this non-collision property: assuming the main +RNG isn't used between task forks, sibling and parent tasks cannot have RNG +collisions. If the task tree structure is more deeply nested or if there are +intervening uses of the main RNG, we're back to relying on "merely" 256 bits of +collision resistance, but it's nice to know that in what is likely the most +common case, RNG collisions are actually impossible. This fact may also explain +better-than-theoretical collision resistance observed in our experiment with a +reduced size analogue of our hashing system. + +[1]: https://www.pcg-random.org/pdf/hmc-cs-2014-0905.pdf + +[2]: http://supertech.csail.mit.edu/papers/dprng.pdf + +[3]: https://gee.cs.oswego.edu/dl/papers/oopsla14.pdf +*/ +void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT { - /* TODO: consider a less ad-hoc construction - Ideally we could just use the output of the random stream to seed the initial - state of the child. Out of an overabundance of caution we multiply with - effectively random coefficients, to break possible self-interactions. - - It is not the goal to mix bits -- we work under the assumption that the - source is well-seeded, and its output looks effectively random. - However, xoshiro has never been studied in the mode where we seed the - initial state with the output of another xoshiro instance. 
- - Constants have nothing up their sleeve: - 0x02011ce34bce797f == hash(UInt(1))|0x01 - 0x5a94851fb48a6e05 == hash(UInt(2))|0x01 - 0x3688cf5d48899fa7 == hash(UInt(3))|0x01 - 0x867b4bb4c42e5661 == hash(UInt(4))|0x01 - */ - to->rngState[0] = 0x02011ce34bce797f * jl_genrandom(from->rngState); - to->rngState[1] = 0x5a94851fb48a6e05 * jl_genrandom(from->rngState); - to->rngState[2] = 0x3688cf5d48899fa7 * jl_genrandom(from->rngState); - to->rngState[3] = 0x867b4bb4c42e5661 * jl_genrandom(from->rngState); + // load and advance the internal LCG state + uint64_t x = src[4]; + src[4] = dst[4] = x * 0xd1342543de82ef95 + 1; + // high spectrum multiplier from https://arxiv.org/abs/2001.05304 + + static const uint64_t a[4] = { + 0xe5f8fa077b92a8a8, // random additive offsets... + 0x7a0cd918958c124d, + 0x86222f7d388588d4, + 0xd30cbd35f2b64f52 + }; + static const uint64_t m[4] = { + 0xaef17502108ef2d9, // standard PCG multiplier + 0xf34026eeb86766af, // random odd multipliers... + 0x38fd70ad58dd9fbb, + 0x6677f9b93ab0c04d + }; + + // PCG-RXS-M-XS-64 output with four variants + for (int i = 0; i < 4; i++) { + uint64_t p = x + a[i]; + p ^= p >> ((p >> 59) + 5); + p *= m[i]; + p ^= p >> 43; + dst[i] = src[i] + p; // SplitMix dot product + } } JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize) { jl_task_t *ct = jl_current_task; jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type); + jl_set_typetagof(t, jl_task_tag, 0); JL_PROBE_RT_NEW_TASK(ct, t); t->copy_stack = 0; if (ssize == 0) { @@ -787,10 +1125,10 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->result = jl_nothing; t->donenotify = completion_future; jl_atomic_store_relaxed(&t->_isexception, 0); - // Inherit logger state from parent task - t->logstate = ct->logstate; + // Inherit scope from parent task + t->scope = ct->scope; // Fork task-local random state from parent - rng_split(ct, t); + jl_rng_split(t->rngState, ct->rngState); // there is no active exception handler available on this stack yet t->eh = NULL; t->sticky = 1; @@ -802,6 +1140,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->threadpoolid = ct->threadpoolid; t->ptls = NULL; t->world_age = ct->world_age; + t->reentrant_timing = 0; + jl_timing_task_init(t); #ifdef COPY_STACKS if (!t->copy_stack) { @@ -818,6 +1158,9 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion #endif #ifdef _COMPILER_TSAN_ENABLED_ t->ctx.tsan_state = __tsan_create_fiber(0); +#endif +#ifdef _COMPILER_ASAN_ENABLED_ + t->ctx.asan_fake_stack = NULL; #endif return t; } @@ -890,13 +1233,44 @@ void jl_init_tasks(void) JL_GC_DISABLED exit(1); } #endif +#if defined(_COMPILER_ASAN_ENABLED_) && __GLIBC__ + void *libc_handle = dlopen("libc.so.6", RTLD_NOW | RTLD_NOLOAD); + if (libc_handle) { + *(void**)&real_siglongjmp = dlsym(libc_handle, "siglongjmp"); + dlclose(libc_handle); + } + if (real_siglongjmp == NULL) { + jl_safe_printf("failed to get real siglongjmp\n"); + exit(1); + } +#endif } -STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void) +#if defined(_COMPILER_ASAN_ENABLED_) +STATIC_OR_JS void NOINLINE JL_NORETURN _start_task(void); +#endif + +STATIC_OR_JS void NOINLINE JL_NORETURN JL_NO_ASAN start_task(void) +{ +CFI_NORETURN +#if defined(_COMPILER_ASAN_ENABLED_) + // First complete the fiber switch, otherwise ASAN will be confused + // when it unpoisons the stack in _start_task +#ifdef __clang_gcanalyzer__ + jl_task_t *ct = 
jl_get_current_task(); +#else + jl_task_t *ct = jl_current_task; +#endif + jl_ptls_t ptls = ct->ptls; + sanitizer_finish_switch_fiber(ptls->previous_task, ct); + _start_task(); +} + +STATIC_OR_JS void NOINLINE JL_NORETURN _start_task(void) { CFI_NORETURN +#endif // this runs the first time we switch to a task - sanitizer_finish_switch_fiber(); #ifdef __clang_gcanalyzer__ jl_task_t *ct = jl_get_current_task(); #else @@ -915,9 +1289,10 @@ CFI_NORETURN ct->started = 1; JL_PROBE_RT_START_TASK(ct); + jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { record_backtrace(ptls, 0); - jl_push_excstack(&ct->excstack, ct->result, + jl_push_excstack(ct, &ct->excstack, ct->result, ptls->bt_data, ptls->bt_size); res = ct->result; } @@ -927,7 +1302,7 @@ CFI_NORETURN ptls->defer_signal = 0; jl_sigint_safepoint(ptls); } - JL_TIMING(ROOT); + JL_TIMING(ROOT, ROOT); res = jl_apply(&ct->start, 1); } JL_CATCH { @@ -1112,7 +1487,7 @@ static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) #endif #if defined(JL_HAVE_ASM) -static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) +JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) { assert(lastt); #ifdef JL_HAVE_UNW_CONTEXT @@ -1129,7 +1504,7 @@ static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t) tsan_switch_to_ctx(t); jl_start_fiber_set(t); // doesn't return } -static void jl_start_fiber_set(jl_ucontext_t *t) +JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t) { char *stk = ((char**)&t->ctx)[0]; size_t ssize = ((size_t*)&t->ctx)[1]; @@ -1225,9 +1600,9 @@ static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) _jl_ucontext_t base_ctx; memcpy(&base_ctx, &ptls->base_ctx, sizeof(base_ctx)); sigfillset(&set); - if (sigprocmask(SIG_BLOCK, &set, &oset) != 0) { + if (pthread_sigmask(SIG_BLOCK, &set, &oset) != 0) { jl_free_stack(stk, *ssize); - jl_error("sigprocmask failed"); + jl_error("pthread_sigmask failed"); } uc_stack.ss_sp = stk; uc_stack.ss_size = *ssize; @@ -1259,9 +1634,9 @@ static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) jl_free_stack(stk, *ssize); jl_error("sigaltstack failed"); } - if (sigprocmask(SIG_SETMASK, &oset, NULL) != 0) { + if (pthread_sigmask(SIG_SETMASK, &oset, NULL) != 0) { jl_free_stack(stk, *ssize); - jl_error("sigprocmask failed"); + jl_error("pthread_sigmask failed"); } if (&ptls->base_ctx != t) { memcpy(&t, &ptls->base_ctx, sizeof(base_ctx)); @@ -1328,6 +1703,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) if (jl_nothing == NULL) // make a placeholder jl_nothing = jl_gc_permobj(0, jl_nothing_type); jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); + jl_set_typetagof(ct, jl_task_tag, 0); memset(ct, 0, sizeof(jl_task_t)); void *stack = stack_lo; size_t ssize = (char*)stack_hi - (char*)stack_lo; @@ -1347,6 +1723,12 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->stkbuf = stack; ct->bufsz = ssize; } + +#ifdef USE_TRACY + char *unique_string = (char *)malloc(strlen("Root") + 1); + strcpy(unique_string, "Root"); + ct->name = unique_string; +#endif ct->started = 1; ct->next = jl_nothing; ct->queue = jl_nothing; @@ -1356,7 +1738,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->result = jl_nothing; ct->donenotify = jl_nothing; jl_atomic_store_relaxed(&ct->_isexception, 0); - ct->logstate = jl_nothing; + ct->scope = jl_nothing; ct->eh 
= NULL; ct->gcstack = NULL; ct->excstack = NULL; @@ -1365,6 +1747,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->sticky = 1; ct->ptls = ptls; ct->world_age = 1; // OK to run Julia code on this task + ct->reentrant_timing = 0; ptls->root_task = ct; jl_atomic_store_relaxed(&ptls->current_task, ct); JL_GC_PROMISE_ROOTED(ct); @@ -1374,6 +1757,11 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) #ifdef _COMPILER_TSAN_ENABLED_ ct->ctx.tsan_state = __tsan_get_current_fiber(); #endif +#ifdef _COMPILER_ASAN_ENABLED_ + ct->ctx.asan_fake_stack = NULL; +#endif + + jl_timing_block_task_enter(ct, ptls, NULL); #ifdef COPY_STACKS // initialize the base_ctx from which all future copy_stacks will be copies @@ -1388,12 +1776,15 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) #endif if (jl_setjmp(ptls->copy_stack_ctx.uc_mcontext, 0)) start_task(); // sanitizer_finish_switch_fiber is part of start_task - return ct; } - ssize = JL_STACK_SIZE; - char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL); - ptls->stackbase = stkbuf + ssize; - ptls->stacksize = ssize; + else { + ssize = JL_STACK_SIZE; + char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL); + if (stkbuf != NULL) { + ptls->stackbase = stkbuf + ssize; + ptls->stacksize = ssize; + } + } #endif if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON) diff --git a/src/threading.c b/src/threading.c index 2cebdb22fc0aa..648f7d935bac2 100644 --- a/src/threading.c +++ b/src/threading.c @@ -10,10 +10,14 @@ #include "julia_internal.h" #include "julia_assert.h" +#ifdef USE_ITTAPI +#include "ittapi/ittnotify.h" +#endif + // Ref https://www.uclibc.org/docs/tls.pdf // For variant 1 JL_ELF_TLS_INIT_SIZE is the size of the thread control block (TCB) // For variant 2 JL_ELF_TLS_INIT_SIZE is 0 -#ifdef _OS_LINUX_ +#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) # if defined(_CPU_X86_64_) || defined(_CPU_X86_) # define JL_ELF_TLS_VARIANT 2 # define JL_ELF_TLS_INIT_SIZE 0 @@ -30,6 +34,11 @@ # include #endif +// `ElfW` was added to FreeBSD in 12.3 but we still support 12.2 +#if defined(_OS_FREEBSD_) && !defined(ElfW) +# define ElfW(x) __ElfN(x) +#endif + #ifdef __cplusplus extern "C" { #endif @@ -46,12 +55,16 @@ JL_DLLEXPORT void *jl_get_ptls_states(void) return jl_current_task->ptls; } +static void jl_delete_thread(void*); + #if !defined(_OS_WINDOWS_) +static pthread_key_t jl_task_exit_key; static pthread_key_t jl_safe_restore_key; __attribute__((constructor)) void _jl_init_safe_restore(void) { pthread_key_create(&jl_safe_restore_key, NULL); + pthread_key_create(&jl_task_exit_key, jl_delete_thread); } JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) @@ -124,21 +137,26 @@ static DWORD jl_safe_restore_key; BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason, IN LPVOID Reserved) { + jl_task_t *ct; switch (nReason) { case DLL_PROCESS_ATTACH: jl_pgcstack_key = TlsAlloc(); assert(jl_pgcstack_key != TLS_OUT_OF_INDEXES); jl_safe_restore_key = TlsAlloc(); assert(jl_safe_restore_key != TLS_OUT_OF_INDEXES); - // Fall through - case DLL_THREAD_ATTACH: - break; - case DLL_THREAD_DETACH: break; case DLL_PROCESS_DETACH: TlsFree(jl_pgcstack_key); TlsFree(jl_safe_restore_key); break; + case DLL_THREAD_ATTACH: + // will call jl_adopt_thread lazily on-demand + break; + case DLL_THREAD_DETACH: + ct = jl_get_current_task(); + if (ct != NULL) + jl_delete_thread((void*)ct->ptls); + break; } return 1; // success } @@ -245,7 +263,7 @@ static jl_gcframe_t 
**jl_get_pgcstack_init(void) // are used. Since the address of TLS variables should be constant, // changing the getter address can result in weird crashes. - // This is clearly not thread safe but should be fine since we + // This is clearly not thread-safe but should be fine since we // make sure the tls states callback is finalized before adding // multiple threads # if JL_USE_IFUNC @@ -282,8 +300,10 @@ JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack(void) JL_GLOBALLY_ROOTED void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) { +#ifndef __clang_gcanalyzer__ if (jl_get_pgcstack_cb == jl_get_pgcstack_init) jl_get_pgcstack_init(); +#endif // for codegen *f = jl_get_pgcstack_cb; *k = jl_pgcstack_key; @@ -291,8 +311,12 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) #endif static uv_mutex_t tls_lock; // controls write-access to these variables: -jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; +_Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; +int jl_all_tls_states_size; static uv_cond_t cond; +// concurrent reads are permitted, using the same pattern as mtsmall_arraylist +// it is implemented separately because the API of direct jl_all_tls_states use is already widely prevalent +void jl_init_thread_scheduler(jl_ptls_t ptls) JL_NOTSAFEPOINT; // return calling thread's ID JL_DLLEXPORT int16_t jl_threadid(void) @@ -302,7 +326,8 @@ JL_DLLEXPORT int16_t jl_threadid(void) JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT { - if (tid < 0 || tid >= jl_n_threads) + int nthreads = jl_atomic_load_acquire(&jl_n_threads); + if (tid < 0 || tid >= nthreads) jl_error("invalid tid"); int n = 0; for (int i = 0; i < jl_n_threadpools; i++) { @@ -310,14 +335,25 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT if (tid < n) return (int8_t)i; } - jl_error("internal error: couldn't determine threadpool id"); + return -1; // everything else uses threadpool -1 (does not belong to any threadpool) } jl_ptls_t jl_init_threadtls(int16_t tid) { +#ifndef _OS_WINDOWS_ + if (pthread_getspecific(jl_task_exit_key)) + abort(); +#endif + if (jl_get_pgcstack() != NULL) + abort(); jl_ptls_t ptls = (jl_ptls_t)calloc(1, sizeof(jl_tls_states_t)); - ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self(); +#ifndef _OS_WINDOWS_ + pthread_setspecific(jl_task_exit_key, (void*)ptls); +#endif + ptls->system_id = uv_thread_self(); ptls->rngseed = jl_rand(); + if (tid == 0) + ptls->disable_gc = 1; #ifdef _OS_WINDOWS_ if (tid == 0) { if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), @@ -328,16 +364,15 @@ jl_ptls_t jl_init_threadtls(int16_t tid) } } #endif - ptls->tid = tid; jl_atomic_store_relaxed(&ptls->gc_state, 0); // GC unsafe // Conditionally initialize the safepoint address. 
See comment in // `safepoint.c` if (tid == 0) { - ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size); + jl_atomic_store_relaxed(&ptls->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size)); } else { - ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size * 2 + - sizeof(size_t)); + jl_atomic_store_relaxed(&ptls->safepoint, (size_t*)(jl_safepoint_pages + jl_page_size * 2 + + sizeof(size_t))); } jl_bt_element_t *bt_data = (jl_bt_element_t*) malloc_s(sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1)); @@ -345,15 +380,143 @@ jl_ptls_t jl_init_threadtls(int16_t tid) ptls->bt_data = bt_data; small_arraylist_new(&ptls->locks, 0); jl_init_thread_heap(ptls); + jl_init_thread_scheduler(ptls); - uv_mutex_init(&ptls->sleep_lock); - uv_cond_init(&ptls->wake_signal); - - jl_all_tls_states[tid] = ptls; + uv_mutex_lock(&tls_lock); + if (tid == -1) + tid = jl_atomic_load_relaxed(&jl_n_threads); + ptls->tid = tid; + jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); + if (jl_all_tls_states_size <= tid) { + int i, newsize = jl_all_tls_states_size + tid + 2; + jl_ptls_t *newpptls = (jl_ptls_t*)calloc(newsize, sizeof(jl_ptls_t)); + for (i = 0; i < jl_all_tls_states_size; i++) { + newpptls[i] = allstates[i]; + } + jl_atomic_store_release(&jl_all_tls_states, newpptls); + jl_all_tls_states_size = newsize; + jl_gc_add_quiescent(ptls, (void**)allstates, free); + allstates = newpptls; + } + allstates[tid] = ptls; + if (jl_atomic_load_relaxed(&jl_n_threads) < tid + 1) + jl_atomic_store_release(&jl_n_threads, tid + 1); + jl_fence(); + uv_mutex_unlock(&tls_lock); return ptls; } +JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) +{ + // `jl_init_threadtls` puts us in a GC unsafe region, so ensure GC isn't running. + // we can't use a normal safepoint because we don't have signal handlers yet. + // we also can't use jl_safepoint_wait_gc because that assumes we're in a task. + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + while (jl_atomic_load_acquire(&jl_gc_running)) { + jl_cpu_pause(); + } + // this check is coupled with the one in `jl_safepoint_wait_gc`, where we observe if a + // foreign thread has asked to disable the GC, guaranteeing the order of events. 
+ + // initialize this thread (assign tid, create heap, set up root task) + jl_ptls_t ptls = jl_init_threadtls(-1); + void *stack_lo, *stack_hi; + jl_init_stack_limits(0, &stack_lo, &stack_hi); + + // warning: this changes `jl_current_task`, so be careful not to call that from this function + jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); // assumes the GC is disabled + JL_GC_PROMISE_ROOTED(ct); + uv_random(NULL, NULL, &ct->rngState, sizeof(ct->rngState), 0, NULL); + jl_atomic_fetch_add(&jl_gc_disable_counter, -1); + return &ct->gcstack; +} + +void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT; +void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT; + +static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER +{ +#ifndef _OS_WINDOWS_ + pthread_setspecific(jl_task_exit_key, NULL); +#endif + jl_ptls_t ptls = (jl_ptls_t)value; + // safepoint until GC exit, in case GC was running concurrently while in + // prior unsafe-region (before we let it release the stack memory) + (void)jl_gc_unsafe_enter(ptls); + scheduler_delete_thread(ptls); + // try to free some state we do not need anymore +#ifndef _OS_WINDOWS_ + void *signal_stack = ptls->signal_stack; + size_t signal_stack_size = ptls->signal_stack_size; + if (signal_stack != NULL) { + stack_t ss; + if (sigaltstack(NULL, &ss)) + jl_errorf("fatal error: sigaltstack: %s", strerror(errno)); + if (ss.ss_sp == signal_stack) { + ss.ss_flags = SS_DISABLE; + if (sigaltstack(&ss, NULL) != 0) { + jl_errorf("warning: sigaltstack: %s (will leak this memory)", strerror(errno)); + signal_stack = NULL; + } + } + if (signal_stack != NULL) { + if (signal_stack_size) + jl_free_stack(signal_stack, signal_stack_size); + else + free(signal_stack); + } + ptls->signal_stack = NULL; + } +#endif + // Acquire the profile write lock, to ensure we are not racing with the `kill` + // call in the profile code which will also try to look at this thread. + // We have no control over when the user calls pthread_join, so we must do + // this here by blocking. This also synchronizes our read of `current_task` + // (which is the flag we currently use to check the liveness state of a thread). +#ifdef _OS_WINDOWS_ + jl_lock_profile_wr(); +#elif defined(JL_DISABLE_LIBUNWIND) + // nothing +#elif defined(__APPLE__) + jl_lock_profile_wr(); +#else + pthread_mutex_lock(&in_signal_lock); +#endif + // need to clear pgcstack and eh, but we can clear everything now too + jl_task_frame_noreturn(jl_atomic_load_relaxed(&ptls->current_task)); + if (jl_set_task_tid(ptls->root_task, ptls->tid)) { + // the system will probably free this stack memory soon + // so prevent any other thread from accessing it later + jl_task_frame_noreturn(ptls->root_task); + } + else { + // Uh oh. The user cleared the sticky bit so it started running + // elsewhere, then called pthread_exit on this thread. This is not + // recoverable. Though we could just hang here, a fatal message is better. 
+ jl_safe_printf("fatal: thread exited from wrong Task.\n"); + abort(); + } + jl_atomic_store_relaxed(&ptls->current_task, NULL); // dead + // finally, release all of the locks we had grabbed +#ifdef _OS_WINDOWS_ + jl_unlock_profile_wr(); +#elif defined(JL_DISABLE_LIBUNWIND) + // nothing +#elif defined(__APPLE__) + jl_unlock_profile_wr(); +#else + pthread_mutex_unlock(&in_signal_lock); +#endif + // then park in safe-region + (void)jl_gc_safe_enter(ptls); +} + +//// debugging hack: if we are exiting too fast for error message printing on threads, +//// enabling this will stall that first thread just before exiting, to give +//// the other threads time to fail and emit their failure message +//__attribute__((destructor)) static void _waitthreaddeath(void) { sleep(1); } + JL_DLLEXPORT jl_mutex_t jl_codegen_lock; jl_mutex_t typecache_lock; @@ -460,6 +623,10 @@ static void jl_check_tls(void) JL_DLLEXPORT const int jl_tls_elf_support = 0; #endif +extern int jl_n_markthreads; +extern int jl_n_sweepthreads; +extern int gc_first_tid; + // interface to Julia; sets up to make the runtime thread-safe void jl_init_threading(void) { @@ -467,7 +634,6 @@ void jl_init_threading(void) uv_mutex_init(&tls_lock); uv_cond_init(&cond); - #ifdef JL_ELF_TLS_VARIANT jl_check_tls(); #endif @@ -476,17 +642,16 @@ void jl_init_threading(void) // specified on the command line (and so are in `jl_options`) or by the // environment variable. Set the globals `jl_n_threadpools`, `jl_n_threads` // and `jl_n_threads_per_pool`. - jl_n_threadpools = 1; - jl_n_threads = JULIA_NUM_THREADS; - int16_t nthreads = jl_n_threads, nthreadsi = 0; + jl_n_threadpools = 2; + int16_t nthreads = JULIA_NUM_THREADS; + int16_t nthreadsi = 0; char *endptr, *endptri; if (jl_options.nthreads != 0) { // --threads specified - jl_n_threadpools = jl_options.nthreadpools; nthreads = jl_options.nthreads_per_pool[0]; if (nthreads < 0) nthreads = jl_effective_threads(); - if (jl_n_threadpools == 2) + if (jl_options.nthreadpools == 2) nthreadsi = jl_options.nthreads_per_pool[1]; } else if ((cp = getenv(NUM_THREADS_NAME))) { // ENV[NUM_THREADS_NAME] specified @@ -511,31 +676,77 @@ void jl_init_threading(void) if (errno != 0 || endptri == cp || nthreadsi < 0) nthreadsi = 0; } - if (nthreadsi > 0) - jl_n_threadpools++; } } - jl_n_threads = nthreads + nthreadsi; - jl_n_threads_per_pool = (int *)malloc(2 * sizeof(int)); - jl_n_threads_per_pool[0] = nthreads; - jl_n_threads_per_pool[1] = nthreadsi; + int cpu = jl_cpu_threads(); + jl_n_markthreads = jl_options.nmarkthreads - 1; + jl_n_sweepthreads = jl_options.nsweepthreads; + if (jl_n_markthreads == -1) { // --gcthreads not specified + if ((cp = getenv(NUM_GC_THREADS_NAME))) { // ENV[NUM_GC_THREADS_NAME] specified + errno = 0; + jl_n_markthreads = (uint64_t)strtol(cp, &endptr, 10) - 1; + if (errno != 0 || endptr == cp || nthreads <= 0) + jl_n_markthreads = 0; + cp = endptr; + if (*cp == ',') { + cp++; + errno = 0; + jl_n_sweepthreads = strtol(cp, &endptri, 10); + if (errno != 0 || endptri == cp || jl_n_sweepthreads < 0) { + jl_n_sweepthreads = 0; + } + } + } + else { + // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified, + // set the number of mark threads to half of compute threads + // and number of sweep threads to 0 + if (nthreads <= 1) { + jl_n_markthreads = 0; + } + else { + jl_n_markthreads = (nthreads / 2) - 1; + } + // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified, + // cap the number of threads that may run the mark phase to + // the number of CPU cores + if (jl_n_markthreads 
+ 1 >= cpu) { + jl_n_markthreads = cpu - 1; + } + } + } + // warn the user if they try to run with a number + // of GC threads which is larger than the number + // of physical cores + if (jl_n_markthreads + 1 > cpu) { + jl_safe_printf("WARNING: running Julia with %d GC threads on %d CPU cores\n", jl_n_markthreads + 1, cpu); + } + int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; -#ifndef __clang_gcanalyzer__ - jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*)); -#endif + jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; + jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); + jl_n_threads_per_pool[0] = nthreadsi; + jl_n_threads_per_pool[1] = nthreads; + assert(jl_all_tls_states_size > 0); + jl_atomic_store_release(&jl_all_tls_states, (jl_ptls_t*)calloc(jl_all_tls_states_size, sizeof(jl_ptls_t))); + jl_atomic_store_release(&jl_n_threads, jl_all_tls_states_size); + jl_n_gcthreads = ngcthreads; + gc_first_tid = nthreads; } static uv_barrier_t thread_init_done; void jl_start_threads(void) { + int nthreads = jl_atomic_load_relaxed(&jl_n_threads); + int ngcthreads = jl_n_gcthreads; int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; uv_thread_t uvtid; - if (cpumasksize < jl_n_threads) // also handles error case - cpumasksize = jl_n_threads; + if (cpumasksize < nthreads) // also handles error case + cpumasksize = nthreads; char *mask = (char*)alloca(cpumasksize); // do we have exclusive use of the machine? default is no @@ -548,7 +759,7 @@ void jl_start_threads(void) // according to a 'compact' policy // non-exclusive: no affinity settings; let the kernel move threads about if (exclusive) { - if (jl_n_threads > jl_cpu_threads()) { + if (nthreads > jl_cpu_threads()) { jl_printf(JL_STDERR, "ERROR: Too many threads requested for %s option.\n", MACHINE_EXCLUSIVE_NAME); exit(1); } @@ -559,21 +770,29 @@ void jl_start_threads(void) mask[0] = 0; } - // The analyzer doesn't know jl_n_threads doesn't change, help it - size_t nthreads = jl_n_threads; - // create threads uv_barrier_init(&thread_init_done, nthreads); + // GC/System threads need to be after the worker threads. 
+ int nworker_threads = nthreads - ngcthreads; + for (i = 1; i < nthreads; ++i) { jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread t->tid = i; t->barrier = &thread_init_done; - uv_thread_create(&uvtid, jl_threadfun, t); - if (exclusive) { - mask[i] = 1; - uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize); - mask[i] = 0; + if (i < nworker_threads) { + uv_thread_create(&uvtid, jl_threadfun, t); + if (exclusive) { + mask[i] = 1; + uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize); + mask[i] = 0; + } + } + else if (i == nthreads - 1 && jl_n_sweepthreads == 1) { + uv_thread_create(&uvtid, jl_concurrent_gc_threadfun, t); + } + else { + uv_thread_create(&uvtid, jl_parallel_gc_threadfun, t); } uv_thread_detach(&uvtid); } @@ -606,6 +825,15 @@ JL_DLLEXPORT void jl_exit_threaded_region(void) } } +// Profiling stubs + +void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT +{ + jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL); + lock->count = 0; + jl_profile_lock_init(lock, name); +} + void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) { jl_task_t *owner = jl_atomic_load_relaxed(&lock->owner); @@ -613,9 +841,18 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) lock->count++; return; } + // Don't use JL_TIMING for instant acquires, results in large blowup of events + jl_profile_lock_start_wait(lock); + if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { + lock->count = 1; + jl_profile_lock_acquired(lock); + return; + } + JL_TIMING(LOCK_SPIN, LOCK_SPIN); while (1) { if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { lock->count = 1; + jl_profile_lock_acquired(lock); return; } if (safepoint) { @@ -628,7 +865,7 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) uv_cond_wait(&cond, &tls_lock); uv_mutex_unlock(&tls_lock); } - jl_cpu_pause(); + jl_cpu_suspend(); owner = jl_atomic_load_relaxed(&lock->owner); } } @@ -691,6 +928,7 @@ void _jl_mutex_unlock_nogc(jl_mutex_t *lock) assert(jl_atomic_load_relaxed(&lock->owner) == jl_current_task && "Unlocking a lock in a different thread."); if (--lock->count == 0) { + jl_profile_lock_release_start(lock); jl_atomic_store_release(&lock->owner, (jl_task_t*)NULL); jl_cpu_wake(); if (jl_running_under_rr(0)) { @@ -699,6 +937,7 @@ void _jl_mutex_unlock_nogc(jl_mutex_t *lock) uv_cond_broadcast(&cond); uv_mutex_unlock(&tls_lock); } + jl_profile_lock_release_end(lock); } #endif } diff --git a/src/threading.h b/src/threading.h index 4c6f1e19881f5..260ecffa30dd5 100644 --- a/src/threading.h +++ b/src/threading.h @@ -12,7 +12,7 @@ extern "C" { #define PROFILE_JL_THREADING 0 -extern jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */ +extern _Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */ typedef struct _jl_threadarg_t { int16_t tid; @@ -21,10 +21,12 @@ typedef struct _jl_threadarg_t { } jl_threadarg_t; // each thread must initialize its TLS -jl_ptls_t jl_init_threadtls(int16_t tid); +jl_ptls_t jl_init_threadtls(int16_t tid) JL_NOTSAFEPOINT; // provided by a threading infrastructure void jl_init_threadinginfra(void); +void jl_parallel_gc_threadfun(void *arg); +void jl_concurrent_gc_threadfun(void *arg); void jl_threadfun(void *arg); #ifdef __cplusplus diff --git a/src/timing.c b/src/timing.c index 929a09305f993..590e52b8d523d 100644 --- a/src/timing.c +++ b/src/timing.c @@ -6,44 +6,181 @@ #include "options.h" #include 
"stdio.h" +#if defined(USE_TRACY) || defined(USE_ITTAPI) +#define DISABLE_FREQUENT_EVENTS +#endif + +jl_module_t *jl_module_root(jl_module_t *m); + #ifdef __cplusplus extern "C" { #endif #ifdef ENABLE_TIMINGS -#include "timing.h" #ifndef HAVE_TIMING_SUPPORT #error Timings are not supported on your compiler #endif static uint64_t t0; -JL_DLLEXPORT uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0}; -const char *jl_timing_names[(int)JL_TIMING_LAST] = + +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_SUBSYSTEM_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; + +// Used to as an item limit when several strings of metadata can +// potentially be associated with a single timing zone. +JL_DLLEXPORT uint32_t jl_timing_print_limit = 10; + +const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST] = { -#define X(name) #name - JL_TIMING_OWNERS +#define X(name) #name, + JL_TIMING_SUBSYSTEMS #undef X }; +JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST]; + +#ifdef USE_TIMING_COUNTS +static arraylist_t jl_timing_counts_events; +static jl_mutex_t jl_timing_counts_events_lock; +#endif //USE_TIMING_COUNTS + +#ifdef USE_ITTAPI +static arraylist_t jl_timing_ittapi_events; +static jl_mutex_t jl_timing_ittapi_events_lock; +#endif //USE_ITTAPI + +#ifdef USE_TIMING_COUNTS +static int cmp_counts_events(const void *a, const void *b) { + jl_timing_counts_event_t *event_a = *(jl_timing_counts_event_t **)a; + jl_timing_counts_event_t *event_b = *(jl_timing_counts_event_t **)b; + return strcmp(event_a->name, event_b->name); +} +#endif + void jl_print_timings(void) { +#ifdef USE_TIMING_COUNTS + qsort(jl_timing_counts_events.items, jl_timing_counts_events.len, + sizeof(jl_timing_counts_event_t *), cmp_counts_events); + + JL_LOCK_NOGC(&jl_timing_counts_events_lock); uint64_t total_time = cycleclock() - t0; uint64_t root_time = total_time; - for (int i = 0; i < JL_TIMING_LAST; i++) { - root_time -= jl_timing_data[i]; + jl_timing_counts_event_t *root_event; + for (int i = 0; i < jl_timing_counts_events.len; i++) { + jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + if (strcmp(other_event->name, "ROOT") == 0) { + root_event = other_event; + } else { + root_time -= jl_atomic_load_relaxed(&other_event->self); + } } - jl_timing_data[0] = root_time; - for (int i = 0; i < JL_TIMING_LAST; i++) { - if (jl_timing_data[i] != 0) - fprintf(stderr, "%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i], - 100 * (((double)jl_timing_data[i]) / total_time), jl_timing_data[i]); + jl_atomic_store_relaxed(&root_event->self, root_time); + jl_atomic_store_relaxed(&root_event->total, total_time); + + fprintf(stderr, "\nJULIA TIMINGS\n"); + fprintf(stderr, "%-25s, %-30s, %-30s\n", "Event", "Self Cycles (% of Total)", "Total Cycles (% of Total)"); + for (int i = 0; i < jl_timing_counts_events.len; i++) { + jl_timing_counts_event_t *event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + uint64_t self = jl_atomic_load_relaxed(&event->self); + uint64_t total = jl_atomic_load_relaxed(&event->total); + if (total != 0) + fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%), %20" PRIu64 " (%5.2f %%)\n", + event->name, + self, 100 * (((double)self) / total_time), + total, 100 * (((double)total) / total_time)); } + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); + + fprintf(stderr, "\nJULIA COUNTERS\n"); + fprintf(stderr, "%-25s, %-20s\n", "Counter", "Value"); +#define X(name) do { \ + int64_t val = (int64_t) 
jl_atomic_load_relaxed(&jl_timing_counters[(int)JL_TIMING_COUNTER_##name].basic_counter); \ + if (val != 0) \ + fprintf(stderr, "%-25s, %20" PRIi64 "\n", #name, val); \ + } while (0); + + JL_TIMING_COUNTERS +#undef X +#endif +} + +static int indirect_strcmp(const void *a, const void *b) { + return strcmp(*(const char **)a, *(const char **)b); } void jl_init_timing(void) { t0 = cycleclock(); + + _Static_assert(JL_TIMING_SUBSYSTEM_LAST < sizeof(uint64_t) * CHAR_BIT, "Too many timing subsystems!"); + +#ifdef USE_TIMING_COUNTS + JL_MUTEX_INIT(&jl_timing_counts_events_lock, "jl_timing_counts_events_lock"); + + // Create events list for counts backend + arraylist_new(&jl_timing_counts_events, 1); + + jl_timing_counts_event_t *root_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); + arraylist_push(&jl_timing_counts_events, (void *)root_event); + + root_event->name = "ROOT"; + jl_atomic_store_relaxed(&root_event->self, 0); + jl_atomic_store_relaxed(&root_event->total, 0); +#endif + +#ifdef USE_ITTAPI + // Create events list for ITTAPI backend + JL_MUTEX_INIT(&jl_timing_ittapi_events_lock, "jl_timing_ittapi_events_lock"); + arraylist_new(&jl_timing_ittapi_events, 0); +#endif + + // Sort the subsystem names for quick enable/disable lookups + qsort(jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, + sizeof(const char *), indirect_strcmp); + + int i __attribute__((unused)) = 0; +#ifdef USE_ITTAPI + i = 0; +#define X(name) jl_timing_counters[i++].ittapi_counter = __itt_counter_create(#name, "julia.runtime"); + JL_TIMING_COUNTERS +#undef X +#endif +#ifdef USE_TRACY + i = 0; +#define X(counter_name) jl_timing_counters[i].tracy_counter = (jl_tracy_counter_t){0, #counter_name}; \ + TracyCPlotConfig(jl_timing_counters[i++].tracy_counter.name, TracyPlotFormatNumber, /* rectilinear */ 1, /* fill */ 1, /* color */ 0); + JL_TIMING_COUNTERS +#undef X + // We reference these by enum indexing and then asking for the name, since that allows the compiler + // to catch name mismatches. + TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_HeapSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); + TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); + TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITCodeSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); + TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITDataSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); + TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_ImageSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); +#endif + +/** + * These sources often generate millions of events / minute. Although Tracy + * can generally keep up with that, those events also bloat the saved ".tracy" + * files, so we disable them by default. + **/ +#ifdef DISABLE_FREQUENT_EVENTS + uint8_t error = 0; + error |= jl_timing_set_enable("ROOT", 0); + error |= jl_timing_set_enable("TYPE_CACHE_LOOKUP", 0); + error |= jl_timing_set_enable("METHOD_MATCH", 0); + error |= jl_timing_set_enable("METHOD_LOOKUP_FAST", 0); + error |= jl_timing_set_enable("AST_COMPRESS", 0); + error |= jl_timing_set_enable("AST_UNCOMPRESS", 0); + if (error) + jl_error("invalid timing subsystem encountered in jl_init_timing"); +#endif + + // Apply e.g. 
JULIA_TIMING_SUBSYSTEMS="+GC,-INFERENCE" and + // JULIA_TIMING_METADATA_PRINT_LIMIT=20 + jl_timing_apply_env(); } void jl_destroy_timing(void) @@ -51,25 +188,477 @@ void jl_destroy_timing(void) jl_ptls_t ptls = jl_current_task->ptls; jl_timing_block_t *stack = ptls->timing_stack; while (stack) { - _jl_timing_block_destroy(stack); + jl_timing_block_end(stack); stack = stack->prev; } } -jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block) +static const int get_timing_subsystem(const char *subsystem) { + const char **match = (const char **)bsearch( + &subsystem, jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, + sizeof(const char *), indirect_strcmp + ); + if (!match) + return JL_TIMING_SUBSYSTEM_LAST; + + return (int)(match - &jl_timing_subsystems[0]); +} + +#ifdef USE_ITTAPI + +typedef struct { + __itt_event event; + const char *name; +} cached_ittapi_event_t; + +static __itt_event _jl_timing_ittapi_event_create(const char *event) { + JL_LOCK_NOGC(&jl_timing_ittapi_events_lock); + const size_t n = jl_timing_ittapi_events.len; + for (size_t i = 0; i < n; i++) { + cached_ittapi_event_t *other_event = (cached_ittapi_event_t *)jl_timing_ittapi_events.items[i]; + if (strcmp(event, other_event->name) == 0) { + JL_UNLOCK_NOGC(&jl_timing_ittapi_events_lock); + return other_event->event; + } + } + + // No matching event found - create a new one + cached_ittapi_event_t *new_event = (cached_ittapi_event_t *)malloc(sizeof(cached_ittapi_event_t)); + arraylist_push(&jl_timing_ittapi_events, (void *)new_event); + new_event->name = event; + new_event->event = __itt_event_create(event, strlen(event)); + JL_UNLOCK_NOGC(&jl_timing_ittapi_events_lock); + + return new_event->event; +} + +#endif // USE_ITTAPI + +#ifdef USE_TIMING_COUNTS + +// This function is analogous to __itt_event_create but for the counts backend +// +// `event` is required to live forever +static jl_timing_counts_event_t *_jl_timing_counts_event_create(const char *event) { + JL_LOCK_NOGC(&jl_timing_counts_events_lock); + const size_t n = jl_timing_counts_events.len; + for (size_t i = 0; i < n; i++) { + jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + if (strcmp(event, other_event->name) == 0) { + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); + return other_event; + } + } + + // No matching event found - create a new one + jl_timing_counts_event_t *new_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); + arraylist_push(&jl_timing_counts_events, (void *)new_event); + new_event->name = event; + jl_atomic_store_relaxed(&new_event->self, 0); + jl_atomic_store_relaxed(&new_event->total, 0); + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); + + return new_event; +} + +STATIC_INLINE void _jl_timing_counts_pause(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(block->running); + block->running = 0; +#endif + block->total += t - block->start; +} + +STATIC_INLINE void _jl_timing_counts_resume(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(!block->running); + block->running = 1; +#endif + block->start = t; +} + +STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { + block->total = 0; + block->start = t; + block->t0 = t; +#ifdef JL_DEBUG_BUILD + block->running = 1; +#endif +} + +STATIC_INLINE void _jl_timing_counts_stop(jl_timing_block_t *block, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + 
assert(block->counts_ctx.running); + block->counts_ctx.running = 0; +#endif + jl_timing_counts_event_t *event = block->event->counts_event; + block->counts_ctx.total += t - block->counts_ctx.start; + jl_atomic_fetch_add_relaxed(&event->self, block->counts_ctx.total); + jl_atomic_fetch_add_relaxed(&event->total, t - block->counts_ctx.t0); +} + +#endif // USE_TIMING_COUNTS + +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color) { + int maybe_subsystem = get_timing_subsystem(subsystem); + if (maybe_subsystem >= JL_TIMING_SUBSYSTEM_LAST) { + jl_errorf("invalid timing subsystem name: %s", subsystem); + return NULL; + } + + jl_timing_event_t *event = (jl_timing_event_t *) malloc(sizeof(jl_timing_event_t)); + event->subsystem = maybe_subsystem; + +#ifdef USE_TIMING_COUNTS + event->counts_event = _jl_timing_counts_event_create(name); +#endif // USE_TIMING_COUNTS + +#ifdef USE_ITTAPI + event->ittapi_event = _jl_timing_ittapi_event_create(name); +#endif // USE_ITTAPI + +#ifdef USE_TRACY + event->tracy_srcloc.name = name; + event->tracy_srcloc.function = function; + event->tracy_srcloc.file = file; + event->tracy_srcloc.line = line; + event->tracy_srcloc.color = color; +#endif // USE_TRACY + + return event; +} + +JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event) { + if (size < sizeof(jl_timing_block_t)) { + jl_errorf("jl_timing_block_t buffer must be at least %d bytes", sizeof(jl_timing_block_t)); + return; + } + + jl_timing_block_t *block = (jl_timing_block_t *)buf; + memset(block, 0, sizeof(jl_timing_block_t)); + block->event = event; +} + +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) { + assert(!block->is_running); + if (!_jl_timing_enabled(block->event->subsystem)) return; + if (jl_get_pgcstack() == NULL) return; // not setup on this thread + + uint64_t t = cycleclock(); (void)t; + _COUNTS_START(&block->counts_ctx, t); + _ITTAPI_START(block); + _TRACY_START(block); + + jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack; + block->prev = *prevp; + block->is_running = 1; + if (block->prev) { + _COUNTS_PAUSE(&block->prev->counts_ctx, t); + } + *prevp = block; +} + +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) { + if (block->is_running) { + uint64_t t = cycleclock(); (void)t; + _ITTAPI_STOP(block); + _TRACY_STOP(block->tracy_ctx); + _COUNTS_STOP(block, t); + + jl_task_t *ct = jl_current_task; + jl_timing_block_t **pcur = &ct->ptls->timing_stack; + assert(*pcur == block); + *pcur = block->prev; + if (block->prev) { + _COUNTS_RESUME(&block->prev->counts_ctx, t); + } + } +} + +jl_timing_block_t *jl_timing_block_pop(jl_timing_block_t *cur_block) { - _jl_timing_block_destroy(cur_block); + jl_timing_block_end(cur_block); return cur_block->prev; } -void jl_timing_block_start(jl_timing_block_t *cur_block) +void jl_timing_block_task_enter(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk) +{ + if (prev_blk != NULL) { + assert(ptls->timing_stack == NULL); + + ptls->timing_stack = prev_blk; + if (prev_blk != NULL) { + _COUNTS_RESUME(&prev_blk->counts_ctx, cycleclock()); + } + } + +#ifdef USE_TRACY + TracyCFiberEnter(ct->name); +#else + (void)ct; +#endif +} + +jl_timing_block_t *jl_timing_block_task_exit(jl_task_t *ct, jl_ptls_t ptls) +{ +#ifdef USE_TRACY + // Tracy is fairly strict about not leaving a fiber that hasn't + // been entered, which happens often when connecting to a running + // Julia session. 
+ // + // Eventually, Tracy will support telling the server which fibers + // are active upon connection, but until then we work around the + // problem by not explicitly leaving the fiber at all. + // + // Later when we enter the new fiber directly, that will cause the + // the active fiber to be left implicitly. + + //TracyCFiberLeave; +#endif + (void)ct; + + jl_timing_block_t *blk = ptls->timing_stack; + ptls->timing_stack = NULL; + + if (blk != NULL) { + _COUNTS_PAUSE(&blk->counts_ctx, cycleclock()); + } + return blk; +} + +JL_DLLEXPORT void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block) +{ +#ifdef USE_TRACY + ios_t buf; + ios_mem(&buf, IOS_INLSIZE); + buf.growable = 0; // Restrict to inline buffer to avoid allocation + + jl_static_show((JL_STREAM*)&buf, v); + if (buf.size == buf.maxsize) + memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); + + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); +#endif +} + +JL_DLLEXPORT void jl_timing_show_module(jl_module_t *m, jl_timing_block_t *cur_block) +{ +#ifdef USE_TRACY + jl_module_t *root = jl_module_root(m); + if (root == m || root == jl_main_module) { + const char *module_name = jl_symbol_name(m->name); + TracyCZoneText(cur_block->tracy_ctx, module_name, strlen(module_name)); + } else { + jl_timing_printf(cur_block, "%s.%s", jl_symbol_name(root->name), jl_symbol_name(m->name)); + } +#endif +} + +JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *cur_block) +{ +#ifdef USE_TRACY + const char *filename = gnu_basename(path); + TracyCZoneText(cur_block->tracy_ctx, filename, strlen(filename)); +#endif +} + +JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_t* mod, jl_timing_block_t *cur_block) +{ +#ifdef USE_TRACY + jl_module_t *root = jl_module_root(mod); + if (root == mod || root == jl_main_module) { + jl_timing_printf(cur_block, "%s:%d in %s", + gnu_basename(file), + line, + jl_symbol_name(mod->name)); + } else { + // TODO: generalize to print the entire module hierarchy + jl_timing_printf(cur_block, "%s:%d in %s.%s", + gnu_basename(file), + line, + jl_symbol_name(root->name), + jl_symbol_name(mod->name)); + } +#endif +} + +JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block) +{ + jl_timing_show_func_sig(mi->specTypes, cur_block); + if (jl_is_method(mi->def.value)) { + jl_method_t *def = mi->def.method; + jl_timing_show_location(jl_symbol_name(def->file), def->line, def->module, cur_block); + } else { + jl_timing_printf(cur_block, " in %s", + jl_symbol_name(mi->def.module->name)); + } +} + +JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block) +{ + jl_timing_show((jl_value_t *)method, cur_block); + jl_timing_show_location(jl_symbol_name(method->file), method->line, method->module, cur_block); +} + +JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block) +{ +#ifdef USE_TRACY + ios_t buf; + ios_mem(&buf, IOS_INLSIZE); + buf.growable = 0; // Restrict to inline buffer to avoid allocation + + jl_static_show_config_t config = { /* quiet */ 1 }; + jl_static_show_func_sig_((JL_STREAM*)&buf, v, config); + if (buf.size == buf.maxsize) + memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); + + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); +#endif +} + +JL_DLLEXPORT void jl_timing_show_macro(jl_method_instance_t *macro, jl_value_t* lno, jl_module_t* mod, jl_timing_block_t *cur_block) +{ + jl_timing_printf(cur_block, "%s", jl_symbol_name(macro->def.method->name)); + 
assert(jl_typetagis(lno, jl_linenumbernode_type)); + jl_timing_show_location(jl_symbol_name((jl_sym_t*)jl_fieldref(lno, 1)), + jl_unbox_int64(jl_fieldref(lno, 0)), + mod, cur_block); +} + +JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...) { - _jl_timing_block_start(cur_block, cycleclock()); + va_list args; + va_start(args, format); + +#ifdef USE_TRACY + ios_t buf; + ios_mem(&buf, IOS_INLSIZE); + buf.growable = 0; // Restrict to inline buffer to avoid allocation + + jl_vprintf((JL_STREAM*)&buf, format, args); + if (buf.size == buf.maxsize) + memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); + + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); +#endif + va_end(args); } -void jl_timing_block_stop(jl_timing_block_t *cur_block) +JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str) { - _jl_timing_block_stop(cur_block, cycleclock()); +#ifdef USE_TRACY + TracyCZoneText(cur_block->tracy_ctx, str, strlen(str)); +#endif +} + +void jl_timing_task_init(jl_task_t *t) +{ +#ifdef USE_TRACY + jl_value_t *start_type = jl_typeof(t->start); + const char *start_name = ""; + if (jl_is_datatype(start_type)) + start_name = jl_symbol_name(((jl_datatype_t *) start_type)->name->name); + + static uint16_t task_id = 1; + + // XXX: Tracy uses this as a handle internally and requires that this + // string live forever, so this allocation is intentionally leaked. + char *fiber_name; + if (start_name[0] == '#') { + jl_method_instance_t *mi = jl_method_lookup(&t->start, 1, jl_get_world_counter()); + const char *filename = gnu_basename(jl_symbol_name(mi->def.method->file)); + const char *module_name = jl_symbol_name(mi->def.method->module->name); + + // 26 characters in "Task 65535 (:0000000 in )\0" + size_t fiber_name_len = strlen(filename) + strlen(module_name) + 26; + fiber_name = (char *)malloc(fiber_name_len); + snprintf(fiber_name, fiber_name_len, "Task %d (%s:%d in %s)", + task_id++, filename, mi->def.method->line, module_name); + } else { + + // 16 characters in "Task 65535 (\"\")\0" + size_t fiber_name_len = strlen(start_name) + 16; + fiber_name = (char *)malloc(fiber_name_len); + snprintf(fiber_name, fiber_name_len, "Task %d (\"%s\")", + task_id++, start_name); + } + + t->name = fiber_name; +#endif +} + +JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) +{ + int i = get_timing_subsystem(subsystem); + if (i >= JL_TIMING_SUBSYSTEM_LAST) + return -1; + + uint64_t subsystem_bit = 1ul << (i % (sizeof(uint64_t) * CHAR_BIT)); + if (enabled) { + jl_atomic_fetch_and_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), ~subsystem_bit); + } else { + jl_atomic_fetch_or_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), subsystem_bit); + } + return 0; +} + +static void jl_timing_set_enable_from_env(void) +{ + const char *env = getenv("JULIA_TIMING_SUBSYSTEMS"); + if (!env) + return; + + // Copy `env`, so that we can modify it + size_t sz = strlen(env) + 1; + char *env_copy = (char *)malloc(sz); + memcpy(env_copy, env, sz); + + char *subsystem = env_copy; + char *ch = subsystem; + uint8_t enable = 1; + while (1) { + // +SUBSYSTEM means enable, -SUBSYSTEM means disable + if (*subsystem == '+' || *subsystem == '-') + enable = (*subsystem++ == '+'); + + if (*ch == ',') { + *ch++ = '\0'; + if ((*subsystem != '\0') && jl_timing_set_enable(subsystem, enable)) + fprintf(stderr, "warning: unable to configure timing for non-existent subsystem \"%s\"\n", subsystem); + + subsystem = ch; + enable = 1; + } + 
else if (*ch == '\0') { + if ((*subsystem != '\0') && jl_timing_set_enable(subsystem, enable)) + fprintf(stderr, "warning: unable to configure timing for non-existent subsystem \"%s\"\n", subsystem); + + break; + } + else ch++; + } + free(env_copy); +} + +static void jl_timing_set_print_limit_from_env(void) +{ + const char *const env = getenv("JULIA_TIMING_METADATA_PRINT_LIMIT"); + if (!env) + return; + + char *endp; + long value = strtol(env, &endp, 10); + if (*endp == '\0' && value >= 0 && value <= UINT32_MAX) + jl_timing_print_limit = (uint32_t)value; +} + +void jl_timing_apply_env(void) +{ + // JULIA_TIMING_SUBSYSTEMS + jl_timing_set_enable_from_env(); + + // JULIA_TIMING_METADATA_PRINT_LIMIT + jl_timing_set_print_limit_from_env(); } #else @@ -77,6 +666,15 @@ void jl_timing_block_stop(jl_timing_block_t *cur_block) void jl_init_timing(void) { } void jl_destroy_timing(void) { } +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color) { return NULL; } + +JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event) { } +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) { } +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) { } + +JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { return -1; } +JL_DLLEXPORT uint32_t jl_timing_print_limit = 0; + #endif #ifdef __cplusplus diff --git a/src/timing.h b/src/timing.h index fd84707ad5d2c..1cc82b67e2b6a 100644 --- a/src/timing.h +++ b/src/timing.h @@ -3,162 +3,436 @@ #ifndef JL_TIMING_H #define JL_TIMING_H +#include "julia.h" + +static inline const char *gnu_basename(const char *path) +{ + const char *base = strrchr(path, '/'); +#ifdef _WIN32 + const char *backslash = strrchr(path, '\\'); + if (backslash > base) + base = backslash; +#endif + return base ? base+1 : path; +} + +#ifdef USE_TRACY +typedef struct { + _Atomic(int64_t) val; + char* name; +} jl_tracy_counter_t; +#endif + #ifdef __cplusplus extern "C" { #endif + void jl_init_timing(void); -void jl_destroy_timing(void); +void jl_destroy_timing(void) JL_NOTSAFEPOINT; + +// Update the enable bit-mask to enable/disable tracing events for +// the subsystem in `jl_timing_subsystems` matching the provided string. +// +// Returns -1 if no matching sub-system was found. +JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled); + +// Check for environment vars "JULIA_TIMING_METADATA_PRINT_LIMIT" and +// "JULIA_TIMING_SUBSYSTEMS" and if present apply these to the metadata +// print limit and the timings enable mask, respectively. +// +// For example, to enable INFERENCE and METHOD_MATCH and disable GC: +// JULIA_TIMING_SUBSYSTEMS="+INFERENCE,-GC,+METHOD_MATCH" +// +// For example, to increase the metadata item print limit from 10 to 20: +// JULIA_TIMING_METADATA_PRINT_LIMIT=20 +void jl_timing_apply_env(void); + +// Configurable item limit, runtime code should use this to limit printing +// when adding potentially many items of metadata to a single timing zone. 
+extern JL_DLLEXPORT uint32_t jl_timing_print_limit; + +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color); +JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event); +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *cur_block); +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block); + #ifdef __cplusplus } #endif -#ifndef ENABLE_TIMINGS -#define JL_TIMING(owner) +#if defined(_COMPILER_CLANG_) +#define HAVE_TIMING_SUPPORT +#elif defined(_COMPILER_GCC_) +#define HAVE_TIMING_SUPPORT +#endif + +#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_TIMING_COUNTS ) +#define ENABLE_TIMINGS +#endif + +#if !defined( ENABLE_TIMINGS ) || !defined( HAVE_TIMING_SUPPORT ) + +#define JL_TIMING(subsystem, event) +#define JL_TIMING_CREATE_BLOCK(new_block_name, subsystem, event) + +#define JL_TIMING_SUSPEND_TASK(subsystem, ct) + +#define jl_timing_show(v, b) +#define jl_timing_show_module(m, b) +#define jl_timing_show_filename(f, b) +#define jl_timing_show_method_instance(mi, b) +#define jl_timing_show_method(mi, b) +#define jl_timing_show_func_sig(tt, b) +#define jl_timing_show_location(file, line, mod, b) +#define jl_timing_show_macro(macro, lno, mod, b) +#define jl_timing_printf(b, f, ...) +#define jl_timing_puts(b, s) +#define jl_timing_task_init(t) +#define jl_timing_event_create(blk) +#define jl_timing_block_start(blk) +#define jl_timing_block_task_enter(ct, ptls, blk) +#define jl_timing_block_task_exit(ct, ptls) ((jl_timing_block_t *)NULL) +#define jl_timing_block_pop(blk) + +#define jl_timing_counter_inc(counter, value) +#define jl_timing_counter_dec(counter, value) + +#define jl_profile_lock_init(lock, name) +#define jl_profile_lock_start_wait(lock) +#define jl_profile_lock_acquired(lock) +#define jl_profile_lock_release_start(lock) +#define jl_profile_lock_release_end(lock) + #else #include "julia_assert.h" +#ifdef USE_TRACY +#include "tracy/TracyC.h" +typedef struct ___tracy_source_location_data TracySrcLocData; +#endif + +#ifdef USE_ITTAPI +#include +#endif #ifdef __cplusplus extern "C" { #endif void jl_print_timings(void); -jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block); -void jl_timing_block_start(jl_timing_block_t *cur_block); -void jl_timing_block_stop(jl_timing_block_t *cur_block); + +void jl_timing_task_init(jl_task_t *t); +void jl_timing_block_task_enter(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk); +jl_timing_block_t *jl_timing_block_task_exit(jl_task_t *ct, jl_ptls_t ptls); +jl_timing_block_t *jl_timing_block_pop(jl_timing_block_t *cur_block); + +// Add the output of `jl_static_show(x)` as a text annotation to the +// profiling region corresponding to `cur_block`. +// +// If larger than IOS_INLSIZE (~80 characters), text is truncated. 
+JL_DLLEXPORT void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_module(jl_module_t *m, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_t* mod, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_macro(jl_method_instance_t *macro, jl_value_t* lno, jl_module_t* mod, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...); +JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); + +#define jl_timing_event_create(subsystem, name, function, file, line, color) _jl_timing_event_create(subsystem, name, function, file, line, color) +#define jl_timing_block_start(blk) _jl_timing_block_start(blk) +#define jl_timing_block_end(blk) _jl_timing_block_end(blk) + #ifdef __cplusplus } #endif -#ifdef __cplusplus -#define HAVE_TIMING_SUPPORT -#elif defined(_COMPILER_CLANG_) -#define HAVE_TIMING_SUPPORT -#elif defined(_COMPILER_GCC_) -#define HAVE_TIMING_SUPPORT -#endif +#define JL_TIMING_DEFAULT_BLOCK (&__timing_block) -#ifndef HAVE_TIMING_SUPPORT -#define JL_TIMING(owner) -#else +#define JL_TIMING_SUBSYSTEMS \ + X(ROOT) \ + X(GC) \ + X(LOWERING) \ + X(PARSING) \ + X(INFERENCE) \ + X(CODEGEN) \ + X(METHOD_LOOKUP_SLOW) \ + X(METHOD_LOOKUP_FAST) \ + X(CODEINST_COMPILE) \ + X(LLVM_JIT) \ + X(METHOD_MATCH) \ + X(TYPE_CACHE_LOOKUP) \ + X(TYPE_CACHE_INSERT) \ + X(STAGED_FUNCTION) \ + X(MACRO_INVOCATION) \ + X(AST_COMPRESS) \ + X(AST_UNCOMPRESS) \ + X(SYSIMG_DUMP) \ + X(NATIVE_AOT) \ + X(ADD_METHOD) \ + X(LOAD_MODULE) \ + X(LOAD_IMAGE) \ + X(VERIFY_IMAGE) \ + X(VERIFY_IR) \ + X(SAVE_MODULE) \ + X(INIT_MODULE) \ + X(LOCK_SPIN) \ + X(STACKWALK) \ + X(DL_OPEN) \ + X(JULIA_INIT) \ -#define JL_TIMING_OWNERS \ - X(ROOT), \ - X(GC), \ - X(LOWERING), \ - X(PARSING), \ - X(INFERENCE), \ - X(CODEGEN), \ - X(METHOD_LOOKUP_SLOW), \ - X(METHOD_LOOKUP_FAST), \ - X(LLVM_OPT), \ - X(LLVM_MODULE_FINISH), \ - X(METHOD_MATCH), \ - X(TYPE_CACHE_LOOKUP), \ - X(TYPE_CACHE_INSERT), \ - X(STAGED_FUNCTION), \ - X(MACRO_INVOCATION), \ - X(AST_COMPRESS), \ - X(AST_UNCOMPRESS), \ - X(SYSIMG_LOAD), \ - X(SYSIMG_DUMP), \ - X(NATIVE_DUMP), \ - X(ADD_METHOD), \ - X(LOAD_MODULE), \ - X(SAVE_MODULE), \ - X(INIT_MODULE), - -enum jl_timing_owners { -#define X(name) JL_TIMING_ ## name - JL_TIMING_OWNERS + +#define JL_TIMING_COUNTERS \ + X(Invalidations) \ + X(HeapSize) \ + X(JITSize) \ + X(JITCodeSize) \ + X(JITDataSize) \ + X(ImageSize) \ + + +enum jl_timing_subsystem { +#define X(name) JL_TIMING_ ## name, + JL_TIMING_SUBSYSTEMS #undef X - JL_TIMING_LAST + JL_TIMING_SUBSYSTEM_LAST }; -extern uint64_t jl_timing_data[(int)JL_TIMING_LAST]; -extern const char *jl_timing_names[(int)JL_TIMING_LAST]; +enum jl_timing_counter_types { +#define X(name) JL_TIMING_COUNTER_ ## name, + JL_TIMING_COUNTERS +#undef X + JL_TIMING_COUNTER_LAST +}; -struct _jl_timing_block_t { // typedef in julia.h - jl_timing_block_t *prev; +#define TIMING_XCONCAT(x1, x2) x1##x2 +#define TIMING_CONCAT(x1, x2) TIMING_XCONCAT(x1, x2) + +/** + * Timing Backend: Aggregated timing counts (implemented in 
timing.c) + **/ + +typedef struct jl_timing_counts_event_t { + const char *name; + _Atomic(uint64_t) self; + _Atomic(uint64_t) total; +} jl_timing_counts_event_t; + +typedef struct _jl_timing_counts_t { uint64_t total; + uint64_t start; uint64_t t0; - int owner; #ifdef JL_DEBUG_BUILD uint8_t running; #endif -}; +} jl_timing_counts_t; -STATIC_INLINE void _jl_timing_block_stop(jl_timing_block_t *block, uint64_t t) { -#ifdef JL_DEBUG_BUILD - assert(block->running); - block->running = 0; +#ifdef USE_TIMING_COUNTS +#define _COUNTS_EVENT_MEMBER jl_timing_counts_event_t *counts_event; +#define _COUNTS_BLOCK_MEMBER jl_timing_counts_t counts_ctx; +#define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) +#define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) +#define _COUNTS_PAUSE(block, t) _jl_timing_counts_pause(block, t) +#define _COUNTS_RESUME(block, t) _jl_timing_counts_resume(block, t) +#else +#define _COUNTS_EVENT_MEMBER +#define _COUNTS_BLOCK_MEMBER +#define _COUNTS_START(block, t) +#define _COUNTS_STOP(block, t) +#define _COUNTS_PAUSE(block, t) +#define _COUNTS_RESUME(block, t) #endif - block->total += t - block->t0; -} -STATIC_INLINE void _jl_timing_block_start(jl_timing_block_t *block, uint64_t t) { -#ifdef JL_DEBUG_BUILD - assert(!block->running); - block->running = 1; +/** + * Timing Backend: Tracy + **/ + +#ifdef USE_TRACY +#define _TRACY_EVENT_MEMBER TracySrcLocData tracy_srcloc; +#define _TRACY_BLOCK_MEMBER TracyCZoneCtx tracy_ctx; +#define _TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( &(block)->event->tracy_srcloc, 1 ); +#define _TRACY_STOP(ctx) TracyCZoneEnd(ctx) +#else +#define _TRACY_EVENT_MEMBER +#define _TRACY_BLOCK_MEMBER +#define _TRACY_START(block) +#define _TRACY_STOP(ctx) #endif - block->t0 = t; -} -STATIC_INLINE uint64_t _jl_timing_block_init(jl_timing_block_t *block, int owner) { - uint64_t t = cycleclock(); - block->owner = owner; - block->total = 0; -#ifdef JL_DEBUG_BUILD - block->running = 0; +/** + * Timing Backend: Intel VTune (ITTAPI) + **/ + +#ifdef USE_ITTAPI +#define _ITTAPI_EVENT_MEMBER __itt_event ittapi_event; +#define _ITTAPI_BLOCK_MEMBER +#define _ITTAPI_START(block) __itt_event_start((block)->event->ittapi_event) +#define _ITTAPI_STOP(block) __itt_event_end((block)->event->ittapi_event) +#else +#define _ITTAPI_EVENT_MEMBER +#define _ITTAPI_BLOCK_MEMBER +#define _ITTAPI_START(block) +#define _ITTAPI_STOP(block) #endif - _jl_timing_block_start(block, t); - return t; + +/** + * Top-level jl_timing implementation + **/ + +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_SUBSYSTEM_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; +extern const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST]; + +/** + * Stores all static attributes associated with a profiling event. + * + * A single event can be used to create many timing blocks with + * the same name/source information. + **/ +struct _jl_timing_event_t { // typedef in julia.h + _TRACY_EVENT_MEMBER + _ITTAPI_EVENT_MEMBER + _COUNTS_EVENT_MEMBER + + int subsystem; +}; + +/** + * Stores all dynamic attributes associated with a timing block. + * + * Every time the application enters an instrumented block of code, + * a new timing block is created. A timing block corresponds to one + * "span" of time in the profiler. 
+ **/ +struct _jl_timing_block_t { // typedef in julia.h + struct _jl_timing_block_t *prev; + jl_timing_event_t *event; + + _TRACY_BLOCK_MEMBER + _ITTAPI_BLOCK_MEMBER + _COUNTS_BLOCK_MEMBER + + uint8_t is_running; +}; + +STATIC_INLINE int _jl_timing_enabled(int subsystem) JL_NOTSAFEPOINT { + return (jl_atomic_load_relaxed(jl_timing_disable_mask + subsystem / (sizeof(uint64_t) * CHAR_BIT)) & (1 << (subsystem % (sizeof(uint64_t) * CHAR_BIT)))) == 0; } -STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) { - uint64_t t = _jl_timing_block_init(block, owner); - jl_task_t *ct = jl_current_task; - jl_timing_block_t **prevp = &ct->ptls->timing_stack; - block->prev = *prevp; - if (block->prev) - _jl_timing_block_stop(block->prev, t); - *prevp = block; +typedef struct _jl_timing_suspend_t { + jl_task_t *ct; +} jl_timing_suspend_t; + +STATIC_INLINE void _jl_timing_suspend_ctor(jl_timing_suspend_t *suspend, const char *subsystem, jl_task_t *ct) JL_NOTSAFEPOINT { + suspend->ct = ct; +#ifdef USE_TRACY + TracyCFiberEnter(subsystem); +#endif } -STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) { - uint64_t t = cycleclock(); - jl_task_t *ct = jl_current_task; - _jl_timing_block_stop(block, t); - jl_timing_data[block->owner] += block->total; - jl_timing_block_t **pcur = &ct->ptls->timing_stack; - assert(*pcur == block); - *pcur = block->prev; - if (block->prev) - _jl_timing_block_start(block->prev, t); +STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_NOTSAFEPOINT { +#ifdef USE_TRACY + TracyCFiberEnter(suspend->ct->name); +#endif } -#ifdef __cplusplus -struct jl_timing_block_cpp_t { - jl_timing_block_t block; - jl_timing_block_cpp_t(int owner) { - _jl_timing_block_ctor(&block, owner); - } - ~jl_timing_block_cpp_t() { - _jl_timing_block_destroy(&block); - } - jl_timing_block_cpp_t(const jl_timing_block_cpp_t&) = delete; - jl_timing_block_cpp_t(const jl_timing_block_cpp_t&&) = delete; - jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &) = delete; - jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &&) = delete; -}; -#define JL_TIMING(owner) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## owner) +#define JL_TIMING(subsystem, event) \ + JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); \ + jl_timing_block_start(&__timing_block) + +#define JL_TIMING_CREATE_BLOCK(block, subsystem_name, event_name) \ + static jl_timing_event_t *TIMING_CONCAT(__timing_event, __LINE__) = 0; \ + if (!TIMING_CONCAT(__timing_event, __LINE__)) \ + TIMING_CONCAT(__timing_event, __LINE__) = jl_timing_event_create( \ + #subsystem_name, #event_name, __func__, __FILE__, __LINE__, 0 \ + ); \ + __attribute__((cleanup(_jl_timing_block_end))) \ + jl_timing_block_t block = { 0 }; \ + block.event = TIMING_CONCAT(__timing_event, __LINE__) + +#define JL_TIMING_SUSPEND_TASK(subsystem, ct) \ + __attribute__((cleanup(_jl_timing_suspend_destroy))) \ + jl_timing_suspend_t __timing_suspend; \ + _jl_timing_suspend_ctor(&__timing_suspend, #subsystem, ct) + +// Counting +#ifdef USE_ITTAPI +#define _ITTAPI_COUNTER_MEMBER __itt_counter ittapi_counter; #else -#define JL_TIMING(owner) \ - __attribute__((cleanup(_jl_timing_block_destroy))) \ - jl_timing_block_t __timing_block; \ - _jl_timing_block_ctor(&__timing_block, JL_TIMING_ ## owner) +#define _ITTAPI_COUNTER_MEMBER #endif +#ifdef USE_TRACY +# define _TRACY_COUNTER_MEMBER jl_tracy_counter_t tracy_counter; +# else +# define _TRACY_COUNTER_MEMBER #endif + +#ifdef USE_TIMING_COUNTS +#define 
_COUNTS_MEMBER _Atomic(uint64_t) basic_counter; +#else +#define _COUNTS_MEMBER +#endif + +typedef struct { + _ITTAPI_COUNTER_MEMBER + _TRACY_COUNTER_MEMBER + _COUNTS_MEMBER +} jl_timing_counter_t; + +JL_DLLEXPORT extern jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST]; + +static inline void jl_timing_counter_inc(int counter, uint64_t val) JL_NOTSAFEPOINT { +#ifdef USE_ITTAPI + __itt_counter_inc_delta(jl_timing_counters[counter].ittapi_counter, val); +#endif +#ifdef USE_TRACY + jl_tracy_counter_t *tracy_counter = &jl_timing_counters[counter].tracy_counter; + uint64_t oldval = jl_atomic_fetch_add_relaxed(&tracy_counter->val, val); + TracyCPlotI(tracy_counter->name, oldval + val); +#endif +#ifdef USE_TIMING_COUNTS + jl_atomic_fetch_add_relaxed(&jl_timing_counters[counter].basic_counter, val); +#endif +} + +static inline void jl_timing_counter_dec(int counter, uint64_t val) JL_NOTSAFEPOINT { +#ifdef USE_ITTAPI + __itt_counter_dec_delta(jl_timing_counters[counter].ittapi_counter, val); +#endif +#ifdef USE_TRACY + jl_tracy_counter_t *tracy_counter = &jl_timing_counters[counter].tracy_counter; + uint64_t oldval = jl_atomic_fetch_add_relaxed(&tracy_counter->val, -val); + TracyCPlotI(tracy_counter->name, oldval - val); +#endif +#ifdef USE_TIMING_COUNTS + jl_atomic_fetch_add_relaxed(&jl_timing_counters[counter].basic_counter, -(int64_t)val); +#endif +} + +// Locking profiling +static inline void jl_profile_lock_init(jl_mutex_t *lock, const char *name) { +#ifdef USE_ITTAPI + __itt_sync_create(lock, "jl_mutex_t", name, __itt_attr_mutex); +#endif +} +static inline void jl_profile_lock_start_wait(jl_mutex_t *lock) { +#ifdef USE_ITTAPI + __itt_sync_prepare(lock); +#endif +} +static inline void jl_profile_lock_acquired(jl_mutex_t *lock) { +#ifdef USE_ITTAPI + __itt_sync_acquired(lock); +#endif +} +static inline void jl_profile_lock_release_start(jl_mutex_t *lock) { +#ifdef USE_ITTAPI + __itt_sync_releasing(lock); +#endif +} +static inline void jl_profile_lock_release_end(jl_mutex_t *lock) {} + #endif #endif diff --git a/src/toplevel.c b/src/toplevel.c index ff089c1aebfa6..cb078a05d0ce4 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -48,7 +48,7 @@ JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m) void jl_init_main_module(void) { assert(jl_main_module == NULL); - jl_main_module = jl_new_module(jl_symbol("Main")); + jl_main_module = jl_new_module(jl_symbol("Main"), NULL); jl_main_module->parent = jl_main_module; jl_set_const(jl_main_module, jl_symbol("Core"), (jl_value_t*)jl_core_module); @@ -64,7 +64,8 @@ static jl_function_t *jl_module_get_initializer(jl_module_t *m JL_PROPAGATES_ROO void jl_module_run_initializer(jl_module_t *m) { - JL_TIMING(INIT_MODULE); + JL_TIMING(INIT_MODULE, INIT_MODULE); + jl_timing_show_module(m, JL_TIMING_DEFAULT_BLOCK); jl_function_t *f = jl_module_get_initializer(m); if (f == NULL) return; @@ -120,7 +121,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex { jl_task_t *ct = jl_current_task; assert(ex->head == jl_module_sym); - if (jl_array_len(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) { + if (jl_array_nrows(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) { jl_error("syntax: malformed module expression"); } @@ -134,7 +135,8 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex jl_type_error("module", (jl_value_t*)jl_symbol_type, (jl_value_t*)name); } - jl_module_t *newm = jl_new_module(name); + int is_parent__toplevel__ = jl_is__toplevel__mod(parent_module); + jl_module_t *newm = 
jl_new_module(name, is_parent__toplevel__ ? NULL : parent_module); jl_value_t *form = (jl_value_t*)newm; JL_GC_PUSH1(&form); JL_LOCK(&jl_modules_mutex); @@ -145,7 +147,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex // copy parent environment info into submodule newm->uuid = parent_module->uuid; - if (jl_is__toplevel__mod(parent_module)) { + if (is_parent__toplevel__) { newm->parent = newm; jl_register_root_module(newm); if (jl_options.incremental) { @@ -153,9 +155,8 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex } } else { - newm->parent = parent_module; - jl_binding_t *b = jl_get_binding_wr(parent_module, name, 1); - jl_declare_constant(b); + jl_binding_t *b = jl_get_binding_wr(parent_module, name); + jl_declare_constant(b, parent_module, name); jl_value_t *old = NULL; if (!jl_atomic_cmpswap(&b->value, &old, (jl_value_t*)newm)) { if (!jl_is_module(old)) { @@ -166,7 +167,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name)); old = jl_atomic_exchange(&b->value, (jl_value_t*)newm); } - jl_gc_wb_binding(b, newm); + jl_gc_wb(b, newm); if (old != NULL) { // create a hidden gc root for the old module JL_LOCK(&jl_modules_mutex); @@ -184,18 +185,29 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex size_t last_age = ct->world_age; // add standard imports unless baremodule + jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args; + int lineno = 0; + const char *filename = "none"; + if (jl_array_nrows(exprs) > 0) { + jl_value_t *lineex = jl_array_ptr_ref(exprs, 0); + if (jl_is_linenode(lineex)) { + lineno = jl_linenode_line(lineex); + jl_value_t *file = jl_linenode_file(lineex); + if (jl_is_symbol(file)) + filename = jl_symbol_name((jl_sym_t*)file); + } + } if (std_imports) { if (jl_base_module != NULL) { jl_add_standard_imports(newm); } // add `eval` function - form = jl_call_scm_on_ast("module-default-defs", (jl_value_t*)ex, newm); + form = jl_call_scm_on_ast_and_loc("module-default-defs", (jl_value_t*)name, newm, filename, lineno); jl_toplevel_eval_flex(newm, form, 0, 1); form = NULL; } - jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args; - for (int i = 0; i < jl_array_len(exprs); i++) { + for (int i = 0; i < jl_array_nrows(exprs); i++) { // process toplevel form ct->world_age = jl_atomic_load_acquire(&jl_world_counter); form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, jl_filename, jl_lineno); @@ -208,17 +220,17 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex #if 0 // some optional post-processing steps size_t i; - void **table = newm->bindings.table; - for(i=1; i < newm->bindings.size; i+=2) { - if (table[i] != HT_NOTFOUND) { - jl_binding_t *b = (jl_binding_t*)table[i]; + jl_svec_t *table = jl_atomic_load_relaxed(&newm->bindings); + for (size_t i = 0; i < jl_svec_len(table); i++) { + jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i); + if ((void*)b != jl_nothing) { // remove non-exported macros if (jl_symbol_name(b->name)[0]=='@' && - !b->exportp && b->owner == newm) + !b->exportp && b->owner == b) b->value = NULL; // error for unassigned exports /* - if (b->exportp && b->owner==newm && b->value==NULL) + if (b->exportp && b->owner==b && b->value==NULL) jl_errorf("identifier %s exported from %s is not initialized", jl_symbol_name(b->name), jl_symbol_name(newm->name)); */ @@ -242,7 +254,7 @@ static jl_value_t 
*jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex form = NULL; if (!jl_generating_output()) { if (!ptrhash_has(&jl_current_modules, (void*)newm->parent)) { - size_t i, l = jl_array_len(jl_module_init_order); + size_t i, l = jl_array_nrows(jl_module_init_order); size_t ns = 0; form = (jl_value_t*)jl_alloc_vec_any(0); for (i = 0; i < l; i++) { @@ -261,7 +273,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex JL_UNLOCK(&jl_modules_mutex); if (form) { - size_t i, l = jl_array_len(form); + size_t i, l = jl_array_nrows(form); for (i = 0; i < l; i++) { jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(form, i); JL_GC_PROMISE_ROOTED(m); @@ -299,7 +311,7 @@ static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type) { // create uninitialized mutable binding for "global x" decl sometimes or probably - size_t i, l = jl_array_len(ex->args); + size_t i, l = jl_array_nrows(ex->args); for (i = 0; i < l; i++) { jl_value_t *arg = jl_exprarg(ex, i); jl_module_t *gm; @@ -314,7 +326,7 @@ void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type) { gs = (jl_sym_t*)arg; } if (!jl_binding_resolved_p(gm, gs)) { - jl_binding_t *b = jl_get_binding_wr(gm, gs, 1); + jl_binding_t *b = jl_get_binding_wr(gm, gs); if (set_type) { jl_value_t *old_ty = NULL; // maybe set the type too, perhaps @@ -340,7 +352,7 @@ JL_DLLEXPORT jl_module_t *jl_base_relative_to(jl_module_t *m) return jl_top_module; } -static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs, int *has_opaque) +static void expr_attributes(jl_value_t *v, jl_array_t *body, int *has_ccall, int *has_defs, int *has_opaque) { if (!jl_is_expr(v)) return; @@ -364,11 +376,11 @@ static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs, i *has_defs = 1; } else if (head == jl_cfunction_sym) { - *has_intrinsics = 1; + *has_ccall = 1; return; } else if (head == jl_foreigncall_sym) { - *has_intrinsics = 1; + *has_ccall = 1; return; } else if (head == jl_new_opaque_closure_sym) { @@ -378,6 +390,9 @@ static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs, i else if (head == jl_call_sym && jl_expr_nargs(e) > 0) { jl_value_t *called = NULL; jl_value_t *f = jl_exprarg(e, 0); + if (jl_is_ssavalue(f)) { + f = jl_array_ptr_ref(body, ((jl_ssavalue_t*)f)->id - 1); + } if (jl_is_globalref(f)) { jl_module_t *mod = jl_globalref_mod(f); jl_sym_t *name = jl_globalref_name(f); @@ -393,7 +408,7 @@ static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs, i } if (called != NULL) { if (jl_is_intrinsic(called) && jl_unbox_int32(called) == (int)llvmcall) { - *has_intrinsics = 1; + *has_ccall = 1; } if (called == jl_builtin__typebody) { *has_defs = 1; @@ -402,35 +417,35 @@ static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs, i return; } int i; - for (i = 0; i < jl_array_len(e->args); i++) { + for (i = 0; i < jl_array_nrows(e->args); i++) { jl_value_t *a = jl_exprarg(e, i); if (jl_is_expr(a)) - expr_attributes(a, has_intrinsics, has_defs, has_opaque); + expr_attributes(a, body, has_ccall, has_defs, has_opaque); } } -int jl_code_requires_compiler(jl_code_info_t *src) +int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile) { jl_array_t *body = src->code; - assert(jl_typeis(body, jl_array_any_type)); + assert(jl_typetagis(body, jl_array_any_type)); size_t i; - int has_intrinsics = 0, has_defs = 0, has_opaque = 0; - if 
(jl_has_meta(body, jl_force_compile_sym)) + int has_ccall = 0, has_defs = 0, has_opaque = 0; + if (include_force_compile && jl_has_meta(body, jl_force_compile_sym)) return 1; - for(i=0; i < jl_array_len(body); i++) { + for(i=0; i < jl_array_nrows(body); i++) { jl_value_t *stmt = jl_array_ptr_ref(body,i); - expr_attributes(stmt, &has_intrinsics, &has_defs, &has_opaque); - if (has_intrinsics) + expr_attributes(stmt, body, &has_ccall, &has_defs, &has_opaque); + if (has_ccall) return 1; } return 0; } -static void body_attributes(jl_array_t *body, int *has_intrinsics, int *has_defs, int *has_loops, int *has_opaque, int *forced_compile) +static void body_attributes(jl_array_t *body, int *has_ccall, int *has_defs, int *has_loops, int *has_opaque, int *forced_compile) { size_t i; *has_loops = 0; - for(i=0; i < jl_array_len(body); i++) { + for(i=0; i < jl_array_nrows(body); i++) { jl_value_t *stmt = jl_array_ptr_ref(body,i); if (!*has_loops) { if (jl_is_gotonode(stmt)) { @@ -442,13 +457,16 @@ static void body_attributes(jl_array_t *body, int *has_intrinsics, int *has_defs *has_loops = 1; } } - expr_attributes(stmt, has_intrinsics, has_defs, has_opaque); + expr_attributes(stmt, body, has_ccall, has_defs, has_opaque); } *forced_compile = jl_has_meta(body, jl_force_compile_sym); } static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_ROOTED { + JL_TIMING(LOAD_IMAGE, LOAD_Require); + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "%s", jl_symbol_name(var)); + static jl_value_t *require_func = NULL; int build_mode = jl_generating_output(); jl_module_t *m = NULL; @@ -478,7 +496,7 @@ static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_RO static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PROPAGATES_ROOT, jl_array_t *args, jl_sym_t **name, const char *keyword) JL_GLOBALLY_ROOTED { - if (jl_array_len(args) == 0) + if (jl_array_nrows(args) == 0) jl_errorf("malformed \"%s\" statement", keyword); jl_sym_t *var = (jl_sym_t*)jl_array_ptr_ref(args, 0); size_t i = 1; @@ -502,14 +520,14 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR else { m = call_require(where, var); } - if (i == jl_array_len(args)) + if (i == jl_array_nrows(args)) return m; } else { // `.A.B.C`: strip off leading dots by following parent links m = where; while (1) { - if (i >= jl_array_len(args)) + if (i >= jl_array_nrows(args)) jl_error("invalid module path"); var = (jl_sym_t*)jl_array_ptr_ref(args, i); if (var != jl_dot_sym) @@ -526,7 +544,7 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR jl_type_error(keyword, (jl_value_t*)jl_symbol_type, (jl_value_t*)var); if (var == jl_dot_sym) jl_errorf("invalid %s path: \".\" in identifier path", keyword); - if (i == jl_array_len(args)-1) + if (i == jl_array_nrows(args)-1) break; m = (jl_module_t*)jl_eval_global_var(m, var); JL_GC_PROMISE_ROOTED(m); @@ -545,6 +563,7 @@ int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT ((jl_expr_t*)e)->head == jl_import_sym || ((jl_expr_t*)e)->head == jl_using_sym || ((jl_expr_t*)e)->head == jl_export_sym || + ((jl_expr_t*)e)->head == jl_public_sym || ((jl_expr_t*)e)->head == jl_thunk_sym || ((jl_expr_t*)e)->head == jl_global_sym || ((jl_expr_t*)e)->head == jl_const_sym || @@ -560,12 +579,13 @@ int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT jl_expr_t *ex = (jl_expr_t*)e; jl_sym_t *head = ex->head; if (head == jl_module_sym || head == jl_import_sym || head == jl_using_sym || - head == jl_export_sym || head == 
jl_thunk_sym || head == jl_toplevel_sym || - head == jl_error_sym || head == jl_incomplete_sym || head == jl_method_sym) { + head == jl_export_sym || head == jl_public_sym || head == jl_thunk_sym || + head == jl_toplevel_sym || head == jl_error_sym || head == jl_incomplete_sym || + head == jl_method_sym) { return 0; } if (head == jl_global_sym || head == jl_const_sym) { - size_t i, l = jl_array_len(ex->args); + size_t i, l = jl_array_nrows(ex->args); for (i = 0; i < l; i++) { jl_value_t *a = jl_exprarg(ex, i); if (!jl_is_symbol(a) && !jl_is_globalref(a)) @@ -579,7 +599,7 @@ int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT static jl_method_instance_t *method_instance_for_thunk(jl_code_info_t *src, jl_module_t *module) { jl_method_instance_t *li = jl_new_method_instance_uninit(); - li->uninferred = (jl_value_t*)src; + jl_atomic_store_relaxed(&li->uninferred, (jl_value_t*)src); li->specTypes = (jl_value_t*)jl_emptytuple_type; li->def.module = module; return li; @@ -589,25 +609,22 @@ static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym { assert(m); jl_sym_t *name = asname ? asname : import->name; - jl_binding_t *b; - if (jl_binding_resolved_p(m, name)) { - b = jl_get_binding(m, name); - jl_value_t *bv = jl_atomic_load_relaxed(&b->value); - if ((!b->constp && b->owner != m) || (bv && bv != (jl_value_t*)import)) { + // TODO: this is a bit race-y with what error message we might print + jl_binding_t *b = jl_get_module_binding(m, name, 0); + jl_binding_t *b2; + if (b != NULL && (b2 = jl_atomic_load_relaxed(&b->owner)) != NULL) { + if (b2->constp && jl_atomic_load_relaxed(&b2->value) == (jl_value_t*)import) + return; + if (b2 != b) jl_errorf("importing %s into %s conflicts with an existing global", jl_symbol_name(name), jl_symbol_name(m->name)); - } } else { - b = jl_get_binding_wr(m, name, 1); - b->imported = 1; - } - if (!b->constp) { - // TODO: constp is not threadsafe - jl_atomic_store_release(&b->value, (jl_value_t*)import); - b->constp = 1; - jl_gc_wb(m, (jl_value_t*)import); + b = jl_get_binding_wr(m, name); } + jl_declare_constant(b, m, name); + jl_checked_assignment(b, m, name, (jl_value_t*)import); + b->imported = 1; } // in `import A.B: x, y, ...`, evaluate the `A.B` part if it exists @@ -644,19 +661,28 @@ static void check_macro_rename(jl_sym_t *from, jl_sym_t *to, const char *keyword jl_errorf("cannot rename non-macro \"%s\" to macro \"%s\" in \"%s\"", n1, n2, keyword); } -// Format msg and eval `throw(ErrorException(msg)))` in module `m`. -// Used in `jl_toplevel_eval_flex` instead of `jl_errorf` so that the error +// Eval `throw(ErrorException(msg)))` in module `m`. +// Used in `jl_toplevel_eval_flex` instead of `jl_throw` so that the error // location in julia code gets into the backtrace. -static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...) +static void jl_eval_throw(jl_module_t *m, jl_value_t *exc) { jl_value_t *throw_ex = (jl_value_t*)jl_exprn(jl_call_sym, 2); JL_GC_PUSH1(&throw_ex); jl_exprargset(throw_ex, 0, jl_builtin_throw); + jl_exprargset(throw_ex, 1, exc); + jl_toplevel_eval_flex(m, throw_ex, 0, 0); + JL_GC_POP(); +} + +// Format error message and call jl_eval +static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...) 
+{ va_list args; va_start(args, fmt); - jl_exprargset(throw_ex, 1, jl_vexceptionf(jl_errorexception_type, fmt, args)); + jl_value_t *exc = jl_vexceptionf(jl_errorexception_type, fmt, args); va_end(args); - jl_toplevel_eval_flex(m, throw_ex, 0, 0); + JL_GC_PUSH1(&exc); + jl_eval_throw(m, exc); JL_GC_POP(); } @@ -677,7 +703,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int char *n = jl_symbol_name((jl_sym_t*)e), *n0 = n; while (*n == '_') ++n; if (*n == 0 && n > n0) - jl_eval_errorf(m, "all-underscore identifier used as rvalue"); + jl_eval_errorf(m, "all-underscore identifiers are write-only and their values cannot be used in expressions"); } return jl_interpret_toplevel_expr_in(m, e, NULL, NULL); } @@ -816,12 +842,14 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int JL_GC_POP(); return jl_nothing; } - else if (head == jl_export_sym) { - for (size_t i = 0; i < jl_array_len(ex->args); i++) { + else if (head == jl_export_sym || head == jl_public_sym) { + int exp = (head == jl_export_sym); + for (size_t i = 0; i < jl_array_nrows(ex->args); i++) { jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(ex->args, i); if (!jl_is_symbol(name)) - jl_eval_errorf(m, "syntax: malformed \"export\" statement"); - jl_module_export(m, name); + jl_eval_errorf(m, exp ? "syntax: malformed \"export\" statement" : + "syntax: malformed \"public\" statement"); + jl_module_public(m, name, exp); } JL_GC_POP(); return jl_nothing; @@ -844,15 +872,15 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int gm = m; gs = (jl_sym_t*)arg; } - jl_binding_t *b = jl_get_binding_wr(gm, gs, 1); - jl_declare_constant(b); + jl_binding_t *b = jl_get_binding_wr(gm, gs); + jl_declare_constant(b, gm, gs); JL_GC_POP(); return jl_nothing; } else if (head == jl_toplevel_sym) { jl_value_t *res = jl_nothing; int i; - for (i = 0; i < jl_array_len(ex->args); i++) { + for (i = 0; i < jl_array_nrows(ex->args); i++) { res = jl_toplevel_eval_flex(m, jl_array_ptr_ref(ex->args, i), fast, 0); } JL_GC_POP(); @@ -863,7 +891,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int jl_eval_errorf(m, "malformed \"%s\" expression", jl_symbol_name(head)); if (jl_is_string(jl_exprarg(ex, 0))) jl_eval_errorf(m, "syntax: %s", jl_string_data(jl_exprarg(ex, 0))); - jl_throw(jl_exprarg(ex, 0)); + jl_eval_throw(m, jl_exprarg(ex, 0)); } else if (jl_is_symbol(ex)) { JL_GC_POP(); @@ -874,16 +902,17 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int return (jl_value_t*)ex; } - int has_intrinsics = 0, has_defs = 0, has_loops = 0, has_opaque = 0, forced_compile = 0; + int has_ccall = 0, has_defs = 0, has_loops = 0, has_opaque = 0, forced_compile = 0; assert(head == jl_thunk_sym); thk = (jl_code_info_t*)jl_exprarg(ex, 0); - assert(jl_is_code_info(thk)); - assert(jl_typeis(thk->code, jl_array_any_type)); - body_attributes((jl_array_t*)thk->code, &has_intrinsics, &has_defs, &has_loops, &has_opaque, &forced_compile); + if (!jl_is_code_info(thk) || !jl_typetagis(thk->code, jl_array_any_type)) { + jl_eval_errorf(m, "malformed \"thunk\" statement"); + } + body_attributes((jl_array_t*)thk->code, &has_ccall, &has_defs, &has_loops, &has_opaque, &forced_compile); jl_value_t *result; - if (forced_compile || has_intrinsics || - (!has_defs && fast && has_loops && + if (has_ccall || + ((forced_compile || (!has_defs && fast && has_loops)) && jl_options.compile_enabled != JL_OPTIONS_COMPILE_OFF && jl_options.compile_enabled != 
JL_OPTIONS_COMPILE_MIN && jl_get_module_compile(m) != JL_OPTIONS_COMPILE_OFF && @@ -922,14 +951,16 @@ JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v) } // Check module `m` is open for `eval/include`, or throw an error. -static void jl_check_open_for(jl_module_t *m, const char* funcname) +JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname) { + if (jl_current_task->ptls->in_pure_callback) + jl_errorf("%s cannot be used in a generated function", fname); if (jl_options.incremental && jl_generating_output()) { if (m != jl_main_module) { // TODO: this was grand-fathered in JL_LOCK(&jl_modules_mutex); int open = ptrhash_has(&jl_current_modules, (void*)m); if (!open && jl_module_init_order != NULL) { - size_t i, l = jl_array_len(jl_module_init_order); + size_t i, l = jl_array_nrows(jl_module_init_order); for (i = 0; i < l; i++) { if (m == (jl_module_t*)jl_array_ptr_ref(jl_module_init_order, i)) { open = 1; @@ -943,25 +974,15 @@ static void jl_check_open_for(jl_module_t *m, const char* funcname) jl_errorf("Evaluation into the closed module `%s` breaks incremental compilation " "because the side effects will not be permanent. " "This is likely due to some other module mutating `%s` with `%s` during " - "precompilation - don't do this.", name, name, funcname); + "precompilation - don't do this.", name, name, fname); } } } } -JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname) -{ - if (jl_current_task->ptls->in_pure_callback) - jl_errorf("%s cannot be used in a generated function", fname); - jl_check_open_for(m, fname); -} - JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex) { - jl_task_t *ct = jl_current_task; - if (ct->ptls->in_pure_callback) - jl_error("eval cannot be used in a generated function"); - jl_check_open_for(m, "eval"); + jl_check_top_level_effect(m, "eval"); jl_value_t *v = NULL; int last_lineno = jl_lineno; const char *last_filename = jl_filename; @@ -1007,10 +1028,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, if (!jl_is_string(text) || !jl_is_string(filename)) { jl_errorf("Expected `String`s for `text` and `filename`"); } - jl_task_t *ct = jl_current_task; - if (ct->ptls->in_pure_callback) - jl_error("cannot use include inside a generated function"); - jl_check_open_for(module, "include"); + jl_check_top_level_effect(module, "include"); jl_value_t *result = jl_nothing; jl_value_t *ast = NULL; @@ -1023,6 +1041,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, jl_errorf("jl_parse_all() must generate a top level expression"); } + jl_task_t *ct = jl_current_task; int last_lineno = jl_lineno; const char *last_filename = jl_filename; size_t last_age = ct->world_age; diff --git a/src/typemap.c b/src/typemap.c index dfa8ac67f6abc..32fda9166f8f0 100644 --- a/src/typemap.c +++ b/src/typemap.c @@ -9,7 +9,7 @@ #endif #include "julia_assert.h" -#define MAX_METHLIST_COUNT 12 // this can strongly affect the sysimg size and speed! +#define MAX_METHLIST_COUNT 6 // this helps configure the sysimg size and speed. 
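The toplevel.c hunks above show call sites migrating from the old one-argument JL_TIMING(OWNER) form to the new (subsystem, event) pair and attaching metadata to the open zone (for example, JL_TIMING(INIT_MODULE, INIT_MODULE) followed by jl_timing_show_module). Below is a minimal sketch of that pattern, assuming the usual src/ headers (julia.h, julia_internal.h, timing.h) are in scope; the function example_load_step and the event name LOAD_ExampleStep are hypothetical and are not part of this patch.

    // Hypothetical runtime function adopting the new two-argument JL_TIMING
    // macro together with the metadata helpers added in timing.c.
    static jl_value_t *example_load_step(jl_module_t *mod, jl_sym_t *pkg)
    {
        // Open a zone in the LOAD_IMAGE subsystem named "LOAD_ExampleStep".
        // The zone is closed automatically when this scope exits, via the
        // __attribute__((cleanup(...))) emitted by JL_TIMING_CREATE_BLOCK.
        JL_TIMING(LOAD_IMAGE, LOAD_ExampleStep);

        // Attach free-form text and the module name to the open zone; these
        // helpers are no-ops unless a backend such as USE_TRACY is compiled in.
        jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "%s", jl_symbol_name(pkg));
        jl_timing_show_module(mod, JL_TIMING_DEFAULT_BLOCK);

        // ... actual work would go here ...
        return jl_nothing;
    }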
#ifdef __cplusplus extern "C" { @@ -23,7 +23,7 @@ static int jl_is_any(jl_value_t *t1) return t1 == (jl_value_t*)jl_any_type; } -static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT) +static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { if (jl_is_unionall(t1)) t1 = jl_unwrap_unionall(t1); @@ -33,6 +33,9 @@ static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT) else if (jl_is_typevar(t1)) { return jl_type_extract_name(((jl_tvar_t*)t1)->ub); } + else if (t1 == jl_bottom_type || t1 == (jl_value_t*)jl_typeofbottom_type || t1 == (jl_value_t*)jl_typeofbottom_type->super) { + return (jl_value_t*)jl_typeofbottom_type->name; // put Union{} and typeof(Union{}) and Type{Union{}} together for convenience + } else if (jl_is_datatype(t1)) { jl_datatype_t *dt = (jl_datatype_t*)t1; if (!jl_is_kind(t1)) @@ -63,6 +66,9 @@ static int jl_type_extract_name_precise(jl_value_t *t1, int invariant) else if (jl_is_typevar(t1)) { return jl_type_extract_name_precise(((jl_tvar_t*)t1)->ub, 0); } + else if (t1 == jl_bottom_type || t1 == (jl_value_t*)jl_typeofbottom_type || t1 == (jl_value_t*)jl_typeofbottom_type->super) { + return 1; + } else if (jl_is_datatype(t1)) { jl_datatype_t *dt = (jl_datatype_t*)t1; if ((invariant || !dt->name->abstract) && !jl_is_kind(t1)) @@ -84,6 +90,18 @@ static int jl_type_extract_name_precise(jl_value_t *t1, int invariant) return 1; } +// return whether Type{Union{}} is a subtype of Type{t1} (which may have free typevars) +static int jl_parameter_includes_bottom(jl_value_t *t1) +{ + if (jl_is_typevar(t1) || t1 == jl_bottom_type) + return 1; + else if (jl_is_uniontype(t1)) { + jl_uniontype_t *u1 = (jl_uniontype_t*)t1; + return jl_parameter_includes_bottom(u1->a) && jl_parameter_includes_bottom(u1->b); + } + return 0; +} + // ----- Type Signature Subtype Testing ----- // @@ -249,7 +267,7 @@ static inline int sig_match_simple(jl_value_t *arg1, jl_value_t **args, size_t n // predicate to fast-test if this type is a leaf type that can exist in the cache // and does not need a more expensive linear scan to find all intersections -// be careful not to put non-leaf types or DataType/UnionAll/Union in the +// we try not to put non-leaf types or DataType/UnionAll/Union in the // argument cache, since they should have a lower priority and so will go in some // later list static int is_cache_leaf(jl_value_t *ty, int tparam) @@ -259,52 +277,61 @@ static int is_cache_leaf(jl_value_t *ty, int tparam) return (jl_is_concrete_type(ty) && (tparam || !jl_is_kind(ty))); } -static _Atomic(jl_typemap_t*) *mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT +static _Atomic(jl_value_t*) *mtcache_hash_lookup_bp(jl_genericmemory_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT { - if (cache == (jl_array_t*)jl_an_empty_vec_any) + if (cache == (jl_genericmemory_t*)jl_an_empty_memory_any) return NULL; - _Atomic(jl_typemap_t*) *pml = jl_table_peek_bp(cache, ty); + _Atomic(jl_value_t*) *pml = jl_table_peek_bp(cache, ty); JL_GC_PROMISE_ROOTED(pml); // clang-sa doesn't trust our JL_PROPAGATES_ROOT claim return pml; } -static void mtcache_hash_insert(_Atomic(jl_array_t*) *cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val) +static void mtcache_hash_insert(_Atomic(jl_genericmemory_t*) *cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val) { int inserted = 0; - jl_array_t *a = jl_atomic_load_relaxed(cache); - if (a == (jl_array_t*)jl_an_empty_vec_any) { - a = 
jl_alloc_vec_any(16); + jl_genericmemory_t *a = jl_atomic_load_relaxed(cache); + if (a == (jl_genericmemory_t*)jl_an_empty_memory_any) { + a = jl_alloc_memory_any(16); jl_atomic_store_release(cache, a); - jl_gc_wb(parent, a); + if (parent) + jl_gc_wb(parent, a); } a = jl_eqtable_put(a, key, val, &inserted); assert(inserted); if (a != jl_atomic_load_relaxed(cache)) { jl_atomic_store_release(cache, a); - jl_gc_wb(parent, a); + if (parent) + jl_gc_wb(parent, a); } } -static jl_typemap_t *mtcache_hash_lookup(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT +static jl_typemap_t *mtcache_hash_lookup(jl_genericmemory_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT { - if (cache == (jl_array_t*)jl_an_empty_vec_any) + if (cache == (jl_genericmemory_t*)jl_an_empty_memory_any) return (jl_typemap_t*)jl_nothing; jl_typemap_t *ml = (jl_typemap_t*)jl_eqtable_get(cache, ty, jl_nothing); - JL_GC_PROMISE_ROOTED(ml); // clang-sa doesn't trust our JL_PROPAGATES_ROOT claim return ml; } // ----- Sorted Type Signature Lookup Matching ----- // -static int jl_typemap_array_visitor(jl_array_t *a, jl_typemap_visitor_fptr fptr, void *closure) +static int jl_typemap_memory_visitor(jl_genericmemory_t *a, jl_typemap_visitor_fptr fptr, void *closure) { - size_t i, l = jl_array_len(a); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a); + size_t i, l = a->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) a->ptr; for (i = 1; i < l; i += 2) { jl_value_t *d = jl_atomic_load_relaxed(&data[i]); JL_GC_PROMISE_ROOTED(d); - if (d && !jl_typemap_visitor(d, fptr, closure)) - return 0; + if (d == NULL) + continue; + if (jl_is_genericmemory(d)) { + if (!jl_typemap_memory_visitor((jl_genericmemory_t*)d, fptr, closure)) + return 0; + } + else { + if (!jl_typemap_visitor(d, fptr, closure)) + return 0; + } } return 1; } @@ -325,23 +352,23 @@ int jl_typemap_visitor(jl_typemap_t *cache, jl_typemap_visitor_fptr fptr, void * { if (jl_typeof(cache) == (jl_value_t*)jl_typemap_level_type) { jl_typemap_level_t *node = (jl_typemap_level_t*)cache; - jl_array_t *a; + jl_genericmemory_t *a; JL_GC_PUSH1(&a); a = jl_atomic_load_relaxed(&node->targ); - if (a != (jl_array_t*)jl_an_empty_vec_any) - if (!jl_typemap_array_visitor(a, fptr, closure)) + if (a != (jl_genericmemory_t*)jl_an_empty_memory_any) + if (!jl_typemap_memory_visitor(a, fptr, closure)) goto exit; a = jl_atomic_load_relaxed(&node->arg1); - if (a != (jl_array_t*)jl_an_empty_vec_any) - if (!jl_typemap_array_visitor(a, fptr, closure)) + if (a != (jl_genericmemory_t*)jl_an_empty_memory_any) + if (!jl_typemap_memory_visitor(a, fptr, closure)) goto exit; a = jl_atomic_load_relaxed(&node->tname); - if (a != (jl_array_t*)jl_an_empty_vec_any) - if (!jl_typemap_array_visitor(a, fptr, closure)) + if (a != (jl_genericmemory_t*)jl_an_empty_memory_any) + if (!jl_typemap_memory_visitor(a, fptr, closure)) goto exit; a = jl_atomic_load_relaxed(&node->name1); - if (a != (jl_array_t*)jl_an_empty_vec_any) - if (!jl_typemap_array_visitor(a, fptr, closure)) + if (a != (jl_genericmemory_t*)jl_an_empty_memory_any) + if (!jl_typemap_memory_visitor(a, fptr, closure)) goto exit; if (!jl_typemap_node_visitor(jl_atomic_load_relaxed(&node->linear), fptr, closure)) goto exit; @@ -349,17 +376,16 @@ int jl_typemap_visitor(jl_typemap_t *cache, jl_typemap_visitor_fptr fptr, void * goto exit; JL_GC_POP(); return 1; +exit: + JL_GC_POP(); + return 0; } else { return jl_typemap_node_visitor((jl_typemap_entry_t*)cache, fptr, closure); } - -exit: - 
JL_GC_POP(); - return 0; } -static unsigned jl_supertype_height(jl_datatype_t *dt) +static unsigned jl_supertype_height(jl_datatype_t *dt) JL_NOTSAFEPOINT { unsigned height = 1; while (dt != jl_any_type) { @@ -370,8 +396,10 @@ static unsigned jl_supertype_height(jl_datatype_t *dt) } // return true if a and b might intersect in the type domain (over just their type-names) -static int tname_intersection(jl_datatype_t *a, jl_typename_t *bname, unsigned ha) +static int tname_intersection_dt(jl_datatype_t *a, jl_typename_t *bname, unsigned ha) JL_NOTSAFEPOINT { + if (a == jl_any_type) + return 1; jl_datatype_t *b = (jl_datatype_t*)jl_unwrap_unionall(bname->wrapper); unsigned hb = 1; while (b != jl_any_type) { @@ -387,15 +415,70 @@ static int tname_intersection(jl_datatype_t *a, jl_typename_t *bname, unsigned h return a->name == bname; } -// tparam bit 1 is ::Type{T} (vs. T) -// tparam bit 2 is typename(T) (vs. T) -static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty, int tparam, - int offs, struct typemap_intersection_env *closure) +static int tname_intersection(jl_value_t *a, jl_typename_t *bname, int8_t tparam) JL_NOTSAFEPOINT +{ + if (a == (jl_value_t*)jl_any_type) + return 1; + a = jl_unwrap_unionall(a); + assert(!jl_is_vararg(a)); + if (jl_is_uniontype(a)) + return tname_intersection(((jl_uniontype_t*)a)->a, bname, tparam) || + tname_intersection(((jl_uniontype_t*)a)->b, bname, tparam); + if (jl_is_typevar(a)) + return tname_intersection(((jl_tvar_t*)a)->ub, bname, tparam); + if (jl_is_datatype(a)) { + if (tparam) { + if (!jl_is_type_type(a)) + return 0; + a = jl_unwrap_unionall(jl_tparam0(a)); + if (!jl_is_datatype(a)) + return tname_intersection(a, bname, 0); + } + return tname_intersection_dt((jl_datatype_t*)a, bname, jl_supertype_height((jl_datatype_t*)a)); + } + return 0; +} + +static int concrete_intersects(jl_value_t *t, jl_value_t *ty, int8_t tparam) +{ + if (ty == (jl_value_t*)jl_any_type) // easy case: Any always matches + return 1; + if (tparam & 1) + return jl_isa(t, ty); // (Type{t} <: ty), where is_leaf_type(t) => isa(t, ty) + else + return t == ty || jl_subtype(t, ty); +} + +// tparam bit 0 is ::Type{T} (vs. T) +// tparam bit 1 is typename(T) (vs. T) +static int jl_typemap_intersection_memory_visitor(jl_genericmemory_t *a, jl_value_t *ty, int8_t tparam, + int8_t offs, struct typemap_intersection_env *closure) { JL_GC_PUSH1(&a); - size_t i, l = jl_array_len(a); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a); - unsigned height = tparam & 2 ? jl_supertype_height((jl_datatype_t*)ty) : 0; + size_t i, l = a->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) a->ptr; + unsigned height = 0; + jl_datatype_t *tydt = jl_any_type; + if (tparam & 2) { + // try to extract a description of ty for intersections, but since we + jl_value_t *ttype = jl_unwrap_unionall(ty); + if (tparam & 1) + // extract T from Type{T} (if possible) + ttype = jl_is_type_type(ttype) ? jl_tparam0(ttype) : NULL; + if (ttype && jl_is_datatype(ttype)) { + tydt = (jl_datatype_t*)ttype; + } + else if (ttype) { + ttype = jl_type_extract_name(ttype); + tydt = ttype ? 
(jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)ttype)->wrapper) : NULL; + } + if (tydt == jl_any_type) + ty = (jl_value_t*)jl_any_type; + else if (tydt == NULL) + tydt = jl_any_type; + else + height = jl_supertype_height(tydt); + } for (i = 0; i < l; i += 2) { jl_value_t *t = jl_atomic_load_relaxed(&data[i]); JL_GC_PROMISE_ROOTED(t); @@ -404,18 +487,24 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty, if (tparam & 2) { jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i + 1]); JL_GC_PROMISE_ROOTED(ml); - if (ty == (jl_value_t*)jl_any_type || // easy case: Any always matches - tname_intersection((jl_datatype_t*)ty, (jl_typename_t*)t, height)) { - if (!jl_typemap_intersection_visitor(ml, offs + 1, closure)) - goto exit; + if (tydt == jl_any_type ? + tname_intersection(ty, (jl_typename_t*)t, tparam & 1) : + tname_intersection_dt(tydt, (jl_typename_t*)t, height)) { + if ((tparam & 1) && t == (jl_value_t*)jl_typeofbottom_type->name) // skip Type{Union{}} and Type{typeof(Union{})}, since the caller should have already handled those + continue; + if (jl_is_genericmemory(ml)) { + if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, tparam & ~2, offs, closure)) + goto exit; + } + else { + if (!jl_typemap_intersection_visitor(ml, offs + 1, closure)) + goto exit; + } } } else { - // `t` is a leaftype, so intersection test becomes subtype - if (ty == (jl_value_t*)jl_any_type || // easy case: Any always matches - (tparam & 1 - ? (jl_typeof(t) == ty || jl_isa(t, ty)) // (Type{t} <: ty), where is_leaf_type(t) => isa(t, ty) - : (t == ty || jl_subtype(t, ty)))) { + // `t` is a leaftype, so intersection test becomes subtype (after excluding kinds) + if (concrete_intersects(t, ty, tparam)) { jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i + 1]); JL_GC_PROMISE_ROOTED(ml); // NOTE: ml might be NULL if we're racing with the thread that's inserting the item @@ -432,6 +521,7 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty, return 0; } + // calls fptr on each jl_typemap_entry_t in cache in sort order // for which type ∩ ml->type != Union{}, until fptr return false static int jl_typemap_intersection_node_visitor(jl_typemap_entry_t *ml, struct typemap_intersection_env *closure) @@ -440,38 +530,64 @@ static int jl_typemap_intersection_node_visitor(jl_typemap_entry_t *ml, struct t // mark this `register` because (for branch prediction) // that can be absolutely critical for speed register jl_typemap_intersection_visitor_fptr fptr = closure->fptr; - while (ml != (void*)jl_nothing) { - if (closure->type == (jl_value_t*)ml->sig) { - // fast-path for the intersection of a type with itself - if (closure->env) - closure->env = jl_outer_unionall_vars((jl_value_t*)ml->sig); - closure->ti = closure->type; - closure->issubty = 1; - if (!fptr(ml, closure)) - return 0; + for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) { + if (closure->max_valid < ml->min_world) + continue; + if (closure->min_valid > ml->max_world) + continue; + jl_svec_t **penv = NULL; + if (closure->env) { + closure->env = jl_emptysvec; + penv = &closure->env; } - else { - jl_svec_t **penv = NULL; - if (closure->env) { - closure->env = jl_emptysvec; - penv = &closure->env; - } - closure->ti = jl_type_intersection_env_s(closure->type, (jl_value_t*)ml->sig, penv, &closure->issubty); - if (closure->ti != (jl_value_t*)jl_bottom_type) { - // In some corner cases type intersection is conservative and returns something - // for intersect(A, B) even though 
A is a dispatch tuple and !(A <: B). - // For dispatch purposes in such a case we know there's no match. This check - // fixes issue #30394. - if (closure->issubty || !jl_is_dispatch_tupletype(closure->type)) - if (!fptr(ml, closure)) - return 0; - } + closure->ti = jl_type_intersection_env_s(closure->type, (jl_value_t*)ml->sig, penv, &closure->issubty); + if (closure->ti != (jl_value_t*)jl_bottom_type) { + // In some corner cases type intersection is conservative and returns something + // for intersect(A, B) even though A is a dispatch tuple and !(A <: B). + // For dispatch purposes in such a case we know there's no match. This check + // fixes issue #30394. + if (closure->issubty || !jl_is_dispatch_tupletype(closure->type)) + if (!fptr(ml, closure)) + return 0; } - ml = jl_atomic_load_relaxed(&ml->next); } return 1; } +int jl_has_intersect_type_not_kind(jl_value_t *t); +int jl_has_intersect_kind_not_type(jl_value_t *t); + +// if TypeVar tv is used covariantly, it cannot be Union{} +int has_covariant_var(jl_datatype_t *ttypes, jl_tvar_t *tv) +{ + size_t i, l = jl_nparams(ttypes); + for (i = 0; i < l; i++) + if (jl_tparam(ttypes, i) == (jl_value_t*)tv) + return 1; + return 0; +} + +void typemap_slurp_search(jl_typemap_entry_t *ml, struct typemap_intersection_env *closure) +{ + // n.b. we could consider mt->max_args here too, so this optimization + // usually works even if the user forgets the `slurp...` argument, but + // there is discussion that parameter may be going away? (and it is + // already not accurately up-to-date for all tables currently anyways) + if (closure->search_slurp && ml->va) { + jl_value_t *sig = jl_unwrap_unionall((jl_value_t*)ml->sig); + size_t nargs = jl_nparams(sig); + if (nargs > 1 && nargs - 1 == closure->search_slurp) { + jl_vararg_t *va = (jl_vararg_t*)jl_tparam(sig, nargs - 1); + assert(jl_is_vararg((jl_value_t*)va)); + if (va->T == (jl_value_t*)jl_any_type && va->N == NULL) { + // instruct typemap it can set exclude_typeofbottom on parameter nargs + // since we found the necessary slurp argument + closure->search_slurp = 0; + } + } + } +} + int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, struct typemap_intersection_env *closure) { @@ -480,13 +596,12 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, //TODO: fast-path for leaf-type tuples? //if (ttypes->isdispatchtuple) { // register jl_typemap_intersection_visitor_fptr fptr = closure->fptr; - // struct jl_typemap_assoc search = {(jl_value_t*)closure->type, world, closure->env, 0, ~(size_t)0}; - // jl_typemap_entry_t *ml = jl_typemap_assoc_by_type(map, search, offs, /*subtype*/1); - // if (ml) { - // closure->env = search->env; - // if (!fptr(ml, closure)) - // return 0; - // } + // struct jl_typemap_assoc search = {(jl_value_t*)closure->type, world, closure->env, 0, ~(size_t)0}; + // jl_typemap_entry_t *ml = jl_typemap_assoc_by_type(map, search, offs, /*subtype*/1); + // if (ml) { + // closure->env = search->env; + // if (!fptr(ml, closure)) + // return 0; // } // return 1; //} @@ -508,115 +623,186 @@ int jl_typemap_intersection_visitor(jl_typemap_t *map, int offs, if (ty) { while (jl_is_typevar(ty)) ty = ((jl_tvar_t*)ty)->ub; - jl_value_t *typetype = jl_unwrap_unionall(ty); - typetype = jl_is_type_type(typetype) ? 
jl_tparam0(typetype) : NULL; // approxify the tparam until we have a valid type - if (jl_has_free_typevars(ty)) { - ty = jl_unwrap_unionall(ty); - if (jl_is_datatype(ty)) - ty = ((jl_datatype_t*)ty)->name->wrapper; - else - ty = (jl_value_t*)jl_any_type; + if (jl_has_free_typevars(ty)) + ty = jl_rewrap_unionall(ty, closure->type); + JL_GC_PUSH1(&ty); + jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ); + jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); + int maybe_type = 0; + int maybe_kind = 0; + int exclude_typeofbottom = 0; + jl_value_t *typetype = NULL; + jl_value_t *name = NULL; + // pre-check: optimized pre-intersection test to see if `ty` could intersect with any Type or Kind + if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any || tname != (jl_genericmemory_t*)jl_an_empty_memory_any) { + maybe_kind = jl_has_intersect_kind_not_type(ty); + maybe_type = maybe_kind || jl_has_intersect_type_not_kind(ty); + if (maybe_type && !maybe_kind) { + typetype = jl_unwrap_unionall(ty); + typetype = jl_is_type_type(typetype) ? jl_tparam0(typetype) : NULL; + name = typetype ? jl_type_extract_name(typetype) : NULL; + if (!typetype) + exclude_typeofbottom = !jl_subtype((jl_value_t*)jl_typeofbottom_type, ty); + else if (jl_is_typevar(typetype)) + exclude_typeofbottom = has_covariant_var((jl_datatype_t*)ttypes, (jl_tvar_t*)typetype); + else + exclude_typeofbottom = !jl_parameter_includes_bottom(typetype); + } } - jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ); - if (targ != (jl_array_t*)jl_an_empty_vec_any) { - if (typetype && !jl_has_free_typevars(typetype)) { - if (is_cache_leaf(typetype, 1)) { - // direct lookup of leaf types - jl_typemap_t *ml = mtcache_hash_lookup(targ, typetype); - if (ml != jl_nothing) { - if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) return 0; + // First check for intersections with methods defined on Type{T}, where T was a concrete type + if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any && maybe_type && + (!typetype || jl_has_free_typevars(typetype) || is_cache_leaf(typetype, 1))) { // otherwise cannot contain this particular kind, so don't bother with checking + if (!exclude_typeofbottom) { + // detect Type{Union{}}, Type{Type{Union{}}}, and Type{typeof(Union{}} and do those early here + // otherwise the possibility of encountering `Type{Union{}}` in this intersection may + // be forcing us to do some extra work here whenever we see a typevar, even though + // the likelihood of that value actually occurring is frequently likely to be + // zero (or result in an ambiguous match) + targ = jl_atomic_load_relaxed(&cache->targ); // may be GC'd during type-intersection + jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)jl_typeofbottom_type->name); + if (ml != jl_nothing) { + size_t search_slurp = closure->search_slurp; + closure->search_slurp = offs + 1; + if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { + closure->search_slurp = search_slurp; + JL_GC_POP(); + return 0; } + if (closure->search_slurp == 0) + exclude_typeofbottom = 1; + closure->search_slurp = search_slurp; } } - else { - // else an array scan is required to check subtypes - // first, fast-path: optimized pre-intersection test to see if `ty` could intersect with any Type - if (typetype || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) { - targ = jl_atomic_load_relaxed(&cache->targ); // may be GC'd during type-intersection - if (!jl_typemap_intersection_array_visitor(targ, ty, 1, offs, closure)) return 0; + if 
(name != (jl_value_t*)jl_typeofbottom_type->name) { + targ = jl_atomic_load_relaxed(&cache->targ); // may be GC'd earlier + if (exclude_typeofbottom && name && jl_type_extract_name_precise(typetype, 1)) { + // attempt semi-direct lookup of types via their names + // consider the type name first + jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)name); + if (jl_is_genericmemory(ml)) { + if (typetype && !jl_has_free_typevars(typetype)) { + // direct lookup of leaf types + if (is_cache_leaf(typetype, 1)) { + ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, typetype); + if (ml != jl_nothing) { + if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; } + } + } + } + else { + if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, 1, offs, closure)) { JL_GC_POP(); return 0; } + } + } + else if (ml != jl_nothing) { + if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; } + } + } + else { + // else a scan is required to consider all the possible subtypes + if (!jl_typemap_intersection_memory_visitor(targ, exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; } } } } - jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); - if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); + if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { if (is_cache_leaf(ty, 0)) { + jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name; // direct lookup of leaf types - jl_typemap_t *ml = mtcache_hash_lookup(cachearg1, ty); + jl_value_t *ml = mtcache_hash_lookup(cachearg1, (jl_value_t*)name); + if (jl_is_genericmemory(ml)) + ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, ty); if (ml != jl_nothing) { - if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) return 0; + if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) { JL_GC_POP(); return 0; } } } else { - // else an array scan is required to check subtypes - if (!jl_typemap_intersection_array_visitor(cachearg1, ty, 0, offs, closure)) return 0; + jl_value_t *name = jl_type_extract_name(ty); + if (name && jl_type_extract_name_precise(ty, 0)) { + // direct lookup of leaf types + jl_value_t *ml = mtcache_hash_lookup(cachearg1, name); + if (jl_is_genericmemory(ml)) { + if (!jl_typemap_intersection_memory_visitor((jl_genericmemory_t*)ml, ty, 0, offs, closure)) { JL_GC_POP(); return 0; } + } + else { + if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { JL_GC_POP(); return 0; } + } + } + else { + // else a scan is required to check subtypes + if (!jl_typemap_intersection_memory_visitor(cachearg1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; } + } } } - jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname); - if (tname != (jl_array_t*)jl_an_empty_vec_any) { - jl_value_t *name = typetype ? 
jl_type_extract_name(typetype) : NULL; - if (name && !jl_is_typevar(typetype)) { - // semi-direct lookup of types - // TODO: the possibility of encountering `Type{Union{}}` in this intersection may + // Next check for intersections with methods defined on Type{T}, where T was not concrete (it might even have been a TypeVar), but had an extractable TypeName + if (tname != (jl_genericmemory_t*)jl_an_empty_memory_any && maybe_type) { + if (!exclude_typeofbottom || (!typetype && jl_isa((jl_value_t*)jl_typeofbottom_type, ty))) { + // detect Type{Union{}}, Type{Type{Union{}}}, and Type{typeof(Union{}} and do those early here + // otherwise the possibility of encountering `Type{Union{}}` in this intersection may // be forcing us to do some extra work here whenever we see a typevar, even though - // the likelyhood of that value actually occurring is frequently likely to be + // the likelihood of that value actually occurring is frequently likely to be // zero (or result in an ambiguous match) - jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)name)->wrapper); - if (jl_type_extract_name_precise(typetype, 1)) { - // just consider the type and its direct super types - while (1) { - tname = jl_atomic_load_relaxed(&cache->tname); // reload after callback - jl_typemap_t *ml = mtcache_hash_lookup(tname, (jl_value_t*)super->name); - if (ml != jl_nothing) { - if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) return 0; - } - if (super == jl_any_type) - break; - super = super->super; + tname = jl_atomic_load_relaxed(&cache->tname); // may be GC'd earlier + jl_value_t *ml = mtcache_hash_lookup(tname, (jl_value_t*)jl_typeofbottom_type->name); + if (ml != jl_nothing) { + size_t search_slurp = closure->search_slurp; + closure->search_slurp = offs + 1; + if (!jl_typemap_intersection_visitor((jl_typemap_t*)ml, offs+1, closure)) { + closure->search_slurp = search_slurp; + JL_GC_POP(); + return 0; } + if (closure->search_slurp == 0) + exclude_typeofbottom = 1; + closure->search_slurp = search_slurp; } - else { - // consider all of the possible subtypes - if (!jl_typemap_intersection_array_visitor(tname, (jl_value_t*)super, 3, offs, closure)) return 0; + } + if (exclude_typeofbottom && name && jl_type_extract_name_precise(typetype, 1)) { + // semi-direct lookup of types + // just consider the type and its direct super types + jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)name)->wrapper); + if (super->name == jl_typeofbottom_type->name) + super = super->super; // this was handled above + while (1) { + tname = jl_atomic_load_relaxed(&cache->tname); // reload after callback + jl_typemap_t *ml = mtcache_hash_lookup(tname, (jl_value_t*)super->name); + if (ml != jl_nothing) { + if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) { JL_GC_POP(); return 0; } + } + if (super == jl_any_type) + break; + super = super->super; } } else { - // else an array scan is required to check subtypes - // first, fast-path: optimized pre-intersection test to see if `ty` could intersect with any Type - if (name || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) { - tname = jl_atomic_load_relaxed(&cache->tname); // may be GC'd during type-intersection - if (!jl_typemap_intersection_array_visitor(tname, (jl_value_t*)jl_any_type, 3, offs, closure)) return 0; - } + // else a scan is required to check subtypes of typetype too + tname = jl_atomic_load_relaxed(&cache->tname); // may be GC'd earlier + if (!jl_typemap_intersection_memory_visitor(tname, 
exclude_typeofbottom && !maybe_kind ? ty : (jl_value_t*)jl_any_type, 3, offs, closure)) { JL_GC_POP(); return 0; } } } - jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1); - if (name1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1); + if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { jl_value_t *name = jl_type_extract_name(ty); - if (name) { + if (name && jl_type_extract_name_precise(ty, 0)) { jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)name)->wrapper); - if (jl_type_extract_name_precise(ty, 0)) { - // direct lookup of concrete types - while (1) { - name1 = jl_atomic_load_relaxed(&cache->name1); // reload after callback - jl_typemap_t *ml = mtcache_hash_lookup(name1, (jl_value_t*)super->name); - if (ml != jl_nothing) { - if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) return 0; - } - if (super == jl_any_type) - break; - super = super->super; + // direct lookup of concrete types + while (1) { + name1 = jl_atomic_load_relaxed(&cache->name1); // reload after callback + jl_typemap_t *ml = mtcache_hash_lookup(name1, (jl_value_t*)super->name); + if (ml != jl_nothing) { + if (!jl_typemap_intersection_visitor(ml, offs+1, closure)) { JL_GC_POP(); return 0; } } - } - else { - // consider all of the possible subtypes too - if (!jl_typemap_intersection_array_visitor(name1, (jl_value_t*)super, 2, offs, closure)) return 0; + if (super == jl_any_type) + break; + super = super->super; } } else { - // else an array scan is required to check subtypes - if (!jl_typemap_intersection_array_visitor(name1, (jl_value_t*)jl_any_type, 2, offs, closure)) return 0; + // else a scan is required to check subtypes + if (!jl_typemap_intersection_memory_visitor(name1, ty, 2, offs, closure)) { JL_GC_POP(); return 0; } } } + JL_GC_POP(); } if (!jl_typemap_intersection_node_visitor(jl_atomic_load_relaxed(&cache->linear), closure)) return 0; @@ -650,6 +836,10 @@ static jl_typemap_entry_t *jl_typemap_entry_assoc_by_type( size_t n = jl_nparams(unw); int typesisva = n == 0 ? 0 : jl_is_vararg(jl_tparam(unw, n-1)); for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) { + if (search->max_valid < ml->min_world) + continue; + if (search->min_valid > ml->max_world) + continue; size_t lensig = jl_nparams(jl_unwrap_unionall((jl_value_t*)ml->sig)); if (lensig == n || (ml->va && lensig <= n+1)) { int resetenv = 0, ismatch = 1; @@ -799,11 +989,14 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( if (jl_is_type_type(ty)) { jl_value_t *a0 = jl_tparam0(ty); if (is_cache_leaf(a0, 1)) { - jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ); - if (targ != (jl_array_t*)jl_an_empty_vec_any) { - jl_typemap_t *ml = mtcache_hash_lookup(targ, a0); + jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ); + if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any) { + jl_typename_t *name = a0 == jl_bottom_type ? 
jl_typeofbottom_type->name : ((jl_datatype_t*)a0)->name; + jl_value_t *ml = mtcache_hash_lookup(targ, (jl_value_t*)name); + if (jl_is_genericmemory(ml)) + ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, a0); if (ml != jl_nothing) { - jl_typemap_entry_t *li = jl_typemap_assoc_by_type(ml, search, offs + 1, subtype); + jl_typemap_entry_t *li = jl_typemap_assoc_by_type((jl_typemap_t*)ml, search, offs + 1, subtype); if (li) return li; } } @@ -811,11 +1004,14 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } } if (is_cache_leaf(ty, 0)) { - jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); - if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any) { - jl_typemap_t *ml = mtcache_hash_lookup(cachearg1, ty); + jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); + if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { + jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name; + jl_value_t *ml = mtcache_hash_lookup(cachearg1, (jl_value_t*)name); + if (jl_is_genericmemory(ml)) + ml = mtcache_hash_lookup((jl_genericmemory_t*)ml, ty); if (ml != jl_nothing) { - jl_typemap_entry_t *li = jl_typemap_assoc_by_type(ml, search, offs + 1, subtype); + jl_typemap_entry_t *li = jl_typemap_assoc_by_type((jl_typemap_t*)ml, search, offs + 1, subtype); if (li) return li; } } @@ -824,8 +1020,8 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } if (ty || subtype) { // now look at the optimized TypeName caches - jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname); - if (tname != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); + if (tname != (jl_genericmemory_t*)jl_an_empty_memory_any) { jl_value_t *a0 = ty && jl_is_type_type(ty) ? jl_type_extract_name(jl_tparam0(ty)) : NULL; if (a0) { // TODO: if we start analyzing Union types in jl_type_extract_name, then a0 might be over-approximated here, leading us to miss possible subtypes jl_datatype_t *super = (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper); @@ -843,9 +1039,10 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } else { if (!ty || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) { + jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); // reload after type-intersect // couldn't figure out unique `a0` initial point, so scan all for matches - size_t i, l = jl_array_len(tname); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname); + size_t i, l = tname->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(tname); JL_GC_PUSH1(&tname); for (i = 1; i < l; i += 2) { jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]); @@ -861,8 +1058,8 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } } } - jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1); - if (name1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1); + if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { if (ty) { jl_value_t *a0 = jl_type_extract_name(ty); if (a0) { // TODO: if we start analyzing Union types in jl_type_extract_name, then a0 might be over-approximated here, leading us to miss possible subtypes @@ -883,8 +1080,8 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type( } else { // doing subtype, but couldn't figure out unique `ty`, so scan all for supertypes - size_t i, l = jl_array_len(name1); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(name1); + size_t i, l = 
name1->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(name1); JL_GC_PUSH1(&name1); for (i = 1; i < l; i += 2) { jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]); @@ -1002,20 +1199,26 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v jl_value_t *a1 = (offs == 0 ? arg1 : args[offs - 1]); jl_value_t *ty = jl_typeof(a1); assert(jl_is_datatype(ty)); - jl_array_t *targ = jl_atomic_load_relaxed(&cache->targ); - if (ty == (jl_value_t*)jl_datatype_type && targ != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(a1, 1)) { - jl_typemap_t *ml_or_cache = mtcache_hash_lookup(targ, a1); + jl_genericmemory_t *targ = jl_atomic_load_relaxed(&cache->targ); + if (targ != (jl_genericmemory_t*)jl_an_empty_memory_any && is_cache_leaf(a1, 1)) { + jl_typename_t *name = a1 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a1)->name; + jl_value_t *ml_or_cache = mtcache_hash_lookup(targ, (jl_value_t*)name); + if (jl_is_genericmemory(ml_or_cache)) + ml_or_cache = mtcache_hash_lookup((jl_genericmemory_t*)ml_or_cache, a1); jl_typemap_entry_t *ml = jl_typemap_assoc_exact(ml_or_cache, arg1, args, n, offs+1, world); if (ml) return ml; } - jl_array_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); - if (cachearg1 != (jl_array_t*)jl_an_empty_vec_any && is_cache_leaf(ty, 0)) { - jl_typemap_t *ml_or_cache = mtcache_hash_lookup(cachearg1, ty); - jl_typemap_entry_t *ml = jl_typemap_assoc_exact(ml_or_cache, arg1, args, n, offs+1, world); + jl_genericmemory_t *cachearg1 = jl_atomic_load_relaxed(&cache->arg1); + if (cachearg1 != (jl_genericmemory_t*)jl_an_empty_memory_any && is_cache_leaf(ty, 0)) { + jl_typename_t *name = ty == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)ty)->name; + jl_value_t *ml_or_cache = mtcache_hash_lookup(cachearg1, (jl_value_t*)name); + if (jl_is_genericmemory(ml_or_cache)) + ml_or_cache = mtcache_hash_lookup((jl_genericmemory_t*)ml_or_cache, ty); + jl_typemap_entry_t *ml = jl_typemap_assoc_exact((jl_typemap_t*)ml_or_cache, arg1, args, n, offs+1, world); if (ml) return ml; } - jl_array_t *tname = jl_atomic_load_relaxed(&cache->tname); - if (jl_is_kind(ty) && tname != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); + if (jl_is_kind(ty) && tname != (jl_genericmemory_t*)jl_an_empty_memory_any) { jl_value_t *name = jl_type_extract_name(a1); if (name) { if (ty != (jl_value_t*)jl_datatype_type) @@ -1033,8 +1236,8 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v } else { // couldn't figure out unique `name` initial point, so must scan all for matches - size_t i, l = jl_array_len(tname); - _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname); + size_t i, l = tname->length; + _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(tname); JL_GC_PUSH1(&tname); for (i = 1; i < l; i += 2) { jl_typemap_t *ml_or_cache = jl_atomic_load_relaxed(&data[i]); @@ -1049,8 +1252,8 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v JL_GC_POP(); } } - jl_array_t *name1 = jl_atomic_load_relaxed(&cache->name1); - if (name1 != (jl_array_t*)jl_an_empty_vec_any) { + jl_genericmemory_t *name1 = jl_atomic_load_relaxed(&cache->name1); + if (name1 != (jl_genericmemory_t*)jl_an_empty_memory_any) { while (1) { name1 = jl_atomic_load_relaxed(&cache->name1); // reload after tree descent (which may hit safepoints) jl_typemap_t *ml_or_cache = 
mtcache_hash_lookup( @@ -1095,19 +1298,23 @@ static jl_typemap_level_t *jl_new_typemap_level(void) jl_typemap_level_t *cache = (jl_typemap_level_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_level_t), jl_typemap_level_type); - jl_atomic_store_relaxed(&cache->arg1, (jl_array_t*)jl_an_empty_vec_any); - jl_atomic_store_relaxed(&cache->targ, (jl_array_t*)jl_an_empty_vec_any); - jl_atomic_store_relaxed(&cache->name1, (jl_array_t*)jl_an_empty_vec_any); - jl_atomic_store_relaxed(&cache->tname, (jl_array_t*)jl_an_empty_vec_any); + jl_atomic_store_relaxed(&cache->arg1, (jl_genericmemory_t*)jl_an_empty_memory_any); + jl_atomic_store_relaxed(&cache->targ, (jl_genericmemory_t*)jl_an_empty_memory_any); + jl_atomic_store_relaxed(&cache->name1, (jl_genericmemory_t*)jl_an_empty_memory_any); + jl_atomic_store_relaxed(&cache->tname, (jl_genericmemory_t*)jl_an_empty_memory_any); jl_atomic_store_relaxed(&cache->linear, (jl_typemap_entry_t*)jl_nothing); jl_atomic_store_relaxed(&cache->any, jl_nothing); return cache; } -static jl_typemap_level_t *jl_method_convert_list_to_cache( - jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t offs) +static void jl_typemap_memory_insert_( + jl_typemap_t *map, _Atomic(jl_genericmemory_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec, + jl_value_t *parent, int8_t tparam, int8_t offs, jl_value_t *doublesplit); + +static jl_value_t *jl_method_convert_list_to_cache( + jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t tparam, int8_t offs, int8_t doublesplit) { - jl_typemap_level_t *cache = jl_new_typemap_level(); + jl_value_t *cache = doublesplit ? jl_an_empty_memory_any : (jl_value_t*)jl_new_typemap_level(); jl_typemap_entry_t *next = NULL; JL_GC_PUSH3(&cache, &next, &ml); while (ml != (void*)jl_nothing) { @@ -1115,7 +1322,25 @@ static jl_typemap_level_t *jl_method_convert_list_to_cache( jl_atomic_store_relaxed(&ml->next, (jl_typemap_entry_t*)jl_nothing); // n.b. this is being done concurrently with lookups! // TODO: is it safe to be doing this concurrently with lookups? - jl_typemap_level_insert_(map, cache, ml, offs); + if (doublesplit) { + jl_value_t *key = jl_unwrap_unionall((jl_value_t*)ml->sig); + size_t len = jl_nparams(key); + if (offs < len-1) + key = jl_tparam(key, offs); + else + key = jl_tparam(key, len-1); + if (jl_is_vararg(key)) + key = jl_unwrap_vararg(key); + if (key == (jl_value_t*)jl_typeofbottom_type) + key = (jl_value_t*)jl_assume(jl_typeofbottom_type)->super; + if (tparam) { + assert(jl_is_type_type(key)); + key = jl_tparam0(key); + } + jl_typemap_memory_insert_(map, (_Atomic(jl_genericmemory_t*)*)&cache, key, ml, NULL, 0, offs, NULL); + } + else + jl_typemap_level_insert_(map, (jl_typemap_level_t*)cache, ml, offs); ml = next; } JL_GC_POP(); @@ -1141,23 +1366,33 @@ static void jl_typemap_list_insert_( jl_gc_wb(parent, newrec); } +// n.b. 
tparam value only needed if doublesplit is set (for jl_method_convert_list_to_cache) static void jl_typemap_insert_generic( - jl_typemap_t *map, _Atomic(jl_typemap_t*) *pml, jl_value_t *parent, - jl_typemap_entry_t *newrec, int8_t offs) + jl_typemap_t *map, _Atomic(jl_value_t*) *pml, jl_value_t *parent, + jl_typemap_entry_t *newrec, int8_t tparam, int8_t offs, jl_value_t *doublesplit) { - jl_typemap_t *ml = jl_atomic_load_relaxed(pml); + jl_value_t *ml = jl_atomic_load_relaxed(pml); + if (jl_is_genericmemory(ml)) { + assert(doublesplit); + jl_typemap_memory_insert_(map, (_Atomic(jl_genericmemory_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL); + return; + } if (jl_typeof(ml) == (jl_value_t*)jl_typemap_level_type) { + assert(!doublesplit); jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs); return; } unsigned count = jl_typemap_list_count_locked((jl_typemap_entry_t*)ml); if (count > MAX_METHLIST_COUNT) { - ml = (jl_typemap_t*)jl_method_convert_list_to_cache( - map, (jl_typemap_entry_t*)ml, offs); + ml = jl_method_convert_list_to_cache( + map, (jl_typemap_entry_t*)ml, tparam, offs, doublesplit != NULL); jl_atomic_store_release(pml, ml); jl_gc_wb(parent, ml); - jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs); + if (doublesplit) + jl_typemap_memory_insert_(map, (_Atomic(jl_genericmemory_t*)*)pml, doublesplit, newrec, parent, 0, offs, NULL); + else + jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs); return; } @@ -1165,16 +1400,16 @@ static void jl_typemap_insert_generic( parent, newrec); } -static void jl_typemap_array_insert_( - jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec, - jl_value_t *parent, int8_t offs) +static void jl_typemap_memory_insert_( + jl_typemap_t *map, _Atomic(jl_genericmemory_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec, + jl_value_t *parent, int8_t tparam, int8_t offs, jl_value_t *doublesplit) { - jl_array_t *cache = jl_atomic_load_relaxed(pcache); - _Atomic(jl_typemap_t*) *pml = mtcache_hash_lookup_bp(cache, key); - if (pml != NULL) - jl_typemap_insert_generic(map, pml, (jl_value_t*)cache, newrec, offs+1); - else + jl_genericmemory_t *cache = jl_atomic_load_relaxed(pcache); + _Atomic(jl_value_t*) *pml = mtcache_hash_lookup_bp(cache, key); + if (pml == NULL) mtcache_hash_insert(pcache, parent, key, (jl_typemap_t*)newrec); + else + jl_typemap_insert_generic(map, pml, (jl_value_t*) cache, newrec, tparam, offs + (doublesplit ? 0 : 1), doublesplit); } static void jl_typemap_level_insert_( @@ -1205,7 +1440,7 @@ static void jl_typemap_level_insert_( t1 = (jl_value_t*)jl_assume(jl_typeofbottom_type)->super; // If the type at `offs` is Any, put it in the Any list if (t1 && jl_is_any(t1)) { - jl_typemap_insert_generic(map, &cache->any, (jl_value_t*)cache, newrec, offs+1); + jl_typemap_insert_generic(map, &cache->any, (jl_value_t*)cache, newrec, 0, offs+1, NULL); return; } // Don't put Varargs in the optimized caches (too hard to handle in lookup and bp) @@ -1216,12 +1451,14 @@ static void jl_typemap_level_insert_( // and we use the table indexed for that purpose. jl_value_t *a0 = jl_tparam0(t1); if (is_cache_leaf(a0, 1)) { - jl_typemap_array_insert_(map, &cache->targ, a0, newrec, (jl_value_t*)cache, offs); + jl_typename_t *name = a0 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)a0)->name; + jl_typemap_memory_insert_(map, &cache->targ, (jl_value_t*)name, newrec, (jl_value_t*)cache, 1, offs, jl_is_datatype(name->wrapper) ? 
NULL : a0); return; } } if (is_cache_leaf(t1, 0)) { - jl_typemap_array_insert_(map, &cache->arg1, t1, newrec, (jl_value_t*)cache, offs); + jl_typename_t *name = t1 == jl_bottom_type ? jl_typeofbottom_type->name : ((jl_datatype_t*)t1)->name; + jl_typemap_memory_insert_(map, &cache->arg1, (jl_value_t*)name, newrec, (jl_value_t*)cache, 0, offs, jl_is_datatype(name->wrapper) ? NULL : t1); return; } @@ -1231,12 +1468,12 @@ static void jl_typemap_level_insert_( if (jl_is_type_type(t1)) { a0 = jl_type_extract_name(jl_tparam0(t1)); jl_datatype_t *super = a0 ? (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper) : jl_any_type; - jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, offs); + jl_typemap_memory_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, 1, offs, NULL); return; } a0 = jl_type_extract_name(t1); if (a0 && a0 != (jl_value_t*)jl_any_type->name) { - jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, offs); + jl_typemap_memory_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, 0, offs, NULL); return; } } @@ -1292,7 +1529,7 @@ void jl_typemap_insert(_Atomic(jl_typemap_t *) *pcache, jl_value_t *parent, jl_typemap_entry_t *newrec, int8_t offs) { jl_typemap_t *cache = jl_atomic_load_relaxed(pcache); - jl_typemap_insert_generic(cache, pcache, parent, newrec, offs); + jl_typemap_insert_generic(cache, pcache, parent, newrec, 0, offs, NULL); } #ifdef __cplusplus diff --git a/src/work-stealing-queue.h b/src/work-stealing-queue.h new file mode 100644 index 0000000000000..38429e02886e9 --- /dev/null +++ b/src/work-stealing-queue.h @@ -0,0 +1,102 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifndef WORK_STEALING_QUEUE_H +#define WORK_STEALING_QUEUE_H + +#include "julia_atomics.h" +#include "assert.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// ======= +// Chase and Lev's work-stealing queue, optimized for +// weak memory models by Le et al. +// +// * Chase D., Lev Y. Dynamic Circular Work-Stealing queue +// * Le N. M. et al. 
Correct and Efficient Work-Stealing for +// Weak Memory Models +// ======= + +typedef struct { + char *buffer; + int32_t capacity; + int32_t mask; +} ws_array_t; + +static inline ws_array_t *create_ws_array(size_t capacity, int32_t eltsz) JL_NOTSAFEPOINT +{ + ws_array_t *a = (ws_array_t *)malloc_s(sizeof(ws_array_t)); + a->buffer = (char *)malloc_s(capacity * eltsz); + a->capacity = capacity; + a->mask = capacity - 1; + return a; +} + +typedef struct { + _Atomic(int64_t) top; + _Atomic(int64_t) bottom; + _Atomic(ws_array_t *) array; +} ws_queue_t; + +static inline ws_array_t *ws_queue_push(ws_queue_t *q, void *elt, int32_t eltsz) JL_NOTSAFEPOINT +{ + int64_t b = jl_atomic_load_relaxed(&q->bottom); + int64_t t = jl_atomic_load_acquire(&q->top); + ws_array_t *ary = jl_atomic_load_relaxed(&q->array); + ws_array_t *old_ary = NULL; + if (__unlikely(b - t > ary->capacity - 1)) { + ws_array_t *new_ary = create_ws_array(2 * ary->capacity, eltsz); + for (int i = 0; i < ary->capacity; i++) { + memcpy(new_ary->buffer + ((t + i) & new_ary->mask) * eltsz, ary->buffer + ((t + i) & ary->mask) * eltsz, eltsz); + } + jl_atomic_store_release(&q->array, new_ary); + old_ary = ary; + ary = new_ary; + } + memcpy(ary->buffer + (b & ary->mask) * eltsz, elt, eltsz); + jl_fence_release(); + jl_atomic_store_relaxed(&q->bottom, b + 1); + return old_ary; +} + +static inline void ws_queue_pop(ws_queue_t *q, void *dest, int32_t eltsz) JL_NOTSAFEPOINT +{ + int64_t b = jl_atomic_load_relaxed(&q->bottom) - 1; + ws_array_t *ary = jl_atomic_load_relaxed(&q->array); + jl_atomic_store_relaxed(&q->bottom, b); + jl_fence(); + int64_t t = jl_atomic_load_relaxed(&q->top); + if (__likely(t <= b)) { + memcpy(dest, ary->buffer + (b & ary->mask) * eltsz, eltsz); + if (t == b) { + if (!jl_atomic_cmpswap(&q->top, &t, t + 1)) + memset(dest, 0, eltsz); + jl_atomic_store_relaxed(&q->bottom, b + 1); + } + } + else { + memset(dest, 0, eltsz); + jl_atomic_store_relaxed(&q->bottom, b + 1); + } +} + +static inline void ws_queue_steal_from(ws_queue_t *q, void *dest, int32_t eltsz) JL_NOTSAFEPOINT +{ + int64_t t = jl_atomic_load_acquire(&q->top); + jl_fence(); + int64_t b = jl_atomic_load_acquire(&q->bottom); + if (t < b) { + ws_array_t *ary = jl_atomic_load_relaxed(&q->array); + memcpy(dest, ary->buffer + (t & ary->mask) * eltsz, eltsz); + if (!jl_atomic_cmpswap(&q->top, &t, t + 1)) + memset(dest, 0, eltsz); + } +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/stdlib/.gitignore b/stdlib/.gitignore index 038b2d9602b2a..ce744aa43d9f5 100644 --- a/stdlib/.gitignore +++ b/stdlib/.gitignore @@ -1,12 +1,12 @@ /srccache +/DelimitedFiles-* +/DelimitedFiles /Pkg-* /Pkg /Statistics-* /Statistics /LibCURL-* /LibCURL -/DelimitedFiles-* -/DelimitedFiles /Downloads-* /Downloads /ArgTools-* @@ -21,5 +21,12 @@ /SparseArrays /SHA-* /SHA +/LazyArtifacts-* +/LazyArtifacts +/Distributed-* +/Distributed +/StyledStrings-* +/StyledStrings /*_jll/StdlibArtifacts.toml /*/Manifest.toml +/*.image diff --git a/stdlib/ArgTools.version b/stdlib/ArgTools.version index 0ae273bb18db6..ad2febe81e46e 100644 --- a/stdlib/ArgTools.version +++ b/stdlib/ArgTools.version @@ -1,4 +1,4 @@ ARGTOOLS_BRANCH = master -ARGTOOLS_SHA1 = 08b11b2707593d4d7f92e5f1b9dba7668285ff82 +ARGTOOLS_SHA1 = 4eccde45ddc27e4f7fc9094b2861c684e062adb2 ARGTOOLS_GIT_URL := https://github.com/JuliaIO/ArgTools.jl.git ARGTOOLS_TAR_URL = https://api.github.com/repos/JuliaIO/ArgTools.jl/tarball/$1 diff --git a/stdlib/Artifacts/Project.toml b/stdlib/Artifacts/Project.toml index 7251b79cea8c1..c4e5cc031375c 
100644 --- a/stdlib/Artifacts/Project.toml +++ b/stdlib/Artifacts/Project.toml @@ -1,5 +1,6 @@ name = "Artifacts" uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl index cdd8ca2fb2da5..348d0d8990f58 100644 --- a/stdlib/Artifacts/src/Artifacts.jl +++ b/stdlib/Artifacts/src/Artifacts.jl @@ -18,7 +18,7 @@ function parse_toml(path::String) Base.parsed_toml(path) end -# keep in sync with Base.project_names and Base.manifest_names +# keep in sync with Base.project_names const artifact_names = ("JuliaArtifacts.toml", "Artifacts.toml") const ARTIFACTS_DIR_OVERRIDE = Ref{Union{String,Nothing}}(nothing) @@ -52,16 +52,33 @@ function artifacts_dirs(args...) return String[abspath(depot, "artifacts", args...) for depot in Base.DEPOT_PATH] else # If we've been given an override, use _only_ that directory. - return String[abspath(ARTIFACTS_DIR_OVERRIDE[], args...)] + return String[abspath(ARTIFACTS_DIR_OVERRIDE[]::String, args...)] end end +# Recursive function, let's not make this a closure because it then has to +# be boxed. +function parse_mapping(mapping::String, name::String, override_file::String) + if !isabspath(mapping) && !isempty(mapping) + mapping = tryparse(Base.SHA1, mapping) + if mapping === nothing + @error("Invalid override in '$(override_file)': entry '$(name)' must map to an absolute path or SHA1 hash!") + end + end + return mapping +end +function parse_mapping(mapping::Dict, name::String, override_file::String) + return Dict(k => parse_mapping(v, name, override_file) for (k, v) in mapping) +end +# Fallthrough for invalid Overrides.toml files +parse_mapping(mapping, name::String, override_file::String) = nothing + """ ARTIFACT_OVERRIDES -Artifact locations can be overridden by writing `Override.toml` files within the artifact +Artifact locations can be overridden by writing `Overrides.toml` files within the artifact directories of Pkg depots. For example, in the default depot `~/.julia`, one may create -a `~/.julia/artifacts/Override.toml` file with the following contents: +a `~/.julia/artifacts/Overrides.toml` file with the following contents: 78f35e74ff113f02274ce60dab6e92b4546ef806 = "/path/to/replacement" c76f8cda85f83a06d17de6c57aabf9e294eb2537 = "fb886e813a4aed4147d5979fcdf27457d20aa35d" @@ -88,7 +105,7 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any} # # Overrides per UUID/bound name are intercepted upon Artifacts.toml load, and new # entries within the "hash" overrides are generated on-the-fly. Thus, all redirects - # mechanisticly happen through the "hash" overrides. + # mechanistically happen through the "hash" overrides. 
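As a sketch of what the override machinery described above produces (illustrative only, not part of the patch; it assumes the example `Overrides.toml` from the `ARTIFACT_OVERRIDES` docstring is present in the active depot):

    # ~/.julia/artifacts/Overrides.toml (hypothetical contents, taken from the docstring above):
    #   78f35e74ff113f02274ce60dab6e92b4546ef806 = "/path/to/replacement"
    #   c76f8cda85f83a06d17de6c57aabf9e294eb2537 = "fb886e813a4aed4147d5979fcdf27457d20aa35d"
    using Base: SHA1
    using Artifacts: load_overrides

    overrides = load_overrides(force = true)    # re-read Overrides.toml even if a cached copy exists
    # Hash-based overrides map a git-tree-sha1 to a replacement path or another sha1:
    overrides[:hash][SHA1("78f35e74ff113f02274ce60dab6e92b4546ef806")]   # "/path/to/replacement"
    overrides[:hash][SHA1("c76f8cda85f83a06d17de6c57aabf9e294eb2537")]   # SHA1("fb886e813a4aed4147d5979fcdf27457d20aa35d")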
overrides = Dict{Symbol,Any}( # Overrides by UUID :UUID => Dict{Base.UUID,Dict{String,Union{String,SHA1}}}(), @@ -103,24 +120,9 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any} # Load the toml file depot_override_dict = parse_toml(override_file) - function parse_mapping(mapping::String, name::String) - if !isabspath(mapping) && !isempty(mapping) - mapping = tryparse(Base.SHA1, mapping) - if mapping === nothing - @error("Invalid override in '$(override_file)': entry '$(name)' must map to an absolute path or SHA1 hash!") - end - end - return mapping - end - function parse_mapping(mapping::Dict, name::String) - return Dict(k => parse_mapping(v, name) for (k, v) in mapping) - end - # Fallthrough for invalid Overrides.toml files - parse_mapping(mapping, name::String) = nothing - for (k, mapping) in depot_override_dict # First, parse the mapping. Is it an absolute path, a valid SHA1-hash, or neither? - mapping = parse_mapping(mapping, k) + mapping = parse_mapping(mapping, k, override_file) if mapping === nothing @error("Invalid override in '$(override_file)': failed to parse entry `$(k)`") continue @@ -242,7 +244,7 @@ end """ artifact_exists(hash::SHA1; honor_overrides::Bool=true) -Returns whether or not the given artifact (identified by its sha1 git tree hash) exists +Return whether or not the given artifact (identified by its sha1 git tree hash) exists on-disk. Note that it is possible that the given artifact exists in multiple locations (e.g. within multiple depots). @@ -267,7 +269,7 @@ function unpack_platform(entry::Dict{String,Any}, name::String, end if !haskey(entry, "arch") - @error("Invalid artifacts file at '$(artifacts_toml)': platform-specific artifact entrty '$name' missing 'arch' key") + @error("Invalid artifacts file at '$(artifacts_toml)': platform-specific artifact entry '$name' missing 'arch' key") return nothing end @@ -313,7 +315,7 @@ end """ process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID) -When loading an `Artifacts.toml` file, we must check `Override.toml` files to see if any +When loading an `Artifacts.toml` file, we must check `Overrides.toml` files to see if any of the artifacts within it have been overridden by UUID. If they have, we honor the overrides by inspecting the hashes of the targeted artifacts, then overriding them to point to the given override, punting the actual redirection off to the hash-based @@ -325,7 +327,7 @@ function process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID) # override for this UUID, and inserting new overrides for those hashes. overrides = load_overrides() if haskey(overrides[:UUID], pkg_uuid) - pkg_overrides = overrides[:UUID][pkg_uuid] + pkg_overrides = overrides[:UUID][pkg_uuid]::Dict{String, <:Any} for name in keys(artifact_dict) # Skip names that we're not overriding @@ -455,7 +457,7 @@ end include_lazy = false, pkg_uuid = nothing) -Returns a dictionary where every entry is an artifact from the given `Artifacts.toml` +Return a dictionary where every entry is an artifact from the given `Artifacts.toml` that should be downloaded for the requested platform. Lazy artifacts are included if `include_lazy` is set. 
""" @@ -546,7 +548,7 @@ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dic if nameof(lazyartifacts) in (:Pkg, :Artifacts) Base.depwarn("using Pkg instead of using LazyArtifacts is deprecated", :var"@artifact_str", force=true) end - return jointail(lazyartifacts.ensure_artifact_installed(string(name), artifacts_toml; platform), path_tail) + return jointail(lazyartifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform), path_tail) end error("Artifact $(repr(name)) is a lazy artifact; package developers must call `using LazyArtifacts` in $(__module__) before using lazy artifacts.") end @@ -560,7 +562,8 @@ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dic suggestion_str = if query_override(hash) !== nothing "Check that your `Overrides.toml` file is correct (https://pkgdocs.julialang.org/v1/artifacts/#Overriding-artifact-locations)." else - "Try `using Pkg; Pkg.instantiate()` to re-install all missing resources." + "Try `using Pkg; Pkg.instantiate()` to re-install all missing resources if the artifact is part of a package \ + or call `Pkg.ensure_artifact_installed` (https://pkgdocs.julialang.org/v1/api/#Pkg.Artifacts.ensure_artifact_installed) if not." end error("Artifact $(repr(name)) was not found by looking in the $(path_str)$suggestion_str") @@ -569,7 +572,7 @@ end raw""" split_artifact_slash(name::String) -Splits an artifact indexing string by path deliminters, isolates the first path element, +Splits an artifact indexing string by path delimiters, isolates the first path element, returning that and the `joinpath()` of the remaining arguments. This normalizes all path separators to the native path separator for the current platform. Examples: @@ -610,7 +613,7 @@ end artifact_slash_lookup(name::String, atifact_dict::Dict, artifacts_toml::String, platform::Platform) -Returns `artifact_name`, `artifact_path_tail`, and `hash` by looking the results up in +Return `artifact_name`, `artifact_path_tail`, and `hash` by looking the results up in the given `artifacts_toml`, first extracting the name and path tail from the given `name` to support slash-indexing within the given artifact. """ @@ -711,7 +714,7 @@ end with_artifacts_directory(f::Function, artifacts_dir::AbstractString) = with_artifacts_directory(f, String(artifacts_dir)::String) query_override(pkg::Base.UUID, artifact_name::AbstractString; overrides::Dict=load_overrides()) = - query_override(pkg, String(artifact_name)::String; overrides=convert(Dict{Symbol, Any}(overrides))) + query_override(pkg, String(artifact_name)::String; overrides=convert(Dict{Symbol, Any}, overrides)) unpack_platform(entry::Dict, name::AbstractString, artifacts_toml::AbstractString) = unpack_platform(convert(Dict{String, Any}, entry), String(name)::String, String(artifacts_toml)::String) load_artifacts_toml(artifacts_toml::AbstractString; kwargs...) 
= @@ -737,5 +740,8 @@ artifact_slash_lookup(name::AbstractString, artifact_dict::Dict, artifacts_toml: precompile(load_artifacts_toml, (String,)) precompile(NamedTuple{(:pkg_uuid,)}, (Tuple{Base.UUID},)) precompile(Core.kwfunc(load_artifacts_toml), (NamedTuple{(:pkg_uuid,), Tuple{Base.UUID}}, typeof(load_artifacts_toml), String)) +precompile(parse_mapping, (String, String, String)) +precompile(parse_mapping, (Dict{String, Any}, String, String)) + end # module Artifacts diff --git a/stdlib/Artifacts/test/runtests.jl b/stdlib/Artifacts/test/runtests.jl index 67117217be549..db0d5d4c53ab1 100644 --- a/stdlib/Artifacts/test/runtests.jl +++ b/stdlib/Artifacts/test/runtests.jl @@ -1,12 +1,92 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +import Base: SHA1 using Artifacts, Test, Base.BinaryPlatforms -using Artifacts: with_artifacts_directory, pack_platform!, unpack_platform +using Artifacts: with_artifacts_directory, pack_platform!, unpack_platform, load_overrides +using TOML # prepare for the package tests by ensuring the required artifacts are downloaded now artifacts_dir = mktempdir() run(addenv(`$(Base.julia_cmd()) --color=no $(joinpath(@__DIR__, "refresh_artifacts.jl")) $(artifacts_dir)`, "TERM"=>"dumb")) +@testset "Load Overrides" begin + """ + create_test_overrides_toml(temp_dir::String) + + Create "Overrides.toml" in the given `temp_dir`. + """ + function create_test_overrides_toml(temp_dir::String) + # Define the overrides + overrides = Dict( + "78f35e74ff113f02274ce60dab6e92b4546ef806" => "/path/to/replacement", + "c76f8cda85f83a06d17de6c57aabf9e294eb2537" => "fb886e813a4aed4147d5979fcdf27457d20aa35d", + "d57dbccd-ca19-4d82-b9b8-9d660942965b" => Dict( + "c_simple" => "/path/to/c_simple_dir", + "libfoo" => "fb886e813a4aed4147d5979fcdf27457d20aa35d" + ) + ) + + # Get the artifacts directory + artifacts_dir = joinpath(temp_dir, "artifacts") + + # Ensure the artifacts directory exists + isdir(artifacts_dir) || mkdir(artifacts_dir) + + # Get the path to the Overrides.toml file + overrides_path = joinpath(artifacts_dir, "Overrides.toml") + + # Create the Overrides.toml file + open(overrides_path, "w") do io + TOML.print(io, overrides) + end + end + + # Specify the expected test result when depot path does not exist or no overriding happened + empty_output = Dict{Symbol, Any}( + :UUID => Dict{Base.UUID, Dict{String, Union{SHA1, String}}}(), + :hash => Dict{SHA1, Union{SHA1, String}}() + ) + + # Specify the expected test result when overriding happened + expected_output = Dict{Symbol, Any}( + :UUID => Dict{Base.UUID, Dict{String, Union{SHA1, String}}}(Base.UUID("d57dbccd-ca19-4d82-b9b8-9d660942965b") => Dict("c_simple" => "/path/to/c_simple_dir", "libfoo" => SHA1("fb886e813a4aed4147d5979fcdf27457d20aa35d"))), + :hash => Dict{SHA1, Union{SHA1, String}}(SHA1("78f35e74ff113f02274ce60dab6e92b4546ef806") => "/path/to/replacement", SHA1("c76f8cda85f83a06d17de6c57aabf9e294eb2537") => SHA1("fb886e813a4aed4147d5979fcdf27457d20aa35d")) + ) + + # Test `load_overrides()` works with *no* "Overrides.toml" file + @test load_overrides() == empty_output + + # Create a temporary directory + mktempdir() do temp_dir + # Back up the old `DEPOT_PATH`` + old_depot_path = copy(Base.DEPOT_PATH) + + # Set `DEPOT_PATH` to that directory + empty!(Base.DEPOT_PATH) + push!(Base.DEPOT_PATH, temp_dir) + + try + # Create "Overrides.toml" for the test + create_test_overrides_toml(temp_dir) + + # Test `load_overrides()` works *with* "Overrides.toml" file but non-nothing ARTIFACT_OVERRIDES[] + 
@test load_overrides() == empty_output + + # Test `load_overrides()` works *with* "Overrides.toml" file with force parameter, which overrides even when `ARTIFACT_OVERRIDES[] !== nothing` + @test load_overrides(force=true) == expected_output + finally # Make sure `DEPOT_PATH` will be restored to the status quo in the event of a bug + # Restore the old `DEPOT_PATH` to avoid messing with any other code + empty!(Base.DEPOT_PATH) + append!(Base.DEPOT_PATH, old_depot_path) + end + end + # Temporary directory and test "Overrides.toml" file will be automatically deleted when out of scope + # This means after this block, the system *should* behave like this test never happened. + + # Test the "Overrides.toml" file is cleared back to the status quo + @test load_overrides(force=true) == empty_output +end + @testset "Artifact Paths" begin mktempdir() do tempdir with_artifacts_directory(tempdir) do @@ -120,6 +200,23 @@ end end end +@testset "artifact_hash()" begin + # Use the Linux OS on an ARMv7L architecture to make the tests reproducible + armv7l_linux = Platform("armv7l", "linux") + + # Check the first key in Artifacts.toml is hashed correctly + @test artifact_hash("HelloWorldC", joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) == + SHA1("5a8288c8a30578c0d0f24a9cded29579517ce7a8") + + # Check the second key in Artifacts.toml is hashed correctly + @test artifact_hash("socrates", joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) == + SHA1("43563e7631a7eafae1f9f8d9d332e3de44ad7239") + + # Check artifact_hash() works for any AbstractString + @test artifact_hash(SubString("HelloWorldC0", 1, 11), joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) == + SHA1("5a8288c8a30578c0d0f24a9cded29579517ce7a8") +end + @testset "select_downloadable_artifacts()" begin armv7l_linux = Platform("armv7l", "linux") artifacts = select_downloadable_artifacts(joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux) @@ -161,6 +258,6 @@ end @testset "`Artifacts.artifact_names` and friends" begin n = length(Artifacts.artifact_names) @test length(Base.project_names) == n - @test length(Base.manifest_names) == n + @test length(Base.manifest_names) == 2n # there are two manifest names per project name @test length(Base.preferences_names) == n end diff --git a/stdlib/Base64/Project.toml b/stdlib/Base64/Project.toml index 68d63837fc385..14796beb7e21a 100644 --- a/stdlib/Base64/Project.toml +++ b/stdlib/Base64/Project.toml @@ -1,5 +1,6 @@ name = "Base64" uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl index 108faa18f5b85..f1fef096888ed 100644 --- a/stdlib/Base64/src/Base64.jl +++ b/stdlib/Base64/src/Base64.jl @@ -33,7 +33,7 @@ include("decode.jl") """ stringmime(mime, x; context=nothing) -Returns an `AbstractString` containing the representation of `x` in the +Return an `AbstractString` containing the representation of `x` in the requested `mime` type. This is similar to [`repr(mime, x)`](@ref) except that binary data is base64-encoded as an ASCII string. diff --git a/stdlib/Base64/src/buffer.jl b/stdlib/Base64/src/buffer.jl index 44a9c0931ac95..009a6d56cfde8 100644 --- a/stdlib/Base64/src/buffer.jl +++ b/stdlib/Base64/src/buffer.jl @@ -2,37 +2,37 @@ # Data buffer for pipes.
mutable struct Buffer - data::Vector{UInt8} - ptr::Ptr{UInt8} + const data::Memory{UInt8} + offset::Int size::Int function Buffer(bufsize) - data = Vector{UInt8}(undef, bufsize) - return new(data, pointer(data), 0) + data = Memory{UInt8}(undef, bufsize) + return new(data, 0, 0) end end Base.empty!(buffer::Buffer) = buffer.size = 0 -Base.getindex(buffer::Buffer, i::Integer) = unsafe_load(buffer.ptr, i) -Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = unsafe_store!(buffer.ptr, v, i) +Base.getindex(buffer::Buffer, i::Integer) = buffer.data[buffer.offset + i] +Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = buffer.data[buffer.offset + i] = v Base.firstindex(buffer::Buffer) = 1 Base.lastindex(buffer::Buffer) = buffer.size -Base.pointer(buffer::Buffer) = buffer.ptr -capacity(buffer::Buffer) = Int(pointer(buffer.data, lastindex(buffer.data) + 1) - buffer.ptr) +Base.pointer(buffer::Buffer) = pointer(buffer.data) + buffer.offset +capacity(buffer::Buffer) = length(buffer.data) - buffer.offset function consumed!(buffer::Buffer, n::Integer) @assert n ≤ buffer.size - buffer.ptr += n + buffer.offset += n buffer.size -= n end function read_to_buffer(io::IO, buffer::Buffer) - offset = buffer.ptr - pointer(buffer.data) + offset = buffer.offset copyto!(buffer.data, 1, buffer.data, offset + 1, buffer.size) - buffer.ptr = pointer(buffer.data) + buffer.offset = 0 if !eof(io) n = min(bytesavailable(io), capacity(buffer) - buffer.size) - unsafe_read(io, buffer.ptr + buffer.size, n) + unsafe_read(io, pointer(buffer) + buffer.size, n) buffer.size += n end return diff --git a/stdlib/CRC32c/Project.toml b/stdlib/CRC32c/Project.toml index c1de88cbc7c52..d3ab5ff019503 100644 --- a/stdlib/CRC32c/Project.toml +++ b/stdlib/CRC32c/Project.toml @@ -1,5 +1,6 @@ name = "CRC32c" uuid = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/CRC32c/src/CRC32c.jl b/stdlib/CRC32c/src/CRC32c.jl index 42a5f468a8886..35d2d4cb339d6 100644 --- a/stdlib/CRC32c/src/CRC32c.jl +++ b/stdlib/CRC32c/src/CRC32c.jl @@ -36,7 +36,7 @@ function crc32c end crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = Base._crc32c(a, crc) -crc32c(s::String, crc::UInt32=0x00000000) = Base._crc32c(s, crc) +crc32c(s::Union{String, SubString{String}}, crc::UInt32=0x00000000) = Base._crc32c(s, crc) """ crc32c(io::IO, [nb::Integer,] crc::UInt32=0x00000000) diff --git a/stdlib/CRC32c/test/runtests.jl b/stdlib/CRC32c/test/runtests.jl index b385880850abc..41a7ea2ab62fa 100644 --- a/stdlib/CRC32c/test/runtests.jl +++ b/stdlib/CRC32c/test/runtests.jl @@ -6,7 +6,9 @@ using CRC32c function test_crc32c(crc32c) # CRC32c checksum (test data generated from @andrewcooke's CRC.jl package) for (n,crc) in 
[(0,0x00000000),(1,0xa016d052),(2,0x03f89f52),(3,0xf130f21e),(4,0x29308cf4),(5,0x53518fab),(6,0x4f4dfbab),(7,0xbd3a64dc),(8,0x46891f81),(9,0x5a14b9f9),(10,0xb219db69),(11,0xd232a91f),(12,0x51a15563),(13,0x9f92de41),(14,0x4d8ae017),(15,0xc8b74611),(16,0xa0de6714),(17,0x672c992a),(18,0xe8206eb6),(19,0xc52fd285),(20,0x327b0397),(21,0x318263dd),(22,0x08485ccd),(23,0xea44d29e),(24,0xf6c0cb13),(25,0x3969bba2),(26,0x6a8810ec),(27,0x75b3d0df),(28,0x82d535b1),(29,0xbdf7fc12),(30,0x1f836b7d),(31,0xd29f33af),(32,0x8e4acb3e),(33,0x1cbee2d1),(34,0xb25f7132),(35,0xb0fa484c),(36,0xb9d262b4),(37,0x3207fe27),(38,0xa024d7ac),(39,0x49a2e7c5),(40,0x0e2c157f),(41,0x25f7427f),(42,0x368c6adc),(43,0x75efd4a5),(44,0xa84c5c31),(45,0x0fc817b2),(46,0x8d99a881),(47,0x5cc3c078),(48,0x9983d5e2),(49,0x9267c2db),(50,0xc96d4745),(51,0x058d8df3),(52,0x453f9cf3),(53,0xb714ade1),(54,0x55d3c2bc),(55,0x495710d0),(56,0x3bddf494),(57,0x4f2577d0),(58,0xdae0f604),(59,0x3c57c632),(60,0xfe39bbb0),(61,0x6f5d1d41),(62,0x7d996665),(63,0x68c738dc),(64,0x8dfea7ae)] - @test crc32c(UInt8[1:n;]) == crc == crc32c(String(UInt8[1:n;])) + s = String(UInt8[1:n;]) + ss = SubString(String(UInt8[0:(n+1);]), 2:(n+1)) + @test crc32c(UInt8[1:n;]) == crc == crc32c(s) == crc32c(ss) end # test that crc parameter is equivalent to checksum of concatenated data, @@ -43,12 +45,25 @@ function test_crc32c(crc32c) rm(f, force=true) end end + + # test longer arrays to cover all the code paths in crc32c.c + LONG = 8192 # from crc32c.c + SHORT = 256 # from crc32c.c + n = LONG*3+SHORT*3+SHORT*2+64+7 + big = vcat(reinterpret(UInt8, hton.(0x74d7f887 .^ (1:n÷4))), UInt8[1:n%4;]) + for (offset,crc) in [(0, 0x13a5ecd5), (1, 0xecf34b7e), (2, 0xfa71b596), (3, 0xbfd24745), (4, 0xf0cb3370), (5, 0xb0ec88b5), (6, 0x258c20a8), (7, 0xa9bd638d)] + @test crc == crc32c(@view big[1+offset:end]) + end end unsafe_crc32c_sw(a, n, crc) = ccall(:jl_crc32c_sw, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n) crc32c_sw(a::Union{Array{UInt8},Base.FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) = unsafe_crc32c_sw(a, length(a), crc) -crc32c_sw(s::String, crc::UInt32=0x00000000) = unsafe_crc32c_sw(s, sizeof(s), crc) + +function crc32c_sw(s::Union{String, SubString{String}}, crc::UInt32=0x00000000) + unsafe_crc32c_sw(s, sizeof(s), crc) +end + function crc32c_sw(io::IO, nb::Integer, crc::UInt32=0x00000000) nb < 0 && throw(ArgumentError("number of bytes to checksum must be ≥ 0")) buf = Vector{UInt8}(undef, min(nb, 24576)) diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml index 877a1ab5b005c..3e15ff6b87b71 100644 --- a/stdlib/CompilerSupportLibraries_jll/Project.toml +++ b/stdlib/CompilerSupportLibraries_jll/Project.toml @@ -2,9 +2,9 @@ name = "CompilerSupportLibraries_jll" uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" # NOTE: When updating this, also make sure to update the value -# `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable +# `CSL_NEXT_GLIBCXX_VERSION` in `Make.inc`, to properly disable # automatic usage of BB-built CSLs on extremely up-to-date systems! 
-version = "0.5.2+0" +version = "1.1.0+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl index 1b2c0cd41cbe2..bd7a0571f9d5a 100644 --- a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl +++ b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl @@ -14,13 +14,13 @@ export libgfortran, libstdcxx, libgomp # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libgfortran_handle = C_NULL -libgfortran_path = "" -libstdcxx_handle = C_NULL -libstdcxx_path = "" -libgomp_handle = C_NULL -libgomp_path = "" +artifact_dir::String = "" +libgfortran_handle::Ptr{Cvoid} = C_NULL +libgfortran_path::String = "" +libstdcxx_handle::Ptr{Cvoid} = C_NULL +libstdcxx_path::String = "" +libgomp_handle::Ptr{Cvoid} = C_NULL +libgomp_path::String = "" if Sys.iswindows() if arch(HostPlatform()) == "x86_64" @@ -31,8 +31,9 @@ if Sys.iswindows() const libgfortran = string("libgfortran-", libgfortran_version(HostPlatform()).major, ".dll") const libstdcxx = "libstdc++-6.dll" const libgomp = "libgomp-1.dll" + const libssp = "libssp-0.dll" elseif Sys.isapple() - if arch(HostPlatform()) == "aarch64" + if arch(HostPlatform()) == "aarch64" || libgfortran_version(HostPlatform()) == v"5" const libgcc_s = "@rpath/libgcc_s.1.1.dylib" else const libgcc_s = "@rpath/libgcc_s.1.dylib" @@ -40,11 +41,15 @@ elseif Sys.isapple() const libgfortran = string("@rpath/", "libgfortran.", libgfortran_version(HostPlatform()).major, ".dylib") const libstdcxx = "@rpath/libstdc++.6.dylib" const libgomp = "@rpath/libgomp.1.dylib" + const libssp = "@rpath/libssp.0.dylib" else const libgcc_s = "libgcc_s.so.1" const libgfortran = string("libgfortran.so.", libgfortran_version(HostPlatform()).major) const libstdcxx = "libstdc++.so.6" const libgomp = "libgomp.so.1" + if libc(HostPlatform()) != "musl" + const libssp = "libssp.so.0" + end end function __init__() @@ -56,6 +61,9 @@ function __init__() global libstdcxx_path = dlpath(libstdcxx_handle) global libgomp_handle = dlopen(libgomp) global libgomp_path = dlpath(libgomp_handle) + @static if libc(HostPlatform()) != "musl" + dlopen(libssp; throw_error = false) + end global artifact_dir = dirname(Sys.BINDIR) LIBPATH[] = dirname(libgcc_s_path) push!(LIBPATH_list, LIBPATH[]) diff --git a/stdlib/Dates/Project.toml b/stdlib/Dates/Project.toml index fe225055bad98..45da6ad1a0152 100644 --- a/stdlib/Dates/Project.toml +++ b/stdlib/Dates/Project.toml @@ -1,5 +1,6 @@ name = "Dates" uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" [deps] Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md index 4a7456b72a801..db452b1912a27 100644 --- a/stdlib/Dates/docs/src/index.md +++ b/stdlib/Dates/docs/src/index.md @@ -96,8 +96,7 @@ missing parts of dates and times so long as the preceding parts are given. The o default values. For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst `Date("31/12", dateformat"d/m/y")` gives `0001-12-31`. (Note that the default year is 1 AD/CE.) -Consequently, an empty string will always return `0001-01-01` for `Date`s, -and `0001-01-01T00:00:00.000` for `DateTime`s. +An empty string, however, always throws an `ArgumentError`. 
Fixed-width slots are specified by repeating the period character the number of times corresponding to the width with no delimiter between characters. So `dateformat"yyyymmdd"` would correspond to a date @@ -153,14 +152,13 @@ an optional third argument of type `DateFormat` specifying the format; for examp `parse(Date, "06.23.2013", dateformat"m.d.y")`, or `tryparse(DateTime, "1999-12-31T23:59:59")` which uses the default format. The notable difference between the functions is that with [`tryparse`](@ref), -an error is not thrown if the string is in an invalid format; -instead `nothing` is returned. Note however that as with the constructors -above, empty date and time parts assume -default values and consequently an empty string (`""`) is valid -for _any_ `DateFormat`, giving for example a `Date` of `0001-01-01`. Code -relying on `parse` or `tryparse` for `Date` and `DateTime` parsing should -therefore also check whether parsed strings are empty before using the -result. +an error is not thrown if the string is empty or in an invalid format; +instead `nothing` is returned. + +!!! compat "Julia 1.9" + Before Julia 1.9, empty strings could be passed to constructors and `parse` + without error, returning as appropriate `DateTime(1)`, `Date(1)` or `Time(0)`. + Likewise, `tryparse` did not return `nothing`. A full suite of parsing and formatting tests and examples is available in [`stdlib/Dates/test/io.jl`](https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/test/io.jl). @@ -206,8 +204,7 @@ ERROR: MethodError: no method matching *(::Date, ::Date) [...] julia> dt / dt2 -ERROR: MethodError: no method matching inv(::Date) -[...] +ERROR: MethodError: no method matching /(::Date, ::Date) julia> dt - dt2 4411 days @@ -342,12 +339,12 @@ First the mapping is loaded into the `LOCALES` variable: julia> french_months = ["janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"]; -julia> french_monts_abbrev = ["janv","févr","mars","avril","mai","juin", +julia> french_months_abbrev = ["janv","févr","mars","avril","mai","juin", "juil","août","sept","oct","nov","déc"]; julia> french_days = ["lundi","mardi","mercredi","jeudi","vendredi","samedi","dimanche"]; -julia> Dates.LOCALES["french"] = Dates.DateLocale(french_months, french_monts_abbrev, french_days, [""]); +julia> Dates.LOCALES["french"] = Dates.DateLocale(french_months, french_months_abbrev, french_days, [""]); ``` The above mentioned functions can then be used to perform the queries: @@ -364,7 +361,7 @@ julia> Dates.monthabbr(t;locale="french") ``` Since the abbreviated versions of the days are not loaded, trying to use the -function `dayabbr` will error. +function `dayabbr` will throw an error. ```jldoctest tdate2 julia> Dates.dayabbr(t;locale="french") @@ -643,8 +640,8 @@ by 10. As Julia [`Date`](@ref) and [`DateTime`](@ref) values are represented according to the ISO 8601 standard, `0000-01-01T00:00:00` was chosen as base (or "rounding epoch") from which to begin the count of days (and milliseconds) used in rounding calculations. 
(Note that this differs slightly -from Julia's internal representation of [`Date`](@ref) s using Rata Die notation; but since the -ISO 8601 standard is most visible to the end user, `0000-01-01T00:00:00` was chosen as the rounding +from Julia's internal representation of [`Date`](@ref) s using [Rata Die notation](https://en.wikipedia.org/wiki/Rata_Die); +but since the ISO 8601 standard is most visible to the end user, `0000-01-01T00:00:00` was chosen as the rounding epoch instead of the `0000-12-31T00:00:00` used internally to minimize confusion.) The only exception to the use of `0000-01-01T00:00:00` as the rounding epoch is when rounding diff --git a/stdlib/Dates/src/Dates.jl b/stdlib/Dates/src/Dates.jl index 6164216cbd1af..a111ea24089c4 100644 --- a/stdlib/Dates/src/Dates.jl +++ b/stdlib/Dates/src/Dates.jl @@ -14,13 +14,13 @@ For time zone functionality, see the TimeZones.jl package. julia> dt = DateTime(2017,12,31,23,59,59,999) 2017-12-31T23:59:59.999 -julia> d1 = Date(Dates.Month(12), Dates.Year(2017)) +julia> d1 = Date(Month(12), Year(2017)) 2017-12-01 -julia> d2 = Date("2017-12-31", Dates.DateFormat("y-m-d")) +julia> d2 = Date("2017-12-31", DateFormat("y-m-d")) 2017-12-31 -julia> Dates.yearmonthday(d2) +julia> yearmonthday(d2) (2017, 12, 31) julia> d2-d1 diff --git a/stdlib/Dates/src/accessors.jl b/stdlib/Dates/src/accessors.jl index 10e0142c83f21..211b5678c90d8 100644 --- a/stdlib/Dates/src/accessors.jl +++ b/stdlib/Dates/src/accessors.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license # Convert # of Rata Die days to proleptic Gregorian calendar y,m,d,w -# Reference: http://mysite.verizon.net/aesir_research/date/date0.htm +# Reference: https://www.researchgate.net/profile/Peter-Baum/publication/316558298_Date_Algorithms/links/5f90c3f992851c14bcdb0da6/Date-Algorithms.pdf function yearmonthday(days) z = days + 306; h = 100z - 25; a = fld(h, 3652425); b = a - fld(a, 4) y = fld(100b + h, 36525); c = b + z - 365y - fld(y, 4); m = div(5c + 456, 153) @@ -97,13 +97,13 @@ week of 2004. # Examples ```jldoctest -julia> Dates.week(Date(1989, 6, 22)) +julia> week(Date(1989, 6, 22)) 25 -julia> Dates.week(Date(2005, 1, 1)) +julia> week(Date(2005, 1, 1)) 53 -julia> Dates.week(Date(2004, 12, 31)) +julia> week(Date(2004, 12, 31)) 53 ``` """ diff --git a/stdlib/Dates/src/adjusters.jl b/stdlib/Dates/src/adjusters.jl index d5617ba8cf93c..245e2678a9d77 100644 --- a/stdlib/Dates/src/adjusters.jl +++ b/stdlib/Dates/src/adjusters.jl @@ -29,7 +29,7 @@ Truncates the value of `dt` according to the provided `Period` type. # Examples ```jldoctest -julia> trunc(Dates.DateTime("1996-01-01T12:30:00"), Dates.Day) +julia> trunc(DateTime("1996-01-01T12:30:00"), Day) 1996-01-01T00:00:00 ``` """ @@ -43,7 +43,7 @@ Adjusts `dt` to the Monday of its week. # Examples ```jldoctest -julia> Dates.firstdayofweek(DateTime("1996-01-05T12:30:00")) +julia> firstdayofweek(DateTime("1996-01-05T12:30:00")) 1996-01-01T00:00:00 ``` """ @@ -59,7 +59,7 @@ Adjusts `dt` to the Sunday of its week. # Examples ```jldoctest -julia> Dates.lastdayofweek(DateTime("1996-01-05T12:30:00")) +julia> lastdayofweek(DateTime("1996-01-05T12:30:00")) 1996-01-07T00:00:00 ``` """ @@ -75,7 +75,7 @@ Adjusts `dt` to the first day of its month. # Examples ```jldoctest -julia> Dates.firstdayofmonth(DateTime("1996-05-20")) +julia> firstdayofmonth(DateTime("1996-05-20")) 1996-05-01T00:00:00 ``` """ @@ -91,7 +91,7 @@ Adjusts `dt` to the last day of its month. 
# Examples ```jldoctest -julia> Dates.lastdayofmonth(DateTime("1996-05-20")) +julia> lastdayofmonth(DateTime("1996-05-20")) 1996-05-31T00:00:00 ``` """ @@ -110,7 +110,7 @@ Adjusts `dt` to the first day of its year. # Examples ```jldoctest -julia> Dates.firstdayofyear(DateTime("1996-05-20")) +julia> firstdayofyear(DateTime("1996-05-20")) 1996-01-01T00:00:00 ``` """ @@ -126,7 +126,7 @@ Adjusts `dt` to the last day of its year. # Examples ```jldoctest -julia> Dates.lastdayofyear(DateTime("1996-05-20")) +julia> lastdayofyear(DateTime("1996-05-20")) 1996-12-31T00:00:00 ``` """ @@ -145,10 +145,10 @@ Adjusts `dt` to the first day of its quarter. # Examples ```jldoctest -julia> Dates.firstdayofquarter(DateTime("1996-05-20")) +julia> firstdayofquarter(DateTime("1996-05-20")) 1996-04-01T00:00:00 -julia> Dates.firstdayofquarter(DateTime("1996-08-20")) +julia> firstdayofquarter(DateTime("1996-08-20")) 1996-07-01T00:00:00 ``` """ @@ -168,10 +168,10 @@ Adjusts `dt` to the last day of its quarter. # Examples ```jldoctest -julia> Dates.lastdayofquarter(DateTime("1996-05-20")) +julia> lastdayofquarter(DateTime("1996-05-20")) 1996-06-30T00:00:00 -julia> Dates.lastdayofquarter(DateTime("1996-08-20")) +julia> lastdayofquarter(DateTime("1996-08-20")) 1996-09-30T00:00:00 ``` """ @@ -221,13 +221,13 @@ pursue before throwing an error (given that `f::Function` is never satisfied). # Examples ```jldoctest -julia> Date(date -> Dates.week(date) == 20, 2010, 01, 01) +julia> Date(date -> week(date) == 20, 2010, 01, 01) 2010-05-17 -julia> Date(date -> Dates.year(date) == 2010, 2000, 01, 01) +julia> Date(date -> year(date) == 2010, 2000, 01, 01) 2010-01-01 -julia> Date(date -> Dates.month(date) == 10, 2000, 01, 01; limit = 5) +julia> Date(date -> month(date) == 10, 2000, 01, 01; limit = 5) ERROR: ArgumentError: Adjustment limit reached: 5 iterations Stacktrace: [...] @@ -248,10 +248,10 @@ pursue before throwing an error (in the case that `f::Function` is never satisfi # Examples ```jldoctest -julia> DateTime(dt -> Dates.second(dt) == 40, 2010, 10, 20, 10; step = Dates.Second(1)) +julia> DateTime(dt -> second(dt) == 40, 2010, 10, 20, 10; step = Second(1)) 2010-10-20T10:00:40 -julia> DateTime(dt -> Dates.hour(dt) == 20, 2010, 10, 20, 10; step = Dates.Hour(1), limit = 5) +julia> DateTime(dt -> hour(dt) == 20, 2010, 10, 20, 10; step = Hour(1), limit = 5) ERROR: ArgumentError: Adjustment limit reached: 5 iterations Stacktrace: [...] @@ -291,13 +291,13 @@ arguments are provided, the default step will be `Millisecond(1)` instead of `Se # Examples ```jldoctest -julia> Dates.Time(t -> Dates.minute(t) == 30, 20) +julia> Time(t -> minute(t) == 30, 20) 20:30:00 -julia> Dates.Time(t -> Dates.minute(t) == 0, 20) +julia> Time(t -> minute(t) == 0, 20) 20:00:00 -julia> Dates.Time(t -> Dates.hour(t) == 10, 3; limit = 5) +julia> Time(t -> hour(t) == 10, 3; limit = 5) ERROR: ArgumentError: Adjustment limit reached: 5 iterations Stacktrace: [...] diff --git a/stdlib/Dates/src/arithmetic.jl b/stdlib/Dates/src/arithmetic.jl index 6537f4e1caa82..83a2873b43409 100644 --- a/stdlib/Dates/src/arithmetic.jl +++ b/stdlib/Dates/src/arithmetic.jl @@ -7,6 +7,8 @@ # TimeType arithmetic (+)(x::TimeType) = x (-)(x::T, y::T) where {T<:TimeType} = x.instant - y.instant +(-)(x::T, y::T) where {T<:AbstractDateTime} = x.instant - y.instant +(-)(x::AbstractDateTime, y::AbstractDateTime) = -(promote(x, y)...) 
# Date-Time arithmetic """ diff --git a/stdlib/Dates/src/conversions.jl b/stdlib/Dates/src/conversions.jl index 161dc3791afab..30f1f2581d1fa 100644 --- a/stdlib/Dates/src/conversions.jl +++ b/stdlib/Dates/src/conversions.jl @@ -46,9 +46,11 @@ Take the number of seconds since unix epoch `1970-01-01T00:00:00` and convert to corresponding `DateTime`. """ function unix2datetime(x) - rata = UNIXEPOCH + round(Int64, Int64(1000) * x) + # Rounding should match `now` below + rata = UNIXEPOCH + trunc(Int64, Int64(1000) * x) return DateTime(UTM(rata)) end + """ datetime2unix(dt::DateTime) -> Float64 @@ -80,6 +82,13 @@ today() = Date(now()) now(::Type{UTC}) -> DateTime Return a `DateTime` corresponding to the user's system time as UTC/GMT. +For other time zones, see the TimeZones.jl package. + +# Example +```julia +julia> now(UTC) +2023-01-04T10:52:24.864 +``` """ now(::Type{UTC}) = unix2datetime(time()) diff --git a/stdlib/Dates/src/deprecated.jl b/stdlib/Dates/src/deprecated.jl index 3c8a58f6e75e7..b50d8501e7570 100644 --- a/stdlib/Dates/src/deprecated.jl +++ b/stdlib/Dates/src/deprecated.jl @@ -65,3 +65,6 @@ for op in (:+, :-) end end end + +@deprecate argerror(msg::String) ArgumentError(msg) false +@deprecate argerror() nothing false diff --git a/stdlib/Dates/src/io.jl b/stdlib/Dates/src/io.jl index 7e007ced0bbee..257e86064c2fb 100644 --- a/stdlib/Dates/src/io.jl +++ b/stdlib/Dates/src/io.jl @@ -55,7 +55,7 @@ Base.show(io::IO, ::MIME"text/plain", t::Time) = print(io, t) Base.print(io::IO, t::Time) = print(io, string(t)) function Base.show(io::IO, t::Time) - if get(io, :compact, false) + if get(io, :compact, false)::Bool print(io, t) else values = [ @@ -356,23 +356,23 @@ Construct a date formatting object that can be used for parsing date strings or formatting a date object as a string. The following character codes can be used to construct the `format` string: -| Code | Matches | Comment | -|:-----------|:----------|:-------------------------------------------------------------| -| `y` | 1996, 96 | Returns year of 1996, 0096 | -| `Y` | 1996, 96 | Returns year of 1996, 0096. 
Equivalent to `y` | -| `m` | 1, 01 | Matches 1 or 2-digit months | -| `u` | Jan | Matches abbreviated months according to the `locale` keyword | -| `U` | January | Matches full month names according to the `locale` keyword | -| `d` | 1, 01 | Matches 1 or 2-digit days | -| `H` | 00 | Matches hours (24-hour clock) | -| `I` | 00 | For outputting hours with 12-hour clock | -| `M` | 00 | Matches minutes | -| `S` | 00 | Matches seconds | -| `s` | .500 | Matches milliseconds | -| `e` | Mon, Tues | Matches abbreviated days of the week | -| `E` | Monday | Matches full name days of the week | -| `p` | AM | Matches AM/PM (case-insensitive) | -| `yyyymmdd` | 19960101 | Matches fixed-width year, month, and day | +| Code | Matches | Comment | +|:-----------|:----------|:--------------------------------------------------------------| +| `Y` | 1996, 96 | Returns year of 1996, 0096 | +| `y` | 1996, 96 | Same as `Y` on `parse` but discards excess digits on `format` | +| `m` | 1, 01 | Matches 1 or 2-digit months | +| `u` | Jan | Matches abbreviated months according to the `locale` keyword | +| `U` | January | Matches full month names according to the `locale` keyword | +| `d` | 1, 01 | Matches 1 or 2-digit days | +| `H` | 00 | Matches hours (24-hour clock) | +| `I` | 00 | For outputting hours with 12-hour clock | +| `M` | 00 | Matches minutes | +| `S` | 00 | Matches seconds | +| `s` | .500 | Matches milliseconds | +| `e` | Mon, Tues | Matches abbreviated days of the week | +| `E` | Monday | Matches full name days of the week | +| `p` | AM | Matches AM/PM (case-insensitive) | +| `yyyymmdd` | 19960101 | Matches fixed-width year, month, and day | Characters not listed above are normally treated as delimiters between date and time slots. For example a `dt` string of "1996-01-15T00:00:00.0" would have a `format` string like @@ -414,8 +414,6 @@ function DateFormat(f::AbstractString, locale::DateLocale=ENGLISH) if !isempty(prev) letter, width = prev - typ = CONVERSION_SPECIFIERS[letter] - push!(tokens, DatePart{letter}(width, isempty(tran))) end @@ -434,8 +432,6 @@ function DateFormat(f::AbstractString, locale::DateLocale=ENGLISH) if !isempty(prev) letter, width = prev - typ = CONVERSION_SPECIFIERS[letter] - push!(tokens, DatePart{letter}(width, false)) end @@ -451,12 +447,8 @@ function DateFormat(f::AbstractString, locale::AbstractString) DateFormat(f, LOCALES[locale]) end -function Base.show(io::IO, df::DateFormat) - print(io, "dateformat\"") - for t in df.tokens - _show_content(io, t) - end - print(io, '"') +function Base.show(io::IO, df::DateFormat{S,T}) where {S,T} + print(io, "dateformat\"", S, '"') end Base.Broadcast.broadcastable(x::DateFormat) = Ref(x) diff --git a/stdlib/Dates/src/parse.jl b/stdlib/Dates/src/parse.jl index a5bbc686c955d..62d44177de877 100644 --- a/stdlib/Dates/src/parse.jl +++ b/stdlib/Dates/src/parse.jl @@ -198,6 +198,7 @@ end function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeFormat)) i, end_pos = firstindex(s), lastindex(s) + i > end_pos && throw(ArgumentError("Cannot parse an empty string as a DateTime")) local dy dm = dd = Int64(1) @@ -279,6 +280,7 @@ end function Base.parse(::Type{T}, str::AbstractString, df::DateFormat=default_format(T)) where T<:TimeType pos, len = firstindex(str), lastindex(str) + pos > len && throw(ArgumentError("Cannot parse an empty string as a Date or Time")) val = tryparsenext_internal(T, str, pos, len, df, true) @assert val !== nothing values, endpos = val @@ -287,6 +289,7 @@ end function Base.tryparse(::Type{T}, 
str::AbstractString, df::DateFormat=default_format(T)) where T<:TimeType pos, len = firstindex(str), lastindex(str) + pos > len && return nothing res = tryparsenext_internal(T, str, pos, len, df, false) res === nothing && return nothing values, endpos = res diff --git a/stdlib/Dates/src/periods.jl b/stdlib/Dates/src/periods.jl index 021e91924ce59..c1d94d3d62321 100644 --- a/stdlib/Dates/src/periods.jl +++ b/stdlib/Dates/src/periods.jl @@ -17,8 +17,6 @@ for period in (:Year, :Quarter, :Month, :Week, :Day, :Hour, :Minute, :Second, :M accessor_str = lowercase(period_str) # Convenience method for show() @eval _units(x::$period) = " " * $accessor_str * (abs(value(x)) == 1 ? "" : "s") - # periodisless - @eval periodisless(x::$period, y::$period) = value(x) < value(y) # AbstractString parsing (mainly for IO code) @eval $period(x::AbstractString) = $period(Base.parse(Int64, x)) # The period type is printed when output, thus it already implies its own typeinfo @@ -60,7 +58,7 @@ Base.isfinite(::Union{Type{P}, P}) where {P<:Period} = true """ default(p::Period) -> Period -Returns a sensible "default" value for the input Period by returning `T(1)` for Year, +Return a sensible "default" value for the input Period by returning `T(1)` for Year, Month, and Day, and `T(0)` for Hour, Minute, Second, and Millisecond. """ function default end @@ -105,43 +103,6 @@ Base.gcdx(a::T, b::T) where {T<:Period} = ((g, x, y) = gcdx(value(a), value(b)); Base.abs(a::T) where {T<:Period} = T(abs(value(a))) Base.sign(x::Period) = sign(value(x)) -periodisless(::Period,::Year) = true -periodisless(::Period,::Quarter) = true -periodisless(::Year,::Quarter) = false -periodisless(::Period,::Month) = true -periodisless(::Year,::Month) = false -periodisless(::Quarter,::Month) = false -periodisless(::Period,::Week) = true -periodisless(::Year,::Week) = false -periodisless(::Quarter,::Week) = false -periodisless(::Month,::Week) = false -periodisless(::Period,::Day) = true -periodisless(::Year,::Day) = false -periodisless(::Quarter,::Day) = false -periodisless(::Month,::Day) = false -periodisless(::Week,::Day) = false -periodisless(::Period,::Hour) = false -periodisless(::Minute,::Hour) = true -periodisless(::Second,::Hour) = true -periodisless(::Millisecond,::Hour) = true -periodisless(::Microsecond,::Hour) = true -periodisless(::Nanosecond,::Hour) = true -periodisless(::Period,::Minute) = false -periodisless(::Second,::Minute) = true -periodisless(::Millisecond,::Minute) = true -periodisless(::Microsecond,::Minute) = true -periodisless(::Nanosecond,::Minute) = true -periodisless(::Period,::Second) = false -periodisless(::Millisecond,::Second) = true -periodisless(::Microsecond,::Second) = true -periodisless(::Nanosecond,::Second) = true -periodisless(::Period,::Millisecond) = false -periodisless(::Microsecond,::Millisecond) = true -periodisless(::Nanosecond,::Millisecond) = true -periodisless(::Period,::Microsecond) = false -periodisless(::Nanosecond,::Microsecond) = true -periodisless(::Period,::Nanosecond) = false - # return (next coarser period, conversion factor): coarserperiod(::Type{P}) where {P<:Period} = (P, 1) coarserperiod(::Type{Nanosecond}) = (Microsecond, 1000) @@ -160,37 +121,40 @@ coarserperiod(::Type{Month}) = (Year, 12) CompoundPeriod A `CompoundPeriod` is useful for expressing time periods that are not a fixed multiple of -smaller periods. For example, \"a year and a day\" is not a fixed number of days, but can +smaller periods. 
For example, "a year and a day" is not a fixed number of days, but can be expressed using a `CompoundPeriod`. In fact, a `CompoundPeriod` is automatically generated by addition of different period types, e.g. `Year(1) + Day(1)` produces a `CompoundPeriod` result. """ struct CompoundPeriod <: AbstractTime - periods::Array{Period, 1} + periods::Vector{Period} function CompoundPeriod(p::Vector{Period}) n = length(p) if n > 1 - sort!(p, rev=true, lt=periodisless) + # We sort periods in decreasing order (rev = true) according to the length of + # the period's type (by = tons ∘ oneunit). We sort by type, not value, so that + # we can merge equal types. + # + # This works by computing how many nanoseconds are in a single period, and sorting + # by that. For example, (tons ∘ oneunit)(Week(10)) = tons(oneunit(Week(10))) = + # tons(Week(1)) ≈ 6.0e14, which is less than (tons ∘ oneunit)(Month(-2)) ≈ 2.6e15 + sort!(p, rev = true, by = tons ∘ oneunit) # canonicalize p by merging equal period types and removing zeros i = j = 1 while j <= n k = j + 1 - while k <= n - if typeof(p[j]) == typeof(p[k]) - p[j] += p[k] - k += 1 - else - break - end + while k <= n && typeof(p[j]) == typeof(p[k]) + p[j] += p[k] + k += 1 end - if p[j] != zero(p[j]) + if !iszero(p[j]) p[i] = p[j] i += 1 end j = k end n = i - 1 # new length - p = resize!(p, n) + p = resize!(p, n) elseif n == 1 && value(p[1]) == 0 p = Period[] end @@ -250,16 +214,16 @@ Reduces the `CompoundPeriod` into its canonical form by applying the following r # Examples ```jldoctest -julia> Dates.canonicalize(Dates.CompoundPeriod(Dates.Hour(12), Dates.Hour(13))) +julia> canonicalize(Dates.CompoundPeriod(Dates.Hour(12), Dates.Hour(13))) 1 day, 1 hour -julia> Dates.canonicalize(Dates.CompoundPeriod(Dates.Hour(-1), Dates.Minute(1))) +julia> canonicalize(Dates.CompoundPeriod(Dates.Hour(-1), Dates.Minute(1))) -59 minutes -julia> Dates.canonicalize(Dates.CompoundPeriod(Dates.Month(1), Dates.Week(-2))) +julia> canonicalize(Dates.CompoundPeriod(Dates.Month(1), Dates.Week(-2))) 1 month, -2 weeks -julia> Dates.canonicalize(Dates.CompoundPeriod(Dates.Minute(50000))) +julia> canonicalize(Dates.CompoundPeriod(Dates.Minute(50000))) 4 weeks, 6 days, 17 hours, 20 minutes ``` """ @@ -361,7 +325,7 @@ end Base.show(io::IO,x::CompoundPeriod) = print(io, string(x)) Base.convert(::Type{T}, x::CompoundPeriod) where T<:Period = - isconcretetype(T) ? sum(T, x.periods) : throw(MethodError(convert,(T,x))) + isconcretetype(T) ? sum(T, x.periods; init = zero(T)) : throw(MethodError(convert,(T,x))) # E.g. Year(1) + Day(1) (+)(x::Period,y::Period) = CompoundPeriod(Period[x, y]) @@ -419,70 +383,44 @@ end # hitting the deprecated construct-to-convert fallback. 
(::Type{T})(p::Period) where {T<:Period} = convert(T, p)::T -# FixedPeriod conversions and promotion rules -const fixedperiod_conversions = [(:Week, 7), (:Day, 24), (:Hour, 60), (:Minute, 60), (:Second, 1000), - (:Millisecond, 1000), (:Microsecond, 1000), (:Nanosecond, 1)] -for i = 1:length(fixedperiod_conversions) - T, n = fixedperiod_conversions[i] - N = Int64(1) - for j = (i - 1):-1:1 # less-precise periods - Tc, nc = fixedperiod_conversions[j] - N *= nc - vmax = typemax(Int64) ÷ N - vmin = typemin(Int64) ÷ N - @eval function Base.convert(::Type{$T}, x::$Tc) - $vmin ≤ value(x) ≤ $vmax || throw(InexactError(:convert, $T, x)) - return $T(value(x) * $N) +# Conversions and promotion rules +function define_conversions(periods) + for i = eachindex(periods) + T, n = periods[i] + N = Int64(1) + for j = (i - 1):-1:firstindex(periods) # less-precise periods + Tc, nc = periods[j] + N *= nc + vmax = typemax(Int64) ÷ N + vmin = typemin(Int64) ÷ N + @eval function Base.convert(::Type{$T}, x::$Tc) + $vmin ≤ value(x) ≤ $vmax || throw(InexactError(:convert, $T, x)) + return $T(value(x) * $N) + end + end + N = n + for j = (i + 1):lastindex(periods) # more-precise periods + Tc, nc = periods[j] + @eval Base.convert(::Type{$T}, x::$Tc) = $T(divexact(value(x), $N)) + @eval Base.promote_rule(::Type{$T}, ::Type{$Tc}) = $Tc + N *= nc end - end - N = n - for j = (i + 1):length(fixedperiod_conversions) # more-precise periods - Tc, nc = fixedperiod_conversions[j] - @eval Base.convert(::Type{$T}, x::$Tc) = $T(divexact(value(x), $N)) - @eval Base.promote_rule(::Type{$T}, ::Type{$Tc}) = $Tc - N *= nc - end -end - -# other periods with fixed conversions but which aren't fixed time periods -const OtherPeriod = Union{Month, Quarter, Year} -let vmax = typemax(Int64) ÷ 12, vmin = typemin(Int64) ÷ 12 - @eval function Base.convert(::Type{Month}, x::Year) - $vmin ≤ value(x) ≤ $vmax || throw(InexactError(:convert, Month, x)) - Month(value(x) * 12) - end -end -Base.convert(::Type{Year}, x::Month) = Year(divexact(value(x), 12)) -Base.promote_rule(::Type{Year}, ::Type{Month}) = Month - -let vmax = typemax(Int64) ÷ 4, vmin = typemin(Int64) ÷ 4 - @eval function Base.convert(::Type{Quarter}, x::Year) - $vmin ≤ value(x) ≤ $vmax || throw(InexactError(:convert, Quarter, x)) - Quarter(value(x) * 4) - end -end -Base.convert(::Type{Year}, x::Quarter) = Year(divexact(value(x), 4)) -Base.promote_rule(::Type{Year}, ::Type{Quarter}) = Quarter - -let vmax = typemax(Int64) ÷ 3, vmin = typemin(Int64) ÷ 3 - @eval function Base.convert(::Type{Month}, x::Quarter) - $vmin ≤ value(x) ≤ $vmax || throw(InexactError(:convert, Month, x)) - Month(value(x) * 3) end end -Base.convert(::Type{Quarter}, x::Month) = Quarter(divexact(value(x), 3)) -Base.promote_rule(::Type{Quarter}, ::Type{Month}) = Month - +define_conversions([(:Week, 7), (:Day, 24), (:Hour, 60), (:Minute, 60), (:Second, 1000), + (:Millisecond, 1000), (:Microsecond, 1000), (:Nanosecond, 1)]) +define_conversions([(:Year, 4), (:Quarter, 3), (:Month, 1)]) # fixed is not comparable to other periods, except when both are zero (#37459) +const OtherPeriod = Union{Month, Quarter, Year} (==)(x::FixedPeriod, y::OtherPeriod) = iszero(x) & iszero(y) (==)(x::OtherPeriod, y::FixedPeriod) = y == x const zero_or_fixedperiod_seed = UInt === UInt64 ? 0x5b7fc751bba97516 : 0xeae0fdcb const nonzero_otherperiod_seed = UInt === UInt64 ? 0xe1837356ff2d2ac9 : 0x170d1b00 -otherperiod_seed(x::OtherPeriod) = iszero(value(x)) ? zero_or_fixedperiod_seed : nonzero_otherperiod_seed +otherperiod_seed(x) = iszero(value(x)) ? 
zero_or_fixedperiod_seed : nonzero_otherperiod_seed # tons() will overflow for periods longer than ~300,000 years, implying a hash collision -# which is relatively harmless given how infrequent such periods should appear +# which is relatively harmless given how infrequently such periods should appear Base.hash(x::FixedPeriod, h::UInt) = hash(tons(x), h + zero_or_fixedperiod_seed) # Overflow can also happen here for really long periods (~8e17 years) Base.hash(x::Year, h::UInt) = hash(12 * value(x), h + otherperiod_seed(x)) @@ -505,17 +443,13 @@ Base.isless(x::CompoundPeriod, y::Period) = x < CompoundPeriod(y) Base.isless(x::CompoundPeriod, y::CompoundPeriod) = tons(x) < tons(y) # truncating conversions to milliseconds, nanoseconds and days: # overflow can happen for periods longer than ~300,000 years -toms(c::Nanosecond) = div(value(c), 1000000) -toms(c::Microsecond) = div(value(c), 1000) +toms(c::Nanosecond) = div(value(c), 1000000, RoundNearest) +toms(c::Microsecond) = div(value(c), 1000, RoundNearest) toms(c::Millisecond) = value(c) toms(c::Second) = 1000 * value(c) toms(c::Minute) = 60000 * value(c) toms(c::Hour) = 3600000 * value(c) -toms(c::Day) = 86400000 * value(c) -toms(c::Week) = 604800000 * value(c) -toms(c::Month) = 86400000.0 * 30.436875 * value(c) -toms(c::Quarter) = 86400000.0 * 91.310625 * value(c) -toms(c::Year) = 86400000.0 * 365.2425 * value(c) +toms(c::Period) = 86400000 * days(c) toms(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(toms, c.periods)) tons(x) = toms(x) * 1000000 tons(x::Microsecond) = value(x) * 1000 @@ -531,3 +465,7 @@ days(c::Year) = 365.2425 * value(c) days(c::Quarter) = 91.310625 * value(c) days(c::Month) = 30.436875 * value(c) days(c::CompoundPeriod) = isempty(c.periods) ? 0.0 : Float64(sum(days, c.periods)) +seconds(x::Nanosecond) = value(x) / 1000000000 +seconds(x::Microsecond) = value(x) / 1000000 +seconds(x::Millisecond) = value(x) / 1000 +seconds(x::Period) = value(Second(x)) diff --git a/stdlib/Dates/src/query.jl b/stdlib/Dates/src/query.jl index c204f750f5de2..4f3b5a5c4b095 100644 --- a/stdlib/Dates/src/query.jl +++ b/stdlib/Dates/src/query.jl @@ -93,10 +93,10 @@ Return 366 if the year of `dt` is a leap year, otherwise return 365. # Examples ```jldoctest -julia> Dates.daysinyear(1999) +julia> daysinyear(1999) 365 -julia> Dates.daysinyear(2000) +julia> daysinyear(2000) 366 ``` """ @@ -114,7 +114,7 @@ Return the day of the week as an [`Int64`](@ref) with `1 = Monday, 2 = Tuesday, # Examples ```jldoctest -julia> Dates.dayofweek(Date("2000-01-01")) +julia> dayofweek(Date("2000-01-01")) 6 ``` """ @@ -159,10 +159,10 @@ the given `locale`. Also accepts `Integer`. # Examples ```jldoctest -julia> Dates.dayname(Date("2000-01-01")) +julia> dayname(Date("2000-01-01")) "Saturday" -julia> Dates.dayname(4) +julia> dayname(4) "Thursday" ``` """ @@ -179,10 +179,10 @@ in the given `locale`. Also accepts `Integer`. # Examples ```jldoctest -julia> Dates.dayabbr(Date("2000-01-01")) +julia> dayabbr(Date("2000-01-01")) "Sat" -julia> Dates.dayabbr(3) +julia> dayabbr(3) "Wed" ``` """ @@ -209,13 +209,13 @@ month, etc.` In the range 1:5. # Examples ```jldoctest -julia> Dates.dayofweekofmonth(Date("2000-02-01")) +julia> dayofweekofmonth(Date("2000-02-01")) 1 -julia> Dates.dayofweekofmonth(Date("2000-02-08")) +julia> dayofweekofmonth(Date("2000-02-08")) 2 -julia> Dates.dayofweekofmonth(Date("2000-02-15")) +julia> dayofweekofmonth(Date("2000-02-15")) 3 ``` """ @@ -240,10 +240,10 @@ function. 
# Examples ```jldoctest -julia> Dates.daysofweekinmonth(Date("2005-01-01")) +julia> daysofweekinmonth(Date("2005-01-01")) 5 -julia> Dates.daysofweekinmonth(Date("2005-01-04")) +julia> daysofweekinmonth(Date("2005-01-04")) 4 ``` """ @@ -569,10 +569,10 @@ Return the full name of the month of the `Date` or `DateTime` or `Integer` in th # Examples ```jldoctest -julia> Dates.monthname(Date("2005-01-04")) +julia> monthname(Date("2005-01-04")) "January" -julia> Dates.monthname(2) +julia> monthname(2) "February" ``` """ @@ -588,7 +588,7 @@ Return the abbreviated month name of the `Date` or `DateTime` or `Integer` in th # Examples ```jldoctest -julia> Dates.monthabbr(Date("2005-01-04")) +julia> monthabbr(Date("2005-01-04")) "Jan" julia> monthabbr(2) @@ -606,13 +606,13 @@ Return the number of days in the month of `dt`. Value will be 28, 29, 30, or 31. # Examples ```jldoctest -julia> Dates.daysinmonth(Date("2000-01")) +julia> daysinmonth(Date("2000-01")) 31 -julia> Dates.daysinmonth(Date("2001-02")) +julia> daysinmonth(Date("2001-02")) 28 -julia> Dates.daysinmonth(Date("2000-02")) +julia> daysinmonth(Date("2000-02")) 29 ``` """ @@ -626,10 +626,10 @@ Return `true` if the year of `dt` is a leap year. # Examples ```jldoctest -julia> Dates.isleapyear(Date("2004")) +julia> isleapyear(Date("2004")) true -julia> Dates.isleapyear(Date("2005")) +julia> isleapyear(Date("2005")) false ``` """ diff --git a/stdlib/Dates/src/ranges.jl b/stdlib/Dates/src/ranges.jl index 3939d3661ec66..c4299c7b02be5 100644 --- a/stdlib/Dates/src/ranges.jl +++ b/stdlib/Dates/src/ranges.jl @@ -4,6 +4,7 @@ StepRange{<:Dates.DatePeriod,<:Real}(start, step, stop) = throw(ArgumentError("must specify step as a Period when constructing Dates ranges")) +Base.:(:)(a::T, b::T) where {T<:Date} = (:)(a, Day(1), b) # Given a start and end date, how many steps/periods are in between guess(a::DateTime, b::DateTime, c) = floor(Int64, (Int128(value(b)) - Int128(value(a))) / toms(c)) @@ -42,7 +43,7 @@ function Base.steprange_last(start::T, step, stop) where T<:TimeType else diff = stop - start if (diff > zero(diff)) != (stop > start) - throw(OverflowError()) + throw(OverflowError("Difference between stop and start overflowed")) end remain = stop - (start + step * len(start, stop, step)) last = stop - remain diff --git a/stdlib/Dates/src/rounding.jl b/stdlib/Dates/src/rounding.jl index 53e680a6bfd1b..b5b6e52decba8 100644 --- a/stdlib/Dates/src/rounding.jl +++ b/stdlib/Dates/src/rounding.jl @@ -94,13 +94,13 @@ For convenience, `precision` may be a type instead of a value: `floor(x, Dates.H shortcut for `floor(x, Dates.Hour(1))`. ```jldoctest -julia> floor(Dates.Day(16), Dates.Week) +julia> floor(Day(16), Week) 2 weeks -julia> floor(Dates.Minute(44), Dates.Minute(15)) +julia> floor(Minute(44), Minute(15)) 30 minutes -julia> floor(Dates.Hour(36), Dates.Day) +julia> floor(Hour(36), Day) 1 day ``` @@ -122,13 +122,13 @@ For convenience, `p` may be a type instead of a value: `floor(dt, Dates.Hour)` i for `floor(dt, Dates.Hour(1))`. ```jldoctest -julia> floor(Date(1985, 8, 16), Dates.Month) +julia> floor(Date(1985, 8, 16), Month) 1985-08-01 -julia> floor(DateTime(2013, 2, 13, 0, 31, 20), Dates.Minute(15)) +julia> floor(DateTime(2013, 2, 13, 0, 31, 20), Minute(15)) 2013-02-13T00:30:00 -julia> floor(DateTime(2016, 8, 6, 12, 0, 0), Dates.Day) +julia> floor(DateTime(2016, 8, 6, 12, 0, 0), Day) 2016-08-06T00:00:00 ``` """ @@ -143,13 +143,13 @@ For convenience, `p` may be a type instead of a value: `ceil(dt, Dates.Hour)` is for `ceil(dt, Dates.Hour(1))`. 
```jldoctest -julia> ceil(Date(1985, 8, 16), Dates.Month) +julia> ceil(Date(1985, 8, 16), Month) 1985-09-01 -julia> ceil(DateTime(2013, 2, 13, 0, 31, 20), Dates.Minute(15)) +julia> ceil(DateTime(2013, 2, 13, 0, 31, 20), Minute(15)) 2013-02-13T00:45:00 -julia> ceil(DateTime(2016, 8, 6, 12, 0, 0), Dates.Day) +julia> ceil(DateTime(2016, 8, 6, 12, 0, 0), Day) 2016-08-07T00:00:00 ``` """ @@ -168,13 +168,13 @@ For convenience, `precision` may be a type instead of a value: `ceil(x, Dates.Ho shortcut for `ceil(x, Dates.Hour(1))`. ```jldoctest -julia> ceil(Dates.Day(16), Dates.Week) +julia> ceil(Day(16), Week) 3 weeks -julia> ceil(Dates.Minute(44), Dates.Minute(15)) +julia> ceil(Minute(44), Minute(15)) 45 minutes -julia> ceil(Dates.Hour(36), Dates.Day) +julia> ceil(Hour(36), Day) 2 days ``` @@ -218,13 +218,13 @@ For convenience, `p` may be a type instead of a value: `round(dt, Dates.Hour)` i for `round(dt, Dates.Hour(1))`. ```jldoctest -julia> round(Date(1985, 8, 16), Dates.Month) +julia> round(Date(1985, 8, 16), Month) 1985-08-01 -julia> round(DateTime(2013, 2, 13, 0, 31, 20), Dates.Minute(15)) +julia> round(DateTime(2013, 2, 13, 0, 31, 20), Minute(15)) 2013-02-13T00:30:00 -julia> round(DateTime(2016, 8, 6, 12, 0, 0), Dates.Day) +julia> round(DateTime(2016, 8, 6, 12, 0, 0), Day) 2016-08-07T00:00:00 ``` @@ -248,13 +248,13 @@ For convenience, `precision` may be a type instead of a value: `round(x, Dates.H shortcut for `round(x, Dates.Hour(1))`. ```jldoctest -julia> round(Dates.Day(16), Dates.Week) +julia> round(Day(16), Week) 2 weeks -julia> round(Dates.Minute(44), Dates.Minute(15)) +julia> round(Minute(44), Minute(15)) 45 minutes -julia> round(Dates.Hour(36), Dates.Day) +julia> round(Hour(36), Day) 2 days ``` diff --git a/stdlib/Dates/src/types.jl b/stdlib/Dates/src/types.jl index 0cdac884fb7fe..7391c277b0718 100644 --- a/stdlib/Dates/src/types.jl +++ b/stdlib/Dates/src/types.jl @@ -103,7 +103,7 @@ end UTM(x) = UTInstant(Millisecond(x)) UTD(x) = UTInstant(Day(x)) -# Calendar types provide rules for interpretating instant +# Calendar types provide rules for interpreting instant # timelines in human-readable form. abstract type Calendar <: AbstractTime end @@ -142,8 +142,28 @@ abstract type AbstractDateTime <: TimeType end """ DateTime -`DateTime` wraps a `UTInstant{Millisecond}` and interprets it according to the proleptic -Gregorian calendar. +`DateTime` represents a point in time according to the proleptic Gregorian calendar. +The finest resolution of the time is millisecond (i.e., microseconds or +nanoseconds cannot be represented by this type). The type supports fixed-point +arithmetic, and thus is prone to underflowing (and overflowing). A notable +consequence is rounding when adding a `Microsecond` or a `Nanosecond`: + +```jldoctest +julia> dt = DateTime(2023, 8, 19, 17, 45, 32, 900) +2023-08-19T17:45:32.900 + +julia> dt + Millisecond(1) +2023-08-19T17:45:32.901 + +julia> dt + Microsecond(1000) # 1000us == 1ms +2023-08-19T17:45:32.901 + +julia> dt + Microsecond(999) # 999us rounded to 1000us +2023-08-19T17:45:32.901 + +julia> dt + Microsecond(1499) # 1499 rounded to 1000us +2023-08-19T17:45:32.901 +``` """ struct DateTime <: AbstractDateTime instant::UTInstant{Millisecond} @@ -199,20 +219,11 @@ daysinmonth(y,m) = DAYSINMONTH[m] + (m == 2 && isleapyear(y)) """ validargs(::Type{<:TimeType}, args...) -> Union{ArgumentError, Nothing} -Determine whether the given arguments consitute valid inputs for the given type. +Determine whether the given arguments constitute valid inputs for the given type. 
Returns either an `ArgumentError`, or [`nothing`](@ref) in case of success. """ function validargs end -""" - argerror([msg]) -> Union{ArgumentError, Nothing} - -Return an `ArgumentError` object with the given message, -or [`nothing`](@ref) if no message is provided. For use by `validargs`. -""" -argerror(msg::String) = ArgumentError(msg) -argerror() = nothing - # Julia uses 24-hour clocks internally, but user input can be AM/PM with 12pm == noon and 12am == midnight. @enum AMPM AM PM TWENTYFOURHOUR function adjusthour(h::Int64, ampm::AMPM) @@ -240,18 +251,18 @@ end function validargs(::Type{DateTime}, y::Int64, m::Int64, d::Int64, h::Int64, mi::Int64, s::Int64, ms::Int64, ampm::AMPM=TWENTYFOURHOUR) - 0 < m < 13 || return argerror("Month: $m out of range (1:12)") - 0 < d < daysinmonth(y, m) + 1 || return argerror("Day: $d out of range (1:$(daysinmonth(y, m)))") + 0 < m < 13 || return ArgumentError("Month: $m out of range (1:12)") + 0 < d < daysinmonth(y, m) + 1 || return ArgumentError("Day: $d out of range (1:$(daysinmonth(y, m)))") if ampm == TWENTYFOURHOUR # 24-hour clock -1 < h < 24 || (h == 24 && mi==s==ms==0) || - return argerror("Hour: $h out of range (0:23)") + return ArgumentError("Hour: $h out of range (0:23)") else - 0 < h < 13 || return argerror("Hour: $h out of range (1:12)") + 0 < h < 13 || return ArgumentError("Hour: $h out of range (1:12)") end - -1 < mi < 60 || return argerror("Minute: $mi out of range (0:59)") - -1 < s < 60 || return argerror("Second: $s out of range (0:59)") - -1 < ms < 1000 || return argerror("Millisecond: $ms out of range (0:999)") - return argerror() + -1 < mi < 60 || return ArgumentError("Minute: $mi out of range (0:59)") + -1 < s < 60 || return ArgumentError("Second: $s out of range (0:59)") + -1 < ms < 1000 || return ArgumentError("Millisecond: $ms out of range (0:999)") + return nothing end DateTime(dt::Base.Libc.TmStruct) = DateTime(1900 + dt.year, 1 + dt.month, dt.mday, dt.hour, dt.min, dt.sec) @@ -268,9 +279,9 @@ function Date(y::Int64, m::Int64=1, d::Int64=1) end function validargs(::Type{Date}, y::Int64, m::Int64, d::Int64) - 0 < m < 13 || return argerror("Month: $m out of range (1:12)") - 0 < d < daysinmonth(y, m) + 1 || return argerror("Day: $d out of range (1:$(daysinmonth(y, m)))") - return argerror() + 0 < m < 13 || return ArgumentError("Month: $m out of range (1:12)") + 0 < d < daysinmonth(y, m) + 1 || return ArgumentError("Day: $d out of range (1:$(daysinmonth(y, m)))") + return nothing end Date(dt::Base.Libc.TmStruct) = Date(1900 + dt.year, 1 + dt.month, dt.mday) @@ -289,16 +300,16 @@ end function validargs(::Type{Time}, h::Int64, mi::Int64, s::Int64, ms::Int64, us::Int64, ns::Int64, ampm::AMPM=TWENTYFOURHOUR) if ampm == TWENTYFOURHOUR # 24-hour clock - -1 < h < 24 || return argerror("Hour: $h out of range (0:23)") + -1 < h < 24 || return ArgumentError("Hour: $h out of range (0:23)") else - 0 < h < 13 || return argerror("Hour: $h out of range (1:12)") + 0 < h < 13 || return ArgumentError("Hour: $h out of range (1:12)") end - -1 < mi < 60 || return argerror("Minute: $mi out of range (0:59)") - -1 < s < 60 || return argerror("Second: $s out of range (0:59)") - -1 < ms < 1000 || return argerror("Millisecond: $ms out of range (0:999)") - -1 < us < 1000 || return argerror("Microsecond: $us out of range (0:999)") - -1 < ns < 1000 || return argerror("Nanosecond: $ns out of range (0:999)") - return argerror() + -1 < mi < 60 || return ArgumentError("Minute: $mi out of range (0:59)") + -1 < s < 60 || return ArgumentError("Second: $s out of range 
(0:59)") + -1 < ms < 1000 || return ArgumentError("Millisecond: $ms out of range (0:999)") + -1 < us < 1000 || return ArgumentError("Microsecond: $us out of range (0:999)") + -1 < ns < 1000 || return ArgumentError("Nanosecond: $ns out of range (0:999)") + return nothing end Time(dt::Base.Libc.TmStruct) = Time(dt.hour, dt.min, dt.sec) @@ -469,14 +480,14 @@ Base.hash(x::Time, h::UInt) = hash(hour(x), hash(minute(x), hash(second(x), hash(millisecond(x), hash(microsecond(x), hash(nanosecond(x), h)))))) -Base.sleep(duration::Period) = sleep(toms(duration) / 1000) +Base.sleep(duration::Period) = sleep(seconds(duration)) function Base.Timer(delay::Period; interval::Period=Second(0)) - Timer(toms(delay) / 1000, interval=toms(interval) / 1000) + Timer(seconds(delay), interval=seconds(interval)) end function Base.timedwait(testcb, timeout::Period; pollint::Period=Millisecond(100)) - timedwait(testcb, toms(timeout) / 1000, pollint=toms(pollint) / 1000) + timedwait(testcb, seconds(timeout), pollint=seconds(pollint)) end Base.OrderStyle(::Type{<:AbstractTime}) = Base.Ordered() diff --git a/stdlib/Dates/test/accessors.jl b/stdlib/Dates/test/accessors.jl index 819fa8c40ddbc..b690a81d70e49 100644 --- a/stdlib/Dates/test/accessors.jl +++ b/stdlib/Dates/test/accessors.jl @@ -29,7 +29,7 @@ using Test @test Dates.yearmonthday(730120) == (2000, 1, 1) end @testset "year/month/day" begin - # year, month, and day return the indivial components + # year, month, and day return the individual components # of yearmonthday, avoiding additional calculations when possible @test Dates.year(-1) == 0 @test Dates.month(-1) == 12 diff --git a/stdlib/Dates/test/arithmetic.jl b/stdlib/Dates/test/arithmetic.jl index 485fea5624066..333ba3a7c0088 100644 --- a/stdlib/Dates/test/arithmetic.jl +++ b/stdlib/Dates/test/arithmetic.jl @@ -10,6 +10,21 @@ using Dates b = Dates.Time(11, 59, 59) @test Dates.CompoundPeriod(a - b) == Dates.Hour(12) end + +struct MonthlyDate <: TimeType + instant::Dates.UTInstant{Month} +end +struct OtherTime <: Dates.AbstractDateTime + instant::Dates.UTInstant{Nanosecond} +end +@testset "TimeType arithmetic" begin + @test_throws MethodError DateTime(2023, 5, 2) - Date(2023, 5, 1) + # check that - between two same-type TimeTypes works by default + @test MonthlyDate(Dates.UTInstant(Month(10))) - MonthlyDate(Dates.UTInstant(Month(1))) == Month(9) + # ... 
and between two same-type AbstractDateTimes + @test OtherTime(Dates.UTInstant(Nanosecond(2))) - OtherTime(Dates.UTInstant(Nanosecond(1))) == Nanosecond(1) +end + @testset "Wrapping arithmetic for Months" begin # This ends up being trickier than expected because # the user might do 2014-01-01 + Month(-14) @@ -256,6 +271,24 @@ end @test dt - Dates.Millisecond(1) == Dates.DateTime(1972, 6, 30, 23, 59, 58, 999) @test dt + Dates.Millisecond(-1) == Dates.DateTime(1972, 6, 30, 23, 59, 58, 999) end + @testset "DateTime-Microsecond arithmetic" begin + dt = Dates.DateTime(1999, 12, 27) + @test dt + Dates.Microsecond(1) == dt + @test dt + Dates.Microsecond(501) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1) + @test dt + Dates.Microsecond(1499) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1) + @test dt - Dates.Microsecond(1) == dt + @test dt - Dates.Microsecond(501) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999) + @test dt - Dates.Microsecond(1499) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999) + end + @testset "DateTime-Nanosecond arithmetic" begin + dt = Dates.DateTime(1999, 12, 27) + @test dt + Dates.Nanosecond(1) == dt + @test dt + Dates.Nanosecond(500_001) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1) + @test dt + Dates.Nanosecond(1_499_999) == Dates.DateTime(1999, 12, 27, 0, 0, 0, 1) + @test dt - Dates.Nanosecond(1) == dt + @test dt - Dates.Nanosecond(500_001) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999) + @test dt - Dates.Nanosecond(1_499_999) == Dates.DateTime(1999, 12, 26, 23, 59, 59, 999) + end end @testset "Date arithmetic" begin @testset "Date-Year arithmetic" begin diff --git a/stdlib/Dates/test/conversions.jl b/stdlib/Dates/test/conversions.jl index 488af4110e884..99572b41b4f90 100644 --- a/stdlib/Dates/test/conversions.jl +++ b/stdlib/Dates/test/conversions.jl @@ -60,10 +60,16 @@ end if Sys.isapple() withenv("TZ" => "UTC") do - @test abs(Dates.now() - now(Dates.UTC)) < Dates.Second(1) + a = Dates.now() + b = Dates.now(Dates.UTC) + c = Dates.now() + @test a <= b <= c end end - @test abs(Dates.now() - now(Dates.UTC)) < Dates.Hour(16) + a = Dates.now() + b = now(Dates.UTC) + c = Dates.now() + @test abs(a - b) < Dates.Hour(16) + abs(c - a) end @testset "Issue #9171, #9169" begin let t = Dates.Period[Dates.Week(2), Dates.Day(14), Dates.Hour(14 * 24), Dates.Minute(14 * 24 * 60), Dates.Second(14 * 24 * 60 * 60), Dates.Millisecond(14 * 24 * 60 * 60 * 1000)] diff --git a/stdlib/Dates/test/io.jl b/stdlib/Dates/test/io.jl index 1c50676eb8346..2c99ac45d0c58 100644 --- a/stdlib/Dates/test/io.jl +++ b/stdlib/Dates/test/io.jl @@ -60,7 +60,9 @@ end end @testset "DateFormat printing" begin - @test sprint(show, DateFormat("yyyzzxmmdd\\MHH:MM:SS\\P")) == "dateformat\"yyyzzxmmdd\\MHH:MM:SSP\"" + @test sprint(show, DateFormat("yyyzzxmmdd\\MHH:MM:SS\\P")) == "dateformat\"yyyzzxmmdd\\MHH:MM:SS\\P\"" + @test sprint(show, dateformat"yyyy-mm-dd\THH:MM:SS.s") == "dateformat\"yyyy-mm-dd\\THH:MM:SS.s\"" + @test sprint(show, dateformat"yyyy-mm-ddTHH:MM:SS.s") == "dateformat\"yyyy-mm-ddTHH:MM:SS.s\"" @test sprint(show, DateFormat("yyy").tokens[1]) == "DatePart(yyy)" @test sprint(show, DateFormat("mmzzdd").tokens[2]) == "Delim(zz)" @test sprint(show, DateFormat("ddxmm").tokens[2]) == "Delim(x)" @@ -548,7 +550,7 @@ end @test Time("$t12", "$HH:MMp") == t end local tmstruct, strftime - withlocales(["C"]) do + withlocales(["C"]) do locale # test am/pm comparison handling tmstruct = Libc.strptime("%I:%M%p", t12) strftime = Libc.strftime("%I:%M%p", tmstruct) @@ -586,4 +588,34 @@ end @test (@inferred Nothing g()) == datetime 
end +@testset "Issue #43883: parsing empty strings" begin + for (T, name, fmt) in zip( + (DateTime, Date, Time), + ("DateTime", "Date or Time", "Date or Time"), + ("yyyy-mm-ddHHMMSS.s", "yyymmdd", "HHMMSS") + ) + @test_throws ArgumentError T("") + @test_throws ArgumentError T("", fmt) + @test_throws ArgumentError T("", DateFormat(fmt)) + try + T("") + @test false + catch err + @test err.msg == "Cannot parse an empty string as a $name" + end + + @test_throws ArgumentError parse(T, "") + @test_throws ArgumentError parse(T, "", DateFormat(fmt)) + try + parse(T, "") + @test false + catch err + @test err.msg == "Cannot parse an empty string as a $name" + end + + @test tryparse(T, "") === nothing + @test tryparse(T, "", DateFormat(fmt)) === nothing + end +end + end diff --git a/stdlib/Dates/test/periods.jl b/stdlib/Dates/test/periods.jl index c37a1666375a9..7cebfc55e7735 100644 --- a/stdlib/Dates/test/periods.jl +++ b/stdlib/Dates/test/periods.jl @@ -283,7 +283,7 @@ Beat(p::Period) = Beat(Dates.toms(p) ÷ 86400) Dates.toms(b::Beat) = Dates.value(b) * 86400 Dates._units(b::Beat) = " beat" * (abs(Dates.value(b)) == 1 ? "" : "s") Base.promote_rule(::Type{Dates.Day}, ::Type{Beat}) = Dates.Millisecond - Base.convert(::Type{T}, b::Beat) where {T<:Dates.Millisecond} = T(Dates.toms(b)) + Base.convert(::Type{T}, b::Beat) where {T<:Dates.Millisecond} = T(Dates.toms(b))::T @test Beat(1000) == Dates.Day(1) @test Beat(1) < Dates.Day(1) @@ -329,6 +329,14 @@ end @test Dates.default(Dates.Nanosecond) == zero(Dates.Nanosecond) end @testset "Conversions" begin + @test Dates.toms(1499 * us) == 1 + @test Dates.toms(501 * us) == 1 + @test Dates.toms(us) == 0 + + @test Dates.toms(1_499_999 * ns) == 1 + @test Dates.toms(500_001 * ns) == 1 + @test Dates.toms(ns) == 0 + @test Dates.toms(ms) == Dates.value(Dates.Millisecond(ms)) == 1 @test Dates.toms(s) == Dates.value(Dates.Millisecond(s)) == 1000 @test Dates.toms(mi) == Dates.value(Dates.Millisecond(mi)) == 60000 @@ -343,6 +351,15 @@ end @test Dates.days(Dates.Hour(24)) == 1 @test Dates.days(d) == 1 @test Dates.days(w) == 7 + + @test Dates.seconds(ns) == 0.000000001 + @test Dates.seconds(us) == 0.000001 + @test Dates.seconds(ms) == 0.001 + @test Dates.seconds(s) == 1 + @test Dates.seconds(mi) == 60 + @test Dates.seconds(h) == 3600 + @test Dates.seconds(d) == 86400 + @test Dates.seconds(w) == 604800 end @testset "issue #9214" begin @test 2s + (7ms + 1ms) == (2s + 7ms) + 1ms == 1ms + (2s + 7ms) == 1ms + (1s + 7ms) + 1s == 1ms + (2s + 3d + 7ms) + (-3d) == (1ms + (2s + 3d)) + (7ms - 3d) == (1ms + (2s + 3d)) - (3d - 7ms) @@ -523,6 +540,7 @@ end @test convert(Second, Minute(1) + Second(30)) === Second(90) @test convert(Minute, Minute(1) + Second(60)) === Minute(2) @test convert(Millisecond, Minute(1) + Second(30)) === Millisecond(90_000) + @test convert(Millisecond, Dates.CompoundPeriod()) === Millisecond(0) @test_throws InexactError convert(Minute, Minute(1) + Second(30)) @test_throws MethodError convert(Month, Minute(1) + Second(30)) @test_throws MethodError convert(Second, Month(1) + Second(30)) diff --git a/stdlib/Dates/test/ranges.jl b/stdlib/Dates/test/ranges.jl index 52416fc95ec0c..d4339dcde51d4 100644 --- a/stdlib/Dates/test/ranges.jl +++ b/stdlib/Dates/test/ranges.jl @@ -596,4 +596,19 @@ a = Dates.Time(23, 1, 1) @test length(utm_typemin:-Millisecond(1):utm_typemin) == 1 end +# Issue #45816 +@testset "default step for date ranges" begin + r = Date(2000, 1, 1):Date(2000, 12, 31) + @test step(r) === Day(1) + @test length(r) == 366 +end + +# Issue #48209 +@testset 
"steprange_last overflow" begin + epoch = Date(Date(1) - Day(1)) + dmax = epoch + Day(typemax(fieldtype(Day, :value))) + dmin = epoch + Day(typemin(fieldtype(Day, :value))) + @test_throws OverflowError StepRange(dmin, Day(1), dmax) +end + end # RangesTest module diff --git a/stdlib/Dates/test/rounding.jl b/stdlib/Dates/test/rounding.jl index 13e276c4426e8..85c90981423d3 100644 --- a/stdlib/Dates/test/rounding.jl +++ b/stdlib/Dates/test/rounding.jl @@ -189,7 +189,7 @@ end @test round(x, Dates.Nanosecond) == x end -@testset "Rouding DateTime to Date" begin +@testset "Rounding DateTime to Date" begin now_ = DateTime(2020, 9, 1, 13) for p in (Year, Month, Day) for r in (RoundUp, RoundDown) diff --git a/stdlib/Dates/test/types.jl b/stdlib/Dates/test/types.jl index 8823e56e41a2f..35a793867dc5a 100644 --- a/stdlib/Dates/test/types.jl +++ b/stdlib/Dates/test/types.jl @@ -74,6 +74,12 @@ ms = Dates.Millisecond(1) Dates.Hour(4), Dates.Second(10)) == Dates.DateTime(1, 2, 1, 4, 0, 10) end +@testset "DateTime construction from Date and Time" begin + @test Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12)) == Dates.DateTime(2023, 08, 07, 12, 0, 0, 0) + @test_throws InexactError Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12, 0, 0, 0, 42)) + @test_throws InexactError Dates.DateTime(Dates.Date(2023, 08, 07), Dates.Time(12, 0, 0, 0, 0, 42)) +end + @testset "Date construction by parts" begin test = Dates.Date(Dates.UTD(734869)) @test Dates.Date(2013) == test @@ -273,6 +279,11 @@ end end +@testset "timer" begin + @test hasmethod(Timer, (Period,)) + @test hasmethod(Timer, (Function, Period)) +end + @testset "timedwait" begin @test timedwait(() -> false, Second(0); pollint=Millisecond(1)) === :timed_out end diff --git a/stdlib/DelimitedFiles.version b/stdlib/DelimitedFiles.version index 972918a83b75e..d741690a96838 100644 --- a/stdlib/DelimitedFiles.version +++ b/stdlib/DelimitedFiles.version @@ -1,4 +1,4 @@ DELIMITEDFILES_BRANCH = main -DELIMITEDFILES_SHA1 = f520e069d2eb8282e8a07dcb384fe0e0c6293bc3 +DELIMITEDFILES_SHA1 = db79c842f95f55b1f8d8037c0d3363ab21cd3b90 DELIMITEDFILES_GIT_URL := https://github.com/JuliaData/DelimitedFiles.jl.git DELIMITEDFILES_TAR_URL = https://api.github.com/repos/JuliaData/DelimitedFiles.jl/tarball/$1 diff --git a/stdlib/Distributed.version b/stdlib/Distributed.version new file mode 100644 index 0000000000000..ca528066e9403 --- /dev/null +++ b/stdlib/Distributed.version @@ -0,0 +1,4 @@ +DISTRIBUTED_BRANCH = master +DISTRIBUTED_SHA1 = 41c01069533e22a6ce6b794746e4b3aa9f5a25cd +DISTRIBUTED_GIT_URL := https://github.com/JuliaLang/Distributed.jl +DISTRIBUTED_TAR_URL = https://api.github.com/repos/JuliaLang/Distributed.jl/tarball/$1 diff --git a/stdlib/Distributed/Project.toml b/stdlib/Distributed/Project.toml deleted file mode 100644 index ecec870290041..0000000000000 --- a/stdlib/Distributed/Project.toml +++ /dev/null @@ -1,14 +0,0 @@ -name = "Distributed" -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[deps] -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" - -[extras] -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["LinearAlgebra", "Test"] diff --git a/stdlib/Distributed/docs/src/index.md b/stdlib/Distributed/docs/src/index.md deleted file mode 100644 index 00b40de49b396..0000000000000 --- a/stdlib/Distributed/docs/src/index.md +++ /dev/null @@ -1,71 +0,0 @@ -# [Distributed Computing](@id 
man-distributed) -Tools for distributed parallel processing. - -```@docs -Distributed.addprocs -Distributed.nprocs -Distributed.nworkers -Distributed.procs() -Distributed.procs(::Integer) -Distributed.workers -Distributed.rmprocs -Distributed.interrupt -Distributed.myid -Distributed.pmap -Distributed.RemoteException -Distributed.ProcessExitedException -Distributed.Future -Distributed.RemoteChannel -Distributed.fetch(::Distributed.Future) -Distributed.fetch(::RemoteChannel) -Distributed.remotecall(::Any, ::Integer, ::Any...) -Distributed.remotecall_wait(::Any, ::Integer, ::Any...) -Distributed.remotecall_fetch(::Any, ::Integer, ::Any...) -Distributed.remote_do(::Any, ::Integer, ::Any...) -Distributed.put!(::RemoteChannel, ::Any...) -Distributed.put!(::Distributed.Future, ::Any) -Distributed.take!(::RemoteChannel, ::Any...) -Distributed.isready(::RemoteChannel, ::Any...) -Distributed.isready(::Distributed.Future) -Distributed.AbstractWorkerPool -Distributed.WorkerPool -Distributed.CachingPool -Distributed.default_worker_pool -Distributed.clear!(::CachingPool) -Distributed.remote -Distributed.remotecall(::Any, ::AbstractWorkerPool, ::Any...) -Distributed.remotecall_wait(::Any, ::AbstractWorkerPool, ::Any...) -Distributed.remotecall_fetch(::Any, ::AbstractWorkerPool, ::Any...) -Distributed.remote_do(::Any, ::AbstractWorkerPool, ::Any...) -Distributed.@spawnat -Distributed.@fetch -Distributed.@fetchfrom -Distributed.@distributed -Distributed.@everywhere -Distributed.clear!(::Any, ::Any; ::Any) -Distributed.remoteref_id -Distributed.channel_from_id -Distributed.worker_id_from_socket -Distributed.cluster_cookie() -Distributed.cluster_cookie(::Any) -``` - -## Cluster Manager Interface - -This interface provides a mechanism to launch and manage Julia workers on different cluster environments. -There are two types of managers present in Base: `LocalManager`, for launching additional workers on the -same host, and `SSHManager`, for launching on remote hosts via `ssh`. TCP/IP sockets are used to connect -and transport messages between processes. It is possible for Cluster Managers to provide a different transport. - -```@docs -Distributed.ClusterManager -Distributed.WorkerConfig -Distributed.launch -Distributed.manage -Distributed.kill(::ClusterManager, ::Int, ::WorkerConfig) -Distributed.connect(::ClusterManager, ::Int, ::WorkerConfig) -Distributed.init_worker -Distributed.start_worker -Distributed.process_messages -Distributed.default_addprocs_params -``` diff --git a/stdlib/Distributed/src/Distributed.jl b/stdlib/Distributed/src/Distributed.jl deleted file mode 100644 index 3bcbc7b67f60d..0000000000000 --- a/stdlib/Distributed/src/Distributed.jl +++ /dev/null @@ -1,115 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" -Tools for distributed parallel processing. 
-""" -module Distributed - -# imports for extension -import Base: getindex, wait, put!, take!, fetch, isready, push!, length, - hash, ==, kill, close, isopen, showerror - -# imports for use -using Base: Process, Semaphore, JLOptions, buffer_writes, @async_unwrap, - VERSION_STRING, binding_module, atexit, julia_exename, - julia_cmd, AsyncGenerator, acquire, release, invokelatest, - shell_escape_posixly, shell_escape_csh, - shell_escape_wincmd, escape_microsoft_c_args, - uv_error, something, notnothing, isbuffered, mapany -using Base.Threads: Event - -using Serialization, Sockets -import Serialization: serialize, deserialize -import Sockets: connect, wait_connected - -# NOTE: clusterserialize.jl imports additional symbols from Serialization for use - -export - @spawn, - @spawnat, - @fetch, - @fetchfrom, - @everywhere, - @distributed, - - AbstractWorkerPool, - addprocs, - CachingPool, - clear!, - ClusterManager, - default_worker_pool, - init_worker, - interrupt, - launch, - manage, - myid, - nprocs, - nworkers, - pmap, - procs, - remote, - remotecall, - remotecall_fetch, - remotecall_wait, - remote_do, - rmprocs, - workers, - WorkerPool, - RemoteChannel, - Future, - WorkerConfig, - RemoteException, - ProcessExitedException, - - process_messages, - remoteref_id, - channel_from_id, - worker_id_from_socket, - cluster_cookie, - start_worker, - -# Used only by shared arrays. - check_same_host - -function _require_callback(mod::Base.PkgId) - if Base.toplevel_load[] && myid() == 1 && nprocs() > 1 - # broadcast top-level (e.g. from Main) import/using from node 1 (only) - @sync for p in procs() - p == 1 && continue - @async_unwrap remotecall_wait(p) do - Base.require(mod) - nothing - end - end - end -end - -const REF_ID = Threads.Atomic{Int}(1) -next_ref_id() = Threads.atomic_add!(REF_ID, 1) - -struct RRID - whence::Int - id::Int - - RRID() = RRID(myid(), next_ref_id()) - RRID(whence, id) = new(whence, id) -end - -hash(r::RRID, h::UInt) = hash(r.whence, hash(r.id, h)) -==(r::RRID, s::RRID) = (r.whence==s.whence && r.id==s.id) - -include("clusterserialize.jl") -include("cluster.jl") # cluster setup and management, addprocs -include("messages.jl") -include("process_messages.jl") # process incoming messages -include("remotecall.jl") # the remotecall* api -include("macros.jl") # @spawn and friends -include("workerpool.jl") -include("pmap.jl") -include("managers.jl") # LocalManager and SSHManager - -function __init__() - init_parallel() -end - -end diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl deleted file mode 100644 index 37f1660e19478..0000000000000 --- a/stdlib/Distributed/src/cluster.jl +++ /dev/null @@ -1,1362 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" - ClusterManager - -Supertype for cluster managers, which control workers processes as a cluster. -Cluster managers implement how workers can be added, removed and communicated with. -`SSHManager` and `LocalManager` are subtypes of this. -""" -abstract type ClusterManager end - -""" - WorkerConfig - -Type used by [`ClusterManager`](@ref)s to control workers added to their clusters. 
Some fields -are used by all cluster managers to access a host: - * `io` -- the connection used to access the worker (a subtype of `IO` or `Nothing`) - * `host` -- the host address (either a `String` or `Nothing`) - * `port` -- the port on the host used to connect to the worker (either an `Int` or `Nothing`) - -Some are used by the cluster manager to add workers to an already-initialized host: - * `count` -- the number of workers to be launched on the host - * `exename` -- the path to the Julia executable on the host, defaults to `"\$(Sys.BINDIR)/julia"` or - `"\$(Sys.BINDIR)/julia-debug"` - * `exeflags` -- flags to use when lauching Julia remotely - -The `userdata` field is used to store information for each worker by external managers. - -Some fields are used by `SSHManager` and similar managers: - * `tunnel` -- `true` (use tunneling), `false` (do not use tunneling), or [`nothing`](@ref) (use default for the manager) - * `multiplex` -- `true` (use SSH multiplexing for tunneling) or `false` - * `forward` -- the forwarding option used for `-L` option of ssh - * `bind_addr` -- the address on the remote host to bind to - * `sshflags` -- flags to use in establishing the SSH connection - * `max_parallel` -- the maximum number of workers to connect to in parallel on the host - -Some fields are used by both `LocalManager`s and `SSHManager`s: - * `connect_at` -- determines whether this is a worker-to-worker or driver-to-worker setup call - * `process` -- the process which will be connected (usually the manager will assign this during [`addprocs`](@ref)) - * `ospid` -- the process ID according to the host OS, used to interrupt worker processes - * `environ` -- private dictionary used to store temporary information by Local/SSH managers - * `ident` -- worker as identified by the [`ClusterManager`](@ref) - * `connect_idents` -- list of worker ids the worker must connect to if using a custom topology - * `enable_threaded_blas` -- `true`, `false`, or `nothing`, whether to use threaded BLAS or not on the workers -""" -mutable struct WorkerConfig - # Common fields relevant to all cluster managers - io::Union{IO, Nothing} - host::Union{String, Nothing} - port::Union{Int, Nothing} - - # Used when launching additional workers at a host - count::Union{Int, Symbol, Nothing} - exename::Union{String, Cmd, Nothing} - exeflags::Union{Cmd, Nothing} - - # External cluster managers can use this to store information at a per-worker level - # Can be a dict if multiple fields need to be stored. - userdata::Any - - # SSHManager / SSH tunnel connections to workers - tunnel::Union{Bool, Nothing} - multiplex::Union{Bool, Nothing} - forward::Union{String, Nothing} - bind_addr::Union{String, Nothing} - sshflags::Union{Cmd, Nothing} - max_parallel::Union{Int, Nothing} - - # Used by Local/SSH managers - connect_at::Any - - process::Union{Process, Nothing} - ospid::Union{Int, Nothing} - - # Private dictionary used to store temporary information by Local/SSH managers. - environ::Union{Dict, Nothing} - - # Connections to be setup depending on the network topology requested - ident::Any # Worker as identified by the Cluster Manager. - # List of other worker idents this worker must connect with. Used with topology T_CUSTOM. 
- connect_idents::Union{Array, Nothing} - - # Run multithreaded blas on worker - enable_threaded_blas::Union{Bool, Nothing} - - function WorkerConfig() - wc = new() - for n in 1:fieldcount(WorkerConfig) - setfield!(wc, n, nothing) - end - wc - end -end - -@enum WorkerState W_CREATED W_CONNECTED W_TERMINATING W_TERMINATED -mutable struct Worker - id::Int - msg_lock::Threads.ReentrantLock # Lock for del_msgs, add_msgs, and gcflag - del_msgs::Array{Any,1} # XXX: Could del_msgs and add_msgs be Channels? - add_msgs::Array{Any,1} - @atomic gcflag::Bool - state::WorkerState - c_state::Condition # wait for state changes - ct_time::Float64 # creation time - conn_func::Any # used to setup connections lazily - - r_stream::IO - w_stream::IO - w_serializer::ClusterSerializer # writes can happen from any task hence store the - # serializer as part of the Worker object - manager::ClusterManager - config::WorkerConfig - version::Union{VersionNumber, Nothing} # Julia version of the remote process - initialized::Event - - function Worker(id::Int, r_stream::IO, w_stream::IO, manager::ClusterManager; - version::Union{VersionNumber, Nothing}=nothing, - config::WorkerConfig=WorkerConfig()) - w = Worker(id) - w.r_stream = r_stream - w.w_stream = buffer_writes(w_stream) - w.w_serializer = ClusterSerializer(w.w_stream) - w.manager = manager - w.config = config - w.version = version - set_worker_state(w, W_CONNECTED) - register_worker_streams(w) - w - end - - Worker(id::Int) = Worker(id, nothing) - function Worker(id::Int, conn_func) - @assert id > 0 - if haskey(map_pid_wrkr, id) - return map_pid_wrkr[id] - end - w=new(id, Threads.ReentrantLock(), [], [], false, W_CREATED, Condition(), time(), conn_func) - w.initialized = Event() - register_worker(w) - w - end - - Worker() = Worker(get_next_pid()) -end - -function set_worker_state(w, state) - w.state = state - notify(w.c_state; all=true) -end - -function check_worker_state(w::Worker) - if w.state === W_CREATED - if !isclusterlazy() - if PGRP.topology === :all_to_all - # Since higher pids connect with lower pids, the remote worker - # may not have connected to us yet. Wait for some time. - wait_for_conn(w) - else - error("peer $(w.id) is not connected to $(myid()). Topology : " * string(PGRP.topology)) - end - else - w.ct_time = time() - if myid() > w.id - t = @async exec_conn_func(w) - else - # route request via node 1 - t = @async remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1, w.id, myid()) - end - errormonitor(t) - wait_for_conn(w) - end - end -end - -exec_conn_func(id::Int) = exec_conn_func(worker_from_id(id)::Worker) -function exec_conn_func(w::Worker) - try - f = notnothing(w.conn_func) - # Will be called if some other task tries to connect at the same time. 
- w.conn_func = () -> wait_for_conn(w) - f() - catch e - w.conn_func = () -> throw(e) - rethrow() - end - nothing -end - -function wait_for_conn(w) - if w.state === W_CREATED - timeout = worker_timeout() - (time() - w.ct_time) - timeout <= 0 && error("peer $(w.id) has not connected to $(myid())") - - @async (sleep(timeout); notify(w.c_state; all=true)) - wait(w.c_state) - w.state === W_CREATED && error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds") - end - nothing -end - -## process group creation ## - -mutable struct LocalProcess - id::Int - bind_addr::String - bind_port::UInt16 - cookie::String - LocalProcess() = new(1) -end - -worker_timeout() = parse(Float64, get(ENV, "JULIA_WORKER_TIMEOUT", "60.0")) - - -## worker creation and setup ## -""" - start_worker([out::IO=stdout], cookie::AbstractString=readline(stdin); close_stdin::Bool=true, stderr_to_stdout::Bool=true) - -`start_worker` is an internal function which is the default entry point for -worker processes connecting via TCP/IP. It sets up the process as a Julia cluster -worker. - -host:port information is written to stream `out` (defaults to stdout). - -The function reads the cookie from stdin if required, and listens on a free port -(or if specified, the port in the `--bind-to` command line option) and schedules -tasks to process incoming TCP connections and requests. It also (optionally) -closes stdin and redirects stderr to stdout. - -It does not return. -""" -start_worker(cookie::AbstractString=readline(stdin); kwargs...) = start_worker(stdout, cookie; kwargs...) -function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_stdin::Bool=true, stderr_to_stdout::Bool=true) - init_multi() - - if close_stdin # workers will not use it - redirect_stdin(devnull) - close(stdin) - end - stderr_to_stdout && redirect_stderr(stdout) - - init_worker(cookie) - interface = IPv4(LPROC.bind_addr) - if LPROC.bind_port == 0 - port_hint = 9000 + (getpid() % 1000) - (port, sock) = listenany(interface, UInt16(port_hint)) - LPROC.bind_port = port - else - sock = listen(interface, LPROC.bind_port) - end - errormonitor(@async while isopen(sock) - client = accept(sock) - process_messages(client, client, true) - end) - print(out, "julia_worker:") # print header - print(out, "$(string(LPROC.bind_port))#") # print port - print(out, LPROC.bind_addr) - print(out, '\n') - flush(out) - - Sockets.nagle(sock, false) - Sockets.quickack(sock, true) - - if ccall(:jl_running_on_valgrind,Cint,()) != 0 - println(out, "PID = $(getpid())") - end - - try - # To prevent hanging processes on remote machines, newly launched workers exit if the - # master process does not connect in time. - check_master_connect() - while true; wait(); end - catch err - print(stderr, "unhandled exception on $(myid()): $(err)\nexiting.\n") - end - - close(sock) - exit(0) -end - - -function redirect_worker_output(ident, stream) - t = @async while !eof(stream) - line = readline(stream) - if startswith(line, " From worker ") - # stdout's of "additional" workers started from an initial worker on a host are not available - # on the master directly - they are routed via the initial worker's stdout. - println(line) - else - println(" From worker $(ident):\t$line") - end - end - errormonitor(t) -end - -struct LaunchWorkerError <: Exception - msg::String -end - -Base.showerror(io::IO, e::LaunchWorkerError) = print(io, e.msg) - -# The default TCP transport relies on the worker listening on a free -# port available and printing its bind address and port. 
-# The master process uses this to connect to the worker and subsequently -# setup a all-to-all network. -function read_worker_host_port(io::IO) - t0 = time_ns() - - # Wait at most for JULIA_WORKER_TIMEOUT seconds to read host:port - # info from the worker - timeout = worker_timeout() * 1e9 - # We expect the first line to contain the host:port string. However, as - # the worker may be launched via ssh or a cluster manager like SLURM, - # ignore any informational / warning lines printed by the launch command. - # If we do not find the host:port string in the first 1000 lines, treat it - # as an error. - - ntries = 1000 - leader = String[] - try - while ntries > 0 - readtask = @async readline(io) - yield() - while !istaskdone(readtask) && ((time_ns() - t0) < timeout) - sleep(0.05) - end - !istaskdone(readtask) && break - - conninfo = fetch(readtask) - if isempty(conninfo) && !isopen(io) - throw(LaunchWorkerError("Unable to read host:port string from worker. Launch command exited with error?")) - end - - ntries -= 1 - bind_addr, port = parse_connection_info(conninfo) - if !isempty(bind_addr) - return bind_addr, port - end - - # collect unmatched lines - push!(leader, conninfo) - end - close(io) - if ntries > 0 - throw(LaunchWorkerError("Timed out waiting to read host:port string from worker.")) - else - throw(LaunchWorkerError("Unexpected output from worker launch command. Host:port string not found.")) - end - finally - for line in leader - println("\tFrom worker startup:\t", line) - end - end -end - -function parse_connection_info(str) - m = match(r"^julia_worker:(\d+)#(.*)", str) - if m !== nothing - (String(m.captures[2]), parse(UInt16, m.captures[1])) - else - ("", UInt16(0)) - end -end - -""" - init_worker(cookie::AbstractString, manager::ClusterManager=DefaultClusterManager()) - -Called by cluster managers implementing custom transports. It initializes a newly launched -process as a worker. Command line argument `--worker[=]` has the effect of initializing a -process as a worker using TCP/IP sockets for transport. -`cookie` is a [`cluster_cookie`](@ref). -""" -function init_worker(cookie::AbstractString, manager::ClusterManager=DefaultClusterManager()) - myrole!(:worker) - - # On workers, the default cluster manager connects via TCP sockets. Custom - # transports will need to call this function with their own manager. - global cluster_manager - cluster_manager = manager - - # Since our pid has yet to be set, ensure no RemoteChannel / Future have been created or addprocs() called. - @assert nprocs() <= 1 - @assert isempty(PGRP.refs) - @assert isempty(client_refs) - - # System is started in head node mode, cleanup related entries - empty!(PGRP.workers) - empty!(map_pid_wrkr) - - cluster_cookie(cookie) - nothing -end - - -# The main function for adding worker processes. -# `manager` is of type ClusterManager. The respective managers are responsible -# for launching the workers. All keyword arguments (plus a few default values) -# are available as a dictionary to the `launch` methods -# -# Only one addprocs can be in progress at any time -# -const worker_lock = ReentrantLock() - -""" - addprocs(manager::ClusterManager; kwargs...) -> List of process identifiers - -Launches worker processes via the specified cluster manager. - -For example, Beowulf clusters are supported via a custom cluster manager implemented in -the package `ClusterManagers.jl`. 
- -The number of seconds a newly launched worker waits for connection establishment from the -master can be specified via variable `JULIA_WORKER_TIMEOUT` in the worker process's -environment. Relevant only when using TCP/IP as transport. - -To launch workers without blocking the REPL, or the containing function -if launching workers programmatically, execute `addprocs` in its own task. - -# Examples - -```julia -# On busy clusters, call `addprocs` asynchronously -t = @async addprocs(...) -``` - -```julia -# Utilize workers as and when they come online -if nprocs() > 1 # Ensure at least one new worker is available - .... # perform distributed execution -end -``` - -```julia -# Retrieve newly launched worker IDs, or any error messages -if istaskdone(t) # Check if `addprocs` has completed to ensure `fetch` doesn't block - if nworkers() == N - new_pids = fetch(t) - else - fetch(t) - end -end -``` -""" -function addprocs(manager::ClusterManager; kwargs...) - init_multi() - - cluster_mgmt_from_master_check() - - lock(worker_lock) - try - addprocs_locked(manager::ClusterManager; kwargs...) - finally - unlock(worker_lock) - end -end - -function addprocs_locked(manager::ClusterManager; kwargs...) - params = merge(default_addprocs_params(manager), Dict{Symbol,Any}(kwargs)) - topology(Symbol(params[:topology])) - - if PGRP.topology !== :all_to_all - params[:lazy] = false - end - - if PGRP.lazy === nothing || nprocs() == 1 - PGRP.lazy = params[:lazy] - elseif isclusterlazy() != params[:lazy] - throw(ArgumentError(string("Active workers with lazy=", isclusterlazy(), - ". Cannot set lazy=", params[:lazy]))) - end - - # References to launched workers, filled when each worker is fully initialized and - # has connected to all nodes. - launched_q = Int[] # Asynchronously filled by the launch method - - # The `launch` method should add an object of type WorkerConfig for every - # worker launched. It provides information required on how to connect - # to it. - - # FIXME: launched should be a Channel, launch_ntfy should be a Threads.Condition - # but both are part of the public interface. This means we currently can't use - # `Threads.@spawn` in the code below. - launched = WorkerConfig[] - launch_ntfy = Condition() - - # call manager's `launch` is a separate task. This allows the master - # process initiate the connection setup process as and when workers come - # online - t_launch = @async launch(manager, params, launched, launch_ntfy) - - @sync begin - while true - if isempty(launched) - istaskdone(t_launch) && break - @async (sleep(1); notify(launch_ntfy)) - wait(launch_ntfy) - end - - if !isempty(launched) - wconfig = popfirst!(launched) - let wconfig=wconfig - @async setup_launched_worker(manager, wconfig, launched_q) - end - end - end - end - - Base.wait(t_launch) # catches any thrown errors from the launch task - - # Since all worker-to-worker setups may not have completed by the time this - # function returns to the caller, send the complete list to all workers. - # Useful for nprocs(), nworkers(), etc to return valid values on the workers. - all_w = workers() - for pid in all_w - remote_do(set_valid_processes, pid, all_w) - end - - sort!(launched_q) -end - -function set_valid_processes(plist::Array{Int}) - for pid in setdiff(plist, workers()) - myid() != pid && Worker(pid) - end -end - -""" - default_addprocs_params(mgr::ClusterManager) -> Dict{Symbol, Any} - -Implemented by cluster managers. The default keyword parameters passed when calling -`addprocs(mgr)`. 
The minimal set of options is available by calling -`default_addprocs_params()` -""" -default_addprocs_params(::ClusterManager) = default_addprocs_params() -default_addprocs_params() = Dict{Symbol,Any}( - :topology => :all_to_all, - :dir => pwd(), - :exename => joinpath(Sys.BINDIR, julia_exename()), - :exeflags => ``, - :env => [], - :enable_threaded_blas => false, - :lazy => true) - - -function setup_launched_worker(manager, wconfig, launched_q) - pid = create_worker(manager, wconfig) - push!(launched_q, pid) - - # When starting workers on remote multi-core hosts, `launch` can (optionally) start only one - # process on the remote machine, with a request to start additional workers of the - # same type. This is done by setting an appropriate value to `WorkerConfig.cnt`. - cnt = something(wconfig.count, 1) - if cnt === :auto - cnt = wconfig.environ[:cpu_threads] - end - cnt = cnt - 1 # Removing self from the requested number - - if cnt > 0 - launch_n_additional_processes(manager, pid, wconfig, cnt, launched_q) - end -end - - -function launch_n_additional_processes(manager, frompid, fromconfig, cnt, launched_q) - @sync begin - exename = notnothing(fromconfig.exename) - exeflags = something(fromconfig.exeflags, ``) - cmd = `$exename $exeflags` - - new_addresses = remotecall_fetch(launch_additional, frompid, cnt, cmd) - for address in new_addresses - (bind_addr, port) = address - - wconfig = WorkerConfig() - for x in [:host, :tunnel, :multiplex, :sshflags, :exeflags, :exename, :enable_threaded_blas] - Base.setproperty!(wconfig, x, Base.getproperty(fromconfig, x)) - end - wconfig.bind_addr = bind_addr - wconfig.port = port - - let wconfig=wconfig - @async begin - pid = create_worker(manager, wconfig) - remote_do(redirect_output_from_additional_worker, frompid, pid, port) - push!(launched_q, pid) - end - end - end - end -end - -function create_worker(manager, wconfig) - # only node 1 can add new nodes, since nobody else has the full list of address:port - @assert LPROC.id == 1 - timeout = worker_timeout() - - # initiate a connect. Does not wait for connection completion in case of TCP. - w = Worker() - local r_s, w_s - try - (r_s, w_s) = connect(manager, w.id, wconfig) - catch ex - try - deregister_worker(w.id) - kill(manager, w.id, wconfig) - finally - rethrow(ex) - end - end - - w = Worker(w.id, r_s, w_s, manager; config=wconfig) - # install a finalizer to perform cleanup if necessary - finalizer(w) do w - if myid() == 1 - manage(w.manager, w.id, w.config, :finalize) - end - end - - # set when the new worker has finshed connections with all other workers - ntfy_oid = RRID() - rr_ntfy_join = lookup_ref(ntfy_oid) - rr_ntfy_join.waitingfor = myid() - - # Start a new task to handle inbound messages from connected worker in master. - # Also calls `wait_connected` on TCP streams. - process_messages(w.r_stream, w.w_stream, false) - - # send address information of all workers to the new worker. - # Cluster managers set the address of each worker in `WorkerConfig.connect_at`. - # A new worker uses this to setup an all-to-all network if topology :all_to_all is specified. - # Workers with higher pids connect to workers with lower pids. Except process 1 (master) which - # initiates connections to all workers. - - # Connection Setup Protocol: - # - Master sends 16-byte cookie followed by 16-byte version string and a JoinPGRP message to all workers - # - On each worker - # - Worker responds with a 16-byte version followed by a JoinCompleteMsg - # - Connects to all workers less than its pid. 
Sends the cookie, version and an IdentifySocket message - # - Workers with incoming connection requests write back their Version and an IdentifySocketAckMsg message - # - On master, receiving a JoinCompleteMsg triggers rr_ntfy_join (signifies that worker setup is complete) - - join_list = [] - if PGRP.topology === :all_to_all - # need to wait for lower worker pids to have completed connecting, since the numerical value - # of pids is relevant to the connection process, i.e., higher pids connect to lower pids and they - # require the value of config.connect_at which is set only upon connection completion - for jw in PGRP.workers - if (jw.id != 1) && (jw.id < w.id) - (jw.state === W_CREATED) && wait(jw.c_state) - push!(join_list, jw) - end - end - - elseif PGRP.topology === :custom - # wait for requested workers to be up before connecting to them. - filterfunc(x) = (x.id != 1) && isdefined(x, :config) && - (notnothing(x.config.ident) in something(wconfig.connect_idents, [])) - - wlist = filter(filterfunc, PGRP.workers) - waittime = 0 - while wconfig.connect_idents !== nothing && - length(wlist) < length(wconfig.connect_idents) - if waittime >= timeout - error("peer workers did not connect within $timeout seconds") - end - sleep(1.0) - waittime += 1 - wlist = filter(filterfunc, PGRP.workers) - end - - for wl in wlist - (wl.state === W_CREATED) && wait(wl.c_state) - push!(join_list, wl) - end - end - - all_locs = mapany(x -> isa(x, Worker) ? - (something(x.config.connect_at, ()), x.id) : - ((), x.id, true), - join_list) - send_connection_hdr(w, true) - enable_threaded_blas = something(wconfig.enable_threaded_blas, false) - join_message = JoinPGRPMsg(w.id, all_locs, PGRP.topology, enable_threaded_blas, isclusterlazy()) - send_msg_now(w, MsgHeader(RRID(0,0), ntfy_oid), join_message) - - @async manage(w.manager, w.id, w.config, :register) - # wait for rr_ntfy_join with timeout - timedout = false - @async (sleep($timeout); timedout = true; put!(rr_ntfy_join, 1)) - wait(rr_ntfy_join) - if timedout - error("worker did not connect within $timeout seconds") - end - lock(client_refs) do - delete!(PGRP.refs, ntfy_oid) - end - - return w.id -end - - -# Called on the first worker on a remote host. Used to optimize launching -# of multiple workers on a remote host (to leverage multi-core) - -additional_io_objs=Dict() -function launch_additional(np::Integer, cmd::Cmd) - io_objs = Vector{Any}(undef, np) - addresses = Vector{Any}(undef, np) - - for i in 1:np - io = open(detach(cmd), "r+") - write_cookie(io) - io_objs[i] = io.out - end - - for (i,io) in enumerate(io_objs) - (host, port) = read_worker_host_port(io) - addresses[i] = (host, port) - additional_io_objs[port] = io - end - - return addresses -end - -function redirect_output_from_additional_worker(pid, port) - io = additional_io_objs[port] - redirect_worker_output("$pid", io) - delete!(additional_io_objs, port) - nothing -end - -function check_master_connect() - timeout = worker_timeout() * 1e9 - # If we do not have at least process 1 connect to us within timeout - # we log an error and exit, unless we're running on valgrind - if ccall(:jl_running_on_valgrind,Cint,()) != 0 - return - end - @async begin - start = time_ns() - while !haskey(map_pid_wrkr, 1) && (time_ns() - start) < timeout - sleep(1.0) - end - - if !haskey(map_pid_wrkr, 1) - print(stderr, "Master process (id 1) could not connect within $(timeout/1e9) seconds.\nexiting.\n") - exit(1) - end - end -end - - -""" - cluster_cookie() -> cookie - -Return the cluster cookie. 
-""" -cluster_cookie() = (init_multi(); LPROC.cookie) - -""" - cluster_cookie(cookie) -> cookie - -Set the passed cookie as the cluster cookie, then returns it. -""" -function cluster_cookie(cookie) - init_multi() - # The cookie must be an ASCII string with length <= HDR_COOKIE_LEN - @assert isascii(cookie) - @assert length(cookie) <= HDR_COOKIE_LEN - - cookie = rpad(cookie, HDR_COOKIE_LEN) - - LPROC.cookie = cookie - cookie -end - - -let next_pid = 2 # 1 is reserved for the client (always) - global get_next_pid - function get_next_pid() - retval = next_pid - next_pid += 1 - retval - end -end - -mutable struct ProcessGroup - name::String - workers::Array{Any,1} - refs::Dict{RRID,Any} # global references - topology::Symbol - lazy::Union{Bool, Nothing} - - ProcessGroup(w::Array{Any,1}) = new("pg-default", w, Dict(), :all_to_all, nothing) -end -const PGRP = ProcessGroup([]) - -function topology(t) - @assert t in [:all_to_all, :master_worker, :custom] - if (PGRP.topology==t) || ((myid()==1) && (nprocs()==1)) || (myid() > 1) - PGRP.topology = t - else - error("Workers with Topology $(PGRP.topology) already exist. Requested Topology $(t) cannot be set.") - end - t -end - -isclusterlazy() = something(PGRP.lazy, false) - -get_bind_addr(pid::Integer) = get_bind_addr(worker_from_id(pid)) -get_bind_addr(w::LocalProcess) = LPROC.bind_addr -function get_bind_addr(w::Worker) - if w.config.bind_addr === nothing - if w.id != myid() - w.config.bind_addr = remotecall_fetch(get_bind_addr, w.id, w.id) - end - end - w.config.bind_addr -end - -# globals -const LPROC = LocalProcess() -const LPROCROLE = Ref{Symbol}(:master) -const HDR_VERSION_LEN=16 -const HDR_COOKIE_LEN=16 -const map_pid_wrkr = Dict{Int, Union{Worker, LocalProcess}}() -const map_sock_wrkr = IdDict() -const map_del_wrkr = Set{Int}() - -# whether process is a master or worker in a distributed setup -myrole() = LPROCROLE[] -function myrole!(proctype::Symbol) - LPROCROLE[] = proctype -end - -# cluster management related API -""" - myid() - -Get the id of the current process. - -# Examples -```julia-repl -julia> myid() -1 - -julia> remotecall_fetch(() -> myid(), 4) -4 -``` -""" -myid() = LPROC.id - -""" - nprocs() - -Get the number of available processes. - -# Examples -```julia-repl -julia> nprocs() -3 - -julia> workers() -2-element Array{Int64,1}: - 2 - 3 -``` -""" -function nprocs() - if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy()) - n = length(PGRP.workers) - # filter out workers in the process of being setup/shutdown. - for jw in PGRP.workers - if !isa(jw, LocalProcess) && (jw.state !== W_CONNECTED) - n = n - 1 - end - end - return n - else - return length(PGRP.workers) - end -end - -""" - nworkers() - -Get the number of available worker processes. This is one less than [`nprocs()`](@ref). Equal to -`nprocs()` if `nprocs() == 1`. - -# Examples -```julia-repl -\$ julia -p 2 - -julia> nprocs() -3 - -julia> nworkers() -2 -``` -""" -function nworkers() - n = nprocs() - n == 1 ? 1 : n-1 -end - -""" - procs() - -Return a list of all process identifiers, including pid 1 (which is not included by [`workers()`](@ref)). - -# Examples -```julia-repl -\$ julia -p 2 - -julia> procs() -3-element Array{Int64,1}: - 1 - 2 - 3 -``` -""" -function procs() - if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy()) - # filter out workers in the process of being setup/shutdown. 
- return Int[x.id for x in PGRP.workers if isa(x, LocalProcess) || (x.state === W_CONNECTED)] - else - return Int[x.id for x in PGRP.workers] - end -end - -function id_in_procs(id) # faster version of `id in procs()` - if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy()) - for x in PGRP.workers - if (x.id::Int) == id && (isa(x, LocalProcess) || (x::Worker).state === W_CONNECTED) - return true - end - end - else - for x in PGRP.workers - if (x.id::Int) == id - return true - end - end - end - return false -end - -""" - procs(pid::Integer) - -Return a list of all process identifiers on the same physical node. -Specifically all workers bound to the same ip-address as `pid` are returned. -""" -function procs(pid::Integer) - if myid() == 1 - all_workers = [x for x in PGRP.workers if isa(x, LocalProcess) || (x.state === W_CONNECTED)] - if (pid == 1) || (isa(map_pid_wrkr[pid].manager, LocalManager)) - Int[x.id for x in filter(w -> (w.id==1) || (isa(w.manager, LocalManager)), all_workers)] - else - ipatpid = get_bind_addr(pid) - Int[x.id for x in filter(w -> get_bind_addr(w) == ipatpid, all_workers)] - end - else - remotecall_fetch(procs, 1, pid) - end -end - -""" - workers() - -Return a list of all worker process identifiers. - -# Examples -```julia-repl -\$ julia -p 2 - -julia> workers() -2-element Array{Int64,1}: - 2 - 3 -``` -""" -function workers() - allp = procs() - if length(allp) == 1 - allp - else - filter(x -> x != 1, allp) - end -end - -function cluster_mgmt_from_master_check() - if myid() != 1 - throw(ErrorException("Only process 1 can add and remove workers")) - end -end - -""" - rmprocs(pids...; waitfor=typemax(Int)) - -Remove the specified workers. Note that only process 1 can add or remove -workers. - -Argument `waitfor` specifies how long to wait for the workers to shut down: - - If unspecified, `rmprocs` will wait until all requested `pids` are removed. - - An [`ErrorException`](@ref) is raised if all workers cannot be terminated before - the requested `waitfor` seconds. - - With a `waitfor` value of 0, the call returns immediately with the workers - scheduled for removal in a different task. The scheduled [`Task`](@ref) object is - returned. The user should call [`wait`](@ref) on the task before invoking any other - parallel calls. - -# Examples -```julia-repl -\$ julia -p 5 - -julia> t = rmprocs(2, 3, waitfor=0) -Task (runnable) @0x0000000107c718d0 - -julia> wait(t) - -julia> workers() -3-element Array{Int64,1}: - 4 - 5 - 6 -``` -""" -function rmprocs(pids...; waitfor=typemax(Int)) - cluster_mgmt_from_master_check() - - pids = vcat(pids...) - if waitfor == 0 - t = @async _rmprocs(pids, typemax(Int)) - yield() - return t - else - _rmprocs(pids, waitfor) - # return a dummy task object that user code can wait on. 
- return @async nothing - end -end - -function _rmprocs(pids, waitfor) - lock(worker_lock) - try - rmprocset = Union{LocalProcess, Worker}[] - for p in pids - if p == 1 - @warn "rmprocs: process 1 not removed" - else - if haskey(map_pid_wrkr, p) - w = map_pid_wrkr[p] - set_worker_state(w, W_TERMINATING) - kill(w.manager, p, w.config) - push!(rmprocset, w) - end - end - end - - start = time_ns() - while (time_ns() - start) < waitfor*1e9 - all(w -> w.state === W_TERMINATED, rmprocset) && break - sleep(min(0.1, waitfor - (time_ns() - start)/1e9)) - end - - unremoved = [wrkr.id for wrkr in filter(w -> w.state !== W_TERMINATED, rmprocset)] - if length(unremoved) > 0 - estr = string("rmprocs: pids ", unremoved, " not terminated after ", waitfor, " seconds.") - throw(ErrorException(estr)) - end - finally - unlock(worker_lock) - end -end - - -""" - ProcessExitedException(worker_id::Int) - -After a client Julia process has exited, further attempts to reference the dead child will -throw this exception. -""" -struct ProcessExitedException <: Exception - worker_id::Int -end - -# No-arg constructor added for compatibility with Julia 1.0 & 1.1, should be deprecated in the future -ProcessExitedException() = ProcessExitedException(-1) - -worker_from_id(i) = worker_from_id(PGRP, i) -function worker_from_id(pg::ProcessGroup, i) - if !isempty(map_del_wrkr) && in(i, map_del_wrkr) - throw(ProcessExitedException(i)) - end - w = get(map_pid_wrkr, i, nothing) - if w === nothing - if myid() == 1 - error("no process with id $i exists") - end - w = Worker(i) - map_pid_wrkr[i] = w - else - w = w::Union{Worker, LocalProcess} - end - w -end - -""" - worker_id_from_socket(s) -> pid - -A low-level API which, given a `IO` connection or a `Worker`, -returns the `pid` of the worker it is connected to. -This is useful when writing custom [`serialize`](@ref) methods for a type, -which optimizes the data written out depending on the receiving process id. 
-""" -function worker_id_from_socket(s) - w = get(map_sock_wrkr, s, nothing) - if isa(w,Worker) - if s === w.r_stream || s === w.w_stream - return w.id - end - end - if isa(s,IOStream) && fd(s)==-1 - # serializing to a local buffer - return myid() - end - return -1 -end - - -register_worker(w) = register_worker(PGRP, w) -function register_worker(pg, w) - push!(pg.workers, w) - map_pid_wrkr[w.id] = w -end - -function register_worker_streams(w) - map_sock_wrkr[w.r_stream] = w - map_sock_wrkr[w.w_stream] = w -end - -deregister_worker(pid) = deregister_worker(PGRP, pid) -function deregister_worker(pg, pid) - pg.workers = filter(x -> !(x.id == pid), pg.workers) - w = pop!(map_pid_wrkr, pid, nothing) - if isa(w, Worker) - if isdefined(w, :r_stream) - pop!(map_sock_wrkr, w.r_stream, nothing) - if w.r_stream != w.w_stream - pop!(map_sock_wrkr, w.w_stream, nothing) - end - end - - if myid() == 1 && (myrole() === :master) && isdefined(w, :config) - # Notify the cluster manager of this workers death - manage(w.manager, w.id, w.config, :deregister) - if PGRP.topology !== :all_to_all || isclusterlazy() - for rpid in workers() - try - remote_do(deregister_worker, rpid, pid) - catch - end - end - end - end - end - push!(map_del_wrkr, pid) - - # delete this worker from our remote reference client sets - ids = [] - tonotify = [] - lock(client_refs) do - for (id, rv) in pg.refs - if in(pid, rv.clientset) - push!(ids, id) - end - if rv.waitingfor == pid - push!(tonotify, (id, rv)) - end - end - for id in ids - del_client(pg, id, pid) - end - - # throw exception to tasks waiting for this pid - for (id, rv) in tonotify - close(rv.c, ProcessExitedException(pid)) - delete!(pg.refs, id) - end - end - return -end - - -function interrupt(pid::Integer) - @assert myid() == 1 - w = map_pid_wrkr[pid] - if isa(w, Worker) - manage(w.manager, w.id, w.config, :interrupt) - end - return -end - -""" - interrupt(pids::Integer...) - -Interrupt the current executing task on the specified workers. This is equivalent to -pressing Ctrl-C on the local machine. If no arguments are given, all workers are interrupted. -""" -interrupt(pids::Integer...) = interrupt([pids...]) - -""" - interrupt(pids::AbstractVector=workers()) - -Interrupt the current executing task on the specified workers. This is equivalent to -pressing Ctrl-C on the local machine. If no arguments are given, all workers are interrupted. -""" -function interrupt(pids::AbstractVector=workers()) - @assert myid() == 1 - @sync begin - for pid in pids - @async interrupt(pid) - end - end -end - -wp_bind_addr(p::LocalProcess) = p.bind_addr -wp_bind_addr(p) = p.config.bind_addr - -function check_same_host(pids) - if myid() != 1 - return remotecall_fetch(check_same_host, 1, pids) - else - # We checkfirst if all test pids have been started using the local manager, - # else we check for the same bind_to addr. 
This handles the special case - # where the local ip address may change - as during a system sleep/awake - if all(p -> (p==1) || (isa(map_pid_wrkr[p].manager, LocalManager)), pids) - return true - else - first_bind_addr = notnothing(wp_bind_addr(map_pid_wrkr[pids[1]])) - return all(p -> notnothing(wp_bind_addr(map_pid_wrkr[p])) == first_bind_addr, pids[2:end]) - end - end -end - -function terminate_all_workers() - myid() != 1 && return - - if nprocs() > 1 - try - rmprocs(workers(); waitfor=5.0) - catch _ex - @warn "Forcibly interrupting busy workers" exception=_ex - # Might be computation bound, interrupt them and try again - interrupt(workers()) - try - rmprocs(workers(); waitfor=5.0) - catch _ex2 - @error "Unable to terminate all workers" exception=_ex2,catch_backtrace() - end - end - end -end - -# initialize the local proc network address / port -function init_bind_addr() - opts = JLOptions() - if opts.bindto != C_NULL - bind_to = split(unsafe_string(opts.bindto), ":") - bind_addr = string(parse(IPAddr, bind_to[1])) - if length(bind_to) > 1 - bind_port = parse(Int,bind_to[2]) - else - bind_port = 0 - end - else - bind_port = 0 - try - bind_addr = string(getipaddr()) - catch - # All networking is unavailable, initialize bind_addr to the loopback address - # Will cause an exception to be raised only when used. - bind_addr = "127.0.0.1" - end - end - global LPROC - LPROC.bind_addr = bind_addr - LPROC.bind_port = UInt16(bind_port) -end - -using Random: randstring - -let inited = false - # do initialization that's only needed when there is more than 1 processor - global function init_multi() - if !inited - inited = true - push!(Base.package_callbacks, _require_callback) - atexit(terminate_all_workers) - init_bind_addr() - cluster_cookie(randstring(HDR_COOKIE_LEN)) - end - return nothing - end -end - -function init_parallel() - start_gc_msgs_task() - - # start in "head node" mode, if worker, will override later. - global PGRP - global LPROC - LPROC.id = 1 - @assert isempty(PGRP.workers) - register_worker(LPROC) -end - -write_cookie(io::IO) = print(io.in, string(cluster_cookie(), "\n")) - -# Starts workers specified by (-n|--procs) and --machine-file command line options -function process_opts(opts) - # startup worker. - # opts.startupfile, opts.load, etc should should not be processed for workers. - if opts.worker == 1 - # does not return - if opts.cookie != C_NULL - start_worker(unsafe_string(opts.cookie)) - else - start_worker() - end - end - - # Propagate --threads to workers - exeflags = opts.nthreads > 0 ? `--threads=$(opts.nthreads)` : `` - - # add processors - if opts.nprocs > 0 - addprocs(opts.nprocs; exeflags=exeflags) - end - - # load processes from machine file - if opts.machine_file != C_NULL - addprocs(load_machine_file(unsafe_string(opts.machine_file)); exeflags=exeflags) - end - return nothing -end - - -function load_machine_file(path::AbstractString) - machines = [] - for line in split(read(path, String),'\n'; keepempty=false) - s = split(line, '*'; keepempty=false) - map!(strip, s, s) - if length(s) > 1 - cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1]) - push!(machines,(s[2], cnt)) - else - push!(machines,line) - end - end - return machines -end diff --git a/stdlib/Distributed/src/clusterserialize.jl b/stdlib/Distributed/src/clusterserialize.jl deleted file mode 100644 index 0acd4ce68c45b..0000000000000 --- a/stdlib/Distributed/src/clusterserialize.jl +++ /dev/null @@ -1,254 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -using Serialization: serialize_cycle, deserialize_cycle, writetag, - serialize_typename, deserialize_typename, - TYPENAME_TAG, TASK_TAG, reset_state, serialize_type -using Serialization.__deserialized_types__ - -import Serialization: object_number, lookup_object_number, remember_object - -mutable struct ClusterSerializer{I<:IO} <: AbstractSerializer - io::I - counter::Int - table::IdDict{Any,Any} - pending_refs::Vector{Int} - - pid::Int # Worker we are connected to. - tn_obj_sent::Set{UInt64} # TypeName objects sent - glbs_sent::Dict{Symbol, Tuple{UInt64, UInt64}} # (key,value) -> (symbol, (hash_value, objectid)) - glbs_in_tnobj::Dict{UInt64, Vector{Symbol}} # Track globals referenced in - # anonymous functions. - anonfunc_id::UInt64 - - function ClusterSerializer{I}(io::I) where I<:IO - new(io, 0, IdDict(), Int[], worker_id_from_socket(io), - Set{UInt64}(), Dict{UInt64, UInt64}(), Dict{UInt64, Vector{Symbol}}(), 0) - end -end -ClusterSerializer(io::IO) = ClusterSerializer{typeof(io)}(io) - -const object_numbers = WeakKeyDict() -const obj_number_salt = Ref(0) -function object_number(s::ClusterSerializer, @nospecialize(l)) - global obj_number_salt, object_numbers - if haskey(object_numbers, l) - return object_numbers[l] - end - # a hash function that always gives the same number to the same - # object on the same machine, and is unique over all machines. - ln = obj_number_salt[]+(UInt64(myid())<<44) - obj_number_salt[] += 1 - object_numbers[l] = ln - return ln::UInt64 -end - -const known_object_data = Dict{UInt64,Any}() - -function lookup_object_number(s::ClusterSerializer, n::UInt64) - return get(known_object_data, n, nothing) -end - -function remember_object(s::ClusterSerializer, @nospecialize(o), n::UInt64) - known_object_data[n] = o - if isa(o, Core.TypeName) && !haskey(object_numbers, o) - # set up reverse mapping for serialize - object_numbers[o] = n - end - return nothing -end - -function deserialize(s::ClusterSerializer, ::Type{Core.TypeName}) - full_body_sent = deserialize(s) - number = read(s.io, UInt64) - if !full_body_sent - tn = lookup_object_number(s, number)::Core.TypeName - remember_object(s, tn, number) - deserialize_cycle(s, tn) - else - tn = deserialize_typename(s, number) - end - - # retrieve arrays of global syms sent if any and deserialize them all. - foreach(sym->deserialize_global_from_main(s, sym), deserialize(s)) - return tn -end - -function serialize(s::ClusterSerializer, t::Core.TypeName) - serialize_cycle(s, t) && return - writetag(s.io, TYPENAME_TAG) - - identifier = object_number(s, t) - send_whole = !(identifier in s.tn_obj_sent) - serialize(s, send_whole) - write(s.io, identifier) - if send_whole - # Track globals referenced in this anonymous function. - # This information is used to resend modified globals when we - # only send the identifier. - prev = s.anonfunc_id - s.anonfunc_id = identifier - serialize_typename(s, t) - s.anonfunc_id = prev - push!(s.tn_obj_sent, identifier) - finalizer(t) do x - cleanup_tname_glbs(s, identifier) - end - end - - # Send global refs if required. - syms = syms_2b_sent(s, identifier) - serialize(s, syms) - foreach(sym->serialize_global_from_main(s, sym), syms) - nothing -end - -function serialize(s::ClusterSerializer, g::GlobalRef) - # Record if required and then invoke the default GlobalRef serializer. 
- sym = g.name - if g.mod === Main && isdefined(g.mod, sym) - if (binding_module(Main, sym) === Main) && (s.anonfunc_id != 0) && - !startswith(string(sym), "#") # Anonymous functions are handled via FULL_GLOBALREF_TAG - - push!(get!(s.glbs_in_tnobj, s.anonfunc_id, []), sym) - end - end - - invoke(serialize, Tuple{AbstractSerializer, GlobalRef}, s, g) -end - -# Send/resend a global binding if -# a) has not been sent previously, i.e., we are seeing this binding for the first time, or, -# b) hash value has changed or -# c) hash value is same but of a different object, i.e. objectid has changed or -# d) is a bits type -function syms_2b_sent(s::ClusterSerializer, identifier) - lst = Symbol[] - check_syms = get(s.glbs_in_tnobj, identifier, Symbol[]) - for sym in check_syms - v = getfield(Main, sym) - - if isbits(v) - push!(lst, sym) - else - if haskey(s.glbs_sent, sym) - # We have sent this binding before, see if it has changed. - hval, oid = s.glbs_sent[sym] - if hval != hash(sym, hash(v)) || oid != objectid(v) - push!(lst, sym) - end - else - push!(lst, sym) - end - end - end - return unique(lst) -end - -function serialize_global_from_main(s::ClusterSerializer, sym) - v = getfield(Main, sym) - - if !isbits(v) - s.glbs_sent[sym] = (hash(sym, hash(v)), objectid(v)) - end - - serialize(s, isconst(Main, sym)) - serialize(s, v) -end - -function deserialize_global_from_main(s::ClusterSerializer, sym) - sym_isconst = deserialize(s) - v = deserialize(s) - if isdefined(Main, sym) && (sym_isconst || isconst(Main, sym)) - if isequal(getfield(Main, sym), v) - # same value; ok - return nothing - else - @warn "Cannot transfer global variable $sym; it already has a value." - return nothing - end - end - if sym_isconst - ccall(:jl_set_const, Cvoid, (Any, Any, Any), Main, sym, v) - else - setglobal!(Main, sym, v) - end - return nothing -end - -function cleanup_tname_glbs(s::ClusterSerializer, identifier) - delete!(s.glbs_in_tnobj, identifier) -end - -# TODO: cleanup from s.tn_obj_sent - - -# Specialized serialize-deserialize implementations for CapturedException to partially -# recover from any deserialization errors in `CapturedException.ex` - -function serialize(s::ClusterSerializer, ex::CapturedException) - serialize_type(s, typeof(ex)) - serialize(s, string(typeof(ex.ex))) # String type should not result in a deser error - serialize(s, ex.processed_bt) # Currently should not result in a deser error - serialize(s, ex.ex) # can result in a UndefVarError on the remote node - # if a type used in ex.ex is undefined on the remote node. -end - -function original_ex(s::ClusterSerializer, ex_str, remote_stktrace) - local pid_str = "" - try - pid_str = string(" from worker ", worker_id_from_socket(s.io)) - catch - end - - stk_str = remote_stktrace ? 
"Remote" : "Local" - ErrorException(string("Error deserializing a remote exception", pid_str, "\n", - "Remote(original) exception of type ", ex_str, "\n", - stk_str, " stacktrace : ")) -end - -function deserialize(s::ClusterSerializer, t::Type{<:CapturedException}) - ex_str = deserialize(s) - local bt - local capex - try - bt = deserialize(s) - catch e - throw(CompositeException([ - original_ex(s, ex_str, false), - CapturedException(e, catch_backtrace()) - ])) - end - - try - capex = deserialize(s) - catch e - throw(CompositeException([ - CapturedException(original_ex(s, ex_str, true), bt), - CapturedException(e, catch_backtrace()) - ])) - end - - return CapturedException(capex, bt) -end - -""" - clear!(syms, pids=workers(); mod=Main) - -Clears global bindings in modules by initializing them to `nothing`. -`syms` should be of type [`Symbol`](@ref) or a collection of `Symbol`s . `pids` and `mod` -identify the processes and the module in which global variables are to be -reinitialized. Only those names found to be defined under `mod` are cleared. - -An exception is raised if a global constant is requested to be cleared. -""" -function clear!(syms, pids=workers(); mod=Main) - @sync for p in pids - @async_unwrap remotecall_wait(clear_impl!, p, syms, mod) - end -end -clear!(sym::Symbol, pid::Int; mod=Main) = clear!([sym], [pid]; mod=mod) -clear!(sym::Symbol, pids=workers(); mod=Main) = clear!([sym], pids; mod=mod) -clear!(syms, pid::Int; mod=Main) = clear!(syms, [pid]; mod=mod) - -clear_impl!(syms, mod::Module) = foreach(x->clear_impl!(x,mod), syms) -clear_impl!(sym::Symbol, mod::Module) = isdefined(mod, sym) && @eval(mod, global $sym = nothing) diff --git a/stdlib/Distributed/src/macros.jl b/stdlib/Distributed/src/macros.jl deleted file mode 100644 index a767c7a40d9c9..0000000000000 --- a/stdlib/Distributed/src/macros.jl +++ /dev/null @@ -1,361 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -let nextidx = Threads.Atomic{Int}(0) - global nextproc - function nextproc() - idx = Threads.atomic_add!(nextidx, 1) - return workers()[(idx % nworkers()) + 1] - end -end - -spawnat(p, thunk) = remotecall(thunk, p) - -spawn_somewhere(thunk) = spawnat(nextproc(),thunk) - -""" - @spawn expr - -Create a closure around an expression and run it on an automatically-chosen process, -returning a [`Future`](@ref) to the result. -This macro is deprecated; `@spawnat :any expr` should be used instead. - -# Examples -```julia-repl -julia> addprocs(3); - -julia> f = @spawn myid() -Future(2, 1, 5, nothing) - -julia> fetch(f) -2 - -julia> f = @spawn myid() -Future(3, 1, 7, nothing) - -julia> fetch(f) -3 -``` - -!!! compat "Julia 1.3" - As of Julia 1.3 this macro is deprecated. Use `@spawnat :any` instead. -""" -macro spawn(expr) - thunk = esc(:(()->($expr))) - var = esc(Base.sync_varname) - quote - local ref = spawn_somewhere($thunk) - if $(Expr(:islocal, var)) - put!($var, ref) - end - ref - end -end - -""" - @spawnat p expr - -Create a closure around an expression and run the closure -asynchronously on process `p`. Return a [`Future`](@ref) to the result. -If `p` is the quoted literal symbol `:any`, then the system will pick a -processor to use automatically. - -# Examples -```julia-repl -julia> addprocs(3); - -julia> f = @spawnat 2 myid() -Future(2, 1, 3, nothing) - -julia> fetch(f) -2 - -julia> f = @spawnat :any myid() -Future(3, 1, 7, nothing) - -julia> fetch(f) -3 -``` - -!!! compat "Julia 1.3" - The `:any` argument is available as of Julia 1.3. 
-""" -macro spawnat(p, expr) - thunk = esc(:(()->($expr))) - var = esc(Base.sync_varname) - if p === QuoteNode(:any) - spawncall = :(spawn_somewhere($thunk)) - else - spawncall = :(spawnat($(esc(p)), $thunk)) - end - quote - local ref = $spawncall - if $(Expr(:islocal, var)) - put!($var, ref) - end - ref - end -end - -""" - @fetch expr - -Equivalent to `fetch(@spawnat :any expr)`. -See [`fetch`](@ref) and [`@spawnat`](@ref). - -# Examples -```julia-repl -julia> addprocs(3); - -julia> @fetch myid() -2 - -julia> @fetch myid() -3 - -julia> @fetch myid() -4 - -julia> @fetch myid() -2 -``` -""" -macro fetch(expr) - thunk = esc(:(()->($expr))) - :(remotecall_fetch($thunk, nextproc())) -end - -""" - @fetchfrom - -Equivalent to `fetch(@spawnat p expr)`. -See [`fetch`](@ref) and [`@spawnat`](@ref). - -# Examples -```julia-repl -julia> addprocs(3); - -julia> @fetchfrom 2 myid() -2 - -julia> @fetchfrom 4 myid() -4 -``` -""" -macro fetchfrom(p, expr) - thunk = esc(:(()->($expr))) - :(remotecall_fetch($thunk, $(esc(p)))) -end - -# extract a list of modules to import from an expression -extract_imports!(imports, x) = imports -function extract_imports!(imports, ex::Expr) - if Meta.isexpr(ex, (:import, :using)) - push!(imports, ex) - elseif Meta.isexpr(ex, :let) - extract_imports!(imports, ex.args[2]) - elseif Meta.isexpr(ex, (:toplevel, :block)) - for arg in ex.args - extract_imports!(imports, arg) - end - end - return imports -end -extract_imports(x) = extract_imports!(Any[], x) - -""" - @everywhere [procs()] expr - -Execute an expression under `Main` on all `procs`. -Errors on any of the processes are collected into a -[`CompositeException`](@ref) and thrown. For example: - - @everywhere bar = 1 - -will define `Main.bar` on all current processes. Any processes added later -(say with [`addprocs()`](@ref)) will not have the expression defined. - -Unlike [`@spawnat`](@ref), `@everywhere` does not capture any local variables. -Instead, local variables can be broadcast using interpolation: - - foo = 1 - @everywhere bar = \$foo - -The optional argument `procs` allows specifying a subset of all -processes to have execute the expression. - -Similar to calling `remotecall_eval(Main, procs, expr)`, but with two extra features: - - - `using` and `import` statements run on the calling process first, to ensure - packages are precompiled. - - The current source file path used by `include` is propagated to other processes. -""" -macro everywhere(ex) - procs = GlobalRef(@__MODULE__, :procs) - return esc(:($(Distributed).@everywhere $procs() $ex)) -end - -macro everywhere(procs, ex) - imps = extract_imports(ex) - return quote - $(isempty(imps) ? nothing : Expr(:toplevel, imps...)) # run imports locally first - let ex = Expr(:toplevel, :(task_local_storage()[:SOURCE_PATH] = $(get(task_local_storage(), :SOURCE_PATH, nothing))), $(esc(Expr(:quote, ex)))), - procs = $(esc(procs)) - remotecall_eval(Main, procs, ex) - end - end -end - -""" - remotecall_eval(m::Module, procs, expression) - -Execute an expression under module `m` on the processes -specified in `procs`. -Errors on any of the processes are collected into a -[`CompositeException`](@ref) and thrown. - -See also [`@everywhere`](@ref). 
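A minimal, hedged sketch of the `remotecall_eval` usage described in the docstring above; it assumes two workers have already been added and qualifies the call with the module name in case it is not exported:

```julia
using Distributed
addprocs(2)

# Evaluate an expression under Main on every worker; errors from any worker
# are collected into a CompositeException, as documented above.
Distributed.remotecall_eval(Main, workers(), :(const half = 0.5))

# The single-pid method also fetches the value of the expression.
Distributed.remotecall_eval(Main, first(workers()), :(2 * half))  # expected 1.0
```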
-""" -function remotecall_eval(m::Module, procs, ex) - @sync begin - run_locally = 0 - for pid in procs - if pid == myid() - run_locally += 1 - else - @async_unwrap remotecall_wait(Core.eval, pid, m, ex) - end - end - yield() # ensure that the remotecalls have had a chance to start - - # execute locally last as we do not want local execution to block serialization - # of the request to remote nodes. - for _ in 1:run_locally - @async Core.eval(m, ex) - end - end - nothing -end - -# optimized version of remotecall_eval for a single pid -# and which also fetches the return value -function remotecall_eval(m::Module, pid::Int, ex) - return remotecall_fetch(Core.eval, pid, m, ex) -end - - -# Statically split range [firstIndex,lastIndex] into equal sized chunks for np processors -function splitrange(firstIndex::Int, lastIndex::Int, np::Int) - each, extras = divrem(lastIndex-firstIndex+1, np) - nchunks = each > 0 ? np : extras - chunks = Vector{UnitRange{Int}}(undef, nchunks) - lo = firstIndex - for i in 1:nchunks - hi = lo + each - 1 - if extras > 0 - hi += 1 - extras -= 1 - end - chunks[i] = lo:hi - lo = hi+1 - end - return chunks -end - -function preduce(reducer, f, R) - chunks = splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers()) - all_w = workers()[1:length(chunks)] - - w_exec = Task[] - for (idx,pid) in enumerate(all_w) - t = Task(()->remotecall_fetch(f, pid, reducer, R, first(chunks[idx]), last(chunks[idx]))) - schedule(t) - push!(w_exec, t) - end - reduce(reducer, Any[fetch(t) for t in w_exec]) -end - -function pfor(f, R) - t = @async @sync for c in splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers()) - @spawnat :any f(R, first(c), last(c)) - end - errormonitor(t) -end - -function make_preduce_body(var, body) - quote - function (reducer, R, lo::Int, hi::Int) - $(esc(var)) = R[lo] - ac = $(esc(body)) - if lo != hi - for $(esc(var)) in R[(lo+1):hi] - ac = reducer(ac, $(esc(body))) - end - end - ac - end - end -end - -function make_pfor_body(var, body) - quote - function (R, lo::Int, hi::Int) - for $(esc(var)) in R[lo:hi] - $(esc(body)) - end - end - end -end - -""" - @distributed - -A distributed memory, parallel for loop of the form : - - @distributed [reducer] for var = range - body - end - -The specified range is partitioned and locally executed across all workers. In case an -optional reducer function is specified, `@distributed` performs local reductions on each worker -with a final reduction on the calling process. - -Note that without a reducer function, `@distributed` executes asynchronously, i.e. it spawns -independent tasks on all available workers and returns immediately without waiting for -completion. To wait for completion, prefix the call with [`@sync`](@ref), like : - - @sync @distributed for var = range - body - end -""" -macro distributed(args...) 
- na = length(args) - if na==1 - loop = args[1] - elseif na==2 - reducer = args[1] - loop = args[2] - else - throw(ArgumentError("wrong number of arguments to @distributed")) - end - if !isa(loop,Expr) || loop.head !== :for - error("malformed @distributed loop") - end - var = loop.args[1].args[1] - r = loop.args[1].args[2] - body = loop.args[2] - if Meta.isexpr(body, :block) && body.args[end] isa LineNumberNode - resize!(body.args, length(body.args) - 1) - end - if na==1 - syncvar = esc(Base.sync_varname) - return quote - local ref = pfor($(make_pfor_body(var, body)), $(esc(r))) - if $(Expr(:islocal, syncvar)) - put!($syncvar, ref) - end - ref - end - else - return :(preduce($(esc(reducer)), $(make_preduce_body(var, body)), $(esc(r)))) - end -end diff --git a/stdlib/Distributed/src/managers.jl b/stdlib/Distributed/src/managers.jl deleted file mode 100644 index 7b048807eddae..0000000000000 --- a/stdlib/Distributed/src/managers.jl +++ /dev/null @@ -1,750 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# Built-in SSH and Local Managers - -struct SSHManager <: ClusterManager - machines::Dict - - function SSHManager(machines) - # machines => array of machine elements - # machine => address or (address, cnt) - # address => string of form `[user@]host[:port] bind_addr[:bind_port]` - # cnt => :auto or number - # :auto launches NUM_CORES number of workers at address - # number launches the specified number of workers at address - mhist = Dict() - for m in machines - if isa(m, Tuple) - host=m[1] - cnt=m[2] - else - host=m - cnt=1 - end - current_cnt = get(mhist, host, 0) - - if isa(cnt, Number) - mhist[host] = isa(current_cnt, Number) ? current_cnt + Int(cnt) : Int(cnt) - else - mhist[host] = cnt - end - end - new(mhist) - end -end - - -function check_addprocs_args(manager, kwargs) - valid_kw_names = keys(default_addprocs_params(manager)) - for keyname in keys(kwargs) - !(keyname in valid_kw_names) && throw(ArgumentError("Invalid keyword argument $(keyname)")) - end -end - -# SSHManager - -# start and connect to processes via SSH, optionally through an SSH tunnel. -# the tunnel is only used from the head (process 1); the nodes are assumed -# to be mutually reachable without a tunnel, as is often the case in a cluster. -# Default value of kw arg max_parallel is the default value of MaxStartups in sshd_config -# A machine is either a or a tuple of (, count) -""" - addprocs(machines; tunnel=false, sshflags=\`\`, max_parallel=10, kwargs...) -> List of process identifiers - -Add worker processes on remote machines via SSH. Configuration is done with keyword -arguments (see below). In particular, the `exename` keyword can be used to specify -the path to the `julia` binary on the remote machine(s). - -`machines` is a vector of "machine specifications" which are given as strings of -the form `[user@]host[:port] [bind_addr[:port]]`. `user` defaults to current user and `port` -to the standard SSH port. If `[bind_addr[:port]]` is specified, other workers will connect -to this worker at the specified `bind_addr` and `port`. - -It is possible to launch multiple processes on a remote host by using a tuple in the -`machines` vector or the form `(machine_spec, count)`, where `count` is the number of -workers to be launched on the specified host. Passing `:auto` as the worker count will -launch as many workers as the number of CPU threads on the remote host. 
- -**Examples**: -```julia -addprocs([ - "remote1", # one worker on 'remote1' logging in with the current username - "user@remote2", # one worker on 'remote2' logging in with the 'user' username - "user@remote3:2222", # specifying SSH port to '2222' for 'remote3' - ("user@remote4", 4), # launch 4 workers on 'remote4' - ("user@remote5", :auto), # launch as many workers as CPU threads on 'remote5' -]) -``` - -**Keyword arguments**: - -* `tunnel`: if `true` then SSH tunneling will be used to connect to the worker from the - master process. Default is `false`. - -* `multiplex`: if `true` then SSH multiplexing is used for SSH tunneling. Default is `false`. - -* `ssh`: the name or path of the SSH client executable used to start the workers. - Default is `"ssh"`. - -* `sshflags`: specifies additional ssh options, e.g. ``` sshflags=\`-i /home/foo/bar.pem\` ``` - -* `max_parallel`: specifies the maximum number of workers connected to in parallel at a - host. Defaults to 10. - -* `shell`: specifies the type of shell to which ssh connects on the workers. - - + `shell=:posix`: a POSIX-compatible Unix/Linux shell - (sh, ksh, bash, dash, zsh, etc.). The default. - - + `shell=:csh`: a Unix C shell (csh, tcsh). - - + `shell=:wincmd`: Microsoft Windows `cmd.exe`. - -* `dir`: specifies the working directory on the workers. Defaults to the host's current - directory (as found by `pwd()`) - -* `enable_threaded_blas`: if `true` then BLAS will run on multiple threads in added - processes. Default is `false`. - -* `exename`: name of the `julia` executable. Defaults to `"\$(Sys.BINDIR)/julia"` or - `"\$(Sys.BINDIR)/julia-debug"` as the case may be. It is recommended that a common Julia - version is used on all remote machines because serialization and code distribution might - fail otherwise. - -* `exeflags`: additional flags passed to the worker processes. - -* `topology`: Specifies how the workers connect to each other. Sending a message between - unconnected workers results in an error. - - + `topology=:all_to_all`: All processes are connected to each other. The default. - - + `topology=:master_worker`: Only the driver process, i.e. `pid` 1 connects to the - workers. The workers do not connect to each other. - - + `topology=:custom`: The `launch` method of the cluster manager specifies the - connection topology via fields `ident` and `connect_idents` in `WorkerConfig`. - A worker with a cluster manager identity `ident` will connect to all workers specified - in `connect_idents`. - -* `lazy`: Applicable only with `topology=:all_to_all`. If `true`, worker-worker connections - are setup lazily, i.e. they are setup at the first instance of a remote call between - workers. Default is true. - -* `env`: provide an array of string pairs such as - `env=["JULIA_DEPOT_PATH"=>"/depot"]` to request that environment variables - are set on the remote machine. By default only the environment variable - `JULIA_WORKER_TIMEOUT` is passed automatically from the local to the remote - environment. - -* `cmdline_cookie`: pass the authentication cookie via the `--worker` commandline - option. The (more secure) default behaviour of passing the cookie via ssh stdio - may hang with Windows workers that use older (pre-ConPTY) Julia or Windows versions, - in which case `cmdline_cookie=true` offers a work-around. - -!!! compat "Julia 1.6" - The keyword arguments `ssh`, `shell`, `env` and `cmdline_cookie` - were added in Julia 1.6. 
- -Environment variables: - -If the master process fails to establish a connection with a newly launched worker within -60.0 seconds, the worker treats it as a fatal situation and terminates. -This timeout can be controlled via environment variable `JULIA_WORKER_TIMEOUT`. -The value of `JULIA_WORKER_TIMEOUT` on the master process specifies the number of seconds a -newly launched worker waits for connection establishment. -""" -function addprocs(machines::AbstractVector; kwargs...) - manager = SSHManager(machines) - check_addprocs_args(manager, kwargs) - addprocs(manager; kwargs...) -end - -default_addprocs_params(::SSHManager) = - merge(default_addprocs_params(), - Dict{Symbol,Any}( - :ssh => "ssh", - :sshflags => ``, - :shell => :posix, - :cmdline_cookie => false, - :env => [], - :tunnel => false, - :multiplex => false, - :max_parallel => 10)) - -function launch(manager::SSHManager, params::Dict, launched::Array, launch_ntfy::Condition) - # Launch one worker on each unique host in parallel. Additional workers are launched later. - # Wait for all launches to complete. - @sync for (i, (machine, cnt)) in enumerate(manager.machines) - let machine=machine, cnt=cnt - @async try - launch_on_machine(manager, $machine, $cnt, params, launched, launch_ntfy) - catch e - print(stderr, "exception launching on machine $(machine) : $(e)\n") - end - end - end - notify(launch_ntfy) -end - - -Base.show(io::IO, manager::SSHManager) = print(io, "SSHManager(machines=", manager.machines, ")") - - -function parse_machine(machine::AbstractString) - hoststr = "" - portnum = nothing - - if machine[begin] == '[' # ipv6 bracket notation (RFC 2732) - ipv6_end = findlast(']', machine) - if ipv6_end === nothing - throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine\"")) - end - hoststr = machine[begin+1 : prevind(machine,ipv6_end)] - machine_def = split(machine[ipv6_end : end] , ':') - else # ipv4 - machine_def = split(machine, ':') - hoststr = machine_def[1] - end - - if length(machine_def) > 2 - throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine_def\"")) - end - - if length(machine_def) == 2 - portstr = machine_def[2] - - portnum = tryparse(Int, portstr) - if portnum === nothing - msg = "invalid machine definition format string: invalid port format \"$machine_def\"" - throw(ArgumentError(msg)) - end - - if portnum < 1 || portnum > 65535 - msg = "invalid machine definition format string: invalid port number \"$machine_def\"" - throw(ArgumentError(msg)) - end - end - (hoststr, portnum) -end - -function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, params::Dict, launched::Array, launch_ntfy::Condition) - shell = params[:shell] - ssh = params[:ssh] - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - tunnel = params[:tunnel] - multiplex = params[:multiplex] - cmdline_cookie = params[:cmdline_cookie] - env = Dict{String,String}(params[:env]) - - # machine could be of the format [user@]host[:port] bind_addr[:bind_port] - # machine format string is split on whitespace - machine_bind = split(machine) - if isempty(machine_bind) - throw(ArgumentError("invalid machine definition format string: \"$machine\$")) - end - if length(machine_bind) > 1 - exeflags = `--bind-to $(machine_bind[2]) $exeflags` - end - if cmdline_cookie - exeflags = `$exeflags --worker=$(cluster_cookie())` - else - exeflags = `$exeflags --worker` - end - - host, portnum = parse_machine(machine_bind[1]) - portopt = portnum 
=== nothing ? `` : `-p $portnum` - sshflags = `$(params[:sshflags]) $portopt` - - if tunnel - # First it checks if ssh multiplexing has been already enabled and the master process is running. - # If it's already running, later ssh sessions also use the same ssh multiplexing session even if - # `multiplex` is not explicitly specified; otherwise the tunneling session launched later won't - # go to background and hang. This is because of OpenSSH implementation. - if success(`$ssh $sshflags -O check $host`) - multiplex = true - elseif multiplex - # automatically create an SSH multiplexing session at the next SSH connection - controlpath = "~/.ssh/julia-%r@%h:%p" - sshflags = `$sshflags -o ControlMaster=auto -o ControlPath=$controlpath -o ControlPersist=no` - end - end - - # Build up the ssh command - - # pass on some environment variables by default - for var in ["JULIA_WORKER_TIMEOUT"] - if !haskey(env, var) && haskey(ENV, var) - env[var] = ENV[var] - end - end - - # Julia process with passed in command line flag arguments - if shell == :posix - # ssh connects to a POSIX shell - - cmds = "exec $(shell_escape_posixly(exename)) $(shell_escape_posixly(exeflags))" - # set environment variables - for (var, val) in env - occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) || - throw(ArgumentError("invalid env key $var")) - cmds = "export $(var)=$(shell_escape_posixly(val))\n$cmds" - end - # change working directory - cmds = "cd -- $(shell_escape_posixly(dir))\n$cmds" - - # shell login (-l) with string command (-c) to launch julia process - remotecmd = shell_escape_posixly(`sh -l -c $cmds`) - - elseif shell == :csh - # ssh connects to (t)csh - - remotecmd = "exec $(shell_escape_csh(exename)) $(shell_escape_csh(exeflags))" - - # set environment variables - for (var, val) in env - occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) || - throw(ArgumentError("invalid env key $var")) - remotecmd = "setenv $(var) $(shell_escape_csh(val))\n$remotecmd" - end - # change working directory - if dir !== nothing && dir != "" - remotecmd = "cd $(shell_escape_csh(dir))\n$remotecmd" - end - - elseif shell == :wincmd - # ssh connects to Windows cmd.exe - - any(c -> c == '"', exename) && throw(ArgumentError("invalid exename")) - - remotecmd = shell_escape_wincmd(escape_microsoft_c_args(exename, exeflags...)) - # change working directory - if dir !== nothing && dir != "" - any(c -> c == '"', dir) && throw(ArgumentError("invalid dir")) - remotecmd = "pushd \"$(dir)\" && $remotecmd" - end - # set environment variables - for (var, val) in env - occursin(r"^[a-zA-Z0-9_()[\]{}\$\\/#',;\.@!?*+-]+\z", var) || throw(ArgumentError("invalid env key $var")) - remotecmd = "set $(var)=$(shell_escape_wincmd(val))&& $remotecmd" - end - - else - throw(ArgumentError("invalid shell")) - end - - # remote launch with ssh with given ssh flags / host / port information - # -T → disable pseudo-terminal allocation - # -a → disable forwarding of auth agent connection - # -x → disable X11 forwarding - # -o ClearAllForwardings → option if forwarding connections and - # forwarded connections are causing collisions - cmd = `$ssh -T -a -x -o ClearAllForwardings=yes $sshflags $host $remotecmd` - - # launch the remote Julia process - - # detach launches the command in a new process group, allowing it to outlive - # the initial julia process (Ctrl-C and teardown methods are handled through messages) - # for the launched processes. 
- io = open(detach(cmd), "r+") - cmdline_cookie || write_cookie(io) - - wconfig = WorkerConfig() - wconfig.io = io.out - wconfig.host = host - wconfig.tunnel = tunnel - wconfig.multiplex = multiplex - wconfig.sshflags = sshflags - wconfig.exeflags = exeflags - wconfig.exename = exename - wconfig.count = cnt - wconfig.max_parallel = params[:max_parallel] - wconfig.enable_threaded_blas = params[:enable_threaded_blas] - - - push!(launched, wconfig) - notify(launch_ntfy) -end - - -function manage(manager::SSHManager, id::Integer, config::WorkerConfig, op::Symbol) - id = Int(id) - if op === :interrupt - ospid = config.ospid - if ospid !== nothing - host = notnothing(config.host) - sshflags = notnothing(config.sshflags) - if !success(`ssh -T -a -x -o ClearAllForwardings=yes -n $sshflags $host "kill -2 $ospid"`) - @error "Error sending a Ctrl-C to julia worker $id on $host" - end - else - # This state can happen immediately after an addprocs - @error "Worker $id cannot be presently interrupted." - end - end -end - -let tunnel_port = 9201 - global next_tunnel_port - function next_tunnel_port() - retval = tunnel_port - if tunnel_port > 32000 - tunnel_port = 9201 - else - tunnel_port += 1 - end - retval - end -end - - -""" - ssh_tunnel(user, host, bind_addr, port, sshflags, multiplex) -> localport - -Establish an SSH tunnel to a remote worker. -Return a port number `localport` such that `localhost:localport` connects to `host:port`. -""" -function ssh_tunnel(user, host, bind_addr, port, sshflags, multiplex) - port = Int(port) - cnt = ntries = 100 - - # the connection is forwarded to `port` on the remote server over the local port `localport` - while cnt > 0 - localport = next_tunnel_port() - if multiplex - # It assumes that an ssh multiplexing session has been already started by the remote worker. - cmd = `ssh $sshflags -O forward -L $localport:$bind_addr:$port $user@$host` - else - # if we cannot do port forwarding, fail immediately - # the -f option backgrounds the ssh session - # `sleep 60` command specifies that an alloted time of 60 seconds is allowed to start the - # remote julia process and establish the network connections specified by the process topology. - # If no connections are made within 60 seconds, ssh will exit and an error will be printed on the - # process that launched the remote process. - ssh = `ssh -T -a -x -o ExitOnForwardFailure=yes` - cmd = detach(`$ssh -f $sshflags $user@$host -L $localport:$bind_addr:$port sleep 60`) - end - if success(cmd) - return localport - end - cnt -= 1 - end - - throw(ErrorException( - string("unable to create SSH tunnel after ", ntries, " tries. No free port?"))) -end - - -# LocalManager -struct LocalManager <: ClusterManager - np::Int - restrict::Bool # Restrict binding to 127.0.0.1 only -end - -""" - addprocs(np::Integer=Sys.CPU_THREADS; restrict=true, kwargs...) -> List of process identifiers - -Launch `np` workers on the local host using the in-built `LocalManager`. - -Local workers inherit the current package environment (i.e., active project, -[`LOAD_PATH`](@ref), and [`DEPOT_PATH`](@ref)) from the main process. - -**Keyword arguments**: - - `restrict::Bool`: if `true` (default) binding is restricted to `127.0.0.1`. - - `dir`, `exename`, `exeflags`, `env`, `topology`, `lazy`, `enable_threaded_blas`: same effect - as for `SSHManager`, see documentation for [`addprocs(machines::AbstractVector)`](@ref). - -!!! compat "Julia 1.9" - The inheriting of the package environment and the `env` keyword argument were - added in Julia 1.9. 
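A small, illustrative sketch of the local `addprocs` method documented above; the flag and environment values are assumptions chosen for the example, not defaults:

```julia
using Distributed

# Start two local workers that inherit the active project (Julia 1.9+),
# give each worker two threads, and lengthen the connection timeout.
addprocs(2;
         exeflags = `--threads=2`,
         env      = ["JULIA_WORKER_TIMEOUT" => "120"])
```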
-""" -function addprocs(np::Integer=Sys.CPU_THREADS; restrict=true, kwargs...) - manager = LocalManager(np, restrict) - check_addprocs_args(manager, kwargs) - addprocs(manager; kwargs...) -end - -Base.show(io::IO, manager::LocalManager) = print(io, "LocalManager()") - -function launch(manager::LocalManager, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - bind_to = manager.restrict ? `127.0.0.1` : `$(LPROC.bind_addr)` - env = Dict{String,String}(params[:env]) - - # TODO: Maybe this belongs in base/initdefs.jl as a package_environment() function - # together with load_path() etc. Might be useful to have when spawning julia - # processes outside of Distributed.jl too. - # JULIA_(LOAD|DEPOT)_PATH are used to populate (LOAD|DEPOT)_PATH on startup, - # but since (LOAD|DEPOT)_PATH might have changed they are re-serialized here. - # Users can opt-out of this by passing `env = ...` to addprocs(...). - pathsep = Sys.iswindows() ? ";" : ":" - if get(env, "JULIA_LOAD_PATH", nothing) === nothing - env["JULIA_LOAD_PATH"] = join(LOAD_PATH, pathsep) - end - if get(env, "JULIA_DEPOT_PATH", nothing) === nothing - env["JULIA_DEPOT_PATH"] = join(DEPOT_PATH, pathsep) - end - # Set the active project on workers using JULIA_PROJECT. - # Users can opt-out of this by (i) passing `env = ...` or (ii) passing - # `--project=...` as `exeflags` to addprocs(...). - project = Base.ACTIVE_PROJECT[] - if project !== nothing && get(env, "JULIA_PROJECT", nothing) === nothing - env["JULIA_PROJECT"] = project - end - - for i in 1:manager.np - cmd = `$(julia_cmd(exename)) $exeflags --bind-to $bind_to --worker` - io = open(detach(setenv(addenv(cmd, env), dir=dir)), "r+") - write_cookie(io) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - wconfig.enable_threaded_blas = params[:enable_threaded_blas] - push!(launched, wconfig) - end - - notify(c) -end - -function manage(manager::LocalManager, id::Integer, config::WorkerConfig, op::Symbol) - if op === :interrupt - kill(config.process, 2) - end -end - -""" - launch(manager::ClusterManager, params::Dict, launched::Array, launch_ntfy::Condition) - -Implemented by cluster managers. For every Julia worker launched by this function, it should -append a `WorkerConfig` entry to `launched` and notify `launch_ntfy`. The function MUST exit -once all workers, requested by `manager` have been launched. `params` is a dictionary of all -keyword arguments [`addprocs`](@ref) was called with. -""" -launch - -""" - manage(manager::ClusterManager, id::Integer, config::WorkerConfig. op::Symbol) - -Implemented by cluster managers. It is called on the master process, during a worker's -lifetime, with appropriate `op` values: - -- with `:register`/`:deregister` when a worker is added / removed from the Julia worker pool. -- with `:interrupt` when `interrupt(workers)` is called. The `ClusterManager` - should signal the appropriate worker with an interrupt signal. -- with `:finalize` for cleanup purposes. -""" -manage - -# DefaultClusterManager for the default TCP transport - used by both SSHManager and LocalManager - -struct DefaultClusterManager <: ClusterManager -end - -const tunnel_hosts_map = Dict{String, Semaphore}() - -""" - connect(manager::ClusterManager, pid::Int, config::WorkerConfig) -> (instrm::IO, outstrm::IO) - -Implemented by cluster managers using custom transports. 
It should establish a logical -connection to worker with id `pid`, specified by `config` and return a pair of `IO` -objects. Messages from `pid` to current process will be read off `instrm`, while messages to -be sent to `pid` will be written to `outstrm`. The custom transport implementation must -ensure that messages are delivered and received completely and in order. -`connect(manager::ClusterManager.....)` sets up TCP/IP socket connections in-between -workers. -""" -function connect(manager::ClusterManager, pid::Int, config::WorkerConfig) - if config.connect_at !== nothing - # this is a worker-to-worker setup call. - return connect_w2w(pid, config) - end - - # master connecting to workers - if config.io !== nothing - (bind_addr, port::Int) = read_worker_host_port(config.io) - pubhost = something(config.host, bind_addr) - config.host = pubhost - config.port = port - else - pubhost = notnothing(config.host) - port = notnothing(config.port) - bind_addr = something(config.bind_addr, pubhost) - end - - tunnel = something(config.tunnel, false) - - s = split(pubhost,'@') - user = "" - if length(s) > 1 - user = s[1] - pubhost = s[2] - else - if haskey(ENV, "USER") - user = ENV["USER"] - elseif tunnel - error("USER must be specified either in the environment ", - "or as part of the hostname when tunnel option is used") - end - end - - if tunnel - if !haskey(tunnel_hosts_map, pubhost) - tunnel_hosts_map[pubhost] = Semaphore(something(config.max_parallel, typemax(Int))) - end - sem = tunnel_hosts_map[pubhost] - - sshflags = notnothing(config.sshflags) - multiplex = something(config.multiplex, false) - acquire(sem) - try - (s, bind_addr, forward) = connect_to_worker_with_tunnel(pubhost, bind_addr, port, user, sshflags, multiplex) - config.forward = forward - finally - release(sem) - end - else - (s, bind_addr) = connect_to_worker(bind_addr, port) - end - - config.bind_addr = bind_addr - - # write out a subset of the connect_at required for further worker-worker connection setups - config.connect_at = (bind_addr, port) - - if config.io !== nothing - let pid = pid - redirect_worker_output(pid, notnothing(config.io)) - end - end - - (s, s) -end - -function connect_w2w(pid::Int, config::WorkerConfig) - (rhost, rport) = notnothing(config.connect_at)::Tuple{String, Int} - config.host = rhost - config.port = rport - (s, bind_addr) = connect_to_worker(rhost, rport) - (s,s) -end - -const client_port = Ref{UInt16}(0) - -function socket_reuse_port(iptype) - if ccall(:jl_has_so_reuseport, Int32, ()) == 1 - sock = TCPSocket(delay = false) - - # Some systems (e.g. 
Linux) require the port to be bound before setting REUSEPORT - bind_early = Sys.islinux() - - bind_early && bind_client_port(sock, iptype) - rc = ccall(:jl_tcp_reuseport, Int32, (Ptr{Cvoid},), sock.handle) - if rc < 0 - close(sock) - - # This is an issue only on systems with lots of client connections, hence delay the warning - nworkers() > 128 && @warn "Error trying to reuse client port number, falling back to regular socket" maxlog=1 - - # provide a clean new socket - return TCPSocket() - end - bind_early || bind_client_port(sock, iptype) - return sock - else - return TCPSocket() - end -end - -function bind_client_port(sock::TCPSocket, iptype) - bind_host = iptype(0) - if Sockets.bind(sock, bind_host, client_port[]) - _addr, port = getsockname(sock) - client_port[] = port - end - return sock -end - -function connect_to_worker(host::AbstractString, port::Integer) - # Avoid calling getaddrinfo if possible - involves a DNS lookup - # host may be a stringified ipv4 / ipv6 address or a dns name - bind_addr = nothing - try - bind_addr = parse(IPAddr,host) - catch - bind_addr = getaddrinfo(host) - end - - iptype = typeof(bind_addr) - sock = socket_reuse_port(iptype) - connect(sock, bind_addr, UInt16(port)) - - (sock, string(bind_addr)) -end - - -function connect_to_worker_with_tunnel(host::AbstractString, bind_addr::AbstractString, port::Integer, tunnel_user::AbstractString, sshflags, multiplex) - localport = ssh_tunnel(tunnel_user, host, bind_addr, UInt16(port), sshflags, multiplex) - s = connect("localhost", localport) - forward = "$localport:$bind_addr:$port" - (s, bind_addr, forward) -end - - -function cancel_ssh_tunnel(config::WorkerConfig) - host = notnothing(config.host) - sshflags = notnothing(config.sshflags) - tunnel = something(config.tunnel, false) - multiplex = something(config.multiplex, false) - if tunnel && multiplex - forward = notnothing(config.forward) - run(`ssh $sshflags -O cancel -L $forward $host`) - end -end - - -""" - kill(manager::ClusterManager, pid::Int, config::WorkerConfig) - -Implemented by cluster managers. -It is called on the master process, by [`rmprocs`](@ref). -It should cause the remote worker specified by `pid` to exit. -`kill(manager::ClusterManager.....)` executes a remote `exit()` -on `pid`. -""" -function kill(manager::ClusterManager, pid::Int, config::WorkerConfig) - remote_do(exit, pid) - nothing -end - -function kill(manager::SSHManager, pid::Int, config::WorkerConfig) - remote_do(exit, pid) - cancel_ssh_tunnel(config) - nothing -end - -function kill(manager::LocalManager, pid::Int, config::WorkerConfig; exit_timeout = 15, term_timeout = 15) - # First, try sending `exit()` to the remote over the usual control channels - remote_do(exit, pid) - - timer_task = @async begin - sleep(exit_timeout) - - # Check to see if our child exited, and if not, send an actual kill signal - if !process_exited(config.process) - @warn("Failed to gracefully kill worker $(pid), sending SIGTERM") - kill(config.process, Base.SIGTERM) - - sleep(term_timeout) - if !process_exited(config.process) - @warn("Worker $(pid) ignored SIGTERM, sending SIGKILL") - kill(config.process, Base.SIGKILL) - end - end - end - errormonitor(timer_task) - return nothing -end diff --git a/stdlib/Distributed/src/messages.jl b/stdlib/Distributed/src/messages.jl deleted file mode 100644 index fe3e5ab90b028..0000000000000 --- a/stdlib/Distributed/src/messages.jl +++ /dev/null @@ -1,215 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -abstract type AbstractMsg end - - -## Wire format description -# -# Each message has three parts, which are written in order to the worker's stream. -# 1) A header of type MsgHeader is serialized to the stream (via `serialize`). -# 2) A message of type AbstractMsg is then serialized. -# 3) Finally, a fixed boundary of 10 bytes is written. - -# Message header stored separately from body to be able to send back errors if -# a deserialization error occurs when reading the message body. -struct MsgHeader - response_oid::RRID - notify_oid::RRID - MsgHeader(respond_oid=RRID(0,0), notify_oid=RRID(0,0)) = - new(respond_oid, notify_oid) -end - -# Special oid (0,0) uses to indicate a null ID. -# Used instead of Union{Int, Nothing} to decrease wire size of header. -null_id(id) = id == RRID(0, 0) - -struct CallMsg{Mode} <: AbstractMsg - f::Any - args::Tuple - kwargs -end -struct CallWaitMsg <: AbstractMsg - f::Any - args::Tuple - kwargs -end -struct RemoteDoMsg <: AbstractMsg - f::Any - args::Tuple - kwargs -end -struct ResultMsg <: AbstractMsg - value::Any -end - - -# Worker initialization messages -struct IdentifySocketMsg <: AbstractMsg - from_pid::Int -end - -struct IdentifySocketAckMsg <: AbstractMsg -end - -struct JoinPGRPMsg <: AbstractMsg - self_pid::Int - other_workers::Array - topology::Symbol - enable_threaded_blas::Bool - lazy::Bool -end -struct JoinCompleteMsg <: AbstractMsg - cpu_threads::Int - ospid::Int -end - -# Avoiding serializing AbstractMsg containers results in a speedup -# of approximately 10%. Can be removed once module Serialization -# has been suitably improved. - -const msgtypes = Any[CallWaitMsg, IdentifySocketAckMsg, IdentifySocketMsg, - JoinCompleteMsg, JoinPGRPMsg, RemoteDoMsg, ResultMsg, - CallMsg{:call}, CallMsg{:call_fetch}] - -for (idx, tname) in enumerate(msgtypes) - exprs = Any[ :(serialize(s, o.$fld)) for fld in fieldnames(tname) ] - @eval function serialize_msg(s::AbstractSerializer, o::$tname) - write(s.io, UInt8($idx)) - $(exprs...) 
- return nothing - end -end - -let msg_cases = :(@assert false "Message type index ($idx) expected to be between 1:$($(length(msgtypes)))") - for i = length(msgtypes):-1:1 - mti = msgtypes[i] - msg_cases = :(if idx == $i - $(Expr(:call, QuoteNode(mti), fill(:(deserialize(s)), fieldcount(mti))...)) - else - $msg_cases - end) - end - @eval function deserialize_msg(s::AbstractSerializer) - idx = read(s.io, UInt8) - return $msg_cases - end -end - -function send_msg_unknown(s::IO, header, msg) - error("attempt to send to unknown socket") -end - -function send_msg(s::IO, header, msg) - id = worker_id_from_socket(s) - if id > -1 - return send_msg(worker_from_id(id), header, msg) - end - send_msg_unknown(s, header, msg) -end - -function send_msg_now(s::IO, header, msg::AbstractMsg) - id = worker_id_from_socket(s) - if id > -1 - return send_msg_now(worker_from_id(id), header, msg) - end - send_msg_unknown(s, header, msg) -end -function send_msg_now(w::Worker, header, msg) - send_msg_(w, header, msg, true) -end - -function send_msg(w::Worker, header, msg) - send_msg_(w, header, msg, false) -end - -function flush_gc_msgs(w::Worker) - if !isdefined(w, :w_stream) - return - end - add_msgs = nothing - del_msgs = nothing - @lock w.msg_lock begin - if !w.gcflag # No work needed for this worker - return - end - @atomic w.gcflag = false - if !isempty(w.add_msgs) - add_msgs = w.add_msgs - w.add_msgs = Any[] - end - - if !isempty(w.del_msgs) - del_msgs = w.del_msgs - w.del_msgs = Any[] - end - end - if add_msgs !== nothing - remote_do(add_clients, w, add_msgs) - end - if del_msgs !== nothing - remote_do(del_clients, w, del_msgs) - end - return -end - -# Boundary inserted between messages on the wire, used for recovering -# from deserialization errors. Picked arbitrarily. -# A size of 10 bytes indicates ~ ~1e24 possible boundaries, so chance of collision -# with message contents is negligible. -const MSG_BOUNDARY = UInt8[0x79, 0x8e, 0x8e, 0xf5, 0x6e, 0x9b, 0x2e, 0x97, 0xd5, 0x7d] - -# Faster serialization/deserialization of MsgHeader and RRID -function serialize_hdr_raw(io, hdr) - write(io, hdr.response_oid.whence, hdr.response_oid.id, hdr.notify_oid.whence, hdr.notify_oid.id) -end - -function deserialize_hdr_raw(io) - data = read!(io, Ref{NTuple{4,Int}}())[] - return MsgHeader(RRID(data[1], data[2]), RRID(data[3], data[4])) -end - -function send_msg_(w::Worker, header, msg, now::Bool) - check_worker_state(w) - if myid() != 1 && !isa(msg, IdentifySocketMsg) && !isa(msg, IdentifySocketAckMsg) - wait(w.initialized) - end - io = w.w_stream - lock(io) - try - reset_state(w.w_serializer) - serialize_hdr_raw(io, header) - invokelatest(serialize_msg, w.w_serializer, msg) # io is wrapped in w_serializer - write(io, MSG_BOUNDARY) - - if !now && w.gcflag - flush_gc_msgs(w) - else - flush(io) - end - finally - unlock(io) - end -end - -function flush_gc_msgs() - try - for w in (PGRP::ProcessGroup).workers - if isa(w,Worker) && (w.state == W_CONNECTED) && w.gcflag - flush_gc_msgs(w) - end - end - catch e - bt = catch_backtrace() - @async showerror(stderr, e, bt) - end -end - -function send_connection_hdr(w::Worker, cookie=true) - # For a connection initiated from the remote side to us, we only send the version, - # else when we initiate a connection we first send the cookie followed by our version. - # The remote side validates the cookie. 
- if cookie - write(w.w_stream, LPROC.cookie) - end - write(w.w_stream, rpad(VERSION_STRING, HDR_VERSION_LEN)[1:HDR_VERSION_LEN]) -end diff --git a/stdlib/Distributed/src/pmap.jl b/stdlib/Distributed/src/pmap.jl deleted file mode 100644 index 603dfa7e031ce..0000000000000 --- a/stdlib/Distributed/src/pmap.jl +++ /dev/null @@ -1,300 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -struct BatchProcessingError <: Exception - data - ex -end - -""" - pgenerate([::WorkerPool], f, c...) -> iterator - -Apply `f` to each element of `c` in parallel using available workers and tasks. - -For multiple collection arguments, apply `f` elementwise. - -Results are returned in order as they become available. - -Note that `f` must be made available to all worker processes; see -[Code Availability and Loading Packages](@ref code-availability) -for details. -""" -function pgenerate(p::WorkerPool, f, c) - if length(p) == 0 - return AsyncGenerator(f, c; ntasks=()->nworkers(p)) - end - batches = batchsplit(c, min_batch_count = length(p) * 3) - return Iterators.flatten(AsyncGenerator(remote(p, b -> asyncmap(f, b)), batches)) -end -pgenerate(p::WorkerPool, f, c1, c...) = pgenerate(p, a->f(a...), zip(c1, c...)) -pgenerate(f, c) = pgenerate(default_worker_pool(), f, c) -pgenerate(f, c1, c...) = pgenerate(a->f(a...), zip(c1, c...)) - -""" - pmap(f, [::AbstractWorkerPool], c...; distributed=true, batch_size=1, on_error=nothing, retry_delays=[], retry_check=nothing) -> collection - -Transform collection `c` by applying `f` to each element using available -workers and tasks. - -For multiple collection arguments, apply `f` elementwise. - -Note that `f` must be made available to all worker processes; see -[Code Availability and Loading Packages](@ref code-availability) for details. - -If a worker pool is not specified, all available workers, i.e., the default worker pool -is used. - -By default, `pmap` distributes the computation over all specified workers. To use only the -local process and distribute over tasks, specify `distributed=false`. -This is equivalent to using [`asyncmap`](@ref). For example, -`pmap(f, c; distributed=false)` is equivalent to `asyncmap(f,c; ntasks=()->nworkers())` - -`pmap` can also use a mix of processes and tasks via the `batch_size` argument. For batch sizes -greater than 1, the collection is processed in multiple batches, each of length `batch_size` or less. -A batch is sent as a single request to a free worker, where a local [`asyncmap`](@ref) processes -elements from the batch using multiple concurrent tasks. - -Any error stops `pmap` from processing the remainder of the collection. To override this behavior -you can specify an error handling function via argument `on_error` which takes in a single argument, i.e., -the exception. The function can stop the processing by rethrowing the error, or, to continue, return any value -which is then returned inline with the results to the caller. - -Consider the following two examples. The first one returns the exception object inline, -the second a 0 in place of any exception: -```julia-repl -julia> pmap(x->iseven(x) ? error("foo") : x, 1:4; on_error=identity) -4-element Array{Any,1}: - 1 - ErrorException("foo") - 3 - ErrorException("foo") - -julia> pmap(x->iseven(x) ? error("foo") : x, 1:4; on_error=ex->0) -4-element Array{Int64,1}: - 1 - 0 - 3 - 0 -``` - -Errors can also be handled by retrying failed computations. 
Keyword arguments `retry_delays` and -`retry_check` are passed through to [`retry`](@ref) as keyword arguments `delays` and `check` -respectively. If batching is specified, and an entire batch fails, all items in -the batch are retried. - -Note that if both `on_error` and `retry_delays` are specified, the `on_error` hook is called -before retrying. If `on_error` does not throw (or rethrow) an exception, the element will not -be retried. - -Example: On errors, retry `f` on an element a maximum of 3 times without any delay between retries. -```julia -pmap(f, c; retry_delays = zeros(3)) -``` - -Example: Retry `f` only if the exception is not of type [`InexactError`](@ref), with exponentially increasing -delays up to 3 times. Return a `NaN` in place for all `InexactError` occurrences. -```julia -pmap(f, c; on_error = e->(isa(e, InexactError) ? NaN : rethrow()), retry_delays = ExponentialBackOff(n = 3)) -``` -""" -function pmap(f, p::AbstractWorkerPool, c; distributed=true, batch_size=1, on_error=nothing, - retry_delays=[], retry_check=nothing) - f_orig = f - # Don't do remote calls if there are no workers. - if (length(p) == 0) || (length(p) == 1 && fetch(p.channel) == myid()) - distributed = false - end - - # Don't do batching if not doing remote calls. - if !distributed - batch_size = 1 - end - - # If not batching, do simple remote call. - if batch_size == 1 - if on_error !== nothing - f = wrap_on_error(f, on_error) - end - - if distributed - f = remote(p, f) - end - - if length(retry_delays) > 0 - f = wrap_retry(f, retry_delays, retry_check) - end - - return asyncmap(f, c; ntasks=()->nworkers(p)) - else - # During batch processing, We need to ensure that if on_error is set, it is called - # for each element in error, and that we return as many elements as the original list. - # retry, if set, has to be called element wise and we will do a best-effort - # to ensure that we do not call mapped function on the same element more than length(retry_delays). - # This guarantee is not possible in case of worker death / network errors, wherein - # we will retry the entire batch on a new worker. - - handle_errors = ((on_error !== nothing) || (length(retry_delays) > 0)) - - # Unlike the non-batch case, in batch mode, we trap all errors and the on_error hook (if present) - # is processed later in non-batch mode. - if handle_errors - f = wrap_on_error(f, (x,e)->BatchProcessingError(x,e); capture_data=true) - end - - f = wrap_batch(f, p, handle_errors) - results = asyncmap(f, c; ntasks=()->nworkers(p), batch_size=batch_size) - - # process errors if any. - if handle_errors - process_batch_errors!(p, f_orig, results, on_error, retry_delays, retry_check) - end - - return results - end -end - -pmap(f, p::AbstractWorkerPool, c1, c...; kwargs...) = pmap(a->f(a...), p, zip(c1, c...); kwargs...) -pmap(f, c; kwargs...) = pmap(f, default_worker_pool(), c; kwargs...) -pmap(f, c1, c...; kwargs...) = pmap(a->f(a...), zip(c1, c...); kwargs...) 
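A hedged sketch combining the batching, retry, and error-handling options documented above; the helper function and sizes are made up for illustration:

```julia
using Distributed
addprocs(4)
@everywhere slow_square(x) = (sleep(0.001); x^2)

# Process the collection in batches of 25 elements per remote request and
# retry a failed batch up to two times (no delay between retries).
results = pmap(slow_square, 1:10_000; batch_size = 25, retry_delays = zeros(2))

# Replace failures with `missing` instead of aborting the whole map.
safe = pmap(x -> x == 7 ? error("bad input") : x, 1:10; on_error = e -> missing)
```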
- -function wrap_on_error(f, on_error; capture_data=false) - return x -> begin - try - f(x) - catch e - if capture_data - on_error(x, e) - else - on_error(e) - end - end - end -end - -function wrap_retry(f, retry_delays, retry_check) - retry(delays=retry_delays, check=retry_check) do x - try - f(x) - catch e - rethrow(extract_exception(e)) - end - end -end - -function wrap_batch(f, p, handle_errors) - f = asyncmap_batch(f) - return batch -> begin - try - remotecall_fetch(f, p, batch) - catch e - if handle_errors - return Any[BatchProcessingError(b, e) for b in batch] - else - rethrow() - end - end - end -end - -asyncmap_batch(f) = batch -> asyncmap(x->f(x...), batch) -extract_exception(e) = isa(e, RemoteException) ? e.captured.ex : e - - -function process_batch_errors!(p, f, results, on_error, retry_delays, retry_check) - # Handle all the ones in error in another pmap, with batch size set to 1 - reprocess = Tuple{Int,BatchProcessingError}[] - for (idx, v) in enumerate(results) - if isa(v, BatchProcessingError) - push!(reprocess, (idx,v)) - end - end - - if length(reprocess) > 0 - errors = [x[2] for x in reprocess] - exceptions = Any[x.ex for x in errors] - state = iterate(retry_delays) - state !== nothing && (state = state[2]) - error_processed = let state=state - if (length(retry_delays)::Int > 0) && - (retry_check === nothing || all([retry_check(state,ex)[2] for ex in exceptions])) - # BatchProcessingError.data is a tuple of original args - pmap(x->f(x...), p, Any[x.data for x in errors]; - on_error = on_error, retry_delays = collect(retry_delays)[2:end::Int], retry_check = retry_check) - elseif on_error !== nothing - map(on_error, exceptions) - else - throw(CompositeException(exceptions)) - end - end - - for (idx, v) in enumerate(error_processed) - results[reprocess[idx][1]] = v - end - end - nothing -end - -""" - head_and_tail(c, n) -> head, tail - -Return `head`: the first `n` elements of `c`; -and `tail`: an iterator over the remaining elements. - -```jldoctest -julia> b, c = Distributed.head_and_tail(1:10, 3) -([1, 2, 3], Base.Iterators.Rest{UnitRange{Int64}, Int64}(1:10, 3)) - -julia> collect(c) -7-element Vector{Int64}: - 4 - 5 - 6 - 7 - 8 - 9 - 10 -``` -""" -function head_and_tail(c, n) - head = Vector{eltype(c)}(undef, n) - n == 0 && return (head, c) - i = 1 - y = iterate(c) - y === nothing && return (resize!(head, 0), ()) - head[i] = y[1] - while i < n - y = iterate(c, y[2]) - y === nothing && return (resize!(head, i), ()) - i += 1 - head[i] = y[1] - end - return head, Iterators.rest(c, y[2]) -end - -""" - batchsplit(c; min_batch_count=1, max_batch_size=100) -> iterator - -Split a collection into at least `min_batch_count` batches. - -Equivalent to `partition(c, max_batch_size)` when `length(c) >> max_batch_size`. 
-""" -function batchsplit(c; min_batch_count=1, max_batch_size=100) - if min_batch_count < 1 - throw(ArgumentError("min_batch_count must be ≥ 1, got $min_batch_count")) - end - - if max_batch_size < 1 - throw(ArgumentError("max_batch_size must be ≥ 1, got $max_batch_size")) - end - - # Split collection into batches, then peek at the first few batches - batches = Iterators.partition(c, max_batch_size) - head, tail = head_and_tail(batches, min_batch_count) - - # If there are not enough batches, use a smaller batch size - if length(head) < min_batch_count - batch_size = max(1, div(sum(length, head), min_batch_count)) - return Iterators.partition(collect(Iterators.flatten(head)), batch_size) - end - - return Iterators.flatten((head, tail)) -end diff --git a/stdlib/Distributed/src/process_messages.jl b/stdlib/Distributed/src/process_messages.jl deleted file mode 100644 index 7bbf7cfde943b..0000000000000 --- a/stdlib/Distributed/src/process_messages.jl +++ /dev/null @@ -1,386 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# data stored by the owner of a remote reference -def_rv_channel() = Channel(1) -mutable struct RemoteValue - c::AbstractChannel - clientset::BitSet # Set of workerids that have a reference to this channel. - # Keeping ids instead of a count aids in cleaning up upon - # a worker exit. - - waitingfor::Int # processor we need to hear from to fill this, or 0 - - synctake::Union{ReentrantLock, Nothing} # A lock used to synchronize the - # specific case of a local put! / remote take! on an - # unbuffered store. github issue #29932 - - function RemoteValue(c) - c_is_buffered = false - try - c_is_buffered = isbuffered(c) - catch - end - - if c_is_buffered - return new(c, BitSet(), 0, nothing) - else - return new(c, BitSet(), 0, ReentrantLock()) - end - end -end - -wait(rv::RemoteValue) = wait(rv.c) - -# A wrapper type to handle issue #29932 which requires locking / unlocking of -# RemoteValue.synctake outside of lexical scope. -struct SyncTake - v::Any - rv::RemoteValue -end - -## core messages: do, call, fetch, wait, ref, put! ## -struct RemoteException <: Exception - pid::Int - captured::CapturedException -end - -""" - capture_exception(ex::RemoteException, bt) - -Returns `ex::RemoteException` which has already captured a backtrace (via it's [`CapturedException`](@ref) field `captured`). -""" -Base.capture_exception(ex::RemoteException, bt) = ex - -""" - RemoteException(captured) - -Exceptions on remote computations are captured and rethrown locally. A `RemoteException` -wraps the `pid` of the worker and a captured exception. A `CapturedException` captures the -remote exception and a serializable form of the call stack when the exception was raised. 
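A short, hedged example of catching the exception type documented above; it assumes a single worker with pid 2:

```julia
using Distributed
addprocs(1)

try
    # The error is raised on the worker, captured there, and rethrown
    # locally wrapped in a RemoteException.
    remotecall_fetch(() -> error("boom"), 2)
catch err
    if err isa RemoteException
        println("worker ", err.pid, " failed with: ", err.captured.ex)
    else
        rethrow()
    end
end
```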
-""" -RemoteException(captured) = RemoteException(myid(), captured) -function showerror(io::IO, re::RemoteException) - (re.pid != myid()) && print(io, "On worker ", re.pid, ":\n") - showerror(io, re.captured) -end - -function run_work_thunk(thunk::Function, print_error::Bool) - local result - try - result = thunk() - catch err - ce = CapturedException(err, catch_backtrace()) - result = RemoteException(ce) - print_error && showerror(stderr, ce) - end - return result -end -function run_work_thunk(rv::RemoteValue, thunk) - put!(rv, run_work_thunk(thunk, false)) - nothing -end - -function schedule_call(rid, thunk) - return lock(client_refs) do - rv = RemoteValue(def_rv_channel()) - (PGRP::ProcessGroup).refs[rid] = rv - push!(rv.clientset, rid.whence) - errormonitor(@async run_work_thunk(rv, thunk)) - return rv - end -end - - -function deliver_result(sock::IO, msg, oid, value) - #print("$(myid()) sending result $oid\n") - if msg === :call_fetch || isa(value, RemoteException) - val = value - else - val = :OK - end - try - send_msg_now(sock, MsgHeader(oid), ResultMsg(val)) - catch e - # terminate connection in case of serialization error - # otherwise the reading end would hang - @error "Fatal error on process $(myid())" exception=e,catch_backtrace() - wid = worker_id_from_socket(sock) - close(sock) - if myid()==1 - rmprocs(wid) - elseif wid == 1 - exit(1) - else - remote_do(rmprocs, 1, wid) - end - end -end - -## message event handlers ## -function process_messages(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool=true) - errormonitor(@async process_tcp_streams(r_stream, w_stream, incoming)) -end - -function process_tcp_streams(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool) - Sockets.nagle(r_stream, false) - Sockets.quickack(r_stream, true) - wait_connected(r_stream) - if r_stream != w_stream - Sockets.nagle(w_stream, false) - Sockets.quickack(w_stream, true) - wait_connected(w_stream) - end - message_handler_loop(r_stream, w_stream, incoming) -end - -""" - process_messages(r_stream::IO, w_stream::IO, incoming::Bool=true) - -Called by cluster managers using custom transports. It should be called when the custom -transport implementation receives the first message from a remote worker. The custom -transport must manage a logical connection to the remote worker and provide two -`IO` objects, one for incoming messages and the other for messages addressed to the -remote worker. -If `incoming` is `true`, the remote peer initiated the connection. -Whichever of the pair initiates the connection sends the cluster cookie and its -Julia version number to perform the authentication handshake. - -See also [`cluster_cookie`](@ref). -""" -function process_messages(r_stream::IO, w_stream::IO, incoming::Bool=true) - errormonitor(@async message_handler_loop(r_stream, w_stream, incoming)) -end - -function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool) - wpid=0 # the worker r_stream is connected to. 
- boundary = similar(MSG_BOUNDARY) - try - version = process_hdr(r_stream, incoming) - serializer = ClusterSerializer(r_stream) - - # The first message will associate wpid with r_stream - header = deserialize_hdr_raw(r_stream) - msg = deserialize_msg(serializer) - handle_msg(msg, header, r_stream, w_stream, version) - wpid = worker_id_from_socket(r_stream) - @assert wpid > 0 - - readbytes!(r_stream, boundary, length(MSG_BOUNDARY)) - - while true - reset_state(serializer) - header = deserialize_hdr_raw(r_stream) - # println("header: ", header) - - try - msg = invokelatest(deserialize_msg, serializer) - catch e - # Deserialization error; discard bytes in stream until boundary found - boundary_idx = 1 - while true - # This may throw an EOF error if the terminal boundary was not written - # correctly, triggering the higher-scoped catch block below - byte = read(r_stream, UInt8) - if byte == MSG_BOUNDARY[boundary_idx] - boundary_idx += 1 - if boundary_idx > length(MSG_BOUNDARY) - break - end - else - boundary_idx = 1 - end - end - - # remotecalls only rethrow RemoteExceptions. Any other exception is treated as - # data to be returned. Wrap this exception in a RemoteException. - remote_err = RemoteException(myid(), CapturedException(e, catch_backtrace())) - # println("Deserialization error. ", remote_err) - if !null_id(header.response_oid) - ref = lookup_ref(header.response_oid) - put!(ref, remote_err) - end - if !null_id(header.notify_oid) - deliver_result(w_stream, :call_fetch, header.notify_oid, remote_err) - end - continue - end - readbytes!(r_stream, boundary, length(MSG_BOUNDARY)) - - # println("got msg: ", typeof(msg)) - handle_msg(msg, header, r_stream, w_stream, version) - end - catch e - # Check again as it may have been set in a message handler but not propagated to the calling block above - if wpid < 1 - wpid = worker_id_from_socket(r_stream) - end - - if wpid < 1 - println(stderr, e, CapturedException(e, catch_backtrace())) - println(stderr, "Process($(myid())) - Unknown remote, closing connection.") - elseif !(wpid in map_del_wrkr) - werr = worker_from_id(wpid) - oldstate = werr.state - set_worker_state(werr, W_TERMINATED) - - # If unhandleable error occurred talking to pid 1, exit - if wpid == 1 - if isopen(w_stream) - @error "Fatal error on process $(myid())" exception=e,catch_backtrace() - end - exit(1) - end - - # Will treat any exception as death of node and cleanup - # since currently we do not have a mechanism for workers to reconnect - # to each other on unhandled errors - deregister_worker(wpid) - end - - close(r_stream) - close(w_stream) - - if (myid() == 1) && (wpid > 1) - if oldstate != W_TERMINATING - println(stderr, "Worker $wpid terminated.") - rethrow() - end - end - - return nothing - end -end - -function process_hdr(s, validate_cookie) - if validate_cookie - cookie = read(s, HDR_COOKIE_LEN) - if length(cookie) < HDR_COOKIE_LEN - error("Cookie read failed. Connection closed by peer.") - end - - self_cookie = cluster_cookie() - for i in 1:HDR_COOKIE_LEN - if UInt8(self_cookie[i]) != cookie[i] - error("Process($(myid())) - Invalid connection credentials sent by remote.") - end - end - end - - # When we have incompatible julia versions trying to connect to each other, - # and can be detected, raise an appropriate error. - # For now, just return the version. - version = read(s, HDR_VERSION_LEN) - if length(version) < HDR_VERSION_LEN - error("Version read failed. 
Connection closed by peer.") - end - - return VersionNumber(strip(String(version))) -end - -function handle_msg(msg::CallMsg{:call}, header, r_stream, w_stream, version) - schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...)) -end -function handle_msg(msg::CallMsg{:call_fetch}, header, r_stream, w_stream, version) - errormonitor(@async begin - v = run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), false) - if isa(v, SyncTake) - try - deliver_result(w_stream, :call_fetch, header.notify_oid, v.v) - finally - unlock(v.rv.synctake) - end - else - deliver_result(w_stream, :call_fetch, header.notify_oid, v) - end - nothing - end) -end - -function handle_msg(msg::CallWaitMsg, header, r_stream, w_stream, version) - errormonitor(@async begin - rv = schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...)) - deliver_result(w_stream, :call_wait, header.notify_oid, fetch(rv.c)) - nothing - end) -end - -function handle_msg(msg::RemoteDoMsg, header, r_stream, w_stream, version) - errormonitor(@async run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), true)) -end - -function handle_msg(msg::ResultMsg, header, r_stream, w_stream, version) - put!(lookup_ref(header.response_oid), msg.value) -end - -function handle_msg(msg::IdentifySocketMsg, header, r_stream, w_stream, version) - # register a new peer worker connection - w = Worker(msg.from_pid, r_stream, w_stream, cluster_manager; version=version) - send_connection_hdr(w, false) - send_msg_now(w, MsgHeader(), IdentifySocketAckMsg()) - notify(w.initialized) -end - -function handle_msg(msg::IdentifySocketAckMsg, header, r_stream, w_stream, version) - w = map_sock_wrkr[r_stream] - w.version = version -end - -function handle_msg(msg::JoinPGRPMsg, header, r_stream, w_stream, version) - LPROC.id = msg.self_pid - controller = Worker(1, r_stream, w_stream, cluster_manager; version=version) - notify(controller.initialized) - register_worker(LPROC) - topology(msg.topology) - - if !msg.enable_threaded_blas - Base.disable_library_threading() - end - - lazy = msg.lazy - PGRP.lazy = lazy - - @sync for (connect_at, rpid) in msg.other_workers - wconfig = WorkerConfig() - wconfig.connect_at = connect_at - - let rpid=rpid, wconfig=wconfig - if lazy - # The constructor registers the object with a global registry. 
- Worker(rpid, ()->connect_to_peer(cluster_manager, rpid, wconfig)) - else - @async connect_to_peer(cluster_manager, rpid, wconfig) - end - end - end - - send_connection_hdr(controller, false) - send_msg_now(controller, MsgHeader(RRID(0,0), header.notify_oid), JoinCompleteMsg(Sys.CPU_THREADS, getpid())) -end - -function connect_to_peer(manager::ClusterManager, rpid::Int, wconfig::WorkerConfig) - try - (r_s, w_s) = connect(manager, rpid, wconfig) - w = Worker(rpid, r_s, w_s, manager; config=wconfig) - process_messages(w.r_stream, w.w_stream, false) - send_connection_hdr(w, true) - send_msg_now(w, MsgHeader(), IdentifySocketMsg(myid())) - notify(w.initialized) - catch e - @error "Error on $(myid()) while connecting to peer $rpid, exiting" exception=e,catch_backtrace() - exit(1) - end -end - -function handle_msg(msg::JoinCompleteMsg, header, r_stream, w_stream, version) - w = map_sock_wrkr[r_stream] - environ = something(w.config.environ, Dict()) - environ[:cpu_threads] = msg.cpu_threads - w.config.environ = environ - w.config.ospid = msg.ospid - w.version = version - - ntfy_channel = lookup_ref(header.notify_oid) - put!(ntfy_channel, w.id) - - push!(default_worker_pool(), w.id) -end diff --git a/stdlib/Distributed/src/remotecall.jl b/stdlib/Distributed/src/remotecall.jl deleted file mode 100644 index d4bf767537c1d..0000000000000 --- a/stdlib/Distributed/src/remotecall.jl +++ /dev/null @@ -1,780 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -import Base: eltype - -abstract type AbstractRemoteRef end - -""" - client_refs - -Tracks whether a particular `AbstractRemoteRef` -(identified by its RRID) exists on this worker. - -The `client_refs` lock is also used to synchronize access to `.refs` and associated `clientset` state. -""" -const client_refs = WeakKeyDict{AbstractRemoteRef, Nothing}() # used as a WeakKeySet - -""" - Future(w::Int, rrid::RRID, v::Union{Some, Nothing}=nothing) - -A `Future` is a placeholder for a single computation -of unknown termination status and time. -For multiple potential computations, see `RemoteChannel`. -See `remoteref_id` for identifying an `AbstractRemoteRef`. -""" -mutable struct Future <: AbstractRemoteRef - where::Int - whence::Int - id::Int - lock::ReentrantLock - @atomic v::Union{Some{Any}, Nothing} - - Future(w::Int, rrid::RRID, v::Union{Some, Nothing}=nothing) = - (r = new(w,rrid.whence,rrid.id,ReentrantLock(),v); return test_existing_ref(r)) - - Future(t::NTuple{4, Any}) = new(t[1],t[2],t[3],ReentrantLock(),t[4]) # Useful for creating dummy, zeroed-out instances -end - -""" - RemoteChannel(pid::Integer=myid()) - -Make a reference to a `Channel{Any}(1)` on process `pid`. -The default `pid` is the current process. - - RemoteChannel(f::Function, pid::Integer=myid()) - -Create references to remote channels of a specific size and type. `f` is a function that -when executed on `pid` must return an implementation of an `AbstractChannel`. - -For example, `RemoteChannel(()->Channel{Int}(10), pid)`, will return a reference to a -channel of type `Int` and size 10 on `pid`. - -The default `pid` is the current process. 
-""" -mutable struct RemoteChannel{T<:AbstractChannel} <: AbstractRemoteRef - where::Int - whence::Int - id::Int - - function RemoteChannel{T}(w::Int, rrid::RRID) where T<:AbstractChannel - r = new(w, rrid.whence, rrid.id) - return test_existing_ref(r) - end - - function RemoteChannel{T}(t::Tuple) where T<:AbstractChannel - return new(t[1],t[2],t[3]) - end -end - -function test_existing_ref(r::AbstractRemoteRef) - found = getkey(client_refs, r, nothing) - if found !== nothing - @assert r.where > 0 - if isa(r, Future) - # this is only for copying the reference from Future to RemoteRef (just created) - fv_cache = @atomic :acquire found.v - rv_cache = @atomic :monotonic r.v - if fv_cache === nothing && rv_cache !== nothing - # we have recd the value from another source, probably a deserialized ref, send a del_client message - send_del_client(r) - @lock found.lock begin - @atomicreplace found.v nothing => rv_cache - end - end - end - return found::typeof(r) - end - - client_refs[r] = nothing - finalizer(finalize_ref, r) - return r -end - -function finalize_ref(r::AbstractRemoteRef) - if r.where > 0 # Handle the case of the finalizer having been called manually - if trylock(client_refs.lock) # trylock doesn't call wait which causes yields - try - delete!(client_refs.ht, r) # direct removal avoiding locks - if isa(r, RemoteChannel) - send_del_client_no_lock(r) - else - # send_del_client only if the reference has not been set - v_cache = @atomic :monotonic r.v - v_cache === nothing && send_del_client_no_lock(r) - @atomic :monotonic r.v = nothing - end - r.where = 0 - finally - unlock(client_refs.lock) - end - else - finalizer(finalize_ref, r) - return nothing - end - end - nothing -end - -""" - Future(pid::Integer=myid()) - -Create a `Future` on process `pid`. -The default `pid` is the current process. -""" -Future(pid::Integer=myid()) = Future(pid, RRID()) -Future(w::LocalProcess) = Future(w.id) -Future(w::Worker) = Future(w.id) - -RemoteChannel(pid::Integer=myid()) = RemoteChannel{Channel{Any}}(pid, RRID()) - -function RemoteChannel(f::Function, pid::Integer=myid()) - remotecall_fetch(pid, f, RRID()) do f, rrid - rv=lookup_ref(rrid, f) - RemoteChannel{typeof(rv.c)}(myid(), rrid) - end -end - -Base.eltype(::Type{RemoteChannel{T}}) where {T} = eltype(T) - -hash(r::AbstractRemoteRef, h::UInt) = hash(r.whence, hash(r.id, h)) -==(r::AbstractRemoteRef, s::AbstractRemoteRef) = (r.whence==s.whence && r.id==s.id) - -""" - remoteref_id(r::AbstractRemoteRef) -> RRID - -`Future`s and `RemoteChannel`s are identified by fields: - -* `where` - refers to the node where the underlying object/storage - referred to by the reference actually exists. - -* `whence` - refers to the node the remote reference was created from. - Note that this is different from the node where the underlying object - referred to actually exists. For example calling `RemoteChannel(2)` - from the master process would result in a `where` value of 2 and - a `whence` value of 1. - -* `id` is unique across all references created from the worker specified by `whence`. - -Taken together, `whence` and `id` uniquely identify a reference across all workers. - -`remoteref_id` is a low-level API which returns a `RRID` -object that wraps `whence` and `id` values of a remote reference. -""" -remoteref_id(r::AbstractRemoteRef) = RRID(r.whence, r.id) - -""" - channel_from_id(id) -> c - -A low-level API which returns the backing `AbstractChannel` for an `id` returned by -[`remoteref_id`](@ref). 
-The call is valid only on the node where the backing channel exists. -""" -function channel_from_id(id) - rv = lock(client_refs) do - return get(PGRP.refs, id, false) - end - if rv === false - throw(ErrorException("Local instance of remote reference not found")) - end - return rv.c -end - -lookup_ref(rrid::RRID, f=def_rv_channel) = lookup_ref(PGRP, rrid, f) -function lookup_ref(pg, rrid, f) - return lock(client_refs) do - rv = get(pg.refs, rrid, false) - if rv === false - # first we've heard of this ref - rv = RemoteValue(invokelatest(f)) - pg.refs[rrid] = rv - push!(rv.clientset, rrid.whence) - end - return rv - end::RemoteValue -end - -""" - isready(rr::Future) - -Determine whether a [`Future`](@ref) has a value stored to it. - -If the argument `Future` is owned by a different node, this call will block to wait for the answer. -It is recommended to wait for `rr` in a separate task instead -or to use a local [`Channel`](@ref) as a proxy: - -```julia -p = 1 -f = Future(p) -errormonitor(@async put!(f, remotecall_fetch(long_computation, p))) -isready(f) # will not block -``` -""" -function isready(rr::Future) - v_cache = @atomic rr.v - v_cache === nothing || return true - - rid = remoteref_id(rr) - return if rr.where == myid() - isready(lookup_ref(rid).c) - else - remotecall_fetch(rid->isready(lookup_ref(rid).c), rr.where, rid) - end -end - -""" - isready(rr::RemoteChannel, args...) - -Determine whether a [`RemoteChannel`](@ref) has a value stored to it. -Note that this function can cause race conditions, since by the -time you receive its result it may no longer be true. However, -it can be safely used on a [`Future`](@ref) since they are assigned only once. -""" -function isready(rr::RemoteChannel, args...) - rid = remoteref_id(rr) - return if rr.where == myid() - isready(lookup_ref(rid).c, args...) - else - remotecall_fetch(rid->isready(lookup_ref(rid).c, args...), rr.where, rid) - end -end - -del_client(rr::AbstractRemoteRef) = del_client(remoteref_id(rr), myid()) - -del_client(id, client) = del_client(PGRP, id, client) -function del_client(pg, id, client) - lock(client_refs) do - _del_client(pg, id, client) - end - nothing -end - -function _del_client(pg, id, client) - rv = get(pg.refs, id, false) - if rv !== false - delete!(rv.clientset, client) - if isempty(rv.clientset) - delete!(pg.refs, id) - #print("$(myid()) collected $id\n") - end - end - nothing -end - -function del_clients(pairs::Vector) - for p in pairs - del_client(p[1], p[2]) - end -end - -# The task below is coalescing the `flush_gc_msgs` call -# across multiple producers, see `send_del_client`, -# and `send_add_client`. -# XXX: Is this worth the additional complexity? -# `flush_gc_msgs` has to iterate over all connected workers. -const any_gc_flag = Threads.Condition() -function start_gc_msgs_task() - errormonitor( - Threads.@spawn begin - while true - lock(any_gc_flag) do - # this might miss events - wait(any_gc_flag) - end - # Use invokelatest() so that custom message transport streams - # for workers can be defined in a newer world age than the Task - # which runs the loop here. 
- invokelatest(flush_gc_msgs) # handles throws internally - end - end - ) -end - -# Function can be called within a finalizer -function send_del_client(rr) - if rr.where == myid() - del_client(rr) - elseif id_in_procs(rr.where) # process only if a valid worker - process_worker(rr) - end -end - -function send_del_client_no_lock(rr) - # for gc context to avoid yields - if rr.where == myid() - _del_client(PGRP, remoteref_id(rr), myid()) - elseif id_in_procs(rr.where) # process only if a valid worker - process_worker(rr) - end -end - -function publish_del_msg!(w::Worker, msg) - lock(w.msg_lock) do - push!(w.del_msgs, msg) - @atomic w.gcflag = true - end - lock(any_gc_flag) do - notify(any_gc_flag) - end -end - -function process_worker(rr) - w = worker_from_id(rr.where)::Worker - msg = (remoteref_id(rr), myid()) - - # Needs to aquire a lock on the del_msg queue - T = Threads.@spawn begin - publish_del_msg!($w, $msg) - end - Base.errormonitor(T) - - return -end - -function add_client(id, client) - lock(client_refs) do - rv = lookup_ref(id) - push!(rv.clientset, client) - end - nothing -end - -function add_clients(pairs::Vector) - for p in pairs - add_client(p[1], p[2]...) - end -end - -function send_add_client(rr::AbstractRemoteRef, i) - if rr.where == myid() - add_client(remoteref_id(rr), i) - elseif (i != rr.where) && id_in_procs(rr.where) - # don't need to send add_client if the message is already going - # to the processor that owns the remote ref. it will add_client - # itself inside deserialize(). - w = worker_from_id(rr.where) - lock(w.msg_lock) do - push!(w.add_msgs, (remoteref_id(rr), i)) - @atomic w.gcflag = true - end - lock(any_gc_flag) do - notify(any_gc_flag) - end - end -end - -channel_type(rr::RemoteChannel{T}) where {T} = T - -function serialize(s::ClusterSerializer, f::Future) - v_cache = @atomic f.v - if v_cache === nothing - p = worker_id_from_socket(s.io) - (p !== f.where) && send_add_client(f, p) - end - invoke(serialize, Tuple{ClusterSerializer, Any}, s, f) -end - -function serialize(s::ClusterSerializer, rr::RemoteChannel) - p = worker_id_from_socket(s.io) - (p !== rr.where) && send_add_client(rr, p) - invoke(serialize, Tuple{ClusterSerializer, Any}, s, rr) -end - -function deserialize(s::ClusterSerializer, t::Type{<:Future}) - fc = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t) # deserialized copy - f2 = Future(fc.where, RRID(fc.whence, fc.id), fc.v) # ctor adds to client_refs table - - # 1) send_add_client() is not executed when the ref is being serialized - # to where it exists, hence do it here. - # 2) If we have received a 'fetch'ed Future or if the Future ctor found an - # already 'fetch'ed instance in client_refs (Issue #25847), we should not - # track it in the backing RemoteValue store. - f2v_cache = @atomic f2.v - if f2.where == myid() && f2v_cache === nothing - add_client(remoteref_id(f2), myid()) - end - f2 -end - -function deserialize(s::ClusterSerializer, t::Type{<:RemoteChannel}) - rr = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t) - if rr.where == myid() - # send_add_client() is not executed when the ref is being - # serialized to where it exists - add_client(remoteref_id(rr), myid()) - end - # call ctor to make sure this rr gets added to the client_refs table - RemoteChannel{channel_type(rr)}(rr.where, RRID(rr.whence, rr.id)) -end - -# Future and RemoteChannel are serializable only in a running cluster. 
-# Serialize zeroed-out values to non ClusterSerializer objects -function serialize(s::AbstractSerializer, ::Future) - zero_fut = Future((0,0,0,nothing)) - invoke(serialize, Tuple{AbstractSerializer, Any}, s, zero_fut) -end - -function serialize(s::AbstractSerializer, ::RemoteChannel) - zero_rc = RemoteChannel{Channel{Any}}((0,0,0)) - invoke(serialize, Tuple{AbstractSerializer, Any}, s, zero_rc) -end - - -# make a thunk to call f on args in a way that simulates what would happen if -# the function were sent elsewhere -function local_remotecall_thunk(f, args, kwargs) - return ()->invokelatest(f, args...; kwargs...) -end - -function remotecall(f, w::LocalProcess, args...; kwargs...) - rr = Future(w) - schedule_call(remoteref_id(rr), local_remotecall_thunk(f, args, kwargs)) - return rr -end - -function remotecall(f, w::Worker, args...; kwargs...) - rr = Future(w) - send_msg(w, MsgHeader(remoteref_id(rr)), CallMsg{:call}(f, args, kwargs)) - return rr -end - -""" - remotecall(f, id::Integer, args...; kwargs...) -> Future - -Call a function `f` asynchronously on the given arguments on the specified process. -Return a [`Future`](@ref). -Keyword arguments, if any, are passed through to `f`. -""" -remotecall(f, id::Integer, args...; kwargs...) = remotecall(f, worker_from_id(id), args...; kwargs...) - -function remotecall_fetch(f, w::LocalProcess, args...; kwargs...) - v=run_work_thunk(local_remotecall_thunk(f,args, kwargs), false) - return isa(v, RemoteException) ? throw(v) : v -end - -function remotecall_fetch(f, w::Worker, args...; kwargs...) - # can be weak, because the program will have no way to refer to the Ref - # itself, it only gets the result. - oid = RRID() - rv = lookup_ref(oid) - rv.waitingfor = w.id - send_msg(w, MsgHeader(RRID(0,0), oid), CallMsg{:call_fetch}(f, args, kwargs)) - v = take!(rv) - lock(client_refs) do - delete!(PGRP.refs, oid) - end - return isa(v, RemoteException) ? throw(v) : v -end - -""" - remotecall_fetch(f, id::Integer, args...; kwargs...) - -Perform `fetch(remotecall(...))` in one message. -Keyword arguments, if any, are passed through to `f`. -Any remote exceptions are captured in a -[`RemoteException`](@ref) and thrown. - -See also [`fetch`](@ref) and [`remotecall`](@ref). - -# Examples -```julia-repl -\$ julia -p 2 - -julia> remotecall_fetch(sqrt, 2, 4) -2.0 - -julia> remotecall_fetch(sqrt, 2, -4) -ERROR: On worker 2: -DomainError with -4.0: -sqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x)). -... -``` -""" -remotecall_fetch(f, id::Integer, args...; kwargs...) = - remotecall_fetch(f, worker_from_id(id), args...; kwargs...) - -remotecall_wait(f, w::LocalProcess, args...; kwargs...) = wait(remotecall(f, w, args...; kwargs...)) - -function remotecall_wait(f, w::Worker, args...; kwargs...) - prid = RRID() - rv = lookup_ref(prid) - rv.waitingfor = w.id - rr = Future(w) - send_msg(w, MsgHeader(remoteref_id(rr), prid), CallWaitMsg(f, args, kwargs)) - v = fetch(rv.c) - lock(client_refs) do - delete!(PGRP.refs, prid) - end - isa(v, RemoteException) && throw(v) - return rr -end - -""" - remotecall_wait(f, id::Integer, args...; kwargs...) - -Perform a faster `wait(remotecall(...))` in one message on the `Worker` specified by worker id `id`. -Keyword arguments, if any, are passed through to `f`. - -See also [`wait`](@ref) and [`remotecall`](@ref). -""" -remotecall_wait(f, id::Integer, args...; kwargs...) = - remotecall_wait(f, worker_from_id(id), args...; kwargs...) - -function remote_do(f, w::LocalProcess, args...; kwargs...) 
- # the LocalProcess version just performs in local memory what a worker - # does when it gets a :do message. - # same for other messages on LocalProcess. - thk = local_remotecall_thunk(f, args, kwargs) - schedule(Task(thk)) - nothing -end - -function remote_do(f, w::Worker, args...; kwargs...) - send_msg(w, MsgHeader(), RemoteDoMsg(f, args, kwargs)) - nothing -end - - -""" - remote_do(f, id::Integer, args...; kwargs...) -> nothing - -Executes `f` on worker `id` asynchronously. -Unlike [`remotecall`](@ref), it does not store the -result of computation, nor is there a way to wait for its completion. - -A successful invocation indicates that the request has been accepted for execution on -the remote node. - -While consecutive `remotecall`s to the same worker are serialized in the order they are -invoked, the order of executions on the remote worker is undetermined. For example, -`remote_do(f1, 2); remotecall(f2, 2); remote_do(f3, 2)` will serialize the call -to `f1`, followed by `f2` and `f3` in that order. However, it is not guaranteed that `f1` -is executed before `f3` on worker 2. - -Any exceptions thrown by `f` are printed to [`stderr`](@ref) on the remote worker. - -Keyword arguments, if any, are passed through to `f`. -""" -remote_do(f, id::Integer, args...; kwargs...) = remote_do(f, worker_from_id(id), args...; kwargs...) - -# have the owner of rr call f on it -function call_on_owner(f, rr::AbstractRemoteRef, args...) - rid = remoteref_id(rr) - if rr.where == myid() - f(rid, args...) - else - remotecall_fetch(f, rr.where, rid, args...) - end -end - -function wait_ref(rid, caller, args...) - v = fetch_ref(rid, args...) - if isa(v, RemoteException) - if myid() == caller - throw(v) - else - return v - end - end - nothing -end - -""" - wait(r::Future) - -Wait for a value to become available for the specified [`Future`](@ref). -""" -wait(r::Future) = (v_cache = @atomic r.v; v_cache !== nothing && return r; call_on_owner(wait_ref, r, myid()); r) - -""" - wait(r::RemoteChannel, args...) - -Wait for a value to become available on the specified [`RemoteChannel`](@ref). -""" -wait(r::RemoteChannel, args...) = (call_on_owner(wait_ref, r, myid(), args...); r) - -""" - fetch(x::Future) - -Wait for and get the value of a [`Future`](@ref). The fetched value is cached locally. -Further calls to `fetch` on the same reference return the cached value. If the remote value -is an exception, throws a [`RemoteException`](@ref) which captures the remote exception and backtrace. -""" -function fetch(r::Future) - v_cache = @atomic r.v - v_cache !== nothing && return something(v_cache) - - if r.where == myid() - rv, v_cache = @lock r.lock begin - v_cache = @atomic :monotonic r.v - rv = v_cache === nothing ? lookup_ref(remoteref_id(r)) : nothing - rv, v_cache - end - - if v_cache !== nothing - return something(v_cache) - else - v_local = fetch(rv.c) - end - else - v_local = call_on_owner(fetch_ref, r) - end - - v_cache = @atomic r.v - - if v_cache === nothing # call_on_owner case - v_old, status = @lock r.lock begin - @atomicreplace r.v nothing => Some(v_local) - end - # status == true - when value obtained through call_on_owner - # status == false - any other situation: atomicreplace fails, because by the time the lock is obtained cache will be populated - # why? local put! performs caching and putting into channel under r.lock - - # for local put! 
use the cached value, for call_on_owner cases just take the v_local as it was just cached in r.v - - # remote calls getting the value from `call_on_owner` used to return the value directly without wrapping it in `Some(x)` - # so we're doing the same thing here - if status - send_del_client(r) - return v_local - else # this `v_cache` is returned at the end of the function - v_cache = v_old - end - end - - send_del_client(r) - something(v_cache) -end - -fetch_ref(rid, args...) = fetch(lookup_ref(rid).c, args...) - -""" - fetch(c::RemoteChannel) - -Wait for and get a value from a [`RemoteChannel`](@ref). Exceptions raised are the -same as for a [`Future`](@ref). Does not remove the item fetched. -""" -fetch(r::RemoteChannel, args...) = call_on_owner(fetch_ref, r, args...)::eltype(r) - -isready(rv::RemoteValue, args...) = isready(rv.c, args...) - -""" - put!(rr::Future, v) - -Store a value to a [`Future`](@ref) `rr`. -`Future`s are write-once remote references. -A `put!` on an already set `Future` throws an `Exception`. -All asynchronous remote calls return `Future`s and set the -value to the return value of the call upon completion. -""" -function put!(r::Future, v) - if r.where == myid() - rid = remoteref_id(r) - rv = lookup_ref(rid) - isready(rv) && error("Future can be set only once") - @lock r.lock begin - put!(rv, v) # this notifies the tasks waiting on the channel in fetch - set_future_cache(r, v) # set the cache before leaving the lock, so that the notified tasks already see it cached - end - del_client(rid, myid()) - else - @lock r.lock begin # same idea as above if there were any local tasks fetching on this Future - call_on_owner(put_future, r, v, myid()) - set_future_cache(r, v) - end - end - r -end - -function set_future_cache(r::Future, v) - _, ok = @atomicreplace r.v nothing => Some(v) - ok || error("internal consistency error detected for Future") -end - -function put_future(rid, v, caller) - rv = lookup_ref(rid) - isready(rv) && error("Future can be set only once") - put!(rv, v) - # The caller has the value and hence can be removed from the remote store. - del_client(rid, caller) - nothing -end - - -put!(rv::RemoteValue, args...) = put!(rv.c, args...) -function put_ref(rid, caller, args...) - rv = lookup_ref(rid) - put!(rv, args...) - if myid() == caller && rv.synctake !== nothing - # Wait till a "taken" value is serialized out - github issue #29932 - lock(rv.synctake) - unlock(rv.synctake) - end - nothing -end - -""" - put!(rr::RemoteChannel, args...) - -Store a set of values to the [`RemoteChannel`](@ref). -If the channel is full, blocks until space is available. -Return the first argument. -""" -put!(rr::RemoteChannel, args...) = (call_on_owner(put_ref, rr, myid(), args...); rr) - -# take! is not supported on Future - -take!(rv::RemoteValue, args...) = take!(rv.c, args...) -function take_ref(rid, caller, args...) - rv = lookup_ref(rid) - synctake = false - if myid() != caller && rv.synctake !== nothing - # special handling for local put! / remote take! on unbuffered channel - # github issue #29932 - synctake = true - lock(rv.synctake) - end - - v = try - take!(rv, args...) - catch e - # avoid unmatched unlock when exception occurs - # github issue #33972 - synctake && unlock(rv.synctake) - rethrow(e) - end - - isa(v, RemoteException) && (myid() == caller) && throw(v) - - if synctake - return SyncTake(v, rv) - else - return v - end -end - -""" - take!(rr::RemoteChannel, args...) - -Fetch value(s) from a [`RemoteChannel`](@ref) `rr`, -removing the value(s) in the process. 
-""" -take!(rr::RemoteChannel, args...) = call_on_owner(take_ref, rr, myid(), args...)::eltype(rr) - -# close and isopen are not supported on Future - -close_ref(rid) = (close(lookup_ref(rid).c); nothing) -close(rr::RemoteChannel) = call_on_owner(close_ref, rr) - -isopen_ref(rid) = isopen(lookup_ref(rid).c) -isopen(rr::RemoteChannel) = call_on_owner(isopen_ref, rr) - -getindex(r::RemoteChannel) = fetch(r) -getindex(r::Future) = fetch(r) - -getindex(r::Future, args...) = getindex(fetch(r), args...) -function getindex(r::RemoteChannel, args...) - if r.where == myid() - return getindex(fetch(r), args...) - end - return remotecall_fetch(getindex, r.where, r, args...) -end diff --git a/stdlib/Distributed/src/workerpool.jl b/stdlib/Distributed/src/workerpool.jl deleted file mode 100644 index 0cada2db103de..0000000000000 --- a/stdlib/Distributed/src/workerpool.jl +++ /dev/null @@ -1,359 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" - AbstractWorkerPool - -Supertype for worker pools such as [`WorkerPool`](@ref) and [`CachingPool`](@ref). -An `AbstractWorkerPool` should implement: - - [`push!`](@ref) - add a new worker to the overall pool (available + busy) - - [`put!`](@ref) - put back a worker to the available pool - - [`take!`](@ref) - take a worker from the available pool (to be used for remote function execution) - - [`length`](@ref) - number of workers available in the overall pool - - [`isready`](@ref) - return false if a `take!` on the pool would block, else true - -The default implementations of the above (on a `AbstractWorkerPool`) require fields - - `channel::Channel{Int}` - - `workers::Set{Int}` -where `channel` contains free worker pids and `workers` is the set of all workers associated with this pool. -""" -abstract type AbstractWorkerPool end - -mutable struct WorkerPool <: AbstractWorkerPool - channel::Channel{Int} - workers::Set{Int} - ref::RemoteChannel - - WorkerPool(c::Channel, ref::RemoteChannel) = new(c, Set{Int}(), ref) -end - -function WorkerPool() - wp = WorkerPool(Channel{Int}(typemax(Int)), RemoteChannel()) - put!(wp.ref, WeakRef(wp)) - wp -end - -""" - WorkerPool(workers::Union{Vector{Int},AbstractRange{Int}}) - -Create a `WorkerPool` from a vector or range of worker ids. - -# Examples -```julia-repl -\$ julia -p 3 - -julia> WorkerPool([2, 3]) -WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:2), Set([2, 3]), RemoteChannel{Channel{Any}}(1, 1, 6)) - -julia> WorkerPool(2:4) -WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:2), Set([4, 2, 3]), RemoteChannel{Channel{Any}}(1, 1, 7)) -``` -""" -function WorkerPool(workers::Union{Vector{Int},AbstractRange{Int}}) - pool = WorkerPool() - foreach(w->push!(pool, w), workers) - return pool -end - -# On workers where this pool has been serialized to, instantiate with a dummy local channel. -WorkerPool(ref::RemoteChannel) = WorkerPool(Channel{Int}(1), ref) - -function serialize(S::AbstractSerializer, pool::WorkerPool) - # Allow accessing a worker pool from other processors. When serialized, - # initialize the `ref` to point to self and only send the ref. - # Other workers will forward all put!, take!, calls to the process owning - # the ref (and hence the pool). 
- Serialization.serialize_type(S, typeof(pool)) - serialize(S, pool.ref) -end - -deserialize(S::AbstractSerializer, t::Type{T}) where {T<:WorkerPool} = T(deserialize(S)) - -wp_local_push!(pool::AbstractWorkerPool, w::Int) = (push!(pool.workers, w); put!(pool.channel, w); pool) -wp_local_length(pool::AbstractWorkerPool) = length(pool.workers) -wp_local_isready(pool::AbstractWorkerPool) = isready(pool.channel) - -function wp_local_put!(pool::AbstractWorkerPool, w::Int) - # In case of default_worker_pool, the master is implictly considered a worker, i.e., - # it is not present in pool.workers. - # Confirm the that the worker is part of a pool before making it available. - w in pool.workers && put!(pool.channel, w) - w -end - -function wp_local_workers(pool::AbstractWorkerPool) - if length(pool) == 0 && pool === default_worker_pool() - return [1] - else - return collect(pool.workers) - end -end - -function wp_local_nworkers(pool::AbstractWorkerPool) - if length(pool) == 0 && pool === default_worker_pool() - return 1 - else - return length(pool.workers) - end -end - -function wp_local_take!(pool::AbstractWorkerPool) - # Find an active worker - worker = 0 - while true - if length(pool) == 0 - if pool === default_worker_pool() - # No workers, the master process is used as a worker - worker = 1 - break - else - throw(ErrorException("No active worker available in pool")) - end - end - - worker = take!(pool.channel) - if id_in_procs(worker) - break - else - delete!(pool.workers, worker) # Remove invalid worker from pool - end - end - return worker -end - -function remotecall_pool(rc_f, f, pool::AbstractWorkerPool, args...; kwargs...) - worker = take!(pool) - try - rc_f(f, worker, args...; kwargs...) - finally - put!(pool, worker) - end -end - -# Check if pool is local or remote and forward calls if required. -# NOTE: remotecall_fetch does it automatically, but this will be more efficient as -# it avoids the overhead associated with a local remotecall. - -for (func, rt) = ((:length, Int), (:isready, Bool), (:workers, Vector{Int}), (:nworkers, Int), (:take!, Int)) - func_local = Symbol(string("wp_local_", func)) - @eval begin - function ($func)(pool::WorkerPool) - if pool.ref.where != myid() - return remotecall_fetch(ref->($func_local)(fetch(ref).value), pool.ref.where, pool.ref)::$rt - else - return ($func_local)(pool) - end - end - - # default impl - ($func)(pool::AbstractWorkerPool) = ($func_local)(pool) - end -end - -for func = (:push!, :put!) - func_local = Symbol(string("wp_local_", func)) - @eval begin - function ($func)(pool::WorkerPool, w::Int) - if pool.ref.where != myid() - return remotecall_fetch((ref, w)->($func_local)(fetch(ref).value, w), pool.ref.where, pool.ref, w) - else - return ($func_local)(pool, w) - end - end - - # default impl - ($func)(pool::AbstractWorkerPool, w::Int) = ($func_local)(pool, w) - end -end - - -""" - remotecall(f, pool::AbstractWorkerPool, args...; kwargs...) -> Future - -[`WorkerPool`](@ref) variant of `remotecall(f, pid, ....)`. Wait for and take a free worker from `pool` and perform a `remotecall` on it. - -# Examples -```julia-repl -\$ julia -p 3 - -julia> wp = WorkerPool([2, 3]); - -julia> A = rand(3000); - -julia> f = remotecall(maximum, wp, A) -Future(2, 1, 6, nothing) -``` -In this example, the task ran on pid 2, called from pid 1. -""" -remotecall(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall, f, pool, args...; kwargs...) - - -""" - remotecall_wait(f, pool::AbstractWorkerPool, args...; kwargs...) 
-> Future - -[`WorkerPool`](@ref) variant of `remotecall_wait(f, pid, ....)`. Wait for and take a free worker from `pool` and -perform a `remotecall_wait` on it. - -# Examples -```julia-repl -\$ julia -p 3 - -julia> wp = WorkerPool([2, 3]); - -julia> A = rand(3000); - -julia> f = remotecall_wait(maximum, wp, A) -Future(3, 1, 9, nothing) - -julia> fetch(f) -0.9995177101692958 -``` -""" -remotecall_wait(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall_wait, f, pool, args...; kwargs...) - - -""" - remotecall_fetch(f, pool::AbstractWorkerPool, args...; kwargs...) -> result - -[`WorkerPool`](@ref) variant of `remotecall_fetch(f, pid, ....)`. Waits for and takes a free worker from `pool` and -performs a `remotecall_fetch` on it. - -# Examples -```julia-repl -\$ julia -p 3 - -julia> wp = WorkerPool([2, 3]); - -julia> A = rand(3000); - -julia> remotecall_fetch(maximum, wp, A) -0.9995177101692958 -``` -""" -remotecall_fetch(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remotecall_fetch, f, pool, args...; kwargs...) - -""" - remote_do(f, pool::AbstractWorkerPool, args...; kwargs...) -> nothing - -[`WorkerPool`](@ref) variant of `remote_do(f, pid, ....)`. Wait for and take a free worker from `pool` and -perform a `remote_do` on it. -""" -remote_do(f, pool::AbstractWorkerPool, args...; kwargs...) = remotecall_pool(remote_do, f, pool, args...; kwargs...) - -const _default_worker_pool = Ref{Union{WorkerPool, Nothing}}(nothing) - -""" - default_worker_pool() - -[`WorkerPool`](@ref) containing idle [`workers`](@ref) - used by `remote(f)` and [`pmap`](@ref) (by default). - -# Examples -```julia-repl -\$ julia -p 3 - -julia> default_worker_pool() -WorkerPool(Channel{Int64}(sz_max:9223372036854775807,sz_curr:3), Set([4, 2, 3]), RemoteChannel{Channel{Any}}(1, 1, 4)) -``` -""" -function default_worker_pool() - # On workers retrieve the default worker pool from the master when accessed - # for the first time - if _default_worker_pool[] === nothing - if myid() == 1 - _default_worker_pool[] = WorkerPool() - else - _default_worker_pool[] = remotecall_fetch(()->default_worker_pool(), 1) - end - end - return _default_worker_pool[] -end - -""" - remote([p::AbstractWorkerPool], f) -> Function - -Return an anonymous function that executes function `f` on an available worker -(drawn from [`WorkerPool`](@ref) `p` if provided) using [`remotecall_fetch`](@ref). -""" -remote(f) = (args...; kwargs...)->remotecall_fetch(f, default_worker_pool(), args...; kwargs...) -remote(p::AbstractWorkerPool, f) = (args...; kwargs...)->remotecall_fetch(f, p, args...; kwargs...) - -mutable struct CachingPool <: AbstractWorkerPool - channel::Channel{Int} - workers::Set{Int} - - # Mapping between a tuple (worker_id, f) and a RemoteChannel - map_obj2ref::IdDict{Tuple{Int, Function}, RemoteChannel} - - function CachingPool() - wp = new(Channel{Int}(typemax(Int)), Set{Int}(), IdDict{Tuple{Int, Function}, RemoteChannel}()) - finalizer(clear!, wp) - wp - end -end - -serialize(s::AbstractSerializer, cp::CachingPool) = throw(ErrorException("CachingPool objects are not serializable.")) - -""" - CachingPool(workers::Vector{Int}) - -An implementation of an `AbstractWorkerPool`. -[`remote`](@ref), [`remotecall_fetch`](@ref), -[`pmap`](@ref) (and other remote calls which execute functions remotely) -benefit from caching the serialized/deserialized functions on the worker nodes, -especially closures (which may capture large amounts of data). 
- -The remote cache is maintained for the lifetime of the returned `CachingPool` object. -To clear the cache earlier, use `clear!(pool)`. - -For global variables, only the bindings are captured in a closure, not the data. -`let` blocks can be used to capture global data. - -# Examples -```julia -const foo = rand(10^8); -wp = CachingPool(workers()) -let foo = foo - pmap(i -> sum(foo) + i, wp, 1:100); -end -``` - -The above would transfer `foo` only once to each worker. - -""" -function CachingPool(workers::Vector{Int}) - pool = CachingPool() - for w in workers - push!(pool, w) - end - return pool -end - -""" - clear!(pool::CachingPool) -> pool - -Removes all cached functions from all participating workers. -""" -function clear!(pool::CachingPool) - for (_,rr) in pool.map_obj2ref - finalize(rr) - end - empty!(pool.map_obj2ref) - pool -end - -exec_from_cache(rr::RemoteChannel, args...; kwargs...) = fetch(rr)(args...; kwargs...) -function exec_from_cache(f_ref::Tuple{Function, RemoteChannel}, args...; kwargs...) - put!(f_ref[2], f_ref[1]) # Cache locally - f_ref[1](args...; kwargs...) -end - -function remotecall_pool(rc_f, f, pool::CachingPool, args...; kwargs...) - worker = take!(pool) - f_ref = get(pool.map_obj2ref, (worker, f), (f, RemoteChannel(worker))) - isa(f_ref, Tuple) && (pool.map_obj2ref[(worker, f)] = f_ref[2]) # Add to tracker - - try - rc_f(exec_from_cache, worker, f_ref, args...; kwargs...) - finally - put!(pool, worker) - end -end diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl deleted file mode 100644 index 0be94b28e5da5..0000000000000 --- a/stdlib/Distributed/test/distributed_exec.jl +++ /dev/null @@ -1,1882 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Test, Distributed, Random, Serialization, Sockets -import Distributed: launch, manage - -@test cluster_cookie() isa String - -include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl")) - -@test Distributed.extract_imports(:(begin; import Foo, Bar; let; using Baz; end; end)) == - Any[:(import Foo, Bar), :(using Baz)] - -# Test a few "remote" invocations when no workers are present -@test remote(myid)() == 1 -@test pmap(identity, 1:100) == [1:100...] 
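The pool-based `remotecall*` methods and `CachingPool` removed above take a free worker from the pool for each call and, in the caching case, serialize a given closure to each worker only once. A minimal usage sketch of that behaviour (an illustration only, not part of the moved source; it assumes a session started with worker processes, e.g. `julia -p 2`, and the variable names are purely illustrative):

```julia
# Illustrative sketch; assumes `julia -p 2` so that workers() is non-empty.
using Distributed

# Pool-based remotecall_fetch: a free worker is taken from the pool,
# used for the call, then put back for the next caller.
wp = WorkerPool(workers())
remotecall_fetch(+, wp, 1, 2)        # runs on whichever worker was free

# CachingPool: the closure (and the data it captures via `let`) is
# serialized to each worker only once across the whole pmap.
data = rand(10_000)
cp = CachingPool(workers())
let data = data
    pmap(i -> sum(data) + i, cp, 1:4)
end
clear!(cp)                           # drop the remotely cached closures
```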
-@test 100 == @distributed (+) for i in 1:100 - 1 - end - -addprocs_with_testenv(4) -@test nprocs() == 5 - -# distributed loading of packages - -# setup -@everywhere begin - old_act_proj = Base.ACTIVE_PROJECT[] - pushfirst!(Base.LOAD_PATH, "@") - Base.ACTIVE_PROJECT[] = joinpath(Sys.BINDIR, "..", "share", "julia", "test", "TestPkg") -end - -# cause precompilation of TestPkg to avoid race condition -Base.compilecache(Base.identify_package("TestPkg")) - -@everywhere using TestPkg -@everywhere using TestPkg - -@everywhere begin - Base.ACTIVE_PROJECT[] = old_act_proj - popfirst!(Base.LOAD_PATH) -end - -@everywhere using Test, Random, LinearAlgebra - -id_me = myid() -id_other = filter(x -> x != id_me, procs())[rand(1:(nprocs()-1))] - -# Test role -@everywhere using Distributed -@test Distributed.myrole() === :master -for wid = workers() - wrole = remotecall_fetch(wid) do - Distributed.myrole() - end - @test wrole === :worker -end - -# Test remote() -let - pool = default_worker_pool() - - count = 0 - count_condition = Condition() - - function remote_wait(c) - @async_logerr begin - count += 1 - remote(take!)(c) - count -= 1 - notify(count_condition) - end - yield() - end - - testchannels = [RemoteChannel() for i in 1:nworkers()] - testcount = 0 - @test isready(pool) == true - for c in testchannels - @test count == testcount - remote_wait(c) - testcount += 1 - end - @test count == testcount - @test isready(pool) == false - - for c in testchannels - @test count == testcount - put!(c, "foo") - testcount -= 1 - (count == testcount) || wait(count_condition) - @test count == testcount - @test isready(pool) == true - end - - @test count == 0 - - for c in testchannels - @test count == testcount - remote_wait(c) - testcount += 1 - end - @test count == testcount - @test isready(pool) == false - - for c in reverse(testchannels) - @test count == testcount - put!(c, "foo") - testcount -= 1 - (count == testcount) || wait(count_condition) - @test count == testcount - @test isready(pool) == true - end - - @test count == 0 -end - -# Test Futures -function testf(id) - f=Future(id) - @test isready(f) == false - @test f.v === nothing - put!(f, :OK) - @test isready(f) == true - @test f.v !== nothing - - @test_throws ErrorException put!(f, :OK) # Cannot put! to a already set future - @test_throws MethodError take!(f) # take! 
is unsupported on a Future - - @test fetch(f) == :OK -end - -testf(id_me) -testf(id_other) - -function poll_while(f::Function; timeout_seconds::Integer = 120) - start_time = time_ns() - while f() - sleep(1) - if ( ( time_ns() - start_time )/1e9 ) > timeout_seconds - @error "Timed out" timeout_seconds - return false - end - end - return true -end - -function _getenv_include_thread_unsafe() - environment_variable_name = "JULIA_TEST_INCLUDE_THREAD_UNSAFE" - default_value = "false" - environment_variable_value = strip(get(ENV, environment_variable_name, default_value)) - b = parse(Bool, environment_variable_value)::Bool - return b -end -const _env_include_thread_unsafe = _getenv_include_thread_unsafe() -function include_thread_unsafe_tests() - if Threads.nthreads() > 1 - if _env_include_thread_unsafe - return true - end - msg = "Skipping a thread-unsafe test because `Threads.nthreads() > 1`" - @warn msg Threads.nthreads() - Test.@test_broken false - return false - end - return true -end - -# Distributed GC tests for Futures -function test_futures_dgc(id) - f = remotecall(myid, id) - fid = remoteref_id(f) - - # remote value should be deleted after a fetch - @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == true - @test f.v === nothing - @test fetch(f) == id - @test f.v !== nothing - yield(); # flush gc msgs - @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid)) - - # if unfetched, it should be deleted after a finalize - f = remotecall(myid, id) - fid = remoteref_id(f) - @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == true - @test f.v === nothing - finalize(f) - yield(); # flush gc msgs - @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid)) -end - -test_futures_dgc(id_me) -test_futures_dgc(id_other) - -# if sent to another worker, it should not be deleted till all references are fetched. -wid1 = workers()[1] -wid2 = workers()[2] -f = remotecall(myid, wid1) -fid = remoteref_id(f) - -fstore = RemoteChannel(wid2) -put!(fstore, f) - -@test fetch(f) == wid1 -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true -remotecall_fetch(r->(fetch(fetch(r)); yield()), wid2, fstore) -sleep(0.5) # to ensure that wid2 gc messages have been executed on wid1 -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false - -# put! should release remote reference since it would have been cached locally -f = Future(wid1) -fid = remoteref_id(f) - -# should not be created remotely till accessed -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false -# create it remotely -isready(f) - -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true -put!(f, :OK) -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == false -@test fetch(f) == :OK - -# RemoteException should be thrown on a put! 
when another process has set the value -f = Future(wid1) -fid = remoteref_id(f) - -fstore = RemoteChannel(wid2) -put!(fstore, f) # send f to wid2 -put!(f, :OK) # set value from master - -@test remotecall_fetch(k->haskey(Distributed.PGRP.refs, k), wid1, fid) == true - -testval = remotecall_fetch(wid2, fstore) do x - try - put!(fetch(x), :OK) - return 0 - catch e - if isa(e, RemoteException) - return 1 - else - return 2 - end - end -end -@test testval == 1 - -# Issue number #25847 -@everywhere function f25847(ref) - fetch(ref) - return true -end - -f = remotecall_wait(identity, id_other, ones(10)) -rrid = Distributed.RRID(f.whence, f.id) -remotecall_fetch(f25847, id_other, f) -@test BitSet([id_me]) == remotecall_fetch(()->Distributed.PGRP.refs[rrid].clientset, id_other) - -remotecall_fetch(f25847, id_other, f) -@test BitSet([id_me]) == remotecall_fetch(()->Distributed.PGRP.refs[rrid].clientset, id_other) - -finalize(f) -yield() # flush gc msgs -@test poll_while(() -> remotecall_fetch(chk_rrid->(yield(); haskey(Distributed.PGRP.refs, chk_rrid)), id_other, rrid)) - -# Distributed GC tests for RemoteChannels -function test_remoteref_dgc(id) - rr = RemoteChannel(id) - put!(rr, :OK) - rrid = remoteref_id(rr) - - # remote value should be deleted after finalizing the ref - @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == true - @test fetch(rr) == :OK - @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == true - finalize(rr) - yield(); # flush gc msgs - @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid)) -end -test_remoteref_dgc(id_me) -test_remoteref_dgc(id_other) - -# if sent to another worker, it should not be deleted till the other worker has also finalized. -let wid1 = workers()[1], - wid2 = workers()[2], - rr = RemoteChannel(wid1), - rrid = remoteref_id(rr), - fstore = RemoteChannel(wid2) - - put!(fstore, rr) - if include_thread_unsafe_tests() - @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true - end - finalize(rr) # finalize locally - yield() # flush gc msgs - if include_thread_unsafe_tests() - @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true - end - remotecall_fetch(r -> (finalize(take!(r)); yield(); nothing), wid2, fstore) # finalize remotely - sleep(0.5) # to ensure that wid2 messages have been executed on wid1 - @test poll_while(() -> remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid)) -end - -# Tests for issue #23109 - should not hang. -f = @spawnat :any rand(1, 1) -@Base.Experimental.sync begin - for _ in 1:10 - @async fetch(f) - end -end - -wid1, wid2 = workers()[1:2] -f = @spawnat wid1 rand(1,1) -@Base.Experimental.sync begin - @async fetch(f) - @async remotecall_fetch(()->fetch(f), wid2) -end - - -@test fetch(@spawnat id_other myid()) == id_other -@test (@fetchfrom id_other myid()) == id_other - -pids=[] -for i in 1:nworkers() - push!(pids, @fetch myid()) -end -@test sort(pids) == sort(workers()) - - -# test getindex on Futures and RemoteChannels -function test_indexing(rr) - a = rand(5,5) - put!(rr, a) - @test rr[2,3] == a[2,3] - @test rr[] == a -end - -test_indexing(Future()) -test_indexing(Future(id_other)) -test_indexing(RemoteChannel()) -test_indexing(RemoteChannel(id_other)) - -# Test ser/deser to non-ClusterSerializer objects. 
-function test_regular_io_ser(ref::Distributed.AbstractRemoteRef) - io = IOBuffer() - serialize(io, ref) - seekstart(io) - ref2 = deserialize(io) - for fld in fieldnames(typeof(ref)) - v = getfield(ref2, fld) - if isa(v, Number) - @test v === zero(typeof(v)) - elseif fld == :lock - @test v isa ReentrantLock - @test !islocked(v) - elseif v !== nothing - error(string("Add test for field ", fld)) - end - end -end - -test_regular_io_ser(Future()) -test_regular_io_ser(RemoteChannel()) - -# Test @distributed load balancing - all processors should get either M or M+1 -# iterations out of the loop range for some M. -ids = @distributed((a,b)->[a;b], for i=1:7; myid(); end) -workloads = Int[sum(ids .== i) for i in 2:nprocs()] -@test maximum(workloads) - minimum(workloads) <= 1 - -# @distributed reduction should work even with very short ranges -@test @distributed(+, for i=1:2; i; end) == 3 - -@test_throws ArgumentError sleep(-1) -@test_throws ArgumentError timedwait(()->false, 0.1, pollint=-0.5) - -# specify pids for pmap -@test sort(workers()[1:2]) == sort(unique(pmap(x->(sleep(0.1);myid()), WorkerPool(workers()[1:2]), 1:10))) - -# Testing buffered and unbuffered reads -# This large array should write directly to the socket -a = fill(1, 10^6) -@test a == remotecall_fetch((x)->x, id_other, a) - -# Not a bitstype, should be buffered -s = [randstring() for x in 1:10^5] -@test s == remotecall_fetch((x)->x, id_other, s) - -#large number of small requests -num_small_requests = 10000 -@test fill(id_other, num_small_requests) == [remotecall_fetch(myid, id_other) for i in 1:num_small_requests] - -# test parallel sends of large arrays from multiple tasks to the same remote worker -ntasks = 10 -rr_list = [Channel(1) for x in 1:ntasks] - -for rr in rr_list - local rr - let rr = rr - @async try - for i in 1:10 - a = rand(2*10^5) - @test a == remotecall_fetch(x->x, id_other, a) - yield() - end - put!(rr, :OK) - catch - put!(rr, :ERROR) - end - end -end - -@test [fetch(rr) for rr in rr_list] == [:OK for x in 1:ntasks] - -function test_channel(c) - @test isopen(c) == true - put!(c, 1) - put!(c, "Hello") - put!(c, 5.0) - - @test isready(c) == true - @test isopen(c) == true - @test fetch(c) == 1 - @test fetch(c) == 1 # Should not have been popped previously - @test take!(c) == 1 - @test take!(c) == "Hello" - @test fetch(c) == 5.0 - @test take!(c) == 5.0 - @test isready(c) == false - @test isopen(c) == true - close(c) - @test isopen(c) == false -end - -test_channel(Channel(10)) -test_channel(RemoteChannel(()->Channel(10))) - -c=Channel{Int}(1) -@test_throws MethodError put!(c, "Hello") - -# test channel iterations -function test_iteration(in_c, out_c) - t=@async for v in in_c - put!(out_c, v) - end - - @test isopen(in_c) == true - put!(in_c, 1) - @test take!(out_c) == 1 - put!(in_c, "Hello") - close(in_c) - @test take!(out_c) == "Hello" - @test isopen(in_c) == false - @test_throws InvalidStateException put!(in_c, :foo) - yield() - @test istaskdone(t) == true -end - -test_iteration(Channel(10), Channel(10)) -# make sure exceptions propagate when waiting on Tasks -@test_throws CompositeException (@sync (@async error("oops"))) -try - @sync begin - for i in 1:5 - @async error(i) - end - end - error("unexpected") -catch ex - @test typeof(ex) == CompositeException - @test length(ex) == 5 - @test typeof(ex.exceptions[1]) == TaskFailedException - @test typeof(ex.exceptions[1].task.exception) == ErrorException - # test start, next, and done - for (i, i_ex) in enumerate(ex) - @test i == parse(Int, i_ex.task.exception.msg) - 
end - # test showerror - err_str = sprint(showerror, ex) - err_one_str = sprint(showerror, ex.exceptions[1]) - @test err_str == err_one_str * "\n\n...and 4 more exceptions.\n" -end -@test sprint(showerror, CompositeException()) == "CompositeException()\n" - -function test_remoteexception_thrown(expr) - try - expr() - error("unexpected") - catch ex - @test typeof(ex) == RemoteException - @test typeof(ex.captured) == CapturedException - @test typeof(ex.captured.ex) == ErrorException - @test ex.captured.ex.msg == "foobar" - end -end - -for id in [id_other, id_me] - local id - test_remoteexception_thrown() do - remotecall_fetch(id) do - throw(ErrorException("foobar")) - end - end - test_remoteexception_thrown() do - remotecall_wait(id) do - throw(ErrorException("foobar")) - end - end - test_remoteexception_thrown() do - wait(remotecall(id) do - throw(ErrorException("foobar")) - end) - end -end - -# make sure the stackframe from the remote error can be serialized -let ex - try - remotecall_fetch(id_other) do - @eval module AModuleLocalToOther - foo() = throw(ErrorException("A.error")) - foo() - end - end - catch ex - end - @test (ex::RemoteException).pid == id_other - @test ((ex.captured::CapturedException).ex::ErrorException).msg == "A.error" - bt = ex.captured.processed_bt::Array{Any,1} - @test length(bt) > 1 - frame, repeated = bt[1]::Tuple{Base.StackTraces.StackFrame, Int} - @test frame.func == :foo - @test frame.linfo === nothing - @test repeated == 1 -end - -# pmap tests. Needs at least 4 processors dedicated to the below tests. Which we currently have -# since the distributed tests are now spawned as a separate set. - -# Test all combinations of pmap keyword args. -pmap_args = [ - (:distributed, [:default, false]), - (:batch_size, [:default,2]), - (:on_error, [:default, e -> (e.msg == "foobar" ? true : rethrow())]), - (:retry_delays, [:default, fill(0.001, 1000)]), - (:retry_check, [:default, (s,e) -> (s,endswith(e.msg,"foobar"))]), - ] - -kwdict = Dict() -function walk_args(i) - if i > length(pmap_args) - kwargs = [] - for (k,v) in kwdict - if v !== :default - push!(kwargs, (k,v)) - end - end - - data = 1:100 - - testw = kwdict[:distributed] === false ? [1] : workers() - - if kwdict[:retry_delays] !== :default - mapf = x -> iseven(myid()) ? error("notfoobar") : (x*2, myid()) - results_test = pmap_res -> begin - results = [x[1] for x in pmap_res] - pids = [x[2] for x in pmap_res] - @test results == [2:2:200...] - for p in testw - if isodd(p) - @test p in pids - else - @test !(p in pids) - end - end - end - elseif kwdict[:on_error] === :default - mapf = x -> (x*2, myid()) - results_test = pmap_res -> begin - results = [x[1] for x in pmap_res] - pids = [x[2] for x in pmap_res] - @test results == [2:2:200...] - for p in testw - @test p in pids - end - end - else - mapf = x -> iseven(x) ? 
error("foobar") : (x*2, myid()) - results_test = pmap_res -> begin - w = testw - for (idx,x) in enumerate(data) - if iseven(x) - @test pmap_res[idx] == true - else - @test pmap_res[idx][1] == x*2 - @test pmap_res[idx][2] in w - end - end - end - end - - try - results_test(pmap(mapf, data; kwargs...)) - catch - println("pmap executing with args : ", kwargs) - rethrow() - end - - return - end - - kwdict[pmap_args[i][1]] = pmap_args[i][2][1] - walk_args(i+1) - - kwdict[pmap_args[i][1]] = pmap_args[i][2][2] - walk_args(i+1) -end - -# Start test for various kw arg combinations -walk_args(1) - -include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "generic_map_tests.jl")) -empty_pool = WorkerPool([myid()]) -pmap_fallback = (f, c...) -> pmap(f, empty_pool, c...) -generic_map_tests(pmap_fallback) - -# pmap with various types. Test for equivalence with map -run_map_equivalence_tests(pmap) -@test pmap(uppercase, "Hello World!") == map(uppercase, "Hello World!") - - -# Simple test for pmap throws error -let error_thrown = false - try - pmap(x -> x == 50 ? error("foobar") : x, 1:100) - catch e - @test e.captured.ex.msg == "foobar" - error_thrown = true - end - @test error_thrown -end - -# Test pmap with a generator type iterator -@test [1:100...] == pmap(x->x, Base.Generator(x->(sleep(0.0001); x), 1:100)) - -# Test pgenerate -n = 10 -as = [rand(4,4) for i in 1:n] -bs = deepcopy(as) -cs = collect(Distributed.pgenerate(x->(sleep(rand()*0.1); svd(x)), bs)) -svdas = map(svd, as) -for i in 1:n - @test cs[i].U ≈ svdas[i].U - @test cs[i].S ≈ svdas[i].S - @test cs[i].V ≈ svdas[i].V -end - -# Test that the default worker pool cycles through all workers -pmap(_->myid(), 1:nworkers()) # priming run -@test nworkers() == length(unique(pmap(_->myid(), 1:100))) - -# Test same behaviour when executed on a worker -@test nworkers() == length(unique(remotecall_fetch(()->pmap(_->myid(), 1:100), id_other))) - -# Same tests with custom worker pools. -wp = WorkerPool(workers()) -@test nworkers() == length(unique(pmap(_->myid(), wp, 1:100))) -@test nworkers() == length(unique(remotecall_fetch(wp->pmap(_->myid(), wp, 1:100), id_other, wp))) -wp = WorkerPool(2:3) -@test sort(unique(pmap(_->myid(), wp, 1:100))) == [2,3] - -# CachingPool tests -wp = CachingPool(workers()) -@test [1:100...] == pmap(x->x, wp, 1:100) - -clear!(wp) -@test length(wp.map_obj2ref) == 0 - -# The below block of tests are usually run only on local development systems, since: -# - tests which print errors -# - addprocs tests are memory intensive -# - ssh addprocs requires sshd to be running locally with passwordless login enabled. -# The test block is enabled by defining env JULIA_TESTFULL=1 - -DoFullTest = Bool(parse(Int,(get(ENV, "JULIA_TESTFULL", "0")))) - -if DoFullTest - println("Testing exception printing on remote worker from a `remote_do` call") - println("Please ensure the remote error and backtrace is displayed on screen") - - remote_do(id_other) do - throw(ErrorException("TESTING EXCEPTION ON REMOTE DO. PLEASE IGNORE")) - end - sleep(0.5) # Give some time for the above error to be printed - - println("\n\nThe following 'invalid connection credentials' error messages are to be ignored.") - all_w = workers() - # Test sending fake data to workers. The worker processes will print an - # error message but should not terminate. 
- for w in Distributed.PGRP.workers - if isa(w, Distributed.Worker) - local s = connect(w.config.host, w.config.port) - write(s, randstring(32)) - end - end - @test workers() == all_w - @test all([p == remotecall_fetch(myid, p) for p in all_w]) - -if Sys.isunix() # aka have ssh - function test_n_remove_pids(new_pids) - for p in new_pids - w_in_remote = sort(remotecall_fetch(workers, p)) - try - @test intersect(new_pids, w_in_remote) == new_pids - catch - print("p : $p\n") - print("newpids : $new_pids\n") - print("w_in_remote : $w_in_remote\n") - print("intersect : $(intersect(new_pids, w_in_remote))\n\n\n") - rethrow() - end - end - - remotecall_fetch(rmprocs, 1, new_pids) - end - - print("\n\nTesting SSHManager. A minimum of 4GB of RAM is recommended.\n") - print("Please ensure: \n") - print("1) sshd is running locally with passwordless login enabled.\n") - print("2) Env variable USER is defined and is the ssh user.\n") - print("3) Port 9300 is not in use.\n") - - sshflags = `-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o LogLevel=ERROR ` - #Issue #9951 - hosts=[] - localhost_aliases = ["localhost", string(getipaddr()), "127.0.0.1"] - num_workers = parse(Int,(get(ENV, "JULIA_ADDPROCS_NUM", "9"))) - - for i in 1:(num_workers/length(localhost_aliases)) - append!(hosts, localhost_aliases) - end - - print("\nTesting SSH addprocs with $(length(hosts)) workers...\n") - new_pids = addprocs_with_testenv(hosts; sshflags=sshflags) - @test length(new_pids) == length(hosts) - test_n_remove_pids(new_pids) - - print("\nMixed ssh addprocs with :auto\n") - new_pids = addprocs_with_testenv(["localhost", ("127.0.0.1", :auto), "localhost"]; sshflags=sshflags) - @test length(new_pids) == (2 + Sys.CPU_THREADS) - test_n_remove_pids(new_pids) - - print("\nMixed ssh addprocs with numeric counts\n") - new_pids = addprocs_with_testenv([("localhost", 2), ("127.0.0.1", 2), "localhost"]; sshflags=sshflags) - @test length(new_pids) == 5 - test_n_remove_pids(new_pids) - - print("\nssh addprocs with tunnel\n") - new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, sshflags=sshflags) - @test length(new_pids) == num_workers - test_n_remove_pids(new_pids) - - print("\nssh addprocs with tunnel (SSH multiplexing)\n") - new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, multiplex=true, sshflags=sshflags) - @test length(new_pids) == num_workers - controlpath = joinpath(homedir(), ".ssh", "julia-$(ENV["USER"])@localhost:22") - @test issocket(controlpath) - test_n_remove_pids(new_pids) - @test :ok == timedwait(()->!issocket(controlpath), 10.0; pollint=0.5) - - print("\nAll supported formats for hostname\n") - h1 = "localhost" - user = ENV["USER"] - h2 = "$user@$h1" - h3 = "$h2:22" - h4 = "$h3 $(string(getipaddr()))" - h5 = "$h4:9300" - - new_pids = addprocs_with_testenv([h1, h2, h3, h4, h5]; sshflags=sshflags) - @test length(new_pids) == 5 - test_n_remove_pids(new_pids) - - print("\nkeyword arg exename\n") - for exename in [`$(joinpath(Sys.BINDIR, Base.julia_exename()))`, "$(joinpath(Sys.BINDIR, Base.julia_exename()))"] - for addp_func in [()->addprocs_with_testenv(["localhost"]; exename=exename, exeflags=test_exeflags, sshflags=sshflags), - ()->addprocs_with_testenv(1; exename=exename, exeflags=test_exeflags)] - - local new_pids = addp_func() - @test length(new_pids) == 1 - test_n_remove_pids(new_pids) - end - end - -end # unix-only -end # full-test - -let t = @task 42 - schedule(t, ErrorException(""), error=true) - @test_throws TaskFailedException(t) Base.wait(t) 
-end - -# issue #8207 -let A = Any[] - @distributed (+) for i in (push!(A,1); 1:2) - i - end - @test length(A) == 1 -end - -# issue #13168 -function f13168(n) - val = 0 - for i = 1:n - val += sum(rand(n, n)^2) - end - return val -end -let t = schedule(@task f13168(100)) - @test t.state == :runnable - @test t.queue !== nothing - @test_throws ErrorException schedule(t) - yield() - @test t.state == :done - @test t.queue === nothing - @test_throws ErrorException schedule(t) - @test isa(fetch(t), Float64) -end - -# issue #13122 -@test remotecall_fetch(identity, workers()[1], C_NULL) === C_NULL - -# issue #11062 -function t11062() - @async v11062 = 1 - v11062 = 2 -end - -@test t11062() == 2 - -# issue #15406 -v15406 = remotecall_wait(() -> 1, id_other) -fetch(v15406) -remotecall_wait(fetch, id_other, v15406) - - -# issue #43396 -# Covers the remote fetch where the value returned is `nothing` -# May be caused by attempting to unwrap a non-`Some` type with `something` -# `call_on_owner` ref fetches return values not wrapped in `Some` -# and have to be returned directly -@test nothing === fetch(remotecall(() -> nothing, workers()[1])) -@test 10 === fetch(remotecall(() -> 10, workers()[1])) - - -# Test various forms of remotecall* invocations - -@everywhere f_args(v1, v2=0; kw1=0, kw2=0) = v1+v2+kw1+kw2 - -function test_f_args(result, args...; kwargs...) - @test fetch(remotecall(args...; kwargs...)) == result - @test fetch(remotecall_wait(args...; kwargs...)) == result - @test remotecall_fetch(args...; kwargs...) == result - - # A visual test - remote_do should NOT print any errors - remote_do(args...; kwargs...) -end - -for tid in [id_other, id_me, default_worker_pool()] - test_f_args(1, f_args, tid, 1) - test_f_args(3, f_args, tid, 1, 2) - test_f_args(5, f_args, tid, 1; kw1=4) - test_f_args(13, f_args, tid, 1; kw1=4, kw2=8) - test_f_args(15, f_args, tid, 1, 2; kw1=4, kw2=8) -end - -# Test remote_do -f=Future(id_me) -remote_do(fut->put!(fut, myid()), id_me, f) -@test fetch(f) == id_me - -f=Future(id_other) -remote_do(fut->put!(fut, myid()), id_other, f) -@test fetch(f) == id_other - -# Github issue #29932 -rc_unbuffered = RemoteChannel(()->Channel{Vector{Float64}}(0)) -@test eltype(rc_unbuffered) == Vector{Float64} - -@async begin - # Trigger direct write (no buffering) of largish array - array_sz = Int(Base.SZ_UNBUFFERED_IO/8) + 1 - largev = zeros(array_sz) - for i in 1:10 - largev[1] = float(i) - put!(rc_unbuffered, largev) - end -end - -@test remotecall_fetch(rc -> begin - for i in 1:10 - take!(rc)[1] != float(i) && error("Failed") - end - return :OK - end, id_other, rc_unbuffered) == :OK - -# github issue 33972 -rc_unbuffered_other = RemoteChannel(()->Channel{Int}(0), id_other) -close(rc_unbuffered_other) -try; take!(rc_unbuffered_other); catch; end -@test !remotecall_fetch(rc -> islocked(Distributed.lookup_ref(remoteref_id(rc)).synctake), - id_other, rc_unbuffered_other) - -# github PR #14456 -n = DoFullTest ? 
6 : 5 -for i = 1:10^n - fetch(@spawnat myid() myid()) -end - -# issue #15451 -@test remotecall_fetch(x->(y->2y)(x)+1, workers()[1], 3) == 7 - -# issue #16091 -mutable struct T16091 end -wid = workers()[1] -@test try - remotecall_fetch(()->T16091, wid) - false -catch ex - ((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091) -end -@test try - remotecall_fetch(identity, wid, T16091) - false -catch ex - ((ex::RemoteException).captured::CapturedException).ex === UndefVarError(:T16091) -end - -f16091a() = 1 -remotecall_fetch(()->eval(:(f16091a() = 2)), wid) -@test remotecall_fetch(f16091a, wid) === 2 -@test remotecall_fetch((myid)->remotecall_fetch(f16091a, myid), wid, myid()) === 1 - -# these will only heisen-fail, since it depends on the gensym counter collisions: -f16091b = () -> 1 -remotecall_fetch(()->eval(:(f16091b = () -> 2)), wid) -@test remotecall_fetch(f16091b, 2) === 1 -# Global anonymous functions are over-written... -@test remotecall_fetch((myid)->remotecall_fetch(f16091b, myid), wid, myid()) === 1 - -# ...while local anonymous functions are by definition, local. -let - f16091c = () -> 1 - @test remotecall_fetch(f16091c, 2) === 1 - @test remotecall_fetch( - myid -> begin - let - f16091c = () -> 2 - remotecall_fetch(f16091c, myid) - end - end, wid, myid()) === 2 -end - -# issue #16451 -rng=RandomDevice() -retval = @distributed (+) for _ in 1:10 - rand(rng) -end -@test retval > 0.0 && retval < 10.0 - -rand(rng) -retval = @distributed (+) for _ in 1:10 - rand(rng) -end -@test retval > 0.0 && retval < 10.0 - -# serialization tests -wrkr1 = workers()[1] -wrkr2 = workers()[end] - -@test remotecall_fetch(p->remotecall_fetch(myid, p), wrkr1, wrkr2) == wrkr2 - -# Send f to wrkr1 and wrkr2. Then try calling f on wrkr2 from wrkr1 -f_myid = ()->myid() -@test wrkr1 == remotecall_fetch(f_myid, wrkr1) -@test wrkr2 == remotecall_fetch(f_myid, wrkr2) -@test wrkr2 == remotecall_fetch((f, p)->remotecall_fetch(f, p), wrkr1, f_myid, wrkr2) - -# Deserialization error recovery test -# locally defined module, but unavailable on workers -module LocalFoo - global foo=1 -end - -let - @test_throws RemoteException remotecall_fetch(()->LocalFoo.foo, 2) - - bad_thunk = ()->NonexistantModule.f() - @test_throws RemoteException remotecall_fetch(bad_thunk, 2) - - # Test that the stream is still usable - @test remotecall_fetch(()->:test,2) == :test - ref = remotecall(bad_thunk, 2) - @test_throws RemoteException fetch(ref) -end - -# Test calling @everywhere from a module not defined on the workers -module LocalBar - using Distributed - bar() = @everywhere new_bar()=myid() -end -LocalBar.bar() -for p in procs() - @test p == remotecall_fetch(new_bar, p) -end - -# @everywhere (remotecall_eval) behaviors (#22589) -let (p, p2) = filter!(p -> p != myid(), procs()) - @test (myid() + 1) == @everywhere myid() (myid() + 1) - @test (p * 2) == @everywhere p (myid() * 2) - @test 1 == @everywhere p defined_on_p = 1 - @test !@isdefined defined_on_p - @test !isdefined(Main, :defined_on_p) - @test remotecall_fetch(isdefined, p, Main, :defined_on_p) - @test !remotecall_fetch(isdefined, p2, Main, :defined_on_p) - @test nothing === @everywhere [p, p] defined_on_p += 1 - @test 3 === @everywhere p defined_on_p - let ref = Ref(0) - @test nothing === - @everywhere [myid(), p, myid(), myid(), p] begin - Test.@test Main === @__MODULE__ - $ref[] += 1 - end - @test ref[] == 3 - end - function test_throw_on(procs, msg) - try - @everywhere procs error($msg) - error("test failed to throw") - catch excpt - if procs isa 
Int - ex = Any[excpt] - else - ex = (excpt::CompositeException).exceptions - end - for (p, ex) in zip(procs, ex) - local p - if procs isa Int || p != myid() - @test (ex::RemoteException).pid == p - ex = ((ex::RemoteException).captured::CapturedException).ex - else - ex = (ex::TaskFailedException).task.exception - end - @test (ex::ErrorException).msg == msg - end - end - end - test_throw_on(p, "everywhere on p") - test_throw_on(myid(), "everywhere on myid") - test_throw_on([p, myid()], "everywhere on myid and p") - test_throw_on([p2, p], "everywhere on p and p2") -end - -# Test addprocs enable_threaded_blas parameter - -function get_remote_num_threads(processes_added) - return [remotecall_fetch(BLAS.get_num_threads, proc_id) for proc_id in processes_added] -end - -function test_blas_config(pid, expected) - for worker in Distributed.PGRP.workers - if worker.id == pid - @test worker.config.enable_threaded_blas == expected - return - end - end -end - -function test_add_procs_threaded_blas() - master_blas_thread_count = BLAS.get_num_threads() - if master_blas_thread_count === nothing - @warn "Skipping blas num threads tests due to unsupported blas version" - return - end - - # Test with default enable_threaded_blas false - processes_added = addprocs_with_testenv(2) - for proc_id in processes_added - test_blas_config(proc_id, false) - end - - # Master thread should not have changed - @test BLAS.get_num_threads() == master_blas_thread_count - - # Threading disabled in children by default - thread_counts_by_process = get_remote_num_threads(processes_added) - for thread_count in thread_counts_by_process - @test thread_count == 1 - end - rmprocs(processes_added) - - processes_added = addprocs_with_testenv(2, enable_threaded_blas=true) - for proc_id in processes_added - test_blas_config(proc_id, true) - end - - @test BLAS.get_num_threads() == master_blas_thread_count - - # BLAS.set_num_threads(`num`) doesn't cause BLAS.get_num_threads to return `num` - # depending on the machine, the BLAS version, and BLAS configuration, so - # we need a very lenient test. - thread_counts_by_process = get_remote_num_threads(processes_added) - for thread_count in thread_counts_by_process - @test thread_count >= 1 - end - rmprocs(processes_added) -end -test_add_procs_threaded_blas() - -#19687 -if false ### TODO: The logic that is supposed to implement this is racy - Disabled for now -# ensure no race conditions between rmprocs and addprocs -for i in 1:5 - p = addprocs_with_testenv(1)[1] - @spawnat p sleep(5) - rmprocs(p; waitfor=0) -end - -# Test if a wait has been called on rmprocs(...;waitfor=0), further remotecalls -# don't throw errors. -for i in 1:5 - p = addprocs_with_testenv(1)[1] - np = nprocs() - @spawnat p sleep(5) - Base.wait(rmprocs(p; waitfor=0)) - for pid in procs() - @test pid == remotecall_fetch(myid, pid) - end - @test nprocs() == np - 1 -end - -# Test that an exception is thrown if workers are unable to be removed within requested time. 
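# A minimal sketch of the rmprocs/waitfor behaviour relied on above: with waitfor=0 the
# call returns immediately with a Task, which must be waited on before asserting cluster
# state; a too-small positive waitfor raises an error, as the test below checks.
let p = addprocs(1)[1]
    t = rmprocs(p; waitfor=0)   # returns a Task without blocking
    wait(t)                     # block until the worker is actually gone
    @assert p ∉ procs()
end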
-if DoFullTest - pids=addprocs_with_testenv(4); - @test_throws ErrorException rmprocs(pids; waitfor=0.001); - # wait for workers to be removed - while any(in(procs()), pids) - sleep(0.1) - end -end -end - -# Test addprocs/rmprocs from master node only -for f in [ ()->addprocs(1; exeflags=test_exeflags), ()->rmprocs(workers()) ] - local f - try - remotecall_fetch(f, id_other) - error("Unexpected") - catch ex - @test isa(ex, RemoteException) - @test ex.captured.ex.msg == "Only process 1 can add and remove workers" - end -end - -# Test the following addprocs error conditions -# - invalid host name - github issue #20372 -# - julia exe exiting with an error -# - timeout reading host:port from worker stdout -# - host:port not found in worker stdout in the first 1000 lines - -struct ErrorSimulator <: ClusterManager - mode -end - -function launch(manager::ErrorSimulator, params::Dict, launched::Array, c::Condition) - exename = params[:exename] - dir = params[:dir] - - cmd = `$(Base.julia_cmd(exename)) --startup-file=no` - if manager.mode == :timeout - cmd = `$cmd -e "sleep(10)"` - elseif manager.mode == :ntries - cmd = `$cmd -e "[println(x) for x in 1:1001]"` - elseif manager.mode == :exit - cmd = `$cmd -e "exit(-1)"` - else - error("Unknown mode") - end - io = open(detach(setenv(cmd, dir=dir))) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - push!(launched, wconfig) - notify(c) -end - -testruns = Any[] - -if DoFullTest - append!(testruns, [(()->addprocs_with_testenv(["errorhost20372"]), "Unable to read host:port string from worker. Launch command exited with error?", ())]) -end - -append!(testruns, [ - (()->addprocs_with_testenv(ErrorSimulator(:exit)), "Unable to read host:port string from worker. Launch command exited with error?", ()), - (()->addprocs_with_testenv(ErrorSimulator(:ntries)), "Unexpected output from worker launch command. Host:port string not found.", ()), - (()->addprocs_with_testenv(ErrorSimulator(:timeout)), "Timed out waiting to read host:port string from worker.", ("JULIA_WORKER_TIMEOUT"=>"1",)) -]) - -for (addp_testf, expected_errstr, env) in testruns - old_stdout = stdout - stdout_out, stdout_in = redirect_stdout() - stdout_txt = @async filter!(readlines(stdout_out)) do s - return !startswith(s, "\tFrom worker startup:\t") - end - try - withenv(env...) do - addp_testf() - end - error("Unexpected") - catch ex - redirect_stdout(old_stdout) - close(stdout_in) - @test isempty(fetch(stdout_txt)) - @test isa(ex, CompositeException) - @test ex.exceptions[1].task.exception.msg == expected_errstr - end -end - - -# Auto serialization of globals from Main. 
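# Background for the tests below, in sketch form: when a closure sent via remotecall
# references a global binding in Main, that binding is serialized along with it and
# re-created under the same name in the worker's Main (id_other is the remote worker
# id used throughout this file; `coeff` is an illustrative global).
global coeff = 2.0
remotecall_fetch(() -> coeff * 3, id_other)                # ships the `coeff` binding
remotecall_fetch(() -> isdefined(Main, :coeff), id_other)  # now true on the worker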
-# bitstypes -global v1 = 1 -@test remotecall_fetch(()->v1, id_other) == v1 -@test remotecall_fetch(()->isdefined(Main, :v1), id_other) -for i in 2:5 - global v1 = i - @test remotecall_fetch(()->v1, id_other) == i -end - -# non-bitstypes -global v2 = zeros(10) -for i in 1:5 - v2[i] = i - @test remotecall_fetch(()->v2, id_other) == v2 -end - -# Different global bindings to the same object -global v3 = fill(1., 10) -global v4 = v3 -@test remotecall_fetch(()->v3, id_other) == remotecall_fetch(()->v4, id_other) -@test remotecall_fetch(()->isdefined(Main, :v3), id_other) -@test remotecall_fetch(()->isdefined(Main, :v4), id_other) - -# Global references to Types and Modules should work if they are locally defined -global v5 = Int -global v6 = Distributed -@test remotecall_fetch(()->v5, id_other) === Int -@test remotecall_fetch(()->v6, id_other) === Distributed - -struct FooStructLocal end -module FooModLocal end -v5 = FooStructLocal -v6 = FooModLocal -@test_throws RemoteException remotecall_fetch(()->v5, id_other) -@test_throws RemoteException remotecall_fetch(()->v6, id_other) - -@everywhere struct FooStructEverywhere end -@everywhere module FooModEverywhere end -v5 = FooStructEverywhere -v6 = FooModEverywhere -@test remotecall_fetch(()->v5, id_other) === FooStructEverywhere -@test remotecall_fetch(()->v6, id_other) === FooModEverywhere - -# hash value same but different object instance -v7 = ones(10) -oid1 = objectid(v7) -hval1 = hash(v7) -@test v7 == @fetchfrom id_other v7 -remote_oid1 = @fetchfrom id_other objectid(v7) - -v7 = ones(10) -@test oid1 != objectid(v7) -@test hval1 == hash(v7) -@test remote_oid1 != @fetchfrom id_other objectid(v7) - - -# Github issue #31252 -v31252 = :a -@test :a == @fetchfrom id_other v31252 - -v31252 = :b -@test :b == @fetchfrom id_other v31252 - -v31252 = :a -@test :a == @fetchfrom id_other v31252 - - -# Test that a global is not being repeatedly serialized when -# a) referenced multiple times in the closure -# b) hash value has not changed. - -@everywhere begin - using Serialization - global testsercnt_d = Dict() - mutable struct TestSerCnt - v - end - import Base.hash, Base.== - hash(x::TestSerCnt, h::UInt) = hash(hash(x.v), h) - ==(x1::TestSerCnt, x2::TestSerCnt) = (x1.v == x2.v) - - function Serialization.serialize(s::AbstractSerializer, t::TestSerCnt) - Serialization.serialize_type(s, TestSerCnt) - serialize(s, t.v) - global testsercnt_d - cnt = get!(testsercnt_d, objectid(t), 0) - testsercnt_d[objectid(t)] = cnt+1 - end - - Serialization.deserialize(s::AbstractSerializer, ::Type{TestSerCnt}) = TestSerCnt(deserialize(s)) -end - -# hash value of tsc is not changed -global tsc = TestSerCnt(zeros(10)) -for i in 1:5 - remotecall_fetch(()->tsc, id_other) -end -# should have been serialized only once -@test testsercnt_d[objectid(tsc)] == 1 - -# hash values are changed -n=5 -testsercnt_d[objectid(tsc)] = 0 -for i in 1:n - tsc.v[i] = i - remotecall_fetch(()->tsc, id_other) -end -# should have been serialized as many times as the loop -@test testsercnt_d[objectid(tsc)] == n - -# Multiple references in a closure should be serialized only once. 
-global mrefs = TestSerCnt(fill(1.,10)) -@test remotecall_fetch(()->(mrefs.v, 2*mrefs.v, 3*mrefs.v), id_other) == (fill(1.,10), fill(2.,10), fill(3.,10)) -@test testsercnt_d[objectid(mrefs)] == 1 - - -# nested anon functions -global f1 = x->x -global f2 = x->f1(x) -v = rand() -@test remotecall_fetch(f2, id_other, v) == v -@test remotecall_fetch(x->f2(x), id_other, v) == v - -# consts -const c1 = fill(1., 10) -@test remotecall_fetch(()->c1, id_other) == c1 -@test remotecall_fetch(()->isconst(Main, :c1), id_other) - -# Test same calls with local vars -function wrapped_var_ser_tests() - # bitstypes - local lv1 = 1 - @test remotecall_fetch(()->lv1, id_other) == lv1 - @test !remotecall_fetch(()->isdefined(Main, :lv1), id_other) - for i in 2:5 - lv1 = i - @test remotecall_fetch(()->lv1, id_other) == i - end - - # non-bitstypes - local lv2 = zeros(10) - for i in 1:5 - lv2[i] = i - @test remotecall_fetch(()->lv2, id_other) == lv2 - end - - # nested anon functions - local lf1 = x->x - local lf2 = x->lf1(x) - v = rand() - @test remotecall_fetch(lf2, id_other, v) == v - @test remotecall_fetch(x->lf2(x), id_other, v) == v -end - -wrapped_var_ser_tests() - -# Test internal data structures being cleaned up upon gc. -global ids_cleanup = fill(1., 6) -global ids_func = ()->ids_cleanup - -clust_ser = (Distributed.worker_from_id(id_other)).w_serializer -@test remotecall_fetch(ids_func, id_other) == ids_cleanup - -# TODO Add test for cleanup from `clust_ser.glbs_in_tnobj` - -# reported github issues - Mostly tests with globals and various distributed macros -#2669, #5390 -v2669=10 -@test fetch(@spawnat :any (1+v2669)) == 11 - -#12367 -refs = [] -if true - n = 10 - for p in procs() - push!(refs, @spawnat p begin - @sync for i in 1:n - nothing - end - end) - end -end -foreach(wait, refs) - -#6760 -if true - a = 2 - x = @distributed (vcat) for k=1:2 - sin(a) - end -end -@test x == map(_->sin(2), 1:2) - -let thrown = false - try - remotecall_fetch(sqrt, 2, -1) - catch e - thrown = true - local b = IOBuffer() - showerror(b, e) - @test occursin("sqrt will only return", String(take!(b))) - end - @test thrown -end - -# issue #34333 -let - @test fetch(remotecall(Float64, id_other, 1)) == Float64(1) - @test fetch(remotecall_wait(Float64, id_other, 1)) == Float64(1) - @test remotecall_fetch(Float64, id_other, 1) == Float64(1) -end - -#19463 -function foo19463() - w1 = workers()[1] - w2 = workers()[2] - w3 = workers()[3] - - b1 = () -> 1 - b2 = () -> fetch(@spawnat w1 b1()) + 1 - b3 = () -> fetch(@spawnat w2 b2()) + 1 - b4 = () -> fetch(@spawnat w3 b3()) + 1 - b4() -end -@test foo19463() == 4 - -# Testing clear! 
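# clear!(syms, pids) resets the named Main globals to `nothing` on the given workers,
# releasing the remote copies created by the auto-serialization shown earlier; a small
# sketch (`bigdata` is illustrative, id_other as above):
global bigdata = rand(10^3)
remotecall_fetch(() -> sum(bigdata), id_other)   # ships `bigdata` to the worker
clear!(:bigdata, id_other)                       # remote binding is now `nothing`
@assert remotecall_fetch(() -> getfield(Main, :bigdata), id_other) === nothing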
-function setup_syms(n, pids) - syms = [] - for i in 1:n - symstr = string("clrtest", randstring()) - sym = Symbol(symstr) - eval(:(global $sym = rand())) - for p in pids - eval(:(@test $sym == remotecall_fetch(()->$sym, $p))) - eval(:(@test remotecall_fetch(isdefined, $p, Main, Symbol($symstr)))) - end - push!(syms, sym) - end - syms -end - -function test_clear(syms, pids) - for p in pids - for sym in syms - remote_val = remotecall_fetch(()->getfield(Main, sym), p) - @test remote_val === nothing - @test remote_val != getfield(Main, sym) - end - end -end - -syms = setup_syms(1, [id_other]) -clear!(syms[1], id_other) -test_clear(syms, [id_other]) - -syms = setup_syms(1, workers()) -clear!(syms[1], workers()) -test_clear(syms, workers()) - -syms = setup_syms(3, [id_other]) -clear!(syms, id_other) -test_clear(syms, [id_other]) - -syms = setup_syms(3, workers()) -clear!(syms, workers()) -test_clear(syms, workers()) - -# Test partial recovery from a deserialization error in CapturedException -try - expr = quote - mutable struct DontExistOn1 - x - end - throw(BoundsError(DontExistOn1(1), 1)) - end - - remotecall_fetch(()->eval(expr), id_other) - error("unexpected") -catch ex - @test isa(ex.captured.ex.exceptions[1].ex, ErrorException) - @test occursin("BoundsError", ex.captured.ex.exceptions[1].ex.msg) - @test ex.captured.ex.exceptions[2].ex == UndefVarError(:DontExistOn1) -end - -let - # creates a new worker in a different folder and tries to include file - tmp_dir = mktempdir() - tmp_dir2 = joinpath(tmp_dir, "2") - tmp_file = joinpath(tmp_dir2, "testfile") - tmp_file2 = joinpath(tmp_dir2, "testfile2") - proc = addprocs_with_testenv(1, dir=tmp_dir) - try - mkdir(tmp_dir2) - write(tmp_file, "23.32 + 32 + myid() + include(\"testfile2\")") - write(tmp_file2, "myid() * 2") - function test_include_fails_to_open_file(fname) - try - include(fname) - catch exc - path = joinpath(@__DIR__, fname) - @test exc isa SystemError - @test exc.prefix == "opening file $(repr(path))" - end - end - test_include_fails_to_open_file("testfile") - test_include_fails_to_open_file("testfile2") - test_include_fails_to_open_file(joinpath("2", "testfile2")) - @test include(tmp_file) == 58.32 - @test remotecall_fetch(include, proc[1], joinpath("2", "testfile")) == 55.32 + proc[1] * 3 - finally - rmprocs(proc) - rm(tmp_file, force=true) - rm(tmp_file2, force=true) - rm(tmp_dir2, force=true) - #rm(tmp_dir, force=true) - end -end -# cookie and command line option `--worker` tests. 
remove workers, set cookie and test -struct WorkerArgTester <: ClusterManager - worker_opt - write_cookie -end - -function launch(manager::WorkerArgTester, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - - cmd = `$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) $(manager.worker_opt)` - cmd = pipeline(detach(setenv(cmd, dir=dir))) - io = open(cmd, "r+") - manager.write_cookie && Distributed.write_cookie(io) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - push!(launched, wconfig) - - notify(c) -end -manage(::WorkerArgTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing - -nprocs()>1 && rmprocs(workers()) - -npids = addprocs_with_testenv(WorkerArgTester(`--worker`, true)) -@test remotecall_fetch(myid, npids[1]) == npids[1] -rmprocs(npids) - -cluster_cookie("") # An empty string is a valid cookie -npids = addprocs_with_testenv(WorkerArgTester(`--worker=`, false)) -@test remotecall_fetch(myid, npids[1]) == npids[1] -rmprocs(npids) - -cluster_cookie("foobar") # custom cookie -npids = addprocs_with_testenv(WorkerArgTester(`--worker=foobar`, false)) -@test remotecall_fetch(myid, npids[1]) == npids[1] - -# tests for start_worker options to retain stdio (issue #31035) -struct RetainStdioTester <: ClusterManager - close_stdin::Bool - stderr_to_stdout::Bool -end - -function launch(manager::RetainStdioTester, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - - jlcmd = "using Distributed; start_worker(\"\"; close_stdin=$(manager.close_stdin), stderr_to_stdout=$(manager.stderr_to_stdout));" - cmd = detach(setenv(`$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) -e $jlcmd`, dir=dir)) - proc = open(cmd, "r+") - - wconfig = WorkerConfig() - wconfig.process = proc - wconfig.io = proc.out - push!(launched, wconfig) - - notify(c) -end -manage(::RetainStdioTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing - - -nprocs()>1 && rmprocs(workers()) -cluster_cookie("") - -for close_stdin in (true, false), stderr_to_stdout in (true, false) - local npids = addprocs_with_testenv(RetainStdioTester(close_stdin,stderr_to_stdout)) - @test remotecall_fetch(myid, npids[1]) == npids[1] - if close_stdin - @test remotecall_fetch(()->stdin === devnull && !isreadable(stdin), npids[1]) - else - @test remotecall_fetch(()->stdin !== devnull && isopen(stdin) && isreadable(stdin), npids[1]) - end - @test stderr_to_stdout == remotecall_fetch(()->(stderr === stdout), npids[1]) - rmprocs(npids) -end - -# Issue # 22865 -# Must be run on a new cluster, i.e., all workers must be in the same state. -@assert nprocs() == 1 -p1,p2 = addprocs_with_testenv(2) -@everywhere f22865(p) = remotecall_fetch(x->x.*2, p, fill(1.,2)) -@test fill(2.,2) == remotecall_fetch(f22865, p1, p2) -rmprocs(p1, p2) - -function reuseport_tests() - # Run the test on all processes. 
- results = asyncmap(procs()) do p - remotecall_fetch(p) do - ports_lower = [] # ports of pids lower than myid() - ports_higher = [] # ports of pids higher than myid() - for w in Distributed.PGRP.workers - w.id == myid() && continue - port = Sockets._sockname(w.r_stream, true)[2] - if (w.id == 1) - # master connects to workers - push!(ports_higher, port) - elseif w.id < myid() - push!(ports_lower, port) - elseif w.id > myid() - push!(ports_higher, port) - end - end - @assert (length(ports_lower) + length(ports_higher)) == nworkers() - for portset in [ports_lower, ports_higher] - if (length(portset) > 0) && (length(unique(portset)) != 1) - @warn "SO_REUSEPORT TESTS FAILED. UNSUPPORTED/OLDER UNIX VERSION?" - return 0 - end - end - return myid() - end - end - - # Ensure that the code has indeed been successfully executed everywhere - @test all(in(results), procs()) -end - -# Test that the client port is reused. SO_REUSEPORT may not be supported on -# all UNIX platforms, Linux kernels prior to 3.9 and older versions of OSX -@assert nprocs() == 1 -addprocs_with_testenv(4; lazy=false) -if ccall(:jl_has_so_reuseport, Int32, ()) == 1 - reuseport_tests() -else - @info "SO_REUSEPORT is unsupported, skipping reuseport tests" -end - -# issue #27933 -a27933 = :_not_defined_27933 -@test remotecall_fetch(()->a27933, first(workers())) === a27933 - -# PR #28651 -for T in (UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64) - local n = @distributed (+) for i in Base.OneTo(T(10)) - i - end - @test n == 55 -end - -# issue #28966 -let code = """ - import Distributed - Distributed.addprocs(1) - Distributed.@everywhere f() = myid() - for w in Distributed.workers() - @assert Distributed.remotecall_fetch(f, w) == w - end - """ - @test success(`$(Base.julia_cmd()) --startup-file=no -e $code`) -end - -# PR 32431: tests for internal Distributed.head_and_tail -let (h, t) = Distributed.head_and_tail(1:10, 3) - @test h == 1:3 - @test collect(t) == 4:10 -end -let (h, t) = Distributed.head_and_tail(1:10, 0) - @test h == [] - @test collect(t) == 1:10 -end -let (h, t) = Distributed.head_and_tail(1:3, 5) - @test h == 1:3 - @test collect(t) == [] -end -let (h, t) = Distributed.head_and_tail(1:3, 3) - @test h == 1:3 - @test collect(t) == [] -end -let (h, t) = Distributed.head_and_tail(Int[], 3) - @test h == [] - @test collect(t) == [] -end -let (h, t) = Distributed.head_and_tail(Int[], 0) - @test h == [] - @test collect(t) == [] -end - -# issue #35937 -let e = @test_throws RemoteException pmap(1) do _ - wait(@async error(42)) - end - # check that the inner TaskFailedException is correctly formed & can be printed - es = sprint(showerror, e.value) - @test contains(es, ":\nTaskFailedException\nStacktrace:\n") - @test contains(es, "\n\n nested task error:") - @test contains(es, "\n\n nested task error: 42\n") -end - -# issue #27429, propagate relative `include` path to workers -@everywhere include("includefile.jl") -for p in procs() - @test @fetchfrom(p, i27429) == 27429 -end - -# Propagation of package environments for local workers (#28781) -let julia = `$(Base.julia_cmd()) --startup-file=no`; mktempdir() do tmp - project = mkdir(joinpath(tmp, "project")) - depots = [mkdir(joinpath(tmp, "depot1")), mkdir(joinpath(tmp, "depot2"))] - load_path = [mkdir(joinpath(tmp, "load_path")), "@stdlib", "@"] - pathsep = Sys.iswindows() ? 
";" : ":" - env = Dict( - "JULIA_DEPOT_PATH" => join(depots, pathsep), - "JULIA_LOAD_PATH" => join(load_path, pathsep), - # Explicitly propagate `TMPDIR`, in the event that we're running on a - # CI system where `TMPDIR` is special. - "TMPDIR" => dirname(tmp), - ) - setupcode = """ - using Distributed, Test - @everywhere begin - depot_path() = DEPOT_PATH - load_path() = LOAD_PATH - active_project() = Base.ACTIVE_PROJECT[] - end - """ - testcode = setupcode * """ - for w in workers() - @test remotecall_fetch(depot_path, w) == DEPOT_PATH - @test remotecall_fetch(load_path, w) == LOAD_PATH - @test remotecall_fetch(Base.load_path, w) == Base.load_path() - @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] - @test remotecall_fetch(Base.active_project, w) == Base.active_project() - end - """ - # No active project - extracode = """ - for w in workers() - @test remotecall_fetch(active_project, w) === Base.ACTIVE_PROJECT[] === nothing - end - """ - cmd = setenv(`$(julia) -p1 -e $(testcode * extracode)`, env) - @test success(cmd) - # --project - extracode = """ - for w in workers() - @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] == - $(repr(project)) - end - """ - cmd = setenv(`$(julia) --project=$(project) -p1 -e $(testcode * extracode)`, env) - @test success(cmd) - # JULIA_PROJECT - cmd = setenv(`$(julia) -p1 -e $(testcode * extracode)`, - (env["JULIA_PROJECT"] = project; env)) - @test success(cmd) - # Pkg.activate(...) - activateish = """ - Base.ACTIVE_PROJECT[] = $(repr(project)) - using Distributed - addprocs(1) - """ - cmd = setenv(`$(julia) -e $(activateish * testcode * extracode)`, env) - @test success(cmd) - # JULIA_(LOAD|DEPOT)_PATH - shufflecode = """ - d = reverse(DEPOT_PATH) - append!(empty!(DEPOT_PATH), d) - l = reverse(LOAD_PATH) - append!(empty!(LOAD_PATH), l) - """ - addcode = """ - using Distributed - addprocs(1) # after shuffling - """ - extracode = """ - for w in workers() - @test remotecall_fetch(load_path, w) == $(repr(reverse(load_path))) - @test remotecall_fetch(depot_path, w) == $(repr(reverse(depots))) - end - """ - cmd = setenv(`$(julia) -e $(shufflecode * addcode * testcode * extracode)`, env) - @test success(cmd) - # Mismatch when shuffling after proc addition - failcode = shufflecode * setupcode * """ - for w in workers() - @test remotecall_fetch(load_path, w) == reverse(LOAD_PATH) == $(repr(load_path)) - @test remotecall_fetch(depot_path, w) == reverse(DEPOT_PATH) == $(repr(depots)) - end - """ - cmd = setenv(`$(julia) -p1 -e $(failcode)`, env) - @test success(cmd) - # Passing env or exeflags to addprocs(...) 
to override defaults - envcode = """ - using Distributed - project = mktempdir() - env = Dict( - "JULIA_LOAD_PATH" => LOAD_PATH[1], - "JULIA_DEPOT_PATH" => DEPOT_PATH[1], - "TMPDIR" => ENV["TMPDIR"], - ) - addprocs(1; env = env, exeflags = `--project=\$(project)`) - env["JULIA_PROJECT"] = project - addprocs(1; env = env) - """ * setupcode * """ - for w in workers() - @test remotecall_fetch(depot_path, w) == [DEPOT_PATH[1]] - @test remotecall_fetch(load_path, w) == [LOAD_PATH[1]] - @test remotecall_fetch(active_project, w) == project - @test remotecall_fetch(Base.active_project, w) == joinpath(project, "Project.toml") - end - """ - cmd = setenv(`$(julia) -e $(envcode)`, env) - @test success(cmd) -end end - -include("splitrange.jl") - -# Clear all workers for timeout tests (issue #45785) -rmprocs(workers()) -begin - # First, assert that we get no messages when we close a cooperative worker - w = only(addprocs(1)) - @test_nowarn begin - wait(rmprocs([w])) - end - - # Next, ensure we get a log message when a worker does not cleanly exit - w = only(addprocs(1)) - @test_logs (:warn, r"sending SIGTERM") begin - remote_do(w) do - # Cause the 'exit()' message that `rmprocs()` sends to do nothing - Core.eval(Base, :(exit() = nothing)) - end - wait(rmprocs([w])) - end -end - -# Run topology tests last after removing all workers, since a given -# cluster at any time only supports a single topology. -rmprocs(workers()) -include("topology.jl") diff --git a/stdlib/Distributed/test/includefile.jl b/stdlib/Distributed/test/includefile.jl deleted file mode 100644 index faea6c11aaf6a..0000000000000 --- a/stdlib/Distributed/test/includefile.jl +++ /dev/null @@ -1,5 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# this is used to test that relative include paths work on other processes - -i27429 = 27429 diff --git a/stdlib/Distributed/test/managers.jl b/stdlib/Distributed/test/managers.jl deleted file mode 100644 index 7971222c7511a..0000000000000 --- a/stdlib/Distributed/test/managers.jl +++ /dev/null @@ -1,26 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Test -using Distributed -using Sockets -using Distributed: parse_machine, SSHManager, LocalManager - -@test parse_machine("127.0.0.1") == ("127.0.0.1", nothing) -@test parse_machine("127.0.0.1:80") == ("127.0.0.1", 80) -@test parse_machine("[2001:db8::1]") == ("2001:db8::1", nothing) -@test parse_machine("[2001:db8::1]:443") == ("2001:db8::1", 443) - -@test parse_machine("127.0.0.1:90") == ("127.0.0.1", 90) -@test parse_machine("127.0.0.1:1") == ("127.0.0.1", 1) -@test parse_machine("127.0.0.1:65535") == ("127.0.0.1", 65535) - -@test_throws ArgumentError parse_machine("127.0.0.1:-1") -@test_throws ArgumentError parse_machine("127.0.0.1:0") -@test_throws ArgumentError parse_machine("127.0.0.1:65536") -@test_throws ArgumentError parse_machine("[2001:db8::1]:443:888") -@test_throws ArgumentError parse_machine("[2001:db8::1") -@test_throws ArgumentError parse_machine("[2001:db8::1]:aaa") - -@test occursin(r"^SSHManager\(machines=.*\)$", - sprint((t,x) -> show(t, "text/plain", x), SSHManager("127.0.0.1"))) -@test sprint((t,x) -> show(t, "text/plain", x), LocalManager(1, true)) == "LocalManager()" diff --git a/stdlib/Distributed/test/runtests.jl b/stdlib/Distributed/test/runtests.jl deleted file mode 100644 index d34d07cc48a21..0000000000000 --- a/stdlib/Distributed/test/runtests.jl +++ /dev/null @@ -1,14 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -# Run the distributed test outside of the main driver since it needs its own -# set of dedicated workers. -include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl")) -disttestfile = joinpath(@__DIR__, "distributed_exec.jl") - -cmd = `$test_exename $test_exeflags $disttestfile` - -if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) && ccall(:jl_running_on_valgrind,Cint,()) == 0 - error("Distributed test failed, cmd : $cmd") -end - -include("managers.jl") diff --git a/stdlib/Distributed/test/splitrange.jl b/stdlib/Distributed/test/splitrange.jl deleted file mode 100644 index 9f3c9c92a3ffa..0000000000000 --- a/stdlib/Distributed/test/splitrange.jl +++ /dev/null @@ -1,33 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Test -using Distributed -using Distributed: splitrange - -@test splitrange(1, 11, 1) == Array{UnitRange{Int64},1}([1:11]) -@test splitrange(0, 10, 1) == Array{UnitRange{Int64},1}([0:10]) -@test splitrange(-1, 9, 1) == Array{UnitRange{Int64},1}([-1:9]) - -@test splitrange(1, 11, 2) == Array{UnitRange{Int64},1}([1:6,7:11]) -@test splitrange(0, 10, 2) == Array{UnitRange{Int64},1}([0:5,6:10]) -@test splitrange(-1, 9, 2) == Array{UnitRange{Int64},1}([-1:4,5:9]) - -@test splitrange(1, 11, 3) == Array{UnitRange{Int64},1}([1:4,5:8,9:11]) -@test splitrange(0, 10, 3) == Array{UnitRange{Int64},1}([0:3,4:7,8:10]) -@test splitrange(-1, 9, 3) == Array{UnitRange{Int64},1}([-1:2,3:6,7:9]) - -@test splitrange(1, 3, 3) == Array{UnitRange{Int64},1}([1:1,2:2,3:3]) -@test splitrange(1, 3, 4) == Array{UnitRange{Int64},1}([1:1,2:2,3:3]) -@test splitrange(0, 2, 3) == Array{UnitRange{Int64},1}([0:0,1:1,2:2]) -@test splitrange(0, 2, 4) == Array{UnitRange{Int64},1}([0:0,1:1,2:2]) -@test splitrange(-1, 1, 3) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1]) -@test splitrange(-1, 1, 4) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1]) - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :OffsetArrays) || @eval Main @everywhere include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) -using .Main.OffsetArrays - -oa = OffsetArray([123, -345], (-2,)) -@sync @distributed for i in eachindex(oa) - @test i ∈ (-1, 0) -end diff --git a/stdlib/Distributed/test/topology.jl b/stdlib/Distributed/test/topology.jl deleted file mode 100644 index 2a659931ed306..0000000000000 --- a/stdlib/Distributed/test/topology.jl +++ /dev/null @@ -1,143 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Random - -pids = addprocs_with_testenv(4; topology="master_worker") - -let p1 = pids[1], p2 = pids[2] - @test_throws RemoteException remotecall_fetch(()->remotecall_fetch(myid, p2), p1) -end - -function test_worker_counts() - # check if the nprocs/nworkers/workers are the same on the remaining workers - np=nprocs() - nw=nworkers() - ws=sort(workers()) - - for p in workers() - @test (true, true, true) == remotecall_fetch(p, np, nw, ws) do x,y,z - (x==nprocs(), y==nworkers(), z==sort(workers())) - end - end -end - -function remove_workers_and_test() - while nworkers() > 0 - rmprocs(workers()[1]) - test_worker_counts() - if nworkers() == nprocs() - break - end - end -end - -remove_workers_and_test() - -# connect even pids to other even pids, odd to odd. 
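# Before the custom-topology manager below, a brief sketch of the :master_worker
# semantics tested above: only process 1 may talk to the workers, so worker-to-worker
# remotecalls fail with a RemoteException (worker counts are illustrative).
let pids = addprocs(2; topology=:master_worker)
    remotecall_fetch(myid, pids[1])   # master -> worker: fine
    # remotecall_fetch(() -> remotecall_fetch(myid, pids[2]), pids[1])  # worker -> worker: throws RemoteException
    rmprocs(pids)
end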
-mutable struct TopoTestManager <: ClusterManager - np::Integer -end - -function launch(manager::TopoTestManager, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - - cmd = `$exename $exeflags --bind-to $(Distributed.LPROC.bind_addr) --worker` - cmd = pipeline(detach(setenv(cmd, dir=dir))) - for i in 1:manager.np - io = open(cmd, "r+") - Distributed.write_cookie(io) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - wconfig.ident = i - wconfig.connect_idents = Vector(i+2:2:manager.np) - push!(launched, wconfig) - end - - notify(c) -end - -const map_pid_ident=Dict() -function manage(manager::TopoTestManager, id::Integer, config::WorkerConfig, op::Symbol) - if op == :register - map_pid_ident[id] = config.ident - elseif op == :interrupt - kill(config.process, 2) - end -end - -addprocs_with_testenv(TopoTestManager(8); topology="custom") - -while true - if any(x->get(map_pid_ident, x, 0)==0, workers()) - yield() - else - break - end -end - -let p1, p2 -for p1 in workers() - for p2 in workers() - i1 = map_pid_ident[p1] - i2 = map_pid_ident[p2] - if (iseven(i1) && iseven(i2)) || (isodd(i1) && isodd(i2)) - @test p2 == remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2) - else - @test_throws RemoteException remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2) - end - end -end -end - -remove_workers_and_test() - -# test `lazy` connection setup -function def_count_conn() - @everywhere function count_connected_workers() - count(x -> isa(x, Distributed.Worker) && isdefined(x, :r_stream) && isopen(x.r_stream), - Distributed.PGRP.workers) - end -end - -addprocs_with_testenv(8) -def_count_conn() - -# Test for 10 random combinations -wl = workers() -combinations = [] -while length(combinations) < 10 - from = rand(wl) - to = rand(wl) - if from == to || ((from,to) in combinations) || ((to,from) in combinations) - continue - else - push!(combinations, (from,to)) - end -end - -# Initially only master-worker connections ought to be setup -expected_num_conns = 8 -let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) - @test num_conns == expected_num_conns -end - -for (i, (from,to)) in enumerate(combinations) - remotecall_wait(topid->remotecall_fetch(myid, topid), from, to) - global expected_num_conns += 2 # one connection endpoint on both from and to - let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) - @test num_conns == expected_num_conns - end -end - -# With lazy=false, all connections ought to be setup during `addprocs` -rmprocs(workers()) -addprocs_with_testenv(8; lazy=false) -def_count_conn() -@test sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) == 64 - -# Cannot add more workers with a different `lazy` value -@test_throws ArgumentError addprocs_with_testenv(1; lazy=true) diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version index 4072369c387da..c5bd4d7a0d473 100644 --- a/stdlib/Downloads.version +++ b/stdlib/Downloads.version @@ -1,4 +1,4 @@ DOWNLOADS_BRANCH = master -DOWNLOADS_SHA1 = 78255d4927312181542b29ec6f063b0d5880189e +DOWNLOADS_SHA1 = 8a614d592810b15d17885838dec61da244a12e09 DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1 diff --git a/stdlib/FileWatching/Project.toml b/stdlib/FileWatching/Project.toml index 1da637fd4259d..5edcfdadd085d 100644 --- a/stdlib/FileWatching/Project.toml 
+++ b/stdlib/FileWatching/Project.toml @@ -1,5 +1,6 @@ name = "FileWatching" uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md index 6c332511f578f..a420d49232345 100644 --- a/stdlib/FileWatching/docs/src/index.md +++ b/stdlib/FileWatching/docs/src/index.md @@ -20,6 +20,7 @@ A simple utility tool for creating advisory pidfiles (lock files). ```@docs mkpidlock +trymkpidlock close(lock::LockMonitor) ``` diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index e266bff7ec7d1..0e68cd97681ff 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -18,7 +18,8 @@ export PollingFileWatcher, FDWatcher, # pidfile: - mkpidlock + mkpidlock, + trymkpidlock import Base: @handle_as, wait, close, eventloop, notify_error, IOError, _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, @@ -215,7 +216,7 @@ mutable struct _FDWatcher t.refcount = (0, 0) t.active = (false, false) @static if Sys.isunix() - if FDWatchers[t.fdnum] == t + if FDWatchers[t.fdnum] === t FDWatchers[t.fdnum] = nothing end end @@ -462,6 +463,11 @@ function __init__() global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid})) global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) + + Base.mkpidlock_hook = mkpidlock + Base.trymkpidlock_hook = trymkpidlock + Base.parse_pidfile_hook = Pidfile.parse_pidfile + nothing end @@ -721,7 +727,7 @@ function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, end end catch ex - ex isa EOFError() || rethrow() + ex isa EOFError || rethrow() return FDEvent() end else @@ -744,9 +750,11 @@ end watch_file(path::AbstractString, timeout_s::Real=-1) Watch file or directory `path` for changes until a change occurs or `timeout_s` seconds have -elapsed. +elapsed. This function does not poll the file system and instead uses platform-specific +functionality to receive notifications from the operating system (e.g. via inotify on Linux). +See the NodeJS documentation linked below for details. -The returned value is an object with boolean fields `changed`, `renamed`, and `timedout`, +The returned value is an object with boolean fields `renamed`, `changed`, and `timedout`, giving the result of watching the file. This behavior of this function varies slightly across platforms. See @@ -773,13 +781,15 @@ watch_file(s::AbstractString, timeout_s::Real=-1) = watch_file(String(s), Float6 watch_folder(path::AbstractString, timeout_s::Real=-1) Watches a file or directory `path` for changes until a change has occurred or `timeout_s` -seconds have elapsed. +seconds have elapsed. This function does not poll the file system and instead uses platform-specific +functionality to receive notifications from the operating system (e.g. via inotify on Linux). +See the NodeJS documentation linked below for details. This will continuing tracking changes for `path` in the background until `unwatch_folder` is called on the same `path`. 
The returned value is an pair where the first field is the name of the changed file (if available) -and the second field is an object with boolean fields `changed`, `renamed`, and `timedout`, +and the second field is an object with boolean fields `renamed`, `changed`, and `timedout`, giving the event. This behavior of this function varies slightly across platforms. See @@ -881,6 +891,6 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R end include("pidfile.jl") -import .Pidfile: mkpidlock +import .Pidfile: mkpidlock, trymkpidlock end diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 8416765a57b97..71b596ba35f12 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -1,7 +1,7 @@ module Pidfile -export mkpidlock +export mkpidlock, trymkpidlock using Base: IOError, UV_EEXIST, UV_ESRCH, @@ -17,7 +17,8 @@ using ..FileWatching: watch_file using Base.Sys: iswindows """ - mkpidlock([f::Function], at::String, [pid::Cint, proc::Process]; kwopts...) + mkpidlock([f::Function], at::String, [pid::Cint]; kwopts...) + mkpidlock(at::String, proc::Process; kwopts...) Create a pidfile lock for the path "at" for the current process or the process identified by pid or proc. Can take a function to execute once locked, @@ -31,8 +32,9 @@ your program, so the `finalizer` does not reclaim it early. Optional keyword arguments: - `mode`: file access mode (modified by the process umask). Defaults to world-readable. - `poll_interval`: Specify the maximum time to between attempts (if `watch_file` doesn't work) - - `stale_age`: Delete an existing pidfile (ignoring the lock) if its mtime is older than this. - The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid. + - `stale_age`: Delete an existing pidfile (ignoring the lock) if it is older than this many seconds, based on its mtime. + The file won't be deleted until 5x longer than this if the pid in the file appears that it may be valid. + Or 25x longer if `refresh` is overridden to 0 to disable lock refreshing. By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an estimated normal completion time. - `refresh`: Keeps a lock from becoming stale by updating the mtime every interval of time that passes. @@ -41,6 +43,16 @@ Optional keyword arguments: """ function mkpidlock end +""" + trymkpidlock([f::Function], at::String, [pid::Cint]; kwopts...) + trymkpidlock(at::String, proc::Process; kwopts...) + +Like `mkpidlock` except returns `false` instead of waiting if the file is already locked. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. +""" +function trymkpidlock end # mutable only because we want to add a finalizer mutable struct LockMonitor @@ -53,7 +65,7 @@ mutable struct LockMonitor atdir, atname = splitdir(at) isempty(atdir) && (atdir = pwd()) at = realpath(atdir) * path_separator * atname - fd = open_exclusive(at; stale_age=stale_age, kwopts...) + fd = open_exclusive(at; stale_age, refresh, kwopts...) update = nothing try write_pidfile(fd, pid) @@ -65,6 +77,7 @@ mutable struct LockMonitor lock = new(at, fd, update) finalizer(close, lock) catch ex + update === nothing || close(update) tryrmopenfile(at) close(fd) rethrow(ex) @@ -88,13 +101,28 @@ end function mkpidlock(at::String, proc::Process; kwopts...) lock = mkpidlock(at, getpid(proc); kwopts...) 
closer = @async begin - wait(proc) - close(lock) + try + wait(proc) + finally + close(lock) + end end - isdefined(Base, :errormonitor) && Base.errormonitor(closer) + Base.errormonitor(closer) return lock end +function trymkpidlock(args...; kwargs...) + try + mkpidlock(args...; kwargs..., wait=false) + catch ex + if ex isa PidlockedError + return false + else + rethrow() + end + end +end + """ Base.touch(::Pidfile.LockMonitor) @@ -162,15 +190,16 @@ function isvalidpid(hostname::AbstractString, pid::Cuint) end """ - stale_pidfile(path::String, stale_age::Real) :: Bool + stale_pidfile(path::String, stale_age::Real, refresh::Real) :: Bool Helper function for `open_exclusive` for deciding if a pidfile is stale. """ -function stale_pidfile(path::String, stale_age::Real) +function stale_pidfile(path::String, stale_age::Real, refresh::Real) pid, hostname, age = parse_pidfile(path) age < -stale_age && @warn "filesystem time skew detected" path=path + longer_factor = refresh == 0 ? 25 : 5 if age > stale_age - if (age > stale_age * 25) || !isvalidpid(hostname, pid) + if (age > stale_age * longer_factor) || !isvalidpid(hostname, pid) return true end end @@ -192,8 +221,12 @@ function tryopen_exclusive(path::String, mode::Integer = 0o444) return nothing end +struct PidlockedError <: Exception + msg::AbstractString +end + """ - open_exclusive(path::String; mode, poll_interval, stale_age) :: File + open_exclusive(path::String; mode, poll_interval, wait, stale_age, refresh) :: File Create a new a file for read-write advisory-exclusive access. If `wait` is `false` then error out if the lock files exist @@ -205,20 +238,21 @@ function open_exclusive(path::String; mode::Integer = 0o444 #= read-only =#, poll_interval::Real = 10 #= seconds =#, wait::Bool = true #= return on failure if false =#, - stale_age::Real = 0 #= disabled =#) + stale_age::Real = 0 #= disabled =#, + refresh::Real = stale_age/2) # fast-path: just try to open it file = tryopen_exclusive(path, mode) file === nothing || return file if !wait if file === nothing && stale_age > 0 - if stale_age > 0 && stale_pidfile(path, stale_age) + if stale_age > 0 && stale_pidfile(path, stale_age, refresh) @warn "attempting to remove probably stale pidfile" path=path tryrmopenfile(path) end file = tryopen_exclusive(path, mode) end if file === nothing - error("Failed to get pidfile lock for $(repr(path)).") + throw(PidlockedError("Failed to get pidfile lock for $(repr(path)).")) else return file end @@ -237,7 +271,7 @@ function open_exclusive(path::String; file = tryopen_exclusive(path, mode) file === nothing || return file Base.wait(t) # sleep for a bit before trying again - if stale_age > 0 && stale_pidfile(path, stale_age) + if stale_age > 0 && stale_pidfile(path, stale_age, refresh) # if the file seems stale, try to remove it before attempting again # set stale_age to zero so we won't attempt again, even if the attempt fails stale_age -= stale_age diff --git a/stdlib/FileWatching/test/pidfile.jl b/stdlib/FileWatching/test/pidfile.jl index febc082518edf..3464a24175632 100644 --- a/stdlib/FileWatching/test/pidfile.jl +++ b/stdlib/FileWatching/test/pidfile.jl @@ -109,7 +109,7 @@ end rm("pidfile") deleted = true end - isdefined(Base, :errormonitor) && Base.errormonitor(rmtask) + Base.errormonitor(rmtask) @test isfile("pidfile") @test !deleted @@ -146,7 +146,7 @@ end rm("pidfile") deleted = true end - isdefined(Base, :errormonitor) && Base.errormonitor(rmtask) + Base.errormonitor(rmtask) @test isfile("pidfile") @test !deleted # open the pidfile again (should wait 
for it to disappear first) @@ -177,17 +177,17 @@ end @test Pidfile.tryrmopenfile("pidfile") deleted = true end - isdefined(Base, :errormonitor) && Base.errormonitor(rmtask) + Base.errormonitor(rmtask) t1 = time() - @test_throws ErrorException open_exclusive("pidfile", wait=false) + @test_throws Pidfile.PidlockedError open_exclusive("pidfile", wait=false) @test time()-t1 ≈ 0 atol=1 sleep(1) @test !deleted t1 = time() - @test_throws ErrorException open_exclusive("pidfile", wait=false) + @test_throws Pidfile.PidlockedError open_exclusive("pidfile", wait=false) @test time()-t1 ≈ 0 atol=1 wait(rmtask) @@ -203,18 +203,33 @@ end @assert !ispath("pidfile") @testset "open_exclusive: break lock" begin - # test for stale_age - t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File - try - write_pidfile(f, getpid()) - finally + @testset "using stale_age without lock refreshing" begin + t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10, refresh=0)::File + try + write_pidfile(f, getpid()) + finally + close(f) + end + @test t < 2 + t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1, refresh=0)::File close(f) + @test 20 < t < 50 + rm("pidfile") + end + + @testset "using stale_age with lock refreshing on (default)" begin + t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File + try + write_pidfile(f, getpid()) + finally + close(f) + end + @test t < 2 + t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=5)::File + close(f) + @test 20 < t < 50 + rm("pidfile") end - @test t < 2 - t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1)::File - close(f) - @test 20 < t < 50 - rm("pidfile") t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File close(f) @@ -243,10 +258,10 @@ end return close(lockf) end end - isdefined(Base, :errormonitor) && Base.errormonitor(waittask) + Base.errormonitor(waittask) # mkpidlock with no waiting - t = @elapsed @test_throws ErrorException mkpidlock("pidfile", wait=false) + t = @elapsed @test_throws Pidfile.PidlockedError mkpidlock("pidfile", wait=false) @test t ≈ 0 atol=1 t = @elapsed lockf1 = mkpidlock(joinpath(dir, "pidfile")) @@ -272,8 +287,14 @@ end # Just for coverage's sake, run a test with do-block syntax lock_times = Float64[] + synchronizer = Base.Event() + synchronizer2 = Base.Event() t_loop = @async begin for idx in 1:100 + if idx == 1 + wait(synchronizer) + notify(synchronizer2) + end t = @elapsed mkpidlock("do_block_pidfile") do # nothing end @@ -281,12 +302,14 @@ end push!(lock_times, t) end end - isdefined(Base, :errormonitor) && Base.errormonitor(t_loop) + Base.errormonitor(t_loop) mkpidlock("do_block_pidfile") do + notify(synchronizer) + wait(synchronizer2) sleep(3) end wait(t_loop) - @test maximum(lock_times) > 2 + @test lock_times[1] >= 3 @test minimum(lock_times) < 1 end @@ -346,7 +369,7 @@ end @test lockf.update === nothing sleep(1) - t = @elapsed @test_throws ErrorException mkpidlock("pidfile-2", wait=false, stale_age=1, poll_interval=1, refresh=0) + t = @elapsed @test_throws Pidfile.PidlockedError mkpidlock("pidfile-2", wait=false, stale_age=1, poll_interval=1, refresh=0) @test t ≈ 0 atol=1 sleep(5) diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl index 419ae48dd0a75..75b17b5f0e511 100644 --- a/stdlib/FileWatching/test/runtests.jl +++ b/stdlib/FileWatching/test/runtests.jl @@ -15,8 +15,8 @@ using Base: uv_error, Experimental n = 20 intvls = [2, .2, .1, .005, .00001] - 
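A minimal usage sketch for the non-throwing lock API introduced in the Pidfile hunks above; it is not part of the patch. It assumes `trymkpidlock`/`mkpidlock` are reachable through the `FileWatching.Pidfile` submodule and uses a hypothetical lock path `"myapp.pid"`.

```julia
# Illustrative sketch (not part of the diff).
using FileWatching: Pidfile

function run_once(work::Function)
    # trymkpidlock returns false instead of throwing PidlockedError when the lock is held
    lock = Pidfile.trymkpidlock("myapp.pid"; stale_age=60)
    lock === false && return :already_locked
    try
        return work()
    finally
        close(lock)  # releases the advisory lock and removes the pidfile
    end
end
```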
pipe_fds = fill((Base.INVALID_OS_HANDLE, Base.INVALID_OS_HANDLE), n) + for i in 1:n if Sys.iswindows() || i > n ÷ 2 uv_error("socketpair", ccall(:uv_socketpair, Cint, (Cint, Cint, Ptr{NTuple{2, Base.OS_HANDLE}}, Cint, Cint), 1, (Sys.iswindows() ? 6 : 0), Ref(pipe_fds, i), 0, 0)) @@ -32,7 +32,9 @@ for i in 1:n if !fd_in_limits && Sys.islinux() run(`ls -la /proc/$(getpid())/fd`) end - @test fd_in_limits + if !Sys.isapple() + @test fd_in_limits + end end function pfd_tst_reads(idx, intvl) @@ -72,7 +74,7 @@ end # Odd numbers trigger reads, even numbers timeout for (i, intvl) in enumerate(intvls) - @Experimental.sync begin + Experimental.@sync begin global ready = 0 global ready_c = Condition() for idx in 1:n @@ -183,16 +185,19 @@ function test_init_afile() @test(watch_folder(dir) == (F_PATH => FileWatching.FileEvent(FileWatching.UV_RENAME))) @test close(open(file, "w")) === nothing sleep(3) - let c - c = watch_folder(dir, 0) - if F_GETPATH - @test c.first == F_PATH - @test c.second.changed ⊻ c.second.renamed - @test !c.second.timedout - else # we don't expect to be able to detect file changes in this case - @test c.first == "" - @test !c.second.changed && !c.second.renamed - @test c.second.timedout + if !Sys.isapple() + let c + c = watch_folder(dir, 0) + + if F_GETPATH + @test c.first == F_PATH + @test c.second.changed ⊻ c.second.renamed + @test !c.second.timedout + else # we don't expect to be able to detect file changes in this case + @test c.first == "" + @test !c.second.changed && !c.second.renamed + @test c.second.timedout + end end end @test unwatch_folder(dir) === nothing @@ -324,8 +329,10 @@ function test_dirmonitor_wait2(tval) fname, events = wait(fm) end for i = 1:3 - @test fname == "$F_PATH$i" - @test !events.changed && !events.timedout && events.renamed + @testset let (fname, events) = (fname, events) + @test fname == "$F_PATH$i" + @test !events.changed && !events.timedout && events.renamed + end i == 3 && break fname, events = wait(fm) end @@ -367,60 +374,62 @@ test_monitor_wait_poll() test_monitor_wait_poll() test_watch_file_timeout(0.2) test_watch_file_change(6) -test_dirmonitor_wait2(0.2) -test_dirmonitor_wait2(0.2) -mv(file, file * "~") -mv(file * "~", file) -let changes = [] - while true - let c - Sys.iswindows() && sleep(0.1) - @test @elapsed(c = watch_folder(dir, 0.0)) < 0.5 - push!(changes, c) - (c.second::FileWatching.FileEvent).timedout && break - end - end - if F_GETPATH - @test 12 < length(changes) < 48 - else - @test 5 < length(changes) < 16 - end - @test pop!(changes) == ("" => FileWatching.FileEvent()) - if F_GETPATH - Sys.iswindows() && @test pop!(changes) == (F_PATH => FileWatching.FileEvent(FileWatching.UV_CHANGE)) - p = pop!(changes) - if !Sys.isapple() - @test p == (F_PATH => FileWatching.FileEvent(FileWatching.UV_RENAME)) - end - while changes[end][1] == F_PATH - @test pop!(changes)[2] == FileWatching.FileEvent(FileWatching.UV_RENAME) - end - p = pop!(changes) - if !Sys.isapple() - @test p == (F_PATH * "~" => FileWatching.FileEvent(FileWatching.UV_RENAME)) - end - while changes[end][1] == F_PATH * "~" - @test pop!(changes)[2] == FileWatching.FileEvent(FileWatching.UV_RENAME) +if !Sys.isapple() + test_dirmonitor_wait2(0.2) + test_dirmonitor_wait2(0.2) + + mv(file, file * "~") + mv(file * "~", file) + let changes = [] + while true + let c + Sys.iswindows() && sleep(0.1) + @test @elapsed(c = watch_folder(dir, 0.0)) < 0.5 + push!(changes, c) + (c.second::FileWatching.FileEvent).timedout && break + end end - if changes[end][1] == F_PATH - @test pop!(changes)[2] == 
FileWatching.FileEvent(FileWatching.UV_RENAME) + if F_GETPATH + @test 12 < length(changes) < 48 + else + @test 5 < length(changes) < 16 end - for j = 1:4 - for i = 3:-1:1 - while changes[end - 1][1] == "$F_PATH$i" - @test let x = pop!(changes)[2]; x.changed ⊻ x.renamed; end - end - p = pop!(changes) - if !Sys.isapple() - @test p == ("$F_PATH$i" => FileWatching.FileEvent(FileWatching.UV_RENAME)) + @test pop!(changes) == ("" => FileWatching.FileEvent()) + if F_GETPATH + Sys.iswindows() && @test pop!(changes) == (F_PATH => FileWatching.FileEvent(FileWatching.UV_CHANGE)) + p = pop!(changes) + if !Sys.isapple() + @test p == (F_PATH => FileWatching.FileEvent(FileWatching.UV_RENAME)) + end + while changes[end][1] == F_PATH + @test pop!(changes)[2] == FileWatching.FileEvent(FileWatching.UV_RENAME) + end + p = pop!(changes) + if !Sys.isapple() + @test p == (F_PATH * "~" => FileWatching.FileEvent(FileWatching.UV_RENAME)) + end + while changes[end][1] == F_PATH * "~" + @test pop!(changes)[2] == FileWatching.FileEvent(FileWatching.UV_RENAME) + end + if changes[end][1] == F_PATH + @test pop!(changes)[2] == FileWatching.FileEvent(FileWatching.UV_RENAME) + end + for j = 1:4 + for i = 3:-1:1 + while changes[end - 1][1] == "$F_PATH$i" + @test let x = pop!(changes)[2]; x.changed ⊻ x.renamed; end + end + p = pop!(changes) + if !Sys.isapple() + @test p == ("$F_PATH$i" => FileWatching.FileEvent(FileWatching.UV_RENAME)) + end end end end + @test all(x -> (isa(x, Pair) && x[1] == F_PATH && (x[2].changed ⊻ x[2].renamed)), changes) || changes end - @test all(x -> (isa(x, Pair) && x[1] == F_PATH && (x[2].changed ⊻ x[2].renamed)), changes) || changes end - @test_throws(Base._UVError("FileMonitor (start)", Base.UV_ENOENT), watch_file("____nonexistent_file", 10)) @test_throws(Base._UVError("FolderMonitor (start)", Base.UV_ENOENT), diff --git a/stdlib/Future/Project.toml b/stdlib/Future/Project.toml index ffdbaf94b9853..c09489812ce01 100644 --- a/stdlib/Future/Project.toml +++ b/stdlib/Future/Project.toml @@ -1,5 +1,6 @@ name = "Future" uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +version = "1.11.0" [deps] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/stdlib/GMP_jll/Project.toml b/stdlib/GMP_jll/Project.toml index 510b6f6a49c60..9f3b917257bfa 100644 --- a/stdlib/GMP_jll/Project.toml +++ b/stdlib/GMP_jll/Project.toml @@ -1,6 +1,6 @@ name = "GMP_jll" uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d" -version = "6.2.1+2" +version = "6.2.1+6" [deps] Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" diff --git a/stdlib/GMP_jll/src/GMP_jll.jl b/stdlib/GMP_jll/src/GMP_jll.jl index 90daa24b150ed..fde2fc15acf90 100644 --- a/stdlib/GMP_jll/src/GMP_jll.jl +++ b/stdlib/GMP_jll/src/GMP_jll.jl @@ -13,11 +13,11 @@ export libgmp, libgmpxx # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libgmp_handle = C_NULL -libgmp_path = "" -libgmpxx_handle = C_NULL -libgmpxx_path = "" +artifact_dir::String = "" +libgmp_handle::Ptr{Cvoid} = C_NULL +libgmp_path::String = "" +libgmpxx_handle::Ptr{Cvoid} = C_NULL +libgmpxx_path::String = "" if Sys.iswindows() const libgmp = "libgmp-10.dll" diff --git a/stdlib/InteractiveUtils/Project.toml b/stdlib/InteractiveUtils/Project.toml index e13902375e005..53cc9218eff5d 100644 --- a/stdlib/InteractiveUtils/Project.toml +++ b/stdlib/InteractiveUtils/Project.toml @@ -1,5 +1,6 @@ name = "InteractiveUtils" uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" [deps] Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" diff --git 
a/stdlib/InteractiveUtils/docs/src/index.md b/stdlib/InteractiveUtils/docs/src/index.md index 9ad4b5a7cea80..5ee8e57adc848 100644 --- a/stdlib/InteractiveUtils/docs/src/index.md +++ b/stdlib/InteractiveUtils/docs/src/index.md @@ -1,6 +1,6 @@ # [Interactive Utilities](@id man-interactive-utils) -This module is intended for interactive work. It is loaded automaticaly in [interactive mode](@ref command-line-options). +This module is intended for interactive work. It is loaded automatically in [interactive mode](@ref command-line-interface). ```@docs InteractiveUtils.apropos diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index ad295345fabfd..629cf88cd8ce6 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -11,8 +11,8 @@ export apropos, edit, less, code_warntype, code_llvm, code_native, methodswith, import Base.Docs.apropos using Base: unwrap_unionall, rewrap_unionall, isdeprecated, Bottom, show_unquoted, summarysize, - to_tuple_type, signature_type, format_bytes - + signature_type, format_bytes +using Base.Libc using Markdown include("editless.jl") @@ -21,20 +21,23 @@ include("macros.jl") include("clipboard.jl") """ - varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, minsize::Int = 0) + varinfo(m::Module=Main, pattern::Regex=r""; all=false, imported=false, recursive=false, sortby::Symbol=:name, minsize::Int=0) -Return a markdown table giving information about exported global variables in a module, optionally restricted +Return a markdown table giving information about public global variables in a module, optionally restricted to those matching `pattern`. The memory consumption estimate is an approximate lower bound on the size of the internal structure of the object. -- `all` : also list non-exported objects defined in the module, deprecated objects, and compiler-generated objects. +- `all` : also list non-public objects defined in the module, deprecated objects, and compiler-generated objects. - `imported` : also list objects explicitly imported from other modules. - `recursive` : recursively include objects in sub-modules, observing the same settings in each. - `sortby` : the column to sort results by. Options are `:name` (default), `:size`, and `:summary`. - `minsize` : only includes objects with size at least `minsize` bytes. Defaults to `0`. + +The output of `varinfo` is intended for display purposes only. See also [`names`](@ref) to get an array of symbols defined in +a module, which is suitable for more general manipulations. """ -function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false, minsize::Int=0) +function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool = false, imported::Bool = false, recursive::Bool = false, sortby::Symbol = :name, minsize::Int=0) sortby in (:name, :size, :summary) || throw(ArgumentError("Unrecognized `sortby` value `:$sortby`. 
Possible options are `:name`, `:size`, and `:summary`")) rows = Vector{Any}[] workqueue = [(m, ""),] @@ -60,11 +63,11 @@ function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool = end end end - let (col, rev) = if sortby == :name + let (col, rev) = if sortby === :name 1, false - elseif sortby == :size + elseif sortby === :size 4, true - elseif sortby == :summary + elseif sortby === :summary 3, false else @assert "unreachable" @@ -96,8 +99,25 @@ function versioninfo(io::IO=stdout; verbose::Bool=false) if !isempty(Base.GIT_VERSION_INFO.commit_short) println(io, "Commit $(Base.GIT_VERSION_INFO.commit_short) ($(Base.GIT_VERSION_INFO.date_string))") end - if ccall(:jl_is_debugbuild, Cint, ())!=0 - println(io, "DEBUG build") + official_release = Base.TAGGED_RELEASE_BANNER == "Official https://julialang.org/ release" + if Base.isdebugbuild() || !isempty(Base.TAGGED_RELEASE_BANNER) || (Base.GIT_VERSION_INFO.tagged_commit && !official_release) + println(io, "Build Info:") + if Base.isdebugbuild() + println(io, " DEBUG build") + end + if !isempty(Base.TAGGED_RELEASE_BANNER) + println(io, " ", Base.TAGGED_RELEASE_BANNER) + end + if Base.GIT_VERSION_INFO.tagged_commit && !official_release + println(io, + """ + + Note: This is an unofficial build, please report bugs to the project + responsible for this build and not to the Julia project unless you can + reproduce the issue using official builds available at https://julialang.org/downloads + """ + ) + end end println(io, "Platform Info:") println(io, " OS: ", Sys.iswindows() ? "Windows" : Sys.isapple() ? @@ -139,9 +159,8 @@ function versioninfo(io::IO=stdout; verbose::Bool=false) println(io) end println(io, " WORD_SIZE: ", Sys.WORD_SIZE) - println(io, " LIBM: ",Base.libm_name) println(io, " LLVM: libLLVM-",Base.libllvm_version," (", Sys.JIT, ", ", Sys.CPU_NAME, ")") - println(io, " Threads: ", Threads.nthreads(), " on ", Sys.CPU_THREADS, " virtual cores") + println(io, " Threads: ", Threads.maxthreadid(), " on ", Sys.CPU_THREADS, " virtual cores") function is_nonverbose_env(k::String) return occursin(r"^JULIA_|^DYLD_|^LD_", k) @@ -182,8 +201,10 @@ The optional second argument restricts the search to a particular module or func If keyword `supertypes` is `true`, also return arguments with a parent type of `typ`, excluding type `Any`. + +See also: [`methods`](@ref). 
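Not part of the patch: a brief usage sketch of `methodswith` as documented in the docstring above.

```julia
# Illustrative sketch (not part of the diff).
using InteractiveUtils

methodswith(Set)                    # every loaded method with an argument declared ::Set
methodswith(Cmd, Base)              # restrict the search to a single module
methodswith(Int; supertypes=true)   # also match supertype annotations such as ::Integer or ::Real
```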
""" -function methodswith(t::Type, f::Base.Callable, meths = Method[]; supertypes::Bool=false) +function methodswith(@nospecialize(t::Type), @nospecialize(f::Base.Callable), meths = Method[]; supertypes::Bool=false) for d in methods(f) if any(function (x) let x = rewrap_unionall(x, d.sig) @@ -200,7 +221,7 @@ function methodswith(t::Type, f::Base.Callable, meths = Method[]; supertypes::Bo return meths end -function _methodswith(t::Type, m::Module, supertypes::Bool) +function _methodswith(@nospecialize(t::Type), m::Module, supertypes::Bool) meths = Method[] for nm in names(m) if isdefined(m, nm) @@ -213,9 +234,9 @@ function _methodswith(t::Type, m::Module, supertypes::Bool) return unique(meths) end -methodswith(t::Type, m::Module; supertypes::Bool=false) = _methodswith(t, m, supertypes) +methodswith(@nospecialize(t::Type), m::Module; supertypes::Bool=false) = _methodswith(t, m, supertypes) -function methodswith(t::Type; supertypes::Bool=false) +function methodswith(@nospecialize(t::Type); supertypes::Bool=false) meths = Method[] for mod in Base.loaded_modules_array() append!(meths, _methodswith(t, mod, supertypes)) @@ -298,7 +319,7 @@ end # TODO: @deprecate peakflops to LinearAlgebra export peakflops """ - peakflops(n::Integer=2000; parallel::Bool=false) + peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) `peakflops` computes the peak flop rate of the computer by using double precision [`gemm!`](@ref LinearAlgebra.BLAS.gemm!). For more information see @@ -308,12 +329,12 @@ export peakflops This function will be moved from `InteractiveUtils` to `LinearAlgebra` in the future. In Julia 1.1 and later it is available as `LinearAlgebra.peakflops`. """ -function peakflops(n::Integer=2000; parallel::Bool=false) - # Base.depwarn("`peakflop`s have moved to the LinearAlgebra module, " * +function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) + # Base.depwarn("`peakflops` has moved to the LinearAlgebra module, " * # "add `using LinearAlgebra` to your imports.", :peakflops) let LinearAlgebra = Base.require(Base.PkgId( Base.UUID((0x37e2e46d_f89d_539d,0xb4ee_838fcccc9c8e)), "LinearAlgebra")) - return LinearAlgebra.peakflops(n; parallel = parallel) + return LinearAlgebra.peakflops(n, eltype=eltype, ntrials=ntrials, parallel=parallel) end end @@ -333,7 +354,8 @@ function report_bug(kind) push!(empty!(LOAD_PATH), joinpath(tmp, "Project.toml")) old_active_project = Base.ACTIVE_PROJECT[] Base.ACTIVE_PROJECT[] = nothing - Pkg.add(Pkg.PackageSpec(BugReportingId.name, BugReportingId.uuid)) + pkgspec = @invokelatest Pkg.PackageSpec(BugReportingId.name, BugReportingId.uuid) + @invokelatest Pkg.add(pkgspec) BugReporting = Base.require(BugReportingId) append!(empty!(LOAD_PATH), old_load_path) Base.ACTIVE_PROJECT[] = old_active_project @@ -342,7 +364,7 @@ function report_bug(kind) else BugReporting = Base.require(BugReportingId) end - return Base.invokelatest(BugReporting.make_interactive_report, kind, ARGS) + return @invokelatest BugReporting.make_interactive_report(kind, ARGS) end end diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl index 7bc718b91b2bd..6bcd61584a2b8 100644 --- a/stdlib/InteractiveUtils/src/clipboard.jl +++ b/stdlib/InteractiveUtils/src/clipboard.jl @@ -51,7 +51,7 @@ elseif Sys.islinux() || Sys.KERNEL === :FreeBSD _clipboardcmd !== nothing && return _clipboardcmd for cmd in (:xclip, :xsel, :wlclipboard) # wl-clipboard ships wl-copy/paste individually - c = cmd 
== :wlclipboard ? Symbol("wl-copy") : cmd + c = cmd === :wlclipboard ? Symbol("wl-copy") : cmd success(pipeline(`which $c`, devnull)) && return _clipboardcmd = cmd end pkgs = @static if Sys.KERNEL === :FreeBSD @@ -83,14 +83,14 @@ elseif Sys.iswindows() x_u16 = Base.cwstring(x) pdata = Ptr{UInt16}(C_NULL) function cleanup(cause) - errno = cause == :success ? UInt32(0) : Libc.GetLastError() + errno = cause === :success ? UInt32(0) : Libc.GetLastError() if cause !== :OpenClipboard if cause !== :success && pdata != C_NULL ccall((:GlobalFree, "kernel32"), stdcall, Cint, (Ptr{UInt16},), pdata) end ccall((:CloseClipboard, "user32"), stdcall, Cint, ()) == 0 && Base.windowserror(:CloseClipboard) # this should never fail end - cause == :success || Base.windowserror(cause, errno) + cause === :success || Base.windowserror(cause, errno) nothing end ccall((:OpenClipboard, "user32"), stdcall, Cint, (Ptr{Cvoid},), C_NULL) == 0 && return Base.windowserror(:OpenClipboard) @@ -100,28 +100,28 @@ elseif Sys.iswindows() pdata == C_NULL && return cleanup(:GlobalAlloc) plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata) plock == C_NULL && return cleanup(:GlobalLock) - ccall(:memcpy, Ptr{UInt16}, (Ptr{UInt16}, Ptr{UInt16}, Csize_t), plock, x_u16, sizeof(x_u16)) + GC.@preserve x_u16 memcpy(plock, Base.unsafe_convert(Ptr{UInt16}, Base.cconvert(Ptr{UInt16}, x_u16)), sizeof(x_u16)) unlock = ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), pdata) (unlock == 0 && Libc.GetLastError() == 0) || return cleanup(:GlobalUnlock) # this should never fail - pset = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint, Ptr{UInt16}), 13, pdata) + pset = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint, Ptr{UInt16}), 13, pdata) # CF_UNICODETEXT pdata != pset && return cleanup(:SetClipboardData) cleanup(:success) end clipboard(x) = clipboard(sprint(print, x)::String) function clipboard() function cleanup(cause) - errno = cause == :success ? UInt32(0) : Libc.GetLastError() + errno = cause === :success ? UInt32(0) : Libc.GetLastError() if cause !== :OpenClipboard ccall((:CloseClipboard, "user32"), stdcall, Cint, ()) == 0 && Base.windowserror(:CloseClipboard) # this should never fail end - if cause !== :success && (cause !== :GetClipboardData || errno != 0) + if cause !== :success && !(cause === :GetClipboardData && (errno == 0x8004006A || errno == 0x800401D3)) # ignore DV_E_CLIPFORMAT and CLIPBRD_E_BAD_DATA from GetClipboardData Base.windowserror(cause, errno) end "" end ccall((:OpenClipboard, "user32"), stdcall, Cint, (Ptr{Cvoid},), C_NULL) == 0 && return Base.windowserror(:OpenClipboard) ccall(:SetLastError, stdcall, Cvoid, (UInt32,), 0) # allow distinguishing if the clipboard simply didn't have text - pdata = ccall((:GetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint,), 13) + pdata = ccall((:GetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint,), 13) # CF_UNICODETEXT pdata == C_NULL && return cleanup(:GetClipboardData) plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata) plock == C_NULL && return cleanup(:GlobalLock) @@ -154,7 +154,7 @@ Send a printed form of `x` to the operating system clipboard ("copy"). clipboard(x) """ - clipboard() -> AbstractString + clipboard() -> String Return a string with the contents of the operating system clipboard ("paste"). 
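Not part of the patch: round-tripping text through the system clipboard with the API documented above. This needs one of the clipboard backends probed in this hunk (e.g. xclip/xsel/wl-copy on Linux).

```julia
# Illustrative sketch (not part of the diff).
using InteractiveUtils

clipboard("hello from Julia")   # "copy": sends the printed form of the argument
s = clipboard()                 # "paste": per the docstring change, returns a String
s == "hello from Julia"
```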
""" diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl index 8e66c881e415c..4e5141c0de08e 100644 --- a/stdlib/InteractiveUtils/src/codeview.jl +++ b/stdlib/InteractiveUtils/src/codeview.jl @@ -27,19 +27,31 @@ end # displaying type warnings -function warntype_type_printer(io::IO, @nospecialize(ty), used::Bool) - used || return - str = "::$ty" +function warntype_type_printer(io::IO; @nospecialize(type), used::Bool, show_type::Bool=true, _...) + (show_type && used) || return nothing + str = "::$type" if !highlighting[:warntype] print(io, str) - elseif ty isa Union && Base.is_expected_union(ty) + elseif type isa Union && is_expected_union(type) Base.emphasize(io, str, Base.warn_color()) # more mild user notification - elseif ty isa Type && (!Base.isdispatchelem(ty) || ty == Core.Box) + elseif type isa Type && (!Base.isdispatchelem(type) || type == Core.Box) Base.emphasize(io, str) else Base.printstyled(io, str, color=:cyan) # show the "good" type end - nothing + return nothing +end + +# True if one can be pretty certain that the compiler handles this union well, +# i.e. must be small with concrete types. +function is_expected_union(u::Union) + Base.unionlen(u) < 4 || return false + for x in Base.uniontypes(u) + if !Base.isdispatchelem(x) || x == Core.Box + return false + end + end + return true end """ @@ -47,15 +59,20 @@ end Prints lowered and type-inferred ASTs for the methods matching the given generic function and type signature to `io` which defaults to `stdout`. The ASTs are annotated in such a way -as to cause "non-leaf" types to be emphasized (if color is available, displayed in red). -This serves as a warning of potential type instability. Not all non-leaf types are particularly -problematic for performance, so the results need to be used judiciously. -In particular, unions containing either [`missing`](@ref) or [`nothing`](@ref) are displayed in yellow, since -these are often intentional. +as to cause "non-leaf" types which may be problematic for performance to be emphasized +(if color is available, displayed in red). This serves as a warning of potential type instability. + +Not all non-leaf types are particularly problematic for performance, and the performance +characteristics of a particular type is an implementation detail of the compiler. +`code_warntype` will err on the side of coloring types red if they might be a performance +concern, so some types may be colored red even if they do not impact performance. +Small unions of concrete types are usually not a concern, so these are highlighted in yellow. Keyword argument `debuginfo` may be one of `:source` or `:none` (default), to specify the verbosity of code comments. See [`@code_warntype`](@ref man-code-warntype) for more information. + +See also: [`@code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_llvm`](@ref), [`code_native`](@ref). """ function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt(f)); debuginfo::Symbol=:default, optimize::Bool=false, kwargs...) 
@@ -125,13 +142,13 @@ function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt end print(io, " ", slotnames[i]) if isa(slottypes, Vector{Any}) - warntype_type_printer(io, slottypes[i], true) + warntype_type_printer(io; type=slottypes[i], used=true) end println(io) end end print(io, "Body") - warntype_type_printer(io, rettype, true) + warntype_type_printer(io; type=rettype, used=true) println(io) irshow_config = Base.IRShow.IRShowConfig(lineprinter(src), warntype_type_printer) Base.IRShow.show_ir(lambda_io, src, irshow_config) @@ -139,10 +156,9 @@ function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt end nothing end -code_warntype(@nospecialize(f), @nospecialize(t=Base.default_tt(f)); kwargs...) = - code_warntype(stdout, f, t; kwargs...) +code_warntype(args...; kwargs...) = (@nospecialize; code_warntype(stdout, args...; kwargs...)) -import Base.CodegenParams +using Base: CodegenParams const GENERIC_SIG_WARNING = "; WARNING: This code may not match what actually runs.\n" const OC_MISMATCH_WARNING = @@ -152,10 +168,11 @@ const OC_MISMATCH_WARNING = """ # Printing code representations in IR and assembly + function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool, - strip_ir_metadata::Bool, dump_module::Bool, syntax::Symbol, + raw::Bool, dump_module::Bool, syntax::Symbol, optimize::Bool, debuginfo::Symbol, binary::Bool, - params::CodegenParams=CodegenParams(debug_info_kind=Cint(0))) + params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw)) ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") if isa(f, Core.Builtin) throw(ArgumentError("argument is not a generic function")) @@ -164,22 +181,23 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe # get the MethodInstance for the method match if !isa(f, Core.OpaqueClosure) world = Base.get_world_counter() - match = Base._which(signature_type(f, t), world) - linfo = Core.Compiler.specialize_method(match) + match = Base._which(signature_type(f, t); world) + mi = Core.Compiler.specialize_method(match) # TODO: use jl_is_cacheable_sig instead of isdispatchtuple - isdispatchtuple(linfo.specTypes) || (warning = GENERIC_SIG_WARNING) + isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING) else world = UInt64(f.world) + tt = Base.to_tuple_type(t) if Core.Compiler.is_source_inferred(f.source.source) # OC was constructed from inferred source. There's only one # specialization and we can't infer anything more precise either. 
world = f.source.primary_world - linfo = f.source.specializations[1] - Core.Compiler.hasintersect(typeof(f).parameters[1], t) || (warning = OC_MISMATCH_WARNING) + mi = f.source.specializations::Core.MethodInstance + Core.Compiler.hasintersect(typeof(f).parameters[1], tt) || (warning = OC_MISMATCH_WARNING) else - linfo = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec()) - actual = isdispatchtuple(linfo.specTypes) - isdispatchtuple(linfo.specTypes) || (warning = GENERIC_SIG_WARNING) + mi = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), tt.parameters...}, Core.svec()) + actual = isdispatchtuple(mi.specTypes) + isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING) end end # get the code for it @@ -193,21 +211,25 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe throw(ArgumentError("'syntax' must be either :intel or :att")) end if dump_module - str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary, params) + # we want module metadata, so use LLVM to generate assembly output + str = _dump_function_native_assembly(mi, world, wrapper, syntax, debuginfo, binary, raw, params) else - str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary) + # if we don't want the module metadata, just disassemble what our JIT has + str = _dump_function_native_disassembly(mi, world, wrapper, syntax, debuginfo, binary) end else - str = _dump_function_linfo_llvm(linfo, world, wrapper, strip_ir_metadata, dump_module, optimize, debuginfo, params) + str = _dump_function_llvm(mi, world, wrapper, !raw, dump_module, optimize, debuginfo, params) end str = warning * str return str end -function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool) - str = ccall(:jl_dump_method_asm, Ref{String}, - (Any, UInt, Bool, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool), - linfo, world, false, wrapper, syntax, debuginfo, binary) +function _dump_function_native_disassembly(mi::Core.MethodInstance, world::UInt, + wrapper::Bool, syntax::Symbol, + debuginfo::Symbol, binary::Bool) + str = @ccall jl_dump_method_asm(mi::Any, world::UInt, false::Bool, wrapper::Bool, + syntax::Ptr{UInt8}, debuginfo::Ptr{UInt8}, + binary::Bool)::Ref{String} return str end @@ -216,27 +238,30 @@ struct LLVMFDump f::Ptr{Cvoid} # opaque end -function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool, params::CodegenParams) +function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt, + wrapper::Bool, syntax::Symbol, debuginfo::Symbol, + binary::Bool, raw::Bool, params::CodegenParams) llvmf_dump = Ref{LLVMFDump}() - ccall(:jl_get_llvmf_defn, Cvoid, (Ptr{LLVMFDump}, Any, UInt, Bool, Bool, CodegenParams), llvmf_dump, linfo, world, wrapper, true, params) + @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump},mi::Any, world::UInt, wrapper::Bool, + true::Bool, params::CodegenParams)::Cvoid llvmf_dump[].f == C_NULL && error("could not compile the specified method") - str = ccall(:jl_dump_function_asm, Ref{String}, - (Ptr{LLVMFDump}, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool), - llvmf_dump, false, syntax, debuginfo, binary) + str = @ccall jl_dump_function_asm(llvmf_dump::Ptr{LLVMFDump}, false::Bool, + syntax::Ptr{UInt8}, debuginfo::Ptr{UInt8}, + binary::Bool, raw::Bool)::Ref{String} return str end -function _dump_function_linfo_llvm( - 
linfo::Core.MethodInstance, world::UInt, wrapper::Bool, +function _dump_function_llvm( + mi::Core.MethodInstance, world::UInt, wrapper::Bool, strip_ir_metadata::Bool, dump_module::Bool, optimize::Bool, debuginfo::Symbol, params::CodegenParams) llvmf_dump = Ref{LLVMFDump}() - ccall(:jl_get_llvmf_defn, Cvoid, (Ptr{LLVMFDump}, Any, UInt, Bool, Bool, CodegenParams), llvmf_dump, linfo, world, wrapper, optimize, params) + @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, world::UInt, + wrapper::Bool, optimize::Bool, params::CodegenParams)::Cvoid llvmf_dump[].f == C_NULL && error("could not compile the specified method") - str = ccall(:jl_dump_function_ir, Ref{String}, - (Ptr{LLVMFDump}, Bool, Bool, Ptr{UInt8}), - llvmf_dump, strip_ir_metadata, dump_module, debuginfo) + str = @ccall jl_dump_function_ir(llvmf_dump::Ptr{LLVMFDump}, strip_ir_metadata::Bool, + dump_module::Bool, debuginfo::Ptr{UInt8})::Ref{String} return str end @@ -250,46 +275,47 @@ If the `optimize` keyword is unset, the code will be shown before LLVM optimizat All metadata and dbg.* calls are removed from the printed bitcode. For the full IR, set the `raw` keyword to true. To dump the entire module that encapsulates the function (with declarations), set the `dump_module` keyword to true. Keyword argument `debuginfo` may be one of source (default) or none, to specify the verbosity of code comments. + +See also: [`@code_llvm`](@ref), [`code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_native`](@ref). """ -function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool, - dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) - d = _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo, false) - if highlighting[:llvm] && get(io, :color, false) +function code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); + raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default, + params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw)) + d = _dump_function(f, types, false, false, raw, dump_module, :intel, optimize, debuginfo, false, params) + if highlighting[:llvm] && get(io, :color, false)::Bool print_llvm(io, d) else print(io, d) end end -code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) = - code_llvm(io, f, types, raw, dump_module, optimize, debuginfo) -code_llvm(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) = - code_llvm(stdout, f, types; raw, dump_module, optimize, debuginfo) +code_llvm(args...; kwargs...) = (@nospecialize; code_llvm(stdout, args...; kwargs...)) """ - code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default, binary=false, dump_module=true) + code_native([io=stdout,], f, types; syntax=:intel, debuginfo=:default, binary=false, dump_module=true) Prints the native assembly instructions generated for running the method matching the given generic function and type signature to `io`. -* Set assembly syntax by setting `syntax` to `:att` (default) for AT&T syntax or `:intel` for Intel syntax. +* Set assembly syntax by setting `syntax` to `:intel` (default) for intel syntax or `:att` for AT&T syntax. * Specify verbosity of code comments by setting `debuginfo` to `:source` (default) or `:none`. 
* If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address. * If `dump_module` is `false`, do not print metadata such as rodata or directives. +* If `raw` is `false`, uninteresting instructions (like the safepoint function prologue) are elided. -See also: [`@code_native`](@ref), [`code_llvm`](@ref), [`code_typed`](@ref) and [`code_lowered`](@ref) +See also: [`@code_native`](@ref), [`code_warntype`](@ref), [`code_typed`](@ref), [`code_lowered`](@ref), [`code_llvm`](@ref). """ function code_native(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); - dump_module::Bool=true, syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false) - d = _dump_function(f, types, true, false, false, dump_module, syntax, true, debuginfo, binary) - if highlighting[:native] && get(io, :color, false) + dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false, + debuginfo::Symbol=:default, binary::Bool=false, + params::CodegenParams=CodegenParams(debug_info_kind=Cint(0), debug_info_level=Cint(2), safepoint_on_entry=raw, gcstack_arg=raw)) + d = _dump_function(f, types, true, false, raw, dump_module, syntax, true, debuginfo, binary, params) + if highlighting[:native] && get(io, :color, false)::Bool print_native(io, d) else print(io, d) end end -code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false) = - code_native(stdout, f, types; dump_module, syntax, debuginfo, binary) -code_native(::IO, ::Any, ::Symbol) = error("invalid code_native call") # resolve ambiguous call +code_native(args...; kwargs...) = (@nospecialize; code_native(stdout, args...; kwargs...)) ## colorized IR and assembly printing diff --git a/stdlib/InteractiveUtils/src/editless.jl b/stdlib/InteractiveUtils/src/editless.jl index 6fcc9e9423822..539e9b12f4071 100644 --- a/stdlib/InteractiveUtils/src/editless.jl +++ b/stdlib/InteractiveUtils/src/editless.jl @@ -65,6 +65,7 @@ already work: - nano - micro - kak +- helix - textmate - mate - kate @@ -123,8 +124,10 @@ function define_default_editors() `$cmd $path` end # vim family - for (editors, wait) in [[Any["vim", "vi", "nvim", "mvim"], true], - [Any["\bgvim"], false]] + for (editors, wait) in [ + [["vim", "vi", "nvim", "mvim"], true], + [[r"\bgvim"], false], + ] define_editor(editors; wait) do cmd, path, line, column cmd = line == 0 ? `$cmd $path` : column == 0 ? 
`$cmd +$line $path` : @@ -134,24 +137,31 @@ function define_default_editors() define_editor("nano"; wait=true) do cmd, path, line, column cmd = `$cmd +$line,$column $path` end - # emacs (must check that emacs not running in -t/-nw before regex match for general emacs) - for (editors, wait) in [[Any[r"\bemacs"], false], - [Any[r"\bemacs\b.*\s(-nw|--no-window-system)\b", r"\bemacsclient\b.\s*-(-?nw|t|-?tty)\b"], true]] + # emacs (must check that emacs not running in -t/-nw + # before regex match for general emacs) + for (editors, wait) in [ + [[r"\bemacs"], false], + [[r"\bemacs\b.*\s(-nw|--no-window-system)\b", + r"\bemacsclient\b.\s*-(-?nw|t|-?tty)\b"], true], + ] define_editor(editors; wait) do cmd, path, line, column `$cmd +$line:$column $path` end end - # Other editors + # other editors define_editor("gedit") do cmd, path, line, column `$cmd +$line:$column $path` end - define_editor(Any["micro", "kak"]; wait=true) do cmd, path, line, column + define_editor(["micro", "kak"]; wait=true) do cmd, path, line, column `$cmd +$line $path` end + define_editor(["hx", "helix"]; wait=true) do cmd, path, line, column + `$cmd $path:$line:$column` + end define_editor(["textmate", "mate", "kate"]) do cmd, path, line, column `$cmd $path -l $line` end - define_editor(Any[r"\bsubl", r"\batom", "pycharm", "bbedit"]) do cmd, path, line, column + define_editor([r"\bsubl", r"\batom", "pycharm", "bbedit"]) do cmd, path, line, column `$cmd $path:$line` end define_editor(["code", "code-insiders"]) do cmd, path, line, column diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl index b0005e6d7d783..a840dd7ea43bb 100644 --- a/stdlib/InteractiveUtils/src/macros.jl +++ b/stdlib/InteractiveUtils/src/macros.jl @@ -2,7 +2,7 @@ # macro wrappers for various reflection functions -import Base: typesof, insert! +import Base: typesof, insert!, replace_ref_begin_end!, infer_effects separate_kwargs(args...; kwargs...) = (args, values(kwargs)) @@ -24,7 +24,8 @@ function recursive_dotcalls!(ex, args, i=1) end end (start, branches) = ex.head === :. ? (1, ex.args[2].args) : (2, ex.args) - for j in start:length(branches) + length_branches = length(branches)::Int + for j in start:length_branches branch, i = recursive_dotcalls!(branches[j], args, i) branches[j] = branch end @@ -32,6 +33,9 @@ function recursive_dotcalls!(ex, args, i=1) end function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) + if Meta.isexpr(ex0, :ref) + ex0 = replace_ref_begin_end!(ex0) + end if isa(ex0, Expr) if ex0.head === :do && Meta.isexpr(get(ex0.args, 1, nothing), :call) if length(ex0.args) != 2 @@ -39,12 +43,12 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) end i = findlast(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex0.args[1].args) args = copy(ex0.args[1].args) - insert!(args, (isnothing(i) ? 2 : i+1), ex0.args[2]) + insert!(args, (isnothing(i) ? 2 : 1+i::Int), ex0.args[2]) ex0 = Expr(:call, args...) end if ex0.head === :. || (ex0.head === :call && ex0.args[1] !== :.. 
&& string(ex0.args[1])[1] == '.') codemacro = startswith(string(fcn), "code_") - if codemacro && ex0.args[2] isa Expr + if codemacro && (ex0.head === :call || ex0.args[2] isa Expr) # Manually wrap a dot call in a function args = Any[] ex, i = recursive_dotcalls!(copy(ex0), args) @@ -53,7 +57,7 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) dotfuncdef = Expr(:local, Expr(:(=), Expr(:call, dotfuncname, xargs...), ex)) return quote $(esc(dotfuncdef)) - local args = typesof($(map(esc, args)...)) + local args = $typesof($(map(esc, args)...)) $(fcn)($(esc(dotfuncname)), args; $(kws...)) end elseif !codemacro @@ -77,7 +81,7 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) :(error("expression is not a function call")) end) else - local args = typesof($(map(esc, ex0.args)...)) + local args = $typesof($(map(esc, ex0.args)...)) $(fcn)(Base.getproperty, args) end end @@ -93,7 +97,7 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[]) return quote local arg1 = $(esc(ex0.args[1])) local args, kwargs = $separate_kwargs($(map(esc, ex0.args[2:end])...)) - $(fcn)(Core.kwfunc(arg1), + $(fcn)(Core.kwcall, Tuple{typeof(kwargs), Core.Typeof(arg1), map(Core.Typeof, args)...}; $(kws...)) end @@ -208,7 +212,7 @@ macro which(ex0::Symbol) return :(which($__module__, $ex0)) end -for fname in [:code_warntype, :code_llvm, :code_native] +for fname in [:code_warntype, :code_llvm, :code_native, :infer_effects] @eval begin macro ($fname)(ex0...) gen_call_with_extracted_types_and_kwargs(__module__, $(Expr(:quote, fname)), ex0) @@ -248,7 +252,7 @@ end Applied to a function or macro call, it evaluates the arguments to the specified call, and returns a tuple `(filename,line)` giving the location for the method that would be called for those arguments. -It calls out to the `functionloc` function. +It calls out to the [`functionloc`](@ref) function. """ :@functionloc @@ -267,7 +271,7 @@ See also: [`@less`](@ref), [`@edit`](@ref). """ @less -Evaluates the arguments to the function or macro call, determines their types, and calls the `less` +Evaluates the arguments to the function or macro call, determines their types, and calls the [`less`](@ref) function on the resulting expression. See also: [`@edit`](@ref), [`@which`](@ref), [`@code_lowered`](@ref). @@ -277,7 +281,7 @@ See also: [`@edit`](@ref), [`@which`](@ref), [`@code_lowered`](@ref). """ @edit -Evaluates the arguments to the function or macro call, determines their types, and calls the `edit` +Evaluates the arguments to the function or macro call, determines their types, and calls the [`edit`](@ref) function on the resulting expression. See also: [`@less`](@ref), [`@which`](@ref). @@ -293,6 +297,8 @@ Evaluates the arguments to the function or macro call, determines their types, a @code_typed optimize=true foo(x) to control whether additional optimizations, such as inlining, are also applied. + +See also: [`code_typed`](@ref), [`@code_warntype`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref). """ :@code_typed @@ -301,6 +307,8 @@ to control whether additional optimizations, such as inlining, are also applied. Evaluates the arguments to the function or macro call, determines their types, and calls [`code_lowered`](@ref) on the resulting expression. + +See also: [`code_lowered`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref). 
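Not part of the patch: the reflection macros documented above derive the type signature from a call expression, so options such as `optimize=` are written before the call.

```julia
# Illustrative sketch (not part of the diff).
using InteractiveUtils

sumsq(v) = sum(abs2, v)

@code_lowered sumsq([1.0, 2.0])               # lowered, untyped IR
@code_typed optimize=false sumsq([1.0, 2.0])  # type-inferred IR before optimizations
```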
""" :@code_lowered @@ -309,6 +317,8 @@ Evaluates the arguments to the function or macro call, determines their types, a Evaluates the arguments to the function or macro call, determines their types, and calls [`code_warntype`](@ref) on the resulting expression. + +See also: [`code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref), [`@code_native`](@ref). """ :@code_warntype @@ -327,6 +337,8 @@ by putting them and their value before the function call, like this: `raw` makes all metadata and dbg.* calls visible. `debuginfo` may be one of `:source` (default) or `:none`, to specify the verbosity of code comments. `dump_module` prints the entire module that encapsulates the function. + +See also: [`code_llvm`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_native`](@ref). """ :@code_llvm @@ -341,12 +353,12 @@ by putting it before the function call, like this: @code_native syntax=:intel debuginfo=:default binary=true dump_module=false f(x) -* Set assembly syntax by setting `syntax` to `:att` (default) for AT&T syntax or `:intel` for Intel syntax. +* Set assembly syntax by setting `syntax` to `:intel` (default) for Intel syntax or `:att` for AT&T syntax. * Specify verbosity of code comments by setting `debuginfo` to `:source` (default) or `:none`. * If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address. * If `dump_module` is `false`, do not print metadata such as rodata or directives. -See also: [`code_native`](@ref), [`@code_llvm`](@ref), [`@code_typed`](@ref) and [`@code_lowered`](@ref) +See also: [`code_native`](@ref), [`@code_warntype`](@ref), [`@code_typed`](@ref), [`@code_lowered`](@ref), [`@code_llvm`](@ref). """ :@code_native @@ -356,39 +368,35 @@ See also: [`code_native`](@ref), [`@code_llvm`](@ref), [`@code_typed`](@ref) and A macro to execute an expression and produce a report of any time spent importing packages and their dependencies. Any compilation time will be reported as a percentage, and how much of which was recompilation, if any. -If a package's dependencies have already been imported either globally or by another dependency they will -not appear under that package and the package will accurately report a faster load time than if it were to -be loaded in isolation. +One line is printed per package or package extension. The duration shown is the time to import that package itself, not including the time to load any of its dependencies. + +On Julia 1.9+ [package extensions](@ref man-extensions) will show as Parent → Extension. -!!! compat "Julia 1.9" - Reporting of any compilation and recompilation time was added in Julia 1.9 +!!! note + During the load process a package sequentially imports all of its dependencies, not just its direct dependencies. 
```julia-repl julia> @time_imports using CSV - 0.4 ms ┌ IteratorInterfaceExtensions - 11.1 ms ┌ TableTraits 84.88% compilation time - 145.4 ms ┌ SentinelArrays 66.73% compilation time - 42.3 ms ┌ Parsers 19.66% compilation time - 4.1 ms ┌ Compat - 8.2 ms ┌ OrderedCollections - 1.4 ms ┌ Zlib_jll - 2.3 ms ┌ TranscodingStreams - 6.1 ms ┌ CodecZlib - 0.3 ms ┌ DataValueInterfaces - 15.2 ms ┌ FilePathsBase 30.06% compilation time - 9.3 ms ┌ InlineStrings - 1.5 ms ┌ DataAPI - 31.4 ms ┌ WeakRefStrings - 14.8 ms ┌ Tables - 24.2 ms ┌ PooledArrays - 2002.4 ms CSV 83.49% compilation time + 50.7 ms Parsers 17.52% compilation time + 0.2 ms DataValueInterfaces + 1.6 ms DataAPI + 0.1 ms IteratorInterfaceExtensions + 0.1 ms TableTraits + 17.5 ms Tables + 26.8 ms PooledArrays + 193.7 ms SentinelArrays 75.12% compilation time + 8.6 ms InlineStrings + 20.3 ms WeakRefStrings + 2.0 ms TranscodingStreams + 1.4 ms Zlib_jll + 1.8 ms CodecZlib + 0.8 ms Compat + 13.1 ms FilePathsBase 28.39% compilation time + 1681.2 ms CSV 92.40% compilation time ``` -!!! note - During the load process a package sequentially imports where necessary all of its dependencies, not just - its direct dependencies. That is also true for the dependencies themselves so nested importing will likely - occur, but not always. Therefore the nesting shown in this output report is not equivalent to the dependency - tree, but does indicate where import time has accumulated. +!!! compat "Julia 1.8" + This macro requires at least Julia 1.8 """ :@time_imports diff --git a/stdlib/InteractiveUtils/test/highlighting.jl b/stdlib/InteractiveUtils/test/highlighting.jl index 0026c0b855730..b72c9dbe72795 100644 --- a/stdlib/InteractiveUtils/test/highlighting.jl +++ b/stdlib/InteractiveUtils/test/highlighting.jl @@ -10,7 +10,6 @@ myzeros(::Type{T}, ::Type{S}, ::Type{R}, dims::Tuple{Vararg{Integer, N}}, dims2: seekstart(io) @test startswith(readline(io), "MethodInstance for ") @test occursin(r"^ from myzeros\(::.*Type.*{T}, ::", readline(io)) - readline(io) # skip location information from method printing - already tested in base @test occursin(r"^Static Parameters$", readline(io)) @test occursin(r"^ T <: .*Integer", readline(io)) @test occursin(r"^ .*Signed.* <: R <: .*Real", readline(io)) @@ -73,7 +72,7 @@ end @test occursin("\e", String(take!(io))) end -function hilight_llvm(s) +function highlight_llvm(s) io = IOBuffer() InteractiveUtils.print_llvm(IOContext(io, :color=>true), s) r = String(take!(io)) @@ -83,7 +82,7 @@ function hilight_llvm(s) flush(stdout) r end -function hilight_native(s, arch) +function highlight_native(s, arch) io = IOBuffer() InteractiveUtils.print_native(IOContext(io, :color=>true), s, arch) r = String(take!(io)) @@ -93,8 +92,8 @@ function hilight_native(s, arch) flush(stdout) r end -hilight_x86(s) = hilight_native(s, :x86) -hilight_arm(s) = hilight_native(s, :arm) +highlight_x86(s) = highlight_native(s, :x86) +highlight_arm(s) = highlight_native(s, :arm) function esc_code(s) io = IOBuffer() @@ -125,41 +124,41 @@ const XU = B * "}" * XB @testset "LLVM IR" begin @testset "comment" begin - @test hilight_llvm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" + @test highlight_llvm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" end - @testset "lavel" begin - @test hilight_llvm("top:") == "$(L)top:$(XL)\n" + @testset "label" begin + @test highlight_llvm("top:") == "$(L)top:$(XL)\n" - @test hilight_llvm("L7:\t\t; preds = %top") == + @test highlight_llvm("L7:\t\t; preds = %top") == "$(L)L7:$(XL)\t\t$(C); preds = %top$(XC)\n" end 
@testset "define" begin - @test hilight_llvm("define double @julia_func_1234(float) {") == + @test highlight_llvm("define double @julia_func_1234(float) {") == "$(K)define$(XK) $(T)double$(XT) " * "$(F)@julia_func_1234$(XF)$P$(T)float$(XT)$XP $U\n" - @test hilight_llvm("}") == "$XU\n" + @test highlight_llvm("}") == "$XU\n" end @testset "declare" begin - @test hilight_llvm("declare i32 @jl_setjmp(i8*) #2") == + @test highlight_llvm("declare i32 @jl_setjmp(i8*) #2") == "$(K)declare$(XK) $(T)i32$(XT) " * "$(F)@jl_setjmp$(XF)$P$(T)i8$(XT)$(D)*$(XD)$XP $(D)#2$(XD)\n" end @testset "type" begin - @test hilight_llvm("%jl_value_t = type opaque") == + @test highlight_llvm("%jl_value_t = type opaque") == "$(V)%jl_value_t$(XV) $EQU $(K)type$(XK) $(T)opaque$(XT)\n" end @testset "target" begin datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" - @test hilight_llvm("target datalayout = \"$datalayout\"") == + @test highlight_llvm("target datalayout = \"$datalayout\"") == "$(K)target$(XK) $(K)datalayout$(XK) $EQU $(V)\"$datalayout\"$(XV)\n" end @testset "attributes" begin - @test hilight_llvm( + @test highlight_llvm( """attributes #1 = { uwtable "frame-pointer"="all" }""") == "$(K)attributes$(XK) $(D)#1$(XD) $EQU " * "$U $(K)uwtable$(XK) $(V)\"frame-pointer\"$(XV)$EQU" * @@ -167,57 +166,57 @@ const XU = B * "}" * XB end @testset "terminator" begin - @test hilight_llvm(" ret i8 %12") == + @test highlight_llvm(" ret i8 %12") == " $(I)ret$(XI) $(T)i8$(XT) $(V)%12$(XV)\n" - @test hilight_llvm(" br i1 %2, label %L6, label %L4") == + @test highlight_llvm(" br i1 %2, label %L6, label %L4") == " $(I)br$(XI) $(T)i1$(XT) $(V)%2$(XV)$COM " * "$(T)label$(XT) $(L)%L6$(XL)$COM $(T)label$(XT) $(L)%L4$(XL)\n" - @test hilight_llvm(" br label %L5") == + @test highlight_llvm(" br label %L5") == " $(I)br$(XI) $(T)label$(XT) $(L)%L5$(XL)\n" - @test hilight_llvm(" unreachable") == " $(I)unreachable$(XI)\n" + @test highlight_llvm(" unreachable") == " $(I)unreachable$(XI)\n" end @testset "arithmetic" begin - @test hilight_llvm(" %11 = add nuw nsw i64 %value_phi10, 1") == + @test highlight_llvm(" %11 = add nuw nsw i64 %value_phi10, 1") == " $(V)%11$(XV) $EQU $(I)add$(XI) $(K)nuw$(XK) $(K)nsw$(XK) " * "$(T)i64$(XT) $(V)%value_phi10$(XV)$COM $(N)1$(XN)\n" - @test hilight_llvm(" %13 = fadd double %12, -2.000000e+00") == + @test highlight_llvm(" %13 = fadd double %12, -2.000000e+00") == " $(V)%13$(XV) $EQU $(I)fadd$(XI) " * "$(T)double$(XT) $(V)%12$(XV)$COM $(N)-2.000000e+00$(XN)\n" - @test hilight_llvm(" %21 = fmul contract double %20, 0x0123456789ABCDEF") == + @test highlight_llvm(" %21 = fmul contract double %20, 0x0123456789ABCDEF") == " $(V)%21$(XV) $EQU $(I)fmul$(XI) $(K)contract$(XK) " * "$(T)double$(XT) $(V)%20$(XV)$COM $(N)0x0123456789ABCDEF$(XN)\n" end @testset "bitwise" begin - @test hilight_llvm(" %31 = shl i64 %value_phi4, 52") == + @test highlight_llvm(" %31 = shl i64 %value_phi4, 52") == " $(V)%31$(XV) $EQU " * "$(I)shl$(XI) $(T)i64$(XT) $(V)%value_phi4$(XV)$COM $(N)52$(XN)\n" end @testset "aggregate" begin - @test hilight_llvm(" %4 = extractvalue { i64, i1 } %1, 0") == + @test highlight_llvm(" %4 = extractvalue { i64, i1 } %1, 0") == " $(V)%4$(XV) $EQU $(I)extractvalue$(XI) " * "$U $(T)i64$(XT)$COM $(T)i1$(XT) $XU $(V)%1$(XV)$COM $(N)0$(XN)\n" end @testset "memory access" begin - @test hilight_llvm(" %dims = alloca [1 x i64], align 8") == + @test highlight_llvm(" %dims = alloca [1 x i64], align 8") == " $(V)%dims$(XV) $EQU $(I)alloca$(XI) " * "$S$(N)1$(XN) $(D)x$(XD) $(T)i64$(XT)$XS$COM $(K)align$(XK) 
$(N)8$(XN)\n" - @test hilight_llvm(" %51 = load i32," * + @test highlight_llvm(" %51 = load i32," * " i32* inttoptr (i64 226995504 to i32*), align 16") == " $(V)%51$(XV) $EQU $(I)load$(XI) $(T)i32$(XT)$COM " * "$(T)i32$(XT)$(D)*$(XD) $(K)inttoptr$(XK) $P$(T)i64$(XT) $(N)226995504$(XN) " * "$(K)to$(XK) $(T)i32$(XT)$(D)*$(XD)$XP$COM $(K)align$(XK) $(N)16$(XN)\n" - @test hilight_llvm(" %53 = load %jl_value_t addrspace(10)*, " * + @test highlight_llvm(" %53 = load %jl_value_t addrspace(10)*, " * "%jl_value_t addrspace(10)* addrspace(11)* %52, align 8") == " $(V)%53$(XV) $EQU $(I)load$(XI) $(V)%jl_value_t$(XV) " * "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD)$COM " * @@ -225,37 +224,37 @@ const XU = B * "}" * XB "$(K)addrspace$(XK)$P$(N)11$(XN)$XP$(D)*$(XD) " * "$(V)%52$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n" - @test hilight_llvm(" store i64 %61, i64 addrspace(11)* %60, align 8") == + @test highlight_llvm(" store i64 %61, i64 addrspace(11)* %60, align 8") == " $(I)store$(XI) $(T)i64$(XT) $(V)%61$(XV)$COM " * "$(T)i64$(XT) $(K)addrspace$(XK)$P$(N)11$(XN)$XP$(D)*$(XD) " * "$(V)%60$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n" - @test hilight_llvm(" store volatile %jl_value_t addrspace(10)** %62, " * + @test highlight_llvm(" store volatile %jl_value_t addrspace(10)** %62, " * "%jl_value_t addrspace(10)*** %63, align 8") == " $(I)store$(XI) $(K)volatile$(XK) $(V)%jl_value_t$(XV) " * "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)**$(XD) $(V)%62$(XV)$COM " * "$(V)%jl_value_t$(XV) $(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)***$(XD) " * "$(V)%63$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n" - @test hilight_llvm(" %71 = getelementptr i8, i8* %70, i64 8") == + @test highlight_llvm(" %71 = getelementptr i8, i8* %70, i64 8") == " $(V)%71$(XV) $EQU $(I)getelementptr$(XI) $(T)i8$(XT)$COM " * "$(T)i8$(XT)$(D)*$(XD) $(V)%70$(XV)$COM $(T)i64$(XT) $(N)8$(XN)\n" end @testset "conversion" begin - @test hilight_llvm(" %22 = zext i1 %21 to i8") == + @test highlight_llvm(" %22 = zext i1 %21 to i8") == " $(V)%22$(XV) $EQU $(I)zext$(XI) $(T)i1$(XT) $(V)%21$(XV) " * "$(K)to$(XK) $(T)i8$(XT)\n" - @test hilight_llvm(" %24 = sitofp i64 %23 to double") == + @test highlight_llvm(" %24 = sitofp i64 %23 to double") == " $(V)%24$(XV) $EQU $(I)sitofp$(XI) $(T)i64$(XT) $(V)%23$(XV) " * "$(K)to$(XK) $(T)double$(XT)\n" - @test hilight_llvm(" %26 = ptrtoint i8* %25 to i64") == + @test highlight_llvm(" %26 = ptrtoint i8* %25 to i64") == " $(V)%26$(XV) $EQU $(I)ptrtoint$(XI) $(T)i8$(XT)$(D)*$(XD) " * "$(V)%25$(XV) $(K)to$(XK) $(T)i64$(XT)\n" - @test hilight_llvm(" %28 = bitcast %jl_value_t addrspace(10)* %27 " * + @test highlight_llvm(" %28 = bitcast %jl_value_t addrspace(10)* %27 " * "to [2 x i16] addrspace(10)*") == " $(V)%28$(XV) $EQU $(I)bitcast$(XI) $(V)%jl_value_t$(XV) " * "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD) $(V)%27$(XV) " * @@ -264,20 +263,20 @@ const XU = B * "}" * XB end @testset "other" begin - @test hilight_llvm(" %31 = icmp slt i64 %30, 0") == + @test highlight_llvm(" %31 = icmp slt i64 %30, 0") == " $(V)%31$(XV) $EQU $(I)icmp$(XI) $(I)slt$(XI) " * "$(T)i64$(XT) $(V)%30$(XV)$COM $(N)0$(XN)\n" - @test hilight_llvm(" %value_phi34 = phi double [ %33, %L50 ], [ %32, %L60 ]") == + @test highlight_llvm(" %value_phi34 = phi double [ %33, %L50 ], [ %32, %L60 ]") == " $(V)%value_phi34$(XV) $EQU $(I)phi$(XI) $(T)double$(XT) " * "$S $(V)%33$(XV)$COM $(L)%L50$(XL) $XS$COM " * "$S $(V)%32$(XV)$COM $(L)%L60$(XL) $XS\n" - @test hilight_llvm(" %.v = select i1 %35, i64 %36, i64 63") == + @test highlight_llvm(" %.v = select i1 %35, i64 %36, i64 63") == " 
$(V)%.v$(XV) $EQU $(I)select$(XI) $(T)i1$(XT) $(V)%35$(XV)$COM " * "$(T)i64$(XT) $(V)%36$(XV)$COM $(T)i64$(XT) $(N)63$(XN)\n" - @test hilight_llvm(" %38 = call i64 @llvm.cttz.i64(i64 %37, i1 false)") == + @test highlight_llvm(" %38 = call i64 @llvm.cttz.i64(i64 %37, i1 false)") == " $(V)%38$(XV) $EQU $(I)call$(XI) $(T)i64$(XT) " * "$(F)@llvm.cttz.i64$(XF)$P$(T)i64$(XT) $(V)%37$(XV)$COM " * "$(T)i1$(XT) $(K)false$(XK)$XP\n" @@ -286,133 +285,133 @@ end @testset "x86 ASM" begin @testset "comment" begin - @test hilight_x86("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" + @test highlight_x86("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" end @testset "label" begin - @test hilight_x86("L123:") == "$(L)L123:$(XL)\n" + @test highlight_x86("L123:") == "$(L)L123:$(XL)\n" end @testset "directive" begin - @test hilight_x86("\t.text") == "\t$(D).text$(XD)\n" + @test highlight_x86("\t.text") == "\t$(D).text$(XD)\n" end @testset "0-operand" begin # AT&T - @test hilight_x86("\tretq") == "\t$(I)retq$(XI)\n" + @test highlight_x86("\tretq") == "\t$(I)retq$(XI)\n" # Intel - @test hilight_x86("\tret") == "\t$(I)ret$(XI)\n" + @test highlight_x86("\tret") == "\t$(I)ret$(XI)\n" end @testset "1-operand" begin # AT&T - @test hilight_x86("\tpopq\t%rax") == "\t$(I)popq$(XI)\t$(V)%rax$(XV)\n" + @test highlight_x86("\tpopq\t%rax") == "\t$(I)popq$(XI)\t$(V)%rax$(XV)\n" - @test hilight_x86("\tpushl\t\$4294967295\t# imm = 0xFFFFFFFF") == + @test highlight_x86("\tpushl\t\$4294967295\t# imm = 0xFFFFFFFF") == "\t$(I)pushl$(XI)\t$(N)\$4294967295$(XN)\t$(C)# imm = 0xFFFFFFFF$(XC)\n" - @test hilight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n" + @test highlight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n" - @test hilight_x86("\tnopw\t%cs:(%rax,%rax)") == + @test highlight_x86("\tnopw\t%cs:(%rax,%rax)") == "\t$(I)nopw$(XI)\t$(V)%cs$(XV)$COL$P$(V)%rax$(XV)$COM$(V)%rax$(XV)$XP\n" # Intel - @test hilight_x86("\tpop\trax") == "\t$(I)pop$(XI)\t$(V)rax$(XV)\n" + @test highlight_x86("\tpop\trax") == "\t$(I)pop$(XI)\t$(V)rax$(XV)\n" - @test hilight_x86("\tpush\t4294967295") == + @test highlight_x86("\tpush\t4294967295") == "\t$(I)push$(XI)\t$(N)4294967295$(XN)\n" - @test hilight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n" + @test highlight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n" - @test hilight_x86("\tnop\tword ptr cs:[rax + rax]") == + @test highlight_x86("\tnop\tword ptr cs:[rax + rax]") == "\t$(I)nop$(XI)\t$(K)word$(XK) $(K)ptr$(XK) " * "$(V)cs$(XV)$COL$S$(V)rax$(XV) $(D)+$(XD) $(V)rax$(XV)$XS\n" end @testset "2-operand" begin # AT&T - @test hilight_x86("\tshrq\t\$63, %rcx") == + @test highlight_x86("\tshrq\t\$63, %rcx") == "\t$(I)shrq$(XI)\t$(N)\$63$(XN)$COM $(V)%rcx$(XV)\n" - @test hilight_x86("\tvmovsd\t(%rsi,%rdx,8), %xmm1\t# xmm1 = mem[0],zero") == + @test highlight_x86("\tvmovsd\t(%rsi,%rdx,8), %xmm1\t# xmm1 = mem[0],zero") == "\t$(I)vmovsd$(XI)\t$P$(V)%rsi$(XV)$COM$(V)%rdx$(XV)$COM$(N)8$(XN)$XP" * "$COM $(V)%xmm1$(XV)\t$(C)# xmm1 = mem[0],zero$(XC)\n" - @test hilight_x86("\tmovabsq\t\$\"#string#338\", %rax") == + @test highlight_x86("\tmovabsq\t\$\"#string#338\", %rax") == "\t$(I)movabsq$(XI)\t$(F)\$\"#string#338\"$(XF)$COM $(V)%rax$(XV)\n" # Intel - @test hilight_x86("\tshr\trcx, 63") == + @test highlight_x86("\tshr\trcx, 63") == "\t$(I)shr$(XI)\t$(V)rcx$(XV)$COM $(N)63$(XN)\n" - @test hilight_x86( + @test highlight_x86( "\tvmovsd\txmm1, dword ptr [rsi + 8*rdx]\t# xmm1 = mem[0],zero") == "\t$(I)vmovsd$(XI)\t$(V)xmm1$(XV)$COM $(K)dword$(XK) $(K)ptr$(XK) " * 
"$S$(V)rsi$(XV) $(D)+$(XD) $(N)8$(XN)$(D)*$(XD)$(V)rdx$(XV)$XS" * "\t$(C)# xmm1 = mem[0],zero$(XC)\n" - @test hilight_x86("\tmovabs\trax, offset \"#string#338\"") == + @test highlight_x86("\tmovabs\trax, offset \"#string#338\"") == "\t$(I)movabs$(XI)\t$(V)rax$(XV)$COM " * "$(K)offset$(XK) $(F)\"#string#338\"$(XF)\n" end @testset "3-operand" begin # AT&T - @test hilight_x86("\tvaddsd\t(%rax), %xmm0, %xmm0") == + @test highlight_x86("\tvaddsd\t(%rax), %xmm0, %xmm0") == "\t$(I)vaddsd$(XI)\t$P$(V)%rax$(XV)$XP$COM " * "$(V)%xmm0$(XV)$COM $(V)%xmm0$(XV)\n" # Intel - @test hilight_x86("\tvaddsd\txmm0, xmm0, qword ptr [rax]") == + @test highlight_x86("\tvaddsd\txmm0, xmm0, qword ptr [rax]") == "\t$(I)vaddsd$(XI)\t$(V)xmm0$(XV)$COM $(V)xmm0$(XV)$COM " * "$(K)qword$(XK) $(K)ptr$(XK) $S$(V)rax$(XV)$XS\n" end @testset "4-operand" begin # AT&T - @test hilight_x86("\tvroundsd\t\$4, %xmm1, %xmm1, %xmm1") == + @test highlight_x86("\tvroundsd\t\$4, %xmm1, %xmm1, %xmm1") == "\t$(I)vroundsd$(XI)\t$(N)\$4$(XN)$COM " * "$(V)%xmm1$(XV)$COM $(V)%xmm1$(XV)$COM $(V)%xmm1$(XV)\n" # Intel - @test hilight_x86("\tvroundsd\txmm1, xmm1, xmm1, 4") == + @test highlight_x86("\tvroundsd\txmm1, xmm1, xmm1, 4") == "\t$(I)vroundsd$(XI)\t" * "$(V)xmm1$(XV)$COM $(V)xmm1$(XV)$COM $(V)xmm1$(XV)$COM $(N)4$(XN)\n" end @testset "AVX-512" begin # AT&T - @test hilight_x86("\tvmovaps\t(%eax), %zmm0") == + @test highlight_x86("\tvmovaps\t(%eax), %zmm0") == "\t$(I)vmovaps$(XI)\t$P$(V)%eax$(XV)$XP$COM $(V)%zmm0$(XV)\n" - @test hilight_x86("\tvpaddd\t%zmm3, %zmm1, %zmm1 {%k1}") == + @test highlight_x86("\tvpaddd\t%zmm3, %zmm1, %zmm1 {%k1}") == "\t$(I)vpaddd$(XI)\t$(V)%zmm3$(XV)$COM $(V)%zmm1$(XV)$COM " * "$(V)%zmm1$(XV) $U$(V)%k1$(XV)$XU\n" - @test hilight_x86("\tvdivpd\t%zmm3, %zmm1, %zmm0 {%k1} {z}") == + @test highlight_x86("\tvdivpd\t%zmm3, %zmm1, %zmm0 {%k1} {z}") == "\t$(I)vdivpd$(XI)\t$(V)%zmm3$(XV)$COM $(V)%zmm1$(XV)$COM " * "$(V)%zmm0$(XV) $U$(V)%k1$(XV)$XU $U$(K)z$(XK)$XU\n" - @test hilight_x86("\tvdivps\t(%ebx){1to16}, %zmm5, %zmm4") == + @test highlight_x86("\tvdivps\t(%ebx){1to16}, %zmm5, %zmm4") == "\t$(I)vdivps$(XI)\t$P$(V)%ebx$(XV)$XP$U$(K)1to16$(XK)$XU$COM " * "$(V)%zmm5$(XV)$COM $(V)%zmm4$(XV)\n" - @test hilight_x86("\tvcvtsd2si\t{rn-sae}, %xmm0, %eax") == + @test highlight_x86("\tvcvtsd2si\t{rn-sae}, %xmm0, %eax") == "\t$(I)vcvtsd2si$(XI)\t$U$(K)rn-sae$(XK)$XU$COM " * "$(V)%xmm0$(XV)$COM $(V)%eax$(XV)\n" # Intel - @test hilight_x86("\tvmovaps\tzmm0, zmmword ptr [eax]") == + @test highlight_x86("\tvmovaps\tzmm0, zmmword ptr [eax]") == "\t$(I)vmovaps$(XI)\t$(V)zmm0$(XV)$COM " * "$(K)zmmword$(XK) $(K)ptr$(XK) $S$(V)eax$(XV)$XS\n" - @test hilight_x86("\tvpaddd\tzmm1 {k1}, zmm1, zmm3") == + @test highlight_x86("\tvpaddd\tzmm1 {k1}, zmm1, zmm3") == "\t$(I)vpaddd$(XI)\t$(V)zmm1$(XV) $U$(V)k1$(XV)$XU$COM " * "$(V)zmm1$(XV)$COM $(V)zmm3$(XV)\n" - @test hilight_x86("\tvdivpd\tzmm0 {k1} {z}, zmm1, zmm3") == + @test highlight_x86("\tvdivpd\tzmm0 {k1} {z}, zmm1, zmm3") == "\t$(I)vdivpd$(XI)\t$(V)zmm0$(XV) $U$(V)k1$(XV)$XU $U$(K)z$(XK)$XU$COM " * "$(V)zmm1$(XV)$COM $(V)zmm3$(XV)\n" - @test hilight_x86("\tvdivps\tzmm4, zmm5, dword ptr [ebx]{1to16}") == + @test highlight_x86("\tvdivps\tzmm4, zmm5, dword ptr [ebx]{1to16}") == "\t$(I)vdivps$(XI)\t$(V)zmm4$(XV)$COM $(V)zmm5$(XV)$COM " * "$(K)dword$(XK) $(K)ptr$(XK) $S$(V)ebx$(XV)$XS$U$(K)1to16$(XK)$XU\n" - @test hilight_x86("\tvcvtsd2si\teax, xmm0$(XV), {rn-sae}") == + @test highlight_x86("\tvcvtsd2si\teax, xmm0$(XV), {rn-sae}") == "\t$(I)vcvtsd2si$(XI)\t$(V)eax$(XV)$COM " * 
"$(V)xmm0$(XV)$COM $U$(K)rn-sae$(XK)$XU\n" end @@ -420,74 +419,74 @@ end @testset "ARM ASM" begin @testset "comment" begin - @test hilight_arm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" + @test highlight_arm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n" end @testset "label" begin - @test hilight_arm("L45:") == "$(L)L45:$(XL)\n" + @test highlight_arm("L45:") == "$(L)L45:$(XL)\n" end @testset "directive" begin - @test hilight_arm("\t.text") == "\t$(D).text$(XD)\n" + @test highlight_arm("\t.text") == "\t$(D).text$(XD)\n" end @testset "0-operand" begin - @test hilight_arm("\tret") == "\t$(I)ret$(XI)\n" + @test highlight_arm("\tret") == "\t$(I)ret$(XI)\n" end @testset "1-operand" begin - @test hilight_arm("\tbl\t0x12") == "\t$(I)bl$(XI)\t$(N)0x12$(XN)\n" + @test highlight_arm("\tbl\t0x12") == "\t$(I)bl$(XI)\t$(N)0x12$(XN)\n" - @test hilight_arm("\tb\tL345") == "\t$(I)b$(XI)\t$(L)L345$(XL)\n" + @test highlight_arm("\tb\tL345") == "\t$(I)b$(XI)\t$(L)L345$(XL)\n" - @test hilight_arm("\tb.gt\tL67") == "\t$(I)b.gt$(XI)\t$(L)L67$(XL)\n" + @test highlight_arm("\tb.gt\tL67") == "\t$(I)b.gt$(XI)\t$(L)L67$(XL)\n" - @test hilight_arm("\tpop\t{r11, pc}") == + @test highlight_arm("\tpop\t{r11, pc}") == "\t$(I)pop$(XI)\t$U$(V)r11$(XV)$COM $(V)pc$(XV)$XU\n" end @testset "2-operand" begin - @test hilight_arm("\tcmp\tx10, #2047\t// =2047") == + @test highlight_arm("\tcmp\tx10, #2047\t// =2047") == "\t$(I)cmp$(XI)\t$(V)x10$(XV)$COM $(N)#2047$(XN)\t$(C)// =2047$(XC)\n" - @test hilight_arm("\tldr\td1, [x10]") == + @test highlight_arm("\tldr\td1, [x10]") == "\t$(I)ldr$(XI)\t$(V)d1$(XV)$COM $S$(V)x10$(XV)$XS\n" - @test hilight_arm("\tstr\tx30, [sp, #-16]!") == + @test highlight_arm("\tstr\tx30, [sp, #-16]!") == "\t$(I)str$(XI)\t$(V)x30$(XV)$COM " * "$S$(V)sp$(XV)$COM $(N)#-16$(XN)$XS$(K)!$(XK)\n" - @test hilight_arm("\tmov\tv0.16b, v1.16b") == + @test highlight_arm("\tmov\tv0.16b, v1.16b") == "\t$(I)mov$(XI)\t$(V)v0.16b$(XV)$COM $(V)v1.16b$(XV)\n" end @testset "3-operand" begin - @test hilight_arm("\tfmul\td2, d0, d2") == + @test highlight_arm("\tfmul\td2, d0, d2") == "\t$(I)fmul$(XI)\t$(V)d2$(XV)$COM $(V)d0$(XV)$COM $(V)d2$(XV)\n" - @test hilight_arm("\tmovk\tx10, #65535, lsl #32") == + @test highlight_arm("\tmovk\tx10, #65535, lsl #32") == "\t$(I)movk$(XI)\t$(V)x10$COM $(N)#65535$(XN)$COM $(K)lsl$(XK) $(N)#32$(XN)\n" - @test hilight_arm("\tcneg\tx8, x8, ge") == + @test highlight_arm("\tcneg\tx8, x8, ge") == "\t$(I)cneg$(XI)\t$(V)x8$(XV)$COM $(V)x8$(XV)$COM $(K)ge$(XK)\n" end @testset "4-operand" begin - @test hilight_arm("\tadd\tx8, x9, x8, lsl #52") == + @test highlight_arm("\tadd\tx8, x9, x8, lsl #52") == "\t$(I)add$(XI)\t$(V)x8$(XV)$COM $(V)x9$(XV)$COM $(V)x8$(XV)$COM " * "$(K)lsl$(XK) $(N)#52$(XN)\n" - @test hilight_arm("\tfcsel\td1, d0, d1, eq") == + @test highlight_arm("\tfcsel\td1, d0, d1, eq") == "\t$(I)fcsel$(XI)\t" * "$(V)d1$(XV)$COM $(V)d0$(XV)$COM $(V)d1$(XV)$COM $(K)eq$(XK)\n" end @testset "NEON" begin - hilight_arm("\tvmul.f32\tq8, q9, q8") == + highlight_arm("\tvmul.f32\tq8, q9, q8") == "\t$(I)vmul.f32$(XI)\t$(V)q8$(XV)$COM $(V)q9$(XV)$COM $(V)q8$(XV)\n" - hilight_arm("\tvcvt.s32.f64\ts2, d20") == + highlight_arm("\tvcvt.s32.f64\ts2, d20") == "\t$(I)vcvt.s32.f64$(XI)\t$(V)s2$(XV)$COM $(V)d20$(XV)\n" - hilight_arm("\tvld1.32\t{d18, d19}, [r1]") == + highlight_arm("\tvld1.32\t{d18, d19}, [r1]") == "\t$(I)vld1.32$(XI)\t$U$(V)d18$(XV)$COM $(V)d19$(XV)$XU$COM $S$(V)r1$(XV)$XS\n" end @testset "SVE" begin - hilight_arm("\tld1d\tz1.d, p0/z, [x0, x4, lsl #3]") == + 
highlight_arm("\tld1d\tz1.d, p0/z, [x0, x4, lsl #3]") == "\t$(I)ld1d$(XI)\t$(V)z1.d$(XV)$COM " * "$(V)p0$(XV)$(K)/z$(XK)$COM " * "$S$(V)x0$(XV)$COM $(V)x4$(XV)$COM $(K)lsl$(XK) $(N)#3$(XN)$XS\n" - hilight_arm("\tb.first\tL123") == "\t$(I)b.first$(XI)\t$(L)L123$(XL)" + highlight_arm("\tb.first\tL123") == "\t$(I)b.first$(XI)\t$(L)L123$(XL)" end end diff --git a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl index 50236e7c8cfc5..10dfbd1aca0ed 100644 --- a/stdlib/InteractiveUtils/test/runtests.jl +++ b/stdlib/InteractiveUtils/test/runtests.jl @@ -51,6 +51,23 @@ tag = "UNION" @test warntype_hastag(pos_unstable, Tuple{Float64}, tag) @test !warntype_hastag(pos_stable, Tuple{Float64}, tag) +for u in Any[ + Union{Int, UInt}, + Union{Nothing, Vector{Tuple{String, Tuple{Char, Char}}}}, + Union{Char, UInt8, UInt}, + Union{Tuple{Int, Int}, Tuple{Char, Int}, Nothing}, + Union{Missing, Nothing} +] + @test InteractiveUtils.is_expected_union(u) +end + +for u in Any[ + Union{Nothing, Tuple{Vararg{Char}}}, + Union{Missing, Array}, + Union{Int, Tuple{Any, Int}} +] + @test !InteractiveUtils.is_expected_union(u) +end mutable struct Stable{T,N} A::Array{T,N} end @@ -212,7 +229,7 @@ module Tmp14173 end varinfo(Tmp14173) # warm up const MEMDEBUG = ccall(:jl_is_memdebug, Bool, ()) -@test @allocated(varinfo(Tmp14173)) < (MEMDEBUG ? 300000 : 100000) +@test @allocated(varinfo(Tmp14173)) < (MEMDEBUG ? 300000 : 125000) # PR #24997: test that `varinfo` doesn't fail when encountering `missing` module A @@ -245,7 +262,7 @@ const curmod_str = curmod === Main ? "Main" : join(curmod_name, ".") @test_throws ErrorException("\"this_is_not_defined\" is not defined in module $curmod_str") @which this_is_not_defined # issue #13264 -@test (@which vcat(1...)).name == :vcat +@test (@which vcat(1...)).name === :vcat # PR #28122, issue #25474 @test (@which [1][1]).name === :getindex @@ -314,7 +331,7 @@ end # manually generate a broken function, which will break codegen # and make sure Julia doesn't crash -@eval @noinline @Base.constprop :none f_broken_code() = 0 +@eval @noinline Base.@constprop :none f_broken_code() = 0 let m = which(f_broken_code, ()) let src = Base.uncompressed_ast(m) src.code = Any[ @@ -373,7 +390,7 @@ struct A14637 x end a14637 = A14637(0) -@test (@which a14637.x).name == :getproperty +@test (@which a14637.x).name === :getproperty @test (@functionloc a14637.x)[2] isa Integer # Issue #28615 @@ -383,6 +400,13 @@ a14637 = A14637(0) @test (@code_typed max.(Ref(true).x))[2] == Bool @test !isempty(@code_typed optimize=false max.(Ref.([5, 6])...)) +# Issue # 45889 +@test !isempty(@code_typed 3 .+ 6) +@test !isempty(@code_typed 3 .+ 6 .+ 7) +@test !isempty(@code_typed optimize=false (.- [3,4])) +@test !isempty(@code_typed optimize=false (6 .- [3,4])) +@test !isempty(@code_typed optimize=false (.- 0.5)) + # Issue #36261 @test (@code_typed max.(1 .+ 3, 5 - 7))[2] == Int f36261(x,y) = 3x + 4y @@ -432,6 +456,8 @@ end # module ReflectionTest @test_throws ArgumentError("argument is not a generic function") code_llvm(===, Tuple{Int, Int}) @test_throws ArgumentError("argument is not a generic function") code_native(===, Tuple{Int, Int}) +@test_throws ErrorException("argument tuple type must contain only types") code_native(sum, (Int64,1)) +@test_throws ErrorException("expected tuple type") code_native(sum, Vector{Int64}) # Issue #18883, code_llvm/code_native for generated functions @generated f18883() = nothing @@ -608,7 +634,7 @@ end export B41010 ms = methodswith(A41010, @__MODULE__) |> 
collect - @test ms[1].name == :B41010 + @test ms[1].name === :B41010 end # macro options should accept both literals and variables @@ -673,7 +699,7 @@ end @testset "code_llvm on opaque_closure" begin let ci = code_typed(+, (Int, Int))[1][1] - ir = Core.Compiler.inflate_ir(ci, Any[], Any[Tuple{}, Int, Int]) + ir = Core.Compiler.inflate_ir(ci) oc = Core.OpaqueClosure(ir) @test (code_llvm(devnull, oc, Tuple{Int, Int}); true) let io = IOBuffer() @@ -682,3 +708,16 @@ end end end end + +@testset "begin/end in gen_call_with_extracted_types users" begin + mktemp() do f, io + redirect_stdout(io) do + a = [1,2] + @test (@code_typed a[1:end]).second == Vector{Int} + @test (@code_llvm a[begin:2]) === nothing + @test (@code_native a[begin:end]) === nothing + end + end +end + +@test Base.infer_effects(sin, (Int,)) == InteractiveUtils.@infer_effects sin(42) diff --git a/stdlib/LLD_jll/Project.toml b/stdlib/LLD_jll/Project.toml new file mode 100644 index 0000000000000..454da0a0b221b --- /dev/null +++ b/stdlib/LLD_jll/Project.toml @@ -0,0 +1,19 @@ +name = "LLD_jll" +uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109" +version = "15.0.7+10" + +[deps] +Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a" +Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +libLLVM_jll = "8f36deef-c2a5-5394-99ed-8e07531fb29a" +Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[compat] +julia = "1.9" +libLLVM_jll = "15.0.7" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test"] diff --git a/stdlib/LLD_jll/src/LLD_jll.jl b/stdlib/LLD_jll/src/LLD_jll.jl new file mode 100644 index 0000000000000..55ccec9cc4005 --- /dev/null +++ b/stdlib/LLD_jll/src/LLD_jll.jl @@ -0,0 +1,107 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +## dummy stub for https://github.com/JuliaBinaryWrappers/LLD_jll.jl + +baremodule LLD_jll +using Base, Libdl +Base.Experimental.@compiler_options compile=min optimize=0 infer=false + +const PATH_list = String[] +const LIBPATH_list = String[] + +export lld + +# These get calculated in __init__() +const PATH = Ref("") +const LIBPATH = Ref("") +artifact_dir::String = "" +lld_path::String = "" +if Sys.iswindows() + const lld_exe = "lld.exe" +else + const lld_exe = "lld" +end + +if Sys.iswindows() + const LIBPATH_env = "PATH" + const LIBPATH_default = "" + const pathsep = ';' +elseif Sys.isapple() + const LIBPATH_env = "DYLD_FALLBACK_LIBRARY_PATH" + const LIBPATH_default = "~/lib:/usr/local/lib:/lib:/usr/lib" + const pathsep = ':' +else + const LIBPATH_env = "LD_LIBRARY_PATH" + const LIBPATH_default = "" + const pathsep = ':' +end + +function adjust_ENV!(env::Dict, PATH::String, LIBPATH::String, adjust_PATH::Bool, adjust_LIBPATH::Bool) + if adjust_LIBPATH + LIBPATH_base = get(env, LIBPATH_env, expanduser(LIBPATH_default)) + if !isempty(LIBPATH_base) + env[LIBPATH_env] = string(LIBPATH, pathsep, LIBPATH_base) + else + env[LIBPATH_env] = LIBPATH + end + end + if adjust_PATH && (LIBPATH_env != "PATH" || !adjust_LIBPATH) + if adjust_PATH + if !isempty(get(env, "PATH", "")) + env["PATH"] = string(PATH, pathsep, env["PATH"]) + else + env["PATH"] = PATH + end + end + end + return env +end + +function lld(f::Function; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) + env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH) + withenv(env...) 
do + return f(lld_path) + end +end +function lld(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true) + env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH) + return Cmd(Cmd([lld_path]); env) +end + +function init_lld_path() + # Prefer our own bundled lld, but if we don't have one, pick it up off of the PATH + # If this is an in-tree build, `lld` will live in `tools`. Otherwise, it'll be in `private_libexecdir` + for bundled_lld_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, lld_exe), + joinpath(Sys.BINDIR, "..", "tools", lld_exe), + joinpath(Sys.BINDIR, lld_exe)) + if isfile(bundled_lld_path) + global lld_path = abspath(bundled_lld_path) + return + end + end + global lld_path = something(Sys.which(lld_exe), lld_exe) +end + +function __init__() + global artifact_dir = dirname(Sys.BINDIR) + init_lld_path() + PATH[] = dirname(lld_path) + push!(PATH_list, PATH[]) + if Sys.iswindows() + # On windows, the dynamic libraries (.dll) are in Sys.BINDIR ("usr\\bin") + append!(LIBPATH_list, [joinpath(Sys.BINDIR, Base.LIBDIR, "julia"), Sys.BINDIR]) + else + append!(LIBPATH_list, [joinpath(Sys.BINDIR, Base.LIBDIR, "julia"), joinpath(Sys.BINDIR, Base.LIBDIR)]) + end + LIBPATH[] = join(LIBPATH_list, pathsep) +end + +# JLLWrappers API compatibility shims. Note that not all of these will really make sense. +# For instance, `find_artifact_dir()` won't actually be the artifact directory, because +# there isn't one. It instead returns the overall Julia prefix. +is_available() = true +find_artifact_dir() = artifact_dir +dev_jll() = error("stdlib JLLs cannot be dev'ed") +best_wrapper = nothing + +end # module libLLD_jll diff --git a/stdlib/LLD_jll/test/runtests.jl b/stdlib/LLD_jll/test/runtests.jl new file mode 100644 index 0000000000000..f8eccfe939dce --- /dev/null +++ b/stdlib/LLD_jll/test/runtests.jl @@ -0,0 +1,9 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test, Libdl, LLD_jll + +@testset "LLD_jll" begin + @test isfile(LLD_jll.lld_path) + flavor = Sys.isapple() ? "darwin" : (Sys.iswindows() ? 
"link" : "gnu") + @test success(`$(LLD_jll.lld()) -flavor $flavor --version`) +end diff --git a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl index 2196323ad35aa..5c4026291a673 100644 --- a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl +++ b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl @@ -14,9 +14,9 @@ export llvmlibunwind # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -llvmlibunwind_handle = C_NULL -llvmlibunwind_path = "" +artifact_dir::String = "" +llvmlibunwind_handle::Ptr{Cvoid} = C_NULL +llvmlibunwind_path::String = "" const llvmlibunwind = "libunwind" diff --git a/stdlib/LazyArtifacts.version b/stdlib/LazyArtifacts.version new file mode 100644 index 0000000000000..4246ec3ad5d1a --- /dev/null +++ b/stdlib/LazyArtifacts.version @@ -0,0 +1,4 @@ +LAZYARTIFACTS_BRANCH = main +LAZYARTIFACTS_SHA1 = e9a36338d5d0dfa4b222f4e11b446cbb7ea5836c +LAZYARTIFACTS_GIT_URL := https://github.com/JuliaPackaging/LazyArtifacts.jl.git +LAZYARTIFACTS_TAR_URL = https://api.github.com/repos/JuliaPackaging/LazyArtifacts.jl/tarball/$1 diff --git a/stdlib/LazyArtifacts/Project.toml b/stdlib/LazyArtifacts/Project.toml deleted file mode 100644 index ea9afc9d12dba..0000000000000 --- a/stdlib/LazyArtifacts/Project.toml +++ /dev/null @@ -1,12 +0,0 @@ -name = "LazyArtifacts" -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[deps] -Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" - -[extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["Test"] diff --git a/stdlib/LazyArtifacts/docs/src/index.md b/stdlib/LazyArtifacts/docs/src/index.md deleted file mode 100644 index 9de6b219c6988..0000000000000 --- a/stdlib/LazyArtifacts/docs/src/index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Lazy Artifacts - -```@meta -DocTestSetup = :(using LazyArtifacts) -``` - -In order for a package to download artifacts lazily, `LazyArtifacts` must be -explicitly listed as a dependency of that package. - -For further information on artifacts, see [Artifacts](@ref). diff --git a/stdlib/LazyArtifacts/src/LazyArtifacts.jl b/stdlib/LazyArtifacts/src/LazyArtifacts.jl deleted file mode 100644 index b783276ac6081..0000000000000 --- a/stdlib/LazyArtifacts/src/LazyArtifacts.jl +++ /dev/null @@ -1,15 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module LazyArtifacts - -# reexport the Artifacts API -using Artifacts: Artifacts, - artifact_exists, artifact_path, artifact_meta, artifact_hash, - select_downloadable_artifacts, find_artifacts_toml, @artifact_str -export artifact_exists, artifact_path, artifact_meta, artifact_hash, - select_downloadable_artifacts, find_artifacts_toml, @artifact_str - -# define a function for satisfying lazy Artifact downloads -using Pkg.Artifacts: ensure_artifact_installed - -end diff --git a/stdlib/LazyArtifacts/test/Artifacts.toml b/stdlib/LazyArtifacts/test/Artifacts.toml deleted file mode 100644 index 4b715b74c128b..0000000000000 --- a/stdlib/LazyArtifacts/test/Artifacts.toml +++ /dev/null @@ -1,155 +0,0 @@ -[[HelloWorldC]] -arch = "aarch64" -git-tree-sha1 = "95fce80ec703eeb5f4270fef6821b38d51387499" -os = "macos" - - [[HelloWorldC.download]] - sha256 = "23f45918421881de8e9d2d471c70f6b99c26edd1dacd7803d2583ba93c8bbb28" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-apple-darwin.tar.gz" -[[HelloWorldC]] -arch = "aarch64" -git-tree-sha1 = "1ccbaad776766366943fd5a66a8cbc9877ee8df9" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "82bca07ff25a75875936116ca977285160a2afcc4f58dd160c7b1600f55da655" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-gnu.tar.gz" -[[HelloWorldC]] -arch = "aarch64" -git-tree-sha1 = "dc43ab874611cfc26641741c31b8230276d7d664" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "36b7c554f1cb04d5282b991c66a10b2100085ac8deb2156bf52b4f7c4e406c04" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-musl.tar.gz" -[[HelloWorldC]] -arch = "armv6l" -call_abi = "eabihf" -git-tree-sha1 = "b7128521583d02d2dbe9c8de6fe156b79df781d9" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "5e094b9c6e4c6a77ecc8dfc2b841ac1f2157f6a81f4c47f1e0d3e9a04eec7945" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-gnueabihf.tar.gz" -[[HelloWorldC]] -arch = "armv6l" -call_abi = "eabihf" -git-tree-sha1 = "edb3893a154519d6786234f5c83994c34e11feed" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "0a2203f061ba2ef7ce4c452ec7874be3acc6db1efac8091f85d113c3404e6bb6" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-musleabihf.tar.gz" -[[HelloWorldC]] -arch = "armv7l" -call_abi = "eabihf" -git-tree-sha1 = "5a8288c8a30578c0d0f24a9cded29579517ce7a8" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "a4392a4c8f834c97f9d8822ddfb1813d8674fa602eeaf04d6359c0a9e98478ec" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-gnueabihf.tar.gz" -[[HelloWorldC]] -arch = "armv7l" -call_abi = "eabihf" -git-tree-sha1 = "169c261b321c4dc95894cdd2db9d0d0caa84677f" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "ed1aacbf197a6c78988725a39defad130ed31a2258f8e7846f73b459821f21d3" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-musleabihf.tar.gz" -[[HelloWorldC]] -arch = "i686" 
-git-tree-sha1 = "fd35f9155dc424602d01fbf983eb76be3217a28f" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "048fcff5ff47a3cc1e84a2688935fcd658ad1c7e7c52c0e81fe88ce6c3697aba" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-gnu.tar.gz" -[[HelloWorldC]] -arch = "i686" -git-tree-sha1 = "8db14df0f1d2a3ed9c6a7b053a590ca6527eb95e" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "d521b4420392b8365de5ed0ef38a3b6c822665d7c257d3eef6f725c205bb3d78" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-musl.tar.gz" -[[HelloWorldC]] -arch = "i686" -git-tree-sha1 = "56f82168947b8dc7bb98038f063209b9f864eaff" -os = "windows" - - [[HelloWorldC.download]] - sha256 = "de578cf5ee2f457e9ff32089cbe17d03704a929980beddf4c41f4c0eb32f19c6" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-w64-mingw32.tar.gz" -[[HelloWorldC]] -arch = "powerpc64le" -git-tree-sha1 = "9c8902b62f5b1aaa7c2839c804bed7c3a0912c7b" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "63ddbfbb6ea0cafef544cc25415e7ebee6ee0a69db0878d0d4e1ed27c0ae0ab5" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.powerpc64le-linux-gnu.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "f8ab5a03697f9afc82210d8a2be1d94509aea8bc" -os = "macos" - - [[HelloWorldC.download]] - sha256 = "f5043338613672b12546c59359c7997c5381a9a60b86aeb951dee74de428d5e3" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-apple-darwin.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "1ed3d81088f16e3a1fa4e3d4c4c509b8c117fecf" -libc = "glibc" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "a18212e7984b08b23bec06e8bf9286a89b9fa2e8ee0dd46af3b852fe22013a4f" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-gnu.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "c04ef757b8bb773d17a0fd0ea396e52db1c7c385" -libc = "musl" -os = "linux" - - [[HelloWorldC.download]] - sha256 = "7a3d1b09410989508774f00e073ea6268edefcaba7617fc5085255ec8e82555b" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-musl.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "5f7e7abf7d545a1aaa368f22e3e01ea0268870b1" -os = "freebsd" - - [[HelloWorldC.download]] - sha256 = "56aedffe38fe20294e93cfc2eb0a193c8e2ddda5a697b302e77ff48ac1195198" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-unknown-freebsd.tar.gz" -[[HelloWorldC]] -arch = "x86_64" -git-tree-sha1 = "2f1a6d4f82cd1eea785a5141b992423c09491f1b" -os = "windows" - - [[HelloWorldC.download]] - sha256 = "aad77a16cbc9752f6ec62549a28c7e9f3f7f57919f6fa9fb924e0c669b11f8c4" - url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-w64-mingw32.tar.gz" - -[socrates] -git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239" -lazy = true - - [[socrates.download]] - url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.gz" - sha256 = 
"e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58" - - [[socrates.download]] - url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.bz2" - sha256 = "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76" diff --git a/stdlib/LazyArtifacts/test/runtests.jl b/stdlib/LazyArtifacts/test/runtests.jl deleted file mode 100644 index 1c8bbee269144..0000000000000 --- a/stdlib/LazyArtifacts/test/runtests.jl +++ /dev/null @@ -1,31 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using LazyArtifacts -using Test - -mktempdir() do tempdir - LazyArtifacts.Artifacts.with_artifacts_directory(tempdir) do - redirect_stderr(devnull) do - socrates_dir = artifact"socrates" - @test isdir(socrates_dir) - end - ex = @test_throws ErrorException artifact"HelloWorldC" - @test startswith(ex.value.msg, "Artifact \"HelloWorldC\" was not found") - end -end - -# Need to set depwarn flag before testing deprecations -@test success(run(setenv(`$(Base.julia_cmd()) --depwarn=no --startup-file=no -e ' - using Artifacts, Pkg - using Test - mktempdir() do tempdir - Artifacts.with_artifacts_directory(tempdir) do - redirect_stderr(devnull) do - socrates_dir = @test_logs( - (:warn, "using Pkg instead of using LazyArtifacts is deprecated"), - artifact"socrates") - @test isdir(socrates_dir) - end - end - end'`, - dir=@__DIR__))) diff --git a/stdlib/LibCURL.version b/stdlib/LibCURL.version index 715ca76a40cdf..216ab4e7aca22 100644 --- a/stdlib/LibCURL.version +++ b/stdlib/LibCURL.version @@ -1,4 +1,4 @@ LIBCURL_BRANCH = master -LIBCURL_SHA1 = fd8af649b38ae20c3ff7f5dca53753512ca00376 +LIBCURL_SHA1 = a65b64f6eabc932f63c2c0a4a5fb5d75f3e688d0 LIBCURL_GIT_URL := https://github.com/JuliaWeb/LibCURL.jl.git LIBCURL_TAR_URL = https://api.github.com/repos/JuliaWeb/LibCURL.jl/tarball/$1 diff --git a/stdlib/LibCURL_jll/Project.toml b/stdlib/LibCURL_jll/Project.toml index 3719fcbf37bef..aa84637e0dc82 100644 --- a/stdlib/LibCURL_jll/Project.toml +++ b/stdlib/LibCURL_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibCURL_jll" uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.83.1+1" +version = "8.4.0+0" [deps] LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8" diff --git a/stdlib/LibCURL_jll/src/LibCURL_jll.jl b/stdlib/LibCURL_jll/src/LibCURL_jll.jl index 0911e68678657..cd67bfac0006a 100644 --- a/stdlib/LibCURL_jll/src/LibCURL_jll.jl +++ b/stdlib/LibCURL_jll/src/LibCURL_jll.jl @@ -14,16 +14,16 @@ export libcurl # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libcurl_handle = C_NULL -libcurl_path = "" +artifact_dir::String = "" +libcurl_handle::Ptr{Cvoid} = C_NULL +libcurl_path::String = "" if Sys.iswindows() const libcurl = "libcurl-4.dll" elseif Sys.isapple() const libcurl = "@rpath/libcurl.4.dylib" else - const libcurl = "libcurl.so" + const libcurl = "libcurl.so.4" end function __init__() diff --git a/stdlib/LibGit2/Project.toml b/stdlib/LibGit2/Project.toml index da78f70fa1005..83d359d6a4f1a 100644 --- a/stdlib/LibGit2/Project.toml +++ b/stdlib/LibGit2/Project.toml @@ -1,8 +1,10 @@ name = "LibGit2" uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" [deps] Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +LibGit2_jll = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" NetworkOptions = "ca575930-c2e3-43a9-ace4-1e988b2c1908" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" diff --git a/stdlib/LibGit2/src/LibGit2.jl b/stdlib/LibGit2/src/LibGit2.jl index 
ece246864e51f..43b960e8d509d 100644 --- a/stdlib/LibGit2/src/LibGit2.jl +++ b/stdlib/LibGit2/src/LibGit2.jl @@ -14,6 +14,8 @@ using SHA: sha1, sha256 export with, GitRepo, GitConfig +using LibGit2_jll + const GITHUB_REGEX = r"^(?:(?:ssh://)?git@|git://|https://(?:[\w\.\+\-]+@)?)github.com[:/](([^/].+)/(.+?))(?:\.git)?$"i @@ -87,7 +89,7 @@ is in the repository. # Examples ```julia-repl -julia> repo = LibGit2.GitRepo(repo_path); +julia> repo = GitRepo(repo_path); julia> LibGit2.add!(repo, test_file); @@ -230,7 +232,7 @@ Return `true` if `a`, a [`GitHash`](@ref) in string form, is an ancestor of # Examples ```julia-repl -julia> repo = LibGit2.GitRepo(repo_path); +julia> repo = GitRepo(repo_path); julia> LibGit2.add!(repo, test_file1); @@ -477,7 +479,7 @@ current changes. Note that this detaches the current HEAD. # Examples ```julia -repo = LibGit2.init(repo_path) +repo = LibGit2.GitRepo(repo_path) open(joinpath(LibGit2.path(repo), "file1"), "w") do f write(f, "111\n") end @@ -594,6 +596,44 @@ function clone(repo_url::AbstractString, repo_path::AbstractString; return repo end +""" + connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION; kwargs...) + +Open a connection to a remote. `direction` can be either `DIRECTION_FETCH` +or `DIRECTION_PUSH`. + +The keyword arguments are: + * `credentials::Creds=nothing`: provides credentials and/or settings when authenticating + against a private repository. + * `callbacks::Callbacks=Callbacks()`: user provided callbacks and payloads. +""" +function connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION; + credentials::Creds=nothing, + callbacks::Callbacks=Callbacks()) + cred_payload = reset!(CredentialPayload(credentials)) + if !haskey(callbacks, :credentials) + callbacks[:credentials] = (credentials_cb(), cred_payload) + elseif haskey(callbacks, :credentials) && credentials !== nothing + throw(ArgumentError(string( + "Unable to both use the provided `credentials` as a payload when the ", + "`callbacks` also contain a credentials payload."))) + end + + remote_callbacks = RemoteCallbacks(callbacks) + try + connect(rmt, direction, remote_callbacks) + catch err + if isa(err, GitError) && err.code === Error.EAUTH + reject(cred_payload) + else + Base.shred!(cred_payload) + end + rethrow() + end + approve(cred_payload) + return rmt +end + """ git reset [<committish>] [--] <pathspecs>... """ function reset!(repo::GitRepo, committish::AbstractString, pathspecs::AbstractString...) obj = GitObject(repo, isempty(committish) ? Consts.HEAD_FILE : committish) @@ -848,7 +888,7 @@ function rebase!(repo::GitRepo, upstream::AbstractString="", newbase::AbstractSt end finally if !isempty(newbase) - close(onto_ann) + close(onto_ann::GitAnnotated) end close(upst_ann) close(head_ann) @@ -983,7 +1023,7 @@ function ensure_initialized() end @noinline function initialize() - @check ccall((:git_libgit2_init, :libgit2), Cint, ()) + @check ccall((:git_libgit2_init, libgit2), Cint, ()) cert_loc = NetworkOptions.ca_roots() cert_loc !== nothing && set_ssl_cert_locations(cert_loc) @@ -991,7 +1031,7 @@ end atexit() do # refcount zero, no objects to be finalized if Threads.atomic_sub!(REFCOUNT, 1) == 1 - ccall((:git_libgit2_shutdown, :libgit2), Cint, ()) + ccall((:git_libgit2_shutdown, libgit2), Cint, ()) end end end @@ -1003,7 +1043,7 @@ function set_ssl_cert_locations(cert_loc) else # files, /dev/null, non-existent paths, etc.
cert_file = cert_loc end - ret = @ccall "libgit2".git_libgit2_opts( + ret = @ccall libgit2.git_libgit2_opts( Consts.SET_SSL_CERT_LOCATIONS::Cint; cert_file::Cstring, cert_dir::Cstring)::Cint @@ -1029,7 +1069,7 @@ end Sets the system tracing configuration to the specified level. """ function trace_set(level::Union{Integer,Consts.GIT_TRACE_LEVEL}, cb=trace_cb()) - @check @ccall "libgit2".git_trace_set(level::Cint, cb::Ptr{Cvoid})::Cint + @check @ccall libgit2.git_trace_set(level::Cint, cb::Ptr{Cvoid})::Cint end end # module diff --git a/stdlib/LibGit2/src/blame.jl b/stdlib/LibGit2/src/blame.jl index 3aa94e30200b4..89071ea9c6f79 100644 --- a/stdlib/LibGit2/src/blame.jl +++ b/stdlib/LibGit2/src/blame.jl @@ -11,7 +11,7 @@ which commits to probe - see [`BlameOptions`](@ref) for more information. function GitBlame(repo::GitRepo, path::AbstractString; options::BlameOptions=BlameOptions()) ensure_initialized() blame_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_blame_file, :libgit2), Cint, + @check ccall((:git_blame_file, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{BlameOptions}), blame_ptr_ptr, repo.ptr, path, Ref(options)) return GitBlame(repo, blame_ptr_ptr[]) @@ -27,7 +27,7 @@ that function later. """ function counthunks(blame::GitBlame) ensure_initialized() - return ccall((:git_blame_get_hunk_count, :libgit2), Int32, (Ptr{Cvoid},), blame.ptr) + return ccall((:git_blame_get_hunk_count, libgit2), Int32, (Ptr{Cvoid},), blame.ptr) end function Base.getindex(blame::GitBlame, i::Integer) @@ -36,7 +36,7 @@ function Base.getindex(blame::GitBlame, i::Integer) end ensure_initialized() GC.@preserve blame begin - hunk_ptr = ccall((:git_blame_get_hunk_byindex, :libgit2), + hunk_ptr = ccall((:git_blame_get_hunk_byindex, libgit2), Ptr{BlameHunk}, (Ptr{Cvoid}, Csize_t), blame.ptr, i-1) elem = unsafe_load(hunk_ptr) diff --git a/stdlib/LibGit2/src/blob.jl b/stdlib/LibGit2/src/blob.jl index efd7a14c9c6f7..1941989b5f529 100644 --- a/stdlib/LibGit2/src/blob.jl +++ b/stdlib/LibGit2/src/blob.jl @@ -2,7 +2,7 @@ function Base.length(blob::GitBlob) ensure_initialized() - return ccall((:git_blob_rawsize, :libgit2), Int64, (Ptr{Cvoid},), blob.ptr) + return ccall((:git_blob_rawsize, libgit2), Int64, (Ptr{Cvoid},), blob.ptr) end """ @@ -20,7 +20,7 @@ is binary and not valid Unicode. """ function rawcontent(blob::GitBlob) ensure_initialized() - ptr = ccall((:git_blob_rawcontent, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob.ptr) + ptr = ccall((:git_blob_rawcontent, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob.ptr) copy(unsafe_wrap(Array, ptr, (length(blob),), own = false)) end @@ -47,7 +47,7 @@ the first 8000 bytes. 
""" function isbinary(blob::GitBlob) ensure_initialized() - bin_flag = ccall((:git_blob_is_binary, :libgit2), Cint, (Ptr{Cvoid},), blob.ptr) + bin_flag = ccall((:git_blob_is_binary, libgit2), Cint, (Ptr{Cvoid},), blob.ptr) return bin_flag == 1 end @@ -67,7 +67,7 @@ id = LibGit2.addblob!(repo, blob_file) function addblob!(repo::GitRepo, path::AbstractString) ensure_initialized() id_ref = Ref{GitHash}() - @check ccall((:git_blob_create_from_disk, :libgit2), Cint, + @check ccall((:git_blob_create_from_disk, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Cstring), id_ref, repo.ptr, path) return id_ref[] diff --git a/stdlib/LibGit2/src/callbacks.jl b/stdlib/LibGit2/src/callbacks.jl index 83ac58010ac32..043e04e0dfad6 100644 --- a/stdlib/LibGit2/src/callbacks.jl +++ b/stdlib/LibGit2/src/callbacks.jl @@ -9,7 +9,7 @@ function mirror_callback(remote::Ptr{Ptr{Cvoid}}, repo_ptr::Ptr{Cvoid}, ensure_initialized() # Create the remote with a mirroring url fetch_spec = "+refs/*:refs/*" - err = ccall((:git_remote_create_with_fetchspec, :libgit2), Cint, + err = ccall((:git_remote_create_with_fetchspec, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Cstring), remote, repo_ptr, name, url, fetch_spec) err != 0 && return Cint(err) @@ -43,7 +43,7 @@ end function user_abort() ensure_initialized() # Note: Potentially it could be better to just throw a Julia error. - ccall((:giterr_set_str, :libgit2), Cvoid, + ccall((:giterr_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), "Aborting, user cancelled credential request.") return Cint(Error.EUSER) @@ -51,7 +51,7 @@ end function prompt_limit() ensure_initialized() - ccall((:giterr_set_str, :libgit2), Cvoid, + ccall((:giterr_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), "Aborting, maximum number of prompts reached.") return Cint(Error.EAUTH) @@ -59,7 +59,7 @@ end function exhausted_abort() ensure_initialized() - ccall((:giterr_set_str, :libgit2), Cvoid, + ccall((:giterr_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), "All authentication methods have failed.") return Cint(Error.EAUTH) @@ -79,7 +79,7 @@ function authenticate_ssh(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPayload, # first try ssh-agent if credentials support its usage if p.use_ssh_agent && username_ptr != Cstring(C_NULL) && (!revised || !isfilled(cred)) - err = ccall((:git_cred_ssh_key_from_agent, :libgit2), Cint, + err = ccall((:git_cred_ssh_key_from_agent, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring), libgit2credptr, username_ptr) p.use_ssh_agent = false # use ssh-agent only one time @@ -175,7 +175,7 @@ function authenticate_ssh(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPayload, if !revised return exhausted_abort() end - return ccall((:git_cred_ssh_key_new, :libgit2), Cint, + return ccall((:git_cred_ssh_key_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cstring, Cstring, Cstring), libgit2credptr, cred.user, cred.pubkey, cred.prvkey, cred.pass) end @@ -195,9 +195,9 @@ function authenticate_userpass(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPay if p.use_git_helpers && (!revised || !isfilled(cred)) git_cred = GitCredential(p.config, p.url) - # Use `deepcopy` to ensure shredding the `git_cred` does not shred the `cred`s copy + # Use `copy` to ensure shredding the `git_cred` does not shred the `cred`s copy cred.user = something(git_cred.username, "") - cred.pass = deepcopy(something(git_cred.password, "")) + cred.pass = git_cred.password !== nothing ? 
copy(git_cred.password) : "" Base.shred!(git_cred) revised = true @@ -235,7 +235,7 @@ function authenticate_userpass(libgit2credptr::Ptr{Ptr{Cvoid}}, p::CredentialPay return exhausted_abort() end - return ccall((:git_cred_userpass_plaintext_new, :libgit2), Cint, + return ccall((:git_cred_userpass_plaintext_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cstring), libgit2credptr, cred.user, cred.pass) end @@ -276,21 +276,23 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring, # information cached inside the payload. if isempty(p.url) p.url = unsafe_string(url_ptr) - m = match(URL_REGEX, p.url) + m = match(URL_REGEX, p.url)::RegexMatch p.scheme = something(m[:scheme], SubString("")) p.username = something(m[:user], SubString("")) - p.host = m[:host] + p.host = something(m[:host]) # When an explicit credential is supplied we will make sure to use the given # credential during the first callback by modifying the allowed types. The # modification only is in effect for the first callback since `allowed_types` cannot # be mutated. - if p.explicit !== nothing - cred = p.explicit + cache = p.cache + explicit = p.explicit + if explicit !== nothing + cred = explicit # Copy explicit credentials to avoid mutating approved credentials. - # invalidation fix from cred being non-inferrable + # invalidation fix from cred being non-inferable p.credential = Base.invokelatest(deepcopy, cred) if isa(cred, SSHCredential) @@ -300,16 +302,15 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring, else allowed_types &= Cuint(0) # Unhandled credential type end - elseif p.cache !== nothing + elseif cache !== nothing cred_id = credential_identifier(p.scheme, p.host) # Perform a deepcopy as we do not want to mutate approved cached credentials - if haskey(p.cache, cred_id) - # invalidation fix from p.cache[cred_id] being non-inferrable - p.credential = Base.invokelatest(deepcopy, p.cache[cred_id]) + if haskey(cache, cred_id) + # invalidation fix from cache[cred_id] being non-inferable + p.credential = Base.invokelatest(deepcopy, cache[cred_id]) end end - p.first_pass = true else p.first_pass = false @@ -338,7 +339,7 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring, if err == 0 if p.explicit !== nothing ensure_initialized() - ccall((:giterr_set_str, :libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), + ccall((:giterr_set_str, libgit2), Cvoid, (Cint, Cstring), Cint(Error.Callback), "The explicitly provided credential is incompatible with the requested " * "authentication methods.") end @@ -447,7 +448,7 @@ function ssh_knownhost_check( ) if (m = match(r"^(.+):(\d+)$", host)) !== nothing host = m.captures[1] - port = parse(Int, m.captures[2]) + port = parse(Int, something(m.captures[2])) else port = 22 # default SSH port end diff --git a/stdlib/LibGit2/src/commit.jl b/stdlib/LibGit2/src/commit.jl index 5d3c666af4bbb..ceb56ee45d3b7 100644 --- a/stdlib/LibGit2/src/commit.jl +++ b/stdlib/LibGit2/src/commit.jl @@ -14,8 +14,8 @@ function message(c::GitCommit, raw::Bool=false) ensure_initialized() GC.@preserve c begin local msg_ptr::Cstring - msg_ptr = raw ? ccall((:git_commit_message_raw, :libgit2), Cstring, (Ptr{Cvoid},), c.ptr) : - ccall((:git_commit_message, :libgit2), Cstring, (Ptr{Cvoid},), c.ptr) + msg_ptr = raw ? 
ccall((:git_commit_message_raw, libgit2), Cstring, (Ptr{Cvoid},), c.ptr) : + ccall((:git_commit_message, libgit2), Cstring, (Ptr{Cvoid},), c.ptr) if msg_ptr == C_NULL return nothing end @@ -33,7 +33,7 @@ the person who made changes to the relevant file(s). See also [`committer`](@ref function author(c::GitCommit) ensure_initialized() GC.@preserve c begin - ptr = ccall((:git_commit_author, :libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr) + ptr = ccall((:git_commit_author, libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr) @assert ptr != C_NULL sig = Signature(ptr) end @@ -51,7 +51,7 @@ a `committer` who committed it. function committer(c::GitCommit) ensure_initialized() GC.@preserve c begin - ptr = ccall((:git_commit_committer, :libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr) + ptr = ccall((:git_commit_committer, libgit2), Ptr{SignatureStruct}, (Ptr{Cvoid},), c.ptr) sig = Signature(ptr) end return sig @@ -74,7 +74,7 @@ function commit(repo::GitRepo, commit_id_ptr = Ref(GitHash()) nparents = length(parents) parentptrs = Ptr{Cvoid}[c.ptr for c in parents] - @check ccall((:git_commit_create, :libgit2), Cint, + @check ccall((:git_commit_create, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8}, Ptr{SignatureStruct}, Ptr{SignatureStruct}, Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid}, @@ -147,3 +147,45 @@ function commit(repo::GitRepo, msg::AbstractString; end return commit_id end + +""" + parentcount(c::GitCommit) + +Get the number of parents of this commit. + +See also [`parent`](@ref), [`parent_id`](@ref). +""" +parentcount(c::GitCommit) = + Int(ccall((:git_commit_parentcount, libgit2), Cuint, (Ptr{Cvoid},), c)) + +""" + parent(c::GitCommit, n) + +Get the `n`-th (1-based) parent of the commit. + +See also [`parentcount`](@ref), [`parent_id`](@ref). +""" +function parent(c::GitCommit, n) + ptr_ref = Ref{Ptr{Cvoid}}() + @check ccall((:git_commit_parent, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cuint), ptr_ref, c, n - 1) + return GitCommit(c.owner, ptr_ref[]) +end + +""" + parent_id(c::GitCommit, n) + +Get the oid of the `n`-th (1-based) parent for a commit. + +See also [`parentcount`](@ref), [`parent`](@ref). +""" +function parent_id(c::GitCommit, n) + oid_ptr = ccall((:git_commit_parent_id, libgit2), Ptr{GitHash}, + (Ptr{Cvoid}, Cuint), c, n - 1) + if oid_ptr == C_NULL + # 0-based indexing mimicking the error message from libgit2 + throw(GitError(Error.Invalid, Error.ENOTFOUND, + "parent $(n - 1) does not exist")) + end + return unsafe_load(oid_ptr) +end diff --git a/stdlib/LibGit2/src/config.jl b/stdlib/LibGit2/src/config.jl index a54cd352aa063..affe881abde08 100644 --- a/stdlib/LibGit2/src/config.jl +++ b/stdlib/LibGit2/src/config.jl @@ -13,7 +13,7 @@ function GitConfig(path::AbstractString, ensure_initialized() # create new config object cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_new, :libgit2), Cint, (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr) + @check ccall((:git_config_new, libgit2), Cint, (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr) cfg = GitConfig(cfg_ptr_ptr[]) try addfile(cfg, path, level, repo, force) @@ -34,7 +34,7 @@ used. function GitConfig(repo::GitRepo) ensure_initialized() cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_config, :libgit2), Cint, + @check ccall((:git_repository_config, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo.ptr) return GitConfig(repo, cfg_ptr_ptr[]) end @@ -49,14 +49,14 @@ options outside a specific git repository. 
function GitConfig(level::Consts.GIT_CONFIG = Consts.CONFIG_LEVEL_DEFAULT) ensure_initialized() cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_open_default, :libgit2), Cint, + @check ccall((:git_config_open_default, libgit2), Cint, (Ptr{Ptr{Cvoid}},), cfg_ptr_ptr) cfg = GitConfig(cfg_ptr_ptr[]) if level != Consts.CONFIG_LEVEL_DEFAULT glb_cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) tmpcfg = cfg try - @check ccall((:git_config_open_level, :libgit2), Cint, + @check ccall((:git_config_open_level, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), glb_cfg_ptr_ptr, cfg.ptr, Cint(level)) cfg = GitConfig(glb_cfg_ptr_ptr[]) @@ -90,12 +90,12 @@ function addfile(cfg::GitConfig, path::AbstractString, force::Bool=false) ensure_initialized() @static if LibGit2.VERSION >= v"0.27.0" - @check ccall((:git_config_add_file_ondisk, :libgit2), Cint, + @check ccall((:git_config_add_file_ondisk, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cint, Ptr{Cvoid}, Cint), cfg.ptr, path, Cint(level), isa(repo, GitRepo) ? repo.ptr : C_NULL, Cint(force)) else repo === nothing || error("repo argument is not supported in this version of LibGit2") - @check ccall((:git_config_add_file_ondisk, :libgit2), Cint, + @check ccall((:git_config_add_file_ondisk, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cint, Cint), cfg.ptr, path, Cint(level), Cint(force)) end @@ -104,7 +104,7 @@ end function get(::Type{<:AbstractString}, c::GitConfig, name::AbstractString) ensure_initialized() buf_ref = Ref(Buffer()) - @check ccall((:git_config_get_string_buf, :libgit2), Cint, + @check ccall((:git_config_get_string_buf, libgit2), Cint, (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c.ptr, name) buf = buf_ref[] str = unsafe_string(buf.ptr, buf.size) @@ -115,7 +115,7 @@ end function get(::Type{Bool}, c::GitConfig, name::AbstractString) ensure_initialized() val_ptr = Ref(Cint(0)) - @check ccall((:git_config_get_bool, :libgit2), Cint, + @check ccall((:git_config_get_bool, libgit2), Cint, (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name) return Bool(val_ptr[]) end @@ -123,7 +123,7 @@ end function get(::Type{Int32}, c::GitConfig, name::AbstractString) ensure_initialized() val_ptr = Ref(Cint(0)) - @check ccall((:git_config_get_int32, :libgit2), Cint, + @check ccall((:git_config_get_int32, libgit2), Cint, (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name) return val_ptr[] end @@ -131,7 +131,7 @@ end function get(::Type{Int64}, c::GitConfig, name::AbstractString) ensure_initialized() val_ptr = Ref(Cintmax_t(0)) - @check ccall((:git_config_get_int64, :libgit2), Cint, + @check ccall((:git_config_get_int64, libgit2), Cint, (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name) return val_ptr[] end @@ -164,33 +164,33 @@ end function set!(c::GitConfig, name::AbstractString, value::AbstractString) ensure_initialized() - @check ccall((:git_config_set_string, :libgit2), Cint, + @check ccall((:git_config_set_string, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cstring), c.ptr, name, value) end function set!(c::GitConfig, name::AbstractString, value::Bool) ensure_initialized() bval = Int32(value) - @check ccall((:git_config_set_bool, :libgit2), Cint, + @check ccall((:git_config_set_bool, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, bval) end function set!(c::GitConfig, name::AbstractString, value::Int32) ensure_initialized() - @check ccall((:git_config_set_int32, :libgit2), Cint, + @check ccall((:git_config_set_int32, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, value) end function set!(c::GitConfig, name::AbstractString, 
value::Int64) ensure_initialized() - @check ccall((:git_config_set_int64, :libgit2), Cint, + @check ccall((:git_config_set_int64, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cintmax_t), c.ptr, name, value) end function GitConfigIter(cfg::GitConfig) ensure_initialized() ci_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_iterator_new, :libgit2), Cint, + @check ccall((:git_config_iterator_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg.ptr) return GitConfigIter(ci_ptr[]) end @@ -198,7 +198,7 @@ end function GitConfigIter(cfg::GitConfig, name::AbstractString) ensure_initialized() ci_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_multivar_iterator_new, :libgit2), Cint, + @check ccall((:git_config_multivar_iterator_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring), ci_ptr, cfg.ptr, name, C_NULL) return GitConfigIter(ci_ptr[]) @@ -207,7 +207,7 @@ end function GitConfigIter(cfg::GitConfig, name::AbstractString, value::Regex) ensure_initialized() ci_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_multivar_iterator_new, :libgit2), Cint, + @check ccall((:git_config_multivar_iterator_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring), ci_ptr, cfg.ptr, name, value.pattern) return GitConfigIter(ci_ptr[]) @@ -216,7 +216,7 @@ end function GitConfigIter(cfg::GitConfig, name::Regex) ensure_initialized() ci_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_config_iterator_glob_new, :libgit2), Cint, + @check ccall((:git_config_iterator_glob_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), ci_ptr, cfg.ptr, name.pattern) return GitConfigIter(ci_ptr[]) @@ -225,7 +225,7 @@ end function Base.iterate(ci::GitConfigIter, state=nothing) ensure_initialized() entry_ptr_ptr = Ref{Ptr{ConfigEntry}}(C_NULL) - err = ccall((:git_config_next, :libgit2), Cint, + err = ccall((:git_config_next, libgit2), Cint, (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci.ptr) if err == Cint(Error.GIT_OK) return (unsafe_load(entry_ptr_ptr[]), nothing) diff --git a/stdlib/LibGit2/src/consts.jl b/stdlib/LibGit2/src/consts.jl index f3a460108db6b..1f28a3bdbe887 100644 --- a/stdlib/LibGit2/src/consts.jl +++ b/stdlib/LibGit2/src/consts.jl @@ -10,12 +10,15 @@ const REMOTE_ORIGIN = "origin" # objs @enum(OBJECT, - OBJ_ANY = -2, - OBJ_BAD = -1, - OBJ_COMMIT = 1, - OBJ_TREE = 2, - OBJ_BLOB = 3, - OBJ_TAG = 4) + OBJ_ANY = -2, + OBJ_BAD = -1, + OBJ_COMMIT = 1, + OBJ_TREE = 2, + OBJ_BLOB = 3, + OBJ_TAG = 4, + OBJ_OFS_DELTA = 6, + OBJ_REF_DELTA = 7) +const OBJ_INVALID = OBJ_BAD #revwalk const SORT_NONE = Cint(0) @@ -26,8 +29,10 @@ const SORT_REVERSE = Cint(1 << 2) # refs const REF_INVALID = Cint(0) const REF_OID = Cint(1) +const REF_DIRECT = REF_OID const REF_SYMBOLIC = Cint(2) const REF_LISTALL = REF_OID | REF_SYMBOLIC +const REF_ALL = REF_LISTALL # blame const BLAME_NORMAL = Cuint(0) @@ -36,6 +41,8 @@ const BLAME_TRACK_COPIES_SAME_COMMIT_MOVES = Cuint(1 << 1) const BLAME_TRACK_COPIES_SAME_COMMIT_COPIES = Cuint(1 << 2) const BLAME_TRACK_COPIES_ANY_COMMIT_COPIES = Cuint(1 << 3) const BLAME_FIRST_PARENT = Cuint(1 << 4) +const BLAME_USE_MAILMAP = Cuint(1 << 5) +const BLAME_IGNORE_WHITESPACE = Cuint(1 << 6) # checkout const CHECKOUT_NONE = Cuint(0) @@ -57,6 +64,9 @@ const CHECKOUT_DONT_OVERWRITE_IGNORED = Cuint(1 << 19) const CHECKOUT_CONFLICT_STYLE_MERGE = Cuint(1 << 20) const CHECKOUT_CONFLICT_STYLE_DIFF3 = Cuint(1 << 21) const CHECKOUT_DONT_REMOVE_EXISTING = Cuint(1 << 22) +const CHECKOUT_DONT_WRITE_INDEX = Cuint(1 << 23) +const CHECKOUT_DRY_RUN = Cuint(1 << 24) 
+const CHECKOUT_CONFLICT_STYLE_ZDIFF3 = Cuint(1 << 25) const CHECKOUT_UPDATE_SUBMODULES = Cuint(1 << 16) const CHECKOUT_UPDATE_SUBMODULES_IF_CHANGED = Cuint(1 << 17) @@ -87,6 +97,11 @@ const DIFF_IGNORE_CASE = Cuint(1 << 10) const DIFF_DISABLE_PATHSPEC_MATCH = Cuint(1 << 12) const DIFF_SKIP_BINARY_CHECK = Cuint(1 << 13) const DIFF_ENABLE_FAST_UNTRACKED_DIRS = Cuint(1 << 14) +const DIFF_UPDATE_INDEX = Cuint(1 << 15) +const DIFF_INCLUDE_UNREADABLE = Cuint(1 << 16) +const DIFF_INCLUDE_UNREADABLE_AS_UNTRACKED = Cuint(1 << 17) +const DIFF_INDENT_HEURISTIC = Cuint(1 << 18) +const DIFF_IGNORE_BLANK_LINES = Cuint(1 << 19) const DIFF_FORCE_TEXT = Cuint(1 << 20) const DIFF_FORCE_BINARY = Cuint(1 << 21) @@ -97,16 +112,20 @@ const DIFF_SHOW_UNTRACKED_CONTENT = Cuint(1 << 25) const DIFF_SHOW_UNMODIFIED = Cuint(1 << 26) const DIFF_PATIENCE = Cuint(1 << 28) const DIFF_MINIMAL = Cuint(1 << 29) +const DIFF_SHOW_BINARY = Cuint(1 << 30) const DIFF_FLAG_BINARY = Cuint(1 << 0) const DIFF_FLAG_NOT_BINARY = Cuint(1 << 1) const DIFF_FLAG_VALID_OID = Cuint(1 << 2) +const DIFF_FLAG_EXISTS = Cuint(1 << 3) +const DIFF_FLAG_VALID_SIZE = Cuint(1 << 4) const DIFF_FORMAT_PATCH = Cuint(1) const DIFF_FORMAT_PATCH_HEADER = Cuint(2) const DIFF_FORMAT_RAW = Cuint(3) const DIFF_FORMAT_NAME_ONLY = Cuint(4) const DIFF_FORMAT_NAME_STATUS = Cuint(5) +const DIFF_FORMAT_PATCH_ID = Cuint(6) @enum(DELTA_STATUS, DELTA_UNMODIFIED = Cint(0), DELTA_ADDED = Cint(1), @@ -116,7 +135,9 @@ const DIFF_FORMAT_NAME_STATUS = Cuint(5) DELTA_COPIED = Cint(5), DELTA_IGNORED = Cint(6), DELTA_UNTRACKED = Cint(7), - DELTA_TYPECHANGE = Cint(8)) + DELTA_TYPECHANGE = Cint(8), + DELTA_UNREADABLE = Cint(9), + DELTA_CONFLICTED = Cint(10)) # index const IDXENTRY_NAMEMASK = (0x0fff) @@ -165,7 +186,8 @@ const INDEX_STAGE_ANY = Cint(-1) @enum(GIT_MERGE, MERGE_FIND_RENAMES = 1 << 0, MERGE_FAIL_ON_CONFLICT = 1 << 1, MERGE_SKIP_REUC = 1 << 2, - MERGE_NO_RECURSIVE = 1 << 3) + MERGE_NO_RECURSIVE = 1 << 3, + MERGE_VIRTUAL_BASE = 1 << 4) @enum(GIT_MERGE_FILE, MERGE_FILE_DEFAULT = 0, # Defaults MERGE_FILE_STYLE_MERGE = 1 << 0, # Create standard conflicted merge files @@ -175,7 +197,13 @@ const INDEX_STAGE_ANY = Cint(-1) MERGE_FILE_IGNORE_WHITESPACE_CHANGE = 1 << 4, # Ignore changes in amount of whitespace MERGE_FILE_IGNORE_WHITESPACE_EOL = 1 << 5, # Ignore whitespace at end of line MERGE_FILE_DIFF_PATIENCE = 1 << 6, # Use the "patience diff" algorithm - MERGE_FILE_DIFF_MINIMAL = 1 << 7) # Take extra time to find minimal diff + MERGE_FILE_DIFF_MINIMAL = 1 << 7, # Take extra time to find minimal diff + MERGE_FILE_STYLE_ZDIFF3 = 1 << 8, # Create zdiff3 ("zealous diff3")-style files + + # Do not produce file conflicts when common regions have + # changed; keep the conflict markers in the file and accept + # that as the merge result. + MERGE_FILE_ACCEPT_CONFLICTS = 1 << 9) """ Option flags for git merge file favoritism. * `MERGE_FILE_FAVOR_NORMAL`: if both sides of the merge have changes to a section, make a note of the conflict in the index which `git checkout` will use to create @@ -316,6 +344,7 @@ const STATUS_OPT_INCLUDE_UNREADABLE_AS_UNTRACKED = Cuint(1 << 15) # certificate types from `enum git_cert_t` in `cert.h`. 
const CERT_TYPE_TLS = 1 # GIT_CERT_X509 const CERT_TYPE_SSH = 2 # GIT_CERT_HOSTKEY_LIBSSH2 +const CERT_TYPE_STRARRAY = 3 # GIT_CERT_STRARRAY # certificate callback return values const PASSTHROUGH = -30 @@ -326,6 +355,7 @@ const CERT_ACCEPT = 0 const CERT_SSH_MD5 = 1 << 0 const CERT_SSH_SHA1 = 1 << 1 const CERT_SSH_SHA256 = 1 << 2 +const CERT_SSH_RAW = 1 << 3 # libssh2 known host constants const LIBSSH2_KNOWNHOST_TYPE_PLAIN = 1 @@ -341,6 +371,10 @@ const LIBSSH2_KNOWNHOST_CHECK_MISMATCH = 1 const LIBSSH2_KNOWNHOST_CHECK_NOTFOUND = 2 const LIBSSH2_KNOWNHOST_CHECK_FAILURE = 3 +# Constants for fetch depth (shallowness of fetch). +const FETCH_DEPTH_FULL = 0 +const FETCH_DEPTH_UNSHALLOW = 2147483647 + @enum(GIT_SUBMODULE_IGNORE, SUBMODULE_IGNORE_UNSPECIFIED = -1, # use the submodule's configuration SUBMODULE_IGNORE_NONE = 1, # any change or untracked == dirty SUBMODULE_IGNORE_UNTRACKED = 2, # dirty if tracked files change @@ -357,9 +391,11 @@ Option flags for `GitRepo`. @enum(GIT_REPOSITORY_OPEN, REPOSITORY_OPEN_DEFAULT = 0, REPOSITORY_OPEN_NO_SEARCH = 1<<0, REPOSITORY_OPEN_CROSS_FS = 1<<1, - REPOSITORY_OPEN_BARE = 1<<2) + REPOSITORY_OPEN_BARE = 1<<2, + REPOSITORY_OPEN_NO_DOTGIT = 1<<3, + REPOSITORY_OPEN_FROM_ENV = 1<<4) -@enum(GIT_BRANCH, BRANCH_LOCAL = 1, BRANCH_REMOTE = 2) +@enum(GIT_BRANCH, BRANCH_LOCAL = 1, BRANCH_REMOTE = 2, BRANCH_ALL = 1 | 2) @enum(GIT_FILEMODE, FILEMODE_UNREADABLE = 0o000000, FILEMODE_TREE = 0o040000, @@ -432,19 +468,49 @@ Global library options. These are used to select which global option to set or get and are used in `git_libgit2_opts()`. """ -@enum(GIT_OPT, GET_MWINDOW_SIZE = 0, - SET_MWINDOW_SIZE = 1, - GET_MWINDOW_MAPPED_LIMIT = 2, - SET_MWINDOW_MAPPED_LIMIT = 3, - GET_SEARCH_PATH = 4, - SET_SEARCH_PATH = 5, - SET_CACHE_OBJECT_LIMIT = 6, - SET_CACHE_MAX_SIZE = 7, - ENABLE_CACHING = 8, - GET_CACHED_MEMORY = 9, - GET_TEMPLATE_PATH = 10, - SET_TEMPLATE_PATH = 11, - SET_SSL_CERT_LOCATIONS = 12) +@enum(GIT_OPT, GET_MWINDOW_SIZE = 0, + SET_MWINDOW_SIZE, + GET_MWINDOW_MAPPED_LIMIT, + SET_MWINDOW_MAPPED_LIMIT, + GET_SEARCH_PATH, + SET_SEARCH_PATH, + SET_CACHE_OBJECT_LIMIT, + SET_CACHE_MAX_SIZE, + ENABLE_CACHING, + GET_CACHED_MEMORY, + GET_TEMPLATE_PATH, + SET_TEMPLATE_PATH, + SET_SSL_CERT_LOCATIONS, + SET_USER_AGENT, + ENABLE_STRICT_OBJECT_CREATION, + ENABLE_STRICT_SYMBOLIC_REF_CREATION, + SET_SSL_CIPHERS, + GET_USER_AGENT, + ENABLE_OFS_DELTA, + ENABLE_FSYNC_GITDIR, + GET_WINDOWS_SHAREMODE, + SET_WINDOWS_SHAREMODE, + ENABLE_STRICT_HASH_VERIFICATION, + SET_ALLOCATOR, + ENABLE_UNSAVED_INDEX_SAFETY, + GET_PACK_MAX_OBJECTS, + SET_PACK_MAX_OBJECTS, + DISABLE_PACK_KEEP_FILE_CHECKS, + ENABLE_HTTP_EXPECT_CONTINUE, + GET_MWINDOW_FILE_LIMIT, + SET_MWINDOW_FILE_LIMIT, + SET_ODB_PACKED_PRIORITY, + SET_ODB_LOOSE_PRIORITY, + GET_EXTENSIONS, + SET_EXTENSIONS, + GET_OWNER_VALIDATION, + SET_OWNER_VALIDATION, + GET_HOMEDIR, + SET_HOMEDIR, + SET_SERVER_CONNECT_TIMEOUT, + GET_SERVER_CONNECT_TIMEOUT, + SET_SERVER_TIMEOUT, + GET_SERVER_TIMEOUT) """ Option flags for `GitProxy`. @@ -468,4 +534,14 @@ Option flags for `GitProxy`. TRACE_TRACE end +# The type of object id +@enum(GIT_OID_TYPE, + OID_DEFAULT = 0, + OID_SHA1 = 1) + +# Direction of the connection. 
+@enum(GIT_DIRECTION, + DIRECTION_FETCH = 0, + DIRECTION_PUSH = 1) + end diff --git a/stdlib/LibGit2/src/diff.jl b/stdlib/LibGit2/src/diff.jl index f2aa2feb2c2e9..044c6331dc1f1 100644 --- a/stdlib/LibGit2/src/diff.jl +++ b/stdlib/LibGit2/src/diff.jl @@ -27,11 +27,11 @@ function diff_tree(repo::GitRepo, tree::GitTree, pathspecs::AbstractString=""; c ensure_initialized() diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) if cached - @check ccall((:git_diff_tree_to_index, :libgit2), Cint, + @check ccall((:git_diff_tree_to_index, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}), diff_ptr_ptr, repo.ptr, tree.ptr, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs) else - @check ccall((:git_diff_tree_to_workdir_with_index, :libgit2), Cint, + @check ccall((:git_diff_tree_to_workdir_with_index, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}), diff_ptr_ptr, repo.ptr, tree.ptr, isempty(pathspecs) ? C_NULL : pathspecs) end @@ -51,7 +51,7 @@ to compare a commit on another branch with the current latest commit on `master` function diff_tree(repo::GitRepo, oldtree::GitTree, newtree::GitTree) ensure_initialized() diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_diff_tree_to_tree, :libgit2), Cint, + @check ccall((:git_diff_tree_to_tree, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}), diff_ptr_ptr, repo.ptr, oldtree.ptr, newtree.ptr, C_NULL) return GitDiff(repo, diff_ptr_ptr[]) @@ -67,7 +67,7 @@ files were changed, how many insertions were made, and how many deletions were m function GitDiffStats(diff::GitDiff) ensure_initialized() diff_stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_diff_get_stats, :libgit2), Cint, + @check ccall((:git_diff_get_stats, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), diff_stat_ptr_ptr, diff.ptr) return GitDiffStats(diff.owner, diff_stat_ptr_ptr[]) @@ -83,7 +83,7 @@ are to be included or not). """ function files_changed(diff_stat::GitDiffStats) ensure_initialized() - return ccall((:git_diff_stats_files_changed, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) + return ccall((:git_diff_stats_files_changed, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) end """ @@ -96,7 +96,7 @@ are to be included or not). """ function insertions(diff_stat::GitDiffStats) ensure_initialized() - return ccall((:git_diff_stats_insertions, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) + return ccall((:git_diff_stats_insertions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) end """ @@ -109,12 +109,12 @@ are to be included or not). 
""" function deletions(diff_stat::GitDiffStats) ensure_initialized() - return ccall((:git_diff_stats_deletions, :libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) + return ccall((:git_diff_stats_deletions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr) end function count(diff::GitDiff) ensure_initialized() - return ccall((:git_diff_num_deltas, :libgit2), Cint, (Ptr{Cvoid},), diff.ptr) + return ccall((:git_diff_num_deltas, libgit2), Cint, (Ptr{Cvoid},), diff.ptr) end function Base.getindex(diff::GitDiff, i::Integer) @@ -122,7 +122,7 @@ function Base.getindex(diff::GitDiff, i::Integer) throw(BoundsError(diff, (i,))) end ensure_initialized() - delta_ptr = ccall((:git_diff_get_delta, :libgit2), + delta_ptr = ccall((:git_diff_get_delta, libgit2), Ptr{DiffDelta}, (Ptr{Cvoid}, Csize_t), diff.ptr, i-1) return unsafe_load(delta_ptr) diff --git a/stdlib/LibGit2/src/error.jl b/stdlib/LibGit2/src/error.jl index 219b8cdf88e69..1a493006ea1b5 100644 --- a/stdlib/LibGit2/src/error.jl +++ b/stdlib/LibGit2/src/error.jl @@ -3,6 +3,7 @@ module Error import ..LibGit2: ensure_initialized +using LibGit2_jll export GitError @@ -32,7 +33,8 @@ export GitError EMISMATCH = Cint(-33), # hashsum mismatch in object EINDEXDIRTY = Cint(-34), # unsaved changes in the index would be overwritten EAPPLYFAIL = Cint(-35), # patch application failed - EOWNER = Cint(-36)) # the object is not owned by the current user + EOWNER = Cint(-36), # the object is not owned by the current user + TIMEOUT = Cint(-37)) # The operation timed out @enum(Class, None, NoMemory, @@ -68,7 +70,9 @@ export GitError Patch, WorkTree, SHA1, - HTTP) + HTTP, + Internal, + Grafts) struct ErrorStruct message::Ptr{UInt8} @@ -84,7 +88,7 @@ Base.show(io::IO, err::GitError) = print(io, "GitError(Code:$(err.code), Class:$ function last_error() ensure_initialized() - err = ccall((:giterr_last, :libgit2), Ptr{ErrorStruct}, ()) + err = ccall((:giterr_last, libgit2), Ptr{ErrorStruct}, ()) if err != C_NULL err_obj = unsafe_load(err) err_class = Class(err_obj.class) diff --git a/stdlib/LibGit2/src/gitcredential.jl b/stdlib/LibGit2/src/gitcredential.jl index 1b97c29cd933e..ea97d87d444ae 100644 --- a/stdlib/LibGit2/src/gitcredential.jl +++ b/stdlib/LibGit2/src/gitcredential.jl @@ -46,7 +46,8 @@ function Base.shred!(cred::GitCredential) cred.host = nothing cred.path = nothing cred.username = nothing - cred.password !== nothing && Base.shred!(cred.password) + pwd = cred.password + pwd !== nothing && Base.shred!(pwd) cred.password = nothing return cred end @@ -122,7 +123,7 @@ function Base.read!(io::IO, cred::GitCredential) if key == "url" # Any components which are missing from the URL will be set to empty # https://git-scm.com/docs/git-credential#git-credential-codeurlcode - Base.shred!(parse(GitCredential, value)) do urlcred + Base.shred!(parse(GitCredential, value::AbstractString)) do urlcred copy!(cred, urlcred) end elseif key in GIT_CRED_ATTRIBUTES @@ -182,16 +183,16 @@ end function run!(helper::GitCredentialHelper, operation::AbstractString, cred::GitCredential) cmd = `$(helper.cmd) $operation` - p = open(cmd, "r+") - - # Provide the helper with the credential information we know - write(p, cred) - write(p, "\n") - t = @async close(p.in) - - # Process the response from the helper - Base.read!(p, cred) - wait(p) + open(cmd, "r+") do p + # Provide the helper with the credential information we know + write(p, cred) + write(p, "\n") + t = @async close(p.in) + + # Process the response from the helper + Base.read!(p, cred) + wait(t) + end return cred end @@ -219,7 +220,7 
@@ function credential_helpers(cfg::GitConfig, cred::GitCredential) helpers = GitCredentialHelper[] # https://git-scm.com/docs/gitcredentials#gitcredentials-helper - for entry in GitConfigIter(cfg, r"credential.*\.helper") + for entry in GitConfigIter(cfg, r"credential.*\.helper$") section, url, name, value = split_cfg_entry(entry) @assert name == "helper" diff --git a/stdlib/LibGit2/src/index.jl b/stdlib/LibGit2/src/index.jl index b8baf624540b0..15e04d16b5756 100644 --- a/stdlib/LibGit2/src/index.jl +++ b/stdlib/LibGit2/src/index.jl @@ -8,7 +8,7 @@ Load the index file for the repository `repo`. function GitIndex(repo::GitRepo) ensure_initialized() idx_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_index, :libgit2), Cint, + @check ccall((:git_repository_index, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo.ptr) return GitIndex(repo, idx_ptr_ptr[]) end @@ -25,7 +25,7 @@ has changed since the last time it was loaded into `idx`. """ function read!(idx::GitIndex, force::Bool = false) ensure_initialized() - @check ccall((:git_index_read, :libgit2), Cint, (Ptr{Cvoid}, Cint), idx.ptr, Cint(force)) + @check ccall((:git_index_read, libgit2), Cint, (Ptr{Cvoid}, Cint), idx.ptr, Cint(force)) return idx end @@ -36,7 +36,7 @@ Write the state of index `idx` to disk using a file lock. """ function write!(idx::GitIndex) ensure_initialized() - @check ccall((:git_index_write, :libgit2), Cint, (Ptr{Cvoid},), idx.ptr) + @check ccall((:git_index_write, libgit2), Cint, (Ptr{Cvoid},), idx.ptr) return idx end @@ -51,7 +51,7 @@ repository cannot be bare. `idx` must not contain any files with conflicts. function write_tree!(idx::GitIndex) ensure_initialized() oid_ptr = Ref(GitHash()) - @check ccall((:git_index_write_tree, :libgit2), Cint, + @check ccall((:git_index_write_tree, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx.ptr) return oid_ptr[] end @@ -73,7 +73,7 @@ Read the tree `tree` (or the tree pointed to by `treehash` in the repository own """ function read_tree!(idx::GitIndex, tree::GitTree) ensure_initialized() - @check ccall((:git_index_read_tree, :libgit2), Cint, + @check ccall((:git_index_read_tree, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}), idx.ptr, tree.ptr) end read_tree!(idx::GitIndex, hash::AbstractGitHash) = @@ -104,7 +104,7 @@ with respect to ignored files: function add!(idx::GitIndex, files::AbstractString...; flags::Cuint = Consts.INDEX_ADD_DEFAULT) ensure_initialized() - @check ccall((:git_index_add_all, :libgit2), Cint, + @check ccall((:git_index_add_all, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Cuint, Ptr{Cvoid}, Ptr{Cvoid}), idx.ptr, collect(files), flags, C_NULL, C_NULL) end @@ -120,7 +120,7 @@ database. """ function update!(idx::GitIndex, files::AbstractString...) ensure_initialized() - @check ccall((:git_index_update_all, :libgit2), Cint, + @check ccall((:git_index_update_all, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}), idx.ptr, collect(files), C_NULL, C_NULL) end @@ -134,7 +134,7 @@ of the `repo`). """ function remove!(idx::GitIndex, files::AbstractString...) 
ensure_initialized() - @check ccall((:git_index_remove_all, :libgit2), Cint, + @check ccall((:git_index_remove_all, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}), idx.ptr, collect(files), C_NULL, C_NULL) end @@ -173,13 +173,13 @@ end function count(idx::GitIndex) ensure_initialized() - return ccall((:git_index_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), idx.ptr) + return ccall((:git_index_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), idx.ptr) end function Base.getindex(idx::GitIndex, i::Integer) ensure_initialized() GC.@preserve idx begin - ie_ptr = ccall((:git_index_get_byindex, :libgit2), + ie_ptr = ccall((:git_index_get_byindex, libgit2), Ptr{IndexEntry}, (Ptr{Cvoid}, Csize_t), idx.ptr, i-1) ie_ptr == C_NULL && return nothing @@ -191,7 +191,7 @@ end function Base.findall(path::String, idx::GitIndex) ensure_initialized() pos_ref = Ref{Csize_t}(0) - ret = ccall((:git_index_find, :libgit2), Cint, + ret = ccall((:git_index_find, libgit2), Cint, (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx.ptr, path) ret == Cint(Error.ENOTFOUND) && return nothing return pos_ref[]+1 @@ -210,7 +210,7 @@ of a multi-branch "octopus" merge, stages `2`, `3`, and `4` might be used). """ function stage(ie::IndexEntry) ensure_initialized() - return ccall((:git_index_entry_stage, :libgit2), Cint, (Ptr{IndexEntry},), Ref(ie)) + return ccall((:git_index_entry_stage, libgit2), Cint, (Ptr{IndexEntry},), Ref(ie)) end function Base.show(io::IO, idx::GitIndex) diff --git a/stdlib/LibGit2/src/merge.jl b/stdlib/LibGit2/src/merge.jl index 0b2ddab1e8512..7c946315fdd86 100644 --- a/stdlib/LibGit2/src/merge.jl +++ b/stdlib/LibGit2/src/merge.jl @@ -16,7 +16,7 @@ branch head described using `GitReference`. function GitAnnotated(repo::GitRepo, commit_id::GitHash) ensure_initialized() ann_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_annotated_commit_lookup, :libgit2), Cint, + @check ccall((:git_annotated_commit_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}), ann_ptr_ptr, repo.ptr, Ref(commit_id)) return GitAnnotated(repo, ann_ptr_ptr[]) @@ -25,7 +25,7 @@ end function GitAnnotated(repo::GitRepo, ref::GitReference) ensure_initialized() ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_annotated_commit_from_ref, :libgit2), Cint, + @check ccall((:git_annotated_commit_from_ref, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}), ann_ref_ref, repo.ptr, ref.ptr) return GitAnnotated(repo, ann_ref_ref[]) @@ -34,7 +34,7 @@ end function GitAnnotated(repo::GitRepo, fh::FetchHead) ensure_initialized() ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_annotated_commit_from_fetchhead, :libgit2), Cint, + @check ccall((:git_annotated_commit_from_fetchhead, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Ptr{GitHash}), ann_ref_ref, repo.ptr, fh.name, fh.url, Ref(fh.oid)) return GitAnnotated(repo, ann_ref_ref[]) @@ -49,7 +49,7 @@ end function GitHash(ann::GitAnnotated) ensure_initialized() GC.@preserve ann begin - oid = unsafe_load(ccall((:git_annotated_commit_id, :libgit2), Ptr{GitHash}, (Ptr{Cvoid},), ann.ptr)) + oid = unsafe_load(ccall((:git_annotated_commit_id, libgit2), Ptr{GitHash}, (Ptr{Cvoid},), ann.ptr)) end return oid end @@ -88,7 +88,7 @@ function merge_analysis(repo::GitRepo, anns::Vector{GitAnnotated}) preference = Ref{Cint}(0) anns_ref = Ref(Base.map(a->a.ptr, anns), 1) anns_size = Csize_t(length(anns)) - @check ccall((:git_merge_analysis, :libgit2), Cint, + @check ccall((:git_merge_analysis, libgit2), Cint, (Ptr{Cint}, Ptr{Cint}, 
Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t), analysis, preference, repo.ptr, anns_ref, anns_size) return analysis[], preference[] @@ -147,7 +147,7 @@ function merge!(repo::GitRepo, anns::Vector{GitAnnotated}; checkout_opts::CheckoutOptions = CheckoutOptions()) ensure_initialized() anns_size = Csize_t(length(anns)) - @check ccall((:git_merge, :libgit2), Cint, + @check ccall((:git_merge, libgit2), Cint, (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t, Ptr{MergeOptions}, Ptr{CheckoutOptions}), repo.ptr, Base.map(x->x.ptr, anns), anns_size, @@ -261,7 +261,7 @@ function merge_base(repo::GitRepo, one::AbstractString, two::AbstractString) oid2_ptr = Ref(GitHash(two)) moid_ptr = Ref(GitHash()) moid = try - @check ccall((:git_merge_base, :libgit2), Cint, + @check ccall((:git_merge_base, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Ptr{GitHash}, Ptr{GitHash}), moid_ptr, repo.ptr, oid1_ptr, oid2_ptr) moid_ptr[] diff --git a/stdlib/LibGit2/src/oid.jl b/stdlib/LibGit2/src/oid.jl index 1074f003ebd2f..be4944791f55c 100644 --- a/stdlib/LibGit2/src/oid.jl +++ b/stdlib/LibGit2/src/oid.jl @@ -13,7 +13,7 @@ function GitHash(ptr::Ptr{UInt8}) end ensure_initialized() oid_ptr = Ref(GitHash()) - @check ccall((:git_oid_fromraw, :libgit2), Cint, + @check ccall((:git_oid_fromraw, libgit2), Cint, (Ptr{GitHash}, Ptr{UInt8}), oid_ptr, ptr) return oid_ptr[] end @@ -43,7 +43,7 @@ function GitHash(id::AbstractString) end ensure_initialized() oid_ptr = Ref{GitHash}() - @check ccall((:git_oid_fromstrn, :libgit2), Cint, + @check ccall((:git_oid_fromstrn, libgit2), Cint, (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, bstr, len) return oid_ptr[] end @@ -56,7 +56,7 @@ Construct a `GitShortHash` from the data stored in the given [`Buffer`](@ref). function GitShortHash(buf::Buffer) ensure_initialized() oid_ptr = Ref{GitHash}() - @check ccall((:git_oid_fromstrn, :libgit2), Cint, + @check ccall((:git_oid_fromstrn, libgit2), Cint, (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, buf.ptr, buf.size) GitShortHash(oid_ptr[], buf.size) end @@ -71,7 +71,7 @@ function GitShortHash(id::AbstractString) bstr = String(id) len = sizeof(bstr) oid_ptr = Ref{GitHash}() - @check ccall((:git_oid_fromstrn, :libgit2), Cint, + @check ccall((:git_oid_fromstrn, libgit2), Cint, (Ptr{GitHash}, Ptr{UInt8}, Csize_t), oid_ptr, bstr, len) GitShortHash(oid_ptr[], len) end @@ -113,7 +113,7 @@ function GitHash(ref::GitReference) reftype(ref) != Consts.REF_OID && return GitHash() ensure_initialized() GC.@preserve ref begin - oid_ptr = ccall((:git_reference_target, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), ref.ptr) + oid_ptr = ccall((:git_reference_target, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), ref.ptr) oid_ptr == C_NULL && return GitHash() oid = GitHash(oid_ptr) end @@ -131,7 +131,7 @@ function GitHash(repo::GitRepo, ref_name::AbstractString) isempty(repo) && return GitHash() ensure_initialized() oid_ptr = Ref(GitHash()) - @check ccall((:git_reference_name_to_id, :libgit2), Cint, + @check ccall((:git_reference_name_to_id, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Cstring), oid_ptr, repo.ptr, ref_name) return oid_ptr[] @@ -144,7 +144,7 @@ Get the identifier (`GitHash`) of `obj`. """ function GitHash(obj::GitObject) ensure_initialized() - GitHash(ccall((:git_object_id, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj.ptr)) + GitHash(ccall((:git_object_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj.ptr)) end ==(obj1::GitObject, obj2::GitObject) = GitHash(obj1) == GitHash(obj2) @@ -154,12 +154,12 @@ end Get a shortened identifier (`GitShortHash`) of `obj`. 
The minimum length (in characters) is determined by the `core.abbrev` config option, and will be of sufficient length to -unambiuously identify the object in the repository. +unambiguously identify the object in the repository. """ function GitShortHash(obj::GitObject) ensure_initialized() buf_ref = Ref(Buffer()) - @check ccall((:git_object_short_id, :libgit2), Cint, + @check ccall((:git_object_short_id, libgit2), Cint, (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj.ptr) sid = GitShortHash(buf_ref[]) free(buf_ref) @@ -187,7 +187,7 @@ Base.hash(id::GitHash, h::UInt) = hash(id.val, h) function Base.cmp(id1::GitHash, id2::GitHash) ensure_initialized() - Int(ccall((:git_oid_cmp, :libgit2), Cint, + Int(ccall((:git_oid_cmp, libgit2), Cint, (Ptr{GitHash}, Ptr{GitHash}), Ref(id1), Ref(id2))) end @@ -195,7 +195,7 @@ function Base.cmp(id1::GitShortHash, id2::GitShortHash) ensure_initialized() # shortened hashes appear at the beginning of the order, i.e. # 000 < 01 < 010 < 011 < 0112 - c = Int(ccall((:git_oid_ncmp, :libgit2), Cint, + c = Int(ccall((:git_oid_ncmp, libgit2), Cint, (Ptr{GitHash}, Ptr{GitHash}, Csize_t), Ref(id1.hash), Ref(id2.hash), min(id1.len, id2.len))) return c == 0 ? cmp(id1.len, id2.len) : c diff --git a/stdlib/LibGit2/src/rebase.jl b/stdlib/LibGit2/src/rebase.jl index 51b52ef006c38..b36c2f3f475cf 100644 --- a/stdlib/LibGit2/src/rebase.jl +++ b/stdlib/LibGit2/src/rebase.jl @@ -5,7 +5,7 @@ function GitRebase(repo::GitRepo, branch::GitAnnotated, upstream::GitAnnotated; opts::RebaseOptions = RebaseOptions()) ensure_initialized() rebase_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_rebase_init, :libgit2), Cint, + @check ccall((:git_rebase_init, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{RebaseOptions}), rebase_ptr_ptr, repo.ptr, branch.ptr, upstream.ptr, @@ -15,7 +15,7 @@ end function count(rb::GitRebase) ensure_initialized() - return ccall((:git_rebase_operation_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr) + return ccall((:git_rebase_operation_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr) end """ @@ -28,7 +28,7 @@ has not yet been called or iteration over `rb` has not yet begun), return """ function current(rb::GitRebase) ensure_initialized() - return ccall((:git_rebase_operation_current, :libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr) + return ccall((:git_rebase_operation_current, libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr) end function Base.getindex(rb::GitRebase, i::Integer) @@ -37,7 +37,7 @@ function Base.getindex(rb::GitRebase, i::Integer) end ensure_initialized() GC.@preserve rb begin - rb_op_ptr = ccall((:git_rebase_operation_byindex, :libgit2), + rb_op_ptr = ccall((:git_rebase_operation_byindex, libgit2), Ptr{RebaseOperation}, (Ptr{Cvoid}, Csize_t), rb.ptr, i-1) rb_op = unsafe_load(rb_op_ptr) @@ -49,7 +49,7 @@ function Base.iterate(rb::GitRebase, state=nothing) ensure_initialized() rb_op_ptr_ptr = Ref{Ptr{RebaseOperation}}(C_NULL) GC.@preserve rb begin - err = ccall((:git_rebase_next, :libgit2), Cint, + err = ccall((:git_rebase_next, libgit2), Cint, (Ptr{Ptr{RebaseOperation}}, Ptr{Cvoid}), rb_op_ptr_ptr, rb.ptr) if err == Cint(Error.GIT_OK) @@ -78,7 +78,7 @@ function commit(rb::GitRebase, sig::GitSignature) ensure_initialized() oid_ptr = Ref(GitHash()) try - @check ccall((:git_rebase_commit, :libgit2), Error.Code, + @check ccall((:git_rebase_commit, libgit2), Error.Code, (Ptr{GitHash}, Ptr{Cvoid}, Ptr{SignatureStruct}, Ptr{SignatureStruct}, Ptr{UInt8}, Ptr{UInt8}), oid_ptr, rb.ptr, C_NULL, sig.ptr, C_NULL, C_NULL) catch 
err @@ -100,7 +100,7 @@ rebase had completed), and `-1` for other errors. """ function abort(rb::GitRebase) ensure_initialized() - return ccall((:git_rebase_abort, :libgit2), Csize_t, + return ccall((:git_rebase_abort, libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr) end @@ -113,7 +113,7 @@ rebase finishes successfully, `-1` if there is an error. """ function finish(rb::GitRebase, sig::GitSignature) ensure_initialized() - return ccall((:git_rebase_finish, :libgit2), Csize_t, + return ccall((:git_rebase_finish, libgit2), Csize_t, (Ptr{Cvoid}, Ptr{SignatureStruct}), rb.ptr, sig.ptr) end diff --git a/stdlib/LibGit2/src/reference.jl b/stdlib/LibGit2/src/reference.jl index 345c546946ee5..9f849ed01a00f 100644 --- a/stdlib/LibGit2/src/reference.jl +++ b/stdlib/LibGit2/src/reference.jl @@ -3,7 +3,7 @@ function GitReference(repo::GitRepo, refname::AbstractString) ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_reference_lookup, :libgit2), Cint, + @check ccall((:git_reference_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), ref_ptr_ptr, repo.ptr, refname) return GitReference(repo, ref_ptr_ptr[]) @@ -13,7 +13,7 @@ function GitReference(repo::GitRepo, obj_oid::GitHash, refname::AbstractString = force::Bool=false, msg::AbstractString="") ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_reference_create, :libgit2), Cint, + @check ccall((:git_reference_create, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Ptr{GitHash}, Cint, Cstring), ref_ptr_ptr, repo.ptr, refname, Ref(obj_oid), Cint(force), isempty(msg) ? C_NULL : msg) @@ -28,7 +28,7 @@ to this branch will have no parents. """ function isorphan(repo::GitRepo) ensure_initialized() - r = @check ccall((:git_repository_head_unborn, :libgit2), Cint, + r = @check ccall((:git_repository_head_unborn, libgit2), Cint, (Ptr{Cvoid},), repo.ptr) r != 0 end @@ -41,7 +41,7 @@ Return a `GitReference` to the current HEAD of `repo`. function head(repo::GitRepo) ensure_initialized() head_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_head, :libgit2), Cint, + @check ccall((:git_repository_head, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo.ptr) return GitReference(repo, head_ptr_ptr[]) end @@ -53,7 +53,7 @@ Return a shortened version of the name of `ref` that's "human-readable". 
```julia-repl -julia> repo = LibGit2.GitRepo(path_to_repo); +julia> repo = GitRepo(path_to_repo); julia> branch_ref = LibGit2.head(repo); @@ -68,7 +68,7 @@ function shortname(ref::GitReference) isempty(ref) && return "" ensure_initialized() GC.@preserve ref begin - name_ptr = ccall((:git_reference_shorthand, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) + name_ptr = ccall((:git_reference_shorthand, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) name_ptr == C_NULL && return "" name = unsafe_string(name_ptr) end @@ -85,7 +85,7 @@ Return a `Cint` corresponding to the type of `ref`: """ function reftype(ref::GitReference) ensure_initialized() - return ccall((:git_reference_type, :libgit2), Cint, (Ptr{Cvoid},), ref.ptr) + return ccall((:git_reference_type, libgit2), Cint, (Ptr{Cvoid},), ref.ptr) end """ @@ -100,7 +100,7 @@ function fullname(ref::GitReference) reftype(ref) == Consts.REF_OID && return "" ensure_initialized() GC.@preserve ref begin - rname = ccall((:git_reference_symbolic_target, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) + rname = ccall((:git_reference_symbolic_target, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) rname == C_NULL && return "" name = unsafe_string(rname) end @@ -116,7 +116,7 @@ function name(ref::GitReference) isempty(ref) && return "" ensure_initialized() GC.@preserve ref begin - name_ptr = ccall((:git_reference_name, :libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) + name_ptr = ccall((:git_reference_name, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr) name_ptr == C_NULL && return "" name = unsafe_string(name_ptr) end @@ -128,7 +128,7 @@ function branch(ref::GitReference) ensure_initialized() str_ptr_ptr = Ref{Cstring}() GC.@preserve ref begin - @check ccall((:git_branch_name, :libgit2), Cint, + @check ccall((:git_branch_name, libgit2), Cint, (Ptr{Cstring}, Ptr{Cvoid},), str_ptr_ptr, ref.ptr) str = unsafe_string(str_ptr_ptr[]) end @@ -138,7 +138,7 @@ end function ishead(ref::GitReference) isempty(ref) && return false ensure_initialized() - err = ccall((:git_branch_is_head, :libgit2), Cint, + err = ccall((:git_branch_is_head, libgit2), Cint, (Ptr{Cvoid},), ref.ptr) return err == 1 end @@ -146,7 +146,7 @@ end function isbranch(ref::GitReference) isempty(ref) && return false ensure_initialized() - err = ccall((:git_reference_is_branch, :libgit2), Cint, + err = ccall((:git_reference_is_branch, libgit2), Cint, (Ptr{Cvoid},), ref.ptr) return err == 1 end @@ -154,7 +154,7 @@ end function istag(ref::GitReference) isempty(ref) && return false ensure_initialized() - err = ccall((:git_reference_is_tag, :libgit2), Cint, + err = ccall((:git_reference_is_tag, libgit2), Cint, (Ptr{Cvoid},), ref.ptr) return err == 1 end @@ -162,7 +162,7 @@ end function isremote(ref::GitReference) isempty(ref) && return false ensure_initialized() - err = ccall((:git_reference_is_remote, :libgit2), Cint, + err = ccall((:git_reference_is_remote, libgit2), Cint, (Ptr{Cvoid},), ref.ptr) return err == 1 end @@ -199,7 +199,7 @@ then `ref` will be peeled until an object other than a [`GitTag`](@ref) is obtai function peel(::Type{T}, ref::GitReference) where T<:GitObject ensure_initialized() obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_reference_peel, :libgit2), Cint, + @check ccall((:git_reference_peel, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref.ptr, Consts.OBJECT(T)) return T(ref.owner, obj_ptr_ptr[]) end @@ -213,7 +213,7 @@ Get a list of all reference names in the `repo` repository. 
function ref_list(repo::GitRepo) ensure_initialized() sa_ref = Ref(StrArrayStruct()) - @check ccall((:git_reference_list, :libgit2), Cint, + @check ccall((:git_reference_list, libgit2), Cint, (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr) res = convert(Vector{String}, sa_ref[]) free(sa_ref) @@ -235,7 +235,7 @@ function create_branch(repo::GitRepo, force::Bool=false) ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_branch_create, :libgit2), Cint, + @check ccall((:git_branch_create, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Cint), ref_ptr_ptr, repo.ptr, bname, commit_obj.ptr, Cint(force)) return GitReference(repo, ref_ptr_ptr[]) @@ -248,7 +248,7 @@ Delete the branch pointed to by `branch`. """ function delete_branch(branch::GitReference) ensure_initialized() - @check ccall((:git_branch_delete, :libgit2), Cint, (Ptr{Cvoid},), branch.ptr) + @check ccall((:git_branch_delete, libgit2), Cint, (Ptr{Cvoid},), branch.ptr) end """ @@ -259,7 +259,7 @@ Set the HEAD of `repo` to the object pointed to by `ref`. function head!(repo::GitRepo, ref::GitReference) ensure_initialized() ref_name = name(ref) - @check ccall((:git_repository_set_head, :libgit2), Cint, + @check ccall((:git_repository_set_head, libgit2), Cint, (Ptr{Cvoid}, Cstring), repo.ptr, ref_name) return ref end @@ -280,7 +280,7 @@ function lookup_branch(repo::GitRepo, ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) branch_type = remote ? Consts.BRANCH_REMOTE : Consts.BRANCH_LOCAL - err = ccall((:git_branch_lookup, :libgit2), Cint, + err = ccall((:git_branch_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Cint), ref_ptr_ptr, repo.ptr, branch_name, branch_type) if err != Int(Error.GIT_OK) @@ -307,7 +307,7 @@ function upstream(ref::GitReference) isempty(ref) && return nothing ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - err = ccall((:git_branch_upstream, :libgit2), Cint, + err = ccall((:git_branch_upstream, libgit2), Cint, (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref.ptr) if err != Int(Error.GIT_OK) if err == Int(Error.ENOTFOUND) @@ -326,7 +326,7 @@ repository(ref::GitReference) = ref.owner function target!(ref::GitReference, new_oid::GitHash; msg::AbstractString="") ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_reference_set_target, :libgit2), Cint, + @check ccall((:git_reference_set_target, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Cstring), ref_ptr_ptr, ref.ptr, Ref(new_oid), isempty(msg) ? 
C_NULL : msg) return GitReference(ref.owner, ref_ptr_ptr[]) @@ -335,7 +335,7 @@ end function GitBranchIter(repo::GitRepo, flags::Cint=Cint(Consts.BRANCH_LOCAL)) ensure_initialized() bi_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_branch_iterator_new, :libgit2), Cint, + @check ccall((:git_branch_iterator_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo.ptr, flags) return GitBranchIter(repo, bi_ptr[]) end @@ -344,7 +344,7 @@ function Base.iterate(bi::GitBranchIter, state=nothing) ensure_initialized() ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) btype = Ref{Cint}() - err = ccall((:git_branch_next, :libgit2), Cint, + err = ccall((:git_branch_next, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cint}, Ptr{Cvoid}), ref_ptr_ptr, btype, bi.ptr) if err == Cint(Error.GIT_OK) diff --git a/stdlib/LibGit2/src/remote.jl b/stdlib/LibGit2/src/remote.jl index 384a3b21bdbfa..07afecebfd373 100644 --- a/stdlib/LibGit2/src/remote.jl +++ b/stdlib/LibGit2/src/remote.jl @@ -14,7 +14,7 @@ remote = LibGit2.GitRemote(repo, "upstream", repo_url) function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_remote_create, :libgit2), Cint, + @check ccall((:git_remote_create, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring), rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url) return GitRemote(repo, rmt_ptr_ptr[]) @@ -37,7 +37,7 @@ remote = LibGit2.GitRemote(repo, "upstream", repo_url, refspec) function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractString, fetch_spec::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_remote_create_with_fetchspec, :libgit2), Cint, + @check ccall((:git_remote_create_with_fetchspec, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Cstring), rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url, fetch_spec) return GitRemote(repo, rmt_ptr_ptr[]) @@ -57,12 +57,25 @@ remote = LibGit2.GitRemoteAnon(repo, repo_url) function GitRemoteAnon(repo::GitRepo, url::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_remote_create_anonymous, :libgit2), Cint, + @check ccall((:git_remote_create_anonymous, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), rmt_ptr_ptr, repo.ptr, url) return GitRemote(repo, rmt_ptr_ptr[]) end +""" + GitRemoteDetached(url::AbstractString) -> GitRemote + +Create a remote without a connected local repo. 
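Editorial aside, not part of the patch: a minimal sketch of what a detached remote looks like in practice, assuming only the `GitRemoteDetached` constructor added above and the existing `url` accessor; the URL is a placeholder.

```julia
# Illustrative sketch only; the URL is a placeholder, not a real endpoint.
rmt = LibGit2.GitRemoteDetached("https://example.com/repo.git")
LibGit2.url(rmt)   # returns "https://example.com/repo.git"; there is no owning GitRepo
close(rmt)         # free the underlying git_remote handle
```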
+""" +function GitRemoteDetached(url::AbstractString) + ensure_initialized() + rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) + @check ccall((:git_remote_create_detached, libgit2), Cint, + (Ptr{Ptr{Cvoid}}, Cstring), rmt_ptr_ptr, url) + return GitRemote(rmt_ptr_ptr[]) +end + """ lookup_remote(repo::GitRepo, remote_name::AbstractString) -> Union{GitRemote, Nothing} @@ -80,7 +93,7 @@ LibGit2.lookup_remote(repo, remote_name) # will return nothing function lookup_remote(repo::GitRepo, remote_name::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - err = ccall((:git_remote_lookup, :libgit2), Cint, + err = ccall((:git_remote_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), rmt_ptr_ptr, repo.ptr, remote_name) if err == Int(Error.GIT_OK) @@ -95,7 +108,7 @@ end function get(::Type{GitRemote}, repo::GitRepo, rmt_name::AbstractString) ensure_initialized() rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_remote_lookup, :libgit2), Cint, + @check ccall((:git_remote_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), rmt_ptr_ptr, repo.ptr, rmt_name) return GitRemote(repo, rmt_ptr_ptr[]) @@ -120,7 +133,7 @@ julia> LibGit2.url(remote) """ function url(rmt::GitRemote) ensure_initialized() - url_ptr = ccall((:git_remote_url, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) + url_ptr = ccall((:git_remote_url, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) url_ptr == C_NULL && return "" return unsafe_string(url_ptr) end @@ -144,7 +157,7 @@ julia> LibGit2.push_url(LibGit2.get(LibGit2.GitRemote, repo, "origin")) """ function push_url(rmt::GitRemote) ensure_initialized() - url_ptr = ccall((:git_remote_pushurl, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) + url_ptr = ccall((:git_remote_pushurl, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) url_ptr == C_NULL && return "" return unsafe_string(url_ptr) end @@ -170,7 +183,7 @@ julia> name(remote) """ function name(rmt::GitRemote) ensure_initialized() - name_ptr = ccall((:git_remote_name, :libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) + name_ptr = ccall((:git_remote_name, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr) name_ptr == C_NULL && return "" return unsafe_string(name_ptr) end @@ -194,7 +207,7 @@ String["+refs/heads/*:refs/remotes/upstream/*"] function fetch_refspecs(rmt::GitRemote) ensure_initialized() sa_ref = Ref(StrArrayStruct()) - @check ccall((:git_remote_get_fetch_refspecs, :libgit2), Cint, + @check ccall((:git_remote_get_fetch_refspecs, libgit2), Cint, (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr) res = convert(Vector{String}, sa_ref[]) free(sa_ref) @@ -224,7 +237,7 @@ String["refs/heads/master"] function push_refspecs(rmt::GitRemote) ensure_initialized() sa_ref = Ref(StrArrayStruct()) - @check ccall((:git_remote_get_push_refspecs, :libgit2), Cint, + @check ccall((:git_remote_get_push_refspecs, libgit2), Cint, (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr) res = convert(Vector{String}, sa_ref[]) free(sa_ref) @@ -247,7 +260,7 @@ String["+refs/heads/*:refs/remotes/upstream/*"] """ function add_fetch!(repo::GitRepo, rmt::GitRemote, fetch_spec::String) ensure_initialized() - @check ccall((:git_remote_add_fetch, :libgit2), Cint, + @check ccall((:git_remote_add_fetch, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cstring), repo.ptr, name(rmt), fetch_spec) end @@ -276,7 +289,7 @@ String["refs/heads/master"] """ function add_push!(repo::GitRepo, rmt::GitRemote, push_spec::String) ensure_initialized() - @check ccall((:git_remote_add_push, :libgit2), Cint, + @check ccall((:git_remote_add_push, libgit2), Cint, (Ptr{Cvoid}, 
Cstring, Cstring), repo.ptr, name(rmt), push_spec) end @@ -296,7 +309,7 @@ function fetch(rmt::GitRemote, refspecs::Vector{<:AbstractString}; msg::AbstractString="") ensure_initialized() msg = "libgit2.fetch: $msg" - @check ccall((:git_remote_fetch, :libgit2), Cint, + @check ccall((:git_remote_fetch, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{FetchOptions}, Cstring), rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg) end @@ -321,7 +334,7 @@ The keyword arguments are: function push(rmt::GitRemote, refspecs::Vector{<:AbstractString}; force::Bool = false, options::PushOptions = PushOptions()) ensure_initialized() - @check ccall((:git_remote_push, :libgit2), Cint, + @check ccall((:git_remote_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{PushOptions}), rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options)) end @@ -333,7 +346,7 @@ Delete the `remote_name` from the git `repo`. """ function remote_delete(repo::GitRepo, remote_name::AbstractString) ensure_initialized() - @check ccall((:git_remote_delete, :libgit2), Cint, + @check ccall((:git_remote_delete, libgit2), Cint, (Ptr{Cvoid}, Cstring), repo.ptr, remote_name) end @@ -352,7 +365,7 @@ function set_remote_fetch_url end function set_remote_fetch_url(repo::GitRepo, remote_name::AbstractString, url::AbstractString) ensure_initialized() - @check ccall((:git_remote_set_url, :libgit2), Cint, + @check ccall((:git_remote_set_url, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cstring), repo.ptr, remote_name, url) end @@ -375,7 +388,7 @@ function set_remote_push_url end function set_remote_push_url(repo::GitRepo, remote_name::AbstractString, url::AbstractString) ensure_initialized() - @check ccall((:git_remote_set_pushurl, :libgit2), Cint, + @check ccall((:git_remote_set_pushurl, libgit2), Cint, (Ptr{Cvoid}, Cstring, Cstring), repo.ptr, remote_name, url) end @@ -414,3 +427,65 @@ function set_remote_url(path::AbstractString, remote_name::AbstractString, url:: set_remote_url(repo, remote_name, url) end end + +function connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION, + callbacks::RemoteCallbacks) + @check ccall((:git_remote_connect, libgit2), + Cint, (Ptr{Cvoid}, Cint, Ref{RemoteCallbacks}, Ptr{Cvoid}, Ptr{Cvoid}), + rmt.ptr, direction, callbacks, C_NULL, C_NULL) + return rmt +end + +""" + connected(rmt::GitRemote) + +Check whether the remote is connected +""" +function connected(rmt::GitRemote) + return ccall((:git_remote_connected, libgit2), Cint, (Ptr{Cvoid},), rmt.ptr) != 0 +end + +""" + disconnect(rmt::GitRemote) + +Close the connection to the remote. +""" +function disconnect(rmt::GitRemote) + @check ccall((:git_remote_disconnect, libgit2), Cint, (Ptr{Cvoid},), rmt.ptr) + return +end + +""" + default_branch(rmt::GitRemote) + +Retrieve the name of the remote's default branch. + +This function must only be called after connecting (See [`connect`](@ref)). +""" +function default_branch(rmt::GitRemote) + buf_ref = Ref(Buffer()) + @check ccall((:git_remote_default_branch, libgit2), Cint, + (Ptr{Buffer}, Ptr{Cvoid}), buf_ref, rmt.ptr) + buf = buf_ref[] + str = unsafe_string(buf.ptr, buf.size) + free(buf_ref) + return str +end + +""" + ls(rmt::GitRemote) -> Vector{GitRemoteHead} + +Get the remote repository's reference advertisement list. + +This function must only be called after connecting (See [`connect`](@ref)). 
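Editorial aside, not part of the patch: a hedged sketch of the connect → query → disconnect sequence these docstrings require, assuming an existing repository with an `origin` remote; `path_to_repo` is a placeholder, as in the other docstring examples in this file.

```julia
repo = LibGit2.GitRepo(path_to_repo)            # path_to_repo is a placeholder
rmt = LibGit2.lookup_remote(repo, "origin")     # may return nothing if the remote is absent
LibGit2.connect(rmt, LibGit2.Consts.DIRECTION_FETCH, LibGit2.RemoteCallbacks())
try
    LibGit2.default_branch(rmt)   # e.g. "refs/heads/master"; only valid while connected
    LibGit2.ls(rmt)               # Vector{GitRemoteHead} advertised by the server
finally
    LibGit2.disconnect(rmt)
    close(rmt)
end
```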
+""" +function ls(rmt::GitRemote) + nheads = Ref{Csize_t}() + head_refs = Ref{Ptr{Ptr{_GitRemoteHead}}}() + @check ccall((:git_remote_ls, libgit2), Cint, + (Ptr{Ptr{Ptr{_GitRemoteHead}}}, Ptr{Csize_t}, Ptr{Cvoid}), + head_refs, nheads, rmt.ptr) + head_ptr = head_refs[] + return [GitRemoteHead(unsafe_load(unsafe_load(head_ptr, i))) + for i in 1:nheads[]] +end diff --git a/stdlib/LibGit2/src/repository.jl b/stdlib/LibGit2/src/repository.jl index 994d0a9f32875..8297ae92a6a00 100644 --- a/stdlib/LibGit2/src/repository.jl +++ b/stdlib/LibGit2/src/repository.jl @@ -8,7 +8,7 @@ Open a git repository at `path`. function GitRepo(path::AbstractString) ensure_initialized() repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_open, :libgit2), Cint, + @check ccall((:git_repository_open, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring), repo_ptr_ptr, path) return GitRepo(repo_ptr_ptr[]) end @@ -23,7 +23,7 @@ function GitRepoExt(path::AbstractString, flags::Cuint = Cuint(Consts.REPOSITORY ensure_initialized() separator = @static Sys.iswindows() ? ";" : ":" repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_open_ext, :libgit2), Cint, + @check ccall((:git_repository_open_ext, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cuint, Cstring), repo_ptr_ptr, path, flags, separator) return GitRepo(repo_ptr_ptr[]) @@ -32,7 +32,7 @@ end function cleanup(r::GitRepo) if r.ptr != C_NULL ensure_initialized() - @check ccall((:git_repository__cleanup, :libgit2), Cint, (Ptr{Cvoid},), r.ptr) + @check ccall((:git_repository__cleanup, libgit2), Cint, (Ptr{Cvoid},), r.ptr) end end @@ -46,7 +46,7 @@ is `true`, no working directory will be created. function init(path::AbstractString, bare::Bool=false) ensure_initialized() repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_repository_init, :libgit2), Cint, + @check ccall((:git_repository_init, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cuint), repo_ptr_ptr, path, bare) return GitRepo(repo_ptr_ptr[]) end @@ -97,7 +97,7 @@ tree, and no tracking information for remote branches or configurations is prese function isbare(repo::GitRepo) ensure_initialized() @assert repo.ptr != C_NULL - return ccall((:git_repository_is_bare, :libgit2), Cint, (Ptr{Cvoid},), repo.ptr) == 1 + return ccall((:git_repository_is_bare, libgit2), Cint, (Ptr{Cvoid},), repo.ptr) == 1 end """ @@ -109,7 +109,7 @@ Determine if `repo` is detached - that is, whether its HEAD points to a commit function isattached(repo::GitRepo) ensure_initialized() @assert repo.ptr != C_NULL - ccall((:git_repository_head_detached, :libgit2), Cint, (Ptr{Cvoid},), repo.ptr) != 1 + ccall((:git_repository_head_detached, libgit2), Cint, (Ptr{Cvoid},), repo.ptr) != 1 end @doc """ @@ -139,7 +139,7 @@ function (::Type{T})(repo::GitRepo, spec::AbstractString) where T<:GitObject ensure_initialized() obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) @assert repo.ptr != C_NULL - @check ccall((:git_revparse_single, :libgit2), Cint, + @check ccall((:git_revparse_single, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo.ptr, spec) # check object is of correct type if T != GitObject && T != GitUnknownObject @@ -155,7 +155,7 @@ function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) @assert repo.ptr != C_NULL - @check ccall((:git_object_lookup, :libgit2), Cint, + @check ccall((:git_object_lookup, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Consts.OBJECT), obj_ptr_ptr, repo.ptr, oid_ptr, Consts.OBJECT(T)) @@ -167,7 +167,7 @@ function 
(::Type{T})(repo::GitRepo, oid::GitShortHash) where T<:GitObject obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) @assert repo.ptr != C_NULL - @check ccall((:git_object_lookup_prefix, :libgit2), Cint, + @check ccall((:git_object_lookup_prefix, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Csize_t, Consts.OBJECT), obj_ptr_ptr, repo.ptr, oid_ptr, oid.len, Consts.OBJECT(T)) @@ -190,7 +190,7 @@ See also [`workdir`](@ref), [`path`](@ref). function gitdir(repo::GitRepo) ensure_initialized() @assert repo.ptr != C_NULL - return unsafe_string(ccall((:git_repository_path, :libgit2), Cstring, + return unsafe_string(ccall((:git_repository_path, libgit2), Cstring, (Ptr{Cvoid},), repo.ptr)) end @@ -211,7 +211,7 @@ See also [`gitdir`](@ref), [`path`](@ref). function workdir(repo::GitRepo) ensure_initialized() @assert repo.ptr != C_NULL - sptr = ccall((:git_repository_workdir, :libgit2), Cstring, + sptr = ccall((:git_repository_workdir, libgit2), Cstring, (Ptr{Cvoid},), repo.ptr) sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found.")) return unsafe_string(sptr) @@ -255,7 +255,7 @@ function peel(::Type{T}, obj::GitObject) where T<:GitObject ensure_initialized() new_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_object_peel, :libgit2), Cint, + @check ccall((:git_object_peel, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj.ptr, Consts.OBJECT(T)) return T(obj.owner, new_ptr_ptr[]) @@ -285,7 +285,7 @@ function GitDescribeResult(committish::GitObject; options::DescribeOptions=DescribeOptions()) ensure_initialized() result_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_describe_commit, :libgit2), Cint, + @check ccall((:git_describe_commit, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}), result_ptr_ptr, committish.ptr, Ref(options)) return GitDescribeResult(committish.owner, result_ptr_ptr[]) @@ -312,7 +312,7 @@ function GitDescribeResult(repo::GitRepo; options::DescribeOptions=DescribeOptio ensure_initialized() result_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) @assert repo.ptr != C_NULL - @check ccall((:git_describe_workdir, :libgit2), Cint, + @check ccall((:git_describe_workdir, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}), result_ptr_ptr, repo.ptr, Ref(options)) return GitDescribeResult(repo, result_ptr_ptr[]) @@ -329,7 +329,7 @@ Formatting options are controlled by the keyword argument: function format(result::GitDescribeResult; options::DescribeFormatOptions=DescribeFormatOptions()) ensure_initialized() buf_ref = Ref(Buffer()) - @check ccall((:git_describe_format, :libgit2), Cint, + @check ccall((:git_describe_format, libgit2), Cint, (Ptr{Buffer}, Ptr{Cvoid}, Ptr{DescribeFormatOptions}), buf_ref, result.ptr, Ref(options)) buf = buf_ref[] @@ -355,7 +355,7 @@ function checkout_tree(repo::GitRepo, obj::GitObject; options::CheckoutOptions = CheckoutOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_checkout_tree, :libgit2), Cint, + @check ccall((:git_checkout_tree, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}), repo.ptr, obj.ptr, Ref(options)) end @@ -371,7 +371,7 @@ function checkout_index(repo::GitRepo, idx::Union{GitIndex, Nothing} = nothing; options::CheckoutOptions = CheckoutOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_checkout_index, :libgit2), Cint, + @check ccall((:git_checkout_index, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}), repo.ptr, idx === nothing ? 
C_NULL : idx.ptr, @@ -391,7 +391,7 @@ Update the index and working tree of `repo` to match the commit pointed to by HE function checkout_head(repo::GitRepo; options::CheckoutOptions = CheckoutOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_checkout_head, :libgit2), Cint, + @check ccall((:git_checkout_head, libgit2), Cint, (Ptr{Cvoid}, Ptr{CheckoutOptions}), repo.ptr, Ref(options)) end @@ -410,7 +410,7 @@ The keyword argument `options` sets checkout and merge options for the cherrypic function cherrypick(repo::GitRepo, commit::GitCommit; options::CherrypickOptions = CherrypickOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_cherrypick, :libgit2), Cint, + @check ccall((:git_cherrypick, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CherrypickOptions}), repo.ptr, commit.ptr, Ref(options)) end @@ -419,7 +419,7 @@ end function reset!(repo::GitRepo, obj::Union{GitObject, Nothing}, pathspecs::AbstractString...) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_reset_default, :libgit2), Cint, + @check ccall((:git_reset_default, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{StrArrayStruct}), repo.ptr, obj === nothing ? C_NULL : obj.ptr, @@ -432,7 +432,7 @@ function reset!(repo::GitRepo, obj::GitObject, mode::Cint; checkout_opts::CheckoutOptions = CheckoutOptions()) ensure_initialized() @assert repo.ptr != C_NULL - @check ccall((:git_reset, :libgit2), Cint, + @check ccall((:git_reset, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cint, Ptr{CheckoutOptions}), repo.ptr, obj.ptr, mode, Ref(checkout_opts)) return head_oid(repo) @@ -456,7 +456,7 @@ function clone(repo_url::AbstractString, repo_path::AbstractString, ensure_initialized() clone_opts_ref = Ref(clone_opts) repo_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_clone, :libgit2), Cint, + @check ccall((:git_clone, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cstring, Ref{CloneOptions}), repo_ptr_ptr, repo_url, repo_path, clone_opts_ref) return GitRepo(repo_ptr_ptr[]) @@ -490,7 +490,7 @@ function fetchheads(repo::GitRepo) fh = FetchHead[] ffcb = fetchhead_foreach_cb() @assert repo.ptr != C_NULL - @check ccall((:git_repository_fetchhead_foreach, :libgit2), Cint, + @check ccall((:git_repository_fetchhead_foreach, libgit2), Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Any), repo.ptr, ffcb, fh) return fh @@ -505,7 +505,7 @@ function remotes(repo::GitRepo) ensure_initialized() sa_ref = Ref(StrArrayStruct()) @assert repo.ptr != C_NULL - @check ccall((:git_remote_list, :libgit2), Cint, + @check ccall((:git_remote_list, libgit2), Cint, (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr) res = convert(Vector{String}, sa_ref[]) free(sa_ref) diff --git a/stdlib/LibGit2/src/signature.jl b/stdlib/LibGit2/src/signature.jl index 9c13bc2256ef5..85e62cd8c2b7e 100644 --- a/stdlib/LibGit2/src/signature.jl +++ b/stdlib/LibGit2/src/signature.jl @@ -13,7 +13,7 @@ Signature(sig::GitSignature) = Signature(sig.ptr) function Signature(name::AbstractString, email::AbstractString) ensure_initialized() sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL) - @check ccall((:git_signature_now, :libgit2), Cint, + @check ccall((:git_signature_now, libgit2), Cint, (Ptr{Ptr{SignatureStruct}}, Cstring, Cstring), sig_ptr_ptr, name, email) sig = GitSignature(sig_ptr_ptr[]) s = Signature(sig.ptr) @@ -31,7 +31,7 @@ end function Base.convert(::Type{GitSignature}, sig::Signature) ensure_initialized() sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL) - @check ccall((:git_signature_new, :libgit2), Cint, + @check 
ccall((:git_signature_new, libgit2), Cint, (Ptr{Ptr{SignatureStruct}}, Cstring, Cstring, Int64, Cint), sig_ptr_ptr, sig.name, sig.email, sig.time, sig.time_offset) return GitSignature(sig_ptr_ptr[]) @@ -66,7 +66,7 @@ end function default_signature(repo::GitRepo) ensure_initialized() sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL) - @check ccall((:git_signature_default, :libgit2), Cint, + @check ccall((:git_signature_default, libgit2), Cint, (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo.ptr) return GitSignature(sig_ptr_ptr[]) end diff --git a/stdlib/LibGit2/src/status.jl b/stdlib/LibGit2/src/status.jl index cd871681e4ae9..c1cb2fb1c5a9c 100644 --- a/stdlib/LibGit2/src/status.jl +++ b/stdlib/LibGit2/src/status.jl @@ -12,7 +12,7 @@ submodules or not. See [`StatusOptions`](@ref) for more information. function GitStatus(repo::GitRepo; status_opts=StatusOptions()) ensure_initialized() stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_status_list_new, :libgit2), Cint, + @check ccall((:git_status_list_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{StatusOptions}), stat_ptr_ptr, repo.ptr, Ref(status_opts)) return GitStatus(repo, stat_ptr_ptr[]) @@ -20,7 +20,7 @@ end function Base.length(status::GitStatus) ensure_initialized() - return Int(ccall((:git_status_list_entrycount, :libgit2), Csize_t, + return Int(ccall((:git_status_list_entrycount, libgit2), Csize_t, (Ptr{Ptr{Cvoid}},), status.ptr)) end @@ -28,7 +28,7 @@ function Base.getindex(status::GitStatus, i::Integer) 1 <= i <= length(status) || throw(BoundsError()) ensure_initialized() GC.@preserve status begin - entry_ptr = ccall((:git_status_byindex, :libgit2), + entry_ptr = ccall((:git_status_byindex, libgit2), Ptr{StatusEntry}, (Ptr{Cvoid}, Csize_t), status.ptr, i-1) @@ -49,7 +49,7 @@ and needs to be staged and committed. function status(repo::GitRepo, path::String) ensure_initialized() status_ptr = Ref{Cuint}(0) - ret = ccall((:git_status_file, :libgit2), Cint, + ret = ccall((:git_status_file, libgit2), Cint, (Ref{Cuint}, Ptr{Cvoid}, Cstring), status_ptr, repo.ptr, path) (ret == Cint(Error.ENOTFOUND) || ret == Cint(Error.EAMBIGUOUS)) && return nothing diff --git a/stdlib/LibGit2/src/tag.jl b/stdlib/LibGit2/src/tag.jl index 4209a4e2f917d..0e3d2b398a835 100644 --- a/stdlib/LibGit2/src/tag.jl +++ b/stdlib/LibGit2/src/tag.jl @@ -8,7 +8,7 @@ Get a list of all tags in the git repository `repo`. function tag_list(repo::GitRepo) ensure_initialized() sa_ref = Ref(StrArrayStruct()) - @check ccall((:git_tag_list, :libgit2), Cint, + @check ccall((:git_tag_list, libgit2), Cint, (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr) res = convert(Vector{String}, sa_ref[]) free(sa_ref) @@ -22,7 +22,7 @@ Remove the git tag `tag` from the repository `repo`. """ function tag_delete(repo::GitRepo, tag::AbstractString) ensure_initialized() - @check ccall((:git_tag_delete, :libgit2), Cint, + @check ccall((:git_tag_delete, libgit2), Cint, (Ptr{Cvoid}, Cstring), repo.ptr, tag) end @@ -46,7 +46,7 @@ function tag_create(repo::GitRepo, tag::AbstractString, commit::Union{AbstractSt commit_obj === nothing && return oid_ptr[] # return empty oid with(convert(GitSignature, sig)) do git_sig ensure_initialized() - @check ccall((:git_tag_create, :libgit2), Cint, + @check ccall((:git_tag_create, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Ptr{SignatureStruct}, Cstring, Cint), oid_ptr, repo.ptr, tag, commit_obj.ptr, git_sig.ptr, msg, Cint(force)) end @@ -62,7 +62,7 @@ The name of `tag` (e.g. `"v0.5"`). 
function name(tag::GitTag) ensure_initialized() GC.@preserve tag begin - str_ptr = ccall((:git_tag_name, :libgit2), Cstring, (Ptr{Cvoid},), tag.ptr) + str_ptr = ccall((:git_tag_name, libgit2), Cstring, (Ptr{Cvoid},), tag.ptr) str_ptr == C_NULL && throw(Error.GitError(Error.ERROR)) str = unsafe_string(str_ptr) end @@ -78,7 +78,7 @@ The `GitHash` of the target object of `tag`. function target(tag::GitTag) ensure_initialized() GC.@preserve tag begin - oid_ptr = ccall((:git_tag_target_id, :libgit2), Ptr{GitHash}, (Ptr{Cvoid},), tag.ptr) + oid_ptr = ccall((:git_tag_target_id, libgit2), Ptr{GitHash}, (Ptr{Cvoid},), tag.ptr) oid_ptr == C_NULL && throw(Error.GitError(Error.ERROR)) str = unsafe_load(oid_ptr) end diff --git a/stdlib/LibGit2/src/tree.jl b/stdlib/LibGit2/src/tree.jl index 1ef8a2eb75003..1aeeec96ea778 100644 --- a/stdlib/LibGit2/src/tree.jl +++ b/stdlib/LibGit2/src/tree.jl @@ -2,7 +2,7 @@ function GitTree(c::GitCommit) tree_out = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_commit_tree, :libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), tree_out, c) + @check ccall((:git_commit_tree, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), tree_out, c) GitTree(repository(c), tree_out[]) end @@ -35,7 +35,7 @@ function treewalk(f, tree::GitTree, post::Bool = false) entry = GitTreeEntry(tree, entry_ptr, false) return f(root, entry) end, Cint, (Cstring, Ptr{Cvoid}, Ref{Vector{Any}})) - err = ccall((:git_tree_walk, :libgit2), Cint, + err = ccall((:git_tree_walk, libgit2), Cint, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Any), tree.ptr, post, cbf, payload) if err < 0 @@ -58,7 +58,7 @@ Return the filename of the object on disk to which `te` refers. """ function filename(te::GitTreeEntry) ensure_initialized() - str = ccall((:git_tree_entry_name, :libgit2), Cstring, (Ptr{Cvoid},), te.ptr) + str = ccall((:git_tree_entry_name, libgit2), Cstring, (Ptr{Cvoid},), te.ptr) str != C_NULL && return unsafe_string(str) return nothing end @@ -70,7 +70,7 @@ Return the UNIX filemode of the object on disk to which `te` refers as an intege """ function filemode(te::GitTreeEntry) ensure_initialized() - return ccall((:git_tree_entry_filemode, :libgit2), Cint, (Ptr{Cvoid},), te.ptr) + return ccall((:git_tree_entry_filemode, libgit2), Cint, (Ptr{Cvoid},), te.ptr) end """ @@ -81,7 +81,7 @@ one of the types which [`objtype`](@ref) returns, e.g. a `GitTree` or `GitBlob`. """ function entrytype(te::GitTreeEntry) ensure_initialized() - otype = ccall((:git_tree_entry_type, :libgit2), Cint, (Ptr{Cvoid},), te.ptr) + otype = ccall((:git_tree_entry_type, libgit2), Cint, (Ptr{Cvoid},), te.ptr) return objtype(Consts.OBJECT(otype)) end @@ -93,7 +93,7 @@ Return the [`GitHash`](@ref) of the object to which `te` refers. 
function entryid(te::GitTreeEntry) ensure_initialized() GC.@preserve te begin - oid_ptr = ccall((:git_tree_entry_id, :libgit2), Ptr{UInt8}, (Ptr{Cvoid},), te.ptr) + oid_ptr = ccall((:git_tree_entry_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), te.ptr) oid = GitHash(oid_ptr) end return oid @@ -101,7 +101,7 @@ end function count(tree::GitTree) ensure_initialized() - return ccall((:git_tree_entrycount, :libgit2), Csize_t, (Ptr{Cvoid},), tree.ptr) + return ccall((:git_tree_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), tree.ptr) end function Base.getindex(tree::GitTree, i::Integer) @@ -109,7 +109,7 @@ function Base.getindex(tree::GitTree, i::Integer) throw(BoundsError(tree, i)) end ensure_initialized() - te_ptr = ccall((:git_tree_entry_byindex, :libgit2), + te_ptr = ccall((:git_tree_entry_byindex, libgit2), Ptr{Cvoid}, (Ptr{Cvoid}, Csize_t), tree.ptr, i-1) return GitTreeEntry(tree, te_ptr, false) @@ -133,7 +133,7 @@ function (::Type{T})(te::GitTreeEntry) where T<:GitObject ensure_initialized() repo = repository(te) obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_tree_entry_to_object, :libgit2), Cint, + @check ccall((:git_tree_entry_to_object, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}), obj_ptr_ptr, repo, te) return T(repo, obj_ptr_ptr[]) @@ -162,7 +162,7 @@ function _getindex(tree::GitTree, target::AbstractString) end entry = Ref{Ptr{Cvoid}}(C_NULL) - err = ccall((:git_tree_entry_bypath, :libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), entry, tree, target) + err = ccall((:git_tree_entry_bypath, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), entry, tree, target) err == Int(Error.ENOTFOUND) && return nothing err < 0 && throw(Error.GitError(err)) entry = GitTreeEntry(tree, entry[], true #= N.B.: Most other lookups need false here =#) diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl index d5ed9014aea86..96cea96d013e5 100644 --- a/stdlib/LibGit2/src/types.jl +++ b/stdlib/LibGit2/src/types.jl @@ -2,7 +2,7 @@ using Base: something import Base.@kwdef -import .Consts: GIT_SUBMODULE_IGNORE, GIT_MERGE_FILE_FAVOR, GIT_MERGE_FILE, GIT_CONFIG +import .Consts: GIT_SUBMODULE_IGNORE, GIT_MERGE_FILE_FAVOR, GIT_MERGE_FILE, GIT_CONFIG, GIT_OID_TYPE const OID_RAWSZ = 20 const OID_HEXSZ = OID_RAWSZ * 2 @@ -99,7 +99,7 @@ StrArrayStruct() = StrArrayStruct(C_NULL, 0) function free(sa_ref::Base.Ref{StrArrayStruct}) ensure_initialized() - ccall((:git_strarray_free, :libgit2), Cvoid, (Ptr{StrArrayStruct},), sa_ref) + ccall((:git_strarray_free, libgit2), Cvoid, (Ptr{StrArrayStruct},), sa_ref) end """ @@ -126,7 +126,7 @@ Buffer() = Buffer(C_NULL, 0, 0) function free(buf_ref::Base.Ref{Buffer}) ensure_initialized() - ccall((:git_buf_free, :libgit2), Cvoid, (Ptr{Buffer},), buf_ref) + ccall((:git_buf_free, libgit2), Cvoid, (Ptr{Buffer},), buf_ref) end """ @@ -346,6 +346,9 @@ The fields represent: @static if LibGit2.VERSION >= v"0.25.0" proxy_opts::ProxyOptions = ProxyOptions() end + @static if LibGit2.VERSION >= v"1.7.0" + depth::Cuint = Cuint(Consts.FETCH_DEPTH_FULL) + end @static if LibGit2.VERSION >= v"1.4.0" follow_redirects::Cuint = Cuint(0) end @@ -439,6 +442,9 @@ The fields represent: # options controlling how the diff text is generated context_lines::UInt32 = UInt32(3) interhunk_lines::UInt32 = UInt32(0) + @static if LibGit2.VERSION >= v"1.7.0" + oid_type::GIT_OID_TYPE = Consts.OID_DEFAULT + end id_abbrev::UInt16 = UInt16(7) max_size::Int64 = Int64(512*1024*1024) #512Mb old_prefix::Cstring = Cstring(C_NULL) @@ -904,12 +910,13 @@ end Matches the 
[`git_config_entry`](https://libgit2.org/libgit2/#HEAD/type/git_config_entry) struct. """ -@kwdef struct ConfigEntry - name::Cstring = Cstring(C_NULL) - value::Cstring = Cstring(C_NULL) - level::GIT_CONFIG = Consts.CONFIG_LEVEL_DEFAULT - free::Ptr{Cvoid} = C_NULL - payload::Any = nothing +struct ConfigEntry + name::Cstring + value::Cstring + include_depth::Cuint + level::GIT_CONFIG + free::Ptr{Cvoid} + payload::Ptr{Cvoid} # User is not permitted to read or write this field end @assert Base.allocatedinline(ConfigEntry) @@ -1003,7 +1010,7 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[ (:GitRepo, nothing, :AbstractGitObject, :git_repository), (:GitConfig, :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_config), (:GitIndex, :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_index), - (:GitRemote, :GitRepo, :AbstractGitObject, :git_remote), + (:GitRemote, :(Union{GitRepo, Nothing}), :AbstractGitObject, :git_remote), (:GitRevWalker, :GitRepo, :AbstractGitObject, :git_revwalk), (:GitReference, :GitRepo, :AbstractGitObject, :git_reference), (:GitDescribeResult, :GitRepo, :AbstractGitObject, :git_describe_result), @@ -1063,11 +1070,11 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[ @eval function Base.close(obj::$typ) if obj.ptr != C_NULL ensure_initialized() - ccall(($(string(cname, :_free)), :libgit2), Cvoid, (Ptr{Cvoid},), obj.ptr) + ccall(($(string(cname, :_free)), libgit2), Cvoid, (Ptr{Cvoid},), obj.ptr) obj.ptr = C_NULL if Threads.atomic_sub!(REFCOUNT, 1) == 1 # will the last finalizer please turn out the lights? - ccall((:git_libgit2_shutdown, :libgit2), Cint, ()) + ccall((:git_libgit2_shutdown, libgit2), Cint, ()) end end end @@ -1097,7 +1104,7 @@ end function Base.close(obj::GitSignature) if obj.ptr != C_NULL ensure_initialized() - ccall((:git_signature_free, :libgit2), Cvoid, (Ptr{SignatureStruct},), obj.ptr) + ccall((:git_signature_free, libgit2), Cvoid, (Ptr{SignatureStruct},), obj.ptr) obj.ptr = C_NULL end end @@ -1196,7 +1203,7 @@ Consts.OBJECT(::Type{GitObject}) = Consts.OBJ_ANY function Consts.OBJECT(ptr::Ptr{Cvoid}) ensure_initialized() - ccall((:git_object_type, :libgit2), Consts.OBJECT, (Ptr{Cvoid},), ptr) + ccall((:git_object_type, libgit2), Consts.OBJECT, (Ptr{Cvoid},), ptr) end """ @@ -1389,7 +1396,8 @@ CredentialPayload(p::CredentialPayload) = p function Base.shred!(p::CredentialPayload) # Note: Avoid shredding the `explicit` or `cache` fields as these are just references # and it is not our responsibility to shred them. - p.credential !== nothing && Base.shred!(p.credential) + credential = p.credential + credential !== nothing && Base.shred!(credential) p.credential = nothing end @@ -1430,8 +1438,9 @@ function approve(p::CredentialPayload; shred::Bool=true) # Each `approve` call needs to avoid shredding the passed in credential as we need # the credential information intact for subsequent approve calls. - if p.cache !== nothing - approve(p.cache, cred, p.url) + cache = p.cache + if cache !== nothing + approve(cache, cred, p.url) shred = false # Avoid wiping `cred` as this would also wipe the cached copy end if p.allow_git_helpers @@ -1460,8 +1469,9 @@ function reject(p::CredentialPayload; shred::Bool=true) # Note: each `reject` call needs to avoid shredding the passed in credential as we need # the credential information intact for subsequent reject calls. 
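Among the `types.jl` changes above, the functional addition is the `depth` field that the fetch-options struct gains when the embedded libgit2 is at least 1.7.0, defaulting to `Consts.FETCH_DEPTH_FULL`. A hedged sketch of how a caller might ask for a shallow fetch, assuming the field is compiled in (it exists only under that version gate) and without claiming that the higher-level `LibGit2.fetch` forwards it:

```julia
import LibGit2

fo = if LibGit2.VERSION >= v"1.7.0"
    # depth = 1 asks libgit2 to truncate history to a single commit;
    # the default, Consts.FETCH_DEPTH_FULL, keeps the existing full-history fetch.
    LibGit2.FetchOptions(depth = Cuint(1))
else
    LibGit2.FetchOptions()                     # older libgit2: no shallow-fetch knob
end
```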
- if p.cache !== nothing - reject(p.cache, cred, p.url) + cache = p.cache + if cache !== nothing + reject(cache, cred, p.url) end if p.allow_git_helpers reject(p.config, cred, p.url) @@ -1476,3 +1486,26 @@ end # Useful for functions which can handle various kinds of credentials const Creds = Union{CredentialPayload, AbstractCredential, CachedCredentials, Nothing} + +struct _GitRemoteHead + available_local::Cint + oid::GitHash + loid::GitHash + name::Cstring + symref_target::Cstring +end + +struct GitRemoteHead + available_local::Bool + oid::GitHash + loid::GitHash + name::String + symref_target::Union{Nothing,String} + function GitRemoteHead(head::_GitRemoteHead) + name = unsafe_string(head.name) + symref_target = (head.symref_target != C_NULL ? + unsafe_string(head.symref_target) : nothing) + return new(head.available_local != 0, + head.oid, head.loid, name, symref_target) + end +end diff --git a/stdlib/LibGit2/src/utils.jl b/stdlib/LibGit2/src/utils.jl index b601ea4efe601..f62663a6ea4ca 100644 --- a/stdlib/LibGit2/src/utils.jl +++ b/stdlib/LibGit2/src/utils.jl @@ -37,7 +37,7 @@ function version() major = Ref{Cint}(0) minor = Ref{Cint}(0) patch = Ref{Cint}(0) - @check ccall((:git_libgit2_version, :libgit2), Cint, + @check ccall((:git_libgit2_version, libgit2), Cint, (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch) return VersionNumber(major[], minor[], patch[]) end @@ -72,7 +72,7 @@ Return a list of git features the current version of libgit2 supports, such as threading or using HTTPS or SSH. """ function features() - feat = ccall((:git_libgit2_features, :libgit2), Cint, ()) + feat = ccall((:git_libgit2_features, libgit2), Cint, ()) res = Consts.GIT_FEATURE[] for f in instances(Consts.GIT_FEATURE) isset(feat, Cuint(f)) && Base.push!(res, f) @@ -171,7 +171,7 @@ end function credential_identifier(url::AbstractString) m = match(URL_REGEX, url) - scheme = something(m[:scheme], "") - host = m[:host] + scheme = something(m[:scheme], SubString("")) + host = something(m[:host]) credential_identifier(scheme, host) end diff --git a/stdlib/LibGit2/src/walker.jl b/stdlib/LibGit2/src/walker.jl index 468e6899a7aa8..e43687b014226 100644 --- a/stdlib/LibGit2/src/walker.jl +++ b/stdlib/LibGit2/src/walker.jl @@ -21,7 +21,7 @@ Since the `GitHash` is unique to a commit, `cnt` will be `1`. function GitRevWalker(repo::GitRepo) ensure_initialized() w_ptr = Ref{Ptr{Cvoid}}(C_NULL) - @check ccall((:git_revwalk_new, :libgit2), Cint, + @check ccall((:git_revwalk_new, libgit2), Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo.ptr) return GitRevWalker(repo, w_ptr[]) end @@ -29,7 +29,7 @@ end function Base.iterate(w::GitRevWalker, state=nothing) ensure_initialized() id_ptr = Ref(GitHash()) - err = ccall((:git_revwalk_next, :libgit2), Cint, + err = ccall((:git_revwalk_next, libgit2), Cint, (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w.ptr) if err == Cint(Error.GIT_OK) return (id_ptr[], nothing) @@ -51,7 +51,7 @@ during the walk. 
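With the `ConfigEntry` change above, the struct now mirrors libgit2's raw layout (plain `Cstring` fields plus the new `include_depth`, and an opaque `payload`), so values coming out of a config iterator must be copied into Julia strings explicitly; the test file later in this patch does exactly that via `unsafe_string`. A minimal sketch, assuming a config file at a hypothetical path:

```julia
import LibGit2

cfg = LibGit2.GitConfig("/path/to/gitconfig", LibGit2.Consts.CONFIG_LEVEL_APP)
for entry in LibGit2.GitConfigIter(cfg, r"user\..*")
    # name/value are Cstrings owned by libgit2; copy them before the next iteration.
    println(unsafe_string(entry.name), " = ", unsafe_string(entry.value))
end
```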
""" function push_head!(w::GitRevWalker) ensure_initialized() - @check ccall((:git_revwalk_push_head, :libgit2), Cint, (Ptr{Cvoid},), w.ptr) + @check ccall((:git_revwalk_push_head, libgit2), Cint, (Ptr{Cvoid},), w.ptr) return w end @@ -64,20 +64,20 @@ of that year as `cid` and then passing the resulting `w` to [`LibGit2.map`](@ref """ function push!(w::GitRevWalker, cid::GitHash) ensure_initialized() - @check ccall((:git_revwalk_push, :libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w.ptr, Ref(cid)) + @check ccall((:git_revwalk_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w.ptr, Ref(cid)) return w end function push!(w::GitRevWalker, range::AbstractString) ensure_initialized() - @check ccall((:git_revwalk_push_range, :libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w.ptr, range) + @check ccall((:git_revwalk_push_range, libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w.ptr, range) return w end function Base.sort!(w::GitRevWalker; by::Cint = Consts.SORT_NONE, rev::Bool=false) ensure_initialized() rev && (by |= Consts.SORT_REVERSE) - @check ccall((:git_revwalk_sorting, :libgit2), Cint, (Ptr{Cvoid}, Cint), w.ptr, by) + @check ccall((:git_revwalk_sorting, libgit2), Cint, (Ptr{Cvoid}, Cint), w.ptr, by) return w end diff --git a/stdlib/LibGit2/test/libgit2-tests.jl b/stdlib/LibGit2/test/libgit2-tests.jl new file mode 100644 index 0000000000000..c5abca15ca719 --- /dev/null +++ b/stdlib/LibGit2/test/libgit2-tests.jl @@ -0,0 +1,3245 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module LibGit2Tests + +import LibGit2 +using LibGit2_jll +using Test +using Random, Serialization, Sockets + +const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") +isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl")) +import .Main.FakePTYs: with_fake_pty + +const timeout = 60 + +function challenge_prompt(code::Expr, challenges) + input_code = tempname() + open(input_code, "w") do fp + serialize(fp, code) + end + output_file = tempname() + torun = """ + import LibGit2 + using Serialization + result = open($(repr(input_code))) do fp + eval(deserialize(fp)) + end + open($(repr(output_file)), "w") do fp + serialize(fp, result) + end""" + cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun` + try + challenge_prompt(cmd, challenges) + return open(output_file, "r") do fp + deserialize(fp) + end + finally + isfile(output_file) && rm(output_file) + isfile(input_code) && rm(input_code) + end + return nothing +end + +function challenge_prompt(cmd::Cmd, challenges) + function format_output(output) + str = read(seekstart(output), String) + isempty(str) && return "" + return "Process output found:\n\"\"\"\n$str\n\"\"\"" + end + out = IOBuffer() + with_fake_pty() do pts, ptm + p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default + Base.close_stdio(pts) + + # Kill the process if it takes too long. Typically occurs when process is waiting + # for input. 
+ timer = Channel{Symbol}(1) + watcher = @async begin + waited = 0 + while waited < timeout && process_running(p) + sleep(1) + waited += 1 + end + + if process_running(p) + kill(p) + put!(timer, :timeout) + elseif success(p) + put!(timer, :success) + else + put!(timer, :failure) + end + + # SIGKILL stubborn processes + if process_running(p) + sleep(3) + process_running(p) && kill(p, Base.SIGKILL) + end + wait(p) + end + + wroteall = false + try + for (challenge, response) in challenges + write(out, readuntil(ptm, challenge, keep=true)) + if !isopen(ptm) + error("Could not locate challenge: \"$challenge\". ", + format_output(out)) + end + write(ptm, response) + end + wroteall = true + + # Capture output from process until `pts` is closed + write(out, ptm) + catch ex + if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO) + # ignore EIO from `ptm` after `pts` dies + error("Process failed possibly waiting for a response. ", + format_output(out)) + end + end + + status = fetch(timer) + close(ptm) + if status !== :success + if status === :timeout + error("Process timed out possibly waiting for a response. ", + format_output(out)) + else + error("Failed process. ", format_output(out), "\n", p) + end + end + wait(watcher) + end + nothing +end + +const LIBGIT2_MIN_VER = v"1.0.0" +const LIBGIT2_HELPER_PATH = joinpath(@__DIR__, "libgit2-helpers.jl") + +const KEY_DIR = joinpath(@__DIR__, "keys") +const HOME = Sys.iswindows() ? "USERPROFILE" : "HOME" # Environment variable name for home +const GIT_INSTALLED = try + success(`git --version`) +catch + false +end + +function get_global_dir() + buf = Ref(LibGit2.Buffer()) + + LibGit2.@check @ccall libgit2.git_libgit2_opts( + LibGit2.Consts.GET_SEARCH_PATH::Cint; + LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint, + buf::Ptr{LibGit2.Buffer})::Cint + path = unsafe_string(buf[].ptr) + LibGit2.free(buf) + return path +end + +function set_global_dir(dir) + LibGit2.@check @ccall libgit2.git_libgit2_opts( + LibGit2.Consts.SET_SEARCH_PATH::Cint; + LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint, + dir::Cstring)::Cint + return +end + +function with_libgit2_temp_home(f) + mktempdir() do tmphome + oldpath = get_global_dir() + set_global_dir(tmphome) + try + @test get_global_dir() == tmphome + f(tmphome) + finally + set_global_dir(oldpath) + end + return + end +end + +######### +# TESTS # +######### + +@testset "Check library version" begin + v = LibGit2.version() + @test v.major == LIBGIT2_MIN_VER.major && v.minor >= LIBGIT2_MIN_VER.minor +end + +@testset "Check library features" begin + f = LibGit2.features() + @test findfirst(isequal(LibGit2.Consts.FEATURE_SSH), f) !== nothing + @test findfirst(isequal(LibGit2.Consts.FEATURE_HTTPS), f) !== nothing +end + +@testset "OID" begin + z = LibGit2.GitHash() + @test LibGit2.iszero(z) + @test z == zero(LibGit2.GitHash) + @test z == LibGit2.GitHash(z) + rs = string(z) + rr = LibGit2.raw(z) + @test z == LibGit2.GitHash(rr) + @test z == LibGit2.GitHash(rs) + @test z == LibGit2.GitHash(pointer(rr)) + + @test LibGit2.GitShortHash(z, 20) == LibGit2.GitShortHash(rs[1:20]) + @test_throws ArgumentError LibGit2.GitHash(Ptr{UInt8}(C_NULL)) + @test_throws ArgumentError LibGit2.GitHash(rand(UInt8, 2*LibGit2.OID_RAWSZ)) + @test_throws ArgumentError LibGit2.GitHash("a") +end + +@testset "StrArrayStruct" begin + p = ["XXX","YYY"] + a = Base.cconvert(Ptr{LibGit2.StrArrayStruct}, p) + b = Base.unsafe_convert(Ptr{LibGit2.StrArrayStruct}, a) + @test p == convert(Vector{String}, unsafe_load(b)) + @noinline gcuse(a) = a + gcuse(a) +end + +@testset 
"Signature" begin + sig = LibGit2.Signature("AAA", "AAA@BBB.COM", round(time(); digits=0), 0) + git_sig = convert(LibGit2.GitSignature, sig) + sig2 = LibGit2.Signature(git_sig) + close(git_sig) + @test sig.name == sig2.name + @test sig.email == sig2.email + @test sig.time == sig2.time + sig3 = LibGit2.Signature("AAA","AAA@BBB.COM") + @test sig3.name == sig.name + @test sig3.email == sig.email +end + +@testset "Default config" begin + with_libgit2_temp_home() do tmphome + cfg = LibGit2.GitConfig() + @test isa(cfg, LibGit2.GitConfig) + @test LibGit2.getconfig("fake.property", "") == "" + LibGit2.set!(cfg, "fake.property", "AAAA") + @test LibGit2.getconfig("fake.property", "") == "AAAA" + end +end + +@testset "Trace" begin + code = "import LibGit2; LibGit2.trace_set(LibGit2.Consts.TRACE_DEBUG); exit(LibGit2.trace_set(0))" + run(`$(Base.julia_cmd()) --startup-file=no -e $code`) +end + +# See #21872 and #21636 +LibGit2.version() >= v"0.26.0" && Sys.isunix() && @testset "Default config with symlink" begin + with_libgit2_temp_home() do tmphome + write(joinpath(tmphome, "real_gitconfig"), "[fake]\n\tproperty = BBB") + symlink(joinpath(tmphome, "real_gitconfig"), + joinpath(tmphome, ".gitconfig")) + cfg = LibGit2.GitConfig() + @test isa(cfg, LibGit2.GitConfig) + LibGit2.getconfig("fake.property", "") == "BBB" + LibGit2.set!(cfg, "fake.property", "AAAA") + LibGit2.getconfig("fake.property", "") == "AAAA" + end +end + +@testset "Git URL parsing" begin + @testset "HTTPS URL" begin + m = match(LibGit2.URL_REGEX, "https://user:pass@server.com:80/org/project.git") + @test m[:scheme] == "https" + @test m[:user] == "user" + @test m[:password] == "pass" + @test m[:host] == "server.com" + @test m[:port] == "80" + @test m[:path] == "org/project.git" + end + + @testset "SSH URL" begin + m = match(LibGit2.URL_REGEX, "ssh://user:pass@server:22/project.git") + @test m[:scheme] == "ssh" + @test m[:user] == "user" + @test m[:password] == "pass" + @test m[:host] == "server" + @test m[:port] == "22" + @test m[:path] == "project.git" + end + + @testset "SSH URL, scp-like syntax" begin + m = match(LibGit2.URL_REGEX, "user@server:project.git") + @test m[:scheme] === nothing + @test m[:user] == "user" + @test m[:password] === nothing + @test m[:host] == "server" + @test m[:port] === nothing + @test m[:path] == "project.git" + end + + # scp-like syntax corner case. The SCP syntax does not support port so everything after + # the colon is part of the path. 
+ @testset "scp-like syntax, no port" begin + m = match(LibGit2.URL_REGEX, "server:1234/repo") + @test m[:scheme] === nothing + @test m[:user] === nothing + @test m[:password] === nothing + @test m[:host] == "server" + @test m[:port] === nothing + @test m[:path] == "1234/repo" + end + + @testset "HTTPS URL, realistic" begin + m = match(LibGit2.URL_REGEX, "https://github.com/JuliaLang/Example.jl.git") + @test m[:scheme] == "https" + @test m[:user] === nothing + @test m[:password] === nothing + @test m[:host] == "github.com" + @test m[:port] === nothing + @test m[:path] == "JuliaLang/Example.jl.git" + end + + @testset "SSH URL, realistic" begin + m = match(LibGit2.URL_REGEX, "git@github.com:JuliaLang/Example.jl.git") + @test m[:scheme] === nothing + @test m[:user] == "git" + @test m[:password] === nothing + @test m[:host] == "github.com" + @test m[:port] === nothing + @test m[:path] == "JuliaLang/Example.jl.git" + end + + @testset "usernames with special characters" begin + m = match(LibGit2.URL_REGEX, "user-name@hostname.com") + @test m[:user] == "user-name" + end + + @testset "HTTPS URL, no path" begin + m = match(LibGit2.URL_REGEX, "https://user:pass@server.com:80") + @test m[:path] === nothing + end + + @testset "scp-like syntax, no path" begin + m = match(LibGit2.URL_REGEX, "user@server:") + @test m[:path] == "" + + m = match(LibGit2.URL_REGEX, "user@server") + @test m[:path] === nothing + end + + @testset "HTTPS URL, invalid path" begin + m = match(LibGit2.URL_REGEX, "https://git@server:repo") + @test m === nothing + end + + # scp-like syntax should have a colon separating the hostname from the path + @testset "scp-like syntax, invalid path" begin + m = match(LibGit2.URL_REGEX, "git@server/repo") + @test m === nothing + end +end + +@testset "Git URL formatting" begin + @testset "HTTPS URL" begin + url = LibGit2.git_url( + scheme="https", + username="user", + host="server.com", + port=80, + path="org/project.git") + @test url == "https://user@server.com:80/org/project.git" + end + + @testset "SSH URL" begin + url = LibGit2.git_url( + scheme="ssh", + username="user", + host="server", + port="22", + path="project.git") + @test url == "ssh://user@server:22/project.git" + end + + @testset "SSH URL, scp-like syntax" begin + url = LibGit2.git_url( + username="user", + host="server", + path="project.git") + @test url == "user@server:project.git" + end + + @testset "HTTPS URL, realistic" begin + url = LibGit2.git_url( + scheme="https", + host="github.com", + path="JuliaLang/Example.jl.git") + @test url == "https://github.com/JuliaLang/Example.jl.git" + end + + @testset "SSH URL, realistic" begin + url = LibGit2.git_url( + username="git", + host="github.com", + path="JuliaLang/Example.jl.git") + @test url == "git@github.com:JuliaLang/Example.jl.git" + end + + @testset "HTTPS URL, no path" begin + url = LibGit2.git_url( + scheme="https", + username="user", + host="server.com", + port="80") + @test url == "https://user@server.com:80" + end + + @testset "scp-like syntax, no path" begin + url = LibGit2.git_url( + username="user", + host="server.com") + @test url == "user@server.com" + end + + @testset "HTTP URL, path includes slash prefix" begin + url = LibGit2.git_url( + scheme="http", + host="server.com", + path="/path") + @test url == "http://server.com/path" + end + + @testset "empty" begin + @test_throws ArgumentError LibGit2.git_url() + + @test LibGit2.git_url(host="server.com") == "server.com" + url = LibGit2.git_url( + scheme="", + username="", + host="server.com", + port="", + path="") + 
@test url == "server.com" + end +end + +@testset "Passphrase Required" begin + @testset "missing file" begin + @test !LibGit2.is_passphrase_required("") + + file = joinpath(KEY_DIR, "foobar") + @test !isfile(file) + @test !LibGit2.is_passphrase_required(file) + end + + @testset "not private key" begin + @test !LibGit2.is_passphrase_required(joinpath(KEY_DIR, "invalid.pub")) + end + + @testset "private key, with passphrase" begin + @test LibGit2.is_passphrase_required(joinpath(KEY_DIR, "valid-passphrase")) + end + + @testset "private key, no passphrase" begin + @test !LibGit2.is_passphrase_required(joinpath(KEY_DIR, "valid")) + end +end + +@testset "GitCredential" begin + @testset "missing" begin + str = "" + cred = read!(IOBuffer(str), LibGit2.GitCredential()) + @test cred == LibGit2.GitCredential() + @test sprint(write, cred) == str + Base.shred!(cred) + end + + @testset "empty" begin + str = """ + protocol= + host= + path= + username= + password= + """ + cred = read!(IOBuffer(str), LibGit2.GitCredential()) + @test cred == LibGit2.GitCredential("", "", "", "", "") + @test sprint(write, cred) == str + Base.shred!(cred) + end + + @testset "input/output" begin + str = """ + protocol=https + host=example.com + username=alice + password=***** + """ + expected_cred = LibGit2.GitCredential("https", "example.com", nothing, "alice", "*****") + + cred = read!(IOBuffer(str), LibGit2.GitCredential()) + @test cred == expected_cred + @test sprint(write, cred) == str + Base.shred!(cred) + Base.shred!(expected_cred) + end + + @testset "extra newline" begin + # The "Git for Windows" installer will also install the "Git Credential Manager for + # Windows" (https://github.com/Microsoft/Git-Credential-Manager-for-Windows) (also + # known as "manager" in the .gitconfig files). This credential manager returns an + # additional newline when returning the results. 
+ str = """ + protocol=https + host=example.com + path= + username=bob + password=***** + + """ + expected_cred = LibGit2.GitCredential("https", "example.com", "", "bob", "*****") + + cred = read!(IOBuffer(str), LibGit2.GitCredential()) + @test cred == expected_cred + @test sprint(write, cred) * "\n" == str + Base.shred!(cred) + Base.shred!(expected_cred) + end + + @testset "unknown attribute" begin + str = """ + protocol=https + host=example.com + attribute=value + username=bob + password=***** + """ + expected_cred = LibGit2.GitCredential("https", "example.com", nothing, "bob", "*****") + expected_log = (:warn, "Unknown git credential attribute found: \"attribute\"") + + cred = @test_logs expected_log read!(IOBuffer(str), LibGit2.GitCredential()) + @test cred == expected_cred + Base.shred!(cred) + Base.shred!(expected_cred) + end + + @testset "use http path" begin + cred = LibGit2.GitCredential("https", "example.com", "dir/file", "alice", "*****") + expected = """ + protocol=https + host=example.com + username=alice + password=***** + """ + + @test cred.use_http_path + cred.use_http_path = false + + @test cred.path == "dir/file" + @test sprint(write, cred) == expected + Base.shred!(cred) + end + + @testset "URL input/output" begin + str = """ + host=example.com + password=bar + url=https://a@b/c + username=foo + """ + expected_str = """ + protocol=https + host=b + path=c + username=foo + """ + expected_cred = LibGit2.GitCredential("https", "b", "c", "foo", nothing) + + cred = read!(IOBuffer(str), LibGit2.GitCredential()) + @test cred == expected_cred + @test sprint(write, cred) == expected_str + Base.shred!(cred) + Base.shred!(expected_cred) + end + + @testset "ismatch" begin + # Equal + cred = LibGit2.GitCredential("https", "github.com") + @test LibGit2.ismatch("https://github.com", cred) + Base.shred!(cred) + + # Credential hostname is different + cred = LibGit2.GitCredential("https", "github.com") + @test !LibGit2.ismatch("https://myhost", cred) + Base.shred!(cred) + + # Credential is less specific than URL + cred = LibGit2.GitCredential("https") + @test !LibGit2.ismatch("https://github.com", cred) + Base.shred!(cred) + + # Credential is more specific than URL + cred = LibGit2.GitCredential("https", "github.com", "path", "user", "pass") + @test LibGit2.ismatch("https://github.com", cred) + Base.shred!(cred) + + # Credential needs to have an "" username to match + cred = LibGit2.GitCredential("https", "github.com", nothing, "") + @test LibGit2.ismatch("https://@github.com", cred) + Base.shred!(cred) + + cred = LibGit2.GitCredential("https", "github.com", nothing, nothing) + @test !LibGit2.ismatch("https://@github.com", cred) + Base.shred!(cred) + end + + @testset "GITHUB_REGEX" begin + github_regex_test = function(url, user, repo) + m = match(LibGit2.GITHUB_REGEX, url) + @test m !== nothing + @test m[1] == "$user/$repo" + @test m[2] == user + @test m[3] == repo + end + user = "User" + repo = "Repo" + github_regex_test("git@github.com/$user/$repo.git", user, repo) + github_regex_test("https://github.com/$user/$repo.git", user, repo) + github_regex_test("https://username@github.com/$user/$repo.git", user, repo) + github_regex_test("ssh://git@github.com/$user/$repo.git", user, repo) + github_regex_test("git@github.com/$user/$repo", user, repo) + github_regex_test("https://github.com/$user/$repo", user, repo) + github_regex_test("https://username@github.com/$user/$repo", user, repo) + github_regex_test("ssh://git@github.com/$user/$repo", user, repo) + @test !occursin(LibGit2.GITHUB_REGEX, 
"git@notgithub.com/$user/$repo.git") + end + + @testset "UserPasswordCredential/url constructor" begin + user_pass_cred = LibGit2.UserPasswordCredential("user", "*******") + url = "https://github.com" + expected_cred = LibGit2.GitCredential("https", "github.com", nothing, "user", "*******") + + cred = LibGit2.GitCredential(user_pass_cred, url) + @test cred == expected_cred + + # Shredding the UserPasswordCredential shouldn't result in information being lost + # inside of a GitCredential. + Base.shred!(user_pass_cred) + @test cred == expected_cred + + Base.shred!(cred) + Base.shred!(expected_cred) + end +end + +mktempdir() do dir + dir = realpath(dir) + # test parameters + repo_url = "https://github.com/JuliaLang/Example.jl" + cache_repo = joinpath(dir, "Example") + test_repo = joinpath(dir, "Example.Test") + test_sig = LibGit2.Signature("TEST", "TEST@TEST.COM", round(time(); digits=0), 0) + test_dir = "testdir" + test_file = "$(test_dir)/testfile" + config_file = "testconfig" + commit_msg1 = randstring(10) + commit_msg2 = randstring(10) + commit_oid1 = LibGit2.GitHash() + commit_oid2 = LibGit2.GitHash() + commit_oid3 = LibGit2.GitHash() + default_branch = LibGit2.getconfig("init.defaultBranch", "master") + test_branch = "test_branch" + test_branch2 = "test_branch_two" + tag1 = "tag1" + tag2 = "tag2" + + @testset "Configuration" begin + LibGit2.with(LibGit2.GitConfig(joinpath(dir, config_file), LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + @test_throws LibGit2.Error.GitError LibGit2.get(AbstractString, cfg, "tmp.str") + @test isempty(LibGit2.get(cfg, "tmp.str", "")) == true + + LibGit2.set!(cfg, "tmp.str", "AAAA") + LibGit2.set!(cfg, "tmp.int32", Int32(1)) + LibGit2.set!(cfg, "tmp.int64", Int64(1)) + LibGit2.set!(cfg, "tmp.bool", true) + + @test LibGit2.get(cfg, "tmp.str", "") == "AAAA" + @test LibGit2.get(cfg, "tmp.int32", Int32(0)) == Int32(1) + @test LibGit2.get(cfg, "tmp.int64", Int64(0)) == Int64(1) + @test LibGit2.get(cfg, "tmp.bool", false) == true + + # Ordering of entries appears random when using `LibGit2.set!` + count = 0 + for entry in LibGit2.GitConfigIter(cfg, r"tmp.*") + count += 1 + name, value = unsafe_string(entry.name), unsafe_string(entry.value) + if name == "tmp.str" + @test value == "AAAA" + elseif name == "tmp.int32" + @test value == "1" + elseif name == "tmp.int64" + @test value == "1" + elseif name == "tmp.bool" + @test value == "true" + else + error("Found unexpected entry: $name") + end + show_str = sprint(show, entry) + @test show_str == string("ConfigEntry(\"", name, "\", \"", value, "\")") + end + @test count == 4 + end + end + + @testset "Configuration Iteration" begin + config_path = joinpath(dir, config_file) + + # Write config entries with duplicate names + open(config_path, "a") do fp + write(fp, """ + [credential] + helper = store + username = julia + [credential] + helper = cache + """) + end + + LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + # Will only see the last entry + @test LibGit2.get(cfg, "credential.helper", "") == "cache" + + count = 0 + for entry in LibGit2.GitConfigIter(cfg, "credential.helper") + count += 1 + name, value = unsafe_string(entry.name), unsafe_string(entry.value) + @test name == "credential.helper" + @test value == (count == 1 ? 
"store" : "cache") + end + @test count == 2 + end + end + + @testset "Initializing repository" begin + @testset "with remote branch" begin + LibGit2.with(LibGit2.init(cache_repo)) do repo + @test isdir(cache_repo) + @test LibGit2.path(repo) == LibGit2.posixpath(realpath(cache_repo)) + @test isdir(joinpath(cache_repo, ".git")) + # set a remote branch + branch = "upstream" + LibGit2.GitRemote(repo, branch, repo_url) |> close + + # test remote's representation in the repo's config + config = joinpath(cache_repo, ".git", "config") + lines = split(open(x->read(x, String), config, "r"), "\n") + @test any(map(x->x == "[remote \"upstream\"]", lines)) + + LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, branch)) do remote + # test various remote properties + @test LibGit2.url(remote) == repo_url + @test LibGit2.push_url(remote) == "" + @test LibGit2.name(remote) == "upstream" + @test isa(remote, LibGit2.GitRemote) + + # test showing a GitRemote object + @test sprint(show, remote) == "GitRemote:\nRemote name: upstream url: $repo_url" + end + # test setting and getting the remote's URL + @test LibGit2.isattached(repo) + LibGit2.set_remote_url(repo, "upstream", "unknown") + LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, branch)) do remote + @test LibGit2.url(remote) == "unknown" + @test LibGit2.push_url(remote) == "unknown" + @test sprint(show, remote) == "GitRemote:\nRemote name: upstream url: unknown" + end + LibGit2.set_remote_url(cache_repo, "upstream", repo_url) + LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, branch)) do remote + @test LibGit2.url(remote) == repo_url + @test LibGit2.push_url(remote) == repo_url + @test sprint(show, remote) == "GitRemote:\nRemote name: upstream url: $repo_url" + LibGit2.add_fetch!(repo, remote, "upstream") + + # test setting fetch and push refspecs + @test LibGit2.fetch_refspecs(remote) == String["+refs/heads/*:refs/remotes/upstream/*"] + LibGit2.add_push!(repo, remote, "refs/heads/master") + end + LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, branch)) do remote + @test LibGit2.push_refspecs(remote) == String["refs/heads/master"] + end + # constructor with a refspec + LibGit2.with(LibGit2.GitRemote(repo, "upstream2", repo_url, "upstream")) do remote + @test sprint(show, remote) == "GitRemote:\nRemote name: upstream2 url: $repo_url" + @test LibGit2.fetch_refspecs(remote) == String["upstream"] + end + + LibGit2.with(LibGit2.GitRemoteAnon(repo, repo_url)) do remote + @test LibGit2.url(remote) == repo_url + @test LibGit2.push_url(remote) == "" + @test LibGit2.name(remote) == "" + @test isa(remote, LibGit2.GitRemote) + end + end + end + + @testset "bare" begin + path = joinpath(dir, "Example.Bare") + LibGit2.with(LibGit2.init(path, true)) do repo + @test isdir(path) + @test LibGit2.path(repo) == LibGit2.posixpath(realpath(path)) + @test isfile(joinpath(path, LibGit2.Consts.HEAD_FILE)) + @test LibGit2.isattached(repo) + end + + path = joinpath("garbagefakery", "Example.Bare") + try + LibGit2.GitRepo(path) + error("unexpected") + catch e + @test typeof(e) == LibGit2.GitError + @test startswith( + lowercase(sprint(show, e)), + lowercase("GitError(Code:ENOTFOUND, Class:OS, failed to resolve path")) + end + path = joinpath(dir, "Example.BareTwo") + LibGit2.with(LibGit2.init(path, true)) do repo + #just to see if this works + LibGit2.cleanup(repo) + end + end + end + + @testset "Cloning repository" begin + function bare_repo_tests(repo, repo_path) + @test isdir(repo_path) + @test LibGit2.path(repo) == LibGit2.posixpath(realpath(repo_path)) + @test 
isfile(joinpath(repo_path, LibGit2.Consts.HEAD_FILE)) + @test LibGit2.isattached(repo) + @test LibGit2.remotes(repo) == ["origin"] + end + @testset "bare" begin + repo_path = joinpath(dir, "Example.Bare1") + LibGit2.with(LibGit2.clone(cache_repo, repo_path, isbare = true)) do repo + bare_repo_tests(repo, repo_path) + end + end + @testset "bare with remote callback" begin + repo_path = joinpath(dir, "Example.Bare2") + LibGit2.with(LibGit2.clone(cache_repo, repo_path, isbare = true, remote_cb = LibGit2.mirror_cb())) do repo + bare_repo_tests(repo, repo_path) + LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, "origin")) do rmt + @test LibGit2.fetch_refspecs(rmt)[1] == "+refs/*:refs/*" + end + end + end + @testset "normal" begin + LibGit2.with(LibGit2.clone(cache_repo, test_repo)) do repo + @test isdir(test_repo) + @test LibGit2.path(repo) == LibGit2.posixpath(realpath(test_repo)) + @test isdir(joinpath(test_repo, ".git")) + @test LibGit2.workdir(repo) == LibGit2.path(repo)*"/" + @test LibGit2.isattached(repo) + @test LibGit2.isorphan(repo) + repo_str = sprint(show, repo) + @test repo_str == "LibGit2.GitRepo($(sprint(show,LibGit2.path(repo))))" + end + end + @testset "credentials callback conflict" begin + callbacks = LibGit2.Callbacks(:credentials => (C_NULL, 0)) + cred_payload = LibGit2.CredentialPayload() + @test_throws ArgumentError LibGit2.clone(cache_repo, test_repo, callbacks=callbacks, credentials=cred_payload) + end + end + + @testset "Update cache repository" begin + + @testset "with commits" begin + repo = LibGit2.GitRepo(cache_repo) + repo_dir = joinpath(cache_repo,test_dir) + mkdir(repo_dir) + repo_file = open(joinpath(cache_repo,test_file), "a") + try + # create commits + println(repo_file, commit_msg1) + flush(repo_file) + LibGit2.add!(repo, test_file) + @test LibGit2.iszero(commit_oid1) + commit_oid1 = LibGit2.commit(repo, commit_msg1; author=test_sig, committer=test_sig) + @test !LibGit2.iszero(commit_oid1) + @test LibGit2.GitHash(LibGit2.head(cache_repo)) == commit_oid1 + + println(repo_file, randstring(10)) + flush(repo_file) + LibGit2.add!(repo, test_file) + commit_oid3 = LibGit2.commit(repo, randstring(10); author=test_sig, committer=test_sig) + + println(repo_file, commit_msg2) + flush(repo_file) + LibGit2.add!(repo, test_file) + @test LibGit2.iszero(commit_oid2) + commit_oid2 = LibGit2.commit(repo, commit_msg2; author=test_sig, committer=test_sig) + @test !LibGit2.iszero(commit_oid2) + + # test getting list of commit authors + auths = LibGit2.authors(repo) + @test length(auths) == 3 + for auth in auths + @test auth.name == test_sig.name + @test auth.time == test_sig.time + @test auth.email == test_sig.email + end + + # check various commit properties - commit_oid1 happened before + # commit_oid2, so it *is* an ancestor of commit_oid2 + @test LibGit2.is_ancestor_of(string(commit_oid1), string(commit_oid2), repo) + @test LibGit2.iscommit(string(commit_oid1), repo) + @test !LibGit2.iscommit(string(commit_oid1)*"fake", repo) + @test LibGit2.iscommit(string(commit_oid2), repo) + + # lookup commits + LibGit2.with(LibGit2.GitCommit(repo, commit_oid1)) do cmt + @test LibGit2.Consts.OBJECT(typeof(cmt)) == LibGit2.Consts.OBJ_COMMIT + @test commit_oid1 == LibGit2.GitHash(cmt) + short_oid1 = LibGit2.GitShortHash(string(commit_oid1)) + @test string(commit_oid1) == string(short_oid1) + @test cmp(commit_oid1, short_oid1) == 0 + @test cmp(short_oid1, commit_oid1) == 0 + @test !(short_oid1 < commit_oid1) + + # test showing ShortHash + short_str = sprint(show, short_oid1) + @test 
short_str == "GitShortHash(\"$(string(short_oid1))\")" + short_oid2 = LibGit2.GitShortHash(cmt) + @test startswith(string(commit_oid1), string(short_oid2)) + + LibGit2.with(LibGit2.GitCommit(repo, short_oid2)) do cmt2 + @test commit_oid1 == LibGit2.GitHash(cmt2) + end + # check that the author and committer signatures are correct + auth = LibGit2.author(cmt) + @test isa(auth, LibGit2.Signature) + @test auth.name == test_sig.name + @test auth.time == test_sig.time + @test auth.email == test_sig.email + short_auth = LibGit2.author(LibGit2.GitCommit(repo, short_oid1)) + @test short_auth.name == test_sig.name + @test short_auth.time == test_sig.time + @test short_auth.email == test_sig.email + cmtr = LibGit2.committer(cmt) + @test isa(cmtr, LibGit2.Signature) + @test cmtr.name == test_sig.name + @test cmtr.time == test_sig.time + @test cmtr.email == test_sig.email + @test LibGit2.message(cmt) == commit_msg1 + + # test that the parent is correct + @test LibGit2.parentcount(cmt) == 0 + LibGit2.with(LibGit2.GitCommit(repo, commit_oid3)) do cmt3 + @test LibGit2.parentcount(cmt3) == 1 + @test LibGit2.parent_id(cmt3, 1) == commit_oid1 + @test LibGit2.GitHash(LibGit2.parent(cmt3, 1)) == commit_oid1 + end + + # test showing the commit + showstr = split(sprint(show, cmt), "\n") + # the time of the commit will vary so just test the first two parts + @test occursin("Git Commit:", showstr[1]) + @test occursin("Commit Author: Name: TEST, Email: TEST@TEST.COM, Time:", showstr[2]) + @test occursin("Committer: Name: TEST, Email: TEST@TEST.COM, Time:", showstr[3]) + @test occursin("SHA:", showstr[4]) + @test showstr[5] == "Message:" + @test showstr[6] == commit_msg1 + @test LibGit2.revcount(repo, string(commit_oid1), string(commit_oid3)) == (-1,0) + + blame = LibGit2.GitBlame(repo, test_file) + @test LibGit2.counthunks(blame) == 3 + @test_throws BoundsError getindex(blame, LibGit2.counthunks(blame)+1) + @test_throws BoundsError getindex(blame, 0) + sig = LibGit2.Signature(blame[1].orig_signature) + @test sig.name == cmtr.name + @test sig.email == cmtr.email + show_strs = split(sprint(show, blame[1]), "\n") + @test show_strs[1] == "GitBlameHunk:" + @test show_strs[2] == "Original path: $test_file" + @test show_strs[3] == "Lines in hunk: 1" + @test show_strs[4] == "Final commit oid: $commit_oid1" + @test show_strs[6] == "Original commit oid: $commit_oid1" + @test length(show_strs) == 7 + end + finally + close(repo) + close(repo_file) + end + end + + @testset "with branch" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + brnch = LibGit2.branch(repo) + LibGit2.with(LibGit2.head(repo)) do brref + # various branch properties + @test LibGit2.isbranch(brref) + @test !LibGit2.isremote(brref) + @test LibGit2.name(brref) == "refs/heads/$(default_branch)" + @test LibGit2.shortname(brref) == default_branch + @test LibGit2.ishead(brref) + @test LibGit2.upstream(brref) === nothing + + # showing the GitReference to this branch + show_strs = split(sprint(show, brref), "\n") + @test show_strs[1] == "GitReference:" + @test show_strs[2] == "Branch with name refs/heads/$(default_branch)" + @test show_strs[3] == "Branch is HEAD." 
+ @test repo.ptr == LibGit2.repository(brref).ptr + @test brnch == default_branch + @test LibGit2.headname(repo) == default_branch + + # create a branch *without* setting its tip as HEAD + LibGit2.branch!(repo, test_branch, string(commit_oid1), set_head=false) + # null because we are looking for a REMOTE branch + @test LibGit2.lookup_branch(repo, test_branch, true) === nothing + # not nothing because we are now looking for a LOCAL branch + LibGit2.with(LibGit2.lookup_branch(repo, test_branch, false)) do tbref + @test LibGit2.shortname(tbref) == test_branch + @test LibGit2.upstream(tbref) === nothing + end + @test LibGit2.lookup_branch(repo, test_branch2, true) === nothing + # test deleting the branch + LibGit2.branch!(repo, test_branch2; set_head=false) + LibGit2.with(LibGit2.lookup_branch(repo, test_branch2, false)) do tbref + @test LibGit2.shortname(tbref) == test_branch2 + LibGit2.delete_branch(tbref) + @test LibGit2.lookup_branch(repo, test_branch2, true) === nothing + end + end + branches = map(b->LibGit2.shortname(b[1]), LibGit2.GitBranchIter(repo)) + @test default_branch in branches + @test test_branch in branches + end + end + + @testset "with default configuration" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + try + LibGit2.Signature(repo) + catch ex + # these test configure repo with new signature + # in case when global one does not exist + @test isa(ex, LibGit2.Error.GitError) == true + + cfg = LibGit2.GitConfig(repo) + LibGit2.set!(cfg, "user.name", "AAAA") + LibGit2.set!(cfg, "user.email", "BBBB@BBBB.COM") + sig = LibGit2.Signature(repo) + @test sig.name == "AAAA" + @test sig.email == "BBBB@BBBB.COM" + @test LibGit2.getconfig(repo, "user.name", "") == "AAAA" + @test LibGit2.getconfig(cache_repo, "user.name", "") == "AAAA" + end + end + end + + @testset "with tags" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + tags = LibGit2.tag_list(repo) + @test length(tags) == 0 + + # create tag and extract it from a GitReference + tag_oid1 = LibGit2.tag_create(repo, tag1, commit_oid1, sig=test_sig) + @test !LibGit2.iszero(tag_oid1) + tags = LibGit2.tag_list(repo) + @test length(tags) == 1 + @test tag1 in tags + tag1ref = LibGit2.GitReference(repo, "refs/tags/$tag1") + # because this is a reference to an OID + @test isempty(LibGit2.fullname(tag1ref)) + + # test showing a GitReference to a GitTag, and the GitTag itself + show_strs = split(sprint(show, tag1ref), "\n") + @test show_strs[1] == "GitReference:" + @test show_strs[2] == "Tag with name refs/tags/$tag1" + tag1tag = LibGit2.peel(LibGit2.GitTag, tag1ref) + @test LibGit2.name(tag1tag) == tag1 + @test LibGit2.target(tag1tag) == commit_oid1 + @test sprint(show, tag1tag) == "GitTag:\nTag name: $tag1 target: $commit_oid1" + # peels to the commit the tag points to + tag1cmt = LibGit2.peel(tag1ref) + @test LibGit2.GitHash(tag1cmt) == commit_oid1 + tag_oid2 = LibGit2.tag_create(repo, tag2, commit_oid2) + @test !LibGit2.iszero(tag_oid2) + tags = LibGit2.tag_list(repo) + @test length(tags) == 2 + @test tag2 in tags + + refs = LibGit2.ref_list(repo) + @test refs == ["refs/heads/$(default_branch)", "refs/heads/test_branch", "refs/tags/tag1", "refs/tags/tag2"] + # test deleting a tag + LibGit2.tag_delete(repo, tag1) + tags = LibGit2.tag_list(repo) + @test length(tags) == 1 + @test tag2 ∈ tags + @test tag1 ∉ tags + + # test git describe functions applied to these GitTags + description = LibGit2.GitDescribeResult(repo) + fmtted_description = LibGit2.format(description) + @test sprint(show, description) == 
"GitDescribeResult:\n$fmtted_description\n" + @test fmtted_description == "tag2" + description = LibGit2.GitDescribeResult(LibGit2.GitObject(repo, "HEAD")) + fmtted_description = LibGit2.format(description) + @test sprint(show, description) == "GitDescribeResult:\n$fmtted_description\n" + @test fmtted_description == "tag2" + end + end + + @testset "status" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + status = LibGit2.GitStatus(repo) + @test length(status) == 0 + @test_throws BoundsError status[1] + repo_file = open(joinpath(cache_repo,"statusfile"), "a") + + # create commits + println(repo_file, commit_msg1) + flush(repo_file) + LibGit2.add!(repo, test_file) + status = LibGit2.GitStatus(repo) + @test length(status) != 0 + @test_throws BoundsError status[0] + @test_throws BoundsError status[length(status)+1] + # we've added a file - show that it is new + @test status[1].status == LibGit2.Consts.STATUS_WT_NEW + close(repo_file) + end + end + + @testset "blobs" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + # this is slightly dubious, as it assumes the object has not been packed + # could be replaced by another binary format + hash_string = string(commit_oid1) + blob_file = joinpath(cache_repo,".git/objects", hash_string[1:2], hash_string[3:end]) + + id = LibGit2.addblob!(repo, blob_file) + blob = LibGit2.GitBlob(repo, id) + @test LibGit2.isbinary(blob) + len1 = length(blob) + + # test showing a GitBlob + blob_show_strs = split(sprint(show, blob), "\n") + @test blob_show_strs[1] == "GitBlob:" + @test occursin("Blob id:", blob_show_strs[2]) + @test blob_show_strs[3] == "Contents are binary." + + blob2 = LibGit2.GitBlob(repo, LibGit2.GitHash(blob)) + @test LibGit2.isbinary(blob2) + @test length(blob2) == len1 + @test blob == blob2 + @test blob !== blob2 + end + end + @testset "trees" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + @test_throws LibGit2.Error.GitError LibGit2.GitTree(repo, "HEAD") + tree = LibGit2.GitTree(repo, "HEAD^{tree}") + @test isa(tree, LibGit2.GitTree) + @test isa(LibGit2.GitObject(repo, "HEAD^{tree}"), LibGit2.GitTree) + @test LibGit2.Consts.OBJECT(typeof(tree)) == LibGit2.Consts.OBJ_TREE + @test LibGit2.count(tree) == 1 + + # test showing the GitTree and its entries + tree_str = sprint(show, tree) + @test tree_str == "GitTree:\nOwner: $(LibGit2.repository(tree))\nNumber of entries: 1\n" + @test_throws BoundsError tree[0] + @test_throws BoundsError tree[2] + tree_entry = tree[1] + subtree = LibGit2.GitTree(tree_entry) + @test_throws BoundsError subtree[0] + @test_throws BoundsError subtree[2] + tree_entry = subtree[1] + @test LibGit2.filemode(tree_entry) == 33188 + te_str = sprint(show, tree_entry) + ref_te_str = "GitTreeEntry:\nEntry name: testfile\nEntry type: LibGit2.GitBlob\nEntry OID: " + ref_te_str *= "$(LibGit2.entryid(tree_entry))\n" + @test te_str == ref_te_str + blob = LibGit2.GitBlob(tree_entry) + blob_str = sprint(show, blob) + @test blob_str == "GitBlob:\nBlob id: $(LibGit2.GitHash(blob))\nContents:\n$(LibGit2.content(blob))\n" + + # tests for walking the tree and accessing objects + @test tree[""] == tree + @test tree["/"] == tree + @test isa(tree[test_dir], LibGit2.GitTree) + @test tree["$test_dir/"] == tree[test_dir] + @test isa(tree[test_file], LibGit2.GitBlob) + @test_throws KeyError tree["nonexistent"] + + # test workaround for git_tree_walk issue + # https://github.com/libgit2/libgit2/issues/4693 + ccall((:giterr_set_str, libgit2), Cvoid, (Cint, Cstring), + Cint(LibGit2.Error.Invalid), "previous error") + try + 
# file needs to exist in tree in order to trigger the stop walk condition + tree[test_file] + catch err + if isa(err, LibGit2.Error.GitError) && err.class == LibGit2.Error.Invalid + @test false + else + rethrow() + end + end + end + end + + @testset "diff" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + @test !LibGit2.isdirty(repo) + @test !LibGit2.isdirty(repo, test_file) + @test !LibGit2.isdirty(repo, "nonexistent") + @test !LibGit2.isdiff(repo, "HEAD") + @test !LibGit2.isdirty(repo, cached=true) + @test !LibGit2.isdirty(repo, test_file, cached=true) + @test !LibGit2.isdirty(repo, "nonexistent", cached=true) + @test !LibGit2.isdiff(repo, "HEAD", cached=true) + open(joinpath(cache_repo,test_file), "a") do f + println(f, "zzzz") + end + @test LibGit2.isdirty(repo) + @test LibGit2.isdirty(repo, test_file) + @test !LibGit2.isdirty(repo, "nonexistent") + @test LibGit2.isdiff(repo, "HEAD") + @test !LibGit2.isdirty(repo, cached=true) + @test !LibGit2.isdiff(repo, "HEAD", cached=true) + LibGit2.add!(repo, test_file) + @test LibGit2.isdirty(repo) + @test LibGit2.isdiff(repo, "HEAD") + @test LibGit2.isdirty(repo, cached=true) + @test LibGit2.isdiff(repo, "HEAD", cached=true) + tree = LibGit2.GitTree(repo, "HEAD^{tree}") + + # test properties of the diff_tree + diff = LibGit2.diff_tree(repo, tree, "", cached=true) + @test LibGit2.count(diff) == 1 + @test_throws BoundsError diff[0] + @test_throws BoundsError diff[2] + @test LibGit2.Consts.DELTA_STATUS(diff[1].status) == LibGit2.Consts.DELTA_MODIFIED + @test diff[1].nfiles == 2 + + # test showing a DiffDelta + diff_strs = split(sprint(show, diff[1]), '\n') + @test diff_strs[1] == "DiffDelta:" + @test diff_strs[2] == "Status: DELTA_MODIFIED" + @test diff_strs[3] == "Number of files: 2" + @test diff_strs[4] == "Old file:" + @test diff_strs[5] == "DiffFile:" + @test occursin("Oid:", diff_strs[6]) + @test occursin("Path:", diff_strs[7]) + @test occursin("Size:", diff_strs[8]) + @test isempty(diff_strs[9]) + @test diff_strs[10] == "New file:" + + # test showing a GitDiff + diff_strs = split(sprint(show, diff), '\n') + @test diff_strs[1] == "GitDiff:" + @test diff_strs[2] == "Number of deltas: 1" + @test diff_strs[3] == "GitDiffStats:" + @test diff_strs[4] == "Files changed: 1" + @test diff_strs[5] == "Insertions: 1" + @test diff_strs[6] == "Deletions: 0" + + LibGit2.commit(repo, "zzz") + @test !LibGit2.isdirty(repo) + @test !LibGit2.isdiff(repo, "HEAD") + @test !LibGit2.isdirty(repo, cached=true) + @test !LibGit2.isdiff(repo, "HEAD", cached=true) + end + end + end + + function setup_clone_repo(cache_repo::AbstractString, path::AbstractString; name="AAAA", email="BBBB@BBBB.COM") + repo = LibGit2.clone(cache_repo, path) + # need to set this for merges to succeed + cfg = LibGit2.GitConfig(repo) + LibGit2.set!(cfg, "user.name", name) + LibGit2.set!(cfg, "user.email", email) + return repo + end + # TO DO: add more tests for various merge + # preference options + function add_and_commit_file(repo, filenm, filecontent) + open(joinpath(LibGit2.path(repo), filenm),"w") do f + write(f, filecontent) + end + LibGit2.add!(repo, filenm) + return LibGit2.commit(repo, "add $filenm") + end + @testset "Fastforward merges" begin + LibGit2.with(setup_clone_repo(cache_repo, joinpath(dir, "Example.FF"))) do repo + # Sets up a branch "branch/ff_a" which will be two commits ahead + # of "master". It's possible to fast-forward merge "branch/ff_a" + # into "master", which is the default behavior. 
+ oldhead = LibGit2.head_oid(repo) + LibGit2.branch!(repo, "branch/ff_a") + add_and_commit_file(repo, "ff_file1", "111\n") + add_and_commit_file(repo, "ff_file2", "222\n") + LibGit2.branch!(repo, "master") + # switch back, now try to ff-merge the changes + # from branch/a + # set up the merge using GitAnnotated objects + upst_ann = LibGit2.GitAnnotated(repo, "branch/ff_a") + head_ann = LibGit2.GitAnnotated(repo, "master") + + # ff merge them + @test LibGit2.merge!(repo, [upst_ann], true) + @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) + + # Repeat the process, but specifying a commit to merge in as opposed + # to a branch name or GitAnnotated. + oldhead = LibGit2.head_oid(repo) + LibGit2.branch!(repo, "branch/ff_b") + add_and_commit_file(repo, "ff_file3", "333\n") + branchhead = add_and_commit_file(repo, "ff_file4", "444\n") + LibGit2.branch!(repo, "master") + # switch back, now try to ff-merge the changes + # from branch/a using committish + @test LibGit2.merge!(repo, committish=string(branchhead)) + @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) + + # Repeat the process, but specifying a branch name to merge in as opposed + # to a commit or GitAnnotated. + oldhead = LibGit2.head_oid(repo) + LibGit2.branch!(repo, "branch/ff_c") + add_and_commit_file(repo, "ff_file5", "555\n") + branchhead = add_and_commit_file(repo, "ff_file6", "666\n") + LibGit2.branch!(repo, "master") + # switch back, now try to ff-merge the changes + # from branch/ff_c using branch name + @test LibGit2.merge!(repo, branch="refs/heads/branch/ff_c") + @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) + + LibGit2.branch!(repo, "branch/ff_d") + branchhead = add_and_commit_file(repo, "ff_file7", "777\n") + LibGit2.branch!(repo, "master") + # switch back, now try to ff-merge the changes + # from branch/a + # set up the merge using GitAnnotated objects + # from a fetchhead + fh = LibGit2.fetchheads(repo) + upst_ann = LibGit2.GitAnnotated(repo, fh[1]) + @test LibGit2.merge!(repo, [upst_ann], true) + @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) + end + end + + @testset "Cherrypick" begin + LibGit2.with(setup_clone_repo(cache_repo, joinpath(dir, "Example.Cherrypick"))) do repo + # Create a commit on the new branch and cherry-pick it over to + # master. Since the cherry-pick does *not* make a new commit on + # master, we have to create our own commit of the dirty state. 
+ oldhead = LibGit2.head_oid(repo) + LibGit2.branch!(repo, "branch/cherry_a") + cmt_oid = add_and_commit_file(repo, "file1", "111\n") + cmt = LibGit2.GitCommit(repo, cmt_oid) + # switch back, try to cherrypick + # from branch/cherry_a + LibGit2.branch!(repo, "master") + LibGit2.cherrypick(repo, cmt, options=LibGit2.CherrypickOptions()) + cmt_oid2 = LibGit2.commit(repo, "add file1") + @test isempty(LibGit2.diff_files(repo, "master", "branch/cherry_a")) + end + end + + @testset "Merges" begin + LibGit2.with(setup_clone_repo(cache_repo, joinpath(dir, "Example.Merge"))) do repo + oldhead = LibGit2.head_oid(repo) + LibGit2.branch!(repo, "branch/merge_a") + add_and_commit_file(repo, "file1", "111\n") + # switch back, add a commit, try to merge + # from branch/merge_a + LibGit2.branch!(repo, default_branch) + + # test for showing a Reference to a non-HEAD branch + brref = LibGit2.GitReference(repo, "refs/heads/branch/merge_a") + @test LibGit2.name(brref) == "refs/heads/branch/merge_a" + @test !LibGit2.ishead(brref) + show_strs = split(sprint(show, brref), "\n") + @test show_strs[1] == "GitReference:" + @test show_strs[2] == "Branch with name refs/heads/branch/merge_a" + @test show_strs[3] == "Branch is not HEAD." + + add_and_commit_file(repo, "file2", "222\n") + upst_ann = LibGit2.GitAnnotated(repo, "branch/merge_a") + head_ann = LibGit2.GitAnnotated(repo, default_branch) + + # (fail to) merge them because we can't fastforward + @test_logs (:warn,"Cannot perform fast-forward merge") !LibGit2.merge!(repo, [upst_ann], true) + # merge them now that we allow non-ff + @test_logs (:info,"Review and commit merged changes") LibGit2.merge!(repo, [upst_ann], false) + @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) + + # go back to merge_a and rename a file + LibGit2.branch!(repo, "branch/merge_b") + mv(joinpath(LibGit2.path(repo),"file1"),joinpath(LibGit2.path(repo),"mvfile1")) + LibGit2.add!(repo, "mvfile1") + LibGit2.commit(repo, "move file1") + LibGit2.branch!(repo, default_branch) + upst_ann = LibGit2.GitAnnotated(repo, "branch/merge_b") + rename_flag = Cint(0) + rename_flag = LibGit2.toggle(rename_flag, Cint(0)) # turns on the find renames opt + mos = LibGit2.MergeOptions(flags=rename_flag) + @test_logs (:info,"Review and commit merged changes") LibGit2.merge!(repo, [upst_ann], merge_opts=mos) + end + end + + @testset "push" begin + up_path = joinpath(dir, "Example.PushUp") + up_repo = setup_clone_repo(cache_repo, up_path) + our_repo = setup_clone_repo(cache_repo, joinpath(dir, "Example.Push")) + try + add_and_commit_file(our_repo, "file1", "111\n") + if LibGit2.version() >= v"0.26.0" # See #21872, #21639 and #21597 + # we cannot yet locally push to non-bare repos + @test_throws LibGit2.GitError LibGit2.push(our_repo, remoteurl=up_path) + end + finally + close(our_repo) + close(up_repo) + end + + @testset "credentials callback conflict" begin + callbacks = LibGit2.Callbacks(:credentials => (C_NULL, 0)) + cred_payload = LibGit2.CredentialPayload() + + LibGit2.with(LibGit2.GitRepo(joinpath(dir, "Example.Push"))) do repo + @test_throws ArgumentError LibGit2.push(repo, callbacks=callbacks, credentials=cred_payload) + end + end + end + + @testset "Show closed repo" begin + # Make sure this doesn't crash + buf = IOBuffer() + Base.show(buf, LibGit2.with(identity, LibGit2.GitRepo(test_repo))) + @test String(take!(buf)) == "LibGit2.GitRepo()" + end + + @testset "Fetch from cache repository" begin + LibGit2.with(LibGit2.GitRepo(test_repo)) do repo + # fetch changes + @test 
LibGit2.fetch(repo) == 0 + @test !isfile(joinpath(test_repo, test_file)) + + # ff merge them + @test LibGit2.merge!(repo, fastforward=true) + + # since the file is not present yet, we need to reset the branch + head_oid = LibGit2.head_oid(repo) + new_head = LibGit2.reset!(repo, head_oid, LibGit2.Consts.RESET_HARD) + @test isfile(joinpath(test_repo, test_file)) + @test new_head == head_oid + + # GitAnnotated for a fetchhead + fh_ann = LibGit2.GitAnnotated(repo, LibGit2.Consts.FETCH_HEAD) + @test LibGit2.GitHash(fh_ann) == head_oid + + # Detach HEAD - no merge + LibGit2.checkout!(repo, string(commit_oid3)) + @test_throws LibGit2.Error.GitError LibGit2.merge!(repo, fastforward=true) + + # Switch to a branch without remote - no merge + LibGit2.branch!(repo, test_branch) + @test_throws LibGit2.Error.GitError LibGit2.merge!(repo, fastforward=true) + + # Set the username and email for the test_repo (needed for rebase) + cfg = LibGit2.GitConfig(repo) + LibGit2.set!(cfg, "user.name", "AAAA") + LibGit2.set!(cfg, "user.email", "BBBB@BBBB.COM") + + # If upstream argument is empty, libgit2 will look for tracking + # information. If the current branch isn't tracking any upstream + # the rebase should fail. + @test_throws LibGit2.GitError LibGit2.rebase!(repo) + # Try rebasing on master instead + newhead = LibGit2.rebase!(repo, default_branch) + @test newhead == head_oid + + # Switch to the master branch + LibGit2.branch!(repo, default_branch) + + fetch_heads = LibGit2.fetchheads(repo) + @test fetch_heads[1].name == "refs/heads/$(default_branch)" + @test fetch_heads[1].ismerge == true # we just merged master + @test fetch_heads[2].name == "refs/heads/test_branch" + @test fetch_heads[2].ismerge == false + @test fetch_heads[3].name == "refs/tags/tag2" + @test fetch_heads[3].ismerge == false + for fh in fetch_heads + @test fh.url == cache_repo + fh_strs = split(sprint(show, fh), '\n') + @test fh_strs[1] == "FetchHead:" + @test fh_strs[2] == "Name: $(fh.name)" + @test fh_strs[3] == "URL: $(fh.url)" + @test fh_strs[5] == "Merged: $(fh.ismerge)" + end + end + + @testset "credentials callback conflict" begin + callbacks = LibGit2.Callbacks(:credentials => (C_NULL, 0)) + cred_payload = LibGit2.CredentialPayload() + + LibGit2.with(LibGit2.GitRepo(test_repo)) do repo + @test_throws ArgumentError LibGit2.fetch(repo, callbacks=callbacks, credentials=cred_payload) + end + end + end + + @testset "Examine test repository" begin + @testset "files" begin + @test readlines(joinpath(test_repo, test_file)) == readlines(joinpath(cache_repo, test_file)) + end + + @testset "tags & branches" begin + LibGit2.with(LibGit2.GitRepo(test_repo)) do repo + # all tags are in place + tags = LibGit2.tag_list(repo) + @test length(tags) == 1 + @test tag2 in tags + + # all branches are in place + branches = map(b->LibGit2.shortname(b[1]), LibGit2.GitBranchIter(repo)) + @test default_branch in branches + @test test_branch in branches + + # issue #16337 + LibGit2.with(LibGit2.GitReference(repo, "refs/tags/$tag2")) do tag2ref + @test_throws LibGit2.Error.GitError LibGit2.upstream(tag2ref) + end + end + end + + @testset "commits with revwalk" begin + repo = LibGit2.GitRepo(test_repo) + cache = LibGit2.GitRepo(cache_repo) + try + # test map with oid + oids = LibGit2.with(LibGit2.GitRevWalker(repo)) do walker + LibGit2.map((oid,repo)->(oid,repo), walker, oid=commit_oid1, by=LibGit2.Consts.SORT_TIME) + end + @test length(oids) == 1 + # test map with range + str_1 = string(commit_oid1) + str_3 = string(commit_oid3) + oids = LibGit2.with(LibGit2.GitRevWalker(repo)) do
walker + LibGit2.map((oid,repo)->(oid,repo), walker, range="$str_1..$str_3", by=LibGit2.Consts.SORT_TIME) + end + @test length(oids) == 1 + + test_oids = LibGit2.with(LibGit2.GitRevWalker(repo)) do walker + LibGit2.map((oid,repo)->string(oid), walker, by = LibGit2.Consts.SORT_TIME) + end + cache_oids = LibGit2.with(LibGit2.GitRevWalker(cache)) do walker + LibGit2.map((oid,repo)->string(oid), walker, by = LibGit2.Consts.SORT_TIME) + end + for i in eachindex(oids) + @test cache_oids[i] == test_oids[i] + end + # test with specified oid + LibGit2.with(LibGit2.GitRevWalker(repo)) do walker + @test LibGit2.count((oid,repo)->(oid == commit_oid1), walker, oid=commit_oid1, by=LibGit2.Consts.SORT_TIME) == 1 + end + # test without specified oid + LibGit2.with(LibGit2.GitRevWalker(repo)) do walker + @test LibGit2.count((oid,repo)->(oid == commit_oid1), walker, by=LibGit2.Consts.SORT_TIME) == 1 + end + finally + close(repo) + close(cache) + end + end + end + + @testset "Modify and reset repository" begin + LibGit2.with(LibGit2.GitRepo(test_repo)) do repo + # check index for file + LibGit2.with(LibGit2.GitIndex(repo)) do idx + i = findall(test_file, idx) + @test i !== nothing + idx_entry = idx[i] + @test idx_entry !== nothing + idx_entry_str = sprint(show, idx_entry) + @test idx_entry_str == "IndexEntry($(string(idx_entry.id)))" + @test LibGit2.stage(idx_entry) == 0 + + i = findall("zzz", idx) + @test i === nothing + idx_str = sprint(show, idx) + @test idx_str == "GitIndex:\nRepository: $(LibGit2.repository(idx))\nNumber of elements: 1\n" + + LibGit2.remove!(repo, test_file) + LibGit2.read!(repo) + @test LibGit2.count(idx) == 0 + LibGit2.add!(repo, test_file) + LibGit2.update!(repo, test_file) + @test LibGit2.count(idx) == 1 + end + + # check non-existent file status + st = LibGit2.status(repo, "XYZ") + @test st === nothing + + # check file status + st = LibGit2.status(repo, test_file) + @test st !== nothing + @test LibGit2.isset(st, LibGit2.Consts.STATUS_CURRENT) + + # modify file + open(joinpath(test_repo, test_file), "a") do io + write(io, 0x41) + end + + # file modified but not staged + st_mod = LibGit2.status(repo, test_file) + @test !LibGit2.isset(st_mod, LibGit2.Consts.STATUS_INDEX_MODIFIED) + @test LibGit2.isset(st_mod, LibGit2.Consts.STATUS_WT_MODIFIED) + + # stage file + LibGit2.add!(repo, test_file) + + # modified file staged + st_stg = LibGit2.status(repo, test_file) + @test LibGit2.isset(st_stg, LibGit2.Consts.STATUS_INDEX_MODIFIED) + @test !LibGit2.isset(st_stg, LibGit2.Consts.STATUS_WT_MODIFIED) + + # try to unstage to unknown commit + @test_throws LibGit2.Error.GitError LibGit2.reset!(repo, "XYZ", test_file) + + # status should not change + st_new = LibGit2.status(repo, test_file) + @test st_new == st_stg + + # try to unstage to HEAD + new_head = LibGit2.reset!(repo, LibGit2.Consts.HEAD_FILE, test_file) + st_uns = LibGit2.status(repo, test_file) + @test st_uns == st_mod + + # reset repo + @test_throws LibGit2.Error.GitError LibGit2.reset!(repo, LibGit2.GitHash(), LibGit2.Consts.RESET_HARD) + + new_head = LibGit2.reset!(repo, LibGit2.head_oid(repo), LibGit2.Consts.RESET_HARD) + open(joinpath(test_repo, test_file), "r") do io + @test read(io)[end] != 0x41 + end + end + end + + @testset "Modify remote" begin + path = test_repo + LibGit2.with(LibGit2.GitRepo(path)) do repo + remote_name = "test" + url = "https://test.com/repo" + + @test LibGit2.lookup_remote(repo, remote_name) === nothing + + for r in (repo, path) + # Set just the fetch URL + LibGit2.set_remote_fetch_url(r, remote_name, url) 
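+ # only the fetch URL should now be populated; the push URL should stay empty (checked below)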
+ remote = LibGit2.lookup_remote(repo, remote_name) + @test LibGit2.name(remote) == remote_name + @test LibGit2.url(remote) == url + @test LibGit2.push_url(remote) == "" + + LibGit2.remote_delete(repo, remote_name) + @test LibGit2.lookup_remote(repo, remote_name) === nothing + + # Set just the push URL + LibGit2.set_remote_push_url(r, remote_name, url) + remote = LibGit2.lookup_remote(repo, remote_name) + @test LibGit2.name(remote) == remote_name + @test LibGit2.url(remote) == "" + @test LibGit2.push_url(remote) == url + + LibGit2.remote_delete(repo, remote_name) + @test LibGit2.lookup_remote(repo, remote_name) === nothing + + # Set the fetch and push URL + LibGit2.set_remote_url(r, remote_name, url) + remote = LibGit2.lookup_remote(repo, remote_name) + @test LibGit2.name(remote) == remote_name + @test LibGit2.url(remote) == url + @test LibGit2.push_url(remote) == url + + LibGit2.remote_delete(repo, remote_name) + @test LibGit2.lookup_remote(repo, remote_name) === nothing + end + # Invalid remote name + @test_throws LibGit2.GitError LibGit2.set_remote_url(repo, "", url) + @test_throws LibGit2.GitError LibGit2.set_remote_url(repo, remote_name, "") + end + end + + @testset "rebase" begin + LibGit2.with(LibGit2.GitRepo(test_repo)) do repo + LibGit2.branch!(repo, "branch/a") + + oldhead = LibGit2.head_oid(repo) + add_and_commit_file(repo, "file1", "111\n") + add_and_commit_file(repo, "file2", "222\n") + LibGit2.branch!(repo, "branch/b") + + # squash last 2 commits + new_head = LibGit2.reset!(repo, oldhead, LibGit2.Consts.RESET_SOFT) + @test new_head == oldhead + LibGit2.commit(repo, "squash file1 and file2") + + # add another file + newhead = add_and_commit_file(repo, "file3", "333\n") + @test LibGit2.diff_files(repo, "branch/a", "branch/b", filter=Set([LibGit2.Consts.DELTA_ADDED])) == ["file3"] + @test LibGit2.diff_files(repo, "branch/a", "branch/b", filter=Set([LibGit2.Consts.DELTA_MODIFIED])) == [] + # switch back and rebase + LibGit2.branch!(repo, "branch/a") + newnewhead = LibGit2.rebase!(repo, "branch/b") + + # issue #19624 + @test newnewhead == newhead + + # add yet another file + add_and_commit_file(repo, "file4", "444\n") + # rebase with onto + newhead = LibGit2.rebase!(repo, "branch/a", default_branch) + + newerhead = LibGit2.head_oid(repo) + @test newerhead == newhead + + # add yet more files + add_and_commit_file(repo, "file5", "555\n") + pre_abort_head = add_and_commit_file(repo, "file6", "666\n") + # Rebase type + head_ann = LibGit2.GitAnnotated(repo, "branch/a") + upst_ann = LibGit2.GitAnnotated(repo, default_branch) + rb = LibGit2.GitRebase(repo, head_ann, upst_ann) + @test_throws BoundsError rb[3] + @test_throws BoundsError rb[0] + rbo, _ = iterate(rb) + rbo_str = sprint(show, rbo) + @test rbo_str == "RebaseOperation($(string(rbo.id)))\nOperation type: REBASE_OPERATION_PICK\n" + rb_str = sprint(show, rb) + @test rb_str == "GitRebase:\nNumber: 2\nCurrently performing operation: 1\n" + rbo = rb[2] + rbo_str = sprint(show, rbo) + @test rbo_str == "RebaseOperation($(string(rbo.id)))\nOperation type: REBASE_OPERATION_PICK\n" + + # test rebase abort + LibGit2.abort(rb) + @test LibGit2.head_oid(repo) == pre_abort_head + end + end + + @testset "merge" begin + LibGit2.with(setup_clone_repo(cache_repo, joinpath(dir, "Example.simple_merge"))) do repo + LibGit2.branch!(repo, "branch/merge_a") + + a_head = LibGit2.head_oid(repo) + add_and_commit_file(repo, "merge_file1", "111\n") + LibGit2.branch!(repo, default_branch) + a_head_ann = LibGit2.GitAnnotated(repo, "branch/merge_a") + # merge 
returns true if successful + @test_logs (:info,"Review and commit merged changes") LibGit2.merge!(repo, [a_head_ann]) + end + end + + @testset "Transact test repository" begin + LibGit2.with(LibGit2.GitRepo(test_repo)) do repo + cp(joinpath(test_repo, test_file), joinpath(test_repo, "CCC")) + cp(joinpath(test_repo, test_file), joinpath(test_repo, "AAA")) + LibGit2.add!(repo, "AAA") + @test_throws ErrorException LibGit2.transact(repo) do trepo + mv(joinpath(test_repo, test_file), joinpath(test_repo, "BBB")) + LibGit2.add!(trepo, "BBB") + oid = LibGit2.commit(trepo, "test commit"; author=test_sig, committer=test_sig) + error("Force recovery") + end + @test isfile(joinpath(test_repo, "AAA")) + @test isfile(joinpath(test_repo, "CCC")) + @test !isfile(joinpath(test_repo, "BBB")) + @test isfile(joinpath(test_repo, test_file)) + end + end + + @testset "checkout_head" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + # modify file + repo_file = open(joinpath(cache_repo,test_file), "a") + println(repo_file, commit_msg1 * randstring(10)) + close(repo_file) + # and checkout HEAD once more + LibGit2.checkout_head(repo, options=LibGit2.CheckoutOptions(checkout_strategy=LibGit2.Consts.CHECKOUT_FORCE)) + @test LibGit2.headname(repo) == default_branch + @test !LibGit2.isdirty(repo) + end + end + + @testset "checkout/headname" begin + LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo + LibGit2.checkout!(repo, string(commit_oid1)) + @test !LibGit2.isattached(repo) + @test LibGit2.headname(repo) == "(detached from $(string(commit_oid1)[1:7]))" + end + end + + if Sys.isunix() + @testset "checkout/proptest" begin + LibGit2.with(LibGit2.GitRepo(test_repo)) do repo + cp(joinpath(test_repo, test_file), joinpath(test_repo, "proptest")) + LibGit2.add!(repo, "proptest") + id1 = LibGit2.commit(repo, "test property change 1") + # change in file permissions (#17610) + chmod(joinpath(test_repo, "proptest"),0o744) + LibGit2.add!(repo, "proptest") + id2 = LibGit2.commit(repo, "test property change 2") + LibGit2.checkout!(repo, string(id1)) + @test !LibGit2.isdirty(repo) + # change file to symlink (#18420) + mv(joinpath(test_repo, "proptest"), joinpath(test_repo, "proptest2")) + symlink(joinpath(test_repo, "proptest2"), joinpath(test_repo, "proptest")) + LibGit2.add!(repo, "proptest", "proptest2") + id3 = LibGit2.commit(repo, "test symlink change") + LibGit2.checkout!(repo, string(id1)) + @test !LibGit2.isdirty(repo) + end + end + end + + + @testset "Credentials" begin + creds_user = "USER" + creds_pass = Base.SecretBuffer("PASS") + creds = LibGit2.UserPasswordCredential(creds_user, creds_pass) + @test creds.user == creds_user + @test creds.pass == creds_pass + creds2 = LibGit2.UserPasswordCredential(creds_user, creds_pass) + @test creds == creds2 + + sshcreds = LibGit2.SSHCredential(creds_user, creds_pass) + @test sshcreds.user == creds_user + @test sshcreds.pass == creds_pass + @test sshcreds.prvkey == "" + @test sshcreds.pubkey == "" + sshcreds2 = LibGit2.SSHCredential(creds_user, creds_pass) + @test sshcreds == sshcreds2 + + Base.shred!(creds) + Base.shred!(creds2) + Base.shred!(sshcreds) + Base.shred!(sshcreds2) + Base.shred!(creds_pass) + end + + @testset "CachedCredentials" begin + cache = LibGit2.CachedCredentials() + + url = "https://github.com/JuliaLang/Example.jl" + cred_id = LibGit2.credential_identifier(url) + cred = LibGit2.UserPasswordCredential("julia", "password") + + @test !haskey(cache, cred_id) + password = Base.SecretBuffer("password") + + # Attempt to reject a credential which wasn't stored 
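+ # rejecting here is a no-op: nothing is removed from the cache and the passed-in credential stays intact (checked below)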
+ LibGit2.reject(cache, cred, url) + @test !haskey(cache, cred_id) + @test cred.user == "julia" + @test cred.pass == password + + # Approve a credential which causes it to be stored + LibGit2.approve(cache, cred, url) + @test haskey(cache, cred_id) + @test cache[cred_id] === cred + + # Approve the same credential again which does not overwrite + LibGit2.approve(cache, cred, url) + @test haskey(cache, cred_id) + @test cache[cred_id] === cred + + # Overwrite an already cached credential + dup_cred = deepcopy(cred) + LibGit2.approve(cache, dup_cred, url) # Shreds overwritten `cred` + @test haskey(cache, cred_id) + @test cache[cred_id] === dup_cred + @test cred.user != "julia" + @test cred.pass != password + @test dup_cred.user == "julia" + @test dup_cred.pass == password + + cred = dup_cred + + # Reject an approved credential + @test cache[cred_id] === cred + LibGit2.reject(cache, cred, url) # Avoids shredding the credential passed in + @test !haskey(cache, cred_id) + @test cred.user == "julia" + @test cred.pass == password + + # Reject and shred an approved credential + dup_cred = deepcopy(cred) + LibGit2.approve(cache, cred, url) + + LibGit2.reject(cache, dup_cred, url) # Shred `cred` but not passed in `dup_cred` + @test !haskey(cache, cred_id) + @test cred.user != "julia" + @test cred.pass != password + @test dup_cred.user == "julia" + @test dup_cred.pass == password + + Base.shred!(dup_cred) + Base.shred!(cache) + Base.shred!(password) + end + + @testset "Git credential username" begin + @testset "fill username" begin + config_path = joinpath(dir, config_file) + isfile(config_path) && rm(config_path) + + LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + # No credential settings should be set for these tests + @test isempty(collect(LibGit2.GitConfigIter(cfg, r"credential.*"))) + + github_cred = LibGit2.GitCredential("https", "github.com") + mygit_cred = LibGit2.GitCredential("https", "mygithost") + + # No credential settings in configuration. + username = LibGit2.default_username(cfg, github_cred) + @test username === nothing + + # Add a credential setting for a specific URL + LibGit2.set!(cfg, "credential.https://github.com.username", "foo") + + username = LibGit2.default_username(cfg, github_cred) + @test username == "foo" + + username = LibGit2.default_username(cfg, mygit_cred) + @test username === nothing + + # Add a global credential setting after the URL-specific setting. The first + # setting to match will be the one that is used.
+ LibGit2.set!(cfg, "credential.username", "bar") + + username = LibGit2.default_username(cfg, github_cred) + @test username == "foo" + + username = LibGit2.default_username(cfg, mygit_cred) + @test username == "bar" + + Base.shred!(github_cred) + Base.shred!(mygit_cred) + end + end + + @testset "empty username" begin + config_path = joinpath(dir, config_file) + isfile(config_path) && rm(config_path) + + LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + # No credential settings should be set for these tests + @test isempty(collect(LibGit2.GitConfigIter(cfg, r"credential.*"))) + + # An empty username should count as being set + LibGit2.set!(cfg, "credential.https://github.com.username", "") + LibGit2.set!(cfg, "credential.username", "name") + + github_cred = LibGit2.GitCredential("https", "github.com") + mygit_cred = LibGit2.GitCredential("https", "mygithost", "path") + + username = LibGit2.default_username(cfg, github_cred) + @test username == "" + + username = LibGit2.default_username(cfg, mygit_cred) + @test username == "name" + + Base.shred!(github_cred) + Base.shred!(mygit_cred) + end + end + end + + @testset "Git helpers useHttpPath" begin + @testset "use_http_path" begin + config_path = joinpath(dir, config_file) + isfile(config_path) && rm(config_path) + + LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + # No credential settings should be set for these tests + @test isempty(collect(LibGit2.GitConfigIter(cfg, r"credential.*"))) + + github_cred = LibGit2.GitCredential("https", "github.com") + mygit_cred = LibGit2.GitCredential("https", "mygithost") + + # No credential settings in configuration. + @test !LibGit2.use_http_path(cfg, github_cred) + @test !LibGit2.use_http_path(cfg, mygit_cred) + + # Add a credential setting for a specific URL + LibGit2.set!(cfg, "credential.https://github.com.useHttpPath", "true") + + @test LibGit2.use_http_path(cfg, github_cred) + @test !LibGit2.use_http_path(cfg, mygit_cred) + + # Invert the current settings. + LibGit2.set!(cfg, "credential.useHttpPath", "true") + LibGit2.set!(cfg, "credential.https://github.com.useHttpPath", "false") + + @test !LibGit2.use_http_path(cfg, github_cred) + @test LibGit2.use_http_path(cfg, mygit_cred) + + Base.shred!(github_cred) + Base.shred!(mygit_cred) + end + end + end + + @testset "GitCredentialHelper" begin + GitCredentialHelper = LibGit2.GitCredentialHelper + GitCredential = LibGit2.GitCredential + + @testset "parse" begin + @test parse(GitCredentialHelper, "!echo hello") == GitCredentialHelper(`echo hello`) + @test parse(GitCredentialHelper, "/bin/bash") == GitCredentialHelper(`/bin/bash`) + @test parse(GitCredentialHelper, "store") == GitCredentialHelper(`git credential-store`) + end + + @testset "credential_helpers" begin + config_path = joinpath(dir, config_file) + + # Note: LibGit2.set!
doesn't allow us to set duplicates or ordering + open(config_path, "w+") do fp + write(fp, """ + [credential] + helper = !echo first + [credential "https://mygithost"] + helper = "" + [credential] + helper = !echo second + """) + # Git for Windows uses this config (see issue #45693) + write(fp,""" + [credential "helperselector"] + selected = manager-core + """) + end + + LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + expected = [ + GitCredentialHelper(`echo first`), + GitCredentialHelper(`echo second`), + ] + + github_cred = GitCredential("https", "github.com") + mygit_cred = GitCredential("https", "mygithost") + + @test LibGit2.credential_helpers(cfg, github_cred) == expected + @test LibGit2.credential_helpers(cfg, mygit_cred) == expected[2:2] + + Base.shred!(github_cred) + Base.shred!(mygit_cred) + end + end + + @testset "approve/reject" begin + # In order to use the "store" credential helper `git` needs to be installed and + # on the path. + if GIT_INSTALLED + credential_path = joinpath(dir, ".git-credentials") + isfile(credential_path) && rm(credential_path) + + # Requires `git` to be installed and available on the path. + helper = parse(LibGit2.GitCredentialHelper, "store") + + # Set HOME to control where the .git-credentials file is written. + # Note: In Cygwin environments `git` will use HOME instead of USERPROFILE. + # Setting both environment variables ensures home was overridden. + withenv("HOME" => dir, "USERPROFILE" => dir) do + query = LibGit2.GitCredential("https", "mygithost") + filled = LibGit2.GitCredential("https", "mygithost", nothing, "bob", "s3cre7") + + @test !isfile(credential_path) + + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == query + end + + LibGit2.approve(helper, filled) + @test isfile(credential_path) + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == filled + end + + LibGit2.reject(helper, filled) + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == query + end + + Base.shred!(query) + Base.shred!(filled) + end + end + end + + @testset "approve/reject with path" begin + # In order to use the "store" credential helper `git` needs to be installed and + # on the path. + if GIT_INSTALLED + credential_path = joinpath(dir, ".git-credentials") + isfile(credential_path) && rm(credential_path) + + # Requires `git` to be installed and available on the path. + helper = parse(LibGit2.GitCredentialHelper, "store") + + # Set HOME to control where the .git-credentials file is written. + # Note: In Cygwin environments `git` will use HOME instead of USERPROFILE. + # Setting both environment variables ensures home was overridden. 
+ withenv("HOME" => dir, "USERPROFILE" => dir) do + query = LibGit2.GitCredential("https", "mygithost") + query_a = LibGit2.GitCredential("https", "mygithost", "a") + query_b = LibGit2.GitCredential("https", "mygithost", "b") + + filled_a = LibGit2.GitCredential("https", "mygithost", "a", "alice", "1234") + filled_b = LibGit2.GitCredential("https", "mygithost", "b", "bob", "s3cre7") + + function without_path(cred) + c = deepcopy(cred) + c.path = nothing + c + end + + filled_without_path_a = without_path(filled_a) + filled_without_path_b = without_path(filled_b) + + @test !isfile(credential_path) + + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == query + end + Base.shred!(LibGit2.fill!(helper, deepcopy(query_a))) do result + @test result == query_a + end + Base.shred!(LibGit2.fill!(helper, deepcopy(query_b))) do result + @test result == query_b + end + + LibGit2.approve(helper, filled_a) + @test isfile(credential_path) + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == filled_without_path_a + end + Base.shred!(LibGit2.fill!(helper, deepcopy(query_a))) do result + @test result == filled_a + end + Base.shred!(LibGit2.fill!(helper, deepcopy(query_b))) do result + @test result == query_b + end + + LibGit2.approve(helper, filled_b) + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == filled_without_path_b + end + Base.shred!(LibGit2.fill!(helper, deepcopy(query_a))) do result + @test result == filled_a + end + Base.shred!(LibGit2.fill!(helper, deepcopy(query_b))) do result + @test result == filled_b + end + + LibGit2.reject(helper, filled_b) + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == filled_without_path_a + end + Base.shred!(LibGit2.fill!(helper, deepcopy(query_a))) do result + @test result == filled_a + end + Base.shred!(LibGit2.fill!(helper, deepcopy(query_b))) do result + @test result == query_b + end + + Base.shred!(query) + Base.shred!(query_a) + Base.shred!(query_b) + Base.shred!(filled_a) + Base.shred!(filled_b) + Base.shred!(filled_without_path_a) + Base.shred!(filled_without_path_b) + end + end + end + + @testset "approve/reject with UserPasswordCredential" begin + # In order to use the "store" credential helper `git` needs to be installed and + # on the path. + if GIT_INSTALLED + config_path = joinpath(dir, config_file) + isfile(config_path) && rm(config_path) + + credential_path = joinpath(dir, ".git-credentials") + isfile(credential_path) && rm(credential_path) + + LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + query = LibGit2.GitCredential("https", "mygithost") + filled = LibGit2.GitCredential("https", "mygithost", nothing, "alice", "1234") + user_pass_cred = LibGit2.UserPasswordCredential("alice", "1234") + url = "https://mygithost" + + # Requires `git` to be installed and available on the path. 
+ LibGit2.set!(cfg, "credential.helper", "store --file \"$credential_path\"") + helper = only(LibGit2.credential_helpers(cfg, query)) + + @test !isfile(credential_path) + + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == query + end + + LibGit2.approve(cfg, user_pass_cred, url) + @test isfile(credential_path) + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == filled + end + + LibGit2.reject(cfg, user_pass_cred, url) + Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result + @test result == query + end + + Base.shred!(query) + Base.shred!(filled) + Base.shred!(user_pass_cred) + end + end + end + end + + # The following tests require that we can fake a TTY so that we can provide passwords + # which use the `getpass` function. At the moment we can only fake this on UNIX based + # systems. + if Sys.isunix() + git_ok = LibGit2.GitError( + LibGit2.Error.None, LibGit2.Error.GIT_OK, + "No errors") + + abort_prompt = LibGit2.GitError( + LibGit2.Error.Callback, LibGit2.Error.EUSER, + "Aborting, user cancelled credential request.") + + prompt_limit = LibGit2.GitError( + LibGit2.Error.Callback, LibGit2.Error.EAUTH, + "Aborting, maximum number of prompts reached.") + + incompatible_error = LibGit2.GitError( + LibGit2.Error.Callback, LibGit2.Error.EAUTH, + "The explicitly provided credential is incompatible with the requested " * + "authentication methods.") + + exhausted_error = LibGit2.GitError( + LibGit2.Error.Callback, LibGit2.Error.EAUTH, + "All authentication methods have failed.") + + @testset "SSH credential prompt" begin + url = "git@github.com:test/package.jl" + username = "git" + + valid_key = joinpath(KEY_DIR, "valid") + valid_cred = LibGit2.SSHCredential(username, "", valid_key, valid_key * ".pub") + + valid_p_key = joinpath(KEY_DIR, "valid-passphrase") + passphrase = "secret" + valid_p_cred = LibGit2.SSHCredential(username, passphrase, valid_p_key, valid_p_key * ".pub") + + invalid_key = joinpath(KEY_DIR, "invalid") + + function gen_ex(cred; username="git") + url = username !== nothing && !isempty(username) ? "$username@" : "" + url *= "github.com:test/package.jl" + quote + include($LIBGIT2_HELPER_PATH) + credential_loop($cred, $url, $username) + end + end + + ssh_ex = gen_ex(valid_cred) + ssh_p_ex = gen_ex(valid_p_cred) + ssh_u_ex = gen_ex(valid_cred, username=nothing) + + # Note: We cannot use the default ~/.ssh/id_rsa for tests since we cannot be + # sure a user will actually have these files. Instead we will use the ENV + # variables to set the default values. + + # ENV credentials are valid + withenv("SSH_KEY_PATH" => valid_key) do + err, auth_attempts, p = challenge_prompt(ssh_ex, []) + @test err == git_ok + @test auth_attempts == 1 + end + + # ENV credentials are valid but require a passphrase + withenv("SSH_KEY_PATH" => valid_p_key) do + challenges = [ + "Passphrase for $valid_p_key: " => "$passphrase\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + + # User mistypes passphrase. + # Note: In reality LibGit2 will raise an error upon using the invalid SSH + # credentials. Since we don't control the internals of LibGit2, though, it + # could also just re-call the credential callback like it does for HTTP.
+ challenges = [ + "Passphrase for $valid_p_key: " => "foo\n", + "Private key location for 'git@github.com' [$valid_p_key]: " => "\n", + "Passphrase for $valid_p_key: " => "$passphrase\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) + @test err == git_ok + @test auth_attempts == 2 + + # User sends EOF in passphrase prompt which aborts the credential request + challenges = [ + "Passphrase for $valid_p_key: " => "\x04", + ] + err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 1 + + # User provides an empty passphrase + challenges = [ + "Passphrase for $valid_p_key: " => "\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 1 + end + + # ENV credential requiring passphrase + withenv("SSH_KEY_PATH" => valid_p_key, "SSH_KEY_PASS" => passphrase) do + err, auth_attempts, p = challenge_prompt(ssh_p_ex, []) + @test err == git_ok + @test auth_attempts == 1 + end + + # Missing username + withenv("SSH_KEY_PATH" => valid_key) do + # User provides a valid username + challenges = [ + "Username for 'github.com': " => "$username\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_u_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + + # User sends EOF in username prompt which aborts the credential request + challenges = [ + "Username for 'github.com': " => "\x04", + ] + err, auth_attempts, p = challenge_prompt(ssh_u_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 1 + + # User provides an empty username + challenges = [ + "Username for 'github.com': " => "\n", + "Username for 'github.com': " => "\x04", + ] + err, auth_attempts, p = challenge_prompt(ssh_u_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 2 + + # User repeatedly chooses an invalid username + challenges = [ + "Username for 'github.com': " => "foo\n", + "Username for 'github.com' [foo]: " => "\n", + "Private key location for 'foo@github.com' [$valid_key]: " => "\n", + "Username for 'github.com' [foo]: " => "\x04", # Need to manually abort + ] + err, auth_attempts, p = challenge_prompt(ssh_u_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 3 + + # Credential callback is given an empty string in the `username_ptr` + # instead of the C_NULL in the other missing username tests. + ssh_user_empty_ex = gen_ex(valid_cred, username="") + challenges = [ + "Username for 'github.com': " => "$username\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_user_empty_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + end + + # Explicitly setting these env variables to be empty means the user will be + # given a prompt with no defaults set. + withenv("SSH_KEY_PATH" => nothing, + "SSH_PUB_KEY_PATH" => nothing, + "SSH_KEY_PASS" => nothing, + HOME => dir) do + + # Set the USERPROFILE / HOME above to be a directory that does not contain + # the "~/.ssh/id_rsa" file. If this file exists the credential callback + # will default to use this private key instead of triggering a prompt. 
+ @test !isfile(joinpath(homedir(), ".ssh", "id_rsa")) + + # User provides valid credentials + challenges = [ + "Private key location for 'git@github.com': " => "$valid_key\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + + # User provides valid credentials that require a passphrase + challenges = [ + "Private key location for 'git@github.com': " => "$valid_p_key\n", + "Passphrase for $valid_p_key: " => "$passphrase\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + + # User sends EOF in private key prompt which aborts the credential request + challenges = [ + "Private key location for 'git@github.com': " => "\x04", + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 1 + + # User provides an empty private key which triggers a re-prompt + challenges = [ + "Private key location for 'git@github.com': " => "\n", + "Private key location for 'git@github.com': " => "\x04", + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 2 + + # User provides an invalid private key until prompt limit reached. + # Note: the prompt should not supply an invalid default. + challenges = [ + "Private key location for 'git@github.com': " => "foo\n", + "Private key location for 'git@github.com' [foo]: " => "foo\n", + "Private key location for 'git@github.com' [foo]: " => "foo\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == prompt_limit + @test auth_attempts == 3 + end + + # Explicitly setting these env variables to an existing but invalid key pair + # means the user will be given a prompt that defaults to the given values. + withenv("SSH_KEY_PATH" => invalid_key, + "SSH_PUB_KEY_PATH" => invalid_key * ".pub") do + challenges = [ + "Private key location for 'git@github.com' [$invalid_key]: " => "$valid_key\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == git_ok + @test auth_attempts == 2 + + # User repeatedly chooses the default invalid private key until prompt limit reached + challenges = [ + "Private key location for 'git@github.com' [$invalid_key]: " => "\n", + "Private key location for 'git@github.com' [$invalid_key]: " => "\n", + "Private key location for 'git@github.com' [$invalid_key]: " => "\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == prompt_limit + @test auth_attempts == 4 + end + + # Explicitly set the public key ENV variable to a non-existent file. + withenv("SSH_KEY_PATH" => valid_key, + "SSH_PUB_KEY_PATH" => valid_key * ".public") do + @test !isfile(ENV["SSH_PUB_KEY_PATH"]) + + challenges = [ + # "Private key location for 'git@github.com' [$valid_key]: " => "\n" + "Public key location for 'git@github.com' [$valid_key.public]: " => "$valid_key.pub\n" + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + end + + # Explicitly set the public key ENV variable to a public key that doesn't match + # the private key.
+ withenv("SSH_KEY_PATH" => valid_key, + "SSH_PUB_KEY_PATH" => invalid_key * ".pub") do + @test isfile(ENV["SSH_PUB_KEY_PATH"]) + + challenges = [ + "Private key location for 'git@github.com' [$valid_key]: " => "\n" + "Public key location for 'git@github.com' [$invalid_key.pub]: " => "$valid_key.pub\n" + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == git_ok + @test auth_attempts == 2 + end + + Base.shred!(valid_cred) + Base.shred!(valid_p_cred) + end + + @testset "SSH known host checking" begin + CHECK_MATCH = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_MATCH + CHECK_MISMATCH = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_MISMATCH + CHECK_NOTFOUND = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_NOTFOUND + CHECK_FAILURE = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_FAILURE + + # randomly generated hashes matching no hosts + random_key = collect(reinterpret(Cchar, codeunits("\0\0\0\assh-rsa\0\0\0\x01#\0\0\0\x81\0¿\x95\xbe9\xfc9g\n:\xcf&\x06YA\xb5`\x97\xc13A\xbf;T+C\xc9Ut J>\xc5ҍ\xc4_S\x8a \xc1S\xeb\x15FH\xd2a\x04.D\xeeb\xac\x8f\xdb\xcc\xef\xc4l G\x9bR\xafp\x17s<=\x12\xab\x04ڳif\\A\x9ba0\xde%\xdei\x04\xc3\r\xb3\x81w\x88\xec\xc0f\x15A;AÝ\xc0r\xa1\u5fe\xd3\xf6)8\x8e\xa3\xcbc\xee\xdd\$\x04\x0f\xc1\xb4\x1f\xcc\xecK\xe0\x99"))) + # hashes of the unique github.com fingerprint + github_key = collect(reinterpret(Cchar, codeunits("\0\0\0\assh-rsa\0\0\0\x01#\0\0\x01\x01\0\xab`;\x85\x11\xa6vy\xbd\xb5@\xdb;\xd2\x03K\0J\xe96\xd0k\xe3\xd7`\xf0\x8f˪\xdbN\xb4\xedóǑ\xc7\n\xae\x9at\xc9Xi\xe4wD!«\xea\x92\xe5T0_8\xb5\xfdAK2\b\xe5t\xc37\xe3 \x93e\x18F,vRɋ1\xe1n}\xa6R;\xd2\0t*dD\xd8?\xcd^\x172\xd06sǷ\x81\x15UH{U\xf0\xc4IO8)\xec\xe6\x0f\x94%Z\x95˚\xf57\xd7\xfc\x8c\x7f\xe4\x9e\xf3\x18GN\xf2\x92\t\x92\x05\"e\xb0\xa0n\xa6mJ\x16\x7f\xd9\xf3\xa4\x8a\x1aJ0~\xc1\xea\xaaQI\xa9i\xa6\xac]V\xa5\xefb~Q}\x81\xfbdO[t\\OG\x8e\xcd\b*\x94\x92\xf7D\xaa\xd3&\xf7l\x8cM\xc9\x10\vƫyF\x1d&W\xcbo\x06\xde\xc9.kd\xa6V/\xf0\xe3 \x84\xea\x06\xce\x0e\xa9\xd3ZX;\xfb\0\xbaӌ\x9d\x19p github_key, + "gitlab.com" => gitlab_key, + ] + for files in [[no_file], [empty_file]] + check = LibGit2.ssh_knownhost_check(files, host, key) + @test check == CHECK_NOTFOUND + end + for files in [ + [known_hosts], + [empty_file, known_hosts], + [known_hosts, empty_file], + [known_hosts, wrong_hosts], + ] + check = LibGit2.ssh_knownhost_check(files, host, key) + @test check == CHECK_MATCH + end + for files in [ + [wrong_hosts], + [empty_file, wrong_hosts], + [wrong_hosts, empty_file], + [wrong_hosts, known_hosts], + ] + check = LibGit2.ssh_knownhost_check(files, host, key) + @test check == CHECK_MISMATCH + end + end + end + + rm(empty_file) + end + + @testset "HTTPS credential prompt" begin + url = "https://github.com/test/package.jl" + + valid_username = "julia" + valid_password = randstring(16) + valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) + + https_ex = quote + include($LIBGIT2_HELPER_PATH) + credential_loop($valid_cred, $url) + end + + # User provides a valid username and password + challenges = [ + "Username for 'https://github.com': " => "$valid_username\n", + "Password for 'https://$valid_username@github.com': " => "$valid_password\n", + ] + err, auth_attempts, p = challenge_prompt(https_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + + # User sends EOF in username prompt which aborts the credential request + challenges = [ + "Username for 'https://github.com': " => "\x04", + ] + err, auth_attempts, p = challenge_prompt(https_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 1 + + 
# User sends EOF in password prompt which aborts the credential request + challenges = [ + "Username for 'https://github.com': " => "foo\n", + "Password for 'https://foo@github.com': " => "\x04", + ] + err, auth_attempts, p = challenge_prompt(https_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 1 + + # User provides an empty password which aborts the credential request since we + # cannot tell it apart from an EOF. + challenges = [ + "Username for 'https://github.com': " => "foo\n", + "Password for 'https://foo@github.com': " => "\n", + ] + err, auth_attempts, p = challenge_prompt(https_ex, challenges) + @test err == abort_prompt + @test auth_attempts == 1 + + # User repeatedly chooses invalid username/password until the prompt limit is + # reached + challenges = [ + "Username for 'https://github.com': " => "foo\n", + "Password for 'https://foo@github.com': " => "bar\n", + "Username for 'https://github.com' [foo]: " => "foo\n", + "Password for 'https://foo@github.com': " => "bar\n", + "Username for 'https://github.com' [foo]: " => "foo\n", + "Password for 'https://foo@github.com': " => "bar\n", + ] + err, auth_attempts, p = challenge_prompt(https_ex, challenges) + @test err == prompt_limit + @test auth_attempts == 3 + + Base.shred!(valid_cred) + end + + @testset "SSH agent username" begin + url = "github.com:test/package.jl" + + valid_key = joinpath(KEY_DIR, "valid") + valid_cred = LibGit2.SSHCredential("git", "", valid_key, valid_key * ".pub") + + function gen_ex(; username="git") + quote + include($LIBGIT2_HELPER_PATH) + payload = CredentialPayload(allow_prompt=false, allow_ssh_agent=true, + allow_git_helpers=false) + credential_loop($valid_cred, $url, $username, payload) + end + end + + # An empty string username_ptr + ex = gen_ex(username="") + err, auth_attempts, p = challenge_prompt(ex, []) + @test err == exhausted_error + @test auth_attempts == 3 + + # A null username_ptr passed into `git_cred_ssh_key_from_agent` can cause a + # segfault. 
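+ # passing username=nothing exercises that path; the callback should fail cleanly with EAUTH rather than crash (checked below)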
+ ex = gen_ex(username=nothing) + err, auth_attempts, p = challenge_prompt(ex, []) + @test err == exhausted_error + @test auth_attempts == 2 + + Base.shred!(valid_cred) + end + + @testset "SSH default" begin + mktempdir() do home_dir + url = "github.com:test/package.jl" + + default_key = joinpath(home_dir, ".ssh", "id_rsa") + mkdir(dirname(default_key)) + + valid_key = joinpath(KEY_DIR, "valid") + valid_cred = LibGit2.SSHCredential("git", "", valid_key, valid_key * ".pub") + + valid_p_key = joinpath(KEY_DIR, "valid-passphrase") + passphrase = "secret" + valid_p_cred = LibGit2.SSHCredential("git", passphrase, valid_p_key, valid_p_key * ".pub") + + function gen_ex(cred) + quote + valid_cred = $cred + + default_cred = deepcopy(valid_cred) + default_cred.prvkey = $default_key + default_cred.pubkey = $default_key * ".pub" + + cp(valid_cred.prvkey, default_cred.prvkey) + cp(valid_cred.pubkey, default_cred.pubkey) + + try + include($LIBGIT2_HELPER_PATH) + credential_loop(default_cred, $url, "git", shred=false) + finally + rm(default_cred.prvkey) + rm(default_cred.pubkey) + end + end + end + + withenv("SSH_KEY_PATH" => nothing, + "SSH_PUB_KEY_PATH" => nothing, + "SSH_KEY_PASS" => nothing, + HOME => home_dir) do + + # Automatically use the default key + ex = gen_ex(valid_cred) + err, auth_attempts, p = challenge_prompt(ex, []) + @test err == git_ok + @test auth_attempts == 1 + @test p.credential.prvkey == default_key + @test p.credential.pubkey == default_key * ".pub" + + # Confirm the private key if any other prompting is required + ex = gen_ex(valid_p_cred) + challenges = [ + "Private key location for 'git@github.com' [$default_key]: " => "\n", + "Passphrase for $default_key: " => "$passphrase\n", + ] + err, auth_attempts, p = challenge_prompt(ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + end + + Base.shred!(valid_cred) + Base.shred!(valid_p_cred) + end + end + + @testset "SSH expand tilde" begin + url = "git@github.com:test/package.jl" + + valid_key = joinpath(KEY_DIR, "valid") + valid_cred = LibGit2.SSHCredential("git", "", valid_key, valid_key * ".pub") + + invalid_key = joinpath(KEY_DIR, "invalid") + + ssh_ex = quote + include($LIBGIT2_HELPER_PATH) + payload = CredentialPayload(allow_prompt=true, allow_ssh_agent=false, + allow_git_helpers=false) + credential_loop($valid_cred, $url, "git", payload, shred=false) + end + + withenv("SSH_KEY_PATH" => nothing, + "SSH_PUB_KEY_PATH" => nothing, + "SSH_KEY_PASS" => nothing, + HOME => KEY_DIR) do + + # Expand tilde during the private key prompt + challenges = [ + "Private key location for 'git@github.com': " => "~/valid\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + @test p.credential.prvkey == abspath(valid_key) + end + + withenv("SSH_KEY_PATH" => valid_key, + "SSH_PUB_KEY_PATH" => invalid_key * ".pub", + "SSH_KEY_PASS" => nothing, + HOME => KEY_DIR) do + + # Expand tilde during the public key prompt + challenges = [ + "Private key location for 'git@github.com' [$valid_key]: " => "\n", + "Public key location for 'git@github.com' [$invalid_key.pub]: " => "~/valid.pub\n", + ] + err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) + @test err == git_ok + @test auth_attempts == 2 + @test p.credential.pubkey == abspath(valid_key * ".pub") + end + + Base.shred!(valid_cred) + end + + @testset "SSH explicit credentials" begin + url = "git@github.com:test/package.jl" + username = "git" + + valid_p_key = joinpath(KEY_DIR, "valid-passphrase") + passphrase = 
"secret" + valid_cred = LibGit2.SSHCredential(username, passphrase, valid_p_key, valid_p_key * ".pub") + + invalid_key = joinpath(KEY_DIR, "invalid") + invalid_cred = LibGit2.SSHCredential(username, "", invalid_key, invalid_key * ".pub") + + function gen_ex(cred; allow_prompt=true, allow_ssh_agent=false) + quote + include($LIBGIT2_HELPER_PATH) + payload = CredentialPayload($cred, allow_prompt=$allow_prompt, + allow_ssh_agent=$allow_ssh_agent, + allow_git_helpers=false) + credential_loop($valid_cred, $url, $username, payload) + end + end + + # Explicitly provided credential is correct. Note: allowing prompting and + # SSH agent to ensure they are skipped. + ex = gen_ex(valid_cred, allow_prompt=true, allow_ssh_agent=true) + err, auth_attempts, p = challenge_prompt(ex, []) + @test err == git_ok + @test auth_attempts == 1 + @test p.explicit == valid_cred + @test p.credential != valid_cred + + # Explicitly provided credential is incorrect + ex = gen_ex(invalid_cred, allow_prompt=false, allow_ssh_agent=false) + err, auth_attempts, p = challenge_prompt(ex, []) + @test err == exhausted_error + @test auth_attempts == 3 + @test p.explicit == invalid_cred + @test p.credential != invalid_cred + + Base.shred!(valid_cred) + Base.shred!(invalid_cred) + end + + @testset "HTTPS explicit credentials" begin + url = "https://github.com/test/package.jl" + + valid_cred = LibGit2.UserPasswordCredential("julia", randstring(16)) + invalid_cred = LibGit2.UserPasswordCredential("alice", randstring(15)) + + function gen_ex(cred; allow_prompt=true) + quote + include($LIBGIT2_HELPER_PATH) + payload = CredentialPayload($cred, allow_prompt=$allow_prompt, + allow_git_helpers=false) + credential_loop($valid_cred, $url, "", payload) + end + end + + # Explicitly provided credential is correct + ex = gen_ex(valid_cred, allow_prompt=true) + err, auth_attempts, p = challenge_prompt(ex, []) + @test err == git_ok + @test auth_attempts == 1 + @test p.explicit == valid_cred + @test p.credential != valid_cred + + # Explicitly provided credential is incorrect + ex = gen_ex(invalid_cred, allow_prompt=false) + err, auth_attempts, p = challenge_prompt(ex, []) + @test err == exhausted_error + @test auth_attempts == 2 + @test p.explicit == invalid_cred + @test p.credential != invalid_cred + + Base.shred!(valid_cred) + Base.shred!(invalid_cred) + end + + @testset "Cached credentials" begin + url = "https://github.com/test/package.jl" + cred_id = "https://github.com" + + valid_username = "julia" + valid_password = randstring(16) + valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) + + invalid_username = "alice" + invalid_password = randstring(15) + invalid_cred = LibGit2.UserPasswordCredential(invalid_username, invalid_password) + + function gen_ex(; cached_cred=nothing, allow_prompt=true) + quote + include($LIBGIT2_HELPER_PATH) + cache = CachedCredentials() + $(cached_cred !== nothing && :(LibGit2.approve(cache, $cached_cred, $url))) + payload = CredentialPayload(cache, allow_prompt=$allow_prompt, + allow_git_helpers=false) + credential_loop($valid_cred, $url, "", payload) + end + end + + # Cache contains a correct credential + err, auth_attempts, p = challenge_prompt(gen_ex(cached_cred=valid_cred), []) + @test err == git_ok + @test auth_attempts == 1 + + # Note: Approved cached credentials are not shredded + + # Add a credential into the cache + ex = gen_ex() + challenges = [ + "Username for 'https://github.com': " => "$valid_username\n", + "Password for 'https://$valid_username@github.com': " => 
"$valid_password\n", + ] + err, auth_attempts, p = challenge_prompt(ex, challenges) + cache = p.cache + @test err == git_ok + @test auth_attempts == 1 + @test typeof(cache) == LibGit2.CachedCredentials + @test cache.cred == Dict(cred_id => valid_cred) + @test p.credential == valid_cred + + # Replace a credential in the cache + ex = gen_ex(cached_cred=invalid_cred) + challenges = [ + "Username for 'https://github.com' [alice]: " => "$valid_username\n", + "Password for 'https://$valid_username@github.com': " => "$valid_password\n", + ] + err, auth_attempts, p = challenge_prompt(ex, challenges) + cache = p.cache + @test err == git_ok + @test auth_attempts == 2 + @test typeof(cache) == LibGit2.CachedCredentials + @test cache.cred == Dict(cred_id => valid_cred) + @test p.credential == valid_cred + + # Canceling a credential request should leave the cache unmodified + ex = gen_ex(cached_cred=invalid_cred) + challenges = [ + "Username for 'https://github.com' [alice]: " => "foo\n", + "Password for 'https://foo@github.com': " => "bar\n", + "Username for 'https://github.com' [foo]: " => "\x04", + ] + err, auth_attempts, p = challenge_prompt(ex, challenges) + cache = p.cache + @test err == abort_prompt + @test auth_attempts == 3 + @test typeof(cache) == LibGit2.CachedCredentials + @test cache.cred == Dict(cred_id => invalid_cred) + @test p.credential != invalid_cred + + # An EAUTH error should remove credentials from the cache + ex = gen_ex(cached_cred=invalid_cred, allow_prompt=false) + err, auth_attempts, p = challenge_prompt(ex, []) + cache = p.cache + @test err == exhausted_error + @test auth_attempts == 2 + @test typeof(cache) == LibGit2.CachedCredentials + @test cache.cred == Dict() + @test p.credential != invalid_cred + + Base.shred!(valid_cred) + Base.shred!(invalid_cred) + end + + @testset "HTTPS git helper username" begin + url = "https://github.com/test/package.jl" + + valid_username = "julia" + valid_password = randstring(16) + valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) + + config_path = joinpath(dir, config_file) + write(config_path, """ + [credential] + username = $valid_username + """) + + https_ex = quote + include($LIBGIT2_HELPER_PATH) + LibGit2.with(LibGit2.GitConfig($config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + payload = CredentialPayload(nothing, + nothing, cfg, + allow_git_helpers=true) + credential_loop($valid_cred, $url, nothing, payload, shred=false) + end + end + + # Username is supplied from the git configuration file + challenges = [ + "Username for 'https://github.com' [$valid_username]: " => "\n", + "Password for 'https://$valid_username@github.com': " => "$valid_password\n", + ] + err, auth_attempts, p = challenge_prompt(https_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + + # Verify credential wasn't accidentally zeroed (#24731) + @test p.credential == valid_cred + + Base.shred!(valid_cred) + end + + @testset "HTTPS git helper password" begin + if GIT_INSTALLED + url = "https://github.com/test/package.jl" + + valid_username = "julia" + valid_password = randstring(16) + valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) + + cred_file = joinpath(dir, "test-credentials") + config_path = joinpath(dir, config_file) + write(config_path, """ + [credential] + helper = store --file $cred_file + """) + + # Directly write to the cleartext credential store. 
Note: we are not using + # the LibGit2.approve message to avoid any possibility of the tests + # accidentally writing to a user's global store. + write(cred_file, "https://$valid_username:$valid_password@github.com") + + https_ex = quote + include($LIBGIT2_HELPER_PATH) + LibGit2.with(LibGit2.GitConfig($config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg + payload = CredentialPayload(nothing, + nothing, cfg, + allow_git_helpers=true) + credential_loop($valid_cred, $url, nothing, payload, shred=false) + end + end + + # Username will be provided by the credential helper + challenges = [] + err, auth_attempts, p = challenge_prompt(https_ex, challenges) + @test err == git_ok + @test auth_attempts == 1 + + # Verify credential wasn't accidentally zeroed (#24731) + @test p.credential == valid_cred + + Base.shred!(valid_cred) + end + end + + @testset "Incompatible explicit credentials" begin + # User provides a user/password credential where a SSH credential is required. + valid_cred = LibGit2.UserPasswordCredential("foo", "bar") + expect_ssh_ex = quote + include($LIBGIT2_HELPER_PATH) + payload = CredentialPayload($valid_cred, allow_ssh_agent=false, + allow_git_helpers=false) + credential_loop($valid_cred, "ssh://github.com/repo", "", + Cuint(LibGit2.Consts.CREDTYPE_SSH_KEY), payload) + end + + err, auth_attempts, p = challenge_prompt(expect_ssh_ex, []) + @test err == incompatible_error + @test auth_attempts == 1 + @test p.explicit == valid_cred + @test p.credential != valid_cred + + Base.shred!(valid_cred) + + # User provides a SSH credential where a user/password credential is required. + valid_cred = LibGit2.SSHCredential("foo", "", "", "") + expect_https_ex = quote + include($LIBGIT2_HELPER_PATH) + payload = CredentialPayload($valid_cred, allow_ssh_agent=false, + allow_git_helpers=false) + credential_loop($valid_cred, "https://github.com/repo", "", + Cuint(LibGit2.Consts.CREDTYPE_USERPASS_PLAINTEXT), payload) + end + + err, auth_attempts, p = challenge_prompt(expect_https_ex, []) + @test err == incompatible_error + @test auth_attempts == 1 + @test p.explicit == valid_cred + @test p.credential != valid_cred + + Base.shred!(valid_cred) + end + + # A hypothetical scenario where the allowed authentication can either be + # SSH or username/password. + @testset "SSH & HTTPS authentication" begin + allowed_types = Cuint(LibGit2.Consts.CREDTYPE_SSH_KEY) | + Cuint(LibGit2.Consts.CREDTYPE_USERPASS_PLAINTEXT) + + # User provides a user/password credential where a SSH credential is required. 
+ valid_cred = LibGit2.UserPasswordCredential("foo", "bar") + ex = quote + include($LIBGIT2_HELPER_PATH) + payload = CredentialPayload($valid_cred, allow_ssh_agent=false, + allow_git_helpers=false) + credential_loop($valid_cred, "foo://github.com/repo", "", + $allowed_types, payload) + end + + err, auth_attempts, p = challenge_prompt(ex, []) + @test err == git_ok + @test auth_attempts == 1 + + Base.shred!(valid_cred) + end + + @testset "CredentialPayload reset" begin + urls = [ + "https://github.com/test/package.jl" + "https://myhost.com/demo.jl" + ] + + valid_username = "julia" + valid_password = randstring(16) + valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) + + # Users should be able to re-use the same payload if the state is reset + ex = quote + include($LIBGIT2_HELPER_PATH) + user = nothing + payload = CredentialPayload(allow_git_helpers=false) + first_result = credential_loop($valid_cred, $(urls[1]), user, payload) + LibGit2.reset!(payload) + second_result = credential_loop($valid_cred, $(urls[2]), user, payload) + (first_result, second_result) + end + + challenges = [ + "Username for 'https://github.com': " => "$valid_username\n", + "Password for 'https://$valid_username@github.com': " => "$valid_password\n", + "Username for 'https://myhost.com': " => "$valid_username\n", + "Password for 'https://$valid_username@myhost.com': " => "$valid_password\n", + ] + first_result, second_result = challenge_prompt(ex, challenges) + + err, auth_attempts, p = first_result + @test err == git_ok + @test auth_attempts == 1 + + err, auth_attempts, p = second_result + @test err == git_ok + @test auth_attempts == 1 + + Base.shred!(valid_cred) + end + end + + # Note: Tests only work on linux as SSL_CERT_FILE is only respected on linux systems. + @testset "Hostname verification" begin + openssl_installed = false + common_name = "" + if Sys.islinux() + try + # OpenSSL needs to be on the path + openssl_installed = !isempty(read(`openssl version`, String)) + catch ex + @warn "Skipping hostname verification tests. Is `openssl` on the path?" exception=ex + end + + # Find a hostname that maps to the loopback address + hostnames = ["localhost"] + + # In minimal environments a hostname might not be available (issue #20758) + try + # In some environments, namely Macs, the hostname "macbook.local" is bound + # to the external address while "macbook" is bound to the loopback address. + pushfirst!(hostnames, replace(gethostname(), r"\..*$" => "")) + catch + end + + loopbacks = (ip"127.0.0.1", ip"::1") + for hostname in hostnames + local addr + try + addr = getaddrinfo(hostname) + catch + continue + end + + if addr ∈ loopbacks + common_name = hostname + break + end + end + + if isempty(common_name) + @warn "Skipping hostname verification tests. 
Unable to determine a hostname which maps to the loopback address" + end + end + if openssl_installed && !isempty(common_name) + mktempdir() do root + key = joinpath(root, common_name * ".key") + cert = joinpath(root, common_name * ".crt") + pem = joinpath(root, common_name * ".pem") + + # Generated a certificate which has the CN set correctly but no subjectAltName + run(pipeline(`openssl req -new -x509 -newkey rsa:2048 -sha256 -nodes -keyout $key -out $cert -days 1 -subj "/CN=$common_name"`, stderr=devnull)) + run(`openssl x509 -in $cert -out $pem -outform PEM`) + + local pobj, port + for attempt in 1:10 + # Find an available port by listening, but there's a race condition where + # another process could grab this port, so retry on failure + port, server = listenany(49152) + close(server) + + # Make a fake Julia package and minimal HTTPS server with our generated + # certificate. The minimal server can't actually serve a Git repository. + mkdir(joinpath(root, "Example.jl")) + pobj = cd(root) do + run(pipeline(`openssl s_server -key $key -cert $cert -WWW -accept $port`, stderr=RawFD(2)), wait=false) + end + @test readuntil(pobj, "ACCEPT") == "" + + # Two options: Either we reached "ACCEPT" and the process is running and ready + # or it failed to listen and exited, in which case we try again. + process_running(pobj) && break + end + + @test process_running(pobj) + + if process_running(pobj) + errfile = joinpath(root, "error") + repo_url = "https://$common_name:$port/Example.jl" + repo_dir = joinpath(root, "dest") + code = """ + using Serialization + import LibGit2 + dest_dir = "$repo_dir" + open("$errfile", "w+") do f + try + repo = LibGit2.clone("$repo_url", dest_dir) + catch err + serialize(f, err) + finally + isdir(dest_dir) && rm(dest_dir, recursive=true) + end + end + """ + cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` + + try + # The generated certificate is normally invalid + run(cmd) + err = open(errfile, "r") do f + deserialize(f) + end + @test err.code == LibGit2.Error.ERROR + @test startswith(lowercase(err.msg), + lowercase("user rejected certificate for localhost")) + + rm(errfile) + + # Specify that Julia use only the custom certificate. Note: we need to + # spawn a new Julia process in order for this ENV variable to take effect. + withenv("SSL_CERT_FILE" => pem) do + run(cmd) + err = open(errfile, "r") do f + deserialize(f) + end + @test err.code == LibGit2.Error.ERROR + @test occursin(r"invalid content-type: '?text/plain'?"i, err.msg) + end + + # OpenSSL s_server should still be running + @test process_running(pobj) + finally + kill(pobj) + end + end + end + end + end +end + +let cache = LibGit2.CachedCredentials() + get!(cache, "foo", LibGit2.SSHCredential("", "bar")) + Base.shred!(cache) + @test all(cache["foo"].pass.data .== UInt(0)) +end + +end # module diff --git a/stdlib/LibGit2/test/libgit2.jl b/stdlib/LibGit2/test/libgit2.jl index af140fe97f6d1..c78ecc8fa8bfc 100644 --- a/stdlib/LibGit2/test/libgit2.jl +++ b/stdlib/LibGit2/test/libgit2.jl @@ -1,3222 +1,11 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -module LibGit2Tests - -import LibGit2 -using Test -using Random, Serialization, Sockets - -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl")) -import .Main.FakePTYs: with_fake_pty - -function challenge_prompt(code::Expr, challenges; timeout::Integer=60, debug::Bool=true) - input_code = tempname() - open(input_code, "w") do fp - serialize(fp, code) - end - output_file = tempname() - wrapped_code = quote - using Serialization - result = open($input_code) do fp - eval(deserialize(fp)) - end - open($output_file, "w") do fp - serialize(fp, result) - end - end - torun = "import LibGit2; $wrapped_code" - cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun` - try - challenge_prompt(cmd, challenges, timeout=timeout, debug=debug) - return open(output_file, "r") do fp - deserialize(fp) - end - finally - isfile(output_file) && rm(output_file) - isfile(input_code) && rm(input_code) - end - return nothing -end - -function challenge_prompt(cmd::Cmd, challenges; timeout::Integer=60, debug::Bool=true) - function format_output(output) - !debug && return "" - str = read(seekstart(output), String) - isempty(str) && return "" - return "Process output found:\n\"\"\"\n$str\n\"\"\"" - end - out = IOBuffer() - with_fake_pty() do pts, ptm - p = run(detach(cmd), pts, pts, pts, wait=false) - Base.close_stdio(pts) - - # Kill the process if it takes too long. Typically occurs when process is waiting - # for input. - timer = Channel{Symbol}(1) - watcher = @async begin - waited = 0 - while waited < timeout && process_running(p) - sleep(1) - waited += 1 - end - - if process_running(p) - kill(p) - put!(timer, :timeout) - elseif success(p) - put!(timer, :success) - else - put!(timer, :failure) - end - - # SIGKILL stubborn processes - if process_running(p) - sleep(3) - process_running(p) && kill(p, Base.SIGKILL) - end - wait(p) - end - - for (challenge, response) in challenges - write(out, readuntil(ptm, challenge, keep=true)) - if !isopen(ptm) - error("Could not locate challenge: \"$challenge\". ", - format_output(out)) - end - write(ptm, response) - end - - # Capture output from process until `pts` is closed - try - write(out, ptm) - catch ex - if !(ex isa Base.IOError && ex.code == Base.UV_EIO) - rethrow() # ignore EIO from `ptm` after `pts` dies - end - end - - status = fetch(timer) - close(ptm) - if status != :success - if status == :timeout - error("Process timed out possibly waiting for a response. ", - format_output(out)) - else - error("Failed process. ", format_output(out), "\n", p) - end - end - wait(watcher) - end - nothing -end - -const LIBGIT2_MIN_VER = v"1.0.0" -const LIBGIT2_HELPER_PATH = joinpath(@__DIR__, "libgit2-helpers.jl") - -const KEY_DIR = joinpath(@__DIR__, "keys") -const HOME = Sys.iswindows() ? 
"USERPROFILE" : "HOME" # Environment variable name for home -const GIT_INSTALLED = try - success(`git --version`) -catch - false -end - -function get_global_dir() - buf = Ref(LibGit2.Buffer()) - - LibGit2.@check @ccall "libgit2".git_libgit2_opts( - LibGit2.Consts.GET_SEARCH_PATH::Cint; - LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint, - buf::Ptr{LibGit2.Buffer})::Cint - path = unsafe_string(buf[].ptr) - LibGit2.free(buf) - return path -end - -function set_global_dir(dir) - LibGit2.@check @ccall "libgit2".git_libgit2_opts( - LibGit2.Consts.SET_SEARCH_PATH::Cint; - LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint, - dir::Cstring)::Cint - return -end - -function with_libgit2_temp_home(f) - mktempdir() do tmphome - oldpath = get_global_dir() - set_global_dir(tmphome) - try - @test get_global_dir() == tmphome - f(tmphome) - finally - set_global_dir(oldpath) - end - return - end -end - -######### -# TESTS # -######### - -@testset "Check library version" begin - v = LibGit2.version() - @test v.major == LIBGIT2_MIN_VER.major && v.minor >= LIBGIT2_MIN_VER.minor -end - -@testset "Check library features" begin - f = LibGit2.features() - @test findfirst(isequal(LibGit2.Consts.FEATURE_SSH), f) !== nothing - @test findfirst(isequal(LibGit2.Consts.FEATURE_HTTPS), f) !== nothing -end - -@testset "OID" begin - z = LibGit2.GitHash() - @test LibGit2.iszero(z) - @test z == zero(LibGit2.GitHash) - @test z == LibGit2.GitHash(z) - rs = string(z) - rr = LibGit2.raw(z) - @test z == LibGit2.GitHash(rr) - @test z == LibGit2.GitHash(rs) - @test z == LibGit2.GitHash(pointer(rr)) - - @test LibGit2.GitShortHash(z, 20) == LibGit2.GitShortHash(rs[1:20]) - @test_throws ArgumentError LibGit2.GitHash(Ptr{UInt8}(C_NULL)) - @test_throws ArgumentError LibGit2.GitHash(rand(UInt8, 2*LibGit2.OID_RAWSZ)) - @test_throws ArgumentError LibGit2.GitHash("a") -end - -@testset "StrArrayStruct" begin - p = ["XXX","YYY"] - a = Base.cconvert(Ptr{LibGit2.StrArrayStruct}, p) - b = Base.unsafe_convert(Ptr{LibGit2.StrArrayStruct}, a) - @test p == convert(Vector{String}, unsafe_load(b)) - @noinline gcuse(a) = a - gcuse(a) -end - -@testset "Signature" begin - sig = LibGit2.Signature("AAA", "AAA@BBB.COM", round(time(); digits=0), 0) - git_sig = convert(LibGit2.GitSignature, sig) - sig2 = LibGit2.Signature(git_sig) - close(git_sig) - @test sig.name == sig2.name - @test sig.email == sig2.email - @test sig.time == sig2.time - sig3 = LibGit2.Signature("AAA","AAA@BBB.COM") - @test sig3.name == sig.name - @test sig3.email == sig.email -end - -@testset "Default config" begin - with_libgit2_temp_home() do tmphome - cfg = LibGit2.GitConfig() - @test isa(cfg, LibGit2.GitConfig) - @test LibGit2.getconfig("fake.property", "") == "" - LibGit2.set!(cfg, "fake.property", "AAAA") - @test LibGit2.getconfig("fake.property", "") == "AAAA" - end -end - -@testset "Trace" begin - code = "import LibGit2; LibGit2.trace_set(LibGit2.Consts.TRACE_DEBUG); exit(LibGit2.trace_set(0))" - p = run(`$(Base.julia_cmd()) --startup-file=no -e $code`, wait=false); wait(p) - @test success(p) -end - -# See #21872 and #21636 -LibGit2.version() >= v"0.26.0" && Sys.isunix() && @testset "Default config with symlink" begin - with_libgit2_temp_home() do tmphome - write(joinpath(tmphome, "real_gitconfig"), "[fake]\n\tproperty = BBB") - symlink(joinpath(tmphome, "real_gitconfig"), - joinpath(tmphome, ".gitconfig")) - cfg = LibGit2.GitConfig() - @test isa(cfg, LibGit2.GitConfig) - LibGit2.getconfig("fake.property", "") == "BBB" - LibGit2.set!(cfg, "fake.property", "AAAA") - LibGit2.getconfig("fake.property", 
"") == "AAAA" - end -end - -@testset "Git URL parsing" begin - @testset "HTTPS URL" begin - m = match(LibGit2.URL_REGEX, "https://user:pass@server.com:80/org/project.git") - @test m[:scheme] == "https" - @test m[:user] == "user" - @test m[:password] == "pass" - @test m[:host] == "server.com" - @test m[:port] == "80" - @test m[:path] == "org/project.git" - end - - @testset "SSH URL" begin - m = match(LibGit2.URL_REGEX, "ssh://user:pass@server:22/project.git") - @test m[:scheme] == "ssh" - @test m[:user] == "user" - @test m[:password] == "pass" - @test m[:host] == "server" - @test m[:port] == "22" - @test m[:path] == "project.git" - end - - @testset "SSH URL, scp-like syntax" begin - m = match(LibGit2.URL_REGEX, "user@server:project.git") - @test m[:scheme] === nothing - @test m[:user] == "user" - @test m[:password] === nothing - @test m[:host] == "server" - @test m[:port] === nothing - @test m[:path] == "project.git" - end - - # scp-like syntax corner case. The SCP syntax does not support port so everything after - # the colon is part of the path. - @testset "scp-like syntax, no port" begin - m = match(LibGit2.URL_REGEX, "server:1234/repo") - @test m[:scheme] === nothing - @test m[:user] === nothing - @test m[:password] === nothing - @test m[:host] == "server" - @test m[:port] === nothing - @test m[:path] == "1234/repo" - end - - @testset "HTTPS URL, realistic" begin - m = match(LibGit2.URL_REGEX, "https://github.com/JuliaLang/Example.jl.git") - @test m[:scheme] == "https" - @test m[:user] === nothing - @test m[:password] === nothing - @test m[:host] == "github.com" - @test m[:port] === nothing - @test m[:path] == "JuliaLang/Example.jl.git" - end - - @testset "SSH URL, realistic" begin - m = match(LibGit2.URL_REGEX, "git@github.com:JuliaLang/Example.jl.git") - @test m[:scheme] === nothing - @test m[:user] == "git" - @test m[:password] === nothing - @test m[:host] == "github.com" - @test m[:port] === nothing - @test m[:path] == "JuliaLang/Example.jl.git" - end - - @testset "usernames with special characters" begin - m = match(LibGit2.URL_REGEX, "user-name@hostname.com") - @test m[:user] == "user-name" - end - - @testset "HTTPS URL, no path" begin - m = match(LibGit2.URL_REGEX, "https://user:pass@server.com:80") - @test m[:path] === nothing - end - - @testset "scp-like syntax, no path" begin - m = match(LibGit2.URL_REGEX, "user@server:") - @test m[:path] == "" - - m = match(LibGit2.URL_REGEX, "user@server") - @test m[:path] === nothing - end - - @testset "HTTPS URL, invalid path" begin - m = match(LibGit2.URL_REGEX, "https://git@server:repo") - @test m === nothing - end - - # scp-like syntax should have a colon separating the hostname from the path - @testset "scp-like syntax, invalid path" begin - m = match(LibGit2.URL_REGEX, "git@server/repo") - @test m === nothing - end -end - -@testset "Git URL formatting" begin - @testset "HTTPS URL" begin - url = LibGit2.git_url( - scheme="https", - username="user", - host="server.com", - port=80, - path="org/project.git") - @test url == "https://user@server.com:80/org/project.git" - end - - @testset "SSH URL" begin - url = LibGit2.git_url( - scheme="ssh", - username="user", - host="server", - port="22", - path="project.git") - @test url == "ssh://user@server:22/project.git" - end - - @testset "SSH URL, scp-like syntax" begin - url = LibGit2.git_url( - username="user", - host="server", - path="project.git") - @test url == "user@server:project.git" - end - - @testset "HTTPS URL, realistic" begin - url = LibGit2.git_url( - scheme="https", - 
host="github.com", - path="JuliaLang/Example.jl.git") - @test url == "https://github.com/JuliaLang/Example.jl.git" - end - - @testset "SSH URL, realistic" begin - url = LibGit2.git_url( - username="git", - host="github.com", - path="JuliaLang/Example.jl.git") - @test url == "git@github.com:JuliaLang/Example.jl.git" - end - - @testset "HTTPS URL, no path" begin - url = LibGit2.git_url( - scheme="https", - username="user", - host="server.com", - port="80") - @test url == "https://user@server.com:80" - end - - @testset "scp-like syntax, no path" begin - url = LibGit2.git_url( - username="user", - host="server.com") - @test url == "user@server.com" - end - - @testset "HTTP URL, path includes slash prefix" begin - url = LibGit2.git_url( - scheme="http", - host="server.com", - path="/path") - @test url == "http://server.com/path" - end - - @testset "empty" begin - @test_throws ArgumentError LibGit2.git_url() - - @test LibGit2.git_url(host="server.com") == "server.com" - url = LibGit2.git_url( - scheme="", - username="", - host="server.com", - port="", - path="") - @test url == "server.com" - end -end - -@testset "Passphrase Required" begin - @testset "missing file" begin - @test !LibGit2.is_passphrase_required("") - - file = joinpath(KEY_DIR, "foobar") - @test !isfile(file) - @test !LibGit2.is_passphrase_required(file) - end - - @testset "not private key" begin - @test !LibGit2.is_passphrase_required(joinpath(KEY_DIR, "invalid.pub")) - end - - @testset "private key, with passphrase" begin - @test LibGit2.is_passphrase_required(joinpath(KEY_DIR, "valid-passphrase")) - end - - @testset "private key, no passphrase" begin - @test !LibGit2.is_passphrase_required(joinpath(KEY_DIR, "valid")) - end -end - -@testset "GitCredential" begin - @testset "missing" begin - str = "" - cred = read!(IOBuffer(str), LibGit2.GitCredential()) - @test cred == LibGit2.GitCredential() - @test sprint(write, cred) == str - Base.shred!(cred) - end - - @testset "empty" begin - str = """ - protocol= - host= - path= - username= - password= - """ - cred = read!(IOBuffer(str), LibGit2.GitCredential()) - @test cred == LibGit2.GitCredential("", "", "", "", "") - @test sprint(write, cred) == str - Base.shred!(cred) - end - - @testset "input/output" begin - str = """ - protocol=https - host=example.com - username=alice - password=***** - """ - expected_cred = LibGit2.GitCredential("https", "example.com", nothing, "alice", "*****") - - cred = read!(IOBuffer(str), LibGit2.GitCredential()) - @test cred == expected_cred - @test sprint(write, cred) == str - Base.shred!(cred) - Base.shred!(expected_cred) - end - - @testset "extra newline" begin - # The "Git for Windows" installer will also install the "Git Credential Manager for - # Windows" (https://github.com/Microsoft/Git-Credential-Manager-for-Windows) (also - # known as "manager" in the .gitconfig files). This credential manager returns an - # additional newline when returning the results. 
- str = """ - protocol=https - host=example.com - path= - username=bob - password=***** - - """ - expected_cred = LibGit2.GitCredential("https", "example.com", "", "bob", "*****") - - cred = read!(IOBuffer(str), LibGit2.GitCredential()) - @test cred == expected_cred - @test sprint(write, cred) * "\n" == str - Base.shred!(cred) - Base.shred!(expected_cred) - end - - @testset "unknown attribute" begin - str = """ - protocol=https - host=example.com - attribute=value - username=bob - password=***** - """ - expected_cred = LibGit2.GitCredential("https", "example.com", nothing, "bob", "*****") - expected_log = (:warn, "Unknown git credential attribute found: \"attribute\"") - - cred = @test_logs expected_log read!(IOBuffer(str), LibGit2.GitCredential()) - @test cred == expected_cred - Base.shred!(cred) - Base.shred!(expected_cred) - end - - @testset "use http path" begin - cred = LibGit2.GitCredential("https", "example.com", "dir/file", "alice", "*****") - expected = """ - protocol=https - host=example.com - username=alice - password=***** - """ - - @test cred.use_http_path - cred.use_http_path = false - - @test cred.path == "dir/file" - @test sprint(write, cred) == expected - Base.shred!(cred) - end - - @testset "URL input/output" begin - str = """ - host=example.com - password=bar - url=https://a@b/c - username=foo - """ - expected_str = """ - protocol=https - host=b - path=c - username=foo - """ - expected_cred = LibGit2.GitCredential("https", "b", "c", "foo", nothing) - - cred = read!(IOBuffer(str), LibGit2.GitCredential()) - @test cred == expected_cred - @test sprint(write, cred) == expected_str - Base.shred!(cred) - Base.shred!(expected_cred) - end - - @testset "ismatch" begin - # Equal - cred = LibGit2.GitCredential("https", "github.com") - @test LibGit2.ismatch("https://github.com", cred) - Base.shred!(cred) - - # Credential hostname is different - cred = LibGit2.GitCredential("https", "github.com") - @test !LibGit2.ismatch("https://myhost", cred) - Base.shred!(cred) - - # Credential is less specific than URL - cred = LibGit2.GitCredential("https") - @test !LibGit2.ismatch("https://github.com", cred) - Base.shred!(cred) - - # Credential is more specific than URL - cred = LibGit2.GitCredential("https", "github.com", "path", "user", "pass") - @test LibGit2.ismatch("https://github.com", cred) - Base.shred!(cred) - - # Credential needs to have an "" username to match - cred = LibGit2.GitCredential("https", "github.com", nothing, "") - @test LibGit2.ismatch("https://@github.com", cred) - Base.shred!(cred) - - cred = LibGit2.GitCredential("https", "github.com", nothing, nothing) - @test !LibGit2.ismatch("https://@github.com", cred) - Base.shred!(cred) - end - - @testset "GITHUB_REGEX" begin - github_regex_test = function(url, user, repo) - m = match(LibGit2.GITHUB_REGEX, url) - @test m !== nothing - @test m[1] == "$user/$repo" - @test m[2] == user - @test m[3] == repo - end - user = "User" - repo = "Repo" - github_regex_test("git@github.com/$user/$repo.git", user, repo) - github_regex_test("https://github.com/$user/$repo.git", user, repo) - github_regex_test("https://username@github.com/$user/$repo.git", user, repo) - github_regex_test("ssh://git@github.com/$user/$repo.git", user, repo) - github_regex_test("git@github.com/$user/$repo", user, repo) - github_regex_test("https://github.com/$user/$repo", user, repo) - github_regex_test("https://username@github.com/$user/$repo", user, repo) - github_regex_test("ssh://git@github.com/$user/$repo", user, repo) - @test !occursin(LibGit2.GITHUB_REGEX, 
"git@notgithub.com/$user/$repo.git") - end - - @testset "UserPasswordCredential/url constructor" begin - user_pass_cred = LibGit2.UserPasswordCredential("user", "*******") - url = "https://github.com" - expected_cred = LibGit2.GitCredential("https", "github.com", nothing, "user", "*******") - - cred = LibGit2.GitCredential(user_pass_cred, url) - @test cred == expected_cred - - # Shredding the UserPasswordCredential shouldn't result in information being lost - # inside of a GitCredential. - Base.shred!(user_pass_cred) - @test cred == expected_cred - - Base.shred!(cred) - Base.shred!(expected_cred) - end -end - +# Set HOME to control where the .gitconfig file may be found. +# Note: In Cygwin environments `git` will use HOME instead of USERPROFILE. +# Setting both environment variables ensures home was overridden. mktempdir() do dir dir = realpath(dir) - # test parameters - repo_url = "https://github.com/JuliaLang/Example.jl" - cache_repo = joinpath(dir, "Example") - test_repo = joinpath(dir, "Example.Test") - test_sig = LibGit2.Signature("TEST", "TEST@TEST.COM", round(time(); digits=0), 0) - test_dir = "testdir" - test_file = "$(test_dir)/testfile" - config_file = "testconfig" - commit_msg1 = randstring(10) - commit_msg2 = randstring(10) - commit_oid1 = LibGit2.GitHash() - commit_oid2 = LibGit2.GitHash() - commit_oid3 = LibGit2.GitHash() - default_branch = LibGit2.getconfig("init.defaultBranch", "master") - test_branch = "test_branch" - test_branch2 = "test_branch_two" - tag1 = "tag1" - tag2 = "tag2" - - @testset "Configuration" begin - LibGit2.with(LibGit2.GitConfig(joinpath(dir, config_file), LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - @test_throws LibGit2.Error.GitError LibGit2.get(AbstractString, cfg, "tmp.str") - @test isempty(LibGit2.get(cfg, "tmp.str", "")) == true - - LibGit2.set!(cfg, "tmp.str", "AAAA") - LibGit2.set!(cfg, "tmp.int32", Int32(1)) - LibGit2.set!(cfg, "tmp.int64", Int64(1)) - LibGit2.set!(cfg, "tmp.bool", true) - - @test LibGit2.get(cfg, "tmp.str", "") == "AAAA" - @test LibGit2.get(cfg, "tmp.int32", Int32(0)) == Int32(1) - @test LibGit2.get(cfg, "tmp.int64", Int64(0)) == Int64(1) - @test LibGit2.get(cfg, "tmp.bool", false) == true - - # Ordering of entries appears random when using `LibGit2.set!` - count = 0 - for entry in LibGit2.GitConfigIter(cfg, r"tmp.*") - count += 1 - name, value = unsafe_string(entry.name), unsafe_string(entry.value) - if name == "tmp.str" - @test value == "AAAA" - elseif name == "tmp.int32" - @test value == "1" - elseif name == "tmp.int64" - @test value == "1" - elseif name == "tmp.bool" - @test value == "true" - else - error("Found unexpected entry: $name") - end - show_str = sprint(show, entry) - @test show_str == string("ConfigEntry(\"", name, "\", \"", value, "\")") - end - @test count == 4 - end - end - - @testset "Configuration Iteration" begin - config_path = joinpath(dir, config_file) - - # Write config entries with duplicate names - open(config_path, "a") do fp - write(fp, """ - [credential] - helper = store - username = julia - [credential] - helper = cache - """) - end - - LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - # Will only see the last entry - @test LibGit2.get(cfg, "credential.helper", "") == "cache" - - count = 0 - for entry in LibGit2.GitConfigIter(cfg, "credential.helper") - count += 1 - name, value = unsafe_string(entry.name), unsafe_string(entry.value) - @test name == "credential.helper" - @test value == (count == 1 ? 
"store" : "cache") - end - @test count == 2 - end - end - - @testset "Initializing repository" begin - @testset "with remote branch" begin - LibGit2.with(LibGit2.init(cache_repo)) do repo - @test isdir(cache_repo) - @test LibGit2.path(repo) == LibGit2.posixpath(realpath(cache_repo)) - @test isdir(joinpath(cache_repo, ".git")) - # set a remote branch - branch = "upstream" - LibGit2.GitRemote(repo, branch, repo_url) |> close - - # test remote's representation in the repo's config - config = joinpath(cache_repo, ".git", "config") - lines = split(open(x->read(x, String), config, "r"), "\n") - @test any(map(x->x == "[remote \"upstream\"]", lines)) - - LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, branch)) do remote - # test various remote properties - @test LibGit2.url(remote) == repo_url - @test LibGit2.push_url(remote) == "" - @test LibGit2.name(remote) == "upstream" - @test isa(remote, LibGit2.GitRemote) - - # test showing a GitRemote object - @test sprint(show, remote) == "GitRemote:\nRemote name: upstream url: $repo_url" - end - # test setting and getting the remote's URL - @test LibGit2.isattached(repo) - LibGit2.set_remote_url(repo, "upstream", "unknown") - LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, branch)) do remote - @test LibGit2.url(remote) == "unknown" - @test LibGit2.push_url(remote) == "unknown" - @test sprint(show, remote) == "GitRemote:\nRemote name: upstream url: unknown" - end - LibGit2.set_remote_url(cache_repo, "upstream", repo_url) - LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, branch)) do remote - @test LibGit2.url(remote) == repo_url - @test LibGit2.push_url(remote) == repo_url - @test sprint(show, remote) == "GitRemote:\nRemote name: upstream url: $repo_url" - LibGit2.add_fetch!(repo, remote, "upstream") - - # test setting fetch and push refspecs - @test LibGit2.fetch_refspecs(remote) == String["+refs/heads/*:refs/remotes/upstream/*"] - LibGit2.add_push!(repo, remote, "refs/heads/master") - end - LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, branch)) do remote - @test LibGit2.push_refspecs(remote) == String["refs/heads/master"] - end - # constructor with a refspec - LibGit2.with(LibGit2.GitRemote(repo, "upstream2", repo_url, "upstream")) do remote - @test sprint(show, remote) == "GitRemote:\nRemote name: upstream2 url: $repo_url" - @test LibGit2.fetch_refspecs(remote) == String["upstream"] - end - - LibGit2.with(LibGit2.GitRemoteAnon(repo, repo_url)) do remote - @test LibGit2.url(remote) == repo_url - @test LibGit2.push_url(remote) == "" - @test LibGit2.name(remote) == "" - @test isa(remote, LibGit2.GitRemote) - end - end - end - - @testset "bare" begin - path = joinpath(dir, "Example.Bare") - LibGit2.with(LibGit2.init(path, true)) do repo - @test isdir(path) - @test LibGit2.path(repo) == LibGit2.posixpath(realpath(path)) - @test isfile(joinpath(path, LibGit2.Consts.HEAD_FILE)) - @test LibGit2.isattached(repo) - end - - path = joinpath("garbagefakery", "Example.Bare") - try - LibGit2.GitRepo(path) - error("unexpected") - catch e - @test typeof(e) == LibGit2.GitError - @test startswith( - lowercase(sprint(show, e)), - lowercase("GitError(Code:ENOTFOUND, Class:OS, failed to resolve path")) - end - path = joinpath(dir, "Example.BareTwo") - LibGit2.with(LibGit2.init(path, true)) do repo - #just to see if this works - LibGit2.cleanup(repo) - end - end - end - - @testset "Cloning repository" begin - function bare_repo_tests(repo, repo_path) - @test isdir(repo_path) - @test LibGit2.path(repo) == LibGit2.posixpath(realpath(repo_path)) - @test 
isfile(joinpath(repo_path, LibGit2.Consts.HEAD_FILE)) - @test LibGit2.isattached(repo) - @test LibGit2.remotes(repo) == ["origin"] - end - @testset "bare" begin - repo_path = joinpath(dir, "Example.Bare1") - LibGit2.with(LibGit2.clone(cache_repo, repo_path, isbare = true)) do repo - bare_repo_tests(repo, repo_path) - end - end - @testset "bare with remote callback" begin - repo_path = joinpath(dir, "Example.Bare2") - LibGit2.with(LibGit2.clone(cache_repo, repo_path, isbare = true, remote_cb = LibGit2.mirror_cb())) do repo - bare_repo_tests(repo, repo_path) - LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, "origin")) do rmt - @test LibGit2.fetch_refspecs(rmt)[1] == "+refs/*:refs/*" - end - end - end - @testset "normal" begin - LibGit2.with(LibGit2.clone(cache_repo, test_repo)) do repo - @test isdir(test_repo) - @test LibGit2.path(repo) == LibGit2.posixpath(realpath(test_repo)) - @test isdir(joinpath(test_repo, ".git")) - @test LibGit2.workdir(repo) == LibGit2.path(repo)*"/" - @test LibGit2.isattached(repo) - @test LibGit2.isorphan(repo) - repo_str = sprint(show, repo) - @test repo_str == "LibGit2.GitRepo($(sprint(show,LibGit2.path(repo))))" - end - end - @testset "credentials callback conflict" begin - callbacks = LibGit2.Callbacks(:credentials => (C_NULL, 0)) - cred_payload = LibGit2.CredentialPayload() - @test_throws ArgumentError LibGit2.clone(cache_repo, test_repo, callbacks=callbacks, credentials=cred_payload) - end - end - - @testset "Update cache repository" begin - - @testset "with commits" begin - repo = LibGit2.GitRepo(cache_repo) - repo_dir = joinpath(cache_repo,test_dir) - mkdir(repo_dir) - repo_file = open(joinpath(cache_repo,test_file), "a") - try - # create commits - println(repo_file, commit_msg1) - flush(repo_file) - LibGit2.add!(repo, test_file) - @test LibGit2.iszero(commit_oid1) - commit_oid1 = LibGit2.commit(repo, commit_msg1; author=test_sig, committer=test_sig) - @test !LibGit2.iszero(commit_oid1) - @test LibGit2.GitHash(LibGit2.head(cache_repo)) == commit_oid1 - - println(repo_file, randstring(10)) - flush(repo_file) - LibGit2.add!(repo, test_file) - commit_oid3 = LibGit2.commit(repo, randstring(10); author=test_sig, committer=test_sig) - - println(repo_file, commit_msg2) - flush(repo_file) - LibGit2.add!(repo, test_file) - @test LibGit2.iszero(commit_oid2) - commit_oid2 = LibGit2.commit(repo, commit_msg2; author=test_sig, committer=test_sig) - @test !LibGit2.iszero(commit_oid2) - - # test getting list of commit authors - auths = LibGit2.authors(repo) - @test length(auths) == 3 - for auth in auths - @test auth.name == test_sig.name - @test auth.time == test_sig.time - @test auth.email == test_sig.email - end - - # check various commit properties - commit_oid1 happened before - # commit_oid2, so it *is* an ancestor of commit_oid2 - @test LibGit2.is_ancestor_of(string(commit_oid1), string(commit_oid2), repo) - @test LibGit2.iscommit(string(commit_oid1), repo) - @test !LibGit2.iscommit(string(commit_oid1)*"fake", repo) - @test LibGit2.iscommit(string(commit_oid2), repo) - - # lookup commits - LibGit2.with(LibGit2.GitCommit(repo, commit_oid1)) do cmt - @test LibGit2.Consts.OBJECT(typeof(cmt)) == LibGit2.Consts.OBJ_COMMIT - @test commit_oid1 == LibGit2.GitHash(cmt) - short_oid1 = LibGit2.GitShortHash(string(commit_oid1)) - @test string(commit_oid1) == string(short_oid1) - @test cmp(commit_oid1, short_oid1) == 0 - @test cmp(short_oid1, commit_oid1) == 0 - @test !(short_oid1 < commit_oid1) - - # test showing ShortHash - short_str = sprint(show, short_oid1) - @test 
short_str == "GitShortHash(\"$(string(short_oid1))\")" - short_oid2 = LibGit2.GitShortHash(cmt) - @test startswith(string(commit_oid1), string(short_oid2)) - - LibGit2.with(LibGit2.GitCommit(repo, short_oid2)) do cmt2 - @test commit_oid1 == LibGit2.GitHash(cmt2) - end - # check that the author and committer signatures are correct - auth = LibGit2.author(cmt) - @test isa(auth, LibGit2.Signature) - @test auth.name == test_sig.name - @test auth.time == test_sig.time - @test auth.email == test_sig.email - short_auth = LibGit2.author(LibGit2.GitCommit(repo, short_oid1)) - @test short_auth.name == test_sig.name - @test short_auth.time == test_sig.time - @test short_auth.email == test_sig.email - cmtr = LibGit2.committer(cmt) - @test isa(cmtr, LibGit2.Signature) - @test cmtr.name == test_sig.name - @test cmtr.time == test_sig.time - @test cmtr.email == test_sig.email - @test LibGit2.message(cmt) == commit_msg1 - - # test showing the commit - showstr = split(sprint(show, cmt), "\n") - # the time of the commit will vary so just test the first two parts - @test occursin("Git Commit:", showstr[1]) - @test occursin("Commit Author: Name: TEST, Email: TEST@TEST.COM, Time:", showstr[2]) - @test occursin("Committer: Name: TEST, Email: TEST@TEST.COM, Time:", showstr[3]) - @test occursin("SHA:", showstr[4]) - @test showstr[5] == "Message:" - @test showstr[6] == commit_msg1 - @test LibGit2.revcount(repo, string(commit_oid1), string(commit_oid3)) == (-1,0) - - blame = LibGit2.GitBlame(repo, test_file) - @test LibGit2.counthunks(blame) == 3 - @test_throws BoundsError getindex(blame, LibGit2.counthunks(blame)+1) - @test_throws BoundsError getindex(blame, 0) - sig = LibGit2.Signature(blame[1].orig_signature) - @test sig.name == cmtr.name - @test sig.email == cmtr.email - show_strs = split(sprint(show, blame[1]), "\n") - @test show_strs[1] == "GitBlameHunk:" - @test show_strs[2] == "Original path: $test_file" - @test show_strs[3] == "Lines in hunk: 1" - @test show_strs[4] == "Final commit oid: $commit_oid1" - @test show_strs[6] == "Original commit oid: $commit_oid1" - @test length(show_strs) == 7 - end - finally - close(repo) - close(repo_file) - end - end - - @testset "with branch" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - brnch = LibGit2.branch(repo) - LibGit2.with(LibGit2.head(repo)) do brref - # various branch properties - @test LibGit2.isbranch(brref) - @test !LibGit2.isremote(brref) - @test LibGit2.name(brref) == "refs/heads/$(default_branch)" - @test LibGit2.shortname(brref) == default_branch - @test LibGit2.ishead(brref) - @test LibGit2.upstream(brref) === nothing - - # showing the GitReference to this branch - show_strs = split(sprint(show, brref), "\n") - @test show_strs[1] == "GitReference:" - @test show_strs[2] == "Branch with name refs/heads/$(default_branch)" - @test show_strs[3] == "Branch is HEAD." 
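# A minimal sketch, not from the test suite, of the lookup_branch flag relied
# on just below, assuming `repo` is an open GitRepo with at least one commit:
# the trailing Bool selects remote-tracking (true) versus local (false)
# branches, so a branch that was only created locally is found with `false`.
LibGit2.branch!(repo, "sketch/lookup", set_head=false)                   # create, but leave HEAD alone
@assert LibGit2.lookup_branch(repo, "sketch/lookup", true) === nothing   # no remote-tracking branch by that name
sketch_ref = LibGit2.lookup_branch(repo, "sketch/lookup", false)         # found among the local branches
@assert LibGit2.shortname(sketch_ref) == "sketch/lookup"
LibGit2.delete_branch(sketch_ref)                                        # remove the throwaway branch again
close(sketch_ref)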
- @test repo.ptr == LibGit2.repository(brref).ptr - @test brnch == default_branch - @test LibGit2.headname(repo) == default_branch - - # create a branch *without* setting its tip as HEAD - LibGit2.branch!(repo, test_branch, string(commit_oid1), set_head=false) - # null because we are looking for a REMOTE branch - @test LibGit2.lookup_branch(repo, test_branch, true) === nothing - # not nothing because we are now looking for a LOCAL branch - LibGit2.with(LibGit2.lookup_branch(repo, test_branch, false)) do tbref - @test LibGit2.shortname(tbref) == test_branch - @test LibGit2.upstream(tbref) === nothing - end - @test LibGit2.lookup_branch(repo, test_branch2, true) === nothing - # test deleting the branch - LibGit2.branch!(repo, test_branch2; set_head=false) - LibGit2.with(LibGit2.lookup_branch(repo, test_branch2, false)) do tbref - @test LibGit2.shortname(tbref) == test_branch2 - LibGit2.delete_branch(tbref) - @test LibGit2.lookup_branch(repo, test_branch2, true) === nothing - end - end - branches = map(b->LibGit2.shortname(b[1]), LibGit2.GitBranchIter(repo)) - @test default_branch in branches - @test test_branch in branches - end - end - - @testset "with default configuration" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - try - LibGit2.Signature(repo) - catch ex - # these test configure repo with new signature - # in case when global one does not exsist - @test isa(ex, LibGit2.Error.GitError) == true - - cfg = LibGit2.GitConfig(repo) - LibGit2.set!(cfg, "user.name", "AAAA") - LibGit2.set!(cfg, "user.email", "BBBB@BBBB.COM") - sig = LibGit2.Signature(repo) - @test sig.name == "AAAA" - @test sig.email == "BBBB@BBBB.COM" - @test LibGit2.getconfig(repo, "user.name", "") == "AAAA" - @test LibGit2.getconfig(cache_repo, "user.name", "") == "AAAA" - end - end - end - - @testset "with tags" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - tags = LibGit2.tag_list(repo) - @test length(tags) == 0 - - # create tag and extract it from a GitReference - tag_oid1 = LibGit2.tag_create(repo, tag1, commit_oid1, sig=test_sig) - @test !LibGit2.iszero(tag_oid1) - tags = LibGit2.tag_list(repo) - @test length(tags) == 1 - @test tag1 in tags - tag1ref = LibGit2.GitReference(repo, "refs/tags/$tag1") - # because this is a reference to an OID - @test isempty(LibGit2.fullname(tag1ref)) - - # test showing a GitReference to a GitTag, and the GitTag itself - show_strs = split(sprint(show, tag1ref), "\n") - @test show_strs[1] == "GitReference:" - @test show_strs[2] == "Tag with name refs/tags/$tag1" - tag1tag = LibGit2.peel(LibGit2.GitTag, tag1ref) - @test LibGit2.name(tag1tag) == tag1 - @test LibGit2.target(tag1tag) == commit_oid1 - @test sprint(show, tag1tag) == "GitTag:\nTag name: $tag1 target: $commit_oid1" - # peels to the commit the tag points to - tag1cmt = LibGit2.peel(tag1ref) - @test LibGit2.GitHash(tag1cmt) == commit_oid1 - tag_oid2 = LibGit2.tag_create(repo, tag2, commit_oid2) - @test !LibGit2.iszero(tag_oid2) - tags = LibGit2.tag_list(repo) - @test length(tags) == 2 - @test tag2 in tags - - refs = LibGit2.ref_list(repo) - @test refs == ["refs/heads/$(default_branch)", "refs/heads/test_branch", "refs/tags/tag1", "refs/tags/tag2"] - # test deleting a tag - LibGit2.tag_delete(repo, tag1) - tags = LibGit2.tag_list(repo) - @test length(tags) == 1 - @test tag2 ∈ tags - @test tag1 ∉ tags - - # test git describe functions applied to these GitTags - description = LibGit2.GitDescribeResult(repo) - fmtted_description = LibGit2.format(description) - @test sprint(show, description) == 
"GitDescribeResult:\n$fmtted_description\n" - @test fmtted_description == "tag2" - description = LibGit2.GitDescribeResult(LibGit2.GitObject(repo, "HEAD")) - fmtted_description = LibGit2.format(description) - @test sprint(show, description) == "GitDescribeResult:\n$fmtted_description\n" - @test fmtted_description == "tag2" - end - end - - @testset "status" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - status = LibGit2.GitStatus(repo) - @test length(status) == 0 - @test_throws BoundsError status[1] - repo_file = open(joinpath(cache_repo,"statusfile"), "a") - - # create commits - println(repo_file, commit_msg1) - flush(repo_file) - LibGit2.add!(repo, test_file) - status = LibGit2.GitStatus(repo) - @test length(status) != 0 - @test_throws BoundsError status[0] - @test_throws BoundsError status[length(status)+1] - # we've added a file - show that it is new - @test status[1].status == LibGit2.Consts.STATUS_WT_NEW - close(repo_file) - end - end - - @testset "blobs" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - # this is slightly dubious, as it assumes the object has not been packed - # could be replaced by another binary format - hash_string = string(commit_oid1) - blob_file = joinpath(cache_repo,".git/objects", hash_string[1:2], hash_string[3:end]) - - id = LibGit2.addblob!(repo, blob_file) - blob = LibGit2.GitBlob(repo, id) - @test LibGit2.isbinary(blob) - len1 = length(blob) - - # test showing a GitBlob - blob_show_strs = split(sprint(show, blob), "\n") - @test blob_show_strs[1] == "GitBlob:" - @test occursin("Blob id:", blob_show_strs[2]) - @test blob_show_strs[3] == "Contents are binary." - - blob2 = LibGit2.GitBlob(repo, LibGit2.GitHash(blob)) - @test LibGit2.isbinary(blob2) - @test length(blob2) == len1 - @test blob == blob2 - @test blob !== blob2 - end - end - @testset "trees" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - @test_throws LibGit2.Error.GitError LibGit2.GitTree(repo, "HEAD") - tree = LibGit2.GitTree(repo, "HEAD^{tree}") - @test isa(tree, LibGit2.GitTree) - @test isa(LibGit2.GitObject(repo, "HEAD^{tree}"), LibGit2.GitTree) - @test LibGit2.Consts.OBJECT(typeof(tree)) == LibGit2.Consts.OBJ_TREE - @test LibGit2.count(tree) == 1 - - # test showing the GitTree and its entries - tree_str = sprint(show, tree) - @test tree_str == "GitTree:\nOwner: $(LibGit2.repository(tree))\nNumber of entries: 1\n" - @test_throws BoundsError tree[0] - @test_throws BoundsError tree[2] - tree_entry = tree[1] - subtree = LibGit2.GitTree(tree_entry) - @test_throws BoundsError subtree[0] - @test_throws BoundsError subtree[2] - tree_entry = subtree[1] - @test LibGit2.filemode(tree_entry) == 33188 - te_str = sprint(show, tree_entry) - ref_te_str = "GitTreeEntry:\nEntry name: testfile\nEntry type: LibGit2.GitBlob\nEntry OID: " - ref_te_str *= "$(LibGit2.entryid(tree_entry))\n" - @test te_str == ref_te_str - blob = LibGit2.GitBlob(tree_entry) - blob_str = sprint(show, blob) - @test blob_str == "GitBlob:\nBlob id: $(LibGit2.GitHash(blob))\nContents:\n$(LibGit2.content(blob))\n" - - # tests for walking the tree and accessing objects - @test tree[""] == tree - @test tree["/"] == tree - @test isa(tree[test_dir], LibGit2.GitTree) - @test tree["$test_dir/"] == tree[test_dir] - @test isa(tree[test_file], LibGit2.GitBlob) - @test_throws KeyError tree["nonexistent"] - - # test workaround for git_tree_walk issue - # https://github.com/libgit2/libgit2/issues/4693 - ccall((:giterr_set_str, :libgit2), Cvoid, (Cint, Cstring), - Cint(LibGit2.Error.Invalid), "previous error") - try - 
# file needs to exist in tree in order to trigger the stop walk condition - tree[test_file] - catch err - if isa(err, LibGit2.Error.GitError) && err.class == LibGit2.Error.Invalid - @test false - else - rethrow() - end - end - end - end - - @testset "diff" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - @test !LibGit2.isdirty(repo) - @test !LibGit2.isdirty(repo, test_file) - @test !LibGit2.isdirty(repo, "nonexistent") - @test !LibGit2.isdiff(repo, "HEAD") - @test !LibGit2.isdirty(repo, cached=true) - @test !LibGit2.isdirty(repo, test_file, cached=true) - @test !LibGit2.isdirty(repo, "nonexistent", cached=true) - @test !LibGit2.isdiff(repo, "HEAD", cached=true) - open(joinpath(cache_repo,test_file), "a") do f - println(f, "zzzz") - end - @test LibGit2.isdirty(repo) - @test LibGit2.isdirty(repo, test_file) - @test !LibGit2.isdirty(repo, "nonexistent") - @test LibGit2.isdiff(repo, "HEAD") - @test !LibGit2.isdirty(repo, cached=true) - @test !LibGit2.isdiff(repo, "HEAD", cached=true) - LibGit2.add!(repo, test_file) - @test LibGit2.isdirty(repo) - @test LibGit2.isdiff(repo, "HEAD") - @test LibGit2.isdirty(repo, cached=true) - @test LibGit2.isdiff(repo, "HEAD", cached=true) - tree = LibGit2.GitTree(repo, "HEAD^{tree}") - - # test properties of the diff_tree - diff = LibGit2.diff_tree(repo, tree, "", cached=true) - @test LibGit2.count(diff) == 1 - @test_throws BoundsError diff[0] - @test_throws BoundsError diff[2] - @test LibGit2.Consts.DELTA_STATUS(diff[1].status) == LibGit2.Consts.DELTA_MODIFIED - @test diff[1].nfiles == 2 - - # test showing a DiffDelta - diff_strs = split(sprint(show, diff[1]), '\n') - @test diff_strs[1] == "DiffDelta:" - @test diff_strs[2] == "Status: DELTA_MODIFIED" - @test diff_strs[3] == "Number of files: 2" - @test diff_strs[4] == "Old file:" - @test diff_strs[5] == "DiffFile:" - @test occursin("Oid:", diff_strs[6]) - @test occursin("Path:", diff_strs[7]) - @test occursin("Size:", diff_strs[8]) - @test isempty(diff_strs[9]) - @test diff_strs[10] == "New file:" - - # test showing a GitDiff - diff_strs = split(sprint(show, diff), '\n') - @test diff_strs[1] == "GitDiff:" - @test diff_strs[2] == "Number of deltas: 1" - @test diff_strs[3] == "GitDiffStats:" - @test diff_strs[4] == "Files changed: 1" - @test diff_strs[5] == "Insertions: 1" - @test diff_strs[6] == "Deletions: 0" - - LibGit2.commit(repo, "zzz") - @test !LibGit2.isdirty(repo) - @test !LibGit2.isdiff(repo, "HEAD") - @test !LibGit2.isdirty(repo, cached=true) - @test !LibGit2.isdiff(repo, "HEAD", cached=true) - end - end - end - - function setup_clone_repo(cache_repo::AbstractString, path::AbstractString; name="AAAA", email="BBBB@BBBB.COM") - repo = LibGit2.clone(cache_repo, path) - # need to set this for merges to succeed - cfg = LibGit2.GitConfig(repo) - LibGit2.set!(cfg, "user.name", name) - LibGit2.set!(cfg, "user.email", email) - return repo - end - # TO DO: add more tests for various merge - # preference options - function add_and_commit_file(repo, filenm, filecontent) - open(joinpath(LibGit2.path(repo), filenm),"w") do f - write(f, filecontent) - end - LibGit2.add!(repo, filenm) - return LibGit2.commit(repo, "add $filenm") - end - @testset "Fastforward merges" begin - LibGit2.with(setup_clone_repo(cache_repo, joinpath(dir, "Example.FF"))) do repo - # Sets up a branch "branch/ff_a" which will be two commits ahead - # of "master". It's possible to fast-forward merge "branch/ff_a" - # into "master", which is the default behavior. 
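# A standalone sketch, assuming nothing beyond LibGit2 itself, of the
# fast-forward flow the testset below exercises against the cloned cache
# repository: commit on a side branch, switch back, and `merge!(..., true)`
# fast-forwards the current branch when its head is an ancestor of the merged
# commit. Paths, branch names, and identities here are throwaway values.
import LibGit2
mktempdir() do path
    repo = LibGit2.init(path)
    cfg = LibGit2.GitConfig(repo)
    LibGit2.set!(cfg, "user.name", "AAAA")            # commits and merges need an identity
    LibGit2.set!(cfg, "user.email", "BBBB@BBBB.COM")
    write(joinpath(path, "f"), "1\n")
    LibGit2.add!(repo, "f")
    LibGit2.commit(repo, "base")
    base = LibGit2.head_oid(repo)
    start_branch = LibGit2.branch(repo)               # whatever init.defaultBranch produced
    LibGit2.branch!(repo, "topic")                    # branch off and add one commit
    write(joinpath(path, "g"), "2\n")
    LibGit2.add!(repo, "g")
    LibGit2.commit(repo, "topic work")
    LibGit2.branch!(repo, start_branch)               # back on the starting branch
    ann = LibGit2.GitAnnotated(repo, "topic")
    @assert LibGit2.merge!(repo, [ann], true)         # true => fast-forward only
    @assert LibGit2.is_ancestor_of(string(base), string(LibGit2.head_oid(repo)), repo)
    close(repo)
end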
- oldhead = LibGit2.head_oid(repo) - LibGit2.branch!(repo, "branch/ff_a") - add_and_commit_file(repo, "ff_file1", "111\n") - add_and_commit_file(repo, "ff_file2", "222\n") - LibGit2.branch!(repo, "master") - # switch back, now try to ff-merge the changes - # from branch/a - # set up the merge using GitAnnotated objects - upst_ann = LibGit2.GitAnnotated(repo, "branch/ff_a") - head_ann = LibGit2.GitAnnotated(repo, "master") - - # ff merge them - @test LibGit2.merge!(repo, [upst_ann], true) - @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) - - # Repeat the process, but specifying a commit to merge in as opposed - # to a branch name or GitAnnotated. - oldhead = LibGit2.head_oid(repo) - LibGit2.branch!(repo, "branch/ff_b") - add_and_commit_file(repo, "ff_file3", "333\n") - branchhead = add_and_commit_file(repo, "ff_file4", "444\n") - LibGit2.branch!(repo, "master") - # switch back, now try to ff-merge the changes - # from branch/a using committish - @test LibGit2.merge!(repo, committish=string(branchhead)) - @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) - - # Repeat the process, but specifying a branch name to merge in as opposed - # to a commit or GitAnnotated. - oldhead = LibGit2.head_oid(repo) - LibGit2.branch!(repo, "branch/ff_c") - add_and_commit_file(repo, "ff_file5", "555\n") - branchhead = add_and_commit_file(repo, "ff_file6", "666\n") - LibGit2.branch!(repo, "master") - # switch back, now try to ff-merge the changes - # from branch/ff_c using branch name - @test LibGit2.merge!(repo, branch="refs/heads/branch/ff_c") - @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) - - LibGit2.branch!(repo, "branch/ff_d") - branchhead = add_and_commit_file(repo, "ff_file7", "777\n") - LibGit2.branch!(repo, "master") - # switch back, now try to ff-merge the changes - # from branch/a - # set up the merge using GitAnnotated objects - # from a fetchhead - fh = LibGit2.fetchheads(repo) - upst_ann = LibGit2.GitAnnotated(repo, fh[1]) - @test LibGit2.merge!(repo, [upst_ann], true) - @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) - end - end - - @testset "Cherrypick" begin - LibGit2.with(setup_clone_repo(cache_repo, joinpath(dir, "Example.Cherrypick"))) do repo - # Create a commit on the new branch and cherry-pick it over to - # master. Since the cherry-pick does *not* make a new commit on - # master, we have to create our own commit of the dirty state. 
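# Condensed sketch of the point made above, assuming `repo` is an open GitRepo
# checked out on the target branch and `cmt` is a GitCommit from the side
# branch: cherrypick applies the change to the index and worktree but creates
# no commit, so the resulting state has to be committed explicitly.
LibGit2.cherrypick(repo, cmt, options=LibGit2.CherrypickOptions())
picked = LibGit2.commit(repo, "port the side-branch change")   # commit message is a placeholder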
- oldhead = LibGit2.head_oid(repo) - LibGit2.branch!(repo, "branch/cherry_a") - cmt_oid = add_and_commit_file(repo, "file1", "111\n") - cmt = LibGit2.GitCommit(repo, cmt_oid) - # switch back, try to cherrypick - # from branch/cherry_a - LibGit2.branch!(repo, "master") - LibGit2.cherrypick(repo, cmt, options=LibGit2.CherrypickOptions()) - cmt_oid2 = LibGit2.commit(repo, "add file1") - @test isempty(LibGit2.diff_files(repo, "master", "branch/cherry_a")) - end - end - - @testset "Merges" begin - LibGit2.with(setup_clone_repo(cache_repo, joinpath(dir, "Example.Merge"))) do repo - oldhead = LibGit2.head_oid(repo) - LibGit2.branch!(repo, "branch/merge_a") - add_and_commit_file(repo, "file1", "111\n") - # switch back, add a commit, try to merge - # from branch/merge_a - LibGit2.branch!(repo, default_branch) - - # test for showing a Reference to a non-HEAD branch - brref = LibGit2.GitReference(repo, "refs/heads/branch/merge_a") - @test LibGit2.name(brref) == "refs/heads/branch/merge_a" - @test !LibGit2.ishead(brref) - show_strs = split(sprint(show, brref), "\n") - @test show_strs[1] == "GitReference:" - @test show_strs[2] == "Branch with name refs/heads/branch/merge_a" - @test show_strs[3] == "Branch is not HEAD." - - add_and_commit_file(repo, "file2", "222\n") - upst_ann = LibGit2.GitAnnotated(repo, "branch/merge_a") - head_ann = LibGit2.GitAnnotated(repo, default_branch) - - # (fail to) merge them because we can't fastforward - @test_logs (:warn,"Cannot perform fast-forward merge") !LibGit2.merge!(repo, [upst_ann], true) - # merge them now that we allow non-ff - @test_logs (:info,"Review and commit merged changes") LibGit2.merge!(repo, [upst_ann], false) - @test LibGit2.is_ancestor_of(string(oldhead), string(LibGit2.head_oid(repo)), repo) - - # go back to merge_a and rename a file - LibGit2.branch!(repo, "branch/merge_b") - mv(joinpath(LibGit2.path(repo),"file1"),joinpath(LibGit2.path(repo),"mvfile1")) - LibGit2.add!(repo, "mvfile1") - LibGit2.commit(repo, "move file1") - LibGit2.branch!(repo, default_branch) - upst_ann = LibGit2.GitAnnotated(repo, "branch/merge_b") - rename_flag = Cint(0) - rename_flag = LibGit2.toggle(rename_flag, Cint(0)) # turns on the find renames opt - mos = LibGit2.MergeOptions(flags=rename_flag) - @test_logs (:info,"Review and commit merged changes") LibGit2.merge!(repo, [upst_ann], merge_opts=mos) - end - end - - @testset "push" begin - up_path = joinpath(dir, "Example.PushUp") - up_repo = setup_clone_repo(cache_repo, up_path) - our_repo = setup_clone_repo(cache_repo, joinpath(dir, "Example.Push")) - try - add_and_commit_file(our_repo, "file1", "111\n") - if LibGit2.version() >= v"0.26.0" # See #21872, #21639 and #21597 - # we cannot yet locally push to non-bare repos - @test_throws LibGit2.GitError LibGit2.push(our_repo, remoteurl=up_path) - end - finally - close(our_repo) - close(up_repo) - end - - @testset "credentials callback conflict" begin - callbacks = LibGit2.Callbacks(:credentials => (C_NULL, 0)) - cred_payload = LibGit2.CredentialPayload() - - LibGit2.with(LibGit2.GitRepo(joinpath(dir, "Example.Push"))) do repo - @test_throws ArgumentError LibGit2.push(repo, callbacks=callbacks, credentials=cred_payload) - end - end - end - - @testset "Show closed repo" begin - # Make sure this doesn't crash - buf = IOBuffer() - Base.show(buf, LibGit2.with(identity, LibGit2.GitRepo(test_repo))) - @test String(take!(buf)) == "LibGit2.GitRepo()" - end - - @testset "Fetch from cache repository" begin - LibGit2.with(LibGit2.GitRepo(test_repo)) do repo - # fetch changes - @test 
LibGit2.fetch(repo) == 0 - @test !isfile(joinpath(test_repo, test_file)) - - # ff merge them - @test LibGit2.merge!(repo, fastforward=true) - - # because there was not any file we need to reset branch - head_oid = LibGit2.head_oid(repo) - new_head = LibGit2.reset!(repo, head_oid, LibGit2.Consts.RESET_HARD) - @test isfile(joinpath(test_repo, test_file)) - @test new_head == head_oid - - # GitAnnotated for a fetchhead - fh_ann = LibGit2.GitAnnotated(repo, LibGit2.Consts.FETCH_HEAD) - @test LibGit2.GitHash(fh_ann) == head_oid - - # Detach HEAD - no merge - LibGit2.checkout!(repo, string(commit_oid3)) - @test_throws LibGit2.Error.GitError LibGit2.merge!(repo, fastforward=true) - - # Switch to a branch without remote - no merge - LibGit2.branch!(repo, test_branch) - @test_throws LibGit2.Error.GitError LibGit2.merge!(repo, fastforward=true) - - # Set the username and email for the test_repo (needed for rebase) - cfg = LibGit2.GitConfig(repo) - LibGit2.set!(cfg, "user.name", "AAAA") - LibGit2.set!(cfg, "user.email", "BBBB@BBBB.COM") - - # If upstream argument is empty, libgit2 will look for tracking - # information. If the current branch isn't tracking any upstream - # the rebase should fail. - @test_throws LibGit2.GitError LibGit2.rebase!(repo) - # Try rebasing on master instead - newhead = LibGit2.rebase!(repo, default_branch) - @test newhead == head_oid - - # Switch to the master branch - LibGit2.branch!(repo, default_branch) - - fetch_heads = LibGit2.fetchheads(repo) - @test fetch_heads[1].name == "refs/heads/$(default_branch)" - @test fetch_heads[1].ismerge == true # we just merged master - @test fetch_heads[2].name == "refs/heads/test_branch" - @test fetch_heads[2].ismerge == false - @test fetch_heads[3].name == "refs/tags/tag2" - @test fetch_heads[3].ismerge == false - for fh in fetch_heads - @test fh.url == cache_repo - fh_strs = split(sprint(show, fh), '\n') - @test fh_strs[1] == "FetchHead:" - @test fh_strs[2] == "Name: $(fh.name)" - @test fh_strs[3] == "URL: $(fh.url)" - @test fh_strs[5] == "Merged: $(fh.ismerge)" - end - end - - @testset "credentials callback conflict" begin - callbacks = LibGit2.Callbacks(:credentials => (C_NULL, 0)) - cred_payload = LibGit2.CredentialPayload() - - LibGit2.with(LibGit2.GitRepo(test_repo)) do repo - @test_throws ArgumentError LibGit2.fetch(repo, callbacks=callbacks, credentials=cred_payload) - end - end - end - - @testset "Examine test repository" begin - @testset "files" begin - @test readlines(joinpath(test_repo, test_file)) == readlines(joinpath(cache_repo, test_file)) - end - - @testset "tags & branches" begin - LibGit2.with(LibGit2.GitRepo(test_repo)) do repo - # all tag in place - tags = LibGit2.tag_list(repo) - @test length(tags) == 1 - @test tag2 in tags - - # all tag in place - branches = map(b->LibGit2.shortname(b[1]), LibGit2.GitBranchIter(repo)) - @test default_branch in branches - @test test_branch in branches - - # issue #16337 - LibGit2.with(LibGit2.GitReference(repo, "refs/tags/$tag2")) do tag2ref - @test_throws LibGit2.Error.GitError LibGit2.upstream(tag2ref) - end - end - end - - @testset "commits with revwalk" begin - repo = LibGit2.GitRepo(test_repo) - cache = LibGit2.GitRepo(cache_repo) - try - # test map with oid - oids = LibGit2.with(LibGit2.GitRevWalker(repo)) do walker - LibGit2.map((oid,repo)->(oid,repo), walker, oid=commit_oid1, by=LibGit2.Consts.SORT_TIME) - end - @test length(oids) == 1 - # test map with range - str_1 = string(commit_oid1) - str_3 = string(commit_oid3) - oids = LibGit2.with(LibGit2.GitRevWalker(repo)) do 
walker - LibGit2.map((oid,repo)->(oid,repo), walker, range="$str_1..$str_3", by=LibGit2.Consts.SORT_TIME) - end - @test length(oids) == 1 - - test_oids = LibGit2.with(LibGit2.GitRevWalker(repo)) do walker - LibGit2.map((oid,repo)->string(oid), walker, by = LibGit2.Consts.SORT_TIME) - end - cache_oids = LibGit2.with(LibGit2.GitRevWalker(cache)) do walker - LibGit2.map((oid,repo)->string(oid), walker, by = LibGit2.Consts.SORT_TIME) - end - for i in eachindex(oids) - @test cache_oids[i] == test_oids[i] - end - # test with specified oid - LibGit2.with(LibGit2.GitRevWalker(repo)) do walker - @test LibGit2.count((oid,repo)->(oid == commit_oid1), walker, oid=commit_oid1, by=LibGit2.Consts.SORT_TIME) == 1 - end - # test without specified oid - LibGit2.with(LibGit2.GitRevWalker(repo)) do walker - @test LibGit2.count((oid,repo)->(oid == commit_oid1), walker, by=LibGit2.Consts.SORT_TIME) == 1 - end - finally - close(repo) - close(cache) - end - end - end - - @testset "Modify and reset repository" begin - LibGit2.with(LibGit2.GitRepo(test_repo)) do repo - # check index for file - LibGit2.with(LibGit2.GitIndex(repo)) do idx - i = findall(test_file, idx) - @test i !== nothing - idx_entry = idx[i] - @test idx_entry !== nothing - idx_entry_str = sprint(show, idx_entry) - @test idx_entry_str == "IndexEntry($(string(idx_entry.id)))" - @test LibGit2.stage(idx_entry) == 0 - - i = findall("zzz", idx) - @test i === nothing - idx_str = sprint(show, idx) - @test idx_str == "GitIndex:\nRepository: $(LibGit2.repository(idx))\nNumber of elements: 1\n" - - LibGit2.remove!(repo, test_file) - LibGit2.read!(repo) - @test LibGit2.count(idx) == 0 - LibGit2.add!(repo, test_file) - LibGit2.update!(repo, test_file) - @test LibGit2.count(idx) == 1 - end - - # check non-existent file status - st = LibGit2.status(repo, "XYZ") - @test st === nothing - - # check file status - st = LibGit2.status(repo, test_file) - @test st !== nothing - @test LibGit2.isset(st, LibGit2.Consts.STATUS_CURRENT) - - # modify file - open(joinpath(test_repo, test_file), "a") do io - write(io, 0x41) - end - - # file modified but not staged - st_mod = LibGit2.status(repo, test_file) - @test !LibGit2.isset(st_mod, LibGit2.Consts.STATUS_INDEX_MODIFIED) - @test LibGit2.isset(st_mod, LibGit2.Consts.STATUS_WT_MODIFIED) - - # stage file - LibGit2.add!(repo, test_file) - - # modified file staged - st_stg = LibGit2.status(repo, test_file) - @test LibGit2.isset(st_stg, LibGit2.Consts.STATUS_INDEX_MODIFIED) - @test !LibGit2.isset(st_stg, LibGit2.Consts.STATUS_WT_MODIFIED) - - # try to unstage to unknown commit - @test_throws LibGit2.Error.GitError LibGit2.reset!(repo, "XYZ", test_file) - - # status should not change - st_new = LibGit2.status(repo, test_file) - @test st_new == st_stg - - # try to unstage to HEAD - new_head = LibGit2.reset!(repo, LibGit2.Consts.HEAD_FILE, test_file) - st_uns = LibGit2.status(repo, test_file) - @test st_uns == st_mod - - # reset repo - @test_throws LibGit2.Error.GitError LibGit2.reset!(repo, LibGit2.GitHash(), LibGit2.Consts.RESET_HARD) - - new_head = LibGit2.reset!(repo, LibGit2.head_oid(repo), LibGit2.Consts.RESET_HARD) - open(joinpath(test_repo, test_file), "r") do io - @test read(io)[end] != 0x41 - end - end + withenv("HOME" => dir, "USERPROFILE" => dir) do + include("libgit2-tests.jl") end - - @testset "Modify remote" begin - path = test_repo - LibGit2.with(LibGit2.GitRepo(path)) do repo - remote_name = "test" - url = "https://test.com/repo" - - @test LibGit2.lookup_remote(repo, remote_name) === nothing - - for r in (repo, path) 
- # Set just the fetch URL - LibGit2.set_remote_fetch_url(r, remote_name, url) - remote = LibGit2.lookup_remote(repo, remote_name) - @test LibGit2.name(remote) == remote_name - @test LibGit2.url(remote) == url - @test LibGit2.push_url(remote) == "" - - LibGit2.remote_delete(repo, remote_name) - @test LibGit2.lookup_remote(repo, remote_name) === nothing - - # Set just the push URL - LibGit2.set_remote_push_url(r, remote_name, url) - remote = LibGit2.lookup_remote(repo, remote_name) - @test LibGit2.name(remote) == remote_name - @test LibGit2.url(remote) == "" - @test LibGit2.push_url(remote) == url - - LibGit2.remote_delete(repo, remote_name) - @test LibGit2.lookup_remote(repo, remote_name) === nothing - - # Set the fetch and push URL - LibGit2.set_remote_url(r, remote_name, url) - remote = LibGit2.lookup_remote(repo, remote_name) - @test LibGit2.name(remote) == remote_name - @test LibGit2.url(remote) == url - @test LibGit2.push_url(remote) == url - - LibGit2.remote_delete(repo, remote_name) - @test LibGit2.lookup_remote(repo, remote_name) === nothing - end - # Invalid remote name - @test_throws LibGit2.GitError LibGit2.set_remote_url(repo, "", url) - @test_throws LibGit2.GitError LibGit2.set_remote_url(repo, remote_name, "") - end - end - - @testset "rebase" begin - LibGit2.with(LibGit2.GitRepo(test_repo)) do repo - LibGit2.branch!(repo, "branch/a") - - oldhead = LibGit2.head_oid(repo) - add_and_commit_file(repo, "file1", "111\n") - add_and_commit_file(repo, "file2", "222\n") - LibGit2.branch!(repo, "branch/b") - - # squash last 2 commits - new_head = LibGit2.reset!(repo, oldhead, LibGit2.Consts.RESET_SOFT) - @test new_head == oldhead - LibGit2.commit(repo, "squash file1 and file2") - - # add another file - newhead = add_and_commit_file(repo, "file3", "333\n") - @test LibGit2.diff_files(repo, "branch/a", "branch/b", filter=Set([LibGit2.Consts.DELTA_ADDED])) == ["file3"] - @test LibGit2.diff_files(repo, "branch/a", "branch/b", filter=Set([LibGit2.Consts.DELTA_MODIFIED])) == [] - # switch back and rebase - LibGit2.branch!(repo, "branch/a") - newnewhead = LibGit2.rebase!(repo, "branch/b") - - # issue #19624 - @test newnewhead == newhead - - # add yet another file - add_and_commit_file(repo, "file4", "444\n") - # rebase with onto - newhead = LibGit2.rebase!(repo, "branch/a", default_branch) - - newerhead = LibGit2.head_oid(repo) - @test newerhead == newhead - - # add yet more files - add_and_commit_file(repo, "file5", "555\n") - pre_abort_head = add_and_commit_file(repo, "file6", "666\n") - # Rebase type - head_ann = LibGit2.GitAnnotated(repo, "branch/a") - upst_ann = LibGit2.GitAnnotated(repo, default_branch) - rb = LibGit2.GitRebase(repo, head_ann, upst_ann) - @test_throws BoundsError rb[3] - @test_throws BoundsError rb[0] - rbo, _ = iterate(rb) - rbo_str = sprint(show, rbo) - @test rbo_str == "RebaseOperation($(string(rbo.id)))\nOperation type: REBASE_OPERATION_PICK\n" - rb_str = sprint(show, rb) - @test rb_str == "GitRebase:\nNumber: 2\nCurrently performing operation: 1\n" - rbo = rb[2] - rbo_str = sprint(show, rbo) - @test rbo_str == "RebaseOperation($(string(rbo.id)))\nOperation type: REBASE_OPERATION_PICK\n" - - # test rebase abort - LibGit2.abort(rb) - @test LibGit2.head_oid(repo) == pre_abort_head - end - end - - @testset "merge" begin - LibGit2.with(setup_clone_repo(cache_repo, joinpath(dir, "Example.simple_merge"))) do repo - LibGit2.branch!(repo, "branch/merge_a") - - a_head = LibGit2.head_oid(repo) - add_and_commit_file(repo, "merge_file1", "111\n") - LibGit2.branch!(repo, 
default_branch) - a_head_ann = LibGit2.GitAnnotated(repo, "branch/merge_a") - # merge returns true if successful - @test_logs (:info,"Review and commit merged changes") LibGit2.merge!(repo, [a_head_ann]) - end - end - - @testset "Transact test repository" begin - LibGit2.with(LibGit2.GitRepo(test_repo)) do repo - cp(joinpath(test_repo, test_file), joinpath(test_repo, "CCC")) - cp(joinpath(test_repo, test_file), joinpath(test_repo, "AAA")) - LibGit2.add!(repo, "AAA") - @test_throws ErrorException LibGit2.transact(repo) do trepo - mv(joinpath(test_repo, test_file), joinpath(test_repo, "BBB")) - LibGit2.add!(trepo, "BBB") - oid = LibGit2.commit(trepo, "test commit"; author=test_sig, committer=test_sig) - error("Force recovery") - end - @test isfile(joinpath(test_repo, "AAA")) - @test isfile(joinpath(test_repo, "CCC")) - @test !isfile(joinpath(test_repo, "BBB")) - @test isfile(joinpath(test_repo, test_file)) - end - end - - @testset "checkout_head" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - # modify file - repo_file = open(joinpath(cache_repo,test_file), "a") - println(repo_file, commit_msg1 * randstring(10)) - close(repo_file) - # and checkout HEAD once more - LibGit2.checkout_head(repo, options=LibGit2.CheckoutOptions(checkout_strategy=LibGit2.Consts.CHECKOUT_FORCE)) - @test LibGit2.headname(repo) == default_branch - @test !LibGit2.isdirty(repo) - end - end - - @testset "checkout/headname" begin - LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo - LibGit2.checkout!(repo, string(commit_oid1)) - @test !LibGit2.isattached(repo) - @test LibGit2.headname(repo) == "(detached from $(string(commit_oid1)[1:7]))" - end - end - - if Sys.isunix() - @testset "checkout/proptest" begin - LibGit2.with(LibGit2.GitRepo(test_repo)) do repo - cp(joinpath(test_repo, test_file), joinpath(test_repo, "proptest")) - LibGit2.add!(repo, "proptest") - id1 = LibGit2.commit(repo, "test property change 1") - # change in file permissions (#17610) - chmod(joinpath(test_repo, "proptest"),0o744) - LibGit2.add!(repo, "proptest") - id2 = LibGit2.commit(repo, "test property change 2") - LibGit2.checkout!(repo, string(id1)) - @test !LibGit2.isdirty(repo) - # change file to symlink (#18420) - mv(joinpath(test_repo, "proptest"), joinpath(test_repo, "proptest2")) - symlink(joinpath(test_repo, "proptest2"), joinpath(test_repo, "proptest")) - LibGit2.add!(repo, "proptest", "proptest2") - id3 = LibGit2.commit(repo, "test symlink change") - LibGit2.checkout!(repo, string(id1)) - @test !LibGit2.isdirty(repo) - end - end - end - - - @testset "Credentials" begin - creds_user = "USER" - creds_pass = Base.SecretBuffer("PASS") - creds = LibGit2.UserPasswordCredential(creds_user, creds_pass) - @test creds.user == creds_user - @test creds.pass == creds_pass - creds2 = LibGit2.UserPasswordCredential(creds_user, creds_pass) - @test creds == creds2 - - sshcreds = LibGit2.SSHCredential(creds_user, creds_pass) - @test sshcreds.user == creds_user - @test sshcreds.pass == creds_pass - @test sshcreds.prvkey == "" - @test sshcreds.pubkey == "" - sshcreds2 = LibGit2.SSHCredential(creds_user, creds_pass) - @test sshcreds == sshcreds2 - - Base.shred!(creds) - Base.shred!(creds2) - Base.shred!(sshcreds) - Base.shred!(sshcreds2) - Base.shred!(creds_pass) - end - - @testset "CachedCredentials" begin - cache = LibGit2.CachedCredentials() - - url = "https://github.com/JuliaLang/Example.jl" - cred_id = LibGit2.credential_identifier(url) - cred = LibGit2.UserPasswordCredential("julia", "password") - - @test !haskey(cache, cred_id) - password = 
Base.SecretBuffer("password") - - # Attempt to reject a credential which wasn't stored - LibGit2.reject(cache, cred, url) - @test !haskey(cache, cred_id) - @test cred.user == "julia" - @test cred.pass == password - - # Approve a credential which causes it to be stored - LibGit2.approve(cache, cred, url) - @test haskey(cache, cred_id) - @test cache[cred_id] === cred - - # Approve the same credential again which does not overwrite - LibGit2.approve(cache, cred, url) - @test haskey(cache, cred_id) - @test cache[cred_id] === cred - - # Overwrite an already cached credential - dup_cred = deepcopy(cred) - LibGit2.approve(cache, dup_cred, url) # Shreds overwritten `cred` - @test haskey(cache, cred_id) - @test cache[cred_id] === dup_cred - @test cred.user != "julia" - @test cred.pass != password - @test dup_cred.user == "julia" - @test dup_cred.pass == password - - cred = dup_cred - - # Reject an approved credential - @test cache[cred_id] === cred - LibGit2.reject(cache, cred, url) # Avoids shredding the credential passed in - @test !haskey(cache, cred_id) - @test cred.user == "julia" - @test cred.pass == password - - # Reject and shred an approved credential - dup_cred = deepcopy(cred) - LibGit2.approve(cache, cred, url) - - LibGit2.reject(cache, dup_cred, url) # Shred `cred` but not passed in `dup_cred` - @test !haskey(cache, cred_id) - @test cred.user != "julia" - @test cred.pass != password - @test dup_cred.user == "julia" - @test dup_cred.pass == password - - Base.shred!(dup_cred) - Base.shred!(cache) - Base.shred!(password) - end - - @testset "Git credential username" begin - @testset "fill username" begin - config_path = joinpath(dir, config_file) - isfile(config_path) && rm(config_path) - - LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - # No credential settings should be set for these tests - @test isempty(collect(LibGit2.GitConfigIter(cfg, r"credential.*"))) - - github_cred = LibGit2.GitCredential("https", "github.com") - mygit_cred = LibGit2.GitCredential("https", "mygithost") - - # No credential settings in configuration. - username = LibGit2.default_username(cfg, github_cred) - @test username === nothing - - # Add a credential setting for a specific for a URL - LibGit2.set!(cfg, "credential.https://github.com.username", "foo") - - username = LibGit2.default_username(cfg, github_cred) - @test username == "foo" - - username = LibGit2.default_username(cfg, mygit_cred) - @test username === nothing - - # Add a global credential setting after the URL specific setting. The first - # setting to match will be the one that is used. 
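The lookup order exercised next (a URL-specific credential.<url>.username entry shadowing the global credential.username entry) can be reproduced on its own; a minimal sketch using the same internal helpers, assuming only a throwaway config file path:

import LibGit2

config_path = tempname()  # hypothetical scratch config file
LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg
    LibGit2.set!(cfg, "credential.https://github.com.username", "foo")  # URL-specific entry
    LibGit2.set!(cfg, "credential.username", "bar")                     # global fallback
    github_cred = LibGit2.GitCredential("https", "github.com")
    other_cred  = LibGit2.GitCredential("https", "mygithost")
    @assert LibGit2.default_username(cfg, github_cred) == "foo"  # the specific entry wins
    @assert LibGit2.default_username(cfg, other_cred)  == "bar"  # no specific entry: falls back to the global one
    Base.shred!(github_cred)
    Base.shred!(other_cred)
end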
- LibGit2.set!(cfg, "credential.username", "bar") - - username = LibGit2.default_username(cfg, github_cred) - @test username == "foo" - - username = LibGit2.default_username(cfg, mygit_cred) - @test username == "bar" - - Base.shred!(github_cred) - Base.shred!(mygit_cred) - end - end - - @testset "empty username" begin - config_path = joinpath(dir, config_file) - isfile(config_path) && rm(config_path) - - LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - # No credential settings should be set for these tests - @test isempty(collect(LibGit2.GitConfigIter(cfg, r"credential.*"))) - - # An empty username should count as being set - LibGit2.set!(cfg, "credential.https://github.com.username", "") - LibGit2.set!(cfg, "credential.username", "name") - - github_cred = LibGit2.GitCredential("https", "github.com") - mygit_cred = LibGit2.GitCredential("https", "mygithost", "path") - - username = LibGit2.default_username(cfg, github_cred) - @test username == "" - - username = LibGit2.default_username(cfg, mygit_cred) - @test username == "name" - - Base.shred!(github_cred) - Base.shred!(mygit_cred) - end - end - end - - @testset "Git helpers useHttpPath" begin - @testset "use_http_path" begin - config_path = joinpath(dir, config_file) - isfile(config_path) && rm(config_path) - - LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - # No credential settings should be set for these tests - @test isempty(collect(LibGit2.GitConfigIter(cfg, r"credential.*"))) - - github_cred = LibGit2.GitCredential("https", "github.com") - mygit_cred = LibGit2.GitCredential("https", "mygithost") - - # No credential settings in configuration. - @test !LibGit2.use_http_path(cfg, github_cred) - @test !LibGit2.use_http_path(cfg, mygit_cred) - - # Add a credential setting for a specific for a URL - LibGit2.set!(cfg, "credential.https://github.com.useHttpPath", "true") - - @test LibGit2.use_http_path(cfg, github_cred) - @test !LibGit2.use_http_path(cfg, mygit_cred) - - # Invert the current settings. - LibGit2.set!(cfg, "credential.useHttpPath", "true") - LibGit2.set!(cfg, "credential.https://github.com.useHttpPath", "false") - - @test !LibGit2.use_http_path(cfg, github_cred) - @test LibGit2.use_http_path(cfg, mygit_cred) - - Base.shred!(github_cred) - Base.shred!(mygit_cred) - end - end - end - - @testset "GitCredentialHelper" begin - GitCredentialHelper = LibGit2.GitCredentialHelper - GitCredential = LibGit2.GitCredential - - @testset "parse" begin - @test parse(GitCredentialHelper, "!echo hello") == GitCredentialHelper(`echo hello`) - @test parse(GitCredentialHelper, "/bin/bash") == GitCredentialHelper(`/bin/bash`) - @test parse(GitCredentialHelper, "store") == GitCredentialHelper(`git credential-store`) - end - - @testset "empty helper" begin - config_path = joinpath(dir, config_file) - - # Note: LibGit2.set! 
doesn't allow us to set duplicates or ordering - open(config_path, "w+") do fp - write(fp, """ - [credential] - helper = !echo first - [credential "https://mygithost"] - helper = "" - [credential] - helper = !echo second - """) - end - - LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - iter = LibGit2.GitConfigIter(cfg, r"credential.*\.helper") - @test LibGit2.split_cfg_entry.(iter) == [ - ("credential", "", "helper", "!echo first"), - ("credential", "https://mygithost", "helper", ""), - ("credential", "", "helper", "!echo second"), - ] - - expected = [ - GitCredentialHelper(`echo first`), - GitCredentialHelper(`echo second`), - ] - - github_cred = GitCredential("https", "github.com") - mygit_cred = GitCredential("https", "mygithost") - - @test LibGit2.credential_helpers(cfg, github_cred) == expected - @test LibGit2.credential_helpers(cfg, mygit_cred) == expected[2:2] - - Base.shred!(github_cred) - Base.shred!(mygit_cred) - end - end - - @testset "approve/reject" begin - # In order to use the "store" credential helper `git` needs to be installed and - # on the path. - if GIT_INSTALLED - credential_path = joinpath(dir, ".git-credentials") - isfile(credential_path) && rm(credential_path) - - # Requires `git` to be installed and available on the path. - helper = parse(LibGit2.GitCredentialHelper, "store") - - # Set HOME to control where the .git-credentials file is written. - # Note: In Cygwin environments `git` will use HOME instead of USERPROFILE. - # Setting both environment variables ensures home was overridden. - withenv("HOME" => dir, "USERPROFILE" => dir) do - query = LibGit2.GitCredential("https", "mygithost") - filled = LibGit2.GitCredential("https", "mygithost", nothing, "bob", "s3cre7") - - @test !isfile(credential_path) - - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == query - end - - LibGit2.approve(helper, filled) - @test isfile(credential_path) - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == filled - end - - LibGit2.reject(helper, filled) - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == query - end - - Base.shred!(query) - Base.shred!(filled) - end - end - end - - @testset "approve/reject with path" begin - # In order to use the "store" credential helper `git` needs to be installed and - # on the path. - if GIT_INSTALLED - credential_path = joinpath(dir, ".git-credentials") - isfile(credential_path) && rm(credential_path) - - # Requires `git` to be installed and available on the path. - helper = parse(LibGit2.GitCredentialHelper, "store") - - # Set HOME to control where the .git-credentials file is written. - # Note: In Cygwin environments `git` will use HOME instead of USERPROFILE. - # Setting both environment variables ensures home was overridden. 
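For reference, the approve/fill/reject cycle exercised below with git's store helper has this shape; a sketch, assuming git is on the path and HOME/USERPROFILE point at a scratch directory so the real ~/.git-credentials is left untouched:

import LibGit2

helper = parse(LibGit2.GitCredentialHelper, "store")
query  = LibGit2.GitCredential("https", "mygithost")
filled = LibGit2.GitCredential("https", "mygithost", nothing, "bob", "s3cre7")

LibGit2.approve(helper, filled)   # store writes "https://bob:s3cre7@mygithost" to $HOME/.git-credentials
Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result
    @assert result == filled      # a bare protocol/host query is now answered with the stored user/password
end
LibGit2.reject(helper, filled)    # erases the entry; the next fill! returns the query unchanged
Base.shred!(query)
Base.shred!(filled)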
- withenv("HOME" => dir, "USERPROFILE" => dir) do - query = LibGit2.GitCredential("https", "mygithost") - query_a = LibGit2.GitCredential("https", "mygithost", "a") - query_b = LibGit2.GitCredential("https", "mygithost", "b") - - filled_a = LibGit2.GitCredential("https", "mygithost", "a", "alice", "1234") - filled_b = LibGit2.GitCredential("https", "mygithost", "b", "bob", "s3cre7") - - function without_path(cred) - c = deepcopy(cred) - c.path = nothing - c - end - - filled_without_path_a = without_path(filled_a) - filled_without_path_b = without_path(filled_b) - - @test !isfile(credential_path) - - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == query - end - Base.shred!(LibGit2.fill!(helper, deepcopy(query_a))) do result - @test result == query_a - end - Base.shred!(LibGit2.fill!(helper, deepcopy(query_b))) do result - @test result == query_b - end - - LibGit2.approve(helper, filled_a) - @test isfile(credential_path) - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == filled_without_path_a - end - Base.shred!(LibGit2.fill!(helper, deepcopy(query_a))) do result - @test result == filled_a - end - Base.shred!(LibGit2.fill!(helper, deepcopy(query_b))) do result - @test result == query_b - end - - LibGit2.approve(helper, filled_b) - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == filled_without_path_b - end - Base.shred!(LibGit2.fill!(helper, deepcopy(query_a))) do result - @test result == filled_a - end - Base.shred!(LibGit2.fill!(helper, deepcopy(query_b))) do result - @test result == filled_b - end - - LibGit2.reject(helper, filled_b) - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == filled_without_path_a - end - Base.shred!(LibGit2.fill!(helper, deepcopy(query_a))) do result - @test result == filled_a - end - Base.shred!(LibGit2.fill!(helper, deepcopy(query_b))) do result - @test result == query_b - end - - Base.shred!(query) - Base.shred!(query_a) - Base.shred!(query_b) - Base.shred!(filled_a) - Base.shred!(filled_b) - Base.shred!(filled_without_path_a) - Base.shred!(filled_without_path_b) - end - end - end - - @testset "approve/reject with UserPasswordCredential" begin - # In order to use the "store" credential helper `git` needs to be installed and - # on the path. - if GIT_INSTALLED - config_path = joinpath(dir, config_file) - isfile(config_path) && rm(config_path) - - credential_path = joinpath(dir, ".git-credentials") - isfile(credential_path) && rm(credential_path) - - LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - query = LibGit2.GitCredential("https", "mygithost") - filled = LibGit2.GitCredential("https", "mygithost", nothing, "alice", "1234") - user_pass_cred = LibGit2.UserPasswordCredential("alice", "1234") - url = "https://mygithost" - - # Requires `git` to be installed and available on the path. 
- LibGit2.set!(cfg, "credential.helper", "store --file \"$credential_path\"") - helper = only(LibGit2.credential_helpers(cfg, query)) - - @test !isfile(credential_path) - - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == query - end - - LibGit2.approve(cfg, user_pass_cred, url) - @test isfile(credential_path) - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == filled - end - - LibGit2.reject(cfg, user_pass_cred, url) - Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result - @test result == query - end - - Base.shred!(query) - Base.shred!(filled) - Base.shred!(user_pass_cred) - end - end - end - end - - # The following tests require that we can fake a TTY so that we can provide passwords - # which use the `getpass` function. At the moment we can only fake this on UNIX based - # systems. - if Sys.isunix() - git_ok = LibGit2.GitError( - LibGit2.Error.None, LibGit2.Error.GIT_OK, - "No errors") - - abort_prompt = LibGit2.GitError( - LibGit2.Error.Callback, LibGit2.Error.EUSER, - "Aborting, user cancelled credential request.") - - prompt_limit = LibGit2.GitError( - LibGit2.Error.Callback, LibGit2.Error.EAUTH, - "Aborting, maximum number of prompts reached.") - - incompatible_error = LibGit2.GitError( - LibGit2.Error.Callback, LibGit2.Error.EAUTH, - "The explicitly provided credential is incompatible with the requested " * - "authentication methods.") - - exhausted_error = LibGit2.GitError( - LibGit2.Error.Callback, LibGit2.Error.EAUTH, - "All authentication methods have failed.") - - @testset "SSH credential prompt" begin - url = "git@github.com:test/package.jl" - username = "git" - - valid_key = joinpath(KEY_DIR, "valid") - valid_cred = LibGit2.SSHCredential(username, "", valid_key, valid_key * ".pub") - - valid_p_key = joinpath(KEY_DIR, "valid-passphrase") - passphrase = "secret" - valid_p_cred = LibGit2.SSHCredential(username, passphrase, valid_p_key, valid_p_key * ".pub") - - invalid_key = joinpath(KEY_DIR, "invalid") - - function gen_ex(cred; username="git") - url = username !== nothing && !isempty(username) ? "$username@" : "" - url *= "github.com:test/package.jl" - quote - include($LIBGIT2_HELPER_PATH) - credential_loop($cred, $url, $username) - end - end - - ssh_ex = gen_ex(valid_cred) - ssh_p_ex = gen_ex(valid_p_cred) - ssh_u_ex = gen_ex(valid_cred, username=nothing) - - # Note: We cannot use the default ~/.ssh/id_rsa for tests since we cannot be - # sure a users will actually have these files. Instead we will use the ENV - # variables to set the default values. - - # ENV credentials are valid - withenv("SSH_KEY_PATH" => valid_key) do - err, auth_attempts, p = challenge_prompt(ssh_ex, []) - @test err == git_ok - @test auth_attempts == 1 - end - - # ENV credentials are valid but requires a passphrase - withenv("SSH_KEY_PATH" => valid_p_key) do - challenges = [ - "Passphrase for $valid_p_key: " => "$passphrase\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - - # User mistypes passphrase. - # Note: In reality LibGit2 will raise an error upon using the invalid SSH - # credentials. Since we don't control the internals of LibGit2 though they - # could also just re-call the credential callback like they do for HTTP. 
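Throughout the SSH prompt tests below, the SSH_KEY_PATH / SSH_PUB_KEY_PATH / SSH_KEY_PASS variables act as defaults for the credential callback; the credential they describe is the same four-field SSHCredential used elsewhere in this file. A sketch, assuming the variables are set in the environment:

import LibGit2

cred = LibGit2.SSHCredential("git",                            # username for the remote
                             get(ENV, "SSH_KEY_PASS", ""),     # passphrase (may be empty)
                             ENV["SSH_KEY_PATH"],               # private key
                             get(ENV, "SSH_PUB_KEY_PATH", ENV["SSH_KEY_PATH"] * ".pub"))
# ... use it, e.g. wrapped in a CredentialPayload, then wipe it:
Base.shred!(cred)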
- challenges = [ - "Passphrase for $valid_p_key: " => "foo\n", - "Private key location for 'git@github.com' [$valid_p_key]: " => "\n", - "Passphrase for $valid_p_key: " => "$passphrase\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) - @test err == git_ok - @test auth_attempts == 2 - - # User sends EOF in passphrase prompt which aborts the credential request - challenges = [ - "Passphrase for $valid_p_key: " => "\x04", - ] - err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 1 - - # User provides an empty passphrase - challenges = [ - "Passphrase for $valid_p_key: " => "\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 1 - end - - # ENV credential requiring passphrase - withenv("SSH_KEY_PATH" => valid_p_key, "SSH_KEY_PASS" => passphrase) do - err, auth_attempts, p = challenge_prompt(ssh_p_ex, []) - @test err == git_ok - @test auth_attempts == 1 - end - - # Missing username - withenv("SSH_KEY_PATH" => valid_key) do - # User provides a valid username - challenges = [ - "Username for 'github.com': " => "$username\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_u_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - - # User sends EOF in username prompt which aborts the credential request - challenges = [ - "Username for 'github.com': " => "\x04", - ] - err, auth_attempts, p = challenge_prompt(ssh_u_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 1 - - # User provides an empty username - challenges = [ - "Username for 'github.com': " => "\n", - "Username for 'github.com': " => "\x04", - ] - err, auth_attempts, p = challenge_prompt(ssh_u_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 2 - - # User repeatedly chooses an invalid username - challenges = [ - "Username for 'github.com': " => "foo\n", - "Username for 'github.com' [foo]: " => "\n", - "Private key location for 'foo@github.com' [$valid_key]: " => "\n", - "Username for 'github.com' [foo]: " => "\x04", # Need to manually abort - ] - err, auth_attempts, p = challenge_prompt(ssh_u_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 3 - - # Credential callback is given an empty string in the `username_ptr` - # instead of the C_NULL in the other missing username tests. - ssh_user_empty_ex = gen_ex(valid_cred, username="") - challenges = [ - "Username for 'github.com': " => "$username\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_user_empty_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - end - - # Explicitly setting these env variables to be empty means the user will be - # given a prompt with no defaults set. - withenv("SSH_KEY_PATH" => nothing, - "SSH_PUB_KEY_PATH" => nothing, - "SSH_KEY_PASS" => nothing, - HOME => dir) do - - # Set the USERPROFILE / HOME above to be a directory that does not contain - # the "~/.ssh/id_rsa" file. If this file exists the credential callback - # will default to use this private key instead of triggering a prompt. 
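The HOME / USERPROFILE override just set up is the standard trick these tests use to keep homedir() — and therefore the ~/.ssh/id_rsa fallback — inside a scratch directory; a minimal sketch:

mktempdir() do scratch
    withenv("HOME" => scratch, "USERPROFILE" => scratch) do
        # Both variables are set so the override holds on Unix, Windows and Cygwin alike.
        @assert !isfile(joinpath(homedir(), ".ssh", "id_rsa"))
    end
end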
- @test !isfile(joinpath(homedir(), ".ssh", "id_rsa")) - - # User provides valid credentials - challenges = [ - "Private key location for 'git@github.com': " => "$valid_key\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - - # User provides valid credentials that requires a passphrase - challenges = [ - "Private key location for 'git@github.com': " => "$valid_p_key\n", - "Passphrase for $valid_p_key: " => "$passphrase\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_p_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - - # User sends EOF in private key prompt which aborts the credential request - challenges = [ - "Private key location for 'git@github.com': " => "\x04", - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 1 - - # User provides an empty private key which triggers a re-prompt - challenges = [ - "Private key location for 'git@github.com': " => "\n", - "Private key location for 'git@github.com': " => "\x04", - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 2 - - # User provides an invalid private key until prompt limit reached. - # Note: the prompt should not supply an invalid default. - challenges = [ - "Private key location for 'git@github.com': " => "foo\n", - "Private key location for 'git@github.com' [foo]: " => "foo\n", - "Private key location for 'git@github.com' [foo]: " => "foo\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == prompt_limit - @test auth_attempts == 3 - end - - # Explicitly setting these env variables to an existing but invalid key pair - # means the user will be given a prompt with that defaults to the given values. - withenv("SSH_KEY_PATH" => invalid_key, - "SSH_PUB_KEY_PATH" => invalid_key * ".pub") do - challenges = [ - "Private key location for 'git@github.com' [$invalid_key]: " => "$valid_key\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == git_ok - @test auth_attempts == 2 - - # User repeatedly chooses the default invalid private key until prompt limit reached - challenges = [ - "Private key location for 'git@github.com' [$invalid_key]: " => "\n", - "Private key location for 'git@github.com' [$invalid_key]: " => "\n", - "Private key location for 'git@github.com' [$invalid_key]: " => "\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == prompt_limit - @test auth_attempts == 4 - end - - # Explicitly set the public key ENV variable to a non-existent file. - withenv("SSH_KEY_PATH" => valid_key, - "SSH_PUB_KEY_PATH" => valid_key * ".public") do - @test !isfile(ENV["SSH_PUB_KEY_PATH"]) - - challenges = [ - # "Private key location for 'git@github.com' [$valid_key]: " => "\n" - "Public key location for 'git@github.com' [$valid_key.public]: " => "$valid_key.pub\n" - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - end - - # Explicitly set the public key ENV variable to a public key that doesn't match - # the private key. 
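When the public key on disk does not correspond to the private key (the situation provoked in the next block), authentication fails and the callback re-prompts. One way to check a pair outside of LibGit2 is to re-derive the public key with OpenSSH's ssh-keygen; a sketch assuming the tool is installed, with placeholder key paths:

prvkey = "/path/to/key"          # hypothetical private key
pubkey = prvkey * ".pub"
derived = split(readchomp(`ssh-keygen -y -f $prvkey`))   # key type and blob derived from the private key
stored  = split(readchomp(pubkey))                       # key type and blob stored on disk
println(derived[1:2] == stored[1:2] ? "key pair matches" : "public key does not match private key")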
- withenv("SSH_KEY_PATH" => valid_key, - "SSH_PUB_KEY_PATH" => invalid_key * ".pub") do - @test isfile(ENV["SSH_PUB_KEY_PATH"]) - - challenges = [ - "Private key location for 'git@github.com' [$valid_key]: " => "\n" - "Public key location for 'git@github.com' [$invalid_key.pub]: " => "$valid_key.pub\n" - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == git_ok - @test auth_attempts == 2 - end - - Base.shred!(valid_cred) - Base.shred!(valid_p_cred) - end - - @testset "SSH known host checking" begin - CHECK_MATCH = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_MATCH - CHECK_MISMATCH = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_MISMATCH - CHECK_NOTFOUND = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_NOTFOUND - CHECK_FAILURE = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_FAILURE - - # randomly generated hashes matching no hosts - random_key = collect(reinterpret(Cchar, codeunits("\0\0\0\assh-rsa\0\0\0\x01#\0\0\0\x81\0¿\x95\xbe9\xfc9g\n:\xcf&\x06YA\xb5`\x97\xc13A\xbf;T+C\xc9Ut J>\xc5ҍ\xc4_S\x8a \xc1S\xeb\x15FH\xd2a\x04.D\xeeb\xac\x8f\xdb\xcc\xef\xc4l G\x9bR\xafp\x17s<=\x12\xab\x04ڳif\\A\x9ba0\xde%\xdei\x04\xc3\r\xb3\x81w\x88\xec\xc0f\x15A;AÝ\xc0r\xa1\u5fe\xd3\xf6)8\x8e\xa3\xcbc\xee\xdd\$\x04\x0f\xc1\xb4\x1f\xcc\xecK\xe0\x99"))) - # hashes of the unique github.com fingerprint - github_key = collect(reinterpret(Cchar, codeunits("\0\0\0\assh-rsa\0\0\0\x01#\0\0\x01\x01\0\xab`;\x85\x11\xa6vy\xbd\xb5@\xdb;\xd2\x03K\0J\xe96\xd0k\xe3\xd7`\xf0\x8f˪\xdbN\xb4\xedóǑ\xc7\n\xae\x9at\xc9Xi\xe4wD!«\xea\x92\xe5T0_8\xb5\xfdAK2\b\xe5t\xc37\xe3 \x93e\x18F,vRɋ1\xe1n}\xa6R;\xd2\0t*dD\xd8?\xcd^\x172\xd06sǷ\x81\x15UH{U\xf0\xc4IO8)\xec\xe6\x0f\x94%Z\x95˚\xf57\xd7\xfc\x8c\x7f\xe4\x9e\xf3\x18GN\xf2\x92\t\x92\x05\"e\xb0\xa0n\xa6mJ\x16\x7f\xd9\xf3\xa4\x8a\x1aJ0~\xc1\xea\xaaQI\xa9i\xa6\xac]V\xa5\xefb~Q}\x81\xfbdO[t\\OG\x8e\xcd\b*\x94\x92\xf7D\xaa\xd3&\xf7l\x8cM\xc9\x10\vƫyF\x1d&W\xcbo\x06\xde\xc9.kd\xa6V/\xf0\xe3 \x84\xea\x06\xce\x0e\xa9\xd3ZX;\xfb\0\xbaӌ\x9d\x19p github_key, - "gitlab.com" => gitlab_key, - ] - for files in [[no_file], [empty_file]] - check = LibGit2.ssh_knownhost_check(files, host, key) - @test check == CHECK_NOTFOUND - end - for files in [ - [known_hosts], - [empty_file, known_hosts], - [known_hosts, empty_file], - [known_hosts, wrong_hosts], - ] - check = LibGit2.ssh_knownhost_check(files, host, key) - @test check == CHECK_MATCH - end - for files in [ - [wrong_hosts], - [empty_file, wrong_hosts], - [wrong_hosts, empty_file], - [wrong_hosts, known_hosts], - ] - check = LibGit2.ssh_knownhost_check(files, host, key) - @test check == CHECK_MISMATCH - end - end - end - - rm(empty_file) - end - - @testset "HTTPS credential prompt" begin - url = "https://github.com/test/package.jl" - - valid_username = "julia" - valid_password = randstring(16) - valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) - - https_ex = quote - include($LIBGIT2_HELPER_PATH) - credential_loop($valid_cred, $url) - end - - # User provides a valid username and password - challenges = [ - "Username for 'https://github.com': " => "$valid_username\n", - "Password for 'https://$valid_username@github.com': " => "$valid_password\n", - ] - err, auth_attempts, p = challenge_prompt(https_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - - # User sends EOF in username prompt which aborts the credential request - challenges = [ - "Username for 'https://github.com': " => "\x04", - ] - err, auth_attempts, p = challenge_prompt(https_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 1 - - 
# User sends EOF in password prompt which aborts the credential request - challenges = [ - "Username for 'https://github.com': " => "foo\n", - "Password for 'https://foo@github.com': " => "\x04", - ] - err, auth_attempts, p = challenge_prompt(https_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 1 - - # User provides an empty password which aborts the credential request since we - # cannot tell it apart from an EOF. - challenges = [ - "Username for 'https://github.com': " => "foo\n", - "Password for 'https://foo@github.com': " => "\n", - ] - err, auth_attempts, p = challenge_prompt(https_ex, challenges) - @test err == abort_prompt - @test auth_attempts == 1 - - # User repeatedly chooses invalid username/password until the prompt limit is - # reached - challenges = [ - "Username for 'https://github.com': " => "foo\n", - "Password for 'https://foo@github.com': " => "bar\n", - "Username for 'https://github.com' [foo]: " => "foo\n", - "Password for 'https://foo@github.com': " => "bar\n", - "Username for 'https://github.com' [foo]: " => "foo\n", - "Password for 'https://foo@github.com': " => "bar\n", - ] - err, auth_attempts, p = challenge_prompt(https_ex, challenges) - @test err == prompt_limit - @test auth_attempts == 3 - - Base.shred!(valid_cred) - end - - @testset "SSH agent username" begin - url = "github.com:test/package.jl" - - valid_key = joinpath(KEY_DIR, "valid") - valid_cred = LibGit2.SSHCredential("git", "", valid_key, valid_key * ".pub") - - function gen_ex(; username="git") - quote - include($LIBGIT2_HELPER_PATH) - payload = CredentialPayload(allow_prompt=false, allow_ssh_agent=true, - allow_git_helpers=false) - credential_loop($valid_cred, $url, $username, payload) - end - end - - # An empty string username_ptr - ex = gen_ex(username="") - err, auth_attempts, p = challenge_prompt(ex, []) - @test err == exhausted_error - @test auth_attempts == 3 - - # A null username_ptr passed into `git_cred_ssh_key_from_agent` can cause a - # segfault. 
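All of the agent and prompt behaviour in this testset is controlled by keyword flags on CredentialPayload; a sketch of a payload that may query the SSH agent but never prompts interactively (keywords as used throughout this file, the clone call is only illustrative):

import LibGit2

payload = LibGit2.CredentialPayload(allow_prompt=false,
                                    allow_ssh_agent=true,
                                    allow_git_helpers=false)
# repo = LibGit2.clone("git@github.com:Some/Repo.jl", "Repo", credentials=payload)
# With no usable agent identity this fails with an EAUTH GitError instead of prompting.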
- ex = gen_ex(username=nothing) - err, auth_attempts, p = challenge_prompt(ex, []) - @test err == exhausted_error - @test auth_attempts == 2 - - Base.shred!(valid_cred) - end - - @testset "SSH default" begin - mktempdir() do home_dir - url = "github.com:test/package.jl" - - default_key = joinpath(home_dir, ".ssh", "id_rsa") - mkdir(dirname(default_key)) - - valid_key = joinpath(KEY_DIR, "valid") - valid_cred = LibGit2.SSHCredential("git", "", valid_key, valid_key * ".pub") - - valid_p_key = joinpath(KEY_DIR, "valid-passphrase") - passphrase = "secret" - valid_p_cred = LibGit2.SSHCredential("git", passphrase, valid_p_key, valid_p_key * ".pub") - - function gen_ex(cred) - quote - valid_cred = $cred - - default_cred = deepcopy(valid_cred) - default_cred.prvkey = $default_key - default_cred.pubkey = $default_key * ".pub" - - cp(valid_cred.prvkey, default_cred.prvkey) - cp(valid_cred.pubkey, default_cred.pubkey) - - try - include($LIBGIT2_HELPER_PATH) - credential_loop(default_cred, $url, "git", shred=false) - finally - rm(default_cred.prvkey) - rm(default_cred.pubkey) - end - end - end - - withenv("SSH_KEY_PATH" => nothing, - "SSH_PUB_KEY_PATH" => nothing, - "SSH_KEY_PASS" => nothing, - HOME => home_dir) do - - # Automatically use the default key - ex = gen_ex(valid_cred) - err, auth_attempts, p = challenge_prompt(ex, []) - @test err == git_ok - @test auth_attempts == 1 - @test p.credential.prvkey == default_key - @test p.credential.pubkey == default_key * ".pub" - - # Confirm the private key if any other prompting is required - ex = gen_ex(valid_p_cred) - challenges = [ - "Private key location for 'git@github.com' [$default_key]: " => "\n", - "Passphrase for $default_key: " => "$passphrase\n", - ] - err, auth_attempts, p = challenge_prompt(ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - end - - Base.shred!(valid_cred) - Base.shred!(valid_p_cred) - end - end - - @testset "SSH expand tilde" begin - url = "git@github.com:test/package.jl" - - valid_key = joinpath(KEY_DIR, "valid") - valid_cred = LibGit2.SSHCredential("git", "", valid_key, valid_key * ".pub") - - invalid_key = joinpath(KEY_DIR, "invalid") - - ssh_ex = quote - include($LIBGIT2_HELPER_PATH) - payload = CredentialPayload(allow_prompt=true, allow_ssh_agent=false, - allow_git_helpers=false) - credential_loop($valid_cred, $url, "git", payload, shred=false) - end - - withenv("SSH_KEY_PATH" => nothing, - "SSH_PUB_KEY_PATH" => nothing, - "SSH_KEY_PASS" => nothing, - HOME => KEY_DIR) do - - # Expand tilde during the private key prompt - challenges = [ - "Private key location for 'git@github.com': " => "~/valid\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - @test p.credential.prvkey == abspath(valid_key) - end - - withenv("SSH_KEY_PATH" => valid_key, - "SSH_PUB_KEY_PATH" => invalid_key * ".pub", - "SSH_KEY_PASS" => nothing, - HOME => KEY_DIR) do - - # Expand tilde during the public key prompt - challenges = [ - "Private key location for 'git@github.com' [$valid_key]: " => "\n", - "Public key location for 'git@github.com' [$invalid_key.pub]: " => "~/valid.pub\n", - ] - err, auth_attempts, p = challenge_prompt(ssh_ex, challenges) - @test err == git_ok - @test auth_attempts == 2 - @test p.credential.pubkey == abspath(valid_key * ".pub") - end - - Base.shred!(valid_cred) - end - - @testset "SSH explicit credentials" begin - url = "git@github.com:test/package.jl" - username = "git" - - valid_p_key = joinpath(KEY_DIR, "valid-passphrase") - passphrase = 
"secret" - valid_cred = LibGit2.SSHCredential(username, passphrase, valid_p_key, valid_p_key * ".pub") - - invalid_key = joinpath(KEY_DIR, "invalid") - invalid_cred = LibGit2.SSHCredential(username, "", invalid_key, invalid_key * ".pub") - - function gen_ex(cred; allow_prompt=true, allow_ssh_agent=false) - quote - include($LIBGIT2_HELPER_PATH) - payload = CredentialPayload($cred, allow_prompt=$allow_prompt, - allow_ssh_agent=$allow_ssh_agent, - allow_git_helpers=false) - credential_loop($valid_cred, $url, $username, payload) - end - end - - # Explicitly provided credential is correct. Note: allowing prompting and - # SSH agent to ensure they are skipped. - ex = gen_ex(valid_cred, allow_prompt=true, allow_ssh_agent=true) - err, auth_attempts, p = challenge_prompt(ex, []) - @test err == git_ok - @test auth_attempts == 1 - @test p.explicit == valid_cred - @test p.credential != valid_cred - - # Explicitly provided credential is incorrect - ex = gen_ex(invalid_cred, allow_prompt=false, allow_ssh_agent=false) - err, auth_attempts, p = challenge_prompt(ex, []) - @test err == exhausted_error - @test auth_attempts == 3 - @test p.explicit == invalid_cred - @test p.credential != invalid_cred - - Base.shred!(valid_cred) - Base.shred!(invalid_cred) - end - - @testset "HTTPS explicit credentials" begin - url = "https://github.com/test/package.jl" - - valid_cred = LibGit2.UserPasswordCredential("julia", randstring(16)) - invalid_cred = LibGit2.UserPasswordCredential("alice", randstring(15)) - - function gen_ex(cred; allow_prompt=true) - quote - include($LIBGIT2_HELPER_PATH) - payload = CredentialPayload($cred, allow_prompt=$allow_prompt, - allow_git_helpers=false) - credential_loop($valid_cred, $url, "", payload) - end - end - - # Explicitly provided credential is correct - ex = gen_ex(valid_cred, allow_prompt=true) - err, auth_attempts, p = challenge_prompt(ex, []) - @test err == git_ok - @test auth_attempts == 1 - @test p.explicit == valid_cred - @test p.credential != valid_cred - - # Explicitly provided credential is incorrect - ex = gen_ex(invalid_cred, allow_prompt=false) - err, auth_attempts, p = challenge_prompt(ex, []) - @test err == exhausted_error - @test auth_attempts == 2 - @test p.explicit == invalid_cred - @test p.credential != invalid_cred - - Base.shred!(valid_cred) - Base.shred!(invalid_cred) - end - - @testset "Cached credentials" begin - url = "https://github.com/test/package.jl" - cred_id = "https://github.com" - - valid_username = "julia" - valid_password = randstring(16) - valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) - - invalid_username = "alice" - invalid_password = randstring(15) - invalid_cred = LibGit2.UserPasswordCredential(invalid_username, invalid_password) - - function gen_ex(; cached_cred=nothing, allow_prompt=true) - quote - include($LIBGIT2_HELPER_PATH) - cache = CachedCredentials() - $(cached_cred !== nothing && :(LibGit2.approve(cache, $cached_cred, $url))) - payload = CredentialPayload(cache, allow_prompt=$allow_prompt, - allow_git_helpers=false) - credential_loop($valid_cred, $url, "", payload) - end - end - - # Cache contains a correct credential - err, auth_attempts, p = challenge_prompt(gen_ex(cached_cred=valid_cred), []) - @test err == git_ok - @test auth_attempts == 1 - - # Note: Approved cached credentials are not shredded - - # Add a credential into the cache - ex = gen_ex() - challenges = [ - "Username for 'https://github.com': " => "$valid_username\n", - "Password for 'https://$valid_username@github.com': " => 
"$valid_password\n", - ] - err, auth_attempts, p = challenge_prompt(ex, challenges) - cache = p.cache - @test err == git_ok - @test auth_attempts == 1 - @test typeof(cache) == LibGit2.CachedCredentials - @test cache.cred == Dict(cred_id => valid_cred) - @test p.credential == valid_cred - - # Replace a credential in the cache - ex = gen_ex(cached_cred=invalid_cred) - challenges = [ - "Username for 'https://github.com' [alice]: " => "$valid_username\n", - "Password for 'https://$valid_username@github.com': " => "$valid_password\n", - ] - err, auth_attempts, p = challenge_prompt(ex, challenges) - cache = p.cache - @test err == git_ok - @test auth_attempts == 2 - @test typeof(cache) == LibGit2.CachedCredentials - @test cache.cred == Dict(cred_id => valid_cred) - @test p.credential == valid_cred - - # Canceling a credential request should leave the cache unmodified - ex = gen_ex(cached_cred=invalid_cred) - challenges = [ - "Username for 'https://github.com' [alice]: " => "foo\n", - "Password for 'https://foo@github.com': " => "bar\n", - "Username for 'https://github.com' [foo]: " => "\x04", - ] - err, auth_attempts, p = challenge_prompt(ex, challenges) - cache = p.cache - @test err == abort_prompt - @test auth_attempts == 3 - @test typeof(cache) == LibGit2.CachedCredentials - @test cache.cred == Dict(cred_id => invalid_cred) - @test p.credential != invalid_cred - - # An EAUTH error should remove credentials from the cache - ex = gen_ex(cached_cred=invalid_cred, allow_prompt=false) - err, auth_attempts, p = challenge_prompt(ex, []) - cache = p.cache - @test err == exhausted_error - @test auth_attempts == 2 - @test typeof(cache) == LibGit2.CachedCredentials - @test cache.cred == Dict() - @test p.credential != invalid_cred - - Base.shred!(valid_cred) - Base.shred!(invalid_cred) - end - - @testset "HTTPS git helper username" begin - url = "https://github.com/test/package.jl" - - valid_username = "julia" - valid_password = randstring(16) - valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) - - config_path = joinpath(dir, config_file) - write(config_path, """ - [credential] - username = $valid_username - """) - - https_ex = quote - include($LIBGIT2_HELPER_PATH) - LibGit2.with(LibGit2.GitConfig($config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - payload = CredentialPayload(nothing, - nothing, cfg, - allow_git_helpers=true) - credential_loop($valid_cred, $url, nothing, payload, shred=false) - end - end - - # Username is supplied from the git configuration file - challenges = [ - "Username for 'https://github.com' [$valid_username]: " => "\n", - "Password for 'https://$valid_username@github.com': " => "$valid_password\n", - ] - err, auth_attempts, p = challenge_prompt(https_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - - # Verify credential wasn't accidentally zeroed (#24731) - @test p.credential == valid_cred - - Base.shred!(valid_cred) - end - - @testset "HTTPS git helper password" begin - if GIT_INSTALLED - url = "https://github.com/test/package.jl" - - valid_username = "julia" - valid_password = randstring(16) - valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) - - cred_file = joinpath(dir, "test-credentials") - config_path = joinpath(dir, config_file) - write(config_path, """ - [credential] - helper = store --file $cred_file - """) - - # Directly write to the cleartext credential store. 
Note: we are not using - # the LibGit2.approve message to avoid any possibility of the tests - # accidentally writing to a user's global store. - write(cred_file, "https://$valid_username:$valid_password@github.com") - - https_ex = quote - include($LIBGIT2_HELPER_PATH) - LibGit2.with(LibGit2.GitConfig($config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg - payload = CredentialPayload(nothing, - nothing, cfg, - allow_git_helpers=true) - credential_loop($valid_cred, $url, nothing, payload, shred=false) - end - end - - # Username will be provided by the credential helper - challenges = [] - err, auth_attempts, p = challenge_prompt(https_ex, challenges) - @test err == git_ok - @test auth_attempts == 1 - - # Verify credential wasn't accidentally zeroed (#24731) - @test p.credential == valid_cred - - Base.shred!(valid_cred) - end - end - - @testset "Incompatible explicit credentials" begin - # User provides a user/password credential where a SSH credential is required. - valid_cred = LibGit2.UserPasswordCredential("foo", "bar") - expect_ssh_ex = quote - include($LIBGIT2_HELPER_PATH) - payload = CredentialPayload($valid_cred, allow_ssh_agent=false, - allow_git_helpers=false) - credential_loop($valid_cred, "ssh://github.com/repo", "", - Cuint(LibGit2.Consts.CREDTYPE_SSH_KEY), payload) - end - - err, auth_attempts, p = challenge_prompt(expect_ssh_ex, []) - @test err == incompatible_error - @test auth_attempts == 1 - @test p.explicit == valid_cred - @test p.credential != valid_cred - - Base.shred!(valid_cred) - - # User provides a SSH credential where a user/password credential is required. - valid_cred = LibGit2.SSHCredential("foo", "", "", "") - expect_https_ex = quote - include($LIBGIT2_HELPER_PATH) - payload = CredentialPayload($valid_cred, allow_ssh_agent=false, - allow_git_helpers=false) - credential_loop($valid_cred, "https://github.com/repo", "", - Cuint(LibGit2.Consts.CREDTYPE_USERPASS_PLAINTEXT), payload) - end - - err, auth_attempts, p = challenge_prompt(expect_https_ex, []) - @test err == incompatible_error - @test auth_attempts == 1 - @test p.explicit == valid_cred - @test p.credential != valid_cred - - Base.shred!(valid_cred) - end - - # A hypothetical scenario where the allowed authentication can either be - # SSH or username/password. - @testset "SSH & HTTPS authentication" begin - allowed_types = Cuint(LibGit2.Consts.CREDTYPE_SSH_KEY) | - Cuint(LibGit2.Consts.CREDTYPE_USERPASS_PLAINTEXT) - - # User provides a user/password credential where a SSH credential is required. 
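Outside the test harness, an explicitly supplied credential is used the same way the incompatibility tests below use it: wrap it in a CredentialPayload and pass that as the credentials keyword of the network call. A sketch with a placeholder URL and token:

import LibGit2

cred = LibGit2.UserPasswordCredential("user", "personal-access-token")   # hypothetical values
payload = LibGit2.CredentialPayload(cred, allow_prompt=false, allow_git_helpers=false)
repo = LibGit2.clone("https://example.com/Some/Repo.jl", "Repo", credentials=payload)
close(repo)
Base.shred!(cred)

If the server instead demands SSH key authentication, the same call fails with the "explicitly provided credential is incompatible" error exercised below rather than falling back to a prompt.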
- valid_cred = LibGit2.UserPasswordCredential("foo", "bar") - ex = quote - include($LIBGIT2_HELPER_PATH) - payload = CredentialPayload($valid_cred, allow_ssh_agent=false, - allow_git_helpers=false) - credential_loop($valid_cred, "foo://github.com/repo", "", - $allowed_types, payload) - end - - err, auth_attempts, p = challenge_prompt(ex, []) - @test err == git_ok - @test auth_attempts == 1 - - Base.shred!(valid_cred) - end - - @testset "CredentialPayload reset" begin - urls = [ - "https://github.com/test/package.jl" - "https://myhost.com/demo.jl" - ] - - valid_username = "julia" - valid_password = randstring(16) - valid_cred = LibGit2.UserPasswordCredential(valid_username, valid_password) - - # Users should be able to re-use the same payload if the state is reset - ex = quote - include($LIBGIT2_HELPER_PATH) - user = nothing - payload = CredentialPayload(allow_git_helpers=false) - first_result = credential_loop($valid_cred, $(urls[1]), user, payload) - LibGit2.reset!(payload) - second_result = credential_loop($valid_cred, $(urls[2]), user, payload) - (first_result, second_result) - end - - challenges = [ - "Username for 'https://github.com': " => "$valid_username\n", - "Password for 'https://$valid_username@github.com': " => "$valid_password\n", - "Username for 'https://myhost.com': " => "$valid_username\n", - "Password for 'https://$valid_username@myhost.com': " => "$valid_password\n", - ] - first_result, second_result = challenge_prompt(ex, challenges) - - err, auth_attempts, p = first_result - @test err == git_ok - @test auth_attempts == 1 - - err, auth_attempts, p = second_result - @test err == git_ok - @test auth_attempts == 1 - - Base.shred!(valid_cred) - end - end - - # Note: Tests only work on linux as SSL_CERT_FILE is only respected on linux systems. - @testset "Hostname verification" begin - openssl_installed = false - common_name = "" - if Sys.islinux() - try - # OpenSSL needs to be on the path - openssl_installed = !isempty(read(`openssl version`, String)) - catch ex - @warn "Skipping hostname verification tests. Is `openssl` on the path?" exception=ex - end - - # Find a hostname that maps to the loopback address - hostnames = ["localhost"] - - # In minimal environments a hostname might not be available (issue #20758) - try - # In some environments, namely Macs, the hostname "macbook.local" is bound - # to the external address while "macbook" is bound to the loopback address. - pushfirst!(hostnames, replace(gethostname(), r"\..*$" => "")) - catch - end - - loopback = ip"127.0.0.1" - for hostname in hostnames - local addr - try - addr = getaddrinfo(hostname) - catch - continue - end - - if addr == loopback - common_name = hostname - break - end - end - - if isempty(common_name) - @warn "Skipping hostname verification tests. Unable to determine a hostname which maps to the loopback address" - end - end - if openssl_installed && !isempty(common_name) - mktempdir() do root - key = joinpath(root, common_name * ".key") - cert = joinpath(root, common_name * ".crt") - pem = joinpath(root, common_name * ".pem") - - # Generated a certificate which has the CN set correctly but no subjectAltName - run(pipeline(`openssl req -new -x509 -newkey rsa:2048 -sha256 -nodes -keyout $key -out $cert -days 1 -subj "/CN=$common_name"`, stderr=devnull)) - run(`openssl x509 -in $cert -out $pem -outform PEM`) - - # Find an available port by listening - port, server = listenany(49152) - close(server) - - # Make a fake Julia package and minimal HTTPS server with our generated - # certificate. 
The minimal server can't actually serve a Git repository. - mkdir(joinpath(root, "Example.jl")) - pobj = cd(root) do - run(`openssl s_server -key $key -cert $cert -WWW -accept $port`, wait=false) - end - - errfile = joinpath(root, "error") - repo_url = "https://$common_name:$port/Example.jl" - repo_dir = joinpath(root, "dest") - code = """ - using Serialization - import LibGit2 - dest_dir = "$repo_dir" - open("$errfile", "w+") do f - try - repo = LibGit2.clone("$repo_url", dest_dir) - catch err - serialize(f, err) - finally - isdir(dest_dir) && rm(dest_dir, recursive=true) - end - end - """ - cmd = `$(Base.julia_cmd()) --startup-file=no -e $code` - - try - # The generated certificate is normally invalid - run(cmd) - err = open(errfile, "r") do f - deserialize(f) - end - @test err.code == LibGit2.Error.ECERTIFICATE - @test startswith(lowercase(err.msg), - lowercase("The SSL certificate is invalid")) - - rm(errfile) - - # Specify that Julia use only the custom certificate. Note: we need to - # spawn a new Julia process in order for this ENV variable to take effect. - withenv("SSL_CERT_FILE" => pem) do - run(cmd) - err = open(errfile, "r") do f - deserialize(f) - end - @test err.code == LibGit2.Error.ERROR - @test occursin(r"invalid content-type: '?text/plain'?"i, err.msg) - end - - # OpenSSL s_server should still be running - @test process_running(pobj) - finally - kill(pobj) - end - end - end - end -end - -let cache = LibGit2.CachedCredentials() - get!(cache, "foo", LibGit2.SSHCredential("", "bar")) - Base.shred!(cache) - @test all(cache["foo"].pass.data .== UInt(0)) end - -end # module diff --git a/stdlib/LibGit2/test/online-tests.jl b/stdlib/LibGit2/test/online-tests.jl new file mode 100644 index 0000000000000..4c5f346894b3d --- /dev/null +++ b/stdlib/LibGit2/test/online-tests.jl @@ -0,0 +1,118 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +module LibGit2OnlineTests + +using Test +import LibGit2 +using Random + +function transfer_progress(progress::Ptr{LibGit2.TransferProgress}, payload::Dict) + status = payload[:transfer_progress] + progress = unsafe_load(progress) + + status[] = (current=progress.received_objects, total=progress.total_objects) + + return Cint(0) +end + +######### +# TESTS # +######### +# init & clone +mktempdir() do dir + repo_url = "https://github.com/JuliaLang/Example.jl" + + @testset "Cloning repository" begin + @testset "HTTPS protocol" begin + repo_path = joinpath(dir, "Example.HTTPS") + c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false) + repo = LibGit2.clone(repo_url, repo_path, credentials=c) + try + @test isdir(repo_path) + @test isdir(joinpath(repo_path, ".git")) + finally + close(repo) + end + end + + @testset "Transfer progress callbacks" begin + status = Ref((current=0, total=-1)) + callbacks = LibGit2.Callbacks( + :transfer_progress => ( + @cfunction(transfer_progress, Cint, (Ptr{LibGit2.TransferProgress}, Any)), + status, + ) + ) + + repo_path = joinpath(dir, "Example.TransferProgress") + c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false) + repo = LibGit2.clone(repo_url, repo_path, credentials=c, callbacks=callbacks) + try + @test isdir(repo_path) + @test isdir(joinpath(repo_path, ".git")) + + @test status[].total >= 0 + @test status[].current == status[].total + finally + close(repo) + end + end + + @testset "Incorrect URL" begin + repo_path = joinpath(dir, "Example.IncorrectURL") + # credentials are required because github tries to authenticate on unknown repo + cred = LibGit2.UserPasswordCredential("JeffBezanson", "hunter2") # make sure Jeff is using a good password :) + c = LibGit2.CredentialPayload(cred, allow_prompt=false, allow_git_helpers=false) + try + LibGit2.clone(repo_url*randstring(10), repo_path, credentials=c) + error("unexpected") + catch ex + @test isa(ex, LibGit2.Error.GitError) + # Return code seems to vary, see #32186, #32219 + @test ex.code ∈ (LibGit2.Error.EAUTH, LibGit2.Error.ERROR) + end + Base.shred!(cred) + end + + @testset "Empty Credentials" begin + repo_path = joinpath(dir, "Example.EmptyCredentials") + # credentials are required because github tries to authenticate on unknown repo + cred = LibGit2.UserPasswordCredential("","") # empty credentials cause authentication error + c = LibGit2.CredentialPayload(cred, allow_prompt=false, allow_git_helpers=false) + try + LibGit2.clone(repo_url*randstring(10), repo_path, credentials=c) + error("unexpected") + catch ex + @test isa(ex, LibGit2.Error.GitError) + @test ex.code == LibGit2.Error.EAUTH + end + end + end +end + +@testset "Remote" begin + repo_url = "https://github.com/JuliaLang/Example.jl" + LibGit2.with(LibGit2.GitRemoteDetached(repo_url)) do remote + @test !LibGit2.connected(remote) + c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false) + LibGit2.connect(remote, LibGit2.Consts.DIRECTION_FETCH, credentials=c) + @test LibGit2.connected(remote) + remote_heads = LibGit2.ls(remote) + default_branch = LibGit2.default_branch(remote) + @test !isempty(remote_heads) + @test startswith(default_branch, "refs/heads/") + @test any(head.name == default_branch for head in remote_heads) + LibGit2.disconnect(remote) + @test !LibGit2.connected(remote) + end +end + +# needs to be run in separate process so it can re-initialize libgit2 +# with a useless self-signed certificate authority root certificate +file = 
joinpath(@__DIR__, "bad_ca_roots.jl") +cmd = `$(Base.julia_cmd()) --depwarn=no --startup-file=no $file` +if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) + error("bad CA roots tests failed, cmd : $cmd") +end + +end # module diff --git a/stdlib/LibGit2/test/online.jl b/stdlib/LibGit2/test/online.jl index 96b6bf5b22371..b2bcab83d9f4e 100644 --- a/stdlib/LibGit2/test/online.jl +++ b/stdlib/LibGit2/test/online.jl @@ -1,101 +1,11 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -module LibGit2OnlineTests - -using Test -import LibGit2 -using Random - -function transfer_progress(progress::Ptr{LibGit2.TransferProgress}, payload::Dict) - status = payload[:transfer_progress] - progress = unsafe_load(progress) - - status[] = (current=progress.received_objects, total=progress.total_objects) - - return Cint(0) -end - -######### -# TESTS # -######### -# init & clone +# Set HOME to control where the .gitconfig file may be found. +# Note: In Cygwin environments `git` will use HOME instead of USERPROFILE. +# Setting both environment variables ensures home was overridden. mktempdir() do dir - repo_url = "https://github.com/JuliaLang/Example.jl" - - @testset "Cloning repository" begin - @testset "HTTPS protocol" begin - repo_path = joinpath(dir, "Example.HTTPS") - c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false) - repo = LibGit2.clone(repo_url, repo_path, credentials=c) - try - @test isdir(repo_path) - @test isdir(joinpath(repo_path, ".git")) - finally - close(repo) - end - end - - @testset "Transfer progress callbacks" begin - status = Ref((current=0, total=-1)) - callbacks = LibGit2.Callbacks( - :transfer_progress => ( - @cfunction(transfer_progress, Cint, (Ptr{LibGit2.TransferProgress}, Any)), - status, - ) - ) - - repo_path = joinpath(dir, "Example.TransferProgress") - c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false) - repo = LibGit2.clone(repo_url, repo_path, credentials=c, callbacks=callbacks) - try - @test isdir(repo_path) - @test isdir(joinpath(repo_path, ".git")) - - @test status[].total >= 0 - @test status[].current == status[].total - finally - close(repo) - end - end - - @testset "Incorrect URL" begin - repo_path = joinpath(dir, "Example.IncorrectURL") - # credentials are required because github tries to authenticate on unknown repo - cred = LibGit2.UserPasswordCredential("JeffBezanson", "hunter2") # make sure Jeff is using a good password :) - c = LibGit2.CredentialPayload(cred, allow_prompt=false, allow_git_helpers=false) - try - LibGit2.clone(repo_url*randstring(10), repo_path, credentials=c) - error("unexpected") - catch ex - @test isa(ex, LibGit2.Error.GitError) - # Return code seems to vary, see #32186, #32219 - @test ex.code ∈ (LibGit2.Error.EAUTH, LibGit2.Error.ERROR) - end - Base.shred!(cred) - end - - @testset "Empty Credentials" begin - repo_path = joinpath(dir, "Example.EmptyCredentials") - # credentials are required because github tries to authenticate on unknown repo - cred = LibGit2.UserPasswordCredential("","") # empty credentials cause authentication error - c = LibGit2.CredentialPayload(cred, allow_prompt=false, allow_git_helpers=false) - try - LibGit2.clone(repo_url*randstring(10), repo_path, credentials=c) - error("unexpected") - catch ex - @test isa(ex, LibGit2.Error.GitError) - @test ex.code == LibGit2.Error.EAUTH - end - end + dir = realpath(dir) + withenv("HOME" => dir, "USERPROFILE" => dir) do + include("online-tests.jl") end end - -# needs to be run in separate process so it can 
re-initialize libgit2 -# with a useless self-signed certificate authority root certificate -file = joinpath(@__DIR__, "bad_ca_roots.jl") -cmd = `$(Base.julia_cmd()) --depwarn=no --startup-file=no $file` -if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) - error("bad CA roots tests failed, cmd : $cmd") -end - -end # module diff --git a/stdlib/LibGit2/test/runtests.jl b/stdlib/LibGit2/test/runtests.jl index 69b20014d11e1..88aea77f25671 100644 --- a/stdlib/LibGit2/test/runtests.jl +++ b/stdlib/LibGit2/test/runtests.jl @@ -1,4 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -include("libgit2.jl") -include("online.jl") +using Test +@testset verbose=true "LibGit2 $test" for test in eachline(joinpath(@__DIR__, "testgroups")) + include("$test.jl") +end diff --git a/stdlib/LibGit2_jll/Project.toml b/stdlib/LibGit2_jll/Project.toml index c91b2dd5caeff..e64d86dc6c9b7 100644 --- a/stdlib/LibGit2_jll/Project.toml +++ b/stdlib/LibGit2_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibGit2_jll" uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" -version = "1.4.3+2" +version = "1.7.1+0" [deps] MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" diff --git a/stdlib/LibGit2_jll/src/LibGit2_jll.jl b/stdlib/LibGit2_jll/src/LibGit2_jll.jl index 88480bbd84048..ff625a6494a26 100644 --- a/stdlib/LibGit2_jll/src/LibGit2_jll.jl +++ b/stdlib/LibGit2_jll/src/LibGit2_jll.jl @@ -14,16 +14,16 @@ export libgit2 # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libgit2_handle = C_NULL -libgit2_path = "" +artifact_dir::String = "" +libgit2_handle::Ptr{Cvoid} = C_NULL +libgit2_path::String = "" if Sys.iswindows() const libgit2 = "libgit2.dll" elseif Sys.isapple() - const libgit2 = "@rpath/libgit2.1.4.dylib" + const libgit2 = "@rpath/libgit2.1.7.dylib" else - const libgit2 = "libgit2.so.1.4" + const libgit2 = "libgit2.so.1.7" end function __init__() diff --git a/stdlib/LibGit2_jll/test/runtests.jl b/stdlib/LibGit2_jll/test/runtests.jl index 93fe0e958b7e2..86dcf659e7ab2 100644 --- a/stdlib/LibGit2_jll/test/runtests.jl +++ b/stdlib/LibGit2_jll/test/runtests.jl @@ -7,5 +7,5 @@ using Test, Libdl, LibGit2_jll minor = Ref{Cint}(0) patch = Ref{Cint}(0) @test ccall((:git_libgit2_version, libgit2), Cint, (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch) == 0 - @test VersionNumber(major[], minor[], patch[]) == v"1.4.3" + @test VersionNumber(major[], minor[], patch[]) == v"1.7.1" end diff --git a/stdlib/LibSSH2_jll/Project.toml b/stdlib/LibSSH2_jll/Project.toml index 8334a86d1c23a..def4fb02e399c 100644 --- a/stdlib/LibSSH2_jll/Project.toml +++ b/stdlib/LibSSH2_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibSSH2_jll" uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" +version = "1.11.0+1" [deps] MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" diff --git a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl index 66987b30d090c..a809f7a912d6b 100644 --- a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl +++ b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl @@ -14,9 +14,9 @@ export libssh2 # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libssh2_handle = C_NULL -libssh2_path = "" +artifact_dir::String = "" +libssh2_handle::Ptr{Cvoid} = C_NULL +libssh2_path::String = "" if Sys.iswindows() const libssh2 = "libssh2.dll" diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml index 9441fbf857263..605c1115b3d34 100644 --- a/stdlib/LibUV_jll/Project.toml +++ 
b/stdlib/LibUV_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibUV_jll" uuid = "183b4373-6708-53ba-ad28-60e28bb38547" -version = "2.0.1+6" +version = "2.0.1+14" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/LibUV_jll/src/LibUV_jll.jl b/stdlib/LibUV_jll/src/LibUV_jll.jl index e4897138cc6cc..767f055eb019f 100644 --- a/stdlib/LibUV_jll/src/LibUV_jll.jl +++ b/stdlib/LibUV_jll/src/LibUV_jll.jl @@ -6,41 +6,6 @@ baremodule LibUV_jll using Base, Libdl Base.Experimental.@compiler_options compile=min optimize=0 infer=false -const PATH_list = String[] -const LIBPATH_list = String[] - -export libuv - -# These get calculated in __init__() -const PATH = Ref("") -const LIBPATH = Ref("") -artifact_dir = "" -libuv_handle = C_NULL -libuv_path = "" - -if Sys.iswindows() - const libuv = "libuv-2.dll" -elseif Sys.isapple() - const libuv = "@rpath/libuv.2.dylib" -else - const libuv = "libuv.so.2" -end - -function __init__() - global libuv_handle = dlopen(libuv) - global libuv_path = dlpath(libuv_handle) - global artifact_dir = dirname(Sys.BINDIR) - LIBPATH[] = dirname(libuv_path) - push!(LIBPATH_list, LIBPATH[]) -end - -# JLLWrappers API compatibility shims. Note that not all of these will really make sense. -# For instance, `find_artifact_dir()` won't actually be the artifact directory, because -# there isn't one. It instead returns the overall Julia prefix. -is_available() = true -find_artifact_dir() = artifact_dir -dev_jll() = error("stdlib JLLs cannot be dev'ed") -best_wrapper = nothing -get_libuv_path() = libuv_path +# NOTE: This file is currently empty, as we link libuv statically for now. end # module LibUV_jll diff --git a/stdlib/LibUV_jll/test/runtests.jl b/stdlib/LibUV_jll/test/runtests.jl index 26c50b92c0c2d..0615edebaa070 100644 --- a/stdlib/LibUV_jll/test/runtests.jl +++ b/stdlib/LibUV_jll/test/runtests.jl @@ -1,8 +1,3 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license using Test, Libdl, LibUV_jll - -@testset "LibUV_jll" begin - vn = VersionNumber(unsafe_string(ccall((:uv_version_string, libuv), Cstring, ()))) - @test vn == v"2.0.0-dev" -end diff --git a/stdlib/LibUnwind_jll/Project.toml b/stdlib/LibUnwind_jll/Project.toml index df4cc9df68b28..0e8684a212944 100644 --- a/stdlib/LibUnwind_jll/Project.toml +++ b/stdlib/LibUnwind_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibUnwind_jll" uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3" -version = "1.5.0+2" +version = "1.7.2+2" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl index ae79e790a999b..12abeaf598151 100644 --- a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl +++ b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl @@ -14,9 +14,9 @@ export libunwind # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libunwind_handle = C_NULL -libunwind_path = "" +artifact_dir::String = "" +libunwind_handle::Ptr{Cvoid} = C_NULL +libunwind_path::String = "" const libunwind = "libunwind.so.8" diff --git a/stdlib/Libdl/Project.toml b/stdlib/Libdl/Project.toml index 26e5bf0cdefd7..7fab4b9334260 100644 --- a/stdlib/Libdl/Project.toml +++ b/stdlib/Libdl/Project.toml @@ -1,5 +1,6 @@ name = "Libdl" uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Libdl/src/Libdl.jl b/stdlib/Libdl/src/Libdl.jl index df3f62c807fed..0df70ea1daac5 100644 --- a/stdlib/Libdl/src/Libdl.jl +++ b/stdlib/Libdl/src/Libdl.jl @@ -4,10 +4,11 @@ module Libdl # Just re-export Base.Libc.Libdl: export DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e, - dlpath, find_library, dlext, dllist + dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath, BundledLazyLibraryPath import Base.Libc.Libdl: DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW, dlclose, dlopen, dlopen_e, dlsym, dlsym_e, - dlpath, find_library, dlext, dllist + dlpath, find_library, dlext, dllist, LazyLibrary, LazyLibraryPath, + BundledLazyLibraryPath, default_rtld_flags, add_dependency! end # module diff --git a/stdlib/Libdl/test/runtests.jl b/stdlib/Libdl/test/runtests.jl index 5c06dd929f1a1..e500b68dec34b 100644 --- a/stdlib/Libdl/test/runtests.jl +++ b/stdlib/Libdl/test/runtests.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license using Test -import Libdl +using Libdl # these could fail on an embedded installation # but for now, we don't handle that case @@ -27,17 +27,15 @@ end @test_throws ArgumentError Libdl.dlsym(C_NULL, :foo) @test_throws ArgumentError Libdl.dlsym_e(C_NULL, :foo) -cd(@__DIR__) do - # Find the library directory by finding the path of libjulia-internal (or libjulia-internal-debug, # as the case may be) to get the private library directory private_libdir = if Base.DARWIN_FRAMEWORK - if ccall(:jl_is_debugbuild, Cint, ()) != 0 + if Base.isdebugbuild() dirname(abspath(Libdl.dlpath(Base.DARWIN_FRAMEWORK_NAME * "_debug"))) else joinpath(dirname(abspath(Libdl.dlpath(Base.DARWIN_FRAMEWORK_NAME))),"Frameworks") end -elseif ccall(:jl_is_debugbuild, Cint, ()) != 0 +elseif Base.isdebugbuild() dirname(abspath(Libdl.dlpath("libjulia-internal-debug"))) else dirname(abspath(Libdl.dlpath("libjulia-internal"))) @@ -267,4 +265,67 @@ mktempdir() do dir end end -end +## Tests for LazyLibrary +@testset "LazyLibrary" begin; mktempdir() do dir + lclf_path = joinpath(private_libdir, "libccalllazyfoo.$(Libdl.dlext)") + lclb_path = joinpath(private_libdir, "libccalllazybar.$(Libdl.dlext)") + + # Ensure that our modified copy of `libccalltest` is not currently loaded + @test !any(contains.(dllist(), lclf_path)) + @test !any(contains.(dllist(), lclb_path)) + + # Create a `LazyLibrary` structure that loads `libccalllazybar` + global lclf_loaded = false + global lclb_loaded = false + + # We don't provide `dlclose()` on `LazyLibrary`'s, you have to manage it yourself: + function close_libs() + global lclf_loaded = false + global lclb_loaded = false + if libccalllazybar.handle != C_NULL + dlclose(libccalllazybar.handle) + end + if libccalllazyfoo.handle != C_NULL + dlclose(libccalllazyfoo.handle) + end + @atomic libccalllazyfoo.handle = C_NULL + @atomic libccalllazybar.handle = C_NULL + @test !any(contains.(dllist(), lclf_path)) + @test !any(contains.(dllist(), lclb_path)) + end + + global libccalllazyfoo = LazyLibrary(lclf_path; on_load_callback=() -> global lclf_loaded = true) + global libccalllazybar = LazyLibrary(lclb_path; dependencies=[libccalllazyfoo], on_load_callback=() -> global lclb_loaded = true) + + # Creating `LazyLibrary` doesn't actually load anything + @test !lclf_loaded + @test !lclb_loaded + + # Explicitly calling `dlopen()` does: + dlopen(libccalllazybar) + @test lclf_loaded + @test lclb_loaded + close_libs() + + # Test that the library gets loaded when you use `ccall()` + @test ccall((:bar, libccalllazybar), Cint, (Cint,), 2) == 6 + @test lclf_loaded + @test lclb_loaded + close_libs() + + # Test that `@ccall` works: + @test @ccall(libccalllazybar.bar(2::Cint)::Cint) == 6 + @test lclf_loaded + @test lclb_loaded + close_libs() + + # Test that `dlpath()` works + @test dlpath(libccalllazybar) == realpath(string(libccalllazybar.path)) + @test lclf_loaded + close_libs() + + # Test that we can use lazily-evaluated library names: + libname = LazyLibraryPath(private_libdir, "libccalllazyfoo.$(Libdl.dlext)") + lazy_name_lazy_lib = LazyLibrary(libname) + @test dlpath(lazy_name_lazy_lib) == realpath(string(libname)) +end; end diff --git a/stdlib/LinearAlgebra/Project.toml b/stdlib/LinearAlgebra/Project.toml index 46653aa795209..892de0397c219 100644 --- a/stdlib/LinearAlgebra/Project.toml +++ b/stdlib/LinearAlgebra/Project.toml @@ -1,5 +1,6 @@ name = "LinearAlgebra" uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +version = "1.11.0" [deps] Libdl = 
"8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md index 88e700685a0d3..d263dfe299206 100644 --- a/stdlib/LinearAlgebra/docs/src/index.md +++ b/stdlib/LinearAlgebra/docs/src/index.md @@ -150,7 +150,10 @@ julia> sB\x -1.1086956521739126 -1.4565217391304346 ``` -The `\` operation here performs the linear solution. The left-division operator is pretty powerful and it's easy to write compact, readable code that is flexible enough to solve all sorts of systems of linear equations. + +The `\` operation here performs the linear solution. The left-division operator is pretty +powerful and it's easy to write compact, readable code that is flexible enough to solve all +sorts of systems of linear equations. ## Special matrices @@ -181,8 +184,8 @@ as well as whether hooks to various optimized methods for them in LAPACK are ava | Matrix type | `+` | `-` | `*` | `\` | Other functions with optimized methods | |:----------------------------- |:--- |:--- |:--- |:--- |:----------------------------------------------------------- | -| [`Symmetric`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref) | -| [`Hermitian`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref) | +| [`Symmetric`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`cbrt`](@ref), [`exp`](@ref) | +| [`Hermitian`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`cbrt`](@ref), [`exp`](@ref) | | [`UpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref) | | [`UnitUpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref) | | [`LowerTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref) | @@ -276,12 +279,11 @@ to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`]( Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` (equivalent to `(A+μ*I)x \ b`) and related operations like determinants. - ## [Matrix factorizations](@id man-linalg-factorizations) [Matrix factorizations (a.k.a. matrix decompositions)](https://en.wikipedia.org/wiki/Matrix_decomposition) compute the factorization of a matrix into a product of matrices, and are one of the central concepts -in linear algebra. +in (numerical) linear algebra. The following table summarizes the types of matrix factorizations that have been implemented in Julia. Details of their associated methods can be found in the [Standard functions](@ref) section @@ -306,6 +308,98 @@ of the Linear Algebra documentation. | `Schur` | [Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition) | | `GeneralizedSchur` | [Generalized Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition#Generalized_Schur_decomposition) | +Adjoints and transposes of [`Factorization`](@ref) objects are lazily wrapped in +`AdjointFactorization` and `TransposeFactorization` objects, respectively. Generically, +transpose of real `Factorization`s are wrapped as `AdjointFactorization`. + +## [Orthogonal matrices (`AbstractQ`)](@id man-linalg-abstractq) + +Some matrix factorizations generate orthogonal/unitary "matrix" factors. These +factorizations include QR-related factorizations obtained from calls to [`qr`](@ref), i.e., +`QR`, `QRCompactWY` and `QRPivoted`, the Hessenberg factorization obtained from calls to +[`hessenberg`](@ref), and the LQ factorization obtained from [`lq`](@ref). 
While these +orthogonal/unitary factors admit a matrix representation, their internal representation +is, for performance and memory reasons, different. Hence, they should be rather viewed as +matrix-backed, function-based linear operators. In particular, reading, for instance, a +column of its matrix representation requires running "matrix"-vector multiplication code, +rather than simply reading out data from memory (possibly filling parts of the vector with +structural zeros). Another clear distinction from other, non-triangular matrix types is +that the underlying multiplication code allows for in-place modification during multiplication. +Furthermore, objects of specific `AbstractQ` subtypes as those created via [`qr`](@ref), +[`hessenberg`](@ref) and [`lq`](@ref) can behave like a square or a rectangular matrix +depending on context: + +```julia +julia> using LinearAlgebra + +julia> Q = qr(rand(3,2)).Q +3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} + +julia> Matrix(Q) +3×2 Matrix{Float64}: + -0.320597 0.865734 + -0.765834 -0.475694 + -0.557419 0.155628 + +julia> Q*I +3×3 Matrix{Float64}: + -0.320597 0.865734 -0.384346 + -0.765834 -0.475694 -0.432683 + -0.557419 0.155628 0.815514 + +julia> Q*ones(2) +3-element Vector{Float64}: + 0.5451367118802273 + -1.241527373086654 + -0.40179067589600226 + +julia> Q*ones(3) +3-element Vector{Float64}: + 0.16079054743832022 + -1.674209978965636 + 0.41372375588835797 + +julia> ones(1,2) * Q' +1×3 Matrix{Float64}: + 0.545137 -1.24153 -0.401791 + +julia> ones(1,3) * Q' +1×3 Matrix{Float64}: + 0.160791 -1.67421 0.413724 +``` + +Due to this distinction from dense or structured matrices, the abstract `AbstractQ` type +does not subtype `AbstractMatrix`, but instead has its own type hierarchy. Custom types +that subtype `AbstractQ` can rely on generic fallbacks if the following interface is satisfied. +For example, for + +```julia +struct MyQ{T} <: LinearAlgebra.AbstractQ{T} + # required fields +end +``` + +provide overloads for + +```julia +Base.size(Q::MyQ) # size of corresponding square matrix representation +Base.convert(::Type{AbstractQ{T}}, Q::MyQ) # eltype promotion [optional] +LinearAlgebra.lmul!(Q::MyQ, x::AbstractVecOrMat) # left-multiplication +LinearAlgebra.rmul!(A::AbstractMatrix, Q::MyQ) # right-multiplication +``` + +If `eltype` promotion is not of interest, the `convert` method is unnecessary, since by +default `convert(::Type{AbstractQ{T}}, Q::AbstractQ{T})` returns `Q` itself. +Adjoints of `AbstractQ`-typed objects are lazily wrapped in an `AdjointQ` wrapper type, +which requires its own `LinearAlgebra.lmul!` and `LinearAlgebra.rmul!` methods. Given this +set of methods, any `Q::MyQ` can be used like a matrix, preferably in a multiplicative +context: multiplication via `*` with scalars, vectors and matrices from left and right, +obtaining a matrix representation of `Q` via `Matrix(Q)` (or `Q*I`) and indexing into the +matrix representation all work. In contrast, addition and subtraction as well as more +generally broadcasting over elements in the matrix representation fail because that would +be highly inefficient. For such use cases, consider computing the matrix representation +up front and cache it for future reuse. + ## Standard functions Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/). @@ -324,6 +418,8 @@ LinearAlgebra.dot(::Any, ::Any, ::Any) LinearAlgebra.cross LinearAlgebra.axpy! LinearAlgebra.axpby! +LinearAlgebra.rotate! 
+LinearAlgebra.reflect! LinearAlgebra.factorize LinearAlgebra.Diagonal LinearAlgebra.Bidiagonal @@ -420,6 +516,7 @@ Base.:^(::AbstractMatrix, ::Number) Base.:^(::Number, ::AbstractMatrix) LinearAlgebra.log(::StridedMatrix) LinearAlgebra.sqrt(::StridedMatrix) +LinearAlgebra.cbrt(::AbstractMatrix{<:Real}) LinearAlgebra.cos(::StridedMatrix{<:Real}) LinearAlgebra.sin(::StridedMatrix{<:Real}) LinearAlgebra.sincos(::StridedMatrix{<:Real}) @@ -458,13 +555,17 @@ LinearAlgebra.ishermitian Base.transpose LinearAlgebra.transpose! LinearAlgebra.Transpose +LinearAlgebra.TransposeFactorization Base.adjoint LinearAlgebra.adjoint! LinearAlgebra.Adjoint +LinearAlgebra.AdjointFactorization Base.copy(::Union{Transpose,Adjoint}) LinearAlgebra.stride1 LinearAlgebra.checksquare LinearAlgebra.peakflops +LinearAlgebra.hermitianpart +LinearAlgebra.hermitianpart! ``` ## Low-level matrix operations @@ -492,9 +593,11 @@ linear algebra routines it is useful to call the BLAS functions directly. `LinearAlgebra.BLAS` provides wrappers for some of the BLAS functions. Those BLAS functions that overwrite one of the input arrays have names ending in `'!'`. Usually, a BLAS function has -four methods defined, for [`Float64`](@ref), [`Float32`](@ref), `ComplexF64`, and `ComplexF32` arrays. +four methods defined, for [`Float32`](@ref), [`Float64`](@ref), [`ComplexF32`](@ref Complex), +and [`ComplexF64`](@ref Complex) arrays. ### [BLAS character arguments](@id stdlib-blas-chars) + Many BLAS functions accept arguments that determine whether to transpose an argument (`trans`), which triangle of a matrix to reference (`uplo` or `ul`), whether the diagonal of a triangular matrix can be assumed to @@ -502,18 +605,21 @@ be all ones (`dA`) or which side of a matrix multiplication the input argument belongs on (`side`). The possibilities are: #### [Multiplication order](@id stdlib-blas-side) + | `side` | Meaning | |:-------|:--------------------------------------------------------------------| | `'L'` | The argument goes on the *left* side of a matrix-matrix operation. | | `'R'` | The argument goes on the *right* side of a matrix-matrix operation. | #### [Triangle referencing](@id stdlib-blas-uplo) + | `uplo`/`ul` | Meaning | |:------------|:------------------------------------------------------| | `'U'` | Only the *upper* triangle of the matrix will be used. | | `'L'` | Only the *lower* triangle of the matrix will be used. | #### [Transposition operation](@id stdlib-blas-trans) + | `trans`/`tX` | Meaning | |:-------------|:--------------------------------------------------------| | `'N'` | The input matrix `X` is not transposed or conjugated. | @@ -521,6 +627,7 @@ the input argument belongs on (`side`). The possibilities are: | `'C'` | The input matrix `X` will be conjugated and transposed. | #### [Unit diagonal](@id stdlib-blas-diag) + | `diag`/`dX` | Meaning | |:------------|:----------------------------------------------------------| | `'N'` | The diagonal values of the matrix `X` will be read. | @@ -528,61 +635,126 @@ the input argument belongs on (`side`). The possibilities are: ```@docs LinearAlgebra.BLAS +LinearAlgebra.BLAS.set_num_threads +LinearAlgebra.BLAS.get_num_threads +``` + +BLAS functions can be divided into three groups, also called three levels, +depending on when they were first proposed, the type of input parameters, +and the complexity of the operation. + +### Level 1 BLAS functions + +The level 1 BLAS functions were first proposed in [(Lawson, 1979)][Lawson-1979] and +define operations between scalars and vectors. 
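For orientation, here is an illustrative sketch (not part of this patch) of a few level-1 calls using the explicit length/stride signatures of the `LinearAlgebra.BLAS` wrappers listed below:

```julia
using LinearAlgebra

x = rand(100); y = rand(100)

BLAS.scal!(length(x), 2.0, x, 1)        # x .= 2 .* x, in place
s  = BLAS.dot(length(x), x, 1, y, 1)    # real dot product (dotc/dotu for complex vectors)
nx = BLAS.nrm2(length(x), x, 1)         # Euclidean norm of x
BLAS.axpy!(0.5, x, y)                   # y .= 0.5 .* x .+ y
```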
+ +[Lawson-1979]: https://dl.acm.org/doi/10.1145/355841.355847 + +```@docs +# xROTG +# xROTMG +LinearAlgebra.BLAS.rot! +# xROTM +# xSWAP +LinearAlgebra.BLAS.scal! +LinearAlgebra.BLAS.scal +LinearAlgebra.BLAS.blascopy! +# xAXPY! +# xAXPBY! LinearAlgebra.BLAS.dot LinearAlgebra.BLAS.dotu LinearAlgebra.BLAS.dotc -LinearAlgebra.BLAS.blascopy! +# xxDOT LinearAlgebra.BLAS.nrm2 LinearAlgebra.BLAS.asum -LinearAlgebra.BLAS.axpy! -LinearAlgebra.BLAS.axpby! -LinearAlgebra.BLAS.scal! -LinearAlgebra.BLAS.scal LinearAlgebra.BLAS.iamax -LinearAlgebra.BLAS.ger! -LinearAlgebra.BLAS.syr! -LinearAlgebra.BLAS.syrk! -LinearAlgebra.BLAS.syrk -LinearAlgebra.BLAS.syr2k! -LinearAlgebra.BLAS.syr2k -LinearAlgebra.BLAS.her! -LinearAlgebra.BLAS.herk! -LinearAlgebra.BLAS.herk -LinearAlgebra.BLAS.her2k! -LinearAlgebra.BLAS.her2k +``` + +### Level 2 BLAS functions + +The level 2 BLAS functions were published in [(Dongarra, 1988)][Dongarra-1988], +and define matrix-vector operations. + +[Dongarra-1988]: https://dl.acm.org/doi/10.1145/42288.42291 + +**return a vector** + +```@docs +LinearAlgebra.BLAS.gemv! +LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any) LinearAlgebra.BLAS.gbmv! LinearAlgebra.BLAS.gbmv +LinearAlgebra.BLAS.hemv! +LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any) +# hbmv!, hbmv +LinearAlgebra.BLAS.hpmv! +LinearAlgebra.BLAS.symv! +LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any) LinearAlgebra.BLAS.sbmv! LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any, ::Any) LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.spmv! +LinearAlgebra.BLAS.trmv! +LinearAlgebra.BLAS.trmv +# xTBMV +# xTPMV +LinearAlgebra.BLAS.trsv! +LinearAlgebra.BLAS.trsv +# xTBSV +# xTPSV +``` + +**return a matrix** + +```@docs +LinearAlgebra.BLAS.ger! +# xGERU +# xGERC +LinearAlgebra.BLAS.her! +# xHPR +# xHER2 +# xHPR2 +LinearAlgebra.BLAS.syr! +LinearAlgebra.BLAS.spr! +# xSYR2 +# xSPR2 +``` + +### Level 3 BLAS functions + +The level 3 BLAS functions were published in [(Dongarra, 1990)][Dongarra-1990], +and define matrix-matrix operations. + +[Dongarra-1990]: https://dl.acm.org/doi/10.1145/77626.79170 + +```@docs +LinearAlgebra.BLAS.gemmt! +LinearAlgebra.BLAS.gemmt(::Any, ::Any, ::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.gemmt(::Any, ::Any, ::Any, ::Any, ::Any) LinearAlgebra.BLAS.gemm! LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any, ::Any) LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.gemv! -LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any) LinearAlgebra.BLAS.symm! LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any, ::Any) LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.symv! -LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any) LinearAlgebra.BLAS.hemm! LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any, ::Any) LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.hemv! -LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any, ::Any) -LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any) +LinearAlgebra.BLAS.syrk! +LinearAlgebra.BLAS.syrk +LinearAlgebra.BLAS.herk! +LinearAlgebra.BLAS.herk +LinearAlgebra.BLAS.syr2k! +LinearAlgebra.BLAS.syr2k +LinearAlgebra.BLAS.her2k! +LinearAlgebra.BLAS.her2k LinearAlgebra.BLAS.trmm! LinearAlgebra.BLAS.trmm LinearAlgebra.BLAS.trsm! LinearAlgebra.BLAS.trsm -LinearAlgebra.BLAS.trmv! 
-LinearAlgebra.BLAS.trmv -LinearAlgebra.BLAS.trsv! -LinearAlgebra.BLAS.trsv -LinearAlgebra.BLAS.set_num_threads -LinearAlgebra.BLAS.get_num_threads ``` ## LAPACK functions @@ -629,6 +801,7 @@ LinearAlgebra.LAPACK.ggsvd! LinearAlgebra.LAPACK.ggsvd3! LinearAlgebra.LAPACK.geevx! LinearAlgebra.LAPACK.ggev! +LinearAlgebra.LAPACK.ggev3! LinearAlgebra.LAPACK.gtsv! LinearAlgebra.LAPACK.gttrf! LinearAlgebra.LAPACK.gttrs! @@ -669,6 +842,7 @@ LinearAlgebra.LAPACK.hetri! LinearAlgebra.LAPACK.hetrs! LinearAlgebra.LAPACK.syev! LinearAlgebra.LAPACK.syevr! +LinearAlgebra.LAPACK.syevd! LinearAlgebra.LAPACK.sygvd! LinearAlgebra.LAPACK.bdsqr! LinearAlgebra.LAPACK.bdsdc! @@ -677,10 +851,12 @@ LinearAlgebra.LAPACK.gehrd! LinearAlgebra.LAPACK.orghr! LinearAlgebra.LAPACK.gees! LinearAlgebra.LAPACK.gges! +LinearAlgebra.LAPACK.gges3! LinearAlgebra.LAPACK.trexc! LinearAlgebra.LAPACK.trsen! LinearAlgebra.LAPACK.tgsen! LinearAlgebra.LAPACK.trsyl! +LinearAlgebra.LAPACK.hseqr! ``` ```@meta diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index 38f7b8588d1fa..10cc9a2f3459a 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -9,146 +9,153 @@ module LinearAlgebra import Base: \, /, *, ^, +, -, == import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech, - asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, conj, convert, copy, copyto!, - copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, getindex, hcat, - getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle, kron, kron!, - length, log, map, ndims, one, oneunit, parent, permutedims, power_by_squaring, - print_matrix, promote_rule, real, round, sec, sech, setindex!, show, similar, sin, - sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat, - vec, zero -using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, + asin, asinh, atan, atanh, axes, big, broadcast, cbrt, ceil, cis, collect, conj, convert, + copy, copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, + getindex, hcat, getproperty, imag, inv, invpermuterows!, isapprox, isequal, isone, iszero, + IndexStyle, kron, kron!, length, log, map, ndims, one, oneunit, parent, permutecols!, + permutedims, permuterows!, power_by_squaring, promote_rule, real, sec, sech, setindex!, + show, similar, sin, sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, + typed_hcat, vec, view, zero +using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, print_matrix, @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing, - Splat + splat using Base.Broadcast: Broadcasted, broadcasted +using Base.PermutedDimsArrays: CommutativeOps using OpenBLAS_jll using libblastrampoline_jll import Libdl export # Modules - LAPACK, BLAS, + LAPACK, # Types Adjoint, - Transpose, - SymTridiagonal, - Tridiagonal, Bidiagonal, - Factorization, BunchKaufman, Cholesky, CholeskyPivoted, ColumnNorm, + Diagonal, Eigen, + Factorization, GeneralizedEigen, GeneralizedSVD, GeneralizedSchur, + Hermitian, Hessenberg, - LU, LDLt, + LQ, + LU, + LowerTriangular, NoPivot, - RowNonZero, QR, QRPivoted, - LQ, - Schur, - SVD, - Hermitian, RowMaximum, + RowNonZero, + SVD, + Schur, + SymTridiagonal, Symmetric, - LowerTriangular, - UpperTriangular, + Transpose, + Tridiagonal, + UniformScaling, UnitLowerTriangular, UnitUpperTriangular, UpperHessenberg, - Diagonal, - UniformScaling, + 
UpperTriangular, + # Functions - axpy!, + adjoint!, + adjoint, axpby!, - bunchkaufman, + axpy!, bunchkaufman!, - cholesky, + bunchkaufman, cholesky!, + cholesky, cond, condskeel, - copyto!, copy_transpose!, + copyto!, + copytrito!, cross, - adjoint, - adjoint!, det, diag, diagind, diagm, dot, - eigen, eigen!, + eigen, eigmax, eigmin, - eigvals, eigvals!, + eigvals, eigvecs, factorize, givens, - hessenberg, + hermitianpart!, + hermitianpart, hessenberg!, + hessenberg, isdiag, ishermitian, - isposdef, isposdef!, + isposdef, issuccess, issymmetric, istril, istriu, + kron!, kron, ldiv!, ldlt!, ldlt, + lmul!, logabsdet, logdet, - lowrankdowndate, lowrankdowndate!, - lowrankupdate, + lowrankdowndate, lowrankupdate!, - lu, + lowrankupdate, + lq!, + lq, lu!, + lu, lyap, mul!, - lmul!, - rmul!, norm, - normalize, normalize!, + normalize, nullspace, + opnorm, ordschur!, ordschur, pinv, - qr, qr!, - lq, - lq!, - opnorm, + qr, rank, rdiv!, reflect!, + rmul!, rotate!, - schur, schur!, - svd, + schur, svd!, + svd, svdvals!, svdvals, sylvester, tr, - transpose, transpose!, - tril, - triu, + transpose, tril!, + tril, triu!, + triu, + # Operators \, @@ -157,6 +164,17 @@ export # Constants I +# not exported, but public names +public AbstractTriangular, + Givens, + checksquare, + hermitian, + hermitian_type, + isbanded, + peakflops, + symmetric, + symmetric_type + const BlasFloat = Union{Float64,Float32,ComplexF64,ComplexF32} const BlasReal = Union{Float64,Float32} const BlasComplex = Union{ComplexF64,ComplexF32} @@ -281,6 +299,10 @@ The reason for this is that factorization itself is both expensive and typically and performance-critical situations requiring `ldiv!` usually also require fine-grained control over the factorization of `A`. +!!! note + Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as + these are already in a factorized form + # Examples ```jldoctest julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2]; @@ -318,6 +340,10 @@ The reason for this is that factorization itself is both expensive and typically and performance-critical situations requiring `ldiv!` usually also require fine-grained control over the factorization of `A`. +!!! note + Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as + these are already in a factorized form + # Examples ```jldoctest julia> A = [1 2.2 4; 3.1 0.2 3; 4 1 2]; @@ -355,6 +381,10 @@ The reason for this is that factorization itself is both expensive and typically (although it can also be done in-place via, e.g., [`lu!`](@ref)), and performance-critical situations requiring `rdiv!` usually also require fine-grained control over the factorization of `B`. + +!!! 
note + Certain structured matrix types, such as `Diagonal` and `UpperTriangular`, are permitted, as + these are already in a factorized form """ rdiv!(A, B) @@ -415,8 +445,6 @@ include("tridiag.jl") include("triangular.jl") include("factorization.jl") -include("qr.jl") -include("lq.jl") include("eigen.jl") include("svd.jl") include("symmetric.jl") @@ -427,7 +455,10 @@ include("diagonal.jl") include("symmetriceigen.jl") include("bidiag.jl") include("uniformscaling.jl") +include("qr.jl") +include("lq.jl") include("hessenberg.jl") +include("abstractq.jl") include("givens.jl") include("special.jl") include("bitarray.jl") @@ -440,6 +471,34 @@ const ⋅ = dot const × = cross export ⋅, × +wrapper_char(::AbstractArray) = 'N' +wrapper_char(::Adjoint) = 'C' +wrapper_char(::Adjoint{<:Real}) = 'T' +wrapper_char(::Transpose) = 'T' +wrapper_char(A::Hermitian) = A.uplo == 'U' ? 'H' : 'h' +wrapper_char(A::Hermitian{<:Real}) = A.uplo == 'U' ? 'S' : 's' +wrapper_char(A::Symmetric) = A.uplo == 'U' ? 'S' : 's' + +Base.@constprop :aggressive function wrap(A::AbstractVecOrMat, tA::AbstractChar) + if tA == 'N' + return A + elseif tA == 'T' + return transpose(A) + elseif tA == 'C' + return adjoint(A) + elseif tA == 'H' + return Hermitian(A, :U) + elseif tA == 'h' + return Hermitian(A, :L) + elseif tA == 'S' + return Symmetric(A, :U) + else # tA == 's' + return Symmetric(A, :L) + end +end + +_unwrap(A::AbstractVecOrMat) = A + ## convenience methods ## return only the solution of a least squares problem while avoiding promoting ## vectors to matrices. @@ -447,20 +506,47 @@ _cut_B(x::AbstractVector, r::UnitRange) = length(x) > length(r) ? x[r] : x _cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X # SymTridiagonal ev can be the same length as dv, but the last element is -# ignored. However, some methods can fail if they read the entired ev +# ignored. However, some methods can fail if they read the entire ev # rather than just the meaningful elements. This is a helper function # for getting only the meaningful elements of ev. See #41089 -_evview(S::SymTridiagonal) = @view S.ev[begin:length(S.dv) - 1] +_evview(S::SymTridiagonal) = @view S.ev[begin:begin + length(S.dv) - 2] ## append right hand side with zeros if necessary _zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n)) _zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2)) +# convert to Vector, if necessary +_makevector(x::Vector) = x +_makevector(x::AbstractVector) = Vector(x) + +# append a zero element / drop the last element +_pushzero(A) = (B = similar(A, length(A)+1); @inbounds B[begin:end-1] .= A; @inbounds B[end] = zero(eltype(B)); B) +_droplast!(A) = deleteat!(A, lastindex(A)) + +# some trait like this would be cool +# onedefined(::Type{T}) where {T} = hasmethod(one, (T,)) +# but we are actually asking for oneunit(T), that is, however, defined for generic T as +# `T(one(T))`, so the question is equivalent for whether one(T) is defined +onedefined(::Type) = false +onedefined(::Type{<:Number}) = true + +# initialize return array for op(A, B) +_init_eltype(::typeof(*), ::Type{TA}, ::Type{TB}) where {TA,TB} = + (onedefined(TA) && onedefined(TB)) ? + typeof(matprod(oneunit(TA), oneunit(TB))) : + promote_op(matprod, TA, TB) +_init_eltype(op, ::Type{TA}, ::Type{TB}) where {TA,TB} = + (onedefined(TA) && onedefined(TB)) ? 
+ typeof(op(oneunit(TA), oneunit(TB))) : + promote_op(op, TA, TB) +_initarray(op, ::Type{TA}, ::Type{TB}, C) where {TA,TB} = + similar(C, _init_eltype(op, TA, TB), size(C)) + # General fallback definition for handling under- and overdetermined system as well as square problems # While this definition is pretty general, it does e.g. promote to common element type of lhs and rhs -# which is required by LAPACK but not SuiteSpase which allows real-complex solves in some cases. Hence, +# which is required by LAPACK but not SuiteSparse which allows real-complex solves in some cases. Hence, # we restrict this method to only the LAPACK factorizations in LinearAlgebra. -# The definition is put here since it explicitly references all the Factorizion structs so it has +# The definition is put here since it explicitly references all the Factorization structs so it has # to be located after all the files that define the structs. const LAPACKFactorizations{T,S} = Union{ BunchKaufman{T,S}, @@ -471,7 +557,12 @@ const LAPACKFactorizations{T,S} = Union{ QRCompactWY{T,S}, QRPivoted{T,S}, SVD{T,<:Real,S}} -function (\)(F::Union{<:LAPACKFactorizations,Adjoint{<:Any,<:LAPACKFactorizations}}, B::AbstractVecOrMat) + +(\)(F::LAPACKFactorizations, B::AbstractVecOrMat) = ldiv(F, B) +(\)(F::AdjointFactorization{<:Any,<:LAPACKFactorizations}, B::AbstractVecOrMat) = ldiv(F, B) +(\)(F::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) = ldiv(F, B) + +function ldiv(F::Factorization, B::AbstractVecOrMat) require_one_based_indexing(B) m, n = size(F) if m != size(B, 1) @@ -501,17 +592,27 @@ function (\)(F::Union{<:LAPACKFactorizations,Adjoint{<:Any,<:LAPACKFactorization end # disambiguate (\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = - invoke(\, Tuple{Factorization{T}, VecOrMat{Complex{T}}}, F, B) + @invoke \(F::Factorization{T}, B::VecOrMat{Complex{T}}) +(\)(F::AdjointFactorization{T,<:LAPACKFactorizations}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = + ldiv(F, B) +(\)(F::TransposeFactorization{T,<:LU}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = + ldiv(F, B) """ - LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false) + LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) `peakflops` computes the peak flop rate of the computer by using double precision [`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it -multiplies a matrix of size `n x n`, where `n = 2000`. If the underlying BLAS is using +multiplies two `Float64` matrices of size `n x n`, where `n = 4096`. If the underlying BLAS is using multiple threads, higher flop rates are realized. The number of BLAS threads can be set with [`BLAS.set_num_threads(n)`](@ref). +If the keyword argument `eltype` is provided, `peakflops` will construct matrices with elements +of type `eltype` for calculating the peak flop rate. + +By default, `peakflops` will use the best timing from 3 trials. If the `ntrials` keyword argument +is provided, `peakflops` will use those many trials for picking the best timing. + If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all the worker processors. The flop rate of the entire parallel computer is returned. When running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size @@ -521,19 +622,23 @@ of the problem that is solved on each processor. This function requires at least Julia 1.1. 
In Julia 1.0 it is available from the standard library `InteractiveUtils`. """ -function peakflops(n::Integer=2000; parallel::Bool=false) - a = fill(1.,100,100) - t = @elapsed a2 = a*a - a = fill(1.,n,n) - t = @elapsed a2 = a*a - @assert a2[1,1] == n +function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) + t = zeros(Float64, ntrials) + for i=1:ntrials + a = ones(eltype,n,n) + t[i] = @elapsed a2 = a*a + @assert a2[1,1] == n + end + if parallel let Distributed = Base.require(Base.PkgId( Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed")) - return sum(Distributed.pmap(peakflops, fill(n, Distributed.nworkers()))) + nworkers = @invokelatest Distributed.nworkers() + results = @invokelatest Distributed.pmap(peakflops, fill(n, nworkers)) + return sum(results) end else - return 2*Float64(n)^3 / t + return 2*Float64(n)^3 / minimum(t) end end @@ -548,21 +653,37 @@ function versioninfo(io::IO=stdout) println(io, indent, "--> ", lib.libname, " (", interface, ")") end println(io, "Threading:") - println(io, indent, "Threads.nthreads() = ", Base.Threads.nthreads()) + println(io, indent, "Threads.threadpoolsize() = ", Threads.threadpoolsize()) + println(io, indent, "Threads.maxthreadid() = ", Base.Threads.maxthreadid()) println(io, indent, "LinearAlgebra.BLAS.get_num_threads() = ", BLAS.get_num_threads()) println(io, "Relevant environment variables:") env_var_names = [ "JULIA_NUM_THREADS", "MKL_DYNAMIC", "MKL_NUM_THREADS", - "OPENBLAS_NUM_THREADS", + # OpenBLAS has a hierarchy of environment variables for setting the + # number of threads, see + # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables + ("OPENBLAS_NUM_THREADS", "GOTO_NUM_THREADS", "OMP_NUM_THREADS"), ] printed_at_least_one_env_var = false + print_var(io, indent, name) = println(io, indent, name, " = ", ENV[name]) for name in env_var_names - if haskey(ENV, name) - value = ENV[name] - println(io, indent, name, " = ", value) - printed_at_least_one_env_var = true + if name isa Tuple + # If `name` is a Tuple, then find the first environment which is + # defined, and disregard the following ones. 
+ for nm in name + if haskey(ENV, nm) + print_var(io, indent, nm) + printed_at_least_one_env_var = true + break + end + end + else + if haskey(ENV, name) + print_var(io, indent, name) + printed_at_least_one_env_var = true + end end end if !printed_at_least_one_env_var @@ -573,7 +694,8 @@ end function __init__() try - BLAS.lbt_forward(OpenBLAS_jll.libopenblas_path; clear=true) + verbose = parse(Bool, get(ENV, "LBT_VERBOSE", "false")) + BLAS.lbt_forward(OpenBLAS_jll.libopenblas_path; clear=true, verbose) BLAS.check() catch ex Base.showerror_nostdio(ex, "WARNING: Error during initialization of module LinearAlgebra") @@ -581,8 +703,13 @@ function __init__() # register a hook to disable BLAS threading Base.at_disable_library_threading(() -> BLAS.set_num_threads(1)) - if !haskey(ENV, "OPENBLAS_NUM_THREADS") - BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2)) + # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables + if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS") + @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64" + BLAS.set_num_threads(max(1, Sys.CPU_THREADS)) + else + BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2)) + end end end diff --git a/stdlib/LinearAlgebra/src/abstractq.jl b/stdlib/LinearAlgebra/src/abstractq.jl new file mode 100644 index 0000000000000..b0d53320f4aa3 --- /dev/null +++ b/stdlib/LinearAlgebra/src/abstractq.jl @@ -0,0 +1,589 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +abstract type AbstractQ{T} end + +struct AdjointQ{T,S<:AbstractQ{T}} <: AbstractQ{T} + Q::S +end + +parent(adjQ::AdjointQ) = adjQ.Q +eltype(::Type{<:AbstractQ{T}}) where {T} = T +Base.eltypeof(Q::AbstractQ) = eltype(Q) +ndims(::AbstractQ) = 2 + +# inversion/adjoint/transpose +inv(Q::AbstractQ) = Q' +adjoint(Q::AbstractQ) = AdjointQ(Q) +transpose(Q::AbstractQ{<:Real}) = AdjointQ(Q) +transpose(Q::AbstractQ) = error("transpose not implemented for $(typeof(Q)). Consider using adjoint instead of transpose.") +adjoint(adjQ::AdjointQ) = adjQ.Q + +# promotion with AbstractMatrix, at least for equal eltypes +promote_rule(::Type{<:AbstractMatrix{T}}, ::Type{<:AbstractQ{T}}) where {T} = + (@inline; Union{AbstractMatrix{T},AbstractQ{T}}) + +# conversion +# the following eltype promotion should be defined for each subtype `QType` +# convert(::Type{AbstractQ{T}}, Q::QType) where {T} = QType{T}(Q) +# and then care has to be taken that +# QType{T}(Q::QType{T}) where T = ... +# is implemented as a no-op + +# the following conversion method ensures functionality when the above method is not defined +# (as for HessenbergQ), but no eltype conversion is required either (say, in multiplication) +convert(::Type{AbstractQ{T}}, Q::AbstractQ{T}) where {T} = Q +convert(::Type{AbstractQ{T}}, adjQ::AdjointQ{T}) where {T} = adjQ +convert(::Type{AbstractQ{T}}, adjQ::AdjointQ) where {T} = convert(AbstractQ{T}, adjQ.Q)' + +# ... 
to matrix +collect(Q::AbstractQ) = copyto!(Matrix{eltype(Q)}(undef, size(Q)), Q) +Matrix{T}(Q::AbstractQ) where {T} = convert(Matrix{T}, Q*I) # generic fallback, yields square matrix +Matrix{T}(adjQ::AdjointQ{S}) where {T,S} = convert(Matrix{T}, lmul!(adjQ, Matrix{S}(I, size(adjQ)))) +Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q) +Array{T}(Q::AbstractQ) where {T} = Matrix{T}(Q) +Array(Q::AbstractQ) = Matrix(Q) +convert(::Type{T}, Q::AbstractQ) where {T<:AbstractArray} = T(Q) +# legacy +@deprecate(convert(::Type{AbstractMatrix{T}}, Q::AbstractQ) where {T}, + convert(LinearAlgebra.AbstractQ{T}, Q)) + +function size(Q::AbstractQ, dim::Integer) + if dim < 1 + throw(BoundsError()) + elseif dim <= 2 # && 1 <= dim + return size(Q)[dim] + else # 2 < dim + return 1 + end +end +size(adjQ::AdjointQ) = reverse(size(adjQ.Q)) + +# comparison +(==)(Q::AbstractQ, A::AbstractMatrix) = lmul!(Q, Matrix{eltype(Q)}(I, size(A))) == A +(==)(A::AbstractMatrix, Q::AbstractQ) = Q == A +(==)(Q::AbstractQ, P::AbstractQ) = Matrix(Q) == Matrix(P) +isapprox(Q::AbstractQ, A::AbstractMatrix; kwargs...) = + isapprox(lmul!(Q, Matrix{eltype(Q)}(I, size(A))), A, kwargs...) +isapprox(A::AbstractMatrix, Q::AbstractQ; kwargs...) = isapprox(Q, A, kwargs...) +isapprox(Q::AbstractQ, P::AbstractQ; kwargs...) = isapprox(Matrix(Q), Matrix(P), kwargs...) + +# pseudo-array behaviour, required for indexing with `begin` or `end` +axes(Q::AbstractQ) = map(Base.oneto, size(Q)) +axes(Q::AbstractQ, d::Integer) = d in (1, 2) ? axes(Q)[d] : Base.OneTo(1) + +copymutable(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q))) +copy(Q::AbstractQ) = copymutable(Q) + +# getindex +@inline function getindex(Q::AbstractQ, inds...) + @boundscheck Base.checkbounds_indices(Bool, axes(Q), inds) || Base.throw_boundserror(Q, inds) + return _getindex(Q, inds...) +end +@inline getindex(Q::AbstractQ, ::Colon) = copymutable(Q)[:] +@inline getindex(Q::AbstractQ, ::Colon, ::Colon) = copy(Q) + +@inline _getindex(Q::AbstractQ, inds...) = @inbounds copymutable(Q)[inds...] +@inline function _getindex(Q::AbstractQ, ::Colon, J::AbstractVector{<:Integer}) + Y = zeros(eltype(Q), size(Q, 2), length(J)) + @inbounds for (i,j) in enumerate(J) + Y[j,i] = oneunit(eltype(Q)) + end + lmul!(Q, Y) +end +@inline _getindex(Q::AbstractQ, I::AbstractVector{Int}, J::AbstractVector{Int}) = @inbounds Q[:,J][I,:] +@inline function _getindex(Q::AbstractQ, ::Colon, j::Int) + y = zeros(eltype(Q), size(Q, 2)) + y[j] = oneunit(eltype(Q)) + lmul!(Q, y) +end +@inline _getindex(Q::AbstractQ, i::Int, j::Int) = @inbounds Q[:,j][i] + +# needed because AbstractQ does not subtype AbstractMatrix +qr(Q::AbstractQ{T}, arg...; kwargs...) where {T} = qr!(Matrix{_qreltype(T)}(Q), arg...; kwargs...) +lq(Q::AbstractQ{T}, arg...; kwargs...) where {T} = lq!(Matrix{lq_eltype(T)}(Q), arg...; kwargs...) +hessenberg(Q::AbstractQ{T}) where {T} = hessenberg!(Matrix{eigtype(T)}(Q)) + +# needed when used interchangeably with AbstractMatrix (analogous to views of ranges) +view(A::AbstractQ, I...) = getindex(A, I...) 
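As a brief illustration of the indexing machinery defined above (an editorial sketch, not part of the patch): reading a column of an `AbstractQ` runs the multiplication code on a standard basis vector rather than reading stored entries, and the thin and square materializations differ in shape.

```julia
using LinearAlgebra

A = rand(4, 3)
Q = qr(A).Q                 # QRCompactWYQ <: AbstractQ, not an AbstractMatrix

# Q[:, j] applies Q to the j-th standard basis vector (same code path as Q * e_j)
e2 = zeros(4); e2[2] = 1.0
Q[:, 2] ≈ Q * e2            # true

size(Matrix(Q))             # (4, 3): the "thin" factor
size(Q * I)                 # (4, 4): the square form
```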
+ +# specialization avoiding the fallback using slow `getindex` +function copyto!(dest::AbstractMatrix, src::AbstractQ) + copyto!(dest, I) + lmul!(src, dest) +end +# needed to resolve method ambiguities +function copyto!(dest::PermutedDimsArray{T,2,perm}, src::AbstractQ) where {T,perm} + if perm == (1, 2) + copyto!(parent(dest), src) + else + @assert perm == (2, 1) # there are no other permutations of two indices + if T <: Real + copyto!(parent(dest), I) + lmul!(src', parent(dest)) + else + # LAPACK does not offer inplace lmul!(transpose(Q), B) for complex Q + tmp = similar(parent(dest)) + copyto!(tmp, I) + rmul!(tmp, src) + permutedims!(parent(dest), tmp, (2, 1)) + end + end + return dest +end +# used in concatenations: Base.__cat_offset1! +Base._copy_or_fill!(A, inds, Q::AbstractQ) = (A[inds...] = collect(Q)) +# overloads of helper functions +Base.cat_size(A::AbstractQ) = size(A) +Base.cat_size(A::AbstractQ, d) = size(A, d) +Base.cat_length(a::AbstractQ) = prod(size(a)) +Base.cat_ndims(a::AbstractQ) = ndims(a) +Base.cat_indices(A::AbstractQ, d) = axes(A, d) +Base.cat_similar(A::AbstractQ, T::Type, shape::Tuple) = Array{T}(undef, shape) +Base.cat_similar(A::AbstractQ, T::Type, shape::Vector) = Array{T}(undef, shape...) + +function show(io::IO, ::MIME{Symbol("text/plain")}, Q::AbstractQ) + print(io, Base.dims2string(size(Q)), ' ', summary(Q)) +end + +# multiplication +# generically, treat AbstractQ like a matrix with its definite size +qsize_check(Q::AbstractQ, B::AbstractVecOrMat) = + size(Q, 2) == size(B, 1) || + throw(DimensionMismatch("second dimension of Q, $(size(Q,2)), must coincide with first dimension of B, $(size(B,1))")) +qsize_check(A::AbstractVecOrMat, Q::AbstractQ) = + size(A, 2) == size(Q, 1) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), must coincide with first dimension of Q, $(size(Q,1))")) +qsize_check(Q::AbstractQ, P::AbstractQ) = + size(Q, 2) == size(P, 1) || + throw(DimensionMismatch("second dimension of A, $(size(Q,2)), must coincide with first dimension of B, $(size(P,1))")) + +# mimic the AbstractArray fallback +*(Q::AbstractQ{<:Number}) = Q + +(*)(Q::AbstractQ, J::UniformScaling) = Q*J.λ +function (*)(Q::AbstractQ, b::Number) + T = promote_type(eltype(Q), typeof(b)) + lmul!(convert(AbstractQ{T}, Q), Matrix{T}(b*I, size(Q))) +end +function (*)(Q::AbstractQ, B::AbstractVector) + T = promote_type(eltype(Q), eltype(B)) + qsize_check(Q, B) + mul!(similar(B, T, size(Q, 1)), convert(AbstractQ{T}, Q), B) +end +function (*)(Q::AbstractQ, B::AbstractMatrix) + T = promote_type(eltype(Q), eltype(B)) + qsize_check(Q, B) + mul!(similar(B, T, (size(Q, 1), size(B, 2))), convert(AbstractQ{T}, Q), B) +end + +(*)(J::UniformScaling, Q::AbstractQ) = J.λ*Q +function (*)(a::Number, Q::AbstractQ) + T = promote_type(typeof(a), eltype(Q)) + rmul!(Matrix{T}(a*I, size(Q)), convert(AbstractQ{T}, Q)) +end +function (*)(A::AbstractVector, Q::AbstractQ) + T = promote_type(eltype(A), eltype(Q)) + qsize_check(A, Q) + return mul!(similar(A, T, length(A)), A, convert(AbstractQ{T}, Q)) +end +function (*)(A::AbstractMatrix, Q::AbstractQ) + T = promote_type(eltype(A), eltype(Q)) + qsize_check(A, Q) + return mul!(similar(A, T, (size(A, 1), size(Q, 2))), A, convert(AbstractQ{T}, Q)) +end +(*)(u::AdjointAbsVec, Q::AbstractQ) = (Q'u')' + +### Q*Q (including adjoints) +(*)(Q::AbstractQ, P::AbstractQ) = Q * (P*I) + +### mul! 
+function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat,AbstractQ}) where {T} + require_one_based_indexing(C, B) + mB, nB = size(B, 1), size(B, 2) + mC, nC = size(C, 1), size(C, 2) + qsize_check(Q, B) + nB != nC && throw(DimensionMismatch()) + if mB < mC + inds = CartesianIndices(axes(B)) + copyto!(view(C, inds), B) + C[CartesianIndices((mB+1:mC, axes(C, 2)))] .= zero(T) + return lmul!(Q, C) + else + return lmul!(Q, copyto!(C, B)) + end +end +function mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat, Q::AbstractQ{T}) where {T} + require_one_based_indexing(C, A) + mA, nA = size(A, 1), size(A, 2) + mC, nC = size(C, 1), size(C, 2) + mA != mC && throw(DimensionMismatch()) + qsize_check(A, Q) + if nA < nC + inds = CartesianIndices(axes(A)) + copyto!(view(C, inds), A) + C[CartesianIndices((axes(C, 1), nA+1:nC))] .= zero(T) + return rmul!(C, Q) + else + return rmul!(copyto!(C, A), Q) + end +end + +### division +\(Q::AbstractQ, A::AbstractVecOrMat) = Q'*A +/(A::AbstractVecOrMat, Q::AbstractQ) = A*Q' +ldiv!(Q::AbstractQ, A::AbstractVecOrMat) = lmul!(Q', A) +ldiv!(C::AbstractVecOrMat, Q::AbstractQ, A::AbstractVecOrMat) = mul!(C, Q', A) +rdiv!(A::AbstractVecOrMat, Q::AbstractQ) = rmul!(A, Q') + +logabsdet(Q::AbstractQ) = (d = det(Q); return log(abs(d)), sign(d)) +function logdet(A::AbstractQ) + d, s = logabsdet(A) + return d + log(s) +end + +########################################################### +################ Q from QR decompositions ################# +########################################################### + +""" + QRPackedQ <: LinearAlgebra.AbstractQ + +The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QR`](@ref) or +[`QRPivoted`](@ref) format. +""" +struct QRPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T} + factors::S + τ::C + + function QRPackedQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}} + require_one_based_indexing(factors, τ) + new{T,S,C}(factors, τ) + end +end +QRPackedQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} = + QRPackedQ{T,typeof(factors),typeof(τ)}(factors, τ) +QRPackedQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} = + QRPackedQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ)) +# backwards-compatible constructors (remove with Julia 2.0) +@deprecate(QRPackedQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S}, + QRPackedQ{T,S,typeof(τ)}(factors, τ), false) + +""" + QRCompactWYQ <: LinearAlgebra.AbstractQ + +The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QRCompactWY`](@ref) +format. 
+""" +struct QRCompactWYQ{S, M<:AbstractMatrix{S}, C<:AbstractMatrix{S}} <: AbstractQ{S} + factors::M + T::C + + function QRCompactWYQ{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}} + require_one_based_indexing(factors, T) + new{S,M,C}(factors, T) + end +end +QRCompactWYQ(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} = + QRCompactWYQ{S,typeof(factors),typeof(T)}(factors, T) +QRCompactWYQ{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} = + QRCompactWYQ(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T)) +# backwards-compatible constructors (remove with Julia 2.0) +@deprecate(QRCompactWYQ{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M}, + QRCompactWYQ{S,M,typeof(T)}(factors, T), false) + +QRPackedQ{T}(Q::QRPackedQ) where {T} = QRPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(AbstractVector{T}, Q.τ)) +QRCompactWYQ{S}(Q::QRCompactWYQ) where {S} = QRCompactWYQ(convert(AbstractMatrix{S}, Q.factors), convert(AbstractMatrix{S}, Q.T)) + +# override generic square fallback +Matrix{T}(Q::Union{QRCompactWYQ{S},QRPackedQ{S}}) where {T,S} = + convert(Matrix{T}, lmul!(Q, Matrix{S}(I, size(Q, 1), min(size(Q.factors)...)))) +Matrix(Q::Union{QRCompactWYQ{S},QRPackedQ{S}}) where {S} = Matrix{S}(Q) + +convert(::Type{AbstractQ{T}}, Q::QRPackedQ) where {T} = QRPackedQ{T}(Q) +convert(::Type{AbstractQ{T}}, Q::QRCompactWYQ) where {T} = QRCompactWYQ{T}(Q) + +size(Q::Union{QRCompactWYQ,QRPackedQ}, dim::Integer) = + size(Q.factors, dim == 2 ? 1 : dim) +size(Q::Union{QRCompactWYQ,QRPackedQ}) = (n = size(Q.factors, 1); (n, n)) + +## Multiplication +### QB +lmul!(A::QRCompactWYQ{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = + LAPACK.gemqrt!('L', 'N', A.factors, A.T, B) +lmul!(A::QRPackedQ{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = + LAPACK.ormqr!('L', 'N', A.factors, A.τ, B) +function lmul!(A::QRPackedQ, B::AbstractVecOrMat) + require_one_based_indexing(B) + mA, nA = size(A.factors) + mB, nB = size(B,1), size(B,2) + if mA != mB + throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)")) + end + Afactors = A.factors + @inbounds begin + for k = min(mA,nA):-1:1 + for j = 1:nB + vBj = B[k,j] + for i = k+1:mB + vBj += conj(Afactors[i,k])*B[i,j] + end + vBj = A.τ[k]*vBj + B[k,j] -= vBj + for i = k+1:mB + B[i,j] -= Afactors[i,k]*vBj + end + end + end + end + B +end + +### QcB +lmul!(adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = + (Q = adjQ.Q; LAPACK.gemqrt!('L', 'T', Q.factors, Q.T, B)) +lmul!(adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = + (Q = adjQ.Q; LAPACK.gemqrt!('L', 'C', Q.factors, Q.T, B)) +lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = + (Q = adjQ.Q; LAPACK.ormqr!('L', 'T', Q.factors, Q.τ, B)) +lmul!(adjQ::AdjointQ{<:Any,<:QRPackedQ{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = + (Q = adjQ.Q; LAPACK.ormqr!('L', 'C', Q.factors, Q.τ, B)) +function lmul!(adjA::AdjointQ{<:Any,<:QRPackedQ}, B::AbstractVecOrMat) + require_one_based_indexing(B) + A = adjA.Q + mA, nA = size(A.factors) + mB, nB = size(B,1), size(B,2) + if mA != mB + throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)")) + end + Afactors = A.factors + @inbounds begin + for k = 1:min(mA,nA) + for j = 1:nB + vBj = B[k,j] + for i = k+1:mB + vBj += 
conj(Afactors[i,k])*B[i,j] + end + vBj = conj(A.τ[k])*vBj + B[k,j] -= vBj + for i = k+1:mB + B[i,j] -= Afactors[i,k]*vBj + end + end + end + end + B +end + +### AQ +rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,<:StridedMatrix}) where {T<:BlasFloat} = + LAPACK.gemqrt!('R', 'N', B.factors, B.T, A) +rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,<:StridedMatrix}) where {T<:BlasFloat} = + LAPACK.ormqr!('R', 'N', B.factors, B.τ, A) +function rmul!(A::AbstractVecOrMat, Q::QRPackedQ) + require_one_based_indexing(A) + mQ, nQ = size(Q.factors) + mA, nA = size(A,1), size(A,2) + if nA != mQ + throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)")) + end + Qfactors = Q.factors + @inbounds begin + for k = 1:min(mQ,nQ) + for i = 1:mA + vAi = A[i,k] + for j = k+1:mQ + vAi += A[i,j]*Qfactors[j,k] + end + vAi = vAi*Q.τ[k] + A[i,k] -= vAi + for j = k+1:nA + A[i,j] -= vAi*conj(Qfactors[j,k]) + end + end + end + end + A +end + +### AQc +rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasReal} = + (Q = adjQ.Q; LAPACK.gemqrt!('R', 'T', Q.factors, Q.T, A)) +rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasComplex} = + (Q = adjQ.Q; LAPACK.gemqrt!('R', 'C', Q.factors, Q.T, A)) +rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasReal} = + (Q = adjQ.Q; LAPACK.ormqr!('R', 'T', Q.factors, Q.τ, A)) +rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} = + (Q = adjQ.Q; LAPACK.ormqr!('R', 'C', Q.factors, Q.τ, A)) +function rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:QRPackedQ}) + require_one_based_indexing(A) + Q = adjQ.Q + mQ, nQ = size(Q.factors) + mA, nA = size(A,1), size(A,2) + if nA != mQ + throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)")) + end + Qfactors = Q.factors + @inbounds begin + for k = min(mQ,nQ):-1:1 + for i = 1:mA + vAi = A[i,k] + for j = k+1:mQ + vAi += A[i,j]*Qfactors[j,k] + end + vAi = vAi*conj(Q.τ[k]) + A[i,k] -= vAi + for j = k+1:nA + A[i,j] -= vAi*conj(Qfactors[j,k]) + end + end + end + end + A +end + +det(Q::QRPackedQ) = _det_tau(Q.τ) +det(Q::QRCompactWYQ) = + prod(i -> _det_tau(_diagview(Q.T[:, i:min(i + size(Q.T, 1), size(Q.T, 2))])), + 1:size(Q.T, 1):size(Q.T, 2)) + +_diagview(A) = @view A[diagind(A)] + +# Compute `det` from the number of Householder reflections. Handle +# the case `Q.τ` contains zeros. +_det_tau(τs::AbstractVector{<:Real}) = + isodd(count(!iszero, τs)) ? -one(eltype(τs)) : one(eltype(τs)) + +# In complex case, we need to compute the non-unit eigenvalue `λ = 1 - c*τ` +# (where `c = v'v`) of each Householder reflector. As we know that the +# reflector must have the determinant of 1, it must satisfy `abs2(λ) == 1`. +# Combining this with the constraint `c > 0`, it turns out that the eigenvalue +# (hence the determinant) can be computed as `λ = -sign(τ)^2`. +# See: https://github.com/JuliaLang/julia/pull/32887#issuecomment-521935716 +_det_tau(τs) = prod(τ -> iszero(τ) ? one(τ) : -sign(τ)^2, τs) + +########################################################### +######## Q from Hessenberg decomposition ################## +########################################################### + +""" + HessenbergQ <: AbstractQ + +Given a [`Hessenberg`](@ref) factorization object `F`, `F.Q` returns +a `HessenbergQ` object, which is an implicit representation of the unitary +matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`. 
+This `F.Q` object can be efficiently multiplied by matrices or vectors, +and can be converted to an ordinary matrix type with `Matrix(F.Q)`. +""" +struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector,sym} <: AbstractQ{T} + uplo::Char + factors::S + τ::W + function HessenbergQ{T,S,W,sym}(uplo::AbstractChar, factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector,sym} + new(uplo, factors, τ) + end +end +HessenbergQ(F::Hessenberg{<:Any,<:UpperHessenberg,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,false}(F.uplo, F.factors, F.τ) +HessenbergQ(F::Hessenberg{<:Any,<:SymTridiagonal,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,true}(F.uplo, F.factors, F.τ) + +size(Q::HessenbergQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim) +size(Q::HessenbergQ) = size(Q, 1), size(Q, 2) + +# HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra) +const BlasHessenbergQ{T,sym} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T},sym} where {T<:BlasFloat,sym} + +## reconstruct the original matrix +Matrix{T}(Q::BlasHessenbergQ{<:Any,false}) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) +Matrix{T}(Q::BlasHessenbergQ{<:Any,true}) where {T} = convert(Matrix{T}, LAPACK.orgtr!(Q.uplo, copy(Q.factors), Q.τ)) + +lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = + LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) +rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} = + LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) +lmul!(adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = + (Q = adjQ.Q; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) +rmul!(X::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} = + (Q = adjQ.Q; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) + +lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = + LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X) +rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} = + LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X) +lmul!(adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = + (Q = adjQ.Q; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X)) +rmul!(X::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} = + (Q = adjQ.Q; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X)) + +lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')' +rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')' +lmul!(adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', adjQ')' +rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')' + +# flexible left-multiplication (and adjoint right-multiplication) +qsize_check(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractVecOrMat) = + size(B, 1) in size(Q.factors) || + throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(Q.factors))")) +qsize_check(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) = + (Q = adjQ.Q; size(A, 2) in size(Q.factors) || + 
throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))) + +det(Q::HessenbergQ) = _det_tau(Q.τ) + +########################################################### +################ Q from LQ decomposition ################## +########################################################### + +struct LQPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T} + factors::S + τ::C +end + +LQPackedQ{T}(Q::LQPackedQ) where {T} = LQPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(AbstractVector{T}, Q.τ)) +@deprecate(AbstractMatrix{T}(Q::LQPackedQ) where {T}, + convert(AbstractQ{T}, Q), + false) +Matrix{T}(A::LQPackedQ) where {T} = convert(Matrix{T}, LAPACK.orglq!(copy(A.factors), A.τ)) +convert(::Type{AbstractQ{T}}, Q::LQPackedQ) where {T} = LQPackedQ{T}(Q) + +# size(Q::LQPackedQ) yields the shape of Q's square form +size(Q::LQPackedQ) = (n = size(Q.factors, 2); return n, n) + +## Multiplication +# out-of-place right application of LQPackedQs +# +# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension +# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q), +# and if so effectively apply Q's square form to A without additional shenanigans; and +# (2) if the preceding dimensions do not match, check whether the appropriate dimension of +# A instead matches the number of rows of the matrix of which Q is a factor (i.e. +# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending +# A as necessary for check (1) to pass (if possible) and then applying Q's square form + +qsize_check(adjQ::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVecOrMat) = + size(B, 1) in size(adjQ.Q.factors) || + throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(adjQ.Q.factors))")) +qsize_check(A::AbstractVecOrMat, Q::LQPackedQ) = + size(A, 2) in size(Q.factors) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))")) + +# in-place right-application of LQPackedQs +# these methods require that the applied-to matrix's (A's) number of columns +# match the number of columns (nQ) of the LQPackedQ (Q) (necessary for in-place +# operation, and the underlying LAPACK routine (ormlq) treats the implicit Q +# as its (nQ-by-nQ) square form) +rmul!(A::StridedVecOrMat{T}, B::LQPackedQ{T}) where {T<:BlasFloat} = + LAPACK.ormlq!('R', 'N', B.factors, B.τ, A) +rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} = + (B = adjB.Q; LAPACK.ormlq!('R', 'T', B.factors, B.τ, A)) +rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} = + (B = adjB.Q; LAPACK.ormlq!('R', 'C', B.factors, B.τ, A)) + +### QB / QcB +lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B) +lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = + (A = adjA.Q; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B)) +lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = + (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B)) + +# In LQ factorization, `Q` is expressed as the product of the adjoint of the +# reflectors. Thus, `det` has to be conjugated. 
+det(Q::LQPackedQ) = conj(_det_tau(Q.τ)) diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index a8574743cb933..5ad678e82f014 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -1,8 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using Base: @propagate_inbounds -import Base: length, size, axes, IndexStyle, getindex, setindex!, parent, vec, convert, similar - ### basic definitions (types, aliases, constructors, abstractarray interface, sundry similar) # note that Adjoint and Transpose must be able to wrap not only vectors and matrices @@ -12,7 +9,7 @@ import Base: length, size, axes, IndexStyle, getindex, setindex!, parent, vec, c Adjoint Lazy wrapper type for an adjoint view of the underlying linear algebra object, -usually an `AbstractVector`/`AbstractMatrix`, but also some `Factorization`, for instance. +usually an `AbstractVector`/`AbstractMatrix`. Usually, the `Adjoint` constructor should not be called directly, use [`adjoint`](@ref) instead. To materialize the view use [`copy`](@ref). @@ -39,7 +36,7 @@ end Transpose Lazy wrapper type for a transpose view of the underlying linear algebra object, -usually an `AbstractVector`/`AbstractMatrix`, but also some `Factorization`, for instance. +usually an `AbstractVector`/`AbstractMatrix`. Usually, the `Transpose` constructor should not be called directly, use [`transpose`](@ref) instead. To materialize the view use [`copy`](@ref). @@ -67,6 +64,30 @@ end Adjoint(A) = Adjoint{Base.promote_op(adjoint,eltype(A)),typeof(A)}(A) Transpose(A) = Transpose{Base.promote_op(transpose,eltype(A)),typeof(A)}(A) +""" + inplace_adj_or_trans(::AbstractArray) -> adjoint!|transpose!|copyto! + inplace_adj_or_trans(::Type{<:AbstractArray}) -> adjoint!|transpose!|copyto! + +Return [`adjoint!`](@ref) from an `Adjoint` type or object and +[`transpose!`](@ref) from a `Transpose` type or object. Otherwise, +return [`copyto!`](@ref). Note that `Adjoint` and `Transpose` have +to be the outer-most wrapper object for a non-`identity` function to be +returned. +""" +inplace_adj_or_trans(::T) where {T <: AbstractArray} = inplace_adj_or_trans(T) +inplace_adj_or_trans(::Type{<:AbstractArray}) = copyto! +inplace_adj_or_trans(::Type{<:Adjoint}) = adjoint! +inplace_adj_or_trans(::Type{<:Transpose}) = transpose! 
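A minimal usage sketch for the `inplace_adj_or_trans` helper defined above, assuming it remains an internal, unexported function reached as `LinearAlgebra.inplace_adj_or_trans`: the function it returns writes the materialized wrapper into a destination array in a single pass.

```julia
using LinearAlgebra

A = [1 2; 3 4]
LinearAlgebra.inplace_adj_or_trans(A)             # copyto!  (plain arrays are copied as-is)
LinearAlgebra.inplace_adj_or_trans(A')            # adjoint! (Adjoint is the outermost wrapper)
LinearAlgebra.inplace_adj_or_trans(transpose(A))  # transpose!

# Typical use: materialize a wrapped matrix into `dest` without an intermediate copy.
# (Sketch only: `inplace_adj_or_trans` is internal and not part of the public API.)
B = transpose(A)
dest = similar(A)
LinearAlgebra.inplace_adj_or_trans(B)(dest, parent(B))
dest == [1 3; 2 4]   # true
```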
+ +# unwraps Adjoint, Transpose, Symmetric, Hermitian +_unwrap(A::Adjoint) = parent(A) +_unwrap(A::Transpose) = parent(A) + +# unwraps Adjoint and Transpose only +_unwrap_at(A) = A +_unwrap_at(A::Adjoint) = parent(A) +_unwrap_at(A::Transpose) = parent(A) + Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent) Base.unaliascopy(A::Union{Adjoint,Transpose}) = typeof(A)(Base.unaliascopy(A.parent)) @@ -237,8 +258,8 @@ julia> transpose(v) * v # compute the dot product For a matrix of matrices, the individual blocks are recursively operated on: ```jldoctest -julia> C = reshape(1:4, 2, 2) -2×2 reshape(::UnitRange{Int64}, 2, 2) with eltype Int64: +julia> C = [1 3; 2 4] +2×2 Matrix{Int64}: 1 3 2 4 @@ -260,6 +281,11 @@ adjoint(A::Adjoint) = A.parent transpose(A::Transpose) = A.parent adjoint(A::Transpose{<:Real}) = A.parent transpose(A::Adjoint{<:Real}) = A.parent +adjoint(A::Transpose{<:Any,<:Adjoint}) = transpose(A.parent.parent) +transpose(A::Adjoint{<:Any,<:Transpose}) = adjoint(A.parent.parent) +# disambiguation +adjoint(A::Transpose{<:Real,<:Adjoint}) = transpose(A.parent.parent) +transpose(A::Adjoint{<:Real,<:Transpose}) = A.parent # printing function Base.showarg(io::IO, v::Adjoint, toplevel) @@ -291,14 +317,19 @@ wrapperop(_) = identity wrapperop(::Adjoint) = adjoint wrapperop(::Transpose) = transpose +# the following fallbacks can be removed if Adjoint/Transpose are restricted to AbstractVecOrMat +size(A::AdjOrTrans) = reverse(size(A.parent)) +axes(A::AdjOrTrans) = reverse(axes(A.parent)) # AbstractArray interface, basic definitions length(A::AdjOrTrans) = length(A.parent) size(v::AdjOrTransAbsVec) = (1, length(v.parent)) size(A::AdjOrTransAbsMat) = reverse(size(A.parent)) -axes(v::AdjOrTransAbsVec) = (Base.OneTo(1), axes(v.parent)...) +axes(v::AdjOrTransAbsVec) = (axes(v.parent,2), axes(v.parent)...) 
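A brief illustration of the wrapper semantics these definitions provide. For an ordinary 1-based parent the previous and new `axes` methods return the same tuple; the `axes(v.parent, 2)` form only makes a difference for parents with non-standard axes (e.g. OffsetArrays).

```julia
using LinearAlgebra

v = [1 + 2im, 3 - 1im]
size(v')            # (1, 2): the adjoint of a vector is a lazy row covector
axes(v')            # (Base.OneTo(1), Base.OneTo(2))
(v')[1, 2]          # 3 + 1im: entries are conjugated on access, nothing is copied
parent(v') === v    # true: the wrapper stores only a reference to v
```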
axes(A::AdjOrTransAbsMat) = reverse(axes(A.parent)) IndexStyle(::Type{<:AdjOrTransAbsVec}) = IndexLinear() IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian() +@propagate_inbounds Base.isassigned(v::AdjOrTransAbsVec, i::Int) = isassigned(v.parent, i-1+first(axes(v.parent)[1])) +@propagate_inbounds Base.isassigned(v::AdjOrTransAbsMat, i::Int, j::Int) = isassigned(v.parent, j, i) @propagate_inbounds getindex(v::AdjOrTransAbsVec{T}, i::Int) where {T} = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])::T @propagate_inbounds getindex(A::AdjOrTransAbsMat{T}, i::Int, j::Int) where {T} = wrapperop(A)(A.parent[j, i])::T @propagate_inbounds setindex!(v::AdjOrTransAbsVec, x, i::Int) = (setindex!(v.parent, wrapperop(v)(x), i-1+first(axes(v.parent)[1])); v) @@ -308,8 +339,8 @@ IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian() @propagate_inbounds getindex(v::AdjOrTransAbsVec, ::Colon, ::Colon) = wrapperop(v)(v.parent[:]) # conversion of underlying storage -convert(::Type{Adjoint{T,S}}, A::Adjoint) where {T,S} = Adjoint{T,S}(convert(S, A.parent)) -convert(::Type{Transpose{T,S}}, A::Transpose) where {T,S} = Transpose{T,S}(convert(S, A.parent)) +convert(::Type{Adjoint{T,S}}, A::Adjoint) where {T,S} = Adjoint{T,S}(convert(S, A.parent))::Adjoint{T,S} +convert(::Type{Transpose{T,S}}, A::Transpose) where {T,S} = Transpose{T,S}(convert(S, A.parent))::Transpose{T,S} # Strides and pointer for transposed strided arrays — but only if the elements are actually stored in memory Base.strides(A::Adjoint{<:Real, <:AbstractVector}) = (stride(A.parent, 2), stride(A.parent, 1)) @@ -318,8 +349,8 @@ Base.strides(A::Transpose{<:Any, <:AbstractVector}) = (stride(A.parent, 2), stri Base.strides(A::Adjoint{<:Real, <:AbstractMatrix}) = reverse(strides(A.parent)) Base.strides(A::Transpose{<:Any, <:AbstractMatrix}) = reverse(strides(A.parent)) -Base.unsafe_convert(::Type{Ptr{T}}, A::Adjoint{<:Real, <:AbstractVecOrMat}) where {T} = Base.unsafe_convert(Ptr{T}, A.parent) -Base.unsafe_convert(::Type{Ptr{T}}, A::Transpose{<:Any, <:AbstractVecOrMat}) where {T} = Base.unsafe_convert(Ptr{T}, A.parent) +Base.cconvert(::Type{Ptr{T}}, A::Adjoint{<:Real, <:AbstractVecOrMat}) where {T} = Base.cconvert(Ptr{T}, A.parent) +Base.cconvert(::Type{Ptr{T}}, A::Transpose{<:Any, <:AbstractVecOrMat}) where {T} = Base.cconvert(Ptr{T}, A.parent) Base.elsize(::Type{<:Adjoint{<:Real, P}}) where {P<:AbstractVecOrMat} = Base.elsize(P) Base.elsize(::Type{<:Transpose{<:Any, P}}) where {P<:AbstractVecOrMat} = Base.elsize(P) @@ -369,37 +400,60 @@ map(f, avs::AdjointAbsVec...) = adjoint(map((xs...) -> adjoint(f(adjoint.(xs)... map(f, tvs::TransposeAbsVec...) = transpose(map((xs...) -> transpose(f(transpose.(xs)...)), parent.(tvs)...)) quasiparentt(x) = parent(x); quasiparentt(x::Number) = x # to handle numbers in the defs below quasiparenta(x) = parent(x); quasiparenta(x::Number) = conj(x) # to handle numbers in the defs below +quasiparentc(x) = parent(parent(x)); quasiparentc(x::Number) = conj(x) # to handle numbers in the defs below broadcast(f, avs::Union{Number,AdjointAbsVec}...) = adjoint(broadcast((xs...) -> adjoint(f(adjoint.(xs)...)), quasiparenta.(avs)...)) broadcast(f, tvs::Union{Number,TransposeAbsVec}...) = transpose(broadcast((xs...) -> transpose(f(transpose.(xs)...)), quasiparentt.(tvs)...)) # Hack to preserve behavior after #32122; this needs to be done with a broadcast style instead to support dotted fusion Broadcast.broadcast_preserving_zero_d(f, avs::Union{Number,AdjointAbsVec}...) = adjoint(broadcast((xs...) 
-> adjoint(f(adjoint.(xs)...)), quasiparenta.(avs)...)) Broadcast.broadcast_preserving_zero_d(f, tvs::Union{Number,TransposeAbsVec}...) = transpose(broadcast((xs...) -> transpose(f(transpose.(xs)...)), quasiparentt.(tvs)...)) +Broadcast.broadcast_preserving_zero_d(f, tvs::Union{Number,Transpose{<:Any,<:AdjointAbsVec}}...) = + transpose(adjoint(broadcast((xs...) -> adjoint(transpose(f(conj.(xs)...))), quasiparentc.(tvs)...))) +Broadcast.broadcast_preserving_zero_d(f, tvs::Union{Number,Adjoint{<:Any,<:TransposeAbsVec}}...) = + adjoint(transpose(broadcast((xs...) -> transpose(adjoint(f(conj.(xs)...))), quasiparentc.(tvs)...))) # TODO unify and allow mixed combinations with a broadcast style ### reductions -# faster to sum the Array than to work through the wrapper -Base._mapreduce_dim(f, op, init::Base._InitialValue, A::Transpose, dims::Colon) = - transpose(Base._mapreduce_dim(_sandwich(transpose, f), _sandwich(transpose, op), init, parent(A), dims)) -Base._mapreduce_dim(f, op, init::Base._InitialValue, A::Adjoint, dims::Colon) = - adjoint(Base._mapreduce_dim(_sandwich(adjoint, f), _sandwich(adjoint, op), init, parent(A), dims)) +# faster to sum the Array than to work through the wrapper (but only in commutative reduction ops as in Base/permuteddimsarray.jl) +Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::Transpose, dims::Colon) = + Base._mapreduce_dim(f∘transpose, op, init, parent(A), dims) +Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::Adjoint, dims::Colon) = + Base._mapreduce_dim(f∘adjoint, op, init, parent(A), dims) +# in prod, use fast path only in the commutative case to avoid surprises +Base._mapreduce_dim(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, init::Base._InitialValue, A::Transpose{<:Union{Real,Complex}}, dims::Colon) = + Base._mapreduce_dim(f∘transpose, op, init, parent(A), dims) +Base._mapreduce_dim(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, init::Base._InitialValue, A::Adjoint{<:Union{Real,Complex}}, dims::Colon) = + Base._mapreduce_dim(f∘adjoint, op, init, parent(A), dims) +# count allows for optimization only if the parent array has Bool eltype +Base._count(::typeof(identity), A::Transpose{Bool}, ::Colon, init) = Base._count(identity, parent(A), :, init) +Base._count(::typeof(identity), A::Adjoint{Bool}, ::Colon, init) = Base._count(identity, parent(A), :, init) +Base._any(f, A::Transpose, ::Colon) = Base._any(f∘transpose, parent(A), :) +Base._any(f, A::Adjoint, ::Colon) = Base._any(f∘adjoint, parent(A), :) +Base._all(f, A::Transpose, ::Colon) = Base._all(f∘transpose, parent(A), :) +Base._all(f, A::Adjoint, ::Colon) = Base._all(f∘adjoint, parent(A), :) # sum(A'; dims) -Base.mapreducedim!(f, op, B::AbstractArray, A::TransposeAbsMat) = - transpose(Base.mapreducedim!(_sandwich(transpose, f), _sandwich(transpose, op), transpose(B), parent(A))) -Base.mapreducedim!(f, op, B::AbstractArray, A::AdjointAbsMat) = - adjoint(Base.mapreducedim!(_sandwich(adjoint, f), _sandwich(adjoint, op), adjoint(B), parent(A))) - -_sandwich(adj::Function, fun) = (xs...,) -> adj(fun(map(adj, xs)...)) -for fun in [:identity, :add_sum, :mul_prod] #, :max, :min] - @eval _sandwich(::Function, ::typeof(Base.$fun)) = Base.$fun -end - +Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray, A::TransposeAbsMat) = + (Base.mapreducedim!(f∘transpose, op, switch_dim12(B), parent(A)); B) +Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray, A::AdjointAbsMat) = + (Base.mapreducedim!(f∘adjoint, op, 
switch_dim12(B), parent(A)); B) +Base.mapreducedim!(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, B::AbstractArray, A::TransposeAbsMat{<:Union{Real,Complex}}) = + (Base.mapreducedim!(f∘transpose, op, switch_dim12(B), parent(A)); B) +Base.mapreducedim!(f::typeof(identity), op::Union{typeof(*),typeof(Base.mul_prod)}, B::AbstractArray, A::AdjointAbsMat{<:Union{Real,Complex}}) = + (Base.mapreducedim!(f∘adjoint, op, switch_dim12(B), parent(A)); B) + +switch_dim12(B::AbstractVector) = permutedims(B) +switch_dim12(B::AbstractVector{<:Number}) = transpose(B) # avoid allocs due to permutedims +switch_dim12(B::AbstractArray{<:Any,0}) = B +switch_dim12(B::AbstractArray) = PermutedDimsArray(B, (2, 1, ntuple(Base.Fix1(+,2), ndims(B) - 2)...)) ### linear algebra (-)(A::Adjoint) = Adjoint( -A.parent) (-)(A::Transpose) = Transpose(-A.parent) +tr(A::Adjoint) = adjoint(tr(parent(A))) +tr(A::Transpose) = transpose(tr(parent(A))) + ## multiplication * function _dot_nonrecursive(u, v) @@ -422,10 +476,6 @@ end # vector * Adjoint/Transpose-vector *(u::AbstractVector, v::AdjOrTransAbsVec) = broadcast(*, u, v) -# Adjoint/Transpose-vector * Adjoint/Transpose-vector -# (necessary for disambiguation with fallback methods in linalg/matmul) -*(u::AdjointAbsVec, v::AdjointAbsVec) = throw(MethodError(*, (u, v))) -*(u::TransposeAbsVec, v::TransposeAbsVec) = throw(MethodError(*, (u, v))) # AdjOrTransAbsVec{<:Any,<:AdjOrTransAbsVec} is a lazy conj vectors # We need to expand the combinations to avoid ambiguities @@ -441,16 +491,19 @@ pinv(v::TransposeAbsVec, tol::Real = 0) = pinv(conj(v.parent)).parent ## left-division \ \(u::AdjOrTransAbsVec, v::AdjOrTransAbsVec) = pinv(u) * v -\(u::AdjointAbsVec, y::Number) = adjoint(conj(y) / u.parent) -\(u::TransposeAbsVec, y::Number) = transpose(y / u.parent) ## right-division / /(u::AdjointAbsVec, A::AbstractMatrix) = adjoint(adjoint(A) \ u.parent) /(u::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A) \ u.parent) -/(u::AdjointAbsVec, A::Transpose{<:Any,<:AbstractMatrix}) = adjoint(conj(A.parent) \ u.parent) # technically should be adjoint(copy(adjoint(copy(A))) \ u.parent) -/(u::TransposeAbsVec, A::Adjoint{<:Any,<:AbstractMatrix}) = transpose(conj(A.parent) \ u.parent) # technically should be transpose(copy(transpose(copy(A))) \ u.parent) +/(u::AdjointAbsVec, A::TransposeAbsMat) = adjoint(conj(A.parent) \ u.parent) # technically should be adjoint(copy(adjoint(copy(A))) \ u.parent) +/(u::TransposeAbsVec, A::AdjointAbsMat) = transpose(conj(A.parent) \ u.parent) # technically should be transpose(copy(transpose(copy(A))) \ u.parent) ## complex conjugate conj(A::Transpose) = adjoint(A.parent) conj(A::Adjoint) = transpose(A.parent) + +## structured matrix methods ## +function Base.replace_in_print_matrix(A::AdjOrTrans,i::Integer,j::Integer,s::AbstractString) + Base.replace_in_print_matrix(parent(A), j, i, s) +end diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index 317ed15af770c..78c79b6fcefac 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -22,6 +22,9 @@ function Bidiagonal{T}(dv::AbstractVector, ev::AbstractVector, uplo::Union{Symbo convert(AbstractVector{T}, ev)::AbstractVector{T}, uplo) end +function Bidiagonal{T,V}(A::Bidiagonal) where {T,V<:AbstractVector{T}} + Bidiagonal{T,V}(A.dv, A.ev, A.uplo) +end """ Bidiagonal(dv::V, ev::V, uplo::Symbol) where V <: AbstractVector @@ -127,6 +130,32 @@ function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j) end end +@inline function 
Base.isassigned(A::Bidiagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + if i == j + return @inbounds isassigned(A.dv, i) + elseif A.uplo == 'U' && (i == j - 1) + return @inbounds isassigned(A.ev, i) + elseif A.uplo == 'L' && (i == j + 1) + return @inbounds isassigned(A.ev, j) + else + return true + end +end + +@inline function Base.isstored(A::Bidiagonal, i::Int, j::Int) + @boundscheck checkbounds(A, i, j) + if i == j + return @inbounds Base.isstored(A.dv, i) + elseif A.uplo == 'U' && (i == j - 1) + return @inbounds Base.isstored(A.ev, i) + elseif A.uplo == 'L' && (i == j + 1) + return @inbounds Base.isstored(A.ev, j) + else + return false + end +end + @inline function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j @@ -167,8 +196,9 @@ end #Converting from Bidiagonal to dense Matrix function Matrix{T}(A::Bidiagonal) where T n = size(A, 1) - B = zeros(T, n, n) + B = Matrix{T}(undef, n, n) n == 0 && return B + n > 1 && fill!(B, zero(T)) @inbounds for i = 1:n - 1 B[i,i] = A.dv[i] if A.uplo == 'U' @@ -184,7 +214,7 @@ Matrix(A::Bidiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(A) Array(A::Bidiagonal) = Matrix(A) promote_rule(::Type{Matrix{T}}, ::Type{<:Bidiagonal{S}}) where {T,S} = @isdefined(T) && @isdefined(S) ? Matrix{promote_type(T,S)} : Matrix -promote_rule(::Type{Matrix}, ::Type{<:Bidiagonal}) = Matrix +promote_rule(::Type{<:Matrix}, ::Type{<:Bidiagonal}) = Matrix #Converting from Bidiagonal to Tridiagonal function Tridiagonal{T}(A::Bidiagonal) where T @@ -198,13 +228,22 @@ promote_rule(::Type{<:Tridiagonal{T}}, ::Type{<:Bidiagonal{S}}) where {T,S} = promote_rule(::Type{<:Tridiagonal}, ::Type{<:Bidiagonal}) = Tridiagonal # When asked to convert Bidiagonal to AbstractMatrix{T}, preserve structure by converting to Bidiagonal{T} <: AbstractMatrix{T} -AbstractMatrix{T}(A::Bidiagonal) where {T} = convert(Bidiagonal{T}, A) +AbstractMatrix{T}(A::Bidiagonal) where {T} = Bidiagonal{T}(A) +AbstractMatrix{T}(A::Bidiagonal{T}) where {T} = copy(A) -convert(T::Type{<:Bidiagonal}, m::AbstractMatrix) = m isa T ? m : T(m) +convert(::Type{T}, m::AbstractMatrix) where {T<:Bidiagonal} = m isa T ? m : T(m)::T similar(B::Bidiagonal, ::Type{T}) where {T} = Bidiagonal(similar(B.dv, T), similar(B.ev, T), B.uplo) -similar(B::Bidiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = zeros(T, dims...) 
+similar(B::Bidiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(B.dv, T, dims) + +tr(B::Bidiagonal) = sum(B.dv) +function kron(A::Diagonal, B::Bidiagonal) + # `_droplast!` is only guaranteed to work with `Vector` + kdv = _makevector(kron(diag(A), B.dv)) + kev = _droplast!(_makevector(kron(diag(A), _pushzero(B.ev)))) + Bidiagonal(kdv, kev, B.uplo) +end ################### # LAPACK routines # @@ -233,16 +272,7 @@ function show(io::IO, M::Bidiagonal) print_matrix(io, (M.ev)') end -size(M::Bidiagonal) = (length(M.dv), length(M.dv)) -function size(M::Bidiagonal, d::Integer) - if d < 1 - throw(ArgumentError("dimension must be ≥ 1, got $d")) - elseif d <= 2 - return length(M.dv) - else - return 1 - end -end +size(M::Bidiagonal) = (n = length(M.dv); (n, n)) #Elementary operations for func in (:conj, :copy, :real, :imag) @@ -394,41 +424,34 @@ function ==(A::Bidiagonal, B::Bidiagonal) end end +const BandedMatrix = Union{Bidiagonal,Diagonal,Tridiagonal,SymTridiagonal} # or BiDiTriSym const BiTriSym = Union{Bidiagonal,Tridiagonal,SymTridiagonal} +const TriSym = Union{Tridiagonal,SymTridiagonal} const BiTri = Union{Bidiagonal,Tridiagonal} -@inline mul!(C::AbstractMatrix, A::SymTridiagonal, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::BiTriSym, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::AbstractTriangular, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::AbstractMatrix, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::Diagonal, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:AbstractVecOrMat}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:AbstractVecOrMat}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractVector, A::BiTriSym, B::AbstractVector, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::BiTriSym, B::AbstractVecOrMat, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractVector, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = throw(MethodError(mul!, (C, A, B)), MulAddMul(alpha, beta)) -@inline mul!(C::AbstractVector, A::BiTriSym, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = throw(MethodError(mul!, (C, A, B)), MulAddMul(alpha, beta)) +@inline mul!(C::AbstractVector, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) +@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractVector, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) 
+@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) +@inline mul!(C::AbstractMatrix, A::AbstractMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) +@inline mul!(C::AbstractMatrix, A::BandedMatrix, B::BandedMatrix, alpha::Number, beta::Number) = _mul!(C, A, B, MulAddMul(alpha, beta)) + +lmul!(A::Bidiagonal, B::AbstractVecOrMat) = @inline _mul!(B, A, B, MulAddMul()) +rmul!(B::AbstractMatrix, A::Bidiagonal) = @inline _mul!(B, B, A, MulAddMul()) function check_A_mul_B!_sizes(C, A, B) - require_one_based_indexing(C) - require_one_based_indexing(A) - require_one_based_indexing(B) - nA, mA = size(A) - nB, mB = size(B) - nC, mC = size(C) - if nA != nC - throw(DimensionMismatch("sizes size(A)=$(size(A)) and size(C) = $(size(C)) must match at first entry.")) - elseif mA != nB - throw(DimensionMismatch("second entry of size(A)=$(size(A)) and first entry of size(B) = $(size(B)) must match.")) - elseif mB != mC - throw(DimensionMismatch("sizes size(B)=$(size(B)) and size(C) = $(size(C)) must match at first second entry.")) + mA, nA = size(A) + mB, nB = size(B) + mC, nC = size(C) + if mA != mC + throw(DimensionMismatch("first dimension of A, $mA, and first dimension of output C, $mC, must match")) + elseif nA != mB + throw(DimensionMismatch("second dimension of A, $nA, and first dimension of B, $mB, must match")) + elseif nB != nC + throw(DimensionMismatch("second dimension of output C, $nC, and second dimension of B, $nB, must match")) end end # function to get the internally stored vectors for Bidiagonal and [Sym]Tridiagonal -# to avoid allocations in A_mul_B_td! below (#24324, #24578) +# to avoid allocations in _mul! below (#24324, #24578) _diag(A::Tridiagonal, k) = k == -1 ? A.dl : k == 0 ? A.d : A.du _diag(A::SymTridiagonal, k) = k == 0 ? 
A.dv : A.ev function _diag(A::Bidiagonal, k) @@ -441,8 +464,11 @@ function _diag(A::Bidiagonal, k) end end -function A_mul_B_td!(C::AbstractMatrix, A::BiTriSym, B::BiTriSym, - _add::MulAddMul = MulAddMul()) +_mul!(C::AbstractMatrix, A::BiTriSym, B::TriSym, _add::MulAddMul = MulAddMul()) = + _bibimul!(C, A, B, _add) +_mul!(C::AbstractMatrix, A::BiTriSym, B::Bidiagonal, _add::MulAddMul = MulAddMul()) = + _bibimul!(C, A, B, _add) +function _bibimul!(C, A, B, _add) check_A_mul_B!_sizes(C, A, B) n = size(A,1) n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) @@ -499,10 +525,11 @@ function A_mul_B_td!(C::AbstractMatrix, A::BiTriSym, B::BiTriSym, C end -function A_mul_B_td!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, - _add::MulAddMul = MulAddMul()) +function _mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, _add::MulAddMul = MulAddMul()) + require_one_based_indexing(C) check_A_mul_B!_sizes(C, A, B) n = size(A,1) + iszero(n) && return C n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) _rmul_or_fill!(C, _add.beta) # see the same use above iszero(_add.alpha) && return C @@ -534,10 +561,8 @@ function A_mul_B_td!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, C end -function A_mul_B_td!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, - _add::MulAddMul = MulAddMul()) - require_one_based_indexing(C) - require_one_based_indexing(B) +function _mul!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, _add::MulAddMul = MulAddMul()) + require_one_based_indexing(C, B) nA = size(A,1) nB = size(B,2) if !(size(C,1) == size(B,1) == nA) @@ -546,6 +571,7 @@ function A_mul_B_td!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, if size(C,2) != nB throw(DimensionMismatch("A has second dimension $nA, B has $(size(B,2)), C has $(size(C,2)) but all must match")) end + iszero(nA) && return C iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) nA <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) l = _diag(A, -1) @@ -565,8 +591,8 @@ function A_mul_B_td!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, C end -function A_mul_B_td!(C::AbstractMatrix, A::AbstractMatrix, B::BiTriSym, - _add::MulAddMul = MulAddMul()) +function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::TriSym, _add::MulAddMul = MulAddMul()) + require_one_based_indexing(C, A) check_A_mul_B!_sizes(C, A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) n = size(A,1) @@ -600,8 +626,38 @@ function A_mul_B_td!(C::AbstractMatrix, A::AbstractMatrix, B::BiTriSym, C end -function A_mul_B_td!(C::AbstractMatrix, A::Diagonal, B::BiTriSym, - _add::MulAddMul = MulAddMul()) +function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal, _add::MulAddMul = MulAddMul()) + require_one_based_indexing(C, A) + check_A_mul_B!_sizes(C, A, B) + iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) + if size(A, 1) <= 3 || size(B, 2) <= 1 + return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) + end + m, n = size(A) + @inbounds if B.uplo == 'U' + for i in 1:m + for j in n:-1:2 + _modify!(_add, A[i,j] * B.dv[j] + A[i,j-1] * B.ev[j-1], C, (i, j)) + end + _modify!(_add, A[i,1] * B.dv[1], C, (i, 1)) + end + else # uplo == 'L' + for i in 1:m + for j in 1:n-1 + _modify!(_add, A[i,j] * B.dv[j] + A[i,j+1] * B.ev[j], C, (i, j)) + end + _modify!(_add, A[i,n] * B.dv[n], C, (i, n)) + end + end + C +end + +_mul!(C::AbstractMatrix, A::Diagonal, B::Bidiagonal, _add::MulAddMul = MulAddMul()) = + _dibimul!(C, A, B, _add) +_mul!(C::AbstractMatrix, A::Diagonal, B::TriSym, _add::MulAddMul = 
MulAddMul()) = + _dibimul!(C, A, B, _add) +function _dibimul!(C, A, B, _add) + require_one_based_indexing(C) check_A_mul_B!_sizes(C, A, B) n = size(A,1) n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) @@ -636,80 +692,55 @@ function A_mul_B_td!(C::AbstractMatrix, A::Diagonal, B::BiTriSym, C end -function *(A::AbstractTriangular, B::Union{SymTridiagonal, Tridiagonal}) - TS = promote_op(matprod, eltype(A), eltype(B)) - A_mul_B_td!(zeros(TS, size(A)), A, B) -end - -const UpperOrUnitUpperTriangular{T} = Union{UpperTriangular{T}, UnitUpperTriangular{T}} -const LowerOrUnitLowerTriangular{T} = Union{LowerTriangular{T}, UnitLowerTriangular{T}} - function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal) TS = promote_op(matprod, eltype(A), eltype(B)) - C = A_mul_B_td!(zeros(TS, size(A)), A, B) + C = mul!(similar(A, TS, size(A)), A, B) return B.uplo == 'U' ? UpperTriangular(C) : C end function *(A::LowerOrUnitLowerTriangular, B::Bidiagonal) TS = promote_op(matprod, eltype(A), eltype(B)) - C = A_mul_B_td!(zeros(TS, size(A)), A, B) + C = mul!(similar(A, TS, size(A)), A, B) return B.uplo == 'L' ? LowerTriangular(C) : C end -function *(A::Union{SymTridiagonal, Tridiagonal}, B::AbstractTriangular) - TS = promote_op(matprod, eltype(A), eltype(B)) - A_mul_B_td!(zeros(TS, size(A)), A, B) -end - function *(A::Bidiagonal, B::UpperOrUnitUpperTriangular) TS = promote_op(matprod, eltype(A), eltype(B)) - C = A_mul_B_td!(zeros(TS, size(A)), A, B) + C = mul!(similar(B, TS, size(B)), A, B) return A.uplo == 'U' ? UpperTriangular(C) : C end function *(A::Bidiagonal, B::LowerOrUnitLowerTriangular) TS = promote_op(matprod, eltype(A), eltype(B)) - C = A_mul_B_td!(zeros(TS, size(A)), A, B) + C = mul!(similar(B, TS, size(B)), A, B) return A.uplo == 'L' ? LowerTriangular(C) : C end -function *(A::BiTri, B::Diagonal) - TS = promote_op(matprod, eltype(A), eltype(B)) - A_mul_B_td!(similar(A, TS), A, B) -end - -function *(A::Diagonal, B::BiTri) - TS = promote_op(matprod, eltype(A), eltype(B)) - A_mul_B_td!(similar(B, TS), A, B) -end - function *(A::Diagonal, B::SymTridiagonal) - TS = promote_op(matprod, eltype(A), eltype(B)) - A_mul_B_td!(Tridiagonal(zeros(TS, size(A, 1)-1), zeros(TS, size(A, 1)), zeros(TS, size(A, 1)-1)), A, B) + TS = promote_op(*, eltype(A), eltype(B)) + out = Tridiagonal(similar(A, TS, size(A, 1)-1), similar(A, TS, size(A, 1)), similar(A, TS, size(A, 1)-1)) + mul!(out, A, B) end function *(A::SymTridiagonal, B::Diagonal) - TS = promote_op(matprod, eltype(A), eltype(B)) - A_mul_B_td!(Tridiagonal(zeros(TS, size(A, 1)-1), zeros(TS, size(A, 1)), zeros(TS, size(A, 1)-1)), A, B) -end - -function *(A::BiTriSym, B::BiTriSym) - TS = promote_op(matprod, eltype(A), eltype(B)) - mul!(similar(A, TS, size(A)), A, B) + TS = promote_op(*, eltype(A), eltype(B)) + out = Tridiagonal(similar(A, TS, size(A, 1)-1), similar(A, TS, size(A, 1)), similar(A, TS, size(A, 1)-1)) + mul!(out, A, B) end function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector) require_one_based_indexing(x, y) nx, ny = length(x), length(y) (nx == size(B, 1) == ny) || throw(DimensionMismatch()) - if iszero(nx) - return dot(zero(eltype(x)), zero(eltype(B)), zero(eltype(y))) + if nx ≤ 1 + nx == 0 && return dot(zero(eltype(x)), zero(eltype(B)), zero(eltype(y))) + return dot(x[1], B.dv[1], y[1]) end ev, dv = B.ev, B.dv - if B.uplo == 'U' + @inbounds if B.uplo == 'U' x₀ = x[1] r = dot(x[1], dv[1], y[1]) - @inbounds for j in 2:nx-1 + for j in 2:nx-1 x₋, x₀ = x₀, x[j] r += dot(adjoint(ev[j-1])*x₋ + adjoint(dv[j])*x₀, y[j]) end @@ -719,7 
+750,7 @@ function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector) x₀ = x[1] x₊ = x[2] r = dot(adjoint(dv[1])*x₀ + adjoint(ev[1])*x₊, y[1]) - @inbounds for j in 2:nx-1 + for j in 2:nx-1 x₀, x₊ = x₊, x[j+1] r += dot(adjoint(dv[j])*x₀ + adjoint(ev[j])*x₊, y[j]) end @@ -765,57 +796,39 @@ function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat) end return c end -ldiv!(A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b) -ldiv!(A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b) -ldiv!(c::AbstractVecOrMat, A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = - (_rdiv!(transpose(c), transpose(b), transpose(A)); return c) -ldiv!(c::AbstractVecOrMat, A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = - (_rdiv!(adjoint(c), adjoint(b), adjoint(A)); return c) +ldiv!(A::AdjOrTrans{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b) +ldiv!(c::AbstractVecOrMat, A::AdjOrTrans{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = + (t = wrapperop(A); _rdiv!(t(c), t(b), t(A)); return c) ### Generic promotion methods and fallbacks -function \(A::Bidiagonal{<:Number}, B::AbstractVecOrMat{<:Number}) - TA, TB = eltype(A), eltype(B) - TAB = typeof((oneunit(TA))\oneunit(TB)) - ldiv!(zeros(TAB, size(B)), A, B) -end -\(A::Bidiagonal, B::AbstractVecOrMat) = ldiv!(copy(B), A, B) -\(tA::Transpose{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(tA) \ B -\(adjA::Adjoint{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(adjA) \ B +\(A::Bidiagonal, B::AbstractVecOrMat) = ldiv!(_initarray(\, eltype(A), eltype(B), B), A, B) +\(xA::AdjOrTrans{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(xA) \ B ### Triangular specializations -function \(B::Bidiagonal{<:Number}, U::UpperOrUnitUpperTriangular{<:Number}) - T = typeof((oneunit(eltype(B)))\oneunit(eltype(U))) - A = ldiv!(zeros(T, size(U)), B, U) - return B.uplo == 'U' ? UpperTriangular(A) : A -end -function \(B::Bidiagonal, U::UpperOrUnitUpperTriangular) - A = ldiv!(copy(parent(U)), B, U) - return B.uplo == 'U' ? UpperTriangular(A) : A -end -function \(B::Bidiagonal{<:Number}, L::LowerOrUnitLowerTriangular{<:Number}) - T = typeof((oneunit(eltype(B)))\oneunit(eltype(L))) - A = ldiv!(zeros(T, size(L)), B, L) - return B.uplo == 'L' ? LowerTriangular(A) : A +for tri in (:UpperTriangular, :UnitUpperTriangular) + @eval function \(B::Bidiagonal, U::$tri) + A = ldiv!(_initarray(\, eltype(B), eltype(U), U), B, U) + return B.uplo == 'U' ? UpperTriangular(A) : A + end + @eval function \(U::$tri, B::Bidiagonal) + A = ldiv!(_initarray(\, eltype(U), eltype(B), U), U, B) + return B.uplo == 'U' ? UpperTriangular(A) : A + end end -function \(B::Bidiagonal, L::LowerOrUnitLowerTriangular) - A = ldiv!(copy(parent(L)), B, L) - return B.uplo == 'L' ? LowerTriangular(A) : A +for tri in (:LowerTriangular, :UnitLowerTriangular) + @eval function \(B::Bidiagonal, L::$tri) + A = ldiv!(_initarray(\, eltype(B), eltype(L), L), B, L) + return B.uplo == 'L' ? LowerTriangular(A) : A + end + @eval function \(L::$tri, B::Bidiagonal) + A = ldiv!(_initarray(\, eltype(L), eltype(B), L), L, B) + return B.uplo == 'L' ? LowerTriangular(A) : A + end end -function \(U::UpperOrUnitUpperTriangular{<:Number}, B::Bidiagonal{<:Number}) - T = typeof((oneunit(eltype(U)))/oneunit(eltype(B))) - A = ldiv!(U, copy_similar(B, T)) - return B.uplo == 'U' ? 
UpperTriangular(A) : A -end -function \(L::LowerOrUnitLowerTriangular{<:Number}, B::Bidiagonal{<:Number}) - T = typeof((oneunit(eltype(L)))/oneunit(eltype(B))) - A = ldiv!(L, copy_similar(B, T)) - return B.uplo == 'L' ? LowerTriangular(A) : A -end ### Diagonal specialization -function \(B::Bidiagonal{<:Number}, D::Diagonal{<:Number}) - T = typeof((oneunit(eltype(B)))\oneunit(eltype(D))) - A = ldiv!(zeros(T, size(D)), B, D) +function \(B::Bidiagonal, D::Diagonal) + A = ldiv!(_initarray(\, eltype(B), eltype(D), D), B, D) return B.uplo == 'U' ? UpperTriangular(A) : LowerTriangular(A) end @@ -861,61 +874,43 @@ function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal) C end rdiv!(A::AbstractMatrix, B::Bidiagonal) = @inline _rdiv!(A, A, B) -rdiv!(A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B) -rdiv!(A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B) -_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) = - (ldiv!(adjoint(C), adjoint(B), adjoint(A)); return C) -_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) = - (ldiv!(transpose(C), transpose(B), transpose(A)); return C) +rdiv!(A::AbstractMatrix, B::AdjOrTrans{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B) +_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::AdjOrTrans{<:Any,<:Bidiagonal}) = + (t = wrapperop(B); ldiv!(t(C), t(B), t(A)); return C) -function /(A::AbstractMatrix{<:Number}, B::Bidiagonal{<:Number}) - TA, TB = eltype(A), eltype(B) - TAB = typeof((oneunit(TA))/oneunit(TB)) - _rdiv!(zeros(TAB, size(A)), A, B) -end -/(A::AbstractMatrix, B::Bidiagonal) = _rdiv!(copy(A), A, B) +/(A::AbstractMatrix, B::Bidiagonal) = _rdiv!(_initarray(/, eltype(A), eltype(B), A), A, B) ### Triangular specializations -function /(U::UpperOrUnitUpperTriangular{<:Number}, B::Bidiagonal{<:Number}) - T = typeof((oneunit(eltype(U)))/oneunit(eltype(B))) - A = _rdiv!(zeros(T, size(U)), U, B) - return B.uplo == 'U' ? UpperTriangular(A) : A -end -function /(U::UpperOrUnitUpperTriangular, B::Bidiagonal) - A = _rdiv!(copy(parent(U)), U, B) - return B.uplo == 'U' ? UpperTriangular(A) : A -end -function /(L::LowerOrUnitLowerTriangular{<:Number}, B::Bidiagonal{<:Number}) - T = typeof((oneunit(eltype(L)))/oneunit(eltype(B))) - A = _rdiv!(zeros(T, size(L)), L, B) - return B.uplo == 'L' ? LowerTriangular(A) : A -end -function /(L::LowerOrUnitLowerTriangular, B::Bidiagonal) - A = _rdiv!(copy(parent(L)), L, B) - return B.uplo == 'L' ? LowerTriangular(A) : A -end -function /(B::Bidiagonal{<:Number}, U::UpperOrUnitUpperTriangular{<:Number}) - T = typeof((oneunit(eltype(B)))/oneunit(eltype(U))) - A = rdiv!(copy_similar(B, T), U) - return B.uplo == 'U' ? UpperTriangular(A) : A -end -function /(B::Bidiagonal{<:Number}, L::LowerOrUnitLowerTriangular{<:Number}) - T = typeof((oneunit(eltype(B)))\oneunit(eltype(L))) - A = rdiv!(copy_similar(B, T), L) - return B.uplo == 'L' ? LowerTriangular(A) : A +for tri in (:UpperTriangular, :UnitUpperTriangular) + @eval function /(U::$tri, B::Bidiagonal) + A = _rdiv!(_initarray(/, eltype(U), eltype(B), U), U, B) + return B.uplo == 'U' ? UpperTriangular(A) : A + end + @eval function /(B::Bidiagonal, U::$tri) + A = _rdiv!(_initarray(/, eltype(B), eltype(U), U), B, U) + return B.uplo == 'U' ? UpperTriangular(A) : A + end end +for tri in (:LowerTriangular, :UnitLowerTriangular) + @eval function /(L::$tri, B::Bidiagonal) + A = _rdiv!(_initarray(/, eltype(L), eltype(B), L), L, B) + return B.uplo == 'L' ? 
LowerTriangular(A) : A + end + @eval function /(B::Bidiagonal, L::$tri) + A = _rdiv!(_initarray(/, eltype(B), eltype(L), L), B, L) + return B.uplo == 'L' ? LowerTriangular(A) : A + end +end + ### Diagonal specialization -function /(D::Diagonal{<:Number}, B::Bidiagonal{<:Number}) - T = typeof((oneunit(eltype(D)))/oneunit(eltype(B))) - A = _rdiv!(zeros(T, size(D)), D, B) +function /(D::Diagonal, B::Bidiagonal) + A = _rdiv!(_initarray(/, eltype(D), eltype(B), D), D, B) return B.uplo == 'U' ? UpperTriangular(A) : LowerTriangular(A) end /(A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) = A / copy(B) /(A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) = A / copy(B) # disambiguation -/(A::AdjointAbsVec{<:Number}, B::Bidiagonal{<:Number}) = adjoint(adjoint(B) \ parent(A)) -/(A::TransposeAbsVec{<:Number}, B::Bidiagonal{<:Number}) = transpose(transpose(B) \ parent(A)) /(A::AdjointAbsVec, B::Bidiagonal) = adjoint(adjoint(B) \ parent(A)) /(A::TransposeAbsVec, B::Bidiagonal) = transpose(transpose(B) \ parent(A)) /(A::AdjointAbsVec, B::Transpose{<:Any,<:Bidiagonal}) = adjoint(adjoint(B) \ parent(A)) @@ -926,13 +921,13 @@ end factorize(A::Bidiagonal) = A function inv(B::Bidiagonal{T}) where T n = size(B, 1) - dest = zeros(typeof(oneunit(T)\one(T)), (n, n)) - ldiv!(dest, B, Diagonal{typeof(one(T)\one(T))}(I, n)) + dest = zeros(typeof(inv(oneunit(T))), (n, n)) + ldiv!(dest, B, Diagonal{typeof(one(T)/one(T))}(I, n)) return B.uplo == 'U' ? UpperTriangular(dest) : LowerTriangular(dest) end # Eigensystems -eigvals(M::Bidiagonal) = M.dv +eigvals(M::Bidiagonal) = copy(M.dv) function eigvecs(M::Bidiagonal{T}) where T n = length(M.dv) Q = Matrix{T}(undef, n,n) diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl index 2710559e57d6b..6cc0edfd9f3a1 100644 --- a/stdlib/LinearAlgebra/src/blas.jl +++ b/stdlib/LinearAlgebra/src/blas.jl @@ -9,44 +9,63 @@ import Base: copyto! using Base: require_one_based_indexing, USE_BLAS64 export +# Note: `xFUNC_NAME` is a placeholder for not exported BLAS functions +# ref: http://www.netlib.org/blas/blasqr.pdf # Level 1 - asum, - axpy!, - axpby!, - blascopy!, - dotc, - dotu, + # xROTG + # xROTMG rot!, + # xROTM + # xSWAP scal!, scal, + blascopy!, + # xAXPY!, + # xAXPBY!, + # xDOT + dotc, + dotu, + # xxDOT nrm2, + asum, iamax, # Level 2 - gbmv!, - gbmv, gemv!, gemv, + gbmv!, + gbmv, hemv!, hemv, + # xHBMV hpmv!, + symv!, + symv, sbmv!, sbmv, spmv!, - spr!, - symv!, - symv, - trsv!, - trsv, trmv!, trmv, + # xTBMV + # xTPMV + trsv!, + trsv, + # xTBSV + # xTPSV ger!, - syr!, + geru!, + # xGERU + # xGERC her!, + # xHPR + # xHER2 + # xHPR2 + syr!, + spr!, + # xSYR2 + # xSPR2 # Level 3 - herk!, - herk, - her2k!, - her2k, + gemmt!, + gemmt, gemm!, gemm, symm!, @@ -55,8 +74,12 @@ export hemm, syrk!, syrk, + herk!, + herk, syr2k!, syr2k, + her2k!, + her2k, trmm!, trmm, trsm!, @@ -66,6 +89,13 @@ using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, Blas include("lbt.jl") +# Legacy bindings that some packages (such as NNlib.jl) use. +# We maintain these for backwards-compatibility but new packages +# should not look at these, instead preferring to parse the output +# of BLAS.get_config() +const libblas = libblastrampoline +const liblapack = libblastrampoline + vendor() = :lbt """ @@ -147,18 +177,19 @@ end # Level 1 # A help function to pick the pointer and inc for 1d like inputs. 
@inline function vec_pointer_stride(x::AbstractArray, stride0check = nothing) - isdense(x) && return pointer(x), 1 # simpify runtime check when possibe - ndims(x) == 1 || strides(x) == Base.size_to_strides(stride(x, 1), size(x)...) || - throw(ArgumentError("only support vector like inputs")) - st = stride(x, 1) + Base._checkcontiguous(Bool, x) && return pointer(x), 1 # simplify runtime check when possible + st, ptr = checkedstride(x), pointer(x) isnothing(stride0check) || (st == 0 && throw(stride0check)) - ptr = st > 0 ? pointer(x) : pointer(x, lastindex(x)) + ptr += min(st, 0) * sizeof(eltype(x)) * (length(x) - 1) ptr, st end -isdense(x) = x isa DenseArray -isdense(x::Base.FastContiguousSubArray) = isdense(parent(x)) -isdense(x::Base.ReshapedArray) = isdense(parent(x)) -isdense(x::Base.ReinterpretArray) = isdense(parent(x)) +function checkedstride(x::AbstractArray) + szs::Dims = size(x) + sts::Dims = strides(x) + _, st, n = Base.merge_adjacent_dim(szs, sts) + n === ndims(x) && return st + throw(ArgumentError("only support vector like inputs")) +end ## copy """ @@ -968,6 +999,9 @@ The scalar inputs `α` and `β` must be complex or real numbers. The array inputs `x`, `y` and `AP` must all be of `ComplexF32` or `ComplexF64` type. Return the updated `y`. + +!!! compat "Julia 1.5" + `hpmv!` requires at least Julia 1.5. """ hpmv! @@ -1125,6 +1159,9 @@ The scalar inputs `α` and `β` must be real. The array inputs `x`, `y` and `AP` must all be of `Float32` or `Float64` type. Return the updated `y`. + +!!! compat "Julia 1.5" + `spmv!` requires at least Julia 1.5. """ spmv! @@ -1193,6 +1230,9 @@ The scalar input `α` must be real. The array inputs `x` and `AP` must all be of `Float32` or `Float64` type. Return the updated `AP`. + +!!! compat "Julia 1.8" + `spr!` requires at least Julia 1.8. """ spr! @@ -1378,6 +1418,41 @@ for (fname, elty) in ((:dger_,:Float64), end end +### geru + +""" + geru!(alpha, x, y, A) + +Rank-1 update of the matrix `A` with vectors `x` and `y` as `alpha*x*transpose(y) + A`. +""" +function geru! end + +for (fname, elty) in ((:zgeru_,:ComplexF64), (:cgeru_,:ComplexF32)) + @eval begin + function geru!(α::$elty, x::AbstractVector{$elty}, y::AbstractVector{$elty}, A::AbstractMatrix{$elty}) + require_one_based_indexing(A, x, y) + m, n = size(A) + if m != length(x) || n != length(y) + throw(DimensionMismatch(lazy"A has size ($m,$n), x has length $(length(x)), y has length $(length(y))")) + end + px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed")) + py, sty = vec_pointer_stride(y, ArgumentError("input vector with 0 stride is not allowed")) + GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid, + (Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}), + m, n, α, px, stx, py, sty, A, max(1,stride(A,2))) + A + end + end +end +for elty in (:Float64, :Float32) + @eval begin + geru!(α::$elty, x::AbstractVector{$elty}, y::AbstractVector{$elty}, A::AbstractMatrix{$elty}) = + ger!(α, x, y, A) + end +end + ### syr """ @@ -1444,6 +1519,88 @@ end # Level 3 ## (GE) general matrix-matrix multiplication +""" + gemmt!(uplo, tA, tB, alpha, A, B, beta, C) + +Update the lower or upper triangular part specified by [`uplo`](@ref stdlib-blas-uplo) of `C` as +`alpha*A*B + beta*C` or the other variants according to [`tA`](@ref stdlib-blas-trans) and `tB`. +Return the updated `C`. + +!!! compat "Julia 1.11" + `gemmt!` requires at least Julia 1.11. +""" +function gemmt! 
end + +for (gemmt, elty) in + ((:dgemmt_,:Float64), + (:sgemmt_,:Float32), + (:zgemmt_,:ComplexF64), + (:cgemmt_,:ComplexF32)) + @eval begin + # SUBROUTINE DGEMMT(UPLO,TRANSA,TRANSB,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) + # * .. Scalar Arguments .. + # DOUBLE PRECISION ALPHA,BETA + # INTEGER K,LDA,LDB,LDC,N + # CHARACTER UPLO,TRANSA,TRANSB + # * .. Array Arguments .. + # DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) + function gemmt!(uplo::AbstractChar, transA::AbstractChar, transB::AbstractChar, + alpha::Union{($elty), Bool}, + A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty}, + beta::Union{($elty), Bool}, + C::AbstractVecOrMat{$elty}) + chkuplo(uplo) + require_one_based_indexing(A, B, C) + m = size(A, transA == 'N' ? 1 : 2) + ka = size(A, transA == 'N' ? 2 : 1) + kb = size(B, transB == 'N' ? 1 : 2) + n = size(B, transB == 'N' ? 2 : 1) + if ka != kb || m != n || m != size(C,1) || n != size(C,2) + throw(DimensionMismatch(lazy"A has size ($m,$ka), B has size ($kb,$n), C has size $(size(C))")) + end + chkstride1(A) + chkstride1(B) + chkstride1(C) + ccall((@blasfunc($gemmt), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, + Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, + Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, + Ref{BlasInt}, Clong, Clong, Clong), + uplo, transA, transB, n, + ka, alpha, A, max(1,stride(A,2)), + B, max(1,stride(B,2)), beta, C, + max(1,stride(C,2)), 1, 1, 1) + C + end + function gemmt(uplo::AbstractChar, transA::AbstractChar, transB::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) + gemmt!(uplo, transA, transB, alpha, A, B, zero($elty), similar(B, $elty, (size(A, transA == 'N' ? 1 : 2), size(B, transB == 'N' ? 2 : 1)))) + end + function gemmt(uplo::AbstractChar, transA::AbstractChar, transB::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) + gemmt(uplo, transA, transB, one($elty), A, B) + end + end +end + +""" + gemmt(uplo, tA, tB, alpha, A, B) + +Return the lower or upper triangular part specified by [`uplo`](@ref stdlib-blas-uplo) of `A*B` or the other three variants according to [`tA`](@ref stdlib-blas-trans) and `tB`. + +!!! compat "Julia 1.11" + `gemmt` requires at least Julia 1.11. +""" +gemmt(uplo, tA, tB, alpha, A, B) + +""" + gemmt(uplo, tA, tB, A, B) + +Return the lower or upper triangular part specified by [`uplo`](@ref stdlib-blas-uplo) of `A*B` or the other three variants according to [`tA`](@ref stdlib-blas-trans) and `tB`. + +!!! compat "Julia 1.11" + `gemmt` requires at least Julia 1.11. +""" +gemmt(uplo, tA, tB, A, B) + """ gemm!(tA, tB, alpha, A, B, beta, C) @@ -1539,11 +1696,27 @@ for (mfname, elty) in ((:dsymm_,:Float64), require_one_based_indexing(A, B, C) m, n = size(C) j = checksquare(A) - if j != (side == 'L' ? 
m : n) - throw(DimensionMismatch(lazy"A has size $(size(A)), C has size ($m,$n)")) - end - if size(B,2) != n - throw(DimensionMismatch(lazy"B has second dimension $(size(B,2)) but needs to match second dimension of C, $n")) + M, N = size(B) + if side == 'L' + if j != m + throw(DimensionMismatch(lazy"A has first dimension $j but needs to match first dimension of C, $m")) + end + if N != n + throw(DimensionMismatch(lazy"B has second dimension $N but needs to match second dimension of C, $n")) + end + if j != M + throw(DimensionMismatch(lazy"A has second dimension $j but needs to match first dimension of B, $M")) + end + else + if j != n + throw(DimensionMismatch(lazy"B has second dimension $j but needs to match second dimension of C, $n")) + end + if N != j + throw(DimensionMismatch(lazy"A has second dimension $N but needs to match first dimension of B, $j")) + end + if M != m + throw(DimensionMismatch(lazy"A has first dimension $M but needs to match first dimension of C, $m")) + end end chkstride1(A) chkstride1(B) @@ -1613,11 +1786,27 @@ for (mfname, elty) in ((:zhemm_,:ComplexF64), require_one_based_indexing(A, B, C) m, n = size(C) j = checksquare(A) - if j != (side == 'L' ? m : n) - throw(DimensionMismatch(lazy"A has size $(size(A)), C has size ($m,$n)")) - end - if size(B,2) != n - throw(DimensionMismatch(lazy"B has second dimension $(size(B,2)) but needs to match second dimension of C, $n")) + M, N = size(B) + if side == 'L' + if j != m + throw(DimensionMismatch(lazy"A has first dimension $j but needs to match first dimension of C, $m")) + end + if N != n + throw(DimensionMismatch(lazy"B has second dimension $N but needs to match second dimension of C, $n")) + end + if j != M + throw(DimensionMismatch(lazy"A has second dimension $j but needs to match first dimension of B, $M")) + end + else + if j != n + throw(DimensionMismatch(lazy"B has second dimension $j but needs to match second dimension of C, $n")) + end + if N != j + throw(DimensionMismatch(lazy"A has second dimension $N but needs to match first dimension of B, $j")) + end + if M != m + throw(DimensionMismatch(lazy"A has first dimension $M but needs to match first dimension of C, $m")) + end end chkstride1(A) chkstride1(B) @@ -1675,14 +1864,14 @@ hemm! Rank-k update of the symmetric matrix `C` as `alpha*A*transpose(A) + beta*C` or `alpha*transpose(A)*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Returns `C`. +Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. """ function syrk! end """ syrk(uplo, trans, alpha, A) -Returns either the upper triangle or the lower triangle of `A`, +Return either the upper triangle or the lower triangle of `A`, according to [`uplo`](@ref stdlib-blas-uplo), of `alpha*A*transpose(A)` or `alpha*transpose(A)*A`, according to [`trans`](@ref stdlib-blas-trans). @@ -1854,7 +2043,7 @@ end """ syr2k(uplo, trans, A, B) -Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*transpose(B) + B*transpose(A)` +Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*transpose(B) + B*transpose(A)` or `transpose(A)*B + transpose(B)*A`, according to [`trans`](@ref stdlib-blas-trans). 
""" syr2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat, B::AbstractVecOrMat) = syr2k(uplo, trans, one(eltype(A)), A, B) @@ -1907,14 +2096,14 @@ end Rank-2k update of the Hermitian matrix `C` as `alpha*A*B' + alpha*B*A' + beta*C` or `alpha*A'*B + alpha*B'*A + beta*C` according to [`trans`](@ref stdlib-blas-trans). The scalar `beta` has to be real. -Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Returns `C`. +Only the [`uplo`](@ref stdlib-blas-uplo) triangle of `C` is used. Return `C`. """ function her2k! end """ her2k(uplo, trans, alpha, A, B) -Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha*B*A'` +Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `alpha*A*B' + alpha*B*A'` or `alpha*A'*B + alpha*B'*A`, according to [`trans`](@ref stdlib-blas-trans). """ her2k(uplo, trans, alpha, A, B) @@ -1922,7 +2111,7 @@ her2k(uplo, trans, alpha, A, B) """ her2k(uplo, trans, A, B) -Returns the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*B' + B*A'` +Return the [`uplo`](@ref stdlib-blas-uplo) triangle of `A*B' + B*A'` or `A'*B + B'*A`, according to [`trans`](@ref stdlib-blas-trans). """ her2k(uplo, trans, A, B) @@ -1937,14 +2126,14 @@ Update `B` as `alpha*A*B` or one of the other three variants determined by Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or are assumed to be all ones. -Returns the updated `B`. +Return the updated `B`. """ function trmm! end """ trmm(side, ul, tA, dA, alpha, A, B) -Returns `alpha*A*B` or one of the other three variants determined by +Return `alpha*A*B` or one of the other three variants determined by [`side`](@ref stdlib-blas-side) and [`tA`](@ref stdlib-blas-trans). Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used. [`dA`](@ref stdlib-blas-diag) determines if the diagonal values are read or diff --git a/stdlib/LinearAlgebra/src/bunchkaufman.jl b/stdlib/LinearAlgebra/src/bunchkaufman.jl index 7961f97e58299..f995b3b8444c4 100644 --- a/stdlib/LinearAlgebra/src/bunchkaufman.jl +++ b/stdlib/LinearAlgebra/src/bunchkaufman.jl @@ -80,15 +80,15 @@ BunchKaufman(A::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, uplo::Abstra symmetric::Bool, rook::Bool, info::BlasInt) where {T} = BunchKaufman{T,typeof(A),typeof(ipiv)}(A, ipiv, uplo, symmetric, rook, info) # backwards-compatible constructors (remove with Julia 2.0) -@deprecate(BunchKaufman(LD, ipiv, uplo, symmetric, rook, info) where {T,S}, - BunchKaufman{T,S,typeof(ipiv)}(LD, ipiv, uplo, symmetric, rook, info)) +@deprecate(BunchKaufman{T,S}(LD, ipiv, uplo, symmetric, rook, info) where {T,S}, + BunchKaufman{T,S,typeof(ipiv)}(LD, ipiv, uplo, symmetric, rook, info), false) # iteration for destructuring into components Base.iterate(S::BunchKaufman) = (S.D, Val(:UL)) Base.iterate(S::BunchKaufman, ::Val{:UL}) = (S.uplo == 'L' ? S.L : S.U, Val(:p)) Base.iterate(S::BunchKaufman, ::Val{:p}) = (S.p, Val(:done)) Base.iterate(S::BunchKaufman, ::Val{:done}) = nothing - +copy(S::BunchKaufman) = BunchKaufman(copy(S.LD), copy(S.ipiv), S.uplo, S.symmetric, S.rook, S.info) """ bunchkaufman!(A, rook::Bool=false; check = true) -> BunchKaufman @@ -96,13 +96,13 @@ Base.iterate(S::BunchKaufman, ::Val{:done}) = nothing `bunchkaufman!` is the same as [`bunchkaufman`](@ref), but saves space by overwriting the input `A`, instead of creating a copy. 
""" -function bunchkaufman!(A::RealHermSymComplexSym{T,S} where {T<:BlasReal,S<:StridedMatrix}, +function bunchkaufman!(A::RealHermSymComplexSym{<:BlasReal,<:StridedMatrix}, rook::Bool = false; check::Bool = true) LD, ipiv, info = rook ? LAPACK.sytrf_rook!(A.uplo, A.data) : LAPACK.sytrf!(A.uplo, A.data) check && checknonsingular(info) BunchKaufman(LD, ipiv, A.uplo, true, rook, info) end -function bunchkaufman!(A::Hermitian{T,S} where {T<:BlasComplex,S<:StridedMatrix{T}}, +function bunchkaufman!(A::Hermitian{<:BlasComplex,<:StridedMatrix}, rook::Bool = false; check::Bool = true) LD, ipiv, info = rook ? LAPACK.hetrf_rook!(A.uplo, A.data) : LAPACK.hetrf!(A.uplo, A.data) check && checknonsingular(info) @@ -197,7 +197,7 @@ julia> S.L*S.D*S.L' - A[S.p, S.p] ``` """ bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} = - bunchkaufman!(copymutable_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check) + bunchkaufman!(eigencopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check) BunchKaufman{T}(B::BunchKaufman) where {T} = BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info) @@ -237,7 +237,7 @@ function _ipiv2perm_bk(v::AbstractVector{T}, maxi::Integer, uplo::AbstractChar, return p end -function getproperty(B::BunchKaufman{T}, d::Symbol) where {T<:BlasFloat} +function getproperty(B::BunchKaufman{T,<:StridedMatrix}, d::Symbol) where {T<:BlasFloat} n = size(B, 1) if d === :p return _ipiv2perm_bk(getfield(B, :ipiv), n, getfield(B, :uplo), B.rook) @@ -278,6 +278,27 @@ end Base.propertynames(B::BunchKaufman, private::Bool=false) = (:p, :P, :L, :U, :D, (private ? fieldnames(typeof(B)) : ())...) +function getproperties!(B::BunchKaufman{T,<:StridedMatrix}) where {T<:BlasFloat} + # NOTE: Unlike in the 'getproperty' function, in this function L/U and D are computed in place. + if B.rook + LUD, od = LAPACK.syconvf_rook!(B.uplo, 'C', B.LD, B.ipiv) + else + LUD, od = LAPACK.syconv!(B.uplo, B.LD, B.ipiv) + end + if B.uplo == 'U' + M = UnitUpperTriangular(LUD) + du = od[2:end] + # Avoid aliasing dl and du. + dl = B.symmetric ? du : conj.(du) + else + M = UnitLowerTriangular(LUD) + dl = od[1:end-1] + # Avoid aliasing dl and du. + du = B.symmetric ? 
dl : conj.(dl) + end + return (M, Tridiagonal(dl, diag(LUD), du), B.p) +end + issuccess(B::BunchKaufman) = B.info == 0 function adjoint(B::BunchKaufman) @@ -302,7 +323,7 @@ function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, B::BunchKaufman) end end -function inv(B::BunchKaufman{<:BlasReal}) +function inv(B::BunchKaufman{<:BlasReal,<:StridedMatrix}) if B.rook copytri!(LAPACK.sytri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo, true) else @@ -310,7 +331,7 @@ function inv(B::BunchKaufman{<:BlasReal}) end end -function inv(B::BunchKaufman{<:BlasComplex}) +function inv(B::BunchKaufman{<:BlasComplex,<:StridedMatrix}) if issymmetric(B) if B.rook copytri!(LAPACK.sytri_rook!(B.uplo, copy(B.LD), B.ipiv), B.uplo) @@ -326,14 +347,14 @@ function inv(B::BunchKaufman{<:BlasComplex}) end end -function ldiv!(B::BunchKaufman{T}, R::StridedVecOrMat{T}) where T<:BlasReal +function ldiv!(B::BunchKaufman{T,<:StridedMatrix}, R::StridedVecOrMat{T}) where {T<:BlasReal} if B.rook LAPACK.sytrs_rook!(B.uplo, B.LD, B.ipiv, R) else LAPACK.sytrs!(B.uplo, B.LD, B.ipiv, R) end end -function ldiv!(B::BunchKaufman{T}, R::StridedVecOrMat{T}) where T<:BlasComplex +function ldiv!(B::BunchKaufman{T,<:StridedMatrix}, R::StridedVecOrMat{T}) where {T<:BlasComplex} if B.rook if issymmetric(B) LAPACK.sytrs_rook!(B.uplo, B.LD, B.ipiv, R) @@ -348,11 +369,6 @@ function ldiv!(B::BunchKaufman{T}, R::StridedVecOrMat{T}) where T<:BlasComplex end end end -# There is no fallback solver for Bunch-Kaufman so we'll have to promote to same element type -function ldiv!(B::BunchKaufman{T}, R::StridedVecOrMat{S}) where {T,S} - TS = promote_type(T,S) - return ldiv!(convert(BunchKaufman{TS}, B), convert(AbstractArray{TS}, R)) -end function logabsdet(F::BunchKaufman) M = F.LD diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl index d11630fcb6a5f..528eca5c3d8a3 100644 --- a/stdlib/LinearAlgebra/src/cholesky.jl +++ b/stdlib/LinearAlgebra/src/cholesky.jl @@ -168,7 +168,7 @@ CholeskyPivoted(A::AbstractMatrix{T}, uplo::AbstractChar, piv::AbstractVector{<: CholeskyPivoted{T,typeof(A),typeof(piv)}(A, uplo, piv, rank, tol, info) # backwards-compatible constructors (remove with Julia 2.0) @deprecate(CholeskyPivoted{T,S}(factors, uplo, piv, rank, tol, info) where {T,S<:AbstractMatrix}, - CholeskyPivoted{T,S,typeof(piv)}(factors, uplo, piv, rank, tol, info)) + CholeskyPivoted{T,S,typeof(piv)}(factors, uplo, piv, rank, tol, info), false) # iteration for destructuring into components @@ -178,10 +178,8 @@ Base.iterate(C::CholeskyPivoted, ::Val{:done}) = nothing # make a copy that allow inplace Cholesky factorization -@inline choltype(A) = promote_type(typeof(sqrt(oneunit(eltype(A)))), Float32) -@inline cholcopy(A::StridedMatrix) = copymutable_oftype(A, choltype(A)) -@inline cholcopy(A::RealHermSymComplexHerm) = copymutable_oftype(A, choltype(A)) -@inline cholcopy(A::AbstractMatrix) = copy_similar(A, choltype(A)) +choltype(A) = promote_type(typeof(sqrt(oneunit(eltype(A)))), Float32) +cholcopy(A::AbstractMatrix) = eigencopy_oftype(A, choltype(A)) # _chol!. 
Internal methods for calling unpivoted Cholesky ## BLAS/LAPACK element types @@ -208,7 +206,7 @@ function _chol!(A::AbstractMatrix, ::Type{UpperTriangular}) A[k,k] = Akk Akk, info = _chol!(Akk, UpperTriangular) if info != 0 - return UpperTriangular(A), info + return UpperTriangular(A), convert(BlasInt, k) end A[k,k] = Akk AkkInv = inv(copy(Akk')) @@ -235,7 +233,7 @@ function _chol!(A::AbstractMatrix, ::Type{LowerTriangular}) A[k,k] = Akk Akk, info = _chol!(Akk, LowerTriangular) if info != 0 - return LowerTriangular(A), info + return LowerTriangular(A), convert(BlasInt, k) end A[k,k] = Akk AkkInv = inv(Akk) @@ -253,11 +251,12 @@ function _chol!(A::AbstractMatrix, ::Type{LowerTriangular}) end ## Numbers -function _chol!(x::Number, uplo) +function _chol!(x::Number, _) rx = real(x) + iszero(rx) && return (rx, convert(BlasInt, 1)) rxr = sqrt(abs(rx)) rval = convert(promote_type(typeof(x), typeof(rxr)), rxr) - rx == abs(x) ? (rval, convert(BlasInt, 0)) : (rval, convert(BlasInt, 1)) + return (rval, convert(BlasInt, rx != abs(x))) end ## for StridedMatrices, check that matrix is symmetric/Hermitian @@ -400,14 +399,16 @@ true ``` """ cholesky(A::AbstractMatrix, ::NoPivot=NoPivot(); check::Bool = true) = - cholesky!(cholcopy(A); check) + _cholesky(cholcopy(A); check) @deprecate cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}}, ::Val{false}; check::Bool = true) cholesky(A, NoPivot(); check) false function cholesky(A::AbstractMatrix{Float16}, ::NoPivot=NoPivot(); check::Bool = true) - X = cholesky!(cholcopy(A); check = check) + X = _cholesky(cholcopy(A); check = check) return Cholesky{Float16}(X) end @deprecate cholesky(A::Union{StridedMatrix{Float16},RealHermSymComplexHerm{Float16,<:StridedMatrix}}, ::Val{false}; check::Bool = true) cholesky(A, NoPivot(); check) false +# allow packages like SparseArrays.jl to hook into here and redirect to out-of-place `cholesky` +_cholesky(A::AbstractMatrix, args...; kwargs...) = cholesky!(A, args...; kwargs...) ## With pivoting """ @@ -466,11 +467,11 @@ true ``` """ cholesky(A::AbstractMatrix, ::RowMaximum; tol = 0.0, check::Bool = true) = - cholesky!(cholcopy(A), RowMaximum(); tol, check) + _cholesky(cholcopy(A), RowMaximum(); tol, check) @deprecate cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}}, ::Val{true}; tol = 0.0, check::Bool = true) cholesky(A, RowMaximum(); tol, check) false function cholesky(A::AbstractMatrix{Float16}, ::RowMaximum; tol = 0.0, check::Bool = true) - X = cholesky!(cholcopy(A), RowMaximum(); tol, check) + X = _cholesky(cholcopy(A), RowMaximum(); tol, check) return CholeskyPivoted{Float16}(X) end @@ -557,7 +558,7 @@ issuccess(C::Union{Cholesky,CholeskyPivoted}) = C.info == 0 adjoint(C::Union{Cholesky,CholeskyPivoted}) = C -function show(io::IO, mime::MIME{Symbol("text/plain")}, C::Cholesky{<:Any,<:AbstractMatrix}) +function show(io::IO, mime::MIME{Symbol("text/plain")}, C::Cholesky) if issuccess(C) summary(io, C); println(io) println(io, "$(C.uplo) factor:") @@ -567,7 +568,7 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, C::Cholesky{<:Any,<:Abst end end -function show(io::IO, mime::MIME{Symbol("text/plain")}, C::CholeskyPivoted{<:Any,<:AbstractMatrix}) +function show(io::IO, mime::MIME{Symbol("text/plain")}, C::CholeskyPivoted) summary(io, C); println(io) println(io, "$(C.uplo) factor with rank $(rank(C)):") show(io, mime, C.uplo == 'U' ? 
C.U : C.L) @@ -578,7 +579,7 @@ end ldiv!(C::Cholesky{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.potrs!(C.uplo, C.factors, B) -function ldiv!(C::Cholesky{<:Any,<:AbstractMatrix}, B::StridedVecOrMat) +function ldiv!(C::Cholesky, B::AbstractVecOrMat) if C.uplo == 'L' return ldiv!(adjoint(LowerTriangular(C.factors)), ldiv!(LowerTriangular(C.factors), B)) else @@ -586,10 +587,10 @@ function ldiv!(C::Cholesky{<:Any,<:AbstractMatrix}, B::StridedVecOrMat) end end -function ldiv!(C::CholeskyPivoted{T}, B::StridedVector{T}) where T<:BlasFloat +function ldiv!(C::CholeskyPivoted{T,<:StridedMatrix}, B::StridedVector{T}) where T<:BlasFloat invpermute!(LAPACK.potrs!(C.uplo, C.factors, permute!(B, C.piv)), C.piv) end -function ldiv!(C::CholeskyPivoted{T}, B::StridedMatrix{T}) where T<:BlasFloat +function ldiv!(C::CholeskyPivoted{T,<:StridedMatrix}, B::StridedMatrix{T}) where T<:BlasFloat n = size(C, 1) for i=1:size(B, 2) permute!(view(B, 1:n, i), C.piv) @@ -630,7 +631,7 @@ function ldiv!(C::CholeskyPivoted, B::AbstractMatrix) B end -function rdiv!(B::AbstractMatrix, C::Cholesky{<:Any,<:AbstractMatrix}) +function rdiv!(B::AbstractMatrix, C::Cholesky) if C.uplo == 'L' return rdiv!(rdiv!(B, adjoint(LowerTriangular(C.factors))), LowerTriangular(C.factors)) else @@ -705,7 +706,7 @@ inv!(C::Cholesky{<:BlasFloat,<:StridedMatrix}) = inv(C::Cholesky{<:BlasFloat,<:StridedMatrix}) = inv!(copy(C)) -function inv(C::CholeskyPivoted) +function inv(C::CholeskyPivoted{<:BlasFloat,<:StridedMatrix}) ipiv = invperm(C.piv) copytri!(LAPACK.potri!(C.uplo, copy(C.factors)), C.uplo, true)[ipiv, ipiv] end diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index d23dca5e6488e..683f7e45cb28d 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -136,7 +136,7 @@ function triu!(M::AbstractMatrix, k::Integer) m, n = size(M) for j in 1:min(n, m + k) for i in max(1, j - k + 1):m - M[i,j] = zero(M[i,j]) + @inbounds M[i,j] = zero(M[i,j]) end end M @@ -198,13 +198,24 @@ function fillband!(A::AbstractMatrix{T}, x, l, u) where T return A end -diagind(m::Integer, n::Integer, k::Integer=0) = +diagind(m::Integer, n::Integer, k::Integer=0) = diagind(IndexLinear(), m, n, k) +diagind(::IndexLinear, m::Integer, n::Integer, k::Integer=0) = k <= 0 ? range(1-k, step=m+1, length=min(m+k, n)) : range(k*m+1, step=m+1, length=min(m, n-k)) +function diagind(::IndexCartesian, m::Integer, n::Integer, k::Integer=0) + Cstart = CartesianIndex(1 + max(0,-k), 1 + max(0,k)) + Cstep = CartesianIndex(1, 1) + length = max(0, k <= 0 ? min(m+k, n) : min(m, n-k)) + StepRangeLen(Cstart, Cstep, length) +end + """ - diagind(M, k::Integer=0) + diagind(M::AbstractMatrix, [k::Integer=0,] indstyle::IndexStyle = IndexLinear()) An `AbstractRange` giving the indices of the `k`th diagonal of the matrix `M`. +Optionally, an index style may be specified which determines the type of the range returned. +If `indstyle isa IndexLinear` (default), this returns an `AbstractRange{Integer}`. +On the other hand, if `indstyle isa IndexCartesian`, this returns an `AbstractRange{CartesianIndex{2}}`. See also: [`diag`](@ref), [`diagm`](@ref), [`Diagonal`](@ref). 
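A brief usage sketch of the `diagind` index-style argument added above (illustrative only, not part of the diff; it assumes the `diagind(::IndexLinear, ...)` and `diagind(::IndexCartesian, ...)` methods shown in the preceding hunk):

```julia
using LinearAlgebra

A = [1 2 3; 4 5 6; 7 8 9]

# Default IndexLinear(): linear indices into the column-major storage.
diagind(A, 1)                    # 4:4:8, i.e. A[4] == 2 and A[8] == 6

# IndexCartesian(): a range of CartesianIndex{2} positions instead.
diagind(A, 1, IndexCartesian())  # CartesianIndex(1, 2), CartesianIndex(2, 3)

# Both styles address the same first-superdiagonal elements.
A[diagind(A, 1, IndexCartesian())] == A[diagind(A, 1)] == diag(A, 1)  # true
```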
@@ -220,11 +231,13 @@ julia> diagind(A,-1) 2:4:6 ``` """ -function diagind(A::AbstractMatrix, k::Integer=0) +function diagind(A::AbstractMatrix, k::Integer=0, indexstyle::IndexStyle = IndexLinear()) require_one_based_indexing(A) - diagind(size(A,1), size(A,2), k) + diagind(indexstyle, size(A,1), size(A,2), k) end +diagind(A::AbstractMatrix, indexstyle::IndexStyle) = diagind(A, 0, indexstyle) + """ diag(M, k::Integer=0) @@ -246,7 +259,7 @@ julia> diag(A,1) 6 ``` """ -diag(A::AbstractMatrix, k::Integer=0) = A[diagind(A,k)] +diag(A::AbstractMatrix, k::Integer=0) = A[diagind(A, k, IndexStyle(A))] """ diagm(kv::Pair{<:Integer,<:AbstractVector}...) @@ -344,33 +357,73 @@ diagm(m::Integer, n::Integer, v::AbstractVector) = diagm(m, n, 0 => v) function tr(A::Matrix{T}) where T n = checksquare(A) t = zero(T) - for i=1:n + @inbounds @simd for i in 1:n t += A[i,i] end t end +_kronsize(A::AbstractMatrix, B::AbstractMatrix) = map(*, size(A), size(B)) +_kronsize(A::AbstractMatrix, B::AbstractVector) = (size(A, 1)*length(B), size(A, 2)) +_kronsize(A::AbstractVector, B::AbstractMatrix) = (length(A)*size(B, 1), size(B, 2)) + """ kron!(C, A, B) -`kron!` is the in-place version of [`kron`](@ref). Computes `kron(A, B)` and stores the result in `C` -overwriting the existing value of `C`. - -!!! tip - Bounds checking can be disabled by [`@inbounds`](@ref), but you need to take care of the shape - of `C`, `A`, `B` yourself. +Computes the Kronecker product of `A` and `B` and stores the result in `C`, +overwriting the existing content of `C`. This is the in-place version of [`kron`](@ref). !!! compat "Julia 1.6" This function requires Julia 1.6 or later. """ -@inline function kron!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix) - require_one_based_indexing(A, B) - @boundscheck (size(C) == (size(A,1)*size(B,1), size(A,2)*size(B,2))) || throw(DimensionMismatch()) - m = 0 - @inbounds for j = 1:size(A,2), l = 1:size(B,2), i = 1:size(A,1) +function kron!(C::AbstractVecOrMat, A::AbstractVecOrMat, B::AbstractVecOrMat) + size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!")) + _kron!(C, A, B) +end +function kron!(c::AbstractVector, a::AbstractVector, b::AbstractVector) + length(c) == length(a) * length(b) || throw(DimensionMismatch("kron!")) + m = firstindex(c) + @inbounds for i in eachindex(a) + ai = a[i] + for k in eachindex(b) + c[m] = ai*b[k] + m += 1 + end + end + return c +end +kron!(c::AbstractVecOrMat, a::AbstractVecOrMat, b::Number) = mul!(c, a, b) +kron!(c::AbstractVecOrMat, a::Number, b::AbstractVecOrMat) = mul!(c, a, b) + +function _kron!(C, A::AbstractMatrix, B::AbstractMatrix) + m = firstindex(C) + @inbounds for j in axes(A,2), l in axes(B,2), i in axes(A,1) Aij = A[i,j] - for k = 1:size(B,1) - C[m += 1] = Aij*B[k,l] + for k in axes(B,1) + C[m] = Aij*B[k,l] + m += 1 + end + end + return C +end +function _kron!(C, A::AbstractMatrix, b::AbstractVector) + m = firstindex(C) + @inbounds for j in axes(A,2), i in axes(A,1) + Aij = A[i,j] + for k in eachindex(b) + C[m] = Aij*b[k] + m += 1 + end + end + return C +end +function _kron!(C, a::AbstractVector, B::AbstractMatrix) + m = firstindex(C) + @inbounds for l in axes(B,2), i in eachindex(a) + ai = a[i] + for k in axes(B,1) + C[m] = ai*B[k,l] + m += 1 end end return C @@ -379,7 +432,7 @@ end """ kron(A, B) -Kronecker tensor product of two vectors or two matrices. +Computes the Kronecker product of two vectors, matrices or numbers. 
For real vectors `v` and `w`, the Kronecker product is related to the outer product by `kron(v,w) == vec(w * transpose(v))` or @@ -422,31 +475,16 @@ julia> reshape(kron(v,w), (length(w), length(v))) 5 10 ``` """ -function kron(a::AbstractMatrix{T}, b::AbstractMatrix{S}) where {T,S} - R = Matrix{promote_op(*,T,S)}(undef, size(a,1)*size(b,1), size(a,2)*size(b,2)) - return @inbounds kron!(R, a, b) +function kron(A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}) where {T,S} + R = Matrix{promote_op(*,T,S)}(undef, _kronsize(A, B)) + return kron!(R, A, B) end - -kron!(c::AbstractVecOrMat, a::AbstractVecOrMat, b::Number) = mul!(c, a, b) -kron!(c::AbstractVecOrMat, a::Number, b::AbstractVecOrMat) = mul!(c, a, b) - -Base.@propagate_inbounds function kron!(c::AbstractVector, a::AbstractVector, b::AbstractVector) - C = reshape(c, length(a)*length(b), 1) - A = reshape(a ,length(a), 1) - B = reshape(b, length(b), 1) - kron!(C, A, B) - return c +function kron(a::AbstractVector{T}, b::AbstractVector{S}) where {T,S} + c = Vector{promote_op(*,T,S)}(undef, length(a)*length(b)) + return kron!(c, a, b) end - -Base.@propagate_inbounds kron!(C::AbstractMatrix, a::AbstractMatrix, b::AbstractVector) = kron!(C, a, reshape(b, length(b), 1)) -Base.@propagate_inbounds kron!(C::AbstractMatrix, a::AbstractVector, b::AbstractMatrix) = kron!(C, reshape(a, length(a), 1), b) - kron(a::Number, b::Union{Number, AbstractVecOrMat}) = a * b kron(a::AbstractVecOrMat, b::Number) = a * b -kron(a::AbstractVector, b::AbstractVector) = vec(kron(reshape(a ,length(a), 1), reshape(b, length(b), 1))) -kron(a::AbstractMatrix, b::AbstractVector) = kron(a, reshape(b, length(b), 1)) -kron(a::AbstractVector, b::AbstractMatrix) = kron(reshape(a, length(a), 1), b) - kron(a::AdjointAbsVec, b::AdjointAbsVec) = adjoint(kron(adjoint(a), adjoint(b))) kron(a::AdjOrTransAbsVec, b::AdjOrTransAbsVec) = transpose(kron(transpose(a), transpose(b))) @@ -465,7 +503,7 @@ end function schurpow(A::AbstractMatrix, p) if istriu(A) # Integer part - retmat = A ^ floor(p) + retmat = A ^ floor(Integer, p) # Real part if p - floor(p) == 0.5 # special case: A^0.5 === sqrt(A) @@ -476,7 +514,7 @@ function schurpow(A::AbstractMatrix, p) else S,Q,d = Schur{Complex}(schur(A)) # Integer part - R = S ^ floor(p) + R = S ^ floor(Integer, p) # Real part if p - floor(p) == 0.5 # special case: A^0.5 === sqrt(A) @@ -566,10 +604,9 @@ julia> exp(A) 0.0 2.71828 ``` """ -exp(A::StridedMatrix{<:BlasFloat}) = exp!(copy(A)) -exp(A::StridedMatrix{<:Union{Integer,Complex{<:Integer}}}) = exp!(float.(A)) -exp(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(exp(parent(A))) -exp(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(exp(parent(A))) +exp(A::AbstractMatrix) = exp!(copy_similar(A, eigtype(eltype(A)))) +exp(A::AdjointAbsMat) = adjoint(exp(parent(A))) +exp(A::TransposeAbsMat) = transpose(exp(parent(A))) """ cis(A::AbstractMatrix) @@ -697,7 +734,7 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat V = mul!(tmp2, A6, tmp1, true, true) tmp1 .= V .+ U - tmp2 .= V .- U # tmp2 aleady contained V but this seems more readable + tmp2 .= V .- U # tmp2 already contained V but this seems more readable X = LAPACK.gesv!(tmp2, tmp1)[1] # X now contains r_13 in Higham 2008 if s > 0 @@ -730,7 +767,7 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat end ## Swap rows i and j and columns i and j in X -function rcswap!(i::Integer, j::Integer, X::StridedMatrix{<:Number}) +function rcswap!(i::Integer, j::Integer, X::AbstractMatrix{<:Number}) for k = 1:size(X,1) X[k,i], X[k,j] = X[k,j], X[k,i] end @@ 
-740,7 +777,7 @@ function rcswap!(i::Integer, j::Integer, X::StridedMatrix{<:Number}) end """ - log(A::StridedMatrix) + log(A::AbstractMatrix) If `A` has no negative real eigenvalue, compute the principal matrix logarithm of `A`, i.e. the unique matrix ``X`` such that ``e^X = A`` and ``-\\pi < Im(\\lambda) < \\pi`` for all @@ -771,7 +808,7 @@ julia> log(A) 0.0 1.0 ``` """ -function log(A::StridedMatrix) +function log(A::AbstractMatrix) # If possible, use diagonalization if ishermitian(A) logHermA = log(Hermitian(A)) @@ -799,8 +836,8 @@ function log(A::StridedMatrix) end end -log(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(log(parent(A))) -log(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(log(parent(A))) +log(A::AdjointAbsMat) = adjoint(log(parent(A))) +log(A::TransposeAbsMat) = transpose(log(parent(A))) """ sqrt(A::AbstractMatrix) @@ -848,10 +885,12 @@ julia> sqrt(A) 0.0 2.0 ``` """ -sqrt(::StridedMatrix) +sqrt(::AbstractMatrix) -function sqrt(A::StridedMatrix{T}) where {T<:Union{Real,Complex}} - if ishermitian(A) +function sqrt(A::AbstractMatrix{T}) where {T<:Union{Real,Complex}} + if checksquare(A) == 0 + return copy(A) + elseif ishermitian(A) sqrtHermA = sqrt(Hermitian(A)) return ishermitian(sqrtHermA) ? copytri!(parent(sqrtHermA), 'U', true) : parent(sqrtHermA) elseif istriu(A) @@ -878,19 +917,65 @@ function sqrt(A::StridedMatrix{T}) where {T<:Union{Real,Complex}} end end -sqrt(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(sqrt(parent(A))) -sqrt(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(sqrt(parent(A))) +sqrt(A::AdjointAbsMat) = adjoint(sqrt(parent(A))) +sqrt(A::TransposeAbsMat) = transpose(sqrt(parent(A))) + +""" + cbrt(A::AbstractMatrix{<:Real}) + +Computes the real-valued cube root of a real-valued matrix `A`. If `T = cbrt(A)`, then +we have `T*T*T ≈ A`, see example given below. + +If `A` is symmetric, i.e., of type `HermOrSym{<:Real}`, then ([`eigen`](@ref)) is used to +find the cube root. Otherwise, a specialized version of the p-th root algorithm [^S03] is +utilized, which exploits the real-valued Schur decomposition ([`schur`](@ref)) +to compute the cube root. + +[^S03]: + + Matthew I. Smith, "A Schur Algorithm for Computing Matrix pth Roots", + SIAM Journal on Matrix Analysis and Applications, vol. 24, 2003, pp. 971–989. 
+ [doi:10.1137/S0895479801392697](https://doi.org/10.1137/s0895479801392697) + +# Examples +```jldoctest +julia> A = [0.927524 -0.15857; -1.3677 -1.01172] +2×2 Matrix{Float64}: + 0.927524 -0.15857 + -1.3677 -1.01172 + +julia> T = cbrt(A) +2×2 Matrix{Float64}: + 0.910077 -0.151019 + -1.30257 -0.936818 + +julia> T*T*T ≈ A +true +``` +""" +function cbrt(A::AbstractMatrix{<:Real}) + if checksquare(A) == 0 + return copy(A) + elseif issymmetric(A) + return cbrt(Symmetric(A, :U)) + else + S = schur(A) + return S.Z * _cbrt_quasi_triu!(S.T) * S.Z' + end +end + +# Cube roots of adjoint and transpose matrices +cbrt(A::AdjointAbsMat) = adjoint(cbrt(parent(A))) +cbrt(A::TransposeAbsMat) = transpose(cbrt(parent(A))) function inv(A::StridedMatrix{T}) where T checksquare(A) - S = typeof((one(T)*zero(T) + one(T)*zero(T))/one(T)) - AA = convert(AbstractArray{S}, A) - if istriu(AA) - Ai = triu!(parent(inv(UpperTriangular(AA)))) - elseif istril(AA) - Ai = tril!(parent(inv(LowerTriangular(AA)))) + if istriu(A) + Ai = triu!(parent(inv(UpperTriangular(A)))) + elseif istril(A) + Ai = tril!(parent(inv(LowerTriangular(A)))) else - Ai = inv!(lu(AA)) + Ai = inv!(lu(A)) Ai = convert(typeof(parent(Ai)), Ai) end return Ai @@ -1316,7 +1401,7 @@ julia> factorize(A) # factorize will check to see that A is already factorized This returns a `5×5 Bidiagonal{Float64}`, which can now be passed to other linear algebra functions (e.g. eigensolvers) which will use specialized methods for `Bidiagonal` types. """ -function factorize(A::StridedMatrix{T}) where T +function factorize(A::AbstractMatrix{T}) where T m, n = size(A) if m == n if m == 1 return A[1] end @@ -1412,7 +1497,7 @@ The default relative tolerance is `n*ϵ`, where `n` is the size of the smallest dimension of `M`, and `ϵ` is the [`eps`](@ref) of the element type of `M`. For inverting dense ill-conditioned matrices in a least-squares sense, -`rtol = sqrt(eps(real(float(one(eltype(M))))))` is recommended. +`rtol = sqrt(eps(real(float(oneunit(eltype(M))))))` is recommended. For more information, see [^issue8859], [^B96], [^S84], [^KY88]. @@ -1442,9 +1527,9 @@ julia> M * N [^KY88]: Konstantinos Konstantinides and Kung Yao, "Statistical analysis of effective singular values in matrix rank determination", IEEE Transactions on Acoustics, Speech and Signal Processing, 36(5), 1988, 757-763. 
[doi:10.1109/29.1585](https://doi.org/10.1109/29.1585) """ -function pinv(A::AbstractMatrix{T}; atol::Real = 0.0, rtol::Real = (eps(real(float(one(T))))*min(size(A)...))*iszero(atol)) where T +function pinv(A::AbstractMatrix{T}; atol::Real = 0.0, rtol::Real = (eps(real(float(oneunit(T))))*min(size(A)...))*iszero(atol)) where T m, n = size(A) - Tout = typeof(zero(T)/sqrt(one(T) + one(T))) + Tout = typeof(zero(T)/sqrt(oneunit(T) + oneunit(T))) if m == 0 || n == 0 return similar(A, Tout, (n, m)) end @@ -1459,10 +1544,10 @@ function pinv(A::AbstractMatrix{T}; atol::Real = 0.0, rtol::Real = (eps(real(flo return B end SVD = svd(A) - tol = max(rtol*maximum(SVD.S), atol) + tol2 = max(rtol*maximum(SVD.S), atol) Stype = eltype(SVD.S) Sinv = fill!(similar(A, Stype, length(SVD.S)), 0) - index = SVD.S .> tol + index = SVD.S .> tol2 Sinv[index] .= pinv.(view(SVD.S, index)) return SVD.Vt' * (Diagonal(Sinv) * SVD.U') end @@ -1512,13 +1597,13 @@ julia> nullspace(M, atol=0.95) 1.0 ``` """ -function nullspace(A::AbstractVecOrMat; atol::Real = 0.0, rtol::Real = (min(size(A, 1), size(A, 2))*eps(real(float(one(eltype(A))))))*iszero(atol)) +function nullspace(A::AbstractVecOrMat; atol::Real = 0.0, rtol::Real = (min(size(A, 1), size(A, 2))*eps(real(float(oneunit(eltype(A))))))*iszero(atol)) m, n = size(A, 1), size(A, 2) (m == 0 || n == 0) && return Matrix{eigtype(eltype(A))}(I, n, n) SVD = svd(A; full=true) tol = max(atol, SVD.S[1]*rtol) indstart = sum(s -> s .> tol, SVD.S) + 1 - return copy(SVD.Vt[indstart:end,:]') + return copy((@view SVD.Vt[indstart:end,:])') end """ @@ -1580,21 +1665,22 @@ julia> X = sylvester(A, B, C) -4.46667 1.93333 3.73333 -1.8 -julia> A*X + X*B + C -2×2 Matrix{Float64}: - 2.66454e-15 1.77636e-15 - -3.77476e-15 4.44089e-16 +julia> A*X + X*B ≈ -C +true ``` """ -function sylvester(A::StridedMatrix{T},B::StridedMatrix{T},C::StridedMatrix{T}) where T<:BlasFloat +function sylvester(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix) + T = promote_type(float(eltype(A)), float(eltype(B)), float(eltype(C))) + return sylvester(copy_similar(A, T), copy_similar(B, T), copy_similar(C, T)) +end +function sylvester(A::AbstractMatrix{T}, B::AbstractMatrix{T}, C::AbstractMatrix{T}) where {T<:BlasFloat} RA, QA = schur(A) RB, QB = schur(B) - - D = -(adjoint(QA) * (C*QB)) - Y, scale = LAPACK.trsyl!('N','N', RA, RB, D) - rmul!(QA*(Y * adjoint(QB)), inv(scale)) + D = QA' * C * QB + D .= .-D + Y, scale = LAPACK.trsyl!('N', 'N', RA, RB, D) + rmul!(QA * Y * QB', inv(scale)) end -sylvester(A::StridedMatrix{T}, B::StridedMatrix{T}, C::StridedMatrix{T}) where {T<:Integer} = sylvester(float(A), float(B), float(C)) Base.@propagate_inbounds function _sylvester_2x1!(A, B, C) b = B[1] @@ -1652,18 +1738,19 @@ julia> X = lyap(A, B) 0.5 -0.5 -0.5 0.25 -julia> A*X + X*A' + B -2×2 Matrix{Float64}: - 0.0 6.66134e-16 - 6.66134e-16 8.88178e-16 +julia> A*X + X*A' ≈ -B +true ``` """ -function lyap(A::StridedMatrix{T}, C::StridedMatrix{T}) where {T<:BlasFloat} +function lyap(A::AbstractMatrix, C::AbstractMatrix) + T = promote_type(float(eltype(A)), float(eltype(C))) + return lyap(copy_similar(A, T), copy_similar(C, T)) +end +function lyap(A::AbstractMatrix{T}, C::AbstractMatrix{T}) where {T<:BlasFloat} R, Q = schur(A) - - D = -(adjoint(Q) * (C*Q)) + D = Q' * C * Q + D .= .-D Y, scale = LAPACK.trsyl!('N', T <: Complex ? 
'C' : 'T', R, R, D) - rmul!(Q*(Y * adjoint(Q)), inv(scale)) + rmul!(Q * Y * Q', inv(scale)) end -lyap(A::StridedMatrix{T}, C::StridedMatrix{T}) where {T<:Integer} = lyap(float(A), float(C)) lyap(a::Union{Real,Complex}, c::Union{Real,Complex}) = -c/(2real(a)) diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index fab0f36660c46..bd2958d1b222b 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -24,13 +24,14 @@ end """ Diagonal(V::AbstractVector) -Construct a matrix with `V` as its diagonal. +Construct a lazy matrix with `V` as its diagonal. -See also [`diag`](@ref), [`diagm`](@ref). +See also [`UniformScaling`](@ref) for the lazy identity matrix `I`, +[`diagm`](@ref) to make a dense matrix, and [`diag`](@ref) to extract diagonal elements. # Examples ```jldoctest -julia> Diagonal([1, 10, 100]) +julia> d = Diagonal([1, 10, 100]) 3×3 Diagonal{$Int, Vector{$Int}}: 1 ⋅ ⋅ ⋅ 10 ⋅ @@ -40,6 +41,31 @@ julia> diagm([7, 13]) 2×2 Matrix{$Int}: 7 0 0 13 + +julia> ans + I +2×2 Matrix{Int64}: + 8 0 + 0 14 + +julia> I(2) +2×2 Diagonal{Bool, Vector{Bool}}: + 1 ⋅ + ⋅ 1 +``` + +!!! note + A one-column matrix is not treated like a vector, but instead calls the + method `Diagonal(A::AbstractMatrix)` which extracts 1-element `diag(A)`: + +```jldoctest +julia> A = transpose([7.0 13.0]) +2×1 transpose(::Matrix{Float64}) with eltype Float64: + 7.0 + 13.0 + +julia> Diagonal(A) +1×1 Diagonal{Float64, Vector{Float64}}: + 7.0 ``` """ Diagonal(V::AbstractVector) @@ -47,41 +73,52 @@ Diagonal(V::AbstractVector) """ Diagonal(A::AbstractMatrix) -Construct a matrix from the diagonal of `A`. +Construct a matrix from the principal diagonal of `A`. +The input matrix `A` may be rectangular, but the output will +be square. # Examples ```jldoctest -julia> A = permutedims(reshape(1:15, 5, 3)) -3×5 Matrix{Int64}: - 1 2 3 4 5 - 6 7 8 9 10 - 11 12 13 14 15 +julia> A = [1 2; 3 4] +2×2 Matrix{Int64}: + 1 2 + 3 4 + +julia> D = Diagonal(A) +2×2 Diagonal{Int64, Vector{Int64}}: + 1 ⋅ + ⋅ 4 + +julia> A = [1 2 3; 4 5 6] +2×3 Matrix{Int64}: + 1 2 3 + 4 5 6 julia> Diagonal(A) -3×3 Diagonal{$Int, Vector{$Int}}: - 1 ⋅ ⋅ - ⋅ 7 ⋅ - ⋅ ⋅ 13 - -julia> diag(A, 2) -3-element Vector{$Int}: - 3 - 9 - 15 +2×2 Diagonal{Int64, Vector{Int64}}: + 1 ⋅ + ⋅ 5 ``` """ Diagonal(A::AbstractMatrix) = Diagonal(diag(A)) +Diagonal{T}(A::AbstractMatrix) where T = Diagonal{T}(diag(A)) +function convert(::Type{T}, A::AbstractMatrix) where T<:Diagonal + checksquare(A) + isdiag(A) ? T(A) : throw(InexactError(:convert, T, A)) +end Diagonal(D::Diagonal) = D Diagonal{T}(D::Diagonal{T}) where {T} = D Diagonal{T}(D::Diagonal) where {T} = Diagonal{T}(D.diag) AbstractMatrix{T}(D::Diagonal) where {T} = Diagonal{T}(D) +AbstractMatrix{T}(D::Diagonal{T}) where {T} = copy(D) Matrix(D::Diagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(D) Array(D::Diagonal{T}) where {T} = Matrix(D) function Matrix{T}(D::Diagonal) where {T} n = size(D, 1) - B = zeros(T, n, n) + B = Matrix{T}(undef, n, n) + n > 1 && fill!(B, zero(T)) @inbounds for i in 1:n B[i,i] = D.diag[i] end @@ -96,17 +133,44 @@ Construct an uninitialized `Diagonal{T}` of length `n`. See `undef`. Diagonal{T}(::UndefInitializer, n::Integer) where T = Diagonal(Vector{T}(undef, n)) similar(D::Diagonal, ::Type{T}) where {T} = Diagonal(similar(D.diag, T)) -similar(::Diagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = zeros(T, dims...) 
+similar(D::Diagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(D.diag, T, dims) copyto!(D1::Diagonal, D2::Diagonal) = (copyto!(D1.diag, D2.diag); D1) size(D::Diagonal) = (n = length(D.diag); (n,n)) -function size(D::Diagonal,d::Integer) - if d<1 - throw(ArgumentError("dimension must be ≥ 1, got $d")) +axes(D::Diagonal) = (ax = axes(D.diag, 1); (ax, ax)) + +@inline function Base.isassigned(D::Diagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, D, i, j) || return false + if i == j + @inbounds r = isassigned(D.diag, i) + else + r = true end - return d<=2 ? length(D.diag) : 1 + r +end + +@inline function Base.isstored(D::Diagonal, i::Int, j::Int) + @boundscheck checkbounds(D, i, j) + if i == j + @inbounds r = Base.isstored(D.diag, i) + else + r = false + end + r +end + +function Base.minimum(D::Diagonal{T}) where T <: Number + mindiag = minimum(D.diag) + size(D, 1) > 1 && return (min(zero(T), mindiag)) + return mindiag +end + +function Base.maximum(D::Diagonal{T}) where T <: Number + maxdiag = Base.maximum(D.diag) + size(D, 1) > 1 && return (max(zero(T), maxdiag)) + return maxdiag end @inline function getindex(D::Diagonal, i::Int, j::Int) @@ -139,6 +203,8 @@ end parent(D::Diagonal) = D.diag +copy(D::Diagonal) = Diagonal(copy(D.diag)) + ishermitian(D::Diagonal{<:Real}) = true ishermitian(D::Diagonal{<:Number}) = isreal(D.diag) ishermitian(D::Diagonal) = all(ishermitian, D.diag) @@ -245,52 +311,24 @@ function (*)(D::Diagonal, V::AbstractVector) end (*)(A::AbstractMatrix, D::Diagonal) = + mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A, D) +(*)(A::HermOrSym, D::Diagonal) = mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A, D) (*)(D::Diagonal, A::AbstractMatrix) = + mul!(similar(A, promote_op(*, eltype(D.diag), eltype(A))), D, A) +(*)(D::Diagonal, A::HermOrSym) = mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), D, A) rmul!(A::AbstractMatrix, D::Diagonal) = @inline mul!(A, A, D) lmul!(D::Diagonal, B::AbstractVecOrMat) = @inline mul!(B, D, B) -#TODO: It seems better to call (D' * adjA')' directly? 
-function *(adjA::Adjoint{<:Any,<:AbstractMatrix}, D::Diagonal) - A = adjA.parent - Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1))) - adjoint!(Ac, A) - rmul!(Ac, D) -end - -function *(transA::Transpose{<:Any,<:AbstractMatrix}, D::Diagonal) - A = transA.parent - At = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1))) - transpose!(At, A) - rmul!(At, D) -end - -*(D::Diagonal, adjQ::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) = - rmul!(Array{promote_type(eltype(D), eltype(adjQ))}(D), adjQ) - -function *(D::Diagonal, adjA::Adjoint{<:Any,<:AbstractMatrix}) - A = adjA.parent - Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1))) - adjoint!(Ac, A) - lmul!(D, Ac) -end - -function *(D::Diagonal, transA::Transpose{<:Any,<:AbstractMatrix}) - A = transA.parent - At = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1))) - transpose!(At, A) - lmul!(D, At) -end - -@inline function __muldiag!(out, D::Diagonal, B, alpha, beta) - require_one_based_indexing(B) - require_one_based_indexing(out) +function __muldiag!(out, D::Diagonal, B, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + require_one_based_indexing(out, B) + alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out, beta) else - if iszero(beta) + if bis0 @inbounds for j in axes(B, 2) @simd for i in axes(B, 1) out[i,j] = D.diag[i] * B[i,j] * alpha @@ -306,13 +344,13 @@ end end return out end -@inline function __muldiag!(out, A, D::Diagonal, alpha, beta) - require_one_based_indexing(A) - require_one_based_indexing(out) +function __muldiag!(out, A, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + require_one_based_indexing(out, A) + alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out, beta) else - if iszero(beta) + if bis0 @inbounds for j in axes(A, 2) dja = D.diag[j] * alpha @simd for i in axes(A, 1) @@ -330,13 +368,14 @@ end end return out end -@inline function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, alpha, beta) +function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} d1 = D1.diag d2 = D2.diag + alpha, beta = _add.alpha, _add.beta if iszero(alpha) _rmul_or_fill!(out.diag, beta) else - if iszero(beta) + if bis0 @inbounds @simd for i in eachindex(out.diag) out.diag[i] = d1[i] * d2[i] * alpha end @@ -348,8 +387,9 @@ end end return out end -@inline function __muldiag!(out, D1::Diagonal, D2::Diagonal, alpha, beta) +function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} require_one_based_indexing(out) + alpha, beta = _add.alpha, _add.beta mA = size(D1, 1) d1 = D1.diag d2 = D2.diag @@ -362,39 +402,38 @@ end return out end -@inline function _muldiag!(out, A, B, alpha, beta) +function _mul_diag!(out, A, B, _add) _muldiag_size_check(out, A, B) - __muldiag!(out, A, B, alpha, beta) + __muldiag!(out, A, B, _add) return out end -# Get ambiguous method if try to unify AbstractVector/AbstractMatrix here using AbstractVecOrMat -@inline mul!(out::AbstractVector, D::Diagonal, V::AbstractVector, alpha::Number, beta::Number) = - _muldiag!(out, D, V, alpha, beta) -@inline mul!(out::AbstractMatrix, D::Diagonal, B::AbstractMatrix, alpha::Number, beta::Number) = - _muldiag!(out, D, B, alpha, beta) -@inline mul!(out::AbstractMatrix, D::Diagonal, B::Adjoint{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = _muldiag!(out, D, B, alpha, beta) -@inline mul!(out::AbstractMatrix, D::Diagonal, 
B::Transpose{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = _muldiag!(out, D, B, alpha, beta) - -@inline mul!(out::AbstractMatrix, A::AbstractMatrix, D::Diagonal, alpha::Number, beta::Number) = - _muldiag!(out, A, D, alpha, beta) -@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:AbstractVecOrMat}, D::Diagonal, - alpha::Number, beta::Number) = _muldiag!(out, A, D, alpha, beta) -@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:AbstractVecOrMat}, D::Diagonal, - alpha::Number, beta::Number) = _muldiag!(out, A, D, alpha, beta) -@inline mul!(C::Diagonal, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number) = - _muldiag!(C, Da, Db, alpha, beta) - -mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number) = - _muldiag!(C, Da, Db, alpha, beta) - -_init(op, A::AbstractArray{<:Number}, B::AbstractArray{<:Number}) = - (_ -> zero(typeof(op(oneunit(eltype(A)), oneunit(eltype(B)))))) -_init(op, A::AbstractArray, B::AbstractArray) = promote_op(op, eltype(A), eltype(B)) - -/(A::AbstractVecOrMat, D::Diagonal) = _rdiv!(_init(/, A, D).(A), A, D) +_mul!(out::AbstractVecOrMat, D::Diagonal, V::AbstractVector, _add) = + _mul_diag!(out, D, V, _add) +_mul!(out::AbstractMatrix, D::Diagonal, B::AbstractMatrix, _add) = + _mul_diag!(out, D, B, _add) +_mul!(out::AbstractMatrix, A::AbstractMatrix, D::Diagonal, _add) = + _mul_diag!(out, A, D, _add) +_mul!(C::Diagonal, Da::Diagonal, Db::Diagonal, _add) = + _mul_diag!(C, Da, Db, _add) +_mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, _add) = + _mul_diag!(C, Da, Db, _add) + +function (*)(Da::Diagonal, A::AbstractMatrix, Db::Diagonal) + _muldiag_size_check(Da, A) + _muldiag_size_check(A, Db) + return broadcast(*, Da.diag, A, permutedims(Db.diag)) +end + +function (*)(Da::Diagonal, Db::Diagonal, Dc::Diagonal) + _muldiag_size_check(Da, Db) + _muldiag_size_check(Db, Dc) + return Diagonal(Da.diag .* Db.diag .* Dc.diag) +end + +/(A::AbstractVecOrMat, D::Diagonal) = _rdiv!(similar(A, _init_eltype(/, eltype(A), eltype(D))), A, D) +/(A::HermOrSym, D::Diagonal) = _rdiv!(similar(A, _init_eltype(/, eltype(A), eltype(D)), size(A)), A, D) + rdiv!(A::AbstractVecOrMat, D::Diagonal) = @inline _rdiv!(A, A, D) # avoid copy when possible via internal 3-arg backend function _rdiv!(B::AbstractVecOrMat, A::AbstractVecOrMat, D::Diagonal) @@ -419,8 +458,8 @@ function \(D::Diagonal, B::AbstractVector) isnothing(j) || throw(SingularException(j)) return D.diag .\ B end -\(D::Diagonal, B::AbstractMatrix) = - ldiv!(_init(\, D, B).(B), D, B) +\(D::Diagonal, B::AbstractMatrix) = ldiv!(similar(B, _init_eltype(\, eltype(D), eltype(B))), D, B) +\(D::Diagonal, B::HermOrSym) = ldiv!(similar(B, _init_eltype(\, eltype(D), eltype(B)), size(B)), D, B) ldiv!(D::Diagonal, B::AbstractVecOrMat) = @inline ldiv!(B, D, B) function ldiv!(B::AbstractVecOrMat, D::Diagonal, A::AbstractVecOrMat) @@ -557,22 +596,23 @@ for Tri in (:UpperTriangular, :LowerTriangular) # 3-arg ldiv! @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data)) @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag)) - # 3-arg mul!: invoke 5-arg mul! rather than lmul! - @eval mul!(C::$Tri, A::Union{$Tri,$UTri}, D::Diagonal) = mul!(C, A, D, true, false) + # 3-arg mul! is disambiguated in special.jl # 5-arg mul! 
- @eval @inline mul!(C::$Tri, D::Diagonal, A::$Tri, α::Number, β::Number) = $Tri(mul!(C.data, D, A.data, α, β)) - @eval @inline function mul!(C::$Tri, D::Diagonal, A::$UTri, α::Number, β::Number) + @eval _mul!(C::$Tri, D::Diagonal, A::$Tri, _add) = $Tri(mul!(C.data, D, A.data, _add.alpha, _add.beta)) + @eval function _mul!(C::$Tri, D::Diagonal, A::$UTri, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + α, β = _add.alpha, _add.beta iszero(α) && return _rmul_or_fill!(C, β) - diag′ = iszero(β) ? nothing : diag(C) + diag′ = bis0 ? nothing : diag(C) data = mul!(C.data, D, A.data, α, β) - $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′)) + $Tri(_setdiag!(data, _add, D.diag, diag′)) end - @eval @inline mul!(C::$Tri, A::$Tri, D::Diagonal, α::Number, β::Number) = $Tri(mul!(C.data, A.data, D, α, β)) - @eval @inline function mul!(C::$Tri, A::$UTri, D::Diagonal, α::Number, β::Number) + @eval _mul!(C::$Tri, A::$Tri, D::Diagonal, _add) = $Tri(mul!(C.data, A.data, D, _add.alpha, _add.beta)) + @eval function _mul!(C::$Tri, A::$UTri, D::Diagonal, _add::MulAddMul{ais1,bis0}) where {ais1,bis0} + α, β = _add.alpha, _add.beta iszero(α) && return _rmul_or_fill!(C, β) - diag′ = iszero(β) ? nothing : diag(C) + diag′ = bis0 ? nothing : diag(C) data = mul!(C.data, A.data, D, α, β) - $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′)) + $Tri(_setdiag!(data, _add, D.diag, diag′)) end end @@ -590,7 +630,21 @@ end return C end -kron(A::Diagonal{<:Number}, B::Diagonal{<:Number}) = Diagonal(kron(A.diag, B.diag)) +kron(A::Diagonal, B::Diagonal) = Diagonal(kron(A.diag, B.diag)) + +function kron(A::Diagonal, B::SymTridiagonal) + kdv = kron(diag(A), B.dv) + # We don't need to drop the last element + kev = kron(diag(A), _pushzero(_evview(B))) + SymTridiagonal(kdv, kev) +end +function kron(A::Diagonal, B::Tridiagonal) + # `_droplast!` is only guaranteed to work with `Vector` + kd = _makevector(kron(diag(A), B.d)) + kdl = _droplast!(_makevector(kron(diag(A), _pushzero(B.dl)))) + kdu = _droplast!(_makevector(kron(diag(A), _pushzero(B.du)))) + Tridiagonal(kdl, kd, kdu) +end @inline function kron!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix) require_one_based_indexing(B) @@ -641,7 +695,8 @@ end conj(D::Diagonal) = Diagonal(conj(D.diag)) transpose(D::Diagonal{<:Number}) = D transpose(D::Diagonal) = Diagonal(transpose.(D.diag)) -adjoint(D::Diagonal{<:Number}) = conj(D) +adjoint(D::Diagonal{<:Number}) = Diagonal(vec(adjoint(D.diag))) +adjoint(D::Diagonal{<:Number,<:Base.ReshapedArray{<:Number,1,<:Adjoint}}) = Diagonal(adjoint(parent(D.diag))) adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag)) permutedims(D::Diagonal) = D permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D) @@ -674,10 +729,13 @@ for f in (:exp, :cis, :log, :sqrt, @eval $f(D::Diagonal) = Diagonal($f.(D.diag)) end +# Cube root of a real-valued diagonal matrix +cbrt(A::Diagonal{<:Real}) = Diagonal(cbrt.(A.diag)) + function inv(D::Diagonal{T}) where T - Di = similar(D.diag, typeof(inv(zero(T)))) + Di = similar(D.diag, typeof(inv(oneunit(T)))) for i = 1:length(D.diag) - if D.diag[i] == zero(T) + if iszero(D.diag[i]) throw(SingularException(i)) end Di[i] = inv(D.diag[i]) @@ -686,30 +744,65 @@ function inv(D::Diagonal{T}) where T end function pinv(D::Diagonal{T}) where T - Di = similar(D.diag, typeof(inv(zero(T)))) + Di = similar(D.diag, typeof(inv(oneunit(T)))) for i = 1:length(D.diag) - isfinite(inv(D.diag[i])) ? 
Di[i]=inv(D.diag[i]) : Di[i]=zero(T) + if !iszero(D.diag[i]) + invD = inv(D.diag[i]) + if isfinite(invD) + Di[i] = invD + continue + end + end + # fallback + Di[i] = zero(T) end Diagonal(Di) end function pinv(D::Diagonal{T}, tol::Real) where T - Di = similar(D.diag, typeof(inv(zero(T)))) - if( !isempty(D.diag) ) maxabsD = maximum(abs.(D.diag)) end - for i = 1:length(D.diag) - if( abs(D.diag[i]) > tol*maxabsD && isfinite(inv(D.diag[i])) ) - Di[i]=inv(D.diag[i]) - else - Di[i]=zero(T) + Di = similar(D.diag, typeof(inv(oneunit(T)))) + if !isempty(D.diag) + maxabsD = maximum(abs, D.diag) + for i = 1:length(D.diag) + if abs(D.diag[i]) > tol*maxabsD + invD = inv(D.diag[i]) + if isfinite(invD) + Di[i] = invD + continue + end + end + # fallback + Di[i] = zero(T) end end Diagonal(Di) end +# TODO Docstrings for eigvals, eigvecs, eigen all mention permute, scale, sortby as keyword args +# but not all of them below provide them. Do we need to fix that? #Eigensystem eigvals(D::Diagonal{<:Number}; permute::Bool=true, scale::Bool=true) = copy(D.diag) eigvals(D::Diagonal; permute::Bool=true, scale::Bool=true) = - [eigvals(x) for x in D.diag] #For block matrices, etc. -eigvecs(D::Diagonal) = Matrix{eltype(D)}(I, size(D)) + reduce(vcat, eigvals(x) for x in D.diag) #For block matrices, etc. +function eigvecs(D::Diagonal{T}) where T<:AbstractMatrix + diag_vecs = [ eigvecs(x) for x in D.diag ] + matT = reduce((a,b) -> promote_type(typeof(a),typeof(b)), diag_vecs) + ncols_diag = [ size(x, 2) for x in D.diag ] + nrows = size(D, 1) + vecs = Matrix{Vector{eltype(matT)}}(undef, nrows, sum(ncols_diag)) + for j in axes(D, 2), i in axes(D, 1) + jj = sum(view(ncols_diag,1:j-1)) + if i == j + for k in 1:ncols_diag[j] + vecs[i,jj+k] = diag_vecs[i][:,k] + end + else + for k in 1:ncols_diag[j] + vecs[i,jj+k] = zeros(eltype(T), ncols_diag[i]) + end + end + end + return vecs +end function eigen(D::Diagonal; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=nothing) if any(!isfinite, D.diag) throw(ArgumentError("matrix contains Infs or NaNs")) @@ -718,7 +811,7 @@ function eigen(D::Diagonal; permute::Bool=true, scale::Bool=true, sortby::Union{ λ = eigvals(D) if !isnothing(sortby) p = sortperm(λ; alg=QuickSort, by=sortby) - λ = λ[p] # make a copy, otherwise this permutes D.diag + λ = λ[p] evecs = zeros(Td, size(D)) @inbounds for i in eachindex(p) evecs[p[i],i] = one(Td) @@ -728,6 +821,41 @@ function eigen(D::Diagonal; permute::Bool=true, scale::Bool=true, sortby::Union{ end Eigen(λ, evecs) end +function eigen(D::Diagonal{<:AbstractMatrix}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=nothing) + if any(any(!isfinite, x) for x in D.diag) + throw(ArgumentError("matrix contains Infs or NaNs")) + end + λ = eigvals(D) + evecs = eigvecs(D) + if !isnothing(sortby) + p = sortperm(λ; alg=QuickSort, by=sortby) + λ = λ[p] + evecs = evecs[:,p] + end + Eigen(λ, evecs) +end +function eigen(Da::Diagonal, Db::Diagonal; sortby::Union{Function,Nothing}=nothing) + if any(!isfinite, Da.diag) || any(!isfinite, Db.diag) + throw(ArgumentError("matrices contain Infs or NaNs")) + end + if any(iszero, Db.diag) + throw(ArgumentError("right-hand side diagonal matrix is singular")) + end + return GeneralizedEigen(eigen(Db \ Da; sortby)...) 
+end +function eigen(A::AbstractMatrix, D::Diagonal; sortby::Union{Function,Nothing}=nothing) + if any(iszero, D.diag) + throw(ArgumentError("right-hand side diagonal matrix is singular")) + end + if size(A, 1) == size(A, 2) && isdiag(A) + return eigen(Diagonal(A), D; sortby) + elseif all(isposdef, D.diag) + S = promote_type(eigtype(eltype(A)), eltype(D)) + return eigen(A, cholesky(Diagonal{S}(D)); sortby) + else + return eigen!(D \ A; sortby) + end +end #Singular system svdvals(D::Diagonal{<:Number}) = sort!(abs.(D.diag), rev = true) @@ -749,12 +877,12 @@ function svd(D::Diagonal{T}) where {T<:Number} end # disambiguation methods: * and / of Diagonal and Adj/Trans AbsVec -*(x::AdjointAbsVec, D::Diagonal) = Adjoint(map((t,s) -> t'*s, D.diag, parent(x))) -*(x::TransposeAbsVec, D::Diagonal) = Transpose(map((t,s) -> transpose(t)*s, D.diag, parent(x))) +*(u::AdjointAbsVec, D::Diagonal) = (D'u')' +*(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) * transpose(u)) *(x::AdjointAbsVec, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y) *(x::TransposeAbsVec, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y) -/(u::AdjointAbsVec, D::Diagonal) = adjoint(adjoint(D) \ u.parent) -/(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) \ u.parent) +/(u::AdjointAbsVec, D::Diagonal) = (D' \ u')' +/(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) \ transpose(u)) # disambiguation methods: Call unoptimized version for user defined AbstractTriangular. *(A::AbstractTriangular, D::Diagonal) = @invoke *(A::AbstractMatrix, D::Diagonal) *(D::Diagonal, A::AbstractTriangular) = @invoke *(D::Diagonal, A::AbstractMatrix) @@ -764,12 +892,15 @@ dot(x::AbstractVector, D::Diagonal, y::AbstractVector) = _mapreduce_prod(dot, x, dot(A::Diagonal, B::Diagonal) = dot(A.diag, B.diag) function dot(D::Diagonal, B::AbstractMatrix) size(D) == size(B) || throw(DimensionMismatch("Matrix sizes $(size(D)) and $(size(B)) differ")) - return dot(D.diag, view(B, diagind(B))) + return dot(D.diag, view(B, diagind(B, IndexStyle(B)))) end dot(A::AbstractMatrix, B::Diagonal) = conj(dot(B, A)) function _mapreduce_prod(f, x, D::Diagonal, y) + if !(length(x) == length(D.diag) == length(y)) + throw(DimensionMismatch("x has length $(length(x)), D has size $(size(D)), and y has $(length(y))")) + end if isempty(x) && isempty(D) && isempty(y) return zero(promote_op(f, eltype(x), eltype(D), eltype(y))) else @@ -794,8 +925,10 @@ end @deprecate cholesky!(A::Diagonal, ::Val{false}; check::Bool = true) cholesky!(A::Diagonal, NoPivot(); check) false @deprecate cholesky(A::Diagonal, ::Val{false}; check::Bool = true) cholesky(A::Diagonal, NoPivot(); check) false -@inline cholcopy(A::Diagonal) = copymutable_oftype(A, choltype(A)) -@inline cholcopy(A::RealHermSymComplexHerm{<:Real,<:Diagonal}) = copymutable_oftype(A, choltype(A)) +inv(C::Cholesky{<:Any,<:Diagonal}) = Diagonal(map(inv∘abs2, C.factors.diag)) + +cholcopy(A::Diagonal) = copymutable_oftype(A, choltype(A)) +cholcopy(A::RealHermSymComplexHerm{<:Any,<:Diagonal}) = Diagonal(copy_similar(diag(A), choltype(A))) function getproperty(C::Cholesky{<:Any,<:Diagonal}, d::Symbol) Cfactors = getfield(C, :factors) diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl index 14de91a9180af..4ba540d42b261 100644 --- a/stdlib/LinearAlgebra/src/eigen.jl +++ b/stdlib/LinearAlgebra/src/eigen.jl @@ -173,7 +173,8 @@ function eigen!(A::StridedMatrix{T}; permute::Bool=true, scale::Bool=true, sortb n = size(A, 2) n == 0 && return Eigen(zeros(T, 0), zeros(T, 
0, 0)) ishermitian(A) && return eigen!(Hermitian(A), sortby=sortby) - eval, evec = LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'V', 'N', A)[[2,4]] + E = LAPACK.geevx!(permute ? (scale ? 'B' : 'P') : (scale ? 'S' : 'N'), 'N', 'V', 'N', A) + eval, evec = E[2], E[4] return Eigen(sorteig!(eval, evec, sortby)...) end @@ -182,7 +183,9 @@ end Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F` which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the -matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.) +matrix `F.vectors`. This corresponds to solving an eigenvalue problem of the form +`Ax = λx`, where `A` is a matrix, `x` is an eigenvector, and `λ` is an eigenvalue. +(The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.) Iterating the decomposition produces the components `F.values` and `F.vectors`. @@ -233,18 +236,23 @@ true ``` """ function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where T - AA = copymutable_oftype(A, eigtype(T)) - isdiag(AA) && return eigen(Diagonal(AA); permute=permute, scale=scale, sortby=sortby) - return eigen!(AA; permute=permute, scale=scale, sortby=sortby) + _eigen(A; permute, scale, sortby) end function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where {T <: Union{Float16,Complex{Float16}}} - AA = copymutable_oftype(A, eigtype(T)) - isdiag(AA) && return eigen(Diagonal(AA); permute=permute, scale=scale, sortby=sortby) - A = eigen!(AA; permute, scale, sortby) - values = convert(AbstractVector{isreal(A.values) ? Float16 : Complex{Float16}}, A.values) - vectors = convert(AbstractMatrix{isreal(A.vectors) ? Float16 : Complex{Float16}}, A.vectors) + E = _eigen(A; permute, scale, sortby) + values = convert(AbstractVector{isreal(E.values) ? Float16 : Complex{Float16}}, E.values) + vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors) return Eigen(values, vectors) end +function _eigen(A::AbstractMatrix{T}; permute=true, scale=true, sortby=eigsortby) where {T} + isdiag(A) && return eigen(Diagonal{eigtype(T)}(diag(A)); sortby) + if ishermitian(A) + eigen!(eigencopy_oftype(Hermitian(A), eigtype(T)); sortby) + else + eigen!(eigencopy_oftype(A, eigtype(T)); permute, scale, sortby) + end +end + eigen(x::Number) = Eigen([x], fill(one(x), 1, 1)) """ @@ -333,7 +341,7 @@ julia> eigvals(diag_matrix) ``` """ eigvals(A::AbstractMatrix{T}; kws...) where T = - eigvals!(copymutable_oftype(A, eigtype(T)); kws...) + eigvals!(eigencopy_oftype(A, eigtype(T)); kws...) """ For a scalar input, `eigvals` will return a scalar. @@ -436,7 +444,11 @@ det(A::Eigen) = prod(A.values) function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal issymmetric(A) && isposdef(B) && return eigen!(Symmetric(A), Symmetric(B), sortby=sortby) n = size(A, 1) - alphar, alphai, beta, _, vr = LAPACK.ggev!('N', 'V', A, B) + if LAPACK.version() < v"3.6.0" + alphar, alphai, beta, _, vr = LAPACK.ggev!('N', 'V', A, B) + else + alphar, alphai, beta, _, vr = LAPACK.ggev3!('N', 'V', A, B) + end iszero(alphai) && return GeneralizedEigen(sorteig!(alphar ./ beta, vr, sortby)...) 
vecs = zeros(Complex{T}, n, n) @@ -458,7 +470,11 @@ end function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex ishermitian(A) && isposdef(B) && return eigen!(Hermitian(A), Hermitian(B), sortby=sortby) - alpha, beta, _, vr = LAPACK.ggev!('N', 'V', A, B) + if LAPACK.version() < v"3.6.0" + alpha, beta, _, vr = LAPACK.ggev!('N', 'V', A, B) + else + alpha, beta, _, vr = LAPACK.ggev3!('N', 'V', A, B) + end return GeneralizedEigen(sorteig!(alpha./beta, vr, sortby)...) end @@ -468,6 +484,8 @@ end Compute the generalized eigenvalue decomposition of `A` and `B`, returning a [`GeneralizedEigen`](@ref) factorization object `F` which contains the generalized eigenvalues in `F.values` and the generalized eigenvectors in the columns of the matrix `F.vectors`. +This corresponds to solving a generalized eigenvalue problem of the form +`Ax = λBx`, where `A, B` are matrices, `x` is an eigenvector, and `λ` is an eigenvalue. (The `k`th generalized eigenvector can be obtained from the slice `F.vectors[:, k]`.) Iterating the decomposition produces the components `F.values` and `F.vectors`. @@ -507,12 +525,20 @@ true ``` """ function eigen(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB} - S = promote_type(eigtype(TA),TB) - eigen!(copymutable_oftype(A, S), copymutable_oftype(B, S); kws...) + S = promote_type(eigtype(TA), TB) + eigen!(copy_similar(A, S), copy_similar(B, S); kws...) end - eigen(A::Number, B::Number) = eigen(fill(A,1,1), fill(B,1,1)) +""" + LinearAlgebra.eigencopy_oftype(A::AbstractMatrix, ::Type{S}) + +Creates a dense copy of `A` with eltype `S` by calling `copy_similar(A, S)`. +In the case of `Hermitian` or `Symmetric` matrices additionally retains the wrapper, +together with the `uplo` field. +""" +eigencopy_oftype(A, S) = copy_similar(A, S) + """ eigvals!(A, B; sortby) -> values @@ -553,12 +579,20 @@ julia> B """ function eigvals!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasReal issymmetric(A) && isposdef(B) && return sorteig!(eigvals!(Symmetric(A), Symmetric(B)), sortby) - alphar, alphai, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B) + if LAPACK.version() < v"3.6.0" + alphar, alphai, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B) + else + alphar, alphai, beta, vl, vr = LAPACK.ggev3!('N', 'N', A, B) + end return sorteig!((iszero(alphai) ? alphar : complex.(alphar, alphai))./beta, sortby) end function eigvals!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function,Nothing}=eigsortby) where T<:BlasComplex ishermitian(A) && isposdef(B) && return sorteig!(eigvals!(Hermitian(A), Hermitian(B)), sortby) - alpha, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B) + if LAPACK.version() < v"3.6.0" + alpha, beta, vl, vr = LAPACK.ggev!('N', 'N', A, B) + else + alpha, beta, vl, vr = LAPACK.ggev3!('N', 'N', A, B) + end return sorteig!(alpha./beta, sortby) end @@ -586,8 +620,8 @@ julia> eigvals(A,B) ``` """ function eigvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB} - S = promote_type(eigtype(TA),TB) - return eigvals!(copymutable_oftype(A, S), copymutable_oftype(B, S); kws...) + S = promote_type(eigtype(TA), TB) + return eigvals!(copy_similar(A, S), copy_similar(B, S); kws...) 
end """ diff --git a/stdlib/LinearAlgebra/src/exceptions.jl b/stdlib/LinearAlgebra/src/exceptions.jl index ae29b8bc2f7b9..574decf79fc07 100644 --- a/stdlib/LinearAlgebra/src/exceptions.jl +++ b/stdlib/LinearAlgebra/src/exceptions.jl @@ -38,7 +38,7 @@ function Base.showerror(io::IO, ex::PosDefException) else print(io, "positive definite") end - print(io, "; Cholesky factorization failed.") + print(io, "; Factorization failed.") end struct RankDeficientException <: Exception @@ -50,7 +50,7 @@ end Exception thrown when a matrix factorization/solve encounters a zero in a pivot (diagonal) position and cannot proceed. This may *not* mean that the matrix is singular: -it may be fruitful to switch to a diffent factorization such as pivoted LU +it may be fruitful to switch to a different factorization such as pivoted LU that can re-order variables to eliminate spurious zero pivots. The `info` field indicates the location of (one of) the zero pivot(s). """ diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl index 83ec4e1187d40..6f5a631cf9164 100644 --- a/stdlib/LinearAlgebra/src/factorization.jl +++ b/stdlib/LinearAlgebra/src/factorization.jl @@ -11,9 +11,58 @@ matrix factorizations. """ abstract type Factorization{T} end +""" + AdjointFactorization + +Lazy wrapper type for the adjoint of the underlying `Factorization` object. Usually, the +`AdjointFactorization` constructor should not be called directly, use +[`adjoint(:: Factorization)`](@ref) instead. +""" +struct AdjointFactorization{T,S<:Factorization} <: Factorization{T} + parent::S +end +AdjointFactorization(F::Factorization) = + AdjointFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F) + +""" + TransposeFactorization + +Lazy wrapper type for the transpose of the underlying `Factorization` object. Usually, the +`TransposeFactorization` constructor should not be called directly, use +[`transpose(:: Factorization)`](@ref) instead. +""" +struct TransposeFactorization{T,S<:Factorization} <: Factorization{T} + parent::S +end +TransposeFactorization(F::Factorization) = + TransposeFactorization{Base.promote_op(adjoint,eltype(F)),typeof(F)}(F) + eltype(::Type{<:Factorization{T}}) where {T} = T -size(F::Adjoint{<:Any,<:Factorization}) = reverse(size(parent(F))) -size(F::Transpose{<:Any,<:Factorization}) = reverse(size(parent(F))) +size(F::AdjointFactorization) = reverse(size(parent(F))) +size(F::TransposeFactorization) = reverse(size(parent(F))) +size(F::Union{AdjointFactorization,TransposeFactorization}, d::Integer) = d in (1, 2) ? size(F)[d] : 1 +parent(F::Union{AdjointFactorization,TransposeFactorization}) = F.parent + +""" + adjoint(F::Factorization) + +Lazy adjoint of the factorization `F`. By default, returns an +[`AdjointFactorization`](@ref) wrapper. +""" +adjoint(F::Factorization) = AdjointFactorization(F) +""" + transpose(F::Factorization) + +Lazy transpose of the factorization `F`. By default, returns a [`TransposeFactorization`](@ref), +except for `Factorization`s with real `eltype`, in which case returns an [`AdjointFactorization`](@ref). 
+""" +transpose(F::Factorization) = TransposeFactorization(F) +transpose(F::Factorization{<:Real}) = AdjointFactorization(F) +adjoint(F::AdjointFactorization) = F.parent +transpose(F::TransposeFactorization) = F.parent +transpose(F::AdjointFactorization{<:Real}) = F.parent +conj(A::TransposeFactorization) = adjoint(A.parent) +conj(A::AdjointFactorization) = transpose(A.parent) checkpositivedefinite(info) = info == 0 || throw(PosDefException(info)) checknonsingular(info, ::RowMaximum) = info == 0 || throw(SingularException(info)) @@ -32,12 +81,12 @@ Test that a factorization of a matrix succeeded. ```jldoctest julia> F = cholesky([1 0; 0 1]); -julia> LinearAlgebra.issuccess(F) +julia> issuccess(F) true julia> F = lu([1 0; 0 0]; check = false); -julia> LinearAlgebra.issuccess(F) +julia> issuccess(F) false ``` """ @@ -54,70 +103,63 @@ function det(F::Factorization) end convert(::Type{T}, f::T) where {T<:Factorization} = f -convert(::Type{T}, f::Factorization) where {T<:Factorization} = T(f) +convert(::Type{T}, f::Factorization) where {T<:Factorization} = T(f)::T -convert(::Type{T}, f::Factorization) where {T<:AbstractArray} = T(f) +convert(::Type{T}, f::Factorization) where {T<:AbstractArray} = T(f)::T ### General promotion rules Factorization{T}(F::Factorization{T}) where {T} = F -# This is a bit odd since the return is not a Factorization but it works well in generic code -Factorization{T}(A::Adjoint{<:Any,<:Factorization}) where {T} = +# This no longer looks odd since the return _is_ a Factorization! +Factorization{T}(A::AdjointFactorization) where {T} = adjoint(Factorization{T}(parent(A))) +Factorization{T}(A::TransposeFactorization) where {T} = + transpose(Factorization{T}(parent(A))) inv(F::Factorization{T}) where {T} = (n = size(F, 1); ldiv!(F, Matrix{T}(I, n, n))) Base.hash(F::Factorization, h::UInt) = mapreduce(f -> hash(getfield(F, f)), hash, 1:nfields(F); init=h) Base.:(==)( F::T, G::T) where {T<:Factorization} = all(f -> getfield(F, f) == getfield(G, f), 1:nfields(F)) Base.isequal(F::T, G::T) where {T<:Factorization} = all(f -> isequal(getfield(F, f), getfield(G, f)), 1:nfields(F))::Bool -function Base.show(io::IO, x::Adjoint{<:Any,<:Factorization}) - print(io, "Adjoint of ") +function Base.show(io::IO, x::AdjointFactorization) + print(io, "adjoint of ") show(io, parent(x)) end -function Base.show(io::IO, x::Transpose{<:Any,<:Factorization}) - print(io, "Transpose of ") +function Base.show(io::IO, x::TransposeFactorization) + print(io, "transpose of ") show(io, parent(x)) end -function Base.show(io::IO, ::MIME"text/plain", x::Adjoint{<:Any,<:Factorization}) - print(io, "Adjoint of ") +function Base.show(io::IO, ::MIME"text/plain", x::AdjointFactorization) + print(io, "adjoint of ") show(io, MIME"text/plain"(), parent(x)) end -function Base.show(io::IO, ::MIME"text/plain", x::Transpose{<:Any,<:Factorization}) - print(io, "Transpose of ") +function Base.show(io::IO, ::MIME"text/plain", x::TransposeFactorization) + print(io, "transpose of ") show(io, MIME"text/plain"(), parent(x)) end +function (\)(F::Factorization, B::AbstractVecOrMat) + require_one_based_indexing(B) + TFB = typeof(oneunit(eltype(F)) \ oneunit(eltype(B))) + ldiv!(F, copy_similar(B, TFB)) +end +(\)(F::TransposeFactorization, B::AbstractVecOrMat) = conj!(adjoint(F.parent) \ conj.(B)) # With a real lhs and complex rhs with the same precision, we can reinterpret # the complex rhs as a real rhs with twice the number of columns or rows -function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal +function 
(\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} require_one_based_indexing(B) c2r = reshape(copy(transpose(reinterpret(T, reshape(B, (1, length(B)))))), size(B, 1), 2*size(B, 2)) x = ldiv!(F, c2r) return reshape(copy(reinterpret(Complex{T}, copy(transpose(reshape(x, div(length(x), 2), 2))))), _ret_size(F, B)) end -function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where T<:BlasReal - require_one_based_indexing(B) - x = rdiv!(copy(reinterpret(T, B)), F) - return copy(reinterpret(Complex{T}, x)) -end - -function \(F::Union{Factorization, Adjoint{<:Any,<:Factorization}}, B::AbstractVecOrMat) - require_one_based_indexing(B) - TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F))) - ldiv!(F, copy_similar(B, TFB)) -end - -function /(B::AbstractMatrix, F::Union{Factorization, Adjoint{<:Any,<:Factorization}}) - require_one_based_indexing(B) - TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F))) - rdiv!(copy_similar(B, TFB), F) -end -/(adjB::AdjointAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjB.parent) -/(B::TransposeAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjoint(B)) - +# don't do the reinterpretation for [Adjoint/Transpose]Factorization +(\)(F::TransposeFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = + conj!(adjoint(parent(F)) \ conj.(B)) +(\)(F::AdjointFactorization{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} = + @invoke \(F::typeof(F), B::VecOrMat) function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector) require_one_based_indexing(Y, B) - m, n = size(A, 1), size(A, 2) + m, n = size(A) if m > n Bc = copy(B) ldiv!(A, Bc) @@ -128,7 +170,7 @@ function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector) end function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix) require_one_based_indexing(Y, B) - m, n = size(A, 1), size(A, 2) + m, n = size(A) if m > n Bc = copy(B) ldiv!(A, Bc) @@ -139,13 +181,26 @@ function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix) end end -# fallback methods for transposed solves -\(F::Transpose{<:Any,<:Factorization{<:Real}}, B::AbstractVecOrMat) = adjoint(F.parent) \ B -\(F::Transpose{<:Any,<:Factorization}, B::AbstractVecOrMat) = conj.(adjoint(F.parent) \ conj.(B)) - -/(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent) -/(B::AbstractMatrix, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent)) -/(B::AdjointAbsVec, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent) -/(B::TransposeAbsVec, F::Transpose{<:Any,<:Factorization{<:Real}}) = B / adjoint(F.parent) -/(B::AdjointAbsVec, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent)) -/(B::TransposeAbsVec, F::Transpose{<:Any,<:Factorization}) = conj.(conj.(B) / adjoint(F.parent)) +function (/)(B::AbstractMatrix, F::Factorization) + require_one_based_indexing(B) + TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F))) + rdiv!(copy_similar(B, TFB), F) +end +# reinterpretation trick for complex lhs and real factorization +function (/)(B::Union{Matrix{Complex{T}},AdjOrTrans{Complex{T},Vector{Complex{T}}}}, F::Factorization{T}) where {T<:BlasReal} + require_one_based_indexing(B) + x = rdiv!(copy(reinterpret(T, B)), F) + return copy(reinterpret(Complex{T}, x)) +end +# don't do the reinterpretation for [Adjoint/Transpose]Factorization +(/)(B::Union{Matrix{Complex{T}},AdjOrTrans{Complex{T},Vector{Complex{T}}}}, F::TransposeFactorization{T}) where {T<:BlasReal} = + @invoke 
/(B::AbstractMatrix, F::Factorization) +(/)(B::Matrix{Complex{T}}, F::AdjointFactorization{T}) where {T<:BlasReal} = + @invoke /(B::AbstractMatrix, F::Factorization) +(/)(B::Adjoint{Complex{T},Vector{Complex{T}}}, F::AdjointFactorization{T}) where {T<:BlasReal} = + (F' \ B')' +(/)(B::Transpose{Complex{T},Vector{Complex{T}}}, F::TransposeFactorization{T}) where {T<:BlasReal} = + transpose(transpose(F) \ transpose(B)) + +rdiv!(B::AbstractMatrix, A::TransposeFactorization) = transpose(ldiv!(A.parent, transpose(B))) +rdiv!(B::AbstractMatrix, A::AdjointFactorization) = adjoint(ldiv!(A.parent, adjoint(B))) diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl index 36ced82eb6c7d..ca01646820ccf 100644 --- a/stdlib/LinearAlgebra/src/generic.jl +++ b/stdlib/LinearAlgebra/src/generic.jl @@ -338,7 +338,7 @@ julia> triu(a) 0.0 0.0 0.0 1.0 ``` """ -triu(M::AbstractMatrix) = triu!(copy(M)) +triu(M::AbstractMatrix) = triu!(copymutable(M)) """ tril(M) @@ -362,12 +362,12 @@ julia> tril(a) 1.0 1.0 1.0 1.0 ``` """ -tril(M::AbstractMatrix) = tril!(copy(M)) +tril(M::AbstractMatrix) = tril!(copymutable(M)) """ triu(M, k::Integer) -Returns the upper triangle of `M` starting from the `k`th superdiagonal. +Return the upper triangle of `M` starting from the `k`th superdiagonal. # Examples ```jldoctest @@ -393,12 +393,12 @@ julia> triu(a,-3) 1.0 1.0 1.0 1.0 ``` """ -triu(M::AbstractMatrix,k::Integer) = triu!(copy(M),k) +triu(M::AbstractMatrix,k::Integer) = triu!(copymutable(M),k) """ tril(M, k::Integer) -Returns the lower triangle of `M` starting from the `k`th superdiagonal. +Return the lower triangle of `M` starting from the `k`th superdiagonal. # Examples ```jldoctest @@ -424,7 +424,7 @@ julia> tril(a,-3) 1.0 0.0 0.0 0.0 ``` """ -tril(M::AbstractMatrix,k::Integer) = tril!(copy(M),k) +tril(M::AbstractMatrix,k::Integer) = tril!(copymutable(M),k) """ triu!(M) @@ -461,7 +461,7 @@ norm_sqr(x::Union{T,Complex{T},Rational{T}}) where {T<:Integer} = abs2(float(x)) function generic_norm2(x) maxabs = normInf(x) - (iszero(maxabs) || isinf(maxabs)) && return maxabs + (ismissing(maxabs) || iszero(maxabs) || isinf(maxabs)) && return maxabs (v, s) = iterate(x)::Tuple T = typeof(maxabs) if isfinite(length(x)*maxabs*maxabs) && !iszero(maxabs*maxabs) # Scaling not necessary @@ -472,6 +472,7 @@ function generic_norm2(x) (v, s) = y sum += norm_sqr(v) end + ismissing(sum) && return missing return convert(T, sqrt(sum)) else sum = abs2(norm(v)/maxabs) @@ -481,6 +482,7 @@ function generic_norm2(x) (v, s) = y sum += (norm(v)/maxabs)^2 end + ismissing(sum) && return missing return convert(T, maxabs*sqrt(sum)) end end @@ -491,7 +493,7 @@ function generic_normp(x, p) (v, s) = iterate(x)::Tuple if p > 1 || p < -1 # might need to rescale to avoid overflow maxabs = p > 1 ? normInf(x) : normMinusInf(x) - (iszero(maxabs) || isinf(maxabs)) && return maxabs + (ismissing(maxabs) || iszero(maxabs) || isinf(maxabs)) && return maxabs T = typeof(maxabs) else T = typeof(float(norm(v))) @@ -503,15 +505,18 @@ function generic_normp(x, p) y = iterate(x, s) y === nothing && break (v, s) = y + ismissing(v) && return missing sum += norm(v)^spp end return convert(T, sum^inv(spp)) else # rescaling sum = (norm(v)/maxabs)^spp + ismissing(sum) && return missing while true y = iterate(x, s) y === nothing && break (v, s) = y + ismissing(v) && return missing sum += (norm(v)/maxabs)^spp end return convert(T, maxabs*sum^inv(spp)) @@ -750,8 +755,8 @@ This is equivalent to [`norm`](@ref). 
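The `ismissing` branches added to `generic_norm2` and `generic_normp` above make the generic norms propagate `missing` rather than throw; a small sketch of the intended behavior (input vectors chosen arbitrarily):

```julia
julia> using LinearAlgebra

julia> norm([3.0, missing])        # 2-norm
missing

julia> norm([3.0, missing], 3)     # generic p-norm path
missing
```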
@inline opnorm(x::Number, p::Real=2) = norm(x, p) """ - opnorm(A::Adjoint{<:Any,<:AbstracVector}, q::Real=2) - opnorm(A::Transpose{<:Any,<:AbstracVector}, q::Real=2) + opnorm(A::Adjoint{<:Any,<:AbstractVector}, q::Real=2) + opnorm(A::Transpose{<:Any,<:AbstractVector}, q::Real=2) For Adjoint/Transpose-wrapped vectors, return the operator ``q``-norm of `A`, which is equivalent to the `p`-norm with value `p = q/(q-1)`. They coincide at `p = q = 2`. @@ -800,7 +805,7 @@ opnorm(v::AdjointAbsVec, q::Real) = q == Inf ? norm(conj(v.parent), 1) : norm(co opnorm(v::AdjointAbsVec) = norm(conj(v.parent)) opnorm(v::TransposeAbsVec) = norm(v.parent) -norm(v::Union{TransposeAbsVec,AdjointAbsVec}, p::Real) = norm(v.parent, p) +norm(v::AdjOrTrans, p::Real) = norm(v.parent, p) """ dot(x, y) @@ -942,13 +947,22 @@ dot(x::AbstractVector, transA::Transpose{<:Real}, y::AbstractVector) = adjoint(d rank(A::AbstractMatrix; atol::Real=0, rtol::Real=atol>0 ? 0 : n*ϵ) rank(A::AbstractMatrix, rtol::Real) -Compute the rank of a matrix by counting how many singular -values of `A` have magnitude greater than `max(atol, rtol*σ₁)` where `σ₁` is -`A`'s largest singular value. `atol` and `rtol` are the absolute and relative +Compute the numerical rank of a matrix by counting how many outputs of +`svdvals(A)` are greater than `max(atol, rtol*σ₁)` where `σ₁` is `A`'s largest +calculated singular value. `atol` and `rtol` are the absolute and relative tolerances, respectively. The default relative tolerance is `n*ϵ`, where `n` is the size of the smallest dimension of `A`, and `ϵ` is the [`eps`](@ref) of the element type of `A`. +!!! note + Numerical rank can be a sensitive and imprecise characterization of + ill-conditioned matrices with singular values that are close to the threshold + tolerance `max(atol, rtol*σ₁)`. In such cases, slight perturbations to the + singular-value computation or to the matrix can change the result of `rank` + by pushing one or more singular values across the threshold. These variations + can even occur due to changes in floating-point errors between different Julia + versions, architectures, compilers, or operating systems. + !!! compat "Julia 1.1" The `atol` and `rtol` keyword arguments requires at least Julia 1.1. In Julia 1.0 `rtol` is available as a positional argument, but this @@ -976,9 +990,9 @@ function rank(A::AbstractMatrix; atol::Real = 0.0, rtol::Real = (min(size(A)...) isempty(A) && return 0 # 0-dimensional case s = svdvals(A) tol = max(atol, rtol*s[1]) - count(x -> x > tol, s) + count(>(tol), s) end -rank(x::Number) = iszero(x) ? 0 : 1 +rank(x::Union{Number,AbstractVector}) = iszero(x) ? 0 : 1 """ tr(M) @@ -1141,6 +1155,9 @@ function (/)(A::AbstractVecOrMat, B::AbstractVecOrMat) size(A,2) != size(B,2) && throw(DimensionMismatch("Both inputs should have the same number of columns")) return copy(adjoint(adjoint(B) \ adjoint(A))) end +# \(A::StridedMatrix,x::Number) = inv(A)*x Should be added at some point when the old elementwise version has been deprecated long enough +# /(x::Number,A::StridedMatrix) = x*inv(A) +/(x::Number, v::AbstractVector) = x*pinv(v) cond(x::Number) = iszero(x) ? 
Inf : 1.0 cond(x::Number, p) = cond(x) @@ -1267,16 +1284,17 @@ false julia> istriu(a, -1) true -julia> b = [1 im; 0 -1] -2×2 Matrix{Complex{Int64}}: - 1+0im 0+1im - 0+0im -1+0im - -julia> istriu(b) -true +julia> c = [1 1 1; 1 1 1; 0 1 1] +3×3 Matrix{Int64}: + 1 1 1 + 1 1 1 + 0 1 1 -julia> istriu(b, 1) +julia> istriu(c) false + +julia> istriu(c, -1) +true ``` """ function istriu(A::AbstractMatrix, k::Integer = 0) @@ -1311,16 +1329,17 @@ false julia> istril(a, 1) true -julia> b = [1 0; -im -1] -2×2 Matrix{Complex{Int64}}: - 1+0im 0+0im - 0-1im -1+0im - -julia> istril(b) -true +julia> c = [1 1 0; 1 1 1; 1 1 1] +3×3 Matrix{Int64}: + 1 1 0 + 1 1 1 + 1 1 1 -julia> istril(b, -1) +julia> istril(c) false + +julia> istril(c, 1) +true ``` """ function istril(A::AbstractMatrix, k::Integer = 0) @@ -1419,9 +1438,7 @@ isdiag(x::Number) = true axpy!(α, x::AbstractArray, y::AbstractArray) Overwrite `y` with `x * α + y` and return `y`. -If `x` and `y` have the same axes, it's equivalent with `y .+= x .* a` - -See also [`BLAS.axpy!`](@ref) +If `x` and `y` have the same axes, it's equivalent with `y .+= x .* a`. # Examples ```jldoctest @@ -1441,10 +1458,11 @@ function axpy!(α, x::AbstractArray, y::AbstractArray) if n != length(y) throw(DimensionMismatch("x has length $n, but y has length $(length(y))")) end + iszero(α) && return y for (IY, IX) in zip(eachindex(y), eachindex(x)) @inbounds y[IY] += x[IX]*α end - y + return y end function axpy!(α, x::AbstractArray, rx::AbstractArray{<:Integer}, y::AbstractArray, ry::AbstractArray{<:Integer}) @@ -1455,19 +1473,18 @@ function axpy!(α, x::AbstractArray, rx::AbstractArray{<:Integer}, y::AbstractAr elseif !checkindex(Bool, eachindex(IndexLinear(), y), ry) throw(BoundsError(y, ry)) end + iszero(α) && return y for (IY, IX) in zip(eachindex(ry), eachindex(rx)) @inbounds y[ry[IY]] += x[rx[IX]]*α end - y + return y end """ axpby!(α, x::AbstractArray, β, y::AbstractArray) Overwrite `y` with `x * α + y * β` and return `y`. -If `x` and `y` have the same axes, it's equivalent with `y .= x .* a .+ y .* β` - -See also [`BLAS.axpby!`](@ref) +If `x` and `y` have the same axes, it's equivalent with `y .= x .* a .+ y .* β`. # Examples ```jldoctest @@ -1486,6 +1503,7 @@ function axpby!(α, x::AbstractArray, β, y::AbstractArray) if length(x) != length(y) throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))")) end + iszero(α) && isone(β) && return y for (IX, IY) in zip(eachindex(x), eachindex(y)) @inbounds y[IY] = x[IX]*α + y[IY]*β end @@ -1578,7 +1596,11 @@ end ξ1/ν end -# apply reflector from left +""" + reflectorApply!(x, τ, A) + +Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - τ*[1; x] * [1; x]')*A`. +""" @inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat) require_one_based_indexing(x) m, n = size(A, 1), size(A, 2) @@ -1656,15 +1678,19 @@ julia> logabsdet(B) (0.6931471805599453, 1.0) ``` """ -logabsdet(A::AbstractMatrix) = logabsdet(lu(A, check=false)) - +function logabsdet(A::AbstractMatrix) + if istriu(A) || istril(A) + return logabsdet(UpperTriangular(A)) + end + return logabsdet(lu(A, check=false)) +end logabsdet(a::Number) = log(abs(a)), sign(a) """ logdet(M) -Log of matrix determinant. Equivalent to `log(det(M))`, but may provide -increased accuracy and/or speed. +Logarithm of matrix determinant. Equivalent to `log(det(M))`, but may provide +increased accuracy and avoids overflow/underflow. 
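The overflow claim can be checked directly; a sketch, with a 200×200 scaled identity chosen only because its determinant exceeds `floatmax(Float64)`:

```julia
julia> using LinearAlgebra

julia> A = Matrix(100.0I, 200, 200);

julia> det(A)                      # 100^200 overflows Float64
Inf

julia> logdet(A) ≈ 200 * log(100)  # computed from the factorization, without forming det(A)
true
```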
# Examples ```jldoctest @@ -1734,7 +1760,7 @@ Calculates the determinant of a matrix using the [Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm). Also refer to [`det_bareiss!`](@ref). """ -det_bareiss(M) = det_bareiss!(copy(M)) +det_bareiss(M) = det_bareiss!(copymutable(M)) @@ -1771,10 +1797,11 @@ function isapprox(x::AbstractArray, y::AbstractArray; nans::Bool=false, norm::Function=norm) d = norm(x - y) if isfinite(d) - return d <= max(atol, rtol*max(norm(x), norm(y))) + return iszero(rtol) ? d <= atol : d <= max(atol, rtol*max(norm(x), norm(y))) else # Fall back to a component-wise approximate comparison - return all(ab -> isapprox(ab[1], ab[2]; rtol=rtol, atol=atol, nans=nans), zip(x, y)) + # (mapreduce instead of all for greater generality [#44893]) + return mapreduce((a, b) -> isapprox(a, b; rtol=rtol, atol=atol, nans=nans), &, x, y) end end @@ -1790,21 +1817,18 @@ function normalize!(a::AbstractArray, p::Real=2) __normalize!(a, nrm) end -@inline function __normalize!(a::AbstractArray, nrm::Real) +@inline function __normalize!(a::AbstractArray, nrm) # The largest positive floating point number whose inverse is less than infinity δ = inv(prevfloat(typemax(nrm))) - if nrm ≥ δ # Safe to multiply with inverse invnrm = inv(nrm) rmul!(a, invnrm) - else # scale elements to avoid overflow εδ = eps(one(nrm))/δ rmul!(a, εδ) rmul!(a, inv(nrm*εδ)) end - - a + return a end """ @@ -1873,3 +1897,48 @@ end normalize(x) = x / norm(x) normalize(x, p::Real) = x / norm(x, p) + +""" + copytrito!(B, A, uplo) -> B + +Copies a triangular part of a matrix `A` to another matrix `B`. +`uplo` specifies the part of the matrix `A` to be copied to `B`. +Set `uplo = 'L'` for the lower triangular part or `uplo = 'U' +for the upper triangular part. + +!!! compat "Julia 1.11" + `copytrito!` requires at least Julia 1.11. + +# Examples +```jldoctest +julia> A = [1 2 ; 3 4]; + +julia> B = [0 0 ; 0 0]; + +julia> copytrito!(B, A, 'L') +2×2 Matrix{Int64}: + 1 0 + 3 4 +``` +""" +function copytrito!(B::AbstractMatrix, A::AbstractMatrix, uplo::AbstractChar) + require_one_based_indexing(A, B) + BLAS.chkuplo(uplo) + m,n = size(A) + m1,n1 = size(B) + (m1 < m || n1 < n) && throw(DimensionMismatch("B of size ($m1,$n1) should have at least the same number of rows and columns than A of size ($m,$n)")) + if uplo == 'U' + for j=1:n + for i=1:min(j,m) + @inbounds B[i,j] = A[i,j] + end + end + else # uplo == 'L' + for j=1:n + for i=j:m + @inbounds B[i,j] = A[i,j] + end + end + end + return B +end diff --git a/stdlib/LinearAlgebra/src/givens.jl b/stdlib/LinearAlgebra/src/givens.jl index 155d8d6f23ce6..4239c8dc4ed48 100644 --- a/stdlib/LinearAlgebra/src/givens.jl +++ b/stdlib/LinearAlgebra/src/givens.jl @@ -3,23 +3,24 @@ # givensAlgorithm functions are derived from LAPACK, see below abstract type AbstractRotation{T} end +struct AdjointRotation{T,S<:AbstractRotation{T}} <: AbstractRotation{T} + R::S +end transpose(R::AbstractRotation) = error("transpose not implemented for $(typeof(R)). 
Consider using adjoint instead of transpose.") -function (*)(R::AbstractRotation{T}, A::AbstractVecOrMat{S}) where {T,S} +(*)(R::AbstractRotation, A::AbstractVector) = _rot_mul_vecormat(R, A) +(*)(R::AbstractRotation, A::AbstractMatrix) = _rot_mul_vecormat(R, A) +function _rot_mul_vecormat(R::AbstractRotation{T}, A::AbstractVecOrMat{S}) where {T,S} TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) lmul!(convert(AbstractRotation{TS}, R), copy_similar(A, TS)) end -(*)(A::AbstractVector, adjR::Adjoint{<:Any,<:AbstractRotation}) = _absvecormat_mul_adjrot(A, adjR) -(*)(A::AbstractMatrix, adjR::Adjoint{<:Any,<:AbstractRotation}) = _absvecormat_mul_adjrot(A, adjR) -function _absvecormat_mul_adjrot(A::AbstractVecOrMat{T}, adjR::Adjoint{<:Any,<:AbstractRotation{S}}) where {T,S} - R = adjR.parent - TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) - rmul!(TS.(A), convert(AbstractRotation{TS}, R)') -end -function(*)(A::AbstractMatrix{T}, R::AbstractRotation{S}) where {T,S} + +(*)(A::AbstractVector, R::AbstractRotation) = _vecormat_mul_rot(A, R) +(*)(A::AbstractMatrix, R::AbstractRotation) = _vecormat_mul_rot(A, R) +function _vecormat_mul_rot(A::AbstractVecOrMat{T}, R::AbstractRotation{S}) where {T,S} TS = typeof(zero(T)*zero(S) + zero(T)*zero(S)) - rmul!(TS.(A), convert(AbstractRotation{TS}, R)) + rmul!(copy_similar(A, TS), convert(AbstractRotation{TS}, R)) end """ @@ -44,7 +45,9 @@ struct Rotation{T} <: AbstractRotation{T} end convert(::Type{T}, r::T) where {T<:AbstractRotation} = r -convert(::Type{T}, r::AbstractRotation) where {T<:AbstractRotation} = T(r) +convert(::Type{T}, r::AbstractRotation) where {T<:AbstractRotation} = T(r)::T +convert(::Type{AbstractRotation{T}}, r::AdjointRotation) where {T} = convert(AbstractRotation{T}, r.R)' +convert(::Type{AbstractRotation{T}}, r::AdjointRotation{T}) where {T} = r Givens(i1, i2, c, s) = Givens(i1, i2, promote(c, s)...) Givens{T}(G::Givens{T}) where {T} = G @@ -55,12 +58,11 @@ AbstractRotation{T}(G::Givens) where {T} = Givens{T}(G) AbstractRotation{T}(R::Rotation) where {T} = Rotation{T}(R) adjoint(G::Givens) = Givens(G.i1, G.i2, G.c', -G.s) -adjoint(R::Rotation) = Adjoint(R) -function Base.copy(aG::Adjoint{<:Any,<:Givens}) - G = aG.parent - return Givens(G.i1, G.i2, conj(G.c), -G.s) -end -Base.copy(aR::Adjoint{<:Any,Rotation{T}}) where {T} = Rotation{T}(reverse!([r' for r in aR.parent.rotations])) +adjoint(R::AbstractRotation) = AdjointRotation(R) +adjoint(adjR::AdjointRotation) = adjR.R + +Base.copy(aR::AdjointRotation{T,Rotation{T}}) where {T} = + Rotation{T}([r' for r in Iterators.reverse(aR.R.rotations)]) floatmin2(::Type{Float32}) = reinterpret(Float32, 0x26000000) floatmin2(::Type{Float64}) = reinterpret(Float64, 0x21a0000000000000) @@ -74,7 +76,6 @@ floatmin2(::Type{T}) where {T} = (twopar = 2one(T); twopar^trunc(Integer,log(flo # NAG Ltd. function givensAlgorithm(f::T, g::T) where T<:AbstractFloat onepar = one(T) - twopar = 2one(T) T0 = typeof(onepar) # dimensionless zeropar = T0(zero(T)) # must be dimensionless @@ -103,7 +104,7 @@ function givensAlgorithm(f::T, g::T) where T<:AbstractFloat f1 *= safmn2 g1 *= safmn2 scalepar = max(abs(f1), abs(g1)) - if scalepar < safmx2u break end + if scalepar < safmx2u || count >= 20 break end end r = sqrt(f1*f1 + g1*g1) cs = f1/r @@ -147,7 +148,7 @@ end # Univ. of Colorado Denver # NAG Ltd. 
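A short sketch of the Givens rotation API touched above (the vector is an arbitrary example); `givens(x, i1, i2)` returns a rotation that zeroes `x[i2]` together with the resulting value `r`:

```julia
julia> using LinearAlgebra

julia> x = [3.0, 4.0];

julia> G, r = givens(x, 1, 2);

julia> G * x ≈ [5.0, 0.0]      # second component rotated to zero, r ≈ hypot(3, 4)
true

julia> G' * (G * x) ≈ x        # the adjoint rotation undoes it
true
```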
function givensAlgorithm(f::Complex{T}, g::Complex{T}) where T<:AbstractFloat - twopar, onepar = 2one(T), one(T) + onepar = one(T) T0 = typeof(onepar) # dimensionless zeropar = T0(zero(T)) # must be dimensionless czero = complex(zeropar) @@ -168,7 +169,7 @@ function givensAlgorithm(f::Complex{T}, g::Complex{T}) where T<:AbstractFloat fs *= safmn2 gs *= safmn2 scalepar *= safmn2 - if scalepar < safmx2u break end + if scalepar < safmx2u || count >= 20 break end end elseif scalepar <= safmn2u if g == 0 @@ -191,13 +192,13 @@ function givensAlgorithm(f::Complex{T}, g::Complex{T}) where T<:AbstractFloat # This is a rare case: F is very small. if f == 0 cs = zero(T) - r = complex(hypot(real(g), imag(g))) + r = complex(abs(g)) # do complex/real division explicitly with two real divisions - d = hypot(real(gs), imag(gs)) + d = abs(gs) sn = complex(real(gs)/d, -imag(gs)/d) return cs, sn, r end - f2s = hypot(real(fs), imag(fs)) + f2s = abs(fs) # g2 and g2s are accurate # g2 is at least safmin, and g2s is at least safmn2 g2s = sqrt(g2) @@ -212,7 +213,7 @@ function givensAlgorithm(f::Complex{T}, g::Complex{T}) where T<:AbstractFloat # make sure abs(ff) = 1 # do complex/real division explicitly with 2 real divisions if abs1(f) > 1 - d = hypot(real(f), imag(f)) + d = abs(f) ff = complex(real(f)/d, imag(f)/d) else dr = safmx2*real(f) @@ -291,7 +292,7 @@ function givens(f::T, g::T, i1::Integer, i2::Integer) where T c, s, r = givensAlgorithm(f, g) if i1 > i2 s = -conj(s) - i1,i2 = i2,i1 + i1, i2 = i2, i1 end Givens(i1, i2, c, s), r end @@ -311,7 +312,7 @@ B[i2,j] = 0 See also [`LinearAlgebra.Givens`](@ref). """ givens(A::AbstractMatrix, i1::Integer, i2::Integer, j::Integer) = - givens(A[i1,j], A[i2,j],i1,i2) + givens(A[i1,j], A[i2,j], i1, i2) """ @@ -329,9 +330,7 @@ B[i2] = 0 See also [`LinearAlgebra.Givens`](@ref). """ -givens(x::AbstractVector, i1::Integer, i2::Integer) = - givens(x[i1], x[i2], i1, i2) - +givens(x::AbstractVector, i1::Integer, i2::Integer) = givens(x[i1], x[i2], i1, i2) function getindex(G::Givens, i::Integer, j::Integer) if i == j @@ -380,29 +379,51 @@ function lmul!(G::Givens, R::Rotation) push!(R.rotations, G) return R end -function lmul!(R::Rotation, A::AbstractMatrix) - @inbounds for i = 1:length(R.rotations) +function rmul!(R::Rotation, G::Givens) + pushfirst!(R.rotations, G) + return R +end + +function lmul!(R::Rotation, A::AbstractVecOrMat) + @inbounds for i in eachindex(R.rotations) lmul!(R.rotations[i], A) end return A end -function rmul!(A::AbstractMatrix, adjR::Adjoint{<:Any,<:Rotation}) - R = adjR.parent - @inbounds for i = 1:length(R.rotations) +function rmul!(A::AbstractMatrix, R::Rotation) + @inbounds for i in eachindex(R.rotations) + rmul!(A, R.rotations[i]) + end + return A +end + +function lmul!(adjR::AdjointRotation{<:Any,<:Rotation}, A::AbstractVecOrMat) + R = adjR.R + @inbounds for i in eachindex(R.rotations) + lmul!(adjoint(R.rotations[i]), A) + end + return A +end +function rmul!(A::AbstractMatrix, adjR::AdjointRotation{<:Any,<:Rotation}) + R = adjR.R + @inbounds for i in eachindex(R.rotations) rmul!(A, adjoint(R.rotations[i])) end return A end -*(G1::Givens{T}, G2::Givens{T}) where {T} = Rotation(push!(push!(Givens{T}[], G2), G1)) - -# TODO: None of the following disambiguation methods are great. They should perhaps -# instead be MethodErrors, or revised. 
-# -# disambiguation methods: *(Adj/Trans of AbsVec or AbsMat, Adj of AbstractRotation) -*(A::Adjoint{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B -*(A::Adjoint{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B -*(A::Transpose{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B -*(A::Transpose{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B -# disambiguation methods: *(Diag/AbsTri, Adj of AbstractRotation) -*(A::Diagonal, B::Adjoint{<:Any,<:AbstractRotation}) = A * copy(B) -*(A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractRotation}) = A * copy(B) + +function *(G1::Givens{S}, G2::Givens{T}) where {S,T} + TS = promote_type(T, S) + Rotation{TS}([convert(AbstractRotation{TS}, G2), convert(AbstractRotation{TS}, G1)]) +end +function *(G::Givens{T}, Gs::Givens{T}...) where {T} + return Rotation([reverse(Gs)..., G]) +end +function *(G::Givens{S}, R::Rotation{T}) where {S,T} + TS = promote_type(T, S) + Rotation(vcat(convert(AbstractRotation{TS}, R).rotations, convert(AbstractRotation{TS}, G))) +end +function *(R::Rotation{S}, G::Givens{T}) where {S,T} + TS = promote_type(T, S) + Rotation(vcat(convert(AbstractRotation{TS}, G), convert(AbstractRotation{TS}, R).rotations)) +end diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index a95a73dfc8819..87f7305f0146c 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -55,7 +55,6 @@ UpperHessenberg{T}(H::UpperHessenberg) where {T} = UpperHessenberg{T}(H.data) UpperHessenberg(A::AbstractMatrix) = UpperHessenberg{eltype(A),typeof(A)}(A) Matrix(H::UpperHessenberg{T}) where {T} = Matrix{T}(H) Array(H::UpperHessenberg) = Matrix(H) -size(H::UpperHessenberg, d) = size(H.data, d) size(H::UpperHessenberg) = size(H.data) parent(H::UpperHessenberg) = H.data @@ -63,22 +62,31 @@ parent(H::UpperHessenberg) = H.data similar(H::UpperHessenberg, ::Type{T}) where {T} = UpperHessenberg(similar(H.data, T)) similar(H::UpperHessenberg, ::Type{T}, dims::Dims{N}) where {T,N} = similar(H.data, T, dims) -AbstractMatrix{T}(H::UpperHessenberg) where {T} = UpperHessenberg(AbstractMatrix{T}(H.data)) +AbstractMatrix{T}(H::UpperHessenberg) where {T} = UpperHessenberg{T}(H) +AbstractMatrix{T}(H::UpperHessenberg{T}) where {T} = copy(H) copy(H::UpperHessenberg) = UpperHessenberg(copy(H.data)) real(H::UpperHessenberg{<:Real}) = H real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1)) imag(H::UpperHessenberg) = UpperHessenberg(triu!(imag(H.data),-1)) +function istriu(A::UpperHessenberg, k::Integer=0) + k <= -1 && return true + return _istriu(A, k) +end + function Matrix{T}(H::UpperHessenberg) where T m,n = size(H) return triu!(copyto!(Matrix{T}(undef, m, n), H.data), -1) end -getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} = +Base.isassigned(H::UpperHessenberg, i::Int, j::Int) = + i <= j+1 ? isassigned(H.data, i, j) : true + +Base.@propagate_inbounds getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} = i <= j+1 ? 
convert(T, H.data[i,j]) : zero(T) -function setindex!(A::UpperHessenberg, x, i::Integer, j::Integer) +Base.@propagate_inbounds function setindex!(A::UpperHessenberg, x, i::Integer, j::Integer) if i > j+1 x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " * "($i, $j) of an UpperHessenberg matrix to a nonzero value ($x)")) @@ -124,71 +132,32 @@ for T = (:Number, :UniformScaling, :Diagonal) end function *(H::UpperHessenberg, U::UpperOrUnitUpperTriangular) - T = typeof(oneunit(eltype(H))*oneunit(eltype(U))) - HH = copy_similar(H, T) - rmul!(HH, U) + HH = mul!(_initarray(*, eltype(H), eltype(U), H), H, U) UpperHessenberg(HH) end function *(U::UpperOrUnitUpperTriangular, H::UpperHessenberg) - T = typeof(oneunit(eltype(H))*oneunit(eltype(U))) - HH = copy_similar(H, T) - lmul!(U, HH) + HH = mul!(_initarray(*, eltype(U), eltype(H), H), U, H) UpperHessenberg(HH) end function /(H::UpperHessenberg, U::UpperTriangular) - T = typeof(oneunit(eltype(H))/oneunit(eltype(U))) - HH = copy_similar(H, T) - rdiv!(HH, U) + HH = _rdiv!(_initarray(/, eltype(H), eltype(U), H), H, U) UpperHessenberg(HH) end function /(H::UpperHessenberg, U::UnitUpperTriangular) - T = typeof(oneunit(eltype(H))/oneunit(eltype(U))) - HH = copy_similar(H, T) - rdiv!(HH, U) + HH = _rdiv!(_initarray(/, eltype(H), eltype(U), H), H, U) UpperHessenberg(HH) end function \(U::UpperTriangular, H::UpperHessenberg) - T = typeof(oneunit(eltype(U))\oneunit(eltype(H))) - HH = copy_similar(H, T) - ldiv!(U, HH) + HH = ldiv!(_initarray(\, eltype(U), eltype(H), H), U, H) UpperHessenberg(HH) end function \(U::UnitUpperTriangular, H::UpperHessenberg) - T = typeof(oneunit(eltype(U))\oneunit(eltype(H))) - HH = copy_similar(H, T) - ldiv!(U, HH) + HH = ldiv!(_initarray(\, eltype(U), eltype(H), H), U, H) UpperHessenberg(HH) end -function *(H::UpperHessenberg, B::Bidiagonal) - TS = promote_op(matprod, eltype(H), eltype(B)) - A = A_mul_B_td!(zeros(TS, size(H)), H, B) - return B.uplo == 'U' ? UpperHessenberg(A) : A -end -function *(B::Bidiagonal, H::UpperHessenberg) - TS = promote_op(matprod, eltype(B), eltype(H)) - A = A_mul_B_td!(zeros(TS, size(H)), B, H) - return B.uplo == 'U' ? UpperHessenberg(A) : A -end - -/(H::UpperHessenberg, B::Bidiagonal) = _rdiv(H, B) -/(H::UpperHessenberg{<:Number}, B::Bidiagonal{<:Number}) = _rdiv(H, B) -function _rdiv(H::UpperHessenberg, B::Bidiagonal) - T = typeof(oneunit(eltype(H))/oneunit(eltype(B))) - A = _rdiv!(zeros(T, size(H)), H, B) - return B.uplo == 'U' ? UpperHessenberg(A) : A -end - -\(B::Bidiagonal{<:Number}, H::UpperHessenberg{<:Number}) = _ldiv(B, H) -\(B::Bidiagonal, H::UpperHessenberg) = _ldiv(B, H) -function _ldiv(B::Bidiagonal, H::UpperHessenberg) - T = typeof(oneunit(eltype(B))\oneunit(eltype(H))) - A = ldiv!(zeros(T, size(H)), B, H) - return B.uplo == 'U' ? 
UpperHessenberg(A) : A -end - # Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory # (in-place in x) by the RQ algorithm from: # @@ -421,10 +390,12 @@ Hessenberg(F::Hessenberg, μ::Number) = Hessenberg(F.factors, F.τ, F.H, F.uplo; copy(F::Hessenberg{<:Any,<:UpperHessenberg}) = Hessenberg(copy(F.factors), copy(F.τ); μ=F.μ) copy(F::Hessenberg{<:Any,<:SymTridiagonal}) = Hessenberg(copy(F.factors), copy(F.τ), copy(F.H), F.uplo; μ=F.μ) -size(F::Hessenberg, d) = size(F.H, d) +size(F::Hessenberg, d::Integer) = size(F.H, d) size(F::Hessenberg) = size(F.H) -adjoint(F::Hessenberg) = Adjoint(F) +transpose(F::Hessenberg{<:Real}) = F' +transpose(::Hessenberg) = + throw(ArgumentError("transpose of Hessenberg decomposition is not supported, consider using adjoint")) # iteration for destructuring into components Base.iterate(S::Hessenberg) = (S.Q, Val(:H)) @@ -434,7 +405,7 @@ Base.iterate(S::Hessenberg, ::Val{:done}) = nothing hessenberg!(A::StridedMatrix{<:BlasFloat}) = Hessenberg(LAPACK.gehrd!(A)...) -function hessenberg!(A::Union{Symmetric{<:BlasReal},Hermitian{<:BlasFloat}}) +function hessenberg!(A::Union{Symmetric{<:BlasReal,<:StridedMatrix},Hermitian{<:BlasFloat,<:StridedMatrix}}) factors, τ, d, e = LAPACK.hetrd!(A.uplo, A.data) return Hessenberg(factors, τ, SymTridiagonal(d, e), A.uplo) end @@ -478,11 +449,7 @@ julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.] julia> F = hessenberg(A) Hessenberg{Float64, UpperHessenberg{Float64, Matrix{Float64}}, Matrix{Float64}, Vector{Float64}, Bool} -Q factor: -3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false}: - 1.0 0.0 0.0 - 0.0 -0.707107 -0.707107 - 0.0 -0.707107 0.707107 +Q factor: 3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false} H factor: 3×3 UpperHessenberg{Float64, Matrix{Float64}}: 4.0 -11.3137 -1.41421 @@ -502,56 +469,27 @@ true ``` """ hessenberg(A::AbstractMatrix{T}) where T = - hessenberg!(copymutable_oftype(A, eigtype(T))) + hessenberg!(eigencopy_oftype(A, eigtype(T))) function show(io::IO, mime::MIME"text/plain", F::Hessenberg) summary(io, F) if !iszero(F.μ) print("\nwith shift μI for μ = ", F.μ) end - println(io, "\nQ factor:") + print(io, "\nQ factor: ") show(io, mime, F.Q) println(io, "\nH factor:") show(io, mime, F.H) end -""" - HessenbergQ <: AbstractQ - -Given a [`Hessenberg`](@ref) factorization object `F`, `F.Q` returns -a `HessenbergQ` object, which is an implicit representation of the unitary -matrix `Q` in the Hessenberg factorization `QHQ'` represented by `F`. -This `F.Q` object can be efficiently multiplied by matrices or vectors, -and can be converted to an ordinary matrix type with `Matrix(F.Q)`. -""" -struct HessenbergQ{T,S<:AbstractMatrix,W<:AbstractVector,sym} <: AbstractQ{T} - uplo::Char - factors::S - τ::W - function HessenbergQ{T,S,W,sym}(uplo::AbstractChar, factors, τ) where {T,S<:AbstractMatrix,W<:AbstractVector,sym} - new(uplo, factors, τ) - end -end -HessenbergQ(F::Hessenberg{<:Any,<:UpperHessenberg,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,false}(F.uplo, F.factors, F.τ) -HessenbergQ(F::Hessenberg{<:Any,<:SymTridiagonal,S,W}) where {S,W} = HessenbergQ{eltype(F.factors),S,W,true}(F.uplo, F.factors, F.τ) - function getproperty(F::Hessenberg, d::Symbol) d === :Q && return HessenbergQ(F) return getfield(F, d) end -size(Q::HessenbergQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim) -size(Q::HessenbergQ) = size(Q, 1), size(Q, 2) - Base.propertynames(F::Hessenberg, private::Bool=false) = (:Q, :H, :μ, (private ? 
(:τ, :factors, :uplo) : ())...) -# HessenbergQ from LAPACK/BLAS (as opposed to Julia libraries like GenericLinearAlgebra) -const BlasHessenbergQ{T,sym} = HessenbergQ{T,<:StridedMatrix{T},<:StridedVector{T},sym} where {T<:BlasFloat,sym} - -## reconstruct the original matrix -Matrix{T}(Q::BlasHessenbergQ{<:Any,false}) where {T} = convert(Matrix{T}, LAPACK.orghr!(1, size(Q.factors, 1), copy(Q.factors), Q.τ)) -Matrix{T}(Q::BlasHessenbergQ{<:Any,true}) where {T} = convert(Matrix{T}, LAPACK.orgtr!(Q.uplo, copy(Q.factors), Q.τ)) AbstractArray(F::Hessenberg) = AbstractMatrix(F) Matrix(F::Hessenberg) = Array(AbstractArray(F)) Array(F::Hessenberg) = Matrix(F) @@ -571,31 +509,6 @@ function AbstractMatrix(F::Hessenberg) end end -# adjoint(Q::HessenbergQ{<:Real}) - -lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} = - LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X) -lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) -rmul!(X::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} = - (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X)) - -lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X) -rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} = - LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X) -lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} = - (Q = adjQ.parent; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X)) -rmul!(X::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} = - (Q = adjQ.parent; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X)) - -lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')' -rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')' -lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', adjQ')' -rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')' - # multiply x by the entries of M in the upper-k triangle, which contains # the entries of the upper-Hessenberg matrix H for k=-1 function rmul_triu!(M::AbstractMatrix, x, k::Integer=0) @@ -686,8 +599,7 @@ function rdiv!(B::AbstractVecOrMat{<:Complex}, F::Hessenberg{<:Complex,<:Any,<:A return B .= Complex.(Br,Bi) end -ldiv!(F::Adjoint{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' -rdiv!(B::AbstractMatrix, F::Adjoint{<:Any,<:Hessenberg}) = ldiv!(F', B')' +ldiv!(F::AdjointFactorization{<:Any,<:Hessenberg}, B::AbstractVecOrMat) = rdiv!(B', F')' det(F::Hessenberg) = det(F.H; shift=F.μ) logabsdet(F::Hessenberg) = logabsdet(F.H; shift=F.μ) diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl index cd438f142a793..51dd73f517a31 100644 --- a/stdlib/LinearAlgebra/src/lapack.jl +++ b/stdlib/LinearAlgebra/src/lapack.jl @@ -8,10 +8,16 @@ Interfaces to LAPACK subroutines. 
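Before the LAPACK wrappers below, a usage sketch of the Hessenberg interface touched above (the matrix is the same arbitrary example used in the `hessenberg` docstring; the shift `2I` is likewise arbitrary):

```julia
julia> using LinearAlgebra

julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.];

julia> F = hessenberg(A);

julia> F.Q * F.H * F.Q' ≈ A
true

julia> b = [1.0, 0.0, 0.0];

julia> (F + 2I) \ b ≈ (A + 2I) \ b   # shifted solve reuses the factorization
true
```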
using ..LinearAlgebra.BLAS: @blasfunc, chkuplo using ..LinearAlgebra: libblastrampoline, BlasFloat, BlasInt, LAPACKException, DimensionMismatch, - SingularException, PosDefException, chkstride1, checksquare,triu, tril, dot + SingularException, PosDefException, chkstride1, checksquare, triu, tril, dot using Base: iszero, require_one_based_indexing + +# Legacy binding maintained for backwards-compatibility but new packages +# should not look at this, instead preferring to parse the output +# of BLAS.get_config() +const liblapack = libblastrampoline + #Generic LAPACK error handlers """ Handle only negative LAPACK error codes @@ -548,12 +554,12 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty # * .. Array Arguments .. # INTEGER IPIV( * ) # DOUBLE PRECISION A( LDA, * ) - function getrf!(A::AbstractMatrix{$elty}) + function getrf!(A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}; check::Bool=true) require_one_based_indexing(A) + check && chkfinite(A) chkstride1(A) m, n = size(A) lda = max(1,stride(A, 2)) - ipiv = similar(A, BlasInt, min(m,n)) info = Ref{BlasInt}() ccall((@blasfunc($getrf), libblastrampoline), Cvoid, (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, @@ -607,7 +613,9 @@ Compute the pivoted `QR` factorization of `A`, `AP = QR` using BLAS level 3. reflectors. The arguments `jpvt` and `tau` are optional and allow for passing preallocated arrays. When passed, `jpvt` must have length greater than or equal to `n` if `A` is an `(m x n)` matrix and `tau` must have length -greater than or equal to the smallest dimension of `A`. +greater than or equal to the smallest dimension of `A`. On entry, if `jpvt[j]` +does not equal zero then the `j`th column of `A` is permuted to the front of +`AP`. `A`, `jpvt`, and `tau` are modified in-place. """ @@ -672,15 +680,13 @@ Returns `A` and `tau` modified in-place. gerqf!(A::AbstractMatrix, tau::AbstractVector) """ - getrf!(A) -> (A, ipiv, info) + getrf!(A, ipiv) -> (A, ipiv, info) -Compute the pivoted `LU` factorization of `A`, `A = LU`. - -Returns `A`, modified in-place, `ipiv`, the pivoting information, and an `info` -code which indicates success (`info = 0`), a singular value in `U` -(`info = i`, in which case `U[i,i]` is singular), or an error code (`info < 0`). +Compute the pivoted `LU` factorization of `A`, `A = LU`. `ipiv` contains the pivoting +information and `info` a code which indicates success (`info = 0`), a singular value +in `U` (`info = i`, in which case `U[i,i]` is singular), or an error code (`info < 0`). """ -getrf!(A::AbstractMatrix, tau::AbstractVector) +getrf!(A::AbstractMatrix, ipiv::AbstractVector; check::Bool=true) """ gelqf!(A) -> (A, tau) @@ -744,6 +750,17 @@ which parameterize the elementary reflectors of the factorization. """ gerqf!(A::AbstractMatrix{<:BlasFloat}) = ((m,n) = size(A); gerqf!(A, similar(A, min(m, n)))) +""" + getrf!(A) -> (A, ipiv, info) + +Compute the pivoted `LU` factorization of `A`, `A = LU`. + +Returns `A`, modified in-place, `ipiv`, the pivoting information, and an `info` +code which indicates success (`info = 0`), a singular value in `U` +(`info = i`, in which case `U[i,i]` is singular), or an error code (`info < 0`). 
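A sketch of calling the new preallocated-`ipiv` method of `getrf!` (the matrix is an arbitrary nonsingular example; older releases only provide `getrf!(A)`):

```julia
julia> using LinearAlgebra

julia> A = [4.0 3.0; 6.0 3.0];

julia> ipiv = Vector{LinearAlgebra.BlasInt}(undef, 2);

julia> _, _, info = LinearAlgebra.LAPACK.getrf!(A, ipiv);

julia> info == 0    # success; A now holds the L and U factors in place
true
```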
+""" +getrf!(A::AbstractMatrix{T}; check::Bool=true) where {T <: BlasFloat} = ((m,n) = size(A); getrf!(A, similar(A, BlasInt, min(m, n)); check)) + ## Tools to compute and apply elementary reflectors for (larfg, elty) in ((:dlarfg_, Float64), @@ -1003,6 +1020,9 @@ for (gels, gesv, getrs, getri, elty) in if n != size(B, 1) throw(DimensionMismatch("B has leading dimension $(size(B,1)), but needs $n")) end + if n != length(ipiv) + throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs to be $n")) + end nrhs = size(B, 2) info = Ref{BlasInt}() ccall((@blasfunc($getrs), libblastrampoline), Cvoid, @@ -2016,9 +2036,9 @@ the orthogonal/unitary matrix `Q` is computed. If `jobu`, `jobv`, or `jobq` is ggsvd3! ## Expert driver and generalized eigenvalue problem -for (geevx, ggev, elty) in - ((:dgeevx_,:dggev_,:Float64), - (:sgeevx_,:sggev_,:Float32)) +for (geevx, ggev, ggev3, elty) in + ((:dgeevx_,:dggev_,:dggev3_,:Float64), + (:sgeevx_,:sggev_,:sggev3_,:Float32)) @eval begin # SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, # VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, @@ -2086,7 +2106,7 @@ for (geevx, ggev, elty) in Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, - Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, + Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Clong, Clong, Clong, Clong), balanc, jobvl, jobvr, sense, n, A, lda, wr, @@ -2153,7 +2173,71 @@ for (geevx, ggev, elty) in Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{BlasInt}, Clong, Clong), + Ref{BlasInt}, Clong, Clong), + jobvl, jobvr, n, A, + lda, B, ldb, alphar, + alphai, beta, vl, ldvl, + vr, ldvr, work, lwork, + info, 1, 1) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(work[1]) + resize!(work, lwork) + end + end + alphar, alphai, beta, vl, vr + end + + # SUBROUTINE DGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + # $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) + # * .. Scalar Arguments .. + # CHARACTER JOBVL, JOBVR + # INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N + # * .. + # * .. Array Arguments .. 
+ # DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + # $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + # $ VR( LDVR, * ), WORK( * ) + function ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) + require_one_based_indexing(A, B) + chkstride1(A,B) + n, m = checksquare(A,B) + if n != m + throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size")) + end + lda = max(1, stride(A, 2)) + ldb = max(1, stride(B, 2)) + alphar = similar(A, $elty, n) + alphai = similar(A, $elty, n) + beta = similar(A, $elty, n) + ldvl = 0 + if jobvl == 'V' + ldvl = n + elseif jobvl == 'N' + ldvl = 1 + else + throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed")) + end + vl = similar(A, $elty, ldvl, n) + ldvr = 0 + if jobvr == 'V' + ldvr = n + elseif jobvr == 'N' + ldvr = 1 + else + throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed")) + end + vr = similar(A, $elty, ldvr, n) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($ggev3), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, + Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, + Ref{BlasInt}, Clong, Clong), jobvl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, ldvl, @@ -2170,9 +2254,9 @@ for (geevx, ggev, elty) in end end -for (geevx, ggev, elty, relty) in - ((:zgeevx_,:zggev_,:ComplexF64,:Float64), - (:cgeevx_,:cggev_,:ComplexF32,:Float32)) +for (geevx, ggev, ggev3, elty, relty) in + ((:zgeevx_,:zggev_,:zggev3_,:ComplexF64,:Float64), + (:cgeevx_,:cggev_,:cggev3_,:ComplexF32,:Float32)) @eval begin # SUBROUTINE ZGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, # LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, @@ -2234,7 +2318,7 @@ for (geevx, ggev, elty, relty) in Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{$relty}, Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong, Clong), + Ptr{$relty}, Ref{BlasInt}, Clong, Clong, Clong, Clong), balanc, jobvl, jobvr, sense, n, A, lda, w, VL, max(1,ldvl), VR, max(1,ldvr), @@ -2300,7 +2384,72 @@ for (geevx, ggev, elty, relty) in Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, - Ptr{BlasInt}, Clong, Clong), + Ref{BlasInt}, Clong, Clong), + jobvl, jobvr, n, A, + lda, B, ldb, alpha, + beta, vl, ldvl, vr, + ldvr, work, lwork, rwork, + info, 1, 1) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(work[1]) + resize!(work, lwork) + end + end + alpha, beta, vl, vr + end + + # SUBROUTINE ZGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + # $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) + # * .. Scalar Arguments .. + # CHARACTER JOBVL, JOBVR + # INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N + # * .. + # * .. Array Arguments .. 
+ # DOUBLE PRECISION RWORK( * ) + # COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + # $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + # $ WORK( * ) + function ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) + require_one_based_indexing(A, B) + chkstride1(A, B) + n, m = checksquare(A, B) + if n != m + throw(DimensionMismatch("A has dimensions $(size(A)), and B has dimensions $(size(B)), but A and B must have the same size")) + end + lda = max(1, stride(A, 2)) + ldb = max(1, stride(B, 2)) + alpha = similar(A, $elty, n) + beta = similar(A, $elty, n) + ldvl = 0 + if jobvl == 'V' + ldvl = n + elseif jobvl == 'N' + ldvl = 1 + else + throw(ArgumentError("jobvl must be 'V' or 'N', but $jobvl was passed")) + end + vl = similar(A, $elty, ldvl, n) + ldvr = 0 + if jobvr == 'V' + ldvr = n + elseif jobvr == 'N' + ldvr = 1 + else + throw(ArgumentError("jobvr must be 'V' or 'N', but $jobvr was passed")) + end + vr = similar(A, $elty, ldvr, n) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + rwork = Vector{$relty}(undef, 8n) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($ggev3), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, + Ref{BlasInt}, Clong, Clong), jobvl, jobvr, n, A, lda, B, ldb, alpha, beta, vl, ldvl, vr, @@ -2346,6 +2495,17 @@ corresponding eigenvectors are computed. """ ggev!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) +""" + ggev3!(jobvl, jobvr, A, B) -> (alpha, beta, vl, vr) + +Finds the generalized eigendecomposition of `A` and `B` using a blocked +algorithm. If `jobvl = N`, the left eigenvectors aren't computed. If +`jobvr = N`, the right eigenvectors aren't computed. If `jobvl = V` or +`jobvr = V`, the corresponding eigenvectors are computed. This function +requires LAPACK 3.6.0. 
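A usage sketch of `ggev3!` (requires LAPACK ≥ 3.6.0 as noted; the diagonal matrices are arbitrary, chosen so that the generalized eigenvalues of `A*x = λ*B*x` are 0.5 and 3.0):

```julia
julia> using LinearAlgebra

julia> A = [1.0 0.0; 0.0 3.0]; B = [2.0 0.0; 0.0 1.0];

julia> alphar, alphai, beta, vl, vr = LinearAlgebra.LAPACK.ggev3!('N', 'N', copy(A), copy(B));

julia> sort(alphar ./ beta) ≈ [0.5, 3.0]
true
```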
+""" +ggev3!(jobvl::AbstractChar, jobvr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) + # One step incremental condition estimation of max/min singular values for (laic1, elty) in ((:dlaic1_,:Float64), @@ -2366,17 +2526,17 @@ for (laic1, elty) in if j != length(w) throw(DimensionMismatch("vectors must have same length, but length of x is $j and length of w is $(length(w))")) end - sestpr = Vector{$elty}(undef, 1) - s = Vector{$elty}(undef, 1) - c = Vector{$elty}(undef, 1) + sestpr = Ref{$elty}() + s = Ref{$elty}() + c = Ref{$elty}() ccall((@blasfunc($laic1), libblastrampoline), Cvoid, (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{$elty}, - Ptr{$elty}, Ref{$elty}, Ptr{$elty}, Ptr{$elty}, - Ptr{$elty}), + Ptr{$elty}, Ref{$elty}, Ref{$elty}, Ref{$elty}, + Ref{$elty}), job, j, x, sest, w, gamma, sestpr, s, c) - sestpr[1], s[1], c[1] + sestpr[], s[], c[] end end end @@ -2400,17 +2560,17 @@ for (laic1, elty, relty) in if j != length(w) throw(DimensionMismatch("vectors must have same length, but length of x is $j and length of w is $(length(w))")) end - sestpr = Vector{$relty}(undef, 1) - s = Vector{$elty}(undef, 1) - c = Vector{$elty}(undef, 1) + sestpr = Ref{$relty}() + s = Ref{$elty}() + c = Ref{$elty}() ccall((@blasfunc($laic1), libblastrampoline), Cvoid, (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{$relty}, - Ptr{$elty}, Ref{$elty}, Ptr{$relty}, Ptr{$elty}, - Ptr{$elty}), + Ptr{$elty}, Ref{$elty}, Ref{$relty}, Ref{$elty}, + Ref{$elty}), job, j, x, sest, w, gamma, sestpr, s, c) - sestpr[1], s[1], c[1] + sestpr[], s[], c[] end end end @@ -4037,11 +4197,10 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in # * .. Array Arguments .. # INTEGER IPIV( * ) # DOUBLE PRECISION A( LDA, * ), WORK( * ) - function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) + function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) chkstride1(A) n = checksquare(A) chkuplo(uplo) - ipiv = similar(A, BlasInt, n) if n == 0 return A, ipiv, zero(BlasInt) end @@ -4062,6 +4221,12 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in return A, ipiv, info[] end + function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) + n = checksquare(A) + ipiv = similar(A, BlasInt, n) + sytrf!(uplo, A, ipiv) + end + # SUBROUTINE DSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) # * .. Scalar Arguments .. # CHARACTER UPLO @@ -4380,11 +4545,10 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in # * .. Array Arguments .. # INTEGER IPIV( * ) # COMPLEX*16 A( LDA, * ), WORK( * ) - function hetrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) + function hetrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) chkstride1(A) n = checksquare(A) chkuplo(uplo) - ipiv = similar(A, BlasInt, n) work = Vector{$elty}(undef, 1) lwork = BlasInt(-1) info = Ref{BlasInt}() @@ -4402,6 +4566,12 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in A, ipiv, info[] end + function hetrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) + n = checksquare(A) + ipiv = similar(A, BlasInt, n) + hetrf!(uplo, A, ipiv) + end + # SUBROUTINE ZHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) # * .. Scalar Arguments .. # CHARACTER UPLO @@ -4648,11 +4818,10 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in # * .. Array Arguments .. 
# INTEGER IPIV( * ) # COMPLEX*16 A( LDA, * ), WORK( * ) - function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) + function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}) chkstride1(A) n = checksquare(A) chkuplo(uplo) - ipiv = similar(A, BlasInt, n) if n == 0 return A, ipiv, zero(BlasInt) end @@ -4673,6 +4842,12 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in A, ipiv, info[] end + function sytrf!(uplo::AbstractChar, A::AbstractMatrix{$elty}) + n = checksquare(A) + ipiv = similar(A, BlasInt, n) + sytrf!(uplo, A, ipiv) + end + # SUBROUTINE ZSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) # * .. Scalar Arguments .. # CHARACTER UPLO @@ -4958,6 +5133,20 @@ zero at position `info`. """ sytrf!(uplo::AbstractChar, A::AbstractMatrix) +""" + sytrf!(uplo, A, ipiv) -> (A, ipiv, info) + +Computes the Bunch-Kaufman factorization of a symmetric matrix `A`. If +`uplo = U`, the upper half of `A` is stored. If `uplo = L`, the lower +half is stored. + +Returns `A`, overwritten by the factorization, the pivot vector `ipiv`, and +the error code `info` which is a non-negative integer. If `info` is positive +the matrix is singular and the diagonal part of the factorization is exactly +zero at position `info`. +""" +sytrf!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}) + """ sytri!(uplo, A, ipiv) @@ -5003,6 +5192,20 @@ zero at position `info`. """ hetrf!(uplo::AbstractChar, A::AbstractMatrix) +""" + hetrf!(uplo, A, ipiv) -> (A, ipiv, info) + +Computes the Bunch-Kaufman factorization of a Hermitian matrix `A`. If +`uplo = U`, the upper half of `A` is stored. If `uplo = L`, the lower +half is stored. + +Returns `A`, overwritten by the factorization, the pivot vector `ipiv`, and +the error code `info` which is a non-negative integer. If `info` is positive +the matrix is singular and the diagonal part of the factorization is exactly +zero at position `info`. +""" +hetrf!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}) + """ hetri!(uplo, A, ipiv) @@ -5023,9 +5226,9 @@ solution `X`. hetrs!(uplo::AbstractChar, A::AbstractMatrix, ipiv::AbstractVector{BlasInt}, B::AbstractVecOrMat) # Symmetric (real) eigensolvers -for (syev, syevr, sygvd, elty) in - ((:dsyev_,:dsyevr_,:dsygvd_,:Float64), - (:ssyev_,:ssyevr_,:ssygvd_,:Float32)) +for (syev, syevr, syevd, sygvd, elty) in + ((:dsyev_,:dsyevr_,:dsyevd_,:dsygvd_,:Float64), + (:ssyev_,:ssyevr_,:ssyevd_,:ssygvd_,:Float32)) @eval begin # SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) # * .. Scalar Arguments .. @@ -5078,7 +5281,7 @@ for (syev, syevr, sygvd, elty) in end lda = stride(A,2) m = Ref{BlasInt}() - w = similar(A, $elty, n) + W = similar(A, $elty, n) ldz = n if jobz == 'N' Z = similar(A, $elty, ldz, 0) @@ -5102,7 +5305,7 @@ for (syev, syevr, sygvd, elty) in jobz, range, uplo, n, A, max(1,lda), vl, vu, il, iu, abstol, m, - w, Z, max(1,ldz), isuppz, + W, Z, max(1,ldz), isuppz, work, lwork, iwork, liwork, info, 1, 1, 1) chklapackerror(info[]) @@ -5113,11 +5316,51 @@ for (syev, syevr, sygvd, elty) in resize!(iwork, liwork) end end - w[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)] + W[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)] end syevr!(jobz::AbstractChar, A::AbstractMatrix{$elty}) = syevr!(jobz, 'A', 'U', A, 0.0, 0.0, 0, 0, -1.0) + # SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + # $ IWORK, LIWORK, INFO ) + # * .. Scalar Arguments .. + # CHARACTER JOBZ, UPLO + # INTEGER INFO, LDA, LIWORK, LWORK, N + # * .. + # * .. Array Arguments .. 
+ # INTEGER IWORK( * ) + # DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) + function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}) + chkstride1(A) + n = checksquare(A) + chkuplofinite(A, uplo) + lda = stride(A,2) + m = Ref{BlasInt}() + W = similar(A, $elty, n) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + iwork = Vector{BlasInt}(undef, 1) + liwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] and liwork as iwork[1] + ccall((@blasfunc($syevd), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, + Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, + Ptr{BlasInt}, Clong, Clong), + jobz, uplo, n, A, max(1,lda), + W, work, lwork, iwork, liwork, + info, 1, 1) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + liwork = iwork[1] + resize!(iwork, liwork) + end + end + jobz == 'V' ? (W, A) : W + end + # Generalized eigenproblem # SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, # $ LWORK, IWORK, LIWORK, INFO ) @@ -5166,9 +5409,9 @@ for (syev, syevr, sygvd, elty) in end end # Hermitian eigensolvers -for (syev, syevr, sygvd, elty, relty) in - ((:zheev_,:zheevr_,:zhegvd_,:ComplexF64,:Float64), - (:cheev_,:cheevr_,:chegvd_,:ComplexF32,:Float32)) +for (syev, syevr, syevd, sygvd, elty, relty) in + ((:zheev_,:zheevr_,:zheevd_,:zhegvd_,:ComplexF64,:Float64), + (:cheev_,:cheevr_,:cheevd_,:chegvd_,:ComplexF32,:Float32)) @eval begin # SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO ) # * .. Scalar Arguments .. @@ -5180,6 +5423,7 @@ for (syev, syevr, sygvd, elty, relty) in # COMPLEX*16 A( LDA, * ), WORK( * ) function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}) chkstride1(A) + chkuplofinite(A, uplo) n = checksquare(A) W = similar(A, $relty, n) work = Vector{$elty}(undef, 1) @@ -5218,6 +5462,7 @@ for (syev, syevr, sygvd, elty, relty) in function syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat) chkstride1(A) + chkuplofinite(A, uplo) n = checksquare(A) if range == 'I' && !(1 <= il <= iu <= n) throw(ArgumentError("illegal choice of eigenvalue indices (il = $il, iu=$iu), which must be between 1 and n = $n")) @@ -5227,7 +5472,7 @@ for (syev, syevr, sygvd, elty, relty) in end lda = max(1,stride(A,2)) m = Ref{BlasInt}() - w = similar(A, $relty, n) + W = similar(A, $relty, n) if jobz == 'N' ldz = 1 Z = similar(A, $elty, ldz, 0) @@ -5255,7 +5500,7 @@ for (syev, syevr, sygvd, elty, relty) in jobz, range, uplo, n, A, lda, vl, vu, il, iu, abstol, m, - w, Z, ldz, isuppz, + W, Z, ldz, isuppz, work, lwork, rwork, lrwork, iwork, liwork, info, 1, 1, 1) @@ -5269,11 +5514,56 @@ for (syev, syevr, sygvd, elty, relty) in resize!(iwork, liwork) end end - w[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)] + W[1:m[]], Z[:,1:(jobz == 'V' ? m[] : 0)] end syevr!(jobz::AbstractChar, A::AbstractMatrix{$elty}) = syevr!(jobz, 'A', 'U', A, 0.0, 0.0, 0, 0, -1.0) + # SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + # $ LRWORK, IWORK, LIWORK, INFO ) + # * .. Scalar Arguments .. + # CHARACTER JOBZ, UPLO + # INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N + # * .. + # * .. Array Arguments .. 
+ # INTEGER IWORK( * ) + # DOUBLE PRECISION RWORK( * ) + # COMPLEX*16 A( LDA, * ), WORK( * ) + function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}) + chkstride1(A) + chkuplofinite(A, uplo) + n = checksquare(A) + lda = max(1, stride(A,2)) + m = Ref{BlasInt}() + W = similar(A, $relty, n) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + rwork = Vector{$relty}(undef, 1) + lrwork = BlasInt(-1) + iwork = Vector{BlasInt}(undef, 1) + liwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1] + ccall((@blasfunc($syevd), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, + Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ref{BlasInt}, + Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong), + jobz, uplo, n, A, lda, + W, work, lwork, rwork, lrwork, + iwork, liwork, info, 1, 1) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + lrwork = BlasInt(rwork[1]) + resize!(rwork, lrwork) + liwork = iwork[1] + resize!(iwork, liwork) + end + end + jobz == 'V' ? (W, A) : W + end + # SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, # $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) # * .. Scalar Arguments .. @@ -5286,6 +5576,8 @@ for (syev, syevr, sygvd, elty, relty) in # COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) function sygvd!(itype::Integer, jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) chkstride1(A, B) + chkuplofinite(A, uplo) + chkuplofinite(B, uplo) n, m = checksquare(A, B) if n != m throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) end @@ -5353,6 +5645,20 @@ The eigenvalues are returned in `W` and the eigenvectors in `Z`. syevr!(jobz::AbstractChar, range::AbstractChar, uplo::AbstractChar, A::AbstractMatrix, vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat) +""" + syevd!(jobz, uplo, A) + +Finds the eigenvalues (`jobz = N`) or eigenvalues and eigenvectors +(`jobz = V`) of a symmetric matrix `A`. If `uplo = U`, the upper triangle +of `A` is used. If `uplo = L`, the lower triangle of `A` is used. + +Uses the divide-and-conquer method, instead of the QR iteration used by +`syev!` or the multiple relatively robust representations used by `syevr!`. +See James W. Demmel et al., SIAM J. Sci. Comput. 30, 3, 1508 (2008) for +a comparison of the accuracy and performance of different methods. +""" +syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix) + """ sygvd!(itype, jobz, uplo, A, B) -> (w, A, B) @@ -5455,7 +5761,7 @@ for (bdsdc, elty) in elseif compq == 'P' @warn "COMPQ='P' is not tested" #TODO turn this into an actual LAPACK call - #smlsiz=ilaenv(9, $elty==:Float64 ? 'dbdsqr' : 'sbdsqr', string(uplo, compq), n,n,n,n) + #smlsiz=ilaenv(9, $elty === :Float64 ? 'dbdsqr' : 'sbdsqr', string(uplo, compq), n,n,n,n) smlsiz=100 #For now, completely overkill ldq = n*(11+2*smlsiz+8*round(Int,log((n/(smlsiz+1)))/log(2))) ldiq = n*(3+3*round(Int,log(n/(smlsiz+1))/log(2))) @@ -5730,6 +6036,104 @@ for (ormhr, elty) in end end +for (hseqr, elty) in + ((:zhseqr_,:ComplexF64), + (:chseqr_,:ComplexF32)) + @eval begin + # * .. Scalar Arguments .. + # CHARACTER JOB, COMPZ + # INTEGER N, ILO, IHI, LWORK, LDH, LDZ, INFO + # * .. + # * .. Array Arguments ..
+ # COMPLEX*16 H( LDH, * ), Z( LDZ, * ), WORK( * ) + function hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer, + H::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty}) + require_one_based_indexing(H, Z) + chkstride1(H) + n = checksquare(H) + checksquare(Z) == n || throw(DimensionMismatch()) + ldh = max(1, stride(H, 2)) + ldz = max(1, stride(Z, 2)) + w = similar(H, $elty, n) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($hseqr), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, + Ptr{BlasInt}), + job, compz, n, ilo, ihi, + H, ldh, w, Z, ldz, work, + lwork, info) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + end + end + H, Z, w + end + end +end + +for (hseqr, elty) in + ((:dhseqr_,:Float64), + (:shseqr_,:Float32)) + @eval begin + # * .. Scalar Arguments .. + # CHARACTER JOB, COMPZ + # INTEGER N, ILO, IHI, LWORK, LDH, LDZ, INFO + # * .. + # * .. Array Arguments .. + # COMPLEX*16 H( LDH, * ), Z( LDZ, * ), WORK( * ) + function hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer, + H::AbstractMatrix{$elty}, Z::AbstractMatrix{$elty}) + require_one_based_indexing(H, Z) + chkstride1(H) + n = checksquare(H) + checksquare(Z) == n || throw(DimensionMismatch()) + ldh = max(1, stride(H, 2)) + ldz = max(1, stride(Z, 2)) + wr = similar(H, $elty, n) + wi = similar(H, $elty, n) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($hseqr), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, + Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, + Ptr{BlasInt}), + job, compz, n, ilo, ihi, + H, ldh, wr, wi, Z, ldz, work, + lwork, info) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + end + end + H, Z, complex.(wr, wi) + end + end +end +hseqr!(H::StridedMatrix{T}, Z::StridedMatrix{T}) where {T<:BlasFloat} = hseqr!('S', 'V', 1, size(H, 1), H, Z) +hseqr!(H::StridedMatrix{T}) where {T<:BlasFloat} = hseqr!('S', 'I', 1, size(H, 1), H, similar(H)) + +""" + hseqr!(job, compz, ilo, ihi, H, Z) -> (H, Z, w) + +Computes all eigenvalues and (optionally) the Schur factorization of a matrix +reduced to Hessenberg form. If `H` is balanced with `gebal!` +then `ilo` and `ihi` are the outputs of `gebal!`. Otherwise they should be +`ilo = 1` and `ihi = size(H,2)`. `tau` contains the elementary reflectors of +the factorization. +""" +hseqr!(job::AbstractChar, compz::AbstractChar, ilo::Integer, ihi::Integer, H::AbstractMatrix, Z::AbstractMatrix) + for (hetrd, elty) in ((:dsytrd_,Float64), (:ssytrd_,Float32), @@ -5884,9 +6288,9 @@ for (ormtr, elty) in end end -for (gees, gges, elty) in - ((:dgees_,:dgges_,:Float64), - (:sgees_,:sgges_,:Float32)) +for (gees, gges, gges3, elty) in + ((:dgees_,:dgges_,:dgges3_,:Float64), + (:sgees_,:sgges_,:sgges3_,:Float32)) @eval begin # .. Scalar Arguments .. 
# CHARACTER JOBVS, SORT @@ -5913,7 +6317,7 @@ for (gees, gges, elty) in (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, - Ref{BlasInt}, Ptr{Cvoid}, Ptr{BlasInt}, Clong, Clong), + Ref{BlasInt}, Ptr{Cvoid}, Ref{BlasInt}, Clong, Clong), jobvs, 'N', C_NULL, n, A, max(1, stride(A, 2)), sdim, wr, wi, vs, ldvs, work, @@ -5960,7 +6364,56 @@ for (gees, gges, elty) in Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid}, - Ptr{BlasInt}, Clong, Clong, Clong), + Ref{BlasInt}, Clong, Clong, Clong), + jobvsl, jobvsr, 'N', C_NULL, + n, A, max(1,stride(A, 2)), B, + max(1,stride(B, 2)), sdim, alphar, alphai, + beta, vsl, ldvsl, vsr, + ldvsr, work, lwork, C_NULL, + info, 1, 1, 1) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + end + end + A, B, complex.(alphar, alphai), beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:] + end + + # * .. Scalar Arguments .. + # CHARACTER JOBVSL, JOBVSR, SORT + # INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM + # * .. + # * .. Array Arguments .. + # LOGICAL BWORK( * ) + # DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + # $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + # $ VSR( LDVSR, * ), WORK( * ) + function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) + chkstride1(A, B) + n, m = checksquare(A, B) + if n != m + throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) + end + sdim = BlasInt(0) + alphar = similar(A, $elty, n) + alphai = similar(A, $elty, n) + beta = similar(A, $elty, n) + ldvsl = jobvsl == 'V' ? max(1, n) : 1 + vsl = similar(A, $elty, ldvsl, n) + ldvsr = jobvsr == 'V' ? max(1, n) : 1 + vsr = similar(A, $elty, ldvsr, n) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($gges3), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, + Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid}, + Ref{BlasInt}, Clong, Clong, Clong), jobvsl, jobvsr, 'N', C_NULL, n, A, max(1,stride(A, 2)), B, max(1,stride(B, 2)), sdim, alphar, alphai, @@ -5978,9 +6431,9 @@ for (gees, gges, elty) in end end -for (gees, gges, elty, relty) in - ((:zgees_,:zgges_,:ComplexF64,:Float64), - (:cgees_,:cgges_,:ComplexF32,:Float32)) +for (gees, gges, gges3, elty, relty) in + ((:zgees_,:zgges_,:zgges3_,:ComplexF64,:Float64), + (:cgees_,:cgges_,:cgges3_,:ComplexF32,:Float32)) @eval begin # * .. Scalar Arguments .. 
# CHARACTER JOBVS, SORT @@ -6008,7 +6461,7 @@ for (gees, gges, elty, relty) in (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, - Ptr{$relty}, Ptr{Cvoid}, Ptr{BlasInt}, Clong, Clong), + Ptr{$relty}, Ptr{Cvoid}, Ref{BlasInt}, Clong, Clong), jobvs, sort, C_NULL, n, A, max(1, stride(A, 2)), sdim, w, vs, ldvs, work, lwork, @@ -6056,7 +6509,57 @@ for (gees, gges, elty, relty) in Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid}, - Ptr{BlasInt}, Clong, Clong, Clong), + Ref{BlasInt}, Clong, Clong, Clong), + jobvsl, jobvsr, 'N', C_NULL, + n, A, max(1, stride(A, 2)), B, + max(1, stride(B, 2)), sdim, alpha, beta, + vsl, ldvsl, vsr, ldvsr, + work, lwork, rwork, C_NULL, + info, 1, 1, 1) + chklapackerror(info[]) + if i == 1 + lwork = BlasInt(real(work[1])) + resize!(work, lwork) + end + end + A, B, alpha, beta, vsl[1:(jobvsl == 'V' ? n : 0),:], vsr[1:(jobvsr == 'V' ? n : 0),:] + end + + # * .. Scalar Arguments .. + # CHARACTER JOBVSL, JOBVSR, SORT + # INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM + # * .. + # * .. Array Arguments .. + # LOGICAL BWORK( * ) + # DOUBLE PRECISION RWORK( * ) + # COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + # $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + # $ WORK( * ) + function gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty}) + chkstride1(A, B) + n, m = checksquare(A, B) + if n != m + throw(DimensionMismatch("dimensions of A, ($n,$n), and B, ($m,$m), must match")) + end + sdim = BlasInt(0) + alpha = similar(A, $elty, n) + beta = similar(A, $elty, n) + ldvsl = jobvsl == 'V' ? max(1, n) : 1 + vsl = similar(A, $elty, ldvsl, n) + ldvsr = jobvsr == 'V' ? max(1, n) : 1 + vsr = similar(A, $elty, ldvsr, n) + work = Vector{$elty}(undef, 1) + lwork = BlasInt(-1) + rwork = Vector{$relty}(undef, 8n) + info = Ref{BlasInt}() + for i = 1:2 # first call returns lwork as work[1] + ccall((@blasfunc($gges3), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, + Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, + Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid}, + Ref{BlasInt}, Clong, Clong, Clong), jobvsl, jobvsr, 'N', C_NULL, n, A, max(1, stride(A, 2)), B, max(1, stride(B, 2)), sdim, alpha, beta, @@ -6098,6 +6601,18 @@ vectors are returned in `vsl` and the right Schur vectors are returned in `vsr`. """ gges!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) +""" + gges3!(jobvsl, jobvsr, A, B) -> (A, B, alpha, beta, vsl, vsr) + +Computes the generalized eigenvalues, generalized Schur form, left Schur +vectors (`jobsvl = V`), or right Schur vectors (`jobvsr = V`) of `A` and +`B` using a blocked algorithm. This function requires LAPACK 3.6.0. + +The generalized eigenvalues are returned in `alpha` and `beta`. The left Schur +vectors are returned in `vsl` and the right Schur vectors are returned in `vsr`. +""" +gges3!(jobvsl::AbstractChar, jobvsr::AbstractChar, A::AbstractMatrix, B::AbstractMatrix) + for (trexc, trsen, tgsen, elty) in ((:dtrexc_, :dtrsen_, :dtgsen_, :Float64), (:strexc_, :strsen_, :stgsen_, :Float32)) @@ -6479,4 +6994,57 @@ Returns `X` (overwriting `C`) and `scale`. 
""" trsyl!(transa::AbstractChar, transb::AbstractChar, A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, isgn::Int=1) +for (fn, elty) in ((:dlacpy_, :Float64), + (:slacpy_, :Float32), + (:zlacpy_, :ComplexF64), + (:clacpy_, :ComplexF32)) + @eval begin + # SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) + # .. Scalar Arguments .. + # CHARACTER UPLO + # INTEGER LDA, LDB, M, N + # .. + # .. Array Arguments .. + # DOUBLE PRECISION A( LDA, * ), B( LDB, * ) + # .. + function lacpy!(B::AbstractMatrix{$elty}, A::AbstractMatrix{$elty}, uplo::AbstractChar) + require_one_based_indexing(A, B) + chkstride1(A, B) + m,n = size(A) + m1,n1 = size(B) + (m1 < m || n1 < n) && throw(DimensionMismatch("B of size ($m1,$n1) should have at least the same number of rows and columns than A of size ($m,$n)")) + lda = max(1, stride(A, 2)) + ldb = max(1, stride(B, 2)) + ccall((@blasfunc($fn), libblastrampoline), Cvoid, + (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, + Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong), + uplo, m, n, A, lda, B, ldb, 1) + B + end + end +end + +""" + lacpy!(B, A, uplo) -> B + +Copies all or part of a matrix `A` to another matrix `B`. +uplo specifies the part of the matrix `A` to be copied to `B`. +Set `uplo = 'L'` for the lower triangular part, `uplo = 'U'` +for the upper triangular part, any other character for all +the matrix `A`. + +# Examples +```jldoctest +julia> A = [1. 2. ; 3. 4.]; + +julia> B = [0. 0. ; 0. 0.]; + +julia> LAPACK.lacpy!(B, A, 'U') +2×2 Matrix{Float64}: + 1.0 2.0 + 0.0 4.0 +``` +""" +lacpy!(B::AbstractMatrix, A::AbstractMatrix, uplo::AbstractChar) + end # module diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl index 7648157a01a7d..b133741611adc 100644 --- a/stdlib/LinearAlgebra/src/lbt.jl +++ b/stdlib/LinearAlgebra/src/lbt.jl @@ -83,11 +83,17 @@ struct lbt_config_t exported_symbols::Ptr{Cstring} num_exported_symbols::UInt32 end -const LBT_BUILDFLAGS_DEEPBINDLESS = 0x01 -const LBT_BUILDFLAGS_F2C_CAPABLE = 0x02 +const LBT_BUILDFLAGS_DEEPBINDLESS = 0x01 +const LBT_BUILDFLAGS_F2C_CAPABLE = 0x02 +const LBT_BUILDFLAGS_CBLAS_DIVERGENCE = 0x04 +const LBT_BUILDFLAGS_COMPLEX_RETSTYLE = 0x08 +const LBT_BUILDFLAGS_SYMBOL_TRIMMING = 0x10 const LBT_BUILDFLAGS_MAP = Dict( LBT_BUILDFLAGS_DEEPBINDLESS => :deepbindless, LBT_BUILDFLAGS_F2C_CAPABLE => :f2c_capable, + LBT_BUILDFLAGS_CBLAS_DIVERGENCE => :cblas_divergence, + LBT_BUILDFLAGS_COMPLEX_RETSTYLE => :complex_retstyle, + LBT_BUILDFLAGS_SYMBOL_TRIMMING => :symbol_trimming, ) struct LBTConfig @@ -159,9 +165,9 @@ function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTConfig) println(io, "Libraries: ") for (i,l) in enumerate(lbt.loaded_libs) char = i == length(lbt.loaded_libs) ? "└" : "├" - interface_str = if l.interface == :ilp64 + interface_str = if l.interface === :ilp64 "ILP64" - elseif l.interface == :lp64 + elseif l.interface === :lp64 " LP64" else "UNKWN" @@ -207,9 +213,10 @@ function lbt_set_num_threads(nthreads) return ccall((:lbt_set_num_threads, libblastrampoline), Cvoid, (Int32,), nthreads) end -function lbt_forward(path; clear::Bool = false, verbose::Bool = false, suffix_hint::Union{String,Nothing} = nothing) +function lbt_forward(path::AbstractString; clear::Bool = false, verbose::Bool = false, suffix_hint::Union{String,Nothing} = nothing) _clear_config_with() do - return ccall((:lbt_forward, libblastrampoline), Int32, (Cstring, Int32, Int32, Cstring), path, clear ? 1 : 0, verbose ? 
1 : 0, something(suffix_hint, C_NULL)) + return ccall((:lbt_forward, libblastrampoline), Int32, (Cstring, Int32, Int32, Cstring), + path, clear ? 1 : 0, verbose ? 1 : 0, something(suffix_hint, C_NULL)) end end @@ -240,7 +247,7 @@ If the given `symbol_name` is not contained within the list of exported symbols, function lbt_find_backing_library(symbol_name, interface::Symbol; config::LBTConfig = lbt_get_config()) if interface ∉ (:ilp64, :lp64) - throw(Argument("Invalid interface specification: '$(interface)'")) + throw(ArgumentError("Invalid interface specification: '$(interface)'")) end symbol_idx = findfirst(s -> s == symbol_name, config.exported_symbols) if symbol_idx === nothing diff --git a/stdlib/LinearAlgebra/src/ldlt.jl b/stdlib/LinearAlgebra/src/ldlt.jl index 8c6bfee435186..d3d6234961c44 100644 --- a/stdlib/LinearAlgebra/src/ldlt.jl +++ b/stdlib/LinearAlgebra/src/ldlt.jl @@ -62,7 +62,7 @@ LDLt{T}(F::LDLt) where {T} = LDLt(convert(AbstractMatrix{T}, F.data)::AbstractMa Factorization{T}(F::LDLt{T}) where {T} = F Factorization{T}(F::LDLt) where {T} = LDLt{T}(F) -function getproperty(F::LDLt, d::Symbol) +function getproperty(F::LDLt{<:Any, <:SymTridiagonal}, d::Symbol) Fdata = getfield(F, :data) if d === :d return Fdata.dv @@ -211,7 +211,7 @@ function logabsdet(F::LDLt{<:Any,<:SymTridiagonal}) end # Conversion methods -function SymTridiagonal(F::LDLt) +function SymTridiagonal(F::LDLt{<:Any, <:SymTridiagonal}) e = copy(F.data.ev) d = copy(F.data.dv) e .*= d[1:end-1] diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl index 80933cf3c6f46..07d918c4374a5 100644 --- a/stdlib/LinearAlgebra/src/lq.jl +++ b/stdlib/LinearAlgebra/src/lq.jl @@ -27,10 +27,7 @@ L factor: 2×2 Matrix{Float64}: -8.60233 0.0 4.41741 -0.697486 -Q factor: -2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}: - -0.581238 -0.813733 - -0.813733 0.581238 +Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} julia> S.L * S.Q 2×2 Matrix{Float64}: @@ -58,19 +55,13 @@ LQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} = LQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ)) # backwards-compatible constructors (remove with Julia 2.0) @deprecate(LQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S}, - LQ{T,S,typeof(τ)}(factors, τ)) + LQ{T,S,typeof(τ)}(factors, τ), false) # iteration for destructuring into components Base.iterate(S::LQ) = (S.L, Val(:Q)) Base.iterate(S::LQ, ::Val{:Q}) = (S.Q, Val(:done)) Base.iterate(S::LQ, ::Val{:done}) = nothing -struct LQPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractMatrix{T} - factors::S - τ::C -end - - """ lq!(A) -> LQ @@ -78,6 +69,7 @@ Compute the [`LQ`](@ref) factorization of `A`, using the input matrix as a workspace. See also [`lq`](@ref). """ lq!(A::StridedMatrix{<:BlasFloat}) = LQ(LAPACK.gelqf!(A)...) 
+ """ lq(A) -> S::LQ @@ -104,10 +96,7 @@ L factor: 2×2 Matrix{Float64}: -8.60233 0.0 4.41741 -0.697486 -Q factor: -2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}: - -0.581238 -0.813733 - -0.813733 0.581238 +Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} julia> S.L * S.Q 2×2 Matrix{Float64}: @@ -120,7 +109,7 @@ julia> l == S.L && q == S.Q true ``` """ -lq(A::AbstractMatrix{T}) where {T} = lq!(copymutable_oftype(A, lq_eltype(T))) +lq(A::AbstractMatrix{T}) where {T} = lq!(copy_similar(A, lq_eltype(T))) lq(x::Number) = lq!(fill(convert(lq_eltype(typeof(x)), x), 1, 1)) lq_eltype(::Type{T}) where {T} = typeof(zero(T) / sqrt(abs2(one(T)))) @@ -135,8 +124,11 @@ AbstractArray(A::LQ) = AbstractMatrix(A) Matrix(A::LQ) = Array(AbstractArray(A)) Array(A::LQ) = Matrix(A) -adjoint(A::LQ) = Adjoint(A) -Base.copy(F::Adjoint{T,<:LQ{T}}) where {T} = +transpose(F::LQ{<:Real}) = F' +transpose(::LQ) = + throw(ArgumentError("transpose of LQ decomposition is not supported, consider using adjoint")) + +Base.copy(F::AdjointFactorization{T,<:LQ{T}}) where {T} = QR{T,typeof(F.parent.factors),typeof(F.parent.τ)}(copy(adjoint(F.parent.factors)), copy(F.parent.τ)) function getproperty(F::LQ, d::Symbol) @@ -153,173 +145,29 @@ end Base.propertynames(F::LQ, private::Bool=false) = (:L, :Q, (private ? fieldnames(typeof(F)) : ())...) -getindex(A::LQPackedQ, i::Integer, j::Integer) = - lmul!(A, setindex!(zeros(eltype(A), size(A, 2)), 1, j))[i] +# getindex(A::LQPackedQ, i::Integer, j::Integer) = +# lmul!(A, setindex!(zeros(eltype(A), size(A, 2)), 1, j))[i] function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ) summary(io, F); println(io) println(io, "L factor:") show(io, mime, F.L) - println(io, "\nQ factor:") + print(io, "\nQ factor: ") show(io, mime, F.Q) end -LQPackedQ{T}(Q::LQPackedQ) where {T} = LQPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(Vector{T}, Q.τ)) -AbstractMatrix{T}(Q::LQPackedQ) where {T} = LQPackedQ{T}(Q) -Matrix{T}(A::LQPackedQ) where {T} = convert(Matrix{T}, LAPACK.orglq!(copy(A.factors),A.τ)) -Matrix(A::LQPackedQ{T}) where {T} = Matrix{T}(A) -Array{T}(A::LQPackedQ{T}) where {T} = Matrix{T}(A) -Array(A::LQPackedQ) = Matrix(A) - size(F::LQ, dim::Integer) = size(getfield(F, :factors), dim) size(F::LQ) = size(getfield(F, :factors)) -# size(Q::LQPackedQ) yields the shape of Q's square form -function size(Q::LQPackedQ) - n = size(Q.factors, 2) - return n, n -end -function size(Q::LQPackedQ, dim::Integer) - if dim < 1 - throw(BoundsError()) - elseif dim <= 2 # && 1 <= dim - return size(Q.factors, 2) - else # 2 < dim - return 1 - end -end - - ## Multiplication by LQ -function lmul!(A::LQ, B::StridedVecOrMat) +function lmul!(A::LQ, B::AbstractVecOrMat) lmul!(LowerTriangular(A.L), view(lmul!(A.Q, B), 1:size(A,1), axes(B,2))) return B end -function *(A::LQ{TA}, B::StridedVecOrMat{TB}) where {TA,TB} +function *(A::LQ{TA}, B::AbstractVecOrMat{TB}) where {TA,TB} TAB = promote_type(TA, TB) - _cut_B(lmul!(convert(Factorization{TAB}, A), copymutable_oftype(B, TAB)), 1:size(A,1)) -end - -## Multiplication by Q -### QB -lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B) -function (*)(A::LQPackedQ, B::StridedVecOrMat) - TAB = promote_type(eltype(A), eltype(B)) - lmul!(AbstractMatrix{TAB}(A), copymutable_oftype(B, TAB)) -end - -### QcB -lmul!(adjA::Adjoint{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = - (A = adjA.parent; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B)) 
-lmul!(adjA::Adjoint{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - (A = adjA.parent; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B)) - -function *(adjA::Adjoint{<:Any,<:LQPackedQ}, B::StridedVecOrMat) - A = adjA.parent - TAB = promote_type(eltype(A), eltype(B)) - if size(B,1) == size(A.factors,2) - lmul!(adjoint(AbstractMatrix{TAB}(A)), copymutable_oftype(B, TAB)) - elseif size(B,1) == size(A.factors,1) - lmul!(adjoint(AbstractMatrix{TAB}(A)), [B; zeros(TAB, size(A.factors, 2) - size(A.factors, 1), size(B, 2))]) - else - throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of A, $(size(A))")) - end -end - -### QBc/QcBc -function *(A::LQPackedQ, adjB::Adjoint{<:Any,<:StridedVecOrMat}) - B = adjB.parent - TAB = promote_type(eltype(A), eltype(B)) - BB = similar(B, TAB, (size(B, 2), size(B, 1))) - adjoint!(BB, B) - return lmul!(A, BB) -end -function *(adjA::Adjoint{<:Any,<:LQPackedQ}, adjB::Adjoint{<:Any,<:StridedVecOrMat}) - B = adjB.parent - TAB = promote_type(eltype(adjA.parent), eltype(B)) - BB = similar(B, TAB, (size(B, 2), size(B, 1))) - adjoint!(BB, B) - return lmul!(adjA, BB) -end - -# in-place right-application of LQPackedQs -# these methods require that the applied-to matrix's (A's) number of columns -# match the number of columns (nQ) of the LQPackedQ (Q) (necessary for in-place -# operation, and the underlying LAPACK routine (ormlq) treats the implicit Q -# as its (nQ-by-nQ) square form) -rmul!(A::StridedMatrix{T}, B::LQPackedQ{T}) where {T<:BlasFloat} = - LAPACK.ormlq!('R', 'N', B.factors, B.τ, A) -rmul!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} = - (B = adjB.parent; LAPACK.ormlq!('R', 'T', B.factors, B.τ, A)) -rmul!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} = - (B = adjB.parent; LAPACK.ormlq!('R', 'C', B.factors, B.τ, A)) - -# out-of-place right application of LQPackedQs -# -# LQPackedQ's out-of-place multiplication behavior is context dependent. specifically, -# if the inner dimension in the multiplication is the LQPackedQ's second dimension, -# the LQPackedQ behaves like its square form. if the inner dimension in the -# multiplication is the LQPackedQ's first dimension, the LQPackedQ behaves like either -# its square form or its truncated form depending on the shape of the other object -# involved in the multiplication. we treat these cases separately. -# -# (1) the inner dimension in the multiplication is the LQPackedQ's second dimension. -# in this case, the LQPackedQ behaves like its square form. -# -function *(A::StridedVecOrMat, adjQ::Adjoint{<:Any,<:LQPackedQ}) - Q = adjQ.parent - TR = promote_type(eltype(A), eltype(Q)) - return rmul!(copymutable_oftype(A, TR), adjoint(AbstractMatrix{TR}(Q))) + _cut_B(lmul!(convert(Factorization{TAB}, A), copy_similar(B, TAB)), 1:size(A,1)) end -function *(adjA::Adjoint{<:Any,<:StridedMatrix}, adjQ::Adjoint{<:Any,<:LQPackedQ}) - A, Q = adjA.parent, adjQ.parent - TR = promote_type(eltype(A), eltype(Q)) - C = adjoint!(similar(A, TR, reverse(size(A))), A) - return rmul!(C, adjoint(AbstractMatrix{TR}(Q))) -end -# -# (2) the inner dimension in the multiplication is the LQPackedQ's first dimension. -# in this case, the LQPackedQ behaves like either its square form or its -# truncated form depending on the shape of the other object in the multiplication. 
-# -# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension -# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q), -# and if so effectively apply Q's square form to A without additional shenanigans; and -# (2) if the preceding dimensions do not match, check whether the appropriate dimension of -# A instead matches the number of rows of the matrix of which Q is a factor (i.e. -# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending -# A as necessary for check (1) to pass (if possible) and then applying Q's square form -# -function *(A::StridedVecOrMat, Q::LQPackedQ) - TR = promote_type(eltype(A), eltype(Q)) - if size(A, 2) == size(Q.factors, 2) - C = copymutable_oftype(A, TR) - elseif size(A, 2) == size(Q.factors, 1) - C = zeros(TR, size(A, 1), size(Q.factors, 2)) - copyto!(C, 1, A, 1, length(A)) - else - _rightappdimmismatch("columns") - end - return rmul!(C, AbstractMatrix{TR}(Q)) -end -function *(adjA::Adjoint{<:Any,<:StridedMatrix}, Q::LQPackedQ) - A = adjA.parent - TR = promote_type(eltype(A), eltype(Q)) - if size(A, 1) == size(Q.factors, 2) - C = adjoint!(similar(A, TR, reverse(size(A))), A) - elseif size(A, 1) == size(Q.factors, 1) - C = zeros(TR, size(A, 2), size(Q.factors, 2)) - adjoint!(view(C, :, 1:size(A, 1)), A) - else - _rightappdimmismatch("rows") - end - return rmul!(C, AbstractMatrix{TR}(Q)) -end -_rightappdimmismatch(rowsorcols) = - throw(DimensionMismatch(string("the number of $(rowsorcols) of the matrix on the left ", - "must match either (1) the number of columns of the (LQPackedQ) matrix on the right ", - "or (2) the number of rows of that (LQPackedQ) matrix's internal representation ", - "(the factorization's originating matrix's number of rows)"))) # With a real lhs and complex rhs with the same precision, we can reinterpret # the complex rhs as a real rhs with twice the number of columns @@ -334,7 +182,7 @@ function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal end -function ldiv!(A::LQ, B::StridedVecOrMat) +function ldiv!(A::LQ, B::AbstractVecOrMat) require_one_based_indexing(B) m, n = size(A) m ≤ n || throw(DimensionMismatch("LQ solver does not support overdetermined systems (more rows than columns)")) @@ -343,7 +191,7 @@ function ldiv!(A::LQ, B::StridedVecOrMat) return lmul!(adjoint(A.Q), B) end -function ldiv!(Fadj::Adjoint{<:Any,<:LQ}, B::StridedVecOrMat) +function ldiv!(Fadj::AdjointFactorization{<:Any,<:LQ}, B::AbstractVecOrMat) require_one_based_indexing(B) m, n = size(Fadj) m >= n || throw(DimensionMismatch("solver does not support underdetermined systems (more columns than rows)")) @@ -353,7 +201,3 @@ function ldiv!(Fadj::Adjoint{<:Any,<:LQ}, B::StridedVecOrMat) ldiv!(UpperTriangular(adjoint(F.L)), view(B, 1:size(F,1), axes(B,2))) return B end - -# In LQ factorization, `Q` is expressed as the product of the adjoint of the -# reflectors. Thus, `det` has to be conjugated. 
-det(Q::LQPackedQ) = conj(_det_tau(Q.τ)) diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl index 1948f5d18001f..25e32a8ab0828 100644 --- a/stdlib/LinearAlgebra/src/lu.jl +++ b/stdlib/LinearAlgebra/src/lu.jl @@ -64,7 +64,7 @@ LU{T}(factors::AbstractMatrix, ipiv::AbstractVector{<:Integer}, info::Integer) w # backwards-compatible constructors (remove with Julia 2.0) @deprecate(LU{T,S}(factors::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, info::BlasInt) where {T,S}, - LU{T,S,typeof(ipiv)}(factors, ipiv, info)) + LU{T,S,typeof(ipiv)}(factors, ipiv, info), false) # iteration for destructuring into components Base.iterate(S::LU) = (S.L, Val(:U)) @@ -72,21 +72,18 @@ Base.iterate(S::LU, ::Val{:U}) = (S.U, Val(:p)) Base.iterate(S::LU, ::Val{:p}) = (S.p, Val(:done)) Base.iterate(S::LU, ::Val{:done}) = nothing -adjoint(F::LU) = Adjoint(F) -transpose(F::LU) = Transpose(F) +# LU prefers transpose over adjoint in the real case, override the generic fallback +adjoint(F::LU{<:Real}) = TransposeFactorization(F) +transpose(F::LU{<:Real}) = TransposeFactorization(F) -# StridedMatrix +# the following method is meant to catch calls to lu!(A::LAPACKArray) without a pivoting strategy lu!(A::StridedMatrix{<:BlasFloat}; check::Bool = true) = lu!(A, RowMaximum(); check=check) function lu!(A::StridedMatrix{T}, ::RowMaximum; check::Bool = true) where {T<:BlasFloat} - lpt = LAPACK.getrf!(A) + lpt = LAPACK.getrf!(A; check) check && checknonsingular(lpt[3]) return LU{T,typeof(lpt[1]),typeof(lpt[2])}(lpt[1], lpt[2], lpt[3]) end -function lu!(A::StridedMatrix{<:BlasFloat}, pivot::NoPivot; check::Bool = true) - return generic_lufact!(A, pivot; check = check) -end - -function lu!(A::HermOrSym, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) +function lu!(A::HermOrSym{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) where {T} copytri!(A.data, A.uplo, isa(A, Hermitian)) lu!(A.data, pivot; check = check) end @@ -132,10 +129,11 @@ Stacktrace: [...] ``` """ -lu!(A::StridedMatrix, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(eltype(A)); check::Bool = true) = +lu!(A::AbstractMatrix, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(eltype(A)); check::Bool = true) = generic_lufact!(A, pivot; check = check) -function generic_lufact!(A::StridedMatrix{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); +function generic_lufact!(A::AbstractMatrix{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) where {T} + check && LAPACK.chkfinite(A) # Extract values m, n = size(A) minmn = min(m,n) @@ -298,12 +296,13 @@ julia> l == F.L && u == F.U && p == F.p true ``` """ -function lu(A::AbstractMatrix{T}, pivot::Union{RowMaximum,NoPivot,RowNonZero} = lupivottype(T); check::Bool = true) where {T} - lu!(_lucopy(A, lutype(T)), pivot; check = check) -end +lu(A::AbstractMatrix{T}, args...; kwargs...) where {T} = + _lu(_lucopy(A, lutype(T)), args...; kwargs...) # TODO: remove for Julia v2.0 @deprecate lu(A::AbstractMatrix, ::Val{true}; check::Bool = true) lu(A, RowMaximum(); check=check) @deprecate lu(A::AbstractMatrix, ::Val{false}; check::Bool = true) lu(A, NoPivot(); check=check) +# allow packages like SparseArrays.jl to interfere here and call their own `lu` +_lu(A::AbstractMatrix, args...; kwargs...) = lu!(A, args...; kwargs...) 
_lucopy(A::AbstractMatrix, T) = copy_similar(A, T) _lucopy(A::HermOrSym, T) = copymutable_oftype(A, T) @@ -327,7 +326,7 @@ Factorization{T}(F::LU) where {T} = LU{T}(F) copy(A::LU{T,S,P}) where {T,S,P} = LU{T,S,P}(copy(A.factors), copy(A.ipiv), A.info) size(A::LU) = size(getfield(A, :factors)) -size(A::LU, i) = size(getfield(A, :factors), i) +size(A::LU, i::Integer) = size(getfield(A, :factors), i) function ipiv2perm(v::AbstractVector{T}, maxi::Integer) where T require_one_based_indexing(v) @@ -338,7 +337,7 @@ function ipiv2perm(v::AbstractVector{T}, maxi::Integer) where T return p end -function getproperty(F::LU{T,<:StridedMatrix}, d::Symbol) where T +function getproperty(F::LU{T}, d::Symbol) where T m, n = size(F) if d === :L L = tril!(getfield(F, :factors)[1:m, 1:min(m,n)]) @@ -372,10 +371,10 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LU) end end -_apply_ipiv_rows!(A::LU, B::StridedVecOrMat) = _ipiv_rows!(A, 1 : length(A.ipiv), B) -_apply_inverse_ipiv_rows!(A::LU, B::StridedVecOrMat) = _ipiv_rows!(A, length(A.ipiv) : -1 : 1, B) +_apply_ipiv_rows!(A::LU, B::AbstractVecOrMat) = _ipiv_rows!(A, 1 : length(A.ipiv), B) +_apply_inverse_ipiv_rows!(A::LU, B::AbstractVecOrMat) = _ipiv_rows!(A, length(A.ipiv) : -1 : 1, B) -function _ipiv_rows!(A::LU, order::OrdinalRange, B::StridedVecOrMat) +function _ipiv_rows!(A::LU, order::OrdinalRange, B::AbstractVecOrMat) for i = order if i != A.ipiv[i] _swap_rows!(B, i, A.ipiv[i]) @@ -384,22 +383,22 @@ function _ipiv_rows!(A::LU, order::OrdinalRange, B::StridedVecOrMat) B end -function _swap_rows!(B::StridedVector, i::Integer, j::Integer) +function _swap_rows!(B::AbstractVector, i::Integer, j::Integer) B[i], B[j] = B[j], B[i] B end -function _swap_rows!(B::StridedMatrix, i::Integer, j::Integer) +function _swap_rows!(B::AbstractMatrix, i::Integer, j::Integer) for col = 1 : size(B, 2) B[i,col], B[j,col] = B[j,col], B[i,col] end B end -_apply_ipiv_cols!(A::LU, B::StridedVecOrMat) = _ipiv_cols!(A, 1 : length(A.ipiv), B) -_apply_inverse_ipiv_cols!(A::LU, B::StridedVecOrMat) = _ipiv_cols!(A, length(A.ipiv) : -1 : 1, B) +_apply_ipiv_cols!(A::LU, B::AbstractVecOrMat) = _ipiv_cols!(A, 1 : length(A.ipiv), B) +_apply_inverse_ipiv_cols!(A::LU, B::AbstractVecOrMat) = _ipiv_cols!(A, length(A.ipiv) : -1 : 1, B) -function _ipiv_cols!(A::LU, order::OrdinalRange, B::StridedVecOrMat) +function _ipiv_cols!(A::LU, order::OrdinalRange, B::AbstractVecOrMat) for i = order if i != A.ipiv[i] _swap_cols!(B, i, A.ipiv[i]) @@ -408,18 +407,18 @@ function _ipiv_cols!(A::LU, order::OrdinalRange, B::StridedVecOrMat) B end -function _swap_cols!(B::StridedVector, i::Integer, j::Integer) +function _swap_cols!(B::AbstractVector, i::Integer, j::Integer) _swap_rows!(B, i, j) end -function _swap_cols!(B::StridedMatrix, i::Integer, j::Integer) +function _swap_cols!(B::AbstractMatrix, i::Integer, j::Integer) for row = 1 : size(B, 1) B[row,i], B[row,j] = B[row,j], B[row,i] end B end -function rdiv!(A::StridedVecOrMat, B::LU{<:Any,<:StridedMatrix}) +function rdiv!(A::AbstractVecOrMat, B::LU) rdiv!(rdiv!(A, UpperTriangular(B.factors)), UnitLowerTriangular(B.factors)) _apply_inverse_ipiv_cols!(B, A) end @@ -427,54 +426,34 @@ end ldiv!(A::LU{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.getrs!('N', A.factors, A.ipiv, B) -function ldiv!(A::LU{<:Any,<:StridedMatrix}, B::StridedVecOrMat) +function ldiv!(A::LU, B::AbstractVecOrMat) _apply_ipiv_rows!(A, B) ldiv!(UpperTriangular(A.factors), ldiv!(UnitLowerTriangular(A.factors), B)) end 
-ldiv!(transA::Transpose{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = +ldiv!(transA::TransposeFactorization{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = (A = transA.parent; LAPACK.getrs!('T', A.factors, A.ipiv, B)) -function ldiv!(transA::Transpose{<:Any,<:LU{<:Any,<:StridedMatrix}}, B::StridedVecOrMat) +function ldiv!(transA::TransposeFactorization{<:Any,<:LU}, B::AbstractVecOrMat) A = transA.parent ldiv!(transpose(UnitLowerTriangular(A.factors)), ldiv!(transpose(UpperTriangular(A.factors)), B)) _apply_inverse_ipiv_rows!(A, B) end -ldiv!(adjF::Adjoint{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:Real} = - (F = adjF.parent; ldiv!(transpose(F), B)) -ldiv!(adjA::Adjoint{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = +ldiv!(adjA::AdjointFactorization{T,<:LU{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = (A = adjA.parent; LAPACK.getrs!('C', A.factors, A.ipiv, B)) -function ldiv!(adjA::Adjoint{<:Any,<:LU{<:Any,<:StridedMatrix}}, B::StridedVecOrMat) +function ldiv!(adjA::AdjointFactorization{<:Any,<:LU}, B::AbstractVecOrMat) A = adjA.parent ldiv!(adjoint(UnitLowerTriangular(A.factors)), ldiv!(adjoint(UpperTriangular(A.factors)), B)) _apply_inverse_ipiv_rows!(A, B) end -(\)(A::Adjoint{<:Any,<:LU}, B::Adjoint{<:Any,<:StridedVecOrMat}) = A \ copy(B) -(\)(A::Transpose{<:Any,<:LU}, B::Transpose{<:Any,<:StridedVecOrMat}) = A \ copy(B) -(\)(A::Adjoint{T,<:LU{T,<:StridedMatrix}}, B::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasComplex} = +(\)(A::AdjointFactorization{T,<:LU{T,<:StridedMatrix}}, B::Adjoint{T,<:StridedVecOrMat{T}}) where {T<:BlasComplex} = LAPACK.getrs!('C', A.parent.factors, A.parent.ipiv, copy(B)) -(\)(A::Transpose{T,<:LU{T,<:StridedMatrix}}, B::Transpose{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = +(\)(A::TransposeFactorization{T,<:LU{T,<:StridedMatrix}}, B::Transpose{T,<:StridedVecOrMat{T}}) where {T<:BlasFloat} = LAPACK.getrs!('T', A.parent.factors, A.parent.ipiv, copy(B)) -function (/)(A::AbstractMatrix, F::Adjoint{<:Any,<:LU}) - T = promote_type(eltype(A), eltype(F)) - return adjoint(ldiv!(F.parent, copymutable_oftype(adjoint(A), T))) -end -# To avoid ambiguities with definitions in adjtrans.jl and factorizations.jl -(/)(adjA::Adjoint{<:Any,<:AbstractVector}, F::Adjoint{<:Any,<:LU}) = adjoint(F.parent \ adjA.parent) -(/)(adjA::Adjoint{<:Any,<:AbstractMatrix}, F::Adjoint{<:Any,<:LU}) = adjoint(F.parent \ adjA.parent) -function (/)(trA::Transpose{<:Any,<:AbstractVector}, F::Adjoint{<:Any,<:LU}) - T = promote_type(eltype(trA), eltype(F)) - return adjoint(ldiv!(F.parent, conj!(copymutable_oftype(trA.parent, T)))) -end -function (/)(trA::Transpose{<:Any,<:AbstractMatrix}, F::Adjoint{<:Any,<:LU}) - T = promote_type(eltype(trA), eltype(F)) - return adjoint(ldiv!(F.parent, conj!(copymutable_oftype(trA.parent, T)))) -end - function det(F::LU{T}) where T n = checksquare(F) issuccess(F) || return zero(T) @@ -515,22 +494,33 @@ inv!(A::LU{T,<:StridedMatrix}) where {T} = inv(A::LU{<:BlasFloat,<:StridedMatrix}) = inv!(copy(A)) # Tridiagonal - -# See dgttrf.f function lu!(A::Tridiagonal{T,V}, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) where {T,V} - # Extract values n = size(A, 1) + has_du2_defined = isdefined(A, :du2) && length(A.du2) == max(0, n-2) + if has_du2_defined + du2 = A.du2::V + else + du2 = similar(A.d, max(0, n-2))::V + end + _lu_tridiag!(A.dl, A.d, A.du, du2, Vector{BlasInt}(undef, n), pivot, check) +end +function 
lu!(F::LU{<:Any,<:Tridiagonal}, A::Tridiagonal, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) + B = F.factors + size(B) == size(A) || throw(DimensionMismatch()) + copyto!(B, A) + _lu_tridiag!(B.dl, B.d, B.du, B.du2, F.ipiv, pivot, check) +end +# See dgttrf.f +@inline function _lu_tridiag!(dl, d, du, du2, ipiv, pivot, check) + T = eltype(d) + V = typeof(d) + + # Extract values + n = length(d) # Initialize variables info = 0 - ipiv = Vector{BlasInt}(undef, n) - dl = A.dl - d = A.d - du = A.du - if dl === du - throw(ArgumentError("off-diagonals of `A` must not alias")) - end - du2 = fill!(similar(d, n-2), 0)::V + fill!(du2, 0) @inbounds begin for i = 1:n @@ -585,9 +575,8 @@ function lu!(A::Tridiagonal{T,V}, pivot::Union{RowMaximum,NoPivot} = RowMaximum( end end end - B = Tridiagonal{T,V}(dl, d, du, du2) check && checknonsingular(info, pivot) - return LU{T,Tridiagonal{T,V},typeof(ipiv)}(B, ipiv, convert(BlasInt, info)) + return LU{T,Tridiagonal{T,V},typeof(ipiv)}(Tridiagonal{T,V}(dl, d, du, du2), ipiv, convert(BlasInt, info)) end factorize(A::Tridiagonal) = lu(A) @@ -650,7 +639,7 @@ function ldiv!(A::LU{T,Tridiagonal{T,V}}, B::AbstractVecOrMat) where {T,V} return B end -function ldiv!(transA::Transpose{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V} +function ldiv!(transA::TransposeFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V} require_one_based_indexing(B) A = transA.parent n = size(A,1) @@ -687,7 +676,7 @@ function ldiv!(transA::Transpose{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVec end # Ac_ldiv_B!(A::LU{T,Tridiagonal{T}}, B::AbstractVecOrMat) where {T<:Real} = At_ldiv_B!(A,B) -function ldiv!(adjA::Adjoint{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V} +function ldiv!(adjA::AdjointFactorization{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V} require_one_based_indexing(B) A = adjA.parent n = size(A,1) @@ -724,8 +713,6 @@ function ldiv!(adjA::Adjoint{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMa end rdiv!(B::AbstractMatrix, A::LU) = transpose(ldiv!(transpose(A), transpose(B))) -rdiv!(B::AbstractMatrix, A::Transpose{<:Any,<:LU}) = transpose(ldiv!(A.parent, transpose(B))) -rdiv!(B::AbstractMatrix, A::Adjoint{<:Any,<:LU}) = adjoint(ldiv!(A.parent, adjoint(B))) # Conversions AbstractMatrix(F::LU) = (F.L * F.U)[invperm(F.p),:] diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl index 7646aae29d1b9..cc1c954258a88 100644 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ b/stdlib/LinearAlgebra/src/matmul.jl @@ -1,11 +1,15 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# Matrix-matrix multiplication +# matmul.jl: Everything to do with dense matrix multiplication -AdjOrTransStridedMat{T} = Union{Adjoint{T, <:StridedMatrix}, Transpose{T, <:StridedMatrix}} -StridedMaybeAdjOrTransMat{T} = Union{StridedMatrix{T}, Adjoint{T, <:StridedMatrix}, Transpose{T, <:StridedMatrix}} +# unused internal constant, here for legacy reasons +const tilebufsize = 10800 # Approximately 32k/3 -# matmul.jl: Everything to do with dense matrix multiplication +# Matrix-matrix multiplication + +AdjOrTransStridedMat{T} = Union{Adjoint{<:Any, <:StridedMatrix{T}}, Transpose{<:Any, <:StridedMatrix{T}}} +StridedMaybeAdjOrTransMat{T} = Union{StridedMatrix{T}, Adjoint{<:Any, <:StridedMatrix{T}}, Transpose{<:Any, <:StridedMatrix{T}}} +StridedMaybeAdjOrTransVecOrMat{T} = Union{StridedVecOrMat{T}, AdjOrTrans{<:Any, <:StridedVecOrMat{T}}} matprod(x, y) = x*y + x*y @@ -46,83 +50,47 @@ function *(transx::Transpose{<:Any,<:StridedVector{T}}, y::StridedVector{T}) whe end # Matrix-vector multiplication -function (*)(A::StridedMatrix{T}, x::StridedVector{S}) where {T<:BlasFloat,S<:Real} +function (*)(A::StridedMaybeAdjOrTransMat{T}, x::StridedVector{S}) where {T<:BlasFloat,S<:Real} TS = promote_op(matprod, T, S) y = isconcretetype(TS) ? convert(AbstractVector{TS}, x) : x mul!(similar(x, TS, size(A,1)), A, y) end function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S} TS = promote_op(matprod, T, S) - mul!(similar(x,TS,axes(A,1)),A,x) + mul!(similar(x, TS, axes(A,1)), A, x) end # these will throw a DimensionMismatch unless B has 1 row (or 1 col for transposed case): -(*)(a::AbstractVector, tB::Transpose{<:Any,<:AbstractMatrix}) = reshape(a, length(a), 1) * tB -(*)(a::AbstractVector, adjB::Adjoint{<:Any,<:AbstractMatrix}) = reshape(a, length(a), 1) * adjB +(*)(a::AbstractVector, tB::TransposeAbsMat) = reshape(a, length(a), 1) * tB +(*)(a::AbstractVector, adjB::AdjointAbsMat) = reshape(a, length(a), 1) * adjB (*)(a::AbstractVector, B::AbstractMatrix) = reshape(a, length(a), 1) * B -@inline mul!(y::StridedVector{T}, A::StridedVecOrMat{T}, x::StridedVector{T}, - alpha::Number, beta::Number) where {T<:BlasFloat} = - gemv!(y, 'N', A, x, alpha, beta) - -# Complex matrix times real vector. -# Reinterpret the matrix as a real matrix and do real matvec compuation. -@inline mul!(y::StridedVector{Complex{T}}, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T}, - alpha::Number, beta::Number) where {T<:BlasReal} = - gemv!(y, 'N', A, x, alpha, beta) - -# Real matrix times complex vector. -# Multiply the matrix with the real and imaginary parts separately -@inline mul!(y::StridedVector{Complex{T}}, A::StridedMaybeAdjOrTransMat{T}, x::StridedVector{Complex{T}}, - alpha::Number, beta::Number) where {T<:BlasReal} = - gemv!(y, A isa StridedArray ? 'N' : 'T', A isa StridedArray ? 
A : parent(A), x, alpha, beta) - @inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector, - alpha::Number, beta::Number) = - generic_matvecmul!(y, 'N', A, x, MulAddMul(alpha, beta)) - -function *(tA::Transpose{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S} - TS = promote_op(matprod, T, S) - mul!(similar(x, TS, size(tA, 1)), tA, convert(AbstractVector{TS}, x)) -end -function *(tA::Transpose{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S} - TS = promote_op(matprod, T, S) - mul!(similar(x, TS, size(tA, 1)), tA, x) -end -@inline mul!(y::StridedVector{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T}, - alpha::Number, beta::Number) where {T<:BlasFloat} = - gemv!(y, 'T', tA.parent, x, alpha, beta) -@inline mul!(y::AbstractVector, tA::Transpose{<:Any,<:AbstractVecOrMat}, x::AbstractVector, - alpha::Number, beta::Number) = - generic_matvecmul!(y, 'T', tA.parent, x, MulAddMul(alpha, beta)) - -function *(adjA::Adjoint{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S} - TS = promote_op(matprod, T, S) - mul!(similar(x, TS, size(adjA, 1)), adjA, convert(AbstractVector{TS}, x)) -end -function *(adjA::Adjoint{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S} - TS = promote_op(matprod, T, S) - mul!(similar(x, TS, size(adjA, 1)), adjA, x) -end - -@inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T}, - alpha::Number, beta::Number) where {T<:BlasReal} = - mul!(y, transpose(adjA.parent), x, alpha, beta) -@inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T}, - alpha::Number, beta::Number) where {T<:BlasComplex} = - gemv!(y, 'C', adjA.parent, x, alpha, beta) -@inline mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, x::AbstractVector, - alpha::Number, beta::Number) = - generic_matvecmul!(y, 'C', adjA.parent, x, MulAddMul(alpha, beta)) + alpha::Number, beta::Number) = + generic_matvecmul!(y, wrapper_char(A), _unwrap(A), x, MulAddMul(alpha, beta)) +# BLAS cases +# equal eltypes +@inline generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T}, + _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} = + gemv!(y, tA, A, x, _add.alpha, _add.beta) +# Real (possibly transposed) matrix times complex vector. +# Multiply the matrix with the real and imaginary parts separately +@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}}, + _add::MulAddMul=MulAddMul()) where {T<:BlasReal} = + gemv!(y, tA, A, x, _add.alpha, _add.beta) +# Complex matrix times real vector. +# Reinterpret the matrix as a real matrix and do real matvec computation. +# works only in cooperation with BLAS when A is untransposed (tA == 'N') +# but that check is included in gemv! 
anyway +@inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T}, + _add::MulAddMul=MulAddMul()) where {T<:BlasReal} = + gemv!(y, tA, A, x, _add.alpha, _add.beta) # Vector-Matrix multiplication (*)(x::AdjointAbsVec, A::AbstractMatrix) = (A'*x')' (*)(x::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A)*transpose(x)) -_parent(A) = A -_parent(A::Adjoint) = parent(A) -_parent(A::Transpose) = parent(A) - +# Matrix-matrix multiplication """ *(A::AbstractMatrix, B::AbstractMatrix) @@ -146,33 +114,29 @@ end function (*)(A::StridedMaybeAdjOrTransMat{<:BlasReal}, B::StridedMaybeAdjOrTransMat{<:BlasReal}) TS = promote_type(eltype(A), eltype(B)) mul!(similar(B, TS, (size(A, 1), size(B, 2))), - wrapperop(A)(convert(AbstractArray{TS}, _parent(A))), - wrapperop(B)(convert(AbstractArray{TS}, _parent(B)))) + wrapperop(A)(convert(AbstractArray{TS}, _unwrap(A))), + wrapperop(B)(convert(AbstractArray{TS}, _unwrap(B)))) end function (*)(A::StridedMaybeAdjOrTransMat{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasComplex}) TS = promote_type(eltype(A), eltype(B)) mul!(similar(B, TS, (size(A, 1), size(B, 2))), - wrapperop(A)(convert(AbstractArray{TS}, _parent(A))), - wrapperop(B)(convert(AbstractArray{TS}, _parent(B)))) + wrapperop(A)(convert(AbstractArray{TS}, _unwrap(A))), + wrapperop(B)(convert(AbstractArray{TS}, _unwrap(B)))) end -@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, - alpha::Number, beta::Number) where {T<:BlasFloat} - return gemm_wrapper!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) -end # Complex Matrix times real matrix: We use that it is generally faster to reinterpret the # first matrix as a real matrix and carry out real matrix matrix multiply function (*)(A::StridedMatrix{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal}) TS = promote_type(eltype(A), eltype(B)) mul!(similar(B, TS, (size(A, 1), size(B, 2))), convert(AbstractArray{TS}, A), - wrapperop(B)(convert(AbstractArray{real(TS)}, _parent(B)))) + wrapperop(B)(convert(AbstractArray{real(TS)}, _unwrap(B)))) end function (*)(A::AdjOrTransStridedMat{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal}) TS = promote_type(eltype(A), eltype(B)) mul!(similar(B, TS, (size(A, 1), size(B, 2))), copymutable_oftype(A, TS), # remove AdjOrTrans to use reinterpret trick below - wrapperop(B)(convert(AbstractArray{real(TS)}, _parent(B)))) + wrapperop(B)(convert(AbstractArray{real(TS)}, _unwrap(B)))) end # the following case doesn't seem to benefit from the translation A*B = (B' * A')' function (*)(A::StridedMatrix{<:BlasReal}, B::StridedMatrix{<:BlasComplex}) @@ -253,18 +217,24 @@ end """ mul!(Y, A, B) -> Y -Calculates the matrix-matrix or matrix-vector product ``AB`` and stores the result in `Y`, +Calculates the matrix-matrix or matrix-vector product ``A B`` and stores the result in `Y`, overwriting the existing value of `Y`. Note that `Y` must not be aliased with either `A` or `B`. # Examples ```jldoctest -julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; Y = similar(B); mul!(Y, A, B); +julia> A = [1.0 2.0; 3.0 4.0]; B = [1.0 1.0; 1.0 1.0]; Y = similar(B); + +julia> mul!(Y, A, B) === Y +true julia> Y 2×2 Matrix{Float64}: 3.0 3.0 7.0 7.0 + +julia> Y == A * B +true ``` # Implementation @@ -272,9 +242,7 @@ For custom matrix and vector types, it is recommended to implement 5-argument `mul!` rather than implementing 3-argument `mul!` directly if possible. 
""" -@inline function mul!(C, A, B) - return mul!(C, A, B, true, false) -end +mul!(C, A, B) = mul!(C, A, B, true, false) """ mul!(C, A, B, α, β) -> C @@ -288,20 +256,33 @@ aliased with either `A` or `B`. # Examples ```jldoctest -julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; C=[1.0 2.0; 3.0 4.0]; +julia> A = [1.0 2.0; 3.0 4.0]; B = [1.0 1.0; 1.0 1.0]; C = [1.0 2.0; 3.0 4.0]; -julia> mul!(C, A, B, 100.0, 10.0) === C +julia> α, β = 100.0, 10.0; + +julia> mul!(C, A, B, α, β) === C true julia> C 2×2 Matrix{Float64}: 310.0 320.0 730.0 740.0 + +julia> C_original = [1.0 2.0; 3.0 4.0]; # A copy of the original value of C + +julia> C == A * B * α + C_original * β +true ``` """ -@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) +@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) = + generic_matmatmul!( + C, + wrapper_char(A), + wrapper_char(B), + _unwrap(A), + _unwrap(B), + MulAddMul(α, β) + ) """ rmul!(A, B) @@ -315,9 +296,9 @@ see [`QR`](@ref). ```jldoctest julia> A = [0 1; 1 0]; -julia> B = LinearAlgebra.UpperTriangular([1 2; 0 3]); +julia> B = UpperTriangular([1 2; 0 3]); -julia> LinearAlgebra.rmul!(A, B); +julia> rmul!(A, B); julia> A 2×2 Matrix{Int64}: @@ -348,9 +329,9 @@ see [`QR`](@ref). ```jldoctest julia> B = [0 1; 1 0]; -julia> A = LinearAlgebra.UpperTriangular([1 2; 0 3]); +julia> A = UpperTriangular([1 2; 0 3]); -julia> LinearAlgebra.lmul!(A, B); +julia> lmul!(A, B); julia> B 2×2 Matrix{Int64}: @@ -369,107 +350,52 @@ julia> lmul!(F.Q, B) """ lmul!(A, B) -@inline function mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T}, - alpha::Number, beta::Number) where {T<:BlasFloat} - A = tA.parent - if A === B - return syrk_wrapper!(C, 'T', A, MulAddMul(alpha, beta)) - else - return gemm_wrapper!(C, 'T', 'N', A, B, MulAddMul(alpha, beta)) +# THE one big BLAS dispatch +# aggressive constant propagation makes mul!(C, A, B) invoke gemm_wrapper! directly +Base.@constprop :aggressive function generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, + _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} + if all(in(('N', 'T', 'C')), (tA, tB)) + if tA == 'T' && tB == 'N' && A === B + return syrk_wrapper!(C, 'T', A, _add) + elseif tA == 'N' && tB == 'T' && A === B + return syrk_wrapper!(C, 'N', A, _add) + elseif tA == 'C' && tB == 'N' && A === B + return herk_wrapper!(C, 'C', A, _add) + elseif tA == 'N' && tB == 'C' && A === B + return herk_wrapper!(C, 'N', A, _add) + else + return gemm_wrapper!(C, tA, tB, A, B, _add) + end end -end -@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'T', 'N', tA.parent, B, MulAddMul(alpha, beta)) - -@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, tB::Transpose{<:Any,<:StridedVecOrMat{T}}, - alpha::Number, beta::Number) where {T<:BlasFloat} - B = tB.parent - if A === B - return syrk_wrapper!(C, 'N', A, MulAddMul(alpha, beta)) - else - return gemm_wrapper!(C, 'N', 'T', A, B, MulAddMul(alpha, beta)) + alpha, beta = promote(_add.alpha, _add.beta, zero(T)) + if alpha isa Union{Bool,T} && beta isa Union{Bool,T} + if (tA == 'S' || tA == 's') && tB == 'N' + return BLAS.symm!('L', tA == 'S' ? 'U' : 'L', alpha, A, B, beta, C) + elseif (tB == 'S' || tB == 's') && tA == 'N' + return BLAS.symm!('R', tB == 'S' ? 
'U' : 'L', alpha, B, A, beta, C) + elseif (tA == 'H' || tA == 'h') && tB == 'N' + return BLAS.hemm!('L', tA == 'H' ? 'U' : 'L', alpha, A, B, beta, C) + elseif (tB == 'H' || tB == 'h') && tA == 'N' + return BLAS.hemm!('R', tB == 'H' ? 'U' : 'L', alpha, B, A, beta, C) + end end + return _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add) end + # Complex matrix times (transposed) real matrix. Reinterpret the first matrix to real for efficiency. -@inline mul!(C::StridedMatrix{Complex{T}}, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, - alpha::Number, beta::Number) where {T<:BlasReal} = - gemm_wrapper!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) -@inline mul!(C::StridedMatrix{Complex{T}}, A::StridedVecOrMat{Complex{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}}, - alpha::Number, beta::Number) where {T<:BlasReal} = - gemm_wrapper!(C, 'N', 'T', A, parent(tB), MulAddMul(alpha, beta)) - -# collapsing the following two defs with C::AbstractVecOrMat yields ambiguities -@inline mul!(C::AbstractVector, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta)) - -@inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}}, - alpha::Number, beta::Number) where {T<:BlasFloat} = - gemm_wrapper!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta)) - -@inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}}, - alpha::Number, beta::Number) where {T<:BlasFloat} = - gemm_wrapper!(C, 'T', 'C', tA.parent, adjB.parent, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Adjoint{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'T', 'C', tA.parent, tB.parent, MulAddMul(alpha, beta)) - -@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T}, - alpha::Real, beta::Real) where {T<:BlasReal} = - mul!(C, transpose(adjA.parent), B, alpha, beta) -@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T}, - alpha::Number, beta::Number) where {T<:BlasComplex} - A = adjA.parent - if A === B - return herk_wrapper!(C, 'C', A, MulAddMul(alpha, beta)) +Base.@constprop :aggressive function generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, + _add::MulAddMul=MulAddMul()) where {T<:BlasReal} + if all(in(('N', 'T', 'C')), (tA, tB)) + gemm_wrapper!(C, tA, tB, A, B, _add) else - return gemm_wrapper!(C, 'C', 'N', A, B, MulAddMul(alpha, beta)) + _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add) end end -@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'C', 'N', adjA.parent, B, MulAddMul(alpha, beta)) - -@inline mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{<:BlasReal}}, - alpha::Number, 
beta::Number) where {T<:BlasFloat} = - mul!(C, A, transpose(adjB.parent), alpha, beta) -@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}}, - alpha::Number, beta::Number) where {T<:BlasComplex} - B = adjB.parent - if A === B - return herk_wrapper!(C, 'N', A, MulAddMul(alpha, beta)) - else - return gemm_wrapper!(C, 'N', 'C', A, B, MulAddMul(alpha, beta)) - end -end -@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, adjB::Adjoint{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'N', 'C', A, adjB.parent, MulAddMul(alpha, beta)) - -@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}}, - alpha::Number, beta::Number) where {T<:BlasFloat} = - gemm_wrapper!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta)) - -@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}}, - alpha::Number, beta::Number) where {T<:BlasFloat} = - gemm_wrapper!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta)) -@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat}, - alpha::Number, beta::Number) = - generic_matmatmul!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta)) + # Supporting functions for matrix multiplication -# copy transposed(adjoint) of upper(lower) side-digonals. Optionally include diagonal. +# copy transposed(adjoint) of upper(lower) side-diagonals. Optionally include diagonal. @inline function copytri!(A::AbstractMatrix, uplo::AbstractChar, conjugate::Bool=false, diag::Bool=false) n = checksquare(A) off = diag ? 0 : 1 @@ -487,7 +413,8 @@ end A end -function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{T}, +Base.@constprop :aggressive function gemv!(y::StridedVector{T}, tA::AbstractChar, + A::StridedVecOrMat{T}, x::StridedVector{T}, α::Number=true, β::Number=false) where {T<:BlasFloat} mA, nA = lapack_size(tA, A) nA != length(x) && @@ -499,14 +426,25 @@ function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x:: alpha, beta = promote(α, β, zero(T)) if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - !iszero(stride(x, 1)) # We only check input's stride here. - return BLAS.gemv!(tA, alpha, A, x, beta, y) + !iszero(stride(x, 1)) && # We only check input's stride here. + if tA in ('N', 'T', 'C') + return BLAS.gemv!(tA, alpha, A, x, beta, y) + elseif tA in ('S', 's') + return BLAS.symv!(tA == 'S' ? 'U' : 'L', alpha, A, x, beta, y) + elseif tA in ('H', 'h') + return BLAS.hemv!(tA == 'H' ? 'U' : 'L', alpha, A, x, beta, y) + end + end + if tA in ('S', 's', 'H', 'h') + # re-wrap again and use plain ('N') matvec mul algorithm, + # because _generic_matvecmul! 
can't handle the HermOrSym cases specifically + return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β)) else - return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) + return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) end end -function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T}, +Base.@constprop :aggressive function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T}, α::Number = true, β::Number = false) where {T<:BlasReal} mA, nA = lapack_size(tA, A) nA != length(x) && @@ -523,12 +461,14 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y)) return y else - return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) + Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA) + return _generic_matvecmul!(y, ta, Anew, x, MulAddMul(α, β)) end end -function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}}, - α::Number = true, β::Number = false) where {T<:BlasFloat} +Base.@constprop :aggressive function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, + A::StridedVecOrMat{T}, x::StridedVector{Complex{T}}, + α::Number = true, β::Number = false) where {T<:BlasReal} mA, nA = lapack_size(tA, A) nA != length(x) && throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))")) @@ -539,14 +479,18 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa alpha, beta = promote(α, β, zero(T)) @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - !iszero(stride(x, 1)) + !iszero(stride(x, 1)) && tA in ('N', 'T', 'C') xfl = reinterpret(reshape, T, x) # Use reshape here. yfl = reinterpret(reshape, T, y) BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :]) BLAS.gemv!(tA, alpha, A, xfl[2, :], beta, yfl[2, :]) return y + elseif tA in ('S', 's', 'H', 'h') + # re-wrap again and use plain ('N') matvec mul algorithm, + # because _generic_matvecmul! 
can't handle the HermOrSym cases specifically + return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β)) else - return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) + return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) end end @@ -633,7 +577,11 @@ function gemm_wrapper(tA::AbstractChar, tB::AbstractChar, mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) C = similar(B, T, mA, nB) - gemm_wrapper!(C, tA, tB, A, B) + if all(in(('N', 'T', 'C')), (tA, tB)) + gemm_wrapper!(C, tA, tB, A, B) + else + _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add) + end end function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar, @@ -673,7 +621,7 @@ function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar stride(C, 2) >= size(C, 1)) return BLAS.gemm!(tA, tB, alpha, A, B, beta, C) end - generic_matmatmul!(C, tA, tB, A, B, _add) + _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add) end function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::AbstractChar, @@ -712,11 +660,11 @@ function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::Abs stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 && stride(A, 2) >= size(A, 1) && stride(B, 2) >= size(B, 1) && - stride(C, 2) >= size(C, 1)) && tA == 'N' + stride(C, 2) >= size(C, 1) && tA == 'N') BLAS.gemm!(tA, tB, alpha, reinterpret(T, A), B, beta, reinterpret(T, C)) return C end - generic_matmatmul!(C, tA, tB, A, B, _add) + _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add) end # blas.jl defines matmul for floats; other integer and mixed precision @@ -750,9 +698,16 @@ end # NOTE: the generic version is also called as fallback for # strides != 1 cases -function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::AbstractVector, - _add::MulAddMul = MulAddMul()) where R +@inline function generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, + _add::MulAddMul = MulAddMul()) + Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA) + return _generic_matvecmul!(C, ta, Anew, B, _add) +end + +function _generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, + _add::MulAddMul = MulAddMul()) require_one_based_indexing(C, A, B) + @assert tA in ('N', 'T', 'C') mB = length(B) mA, nA = lapack_size(tA, A) if mB != nA @@ -807,7 +762,7 @@ function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::Ab end for k = 1:mB aoffs = (k-1)*Astride - b = _add(B[k], false) + b = _add(B[k]) for i = 1:mA C[i] += A[aoffs + i] * b end @@ -824,199 +779,66 @@ function generic_matmatmul(tA, tB, A::AbstractVecOrMat{T}, B::AbstractMatrix{S}) generic_matmatmul!(C, tA, tB, A, B) end -const tilebufsize = 10800 # Approximately 32k/3 +# aggressive const prop makes mixed eltype mul!(C, A, B) invoke _generic_matmatmul! 
directly +Base.@constprop :aggressive generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) = + _generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add) -function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix, - _add::MulAddMul=MulAddMul()) - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) - mC, nC = size(C) - - if iszero(_add.alpha) - return _rmul_or_fill!(C, _add.beta) - end - if mA == nA == mB == nB == mC == nC == 2 - return matmul2x2!(C, tA, tB, A, B, _add) - end - if mA == nA == mB == nB == mC == nC == 3 - return matmul3x3!(C, tA, tB, A, B, _add) - end - _generic_matmatmul!(C, tA, tB, A, B, _add) -end - -generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) = - _generic_matmatmul!(C, tA, tB, A, B, _add) - -function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}, +@noinline function _generic_matmatmul!(C::AbstractVecOrMat{R}, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}, _add::MulAddMul) where {T,S,R} - require_one_based_indexing(C, A, B) - mA, nA = lapack_size(tA, A) - mB, nB = lapack_size(tB, B) - if mB != nA - throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA), matrix B has dimensions ($mB,$nB)")) - end - if size(C,1) != mA || size(C,2) != nB - throw(DimensionMismatch(lazy"result C has dimensions $(size(C)), needs ($mA,$nB)")) + AxM = axes(A, 1) + AxK = axes(A, 2) # we use two `axes` calls in case of `AbstractVector` + BxK = axes(B, 1) + BxN = axes(B, 2) + CxM = axes(C, 1) + CxN = axes(C, 2) + if AxM != CxM + throw(DimensionMismatch(lazy"matrix A has axes ($AxM,$AxK), matrix C has axes ($CxM,$CxN)")) end - - if iszero(_add.alpha) || isempty(A) || isempty(B) - return _rmul_or_fill!(C, _add.beta) + if AxK != BxK + throw(DimensionMismatch(lazy"matrix A has axes ($AxM,$AxK), matrix B has axes ($BxK,$CxN)")) end - - tile_size = 0 - if isbitstype(R) && isbitstype(T) && isbitstype(S) && (tA == 'N' || tB != 'N') - tile_size = floor(Int, sqrt(tilebufsize / max(sizeof(R), sizeof(S), sizeof(T), 1))) + if BxN != CxN + throw(DimensionMismatch(lazy"matrix B has axes ($BxK,$BxN), matrix C has axes ($CxM,$CxN)")) end - @inbounds begin - if tile_size > 0 - sz = (tile_size, tile_size) - Atile = Array{T}(undef, sz) - Btile = Array{S}(undef, sz) - - z1 = zero(A[1, 1]*B[1, 1] + A[1, 1]*B[1, 1]) - z = convert(promote_type(typeof(z1), R), z1) - - if mA < tile_size && nA < tile_size && nB < tile_size - copy_transpose!(Atile, 1:nA, 1:mA, tA, A, 1:mA, 1:nA) - copyto!(Btile, 1:mB, 1:nB, tB, B, 1:mB, 1:nB) - for j = 1:nB - boff = (j-1)*tile_size - for i = 1:mA - aoff = (i-1)*tile_size - s = z - for k = 1:nA - s += Atile[aoff+k] * Btile[boff+k] - end - _modify!(_add, s, C, (i,j)) - end - end - else - Ctile = Array{R}(undef, sz) - for jb = 1:tile_size:nB - jlim = min(jb+tile_size-1,nB) - jlen = jlim-jb+1 - for ib = 1:tile_size:mA - ilim = min(ib+tile_size-1,mA) - ilen = ilim-ib+1 - fill!(Ctile, z) - for kb = 1:tile_size:nA - klim = min(kb+tile_size-1,mB) - klen = klim-kb+1 - copy_transpose!(Atile, 1:klen, 1:ilen, tA, A, ib:ilim, kb:klim) - copyto!(Btile, 1:klen, 1:jlen, tB, B, kb:klim, jb:jlim) - for j=1:jlen - bcoff = (j-1)*tile_size - for i = 1:ilen - aoff = (i-1)*tile_size - s = z - for k = 1:klen - s += Atile[aoff+k] * Btile[bcoff+k] - end - Ctile[bcoff+i] += s - end - end - end - if isone(_add.alpha) && iszero(_add.beta) - copyto!(C, ib:ilim, jb:jlim, Ctile, 1:ilen, 1:jlen) - else - C[ib:ilim, 
jb:jlim] .= @views _add.(Ctile[1:ilen, 1:jlen], C[ib:ilim, jb:jlim]) - end - end + if isbitstype(R) && sizeof(R) ≤ 16 && !(A isa Adjoint || A isa Transpose) + _rmul_or_fill!(C, _add.beta) + (iszero(_add.alpha) || isempty(A) || isempty(B)) && return C + @inbounds for n in BxN, k in BxK + Balpha = B[k,n]*_add.alpha + @simd for m in AxM + C[m,n] = muladd(A[m,k], Balpha, C[m,n]) end end + elseif isbitstype(R) && sizeof(R) ≤ 16 && ((A isa Adjoint && B isa Adjoint) || (A isa Transpose && B isa Transpose)) + _rmul_or_fill!(C, _add.beta) + (iszero(_add.alpha) || isempty(A) || isempty(B)) && return C + t = wrapperop(A) + pB = parent(B) + pA = parent(A) + tmp = similar(C, CxN) + ci = first(CxM) + ta = t(_add.alpha) + for i in AxM + mul!(tmp, pB, view(pA, :, i)) + C[ci,:] .+= t.(ta .* tmp) + ci += 1 + end else - # Multiplication for non-plain-data uses the naive algorithm - if tA == 'N' - if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*B[1, j] + A[i, 1]*B[1, j]) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[i, k]*B[k, j] - end - _modify!(_add, Ctmp, C, (i,j)) - end - elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*transpose(B[j, 1]) + A[i, 1]*transpose(B[j, 1])) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[i, k] * transpose(B[j, k]) - end - _modify!(_add, Ctmp, C, (i,j)) - end - else - for i = 1:mA, j = 1:nB - z2 = zero(A[i, 1]*B[j, 1]' + A[i, 1]*B[j, 1]') - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[i, k]*B[j, k]' - end - _modify!(_add, Ctmp, C, (i,j)) - end - end - elseif tA == 'T' - if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*B[1, j] + transpose(A[1, i])*B[1, j]) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += transpose(A[k, i]) * B[k, j] - end - _modify!(_add, Ctmp, C, (i,j)) - end - elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*transpose(B[j, 1]) + transpose(A[1, i])*transpose(B[j, 1])) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += transpose(A[k, i]) * transpose(B[j, k]) - end - _modify!(_add, Ctmp, C, (i,j)) - end - else - for i = 1:mA, j = 1:nB - z2 = zero(transpose(A[1, i])*B[j, 1]' + transpose(A[1, i])*B[j, 1]') - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += transpose(A[k, i]) * adjoint(B[j, k]) - end - _modify!(_add, Ctmp, C, (i,j)) - end - end - else - if tB == 'N' - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*B[1, j] + A[1, i]'*B[1, j]) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[k, i]'B[k, j] - end - _modify!(_add, Ctmp, C, (i,j)) - end - elseif tB == 'T' - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*transpose(B[j, 1]) + A[1, i]'*transpose(B[j, 1])) - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += adjoint(A[k, i]) * transpose(B[j, k]) - end - _modify!(_add, Ctmp, C, (i,j)) - end - else - for i = 1:mA, j = 1:nB - z2 = zero(A[1, i]'*B[j, 1]' + A[1, i]'*B[j, 1]') - Ctmp = convert(promote_type(R, typeof(z2)), z2) - for k = 1:nA - Ctmp += A[k, i]'B[j, k]' - end - _modify!(_add, Ctmp, C, (i,j)) - end + if iszero(_add.alpha) || isempty(A) || isempty(B) + return _rmul_or_fill!(C, _add.beta) + end + a1 = first(AxK) + b1 = first(BxK) + @inbounds for i in AxM, j in BxN + z2 = zero(A[i, a1]*B[b1, j] + A[i, a1]*B[b1, j]) + Ctmp = convert(promote_type(R, typeof(z2)), z2) + @simd for k in AxK + Ctmp = muladd(A[i, k], B[k, j], Ctmp) end + _modify!(_add, Ctmp, C, (i,j)) end end - end # @inbounds 
- C + return C end @@ -1032,7 +854,9 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end @inbounds begin - if tA == 'T' + if tA == 'N' + A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2] + elseif tA == 'T' # TODO making these lazy could improve perf A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])) A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])) @@ -1040,10 +864,23 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat # TODO making these lazy could improve perf A11 = copy(A[1,1]'); A12 = copy(A[2,1]') A21 = copy(A[1,2]'); A22 = copy(A[2,2]') - else - A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2] + elseif tA == 'S' + A11 = symmetric(A[1,1], :U); A12 = A[1,2] + A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U) + elseif tA == 's' + A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])) + A21 = A[2,1]; A22 = symmetric(A[2,2], :L) + elseif tA == 'H' + A11 = hermitian(A[1,1], :U); A12 = A[1,2] + A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U) + else # if tA == 'h' + A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])) + A21 = A[2,1]; A22 = hermitian(A[2,2], :L) end - if tB == 'T' + if tB == 'N' + B11 = B[1,1]; B12 = B[1,2]; + B21 = B[2,1]; B22 = B[2,2] + elseif tB == 'T' # TODO making these lazy could improve perf B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])) B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])) @@ -1051,9 +888,18 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat # TODO making these lazy could improve perf B11 = copy(B[1,1]'); B12 = copy(B[2,1]') B21 = copy(B[1,2]'); B22 = copy(B[2,2]') - else - B11 = B[1,1]; B12 = B[1,2]; - B21 = B[2,1]; B22 = B[2,2] + elseif tB == 'S' + B11 = symmetric(B[1,1], :U); B12 = B[1,2] + B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U) + elseif tB == 's' + B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])) + B21 = B[2,1]; B22 = symmetric(B[2,2], :L) + elseif tB == 'H' + B11 = hermitian(B[1,1], :U); B12 = B[1,2] + B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U) + else # if tB == 'h' + B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])) + B21 = B[2,1]; B22 = hermitian(B[2,2], :L) end _modify!(_add, A11*B11 + A12*B21, C, (1,1)) _modify!(_add, A11*B12 + A12*B22, C, (1,2)) @@ -1075,7 +921,11 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end @inbounds begin - if tA == 'T' + if tA == 'N' + A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3] + A21 = A[2,1]; A22 = A[2,2]; A23 = A[2,3] + A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3] + elseif tA == 'T' # TODO making these lazy could improve perf A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1])) A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])); A23 = copy(transpose(A[3,2])) @@ -1085,13 +935,29 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat A11 = copy(A[1,1]'); A12 = copy(A[2,1]'); A13 = copy(A[3,1]') A21 = copy(A[1,2]'); A22 = copy(A[2,2]'); A23 = copy(A[3,2]') A31 = copy(A[1,3]'); A32 = copy(A[2,3]'); A33 = copy(A[3,3]') - else - A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3] - A21 = A[2,1]; A22 = A[2,2]; A23 = A[2,3] - A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3] + elseif tA == 'S' + A11 = 
symmetric(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3] + A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U); A23 = A[2,3] + A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = symmetric(A[3,3], :U) + elseif tA == 's' + A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1])) + A21 = A[2,1]; A22 = symmetric(A[2,2], :L); A23 = copy(transpose(A[3,2])) + A31 = A[3,1]; A32 = A[3,2]; A33 = symmetric(A[3,3], :L) + elseif tA == 'H' + A11 = hermitian(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3] + A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U); A23 = A[2,3] + A31 = copy(adjoint(A[1,3])); A32 = copy(adjoint(A[2,3])); A33 = hermitian(A[3,3], :U) + else # if tA == 'h' + A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])); A13 = copy(adjoint(A[3,1])) + A21 = A[2,1]; A22 = hermitian(A[2,2], :L); A23 = copy(adjoint(A[3,2])) + A31 = A[3,1]; A32 = A[3,2]; A33 = hermitian(A[3,3], :L) end - if tB == 'T' + if tB == 'N' + B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3] + B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3] + B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3] + elseif tB == 'T' # TODO making these lazy could improve perf B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1])) B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])); B23 = copy(transpose(B[3,2])) @@ -1101,10 +967,22 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat B11 = copy(B[1,1]'); B12 = copy(B[2,1]'); B13 = copy(B[3,1]') B21 = copy(B[1,2]'); B22 = copy(B[2,2]'); B23 = copy(B[3,2]') B31 = copy(B[1,3]'); B32 = copy(B[2,3]'); B33 = copy(B[3,3]') - else - B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3] - B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3] - B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3] + elseif tB == 'S' + B11 = symmetric(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3] + B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U); B23 = B[2,3] + B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = symmetric(B[3,3], :U) + elseif tB == 's' + B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1])) + B21 = B[2,1]; B22 = symmetric(B[2,2], :L); B23 = copy(transpose(B[3,2])) + B31 = B[3,1]; B32 = B[3,2]; B33 = symmetric(B[3,3], :L) + elseif tB == 'H' + B11 = hermitian(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3] + B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U); B23 = B[2,3] + B31 = copy(adjoint(B[1,3])); B32 = copy(adjoint(B[2,3])); B33 = hermitian(B[3,3], :U) + else # if tB == 'h' + B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])); B13 = copy(adjoint(B[3,1])) + B21 = B[2,1]; B22 = hermitian(B[2,2], :L); B23 = copy(adjoint(B[3,2])) + B31 = B[3,1]; B32 = B[3,2]; B33 = hermitian(B[3,3], :L) end _modify!(_add, A11*B11 + A12*B21 + A13*B31, C, (1,1)) diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl index 61e3b092b2a38..782e4778c56c9 100644 --- a/stdlib/LinearAlgebra/src/qr.jl +++ b/stdlib/LinearAlgebra/src/qr.jl @@ -32,7 +32,6 @@ The object has two fields: ``v_i`` is the ``i``th column of the matrix `V = I + tril(F.factors, -1)`. * `τ` is a vector of length `min(m,n)` containing the coefficients ``\tau_i``. 
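(Editorial aside, not part of the patch.) A sketch of how the `factors`/`τ` storage described above composes into `Q`, assuming a small square `BigFloat` matrix so that the plain unblocked `QR` representation is produced rather than the compact WY form:

```julia
using LinearAlgebra

A = BigFloat[4 3 0; 6 3 1; 0 2 5]
F = qr(A)                                  # non-BLAS eltype => a plain QR object
V = I + tril(F.factors, -1)                # Householder vectors v_i in the columns of V
H(i) = I - F.τ[i] * V[:, i] * V[:, i]'     # i-th Householder reflector
Q = prod(H(i) for i in 1:3)                # Q = H_1 * H_2 * H_3
Q * triu(F.factors) ≈ A                    # true: Q * R recovers A
Q ≈ Matrix(F.Q)                            # true
```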
- """ struct QR{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: Factorization{T} factors::S @@ -49,7 +48,7 @@ QR{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} = QR(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ)) # backwards-compatible constructors (remove with Julia 2.0) @deprecate(QR{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S}, - QR{T,S,typeof(τ)}(factors, τ)) + QR{T,S,typeof(τ)}(factors, τ), false) # iteration for destructuring into components Base.iterate(S::QR) = (S.Q, Val(:R)) @@ -126,7 +125,7 @@ QRCompactWY{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} = QRCompactWY(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T)) # backwards-compatible constructors (remove with Julia 2.0) @deprecate(QRCompactWY{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M}, - QRCompactWY{S,M,typeof(T)}(factors, T)) + QRCompactWY{S,M,typeof(T)}(factors, T), false) # iteration for destructuring into components Base.iterate(S::QRCompactWY) = (S.Q, Val(:R)) @@ -159,7 +158,7 @@ function Base.hash(F::QRCompactWY, h::UInt) return hash(F.factors, foldr(hash, _triuppers_qr(F.T); init=hash(QRCompactWY, h))) end function Base.:(==)(A::QRCompactWY, B::QRCompactWY) - return A.factors == B.factors && all(Splat(==), zip(_triuppers_qr.((A.T, B.T))...)) + return A.factors == B.factors && all(splat(==), zip(_triuppers_qr.((A.T, B.T))...)) end function Base.isequal(A::QRCompactWY, B::QRCompactWY) return isequal(A.factors, B.factors) && all(zip(_triuppers_qr.((A.T, B.T))...)) do (a, b) @@ -219,7 +218,7 @@ QRPivoted{T}(factors::AbstractMatrix, τ::AbstractVector, # backwards-compatible constructors (remove with Julia 2.0) @deprecate(QRPivoted{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}, jpvt::AbstractVector{<:Integer}) where {T,S}, - QRPivoted{T,S,typeof(τ),typeof(jpvt)}(factors, τ, jpvt)) + QRPivoted{T,S,typeof(τ),typeof(jpvt)}(factors, τ, jpvt), false) # iteration for destructuring into components Base.iterate(S::QRPivoted) = (S.Q, Val(:R)) @@ -298,7 +297,7 @@ qr!(A::StridedMatrix{<:BlasFloat}, ::ColumnNorm) = QRPivoted(LAPACK.geqp3!(A)... """ qr!(A, pivot = NoPivot(); blocksize) -`qr!` is the same as [`qr`](@ref) when `A` is a subtype of [`StridedMatrix`](@ref), +`qr!` is the same as [`qr`](@ref) when `A` is a subtype of [`AbstractMatrix`](@ref), but saves space by overwriting the input `A`, instead of creating a copy. An [`InexactError`](@ref) exception is thrown if the factorization produces a number not representable by the element type of `A`, e.g. for integer types. @@ -314,11 +313,8 @@ julia> a = [1. 2.; 3. 4.] 3.0 4.0 julia> qr!(a) -QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} -Q factor: -2×2 QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}: - -0.316228 -0.948683 - -0.948683 0.316228 +LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} +Q factor: 2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} R factor: 2×2 Matrix{Float64}: -3.16228 -4.42719 @@ -370,6 +366,11 @@ The individual components of the decomposition `F` can be retrieved via property - `F.p`: the permutation vector of the pivot ([`QRPivoted`](@ref) only) - `F.P`: the permutation matrix of the pivot ([`QRPivoted`](@ref) only) +!!! note + Each reference to the upper triangular factor via `F.R` allocates a new array. + It is therefore advisable to cache that array, say, by `R = F.R` and continue working + with `R`. + Iterating the decomposition produces the components `Q`, `R`, and if extant `p`. 
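(Editorial aside, not part of the patch.) A short usage sketch of the iteration/destructuring mentioned above, together with the `F.R` caching advice from the note:

```julia
using LinearAlgebra

A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0]
F = qr(A)
Q, R = F        # destructuring via iteration; qr(A, ColumnNorm()) would also yield p
Q * R ≈ A       # true
# Per the note above, keep working with the bound `R` rather than
# re-extracting `F.R`, which allocates a new array on every access.
```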
The following functions are available for the `QR` objects: [`inv`](@ref), [`size`](@ref), @@ -380,14 +381,14 @@ norm solution. Multiplication with respect to either full/square or non-full/square `Q` is allowed, i.e. both `F.Q*F.R` and `F.Q*A` are supported. A `Q` matrix can be converted into a regular matrix with -[`Matrix`](@ref). This operation returns the "thin" Q factor, i.e., if `A` is `m`×`n` with `m>=n`, then +[`Matrix`](@ref). This operation returns the "thin" Q factor, i.e., if `A` is `m`×`n` with `m>=n`, then `Matrix(F.Q)` yields an `m`×`n` matrix with orthonormal columns. To retrieve the "full" Q factor, an -`m`×`m` orthogonal matrix, use `F.Q*Matrix(I,m,m)`. If `m<=n`, then `Matrix(F.Q)` yields an `m`×`m` +`m`×`m` orthogonal matrix, use `F.Q*I` or `collect(F.Q)`. If `m<=n`, then `Matrix(F.Q)` yields an `m`×`m` orthogonal matrix. The block size for QR decomposition can be specified by keyword argument `blocksize :: Integer` when `pivot == NoPivot()` and `A isa StridedMatrix{<:BlasFloat}`. -It is ignored when `blocksize > minimum(size(A))`. See [`QRCompactWY`](@ref). +It is ignored when `blocksize > minimum(size(A))`. See [`QRCompactWY`](@ref). !!! compat "Julia 1.4" The `blocksize` keyword argument requires Julia 1.4 or later. @@ -401,12 +402,8 @@ julia> A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0] 0.0 1.0 julia> F = qr(A) -QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} -Q factor: -3×3 QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}: - -0.6 0.0 0.8 - -0.8 0.0 -0.6 - 0.0 -1.0 0.0 +LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} +Q factor: 3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} R factor: 2×2 Matrix{Float64}: -5.0 10.0 @@ -425,12 +422,15 @@ true function qr(A::AbstractMatrix{T}, arg...; kwargs...) where T require_one_based_indexing(A) AA = copy_similar(A, _qreltype(T)) - return qr!(AA, arg...; kwargs...) + return _qr(AA, arg...; kwargs...) end # TODO: remove in Julia v2.0 @deprecate qr(A::AbstractMatrix, ::Val{false}; kwargs...) qr(A, NoPivot(); kwargs...) @deprecate qr(A::AbstractMatrix, ::Val{true}; kwargs...) qr(A, ColumnNorm(); kwargs...) +# allow packages like SparseArrays.jl to hook into here and redirect to out-of-place `qr` +_qr(A::AbstractMatrix, args...; kwargs...) = qr!(A, args...; kwargs...) + qr(x::Number) = qr(fill(x,1,1)) function qr(v::AbstractVector) require_one_based_indexing(v) @@ -458,7 +458,7 @@ Array(F::QRPivoted) = Matrix(F) function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{QR, QRCompactWY, QRPivoted}) summary(io, F); println(io) - println(io, "Q factor:") + print(io, "Q factor: ") show(io, mime, F.Q) println(io, "\nR factor:") show(io, mime, F.R) @@ -514,413 +514,113 @@ end Base.propertynames(F::QRPivoted, private::Bool=false) = (:R, :Q, :p, :P, (private ? fieldnames(typeof(F)) : ())...) -adjoint(F::Union{QR,QRPivoted,QRCompactWY}) = Adjoint(F) - -abstract type AbstractQ{T} <: AbstractMatrix{T} end +transpose(F::Union{QR{<:Real},QRPivoted{<:Real},QRCompactWY{<:Real}}) = F' +transpose(::Union{QR,QRPivoted,QRCompactWY}) = + throw(ArgumentError("transpose of QR decomposition is not supported, consider using adjoint")) -inv(Q::AbstractQ) = Q' - -""" - QRPackedQ <: AbstractMatrix - -The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QR`](@ref) or -[`QRPivoted`](@ref) format. 
-""" -struct QRPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T} - factors::S - τ::C - - function QRPackedQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}} - require_one_based_indexing(factors) - new{T,S,C}(factors, τ) - end -end -QRPackedQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} = - QRPackedQ{T,typeof(factors),typeof(τ)}(factors, τ) -QRPackedQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} = - QRPackedQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(QRPackedQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S}, - QRPackedQ{T,S,typeof(τ)}(factors, τ)) - -""" - QRCompactWYQ <: AbstractMatrix - -The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QRCompactWY`](@ref) -format. -""" -struct QRCompactWYQ{S, M<:AbstractMatrix{S}, C<:AbstractMatrix{S}} <: AbstractQ{S} - factors::M - T::C - - function QRCompactWYQ{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}} - require_one_based_indexing(factors) - new{S,M,C}(factors, T) - end -end -QRCompactWYQ(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} = - QRCompactWYQ{S,typeof(factors),typeof(T)}(factors, T) -QRCompactWYQ{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} = - QRCompactWYQ(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T)) -# backwards-compatible constructors (remove with Julia 2.0) -@deprecate(QRCompactWYQ{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M}, - QRCompactWYQ{S,M,typeof(T)}(factors, T)) - -QRPackedQ{T}(Q::QRPackedQ) where {T} = QRPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(Vector{T}, Q.τ)) -AbstractMatrix{T}(Q::QRPackedQ{T}) where {T} = Q -AbstractMatrix{T}(Q::QRPackedQ) where {T} = QRPackedQ{T}(Q) -QRCompactWYQ{S}(Q::QRCompactWYQ) where {S} = QRCompactWYQ(convert(AbstractMatrix{S}, Q.factors), convert(AbstractMatrix{S}, Q.T)) -AbstractMatrix{S}(Q::QRCompactWYQ{S}) where {S} = Q -AbstractMatrix{S}(Q::QRCompactWYQ) where {S} = QRCompactWYQ{S}(Q) -Matrix{T}(Q::AbstractQ{S}) where {T,S} = Matrix{T}(lmul!(Q, Matrix{S}(I, size(Q, 1), min(size(Q.factors)...)))) -Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q) -Array{T}(Q::AbstractQ) where {T} = Matrix{T}(Q) -Array(Q::AbstractQ) = Matrix(Q) - -size(F::Union{QR,QRCompactWY,QRPivoted}, dim::Integer) = size(getfield(F, :factors), dim) size(F::Union{QR,QRCompactWY,QRPivoted}) = size(getfield(F, :factors)) -size(Q::Union{QRCompactWYQ,QRPackedQ}, dim::Integer) = - size(getfield(Q, :factors), dim == 2 ? 1 : dim) -size(Q::Union{QRCompactWYQ,QRPackedQ}) = size(Q, 1), size(Q, 2) - -copymutable(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q))) -copy(Q::AbstractQ) = copymutable(Q) -getindex(Q::AbstractQ, inds...) = copymutable(Q)[inds...] 
-getindex(Q::AbstractQ, ::Colon, ::Colon) = copy(Q) - -function getindex(Q::AbstractQ, ::Colon, j::Int) - y = zeros(eltype(Q), size(Q, 2)) - y[j] = 1 - lmul!(Q, y) -end +size(F::Union{QR,QRCompactWY,QRPivoted}, dim::Integer) = size(getfield(F, :factors), dim) -getindex(Q::AbstractQ, i::Int, j::Int) = Q[:, j][i] -# specialization avoiding the fallback using slow `getindex` -function copyto!(dest::AbstractMatrix, src::AbstractQ) - copyto!(dest, I) - lmul!(src, dest) +function ldiv!(A::QRCompactWY{T}, b::AbstractVector{T}) where {T} + require_one_based_indexing(b) + m, n = size(A) + ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), b), 1:size(A, 2))) + return b end -# needed to resolve method ambiguities -function copyto!(dest::PermutedDimsArray{T,2,perm}, src::AbstractQ) where {T,perm} - if perm == (1, 2) - copyto!(parent(dest), src) - else - @assert perm == (2, 1) # there are no other permutations of two indices - if T <: Real - copyto!(parent(dest), I) - lmul!(src', parent(dest)) - else - # LAPACK does not offer inplace lmul!(transpose(Q), B) for complex Q - tmp = similar(parent(dest)) - copyto!(tmp, I) - rmul!(tmp, src) - permutedims!(parent(dest), tmp, (2, 1)) - end - end - return dest +function ldiv!(A::QRCompactWY{T}, B::AbstractMatrix{T}) where {T} + require_one_based_indexing(B) + m, n = size(A) + ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), B), 1:size(A, 2), 1:size(B, 2))) + return B end -## Multiplication by Q -### QB -lmul!(A::QRCompactWYQ{T,S}, B::StridedVecOrMat{T}) where {T<:BlasFloat, S<:StridedMatrix} = - LAPACK.gemqrt!('L', 'N', A.factors, A.T, B) -lmul!(A::QRPackedQ{T,S}, B::StridedVecOrMat{T}) where {T<:BlasFloat, S<:StridedMatrix} = - LAPACK.ormqr!('L', 'N', A.factors, A.τ, B) -function lmul!(A::QRPackedQ, B::AbstractVecOrMat) +# Julia implementation similar to xgelsy +function ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}, rcond::Real) where {T<:BlasFloat} require_one_based_indexing(B) - mA, nA = size(A.factors) - mB, nB = size(B,1), size(B,2) - if mA != mB - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)")) - end - Afactors = A.factors - @inbounds begin - for k = min(mA,nA):-1:1 - for j = 1:nB - vBj = B[k,j] - for i = k+1:mB - vBj += conj(Afactors[i,k])*B[i,j] - end - vBj = A.τ[k]*vBj - B[k,j] -= vBj - for i = k+1:mB - B[i,j] -= Afactors[i,k]*vBj - end - end - end - end - B -end + m, n = size(A) -function (*)(A::AbstractQ, b::StridedVector) - TAb = promote_type(eltype(A), eltype(b)) - Anew = convert(AbstractMatrix{TAb}, A) - if size(A.factors, 1) == length(b) - bnew = copymutable_oftype(b, TAb) - elseif size(A.factors, 2) == length(b) - bnew = [b; zeros(TAb, size(A.factors, 1) - length(b))] - else - throw(DimensionMismatch("vector must have length either $(size(A.factors, 1)) or $(size(A.factors, 2))")) + if m > size(B, 1) || n > size(B, 1) + throw(DimensionMismatch("B has leading dimension $(size(B, 1)) but needs at least $(max(m, n))")) end - lmul!(Anew, bnew) -end -function (*)(A::AbstractQ, B::StridedMatrix) - TAB = promote_type(eltype(A), eltype(B)) - Anew = convert(AbstractMatrix{TAB}, A) - if size(A.factors, 1) == size(B, 1) - Bnew = copymutable_oftype(B, TAB) - elseif size(A.factors, 2) == size(B, 1) - Bnew = [B; zeros(TAB, size(A.factors, 1) - size(B,1), size(B, 2))] - else - throw(DimensionMismatch("first dimension of matrix must have size either $(size(A.factors, 1)) or $(size(A.factors, 2))")) - end - lmul!(Anew, Bnew) -end -function 
(*)(A::AbstractQ, b::Number) - TAb = promote_type(eltype(A), typeof(b)) - dest = similar(A, TAb) - copyto!(dest, b*I) - lmul!(A, dest) -end - -### QcB -lmul!(adjA::Adjoint{<:Any,<:QRCompactWYQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasReal,S<:StridedMatrix} = - (A = adjA.parent; LAPACK.gemqrt!('L', 'T', A.factors, A.T, B)) -lmul!(adjA::Adjoint{<:Any,<:QRCompactWYQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasComplex,S<:StridedMatrix} = - (A = adjA.parent; LAPACK.gemqrt!('L', 'C', A.factors, A.T, B)) -lmul!(adjA::Adjoint{<:Any,<:QRPackedQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasReal,S<:StridedMatrix} = - (A = adjA.parent; LAPACK.ormqr!('L', 'T', A.factors, A.τ, B)) -lmul!(adjA::Adjoint{<:Any,<:QRPackedQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasComplex,S<:StridedMatrix} = - (A = adjA.parent; LAPACK.ormqr!('L', 'C', A.factors, A.τ, B)) -function lmul!(adjA::Adjoint{<:Any,<:QRPackedQ}, B::AbstractVecOrMat) - require_one_based_indexing(B) - A = adjA.parent - mA, nA = size(A.factors) - mB, nB = size(B,1), size(B,2) - if mA != mB - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but B has dimensions ($mB, $nB)")) + if length(A.factors) == 0 || length(B) == 0 + return B, 0 end - Afactors = A.factors + @inbounds begin - for k = 1:min(mA,nA) - for j = 1:nB - vBj = B[k,j] - for i = k+1:mB - vBj += conj(Afactors[i,k])*B[i,j] - end - vBj = conj(A.τ[k])*vBj - B[k,j] -= vBj - for i = k+1:mB - B[i,j] -= Afactors[i,k]*vBj - end - end + smin = smax = abs(A.factors[1]) + + if smax == 0 + return fill!(B, 0), 0 end - end - B -end -function *(adjQ::Adjoint{<:Any,<:AbstractQ}, B::StridedVecOrMat) - Q = adjQ.parent - TQB = promote_type(eltype(Q), eltype(B)) - return lmul!(adjoint(convert(AbstractMatrix{TQB}, Q)), copymutable_oftype(B, TQB)) -end -### QBc/QcBc -function *(Q::AbstractQ, adjB::Adjoint{<:Any,<:StridedVecOrMat}) - B = adjB.parent - TQB = promote_type(eltype(Q), eltype(B)) - Bc = similar(B, TQB, (size(B, 2), size(B, 1))) - adjoint!(Bc, B) - return lmul!(convert(AbstractMatrix{TQB}, Q), Bc) -end -function *(adjQ::Adjoint{<:Any,<:AbstractQ}, adjB::Adjoint{<:Any,<:StridedVecOrMat}) - Q, B = adjQ.parent, adjB.parent - TQB = promote_type(eltype(Q), eltype(B)) - Bc = similar(B, TQB, (size(B, 2), size(B, 1))) - adjoint!(Bc, B) - return lmul!(adjoint(convert(AbstractMatrix{TQB}, Q)), Bc) -end + mn = min(m, n) -### AQ -rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,S}) where {T<:BlasFloat,S<:StridedMatrix} = - LAPACK.gemqrt!('R', 'N', B.factors, B.T, A) -rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,S}) where {T<:BlasFloat,S<:StridedMatrix} = - LAPACK.ormqr!('R', 'N', B.factors, B.τ, A) -function rmul!(A::StridedMatrix,Q::QRPackedQ) - mQ, nQ = size(Q.factors) - mA, nA = size(A,1), size(A,2) - if nA != mQ - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)")) - end - Qfactors = Q.factors - @inbounds begin - for k = 1:min(mQ,nQ) - for i = 1:mA - vAi = A[i,k] - for j = k+1:mQ - vAi += A[i,j]*Qfactors[j,k] - end - vAi = vAi*Q.τ[k] - A[i,k] -= vAi - for j = k+1:nA - A[i,j] -= vAi*conj(Qfactors[j,k]) - end - end - end - end - A -end + # allocate temporary work space + tmp = Vector{T}(undef, 2mn) + wmin = view(tmp, 1:mn) + wmax = view(tmp, mn+1:2mn) -function (*)(A::StridedMatrix, Q::AbstractQ) - TAQ = promote_type(eltype(A), eltype(Q)) + rnk = 1 + wmin[1] = 1 + wmax[1] = 1 - return rmul!(copymutable_oftype(A, TAQ), convert(AbstractMatrix{TAQ}, Q)) -end + while rnk < mn + i = rnk + 1 -function (*)(a::Number, B::AbstractQ) - TaB = promote_type(typeof(a), 
eltype(B)) - dest = similar(B, TaB) - copyto!(dest, a*I) - rmul!(dest, B) -end + smin, s1, c1 = LAPACK.laic1!(2, view(wmin, 1:rnk), smin, view(A.factors, 1:rnk, i), A.factors[i,i]) + smax, s2, c2 = LAPACK.laic1!(1, view(wmax, 1:rnk), smax, view(A.factors, 1:rnk, i), A.factors[i,i]) -### AQc -rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasReal} = - (B = adjB.parent; LAPACK.gemqrt!('R', 'T', B.factors, B.T, A)) -rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasComplex} = - (B = adjB.parent; LAPACK.gemqrt!('R', 'C', B.factors, B.T, A)) -rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRPackedQ{T}}) where {T<:BlasReal} = - (B = adjB.parent; LAPACK.ormqr!('R', 'T', B.factors, B.τ, A)) -rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} = - (B = adjB.parent; LAPACK.ormqr!('R', 'C', B.factors, B.τ, A)) -function rmul!(A::StridedMatrix, adjQ::Adjoint{<:Any,<:QRPackedQ}) - Q = adjQ.parent - mQ, nQ = size(Q.factors) - mA, nA = size(A,1), size(A,2) - if nA != mQ - throw(DimensionMismatch("matrix A has dimensions ($mA,$nA) but matrix Q has dimensions ($mQ, $nQ)")) - end - Qfactors = Q.factors - @inbounds begin - for k = min(mQ,nQ):-1:1 - for i = 1:mA - vAi = A[i,k] - for j = k+1:mQ - vAi += A[i,j]*Qfactors[j,k] - end - vAi = vAi*conj(Q.τ[k]) - A[i,k] -= vAi - for j = k+1:nA - A[i,j] -= vAi*conj(Qfactors[j,k]) - end + if smax*rcond > smin + break end - end - end - A -end -function *(A::StridedMatrix, adjB::Adjoint{<:Any,<:AbstractQ}) - B = adjB.parent - TAB = promote_type(eltype(A),eltype(B)) - BB = convert(AbstractMatrix{TAB}, B) - if size(A,2) == size(B.factors, 1) - AA = copy_similar(A, TAB) - return rmul!(AA, adjoint(BB)) - elseif size(A,2) == size(B.factors,2) - return rmul!([A zeros(TAB, size(A, 1), size(B.factors, 1) - size(B.factors, 2))], adjoint(BB)) - else - throw(DimensionMismatch("matrix A has dimensions $(size(A)) but matrix B has dimensions $(size(B))")) - end -end -*(u::AdjointAbsVec, A::Adjoint{<:Any,<:AbstractQ}) = adjoint(A.parent * u.parent) + for j in 1:rnk + wmin[j] *= s1 + wmax[j] *= s2 + end + wmin[i] = c1 + wmax[i] = c2 -### AcQ/AcQc -function *(adjA::Adjoint{<:Any,<:StridedVecOrMat}, Q::AbstractQ) - A = adjA.parent - TAQ = promote_type(eltype(A), eltype(Q)) - Ac = similar(A, TAQ, (size(A, 2), size(A, 1))) - adjoint!(Ac, A) - return rmul!(Ac, convert(AbstractMatrix{TAQ}, Q)) -end -function *(adjA::Adjoint{<:Any,<:StridedVecOrMat}, adjQ::Adjoint{<:Any,<:AbstractQ}) - A, Q = adjA.parent, adjQ.parent - TAQ = promote_type(eltype(A), eltype(Q)) - Ac = similar(A, TAQ, (size(A, 2), size(A, 1))) - adjoint!(Ac, A) - return rmul!(Ac, adjoint(convert(AbstractMatrix{TAQ}, Q))) -end + rnk += 1 + end -### mul! 
-function mul!(C::StridedVecOrMat{T}, Q::AbstractQ{T}, B::StridedVecOrMat{T}) where {T} - require_one_based_indexing(C, B) - mB = size(B, 1) - mC = size(C, 1) - if mB < mC - inds = CartesianIndices(B) - copyto!(C, inds, B, inds) - C[CartesianIndices((mB+1:mC, axes(C, 2)))] .= zero(T) - return lmul!(Q, C) - else - return lmul!(Q, copyto!(C, B)) - end -end -mul!(C::StridedVecOrMat{T}, A::StridedVecOrMat{T}, Q::AbstractQ{T}) where {T} = rmul!(copyto!(C, A), Q) -mul!(C::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:AbstractQ{T}}, B::StridedVecOrMat{T}) where {T} = lmul!(adjQ, copyto!(C, B)) -mul!(C::StridedVecOrMat{T}, A::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:AbstractQ{T}}) where {T} = rmul!(copyto!(C, A), adjQ) + if rnk < n + C, τ = LAPACK.tzrzf!(A.factors[1:rnk, :]) + work = vec(C) + else + C, τ = A.factors, A.τ + work = resize!(tmp, n) + end -function ldiv!(A::QRCompactWY{T}, b::StridedVector{T}) where {T<:BlasFloat} - m,n = size(A) - ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), b), 1:size(A, 2))) - return b -end -function ldiv!(A::QRCompactWY{T}, B::StridedMatrix{T}) where {T<:BlasFloat} - m,n = size(A) - ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), B), 1:size(A, 2), 1:size(B, 2))) - return B -end + lmul!(adjoint(A.Q), view(B, 1:m, :)) + ldiv!(UpperTriangular(view(C, 1:rnk, 1:rnk)), view(B, 1:rnk, :)) -# Julia implementation similar to xgelsy -function ldiv!(A::QRPivoted{T}, B::StridedMatrix{T}, rcond::Real) where T<:BlasFloat - mA, nA = size(A.factors) - nr = min(mA,nA) - nrhs = size(B, 2) - if nr == 0 - return B, 0 - end - ar = abs(A.factors[1]) - if ar == 0 - B[1:nA, :] .= 0 - return B, 0 - end - rnk = 1 - xmin = T[1] - xmax = T[1] - tmin = tmax = ar - while rnk < nr - tmin, smin, cmin = LAPACK.laic1!(2, xmin, tmin, view(A.factors, 1:rnk, rnk + 1), A.factors[rnk + 1, rnk + 1]) - tmax, smax, cmax = LAPACK.laic1!(1, xmax, tmax, view(A.factors, 1:rnk, rnk + 1), A.factors[rnk + 1, rnk + 1]) - tmax*rcond > tmin && break - push!(xmin, cmin) - push!(xmax, cmax) - for i = 1:rnk - xmin[i] *= smin - xmax[i] *= smax + if rnk < n + B[rnk+1:n,:] .= zero(T) + LAPACK.ormrz!('L', T <: Complex ? 'C' : 'T', C, τ, view(B, 1:n, :)) + end + + for j in axes(B, 2) + for i in 1:n + work[A.p[i]] = B[i,j] + end + for i in 1:n + B[i,j] = work[i] + end end - rnk += 1 end - C, τ = LAPACK.tzrzf!(A.factors[1:rnk,:]) - ldiv!(UpperTriangular(C[1:rnk,1:rnk]),view(lmul!(adjoint(A.Q), view(B, 1:mA, 1:nrhs)), 1:rnk, 1:nrhs)) - B[rnk+1:end,:] .= zero(T) - LAPACK.ormrz!('L', eltype(B)<:Complex ? 
'C' : 'T', C, τ, view(B,1:nA,1:nrhs)) - B[1:nA,:] = view(B, 1:nA, :)[invperm(A.p),:] + return B, rnk end -ldiv!(A::QRPivoted{T}, B::StridedVector{T}) where {T<:BlasFloat} = - vec(ldiv!(A,reshape(B,length(B),1))) -ldiv!(A::QRPivoted{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - ldiv!(A, B, min(size(A)...)*eps(real(float(one(eltype(B))))))[1] -function _wide_qr_ldiv!(A::QR{T}, B::StridedMatrix{T}) where T + +ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractVector{T}) where {T<:BlasFloat} = + vec(ldiv!(A, reshape(B, length(B), 1))) +ldiv!(A::QRPivoted{T,<:StridedMatrix}, B::AbstractMatrix{T}) where {T<:BlasFloat} = + ldiv!(A, B, min(size(A)...)*eps(real(T)))[1] + +function _wide_qr_ldiv!(A::QR{T}, B::AbstractMatrix{T}) where T m, n = size(A) minmn = min(m,n) mB, nB = size(B) @@ -951,14 +651,14 @@ function _wide_qr_ldiv!(A::QR{T}, B::StridedMatrix{T}) where T B[m + 1:mB,1:nB] .= zero(T) for j = 1:nB for k = 1:m - vBj = B[k,j] + vBj = B[k,j]' for i = m + 1:n - vBj += B[i,j]*R[k,i]' + vBj += B[i,j]'*R[k,i]' end vBj *= τ[k] - B[k,j] -= vBj + B[k,j] -= vBj' for i = m + 1:n - B[i,j] -= R[k,i]*vBj + B[i,j] -= R[k,i]'*vBj' end end end @@ -968,7 +668,7 @@ function _wide_qr_ldiv!(A::QR{T}, B::StridedMatrix{T}) where T end -function ldiv!(A::QR{T}, B::StridedMatrix{T}) where T +function ldiv!(A::QR{T}, B::AbstractMatrix{T}) where T m, n = size(A) m < n && return _wide_qr_ldiv!(A, B) @@ -977,17 +677,17 @@ function ldiv!(A::QR{T}, B::StridedMatrix{T}) where T ldiv!(UpperTriangular(view(R,1:n,:)), view(B, 1:n, :)) return B end -function ldiv!(A::QR, B::StridedVector) +function ldiv!(A::QR, B::AbstractVector) ldiv!(A, reshape(B, length(B), 1)) return B end -function ldiv!(A::QRPivoted, b::StridedVector) +function ldiv!(A::QRPivoted, b::AbstractVector) ldiv!(QR(A.factors,A.τ), b) b[1:size(A.factors, 2)] = view(b, 1:size(A.factors, 2))[invperm(A.jpvt)] b end -function ldiv!(A::QRPivoted, B::StridedMatrix) +function ldiv!(A::QRPivoted, B::AbstractMatrix) ldiv!(QR(A.factors, A.τ), B) B[1:size(A.factors, 2),:] = view(B, 1:size(A.factors, 2), :)[invperm(A.jpvt),:] B @@ -999,9 +699,9 @@ function _apply_permutation!(F::QRPivoted, B::AbstractVecOrMat) B[1:length(F.p), :] = B[F.p, :] return B end -_apply_permutation!(F::Factorization, B::AbstractVecOrMat) = B +_apply_permutation!(::Factorization, B::AbstractVecOrMat) = B -function ldiv!(Fadj::Adjoint{<:Any,<:Union{QR,QRCompactWY,QRPivoted}}, B::AbstractVecOrMat) +function ldiv!(Fadj::AdjointFactorization{<:Any,<:Union{QR,QRCompactWY,QRPivoted}}, B::AbstractVecOrMat) require_one_based_indexing(B) m, n = size(Fadj) @@ -1060,25 +760,3 @@ end ## Lower priority: Add LQ, QL and RQ factorizations # FIXME! Should add balancing option through xgebal - - -det(Q::QRPackedQ) = _det_tau(Q.τ) - -det(Q::QRCompactWYQ) = - prod(i -> _det_tau(_diagview(Q.T[:, i:min(i + size(Q.T, 1), size(Q.T, 2))])), - 1:size(Q.T, 1):size(Q.T, 2)) - -_diagview(A) = @view A[diagind(A)] - -# Compute `det` from the number of Householder reflections. Handle -# the case `Q.τ` contains zeros. -_det_tau(τs::AbstractVector{<:Real}) = - isodd(count(!iszero, τs)) ? -one(eltype(τs)) : one(eltype(τs)) - -# In complex case, we need to compute the non-unit eigenvalue `λ = 1 - c*τ` -# (where `c = v'v`) of each Householder reflector. As we know that the -# reflector must have the determinant of 1, it must satisfy `abs2(λ) == 1`. -# Combining this with the constraint `c > 0`, it turns out that the eigenvalue -# (hence the determinant) can be computed as `λ = -sign(τ)^2`. 
-# See: https://github.com/JuliaLang/julia/pull/32887#issuecomment-521935716 -_det_tau(τs) = prod(τ -> iszero(τ) ? one(τ) : -sign(τ)^2, τs) diff --git a/stdlib/LinearAlgebra/src/schur.jl b/stdlib/LinearAlgebra/src/schur.jl index 75cef93ee2f4b..7257544ff872e 100644 --- a/stdlib/LinearAlgebra/src/schur.jl +++ b/stdlib/LinearAlgebra/src/schur.jl @@ -68,7 +68,7 @@ Base.iterate(S::Schur, ::Val{:values}) = (S.values, Val(:done)) Base.iterate(S::Schur, ::Val{:done}) = nothing """ - schur!(A::StridedMatrix) -> F::Schur + schur!(A) -> F::Schur Same as [`schur`](@ref) but uses the input argument `A` as workspace. @@ -102,6 +102,8 @@ julia> A """ schur!(A::StridedMatrix{<:BlasFloat}) = Schur(LinearAlgebra.LAPACK.gees!('V', A)...) +schur!(A::UpperHessenberg{T}) where {T<:BlasFloat} = Schur(LinearAlgebra.LAPACK.hseqr!(parent(A))...) + """ schur(A) -> F::Schur @@ -153,6 +155,7 @@ true ``` """ schur(A::AbstractMatrix{T}) where {T} = schur!(copy_similar(A, eigtype(T))) +schur(A::UpperHessenberg{T}) where {T} = schur!(copy_similar(A, eigtype(T))) function schur(A::RealHermSymComplexHerm) F = eigen(A; sortby=nothing) return Schur(typeof(F.vectors)(Diagonal(F.values)), F.vectors, F.values) @@ -342,8 +345,13 @@ Base.iterate(S::GeneralizedSchur, ::Val{:done}) = nothing Same as [`schur`](@ref) but uses the input matrices `A` and `B` as workspace. """ -schur!(A::StridedMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} = - GeneralizedSchur(LinearAlgebra.LAPACK.gges!('V', 'V', A, B)...) +function schur!(A::StridedMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} + if LAPACK.version() < v"3.6.0" + GeneralizedSchur(LinearAlgebra.LAPACK.gges!('V', 'V', A, B)...) + else + GeneralizedSchur(LinearAlgebra.LAPACK.gges3!('V', 'V', A, B)...) + end +end """ schur(A, B) -> F::GeneralizedSchur diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl index 098df785e557a..d028fe43e6338 100644 --- a/stdlib/LinearAlgebra/src/special.jl +++ b/stdlib/LinearAlgebra/src/special.jl @@ -2,13 +2,6 @@ # Methods operating on different special matrix types - -# Usually, reducedim_initarray calls similar, which yields a sparse matrix for a -# Diagonal/Bidiagonal/Tridiagonal/SymTridiagonal matrix. However, reducedim should -# yield a dense vector to increase performance. -Base.reducedim_initarray(A::Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}, region, init, ::Type{R}) where {R} = fill(convert(R, init), Base.reduced_indices(A,region)) - - # Interconversion between special matrix types # conversions from Diagonal to other special matrix types @@ -50,8 +43,8 @@ Bidiagonal(A::AbstractTriangular) = isbanded(A, -1, 0) ? Bidiagonal(diag(A, 0), diag(A, -1), :L) : # is lower bidiagonal throw(ArgumentError("matrix cannot be represented as Bidiagonal")) -_lucopy(A::Bidiagonal, T) = copymutable_oftype(Tridiagonal(A), T) -_lucopy(A::Diagonal, T) = copymutable_oftype(Tridiagonal(A), T) +_lucopy(A::Bidiagonal, T) = copymutable_oftype(Tridiagonal(A), T) +_lucopy(A::Diagonal, T) = copymutable_oftype(Tridiagonal(A), T) function _lucopy(A::SymTridiagonal, T) du = copy_similar(_evview(A), T) dl = copy.(transpose.(du)) @@ -62,27 +55,27 @@ end const ConvertibleSpecialMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,AbstractTriangular} const PossibleTriangularMatrix = Union{Diagonal, Bidiagonal, AbstractTriangular} -convert(T::Type{<:Diagonal}, m::ConvertibleSpecialMatrix) = m isa T ? m : - isdiag(m) ? 
T(m) : throw(ArgumentError("matrix cannot be represented as Diagonal")) -convert(T::Type{<:SymTridiagonal}, m::ConvertibleSpecialMatrix) = m isa T ? m : - issymmetric(m) && isbanded(m, -1, 1) ? T(m) : throw(ArgumentError("matrix cannot be represented as SymTridiagonal")) -convert(T::Type{<:Tridiagonal}, m::ConvertibleSpecialMatrix) = m isa T ? m : - isbanded(m, -1, 1) ? T(m) : throw(ArgumentError("matrix cannot be represented as Tridiagonal")) +convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:Diagonal} = m isa T ? m : + isdiag(m) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as Diagonal")) +convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:SymTridiagonal} = m isa T ? m : + issymmetric(m) && isbanded(m, -1, 1) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as SymTridiagonal")) +convert(::Type{T}, m::ConvertibleSpecialMatrix) where {T<:Tridiagonal} = m isa T ? m : + isbanded(m, -1, 1) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as Tridiagonal")) -convert(T::Type{<:LowerTriangular}, m::Union{LowerTriangular,UnitLowerTriangular}) = m isa T ? m : T(m) -convert(T::Type{<:UpperTriangular}, m::Union{UpperTriangular,UnitUpperTriangular}) = m isa T ? m : T(m) +convert(::Type{T}, m::Union{LowerTriangular,UnitLowerTriangular}) where {T<:LowerTriangular} = m isa T ? m : T(m)::T +convert(::Type{T}, m::Union{UpperTriangular,UnitUpperTriangular}) where {T<:UpperTriangular} = m isa T ? m : T(m)::T -convert(T::Type{<:LowerTriangular}, m::PossibleTriangularMatrix) = m isa T ? m : - istril(m) ? T(m) : throw(ArgumentError("matrix cannot be represented as LowerTriangular")) -convert(T::Type{<:UpperTriangular}, m::PossibleTriangularMatrix) = m isa T ? m : - istriu(m) ? T(m) : throw(ArgumentError("matrix cannot be represented as UpperTriangular")) +convert(::Type{T}, m::PossibleTriangularMatrix) where {T<:LowerTriangular} = m isa T ? m : + istril(m) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as LowerTriangular")) +convert(::Type{T}, m::PossibleTriangularMatrix) where {T<:UpperTriangular} = m isa T ? m : + istriu(m) ? T(m)::T : throw(ArgumentError("matrix cannot be represented as UpperTriangular")) # Constructs two method definitions taking into account (assumed) commutativity # e.g. @commutative f(x::S, y::T) where {S,T} = x+y is the same is defining # f(x::S, y::T) where {S,T} = x+y # f(y::T, x::S) where {S,T} = f(x, y) macro commutative(myexpr) - @assert myexpr.head===:(=) || myexpr.head===:function # Make sure it is a function definition + @assert Base.is_function_def(myexpr) # Make sure it is a function definition y = copy(myexpr.args[1].args[2:end]) reverse!(y) reversed_call = Expr(:(=), Expr(:call,myexpr.args[1].args[1],y...), myexpr.args[1]) @@ -114,6 +107,37 @@ for op in (:+, :-) end end +# disambiguation between triangular and banded matrices, banded ones "dominate" +mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix) = _mul!(C, A, B, MulAddMul()) +mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular) = _mul!(C, A, B, MulAddMul()) +mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix, alpha::Number, beta::Number) = + _mul!(C, A, B, MulAddMul(alpha, beta)) +mul!(C::AbstractMatrix, A::BandedMatrix, B::AbstractTriangular, alpha::Number, beta::Number) = + _mul!(C, A, B, MulAddMul(alpha, beta)) + +function *(H::UpperHessenberg, B::Bidiagonal) + T = promote_op(matprod, eltype(H), eltype(B)) + A = mul!(similar(H, T, size(H)), H, B) + return B.uplo == 'U' ? 
UpperHessenberg(A) : A +end +function *(B::Bidiagonal, H::UpperHessenberg) + T = promote_op(matprod, eltype(B), eltype(H)) + A = mul!(similar(H, T, size(H)), B, H) + return B.uplo == 'U' ? UpperHessenberg(A) : A +end + +function /(H::UpperHessenberg, B::Bidiagonal) + T = typeof(oneunit(eltype(H))/oneunit(eltype(B))) + A = _rdiv!(similar(H, T, size(H)), H, B) + return B.uplo == 'U' ? UpperHessenberg(A) : A +end + +function \(B::Bidiagonal, H::UpperHessenberg) + T = typeof(oneunit(eltype(B))\oneunit(eltype(H))) + A = ldiv!(similar(H, T, size(H)), B, H) + return B.uplo == 'U' ? UpperHessenberg(A) : A +end + # specialized +/- for structured matrices. If these are removed, it falls # back to broadcasting which has ~2-10x speed regressions. # For the other structure matrix pairs, broadcasting works well. @@ -124,7 +148,7 @@ end # the off diagonal could be a different type after the operation resulting in # an error. See issue #28994 -function (+)(A::Bidiagonal, B::Diagonal) +@commutative function (+)(A::Bidiagonal, B::Diagonal) newdv = A.dv + B.diag Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo) end @@ -134,223 +158,123 @@ function (-)(A::Bidiagonal, B::Diagonal) Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo) end -function (+)(A::Diagonal, B::Bidiagonal) - newdv = A.diag + B.dv - Bidiagonal(newdv, typeof(newdv)(B.ev), B.uplo) -end - function (-)(A::Diagonal, B::Bidiagonal) - newdv = A.diag-B.dv + newdv = A.diag - B.dv Bidiagonal(newdv, typeof(newdv)(-B.ev), B.uplo) end -function (+)(A::Diagonal, B::SymTridiagonal) - newdv = A.diag+B.dv - SymTridiagonal(A.diag+B.dv, typeof(newdv)(B.ev)) +@commutative function (+)(A::Diagonal, B::SymTridiagonal) + newdv = A.diag + B.dv + SymTridiagonal(A.diag + B.dv, typeof(newdv)(B.ev)) end function (-)(A::Diagonal, B::SymTridiagonal) - newdv = A.diag-B.dv + newdv = A.diag - B.dv SymTridiagonal(newdv, typeof(newdv)(-B.ev)) end -function (+)(A::SymTridiagonal, B::Diagonal) - newdv = A.dv+B.diag - SymTridiagonal(newdv, typeof(newdv)(A.ev)) -end - function (-)(A::SymTridiagonal, B::Diagonal) - newdv = A.dv-B.diag + newdv = A.dv - B.diag SymTridiagonal(newdv, typeof(newdv)(A.ev)) end # this set doesn't have the aforementioned problem -+(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+_evview(B), A.d+B.dv, A.du+_evview(B)) +@commutative (+)(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+_evview(B), A.d+B.dv, A.du+_evview(B)) -(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-_evview(B), A.d-B.dv, A.du-_evview(B)) -+(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)+B.dl, A.dv+B.d, _evview(A)+B.du) -(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)-B.dl, A.dv-B.d, _evview(A)-B.du) - -function (+)(A::Diagonal, B::Tridiagonal) - newdv = A.diag+B.d +@commutative function (+)(A::Diagonal, B::Tridiagonal) + newdv = A.diag + B.d Tridiagonal(typeof(newdv)(B.dl), newdv, typeof(newdv)(B.du)) end function (-)(A::Diagonal, B::Tridiagonal) - newdv = A.diag-B.d + newdv = A.diag - B.d Tridiagonal(typeof(newdv)(-B.dl), newdv, typeof(newdv)(-B.du)) end -function (+)(A::Tridiagonal, B::Diagonal) - newdv = A.d+B.diag - Tridiagonal(typeof(newdv)(A.dl), newdv, typeof(newdv)(A.du)) -end - function (-)(A::Tridiagonal, B::Diagonal) - newdv = A.d-B.diag + newdv = A.d - B.diag Tridiagonal(typeof(newdv)(A.dl), newdv, typeof(newdv)(A.du)) end -function (+)(A::Bidiagonal, B::Tridiagonal) - newdv = A.dv+B.d +@commutative function (+)(A::Bidiagonal, B::Tridiagonal) + newdv = A.dv + B.d Tridiagonal((A.uplo == 'U' ? 
(typeof(newdv)(B.dl), newdv, A.ev+B.du) : (A.ev+B.dl, newdv, typeof(newdv)(B.du)))...) end function (-)(A::Bidiagonal, B::Tridiagonal) - newdv = A.dv-B.d + newdv = A.dv - B.d Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-B.dl), newdv, A.ev-B.du) : (A.ev-B.dl, newdv, typeof(newdv)(-B.du)))...) end -function (+)(A::Tridiagonal, B::Bidiagonal) - newdv = A.d+B.dv - Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(A.dl), newdv, A.du+B.ev) : (A.dl+B.ev, newdv, typeof(newdv)(A.du)))...) -end - function (-)(A::Tridiagonal, B::Bidiagonal) - newdv = A.d-B.dv + newdv = A.d - B.dv Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(A.dl), newdv, A.du-B.ev) : (A.dl-B.ev, newdv, typeof(newdv)(A.du)))...) end -function (+)(A::Bidiagonal, B::SymTridiagonal) - newdv = A.dv+B.dv +@commutative function (+)(A::Bidiagonal, B::SymTridiagonal) + newdv = A.dv + B.dv Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview(B)), A.dv+B.dv, A.ev+_evview(B)) : (A.ev+_evview(B), A.dv+B.dv, typeof(newdv)(_evview(B))))...) end function (-)(A::Bidiagonal, B::SymTridiagonal) - newdv = A.dv-B.dv + newdv = A.dv - B.dv Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview(B), newdv, typeof(newdv)(-_evview(B))))...) end -function (+)(A::SymTridiagonal, B::Bidiagonal) - newdv = A.dv+B.dv - Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)+B.ev) : (_evview(A)+B.ev, newdv, typeof(newdv)(_evview(A))))...) -end - function (-)(A::SymTridiagonal, B::Bidiagonal) - newdv = A.dv-B.dv + newdv = A.dv - B.dv Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)-B.ev) : (_evview(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...) end -# fixing uniform scaling problems from #28994 -# {<:Number} is required due to the test case from PR #27289 where eltype is a matrix. 
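Illustrative sketch (not part of the patch): the `@commutative` macro used throughout the hunk above lets each mixed-type `+` method be written once; a single definition is meant to expand into the original method plus a second one that swaps the argument order and forwards. A minimal stand-in `g` (hypothetical name) showing the intended expansion:

```julia
using LinearAlgebra

# What `@commutative g(A::Bidiagonal, B::Diagonal) = ...` is intended to produce:
g(A::Bidiagonal, B::Diagonal) = Bidiagonal(A.dv + B.diag, copy(A.ev), A.uplo)
g(B::Diagonal, A::Bidiagonal) = g(A, B)   # the reversed-order method simply forwards

D  = Diagonal([1.0, 2.0, 3.0])
Bd = Bidiagonal([4.0, 5.0, 6.0], [7.0, 8.0], :U)
g(D, Bd) == g(Bd, D)   # true: both argument orders hit the same implementation
```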
- -function (+)(A::Tridiagonal{<:Number}, B::UniformScaling) - newd = A.d .+ B.λ +@commutative function (+)(A::Tridiagonal, B::UniformScaling) + newd = A.d .+ Ref(B) Tridiagonal(typeof(newd)(A.dl), newd, typeof(newd)(A.du)) end -function (+)(A::SymTridiagonal{<:Number}, B::UniformScaling) - newdv = A.dv .+ B.λ +@commutative function (+)(A::SymTridiagonal, B::UniformScaling) + newdv = A.dv .+ Ref(B) SymTridiagonal(newdv, typeof(newdv)(A.ev)) end -function (+)(A::Bidiagonal{<:Number}, B::UniformScaling) - newdv = A.dv .+ B.λ +@commutative function (+)(A::Bidiagonal, B::UniformScaling) + newdv = A.dv .+ Ref(B) Bidiagonal(newdv, typeof(newdv)(A.ev), A.uplo) end -function (+)(A::Diagonal{<:Number}, B::UniformScaling) - Diagonal(A.diag .+ B.λ) -end - -function (+)(A::UniformScaling, B::Tridiagonal{<:Number}) - newd = A.λ .+ B.d - Tridiagonal(typeof(newd)(B.dl), newd, typeof(newd)(B.du)) -end - -function (+)(A::UniformScaling, B::SymTridiagonal{<:Number}) - newdv = A.λ .+ B.dv - SymTridiagonal(newdv, typeof(newdv)(B.ev)) -end - -function (+)(A::UniformScaling, B::Bidiagonal{<:Number}) - newdv = A.λ .+ B.dv - Bidiagonal(newdv, typeof(newdv)(B.ev), B.uplo) +@commutative function (+)(A::Diagonal, B::UniformScaling) + Diagonal(A.diag .+ Ref(B)) end -function (+)(A::UniformScaling, B::Diagonal{<:Number}) - Diagonal(A.λ .+ B.diag) +# StructuredMatrix - UniformScaling = StructuredMatrix + (-UniformScaling) => +# no need to define reversed order +function (-)(A::UniformScaling, B::Tridiagonal) + d = Ref(A) .- B.d + Tridiagonal(convert(typeof(d), -B.dl), d, convert(typeof(d), -B.du)) end - -function (-)(A::UniformScaling, B::Tridiagonal{<:Number}) - newd = A.λ .- B.d - Tridiagonal(typeof(newd)(-B.dl), newd, typeof(newd)(-B.du)) +function (-)(A::UniformScaling, B::SymTridiagonal) + dv = Ref(A) .- B.dv + SymTridiagonal(dv, convert(typeof(dv), -B.ev)) end - -function (-)(A::UniformScaling, B::SymTridiagonal{<:Number}) - newdv = A.λ .- B.dv - SymTridiagonal(newdv, typeof(newdv)(-B.ev)) +function (-)(A::UniformScaling, B::Bidiagonal) + dv = Ref(A) .- B.dv + Bidiagonal(dv, convert(typeof(dv), -B.ev), B.uplo) end - -function (-)(A::UniformScaling, B::Bidiagonal{<:Number}) - newdv = A.λ .- B.dv - Bidiagonal(newdv, typeof(newdv)(-B.ev), B.uplo) +function (-)(A::UniformScaling, B::Diagonal) + Diagonal(Ref(A) .- B.diag) end -function (-)(A::UniformScaling, B::Diagonal{<:Number}) - Diagonal(A.λ .- B.diag) -end +## Diagonal construction from UniformScaling +Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m)) +Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m) -lmul!(Q::AbstractQ, B::AbstractTriangular) = lmul!(Q, full!(B)) -lmul!(Q::QRPackedQ, B::AbstractTriangular) = lmul!(Q, full!(B)) # disambiguation -lmul!(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractTriangular) = lmul!(Q, full!(B)) -lmul!(Q::Adjoint{<:Any,<:QRPackedQ}, B::AbstractTriangular) = lmul!(Q, full!(B)) # disambiguation +Base.muladd(A::Union{Diagonal, UniformScaling}, B::Union{Diagonal, UniformScaling}, z::Union{Diagonal, UniformScaling}) = + Diagonal(_diag_or_value(A) .* _diag_or_value(B) .+ _diag_or_value(z)) -function _qlmul(Q::AbstractQ, B) - TQB = promote_type(eltype(Q), eltype(B)) - if size(Q.factors, 1) == size(B, 1) - Bnew = Matrix{TQB}(B) - elseif size(Q.factors, 2) == size(B, 1) - Bnew = [Matrix{TQB}(B); zeros(TQB, size(Q.factors, 1) - size(B,1), size(B, 2))] - else - throw(DimensionMismatch("first dimension of matrix must have size either $(size(Q.factors, 1)) or $(size(Q.factors, 2))")) - end - 
lmul!(convert(AbstractMatrix{TQB}, Q), Bnew) -end -function _qlmul(adjQ::Adjoint{<:Any,<:AbstractQ}, B) - TQB = promote_type(eltype(adjQ), eltype(B)) - lmul!(adjoint(convert(AbstractMatrix{TQB}, parent(adjQ))), Matrix{TQB}(B)) -end - -*(Q::AbstractQ, B::AbstractTriangular) = _qlmul(Q, B) -*(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractTriangular) = _qlmul(Q, B) -*(Q::AbstractQ, B::BiTriSym) = _qlmul(Q, B) -*(Q::Adjoint{<:Any,<:AbstractQ}, B::BiTriSym) = _qlmul(Q, B) -*(Q::AbstractQ, B::Diagonal) = _qlmul(Q, B) -*(Q::Adjoint{<:Any,<:AbstractQ}, B::Diagonal) = _qlmul(Q, B) - -rmul!(A::AbstractTriangular, Q::AbstractQ) = rmul!(full!(A), Q) -rmul!(A::AbstractTriangular, Q::Adjoint{<:Any,<:AbstractQ}) = rmul!(full!(A), Q) - -function _qrmul(A, Q::AbstractQ) - TAQ = promote_type(eltype(A), eltype(Q)) - return rmul!(Matrix{TAQ}(A), convert(AbstractMatrix{TAQ}, Q)) -end -function _qrmul(A, adjQ::Adjoint{<:Any,<:AbstractQ}) - Q = adjQ.parent - TAQ = promote_type(eltype(A), eltype(Q)) - if size(A,2) == size(Q.factors, 1) - Anew = Matrix{TAQ}(A) - elseif size(A,2) == size(Q.factors,2) - Anew = [Matrix{TAQ}(A) zeros(TAQ, size(A, 1), size(Q.factors, 1) - size(Q.factors, 2))] - else - throw(DimensionMismatch("matrix A has dimensions $(size(A)) but matrix B has dimensions $(size(Q))")) - end - return rmul!(Anew, adjoint(convert(AbstractMatrix{TAQ}, Q))) -end - -*(A::AbstractTriangular, Q::AbstractQ) = _qrmul(A, Q) -*(A::AbstractTriangular, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q) -*(A::BiTriSym, Q::AbstractQ) = _qrmul(A, Q) -*(A::BiTriSym, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q) -*(A::Diagonal, Q::AbstractQ) = _qrmul(A, Q) -*(A::Diagonal, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q) - -*(Q::AbstractQ, B::AbstractQ) = _qlmul(Q, B) -*(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractQ) = _qrmul(Q, B) -*(Q::AbstractQ, B::Adjoint{<:Any,<:AbstractQ}) = _qlmul(Q, B) -*(Q::Adjoint{<:Any,<:AbstractQ}, B::Adjoint{<:Any,<:AbstractQ}) = _qrmul(Q, B) +_diag_or_value(A::Diagonal) = A.diag +_diag_or_value(A::UniformScaling) = A.λ # fill[stored]! 
methods fillstored!(A::Diagonal, x) = (fill!(A.diag, x); A) @@ -381,6 +305,10 @@ end zero(D::Diagonal) = Diagonal(zero.(D.diag)) oneunit(D::Diagonal) = Diagonal(oneunit.(D.diag)) +isdiag(A::HermOrSym{<:Any,<:Diagonal}) = isdiag(parent(A)) +dot(x::AbstractVector, A::RealHermSymComplexSym{<:Real,<:Diagonal}, y::AbstractVector) = + dot(x, A.data, y) + # equals and approx equals methods for structured matrices # SymTridiagonal == Tridiagonal is already defined in tridiag.jl @@ -402,29 +330,121 @@ end ==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == B.dv ==(B::SymTridiagonal, A::Bidiagonal) = A == B -# concatenation -const _SpecialArrays = Union{Diagonal, Bidiagonal, Tridiagonal, SymTridiagonal} -const _Symmetric_DenseArrays{T,A<:Matrix} = Symmetric{T,A} -const _Hermitian_DenseArrays{T,A<:Matrix} = Hermitian{T,A} -const _Triangular_DenseArrays{T,A<:Matrix} = AbstractTriangular{T,A} -const _Annotated_DenseArrays = Union{_SpecialArrays, _Triangular_DenseArrays, _Symmetric_DenseArrays, _Hermitian_DenseArrays} -const _Annotated_Typed_DenseArrays{T} = Union{_Triangular_DenseArrays{T}, _Symmetric_DenseArrays{T}, _Hermitian_DenseArrays{T}} -const _DenseConcatGroup = Union{Number, Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _Annotated_DenseArrays} -const _TypedDenseConcatGroup{T} = Union{Vector{T}, Adjoint{T,Vector{T}}, Transpose{T,Vector{T}}, Matrix{T}, _Annotated_Typed_DenseArrays{T}} - -promote_to_array_type(::Tuple{Vararg{Union{_DenseConcatGroup,UniformScaling}}}) = Matrix - -Base._cat(dims, xs::_DenseConcatGroup...) = Base._cat_t(dims, promote_eltype(xs...), xs...) -vcat(A::Vector...) = Base.typed_vcat(promote_eltype(A...), A...) -vcat(A::_DenseConcatGroup...) = Base.typed_vcat(promote_eltype(A...), A...) -hcat(A::Vector...) = Base.typed_hcat(promote_eltype(A...), A...) -hcat(A::_DenseConcatGroup...) = Base.typed_hcat(promote_eltype(A...), A...) -hvcat(rows::Tuple{Vararg{Int}}, xs::_DenseConcatGroup...) = Base.typed_hvcat(promote_eltype(xs...), rows, xs...) -# For performance, specially handle the case where the matrices/vectors have homogeneous eltype -Base._cat(dims, xs::_TypedDenseConcatGroup{T}...) where {T} = Base._cat_t(dims, T, xs...) -vcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_vcat(T, A...) -hcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hcat(T, A...) -hvcat(rows::Tuple{Vararg{Int}}, xs::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hvcat(T, rows, xs...) +# TODO: remove these deprecations (used by SparseArrays in the past) +const _DenseConcatGroup = Union{} +const _SpecialArrays = Union{} + +promote_to_array_type(::Tuple) = Matrix + +# promote_to_arrays(n,k, T, A...) promotes any UniformScaling matrices +# in A to matrices of type T and sizes given by n[k:end]. n is an array +# so that the same promotion code can be used for hvcat. We pass the type T +# so that we can re-use this code for sparse-matrix hcat etcetera. 
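Illustrative usage (not part of the patch) of the `Diagonal`-from-`UniformScaling` constructors and the `Diagonal`/`UniformScaling` `muladd` method added above; the values shown follow directly from those definitions.

```julia
using LinearAlgebra

Diagonal(2I, 3)                  # Diagonal([2, 2, 2]); the size must be supplied explicitly
Diagonal{Float64}(2I, 3)         # Diagonal([2.0, 2.0, 2.0])

# muladd combines the diagonals/scaling factors elementwise: A*B + z on the diagonal
muladd(Diagonal([1, 2]), 3I, I)  # Diagonal([1, 2] .* 3 .+ 1) == Diagonal([4, 7])
```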
+promote_to_arrays_(n::Int, ::Type, a::Number) = a +promote_to_arrays_(n::Int, ::Type{Matrix}, J::UniformScaling{T}) where {T} = Matrix(J, n, n) +promote_to_arrays_(n::Int, ::Type, A::AbstractArray) = A +promote_to_arrays_(n::Int, ::Type, A::AbstractQ) = collect(A) +promote_to_arrays(n,k, ::Type) = () +promote_to_arrays(n,k, ::Type{T}, A) where {T} = (promote_to_arrays_(n[k], T, A),) +promote_to_arrays(n,k, ::Type{T}, A, B) where {T} = + (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B)) +promote_to_arrays(n,k, ::Type{T}, A, B, C) where {T} = + (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays_(n[k+2], T, C)) +promote_to_arrays(n,k, ::Type{T}, A, B, Cs...) where {T} = + (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays(n,k+2, T, Cs...)...) + +_us2number(A) = A +_us2number(J::UniformScaling) = J.λ + +for (f, _f, dim, name) in ((:hcat, :_hcat, 1, "rows"), (:vcat, :_vcat, 2, "cols")) + @eval begin + @inline $f(A::Union{AbstractArray,AbstractQ,UniformScaling}...) = $_f(A...) + # if there's a Number present, J::UniformScaling must be 1x1-dimensional + @inline $f(A::Union{AbstractArray,AbstractQ,UniformScaling,Number}...) = $f(map(_us2number, A)...) + function $_f(A::Union{AbstractArray,AbstractQ,UniformScaling,Number}...; array_type = promote_to_array_type(A)) + n = -1 + for a in A + if !isa(a, UniformScaling) + require_one_based_indexing(a) + na = size(a,$dim) + n >= 0 && n != na && + throw(DimensionMismatch(string("number of ", $name, + " of each array must match (got ", n, " and ", na, ")"))) + n = na + end + end + n == -1 && throw(ArgumentError($("$f of only UniformScaling objects cannot determine the matrix size"))) + return cat(promote_to_arrays(fill(n, length(A)), 1, array_type, A...)..., dims=Val(3-$dim)) + end + end +end + +hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractArray,AbstractQ,UniformScaling}...) = _hvcat(rows, A...) +hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractArray,AbstractQ,UniformScaling,Number}...) = _hvcat(rows, A...) +function _hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractArray,AbstractQ,UniformScaling,Number}...; array_type = promote_to_array_type(A)) + require_one_based_indexing(A...) + nr = length(rows) + sum(rows) == length(A) || throw(ArgumentError("mismatch between row sizes and number of arguments")) + n = fill(-1, length(A)) + needcols = false # whether we also need to infer some sizes from the column count + j = 0 + for i = 1:nr # infer UniformScaling sizes from row counts, if possible: + ni = -1 # number of rows in this block-row, -1 indicates unknown + for k = 1:rows[i] + if !isa(A[j+k], UniformScaling) + na = size(A[j+k], 1) + ni >= 0 && ni != na && + throw(DimensionMismatch("mismatch in number of rows")) + ni = na + end + end + if ni >= 0 + for k = 1:rows[i] + n[j+k] = ni + end + else # row consisted only of UniformScaling objects + needcols = true + end + j += rows[i] + end + if needcols # some sizes still unknown, try to infer from column count + nc = -1 + j = 0 + for i = 1:nr + nci = 0 + rows[i] > 0 && n[j+1] == -1 && (j += rows[i]; continue) + for k = 1:rows[i] + nci += isa(A[j+k], UniformScaling) ? 
n[j+k] : size(A[j+k], 2) + end + nc >= 0 && nc != nci && throw(DimensionMismatch("mismatch in number of columns")) + nc = nci + j += rows[i] + end + nc == -1 && throw(ArgumentError("sizes of UniformScalings could not be inferred")) + j = 0 + for i = 1:nr + if rows[i] > 0 && n[j+1] == -1 # this row consists entirely of UniformScalings + nci, r = divrem(nc, rows[i]) + r != 0 && throw(DimensionMismatch("indivisible UniformScaling sizes")) + for k = 1:rows[i] + n[j+k] = nci + end + end + j += rows[i] + end + end + Amat = promote_to_arrays(n, 1, array_type, A...) + # We have two methods for promote_to_array_type, one returning Matrix and + # another one returning SparseMatrixCSC (in SparseArrays.jl). In the dense + # case, we cannot call hvcat for the promoted UniformScalings because this + # causes a stack overflow. In the sparse case, however, we cannot call + # typed_hvcat because we need a sparse output. + if array_type == Matrix + return typed_hvcat(promote_eltype(Amat...), rows, Amat...) + else + return hvcat(rows, Amat...) + end +end # factorizations function cholesky(S::RealHermSymComplexHerm{<:Real,<:SymTridiagonal}, ::NoPivot = NoPivot(); check::Bool = true) diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl index 95a1842702291..02e39b199679b 100644 --- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl +++ b/stdlib/LinearAlgebra/src/structuredbroadcast.jl @@ -9,35 +9,41 @@ StructuredMatrixStyle{T}(::Val{2}) where {T} = StructuredMatrixStyle{T}() StructuredMatrixStyle{T}(::Val{N}) where {T,N} = Broadcast.DefaultArrayStyle{N}() const StructuredMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,LowerTriangular,UnitLowerTriangular,UpperTriangular,UnitUpperTriangular} -Broadcast.BroadcastStyle(::Type{T}) where {T<:StructuredMatrix} = StructuredMatrixStyle{T}() +for ST in Base.uniontypes(StructuredMatrix) + @eval Broadcast.BroadcastStyle(::Type{<:$ST}) = $(StructuredMatrixStyle{ST}()) +end # Promotion of broadcasts between structured matrices. This is slightly unusual # as we define them symmetrically. This allows us to have a fallback to DefaultArrayStyle{2}(). # Diagonal can cavort with all the other structured matrix types. 
# Bidiagonal doesn't know if it's upper or lower, so it becomes Tridiagonal -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Diagonal}, ::StructuredMatrixStyle{<:Diagonal}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{Diagonal}) = StructuredMatrixStyle{Diagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Diagonal}, ::StructuredMatrixStyle{<:Union{Bidiagonal,SymTridiagonal,Tridiagonal}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{Bidiagonal}) = + StructuredMatrixStyle{Bidiagonal}() +Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{SymTridiagonal,Tridiagonal}}) = StructuredMatrixStyle{Tridiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Diagonal}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) = StructuredMatrixStyle{LowerTriangular}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Diagonal}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{Diagonal}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) = StructuredMatrixStyle{UpperTriangular}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Bidiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{Bidiagonal}, ::StructuredMatrixStyle{Diagonal}) = + StructuredMatrixStyle{Bidiagonal}() +Broadcast.BroadcastStyle(::StructuredMatrixStyle{Bidiagonal}, ::StructuredMatrixStyle{<:Union{Bidiagonal,SymTridiagonal,Tridiagonal}}) = StructuredMatrixStyle{Tridiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:SymTridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{SymTridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) = StructuredMatrixStyle{Tridiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Tridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{Tridiagonal}, ::StructuredMatrixStyle{<:Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal}}) = StructuredMatrixStyle{Tridiagonal}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:LowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{LowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) = StructuredMatrixStyle{LowerTriangular}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:UpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{UpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) = StructuredMatrixStyle{UpperTriangular}() -Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:UnitLowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{UnitLowerTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,LowerTriangular,UnitLowerTriangular}}) = StructuredMatrixStyle{LowerTriangular}() 
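Illustrative sketch (not part of the patch) of the behavioral effect of the tighter `Diagonal`/`Bidiagonal` style promotion added above: a zero-preserving broadcast mixing the two should now be able to keep the narrower `Bidiagonal` structure, whereas the removed rule widened it to `Tridiagonal`.

```julia
using LinearAlgebra

D = Diagonal([1.0, 2.0, 3.0])
B = Bidiagonal([4.0, 5.0, 6.0], [7.0, 8.0], :U)

D .+ B isa Bidiagonal   # expected true under the new rules; the old promotion gave a Tridiagonal
```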
-Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:UnitUpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) = +Broadcast.BroadcastStyle(::StructuredMatrixStyle{UnitUpperTriangular}, ::StructuredMatrixStyle{<:Union{Diagonal,UpperTriangular,UnitUpperTriangular}}) = StructuredMatrixStyle{UpperTriangular}() Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}, ::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}) = @@ -45,17 +51,17 @@ Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLow Broadcast.BroadcastStyle(::StructuredMatrixStyle{<:Union{UpperTriangular,UnitUpperTriangular}}, ::StructuredMatrixStyle{<:Union{LowerTriangular,UnitLowerTriangular}}) = StructuredMatrixStyle{Matrix}() -# Make sure that `StructuredMatrixStyle{<:Matrix}` doesn't ever end up falling +# Make sure that `StructuredMatrixStyle{Matrix}` doesn't ever end up falling # through and give back `DefaultArrayStyle{2}` -Broadcast.BroadcastStyle(T::StructuredMatrixStyle{<:Matrix}, ::StructuredMatrixStyle) = T -Broadcast.BroadcastStyle(::StructuredMatrixStyle, T::StructuredMatrixStyle{<:Matrix}) = T -Broadcast.BroadcastStyle(T::StructuredMatrixStyle{<:Matrix}, ::StructuredMatrixStyle{<:Matrix}) = T +Broadcast.BroadcastStyle(T::StructuredMatrixStyle{Matrix}, ::StructuredMatrixStyle) = T +Broadcast.BroadcastStyle(::StructuredMatrixStyle, T::StructuredMatrixStyle{Matrix}) = T +Broadcast.BroadcastStyle(T::StructuredMatrixStyle{Matrix}, ::StructuredMatrixStyle{Matrix}) = T # All other combinations fall back to the default style Broadcast.BroadcastStyle(::StructuredMatrixStyle, ::StructuredMatrixStyle) = DefaultArrayStyle{2}() # And a definition akin to similar using the structured type: -structured_broadcast_alloc(bc, ::Type{<:Diagonal}, ::Type{ElType}, n) where {ElType} = +structured_broadcast_alloc(bc, ::Type{Diagonal}, ::Type{ElType}, n) where {ElType} = Diagonal(Array{ElType}(undef, n)) # Bidiagonal is tricky as we need to know if it's upper or lower. The promotion # system will return Tridiagonal when there's more than one Bidiagonal, but when @@ -67,9 +73,9 @@ merge_uplos(a, b) = a == b ? a : 'T' find_uplo(a::Bidiagonal) = a.uplo find_uplo(a) = nothing -find_uplo(bc::Broadcasted) = mapreduce(find_uplo, merge_uplos, bc.args, init=nothing) +find_uplo(bc::Broadcasted) = mapfoldl(find_uplo, merge_uplos, Broadcast.cat_nested(bc), init=nothing) -function structured_broadcast_alloc(bc, ::Type{<:Bidiagonal}, ::Type{ElType}, n) where {ElType} +function structured_broadcast_alloc(bc, ::Type{Bidiagonal}, ::Type{ElType}, n) where {ElType} uplo = n > 0 ? 
find_uplo(bc) : 'U' n1 = max(n - 1, 0) if uplo == 'T' @@ -77,19 +83,19 @@ function structured_broadcast_alloc(bc, ::Type{<:Bidiagonal}, ::Type{ElType}, n) end return Bidiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n1), uplo) end -structured_broadcast_alloc(bc, ::Type{<:SymTridiagonal}, ::Type{ElType}, n) where {ElType} = +structured_broadcast_alloc(bc, ::Type{SymTridiagonal}, ::Type{ElType}, n) where {ElType} = SymTridiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n-1)) -structured_broadcast_alloc(bc, ::Type{<:Tridiagonal}, ::Type{ElType}, n) where {ElType} = +structured_broadcast_alloc(bc, ::Type{Tridiagonal}, ::Type{ElType}, n) where {ElType} = Tridiagonal(Array{ElType}(undef, n-1),Array{ElType}(undef, n),Array{ElType}(undef, n-1)) -structured_broadcast_alloc(bc, ::Type{<:LowerTriangular}, ::Type{ElType}, n) where {ElType} = +structured_broadcast_alloc(bc, ::Type{LowerTriangular}, ::Type{ElType}, n) where {ElType} = LowerTriangular(Array{ElType}(undef, n, n)) -structured_broadcast_alloc(bc, ::Type{<:UpperTriangular}, ::Type{ElType}, n) where {ElType} = +structured_broadcast_alloc(bc, ::Type{UpperTriangular}, ::Type{ElType}, n) where {ElType} = UpperTriangular(Array{ElType}(undef, n, n)) -structured_broadcast_alloc(bc, ::Type{<:UnitLowerTriangular}, ::Type{ElType}, n) where {ElType} = +structured_broadcast_alloc(bc, ::Type{UnitLowerTriangular}, ::Type{ElType}, n) where {ElType} = UnitLowerTriangular(Array{ElType}(undef, n, n)) -structured_broadcast_alloc(bc, ::Type{<:UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} = +structured_broadcast_alloc(bc, ::Type{UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} = UnitUpperTriangular(Array{ElType}(undef, n, n)) -structured_broadcast_alloc(bc, ::Type{<:Matrix}, ::Type{ElType}, n) where {ElType} = +structured_broadcast_alloc(bc, ::Type{Matrix}, ::Type{ElType}, n) where {ElType} = Matrix(Array{ElType}(undef, n, n)) # A _very_ limited list of structure-preserving functions known at compile-time. This list is @@ -126,6 +132,7 @@ fails as `zero(::Tuple{Int})` is not defined. However, """ iszerodefined(::Type) = false iszerodefined(::Type{<:Number}) = true +iszerodefined(::Type{<:AbstractArray{T}}) where T = iszerodefined(T) fzeropreserving(bc) = (v = fzero(bc); !ismissing(v) && (iszerodefined(typeof(v)) ? 
iszero(v) : v == 0)) # Like sparse matrices, we assume that the zero-preservation property of a broadcasted @@ -151,83 +158,91 @@ function Base.similar(bc::Broadcasted{StructuredMatrixStyle{T}}, ::Type{ElType}) return similar(convert(Broadcasted{DefaultArrayStyle{ndims(bc)}}, bc), ElType) end +isvalidstructbc(dest, bc::Broadcasted{T}) where {T<:StructuredMatrixStyle} = + Broadcast.combine_styles(dest, bc) === Broadcast.combine_styles(dest) && + (isstructurepreserving(bc) || fzeropreserving(bc)) + +isvalidstructbc(dest::Bidiagonal, bc::Broadcasted{StructuredMatrixStyle{Bidiagonal}}) = + (size(dest, 1) < 2 || find_uplo(bc) == dest.uplo) && + (isstructurepreserving(bc) || fzeropreserving(bc)) + function copyto!(dest::Diagonal, bc::Broadcasted{<:StructuredMatrixStyle}) - !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc)) + isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) axs = axes(dest) axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) for i in axs[1] - dest.diag[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) + dest.diag[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) end return dest end function copyto!(dest::Bidiagonal, bc::Broadcasted{<:StructuredMatrixStyle}) - !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc)) + isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) axs = axes(dest) axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) for i in axs[1] - dest.dv[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) + dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) end if dest.uplo == 'U' for i = 1:size(dest, 1)-1 - dest.ev[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) + dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) end else for i = 1:size(dest, 1)-1 - dest.ev[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i)) + dest.ev[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i)) end end return dest end function copyto!(dest::SymTridiagonal, bc::Broadcasted{<:StructuredMatrixStyle}) - !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc)) + isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) axs = axes(dest) axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) for i in axs[1] - dest.dv[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) + dest.dv[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) end for i = 1:size(dest, 1)-1 - v = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) - v == Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i)) || throw(ArgumentError("broadcasted assignment breaks symmetry between locations ($i, $(i+1)) and ($(i+1), $i)")) + v = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) + v == (@inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i))) || throw(ArgumentError("broadcasted assignment breaks symmetry between locations ($i, $(i+1)) and ($(i+1), $i)")) dest.ev[i] = v end return dest end function copyto!(dest::Tridiagonal, bc::Broadcasted{<:StructuredMatrixStyle}) - !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc)) + isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) axs = axes(dest) 
axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) for i in axs[1] - dest.d[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) + dest.d[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i)) end for i = 1:size(dest, 1)-1 - dest.du[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) - dest.dl[i] = Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i)) + dest.du[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i, i+1)) + dest.dl[i] = @inbounds Broadcast._broadcast_getindex(bc, CartesianIndex(i+1, i)) end return dest end function copyto!(dest::LowerTriangular, bc::Broadcasted{<:StructuredMatrixStyle}) - !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc)) + isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) axs = axes(dest) axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) for j in axs[2] for i in j:axs[1][end] - dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j)) + @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j)) end end return dest end function copyto!(dest::UpperTriangular, bc::Broadcasted{<:StructuredMatrixStyle}) - !isstructurepreserving(bc) && !fzeropreserving(bc) && return copyto!(dest, convert(Broadcasted{Nothing}, bc)) + isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc)) axs = axes(dest) axes(bc) == axs || Broadcast.throwdm(axes(bc), axs) for j in axs[2] for i in 1:j - dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j)) + @inbounds dest.data[i,j] = Broadcast._broadcast_getindex(bc, CartesianIndex(i, j)) end end return dest diff --git a/stdlib/LinearAlgebra/src/svd.jl b/stdlib/LinearAlgebra/src/svd.jl index d0ac4d957e60d..c1b886f616f02 100644 --- a/stdlib/LinearAlgebra/src/svd.jl +++ b/stdlib/LinearAlgebra/src/svd.jl @@ -26,10 +26,10 @@ julia> F = svd(A) SVD{Float64, Float64, Matrix{Float64}, Vector{Float64}} U factor: 4×4 Matrix{Float64}: - 0.0 1.0 0.0 0.0 - 1.0 0.0 0.0 0.0 - 0.0 0.0 0.0 -1.0 - 0.0 0.0 1.0 0.0 + 0.0 1.0 0.0 0.0 + 1.0 0.0 0.0 0.0 + 0.0 0.0 0.0 1.0 + 0.0 0.0 -1.0 0.0 singular values: 4-element Vector{Float64}: 3.0 @@ -38,10 +38,10 @@ singular values: 0.0 Vt factor: 4×5 Matrix{Float64}: - -0.0 0.0 1.0 -0.0 0.0 - 0.447214 0.0 0.0 0.0 0.894427 - -0.0 1.0 0.0 -0.0 0.0 - 0.0 0.0 0.0 1.0 0.0 + -0.0 0.0 1.0 -0.0 0.0 + 0.447214 0.0 0.0 0.0 0.894427 + 0.0 -1.0 0.0 0.0 0.0 + 0.0 0.0 0.0 1.0 0.0 julia> F.U * Diagonal(F.S) * F.Vt 4×5 Matrix{Float64}: @@ -175,11 +175,11 @@ julia> Uonly == U true ``` """ -function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T} - svd!(copymutable_oftype(A, eigtype(T)), full = full, alg = alg) +function svd(A::AbstractVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T} + svd!(eigencopy_oftype(A, eigtype(T)), full = full, alg = alg) end -function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T <: Union{Float16,Complex{Float16}}} - A = svd!(copymutable_oftype(A, eigtype(T)), full = full, alg = alg) +function svd(A::AbstractVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T <: Union{Float16,Complex{Float16}}} + A = svd!(eigencopy_oftype(A, eigtype(T)), full = full, alg = alg) return SVD{T}(A) end function svd(x::Number; full::Bool = false, alg::Algorithm = default_svd_alg(x)) @@ -213,7 +213,6 @@ Base.propertynames(F::SVD, private::Bool=false) = Return the 
singular values of `A`, saving space by overwriting the input. See also [`svdvals`](@ref) and [`svd`](@ref). -``` """ svdvals!(A::StridedMatrix{T}) where {T<:BlasFloat} = isempty(A) ? zeros(real(T), 0) : LAPACK.gesdd!('N', A)[2] svdvals!(A::StridedVector{T}) where {T<:BlasFloat} = svdvals!(reshape(A, (length(A), 1))) @@ -240,15 +239,13 @@ julia> svdvals(A) 0.0 ``` """ -svdvals(A::AbstractMatrix{T}) where {T} = svdvals!(copymutable_oftype(A, eigtype(T))) +svdvals(A::AbstractMatrix{T}) where {T} = svdvals!(eigencopy_oftype(A, eigtype(T))) svdvals(A::AbstractVector{T}) where {T} = [convert(eigtype(T), norm(A))] -svdvals(A::AbstractMatrix{<:BlasFloat}) = svdvals!(copy(A)) -svdvals(A::AbstractVector{<:BlasFloat}) = [norm(A)] svdvals(x::Number) = abs(x) svdvals(S::SVD{<:Any,T}) where {T} = (S.S)::Vector{T} ### SVD least squares ### -function ldiv!(A::SVD{T}, B::StridedVecOrMat) where T +function ldiv!(A::SVD{T}, B::AbstractVecOrMat) where T m, n = size(A) k = searchsortedlast(A.S, eps(real(T))*A.S[1], rev=true) mul!(view(B, 1:n, :), view(A.Vt, 1:k, :)', view(A.S, 1:k) .\ (view(A.U, :, 1:k)' * _cut_B(B, 1:m))) @@ -404,7 +401,8 @@ function svd!(A::StridedMatrix{T}, B::StridedMatrix{T}) where T<:BlasFloat end GeneralizedSVD(U, V, Q, a, b, Int(k), Int(l), R) end -svd(A::StridedMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} = svd!(copy(A),copy(B)) +svd(A::AbstractMatrix{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} = + svd!(copy_similar(A, T), copy_similar(B, T)) """ @@ -457,9 +455,9 @@ julia> U == Uonly true ``` """ -function svd(A::StridedMatrix{TA}, B::StridedMatrix{TB}) where {TA,TB} +function svd(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB} S = promote_type(eigtype(TA),TB) - return svd!(copymutable_oftype(A, S), copymutable_oftype(B, S)) + return svd!(copy_similar(A, S), copy_similar(B, S)) end # This method can be heavily optimized but it is probably not critical # and might introduce bugs or inconsistencies relative to the 1x1 matrix @@ -541,7 +539,6 @@ function svdvals!(A::StridedMatrix{T}, B::StridedMatrix{T}) where T<:BlasFloat end a[1:k + l] ./ b[1:k + l] end -svdvals(A::StridedMatrix{T},B::StridedMatrix{T}) where {T<:BlasFloat} = svdvals!(copy(A),copy(B)) """ svdvals(A, B) @@ -567,9 +564,9 @@ julia> svdvals(A, B) 1.0 ``` """ -function svdvals(A::StridedMatrix{TA}, B::StridedMatrix{TB}) where {TA,TB} +function svdvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB} S = promote_type(eigtype(TA), TB) - return svdvals!(copymutable_oftype(A, S), copymutable_oftype(B, S)) + return svdvals!(copy_similar(A, S), copy_similar(B, S)) end svdvals(x::Number, y::Number) = abs(x/y) diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl index 7347dd6f78639..67ed8afd35c0b 100644 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ b/stdlib/LinearAlgebra/src/symmetric.jl @@ -5,7 +5,7 @@ struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} data::S uplo::Char - function Symmetric{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}} + function Symmetric{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}} require_one_based_indexing(data) (uplo != 'U' && uplo != 'L') && throw_uplo() new{T,S}(data, uplo) @@ -17,34 +17,45 @@ end Construct a `Symmetric` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`) triangle of the matrix `A`. +`Symmetric` views are mainly useful for real-symmetric matrices, for which +specialized algorithms (e.g. for eigenproblems) are enabled for `Symmetric` types. 
+More generally, see also [`Hermitian(A)`](@ref) for Hermitian matrices `A == A'`, which +is effectively equivalent to `Symmetric` for real matrices but is also useful for +complex matrices. (Whereas complex `Symmetric` matrices are supported but have few +if any specialized algorithms.) + +To compute the symmetric part of a real matrix, or more generally the Hermitian part `(A + A') / 2` of +a real or complex matrix `A`, use [`hermitianpart`](@ref). + # Examples ```jldoctest -julia> A = [1 0 2 0 3; 0 4 0 5 0; 6 0 7 0 8; 0 9 0 1 0; 2 0 3 0 4] -5×5 Matrix{Int64}: - 1 0 2 0 3 - 0 4 0 5 0 - 6 0 7 0 8 - 0 9 0 1 0 - 2 0 3 0 4 +julia> A = [1 2 3; 4 5 6; 7 8 9] +3×3 Matrix{Int64}: + 1 2 3 + 4 5 6 + 7 8 9 julia> Supper = Symmetric(A) -5×5 Symmetric{Int64, Matrix{Int64}}: - 1 0 2 0 3 - 0 4 0 5 0 - 2 0 7 0 8 - 0 5 0 1 0 - 3 0 8 0 4 +3×3 Symmetric{Int64, Matrix{Int64}}: + 1 2 3 + 2 5 6 + 3 6 9 julia> Slower = Symmetric(A, :L) -5×5 Symmetric{Int64, Matrix{Int64}}: - 1 0 6 0 2 - 0 4 0 9 0 - 6 0 7 0 3 - 0 9 0 1 0 - 2 0 3 0 4 +3×3 Symmetric{Int64, Matrix{Int64}}: + 1 4 7 + 4 5 8 + 7 8 9 + +julia> hermitianpart(A) +3×3 Hermitian{Float64, Matrix{Float64}}: + 1.0 3.0 5.0 + 3.0 5.0 7.0 + 5.0 7.0 9.0 ``` -Note that `Supper` will not be equal to `Slower` unless `A` is itself symmetric (e.g. if `A == transpose(A)`). +Note that `Supper` will not be equal to `Slower` unless `A` is itself symmetric (e.g. if +`A == transpose(A)`). """ function Symmetric(A::AbstractMatrix, uplo::Symbol=:U) checksquare(A) @@ -87,7 +98,7 @@ struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} data::S uplo::Char - function Hermitian{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}} + function Hermitian{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}} require_one_based_indexing(data) (uplo != 'U' && uplo != 'L') && throw_uplo() new{T,S}(data, uplo) @@ -99,25 +110,33 @@ end Construct a `Hermitian` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`) triangle of the matrix `A`. +To compute the Hermitian part of `A`, use [`hermitianpart`](@ref). + # Examples ```jldoctest -julia> A = [1 0 2+2im 0 3-3im; 0 4 0 5 0; 6-6im 0 7 0 8+8im; 0 9 0 1 0; 2+2im 0 3-3im 0 4]; +julia> A = [1 2+2im 3-3im; 4 5 6-6im; 7 8+8im 9] +3×3 Matrix{Complex{Int64}}: + 1+0im 2+2im 3-3im + 4+0im 5+0im 6-6im + 7+0im 8+8im 9+0im julia> Hupper = Hermitian(A) -5×5 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}: - 1+0im 0+0im 2+2im 0+0im 3-3im - 0+0im 4+0im 0+0im 5+0im 0+0im - 2-2im 0+0im 7+0im 0+0im 8+8im - 0+0im 5+0im 0+0im 1+0im 0+0im - 3+3im 0+0im 8-8im 0+0im 4+0im +3×3 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}: + 1+0im 2+2im 3-3im + 2-2im 5+0im 6-6im + 3+3im 6+6im 9+0im julia> Hlower = Hermitian(A, :L) -5×5 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}: - 1+0im 0+0im 6+6im 0+0im 2-2im - 0+0im 4+0im 0+0im 9+0im 0+0im - 6-6im 0+0im 7+0im 0+0im 3+3im - 0+0im 9+0im 0+0im 1+0im 0+0im - 2+2im 0+0im 3-3im 0+0im 4+0im +3×3 Hermitian{Complex{Int64}, Matrix{Complex{Int64}}}: + 1+0im 4+0im 7+0im + 4+0im 5+0im 8-8im + 7+0im 8+8im 9+0im + +julia> hermitianpart(A) +3×3 Hermitian{ComplexF64, Matrix{ComplexF64}}: + 1.0+0.0im 3.0+1.0im 5.0-1.5im + 3.0-1.0im 5.0+0.0im 7.0-7.0im + 5.0+1.5im 7.0+7.0im 9.0+0.0im ``` Note that `Hupper` will not be equal to `Hlower` unless `A` is itself Hermitian (e.g. if `A == adjoint(A)`). 
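Illustrative sketch (not part of the patch), expanding on the docstrings above: `Symmetric`/`Hermitian` wrap the selected triangle of `A` without copying, so later mutation of `A` shows through the view, while `hermitianpart` (added later in this patch) materializes `(A + A') / 2` into fresh storage.

```julia
using LinearAlgebra

A = [1 2; 30 4]
S = Symmetric(A)      # view of the upper triangle of A; no copy is made
H = hermitianpart(A)  # independent copy holding (A + A') / 2

A[1, 2] = 99
S[2, 1] == 99         # true: the view reflects the mutation through the symmetric entry
H[2, 1] == 16.0       # true: the Hermitian part was computed from the original A
```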
@@ -166,6 +185,9 @@ function hermitian_type(::Type{T}) where {S<:AbstractMatrix, T<:AbstractMatrix{S end hermitian_type(::Type{T}) where {T<:Number} = T +_unwrap(A::Hermitian) = parent(A) +_unwrap(A::Symmetric) = parent(A) + for (S, H) in ((:Symmetric, :Hermitian), (:Hermitian, :Symmetric)) @eval begin $S(A::$S) = A @@ -180,7 +202,7 @@ for (S, H) in ((:Symmetric, :Hermitian), (:Hermitian, :Symmetric)) function $S(A::$H, uplo::Symbol) if A.uplo == char_uplo(uplo) if $H === Hermitian && !(eltype(A) <: Real) && - any(!isreal, A.data[i] for i in diagind(A.data)) + any(!isreal, A.data[i] for i in diagind(A.data, IndexStyle(A.data))) throw(ArgumentError("Cannot construct $($S)($($H))); diagonal contains complex values")) end @@ -192,16 +214,24 @@ for (S, H) in ((:Symmetric, :Hermitian), (:Hermitian, :Symmetric)) end end -convert(T::Type{<:Symmetric}, m::Union{Symmetric,Hermitian}) = m isa T ? m : T(m) -convert(T::Type{<:Hermitian}, m::Union{Symmetric,Hermitian}) = m isa T ? m : T(m) +convert(::Type{T}, m::Union{Symmetric,Hermitian}) where {T<:Symmetric} = m isa T ? m : T(m)::T +convert(::Type{T}, m::Union{Symmetric,Hermitian}) where {T<:Hermitian} = m isa T ? m : T(m)::T const HermOrSym{T, S} = Union{Hermitian{T,S}, Symmetric{T,S}} const RealHermSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}} const RealHermSymComplexHerm{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, Hermitian{Complex{T},S}} const RealHermSymComplexSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, Symmetric{Complex{T},S}} -size(A::HermOrSym, d) = size(A.data, d) size(A::HermOrSym) = size(A.data) +@inline function Base.isassigned(A::HermOrSym, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + @inbounds if i == j || ((A.uplo == 'U') == (i < j)) + return isassigned(A.data, i, j) + else + return isassigned(A.data, j, i) + end +end + @inline function getindex(A::Symmetric, i::Integer, j::Integer) @boundscheck checkbounds(A, i, j) @inbounds if i == j @@ -223,12 +253,12 @@ end end end -function setindex!(A::Symmetric, v, i::Integer, j::Integer) +@propagate_inbounds function setindex!(A::Symmetric, v, i::Integer, j::Integer) i == j || throw(ArgumentError("Cannot set a non-diagonal index in a symmetric matrix")) setindex!(A.data, v, i, j) end -function setindex!(A::Hermitian, v, i::Integer, j::Integer) +@propagate_inbounds function setindex!(A::Hermitian, v, i::Integer, j::Integer) if i != j throw(ArgumentError("Cannot set a non-diagonal index in a Hermitian matrix")) elseif !isreal(v) @@ -241,6 +271,8 @@ end diag(A::Symmetric) = symmetric.(diag(parent(A)), sym_uplo(A.uplo)) diag(A::Hermitian) = hermitian.(diag(parent(A)), sym_uplo(A.uplo)) +isdiag(A::HermOrSym) = isdiag(A.uplo == 'U' ? UpperTriangular(A.data) : LowerTriangular(A.data)) + # For A<:Union{Symmetric,Hermitian}, similar(A[, neweltype]) should yield a matrix with the same # symmetry type, uplo flag, and underlying storage type as A. The following methods cover these cases. 
similar(A::Symmetric, ::Type{T}) where {T} = Symmetric(similar(parent(A), T), ifelse(A.uplo == 'U', :U, :L)) @@ -276,9 +308,11 @@ parent(A::HermOrSym) = A.data Symmetric{T,S}(A::Symmetric{T,S}) where {T,S<:AbstractMatrix{T}} = A Symmetric{T,S}(A::Symmetric) where {T,S<:AbstractMatrix{T}} = Symmetric{T,S}(convert(S,A.data),A.uplo) AbstractMatrix{T}(A::Symmetric) where {T} = Symmetric(convert(AbstractMatrix{T}, A.data), sym_uplo(A.uplo)) +AbstractMatrix{T}(A::Symmetric{T}) where {T} = copy(A) Hermitian{T,S}(A::Hermitian{T,S}) where {T,S<:AbstractMatrix{T}} = A Hermitian{T,S}(A::Hermitian) where {T,S<:AbstractMatrix{T}} = Hermitian{T,S}(convert(S,A.data),A.uplo) AbstractMatrix{T}(A::Hermitian) where {T} = Hermitian(convert(AbstractMatrix{T}, A.data), sym_uplo(A.uplo)) +AbstractMatrix{T}(A::Hermitian{T}) where {T} = copy(A) copy(A::Symmetric{T,S}) where {T,S} = (B = copy(A.data); Symmetric{T,typeof(B)}(B,A.uplo)) copy(A::Hermitian{T,S}) where {T,S} = (B = copy(A.data); Hermitian{T,typeof(B)}(B,A.uplo)) @@ -316,6 +350,7 @@ function fillstored!(A::HermOrSym{T}, x) where T return A end +Base.isreal(A::HermOrSym{<:Real}) = true function Base.isreal(A::HermOrSym) n = size(A, 1) @inbounds if A.uplo == 'U' @@ -363,6 +398,7 @@ Base.copy(A::Adjoint{<:Any,<:Symmetric}) = Base.copy(A::Transpose{<:Any,<:Hermitian}) = Hermitian(copy(transpose(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U)) +tr(A::Symmetric) = tr(A.data) # to avoid AbstractMatrix fallback (incl. allocations) tr(A::Hermitian) = real(tr(A.data)) Base.conj(A::HermOrSym) = typeof(A)(conj(A.data), A.uplo) @@ -417,7 +453,7 @@ function triu(A::Symmetric, k::Integer=0) end end -for (T, trans, real) in [(:Symmetric, :transpose, :identity), (:Hermitian, :adjoint, :real)] +for (T, trans, real) in [(:Symmetric, :transpose, :identity), (:(Hermitian{<:Union{Real,Complex}}), :adjoint, :real)] @eval begin function dot(A::$T, B::$T) n = size(A, 2) @@ -489,97 +525,15 @@ for f in (:+, :-) end end -## Matvec -@inline function mul!(y::StridedVector{T}, A::Symmetric{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -@inline function mul!(y::StridedVector{T}, A::Hermitian{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasReal} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -@inline function mul!(y::StridedVector{T}, A::Hermitian{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -## Matmat -@inline function mul!(C::StridedMatrix{T}, A::Symmetric{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, 
B::Symmetric{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('R', B.uplo, alpha, B.data, A, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::Hermitian{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasReal} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::Hermitian{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasReal} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('R', B.uplo, alpha, B.data, A, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::Hermitian{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::Hermitian{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemm!('R', B.uplo, alpha, B.data, A, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end - *(A::HermOrSym, B::HermOrSym) = A * copyto!(similar(parent(B)), B) function dot(x::AbstractVector, A::RealHermSymComplexHerm, y::AbstractVector) require_one_based_indexing(x, y) - (length(x) == length(y) == size(A, 1)) || throw(DimensionMismatch()) + n = length(x) + (n == length(y) == size(A, 1)) || throw(DimensionMismatch()) data = A.data - r = zero(eltype(x)) * zero(eltype(A)) * zero(eltype(y)) + r = dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) + iszero(n) && return r if A.uplo == 'U' @inbounds for j = 1:length(y) r += dot(x[j], real(data[j,j]), y[j]) @@ -611,21 +565,29 @@ end factorize(A::HermOrSym) = _factorize(A) function _factorize(A::HermOrSym{T}; check::Bool=true) where T TT = typeof(sqrt(oneunit(T))) - if TT <: BlasFloat + if isdiag(A) + return Diagonal(A) + elseif TT <: BlasFloat return bunchkaufman(A; check=check) else # fallback return lu(A; check=check) end end +logabsdet(A::RealHermSymComplexHerm) = ((l, s) = logabsdet(_factorize(A; check=false)); return real(l), s) +logabsdet(A::Symmetric{<:Real}) = logabsdet(_factorize(A; check=false)) +logabsdet(A::Symmetric) = logabsdet(_factorize(A; check=false)) +logdet(A::RealHermSymComplexHerm) = real(logdet(_factorize(A; check=false))) +logdet(A::Symmetric{<:Real}) = logdet(_factorize(A; check=false)) +logdet(A::Symmetric) = logdet(_factorize(A; check=false)) det(A::RealHermSymComplexHerm) = real(det(_factorize(A; check=false))) det(A::Symmetric{<:Real}) = det(_factorize(A; check=false)) det(A::Symmetric) = det(_factorize(A; check=false)) -\(A::HermOrSym{<:Any,<:StridedMatrix}, B::AbstractVector) = \(factorize(A), B) +\(A::HermOrSym, B::AbstractVector) = \(factorize(A), B) # Bunch-Kaufman 
solves can not utilize BLAS-3 for multiple right hand sides # so using LU is faster for AbstractMatrix right hand side -\(A::HermOrSym{<:Any,<:StridedMatrix}, B::AbstractMatrix) = \(lu(A), B) +\(A::HermOrSym, B::AbstractMatrix) = \(isdiag(A) ? Diagonal(A) : lu(A), B) function _inv(A::HermOrSym) n = checksquare(A) @@ -643,6 +605,7 @@ function _inv(A::HermOrSym) end B end +# StridedMatrix restriction seems necessary due to inv! call in _inv above inv(A::Hermitian{<:Any,<:StridedMatrix}) = Hermitian(_inv(A), sym_uplo(A.uplo)) inv(A::Symmetric{<:Any,<:StridedMatrix}) = Symmetric(_inv(A), sym_uplo(A.uplo)) @@ -854,3 +817,63 @@ for func in (:log, :sqrt) end end end + +# Cube root of a real-valued symmetric matrix +function cbrt(A::HermOrSym{<:Real}) + F = eigen(A) + A = F.vectors * Diagonal(cbrt.(F.values)) * F.vectors' + return A +end + +""" + hermitianpart(A, uplo=:U) -> Hermitian + +Return the Hermitian part of the square matrix `A`, defined as `(A + A') / 2`, as a +[`Hermitian`](@ref) matrix. For real matrices `A`, this is also known as the symmetric part +of `A`; it is also sometimes called the "operator real part". The optional argument `uplo` controls the corresponding argument of the +[`Hermitian`](@ref) view. For real matrices, the latter is equivalent to a +[`Symmetric`](@ref) view. + +See also [`hermitianpart!`](@ref) for the corresponding in-place operation. + +!!! compat "Julia 1.10" + This function requires Julia 1.10 or later. +""" +hermitianpart(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart(A), uplo) + +""" + hermitianpart!(A, uplo=:U) -> Hermitian + +Overwrite the square matrix `A` in-place with its Hermitian part `(A + A') / 2`, and return +[`Hermitian(A, uplo)`](@ref). For real matrices `A`, this is also known as the symmetric +part of `A`. + +See also [`hermitianpart`](@ref) for the corresponding out-of-place operation. + +!!! compat "Julia 1.10" + This function requires Julia 1.10 or later. +""" +hermitianpart!(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart!(A), uplo) + +_hermitianpart(A::AbstractMatrix) = _hermitianpart!(copy_similar(A, Base.promote_op(/, eltype(A), Int))) +_hermitianpart(a::Number) = real(a) + +function _hermitianpart!(A::AbstractMatrix) + require_one_based_indexing(A) + n = checksquare(A) + @inbounds for j in 1:n + A[j, j] = _hermitianpart(A[j, j]) + for i in 1:j-1 + A[i, j] = val = (A[i, j] + adjoint(A[j, i])) / 2 + A[j, i] = adjoint(val) + end + end + return A +end + +## structured matrix printing ## +function Base.replace_in_print_matrix(A::HermOrSym,i::Integer,j::Integer,s::AbstractString) + ijminmax = minmax(i, j) + inds = A.uplo == 'U' ? ijminmax : reverse(ijminmax) + Base.replace_in_print_matrix(parent(A), inds..., s) +end diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl index 8d90f370e06b6..4e35616f62181 100644 --- a/stdlib/LinearAlgebra/src/symmetriceigen.jl +++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl @@ -1,13 +1,16 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# preserve HermOrSym wrapper +eigencopy_oftype(A::Hermitian, S) = Hermitian(copy_similar(A, S), sym_uplo(A.uplo)) +eigencopy_oftype(A::Symmetric, S) = Symmetric(copy_similar(A, S), sym_uplo(A.uplo)) + # Eigensolvers for symmetric and Hermitian matrices eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) = Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...) 
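Illustrative sketch (not part of the patch): the `eigencopy_oftype` methods above keep the `Hermitian`/`Symmetric` wrapper while widening the element type, so the symmetric eigensolvers still dispatch to the specialized path for, e.g., integer-valued input.

```julia
using LinearAlgebra

S = Symmetric([2 1; 1 2])   # Int elements
F = eigen(S)                # eltype is widened internally (via eigencopy_oftype) before the LAPACK call
F.values ≈ [1.0, 3.0]       # real eigenvalues, sorted, from the symmetric path
```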
function eigen(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing) - T = eltype(A) - S = eigtype(T) - eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), sortby=sortby) + S = eigtype(eltype(A)) + eigen!(eigencopy_oftype(A, S), sortby=sortby) end eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) = @@ -31,9 +34,8 @@ The [`UnitRange`](@ref) `irange` specifies indices of the sorted eigenvalues to will be a *truncated* factorization. """ function eigen(A::RealHermSymComplexHerm, irange::UnitRange) - T = eltype(A) - S = eigtype(T) - eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), irange) + S = eigtype(eltype(A)) + eigen!(eigencopy_oftype(A, S), irange) end eigen!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} = @@ -57,9 +59,8 @@ The following functions are available for `Eigen` objects: [`inv`](@ref), [`det` will be a *truncated* factorization. """ function eigen(A::RealHermSymComplexHerm, vl::Real, vh::Real) - T = eltype(A) - S = eigtype(T) - eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), vl, vh) + S = eigtype(eltype(A)) + eigen!(eigencopy_oftype(A, S), vl, vh) end function eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) @@ -69,9 +70,8 @@ function eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby: end function eigvals(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing) - T = eltype(A) - S = eigtype(T) - eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), sortby=sortby) + S = eigtype(eltype(A)) + eigvals!(eigencopy_oftype(A, S), sortby=sortby) end """ @@ -110,9 +110,8 @@ julia> eigvals(A) ``` """ function eigvals(A::RealHermSymComplexHerm, irange::UnitRange) - T = eltype(A) - S = eigtype(T) - eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), irange) + S = eigtype(eltype(A)) + eigvals!(eigencopy_oftype(A, S), irange) end """ @@ -150,13 +149,17 @@ julia> eigvals(A) ``` """ function eigvals(A::RealHermSymComplexHerm, vl::Real, vh::Real) - T = eltype(A) - S = eigtype(T) - eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), vl, vh) + S = eigtype(eltype(A)) + eigvals!(eigencopy_oftype(A, S), vl, vh) end -eigmax(A::RealHermSymComplexHerm{<:Real,<:StridedMatrix}) = eigvals(A, size(A, 1):size(A, 1))[1] -eigmin(A::RealHermSymComplexHerm{<:Real,<:StridedMatrix}) = eigvals(A, 1:1)[1] +eigmax(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, size(A, 1):size(A, 1))[1] +eigmin(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, 1:1)[1] + +function eigen(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} + S = promote_type(eigtype(TA), TB) + return eigen!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) +end function eigen!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data')) @@ -167,105 +170,72 @@ function eigen!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Not GeneralizedEigen(sorteig!(vals, vecs, sortby)...) 
end -function eigen!(A::RealHermSymComplexHerm{T,S}, B::AbstractMatrix{T}; sortby::Union{Function,Nothing}=nothing) where {T<:Number,S<:StridedMatrix} - U = cholesky(B).U - vals, w = eigen!(UtiAUi!(A, U)) - vecs = U \ w +function eigen(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + if ishermitian(A) + eigen!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby) + else + eigen!(copy_similar(A, eigtype(eltype(A))), C; sortby) + end +end +function eigen!(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + # Cholesky decomposition based eigenvalues and eigenvectors + vals, w = eigen!(UtiAUi!(A, C.U)) + vecs = C.U \ w GeneralizedEigen(sorteig!(vals, vecs, sortby)...) end -# Perform U' \ A / U in-place. -UtiAUi!(As::Symmetric, Utr::UpperTriangular) = Symmetric(_UtiAsymUi!(As.uplo, parent(As), parent(Utr)), sym_uplo(As.uplo)) -UtiAUi!(As::Hermitian, Utr::UpperTriangular) = Hermitian(_UtiAsymUi!(As.uplo, parent(As), parent(Utr)), sym_uplo(As.uplo)) -UtiAUi!(As::Symmetric, Udi::Diagonal) = Symmetric(_UtiAsymUi_diag!(As.uplo, parent(As), Udi), sym_uplo(As.uplo)) -UtiAUi!(As::Hermitian, Udi::Diagonal) = Hermitian(_UtiAsymUi_diag!(As.uplo, parent(As), Udi), sym_uplo(As.uplo)) - -# U is upper triangular -function _UtiAsymUi!(uplo, A, U) - n = size(A, 1) - μ⁻¹ = 1 / U[1, 1] - αμ⁻² = A[1, 1] * μ⁻¹' * μ⁻¹ - - # Update (1, 1) element - A[1, 1] = αμ⁻² - if n > 1 - Unext = view(U, 2:n, 2:n) - - if uplo === 'U' - # Update submatrix - for j in 2:n, i in 2:j - A[i, j] = ( - A[i, j] - - μ⁻¹' * U[1, j] * A[1, i]' - - μ⁻¹ * A[1, j] * U[1, i]' - + αμ⁻² * U[1, j] * U[1, i]' - ) - end - - # Update vector - for j in 2:n - A[1, j] = A[1, j] * μ⁻¹' - U[1, j] * αμ⁻² - end - ldiv!(view(A', 2:n, 1), UpperTriangular(Unext)', view(A', 2:n, 1)) - else - # Update submatrix - for j in 2:n, i in 2:j - A[j, i] = ( - A[j, i] - - μ⁻¹ * A[i, 1]' * U[1, j]' - - μ⁻¹' * U[1, i] * A[j, 1] - + αμ⁻² * U[1, i] * U[1, j]' - ) - end - - # Update vector - for j in 2:n - A[j, 1] = A[j, 1] * μ⁻¹ - U[1, j]' * αμ⁻² - end - ldiv!(view(A, 2:n, 1), UpperTriangular(Unext)', view(A, 2:n, 1)) - end - - # Recurse - _UtiAsymUi!(uplo, view(A, 2:n, 2:n), Unext) - end +# Bunch-Kaufmann (LDLT) based solution for generalized eigenvalues and eigenvectors +function eigen(A::StridedMatrix{T}, B::BunchKaufman{T,<:AbstractMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasFloat} + eigen!(copy(A), copy(B); sortby) +end +function eigen!(A::StridedMatrix{T}, B::BunchKaufman{T,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasFloat} + M, TD, p = getproperties!(B) + # Compute generalized eigenvalues of equivalent matrix: + # A' = inv(Tridiagonal(dl,d,du))*inv(M)*P*A*P'*inv(M') + # See: https://github.com/JuliaLang/julia/pull/50471#issuecomment-1627836781 + permutecols!(A, p) + permuterows!(A, p) + ldiv!(M, A) + rdiv!(A, M') + ldiv!(TD, A) + vals, vecs = eigen!(A; sortby) + # Compute generalized eigenvectors from 'vecs': + # vecs = P'*inv(M')*vecs + # See: https://github.com/JuliaLang/julia/pull/50471#issuecomment-1627836781 + M = B.uplo == 'U' ? UnitUpperTriangular{eltype(vecs)}(M) : UnitLowerTriangular{eltype(vecs)}(M) ; + ldiv!(M', vecs) + invpermuterows!(vecs, p) + GeneralizedEigen(sorteig!(vals, vecs, sortby)...) 
+end - return A +# LU based solution for generalized eigenvalues and eigenvectors +function eigen(A::StridedMatrix{T}, F::LU{T,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) where {T} + return eigen!(copy(A), copy(F); sortby) +end +function eigen!(A::StridedMatrix{T}, F::LU{T,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) where {T} + L = UnitLowerTriangular(F.L) + U = UpperTriangular(F.U) + permuterows!(A, F.p) + ldiv!(L, A) + rdiv!(A, U) + vals, vecs = eigen!(A; sortby) + # Compute generalized eigenvectors from 'vecs': + # vecs = P'*inv(M')*vecs + # See: https://github.com/JuliaLang/julia/pull/50471#issuecomment-1627836781 + U = UpperTriangular{eltype(vecs)}(U) + ldiv!(U, vecs) + GeneralizedEigen(sorteig!(vals, vecs, sortby)...) end -# U is diagonal -function _UtiAsymUi_diag!(uplo, A, U) - n = size(A, 1) - μ⁻¹ = 1 / U[1, 1] - αμ⁻² = A[1, 1] * μ⁻¹' * μ⁻¹ - - # Update (1, 1) element - A[1, 1] = αμ⁻² - if n > 1 - Unext = view(U, 2:n, 2:n) - - if uplo === 'U' - # No need to update any submatrix when U is diagonal - - # Update vector - for j in 2:n - A[1, j] = A[1, j] * μ⁻¹' - end - ldiv!(view(A', 2:n, 1), Diagonal(Unext)', view(A', 2:n, 1)) - else - # No need to update any submatrix when U is diagonal - - # Update vector - for j in 2:n - A[j, 1] = A[j, 1] * μ⁻¹ - end - ldiv!(view(A, 2:n, 1), Diagonal(Unext)', view(A, 2:n, 1)) - end - - # Recurse - _UtiAsymUi!(uplo, view(A, 2:n, 2:n), Unext) - end +# Perform U' \ A / U in-place, where U::Union{UpperTriangular,Diagonal} +UtiAUi!(A, U) = _UtiAUi!(A, U) +UtiAUi!(A::Symmetric, U) = Symmetric(_UtiAUi!(copytri!(parent(A), A.uplo), U), sym_uplo(A.uplo)) +UtiAUi!(A::Hermitian, U) = Hermitian(_UtiAUi!(copytri!(parent(A), A.uplo, true), U), sym_uplo(A.uplo)) +_UtiAUi!(A, U) = rdiv!(ldiv!(U', A), U) - return A +function eigvals(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} + S = promote_type(eigtype(TA), TB) + return eigvals!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) 
end function eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} @@ -279,3 +249,48 @@ function eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,N return vals end eigvecs(A::HermOrSym) = eigvecs(eigen(A)) + +function eigvals(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + if ishermitian(A) + eigvals!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby) + else + eigvals!(copy_similar(A, eigtype(eltype(A))), C; sortby) + end +end +function eigvals!(A::AbstractMatrix{T}, C::Cholesky{T, <:AbstractMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:Number} + # Cholesky decomposition based eigenvalues + return eigvals!(UtiAUi!(A, C.U); sortby) +end + +# Bunch-Kaufmann (LDLT) based solution for generalized eigenvalues +function eigvals(A::StridedMatrix{T}, B::BunchKaufman{T,<:AbstractMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasFloat} + eigvals!(copy(A), copy(B); sortby) +end +function eigvals!(A::StridedMatrix{T}, B::BunchKaufman{T,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasFloat} + M, TD, p = getproperties!(B) + # Compute generalized eigenvalues of equivalent matrix: + # A' = inv(Tridiagonal(dl,d,du))*inv(M)*P*A*P'*inv(M') + # See: https://github.com/JuliaLang/julia/pull/50471#issuecomment-1627836781 + permutecols!(A, p) + permuterows!(A, p) + ldiv!(M, A) + rdiv!(A, M') + ldiv!(TD, A) + return eigvals!(A; sortby) +end + +# LU based solution for generalized eigenvalues +function eigvals(A::StridedMatrix{T}, F::LU{T,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) where {T} + return eigvals!(copy(A), copy(F); sortby) +end +function eigvals!(A::StridedMatrix{T}, F::LU{T,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) where {T} + L = UnitLowerTriangular(F.L) + U = UpperTriangular(F.U) + # Compute generalized eigenvalues of equivalent matrix: + # A' = inv(L)*(P*A)*inv(U) + # See: https://github.com/JuliaLang/julia/pull/50471#issuecomment-1627836781 + permuterows!(A, F.p) + ldiv!(L, A) + rdiv!(A, U) + return eigvals!(A; sortby) +end diff --git a/stdlib/LinearAlgebra/src/transpose.jl b/stdlib/LinearAlgebra/src/transpose.jl index c7ca6339aac6a..afc3494fc9726 100644 --- a/stdlib/LinearAlgebra/src/transpose.jl +++ b/stdlib/LinearAlgebra/src/transpose.jl @@ -175,8 +175,8 @@ julia> copy(T) """ copy(::Union{Transpose,Adjoint}) -Base.copy(A::Transpose{<:Any,<:AbstractMatrix}) = transpose!(similar(A.parent, reverse(axes(A.parent))), A.parent) -Base.copy(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint!(similar(A.parent, reverse(axes(A.parent))), A.parent) +Base.copy(A::TransposeAbsMat) = transpose!(similar(A.parent, reverse(axes(A.parent))), A.parent) +Base.copy(A::AdjointAbsMat) = adjoint!(similar(A.parent, reverse(axes(A.parent))), A.parent) function copy_transpose!(B::AbstractVecOrMat, ir_dest::AbstractRange{Int}, jr_dest::AbstractRange{Int}, A::AbstractVecOrMat, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int}) @@ -194,10 +194,29 @@ function copy_transpose!(B::AbstractVecOrMat, ir_dest::AbstractRange{Int}, jr_de for jsrc in jr_src jdest = first(jr_dest) for isrc in ir_src - B[idest,jdest] = A[isrc,jsrc] + B[idest,jdest] = transpose(A[isrc,jsrc]) jdest += step(jr_dest) end idest += step(ir_dest) end return B end + +function copy_similar(A::AdjointAbsMat, ::Type{T}) where {T} + C = similar(A, T, size(A)) + adjoint!(C, parent(A)) +end +function copy_similar(A::TransposeAbsMat, ::Type{T}) where {T} + C 
= similar(A, T, size(A)) + transpose!(C, parent(A)) +end + +function Base.copyto_unaliased!(deststyle::IndexStyle, dest::AbstractMatrix, srcstyle::IndexCartesian, src::AdjOrTransAbsMat) + if axes(dest) == axes(src) + f! = inplace_adj_or_trans(src) + f!(dest, parent(src)) + else + @invoke Base.copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle::IndexStyle, src::AbstractArray) + end + return dest +end diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl index d939a5df0da01..66e5fd3e4b4c1 100644 --- a/stdlib/LinearAlgebra/src/triangular.jl +++ b/stdlib/LinearAlgebra/src/triangular.jl @@ -3,13 +3,18 @@ ## Triangular # could be renamed to Triangular when that name has been fully deprecated -abstract type AbstractTriangular{T,S<:AbstractMatrix} <: AbstractMatrix{T} end +""" + AbstractTriangular + +Supertype of triangular matrix types such as [`LowerTriangular`](@ref), [`UpperTriangular`](@ref), +[`UnitLowerTriangular`](@ref) and [`UnitUpperTriangular`](@ref). +""" +abstract type AbstractTriangular{T} <: AbstractMatrix{T} end # First loop through all methods that don't need special care for upper/lower and unit diagonal -for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, - :UnitUpperTriangular) +for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular) @eval begin - struct $t{T,S<:AbstractMatrix{T}} <: AbstractTriangular{T,S} + struct $t{T,S<:AbstractMatrix{T}} <: AbstractTriangular{T} data::S function $t{T,S}(data) where {T,S<:AbstractMatrix{T}} @@ -20,22 +25,15 @@ for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, end $t(A::$t) = A $t{T}(A::$t{T}) where {T} = A - function $t(A::AbstractMatrix) - return $t{eltype(A), typeof(A)}(A) - end - function $t{T}(A::AbstractMatrix) where T - $t(convert(AbstractMatrix{T}, A)) - end + $t(A::AbstractMatrix) = $t{eltype(A), typeof(A)}(A) + $t{T}(A::AbstractMatrix) where {T} = $t(convert(AbstractMatrix{T}, A)) + $t{T}(A::$t) where {T} = $t(convert(AbstractMatrix{T}, A.data)) - function $t{T}(A::$t) where T - Anew = convert(AbstractMatrix{T}, A.data) - $t(Anew) - end Matrix(A::$t{T}) where {T} = Matrix{T}(A) AbstractMatrix{T}(A::$t) where {T} = $t{T}(A) + AbstractMatrix{T}(A::$t{T}) where {T} = copy(A) - size(A::$t, d) = size(A.data, d) size(A::$t) = size(A.data) # For A<:AbstractTriangular, similar(A[, neweltype]) should yield a matrix with the same @@ -151,13 +149,17 @@ julia> UnitUpperTriangular(A) """ UnitUpperTriangular +const UpperOrUnitUpperTriangular{T,S} = Union{UpperTriangular{T,S}, UnitUpperTriangular{T,S}} +const LowerOrUnitLowerTriangular{T,S} = Union{LowerTriangular{T,S}, UnitLowerTriangular{T,S}} +const UpperOrLowerTriangular{T,S} = Union{UpperOrUnitUpperTriangular{T,S}, LowerOrUnitLowerTriangular{T,S}} + imag(A::UpperTriangular) = UpperTriangular(imag(A.data)) imag(A::LowerTriangular) = LowerTriangular(imag(A.data)) imag(A::UnitLowerTriangular) = LowerTriangular(tril!(imag(A.data),-1)) imag(A::UnitUpperTriangular) = UpperTriangular(triu!(imag(A.data),1)) Array(A::AbstractTriangular) = Matrix(A) -parent(A::AbstractTriangular) = A.data +parent(A::UpperOrLowerTriangular) = A.data # then handle all methods that requires specific handling of upper/lower and unit diagonal @@ -172,7 +174,7 @@ function Matrix{T}(A::UnitLowerTriangular) where T copyto!(B, A.data) tril!(B) for i = 1:size(B,1) - B[i,i] = 1 + B[i,i] = oneunit(T) end B end @@ -187,7 +189,7 @@ function Matrix{T}(A::UnitUpperTriangular) where T copyto!(B, A.data) 
triu!(B) for i = 1:size(B,1) - B[i,i] = 1 + B[i,i] = oneunit(T) end B end @@ -201,7 +203,7 @@ function full!(A::UnitLowerTriangular) B = A.data tril!(B) for i = 1:size(A,1) - B[i,i] = 1 + B[i,i] = oneunit(eltype(B)) end B end @@ -214,23 +216,41 @@ function full!(A::UnitUpperTriangular) B = A.data triu!(B) for i = 1:size(A,1) - B[i,i] = 1 + B[i,i] = oneunit(eltype(B)) end B end -getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} = +Base.isassigned(A::UnitLowerTriangular, i::Int, j::Int) = + i > j ? isassigned(A.data, i, j) : true +Base.isassigned(A::LowerTriangular, i::Int, j::Int) = + i >= j ? isassigned(A.data, i, j) : true +Base.isassigned(A::UnitUpperTriangular, i::Int, j::Int) = + i < j ? isassigned(A.data, i, j) : true +Base.isassigned(A::UpperTriangular, i::Int, j::Int) = + i <= j ? isassigned(A.data, i, j) : true + +Base.isstored(A::UnitLowerTriangular, i::Int, j::Int) = + i > j ? Base.isstored(A.data, i, j) : false +Base.isstored(A::LowerTriangular, i::Int, j::Int) = + i >= j ? Base.isstored(A.data, i, j) : false +Base.isstored(A::UnitUpperTriangular, i::Int, j::Int) = + i < j ? Base.isstored(A.data, i, j) : false +Base.isstored(A::UpperTriangular, i::Int, j::Int) = + i <= j ? Base.isstored(A.data, i, j) : false + +@propagate_inbounds getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} = i > j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T)) -getindex(A::LowerTriangular, i::Integer, j::Integer) = +@propagate_inbounds getindex(A::LowerTriangular, i::Integer, j::Integer) = i >= j ? A.data[i,j] : zero(A.data[j,i]) -getindex(A::UnitUpperTriangular{T}, i::Integer, j::Integer) where {T} = +@propagate_inbounds getindex(A::UnitUpperTriangular{T}, i::Integer, j::Integer) where {T} = i < j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T)) -getindex(A::UpperTriangular, i::Integer, j::Integer) = +@propagate_inbounds getindex(A::UpperTriangular, i::Integer, j::Integer) = i <= j ? 
A.data[i,j] : zero(A.data[j,i]) -function setindex!(A::UpperTriangular, x, i::Integer, j::Integer) +@propagate_inbounds function setindex!(A::UpperTriangular, x, i::Integer, j::Integer) if i > j - x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " * + iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " * "($i, $j) of an UpperTriangular matrix to a nonzero value ($x)")) else A.data[i,j] = x @@ -238,12 +258,12 @@ function setindex!(A::UpperTriangular, x, i::Integer, j::Integer) return A end -function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer) +@propagate_inbounds function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer) if i > j - x == 0 || throw(ArgumentError("cannot set index in the lower triangular part " * + iszero(x) || throw(ArgumentError("cannot set index in the lower triangular part " * "($i, $j) of a UnitUpperTriangular matrix to a nonzero value ($x)")) elseif i == j - x == 1 || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " * + x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " * "of a UnitUpperTriangular matrix to a non-unit value ($x)")) else A.data[i,j] = x @@ -251,9 +271,9 @@ function setindex!(A::UnitUpperTriangular, x, i::Integer, j::Integer) return A end -function setindex!(A::LowerTriangular, x, i::Integer, j::Integer) +@propagate_inbounds function setindex!(A::LowerTriangular, x, i::Integer, j::Integer) if i < j - x == 0 || throw(ArgumentError("cannot set index in the upper triangular part " * + iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " * "($i, $j) of a LowerTriangular matrix to a nonzero value ($x)")) else A.data[i,j] = x @@ -261,12 +281,12 @@ function setindex!(A::LowerTriangular, x, i::Integer, j::Integer) return A end -function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer) +@propagate_inbounds function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer) if i < j - x == 0 || throw(ArgumentError("cannot set index in the upper triangular part " * + iszero(x) || throw(ArgumentError("cannot set index in the upper triangular part " * "($i, $j) of a UnitLowerTriangular matrix to a nonzero value ($x)")) elseif i == j - x == 1 || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " * + x == oneunit(x) || throw(ArgumentError("cannot set index on the diagonal ($i, $j) " * "of a UnitLowerTriangular matrix to a non-unit value ($x)")) else A.data[i,j] = x @@ -274,6 +294,22 @@ function setindex!(A::UnitLowerTriangular, x, i::Integer, j::Integer) return A end +@inline function fill!(A::UpperTriangular, x) + iszero(x) || throw(ArgumentError("cannot set indices in the lower triangular part " * + "of an UpperTriangular matrix to a nonzero value ($x)")) + for col in axes(A,2), row in firstindex(A,1):col + @inbounds A.data[row, col] = x + end + A +end +@inline function fill!(A::LowerTriangular, x) + iszero(x) || throw(ArgumentError("cannot set indices in the upper triangular part " * + "of a LowerTriangular matrix to a nonzero value ($x)")) + for col in axes(A,2), row in col:lastindex(A,1) + @inbounds A.data[row, col] = x + end + A +end ## structured matrix methods ## function Base.replace_in_print_matrix(A::Union{UpperTriangular,UnitUpperTriangular}, @@ -293,28 +329,36 @@ function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0) k <= 0 && return true return _istriu(A, k) end -istril(A::Adjoint) = istriu(A.parent) -istril(A::Transpose) = istriu(A.parent) 
-istriu(A::Adjoint) = istril(A.parent) -istriu(A::Transpose) = istril(A.parent) +istril(A::Adjoint, k::Integer=0) = istriu(A.parent, -k) +istril(A::Transpose, k::Integer=0) = istriu(A.parent, -k) +istriu(A::Adjoint, k::Integer=0) = istril(A.parent, -k) +istriu(A::Transpose, k::Integer=0) = istril(A.parent, -k) -function tril!(A::UpperTriangular, k::Integer=0) +function tril!(A::UpperTriangular{T}, k::Integer=0) where {T} n = size(A,1) if k < 0 - fill!(A.data,0) + fill!(A.data, zero(T)) return A elseif k == 0 for j in 1:n, i in 1:j-1 - A.data[i,j] = 0 + A.data[i,j] = zero(T) end return A else return UpperTriangular(tril!(A.data,k)) end end -triu!(A::UpperTriangular, k::Integer=0) = UpperTriangular(triu!(A.data,k)) +function triu!(A::UpperTriangular, k::Integer=0) + n = size(A,1) + if k > 0 + for j in 1:n, i in max(1,j-k+1):j + A.data[i,j] = zero(eltype(A)) + end + end + return A +end -function tril!(A::UnitUpperTriangular{T}, k::Integer=0) where T +function tril!(A::UnitUpperTriangular{T}, k::Integer=0) where {T} n = size(A,1) if k < 0 fill!(A.data, zero(T)) @@ -337,25 +381,33 @@ function triu!(A::UnitUpperTriangular, k::Integer=0) for i in diagind(A) A.data[i] = oneunit(eltype(A)) end - return triu!(UpperTriangular(A.data),k) + return triu!(UpperTriangular(A.data), k) end -function triu!(A::LowerTriangular, k::Integer=0) +function triu!(A::LowerTriangular{T}, k::Integer=0) where {T} n = size(A,1) if k > 0 - fill!(A.data,0) + fill!(A.data, zero(T)) return A elseif k == 0 for j in 1:n, i in j+1:n - A.data[i,j] = 0 + A.data[i,j] = zero(T) end return A else - return LowerTriangular(triu!(A.data,k)) + return LowerTriangular(triu!(A.data, k)) end end -tril!(A::LowerTriangular, k::Integer=0) = LowerTriangular(tril!(A.data,k)) +function tril!(A::LowerTriangular, k::Integer=0) + n = size(A,1) + if k < 0 + for j in 1:n, i in j:min(j-k-1,n) + A.data[i, j] = zero(eltype(A)) + end + end + A +end function triu!(A::UnitLowerTriangular{T}, k::Integer=0) where T n = size(A,1) @@ -372,7 +424,7 @@ function triu!(A::UnitLowerTriangular{T}, k::Integer=0) where T for i in diagind(A) A.data[i] = oneunit(T) end - return LowerTriangular(triu!(A.data,k)) + return LowerTriangular(triu!(A.data, k)) end end @@ -380,7 +432,7 @@ function tril!(A::UnitLowerTriangular, k::Integer=0) for i in diagind(A) A.data[i] = oneunit(eltype(A)) end - return tril!(LowerTriangular(A.data),k) + return tril!(LowerTriangular(A.data), k) end adjoint(A::LowerTriangular) = UpperTriangular(adjoint(A.data)) @@ -402,30 +454,40 @@ adjoint!(A::UpperTriangular) = LowerTriangular(copytri!(A.data, 'U' , true, true adjoint!(A::UnitUpperTriangular) = UnitLowerTriangular(copytri!(A.data, 'U' , true, true)) diag(A::LowerTriangular) = diag(A.data) -diag(A::UnitLowerTriangular) = fill(one(eltype(A)), size(A,1)) +diag(A::UnitLowerTriangular) = fill(oneunit(eltype(A)), size(A,1)) diag(A::UpperTriangular) = diag(A.data) -diag(A::UnitUpperTriangular) = fill(one(eltype(A)), size(A,1)) +diag(A::UnitUpperTriangular) = fill(oneunit(eltype(A)), size(A,1)) # Unary operations -(A::LowerTriangular) = LowerTriangular(-A.data) -(A::UpperTriangular) = UpperTriangular(-A.data) function -(A::UnitLowerTriangular) - Anew = -A.data + Adata = A.data + Anew = similar(Adata) # must be mutable, even if Adata is not + @. Anew = -Adata for i = 1:size(A, 1) - Anew[i, i] = -1 + Anew[i, i] = -A[i, i] end LowerTriangular(Anew) end function -(A::UnitUpperTriangular) - Anew = -A.data + Adata = A.data + Anew = similar(Adata) # must be mutable, even if Adata is not + @. 
Anew = -Adata for i = 1:size(A, 1) - Anew[i, i] = -1 + Anew[i, i] = -A[i, i] end UpperTriangular(Anew) end +tr(A::LowerTriangular) = tr(A.data) +tr(A::UnitLowerTriangular) = size(A, 1) * oneunit(eltype(A)) +tr(A::UpperTriangular) = tr(A.data) +tr(A::UnitUpperTriangular) = size(A, 1) * oneunit(eltype(A)) + # copy and scale -function copyto!(A::T, B::T) where T<:Union{UpperTriangular,UnitUpperTriangular} +function copyto!(A::T, B::T) where {T<:Union{UpperTriangular,UnitUpperTriangular}} + checkbounds(A, axes(B)...) n = size(B,1) for j = 1:n for i = 1:(isa(B, UnitUpperTriangular) ? j-1 : j) @@ -434,7 +496,8 @@ function copyto!(A::T, B::T) where T<:Union{UpperTriangular,UnitUpperTriangular} end return A end -function copyto!(A::T, B::T) where T<:Union{LowerTriangular,UnitLowerTriangular} +function copyto!(A::T, B::T) where {T<:Union{LowerTriangular,UnitLowerTriangular}} + checkbounds(A, axes(B)...) n = size(B,1) for j = 1:n for i = (isa(B, UnitLowerTriangular) ? j+1 : j):n @@ -444,106 +507,106 @@ function copyto!(A::T, B::T) where T<:Union{LowerTriangular,UnitLowerTriangular} return A end -# Define `mul!` for (Unit){Upper,Lower}Triangular matrices times a -# number. -for (Trig, UnitTrig) in Any[(UpperTriangular, UnitUpperTriangular), - (LowerTriangular, UnitLowerTriangular)] - for (TB, TC) in Any[(Trig, Number), - (Number, Trig), - (UnitTrig, Number), - (Number, UnitTrig)] - @eval @inline mul!(A::$Trig, B::$TB, C::$TC, alpha::Number, beta::Number) = - _mul!(A, B, C, MulAddMul(alpha, beta)) - end +# Define `mul!` for (Unit){Upper,Lower}Triangular matrices times a number. +# be permissive here and require compatibility later in _triscale! +@inline mul!(A::AbstractTriangular, B::AbstractTriangular, C::Number, alpha::Number, beta::Number) = + _triscale!(A, B, C, MulAddMul(alpha, beta)) +@inline mul!(A::AbstractTriangular, B::Number, C::AbstractTriangular, alpha::Number, beta::Number) = + _triscale!(A, B, C, MulAddMul(alpha, beta)) + +function checksize1(A, B) + szA, szB = size(A), size(B) + szA == szB || throw(DimensionMismatch("size of A, $szA, does not match size of B, $szB")) + checksquare(B) end -@inline function _mul!(A::UpperTriangular, B::UpperTriangular, c::Number, _add::MulAddMul) - n = checksquare(B) +function _triscale!(A::UpperTriangular, B::UpperTriangular, c::Number, _add) + n = checksize1(A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) for j = 1:n for i = 1:j - @inbounds _modify!(_add, B[i,j] * c, A, (i,j)) + @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j)) end end return A end -@inline function _mul!(A::UpperTriangular, c::Number, B::UpperTriangular, _add::MulAddMul) - n = checksquare(B) +function _triscale!(A::UpperTriangular, c::Number, B::UpperTriangular, _add) + n = checksize1(A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) for j = 1:n for i = 1:j - @inbounds _modify!(_add, c * B[i,j], A, (i,j)) + @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j)) end end return A end -@inline function _mul!(A::UpperTriangular, B::UnitUpperTriangular, c::Number, _add::MulAddMul) - n = checksquare(B) +function _triscale!(A::UpperOrUnitUpperTriangular, B::UnitUpperTriangular, c::Number, _add) + n = checksize1(A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) for j = 1:n @inbounds _modify!(_add, c, A, (j,j)) for i = 1:(j - 1) - @inbounds _modify!(_add, B[i,j] * c, A, (i,j)) + @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j)) end end return A end -@inline function _mul!(A::UpperTriangular, c::Number, B::UnitUpperTriangular, 
_add::MulAddMul) - n = checksquare(B) +function _triscale!(A::UpperOrUnitUpperTriangular, c::Number, B::UnitUpperTriangular, _add) + n = checksize1(A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) for j = 1:n @inbounds _modify!(_add, c, A, (j,j)) for i = 1:(j - 1) - @inbounds _modify!(_add, c * B[i,j], A, (i,j)) + @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j)) end end return A end -@inline function _mul!(A::LowerTriangular, B::LowerTriangular, c::Number, _add::MulAddMul) - n = checksquare(B) +function _triscale!(A::LowerTriangular, B::LowerTriangular, c::Number, _add) + n = checksize1(A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) for j = 1:n for i = j:n - @inbounds _modify!(_add, B[i,j] * c, A, (i,j)) + @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j)) end end return A end -@inline function _mul!(A::LowerTriangular, c::Number, B::LowerTriangular, _add::MulAddMul) - n = checksquare(B) +function _triscale!(A::LowerTriangular, c::Number, B::LowerTriangular, _add) + n = checksize1(A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) for j = 1:n for i = j:n - @inbounds _modify!(_add, c * B[i,j], A, (i,j)) + @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j)) end end return A end -@inline function _mul!(A::LowerTriangular, B::UnitLowerTriangular, c::Number, _add::MulAddMul) - n = checksquare(B) +function _triscale!(A::LowerOrUnitLowerTriangular, B::UnitLowerTriangular, c::Number, _add) + n = checksize1(A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) for j = 1:n @inbounds _modify!(_add, c, A, (j,j)) for i = (j + 1):n - @inbounds _modify!(_add, B[i,j] * c, A, (i,j)) + @inbounds _modify!(_add, B.data[i,j] * c, A.data, (i,j)) end end return A end -@inline function _mul!(A::LowerTriangular, c::Number, B::UnitLowerTriangular, _add::MulAddMul) - n = checksquare(B) +function _triscale!(A::LowerOrUnitLowerTriangular, c::Number, B::UnitLowerTriangular, _add) + n = checksize1(A, B) iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) for j = 1:n @inbounds _modify!(_add, c, A, (j,j)) for i = (j + 1):n - @inbounds _modify!(_add, c * B[i,j], A, (i,j)) + @inbounds _modify!(_add, c * B.data[i,j], A.data, (i,j)) end end return A end -rmul!(A::Union{UpperTriangular,LowerTriangular}, c::Number) = mul!(A, A, c) -lmul!(c::Number, A::Union{UpperTriangular,LowerTriangular}) = mul!(A, c, A) +rmul!(A::UpperOrLowerTriangular, c::Number) = @inline _triscale!(A, A, c, MulAddMul()) +lmul!(c::Number, A::UpperOrLowerTriangular) = @inline _triscale!(A, c, A, MulAddMul()) function dot(x::AbstractVector, A::UpperTriangular, y::AbstractVector) require_one_based_indexing(x, y) @@ -659,34 +722,93 @@ fillstored!(A::UnitUpperTriangular, x) = (fillband!(A.data, x, 1, size(A,2)-1); # BlasFloat routines # ###################### -lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B) # is this necessary? 
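With the `_triscale!` rework above, scalar scaling of triangular matrices via `rmul!`/`lmul!` touches only the stored triangle of the underlying data. An illustrative sketch of the user-facing behaviour (not part of the patch):

```julia
using LinearAlgebra

U = UpperTriangular([1.0 2.0; 0.0 3.0])
rmul!(U, 2.0)    # scales the stored triangle in place -> UpperTriangular([2.0 4.0; 0.0 6.0])
lmul!(0.5, U)    # scales from the left; restores the original values here
```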
- -@inline mul!(C::AbstractMatrix, A::AbstractTriangular, B::Tridiagonal, alpha::Number, beta::Number) = - mul!(C, copyto!(similar(parent(A)), A), B, alpha, beta) -@inline mul!(C::AbstractMatrix, A::Tridiagonal, B::AbstractTriangular, alpha::Number, beta::Number) = - mul!(C, A, copyto!(similar(parent(B)), B), alpha, beta) -mul!(C::AbstractVector, A::AbstractTriangular, transB::Transpose{<:Any,<:AbstractVecOrMat}) = - (B = transB.parent; lmul!(A, transpose!(C, B))) -mul!(C::AbstractMatrix, A::AbstractTriangular, transB::Transpose{<:Any,<:AbstractVecOrMat}) = - (B = transB.parent; lmul!(A, transpose!(C, B))) -mul!(C::AbstractMatrix, A::AbstractTriangular, adjB::Adjoint{<:Any,<:AbstractVecOrMat}) = - (B = adjB.parent; lmul!(A, adjoint!(C, B))) -mul!(C::AbstractVecOrMat, A::AbstractTriangular, adjB::Adjoint{<:Any,<:AbstractVecOrMat}) = - (B = adjB.parent; lmul!(A, adjoint!(C, B))) - -# The three methods are neceesary to avoid ambiguities with definitions in matmul.jl -mul!(C::AbstractVector , A::AbstractTriangular, B::AbstractVector) = lmul!(A, copyto!(C, B)) -mul!(C::AbstractMatrix , A::AbstractTriangular, B::AbstractVecOrMat) = lmul!(A, copyto!(C, B)) -mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = lmul!(A, copyto!(C, B)) - -@inline mul!(C::AbstractMatrix, A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - mul!(C, A, copy(B), alpha, beta) -@inline mul!(C::AbstractMatrix, A::AbstractTriangular, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = - mul!(C, A, copy(B), alpha, beta) -mul!(C::AbstractVector, A::AbstractTriangular{<:Any,<:Adjoint}, B::Transpose{<:Any,<:AbstractVecOrMat}) = throw(MethodError(mul!, (C, A, B))) -mul!(C::AbstractVector, A::AbstractTriangular{<:Any,<:Transpose}, B::Transpose{<:Any,<:AbstractVecOrMat}) = throw(MethodError(mul!, (C, A, B))) - -# preserve triangular structure in in-place multiplication +# which triangle to use of the underlying data +uplo_char(::UpperOrUnitUpperTriangular) = 'U' +uplo_char(::LowerOrUnitLowerTriangular) = 'L' +uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:AdjOrTrans}) = 'L' +uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:AdjOrTrans}) = 'U' +uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:Adjoint{<:Any,<:Transpose}}) = 'U' +uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:Adjoint{<:Any,<:Transpose}}) = 'L' +uplo_char(::UpperOrUnitUpperTriangular{<:Any,<:Transpose{<:Any,<:Adjoint}}) = 'U' +uplo_char(::LowerOrUnitLowerTriangular{<:Any,<:Transpose{<:Any,<:Adjoint}}) = 'L' + +isunit_char(::UpperTriangular) = 'N' +isunit_char(::UnitUpperTriangular) = 'U' +isunit_char(::LowerTriangular) = 'N' +isunit_char(::UnitLowerTriangular) = 'U' + +lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B) +mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) = _trimul!(C, A, B) +mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) = _trimul!(C, A, B) +mul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = _trimul!(C, A, B) +mul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractTriangular) = _trimul!(C, A, B) + +# generic fallback for AbstractTriangular matrices outside of the four subtypes provided here +_trimul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) = + lmul!(A, copyto!(C, B)) +_trimul!(C::AbstractMatrix, A::AbstractTriangular, B::AbstractMatrix) = + lmul!(A, copyto!(C, B)) +_trimul!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = + rmul!(copyto!(C, A), B) +_trimul!(C::AbstractMatrix, 
A::AbstractTriangular, B::AbstractTriangular) = + lmul!(A, copyto!(C, B)) +# redirect for UpperOrLowerTriangular +_trimul!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVector) = + generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B) +_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::AbstractMatrix) = + generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B) +_trimul!(C::AbstractMatrix, A::AbstractMatrix, B::UpperOrLowerTriangular) = + generic_mattrimul!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B))) +_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::UpperOrLowerTriangular) = + generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B) +# disambiguation with AbstractTriangular +_trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::AbstractTriangular) = + generic_trimatmul!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B) +_trimul!(C::AbstractMatrix, A::AbstractTriangular, B::UpperOrLowerTriangular) = + generic_mattrimul!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B))) + +lmul!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _trimul!(B, A, B) +rmul!(A::AbstractMatrix, B::AbstractTriangular) = @inline _trimul!(A, A, B) + + +for TC in (:AbstractVector, :AbstractMatrix) + @eval @inline function mul!(C::$TC, A::AbstractTriangular, B::AbstractVector, alpha::Number, beta::Number) + if isone(alpha) && iszero(beta) + return mul!(C, A, B) + else + return generic_matvecmul!(C, 'N', A, B, MulAddMul(alpha, beta)) + end + end +end +for (TA, TB) in ((:AbstractTriangular, :AbstractMatrix), + (:AbstractMatrix, :AbstractTriangular), + (:AbstractTriangular, :AbstractTriangular) + ) + @eval @inline function mul!(C::AbstractMatrix, A::$TA, B::$TB, alpha::Number, beta::Number) + if isone(alpha) && iszero(beta) + return mul!(C, A, B) + else + return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) + end + end +end + +ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = _ldiv!(C, A, B) +# generic fallback for AbstractTriangular, directs to 2-arg [l/r]div! +_ldiv!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = + ldiv!(A, copyto!(C, B)) +_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractTriangular) = + rdiv!(copyto!(C, A), B) +# redirect for UpperOrLowerTriangular to generic_*div! 
+_ldiv!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVecOrMat) = + generic_trimatdiv!(C, uplo_char(A), isunit_char(A), wrapperop(parent(A)), _unwrap_at(parent(A)), B) +_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UpperOrLowerTriangular) = + generic_mattridiv!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B))) + +ldiv!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _ldiv!(B, A, B) +rdiv!(A::AbstractMatrix, B::AbstractTriangular) = @inline _rdiv!(A, A, B) + +# preserve triangular structure in in-place multiplication/division for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular), (:UpperTriangular, :UpperTriangular, :UnitUpperTriangular), (:UpperTriangular, :UnitUpperTriangular, :UpperTriangular), @@ -695,54 +817,47 @@ for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular), (:LowerTriangular, :LowerTriangular, :UnitLowerTriangular), (:LowerTriangular, :UnitLowerTriangular, :LowerTriangular), (:UnitLowerTriangular, :UnitLowerTriangular, :UnitLowerTriangular)) - @eval function mul!(C::$cty, A::$aty, B::$bty) - lmul!(A, copyto!(parent(C), B)) - return C - end - - @eval @inline function mul!(C::$cty, A::$aty, B::$bty, alpha::Number, beta::Number) - if isone(alpha) && iszero(beta) - return mul!(C, A, B) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) + @eval begin + function _trimul!(C::$cty, A::$aty, B::$bty) + _trimul!(parent(C), A, B) + return C + end + function _ldiv!(C::$cty, A::$aty, B::$bty) + _ldiv!(parent(C), A, B) + return C + end + function _rdiv!(C::$cty, A::$aty, B::$bty) + _rdiv!(parent(C), A, B) + return C end end end -# direct multiplication/division for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'), (:UnitLowerTriangular, 'L', 'U'), (:UpperTriangular, 'U', 'N'), (:UnitUpperTriangular, 'U', 'U')) @eval begin - # Vector multiplication - lmul!(A::$t{T,<:StridedMatrix}, b::StridedVector{T}) where {T<:BlasFloat} = - BLAS.trmv!($uploc, 'N', $isunitc, A.data, b) - - # Matrix multiplication - lmul!(A::$t{T,<:StridedMatrix}, B::StridedMatrix{T}) where {T<:BlasFloat} = - BLAS.trmm!('L', $uploc, 'N', $isunitc, one(T), A.data, B) - rmul!(A::StridedMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} = - BLAS.trmm!('R', $uploc, 'N', $isunitc, one(T), B.data, A) - - # Left division - ldiv!(A::$t{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.trtrs!($uploc, 'N', $isunitc, A.data, B) - - # Right division - rdiv!(A::StridedMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} = - BLAS.trsm!('R', $uploc, 'N', $isunitc, one(T), B.data, A) - # Matrix inverse inv!(A::$t{T,S}) where {T<:BlasFloat,S<:StridedMatrix} = $t{T,S}(LAPACK.trtri!($uploc, $isunitc, A.data)) + function inv(A::$t{T}) where {T} + S = typeof(inv(oneunit(T))) + if S <: BlasFloat || S === T # i.e. 
A is unitless + $t(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A)))) + else + J = (one(T)*I)(size(A, 1)) + $t(ldiv!(similar(A, S, size(A)), A, J)) + end + end + # Error bounds for triangular solve errorbounds(A::$t{T,<:StridedMatrix}, X::StridedVecOrMat{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.trrfs!($uploc, 'N', $isunitc, A.data, B, X) # Condition numbers - function cond(A::$t{<:BlasFloat}, p::Real=2) + function cond(A::$t{<:BlasFloat,<:StridedMatrix}, p::Real=2) checksquare(A) if p == 1 return inv(LAPACK.trcon!('O', $uploc, $isunitc, A.data)) @@ -755,67 +870,22 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'), end end -# adjoint/transpose multiplication ('uploc' reversed) -for (t, uploc, isunitc) in ((:LowerTriangular, 'U', 'N'), - (:UnitLowerTriangular, 'U', 'U'), - (:UpperTriangular, 'L', 'N'), - (:UnitUpperTriangular, 'L', 'U')) - @eval begin - # Vector multiplication - lmul!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasFloat} = - BLAS.trmv!($uploc, 'T', $isunitc, parent(parent(A)), b) - lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasReal} = - BLAS.trmv!($uploc, 'T', $isunitc, parent(parent(A)), b) - lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasComplex} = - BLAS.trmv!($uploc, 'C', $isunitc, parent(parent(A)), b) - - # Matrix multiplication - lmul!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasFloat} = - BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), parent(parent(A)), B) - lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasComplex} = - BLAS.trmm!('L', $uploc, 'C', $isunitc, one(T), parent(parent(A)), B) - lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasReal} = - BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), parent(parent(A)), B) - - rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} = - BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A) - rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasComplex} = - BLAS.trmm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A) - rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasReal} = - BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A) - - # Left division - ldiv!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = - LAPACK.trtrs!($uploc, 'T', $isunitc, parent(parent(A)), B) - ldiv!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = - LAPACK.trtrs!($uploc, 'T', $isunitc, parent(parent(A)), B) - ldiv!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - LAPACK.trtrs!($uploc, 'C', $isunitc, parent(parent(A)), B) - - # Right division - rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} = - BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A) - rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasReal} = - BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A) - rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasComplex} = - BLAS.trsm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A) - end -end - -function inv(A::LowerTriangular{T}) where T - S = typeof((zero(T)*one(T) + zero(T))/one(T)) - 
LowerTriangular(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A, 1), size(A, 1)))) -end -function inv(A::UpperTriangular{T}) where T - S = typeof((zero(T)*one(T) + zero(T))/one(T)) - UpperTriangular(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A, 1), size(A, 1)))) -end -inv(A::UnitUpperTriangular{T}) where {T} = UnitUpperTriangular(ldiv!(A, Matrix{T}(I, size(A, 1), size(A, 1)))) -inv(A::UnitLowerTriangular{T}) where {T} = UnitLowerTriangular(ldiv!(A, Matrix{T}(I, size(A, 1), size(A, 1)))) - -errorbounds(A::AbstractTriangular{T,<:StridedMatrix}, X::StridedVecOrMat{T}, B::StridedVecOrMat{T}) where {T<:Union{BigFloat,Complex{BigFloat}}} = +# multiplication +generic_trimatmul!(c::StridedVector{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, b::AbstractVector{T}) where {T<:BlasFloat} = + BLAS.trmv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, c === b ? c : copyto!(c, b)) +generic_trimatmul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} = + BLAS.trmm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copyto!(C, B)) +generic_mattrimul!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} = + BLAS.trmm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A)) +# division +generic_trimatdiv!(C::StridedVecOrMat{T}, uploc, isunitc, tfun::Function, A::StridedMatrix{T}, B::AbstractVecOrMat{T}) where {T<:BlasFloat} = + LAPACK.trtrs!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copyto!(C, B)) +generic_mattridiv!(C::StridedMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} = + BLAS.trsm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A)) + +errorbounds(A::AbstractTriangular{T}, X::AbstractVecOrMat{T}, B::AbstractVecOrMat{T}) where {T<:Union{BigFloat,Complex{BigFloat}}} = error("not implemented yet! 
Please submit a pull request.") -function errorbounds(A::AbstractTriangular{TA,<:StridedMatrix}, X::StridedVecOrMat{TX}, B::StridedVecOrMat{TB}) where {TA<:Number,TX<:Number,TB<:Number} +function errorbounds(A::AbstractTriangular{TA}, X::AbstractVecOrMat{TX}, B::AbstractVecOrMat{TB}) where {TA<:Number,TX<:Number,TB<:Number} TAXB = promote_type(TA, TB, TX, Float32) errorbounds(convert(AbstractMatrix{TAXB}, A), convert(AbstractArray{TAXB}, X), convert(AbstractArray{TAXB}, B)) end @@ -893,614 +963,539 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular), end ## Generic triangular multiplication -function lmul!(A::UpperTriangular, B::StridedVecOrMat) +function generic_trimatmul!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractVecOrMat) + require_one_based_indexing(C, A, B) m, n = size(B, 1), size(B, 2) - if m != size(A, 1) + N = size(A, 1) + if m != N throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) end - for j = 1:n - for i = 1:m - Bij = A.data[i,i]*B[i,j] - for k = i + 1:m - Bij += A.data[i,k]*B[k,j] + mc, nc = size(C, 1), size(C, 2) + if mc != N || nc != n + throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)")) + end + oA = oneunit(eltype(A)) + unit = isunitc == 'U' + @inbounds if uploc == 'U' + if tfun === identity + for j in 1:n + for i in 1:m + Cij = (unit ? oA : A[i,i]) * B[i,j] + for k in i + 1:m + Cij += A[i,k] * B[k,j] + end + C[i,j] = Cij + end + end + else # tfun in (transpose, adjoint) + for j in 1:n + for i in m:-1:1 + Cij = (unit ? oA : tfun(A[i,i])) * B[i,j] + for k in 1:i - 1 + Cij += tfun(A[k,i]) * B[k,j] + end + C[i,j] = Cij + end end - B[i,j] = Bij end - end - B -end - -function lmul!(A::UnitUpperTriangular, B::StridedVecOrMat) - m, n = size(B, 1), size(B, 2) - if m != size(A, 1) - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - for j = 1:n - for i = 1:m - Bij = B[i,j] - for k = i + 1:m - Bij += A.data[i,k]*B[k,j] + else # uploc == 'L' + if tfun === identity + for j in 1:n + for i in m:-1:1 + Cij = (unit ? oA : A[i,i]) * B[i,j] + for k in 1:i - 1 + Cij += A[i,k] * B[k,j] + end + C[i,j] = Cij + end + end + else # tfun in (transpose, adjoint) + for j in 1:n + for i in 1:m + Cij = (unit ? oA : tfun(A[i,i])) * B[i,j] + for k in i + 1:m + Cij += tfun(A[k,i]) * B[k,j] + end + C[i,j] = Cij + end end - B[i,j] = Bij end end - B + return C end - -function lmul!(A::LowerTriangular, B::StridedVecOrMat) +# conjugate cases +function generic_trimatmul!(C::AbstractVecOrMat, uploc, isunitc, ::Function, xA::AdjOrTrans, B::AbstractVecOrMat) + A = parent(xA) + require_one_based_indexing(C, A, B) m, n = size(B, 1), size(B, 2) - if m != size(A, 1) + N = size(A, 1) + if m != N throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) end - for j = 1:n - for i = m:-1:1 - Bij = A.data[i,i]*B[i,j] - for k = 1:i - 1 - Bij += A.data[i,k]*B[k,j] + mc, nc = size(C, 1), size(C, 2) + if mc != N || nc != n + throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($N,$n)")) + end + oA = oneunit(eltype(A)) + unit = isunitc == 'U' + @inbounds if uploc == 'U' + for j in 1:n + for i in 1:m + Cij = (unit ? 
oA : conj(A[i,i])) * B[i,j] + for k in i + 1:m + Cij += conj(A[i,k]) * B[k,j] + end + C[i,j] = Cij end - B[i,j] = Bij end - end - B -end -function lmul!(A::UnitLowerTriangular, B::StridedVecOrMat) - m, n = size(B, 1), size(B, 2) - if m != size(A, 1) - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - for j = 1:n - for i = m:-1:1 - Bij = B[i,j] - for k = 1:i - 1 - Bij += A.data[i,k]*B[k,j] + else # uploc == 'L' + for j in 1:n + for i in m:-1:1 + Cij = (unit ? oA : conj(A[i,i])) * B[i,j] + for k in 1:i - 1 + Cij += conj(A[i,k]) * B[k,j] + end + C[i,j] = Cij end - B[i,j] = Bij end end - B + return C end -for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose)) - @eval begin - function lmul!(xA::UpperTriangular{<:Any,<:$t}, B::StridedVecOrMat) - A = xA.data - m, n = size(B, 1), size(B, 2) - if m != size(A, 1) - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - pA = parent(A) - for j = 1:n - for i = 1:m - Bij = $tfun(pA[i,i])*B[i,j] - for k = i + 1:m - Bij += $tfun(pA[k,i])*B[k,j] +function generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix) + require_one_based_indexing(C, A, B) + m, n = size(A, 1), size(A, 2) + N = size(B, 1) + if n != N + throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N")) + end + mc, nc = size(C, 1), size(C, 2) + if mc != m || nc != N + throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)")) + end + oB = oneunit(eltype(B)) + unit = isunitc == 'U' + @inbounds if uploc == 'U' + if tfun === identity + for i in 1:m + for j in n:-1:1 + Cij = A[i,j] * (unit ? oB : B[j,j]) + for k in 1:j - 1 + Cij += A[i,k] * B[k,j] end - B[i,j] = Bij + C[i,j] = Cij end end - B - end - - function lmul!(xA::UnitUpperTriangular{<:Any,<:$t}, B::StridedVecOrMat) - A = xA.data - m, n = size(B, 1), size(B, 2) - if m != size(A, 1) - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - pA = parent(A) - for j = 1:n - for i = 1:m - Bij = B[i,j] - for k = i + 1:m - Bij += $tfun(pA[k,i])*B[k,j] + else # tfun in (transpose, adjoint) + for i in 1:m + for j in 1:n + Cij = A[i,j] * (unit ? oB : tfun(B[j,j])) + for k in j + 1:n + Cij += A[i,k] * tfun(B[j,k]) end - B[i,j] = Bij + C[i,j] = Cij end end - B end - - function lmul!(xA::LowerTriangular{<:Any,<:$t}, B::StridedVecOrMat) - A = xA.data - m, n = size(B, 1), size(B, 2) - if m != size(A, 1) - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - pA = parent(A) - for j = 1:n - for i = m:-1:1 - Bij = $tfun(pA[i,i])*B[i,j] - for k = 1:i - 1 - Bij += $tfun(pA[k,i])*B[k,j] + else # uploc == 'L' + if tfun === identity + for i in 1:m + for j in 1:n + Cij = A[i,j] * (unit ? oB : B[j,j]) + for k in j + 1:n + Cij += A[i,k] * B[k,j] end - B[i,j] = Bij + C[i,j] = Cij end end - B - end - function lmul!(xA::UnitLowerTriangular{<:Any,<:$t}, B::StridedVecOrMat) - A = xA.data - m, n = size(B, 1), size(B, 2) - if m != size(A, 1) - throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m")) - end - pA = parent(A) - for j = 1:n - for i = m:-1:1 - Bij = B[i,j] - for k = 1:i - 1 - Bij += $tfun(pA[k,i])*B[k,j] + else # tfun in (transpose, adjoint) + for i in 1:m + for j in n:-1:1 + Cij = A[i,j] * (unit ? 
oB : tfun(B[j,j])) + for k in 1:j - 1 + Cij += A[i,k] * tfun(B[j,k]) end - B[i,j] = Bij + C[i,j] = Cij end end - B end end + return C end - -function rmul!(A::StridedMatrix, B::UpperTriangular) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = n:-1:1 - Aij = A[i,j]*B[j,j] - for k = 1:j - 1 - Aij += A[i,k]*B.data[k,j] +# conjugate cases +function generic_mattrimul!(C::AbstractMatrix, uploc, isunitc, ::Function, A::AbstractMatrix, xB::AdjOrTrans) + B = parent(xB) + require_one_based_indexing(C, A, B) + m, n = size(A, 1), size(A, 2) + N = size(B, 1) + if n != N + throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $N")) + end + mc, nc = size(C, 1), size(C, 2) + if mc != m || nc != N + throw(DimensionMismatch("output has dimensions ($mc,$nc), should have ($m,$N)")) + end + oB = oneunit(eltype(B)) + unit = isunitc == 'U' + @inbounds if uploc == 'U' + for i in 1:m + for j in n:-1:1 + Cij = A[i,j] * (unit ? oB : conj(B[j,j])) + for k in 1:j - 1 + Cij += A[i,k] * conj(B[k,j]) + end + C[i,j] = Cij end - A[i,j] = Aij end - end - A -end -function rmul!(A::StridedMatrix, B::UnitUpperTriangular) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = n:-1:1 - Aij = A[i,j] - for k = 1:j - 1 - Aij += A[i,k]*B.data[k,j] + else # uploc == 'L' + for i in 1:m + for j in 1:n + Cij = A[i,j] * (unit ? oB : conj(B[j,j])) + for k in j + 1:n + Cij += A[i,k] * conj(B[k,j]) + end + C[i,j] = Cij end - A[i,j] = Aij end end - A + return C end -function rmul!(A::StridedMatrix, B::LowerTriangular) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = 1:n - Aij = A[i,j]*B[j,j] - for k = j + 1:n - Aij += A[i,k]*B.data[k,j] - end - A[i,j] = Aij - end - end - A -end -function rmul!(A::StridedMatrix, B::UnitLowerTriangular) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = 1:n - Aij = A[i,j] - for k = j + 1:n - Aij += A[i,k]*B.data[k,j] - end - A[i,j] = Aij - end - end - A -end +#Generic solver using naive substitution -for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose)) - @eval begin - function rmul!(A::StridedMatrix, B::UpperTriangular{<:Any,<:$t}) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - pB = parent(parent(B)) - for i = 1:m - for j = n:-1:1 - Aij = A[i,j]*$tfun(pB[j,j]) - for k = 1:j - 1 - Aij += A[i,k]*$tfun(pB[j,k]) +@inline _ustrip(a) = oneunit(a) \ a +@inline _ustrip(a::Union{AbstractFloat,Integer,Complex,Rational}) = a + +# manually hoisting b[j] significantly improves performance as of Dec 2015 +# manually eliding bounds checking significantly improves performance as of Dec 2015 +# replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj] +# does not significantly impact performance as of Dec 2015 +# in the transpose and conjugate transpose naive substitution variants, +# accumulating in z rather than b[j,k] significantly improves performance as of Dec 2015 +function generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, tfun::Function, A::AbstractMatrix, 
B::AbstractVecOrMat) + require_one_based_indexing(C, A, B) + mA, nA = size(A) + m, n = size(B, 1), size(B,2) + if nA != m + throw(DimensionMismatch("second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal")) + end + if size(C) != size(B) + throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))")) + end + oA = oneunit(eltype(A)) + @inbounds if uploc == 'U' + if isunitc == 'N' + if tfun === identity + for k in 1:n + amm = A[m,m] + iszero(amm) && throw(SingularException(m)) + Cm = C[m,k] = amm \ B[m,k] + # fill C-column + for i in m-1:-1:1 + C[i,k] = oA \ B[i,k] - _ustrip(A[i,m]) * Cm + end + for j in m-1:-1:1 + ajj = A[j,j] + iszero(ajj) && throw(SingularException(j)) + Cj = C[j,k] = _ustrip(ajj) \ C[j,k] + for i in j-1:-1:1 + C[i,k] -= _ustrip(A[i,j]) * Cj + end end - A[i,j] = Aij end - end - A - end - - function rmul!(A::StridedMatrix, B::UnitUpperTriangular{<:Any,<:$t}) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - pB = parent(parent(B)) - for i = 1:m - for j = n:-1:1 - Aij = A[i,j] - for k = 1:j - 1 - Aij += A[i,k]*$tfun(pB[j,k]) + else # tfun in (adjoint, transpose) + for k in 1:n + for j in 1:m + ajj = A[j,j] + iszero(ajj) && throw(SingularException(j)) + Bj = B[j,k] + for i in 1:j-1 + Bj -= tfun(A[i,j]) * C[i,k] + end + C[j,k] = tfun(ajj) \ Bj end - A[i,j] = Aij end end - A - end - - function rmul!(A::StridedMatrix, B::LowerTriangular{<:Any,<:$t}) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - pB = parent(parent(B)) - for i = 1:m - for j = 1:n - Aij = A[i,j]*$tfun(pB[j,j]) - for k = j + 1:n - Aij += A[i,k]*$tfun(pB[j,k]) + else # isunitc == 'U' + if tfun === identity + for k in 1:n + Cm = C[m,k] = oA \ B[m,k] + # fill C-column + for i in m-1:-1:1 + C[i,k] = oA \ B[i,k] - _ustrip(A[i,m]) * Cm + end + for j in m-1:-1:1 + Cj = C[j,k] + for i in 1:j-1 + C[i,k] -= _ustrip(A[i,j]) * Cj + end + end + end + else # tfun in (adjoint, transpose) + for k in 1:n + for j in 1:m + Bj = B[j,k] + for i in 1:j-1 + Bj -= tfun(A[i,j]) * C[i,k] + end + C[j,k] = oA \ Bj end - A[i,j] = Aij end end - A end - - function rmul!(A::StridedMatrix, B::UnitLowerTriangular{<:Any,<:$t}) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) + else # uploc == 'L' + if isunitc == 'N' + if tfun === identity + for k in 1:n + a11 = A[1,1] + iszero(a11) && throw(SingularException(1)) + C1 = C[1,k] = a11 \ B[1,k] + # fill C-column + for i in 2:m + C[i,k] = oA \ B[i,k] - _ustrip(A[i,1]) * C1 + end + for j in 2:m + ajj = A[j,j] + iszero(ajj) && throw(SingularException(j)) + Cj = C[j,k] = _ustrip(ajj) \ C[j,k] + for i in j+1:m + C[i,k] -= _ustrip(A[i,j]) * Cj + end + end + end + else # tfun in (adjoint, transpose) + for k in 1:n + for j in m:-1:1 + ajj = A[j,j] + iszero(ajj) && throw(SingularException(j)) + Bj = B[j,k] + for i in j+1:m + Bj -= tfun(A[i,j]) * C[i,k] + end + C[j,k] = tfun(ajj) \ Bj + end + end end - pB = parent(parent(B)) - for i = 1:m - for j = 1:n - Aij = A[i,j] - for k = j + 1:n - Aij += A[i,k]*$tfun(pB[j,k]) + else # isunitc == 'U' + if tfun === identity + for k in 1:n + C1 = C[1,k] = oA \ B[1,k] + # fill C-column + for i in 2:m + C[i,k] = oA \ B[i,k] - _ustrip(A[i,1]) * C1 + end + for j in 2:m + Cj = C[j,k] + for i in j+1:m 
+ C[i,k] -= _ustrip(A[i,j]) * Cj + end + end + end + else # tfun in (adjoint, transpose) + for k in 1:n + for j in m:-1:1 + Bj = B[j,k] + for i in j+1:m + Bj -= tfun(A[i,j]) * C[i,k] + end + C[j,k] = oA \ Bj end - A[i,j] = Aij end end - A - end - end -end - -#Generic solver using naive substitution -# manually hoisting b[j] significantly improves performance as of Dec 2015 -# manually eliding bounds checking significantly improves performance as of Dec 2015 -# directly indexing A.data rather than A significantly improves performance as of Dec 2015 -# replacing repeated references to A.data with [Adata = A.data and references to Adata] -# does not significantly impact performance as of Dec 2015 -# replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj] -# does not significantly impact performance as of Dec 2015 -function ldiv!(A::UpperTriangular, b::AbstractVector) - require_one_based_indexing(A, b) - n = size(A, 2) - if !(n == length(b)) - throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) - end - @inbounds for j in n:-1:1 - iszero(A.data[j,j]) && throw(SingularException(j)) - bj = b[j] = A.data[j,j] \ b[j] - for i in j-1:-1:1 # counterintuitively 1:j-1 performs slightly better - b[i] -= A.data[i,j] * bj - end - end - return b -end -function ldiv!(A::UnitUpperTriangular, b::AbstractVector) - require_one_based_indexing(A, b) - n = size(A, 2) - if !(n == length(b)) - throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) - end - @inbounds for j in n:-1:1 - bj = b[j] - for i in j-1:-1:1 # counterintuitively 1:j-1 performs slightly better - b[i] -= A.data[i,j] * bj - end - end - return b -end -function ldiv!(A::LowerTriangular, b::AbstractVector) - require_one_based_indexing(A, b) - n = size(A, 2) - if !(n == length(b)) - throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) - end - @inbounds for j in 1:n - iszero(A.data[j,j]) && throw(SingularException(j)) - bj = b[j] = A.data[j,j] \ b[j] - for i in j+1:n - b[i] -= A.data[i,j] * bj end end - return b -end -function ldiv!(A::UnitLowerTriangular, b::AbstractVector) - require_one_based_indexing(A, b) - n = size(A, 2) - if !(n == length(b)) - throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) - end - @inbounds for j in 1:n - bj = b[j] - for i in j+1:n - b[i] -= A.data[i,j] * bj - end - end - return b -end -function ldiv!(A::AbstractTriangular, B::AbstractMatrix) - require_one_based_indexing(A, B) - nA, mA = size(A) - n = size(B, 1) - if nA != n - throw(DimensionMismatch("second dimension of left hand side A, $mA, and first dimension of right hand side B, $n, must be equal")) - end - for b in eachcol(B) - ldiv!(A, b) - end - B + return C end - -# in the following transpose and conjugate transpose naive substitution variants, -# accumulating in z rather than b[j,k] significantly improves performance as of Dec 2015 -for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose)) - @eval begin - function ldiv!(xA::UpperTriangular{<:Any,<:$t}, b::AbstractVector) - require_one_based_indexing(xA, b) - A = parent(parent(xA)) - n = size(A, 1) - if !(n == length(b)) - throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) - end - 
@inbounds for j in n:-1:1 - z = b[j] - for i in n:-1:j+1 - z -= $tfun(A[i,j]) * b[i] +# conjugate cases +function generic_trimatdiv!(C::AbstractVecOrMat, uploc, isunitc, ::Function, xA::AdjOrTrans, B::AbstractVecOrMat) + A = parent(xA) + require_one_based_indexing(C, A, B) + mA, nA = size(A) + m, n = size(B, 1), size(B,2) + if nA != m + throw(DimensionMismatch("second dimension of left hand side A, $nA, and first dimension of right hand side B, $m, must be equal")) + end + if size(C) != size(B) + throw(DimensionMismatch("size of output, $(size(C)), does not match size of right hand side, $(size(B))")) + end + oA = oneunit(eltype(A)) + @inbounds if uploc == 'U' + if isunitc == 'N' + for k in 1:n + amm = conj(A[m,m]) + iszero(amm) && throw(SingularException(m)) + Cm = C[m,k] = amm \ B[m,k] + # fill C-column + for i in m-1:-1:1 + C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,m])) * Cm end - iszero(A[j,j]) && throw(SingularException(j)) - b[j] = $tfun(A[j,j]) \ z - end - return b - end - - function ldiv!(xA::UnitUpperTriangular{<:Any,<:$t}, b::AbstractVector) - require_one_based_indexing(xA, b) - A = parent(parent(xA)) - n = size(A, 1) - if !(n == length(b)) - throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) - end - @inbounds for j in n:-1:1 - z = b[j] - for i in n:-1:j+1 - z -= $tfun(A[i,j]) * b[i] + for j in m-1:-1:1 + ajj = conj(A[j,j]) + iszero(ajj) && throw(SingularException(j)) + Cj = C[j,k] = _ustrip(ajj) \ C[j,k] + for i in j-1:-1:1 + C[i,k] -= _ustrip(conj(A[i,j])) * Cj + end end - b[j] = z - end - return b - end - - function ldiv!(xA::LowerTriangular{<:Any,<:$t}, b::AbstractVector) - require_one_based_indexing(xA, b) - A = parent(parent(xA)) - n = size(A, 1) - if !(n == length(b)) - throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) end - @inbounds for j in 1:n - z = b[j] - for i in 1:j-1 - z -= $tfun(A[i,j]) * b[i] + else # isunitc == 'U' + for k in 1:n + Cm = C[m,k] = oA \ B[m,k] + # fill C-column + for i in m-1:-1:1 + C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,m])) * Cm end - iszero(A[j,j]) && throw(SingularException(j)) - b[j] = $tfun(A[j,j]) \ z - end - return b - end - - function ldiv!(xA::UnitLowerTriangular{<:Any,<:$t}, b::AbstractVector) - require_one_based_indexing(xA, b) - A = parent(parent(xA)) - n = size(A, 1) - if !(n == length(b)) - throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal")) - end - @inbounds for j in 1:n - z = b[j] - for i in 1:j-1 - z -= $tfun(A[i,j]) * b[i] + for j in m-1:-1:1 + Cj = C[j,k] + for i in 1:j-1 + C[i,k] -= _ustrip(conj(A[i,j])) * Cj + end end - b[j] = z end - return b end - end -end - -function rdiv!(A::StridedMatrix, B::UpperTriangular) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = 1:n - Aij = A[i,j] - for k = 1:j - 1 - Aij -= A[i,k]*B.data[k,j] + else # uploc == 'L' + if isunitc == 'N' + for k in 1:n + a11 = conj(A[1,1]) + iszero(a11) && throw(SingularException(1)) + C1 = C[1,k] = a11 \ B[1,k] + # fill C-column + for i in 2:m + C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,1])) * C1 + end + for j in 2:m + ajj = conj(A[j,j]) + iszero(ajj) && throw(SingularException(j)) + Cj = C[j,k] = _ustrip(ajj) \ C[j,k] + for i in j+1:m + C[i,k] -= _ustrip(conj(A[i,j])) * Cj + end + end end - A[i,j] = Aij/B[j,j] - 
end - end - A -end -function rdiv!(A::StridedMatrix, B::UnitUpperTriangular) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = 1:n - Aij = A[i,j] - for k = 1:j - 1 - Aij -= A[i,k]*B.data[k,j] + else # isunitc == 'U' + for k in 1:n + C1 = C[1,k] = oA \ B[1,k] + # fill C-column + for i in 2:m + C[i,k] = oA \ B[i,k] - _ustrip(conj(A[i,1])) * C1 + end + for j in 1:m + Cj = C[j,k] + for i in j+1:m + C[i,k] -= _ustrip(conj(A[i,j])) * Cj + end + end end - A[i,j] = Aij end end - A + return C end -function rdiv!(A::StridedMatrix, B::LowerTriangular) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = n:-1:1 - Aij = A[i,j] - for k = j + 1:n - Aij -= A[i,k]*B.data[k,j] - end - A[i,j] = Aij/B[j,j] - end - end - A -end -function rdiv!(A::StridedMatrix, B::UnitLowerTriangular) +function generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, tfun::Function, A::AbstractMatrix, B::AbstractMatrix) + require_one_based_indexing(C, A, B) m, n = size(A) if size(B, 1) != n throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) end - for i = 1:m - for j = n:-1:1 - Aij = A[i,j] - for k = j + 1:n - Aij -= A[i,k]*B.data[k,j] - end - A[i,j] = Aij - end + if size(C) != size(A) + throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))")) end - A -end - -for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose)) - @eval begin - function rdiv!(A::StridedMatrix, xB::LowerTriangular{<:Any,<:$t}) - B = parent(parent(xB)) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = n:-1:1 + oB = oneunit(eltype(B)) + unit = isunitc == 'U' + @inbounds if uploc == 'U' + if tfun === identity + for i in 1:m + for j in 1:n Aij = A[i,j] - for k = j + 1:n - Aij -= A[i,k]*$tfun(B[j,k]) + for k in 1:j - 1 + Aij -= C[i,k]*B[k,j] end - A[i,j] = Aij/$tfun(B[j,j]) + unit || (iszero(B[j,j]) && throw(SingularException(j))) + C[i,j] = Aij / (unit ? oB : B[j,j]) end end - A - end - function rdiv!(A::StridedMatrix, xB::UnitLowerTriangular{<:Any,<:$t}) - B = parent(parent(xB)) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = n:-1:1 + else # tfun in (adjoint, transpose) + for i in 1:m + for j in n:-1:1 Aij = A[i,j] - for k = j + 1:n - Aij -= A[i,k]*$tfun(B[j,k]) + for k in j + 1:n + Aij -= C[i,k]*tfun(B[j,k]) end - A[i,j] = Aij + unit || (iszero(B[j,j]) && throw(SingularException(j))) + C[i,j] = Aij / (unit ? oB : tfun(B[j,j])) end end - A end - - function rdiv!(A::StridedMatrix, xB::UpperTriangular{<:Any,<:$t}) - B = parent(parent(xB)) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = 1:n + else # uploc == 'L' + if tfun === identity + for i in 1:m + for j in n:-1:1 Aij = A[i,j] - for k = 1:j - 1 - Aij -= A[i,k]*$tfun(B[j,k]) + for k in j + 1:n + Aij -= C[i,k]*B[k,j] end - A[i,j] = Aij/$tfun(B[j,j]) + unit || (iszero(B[j,j]) && throw(SingularException(j))) + C[i,j] = Aij / (unit ? 
oB : B[j,j]) end end - A - end - function rdiv!(A::StridedMatrix, xB::UnitUpperTriangular{<:Any,<:$t}) - B = parent(parent(xB)) - m, n = size(A) - if size(B, 1) != n - throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) - end - for i = 1:m - for j = 1:n + else # tfun in (adjoint, transpose) + for i in 1:m + for j in 1:n Aij = A[i,j] - for k = 1:j - 1 - Aij -= A[i,k]*$tfun(B[j,k]) + for k in 1:j - 1 + Aij -= C[i,k]*tfun(B[j,k]) end - A[i,j] = Aij + unit || (iszero(B[j,j]) && throw(SingularException(j))) + C[i,j] = Aij / (unit ? oB : tfun(B[j,j])) end end - A end end + return C end - -function lmul!(A::Union{UpperTriangular,UnitUpperTriangular}, B::UpperTriangular) - UpperTriangular(lmul!(A, triu!(B.data))) -end -function lmul!(A::Union{LowerTriangular,UnitLowerTriangular}, B::LowerTriangular) - return LowerTriangular(lmul!(A, tril!(B.data))) -end -function ldiv!(xA::Union{UpperTriangular,UnitUpperTriangular}, B::UpperTriangular) - return UpperTriangular(ldiv!(xA, triu!(B.data))) -end -function ldiv!(xA::Union{LowerTriangular,UnitLowerTriangular}, B::LowerTriangular) - return LowerTriangular(ldiv!(xA, tril!(B.data))) +function generic_mattridiv!(C::AbstractMatrix, uploc, isunitc, ::Function, A::AbstractMatrix, xB::AdjOrTrans) + B = parent(xB) + require_one_based_indexing(C, A, B) + m, n = size(A) + if size(B, 1) != n + throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))")) + end + if size(C) != size(A) + throw(DimensionMismatch("size of output, $(size(C)), does not match size of left hand side, $(size(A))")) + end + oB = oneunit(eltype(B)) + unit = isunitc == 'U' + if uploc == 'U' + @inbounds for i in 1:m + for j in 1:n + Aij = A[i,j] + for k in 1:j - 1 + Aij -= C[i,k]*conj(B[k,j]) + end + unit || (iszero(B[j,j]) && throw(SingularException(j))) + C[i,j] = Aij / (unit ? oB : conj(B[j,j])) + end + end + else # uploc == 'L' + @inbounds for i in 1:m + for j in n:-1:1 + Aij = A[i,j] + for k in j + 1:n + Aij -= C[i,k]*conj(B[k,j]) + end + unit || (iszero(B[j,j]) && throw(SingularException(j))) + C[i,j] = Aij / (unit ? oB : conj(B[j,j])) + end + end + end + return C end -function rdiv!(A::UpperTriangular, B::Union{UpperTriangular,UnitUpperTriangular}) - return UpperTriangular(rdiv!(triu!(A.data), B)) -end -function rdiv!(A::LowerTriangular, B::Union{LowerTriangular,UnitLowerTriangular}) - return LowerTriangular(rdiv!(tril!(A.data), B)) -end -function rmul!(A::UpperTriangular, B::Union{UpperTriangular,UnitUpperTriangular}) - return UpperTriangular(rmul!(triu!(A.data), B)) -end -function rmul!(A::LowerTriangular, B::Union{LowerTriangular,UnitLowerTriangular}) - return LowerTriangular(rmul!(tril!(A.data), B)) -end +# these are needed because we don't keep track of left- and right-multiplication in tritrimul! +rmul!(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(rmul!(triu!(A.data), B)) +rmul!(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(rmul!(triu!(A.data), B)) +rmul!(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(rmul!(tril!(A.data), B)) +rmul!(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(rmul!(tril!(A.data), B)) # Promotion ## Promotion methods in matmul don't apply to triangular multiplication since @@ -1509,184 +1504,97 @@ end ## the element type doesn't have to be stable under division whereas that is ## necessary in the general triangular solve problem. -## Some Triangular-Triangular cases. 
We might want to write tailored methods -## for these cases, but I'm not sure it is worth it. - -for (f, f2!) in ((:*, :lmul!), (:\, :ldiv!)) - @eval begin - function ($f)(A::LowerTriangular, B::LowerTriangular) - TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) + - ($f)(zero(eltype(A)), zero(eltype(B)))) - BB = copy_similar(B, TAB) - return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB)) - end - - function $(f)(A::UnitLowerTriangular, B::LowerTriangular) - TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) + - (*)(zero(eltype(A)), zero(eltype(B)))) - BB = copy_similar(B, TAB) - return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB)) - end - - function $(f)(A::LowerTriangular, B::UnitLowerTriangular) - TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) + - ($f)(zero(eltype(A)), zero(eltype(B)))) - BB = copy_similar(B, TAB) - return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB)) - end - - function $(f)(A::UnitLowerTriangular, B::UnitLowerTriangular) - TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) + - (*)(zero(eltype(A)), zero(eltype(B)))) - BB = copy_similar(B, TAB) - return UnitLowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB)) - end - - function ($f)(A::UpperTriangular, B::UpperTriangular) - TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) + - ($f)(zero(eltype(A)), zero(eltype(B)))) - BB = copy_similar(B, TAB) - return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB)) - end - - function ($f)(A::UnitUpperTriangular, B::UpperTriangular) - TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) + - (*)(zero(eltype(A)), zero(eltype(B)))) - BB = copy_similar(B, TAB) - return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB)) - end - - function ($f)(A::UpperTriangular, B::UnitUpperTriangular) - TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) + - ($f)(zero(eltype(A)), zero(eltype(B)))) - BB = copy_similar(B, TAB) - return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB)) - end - - function ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular) - TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) + - (*)(zero(eltype(A)), zero(eltype(B)))) - BB = copy_similar(B, TAB) - return UnitUpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB)) - end - end -end - -function (/)(A::LowerTriangular, B::LowerTriangular) - TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) + - (/)(zero(eltype(A)), one(eltype(B)))) - AA = copy_similar(A, TAB) - return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B))) -end -function (/)(A::UnitLowerTriangular, B::LowerTriangular) - TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) + - (/)(zero(eltype(A)), one(eltype(B)))) - AA = copy_similar(A, TAB) - return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B))) -end -function (/)(A::LowerTriangular, B::UnitLowerTriangular) - TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) + - (/)(zero(eltype(A)), one(eltype(B)))) - AA = copy_similar(A, TAB) - return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B))) -end -function (/)(A::UnitLowerTriangular, B::UnitLowerTriangular) - TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) + - (*)(zero(eltype(A)), zero(eltype(B)))) - AA = copy_similar(A, TAB) - return UnitLowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B))) -end -function (/)(A::UpperTriangular, B::UpperTriangular) - TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) + - (/)(zero(eltype(A)), one(eltype(B)))) - AA = copy_similar(A, TAB) - return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B))) -end -function 
(/)(A::UnitUpperTriangular, B::UpperTriangular) - TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) + - (/)(zero(eltype(A)), one(eltype(B)))) - AA = copy_similar(A, TAB) - return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B))) -end -function (/)(A::UpperTriangular, B::UnitUpperTriangular) - TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) + - (/)(zero(eltype(A)), one(eltype(B)))) - AA = copy_similar(A, TAB) - return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B))) -end -function (/)(A::UnitUpperTriangular, B::UnitUpperTriangular) - TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) + - (*)(zero(eltype(A)), zero(eltype(B)))) - AA = copy_similar(A, TAB) - return UnitUpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B))) -end - -_inner_type_promotion(A,B) = promote_type(eltype(A), eltype(B), typeof(zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))) +_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA<:Integer,TB<:Integer} = + _init_eltype(*, TA, TB) +_inner_type_promotion(op, ::Type{TA}, ::Type{TB}) where {TA,TB} = + _init_eltype(op, TA, TB) ## The general promotion methods function *(A::AbstractTriangular, B::AbstractTriangular) - TAB = _inner_type_promotion(A,B) - BB = copy_similar(B, TAB) - lmul!(convert(AbstractArray{TAB}, A), BB) + TAB = _init_eltype(*, eltype(A), eltype(B)) + mul!(similar(B, TAB, size(B)), A, B) end for mat in (:AbstractVector, :AbstractMatrix) ### Multiplication with triangle to the left and hence rhs cannot be transposed. @eval function *(A::AbstractTriangular, B::$mat) require_one_based_indexing(B) - TAB = _inner_type_promotion(A,B) - BB = copy_similar(B, TAB) - lmul!(convert(AbstractArray{TAB}, A), BB) + TAB = _init_eltype(*, eltype(A), eltype(B)) + mul!(similar(B, TAB, size(B)), A, B) end ### Left division with triangle to the left hence rhs cannot be transposed. No quotients. @eval function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat) require_one_based_indexing(B) - TAB = _inner_type_promotion(A,B) - BB = copy_similar(B, TAB) - ldiv!(convert(AbstractArray{TAB}, A), BB) + TAB = _inner_type_promotion(\, eltype(A), eltype(B)) + ldiv!(similar(B, TAB, size(B)), A, B) end ### Left division with triangle to the left hence rhs cannot be transposed. Quotients. @eval function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat) require_one_based_indexing(B) - TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A))) - BB = copy_similar(B, TAB) - ldiv!(convert(AbstractArray{TAB}, A), BB) + TAB = _init_eltype(\, eltype(A), eltype(B)) + ldiv!(similar(B, TAB, size(B)), A, B) end ### Right division with triangle to the right hence lhs cannot be transposed. No quotients. @eval function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular}) require_one_based_indexing(A) - TAB = _inner_type_promotion(A,B) - AA = copy_similar(A, TAB) - rdiv!(AA, convert(AbstractArray{TAB}, B)) + TAB = _inner_type_promotion(/, eltype(A), eltype(B)) + _rdiv!(similar(A, TAB, size(A)), A, B) end ### Right division with triangle to the right hence lhs cannot be transposed. Quotients. 
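# --- Editor's illustrative sketch (not part of the patch) ---------------------------------
# The promotion methods in this section (including the right-division method that follows)
# choose the output eltype via `_init_eltype`/`_inner_type_promotion` before calling the
# in-place kernels, so integer inputs yield a quotient-stable result while pure products
# keep the integer eltype. Hedged example, assuming the usual Int promotion rules:
using LinearAlgebra

U = UpperTriangular([2 1; 0 4])     # Int elements
x = U \ [1, 2]
eltype(x) == Float64                # division promotes to a floating-point eltype

B = [1 2; 3 4] * UnitUpperTriangular([1 5; 0 1])
eltype(B) == Int                    # multiplication alone keeps Int
# -------------------------------------------------------------------------------------------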
@eval function /(A::$mat, B::Union{UpperTriangular,LowerTriangular}) require_one_based_indexing(A) - TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A))) - AA = copy_similar(A, TAB) - rdiv!(AA, convert(AbstractArray{TAB}, B)) + TAB = _init_eltype(/, eltype(A), eltype(B)) + _rdiv!(similar(A, TAB, size(A)), A, B) end end ### Multiplication with triangle to the right and hence lhs cannot be transposed. # Only for AbstractMatrix, hence outside the above loop. function *(A::AbstractMatrix, B::AbstractTriangular) require_one_based_indexing(A) - TAB = _inner_type_promotion(A,B) - AA = copy_similar(A, TAB) - rmul!(AA, convert(AbstractArray{TAB}, B)) + TAB = _init_eltype(*, eltype(A), eltype(B)) + mul!(similar(A, TAB, size(A)), A, B) end -# ambiguity resolution with definitions in linalg/rowvector.jl +# ambiguity resolution with definitions in matmul.jl *(v::AdjointAbsVec, A::AbstractTriangular) = adjoint(adjoint(A) * v.parent) *(v::TransposeAbsVec, A::AbstractTriangular) = transpose(transpose(A) * v.parent) -# If these are not defined, they will fallback to the versions in matmul.jl -# and dispatch to generic_matmatmul! which is very costly to compile. The methods -# below might compute an unnecessary copy. Eliminating the copy requires adding -# all the promotion logic here once again. Since these methods are probably relatively -# rare, we chose not to bother for now. -*(A::Adjoint{<:Any,<:AbstractMatrix}, B::AbstractTriangular) = copy(A) * B -*(A::Transpose{<:Any,<:AbstractMatrix}, B::AbstractTriangular) = copy(A) * B -*(A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractMatrix}) = A * copy(B) -*(A::AbstractTriangular, B::Transpose{<:Any,<:AbstractMatrix}) = A * copy(B) +## Some Triangular-Triangular cases. We might want to write tailored methods +## for these cases, but I'm not sure it is worth it. 
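# --- Editor's illustrative sketch (not part of the patch) ---------------------------------
# The `@invoke`-based wrapper methods defined immediately below reuse the general promotion
# code above and then re-wrap the result, so products and quotients of same-shaped
# triangular matrices keep their triangular (and unit-triangular) wrapper type. Hedged
# example, assuming these wrappers behave as in the released LinearAlgebra:
using LinearAlgebra

L  = LowerTriangular([1.0 0.0; 2.0 3.0])
UL = UnitLowerTriangular([1.0 0.0; 4.0 1.0])

(L * UL) isa LowerTriangular          # mixed lower-triangular product stays LowerTriangular
(L \ UL) isa LowerTriangular
(UL / UL) isa UnitLowerTriangular     # unit / unit keeps the unit wrapper
# -------------------------------------------------------------------------------------------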
+for f in (:*, :\) + @eval begin + ($f)(A::LowerTriangular, B::LowerTriangular) = + LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix)) + ($f)(A::LowerTriangular, B::UnitLowerTriangular) = + LowerTriangular(@invoke $f(A::LowerTriangular, B::AbstractMatrix)) + ($f)(A::UnitLowerTriangular, B::LowerTriangular) = + LowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix)) + ($f)(A::UnitLowerTriangular, B::UnitLowerTriangular) = + UnitLowerTriangular(@invoke $f(A::UnitLowerTriangular, B::AbstractMatrix)) + ($f)(A::UpperTriangular, B::UpperTriangular) = + UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix)) + ($f)(A::UpperTriangular, B::UnitUpperTriangular) = + UpperTriangular(@invoke $f(A::UpperTriangular, B::AbstractMatrix)) + ($f)(A::UnitUpperTriangular, B::UpperTriangular) = + UpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix)) + ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular) = + UnitUpperTriangular(@invoke $f(A::UnitUpperTriangular, B::AbstractMatrix)) + end +end +(/)(A::LowerTriangular, B::LowerTriangular) = + LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular)) +(/)(A::LowerTriangular, B::UnitLowerTriangular) = + LowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular)) +(/)(A::UnitLowerTriangular, B::LowerTriangular) = + LowerTriangular(@invoke /(A::AbstractMatrix, B::LowerTriangular)) +(/)(A::UnitLowerTriangular, B::UnitLowerTriangular) = + UnitLowerTriangular(@invoke /(A::AbstractMatrix, B::UnitLowerTriangular)) +(/)(A::UpperTriangular, B::UpperTriangular) = + UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular)) +(/)(A::UpperTriangular, B::UnitUpperTriangular) = + UpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular)) +(/)(A::UnitUpperTriangular, B::UpperTriangular) = + UpperTriangular(@invoke /(A::AbstractMatrix, B::UpperTriangular)) +(/)(A::UnitUpperTriangular, B::UnitUpperTriangular) = + UnitUpperTriangular(@invoke /(A::AbstractMatrix, B::UnitUpperTriangular)) # Complex matrix power for upper triangular factor, see: # Higham and Lin, "A Schur-Padé algorithm for fractional powers of a Matrix", @@ -2304,7 +2212,7 @@ function sqrt(A::UnitUpperTriangular{T}) where T n = checksquare(B) t = typeof(sqrt(zero(T))) R = Matrix{t}(I, n, n) - tt = typeof(zero(t)*zero(t)) + tt = typeof(oneunit(t)*oneunit(t)) half = inv(R[1,1]+R[1,1]) # for general, algebraic cases. 
PR#20214 @inbounds for j = 1:n for i = j-1:-1:1 @@ -2312,7 +2220,7 @@ function sqrt(A::UnitUpperTriangular{T}) where T @simd for k = i+1:j-1 r -= R[i,k]*R[k,j] end - r==0 || (R[i,j] = half*r) + iszero(r) || (R[i,j] = half*r) end end return UnitUpperTriangular(R) @@ -2401,7 +2309,8 @@ function _sqrt_quasitriu_diag_block!(R, A) R[i, i] = sqrt(ta(A[i, i])) i += 1 else - # this branch is never reached when A is complex triangular + # This branch is never reached when A is complex triangular + @assert eltype(A) <: Real @views _sqrt_real_2x2!(R[i:(i + 1), i:(i + 1)], A[i:(i + 1), i:(i + 1)]) i += 2 end @@ -2658,10 +2567,6 @@ end factorize(A::AbstractTriangular) = A -# disambiguation methods: *(AbstractTriangular, Adj/Trans of AbstractVector) -*(A::AbstractTriangular, B::AdjointAbsVec) = adjoint(adjoint(B) * adjoint(A)) -*(A::AbstractTriangular, B::TransposeAbsVec) = transpose(transpose(B) * transpose(A)) - # disambiguation methods: /(Adjoint of AbsVec, <:AbstractTriangular) /(u::AdjointAbsVec, A::Union{LowerTriangular,UpperTriangular}) = adjoint(adjoint(A) \ u.parent) /(u::AdjointAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = adjoint(adjoint(A) \ u.parent) @@ -2676,3 +2581,94 @@ for (tritype, comptritype) in ((:LowerTriangular, :UpperTriangular), @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Adjoint}) = transpose($comptritype(conj(parent(parent(A)))) \ u.parent) @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Transpose}) = transpose(transpose(A) \ u.parent) end + +# Cube root of a 2x2 real-valued matrix with complex conjugate eigenvalues and equal diagonal values. +# Reference [1]: Smith, M. I. (2003). A Schur Algorithm for Computing Matrix pth Roots. +# SIAM Journal on Matrix Analysis and Applications (Vol. 24, Issue 4, pp. 971–989). +# https://doi.org/10.1137/s0895479801392697 +function _cbrt_2x2!(A::AbstractMatrix{T}) where {T<:Real} + @assert checksquare(A) == 2 + @inbounds begin + (A[1,1] == A[2,2]) || throw(ArgumentError("_cbrt_2x2!: Matrix A must have equal diagonal values.")) + (A[1,2]*A[2,1] < 0) || throw(ArgumentError("_cbrt_2x2!: Matrix A must have complex conjugate eigenvalues.")) + μ = sqrt(-A[1,2]*A[2,1]) + r = cbrt(hypot(A[1,1], μ)) + θ = atan(μ, A[1,1]) + s, c = sincos(θ/3) + α, β′ = r*c, r*s/µ + A[1,1] = α + A[2,2] = α + A[1,2] = β′*A[1,2] + A[2,1] = β′*A[2,1] + end + return A +end + +# Cube root of a quasi upper triangular matrix (output of Schur decomposition) +# Reference [1]: Smith, M. I. (2003). A Schur Algorithm for Computing Matrix pth Roots. +# SIAM Journal on Matrix Analysis and Applications (Vol. 24, Issue 4, pp. 971–989). 
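# --- Editor's illustrative sketch (not part of the patch) ---------------------------------
# A standalone restatement of the closed-form 2x2 real cube root used by `_cbrt_2x2!`
# above (Smith 2003), written out so it can be checked against cubing. The helper name
# `cbrt_2x2_sketch` is hypothetical; as in the patch, the input must have equal diagonal
# entries and complex-conjugate eigenvalues (A[1,2]*A[2,1] < 0):
using LinearAlgebra

function cbrt_2x2_sketch(A::AbstractMatrix{<:Real})
    a, b, c = A[1,1], A[1,2], A[2,1]
    μ = sqrt(-b*c)              # imaginary part of the eigenvalues a ± iμ
    r = cbrt(hypot(a, μ))       # cube root of the eigenvalue modulus
    θ = atan(μ, a)              # eigenvalue argument
    s, co = sincos(θ/3)
    α, β = r*co, r*s/μ
    return [α β*b; β*c α]
end

A = [1.0 -2.0; 3.0 1.0]         # equal diagonal, A[1,2]*A[2,1] < 0
R = cbrt_2x2_sketch(A)
R^3 ≈ A                          # expected to hold up to rounding
# -------------------------------------------------------------------------------------------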
+# https://doi.org/10.1137/s0895479801392697 +@views function _cbrt_quasi_triu!(A::AbstractMatrix{T}) where {T<:Real} + m, n = size(A) + (m == n) || throw(ArgumentError("_cbrt_quasi_triu!: Matrix A must be square.")) + # Cube roots of 1x1 and 2x2 diagonal blocks + i = 1 + sizes = ones(Int,n) + S = zeros(T,2,n) + while i < n + if !iszero(A[i+1,i]) + _cbrt_2x2!(A[i:i+1,i:i+1]) + mul!(S[1:2,i:i+1], A[i:i+1,i:i+1], A[i:i+1,i:i+1]) + sizes[i] = 2 + sizes[i+1] = 0 + i += 2 + else + A[i,i] = cbrt(A[i,i]) + S[1,i] = A[i,i]*A[i,i] + i += 1 + end + end + if i == n + A[n,n] = cbrt(A[n,n]) + S[1,n] = A[n,n]*A[n,n] + end + # Algorithm 4.3 in Reference [1] + Δ = I(4) + M_L₀ = zeros(T,4,4) + M_L₁ = zeros(T,4,4) + M_Bᵢⱼ⁽⁰⁾ = zeros(T,2,2) + M_Bᵢⱼ⁽¹⁾ = zeros(T,2,2) + for k = 1:n-1 + for i = 1:n-k + if sizes[i] == 0 || sizes[i+k] == 0 continue end + k₁, k₂ = i+1+(sizes[i+1]==0), i+k-1 + i₁, i₂, j₁, j₂, s₁, s₂ = i, i+sizes[i]-1, i+k, i+k+sizes[i+k]-1, sizes[i], sizes[i+k] + L₀ = M_L₀[1:s₁*s₂,1:s₁*s₂] + L₁ = M_L₁[1:s₁*s₂,1:s₁*s₂] + Bᵢⱼ⁽⁰⁾ = M_Bᵢⱼ⁽⁰⁾[1:s₁, 1:s₂] + Bᵢⱼ⁽¹⁾ = M_Bᵢⱼ⁽¹⁾[1:s₁, 1:s₂] + # Compute Bᵢⱼ⁽⁰⁾ and Bᵢⱼ⁽¹⁾ + mul!(Bᵢⱼ⁽⁰⁾, A[i₁:i₂,k₁:k₂], A[k₁:k₂,j₁:j₂]) + # Retreive Rᵢ,ᵢ₊ₖ as A[i+k,i]' + mul!(Bᵢⱼ⁽¹⁾, A[i₁:i₂,k₁:k₂], A[j₁:j₂,k₁:k₂]') + # Solve Uᵢ,ᵢ₊ₖ using Reference [1, (4.10)] + kron!(L₀, Δ[1:s₂,1:s₂], S[1:s₁,i₁:i₂]) + L₀ .+= kron!(L₁, A[j₁:j₂,j₁:j₂]', A[i₁:i₂,i₁:i₂]) + L₀ .+= kron!(L₁, S[1:s₂,j₁:j₂]', Δ[1:s₁,1:s₁]) + mul!(A[i₁:i₂,j₁:j₂], A[i₁:i₂,i₁:i₂], Bᵢⱼ⁽⁰⁾, -1.0, 1.0) + A[i₁:i₂,j₁:j₂] .-= Bᵢⱼ⁽¹⁾ + ldiv!(lu!(L₀), A[i₁:i₂,j₁:j₂][:]) + # Compute and store Rᵢ,ᵢ₊ₖ' in A[i+k,i] + mul!(Bᵢⱼ⁽⁰⁾, A[i₁:i₂,i₁:i₂], A[i₁:i₂,j₁:j₂], 1.0, 1.0) + mul!(Bᵢⱼ⁽⁰⁾, A[i₁:i₂,j₁:j₂], A[j₁:j₂,j₁:j₂], 1.0, 1.0) + A[j₁:j₂,i₁:i₂] .= Bᵢⱼ⁽⁰⁾' + end + end + # Make quasi triangular + for j=1:n for i=j+1+(sizes[j]==2):n A[i,j] = 0 end end + return A +end + +# Cube roots of real-valued triangular matrices +cbrt(A::UpperTriangular{T}) where {T<:Real} = UpperTriangular(_cbrt_quasi_triu!(Matrix{T}(A))) +cbrt(A::LowerTriangular{T}) where {T<:Real} = LowerTriangular(_cbrt_quasi_triu!(Matrix{T}(A'))') diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl index a686ab4421954..da15e1680f53a 100644 --- a/stdlib/LinearAlgebra/src/tridiag.jl +++ b/stdlib/LinearAlgebra/src/tridiag.jl @@ -70,9 +70,14 @@ julia> A[2,1] SymTridiagonal(dv::V, ev::V) where {T,V<:AbstractVector{T}} = SymTridiagonal{T}(dv, ev) SymTridiagonal{T}(dv::V, ev::V) where {T,V<:AbstractVector{T}} = SymTridiagonal{T,V}(dv, ev) function SymTridiagonal{T}(dv::AbstractVector, ev::AbstractVector) where {T} - SymTridiagonal(convert(AbstractVector{T}, dv)::AbstractVector{T}, - convert(AbstractVector{T}, ev)::AbstractVector{T}) + d = convert(AbstractVector{T}, dv)::AbstractVector{T} + e = convert(AbstractVector{T}, ev)::AbstractVector{T} + typeof(d) == typeof(e) ? 
+ SymTridiagonal{T}(d, e) : + throw(ArgumentError("diagonal vectors needed to be convertible to same type")) end +SymTridiagonal(d::AbstractVector{T}, e::AbstractVector{S}) where {T,S} = + SymTridiagonal{promote_type(T, S)}(d, e) """ SymTridiagonal(A::AbstractMatrix) @@ -103,8 +108,12 @@ julia> SymTridiagonal(B) ``` """ function SymTridiagonal(A::AbstractMatrix) - if (diag(A, 1) == transpose.(diag(A, -1))) && all(issymmetric.(diag(A, 0))) - SymTridiagonal(diag(A, 0), diag(A, 1)) + checksquare(A) + du = diag(A, 1) + d = diag(A) + dl = diag(A, -1) + if all(((x, y),) -> x == transpose(y), zip(du, dl)) && all(issymmetric, d) + SymTridiagonal(d, du) else throw(ArgumentError("matrix is not symmetric; cannot convert to SymTridiagonal")) end @@ -116,16 +125,17 @@ SymTridiagonal{T,V}(S::SymTridiagonal) where {T,V<:AbstractVector{T}} = SymTridiagonal{T}(S::SymTridiagonal{T}) where {T} = S SymTridiagonal{T}(S::SymTridiagonal) where {T} = SymTridiagonal(convert(AbstractVector{T}, S.dv)::AbstractVector{T}, - convert(AbstractVector{T}, S.ev)::AbstractVector{T}) + convert(AbstractVector{T}, S.ev)::AbstractVector{T}) SymTridiagonal(S::SymTridiagonal) = S -AbstractMatrix{T}(S::SymTridiagonal) where {T} = - SymTridiagonal(convert(AbstractVector{T}, S.dv)::AbstractVector{T}, - convert(AbstractVector{T}, S.ev)::AbstractVector{T}) +AbstractMatrix{T}(S::SymTridiagonal) where {T} = SymTridiagonal{T}(S) +AbstractMatrix{T}(S::SymTridiagonal{T}) where {T} = copy(S) + function Matrix{T}(M::SymTridiagonal) where T n = size(M, 1) - Mf = zeros(T, n, n) + Mf = Matrix{T}(undef, n, n) n == 0 && return Mf + n > 2 && fill!(Mf, zero(T)) @inbounds for i = 1:n-1 Mf[i,i] = symmetric(M.dv[i], :U) Mf[i+1,i] = transpose(M.ev[i]) @@ -137,19 +147,10 @@ end Matrix(M::SymTridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M) Array(M::SymTridiagonal) = Matrix(M) -size(A::SymTridiagonal) = (length(A.dv), length(A.dv)) -function size(A::SymTridiagonal, d::Integer) - if d < 1 - throw(ArgumentError("dimension must be ≥ 1, got $d")) - elseif d<=2 - return length(A.dv) - else - return 1 - end -end +size(A::SymTridiagonal) = (n = length(A.dv); (n, n)) similar(S::SymTridiagonal, ::Type{T}) where {T} = SymTridiagonal(similar(S.dv, T), similar(S.ev, T)) -similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = zeros(T, dims...) 
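# --- Editor's illustrative sketch (not part of the patch) ---------------------------------
# The promoting constructor added above converts mixed-eltype diagonal vectors to a common
# type before building the SymTridiagonal, and the matrix constructor still requires a
# genuinely symmetric tridiagonal band. Hedged example:
using LinearAlgebra

S = SymTridiagonal([1, 2, 3], [4.0, 5.0])        # Int dv, Float64 ev
eltype(S) == Float64                             # both bands converted to Float64

SymTridiagonal([1 2 0; 2 3 4; 0 4 5]) isa SymTridiagonal   # symmetric band: accepted
# SymTridiagonal([1 2; 5 3]) would throw an ArgumentError: the band is not symmetric
# -------------------------------------------------------------------------------------------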
+similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(S.dv, T, dims) copyto!(dest::SymTridiagonal, src::SymTridiagonal) = (copyto!(dest.dv, src.dv); copyto!(dest.ev, _evview(src)); dest) @@ -172,6 +173,8 @@ Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(ad ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(_evview(S)) issymmetric(S::SymTridiagonal) = true +tr(S::SymTridiagonal) = sum(S.dv) + function diag(M::SymTridiagonal{T}, n::Integer=0) where T<:Number # every branch call similar(..., ::Int) to make sure the # same vector type is returned independent of n @@ -211,97 +214,63 @@ end *(B::Number, A::SymTridiagonal) = SymTridiagonal(B*A.dv, B*A.ev) /(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv/B, A.ev/B) \(B::Number, A::SymTridiagonal) = SymTridiagonal(B\A.dv, B\A.ev) -==(A::SymTridiagonal, B::SymTridiagonal) = (A.dv==B.dv) && (_evview(A)==_evview(B)) - -@inline mul!(A::StridedVecOrMat, B::SymTridiagonal, C::StridedVecOrMat, - alpha::Number, beta::Number) = - _mul!(A, B, C, MulAddMul(alpha, beta)) - -@inline function _mul!(C::StridedVecOrMat, S::SymTridiagonal, B::StridedVecOrMat, - _add::MulAddMul) - m, n = size(B, 1), size(B, 2) - if !(m == size(S, 1) == size(C, 1)) - throw(DimensionMismatch("A has first dimension $(size(S,1)), B has $(size(B,1)), C has $(size(C,1)) but all must match")) - end - if n != size(C, 2) - throw(DimensionMismatch("second dimension of B, $n, doesn't match second dimension of C, $(size(C,2))")) - end - - if m == 0 - return C - elseif iszero(_add.alpha) - return _rmul_or_fill!(C, _add.beta) - end - - α = S.dv - β = S.ev - @inbounds begin - for j = 1:n - x₊ = B[1, j] - x₀ = zero(x₊) - # If m == 1 then β[1] is out of bounds - β₀ = m > 1 ? zero(β[1]) : zero(eltype(β)) - for i = 1:m - 1 - x₋, x₀, x₊ = x₀, x₊, B[i + 1, j] - β₋, β₀ = β₀, β[i] - _modify!(_add, β₋*x₋ + α[i]*x₀ + β₀*x₊, C, (i, j)) - end - _modify!(_add, β₀*x₀ + α[m]*x₊, C, (m, j)) - end - end - - return C -end +==(A::SymTridiagonal{<:Number}, B::SymTridiagonal{<:Number}) = + (A.dv == B.dv) && (_evview(A) == _evview(B)) +==(A::SymTridiagonal, B::SymTridiagonal) = + size(A) == size(B) && all(i -> A[i,i] == B[i,i], axes(A, 1)) && (_evview(A) == _evview(B)) function dot(x::AbstractVector, S::SymTridiagonal, y::AbstractVector) require_one_based_indexing(x, y) nx, ny = length(x), length(y) - (nx == size(S, 1) == ny) || throw(DimensionMismatch()) - if iszero(nx) - return dot(zero(eltype(x)), zero(eltype(S)), zero(eltype(y))) + (nx == size(S, 1) == ny) || throw(DimensionMismatch("dot")) + if nx ≤ 1 + nx == 0 && return dot(zero(eltype(x)), zero(eltype(S)), zero(eltype(y))) + return dot(x[1], S.dv[1], y[1]) end dv, ev = S.dv, S.ev - x₀ = x[1] - x₊ = x[2] - sub = transpose(ev[1]) - r = dot(adjoint(dv[1])*x₀ + adjoint(sub)*x₊, y[1]) - @inbounds for j in 2:nx-1 - x₋, x₀, x₊ = x₀, x₊, x[j+1] - sup, sub = transpose(sub), transpose(ev[j]) - r += dot(adjoint(sup)*x₋ + adjoint(dv[j])*x₀ + adjoint(sub)*x₊, y[j]) - end - r += dot(adjoint(transpose(sub))*x₀ + adjoint(dv[nx])*x₊, y[nx]) + @inbounds begin + x₀ = x[1] + x₊ = x[2] + sub = transpose(ev[1]) + r = dot(adjoint(dv[1])*x₀ + adjoint(sub)*x₊, y[1]) + for j in 2:nx-1 + x₋, x₀, x₊ = x₀, x₊, x[j+1] + sup, sub = transpose(sub), transpose(ev[j]) + r += dot(adjoint(sup)*x₋ + adjoint(dv[j])*x₀ + adjoint(sub)*x₊, y[j]) + end + r += dot(adjoint(transpose(sub))*x₀ + adjoint(dv[nx])*x₊, y[nx]) + end return r end -(\)(T::SymTridiagonal, B::StridedVecOrMat) = ldlt(T)\B +(\)(T::SymTridiagonal, 
B::AbstractVecOrMat) = ldlt(T)\B # division with optional shift for use in shifted-Hessenberg solvers (hessenberg.jl): ldiv!(A::SymTridiagonal, B::AbstractVecOrMat; shift::Number=false) = ldiv!(ldlt(A, shift=shift), B) rdiv!(B::AbstractVecOrMat, A::SymTridiagonal; shift::Number=false) = rdiv!(B, ldlt(A, shift=shift)) -eigen!(A::SymTridiagonal{<:BlasReal}) = Eigen(LAPACK.stegr!('V', A.dv, A.ev)...) +eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}) = Eigen(LAPACK.stegr!('V', A.dv, A.ev)...) eigen(A::SymTridiagonal{T}) where T = eigen!(copymutable_oftype(A, eigtype(T))) -eigen!(A::SymTridiagonal{<:BlasReal}, irange::UnitRange) = +eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, irange::UnitRange) = Eigen(LAPACK.stegr!('V', 'I', A.dv, A.ev, 0.0, 0.0, irange.start, irange.stop)...) eigen(A::SymTridiagonal{T}, irange::UnitRange) where T = eigen!(copymutable_oftype(A, eigtype(T)), irange) -eigen!(A::SymTridiagonal{<:BlasReal}, vl::Real, vu::Real) = +eigen!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, vl::Real, vu::Real) = Eigen(LAPACK.stegr!('V', 'V', A.dv, A.ev, vl, vu, 0, 0)...) eigen(A::SymTridiagonal{T}, vl::Real, vu::Real) where T = eigen!(copymutable_oftype(A, eigtype(T)), vl, vu) -eigvals!(A::SymTridiagonal{<:BlasReal}) = LAPACK.stev!('N', A.dv, A.ev)[1] +eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}) = LAPACK.stev!('N', A.dv, A.ev)[1] eigvals(A::SymTridiagonal{T}) where T = eigvals!(copymutable_oftype(A, eigtype(T))) -eigvals!(A::SymTridiagonal{<:BlasReal}, irange::UnitRange) = +eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, irange::UnitRange) = LAPACK.stegr!('N', 'I', A.dv, A.ev, 0.0, 0.0, irange.start, irange.stop)[1] eigvals(A::SymTridiagonal{T}, irange::UnitRange) where T = eigvals!(copymutable_oftype(A, eigtype(T)), irange) -eigvals!(A::SymTridiagonal{<:BlasReal}, vl::Real, vu::Real) = +eigvals!(A::SymTridiagonal{<:BlasReal,<:StridedVector}, vl::Real, vu::Real) = LAPACK.stegr!('N', 'V', A.dv, A.ev, vl, vu, 0, 0)[1] eigvals(A::SymTridiagonal{T}, vl::Real, vu::Real) where T = eigvals!(copymutable_oftype(A, eigtype(T)), vl, vu) @@ -349,7 +318,7 @@ julia> eigvecs(A, [1.]) -0.5547001962252291 ``` """ -eigvecs(A::SymTridiagonal{<:BlasFloat}, eigvals::Vector{<:Real}) = LAPACK.stein!(A.dv, A.ev, eigvals) +eigvecs(A::SymTridiagonal{<:BlasFloat,<:StridedVector}, eigvals::Vector{<:Real}) = LAPACK.stein!(A.dv, A.ev, eigvals) function svdvals!(A::SymTridiagonal) vals = eigvals!(A) @@ -445,6 +414,32 @@ end det(A::SymTridiagonal; shift::Number=false) = det_usmani(A.ev, A.dv, A.ev, shift) logabsdet(A::SymTridiagonal; shift::Number=false) = logabsdet(ldlt(A; shift=shift)) +@inline function Base.isassigned(A::SymTridiagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + if i == j + return @inbounds isassigned(A.dv, i) + elseif i == j + 1 + return @inbounds isassigned(A.ev, j) + elseif i + 1 == j + return @inbounds isassigned(A.ev, i) + else + return true + end +end + +@inline function Base.isstored(A::SymTridiagonal, i::Int, j::Int) + @boundscheck checkbounds(A, i, j) + if i == j + return @inbounds Base.isstored(A.dv, i) + elseif i == j + 1 + return @inbounds Base.isstored(A.ev, j) + elseif i + 1 == j + return @inbounds Base.isstored(A.ev, i) + else + return false + end +end + @inline function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j @@ -482,13 +477,13 @@ struct Tridiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T} "lengths of subdiagonal, diagonal and superdiagonal: ", 
"($(length(dl)), $(length(d)), $(length(du)))"))) end - new{T,V}(dl, d, du) + new{T,V}(dl, d, Base.unalias(dl, du)) end # constructor used in lu! function Tridiagonal{T,V}(dl, d, du, du2) where {T,V<:AbstractVector{T}} require_one_based_indexing(dl, d, du, du2) # length checks? - new{T,V}(dl, d, du, du2) + new{T,V}(dl, d, Base.unalias(dl, du), du2) end end @@ -501,6 +496,10 @@ solvers, but may be converted into a regular matrix with [`convert(Array, _)`](@ref) (or `Array(_)` for short). The lengths of `dl` and `du` must be one less than the length of `d`. +!!! note + The subdiagonal `dl` and the superdiagonal `du` must not be aliased to each other. + If aliasing is detected, the constructor will use a copy of `du` as its argument. + # Examples ```jldoctest julia> dl = [1, 2, 3]; @@ -519,8 +518,21 @@ julia> Tridiagonal(dl, d, du) """ Tridiagonal(dl::V, d::V, du::V) where {T,V<:AbstractVector{T}} = Tridiagonal{T,V}(dl, d, du) Tridiagonal(dl::V, d::V, du::V, du2::V) where {T,V<:AbstractVector{T}} = Tridiagonal{T,V}(dl, d, du, du2) +Tridiagonal(dl::AbstractVector{T}, d::AbstractVector{S}, du::AbstractVector{U}) where {T,S,U} = + Tridiagonal{promote_type(T, S, U)}(dl, d, du) +Tridiagonal(dl::AbstractVector{T}, d::AbstractVector{S}, du::AbstractVector{U}, du2::AbstractVector{V}) where {T,S,U,V} = + Tridiagonal{promote_type(T, S, U, V)}(dl, d, du, du2) function Tridiagonal{T}(dl::AbstractVector, d::AbstractVector, du::AbstractVector) where {T} - Tridiagonal(map(x->convert(AbstractVector{T}, x), (dl, d, du))...) + l, d, u = map(x->convert(AbstractVector{T}, x), (dl, d, du)) + typeof(l) == typeof(d) == typeof(u) ? + Tridiagonal(l, d, u) : + throw(ArgumentError("diagonal vectors needed to be convertible to same type")) +end +function Tridiagonal{T}(dl::AbstractVector, d::AbstractVector, du::AbstractVector, du2::AbstractVector) where {T} + l, d, u, u2 = map(x->convert(AbstractVector{T}, x), (dl, d, du, du2)) + typeof(l) == typeof(d) == typeof(u) == typeof(u2) ? 
+ Tridiagonal(l, d, u, u2) : + throw(ArgumentError("diagonal vectors needed to be convertible to same type")) end """ @@ -551,30 +563,30 @@ Tridiagonal(A::AbstractMatrix) = Tridiagonal(diag(A,-1), diag(A,0), diag(A,1)) Tridiagonal(A::Tridiagonal) = A Tridiagonal{T}(A::Tridiagonal{T}) where {T} = A function Tridiagonal{T}(A::Tridiagonal) where {T} - dl, d, du = map(x->convert(AbstractVector{T}, x)::AbstractVector{T}, - (A.dl, A.d, A.du)) + dl, d, du = map(x -> convert(AbstractVector{T}, x)::AbstractVector{T}, (A.dl, A.d, A.du)) if isdefined(A, :du2) - Tridiagonal(dl, d, du, convert(AbstractVector{T}, A.du2)::AbstractVector{T}) + Tridiagonal{T}(dl, d, du, convert(AbstractVector{T}, A.du2)::AbstractVector{T}) else - Tridiagonal(dl, d, du) + Tridiagonal{T}(dl, d, du) end end - -size(M::Tridiagonal) = (length(M.d), length(M.d)) -function size(M::Tridiagonal, d::Integer) - if d < 1 - throw(ArgumentError("dimension d must be ≥ 1, got $d")) - elseif d <= 2 - return length(M.d) +Tridiagonal{T,V}(A::Tridiagonal{T,V}) where {T,V<:AbstractVector{T}} = A +function Tridiagonal{T,V}(A::Tridiagonal) where {T,V<:AbstractVector{T}} + dl, d, du = map(x -> convert(V, x)::V, (A.dl, A.d, A.du)) + if isdefined(A, :du2) + Tridiagonal{T,V}(dl, d, du, convert(V, A.du2)::V) else - return 1 + Tridiagonal{T,V}(dl, d, du) end end +size(M::Tridiagonal) = (n = length(M.d); (n, n)) + function Matrix{T}(M::Tridiagonal) where {T} - A = zeros(T, size(M)) + A = Matrix{T}(undef, size(M)) n = length(M.d) n == 0 && return A + n > 2 && fill!(A, zero(T)) for i in 1:n-1 A[i,i] = M.d[i] A[i+1,i] = M.dl[i] @@ -587,7 +599,7 @@ Matrix(M::Tridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M Array(M::Tridiagonal) = Matrix(M) similar(M::Tridiagonal, ::Type{T}) where {T} = Tridiagonal(similar(M.dl, T), similar(M.d, T), similar(M.du, T)) -similar(M::Tridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = zeros(T, dims...) 
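# --- Editor's illustrative sketch (not part of the patch) ---------------------------------
# The promoting Tridiagonal constructors above convert mixed-eltype bands to a common type,
# and the inner constructor unaliases `du` from `dl` as stated in the docstring note.
# Hedged example, assuming `Base.unalias` copies aliased arguments as documented:
using LinearAlgebra

T = Tridiagonal([1, 2, 3], [1.0, 2.0, 3.0, 4.0], [1, 2, 3])
eltype(T) == Float64                   # all three bands promoted to Float64

band = [10.0, 20.0, 30.0]
T2 = Tridiagonal(band, ones(4), band)  # dl and du alias the same vector
band[1] = -1.0
T2[1, 2] == 10.0                       # du was copied, so the superdiagonal is unaffected
T2[2, 1] == -1.0                       # dl is still the original vector, so the subdiagonal changes
# -------------------------------------------------------------------------------------------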
+similar(M::Tridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = similar(M.d, T, dims) # Operations on Tridiagonal matrices copyto!(dest::Tridiagonal, src::Tridiagonal) = (copyto!(dest.dl, src.dl); copyto!(dest.d, src.d); copyto!(dest.du, src.du); dest) @@ -614,7 +626,7 @@ Base.copy(tS::Transpose{<:Any,<:Tridiagonal}) = (S = tS.parent; Tridiagonal(map( ishermitian(S::Tridiagonal) = all(ishermitian, S.d) && all(Iterators.map((x, y) -> x == y', S.du, S.dl)) issymmetric(S::Tridiagonal) = all(issymmetric, S.d) && all(Iterators.map((x, y) -> x == transpose(y), S.du, S.dl)) -\(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:StridedVecOrMat}) = copy(A) \ B +\(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:AbstractVecOrMat}) = copy(A) \ B function diag(M::Tridiagonal{T}, n::Integer=0) where T # every branch call similar(..., ::Int) to make sure the @@ -633,6 +645,32 @@ function diag(M::Tridiagonal{T}, n::Integer=0) where T end end +@inline function Base.isassigned(A::Tridiagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + if i == j + return @inbounds isassigned(A.d, i) + elseif i == j + 1 + return @inbounds isassigned(A.dl, j) + elseif i + 1 == j + return @inbounds isassigned(A.du, i) + else + return true + end +end + +@inline function Base.isstored(A::Tridiagonal, i::Int, j::Int) + @boundscheck checkbounds(A, i, j) + if i == j + return @inbounds Base.isstored(A.d, i) + elseif i == j + 1 + return @inbounds Base.isstored(A.dl, j) + elseif i + 1 == j + return @inbounds Base.isstored(A.du, i) + else + return false + end +end + @inline function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j @@ -731,6 +769,8 @@ function triu!(M::Tridiagonal{T}, k::Integer=0) where T return M end +tr(M::Tridiagonal) = sum(M.d) + ################### # Generic methods # ################### @@ -754,6 +794,7 @@ end det(A::Tridiagonal) = det_usmani(A.dl, A.d, A.du) AbstractMatrix{T}(M::Tridiagonal) where {T} = Tridiagonal{T}(M) +AbstractMatrix{T}(M::Tridiagonal{T}) where {T} = copy(M) Tridiagonal{T}(M::SymTridiagonal{T}) where {T} = Tridiagonal(M) function SymTridiagonal{T}(M::Tridiagonal) where T if issymmetric(M) @@ -841,18 +882,21 @@ function dot(x::AbstractVector, A::Tridiagonal, y::AbstractVector) require_one_based_indexing(x, y) nx, ny = length(x), length(y) (nx == size(A, 1) == ny) || throw(DimensionMismatch()) - if iszero(nx) - return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) - end - x₀ = x[1] - x₊ = x[2] - dl, d, du = A.dl, A.d, A.du - r = dot(adjoint(d[1])*x₀ + adjoint(dl[1])*x₊, y[1]) - @inbounds for j in 2:nx-1 - x₋, x₀, x₊ = x₀, x₊, x[j+1] - r += dot(adjoint(du[j-1])*x₋ + adjoint(d[j])*x₀ + adjoint(dl[j])*x₊, y[j]) - end - r += dot(adjoint(du[nx-1])*x₀ + adjoint(d[nx])*x₊, y[nx]) + if nx ≤ 1 + nx == 0 && return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y))) + return dot(x[1], A.d[1], y[1]) + end + @inbounds begin + x₀ = x[1] + x₊ = x[2] + dl, d, du = A.dl, A.d, A.du + r = dot(adjoint(d[1])*x₀ + adjoint(dl[1])*x₊, y[1]) + for j in 2:nx-1 + x₋, x₀, x₊ = x₀, x₊, x[j+1] + r += dot(adjoint(du[j-1])*x₋ + adjoint(d[j])*x₀ + adjoint(dl[j])*x₊, y[j]) + end + r += dot(adjoint(du[nx-1])*x₀ + adjoint(d[nx])*x₊, y[nx]) + end return r end @@ -864,3 +908,74 @@ function cholesky(S::SymTridiagonal, ::NoPivot = NoPivot(); check::Bool = true) T = choltype(eltype(S)) cholesky!(Hermitian(Bidiagonal{T}(diag(S, 0), diag(S, 1), :U)), NoPivot(); check = check) end + +# See dgtsv.f +""" + 
ldiv!(A::Tridiagonal, B::AbstractVecOrMat) -> B + +Compute `A \\ B` in-place by Gaussian elimination with partial pivoting and store the result +in `B`, returning the result. In the process, the diagonals of `A` are overwritten as well. + +!!! compat "Julia 1.11" + `ldiv!` for `Tridiagonal` left-hand sides requires at least Julia 1.11. +""" +function ldiv!(A::Tridiagonal, B::AbstractVecOrMat) + LinearAlgebra.require_one_based_indexing(B) + n = size(A, 1) + if n != size(B,1) + throw(DimensionMismatch("matrix has dimensions ($n,$n) but right hand side has $(size(B,1)) rows")) + end + nrhs = size(B, 2) + + # Initialize variables + dl = A.dl + d = A.d + du = A.du + + @inbounds begin + for i in 1:n-1 + # pivot or not? + if abs(d[i]) >= abs(dl[i]) + # No interchange + if d[i] != 0 + fact = dl[i]/d[i] + d[i+1] -= fact*du[i] + for j in 1:nrhs + B[i+1,j] -= fact*B[i,j] + end + else + checknonsingular(i, RowMaximum()) + end + i < n-1 && (dl[i] = 0) + else + # Interchange + fact = d[i]/dl[i] + d[i] = dl[i] + tmp = d[i+1] + d[i+1] = du[i] - fact*tmp + du[i] = tmp + if i < n-1 + dl[i] = du[i+1] + du[i+1] = -fact*dl[i] + end + for j in 1:nrhs + temp = B[i,j] + B[i,j] = B[i+1,j] + B[i+1,j] = temp - fact*B[i+1,j] + end + end + end + iszero(d[n]) && checknonsingular(n, RowMaximum()) + # backward substitution + for j in 1:nrhs + B[n,j] /= d[n] + if n > 1 + B[n-1,j] = (B[n-1,j] - du[n-1]*B[n,j])/d[n-1] + end + for i in n-2:-1:1 + B[i,j] = (B[i,j] - du[i]*B[i+1,j] - dl[i]*B[i+2,j]) / d[i] + end + end + end + return B +end diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl index 661bd28cb8f91..47a282b91b9ea 100644 --- a/stdlib/LinearAlgebra/src/uniformscaling.jl +++ b/stdlib/LinearAlgebra/src/uniformscaling.jl @@ -118,7 +118,7 @@ function show(io::IO, ::MIME"text/plain", J::UniformScaling) end copy(J::UniformScaling) = UniformScaling(J.λ) -Base.convert(::Type{UniformScaling{T}}, J::UniformScaling) where {T} = UniformScaling(convert(T, J.λ)) +Base.convert(::Type{UniformScaling{T}}, J::UniformScaling) where {T} = UniformScaling(convert(T, J.λ))::UniformScaling{T} conj(J::UniformScaling) = UniformScaling(conj(J.λ)) real(J::UniformScaling) = UniformScaling(real(J.λ)) @@ -179,7 +179,7 @@ for (t1, t2) in ((:UnitUpperTriangular, :UpperTriangular), (:UnitLowerTriangular, :LowerTriangular)) @eval begin function (+)(UL::$t1, J::UniformScaling) - ULnew = copymutable_oftype(UL.data, Base._return_type(+, Tuple{eltype(UL), typeof(J)})) + ULnew = copymutable_oftype(UL.data, Base.promote_op(+, eltype(UL), typeof(J))) for i in axes(ULnew, 1) ULnew[i,i] = one(ULnew[i,i]) + J end @@ -193,7 +193,7 @@ end # However, to preserve type stability, we do not special-case a # UniformScaling{<:Complex} that happens to be real. 
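# --- Editor's illustrative sketch (not part of the patch) ---------------------------------
# Usage sketch for the in-place Tridiagonal solver documented earlier in this hunk
# (Gaussian elimination with partial pivoting; Julia 1.11+ per the compat note). Both the
# right-hand side and the diagonals of A are overwritten, hence the `copy(A)` below:
using LinearAlgebra

A = Tridiagonal([1.0, 2.0], [4.0, 5.0, 6.0], [7.0, 8.0])
b = [1.0, 2.0, 3.0]

x = A \ b              # non-mutating reference solution
ldiv!(copy(A), b)      # in-place: b now holds the solution, the copy of A is destroyed
b ≈ x                  # expected to hold up to rounding
# -------------------------------------------------------------------------------------------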
function (+)(A::Hermitian, J::UniformScaling{<:Complex}) - TS = Base._return_type(+, Tuple{eltype(A), typeof(J)}) + TS = Base.promote_op(+, eltype(A), typeof(J)) B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true) for i in diagind(B) B[i] = A[i] + J @@ -202,7 +202,7 @@ function (+)(A::Hermitian, J::UniformScaling{<:Complex}) end function (-)(J::UniformScaling{<:Complex}, A::Hermitian) - TS = Base._return_type(+, Tuple{eltype(A), typeof(J)}) + TS = Base.promote_op(+, eltype(A), typeof(J)) B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true) B .= .-B for i in diagind(B) @@ -213,7 +213,7 @@ end function (+)(A::AbstractMatrix, J::UniformScaling) checksquare(A) - B = copymutable_oftype(A, Base._return_type(+, Tuple{eltype(A), typeof(J)})) + B = copymutable_oftype(A, Base.promote_op(+, eltype(A), typeof(J))) for i in intersect(axes(A,1), axes(A,2)) @inbounds B[i,i] += J end @@ -222,7 +222,7 @@ end function (-)(J::UniformScaling, A::AbstractMatrix) checksquare(A) - B = convert(AbstractMatrix{Base._return_type(+, Tuple{eltype(A), typeof(J)})}, -A) + B = convert(AbstractMatrix{Base.promote_op(+, eltype(A), typeof(J))}, -A) for i in intersect(axes(A,1), axes(A,2)) @inbounds B[i,i] += J end @@ -293,7 +293,7 @@ function mul!(out::AbstractMatrix{T}, a::Number, B::UniformScaling, α::Number, end s = convert(T, a*B.λ*α) if !iszero(s) - @inbounds for i in diagind(out) + @inbounds for i in diagind(out, IndexStyle(out)) out[i] += s end end @@ -381,115 +381,25 @@ function copyto!(A::AbstractMatrix, J::UniformScaling) return A end -function cond(J::UniformScaling{T}) where T - onereal = inv(one(real(J.λ))) - return J.λ ≠ zero(T) ? onereal : oftype(onereal, Inf) +function copyto!(A::Diagonal, J::UniformScaling) + A.diag .= J.λ + return A end - -# promote_to_arrays(n,k, T, A...) promotes any UniformScaling matrices -# in A to matrices of type T and sizes given by n[k:end]. n is an array -# so that the same promotion code can be used for hvcat. We pass the type T -# so that we can re-use this code for sparse-matrix hcat etcetera. -promote_to_arrays_(n::Int, ::Type, a::Number) = a -promote_to_arrays_(n::Int, ::Type{Matrix}, J::UniformScaling{T}) where {T} = Matrix(J, n, n) -promote_to_arrays_(n::Int, ::Type, A::AbstractVecOrMat) = A -promote_to_arrays(n,k, ::Type) = () -promote_to_arrays(n,k, ::Type{T}, A) where {T} = (promote_to_arrays_(n[k], T, A),) -promote_to_arrays(n,k, ::Type{T}, A, B) where {T} = - (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B)) -promote_to_arrays(n,k, ::Type{T}, A, B, C) where {T} = - (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays_(n[k+2], T, C)) -promote_to_arrays(n,k, ::Type{T}, A, B, Cs...) where {T} = - (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays(n,k+2, T, Cs...)...) -promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling,Number}}}) = Matrix - -for (f, _f, dim, name) in ((:hcat, :_hcat, 1, "rows"), (:vcat, :_vcat, 2, "cols")) - @eval begin - @inline $f(A::Union{AbstractVecOrMat,UniformScaling}...) = $_f(A...) - @inline $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...) = $_f(A...) 
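# --- Editor's illustrative sketch (not part of the patch) ---------------------------------
# The structured `copyto!(A, J::UniformScaling)` methods added in this file (Diagonal above,
# Bidiagonal/SymTridiagonal/Tridiagonal just below) fill the main diagonal with J.λ and zero
# the stored off-diagonal bands. Hedged usage example:
using LinearAlgebra

D = Diagonal(zeros(3))
copyto!(D, 2I)
D == Diagonal(fill(2.0, 3))        # diagonal filled with λ

T = Tridiagonal(ones(2), ones(3), ones(2))
copyto!(T, 5I)
T == Matrix(5.0I, 3, 3)            # off-diagonal bands zeroed, diagonal set to λ
# -------------------------------------------------------------------------------------------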
- function $_f(A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A)) - n = -1 - for a in A - if !isa(a, UniformScaling) - require_one_based_indexing(a) - na = size(a,$dim) - n >= 0 && n != na && - throw(DimensionMismatch(string("number of ", $name, - " of each array must match (got ", n, " and ", na, ")"))) - n = na - end - end - n == -1 && throw(ArgumentError($("$f of only UniformScaling objects cannot determine the matrix size"))) - return cat(promote_to_arrays(fill(n, length(A)), 1, array_type, A...)..., dims=Val(3-$dim)) - end - end +function copyto!(A::Union{Bidiagonal, SymTridiagonal}, J::UniformScaling) + A.ev .= 0 + A.dv .= J.λ + return A +end +function copyto!(A::Tridiagonal, J::UniformScaling) + A.dl .= 0 + A.du .= 0 + A.d .= J.λ + return A end -hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling}...) = _hvcat(rows, A...) -hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...) = _hvcat(rows, A...) -function _hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A)) - require_one_based_indexing(A...) - nr = length(rows) - sum(rows) == length(A) || throw(ArgumentError("mismatch between row sizes and number of arguments")) - n = fill(-1, length(A)) - needcols = false # whether we also need to infer some sizes from the column count - j = 0 - for i = 1:nr # infer UniformScaling sizes from row counts, if possible: - ni = -1 # number of rows in this block-row, -1 indicates unknown - for k = 1:rows[i] - if !isa(A[j+k], UniformScaling) - na = size(A[j+k], 1) - ni >= 0 && ni != na && - throw(DimensionMismatch("mismatch in number of rows")) - ni = na - end - end - if ni >= 0 - for k = 1:rows[i] - n[j+k] = ni - end - else # row consisted only of UniformScaling objects - needcols = true - end - j += rows[i] - end - if needcols # some sizes still unknown, try to infer from column count - nc = -1 - j = 0 - for i = 1:nr - nci = 0 - rows[i] > 0 && n[j+1] == -1 && (j += rows[i]; continue) - for k = 1:rows[i] - nci += isa(A[j+k], UniformScaling) ? n[j+k] : size(A[j+k], 2) - end - nc >= 0 && nc != nci && throw(DimensionMismatch("mismatch in number of columns")) - nc = nci - j += rows[i] - end - nc == -1 && throw(ArgumentError("sizes of UniformScalings could not be inferred")) - j = 0 - for i = 1:nr - if rows[i] > 0 && n[j+1] == -1 # this row consists entirely of UniformScalings - nci, r = divrem(nc, rows[i]) - r != 0 && throw(DimensionMismatch("indivisible UniformScaling sizes")) - for k = 1:rows[i] - n[j+k] = nci - end - end - j += rows[i] - end - end - Amat = promote_to_arrays(n, 1, array_type, A...) - # We have two methods for promote_to_array_type, one returning Matrix and - # another one returning SparseMatrixCSC (in SparseArrays.jl). In the dense - # case, we cannot call hvcat for the promoted UniformScalings because this - # causes a stack overflow. In the sparse case, however, we cannot call - # typed_hvcat because we need a sparse output. - if array_type == Matrix - return typed_hvcat(promote_eltype(Amat...), rows, Amat...) - else - return hvcat(rows, Amat...) - end +function cond(J::UniformScaling{T}) where T + onereal = inv(one(real(J.λ))) + return J.λ ≠ zero(T) ? 
onereal : oftype(onereal, Inf) end ## Matrix construction from UniformScaling @@ -509,10 +419,6 @@ Array{T}(s::UniformScaling, m::Integer, n::Integer) where {T} = Matrix{T}(s, m, Array(s::UniformScaling, m::Integer, n::Integer) = Matrix(s, m, n) Array(s::UniformScaling, dims::Dims{2}) = Matrix(s, dims) -## Diagonal construction from UniformScaling -Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m)) -Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m) - dot(A::AbstractMatrix, J::UniformScaling) = dot(tr(A), J.λ) dot(J::UniformScaling, A::AbstractMatrix) = dot(J.λ, tr(A)) @@ -523,8 +429,3 @@ dot(x::AbstractVector, a::Union{Real,Complex}, y::AbstractVector) = a*dot(x, y) # muladd Base.muladd(A::UniformScaling, B::UniformScaling, z::UniformScaling) = UniformScaling(A.λ * B.λ + z.λ) -Base.muladd(A::Union{Diagonal, UniformScaling}, B::Union{Diagonal, UniformScaling}, z::Union{Diagonal, UniformScaling}) = - Diagonal(_diag_or_value(A) .* _diag_or_value(B) .+ _diag_or_value(z)) - -_diag_or_value(A::Diagonal) = A.diag -_diag_or_value(A::UniformScaling) = A.λ diff --git a/stdlib/LinearAlgebra/test/abstractq.jl b/stdlib/LinearAlgebra/test/abstractq.jl new file mode 100644 index 0000000000000..19b872d685668 --- /dev/null +++ b/stdlib/LinearAlgebra/test/abstractq.jl @@ -0,0 +1,101 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module TestAbstractQ + +using Test +using LinearAlgebra +using LinearAlgebra: AbstractQ, AdjointQ +import LinearAlgebra: lmul!, rmul! +import Base: size, convert + +n = 5 + +@testset "custom AbstractQ type" begin + struct MyQ{T,S<:AbstractQ{T}} <: AbstractQ{T} + Q::S + end + MyQ{T}(Q::AbstractQ) where {T} = (P = convert(AbstractQ{T}, Q); MyQ{T,typeof(P)}(P)) + MyQ(Q::MyQ) = Q + + Base.size(Q::MyQ) = size(Q.Q) + LinearAlgebra.lmul!(Q::MyQ, B::AbstractVecOrMat) = lmul!(Q.Q, B) + LinearAlgebra.lmul!(adjQ::AdjointQ{<:Any,<:MyQ}, B::AbstractVecOrMat) = lmul!(parent(adjQ).Q', B) + LinearAlgebra.rmul!(A::AbstractVecOrMat, Q::MyQ) = rmul!(A, Q.Q) + LinearAlgebra.rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:MyQ}) = rmul!(A, parent(adjQ).Q') + Base.convert(::Type{AbstractQ{T}}, Q::MyQ) where {T} = MyQ{T}(Q.Q) + LinearAlgebra.det(Q::MyQ) = det(Q.Q) + + for T in (Float64, ComplexF64) + A = rand(T, n, n) + F = qr(A) + Q = MyQ(F.Q) + @test ndims(Q) == 2 + T <: Real && @test transpose(Q) == adjoint(Q) + T <: Complex && @test_throws ErrorException transpose(Q) + @test convert(AbstractQ{complex(T)}, Q) isa MyQ{complex(T)} + @test convert(AbstractQ{complex(T)}, Q') isa AdjointQ{<:complex(T),<:MyQ{complex(T)}} + @test *(Q) == Q + @test Q*I ≈ Q.Q*I rtol=2eps(real(T)) + @test Q'*I ≈ Q.Q'*I rtol=2eps(real(T)) + @test I*Q ≈ Q.Q*I rtol=2eps(real(T)) + @test I*Q' ≈ I*Q.Q' rtol=2eps(real(T)) + @test abs(det(Q)) ≈ 1 + @test logabsdet(Q)[1] ≈ 0 atol=2n*eps(real(T)) + y = rand(T, n) + @test Q * y ≈ Q.Q * y ≈ Q' \ y ≈ ldiv!(Q', copy(y)) ≈ ldiv!(zero(y), Q', y) + @test Q'y ≈ Q.Q' * y ≈ Q \ y ≈ ldiv!(Q, copy(y)) ≈ ldiv!(zero(y), Q, y) + @test y'Q ≈ y'Q.Q ≈ y' / Q' + @test y'Q' ≈ y'Q.Q' ≈ y' / Q + y = Matrix(y') + @test y*Q ≈ y*Q.Q ≈ y / Q' ≈ rdiv!(copy(y), Q') + @test y*Q' ≈ y*Q.Q' ≈ y / Q ≈ rdiv!(copy(y), Q) + Y = rand(T, n, n); X = similar(Y) + for transQ in (identity, adjoint), transY in (identity, adjoint), Y in (Y, Y') + @test mul!(X, transQ(Q), transY(Y)) ≈ transQ(Q) * transY(Y) ≈ transQ(Q.Q) * transY(Y) + @test mul!(X, transY(Y), transQ(Q)) ≈ transY(Y) * transQ(Q) ≈ transY(Y) * transQ(Q.Q) + end + @test 
convert(Matrix, Q) ≈ Matrix(Q) ≈ Q[:,:] ≈ copyto!(zeros(T, size(Q)), Q) ≈ Q.Q*I + @test convert(Matrix, Q') ≈ Matrix(Q') ≈ (Q')[:,:] ≈ copyto!(zeros(T, size(Q)), Q') ≈ Q.Q'*I + @test Q[1,:] == Q.Q[1,:] == view(Q, 1, :) + @test Q[:,1] == Q.Q[:,1] == view(Q, :, 1) + @test Q[1,1] == Q.Q[1,1] + @test Q[:] == Q.Q[:] + @test Q[:,1:3] == Q.Q[:,1:3] == view(Q, :, 1:3) + @test Q[:,1:3] ≈ Matrix(Q)[:,1:3] + @test Q[2:3,2:3] == view(Q, 2:3, 2:3) ≈ Matrix(Q)[2:3,2:3] + @test_throws BoundsError Q[0,1] + @test_throws BoundsError Q[n+1,1] + @test_throws BoundsError Q[1,0] + @test_throws BoundsError Q[1,n+1] + @test_throws BoundsError Q[:,1:n+1] + @test_throws BoundsError Q[:,0:n] + for perm in ((1, 2), (2, 1)) + P = PermutedDimsArray(zeros(T, size(Q)), perm) + @test copyto!(P, Q) ≈ Matrix(Q) + end + x = randn(T) + @test x * Q ≈ (x*I)*Q ≈ x * Q.Q + @test Q * x ≈ Q*(x*I) ≈ Q.Q * x + @test x * Q' ≈ (x*I)* Q' ≈ x * Q.Q' + @test Q' * x ≈ Q'*(x*I) ≈ Q.Q' * x + x = rand(T, 1) + Q = MyQ(qr(rand(T, 1, 1)).Q) + @test x * Q ≈ x * Q.Q + @test x * Q' ≈ x * Q.Q' + @test Q * x ≈ Q.Q * x + @test Q' * x ≈ Q.Q' * x + end + A = rand(Float64, 5, 3) + F = qr(A) + Q = MyQ(F.Q) + Prect = Matrix(F.Q) + Psquare = collect(F.Q) + @test Q == Prect + @test Q == Psquare + @test Q == F.Q*I + @test Q ≈ Prect + @test Q ≈ Psquare + @test Q ≈ F.Q*I +end + +end # module diff --git a/stdlib/LinearAlgebra/test/addmul.jl b/stdlib/LinearAlgebra/test/addmul.jl index 72fdf687bf5c3..3fff8289242f7 100644 --- a/stdlib/LinearAlgebra/test/addmul.jl +++ b/stdlib/LinearAlgebra/test/addmul.jl @@ -164,7 +164,8 @@ end Bc = Matrix(B) returned_mat = mul!(C, A, B, α, β) @test returned_mat === C - @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol y = C[:, 1] x = B[:, 1] @@ -189,7 +190,8 @@ end returned_mat = mul!(C, Af, Bf, α, β) @test returned_mat === C - @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol end end end @@ -201,7 +203,8 @@ end Bc = Matrix(B) returned_mat = mul!(C, A, B, α, zero(eltype(C))) @test returned_mat === C - @test collect(returned_mat) ≈ α * Ac * Bc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc rtol=rtol end end diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl index 7b782d463768d..2c533af37f912 100644 --- a/stdlib/LinearAlgebra/test/adjtrans.jl +++ b/stdlib/LinearAlgebra/test/adjtrans.jl @@ -6,6 +6,9 @@ using Test, LinearAlgebra const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") +isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) +using .Main.OffsetArrays + @testset "Adjoint and Transpose inner constructor basics" begin intvec, intmat = [1, 2], [1 2; 3 4] # Adjoint/Transpose eltype must match the type of the Adjoint/Transpose of the input eltype @@ -87,11 +90,15 @@ end @test size(Transpose(intvec)) == (1, length(intvec)) @test size(Transpose(intmat)) == reverse(size(intmat)) end - @testset "indices methods" begin + @testset "axes methods" begin @test axes(Adjoint(intvec)) == (Base.OneTo(1), Base.OneTo(length(intvec))) @test axes(Adjoint(intmat)) == 
reverse(axes(intmat)) @test axes(Transpose(intvec)) == (Base.OneTo(1), Base.OneTo(length(intvec))) @test axes(Transpose(intmat)) == reverse(axes(intmat)) + + A = OffsetArray([1,2], 2) + @test (@inferred axes(A')[2]) === axes(A,1) + @test (@inferred axes(A')[1]) === axes(A,2) end @testset "IndexStyle methods" begin @test IndexStyle(Adjoint(intvec)) == IndexLinear() @@ -476,6 +483,16 @@ end @test adjoint!(b, a) === b end +@testset "copyto! uses adjoint!/transpose!" begin + for T in (Float64, ComplexF64), f in (transpose, adjoint), sz in ((5,4), (5,)) + S = rand(T, sz) + adjS = f(S) + A = similar(S') + copyto!(A, adjS) + @test A == adjS + end +end + @testset "aliasing with adjoint and transpose" begin A = collect(reshape(1:25, 5, 5)) .+ rand.().*im B = copy(A) @@ -489,13 +506,13 @@ end @test B == A .* A' end -@testset "test show methods for $t of Factorizations" for t in (Adjoint, Transpose) - A = randn(4, 4) +@testset "test show methods for $t of Factorizations" for t in (adjoint, transpose) + A = randn(ComplexF64, 4, 4) F = lu(A) Fop = t(F) - @test "LinearAlgebra."*sprint(show, Fop) == + @test sprint(show, Fop) == "$t of "*sprint(show, parent(Fop)) - @test "LinearAlgebra."*sprint((io, t) -> show(io, MIME"text/plain"(), t), Fop) == + @test sprint((io, t) -> show(io, MIME"text/plain"(), t), Fop) == "$t of "*sprint((io, t) -> show(io, MIME"text/plain"(), t), parent(Fop)) end @@ -588,24 +605,102 @@ end @test transpose(Int[]) * Int[] == 0 end -@testset "reductions: $adjtrans" for adjtrans in [transpose, adjoint] - mat = rand(ComplexF64, 3,5) - @test sum(adjtrans(mat)) ≈ sum(collect(adjtrans(mat))) - @test sum(adjtrans(mat), dims=1) ≈ sum(collect(adjtrans(mat)), dims=1) - @test sum(adjtrans(mat), dims=(1,2)) ≈ sum(collect(adjtrans(mat)), dims=(1,2)) +@testset "reductions: $adjtrans" for adjtrans in (transpose, adjoint) + for (reduction, reduction!, op) in ((sum, sum!, +), (prod, prod!, *), (minimum, minimum!, min), (maximum, maximum!, max)) + T = op in (max, min) ? 
Float64 : ComplexF64 + mat = rand(T, 3,5) + rd1 = zeros(T, 1, 3) + rd2 = zeros(T, 5, 1) + rd3 = zeros(T, 1, 1) + @test reduction(adjtrans(mat)) ≈ reduction(copy(adjtrans(mat))) + @test reduction(adjtrans(mat), dims=1) ≈ reduction(copy(adjtrans(mat)), dims=1) + @test reduction(adjtrans(mat), dims=2) ≈ reduction(copy(adjtrans(mat)), dims=2) + @test reduction(adjtrans(mat), dims=(1,2)) ≈ reduction(copy(adjtrans(mat)), dims=(1,2)) + + @test reduction!(rd1, adjtrans(mat)) ≈ reduction!(rd1, copy(adjtrans(mat))) + @test reduction!(rd2, adjtrans(mat)) ≈ reduction!(rd2, copy(adjtrans(mat))) + @test reduction!(rd3, adjtrans(mat)) ≈ reduction!(rd3, copy(adjtrans(mat))) + + @test reduction(imag, adjtrans(mat)) ≈ reduction(imag, copy(adjtrans(mat))) + @test reduction(imag, adjtrans(mat), dims=1) ≈ reduction(imag, copy(adjtrans(mat)), dims=1) + @test reduction(imag, adjtrans(mat), dims=2) ≈ reduction(imag, copy(adjtrans(mat)), dims=2) + @test reduction(imag, adjtrans(mat), dims=(1,2)) ≈ reduction(imag, copy(adjtrans(mat)), dims=(1,2)) + + @test Base.mapreducedim!(imag, op, rd1, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd1, copy(adjtrans(mat))) + @test Base.mapreducedim!(imag, op, rd2, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd2, copy(adjtrans(mat))) + @test Base.mapreducedim!(imag, op, rd3, adjtrans(mat)) ≈ Base.mapreducedim!(imag, op, rd3, copy(adjtrans(mat))) + + op in (max, min) && continue + mat = [rand(T,2,2) for _ in 1:3, _ in 1:5] + rd1 = fill(zeros(T, 2, 2), 1, 3) + rd2 = fill(zeros(T, 2, 2), 5, 1) + rd3 = fill(zeros(T, 2, 2), 1, 1) + @test reduction(adjtrans(mat)) ≈ reduction(copy(adjtrans(mat))) + @test reduction(adjtrans(mat), dims=1) ≈ reduction(copy(adjtrans(mat)), dims=1) + @test reduction(adjtrans(mat), dims=2) ≈ reduction(copy(adjtrans(mat)), dims=2) + @test reduction(adjtrans(mat), dims=(1,2)) ≈ reduction(copy(adjtrans(mat)), dims=(1,2)) + + @test reduction(imag, adjtrans(mat)) ≈ reduction(imag, copy(adjtrans(mat))) + @test reduction(x -> x[1,2], adjtrans(mat)) ≈ reduction(x -> x[1,2], copy(adjtrans(mat))) + @test reduction(imag, adjtrans(mat), dims=1) ≈ reduction(imag, copy(adjtrans(mat)), dims=1) + @test reduction(x -> x[1,2], adjtrans(mat), dims=1) ≈ reduction(x -> x[1,2], copy(adjtrans(mat)), dims=1) + end + # see #46605 + Ac = [1 2; 3 4]' + @test mapreduce(identity, (x, y) -> 10x+y, copy(Ac)) == mapreduce(identity, (x, y) -> 10x+y, Ac) == 1234 + @test extrema([3,7,4]') == (3, 7) + @test mapreduce(x -> [x;;;], +, [1, 2, 3]') == sum(x -> [x;;;], [1, 2, 3]') == [6;;;] + @test mapreduce(string, *, [1 2; 3 4]') == mapreduce(string, *, copy([1 2; 3 4]')) == "1234" +end - @test sum(imag, adjtrans(mat)) ≈ sum(imag, collect(adjtrans(mat))) - @test sum(imag, adjtrans(mat), dims=1) ≈ sum(imag, collect(adjtrans(mat)), dims=1) +@testset "trace" begin + for T in (Float64, ComplexF64), t in (adjoint, transpose) + A = randn(T, 10, 10) + @test tr(t(A)) == tr(copy(t(A))) == t(tr(A)) + end +end - mat = [rand(ComplexF64,2,2) for _ in 1:3, _ in 1:5] - @test sum(adjtrans(mat)) ≈ sum(collect(adjtrans(mat))) - @test sum(adjtrans(mat), dims=1) ≈ sum(collect(adjtrans(mat)), dims=1) - @test sum(adjtrans(mat), dims=(1,2)) ≈ sum(collect(adjtrans(mat)), dims=(1,2)) +@testset "structured printing" begin + D = Diagonal(1:3) + @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D) + @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D) + D = Diagonal((1:3)*im) + D2 = Diagonal((1:3)*(-im)) + @test sprint(Base.print_matrix, Transpose(D)) == 
sprint(Base.print_matrix, D) + @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D2) + + struct OneHotVecOrMat{N} <: AbstractArray{Bool,N} + inds::NTuple{N,Int} + sz::NTuple{N,Int} + end + Base.size(x::OneHotVecOrMat) = x.sz + function Base.getindex(x::OneHotVecOrMat{N}, inds::Vararg{Int,N}) where {N} + checkbounds(x, inds...) + inds == x.inds + end + Base.replace_in_print_matrix(o::OneHotVecOrMat{1}, i::Integer, j::Integer, s::AbstractString) = + o.inds == (i,) ? s : Base.replace_with_centered_mark(s) + Base.replace_in_print_matrix(o::OneHotVecOrMat{2}, i::Integer, j::Integer, s::AbstractString) = + o.inds == (i,j) ? s : Base.replace_with_centered_mark(s) + + o = OneHotVecOrMat((2,), (4,)) + @test sprint(Base.print_matrix, Transpose(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4))) + @test sprint(Base.print_matrix, Adjoint(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4))) +end - @test sum(imag, adjtrans(mat)) ≈ sum(imag, collect(adjtrans(mat))) - @test sum(x -> x[1,2], adjtrans(mat)) ≈ sum(x -> x[1,2], collect(adjtrans(mat))) - @test sum(imag, adjtrans(mat), dims=1) ≈ sum(imag, collect(adjtrans(mat)), dims=1) - @test sum(x -> x[1,2], adjtrans(mat), dims=1) ≈ sum(x -> x[1,2], collect(adjtrans(mat)), dims=1) +@testset "copy_transpose!" begin + # scalar case + A = [randn() for _ in 1:2, _ in 1:3] + At = copy(transpose(A)) + B = zero.(At) + LinearAlgebra.copy_transpose!(B, axes(B, 1), axes(B, 2), A, axes(A, 1), axes(A, 2)) + @test B == At + # matrix of matrices + A = [randn(2,3) for _ in 1:2, _ in 1:3] + At = copy(transpose(A)) + B = zero.(At) + LinearAlgebra.copy_transpose!(B, axes(B, 1), axes(B, 2), A, axes(A, 1), axes(A, 2)) + @test B == At end end # module TestAdjointTranspose diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl index adaae98250ee4..bf65088dc953b 100644 --- a/stdlib/LinearAlgebra/test/bidiag.jl +++ b/stdlib/LinearAlgebra/test/bidiag.jl @@ -13,6 +13,12 @@ using .Main.Furlongs isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl")) using .Main.Quaternions +isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl")) +using .Main.InfiniteArrays + +isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) +using .Main.FillArrays + include("testutils.jl") # test_approx_eq_modphase n = 10 #Size of test matrix @@ -52,6 +58,9 @@ Random.seed!(1) # from matrix @test Bidiagonal(ubd, :U) == Bidiagonal(Matrix(ubd), :U) == ubd @test Bidiagonal(lbd, :L) == Bidiagonal(Matrix(lbd), :L) == lbd + # from its own type + @test typeof(ubd)(ubd) === ubd + @test typeof(lbd)(lbd) === lbd end @test eltype(Bidiagonal{elty}([1,2,3,4], [1.0f0,2.0f0,3.0f0], :U)) == elty @test eltype(Bidiagonal([1,2,3,4], [1.0f0,2.0f0,3.0f0], :U)) == Float32 # promotion test @@ -93,7 +102,7 @@ Random.seed!(1) @test_throws BoundsError ubd[1, n + 1] = 1 @test ((cubd[2, 2] = 10) == 10; cubd[2, 2] == 10) # bidiagonal size - @test_throws ArgumentError size(ubd, 0) + @test_throws BoundsError size(ubd, 0) @test size(ubd, 1) == size(ubd, 2) == n @test size(ubd, 3) == 1 # bidiagonal similar @@ -111,6 +120,21 @@ Random.seed!(1) @test_throws ArgumentError Bl[4, 5] = 1 end + @testset "isstored" begin + ubd = Bidiagonal(dv, ev, :U) + lbd = Bidiagonal(dv, ev, :L) + # bidiagonal isstored / upper & lower + @test_throws BoundsError Base.isstored(ubd, n + 1, 1) + @test_throws BoundsError 
Base.isstored(ubd, 1, n + 1) + @test Base.isstored(ubd, 2, 2) + # bidiagonal isstored / upper + @test Base.isstored(ubd, 2, 3) + @test !Base.isstored(ubd, 3, 2) + # bidiagonal isstored / lower + @test Base.isstored(lbd, 3, 2) + @test !Base.isstored(lbd, 2, 3) + end + @testset "show" begin BD = Bidiagonal(dv, ev, :U) dstring = sprint(Base.print_matrix,BD.dv') @@ -126,12 +150,12 @@ Random.seed!(1) @testset "Constructor and basic properties" begin @test size(T, 1) == size(T, 2) == n @test size(T) == (n, n) - @test Array(T) == diagm(0 => dv, (uplo == :U ? 1 : -1) => ev) + @test Array(T) == diagm(0 => dv, (uplo === :U ? 1 : -1) => ev) @test Bidiagonal(Array(T), uplo) == T @test big.(T) == T - @test Array(abs.(T)) == abs.(diagm(0 => dv, (uplo == :U ? 1 : -1) => ev)) - @test Array(real(T)) == real(diagm(0 => dv, (uplo == :U ? 1 : -1) => ev)) - @test Array(imag(T)) == imag(diagm(0 => dv, (uplo == :U ? 1 : -1) => ev)) + @test Array(abs.(T)) == abs.(diagm(0 => dv, (uplo === :U ? 1 : -1) => ev)) + @test Array(real(T)) == real(diagm(0 => dv, (uplo === :U ? 1 : -1) => ev)) + @test Array(imag(T)) == imag(diagm(0 => dv, (uplo === :U ? 1 : -1) => ev)) end @testset for func in (conj, transpose, adjoint) @@ -215,6 +239,17 @@ Random.seed!(1) end end + @testset "trace" begin + for uplo in (:U, :L) + B = Bidiagonal(dv, ev, uplo) + if relty <: Integer + @test tr(B) == tr(Matrix(B)) + else + @test tr(B) ≈ tr(Matrix(B)) rtol=2eps(relty) + end + end + end + Tfull = Array(T) @testset "Linear solves" begin if relty <: AbstractFloat @@ -309,31 +344,26 @@ Random.seed!(1) @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf)) end @testset "Specialized multiplication/division" begin + getval(x) = x + getval(x::Furlong) = x.val function _bidiagdivmultest(T, x, typemul=T.uplo == 'U' ? UpperTriangular : Matrix, typediv=T.uplo == 'U' ? UpperTriangular : Matrix, typediv2=T.uplo == 'U' ? UpperTriangular : Matrix) TM = Matrix(T) - @test (T*x)::typemul ≈ TM*x #broken=eltype(x) <: Furlong - @test (x*T)::typemul ≈ x*TM #broken=eltype(x) <: Furlong - @test (x\T)::typediv ≈ x\TM #broken=eltype(T) <: Furlong - @test (T/x)::typediv ≈ TM/x #broken=eltype(T) <: Furlong + @test map(getval, (T*x)::typemul) ≈ map(getval, TM*x) + @test map(getval, (x*T)::typemul) ≈ map(getval, x*TM) + @test map(getval, (x\T)::typediv) ≈ map(getval, x\TM) + @test map(getval, (T/x)::typediv) ≈ map(getval, TM/x) if !isa(x, Number) - @test (T\x)::typediv2 ≈ TM\x #broken=eltype(x) <: Furlong - @test (x/T)::typediv2 ≈ x/TM #broken=eltype(x) <: Furlong + @test map(getval, Array((T\x)::typediv2)) ≈ map(getval, Array(TM\x)) + @test map(getval, Array((x/T)::typediv2)) ≈ map(getval, Array(x/TM)) end return nothing end - if relty <: Integer - A = convert(Matrix{elty}, rand(1:10, n, n)) - if (elty <: Complex) - A += im*convert(Matrix{elty}, rand(1:10, n, n)) - end - else - A = rand(elty, n, n) - end - for t in (T, #=Furlong.(T)=#), (A, dv, ev) in ((A, dv, ev), #=(Furlong.(A), Furlong.(dv), Furlong.(ev))=#) + A = Matrix(T) + for t in (T, Furlong.(T)), (A, dv, ev) in ((A, dv, ev), (Furlong.(A), Furlong.(dv), Furlong.(ev))) _bidiagdivmultest(t, 5, Bidiagonal, Bidiagonal) _bidiagdivmultest(t, 5I, Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular) _bidiagdivmultest(t, Diagonal(dv), Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular) @@ -356,7 +386,7 @@ Random.seed!(1) @testset "diag" begin @test (@inferred diag(T))::typeof(dv) == dv - @test (@inferred diag(T, uplo == :U ? 
1 : -1))::typeof(dv) == ev + @test (@inferred diag(T, uplo === :U ? 1 : -1))::typeof(dv) == ev @test (@inferred diag(T,2))::typeof(dv) == zeros(elty, n-2) @test_throws ArgumentError diag(T, -n - 1) @test_throws ArgumentError diag(T, n + 1) @@ -364,7 +394,7 @@ Random.seed!(1) gdv, gev = GenericArray(dv), GenericArray(ev) G = Bidiagonal(gdv, gev, uplo) @test (@inferred diag(G))::typeof(gdv) == gdv - @test (@inferred diag(G, uplo == :U ? 1 : -1))::typeof(gdv) == gev + @test (@inferred diag(G, uplo === :U ? 1 : -1))::typeof(gdv) == gev @test (@inferred diag(G,2))::typeof(gdv) == GenericArray(zeros(elty, n-2)) end @@ -372,9 +402,9 @@ Random.seed!(1) if relty <: AbstractFloat d1, v1 = eigen(T) d2, v2 = eigen(map(elty<:Complex ? ComplexF64 : Float64,Tfull), sortby=nothing) - @test (uplo == :U ? d1 : reverse(d1)) ≈ d2 + @test (uplo === :U ? d1 : reverse(d1)) ≈ d2 if elty <: Real - test_approx_eq_modphase(v1, uplo == :U ? v2 : v2[:,n:-1:1]) + test_approx_eq_modphase(v1, uplo === :U ? v2 : v2[:,n:-1:1]) end end end @@ -409,6 +439,9 @@ Random.seed!(1) for op in (+, -, *) @test Array(op(T, T2)) ≈ op(Tfull, Tfull2) end + A = kron(T.dv, T.dv') + @test T * A ≈ lmul!(T, copy(A)) + @test A * T ≈ rmul!(copy(A), T) end # test pass-through of mul! for SymTridiagonal*Bidiagonal TriSym = SymTridiagonal(T.dv, T.ev) @@ -416,7 +449,8 @@ Random.seed!(1) # test pass-through of mul! for AbstractTriangular*Bidiagonal Tri = UpperTriangular(diagm(1 => T.ev)) Dia = Diagonal(T.dv) - @test Array(Tri*T) ≈ Array(Tri)*Array(T) + @test Array(Tri*T) ≈ Array(Tri)*Array(T) ≈ rmul!(copy(Tri), T) + @test Array(T*Tri) ≈ Array(T)*Array(Tri) ≈ lmul!(T, copy(Tri)) # test mul! itself for these types for AA in (Tri, Dia) for f in (identity, transpose, adjoint) @@ -429,8 +463,10 @@ Random.seed!(1) for f in (identity, transpose, adjoint) C = relty == Int ? rand(float(elty), n, n) : rand(elty, n, n) B = rand(elty, n, n) - D = copy(C) + 2.0 * Array(T*f(B)) - mul!(C, T, f(B), 2.0, 1.0) ≈ D + D = C + 2.0 * Array(T*f(B)) + @test mul!(C, T, f(B), 2.0, 1.0) ≈ D + @test lmul!(T, copy(f(B))) ≈ T * f(B) + @test rmul!(copy(f(B)), T) ≈ f(B) * T end # Issue #31870 @@ -627,14 +663,14 @@ end end @testset "generalized dot" begin - for elty in (Float64, ComplexF64) - dv = randn(elty, 5) - ev = randn(elty, 4) - x = randn(elty, 5) - y = randn(elty, 5) + for elty in (Float64, ComplexF64), n in (5, 1) + dv = randn(elty, n) + ev = randn(elty, n-1) + x = randn(elty, n) + y = randn(elty, n) for uplo in (:U, :L) B = Bidiagonal(dv, ev, uplo) - @test dot(x, B, y) ≈ dot(B'x, y) ≈ dot(x, Matrix(B), y) + @test dot(x, B, y) ≈ dot(B'x, y) ≈ dot(x, B*y) ≈ dot(x, Matrix(B), y) end dv = Vector{elty}(undef, 0) ev = Vector{elty}(undef, 0) @@ -642,7 +678,7 @@ end y = Vector{elty}(undef, 0) for uplo in (:U, :L) B = Bidiagonal(dv, ev, uplo) - @test dot(x, B, y) ≈ dot(zero(elty), zero(elty), zero(elty)) + @test dot(x, B, y) === zero(elty) end end end @@ -782,6 +818,37 @@ end @test iszero(BL[i,j]) end end + + M = ones(2,2) + for n in 0:1 + dv = fill(M, n) + ev = fill(M, 0) + B = Bidiagonal(dv, ev, :U) + @test B == Matrix{eltype(B)}(B) + end +end + +@testset "copyto! 
with UniformScaling" begin + @testset "Fill" begin + for len in (4, InfiniteArrays.Infinity()) + d = FillArrays.Fill(1, len) + ud = FillArrays.Fill(0, len-1) + B = Bidiagonal(d, ud, :U) + @test copyto!(B, I) === B + end + end + B = Bidiagonal(fill(2, 4), fill(3, 3), :U) + copyto!(B, I) + @test all(isone, diag(B)) + @test all(iszero, diag(B, 1)) +end + +@testset "diagind" begin + B = Bidiagonal(1:4, 1:3, :U) + M = Matrix(B) + @testset for k in -4:4 + @test B[diagind(B,k)] == M[diagind(M,k)] + end end end # module TestBidiagonal diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl index 0a2ac87c8026d..dd432952cb56e 100644 --- a/stdlib/LinearAlgebra/test/blas.jl +++ b/stdlib/LinearAlgebra/test/blas.jl @@ -4,6 +4,7 @@ module TestBLAS using Test, LinearAlgebra, Random using LinearAlgebra: BlasReal, BlasComplex +using Libdl: dlsym, dlopen fabs(x::Real) = abs(x) fabs(x::Complex) = abs(real(x)) + abs(imag(x)) @@ -11,16 +12,21 @@ fabs(x::Complex) = abs(real(x)) + abs(imag(x)) function pack(A, uplo) AP = eltype(A)[] n = size(A, 1) - for j in 1:n, i in (uplo==:L ? (j:n) : (1:j)) + for j in 1:n, i in (uplo === :L ? (j:n) : (1:j)) push!(AP, A[i,j]) end return AP end @testset "vec_pointer_stride" begin - a = zeros(4,4,4) - @test BLAS.asum(view(a,1:2:4,:,:)) == 0 # vector like + a = float(rand(1:20,4,4,4)) + @test BLAS.asum(a) == sum(a) # dense case + @test BLAS.asum(view(a,1:2:4,:,:)) == sum(view(a,1:2:4,:,:)) # vector like + @test BLAS.asum(view(a,1:3,2:2,3:3)) == sum(view(a,1:3,2:2,3:3)) + @test BLAS.asum(view(a,1:1,1:3,1:1)) == sum(view(a,1:1,1:3,1:1)) + @test BLAS.asum(view(a,1:1,1:1,1:3)) == sum(view(a,1:1,1:1,1:3)) @test_throws ArgumentError BLAS.asum(view(a,1:3:4,:,:)) # non-vector like + @test_throws ArgumentError BLAS.asum(view(a,1:2,1:1,1:3)) end Random.seed!(100) ## BLAS tests - testing the interface code to BLAS routines @@ -129,7 +135,7 @@ Random.seed!(100) end end - @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n), view(rand(elty,n), n:-1:1)), + @testset "ger, geru, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n), view(rand(elty,n), n:-1:1)), y in (rand(elty,n), view(rand(elty,3n), 1:3:3n), view(rand(elty,2n), 2n:-2:2)) A = rand(elty,n,n) @@ -138,6 +144,9 @@ Random.seed!(100) @test BLAS.ger!(α,x,y,copy(A)) ≈ A + α*x*y' @test_throws DimensionMismatch BLAS.ger!(α,Vector{elty}(undef,n+1),y,copy(A)) + @test BLAS.geru!(α,x,y,copy(A)) ≈ A + α*x*transpose(y) + @test_throws DimensionMismatch BLAS.geru!(α,Vector{elty}(undef,n+1),y,copy(A)) + A = rand(elty,n,n) A = A + transpose(A) @test issymmetric(A) @@ -222,11 +231,19 @@ Random.seed!(100) @test_throws DimensionMismatch BLAS.symm('R','U',Cmn,Cnn) @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cnn,one(elty),Cmn) @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cnn,one(elty),Cnm) + @test_throws DimensionMismatch BLAS.symm!('L','U',one(elty),Asymm,Cmn,one(elty),Cnn) + @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cnm,one(elty),Cmn) + @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cnn,one(elty),Cnm) + @test_throws DimensionMismatch BLAS.symm!('R','U',one(elty),Asymm,Cmn,one(elty),Cnn) if elty <: BlasComplex @test_throws DimensionMismatch BLAS.hemm('L','U',Cnm,Cnn) @test_throws DimensionMismatch BLAS.hemm('R','U',Cmn,Cnn) @test_throws DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cnn,one(elty),Cmn) @test_throws DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cnn,one(elty),Cnm) + @test_throws 
DimensionMismatch BLAS.hemm!('L','U',one(elty),Aherm,Cmn,one(elty),Cnn) + @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cnm,one(elty),Cmn) + @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cnn,one(elty),Cnm) + @test_throws DimensionMismatch BLAS.hemm!('R','U',one(elty),Aherm,Cmn,one(elty),Cnn) end end end @@ -433,6 +450,40 @@ Random.seed!(100) end end end + @testset "gemmt" begin + for (wrapper, uplo) in ((LowerTriangular, 'L'), (UpperTriangular, 'U')) + @test wrapper(BLAS.gemmt(uplo, 'N', 'N', I4, I4)) ≈ wrapper(I4) + @test wrapper(BLAS.gemmt(uplo, 'N', 'T', I4, I4)) ≈ wrapper(I4) + @test wrapper(BLAS.gemmt(uplo, 'T', 'N', I4, I4)) ≈ wrapper(I4) + @test wrapper(BLAS.gemmt(uplo, 'T', 'T', I4, I4)) ≈ wrapper(I4) + @test wrapper(BLAS.gemmt(uplo, 'N', 'N', el2, I4, I4)) ≈ wrapper(el2 * I4) + @test wrapper(BLAS.gemmt(uplo, 'N', 'T', el2, I4, I4)) ≈ wrapper(el2 * I4) + @test wrapper(BLAS.gemmt(uplo, 'T', 'N', el2, I4, I4)) ≈ wrapper(el2 * I4) + @test wrapper(BLAS.gemmt(uplo, 'T', 'T', el2, I4, I4)) ≈ wrapper(el2 * I4) + I4cp = copy(I4) + @test wrapper(BLAS.gemmt!(uplo, 'N', 'N', one(elty), I4, I4, elm1, I4cp)) ≈ wrapper(Z4) + @test I4cp ≈ Z4 + I4cp[:] = I4 + @test wrapper(BLAS.gemmt!(uplo, 'N', 'T', one(elty), I4, I4, elm1, I4cp)) ≈ wrapper(Z4) + @test I4cp ≈ Z4 + I4cp[:] = I4 + @test wrapper(BLAS.gemmt!(uplo, 'T', 'N', one(elty), I4, I4, elm1, I4cp)) ≈ wrapper(Z4) + @test I4cp ≈ Z4 + I4cp[:] = I4 + @test wrapper(BLAS.gemmt!(uplo, 'T', 'T', one(elty), I4, I4, elm1, I4cp)) ≈ wrapper(Z4) + @test I4cp ≈ Z4 + M1 = uplo == 'U' ? U4 : I4 + @test wrapper(BLAS.gemmt(uplo, 'N', 'N', I4, U4)) ≈ wrapper(M1) + M2 = uplo == 'U' ? I4 : U4' + @test wrapper(BLAS.gemmt(uplo, 'N', 'T', I4, U4)) ≈ wrapper(M2) + @test_throws DimensionMismatch BLAS.gemmt!(uplo, 'N', 'N', one(elty), I43, I4, elm1, I43) + @test_throws DimensionMismatch BLAS.gemmt!(uplo, 'N', 'N', one(elty), I4, I4, elm1, Matrix{elty}(I, 5, 5)) + @test_throws DimensionMismatch BLAS.gemmt!(uplo, 'N', 'N', one(elty), I43, I4, elm1, I4) + @test_throws DimensionMismatch BLAS.gemmt!(uplo, 'T', 'N', one(elty), I4, I43, elm1, I43) + @test_throws DimensionMismatch BLAS.gemmt!(uplo, 'N', 'T', one(elty), I43, I43, elm1, I43) + @test_throws DimensionMismatch BLAS.gemmt!(uplo, 'T', 'T', one(elty), I43, I43, elm1, Matrix{elty}(I, 3, 4)) + end + end @testset "gemm" begin @test all(BLAS.gemm('N', 'N', I4, I4) .== I4) @test all(BLAS.gemm('N', 'T', I4, I4) .== I4) @@ -441,7 +492,7 @@ Random.seed!(100) @test all(BLAS.gemm('N', 'N', el2, I4, I4) .== el2 * I4) @test all(BLAS.gemm('N', 'T', el2, I4, I4) .== el2 * I4) @test all(BLAS.gemm('T', 'N', el2, I4, I4) .== el2 * I4) - @test all(LinearAlgebra.BLAS.gemm('T', 'T', el2, I4, I4) .== el2 * I4) + @test all(BLAS.gemm('T', 'T', el2, I4, I4) .== el2 * I4) I4cp = copy(I4) @test all(BLAS.gemm!('N', 'N', one(elty), I4, I4, elm1, I4cp) .== Z4) @test all(I4cp .== Z4) @@ -518,7 +569,7 @@ Base.getindex(A::WrappedArray, i::Int) = A.A[i] Base.getindex(A::WrappedArray{T, N}, I::Vararg{Int, N}) where {T, N} = A.A[I...] Base.setindex!(A::WrappedArray, v, i::Int) = setindex!(A.A, v, i) Base.setindex!(A::WrappedArray{T, N}, v, I::Vararg{Int, N}) where {T, N} = setindex!(A.A, v, I...) 
-Base.unsafe_convert(::Type{Ptr{T}}, A::WrappedArray{T}) where T = Base.unsafe_convert(Ptr{T}, A.A) +Base.cconvert(::Type{Ptr{T}}, A::WrappedArray{T}) where T = Base.cconvert(Ptr{T}, A.A) Base.strides(A::WrappedArray) = strides(A.A) Base.elsize(::Type{WrappedArray{T,N}}) where {T,N} = Base.elsize(Array{T,N}) @@ -689,7 +740,7 @@ end end @test BLAS.iamax(a) == 0 @test_throws "dest" BLAS.scal!(b[1], a) - @testset "nrm2/asum" begin # OpenBLAS allways return 0.0 + @testset "nrm2/asum" begin # OpenBLAS always return 0.0 @test_throws "input" BLAS.nrm2(a) @test_throws "input" BLAS.asum(a) end @@ -700,4 +751,11 @@ end end end +# Make sure we can use `Base.libblas_name`. Avoid causing +# https://github.com/JuliaLang/julia/issues/48427 again. +@testset "libblas_name" begin + dot_sym = dlsym(dlopen(Base.libblas_name), "cblas_ddot" * (Sys.WORD_SIZE == 64 ? "64_" : "")) + @test 23.0 === @ccall $(dot_sym)(2::Int, [2.0, 3.0]::Ref{Cdouble}, 1::Int, [4.0, 5.0]::Ref{Cdouble}, 1::Int)::Cdouble +end + end # module TestBLAS diff --git a/stdlib/LinearAlgebra/test/bunchkaufman.jl b/stdlib/LinearAlgebra/test/bunchkaufman.jl index f1da22d8733e2..613e4d09a3cc6 100644 --- a/stdlib/LinearAlgebra/test/bunchkaufman.jl +++ b/stdlib/LinearAlgebra/test/bunchkaufman.jl @@ -70,10 +70,10 @@ bimg = randn(n,2)/2 @test getproperty(bc1, uplo)*bc1.D*transpose(getproperty(bc1, uplo)) ≈ asym[bc1.p, bc1.p] @test getproperty(bc1, uplo)*bc1.D*transpose(getproperty(bc1, uplo)) ≈ bc1.P*asym*transpose(bc1.P) @test_throws ErrorException bc1.Z - @test_throws ArgumentError uplo == :L ? bc1.U : bc1.L + @test_throws ArgumentError uplo === :L ? bc1.U : bc1.L end # test Base.iterate - ref_objs = (bc1.D, uplo == :L ? bc1.L : bc1.U, bc1.p) + ref_objs = (bc1.D, uplo === :L ? bc1.L : bc1.U, bc1.p) for (bki, bkobj) in enumerate(bc1) @test bkobj == ref_objs[bki] end @@ -162,7 +162,7 @@ end @test B.D == Tridiagonal([], [], []) @test B.P == ones(0, 0) @test B.p == [] - if ul == :U + if ul === :U @test B.U == UnitUpperTriangular(ones(0, 0)) @test_throws ArgumentError B.L else @@ -190,4 +190,10 @@ end @test_throws ArgumentError("adjoint not implemented for complex symmetric matrices") F' end +@testset "BunchKaufman for AbstractMatrix" begin + S = SymTridiagonal(fill(2.0, 4), ones(3)) + B = bunchkaufman(S) + @test B.U * B.D * B.U' ≈ S +end + end # module TestBunchKaufman diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl index 8e6cac65f7dfb..a795eb8d44a03 100644 --- a/stdlib/LinearAlgebra/test/cholesky.jl +++ b/stdlib/LinearAlgebra/test/cholesky.jl @@ -260,11 +260,12 @@ end end end -@testset "behavior for non-positive definite matrices" for T in (Float64, ComplexF64) +@testset "behavior for non-positive definite matrices" for T in (Float64, ComplexF64, BigFloat) A = T[1 2; 2 1] B = T[1 2; 0 1] + C = T[2 0; 0 0] # check = (true|false) - for M in (A, Hermitian(A), B) + for M in (A, Hermitian(A), B, C) @test_throws PosDefException cholesky(M) @test_throws PosDefException cholesky!(copy(M)) @test_throws PosDefException cholesky(M; check = true) @@ -272,17 +273,19 @@ end @test !LinearAlgebra.issuccess(cholesky(M; check = false)) @test !LinearAlgebra.issuccess(cholesky!(copy(M); check = false)) end - for M in (A, Hermitian(A), B) - @test_throws RankDeficientException cholesky(M, RowMaximum()) - @test_throws RankDeficientException cholesky!(copy(M), RowMaximum()) - @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true) - @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true) 
- @test !LinearAlgebra.issuccess(cholesky(M, RowMaximum(); check = false)) - @test !LinearAlgebra.issuccess(cholesky!(copy(M), RowMaximum(); check = false)) - C = cholesky(M, RowMaximum(); check = false) - @test_throws RankDeficientException chkfullrank(C) - C = cholesky!(copy(M), RowMaximum(); check = false) - @test_throws RankDeficientException chkfullrank(C) + if T !== BigFloat # generic pivoted cholesky is not implemented + for M in (A, Hermitian(A), B) + @test_throws RankDeficientException cholesky(M, RowMaximum()) + @test_throws RankDeficientException cholesky!(copy(M), RowMaximum()) + @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true) + @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true) + @test !LinearAlgebra.issuccess(cholesky(M, RowMaximum(); check = false)) + @test !LinearAlgebra.issuccess(cholesky!(copy(M), RowMaximum(); check = false)) + C = cholesky(M, RowMaximum(); check = false) + @test_throws RankDeficientException chkfullrank(C) + C = cholesky!(copy(M), RowMaximum(); check = false) + @test_throws RankDeficientException chkfullrank(C) + end end @test !isposdef(A) str = sprint((io, x) -> show(io, "text/plain", x), cholesky(A; check = false)) @@ -303,6 +306,7 @@ end v = rand(5) @test cholesky(Diagonal(v)) \ B ≈ Diagonal(v) \ B @test B / cholesky(Diagonal(v)) ≈ B / Diagonal(v) + @test inv(cholesky(Diagonal(v)))::Diagonal ≈ Diagonal(1 ./ v) end struct WrappedVector{T} <: AbstractVector{T} @@ -389,9 +393,9 @@ end # complex D = complex(D) - CD = cholesky(D) - CM = cholesky(Matrix(D)) - @test CD isa Cholesky{ComplexF64} + CD = cholesky(Hermitian(D)) + CM = cholesky(Matrix(Hermitian(D))) + @test CD isa Cholesky{ComplexF64,<:Diagonal} @test CD.U ≈ Diagonal(.√d) ≈ CM.U @test D ≈ CD.L * CD.U @test CD.info == 0 @@ -406,6 +410,12 @@ end @test_throws InexactError cholesky!(Diagonal([2, 1])) end +@testset "Cholesky for AbstractMatrix" begin + S = SymTridiagonal(fill(2.0, 4), ones(3)) + C = cholesky(S) + @test C.L * C.U ≈ S +end + @testset "constructor with non-BlasInt arguments" begin x = rand(5,5) diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl index a7b31dcc50611..66670f579aa1f 100644 --- a/stdlib/LinearAlgebra/test/dense.jl +++ b/stdlib/LinearAlgebra/test/dense.jl @@ -25,7 +25,7 @@ Random.seed!(1234323) ainv = inv(a) @test cond(a, 1) == opnorm(a, 1) *opnorm(ainv, 1) @test cond(a, Inf) == opnorm(a, Inf)*opnorm(ainv, Inf) - @test cond(a[:, 1:5]) == (/)(reverse(extrema(svdvals(a[:, 1:5])))...) + @test cond(a[:, 1:5]) == (\)(extrema(svdvals(a[:, 1:5]))...) 
@test_throws ArgumentError cond(a,3) end end @@ -132,8 +132,20 @@ bimg = randn(n,2)/2 @testset "Lyapunov/Sylvester" begin x = lyap(a, a2) @test -a2 ≈ a*x + x*a' + y = lyap(a', a2') + @test y ≈ lyap(Array(a'), Array(a2')) + @test -a2' ≈ a'y + y*a + z = lyap(Tridiagonal(a)', Diagonal(a2)) + @test z ≈ lyap(Array(Tridiagonal(a)'), Array(Diagonal(a2))) + @test -Diagonal(a2) ≈ Tridiagonal(a)'*z + z*Tridiagonal(a) x2 = sylvester(a[1:3, 1:3], a[4:n, 4:n], a2[1:3,4:n]) @test -a2[1:3, 4:n] ≈ a[1:3, 1:3]*x2 + x2*a[4:n, 4:n] + y2 = sylvester(a[1:3, 1:3]', a[4:n, 4:n]', a2[4:n,1:3]') + @test y2 ≈ sylvester(Array(a[1:3, 1:3]'), Array(a[4:n, 4:n]'), Array(a2[4:n,1:3]')) + @test -a2[4:n, 1:3]' ≈ a[1:3, 1:3]'*y2 + y2*a[4:n, 4:n]' + z2 = sylvester(Tridiagonal(a[1:3, 1:3]), Diagonal(a[4:n, 4:n]), a2[1:3,4:n]) + @test z2 ≈ sylvester(Array(Tridiagonal(a[1:3, 1:3])), Array(Diagonal(a[4:n, 4:n])), Array(a2[1:3,4:n])) + @test -a2[1:3, 4:n] ≈ Tridiagonal(a[1:3, 1:3])*z2 + z2*Diagonal(a[4:n, 4:n]) end @testset "Matrix square root" begin @@ -226,6 +238,15 @@ end @test pinv(M,rtol=0.5)== M end +@testset "Test inv of matrix of NaNs" begin + for eltya in (NaN16, NaN32, NaN32) + r = fill(eltya, 2, 2) + @test_throws ArgumentError inv(r) + c = fill(complex(eltya, eltya), 2, 2) + @test_throws ArgumentError inv(c) + end +end + @testset "test out of bounds triu/tril" begin local m, n = 5, 7 ainit = rand(m, n) @@ -857,7 +878,7 @@ end end end -@testset "matrix logarithm is type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64) +@testset "matrix logarithm is type-inferable" for elty in (Float32,Float64,ComplexF32,ComplexF64) A1 = randn(elty, 4, 4) @inferred Union{Matrix{elty},Matrix{complex(elty)}} log(A1) end @@ -1007,8 +1028,8 @@ end @test lyap(1.0+2.0im, 3.0+4.0im) == -1.5 - 2.0im end -@testset "Matrix to real power" for elty in (Float64, ComplexF64) -# Tests proposed at Higham, Deadman: Testing Matrix Function Algorithms Using Identities, March 2014 +@testset "$elty Matrix to real power" for elty in (Float64, ComplexF64) + # Tests proposed at Higham, Deadman: Testing Matrix Function Algorithms Using Identities, March 2014 #Aa : only positive real eigenvalues Aa = convert(Matrix{elty}, [5 4 2 1; 0 1 -1 -1; -1 -1 3 0; 1 1 -1 2]) @@ -1044,6 +1065,9 @@ end @test (A^(2/3))*(A^(1/3)) ≈ A @test (A^im)^(-im) ≈ A end + + Tschurpow = Union{Matrix{real(elty)}, Matrix{complex(elty)}} + @test (@inferred Tschurpow LinearAlgebra.schurpow(Aa, 2.0)) ≈ Aa^2 end @testset "diagonal integer matrix to real power" begin @@ -1108,12 +1132,12 @@ end end function test_rdiv_pinv_consistency(a, b) - @test a*(b/b) ≈ (a*b)*pinv(b) ≈ a*(b*pinv(b)) - @test typeof(a*(b/b)) == typeof((a*b)*pinv(b)) == typeof(a*(b*pinv(b))) + @test (a*b)/b ≈ a*(b/b) ≈ (a*b)*pinv(b) ≈ a*(b*pinv(b)) + @test typeof((a*b)/b) == typeof(a*(b/b)) == typeof((a*b)*pinv(b)) == typeof(a*(b*pinv(b))) end function test_ldiv_pinv_consistency(a, b) - @test (a\a)*b ≈ (pinv(a)*a)*b ≈ pinv(a)*(a*b) - @test typeof((a\a)*b) == typeof((pinv(a)*a)*b) == typeof(pinv(a)*(a*b)) + @test a\(a*b) ≈ (a\a)*b ≈ (pinv(a)*a)*b ≈ pinv(a)*(a*b) + @test typeof(a\(a*b)) == typeof((a\a)*b) == typeof((pinv(a)*a)*b) == typeof(pinv(a)*(a*b)) end function test_div_pinv_consistency(a, b) test_rdiv_pinv_consistency(a, b) @@ -1192,6 +1216,11 @@ end @test exp(log(A2)) ≈ A2 end +@testset "sqrt of empty Matrix of type $T" for T in [Int,Float32,Float64,ComplexF32,ComplexF64] + @test sqrt(Matrix{T}(undef, 0, 0)) == Matrix{T}(undef, 0, 0) + @test_throws DimensionMismatch sqrt(Matrix{T}(undef, 0, 3)) +end + 
struct TypeWithoutZero end Base.zero(::Type{TypeWithoutZero}) = TypeWithZero() struct TypeWithZero end @@ -1203,4 +1232,52 @@ Base.:+(x::TypeWithZero, ::TypeWithoutZero) = x @test diagm(0 => [TypeWithoutZero()]) isa Matrix{TypeWithZero} end +@testset "cbrt(A::AbstractMatrix{T})" begin + N = 10 + + # Non-square + A = randn(N,N+2) + @test_throws DimensionMismatch cbrt(A) + + # Real valued diagonal + D = Diagonal(randn(N)) + T = cbrt(D) + @test T*T*T ≈ D + @test eltype(D) == eltype(T) + # Real valued triangular + U = UpperTriangular(randn(N,N)) + T = cbrt(U) + @test T*T*T ≈ U + @test eltype(U) == eltype(T) + L = LowerTriangular(randn(N,N)) + T = cbrt(L) + @test T*T*T ≈ L + @test eltype(L) == eltype(T) + # Real valued symmetric + S = (A -> (A+A')/2)(randn(N,N)) + T = cbrt(Symmetric(S,:U)) + @test T*T*T ≈ S + @test eltype(S) == eltype(T) + # Real valued symmetric + S = (A -> (A+A')/2)(randn(N,N)) + T = cbrt(Symmetric(S,:L)) + @test T*T*T ≈ S + @test eltype(S) == eltype(T) + # Real valued Hermitian + S = (A -> (A+A')/2)(randn(N,N)) + T = cbrt(Hermitian(S,:U)) + @test T*T*T ≈ S + @test eltype(S) == eltype(T) + # Real valued Hermitian + S = (A -> (A+A')/2)(randn(N,N)) + T = cbrt(Hermitian(S,:L)) + @test T*T*T ≈ S + @test eltype(S) == eltype(T) + # Real valued arbitrary + A = randn(N,N) + T = cbrt(A) + @test T*T*T ≈ A + @test eltype(A) == eltype(T) +end + end # module TestDense diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index b8186f3b33150..3de868a847b7c 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -12,7 +12,16 @@ using .Main.Furlongs isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) using .Main.OffsetArrays -n=12 #Size of matrix problem to test +isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl")) +using .Main.InfiniteArrays + +isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) +using .Main.FillArrays + +isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl")) +using .Main.SizedArrays + +const n=12 # Size of matrix problem to test Random.seed!(1) @testset for relty in (Float32, Float64, BigFloat), elty in (relty, Complex{relty}) @@ -37,15 +46,20 @@ Random.seed!(1) end @test eltype(Diagonal{elty}([1,2,3,4])) == elty @test isa(Diagonal{elty,Vector{elty}}(GenericArray([1,2,3,4])), Diagonal{elty,Vector{elty}}) + @test isa(Diagonal{elty}(rand(Int,n,n)), Diagonal{elty,Vector{elty}}) DI = Diagonal([1,2,3,4]) @test Diagonal(DI) === DI @test isa(Diagonal{elty}(DI), Diagonal{elty}) # issue #26178 - @test_throws MethodError convert(Diagonal, [1, 2, 3, 4]) + @test_throws MethodError convert(Diagonal, [1,2,3,4]) + @test_throws DimensionMismatch convert(Diagonal, [1 2 3 4]) + @test_throws InexactError convert(Diagonal, ones(2,2)) end @testset "Basic properties" begin - @test_throws ArgumentError size(D,0) + @test_throws BoundsError size(D,0) + @test size(D,1) == size(D,2) == length(dd) + @test size(D,3) == 1 @test typeof(convert(Diagonal{ComplexF32},D)) <: Diagonal{ComplexF32} @test typeof(convert(AbstractMatrix{ComplexF32},D)) <: Diagonal{ComplexF32} @@ -69,6 +83,9 @@ Random.seed!(1) @test !istril(D, -1) @test istril(D, 1) @test istril(Diagonal(zero(diag(D))), -1) + @test Base.isstored(D,1,1) + @test !Base.isstored(D,1,2) + @test_throws BoundsError Base.isstored(D, 
n + 1, 1) if elty <: Real @test ishermitian(D) end @@ -94,6 +111,12 @@ Random.seed!(1) for func in (det, tr) @test func(D) ≈ func(DM) atol=n^2*eps(relty)*(1+(elty<:Complex)) end + + if eltype(D) <: Real + @test minimum(D) ≈ minimum(DM) + @test maximum(D) ≈ maximum(DM) + end + if relty <: BlasFloat for func in (exp, cis, sinh, cosh, tanh, sech, csch, coth) @test func(D) ≈ func(DM) atol=n^3*eps(relty) @@ -372,9 +395,17 @@ Random.seed!(1) @testset "conj and transpose" begin @test transpose(D) == D - if elty <: BlasComplex + if elty <: Real + @test transpose(D) === D + @test adjoint(D) === D + elseif elty <: BlasComplex @test Array(conj(D)) ≈ conj(DM) @test adjoint(D) == conj(D) + local D2 = copy(D) + local D2adj = adjoint(D2) + D2adj[1,1] = rand(eltype(D2adj)) + @test D2[1,1] == adjoint(D2adj[1,1]) + @test D2adj' === D2 end # Translates to Ac/t_mul_B, which is specialized after issue 21286 @test(D' * vv == conj(D) * vv) @@ -440,6 +471,12 @@ Random.seed!(1) end end +@testset "axes" begin + v = OffsetArray(1:3) + D = Diagonal(v) + @test axes(D) isa NTuple{2,typeof(axes(v,1))} +end + @testset "rdiv! (#40887)" begin @test rdiv!(Matrix(Diagonal([2.0, 3.0])), Diagonal(2:3)) == Diagonal([1.0, 1.0]) @test rdiv!(fill(3.0, 3, 3), 3.0I(3)) == ones(3,3) @@ -462,6 +499,43 @@ end @test kron(Ad, Ad).diag == kron([1, 2, 3], [1, 2, 3]) end +# Define a vector type that does not support `deleteat!`, to ensure that `kron` handles this +struct SimpleVector{T} <: AbstractVector{T} + vec::Vector{T} +end +SimpleVector(x::SimpleVector) = SimpleVector(Vector(x.vec)) +SimpleVector{T}(::UndefInitializer, n::Integer) where {T} = SimpleVector(Vector{T}(undef, n)) +Base.:(==)(x::SimpleVector, y::SimpleVector) = x == y +Base.axes(x::SimpleVector) = axes(x.vec) +Base.convert(::Type{Vector{T}}, x::SimpleVector) where {T} = convert(Vector{T}, x.vec) +Base.convert(::Type{Vector}, x::SimpleVector{T}) where {T} = convert(Vector{T}, x) +Base.convert(::Type{Array{T}}, x::SimpleVector) where {T} = convert(Vector{T}, x) +Base.convert(::Type{Array}, x::SimpleVector) = convert(Vector, x) +Base.copyto!(x::SimpleVector, y::SimpleVector) = (copyto!(x.vec, y.vec); x) +Base.eltype(::Type{SimpleVector{T}}) where {T} = T +Base.getindex(x::SimpleVector, ind...) = getindex(x.vec, ind...) +Base.kron(x::SimpleVector, y::SimpleVector) = SimpleVector(kron(x.vec, y.vec)) +Base.promote_rule(::Type{<:AbstractVector{T}}, ::Type{SimpleVector{U}}) where {T,U} = Vector{promote_type(T, U)} +Base.promote_rule(::Type{SimpleVector{T}}, ::Type{SimpleVector{U}}) where {T,U} = SimpleVector{promote_type(T, U)} +Base.setindex!(x::SimpleVector, val, ind...) 
= (setindex!(x.vec, val, ind...), x) +Base.similar(x::SimpleVector, ::Type{T}) where {T} = SimpleVector(similar(x.vec, T)) +Base.similar(x::SimpleVector, ::Type{T}, dims::Dims{1}) where {T} = SimpleVector(similar(x.vec, T, dims)) +Base.size(x::SimpleVector) = size(x.vec) + +@testset "kron (issue #46456)" for repr in Any[identity, SimpleVector] + A = Diagonal(repr(randn(10))) + BL = Bidiagonal(repr(randn(10)), repr(randn(9)), :L) + BU = Bidiagonal(repr(randn(10)), repr(randn(9)), :U) + C = SymTridiagonal(repr(randn(10)), repr(randn(9))) + Cl = SymTridiagonal(repr(randn(10)), repr(randn(10))) + D = Tridiagonal(repr(randn(9)), repr(randn(10)), repr(randn(9))) + @test kron(A, BL)::Bidiagonal == kron(Array(A), Array(BL)) + @test kron(A, BU)::Bidiagonal == kron(Array(A), Array(BU)) + @test kron(A, C)::SymTridiagonal == kron(Array(A), Array(C)) + @test kron(A, Cl)::SymTridiagonal == kron(Array(A), Array(Cl)) + @test kron(A, D)::Tridiagonal == kron(Array(A), Array(D)) +end + @testset "svdvals and eigvals (#11120/#11247)" begin D = Diagonal(Matrix{Float64}[randn(3,3), randn(2,2)]) @test sort([svdvals(D)...;], rev = true) ≈ svdvals([D.diag[1] zeros(3,2); zeros(2,3) D.diag[2]]) @@ -529,7 +603,7 @@ end end @testset "inverse" begin - for d in (randn(n), [1, 2, 3], [1im, 2im, 3im]) + for d in Any[randn(n), Int[], [1, 2, 3], [1im, 2im, 3im], [1//1, 2//1, 3//1], [1+1im//1, 2//1, 3im//1]] D = Diagonal(d) @test inv(D) ≈ inv(Array(D)) end @@ -538,6 +612,14 @@ end @test_throws SingularException inv(Diagonal([0im, 1im, 2im])) end +@testset "pseudoinverse" begin + for d in Any[randn(n), zeros(n), Int[], [0, 2, 0.003], [0im, 1+2im, 0.003im], [0//1, 2//1, 3//100], [0//1, 1//1+2im, 3im//100]] + D = Diagonal(d) + @test pinv(D) ≈ pinv(Array(D)) + @test pinv(D, 1.0e-2) ≈ pinv(Array(D), 1.0e-2) + end +end + # allow construct from range @test all(Diagonal(range(1, stop=3, length=3)) .== Diagonal([1.0,2.0,3.0])) @@ -645,6 +727,16 @@ end @test D2 == D * D end +@testset "multiplication of 2 Diagonal and a Matrix (#46400)" begin + A = randn(10, 10) + D = Diagonal(randn(10)) + D2 = Diagonal(randn(10)) + @test D * A * D2 ≈ D * (A * D2) + @test D * A * D2 ≈ (D * A) * D2 + @test_throws DimensionMismatch Diagonal(ones(9)) * A * D2 + @test_throws DimensionMismatch D * A * Diagonal(ones(9)) +end + @testset "multiplication of QR Q-factor and Diagonal (#16615 spot test)" begin D = Diagonal(randn(5)) Q = qr(randn(5, 5)).Q @@ -683,6 +775,39 @@ end @test tr(D) == 10 @test det(D) == 4 + + M = [1 2; 3 4] + for n in 0:1 + D = Diagonal(fill(M, n)) + @test D == Matrix{eltype(D)}(D) + end + + S = SizedArray{(2,3)}(reshape([1:6;],2,3)) + D = Diagonal(fill(S,3)) + @test D * fill(S,2,3)' == fill(S * S', 3, 2) + @test fill(S,3,2)' * D == fill(S' * S, 2, 3) +end + +@testset "Eigensystem for block diagonal (issue #30681)" begin + I2 = Matrix(I, 2,2) + D = Diagonal([2.0*I2, 3.0*I2]) + eigD = eigen(D) + evals = [ 2.0, 2.0, 3.0, 3.0 ] + evecs = [ [[ 1.0, 0.0 ]] [[ 0.0, 1.0 ]] [[ 0.0, 0.0 ]] [[ 0.0, 0.0 ]]; + [[ 0.0, 0.0 ]] [[ 0.0, 0.0 ]] [[ 1.0, 0.0 ]] [[ 0.0, 1.0 ]] ] + @test eigD.values == evals + @test eigD.vectors == evecs + @test D * eigD.vectors ≈ eigD.vectors * Diagonal(eigD.values) + + I3 = Matrix(I, 3,3) + D = Diagonal([[0.0 -1.0; 1.0 0.0], 2.0*I3]) + eigD = eigen(D) + evals = [ -1.0im, 1.0im, 2.0, 2.0, 2.0 ] + evecs = [ [[ 1/sqrt(2)+0im, 1/sqrt(2)*im ]] [[ 1/sqrt(2)+0im, -1/sqrt(2)*im ]] [[ 0.0, 0.0 ]] [[ 0.0, 0.0 ]] [[ 0.0, 0.0]]; + [[ 0.0, 0.0, 0.0 ]] [[ 0.0, 0.0, 0.0 ]] [[ 1.0, 0.0, 0.0 ]] [[ 0.0, 1.0, 0.0 ]] [[ 0.0, 0.0, 1.0]] ] + 
@test eigD.values == evals + @test eigD.vectors ≈ evecs + @test D * eigD.vectors ≈ eigD.vectors * Diagonal(eigD.values) end @testset "linear solve for block diagonal matrices" begin @@ -804,8 +929,8 @@ end U = UpperTriangular(randn(elty, K, K)) L = LowerTriangular(randn(elty, K, K)) D = Diagonal(randn(elty, K)) - @test (U / D)::UpperTriangular{elty} ≈ UpperTriangular(Matrix(U) / Matrix(D)) rtol=2eps(real(elty)) - @test (L / D)::LowerTriangular{elty} ≈ LowerTriangular(Matrix(L) / Matrix(D)) rtol=2eps(real(elty)) + @test (U / D)::UpperTriangular{elty} == UpperTriangular(Matrix(U) / Matrix(D)) + @test (L / D)::LowerTriangular{elty} == LowerTriangular(Matrix(L) / Matrix(D)) @test (D \ U)::UpperTriangular{elty} == UpperTriangular(Matrix(D) \ Matrix(U)) @test (D \ L)::LowerTriangular{elty} == LowerTriangular(Matrix(D) \ Matrix(L)) end @@ -819,8 +944,8 @@ end D0 = Diagonal(zeros(elty, K)) @test (D \ S)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(S)) @test (D \ T)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(T)) - @test (S / D)::Tridiagonal{elty} ≈ Tridiagonal(Matrix(S) / Matrix(D)) rtol=2eps(real(elty)) - @test (T / D)::Tridiagonal{elty} ≈ Tridiagonal(Matrix(T) / Matrix(D)) rtol=2eps(real(elty)) + @test (S / D)::Tridiagonal{elty} == Tridiagonal(Matrix(S) / Matrix(D)) + @test (T / D)::Tridiagonal{elty} == Tridiagonal(Matrix(T) / Matrix(D)) @test_throws SingularException D0 \ S @test_throws SingularException D0 \ T @test_throws SingularException S / D0 @@ -864,8 +989,8 @@ end D = Diagonal(rand(1:20, K)) @test (D \ S)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(S)) @test (D \ T)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(T)) - @test (S / D)::Tridiagonal{Float64} ≈ Tridiagonal(Matrix(S) / Matrix(D)) rtol=2eps() - @test (T / D)::Tridiagonal{Float64} ≈ Tridiagonal(Matrix(T) / Matrix(D)) rtol=2eps() + @test (S / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(S) / Matrix(D)) + @test (T / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(T) / Matrix(D)) end @testset "eigenvalue sorting" begin @@ -922,10 +1047,14 @@ end @test s1 == prod(sign, d) end -@testset "Empty (#35424)" begin +@testset "Empty (#35424) & size checks (#47060)" begin @test zeros(0)'*Diagonal(zeros(0))*zeros(0) === 0.0 @test transpose(zeros(0))*Diagonal(zeros(Complex{Int}, 0))*zeros(0) === 0.0 + 0.0im @test dot(zeros(Int32, 0), Diagonal(zeros(Int, 0)), zeros(Int16, 0)) === 0 + @test_throws DimensionMismatch zeros(2)' * Diagonal(zeros(2)) * zeros(3) + @test_throws DimensionMismatch zeros(3)' * Diagonal(zeros(2)) * zeros(2) + @test_throws DimensionMismatch dot(zeros(2), Diagonal(zeros(2)), zeros(3)) + @test_throws DimensionMismatch dot(zeros(3), Diagonal(zeros(2)), zeros(2)) end @testset "Diagonal(undef)" begin @@ -973,7 +1102,7 @@ end @testset "divisions functionality" for elty in (Int, Float64, ComplexF64) B = Diagonal(rand(elty,5,5)) x = rand(elty) - @test \(x, B) ≈ /(B, x) rtol=2eps() + @test \(x, B) == /(B, x) end @testset "promotion" begin @@ -1051,4 +1180,63 @@ end @test outTri === mul!(outTri, UTriA, D, 2, 1)::Tri == mul!(out, Matrix(UTriA), D, 2, 1) end +struct SMatrix1{T} <: AbstractArray{T,2} + elt::T +end +Base.:(==)(A::SMatrix1, B::SMatrix1) = A.elt == B.elt +Base.zero(::Type{SMatrix1{T}}) where {T} = SMatrix1(zero(T)) +Base.iszero(A::SMatrix1) = iszero(A.elt) +Base.getindex(A::SMatrix1, inds...) 
= A.elt +Base.size(::SMatrix1) = (1, 1) +@testset "map for Diagonal matrices (#46292)" begin + A = Diagonal([1]) + @test A isa Diagonal{Int,Vector{Int}} + @test 2*A isa Diagonal{Int,Vector{Int}} + @test A.+1 isa Matrix{Int} + # Numeric element types remain diagonal + B = map(SMatrix1, A) + @test B == fill(SMatrix1(1), 1, 1) + @test B isa Diagonal{SMatrix1{Int},Vector{SMatrix1{Int}}} + # Non-numeric element types become dense + C = map(a -> SMatrix1(string(a)), A) + @test C == fill(SMatrix1(string(1)), 1, 1) + @test C isa Matrix{SMatrix1{String}} +end + +@testset "copyto! with UniformScaling" begin + @testset "Fill" begin + for len in (4, InfiniteArrays.Infinity()) + d = FillArrays.Fill(1, len) + D = Diagonal(d) + @test copyto!(D, I) === D + end + end + D = Diagonal(fill(2, 2)) + copyto!(D, I) + @test all(isone, diag(D)) +end + +@testset "diagonal triple multiplication (#49005)" begin + n = 10 + @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n))) isa Diagonal + @test_throws DimensionMismatch (*(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n+1)))) + @test_throws DimensionMismatch (*(Diagonal(ones(n)), Diagonal(1:n+1), Diagonal(ones(n+1)))) + @test_throws DimensionMismatch (*(Diagonal(ones(n+1)), Diagonal(1:n), Diagonal(ones(n)))) + + # currently falls back to two-term * + @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n)), Diagonal(1:n)) isa Diagonal +end + +@testset "diagind" begin + D = Diagonal(1:4) + M = Matrix(D) + @testset for k in -4:4 + @test D[diagind(D,k)] == M[diagind(M,k)] + end +end + +@testset "copy" begin + @test copy(Diagonal(1:5)) === Diagonal(1:5) +end + end # module TestDiagonal diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl index 4ee1845ecc385..85ec5cdf5ab46 100644 --- a/stdlib/LinearAlgebra/test/eigen.jl +++ b/stdlib/LinearAlgebra/test/eigen.jl @@ -45,6 +45,16 @@ aimg = randn(n,n)/2 @test eigvecs(f) === f.vectors @test Array(f) ≈ a + for T in (Tridiagonal(a), Hermitian(Tridiagonal(a))) + f = eigen(T) + d, v = f + for i in 1:size(a,2) + @test T*v[:,i] ≈ d[i]*v[:,i] + end + @test det(T) ≈ det(f) + @test inv(T) ≈ inv(f) + end + num_fact = eigen(one(eltya)) @test num_fact.values[1] == one(eltya) h = asym @@ -61,43 +71,60 @@ aimg = randn(n,n)/2 asym_sg = view(asym, 1:n1, 1:n1) a_sg = view(a, 1:n, n1+1:n2) end - f = eigen(asym_sg, a_sg'a_sg) - @test asym_sg*f.vectors ≈ (a_sg'a_sg*f.vectors) * Diagonal(f.values) - @test f.values ≈ eigvals(asym_sg, a_sg'a_sg) - @test prod(f.values) ≈ prod(eigvals(asym_sg/(a_sg'a_sg))) atol=200ε - @test eigvecs(asym_sg, a_sg'a_sg) == f.vectors + ASG2 = a_sg'a_sg + f = eigen(asym_sg, ASG2) + @test asym_sg*f.vectors ≈ (ASG2*f.vectors) * Diagonal(f.values) + @test f.values ≈ eigvals(asym_sg, ASG2) + @test prod(f.values) ≈ prod(eigvals(asym_sg/(ASG2))) atol=200ε + @test eigvecs(asym_sg, ASG2) == f.vectors @test eigvals(f) === f.values @test eigvecs(f) === f.vectors @test_throws ErrorException f.Z - d,v = eigen(asym_sg, a_sg'a_sg) + d,v = eigen(asym_sg, ASG2) @test d == f.values @test v == f.vectors # solver for in-place U' \ A / U (#14896) if !(eltya <: Integer) for atyp in (eltya <: Real ? 
(Symmetric, Hermitian) : (Hermitian,)) - for utyp in (UpperTriangular, Diagonal) - A = atyp(asym_sg) - U = utyp(a_sg'a_sg) + for utyp in (UpperTriangular, Diagonal), uplo in (:L, :U) + A = atyp(asym_sg, uplo) + U = utyp(ASG2) @test UtiAUi!(copy(A), U) ≈ U' \ A / U end end end # matrices of different types (#14896) - if eltya <: Real - fs = eigen(Symmetric(asym_sg), a_sg'a_sg) - @test fs.values ≈ f.values - @test abs.(fs.vectors) ≈ abs.(f.vectors) # may change sign - gs = eigen(Symmetric(asym_sg), Diagonal(a_sg'a_sg)) - @test Symmetric(asym_sg)*gs.vectors ≈ (Diagonal(a_sg'a_sg)*gs.vectors) * Diagonal(gs.values) + D = Diagonal(ASG2) + for uplo in (:L, :U) + if eltya <: Real + fs = eigen(Symmetric(asym_sg, uplo), ASG2) + @test fs.values ≈ f.values + @test abs.(fs.vectors) ≈ abs.(f.vectors) # may change sign + gs = eigen(Symmetric(asym_sg, uplo), D) + @test Symmetric(asym_sg, uplo)*gs.vectors ≈ (D*gs.vectors) * Diagonal(gs.values) + end + fh = eigen(Hermitian(asym_sg, uplo), ASG2) + @test fh.values ≈ f.values + @test abs.(fh.vectors) ≈ abs.(f.vectors) # may change sign + gh = eigen(Hermitian(asym_sg, uplo), D) + @test Hermitian(asym_sg, uplo)*gh.vectors ≈ (D*gh.vectors) * Diagonal(gh.values) + gd = eigen(Matrix(Hermitian(ASG2, uplo)), D) + @test Hermitian(ASG2, uplo) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) + gd = eigen(Hermitian(Tridiagonal(ASG2), uplo), D) + @test Hermitian(Tridiagonal(ASG2), uplo) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) end - fh = eigen(Hermitian(asym_sg), a_sg'a_sg) - @test fh.values ≈ f.values - @test abs.(fh.vectors) ≈ abs.(f.vectors) # may change sign - gh = eigen(Hermitian(asym_sg), Diagonal(a_sg'a_sg)) - @test Hermitian(asym_sg)*gh.vectors ≈ (Diagonal(a_sg'a_sg)*gh.vectors) * Diagonal(gh.values) + gd = eigen(D, D) + @test all(≈(1), gd.values) + @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) + gd = eigen(Matrix(D), D) + @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) + gd = eigen(D, Matrix(D)) + @test D * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) + gd = eigen(Tridiagonal(ASG2), Matrix(D)) + @test Tridiagonal(ASG2) * gd.vectors ≈ D * gd.vectors * Diagonal(gd.values) end @testset "Non-symmetric generalized eigenproblem" begin if isa(a, Array) @@ -115,6 +142,9 @@ aimg = randn(n,n)/2 @test eigvecs(a1_nsg, a2_nsg; sortby = sortfunc) == f.vectors @test_throws ErrorException f.Z + g = eigen(a1_nsg, Diagonal(1:n1)) + @test a1_nsg*g.vectors ≈ (Diagonal(1:n1)*g.vectors) * Diagonal(g.values) + d,v = eigen(a1_nsg, a2_nsg; sortby = sortfunc) @test d == f.values @test v == f.vectors @@ -129,8 +159,17 @@ end test_matrix = rand(typeof(eltya),3,3) test_matrix[1,3] = eltya @test_throws(ArgumentError, eigen(test_matrix)) + @test_throws(ArgumentError, eigvals(test_matrix)) + @test_throws(ArgumentError, eigvecs(test_matrix)) @test_throws(ArgumentError, eigen(Symmetric(test_matrix))) + @test_throws(ArgumentError, eigvals(Symmetric(test_matrix))) + @test_throws(ArgumentError, eigvecs(Symmetric(test_matrix))) @test_throws(ArgumentError, eigen(Hermitian(test_matrix))) + @test_throws(ArgumentError, eigvals(Hermitian(test_matrix))) + @test_throws(ArgumentError, eigvecs(Hermitian(test_matrix))) + @test_throws(ArgumentError, eigen(Hermitian(complex.(test_matrix)))) + @test_throws(ArgumentError, eigvals(Hermitian(complex.(test_matrix)))) + @test_throws(ArgumentError, eigvecs(Hermitian(complex.(test_matrix)))) @test eigen(Symmetric(test_matrix, :L)) isa Eigen @test eigen(Hermitian(test_matrix, :L)) isa Eigen end @@ -202,6 +241,29 @@ end @test 
F.vectors isa Matrix{ComplexF16} @test F.values ≈ F32.values @test F.vectors ≈ F32.vectors + + for T in (Float16, ComplexF16) + D = Diagonal(T[1,2,4]) + A = Array(D) + B = eigen(A) + @test B isa Eigen{Float16, Float16, Matrix{Float16}, Vector{Float16}} + @test B.values isa Vector{Float16} + @test B.vectors isa Matrix{Float16} + end + D = Diagonal(ComplexF16[im,2,4]) + A = Array(D) + B = eigen(A) + @test B isa Eigen{Float16, ComplexF16, Matrix{Float16}, Vector{ComplexF16}} + @test B.values isa Vector{ComplexF16} + @test B.vectors isa Matrix{Float16} +end + +@testset "complex eigen inference (#52289)" begin + A = ComplexF64[1.0 0.0; 0.0 8.0] + TC = Eigen{ComplexF64, ComplexF64, Matrix{ComplexF64}, Vector{ComplexF64}} + TR = Eigen{ComplexF64, Float64, Matrix{ComplexF64}, Vector{Float64}} + λ, v = @inferred Union{TR,TC} eigen(A) + @test λ == [1.0, 8.0] end end # module TestEigen diff --git a/stdlib/LinearAlgebra/test/factorization.jl b/stdlib/LinearAlgebra/test/factorization.jl index d200eff2f17bf..72233293ff515 100644 --- a/stdlib/LinearAlgebra/test/factorization.jl +++ b/stdlib/LinearAlgebra/test/factorization.jl @@ -56,11 +56,24 @@ end A = randn(3, 3) A = A * A' # ensure A is pos. def. and symmetric F = f(A) - tF = Transpose(F) - aF = Adjoint(F) @test size(F) == size(A) - @test size(tF) == size(Transpose(A)) - @test size(aF) == size(Adjoint(A)) + @test size(F') == size(A') +end + +@testset "size for transpose factorizations - $f" for f in Any[ + bunchkaufman, + cholesky, + x -> cholesky(x, RowMaximum()), + hessenberg, + lq, + lu, + svd, +] + A = randn(3, 3) + A = A * A' # ensure A is pos. def. and symmetric + F = f(A) + @test size(F) == size(A) + @test size(transpose(F)) == size(transpose(A)) end @testset "equality of QRCompactWY" begin diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl index 7a51228efc725..b8cb15ff695cb 100644 --- a/stdlib/LinearAlgebra/test/generic.jl +++ b/stdlib/LinearAlgebra/test/generic.jl @@ -12,6 +12,11 @@ using .Main.Quaternions isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) using .Main.OffsetArrays +isdefined(Main, :DualNumbers) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "DualNumbers.jl")) +using .Main.DualNumbers + +isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) +using .Main.FillArrays Random.seed!(123) @@ -75,33 +80,21 @@ n = 5 # should be odd X = fill(x, 1, 1) @test logabsdet(x)[1] ≈ logabsdet(X)[1] @test logabsdet(x)[2] ≈ logabsdet(X)[2] + # Diagonal, upper, and lower triangular matrices + chksign(s1, s2) = if elty <: Real s1 == s2 else s1 ≈ s2 end + D = Matrix(Diagonal(A)) + v, s = logabsdet(D) + @test v ≈ log(abs(det(D))) && chksign(s, sign(det(D))) + R = triu(A) + v, s = logabsdet(R) + @test v ≈ log(abs(det(R))) && chksign(s, sign(det(R))) + L = tril(A) + v, s = logabsdet(L) + @test v ≈ log(abs(det(L))) && chksign(s, sign(det(L))) end @testset "det with nonstandard Number type" begin - struct MyDual{T<:Real} <: Real - val::T - eps::T - end - Base.:+(x::MyDual, y::MyDual) = MyDual(x.val + y.val, x.eps + y.eps) - Base.:*(x::MyDual, y::MyDual) = MyDual(x.val * y.val, x.eps * y.val + y.eps * x.val) - Base.:/(x::MyDual, y::MyDual) = x.val / y.val - Base.:(==)(x::MyDual, y::MyDual) = x.val == y.val && x.eps == y.eps - Base.zero(::MyDual{T}) where {T} = MyDual(zero(T), zero(T)) - Base.zero(::Type{MyDual{T}}) where {T} = MyDual(zero(T), zero(T)) - Base.one(::MyDual{T}) where 
{T} = MyDual(one(T), zero(T)) - Base.one(::Type{MyDual{T}}) where {T} = MyDual(one(T), zero(T)) - # the following line is required for BigFloat, IDK why it doesn't work via - # promote_rule like for all other types - Base.promote_type(::Type{MyDual{BigFloat}}, ::Type{BigFloat}) = MyDual{BigFloat} - Base.promote_rule(::Type{MyDual{T}}, ::Type{S}) where {T,S<:Real} = - MyDual{promote_type(T, S)} - Base.promote_rule(::Type{MyDual{T}}, ::Type{MyDual{S}}) where {T,S} = - MyDual{promote_type(T, S)} - Base.convert(::Type{MyDual{T}}, x::MyDual) where {T} = - MyDual(convert(T, x.val), convert(T, x.eps)) - if elty <: Real - @test det(triu(MyDual.(A, zero(A)))) isa MyDual - end + elty <: Real && @test det(Dual.(triu(A), zero(A))) isa Dual end end @@ -117,12 +110,12 @@ end x = ['a','b','c','d','e'] y = ['a','b','c','d','e'] α, β = 'f', 'g' - @test_throws DimensionMismatch LinearAlgebra.axpy!(α,x,['g']) - @test_throws DimensionMismatch LinearAlgebra.axpby!(α,x,β,['g']) - @test_throws BoundsError LinearAlgebra.axpy!(α,x,Vector(-1:5),y,Vector(1:7)) - @test_throws BoundsError LinearAlgebra.axpy!(α,x,Vector(1:7),y,Vector(-1:5)) - @test_throws BoundsError LinearAlgebra.axpy!(α,x,Vector(1:7),y,Vector(1:7)) - @test_throws DimensionMismatch LinearAlgebra.axpy!(α,x,Vector(1:3),y,Vector(1:5)) + @test_throws DimensionMismatch axpy!(α, x, ['g']) + @test_throws DimensionMismatch axpby!(α, x, β, ['g']) + @test_throws BoundsError axpy!(α, x, Vector(-1:5), y, Vector(1:7)) + @test_throws BoundsError axpy!(α, x, Vector(1:7), y, Vector(-1:5)) + @test_throws BoundsError axpy!(α, x, Vector(1:7), y, Vector(1:7)) + @test_throws DimensionMismatch axpy!(α, x, Vector(1:3), y, Vector(1:5)) end @test !issymmetric(fill(1,5,3)) @@ -155,8 +148,8 @@ end @testset "Scaling with rdiv! and ldiv!" begin @test rdiv!(copy(a), 5.) == a/5 - @test ldiv!(5., copy(a)) == 5\a - @test ldiv!(zero(a), 5., copy(a)) == 5\a + @test ldiv!(5., copy(a)) == a/5 + @test ldiv!(zero(a), 5., copy(a)) == a/5 end @testset "Scaling with 3-argument mul!" begin @@ -235,6 +228,8 @@ end @test norm(NaN, 0) === NaN end +@test rank(zeros(4)) == 0 +@test rank(1:10) == 1 @test rank(fill(0, 0, 0)) == 0 @test rank([1.0 0.0; 0.0 0.9],0.95) == 1 @test rank([1.0 0.0; 0.0 0.9],rtol=0.95) == 1 @@ -267,6 +262,24 @@ end @test norm(x, 3) ≈ cbrt(5^3 +sqrt(5)^3) end +@testset "norm of transpose/adjoint equals norm of parent #32739" begin + for t in (transpose, adjoint), elt in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}) + # Vector/matrix of scalars + for sz in ((2,), (2, 3)) + A = rand(elt, sz...) + Aᵀ = t(A) + @test norm(Aᵀ) ≈ norm(Matrix(Aᵀ)) + end + + # Vector/matrix of vectors/matrices + for sz_outer in ((2,), (2, 3)), sz_inner in ((3,), (1, 2)) + A = [rand(elt, sz_inner...) for _ in CartesianIndices(sz_outer)] + Aᵀ = t(A) + @test norm(Aᵀ) ≈ norm(Matrix(Matrix.(Aᵀ))) + end + end +end + @testset "rotate! and reflect!" begin x = rand(ComplexF64, 10) y = rand(ComplexF64, 10) @@ -300,44 +313,50 @@ end end end -@testset "LinearAlgebra.axp(b)y! for element type without commutative multiplication" begin +@testset "axp(b)y! 
for element type without commutative multiplication" begin α = [1 2; 3 4] β = [5 6; 7 8] x = fill([ 9 10; 11 12], 3) y = fill([13 14; 15 16], 3) - axpy = LinearAlgebra.axpy!(α, x, deepcopy(y)) - axpby = LinearAlgebra.axpby!(α, x, β, deepcopy(y)) + axpy = axpy!(α, x, deepcopy(y)) + axpby = axpby!(α, x, β, deepcopy(y)) @test axpy == x .* [α] .+ y @test axpy != [α] .* x .+ y @test axpby == x .* [α] .+ y .* [β] @test axpby != [α] .* x .+ [β] .* y + axpy = axpy!(zero(α), x, deepcopy(y)) + axpby = axpby!(zero(α), x, one(β), deepcopy(y)) + @test axpy == y + @test axpy == y + @test axpby == y + @test axpby == y end -@testset "LinearAlgebra.axpy! for x and y of different dimensions" begin +@testset "axpy! for x and y of different dimensions" begin α = 5 x = 2:5 y = fill(1, 2, 4) rx = [1 4] ry = [2 8] - @test LinearAlgebra.axpy!(α, x, rx, y, ry) == [1 1 1 1; 11 1 1 26] + @test axpy!(α, x, rx, y, ry) == [1 1 1 1; 11 1 1 26] end -@testset "LinearAlgebra.axp(b)y! for non strides input" begin +@testset "axp(b)y! for non strides input" begin a = rand(5, 5) - @test LinearAlgebra.axpby!(1, Hermitian(a), 1, zeros(size(a))) == Hermitian(a) - @test LinearAlgebra.axpby!(1, 1.:5, 1, zeros(5)) == 1.:5 - @test LinearAlgebra.axpy!(1, Hermitian(a), zeros(size(a))) == Hermitian(a) - @test LinearAlgebra.axpy!(1, 1.:5, zeros(5)) == 1.:5 + @test axpby!(1, Hermitian(a), 1, zeros(size(a))) == Hermitian(a) + @test axpby!(1, 1.:5, 1, zeros(5)) == 1.:5 + @test axpy!(1, Hermitian(a), zeros(size(a))) == Hermitian(a) + @test axpy!(1, 1.:5, zeros(5)) == 1.:5 end @testset "LinearAlgebra.axp(b)y! for stride-vector like input" begin for T in (Float32, Float64, ComplexF32, ComplexF64) a = rand(T, 5, 5) - @test LinearAlgebra.axpby!(1, view(a, :, 1:5), 1, zeros(T, size(a))) == a - @test LinearAlgebra.axpy!(1, view(a, :, 1:5), zeros(T, size(a))) == a + @test axpby!(1, view(a, :, 1:5), 1, zeros(T, size(a))) == a + @test axpy!(1, view(a, :, 1:5), zeros(T, size(a))) == a b = view(a, 25:-2:1) - @test LinearAlgebra.axpby!(1, b, 1, zeros(T, size(b))) == b - @test LinearAlgebra.axpy!(1, b, zeros(T, size(b))) == b + @test axpby!(1, b, 1, zeros(T, size(b))) == b + @test axpy!(1, b, zeros(T, size(b))) == b end end @@ -364,6 +383,7 @@ end [1.0 2.0 3.0; 4.0 5.0 6.0], # 2-dim rand(1,2,3), # higher dims rand(1,2,3,4), + Dual.(randn(2,3), randn(2,3)), OffsetArray([-1,0], (-2,)) # no index 1 ) @test normalize(arr) == normalize!(copy(arr)) @@ -441,13 +461,19 @@ Base.:-(a::ModInt{n}, b::ModInt{n}) where {n} = ModInt{n}(a.k - b.k) Base.:*(a::ModInt{n}, b::ModInt{n}) where {n} = ModInt{n}(a.k * b.k) Base.:-(a::ModInt{n}) where {n} = ModInt{n}(-a.k) Base.inv(a::ModInt{n}) where {n} = ModInt{n}(invmod(a.k, n)) +Base.:/(a::ModInt{n}, b::ModInt{n}) where {n} = a*inv(b) +Base.isfinite(a::ModInt{n}) where {n} = isfinite(a.k) Base.zero(::Type{ModInt{n}}) where {n} = ModInt{n}(0) Base.zero(::ModInt{n}) where {n} = ModInt{n}(0) Base.one(::Type{ModInt{n}}) where {n} = ModInt{n}(1) Base.one(::ModInt{n}) where {n} = ModInt{n}(1) Base.conj(a::ModInt{n}) where {n} = a LinearAlgebra.lupivottype(::Type{ModInt{n}}) where {n} = RowNonZero() +Base.adjoint(a::ModInt{n}) where {n} = ModInt{n}(conj(a)) +Base.transpose(a::ModInt{n}) where {n} = a # see Issue 20978 +LinearAlgebra.Adjoint(a::ModInt{n}) where {n} = adjoint(a) +LinearAlgebra.Transpose(a::ModInt{n}) where {n} = transpose(a) @testset "Issue 22042" begin A = [ModInt{2}(1) ModInt{2}(0); ModInt{2}(1) ModInt{2}(1)] @@ -536,10 +562,17 @@ end @testset "missing values" begin @test ismissing(norm(missing)) + x = [5, 6, 
missing] + y = [missing, 5, 6] + for p in (-Inf, -1, 1, 2, 3, Inf) + @test ismissing(norm(x, p)) + @test ismissing(norm(y, p)) + end + @test_broken ismissing(norm(x, 0)) end @testset "peakflops" begin - @test LinearAlgebra.peakflops() > 0 + @test LinearAlgebra.peakflops(1024, eltype=Float32, ntrials=2) > 0 end @testset "NaN handling: Issue 28972" begin @@ -606,4 +639,25 @@ end @test condskeel(A) ≈ condskeel(A, [8,8,8]) end +@testset "copytrito!" begin + n = 10 + A = rand(n, n) + for uplo in ('L', 'U') + B = zeros(n, n) + copytrito!(B, A, uplo) + C = uplo == 'L' ? tril(A) : triu(A) + @test B ≈ C + end +end + +@testset "immutable arrays" begin + A = FillArrays.Fill(big(3), (4, 4)) + M = Array(A) + @test triu(A) == triu(M) + @test triu(A, -1) == triu(M, -1) + @test tril(A) == tril(M) + @test tril(A, 1) == tril(M, 1) + @test det(A) == det(M) +end + end # module TestGeneric diff --git a/stdlib/LinearAlgebra/test/givens.jl b/stdlib/LinearAlgebra/test/givens.jl index c1d0caf7b8883..62d677cf086ad 100644 --- a/stdlib/LinearAlgebra/test/givens.jl +++ b/stdlib/LinearAlgebra/test/givens.jl @@ -3,10 +3,10 @@ module TestGivens using Test, LinearAlgebra, Random -using LinearAlgebra: rmul!, lmul!, Givens +using LinearAlgebra: Givens, Rotation, givensAlgorithm # Test givens rotations -@testset for elty in (Float32, Float64, ComplexF32, ComplexF64) +@testset "Test Givens for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64) if elty <: Real raw_A = convert(Matrix{elty}, randn(10,10)) else @@ -14,25 +14,35 @@ using LinearAlgebra: rmul!, lmul!, Givens end @testset for A in (raw_A, view(raw_A, 1:10, 1:10)) Ac = copy(A) - R = LinearAlgebra.Rotation(LinearAlgebra.Givens{elty}[]) + R = Rotation(Givens{elty}[]) + T = Rotation(Givens{elty}[]) for j = 1:8 for i = j+2:10 G, _ = givens(A, j+1, i, j) lmul!(G, A) rmul!(A, adjoint(G)) lmul!(G, R) + rmul!(T, G) - @test lmul!(G,Matrix{elty}(I, 10, 10)) == [G[i,j] for i=1:10,j=1:10] + @test lmul!(G, Matrix{elty}(I, 10, 10)) == [G[i,j] for i=1:10,j=1:10] @testset "transposes" begin - @test G'*G*Matrix(elty(1)I, 10, 10) ≈ Matrix(I, 10, 10) + @test (@inferred G'*G)*Matrix(elty(1)I, 10, 10) ≈ Matrix(I, 10, 10) @test (G*Matrix(elty(1)I, 10, 10))*G' ≈ Matrix(I, 10, 10) - @test copy(R')*(R*Matrix(elty(1)I, 10, 10)) ≈ Matrix(I, 10, 10) + @test (@inferred copy(R'))*(R*Matrix(elty(1)I, 10, 10)) ≈ Matrix(I, 10, 10) @test_throws ErrorException transpose(G) @test_throws ErrorException transpose(R) end end end + @test (R')' === R + # test products of Givens and Rotations + for r in (R, T, *(R.rotations...), *(R.rotations[1], *(R.rotations[2:end]...))) + @test r * A ≈ (A' * r')' ≈ lmul!(r, copy(A)) + @test A * r ≈ (r' * A')' ≈ rmul!(copy(A), r) + @test r' * A ≈ lmul!(r', copy(A)) + @test A * r' ≈ rmul!(copy(A), r') + end @test_throws ArgumentError givens(A, 3, 3, 2) @test_throws ArgumentError givens(one(elty),zero(elty),2,2) G, _ = givens(one(elty),zero(elty),11,12) @@ -46,27 +56,29 @@ using LinearAlgebra: rmul!, lmul!, Givens @test (G*I10)' * (G*I10) ≈ I10 K, _ = givens(zero(elty),one(elty),9,10) @test (K*I10)' * (K*I10) ≈ I10 + end - @testset "Givens * vectors" begin - if isa(A, Array) - x = A[:, 1] - else - x = view(A, 1:10, 1) - end - G, r = givens(x[2], x[4], 2, 4) + @testset "Givens * vectors" begin + for x in (raw_A[:,1], view(raw_A, :, 1)) + G, r = @inferred givens(x[2], x[4], 2, 4) @test (G*x)[2] ≈ r @test abs((G*x)[4]) < eps(real(elty)) - @inferred givens(x[2], x[4], 2, 4) - G, r = givens(x, 2, 4) + G, r = @inferred givens(x, 2, 4) @test (G*x)[2] ≈ r @test 
abs((G*x)[4]) < eps(real(elty)) - @inferred givens(x, 2, 4) G, r = givens(x, 4, 2) @test (G*x)[4] ≈ r @test abs((G*x)[2]) < eps(real(elty)) end + d = rand(4) + l = d[1] + g2, l = givens(l, d[2], 1, 2) + g3, l = givens(l, d[3], 1, 3) + g4, l = givens(l, d[4], 1, 4) + @test g2*(g3*d) ≈ g2*g3*d ≈ (g2*g3)*d + @test g2*g3*g4 isa Rotation end end @@ -100,4 +112,13 @@ oneunit(::Type{<:MockUnitful{T}}) where T = MockUnitful(one(T)) @test r.data ≈ 5.0 end +# 51554 +# avoid infinite loop on Inf inputs +@testset "givensAlgorithm - Inf inputs" for T in (Float64, ComplexF64) + cs, sn, r = givensAlgorithm(T(Inf), T(1.0)) + @test !isfinite(r) + cs, sn, r = givensAlgorithm(T(1.0), T(Inf)) + @test !isfinite(r) +end + end # module TestGivens diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl index b2b23caac6865..105b9f8970ec8 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -24,6 +24,11 @@ let n = 10 A = Areal H = UpperHessenberg(A) AH = triu(A,-1) + for k in -2:2 + @test istril(H, k) == istril(AH, k) + @test istriu(H, k) == istriu(AH, k) + @test (k <= -1 ? istriu(H, k) : !istriu(H, k)) + end @test UpperHessenberg(H) === H @test parent(H) === A @test Matrix(H) == Array(H) == H == AH @@ -92,10 +97,10 @@ let n = 10 @testset "Multiplication/division" begin for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U), UpperTriangular(A), UnitUpperTriangular(A)) - @test (H*x)::UpperHessenberg == Array(H)*x - @test (x*H)::UpperHessenberg == x*Array(H) - @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa UpperTriangular - @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa UpperTriangular + @test (H*x)::UpperHessenberg ≈ Array(H)*x + @test (x*H)::UpperHessenberg ≈ x*Array(H) + @test H/x ≈ Array(H)/x# broken = eltype(H) <: Furlong && x isa UpperTriangular + @test x\H ≈ x\Array(H)# broken = eltype(H) <: Furlong && x isa UpperTriangular @test H/x isa UpperHessenberg @test x\H isa UpperHessenberg end @@ -108,13 +113,12 @@ let n = 10 H = UpperHessenberg(Furlong.(Areal)) for A in (A, Furlong.(A)) @testset "Multiplication/division Furlong" begin - for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U)) - @test (H*x)::UpperHessenberg == Array(H)*x - @test (x*H)::UpperHessenberg == x*Array(H) - @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa UpperTriangular - @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa UpperTriangular - @test H/x isa UpperHessenberg - @test x\H isa UpperHessenberg + for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U), + UpperTriangular(A), UnitUpperTriangular(A)) + @test map(x -> x.val, (H*x)::UpperHessenberg) ≈ map(x -> x.val, Array(H)*x) + @test map(x -> x.val, (x*H)::UpperHessenberg) ≈ map(x -> x.val, x*Array(H)) + @test map(x -> x.val, (H/x)::UpperHessenberg) ≈ map(x -> x.val, Array(H)/x) + @test map(x -> x.val, (x\H)::UpperHessenberg) ≈ map(x -> x.val, x\Array(H)) end x = Bidiagonal(d, dl, :L) @test H*x == Array(H)*x @@ -144,15 +148,17 @@ let n = 10 @test_throws ErrorException H.Z @test convert(Array, H) ≈ A @test (H.Q * H.H) * H.Q' ≈ A ≈ (Matrix(H.Q) * Matrix(H.H)) * Matrix(H.Q)' - @test (H.Q' *A) * H.Q ≈ H.H + @test (H.Q' * A) * H.Q ≈ H.H #getindex for HessenbergQ @test H.Q[1,1] ≈ Array(H.Q)[1,1] + @test det(H.Q) ≈ det(Matrix(H.Q)) + @test logabsdet(H.Q)[1] ≈ logabsdet(Matrix(H.Q))[1] atol=2n*eps(float(real(eltya))) # REPL show hessstring = sprint((t, s) -> show(t, "text/plain", s), H) qstring = sprint((t, s) -> show(t, "text/plain", s), H.Q) hstring = sprint((t, s) -> 
show(t, "text/plain", s), H.H) - @test hessstring == "$(summary(H))\nQ factor:\n$qstring\nH factor:\n$hstring" + @test hessstring == "$(summary(H))\nQ factor: $qstring\nH factor:\n$hstring" #iterate q,h = H @@ -172,8 +178,10 @@ let n = 10 @test H \ B ≈ A \ B ≈ H \ complex(B) @test (H - I) \ B ≈ (A - I) \ B @test (H - (3+4im)I) \ B ≈ (A - (3+4im)I) \ B - @test b' / H ≈ b' / A ≈ complex.(b') / H + @test b' / H ≈ b' / A ≈ complex(b') / H + @test transpose(b) / H ≈ transpose(b) / A ≈ transpose(complex(b)) / H @test B' / H ≈ B' / A ≈ complex(B') / H + @test b' / H' ≈ complex(b)' / H' @test B' / (H - I) ≈ B' / (A - I) @test B' / (H - (3+4im)I) ≈ B' / (A - (3+4im)I) @test (H - (3+4im)I)' \ B ≈ (A - (3+4im)I)' \ B @@ -191,6 +199,13 @@ let n = 10 end end +@testset "hessenberg(::AbstractMatrix)" begin + n = 10 + A = Tridiagonal(rand(n-1), rand(n), rand(n-1)) + H = hessenberg(A) + @test convert(Array, H) ≈ A +end + # check logdet on a matrix that has a positive determinant let A = [0.5 0.1 0.9 0.4; 0.9 0.7 0.5 0.4; 0.3 0.4 0.9 0.0; 0.4 0.0 0.0 0.5] @test logdet(hessenberg(A)) ≈ logdet(A) ≈ -3.5065578973199822 diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl index d1130072573ec..6e12c85204a78 100644 --- a/stdlib/LinearAlgebra/test/lapack.jl +++ b/stdlib/LinearAlgebra/test/lapack.jl @@ -24,10 +24,17 @@ using LinearAlgebra: BlasInt vals, Z = LAPACK.syevr!('V', copy(Asym)) @test Z*(Diagonal(vals)*Z') ≈ Asym @test all(vals .> 0.0) - @test LAPACK.syevr!('N','V','U',copy(Asym),0.0,1.0,4,5,-1.0)[1] ≈ vals[vals .< 1.0] - @test LAPACK.syevr!('N','I','U',copy(Asym),0.0,1.0,4,5,-1.0)[1] ≈ vals[4:5] - @test vals ≈ LAPACK.syev!('N','U',copy(Asym)) - @test_throws DimensionMismatch LAPACK.sygvd!(1,'V','U',copy(Asym),Matrix{elty}(undef,6,6)) + @test LAPACK.syevr!('N', 'V', 'U', copy(Asym), 0.0, 1.0, 4, 5, -1.0)[1] ≈ vals[vals .< 1.0] + @test LAPACK.syevr!('N', 'I', 'U', copy(Asym), 0.0, 1.0, 4, 5, -1.0)[1] ≈ vals[4:5] + @test vals ≈ LAPACK.syev!('N', 'U', copy(Asym)) + @test vals ≈ LAPACK.syevd!('N', 'U', copy(Asym)) + vals_test, Z_test = LAPACK.syev!('V', 'U', copy(Asym)) + @test vals_test ≈ vals + @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym + vals_test, Z_test = LAPACK.syevd!('V', 'U', copy(Asym)) + @test vals_test ≈ vals + @test Z_test*(Diagonal(vals)*Z_test') ≈ Asym + @test_throws DimensionMismatch LAPACK.sygvd!(1, 'V', 'U', copy(Asym), zeros(elty, 6, 6)) end end @@ -180,7 +187,7 @@ end end end -@testset "geevx, ggev errors" begin +@testset "geevx, ggev, ggev3 errors" begin @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) A = rand(elty,10,10) B = rand(elty,10,10) @@ -191,6 +198,9 @@ end @test_throws ArgumentError LAPACK.ggev!('N','B',A,B) @test_throws ArgumentError LAPACK.ggev!('B','N',A,B) @test_throws DimensionMismatch LAPACK.ggev!('N','N',A,zeros(elty,12,12)) + @test_throws ArgumentError LAPACK.ggev3!('N','B',A,B) + @test_throws ArgumentError LAPACK.ggev3!('B','N',A,B) + @test_throws DimensionMismatch LAPACK.ggev3!('N','N',A,zeros(elty,12,12)) end end @@ -221,9 +231,16 @@ end @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) A = rand(elty,10,10) iA = inv(A) - A, ipiv = LAPACK.getrf!(A) + A, ipiv, info = LAPACK.getrf!(A) A = LAPACK.getri!(A, ipiv) @test A ≈ iA + + B = rand(elty,10,10) + iB = inv(B) + ipiv = rand(BlasInt,10) + B, ipiv, info = LAPACK.getrf!(B, ipiv) + B = LAPACK.getri!(B, ipiv) + @test B ≈ iB end end @@ -590,11 +607,12 @@ end end end -@testset "gees, gges error throwing" begin +@testset "gees, gges, gges3 error throwing" begin 
@testset for elty in (Float32, Float64, ComplexF32, ComplexF64) A = rand(elty,10,10) B = rand(elty,11,11) @test_throws DimensionMismatch LAPACK.gges!('V','V',A,B) + @test_throws DimensionMismatch LAPACK.gges3!('V','V',A,B) end end @@ -667,6 +685,19 @@ end end end +@testset "lacpy!" begin + @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) + n = 10 + A = rand(elty, n, n) + for uplo in ('L', 'U', 'N') + B = zeros(elty, n, n) + LinearAlgebra.LAPACK.lacpy!(B, A, uplo) + C = uplo == 'L' ? tril(A) : (uplo == 'U' ? triu(A) : A) + @test B ≈ C + end + end +end + @testset "Julia vs LAPACK" begin # Test our own linear algebra functionality against LAPACK @testset for elty in (Float32, Float64, ComplexF32, ComplexF64) @@ -709,4 +740,13 @@ a = zeros(2,0), zeros(0) @test LinearAlgebra.LAPACK.geqrf!(a...) === a @test LinearAlgebra.LAPACK.gerqf!(a...) === a +# Issue #49489: https://github.com/JuliaLang/julia/issues/49489 +# Dimension mismatch between A and ipiv causes segfaults +@testset "issue #49489" begin + A = randn(23,23) + b = randn(23) + ipiv = collect(1:20) + @test_throws DimensionMismatch LinearAlgebra.LAPACK.getrs!('N', A, ipiv, b) +end + end # module TestLAPACK diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl index 96f31ded78d6d..44f920db25557 100644 --- a/stdlib/LinearAlgebra/test/lq.jl +++ b/stdlib/LinearAlgebra/test/lq.jl @@ -37,10 +37,10 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q) @testset for isview in (false,true) let a = isview ? view(a, 1:m - 1, 1:n - 1) : a, b = isview ? view(b, 1:m - 1) : b, m = m - isview, n = n - isview - lqa = lq(a) + lqa = lq(a) x = lqa\b - l,q = lqa.L, lqa.Q - qra = qr(a, ColumnNorm()) + l, q = lqa.L, lqa.Q + qra = qr(a, ColumnNorm()) @testset "Basic ops" begin @test size(lqa,1) == size(a,1) @test size(lqa,3) == 1 @@ -62,18 +62,20 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q) @test Array{eltya}(q) ≈ Matrix(q) end @testset "Binary ops" begin + k = size(a, 2) + T = Tridiagonal(rand(eltya, k-1), rand(eltya, k), rand(eltya, k-1)) + @test lq(T) * T ≈ T * T rtol=3000ε + @test lqa * T ≈ a * T rtol=3000ε @test a*x ≈ b rtol=3000ε @test x ≈ qra \ b rtol=3000ε @test lqa*x ≈ a*x rtol=3000ε @test (sq = size(q.factors, 2); *(Matrix{eltyb}(I, sq, sq), adjoint(q))*squareQ(q)) ≈ Matrix(I, n, n) rtol=5000ε if eltya != Int - @test Matrix{eltyb}(I, n, n)*q ≈ convert(AbstractMatrix{tab},q) + @test Matrix{eltyb}(I, n, n)*q ≈ Matrix(I, n, n) * convert(LinearAlgebra.AbstractQ{tab}, q) end @test q*x ≈ squareQ(q)*x rtol=100ε - @test transpose(q)*x ≈ transpose(squareQ(q))*x rtol=100ε @test q'*x ≈ squareQ(q)'*x rtol=100ε @test a*q ≈ a*squareQ(q) rtol=100ε - @test a*transpose(q) ≈ a*transpose(squareQ(q)) rtol=100ε @test a*q' ≈ a*squareQ(q)' rtol=100ε @test q*a'≈ squareQ(q)*a' rtol=100ε @test q'*a' ≈ squareQ(q)'*a' rtol=100ε @@ -85,7 +87,6 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q) pad_a = vcat(I, a) pad_x = hcat(I, x) @test pad_a*q ≈ pad_a*squareQ(q) rtol=100ε - @test transpose(q)*pad_x ≈ transpose(squareQ(q))*pad_x rtol=100ε @test q'*pad_x ≈ squareQ(q)'*pad_x rtol=100ε end end @@ -189,12 +190,12 @@ end @testset for n in 1:3, m in 1:3 @testset "real" begin _, Q = lq(randn(n, m)) - @test det(Q) ≈ det(collect(Q)) + @test det(Q) ≈ det(Q*I) @test abs(det(Q)) ≈ 1 end @testset "complex" begin _, Q = lq(randn(ComplexF64, n, m)) - @test det(Q) ≈ det(collect(Q)) + @test det(Q) ≈ det(Q*I) @test abs(det(Q)) ≈ 1 end end @@ -205,19 +206,14 @@ end show(bf, "text/plain", lq(Matrix(I, 4, 4))) seekstart(bf) 
@test String(take!(bf)) == """ -LinearAlgebra.LQ{Float64, Matrix{Float64}, Vector{Float64}} +$(LinearAlgebra.LQ){Float64, Matrix{Float64}, Vector{Float64}} L factor: 4×4 Matrix{Float64}: 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 -Q factor: -4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}: - 1.0 0.0 0.0 0.0 - 0.0 1.0 0.0 0.0 - 0.0 0.0 1.0 0.0 - 0.0 0.0 0.0 1.0""" +Q factor: 4×4 $(LinearAlgebra.LQPackedQ){Float64, Matrix{Float64}, Vector{Float64}}""" end @testset "adjoint of LQ" begin diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl index e86cd583c0904..d96510549caa5 100644 --- a/stdlib/LinearAlgebra/test/lu.jl +++ b/stdlib/LinearAlgebra/test/lu.jl @@ -97,7 +97,9 @@ dimg = randn(n)/2 dlu = convert.(eltya, [1, 1]) dia = convert.(eltya, [-2, -2, -2]) tri = Tridiagonal(dlu, dia, dlu) - @test_throws ArgumentError lu!(tri) + L = lu(tri) + @test lu!(tri) == L + @test UpperTriangular(tri) == L.U end end @testset for eltyb in (Float32, Float64, ComplexF32, ComplexF64, Int) @@ -224,6 +226,11 @@ dimg = randn(n)/2 end end +@testset "Small tridiagonal matrices" for T in (Float64, ComplexF64) + A = Tridiagonal(T[], T[1], T[]) + @test inv(A) == A +end + @testset "Singular matrices" for T in (Float64, ComplexF64) A = T[1 2; 0 0] @test_throws SingularException lu(A) @@ -296,7 +303,7 @@ end show(bf, "text/plain", lu(Matrix(I, 4, 4))) seekstart(bf) @test String(take!(bf)) == """ -LinearAlgebra.LU{Float64, Matrix{Float64}, Vector{$Int}} +$(LinearAlgebra.LU){Float64, Matrix{Float64}, Vector{$Int}} L factor: 4×4 Matrix{Float64}: 1.0 0.0 0.0 0.0 @@ -386,6 +393,15 @@ end B = randn(elty, 5, 5) @test rdiv!(transform(A), transform(lu(B))) ≈ transform(C) / transform(B) end + for elty in (Float32, Float64, ComplexF64), transF in (identity, transpose), + transB in (transpose, adjoint), transT in (identity, complex) + A = randn(elty, 5, 5) + F = lu(A) + b = randn(transT(elty), 5) + @test rdiv!(transB(copy(b)), transF(F)) ≈ transB(b) / transF(F) ≈ transB(b) / transF(A) + B = randn(transT(elty), 5, 5) + @test rdiv!(copy(B), transF(F)) ≈ B / transF(F) ≈ B / transF(A) + end end @testset "transpose(A) / lu(B)' should not overwrite A (#36657)" begin @@ -435,4 +451,35 @@ end @test length(b) == 4 end +@testset "NaN matrix should throw error" begin + for eltya in (NaN16, NaN32, NaN64, BigFloat(NaN)) + r = fill(eltya, 2, 3) + c = fill(complex(eltya, eltya), 2, 3) + @test_throws ArgumentError lu(r) + @test_throws ArgumentError lu(c) + end +end + +@testset "more generic ldiv! #35419" begin + A = rand(3, 3) + b = rand(3) + @test A * ldiv!(lu(A), Base.ReshapedArray(copy(b)', (3,), ())) ≈ b +end + +@testset "generic lu!" begin + A = rand(3,3); B = deepcopy(A); C = A[2:3,2:3] + Asub1 = @view(A[2:3,2:3]) + F1 = lu!(Asub1) + Asub2 = @view(B[[2,3],[2,3]]) + F2 = lu!(Asub2) + @test Matrix(F1) ≈ Matrix(F2) ≈ C +end + +@testset "matrix with Nonfinite" begin + lu(fill(NaN, 2, 2), check=false) + lu(fill(Inf, 2, 2), check=false) + LinearAlgebra.generic_lufact!(fill(NaN, 2, 2), check=false) + LinearAlgebra.generic_lufact!(fill(Inf, 2, 2), check=false) +end + end # module TestLU diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl index cf0295ce552b5..4b0181f079a6f 100644 --- a/stdlib/LinearAlgebra/test/matmul.jl +++ b/stdlib/LinearAlgebra/test/matmul.jl @@ -4,10 +4,34 @@ module TestMatmul using Base: rtoldefault using Test, LinearAlgebra, Random -using LinearAlgebra: mul! 
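# Illustrative sketch (not part of the patch): the matmul tests below lean heavily on the
# five-argument mul!(C, A, B, alpha, beta), which overwrites C with A*B*alpha + C*beta.
# A minimal, self-contained round trip with hand-picked scalars:
using LinearAlgebra
A = rand(3, 3); B = rand(3, 3); C = zeros(3, 3)
mul!(C, A, B)              # C = A*B
mul!(C, A, B, 2.0, -1.0)   # C = 2.0*(A*B) - 1.0*C, i.e. A*B again
@assert C ≈ A * B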
+using LinearAlgebra: mul!, Symmetric, Hermitian ## Test Julia fallbacks to BLAS routines +mul_wrappers = [ + m -> m, + m -> Symmetric(m, :U), + m -> Symmetric(m, :L), + m -> Hermitian(m, :U), + m -> Hermitian(m, :L), + m -> adjoint(m), + m -> transpose(m)] + +@testset "wrap" begin + f(A) = LinearAlgebra.wrap(A, 'N') + A = ones(1,1) + @test @inferred(f(A)) === A + g(A) = LinearAlgebra.wrap(A, 'T') + @test @inferred(g(A)) === transpose(A) + # https://github.com/JuliaLang/julia/issues/52202 + @test Base.infer_return_type((Vector{Float64},)) do v + LinearAlgebra.wrap(v, 'N') + end == Vector{Float64} + h(A) = LinearAlgebra.wrap(LinearAlgebra._unwrap(A), LinearAlgebra.wrapper_char(A)) + @test @inferred(h(transpose(A))) === transpose(A) + @test @inferred(h(adjoint(A))) === transpose(A) +end + @testset "matrices with zero dimensions" begin for (dimsA, dimsB, dimsC) in ( ((0, 5), (5, 3), (0, 3)), @@ -42,6 +66,9 @@ end @test *(adjoint(Ai), adjoint(Bi)) == [-28.25-66im 9.75-58im; -26-89im 21-73im] @test_throws DimensionMismatch [1 2; 0 0; 0 0] * [1 2] end + for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers + @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB)) + end @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 3, 3), AA, BB) end @testset "3x3 matmul" begin @@ -62,6 +89,9 @@ end @test *(adjoint(Ai), adjoint(Bi)) == [1+2im 20.75+9im -44.75+42im; 19.5+17.5im -54-36.5im 51-14.5im; 13+7.5im 11.25+31.5im -43.25-14.5im] @test_throws DimensionMismatch [1 2 3; 0 0 0; 0 0 0] * [1 2 3] end + for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers + @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB)) + end @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 4, 4), AA, BB) end @@ -104,7 +134,7 @@ end @test mul!(C, transpose(A), B) == A' * B @test mul!(C, A, transpose(B)) == A * B' @test mul!(C, transpose(A), transpose(B)) == A' * B' - @test LinearAlgebra.mul!(C, adjoint(A), transpose(B)) == A' * transpose(B) + @test mul!(C, adjoint(A), transpose(B)) == A' * transpose(B) # Inplace multiply-add α = rand(-10:10) @@ -120,8 +150,8 @@ end @test mul!(C0(), adjoint(A), transpose(B), α, β) == α * A' * transpose(B) .+ βC #test DimensionMismatch for generic_matmatmul - @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(A), transpose(fill(1, 4, 4))) - @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(fill(1, 4, 4)), transpose(B)) + @test_throws DimensionMismatch mul!(C, adjoint(A), transpose(fill(1, 4, 4))) + @test_throws DimensionMismatch mul!(C, adjoint(fill(1, 4, 4)), transpose(B)) end vv = [1, 2] CC = Matrix{Int}(undef, 2, 2) @@ -156,14 +186,66 @@ end end end +@testset "generic_matvecmul for vectors of vectors" begin + @testset "matrix of scalars" begin + u = [[1, 2], [3, 4]] + A = [1 2; 3 4] + v = [[0, 0], [0, 0]] + Au = [[7, 10], [15, 22]] + @test A * u == Au + mul!(v, A, u) + @test v == Au + mul!(v, A, u, 2, -1) + @test v == Au + end + + @testset "matrix of matrices" begin + u = [[1, 2], [3, 4]] + A = Matrix{Matrix{Int}}(undef, 2, 2) + A[1, 1] = [1 2; 3 4] + A[1, 2] = [5 6; 7 8] + A[2, 1] = [9 10; 11 12] + A[2, 2] = [13 14; 15 16] + v = [[0, 0], [0, 0]] + Au = [[44, 64], [124, 144]] + @test A * u == Au + mul!(v, A, u) + @test v == Au + mul!(v, A, u, 2, -1) + @test v == Au + end +end + +@testset "generic_matmatmul for matrices of vectors" begin + B = Matrix{Vector{Int}}(undef, 2, 2) + B[1, 1] = [1, 2] + B[2, 1] = [3, 4] + B[1, 2] = [5, 6] + B[2, 2] = [7, 8] + A = [1 2; 3 4] + C = Matrix{Vector{Int}}(undef, 2, 
2) + AB = Matrix{Vector{Int}}(undef, 2, 2) + AB[1, 1] = [7, 10] + AB[2, 1] = [15, 22] + AB[1, 2] = [19, 22] + AB[2, 2] = [43, 50] + @test A * B == AB + mul!(C, A, B) + @test C == AB + mul!(C, A, B, 2, -1) + @test C == AB + LinearAlgebra.generic_matmatmul!(C, 'N', 'N', A, B, LinearAlgebra.MulAddMul(2, -1)) + @test C == AB +end + @testset "fallbacks & such for BlasFloats" begin AA = rand(Float64, 6, 6) BB = rand(Float64, 6, 6) CC = zeros(Float64, 6, 6) for A in (copy(AA), view(AA, 1:6, 1:6)), B in (copy(BB), view(BB, 1:6, 1:6)), C in (copy(CC), view(CC, 1:6, 1:6)) - @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B) - @test LinearAlgebra.mul!(C, A, adjoint(B)) == A * transpose(B) - @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A) * B + @test mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B) + @test mul!(C, A, adjoint(B)) == A * transpose(B) + @test mul!(C, adjoint(A), B) == transpose(A) * B # Inplace multiply-add α = rand(Float64) @@ -178,15 +260,57 @@ end end end +@testset "allocations in BLAS-mul" begin + for n in (2, 3, 6) + A = rand(Float64, n, n) + B = rand(Float64, n, n) + C = zeros(Float64, n, n) + # gemm + for t in (identity, adjoint, transpose) + At = t(A) + Bt = t(B) + mul!(C, At, B) + @test 0 == @allocations mul!(C, At, B) + mul!(C, A, Bt) + @test 0 == @allocations mul!(C, A, Bt) + mul!(C, At, Bt) + @test 0 == @allocations mul!(C, At, Bt) + end + # syrk/herk + @test 0 == @allocations mul!(C, transpose(A), A) + @test 0 == @allocations mul!(C, adjoint(A), A) + @test 0 == @allocations mul!(C, A, transpose(A)) + @test 0 == @allocations mul!(C, A, adjoint(A)) + # complex times real + Cc = complex(C) + Ac = complex(A) + for t in (identity, adjoint, transpose) + Bt = t(B) + @test 0 == @allocations mul!(Cc, Ac, Bt) + end + end +end + @testset "mixed Blas-non-Blas matmul" begin AA = rand(-10:10, 6, 6) - BB = rand(Float64, 6, 6) + BB = ones(Float64, 6, 6) CC = zeros(Float64, 6, 6) for A in (copy(AA), view(AA, 1:6, 1:6)), B in (copy(BB), view(BB, 1:6, 1:6)), C in (copy(CC), view(CC, 1:6, 1:6)) - @test LinearAlgebra.mul!(C, A, B) == A * B - @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B) - @test LinearAlgebra.mul!(C, A, adjoint(B)) == A * transpose(B) - @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A) * B + @test mul!(C, A, B) == A * B + @test mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B) + @test mul!(C, A, adjoint(B)) == A * transpose(B) + @test mul!(C, adjoint(A), B) == transpose(A) * B + end +end + +@testset "allocations in mixed Blas-non-Blas matmul" begin + for n in (2, 3, 6) + A = rand(-10:10, n, n) + B = ones(Float64, n, n) + C = zeros(Float64, n, n) + @test 0 == @allocations mul!(C, A, B) + @test 0 == @allocations mul!(C, A, transpose(B)) + @test 0 == @allocations mul!(C, adjoint(A), B) end end @@ -592,21 +716,18 @@ end import Base: *, adjoint, transpose import LinearAlgebra: Adjoint, Transpose (*)(x::RootInt, y::RootInt) = x.i * y.i +(*)(x::RootInt, y::Integer) = x.i * y adjoint(x::RootInt) = x transpose(x::RootInt) = x -Adjoint(x::RootInt) = x -Transpose(x::RootInt) = x -# TODO once Adjoint/Transpose constructors call adjoint/transpose recursively -# rather than Adjoint/Transpose, the additional definitions should become unnecessary @test Base.promote_op(*, RootInt, RootInt) === Int @testset "#14293" begin a = [RootInt(3)] - C = [0] + C = [0;;] mul!(C, a, transpose(a)) @test C[1] == 9 - C = [1] + C = [1;;] mul!(C, a, transpose(a), 2, 3) @test C[1] == 
21 a = [RootInt(2), RootInt(10)] @@ -615,7 +736,7 @@ Transpose(x::RootInt) = x @test A * a == [56] end -function test_mul(C, A, B) +function test_mul(C, A, B, S) mul!(C, A, B) @test Array(A) * Array(B) ≈ C @test A * B ≈ C @@ -624,10 +745,10 @@ function test_mul(C, A, B) # but consider all number types involved: rtol = max(rtoldefault.(real.(eltype.((C, A, B))))...) - rand!(C) + rand!(C, S) T = promote_type(eltype.((A, B))...) - α = rand(T) - β = rand(T) + α = T <: AbstractFloat ? rand(T) : rand(T(-10):T(10)) + β = T <: AbstractFloat ? rand(T) : rand(T(-10):T(10)) βArrayC = β * Array(C) βC = β * C mul!(C, A, B, α, β) @@ -636,7 +757,7 @@ function test_mul(C, A, B) end @testset "mul! vs * for special types" begin - eltypes = [Float32, Float64, Int64] + eltypes = [Float32, Float64, Int64(-100):Int64(100)] for k in [3, 4, 10] T = rand(eltypes) bi1 = Bidiagonal(rand(T, k), rand(T, k - 1), rand([:U, :L])) @@ -649,26 +770,26 @@ end specialmatrices = (bi1, bi2, tri1, tri2, stri1, stri2) for A in specialmatrices B = specialmatrices[rand(1:length(specialmatrices))] - test_mul(C, A, B) + test_mul(C, A, B, T) end for S in specialmatrices l = rand(1:6) B = randn(k, l) C = randn(k, l) - test_mul(C, S, B) + test_mul(C, S, B, T) A = randn(l, k) C = randn(l, k) - test_mul(C, A, S) + test_mul(C, A, S, T) end end for T in eltypes A = Bidiagonal(rand(T, 2), rand(T, 1), rand([:U, :L])) B = Bidiagonal(rand(T, 2), rand(T, 1), rand([:U, :L])) C = randn(2, 2) - test_mul(C, A, B) + test_mul(C, A, B, T) B = randn(2, 9) C = randn(2, 9) - test_mul(C, A, B) + test_mul(C, A, B, T) end let tri44 = Tridiagonal(randn(3), randn(4), randn(3)) @@ -796,7 +917,7 @@ end # Just in case dispatching on the surface API `mul!` is changed in the future, # let's test the function where the tiled multiplication is defined. fill!(C, 0) - LinearAlgebra._generic_matmatmul!(C, 'N', 'N', A, B, LinearAlgebra.MulAddMul(-1, 0)) + LinearAlgebra.generic_matmatmul!(C, 'N', 'N', A, B, LinearAlgebra.MulAddMul(-1, 0)) @test D ≈ C end @@ -927,4 +1048,17 @@ end end end +@testset "Issue #46865: mul!() with non-const alpha, beta" begin + f!(C,A,B,alphas,betas) = mul!(C, A, B, alphas[1], betas[1]) + alphas = [1.0] + betas = [0.5] + for d in [2,3,4] # test native small-matrix cases as well as BLAS + A = rand(d,d) + B = copy(A) + C = copy(A) + f!(C, A, B, alphas, betas) + @test_broken (@allocated f!(C, A, B, alphas, betas)) == 0 + end +end + end # module TestMatmul diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl index b897803074ff9..184971da304f7 100644 --- a/stdlib/LinearAlgebra/test/qr.jl +++ b/stdlib/LinearAlgebra/test/qr.jl @@ -21,8 +21,8 @@ breal = randn(n,2)/2 bimg = randn(n,2)/2 # helper functions to unambiguously recover explicit forms of an implicit QR Q -squareQ(Q::LinearAlgebra.AbstractQ) = (sq = size(Q.factors, 1); lmul!(Q, Matrix{eltype(Q)}(I, sq, sq))) -rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q) +squareQ(Q::LinearAlgebra.AbstractQ) = Q*I +rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q) @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, BigFloat, Int) raw_a = eltya == Int ? rand(1:7, n, n) : convert(Matrix{eltya}, eltya <: Complex ? 
complex.(areal, aimg) : areal) @@ -62,14 +62,14 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q) sq = size(q.factors, 2) @test *(Matrix{eltyb}(I, sq, sq), adjoint(q)) * squareQ(q) ≈ Matrix(I, sq, sq) atol=5000ε if eltya != Int - @test Matrix{eltyb}(I, a_1, a_1)*q ≈ convert(AbstractMatrix{tab}, q) + @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab}, q)) ac = copy(a) @test qr!(a[:, 1:5])\b == qr!(view(ac, :, 1:5))\b end qrstring = sprint((t, s) -> show(t, "text/plain", s), qra) rstring = sprint((t, s) -> show(t, "text/plain", s), r) qstring = sprint((t, s) -> show(t, "text/plain", s), q) - @test qrstring == "$(summary(qra))\nQ factor:\n$qstring\nR factor:\n$rstring" + @test qrstring == "$(summary(qra))\nQ factor: $qstring\nR factor:\n$rstring" # iterate q, r = qra @test q*r ≈ a @@ -86,14 +86,14 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q) @test q*b[1:n1] ≈ rectangularQ(q)*b[1:n1] atol=100ε @test q*b ≈ squareQ(q)*b atol=100ε if eltya != Int - @test Array{eltya}(q) ≈ Matrix(q) + @test Array{eltya}(q) ≈ rectangularQ(q) end @test_throws DimensionMismatch q*b[1:n1 + 1] @test_throws DimensionMismatch b[1:n1 + 1]*q' sq = size(q.factors, 2) @test *(UpperTriangular(Matrix{eltyb}(I, sq, sq)), adjoint(q))*squareQ(q) ≈ Matrix(I, n1, a_1) atol=5000ε if eltya != Int - @test Matrix{eltyb}(I, a_1, a_1)*q ≈ convert(AbstractMatrix{tab},q) + @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q)) end # iterate q, r = qra @@ -123,7 +123,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q) @test_throws DimensionMismatch q*b[1:n1+1] @test_throws DimensionMismatch b[1:n1+1]*q' if eltya != Int - @test Matrix{eltyb}(I, n1, n1)*q ≈ convert(AbstractMatrix{tab},q) + @test Matrix{eltyb}(I, n1, n1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q)) end # iterate q, r, p = qrpa @@ -149,13 +149,13 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q) sq = size(q.factors, 2) @test *(UpperTriangular(Matrix{eltyb}(I, sq, sq)), adjoint(q))*squareQ(q) ≈ Matrix(I, n1, a_1) atol=5000ε if eltya != Int - @test Matrix{eltyb}(I, a_1, a_1)*q ≈ convert(AbstractMatrix{tab},q) + @test Matrix{eltyb}(I, a_1, a_1)*q ≈ squareQ(convert(LinearAlgebra.AbstractQ{tab},q)) end qrstring = sprint((t, s) -> show(t, "text/plain", s), qrpa) rstring = sprint((t, s) -> show(t, "text/plain", s), r) qstring = sprint((t, s) -> show(t, "text/plain", s), q) pstring = sprint((t, s) -> show(t, "text/plain", s), p) - @test qrstring == "$(summary(qrpa))\nQ factor:\n$qstring\nR factor:\n$rstring\npermutation:\n$pstring" + @test qrstring == "$(summary(qrpa))\nQ factor: $qstring\nR factor:\n$rstring\npermutation:\n$pstring" # iterate q, r, p = qrpa @test q*r[:,invperm(p)] ≈ a[:,1:n1] @@ -205,15 +205,22 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q) @test mul!(c, b, q) ≈ b*q @test mul!(c, b, q') ≈ b*q' @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b) + + b = similar(a[:,1]); rand!(b) + c = similar(a[:,1]) + d = similar(a[:,1]) + @test mul!(c, q, b) ≈ q*b + @test mul!(c, q', b) ≈ q'*b + @test_throws DimensionMismatch mul!(Vector{eltya}(undef, n+1), q, b) end end end end @testset "transpose errors" begin - @test_throws MethodError transpose(qr(randn(3,3))) - @test_throws MethodError transpose(qr(randn(3,3), NoPivot())) - @test_throws MethodError transpose(qr(big.(randn(3,3)))) + @test_throws ArgumentError transpose(qr(randn(ComplexF64,3,3))) + @test_throws ArgumentError transpose(qr(randn(ComplexF64,3,3), 
NoPivot())) + @test_throws ArgumentError transpose(qr(big.(randn(ComplexF64,3,3)))) end @testset "Issue 7304" begin @@ -228,7 +235,7 @@ end for T in (Tr, Complex{Tr}) v = convert(Vector{T}, vr) nv, nm = qr(v) - @test norm(nv - [-0.6 -0.8; -0.8 0.6], Inf) < eps(Tr) + @test norm(nv*Matrix(I, (2,2)) - [-0.6 -0.8; -0.8 0.6], Inf) < eps(Tr) @test nm == fill(-5.0, 1, 1) end end @@ -244,7 +251,7 @@ end end @testset "Issue 16520" begin - @test_throws DimensionMismatch Matrix{Float64}(undef,3,2)\(1:5) + @test_throws DimensionMismatch rand(3,2)\(1:5) end @testset "Issue 22810" begin @@ -261,7 +268,7 @@ end @testset "Issue 24589. Promotion of rational matrices" begin A = rand(1//1:5//5, 4,3) - @test first(qr(A)) == first(qr(float(A))) + @test Matrix(first(qr(A))) == Matrix(first(qr(float(A)))) end @testset "Issue Test Factorization fallbacks for rectangular problems" begin @@ -303,7 +310,7 @@ end @testset for k in 0:min(n, m, 5) A = cat(Array(I(k)), randn(n - k, m - k); dims=(1, 2)) Q, = qr(A, pivot) - @test det(Q) ≈ det(collect(Q)) + @test det(Q) ≈ det(Q*Matrix(I, size(Q, 1), size(Q, 1))) @test abs(det(Q)) ≈ 1 end end @@ -311,7 +318,7 @@ end @testset for k in 0:min(n, m, 5) A = cat(Array(I(k)), randn(ComplexF64, n - k, m - k); dims=(1, 2)) Q, = qr(A, pivot) - @test det(Q) ≈ det(collect(Q)) + @test det(Q) ≈ det(Q*Matrix(I, size(Q, 1), size(Q, 1))) @test abs(det(Q)) ≈ 1 end end @@ -322,6 +329,7 @@ end for T in (Float64, ComplexF64) Q = qr(randn(T,5,5)).Q @test inv(Q) === Q' + @test inv(Q)' === inv(Q') === Q end end @@ -329,7 +337,7 @@ end for T in (Float32, Float64, ComplexF32, ComplexF64) Q1, R1 = qr(randn(T,5,5)) Q2, R2 = qr(Q1) - @test Q1 ≈ Q2 + @test Matrix(Q1) ≈ Matrix(Q2) @test R2 ≈ I end end @@ -362,13 +370,13 @@ end n = 5 Q, R = qr(randn(T,n,n)) Qmat = Matrix(Q) - dest1 = similar(Q) + dest1 = Matrix{T}(undef, size(Q)) copyto!(dest1, Q) @test dest1 ≈ Qmat - dest2 = PermutedDimsArray(similar(Q), (1, 2)) + dest2 = PermutedDimsArray(Matrix{T}(undef, size(Q)), (1, 2)) copyto!(dest2, Q) @test dest2 ≈ Qmat - dest3 = PermutedDimsArray(similar(Q), (2, 1)) + dest3 = PermutedDimsArray(Matrix{T}(undef, size(Q)), (2, 1)) copyto!(dest3, Q) @test dest3 ≈ Qmat end @@ -419,8 +427,8 @@ end A = qr(ones(3, 1)) B = I(3) C = B*A.Q' - @test C ≈ A.Q - @test A.Q' * B ≈ A.Q + @test C ≈ A.Q * Matrix(I, 3, 3) + @test A.Q' * B ≈ A.Q * Matrix(I, 3, 3) end @testset "convert between eltypes" begin @@ -466,4 +474,34 @@ end @test MyIdentity{Float64}()[1,:] == [1.0, 0.0] end +@testset "issue #48911" begin + # testcase in the original issue + # test ldiv!(::QRPivoted, ::AbstractVector) + A = Complex{BigFloat}[1+im 1-im] + b = Complex{BigFloat}[3+im] + x = A\b + AF = Complex{Float64}[1+im 1-im] + bf = Complex{Float64}[3+im] + xf = AF\bf + @test x ≈ xf + + # test ldiv!(::QRPivoted, ::AbstractVector) + A = Complex{BigFloat}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im] + b = Complex{BigFloat}[1+im; 0] + x = A\b + AF = Complex{Float64}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im] + bf = Complex{Float64}[1+im; 0] + xf = AF\bf + @test x ≈ xf + + # test ldiv!(::QRPivoted, ::AbstractMatrix) + C = Complex{BigFloat}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im] + D = Complex{BigFloat}[1+im 1-im; 0 0] + x = C\D + CF = Complex{Float64}[1+im 2-2im 3+3im; 4-4im 5+5im 6-6im] + DF = Complex{Float64}[1+im 1-im; 0 0] + xf = CF\DF + @test x ≈ xf +end + end # module TestQR diff --git a/stdlib/LinearAlgebra/test/schur.jl b/stdlib/LinearAlgebra/test/schur.jl index d047ca12abc1f..c9a5d92dbdae8 100644 --- a/stdlib/LinearAlgebra/test/schur.jl +++ 
b/stdlib/LinearAlgebra/test/schur.jl @@ -202,4 +202,20 @@ end @test A' ≈ C ≈ E end +@testset "UpperHessenberg schur" begin + A = UpperHessenberg(rand(ComplexF64, 100, 100)) + B = Array(A) + fact1 = schur(A) + fact2 = schur(B) + @test fact1.values ≈ fact2.values + @test fact1.Z * fact1.T * fact1.Z' ≈ B + + A = UpperHessenberg(rand(Int32, 50, 50)) + B = Array(A) + fact1 = schur(A) + fact2 = schur(B) + @test fact1.values ≈ fact2.values + @test fact1.Z * fact1.T * fact1.Z' ≈ B +end + end # module TestSchur diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl index 234f9f472557b..7e96af369e310 100644 --- a/stdlib/LinearAlgebra/test/special.jl +++ b/stdlib/LinearAlgebra/test/special.jl @@ -191,7 +191,7 @@ end push!(mats, SymTridiagonal(Vector{T}(diag), Vector{T}(offdiag))) end - for op in (+,*) # to do: fix when operation is - and the matrix has a range as the underlying representation and we get a step size of 0. + for op in (+,-,*) for A in mats for B in mats @test (op)(A, B) ≈ (op)(Matrix(A), Matrix(B)) ≈ Matrix((op)(A, B)) @@ -206,6 +206,17 @@ end end end end + diag = [randn(ComplexF64, 2, 2) for _ in 1:3] + odiag = [randn(ComplexF64, 2, 2) for _ in 1:2] + for A in (Diagonal(diag), + Bidiagonal(diag, odiag, :U), + Bidiagonal(diag, odiag, :L), + Tridiagonal(odiag, diag, odiag), + SymTridiagonal(diag, odiag)), B in uniformscalingmats + @test (A + B)::typeof(A) == (B + A)::typeof(A) + @test (A - B)::typeof(A) == ((A + (-B))::typeof(A)) + @test (B - A)::typeof(A) == ((B + (-A))::typeof(A)) + end end @@ -215,16 +226,29 @@ end atri = typ(a) matri = Matrix(atri) b = rand(n,n) - qrb = qr(b, ColumnNorm()) - @test atri * qrb.Q ≈ matri * qrb.Q ≈ rmul!(copy(atri), qrb.Q) - @test atri * qrb.Q' ≈ matri * qrb.Q' ≈ rmul!(copy(atri), qrb.Q') - @test qrb.Q * atri ≈ qrb.Q * matri ≈ lmul!(qrb.Q, copy(atri)) - @test qrb.Q' * atri ≈ qrb.Q' * matri ≈ lmul!(qrb.Q', copy(atri)) - qrb = qr(b, NoPivot()) - @test atri * qrb.Q ≈ matri * qrb.Q ≈ rmul!(copy(atri), qrb.Q) - @test atri * qrb.Q' ≈ matri * qrb.Q' ≈ rmul!(copy(atri), qrb.Q') - @test qrb.Q * atri ≈ qrb.Q * matri ≈ lmul!(qrb.Q, copy(atri)) - @test qrb.Q' * atri ≈ qrb.Q' * matri ≈ lmul!(qrb.Q', copy(atri)) + for pivot in (ColumnNorm(), NoPivot()) + qrb = qr(b, pivot) + @test atri * qrb.Q ≈ matri * qrb.Q + @test atri * qrb.Q' ≈ matri * qrb.Q' + @test qrb.Q * atri ≈ qrb.Q * matri + @test qrb.Q' * atri ≈ qrb.Q' * matri + end + end +end + +@testset "Multiplication of Qs" begin + for pivot in (ColumnNorm(), NoPivot()), A in (rand(5, 3), rand(5, 5), rand(3, 5)) + Q = qr(A, pivot).Q + m = size(A, 1) + C = Matrix{Float64}(undef, (m, m)) + @test Q*Q ≈ (Q*I) * (Q*I) ≈ mul!(C, Q, Q) + @test size(Q*Q) == (m, m) + @test Q'Q ≈ (Q'*I) * (Q*I) ≈ mul!(C, Q', Q) + @test size(Q'Q) == (m, m) + @test Q*Q' ≈ (Q*I) * (Q'*I) ≈ mul!(C, Q, Q') + @test size(Q*Q') == (m, m) + @test Q'Q' ≈ (Q'*I) * (Q'*I) ≈ mul!(C, Q', Q') + @test size(Q'Q') == (m, m) end end @@ -235,16 +259,16 @@ end bidiagmat = Bidiagonal(1:N, 1:(N-1), :U) tridiagmat = Tridiagonal(1:(N-1), 1:N, 1:(N-1)) symtridiagmat = SymTridiagonal(1:N, 1:(N-1)) - specialmats = (diagmat, bidiagmat, tridiagmat, symtridiagmat) + abstractq = qr(tridiagmat).Q + specialmats = (diagmat, bidiagmat, tridiagmat, symtridiagmat, abstractq, zeros(Int,N,N)) for specialmata in specialmats, specialmatb in specialmats - MA = Matrix(specialmata); MB = Matrix(specialmatb) + MA = collect(specialmata); MB = collect(specialmatb) @test hcat(specialmata, specialmatb) == hcat(MA, MB) @test vcat(specialmata, specialmatb) == 
vcat(MA, MB) @test hvcat((1,1), specialmata, specialmatb) == hvcat((1,1), MA, MB) @test cat(specialmata, specialmatb; dims=(1,2)) == cat(MA, MB; dims=(1,2)) end - # Test concatenating pairwise combinations of special matrices with sparse matrices, - # dense matrices, or dense vectors + # Test concatenating pairwise combinations of special matrices with dense matrices or dense vectors densevec = fill(1., N) densemat = diagm(0 => densevec) for specialmat in specialmats @@ -268,7 +292,7 @@ end @testset "concatenations of annotated types" begin N = 4 # The tested annotation types - testfull = Bool(parse(Int,(get(ENV, "JULIA_TESTFULL", "0")))) + testfull = Base.get_bool_env("JULIA_TESTFULL", false) utriannotations = (UpperTriangular, UnitUpperTriangular) ltriannotations = (LowerTriangular, UnitLowerTriangular) triannotations = (utriannotations..., ltriannotations...) diff --git a/stdlib/LinearAlgebra/test/structuredbroadcast.jl b/stdlib/LinearAlgebra/test/structuredbroadcast.jl index 4aeca31a79a03..2ca1904b2ff2d 100644 --- a/stdlib/LinearAlgebra/test/structuredbroadcast.jl +++ b/stdlib/LinearAlgebra/test/structuredbroadcast.jl @@ -100,6 +100,8 @@ end @test_throws ArgumentError broadcast!(+, copy(T), T, A) == Tridiagonal(broadcast(*, T, A)) @test_throws ArgumentError broadcast!(+, copy(◣), ◣, A) == LowerTriangular(broadcast(*, ◣, A)) @test_throws ArgumentError broadcast!(+, copy(◥), ◥, A) == UpperTriangular(broadcast(*, ◥, A)) + @test_throws ArgumentError broadcast!(*, copy(◥), ◣, 2) + @test_throws ArgumentError broadcast!(*, copy(Bu), Bl, 2) end @testset "map[!] over combinations of structured matrices" begin @@ -200,7 +202,7 @@ end Bu2 = 2 .* Bl @test typeof(Bl2) <: Bidiagonal && Bl2.uplo == 'L' - # Example of Nested Brodacasts + # Example of Nested Broadcasts tmp = (1 .* 2) .* (Bidiagonal(1:3, 1:2, 'U') .* (3 .* 4)) .* (5 .* Bidiagonal(1:3, 1:2, 'L')) @test typeof(tmp) <: Tridiagonal diff --git a/stdlib/LinearAlgebra/test/svd.jl b/stdlib/LinearAlgebra/test/svd.jl index 8bd3edadc911d..7f2aad904a88f 100644 --- a/stdlib/LinearAlgebra/test/svd.jl +++ b/stdlib/LinearAlgebra/test/svd.jl @@ -127,8 +127,20 @@ aimg = randn(n,n)/2 gsvd = svd(b,c) @test gsvd.U*gsvd.D1*gsvd.R*gsvd.Q' ≈ b @test gsvd.V*gsvd.D2*gsvd.R*gsvd.Q' ≈ c + # AbstractMatrix svd + T = Tridiagonal(a) + asvd = svd(T, a) + @test asvd.U*asvd.D1*asvd.R*asvd.Q' ≈ T + @test asvd.V*asvd.D2*asvd.R*asvd.Q' ≈ a + @test all(≈(1), svdvals(T, T)) end end + @testset "singular value decomposition of AbstractMatrix" begin + A = Tridiagonal(aa) + F = svd(A) + @test Matrix(F) ≈ A + @test svdvals(A) ≈ F.S + end @testset "singular value decomposition of Hermitian/real-Symmetric" begin for T in (eltya <: Real ? 
(Symmetric, Hermitian) : (Hermitian,)) usv = svd(T(asym)) diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl index 47a36df5e7883..82236c2a677eb 100644 --- a/stdlib/LinearAlgebra/test/symmetric.jl +++ b/stdlib/LinearAlgebra/test/symmetric.jl @@ -4,6 +4,11 @@ module TestSymmetric using Test, LinearAlgebra, Random +const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") + +isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl")) +using .Main.Quaternions + Random.seed!(1010) @testset "Pauli σ-matrices: $σ" for σ in map(Hermitian, @@ -64,6 +69,9 @@ end @test_throws ArgumentError Symmetric(asym, :R) @test_throws ArgumentError Hermitian(asym, :R) + @test_throws MethodError Symmetric{eltya,typeof(asym)}(asym, :L) + @test_throws MethodError Hermitian{eltya,typeof(aherm)}(aherm, :L) + # mixed cases with Hermitian/Symmetric if eltya <: Real @test Symmetric(Hermitian(aherm, :U)) === Symmetric(aherm, :U) @@ -76,8 +84,16 @@ end end @testset "diag" begin D = Diagonal(x) - @test diag(Symmetric(D, :U))::Vector == x - @test diag(Hermitian(D, :U))::Vector == real(x) + DM = Matrix(D) + B = diagm(-1 => x, 1 => x) + for uplo in (:U, :L) + @test diag(Symmetric(D, uplo))::Vector == x + @test diag(Hermitian(D, uplo))::Vector == real(x) + @test isdiag(Symmetric(DM, uplo)) + @test isdiag(Hermitian(DM, uplo)) + @test !isdiag(Symmetric(B, uplo)) + @test !isdiag(Hermitian(B, uplo)) + end end @testset "similar" begin @test isa(similar(Symmetric(asym)), Symmetric{eltya}) @@ -252,6 +268,14 @@ end end end end + if eltya <: AbstractFloat + @testset "inv should error with NaNs/Infs" begin + h = Hermitian(fill(eltya(NaN), 2, 2)) + @test_throws ArgumentError inv(h) + s = Symmetric(fill(eltya(NaN), 2, 2)) + @test_throws ArgumentError inv(s) + end + end end # Revisit when implemented in julia @@ -352,6 +376,9 @@ end C = zeros(eltya,n,n) @test Hermitian(aherm) * a ≈ aherm * a @test a * Hermitian(aherm) ≈ a * aherm + # rectangular multiplication + @test [a; a] * Hermitian(aherm) ≈ [a; a] * aherm + @test Hermitian(aherm) * [a a] ≈ aherm * [a a] @test Hermitian(aherm) * Hermitian(aherm) ≈ aherm*aherm @test_throws DimensionMismatch Hermitian(aherm) * Vector{eltya}(undef, n+1) LinearAlgebra.mul!(C,a,Hermitian(aherm)) @@ -360,6 +387,9 @@ end @test Symmetric(asym) * Symmetric(asym) ≈ asym*asym @test Symmetric(asym) * a ≈ asym * a @test a * Symmetric(asym) ≈ a * asym + # rectangular multiplication + @test Symmetric(asym) * [a a] ≈ asym * [a a] + @test [a; a] * Symmetric(asym) ≈ [a; a] * asym @test_throws DimensionMismatch Symmetric(asym) * Vector{eltya}(undef, n+1) LinearAlgebra.mul!(C,a,Symmetric(asym)) @test C ≈ a*asym @@ -380,6 +410,10 @@ end @test Hermitian(aherm)\b ≈ aherm\b @test Symmetric(asym)\x ≈ asym\x @test Symmetric(asym)\b ≈ asym\b + @test Hermitian(Diagonal(aherm))\x ≈ Diagonal(aherm)\x + @test Hermitian(Matrix(Diagonal(aherm)))\b ≈ Diagonal(aherm)\b + @test Symmetric(Diagonal(asym))\x ≈ Diagonal(asym)\x + @test Symmetric(Matrix(Diagonal(asym)))\b ≈ Diagonal(asym)\b end end @testset "generalized dot product" begin @@ -387,6 +421,8 @@ end @test dot(x, Hermitian(aherm, uplo), y) ≈ dot(x, Hermitian(aherm, uplo)*y) ≈ dot(x, Matrix(Hermitian(aherm, uplo)), y) @test dot(x, Hermitian(aherm, uplo), x) ≈ dot(x, Hermitian(aherm, uplo)*x) ≈ dot(x, Matrix(Hermitian(aherm, uplo)), x) end + @test dot(x, Hermitian(Diagonal(a)), y) ≈ dot(x, Hermitian(Diagonal(a))*y) ≈ dot(x, Matrix(Hermitian(Diagonal(a))), y) + @test 
dot(x, Hermitian(Diagonal(a)), x) ≈ dot(x, Hermitian(Diagonal(a))*x) ≈ dot(x, Matrix(Hermitian(Diagonal(a))), x) if eltya <: Real for uplo in (:U, :L) @test dot(x, Symmetric(aherm, uplo), y) ≈ dot(x, Symmetric(aherm, uplo)*y) ≈ dot(x, Matrix(Symmetric(aherm, uplo)), y) @@ -431,6 +467,17 @@ end end end +# bug identified in PR #52318: dot products of quaternionic Hermitian matrices, +# or any number type where conj(a)*conj(b) ≠ conj(a*b): +@testset "dot Hermitian quaternion #52318" begin + A, B = [Quaternion.(randn(3,3), randn(3, 3), randn(3, 3), randn(3,3)) |> t -> t + t' for i in 1:2] + @test A == Hermitian(A) && B == Hermitian(B) + @test dot(A, B) ≈ dot(Hermitian(A), Hermitian(B)) + A, B = [Quaternion.(randn(3,3), randn(3, 3), randn(3, 3), randn(3,3)) |> t -> t + transpose(t) for i in 1:2] + @test A == Symmetric(A) && B == Symmetric(B) + @test dot(A, B) ≈ dot(Symmetric(A), Symmetric(B)) +end + #Issue #7647: test xsyevr, xheevr, xstevr drivers. @testset "Eigenvalues in interval for $(typeof(Mi7647))" for Mi7647 in (Symmetric(diagm(0 => 1.0:3.0)), @@ -574,13 +621,13 @@ end # Hermitian A = Hermitian(fill(1.0+0im, 2, 2), uplo) @test fill!(A, 2) == fill(2, 2, 2) - @test A.data == (uplo == :U ? [2 2; 1.0+0im 2] : [2 1.0+0im; 2 2]) + @test A.data == (uplo === :U ? [2 2; 1.0+0im 2] : [2 1.0+0im; 2 2]) @test_throws ArgumentError fill!(A, 2+im) # Symmetric A = Symmetric(fill(1.0+im, 2, 2), uplo) @test fill!(A, 2) == fill(2, 2, 2) - @test A.data == (uplo == :U ? [2 2; 1.0+im 2] : [2 1.0+im; 2 2]) + @test A.data == (uplo === :U ? [2 2; 1.0+im 2] : [2 1.0+im; 2 2]) end end @@ -762,4 +809,95 @@ end end end +@testset "hermitian part" begin + for T in [Float32, Complex{Float32}, Int32, Rational{Int32}, + Complex{Int32}, Complex{Rational{Int32}}] + f, f!, t = hermitianpart, hermitianpart!, T <: Real ? transpose : adjoint + X = T[1 2 3; 4 5 6; 7 8 9] + T <: Complex && (X .+= im .* X) + Xc = copy(X) + Y = (X + t(X)) / 2 + U = f(X) + L = f(X, :L) + @test U isa Hermitian + @test L isa Hermitian + @test U.uplo == 'U' + @test L.uplo == 'L' + @test U == L == Y + if T <: AbstractFloat || real(T) <: AbstractFloat + HU = f!(X) + @test HU == Y + @test triu(X) == triu(Y) + HL = f!(Xc, :L) + @test HL == Y + @test tril(Xc) == tril(Y) + end + end + @test_throws DimensionMismatch hermitianpart(ones(1,2)) + for T in (Float64, ComplexF64), uplo in (:U, :L) + A = [randn(T, 2, 2) for _ in 1:2, _ in 1:2] + Aherm = hermitianpart(A, uplo) + @test Aherm == Aherm.data == (A + A')/2 + @test Aherm isa Hermitian + @test Aherm.uplo == LinearAlgebra.char_uplo(uplo) + end +end + +@testset "Structured display" begin + @testset "Diagonal" begin + d = 10:13 + D = Diagonal(d) + for uplo in (:L, :U), SymHerm in (Symmetric, Hermitian) + S = SymHerm(D, uplo) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D) + end + + d = (10:13) .+ 2im + D = Diagonal(d) + DR = Diagonal(complex.(real.(d))) + for uplo in (:L, :U) + H = Hermitian(D, uplo) + @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, DR) + + S = Symmetric(D, uplo) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D) + end + end + @testset "Bidiagonal" begin + dv, ev = 1:4, 1:3 + ST = SymTridiagonal(dv, ev) + D = Diagonal(dv) + for B_uplo in (:L, :U) + B = Bidiagonal(dv, ev, B_uplo) + for Sym_uplo in (:L, :U), SymHerm in (Symmetric, Hermitian) + SB = SymHerm(B, Sym_uplo) + teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? 
ST : D) + @test sprint(Base.print_matrix, SB) == teststr + SB = SymHerm(Transpose(B), Sym_uplo) + teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? D : ST) + @test sprint(Base.print_matrix, SB) == teststr + end + end + end + @testset "Tridiagonal" begin + superd, d, subd = 3:5, 10:13, 1:3 + for uplo in (:U, :L), SymHerm in (Symmetric, Hermitian) + S = SymHerm(Tridiagonal(subd, d, superd), uplo) + ST = SymTridiagonal(d, uplo == :U ? superd : subd) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST) + end + + superd, d, subd = collect((3:5)*im), collect(Complex{Int}, 10:13), collect((1:3)*im) + for uplo in (:U, :L) + S = Symmetric(Tridiagonal(subd, d, superd), uplo) + ST = SymTridiagonal(d, uplo == :U ? superd : subd) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST) + + H = Hermitian(Tridiagonal(subd, d, superd), uplo) + T = Tridiagonal(uplo == :L ? subd : conj(superd), d, uplo == :U ? superd : conj(subd)) + @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, T) + end + end +end + end # module TestSymmetric diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl new file mode 100644 index 0000000000000..e2f475323b292 --- /dev/null +++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl @@ -0,0 +1,147 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module TestSymmetricEigen + +using Test, LinearAlgebra + +@testset "chol-eigen-eigvals" begin + ## Cholesky decomposition based + + # eigenvalue sorting + sf = x->(imag(x),real(x)) + + ## Real valued + A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4] + H = (A+A')/2 + B = Float64[2 1 4 3; 0 3 1 3; 3 1 0 0; 0 1 3 1] + BH = (B+B')/2 + # PD matrix + BPD = B*B' + # eigen + C = cholesky(BPD) + e,v = eigen(A, C; sortby=sf) + @test A*v ≈ BPD*v*Diagonal(e) + # eigvals + @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf) + + ## Complex valued + A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] + AH = (A+A')/2 + B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] + BH = (B+B')/2 + # PD matrix + BPD = B*B' + # eigen + C = cholesky(BPD) + e,v = eigen(A, C; sortby=sf) + @test A*v ≈ BPD*v*Diagonal(e) + # eigvals + @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf) +end + +@testset "issue #49533" begin + # eigenvalue sorting + sf = x->(imag(x),real(x)) + + ## Real valued + A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4] + B = Matrix(Diagonal(Float64[1:4;])) + # eigen + e0,v0 = eigen(A, B) + e1,v1 = eigen(A, Symmetric(B)) + e2,v2 = eigen(Symmetric(A), B) + e3,v3 = eigen(Symmetric(A), Symmetric(B)) + @test e0 ≈ e1 && v0 ≈ v1 + @test e0 ≈ e2 && v0 ≈ v2 + @test e0 ≈ e3 && v0 ≈ v3 + # eigvals + @test eigvals(A, B) ≈ eigvals(A, Symmetric(B)) + @test eigvals(A, B) ≈ eigvals(Symmetric(A), B) + @test eigvals(A, B) ≈ eigvals(Symmetric(A), Symmetric(B)) + + ## Complex valued + A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] + AH = A'A + B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] + BH = B'B + # eigen + e1,v1 = eigen(A, Hermitian(BH)) + @test A*v1 ≈ Hermitian(BH)*v1*Diagonal(e1) + e2,v2 = eigen(Hermitian(AH), B) + @test Hermitian(AH)*v2 ≈ B*v2*Diagonal(e2) + e3,v3 = eigen(Hermitian(AH), Hermitian(BH)) + @test 
Hermitian(AH)*v3 ≈ Hermitian(BH)*v3*Diagonal(e3) + # eigvals + @test eigvals(A, BH; sortby=sf) ≈ eigvals(A, Hermitian(BH); sortby=sf) + @test eigvals(AH, B; sortby=sf) ≈ eigvals(Hermitian(AH), B; sortby=sf) + @test eigvals(AH, BH; sortby=sf) ≈ eigvals(Hermitian(AH), Hermitian(BH); sortby=sf) +end + +@testset "bk-lu-eigen-eigvals" begin + # Bunchkaufman decomposition based + + # eigenvalue sorting + sf = x->(imag(x),real(x)) + + # Real-valued random matrix + N = 10 + A = randn(N,N) + B = randn(N,N) + BH = (B+B')/2 + # eigen + e0 = eigvals(A,BH; sortby=sf) + e,v = eigen(A,bunchkaufman(Hermitian(BH,:L)); sortby=sf) + @test e0 ≈ e + @test A*v ≈ BH*v*Diagonal(e) + e,v = eigen(A,bunchkaufman(Hermitian(BH,:U)); sortby=sf) + @test e0 ≈ e + @test A*v ≈ BH*v*Diagonal(e) + e,v = eigen(A,lu(Hermitian(BH,:L)); sortby=sf) + @test e0 ≈ e + @test A*v ≈ BH*v*Diagonal(e) + e,v = eigen(A,lu(Hermitian(BH,:U)); sortby=sf) + @test e0 ≈ e + @test A*v ≈ BH*v*Diagonal(e) + # eigvals + e0 = eigvals(A,BH; sortby=sf) + el = eigvals(A,bunchkaufman(Hermitian(BH,:L)); sortby=sf) + eu = eigvals(A,bunchkaufman(Hermitian(BH,:U)); sortby=sf) + @test e0 ≈ el + @test e0 ≈ eu + el = eigvals(A,lu(Hermitian(BH,:L)); sortby=sf) + eu = eigvals(A,lu(Hermitian(BH,:U)); sortby=sf) + @test e0 ≈ el + @test e0 ≈ eu + + # Complex-valued random matrix + N = 10 + A = complex.(randn(N,N),randn(N,N)) + B = complex.(randn(N,N),randn(N,N)) + BH = (B+B')/2 + # eigen + e0 = eigvals(A,BH; sortby=sf) + e,v = eigen(A,bunchkaufman(Hermitian(BH,:L)); sortby=sf) + @test e0 ≈ e + @test A*v ≈ BH*v*Diagonal(e) + e,v = eigen(A,bunchkaufman(Hermitian(BH,:U)); sortby=sf) + @test e0 ≈ e + @test A*v ≈ BH*v*Diagonal(e) + e,v = eigen(A,lu(Hermitian(BH,:L)); sortby=sf) + @test e0 ≈ e + @test A*v ≈ BH*v*Diagonal(e) + e,v = eigen(A,lu(Hermitian(BH,:U)); sortby=sf) + @test e0 ≈ e + @test A*v ≈ BH*v*Diagonal(e) + # eigvals + e0 = eigvals(A,BH; sortby=sf) + el = eigvals(A,bunchkaufman(Hermitian(BH,:L)); sortby=sf) + eu = eigvals(A,bunchkaufman(Hermitian(BH,:U)); sortby=sf) + @test e0 ≈ el + @test e0 ≈ eu + el = eigvals(A,lu(Hermitian(BH,:L)); sortby=sf) + eu = eigvals(A,lu(Hermitian(BH,:U)); sortby=sf) + @test e0 ≈ el + @test e0 ≈ eu +end + +end # module TestSymmetricEigen diff --git a/stdlib/LinearAlgebra/test/testgroups b/stdlib/LinearAlgebra/test/testgroups index de082d8e7dce0..0f2f4f4af8708 100644 --- a/stdlib/LinearAlgebra/test/testgroups +++ b/stdlib/LinearAlgebra/test/testgroups @@ -1,28 +1,30 @@ triangular -qr -dense -matmul -schur -special -eigen -bunchkaufman -svd -lapack -tridiag +addmul bidiag +matmul +dense +symmetric diagonal +special +qr cholesky +blas lu -symmetric -generic uniformscaling -lq +structuredbroadcast hessenberg -blas +svd +eigen +tridiag +lapack +lq adjtrans -pinv +generic +schur +bunchkaufman givens -structuredbroadcast -addmul -ldlt +pinv factorization +abstractq +ldlt +symmetriceigen diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl index d3c2817f89463..ebff80d3db2ca 100644 --- a/stdlib/LinearAlgebra/test/triangular.jl +++ b/stdlib/LinearAlgebra/test/triangular.jl @@ -8,6 +8,11 @@ using LinearAlgebra: BlasFloat, errorbounds, full!, transpose!, UnitUpperTriangular, UnitLowerTriangular, mul!, rdiv!, rmul!, lmul! 
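# Illustrative sketch (not part of the patch): the symmetriceigen tests above repeatedly
# assert the generalized-eigenproblem identity A*v ≈ B*v*Diagonal(e). The same check via
# the high-level two-argument eigen on small hand-picked matrices (B positive definite):
using LinearAlgebra
A = Symmetric([2.0 1.0; 1.0 3.0])
B = Symmetric([4.0 0.5; 0.5 1.0])
λ, V = eigen(A, B)                    # solves A*v = λ*B*v
@assert A * V ≈ B * V * Diagonal(λ)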
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") + +isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) +using .Main.FillArrays + debug && println("Triangular matrices") n = 9 @@ -26,7 +31,7 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo (UnitLowerTriangular, :L)) # Construct test matrix - A1 = t1(elty1 == Int ? rand(1:7, n, n) : convert(Matrix{elty1}, (elty1 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo1 == :U ? t : copy(t'))) + A1 = t1(elty1 == Int ? rand(1:7, n, n) : convert(Matrix{elty1}, (elty1 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo1 === :U ? t : copy(t'))) @test t1(A1) === A1 @test t1{elty1}(A1) === A1 # test the ctor works for AbstractMatrix @@ -77,7 +82,7 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo A1c = copy(A1) for i = 1:size(A1, 1) for j = 1:size(A1, 2) - if uplo1 == :U + if uplo1 === :U if i > j A1c[i,j] = 0 @test_throws ArgumentError A1c[i,j] = 1 @@ -104,7 +109,7 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo end # istril/istriu - if uplo1 == :L + if uplo1 === :L @test istril(A1) @test !istriu(A1) @test istriu(A1') @@ -119,9 +124,19 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo @test !istriu(A1') @test !istriu(transpose(A1)) end + M = copy(parent(A1)) + for trans in (adjoint, transpose), k in -1:1 + triu!(M, k) + @test istril(trans(M), -k) == istril(copy(trans(M)), -k) == true + end + M = copy(parent(A1)) + for trans in (adjoint, transpose), k in 1:-1:-1 + tril!(M, k) + @test istriu(trans(M), -k) == istriu(copy(trans(M)), -k) == true + end #tril/triu - if uplo1 == :L + if uplo1 === :L @test tril(A1,0) == A1 @test tril(A1,-1) == LowerTriangular(tril(Matrix(A1), -1)) @test tril(A1,1) == t1(tril(tril(Matrix(A1), 1))) @@ -169,11 +184,19 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo # diag @test diag(A1) == diag(Matrix(A1)) + # tr + @test tr(A1)::elty1 == tr(Matrix(A1)) + # real @test real(A1) == real(Matrix(A1)) @test imag(A1) == imag(Matrix(A1)) @test abs.(A1) == abs.(Matrix(A1)) + # zero + if A1 isa UpperTriangular || A1 isa LowerTriangular + @test zero(A1) == zero(parent(A1)) + end + # Unary operations @test -A1 == -Matrix(A1) @@ -248,11 +271,7 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo for eltyb in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}) b1 = convert(Vector{eltyb}, (elty1 <: Complex ? real(A1) : A1)*fill(1., n)) b2 = convert(Vector{eltyb}, (elty1 <: Complex ? 
real(A1) : A1)*randn(n)) - if elty1 in (BigFloat, Complex{BigFloat}) || eltyb in (BigFloat, Complex{BigFloat}) - @test dot(b1, A1, b2) ≈ dot(A1'b1, b2) atol=sqrt(max(eps(real(float(one(elty1)))),eps(real(float(one(eltyb))))))*n*n - else - @test dot(b1, A1, b2) ≈ dot(A1'b1, b2) atol=sqrt(max(eps(real(float(one(elty1)))),eps(real(float(one(eltyb))))))*n*n - end + @test dot(b1, A1, b2) ≈ dot(A1'b1, b2) atol=sqrt(max(eps(real(float(one(elty1)))),eps(real(float(one(eltyb))))))*n*n end # Binary operations @@ -317,9 +336,9 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo (LowerTriangular, :L), (UnitLowerTriangular, :L)) - debug && println("elty1: $elty1, A1: $t1, elty2: $elty2") + debug && println("elty1: $elty1, A1: $t1, elty2: $elty2, A2: $t2") - A2 = t2(elty2 == Int ? rand(1:7, n, n) : convert(Matrix{elty2}, (elty2 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo2 == :U ? t : copy(t'))) + A2 = t2(elty2 == Int ? rand(1:7, n, n) : convert(Matrix{elty2}, (elty2 <: Complex ? complex.(randn(n, n), randn(n, n)) : randn(n, n)) |> t -> cholesky(t't).U |> t -> uplo2 === :U ? t : copy(t'))) # Convert if elty1 <: Real && !(elty2 <: Integer) @@ -348,21 +367,29 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo if t1 === UnitUpperTriangular && t2 === UnitUpperTriangular @test A1*A2 isa UnitUpperTriangular @test A1/A2 isa UnitUpperTriangular + elty1 == Int && elty2 == Int && @test eltype(A1/A2) == Int @test A1\A2 isa UnitUpperTriangular + elty1 == Int && elty2 == Int && @test eltype(A1\A2) == Int else @test A1*A2 isa UpperTriangular @test A1/A2 isa UpperTriangular + elty1 == Int && elty2 == Int && t2 === UnitUpperTriangular && @test eltype(A1/A2) == Int @test A1\A2 isa UpperTriangular + elty1 == Int && elty2 == Int && t1 === UnitUpperTriangular && @test eltype(A1\A2) == Int end elseif uplo1 === :L && uplo2 === :L if t1 === UnitLowerTriangular && t2 === UnitLowerTriangular @test A1*A2 isa UnitLowerTriangular @test A1/A2 isa UnitLowerTriangular + elty1 == Int && elty2 == Int && @test eltype(A1/A2) == Int @test A1\A2 isa UnitLowerTriangular + elty1 == Int && elty2 == Int && @test eltype(A1\A2) == Int else @test A1*A2 isa LowerTriangular @test A1/A2 isa LowerTriangular + elty1 == Int && elty2 == Int && t2 === UnitLowerTriangular && @test eltype(A1/A2) == Int @test A1\A2 isa LowerTriangular + elty1 == Int && elty2 == Int && t1 === UnitLowerTriangular && @test eltype(A1\A2) == Int end end offsizeA = Matrix{Float64}(I, n+1, n+1) @@ -376,20 +403,20 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo @test_throws DimensionMismatch A2' * offsizeA @test_throws DimensionMismatch A2 * offsizeA if (uplo1 == uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular) - @test rdiv!(copy(A1), copy(A2))::t1 ≈ A1/A2 ≈ Matrix(A1)/Matrix(A2) - @test ldiv!(copy(A2), copy(A1))::t1 ≈ A2\A1 ≈ Matrix(A2)\Matrix(A1) + @test rdiv!(copy(A1), A2)::t1 ≈ A1/A2 ≈ Matrix(A1)/Matrix(A2) + @test ldiv!(A2, copy(A1))::t1 ≈ A2\A1 ≈ Matrix(A2)\Matrix(A1) end if (uplo1 != uplo2 && elty1 == elty2 != Int && t2 != UnitLowerTriangular && t2 != UnitUpperTriangular) - @test lmul!(adjoint(copy(A1)), copy(A2)) ≈ A1'*A2 ≈ Matrix(A1)'*Matrix(A2) - @test lmul!(transpose(copy(A1)), copy(A2)) ≈ transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2) - @test ldiv!(adjoint(copy(A1)), copy(A2)) ≈ A1'\A2 ≈ Matrix(A1)'\Matrix(A2) - @test ldiv!(transpose(copy(A1)), copy(A2)) ≈ 
transpose(A1)\A2 ≈ transpose(Matrix(A1))\Matrix(A2) + @test lmul!(adjoint(A1), copy(A2)) ≈ A1'*A2 ≈ Matrix(A1)'*Matrix(A2) + @test lmul!(transpose(A1), copy(A2)) ≈ transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2) + @test ldiv!(adjoint(A1), copy(A2)) ≈ A1'\A2 ≈ Matrix(A1)'\Matrix(A2) + @test ldiv!(transpose(A1), copy(A2)) ≈ transpose(A1)\A2 ≈ transpose(Matrix(A1))\Matrix(A2) end if (uplo1 != uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular) - @test rmul!(copy(A1), adjoint(copy(A2))) ≈ A1*A2' ≈ Matrix(A1)*Matrix(A2)' - @test rmul!(copy(A1), transpose(copy(A2))) ≈ A1*transpose(A2) ≈ Matrix(A1)*transpose(Matrix(A2)) - @test rdiv!(copy(A1), adjoint(copy(A2))) ≈ A1/A2' ≈ Matrix(A1)/Matrix(A2)' - @test rdiv!(copy(A1), transpose(copy(A2))) ≈ A1/transpose(A2) ≈ Matrix(A1)/transpose(Matrix(A2)) + @test rmul!(copy(A1), adjoint(A2)) ≈ A1*A2' ≈ Matrix(A1)*Matrix(A2)' + @test rmul!(copy(A1), transpose(A2)) ≈ A1*transpose(A2) ≈ Matrix(A1)*transpose(Matrix(A2)) + @test rdiv!(copy(A1), adjoint(A2)) ≈ A1/A2' ≈ Matrix(A1)/Matrix(A2)' + @test rdiv!(copy(A1), transpose(A2)) ≈ A1/transpose(A2) ≈ Matrix(A1)/transpose(Matrix(A2)) end end end @@ -399,17 +426,15 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo debug && println("elty1: $elty1, A1: $t1, B: $eltyB") - if !(eltyB in (BigFloat, Complex{BigFloat})) # rand does not support BigFloat and Complex{BigFloat} as of Dec 2015 - Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) - @test lmul!(Tri,copy(A1)) ≈ Tri*Matrix(A1) - Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) - C = Matrix{promote_type(elty1,eltyB)}(undef, n, n) - mul!(C, Tri, copy(A1)) - @test C ≈ Tri*Matrix(A1) - Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) - mul!(C, copy(A1), Tri) - @test C ≈ Matrix(A1)*Tri - end + Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) + @test lmul!(Tri,copy(A1)) ≈ Tri*Matrix(A1) + Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) + C = Matrix{promote_type(elty1,eltyB)}(undef, n, n) + mul!(C, Tri, A1) + @test C ≈ Tri*Matrix(A1) + Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1)) + mul!(C, A1, Tri) + @test C ≈ Matrix(A1)*Tri # Triangular-dense Matrix/vector multiplication @test A1*B[:,1] ≈ Matrix(A1)*B[:,1] @@ -549,7 +574,7 @@ end end end -@testset "check matrix logarithm type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64) +@testset "check matrix logarithm type-inferable" for elty in (Float32,Float64,ComplexF32,ComplexF64) A = UpperTriangular(exp(triu(randn(elty, n, n)))) @inferred Union{typeof(A),typeof(complex(A))} log(A) @test exp(Matrix(log(A))) ≈ A @@ -686,8 +711,23 @@ isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "te using .Main.Furlongs LinearAlgebra.sylvester(a::Furlong,b::Furlong,c::Furlong) = -c / (a + b) -let A = UpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)]) - @test sqrt(A) == Furlong{1//2}.(UpperTriangular([1 2; 0 1])) +@testset "dimensional correctness" begin + A = UpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)]) + @test sqrt(A)::UpperTriangular == Furlong{1//2}.(UpperTriangular([1 2; 0 1])) + @test inv(A)::UpperTriangular == Furlong{-1}.(UpperTriangular([1 -4; 0 1])) + B = UnitUpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)]) + @test sqrt(B)::UnitUpperTriangular == Furlong{1//2}.(UpperTriangular([1 2; 0 1])) + @test inv(B)::UnitUpperTriangular == Furlong{-1}.(UpperTriangular([1 -4; 0 1])) + b = 
[Furlong(5), Furlong(8)] + @test (A \ b)::Vector{<:Furlong{0}} == (B \ b)::Vector{<:Furlong{0}} == Furlong{0}.([-27, 8]) + C = LowerTriangular([Furlong(1) Furlong(0); Furlong(4) Furlong(1)]) + @test sqrt(C)::LowerTriangular == Furlong{1//2}.(LowerTriangular([1 0; 2 1])) + @test inv(C)::LowerTriangular == Furlong{-1}.(LowerTriangular([1 0; -4 1])) + D = UnitLowerTriangular([Furlong(1) Furlong(0); Furlong(4) Furlong(1)]) + @test sqrt(D)::UnitLowerTriangular == Furlong{1//2}.(UnitLowerTriangular([1 0; 2 1])) + @test inv(D)::UnitLowerTriangular == Furlong{-1}.(UnitLowerTriangular([1 0; -4 1])) + b = [Furlong(5), Furlong(8)] + @test (C \ b)::Vector{<:Furlong{0}} == (D \ b)::Vector{<:Furlong{0}} == Furlong{0}.([5, -12]) end isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl")) @@ -836,4 +876,27 @@ end end end +@testset "arithmetic with an immutable parent" begin + F = FillArrays.Fill(2, (4,4)) + for UT in (UnitUpperTriangular, UnitLowerTriangular) + U = UT(F) + @test -U == -Array(U) + end +end + +@testset "error paths" begin + A = zeros(1,1); B = zeros(2,2) + @testset "inplace mul scaling with incompatible sizes" begin + for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular) + @test_throws DimensionMismatch mul!(T(A), T(B), 3) + @test_throws DimensionMismatch mul!(T(A), 3, T(B)) + end + end + @testset "copyto with incompatible sizes" begin + for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular) + @test_throws BoundsError copyto!(T(A), T(B)) + end + end +end + end # module TestTriangular diff --git a/stdlib/LinearAlgebra/test/trickyarithmetic.jl b/stdlib/LinearAlgebra/test/trickyarithmetic.jl index c5faf57acd857..ad04ac89c2761 100644 --- a/stdlib/LinearAlgebra/test/trickyarithmetic.jl +++ b/stdlib/LinearAlgebra/test/trickyarithmetic.jl @@ -8,12 +8,15 @@ module TrickyArithmetic Base.convert(::Type{A}, i::Int) = A(i) Base.zero(::Union{A, Type{A}}) = A(0) Base.one(::Union{A, Type{A}}) = A(1) + Base.isfinite(a::A) = isfinite(a.x) struct B x::Int end struct C x::Int end + Base.isfinite(b::B) = isfinite(b.x) + Base.isfinite(c::C) = isfinite(c.x) C(a::A) = C(a.x) Base.zero(::Union{C, Type{C}}) = C(0) Base.one(::Union{C, Type{C}}) = C(1) @@ -40,6 +43,7 @@ module TrickyArithmetic Base.:(*)(a::Union{A,B,C}, b::D) = b * a Base.inv(a::Union{A,B,C}) = A(1) / a Base.inv(a::D) = a.d / a.n + Base.isfinite(a::D) = isfinite(a.n) && isfinite(a.d) Base.:(/)(a::Union{A,B,C}, b::Union{A,B,C}) = D(a, b) Base.:(/)(a::D, b::Union{A,B,C}) = a.n / (a.d*b) Base.:(/)(a::Union{A,B,C,D}, b::D) = a * inv(b) diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl index ecdf6b416baa5..0c07e5b160c58 100644 --- a/stdlib/LinearAlgebra/test/tridiag.jl +++ b/stdlib/LinearAlgebra/test/tridiag.jl @@ -9,6 +9,12 @@ const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl")) using .Main.Quaternions +isdefined(Main, :InfiniteArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "InfiniteArrays.jl")) +using .Main.InfiniteArrays + +isdefined(Main, :FillArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FillArrays.jl")) +using .Main.FillArrays + include("testutils.jl") # test_approx_eq_modphase #Test equivalence of eigenvectors/singular vectors taking into account possible phase (sign) differences @@ -71,11 +77,13 
@@ end @test ST == Matrix(ST) @test ST.dv === x @test ST.ev === y + @test typeof(ST)(ST) === ST TT = (Tridiagonal(y, x, y))::Tridiagonal{elty, typeof(x)} @test TT == Matrix(TT) @test TT.dl === y @test TT.d === x - @test TT.du === y + @test TT.du == y + @test typeof(TT)(TT) === TT end ST = SymTridiagonal{elty}([1,2,3,4], [1,2,3]) @test eltype(ST) == elty @@ -87,12 +95,12 @@ end @test isa(ST, SymTridiagonal{elty,Vector{elty}}) TT = Tridiagonal{elty,Vector{elty}}(GenericArray(dl), d, GenericArray(dl)) @test isa(TT, Tridiagonal{elty,Vector{elty}}) - @test_throws MethodError SymTridiagonal(d, GenericArray(dl)) - @test_throws MethodError SymTridiagonal(GenericArray(d), dl) - @test_throws MethodError Tridiagonal(GenericArray(dl), d, GenericArray(dl)) - @test_throws MethodError Tridiagonal(dl, GenericArray(d), dl) - @test_throws MethodError SymTridiagonal{elty}(d, GenericArray(dl)) - @test_throws MethodError Tridiagonal{elty}(GenericArray(dl), d,GenericArray(dl)) + @test_throws ArgumentError SymTridiagonal(d, GenericArray(dl)) + @test_throws ArgumentError SymTridiagonal(GenericArray(d), dl) + @test_throws ArgumentError Tridiagonal(GenericArray(dl), d, GenericArray(dl)) + @test_throws ArgumentError Tridiagonal(dl, GenericArray(d), dl) + @test_throws ArgumentError SymTridiagonal{elty}(d, GenericArray(dl)) + @test_throws ArgumentError Tridiagonal{elty}(GenericArray(dl), d,GenericArray(dl)) STI = SymTridiagonal([1,2,3,4], [1,2,3]) TTI = Tridiagonal([1,2,3], [1,2,3,4], [1,2,3]) TTI2 = Tridiagonal([1,2,3], [1,2,3,4], [1,2,3], [1,2]) @@ -226,7 +234,7 @@ end @test size(A, 3) == 1 @test size(A, 1) == n @test size(A) == (n, n) - @test_throws ArgumentError size(A, 0) + @test_throws BoundsError size(A, 0) end @testset "getindex" begin @test_throws BoundsError A[n + 1, 1] @@ -261,6 +269,13 @@ end @test (@inferred diag(GA))::typeof(GenericArray(d)) == GenericArray(d) @test (@inferred diag(GA, -1))::typeof(GenericArray(d)) == GenericArray(dl) end + @testset "trace" begin + if real(elty) <: Integer + @test tr(A) == tr(fA) + else + @test tr(A) ≈ tr(fA) rtol=2eps(real(elty)) + end + end @testset "Idempotent tests" begin for func in (conj, transpose, adjoint) @test func(func(A)) == A @@ -419,27 +434,37 @@ end end else # mat_type is Tridiagonal @testset "tridiagonal linear algebra" begin - for (BB, vv) in ((copy(B), copy(v)), (view(B, 1:n, 1), view(v, 1:n))) + for vv in (copy(v), view(copy(v), 1:n)) @test A*vv ≈ fA*vv invFv = fA\vv @test A\vv ≈ invFv - # @test Base.solve(T,v) ≈ invFv - # @test Base.solve(T, B) ≈ F\B Tlu = factorize(A) x = Tlu\vv @test x ≈ invFv end + elty != Int && @test A \ v ≈ ldiv!(copy(A), copy(v)) end + F = lu(A) + L1, U1, p1 = F + G = lu!(F, 2A) + L2, U2, p2 = F + @test L1 ≈ L2 + @test 2U1 ≈ U2 + @test p1 == p2 end @testset "generalized dot" begin x = fill(convert(elty, 1), n) y = fill(convert(elty, 1), n) - @test dot(x, A, y) ≈ dot(A'x, y) + @test dot(x, A, y) ≈ dot(A'x, y) ≈ dot(x, A*y) + @test dot([1], SymTridiagonal([1], Int[]), [1]) == 1 + @test dot([1], Tridiagonal(Int[], [1], Int[]), [1]) == 1 + @test dot(Int[], SymTridiagonal(Int[], Int[]), Int[]) === 0 + @test dot(Int[], Tridiagonal(Int[], Int[], Int[]), Int[]) === 0 end end end -@testset "SymTridiagonal block matrix" begin +@testset "SymTridiagonal/Tridiagonal block matrix" begin M = [1 2; 2 4] n = 5 A = SymTridiagonal(fill(M, n), fill(M, n-1)) @@ -453,12 +478,38 @@ end @test_throws ArgumentError diag(A, 2) @test_throws ArgumentError diag(A, n+1) @test_throws ArgumentError diag(A, -n-1) + + A = Tridiagonal(fill(M, n-1), fill(M, n), 
fill(M, n-1)) + @test @inferred A[1,1] == M + @test @inferred A[1,2] == M + @test @inferred A[2,1] == M + @test @inferred diag(A, 1) == fill(M, n-1) + @test @inferred diag(A, 0) == fill(M, n) + @test @inferred diag(A, -1) == fill(M, n-1) + @test_throws MethodError diag(A, -2) + @test_throws MethodError diag(A, 2) + @test_throws ArgumentError diag(A, n+1) + @test_throws ArgumentError diag(A, -n-1) + + for n in 0:2 + dv, ev = fill(M, n), fill(M, max(n-1,0)) + A = SymTridiagonal(dv, ev) + @test A == Matrix{eltype(A)}(A) + + A = Tridiagonal(ev, dv, ev) + @test A == Matrix{eltype(A)}(A) + end end @testset "Issue 12068" begin @test SymTridiagonal([1, 2], [0])^3 == [1 0; 0 8] end +@testset "Issue #48505" begin + @test SymTridiagonal([1,2,3],[4,5.0]) == [1.0 4.0 0.0; 4.0 2.0 5.0; 0.0 5.0 3.0] + @test Tridiagonal([1, 2], [4, 5, 1], [6.0, 7]) == [4.0 6.0 0.0; 1.0 5.0 7.0; 0.0 2.0 1.0] +end + @testset "convert for SymTridiagonal" begin STF32 = SymTridiagonal{Float32}(fill(1f0, 5), fill(1f0, 4)) @test convert(SymTridiagonal{Float64}, STF32)::SymTridiagonal{Float64} == STF32 @@ -475,6 +526,14 @@ end @test Tridiagonal(4:5, 1:3, 1:2) == [1 1 0; 4 2 2; 0 5 3] end +@testset "Prevent off-diagonal aliasing in Tridiagonal" begin + e = ones(4) + f = e[1:end-1] + T = Tridiagonal(f, 2e, f) + T ./= 10 + @test all(==(0.1), f) +end + @testset "Issue #26994 (and the empty case)" begin T = SymTridiagonal([1.0],[3.0]) x = ones(1) @@ -725,4 +784,38 @@ using .Main.SizedArrays @test S !== Tridiagonal(diag(Sdense, 1), diag(Sdense), diag(Sdense, 1)) !== S end end + +@testset "copyto! with UniformScaling" begin + @testset "Tridiagonal" begin + @testset "Fill" begin + for len in (4, InfiniteArrays.Infinity()) + d = FillArrays.Fill(1, len) + ud = FillArrays.Fill(0, len-1) + T = Tridiagonal(ud, d, ud) + @test copyto!(T, I) === T + end + end + T = Tridiagonal(fill(3, 3), fill(2, 4), fill(3, 3)) + copyto!(T, I) + @test all(isone, diag(T)) + @test all(iszero, diag(T, 1)) + @test all(iszero, diag(T, -1)) + end + @testset "SymTridiagonal" begin + @testset "Fill" begin + for len in (4, InfiniteArrays.Infinity()) + d = FillArrays.Fill(1, len) + ud = FillArrays.Fill(0, len-1) + ST = SymTridiagonal(d, ud) + @test copyto!(ST, I) === ST + end + end + ST = SymTridiagonal(fill(2, 4), fill(3, 3)) + copyto!(ST, I) + @test all(isone, diag(ST)) + @test all(iszero, diag(ST, 1)) + @test all(iszero, diag(ST, -1)) + end +end + end # module TestTridiagonal diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl index be1b9887d570f..975cbf7bc59bc 100644 --- a/stdlib/LinearAlgebra/test/uniformscaling.jl +++ b/stdlib/LinearAlgebra/test/uniformscaling.jl @@ -183,10 +183,10 @@ end end @test copy(UniformScaling(one(Float64))) == UniformScaling(one(Float64)) -@test sprint(show,MIME"text/plain"(),UniformScaling(one(ComplexF64))) == "LinearAlgebra.UniformScaling{ComplexF64}\n(1.0 + 0.0im)*I" -@test sprint(show,MIME"text/plain"(),UniformScaling(one(Float32))) == "LinearAlgebra.UniformScaling{Float32}\n1.0*I" -@test sprint(show,UniformScaling(one(ComplexF64))) == "LinearAlgebra.UniformScaling{ComplexF64}(1.0 + 0.0im)" -@test sprint(show,UniformScaling(one(Float32))) == "LinearAlgebra.UniformScaling{Float32}(1.0f0)" +@test sprint(show,MIME"text/plain"(),UniformScaling(one(ComplexF64))) == "$(LinearAlgebra.UniformScaling){ComplexF64}\n(1.0 + 0.0im)*I" +@test sprint(show,MIME"text/plain"(),UniformScaling(one(Float32))) == "$(LinearAlgebra.UniformScaling){Float32}\n1.0*I" +@test 
sprint(show,UniformScaling(one(ComplexF64))) == "$(LinearAlgebra.UniformScaling){ComplexF64}(1.0 + 0.0im)" +@test sprint(show,UniformScaling(one(Float32))) == "$(LinearAlgebra.UniformScaling){Float32}(1.0f0)" let λ = complex(randn(),randn()) diff --git a/stdlib/Logging/Project.toml b/stdlib/Logging/Project.toml index af931e68e07d1..ce69112733d5e 100644 --- a/stdlib/Logging/Project.toml +++ b/stdlib/Logging/Project.toml @@ -1,5 +1,6 @@ name = "Logging" uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md index 9a269ee54571b..55d24c7ae0a26 100644 --- a/stdlib/Logging/docs/src/index.md +++ b/stdlib/Logging/docs/src/index.md @@ -180,7 +180,7 @@ pattern match against the log event stream. ## Environment variables -Message filtering can be influenced through the `JULIA_DEBUG` environment +Message filtering can be influenced through the [`JULIA_DEBUG`](@ref JULIA_DEBUG) environment variable, and serves as an easy way to enable debug logging for a file or module. Loading julia with `JULIA_DEBUG=loading` will activate `@debug` log messages in `loading.jl`. For example, in Linux shells: diff --git a/stdlib/Logging/src/ConsoleLogger.jl b/stdlib/Logging/src/ConsoleLogger.jl index bb040aac91858..747f8a2b22966 100644 --- a/stdlib/Logging/src/ConsoleLogger.jl +++ b/stdlib/Logging/src/ConsoleLogger.jl @@ -117,8 +117,8 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module # Generate a text representation of the message and all key value pairs, # split into lines. msglines = [(indent=0, msg=l) for l in split(chomp(convert(String, string(message))::String), '\n')] - stream = logger.stream - if !isopen(stream) + stream::IO = logger.stream + if !(isopen(stream)::Bool) stream = stderr end dsize = displaysize(stream)::Tuple{Int,Int} diff --git a/stdlib/Logging/test/runtests.jl b/stdlib/Logging/test/runtests.jl index b6b4813964536..3a793c4e0bc33 100644 --- a/stdlib/Logging/test/runtests.jl +++ b/stdlib/Logging/test/runtests.jl @@ -6,6 +6,10 @@ import Logging: min_enabled_level, shouldlog, handle_message @noinline func1() = backtrace() +# see "custom log macro" testset +CustomLog = LogLevel(-500) +macro customlog(exs...) Base.CoreLogging.logmsg_code((Base.CoreLogging.@_sourceinfo)..., esc(CustomLog), exs...) 
end + @testset "Logging" begin @testset "Core" begin @@ -275,4 +279,17 @@ end @test m.run() end +@testset "custom log macro" begin + @test_logs (CustomLog, "a") min_level=CustomLog @customlog "a" + + buf = IOBuffer() + io = IOContext(buf, :displaysize=>(30,80), :color=>false) + logger = ConsoleLogger(io, CustomLog) + + with_logger(logger) do + @customlog "a" + end + @test occursin("LogLevel(-500): a", String(take!(buf))) +end + end diff --git a/stdlib/MPFR_jll/Project.toml b/stdlib/MPFR_jll/Project.toml index 22aa30d20511b..e4b24d070db55 100644 --- a/stdlib/MPFR_jll/Project.toml +++ b/stdlib/MPFR_jll/Project.toml @@ -1,6 +1,6 @@ name = "MPFR_jll" uuid = "3a97d323-0669-5f0c-9066-3539efd106a3" -version = "4.1.1+1" +version = "4.2.0+1" [deps] GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d" diff --git a/stdlib/MPFR_jll/src/MPFR_jll.jl b/stdlib/MPFR_jll/src/MPFR_jll.jl index 5b2dbd1e84b24..c184a9801102f 100644 --- a/stdlib/MPFR_jll/src/MPFR_jll.jl +++ b/stdlib/MPFR_jll/src/MPFR_jll.jl @@ -13,9 +13,9 @@ export libmpfr # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libmpfr_handle = C_NULL -libmpfr_path = "" +artifact_dir::String = "" +libmpfr_handle::Ptr{Cvoid} = C_NULL +libmpfr_path::String = "" if Sys.iswindows() const libmpfr = "libmpfr-6.dll" diff --git a/stdlib/MPFR_jll/test/runtests.jl b/stdlib/MPFR_jll/test/runtests.jl index 68bb6d3ec40e4..81b6e06ed7b49 100644 --- a/stdlib/MPFR_jll/test/runtests.jl +++ b/stdlib/MPFR_jll/test/runtests.jl @@ -4,5 +4,5 @@ using Test, Libdl, MPFR_jll @testset "MPFR_jll" begin vn = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Cstring, ()))) - @test vn == v"4.1.0" + @test vn == v"4.2.0" end diff --git a/stdlib/Makefile b/stdlib/Makefile index 44c3b97e2fb0f..1c8a2849d75f1 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -6,20 +6,22 @@ SRCCACHE := $(abspath $(SRCDIR)/srccache) BUILDDIR := . include $(JULIAHOME)/Make.inc -include $(JULIAHOME)/deps/Versions.make include $(JULIAHOME)/deps/tools/common.mk include $(JULIAHOME)/deps/tools/stdlib-external.mk +# include all `*.version` files, import `LIBNAME_JLL_NAME` and `LIBNAME_JLL_VER` +# Note: Some deps do not have a `_jll` pkg: [libwhich, patchelf, utf8proc] +include $(JULIAHOME)/deps/*.version -VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION) -$(build_datarootdir)/julia/stdlib/$(VERSDIR): - mkdir -p $@ +VERSDIR := v$(shell cut -d. 
-f1-2 < $(JULIAHOME)/VERSION) +DIRS := $(build_datarootdir)/julia/stdlib/$(VERSDIR) $(build_prefix)/manifest/$(VERSDIR) +$(foreach dir,$(DIRS),$(eval $(call dir_target,$(dir)))) JLLS = DSFMT GMP CURL LIBGIT2 LLVM LIBSSH2 LIBUV MBEDTLS MPFR NGHTTP2 \ BLASTRAMPOLINE OPENBLAS OPENLIBM P7ZIP PCRE LIBSUITESPARSE ZLIB \ - LLVMUNWIND CSL UNWIND + LLVMUNWIND CSL UNWIND LLD -# Initialize this with JLLs that aren't in deps/Versions.make +# Initialize this with JLLs that aren't in "deps/$(LibName).version" JLL_NAMES := MozillaCACerts_jll get-MozillaCACerts_jll: install-MozillaCACerts_jll: @@ -38,16 +40,22 @@ endef $(foreach jll,$(JLLS),$(eval $(call download-artifacts-toml,$(jll)))) -STDLIBS = Artifacts Base64 CRC32c Dates DelimitedFiles Distributed FileWatching \ - Future InteractiveUtils LazyArtifacts Libdl LibGit2 LinearAlgebra Logging \ - Markdown Mmap Printf Profile Random REPL Serialization SHA \ - SharedArrays Sockets SparseArrays SuiteSparse Test TOML Unicode UUIDs \ +STDLIBS = Artifacts Base64 CRC32c Dates FileWatching \ + Future InteractiveUtils Libdl LibGit2 LinearAlgebra Logging \ + Markdown Mmap Printf Profile Random REPL Serialization \ + SharedArrays Sockets Test TOML Unicode UUIDs \ $(JLL_NAMES) -STDLIBS_EXT = Pkg Statistics LibCURL DelimitedFiles Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA +STDLIBS_EXT = Pkg Statistics LazyArtifacts LibCURL DelimitedFiles Downloads ArgTools \ + Tar NetworkOptions SuiteSparse SparseArrays StyledStrings SHA Distributed $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z)))) +ifneq ($(filter $(STDLIBS),$(STDLIBS_EXT)),) +$(error ERROR duplicated STDLIBS in list) +endif + + # Generate symlinks to all stdlibs at usr/share/julia/stdlib/vX.Y/ $(foreach module, $(STDLIBS), $(eval $(call symlink_target,$$(JULIAHOME)/stdlib/$(module),$$(build_datarootdir)/julia/stdlib/$$(VERSDIR),$(module)))) @@ -65,5 +73,5 @@ clean: $(addprefix clean-, $(STDLIBS_EXT)) $(CLEAN_TARGETS) extstdlibclean distclean: $(addprefix distclean-, $(STDLIBS_EXT)) clean checksumall: $(addprefix checksum-, $(STDLIBS_EXT)) -DEP_LIBS_STAGED_ALL := $(STDLIBS_EXT) +DEP_LIBS_STAGED_ALL := $(addprefix $(VERSDIR)/,$(STDLIBS_EXT)) include $(JULIAHOME)/deps/tools/uninstallers.mk diff --git a/stdlib/Markdown/Project.toml b/stdlib/Markdown/Project.toml index 229e58749d233..b40de17b9422d 100644 --- a/stdlib/Markdown/Project.toml +++ b/stdlib/Markdown/Project.toml @@ -1,5 +1,6 @@ name = "Markdown" uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" [deps] Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" diff --git a/stdlib/Markdown/docs/src/index.md b/stdlib/Markdown/docs/src/index.md index a107929d1e838..44f2f2dbfd688 100644 --- a/stdlib/Markdown/docs/src/index.md +++ b/stdlib/Markdown/docs/src/index.md @@ -153,8 +153,8 @@ A header line can contain any inline syntax in the same way as a paragraph can. ### Code blocks -Source code can be displayed as a literal block using an indent of four spaces as shown in the -following example. +Source code can be displayed as a literal block using an indent of four spaces or one tab as shown +in the following example. ``` This is a paragraph. @@ -298,7 +298,8 @@ aside from the `:` character that is appended to the footnote label. [^note]: - Named footnote text containing several toplevel elements. + Named footnote text containing several toplevel elements + indented by 4 spaces or one tab. 
* item one * item two @@ -357,6 +358,7 @@ They can be defined using the following `!!!` syntax: !!! note This is the content of the note. + It is indented by 4 spaces. A tab would work as well. !!! warning "Beware!" diff --git a/stdlib/Markdown/src/Common/block.jl b/stdlib/Markdown/src/Common/block.jl index 366a7283f0be5..bd184b60c40fa 100644 --- a/stdlib/Markdown/src/Common/block.jl +++ b/stdlib/Markdown/src/Common/block.jl @@ -61,7 +61,7 @@ function hashheader(stream::IO, md::MD) if c != '\n' # Empty header h = strip(readline(stream)) - h = match(r"(.*?)( +#+)?$", h).captures[1] + h = (match(r"(.*?)( +#+)?$", h)::AbstractMatch).captures[1] buffer = IOBuffer() print(buffer, h) push!(md.content, Header(parseinline(seek(buffer, 0), md), level)) @@ -136,7 +136,7 @@ function footnote(stream::IO, block::MD) if isempty(str) return false else - ref = match(regex, str).captures[1] + ref = (match(regex, str)::AbstractMatch).captures[1] buffer = IOBuffer() write(buffer, readline(stream, keep=true)) while !eof(stream) @@ -211,11 +211,11 @@ function admonition(stream::IO, block::MD) titled = r"^([a-z]+) \"(.*)\"$", # !!! "" line = strip(readline(stream)) if occursin(untitled, line) - m = match(untitled, line) + m = match(untitled, line)::AbstractMatch # When no title is provided we use CATEGORY_NAME, capitalising it. m.captures[1], uppercasefirst(m.captures[1]) elseif occursin(titled, line) - m = match(titled, line) + m = match(titled, line)::AbstractMatch # To have a blank TITLE provide an explicit empty string as TITLE. m.captures[1], m.captures[2] else @@ -274,7 +274,7 @@ function list(stream::IO, block::MD) elseif occursin(r"^ {0,3}\d+(\.|\))( |$)", bullet) # An ordered list. Either with `1. ` or `1) ` style numbering. r = occursin(".", bullet) ? r"^ {0,3}(\d+)\.( |$)" : r"^ {0,3}(\d+)\)( |$)" - Base.parse(Int, match(r, bullet).captures[1]), r + Base.parse(Int, (match(r, bullet)::AbstractMatch).captures[1]), r else # Failed to match any bullets. This branch shouldn't actually be needed # since the `NUM_OR_BULLETS` regex should cover this, but we include it diff --git a/stdlib/Markdown/src/Common/inline.jl b/stdlib/Markdown/src/Common/inline.jl index fd5134481e113..fda716a10fae7 100644 --- a/stdlib/Markdown/src/Common/inline.jl +++ b/stdlib/Markdown/src/Common/inline.jl @@ -112,7 +112,7 @@ function footnote_link(stream::IO, md::MD) if isempty(str) return else - ref = match(regex, str).captures[1] + ref = (match(regex, str)::AbstractMatch).captures[1] return Footnote(ref, nothing) end end diff --git a/stdlib/Markdown/src/parse/util.jl b/stdlib/Markdown/src/parse/util.jl index 7be845c96a9fc..aabfcbb3ddc62 100644 --- a/stdlib/Markdown/src/parse/util.jl +++ b/stdlib/Markdown/src/parse/util.jl @@ -36,7 +36,7 @@ function skipblank(io::IO) end """ -Returns true if the line contains only (and, unless allowempty, +Return true if the line contains only (and, unless allowempty, at least one of) the characters given. 
""" function linecontains(io::IO, chars; allow_whitespace = true, diff --git a/stdlib/Markdown/src/render/terminal/formatting.jl b/stdlib/Markdown/src/render/terminal/formatting.jl index 87022124b9c8a..a031de4d9ad82 100644 --- a/stdlib/Markdown/src/render/terminal/formatting.jl +++ b/stdlib/Markdown/src/render/terminal/formatting.jl @@ -3,7 +3,7 @@ # Wrapping function ansi_length(s) - replace(s, r"\e\[[0-9]+m" => "") |> length + replace(s, r"\e\[[0-9]+m" => "") |> textwidth end words(s) = split(s, " ") @@ -17,6 +17,23 @@ function wrapped_line(io::IO, s::AbstractString, width, i) word_length == 0 && continue if isempty(lines) || i + word_length + 1 > width i = word_length + if length(lines) > 0 + last_line = lines[end] + maybe_underline = findlast(Base.text_colors[:underline], last_line) + if !isnothing(maybe_underline) + # disable underline style at end of line if not already disabled. + maybe_disable_underline = max( + last(something(findlast(Base.disable_text_style[:underline], last_line), -1)), + last(something(findlast(Base.text_colors[:normal], last_line), -1)), + ) + + if maybe_disable_underline < 0 || maybe_disable_underline < last(maybe_underline) + + lines[end] = last_line * Base.disable_text_style[:underline] + word = Base.text_colors[:underline] * word + end + end + end push!(lines, word) else i += word_length + 1 diff --git a/stdlib/Markdown/src/render/terminal/render.jl b/stdlib/Markdown/src/render/terminal/render.jl index 3fd274aee2a2e..20b1ef6d041fc 100644 --- a/stdlib/Markdown/src/render/terminal/render.jl +++ b/stdlib/Markdown/src/render/terminal/render.jl @@ -81,14 +81,16 @@ end function _term_header(io::IO, md, char, columns) text = terminline_string(io, md.text) with_output_color(:bold, io) do io - print(io, ' '^margin) + pre = ' '^margin + print(io, pre) line_no, lastline_width = print_wrapped(io, text, - width=columns - 4margin; pre=" ") - line_width = min(1 + lastline_width, columns) + width=columns - 4margin; pre) + line_width = min(lastline_width, columns) if line_no > 1 - line_width = max(line_width, div(columns, 3)) + line_width = max(line_width, div(columns, 3)+length(pre)) end - char != ' ' && print(io, '\n', ' '^(margin), char^line_width) + header_width = max(0, line_width-length(pre)) + char != ' ' && header_width > 0 && print(io, '\n', ' '^(margin), char^header_width) end end diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl index dfe80430a00d6..84f0868747567 100644 --- a/stdlib/Markdown/test/runtests.jl +++ b/stdlib/Markdown/test/runtests.jl @@ -376,7 +376,8 @@ table = md""" # mime output let out = @test sprint(show, "text/plain", book) == - " Title\n ≡≡≡≡≡≡≡\n\n Some discussion\n\n │ A quote\n\n Section important\n ===================\n\n Some bolded\n\n • list1\n\n • list2" + " Title\n ≡≡≡≡≡\n\n Some discussion\n\n │ A quote\n\n Section important\n =================\n\n Some bolded\n\n • list1\n\n • list2" + @test sprint(show, "text/plain", md"#") == " " # edge case of empty header @test sprint(show, "text/markdown", book) == """ # Title @@ -1148,7 +1149,7 @@ end # issue 20225, check this can print @test typeof(sprint(Markdown.term, Markdown.parse(" "))) == String -# different output depending on whether color is requested: +# issue 20225, check this can print +# different output depending on whether color is requested: +# issue 20225, check this can print let buf = IOBuffer() @test typeof(sprint(Markdown.term, Markdown.parse(" "))) == String show(buf, "text/plain", md"*emph*") @@ -1159,6 +1160,38 @@ let buf = IOBuffer() 
@test String(take!(buf)) == " \e[4memph\e[24m" end +let word = "Markdown" # disable underline when wrapping lines + buf = IOBuffer() + ctx = IOContext(buf, :color => true, :displaysize => (displaysize(buf)[1], length(word))) + long_italic_text = Markdown.parse('_' * join(fill(word, 10), ' ') * '_') + show(ctx, MIME("text/plain"), long_italic_text) + lines = split(String(take!(buf)), '\n') + @test endswith(lines[begin], Base.disable_text_style[:underline]) + @test startswith(lines[begin+1], ' '^Markdown.margin * Base.text_colors[:underline]) +end + +let word = "Markdown" # pre is of size Markdown.margin when wrapping title + buf = IOBuffer() + ctx = IOContext(buf, :color => true, :displaysize => (displaysize(buf)[1], length(word))) + long_title = Markdown.parse("# " * join(fill(word, 3))) + show(ctx, MIME("text/plain"), long_title) + lines = split(String(take!(buf)), '\n') + @test all(startswith(Base.text_colors[:bold] * ' '^Markdown.margin), lines) +end + +struct Struct49454 end +Base.show(io::IO, ::Struct49454) = + print(io, Base.text_colors[:underline], "Struct 49454()", Base.text_colors[:normal]) + +let buf = IOBuffer() + ctx = IOContext(buf, :color => true, :displaysize => (displaysize(buf)[1], 10)) + show(ctx, MIME("text/plain"), md""" + text without $(Struct49454()) underline. + """) + lines = split(String(take!(buf)), '\n') + @test !occursin(Base.text_colors[:underline], lines[end]) +end + # table rendering with term #25213 t = """ a | b diff --git a/stdlib/MbedTLS_jll/Project.toml b/stdlib/MbedTLS_jll/Project.toml index 00a6b29426d91..27d4884d099ad 100644 --- a/stdlib/MbedTLS_jll/Project.toml +++ b/stdlib/MbedTLS_jll/Project.toml @@ -1,6 +1,6 @@ name = "MbedTLS_jll" uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.0+0" +version = "2.28.2+1" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl index 338bec9503c07..e46da42a9a638 100644 --- a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl +++ b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl @@ -14,13 +14,13 @@ export libmbedcrypto, libmbedtls, libmbedx509 # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libmbedcrypto_handle = C_NULL -libmbedcrypto_path = "" -libmbedtls_handle = C_NULL -libmbedtls_path = "" -libmbedx509_handle = C_NULL -libmbedx509_path = "" +artifact_dir::String = "" +libmbedcrypto_handle::Ptr{Cvoid} = C_NULL +libmbedcrypto_path::String = "" +libmbedtls_handle::Ptr{Cvoid} = C_NULL +libmbedtls_path::String = "" +libmbedx509_handle::Ptr{Cvoid} = C_NULL +libmbedx509_path::String = "" if Sys.iswindows() const libmbedcrypto = "libmbedcrypto.dll" diff --git a/stdlib/MbedTLS_jll/test/runtests.jl b/stdlib/MbedTLS_jll/test/runtests.jl index b731d7f833043..2d82fa564cd18 100644 --- a/stdlib/MbedTLS_jll/test/runtests.jl +++ b/stdlib/MbedTLS_jll/test/runtests.jl @@ -6,5 +6,5 @@ using Test, Libdl, MbedTLS_jll vstr = zeros(UInt8, 32) ccall((:mbedtls_version_get_string, libmbedcrypto), Cvoid, (Ref{UInt8},), vstr) vn = VersionNumber(unsafe_string(pointer(vstr))) - @test vn == v"2.28.0" + @test vn == v"2.28.2" end diff --git a/stdlib/Mmap/Project.toml b/stdlib/Mmap/Project.toml index f3dab686d2eaa..ce4b65ccbb06a 100644 --- a/stdlib/Mmap/Project.toml +++ b/stdlib/Mmap/Project.toml @@ -1,5 +1,6 @@ name = "Mmap" uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl index 
629f53e8371ed..6d328c40cd7b3 100644 --- a/stdlib/Mmap/src/Mmap.jl +++ b/stdlib/Mmap/src/Mmap.jl @@ -208,18 +208,43 @@ function mmap(io::IO, mmaplen = (offset - offset_page) + len file_desc = gethandle(io) + szfile = convert(Csize_t, len + offset) + requestedSizeLarger = false + if !(io isa Mmap.Anonymous) + @static if !Sys.isapple() + requestedSizeLarger = szfile > filesize(io) + end + end # platform-specific mmapping @static if Sys.isunix() prot, flags, iswrite = settings(file_desc, shared) - iswrite && grow && grow!(io, offset, len) + if requestedSizeLarger + if iswrite + if grow + grow!(io, offset, len) + else + throw(ArgumentError("requested size $szfile larger than file size $(filesize(io)), but requested not to grow")) + end + else + throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions")) + end + end + @static if Sys.isapple() + iswrite && grow && grow!(io, offset, len) + end # mmap the file ptr = ccall(:jl_mmap, Ptr{Cvoid}, (Ptr{Cvoid}, Csize_t, Cint, Cint, RawFD, Int64), C_NULL, mmaplen, prot, flags, file_desc, offset_page) systemerror("memory mapping failed", reinterpret(Int, ptr) == -1) else name, readonly, create = settings(io) - szfile = convert(Csize_t, len + offset) - readonly && szfile > filesize(io) && throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions")) + if requestedSizeLarger + if readonly + throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions")) + elseif !grow + throw(ArgumentError("requested size $szfile larger than file size $(filesize(io)), but requested not to grow")) + end + end handle = create ? ccall(:CreateFileMappingW, stdcall, Ptr{Cvoid}, (OS_HANDLE, Ptr{Cvoid}, DWORD, DWORD, DWORD, Cwstring), file_desc, C_NULL, readonly ? PAGE_READONLY : PAGE_READWRITE, szfile >> 32, szfile & typemax(UInt32), name) : ccall(:OpenFileMappingW, stdcall, Ptr{Cvoid}, (DWORD, Cint, Cwstring), @@ -342,8 +367,9 @@ Forces synchronization between the in-memory version of a memory-mapped `Array` [`BitArray`](@ref) and the on-disk version. """ function sync!(m::Array, flags::Integer=MS_SYNC) - offset = rem(UInt(pointer(m)), PAGESIZE) - ptr = pointer(m) - offset + ptr = pointer(m) + offset = rem(UInt(ptr), PAGESIZE) + ptr = ptr - offset mmaplen = sizeof(m) + offset GC.@preserve m @static if Sys.isunix() systemerror("msync", @@ -404,8 +430,9 @@ Advises the kernel on the intended usage of the memory-mapped `array`, with the `flag` being one of the available `MADV_*` constants. 
""" function madvise!(m::Array, flag::Integer=MADV_NORMAL) - offset = rem(UInt(pointer(m)), PAGESIZE) - ptr = pointer(m) - offset + ptr = pointer(m) + offset = rem(UInt(ptr), PAGESIZE) + ptr = ptr - offset mmaplen = sizeof(m) + offset GC.@preserve m begin systemerror("madvise", diff --git a/stdlib/MozillaCACerts_jll/Project.toml b/stdlib/MozillaCACerts_jll/Project.toml index 0db86a1dd5319..cef860fda4acd 100644 --- a/stdlib/MozillaCACerts_jll/Project.toml +++ b/stdlib/MozillaCACerts_jll/Project.toml @@ -1,6 +1,6 @@ name = "MozillaCACerts_jll" uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.2.1" +version = "2023.01.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/NetworkOptions.version b/stdlib/NetworkOptions.version index 483d6bd51694b..be36f14f526dc 100644 --- a/stdlib/NetworkOptions.version +++ b/stdlib/NetworkOptions.version @@ -1,4 +1,4 @@ NETWORKOPTIONS_BRANCH = master -NETWORKOPTIONS_SHA1 = 4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a +NETWORKOPTIONS_SHA1 = aab83e5dd900c874826d430e25158dff43559d78 NETWORKOPTIONS_GIT_URL := https://github.com/JuliaLang/NetworkOptions.jl.git NETWORKOPTIONS_TAR_URL = https://api.github.com/repos/JuliaLang/NetworkOptions.jl/tarball/$1 diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml index 21fa9e9f0a0e6..99a75366e24a7 100644 --- a/stdlib/OpenBLAS_jll/Project.toml +++ b/stdlib/OpenBLAS_jll/Project.toml @@ -1,14 +1,15 @@ name = "OpenBLAS_jll" uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.20+0" +version = "0.3.25+0" [deps] +# See note in `src/OpenBLAS_jll.jl` about this dependency. CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.7" +julia = "1.11" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl index f656621d957d6..a0c11ab047142 100644 --- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl +++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl @@ -2,7 +2,17 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/OpenBLAS_jll.jl baremodule OpenBLAS_jll -using Base, Libdl, CompilerSupportLibraries_jll, Base.BinaryPlatforms +using Base, Libdl, Base.BinaryPlatforms + +# We are explicitly NOT loading this at runtime, as it contains `libgomp` +# which conflicts with `libiomp5`, breaking things like MKL. In the future, +# we hope to transition to a JLL interface that provides a more granular +# interface than eagerly dlopen'ing all libraries provided in the JLL +# which will eliminate issues like this, where we avoid loading a JLL +# because we don't want to load a library that we don't even use yet. 
+# using CompilerSupportLibraries_jll +# Because of this however, we have to manually load the libraries we +# _do_ care about, namely libgfortran Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] @@ -13,9 +23,9 @@ export libopenblas # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libopenblas_handle = C_NULL -libopenblas_path = "" +artifact_dir::String = "" +libopenblas_handle::Ptr{Cvoid} = C_NULL +libopenblas_path::String = "" if Base.USE_BLAS64 const libsuffix = "64_" @@ -25,10 +35,13 @@ end if Sys.iswindows() const libopenblas = "libopenblas$(libsuffix).dll" + const _libgfortran = string("libgfortran-", libgfortran_version(HostPlatform()).major, ".dll") elseif Sys.isapple() const libopenblas = "@rpath/libopenblas$(libsuffix).dylib" + const _libgfortran = string("@rpath/", "libgfortran.", libgfortran_version(HostPlatform()).major, ".dylib") else const libopenblas = "libopenblas$(libsuffix).so" + const _libgfortran = string("libgfortran.so.", libgfortran_version(HostPlatform()).major) end function __init__() @@ -37,6 +50,23 @@ function __init__() ENV["OPENBLAS_MAIN_FREE"] = "1" end + # Ensure that OpenBLAS does not grab a huge amount of memory at first, + # since it instantly allocates scratch buffer space for the number of + # threads it thinks it needs to use. + # X-ref: https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables + # X-ref: https://github.com/JuliaLang/julia/issues/45434 + if !haskey(ENV, "OPENBLAS_NUM_THREADS") && + !haskey(ENV, "GOTO_NUM_THREADS") && + !haskey(ENV, "OMP_NUM_THREADS") + # We set this to `1` here, and then LinearAlgebra will update + # to the true value in its `__init__()` function. + ENV["OPENBLAS_DEFAULT_NUM_THREADS"] = "1" + end + + # As mentioned above, we are sneaking this in here so that we don't have to + # depend on CSL_jll and load _all_ of its libraries. 
+ dlopen(_libgfortran) + global libopenblas_handle = dlopen(libopenblas) global libopenblas_path = dlpath(libopenblas_handle) global artifact_dir = dirname(Sys.BINDIR) diff --git a/stdlib/OpenLibm_jll/Project.toml b/stdlib/OpenLibm_jll/Project.toml index 7f02fbc81ce1b..f6162f402bfcf 100644 --- a/stdlib/OpenLibm_jll/Project.toml +++ b/stdlib/OpenLibm_jll/Project.toml @@ -1,6 +1,6 @@ name = "OpenLibm_jll" uuid = "05823500-19ac-5b8b-9628-191a04bc5112" -version = "0.8.1+0" +version = "0.8.1+2" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl index e3536021ad4c9..f2dee45a279cd 100644 --- a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl +++ b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl @@ -13,9 +13,9 @@ export libopenlibm # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libopenlibm_handle = C_NULL -libopenlibm_path = "" +artifact_dir::String = "" +libopenlibm_handle::Ptr{Cvoid} = C_NULL +libopenlibm_path::String = "" if Sys.iswindows() const libopenlibm = "libopenlibm.dll" diff --git a/stdlib/PCRE2_jll/Project.toml b/stdlib/PCRE2_jll/Project.toml index 187eddb2a5541..788d6b733234f 100644 --- a/stdlib/PCRE2_jll/Project.toml +++ b/stdlib/PCRE2_jll/Project.toml @@ -1,6 +1,6 @@ name = "PCRE2_jll" uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" -version = "10.40.0+0" +version = "10.42.0+1" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/PCRE2_jll/src/PCRE2_jll.jl b/stdlib/PCRE2_jll/src/PCRE2_jll.jl index 81048a45998b5..e7f685820830b 100644 --- a/stdlib/PCRE2_jll/src/PCRE2_jll.jl +++ b/stdlib/PCRE2_jll/src/PCRE2_jll.jl @@ -13,9 +13,9 @@ export libpcre2_8 # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libpcre2_8_handle = C_NULL -libpcre2_8_path = "" +artifact_dir::String = "" +libpcre2_8_handle::Ptr{Cvoid} = C_NULL +libpcre2_8_path::String = "" if Sys.iswindows() const libpcre2_8 = "libpcre2-8-0.dll" diff --git a/stdlib/PCRE2_jll/test/runtests.jl b/stdlib/PCRE2_jll/test/runtests.jl index 21e7e7db7286b..d593b07af31ce 100644 --- a/stdlib/PCRE2_jll/test/runtests.jl +++ b/stdlib/PCRE2_jll/test/runtests.jl @@ -6,5 +6,5 @@ using Test, Libdl, PCRE2_jll vstr = zeros(UInt8, 32) @test ccall((:pcre2_config_8, libpcre2_8), Cint, (UInt32, Ref{UInt8}), 11, vstr) > 0 vn = VersionNumber(split(unsafe_string(pointer(vstr)), " ")[1]) - @test vn == v"10.40.0" + @test vn == v"10.42.0" end diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index 079244a98a4ae..98a3e315c3a54 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = 98d0cc276cc59817eb9c2e18e747fe027d7282a2 +PKG_SHA1 = 85f1e5564d733c9b04199d3523aeef0607f564e2 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 diff --git a/stdlib/Printf/Project.toml b/stdlib/Printf/Project.toml index 9fa4e3633cae1..019b7e94ef9bd 100644 --- a/stdlib/Printf/Project.toml +++ b/stdlib/Printf/Project.toml @@ -1,5 +1,6 @@ name = "Printf" uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" [deps] Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" diff --git a/stdlib/Printf/docs/src/index.md b/stdlib/Printf/docs/src/index.md index 48e38e2b2ce5b..7a5bae778ea36 100644 --- a/stdlib/Printf/docs/src/index.md +++ b/stdlib/Printf/docs/src/index.md @@ -3,4 +3,6 @@ ```@docs Printf.@printf Printf.@sprintf +Printf.Format +Printf.format ``` diff 
--git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl index 1d04e7146e28b..ae9bf45786295 100644 --- a/stdlib/Printf/src/Printf.jl +++ b/stdlib/Printf/src/Printf.jl @@ -6,6 +6,8 @@ using Base.Ryu export @printf, @sprintf +public format, Format + # format specifier categories const Ints = Union{Val{'d'}, Val{'i'}, Val{'u'}, Val{'x'}, Val{'X'}, Val{'o'}} const Floats = Union{Val{'e'}, Val{'E'}, Val{'f'}, Val{'F'}, Val{'g'}, Val{'G'}, Val{'a'}, Val{'A'}} @@ -15,6 +17,10 @@ const Pointer = Val{'p'} const HexBases = Union{Val{'x'}, Val{'X'}, Val{'a'}, Val{'A'}} const PositionCounter = Val{'n'} +const MAX_FRACTIONAL_PART_WIDTH = 17 # max significant decimals + 1: `ceil(Int, log10(1 / eps(Float64))) + 1` +const MAX_INTEGER_PART_WIDTH = 309 # max exponent: `ceil(Int, log10(prevfloat(typemax(Float64))))` +const MAX_FMT_CHARS_WIDTH = 5 # hash | sign +/- | decimal dot | exponent e/E | exponent sign + """ Typed representation of a format specifier. @@ -30,19 +36,29 @@ struct Spec{T} # T => %type => Val{'type'} hash::Bool width::Int precision::Int + dynamic_width::Bool + dynamic_precision::Bool end # recreate the format specifier string from a typed Spec Base.string(f::Spec{T}; modifier::String="") where {T} = - string("%", f.leftalign ? "-" : "", f.plus ? "+" : "", f.space ? " " : "", - f.zero ? "0" : "", f.hash ? "#" : "", f.width > 0 ? f.width : "", - f.precision == 0 ? ".0" : f.precision > 0 ? ".$(f.precision)" : "", modifier, char(T)) + string("%", + f.leftalign ? "-" : "", + f.plus ? "+" : "", + f.space ? " " : "", + f.zero ? "0" : "", + f.hash ? "#" : "", + f.dynamic_width ? "*" : (f.width > 0 ? f.width : ""), + f.dynamic_precision ? ".*" : (f.precision == 0 ? ".0" : (f.precision > 0 ? ".$(f.precision)" : "")), + modifier, + char(T)) + Base.show(io::IO, f::Spec) = print(io, string(f)) floatfmt(s::Spec{T}) where {T} = - Spec{Val{'f'}}(s.leftalign, s.plus, s.space, s.zero, s.hash, s.width, 0) + Spec{Val{'f'}}(s.leftalign, s.plus, s.space, s.zero, s.hash, s.width, 0, s.dynamic_width, s.dynamic_precision) ptrfmt(s::Spec{T}, x) where {T} = - Spec{Val{'x'}}(s.leftalign, s.plus, s.space, s.zero, true, s.width, sizeof(x) == 8 ? 16 : 8) + Spec{Val{'x'}}(s.leftalign, s.plus, s.space, s.zero, true, s.width, sizeof(x) == 8 ? 16 : 8, s.dynamic_width, s.dynamic_precision) """ Printf.Format(format_str) @@ -71,24 +87,58 @@ struct Format{S, T} # and so on, then at the end, str[substringranges[end]] substringranges::Vector{UnitRange{Int}} formats::T # Tuple of Specs + numarguments::Int # required for dynamic format specifiers end # what number base should be used for a given format specifier? base(T) = T <: HexBases ? 16 : T <: Val{'o'} ? 
8 : 10 char(::Type{Val{c}}) where {c} = c +struct InvalidFormatStringError <: Exception + message::String + format::String + start_color::Int + end_color::Int +end + +function Base.showerror(io::IO, err::InvalidFormatStringError) + io_has_color = get(io, :color, false)::Bool + + println(io, "InvalidFormatStringError: ", err.message) + print(io, " \"", @view(err.format[begin:prevind(err.format, err.start_color)])) + invalid_text = @view err.format[err.start_color:err.end_color] + + printstyled(io, invalid_text, color=:red) + + # +1 is okay, since all format characters are single bytes + println(io, @view(err.format[err.end_color+1:end]), "\"") + + arrow_error = '-'^(length(invalid_text)-1) + arrow = " " * ' '^err.start_color * arrow_error * "^\n" + if io_has_color + printstyled(io, arrow, color=:red) + else + print(io, arrow) + end +end + # parse format string function Format(f::AbstractString) - isempty(f) && throw(ArgumentError("empty format string")) bytes = codeunits(f) len = length(bytes) pos = 1 + numarguments = 0 + b = 0x00 + local last_percent_pos + + # skip ahead to first format specifier while pos <= len b = bytes[pos] pos += 1 if b == UInt8('%') - pos > len && throw(ArgumentError("invalid format string: '$f'")) + last_percent_pos = pos-1 + pos > len && throw(InvalidFormatStringError("Format specifier is incomplete", f, last_percent_pos, last_percent_pos)) if bytes[pos] == UInt8('%') # escaped '%' b = bytes[pos] @@ -120,7 +170,7 @@ function Format(f::AbstractString) else break end - pos > len && throw(ArgumentError("incomplete format string: '$f'")) + pos > len && throw(InvalidFormatStringError("Format specifier is incomplete", f, last_percent_pos, pos-1)) b = bytes[pos] pos += 1 end @@ -129,47 +179,68 @@ function Format(f::AbstractString) end # parse width width = 0 - while b - UInt8('0') < 0x0a - width = 10 * width + (b - UInt8('0')) + dynamic_width = false + if b == UInt8('*') + dynamic_width = true + numarguments += 1 b = bytes[pos] pos += 1 - pos > len && break + else + while b - UInt8('0') < 0x0a + width = 10 * width + (b - UInt8('0')) + b = bytes[pos] + pos += 1 + pos > len && break + end end # parse precision precision = 0 parsedprecdigits = false + dynamic_precision = false if b == UInt8('.') - pos > len && throw(ArgumentError("incomplete format string: '$f'")) + pos > len && throw(InvalidFormatStringError("Precision specifier is missing precision", f, last_percent_pos, pos-1)) parsedprecdigits = true b = bytes[pos] pos += 1 if pos <= len - while b - UInt8('0') < 0x0a - precision = 10precision + (b - UInt8('0')) + if b == UInt8('*') + dynamic_precision = true + numarguments += 1 b = bytes[pos] pos += 1 - pos > len && break + else + precision = 0 + while b - UInt8('0') < 0x0a + precision = 10precision + (b - UInt8('0')) + b = bytes[pos] + pos += 1 + pos > len && break + end end end end # parse length modifier (ignored) if b == UInt8('h') || b == UInt8('l') prev = b + pos > len && throw(InvalidFormatStringError("Length modifier is missing type specifier", f, last_percent_pos, pos-1)) b = bytes[pos] pos += 1 if b == prev - pos > len && throw(ArgumentError("invalid format string: '$f'")) + pos > len && throw(InvalidFormatStringError("Length modifier is missing type specifier", f, last_percent_pos, pos-1)) b = bytes[pos] pos += 1 end - elseif b in b"Ljqtz" + elseif b in b"Ljqtz" # q was a synonym for ll above, see `man 3 printf`. Not to be used. 
+ pos > len && throw(InvalidFormatStringError("Length modifier is missing type specifier", f, last_percent_pos, pos-1)) b = bytes[pos] pos += 1 end # parse type - !(b in b"diouxXDOUeEfFgGaAcCsSpn") && throw(ArgumentError("invalid format string: '$f', invalid type specifier: '$(Char(b))'")) + !(b in b"diouxXDOUeEfFgGaAcCsSpn") && throw(InvalidFormatStringError("'$(Char(b))' is not a valid type specifier", f, last_percent_pos, pos-1)) type = Val{Char(b)} if type <: Ints && precision > 0 + # note - we should also set zero to false if dynamic precision > 0 + # this is taken care of in fmt() for Ints zero = false elseif (type <: Strings || type <: Chars) && !parsedprecdigits precision = -1 @@ -178,13 +249,15 @@ function Format(f::AbstractString) elseif type <: Floats && !parsedprecdigits precision = 6 end - push!(fmts, Spec{type}(leftalign, plus, space, zero, hash, width, precision)) + numarguments += 1 + push!(fmts, Spec{type}(leftalign, plus, space, zero, hash, width, precision, dynamic_width, dynamic_precision)) start = pos while pos <= len b = bytes[pos] pos += 1 if b == UInt8('%') - pos > len && throw(ArgumentError("invalid format string: '$f'")) + last_percent_pos = pos-1 + pos > len && throw(InvalidFormatStringError("Format specifier is incomplete", f, last_percent_pos, last_percent_pos)) if bytes[pos] == UInt8('%') # escaped '%' b = bytes[pos] @@ -196,7 +269,7 @@ function Format(f::AbstractString) end push!(strs, start:pos - 1 - (b == UInt8('%'))) end - return Format(bytes, strs, Tuple(fmts)) + return Format(bytes, strs, Tuple(fmts), numarguments) end macro format_str(str) @@ -218,6 +291,28 @@ const HEX = b"0123456789ABCDEF" return pos end + +@inline function rmdynamic(spec::Spec{T}, args, argp) where {T} + zero, width, precision = spec.zero, spec.width, spec.precision + if spec.dynamic_width + width = args[argp] + argp += 1 + end + if spec.dynamic_precision + precision = args[argp] + if zero && T <: Ints && precision > 0 + zero = false + end + argp += 1 + end + (Spec{T}(spec.leftalign, spec.plus, spec.space, zero, spec.hash, width, precision, false, false), argp) +end + +@inline function fmt(buf, pos, args, argp, spec::Spec{T}) where {T} + spec, argp = rmdynamic(spec, args, argp) + (fmt(buf, pos, args[argp], spec), argp+1) +end + @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars} leftalign, width = spec.leftalign, spec.width c = Char(first(arg)) @@ -242,7 +337,7 @@ end @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings} leftalign, hash, width, prec = spec.leftalign, spec.hash, spec.width, spec.precision str = string(arg) - slen = textwidth(str) + (hash ? arg isa AbstractString ? 2 : 1 : 0) + slen = textwidth(str)::Int + (hash ? arg isa AbstractString ? 2 : 1 : 0) op = p = prec == -1 ? slen : min(slen, prec) if !leftalign && width > p for _ = 1:(width - p) @@ -394,6 +489,10 @@ _snprintf(ptr, siz, str, arg) = @ccall "libmpfr".mpfr_snprintf(ptr::Ptr{UInt8}, siz::Csize_t, str::Ptr{UInt8}; arg::Ref{BigFloat})::Cint +# Arbitrary constant for a maximum number of bytes we want to output for a BigFloat. +# 8KiB seems like a reasonable default. Larger BigFloat representations should probably +# use a custom printing routine. Printing values with results larger than this ourselves +# seems like a dangerous thing to do. 
const __BIG_FLOAT_MAX__ = 8192 @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats} @@ -405,17 +504,15 @@ const __BIG_FLOAT_MAX__ = 8192 GC.@preserve buf begin siz = length(buf) - pos + 1 str = string(spec; modifier="R") - len = _snprintf(pointer(buf, pos), siz, str, x) - if len > siz - maxout = max(__BIG_FLOAT_MAX__, - ceil(Int, precision(x) * log(2) / log(10)) + 25) - len > maxout && - error("Over $maxout bytes $len needed to output BigFloat $x") - resize!(buf, len + 1) - len = _snprintf(pointer(buf, pos), len + 1, str, x) + required_length = _snprintf(pointer(buf, pos), siz, str, x) + if required_length > siz + required_length > __BIG_FLOAT_MAX__ && + throw(ArgumentError("The given BigFloat requires $required_length bytes to be printed, which is more than the maximum of $__BIG_FLOAT_MAX__ bytes supported.")) + resize!(buf, required_length + 1) + required_length = _snprintf(pointer(buf, pos), required_length + 1, str, x) end - len > 0 || throw(ArgumentError("invalid printf formatting $str for BigFloat")) - return pos + len + required_length > 0 || throw(ArgumentError("The given BigFloat would produce less than the maximum allowed number of bytes $__BIG_FLOAT_MAX__, but still couldn't be printed fully for an unknown reason.")) + return pos + required_length end end x = Float64(x) @@ -593,7 +690,9 @@ function ini_dec end function fmtfallback(buf, pos, arg, spec::Spec{T}) where {T} leftalign, plus, space, zero, hash, width, prec = spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision - buf2 = Base.StringVector(309 + 17 + 5) + buf2 = Base.StringVector( + MAX_INTEGER_PART_WIDTH + MAX_FRACTIONAL_PART_WIDTH + MAX_FMT_CHARS_WIDTH + ) ise = T <: Union{Val{'e'}, Val{'E'}} isg = T <: Union{Val{'g'}, Val{'G'}} isf = T <: Val{'f'} @@ -729,9 +828,10 @@ const UNROLL_UPTO = 16 # for each format, write out arg and next substring # unroll up to 16 formats N = length(f.formats) + argp = 1 Base.@nexprs 16 i -> begin if N >= i - pos = fmt(buf, pos, args[i], f.formats[i]) + pos, argp = fmt(buf, pos, args, argp, f.formats[i]) for j in f.substringranges[i + 1] b = f.str[j] if !escapechar @@ -746,7 +846,7 @@ const UNROLL_UPTO = 16 end if N > 16 for i = 17:length(f.formats) - pos = fmt(buf, pos, args[i], f.formats[i]) + pos, argp = fmt(buf, pos, args, argp, f.formats[i]) for j in f.substringranges[i + 1] b = f.str[j] if !escapechar @@ -762,11 +862,17 @@ const UNROLL_UPTO = 16 return pos end +@inline function plength(f::Spec{T}, args, argp) where {T} + f, argp = rmdynamic(f, args, argp) + (plength(f, args[argp]), argp+1) +end + function plength(f::Spec{T}, x) where {T <: Chars} c = Char(first(x)) w = textwidth(c) return max(f.width, w) + (ncodeunits(c) - w) end + plength(f::Spec{Pointer}, x) = max(f.width, 2 * sizeof(x) + 2) function plength(f::Spec{T}, x) where {T <: Strings} @@ -778,34 +884,40 @@ end function plength(f::Spec{T}, x) where {T <: Ints} x2 = toint(x) - return max(f.width, f.precision + ndigits(x2, base=base(T), pad=1) + 5) + return max( + f.width, + f.precision + ndigits(x2, base=base(T), pad=1) + MAX_FMT_CHARS_WIDTH + ) end plength(f::Spec{T}, x::AbstractFloat) where {T <: Ints} = - max(f.width, 0 + 309 + 17 + f.hash + 5) + max(f.width, f.hash + MAX_INTEGER_PART_WIDTH + 0 + MAX_FMT_CHARS_WIDTH) plength(f::Spec{T}, x) where {T <: Floats} = - max(f.width, f.precision + 309 + 17 + f.hash + 5) + max(f.width, f.hash + MAX_INTEGER_PART_WIDTH + f.precision + MAX_FMT_CHARS_WIDTH) plength(::Spec{PositionCounter}, x) = 0 @inline function 
computelen(substringranges, formats, args) len = sum(length, substringranges) N = length(formats) # unroll up to 16 formats + argp = 1 Base.@nexprs 16 i -> begin if N >= i - len += plength(formats[i], args[i]) + l, argp = plength(formats[i], args, argp) + len += l end end if N > 16 for i = 17:length(formats) - len += plength(formats[i], args[i]) + l, argp = plength(formats[i], args, argp) + len += l end end return len end @noinline argmismatch(a, b) = - throw(ArgumentError("mismatch between # of format specifiers and provided args: $a != $b")) + throw(ArgumentError("Number of format specifiers and number of provided args differ: $a != $b")) """ Printf.format(f::Printf.Format, args...) => String @@ -818,7 +930,7 @@ for more details on C `printf` support. function format end function format(io::IO, f::Format, args...) # => Nothing - length(f.formats) == length(args) || argmismatch(length(f.formats), length(args)) + f.numarguments == length(args) || argmismatch(f.numarguments, length(args)) buf = Base.StringVector(computelen(f.substringranges, f.formats, args)) pos = format(buf, 1, f, args...) write(io, resize!(buf, pos - 1)) @@ -826,7 +938,7 @@ function format(io::IO, f::Format, args...) # => Nothing end function format(f::Format, args...) # => String - length(f.formats) == length(args) || argmismatch(length(f.formats), length(args)) + f.numarguments == length(args) || argmismatch(f.numarguments, length(args)) buf = Base.StringVector(computelen(f.substringranges, f.formats, args)) pos = format(buf, 1, f, args...) return String(resize!(buf, pos - 1)) @@ -860,9 +972,11 @@ Padded with zeros to length 6 000123 julia> @printf "Use shorter of decimal or scientific %g %g" 1.23 12300000.0 Use shorter of decimal or scientific 1.23 1.23e+07 -``` -For a systematic specification of the format, see [here](https://www.cplusplus.com/reference/cstdio/printf/). +julia> @printf "Use dynamic width and precision %*.*f" 10 2 0.12345 +Use dynamic width and precision 0.12 +``` +For a systematic specification of the format, see [here](https://en.cppreference.com/w/c/io/fprintf). See also [`@sprintf`](@ref) to get the result as a `String` instead of it being printed. # Caveats @@ -885,6 +999,9 @@ julia> @printf "%.0f %.1f %f" 0.5 0.025 -0.0078125 using [`textwidth`](@ref), which e.g. ignores zero-width characters (such as combining characters for diacritical marks) and treats certain "wide" characters (e.g. emoji) as width `2`. + +!!! compat "Julia 1.10" + Dynamic width specifiers like `%*s` and `%0*.*f` require Julia 1.10. """ macro printf(io_or_fmt, args...) if io_or_fmt isa String @@ -892,8 +1009,10 @@ macro printf(io_or_fmt, args...) return esc(:($Printf.format(stdout, $fmt, $(args...)))) else io = io_or_fmt - isempty(args) && throw(ArgumentError("must provide required format string")) - fmt = Format(args[1]) + isempty(args) && throw(ArgumentError("No format string provided to `@printf` - use like `@printf [io] <format string> [<args...>].")) + fmt_str = first(args) + fmt_str isa String || throw(ArgumentError("First argument to `@printf` after `io` must be a format string")) + fmt = Format(fmt_str) return esc(:($Printf.format($io, $fmt, $(Base.tail(args)...)))) end end @@ -910,6 +1029,7 @@ julia> @sprintf "this is a %s %15.1f" "test" 34.567 ``` """ macro sprintf(fmt, args...) 
+ fmt isa String || throw(ArgumentError("First argument to `@sprintf` must be a format string.")) f = Format(fmt) return esc(:($Printf.format($f, $(args...)))) end diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl index e80cbe9626823..33970f78648e2 100644 --- a/stdlib/Printf/test/runtests.jl +++ b/stdlib/Printf/test/runtests.jl @@ -339,10 +339,9 @@ end @test Printf.@sprintf("1%%2%%3") == "1%2%3" @test Printf.@sprintf("GAP[%%]") == "GAP[%]" @test Printf.@sprintf("hey there") == "hey there" - @test_throws ArgumentError Printf.Format("") - @test_throws ArgumentError Printf.Format("%+") - @test_throws ArgumentError Printf.Format("%.") - @test_throws ArgumentError Printf.Format("%.0") + @test_throws Printf.InvalidFormatStringError Printf.Format("%+") + @test_throws Printf.InvalidFormatStringError Printf.Format("%.") + @test_throws Printf.InvalidFormatStringError Printf.Format("%.0") @test isempty(Printf.Format("%%").formats) @test Printf.@sprintf("%d%d", 1, 2) == "12" @test (Printf.@sprintf "%d%d" [1 2]...) == "12" @@ -355,10 +354,10 @@ end @test (Printf.@sprintf("%d\u0f00%d", 1, 2)) == "1\u0f002" @test (Printf.@sprintf("%d\U0001ffff%d", 1, 2)) == "1\U0001ffff2" @test (Printf.@sprintf("%d\u2203%d\u0203", 1, 2)) == "1\u22032\u0203" - @test_throws ArgumentError Printf.Format("%y%d") - @test_throws ArgumentError Printf.Format("%\u00d0%d") - @test_throws ArgumentError Printf.Format("%\u0f00%d") - @test_throws ArgumentError Printf.Format("%\U0001ffff%d") + @test_throws Printf.InvalidFormatStringError Printf.Format("%y%d") + @test_throws Printf.InvalidFormatStringError Printf.Format("%\u00d0%d") + @test_throws Printf.InvalidFormatStringError Printf.Format("%\u0f00%d") + @test_throws Printf.InvalidFormatStringError Printf.Format("%\U0001ffff%d") @test Printf.@sprintf("%10.5d", 4) == " 00004" @test (Printf.@sprintf "%d" typemax(Int64)) == "9223372036854775807" @@ -444,8 +443,8 @@ end @test (Printf.@sprintf("%f", parse(BigFloat, "1e400"))) == "10000000000000000000000000000000000000000000000000000000000000000000000000000025262527574416492004687051900140830217136998040684679611623086405387447100385714565637522507383770691831689647535911648520404034824470543643098638520633064715221151920028135130764414460468236314621044034960475540018328999334468948008954289495190631358190153259681118693204411689043999084305348398480210026863210192871358464.000000" - # Check that does not attempt to output incredibly large amounts of digits - @test_throws ErrorException Printf.@sprintf("%f", parse(BigFloat, "1e99999")) + # Check that Printf does not attempt to output more than 8KiB worth of digits + @test_throws ArgumentError Printf.@sprintf("%f", parse(BigFloat, "1e99999")) # Check bug with precision > length of string @test Printf.@sprintf("%4.2s", "a") == " a" @@ -488,6 +487,10 @@ end @test @sprintf("%d", 3//1) == "3" @test @sprintf("%d", Inf) == "Inf" @test @sprintf(" %d", NaN) == " NaN" + + # 50011 + @test Printf.@sprintf("") == "" + @test Printf.format(Printf.Format("")) == "" end @testset "integers" begin @@ -528,13 +531,13 @@ end @test Printf.@sprintf( "%0-5d", -42) == "-42 " @test Printf.@sprintf( "%0-15d", 42) == "42 " @test Printf.@sprintf( "%0-15d", -42) == "-42 " - @test_throws ArgumentError Printf.Format("%d %") + @test_throws Printf.InvalidFormatStringError Printf.Format("%d %") @test Printf.@sprintf("%lld", 18446744065119617025) == "18446744065119617025" @test Printf.@sprintf("%+8lld", 100) == " +100" @test Printf.@sprintf("%+.8lld", 100) == "+00000100" @test 
Printf.@sprintf("%+10.8lld", 100) == " +00000100" - @test_throws ArgumentError Printf.Format("%_1lld") + @test_throws Printf.InvalidFormatStringError Printf.Format("%_1lld") @test Printf.@sprintf("%-1.5lld", -100) == "-00100" @test Printf.@sprintf("%5lld", 100) == " 100" @test Printf.@sprintf("%5lld", -100) == " -100" @@ -775,6 +778,7 @@ end @test Printf.@sprintf("%40d", typemax(Int128)) == " 170141183460469231731687303715884105727" end + @testset "%n" begin x = Ref{Int}() @test (Printf.@sprintf("%d4%n", 123, x); x[] == 4) @@ -782,4 +786,363 @@ end @test (Printf.@sprintf("%s%n", "1234", x); x[] == 4) end +@testset "dynamic" begin + + # dynamic width and precision + @test Printf.@sprintf("%*d", 10, 12) == " 12" + @test Printf.@sprintf("%.*d", 4, 12) == "0012" + @test Printf.@sprintf("%*.*d", 10, 4, 12) == " 0012" + @test Printf.@sprintf("%+*.*d", 10, 4, 12) == " +0012" + @test Printf.@sprintf("%0*.*d", 10, 4, 12) == " 0012" + + @test Printf.@sprintf("%*d%*d%*d", 4, 12, 4, 13, 4, 14) == " 12 13 14" + @test Printf.@sprintf("%*d%*d%*d", 4, 12, 5, 13, 6, 14) == " 12 13 14" + + # dynamic should return whatever the static width and precision returns + + + # pointers + @test Printf.@sprintf("%*p", 20, 0) == Printf.@sprintf("%20p", 0) + @test Printf.@sprintf("%-*p", 20, 0) == Printf.@sprintf("%-20p", 0) + @test Printf.@sprintf("%*p", 20, C_NULL) == Printf.@sprintf("%20p", C_NULL) + @test Printf.@sprintf("%-*p", 20, C_NULL) == Printf.@sprintf("%-20p", C_NULL) + + # hex float + @test Printf.@sprintf("%.*a", 0, 3.14) == Printf.@sprintf("%.0a", 3.14) + @test Printf.@sprintf("%.*a", 1, 3.14) == Printf.@sprintf("%.1a", 3.14) + @test Printf.@sprintf("%.*a", 2, 3.14) == Printf.@sprintf("%.2a", 3.14) + @test Printf.@sprintf("%#.*a", 0, 3.14) == Printf.@sprintf("%#.0a", 3.14) + @test Printf.@sprintf("%#.*a", 1, 3.14) == Printf.@sprintf("%#.1a", 3.14) + @test Printf.@sprintf("%#.*a", 2, 3.14) == Printf.@sprintf("%#.2a", 3.14) + @test Printf.@sprintf("%.*a", 6, 1.5) == Printf.@sprintf("%.6a", 1.5) + + # "%g" + @test Printf.@sprintf("%*.*g", 10, 5, -123.4 ) == Printf.@sprintf( "%10.5g", -123.4 ) + @test Printf.@sprintf("%0*.*g", 10, 5, -123.4 ) == Printf.@sprintf( "%010.5g", -123.4 ) + @test Printf.@sprintf("%.*g", 6, 12340000.0 ) == Printf.@sprintf( "%.6g", 12340000.0 ) + @test Printf.@sprintf("%#.*g", 6, 12340000.0 ) == Printf.@sprintf( "%#.6g", 12340000.0 ) + @test Printf.@sprintf("%*.*g", 10, 5, big"-123.4" ) == Printf.@sprintf( "%10.5g", big"-123.4" ) + @test Printf.@sprintf("%0*.*g", 10, 5, big"-123.4" ) == Printf.@sprintf( "%010.5g", big"-123.4" ) + @test Printf.@sprintf("%.*g", 6, big"12340000.0" ) == Printf.@sprintf( "%.6g", big"12340000.0" ) + @test Printf.@sprintf("%#.*g", 6, big"12340000.0") == Printf.@sprintf( "%#.6g", big"12340000.0") + + @test Printf.@sprintf("%.*g", 5, 42) == Printf.@sprintf( "%.5g", 42) + @test Printf.@sprintf("%#.*g", 2, 42) == Printf.@sprintf( "%#.2g", 42) + @test Printf.@sprintf("%#.*g", 5, 42) == Printf.@sprintf( "%#.5g", 42) + + @test Printf.@sprintf("%.*g", 15, 0) == Printf.@sprintf("%.15g", 0) + @test Printf.@sprintf("%#.*g", 15, 0) == Printf.@sprintf("%#.15g", 0) + + # "%f" + @test Printf.@sprintf("%.*f", 0, 3e142) == Printf.@sprintf( "%.0f", 3e142) + @test Printf.@sprintf("%.*f", 2, 1.234) == Printf.@sprintf("%.2f", 1.234) + @test Printf.@sprintf("%.*f", 2, 1.235) == Printf.@sprintf("%.2f", 1.235) + @test Printf.@sprintf("%.*f", 2, 0.235) == Printf.@sprintf("%.2f", 0.235) + @test Printf.@sprintf("%*.*f", 4, 1, 1.234) == Printf.@sprintf("%4.1f", 1.234) + @test 
Printf.@sprintf("%*.*f", 8, 1, 1.234) == Printf.@sprintf("%8.1f", 1.234) + @test Printf.@sprintf("%+*.*f", 8, 1, 1.234) == Printf.@sprintf("%+8.1f", 1.234) + @test Printf.@sprintf("% *.*f", 8, 1, 1.234) == Printf.@sprintf("% 8.1f", 1.234) + @test Printf.@sprintf("% *.*f", 7, 1, 1.234) == Printf.@sprintf("% 7.1f", 1.234) + @test Printf.@sprintf("% 0*.*f", 8, 1, 1.234) == Printf.@sprintf("% 08.1f", 1.234) + @test Printf.@sprintf("%0*.*f", 8, 1, 1.234) == Printf.@sprintf("%08.1f", 1.234) + @test Printf.@sprintf("%-0*.*f", 8, 1, 1.234) == Printf.@sprintf("%-08.1f", 1.234) + @test Printf.@sprintf("%-*.*f", 8, 1, 1.234) == Printf.@sprintf("%-8.1f", 1.234) + @test Printf.@sprintf("%0*.*f", 8, 1, -1.234) == Printf.@sprintf("%08.1f", -1.234) + @test Printf.@sprintf("%0*.*f", 9, 1, -1.234) == Printf.@sprintf("%09.1f", -1.234) + @test Printf.@sprintf("%0*.*f", 9, 1, 1.234) == Printf.@sprintf("%09.1f", 1.234) + @test Printf.@sprintf("%+0*.*f", 9, 1, 1.234) == Printf.@sprintf("%+09.1f", 1.234) + @test Printf.@sprintf("% 0*.*f", 9, 1, 1.234) == Printf.@sprintf("% 09.1f", 1.234) + @test Printf.@sprintf("%+ 0*.*f", 9, 1, 1.234) == Printf.@sprintf("%+ 09.1f", 1.234) + @test Printf.@sprintf("%+ 0*.*f", 9, 1, 1.234) == Printf.@sprintf("%+ 09.1f", 1.234) + @test Printf.@sprintf("%+ 0*.*f", 9, 0, 1.234) == Printf.@sprintf("%+ 09.0f", 1.234) + @test Printf.@sprintf("%+ #0*.*f", 9, 0, 1.234) == Printf.@sprintf("%+ #09.0f", 1.234) + + # "%e" + @test Printf.@sprintf("%*.*e", 10, 4, Inf) == Printf.@sprintf("%10.4e", Inf) + @test Printf.@sprintf("%*.*e", 10, 4, NaN) == Printf.@sprintf("%10.4e", NaN) + @test Printf.@sprintf("%*.*e", 10, 4, big"Inf") == Printf.@sprintf("%10.4e", big"Inf") + @test Printf.@sprintf("%*.*e", 10, 4, big"NaN") == Printf.@sprintf("%10.4e", big"NaN") + + @test Printf.@sprintf("%.*e", 0, 3e142) == Printf.@sprintf("%.0e",3e142) + @test Printf.@sprintf("%#.*e", 0, 3e142) == Printf.@sprintf("%#.0e", 3e142) + @test Printf.@sprintf("%.*e", 0, big"3e142") == Printf.@sprintf("%.0e", big"3e142") + + @test Printf.@sprintf("%#.*e", 0, big"3e142") == Printf.@sprintf("%#.0e", big"3e142") + @test Printf.@sprintf("%.*e", 0, big"3e1042") == Printf.@sprintf("%.0e", big"3e1042") + + @test Printf.@sprintf("%.*e", 2, 1.234) == Printf.@sprintf("%.2e", 1.234) + @test Printf.@sprintf("%.*e", 2, 1.235) == Printf.@sprintf("%.2e", 1.235) + @test Printf.@sprintf("%.*e", 2, 0.235) == Printf.@sprintf("%.2e", 0.235) + @test Printf.@sprintf("%*.*e", 4, 1, 1.234) == Printf.@sprintf("%4.1e", 1.234) + @test Printf.@sprintf("%*.*e", 8, 1, 1.234) == Printf.@sprintf("%8.1e", 1.234) + @test Printf.@sprintf("%+*.*e", 8, 1, 1.234) == Printf.@sprintf("%+8.1e", 1.234) + @test Printf.@sprintf("% *.*e", 8, 1, 1.234) == Printf.@sprintf("% 8.1e", 1.234) + @test Printf.@sprintf("% *.*e", 7, 1, 1.234) == Printf.@sprintf("% 7.1e", 1.234) + @test Printf.@sprintf("% 0*.*e", 8, 1, 1.234) == Printf.@sprintf("% 08.1e", 1.234) + @test Printf.@sprintf("%0*.*e", 8, 1, 1.234) == Printf.@sprintf("%08.1e", 1.234) + @test Printf.@sprintf("%-0*.*e", 8, 1, 1.234) == Printf.@sprintf("%-08.1e", 1.234) + @test Printf.@sprintf("%-*.*e", 8, 1, 1.234) == Printf.@sprintf("%-8.1e", 1.234) + @test Printf.@sprintf("%-*.*e", 8, 1, 1.234) == Printf.@sprintf("%-8.1e", 1.234) + @test Printf.@sprintf("%0*.*e", 8, 1, -1.234) == Printf.@sprintf("%08.1e", -1.234) + @test Printf.@sprintf("%0*.*e", 9, 1, -1.234) == Printf.@sprintf("%09.1e", -1.234) + @test Printf.@sprintf("%0*.*e", 9, 1, 1.234) == Printf.@sprintf("%09.1e", 1.234) + @test Printf.@sprintf("%+0*.*e", 9, 1, 
1.234) == Printf.@sprintf("%+09.1e", 1.234) + @test Printf.@sprintf("% 0*.*e", 9, 1, 1.234) == Printf.@sprintf("% 09.1e", 1.234) + @test Printf.@sprintf("%+ 0*.*e", 9, 1, 1.234) == Printf.@sprintf("%+ 09.1e", 1.234) + @test Printf.@sprintf("%+ 0*.*e", 9, 1, 1.234) == Printf.@sprintf("%+ 09.1e", 1.234) + @test Printf.@sprintf("%+ 0*.*e", 9, 0, 1.234) == Printf.@sprintf("%+ 09.0e", 1.234) + @test Printf.@sprintf("%+ #0*.*e", 9, 0, 1.234) == Printf.@sprintf("%+ #09.0e", 1.234) + + # strings + @test Printf.@sprintf("%.*s", 1, "foo") == Printf.@sprintf("%.1s", "foo") + @test Printf.@sprintf("%*s", 1, "Hallo heimur") == Printf.@sprintf("%1s", "Hallo heimur") + @test Printf.@sprintf("%*s", 20, "Hallo") == Printf.@sprintf("%20s", "Hallo") + @test Printf.@sprintf("%-*s", 20, "Hallo") == Printf.@sprintf("%-20s", "Hallo") + @test Printf.@sprintf("%0-*s", 20, "Hallo") == Printf.@sprintf("%0-20s", "Hallo") + @test Printf.@sprintf("%.*s", 20, "Hallo heimur") == Printf.@sprintf("%.20s", "Hallo heimur") + @test Printf.@sprintf("%*.*s", 20, 5, "Hallo heimur") == Printf.@sprintf("%20.5s", "Hallo heimur") + @test Printf.@sprintf("%.*s", 0, "Hallo heimur") == Printf.@sprintf("%.0s", "Hallo heimur") + @test Printf.@sprintf("%*.*s", 20, 0, "Hallo heimur") == Printf.@sprintf("%20.0s", "Hallo heimur") + @test Printf.@sprintf("%.s", "Hallo heimur") == Printf.@sprintf("%.s", "Hallo heimur") + @test Printf.@sprintf("%*.s", 20, "Hallo heimur") == Printf.@sprintf("%20.s", "Hallo heimur") + @test Printf.@sprintf("%*sø", 4, "ø") == Printf.@sprintf("%4sø", "ø") + @test Printf.@sprintf("%-*sø", 4, "ø") == Printf.@sprintf("%-4sø", "ø") + + @test Printf.@sprintf("%*s", 8, "test") == Printf.@sprintf("%8s", "test") + @test Printf.@sprintf("%-*s", 8, "test") == Printf.@sprintf("%-8s", "test") + + @test Printf.@sprintf("%#*s", 8, :test) == Printf.@sprintf("%#8s", :test) + @test Printf.@sprintf("%#-*s", 8, :test) == Printf.@sprintf("%#-8s", :test) + + @test Printf.@sprintf("%*.*s", 8, 3, "test") == Printf.@sprintf("%8.3s", "test") + @test Printf.@sprintf("%#*.*s", 8, 3, "test") == Printf.@sprintf("%#8.3s", "test") + @test Printf.@sprintf("%-*.*s", 8, 3, "test") == Printf.@sprintf("%-8.3s", "test") + @test Printf.@sprintf("%#-*.*s", 8, 3, "test") == Printf.@sprintf("%#-8.3s", "test") + @test Printf.@sprintf("%.*s", 3, "test") == Printf.@sprintf("%.3s", "test") + @test Printf.@sprintf("%#.*s", 3, "test") == Printf.@sprintf("%#.3s", "test") + @test Printf.@sprintf("%-.*s", 3, "test") == Printf.@sprintf("%-.3s", "test") + @test Printf.@sprintf("%#-.*s", 3, "test") == Printf.@sprintf("%#-.3s", "test") + + # chars + @test Printf.@sprintf("%*c", 3, 'a') == Printf.@sprintf("%3c", 'a') + @test Printf.@sprintf("%*c", 1, 'x') == Printf.@sprintf("%1c", 'x') + @test Printf.@sprintf("%*c" , 20, 'x') == Printf.@sprintf("%20c" , 'x') + @test Printf.@sprintf("%-*c" , 20, 'x') == Printf.@sprintf("%-20c" , 'x') + @test Printf.@sprintf("%-0*c", 20, 'x') == Printf.@sprintf("%-020c", 'x') + @test Printf.@sprintf("%*c", 3, 'A') == Printf.@sprintf("%3c", 'A') + @test Printf.@sprintf("%-*c", 3, 'A') == Printf.@sprintf("%-3c", 'A') + + # more than 16 formats/args + @test Printf.@sprintf("%*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f %*.*f", 4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345,4,2,1.2345) == Printf.@sprintf("%4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f 
%4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f", 1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345,1.2345) + + # Check bug with trailing nul printing BigFloat + @test (Printf.@sprintf("%.*f", 330, BigFloat(1)))[end] != '\0' + + # Check bug with precision > length of string + @test Printf.@sprintf("%*.*s", 4, 2, "a") == Printf.@sprintf("%4.2s", "a") + + # issue #29662 + @test Printf.@sprintf("%*.*e", 12, 3, pi*1e100) == Printf.@sprintf("%12.3e", pi*1e100) + @test Printf.@sprintf("%*d", 2, 3.14) == Printf.@sprintf("%*d", 2, 3.14) + @test Printf.@sprintf("%*d", 2, big(3.14)) == Printf.@sprintf("%*d", 2, big(3.14)) + + # 37539 + @test Printf.@sprintf(" %.*e\n", 1, 0.999) == Printf.@sprintf(" %.1e\n", 0.999) + @test Printf.@sprintf(" %.*f", 1, 9.999) == Printf.@sprintf(" %.1f", 9.999) + + # integers + @test Printf.@sprintf("%*d", 10, 12) == (Printf.@sprintf("%10d", 12)) + @test Printf.@sprintf("%.*d", 4, 12) == (Printf.@sprintf("%.4d", 12)) + @test Printf.@sprintf("%*.*d", 10, 4, 12) == (Printf.@sprintf("%10.4d", 12)) + @test Printf.@sprintf("%+*.*d", 10, 4, 12) == (Printf.@sprintf("%+10.4d", 12)) + @test Printf.@sprintf("%0*.*d", 10, 4, 12) == (Printf.@sprintf("%010.4d", 12)) + + @test Printf.@sprintf( "% *d", 5, 42) == Printf.@sprintf( "% 5d", 42) + @test Printf.@sprintf( "% *d", 5, -42) == Printf.@sprintf( "% 5d", -42) + @test Printf.@sprintf( "% *d", 15, 42) == Printf.@sprintf( "% 15d", 42) + @test Printf.@sprintf( "% *d", 15, -42) == Printf.@sprintf( "% 15d", -42) + + @test Printf.@sprintf("%+*d", 5, 42) == Printf.@sprintf("%+5d", 42) + @test Printf.@sprintf("%+*d", 5, -42) == Printf.@sprintf("%+5d", -42) + @test Printf.@sprintf("%+*d", 15, 42) == Printf.@sprintf("%+15d", 42) + @test Printf.@sprintf("%+*d", 15, -42) == Printf.@sprintf("%+15d", -42) + @test Printf.@sprintf( "%*d", 0, 42) == Printf.@sprintf( "%0d", 42) + @test Printf.@sprintf( "%*d", 0, -42) == Printf.@sprintf( "%0d", -42) + + @test Printf.@sprintf("%-*d", 5, 42) == Printf.@sprintf("%-5d", 42) + @test Printf.@sprintf("%-*d", 5, -42) == Printf.@sprintf("%-5d", -42) + @test Printf.@sprintf("%-*d", 15, 42) == Printf.@sprintf("%-15d", 42) + @test Printf.@sprintf("%-*d", 15, -42) == Printf.@sprintf("%-15d", -42) + + @test Printf.@sprintf("%+*lld", 8, 100) == Printf.@sprintf("%+8lld", 100) + @test Printf.@sprintf("%+.*lld", 8, 100) == Printf.@sprintf("%+.8lld", 100) + @test Printf.@sprintf("%+*.*lld", 10, 8, 100) == Printf.@sprintf("%+10.8lld", 100) + + @test Printf.@sprintf("%-*.*lld", 1, 5, -100) == Printf.@sprintf("%-1.5lld", -100) + @test Printf.@sprintf("%*lld", 5, 100) == Printf.@sprintf("%5lld", 100) + @test Printf.@sprintf("%*lld", 5, -100) == Printf.@sprintf("%5lld", -100) + @test Printf.@sprintf("%-*lld", 5, 100) == Printf.@sprintf("%-5lld", 100) + @test Printf.@sprintf("%-*lld", 5, -100) == Printf.@sprintf("%-5lld", -100) + @test Printf.@sprintf("%-.*lld", 5, 100) == Printf.@sprintf("%-.5lld", 100) + @test Printf.@sprintf("%-.*lld", 5, -100) == Printf.@sprintf("%-.5lld", -100) + @test Printf.@sprintf("%-*.*lld", 8, 5, 100) == Printf.@sprintf("%-8.5lld", 100) + @test Printf.@sprintf("%-*.*lld", 8, 5, -100) == Printf.@sprintf("%-8.5lld", -100) + @test Printf.@sprintf("%0*lld", 5, 100) == Printf.@sprintf("%05lld", 100) + @test Printf.@sprintf("%0*lld", 5, -100) == Printf.@sprintf("%05lld", -100) + @test Printf.@sprintf("% *lld", 5, 100) == Printf.@sprintf("% 5lld", 100) + @test Printf.@sprintf("% *lld", 5, -100) == Printf.@sprintf("% 
5lld", -100) + @test Printf.@sprintf("% .*lld", 5, 100) == Printf.@sprintf("% .5lld", 100) + @test Printf.@sprintf("% .*lld", 5, -100) == Printf.@sprintf("% .5lld", -100) + @test Printf.@sprintf("% *.*lld", 8, 5, 100) == Printf.@sprintf("% 8.5lld", 100) + @test Printf.@sprintf("% *.*lld", 8, 5, -100) == Printf.@sprintf("% 8.5lld", -100) + @test Printf.@sprintf("%.*lld", 0, 0) == Printf.@sprintf("%.0lld", 0) + @test Printf.@sprintf("%#+*.*llx", 21, 18, -100) == Printf.@sprintf("%#+21.18llx", -100) + @test Printf.@sprintf("%#.*llo", 25, -100) == Printf.@sprintf("%#.25llo", -100) + @test Printf.@sprintf("%#+*.*llo", 24, 20, -100) == Printf.@sprintf("%#+24.20llo", -100) + @test Printf.@sprintf("%#+*.*llX", 18, 21, -100) == Printf.@sprintf("%#+18.21llX", -100) + @test Printf.@sprintf("%#+*.*llo", 20, 24, -100) == Printf.@sprintf("%#+20.24llo", -100) + @test Printf.@sprintf("%#+*.*llu", 25, 22, -1) == Printf.@sprintf("%#+25.22llu", -1) + @test Printf.@sprintf("%#+*.*llu", 30, 25, -1) == Printf.@sprintf("%#+30.25llu", -1) + @test Printf.@sprintf("%+#*.*lld", 25, 22, -1) == Printf.@sprintf("%+#25.22lld", -1) + @test Printf.@sprintf("%#-*.*llo", 8, 5, 100) == Printf.@sprintf("%#-8.5llo", 100) + @test Printf.@sprintf("%#-+ 0*.*lld", 8, 5, 100) == Printf.@sprintf("%#-+ 08.5lld", 100) + @test Printf.@sprintf("%#-+ 0*.*lld", 8, 5, 100) == Printf.@sprintf("%#-+ 08.5lld", 100) + @test Printf.@sprintf("%.*lld", 40, 1) == Printf.@sprintf("%.40lld", 1) + @test Printf.@sprintf("% .*lld", 40, 1) == Printf.@sprintf("% .40lld", 1) + @test Printf.@sprintf("% .*d", 40, 1) == Printf.@sprintf("% .40d", 1) + + @test Printf.@sprintf("%#0*x", 12, 1) == Printf.@sprintf("%#012x", 1) + @test Printf.@sprintf("%#0*.*x", 4, 8, 1) == Printf.@sprintf("%#04.8x", 1) + + @test Printf.@sprintf("%#-0*.*x", 8, 2, 1) == Printf.@sprintf("%#-08.2x", 1) + @test Printf.@sprintf("%#0*o", 8, 1) == Printf.@sprintf("%#08o", 1) + + @test Printf.@sprintf("%*d", 20, 1024) == Printf.@sprintf("%20d", 1024) + @test Printf.@sprintf("%*d", 20,-1024) == Printf.@sprintf("%20d", -1024) + @test Printf.@sprintf("%*i", 20, 1024) == Printf.@sprintf("%20i", 1024) + @test Printf.@sprintf("%*i", 20,-1024) == Printf.@sprintf("%20i", -1024) + @test Printf.@sprintf("%*u", 20, 1024) == Printf.@sprintf("%20u", 1024) + @test Printf.@sprintf("%*u", 20, UInt(4294966272)) == Printf.@sprintf("%20u", UInt(4294966272)) + @test Printf.@sprintf("%*o", 20, 511) == Printf.@sprintf("%20o", 511) + @test Printf.@sprintf("%*o", 20, UInt(4294966785)) == Printf.@sprintf("%20o", UInt(4294966785)) + @test Printf.@sprintf("%*x", 20, 305441741) == Printf.@sprintf("%20x", 305441741) + @test Printf.@sprintf("%*x", 20, UInt(3989525555)) == Printf.@sprintf("%20x", UInt(3989525555)) + @test Printf.@sprintf("%*X", 20, 305441741) == Printf.@sprintf("%20X", 305441741) + @test Printf.@sprintf("%*X", 20, UInt(3989525555)) == Printf.@sprintf("%20X", UInt(3989525555)) + @test Printf.@sprintf("%-*d", 20, 1024) == Printf.@sprintf("%-20d", 1024) + @test Printf.@sprintf("%-*d", 20,-1024) == Printf.@sprintf("%-20d", -1024) + @test Printf.@sprintf("%-*i", 20, 1024) == Printf.@sprintf("%-20i", 1024) + @test Printf.@sprintf("%-*i", 20,-1024) == Printf.@sprintf("%-20i", -1024) + @test Printf.@sprintf("%-*u", 20, 1024) == Printf.@sprintf("%-20u", 1024) + @test Printf.@sprintf("%-*u", 20, UInt(4294966272)) == Printf.@sprintf("%-20u", UInt(4294966272)) + @test Printf.@sprintf("%-*o", 20, 511) == Printf.@sprintf("%-20o", 511) + @test Printf.@sprintf("%-*o", 20, UInt(4294966785)) == Printf.@sprintf("%-20o", 
UInt(4294966785)) + @test Printf.@sprintf("%-*x", 20, 305441741) == Printf.@sprintf("%-20x", 305441741) + @test Printf.@sprintf("%-*x", 20, UInt(3989525555)) == Printf.@sprintf("%-20x", UInt(3989525555)) + @test Printf.@sprintf("%-*X", 20, 305441741) == Printf.@sprintf("%-20X", 305441741) + @test Printf.@sprintf("%-*X", 20, UInt(3989525555)) == Printf.@sprintf("%-20X", UInt(3989525555)) + @test Printf.@sprintf("%0*d", 20, 1024) == Printf.@sprintf("%020d", 1024) + @test Printf.@sprintf("%0*d", 20,-1024) == Printf.@sprintf("%020d", -1024) + @test Printf.@sprintf("%0*i", 20, 1024) == Printf.@sprintf("%020i", 1024) + @test Printf.@sprintf("%0*i", 20,-1024) == Printf.@sprintf("%020i", -1024) + @test Printf.@sprintf("%0*u", 20, 1024) == Printf.@sprintf("%020u", 1024) + @test Printf.@sprintf("%0*u", 20, UInt(4294966272)) == Printf.@sprintf("%020u", UInt(4294966272)) + @test Printf.@sprintf("%0*o", 20, 511) == Printf.@sprintf("%020o", 511) + @test Printf.@sprintf("%0*o", 20, UInt(4294966785)) == Printf.@sprintf("%020o", UInt(4294966785)) + @test Printf.@sprintf("%0*x", 20, 305441741) == Printf.@sprintf("%020x", 305441741) + @test Printf.@sprintf("%0*x", 20, UInt(3989525555)) == Printf.@sprintf("%020x", UInt(3989525555)) + @test Printf.@sprintf("%0*X", 20, 305441741) == Printf.@sprintf("%020X", 305441741) + @test Printf.@sprintf("%0*X", 20, UInt(3989525555)) == Printf.@sprintf("%020X", UInt(3989525555)) + @test Printf.@sprintf("%#*o", 20, 511) == Printf.@sprintf("%#20o", 511) + @test Printf.@sprintf("%#*o", 20, UInt(4294966785)) == Printf.@sprintf("%#20o", UInt(4294966785)) + @test Printf.@sprintf("%#*x", 20, 305441741) == Printf.@sprintf("%#20x", 305441741) + @test Printf.@sprintf("%#*x", 20, UInt(3989525555)) == Printf.@sprintf("%#20x", UInt(3989525555)) + @test Printf.@sprintf("%#*X", 20, 305441741) == Printf.@sprintf("%#20X", 305441741) + @test Printf.@sprintf("%#*X", 20, UInt(3989525555)) == Printf.@sprintf("%#20X", UInt(3989525555)) + @test Printf.@sprintf("%#0*o", 20, 511) == Printf.@sprintf("%#020o", 511) + @test Printf.@sprintf("%#0*o", 20, UInt(4294966785)) == Printf.@sprintf("%#020o", UInt(4294966785)) + @test Printf.@sprintf("%#0*x", 20, 305441741) == Printf.@sprintf("%#020x", 305441741) + @test Printf.@sprintf("%#0*x", 20, UInt(3989525555)) == Printf.@sprintf("%#020x", UInt(3989525555)) + @test Printf.@sprintf("%#0*X", 20, 305441741) == Printf.@sprintf("%#020X", 305441741) + @test Printf.@sprintf("%#0*X", 20, UInt(3989525555)) == Printf.@sprintf("%#020X", UInt(3989525555)) + @test Printf.@sprintf("%0-*d", 20, 1024) == Printf.@sprintf("%0-20d", 1024) + @test Printf.@sprintf("%0-*d", 20,-1024) == Printf.@sprintf("%0-20d", -1024) + @test Printf.@sprintf("%0-*i", 20, 1024) == Printf.@sprintf("%0-20i", 1024) + @test Printf.@sprintf("%0-*i", 20,-1024) == Printf.@sprintf("%0-20i", -1024) + @test Printf.@sprintf("%0-*u", 20, 1024) == Printf.@sprintf("%0-20u", 1024) + @test Printf.@sprintf("%0-*u", 20, UInt(4294966272)) == Printf.@sprintf("%0-20u", UInt(4294966272)) + @test Printf.@sprintf("%-0*o", 20, 511) == Printf.@sprintf("%-020o", 511) + @test Printf.@sprintf("%-0*o", 20, UInt(4294966785)) == Printf.@sprintf("%-020o", UInt(4294966785)) + @test Printf.@sprintf("%-0*x", 20, 305441741) == Printf.@sprintf("%-020x", 305441741) + @test Printf.@sprintf("%-0*x", 20, UInt(3989525555)) == Printf.@sprintf("%-020x", UInt(3989525555)) + @test Printf.@sprintf("%-0*X", 20, 305441741) == Printf.@sprintf("%-020X", 305441741) + @test Printf.@sprintf("%-0*X", 20, UInt(3989525555)) == 
Printf.@sprintf("%-020X", UInt(3989525555)) + @test Printf.@sprintf("%.*d", 20, 1024) == Printf.@sprintf("%.20d", 1024) + @test Printf.@sprintf("%.*d", 20,-1024) == Printf.@sprintf("%.20d", -1024) + @test Printf.@sprintf("%.*i", 20, 1024) == Printf.@sprintf("%.20i", 1024) + @test Printf.@sprintf("%.*i", 20,-1024) == Printf.@sprintf("%.20i", -1024) + @test Printf.@sprintf("%.*u", 20, 1024) == Printf.@sprintf("%.20u", 1024) + @test Printf.@sprintf("%.*u", 20, UInt(4294966272)) == Printf.@sprintf("%.20u", UInt(4294966272)) + @test Printf.@sprintf("%.*o", 20, 511) == Printf.@sprintf("%.20o", 511) + @test Printf.@sprintf("%.*o", 20, UInt(4294966785)) == Printf.@sprintf("%.20o", UInt(4294966785)) + @test Printf.@sprintf("%.*x", 20, 305441741) == Printf.@sprintf("%.20x", 305441741) + @test Printf.@sprintf("%.*x", 20, UInt(3989525555)) == Printf.@sprintf("%.20x", UInt(3989525555)) + @test Printf.@sprintf("%.*X", 20, 305441741) == Printf.@sprintf("%.20X", 305441741) + @test Printf.@sprintf("%.*X", 20, UInt(3989525555)) == Printf.@sprintf("%.20X", UInt(3989525555)) + @test Printf.@sprintf("%*.*d", 20, 5, 1024) == Printf.@sprintf("%20.5d", 1024) + @test Printf.@sprintf("%*.*d", 20, 5, -1024) == Printf.@sprintf("%20.5d", -1024) + @test Printf.@sprintf("%*.*i", 20, 5, 1024) == Printf.@sprintf("%20.5i", 1024) + @test Printf.@sprintf("%*.*i", 20, 5,-1024) == Printf.@sprintf("%20.5i", -1024) + @test Printf.@sprintf("%*.*u", 20, 5, 1024) == Printf.@sprintf("%20.5u", 1024) + @test Printf.@sprintf("%*.*u", 20, 5, UInt(4294966272)) == Printf.@sprintf("%20.5u", UInt(4294966272)) + @test Printf.@sprintf("%*.*o", 20, 5, 511) == Printf.@sprintf("%20.5o", 511) + @test Printf.@sprintf("%*.*o", 20, 5, UInt(4294966785)) == Printf.@sprintf("%20.5o", UInt(4294966785)) + @test Printf.@sprintf("%*.*x", 20, 5, 305441741) == Printf.@sprintf("%20.5x", 305441741) + @test Printf.@sprintf("%*.*x", 20, 10, UInt(3989525555)) == Printf.@sprintf("%20.10x", UInt(3989525555)) + @test Printf.@sprintf("%*.*X", 20, 5, 305441741) == Printf.@sprintf("%20.5X", 305441741) + @test Printf.@sprintf("%*.*X", 20, 10, UInt(3989525555)) == Printf.@sprintf("%20.10X", UInt(3989525555)) + @test Printf.@sprintf("%0*.*d", 20, 5, 1024) == Printf.@sprintf("%020.5d", 1024) + @test Printf.@sprintf("%0*.*d", 20, 5,-1024) == Printf.@sprintf("%020.5d", -1024) + @test Printf.@sprintf("%0*.*i", 20, 5, 1024) == Printf.@sprintf("%020.5i", 1024) + @test Printf.@sprintf("%0*.*i", 20, 5,-1024) == Printf.@sprintf("%020.5i", -1024) + @test Printf.@sprintf("%0*.*u", 20, 5, 1024) == Printf.@sprintf("%020.5u", 1024) + @test Printf.@sprintf("%0*.*u", 20, 5, UInt(4294966272)) == Printf.@sprintf("%020.5u", UInt(4294966272)) + @test Printf.@sprintf("%0*.*o", 20, 5, 511) == Printf.@sprintf("%020.5o", 511) + @test Printf.@sprintf("%0*.*o", 20, 5, UInt(4294966785)) == Printf.@sprintf("%020.5o", UInt(4294966785)) + @test Printf.@sprintf("%0*.*x", 20, 5, 305441741) == Printf.@sprintf("%020.5x", 305441741) + @test Printf.@sprintf("%0*.*x", 20, 10, UInt(3989525555)) == Printf.@sprintf("%020.10x", UInt(3989525555)) + @test Printf.@sprintf("%0*.*X", 20, 5, 305441741) == Printf.@sprintf("%020.5X", 305441741) + @test Printf.@sprintf("%0*.*X", 20, 10, UInt(3989525555)) == Printf.@sprintf("%020.10X", UInt(3989525555)) + @test Printf.@sprintf("%*.0d", 20, 1024) == Printf.@sprintf("%20.0d", 1024) + @test Printf.@sprintf("%*.d", 20,-1024) == Printf.@sprintf("%20.d", -1024) + @test Printf.@sprintf("%*.d", 20, 0) == Printf.@sprintf("%20.d", 0) + @test Printf.@sprintf("%*.0i", 20, 1024) == 
Printf.@sprintf("%20.0i", 1024) + @test Printf.@sprintf("%*.i", 20,-1024) == Printf.@sprintf("%20.i", -1024) + @test Printf.@sprintf("%*.i", 20, 0) == Printf.@sprintf("%20.i", 0) + @test Printf.@sprintf("%*.u", 20, 1024) == Printf.@sprintf("%20.u", 1024) + @test Printf.@sprintf("%*.0u", 20, UInt(4294966272)) == Printf.@sprintf("%20.0u", UInt(4294966272)) + @test Printf.@sprintf("%*.u", 20, UInt(0)) == Printf.@sprintf("%20.u", UInt(0)) + @test Printf.@sprintf("%*.o", 20, 511) == Printf.@sprintf("%20.o", 511) + @test Printf.@sprintf("%*.0o", 20, UInt(4294966785)) == Printf.@sprintf("%20.0o", UInt(4294966785)) + @test Printf.@sprintf("%*.o", 20, UInt(0)) == Printf.@sprintf("%20.o", UInt(0)) + @test Printf.@sprintf("%*.x", 20, 305441741) == Printf.@sprintf("%20.x", 305441741) + @test Printf.@sprintf("%*.0x", 20, UInt(3989525555)) == Printf.@sprintf("%20.0x", UInt(3989525555)) + @test Printf.@sprintf("%*.x", 20, UInt(0)) == Printf.@sprintf("%20.x", UInt(0)) + @test Printf.@sprintf("%*.X", 20, 305441741) == Printf.@sprintf("%20.X", 305441741) + @test Printf.@sprintf("%*.0X", 20, UInt(3989525555)) == Printf.@sprintf("%20.0X", UInt(3989525555)) + @test Printf.@sprintf("%*.X", 20, UInt(0)) == Printf.@sprintf("%20.X", UInt(0)) + + x = Ref{Int}() + y = Ref{Int}() + @test (Printf.@sprintf("%10s%n", "😉", x); Printf.@sprintf("%*s%n", 10, "😉", y); x[] == y[]) + @test (Printf.@sprintf("%10s%n", "1234", x); Printf.@sprintf("%*s%n", 10, "1234", y); x[] == y[]) + +end + +@testset "length modifiers" begin + @test_throws Printf.InvalidFormatStringError Printf.Format("%h") + @test_throws Printf.InvalidFormatStringError Printf.Format("%hh") + @test_throws Printf.InvalidFormatStringError Printf.Format("%z") +end + end # @testset "Printf" diff --git a/stdlib/Profile/Project.toml b/stdlib/Profile/Project.toml index 334d475832b6d..ad0107ecf9404 100644 --- a/stdlib/Profile/Project.toml +++ b/stdlib/Profile/Project.toml @@ -1,8 +1,6 @@ name = "Profile" uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" - -[deps] -Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" [extras] Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md index 8701dded0d427..ab670866e086c 100644 --- a/stdlib/Profile/docs/src/index.md +++ b/stdlib/Profile/docs/src/index.md @@ -34,6 +34,9 @@ First, a single stack trace at the instant that the signal was thrown is shown, followed by the profile report at the next yield point, which may be at task completion for code without yield points e.g. tight loops. +Optionally set environment variable [`JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`](@ref JULIA_PROFILE_PEEK_HEAP_SNAPSHOT) to `1` to also automatically collect a +[heap snapshot](@ref Heap-Snapshots). + ```julia-repl julia> foo() ##== the user sends a trigger while foo is running ==## @@ -103,7 +106,29 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as ` ```@docs Profile.Allocs.clear +Profile.Allocs.print Profile.Allocs.fetch Profile.Allocs.start Profile.Allocs.stop ``` + +## Heap Snapshots + +```@docs +Profile.take_heap_snapshot +``` + +The methods in `Profile` are not exported and need to be called e.g. as `Profile.take_heap_snapshot()`. + +```julia-repl +julia> using Profile + +julia> Profile.take_heap_snapshot("snapshot.heapsnapshot") +``` + +Traces and records julia objects on the heap. This only records objects known to the Julia +garbage collector. 
Memory allocated by external libraries not managed by the garbage +collector will not show up in the snapshot. + +The resulting heap snapshot file can be uploaded to chrome devtools to be viewed. +For more information, see the [chrome devtools docs](https://developer.chrome.com/docs/devtools/memory-problems/heap-snapshots/#view_snapshots). diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl index 26dd90a821e01..59399b1f21bb3 100644 --- a/stdlib/Profile/src/Allocs.jl +++ b/stdlib/Profile/src/Allocs.jl @@ -1,5 +1,12 @@ module Allocs +global print # Allocs.print is separate from both Base.print and Profile.print +public @profile, + clear, + print, + fetch + +using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree using Base.StackTraces: StackTrace, StackFrame, lookup using Base: InterpreterIP @@ -30,7 +37,7 @@ struct RawResults end """ - Profile.Allocs.@profile [sample_rate=0.0001] expr + Profile.Allocs.@profile [sample_rate=0.1] expr Profile allocations that happen during `expr`, returning both the result and and AllocResults struct. @@ -47,6 +54,10 @@ julia> last(sort(results.allocs, by=x->x.size)) Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576) ``` +The best way to visualize these is currently with the +[PProf.jl](https://github.com/JuliaPerf/PProf.jl) package, +by invoking `PProf.Allocs.pprof`. + !!! note The current implementation of the Allocations Profiler does not capture types for all allocations. Allocations for which the profiler @@ -54,7 +65,7 @@ Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at arr `Profile.Allocs.UnknownType`. You can read more about the missing types and the plan to improve this, here: - https://github.com/JuliaLang/julia/issues/43688. + <https://github.com/JuliaLang/julia/issues/43688>. !!! compat "Julia 1.8" The allocation profiler was added in Julia 1.8. @@ -63,7 +74,7 @@ macro profile(opts, ex) _prof_expr(ex, opts) end macro profile(ex) - _prof_expr(ex, :(sample_rate=0.0001)) + _prof_expr(ex, :(sample_rate=0.1)) end function _prof_expr(expr, opts) @@ -134,15 +145,19 @@ end # Without this, the Alloc's stacktrace prints for lines and lines and lines... function Base.show(io::IO, a::Alloc) stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : "" - print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))") + Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))") end const BacktraceCache = Dict{BTElement,Vector{StackFrame}} # copied from julia_internal.h -const JL_BUFF_TAG = UInt(0x4eadc000) +JL_BUFF_TAG::UInt = ccall(:jl_get_buff_tag, UInt, ()) const JL_GC_UNKNOWN_TYPE_TAG = UInt(0xdeadaa03) +function __init__() + global JL_BUFF_TAG = ccall(:jl_get_buff_tag, UInt, ()) +end + struct CorruptType end struct BufferType end struct UnknownType end @@ -208,9 +223,201 @@ function stacktrace_memoized( return stack end -# Precompile once for the package cache. -@assert precompile(start, ()) -@assert precompile(stop, ()) -@assert precompile(fetch, ()) +function warning_empty() + @warn """ + There were no samples collected. + Run your program longer (perhaps by running it multiple times), + or adjust the frequency of samples to record every event with + the `sample_rate=1.0` kwarg.""" +end + + +""" + Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...) + +Prints profiling results to `io` (by default, `stdout`). 
If you do not +supply a `data` vector, the internal buffer of accumulated backtraces +will be used. + +See `Profile.print` for an explanation of the valid keyword arguments. +""" +print(; kwargs...) = + Profile.print(stdout, fetch(); kwargs...) +print(io::IO; kwargs...) = + Profile.print(io, fetch(); kwargs...) +print(io::IO, data::AllocResults; kwargs...) = + Profile.print(io, data; kwargs...) +Profile.print(data::AllocResults; kwargs...) = + Profile.print(stdout, data; kwargs...) + +function Profile.print(io::IO, + data::AllocResults, + ; + format = :tree, + C = false, + #combine = true, + maxdepth::Int = typemax(Int), + mincount::Int = 0, + noisefloor = 0, + sortedby::Symbol = :filefuncline, + groupby::Union{Symbol,AbstractVector{Symbol}} = :none, + recur::Symbol = :off, + ) + pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur) + Profile.print(io, data, pf, format) + return +end + +function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol) + cols::Int = Base.displaysize(io)[2] + fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized")) + data = data.allocs + if format === :tree + tree(io, data, cols, fmt) + elseif format === :flat + fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off")) + flat(io, data, cols, fmt) + else + throw(ArgumentError("output format $(repr(format)) not recognized")) + end + nothing +end + + +function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T + lilist = StackFrame[] + n = Int[] + m = Int[] + lilist_idx = Dict{T, Int}() + recursive = Set{T}() + totalbytes = 0 + for r in data + first = true + empty!(recursive) + nb = r.size # or 1 for counting + totalbytes += nb + for frame in r.stacktrace + !C && frame.from_c && continue + key = (T === UInt64 ? ip : frame) + idx = get!(lilist_idx, key, length(lilist) + 1) + if idx > length(lilist) + push!(recursive, key) + push!(lilist, frame) + push!(n, nb) + push!(m, 0) + elseif !(key in recursive) + push!(recursive, key) + n[idx] += nb + end + if first + m[idx] += nb + first = false + end + end + end + @assert length(lilist) == length(n) == length(m) == length(lilist_idx) + return (lilist, n, m, totalbytes) +end + +function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat) + fmt.combine || error(ArgumentError("combine=false")) + lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C) + filenamemap = Dict{Symbol,String}() + if isempty(lilist) + warning_empty() + return true + end + print_flat(io, lilist, n, m, cols, filenamemap, fmt) + Base.println(io, "Total snapshots: ", length(data)) + Base.println(io, "Total bytes: ", totalbytes) + return false +end + +function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T} + tops = Vector{StackFrameTree{T}}() + build = Dict{T, StackFrameTree{T}}() + for r in all + first = true + nb = r.size # or 1 for counting + root.recur = 0 + root.count += nb + parent = root + for i in reverse(eachindex(r.stacktrace)) + frame = r.stacktrace[i] + key = (T === UInt64 ? 
ip : frame) + if (recur === :flat && !frame.from_c) || recur === :flatc + # see if this frame already has a parent + this = get!(build, frame, parent) + if this !== parent + # Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree + push!(tops, parent) + parent = this + end + end + !C && frame.from_c && continue + this = get!(StackFrameTree{T}, parent.down, key) + if recur === :off || this.recur == 0 + this.frame = frame + this.up = parent + this.count += nb + this.recur = 1 + else + this.count_recur += 1 + end + parent = this + end + parent.overhead += nb + if recur !== :off + # We mark all visited nodes to so we'll only count those branches + # once for each backtrace. Reset that now for the next backtrace. + empty!(build) + push!(tops, parent) + for top in tops + while top.recur != 0 + top.max_recur < top.recur && (top.max_recur = top.recur) + top.recur = 0 + top = top.up + end + end + empty!(tops) + end + let this = parent + while this !== root + this.flat_count += nb + this = this.up + end + end + end + function cleanup!(node::StackFrameTree) + stack = [node] + while !isempty(stack) + node = pop!(stack) + node.recur = 0 + empty!(node.builder_key) + empty!(node.builder_value) + append!(stack, values(node.down)) + end + nothing + end + cleanup!(root) + return root +end + +function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat) + fmt.combine || error(ArgumentError("combine=false")) + if fmt.combine + root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur) + else + root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur) + end + print_tree(io, root, cols, fmt, false) + if isempty(root.down) + warning_empty() + return true + end + Base.println(io, "Total snapshots: ", length(data)) + Base.println(io, "Total bytes: ", root.count) + return false +end end diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 593f265eba3fa..206d2957a91e5 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -5,6 +5,20 @@ Profiling support, main entry point is the [`@profile`](@ref) macro. """ module Profile +global print +public @profile, + clear, + print, + fetch, + retrieve, + add_fake_meta, + flatten, + callers, + init, + take_heap_snapshot, + clear_malloc_data, + Allocs + import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping) @@ -23,10 +37,7 @@ appended to an internal buffer of backtraces. macro profile(ex) return quote try - status = start_timer() - if status < 0 - error(error_codes[status]) - end + start_timer() $(esc(ex)) finally stop_timer() @@ -34,27 +45,12 @@ macro profile(ex) end end -# triggers printing the report after a SIGINFO/SIGUSR1 profile request -const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}() -function profile_printing_listener() - try - while true - wait(PROFILE_PRINT_COND[]) - peek_report[]() - end - catch ex - if !isa(ex, InterruptException) - @error "Profile printing listener crashed" exception=ex,catch_backtrace() - end - end -end - # An internal function called to show the report after an information request (SIGINFO or SIGUSR1). 
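Illustrative note: the `flat`/`tree` printers above route the allocation profiler through the same report machinery as the CPU profiler, via the new `Profile.Allocs.print` entry point. A usage sketch under my own assumptions (the profiled loop and the `sample_rate` value are arbitrary; the report itself is omitted):

```julia-repl
julia> using Profile

julia> Profile.Allocs.@profile sample_rate=1.0 begin
           v = Any[]
           for _ in 1:10_000
               push!(v, rand(8))
           end
       end;

julia> Profile.Allocs.print(format=:flat)   # forwards to Profile.print on the fetched AllocResults
```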
function _peek_report() iob = IOBuffer() - ioc = IOContext(IOContext(iob, stdout), :displaysize=>displaysize(stdout)) + ioc = IOContext(IOContext(iob, stderr), :displaysize=>displaysize(stderr)) print(ioc, groupby = [:thread, :task]) - Base.print(stdout, String(take!(iob))) + Base.print(stderr, String(take!(iob))) end # This is a ref so that it can be overridden by other profile info consumers. const peek_report = Ref{Function}(_peek_report) @@ -72,12 +68,7 @@ Set the duration in seconds of the profile "peek" that is triggered via `SIGINFO """ set_peek_duration(t::Float64) = ccall(:jl_set_profile_peek_duration, Cvoid, (Float64,), t) -precompile_script = """ -import Profile -Profile.@profile while Profile.len_data() < 1000; rand(10,10) * rand(10,10); end -Profile.peek_report[]() -Profile.clear() -""" + #### #### User-level functions @@ -91,17 +82,17 @@ stored per thread. Each instruction pointer corresponds to a single line of code list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently using keywords or in the order `(n, delay)`. - -!!! compat "Julia 1.8" - As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled. - Previously this was `n` total. """ function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing, limitwarn::Bool = true) n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ()) + if n_cur == 0 && isnothing(n) && isnothing(delay) + # indicates that the buffer hasn't been initialized at all, so set the default + default_init() + n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ()) + end delay_cur = ccall(:jl_profile_delay_nsec, UInt64, ())/10^9 if n === nothing && delay === nothing - nthreads = Sys.iswindows() ? 1 : Threads.nthreads() # windows only profiles the main thread - return round(Int, n_cur / nthreads), delay_cur + return n_cur, delay_cur end nnew = (n === nothing) ? n_cur : n delaynew = (delay === nothing) ? delay_cur : delay @@ -109,24 +100,21 @@ function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} end function init(n::Integer, delay::Real; limitwarn::Bool = true) - nthreads = Sys.iswindows() ? 
1 : Threads.nthreads() # windows only profiles the main thread sample_size_bytes = sizeof(Ptr) # == Sys.WORD_SIZE / 8 - buffer_samples = n * nthreads + buffer_samples = n buffer_size_bytes = buffer_samples * sample_size_bytes if buffer_size_bytes > 2^29 && Sys.WORD_SIZE == 32 - buffer_size_bytes_per_thread = floor(Int, 2^29 / nthreads) - buffer_samples_per_thread = floor(Int, buffer_size_bytes_per_thread / sample_size_bytes) - buffer_samples = buffer_samples_per_thread * nthreads + buffer_samples = floor(Int, 2^29 / sample_size_bytes) buffer_size_bytes = buffer_samples * sample_size_bytes - limitwarn && @warn "Requested profile buffer limited to 512MB (n = $buffer_samples_per_thread per thread) given that this system is 32-bit" + limitwarn && @warn "Requested profile buffer limited to 512MB (n = $buffer_samples) given that this system is 32-bit" end - status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, round(UInt64,10^9*delay)) + status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, round(UInt64, 10^9*delay)) if status == -1 - error("could not allocate space for ", n, " instruction pointers per thread being profiled ($nthreads threads, $(Base.format_bytes(buffer_size_bytes)) total)") + error("could not allocate space for ", n, " instruction pointers ($(Base.format_bytes(buffer_size_bytes)))") end end -function __init__() +function default_init() # init with default values # Use a max size of 10M profile samples, and fire timer every 1ms # (that should typically give around 100 seconds of record) @@ -136,15 +124,18 @@ function __init__() n = 1_000_000 delay = 0.01 else + # Keep these values synchronized with trigger_profile_peek n = 10_000_000 delay = 0.001 end init(n, delay, limitwarn = false) - @static if !Sys.iswindows() - # triggering a profile via signals is not implemented on windows - PROFILE_PRINT_COND[] = Base.AsyncCondition() - ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle) - errormonitor(Threads.@spawn(profile_printing_listener())) +end + +# Checks whether the profile buffer has been initialized. If not, initializes it with the default size. +function check_init() + buffer_size = @ccall jl_profile_maxlen_data()::Int + if buffer_size == 0 + default_init() end end @@ -242,19 +233,23 @@ function print(io::IO, tasks::Union{UInt,AbstractVector{UInt}} = typemin(UInt):typemax(UInt)) pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur) - if groupby == :none - print(io, data, lidict, pf, format, threads, tasks, false) + if groupby === :none + print_group(io, data, lidict, pf, format, threads, tasks, false) else if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]]) error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]")) elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]]) @warn "Profiling on windows is limited to the main thread. 
Other threads have not been sampled and will not show in the report" end - any_nosamples = false - println(io, "Overhead ╎ [+additional indent] Count File:Line; Function") - println(io, "=========================================================") + any_nosamples = true + if format === :tree + Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n") + Base.print(io, "=========================================================\n") + end if groupby == [:task, :thread] - for taskid in intersect(get_task_ids(data), tasks) + taskids = intersect(get_task_ids(data), tasks) + isempty(taskids) && (any_nosamples = true) + for taskid in taskids threadids = intersect(get_thread_ids(data, taskid), threads) if length(threadids) == 0 any_nosamples = true @@ -263,14 +258,16 @@ function print(io::IO, printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color()) for threadid in threadids printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color()) - nosamples = print(io, data, lidict, pf, format, threadid, taskid, true) + nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true) nosamples && (any_nosamples = true) println(io) end end end elseif groupby == [:thread, :task] - for threadid in intersect(get_thread_ids(data), threads) + threadids = intersect(get_thread_ids(data), threads) + isempty(threadids) && (any_nosamples = true) + for threadid in threadids taskids = intersect(get_task_ids(data, threadid), tasks) if length(taskids) == 0 any_nosamples = true @@ -279,25 +276,29 @@ function print(io::IO, printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color()) for taskid in taskids printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) - nosamples = print(io, data, lidict, pf, format, threadid, taskid, true) + nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true) nosamples && (any_nosamples = true) println(io) end end end - elseif groupby == :task + elseif groupby === :task threads = 1:typemax(Int) - for taskid in intersect(get_task_ids(data), tasks) + taskids = intersect(get_task_ids(data), tasks) + isempty(taskids) && (any_nosamples = true) + for taskid in taskids printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) - nosamples = print(io, data, lidict, pf, format, threads, taskid, true) + nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true) nosamples && (any_nosamples = true) println(io) end - elseif groupby == :thread + elseif groupby === :thread tasks = 1:typemax(UInt) - for threadid in intersect(get_thread_ids(data), threads) + threadids = intersect(get_thread_ids(data), threads) + isempty(threadids) && (any_nosamples = true) + for threadid in threadids printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color()) - nosamples = print(io, data, lidict, pf, format, threadid, tasks, true) + nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true) nosamples && (any_nosamples = true) println(io) end @@ -319,7 +320,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) = print(stdout, data, lidict; kwargs...) 
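Illustrative note: with the `print_group` refactor above, per-thread and per-task reports are selected through the `groupby` keyword. A quick sketch (the profiled workload is arbitrary and the report, being machine-specific, is omitted):

```julia-repl
julia> using Profile

julia> Profile.@profile for _ in 1:100; rand(100, 100) * rand(100, 100); end;

julia> Profile.print(groupby=:thread, format=:tree, mincount=5)
```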
-function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, +function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool = false) cols::Int = Base.displaysize(io)[2] @@ -410,7 +411,8 @@ function getdict!(dict::LineInfoDict, data::Vector{UInt}) n_unique_ips = length(unique_ips) n_unique_ips == 0 && return dict iplookups = similar(unique_ips, Vector{StackFrame}) - @sync for indexes_part in Iterators.partition(eachindex(unique_ips), div(n_unique_ips, Threads.nthreads(), RoundUp)) + sort!(unique_ips) # help each thread to get a disjoint set of libraries, as much if possible + @sync for indexes_part in Iterators.partition(eachindex(unique_ips), div(n_unique_ips, Threads.threadpoolsize(), RoundUp)) Threads.@spawn begin for i in indexes_part iplookups[i] = _lookup_corrected(unique_ips[i]) @@ -504,7 +506,7 @@ function short_path(spath::Symbol, filenamecache::Dict{Symbol, String}) end end return path - elseif isfile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "src", "base", path)) + elseif isfile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path)) # do the same mechanic for Base (or Core/Compiler) files as above, # but they start from a relative path return joinpath("@Base", normpath(path)) @@ -567,7 +569,14 @@ Julia, and examine the resulting `*.mem` files. clear_malloc_data() = ccall(:jl_clear_malloc_data, Cvoid, ()) # C wrappers -start_timer() = ccall(:jl_profile_start_timer, Cint, ()) +function start_timer() + check_init() # if the profile buffer hasn't been initialized, initialize with default size + status = ccall(:jl_profile_start_timer, Cint, ()) + if status < 0 + error(error_codes[status]) + end +end + stop_timer() = ccall(:jl_profile_stop_timer, Cvoid, ()) @@ -591,7 +600,7 @@ error_codes = Dict( """ fetch(;include_meta = true) -> data -Returns a copy of the buffer of profile backtraces. Note that the +Return a copy of the buffer of profile backtraces. Note that the values in `data` have meaning only on this machine in the current session, because it depends on the exact memory addresses used in JIT-compiling. This function is primarily for internal use; [`retrieve`](@ref) may be a better choice for most users. @@ -599,6 +608,9 @@ By default metadata such as threadid and taskid is included. Set `include_meta` """ function fetch(;include_meta = true, limitwarn = true) maxlen = maxlen_data() + if maxlen == 0 + error("The profiling data buffer is not initialized. A profile has not been requested this session.") + end len = len_data() if limitwarn && is_buffer_full() @warn """The profile data buffer is full; profiling probably terminated @@ -666,7 +678,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, m = Int[] lilist_idx = Dict{T, Int}() recursive = Set{T}() - first = true + leaf = 0 totalshots = 0 startframe = length(data) skip = false @@ -690,12 +702,16 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, skip = false totalshots += 1 empty!(recursive) - first = true + if leaf != 0 + m[leaf] += 1 + end + leaf = 0 startframe = i elseif !skip frames = lidict[ip] nframes = (frames isa Vector ? length(frames) : 1) - for j = 1:nframes + # the last lookup is the non-inlined root frame, the first is the inlined leaf frame + for j = nframes:-1:1 frame = (frames isa Vector ? 
frames[j] : frames) !C && frame.from_c && continue key = (T === UInt64 ? ip : frame) @@ -709,10 +725,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, push!(recursive, key) n[idx] += 1 end - if first - m[idx] += 1 - first = false - end + leaf = idx end end end @@ -723,30 +736,31 @@ end function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool) lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks) + if false # optional: drop the "non-interpretable" ones + keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist) + lilist = lilist[keep] + n = n[keep] + m = m[keep] + end util_perc = (1 - (nsleeping / totalshots)) * 100 + filenamemap = Dict{Symbol,String}() if isempty(lilist) if is_subsection Base.print(io, "Total snapshots: ") printstyled(io, "$(totalshots)", color=Base.warn_color()) - Base.println(io, " (", round(Int, util_perc), "% utilization)") + Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n") else warning_empty() end return true end - if false # optional: drop the "non-interpretable" ones - keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist) - lilist = lilist[keep] - n = n[keep] - m = m[keep] - end - filenamemap = Dict{Symbol,String}() - print_flat(io, lilist, n, m, cols, filenamemap, fmt) - Base.print(io, "Total snapshots: ", totalshots, " (", round(Int, util_perc), "% utilization") + is_subsection || print_flat(io, lilist, n, m, cols, filenamemap, fmt) + Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%") if is_subsection - println(io, ")") + println(io) + print_flat(io, lilist, n, m, cols, filenamemap, fmt) else - println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)") + Base.print(io, " across all threads and tasks. 
Use the `groupby` kwarg to break down by thread and/or task.\n") end return false end @@ -965,8 +979,8 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI root.count += 1 startframe = i elseif !skip - pushfirst!(build, parent) if recur === :flat || recur === :flatc + pushfirst!(build, parent) # Rewind the `parent` tree back, if this exact ip was already present *higher* in the current tree found = false for j in 1:(startframe - i) @@ -1067,8 +1081,8 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat filenamemap = Dict{Symbol,String}() worklist = [(bt, 0, 0, "")] if !is_subsection - println(io, "Overhead ╎ [+additional indent] Count File:Line; Function") - println(io, "=========================================================") + Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n") + Base.print(io, "=========================================================\n") end while !isempty(worklist) (bt, level, noisefloor, str) = popfirst!(worklist) @@ -1114,24 +1128,23 @@ function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, Line root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks) end util_perc = (1 - (nsleeping / root.count)) * 100 - !is_subsection && print_tree(io, root, cols, fmt, is_subsection) + is_subsection || print_tree(io, root, cols, fmt, is_subsection) if isempty(root.down) if is_subsection Base.print(io, "Total snapshots: ") printstyled(io, "$(root.count)", color=Base.warn_color()) - Base.println(io, ". Utilization: ", round(Int, util_perc), "%") + Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n") else warning_empty() end return true - else - Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") end + Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") if is_subsection - println(io) + Base.println(io) print_tree(io, root, cols, fmt, is_subsection) else - println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task") + Base.print(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task.\n") end return false end @@ -1212,6 +1225,50 @@ function warning_empty(;summary = false) end end + +""" + Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false) + Profile.take_heap_snapshot(filepath::String, all_one::Bool=false) + Profile.take_heap_snapshot(all_one::Bool=false; dir::String) + +Write a snapshot of the heap, in the JSON format expected by the Chrome +Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file +(`\$pid_\$timestamp.heapsnapshot`) in the current directory by default (or tempdir if +the current directory is unwritable), or in `dir` if given, or the given +full file path, or IO stream. + +If `all_one` is true, then report the size of every object as one so they can be easily +counted. Otherwise, report the actual size. 
+""" +function take_heap_snapshot(io::IOStream, all_one::Bool=false) + Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) +end +function take_heap_snapshot(filepath::String, all_one::Bool=false) + open(filepath, "w") do io + take_heap_snapshot(io, all_one) + end + return filepath +end +function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString} + fname = "$(getpid())_$(time_ns()).heapsnapshot" + if isnothing(dir) + wd = pwd() + fpath = joinpath(wd, fname) + try + touch(fpath) + rm(fpath; force=true) + catch + @warn "Cannot write to current directory `$(pwd())` so saving heap snapshot to `$(tempdir())`" maxlog=1 _id=Symbol(wd) + fpath = joinpath(tempdir(), fname) + end + else + fpath = joinpath(expanduser(dir), fname) + end + return take_heap_snapshot(fpath, all_one) +end + + include("Allocs.jl") +include("precompile.jl") end # module diff --git a/stdlib/Profile/src/precompile.jl b/stdlib/Profile/src/precompile.jl new file mode 100644 index 0000000000000..7b33e09941b28 --- /dev/null +++ b/stdlib/Profile/src/precompile.jl @@ -0,0 +1,11 @@ +if Base.generating_output() + precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt}) + precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}}) + precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt}) + precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}}) + precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}}) + precompile(Tuple{typeof(Profile._peek_report)}) + precompile(Tuple{typeof(Profile.Allocs.start)}) + precompile(Tuple{typeof(Profile.Allocs.stop)}) + precompile(Tuple{typeof(Profile.Allocs.fetch)}) +end diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl index b8d6222d07567..d4930a2b7f5ed 100644 --- a/stdlib/Profile/test/allocs.jl +++ b/stdlib/Profile/test/allocs.jl @@ -1,6 +1,13 @@ using Test using Profile: Allocs +Allocs.clear() +let iobuf = IOBuffer() + for format in (:tree, :flat) + Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true) + end +end + @testset "alloc profiler doesn't segfault" begin res = Allocs.@profile sample_rate=1.0 begin # test the allocations during compilation @@ -13,6 +20,20 @@ using Profile: Allocs @test first_alloc.size > 0 @test length(first_alloc.stacktrace) > 0 @test length(string(first_alloc.type)) > 0 + + # test printing options + for options in ((format=:tree, C=true), + (format=:tree, maxdepth=2), + (format=:flat, C=true), + (), + (format=:flat, sortedby=:count), + (format=:tree, recur=:flat), + ) + iobuf = IOBuffer() + Allocs.print(iobuf; options...) 
+ str = String(take!(iobuf)) + @test !isempty(str) + end end @testset "alloc profiler works when there are multiple tasks on multiple threads" begin @@ -64,7 +85,8 @@ end @testset "alloc profiler start stop fetch clear" begin function do_work() # Compiling allocates a lot - for f in (gensym() for _ in 1:10) + nsyms = @static Sys.WORD_SIZE == 32 ? 1 : 10 + for f in (gensym() for _ in 1:nsyms) @eval begin $f() = 10 $f() @@ -120,3 +142,39 @@ end @test length(prof.allocs) >= 1 @test length([a for a in prof.allocs if a.type == String]) >= 1 end + +@testset "alloc profiler catches allocs from codegen" begin + @eval begin + struct MyType x::Int; y::Int end + Base.:(+)(n::Number, x::MyType) = n + x.x + x.y + foo(a, x) = a[1] + x + wrapper(a) = foo(a, MyType(0,1)) + end + a = Any[1,2,3] + # warmup + wrapper(a) + + @eval Allocs.@profile sample_rate=1 wrapper($a) + + prof = Allocs.fetch() + Allocs.clear() + + @test length(prof.allocs) >= 1 + @test length([a for a in prof.allocs if a.type == MyType]) >= 1 +end + +@testset "alloc profiler catches allocs from buffer resize" begin + f(a) = for _ in 1:100; push!(a, 1); end + f(Int[]) + resize!(Int[], 1) + a = Int[] + Allocs.clear() + Allocs.@profile sample_rate=1 f(a) + Allocs.@profile sample_rate=1 resize!(a, 1_000_000) # 4MB + prof = Allocs.fetch() + Allocs.clear() + + @test 3 <= length(prof.allocs) <= 10 + @test length([a for a in prof.allocs if a.type === Allocs.BufferType]) == 1 + @test length([a for a in prof.allocs if a.type === Memory{Int}]) >= 2 +end diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 058158023cd25..ad7c12272d051 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -3,6 +3,8 @@ using Test, Profile, Serialization, Logging using Base.StackTraces: StackFrame +@test_throws "The profiling data buffer is not initialized. A profile has not been requested this session." Profile.print() + Profile.clear() Profile.init() @@ -36,28 +38,18 @@ let r = Profile.retrieve() end end -let iobuf = IOBuffer() - Profile.print(iobuf, format=:tree, C=true) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf, format=:tree, maxdepth=2) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf, format=:flat, C=true) - str = String(take!(iobuf)) - @test !isempty(str) - truncate(iobuf, 0) - Profile.print(iobuf) - @test !isempty(String(take!(iobuf))) - truncate(iobuf, 0) - Profile.print(iobuf, format=:flat, sortedby=:count) - @test !isempty(String(take!(iobuf))) - Profile.print(iobuf, format=:tree, recur=:flat) +# test printing options +for options in ((format=:tree, C=true), + (format=:tree, maxdepth=2), + (format=:flat, C=true), + (), + (format=:flat, sortedby=:count), + (format=:tree, recur=:flat), + ) + iobuf = IOBuffer() + Profile.print(iobuf; options...) str = String(take!(iobuf)) @test !isempty(str) - truncate(iobuf, 0) end @testset "Profile.print() groupby options" begin @@ -118,11 +110,10 @@ end @testset "setting sample count and delay in init" begin n_, delay_ = Profile.init() n_original = n_ - nthreads = Sys.iswindows() ? 1 : Threads.nthreads() sample_size_bytes = sizeof(Ptr) def_n = Sys.iswindows() && Sys.WORD_SIZE == 32 ? 
1_000_000 : 10_000_000 - if Sys.WORD_SIZE == 32 && (def_n * nthreads * sample_size_bytes) > 2^29 - @test n_ * nthreads * sample_size_bytes <= 2^29 + if Sys.WORD_SIZE == 32 && (def_n * sample_size_bytes) > 2^29 + @test n_ * sample_size_bytes <= 2^29 else @test n_ == def_n end @@ -131,8 +122,8 @@ end @test delay_ == def_delay Profile.init(n=1_000_001, delay=0.0005) n_, delay_ = Profile.init() - if Sys.WORD_SIZE == 32 && (1_000_001 * nthreads * sample_size_bytes) > 2^29 - @test n_ * nthreads * sample_size_bytes <= 2^29 + if Sys.WORD_SIZE == 32 && (1_000_001 * sample_size_bytes) > 2^29 + @test n_ * sample_size_bytes <= 2^29 else @test n_ == 1_000_001 end @@ -151,14 +142,14 @@ end @profile busywait(1, 20) _, fdict0 = Profile.flatten(Profile.retrieve()...) Base.update_stackframes_callback[] = function(list) - modify((sf, n)) = sf.func == :busywait ? (StackTraces.StackFrame(sf.func, sf.file, sf.line+2, sf.linfo, sf.from_c, sf.inlined, sf.pointer), n) : (sf, n) + modify((sf, n)) = sf.func === :busywait ? (StackTraces.StackFrame(sf.func, sf.file, sf.line+2, sf.linfo, sf.from_c, sf.inlined, sf.pointer), n) : (sf, n) map!(modify, list, list) end _, fdictc = Profile.flatten(Profile.retrieve()...) Base.update_stackframes_callback[] = identity function getline(sfs) for sf in sfs - sf.func == :busywait && return sf.line + sf.func === :busywait && return sf.line end nothing end @@ -180,7 +171,7 @@ let cmd = Base.julia_cmd() p = open(`$cmd -e $script`) t = Timer(120) do t # should be under 10 seconds, so give it 2 minutes then report failure - println("KILLING BY PROFILE TEST WATCHDOG\n") + println("KILLING debuginfo registration test BY PROFILE TEST WATCHDOG\n") kill(p, Base.SIGTERM) sleep(10) kill(p, Base.SIGKILL) @@ -197,42 +188,54 @@ if Sys.isbsd() || Sys.islinux() @testset "SIGINFO/SIGUSR1 profile triggering" begin let cmd = Base.julia_cmd() script = """ - x = rand(1000, 1000) - println("started") - while true - x * x - yield() - end + print(stderr, "started\n") + eof(stdin) """ - iob = Base.BufferStream() - p = run(pipeline(`$cmd -e $script`, stderr = devnull, stdout = iob), wait = false) + iob = Base.BufferStream() # make an unbounded buffer, so we can just read after waiting for exit + notify_exit = Base.PipeEndpoint() + p = run(`$cmd -e $script`, notify_exit, devnull, iob, wait=false) + eof = @async try # set up a monitor task to set EOF on iob after p exits + wait(p) + finally + closewrite(iob) + end t = Timer(120) do t # should be under 10 seconds, so give it 2 minutes then report failure - println("KILLING BY PROFILE TEST WATCHDOG\n") + println("KILLING siginfo/sigusr1 test BY PROFILE TEST WATCHDOG\n") kill(p, Base.SIGTERM) sleep(10) kill(p, Base.SIGKILL) - close(iob) + close(notify_exit) end try - s = readuntil(iob, "started", keep = true) + s = readuntil(iob, "started", keep=true) @assert occursin("started", s) @assert process_running(p) - for _ in 1:2 - sleep(2.5) + for i in 1:2 + i > 1 && sleep(5) if Sys.isbsd() kill(p, 29) # SIGINFO elseif Sys.islinux() kill(p, 10) # SIGUSR1 end - s = readuntil(iob, "Overhead ╎", keep = true) + s = readuntil(iob, "Overhead ╎", keep=true) @test process_running(p) + readavailable(iob) @test occursin("Overhead ╎", s) end - finally - kill(p, Base.SIGKILL) + close(notify_exit) # notify test finished + wait(eof) # wait for test completion + s = read(iob, String) # consume test output from buffer close(t) + catch + close(notify_exit) + wait(eof) # wait for test completion + errs = read(iob, String) # consume test output + isempty(errs) || println("CHILD STDERR 
after test failure: ", errs) + close(t) + rethrow() end + @test success(p) end end end @@ -263,11 +266,27 @@ end Profile.tree!(root, backtraces, lidict, #= C =# true, :off) @test length(root.down) == 2 for k in keys(root.down) - @test k.file == :file1 + @test k.file === :file1 @test k.line ∈ (1, 2) end node = root.down[stackframe(:f1, :file1, 2)] @test only(node.down).first == lidict[8] end +@testset "HeapSnapshot" begin + tmpdir = mktempdir() + fname = cd(tmpdir) do + read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String) + end + + @test isfile(fname) + + open(fname) do fs + @test readline(fs) != "" + end + + rm(fname) + rm(tmpdir, force = true, recursive = true) +end + include("allocs.jl") diff --git a/stdlib/REPL/Project.toml b/stdlib/REPL/Project.toml index 4f77157da0146..77eca2bfe4240 100644 --- a/stdlib/REPL/Project.toml +++ b/stdlib/REPL/Project.toml @@ -1,5 +1,6 @@ name = "REPL" uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" [deps] InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" @@ -8,8 +9,8 @@ Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["Test", "Random"] diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md index 203f377c9ba63..ae444e134ad17 100644 --- a/stdlib/REPL/docs/src/index.md +++ b/stdlib/REPL/docs/src/index.md @@ -7,8 +7,9 @@ shell modes. The REPL can be started by simply calling `julia` with no arguments on the executable: ```@eval +using REPL io = IOBuffer() -Base.banner(io) +REPL.banner(io) banner = String(take!(io)) import Markdown Markdown.parse("```\n\$ julia\n\n$(banner)\njulia>\n```") @@ -43,14 +44,14 @@ julia> ans "12" ``` -In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting -text that starts with `julia> ` into the REPL. In that case, only expressions starting with -`julia> ` are parsed, others are removed. This makes it possible to paste a chunk of code -that has been copied from a REPL session without having to scrub away prompts and outputs. This -feature is enabled by default but can be disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`. -If it is enabled, you can try it out by pasting the code block above this paragraph straight into -the REPL. This feature does not work on the standard Windows command prompt due to its limitation -at detecting when a paste occurs. +In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting text +that starts with `julia> ` into the REPL. In that case, only expressions starting with `julia> ` (as +well as the other REPL mode prompts: `shell> `, `help?> `, `pkg>` ) are parsed, but others are +removed. This makes it possible to paste a chunk of text that has been copied from a REPL session +without having to scrub away prompts and outputs. This feature is enabled by default but can be +disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`. If it is enabled, you can try it +out by pasting the code block above this paragraph straight into the REPL. This feature does not +work on the standard Windows command prompt due to its limitation at detecting when a paste occurs. Objects are printed at the REPL using the [`show`](@ref) function with a specific [`IOContext`](@ref). 
In particular, the `:limit` attribute is set to `true`. @@ -259,6 +260,7 @@ to do so), or pressing Esc and then the key. | `^W` | Delete previous text up to the nearest whitespace | | `meta-w` | Copy the current region in the kill ring | | `meta-W` | "Kill" the current region, placing the text in the kill ring | +| `^U` | "Kill" to beginning of line, placing the text in the kill ring | | `^K` | "Kill" to end of line, placing the text in the kill ring | | `^Y` | "Yank" insert the text from the kill ring | | `meta-y` | Replace a previously yanked text with an older entry from the kill ring | @@ -311,7 +313,7 @@ Users should refer to `LineEdit.jl` to discover the available actions on key inp ## Tab completion -In both the Julian and help modes of the REPL, one can enter the first few characters of a function +In the Julian, pkg and help modes of the REPL, one can enter the first few characters of a function or type and then press the tab key to get a list all matches: ```julia-repl @@ -333,6 +335,13 @@ julia> mapfold[TAB] mapfoldl mapfoldr ``` +When a single complete tab-complete result is available at the end of an input line and 2 or more characters +have been typed, a hint of the completion will show in a lighter color. +This can be disabled via `Base.active_repl.options.hint_tab_completes = false`. + +!!! compat "Julia 1.11" + Tab-complete hinting was added in Julia 1.11 + Like other components of the REPL, the search is case-sensitive: ```julia-repl @@ -413,7 +422,7 @@ Tab completion can also help completing fields: ```julia-repl julia> x = 3 + 4im; -julia> julia> x.[TAB][TAB] +julia> x.[TAB][TAB] im re julia> import UUIDs @@ -569,8 +578,8 @@ Main It is possible to change this contextual module via the function `REPL.activate(m)` where `m` is a `Module` or by typing the module in the REPL -and pressing the keybinding Alt-m (the cursor must be on the module name). The -active module is shown in the prompt: +and pressing the keybinding Alt-m (the cursor must be on the module name). The `Main` module can be "activated" with an empty prompt plus the keybinding. The +active module is shown in the prompt (unless it is `Main`): ```julia-repl julia> using REPL @@ -590,7 +599,7 @@ julia> Core<Alt-m> # using the keybinding to change module (Core) julia> -(Core) julia> Main<Alt-m> # going back to Main via keybinding +(Core) julia> <Alt-m> # going back to Main via keybinding julia> ``` @@ -616,6 +625,42 @@ julia> REPL.activate(CustomMod) var 8 bytes Int64 ``` +## Numbered prompt + +It is possible to get an interface which is similar to the IPython REPL and the Mathematica notebook with numbered input prompts and output prefixes. This is done by calling `REPL.numbered_prompt!()`. If you want to have this enabled on startup, add + +```julia +atreplinit() do repl + @eval import REPL + if !isdefined(repl, :interface) + repl.interface = REPL.setup_interface(repl) + end + REPL.numbered_prompt!(repl) +end +``` + +to your `startup.jl` file. In numbered prompt the variable `Out[n]` (where `n` is an integer) can be used to refer to earlier results: + +```julia-repl +In [1]: 5 + 3 +Out[1]: 8 + +In [2]: Out[1] + 5 +Out[2]: 13 + +In [3]: Out +Out[3]: Dict{Int64, Any} with 2 entries: + 2 => 13 + 1 => 8 +``` + +!!! note + Since all outputs from previous REPL evaluations are saved in the `Out` variable, one should be careful if they are returning many + large in-memory objects like arrays, since they will be protected from garbage collection so long as a reference to them remains in + `Out`. 
If you need to remove references to objects in `Out`, you can clear the entire history it stores with `empty!(Out)`, or clear + an individual entry with `Out[n] = nothing`. + + ## TerminalMenus TerminalMenus is a submodule of the Julia REPL and enables small, low-profile interactive menus in the terminal. diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl index b30a1d816a83f..709eeaa2857d0 100644 --- a/stdlib/REPL/src/LineEdit.jl +++ b/stdlib/REPL/src/LineEdit.jl @@ -49,6 +49,9 @@ mutable struct Prompt <: TextInterface prompt_prefix::Union{String,Function} # Same as prefix except after the prompt prompt_suffix::Union{String,Function} + output_prefix::Union{String,Function} + output_prefix_prefix::Union{String,Function} + output_prefix_suffix::Union{String,Function} keymap_dict::Dict{Char,Any} repl::Union{AbstractREPL,Nothing} complete::CompletionProvider @@ -94,6 +97,7 @@ mutable struct PromptState <: ModeState p::Prompt input_buffer::IOBuffer region_active::Symbol # :shift or :mark or :off + hint::Union{String,Nothing} undo_buffers::Vector{IOBuffer} undo_idx::Int ias::InputAreaState @@ -320,32 +324,45 @@ function common_prefix(completions::Vector{String}) end end +# This is the maximum number of completions that will be displayed in a single +# column, anything above that and multiple columns will be used. Note that this +# does not restrict column length when multiple columns are used. +const MULTICOLUMN_THRESHOLD = 5 + # Show available completions function show_completions(s::PromptState, completions::Vector{String}) - colmax = maximum(map(length, completions)) - num_cols = max(div(width(terminal(s)), colmax+2), 1) - entries_per_col, r = divrem(length(completions), num_cols) - entries_per_col += r != 0 # skip any lines of input after the cursor cmove_down(terminal(s), input_string_newlines_aftercursor(s)) println(terminal(s)) - for row = 1:entries_per_col - for col = 0:num_cols - idx = row + col*entries_per_col - if idx <= length(completions) - cmove_col(terminal(s), (colmax+2)*col+1) + if any(Base.Fix1(occursin, '\n'), completions) + foreach(Base.Fix1(println, terminal(s)), completions) + else + n = length(completions) + colmax = 2 + maximum(length, completions; init=1) # n.b. length >= textwidth + + num_cols = min(cld(n, MULTICOLUMN_THRESHOLD), + max(div(width(terminal(s)), colmax), 1)) + + entries_per_col = cld(n, num_cols) + idx = 0 + for _ in 1:entries_per_col + for col = 0:(num_cols-1) + idx += 1 + idx > n && break + cmove_col(terminal(s), colmax*col+1) print(terminal(s), completions[idx]) end + println(terminal(s)) end - println(terminal(s)) end + # make space for the prompt for i = 1:input_string_newlines(s) println(terminal(s)) end end -# Prompt Completions +# Prompt Completions & Hints function complete_line(s::MIState) set_action!(s, :complete_line) if complete_line(state(s), s.key_repeats, s.active_module) @@ -356,6 +373,50 @@ function complete_line(s::MIState) end end +function check_for_hint(s::MIState) + st = state(s) + if !options(st).hint_tab_completes || !eof(buffer(st)) + # only generate hints if enabled and at the end of the line + # TODO: maybe show hints for insertions at other positions + # Requires making space for them earlier in refresh_multi_line + return clear_hint(st) + end + completions, partial, should_complete = complete_line(st.p.complete, st, s.active_module)::Tuple{Vector{String},String,Bool} + isempty(completions) && return clear_hint(st) + # Don't complete for single chars, given e.g. 
`x` completes to `xor` + if length(partial) > 1 && should_complete + singlecompletion = length(completions) == 1 + p = singlecompletion ? completions[1] : common_prefix(completions) + if singlecompletion || p in completions # i.e. complete `@time` even though `@time_imports` etc. exists + # The completion `p` and the input `partial` may not share the same initial + # characters, for instance when completing to subscripts or superscripts. + # So, in general, make sure that the hint starts at the correct position by + # incrementing its starting position by as many characters as the input. + startind = 1 # index of p from which to start providing the hint + maxind = ncodeunits(p) + for _ in partial + startind = nextind(p, startind) + startind > maxind && break + end + if startind ≤ maxind # completion on a complete name returns itself so check that there's something to hint + hint = p[startind:end] + st.hint = hint + return true + end + end + end + return clear_hint(st) +end + +function clear_hint(s::ModeState) + if !isnothing(s.hint) + s.hint = "" # don't set to nothing here. That will be done in `maybe_show_hint` + return true # indicate maybe_show_hint has work to do + else + return false + end +end + function complete_line(s::PromptState, repeats::Int, mod::Module) completions, partial, should_complete = complete_line(s.p.complete, s, mod)::Tuple{Vector{String},String,Bool} isempty(completions) && return false @@ -416,12 +477,29 @@ prompt_string(p::Prompt) = prompt_string(p.prompt) prompt_string(s::AbstractString) = s prompt_string(f::Function) = Base.invokelatest(f) +function maybe_show_hint(s::PromptState) + isa(s.hint, String) || return nothing + # The hint being "" then nothing is used to first clear a previous hint, then skip printing the hint + # the clear line cannot be printed each time because it breaks column movement + if isempty(s.hint) + print(terminal(s), "\e[0K") # clear remainder of line which had a hint + s.hint = nothing + else + Base.printstyled(terminal(s), s.hint, color=:light_black) + cmove_left(terminal(s), textwidth(s.hint)) + s.hint = "" # being "" signals to do one clear line remainder to clear the hint next time if still empty + end + return nothing +end + function refresh_multi_line(s::PromptState; kw...) if s.refresh_wait !== nothing close(s.refresh_wait) s.refresh_wait = nothing end - refresh_multi_line(terminal(s), s; kw...) + r = refresh_multi_line(terminal(s), s; kw...) + maybe_show_hint(s) + return r end refresh_multi_line(s::ModeState; kw...) = refresh_multi_line(terminal(s), s; kw...) refresh_multi_line(termbuf::TerminalBuffer, s::ModeState; kw...) = refresh_multi_line(termbuf, terminal(s), s; kw...) @@ -444,7 +522,7 @@ function refresh_multi_line(termbuf::TerminalBuffer, terminal::UnixTerminal, buf # Write out the prompt string lindent = write_prompt(termbuf, prompt, hascolor(terminal))::Int # Count the '\n' at the end of the line if the terminal emulator does (specific to DOS cmd prompt) - miscountnl = @static Sys.iswindows() ? (isa(Terminals.pipe_reader(terminal), Base.TTY) && !Base.ispty(Terminals.pipe_reader(terminal))) : false + miscountnl = @static Sys.iswindows() ? (isa(Terminals.pipe_reader(terminal), Base.TTY) && !(Base.ispty(Terminals.pipe_reader(terminal)))::Bool) : false # Now go through the buffer line by line seek(buf, 0) @@ -753,10 +831,11 @@ function edit_splice!(s::BufferLike, r::Region=region(s), ins::String = ""; rigi elseif buf.mark >= B buf.mark += sizeof(ins) - B + A end + ensureroom(buf, B) # handle !buf.reinit from take! 
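Editorial aside (not part of the patch): the hint machinery above (`check_for_hint`/`maybe_show_hint`) boils down to taking the candidate completions, continuing only if at least two characters were typed and the shared prefix is itself a valid completion, and then displaying whatever part of that prefix the user has not typed yet. A self-contained sketch of that computation, using hypothetical helper names rather than the actual LineEdit internals:

```julia
# Hypothetical, standalone sketch of the hint computation; the real logic lives in
# LineEdit.check_for_hint and differs in details (mode state, options, refresh).
function common_prefix(strs::Vector{String})
    prefix = strs[1]
    for s in strs
        while !startswith(s, prefix)      # shrink until it prefixes every candidate
            prefix = prefix[1:prevind(prefix, end)]
        end
    end
    return prefix
end

function hint_suffix(completions::Vector{String}, partial::AbstractString)
    (isempty(completions) || length(partial) <= 1) && return nothing
    p = length(completions) == 1 ? completions[1] : common_prefix(completions)
    isempty(p) && return nothing
    # only hint when the shared prefix is itself a completion,
    # e.g. `@time` even though `@time_imports` etc. also match
    (length(completions) == 1 || p in completions) || return nothing
    startind = 1                          # skip as many characters of `p` as were typed;
    for _ in partial                      # the typed text may differ from `p` (e.g. subscripts)
        startind = nextind(p, startind)
        startind > ncodeunits(p) && return nothing
    end
    return p[startind:end]                # the grayed-out remainder shown after the cursor
end

hint_suffix(["@time", "@timev", "@time_imports"], "@ti")   # == "me"
```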
ret = splice!(buf.data, A+1:B, codeunits(String(ins))) # position(), etc, are 0-indexed buf.size = buf.size + sizeof(ins) - B + A adjust_pos && seek(buf, position(buf) + sizeof(ins)) - return String(ret) + return String(copy(ret)) end edit_splice!(s::MIState, ins::AbstractString) = edit_splice!(s, region(s), ins) @@ -1093,7 +1172,7 @@ end function edit_transpose_chars(buf::IOBuffer) # Moving left but not transpoing anything is intentional, and matches Emacs's behavior - eof(buf) && char_move_left(buf) + eof(buf) && position(buf) !== 0 && char_move_left(buf) position(buf) == 0 && return false char_move_left(buf) pos = position(buf) @@ -1273,7 +1352,7 @@ end # compute the number of spaces from b till the next non-space on the right # (which can also be "end of line" or "end of buffer") function leadingspaces(buf::IOBuffer, b::Int) - ls = something(findnext(_notspace, buf.data, b+1), 0)-1 + @views ls = something(findnext(_notspace, buf.data[1:buf.size], b+1), 0)-1 ls == -1 && (ls = buf.size) ls -= b return ls @@ -1323,9 +1402,9 @@ end function edit_input(s, f = (filename, line, column) -> InteractiveUtils.edit(filename, line, column)) mode_name = guess_current_mode_name(s) filename = tempname() - if mode_name == :julia + if mode_name === :julia filename *= ".jl" - elseif mode_name == :shell + elseif mode_name === :shell filename *= ".sh" end buf = buffer(s) @@ -1349,19 +1428,22 @@ function edit_input(s, f = (filename, line, column) -> InteractiveUtils.edit(fil col += 1 end + # Write current input to temp file, edit, read back write(filename, str) f(filename, line, col) str_mod = readchomp(filename) rm(filename) - if str != str_mod # something was changed, run the input - write(buf, str_mod) - commit_line(s) - :done - else # no change, the edit session probably unsuccessful - write(buf, str) - seek(buf, pos) # restore state from before edit - refresh_line(s) + + # Write updated content + write(buf, str_mod) + if str == str_mod + # If input was not modified: reset cursor + seek(buf, pos) + else + # If input was modified: move cursor to end + move_input_end(s) end + refresh_line(s) end # return the identifier under the cursor, possibly with other words concatenated @@ -1397,14 +1479,22 @@ current_word_with_dots(s::MIState) = current_word_with_dots(buffer(s)) function activate_module(s::MIState) word = current_word_with_dots(s); - isempty(word) && return beep(s) - try - mod = Base.Core.eval(Base.active_module(), Base.Meta.parse(word)) - REPL.activate(mod) - edit_clear(s) - catch + mod = if isempty(word) + edit_insert(s, ' ') # makes the `edit_clear` below actually update the prompt + Main + else + try + Base.Core.eval(Base.active_module(), Base.Meta.parse(word)) + catch + nothing + end + end + if !(mod isa Module) beep(s) + return end + REPL.activate(mod) + edit_clear(s) end history_prev(::EmptyHistoryProvider) = ("", false) @@ -1447,7 +1537,6 @@ default_completion_cb(::IOBuffer) = [] default_enter_cb(_) = true write_prompt(terminal::AbstractTerminal, s::PromptState, color::Bool) = write_prompt(terminal, s.p, color) - function write_prompt(terminal::AbstractTerminal, p::Prompt, color::Bool) prefix = prompt_string(p.prompt_prefix) suffix = prompt_string(p.prompt_suffix) @@ -1459,6 +1548,17 @@ function write_prompt(terminal::AbstractTerminal, p::Prompt, color::Bool) return width end +function write_output_prefix(io::IO, p::Prompt, color::Bool) + prefix = prompt_string(p.output_prefix_prefix) + suffix = prompt_string(p.output_prefix_suffix) + print(io, prefix) + color && write(io, 
Base.text_colors[:bold]) + width = write_prompt(io, p.output_prefix, color) + color && write(io, Base.text_colors[:normal]) + print(io, suffix) + return width +end + # On Windows, when launching external processes, we cannot control what assumption they make on the # console mode. We thus forcibly reset the console mode at the start of the prompt to ensure they do # not leave the console mode in a corrupt state. @@ -1490,7 +1590,7 @@ end end # returns the width of the written prompt -function write_prompt(terminal, s::Union{AbstractString,Function}, color::Bool) +function write_prompt(terminal::Union{IO, AbstractTerminal}, s::Union{AbstractString,Function}, color::Bool) @static Sys.iswindows() && _reset_console_mode() promptstr = prompt_string(s)::String write(terminal, promptstr) @@ -1548,7 +1648,7 @@ function normalize_keys(keymap::Union{Dict{Char,Any},AnyDict}) return ret end -function add_nested_key!(keymap::Dict, key::Union{String, Char}, value; override = false) +function add_nested_key!(keymap::Dict{Char, Any}, key::Union{String, Char}, value; override::Bool = false) y = iterate(key) while y !== nothing c, i = y @@ -1563,7 +1663,7 @@ function add_nested_key!(keymap::Dict, key::Union{String, Char}, value; override elseif !(c in keys(keymap) && isa(keymap[c], Dict)) keymap[c] = Dict{Char,Any}() end - keymap = keymap[c] + keymap = keymap[c]::Dict{Char, Any} end end @@ -1708,7 +1808,7 @@ end function getEntry(keymap::Dict{Char,Any},key::Union{String,Char}) v = keymap for c in key - if !haskey(v,c) + if !(haskey(v,c)::Bool) return nothing end v = v[c] @@ -2217,7 +2317,7 @@ end function move_line_end(buf::IOBuffer) eof(buf) && return - pos = findnext(isequal(UInt8('\n')), buf.data, position(buf)+1) + @views pos = findnext(isequal(UInt8('\n')), buf.data[1:buf.size], position(buf)+1) if pos === nothing move_input_end(buf) return @@ -2394,8 +2494,8 @@ AnyDict( "\e\n" => "\e\r", "^_" => (s::MIState,o...)->edit_undo!(s), "\e_" => (s::MIState,o...)->edit_redo!(s), - # Simply insert it into the buffer by default - "*" => (s::MIState,data,c::StringLike)->(edit_insert(s, c)), + # Show hints at what tab complete would do by default + "*" => (s::MIState,data,c::StringLike)->(edit_insert(s, c); check_for_hint(s) && refresh_line(s)), "^U" => (s::MIState,o...)->edit_kill_line_backwards(s), "^K" => (s::MIState,o...)->edit_kill_line_forwards(s), "^Y" => (s::MIState,o...)->edit_yank(s), @@ -2586,6 +2686,9 @@ function Prompt(prompt ; prompt_prefix = "", prompt_suffix = "", + output_prefix = "", + output_prefix_prefix = "", + output_prefix_suffix = "", keymap_dict = default_keymap_dict, repl = nothing, complete = EmptyCompletionProvider(), @@ -2594,14 +2697,14 @@ function Prompt(prompt hist = EmptyHistoryProvider(), sticky = false) - return Prompt(prompt, prompt_prefix, prompt_suffix, keymap_dict, repl, - complete, on_enter, on_done, hist, sticky) + return Prompt(prompt, prompt_prefix, prompt_suffix, output_prefix, output_prefix_prefix, output_prefix_suffix, + keymap_dict, repl, complete, on_enter, on_done, hist, sticky) end run_interface(::Prompt) = nothing init_state(terminal, prompt::Prompt) = - PromptState(terminal, prompt, IOBuffer(), :off, IOBuffer[], 1, InputAreaState(1, 1), + PromptState(terminal, prompt, IOBuffer(), :off, nothing, IOBuffer[], 1, InputAreaState(1, 1), #=indent(spaces)=# -1, Threads.SpinLock(), 0.0, -Inf, nothing) function init_state(terminal, m::ModalInterface) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 4a5246301cf43..8c55883a04fb9 100644 --- 
a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -3,17 +3,36 @@ """ Run Evaluate Print Loop (REPL) - Example minimal code - ``` - import REPL - term = REPL.Terminals.TTYTerminal("dumb", stdin, stdout, stderr) - repl = REPL.LineEditREPL(term, true) - REPL.run_repl(repl) - ``` +Example minimal code + +```julia +import REPL +term = REPL.Terminals.TTYTerminal("dumb", stdin, stdout, stderr) +repl = REPL.LineEditREPL(term, true) +REPL.run_repl(repl) +``` """ module REPL +const PRECOMPILE_STATEMENTS = Vector{String}() + +function __init__() + Base.REPL_MODULE_REF[] = REPL + # We can encounter the situation where the sub-ordinate process used + # during precompilation of REPL, can load a valid cache-file. + # We need to replay the statements such that the parent process + # can also include those. See JuliaLang/julia#51532 + if Base.JLOptions().trace_compile !== C_NULL && !isempty(PRECOMPILE_STATEMENTS) + for statement in PRECOMPILE_STATEMENTS + ccall(:jl_write_precompile_statement, Cvoid, (Cstring,), statement) + end + else + empty!(PRECOMPILE_STATEMENTS) + end +end + Base.Experimental.@optlevel 1 +Base.Experimental.@max_methods 1 using Base.Meta, Sockets import InteractiveUtils @@ -70,10 +89,6 @@ include("docview.jl") @nospecialize # use only declared type signatures -function __init__() - Base.REPL_MODULE_REF[] = REPL -end - answer_color(::AbstractREPL) = "" const JULIA_PROMPT = "julia> " @@ -151,7 +166,7 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend, mod::Module) end value = Core.eval(mod, ast) backend.in_eval = false - setglobal!(mod, :ans, value) + setglobal!(Base.MainInclude, :ans, value) put!(backend.response_channel, Pair{Any, Bool}(value, false)) end break @@ -179,8 +194,8 @@ function check_for_missing_packages_and_run_hooks(ast) end function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[]) - ast.head == :quote && return mods # don't search if it's not going to be run during this eval - if ast.head in [:using, :import] + ast.head === :quote && return mods # don't search if it's not going to be run during this eval + if ast.head === :using || ast.head === :import for arg in ast.args arg = arg::Expr arg1 = first(arg.args) @@ -250,17 +265,22 @@ function repl_backend_loop(backend::REPLBackend, get_module::Function) return nothing end -struct REPLDisplay{R<:AbstractREPL} <: AbstractDisplay - repl::R +struct REPLDisplay{Repl<:AbstractREPL} <: AbstractDisplay + repl::Repl end -==(a::REPLDisplay, b::REPLDisplay) = a.repl === b.repl - function display(d::REPLDisplay, mime::MIME"text/plain", x) x = Ref{Any}(x) with_repl_linfo(d.repl) do io io = IOContext(io, :limit => true, :module => active_module(d)::Module) - get(io, :color, false) && write(io, answer_color(d.repl)) + if d.repl isa LineEditREPL + mistate = d.repl.mistate + mode = LineEdit.mode(mistate) + if mode isa LineEdit.Prompt + LineEdit.write_output_prefix(io, mode, get(io, :color, false)::Bool) + end + end + get(io, :color, false)::Bool && write(io, answer_color(d.repl)) if isdefined(d.repl, :options) && isdefined(d.repl.options, :iocontext) # this can override the :limit property set initially io = foldl(IOContext, d.repl.options.iocontext, init=io) @@ -280,6 +300,19 @@ function print_response(repl::AbstractREPL, response, show_value::Bool, have_col end return nothing end + +function repl_display_error(errio::IO, @nospecialize errval) + # this will be set to true if types in the stacktrace are truncated + limitflag = Ref(false) + errio = IOContext(errio, :stacktrace_types_limited => 
limitflag) + Base.invokelatest(Base.display_error, errio, errval) + if limitflag[] + print(errio, "Some type information was truncated. Use `show(err)` to see complete types.") + println(errio) + end + return nothing +end + function print_response(errio::IO, response, show_value::Bool, have_color::Bool, specialdisplay::Union{AbstractDisplay,Nothing}=nothing) Base.sigatomic_begin() val, iserr = response @@ -288,8 +321,8 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool, Base.sigatomic_end() if iserr val = Base.scrub_repl_backtrace(val) - Base.istrivialerror(val) || setglobal!(Main, :err, val) - Base.invokelatest(Base.display_error, errio, val) + Base.istrivialerror(val) || setglobal!(Base.MainInclude, :err, val) + repl_display_error(errio, val) else if val !== nothing && show_value try @@ -311,8 +344,8 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool, println(errio, "SYSTEM (REPL): showing an error caused an error") try excs = Base.scrub_repl_backtrace(current_exceptions()) - setglobal!(Main, :err, excs) - Base.invokelatest(Base.display_error, errio, excs) + setglobal!(Base.MainInclude, :err, excs) + repl_display_error(errio, excs) catch e # at this point, only print the name of the type as a Symbol to # minimize the possibility of further errors. @@ -354,8 +387,7 @@ end consumer is an optional function that takes a REPLBackend as an argument """ -function run_repl(repl::AbstractREPL, @nospecialize(consumer = x -> nothing); backend_on_current_task::Bool = true) - backend = REPLBackend() +function run_repl(repl::AbstractREPL, @nospecialize(consumer = x -> nothing); backend_on_current_task::Bool = true, backend = REPLBackend()) backend_ref = REPLBackendRef(backend) cleanup = @task try destroy(backend_ref, t) @@ -384,6 +416,7 @@ end mutable struct BasicREPL <: AbstractREPL terminal::TextTerminal waserror::Bool + frontend_task::Task BasicREPL(t) = new(t, false) end @@ -391,6 +424,7 @@ outstream(r::BasicREPL) = r.terminal hascolor(r::BasicREPL) = hascolor(r.terminal) function run_frontend(repl::BasicREPL, backend::REPLBackendRef) + repl.frontend_task = current_task() d = REPLDisplay(repl) dopushdisplay = !in(d,Base.Multimedia.displays) dopushdisplay && pushdisplay(d) @@ -457,6 +491,7 @@ mutable struct LineEditREPL <: AbstractREPL last_shown_line_infos::Vector{Tuple{String,Int}} interface::ModalInterface backendref::REPLBackendRef + frontend_task::Task function LineEditREPL(t,hascolor,prompt_color,input_color,answer_color,shell_color,help_color,history_file,in_shell,in_help,envcolors) opts = Options() opts.hascolor = hascolor @@ -467,7 +502,7 @@ mutable struct LineEditREPL <: AbstractREPL in_help,envcolors,false,nothing, opts, nothing, Tuple{String,Int}[]) end end -outstream(r::LineEditREPL) = r.t isa TTYTerminal ? r.t.out_stream : r.t +outstream(r::LineEditREPL) = (t = r.t; t isa TTYTerminal ? t.out_stream : t) specialdisplay(r::LineEditREPL) = r.specialdisplay specialdisplay(r::AbstractREPL) = nothing terminal(r::LineEditREPL) = r.t @@ -491,11 +526,18 @@ REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers()) mutable struct ShellCompletionProvider <: CompletionProvider end struct LatexCompletions <: CompletionProvider end -active_module(repl::LineEditREPL) = repl.mistate === nothing ? 
Main : repl.mistate.active_module +function active_module() # this method is also called from Base + isdefined(Base, :active_repl) || return Main + return active_module(Base.active_repl::AbstractREPL) +end +active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module active_module(::AbstractREPL) = Main active_module(d::REPLDisplay) = active_module(d.repl) +setmodifiers!(c::CompletionProvider, m::LineEdit.Modifiers) = nothing + setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers = m + """ activate(mod::Module=Main) @@ -503,9 +545,11 @@ Set `mod` as the default contextual module in the REPL, both for evaluating expressions and printing them. """ function activate(mod::Module=Main) - Base.active_repl.mistate.active_module = mod + mistate = (Base.active_repl::LineEditREPL).mistate + mistate === nothing && return nothing + mistate.active_module = mod Base.load_InteractiveUtils(mod) - nothing + return nothing end beforecursor(buf::IOBuffer) = String(buf.data[1:buf.ptr-1]) @@ -1055,7 +1099,7 @@ function setup_interface( shell_prompt_len = length(SHELL_PROMPT) help_prompt_len = length(HELP_PROMPT) - jl_prompt_regex = r"^(?:\(.+\) )?julia> " + jl_prompt_regex = r"^In \[[0-9]+\]: |^(?:\(.+\) )?julia> " pkg_prompt_regex = r"^(?:\(.+\) )?pkg> " # Canonicalize user keymap input @@ -1084,6 +1128,31 @@ function setup_interface( edit_insert(s, '?') end end, + ']' => function (s::MIState,o...) + if isempty(s) || position(LineEdit.buffer(s)) == 0 + pkgid = Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg") + if Base.locate_package(pkgid) !== nothing # Only try load Pkg if we can find it + Pkg = Base.require(pkgid) + # Pkg should have loaded its REPL mode by now, let's find it so we can transition to it. + pkg_mode = nothing + for mode in repl.interface.modes + if mode isa LineEdit.Prompt && mode.complete isa Pkg.REPLMode.PkgCompletionProvider + pkg_mode = mode + break + end + end + # TODO: Cache the `pkg_mode`? 
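Editorial aside (not part of the patch): the contextual-module feature that `activate` implements above is driven entirely from the REPL prompt. A hypothetical session (module name chosen for illustration) looks roughly like this:

```julia-repl
julia> using REPL

julia> module Scratch
           greeting = "hello"
       end;

julia> REPL.activate(Scratch)

(Main.Scratch) julia> greeting
"hello"

(Main.Scratch) julia> REPL.activate()  # no argument: back to Main

julia>
```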
+ if pkg_mode !== nothing + buf = copy(LineEdit.buffer(s)) + transition(s, pkg_mode) do + LineEdit.state(s, pkg_mode).input_buffer = buf + end + return + end + end + end + edit_insert(s, ']') + end, # Bracketed Paste Mode "\e[200~" => (s::MIState,o...)->begin @@ -1237,7 +1306,7 @@ function setup_interface( @goto writeback end try - InteractiveUtils.edit(linfos[n][1], linfos[n][2]) + InteractiveUtils.edit(Base.fixup_stdlib_path(linfos[n][1]), linfos[n][2]) catch ex ex isa ProcessFailedException || ex isa Base.IOError || ex isa SystemError || rethrow() @info "edit failed" _exception=ex @@ -1269,6 +1338,7 @@ function setup_interface( end function run_frontend(repl::LineEditREPL, backend::REPLBackendRef) + repl.frontend_task = current_task() d = REPLDisplay(repl) dopushdisplay = repl.specialdisplay === nothing && !in(d,Base.Multimedia.displays) dopushdisplay && pushdisplay(d) @@ -1294,6 +1364,7 @@ mutable struct StreamREPL <: AbstractREPL input_color::String answer_color::String waserror::Bool + frontend_task::Task StreamREPL(stream,pc,ic,ac) = new(stream,pc,ic,ac,false) end StreamREPL(stream::IO) = StreamREPL(stream, Base.text_colors[:green], Base.input_color(), Base.answer_color()) @@ -1351,9 +1422,80 @@ ends_with_semicolon(code::AbstractString) = ends_with_semicolon(String(code)) ends_with_semicolon(code::Union{String,SubString{String}}) = contains(_rm_strings_and_comments(code), r";\s*$") +function banner(io::IO = stdout; short = false) + if Base.GIT_VERSION_INFO.tagged_commit + commit_string = Base.TAGGED_RELEASE_BANNER + elseif isempty(Base.GIT_VERSION_INFO.commit) + commit_string = "" + else + days = Int(floor((ccall(:jl_clock_now, Float64, ()) - Base.GIT_VERSION_INFO.fork_master_timestamp) / (60 * 60 * 24))) + days = max(0, days) + unit = days == 1 ? "day" : "days" + distance = Base.GIT_VERSION_INFO.fork_master_distance + commit = Base.GIT_VERSION_INFO.commit_short + + if distance == 0 + commit_string = "Commit $(commit) ($(days) $(unit) old master)" + else + branch = Base.GIT_VERSION_INFO.branch + commit_string = "$(branch)/$(commit) (fork: $(distance) commits, $(days) $(unit))" + end + end + + commit_date = isempty(Base.GIT_VERSION_INFO.date_string) ? "" : " ($(split(Base.GIT_VERSION_INFO.date_string)[1]))" + + if get(io, :color, false)::Bool + c = Base.text_colors + tx = c[:normal] # text + jl = c[:normal] # julia + d1 = c[:bold] * c[:blue] # first dot + d2 = c[:bold] * c[:red] # second dot + d3 = c[:bold] * c[:green] # third dot + d4 = c[:bold] * c[:magenta] # fourth dot + + if short + print(io,""" + $(d3)o$(tx) | Version $(VERSION)$(commit_date) + $(d2)o$(tx) $(d4)o$(tx) | $(commit_string) + """) + else + print(io,""" $(d3)_$(tx) + $(d1)_$(tx) $(jl)_$(tx) $(d2)_$(d3)(_)$(d4)_$(tx) | Documentation: https://docs.julialang.org + $(d1)(_)$(jl) | $(d2)(_)$(tx) $(d4)(_)$(tx) | + $(jl)_ _ _| |_ __ _$(tx) | Type \"?\" for help, \"]?\" for Pkg help. + $(jl)| | | | | | |/ _` |$(tx) | + $(jl)| | |_| | | | (_| |$(tx) | Version $(VERSION)$(commit_date) + $(jl)_/ |\\__'_|_|_|\\__'_|$(tx) | $(commit_string) + $(jl)|__/$(tx) | + + """) + end + else + if short + print(io,""" + o | Version $(VERSION)$(commit_date) + o o | $(commit_string) + """) + else + print(io,""" + _ + _ _ _(_)_ | Documentation: https://docs.julialang.org + (_) | (_) (_) | + _ _ _| |_ __ _ | Type \"?\" for help, \"]?\" for Pkg help. 
+ | | | | | | |/ _` | | + | | |_| | | | (_| | | Version $(VERSION)$(commit_date) + _/ |\\__'_|_|_|\\__'_| | $(commit_string) + |__/ | + + """) + end + end +end + function run_frontend(repl::StreamREPL, backend::REPLBackendRef) + repl.frontend_task = current_task() have_color = hascolor(repl) - Base.banner(repl.stream) + banner(repl.stream) d = REPLDisplay(repl) dopushdisplay = !in(d,Base.Multimedia.displays) dopushdisplay && pushdisplay(d) @@ -1381,4 +1523,108 @@ function run_frontend(repl::StreamREPL, backend::REPLBackendRef) nothing end +module Numbered + +using ..REPL + +__current_ast_transforms() = isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms + +function repl_eval_counter(hp) + return length(hp.history) - hp.start_idx +end + +function out_transform(@nospecialize(x), n::Ref{Int}) + return Expr(:toplevel, get_usings!([], x)..., quote + let __temp_val_a72df459 = $x + $capture_result($n, __temp_val_a72df459) + __temp_val_a72df459 + end + end) +end + +function get_usings!(usings, ex) + ex isa Expr || return usings + # get all `using` and `import` statements which are at the top level + for (i, arg) in enumerate(ex.args) + if Base.isexpr(arg, :toplevel) + get_usings!(usings, arg) + elseif Base.isexpr(arg, [:using, :import]) + push!(usings, popat!(ex.args, i)) + end + end + return usings +end + +function capture_result(n::Ref{Int}, @nospecialize(x)) + n = n[] + mod = Base.MainInclude + if !isdefined(mod, :Out) + @eval mod global Out + @eval mod export Out + setglobal!(mod, :Out, Dict{Int, Any}()) + end + if x !== getglobal(mod, :Out) && x !== nothing # remove this? + getglobal(mod, :Out)[n] = x + end + nothing +end + +function set_prompt(repl::LineEditREPL, n::Ref{Int}) + julia_prompt = repl.interface.modes[1] + julia_prompt.prompt = function() + n[] = repl_eval_counter(julia_prompt.hist)+1 + string("In [", n[], "]: ") + end + nothing +end + +function set_output_prefix(repl::LineEditREPL, n::Ref{Int}) + julia_prompt = repl.interface.modes[1] + if REPL.hascolor(repl) + julia_prompt.output_prefix_prefix = Base.text_colors[:red] + end + julia_prompt.output_prefix = () -> string("Out[", n[], "]: ") + nothing +end + +function __current_ast_transforms(backend) + if backend === nothing + isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms + else + backend.ast_transforms + end +end + + +function numbered_prompt!(repl::LineEditREPL=Base.active_repl, backend=nothing) + n = Ref{Int}(0) + set_prompt(repl, n) + set_output_prefix(repl, n) + push!(__current_ast_transforms(backend), @nospecialize(ast) -> out_transform(ast, n)) + return +end + +""" + Out[n] + +A variable referring to all previously computed values, automatically imported to the interactive prompt. +Only defined and exists while using [Numbered prompt](@ref Numbered-prompt). + +See also [`ans`](@ref). +""" +Base.MainInclude.Out + +end + +import .Numbered.numbered_prompt! + +# this assignment won't survive precompilation, +# but will stick if REPL is baked into a sysimg. +# Needs to occur after this module is finished. 
+Base.REPL_MODULE_REF[] = REPL + +if Base.generating_output() + include("precompile.jl") +end + end # module diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 295fd5ae64229..da5d20270c5b6 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -4,8 +4,10 @@ module REPLCompletions export completions, shell_completions, bslash_completions, completion_text +using Core: CodeInfo, MethodInstance, CodeInstance, Const +const CC = Core.Compiler using Base.Meta -using Base: propertynames, something +using Base: propertynames, something, IdSet abstract type Completion end @@ -17,6 +19,10 @@ struct KeywordCompletion <: Completion keyword::String end +struct KeyvalCompletion <: Completion + keyval::String +end + struct PathCompletion <: Completion path::String end @@ -59,6 +65,10 @@ struct DictCompletion <: Completion key::String end +struct KeywordArgumentCompletion <: Completion + kwarg::String +end + # interface definition function Base.getproperty(c::Completion, name::Symbol) if name === :text @@ -85,21 +95,25 @@ function Base.getproperty(c::Completion, name::Symbol) return getfield(c, :text)::String elseif name === :key return getfield(c, :key)::String + elseif name === :kwarg + return getfield(c, :kwarg)::String end return getfield(c, name) end _completion_text(c::TextCompletion) = c.text _completion_text(c::KeywordCompletion) = c.keyword +_completion_text(c::KeyvalCompletion) = c.keyval _completion_text(c::PathCompletion) = c.path _completion_text(c::ModuleCompletion) = c.mod _completion_text(c::PackageCompletion) = c.package -_completion_text(c::PropertyCompletion) = string(c.property) -_completion_text(c::FieldCompletion) = string(c.field) +_completion_text(c::PropertyCompletion) = sprint(Base.show_sym, c.property) +_completion_text(c::FieldCompletion) = sprint(Base.show_sym, c.field) _completion_text(c::MethodCompletion) = repr(c.method) _completion_text(c::BslashCompletion) = c.bslash _completion_text(c::ShellCompletion) = c.text _completion_text(c::DictCompletion) = c.key +_completion_text(c::KeywordArgumentCompletion) = c.kwarg*'=' completion_text(c) = _completion_text(c)::String @@ -110,7 +124,8 @@ function completes_global(x, name) end function appendmacro!(syms, macros, needle, endchar) - for s in macros + for macsym in macros + s = String(macsym) if endswith(s, needle) from = nextind(s, firstindex(s)) to = prevind(s, sizeof(s)-sizeof(needle)+1) @@ -121,44 +136,38 @@ end function filtered_mod_names(ffunc::Function, mod::Module, name::AbstractString, all::Bool = false, imported::Bool = false) ssyms = names(mod, all = all, imported = imported) + all || filter!(Base.Fix1(Base.isexported, mod), ssyms) filter!(ffunc, ssyms) - syms = String[string(s) for s in ssyms] - macros = filter(x -> startswith(x, "@" * name), syms) + macros = filter(x -> startswith(String(x), "@" * name), ssyms) + syms = String[sprint((io,s)->Base.show_sym(io, s; allow_macroname=true), s) for s in ssyms if completes_global(String(s), name)] appendmacro!(syms, macros, "_str", "\"") appendmacro!(syms, macros, "_cmd", "`") - filter!(x->completes_global(x, name), syms) return [ModuleCompletion(mod, sym) for sym in syms] end # REPL Symbol Completions -function complete_symbol(sym::String, @nospecialize(ffunc), context_module::Module=Main) +function complete_symbol(@nospecialize(ex), name::String, @nospecialize(ffunc), context_module::Module=Main) mod = context_module - name = sym lookup_module = true t = Union{} val = nothing - if 
something(findlast(in(non_identifier_chars), sym), 0) < something(findlast(isequal('.'), sym), 0) - # Find module - lookup_name, name = rsplit(sym, ".", limit=2) - - ex = Meta.parse(lookup_name, raise=false, depwarn=false) - - b, found = get_value(ex, context_module) - if found - val = b - if isa(b, Module) - mod = b + if ex !== nothing + res = repl_eval_ex(ex, context_module) + res === nothing && return Completion[] + if res isa Const + val = res.val + if isa(val, Module) + mod = val lookup_module = true else lookup_module = false - t = typeof(b) + t = typeof(val) end - else # If the value is not found using get_value, the expression contain an advanced expression + else lookup_module = false - t, found = get_type(ex, context_module) + t = CC.widenconst(res) end - found || return Completion[] end suggestions = Completion[] @@ -167,7 +176,7 @@ function complete_symbol(sym::String, @nospecialize(ffunc), context_module::Modu # as excluding Main.Main.Main, etc., because that's most likely not what # the user wants p = let mod=mod, modname=nameof(mod) - s->(!Base.isdeprecated(mod, s) && s != modname && ffunc(mod, s)::Bool) + (s::Symbol) -> !Base.isdeprecated(mod, s) && s != modname && ffunc(mod, s)::Bool && !(mod === Main && s === :MainInclude) end # Looking for a binding in a module if mod == context_module @@ -187,48 +196,85 @@ function complete_symbol(sym::String, @nospecialize(ffunc), context_module::Modu push!(suggestions, PropertyCompletion(val, property)) end end - else + elseif field_completion_eligible(t) # Looking for a member of a type - if t isa DataType && t != Any - # Check for cases like Type{typeof(+)} - if t isa DataType && t.name === Base._TYPE_NAME - t = typeof(t.parameters[1]) - end - # Only look for fields if this is a concrete type - if isconcretetype(t) - fields = fieldnames(t) - for field in fields - s = string(field) - if startswith(s, name) - push!(suggestions, FieldCompletion(t, field)) - end - end + add_field_completions!(suggestions, name, t) + end + return suggestions +end + +function add_field_completions!(suggestions::Vector{Completion}, name::String, @nospecialize(t)) + if isa(t, Union) + add_field_completions!(suggestions, name, t.a) + add_field_completions!(suggestions, name, t.b) + else + @assert isconcretetype(t) + fields = fieldnames(t) + for field in fields + isa(field, Symbol) || continue # Tuple type has ::Int field name + s = string(field) + if startswith(s, name) + push!(suggestions, FieldCompletion(t, field)) end end end - suggestions +end + +const GENERIC_PROPERTYNAMES_METHOD = which(propertynames, (Any,)) + +function field_completion_eligible(@nospecialize t) + if isa(t, Union) + return field_completion_eligible(t.a) && field_completion_eligible(t.b) + end + isconcretetype(t) || return false + # field completion is correct only when `getproperty` fallbacks to `getfield` + match = Base._which(Tuple{typeof(propertynames),t}; raise=false) + match === nothing && return false + return match.method === GENERIC_PROPERTYNAMES_METHOD +end + +function complete_from_list(T::Type, list::Vector{String}, s::Union{String,SubString{String}}) + r = searchsorted(list, s) + i = first(r) + n = length(list) + while i <= n && startswith(list[i],s) + r = first(r):i + i += 1 + end + Completion[T(kw) for kw in list[r]] end const sorted_keywords = [ "abstract type", "baremodule", "begin", "break", "catch", "ccall", - "const", "continue", "do", "else", "elseif", "end", "export", "false", + "const", "continue", "do", "else", "elseif", "end", "export", "finally", "for", 
"function", "global", "if", "import", "let", "local", "macro", "module", "mutable struct", "primitive type", "quote", "return", "struct", - "true", "try", "using", "while"] + "try", "using", "while"] -function complete_keyword(s::Union{String,SubString{String}}) - r = searchsorted(sorted_keywords, s) - i = first(r) - n = length(sorted_keywords) - while i <= n && startswith(sorted_keywords[i],s) - r = first(r):i - i += 1 - end - Completion[KeywordCompletion(kw) for kw in sorted_keywords[r]] +complete_keyword(s::Union{String,SubString{String}}) = complete_from_list(KeywordCompletion, sorted_keywords, s) + +const sorted_keyvals = ["false", "true"] + +complete_keyval(s::Union{String,SubString{String}}) = complete_from_list(KeyvalCompletion, sorted_keyvals, s) + +function do_raw_escape(s) + # escape_raw_string with delim='`' and ignoring the rule for the ending \ + return replace(s, r"(\\+)`" => s"\1\\`") +end +function do_shell_escape(s) + return Base.shell_escape_posixly(s) +end +function do_string_escape(s) + return escape_string(s, ('\"','$')) end -function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_escape=false) +function complete_path(path::AbstractString; + use_envpath=false, + shell_escape=false, + raw_escape=false, + string_escape=false) + @assert !(shell_escape && string_escape) if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path) # if the path is just "~", don't consider the expanded username as a prefix if path == "~" @@ -239,50 +285,49 @@ function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_ else dir, prefix = splitdir(path) end - local files - try + files = try if isempty(dir) - files = readdir() + readdir() elseif isdir(dir) - files = readdir(dir) + readdir(dir) else - return Completion[], 0:-1, false + return Completion[], dir, false end - catch - return Completion[], 0:-1, false + catch ex + ex isa Base.IOError || rethrow() + return Completion[], dir, false end matches = Set{String}() for file in files if startswith(file, prefix) - id = try isdir(joinpath(dir, file)) catch; false end - # joinpath is not used because windows needs to complete with double-backslash - push!(matches, id ? file * (@static Sys.iswindows() ? "\\\\" : "/") : file) + p = joinpath(dir, file) + is_dir = try isdir(p) catch ex; ex isa Base.IOError ? false : rethrow() end + push!(matches, is_dir ? file * "/" : file) end end - if use_envpath && length(dir) == 0 + if use_envpath && isempty(dir) # Look for files in PATH as well - local pathdirs = split(ENV["PATH"], @static Sys.iswindows() ? ";" : ":") + pathdirs = split(ENV["PATH"], @static Sys.iswindows() ? ";" : ":") for pathdir in pathdirs - local actualpath - try - actualpath = realpath(pathdir) - catch + actualpath = try + realpath(pathdir) + catch ex + ex isa Base.IOError || rethrow() # Bash doesn't expect every folder in PATH to exist, so neither shall we continue end - if actualpath != pathdir && in(actualpath,pathdirs) + if actualpath != pathdir && in(actualpath, pathdirs) # Remove paths which (after resolving links) are in the env path twice. # Many distros eg. point /bin to /usr/bin but have both in the env path. continue end - local filesinpath - try - filesinpath = readdir(pathdir) + filesinpath = try + readdir(pathdir) catch e # Bash allows dirs in PATH that can't be read, so we should as well. 
if isa(e, Base.IOError) || isa(e, Base.ArgumentError) @@ -296,52 +341,75 @@ function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_ for file in filesinpath # In a perfect world, we would filter on whether the file is executable # here, or even on whether the current user can execute the file in question. - if startswith(file, prefix) && isfile(joinpath(pathdir, file)) - push!(matches, file) + try + if startswith(file, prefix) && isfile(joinpath(pathdir, file)) + push!(matches, file) + end + catch e + # `isfile()` can throw in rare cases such as when probing a + # symlink that points to a file within a directory we do not + # have read access to. + if isa(e, Base.IOError) + continue + else + rethrow() + end end end end end - matchList = Completion[PathCompletion(shell_escape ? replace(s, r"\s" => s"\\\0") : s) for s in matches] - startpos = pos - lastindex(prefix) + 1 - count(isequal(' '), prefix) - # The pos - lastindex(prefix) + 1 is correct due to `lastindex(prefix)-lastindex(prefix)==0`, - # hence we need to add one to get the first index. This is also correct when considering - # pos, because pos is the `lastindex` a larger string which `endswith(path)==true`. - return matchList, startpos:pos, !isempty(matchList) + matches = ((shell_escape ? do_shell_escape(s) : string_escape ? do_string_escape(s) : s) for s in matches) + matches = ((raw_escape ? do_raw_escape(s) : s) for s in matches) + matches = Completion[PathCompletion(s) for s in matches] + return matches, dir, !isempty(matches) end -function complete_expanduser(path::AbstractString, r) - expanded = expanduser(path) - return Completion[PathCompletion(expanded)], r, path != expanded +function complete_path(path::AbstractString, + pos::Int; + use_envpath=false, + shell_escape=false, + string_escape=false) + ## TODO: enable this depwarn once Pkg is fixed + #Base.depwarn("complete_path with pos argument is deprecated because the return value [2] is incorrect to use", :complete_path) + paths, dir, success = complete_path(path; use_envpath, shell_escape, string_escape) + if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path) + # if the path is just "~", don't consider the expanded username as a prefix + if path == "~" + dir, prefix = homedir(), "" + else + dir, prefix = splitdir(homedir() * path[2:end]) + end + else + dir, prefix = splitdir(path) + end + startpos = pos - lastindex(prefix) + 1 + Sys.iswindows() && map!(paths, paths) do c::PathCompletion + # emulation for unnecessarily complicated return value, since / is a + # perfectly acceptable path character which does not require quoting + # but is required by Pkg's awkward parser handling + return endswith(c.path, "/") ? PathCompletion(chop(c.path) * "\\\\") : c + end + return paths, startpos:pos, success end -# Determines whether method_complete should be tried. It should only be done if -# the string endswiths ',' or '(' when disregarding whitespace_chars -function should_method_complete(s::AbstractString) - method_complete = false - for c in reverse(s) - if c in [',', '(', ';'] - method_complete = true - break - elseif !(c in whitespace_chars) - method_complete = false - break +function complete_expanduser(path::AbstractString, r) + expanded = + try expanduser(path) + catch e + e isa ArgumentError || rethrow() + path end - end - method_complete + return Completion[PathCompletion(expanded)], r, path != expanded end # Returns a range that includes the method name in front of the first non # closed start brace from the end of the string. 
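# (Hedged illustration, not part of the patch: for an unterminated call such as
#  "max(1, 2", the returned range is expected to span the whole "max(1, 2" text
#  while the second return value marks where the identifier "max" ends; the
#  method-completion path in `completions` relies on exactly that pair.)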
function find_start_brace(s::AbstractString; c_start='(', c_end=')') - braces = 0 r = reverse(s) i = firstindex(r) - in_single_quotes = false - in_double_quotes = false - in_back_ticks = false - in_comment = 0 + braces = in_comment = 0 + in_single_quotes = in_double_quotes = in_back_ticks = false while i <= ncodeunits(r) c, i = iterate(r, i) if c == '#' && i <= ncodeunits(r) && iterate(r, i)[1] == '=' @@ -407,179 +475,286 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') return (startind:lastindex(s), method_name_end) end -# Returns the value in a expression if sym is defined in current namespace fn. -# This method is used to iterate to the value of a expression like: -# :(REPL.REPLCompletions.whitespace_chars) a `dump` of this expression -# will show it consist of Expr, QuoteNode's and Symbol's which all needs to -# be handled differently to iterate down to get the value of whitespace_chars. -function get_value(sym::Expr, fn) - if sym.head === :quote || sym.head === :inert - return sym.args[1], true - end - sym.head !== :. && return (nothing, false) - for ex in sym.args - ex, found = get_value(ex, fn) - !found && return (nothing, false) - fn, found = get_value(ex, fn) - !found && return (nothing, false) - end - return (fn, true) -end -get_value(sym::Symbol, fn) = isdefined(fn, sym) ? (getfield(fn, sym), true) : (nothing, false) -get_value(sym::QuoteNode, fn) = (sym.value, true) -get_value(sym::GlobalRef, fn) = get_value(sym.name, sym.mod) -get_value(sym, fn) = (sym, true) - -# Return the type of a getfield call expression -function get_type_getfield(ex::Expr, fn::Module) - length(ex.args) == 3 || return Any, false # should never happen, but just for safety - fld, found = get_value(ex.args[3], fn) - fld isa Symbol || return Any, false - obj = ex.args[2] - objt, found = get_type(obj, fn) - found || return Any, false - objt isa DataType || return Any, false - hasfield(objt, fld) || return Any, false - return fieldtype(objt, fld), true -end - -# Determines the return type with the Compiler of a function call using the type information of the arguments. -function get_type_call(expr::Expr, fn::Module) - f_name = expr.args[1] - f, found = get_type(f_name, fn) - found || return (Any, false) # If the function f is not found return Any. - args = Any[] - for i in 2:length(expr.args) # Find the type of the function arguments - typ, found = get_type(expr.args[i], fn) - found ? push!(args, typ) : push!(args, Any) - end - world = Base.get_world_counter() - return_type = Core.Compiler.return_type(Tuple{f, args...}, world) - return (return_type, true) -end - -# Returns the return type. example: get_type(:(Base.strip("", ' ')), Main) returns (SubString{String}, true) -function try_get_type(sym::Expr, fn::Module) - val, found = get_value(sym, fn) - found && return Core.Typeof(val), found - if sym.head === :call - # getfield call is special cased as the evaluation of getfield provides good type information, - # is inexpensive and it is also performed in the complete_symbol function. - a1 = sym.args[1] - if a1 === :getfield || a1 === GlobalRef(Core, :getfield) - return get_type_getfield(sym, fn) - end - return get_type_call(sym, fn) - elseif sym.head === :thunk - thk = sym.args[1] - rt = ccall(:jl_infer_thunk, Any, (Any, Any), thk::Core.CodeInfo, fn) - rt !== Any && return (rt, true) - elseif sym.head === :ref - # some simple cases of `expand` - return try_get_type(Expr(:call, GlobalRef(Base, :getindex), sym.args...), fn) - elseif sym.head === :. 
&& sym.args[2] isa QuoteNode # second check catches broadcasting - return try_get_type(Expr(:call, GlobalRef(Core, :getfield), sym.args...), fn) - elseif sym.head === :toplevel || sym.head === :block - isempty(sym.args) && return (nothing, true) - return try_get_type(sym.args[end], fn) - elseif sym.head === :escape || sym.head === :var"hygienic-scope" - return try_get_type(sym.args[1], fn) - end - return (Any, false) -end - -try_get_type(other, fn::Module) = get_type(other, fn) - -function get_type(sym::Expr, fn::Module) - # try to analyze nests of calls. if this fails, try using the expanded form. - val, found = try_get_type(sym, fn) - found && return val, found - # https://github.com/JuliaLang/julia/issues/27184 - if isexpr(sym, :macrocall) - _, found = get_type(first(sym.args), fn) - found || return Any, false - end - newsym = try - macroexpand(fn, sym; recursive=false) - catch e - # user code failed in macroexpand (ignore it) - return Any, false +struct REPLInterpreterCache + dict::IdDict{MethodInstance,CodeInstance} +end +REPLInterpreterCache() = REPLInterpreterCache(IdDict{MethodInstance,CodeInstance}()) +const REPL_INTERPRETER_CACHE = REPLInterpreterCache() + +function get_code_cache() + # XXX Avoid storing analysis results into the cache that persists across precompilation, + # as [sys|pkg]image currently doesn't support serializing externally created `CodeInstance`. + # Otherwise, `CodeInstance`s created by `REPLInterpreter`, that are much less optimized + # that those produced by `NativeInterpreter`, will leak into the native code cache, + # potentially causing runtime slowdown. + # (see https://github.com/JuliaLang/julia/issues/48453). + if Base.generating_output() + return REPLInterpreterCache() + else + return REPL_INTERPRETER_CACHE end - val, found = try_get_type(newsym, fn) - if !found - newsym = try - Meta.lower(fn, sym) - catch e - # user code failed in lowering (ignore it) - return Any, false +end + +struct REPLInterpreter <: CC.AbstractInterpreter + limit_aggressive_inference::Bool + world::UInt + inf_params::CC.InferenceParams + opt_params::CC.OptimizationParams + inf_cache::Vector{CC.InferenceResult} + code_cache::REPLInterpreterCache + function REPLInterpreter(limit_aggressive_inference::Bool=false; + world::UInt = Base.get_world_counter(), + inf_params::CC.InferenceParams = CC.InferenceParams(; + aggressive_constant_propagation=true, + unoptimize_throw_blocks=false), + opt_params::CC.OptimizationParams = CC.OptimizationParams(), + inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[], + code_cache::REPLInterpreterCache = get_code_cache()) + return new(limit_aggressive_inference, world, inf_params, opt_params, inf_cache, code_cache) + end +end +CC.InferenceParams(interp::REPLInterpreter) = interp.inf_params +CC.OptimizationParams(interp::REPLInterpreter) = interp.opt_params +CC.get_world_counter(interp::REPLInterpreter) = interp.world +CC.get_inference_cache(interp::REPLInterpreter) = interp.inf_cache +CC.code_cache(interp::REPLInterpreter) = CC.WorldView(interp.code_cache, CC.WorldRange(interp.world)) +CC.get(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default) +CC.getindex(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance) = getindex(wvc.cache.dict, mi) +CC.haskey(wvc::CC.WorldView{REPLInterpreterCache}, mi::MethodInstance) = haskey(wvc.cache.dict, mi) +function CC.setindex!(wvc::CC.WorldView{REPLInterpreterCache}, ci::CodeInstance, mi::MethodInstance) + CC.add_invalidation_callback!(mi) do 
replaced::MethodInstance, max_world::UInt32 + delete!(wvc.cache.dict, replaced) + end + return setindex!(wvc.cache.dict, ci, mi) +end + +# REPLInterpreter is only used for type analysis, so it should disable optimization entirely +CC.may_optimize(::REPLInterpreter) = false + +# REPLInterpreter analyzes a top-level frame, so better to not bail out from it +CC.bail_out_toplevel_call(::REPLInterpreter, ::CC.InferenceLoopState, ::CC.InferenceState) = false + +# `REPLInterpreter` aggressively resolves global bindings to enable reasonable completions +# for lines like `Mod.a.|` (where `|` is the cursor position). +# Aggressive binding resolution poses challenges for the inference cache validation +# (until https://github.com/JuliaLang/julia/issues/40399 is implemented). +# To avoid the cache validation issues, `REPLInterpreter` only allows aggressive binding +# resolution for top-level frame representing REPL input code and for child uncached frames +# that are constant propagated from the top-level frame ("repl-frame"s). This works, even if +# those global bindings are not constant and may be mutated in the future, since: +# a.) "repl-frame"s are never cached, and +# b.) mutable values are never observed by any cached frames. +# +# `REPLInterpreter` also aggressively concrete evaluate `:inconsistent` calls within +# "repl-frame" to provide reasonable completions for lines like `Ref(Some(42))[].|`. +# Aggressive concrete evaluation allows us to get accurate type information about complex +# expressions that otherwise can not be constant folded, in a safe way, i.e. it still +# doesn't evaluate effectful expressions like `pop!(xs)`. +# Similarly to the aggressive binding resolution, aggressive concrete evaluation doesn't +# present any cache validation issues because "repl-frame" is never cached. + +# `REPLInterpreter` is specifically used by `repl_eval_ex`, where all top-level frames are +# `repl_frame` always. However, this assumption wouldn't stand if `REPLInterpreter` were to +# be employed, for instance, by `typeinf_ext_toplevel`. +is_repl_frame(sv::CC.InferenceState) = sv.linfo.def isa Module && sv.cache_mode === CC.CACHE_MODE_NULL + +function is_call_graph_uncached(sv::CC.InferenceState) + CC.is_cached(sv) && return false + parent = sv.parent + parent === nothing && return true + return is_call_graph_uncached(parent::CC.InferenceState) +end + +# aggressive global binding resolution within `repl_frame` +function CC.abstract_eval_globalref(interp::REPLInterpreter, g::GlobalRef, + sv::CC.InferenceState) + if (interp.limit_aggressive_inference ? is_repl_frame(sv) : is_call_graph_uncached(sv)) + if CC.isdefined_globalref(g) + return CC.RTEffects(Const(ccall(:jl_get_globalref_value, Any, (Any,), g)), Union{}, CC.EFFECTS_TOTAL) end - val, found = try_get_type(newsym, fn) + return CC.RTEffects(Union{}, UndefVarError, CC.EFFECTS_THROWS) + end + return @invoke CC.abstract_eval_globalref(interp::CC.AbstractInterpreter, g::GlobalRef, + sv::CC.InferenceState) +end + +function is_repl_frame_getproperty(sv::CC.InferenceState) + def = sv.linfo.def + def isa Method || return false + def.name === :getproperty || return false + CC.is_cached(sv) && return false + return is_repl_frame(sv.parent) +end + +# aggressive global binding resolution for `getproperty(::Module, ::Symbol)` calls within `repl_frame` +function CC.builtin_tfunction(interp::REPLInterpreter, @nospecialize(f), + argtypes::Vector{Any}, sv::CC.InferenceState) + if f === Core.getglobal && (interp.limit_aggressive_inference ? 
is_repl_frame_getproperty(sv) : is_call_graph_uncached(sv)) + if length(argtypes) == 2 + a1, a2 = argtypes + if isa(a1, Const) && isa(a2, Const) + a1val, a2val = a1.val, a2.val + if isa(a1val, Module) && isa(a2val, Symbol) + g = GlobalRef(a1val, a2val) + if CC.isdefined_globalref(g) + return Const(ccall(:jl_get_globalref_value, Any, (Any,), g)) + end + return Union{} + end + end + end + end + return @invoke CC.builtin_tfunction(interp::CC.AbstractInterpreter, f::Any, + argtypes::Vector{Any}, sv::CC.InferenceState) +end + +# aggressive concrete evaluation for `:inconsistent` frames within `repl_frame` +function CC.concrete_eval_eligible(interp::REPLInterpreter, @nospecialize(f), + result::CC.MethodCallResult, arginfo::CC.ArgInfo, + sv::CC.InferenceState) + if (interp.limit_aggressive_inference ? is_repl_frame(sv) : is_call_graph_uncached(sv)) + neweffects = CC.Effects(result.effects; consistent=CC.ALWAYS_TRUE) + result = CC.MethodCallResult(result.rt, result.exct, result.edgecycle, result.edgelimited, + result.edge, neweffects) + end + ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any, + result::CC.MethodCallResult, arginfo::CC.ArgInfo, + sv::CC.InferenceState) + if ret === :semi_concrete_eval + # while the base eligibility check probably won't permit semi-concrete evaluation + # for `REPLInterpreter` (given it completely turns off optimization), + # this ensures we don't inadvertently enter irinterp + ret = :none end - return val, found + return ret end -function get_type(sym, fn::Module) - val, found = get_value(sym, fn) - return found ? Core.Typeof(val) : Any, found +# allow constant propagation for mutable constants +function CC.const_prop_argument_heuristic(interp::REPLInterpreter, arginfo::CC.ArgInfo, sv::CC.InferenceState) + if !interp.limit_aggressive_inference + any(@nospecialize(a)->isa(a, Const), arginfo.argtypes) && return true # even if mutable + end + return @invoke CC.const_prop_argument_heuristic(interp::CC.AbstractInterpreter, arginfo::CC.ArgInfo, sv::CC.InferenceState) +end + +function resolve_toplevel_symbols!(src::Core.CodeInfo, mod::Module) + @ccall jl_resolve_globals_in_ir( + #=jl_array_t *stmts=# src.code::Any, + #=jl_module_t *m=# mod::Any, + #=jl_svec_t *sparam_vals=# Core.svec()::Any, + #=int binding_effects=# 0::Int)::Cvoid + return src end -function get_type(T, found::Bool, default_any::Bool) - return found ? T : - default_any ? Any : throw(ArgumentError("argument not found")) +# lower `ex` and run type inference on the resulting top-level expression +function repl_eval_ex(@nospecialize(ex), context_module::Module; limit_aggressive_inference::Bool=false) + if (isexpr(ex, :toplevel) || isexpr(ex, :tuple)) && !isempty(ex.args) + # get the inference result for the last expression + ex = ex.args[end] + end + lwr = try + Meta.lower(context_module, ex) + catch # macro expansion failed, etc. + return nothing + end + if lwr isa Symbol + return isdefined(context_module, lwr) ? 
Const(getfield(context_module, lwr)) : nothing + end + lwr isa Expr || return Const(lwr) # `ex` is literal + isexpr(lwr, :thunk) || return nothing # lowered to `Expr(:error, ...)` or similar + src = lwr.args[1]::Core.CodeInfo + + # construct top-level `MethodInstance` + mi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()); + mi.specTypes = Tuple{} + + mi.def = context_module + resolve_toplevel_symbols!(src, context_module) + @atomic mi.uninferred = src + + interp = REPLInterpreter(limit_aggressive_inference) + result = CC.InferenceResult(mi) + frame = CC.InferenceState(result, src, #=cache=#:no, interp) + + # NOTE Use the fixed world here to make `REPLInterpreter` robust against + # potential invalidations of `Core.Compiler` methods. + Base.invoke_in_world(COMPLETION_WORLD[], CC.typeinf, interp, frame) + + result = frame.result.result + result === Union{} && return nothing # for whatever reason, callers expect this as the Bottom and/or Top type instead + return result end +# `COMPLETION_WORLD[]` will be initialized within `__init__` +# (to allow us to potentially remove REPL from the sysimage in the future). +# Note that inference from the `code_typed` call below will use the current world age +# rather than `typemax(UInt)`, since `Base.invoke_in_world` uses the current world age +# when the given world age is higher than the current one. +const COMPLETION_WORLD = Ref{UInt}(typemax(UInt)) + +# Generate code cache for `REPLInterpreter` now: +# This code cache will be available at the world of `COMPLETION_WORLD`, +# assuming no invalidation will happen before initializing REPL. +# Once REPL is loaded, `REPLInterpreter` will be resilient against future invalidations. +code_typed(CC.typeinf, (REPLInterpreter, CC.InferenceState)) + # Method completion on function call expression that look like :(max(1)) MAX_METHOD_COMPLETIONS::Int = 40 +function _complete_methods(ex_org::Expr, context_module::Module, shift::Bool) + funct = repl_eval_ex(ex_org.args[1], context_module) + funct === nothing && return 2, nothing, [], Set{Symbol}() + funct = CC.widenconst(funct) + args_ex, kwargs_ex, kwargs_flag = complete_methods_args(ex_org, context_module, true, true) + return kwargs_flag, funct, args_ex, kwargs_ex +end + function complete_methods(ex_org::Expr, context_module::Module=Main, shift::Bool=false) + kwargs_flag, funct, args_ex, kwargs_ex = _complete_methods(ex_org, context_module, shift)::Tuple{Int, Any, Vector{Any}, Set{Symbol}} out = Completion[] - funct, found = get_type(ex_org.args[1], context_module)::Tuple{Any,Bool} - !found && return out - - args_ex, kwargs_ex = complete_methods_args(ex_org.args[2:end], ex_org, context_module, true, true) - push!(args_ex, Vararg{Any}) - complete_methods!(out, funct, args_ex, kwargs_ex, shift ? -2 : MAX_METHOD_COMPLETIONS) - + kwargs_flag == 2 && return out # one of the kwargs is invalid + kwargs_flag == 0 && push!(args_ex, Vararg{Any}) # allow more arguments if there is no semicolon + complete_methods!(out, funct, args_ex, kwargs_ex, shift ? 
-2 : MAX_METHOD_COMPLETIONS, kwargs_flag == 1) return out end MAX_ANY_METHOD_COMPLETIONS::Int = 10 +function recursive_explore_names!(seen::IdSet, callee_module::Module, initial_module::Module, exploredmodules::IdSet{Module}=IdSet{Module}()) + push!(exploredmodules, callee_module) + for name in names(callee_module; all=true, imported=true) + if !Base.isdeprecated(callee_module, name) && !startswith(string(name), '#') && isdefined(initial_module, name) + func = getfield(callee_module, name) + if !isa(func, Module) + funct = Core.Typeof(func) + push!(seen, funct) + elseif isa(func, Module) && func ∉ exploredmodules + recursive_explore_names!(seen, func, initial_module, exploredmodules) + end + end + end +end +function recursive_explore_names(callee_module::Module, initial_module::Module) + seen = IdSet{Any}() + recursive_explore_names!(seen, callee_module, initial_module) + seen +end + function complete_any_methods(ex_org::Expr, callee_module::Module, context_module::Module, moreargs::Bool, shift::Bool) out = Completion[] - args_ex, kwargs_ex = try + args_ex, kwargs_ex, kwargs_flag = try # this may throw, since we set default_any to false - complete_methods_args(ex_org.args[2:end], ex_org, context_module, false, false) + complete_methods_args(ex_org, context_module, false, false) catch ex ex isa ArgumentError || rethrow() return out end + kwargs_flag == 2 && return out # one of the kwargs is invalid + + # moreargs determines whether to accept more args, independently of the presence of a + # semicolon for the ".?(" syntax moreargs && push!(args_ex, Vararg{Any}) - seen = Base.IdSet() - for name in names(callee_module; all=true) - if !Base.isdeprecated(callee_module, name) && isdefined(callee_module, name) - func = getfield(callee_module, name) - if !isa(func, Module) - funct = Core.Typeof(func) - if !in(funct, seen) - push!(seen, funct) - complete_methods!(out, funct, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS) - end - elseif callee_module === Main && isa(func, Module) - callee_module2 = func - for name in names(callee_module2) - if !Base.isdeprecated(callee_module2, name) && isdefined(callee_module2, name) - func = getfield(callee_module, name) - if !isa(func, Module) - funct = Core.Typeof(func) - if !in(funct, seen) - push!(seen, funct) - complete_methods!(out, funct, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS) - end - end - end - end - end - end + for seen_name in recursive_explore_names(callee_module, callee_module) + complete_methods!(out, seen_name, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS, false) end if !shift @@ -595,44 +770,77 @@ function complete_any_methods(ex_org::Expr, callee_module::Module, context_modul return out end -function complete_methods_args(funargs::Vector{Any}, ex_org::Expr, context_module::Module, default_any::Bool, allow_broadcasting::Bool) +function detect_invalid_kwarg!(kwargs_ex::Vector{Symbol}, @nospecialize(x), kwargs_flag::Int, possible_splat::Bool) + n = isexpr(x, :kw) ? x.args[1] : x + if n isa Symbol + push!(kwargs_ex, n) + return kwargs_flag + end + possible_splat && isexpr(x, :...) && return kwargs_flag + return 2 # The kwarg is invalid +end + +function detect_args_kwargs(funargs::Vector{Any}, context_module::Module, default_any::Bool, broadcasting::Bool) args_ex = Any[] - kwargs_ex = false - if allow_broadcasting && ex_org.head === :. 
&& ex_org.args[2] isa Expr - # handle broadcasting, but only handle number of arguments instead of - # argument types - for _ in (ex_org.args[2]::Expr).args - push!(args_ex, Any) - end - else - for ex in funargs - if isexpr(ex, :parameters) - if !isempty(ex.args) - kwargs_ex = true - end - elseif isexpr(ex, :kw) - kwargs_ex = true + kwargs_ex = Symbol[] + kwargs_flag = 0 + # kwargs_flag is: + # * 0 if there is no semicolon and no invalid kwarg + # * 1 if there is a semicolon and no invalid kwarg + # * 2 if there are two semicolons or more, or if some kwarg is invalid, which + # means that it is not of the form "bar=foo", "bar" or "bar..." + for i in (1+!broadcasting):length(funargs) + ex = funargs[i] + if isexpr(ex, :parameters) + kwargs_flag = ifelse(kwargs_flag == 0, 1, 2) # there should be at most one :parameters + for x in ex.args + kwargs_flag = detect_invalid_kwarg!(kwargs_ex, x, kwargs_flag, true) + end + elseif isexpr(ex, :kw) + kwargs_flag = detect_invalid_kwarg!(kwargs_ex, ex, kwargs_flag, false) + else + if broadcasting + # handle broadcasting, but only handle number of arguments instead of + # argument types + push!(args_ex, Any) else - push!(args_ex, get_type(get_type(ex, context_module)..., default_any)) + argt = repl_eval_ex(ex, context_module) + if argt !== nothing + push!(args_ex, CC.widenconst(argt)) + elseif default_any + push!(args_ex, Any) + else + throw(ArgumentError("argument not found")) + end end end end - return args_ex, kwargs_ex + return args_ex, Set{Symbol}(kwargs_ex), kwargs_flag end -function complete_methods!(out::Vector{Completion}, @nospecialize(funct), args_ex::Vector{Any}, kwargs_ex::Bool, max_method_completions::Int) +is_broadcasting_expr(ex::Expr) = ex.head === :. && isexpr(ex.args[2], :tuple) + +function complete_methods_args(ex::Expr, context_module::Module, default_any::Bool, allow_broadcasting::Bool) + if allow_broadcasting && is_broadcasting_expr(ex) + return detect_args_kwargs((ex.args[2]::Expr).args, context_module, default_any, true) + end + return detect_args_kwargs(ex.args, context_module, default_any, false) +end + +function complete_methods!(out::Vector{Completion}, @nospecialize(funct), args_ex::Vector{Any}, kwargs_ex::Set{Symbol}, max_method_completions::Int, exact_nargs::Bool) # Input types and number of arguments t_in = Tuple{funct, args_ex...} m = Base._methods_by_ftype(t_in, nothing, max_method_completions, Base.get_world_counter(), #=ambig=# true, Ref(typemin(UInt)), Ref(typemax(UInt)), Ptr{Int32}(C_NULL)) - if m === false + if !isa(m, Vector) push!(out, TextCompletion(sprint(Base.show_signature_function, funct) * "( too many methods, use SHIFT-TAB to show )")) + return end - m isa Vector || return for match in m # TODO: if kwargs_ex, filter out methods without kwargs? push!(out, MethodCompletion(match.spec_types, match.method)) end + # TODO: filter out methods with wrong number of arguments if `exact_nargs` is set end include("latex_symbols.jl") @@ -663,10 +871,11 @@ function afterusing(string::String, startpos::Int) return occursin(r"^\b(using|import)\s*((\w+[.])*\w+\s*,\s*)*$", str[fr:end]) end -function close_path_completion(str, startpos, r, paths, pos) +function close_path_completion(dir, paths, str, pos) length(paths) == 1 || return false # Only close if there's a single choice... 
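# (Illustrative note, not from the patch: when a quoted path completes to exactly
#  one file, the caller appends the closing '"'; a unique directory match is left
#  open so completion can continue inside it, and nothing is added if a closing
#  quote already follows the cursor.)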
- _path = str[startpos:prevind(str, first(r))] * (paths[1]::PathCompletion).path - path = expanduser(replace(_path, r"\\ " => " ")) + path = (paths[1]::PathCompletion).path + path = unescape_string(replace(path, "\\\$"=>"\$")) + path = joinpath(dir, path) # ...except if it's a directory... try isdir(path) @@ -677,7 +886,6 @@ function close_path_completion(str, startpos, r, paths, pos) return lastindex(str) <= pos || str[nextind(str, pos)] != '"' end - function bslash_completions(string::String, pos::Int) slashpos = something(findprev(isequal('\\'), string, pos), 0) if (something(findprev(in(bslash_separators), string, pos), 0) < slashpos && @@ -718,20 +926,18 @@ function dict_identifier_key(str::String, tag::Symbol, context_module::Module=Ma else str_close = str end - frange, end_of_identifier = find_start_brace(str_close, c_start='[', c_end=']') isempty(frange) && return (nothing, nothing, nothing) - obj = context_module - for name in split(str[frange[1]:end_of_identifier], '.') - Base.isidentifier(name) || return (nothing, nothing, nothing) - sym = Symbol(name) - isdefined(obj, sym) || return (nothing, nothing, nothing) - obj = getfield(obj, sym) - end - (isa(obj, AbstractDict) && length(obj)::Int < 1_000_000) || return (nothing, nothing, nothing) + objstr = str[1:end_of_identifier] + objex = Meta.parse(objstr, raise=false, depwarn=false) + objt = repl_eval_ex(objex, context_module) + isa(objt, Core.Const) || return (nothing, nothing, nothing) + obj = objt.val + isa(obj, AbstractDict) || return (nothing, nothing, nothing) + length(obj)::Int < 1_000_000 || return (nothing, nothing, nothing) begin_of_key = something(findnext(!isspace, str, nextind(str, end_of_identifier) + 1), # +1 for [ lastindex(str)+1) - return (obj::AbstractDict, str[begin_of_key:end], begin_of_key) + return (obj, str[begin_of_key:end], begin_of_key) end # This needs to be a separate non-inlined function, see #19441 @@ -744,6 +950,81 @@ end return matches end +# Identify an argument being completed in a method call. If the argument is empty, method +# suggestions will be provided instead of argument completions. +function identify_possible_method_completion(partial, last_idx) + fail = 0:-1, Expr(:nothing), 0:-1, 0 + + # First, check that the last punctuation is either ',', ';' or '(' + idx_last_punct = something(findprev(x -> ispunct(x) && x != '_' && x != '!', partial, last_idx), 0)::Int + idx_last_punct == 0 && return fail + last_punct = partial[idx_last_punct] + last_punct == ',' || last_punct == ';' || last_punct == '(' || return fail + + # Then, check that `last_punct` is only followed by an identifier or nothing + before_last_word_start = something(findprev(in(non_identifier_chars), partial, last_idx), 0) + before_last_word_start == 0 && return fail + all(isspace, @view partial[nextind(partial, idx_last_punct):before_last_word_start]) || return fail + + # Check that `last_punct` is either the last '(' or placed after a previous '(' + frange, method_name_end = find_start_brace(@view partial[1:idx_last_punct]) + method_name_end ∈ frange || return fail + + # Strip the preceding ! 
operators, if any, and close the expression with a ')' + s = replace(partial[frange], r"\G\!+([^=\(]+)" => s"\1"; count=1) * ')' + ex = Meta.parse(s, raise=false, depwarn=false) + isa(ex, Expr) || return fail + + # `wordrange` is the position of the last argument to complete + wordrange = nextind(partial, before_last_word_start):last_idx + return frange, ex, wordrange, method_name_end +end + +# Provide completion for keyword arguments in function calls +function complete_keyword_argument(partial, last_idx, context_module) + frange, ex, wordrange, = identify_possible_method_completion(partial, last_idx) + fail = Completion[], 0:-1, frange + ex.head === :call || is_broadcasting_expr(ex) || return fail + + kwargs_flag, funct, args_ex, kwargs_ex = _complete_methods(ex, context_module, true)::Tuple{Int, Any, Vector{Any}, Set{Symbol}} + kwargs_flag == 2 && return fail # one of the previous kwargs is invalid + + methods = Completion[] + complete_methods!(methods, funct, Any[Vararg{Any}], kwargs_ex, -1, kwargs_flag == 1) + # TODO: use args_ex instead of Any[Vararg{Any}] and only provide kwarg completion for + # method calls compatible with the current arguments. + + # For each method corresponding to the function call, provide completion suggestions + # for each keyword that starts like the last word and that is not already used + # previously in the expression. The corresponding suggestion is "kwname=". + # If the keyword corresponds to an existing name, also include "kwname" as a suggestion + # since the syntax "foo(; kwname)" is equivalent to "foo(; kwname=kwname)". + last_word = partial[wordrange] # the word to complete + kwargs = Set{String}() + for m in methods + m::MethodCompletion + possible_kwargs = Base.kwarg_decl(m.method) + current_kwarg_candidates = String[] + for _kw in possible_kwargs + kw = String(_kw) + if !endswith(kw, "...") && startswith(kw, last_word) && _kw ∉ kwargs_ex + push!(current_kwarg_candidates, kw) + end + end + union!(kwargs, current_kwarg_candidates) + end + + suggestions = Completion[KeywordArgumentCompletion(kwarg) for kwarg in kwargs] + + # Only add these if not in kwarg space. i.e. not in `foo(; ` + if kwargs_flag == 0 + append!(suggestions, complete_symbol(nothing, last_word, Returns(true), context_module)) + append!(suggestions, complete_keyval(last_word)) + end + + return sort!(suggestions, by=completion_text), wordrange +end + function project_deps_get_completion_candidates(pkgstarts::String, project_file::String) loading_candidates = String[] d = Base.parsed_toml(project_file) @@ -760,6 +1041,74 @@ function project_deps_get_completion_candidates(pkgstarts::String, project_file: return Completion[PackageCompletion(name) for name in loading_candidates] end +function complete_identifiers!(suggestions::Vector{Completion}, @nospecialize(ffunc::Function), context_module::Module, string::String, name::String, pos::Int, dotpos::Int, startpos::Int, comp_keywords=false) + ex = nothing + if comp_keywords + append!(suggestions, complete_keyword(name)) + append!(suggestions, complete_keyval(name)) + end + if dotpos > 1 && string[dotpos] == '.' + s = string[1:prevind(string, dotpos)] + # First see if the whole string up to `pos` is a valid expression. If so, use it. + ex = Meta.parse(s, raise=false, depwarn=false) + if isexpr(ex, :incomplete) + s = string[startpos:pos] + # Heuristic to find the start of the expression. TODO: This would be better + # done with a proper error-recovering parser. + if 0 < startpos <= lastindex(string) && string[startpos] == '.' 
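# (Hedged illustration, not part of the patch: when the text before the final dot
#  does not parse on its own, e.g. completing `.im` right after a parenthesized
#  expression inside an unfinished outer call, the loop below walks back over
#  balanced ')'/']' pairs and quotes so that the bracketed expression becomes the
#  lookup target for property completion.)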
+ i = prevind(string, startpos) + while 0 < i + c = string[i] + if c in (')', ']') + if c == ')' + c_start = '(' + c_end = ')' + elseif c == ']' + c_start = '[' + c_end = ']' + end + frange, end_of_identifier = find_start_brace(string[1:prevind(string, i)], c_start=c_start, c_end=c_end) + isempty(frange) && break # unbalanced parens + startpos = first(frange) + i = prevind(string, startpos) + elseif c in ('\'', '\"', '\`') + s = "$c$c"*string[startpos:pos] + break + else + break + end + s = string[startpos:pos] + end + end + if something(findlast(in(non_identifier_chars), s), 0) < something(findlast(isequal('.'), s), 0) + lookup_name, name = rsplit(s, ".", limit=2) + name = String(name) + + ex = Meta.parse(lookup_name, raise=false, depwarn=false) + end + isexpr(ex, :incomplete) && (ex = nothing) + elseif isexpr(ex, :call) && length(ex.args) > 1 + isinfix = s[end] != ')' + # A complete call expression that does not finish with ')' is an infix call. + if !isinfix + # Handle infix call argument completion of the form bar + foo(qux). + frange, end_of_identifier = find_start_brace(@view s[1:prevind(s, end)]) + isinfix = Meta.parse(@view(s[frange[1]:end]), raise=false, depwarn=false) == ex.args[end] + end + if isinfix + ex = ex.args[end] + end + elseif isexpr(ex, :macrocall) && length(ex.args) > 1 + # allow symbol completions within potentially incomplete macrocalls + if s[end] ≠ '`' && s[end] ≠ ')' + ex = ex.args[end] + end + end + end + append!(suggestions, complete_symbol(ex, name, ffunc, context_module)) + return sort!(unique(suggestions), by=completion_text), (dotpos+1):pos, true +end + function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true) # First parse everything up to the current position partial = string[1:pos] @@ -803,40 +1152,112 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif length(matches)>0 && return Completion[DictCompletion(identifier, match) for match in sort!(matches)], loc::Int:pos, true end - # otherwise... - if inc_tag in [:cmd, :string] - m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) - startpos = nextind(partial, reverseind(partial, m.offset)) - r = startpos:pos + ffunc = Returns(true) + suggestions = Completion[] - expanded = complete_expanduser(replace(string[r], r"\\ " => " "), r) - expanded[3] && return expanded # If user expansion available, return it + # Check if this is a var"" string macro that should be completed like + # an identifier rather than a string. + # TODO: It would be nice for the parser to give us more information here + # so that we can lookup the macro by identity rather than pattern matching + # its invocation. + varrange = findprev("var\"", string, pos) + + if varrange !== nothing + ok, ret = bslash_completions(string, pos) + ok && return ret + startpos = first(varrange) + 4 + dotpos = something(findprev(isequal('.'), string, first(varrange)-1), 0) + return complete_identifiers!(Completion[], ffunc, context_module, string, + string[startpos:pos], pos, dotpos, startpos) + elseif inc_tag === :cmd + # TODO: should this call shell_completions instead of partially reimplementing it? 
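# (Hedged illustration, not part of the patch: for a backtick literal such as
#  `ls Make` with the cursor at the end, the candidates computed below are both
#  shell-escaped and raw-escaped, so a match like "Makefile" is inserted in a form
#  that survives the backtick parser as well as the shell.)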
+ let m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) # fuzzy shell_parse in reverse + startpos = nextind(partial, reverseind(partial, m.offset)) + r = startpos:pos + scs::String = string[r] + + expanded = complete_expanduser(scs, r) + expanded[3] && return expanded # If user expansion available, return it + + path::String = replace(scs, r"(\\+)\g1(\\?)`" => "\1\2`") # fuzzy unescape_raw_string: match an even number of \ before ` and replace with half as many + # This expansion with "\\ "=>' ' replacement and shell_escape=true + # assumes the path isn't further quoted within the cmd backticks. + path = replace(path, r"\\ " => " ", r"\$" => "\$") # fuzzy shell_parse (reversed by shell_escape_posixly) + paths, dir, success = complete_path(path, shell_escape=true, raw_escape=true) + + if success && !isempty(dir) + let dir = do_raw_escape(do_shell_escape(dir)) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + if endswith(dir, "/") && startswith(scs, dir) + r = (startpos + sizeof(dir)):pos + elseif startswith(scs, dir * "/") + r = nextind(string, startpos + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + return PathCompletion(dir * "/" * c.path) + end + end + end + end + return sort!(paths, by=p->p.path), r, success + end + elseif inc_tag === :string + # Find first non-escaped quote + let m = match(r"\"(?!\\)", reverse(partial)) + startpos = nextind(partial, reverseind(partial, m.offset)) + r = startpos:pos + scs::String = string[r] + + expanded = complete_expanduser(scs, r) + expanded[3] && return expanded # If user expansion available, return it + + path = try + unescape_string(replace(scs, "\\\$"=>"\$")) + catch ex + ex isa ArgumentError || rethrow() + nothing + end + if !isnothing(path) + paths, dir, success = complete_path(path::String, string_escape=true) - paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos) + if close_path_completion(dir, paths, path, pos) + paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") + end - if inc_tag === :string && close_path_completion(string, startpos, r, paths, pos) - paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") - end + if success && !isempty(dir) + let dir = do_string_escape(dir) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + if endswith(dir, "/") && startswith(scs, dir) + r = (startpos + sizeof(dir)):pos + elseif startswith(scs, dir * "/") + r = nextind(string, startpos + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + return PathCompletion(dir * "/" * c.path) + end + end + end + end - #Latex symbols can be completed for strings - (success || inc_tag==:cmd) && return sort!(paths, by=p->p.path), r, success + # Fallthrough allowed so that Latex symbols can be completed in strings + success && return sort!(paths, by=p->p.path), r, success + end + end end ok, ret = bslash_completions(string, pos) ok && return ret # Make sure that only bslash_completions is working on strings - inc_tag==:string && return Completion[], 0:-1, false - if inc_tag === :other && should_method_complete(partial) - frange, method_name_end = find_start_brace(partial) - # strip preceding ! 
operator - s = replace(partial[frange], r"\!+([^=\(]+)" => s"\1") - ex = Meta.parse(s * ")", raise=false, depwarn=false) - - if isa(ex, Expr) + inc_tag === :string && return Completion[], 0:-1, false + if inc_tag === :other + frange, ex, wordrange, method_name_end = identify_possible_method_completion(partial, pos) + if last(frange) != -1 && all(isspace, @view partial[wordrange]) # no last argument to complete if ex.head === :call return complete_methods(ex, context_module, shift), first(frange):method_name_end, false - elseif ex.head === :. && ex.args[2] isa Expr && (ex.args[2]::Expr).head === :tuple + elseif is_broadcasting_expr(ex) return complete_methods(ex, context_module, shift), first(frange):(method_name_end - 1), false end end @@ -844,16 +1265,19 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif return Completion[], 0:-1, false end + # Check whether we can complete a keyword argument in a function call + kwarg_completion, wordrange = complete_keyword_argument(partial, pos, context_module) + isempty(wordrange) || return kwarg_completion, wordrange, !isempty(kwarg_completion) + dotpos = something(findprev(isequal('.'), string, pos), 0) startpos = nextind(string, something(findprev(in(non_identifier_chars), string, pos), 0)) # strip preceding ! operator - if (m = match(r"^\!+", string[startpos:pos])) !== nothing + if (m = match(r"\G\!+", partial, startpos)) isa RegexMatch startpos += length(m.match) end - ffunc = (mod,x)->true - suggestions = Completion[] - comp_keywords = true + name = string[max(startpos, dotpos+1):pos] + comp_keywords = !isempty(name) && startpos > dotpos if afterusing(string, startpos) # We're right after using or import. Let's look only for packages # and modules we can reach from here @@ -895,72 +1319,68 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif ffunc = (mod,x)->(Base.isbindingresolved(mod, x) && isdefined(mod, x) && isa(getfield(mod, x), Module)) comp_keywords = false end + startpos == 0 && (pos = -1) dotpos < startpos && (dotpos = startpos - 1) - s = string[startpos:pos] - comp_keywords && append!(suggestions, complete_keyword(s)) - # if the start of the string is a `.`, try to consume more input to get back to the beginning of the last expression - if 0 < startpos <= lastindex(string) && string[startpos] == '.' 
- i = prevind(string, startpos) - while 0 < i - c = string[i] - if c in (')', ']') - if c == ')' - c_start = '(' - c_end = ')' - elseif c == ']' - c_start = '[' - c_end = ']' - end - frange, end_of_identifier = find_start_brace(string[1:prevind(string, i)], c_start=c_start, c_end=c_end) - isempty(frange) && break # unbalanced parens - startpos = first(frange) - i = prevind(string, startpos) - elseif c in ('\'', '\"', '\`') - s = "$c$c"*string[startpos:pos] - break - else - break - end - s = string[startpos:pos] - end - end - append!(suggestions, complete_symbol(s, ffunc, context_module)) - return sort!(unique(suggestions), by=completion_text), (dotpos+1):pos, true + return complete_identifiers!(suggestions, ffunc, context_module, string, + name, pos, dotpos, startpos, comp_keywords) end function shell_completions(string, pos) # First parse everything up to the current position scs = string[1:pos] - local args, last_parse - try - args, last_parse = Base.shell_parse(scs, true)::Tuple{Expr,UnitRange{Int}} - catch + args, last_arg_start = try + Base.shell_parse(scs, true)::Tuple{Expr,Int} + catch ex + ex isa ArgumentError || ex isa ErrorException || rethrow() return Completion[], 0:-1, false end ex = args.args[end]::Expr # Now look at the last thing we parsed isempty(ex.args) && return Completion[], 0:-1, false - arg = ex.args[end] - if all(s -> isa(s, AbstractString), ex.args) - arg = arg::AbstractString - # Treat this as a path - - # As Base.shell_parse throws away trailing spaces (unless they are escaped), - # we need to special case here. - # If the last char was a space, but shell_parse ignored it search on "". - ignore_last_word = arg != " " && scs[end] == ' ' - prefix = ignore_last_word ? "" : join(ex.args) + lastarg = ex.args[end] + # As Base.shell_parse throws away trailing spaces (unless they are escaped), + # we need to special case here. + # If the last char was a space, but shell_parse ignored it search on "". + if isexpr(lastarg, :incomplete) || isexpr(lastarg, :error) + partial = string[last_arg_start:pos] + ret, range = completions(partial, lastindex(partial)) + range = range .+ (last_arg_start - 1) + return ret, range, true + elseif endswith(scs, ' ') && !endswith(scs, "\\ ") + r = pos+1:pos + paths, dir, success = complete_path("", use_envpath=false, shell_escape=true) + return paths, r, success + elseif all(arg -> arg isa AbstractString, ex.args) + # Join these and treat this as a path + path::String = join(ex.args) + r = last_arg_start:pos # Also try looking into the env path if the user wants to complete the first argument - use_envpath = !ignore_last_word && length(args.args) < 2 + use_envpath = length(args.args) < 2 - return complete_path(prefix, pos, use_envpath=use_envpath, shell_escape=true) - elseif isexpr(arg, :incomplete) || isexpr(arg, :error) - partial = scs[last_parse] - ret, range = completions(partial, lastindex(partial)) - range = range .+ (first(last_parse) - 1) - return ret, range, true + # TODO: call complete_expanduser here? 
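# (Hedged illustration, not part of the patch: for a shell-mode line such as
#  "cd sr", the call below may return a candidate like "src/", which replaces the
#  partially typed word; when the candidates share an already-typed directory
#  prefix, the range `r` is narrowed further down so only the remainder of the
#  path is rewritten at the prompt.)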
+ + paths, dir, success = complete_path(path, use_envpath=use_envpath, shell_escape=true) + + if success && !isempty(dir) + let dir = do_shell_escape(dir) + # if escaping of dir matches scs prefix, remove that from the completions + # otherwise make it the whole completion + partial = string[last_arg_start:pos] + if endswith(dir, "/") && startswith(partial, dir) + r = (last_arg_start + sizeof(dir)):pos + elseif startswith(partial, dir * "/") + r = nextind(string, last_arg_start + sizeof(dir)):pos + else + map!(paths, paths) do c::PathCompletion + return PathCompletion(dir * "/" * c.path) + end + end + end + end + + return paths, r, success end return Completion[], 0:-1, false end @@ -968,20 +1388,67 @@ end function UndefVarError_hint(io::IO, ex::UndefVarError) var = ex.var if var === :or - print(io, "\nsuggestion: Use `||` for short-circuiting boolean OR.") + print(io, "\nSuggestion: Use `||` for short-circuiting boolean OR.") elseif var === :and - print(io, "\nsuggestion: Use `&&` for short-circuiting boolean AND.") + print(io, "\nSuggestion: Use `&&` for short-circuiting boolean AND.") elseif var === :help println(io) # Show friendly help message when user types help or help() and help is undefined show(io, MIME("text/plain"), Base.Docs.parsedoc(Base.Docs.keywords[:help])) elseif var === :quit - print(io, "\nsuggestion: To exit Julia, use Ctrl-D, or type exit() and press enter.") + print(io, "\nSuggestion: To exit Julia, use Ctrl-D, or type exit() and press enter.") end + if isdefined(ex, :scope) + scope = ex.scope + if scope isa Module + bnd = ccall(:jl_get_module_binding, Any, (Any, Any, Cint), scope, var, true)::Core.Binding + if isdefined(bnd, :owner) + owner = bnd.owner + if owner === bnd + print(io, "\nSuggestion: add an appropriate import or assignment. This global was declared but not assigned.") + end + else + owner = ccall(:jl_binding_owner, Ptr{Cvoid}, (Any, Any), scope, var) + if C_NULL == owner + # No global of this name exists in this module. + # This is the common case, so do not print that information. 
+ print(io, "\nSuggestion: check for spelling errors or missing imports.") + owner = bnd + else + owner = unsafe_pointer_to_objref(owner)::Core.Binding + end + end + if owner !== bnd + # this could use jl_binding_dbgmodule for the exported location in the message too + print(io, "\nSuggestion: this global was defined as `$(owner.globalref)` but not assigned a value.") + end + elseif scope === :static_parameter + print(io, "\nSuggestion: run Test.detect_unbound_args to detect method arguments that do not fully constrain a type parameter.") + elseif scope === :local + print(io, "\nSuggestion: check for an assignment to a local variable that shadows a global of the same name.") + end + else + scope = undef + end + warnfor(m, var) = Base.isbindingresolved(m, var) && (Base.isexported(m, var) || Base.ispublic(m, var)) && (print(io, "\nHint: a global variable of this name also exists in $m."); true) + if scope !== Base && !warnfor(Base, var) + warned = false + for m in Base.loaded_modules_order + m === Core && continue + m === Base && continue + m === Main && continue + m === scope && continue + warned = warnfor(m, var) || warned + end + warned = warned || warnfor(Core, var) + warned = warned || warnfor(Main, var) + end + nothing end function __init__() Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError) + COMPLETION_WORLD[] = Base.get_world_counter() nothing end diff --git a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl index 127d0cd88a2cf..a1f94852b38ec 100644 --- a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl +++ b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl @@ -131,7 +131,7 @@ end """ header(m::AbstractMenu) -> String -Returns a header string to be printed above the menu. +Return a header string to be printed above the menu. Defaults to "". """ header(m::AbstractMenu) = "" @@ -216,7 +216,7 @@ function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Unio m.pageoffset = 0 elseif c == Int(END_KEY) cursor[] = lastoption - m.pageoffset = lastoption - m.pagesize + m.pageoffset = max(0, lastoption - m.pagesize) elseif c == 13 # <enter> # will break if pick returns true pick(m, cursor[]) && break @@ -269,7 +269,7 @@ function move_up!(m::AbstractMenu, cursor::Int, lastoption::Int=numoptions(m)) elseif scroll_wrap(m) # wrap to bottom cursor = lastoption - m.pageoffset = lastoption - m.pagesize + m.pageoffset = max(0, lastoption - m.pagesize) end return cursor end @@ -299,7 +299,7 @@ end function page_down!(m::AbstractMenu, cursor::Int, lastoption::Int=numoptions(m)) m.pageoffset += m.pagesize - (cursor == 1 ? 
1 : 0)
-    m.pageoffset = min(m.pageoffset, lastoption - m.pagesize)
+    m.pageoffset = max(0, min(m.pageoffset, lastoption - m.pagesize))
     return min(cursor + m.pagesize, lastoption)
 end
diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl
index 5d8478c9ae42e..41f3020447090 100644
--- a/stdlib/REPL/src/docview.jl
+++ b/stdlib/REPL/src/docview.jl
@@ -20,20 +20,28 @@ using Unicode: normalize
 ## Help mode ##
 
 # This is split into helpmode and _helpmode to easier unittest _helpmode
-helpmode(io::IO, line::AbstractString, mod::Module=Main) = :($REPL.insert_hlines($io, $(REPL._helpmode(io, line, mod))))
+function helpmode(io::IO, line::AbstractString, mod::Module=Main)
+    internal_accesses = Set{Pair{Module,Symbol}}()
+    quote
+        docs = $REPL.insert_hlines($(REPL._helpmode(io, line, mod, internal_accesses)))
+        $REPL.insert_internal_warning(docs, $internal_accesses)
+    end
+end
 helpmode(line::AbstractString, mod::Module=Main) = helpmode(stdout, line, mod)
 
+# A hack to make the line entered at the REPL available at trimdocs without
+# passing the string through the entire mechanism.
 const extended_help_on = Ref{Any}(nothing)
 
-function _helpmode(io::IO, line::AbstractString, mod::Module=Main)
+function _helpmode(io::IO, line::AbstractString, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing)
     line = strip(line)
     ternary_operator_help = (line == "?" || line == "?:")
     if startswith(line, '?') && !ternary_operator_help
         line = line[2:end]
         extended_help_on[] = line
         brief = false
     else
         extended_help_on[] = nothing
         brief = true
     end
     # interpret anything starting with # or #= as asking for help on comments
@@ -47,7 +55,7 @@ function _helpmode(io::IO, line::AbstractString, mod::Module=Main)
     x = Meta.parse(line, raise = false, depwarn = false)
     assym = Symbol(line)
     expr =
-        if haskey(keywords, Symbol(line)) || Base.isoperator(assym) || isexpr(x, :error) ||
+        if haskey(keywords, assym) || Base.isoperator(assym) || isexpr(x, :error) ||
             isexpr(x, :invalid) || isexpr(x, :incomplete)
             # Docs for keywords must be treated separately since trying to parse a single
             # keyword such as `function` would throw a parse error due to the missing `end`.
@@ -64,12 +72,12 @@ function _helpmode(io::IO, line::AbstractString, mod::Module=Main)
     end
     # the following must call repl(io, expr) via the @repl macro
     # so that the resulting expressions are evaluated in the Base.Docs namespace
-    :($REPL.@repl $io $expr $brief $mod)
+    :($REPL.@repl $io $expr $brief $mod $internal_accesses)
 end
 _helpmode(line::AbstractString, mod::Module=Main) = _helpmode(stdout, line, mod)
 
-# Print vertical lines along each docstring if there are multiple docs
-function insert_hlines(io::IO, docs)
+# Print horizontal lines between each docstring if there are multiple docs
+function insert_hlines(docs)
     if !isa(docs, Markdown.MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results])
         return docs
     end
@@ -148,6 +156,48 @@ end
 
 _trimdocs(md, brief::Bool) = md, false
 
+
+is_tuple(expr) = false
+is_tuple(expr::Expr) = expr.head == :tuple
+
+struct Logged{F}
+    f::F
+    mod::Module
+    collection::Set{Pair{Module,Symbol}}
+end
+function (la::Logged)(m::Module, s::Symbol)
+    m !== la.mod && Base.isdefined(m, s) && !Base.ispublic(m, s) && push!(la.collection, m => s)
+    la.f(m, s)
+end
+(la::Logged)(args...) = la.f(args...)
+
+function log_nonpublic_access(expr::Expr, mod::Module, internal_access::Set{Pair{Module,Symbol}})
+    if expr.head === :.
&& length(expr.args) == 2 && !is_tuple(expr.args[2]) + Expr(:call, Logged(getproperty, mod, internal_access), log_nonpublic_access.(expr.args, (mod,), (internal_access,))...) + elseif expr.head === :call && expr.args[1] === Base.Docs.Binding + Expr(:call, Logged(Base.Docs.Binding, mod, internal_access), log_nonpublic_access.(expr.args[2:end], (mod,), (internal_access,))...) + else + Expr(expr.head, log_nonpublic_access.(expr.args, (mod,), (internal_access,))...) + end +end +log_nonpublic_access(expr, ::Module, _) = expr + +function insert_internal_warning(md::Markdown.MD, internal_access::Set{Pair{Module,Symbol}}) + if !isempty(internal_access) + items = Any[Any[Markdown.Paragraph(Any[Markdown.Code("", s)])] for s in sort!(["$mod.$sym" for (mod, sym) in internal_access])] + admonition = Markdown.Admonition("warning", "Warning", Any[ + Markdown.Paragraph(Any["The following bindings may be internal; they may change or be removed in future versions:"]), + Markdown.List(items, -1, false)]) + pushfirst!(md.content, admonition) + end + md +end +function insert_internal_warning(other, internal_access::Set{Pair{Module,Symbol}}) + # We don't know how to insert an internal symbol warning into non-markdown + # content, so we don't. + other +end + """ Docs.doc(binding, sig) @@ -164,7 +214,8 @@ function doc(binding::Binding, sig::Type = Union{}) results, groups = DocStr[], MultiDoc[] # Lookup `binding` and `sig` for matches in all modules of the docsystem. for mod in modules - dict = meta(mod) + dict = meta(mod; autoinit=false) + isnothing(dict) && continue if haskey(dict, binding) multidoc = dict[binding] push!(groups, multidoc) @@ -226,11 +277,15 @@ function lookup_doc(ex) return Markdown.parse("`x $op= y` is a synonym for `x $eq x $op y`") elseif isdotted && ex !== :(..) op = str[2:end] - return Markdown.parse("`x $ex y` is akin to `broadcast($op, x, y)`. See [`broadcast`](@ref).") + if op in ("&&", "||") + return Markdown.parse("`x $ex y` broadcasts the boolean operator `$op` to `x` and `y`. See [`broadcast`](@ref).") + else + return Markdown.parse("`x $ex y` is akin to `broadcast($op, x, y)`. See [`broadcast`](@ref).") + end end end binding = esc(bindingexpr(namify(ex))) - if isexpr(ex, :call) || isexpr(ex, :macrocall) + if isexpr(ex, :call) || isexpr(ex, :macrocall) || isexpr(ex, :where) sig = esc(signature(ex)) :($(doc)($binding, $sig)) else @@ -245,12 +300,22 @@ function summarize(binding::Binding, sig) io = IOBuffer() if defined(binding) binding_res = resolve(binding) - !isa(binding_res, Module) && println(io, "No documentation found.\n") + if !isa(binding_res, Module) + if Base.ispublic(binding.mod, binding.var) + println(io, "No documentation found for public symbol.\n") + else + println(io, "No documentation found for private symbol.\n") + end + end summarize(io, binding_res, binding) else println(io, "No documentation found.\n") quot = any(isspace, sprint(print, binding)) ? "'" : "" - println(io, "Binding ", quot, "`", binding, "`", quot, " does not exist.") + if Base.isbindingresolved(binding.mod, binding.var) + println(io, "Binding ", quot, "`", binding, "`", quot, " exists, but has not been assigned a value.") + else + println(io, "Binding ", quot, "`", binding, "`", quot, " does not exist.") + end end md = Markdown.parse(seekstart(io)) # Save metadata in the generated markdown. 
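Editor's note, not part of the patch: the `Logged` wrapper and `insert_internal_warning` added in the hunk above work together so that help mode can flag accesses to non-public bindings. A minimal sketch of how they combine, assuming Julia 1.11+ (for the `public` keyword) and a REPL build that already contains this patch; the `Demo` module and its names are purely illustrative:

import REPL, Markdown

module Demo
    public pub
    pub = 1    # declared public
    priv = 2   # defined, but not public
end

accesses = Set{Pair{Module,Symbol}}()
# Behaves like `getproperty`, but records defined-yet-non-public accesses.
lg = REPL.Logged(getproperty, Main, accesses)
lg(Demo, :pub)    # returns 1; `pub` is public, so nothing is recorded
lg(Demo, :priv)   # returns 2; records Demo => :priv
# Prepends the "may be internal" warning admonition to the rendered docs.
REPL.insert_internal_warning(Markdown.parse("some docs"), accesses)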
@@ -333,16 +398,17 @@ function find_readme(m::Module)::Union{String, Nothing} end function summarize(io::IO, m::Module, binding::Binding; nlines::Int = 200) readme_path = find_readme(m) + public = Base.ispublic(binding.mod, binding.var) ? "public" : "internal" if isnothing(readme_path) - println(io, "No docstring or readme file found for module `$m`.\n") + println(io, "No docstring or readme file found for $public module `$m`.\n") else - println(io, "No docstring found for module `$m`.") + println(io, "No docstring found for $public module `$m`.") end exports = filter!(!=(nameof(m)), names(m)) if isempty(exports) - println(io, "Module does not export any names.") + println(io, "Module does not have any public names.") else - println(io, "# Exported names") + println(io, "# Public names") print(io, " `") join(io, exports, "`, `") println(io, "`\n") @@ -463,29 +529,30 @@ end repl_latex(s::String) = repl_latex(stdout, s) macro repl(ex, brief::Bool=false, mod::Module=Main) repl(ex; brief, mod) end -macro repl(io, ex, brief, mod) repl(io, ex; brief, mod) end +macro repl(io, ex, brief, mod, internal_accesses) repl(io, ex; brief, mod, internal_accesses) end -function repl(io::IO, s::Symbol; brief::Bool=true, mod::Module=Main) +function repl(io::IO, s::Symbol; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) str = string(s) quote repl_latex($io, $str) repl_search($io, $str, $mod) - $(if !isdefined(mod, s) && !haskey(keywords, s) && !Base.isoperator(s) + $(if !isdefined(mod, s) && !Base.isbindingresolved(mod, s) && !haskey(keywords, s) && !Base.isoperator(s) + # n.b. we call isdefined for the side-effect of resolving the binding, if possible :(repl_corrections($io, $str, $mod)) end) - $(_repl(s, brief)) + $(_repl(s, brief, mod, internal_accesses)) end end isregex(x) = isexpr(x, :macrocall, 3) && x.args[1] === Symbol("@r_str") && !isempty(x.args[3]) -repl(io::IO, ex::Expr; brief::Bool=true, mod::Module=Main) = isregex(ex) ? :(apropos($io, $ex)) : _repl(ex, brief) -repl(io::IO, str::AbstractString; brief::Bool=true, mod::Module=Main) = :(apropos($io, $str)) -repl(io::IO, other; brief::Bool=true, mod::Module=Main) = esc(:(@doc $other)) +repl(io::IO, ex::Expr; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = isregex(ex) ? :(apropos($io, $ex)) : _repl(ex, brief, mod, internal_accesses) +repl(io::IO, str::AbstractString; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = :(apropos($io, $str)) +repl(io::IO, other; brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) = esc(:(@doc $other)) # TODO: track internal_accesses #repl(io::IO, other) = lookup_doc(other) # TODO repl(x; brief::Bool=true, mod::Module=Main) = repl(stdout, x; brief, mod) -function _repl(x, brief::Bool=true) +function _repl(x, brief::Bool=true, mod::Module=Main, internal_accesses::Union{Nothing, Set{Pair{Module,Symbol}}}=nothing) if isexpr(x, :call) x = x::Expr # determine the types of the values @@ -551,6 +618,7 @@ function _repl(x, brief::Bool=true) else docs end + docs = log_nonpublic_access(macroexpand(mod, docs), mod, internal_accesses) :(REPL.trimdocs($docs, $brief)) end @@ -561,7 +629,8 @@ Return documentation for a particular `field` of a type if it exists. 
""" function fielddoc(binding::Binding, field::Symbol) for mod in modules - dict = meta(mod) + dict = meta(mod; autoinit=false) + isnothing(dict) && continue if haskey(dict, binding) multidoc = dict[binding] if haskey(multidoc.docs, Union{}) @@ -573,8 +642,9 @@ function fielddoc(binding::Binding, field::Symbol) end end end - fields = join(["`$f`" for f in fieldnames(resolve(binding))], ", ", ", and ") - fields = isempty(fields) ? "no fields" : "fields $fields" + fs = fieldnames(resolve(binding)) + fields = isempty(fs) ? "no fields" : (length(fs) == 1 ? "field " : "fields ") * + join(("`$f`" for f in fs), ", ", ", and ") Markdown.parse("`$(resolve(binding))` has $fields.") end @@ -613,22 +683,61 @@ bestmatch(needle, haystack) = longer(matchinds(needle, haystack, acronym = true), matchinds(needle, haystack)) -avgdistance(xs) = - isempty(xs) ? 0 : - (xs[end] - xs[1] - length(xs)+1)/length(xs) +# Optimal string distance: Counts the minimum number of insertions, deletions, +# transpositions or substitutions to go from one string to the other. +function string_distance(a::AbstractString, lena::Integer, b::AbstractString, lenb::Integer) + if lena > lenb + a, b = b, a + lena, lenb = lenb, lena + end + start = 0 + for (i, j) in zip(a, b) + if a == b + start += 1 + else + break + end + end + start == lena && return lenb - start + vzero = collect(1:(lenb - start)) + vone = similar(vzero) + prev_a, prev_b = first(a), first(b) + current = 0 + for (i, ai) in enumerate(a) + i > start || (prev_a = ai; continue) + left = i - start - 1 + current = i - start + transition_next = 0 + for (j, bj) in enumerate(b) + j > start || (prev_b = bj; continue) + # No need to look beyond window of lower right diagonal + above = current + this_transition = transition_next + transition_next = vone[j - start] + vone[j - start] = current = left + left = vzero[j - start] + if ai != bj + # Minimum between substitution, deletion and insertion + current = min(current + 1, above + 1, left + 1) + if i > start + 1 && j > start + 1 && ai == prev_b && prev_a == bj + current = min(current, (this_transition += 1)) + end + end + vzero[j - start] = current + prev_b = bj + end + prev_a = ai + end + current +end -function fuzzyscore(needle, haystack) - score = 0. - is, acro = bestmatch(needle, haystack) - score += (acro ? 
2 : 1)*length(is) # Matched characters - score -= 2(length(needle)-length(is)) # Missing characters - !acro && (score -= avgdistance(is)/10) # Contiguous - !isempty(is) && (score -= sum(is)/length(is)/100) # Closer to beginning - return score +function fuzzyscore(needle::AbstractString, haystack::AbstractString) + lena, lenb = length(needle), length(haystack) + 1 - (string_distance(needle, lena, haystack, lenb) / max(lena, lenb)) end function fuzzysort(search::String, candidates::Vector{String}) - scores = map(cand -> (fuzzyscore(search, cand), -Float64(levenshtein(search, cand))), candidates) + scores = map(cand -> fuzzyscore(search, cand), candidates) candidates[sortperm(scores)] |> reverse end @@ -679,7 +788,7 @@ function printmatches(io::IO, word, matches; cols::Int = _displaysize(io)[2]) total = 0 for match in matches total + length(match) + 1 > cols && break - fuzzyscore(word, match) < 0 && break + fuzzyscore(word, match) < 0.5 && break print(io, " ") printmatch(io, word, match) total += length(match) + 1 @@ -732,8 +841,9 @@ function doc_completions(name, mod::Module=Main) idxs = findall(!isnothing, ms) # avoid messing up the order while inserting - for i in reverse(idxs) - insert!(res, i, "$(only(ms[i].captures))\"\"") + for i in reverse!(idxs) + c = only((ms[i]::AbstractMatch).captures) + insert!(res, i, "$(c)\"\"") end res end @@ -829,7 +939,9 @@ function apropos(io::IO, needle::Regex) for mod in modules # Module doc might be in README.md instead of the META dict docsearch(doc(mod), needle) && println(io, mod) - for (k, v) in meta(mod) + dict = meta(mod; autoinit=false) + isnothing(dict) && continue + for (k, v) in dict docsearch(v, needle) && println(io, k) end end diff --git a/stdlib/REPL/src/latex_symbols.jl b/stdlib/REPL/src/latex_symbols.jl index 87a3c289661d9..bc406e1468710 100644 --- a/stdlib/REPL/src/latex_symbols.jl +++ b/stdlib/REPL/src/latex_symbols.jl @@ -432,8 +432,10 @@ const latex_symbols = Dict( "\\pertenthousand" => "‱", "\\prime" => "′", "\\backprime" => "‵", - "\\guilsinglleft" => "‹", + "\\guilsinglleft" => "‹", # note: \guil* quote names follow the LaTeX csquotes package "\\guilsinglright" => "›", + "\\guillemotleft" => "«", + "\\guillemotright" => "»", "\\nolinebreak" => "\u2060", "\\pes" => "₧", "\\dddot" => "⃛", @@ -982,17 +984,16 @@ const latex_symbols = Dict( "\\droang" => "̚", # left angle above (non-spacing) "\\wideutilde" => "̰", # under tilde accent (multiple characters and non-spacing) "\\not" => "̸", # combining long solidus overlay - "\\upMu" => "Μ", # capital mu, greek - "\\upNu" => "Ν", # capital nu, greek - "\\upOmicron" => "Ο", # capital omicron, greek - "\\upepsilon" => "ε", # rounded small epsilon, greek - "\\upomicron" => "ο", # small omicron, greek - "\\upvarbeta" => "ϐ", # rounded small beta, greek - "\\upoldKoppa" => "Ϙ", # greek letter archaic koppa - "\\upoldkoppa" => "ϙ", # greek small letter archaic koppa - "\\upstigma" => "ϛ", # greek small letter stigma - "\\upkoppa" => "ϟ", # greek small letter koppa - "\\upsampi" => "ϡ", # greek small letter sampi + "\\Mu" => "Μ", # capital mu, greek + "\\Nu" => "Ν", # capital nu, greek + "\\Omicron" => "Ο", # capital omicron, greek + "\\omicron" => "ο", # small omicron, greek + "\\varbeta" => "ϐ", # rounded small beta, greek + "\\oldKoppa" => "Ϙ", # greek letter archaic koppa + "\\oldkoppa" => "ϙ", # greek small letter archaic koppa + "\\stigma" => "ϛ", # greek small letter stigma + "\\koppa" => "ϟ", # greek small letter koppa + "\\sampi" => "ϡ", # greek small letter sampi "\\tieconcat" => "⁀", # 
character tie, z notation sequence concatenation "\\leftharpoonaccent" => "⃐", # combining left harpoon above "\\rightharpoonaccent" => "⃑", # combining right harpoon above @@ -1289,6 +1290,7 @@ const latex_symbols = Dict( "\\bsolhsub" => "\u27c8", # reverse solidus preceding subset "\\suphsol" => "\u27c9", # superset preceding solidus "\\wedgedot" => "⟑", # and with dot + "\\veedot" => "⟇", # or with dot "\\upin" => "⟒", # element of opening upwards "\\bigbot" => "⟘", # large up tack "\\bigtop" => "⟙", # large down tack @@ -1569,7 +1571,9 @@ const latex_symbols = Dict( "\\bsimilarleftarrow" => "\u2b41", # reverse tilde operator above leftwards arrow "\\leftarrowbackapprox" => "\u2b42", # leftwards arrow above reverse almost equal to "\\rightarrowgtr" => "\u2b43", # rightwards arrow through greater-than - "\\rightarrowsupset" => "\u2b44", # rightwards arrow through subset + "\\leftarrowless" => "\u2977", # leftwards arrow through less-than + "\\rightarrowsupset" => "\u2b44", # rightwards arrow through superset + "\\leftarrowsubset" => "\u297a", # leftwards arrow through subset "\\LLeftarrow" => "\u2b45", # leftwards quadruple arrow "\\RRightarrow" => "\u2b46", # rightwards quadruple arrow "\\bsimilarrightarrow" => "\u2b47", # reverse tilde operator above rightwards arrow @@ -2618,10 +2622,10 @@ const latex_symbols = Dict( "\\4/5" => "⅘", # vulgar fraction four fifths "\\1/6" => "⅙", # vulgar fraction one sixth "\\5/6" => "⅚", # vulgar fraction five sixths - "\\1/8" => "⅛", # vulgar fraction one eigth - "\\3/8" => "⅜", # vulgar fraction three eigths - "\\5/8" => "⅝", # vulgar fraction five eigths - "\\7/8" => "⅞", # vulgar fraction seventh eigths + "\\1/8" => "⅛", # vulgar fraction one eighth + "\\3/8" => "⅜", # vulgar fraction three eighths + "\\5/8" => "⅝", # vulgar fraction five eighths + "\\7/8" => "⅞", # vulgar fraction seventh eighths "\\1/" => "⅟", # fraction numerator one "\\0/3" => "↉", # vulgar fraction zero thirds "\\1/4" => "¼", # vulgar fraction one quarter diff --git a/stdlib/REPL/src/options.jl b/stdlib/REPL/src/options.jl index 3ce0ab6ff00dc..1fb2c654c7df2 100644 --- a/stdlib/REPL/src/options.jl +++ b/stdlib/REPL/src/options.jl @@ -27,6 +27,7 @@ mutable struct Options auto_indent_time_threshold::Float64 # refresh after time delay auto_refresh_time_delay::Float64 + hint_tab_completes::Bool # default IOContext settings at the REPL iocontext::Dict{Symbol,Any} end @@ -47,6 +48,7 @@ Options(; auto_indent_bracketed_paste = false, auto_indent_time_threshold = 0.005, auto_refresh_time_delay = Sys.iswindows() ? 0.05 : 0.0, + hint_tab_completes = true, iocontext = Dict{Symbol,Any}()) = Options(hascolor, extra_keymap, tabwidth, kill_ring_max, region_animation_duration, @@ -55,6 +57,7 @@ Options(; backspace_align, backspace_adjust, confirm_exit, auto_indent, auto_indent_tmp_off, auto_indent_bracketed_paste, auto_indent_time_threshold, auto_refresh_time_delay, + hint_tab_completes, iocontext) # for use by REPLs not having an options field diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl new file mode 100644 index 0000000000000..97c494cb331d3 --- /dev/null +++ b/stdlib/REPL/src/precompile.jl @@ -0,0 +1,209 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module Precompile +# Can't use this during incremental: `@eval Module() begin`` + +import ..REPL + +# Ugly hack for our cache file to not have a dependency edge on FakePTYs. 
+Base._track_dependencies[] = false +try + Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testhelpers", "FakePTYs.jl")) + import .FakePTYs: open_fake_pty +finally + Base._track_dependencies[] = true +end +using Base.Meta + +import Markdown + +## Debugging options +# Disable parallel precompiles generation by setting `false` +const PARALLEL_PRECOMPILATION = true + +# View the code sent to the repl by setting this to `stdout` +const debug_output = devnull # or stdout + +CTRL_C = '\x03' +CTRL_D = '\x04' +CTRL_R = '\x12' +UP_ARROW = "\e[A" +DOWN_ARROW = "\e[B" + +repl_script = """ +2+2 +print("") +printstyled("a", "b") +display([1]) +display([1 2; 3 4]) +foo(x) = 1 +@time @eval foo(1) +; pwd +$CTRL_C +$CTRL_R$CTRL_C# +? reinterpret +using Ra\t$CTRL_C +\\alpha\t$CTRL_C +\e[200~paste here ;)\e[201~"$CTRL_C +$UP_ARROW$DOWN_ARROW$CTRL_C +123\b\b\b$CTRL_C +\b\b$CTRL_C +f(x) = x03 +f(1,2) +[][1] +cd("complete_path\t\t$CTRL_C +""" + +julia_exepath() = joinpath(Sys.BINDIR, Base.julia_exename()) + +const JULIA_PROMPT = "julia> " +const PKG_PROMPT = "pkg> " +const SHELL_PROMPT = "shell> " +const HELP_PROMPT = "help?> " + +blackhole = Sys.isunix() ? "/dev/null" : "nul" +procenv = Dict{String,Any}( + "JULIA_HISTORY" => blackhole, + "JULIA_PROJECT" => nothing, # remove from environment + "JULIA_LOAD_PATH" => "@stdlib", + "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":", + "TERM" => "", + "JULIA_FALLBACK_REPL" => "0") # Turn REPL.jl on in subprocess + +generate_precompile_statements() = try + # Extract the precompile statements from the precompile file + statements_step = Channel{String}(Inf) + + step = @async mktemp() do precompile_file, precompile_file_h + # Collect statements from running a REPL process and replaying our REPL script + touch(precompile_file) + pts, ptm = open_fake_pty() + cmdargs = `-e 'import REPL; REPL.Terminals.is_precompiling[] = true'` + p = run(addenv(addenv(```$(julia_exepath()) -O0 --trace-compile=$precompile_file + --cpu-target=native --startup-file=no --compiled-modules=existing --color=yes -i $cmdargs```, procenv), + "JULIA_PKG_PRECOMPILE_AUTO" => "0"), + pts, pts, pts; wait=false) + Base.close_stdio(pts) + # Prepare a background process to copy output from process until `pts` is closed + output_copy = Base.BufferStream() + tee = @async try + while !eof(ptm) + l = readavailable(ptm) + write(debug_output, l) + Sys.iswindows() && (sleep(0.1); yield(); yield()) # workaround hang - probably a libuv issue? + write(output_copy, l) + end + write(debug_output, "\n#### EOF ####\n") + catch ex + if !(ex isa Base.IOError && ex.code == Base.UV_EIO) + rethrow() # ignore EIO on ptm after pts dies + end + finally + close(output_copy) + close(ptm) + end + Base.errormonitor(tee) + repl_inputter = @async begin + # wait for the definitive prompt before start writing to the TTY + readuntil(output_copy, JULIA_PROMPT) + sleep(0.1) + readavailable(output_copy) + # Input our script + precompile_lines = split(repl_script::String, '\n'; keepempty=false) + curr = 0 + for l in precompile_lines + sleep(0.1) + curr += 1 + # consume any other output + bytesavailable(output_copy) > 0 && readavailable(output_copy) + # push our input + write(debug_output, "\n#### inputting statement: ####\n$(repr(l))\n####\n") + # If the line ends with a CTRL_C, don't write an extra newline, which would + # cause a second empty prompt. Our code below expects one new prompt per + # input line and can race out of sync with the unexpected second line. + endswith(l, CTRL_C) ? 
write(ptm, l) : write(ptm, l, "\n") + readuntil(output_copy, "\n") + # wait for the next prompt-like to appear + readuntil(output_copy, "\n") + strbuf = "" + while !eof(output_copy) + strbuf *= String(readavailable(output_copy)) + occursin(JULIA_PROMPT, strbuf) && break + occursin(PKG_PROMPT, strbuf) && break + occursin(SHELL_PROMPT, strbuf) && break + occursin(HELP_PROMPT, strbuf) && break + sleep(0.1) + end + end + write(debug_output, "\n#### COMPLETED - Closing REPL ####\n") + write(ptm, "$CTRL_D") + wait(tee) + success(p) || Base.pipeline_error(p) + close(ptm) + write(debug_output, "\n#### FINISHED ####\n") + end + Base.errormonitor(repl_inputter) + + n_step = 0 + precompile_copy = Base.BufferStream() + buffer_reader = @async for statement in eachline(precompile_copy) + push!(statements_step, statement) + n_step += 1 + end + + open(precompile_file, "r") do io + while true + # We need to always call eof(io) for bytesavailable(io) to work + eof(io) && istaskdone(repl_inputter) && eof(io) && break + if bytesavailable(io) == 0 + sleep(0.1) + continue + end + write(precompile_copy, readavailable(io)) + end + end + close(precompile_copy) + wait(buffer_reader) + close(statements_step) + return :ok + end + !PARALLEL_PRECOMPILATION && wait(step) + + # Make statements unique + statements = Set{String}() + # Execute the precompile statements + for statement in statements_step + # Main should be completely clean + occursin("Main.", statement) && continue + Base.in!(statement, statements) && continue + try + ps = Meta.parse(statement) + if !isexpr(ps, :call) + # these are typically comments + @debug "skipping statement because it does not parse as an expression" statement + continue + end + push!(REPL.PRECOMPILE_STATEMENTS, statement) + popfirst!(ps.args) # precompile(...) + ps.head = :tuple + # println(ps) + ps = eval(ps) + if !precompile(ps...) + @warn "Failed to precompile expression" form=statement _module=nothing _file=nothing _line=0 + end + catch ex + # See #28808 + @warn "Failed to precompile expression" form=statement exception=ex _module=nothing _file=nothing _line=0 + end + end + + fetch(step) == :ok || throw("Collecting precompiles failed.") + return nothing +finally + GC.gc(true); GC.gc(false); # reduce memory footprint +end + +generate_precompile_statements() + +precompile(Tuple{typeof(getproperty), REPL.REPLBackend, Symbol}) +end # Precompile diff --git a/stdlib/REPL/test/TerminalMenus/dynamic_menu.jl b/stdlib/REPL/test/TerminalMenus/dynamic_menu.jl index 23d026358385f..63b48b7173491 100644 --- a/stdlib/REPL/test/TerminalMenus/dynamic_menu.jl +++ b/stdlib/REPL/test/TerminalMenus/dynamic_menu.jl @@ -116,3 +116,36 @@ str = String(take!(io)) nback, strs = linesplitter(str) @test nback == 3 @test strs == ["^ 3", " 4", " 5", " > 6*"] + +# Test with page size larger than number of options. +# END_KEY, PAGE_DOWN, and ARROW_UP (from first element with scroll +# wrap) used to be problematic. The last two are tested here, whereas +# the first one is unreachable within the `request` function. 
+menu = DynamicMenu(4, 0, -1, 2, TerminalMenus.Config(scroll_wrap = true)) + +cursor = 1 +state = TerminalMenus.printmenu(io, menu, cursor; init=true) +str = String(take!(io)) +@test count(isequal('\n'), str) == state +nback, strs = linesplitter(str) +@test nback == 0 +@test strs == [" > 1*", " 2"] + +cursor = TerminalMenus.page_down!(menu, cursor) +@test cursor == menu.numopts +@test menu.pageoffset == 0 +state = TerminalMenus.printmenu(io, menu, cursor; oldstate=state) +str = String(take!(io)) +nback, strs = linesplitter(str) +@test nback == 1 +@test strs == [" 1", " > 2*"] + +cursor = TerminalMenus.page_up!(menu, cursor) +cursor = TerminalMenus.move_up!(menu, cursor) +@test cursor == menu.numopts +@test menu.pageoffset == 0 +state = TerminalMenus.printmenu(io, menu, cursor; oldstate=state) +str = String(take!(io)) +nback, strs = linesplitter(str) +@test nback == 1 +@test strs == [" 1", " > 2*"] diff --git a/stdlib/REPL/test/docview.jl b/stdlib/REPL/test/docview.jl index 22701ead7883d..897a69a9266ab 100644 --- a/stdlib/REPL/test/docview.jl +++ b/stdlib/REPL/test/docview.jl @@ -4,21 +4,17 @@ using Test import REPL, REPL.REPLCompletions import Markdown -@testset "symbol completion" begin - @test startswith(let buf = IOBuffer() - Core.eval(Main, REPL.helpmode(buf, "α")) - String(take!(buf)) - end, "\"α\" can be typed by \\alpha<tab>\n") - - @test startswith(let buf = IOBuffer() - Core.eval(Main, REPL.helpmode(buf, "🐨")) - String(take!(buf)) - end, "\"🐨\" can be typed by \\:koala:<tab>\n") +function get_help_io(input) + buf = IOBuffer() + eval(REPL.helpmode(buf, input)) + String(take!(buf)) +end +get_help_standard(input) = string(eval(REPL.helpmode(IOBuffer(), input))) - @test startswith(let buf = IOBuffer() - Core.eval(Main, REPL.helpmode(buf, "ᵞ₁₂₃¹²³α")) - String(take!(buf)) - end, "\"ᵞ₁₂₃¹²³α\" can be typed by \\^gamma<tab>\\_123<tab>\\^123<tab>\\alpha<tab>\n") +@testset "symbol completion" begin + @test startswith(get_help_io("α"), "\"α\" can be typed by \\alpha<tab>\n") + @test startswith(get_help_io("🐨"), "\"🐨\" can be typed by \\:koala:<tab>\n") + @test startswith(get_help_io("ᵞ₁₂₃¹²³α"), "\"ᵞ₁₂₃¹²³α\" can be typed by \\^gamma<tab>\\_123<tab>\\^123<tab>\\alpha<tab>\n") # Check that all symbols with several completions have a canonical mapping (#39148) symbols = values(REPLCompletions.latex_symbols) @@ -27,19 +23,16 @@ import Markdown end @testset "quoting in doc search" begin - str = let buf = IOBuffer() - Core.eval(Main, REPL.helpmode(buf, "mutable s")) - String(take!(buf)) - end + str = get_help_io("mutable s") @test occursin("'mutable struct'", str) @test occursin("Couldn't find 'mutable s'", str) end @testset "Non-Markdown" begin # https://github.com/JuliaLang/julia/issues/37765 - @test isa(REPL.insert_hlines(IOBuffer(), Markdown.Text("foo")), Markdown.Text) + @test isa(REPL.insert_hlines(Markdown.Text("foo")), Markdown.Text) # https://github.com/JuliaLang/julia/issues/37757 - @test REPL.insert_hlines(IOBuffer(), nothing) === nothing + @test REPL.insert_hlines(nothing) === nothing end @testset "Check @var_str also completes to var\"\" in REPL.doc_completions()" begin @@ -54,6 +47,15 @@ end # https://github.com/JunoLab/FuzzyCompletions.jl/issues/7 # shouldn't throw when there is a space in a middle of query @test (REPL.matchinds("a ", "a file.txt"); true) + @test isapprox(REPL.fuzzyscore("abcdef", ""), 0.0; atol=0.001) + @test 0.8 < REPL.fuzzyscore( + "supercalifragilisticexpialidocious", + "bupercalifragilisticexpialidocious" + ) < 1.0 + + # Unicode + @test 1.0 > 
REPL.fuzzyscore("αkδψm", "αkδm") > 0.0 + @test 1.0 > REPL.fuzzyscore("αkδψm", "α") > 0.0 end @testset "Unicode doc lookup (#41589)" begin @@ -65,3 +67,71 @@ end b = REPL.Binding(@__MODULE__, :R) @test REPL.summarize(b, Tuple{}) isa Markdown.MD end + +@testset "Struct field help (#51178)" begin + struct StructWithNoFields end + struct StructWithOneField + field1 + end + struct StructWithTwoFields + field1 + field2 + end + struct StructWithThreeFields + field1 + field2 + field3 + end + + @test endswith(get_help_standard("StructWithNoFields.not_a_field"), "StructWithNoFields` has no fields.\n") + @test endswith(get_help_standard("StructWithOneField.not_a_field"), "StructWithOneField` has field `field1`.\n") + @test endswith(get_help_standard("StructWithTwoFields.not_a_field"), "StructWithTwoFields` has fields `field1`, and `field2`.\n") + @test endswith(get_help_standard("StructWithThreeFields.not_a_field"), "StructWithThreeFields` has fields `field1`, `field2`, and `field3`.\n") +end + +module InternalWarningsTests + + module A + public B, B3 + module B + public e + c = 4 + "d is 5" + d = 5 + "e is 6" + e = 6 + end + + module B2 + module C + public e + d = 1 + "e is 2" + e = 2 + end + end + + module B3 end + end + + using Test, REPL + @testset "internal warnings" begin + header = "!!! warning\n The following bindings may be internal; they may change or be removed in future versions:\n\n" + prefix(warnings) = header * join(" * `$(@__MODULE__).$w`\n" for w in warnings) * "\n\n" + docstring(input) = string(eval(REPL.helpmode(IOBuffer(), input, @__MODULE__))) + + @test docstring("A") == "No docstring or readme file found for internal module `$(@__MODULE__).A`.\n\n# Public names\n\n`B`, `B3`\n" + @test docstring("A.B") == "No docstring or readme file found for public module `$(@__MODULE__).A.B`.\n\n# Public names\n\n`e`\n" + @test startswith(docstring("A.B.c"), prefix(["A.B.c"])) + @test startswith(docstring("A.B.d"), prefix(["A.B.d"])) + @test docstring("A.B.e") == "e is 6\n" + @test startswith(docstring("A.B2"), prefix(["A.B2"])) + @test startswith(docstring("A.B2.C"), prefix(["A.B2", "A.B2.C"])) + @test startswith(docstring("A.B2.C.d"), prefix(["A.B2", "A.B2.C", "A.B2.C.d"])) + @test startswith(docstring("A.B2.C.e"), prefix(["A.B2", "A.B2.C"])) + @test docstring("A.B3") == "No docstring or readme file found for public module `$(@__MODULE__).A.B3`.\n\nModule does not have any public names.\n" + end +end + +# Issue #51344, don't print "internal binding" warning for non-existent bindings. +@test string(eval(REPL.helpmode("Base.no_such_symbol"))) == "No documentation found.\n\nBinding `Base.no_such_symbol` does not exist.\n" diff --git a/stdlib/REPL/test/lineedit.jl b/stdlib/REPL/test/lineedit.jl index 87028e239d5b8..cf87e811508a0 100644 --- a/stdlib/REPL/test/lineedit.jl +++ b/stdlib/REPL/test/lineedit.jl @@ -29,7 +29,7 @@ function transform!(f, s, i = -1) # i is char-based (not bytes) buffer position # simulate what happens in LineEdit.set_action! s isa LineEdit.MIState && (s.current_action = :unknown) status = f(s) - if s isa LineEdit.MIState && status != :ignore + if s isa LineEdit.MIState && status !== :ignore # simulate what happens in LineEdit.prompt! 
s.last_action = s.current_action end @@ -306,21 +306,21 @@ seek(buf,0) ## edit_delete_prev_word ## -buf = IOBuffer("type X\n ") +buf = IOBuffer(Vector{UInt8}("type X\n "), read=true, write=true) seekend(buf) @test !isempty(@inferred(LineEdit.edit_delete_prev_word(buf))) @test position(buf) == 5 @test buf.size == 5 @test content(buf) == "type " -buf = IOBuffer("4 +aaa+ x") +buf = IOBuffer(Vector{UInt8}("4 +aaa+ x"), read=true, write=true) seek(buf,8) @test !isempty(LineEdit.edit_delete_prev_word(buf)) @test position(buf) == 3 @test buf.size == 4 @test content(buf) == "4 +x" -buf = IOBuffer("x = func(arg1,arg2 , arg3)") +buf = IOBuffer(Vector{UInt8}("x = func(arg1,arg2 , arg3)"), read=true, write=true) seekend(buf) LineEdit.char_move_word_left(buf) @test position(buf) == 21 @@ -376,6 +376,8 @@ let buf = IOBuffer() LineEdit.edit_transpose_chars(buf) @test content(buf) == "βγαδε" + + # Transposing a one-char buffer should behave like Emacs seek(buf, 0) @inferred(LineEdit.edit_clear(buf)) edit_insert(buf, "a") @@ -385,6 +387,13 @@ let buf = IOBuffer() LineEdit.edit_transpose_chars(buf) @test content(buf) == "a" @test position(buf) == 0 + + # Transposing an empty buffer shouldn't implode + seek(buf, 0) + LineEdit.edit_clear(buf) + LineEdit.edit_transpose_chars(buf) + @test content(buf) == "" + @test position(buf) == 0 end @testset "edit_word_transpose" begin @@ -465,7 +474,8 @@ end # julia> is 6 characters + 1 character for space, # so the rest of the terminal is 73 characters ######################################################################### -let buf = IOBuffer( +withenv("COLUMNS"=>"80") do + buf = IOBuffer( "begin\nprint(\"A very very very very very very very very very very very very ve\")\nend") seek(buf, 4) outbuf = IOBuffer() @@ -906,3 +916,27 @@ end @test get_last_word("a[b[]]") == "b" @test get_last_word("a[]") == "a[]" end + +@testset "show_completions" begin + term = FakeTerminal(IOBuffer(), IOBuffer(), IOBuffer()) + + function getcompletion(completions) + promptstate = REPL.LineEdit.init_state(term, REPL.LineEdit.mode(new_state())) + REPL.LineEdit.show_completions(promptstate, completions) + return String(take!(term.out_stream)) + end + + # When the number of completions is less than + # LineEdit.MULTICOLUMN_THRESHOLD, they should be in a single column. 
+ strings = ["abcdef", "123456", "ijklmn"] + @assert length(strings) < LineEdit.MULTICOLUMN_THRESHOLD + @test getcompletion(strings) == "\033[0B\n\rabcdef\n\r123456\n\rijklmn\n" + + # But with more than the threshold there should be multiple columns + strings2 = repeat(["foo"], LineEdit.MULTICOLUMN_THRESHOLD + 1) + @test getcompletion(strings2) == "\033[0B\n\rfoo\r\e[5Cfoo\n\rfoo\r\e[5Cfoo\n\rfoo\r\e[5Cfoo\n" + + # Check that newlines in completions are handled correctly (issue #45836) + strings3 = ["abcdef", "123456\nijklmn"] + @test getcompletion(strings3) == "\033[0B\nabcdef\n123456\nijklmn\n" +end diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl index 6e4132aaab1cd..7896b589612e3 100644 --- a/stdlib/REPL/test/repl.jl +++ b/stdlib/REPL/test/repl.jl @@ -6,6 +6,10 @@ using Random import REPL.LineEdit using Markdown +empty!(Base.Experimental._hint_handlers) # unregister error hints so they can be tested separately + +@test isassigned(Base.REPL_MODULE_REF) + const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl")) import .Main.FakePTYs: with_fake_pty @@ -16,7 +20,6 @@ include(joinpath(BASE_TEST_PATH, "testenv.jl")) include("FakeTerminals.jl") import .FakeTerminals.FakeTerminal - function kill_timer(delay) # Give ourselves a generous timer here, just to prevent # this causing e.g. a CI hang when there's something unexpected in the output. @@ -35,6 +38,33 @@ function kill_timer(delay) return Timer(kill_test, delay) end +## Debugging toys. Usage: +## stdout_read = tee_repr_stdout(stdout_read) +## ccall(:jl_breakpoint, Cvoid, (Any,), stdout_read) +#function tee(f, in::IO) +# copy = Base.BufferStream() +# t = @async try +# while !eof(in) +# l = readavailable(in) +# f(l) +# write(copy, l) +# end +# catch ex +# if !(ex isa Base.IOError && ex.code == Base.UV_EIO) +# rethrow() # ignore EIO on `in` stream +# end +# finally +# # TODO: could we call closewrite to propagate an error, instead of always doing a clean close here? 
+# closewrite(copy) +# end +# Base.errormonitor(t) +# return copy +#end +#tee(out::IO, in::IO) = tee(l -> write(out, l), in) +#tee_repr_stdout(io) = tee(io) do x +# print(repr(String(copy(x))) * "\n") +#end + # REPL tests function fake_repl(@nospecialize(f); options::REPL.Options=REPL.Options(confirm_exit=false)) # Use pipes so we can easily do blocking reads @@ -99,8 +129,8 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri end global inc = false - global b = Condition() - global c = Condition() + global b = Base.Event(true) + global c = Base.Event(true) let cmd = "\"Hello REPL\"" write(stdin_write, "$(curmod_prefix)inc || wait($(curmod_prefix)b); r = $cmd; notify($(curmod_prefix)c); r\r") end @@ -143,44 +173,46 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri homedir_pwd = cd(pwd, homedir()) # Test `cd`'ing to an absolute path - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "cd $(escape_string(tmpdir))\n") + wait(t) + t = @async write(stdin_write, "cd $(escape_string(tmpdir))\n") readuntil(stdout_read, "cd $(escape_string(tmpdir))") - readuntil(stdout_read, tmpdir_pwd) - readuntil(stdout_read, "\n") - readuntil(stdout_read, "\n") + readuntil(stdout_read, tmpdir_pwd * "\n\n") + wait(t) @test samefile(".", tmpdir) write(stdin_write, "\b") # Test using `cd` to move to the home directory - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "cd\n") - readuntil(stdout_read, homedir_pwd) - readuntil(stdout_read, "\n") - readuntil(stdout_read, "\n") + wait(t) + t = @async write(stdin_write, "cd\n") + readuntil(stdout_read, homedir_pwd * "\n\n") + wait(t) @test samefile(".", homedir_pwd) - write(stdin_write, "\b") + t1 = @async write(stdin_write, "\b") # Test using `-` to jump backward to tmpdir - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "cd -\n") - readuntil(stdout_read, tmpdir_pwd) - readuntil(stdout_read, "\n") - readuntil(stdout_read, "\n") + wait(t1) + wait(t) + t = @async write(stdin_write, "cd -\n") + readuntil(stdout_read, tmpdir_pwd * "\n\n") + wait(t) @test samefile(".", tmpdir) - write(stdin_write, "\b") + t1 = @async write(stdin_write, "\b") # Test using `~` (Base.expanduser) in `cd` commands if !Sys.iswindows() - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "cd ~\n") - readuntil(stdout_read, homedir_pwd) - readuntil(stdout_read, "\n") - readuntil(stdout_read, "\n") + wait(t1) + wait(t) + t = @async write(stdin_write, "cd ~\n") + readuntil(stdout_read, homedir_pwd * "\n\n") + wait(t) @test samefile(".", homedir_pwd) write(stdin_write, "\b") end @@ -203,9 +235,10 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri # issue #20771 let s - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "'\n") # invalid input + wait(t) + t = @async write(stdin_write, "'\n") # invalid input s = readuntil(stdout_read, "\n") @test occursin("shell> ", s) # check for the echo of the prompt @test occursin("'", s) # check for the echo of the input @@ -213,26 +246,28 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri @test startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n [1] ") || startswith(s, "\e[0m\e[1m\e[91mERROR: 
\e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n [1] ") write(stdin_write, "\b") + wait(t) end # issue #27293 if Sys.isunix() let s, old_stdout = stdout - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - write(stdin_write, "echo ~") - s = readuntil(stdout_read, "~") + wait(t) proc_stdout_read, proc_stdout = redirect_stdout() get_stdout = @async read(proc_stdout_read, String) try - write(stdin_write, "\n") + t = @async write(stdin_write, "echo ~\n") + readuntil(stdout_read, "~") readuntil(stdout_read, "\n") - s = readuntil(stdout_read, "\n") + s = readuntil(stdout_read, "\n") # the child has exited + wait(t) finally redirect_stdout(old_stdout) end - @test s == "\e[0m" # the child has exited + @test s == "\e[0m" close(proc_stdout) # check for the correct, expanded response @test occursin(expanduser("~"), fetch(get_stdout)) @@ -261,28 +296,33 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri # issue #10120 # ensure that command quoting works correctly let s, old_stdout = stdout - write(stdin_write, ";") + t = @async write(stdin_write, ";") readuntil(stdout_read, "shell> ") - Base.print_shell_escaped(stdin_write, Base.julia_cmd().exec..., special=Base.shell_special) - write(stdin_write, """ -e "println(\\"HI\\")\"""") + wait(t) + t = @async begin + Base.print_shell_escaped(stdin_write, Base.julia_cmd().exec..., special=Base.shell_special) + write(stdin_write, """ -e "println(\\"HI\\")\"""") + end readuntil(stdout_read, ")\"") + wait(t) proc_stdout_read, proc_stdout = redirect_stdout() get_stdout = @async read(proc_stdout_read, String) try - write(stdin_write, '\n') - s = readuntil(stdout_read, "\n", keep=true) - if s == "\n" + t = @async write(stdin_write, '\n') + s = readuntil(stdout_read, "\n") + if s == "" # if shell width is precisely the text width, # we may print some extra characters to fix the cursor state - s = readuntil(stdout_read, "\n", keep=true) + s = readuntil(stdout_read, "\n") @test occursin("shell> ", s) - s = readuntil(stdout_read, "\n", keep=true) - @test s == "\r\r\n" + s = readuntil(stdout_read, "\n") + @test s == "\r\r" else @test occursin("shell> ", s) end - s = readuntil(stdout_read, "\n", keep=true) - @test s == "\e[0m\n" # the child has exited + s = readuntil(stdout_read, "\n") + @test s == "\e[0m" # the child printed nothing + wait(t) finally redirect_stdout(old_stdout) end @@ -699,14 +739,22 @@ fake_repl() do stdin_write, stdout_read, repl REPL.run_repl(repl) end - global c = Condition() - sendrepl2(cmd) = write(stdin_write, "$cmd\n notify($(curmod_prefix)c)\n") + global c = Base.Event(true) + function sendrepl2(cmd) + t = @async readuntil(stdout_read, "\"done\"\n\n") + write(stdin_write, "$cmd\n notify($(curmod_prefix)c); \"done\"\n") + wait(c) + fetch(t) + end # Test removal of prefix in single statement paste sendrepl2("\e[200~julia> A = 2\e[201~\n") - wait(c) @test Main.A == 2 + # Test removal of prefix in single statement paste + sendrepl2("\e[200~In [12]: A = 2.2\e[201~\n") + @test Main.A == 2.2 + # Test removal of prefix in multiple statement paste sendrepl2("""\e[200~ julia> mutable struct T17599; a::Int; end @@ -717,7 +765,6 @@ fake_repl() do stdin_write, stdout_read, repl julia> A = 3\e[201~ """) - wait(c) @test Main.A == 3 @test Base.invokelatest(Main.foo, 4) @test Base.invokelatest(Main.T17599, 3).a == 3 @@ -730,26 +777,22 @@ fake_repl() do stdin_write, stdout_read, repl julia> A = 4 4\e[201~ """) - wait(c) @test Main.A == 4 @test 
Base.invokelatest(Main.goo, 4) == 5 # Test prefix removal only active in bracket paste mode sendrepl2("julia = 4\n julia> 3 && (A = 1)\n") - wait(c) @test Main.A == 1 # Test that indentation corresponding to the prompt is removed - sendrepl2("""\e[200~julia> begin\n α=1\n β=2\n end\n\e[201~""") - wait(c) - readuntil(stdout_read, "begin") - @test readuntil(stdout_read, "end", keep=true) == "\n\r\e[7C α=1\n\r\e[7C β=2\n\r\e[7Cend" + s = sendrepl2("""\e[200~julia> begin\n α=1\n β=2\n end\n\e[201~""") + s2 = split(rsplit(s, "begin", limit=2)[end], "end", limit=2)[1] + @test s2 == "\n\r\e[7C α=1\n\r\e[7C β=2\n\r\e[7C" + # for incomplete input (`end` below is added after the end of bracket paste) - sendrepl2("""\e[200~julia> begin\n α=1\n β=2\n\e[201~end""") - wait(c) - readuntil(stdout_read, "begin") - readuntil(stdout_read, "begin") - @test readuntil(stdout_read, "end", keep=true) == "\n\r\e[7C α=1\n\r\e[7C β=2\n\r\e[7Cend" + s = sendrepl2("""\e[200~julia> begin\n α=1\n β=2\n\e[201~end""") + s2 = split(rsplit(s, "begin", limit=2)[end], "end", limit=2)[1] + @test s2 == "\n\r\e[7C α=1\n\r\e[7C β=2\n\r\e[7C" # Test switching repl modes redirect_stdout(devnull) do # to suppress "foo" echoes @@ -774,7 +817,6 @@ fake_repl() do stdin_write, stdout_read, repl julia> B = 2 2\e[201~ """) - wait(c) @test Main.A == 1 @test Main.B == 2 end # redirect_stdout @@ -812,13 +854,13 @@ fake_repl() do stdin_write, stdout_read, repl repltask = @async REPL.run_interface(repl.t, LineEdit.ModalInterface(Any[panel, search_prompt])) - write(stdin_write,"a\n") + write(stdin_write, "a\n") @test wait(c) == "a" # Up arrow enter should recall history even at the start - write(stdin_write,"\e[A\n") + write(stdin_write, "\e[A\n") @test wait(c) == "a" # And again - write(stdin_write,"\e[A\n") + write(stdin_write, "\e[A\n") @test wait(c) == "a" # Close REPL ^D write(stdin_write, '\x04') @@ -844,7 +886,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` output = readuntil(ptm, ' ', keep=true) if Sys.iswindows() - # Our fake pty is actually a pipe, and thus lacks the input echo feature of posix + # Our fake pty is actually a pipe, and thus lacks the input echo feature of posix @test output == "1\n\njulia> " else @test output == "1\r\nexit()\r\n1\r\n\r\njulia> " @@ -1080,16 +1122,18 @@ fake_repl() do stdin_write, stdout_read, repl end @eval Main module TestShowTypeREPL; export TypeA; struct TypeA end; end - write(stdin_write, "TestShowTypeREPL.TypeA\n") - @test endswith(readline(stdout_read), "\r\e[7CTestShowTypeREPL.TypeA\r\e[29C") - readline(stdout_read) - @test readline(stdout_read) == "" + t = @async write(stdin_write, "TestShowTypeREPL.TypeA\n") + s = readuntil(stdout_read, "\n\n") + s2 = rsplit(s, "\n", limit=2)[end] + @test s2 == "\e[0mMain.TestShowTypeREPL.TypeA" + wait(t) @eval Main using .TestShowTypeREPL readuntil(stdout_read, "julia> ", keep=true) - write(stdin_write, "TypeA\n") - @test endswith(readline(stdout_read), "\r\e[7CTypeA\r\e[12C") - readline(stdout_read) - @test readline(stdout_read) == "" + t = @async write(stdin_write, "TypeA\n") + s = readuntil(stdout_read, "\n\n") + s2 = rsplit(s, "\n", limit=2)[end] + @test s2 == "\e[0mTypeA" + wait(t) # Close REPL ^D readuntil(stdout_read, "julia> ", keep=true) @@ -1107,24 +1151,23 @@ fake_repl() do stdin_write, stdout_read, repl REPL.run_repl(repl) end - write(stdin_write, "(123, Base.Fix1)\n") - @test occursin("julia> ", split(readline(stdout_read), "Base.Fix1")[2]) - @test occursin("(123, Base.Fix1)", readline(stdout_read)) - readline(stdout_read) + 
write(stdin_write, " ( 123 , Base.Fix1 , ) \n") + s = readuntil(stdout_read, "\n\n") + @test endswith(s, "(123, Base.Fix1)") repl.mistate.active_module = Base # simulate activate_module(Base) - write(stdin_write, "(456, Base.Fix2)\n") - @test occursin("(Base) julia> ", split(readline(stdout_read), "Base.Fix2")[2]) + write(stdin_write, " ( 456 , Base.Fix2 , ) \n") + s = readuntil(stdout_read, "\n\n") # ".Base" prefix not shown here - @test occursin("(456, Fix2)", readline(stdout_read)) - readline(stdout_read) + @test endswith(s, "(456, Fix2)") # Close REPL ^D + readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') Base.wait(repltask) end -help_result(line, mod::Module=Base) = mod.eval(REPL._helpmode(IOBuffer(), line)) +help_result(line, mod::Module=Base) = Core.eval(mod, REPL._helpmode(IOBuffer(), line, mod)) # Docs.helpmode tests: we test whether the correct expressions are being generated here, # rather than complete integration with Julia's REPL mode system. @@ -1163,7 +1206,9 @@ end @test occursin("broadcast", sprint(show, help_result(".<="))) # Issue 39427 -@test occursin("does not exist", sprint(show, help_result(":="))) +@test occursin("does not exist.", sprint(show, help_result(":="))) +global some_undef_global +@test occursin("exists,", sprint(show, help_result("some_undef_global", @__MODULE__))) # Issue #40563 @test occursin("does not exist", sprint(show, help_result(".."))) @@ -1207,6 +1252,7 @@ let emptyH1 = Markdown.parse("# "), end module BriefExtended +public f, f_plain """ f() @@ -1248,15 +1294,18 @@ fake_repl() do stdin_write, stdout_read, repl repltask = @async begin REPL.run_repl(repl) end - write(stdin_write, "Expr(:call, GlobalRef(Base.Math, :float), Core.SlotNumber(1))\n") + t = @async write(stdin_write, "Expr(:call, GlobalRef(Base.Math, :float), Core.SlotNumber(1))\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0m:(Base.Math.float(_1))" - @test readline(stdout_read) == "" + s = readuntil(stdout_read, "\n\n") + @test endswith(s, "\e[0m:(Base.Math.float(_1))") + wait(t) + readuntil(stdout_read, "julia> ", keep=true) - write(stdin_write, "ans\n") + t = @async write(stdin_write, "ans\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0m:(Base.Math.float(_1))" - @test readline(stdout_read) == "" + s = readuntil(stdout_read, "\n\n") + @test endswith(s, "\e[0m:(Base.Math.float(_1))") + wait(t) readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') Base.wait(repltask) @@ -1267,17 +1316,21 @@ fake_repl() do stdin_write, stdout_read, repl repltask = @async begin REPL.run_repl(repl) end - write(stdin_write, "struct Errs end\n") - readline(stdout_read) + t = @async write(stdin_write, "struct Errs end\n") + readuntil(stdout_read, "\e[0m") readline(stdout_read) + wait(t) readuntil(stdout_read, "julia> ", keep=true) - write(stdin_write, "Base.show(io::IO, ::Errs) = throw(Errs())\n") + t = @async write(stdin_write, "Base.show(io::IO, ::Errs) = throw(Errs())\n") readline(stdout_read) + readuntil(stdout_read, "\e[0m") readline(stdout_read) + wait(t) readuntil(stdout_read, "julia> ", keep=true) - write(stdin_write, "Errs()\n") - readline(stdout_read) + t = @async write(stdin_write, "Errs()\n") readline(stdout_read) + readuntil(stdout_read, "\n\n") + wait(t) readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') wait(repltask) @@ -1291,7 +1344,8 @@ fake_repl() do stdin_write, stdout_read, repl end write(stdin_write, "?;\n") readline(stdout_read) - @test endswith(readline(stdout_read), "search: ;") + s = 
readline(stdout_read) + @test endswith(s, "search: ;") readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') Base.wait(repltask) @@ -1323,7 +1377,7 @@ fake_repl() do stdin_write, stdout_read, repl # necessary to read at least some part of the buffer, # for the "region_active" to have time to be updated - @test LineEdit.state(repl.mistate).region_active == :off + @test LineEdit.state(repl.mistate).region_active === :off @test s4 == "anything" # no control characters between the last two occurrences of "anything" write(stdin_write, "\x15\x04") Base.wait(repltask) @@ -1432,14 +1486,14 @@ fake_repl() do stdin_write, stdout_read, repl REPL.run_repl(repl) end # initialize `err` to `nothing` - write(stdin_write, "global err = nothing\n") - readline(stdout_read) - readline(stdout_read) == "\e[0m" + t = @async (readline(stdout_read); readuntil(stdout_read, "\e[0m\n")) + write(stdin_write, "setglobal!(Base.MainInclude, :err, nothing)\n") + wait(t) readuntil(stdout_read, "julia> ", keep=true) # generate top-level error write(stdin_write, "foobar\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0mERROR: UndefVarError: foobar not defined" + @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined in `Main`" @test readline(stdout_read) == "" readuntil(stdout_read, "julia> ", keep=true) # check that top-level error did not change `err` @@ -1449,18 +1503,21 @@ fake_repl() do stdin_write, stdout_read, repl readuntil(stdout_read, "julia> ", keep=true) # generate deeper error write(stdin_write, "foo() = foobar\n") + readuntil(stdout_read, "\n\e[0m", keep=true) readline(stdout_read) readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, "foo()\n") readline(stdout_read) - @test readline(stdout_read) == "\e[0mERROR: UndefVarError: foobar not defined" + @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined in `Main`" readuntil(stdout_read, "julia> ", keep=true) # check that deeper error did set `err` write(stdin_write, "err\n") readline(stdout_read) @test readline(stdout_read) == "\e[0m1-element ExceptionStack:" - @test readline(stdout_read) == "UndefVarError: foobar not defined" + @test readline(stdout_read) == "UndefVarError: `foobar` not defined in `Main`" @test readline(stdout_read) == "Stacktrace:" + readuntil(stdout_read, "\n\n", keep=true) + readuntil(stdout_read, "julia> ", keep=true) write(stdin_write, '\x04') Base.wait(repltask) end @@ -1548,3 +1605,147 @@ fake_repl() do stdin_write, stdout_read, repl LineEdit.edit_input(s, input_f) @test buffercontents(LineEdit.buffer(s)) == "1234αβ56γ" end + +# Non standard output_prefix, tested via `numbered_prompt!` +fake_repl() do stdin_write, stdout_read, repl + repl.interface = REPL.setup_interface(repl) + + backend = REPL.REPLBackend() + repltask = @async begin + REPL.run_repl(repl; backend) + end + + REPL.numbered_prompt!(repl, backend) + + global c = Base.Event(true) + function sendrepl2(cmd, txt) + t = @async write(stdin_write, "$cmd\n notify($(curmod_prefix)c); \"done\"\n") + r = readuntil(stdout_read, txt, keep=true) + readuntil(stdout_read, "\"done\"\n\n", keep=true) + wait(c) + wait(t) + return r + end + + s = sendrepl2("\"z\" * \"z\"\n", "\"zz\"") + @test contains(s, "In [1]") + @test endswith(s, "Out[1]: \"zz\"") + + s = sendrepl2("\"y\" * \"y\"\n", "\"yy\"") + @test endswith(s, "Out[3]: \"yy\"") + + s = sendrepl2("Out[1] * Out[3]\n", "\"zzyy\"") + @test endswith(s, "Out[5]: \"zzyy\"") + + # test a top-level expression + s = sendrepl2("import REPL\n", "In [8]") + 
@test !contains(s, "ERROR") + @test !contains(s, "[6]") + @test !contains(s, "Out[7]:") + @test contains(s, "In [7]: ") + @test contains(s, "import REPL") + s = sendrepl2("REPL\n", "In [10]") + @test contains(s, "Out[9]: REPL") + + # Test for https://github.com/JuliaLang/julia/issues/46451 + s = sendrepl2("x_47878 = range(-1; stop = 1)\n", "-1:1") + @test contains(s, "Out[11]: -1:1") + + # Test for https://github.com/JuliaLang/julia/issues/49041 + s = sendrepl2("using Test; @test true", "In [14]") + @test !contains(s, "ERROR") + @test contains(s, "Test Passed") + + # Test for https://github.com/JuliaLang/julia/issues/49319 + s = sendrepl2("# comment", "In [16]") + @test !contains(s, "ERROR") + + write(stdin_write, '\x04') + Base.wait(repltask) +end + +fake_repl() do stdin_write, stdout_read, repl + backend = REPL.REPLBackend() + repltask = @async REPL.run_repl(repl; backend) + write(stdin_write, + "a = UInt8(81):UInt8(160); b = view(a, 1:64); c = reshape(b, (8, 8)); d = reinterpret(reshape, Float64, c); sqrteach(a) = [sqrt(x) for x in a]; sqrteach(d)\n\"ZZZZZ\"\n") + txt = readuntil(stdout_read, "ZZZZZ") + write(stdin_write, '\x04') + wait(repltask) + @test contains(txt, "Some type information was truncated. Use `show(err)` to see complete types.") +end + +# Hints for tab completes + +fake_repl() do stdin_write, stdout_read, repl + repltask = @async begin + REPL.run_repl(repl) + end + write(stdin_write, "reada") + s1 = readuntil(stdout_read, "reada") # typed + s2 = readuntil(stdout_read, "vailable") # partial hint + + write(stdin_write, "x") # "readax" doesn't tab complete so no hint + # we can't use readuntil given this doesn't print, so just wait for the hint state to be reset + while LineEdit.state(repl.mistate).hint !== nothing + sleep(0.1) + end + @test LineEdit.state(repl.mistate).hint === nothing + + write(stdin_write, "\b") # only tab complete while typing forward + while LineEdit.state(repl.mistate).hint !== nothing + sleep(0.1) + end + @test LineEdit.state(repl.mistate).hint === nothing + + write(stdin_write, "v") + s3 = readuntil(stdout_read, "ailable") # partial hint + + write(stdin_write, "\t") + s4 = readuntil(stdout_read, "readavailable") # full completion is reprinted + + write(stdin_write, "\x15") + write(stdin_write, "x") # single chars shouldn't hint e.g. 
`x` shouldn't hint at `xor` + while LineEdit.state(repl.mistate).hint !== nothing + sleep(0.1) + end + @test LineEdit.state(repl.mistate).hint === nothing + + # issue #52376 + write(stdin_write, "\x15") + write(stdin_write, "\\_ailuj") + while LineEdit.state(repl.mistate).hint !== nothing + sleep(0.1) + end + @test LineEdit.state(repl.mistate).hint === nothing + s5 = readuntil(stdout_read, "\\_ailuj") + write(stdin_write, "\t") + s6 = readuntil(stdout_read, "ₐᵢₗᵤⱼ") + + write(stdin_write, "\x15\x04") + Base.wait(repltask) +end +## hints disabled +fake_repl(options=REPL.Options(confirm_exit=false,hascolor=true,hint_tab_completes=false)) do stdin_write, stdout_read, repl + repltask = @async begin + REPL.run_repl(repl) + end + write(stdin_write, "reada") + s1 = readuntil(stdout_read, "reada") # typed + @test LineEdit.state(repl.mistate).hint === nothing + + write(stdin_write, "\x15\x04") + Base.wait(repltask) + @test !occursin("vailable", String(readavailable(stdout_read))) +end + +# banner +let io = IOBuffer() + @test REPL.banner(io) === nothing + seek(io, 0) + @test countlines(io) == 9 + take!(io) + @test REPL.banner(io; short=true) === nothing + seek(io, 0) + @test countlines(io) == 2 +end diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index f584569519c22..2fe6c987de63d 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -4,12 +4,14 @@ using REPL.REPLCompletions using Test using Random using REPL - @testset "Check symbols previously not shown by REPL.doc_completions()" begin + +@testset "Check symbols previously not shown by REPL.doc_completions()" begin symbols = ["?","=","[]","[","]","{}","{","}",";","","'","&&","||","julia","Julia","new","@var_str"] - for i in symbols - @test i ∈ REPL.doc_completions(i, Main) - end + for i in symbols + @test i ∈ REPL.doc_completions(i, Main) end +end + let ex = quote module CompletionFoo using Random @@ -23,6 +25,9 @@ let ex = quote end type_test = Test_x(Test_y(1)) (::Test_y)() = "", "" + unicode_αβγ = Test_y(1) + + Base.:(+)(x::Test_x, y::Test_y) = Test_x(Test_y(x.xx.yy + y.yy)) module CompletionFoo2 end @@ -101,15 +106,27 @@ let ex = quote test11(x::Int, y::Int, z) = pass test11(_, _, s::String) = pass + test!12() = pass + kwtest(; x=1, y=2, w...) = pass kwtest2(a; x=1, y=2, w...) = pass + kwtest3(a::Number; length, len2, foobar, kwargs...) = pass + kwtest3(a::Real; another!kwarg, len2) = pass + kwtest3(a::Integer; namedarg, foobar, slurp...) 
= pass + kwtest4(a::AbstractString; _a1b, x23) = pass + kwtest4(a::String; _a1b, xαβγ) = pass + kwtest4(a::SubString; x23, _something) = pass + kwtest5(a::Int, b, x...; somekwarg, somekotherkwarg) = pass + kwtest5(a::Char, b; xyz) = pass + + const named = (; len2=3) array = [1, 1] varfloat = 0.1 const tuple = (1, 2) - test_y_array=[CompletionFoo.Test_y(rand()) for i in 1:10] + test_y_array=[(@__MODULE__).Test_y(rand()) for i in 1:10] test_dict = Dict("abc"=>1, "abcd"=>10, :bar=>2, :bar2=>9, Base=>3, occursin=>4, `ls`=>5, 66=>7, 67=>8, ("q",3)=>11, "α"=>12, :α=>13) @@ -120,7 +137,16 @@ let ex = quote macro testcmd_cmd(s) end macro tϵsτcmδ_cmd(s) end - end + var"complicated symbol with spaces" = 5 + + struct WeirdNames end + Base.propertynames(::WeirdNames) = (Symbol("oh no!"), Symbol("oh yes!")) + + # https://github.com/JuliaLang/julia/issues/52551#issuecomment-1858543413 + export exported_symbol + exported_symbol(::WeirdNames) = nothing + + end # module CompletionFoo test_repl_comp_dict = CompletionFoo.test_dict test_repl_comp_customdict = CompletionFoo.test_customdict test_dict_ℂ = Dict(1=>2) @@ -137,13 +163,27 @@ end test_complete(s) = map_completion_text(@inferred(completions(s, lastindex(s)))) test_scomplete(s) = map_completion_text(@inferred(shell_completions(s, lastindex(s)))) test_bslashcomplete(s) = map_completion_text(@inferred(bslash_completions(s, lastindex(s)))[2]) -test_complete_context(s, m) = map_completion_text(@inferred(completions(s,lastindex(s), m))) +test_complete_context(s, m=@__MODULE__) = map_completion_text(@inferred(completions(s,lastindex(s), m))) test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo) test_complete_noshift(s) = map_completion_text(@inferred(completions(s, lastindex(s), Main, false))) +test_methods_list(@nospecialize(f), tt) = map(x -> string(x.method), Base._methods_by_ftype(Base.signature_type(f, tt), 10, Base.get_world_counter())) + + module M32377 end test_complete_32377(s) = map_completion_text(completions(s,lastindex(s), M32377)) +macro test_nocompletion(s) + tests = [ + :(@test c == String[]), + :(@test res === false) + ] + for t in tests + t.args[2] = __source__ # fix the LineNumberNode + end + return Expr(:let, Expr(:(=), :((c, _, res)), :(test_complete($(esc(s))))), Expr(:block, tests...)) +end + let s = "" c, r = test_complete(s) @test "CompletionFoo" in c @@ -222,6 +262,11 @@ let s = "Main.CompletionFoo.type_test.x" @test s[r] == "x" end +let s = "Main.CompletionFoo.unicode_αβγ.y" + c, r = test_complete(s) + @test "yy" in c +end + let s = "Main.CompletionFoo.bar.no_val_available" c, r = test_complete(s) @test length(c)==0 @@ -270,23 +315,23 @@ let @test isempty(c) end -# inexistent completion inside a string -let s = "Base.print(\"lol" - c, r, res = test_complete(s) - @test res == false +# issue 46800: (3,2).<TAB> errors in the REPL +let + c, r = test_complete("(3,2).") + @test isempty(c) end +# inexistent completion inside a string +@test_nocompletion("Base.print(\"lol") + # inexistent completion inside a cmd -let s = "run(`lol" - c, r, res = test_complete(s) - @test res == false -end +@test_nocompletion("run(`lol") # test latex symbol completions let s = "\\alpha" c, r = test_bslashcomplete(s) @test c[1] == "α" - @test r == 1:length(s) + @test r == 1:lastindex(s) @test length(c) == 1 end @@ -395,8 +440,9 @@ end let s = "CompletionFoo.test(1, 1, " c, r, res = test_complete(s) @test !res - @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Int, Int}))) - @test c[2] == 
string(first(methods(Main.CompletionFoo.test, Tuple{}))) # corresponding to the vararg + m = test_methods_list(Main.CompletionFoo.test, Tuple{Int, Int, Vararg}) + @test c[1] == m[1] + @test c[2] == m[2] @test length(c) == 2 # In particular, this checks that test(x::Real, y::Real) is not a valid completion # since it is strictly less specific than test(x::T, y::T) where T @@ -407,7 +453,7 @@ end let s = "CompletionFoo.test(CompletionFoo.array," c, r, res = test_complete(s) @test !res - @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Array{Int, 1}, Any}))) + @test c[1] == first(test_methods_list(Main.CompletionFoo.test, Tuple{Array{Int, 1}, Any, Vararg})) @test length(c) == 2 @test r == 1:18 @test s[r] == "CompletionFoo.test" @@ -416,7 +462,7 @@ end let s = "CompletionFoo.test(1,1,1," c, r, res = test_complete(s) @test !res - @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Any, Any, Any}))) + @test c[1] == first(test_methods_list(Main.CompletionFoo.test, Tuple{Any, Any, Any, Vararg})) @test length(c) == 1 @test r == 1:18 @test s[r] == "CompletionFoo.test" @@ -440,7 +486,7 @@ end let s = "prevind(\"θ\",1," c, r, res = test_complete(s) - @test c[1] == string(first(methods(prevind, Tuple{String, Int}))) + @test c[1] == first(test_methods_list(prevind, Tuple{String, Int, Vararg})) @test r == 1:7 @test s[r] == "prevind" end @@ -449,7 +495,7 @@ for (T, arg) in [(String,"\")\""),(Char, "')'")] s = "(1, CompletionFoo.test2($arg," c, r, res = test_complete(s) @test length(c) == 1 - @test c[1] == string(first(methods(Main.CompletionFoo.test2, Tuple{T,}))) + @test c[1] == first(test_methods_list(Main.CompletionFoo.test2, Tuple{T, Vararg})) @test r == 5:23 @test s[r] == "CompletionFoo.test2" end @@ -457,19 +503,19 @@ end let s = "(1, CompletionFoo.test2(`')'`," c, r, res = test_complete(s) @test length(c) == 1 - @test c[1] == string(first(methods(Main.CompletionFoo.test2, Tuple{Cmd}))) + @test c[1] == first(test_methods_list(Main.CompletionFoo.test2, Tuple{Cmd, Vararg})) end let s = "CompletionFoo.test3([1, 2] .+ CompletionFoo.varfloat," c, r, res = test_complete(s) @test !res - @test_broken only(c) == string(first(methods(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64}))) + @test only(c) == first(test_methods_list(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64, Vararg})) end let s = "CompletionFoo.test3([1.,2.], 1.," c, r, res = test_complete(s) @test !res - @test c[1] == string(first(methods(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64}))) + @test c[1] == first(test_methods_list(Main.CompletionFoo.test3, Tuple{Array{Float64, 1}, Float64, Vararg})) @test r == 1:19 @test length(c) == 1 @test s[r] == "CompletionFoo.test3" @@ -478,7 +524,7 @@ end let s = "CompletionFoo.test4(\"e\",r\" \"," c, r, res = test_complete(s) @test !res - @test c[1] == string(first(methods(Main.CompletionFoo.test4, Tuple{String, Regex}))) + @test c[1] == first(test_methods_list(Main.CompletionFoo.test4, Tuple{String, Regex, Vararg})) @test r == 1:19 @test length(c) == 1 @test s[r] == "CompletionFoo.test4" @@ -489,7 +535,7 @@ end let s = "CompletionFoo.test5(broadcast((x,y)->x==y, push!(Base.split(\"\",' '),\"\",\"\"), \"\")," c, r, res = test_complete(s) @test !res - @test_broken only(c) == string(first(methods(Main.CompletionFoo.test5, Tuple{BitArray{1}}))) + @test_broken only(c) == first(test_methods_list(Main.CompletionFoo.test5, Tuple{BitArray{1}, Vararg})) end # test partial expression expansion @@ -497,17 +543,17 @@ let s = 
"CompletionFoo.test5(Bool[x==1 for x=1:4]," c, r, res = test_complete(s) @test !res @test length(c) == 1 - @test c[1] == string(first(methods(Main.CompletionFoo.test5, Tuple{Array{Bool,1}}))) + @test c[1] == first(test_methods_list(Main.CompletionFoo.test5, Tuple{Array{Bool,1}, Vararg})) end let s = "CompletionFoo.test4(CompletionFoo.test_y_array[1]()[1], CompletionFoo.test_y_array[1]()[2], " c, r, res = test_complete(s) @test !res @test length(c) == 1 - @test c[1] == string(first(methods(Main.CompletionFoo.test4, Tuple{String, String}))) + @test c[1] == first(test_methods_list(Main.CompletionFoo.test4, Tuple{String, String, Vararg})) end -# Test that string escaption is handled correct +# Test that string escaping is handled correct let s = """CompletionFoo.test4("\\"",""" c, r, res = test_complete(s) @test !res @@ -525,22 +571,17 @@ let s = "convert(" @test length(c2) > REPL.REPLCompletions.MAX_METHOD_COMPLETIONS end -########## Test where the current inference logic fails ######## -# Fails due to inference fails to determine a concrete type for arg 1 -# But it returns AbstractArray{T,N} and hence is able to remove test5(x::Float64) from the suggestions -let s = "CompletionFoo.test5(AbstractArray[[]][1]," +let s = "CompletionFoo.test5(AbstractArray[Bool[]][1]," c, r, res = test_complete(s) @test !res - @test length(c) == 2 + @test length(c) == 1 end -# equivalent to above but due to the time macro the completion fails to find the concrete type -let s = "CompletionFoo.test3(@time([1, 2] + CompletionFoo.varfloat)," +let s = "CompletionFoo.test3(@time([1, 2] .+ CompletionFoo.varfloat)," c, r, res = test_complete(s) @test !res - @test length(c) == 2 + @test length(c) == 1 end -################################################################# # method completions with kwargs let s = "CompletionFoo.kwtest( " @@ -548,27 +589,58 @@ let s = "CompletionFoo.kwtest( " @test !res @test length(c) == 1 @test occursin("x, y, w...", c[1]) + @test (c, r, res) == test_complete("CompletionFoo.kwtest(;") + @test (c, r, res) == test_complete("CompletionFoo.kwtest(; x=1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest(; kw=1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest(x=1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest(x=1; ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest(x=kw=1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest(; x=kw=1, ") end -for s in ("CompletionFoo.kwtest(;", - "CompletionFoo.kwtest(; x=1, ", - "CompletionFoo.kwtest(; kw=1, ", - ) +let s = "CompletionFoo.kwtest2(1, x=1," c, r, res = test_complete(s) @test !res @test length(c) == 1 - @test occursin("x, y, w...", c[1]) + @test occursin("a; x, y, w...", c[1]) + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(1; x=1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(1, x=1; ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(1, kw=1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(1; kw=1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(1, kw=1; ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(y=3, 1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(y=3, 1; ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(kw=3, 1, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(kw=3, 1; ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(1; ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest2(1, ") +end + +let s = "CompletionFoo.kwtest4(x23=18, x; 
" + c, r, res = test_complete(s) + @test !res + @test length(c) == 3 # TODO: remove "kwtest4(a::String; _a1b, xαβγ)" + @test any(str->occursin("kwtest4(a::SubString", str), c) + @test any(str->occursin("kwtest4(a::AbstractString", str), c) + @test (c, r, res) == test_complete("CompletionFoo.kwtest4(x23=18, x, ") + @test (c, r, res) == test_complete("CompletionFoo.kwtest4(x23=18, ") end -for s in ("CompletionFoo.kwtest2(1; x=1,", - "CompletionFoo.kwtest2(1; kw=1, ", - ) +# TODO: @test_nocompletion("CompletionFoo.kwtest4(x23=17; ") +# TODO: @test_nocompletion("CompletionFoo.kwtest4.(x23=17; ") + +let s = "CompletionFoo.kwtest5(3, somekwarg=6," c, r, res = test_complete(s) @test !res @test length(c) == 1 - @test occursin("a; x, y, w...", c[1]) + @test occursin("kwtest5(a::$(Int), b, x...; somekwarg, somekotherkwarg)", c[1]) + @test (c, r, res) == test_complete("CompletionFoo.kwtest5(3, somekwarg=6, anything, ") end +# TODO: @test_nocompletion("CompletionFoo.kwtest5(3; somekwarg=6,") +# TODO: @test_nocompletion("CompletionFoo.kwtest5(3;") +# TODO: @test_nocompletion("CompletionFoo.kwtest5(3; somekwarg=6, anything, ") + ################################################################# # method completion with `?` (arbitrary method with given argument types) @@ -644,6 +716,39 @@ let s = "CompletionFoo.?()" @test occursin("test10(s::String...)", c[1]) end +#= TODO: restrict the number of completions when a semicolon is present in ".?(" syntax +let s = "CompletionFoo.?(; y=2, " + c, r, res = test_complete(s) + @test !res + @test length(c) == 4 + @test all(x -> occursin("kwtest", x), c) + # We choose to include kwtest2 and kwtest3 although the number of args if wrong. + # This is because the ".?(" syntax with no closing parenthesis does not constrain the + # number of arguments in the methods it suggests. +end + +let s = "CompletionFoo.?(3; len2=5, " + c, r, res = test_complete_noshift(s) + @test !res + @test length(c) == 1 + @test occursin("kwtest3(a::Integer; namedarg, foobar, slurp...)", c[1]) + # the other two kwtest3 methods should not appear because of specificity +end +=# + +# For the ".?(" syntax, do not constrain the number of arguments even with a semicolon. 
+@test test_complete("CompletionFoo.?(; ") == + test_complete("CompletionFoo.?(") + +#TODO: @test test_complete("CompletionFoo.?(Any[]...; ") == test_complete("CompletionFoo.?(Cmd[]..., ") == test_complete("CompletionFoo.?(") + +@test test_complete("CompletionFoo.?()") == test_complete("CompletionFoo.?(;)") + +#TODO: @test_nocompletion("CompletionFoo.?(3; len2=5; ") + +# https://github.com/JuliaLang/julia/issues/52551 +@test !isempty(test_complete("?(")) + ################################################################# # Test method completion with varargs @@ -696,7 +801,7 @@ end let s = "CompletionFoo.test10(\"a\", Union{Signed,Bool,String}[3][1], " c, r, res = test_complete(s) @test !res - @test length(c) == 4 + @test length(c) == 2 @test all(startswith("test10("), c) @test allunique(c) @test !any(str->occursin("test10(a::Integer, b::Integer, c)", str), c) @@ -706,7 +811,7 @@ end let s = "CompletionFoo.test11(Integer[false][1], Integer[14][1], " c, r, res = test_complete(s) @test !res - @test length(c) == 4 + @test length(c) == 3 @test all(startswith("test11("), c) @test allunique(c) end @@ -714,16 +819,16 @@ end let s = "CompletionFoo.test11(Integer[-7][1], Integer[0x6][1], 6," c, r, res = test_complete(s) @test !res - @test length(c) == 3 + @test length(c) == 2 @test any(str->occursin("test11(a::Integer, b, c)", str), c) @test any(str->occursin("test11(u, v::Integer, w)", str), c) - @test any(str->occursin("test11(x::$Int, y::$Int, z)", str), c) + @test !any(str->occursin("test11(x::$Int, y::$Int, z)", str), c) end let s = "CompletionFoo.test11(3, 4," c, r, res = test_complete(s) @test !res - @test length(c) == 4 + @test length(c) == 2 @test any(str->occursin("test11(x::$Int, y::$Int, z)", str), c) @test any(str->occursin("test11(::Any, ::Any, s::String)", str), c) end @@ -753,6 +858,56 @@ let s = "CompletionFoo.test11('d', 3," @test any(str->occursin("test11(::Any, ::Any, s::String)", str), c) end +let s = "CompletionFoo.test!12(" + c, r, res = test_complete(s) + @test !res + @test occursin("test!12()", only(c)) +end + +#= TODO: Test method completion depending on the number of arguments with splatting + +@test_nocompletion("CompletionFoo.test3(unknown; ") +@test_nocompletion("CompletionFoo.test3.(unknown; ") + +let s = "CompletionFoo.test2(unknown..., somethingelse..., xyz...; " # splat may be empty + c, r, res = test_complete(s) + @test !res + @test length(c) == 3 + @test all(str->occursin("test2(", str), c) + @test (c, r, res) == test_complete("CompletionFoo.test2(unknown..., somethingelse..., xyz, ") + @test (c, r, res) == test_complete("CompletionFoo.test2(unknown..., somethingelse..., xyz; ") +end + +let s = "CompletionFoo.test('a', args..., 'b';" + c, r, res = test_complete(s) + @test !res + @test length(c) == 1 + @test occursin("test(args...)", c[1]) + @test (c, r, res) == test_complete("CompletionFoo.test(a, args..., b, c;") +end + +let s = "CompletionFoo.test(3, 5, args...,;" + c, r, res = test_complete(s) + @test !res + @test length(c) == 2 + @test any(str->occursin("test(x::T, y::T) where T<:Real", str), c) + @test any(str->occursin("test(args...)", str), c) +end +=# + +# Test that method calls with ill-formed kwarg syntax are not completed + +@test_nocompletion("CompletionFoo.kwtest(; x=2, y=4; kw=3, ") +@test_nocompletion("CompletionFoo.kwtest(x=2; y=4; ") +@test_nocompletion("CompletionFoo.kwtest((x=y)=4, ") +@test_nocompletion("CompletionFoo.kwtest(; (x=y)=4, ") +@test_nocompletion("CompletionFoo.kwtest(; w...=16, ") +@test_nocompletion("CompletionFoo.kwtest(; 2, ") 
+@test_nocompletion("CompletionFoo.kwtest(; 2=3, ") +@test_nocompletion("CompletionFoo.kwtest3(im; (true ? length : length), ") +@test_nocompletion("CompletionFoo.kwtest.(x=2; y=4; ") +@test_nocompletion("CompletionFoo.kwtest.(; w...=16, ") + # Test of inference based getfield completion let s = "(1+2im)." c,r = test_complete(s) @@ -789,6 +944,13 @@ let s = "CompletionFoo.test6()[1](CompletionFoo.Test_y(rand())).y" @test c[1] == "yy" end +let s = "CompletionFoo.named." + c, r = test_complete(s) + @test length(c) == 1 + @test r == (lastindex(s) + 1):lastindex(s) + @test c[1] == "len2" +end + # Test completion in multi-line comments let s = "#=\n\\alpha" c, r, res = test_complete(s) @@ -888,7 +1050,7 @@ let s, c, r s = "@show \"/dev/nul\"" c,r = completions(s, 15) c = map(completion_text, c) - @test "null" in c + @test "null\"" in c @test r == 13:15 @test s[r] == "nul" @@ -912,8 +1074,8 @@ let s, c, r if !isdir(joinpath(s, "tmp")) c,r = test_scomplete(s) @test !("tmp/" in c) - @test r === length(s) + 1:0 - @test s[r] == "" + @test !("$s/tmp/" in c) + @test r === (sizeof(s) + 1):sizeof(s) end s = "cd \$(Iter" @@ -938,7 +1100,7 @@ let s, c, r touch(file) s = string(tempdir(), "/repl\\ ") c,r = test_scomplete(s) - @test ["repl\\ completions"] == c + @test ["'repl completions'"] == c @test s[r] == "repl\\ " rm(file) end @@ -958,22 +1120,29 @@ let s, c, r s = "\"~" @test "tmpfoobar/" in c c,r = test_complete(s) + s = "\"~user" + c, r = test_complete(s) + @test isempty(c) rm(dir) end end # Tests detecting of files in the env path (in shell mode) - let path, file - path = tempdir() - unreadable = joinpath(tempdir(), "replcompletion-unreadable") + mktempdir() do path + unreadable = joinpath(path, "replcompletion-unreadable") + file = joinpath(path, "tmp-executable") + touch(file) + chmod(file, 0o755) + mkdir(unreadable) + hidden_file = joinpath(unreadable, "hidden") + touch(hidden_file) - try - file = joinpath(path, "tmp-executable") - touch(file) - chmod(file, 0o755) - mkdir(unreadable) - chmod(unreadable, 0o000) + # Create symlink to a file that is in an unreadable directory + chmod(hidden_file, 0o755) + chmod(unreadable, 0o000) + symlink(hidden_file, joinpath(path, "replcompletions-link")) + try # PATH can also contain folders which we aren't actually allowed to read. withenv("PATH" => string(path, ":", unreadable)) do s = "tmp-execu" @@ -981,10 +1150,13 @@ let s, c, r @test "tmp-executable" in c @test r == 1:9 @test s[r] == "tmp-execu" + + c,r = test_scomplete("replcompletions-link") + @test isempty(c) end finally - rm(file) - rm(unreadable) + # If we don't fix the permissions here, our cleanup fails. + chmod(unreadable, 0o700) end end @@ -1029,7 +1201,7 @@ let current_dir, forbidden catch e e isa Base.IOError && occursin("ELOOP", e.msg) end - c, r = test_complete("\"$(joinpath(path, "selfsym"))") + c, r = test_complete("\"$(escape_string(path))/selfsym") @test c == ["selfsymlink"] end end @@ -1059,26 +1231,62 @@ end mktempdir() do path space_folder = randstring() * " α" dir = joinpath(path, space_folder) - dir_space = replace(space_folder, " " => "\\ ") - mkdir(dir) cd(path) do - open(joinpath(space_folder, "space .file"),"w") do f - s = Sys.iswindows() ? "rm $dir_space\\\\space" : "cd $dir_space/space" - c, r = test_scomplete(s) - @test r == lastindex(s)-4:lastindex(s) - @test "space\\ .file" in c + touch(joinpath(space_folder, "space .file")) + + dir_space = replace(space_folder, " " => "\\ ") + s = Sys.iswindows() ? 
"cd $dir_space\\\\space" : "cd $dir_space/space" + c, r = test_scomplete(s) + @test s[r] == (Sys.iswindows() ? "$dir_space\\\\space" : "$dir_space/space") + @test "'$space_folder'/'space .file'" in c + # Also use shell escape rules within cmd backticks + s = "`$s" + c, r = test_scomplete(s) + @test s[r] == (Sys.iswindows() ? "$dir_space\\\\space" : "$dir_space/space") + @test "'$space_folder'/'space .file'" in c + + # escape string according to Julia escaping rules + julia_esc(str) = REPL.REPLCompletions.do_string_escape(str) + + # For normal strings the string should be properly escaped according to + # the usual rules for Julia strings. + s = "cd(\"" * julia_esc(joinpath(path, space_folder) * "/space") + c, r = test_complete(s) + @test s[r] == "space" + @test "space .file\"" in c + + # '$' is the only character which can appear in a windows filename and + # which needs to be escaped in Julia strings (on unix we could do this + # test with all sorts of special chars) + touch(joinpath(space_folder, "needs_escape\$.file")) + escpath = julia_esc(joinpath(path, space_folder) * "/needs_escape\$") + s = "cd(\"$escpath" + c, r = test_complete(s) + @test s[r] == "needs_escape\\\$" + @test "needs_escape\\\$.file\"" in c + + if !Sys.iswindows() + touch(joinpath(space_folder, "needs_escape2\n\".file")) + escpath = julia_esc(joinpath(path, space_folder, "needs_escape2\n\"")) + s = "cd(\"$escpath" + c, r = test_complete(s) + @test s[r] == "needs_escape2\\n\\\"" + @test "needs_escape2\\n\\\".file\"" in c - s = Sys.iswindows() ? "cd(\"β $dir_space\\\\space" : "cd(\"β $dir_space/space" + touch(joinpath(space_folder, "needs_escape3\\.file")) + escpath = julia_esc(joinpath(path, space_folder, "needs_escape3\\")) + s = "cd(\"$escpath" c, r = test_complete(s) - @test r == lastindex(s)-4:lastindex(s) - @test "space .file\"" in c + @test s[r] == "needs_escape3\\\\" + @test "needs_escape3\\\\.file\"" in c end + # Test for issue #10324 - s = "cd(\"$dir_space" + s = "cd(\"$space_folder" c, r = test_complete(s) - @test r == 5:15 - @test s[r] == dir_space + @test r == 5:14 + @test s[r] == space_folder #Test for #18479 for c in "'`@\$;&" @@ -1089,11 +1297,12 @@ mktempdir() do path # in shell commands the shell path completion cannot complete # paths with these characters c, r, res = test_scomplete(test_dir) - @test c[1] == test_dir*(Sys.iswindows() ? "\\\\" : "/") + @test c[1] == "'$test_dir/'" @test res end - c, r, res = test_complete("\""*test_dir) - @test c[1] == test_dir*(Sys.iswindows() ? 
"\\\\" : "/") + escdir = julia_esc(test_dir) + c, r, res = test_complete("\""*escdir) + @test c[1] == escdir * "/" @test res finally rm(joinpath(path, test_dir), recursive=true) @@ -1129,27 +1338,43 @@ if Sys.iswindows() cd(path) do s = "cd ..\\\\" c,r = test_scomplete(s) - @test r == length(s)+1:length(s) - @test temp_name * "\\\\" in c + @test r == lastindex(s)-3:lastindex(s) + @test "../$temp_name/" in c + + s = "cd ../" + c,r = test_scomplete(s) + @test r == lastindex(s)+1:lastindex(s) + @test "$temp_name/" in c s = "ls $(file[1:2])" c,r = test_scomplete(s) - @test r == length(s)-1:length(s) + @test r == lastindex(s)-1:lastindex(s) @test file in c - s = "cd(\"..\\" + s = "cd(\"..\\\\" c,r = test_complete(s) - @test r == length(s)+1:length(s) - @test temp_name * "\\\\" in c + @test r == lastindex(s)-3:lastindex(s) + @test "../$temp_name/" in c + + s = "cd(\"../" + c,r = test_complete(s) + @test r == lastindex(s)+1:lastindex(s) + @test "$temp_name/" in c s = "cd(\"$(file[1:2])" c,r = test_complete(s) - @test r == length(s) - 1:length(s) + @test r == lastindex(s) - 1:lastindex(s) @test (length(c) > 1 && file in c) || (["$file\""] == c) end rm(tmp) end +# issue 51985 +let s = "`\\" + c,r = test_scomplete(s) + @test r == lastindex(s)+1:lastindex(s) +end + # auto completions of true and false... issue #14101 let s = "tru" c, r, res = test_complete(s) @@ -1260,6 +1485,129 @@ test_dict_completion("test_repl_comp_customdict") @test "tϵsτcmδ`" in c end +@testset "Keyword-argument completion" begin + c, r = test_complete("CompletionFoo.kwtest3(a;foob") + @test c == ["foobar="] + c, r = test_complete("CompletionFoo.kwtest3(a; le") + @test "length" ∉ c + @test "length=" ∈ c + @test "len2=" ∈ c + @test "len2" ∉ c + c, r = test_complete("CompletionFoo.kwtest3.(a;\nlength") + @test "length" ∉ c + @test "length=" ∈ c + c, r = test_complete("CompletionFoo.kwtest3(a, length=4, l") + @test "length" ∈ c + @test "length=" ∉ c # since it was already used, do not suggest it again + @test "len2=" ∈ c + c, r = test_complete("CompletionFoo.kwtest3(a; kwargs..., fo") + @test "foreach" ∉ c + @test "foobar=" ∈ c + c, r = test_complete("CompletionFoo.kwtest3(a; another!kwarg=0, le") + @test "length" ∉ c + @test "length=" ∈ c # the first method could be called and `anotherkwarg` slurped + @test "len2=" ∈ c + c, r = test_complete("CompletionFoo.kwtest3(a; another!") + @test c == ["another!kwarg="] + c, r = test_complete("CompletionFoo.kwtest3(a; another!kwarg=0, foob") + @test c == ["foobar="] # the first method could be called and `anotherkwarg` slurped + c, r = test_complete("CompletionFoo.kwtest3(a; namedarg=0, foob") + @test c == ["foobar="] + + # Check for confusion with CompletionFoo.named + c, r = test_complete_foo("kwtest3(blabla; unknown=4, namedar") + @test c == ["namedarg="] + c, r = test_complete_foo("kwtest3(blabla; named") + @test "named" ∉ c + @test "namedarg=" ∈ c + @test "len2" ∉ c + c, r = test_complete_foo("kwtest3(blabla; named.") + @test c == ["len2"] + c, r = test_complete_foo("kwtest3(blabla; named..., another!") + @test c == ["another!kwarg="] + c, r = test_complete_foo("kwtest3(blabla; named..., len") + @test "length" ∉ c + @test "length=" ∈ c + @test "len2=" ∈ c + c, r = test_complete_foo("kwtest3(1+3im; named") + @test "named" ∉ c + # TODO: @test "namedarg=" ∉ c + @test "len2" ∉ c + c, r = test_complete_foo("kwtest3(1+3im; named.") + @test c == ["len2"] + + c, r = test_complete("CompletionFoo.kwtest4(a; x23=0, _") + @test "_a1b=" ∈ c + @test "_something=" ∈ c + c, r = 
test_complete("CompletionFoo.kwtest4(a; xαβγ=1, _") + @test "_a1b=" ∈ c + # TODO: @test "_something=" ∉ c # no such keyword for the method with keyword `xαβγ` + c, r = test_complete("CompletionFoo.kwtest4.(a; xαβγ=1, _") + @test "_a1b=" ∈ c + # TODO: @test "_something=" ∉ c # broadcasting does not affect the existence of kwargs + c, r = test_complete("CompletionFoo.kwtest4(a; x23=0, x") + @test "x23=" ∉ c + # TODO: @test "xαβγ=" ∉ c + c, r = test_complete("CompletionFoo.kwtest4.(a; x23=0, x") + @test "x23=" ∉ c + # TODO: @test "xαβγ=" ∉ c + c, r = test_complete("CompletionFoo.kwtest4(a; _a1b=1, x") + @test "x23=" ∈ c + @test "xαβγ=" ∈ c + + c, r = test_complete("CompletionFoo.kwtest5(3, 5; somek") + @test c == ["somekotherkwarg=", "somekwarg="] + c, r = test_complete("CompletionFoo.kwtest5(3, 5, somekwarg=4, somek") + @test c == ["somekotherkwarg="] + c, r = test_complete("CompletionFoo.kwtest5(3, 5, 7; somekw") + @test c == ["somekwarg="] + c, r = test_complete("CompletionFoo.kwtest5(3, 5, 7, 9; somekw") + @test c == ["somekwarg="] + c, r = test_complete("CompletionFoo.kwtest5(3, 5, 7, 9, Any[]...; somek") + @test c == ["somekotherkwarg=", "somekwarg="] + c, r = test_complete("CompletionFoo.kwtest5(unknownsplat...; somekw") + @test c == ["somekwarg="] + c, r = test_complete("CompletionFoo.kwtest5(3, 5, 7, 9, somekwarg=4, somek") + @test c == ["somekotherkwarg="] + c, r = test_complete("CompletionFoo.kwtest5(String[]..., unknownsplat...; xy") + @test c == ["xyz="] + c, r = test_complete("CompletionFoo.kwtest5('a', unknownsplat...; xy") + @test c == ["xyz="] + c, r = test_complete("CompletionFoo.kwtest5('a', 3, String[]...; xy") + @test c == ["xyz="] + + # return true if no completion suggests a keyword argument + function hasnokwsuggestions(str) + c, _ = test_complete(str) + return !any(x -> endswith(x, r"[a-z]="), c) + end + @test hasnokwsuggestions("Completio") + @test hasnokwsuggestions("CompletionFoo.kwt") + @test hasnokwsuggestions("CompletionFoo.kwtest3(") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a;") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; len2=") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; len2=le") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; len2=3 ") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; [le") + @test hasnokwsuggestions("CompletionFoo.kwtest3([length; le") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; (le") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; foo(le") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; (; le") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; length, ") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; kwargs..., ") + + #= TODO: Test the absence of kwarg completion the call is incompatible with the method bearing the kwarg. 
+ @test hasnokwsuggestions("CompletionFoo.kwtest3(a") + @test hasnokwsuggestions("CompletionFoo.kwtest3(le") + @test hasnokwsuggestions("CompletionFoo.kwtest3(a; unknown=4, another!kw") # only methods 1 and 3 could slurp `unknown` + @test hasnokwsuggestions("CompletionFoo.kwtest3(1+3im; nameda") + @test hasnokwsuggestions("CompletionFoo.kwtest3(12//7; foob") # because of specificity + @test hasnokwsuggestions("CompletionFoo.kwtest3(a, len2=b, length, foob") # length is not length=length + @test hasnokwsuggestions("CompletionFoo.kwtest5('a', 3, 5, unknownsplat...; xy") + @test hasnokwsuggestions("CompletionFoo.kwtest5(3; somek") + =# +end + # Test completion in context # No CompletionFoo.CompletionFoo @@ -1339,11 +1687,17 @@ let s = ":(function foo(::Int) end).args[1].args[2]." @test c == Any[] end -let s = "log(log.(x)," +let s = "log(log.(varfloat)," c, r = test_complete_foo(s) @test !isempty(c) end +# TODO: this is a bad test +#let s = "log(log.(noexist)," +# c, r = test_complete_foo(s) +# @test isempty(c) +#end + let s = "Base.return_types(getin" c, r = test_complete_foo(s) @test "getindex" in c @@ -1359,9 +1713,10 @@ end let s = "test(1,1, " c, r, res = test_complete_foo(s) @test !res - @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Int, Int}))) - @test c[2] == string(first(methods(Main.CompletionFoo.test, Tuple{}))) # corresponding to the vararg - @test length(c) == 2 + m = test_methods_list(Main.CompletionFoo.test, Tuple{Int, Int, Vararg}) + @test length(m) == 2 == length(c) + @test c[1] == m[1] + @test c[2] == m[2] # In particular, this checks that test(x::Real, y::Real) is not a valid completion # since it is strictly less specific than test(x::T, y::T) where T @test r == 1:4 @@ -1374,11 +1729,13 @@ let s = "test.(1,1, " @test length(c) == 4 @test r == 1:4 @test s[r] == "test" + # TODO: @test (c, r, res) == test_complete_foo("test.(1, 1, String[]..., ") + # TODO: @test (c, r, res) == test_complete_foo("test.(1, Any[]..., 2, ") end let s = "prevind(\"θ\",1," c, r, res = test_complete_foo(s) - @test c[1] == string(first(methods(prevind, Tuple{String, Int}))) + @test c[1] == first(test_methods_list(prevind, Tuple{String, Int, Vararg})) @test r == 1:7 @test s[r] == "prevind" end @@ -1429,8 +1786,7 @@ end @testset "https://github.com/JuliaLang/julia/issues/40247" begin # getfield type completion can work for complicated expression - let - m = Module() + let m = Module() @eval m begin struct Rs rs::Vector{Regex} @@ -1447,8 +1803,7 @@ end @test length(c) == fieldcount(Regex) end - let - m = Module() + let m = Module() @eval m begin struct R r::Regex @@ -1469,3 +1824,367 @@ end @test length(c) == fieldcount(Regex) end end + +@testset "https://github.com/JuliaLang/julia/issues/47593" begin + let m = Module() + @eval m begin + struct TEST_47594 + var"("::Int + end + test_47594 = TEST_47594(1) + end + + c, r = test_complete_context("test_47594.", m) + @test c == Any["var\"(\""] + end +end + +# https://github.com/JuliaLang/julia/issues/36437 +struct Issue36437{T} + v::T +end +Base.propertynames(::Issue36437) = (:a, :b, :c) +function Base.getproperty(v::Issue36437, s::Symbol) + if s === :a + return 1 + elseif s === :b + return 2 + elseif s === :c + return getfield(v, :v) + else + throw(ArgumentError(lazy"`(v::Issue36437).$s` is not supported")) + end +end + +let s = "Issue36437(42)." + c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end +end + +let s = "Some(Issue36437(42)).value." 
+ c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end +end + +some_issue36437 = Some(Issue36437(42)) + +let s = "some_issue36437.value." + c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end +end + +# get completions for :toplevel/:tuple expressions +let s = "some_issue36437.value.a, some_issue36437.value." + c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end +end +let s = "@show some_issue36437.value.a; some_issue36437.value." + c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end +end +# https://github.com/JuliaLang/julia/issues/51505 +let s = "()." + c, r, res = test_complete_context(s) + @test res +end + +# aggressive concrete evaluation on mutable allocation in `repl_frame` +let s = "Ref(Issue36437(42))[]." + c, r, res = test_complete_context(s) + @test res + for n in ("a", "b", "c") + @test n in c + end + @test "v" ∉ c +end + +# concrete evaluation through `getindex`ing dictionary +global_dict = Dict{Symbol, Any}(:r => r"foo") +let s = "global_dict[:r]." + c, r, res = test_complete_context(s) + @test res + for fname in fieldnames(Regex) + @test String(fname) in c + end +end +global_dict_nested = Dict{Symbol, Any}(:g => global_dict) +let s = "global_dict_nested[:g][:r]." + c, r, res = test_complete_context(s) + @test res + for fname in fieldnames(Regex) + @test String(fname) in c + end +end + +# dict completions through nested `getindex`ing +let s = "global_dict_nested[" + c, r, res = test_complete_context(s) + @test res + @test ":g]" in c +end +let s = "global_dict_nested[:g][" + c, r, res = test_complete_context(s) + @test res + @test ":r]" in c +end + +const global_xs = [Some(42)] +let s = "pop!(global_xs)." + c, r, res = test_complete_context(s) + @test res + @test "value" in c +end +@test length(global_xs) == 1 # the completion above shouldn't evaluate `pop!` call + +# https://github.com/JuliaLang/julia/issues/51499 +# allow aggressive concrete evaluation for child uncached frames +struct Issue51499CompletionDict + inner::Dict{Symbol,Any} + leaf_func # Function that gets invoked on leaf objects before being returned. 
+ function Issue51499CompletionDict(inner::Dict, leaf_func=identity) + inner = Dict{Symbol,Any}(Symbol(k) => v for (k, v) in inner) + return new(inner, leaf_func) + end +end +function Base.getproperty(tcd::Issue51499CompletionDict, name::Symbol) + prop = getfield(tcd, :inner)[name] + isa(prop, Issue51499CompletionDict) && return prop + return getfield(tcd, :leaf_func)(prop) +end +Base.propertynames(tcd::Issue51499CompletionDict) = keys(getfield(tcd, :inner)) + +const issue51499 = Ref{Any}(nothing) +tcd3 = Issue51499CompletionDict( + Dict(:a => 1.0, :b => 2.0), + function (x) + issue51499[] = x + return sin(x) + end) +tcd2 = Issue51499CompletionDict( + Dict(:v => tcd3, :w => 1.0)) +tcd1 = Issue51499CompletionDict( + Dict(:x => tcd2, :y => 1.0)) +let (c, r, res) = test_complete_context("tcd1.") + @test res + @test "x" in c && "y" in c + @test isnothing(issue51499[]) +end +let (c, r, res) = test_complete_context("tcd1.x.") + @test res + @test "v" in c && "w" in c + @test isnothing(issue51499[]) +end +let (c, r, res) = test_complete_context("tcd1.x.v.") + @test res + @test "a" in c && "b" in c + @test isnothing(issue51499[]) +end +@test tcd1.x.v.a == sin(1.0) +@test issue51499[] == 1.0 + +# aggressive constant propagation for mutable `Const`s +mutable_const_prop = Dict{Symbol,Any}(:key => Any[Some(r"x")]) +getkeyelem(d) = d[:key][1] +let (c, r, res) = test_complete_context("getkeyelem(mutable_const_prop).") + @test res + @test "value" in c +end +let (c, r, res) = test_complete_context("getkeyelem(mutable_const_prop).value.") + @test res + for name in fieldnames(Regex) + @test String(name) in c + end +end + +# JuliaLang/julia/#51548 +# don't return wrong result due to mutable inconsistency +function issue51548(T, a) + # if we fold `xs = getindex(T)` to `xs::Const(Vector{T}())`, then we may wrongly + # constant-fold `isempty(xs)::Const(true)` and return wrong result + xs = T[] + if a isa T + push!(xs, a) + end + return Val(isempty(xs)) +end; +let inferred = REPL.REPLCompletions.repl_eval_ex( + :(issue51548(Any, r"issue51548")), @__MODULE__; limit_aggressive_inference=true) + @test !isnothing(inferred) + RT = Core.Compiler.widenconst(inferred) + @test Val{false} <: RT +end +module TestLimitAggressiveInferenceGetProp +global global_var = 1 +end +function test_limit_aggressive_inference_getprop() + return getproperty(TestLimitAggressiveInferenceGetProp, :global_var) +end +let inferred = REPL.REPLCompletions.repl_eval_ex( + :(test_limit_aggressive_inference_getprop()), @__MODULE__; limit_aggressive_inference=true) + @test inferred == Core.Const(1) +end + +# Test completion of var"" identifiers (#49280) +let s = "var\"complicated " + c, r = test_complete_foo(s) + @test c == Any["var\"complicated symbol with spaces\""] +end + +for s in ("WeirdNames().var\"oh ", "WeirdNames().var\"") + c, r = test_complete_foo(s) + @test c == Any["var\"oh no!\"", "var\"oh yes!\""] +end + +# Test completion of non-Expr literals +let s = "\"abc\"." 
+ c, r = test_complete(s) + # (no completion, but shouldn't error) + @test isempty(c) +end + +let s = "`abc`.e" + c, r = test_complete(s) + # (completions for the fields of `Cmd`) + @test c == Any["env", "exec"] +end + +# suppress false positive field completions (when `getproperty`/`propertynames` is overloaded) +struct Issue51499_2 + inner::Dict{Symbol,Any} +end +Base.getproperty(issue51499::Issue51499_2, name::Symbol) = getfield(issue51499, :inner)[name] +Base.propertynames(issue51499::Issue51499_2) = keys(getfield(issue51499, :inner)) +const issue51499_2_1 = Issue51499_2(Dict(:a => nothing)) +const issue51499_2_2 = Issue51499_2(Dict(:b => nothing)) +let s = "(rand(Bool) ? issue51499_2_1 : issue51499_2_2)." + c, r, res = test_complete_context(s) + @test "inner" ∉ c +end + +# Test completion for a case when type inference returned `Union` of the same types +union_somes(a, b) = rand() < 0.5 ? Some(a) : Some(b) +let s = "union_somes(1, 1.0)." + c, r, res = test_complete_context(s) + @test res + @test "value" in c +end +union_some_ref(a, b) = rand() < 0.5 ? Some(a) : Ref(b) +let s = "union_some_ref(1, 1.0)." + c, r, res = test_complete_context(s) + @test res + @test "value" in c && "x" in c +end + +Issue49892(x) = x +let s = "Issue49892(fal" + c, r, res = test_complete_context(s) + @test res + for n in ("false", "falses") + @test n in c + end +end + +@testset "public but non-exported symbols only complete qualified (#51331)" begin + c, r, res = test_complete("ispub") + @test res + @test "ispublic" ∉ c + + c, r, res = test_complete("Base.ispub") + @test res + @test "ispublic" ∈ c + + @test Base.ispublic(Base, :ispublic) + # If this last test starts failing, that's okay, just pick a new example symbol: + @test !Base.isexported(Base, :ispublic) +end + +# issue #51194 +for (s, compl) in (("2*CompletionFoo.nam", "named"), + (":a isa CompletionFoo.test!1", "test!12"), + ("-CompletionFoo.Test_y(3).", "yy"), + ("99 ⨷⁻ᵨ⁷ CompletionFoo.type_test.", "xx"), + ("CompletionFoo.type_test + CompletionFoo.Test_y(2).", "yy"), + ("(CompletionFoo.type_test + CompletionFoo.Test_y(2)).", "xx"), + ("CompletionFoo.type_test + CompletionFoo.unicode_αβγ.", "yy"), + ("(CompletionFoo.type_test + CompletionFoo.unicode_αβγ).", "xx"), + ("foo'CompletionFoo.test!1", "test!12")) + c, r = test_complete(s) + @test only(c) == compl +end + +# allows symbol completion within incomplete :macrocall +# https://github.com/JuliaLang/julia/issues/51827 +macro issue51827(args...) 
+ length(args) ≥ 2 || error("@issue51827: incomplete arguments") + return args +end +let s = "@issue51827 Base.ac" + c, r, res = test_complete_context(s) + @test res + @test "acquire" in c +end + +let t = REPLCompletions.repl_eval_ex(:(`a b`), @__MODULE__; limit_aggressive_inference=true) + @test t isa Core.Const + @test t.val == `a b` +end + +# issue #51823 +@test "include" in test_complete_context("inc", Main)[1] + +# REPL completions should not try to concrete-evaluate !:noub methods +function very_unsafe_method(i::Int) + xs = Any[] + @inbounds xs[i] +end +let t = REPLCompletions.repl_eval_ex(:(unsafe_method(42)), @__MODULE__) + @test isnothing(t) +end + +# https://github.com/JuliaLang/julia/issues/52099 +const issue52099 = [] +let t = REPLCompletions.repl_eval_ex(:(Base.PersistentDict(issue52099 => 3)), @__MODULE__) + if t isa Core.Const + @test length(t.val) == 1 + end +end + +# test REPLInterpreter effects for `getindex(::Dict, key)` +for (DictT, KeyT) = Any[(Dict{Symbol,Any}, Symbol), + (Dict{Int,Any}, Int), + (Dict{String,Any}, String)] + @testset let DictT=DictT, KeyT=KeyT + effects = Base.infer_effects(getindex, (DictT,KeyT); interp=REPL.REPLCompletions.REPLInterpreter()) + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_terminates(effects) + @test Core.Compiler.is_noub(effects) + effects = Base.infer_effects((DictT,KeyT); interp=REPL.REPLCompletions.REPLInterpreter()) do d, key + key in keys(d) + end + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_terminates(effects) + @test Core.Compiler.is_noub(effects) + end +end + +# test invalidation support +replinterp_invalidation_callee(c::Bool=rand(Bool)) = Some(c ? r"foo" : r"bar") +replinterp_invalidation_caller() = replinterp_invalidation_callee().value +@test REPLCompletions.repl_eval_ex(:(replinterp_invalidation_caller()), @__MODULE__) == Regex +replinterp_invalidation_callee(c::Bool=rand(Bool)) = Some(c ? "foo" : "bar") +@test REPLCompletions.repl_eval_ex(:(replinterp_invalidation_caller()), @__MODULE__) == String diff --git a/stdlib/REPL/test/runtests.jl b/stdlib/REPL/test/runtests.jl index 2d46491103d01..e152677ccf7bb 100644 --- a/stdlib/REPL/test/runtests.jl +++ b/stdlib/REPL/test/runtests.jl @@ -1,5 +1,8 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# Make a copy of the original environment +original_env = copy(ENV) + module REPLTests include("repl.jl") end @@ -15,3 +18,13 @@ end module TerminalMenusTest include("TerminalMenus/runtests.jl") end + +# Restore the original environment +for k in keys(ENV) + if !haskey(original_env, k) + delete!(ENV, k) + end +end +for (k, v) in pairs(original_env) + ENV[k] = v +end diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml index 199dcab940c86..5a9cc2dfc4cb7 100644 --- a/stdlib/Random/Project.toml +++ b/stdlib/Random/Project.toml @@ -1,8 +1,8 @@ name = "Random" uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" [deps] -Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" [extras] @@ -10,7 +10,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Future = "9fa8497b-333b-5362-9e8d-4d0656e87820" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [targets] -test = ["Test", "SparseArrays", "LinearAlgebra", "Future", "Statistics"] +test = ["Test", "SparseArrays", "LinearAlgebra", "Future"] diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md index 0f7636cf2444f..70d27dc021875 100644 --- a/stdlib/Random/docs/src/index.md +++ b/stdlib/Random/docs/src/index.md @@ -33,6 +33,8 @@ unbounded integers, the interval must be specified (e.g. `rand(big.(1:6))`). Additionally, normal and exponential distributions are implemented for some `AbstractFloat` and `Complex` types, see [`randn`](@ref) and [`randexp`](@ref) for details. +To generate random numbers from other distributions, see the [Distributions.jl](https://juliastats.org/Distributions.jl/stable/) package. + !!! warning Because the precise way in which random numbers are generated is considered an implementation detail, bug fixes and speed improvements may change the stream of numbers that are generated after a version change. Relying on a specific seed or generated stream of numbers during unit testing is thus discouraged - consider testing properties of the methods in question instead. @@ -79,7 +81,7 @@ Random.MersenneTwister Random.RandomDevice ``` -## Hooking into the `Random` API +## [Hooking into the `Random` API](@id rand-api-hook) There are two mostly orthogonal ways to extend `Random` functionalities: 1) generating random values of custom types @@ -124,8 +126,8 @@ Random.SamplerSimple Decoupling pre-computation from actually generating the values is part of the API, and is also available to the user. 
As an example, assume that `rand(rng, 1:20)` has to be called repeatedly in a loop: the way to take advantage of this decoupling is as follows: ```julia -rng = MersenneTwister() -sp = Random.Sampler(rng, 1:20) # or Random.Sampler(MersenneTwister, 1:20) +rng = Xoshiro() +sp = Random.Sampler(rng, 1:20) # or Random.Sampler(Xoshiro, 1:20) for x in X n = rand(rng, sp) # similar to n = rand(rng, 1:20) # use n @@ -157,8 +159,8 @@ Scalar and array methods for `Die` now work as expected: julia> rand(Die) Die(5) -julia> rand(MersenneTwister(0), Die) -Die(11) +julia> rand(Xoshiro(0), Die) +Die(10) julia> rand(Die, 3) 3-element Vector{Die}: @@ -213,7 +215,7 @@ and that we *always* want to build an alias table, regardless of the number of v Random.eltype(::Type{<:DiscreteDistribution}) = Int function Random.Sampler(::Type{<:AbstractRNG}, distribution::DiscreteDistribution, ::Repetition) - SamplerSimple(disribution, make_alias_table(distribution.probabilities)) + SamplerSimple(distribution, make_alias_table(distribution.probabilities)) end ``` should be defined to return a sampler with pre-computed data, then diff --git a/stdlib/Random/src/DSFMT.jl b/stdlib/Random/src/DSFMT.jl index f72a9dd5e9a0a..25155b4e8575d 100644 --- a/stdlib/Random/src/DSFMT.jl +++ b/stdlib/Random/src/DSFMT.jl @@ -65,7 +65,8 @@ function dsfmt_init_gen_rand(s::DSFMT_state, seed::UInt32) s.val, seed) end -function dsfmt_init_by_array(s::DSFMT_state, seed::Vector{UInt32}) +function dsfmt_init_by_array(s::DSFMT_state, seed::StridedVector{UInt32}) + strides(seed) == (1,) || throw(ArgumentError("seed must have its stride equal to 1")) ccall((:dsfmt_init_by_array,:libdSFMT), Cvoid, (Ptr{Cvoid}, Ptr{UInt32}, Int32), @@ -194,8 +195,13 @@ function dsfmt_jump(s::DSFMT_state, jp::GF2X) work = zeros(Int32, JN32) rwork = reinterpret(UInt64, work) dsfmt = Vector{UInt64}(undef, nval >> 1) - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt64}, Ptr{Int32}, Csize_t), - dsfmt, val, (nval - 1) * sizeof(Int32)) + dsfmtref = Base.cconvert(Ptr{Cvoid}, dsfmt) + valref = Base.cconvert(Ptr{Cvoid}, val) + GC.@preserve dsfmtref valref begin + pdsfmt = Base.unsafe_convert(Ptr{Cvoid}, dsfmtref) + pval = Base.unsafe_convert(Ptr{Cvoid}, valref) + Base.Libc.memcpy(pdsfmt, pval, (nval - 1) * sizeof(Int32)) + end dsfmt[end] = UInt64(N*2) for i in 0:degree(jp) diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 115034d3e3988..7782de88ba537 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -12,7 +12,7 @@ The entropy is obtained from the operating system. """ struct RandomDevice <: AbstractRNG; end RandomDevice(seed::Nothing) = RandomDevice() -seed!(rng::RandomDevice) = rng +seed!(rng::RandomDevice, ::Nothing) = rng rand(rd::RandomDevice, sp::SamplerBoolBitInteger) = Libc.getrandom!(Ref{sp[]}())[] rand(rd::RandomDevice, ::SamplerType{Bool}) = rand(rd, UInt8) % Bool @@ -44,7 +44,7 @@ const MT_CACHE_I = 501 << 4 # number of bytes in the UInt128 cache @assert dsfmt_get_min_array_size() <= MT_CACHE_F mutable struct MersenneTwister <: AbstractRNG - seed::Vector{UInt32} + seed::Any state::DSFMT_state vals::Vector{Float64} ints::Vector{UInt128} @@ -70,7 +70,7 @@ mutable struct MersenneTwister <: AbstractRNG end end -MersenneTwister(seed::Vector{UInt32}, state::DSFMT_state) = +MersenneTwister(seed, state::DSFMT_state) = MersenneTwister(seed, state, Vector{Float64}(undef, MT_CACHE_F), Vector{UInt128}(undef, MT_CACHE_I >> 4), @@ -83,28 +83,26 @@ MersenneTwister(seed::Vector{UInt32}, state::DSFMT_state) = Create a `MersenneTwister` RNG object. 
Different RNG objects can have their own seeds, which may be useful for generating different streams of random numbers. -The `seed` may be a non-negative integer or a vector of -`UInt32` integers. If no seed is provided, a randomly generated one -is created (using entropy from the system). -See the [`seed!`](@ref) function for reseeding an already existing -`MersenneTwister` object. +The `seed` may be an integer, a string, or a vector of `UInt32` integers. +If no seed is provided, a randomly generated one is created (using entropy from the system). +See the [`seed!`](@ref) function for reseeding an already existing `MersenneTwister` object. +!!! compat "Julia 1.11" + Passing a negative integer seed requires at least Julia 1.11. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); +julia> rng = MersenneTwister(123); julia> x1 = rand(rng, 2) 2-element Vector{Float64}: - 0.5908446386657102 - 0.7667970365022592 + 0.37453777969575874 + 0.8735343642013971 -julia> rng = MersenneTwister(1234); - -julia> x2 = rand(rng, 2) +julia> x2 = rand(MersenneTwister(123), 2) 2-element Vector{Float64}: - 0.5908446386657102 - 0.7667970365022592 + 0.37453777969575874 + 0.8735343642013971 julia> x1 == x2 true @@ -115,7 +113,7 @@ MersenneTwister(seed=nothing) = function copy!(dst::MersenneTwister, src::MersenneTwister) - copyto!(resize!(dst.seed, length(src.seed)), src.seed) + dst.seed = src.seed copy!(dst.state, src.state) copyto!(dst.vals, src.vals) copyto!(dst.ints, src.ints) @@ -129,7 +127,7 @@ function copy!(dst::MersenneTwister, src::MersenneTwister) end copy(src::MersenneTwister) = - MersenneTwister(copy(src.seed), copy(src.state), copy(src.vals), copy(src.ints), + MersenneTwister(src.seed, copy(src.state), copy(src.vals), copy(src.ints), src.idxF, src.idxI, src.adv, src.adv_jump, src.adv_vals, src.adv_ints) @@ -144,12 +142,10 @@ hash(r::MersenneTwister, h::UInt) = function show(io::IO, rng::MersenneTwister) # seed - seed = from_seed(rng.seed) - seed_str = seed <= typemax(Int) ? string(seed) : "0x" * string(seed, base=16) # DWIM if rng.adv_jump == 0 && rng.adv == 0 - return print(io, MersenneTwister, "(", seed_str, ")") + return print(io, MersenneTwister, "(", repr(rng.seed), ")") end - print(io, MersenneTwister, "(", seed_str, ", (") + print(io, MersenneTwister, "(", repr(rng.seed), ", (") # state adv = Integer[rng.adv_jump, rng.adv] if rng.adv_vals != -1 || rng.adv_ints != -1 @@ -277,68 +273,125 @@ end ### seeding -#### make_seed() +#### random_seed() & hash_seed() -# make_seed produces values of type Vector{UInt32}, suitable for MersenneTwister seeding -function make_seed() +# random_seed tries to produce a random seed of type UInt128 from system entropy +function random_seed() try - return rand(RandomDevice(), UInt32, 4) + # as MersenneTwister prints its seed when `show`ed, 128 bits is a good compromise for + # almost surely always getting distinct seeds, while having them printed reasonably tersely + return rand(RandomDevice(), UInt128) catch ex ex isa IOError || rethrow() @warn "Entropy pool not available to seed RNG; using ad-hoc entropy sources." 
- return make_seed(Libc.rand()) + return Libc.rand() end end -function make_seed(n::Integer) - n < 0 && throw(DomainError(n, "`n` must be non-negative.")) - seed = UInt32[] +function hash_seed(seed::Integer) + ctx = SHA.SHA2_256_CTX() + neg = signbit(seed) + if neg + seed = ~seed + end + @assert seed >= 0 while true - push!(seed, n & 0xffffffff) - n >>= 32 - if n == 0 - return seed - end + word = (seed % UInt32) & 0xffffffff + seed >>>= 32 + SHA.update!(ctx, reinterpret(NTuple{4, UInt8}, word)) + iszero(seed) && break + end + # make sure the hash of negative numbers is different from the hash of positive numbers + neg && SHA.update!(ctx, (0x01,)) + SHA.digest!(ctx) +end + +function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}}) + ctx = SHA.SHA2_256_CTX() + for xx in seed + SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx))) end + # discriminate from hash_seed(::Integer) + SHA.update!(ctx, (0x10,)) + SHA.digest!(ctx) +end + +function hash_seed(str::AbstractString) + ctx = SHA.SHA2_256_CTX() + # convert to String such that `codeunits(str)` below is consistent between equal + # strings of different types + str = String(str) + SHA.update!(ctx, codeunits(str)) + # signature for strings: so far, all hash_seed functions end up hashing a multiple + # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many + # bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding), + # and then hash the signature 0x05; in order for strings of different lengths to have + # different hashes, padding bytes are set equal to the number of padding bytes + pad = 4 - mod(ncodeunits(str), 4) + for _=1:pad + SHA.update!(ctx, (pad % UInt8,)) + end + SHA.update!(ctx, (0x05,)) + SHA.digest!(ctx) end -# inverse of make_seed(::Integer) -from_seed(a::Vector{UInt32})::BigInt = sum(a[i] * big(2)^(32*(i-1)) for i in 1:length(a)) +""" + hash_seed(seed) -> AbstractVector{UInt8} + +Return a cryptographic hash of `seed` of size 256 bits (32 bytes). +`seed` can currently be of type +`Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`, +but modules can extend this function for types they own. + +`hash_seed` is "injective": if `n != m`, then `hash_seed(n) != hash_seed(m)`. +Moreover, if `n == m`, then `hash_seed(n) == hash_seed(m)`. + +This is an internal function subject to change. +""" +hash_seed #### seed!() -function seed!(r::MersenneTwister, seed::Vector{UInt32}) - copyto!(resize!(r.seed, length(seed)), seed) - dsfmt_init_by_array(r.state, r.seed) +function initstate!(r::MersenneTwister, data::StridedVector, seed) + # we deepcopy `seed` because the caller might mutate it, and it's useful + # to keep it constant inside `MersenneTwister`; but multiple instances + # can share the same seed without any problem (e.g.
in `copy`) + r.seed = deepcopy(seed) + dsfmt_init_by_array(r.state, reinterpret(UInt32, data)) reset_caches!(r) r.adv = 0 r.adv_jump = 0 return r end -seed!(r::MersenneTwister) = seed!(r, make_seed()) -seed!(r::MersenneTwister, n::Integer) = seed!(r, make_seed(n)) +# when a seed is not provided, we generate one via `RandomDevice()` in `random_seed()` rather +# than calling directly `initstate!` with `rand(RandomDevice(), UInt32, whatever)` because the +# seed is printed in `show(::MersenneTwister)`, so we need one; the cost of `hash_seed` is a +# small overhead compared to `initstate!`, so this simple solution is fine +seed!(r::MersenneTwister, ::Nothing) = seed!(r, random_seed()) +seed!(r::MersenneTwister, seed) = initstate!(r, hash_seed(seed), seed) ### Global RNG -struct _GLOBAL_RNG <: AbstractRNG - global const GLOBAL_RNG = _GLOBAL_RNG.instance -end +""" + Random.default_rng() -> rng -# GLOBAL_RNG currently uses TaskLocalRNG -typeof_rng(::_GLOBAL_RNG) = TaskLocalRNG +Return the default global random number generator (RNG), which is used by `rand`-related functions when +no explicit RNG is provided. -""" - default_rng() -> rng +When the `Random` module is loaded, the default RNG is _randomly_ seeded, via [`Random.seed!()`](@ref): +this means that each time a new julia session is started, the first call to `rand()` produces a different +result, unless `seed!(seed)` is called first. -Return the default global random number generator (RNG). +It is thread-safe: distinct threads can safely call `rand`-related functions on `default_rng()` concurrently, +e.g. `rand(default_rng())`. !!! note - What the default RNG is is an implementation detail. Across different versions of - Julia, you should not expect the default RNG to be always the same, nor that it will - return the same stream of random numbers for a given seed. + The type of the default RNG is an implementation detail. Across different versions of + Julia, you should not expect the default RNG to always have the same type, nor that it will + produce the same stream of random numbers for a given seed. !!! compat "Julia 1.3" This function was introduced in Julia 1.3. @@ -346,48 +399,37 @@ Return the default global random number generator (RNG). @inline default_rng() = TaskLocalRNG() @inline default_rng(tid::Int) = TaskLocalRNG() -copy!(dst::Xoshiro, ::_GLOBAL_RNG) = copy!(dst, default_rng()) -copy!(::_GLOBAL_RNG, src::Xoshiro) = copy!(default_rng(), src) -copy(::_GLOBAL_RNG) = copy(default_rng()) - -GLOBAL_SEED = 0 -set_global_seed!(seed) = global GLOBAL_SEED = seed - -function seed!(::_GLOBAL_RNG, seed=rand(RandomDevice(), UInt64, 4)) - global GLOBAL_SEED = seed - seed!(default_rng(), seed) -end +# defined only for backward compatibility with pre-v1.3 code when `default_rng()` didn't exist; +# `GLOBAL_RNG` was never really documented, but was appearing in the docstring of `rand` +const GLOBAL_RNG = default_rng() -seed!(rng::_GLOBAL_RNG, ::Nothing) = seed!(rng) # to resolve ambiguity +# In v1.0, the GLOBAL_RNG was storing the seed which was used to initialize it; this seed was used to implement +# the following feature of `@testset`: +# > Before the execution of the body of a `@testset`, there is an implicit +# > call to `Random.seed!(seed)` where `seed` is the current seed of the global RNG. 
+# But the global RNG is now `TaskLocalRNG()` and doesn't store its seed; in order to not break `@testset`, +# in a call like `seed!(seed)` *without* an explicit RNG, we now store the state of `TaskLocalRNG()` in +# `task_local_storage()` -seed!(seed::Union{Nothing,Integer,Vector{UInt32},Vector{UInt64}}=nothing) = - seed!(GLOBAL_RNG, seed) +# GLOBAL_SEED is used as a fall-back when no tls seed is found +# only `Random.__init__` is allowed to set it +const GLOBAL_SEED = Xoshiro(0, 0, 0, 0, 0) -rng_native_52(::_GLOBAL_RNG) = rng_native_52(default_rng()) -rand(::_GLOBAL_RNG, sp::SamplerBoolBitInteger) = rand(default_rng(), sp) -for T in (:(SamplerTrivial{UInt52Raw{UInt64}}), - :(SamplerTrivial{UInt2x52Raw{UInt128}}), - :(SamplerTrivial{UInt104Raw{UInt128}}), - :(SamplerTrivial{CloseOpen01_64}), - :(SamplerTrivial{CloseOpen12_64}), - :(SamplerUnion(Int64, UInt64, Int128, UInt128)), - :(SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)), - ) - @eval rand(::_GLOBAL_RNG, x::$T) = rand(default_rng(), x) -end +get_tls_seed() = get!(() -> copy(GLOBAL_SEED), task_local_storage(), + :__RANDOM_GLOBAL_RNG_SEED_uBlmfA8ZS__)::Xoshiro -rand!(::_GLOBAL_RNG, A::AbstractArray{Float64}, I::SamplerTrivial{<:FloatInterval_64}) = rand!(default_rng(), A, I) -rand!(::_GLOBAL_RNG, A::Array{Float64}, I::SamplerTrivial{<:FloatInterval_64}) = rand!(default_rng(), A, I) -for T in (Float16, Float32) - @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{CloseOpen12{$T}}) = rand!(default_rng(), A, I) - @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerTrivial{CloseOpen01{$T}}) = rand!(default_rng(), A, I) -end -for T in BitInteger_types - @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerType{$T}) = rand!(default_rng(), A, I) +# seed the default RNG +function seed!(seed=nothing) + seed!(default_rng(), seed) + copy!(get_tls_seed(), default_rng()) + default_rng() end function __init__() - seed!(GLOBAL_RNG) + # do not call no-arg `seed!()` to not update `task_local_storage()` unnecessarily at startup + seed!(default_rng()) + copy!(GLOBAL_SEED, TaskLocalRNG()) + ccall(:jl_gc_init_finalizer_rng_state, Cvoid, ()) end @@ -465,7 +507,7 @@ end ##### Array : internal functions -# internal array-like type to circumevent the lack of flexibility with reinterpret +# internal array-like type to circumvent the lack of flexibility with reinterpret struct UnsafeView{T} <: DenseArray{T,1} ptr::Ptr{T} len::Int @@ -700,7 +742,7 @@ end function _randjump(r::MersenneTwister, jumppoly::DSFMT.GF2X) adv = r.adv adv_jump = r.adv_jump - s = MersenneTwister(copy(r.seed), DSFMT.dsfmt_jump(r.state, jumppoly)) + s = MersenneTwister(r.seed, DSFMT.dsfmt_jump(r.state, jumppoly)) reset_caches!(s) s.adv = adv s.adv_jump = adv_jump @@ -725,16 +767,16 @@ jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps)) # parameters in the tuples are: # 1: .adv_jump (jump steps) # 2: .adv (number of generated floats at the DSFMT_state level since seeding, besides jumps) -# 3, 4: .adv_vals, .idxF (counters to reconstruct the float chache, optional if 5-6 not shown)) -# 5, 6: .adv_ints, .idxI (counters to reconstruct the integer chache, optional) +# 3, 4: .adv_vals, .idxF (counters to reconstruct the float cache, optional if 5-6 not shown)) +# 5, 6: .adv_ints, .idxI (counters to reconstruct the integer cache, optional) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{6,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{6,Integer}) = advance!(MersenneTwister(seed), advance...) 
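As a minimal sketch of the user-visible behavior that the task-local seed machinery described above supports (using only the public `Random.seed!`/`rand` API; the values drawn depend on the seed):

```julia
using Random, Test

# Re-seeding the task-local default RNG makes the stream reproducible.
Random.seed!(1234)
a = rand(3)
Random.seed!(1234)
b = rand(3)
@test a == b

# Per the comments above, `@testset` implicitly re-seeds from the stored
# task-local state before running its body, so test bodies observe a
# deterministic stream once `seed!` has been called.
```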
-Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{4,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{4,Integer}) = MersenneTwister(seed, (advance..., 0, 0)) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{2,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{2,Integer}) = MersenneTwister(seed, (advance..., 0, 0, 0, 0)) # advances raw state (per fill_array!) of r by n steps (Float64 values) diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index 02bc609e55679..432e32a4de691 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -16,7 +16,6 @@ using Base.GMP: Limb import SHA using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing - import Base: copymutable, copy, copy!, ==, hash, convert, rand, randn, show @@ -137,11 +136,9 @@ the amount of precomputation, if applicable. *types* and *values*, respectively. [`Random.SamplerSimple`](@ref) can be used to store pre-computed values without defining extra types for only this purpose. """ -Sampler(rng::AbstractRNG, x, r::Repetition=Val(Inf)) = Sampler(typeof_rng(rng), x, r) +Sampler(rng::AbstractRNG, x, r::Repetition=Val(Inf)) = Sampler(typeof(rng), x, r) Sampler(rng::AbstractRNG, ::Type{X}, r::Repetition=Val(Inf)) where {X} = - Sampler(typeof_rng(rng), X, r) - -typeof_rng(rng::AbstractRNG) = typeof(rng) + Sampler(typeof(rng), X, r) # this method is necessary to prevent rand(rng::AbstractRNG, X) from # recursively constructing nested Sampler types. @@ -215,7 +212,7 @@ end # TODO: make constraining constructors to enforce that those # types are <: Sampler{T} -##### Adapter to generate a randome value in [0, n] +##### Adapter to generate a random value in [0, n] struct LessThan{T<:Integer,S} <: Sampler{T} sup::T @@ -256,7 +253,7 @@ rand(rng::AbstractRNG, ::UniformT{T}) where {T} = rand(rng, T) rand(rng::AbstractRNG, X) = rand(rng, Sampler(rng, X, Val(1))) # this is needed to disambiguate rand(rng::AbstractRNG, X::Dims) = rand(rng, Sampler(rng, X, Val(1))) -rand(rng::AbstractRNG=default_rng(), ::Type{X}=Float64) where {X} = rand(rng, Sampler(rng, X, Val(1)))::X +rand(rng::AbstractRNG=default_rng(), ::Type{X}=Float64) where {X} = rand(rng, Sampler(rng, X, Val(1)))::X rand(X) = rand(default_rng(), X) rand(::Type{X}) where {X} = rand(default_rng(), X) @@ -314,21 +311,45 @@ include("XoshiroSimd.jl") Pick a random element or array of random elements from the set of values specified by `S`; `S` can be -* an indexable collection (for example `1:9` or `('x', "y", :z)`), -* an `AbstractDict` or `AbstractSet` object, +* an indexable collection (for example `1:9` or `('x', "y", :z)`) + +* an `AbstractDict` or `AbstractSet` object + * a string (considered as a collection of characters), or -* a type: the set of values to pick from is then equivalent to `typemin(S):typemax(S)` for - integers (this is not applicable to [`BigInt`](@ref)), to ``[0, 1)`` for floating - point numbers and to ``[0, 1)+i[0, 1)`` for complex floating point numbers; + +* a type from the list below, corresponding to the specified set of values + + + concrete integer types sample from `typemin(S):typemax(S)` (excepting [`BigInt`](@ref) which is not supported) + + + concrete floating point types sample from `[0, 1)` + + + concrete complex types `Complex{T}` if `T` is a sampleable type take their real and imaginary components + independently from the set of values corresponding to `T`, but are not supported if `T` is not sampleable. 
+ + + all `<:AbstractChar` types sample from the set of valid Unicode scalars + + + a user-defined type and set of values; for implementation guidance please see [Hooking into the `Random` API](@ref rand-api-hook) + + + a tuple type of known size and where each parameter of `S` is itself a sampleable type; return a value of type `S`. + Note that tuple types such as `Tuple{Vararg{T}}` (unknown size) and `Tuple{1:2}` (parameterized with a value) are not supported + + + a `Pair` type, e.g. `Pair{X, Y}` such that `rand` is defined for `X` and `Y`, + in which case random pairs are produced. + `S` defaults to [`Float64`](@ref). When only one argument is passed besides the optional `rng` and is a `Tuple`, it is interpreted as a collection of values (`S`) and not as `dims`. +See also [`randn`](@ref) for normally distributed numbers, and [`rand!`](@ref) and [`randn!`](@ref) for the in-place equivalents. + !!! compat "Julia 1.1" Support for `S` as a tuple requires at least Julia 1.1. +!!! compat "Julia 1.11" + Support for `S` as a `Tuple` type requires at least Julia 1.11. + # Examples ```julia-repl julia> rand(Int, 2) @@ -338,8 +359,8 @@ julia> rand(Int, 2) julia> using Random -julia> rand(MersenneTwister(0), Dict(1=>2, 3=>4)) -1=>2 +julia> rand(Xoshiro(0), Dict(1=>2, 3=>4)) +3 => 4 julia> rand((2, 3)) 3 @@ -371,15 +392,13 @@ but without allocating a new array. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> rand!(rng, zeros(5)) +julia> rand!(Xoshiro(123), zeros(5)) 5-element Vector{Float64}: - 0.5908446386657102 - 0.7667970365022592 - 0.5662374165061859 - 0.4600853424625171 - 0.7940257103317943 + 0.521213795535383 + 0.5868067574533484 + 0.8908786980927811 + 0.19090669902576285 + 0.5256623915420473 ``` """ rand! @@ -393,6 +412,8 @@ sequence of numbers if and only if a `seed` is provided. Some RNGs don't accept a seed, like `RandomDevice`. After the call to `seed!`, `rng` is equivalent to a newly created object initialized with the same seed. +The types of accepted seeds depend on the type of `rng`, but in general, +integer seeds should work. If `rng` is not specified, it defaults to seeding the state of the shared task-local generator. @@ -432,6 +453,11 @@ julia> rand(Xoshiro(), Bool) # not reproducible either true ``` """ -seed!(rng::AbstractRNG, ::Nothing) = seed!(rng) +seed!(rng::AbstractRNG) = seed!(rng, nothing) +#= +We have this generic definition instead of the alternative option +`seed!(rng::AbstractRNG, ::Nothing) = seed!(rng)` +because it would lead too easily to ambiguities, e.g. when we define `seed!(::Xoshiro, seed)`. +=# end # module diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl index 5b8aa4644d140..b16668e99584b 100644 --- a/stdlib/Random/src/Xoshiro.jl +++ b/stdlib/Random/src/Xoshiro.jl @@ -4,7 +4,7 @@ # Lots of implementation is shared with TaskLocalRNG """ - Xoshiro(seed) + Xoshiro(seed::Union{Integer, AbstractString}) Xoshiro() Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and @@ -21,6 +21,12 @@ multiple interleaved xoshiro instances). The virtual PRNGs are discarded once the bulk request has been serviced (and should cause no heap allocations). +If no seed is provided, a randomly generated one is created (using entropy from the system). +See the [`seed!`](@ref) function for reseeding an already existing `Xoshiro` object. + +!!! compat "Julia 1.11" + Passing a negative integer seed requires at least Julia 1.11. 
+ # Examples ```jldoctest julia> using Random @@ -48,47 +54,130 @@ mutable struct Xoshiro <: AbstractRNG s1::UInt64 s2::UInt64 s3::UInt64 + s4::UInt64 # internal splitmix state - Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer) = new(s0, s1, s2, s3) + Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer, s4::Integer) = new(s0, s1, s2, s3, s4) + Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer) = initstate!(new(), map(UInt64, (s0, s1, s2, s3))) Xoshiro(seed=nothing) = seed!(new(), seed) end -function setstate!(x::Xoshiro, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64) +@inline function setstate!(x::Xoshiro, (s0, s1, s2, s3, s4)) x.s0 = s0 x.s1 = s1 x.s2 = s2 x.s3 = s3 + if s4 !== nothing + x.s4 = s4 + end x end -copy(rng::Xoshiro) = Xoshiro(rng.s0, rng.s1, rng.s2, rng.s3) +@inline getstate(x::Xoshiro) = (x.s0, x.s1, x.s2, x.s3, x.s4) -function copy!(dst::Xoshiro, src::Xoshiro) - dst.s0, dst.s1, dst.s2, dst.s3 = src.s0, src.s1, src.s2, src.s3 - dst -end +rng_native_52(::Xoshiro) = UInt64 -function ==(a::Xoshiro, b::Xoshiro) - a.s0 == b.s0 && a.s1 == b.s1 && a.s2 == b.s2 && a.s3 == b.s3 +# Jump functions from: https://xoshiro.di.unimi.it/xoshiro256plusplus.c + +for (fname, JUMP) in ((:jump_128, (0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c)), + (:jump_192, (0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635))) + local fname! = Symbol(fname, :!) + @eval function $fname!(rng::Xoshiro) + _s0 = 0x0000000000000000 + _s1 = 0x0000000000000000 + _s2 = 0x0000000000000000 + _s3 = 0x0000000000000000 + s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3 + for j in $JUMP + for b in 0x0000000000000000:0x000000000000003f + if (j & 0x0000000000000001 << b) != 0 + _s0 ⊻= s0 + _s1 ⊻= s1 + _s2 ⊻= s2 + _s3 ⊻= s3 + end + t = s1 << 17 + s2 = xor(s2, s0) + s3 = xor(s3, s1) + s1 = xor(s1, s2) + s0 = xor(s0, s3) + s2 = xor(s2, t) + s3 = s3 << 45 | s3 >> 19 + end + end + setstate!(rng, (_s0, _s1, _s2, _s3, nothing)) + end + @eval $fname(rng::Xoshiro) = $fname!(copy(rng)) + + @eval function $fname!(rng::Xoshiro, n::Integer) + n < 0 && throw(DomainError(n, "the number of jumps must be ≥ 0")) + i = zero(n) + while i < n + $fname!(rng) + i += one(n) + end + rng + end + + @eval $fname(rng::Xoshiro, n::Integer) = $fname!(copy(rng), n) end -rng_native_52(::Xoshiro) = UInt64 +for (fname, sz) in ((:jump_128, 128), (:jump_192, 192)) + local fname! = Symbol(fname, :!) + local see_other = Symbol(fname === :jump_128 ? :jump_192 : :jump_128) + local see_other! = Symbol(see_other, :!) + local seq_pow = 256 - sz + @eval begin + """ + $($fname!)(rng::Xoshiro, [n::Integer=1]) -@inline function rand(rng::Xoshiro, ::SamplerType{UInt64}) - s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3 - tmp = s0 + s3 - res = ((tmp << 23) | (tmp >> 41)) + s0 - t = s1 << 17 - s2 = xor(s2, s0) - s3 = xor(s3, s1) - s1 = xor(s1, s2) - s0 = xor(s0, s3) - s2 = xor(s2, t) - s3 = s3 << 45 | s3 >> 19 - rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 - res -end + Jump forward, advancing the state equivalent to `2^$($sz)` calls which consume + 8 bytes (i.e. a full `UInt64`) each. + + If `n > 0` is provided, the state is advanced equivalent to `n * 2^$($sz)` calls; if `n = 0`, + the state remains unchanged. + + This can be used to generate `2^$($seq_pow)` non-overlapping subsequences for parallel computations. 
+ + See also: [`$($fname)`](@ref), [`$($see_other!)`](@ref) + + # Examples + ```julia-repl + julia> $($fname!)($($fname!)(Xoshiro(1))) == $($fname!)(Xoshiro(1), 2) + true + ``` + """ + function $fname! end + end + @eval begin + """ + $($fname)(rng::Xoshiro, [n::Integer=1]) + + Return a copy of `rng` with the state advanced equivalent to `n * 2^$($sz)` calls which consume + 8 bytes (i.e. a full `UInt64`) each; if `n = 0`, the state of the returned copy will be + identical to `rng`. + + This can be used to generate `2^$($seq_pow)` non-overlapping subsequences for parallel computations. + + See also: [`$($fname!)`](@ref), [`$($see_other)`](@ref) + + # Examples + ```julia-repl + julia> x = Xoshiro(1); + + julia> $($fname)($($fname)(x)) == $($fname)(x, 2) + true + + julia> $($fname)(x, 0) == x + true + + julia> $($fname)(x, 0) === x + false + ``` + """ + function $fname end + end +end ## Task local RNG @@ -108,58 +197,85 @@ endianness and possibly word size. Using or seeding the RNG of any other task than the one returned by `current_task()` is undefined behavior: it will work most of the time, and may sometimes fail silently. + +When seeding `TaskLocalRNG()` with [`seed!`](@ref), the passed seed, if any, +may be any integer. + +!!! compat "Julia 1.11" + Seeding `TaskLocalRNG()` with a negative integer seed requires at least Julia 1.11. """ struct TaskLocalRNG <: AbstractRNG end TaskLocalRNG(::Nothing) = TaskLocalRNG() -rng_native_52(::TaskLocalRNG) = UInt64 -function setstate!(x::TaskLocalRNG, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64) +@inline function setstate!(x::TaskLocalRNG, (s0, s1, s2, s3, s4)) t = current_task() t.rngState0 = s0 t.rngState1 = s1 t.rngState2 = s2 t.rngState3 = s3 + if s4 !== nothing + t.rngState4 = s4 + end x end -@inline function rand(::TaskLocalRNG, ::SamplerType{UInt64}) - task = current_task() - s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3 - tmp = s0 + s3 - res = ((tmp << 23) | (tmp >> 41)) + s0 - t = s1 << 17 - s2 = xor(s2, s0) - s3 = xor(s3, s1) - s1 = xor(s1, s2) - s0 = xor(s0, s3) - s2 = xor(s2, t) - s3 = s3 << 45 | s3 >> 19 - task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 - res +@inline function getstate(::TaskLocalRNG) + t = current_task() + (t.rngState0, t.rngState1, t.rngState2, t.rngState3, t.rngState4) end -# Shared implementation between Xoshiro and TaskLocalRNG -- seeding +rng_native_52(::TaskLocalRNG) = UInt64 + + +## Shared implementation between Xoshiro and TaskLocalRNG + +# this variant of setstate! initializes the internal splitmix state, a.k.a. `s4` +@inline function initstate!(x::Union{TaskLocalRNG, Xoshiro}, state) + length(state) == 4 && eltype(state) == UInt64 || + throw(ArgumentError("initstate! expects a list of 4 `UInt64` values")) + s0, s1, s2, s3 = state + setstate!(x, (s0, s1, s2, s3, 1s0 + 3s1 + 5s2 + 7s3)) +end -function seed!(rng::Union{TaskLocalRNG,Xoshiro}) +copy(rng::Union{TaskLocalRNG, Xoshiro}) = Xoshiro(getstate(rng)...) 
+copy!(dst::Union{TaskLocalRNG, Xoshiro}, src::Union{TaskLocalRNG, Xoshiro}) = setstate!(dst, getstate(src)) +==(x::Union{TaskLocalRNG, Xoshiro}, y::Union{TaskLocalRNG, Xoshiro}) = getstate(x) == getstate(y) +# use a magic (random) number to scramble `h` so that `hash(x)` is distinct from `hash(getstate(x))` +hash(x::Union{TaskLocalRNG, Xoshiro}, h::UInt) = hash(getstate(x), h + 0x49a62c2dda6fa9be % UInt) + +function seed!(rng::Union{TaskLocalRNG, Xoshiro}, ::Nothing) # as we get good randomness from RandomDevice, we can skip hashing rd = RandomDevice() - setstate!(rng, rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64)) + s0 = rand(rd, UInt64) + s1 = rand(rd, UInt64) + s2 = rand(rd, UInt64) + s3 = rand(rd, UInt64) + initstate!(rng, (s0, s1, s2, s3)) end -function seed!(rng::Union{TaskLocalRNG,Xoshiro}, seed::Union{Vector{UInt32}, Vector{UInt64}}) - c = SHA.SHA2_256_CTX() - SHA.update!(c, reinterpret(UInt8, seed)) - s0, s1, s2, s3 = reinterpret(UInt64, SHA.digest!(c)) - setstate!(rng, s0, s1, s2, s3) -end +seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed) = + initstate!(rng, reinterpret(UInt64, hash_seed(seed))) -seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, make_seed(seed)) +@inline function rand(x::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt64}) + s0, s1, s2, s3 = getstate(x) + tmp = s0 + s3 + res = ((tmp << 23) | (tmp >> 41)) + s0 + t = s1 << 17 + s2 ⊻= s0 + s3 ⊻= s1 + s1 ⊻= s2 + s0 ⊻= s3 + s2 ⊻= t + s3 = s3 << 45 | s3 >> 19 + setstate!(x, (s0, s1, s2, s3, nothing)) + res +end @inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt128}) first = rand(rng, UInt64) second = rand(rng,UInt64) - second + UInt128(first)<<64 + second + UInt128(first) << 64 end @inline rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{Int128}) = rand(rng, UInt128) % Int128 @@ -171,30 +287,6 @@ end (rand(rng, UInt64) >>> (64 - 8*sizeof(S))) % S end -function copy(rng::TaskLocalRNG) - t = current_task() - Xoshiro(t.rngState0, t.rngState1, t.rngState2, t.rngState3) -end - -function copy!(dst::TaskLocalRNG, src::Xoshiro) - t = current_task() - t.rngState0, t.rngState1, t.rngState2, t.rngState3 = src.s0, src.s1, src.s2, src.s3 - dst -end - -function copy!(dst::Xoshiro, src::TaskLocalRNG) - t = current_task() - dst.s0, dst.s1, dst.s2, dst.s3 = t.rngState0, t.rngState1, t.rngState2, t.rngState3 - dst -end - -function ==(a::Xoshiro, b::TaskLocalRNG) - t = current_task() - a.s0 == t.rngState0 && a.s1 == t.rngState1 && a.s2 == t.rngState2 && a.s3 == t.rngState3 -end - -==(a::TaskLocalRNG, b::Xoshiro) = b == a - # for partial words, use upper bits from Xoshiro rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52Raw{UInt64}}) = rand(r, UInt64) >>> 12 diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl index 9fb03f9572688..6d4886f31d22b 100644 --- a/stdlib/Random/src/XoshiroSimd.jl +++ b/stdlib/Random/src/XoshiroSimd.jl @@ -2,9 +2,10 @@ module XoshiroSimd # Getting the xoroshiro RNG to reliably vectorize is somewhat of a hassle without Simd.jl. -import ..Random: TaskLocalRNG, rand, rand!, Xoshiro, CloseOpen01, UnsafeView, - SamplerType, SamplerTrivial +import ..Random: rand! +using ..Random: TaskLocalRNG, rand, Xoshiro, CloseOpen01, UnsafeView, SamplerType, SamplerTrivial, getstate, setstate! using Base: BitInteger_types +using Base.Libc: memcpy using Core.Intrinsics: llvmcall # Vector-width. Influences random stream. 
@@ -148,14 +149,9 @@ _id(x, T) = x nothing end -@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F) where {T, F} - if rng isa TaskLocalRNG - task = current_task() - s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3 - else - (; s0, s1, s2, s3) = rng::Xoshiro - end - +@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F + ) where {T, F} + s0, s1, s2, s3 = getstate(rng) i = 0 while i+8 <= len res = _plus(_rotl23(_plus(s0,s3)),s0) @@ -180,24 +176,14 @@ end s3 = _rotl45(s3) ref = Ref(f(res, T)) # TODO: This may make the random-stream dependent on system endianness - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i) - end - if rng isa TaskLocalRNG - task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 - else - rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 + GC.@preserve ref memcpy(dst+i, Base.unsafe_convert(Ptr{Cvoid}, ref), len-i) end + setstate!(rng, (s0, s1, s2, s3, nothing)) nothing end @noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, f) - if rng isa TaskLocalRNG - task = current_task() - s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3 - else - (; s0, s1, s2, s3) = rng::Xoshiro - end - + s0, s1, s2, s3 = getstate(rng) i = 0 while i+8 <= len res = _plus(_rotl23(_plus(s0,s3)),s0) @@ -222,7 +208,7 @@ end res = _plus(_rotl23(_plus(s0,s3)),s0) resLoc = _and(res, 0x0101010101010101) ref = Ref(resLoc) - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i) + GC.@preserve ref memcpy(dst+i, Base.unsafe_convert(Ptr{Cvoid}, ref), len-i) t = _shl17(s1) s2 = _xor(s2, s0) s3 = _xor(s3, s1) @@ -231,11 +217,7 @@ end s2 = _xor(s2, t) s3 = _rotl45(s3) end - if rng isa TaskLocalRNG - task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 - else - rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3 - end + setstate!(rng, (s0, s1, s2, s3, nothing)) nothing end diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl index ddbf6dce98bec..99017c8c76a82 100644 --- a/stdlib/Random/src/generation.jl +++ b/stdlib/Random/src/generation.jl @@ -132,7 +132,7 @@ rand(r::AbstractRNG, sp::SamplerTrivial{<:UniformBits{T}}) where {T} = # rand_generic methods are intended to help RNG implementors with common operations # we don't call them simply `rand` as this can easily contribute to create -# amibuities with user-side methods (forcing the user to resort to @eval) +# ambiguities with user-side methods (forcing the user to resort to @eval) rand_generic(r::AbstractRNG, T::Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32}) = rand(r, UInt52Raw()) % T[] @@ -167,6 +167,38 @@ function rand(r::AbstractRNG, ::SamplerType{T}) where {T<:AbstractChar} (c < 0xd800) ? 
T(c) : T(c+0x800) end +### random tuples + +function Sampler(::Type{RNG}, ::Type{T}, n::Repetition) where {T<:Tuple, RNG<:AbstractRNG} + tail_sp_ = Sampler(RNG, Tuple{Base.tail(fieldtypes(T))...}, n) + SamplerTag{Ref{T}}((Sampler(RNG, fieldtype(T, 1), n), tail_sp_.data...)) + # Ref so that the gentype is `T` in SamplerTag's constructor +end + +function Sampler(::Type{RNG}, ::Type{Tuple{Vararg{T, N}}}, n::Repetition) where {T, N, RNG<:AbstractRNG} + if N > 0 + SamplerTag{Ref{Tuple{Vararg{T, N}}}}((Sampler(RNG, T, n),)) + else + SamplerTag{Ref{Tuple{}}}(()) + end +end + +function rand(rng::AbstractRNG, sp::SamplerTag{Ref{T}}) where T<:Tuple + ntuple(i -> rand(rng, sp.data[min(i, length(sp.data))]), Val{fieldcount(T)}())::T +end + +### random pairs + +function Sampler(::Type{RNG}, ::Type{Pair{A, B}}, n::Repetition) where {RNG<:AbstractRNG, A, B} + sp1 = Sampler(RNG, A, n) + sp2 = A === B ? sp1 : Sampler(RNG, B, n) + SamplerTag{Ref{Pair{A,B}}}(sp1 => sp2) # Ref so that the gentype is Pair{A, B} + # in SamplerTag's constructor +end + +rand(rng::AbstractRNG, sp::SamplerTag{<:Ref{<:Pair}}) = + rand(rng, sp.data.first) => rand(rng, sp.data.second) + ## Generate random integer within a range @@ -210,9 +242,9 @@ SamplerRangeFast(r::AbstractUnitRange{T}) where T<:BitInteger = SamplerRangeFast(r, uint_sup(T)) function SamplerRangeFast(r::AbstractUnitRange{T}, ::Type{U}) where {T,U} - isempty(r) && throw(ArgumentError("range must be non-empty")) + isempty(r) && throw(ArgumentError("collection must be non-empty")) m = (last(r) - first(r)) % unsigned(T) % U # % unsigned(T) to not propagate sign bit - bw = (sizeof(U) << 3 - leading_zeros(m)) % UInt # bit-width + bw = (Base.top_set_bit(m)) % UInt # bit-width mask = ((1 % U) << bw) - (1 % U) SamplerRangeFast{U,T}(first(r), bw, m, mask) end @@ -284,11 +316,11 @@ SamplerRangeInt(r::AbstractUnitRange{T}) where T<:BitInteger = SamplerRangeInt(r, uint_sup(T)) function SamplerRangeInt(r::AbstractUnitRange{T}, ::Type{U}) where {T,U} - isempty(r) && throw(ArgumentError("range must be non-empty")) + isempty(r) && throw(ArgumentError("collection must be non-empty")) a = first(r) m = (last(r) - first(r)) % unsigned(T) % U k = m + one(U) - bw = (sizeof(U) << 3 - leading_zeros(m)) % Int + bw = (Base.top_set_bit(m)) % Int mult = if U === UInt32 maxmultiple(k) elseif U === UInt64 @@ -330,7 +362,7 @@ struct SamplerRangeNDL{U<:Unsigned,T} <: Sampler{T} end function SamplerRangeNDL(r::AbstractUnitRange{T}) where {T} - isempty(r) && throw(ArgumentError("range must be non-empty")) + isempty(r) && throw(ArgumentError("collection must be non-empty")) a = first(r) U = uint_sup(T) s = (last(r) - first(r)) % unsigned(T) % U + one(U) # overflow ok @@ -369,7 +401,7 @@ end function SamplerBigInt(::Type{RNG}, r::AbstractUnitRange{BigInt}, N::Repetition=Val(Inf) ) where {RNG<:AbstractRNG} m = last(r) - first(r) - m.size < 0 && throw(ArgumentError("range must be non-empty")) + m.size < 0 && throw(ArgumentError("collection must be non-empty")) nlimbs = Int(m.size) hm = nlimbs == 0 ? 
Limb(0) : GC.@preserve m unsafe_load(m.d, nlimbs) highsp = Sampler(RNG, Limb(0):hm, N) @@ -438,6 +470,12 @@ function rand(rng::AbstractRNG, sp::SamplerSimple{<:Dict,<:Sampler}) end end +rand(rng::AbstractRNG, sp::SamplerTrivial{<:Base.KeySet{<:Any,<:Dict}}) = + rand(rng, sp[].dict).first + +rand(rng::AbstractRNG, sp::SamplerTrivial{<:Base.ValueIterator{<:Dict}}) = + rand(rng, sp[].dict).second + ## random values from Set Sampler(::Type{RNG}, t::Set{T}, n::Repetition) where {RNG<:AbstractRNG,T} = diff --git a/stdlib/Random/src/misc.jl b/stdlib/Random/src/misc.jl index b1e3a4808e026..908776383d45f 100644 --- a/stdlib/Random/src/misc.jl +++ b/stdlib/Random/src/misc.jl @@ -17,16 +17,14 @@ Generate a `BitArray` of random boolean values. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> bitrand(rng, 10) +julia> bitrand(Xoshiro(123), 10) 10-element BitVector: 0 - 0 - 0 + 1 0 1 0 + 1 0 0 1 @@ -55,8 +53,8 @@ number generator, see [Random Numbers](@ref). julia> Random.seed!(3); randstring() "Lxz5hUwn" -julia> randstring(MersenneTwister(3), 'a':'z', 6) -"ocucay" +julia> randstring(Xoshiro(3), 'a':'z', 6) +"iyzcsm" julia> randstring("ACGT") "TGCTCCTC" @@ -141,19 +139,17 @@ Like [`randsubseq`](@ref), but the results are stored in `S` # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - julia> S = Int64[]; -julia> randsubseq!(rng, S, 1:8, 0.3) +julia> randsubseq!(Xoshiro(123), S, 1:8, 0.3) 2-element Vector{Int64}: + 4 7 - 8 julia> S 2-element Vector{Int64}: + 4 7 - 8 ``` """ randsubseq!(S::AbstractArray, A::AbstractArray, p::Real) = randsubseq!(default_rng(), S, A, p) @@ -171,12 +167,10 @@ large.) Technically, this process is known as "Bernoulli sampling" of `A`. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> randsubseq(rng, 1:8, 0.3) +julia> randsubseq(Xoshiro(123), 1:8, 0.3) 2-element Vector{Int64}: + 4 7 - 8 ``` """ randsubseq(A::AbstractArray, p::Real) = randsubseq(default_rng(), A, p) @@ -197,42 +191,50 @@ optionally supplying the random-number generator `rng`. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> shuffle!(rng, Vector(1:16)) -16-element Vector{Int64}: - 2 - 15 +julia> shuffle!(Xoshiro(123), Vector(1:10)) +10-element Vector{Int64}: 5 - 14 + 4 + 2 + 3 + 6 + 10 + 8 1 9 - 10 - 6 - 11 - 3 - 16 7 - 4 - 12 - 8 - 13 ``` """ function shuffle!(r::AbstractRNG, a::AbstractArray) + # keep it consistent with `randperm!` and `randcycle!` if possible require_one_based_indexing(a) n = length(a) - n <= 1 && return a # nextpow below won't work with n == 0 @assert n <= Int64(2)^52 - mask = nextpow(2, n) - 1 - for i = n:-1:2 - (mask >> 1) == i && (mask >>= 1) + n == 0 && return a + mask = 3 + @inbounds for i = 2:n j = 1 + rand(r, ltm52(i, mask)) a[i], a[j] = a[j], a[i] + i == 1 + mask && (mask = 2 * mask + 1) end return a end +function shuffle!(r::AbstractRNG, a::AbstractArray{Bool}) + old_count = count(a) + len = length(a) + uncommon_value = 2old_count <= len + fuel = uncommon_value ? old_count : len - old_count + fuel == 0 && return a + a .= !uncommon_value + while fuel > 0 + k = rand(r, eachindex(a)) + fuel -= a[k] != uncommon_value + a[k] = uncommon_value + end + a +end + shuffle!(a::AbstractArray) = shuffle!(default_rng(), a) """ @@ -245,20 +247,18 @@ indices, see [`randperm`](@ref). 
# Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> shuffle(rng, Vector(1:10)) +julia> shuffle(Xoshiro(123), Vector(1:10)) 10-element Vector{Int64}: - 6 - 1 - 10 + 5 + 4 2 3 + 6 + 10 + 8 + 1 9 - 5 7 - 4 - 8 ``` """ shuffle(r::AbstractRNG, a::AbstractArray) = shuffle!(r, copymutable(a)) @@ -285,11 +285,11 @@ To randomly permute an arbitrary vector, see [`shuffle`](@ref) or # Examples ```jldoctest -julia> randperm(MersenneTwister(1234), 4) +julia> randperm(Xoshiro(123), 4) 4-element Vector{Int64}: - 2 1 4 + 2 3 ``` """ @@ -306,15 +306,16 @@ optional `rng` argument specifies a random number generator (see # Examples ```jldoctest -julia> randperm!(MersenneTwister(1234), Vector{Int}(undef, 4)) +julia> randperm!(Xoshiro(123), Vector{Int}(undef, 4)) 4-element Vector{Int64}: - 2 1 4 + 2 3 ``` """ function randperm!(r::AbstractRNG, a::Array{<:Integer}) + # keep it consistent with `shuffle!` and `randcycle!` if possible n = length(a) @assert n <= Int64(2)^52 n == 0 && return a @@ -326,7 +327,7 @@ function randperm!(r::AbstractRNG, a::Array{<:Integer}) a[i] = a[j] end a[j] = i - i == 1+mask && (mask = 2mask + 1) + i == 1 + mask && (mask = 2 * mask + 1) end return a end @@ -343,20 +344,26 @@ Construct a random cyclic permutation of length `n`. The optional `rng` argument specifies a random number generator, see [Random Numbers](@ref). The element type of the result is the same as the type of `n`. +Here, a "cyclic permutation" means that all of the elements lie within +a single cycle. If `n > 0`, there are ``(n-1)!`` possible cyclic permutations, +which are sampled uniformly. If `n == 0`, `randcycle` returns an empty vector. + +[`randcycle!`](@ref) is an in-place variant of this function. + !!! compat "Julia 1.1" - In Julia 1.1 `randcycle` returns a vector `v` with `eltype(v) == typeof(n)` - while in Julia 1.0 `eltype(v) == Int`. + In Julia 1.1 and above, `randcycle` returns a vector `v` with + `eltype(v) == typeof(n)` while in Julia 1.0 `eltype(v) == Int`. # Examples ```jldoctest -julia> randcycle(MersenneTwister(1234), 6) +julia> randcycle(Xoshiro(123), 6) 6-element Vector{Int64}: - 3 5 4 + 2 6 + 3 1 - 2 ``` """ randcycle(r::AbstractRNG, n::T) where {T <: Integer} = randcycle!(r, Vector{T}(undef, n)) @@ -365,33 +372,41 @@ randcycle(n::Integer) = randcycle(default_rng(), n) """ randcycle!([rng=default_rng(),] A::Array{<:Integer}) -Construct in `A` a random cyclic permutation of length `length(A)`. +Construct in `A` a random cyclic permutation of length `n = length(A)`. The optional `rng` argument specifies a random number generator, see [Random Numbers](@ref). +Here, a "cyclic permutation" means that all of the elements lie within a single cycle. +If `A` is nonempty (`n > 0`), there are ``(n-1)!`` possible cyclic permutations, +which are sampled uniformly. If `A` is empty, `randcycle!` leaves it unchanged. + +[`randcycle`](@ref) is a variant of this function that allocates a new vector. 
+ # Examples ```jldoctest -julia> randcycle!(MersenneTwister(1234), Vector{Int}(undef, 6)) +julia> randcycle!(Xoshiro(123), Vector{Int}(undef, 6)) 6-element Vector{Int64}: - 3 5 4 + 2 6 + 3 1 - 2 ``` """ function randcycle!(r::AbstractRNG, a::Array{<:Integer}) + # keep it consistent with `shuffle!` and `randperm!` if possible n = length(a) - n == 0 && return a @assert n <= Int64(2)^52 + n == 0 && return a a[1] = 1 mask = 3 + # Sattolo's algorithm: @inbounds for i = 2:n j = 1 + rand(r, ltm52(i-1, mask)) a[i] = a[j] a[j] = i - i == 1+mask && (mask = 2mask + 1) + i == 1 + mask && (mask = 2 * mask + 1) end return a end diff --git a/stdlib/Random/src/normal.jl b/stdlib/Random/src/normal.jl index 9d0f1595f052f..9f663495c1c20 100644 --- a/stdlib/Random/src/normal.jl +++ b/stdlib/Random/src/normal.jl @@ -14,25 +14,51 @@ Generate a normally-distributed random number of type `T` with mean 0 and standard deviation 1. -Optionally generate an array of normally-distributed random numbers. -The `Base` module currently provides an implementation for the types -[`Float16`](@ref), [`Float32`](@ref), and [`Float64`](@ref) (the default), and their -[`Complex`](@ref) counterparts. When the type argument is complex, the values are drawn -from the circularly symmetric complex normal distribution of variance 1 (corresponding to real and imaginary part having independent normal distribution with mean zero and variance `1/2`). +Given the optional `dims` argument(s), generate an array of size `dims` of such numbers. +Julia's standard library supports `randn` for any floating-point type +that implements [`rand`](@ref), e.g. the `Base` types +[`Float16`](@ref), [`Float32`](@ref), [`Float64`](@ref) (the default), and [`BigFloat`](@ref), +along with their [`Complex`](@ref) counterparts. + +(When `T` is complex, the values are drawn +from the circularly symmetric complex normal distribution of variance 1, corresponding to real and imaginary parts +having independent normal distribution with mean zero and variance `1/2`). + +See also [`randn!`](@ref) to act in-place. # Examples + +Generating a single random number (with the default `Float64` type): + +```julia-repl +julia> randn() +-0.942481877315864 +``` + +Generating a matrix of normal random numbers (with the default `Float64` type): + +```julia-repl +julia> randn(2,3) +2×3 Matrix{Float64}: + 1.18786 -0.678616 1.49463 + -0.342792 -0.134299 -1.45005 +``` + +Setting up of the random number generator `rng` with a user-defined seed (for reproducible numbers) +and using it to generate a random `Float32` number or a matrix of `ComplexF32` random numbers: + ```jldoctest julia> using Random -julia> rng = MersenneTwister(1234); +julia> rng = Xoshiro(123); -julia> randn(rng, ComplexF64) -0.6133070881429037 - 0.6376291670853887im +julia> randn(rng, Float32) +-0.6457307f0 julia> randn(rng, ComplexF32, (2, 3)) 2×3 Matrix{ComplexF32}: - -0.349649-0.638457im 0.376756-0.192146im -0.396334-0.0136413im - 0.611224+1.56403im 0.355204-0.365563im 0.0905552+1.31012im + -1.03467-1.14806im 0.693657+0.056538im 0.291442+0.419454im + -0.153912+0.34807im 1.0954-0.948661im -0.543347-0.0538589im ``` """ @inline function randn(rng::AbstractRNG=default_rng()) @@ -70,8 +96,8 @@ end @noinline function randn_unlikely(rng, idx, rabs, x) @inbounds if idx == 0 while true - xx = -ziggurat_nor_inv_r*log(rand(rng)) - yy = -log(rand(rng)) + xx = -ziggurat_nor_inv_r*log1p(-rand(rng)) + yy = -log1p(-rand(rng)) yy+yy > xx*xx && return (rabs >> 8) % Bool ? 
-ziggurat_nor_r-xx : ziggurat_nor_r+xx end @@ -112,16 +138,16 @@ The `Base` module currently provides an implementation for the types # Examples ```jldoctest -julia> rng = MersenneTwister(1234); +julia> rng = Xoshiro(123); julia> randexp(rng, Float32) -2.4835055f0 +1.1757717f0 julia> randexp(rng, 3, 3) 3×3 Matrix{Float64}: - 1.5167 1.30652 0.344435 - 0.604436 2.78029 0.418516 - 0.695867 0.693292 0.643644 + 1.37766 0.456653 0.236418 + 3.40007 0.229917 0.0684921 + 0.48096 0.577481 0.71835 ``` """ randexp(rng::AbstractRNG=default_rng()) = _randexp(rng, rand(rng, UInt52Raw())) @@ -138,7 +164,7 @@ end @noinline function randexp_unlikely(rng, idx, x) @inbounds if idx == 0 - return ziggurat_exp_r - log(rand(rng)) + return ziggurat_exp_r - log1p(-rand(rng)) elseif (fe[idx] - fe[idx+1])*rand(rng) + fe[idx+1] < exp(-x) return x # return from the triangular area else @@ -160,15 +186,13 @@ Also see the [`rand`](@ref) function. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> randn!(rng, zeros(5)) +julia> randn!(Xoshiro(123), zeros(5)) 5-element Vector{Float64}: - 0.8673472019512456 - -0.9017438158568171 - -0.4944787535042339 - -0.9029142938652416 - 0.8644013132535154 + -0.6457306721039767 + -1.4632513788889214 + -1.6236037455860806 + -0.21766510678354617 + 0.4922456865251828 ``` """ function randn! end @@ -181,15 +205,13 @@ Fill the array `A` with random numbers following the exponential distribution # Examples ```jldoctest -julia> rng = MersenneTwister(1234); - -julia> randexp!(rng, zeros(5)) +julia> randexp!(Xoshiro(123), zeros(5)) 5-element Vector{Float64}: - 2.4835053723904896 - 1.516703605376473 - 0.6044364871025417 - 0.6958665886385867 - 1.3065196315496677 + 1.1757716836348473 + 1.758884569451514 + 1.0083623637301151 + 0.3510644315565272 + 0.6348266443720407 ``` """ function randexp! end diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl index 616aa80a20dca..61cee5c952c51 100644 --- a/stdlib/Random/test/runtests.jl +++ b/stdlib/Random/test/runtests.jl @@ -10,94 +10,65 @@ using .Main.OffsetArrays using Random using Random.DSFMT -using Random: Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I +using Random: default_rng, Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I +using Random: jump_128, jump_192, jump_128!, jump_192! 
import Future # randjump -@testset "Issue #6573" begin - Random.seed!(0) - rand() - x = rand(384) - @test findall(x .== rand()) == [] -end - -@test rand() != rand() -@test 0.0 <= rand() < 1.0 -@test rand(UInt32) >= 0 -@test -10 <= rand(-10:-5) <= -5 -@test -10 <= rand(-10:5) <= 5 -@test minimum([rand(Int32(1):Int32(7^7)) for i = 1:100000]) > 0 -@test typeof(rand(false:true)) === Bool -@test typeof(rand(Char)) === Char -@test length(randn(4, 5)) == 20 -@test length(randn(ComplexF64, 4, 5)) == 20 -@test length(bitrand(4, 5)) == 20 - -@test rand(MersenneTwister(0)) == 0.8236475079774124 -@test rand(MersenneTwister(42)) == 0.5331830160438613 -# Try a seed larger than 2^32 -@test rand(MersenneTwister(5294967296)) == 0.3498809918210497 - -# Test array filling, Issues #7643, #8360 -@test rand(MersenneTwister(0), 1) == [0.8236475079774124] -let A = zeros(2, 2) - rand!(MersenneTwister(0), A) - @test A == [0.8236475079774124 0.16456579813368521; - 0.9103565379264364 0.17732884646626457] -end -let A = zeros(2, 2) - @test_throws MethodError rand!(MersenneTwister(0), A, 5) - @test rand(MersenneTwister(0), Int64, 1) == [-3433174948434291912] -end -let A = zeros(Int64, 2, 2) - rand!(MersenneTwister(0), A) - @test A == [858542123778948672 5715075217119798169; - 8690327730555225005 8435109092665372532] -end - -# rand from AbstractArray -let mt = MersenneTwister() - @test rand(mt, 0:3:1000) in 0:3:1000 - @test issubset(rand!(mt, Vector{Int}(undef, 100), 0:3:1000), 0:3:1000) - coll = Any[2, UInt128(128), big(619), "string"] - @test rand(mt, coll) in coll - @test issubset(rand(mt, coll, 2, 3), coll) - - # check API with default RNG: - rand(0:3:1000) - rand!(Vector{Int}(undef, 100), 0:3:1000) - rand(coll) - rand(coll, 2, 3) -end - -# randn -@test randn(MersenneTwister(42)) == -0.5560268761463861 -let A = zeros(2, 2) - randn!(MersenneTwister(42), A) - @test A == [-0.5560268761463861 0.027155338009193845; - -0.444383357109696 -0.29948409035891055] -end - -let B = zeros(ComplexF64, 2) - randn!(MersenneTwister(42), B) - @test B == [ComplexF64(-0.5560268761463861,-0.444383357109696), - ComplexF64(0.027155338009193845,-0.29948409035891055)] * 0.7071067811865475244008 -end - -for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt, - Float16, Float32, Float64, Rational{Int}) - r = rand(convert(T, 97):convert(T, 122)) - @test typeof(r) == T - @test 97 <= r <= 122 - r = rand(convert(T, 97):convert(T,2):convert(T, 122),2)[1] - @test typeof(r) == T - @test 97 <= r <= 122 - @test mod(r,2)==1 - - if T<:Integer && !(T===BigInt) - x = rand(typemin(T):typemax(T)) - @test isa(x,T) - @test typemin(T) <= x <= typemax(T) +function test_uniform(xs::AbstractArray{T}) where {T<:AbstractFloat} + if precision(T) >= precision(Float32) # TODO: refine + @test allunique(xs) + end + @test all(x -> zero(x) <= x < one(x), xs) +end + +function test_uniform(xs::AbstractArray{T}) where {T<:Integer} + if !Base.hastypemax(T) || widen(typemax(T)) - widen(typemin(T)) >= 2^30 # TODO: refine + @test allunique(xs) + end +end + + +@testset "MersenneTwister: do not do update the same global state in incompatible ways" begin + # Issue #6573 + mm = MersenneTwister(rand(UInt128)) + rand(mm) + xs = rand(mm, 384) + @test rand(mm) ∉ xs + test_uniform(xs) +end + +@testset "rand from AbstractArray" begin + seed = rand(UInt128) + for rng ∈ ([MersenneTwister(seed)], [Xoshiro(seed)], []) + # issue 8257 + i8257 = 1:1/3:100 + for _ = 1:100 + @test rand(rng... 
,i8257) in i8257 + end + + @test rand(rng..., 0:3:1000) in 0:3:1000 + @test issubset(rand!(rng..., Vector{Int}(undef, 100), 0:3:1000), 0:3:1000) + coll = Any[2, UInt128(128), big(619), "string"] + @test rand(rng..., coll) in coll + @test issubset(rand(rng..., coll, 2, 3), coll) + + for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt, + Float16, Float32, Float64, Rational{Int}) + r = rand(rng..., convert(T, 97):convert(T, 122)) + @test typeof(r) == T + @test 97 <= r <= 122 + r = rand(rng..., convert(T, 97):convert(T,2):convert(T, 122),2)[1] + @test typeof(r) == T + @test 97 <= r <= 122 + @test mod(r,2)==1 + + if T<:Integer && !(T===BigInt) + x = rand(rng..., typemin(T):typemax(T)) + @test isa(x,T) + @test typemin(T) <= x <= typemax(T) + end + end end end @@ -227,54 +198,35 @@ for U in (Int64, UInt64) for k in 13 .+ Int64(2).^(1:30)) end -#issue 8257 -let i8257 = 1:1/3:100 - for i = 1:100 - @test rand(i8257) in i8257 - end -end - -# test code paths of rand! - -let mt = MersenneTwister(0) - A128 = Vector{UInt128}() +@testset "test code paths of rand!(::MersenneTwister)" begin + mt = MersenneTwister(rand(UInt128)) + A128 = UInt128[] @test length(rand!(mt, A128)) == 0 - for (i,n) in enumerate([1, 3, 5, 6, 10, 11, 30]) + for (i, n) in enumerate([1, 3, 5, 6, 10, 11, 30]) resize!(A128, n) rand!(mt, A128) @test length(A128) == n - @test A128[end] == UInt128[0x15de6b23025813ad129841f537a04e40, - 0xcfa4db38a2c65bc4f18c07dc91125edf, - 0x33bec08136f19b54290982449b3900d5, - 0xde41af3463e74cb830dad4add353ca20, - 0x066d8695ebf85f833427c93416193e1f, - 0x48fab49cc9fcee1c920d6dae629af446, - 0x4b54632b4619f4eca22675166784d229][i] - end - - Random.seed!(mt, 0) - Aend = Any[] - Bend = Any[] - for (i,T) in enumerate([Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, Float16, Float32]) + test_uniform(A128) + end + + for (i, T) in enumerate([Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, Float16, Float32]) A = Vector{T}(undef, 16) B = Vector{T}(undef, 31) rand!(mt, A) rand!(mt, B) - push!(Aend, A[end]) - push!(Bend, B[end]) + @test length(A) == 16 + @test length(B) == 31 + test_uniform(A) + test_uniform(B) end - @test Aend == Any[21, 0x7b, 17385, 0x3086, -1574090021, 0xadcb4460, 6797283068698303107, 0x68a9f9865393cfd6, - 33687499368208574024854346399216845930, Float16(0.7744), 0.97259974f0] - @test Bend == Any[49, 0x65, -3725, 0x719d, 814246081, 0xdf61843a, -3433174948434291912, 0xd461716f27c91500, - -85900088726243933988214632401750448432, Float16(0.10645), 0.13879478f0] - Random.seed!(mt, 0) AF64 = Vector{Float64}(undef, Random.dsfmt_get_min_array_size()-1) - @test rand!(mt, AF64)[end] == 0.957735065345398 - @test rand!(mt, AF64)[end] == 0.6492481059865669 + rand!(mt, AF64) + test_uniform(AF64) resize!(AF64, 2*length(mt.vals)) - @test invoke(rand!, Tuple{MersenneTwister,AbstractArray{Float64},Random.SamplerTrivial{Random.CloseOpen01_64}}, - mt, AF64, Random.SamplerTrivial(Random.CloseOpen01()))[end] == 0.1142787906708973 + invoke(rand!, Tuple{MersenneTwister,AbstractArray{Float64},Random.SamplerTrivial{Random.CloseOpen01_64}}, + mt, AF64, Random.SamplerTrivial(Random.CloseOpen01())) + test_uniform(AF64) end # Issue #9037 @@ -297,7 +249,7 @@ let mt = MersenneTwister(0) Random.seed!(mt, 0) rand(mt) # this is to fill mt.vals, cf. 
#9040 rand!(mt, A) # must not segfault even if Int(pointer(A)) % 16 != 0 - @test A[end-4:end] == [0.3371041633752143, 0.41147647589610803, 0.6063082992397912, 0.9103565379264364, 0.16456579813368521] + test_uniform(A) end end @@ -331,15 +283,23 @@ end # test all rand APIs for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) + realrng = rng == [] ? default_rng() : only(rng) ftypes = [Float16, Float32, Float64, FakeFloat64, BigFloat] cftypes = [ComplexF16, ComplexF32, ComplexF64, ftypes...] - types = [Bool, Char, BigFloat, Base.BitInteger_types..., ftypes...] + types = [Bool, Char, BigFloat, Tuple{Bool, Tuple{Int, Char}}, Pair{Int8, UInt32}, + Base.BitInteger_types..., cftypes...] randset = Set(rand(Int, 20)) randdict = Dict(zip(rand(Int,10), rand(Int, 10))) + + randwidetup = Tuple{Bool, Char, Vararg{Tuple{Int, Float64}, 14}} + @inferred rand(rng..., randwidetup) + collections = [BitSet(rand(1:100, 20)) => Int, randset => Int, GenericSet(randset) => Int, randdict => Pair{Int,Int}, + keys(randdict) => Int, + values(randdict) => Int, GenericDict(randdict) => Pair{Int,Int}, 1:100 => Int, rand(Int, 100) => Int, @@ -354,42 +314,55 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) b2 = big(2) u3 = UInt(3) for f in [rand, randn, randexp] - f(rng...) ::Float64 - f(rng..., 5) ::Vector{Float64} - f(rng..., 2, 3) ::Array{Float64, 2} - f(rng..., b2, u3) ::Array{Float64, 2} + f1 = f(rng...) ::Float64 + f2 = f(rng..., 5) ::Vector{Float64} + f3 = f(rng..., 2, 3) ::Array{Float64, 2} + f4 = f(rng..., b2, u3) ::Array{Float64, 2} + @test size(f1) == () + @test size(f2) == (5,) + @test size(f3) == size(f4) == (2, 3) for T in functypes[f] - a0 = f(rng..., T) ::T - a1 = f(rng..., T, 5) ::Vector{T} - a2 = f(rng..., T, 2, 3) ::Array{T, 2} - a3 = f(rng..., T, b2, u3) ::Array{T, 2} - a4 = f(rng..., T, (2, 3)) ::Array{T, 2} - if T <: AbstractFloat && f === rand - for a in [a0, a1..., a2..., a3..., a4...] - @test 0.0 <= a < 1.0 + tts = f == rand ? (T, Sampler(realrng, T, Val(1)), Sampler(realrng, T, Val(Inf))) : (T,) + for tt in tts + a0 = f(rng..., tt) ::T + a1 = f(rng..., tt, 5) ::Vector{T} + a2 = f(rng..., tt, 2, 3) ::Array{T, 2} + a3 = f(rng..., tt, b2, u3) ::Array{T, 2} + a4 = f(rng..., tt, (2, 3)) ::Array{T, 2} + if T <: Number + @test size(a0) == () + end + @test size(a1) == (5,) + @test size(a2) == size(a3) == size(a4) == (2, 3) + if T <: AbstractFloat && f === rand + for a in T[a0, a1..., a2..., a3..., a4...] + @test 0.0 <= a < 1.0 + end end end end end for (C, T) in collections - a0 = rand(rng..., C) ::T - a1 = rand(rng..., C, 5) ::Vector{T} - a2 = rand(rng..., C, 2, 3) ::Array{T, 2} - a3 = rand(rng..., C, (2, 3)) ::Array{T, 2} - a4 = rand(rng..., C, b2, u3) ::Array{T, 2} - a5 = rand!(rng..., Array{T}(undef, 5), C) ::Vector{T} - a6 = rand!(rng..., Array{T}(undef, 2, 3), C) ::Array{T, 2} - a7 = rand!(rng..., GenericArray{T}(undef, 5), C) ::GenericArray{T, 1} - a8 = rand!(rng..., GenericArray{T}(undef, 2, 3), C) ::GenericArray{T, 2} - a9 = rand!(rng..., OffsetArray(Array{T}(undef, 5), 9), C) ::OffsetArray{T, 1} - a10 = rand!(rng..., OffsetArray(Array{T}(undef, 2, 3), (-2, 4)), C) ::OffsetArray{T, 2} - @test size(a1) == (5,) - @test size(a2) == size(a3) == (2, 3) - for a in [a0, a1..., a2..., a3..., a4..., a5..., a6..., a7..., a8..., a9..., a10...] 
- if C isa Type - @test a isa C - else - @test a in C + for cc = (C, Sampler(realrng, C, Val(1)), Sampler(realrng, C, Val(Inf))) + a0 = rand(rng..., cc) ::T + a1 = rand(rng..., cc, 5) ::Vector{T} + a2 = rand(rng..., cc, 2, 3) ::Array{T, 2} + a3 = rand(rng..., cc, (2, 3)) ::Array{T, 2} + a4 = rand(rng..., cc, b2, u3) ::Array{T, 2} + a5 = rand!(rng..., Array{T}(undef, 5), cc) ::Vector{T} + a6 = rand!(rng..., Array{T}(undef, 2, 3), cc) ::Array{T, 2} + a7 = rand!(rng..., GenericArray{T}(undef, 5), cc) ::GenericArray{T, 1} + a8 = rand!(rng..., GenericArray{T}(undef, 2, 3), cc) ::GenericArray{T, 2} + a9 = rand!(rng..., OffsetArray(Array{T}(undef, 5), 9), cc) ::OffsetArray{T, 1} + a10 = rand!(rng..., OffsetArray(Array{T}(undef, 2, 3), (-2, 4)), cc) ::OffsetArray{T, 2} + @test size(a1) == (5,) + @test size(a2) == size(a3) == (2, 3) + for a in [a0, a1..., a2..., a3..., a4..., a5..., a6..., a7..., a8..., a9..., a10...] + if C isa Type + @test a isa C + else + @test a in C + end end end end @@ -401,6 +374,7 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) end for f! in [rand!, randn!, randexp!] for T in functypes[f!] + (T <: Tuple || T <: Pair) && continue X = T == Bool ? T[0,1] : T[0,1,2] for A in (Vector{T}(undef, 5), Matrix{T}(undef, 2, 3), @@ -409,7 +383,8 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) OffsetArray(Array{T}(undef, 5), -3), OffsetArray(Array{T}(undef, 2, 3), (4, 5))) local A - f!(rng..., A) ::typeof(A) + A2 = f!(rng..., A) ::typeof(A) + @test A2 === A if f! === rand! f!(rng..., A, X) ::typeof(A) if A isa Array && T !== Char # Char/Integer comparison @@ -421,11 +396,16 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) end end - bitrand(rng..., 5) ::BitArray{1} - bitrand(rng..., 2, 3) ::BitArray{2} - bitrand(rng..., b2, u3) ::BitArray{2} - rand!(rng..., BitVector(undef, 5)) ::BitArray{1} - rand!(rng..., BitMatrix(undef, 2, 3)) ::BitArray{2} + z1 = bitrand(rng..., 5) ::BitArray{1} + @test size(z1) == (5,) + z2 = bitrand(rng..., 2, 3) ::BitArray{2} + @test size(z2) == (2, 3) + z3 = bitrand(rng..., b2, u3) ::BitArray{2} + @test size(z3) == (b2, u3) + z4 = rand!(rng..., BitVector(undef, 5)) ::BitArray{1} + @test size(z4) == (5,) + z5 = rand!(rng..., BitMatrix(undef, 2, 3)) ::BitArray{2} + @test size(z5) == (2, 3) # Test that you cannot call randn or randexp with non-Float types. for r in [randn, randexp] @@ -441,6 +421,10 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()]) @test_throws MethodError r(rng..., Number, (2,3)) @test_throws MethodError r(rng..., Any, 1) end + + # Test that you cannot call rand with a tuple type of unknown size or with isbits parameters + @test_throws ArgumentError rand(rng..., Tuple{Vararg{Int}}) + @test_throws TypeError rand(rng..., Tuple{1:2}) end function hist(X, n) @@ -451,17 +435,22 @@ function hist(X, n) v end -# test uniform distribution of floats -for rng in [MersenneTwister(), RandomDevice(), Xoshiro()], - T in [Float16, Float32, Float64, BigFloat], +@testset "uniform distribution of floats" begin + for rng in [MersenneTwister(), RandomDevice(), Xoshiro()], + T in [Float16, Float32, Float64, BigFloat], prec in (T == BigFloat ? 
[3, 53, 64, 100, 256, 1000] : [256]) - setprecision(BigFloat, prec) do - # array version - counts = hist(rand(rng, T, 2000), 4) - @test minimum(counts) > 300 # should fail with proba < 1e-26 - # scalar version - counts = hist([rand(rng, T) for i in 1:2000], 4) - @test minimum(counts) > 300 + + setprecision(BigFloat, prec) do + if precision(T) >= precision(Float32) + @test rand(rng, T) != rand(rng, T) + end + # array version + counts = hist(rand(rng, T, 2000), 4) + @test minimum(counts) > 300 # should fail with proba < 1e-26 + # scalar version + counts = hist([rand(rng, T) for i in 1:2000], 4) + @test minimum(counts) > 300 + end end end @@ -523,6 +512,7 @@ end @test shuffle!(mta,Vector(1:10)) == shuffle!(mtb,Vector(1:10)) @test shuffle(mta,Vector(2:11)) == shuffle(mtb,2:11) @test shuffle!(mta, rand(mta, 2, 3)) == shuffle!(mtb, rand(mtb, 2, 3)) + @test shuffle!(mta, rand(mta, Bool, 2, 3)) == shuffle!(mtb, rand(mtb, Bool, 2, 3)) @test shuffle(mta, rand(mta, 2, 3)) == shuffle(mtb, rand(mtb, 2, 3)) @test randperm(mta,10) == randperm(mtb,10) @@ -588,30 +578,41 @@ end end end -# test that the following is not an error (#16925) -guardseed() do - Random.seed!(typemax(UInt)) - Random.seed!(typemax(UInt128)) -end - -# copy, == and hash -let seed = rand(UInt32, 10) - r = MersenneTwister(seed) - @test r == MersenneTwister(seed) # r.vals should be all zeros - @test hash(r) == hash(MersenneTwister(seed)) - s = copy(r) - @test s == r && s !== r - @test hash(s) == hash(r) - skip, len = rand(0:2000, 2) - for j=1:skip - rand(r) - rand(s) - end - @test rand(r, len) == rand(s, len) - @test s == r - @test hash(s) == hash(r) - h = rand(UInt) - @test hash(s, h) == hash(r, h) +@testset "copy, == and hash" begin + for RNG = (MersenneTwister, Xoshiro) + seed = rand(UInt32, 10) + r = RNG(seed) + t = RNG(seed) + @test r == t + @test hash(r) == hash(t) + s = copy(r) + @test s == r == t && s !== r + @test hash(s) == hash(r) + skip, len = rand(0:2000, 2) + for j=1:skip + rand(r) + @test r != s + @test hash(r) != hash(s) + rand(s) + end + @test rand(r, len) == rand(s, len) + @test s == r + @test hash(s) == hash(r) + h = rand(UInt) + @test hash(s, h) == hash(r, h) + if RNG == Xoshiro + t = copy(TaskLocalRNG()) + @test hash(t) == hash(TaskLocalRNG()) + @test hash(t, h) == hash(TaskLocalRNG(), h) + x = rand() + @test hash(t) != hash(TaskLocalRNG()) + @test rand(t) == x + @test hash(t) == hash(TaskLocalRNG()) + copy!(TaskLocalRNG(), r) + @test hash(TaskLocalRNG()) == hash(r) + @test TaskLocalRNG() == r + end + end end # MersenneTwister initialization with invalid values @@ -633,9 +634,7 @@ end let seed = rand(UInt32, 10) r = MersenneTwister(seed) @test r.seed == seed && r.seed !== seed - # RNGs do not share their seed in randjump let r2 = Future.randjump(r, big(10)^20) - @test r.seed !== r2.seed Random.seed!(r2) @test seed == r.seed != r2.seed end @@ -643,17 +642,23 @@ let seed = rand(UInt32, 10) @test r.seed != seed end -# Random.seed!(rng, ...) returns rng (#21248) -guardseed() do - g = Random.default_rng() - m = MersenneTwister(0) - @test Random.seed!() === g - @test Random.seed!(rand(UInt)) === g - @test Random.seed!(rand(UInt32, rand(1:8))) === g - @test Random.seed!(m) === m - @test Random.seed!(m, rand(UInt)) === m - @test Random.seed!(m, rand(UInt32, rand(1:10))) === m - @test Random.seed!(m, rand(1:10)) === m +@testset "Random.seed!(rng, ...) returns rng" begin + # issue #21248 + seed = rand(UInt) + for m = ([MersenneTwister(seed)], [Xoshiro(seed)], []) + m2 = m == [] ? default_rng() : m[1] + @test Random.seed!(m...) 
=== m2 + @test Random.seed!(m..., rand(UInt)) === m2 + @test Random.seed!(m..., rand(UInt32, rand(1:10))) === m2 + @test Random.seed!(m..., rand(1:10)) === m2 + # Try a seed larger than 2^32 + @test Random.seed!(m..., 5294967296) === m2 + + # test that the following is not an error (#16925) + @test Random.seed!(m..., typemax(UInt)) === m2 + @test Random.seed!(m..., typemax(UInt128)) === m2 + @test Random.seed!(m..., "a random seed") === m2 + end end # Issue 20062 - ensure internal functions reserve_1, reserve are type-stable @@ -707,7 +712,7 @@ end end @testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro) - seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...] + seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), randstring(), randstring(), rand(UInt128, 3)...] if RNG == Xoshiro push!(seeds, rand(UInt64, rand(1:4))) end @@ -720,37 +725,19 @@ end end @testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin - seeds = Any[0, rand(UInt128), rand(UInt64, 4)] + seeds = Any[0, rand(UInt128), rand(UInt64, 4), randstring(20)] for seed=seeds Random.seed!(seed) - @test Random.GLOBAL_SEED === seed + @test Random.get_tls_seed() == default_rng() end - # two separate loops as otherwise we are no sure that the second call (with GLOBAL_RNG) - # actually sets GLOBAL_SEED - for seed=seeds - Random.seed!(Random.GLOBAL_RNG, seed) - @test Random.GLOBAL_SEED === seed - end - - Random.seed!(nothing) - seed1 = Random.GLOBAL_SEED - @test seed1 isa Vector{UInt64} # could change, but must not be nothing - Random.seed!(Random.GLOBAL_RNG, nothing) - seed2 = Random.GLOBAL_SEED - @test seed2 isa Vector{UInt64} - @test seed2 != seed1 - - Random.seed!() - seed3 = Random.GLOBAL_SEED - @test seed3 isa Vector{UInt64} - @test seed3 != seed2 - - Random.seed!(Random.GLOBAL_RNG) - seed4 = Random.GLOBAL_SEED - @test seed4 isa Vector{UInt64} - @test seed4 != seed3 + for ii = 1:8 + iseven(ii) ? 
Random.seed!(nothing) : Random.seed!() + push!(seeds, copy(Random.get_tls_seed())) + @test Random.get_tls_seed() isa Xoshiro # could change, but must not be nothing + end + @test allunique(seeds) end struct RandomStruct23964 end @@ -760,9 +747,20 @@ struct RandomStruct23964 end end @testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)), RandomDevice(), Xoshiro()), - T ∈ (Int8, Int16, Int32, UInt32, Int64, Int128, UInt128) - for S in (SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL) - S == SamplerRangeNDL && sizeof(T) > 8 && continue + T ∈ (Bool, Int8, Int16, Int32, UInt32, Int64, Int128, UInt128) + if T === Bool + @test rand(RNG, false:true) ∈ (false, true) + @test rand(RNG, false:false) === false + @test rand(RNG, true:true) === true + @test_throws ArgumentError rand(RNG, true:false) + continue + end + for S in (identity, SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL) + if T === Int32 && RNG isa MersenneTwister + @test minimum([rand(RNG, T(1):T(7^7)) for i = 1:100000]) > 0 + end + + (S == SamplerRangeNDL || S == identity) && sizeof(T) > 8 && continue r = T(1):T(108) @test rand(RNG, S(r)) ∈ r @test rand(RNG, S(typemin(T):typemax(T))) isa T @@ -803,10 +801,18 @@ end end end +@testset "rand(::Type{<:Tuple})" begin + @test_throws ArgumentError rand(Tuple) + @test rand(Tuple{}) == () + @inferred rand(Tuple{Int32,Int64,Float64}) + @inferred rand(NTuple{20,Int}) + @test_throws TypeError rand(Tuple{1:2,3:4}) +end + @testset "GLOBAL_RNG" begin + @test VERSION < v"2" # deprecate this in v2 (GLOBAL_RNG must go) local GLOBAL_RNG = Random.GLOBAL_RNG local LOCAL_RNG = Random.default_rng() - @test VERSION < v"2" # deprecate this in v2 @test Random.seed!(GLOBAL_RNG, nothing) === LOCAL_RNG @test Random.seed!(GLOBAL_RNG, UInt32[0]) === LOCAL_RNG @@ -888,7 +894,8 @@ end @test (x >> 64) % UInt64 == xs[end-6] @test x % UInt64 == xs[end-7] x = rand(m, UInt64) - @test x == xs[end-8] # should not be == xs[end-7] + @test x == xs[end-8] + @test x != xs[end-7] s = Set{UInt64}() n = 0 @@ -921,14 +928,29 @@ end @test m == MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1)) m = MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b) - @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b)" + @test string(m) == "MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b)" rand(m, Int64) - @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))" + @test string(m) == "MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))" @test m == MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1)) m = MersenneTwister(0); rand(m, Int64); rand(m) @test string(m) == "MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))" @test m == MersenneTwister(0, (0, 2256, 1254, 1, 0, 1)) + + # negative seeds + Random.seed!(m, -3) + @test string(m) == "MersenneTwister(-3)" + Random.seed!(m, typemin(Int8)) + @test string(m) == "MersenneTwister(-128)" + + # string seeds + Random.seed!(m, "seed 1") + @test string(m) == "MersenneTwister(\"seed 1\")" + x = rand(m) + @test x == rand(MersenneTwister("seed 1")) + @test string(m) == """MersenneTwister("seed 1", (0, 1002, 0, 1))""" + # test that MersenneTwister's fancy constructors accept string seeds + @test MersenneTwister("seed 1", (0, 1002, 0, 1)) == m end @testset "RandomDevice" begin @@ -994,3 +1016,219 @@ end @test minimum(m) >= 0.094 @test maximum(m) <= 0.106 end + +# issue #42752 +# test that running finalizers that launch tasks doesn't change RNG stream +function 
f42752(do_gc::Bool, cell = (()->Any[[]])()) + a = rand() + if do_gc + finalizer(cell[1]) do _ + @async nothing + end + cell[1] = nothing + GC.gc() + end + b = rand() + (a, b) +end +guardseed() do + for _ in 1:4 + Random.seed!(1) + val = f42752(false) + Random.seed!(1) + @test f42752(true) === val + end +end + +@testset "TaskLocalRNG: stream collision smoke test" begin + # spawn a trinary tree of tasks: + # - spawn three recursive child tasks in each + # - generate a random UInt64 in each before, after and between + # - collect and count all the generated random values + # these should all be distinct across all tasks + function gen(d) + r = rand(UInt64) + vals = [r] + if d ≥ 0 + append!(vals, gent(d - 1)) + isodd(r) && append!(vals, gent(d - 1)) + push!(vals, rand(UInt64)) + iseven(r) && append!(vals, gent(d - 1)) + end + push!(vals, rand(UInt64)) + end + gent(d) = fetch(@async gen(d)) + seeds = rand(RandomDevice(), UInt64, 5) + for seed in seeds + Random.seed!(seed) + vals = gen(6) + @test allunique(vals) + end +end + +@testset "TaskLocalRNG: child doesn't affect parent" begin + seeds = rand(RandomDevice(), UInt64, 5) + for seed in seeds + Random.seed!(seed) + x = rand(UInt64) + y = rand(UInt64) + n = 3 + for i = 1:n + Random.seed!(seed) + @sync for j = 0:i + @async rand(UInt64) + end + @test x == rand(UInt64) + @sync for j = 0:(n-i) + @async rand(UInt64) + end + @test y == rand(UInt64) + end + end +end + +@testset "TaskLocalRNG: copy and copy! handle the splitmix state" begin + seeds = rand(RandomDevice(), UInt64, 5) + for seed in seeds + Random.seed!(seed) + rng1 = copy(TaskLocalRNG()) + x = fetch(@async rand(UInt64)) + rng2 = copy(TaskLocalRNG()) + y = fetch(@async rand(UInt64)) + rng3 = copy(TaskLocalRNG()) + @test x != y + @test rng1 != rng2 + Random.seed!(seed) + @test TaskLocalRNG() == rng1 + @test x == fetch(@async rand(UInt64)) + @test TaskLocalRNG() == rng2 + # this should be a no-op: + copy!(TaskLocalRNG(), copy(TaskLocalRNG())) + @test TaskLocalRNG() == rng2 + @test y == fetch(@async rand(UInt64)) + @test TaskLocalRNG() == rng3 + end +end + +# Xoshiro jumps +@testset "Xoshiro jump, basic" begin + x1 = Xoshiro(1) + x2 = Xoshiro(1) + + @test x1 === jump_128!(jump_128!(x1)) + @test x2 === jump_128!(x2, 2) + @test x1 == x2 + + xo1 = Xoshiro(0xfff0241072ddab67, 0xc53bc12f4c3f0b4e, 0x56d451780b2dd4ba, 0x50a4aa153d208dd8) + @test rand(jump_128(xo1), UInt64) == 0x87c158da8c35824d + @test rand(jump_192(xo1), UInt64) == 0xcaecd5afdd0847d5 + + @test rand(jump_128(xo1, 98765), UInt64) == 0xcbec1d5053142608 + @test rand(jump_192(xo1, 98765), UInt64) == 0x3b97a94c44d66216 + + # Throws where appropriate + @test_throws DomainError jump_128(Xoshiro(1), -1) + @test_throws DomainError jump_128!(Xoshiro(1), -1) + @test_throws DomainError jump_192(Xoshiro(1), -1) + @test_throws DomainError jump_192!(Xoshiro(1), -1) + + # clean copy when non-mut and no state advance + x = Xoshiro(1) + @test jump_128(x, 0) == x + @test jump_128(x, 0) !== x + @test jump_192(x, 0) == x + @test jump_192(x, 0) !== x + + y = Xoshiro(1) + @test jump_128!(x, 0) == y + @test jump_192!(x, 0) == y +end + +@testset "Xoshiro jump_128, various seeds" begin + for seed in (0, 1, 0xa0a3f09d0cecd878, 0x7ff8) + x = Xoshiro(seed) + @test jump_128(jump_128(jump_128(x))) == jump_128(x, 3) + x1 = Xoshiro(seed) + @test jump_128!(jump_128!(jump_128!(x1))) == jump_128(x, 3) + jump_128!(x1, 997) + x2 = jump_128!(Xoshiro(seed), 1000) + for T ∈ (Float64, UInt64, Int, Char, Bool) + @test rand(x1, T, 5) == rand(x2, T, 5) + @test 
rand(jump_128!(x1), T, 5) == rand(jump_128!(x2), T, 5) + end + end +end + +@testset "Xoshiro jump_192, various seeds" begin + for seed in (0, 1, 0xa0a3f09d0cecd878, 0x7ff8) + x = Xoshiro(seed) + @test jump_192(jump_192(jump_192(x))) == jump_192(x, 3) + x1 = Xoshiro(seed) + @test jump_192!(jump_192!(jump_192!(x1))) == jump_192(x, 3) + jump_192!(x1, 997) + x2 = jump_192!(Xoshiro(seed), 1000) + for T ∈ (Float64, UInt64, Int, Char, Bool) + @test rand(x1, T, 5) == rand(x2, T, 5) + @test rand(jump_192!(x1), T, 5) == rand(jump_192!(x2), T, 5) + end + end +end + +@testset "seed! and hash_seed" begin + # Test that: + # 1) if n == m, then hash_seed(n) == hash_seed(m) + # 2) if n != m, then hash_seed(n) != hash_seed(m) + rngs = (Xoshiro(0), TaskLocalRNG(), MersenneTwister(0)) + seeds = Any[] + for T = Base.BitInteger_types + append!(seeds, rand(T, 8)) + push!(seeds, typemin(T), typemin(T) + T(1), typemin(T) + T(2), + typemax(T), typemax(T) - T(1), typemax(T) - T(2)) + T <: Signed && push!(seeds, T(0), T(1), T(2), T(-1), T(-2)) + end + + vseeds = Dict{Vector{UInt8}, BigInt}() + for seed = seeds + bigseed = big(seed) + vseed = Random.hash_seed(bigseed) + # test property 1) above + @test Random.hash_seed(seed) == vseed + # test property 2) above + @test bigseed == get!(vseeds, vseed, bigseed) + # test that the property 1) is actually inherited by `seed!` + for rng = rngs + rng2 = copy(Random.seed!(rng, seed)) + Random.seed!(rng, bigseed) + @test rng == rng2 + end + end + + seed32 = rand(UInt32, rand(1:9)) + hash32 = Random.hash_seed(seed32) + @test Random.hash_seed(map(UInt64, seed32)) == hash32 + @test hash32 ∉ keys(vseeds) + + seed_str = randstring() + seed_gstr = GenericString(seed_str) + @test Random.hash_seed(seed_str) == Random.hash_seed(seed_gstr) + string_seeds = Set{Vector{UInt8}}() + for ch = 'A':'z' + vseed = Random.hash_seed(string(ch)) + @test vseed ∉ keys(vseeds) + @test vseed ∉ string_seeds + push!(string_seeds, vseed) + end +end + +@testset "rand(::Type{<:Pair})" begin + @test rand(Pair{Int, Int}) isa Pair{Int, Int} + @test rand(Pair{Int, Float64}) isa Pair{Int, Float64} + @test rand(Pair{Int, Float64}, 3) isa Array{Pair{Int, Float64}} + + # test that making an array out of a sampler works + # (i.e. 
that gentype(sp) is correct) + sp = Random.Sampler(AbstractRNG, Pair{Bool, Char}) + xs = rand(sp, 3) + @test xs isa Vector{Pair{Bool, Char}} + @test length(xs) == 3 +end diff --git a/stdlib/SHA.version b/stdlib/SHA.version index f2242a336c6fe..f22bb33dc7ea2 100644 --- a/stdlib/SHA.version +++ b/stdlib/SHA.version @@ -1,4 +1,4 @@ SHA_BRANCH = master -SHA_SHA1 = 2d1f84e6f8417a1a368de48318640d948b023e7a +SHA_SHA1 = aaf2df61ff8c3898196587a375d3cf213bd40b41 SHA_GIT_URL := https://github.com/JuliaCrypto/SHA.jl.git SHA_TAR_URL = https://api.github.com/repos/JuliaCrypto/SHA.jl/tarball/$1 diff --git a/stdlib/Serialization/Project.toml b/stdlib/Serialization/Project.toml index 4a2f7874e3124..97e898d731c7d 100644 --- a/stdlib/Serialization/Project.toml +++ b/stdlib/Serialization/Project.toml @@ -1,5 +1,6 @@ name = "Serialization" uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl index f86fb1b858b05..46081f38cf9aa 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -39,7 +39,7 @@ const TAGS = Any[ Float16, Float32, Float64, Char, DataType, Union, UnionAll, Core.TypeName, Tuple, Array, Expr, LineNumberNode, :__LabelNode__, GotoNode, QuoteNode, CodeInfo, TypeVar, Core.Box, Core.MethodInstance, Module, Task, String, SimpleVector, Method, - GlobalRef, SlotNumber, TypedSlot, NewvarNode, SSAValue, + GlobalRef, SlotNumber, Const, NewvarNode, SSAValue, # dummy entries for tags that don't correspond directly to types Symbol, # UNDEFREF_TAG @@ -77,15 +77,14 @@ const TAGS = Any[ (Int64(0):Int64(n_int_literals-1))... ] -@assert length(TAGS) == 255 +const NTAGS = length(TAGS) +@assert NTAGS == 255 -const ser_version = 19 # do not make changes without bumping the version #! +const ser_version = 26 # do not make changes without bumping the version #! format_version(::AbstractSerializer) = ser_version format_version(s::Serializer) = s.version -const NTAGS = length(TAGS) - function sertag(@nospecialize(v)) # NOTE: we use jl_value_ptr directly since we know at least one of the arguments # in the comparison below is a singleton. 
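The `ser_version` bump above matters because deserializers gate newly added fields on the stream's recorded format version, as the `format_version(s) >= N` checks in the later hunks do. A minimal sketch of that pattern, assuming a hypothetical reader whose payload gained an `extra` field in format 26 (`read_example` and `extra` are illustrative names; `format_version` and `deserialize` are the Serialization functions used below):

using Serialization: AbstractSerializer, deserialize, format_version

function read_example(s::AbstractSerializer)   # hypothetical reader, for illustration only
    core = deserialize(s)                      # data present in every format version
    # streams written before format 26 simply do not carry the new field
    extra = format_version(s) >= 26 ? deserialize(s) : nothing
    return core, extra
end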
@@ -194,7 +193,7 @@ serialize(s::AbstractSerializer, ::Tuple{}) = writetag(s.io, EMPTYTUPLE_TAG) function serialize(s::AbstractSerializer, t::Tuple) l = length(t) - if l <= 255 + if l <= NTAGS writetag(s.io, TUPLE_TAG) write(s.io, UInt8(l)) else @@ -224,7 +223,7 @@ function serialize(s::AbstractSerializer, x::Symbol) if len > 7 serialize_cycle(s, x) && return end - if len <= 255 + if len <= NTAGS writetag(s.io, SYMBOL_TAG) write(s.io, UInt8(len)) else @@ -289,13 +288,38 @@ function serialize(s::AbstractSerializer, a::SubArray{T,N,A}) where {T,N,A<:Arra serialize_any(s, b) end +serialize(s::AbstractSerializer, m::GenericMemory) = error("GenericMemory{:atomic} currently cannot be serialized") +function serialize(s::AbstractSerializer, m::Memory) + serialize_cycle_header(s, m) && return + serialize(s, length(m)) + elty = eltype(m) + if isbitstype(elty) + serialize_array_data(s.io, m) + else + sizehint!(s.table, div(length(m),4)) # prepare for lots of pointers + @inbounds for i in eachindex(m) + if isassigned(m, i) + serialize(s, m[i]) + else + writetag(s.io, UNDEFREF_TAG) + end + end + end +end + +function serialize(s::AbstractSerializer, x::GenericMemoryRef) + serialize_type(s, typeof(x)) + serialize(s, getfield(x, :mem)) + serialize(s, Base.memoryrefoffset(x)) +end + function serialize(s::AbstractSerializer, ss::String) len = sizeof(ss) if len > 7 serialize_cycle(s, ss) && return writetag(s.io, SHARED_REF_TAG) end - if len <= 255 + if len <= NTAGS writetag(s.io, STRING_TAG) write(s.io, UInt8(len)) else @@ -327,7 +351,7 @@ end function serialize(s::AbstractSerializer, ex::Expr) serialize_cycle(s, ex) && return l = length(ex.args) - if l <= 255 + if l <= NTAGS writetag(s.io, EXPR_TAG) write(s.io, UInt8(l)) else @@ -364,7 +388,8 @@ function serialize_mod_names(s::AbstractSerializer, m::Module) p = parentmodule(m) if p === m || m === Base key = Base.root_module_key(m) - serialize(s, key.uuid === nothing ? nothing : key.uuid.value) + uuid = key.uuid + serialize(s, uuid === nothing ? 
nothing : uuid.value) serialize(s, Symbol(key.name)) else serialize_mod_names(s, p) @@ -418,6 +443,7 @@ function serialize(s::AbstractSerializer, meth::Method) serialize(s, meth.nargs) serialize(s, meth.isva) serialize(s, meth.is_for_opaque_closure) + serialize(s, meth.nospecializeinfer) serialize(s, meth.constprop) serialize(s, meth.purity) if isdefined(meth, :source) @@ -510,7 +536,7 @@ function serialize_typename(s::AbstractSerializer, t::Core.TypeName) serialize(s, primary.super) serialize(s, primary.parameters) serialize(s, primary.types) - serialize(s, isdefined(primary, :instance)) + serialize(s, Base.issingletontype(primary)) serialize(s, t.flags & 0x1 == 0x1) # .abstract serialize(s, t.flags & 0x2 == 0x2) # .mutable serialize(s, Int32(length(primary.types) - t.n_uninitialized)) @@ -519,10 +545,11 @@ function serialize_typename(s::AbstractSerializer, t::Core.TypeName) serialize(s, t.mt.name) serialize(s, collect(Base.MethodList(t.mt))) serialize(s, t.mt.max_args) - if isdefined(t.mt, :kwsorter) - serialize(s, t.mt.kwsorter) - else + kws = collect(methods(Core.kwcall, (Any, t.wrapper, Vararg))) + if isempty(kws) writetag(s.io, UNDEFREF_TAG) + else + serialize(s, kws) end else writetag(s.io, UNDEFREF_TAG) @@ -564,10 +591,8 @@ function serialize_type_data(s, @nospecialize(t::DataType)) serialize(s, t.name) else writetag(s.io, DATATYPE_TAG) - tname = t.name.name - serialize(s, tname) - mod = t.name.module - serialize(s, mod) + serialize(s, nameof(t)) + serialize(s, parentmodule(t)) end if !isempty(t.parameters) if iswrapper @@ -654,14 +679,18 @@ end serialize(s::AbstractSerializer, @nospecialize(x)) = serialize_any(s, x) +function serialize(s::AbstractSerializer, x::Core.AddrSpace) + serialize_type(s, typeof(x)) + write(s.io, Core.bitcast(UInt8, x)) +end + function serialize_any(s::AbstractSerializer, @nospecialize(x)) tag = sertag(x) if tag > 0 return write_as_tag(s.io, tag) end t = typeof(x)::DataType - nf = nfields(x) - if nf == 0 && t.size > 0 + if isprimitivetype(t) serialize_type(s, t) write(s.io, x) else @@ -671,6 +700,7 @@ function serialize_any(s::AbstractSerializer, @nospecialize(x)) else serialize_type(s, t, false) end + nf = nfields(x) for i in 1:nf if isdefined(x, i) serialize(s, getfield(x, i)) @@ -1027,16 +1057,22 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) nargs = deserialize(s)::Int32 isva = deserialize(s)::Bool is_for_opaque_closure = false + nospecializeinfer = false constprop = 0x00 - purity = 0x00 + purity = 0x0000 template_or_is_opaque = deserialize(s) if isa(template_or_is_opaque, Bool) is_for_opaque_closure = template_or_is_opaque + if format_version(s) >= 24 + nospecializeinfer = deserialize(s)::Bool + end if format_version(s) >= 14 constprop = deserialize(s)::UInt8 end - if format_version(s) >= 17 - purity = deserialize(s)::UInt8 + if format_version(s) >= 26 + purity = deserialize(s)::UInt16 + elseif format_version(s) >= 17 + purity = UInt16(deserialize(s)::UInt8) end template = deserialize(s) else @@ -1056,12 +1092,12 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) meth.nargs = nargs meth.isva = isva meth.is_for_opaque_closure = is_for_opaque_closure + meth.nospecializeinfer = nospecializeinfer meth.constprop = constprop meth.purity = purity if template !== nothing # TODO: compress template meth.source = template::CodeInfo - meth.pure = template.pure if !@isdefined(slot_syms) slot_syms = ccall(:jl_compress_argnames, Ref{String}, (Any,), meth.source.slotnames) end @@ -1089,7 +1125,7 @@ function 
deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance}) deserialize_cycle(s, linfo) tag = Int32(read(s.io, UInt8)::UInt8) if tag != UNDEFREF_TAG - linfo.uninferred = handle_deserialize(s, tag)::CodeInfo + setfield!(linfo, :uninferred, handle_deserialize(s, tag)::CodeInfo, :monotonic) end tag = Int32(read(s.io, UInt8)::UInt8) if tag != UNDEFREF_TAG @@ -1157,7 +1193,9 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) if length(ssaflags) ≠ length(code) # make sure the length of `ssaflags` matches that of `code` # so that the latest inference doesn't throw on IRs serialized from old versions - ssaflags = UInt8[0x00 for _ in 1:length(code)] + ssaflags = UInt32[0x00 for _ in 1:length(code)] + elseif eltype(ssaflags) != UInt32 + ssaflags = map(UInt32, ssaflags) end ci.ssaflags = ssaflags if pre_12 @@ -1183,15 +1221,38 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) end end ci.inferred = deserialize(s) - ci.inlineable = deserialize(s) + if format_version(s) < 22 + inlining_cost = deserialize(s) + if isa(inlining_cost, Bool) + Core.Compiler.set_inlineable!(ci, inlining_cost) + else + ci.inlining_cost = inlining_cost + end + end ci.propagate_inbounds = deserialize(s) - ci.pure = deserialize(s) + if format_version(s) < 23 + deserialize(s) # `pure` field has been removed + end + if format_version(s) >= 20 + ci.has_fcall = deserialize(s) + end + if format_version(s) >= 24 + ci.nospecializeinfer = deserialize(s)::Bool + end + if format_version(s) >= 21 + ci.inlining = deserialize(s)::UInt8 + end if format_version(s) >= 14 ci.constprop = deserialize(s)::UInt8 end - if format_version(s) >= 17 + if format_version(s) >= 26 + ci.purity = deserialize(s)::UInt16 + elseif format_version(s) >= 17 ci.purity = deserialize(s)::UInt8 end + if format_version(s) >= 22 + ci.inlining_cost = deserialize(s)::UInt16 + end return ci end @@ -1250,7 +1311,7 @@ function deserialize_array(s::AbstractSerializer) return A end -function deserialize_fillarray!(A::Array{T}, s::AbstractSerializer) where {T} +function deserialize_fillarray!(A::Union{Array{T},Memory{T}}, s::AbstractSerializer) where {T} for i = eachindex(A) tag = Int32(read(s.io, UInt8)::UInt8) if tag != UNDEFREF_TAG @@ -1260,6 +1321,48 @@ function deserialize_fillarray!(A::Array{T}, s::AbstractSerializer) where {T} return A end +function deserialize(s::AbstractSerializer, X::Type{Memory{T}} where T) + slot = pop!(s.pending_refs) # e.g. 
deserialize_cycle + n = deserialize(s)::Int + elty = eltype(X) + if isbitstype(elty) + A = X(undef, n) + if X === Memory{Bool} + i = 1 + while i <= n + b = read(s.io, UInt8)::UInt8 + v = (b >> 7) != 0 + count = b & 0x7f + nxt = i + count + while i < nxt + A[i] = v + i += 1 + end + end + else + A = read!(s.io, A)::X + end + s.table[slot] = A + return A + end + A = X(undef, n) + s.table[slot] = A + sizehint!(s.table, s.counter + div(n, 4)) + deserialize_fillarray!(A, s) + return A +end + +function deserialize(s::AbstractSerializer, X::Type{MemoryRef{T}} where T) + x = Core.memoryref(deserialize(s))::X + i = deserialize(s)::Int + i == 2 || (x = Core.memoryref(x, i, true)) + return x::X +end + +function deserialize(s::AbstractSerializer, X::Type{Core.AddrSpace{M}} where M) + Core.bitcast(X, read(s.io, UInt8)) +end + function deserialize_expr(s::AbstractSerializer, len) e = Expr(:temp) resolve_ref_immediately(s, e) @@ -1334,7 +1437,7 @@ function deserialize_typename(s::AbstractSerializer, number) mt.offs = 0 end mt.name = mtname - mt.max_args = maxa + setfield!(mt, :max_args, maxa, :monotonic) ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), tn, Base.fieldindex(Core.TypeName, :mt)-1, mt) for def in defs if isdefined(def, :sig) @@ -1346,7 +1449,15 @@ function deserialize_typename(s::AbstractSerializer, number) if tag != UNDEFREF_TAG kws = handle_deserialize(s, tag) if makenew - tn.mt.kwsorter = kws + if kws isa Vector{Method} + for def in kws + kwmt = typeof(Core.kwcall).name.mt + ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, def, C_NULL) + end + else + # old object format -- try to forward from old to new + @eval Core.kwcall(kwargs::NamedTuple, f::$ty, args...) = $kws(kwargs, f, args...) + end end end elseif makenew @@ -1458,8 +1569,7 @@ end # default DataType deserializer function deserialize(s::AbstractSerializer, t::DataType) nf = length(t.types) - if nf == 0 && t.size > 0 - # bits type + if isprimitivetype(t) return read(s.io, t) elseif ismutabletype(t) x = ccall(:jl_new_struct_uninit, Any, (Any,), t) diff --git a/stdlib/Serialization/test/runtests.jl b/stdlib/Serialization/test/runtests.jl index 104b3e97d6118..46749d4375538 100644 --- a/stdlib/Serialization/test/runtests.jl +++ b/stdlib/Serialization/test/runtests.jl @@ -317,18 +317,23 @@ main_ex = quote using Serialization $create_serialization_stream() do s local g() = :magic_token_anon_fun_test + local gkw(; kw=:thekw) = kw serialize(s, g) serialize(s, g) + serialize(s, gkw) seekstart(s) ds = Serializer(s) local g2 = deserialize(ds) - Base.invokelatest() do - $Test.@test g2 !== g - $Test.@test g2() == :magic_token_anon_fun_test - $Test.@test g2() == :magic_token_anon_fun_test - $Test.@test deserialize(ds) === g2 - end + @test g2 !== g + $Test.@test Base.invokelatest(g2) === :magic_token_anon_fun_test + $Test.@test Base.invokelatest(g2) === :magic_token_anon_fun_test + deserialize(ds) === g2 + + local gkw2 = deserialize(s) + $Test.@test gkw2 !== gkw + $Test.@test Base.invokelatest(gkw2) === :thekw + $Test.@test Base.invokelatest(gkw2, kw="kwtest") === "kwtest" # issue #21793 y = x -> (() -> x) @@ -336,10 +341,10 @@ main_ex = quote serialize(s, y) seekstart(s) y2 = deserialize(s) - Base.invokelatest() do + $Test.@test Base.invokelatest() do x2 = y2(2) - $Test.@test x2() == 2 - end + x2() + end === 2 end end # This needs to be run on `Main` since the serializer treats it differently. 
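The test block above round-trips anonymous and keyword-accepting functions; calls to the reconstructed functions go through `Base.invokelatest` because their methods can be newer than the caller's world age. A minimal stand-alone sketch of the same round trip, using only the public Serialization API (the closure `f` is just an example):

using Serialization

io = IOBuffer()
f = x -> x + 1                    # example closure to round-trip
serialize(io, f)
seekstart(io)
g = deserialize(io)               # reconstructed function object
Base.invokelatest(g, 1) == 2      # invokelatest: g's method may postdate this world age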
@@ -354,7 +359,7 @@ create_serialization_stream() do s # user-defined type array seek(s, 0) r = deserialize(s) @test r.storage[:v] == 2 - @test r.state == :done + @test r.state === :done @test r.exception === nothing end @@ -366,7 +371,7 @@ create_serialization_stream() do s # user-defined type array serialize(s, t) seek(s, 0) r = deserialize(s) - @test r.state == :failed + @test r.state === :failed end # corner case: undefined inside immutable struct diff --git a/stdlib/SharedArrays/Project.toml b/stdlib/SharedArrays/Project.toml index 588785347c73d..46e5332f8d89d 100644 --- a/stdlib/SharedArrays/Project.toml +++ b/stdlib/SharedArrays/Project.toml @@ -1,5 +1,6 @@ name = "SharedArrays" uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" +version = "1.11.0" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" diff --git a/stdlib/SharedArrays/src/SharedArrays.jl b/stdlib/SharedArrays/src/SharedArrays.jl index a961be4e534b3..93ce396277af7 100644 --- a/stdlib/SharedArrays/src/SharedArrays.jl +++ b/stdlib/SharedArrays/src/SharedArrays.jl @@ -8,7 +8,7 @@ module SharedArrays using Mmap, Distributed, Random import Base: length, size, elsize, ndims, IndexStyle, reshape, convert, deepcopy_internal, - show, getindex, setindex!, fill!, similar, reduce, map!, copyto!, unsafe_convert + show, getindex, setindex!, fill!, similar, reduce, map!, copyto!, cconvert import Random using Serialization using Serialization: serialize_cycle_header, serialize_type, writetag, UNDEFREF_TAG, serialize, deserialize @@ -328,7 +328,7 @@ procs(S::SharedArray) = S.pids """ indexpids(S::SharedArray) -Returns the current worker's index in the list of workers +Return the current worker's index in the list of workers mapping the `SharedArray` (i.e. in the same list returned by `procs(S)`), or 0 if the `SharedArray` is not mapped locally. """ @@ -337,7 +337,7 @@ indexpids(S::SharedArray) = S.pidx """ sdata(S::SharedArray) -Returns the actual `Array` object backing `S`. +Return the actual `Array` object backing `S`. """ sdata(S::SharedArray) = S.s sdata(A::AbstractArray) = A @@ -345,7 +345,7 @@ sdata(A::AbstractArray) = A """ localindices(S::SharedArray) -Returns a range describing the "default" indices to be handled by the +Return a range describing the "default" indices to be handled by the current process. This range should be interpreted in the sense of linear indexing, i.e., as a sub-range of `1:length(S)`. In multi-process contexts, returns an empty range in the parent process @@ -358,8 +358,8 @@ for each worker process. """ localindices(S::SharedArray) = S.pidx > 0 ? 
range_1dim(S, S.pidx) : 1:0 -unsafe_convert(::Type{Ptr{T}}, S::SharedArray{T}) where {T} = unsafe_convert(Ptr{T}, sdata(S)) -unsafe_convert(::Type{Ptr{T}}, S::SharedArray ) where {T} = unsafe_convert(Ptr{T}, sdata(S)) +cconvert(::Type{Ptr{T}}, S::SharedArray{T}) where {T} = cconvert(Ptr{T}, sdata(S)) +cconvert(::Type{Ptr{T}}, S::SharedArray ) where {T} = cconvert(Ptr{T}, sdata(S)) function SharedArray(A::Array) S = SharedArray{eltype(A),ndims(A)}(size(A)) @@ -374,7 +374,7 @@ function SharedArray{TS,N}(A::Array{TA,N}) where {TS,TA,N} copyto!(S, A) end -convert(T::Type{<:SharedArray}, a::Array) = T(a) +convert(T::Type{<:SharedArray}, a::Array) = T(a)::T function deepcopy_internal(S::SharedArray, stackdict::IdDict) haskey(stackdict, S) && return stackdict[S] diff --git a/stdlib/Sockets/Project.toml b/stdlib/Sockets/Project.toml index 5afb89b29f126..6a395465722f2 100644 --- a/stdlib/Sockets/Project.toml +++ b/stdlib/Sockets/Project.toml @@ -1,5 +1,6 @@ name = "Sockets" uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Sockets/src/IPAddr.jl b/stdlib/Sockets/src/IPAddr.jl index 1792008620981..04710e400fe87 100644 --- a/stdlib/Sockets/src/IPAddr.jl +++ b/stdlib/Sockets/src/IPAddr.jl @@ -31,7 +31,7 @@ end """ IPv4(host::Integer) -> IPv4 -Returns an IPv4 object from ip address `host` formatted as an [`Integer`](@ref). +Return an IPv4 object from ip address `host` formatted as an [`Integer`](@ref). # Examples ```jldoctest @@ -84,7 +84,7 @@ end """ IPv6(host::Integer) -> IPv6 -Returns an IPv6 object from ip address `host` formatted as an [`Integer`](@ref). +Return an IPv6 object from ip address `host` formatted as an [`Integer`](@ref). # Examples ```jldoctest diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl index dfb2cf7261088..85f3e7639b6ae 100644 --- a/stdlib/Sockets/src/Sockets.jl +++ b/stdlib/Sockets/src/Sockets.jl @@ -450,7 +450,7 @@ function send(sock::UDPSocket, ipaddr::IPAddr, port::Integer, msg) finally Base.sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct) if uv_req_data(uvw) != C_NULL # uvw is still alive, # so make sure we won't get spurious notifications later @@ -567,7 +567,11 @@ end """ nagle(socket::Union{TCPServer, TCPSocket}, enable::Bool) -Enables or disables Nagle's algorithm on a given TCP server or socket. +Nagle's algorithm batches multiple small TCP packets into larger +ones. This can improve throughput but worsen latency. Nagle's algorithm +is enabled by default. This function sets whether Nagle's algorithm is +active on a given TCP server or socket. The opposite option is called +`TCP_NODELAY` in other languages. !!! compat "Julia 1.3" This function requires Julia 1.3 or later. @@ -709,16 +713,17 @@ end const localhost = ip"127.0.0.1" """ - listenany([host::IPAddr,] port_hint) -> (UInt16, TCPServer) + listenany([host::IPAddr,] port_hint; backlog::Integer=BACKLOG_DEFAULT) -> (UInt16, TCPServer) Create a `TCPServer` on any port, using hint as a starting point. Returns a tuple of the actual port that the server was created on and the server itself. +The backlog argument defines the maximum length to which the queue of pending connections for sockfd may grow. 
""" -function listenany(host::IPAddr, default_port) +function listenany(host::IPAddr, default_port; backlog::Integer=BACKLOG_DEFAULT) addr = InetAddr(host, default_port) while true sock = TCPServer() - if bind(sock, addr) && trylisten(sock) == 0 + if bind(sock, addr) && trylisten(sock; backlog) == 0 if default_port == 0 _addr, port = getsockname(sock) return (port, sock) @@ -726,14 +731,14 @@ function listenany(host::IPAddr, default_port) return (addr.port, sock) end close(sock) - addr = InetAddr(addr.host, addr.port + 1) + addr = InetAddr(addr.host, addr.port + UInt16(1)) if addr.port == default_port error("no ports available") end end end -listenany(default_port) = listenany(localhost, default_port) +listenany(default_port; backlog::Integer=BACKLOG_DEFAULT) = listenany(localhost, default_port; backlog) function udp_set_membership(sock::UDPSocket, group_addr::String, interface_addr::Union{Nothing, String}, operation) diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl index 586463ba0fa21..ac4aef8737d1b 100644 --- a/stdlib/Sockets/src/addrinfo.jl +++ b/stdlib/Sockets/src/addrinfo.jl @@ -90,7 +90,7 @@ function getalladdrinfo(host::String) finally Base.sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we don't get spurious notifications later @@ -170,7 +170,7 @@ using the operating system's underlying `getnameinfo` implementation. # Examples ```julia-repl -julia> getnameinfo(Sockets.IPv4("8.8.8.8")) +julia> getnameinfo(IPv4("8.8.8.8")) "google-public-dns-a.google.com" ``` """ @@ -205,7 +205,7 @@ function getnameinfo(address::Union{IPv4, IPv6}) finally Base.sigatomic_end() iolock_begin() - ct.queue === nothing || list_deletefirst!(ct.queue, ct) + ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct) if uv_req_data(req) != C_NULL # req is still alive, # so make sure we don't get spurious notifications later diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl index a27bb89408f1d..02a994460afbf 100644 --- a/stdlib/Sockets/test/runtests.jl +++ b/stdlib/Sockets/test/runtests.jl @@ -136,7 +136,7 @@ defaultport = rand(2000:4000) write(sock, "Hello World\n") # test "locked" println to a socket - @Experimental.sync begin + Experimental.@sync begin for i in 1:100 @async println(sock, "a", 1) end @@ -307,7 +307,7 @@ end bind(a, ip"127.0.0.1", randport) bind(b, ip"127.0.0.1", randport + 1) - @Experimental.sync begin + Experimental.@sync begin let i = 0 for _ = 1:30 @async let msg = String(recv(a)) @@ -387,7 +387,7 @@ end # connect to it client_sock = connect(addr, port) test_done = false - @Experimental.sync begin + Experimental.@sync begin @async begin Base.wait_readnb(client_sock, 1) test_done || error("Client disconnected prematurely.") diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version index 0862a4e2a458c..68cdbbf3fca7c 100644 --- a/stdlib/SparseArrays.version +++ b/stdlib/SparseArrays.version @@ -1,4 +1,4 @@ SPARSEARRAYS_BRANCH = main -SPARSEARRAYS_SHA1 = 2bbdd7a12ead8207593655c541ba347761a9c663 +SPARSEARRAYS_SHA1 = f154de2b6801ec8d5afaf58b73b830c8e71013c3 SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1 diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version index aba34b5423d99..1449dcee29b79 100644 --- 
a/stdlib/Statistics.version +++ b/stdlib/Statistics.version @@ -1,4 +1,4 @@ STATISTICS_BRANCH = master -STATISTICS_SHA1 = c38dd4418738bc595bd8229eb4ee91b717de64af +STATISTICS_SHA1 = 68869af06e8cdeb7aba1d5259de602da7328057f STATISTICS_GIT_URL := https://github.com/JuliaStats/Statistics.jl.git STATISTICS_TAR_URL = https://api.github.com/repos/JuliaStats/Statistics.jl/tarball/$1 diff --git a/stdlib/StyledStrings.version b/stdlib/StyledStrings.version new file mode 100644 index 0000000000000..bca2e9dca3e67 --- /dev/null +++ b/stdlib/StyledStrings.version @@ -0,0 +1,4 @@ +STYLEDSTRINGS_BRANCH = main +STYLEDSTRINGS_SHA1 = 61e7b105b157b40807ed0b4840166a25b0948549 +STYLEDSTRINGS_GIT_URL := https://github.com/JuliaLang/StyledStrings.jl.git +STYLEDSTRINGS_TAR_URL = https://api.github.com/repos/JuliaLang/StyledStrings.jl/tarball/$1 diff --git a/stdlib/SuiteSparse.version b/stdlib/SuiteSparse.version index 73b16ba750915..a5d7d781eff3d 100644 --- a/stdlib/SuiteSparse.version +++ b/stdlib/SuiteSparse.version @@ -1,4 +1,4 @@ SUITESPARSE_BRANCH = master -SUITESPARSE_SHA1 = ed89e0fe3d8908cede058f42f872ba60159af0a6 +SUITESPARSE_SHA1 = e8285dd13a6d5b5cf52d8124793fc4d622d07554 SUITESPARSE_GIT_URL := https://github.com/JuliaSparse/SuiteSparse.jl.git SUITESPARSE_TAR_URL = https://api.github.com/repos/JuliaSparse/SuiteSparse.jl/tarball/$1 diff --git a/stdlib/SuiteSparse_jll/Project.toml b/stdlib/SuiteSparse_jll/Project.toml index f36ce756c834c..7518d64e80200 100644 --- a/stdlib/SuiteSparse_jll/Project.toml +++ b/stdlib/SuiteSparse_jll/Project.toml @@ -1,15 +1,14 @@ name = "SuiteSparse_jll" uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" -version = "5.10.1+0" +version = "7.2.1+1" [deps] libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.7" +julia = "1.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl index 2940970ceff9f..f245759aaf383 100644 --- a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl +++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl @@ -14,31 +14,31 @@ export libamd, libbtf, libcamd, libccolamd, libcholmod, libcolamd, libklu, libld # Man I can't wait until these are automatically handled by an in-Base JLLWrappers clone. 
const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libamd_handle = C_NULL -libamd_path = "" -libbtf_handle = C_NULL -libbtf_path = "" -libcamd_handle = C_NULL -libcamd_path = "" -libccolamd_handle = C_NULL -libccolamd_path = "" -libcholmod_handle = C_NULL -libcholmod_path = "" -libcolamd_handle = C_NULL -libcolamd_path = "" -libklu_handle = C_NULL -libklu_path = "" -libldl_handle = C_NULL -libldl_path = "" -librbio_handle = C_NULL -librbio_path = "" -libspqr_handle = C_NULL -libspqr_path = "" -libsuitesparseconfig_handle = C_NULL -libsuitesparseconfig_path = "" -libumfpack_handle = C_NULL -libumfpack_path = "" +artifact_dir::String = "" +libamd_handle::Ptr{Cvoid} = C_NULL +libamd_path::String = "" +libbtf_handle::Ptr{Cvoid} = C_NULL +libbtf_path::String = "" +libcamd_handle::Ptr{Cvoid} = C_NULL +libcamd_path::String = "" +libccolamd_handle::Ptr{Cvoid} = C_NULL +libccolamd_path::String = "" +libcholmod_handle::Ptr{Cvoid} = C_NULL +libcholmod_path::String = "" +libcolamd_handle::Ptr{Cvoid} = C_NULL +libcolamd_path::String = "" +libklu_handle::Ptr{Cvoid} = C_NULL +libklu_path::String = "" +libldl_handle::Ptr{Cvoid} = C_NULL +libldl_path::String = "" +librbio_handle::Ptr{Cvoid} = C_NULL +librbio_path::String = "" +libspqr_handle::Ptr{Cvoid} = C_NULL +libspqr_path::String = "" +libsuitesparseconfig_handle::Ptr{Cvoid} = C_NULL +libsuitesparseconfig_path::String = "" +libumfpack_handle::Ptr{Cvoid} = C_NULL +libumfpack_path::String = "" if Sys.iswindows() const libamd = "libamd.dll" @@ -54,58 +54,60 @@ if Sys.iswindows() const libsuitesparseconfig = "libsuitesparseconfig.dll" const libumfpack = "libumfpack.dll" elseif Sys.isapple() - const libamd = "@rpath/libamd.2.dylib" - const libbtf = "@rpath/libbtf.1.dylib" - const libcamd = "@rpath/libcamd.2.dylib" - const libccolamd = "@rpath/libccolamd.2.dylib" - const libcholmod = "@rpath/libcholmod.3.dylib" - const libcolamd = "@rpath/libcolamd.2.dylib" - const libklu = "@rpath/libklu.1.dylib" - const libldl = "@rpath/libldl.2.dylib" - const librbio = "@rpath/librbio.2.dylib" - const libspqr = "@rpath/libspqr.2.dylib" - const libsuitesparseconfig = "@rpath/libsuitesparseconfig.5.dylib" - const libumfpack = "@rpath/libumfpack.5.dylib" + const libamd = "@rpath/libamd.3.dylib" + const libbtf = "@rpath/libbtf.2.dylib" + const libcamd = "@rpath/libcamd.3.dylib" + const libccolamd = "@rpath/libccolamd.3.dylib" + const libcholmod = "@rpath/libcholmod.4.dylib" + const libcolamd = "@rpath/libcolamd.3.dylib" + const libklu = "@rpath/libklu.2.dylib" + const libldl = "@rpath/libldl.3.dylib" + const librbio = "@rpath/librbio.4.dylib" + const libspqr = "@rpath/libspqr.4.dylib" + const libsuitesparseconfig = "@rpath/libsuitesparseconfig.7.dylib" + const libumfpack = "@rpath/libumfpack.6.dylib" else - const libamd = "libamd.so.2" - const libbtf = "libbtf.so.1" - const libcamd = "libcamd.so.2" - const libccolamd = "libccolamd.so.2" - const libcholmod = "libcholmod.so.3" - const libcolamd = "libcolamd.so.2" - const libklu = "libklu.so.1" - const libldl = "libldl.so.2" - const librbio = "librbio.so.2" - const libspqr = "libspqr.so.2" - const libsuitesparseconfig = "libsuitesparseconfig.so.5" - const libumfpack = "libumfpack.so.5" + const libamd = "libamd.so.3" + const libbtf = "libbtf.so.2" + const libcamd = "libcamd.so.3" + const libccolamd = "libccolamd.so.3" + const libcholmod = "libcholmod.so.4" + const libcolamd = "libcolamd.so.3" + const libklu = "libklu.so.2" + const libldl = "libldl.so.3" + const librbio = "librbio.so.4" + const libspqr = 
"libspqr.so.4" + const libsuitesparseconfig = "libsuitesparseconfig.so.7" + const libumfpack = "libumfpack.so.6" end function __init__() - global libamd_handle = dlopen(libamd) - global libamd_path = dlpath(libamd_handle) - global libbtf_handle = dlopen(libbtf) - global libbtf_path = dlpath(libbtf_handle) - global libcamd_handle = dlopen(libcamd) - global libcamd_path = dlpath(libcamd_handle) - global libccolamd_handle = dlopen(libccolamd) - global libccolamd_path = dlpath(libccolamd_handle) - global libcholmod_handle = dlopen(libcholmod) - global libcholmod_path = dlpath(libcholmod_handle) - global libcolamd_handle = dlopen(libcolamd) - global libcolamd_path = dlpath(libcolamd_handle) - global libklu_handle = dlopen(libklu) - global libklu_path = dlpath(libklu_handle) - global libldl_handle = dlopen(libldl) - global libldl_path = dlpath(libldl_handle) - global librbio_handle = dlopen(librbio) - global librbio_path = dlpath(librbio_handle) - global libspqr_handle = dlopen(libspqr) - global libspqr_path = dlpath(libspqr_handle) - global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig) - global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle) - global libumfpack_handle = dlopen(libumfpack) - global libumfpack_path = dlpath(libumfpack_handle) + if Base.USE_GPL_LIBS + global libamd_handle = dlopen(libamd) + global libamd_path = dlpath(libamd_handle) + global libbtf_handle = dlopen(libbtf) + global libbtf_path = dlpath(libbtf_handle) + global libcamd_handle = dlopen(libcamd) + global libcamd_path = dlpath(libcamd_handle) + global libccolamd_handle = dlopen(libccolamd) + global libccolamd_path = dlpath(libccolamd_handle) + global libcholmod_handle = dlopen(libcholmod) + global libcholmod_path = dlpath(libcholmod_handle) + global libcolamd_handle = dlopen(libcolamd) + global libcolamd_path = dlpath(libcolamd_handle) + global libklu_handle = dlopen(libklu) + global libklu_path = dlpath(libklu_handle) + global libldl_handle = dlopen(libldl) + global libldl_path = dlpath(libldl_handle) + global librbio_handle = dlopen(librbio) + global librbio_path = dlpath(librbio_handle) + global libspqr_handle = dlopen(libspqr) + global libspqr_path = dlpath(libspqr_handle) + global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig) + global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle) + global libumfpack_handle = dlopen(libumfpack) + global libumfpack_path = dlpath(libumfpack_handle) + end global artifact_dir = dirname(Sys.BINDIR) end diff --git a/stdlib/SuiteSparse_jll/test/runtests.jl b/stdlib/SuiteSparse_jll/test/runtests.jl index ca356951f99e2..922da55fa1881 100644 --- a/stdlib/SuiteSparse_jll/test/runtests.jl +++ b/stdlib/SuiteSparse_jll/test/runtests.jl @@ -2,6 +2,10 @@ using Test, SuiteSparse_jll +# SuiteSparse only uses SUITESPARSE_MAIN_VERSION and SUITESPARSE_SUB_VERSION to compute its version +# The SUITESPARSE_SUBSUB_VERSION is not used +# TODO before release: update to 7020 or above when upstreamed. +# This should be safe and unnecessary since we specify exact version of the BB JLL. 
@testset "SuiteSparse_jll" begin - @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) == 5010 + @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) > 7000 end diff --git a/stdlib/TOML/Project.toml b/stdlib/TOML/Project.toml index 48bf828a370c9..17fc8be19ec8e 100644 --- a/stdlib/TOML/Project.toml +++ b/stdlib/TOML/Project.toml @@ -1,12 +1,18 @@ name = "TOML" uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.0" +version = "1.0.3" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +[compat] +julia = "1.6" + [extras] +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" [targets] -test = ["Test"] +test = ["Downloads", "p7zip_jll", "Tar", "Test"] diff --git a/stdlib/TOML/docs/src/index.md b/stdlib/TOML/docs/src/index.md index 36e8ec6248108..380a4d4ee820d 100644 --- a/stdlib/TOML/docs/src/index.md +++ b/stdlib/TOML/docs/src/index.md @@ -36,7 +36,7 @@ none:1:16 error: failed to parse value ``` There are other versions of the parse functions ([`TOML.tryparse`](@ref) -and [`TOML.tryparsefile`]) that instead of throwing exceptions on parser error +and [`TOML.tryparsefile`](@ref)) that instead of throwing exceptions on parser error returns a [`TOML.ParserError`](@ref) with information: ```jldoctest diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl index 059414152f727..1fa9f97405504 100644 --- a/stdlib/TOML/src/print.jl +++ b/stdlib/TOML/src/print.jl @@ -33,6 +33,14 @@ function print_toml_escaped(io::IO, s::AbstractString) end end +const MbyFunc = Union{Function, Nothing} +const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, AbstractString} + + +######## +# Keys # +######## + function printkey(io::IO, keys::Vector{String}) for (i, k) in enumerate(keys) i != 1 && Base.print(io, ".") @@ -50,48 +58,85 @@ function printkey(io::IO, keys::Vector{String}) end end -const MbyFunc = Union{Function, Nothing} -const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, AbstractString} -function printvalue(f::MbyFunc, io::IO, value::AbstractVector; sorted=false, by=identity) +function to_toml_value(f::MbyFunc, value) + if f === nothing + error("type `$(typeof(value))` is not a valid TOML type, pass a conversion function to `TOML.print`") + end + toml_value = f(value) + if !(toml_value isa TOMLValue) + error("TOML syntax function for type `$(typeof(value))` did not return a valid TOML type but a `$(typeof(toml_value))`") + end + return toml_value +end + +########## +# Values # +########## + +# Fallback +function printvalue(f::MbyFunc, io::IO, value) + toml_value = to_toml_value(f, value) + @invokelatest printvalue(f, io, toml_value) +end + +function printvalue(f::MbyFunc, io::IO, value::AbstractVector) Base.print(io, "[") for (i, x) in enumerate(value) i != 1 && Base.print(io, ", ") - if isa(x, AbstractDict) - _print(f, io, x; sorted, by) - else - printvalue(f, io, x; sorted, by) - end + printvalue(f, io, x) end Base.print(io, "]") end -printvalue(f::MbyFunc, io::IO, value::AbstractDict; sorted=false, by=identity) = - _print(f, io, value; sorted, by) -printvalue(f::MbyFunc, io::IO, value::Dates.DateTime; _...) 
= - Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd\THH:MM:SS.sss\Z")) -printvalue(f::MbyFunc, io::IO, value::Dates.Time; _...) = - Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss")) -printvalue(f::MbyFunc, io::IO, value::Dates.Date; _...) = - Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd")) -printvalue(f::MbyFunc, io::IO, value::Bool; _...) = - Base.print(io, value ? "true" : "false") -printvalue(f::MbyFunc, io::IO, value::Integer; _...) = - Base.print(io, Int64(value)) # TOML specifies 64-bit signed long range for integer -printvalue(f::MbyFunc, io::IO, value::AbstractFloat; _...) = - Base.print(io, isnan(value) ? "nan" : - isinf(value) ? string(value > 0 ? "+" : "-", "inf") : - Float64(value)) # TOML specifies IEEE 754 binary64 for float -function printvalue(f::MbyFunc, io::IO, value::AbstractString; _...) - Base.print(io, "\"") - print_toml_escaped(io, value) - Base.print(io, "\"") + +function printvalue(f::MbyFunc, io::IO, value::TOMLValue) + value isa Dates.DateTime ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd\THH:MM:SS.sss\Z")) : + value isa Dates.Time ? Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss")) : + value isa Dates.Date ? Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd")) : + value isa Bool ? Base.print(io, value ? "true" : "false") : + value isa Integer ? print_integer(io, value) : # Julia's own printing should be compatible with TOML on integers + value isa AbstractFloat ? Base.print(io, isnan(value) ? "nan" : + isinf(value) ? string(value > 0 ? "+" : "-", "inf") : + Float64(value)) : # TOML specifies IEEE 754 binary64 for float + value isa AbstractString ? (Base.print(io, "\""); + print_toml_escaped(io, value); + Base.print(io, "\"")) : + value isa AbstractDict ? 
print_inline_table(f, io, value) : + error("internal error in TOML printing, unhandled value") +end + +function print_integer(io::IO, value::Integer) + value isa Signed && return Base.show(io, value) + # unsigned integers are printed as hex + n = 2 * ndigits(value, base=256) + Base.print(io, "0x", string(value, base=16, pad=n)) + return +end + +function print_inline_table(f::MbyFunc, io::IO, value::AbstractDict) + Base.print(io, "{") + for (i, (k,v)) in enumerate(value) + i != 1 && Base.print(io, ", ") + printkey(io, [String(k)]) + Base.print(io, " = ") + printvalue(f, io, v) + end + Base.print(io, "}") end + +########## +# Tables # +########## + is_table(value) = isa(value, AbstractDict) is_array_of_tables(value) = isa(value, AbstractArray) && - length(value) > 0 && isa(value[1], AbstractDict) -is_tabular(value) = is_table(value) || is_array_of_tables(value) + length(value) > 0 && ( + isa(value, AbstractArray{<:AbstractDict}) || + all(v -> isa(v, AbstractDict), value) + ) +is_tabular(value) = is_table(value) || @invokelatest(is_array_of_tables(value)) -function _print(f::MbyFunc, io::IO, a::AbstractDict, +function print_table(f::MbyFunc, io::IO, a::AbstractDict, ks::Vector{String} = String[]; indent::Int = 0, first_block::Bool = true, @@ -100,40 +145,34 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict, ) akeys = keys(a) if sorted - akeys = sort!(collect(akeys); by) + akeys = sort!(collect(akeys); by=by) end # First print non-tabular entries for key in akeys value = a[key] - is_tabular(value) && continue if !isa(value, TOMLValue) - if f === nothing - error("type `$(typeof(value))` is not a valid TOML type, pass a conversion function to `TOML.print`") - end - toml_value = f(value) - if !(toml_value isa TOMLValue) - error("TOML syntax function for type `$(typeof(value))` did not return a valid TOML type but a `$(typeof(toml_value))`") - end - value = toml_value - end - if is_tabular(value) - _print(f, io, Dict(key => value); indent, first_block, sorted, by) - else - Base.print(io, ' '^4max(0,indent-1)) - printkey(io, [String(key)]) - Base.print(io, " = ") # print separator - printvalue(f, io, value; sorted, by) - Base.print(io, "\n") # new line? + value = to_toml_value(f, value) end + is_tabular(value) && continue + + Base.print(io, ' '^4max(0,indent-1)) + printkey(io, [String(key)]) + Base.print(io, " = ") # print separator + printvalue(f, io, value) + Base.print(io, "\n") # new line? 
first_block = false end for key in akeys value = a[key] + if !isa(value, TOMLValue) + value = to_toml_value(f, value) + end if is_table(value) push!(ks, String(key)) - header = isempty(value) || !all(is_tabular(v) for v in values(value))::Bool + _values = @invokelatest values(value) + header = isempty(value) || !all(is_tabular(v) for v in _values)::Bool if header # print table first_block || println(io) @@ -144,9 +183,9 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict, Base.print(io,"]\n") end # Use runtime dispatch here since the type of value seems not to be enforced other than as AbstractDict - @invokelatest _print(f, io, value, ks; indent = indent + header, first_block = header, sorted, by) + @invokelatest print_table(f, io, value, ks; indent = indent + header, first_block = header, sorted=sorted, by=by) pop!(ks) - elseif is_array_of_tables(value) + elseif @invokelatest(is_array_of_tables(value)) # print array of tables first_block || println(io) first_block = false @@ -158,14 +197,19 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict, Base.print(io,"]]\n") # TODO, nicer error here !isa(v, AbstractDict) && error("array should contain only tables") - @invokelatest _print(f, io, v, ks; indent = indent + 1, sorted, by) + @invokelatest print_table(f, io, v, ks; indent = indent + 1, sorted=sorted, by=by) end pop!(ks) end end end -print(f::MbyFunc, io::IO, a::AbstractDict; sorted::Bool=false, by=identity) = _print(f, io, a; sorted, by) -print(f::MbyFunc, a::AbstractDict; sorted::Bool=false, by=identity) = print(f, stdout, a; sorted, by) -print(io::IO, a::AbstractDict; sorted::Bool=false, by=identity) = _print(nothing, io, a; sorted, by) -print(a::AbstractDict; sorted::Bool=false, by=identity) = print(nothing, stdout, a; sorted, by) + +####### +# API # +####### + +print(f::MbyFunc, io::IO, a::AbstractDict; sorted::Bool=false, by=identity) = print_table(f, io, a; sorted=sorted, by=by) +print(f::MbyFunc, a::AbstractDict; sorted::Bool=false, by=identity) = print(f, stdout, a; sorted=sorted, by=by) +print(io::IO, a::AbstractDict; sorted::Bool=false, by=identity) = print_table(nothing, io, a; sorted=sorted, by=by) +print(a::AbstractDict; sorted::Bool=false, by=identity) = print(nothing, stdout, a; sorted=sorted, by=by) diff --git a/stdlib/TOML/test/parse.jl b/stdlib/TOML/test/parse.jl index 12f68acbdb5bf..30400344f67cf 100644 --- a/stdlib/TOML/test/parse.jl +++ b/stdlib/TOML/test/parse.jl @@ -14,6 +14,7 @@ using TOML: ParserError TOML.parse(IOBuffer(str)) == TOML.parse(p, str) == TOML.parse(p, SubString(str)) == TOML.parse(p, IOBuffer(str)) == dict + @test TOML.parse("a\t=1") == dict @test_throws ParserError TOML.parse(invalid_str) @test_throws ParserError TOML.parse(SubString(invalid_str)) @test_throws ParserError TOML.parse(IOBuffer(invalid_str)) diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl index 4ab5e2d8d066d..765b6feb491a5 100644 --- a/stdlib/TOML/test/print.jl +++ b/stdlib/TOML/test/print.jl @@ -71,3 +71,72 @@ end d = Dict("str" => string(Char(0xd800))) @test_throws ErrorException TOML.print(devnull, d) end + +str = """ +[[dataset.loader]] +driver = "nested" +loaders = ["gzip", { driver = "csv", args = {delim = "\t"}}] +""" +@test roundtrip(str) + + +@testset "vec with dicts and non-dicts" begin + # https://github.com/JuliaLang/julia/issues/45340 + d = Dict("b" => Any[111, Dict("a" => 222, "d" => 333)]) + @test toml_str(d) == "b = [111, {a = 222, d = 333}]\n" + + d = Dict("b" => Any[Dict("a" => 222, "d" => 333), 111]) + @test toml_str(d) == "b = [{a = 222, d = 
333}, 111]\n" + + d = Dict("b" => Any[Dict("a" => 222, "d" => 333)]) + @test toml_str(d) == """ + [[b]] + a = 222 + d = 333 + """ +end + +@testset "unsigned integers" for (x, s) in [ + 0x1a0 => "0x01a0", + 0x1aea8 => "0x01aea8", + 0x1aeee8 => "0x1aeee8", + 0x1aea01231 => "0x01aea01231", + 0x1aea01231213ae13125 => "0x01aea01231213ae13125", + ] + d = Dict("x" => x) + @test toml_str(d) == """ + x = $s + """ +end + +struct Foo + a::Int64 + b::Float64 +end + +struct Bar + c::Float64 + d::String +end + + +f = Foo(2,9.9) +b = Bar(1.345, "hello") + +dd = Dict("hello"=>"world", "f"=>f, "b"=>b) + +to_dict(foo::Foo) = Dict("a"=>foo.a, "b"=>foo.b) +to_dict(bar::Bar) = Dict("c"=>bar.c, "d"=>bar.d) + +@test toml_str(to_dict, dd; sorted=true) == +""" +hello = "world" + +[b] +c = 1.345 +d = "hello" + +[f] +a = 2 +b = 9.9 +""" diff --git a/stdlib/TOML/test/readme.jl b/stdlib/TOML/test/readme.jl index 21961cc6f7ec8..ee267414485ba 100644 --- a/stdlib/TOML/test/readme.jl +++ b/stdlib/TOML/test/readme.jl @@ -410,31 +410,90 @@ d = parse(str) @test d["oct2"] == 0o755 @test d["bin1"] == 0b11010110 +str = """ +hex1 = 0x6E # UInt8 +hex2 = 0x8f1e # UInt16 +hex3 = 0x765f3173 # UInt32 +hex4 = 0xc13b830a807cc7f4 # UInt64 +hex5 = 0x937efe0a4241edb24a04b97bd90ef363 # UInt128 +hex6 = 0x937efe0a4241edb24a04b97bd90ef3632 # BigInt +""" +@test roundtrip(str) +d = parse(str) +@test d["hex1"] isa UInt64 +@test d["hex2"] isa UInt64 +@test d["hex3"] isa UInt64 +@test d["hex4"] isa UInt64 +@test d["hex5"] isa UInt128 +@test d["hex6"] isa BigInt + +str = """ +oct1 = 0o140 # UInt8 +oct2 = 0o46244 # UInt16 +oct3 = 0o32542120656 # UInt32 +oct4 = 0o1526535761042630654411 # UInt64 +oct5 = 0o3467204325743773607311464533371572447656531 # UInt128 +oct6 = 0o34672043257437736073114645333715724476565312 # BigInt +""" +@test roundtrip(str) +d = parse(str) +@test d["oct1"] isa UInt64 +@test d["oct2"] isa UInt64 +@test d["oct3"] isa UInt64 +@test d["oct4"] isa UInt64 +@test d["oct5"] isa UInt128 +@test d["oct6"] isa BigInt + +str = """ +bin1 = 0b10001010 # UInt8 +bin2 = 0b11111010001100 # UInt16 +bin3 = 0b11100011110000010101000010101 # UInt32 +bin4 = 0b10000110100111011010001000000111110110000011111101101110011011 # UInt64 +bin5 = 0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111 # UInt128 +bin6 = 0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111 # BigInt +""" + +@test roundtrip(str) +d = parse(str) +@test d["bin1"] isa UInt64 +@test d["bin2"] isa UInt64 +@test d["bin3"] isa UInt64 +@test d["bin4"] isa UInt64 +@test d["bin5"] isa UInt128 +@test d["bin6"] isa BigInt + #Arbitrary 64-bit signed integers (from −2^63 to 2^63−1) should be accepted and #handled losslessly. If an integer cannot be represented losslessly, an error #must be thrown. 
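As the assertions around this comment show, integers that do not fit in `Int64` are now promoted to `Int128` (and then `BigInt`) rather than raising `ErrOverflowError`. A small usage sketch, with the expected types matching those tests:

using TOML

d = TOML.parse("""
    small = 1
    big   = 170_141_183_460_469_231_731_687_303_715_884_105_727
    """)
d["small"] isa Int64    # true
d["big"]   isa Int128   # true; even larger literals parse as BigInt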
str = """ -low = -9_223_372_036_854_775_808 -high = 9_223_372_036_854_775_807 +low = -170_141_183_460_469_231_731_687_303_715_884_105_728 +high = 170_141_183_460_469_231_731_687_303_715_884_105_727 +""" +@test roundtrip(str) +d = parse(str) +@test d["low"] == typemin(Int128) +@test d["high"] == typemax(Int128) + +str = """ +low = -170_141_183_460_469_231_731_687_303_715_884_105_728_123 +high = 170_141_183_460_469_231_731_687_303_715_884_105_727_123 """ @test roundtrip(str) d = parse(str) -@test d["low"] == -9_223_372_036_854_775_808 -@test d["high"] == 9_223_372_036_854_775_807 +@test d["low"] == big"-170_141_183_460_469_231_731_687_303_715_884_105_728_123" +@test d["high"] == big"170_141_183_460_469_231_731_687_303_715_884_105_727_123" str = """ toolow = -9_223_372_036_854_775_809 """ -err = tryparse(str) -@test err isa ParserError -@test err.type == Internals.ErrOverflowError +d = parse(str) +@test d["toolow"] == -9223372036854775809 str = """ toohigh = 9_223_372_036_854_775_808 """ -err = tryparse(str) -@test err isa ParserError -@test err.type == Internals.ErrOverflowError +d = parse(str) +d["toohigh"] == 9_223_372_036_854_775_808 end @@ -613,7 +672,7 @@ contributors = [ { name = \"Baz Qux\", email = \"bazqux@example.com\", url = \"https://example.com/bazqux\" } ] """ -@test_broken roundtrip(str) # Printer doesn't handle inline tables in arrays? +@test roundtrip(str) d = parse(str) @test d["integers"] == [1,2,3] @test d["colors"] == ["red", "yellow", "green"] diff --git a/stdlib/TOML/test/runtests.jl b/stdlib/TOML/test/runtests.jl index 6228b3c2fc11c..7376fab914636 100644 --- a/stdlib/TOML/test/runtests.jl +++ b/stdlib/TOML/test/runtests.jl @@ -16,6 +16,7 @@ function roundtrip(data) end include("readme.jl") +include("utils/utils.jl") include("toml_test.jl") include("values.jl") include("invalids.jl") diff --git a/stdlib/TOML/test/testfiles/COPYING b/stdlib/TOML/test/testfiles/COPYING deleted file mode 100644 index 93b22020a83d8..0000000000000 --- a/stdlib/TOML/test/testfiles/COPYING +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2018 TOML authors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-leads.toml b/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-leads.toml deleted file mode 100644 index 123f173beb3ac..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-leads.toml +++ /dev/null @@ -1 +0,0 @@ -no-leads = 1987-7-05T17:45:00Z diff --git a/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-secs.toml b/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-secs.toml deleted file mode 100644 index ba9390076273d..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-secs.toml +++ /dev/null @@ -1 +0,0 @@ -no-secs = 1987-07-05T17:45Z diff --git a/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-t.toml b/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-t.toml deleted file mode 100644 index 617e3c56d4008..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/datetime-malformed-no-t.toml +++ /dev/null @@ -1 +0,0 @@ -no-t = 1987-07-0517:45:00Z diff --git a/stdlib/TOML/test/testfiles/invalid/datetime-malformed-with-milli.toml b/stdlib/TOML/test/testfiles/invalid/datetime-malformed-with-milli.toml deleted file mode 100644 index eef792f34d6ef..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/datetime-malformed-with-milli.toml +++ /dev/null @@ -1 +0,0 @@ -with-milli = 1987-07-5T17:45:00.12Z diff --git a/stdlib/TOML/test/testfiles/invalid/duplicate-key-table.toml b/stdlib/TOML/test/testfiles/invalid/duplicate-key-table.toml deleted file mode 100644 index cedf05fc53bff..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/duplicate-key-table.toml +++ /dev/null @@ -1,5 +0,0 @@ -[fruit] -type = "apple" - -[fruit.type] -apple = "yes" diff --git a/stdlib/TOML/test/testfiles/invalid/duplicate-keys.toml b/stdlib/TOML/test/testfiles/invalid/duplicate-keys.toml deleted file mode 100644 index 9b5aee0e59b35..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/duplicate-keys.toml +++ /dev/null @@ -1,2 +0,0 @@ -dupe = false -dupe = true diff --git a/stdlib/TOML/test/testfiles/invalid/duplicate-tables.toml b/stdlib/TOML/test/testfiles/invalid/duplicate-tables.toml deleted file mode 100644 index 8ddf49b4e8930..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/duplicate-tables.toml +++ /dev/null @@ -1,2 +0,0 @@ -[a] -[a] diff --git a/stdlib/TOML/test/testfiles/invalid/empty-implicit-table.toml b/stdlib/TOML/test/testfiles/invalid/empty-implicit-table.toml deleted file mode 100644 index 0cc36d0d28154..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/empty-implicit-table.toml +++ /dev/null @@ -1 +0,0 @@ -[naughty..naughty] diff --git a/stdlib/TOML/test/testfiles/invalid/empty-table.toml b/stdlib/TOML/test/testfiles/invalid/empty-table.toml deleted file mode 100644 index fe51488c7066f..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/empty-table.toml +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/stdlib/TOML/test/testfiles/invalid/float-leading-zero-neg.toml b/stdlib/TOML/test/testfiles/invalid/float-leading-zero-neg.toml deleted file mode 100644 index dbc16ff161787..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-leading-zero-neg.toml +++ /dev/null @@ -1 +0,0 @@ -leading-zero = -03.14 diff --git a/stdlib/TOML/test/testfiles/invalid/float-leading-zero-pos.toml b/stdlib/TOML/test/testfiles/invalid/float-leading-zero-pos.toml deleted file mode 100644 index 6de9634c6b110..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-leading-zero-pos.toml +++ /dev/null @@ -1 +0,0 @@ -leading-zero = +03.14 diff --git 
a/stdlib/TOML/test/testfiles/invalid/float-leading-zero.toml b/stdlib/TOML/test/testfiles/invalid/float-leading-zero.toml deleted file mode 100644 index 551fb2551053a..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-leading-zero.toml +++ /dev/null @@ -1 +0,0 @@ -leading-zero = 03.14 diff --git a/stdlib/TOML/test/testfiles/invalid/float-no-leading-zero.toml b/stdlib/TOML/test/testfiles/invalid/float-no-leading-zero.toml deleted file mode 100644 index cab76bfd15887..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-no-leading-zero.toml +++ /dev/null @@ -1,2 +0,0 @@ -answer = .12345 -neganswer = -.12345 diff --git a/stdlib/TOML/test/testfiles/invalid/float-no-trailing-digits.toml b/stdlib/TOML/test/testfiles/invalid/float-no-trailing-digits.toml deleted file mode 100644 index cbff2d06f05cc..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-no-trailing-digits.toml +++ /dev/null @@ -1,2 +0,0 @@ -answer = 1. -neganswer = -1. diff --git a/stdlib/TOML/test/testfiles/invalid/float-underscore-after-point.toml b/stdlib/TOML/test/testfiles/invalid/float-underscore-after-point.toml deleted file mode 100644 index fe2f2e2e7a981..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-underscore-after-point.toml +++ /dev/null @@ -1 +0,0 @@ -bad = 1._2 diff --git a/stdlib/TOML/test/testfiles/invalid/float-underscore-after.toml b/stdlib/TOML/test/testfiles/invalid/float-underscore-after.toml deleted file mode 100644 index 33f2bae570c57..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-underscore-after.toml +++ /dev/null @@ -1 +0,0 @@ -bad = 1.2_ diff --git a/stdlib/TOML/test/testfiles/invalid/float-underscore-before-point.toml b/stdlib/TOML/test/testfiles/invalid/float-underscore-before-point.toml deleted file mode 100644 index 0aa1722f790c2..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-underscore-before-point.toml +++ /dev/null @@ -1 +0,0 @@ -bad = 1_.2 diff --git a/stdlib/TOML/test/testfiles/invalid/float-underscore-before.toml b/stdlib/TOML/test/testfiles/invalid/float-underscore-before.toml deleted file mode 100644 index 155de0f65d1e7..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/float-underscore-before.toml +++ /dev/null @@ -1 +0,0 @@ -bad = _1.2 diff --git a/stdlib/TOML/test/testfiles/invalid/inline-table-linebreak.toml b/stdlib/TOML/test/testfiles/invalid/inline-table-linebreak.toml deleted file mode 100644 index 727fb2a4991b0..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/inline-table-linebreak.toml +++ /dev/null @@ -1,2 +0,0 @@ -simple = { a = 1 -} diff --git a/stdlib/TOML/test/testfiles/invalid/integer-leading-zero-neg.toml b/stdlib/TOML/test/testfiles/invalid/integer-leading-zero-neg.toml deleted file mode 100644 index ff6836b690b6e..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/integer-leading-zero-neg.toml +++ /dev/null @@ -1 +0,0 @@ -leading-zero = -012 diff --git a/stdlib/TOML/test/testfiles/invalid/integer-leading-zero-pos.toml b/stdlib/TOML/test/testfiles/invalid/integer-leading-zero-pos.toml deleted file mode 100644 index 4e635421de813..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/integer-leading-zero-pos.toml +++ /dev/null @@ -1 +0,0 @@ -leading-zero = +012 diff --git a/stdlib/TOML/test/testfiles/invalid/integer-leading-zero.toml b/stdlib/TOML/test/testfiles/invalid/integer-leading-zero.toml deleted file mode 100644 index 38b1ca40529ff..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/integer-leading-zero.toml +++ /dev/null @@ -1 +0,0 @@ -leading-zero = 012 diff --git 
a/stdlib/TOML/test/testfiles/invalid/integer-underscore-after.toml b/stdlib/TOML/test/testfiles/invalid/integer-underscore-after.toml deleted file mode 100644 index b9ec0ee8978e4..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/integer-underscore-after.toml +++ /dev/null @@ -1 +0,0 @@ -bad = 123_ diff --git a/stdlib/TOML/test/testfiles/invalid/integer-underscore-before.toml b/stdlib/TOML/test/testfiles/invalid/integer-underscore-before.toml deleted file mode 100644 index 1f96c4a5943b4..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/integer-underscore-before.toml +++ /dev/null @@ -1 +0,0 @@ -bad = _123 diff --git a/stdlib/TOML/test/testfiles/invalid/integer-underscore-double.toml b/stdlib/TOML/test/testfiles/invalid/integer-underscore-double.toml deleted file mode 100644 index 490adb3547a7b..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/integer-underscore-double.toml +++ /dev/null @@ -1 +0,0 @@ -bad = 1__23 diff --git a/stdlib/TOML/test/testfiles/invalid/key-after-array.toml b/stdlib/TOML/test/testfiles/invalid/key-after-array.toml deleted file mode 100644 index 5c1a1b0a9bc50..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-after-array.toml +++ /dev/null @@ -1 +0,0 @@ -[[agencies]] owner = "S Cjelli" diff --git a/stdlib/TOML/test/testfiles/invalid/key-after-table.toml b/stdlib/TOML/test/testfiles/invalid/key-after-table.toml deleted file mode 100644 index 68867842cb8e2..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-after-table.toml +++ /dev/null @@ -1 +0,0 @@ -[error] this = "should not be here" diff --git a/stdlib/TOML/test/testfiles/invalid/key-empty.toml b/stdlib/TOML/test/testfiles/invalid/key-empty.toml deleted file mode 100644 index 09f998f4163e1..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-empty.toml +++ /dev/null @@ -1 +0,0 @@ - = 1 diff --git a/stdlib/TOML/test/testfiles/invalid/key-hash.toml b/stdlib/TOML/test/testfiles/invalid/key-hash.toml deleted file mode 100644 index e321b1fbd0c96..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-hash.toml +++ /dev/null @@ -1 +0,0 @@ -a# = 1 diff --git a/stdlib/TOML/test/testfiles/invalid/key-newline.toml b/stdlib/TOML/test/testfiles/invalid/key-newline.toml deleted file mode 100644 index 707aad54ec34f..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-newline.toml +++ /dev/null @@ -1,2 +0,0 @@ -a -= 1 diff --git a/stdlib/TOML/test/testfiles/invalid/key-no-eol.toml b/stdlib/TOML/test/testfiles/invalid/key-no-eol.toml deleted file mode 100644 index 3c58eee182b21..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-no-eol.toml +++ /dev/null @@ -1 +0,0 @@ -a = 1 b = 2 diff --git a/stdlib/TOML/test/testfiles/invalid/key-open-bracket.toml b/stdlib/TOML/test/testfiles/invalid/key-open-bracket.toml deleted file mode 100644 index f0aeb16e50003..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-open-bracket.toml +++ /dev/null @@ -1 +0,0 @@ -[abc = 1 diff --git a/stdlib/TOML/test/testfiles/invalid/key-single-open-bracket.toml b/stdlib/TOML/test/testfiles/invalid/key-single-open-bracket.toml deleted file mode 100644 index 558ed37d93c5c..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-single-open-bracket.toml +++ /dev/null @@ -1 +0,0 @@ -[ diff --git a/stdlib/TOML/test/testfiles/invalid/key-space.toml b/stdlib/TOML/test/testfiles/invalid/key-space.toml deleted file mode 100644 index 7c22703e888e7..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-space.toml +++ /dev/null @@ -1 +0,0 @@ -a b = 1 diff --git 
a/stdlib/TOML/test/testfiles/invalid/key-start-bracket.toml b/stdlib/TOML/test/testfiles/invalid/key-start-bracket.toml deleted file mode 100644 index e0597ae1c6f1c..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-start-bracket.toml +++ /dev/null @@ -1,3 +0,0 @@ -[a] -[xyz = 5 -[b] diff --git a/stdlib/TOML/test/testfiles/invalid/key-two-equals.toml b/stdlib/TOML/test/testfiles/invalid/key-two-equals.toml deleted file mode 100644 index 25a037894eb0f..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/key-two-equals.toml +++ /dev/null @@ -1 +0,0 @@ -key= = 1 diff --git a/stdlib/TOML/test/testfiles/invalid/llbrace.toml b/stdlib/TOML/test/testfiles/invalid/llbrace.toml deleted file mode 100644 index 047978e5bc784..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/llbrace.toml +++ /dev/null @@ -1 +0,0 @@ -[ [table]] diff --git a/stdlib/TOML/test/testfiles/invalid/multi-line-inline-table.toml b/stdlib/TOML/test/testfiles/invalid/multi-line-inline-table.toml deleted file mode 100644 index 3f34e15c07216..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/multi-line-inline-table.toml +++ /dev/null @@ -1,4 +0,0 @@ -json_like = { - first = "Tom", - last = "Preston-Werner" -} diff --git a/stdlib/TOML/test/testfiles/invalid/multi-line-string-no-close.toml b/stdlib/TOML/test/testfiles/invalid/multi-line-string-no-close.toml deleted file mode 100644 index 4ca959715a953..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/multi-line-string-no-close.toml +++ /dev/null @@ -1,2 +0,0 @@ -invalid = """ - this will fail diff --git a/stdlib/TOML/test/testfiles/invalid/rrbrace.toml b/stdlib/TOML/test/testfiles/invalid/rrbrace.toml deleted file mode 100644 index 3a4dee4712685..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/rrbrace.toml +++ /dev/null @@ -1 +0,0 @@ -[[table] ] diff --git a/stdlib/TOML/test/testfiles/invalid/string-bad-byte-escape.toml b/stdlib/TOML/test/testfiles/invalid/string-bad-byte-escape.toml deleted file mode 100644 index 4c7be59f4b16c..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/string-bad-byte-escape.toml +++ /dev/null @@ -1 +0,0 @@ -naughty = "\xAg" diff --git a/stdlib/TOML/test/testfiles/invalid/string-bad-codepoint.toml b/stdlib/TOML/test/testfiles/invalid/string-bad-codepoint.toml deleted file mode 100644 index 592db75bb0c34..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/string-bad-codepoint.toml +++ /dev/null @@ -1 +0,0 @@ -invalid-codepoint = "This string contains a non scalar unicode codepoint \uD801" diff --git a/stdlib/TOML/test/testfiles/invalid/string-bad-escape.toml b/stdlib/TOML/test/testfiles/invalid/string-bad-escape.toml deleted file mode 100644 index 60acb0ccc5077..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/string-bad-escape.toml +++ /dev/null @@ -1 +0,0 @@ -invalid-escape = "This string has a bad \a escape character." diff --git a/stdlib/TOML/test/testfiles/invalid/string-bad-slash-escape.toml b/stdlib/TOML/test/testfiles/invalid/string-bad-slash-escape.toml deleted file mode 100644 index 154abadd5c3eb..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/string-bad-slash-escape.toml +++ /dev/null @@ -1 +0,0 @@ -invalid-escape = "This string has a bad \/ escape character." 
diff --git a/stdlib/TOML/test/testfiles/invalid/string-bad-uni-esc.toml b/stdlib/TOML/test/testfiles/invalid/string-bad-uni-esc.toml deleted file mode 100644 index 9eae4ab96e5fd..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/string-bad-uni-esc.toml +++ /dev/null @@ -1 +0,0 @@ -str = "val\ue" diff --git a/stdlib/TOML/test/testfiles/invalid/string-byte-escapes.toml b/stdlib/TOML/test/testfiles/invalid/string-byte-escapes.toml deleted file mode 100644 index e94452a8dfc88..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/string-byte-escapes.toml +++ /dev/null @@ -1 +0,0 @@ -answer = "\x33" diff --git a/stdlib/TOML/test/testfiles/invalid/string-no-close.toml b/stdlib/TOML/test/testfiles/invalid/string-no-close.toml deleted file mode 100644 index 0c292fcab730d..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/string-no-close.toml +++ /dev/null @@ -1 +0,0 @@ -no-ending-quote = "One time, at band camp diff --git a/stdlib/TOML/test/testfiles/invalid/table-array-implicit.toml b/stdlib/TOML/test/testfiles/invalid/table-array-implicit.toml deleted file mode 100644 index 55094605bb8f6..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/table-array-implicit.toml +++ /dev/null @@ -1,14 +0,0 @@ -# This test is a bit tricky. It should fail because the first use of -# `[[albums.songs]]` without first declaring `albums` implies that `albums` -# must be a table. The alternative would be quite weird. Namely, it wouldn't -# comply with the TOML spec: "Each double-bracketed sub-table will belong to -# the most *recently* defined table element *above* it." -# -# This is in contrast to the *valid* test, table-array-implicit where -# `[[albums.songs]]` works by itself, so long as `[[albums]]` isn't declared -# later. (Although, `[albums]` could be.) -[[albums.songs]] -name = "Glory Days" - -[[albums]] -name = "Born in the USA" diff --git a/stdlib/TOML/test/testfiles/invalid/table-array-malformed-bracket.toml b/stdlib/TOML/test/testfiles/invalid/table-array-malformed-bracket.toml deleted file mode 100644 index 39c73b05c44e4..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/table-array-malformed-bracket.toml +++ /dev/null @@ -1,2 +0,0 @@ -[[albums] -name = "Born to Run" diff --git a/stdlib/TOML/test/testfiles/invalid/table-array-malformed-empty.toml b/stdlib/TOML/test/testfiles/invalid/table-array-malformed-empty.toml deleted file mode 100644 index a470ca332f31f..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/table-array-malformed-empty.toml +++ /dev/null @@ -1,2 +0,0 @@ -[[]] -name = "Born to Run" diff --git a/stdlib/TOML/test/testfiles/invalid/table-empty.toml b/stdlib/TOML/test/testfiles/invalid/table-empty.toml deleted file mode 100644 index fe51488c7066f..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/table-empty.toml +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/stdlib/TOML/test/testfiles/invalid/table-nested-brackets-close.toml b/stdlib/TOML/test/testfiles/invalid/table-nested-brackets-close.toml deleted file mode 100644 index c8b5a67858006..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/table-nested-brackets-close.toml +++ /dev/null @@ -1,2 +0,0 @@ -[a]b] -zyx = 42 diff --git a/stdlib/TOML/test/testfiles/invalid/table-nested-brackets-open.toml b/stdlib/TOML/test/testfiles/invalid/table-nested-brackets-open.toml deleted file mode 100644 index 246d7e91fe4fb..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/table-nested-brackets-open.toml +++ /dev/null @@ -1,2 +0,0 @@ -[a[b] -zyx = 42 diff --git a/stdlib/TOML/test/testfiles/invalid/table-whitespace.toml 
b/stdlib/TOML/test/testfiles/invalid/table-whitespace.toml deleted file mode 100644 index 0a6a6a69725c4..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/table-whitespace.toml +++ /dev/null @@ -1 +0,0 @@ -[invalid key] diff --git a/stdlib/TOML/test/testfiles/invalid/table-with-pound.toml b/stdlib/TOML/test/testfiles/invalid/table-with-pound.toml deleted file mode 100644 index e7b777ecfb305..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/table-with-pound.toml +++ /dev/null @@ -1,2 +0,0 @@ -[key#group] -answer = 42 diff --git a/stdlib/TOML/test/testfiles/invalid/text-after-array-entries.toml b/stdlib/TOML/test/testfiles/invalid/text-after-array-entries.toml deleted file mode 100644 index 1a7289074ed13..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/text-after-array-entries.toml +++ /dev/null @@ -1,4 +0,0 @@ -array = [ - "Is there life after an array separator?", No - "Entry" -] diff --git a/stdlib/TOML/test/testfiles/invalid/text-after-integer.toml b/stdlib/TOML/test/testfiles/invalid/text-after-integer.toml deleted file mode 100644 index 42de7aff4d856..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/text-after-integer.toml +++ /dev/null @@ -1 +0,0 @@ -answer = 42 the ultimate answer? diff --git a/stdlib/TOML/test/testfiles/invalid/text-after-string.toml b/stdlib/TOML/test/testfiles/invalid/text-after-string.toml deleted file mode 100644 index c92a6f11d85a7..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/text-after-string.toml +++ /dev/null @@ -1 +0,0 @@ -string = "Is there life after strings?" No. diff --git a/stdlib/TOML/test/testfiles/invalid/text-after-table.toml b/stdlib/TOML/test/testfiles/invalid/text-after-table.toml deleted file mode 100644 index 87da9db26dffc..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/text-after-table.toml +++ /dev/null @@ -1 +0,0 @@ -[error] this shouldn't be here diff --git a/stdlib/TOML/test/testfiles/invalid/text-before-array-separator.toml b/stdlib/TOML/test/testfiles/invalid/text-before-array-separator.toml deleted file mode 100644 index 9b06a39241063..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/text-before-array-separator.toml +++ /dev/null @@ -1,4 +0,0 @@ -array = [ - "Is there life before an array separator?" 
No, - "Entry" -] diff --git a/stdlib/TOML/test/testfiles/invalid/text-in-array.toml b/stdlib/TOML/test/testfiles/invalid/text-in-array.toml deleted file mode 100644 index a6a6c42075e24..0000000000000 --- a/stdlib/TOML/test/testfiles/invalid/text-in-array.toml +++ /dev/null @@ -1,5 +0,0 @@ -array = [ - "Entry 1", - I don't belong, - "Entry 2", -] diff --git a/stdlib/TOML/test/testfiles/valid/array-empty.jl b/stdlib/TOML/test/testfiles/valid/array-empty.jl deleted file mode 100644 index da5f04f7da1a8..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-empty.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("thevoid" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[],"type" => "array")],"type" => "array")],"type" => "array")],"type" => "array")],"type" => "array")) diff --git a/stdlib/TOML/test/testfiles/valid/array-empty.json b/stdlib/TOML/test/testfiles/valid/array-empty.json deleted file mode 100644 index 2fbf2567f87bc..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-empty.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "thevoid": { "type": "array", "value": [ - {"type": "array", "value": [ - {"type": "array", "value": [ - {"type": "array", "value": [ - {"type": "array", "value": []} - ]} - ]} - ]} - ]} -} diff --git a/stdlib/TOML/test/testfiles/valid/array-empty.toml b/stdlib/TOML/test/testfiles/valid/array-empty.toml deleted file mode 100644 index fa58dc63d4880..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-empty.toml +++ /dev/null @@ -1 +0,0 @@ -thevoid = [[[[[]]]]] diff --git a/stdlib/TOML/test/testfiles/valid/array-nospaces.jl b/stdlib/TOML/test/testfiles/valid/array-nospaces.jl deleted file mode 100644 index 3f8b61a2880d4..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-nospaces.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("ints" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer"), Dict{String,Any}("value" => "3","type" => "integer")],"type" => "array")) diff --git a/stdlib/TOML/test/testfiles/valid/array-nospaces.json b/stdlib/TOML/test/testfiles/valid/array-nospaces.json deleted file mode 100644 index 1833d61c55973..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-nospaces.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "ints": { - "type": "array", - "value": [ - {"type": "integer", "value": "1"}, - {"type": "integer", "value": "2"}, - {"type": "integer", "value": "3"} - ] - } -} diff --git a/stdlib/TOML/test/testfiles/valid/array-nospaces.toml b/stdlib/TOML/test/testfiles/valid/array-nospaces.toml deleted file mode 100644 index 66189367fe9eb..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-nospaces.toml +++ /dev/null @@ -1 +0,0 @@ -ints = [1,2,3] diff --git a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.jl b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.jl deleted file mode 100644 index 6e6862dc30080..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => " \", ","type" => "string")],"type" => "array")) diff --git a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.json b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.json deleted file mode 100644 index a88eb26ba12ea..0000000000000 --- 
a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.json +++ /dev/null @@ -1 +0,0 @@ -{"title": {"type": "array", "value": [{"type": "string", "value": " \", "}]}} diff --git a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.toml b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.toml deleted file mode 100644 index 4758ddcade2f4..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.toml +++ /dev/null @@ -1 +0,0 @@ -title = [ " \", ",] diff --git a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.jl b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.jl deleted file mode 100644 index d570f5e2a433a..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "Client: \"XXXX\", Job: XXXX","type" => "string"), Dict{String,Any}("value" => "Code: XXXX","type" => "string")],"type" => "array")) diff --git a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.json b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.json deleted file mode 100644 index c6f031f595c9f..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "title": { - "type": "array", - "value": [ - {"type": "string", "value": "Client: \"XXXX\", Job: XXXX"}, - {"type": "string", "value": "Code: XXXX"} - ] - } -} diff --git a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.toml b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.toml deleted file mode 100644 index 6b458e1e8b96b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.toml +++ /dev/null @@ -1,4 +0,0 @@ -title = [ -"Client: \"XXXX\", Job: XXXX", -"Code: XXXX" -] diff --git a/stdlib/TOML/test/testfiles/valid/array-string-with-comma.jl b/stdlib/TOML/test/testfiles/valid/array-string-with-comma.jl deleted file mode 100644 index 83727c9f05954..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-string-with-comma.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "Client: XXXX, Job: XXXX","type" => "string"), Dict{String,Any}("value" => "Code: XXXX","type" => "string")],"type" => "array")) diff --git a/stdlib/TOML/test/testfiles/valid/array-string-with-comma.json b/stdlib/TOML/test/testfiles/valid/array-string-with-comma.json deleted file mode 100644 index d879c4c22ce4f..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-string-with-comma.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "title": { - "type": "array", - "value": [ - {"type": "string", "value": "Client: XXXX, Job: XXXX"}, - {"type": "string", "value": "Code: XXXX"} - ] - } -} diff --git a/stdlib/TOML/test/testfiles/valid/array-string-with-comma.toml b/stdlib/TOML/test/testfiles/valid/array-string-with-comma.toml deleted file mode 100644 index 655c40e27ed44..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-string-with-comma.toml +++ /dev/null @@ -1,4 +0,0 @@ -title = [ -"Client: XXXX, Job: XXXX", -"Code: XXXX" -] diff --git a/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.jl b/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.jl deleted file mode 100644 index 0c0ad7fe793bb..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("foo" => Any[Dict{String,Any}("bar" => 
Dict{String,Any}("value" => "\"{{baz}}\"","type" => "string"))]) diff --git a/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.json b/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.json deleted file mode 100644 index 4797be94c24b6..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "foo": [ - { - "bar": {"type": "string", "value": "\"{{baz}}\"" } - } - ] -} diff --git a/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.toml b/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.toml deleted file mode 100644 index f0de81e0d646d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.toml +++ /dev/null @@ -1 +0,0 @@ -foo = [ { bar="\"{{baz}}\""} ] diff --git a/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.jl b/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.jl deleted file mode 100644 index 7f66b6052096a..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("mixed" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string"), Dict{String,Any}("value" => "b","type" => "string")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1.1","type" => "float"), Dict{String,Any}("value" => "2.1","type" => "float")],"type" => "array")],"type" => "array")) diff --git a/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.json b/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.json deleted file mode 100644 index 478fa5c706b2f..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "mixed": { - "type": "array", - "value": [ - {"type": "array", "value": [ - {"type": "integer", "value": "1"}, - {"type": "integer", "value": "2"} - ]}, - {"type": "array", "value": [ - {"type": "string", "value": "a"}, - {"type": "string", "value": "b"} - ]}, - {"type": "array", "value": [ - {"type": "float", "value": "1.1"}, - {"type": "float", "value": "2.1"} - ]} - ] - } -} diff --git a/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.toml b/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.toml deleted file mode 100644 index a246fcf1deb37..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.toml +++ /dev/null @@ -1 +0,0 @@ -mixed = [[1, 2], ["a", "b"], [1.1, 2.1]] diff --git a/stdlib/TOML/test/testfiles/valid/arrays-nested.jl b/stdlib/TOML/test/testfiles/valid/arrays-nested.jl deleted file mode 100644 index 4f3280552e9da..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays-nested.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("nest" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "b","type" => "string")],"type" => "array")],"type" => "array")) diff --git a/stdlib/TOML/test/testfiles/valid/arrays-nested.json b/stdlib/TOML/test/testfiles/valid/arrays-nested.json deleted file mode 100644 index d21920cc3eb41..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays-nested.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "nest": { - "type": "array", - "value": [ - {"type": "array", 
"value": [ - {"type": "string", "value": "a"} - ]}, - {"type": "array", "value": [ - {"type": "string", "value": "b"} - ]} - ] - } -} diff --git a/stdlib/TOML/test/testfiles/valid/arrays-nested.toml b/stdlib/TOML/test/testfiles/valid/arrays-nested.toml deleted file mode 100644 index ce3302249b72d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays-nested.toml +++ /dev/null @@ -1 +0,0 @@ -nest = [["a"], ["b"]] diff --git a/stdlib/TOML/test/testfiles/valid/arrays.jl b/stdlib/TOML/test/testfiles/valid/arrays.jl deleted file mode 100644 index dc0ccdfc4f414..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("strings" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string"), Dict{String,Any}("value" => "b","type" => "string"), Dict{String,Any}("value" => "c","type" => "string")],"type" => "array"),"ints" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer"), Dict{String,Any}("value" => "3","type" => "integer")],"type" => "array"),"dates" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"), Dict{String,Any}("value" => "1979-05-27T07:32:00Z","type" => "datetime"), Dict{String,Any}("value" => "2006-06-01T11:00:00Z","type" => "datetime")],"type" => "array"),"comments" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer")],"type" => "array"),"floats" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1.1","type" => "float"), Dict{String,Any}("value" => "2.1","type" => "float"), Dict{String,Any}("value" => "3.1","type" => "float")],"type" => "array")) diff --git a/stdlib/TOML/test/testfiles/valid/arrays.json b/stdlib/TOML/test/testfiles/valid/arrays.json deleted file mode 100644 index 244511695b67d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "ints": { - "type": "array", - "value": [ - {"type": "integer", "value": "1"}, - {"type": "integer", "value": "2"}, - {"type": "integer", "value": "3"} - ] - }, - "floats": { - "type": "array", - "value": [ - {"type": "float", "value": "1.1"}, - {"type": "float", "value": "2.1"}, - {"type": "float", "value": "3.1"} - ] - }, - "strings": { - "type": "array", - "value": [ - {"type": "string", "value": "a"}, - {"type": "string", "value": "b"}, - {"type": "string", "value": "c"} - ] - }, - "dates": { - "type": "array", - "value": [ - {"type": "datetime", "value": "1987-07-05T17:45:00Z"}, - {"type": "datetime", "value": "1979-05-27T07:32:00Z"}, - {"type": "datetime", "value": "2006-06-01T11:00:00Z"} - ] - }, - "comments": { - "type": "array", - "value": [ - {"type": "integer", "value": "1"}, - {"type": "integer", "value": "2"} - ] - } -} diff --git a/stdlib/TOML/test/testfiles/valid/arrays.toml b/stdlib/TOML/test/testfiles/valid/arrays.toml deleted file mode 100644 index db1c40020ff5d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/arrays.toml +++ /dev/null @@ -1,12 +0,0 @@ -ints = [1, 2, 3] -floats = [1.1, 2.1, 3.1] -strings = ["a", "b", "c"] -dates = [ - 1987-07-05T17:45:00Z, - 1979-05-27T07:32:00Z, - 2006-06-01T11:00:00Z, -] -comments = [ - 1, - 2, #this is ok -] diff --git a/stdlib/TOML/test/testfiles/valid/bool.jl b/stdlib/TOML/test/testfiles/valid/bool.jl deleted file mode 100644 index 5ce242aae3915..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/bool.jl +++ 
/dev/null @@ -1 +0,0 @@ -Dict{String,Any}("f" => Dict{String,Any}("value" => "false","type" => "bool"),"t" => Dict{String,Any}("value" => "true","type" => "bool")) diff --git a/stdlib/TOML/test/testfiles/valid/bool.json b/stdlib/TOML/test/testfiles/valid/bool.json deleted file mode 100644 index ae368e9492e35..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/bool.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "f": {"type": "bool", "value": "false"}, - "t": {"type": "bool", "value": "true"} -} diff --git a/stdlib/TOML/test/testfiles/valid/bool.toml b/stdlib/TOML/test/testfiles/valid/bool.toml deleted file mode 100644 index a8a829b34de9b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/bool.toml +++ /dev/null @@ -1,2 +0,0 @@ -t = true -f = false diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof.jl b/stdlib/TOML/test/testfiles/valid/comments-at-eof.jl deleted file mode 100644 index 45392c32b0ba1..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-at-eof.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("key" => Dict{String,Any}("value" => "value","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof.json b/stdlib/TOML/test/testfiles/valid/comments-at-eof.json deleted file mode 100644 index 458c38a3377e8..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-at-eof.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "key": {"type": "string", "value": "value"} -} diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof.toml b/stdlib/TOML/test/testfiles/valid/comments-at-eof.toml deleted file mode 100644 index 090b474834610..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-at-eof.toml +++ /dev/null @@ -1,2 +0,0 @@ -# This is a full-line comment -key = "value" # This is a comment at the end of a line diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.jl b/stdlib/TOML/test/testfiles/valid/comments-at-eof2.jl deleted file mode 100644 index 45392c32b0ba1..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("key" => Dict{String,Any}("value" => "value","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.json b/stdlib/TOML/test/testfiles/valid/comments-at-eof2.json deleted file mode 100644 index 458c38a3377e8..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "key": {"type": "string", "value": "value"} -} diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.toml b/stdlib/TOML/test/testfiles/valid/comments-at-eof2.toml deleted file mode 100644 index 090b474834610..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.toml +++ /dev/null @@ -1,2 +0,0 @@ -# This is a full-line comment -key = "value" # This is a comment at the end of a line diff --git a/stdlib/TOML/test/testfiles/valid/comments-everywhere.jl b/stdlib/TOML/test/testfiles/valid/comments-everywhere.jl deleted file mode 100644 index dd43fd70576e9..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-everywhere.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("group" => Dict{String,Any}("more" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "42","type" => "integer"), Dict{String,Any}("value" => "42","type" => "integer")],"type" => "array"),"answer" => Dict{String,Any}("value" => "42","type" => "integer"))) diff --git a/stdlib/TOML/test/testfiles/valid/comments-everywhere.json b/stdlib/TOML/test/testfiles/valid/comments-everywhere.json deleted file mode 100644 
index e69a2e9582395..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-everywhere.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "group": { - "answer": {"type": "integer", "value": "42"}, - "more": { - "type": "array", - "value": [ - {"type": "integer", "value": "42"}, - {"type": "integer", "value": "42"} - ] - } - } -} diff --git a/stdlib/TOML/test/testfiles/valid/comments-everywhere.toml b/stdlib/TOML/test/testfiles/valid/comments-everywhere.toml deleted file mode 100644 index 3dca74cade516..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/comments-everywhere.toml +++ /dev/null @@ -1,24 +0,0 @@ -# Top comment. - # Top comment. -# Top comment. - -# [no-extraneous-groups-please] - -[group] # Comment -answer = 42 # Comment -# no-extraneous-keys-please = 999 -# Inbetween comment. -more = [ # Comment - # What about multiple # comments? - # Can you handle it? - # - # Evil. -# Evil. - 42, 42, # Comments within arrays are fun. - # What about multiple # comments? - # Can you handle it? - # - # Evil. -# Evil. -# ] Did I fool you? -] # Hopefully not. diff --git a/stdlib/TOML/test/testfiles/valid/datetime-timezone.jl b/stdlib/TOML/test/testfiles/valid/datetime-timezone.jl deleted file mode 100644 index 1759fd10e086c..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/datetime-timezone.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("bestdayever" => Dict{String,Any}("value" => "2017-06-06T12:34:56-05:00","type" => "datetime")) diff --git a/stdlib/TOML/test/testfiles/valid/datetime-timezone.json b/stdlib/TOML/test/testfiles/valid/datetime-timezone.json deleted file mode 100644 index 0b70f141c06c9..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/datetime-timezone.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "bestdayever": {"type": "datetime", "value": "2017-06-06T12:34:56-05:00"} -} diff --git a/stdlib/TOML/test/testfiles/valid/datetime-timezone.toml b/stdlib/TOML/test/testfiles/valid/datetime-timezone.toml deleted file mode 100644 index e59cb842c40bf..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/datetime-timezone.toml +++ /dev/null @@ -1 +0,0 @@ -bestdayever = 2017-06-06T12:34:56-05:00 diff --git a/stdlib/TOML/test/testfiles/valid/datetime.jl b/stdlib/TOML/test/testfiles/valid/datetime.jl deleted file mode 100644 index 8d6c630023e3f..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/datetime.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("milliseconds" => Dict{String,Any}("value" => "1977-12-21T03:32:00.555+00:00","type" => "datetime"),"bestdayever" => Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"),"numoffset" => Dict{String,Any}("value" => "1977-06-28T12:32:00Z","type" => "datetime")) diff --git a/stdlib/TOML/test/testfiles/valid/datetime.json b/stdlib/TOML/test/testfiles/valid/datetime.json deleted file mode 100644 index 4cdc0006580cc..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/datetime.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bestdayever": {"type": "datetime", "value": "1987-07-05T17:45:00Z"}, - "numoffset": {"type": "datetime", "value": "1977-06-28T12:32:00Z"}, - "milliseconds": {"type": "datetime", "value": "1977-12-21T03:32:00.555+00:00"} -} diff --git a/stdlib/TOML/test/testfiles/valid/datetime.toml b/stdlib/TOML/test/testfiles/valid/datetime.toml deleted file mode 100644 index ee787b7ed6762..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/datetime.toml +++ /dev/null @@ -1,3 +0,0 @@ -bestdayever = 1987-07-05T17:45:00Z -numoffset = 1977-06-28T07:32:00-05:00 -milliseconds = 1977-12-21T10:32:00.555+07:00 diff --git 
a/stdlib/TOML/test/testfiles/valid/double-quote-escape.jl b/stdlib/TOML/test/testfiles/valid/double-quote-escape.jl deleted file mode 100644 index 934675aacf219..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/double-quote-escape.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("test" => Dict{String,Any}("value" => "\"one\"","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/double-quote-escape.json b/stdlib/TOML/test/testfiles/valid/double-quote-escape.json deleted file mode 100644 index 0c4ac37e0a95e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/double-quote-escape.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "test": { - "type": "string", - "value": "\"one\"" - } -} diff --git a/stdlib/TOML/test/testfiles/valid/double-quote-escape.toml b/stdlib/TOML/test/testfiles/valid/double-quote-escape.toml deleted file mode 100644 index 78e7e72927950..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/double-quote-escape.toml +++ /dev/null @@ -1 +0,0 @@ -test = "\"one\"" diff --git a/stdlib/TOML/test/testfiles/valid/empty.jl b/stdlib/TOML/test/testfiles/valid/empty.jl deleted file mode 100644 index 1adb380ba335b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/empty.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}() diff --git a/stdlib/TOML/test/testfiles/valid/empty.json b/stdlib/TOML/test/testfiles/valid/empty.json deleted file mode 100644 index 0967ef424bce6..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/empty.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/stdlib/TOML/test/testfiles/valid/escaped-escape.jl b/stdlib/TOML/test/testfiles/valid/escaped-escape.jl deleted file mode 100644 index ed710ff1b4ff6..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/escaped-escape.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("answer" => Dict{String,Any}("value" => "\\x64","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/escaped-escape.json b/stdlib/TOML/test/testfiles/valid/escaped-escape.json deleted file mode 100644 index 9db7f8ab5f251..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/escaped-escape.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "answer": {"type": "string", "value": "\\x64"} -} diff --git a/stdlib/TOML/test/testfiles/valid/escaped-escape.toml b/stdlib/TOML/test/testfiles/valid/escaped-escape.toml deleted file mode 100644 index d5758761457f1..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/escaped-escape.toml +++ /dev/null @@ -1 +0,0 @@ -answer = "\\x64" diff --git a/stdlib/TOML/test/testfiles/valid/example.jl b/stdlib/TOML/test/testfiles/valid/example.jl deleted file mode 100644 index b5b2bb86c5363..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/example.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("best-day-ever" => Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"),"numtheory" => Dict{String,Any}("perfection" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "6","type" => "integer"), Dict{String,Any}("value" => "28","type" => "integer"), Dict{String,Any}("value" => "496","type" => "integer")],"type" => "array"),"boring" => Dict{String,Any}("value" => "false","type" => "bool"))) diff --git a/stdlib/TOML/test/testfiles/valid/example.json b/stdlib/TOML/test/testfiles/valid/example.json deleted file mode 100644 index 48aa90784a4eb..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/example.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "best-day-ever": {"type": "datetime", "value": "1987-07-05T17:45:00Z"}, - "numtheory": { - "boring": {"type": "bool", "value": "false"}, - "perfection": { 
- "type": "array", - "value": [ - {"type": "integer", "value": "6"}, - {"type": "integer", "value": "28"}, - {"type": "integer", "value": "496"} - ] - } - } -} diff --git a/stdlib/TOML/test/testfiles/valid/example.toml b/stdlib/TOML/test/testfiles/valid/example.toml deleted file mode 100644 index 8cb02e01b0348..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/example.toml +++ /dev/null @@ -1,5 +0,0 @@ -best-day-ever = 1987-07-05T17:45:00Z - -[numtheory] -boring = false -perfection = [6, 28, 496] diff --git a/stdlib/TOML/test/testfiles/valid/exponent-part-float.jl b/stdlib/TOML/test/testfiles/valid/exponent-part-float.jl deleted file mode 100644 index 34ed0bebb2fc0..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/exponent-part-float.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("million" => Dict{String,Any}("value" => "1000000","type" => "float"),"minustenth" => Dict{String,Any}("value" => "-0.1","type" => "float"),"beast" => Dict{String,Any}("value" => "666","type" => "float")) diff --git a/stdlib/TOML/test/testfiles/valid/exponent-part-float.json b/stdlib/TOML/test/testfiles/valid/exponent-part-float.json deleted file mode 100644 index 4dbfbeec030d0..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/exponent-part-float.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "million": {"type": "float", "value": "1000000"}, - "minustenth": {"type": "float", "value": "-0.1"}, - "beast": {"type": "float", "value": "666"} -} diff --git a/stdlib/TOML/test/testfiles/valid/exponent-part-float.toml b/stdlib/TOML/test/testfiles/valid/exponent-part-float.toml deleted file mode 100644 index 41bd282d824d7..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/exponent-part-float.toml +++ /dev/null @@ -1,3 +0,0 @@ -million = 1e6 -minustenth = -1E-1 -beast = 6.66E2 diff --git a/stdlib/TOML/test/testfiles/valid/float-exponent.jl b/stdlib/TOML/test/testfiles/valid/float-exponent.jl deleted file mode 100644 index e64817ce85e92..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float-exponent.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("neg" => Dict{String,Any}("value" => "0.03","type" => "float"),"zero" => Dict{String,Any}("value" => "3.0","type" => "float"),"pointupper" => Dict{String,Any}("value" => "310.0","type" => "float"),"lower" => Dict{String,Any}("value" => "300.0","type" => "float"),"upper" => Dict{String,Any}("value" => "300.0","type" => "float"),"pos" => Dict{String,Any}("value" => "300.0","type" => "float"),"pointlower" => Dict{String,Any}("value" => "310.0","type" => "float")) diff --git a/stdlib/TOML/test/testfiles/valid/float-exponent.json b/stdlib/TOML/test/testfiles/valid/float-exponent.json deleted file mode 100644 index b0d40bd0be156..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float-exponent.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "lower": {"type": "float", "value": "300.0"}, - "upper": {"type": "float", "value": "300.0"}, - "neg": {"type": "float", "value": "0.03"}, - "pos": {"type": "float", "value": "300.0"}, - "zero": {"type": "float", "value": "3.0"}, - "pointlower": {"type": "float", "value": "310.0"}, - "pointupper": {"type": "float", "value": "310.0"} -} diff --git a/stdlib/TOML/test/testfiles/valid/float-exponent.toml b/stdlib/TOML/test/testfiles/valid/float-exponent.toml deleted file mode 100644 index d0db16fd557c7..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float-exponent.toml +++ /dev/null @@ -1,7 +0,0 @@ -lower = 3e2 -upper = 3E2 -neg = 3e-2 -pos = 3E+2 -zero = 3e0 -pointlower = 3.1e2 -pointupper = 3.1E2 diff --git 
a/stdlib/TOML/test/testfiles/valid/float-underscore.jl b/stdlib/TOML/test/testfiles/valid/float-underscore.jl deleted file mode 100644 index e175c937f4d5b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float-underscore.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("after" => Dict{String,Any}("value" => "3141.5927","type" => "float"),"exponent" => Dict{String,Any}("value" => "3e14","type" => "float"),"before" => Dict{String,Any}("value" => "3141.5927","type" => "float")) diff --git a/stdlib/TOML/test/testfiles/valid/float-underscore.json b/stdlib/TOML/test/testfiles/valid/float-underscore.json deleted file mode 100644 index f86cdd790f07c..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float-underscore.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "before": {"type": "float", "value": "3141.5927"}, - "after": {"type": "float", "value": "3141.5927"}, - "exponent": {"type": "float", "value": "3e14"} -} diff --git a/stdlib/TOML/test/testfiles/valid/float-underscore.toml b/stdlib/TOML/test/testfiles/valid/float-underscore.toml deleted file mode 100644 index 343353a89e063..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float-underscore.toml +++ /dev/null @@ -1,3 +0,0 @@ -before = 3_141.5927 -after = 3141.592_7 -exponent = 3e1_4 diff --git a/stdlib/TOML/test/testfiles/valid/float.jl b/stdlib/TOML/test/testfiles/valid/float.jl deleted file mode 100644 index d36893db363a3..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("negpi" => Dict{String,Any}("value" => "-3.14","type" => "float"),"pospi" => Dict{String,Any}("value" => "3.14","type" => "float"),"pi" => Dict{String,Any}("value" => "3.14","type" => "float"),"zero-intpart" => Dict{String,Any}("value" => "0.123","type" => "float")) diff --git a/stdlib/TOML/test/testfiles/valid/float.json b/stdlib/TOML/test/testfiles/valid/float.json deleted file mode 100644 index 3f69b172c98fc..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "pi": {"type": "float", "value": "3.14"}, - "pospi": {"type": "float", "value": "3.14"}, - "negpi": {"type": "float", "value": "-3.14"}, - "zero-intpart": {"type": "float", "value": "0.123"} -} diff --git a/stdlib/TOML/test/testfiles/valid/float.toml b/stdlib/TOML/test/testfiles/valid/float.toml deleted file mode 100644 index 5f023229486b9..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/float.toml +++ /dev/null @@ -1,4 +0,0 @@ -pi = 3.14 -pospi = +3.14 -negpi = -3.14 -zero-intpart = 0.123 diff --git a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.jl b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.jl deleted file mode 100644 index 376f0b95cf7e8..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))),"better" => Dict{String,Any}("value" => "43","type" => "integer"))) diff --git a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.json b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.json deleted file mode 100644 index 374bd09343ef1..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "a": { - "better": {"type": "integer", "value": "43"}, - "b": { - "c": { - "answer": {"type": "integer", "value": "42"} - } - } - } -} diff --git 
a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.toml b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.toml deleted file mode 100644 index c0e8865b392c2..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.toml +++ /dev/null @@ -1,5 +0,0 @@ -[a.b.c] -answer = 42 - -[a] -better = 43 diff --git a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.jl b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.jl deleted file mode 100644 index 376f0b95cf7e8..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))),"better" => Dict{String,Any}("value" => "43","type" => "integer"))) diff --git a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.json b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.json deleted file mode 100644 index 374bd09343ef1..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "a": { - "better": {"type": "integer", "value": "43"}, - "b": { - "c": { - "answer": {"type": "integer", "value": "42"} - } - } - } -} diff --git a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.toml b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.toml deleted file mode 100644 index eee68ff5143aa..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.toml +++ /dev/null @@ -1,5 +0,0 @@ -[a] -better = 43 - -[a.b.c] -answer = 42 diff --git a/stdlib/TOML/test/testfiles/valid/implicit-groups.jl b/stdlib/TOML/test/testfiles/valid/implicit-groups.jl deleted file mode 100644 index 5481705ddbc4e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-groups.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))))) diff --git a/stdlib/TOML/test/testfiles/valid/implicit-groups.json b/stdlib/TOML/test/testfiles/valid/implicit-groups.json deleted file mode 100644 index fbae7fc71beff..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-groups.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "a": { - "b": { - "c": { - "answer": {"type": "integer", "value": "42"} - } - } - } -} diff --git a/stdlib/TOML/test/testfiles/valid/implicit-groups.toml b/stdlib/TOML/test/testfiles/valid/implicit-groups.toml deleted file mode 100644 index b6333e49d577e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/implicit-groups.toml +++ /dev/null @@ -1,2 +0,0 @@ -[a.b.c] -answer = 42 diff --git a/stdlib/TOML/test/testfiles/valid/inline-table-array.jl b/stdlib/TOML/test/testfiles/valid/inline-table-array.jl deleted file mode 100644 index c9b1c336003d2..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/inline-table-array.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Eric","type" => "string"),"last_name" => Dict{String,Any}("value" => "Clapton","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bob","type" => "string"),"last_name" => Dict{String,Any}("value" => "Seger","type" => 
"string"))]) diff --git a/stdlib/TOML/test/testfiles/valid/inline-table-array.json b/stdlib/TOML/test/testfiles/valid/inline-table-array.json deleted file mode 100644 index 84df2dabb0d6b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/inline-table-array.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "people": [ - { - "first_name": {"type": "string", "value": "Bruce"}, - "last_name": {"type": "string", "value": "Springsteen"} - }, - { - "first_name": {"type": "string", "value": "Eric"}, - "last_name": {"type": "string", "value": "Clapton"} - }, - { - "first_name": {"type": "string", "value": "Bob"}, - "last_name": {"type": "string", "value": "Seger"} - } - ] -} diff --git a/stdlib/TOML/test/testfiles/valid/inline-table-array.toml b/stdlib/TOML/test/testfiles/valid/inline-table-array.toml deleted file mode 100644 index 3fa60d6695574..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/inline-table-array.toml +++ /dev/null @@ -1,3 +0,0 @@ -people = [{first_name = "Bruce", last_name = "Springsteen"}, - {first_name = "Eric", last_name = "Clapton"}, - {first_name = "Bob", last_name = "Seger"}] diff --git a/stdlib/TOML/test/testfiles/valid/inline-table.jl b/stdlib/TOML/test/testfiles/valid/inline-table.jl deleted file mode 100644 index ecbaec3304cad..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/inline-table.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("point" => Dict{String,Any}("x" => Dict{String,Any}("value" => "1","type" => "integer"),"y" => Dict{String,Any}("value" => "2","type" => "integer")),"name" => Dict{String,Any}("first" => Dict{String,Any}("value" => "Tom","type" => "string"),"last" => Dict{String,Any}("value" => "Preston-Werner","type" => "string")),"str-key" => Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")),"simple" => Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")),"table-array" => Any[Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")), Dict{String,Any}("b" => Dict{String,Any}("value" => "2","type" => "integer"))]) diff --git a/stdlib/TOML/test/testfiles/valid/inline-table.json b/stdlib/TOML/test/testfiles/valid/inline-table.json deleted file mode 100644 index 71cc119c5bce3..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/inline-table.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": { - "first": {"type": "string", "value": "Tom"}, - "last": {"type": "string", "value": "Preston-Werner"} - }, - "point": { - "x": {"type": "integer", "value": "1"}, - "y": {"type": "integer", "value": "2"} - }, - "simple": { "a": {"type": "integer", "value": "1"} }, - "str-key": { "a": {"type": "integer", "value": "1"} }, - "table-array": [ - { "a": {"type": "integer", "value": "1"} }, - { "b": {"type": "integer", "value": "2"} } - ] -} diff --git a/stdlib/TOML/test/testfiles/valid/inline-table.toml b/stdlib/TOML/test/testfiles/valid/inline-table.toml deleted file mode 100644 index 257047eebc019..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/inline-table.toml +++ /dev/null @@ -1,5 +0,0 @@ -name = { first = "Tom", last = "Preston-Werner" } -point = { x = 1, y = 2 } -simple = { a = 1 } -str-key = { "a" = 1 } -table-array = [{ "a" = 1 }, { "b" = 2 }] diff --git a/stdlib/TOML/test/testfiles/valid/integer-underscore.jl b/stdlib/TOML/test/testfiles/valid/integer-underscore.jl deleted file mode 100644 index 84b2dfa1ad44e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/integer-underscore.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("kilo" => Dict{String,Any}("value" => "1000","type" => 
"integer")) diff --git a/stdlib/TOML/test/testfiles/valid/integer-underscore.json b/stdlib/TOML/test/testfiles/valid/integer-underscore.json deleted file mode 100644 index bb6c3e7ba7d69..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/integer-underscore.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "kilo": {"type": "integer", "value": "1000"} -} diff --git a/stdlib/TOML/test/testfiles/valid/integer-underscore.toml b/stdlib/TOML/test/testfiles/valid/integer-underscore.toml deleted file mode 100644 index 45eb4f71ab583..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/integer-underscore.toml +++ /dev/null @@ -1 +0,0 @@ -kilo = 1_000 diff --git a/stdlib/TOML/test/testfiles/valid/integer.jl b/stdlib/TOML/test/testfiles/valid/integer.jl deleted file mode 100644 index 7150736c81415..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/integer.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("zero" => Dict{String,Any}("value" => "0","type" => "integer"),"posanswer" => Dict{String,Any}("value" => "42","type" => "integer"),"answer" => Dict{String,Any}("value" => "42","type" => "integer"),"neganswer" => Dict{String,Any}("value" => "-42","type" => "integer")) diff --git a/stdlib/TOML/test/testfiles/valid/integer.json b/stdlib/TOML/test/testfiles/valid/integer.json deleted file mode 100644 index 543738ba87999..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/integer.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "answer": {"type": "integer", "value": "42"}, - "neganswer": {"type": "integer", "value": "-42"}, - "posanswer": {"type": "integer", "value": "42"}, - "zero": {"type": "integer", "value": "0"} -} diff --git a/stdlib/TOML/test/testfiles/valid/integer.toml b/stdlib/TOML/test/testfiles/valid/integer.toml deleted file mode 100644 index b62de30aee0e3..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/integer.toml +++ /dev/null @@ -1,4 +0,0 @@ -answer = 42 -posanswer = +42 -neganswer = -42 -zero = 0 diff --git a/stdlib/TOML/test/testfiles/valid/key-equals-nospace.jl b/stdlib/TOML/test/testfiles/valid/key-equals-nospace.jl deleted file mode 100644 index b88a68c41a2c1..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-equals-nospace.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")) diff --git a/stdlib/TOML/test/testfiles/valid/key-equals-nospace.json b/stdlib/TOML/test/testfiles/valid/key-equals-nospace.json deleted file mode 100644 index 1f8709ab9f46f..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-equals-nospace.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "answer": {"type": "integer", "value": "42"} -} diff --git a/stdlib/TOML/test/testfiles/valid/key-equals-nospace.toml b/stdlib/TOML/test/testfiles/valid/key-equals-nospace.toml deleted file mode 100644 index 560901c5a43f2..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-equals-nospace.toml +++ /dev/null @@ -1 +0,0 @@ -answer=42 diff --git a/stdlib/TOML/test/testfiles/valid/key-numeric.jl b/stdlib/TOML/test/testfiles/valid/key-numeric.jl deleted file mode 100644 index b6d00e0041bbe..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-numeric.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("1" => Dict{String,Any}("value" => "1","type" => "integer")) diff --git a/stdlib/TOML/test/testfiles/valid/key-numeric.json b/stdlib/TOML/test/testfiles/valid/key-numeric.json deleted file mode 100644 index 862f8cbba9a29..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-numeric.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "1": {"type": "integer", "value": "1"} -} diff 
--git a/stdlib/TOML/test/testfiles/valid/key-numeric.toml b/stdlib/TOML/test/testfiles/valid/key-numeric.toml deleted file mode 100644 index 532356f49b43e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-numeric.toml +++ /dev/null @@ -1 +0,0 @@ -1 = 1 diff --git a/stdlib/TOML/test/testfiles/valid/key-space.jl b/stdlib/TOML/test/testfiles/valid/key-space.jl deleted file mode 100644 index c43b2619a1c91..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-space.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a b" => Dict{String,Any}("value" => "1","type" => "integer")) diff --git a/stdlib/TOML/test/testfiles/valid/key-space.json b/stdlib/TOML/test/testfiles/valid/key-space.json deleted file mode 100644 index 9d1f76911d523..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-space.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "a b": {"type": "integer", "value": "1"} -} diff --git a/stdlib/TOML/test/testfiles/valid/key-space.toml b/stdlib/TOML/test/testfiles/valid/key-space.toml deleted file mode 100644 index f4f36c4f6df2c..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-space.toml +++ /dev/null @@ -1 +0,0 @@ -"a b" = 1 diff --git a/stdlib/TOML/test/testfiles/valid/key-special-chars.jl b/stdlib/TOML/test/testfiles/valid/key-special-chars.jl deleted file mode 100644 index 31b05979dbf19..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-special-chars.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("~!@\$^&*()_+-`1234567890[]|/?><.,;:'" => Dict{String,Any}("value" => "1","type" => "integer")) diff --git a/stdlib/TOML/test/testfiles/valid/key-special-chars.json b/stdlib/TOML/test/testfiles/valid/key-special-chars.json deleted file mode 100644 index 3585b2cfb464e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-special-chars.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "~!@$^&*()_+-`1234567890[]|/?><.,;:'": { - "type": "integer", "value": "1" - } -} diff --git a/stdlib/TOML/test/testfiles/valid/key-special-chars.toml b/stdlib/TOML/test/testfiles/valid/key-special-chars.toml deleted file mode 100644 index cc572befd06e5..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/key-special-chars.toml +++ /dev/null @@ -1 +0,0 @@ -"~!@$^&*()_+-`1234567890[]|/?><.,;:'" = 1 diff --git a/stdlib/TOML/test/testfiles/valid/keys-with-dots.jl b/stdlib/TOML/test/testfiles/valid/keys-with-dots.jl deleted file mode 100644 index 2d700e6e091ec..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/keys-with-dots.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("with.dot" => Dict{String,Any}("value" => "2","type" => "integer"),"plain_table" => Dict{String,Any}("with.dot" => Dict{String,Any}("value" => "4","type" => "integer"),"plain" => Dict{String,Any}("value" => "3","type" => "integer")),"table" => Dict{String,Any}("withdot" => Dict{String,Any}("key.with.dots" => Dict{String,Any}("value" => "6","type" => "integer"),"plain" => Dict{String,Any}("value" => "5","type" => "integer"))),"plain" => Dict{String,Any}("value" => "1","type" => "integer")) diff --git a/stdlib/TOML/test/testfiles/valid/keys-with-dots.json b/stdlib/TOML/test/testfiles/valid/keys-with-dots.json deleted file mode 100644 index 6dd7b28e636e2..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/keys-with-dots.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "plain": {"type": "integer", "value": "1"}, - "with.dot": {"type": "integer", "value": "2"}, - "plain_table": { - "plain": {"type": "integer", "value": "3"}, - "with.dot": {"type": "integer", "value": "4"} - }, - "table": { - "withdot": { - "plain": {"type": "integer", 
"value": "5"}, - "key.with.dots": {"type": "integer", "value": "6"} - } - } -} diff --git a/stdlib/TOML/test/testfiles/valid/keys-with-dots.toml b/stdlib/TOML/test/testfiles/valid/keys-with-dots.toml deleted file mode 100644 index 65fcddf96a491..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/keys-with-dots.toml +++ /dev/null @@ -1,10 +0,0 @@ -plain = 1 -"with.dot" = 2 - -[plain_table] -plain = 3 -"with.dot" = 4 - -[table.withdot] -plain = 5 -"key.with.dots" = 6 diff --git a/stdlib/TOML/test/testfiles/valid/long-float.jl b/stdlib/TOML/test/testfiles/valid/long-float.jl deleted file mode 100644 index d59e96f1cc019..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/long-float.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("longpi" => Dict{String,Any}("value" => "3.141592653589793","type" => "float"),"neglongpi" => Dict{String,Any}("value" => "-3.141592653589793","type" => "float")) diff --git a/stdlib/TOML/test/testfiles/valid/long-float.json b/stdlib/TOML/test/testfiles/valid/long-float.json deleted file mode 100644 index 8ceed47971ef0..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/long-float.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "longpi": {"type": "float", "value": "3.141592653589793"}, - "neglongpi": {"type": "float", "value": "-3.141592653589793"} -} diff --git a/stdlib/TOML/test/testfiles/valid/long-float.toml b/stdlib/TOML/test/testfiles/valid/long-float.toml deleted file mode 100644 index 9558ae47c023f..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/long-float.toml +++ /dev/null @@ -1,2 +0,0 @@ -longpi = 3.141592653589793 -neglongpi = -3.141592653589793 diff --git a/stdlib/TOML/test/testfiles/valid/long-integer.jl b/stdlib/TOML/test/testfiles/valid/long-integer.jl deleted file mode 100644 index 63ae15b3d84c5..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/long-integer.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("answer" => Dict{String,Any}("value" => "9223372036854775807","type" => "integer"),"neganswer" => Dict{String,Any}("value" => "-9223372036854775808","type" => "integer")) diff --git a/stdlib/TOML/test/testfiles/valid/long-integer.json b/stdlib/TOML/test/testfiles/valid/long-integer.json deleted file mode 100644 index 16c331ed3983a..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/long-integer.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "answer": {"type": "integer", "value": "9223372036854775807"}, - "neganswer": {"type": "integer", "value": "-9223372036854775808"} -} diff --git a/stdlib/TOML/test/testfiles/valid/long-integer.toml b/stdlib/TOML/test/testfiles/valid/long-integer.toml deleted file mode 100644 index 424a13ac2af1b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/long-integer.toml +++ /dev/null @@ -1,2 +0,0 @@ -answer = 9223372036854775807 -neganswer = -9223372036854775808 diff --git a/stdlib/TOML/test/testfiles/valid/multiline-string.jl b/stdlib/TOML/test/testfiles/valid/multiline-string.jl deleted file mode 100644 index dad787a454c56..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/multiline-string.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("equivalent_two" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy dog.","type" => "string"),"multiline_empty_four" => Dict{String,Any}("value" => "","type" => "string"),"multiline_empty_one" => Dict{String,Any}("value" => "","type" => "string"),"equivalent_three" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy dog.","type" => "string"),"equivalent_one" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy 
dog.","type" => "string"),"multiline_empty_two" => Dict{String,Any}("value" => "","type" => "string"),"multiline_empty_three" => Dict{String,Any}("value" => "","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/multiline-string.json b/stdlib/TOML/test/testfiles/valid/multiline-string.json deleted file mode 100644 index 075bf505464b5..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/multiline-string.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "multiline_empty_one": { - "type": "string", - "value": "" - }, - "multiline_empty_two": { - "type": "string", - "value": "" - }, - "multiline_empty_three": { - "type": "string", - "value": "" - }, - "multiline_empty_four": { - "type": "string", - "value": "" - }, - "equivalent_one": { - "type": "string", - "value": "The quick brown fox jumps over the lazy dog." - }, - "equivalent_two": { - "type": "string", - "value": "The quick brown fox jumps over the lazy dog." - }, - "equivalent_three": { - "type": "string", - "value": "The quick brown fox jumps over the lazy dog." - } -} diff --git a/stdlib/TOML/test/testfiles/valid/multiline-string.toml b/stdlib/TOML/test/testfiles/valid/multiline-string.toml deleted file mode 100644 index 15b11434ff009..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/multiline-string.toml +++ /dev/null @@ -1,23 +0,0 @@ -multiline_empty_one = """""" -multiline_empty_two = """ -""" -multiline_empty_three = """\ - """ -multiline_empty_four = """\ - \ - \ - """ - -equivalent_one = "The quick brown fox jumps over the lazy dog." -equivalent_two = """ -The quick brown \ - - - fox jumps over \ - the lazy dog.""" - -equivalent_three = """\ - The quick brown \ - fox jumps over \ - the lazy dog.\ - """ diff --git a/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.jl b/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.jl deleted file mode 100644 index 0bc1d39c16608..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Any[Dict{String,Any}("b" => Dict{String,Any}())]) diff --git a/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.json b/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.json deleted file mode 100644 index 89cd83e22eefc..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "a": [ - { - "b": {} - } - ] -} diff --git a/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.toml b/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.toml deleted file mode 100644 index e1e24f6c38f8a..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.toml +++ /dev/null @@ -1 +0,0 @@ -a = [ { b = {} } ] diff --git a/stdlib/TOML/test/testfiles/valid/newline-crlf.jl b/stdlib/TOML/test/testfiles/valid/newline-crlf.jl deleted file mode 100644 index 1bb4161f1a2a2..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/newline-crlf.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("newline" => Dict{String,Any}("value" => "crlf","type" => "string"),"os" => Dict{String,Any}("value" => "DOS","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/newline-crlf.json b/stdlib/TOML/test/testfiles/valid/newline-crlf.json deleted file mode 100644 index d32f230b2b826..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/newline-crlf.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "os": {"type": "string", "value": "DOS"}, - "newline": {"type": "string", "value": "crlf"} -} diff --git 
a/stdlib/TOML/test/testfiles/valid/newline-crlf.toml b/stdlib/TOML/test/testfiles/valid/newline-crlf.toml deleted file mode 100644 index 9b13df0412235..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/newline-crlf.toml +++ /dev/null @@ -1,2 +0,0 @@ -os = "DOS" -newline = "crlf" diff --git a/stdlib/TOML/test/testfiles/valid/newline-lf.jl b/stdlib/TOML/test/testfiles/valid/newline-lf.jl deleted file mode 100644 index e9bb103ab934d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/newline-lf.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("newline" => Dict{String,Any}("value" => "lf","type" => "string"),"os" => Dict{String,Any}("value" => "unix","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/newline-lf.json b/stdlib/TOML/test/testfiles/valid/newline-lf.json deleted file mode 100644 index 8114848b53193..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/newline-lf.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "os": {"type": "string", "value": "unix"}, - "newline": {"type": "string", "value": "lf"} -} diff --git a/stdlib/TOML/test/testfiles/valid/newline-lf.toml b/stdlib/TOML/test/testfiles/valid/newline-lf.toml deleted file mode 100644 index 0f3377cd990e3..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/newline-lf.toml +++ /dev/null @@ -1,2 +0,0 @@ -os = "unix" -newline = "lf" diff --git a/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.jl b/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.jl deleted file mode 100644 index 054b671ad564d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("multiline" => Dict{String,Any}("value" => "This string\r\nhas ' a quote character\r\nand more than\r\none newline\r\nin it.","type" => "string"),"firstnl" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"),"oneline" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.json b/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.json deleted file mode 100644 index 90e27df8ac804..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "oneline": { - "type": "string", - "value": "This string has a ' quote character." - }, - "firstnl": { - "type": "string", - "value": "This string has a ' quote character." - }, - "multiline": { - "type": "string", - "value": "This string\r\nhas ' a quote character\r\nand more than\r\none newline\r\nin it." 
- } -} diff --git a/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.toml b/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.toml deleted file mode 100644 index 8094c03e31a40..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.toml +++ /dev/null @@ -1,9 +0,0 @@ -oneline = '''This string has a ' quote character.''' -firstnl = ''' -This string has a ' quote character.''' -multiline = ''' -This string -has ' a quote character -and more than -one newline -in it.''' diff --git a/stdlib/TOML/test/testfiles/valid/raw-multiline-string.jl b/stdlib/TOML/test/testfiles/valid/raw-multiline-string.jl deleted file mode 100644 index e05360e1fcd84..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-multiline-string.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("multiline" => Dict{String,Any}("value" => "This string\nhas ' a quote character\nand more than\none newline\nin it.","type" => "string"),"firstnl" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"),"oneline" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/raw-multiline-string.json b/stdlib/TOML/test/testfiles/valid/raw-multiline-string.json deleted file mode 100644 index b43cce5a2d173..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-multiline-string.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "oneline": { - "type": "string", - "value": "This string has a ' quote character." - }, - "firstnl": { - "type": "string", - "value": "This string has a ' quote character." - }, - "multiline": { - "type": "string", - "value": "This string\nhas ' a quote character\nand more than\none newline\nin it." - } -} diff --git a/stdlib/TOML/test/testfiles/valid/raw-multiline-string.toml b/stdlib/TOML/test/testfiles/valid/raw-multiline-string.toml deleted file mode 100644 index 8094c03e31a40..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-multiline-string.toml +++ /dev/null @@ -1,9 +0,0 @@ -oneline = '''This string has a ' quote character.''' -firstnl = ''' -This string has a ' quote character.''' -multiline = ''' -This string -has ' a quote character -and more than -one newline -in it.''' diff --git a/stdlib/TOML/test/testfiles/valid/raw-string.jl b/stdlib/TOML/test/testfiles/valid/raw-string.jl deleted file mode 100644 index 58a1929689bd3..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-string.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("slash" => Dict{String,Any}("value" => "This string has a \\/ slash character.","type" => "string"),"formfeed" => Dict{String,Any}("value" => "This string has a \\f form feed character.","type" => "string"),"backslash" => Dict{String,Any}("value" => "This string has a \\\\ backslash character.","type" => "string"),"newline" => Dict{String,Any}("value" => "This string has a \\n new line character.","type" => "string"),"carriage" => Dict{String,Any}("value" => "This string has a \\r carriage return character.","type" => "string"),"backspace" => Dict{String,Any}("value" => "This string has a \\b backspace character.","type" => "string"),"tab" => Dict{String,Any}("value" => "This string has a \\t tab character.","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/raw-string.json b/stdlib/TOML/test/testfiles/valid/raw-string.json deleted file mode 100644 index 693ab9b54a493..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-string.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "backspace": { - "type": "string", - 
"value": "This string has a \\b backspace character." - }, - "tab": { - "type": "string", - "value": "This string has a \\t tab character." - }, - "newline": { - "type": "string", - "value": "This string has a \\n new line character." - }, - "formfeed": { - "type": "string", - "value": "This string has a \\f form feed character." - }, - "carriage": { - "type": "string", - "value": "This string has a \\r carriage return character." - }, - "slash": { - "type": "string", - "value": "This string has a \\/ slash character." - }, - "backslash": { - "type": "string", - "value": "This string has a \\\\ backslash character." - } -} diff --git a/stdlib/TOML/test/testfiles/valid/raw-string.toml b/stdlib/TOML/test/testfiles/valid/raw-string.toml deleted file mode 100644 index 92acd2557c4c2..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/raw-string.toml +++ /dev/null @@ -1,7 +0,0 @@ -backspace = 'This string has a \b backspace character.' -tab = 'This string has a \t tab character.' -newline = 'This string has a \n new line character.' -formfeed = 'This string has a \f form feed character.' -carriage = 'This string has a \r carriage return character.' -slash = 'This string has a \/ slash character.' -backslash = 'This string has a \\ backslash character.' diff --git a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.jl b/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.jl deleted file mode 100644 index 25393187ca54d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("black" => Dict{String,Any}("allow_prereleases" => Dict{String,Any}("value" => "true","type" => "bool"),"python" => Dict{String,Any}("value" => ">3.6","type" => "string"),"version" => Dict{String,Any}("value" => ">=18.9b0","type" => "string"))) diff --git a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.json b/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.json deleted file mode 100644 index 7fc7d6dafff06..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "black":{ - "allow_prereleases":{ - "type":"bool", - "value":"true" - }, - "python":{ - "type":"string", - "value":">3.6" - }, - "version":{ - "type":"string", - "value":">=18.9b0" - } - } - } diff --git a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.toml b/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.toml deleted file mode 100644 index 94e5651d582e2..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.toml +++ /dev/null @@ -1 +0,0 @@ -black = { python=">3.6", version=">=18.9b0", allow_prereleases=true } diff --git a/stdlib/TOML/test/testfiles/valid/string-empty.jl b/stdlib/TOML/test/testfiles/valid/string-empty.jl deleted file mode 100644 index 4adba9eed74f9..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-empty.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("answer" => Dict{String,Any}("value" => "","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/string-empty.json b/stdlib/TOML/test/testfiles/valid/string-empty.json deleted file mode 100644 index 6c26d695b29a6..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-empty.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "answer": { - "type": "string", - "value": "" - } -} diff --git a/stdlib/TOML/test/testfiles/valid/string-empty.toml b/stdlib/TOML/test/testfiles/valid/string-empty.toml deleted file mode 
100644 index e37e6815bc73d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-empty.toml +++ /dev/null @@ -1 +0,0 @@ -answer = "" diff --git a/stdlib/TOML/test/testfiles/valid/string-escapes.jl b/stdlib/TOML/test/testfiles/valid/string-escapes.jl deleted file mode 100644 index d153276492df3..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-escapes.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("formfeed" => Dict{String,Any}("value" => "This string has a \f form feed character.","type" => "string"),"notunicode2" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"backslash" => Dict{String,Any}("value" => "This string has a \\ backslash character.","type" => "string"),"notunicode3" => Dict{String,Any}("value" => "This string does not have a unicode \\u0075 escape.","type" => "string"),"notunicode4" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"tab" => Dict{String,Any}("value" => "This string has a \t tab character.","type" => "string"),"carriage" => Dict{String,Any}("value" => "This string has a \r carriage return character.","type" => "string"),"quote" => Dict{String,Any}("value" => "This string has a \" quote character.","type" => "string"),"newline" => Dict{String,Any}("value" => "This string has a \n new line character.","type" => "string"),"notunicode1" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"backspace" => Dict{String,Any}("value" => "This string has a \b backspace character.","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/string-escapes.json b/stdlib/TOML/test/testfiles/valid/string-escapes.json deleted file mode 100644 index 98e2c82d1ce8a..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-escapes.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "backspace": { - "type": "string", - "value": "This string has a \u0008 backspace character." - }, - "tab": { - "type": "string", - "value": "This string has a \u0009 tab character." - }, - "newline": { - "type": "string", - "value": "This string has a \u000A new line character." - }, - "formfeed": { - "type": "string", - "value": "This string has a \u000C form feed character." - }, - "carriage": { - "type": "string", - "value": "This string has a \u000D carriage return character." - }, - "quote": { - "type": "string", - "value": "This string has a \u0022 quote character." - }, - "backslash": { - "type": "string", - "value": "This string has a \u005C backslash character." - }, - "notunicode1": { - "type": "string", - "value": "This string does not have a unicode \\u escape." - }, - "notunicode2": { - "type": "string", - "value": "This string does not have a unicode \u005Cu escape." - }, - "notunicode3": { - "type": "string", - "value": "This string does not have a unicode \\u0075 escape." - }, - "notunicode4": { - "type": "string", - "value": "This string does not have a unicode \\\u0075 escape." - } -} diff --git a/stdlib/TOML/test/testfiles/valid/string-escapes.toml b/stdlib/TOML/test/testfiles/valid/string-escapes.toml deleted file mode 100644 index 6d554e4553bdc..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-escapes.toml +++ /dev/null @@ -1,11 +0,0 @@ -backspace = "This string has a \b backspace character." -tab = "This string has a \t tab character." -newline = "This string has a \n new line character." -formfeed = "This string has a \f form feed character." 
-carriage = "This string has a \r carriage return character." -quote = "This string has a \" quote character." -backslash = "This string has a \\ backslash character." -notunicode1 = "This string does not have a unicode \\u escape." -notunicode2 = "This string does not have a unicode \u005Cu escape." -notunicode3 = "This string does not have a unicode \\u0075 escape." -notunicode4 = "This string does not have a unicode \\\u0075 escape." diff --git a/stdlib/TOML/test/testfiles/valid/string-nl.jl b/stdlib/TOML/test/testfiles/valid/string-nl.jl deleted file mode 100644 index 1d60e431ee1bb..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-nl.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("nl_end" => Dict{String,Any}("value" => "value\n","type" => "string"),"lit_nl_mid" => Dict{String,Any}("value" => "val\\nue","type" => "string"),"nl_mid" => Dict{String,Any}("value" => "val\nue","type" => "string"),"lit_nl_uni" => Dict{String,Any}("value" => "val\\ue","type" => "string"),"lit_nl_end" => Dict{String,Any}("value" => "value\\n","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/string-nl.json b/stdlib/TOML/test/testfiles/valid/string-nl.json deleted file mode 100644 index 54a4a9831813e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-nl.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "nl_mid": { - "type": "string", - "value": "val\nue" - }, - "nl_end": { - "type": "string", - "value": "value\n" - }, - "lit_nl_end": { - "type": "string", - "value": "value\\n" - }, - "lit_nl_mid": { - "type": "string", - "value": "val\\nue" - }, - "lit_nl_uni": { - "type": "string", - "value": "val\\ue" - } -} diff --git a/stdlib/TOML/test/testfiles/valid/string-nl.toml b/stdlib/TOML/test/testfiles/valid/string-nl.toml deleted file mode 100644 index 1e09a8bf78d68..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-nl.toml +++ /dev/null @@ -1,6 +0,0 @@ -nl_mid = "val\nue" -nl_end = """value\n""" - -lit_nl_end = '''value\n''' -lit_nl_mid = 'val\nue' -lit_nl_uni = 'val\ue' diff --git a/stdlib/TOML/test/testfiles/valid/string-simple.jl b/stdlib/TOML/test/testfiles/valid/string-simple.jl deleted file mode 100644 index dbee3f00e38d9..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-simple.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("answer" => Dict{String,Any}("value" => "You are not drinking enough whisky.","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/string-simple.json b/stdlib/TOML/test/testfiles/valid/string-simple.json deleted file mode 100644 index 2e05f99b4d181..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-simple.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "answer": { - "type": "string", - "value": "You are not drinking enough whisky." - } -} diff --git a/stdlib/TOML/test/testfiles/valid/string-simple.toml b/stdlib/TOML/test/testfiles/valid/string-simple.toml deleted file mode 100644 index e17ade6237b7b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-simple.toml +++ /dev/null @@ -1 +0,0 @@ -answer = "You are not drinking enough whisky." 
diff --git a/stdlib/TOML/test/testfiles/valid/string-with-pound.jl b/stdlib/TOML/test/testfiles/valid/string-with-pound.jl deleted file mode 100644 index 0acceceab6160..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-with-pound.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("pound" => Dict{String,Any}("value" => "We see no # comments here.","type" => "string"),"poundcomment" => Dict{String,Any}("value" => "But there are # some comments here.","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/string-with-pound.json b/stdlib/TOML/test/testfiles/valid/string-with-pound.json deleted file mode 100644 index 33cdc9c4b58c8..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-with-pound.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "pound": {"type": "string", "value": "We see no # comments here."}, - "poundcomment": { - "type": "string", - "value": "But there are # some comments here." - } -} diff --git a/stdlib/TOML/test/testfiles/valid/string-with-pound.toml b/stdlib/TOML/test/testfiles/valid/string-with-pound.toml deleted file mode 100644 index 5fd87466dff05..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/string-with-pound.toml +++ /dev/null @@ -1,2 +0,0 @@ -pound = "We see no # comments here." -poundcomment = "But there are # some comments here." # Did I # mess you up? diff --git a/stdlib/TOML/test/testfiles/valid/table-array-implicit.jl b/stdlib/TOML/test/testfiles/valid/table-array-implicit.jl deleted file mode 100644 index fc8c932d672e7..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-implicit.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("albums" => Dict{String,Any}("songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Glory Days","type" => "string"))])) diff --git a/stdlib/TOML/test/testfiles/valid/table-array-implicit.json b/stdlib/TOML/test/testfiles/valid/table-array-implicit.json deleted file mode 100644 index 32e464012d63d..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-implicit.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "albums": { - "songs": [ - {"name": {"type": "string", "value": "Glory Days"}} - ] - } -} diff --git a/stdlib/TOML/test/testfiles/valid/table-array-implicit.toml b/stdlib/TOML/test/testfiles/valid/table-array-implicit.toml deleted file mode 100644 index 3157ac981d379..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-implicit.toml +++ /dev/null @@ -1,2 +0,0 @@ -[[albums.songs]] -name = "Glory Days" diff --git a/stdlib/TOML/test/testfiles/valid/table-array-many.jl b/stdlib/TOML/test/testfiles/valid/table-array-many.jl deleted file mode 100644 index c9b1c336003d2..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-many.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Eric","type" => "string"),"last_name" => Dict{String,Any}("value" => "Clapton","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bob","type" => "string"),"last_name" => Dict{String,Any}("value" => "Seger","type" => "string"))]) diff --git a/stdlib/TOML/test/testfiles/valid/table-array-many.json b/stdlib/TOML/test/testfiles/valid/table-array-many.json deleted file mode 100644 index 84df2dabb0d6b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-many.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "people": 
[ - { - "first_name": {"type": "string", "value": "Bruce"}, - "last_name": {"type": "string", "value": "Springsteen"} - }, - { - "first_name": {"type": "string", "value": "Eric"}, - "last_name": {"type": "string", "value": "Clapton"} - }, - { - "first_name": {"type": "string", "value": "Bob"}, - "last_name": {"type": "string", "value": "Seger"} - } - ] -} diff --git a/stdlib/TOML/test/testfiles/valid/table-array-many.toml b/stdlib/TOML/test/testfiles/valid/table-array-many.toml deleted file mode 100644 index 46062beb8e747..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-many.toml +++ /dev/null @@ -1,11 +0,0 @@ -[[people]] -first_name = "Bruce" -last_name = "Springsteen" - -[[people]] -first_name = "Eric" -last_name = "Clapton" - -[[people]] -first_name = "Bob" -last_name = "Seger" diff --git a/stdlib/TOML/test/testfiles/valid/table-array-nest.jl b/stdlib/TOML/test/testfiles/valid/table-array-nest.jl deleted file mode 100644 index 68ef1c97f41a4..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-nest.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("albums" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Born to Run","type" => "string"),"songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Jungleland","type" => "string")), Dict{String,Any}("name" => Dict{String,Any}("value" => "Meeting Across the River","type" => "string"))]), Dict{String,Any}("name" => Dict{String,Any}("value" => "Born in the USA","type" => "string"),"songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Glory Days","type" => "string")), Dict{String,Any}("name" => Dict{String,Any}("value" => "Dancing in the Dark","type" => "string"))])]) diff --git a/stdlib/TOML/test/testfiles/valid/table-array-nest.json b/stdlib/TOML/test/testfiles/valid/table-array-nest.json deleted file mode 100644 index c117afa40d4d0..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-nest.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "albums": [ - { - "name": {"type": "string", "value": "Born to Run"}, - "songs": [ - {"name": {"type": "string", "value": "Jungleland"}}, - {"name": {"type": "string", "value": "Meeting Across the River"}} - ] - }, - { - "name": {"type": "string", "value": "Born in the USA"}, - "songs": [ - {"name": {"type": "string", "value": "Glory Days"}}, - {"name": {"type": "string", "value": "Dancing in the Dark"}} - ] - } - ] -} diff --git a/stdlib/TOML/test/testfiles/valid/table-array-nest.toml b/stdlib/TOML/test/testfiles/valid/table-array-nest.toml deleted file mode 100644 index ce3cae15dbadc..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-nest.toml +++ /dev/null @@ -1,17 +0,0 @@ -[[albums]] -name = "Born to Run" - - [[albums.songs]] - name = "Jungleland" - - [[albums.songs]] - name = "Meeting Across the River" - -[[albums]] -name = "Born in the USA" - - [[albums.songs]] - name = "Glory Days" - - [[albums.songs]] - name = "Dancing in the Dark" diff --git a/stdlib/TOML/test/testfiles/valid/table-array-one.jl b/stdlib/TOML/test/testfiles/valid/table-array-one.jl deleted file mode 100644 index 830e3af323fc7..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-one.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string"))]) diff --git a/stdlib/TOML/test/testfiles/valid/table-array-one.json b/stdlib/TOML/test/testfiles/valid/table-array-one.json 
deleted file mode 100644 index d75faaeb23904..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-one.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "people": [ - { - "first_name": {"type": "string", "value": "Bruce"}, - "last_name": {"type": "string", "value": "Springsteen"} - } - ] -} diff --git a/stdlib/TOML/test/testfiles/valid/table-array-one.toml b/stdlib/TOML/test/testfiles/valid/table-array-one.toml deleted file mode 100644 index cd7e1b6907110..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-one.toml +++ /dev/null @@ -1,3 +0,0 @@ -[[people]] -first_name = "Bruce" -last_name = "Springsteen" diff --git a/stdlib/TOML/test/testfiles/valid/table-array-table-array.jl b/stdlib/TOML/test/testfiles/valid/table-array-table-array.jl deleted file mode 100644 index d379c1d3daca7..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-table-array.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Any[Dict{String,Any}("b" => Any[Dict{String,Any}("c" => Dict{String,Any}("d" => Dict{String,Any}("value" => "val0","type" => "string"))), Dict{String,Any}("c" => Dict{String,Any}("d" => Dict{String,Any}("value" => "val1","type" => "string")))])]) diff --git a/stdlib/TOML/test/testfiles/valid/table-array-table-array.json b/stdlib/TOML/test/testfiles/valid/table-array-table-array.json deleted file mode 100644 index e5b7e0aab9e80..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-table-array.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "a": [ - { - "b": [ - { "c" : { "d": {"type": "string", "value": "val0" } } }, - { "c" : { "d": {"type": "string", "value": "val1" } } } - ] - } - ] -} diff --git a/stdlib/TOML/test/testfiles/valid/table-array-table-array.toml b/stdlib/TOML/test/testfiles/valid/table-array-table-array.toml deleted file mode 100644 index a07b0c7fe3fdd..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-array-table-array.toml +++ /dev/null @@ -1,7 +0,0 @@ -[[a]] - [[a.b]] - [a.b.c] - d = "val0" - [[a.b]] - [a.b.c] - d = "val1" diff --git a/stdlib/TOML/test/testfiles/valid/table-empty.jl b/stdlib/TOML/test/testfiles/valid/table-empty.jl deleted file mode 100644 index a62b1dc36cdf3..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-empty.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Dict{String,Any}()) diff --git a/stdlib/TOML/test/testfiles/valid/table-empty.json b/stdlib/TOML/test/testfiles/valid/table-empty.json deleted file mode 100644 index 6f3873af6b2f8..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-empty.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "a": {} -} diff --git a/stdlib/TOML/test/testfiles/valid/table-empty.toml b/stdlib/TOML/test/testfiles/valid/table-empty.toml deleted file mode 100644 index 8bb6a0aa07ea6..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-empty.toml +++ /dev/null @@ -1 +0,0 @@ -[a] diff --git a/stdlib/TOML/test/testfiles/valid/table-no-eol.jl b/stdlib/TOML/test/testfiles/valid/table-no-eol.jl deleted file mode 100644 index 4a103a5e13f54..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-no-eol.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("table" => Dict{String,Any}()) diff --git a/stdlib/TOML/test/testfiles/valid/table-no-eol.json b/stdlib/TOML/test/testfiles/valid/table-no-eol.json deleted file mode 100644 index 11fa444073cfb..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-no-eol.json +++ /dev/null @@ -1 +0,0 @@ -{ "table": {} } diff --git a/stdlib/TOML/test/testfiles/valid/table-no-eol.toml b/stdlib/TOML/test/testfiles/valid/table-no-eol.toml 
deleted file mode 100644 index f1098fdacaa27..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-no-eol.toml +++ /dev/null @@ -1 +0,0 @@ -[table] diff --git a/stdlib/TOML/test/testfiles/valid/table-sub-empty.jl b/stdlib/TOML/test/testfiles/valid/table-sub-empty.jl deleted file mode 100644 index 448cd9237d7d0..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-sub-empty.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}())) diff --git a/stdlib/TOML/test/testfiles/valid/table-sub-empty.json b/stdlib/TOML/test/testfiles/valid/table-sub-empty.json deleted file mode 100644 index 97877708e6d9b..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-sub-empty.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "a": { "b": {} } -} diff --git a/stdlib/TOML/test/testfiles/valid/table-sub-empty.toml b/stdlib/TOML/test/testfiles/valid/table-sub-empty.toml deleted file mode 100644 index 70b7fe11c3d12..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-sub-empty.toml +++ /dev/null @@ -1,2 +0,0 @@ -[a] -[a.b] diff --git a/stdlib/TOML/test/testfiles/valid/table-whitespace.jl b/stdlib/TOML/test/testfiles/valid/table-whitespace.jl deleted file mode 100644 index 1af4cc9cb98e8..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-whitespace.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("valid key" => Dict{String,Any}()) diff --git a/stdlib/TOML/test/testfiles/valid/table-whitespace.json b/stdlib/TOML/test/testfiles/valid/table-whitespace.json deleted file mode 100644 index 3a73ec864537e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-whitespace.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "valid key": {} -} diff --git a/stdlib/TOML/test/testfiles/valid/table-whitespace.toml b/stdlib/TOML/test/testfiles/valid/table-whitespace.toml deleted file mode 100644 index daf881d13a560..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-whitespace.toml +++ /dev/null @@ -1 +0,0 @@ -["valid key"] diff --git a/stdlib/TOML/test/testfiles/valid/table-with-literal-string.jl b/stdlib/TOML/test/testfiles/valid/table-with-literal-string.jl deleted file mode 100644 index 7157a1b75e6ea..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-literal-string.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Dict{String,Any}("\"b\"" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))))) diff --git a/stdlib/TOML/test/testfiles/valid/table-with-literal-string.json b/stdlib/TOML/test/testfiles/valid/table-with-literal-string.json deleted file mode 100644 index 8f006b0e24747..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-literal-string.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "a": { - "\"b\"": { - "c": { - "answer": {"type": "integer", "value": "42"} - } - } - } -} diff --git a/stdlib/TOML/test/testfiles/valid/table-with-literal-string.toml b/stdlib/TOML/test/testfiles/valid/table-with-literal-string.toml deleted file mode 100644 index 63d20a2c672bb..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-literal-string.toml +++ /dev/null @@ -1,4 +0,0 @@ -['a'] -[a.'"b"'] -[a.'"b"'.c] -answer = 42 diff --git a/stdlib/TOML/test/testfiles/valid/table-with-pound.jl b/stdlib/TOML/test/testfiles/valid/table-with-pound.jl deleted file mode 100644 index d1c99bb09e8ab..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-pound.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("key#group" => Dict{String,Any}("answer" => Dict{String,Any}("value" => 
"42","type" => "integer"))) diff --git a/stdlib/TOML/test/testfiles/valid/table-with-pound.json b/stdlib/TOML/test/testfiles/valid/table-with-pound.json deleted file mode 100644 index 5e594e4191981..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-pound.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "key#group": { - "answer": {"type": "integer", "value": "42"} - } -} diff --git a/stdlib/TOML/test/testfiles/valid/table-with-pound.toml b/stdlib/TOML/test/testfiles/valid/table-with-pound.toml deleted file mode 100644 index 33f2c4fd6cf02..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-pound.toml +++ /dev/null @@ -1,2 +0,0 @@ -["key#group"] -answer = 42 diff --git a/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.jl b/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.jl deleted file mode 100644 index 5481705ddbc4e..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))))) diff --git a/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.json b/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.json deleted file mode 100644 index fbae7fc71beff..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "a": { - "b": { - "c": { - "answer": {"type": "integer", "value": "42"} - } - } - } -} diff --git a/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.toml b/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.toml deleted file mode 100644 index b04efcc02c3de..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.toml +++ /dev/null @@ -1,4 +0,0 @@ -['a'] -[a.'b'] -[a.'b'.c] -answer = 42 diff --git a/stdlib/TOML/test/testfiles/valid/underscored-float.jl b/stdlib/TOML/test/testfiles/valid/underscored-float.jl deleted file mode 100644 index 420cefd96e481..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/underscored-float.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("electron_mass" => Dict{String,Any}("value" => "9.109109383e-31","type" => "float")) diff --git a/stdlib/TOML/test/testfiles/valid/underscored-float.json b/stdlib/TOML/test/testfiles/valid/underscored-float.json deleted file mode 100644 index 480109c200be9..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/underscored-float.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "electron_mass": {"type": "float", "value": "9.109109383e-31"} -} diff --git a/stdlib/TOML/test/testfiles/valid/underscored-float.toml b/stdlib/TOML/test/testfiles/valid/underscored-float.toml deleted file mode 100644 index 025b02a177bce..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/underscored-float.toml +++ /dev/null @@ -1 +0,0 @@ -electron_mass = 9_109.109_383e-3_4 diff --git a/stdlib/TOML/test/testfiles/valid/underscored-integer.jl b/stdlib/TOML/test/testfiles/valid/underscored-integer.jl deleted file mode 100644 index 4fb9d43398a9c..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/underscored-integer.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("million" => Dict{String,Any}("value" => "1000000","type" => "integer")) diff --git a/stdlib/TOML/test/testfiles/valid/underscored-integer.json b/stdlib/TOML/test/testfiles/valid/underscored-integer.json deleted file mode 100644 index 0804919f10a54..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/underscored-integer.json +++ /dev/null @@ -1,3 +0,0 
@@ -{ - "million": {"type": "integer", "value": "1000000"} -} diff --git a/stdlib/TOML/test/testfiles/valid/underscored-integer.toml b/stdlib/TOML/test/testfiles/valid/underscored-integer.toml deleted file mode 100644 index 6be8b5153794c..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/underscored-integer.toml +++ /dev/null @@ -1 +0,0 @@ -million = 1_000_000 diff --git a/stdlib/TOML/test/testfiles/valid/unicode-escape.jl b/stdlib/TOML/test/testfiles/valid/unicode-escape.jl deleted file mode 100644 index d773bc04b9ce5..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/unicode-escape.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("answer8" => Dict{String,Any}("value" => "δ","type" => "string"),"answer4" => Dict{String,Any}("value" => "δ","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/unicode-escape.json b/stdlib/TOML/test/testfiles/valid/unicode-escape.json deleted file mode 100644 index 216f8f7c9318a..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/unicode-escape.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "answer4": {"type": "string", "value": "\u03B4"}, - "answer8": {"type": "string", "value": "\u03B4"} -} diff --git a/stdlib/TOML/test/testfiles/valid/unicode-escape.toml b/stdlib/TOML/test/testfiles/valid/unicode-escape.toml deleted file mode 100644 index 82faecbfa5997..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/unicode-escape.toml +++ /dev/null @@ -1,2 +0,0 @@ -answer4 = "\u03B4" -answer8 = "\U000003B4" diff --git a/stdlib/TOML/test/testfiles/valid/unicode-literal.jl b/stdlib/TOML/test/testfiles/valid/unicode-literal.jl deleted file mode 100644 index 675b94774c343..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/unicode-literal.jl +++ /dev/null @@ -1 +0,0 @@ -Dict{String,Any}("answer" => Dict{String,Any}("value" => "δ","type" => "string")) diff --git a/stdlib/TOML/test/testfiles/valid/unicode-literal.json b/stdlib/TOML/test/testfiles/valid/unicode-literal.json deleted file mode 100644 index 00aa2f8325ecb..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/unicode-literal.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "answer": {"type": "string", "value": "δ"} -} diff --git a/stdlib/TOML/test/testfiles/valid/unicode-literal.toml b/stdlib/TOML/test/testfiles/valid/unicode-literal.toml deleted file mode 100644 index c65723ca1d273..0000000000000 --- a/stdlib/TOML/test/testfiles/valid/unicode-literal.toml +++ /dev/null @@ -1 +0,0 @@ -answer = "δ" diff --git a/stdlib/TOML/test/toml_test.jl b/stdlib/TOML/test/toml_test.jl index 45fbd20dbcdab..c22c01acf1870 100644 --- a/stdlib/TOML/test/toml_test.jl +++ b/stdlib/TOML/test/toml_test.jl @@ -5,11 +5,16 @@ using TOML using Test using Dates +testfiles = get_data() + const jsnval = Dict{String,Function}( "string" =>identity, "float" => (s -> Base.parse(Float64, s)), "integer" => (s -> Base.parse(Int64, s)), - "datetime" => (s -> Base.parse(DateTime, s, dateformat"yyyy-mm-ddTHH:MM:SSZ")), + "datetime" => (s -> Base.parse(DateTime, endswith(s, 'Z') ? chop(s) : s)), + "datetime-local" => (s -> Base.parse(DateTime, endswith(s, 'Z') ? chop(s) : s)), + "date-local" => (s -> Base.parse(DateTime, endswith(s, 'Z') ? 
chop(s) : s)), + "time-local" => (s -> Base.parse(Time, s)), "array" => (a -> map(jsn2data, a)), "bool" => (b -> b == "true") ) @@ -29,163 +34,125 @@ end # Valid # ######### -valid_test_folder = joinpath(@__DIR__, "testfiles", "valid") - function check_valid(f) - fp = joinpath(valid_test_folder, f) - jsn = jsn2data(@eval include($fp * ".jl")) - tml = TOML.parsefile(fp * ".toml") + jsn = try jsn2data(@eval include($f * ".jl")) + # Some files cannot be represented with julias DateTime (timezones) + catch + return false + end + tml = TOML.tryparsefile(f * ".toml") + tml isa TOML.Internals.ParserError && return false return isequal(tml, jsn) end @testset "valid" begin -@test check_valid("array-empty") -@test check_valid("array-nospaces") -@test check_valid("array-string-quote-comma-2") -@test check_valid("array-string-quote-comma") -@test check_valid("array-string-with-comma") -@test check_valid("array-table-array-string-backslash") -@test check_valid("arrays-hetergeneous") -@test check_valid("arrays-nested") -@test check_valid("arrays") -@test check_valid("bool") -@test check_valid("comments-at-eof") -@test check_valid("comments-at-eof2") -@test check_valid("comments-everywhere") -@test_broken check_valid("datetime-timezone") -@test_broken check_valid("datetime") -@test check_valid("double-quote-escape") -@test check_valid("empty") -@test check_valid("escaped-escape") -@test check_valid("example") -@test check_valid("exponent-part-float") -@test check_valid("float-exponent") -@test check_valid("float-underscore") -@test check_valid("float") -@test check_valid("implicit-and-explicit-after") -@test check_valid("implicit-and-explicit-before") -@test check_valid("implicit-groups") -@test check_valid("inline-table-array") -@test check_valid("inline-table") -@test check_valid("integer-underscore") -@test check_valid("integer") -@test check_valid("key-equals-nospace") -@test check_valid("key-numeric") -@test check_valid("key-space") -@test check_valid("key-special-chars") -@test check_valid("keys-with-dots") -@test check_valid("long-float") -@test check_valid("long-integer") -@test check_valid("multiline-string") -@test check_valid("nested-inline-table-array") -@test check_valid("newline-crlf") -@test check_valid("newline-lf") -if Sys.iswindows() && - # Sometimes git normalizes the line endings - contains(read(joinpath(valid_test_folder, "raw-multiline-string-win.toml"), String), '\r') - @test check_valid("raw-multiline-string-win") -else - @test check_valid("raw-multiline-string") +failures = [ + "valid/spec-example-1.toml", + "valid/spec-example-1-compact.toml", + "valid/datetime/datetime.toml", + "valid/comment/everywhere.toml", + "valid/datetime/milliseconds.toml", + "valid/datetime/timezone.toml", + "valid/string/multiline-quotes.toml", + "valid/string/multiline.toml", + "valid/float/zero.toml", # this one has a buggy .json file + "valid/string/escape-esc.toml", +] + +n_files_valid = 0 +valid_test_folder = joinpath(testfiles, "valid") +for (root, dirs, files) in walkdir(valid_test_folder) + for f in files + if endswith(f, ".toml") + n_files_valid += 1 + file = joinpath(root, f) + rel = relpath(file, testfiles) + if Sys.iswindows() + rel = replace(rel, '\\' => '/') + end + v = check_valid(splitext(file)[1]) + if rel in failures + @test_broken v + else + @test v + end + end + end end -@test check_valid("raw-string") -@test check_valid("right-curly-brace-after-boolean") -@test check_valid("string-empty") -@test check_valid("string-escapes") -@test check_valid("string-nl") -@test 
check_valid("string-simple") -@test check_valid("string-with-pound") -@test check_valid("table-array-implicit") -@test check_valid("table-array-many") -@test check_valid("table-array-nest") -@test check_valid("table-array-one") -@test check_valid("table-array-table-array") -@test check_valid("table-empty") -@test check_valid("table-no-eol") -@test check_valid("table-sub-empty") -@test check_valid("table-whitespace") -@test check_valid("table-with-literal-string") -@test check_valid("table-with-pound") -@test check_valid("table-with-single-quotes") -@test check_valid("underscored-float") -@test check_valid("underscored-integer") -@test check_valid("unicode-escape") -@test check_valid("unicode-literal") +@test n_files_valid >= 100 -end +end # testset ########### # Invalid # ########### -invalid_test_folder = joinpath(@__DIR__, "testfiles", "invalid") - # TODO: Check error type function check_invalid(f) - fp = joinpath(invalid_test_folder, f) - tml = TOML.tryparsefile(fp * ".toml") + tml = try TOML.tryparsefile(f) + catch + return false + end return tml isa TOML.Internals.ParserError end -@test check_invalid("datetime-malformed-no-leads") -@test check_invalid("datetime-malformed-no-secs") -@test check_invalid("datetime-malformed-no-t") -@test check_invalid("datetime-malformed-with-milli") -@test check_invalid("duplicate-key-table") -@test check_invalid("duplicate-keys") -@test check_invalid("duplicate-tables") -@test check_invalid("empty-implicit-table") -@test check_invalid("empty-table") -@test check_invalid("float-leading-zero-neg") -@test check_invalid("float-leading-zero-pos") -@test check_invalid("float-leading-zero") -@test check_invalid("float-no-leading-zero") -@test check_invalid("float-no-trailing-digits") -@test check_invalid("float-underscore-after-point") -@test check_invalid("float-underscore-after") -@test check_invalid("float-underscore-before-point") -@test check_invalid("float-underscore-before") -@test check_invalid("inline-table-linebreak") -@test check_invalid("integer-leading-zero-neg") -@test check_invalid("integer-leading-zero-pos") -@test check_invalid("integer-leading-zero") -@test check_invalid("integer-underscore-after") -@test check_invalid("integer-underscore-before") -@test check_invalid("integer-underscore-double") -@test check_invalid("key-after-array") -@test check_invalid("key-after-table") -@test check_invalid("key-empty") -@test check_invalid("key-hash") -@test check_invalid("key-newline") -@test check_invalid("key-no-eol") -@test check_invalid("key-open-bracket") -@test check_invalid("key-single-open-bracket") -@test check_invalid("key-space") -@test check_invalid("key-start-bracket") -@test check_invalid("key-two-equals") -@test check_invalid("llbrace") -@test check_invalid("multi-line-inline-table") -@test check_invalid("multi-line-string-no-close") -@test check_invalid("rrbrace") -@test check_invalid("string-bad-byte-escape") -@test check_invalid("string-bad-codepoint") -@test check_invalid("string-bad-escape") -@test check_invalid("string-bad-slash-escape") -@test check_invalid("string-bad-uni-esc") -@test check_invalid("string-byte-escapes") -@test check_invalid("string-no-close") -@test check_invalid("table-array-implicit") -@test check_invalid("table-array-malformed-bracket") -@test check_invalid("table-array-malformed-empty") -@test check_invalid("table-empty") -@test check_invalid("table-nested-brackets-close") -@test check_invalid("table-nested-brackets-open") -@test check_invalid("table-whitespace") -@test check_invalid("table-with-pound") 
-@test check_invalid("text-after-array-entries") -@test check_invalid("text-after-integer") -@test check_invalid("text-after-string") -@test check_invalid("text-after-table") -@test check_invalid("text-before-array-separator") -@test check_invalid("text-in-array") +@testset "invalid" begin + +failures = [ + "invalid/control/bare-cr.toml", + "invalid/control/comment-del.toml", + "invalid/control/comment-lf.toml", + "invalid/control/comment-null.toml", + "invalid/control/comment-us.toml", + "invalid/control/comment-cr.toml", + "invalid/datetime/time-no-leads.toml", + "invalid/control/multi-del.toml", + "invalid/control/multi-lf.toml", + "invalid/control/multi-null.toml", + "invalid/control/multi-us.toml", + "invalid/control/rawmulti-del.toml", + "invalid/control/rawmulti-lf.toml", + "invalid/control/rawmulti-null.toml", + "invalid/control/rawmulti-us.toml", + "invalid/control/rawstring-del.toml", + "invalid/control/rawstring-lf.toml", + "invalid/control/rawstring-null.toml", + "invalid/control/rawstring-us.toml", + "invalid/control/string-bs.toml", + "invalid/control/string-del.toml", + "invalid/control/string-lf.toml", + "invalid/control/string-null.toml", + "invalid/control/string-us.toml", + "invalid/encoding/bad-utf8-in-comment.toml", + "invalid/encoding/bad-utf8-in-string.toml", + "invalid/key/multiline.toml", + "invalid/table/append-with-dotted-keys-2.toml", + "invalid/table/duplicate-key-dotted-table.toml", + "invalid/table/duplicate-key-dotted-table2.toml", +] + +n_invalid = 0 +invalid_test_folder = joinpath(testfiles, "invalid") +for (root, dirs, files) in walkdir(invalid_test_folder) + for f in files + if endswith(f, ".toml") + n_invalid += 1 + file = joinpath(root, f) + rel = relpath(file, testfiles) + if Sys.iswindows() + rel = replace(rel, '\\' => '/') + end + v = check_invalid(file) + if rel in failures + @test_broken v + else + @test v + end + end + end +end +@test n_invalid > 50 + +end # testset diff --git a/stdlib/TOML/test/utils/convert_json_to_jl.jl b/stdlib/TOML/test/utils/convert_json_to_jl.jl deleted file mode 100644 index 00d4fac69084b..0000000000000 --- a/stdlib/TOML/test/utils/convert_json_to_jl.jl +++ /dev/null @@ -1,19 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# This converts the ground-truth JSON files to the Julia repr format so -# we can use that without requiring a JSON parser during testing. - -using JSON - -const testfiles = joinpath(@__DIR__, "..", "testfiles") - -function convert_json_files() - for folder in ("invalid", "valid") - for file in readdir(joinpath(testfiles, folder); join=true) - endswith(file, ".json") || continue - d_json = open(JSON.parse, file) - d_jl = repr(d_json) - write(splitext(file)[1] * ".jl", d_jl) - end - end -end diff --git a/stdlib/TOML/test/utils/utils.jl b/stdlib/TOML/test/utils/utils.jl new file mode 100644 index 0000000000000..c484a61cee25a --- /dev/null +++ b/stdlib/TOML/test/utils/utils.jl @@ -0,0 +1,39 @@ +# This converts the ground-truth JSON files to the Julia repr format so +# we can use that without requiring a JSON parser during testing. 
+ +using Downloads +using Tar +using p7zip_jll + +const url = "https://github.com/KristofferC/toml-test-julia/archive/refs/tags/v1.2.0.tar.gz" +const tarname = basename(url) +const version = lstrip(split(tarname, ".tar.gz")[1], 'v') + +# From Pkg +function exe7z() + # If the JLL is available, use the wrapper function defined in there + if p7zip_jll.is_available() + return p7zip_jll.p7zip() + end + return Cmd([find7z()]) +end + +function find7z() + name = "7z" + Sys.iswindows() && (name = "$name.exe") + for dir in (joinpath("..", "libexec"), ".") + path = normpath(Sys.BINDIR::String, dir, name) + isfile(path) && return path + end + path = Sys.which(name) + path !== nothing && return path + error("7z binary not found") +end + +function get_data() + tmp = mktempdir() + path = joinpath(tmp, basename(url)) + Downloads.download(url, path) + Tar.extract(`$(exe7z()) x $path -so`, joinpath(tmp, "testfiles")) + return joinpath(tmp, "testfiles", "toml-test-julia-$version", "testfiles") +end diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl index 8337bb5a54714..be2ed3acce5b5 100644 --- a/stdlib/TOML/test/values.jl +++ b/stdlib/TOML/test/values.jl @@ -23,8 +23,6 @@ end @test failval("00.0" , Internals.ErrParsingDateTime) @test failval("-00.0" , Internals.ErrParsingDateTime) @test failval("+00.0" , Internals.ErrParsingDateTime) - @test failval("9223372036854775808" , Internals.ErrOverflowError) - @test failval("-9223372036854775809" , Internals.ErrOverflowError) @test failval("0." , Internals.ErrNoTrailingDigitAfterDot) @test failval("0.e" , Internals.ErrNoTrailingDigitAfterDot) @@ -54,6 +52,30 @@ end @test testval("+1_000" , 1000 |> Int64) @test testval("-1_000" , -1000 |> Int64) + @test testval("0x6E", 0x6E|> UInt64) + @test testval("0x8f1e", 0x8f1e|> UInt64) + @test testval("0x765f3173", 0x765f3173|> UInt64) + @test testval("0xc13b830a807cc7f4", 0xc13b830a807cc7f4|> UInt64) + @test testval("0x937efe_0a4241_edb24a04b97bd90ef363", 0x937efe0a4241edb24a04b97bd90ef363 |> UInt128) + + @test testval("0o140", 0o140 |> UInt64) # UInt8 + @test testval("0o46244", 0o46244 |> UInt64) # UInt16 + @test testval("0o32542120656", 0o32542120656 |> UInt64) # UInt32 + @test testval("0o1526535761042630654411", 0o1526535761042630654411 |> UInt64) # UInt64 + @test testval("0o3467204325743773607311464533371572447656531", 0o3467204325743773607311464533371572447656531 |> UInt128) # UInt128 + @test testval("0o34672043257437736073114645333715724476565312", 0o34672043257437736073114645333715724476565312 |> BigInt) # BigInt + + @test testval("0b10001010",0b10001010 |> UInt64) # UInt8 + @test testval("0b11111010001100",0b11111010001100 |> UInt64) # UInt16 + @test testval("0b11100011110000010101000010101",0b11100011110000010101000010101 |> UInt64) # UInt32 + @test testval("0b10000110100111011010001000000111110110000011111101101110011011",0b10000110100111011010001000000111110110000011111101101110011011 |> UInt64) # UInt64 + @test testval( + "0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111", + 0b1101101101101100110001010110111011101000111010101110011000011100110100101111110001010001011001000001000001010010011101100100111 |> UInt128) # UInt128 + @test testval( + "0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111", + 0b110110110110110011000101011011101110100011101010111001100001110011010010111111000101000101100100000100000101001001110110010011111 |> BigInt) 
# BigInt + @test failval("0_" , Internals.ErrUnderscoreNotSurroundedByDigits) @test failval("0__0" , Internals.ErrUnderscoreNotSurroundedByDigits) @test failval("__0" , Internals.ErrUnexpectedStartOfValue) diff --git a/stdlib/Tar.version b/stdlib/Tar.version index 7ba08fd461f88..f1c361eff972e 100644 --- a/stdlib/Tar.version +++ b/stdlib/Tar.version @@ -1,4 +1,4 @@ TAR_BRANCH = master -TAR_SHA1 = 56062695b92920c8b75e997fb0c8c3b015d04b78 +TAR_SHA1 = 81888a33704b233a2ad6f82f84456a1dd82c87f0 TAR_GIT_URL := https://github.com/JuliaIO/Tar.jl.git TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1 diff --git a/stdlib/Test/Project.toml b/stdlib/Test/Project.toml index ee1ae15fd7154..f04b4f976196f 100644 --- a/stdlib/Test/Project.toml +++ b/stdlib/Test/Project.toml @@ -1,5 +1,6 @@ name = "Test" uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" [deps] InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" diff --git a/stdlib/Test/docs/src/index.md b/stdlib/Test/docs/src/index.md index 077d350554775..99f1a93286a37 100644 --- a/stdlib/Test/docs/src/index.md +++ b/stdlib/Test/docs/src/index.md @@ -20,7 +20,7 @@ The `Test` module provides simple *unit testing* functionality. Unit testing is see if your code is correct by checking that the results are what you expect. It can be helpful to ensure your code still works after you make changes, and can be used when developing as a way of specifying the behaviors your code should have when complete. You may also want to look at the -documentation for [adding tests to your Julia Package](https://pkgdocs.julialang.org/dev/creating-packages/#Adding-tests-to-the-package). +documentation for [adding tests to your Julia Package](@ref adding-tests-to-packages). Simple unit testing can be performed with the `@test` and `@test_throws` macros: @@ -55,6 +55,7 @@ julia> @test foo("f") == 20 Test Failed at none:1 Expression: foo("f") == 20 Evaluated: 1 == 20 + ERROR: There was an error during testing ``` @@ -224,6 +225,7 @@ julia> @test 1 ≈ 0.999999 Test Failed at none:1 Expression: 1 ≈ 0.999999 Evaluated: 1 ≈ 0.999999 + ERROR: There was an error during testing ``` You can specify relative and absolute tolerances by setting the `rtol` and `atol` keyword arguments of `isapprox`, respectively, @@ -258,6 +260,16 @@ in the test set reporting. The test will not run but gives a `Broken` `Result`. Test.@test_skip ``` +## Test result types + +```@docs +Test.Result +Test.Pass +Test.Fail +Test.Error +Test.Broken +``` + ## Creating Custom `AbstractTestSet` Types Packages can create their own `AbstractTestSet` subtypes by implementing the `record` and `finish` @@ -332,6 +344,162 @@ Test.detect_ambiguities Test.detect_unbound_args ``` +## Workflow for Testing Packages + +Using the tools available to us in the previous sections, here is a potential workflow of creating a package and adding tests to it. + +### Generating an Example Package + +For this workflow, we will create a package called `Example`: + +```julia +pkg> generate Example +shell> cd Example +shell> mkdir test +pkg> activate . +``` + +### Creating Sample Functions + +The number one requirement for testing a package is to have functionality to test. +For that, we will add some simple functions to `Example` that we can test. +Add the following to `src/Example.jl`: + +```julia +module Example + +function greet() + "Hello world!" 
+end + +function simple_add(a, b) + a + b +end + +function type_multiply(a::Float64, b::Float64) + a * b +end + +export greet, simple_add, type_multiply + +end +``` + +### Creating a Test Environment + +From within the root of the `Example` package, navigate to the `test` directory, activate a new environment there, and add the `Test` package to the environment: + +```julia +shell> cd test +pkg> activate . +(test) pkg> add Test +``` + +### Testing Our Package + +Now, we are ready to add tests to `Example`. +It is standard practice to create a file within the `test` directory called `runtests.jl` which contains the test sets we want to run. +Go ahead and create that file within the `test` directory and add the following code to it: + +```julia +using Example +using Test + +@testset "Example tests" begin + + @testset "Math tests" begin + include("math_tests.jl") + end + + @testset "Greeting tests" begin + include("greeting_tests.jl") + end +end +``` + +We will need to create those two included files, `math_tests.jl` and `greeting_tests.jl`, and add some tests to them. + +> **Note:** Notice how we did not have to specify add `Example` into the `test` environment's `Project.toml`. +> This is a benefit of Julia's testing system that you could [read about more here](@ref adding-tests-to-packages). + +#### Writing Tests for `math_tests.jl` + +Using our knowledge of `Test.jl`, here are some example tests we could add to `math_tests.jl`: + +```julia +@testset "Testset 1" begin + @test 2 == simple_add(1, 1) + @test 3.5 == simple_add(1, 2.5) + @test_throws MethodError simple_add(1, "A") + @test_throws MethodError simple_add(1, 2, 3) +end + +@testset "Testset 2" begin + @test 1.0 == type_multiply(1.0, 1.0) + @test isa(type_multiply(2.0, 2.0), Float64) + @test_throws MethodError type_multiply(1, 2.5) +end +``` + +#### Writing Tests for `greeting_tests.jl` + +Using our knowledge of `Test.jl`, here are some example tests we could add to `greeting_tests.jl`: + +```julia +@testset "Testset 3" begin + @test "Hello world!" == greet() + @test_throws MethodError greet("Antonia") +end +``` + +### Testing Our Package + +Now that we have added our tests and our `runtests.jl` script in `test`, we can test our `Example` package by going back to the root of the `Example` package environment and reactivating the `Example` environment: + +```julia +shell> cd .. +pkg> activate . +``` + +From there, we can finally run our test suite as follows: + +```julia +(Example) pkg> test + Testing Example + Status `/tmp/jl_Yngpvy/Project.toml` + [fa318bd2] Example v0.1.0 `/home/src/Projects/tmp/errata/Example` + [8dfed614] Test `@stdlib/Test` + Status `/tmp/jl_Yngpvy/Manifest.toml` + [fa318bd2] Example v0.1.0 `/home/src/Projects/tmp/errata/Example` + [2a0f44e3] Base64 `@stdlib/Base64` + [b77e0a4c] InteractiveUtils `@stdlib/InteractiveUtils` + [56ddb016] Logging `@stdlib/Logging` + [d6f4376e] Markdown `@stdlib/Markdown` + [9a3f8284] Random `@stdlib/Random` + [ea8e919c] SHA `@stdlib/SHA` + [9e88b42a] Serialization `@stdlib/Serialization` + [8dfed614] Test `@stdlib/Test` + Testing Running tests... +Test Summary: | Pass Total +Example tests | 9 9 + Testing Example tests passed +``` + +And if all went correctly, you should see a similar output as above. +Using `Test.jl`, more complicated tests can be added for packages but this should ideally point developers in the direction of how to get started with testing their own created packages. 
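For readers following along, it may help to see what the `test` environment's `Project.toml` ends up containing after the `add Test` step above; `Example` itself does not need to be listed there, because `pkg> test` makes the package under test available automatically. A minimal sketch of that file, assuming only `Test` was added (the UUID is the standard `Test` stdlib UUID):

```toml
# test/Project.toml -- produced by `pkg> activate .` followed by `add Test`
# inside the package's test directory (sketch, not part of the patch above)
[deps]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
```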
+ ```@meta DocTestSetup = nothing ``` + +### Code Coverage + +Code coverage tracking during tests can be enabled using the `pkg> test --coverage` flag (or at a lower level using the +[`--code-coverage`](@ref command-line-interface) julia arg). This is on by default in the +[julia-runtest](https://github.com/julia-actions/julia-runtest) GitHub action. + +To evaluate coverage either manually inspect the `.cov` files that are generated beside the source files locally, +or in CI use the [julia-processcoverage](https://github.com/julia-actions/julia-processcoverage) GitHub action. + +!!! compat "Julia 1.11" + Since Julia 1.11, coverage is not collected during the package precompilation phase. diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 9a080812dbc45..9742975d7c464 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -27,7 +27,7 @@ export TestLogger, LogRecord using Random using Random: AbstractRNG, default_rng using InteractiveUtils: gen_call_with_extracted_types -using Base: typesplit +using Base: typesplit, remove_linenums! using Serialization: Serialization const DISPLAY_FAILED = ( @@ -47,30 +47,64 @@ const FAIL_FAST = Ref{Bool}(false) # Backtrace utility functions function ip_has_file_and_func(ip, file, funcs) - return any(fr -> (string(fr.file) == file && fr.func in funcs), StackTraces.lookup(ip)) + return any(fr -> (in_file(fr, file) && fr.func in funcs), StackTraces.lookup(ip)) end +in_file(frame, file) = string(frame.file) == file -function scrub_backtrace(bt) +function test_location(bt, file_ts, file_t) + if (isnothing(file_ts) || isnothing(file_t)) + return macrocall_location(bt, something(file_ts, @__FILE__)) + else + return test_callsite(bt, file_ts, file_t) + end +end + +function test_callsite(bt, file_ts, file_t) + # We avoid duplicate calls to `StackTraces.lookup`, as it is an expensive call. + # For that, we retrieve locations from lower to higher stack elements + # and only traverse parts of the backtrace which we haven't traversed before. + # The order will always be <internal functions> -> `@test` -> `@testset`. + internal = @something(macrocall_location(bt, @__FILE__), return nothing) + test = internal - 1 + @something(findfirst(ip -> any(frame -> in_file(frame, file_t), StackTraces.lookup(ip)), @view bt[internal:end]), return nothing) + testset = test - 1 + @something(macrocall_location(@view(bt[test:end]), file_ts), return nothing) + + # If stacktrace locations differ, include frames until the `@testset` appears. + test != testset && return testset + # `@test` and `@testset` occurred at the same stacktrace location. + # This may happen if `@test` occurred directly in scope of the testset, + # or if `@test` occurred in a function that has been inlined in the testset. + frames = StackTraces.lookup(bt[testset]) + outer_frame = findfirst(frame -> in_file(frame, file_ts) && frame.func == Symbol("macro expansion"), frames) + isnothing(outer_frame) && return nothing + # The `@test` call occurred directly in scope of a `@testset`. + # The __source__ from `@test` will be printed in the test message upon failure. + # There is no need to include more frames, but always include at least the internal macrocall location in the stacktrace. + in_file(frames[outer_frame], file_t) && return internal + # The `@test` call was inlined, so we still need to include the callsite. 
+ return testset +end + +macrocall_location(bt, file) = findfirst(ip -> ip_has_file_and_func(ip, file, (Symbol("macro expansion"),)), bt) + +function scrub_backtrace(bt, file_ts, file_t) do_test_ind = findfirst(ip -> ip_has_file_and_func(ip, @__FILE__, (:do_test, :do_test_throws)), bt) if do_test_ind !== nothing && length(bt) > do_test_ind bt = bt[do_test_ind + 1:end] end - name_ind = findfirst(ip -> ip_has_file_and_func(ip, @__FILE__, (Symbol("macro expansion"),)), bt) - if name_ind !== nothing && length(bt) != 0 - bt = bt[1:name_ind] - end + stop_at = test_location(bt, file_ts, file_t) + !isnothing(stop_at) && !isempty(bt) && return bt[1:stop_at] return bt end -function scrub_exc_stack(stack) - return Any[ (x[1], scrub_backtrace(x[2]::Vector{Union{Ptr{Nothing},Base.InterpreterIP}})) for x in stack ] +function scrub_exc_stack(stack, file_ts, file_t) + return Any[ (x[1], scrub_backtrace(x[2]::Vector{Union{Ptr{Nothing},Base.InterpreterIP}}, file_ts, file_t)) for x in stack ] end # define most of the test infrastructure without type specialization @nospecialize """ - Result + Test.Result All tests produce a result object. This object may or may not be stored, depending on whether the test is part of a test set. @@ -78,7 +112,7 @@ stored, depending on whether the test is part of a test set. abstract type Result end """ - Pass + Test.Pass <: Test.Result The test condition was true, i.e. the expression evaluated to true or the correct exception was thrown. @@ -108,7 +142,7 @@ function Base.show(io::IO, t::Pass) end """ - Fail + Test.Fail <: Test.Result The test condition was false, i.e. the expression evaluated to false or the correct exception was not thrown. @@ -118,18 +152,26 @@ struct Fail <: Result orig_expr::String data::Union{Nothing, String} value::String + context::Union{Nothing, String} source::LineNumberNode message_only::Bool - function Fail(test_type::Symbol, orig_expr, data, value, source::LineNumberNode, message_only::Bool=false) + backtrace::Union{Nothing, String} + function Fail(test_type::Symbol, orig_expr, data, value, context, source::LineNumberNode, message_only::Bool, backtrace=nothing) return new(test_type, string(orig_expr), data === nothing ? nothing : string(data), string(isa(data, Type) ? typeof(value) : value), + context, source, - message_only) + message_only, + backtrace) end end +# Deprecated fallback constructor without `context` argument (added in Julia 1.9). Remove in Julia 2.0. 
+Fail(test_type::Symbol, orig_expr, data, value, source::LineNumberNode, message_only::Bool=false) = + Fail(test_type, orig_expr, data, value, nothing, source, message_only) + function Base.show(io::IO, t::Fail) printstyled(io, "Test Failed"; bold=true, color=Base.error_color()) print(io, " at ") @@ -144,20 +186,31 @@ function Base.show(io::IO, t::Fail) else print(io, "\n Expected: ", data) print(io, "\n Thrown: ", value) + print(io, "\n") + if t.backtrace !== nothing + # Capture error message and indent to match + join(io, (" " * line for line in split(t.backtrace, "\n")), "\n") + end end elseif t.test_type === :test_throws_nothing # An exception was expected, but no exception was thrown print(io, "\n Expected: ", data) print(io, "\n No exception thrown") - elseif t.test_type === :test && data !== nothing - # The test was an expression, so display the term-by-term - # evaluated version as well - print(io, "\n Evaluated: ", data) + elseif t.test_type === :test + if data !== nothing + # The test was an expression, so display the term-by-term + # evaluated version as well + print(io, "\n Evaluated: ", data) + end + if t.context !== nothing + print(io, "\n Context: ", t.context) + end end + println(io) # add some visual space to separate sequential failures end """ - Error + Test.Error <: Test.Result The test condition couldn't be evaluated due to an exception, or it evaluated to something other than a [`Bool`](@ref). @@ -173,7 +226,7 @@ struct Error <: Result function Error(test_type::Symbol, orig_expr, value, bt, source::LineNumberNode) if test_type === :test_error - bt = scrub_exc_stack(bt) + bt = scrub_exc_stack(bt, nothing, extract_file(source)) end if test_type === :test_error || test_type === :nontest_error bt_str = try # try the latest world for this, since we might have eval'd new code for show @@ -238,7 +291,7 @@ function Base.show(io::IO, t::Error) end """ - Broken + Test.Broken <: Test.Result The test condition is the expected (failed) result of a broken test, or was explicitly skipped with `@test_skip`. @@ -454,19 +507,20 @@ macro test(ex, kws...) # Build the test expression test_expr!("@test", ex, kws...) - orig_ex = Expr(:inert, ex) result = get_test_result(ex, __source__) - return quote + ex = Expr(:inert, ex) + result = quote if $(length(skip) > 0 && esc(skip[1])) - record(get_testset(), Broken(:skipped, $orig_ex)) + record(get_testset(), Broken(:skipped, $ex)) else let _do = $(length(broken) > 0 && esc(broken[1])) ? do_broken_test : do_test - _do($result, $orig_ex) + _do($result, $ex) end end end + return result end """ @@ -494,10 +548,10 @@ Test Broken """ macro test_broken(ex, kws...) test_expr!("@test_broken", ex, kws...) - orig_ex = Expr(:inert, ex) result = get_test_result(ex, __source__) # code to call do_test with execution result and original expr - :(do_broken_test($result, $orig_ex)) + ex = Expr(:inert, ex) + return :(do_broken_test($result, $ex)) end """ @@ -524,9 +578,9 @@ Test Broken """ macro test_skip(ex, kws...) test_expr!("@test_skip", ex, kws...) 
- orig_ex = Expr(:inert, ex) - testres = :(Broken(:skipped, $orig_ex)) - :(record(get_testset(), $testres)) + ex = Expr(:inert, ex) + testres = :(Broken(:skipped, $ex)) + return :(record(get_testset(), $testres)) end # An internal function, called by the code generated by the @test @@ -614,7 +668,8 @@ function get_test_result(ex, source) $negate, )) else - testret = :(Returned($(esc(orig_ex)), nothing, $(QuoteNode(source)))) + ex = Expr(:block, source, esc(orig_ex)) + testret = :(Returned($ex, nothing, $(QuoteNode(source)))) end result = quote try @@ -624,7 +679,6 @@ function get_test_result(ex, source) Threw(_e, Base.current_exceptions(), $(QuoteNode(source))) end end - Base.remove_linenums!(result) result end @@ -643,7 +697,7 @@ function do_test(result::ExecutionResult, orig_expr) testres = if isa(value, Bool) # a true value Passes value ? Pass(:test, orig_expr, result.data, value, result.source) : - Fail(:test, orig_expr, result.data, value, result.source) + Fail(:test, orig_expr, result.data, value, nothing, result.source, false) else # If the result is non-Boolean, this counts as an Error Error(:test_nonbool, orig_expr, value, nothing, result.source) @@ -663,8 +717,13 @@ function do_broken_test(result::ExecutionResult, orig_expr) # Assume the test is broken and only change if the result is true if isa(result, Returned) value = result.value - if isa(value, Bool) && value - testres = Error(:test_unbroken, orig_expr, value, nothing, result.source) + if isa(value, Bool) + if value + testres = Error(:test_unbroken, orig_expr, value, nothing, result.source) + end + else + # If the result is non-Boolean, this counts as an Error + testres = Error(:test_nonbool, orig_expr, value, nothing, result.source) end end record(get_testset(), testres) @@ -682,6 +741,9 @@ a matching function, or a value (which will be tested for equality by comparing fields). Note that `@test_throws` does not support a trailing keyword form. +!!! compat "Julia 1.8" + The ability to specify anything other than a type or a value as `exception` requires Julia v1.8 or later. + # Examples ```jldoctest julia> @test_throws BoundsError [1, 2, 3][4] @@ -694,7 +756,7 @@ Test Passed julia> @test_throws "Try sqrt(Complex" sqrt(-1) Test Passed - Message: "DomainError with -1.0:\\nsqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x))." + Message: "DomainError with -1.0:\\nsqrt was called with a negative real argument but will only return a complex result if called with a complex argument. Try sqrt(Complex(x))." 
``` In the final example, instead of matching a single string it could alternatively have been performed with: @@ -705,18 +767,18 @@ In the final example, instead of matching a single string it could alternatively """ macro test_throws(extype, ex) orig_ex = Expr(:inert, ex) + ex = Expr(:block, __source__, esc(ex)) result = quote try - Returned($(esc(ex)), nothing, $(QuoteNode(__source__))) + Returned($ex, nothing, $(QuoteNode(__source__))) catch _e if $(esc(extype)) != InterruptException && _e isa InterruptException rethrow() end - Threw(_e, nothing, $(QuoteNode(__source__))) + Threw(_e, Base.current_exceptions(), $(QuoteNode(__source__))) end end - Base.remove_linenums!(result) - :(do_test_throws($result, $orig_ex, $(esc(extype)))) + return :(do_test_throws($result, $orig_ex, $(esc(extype)))) end const MACROEXPAND_LIKE = Symbol.(("@macroexpand", "@macroexpand1", "macroexpand")) @@ -770,10 +832,25 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype) if success testres = Pass(:test_throws, orig_expr, extype, exc, result.source, message_only) else - testres = Fail(:test_throws_wrong, orig_expr, extype, exc, result.source, message_only) + if result.backtrace !== nothing + bt = scrub_exc_stack(result.backtrace, nothing, extract_file(result.source)) + bt_str = try # try the latest world for this, since we might have eval'd new code for show + Base.invokelatest(sprint, Base.show_exception_stack, bt; context=stdout) + catch ex + "#=ERROR showing exception stack=# " * + try + sprint(Base.showerror, ex, catch_backtrace(); context=stdout) + catch + "of type " * string(typeof(ex)) + end + end + else + bt_str = nothing + end + testres = Fail(:test_throws_wrong, orig_expr, extype, exc, nothing, result.source, message_only, bt_str) end else - testres = Fail(:test_throws_nothing, orig_expr, extype, nothing, result.source) + testres = Fail(:test_throws_nothing, orig_expr, extype, nothing, nothing, result.source, false) end record(get_testset(), testres) end @@ -949,6 +1026,33 @@ finish(ts::FallbackTestSet) = ts #----------------------------------------------------------------------- +""" + ContextTestSet + +Passes test failures through to the parent test set, while adding information +about a context object that is being tested. +""" +struct ContextTestSet <: AbstractTestSet + parent_ts::AbstractTestSet + context_name::Union{Symbol, Expr} + context::Any +end + +function ContextTestSet(name::Union{Symbol, Expr}, @nospecialize(context)) + if (name isa Expr) && (name.head != :tuple) + error("Invalid syntax: $(name)") + end + return ContextTestSet(get_testset(), name, context) +end +record(c::ContextTestSet, t) = record(c.parent_ts, t) +function record(c::ContextTestSet, t::Fail) + context = string(c.context_name, " = ", c.context) + context = t.context === nothing ? 
context : string(t.context, "\n ", context) + record(c.parent_ts, Fail(t.test_type, t.orig_expr, t.data, t.value, context, t.source, t.message_only)) +end + +#----------------------------------------------------------------------- + """ DefaultTestSet @@ -966,8 +1070,9 @@ mutable struct DefaultTestSet <: AbstractTestSet time_start::Float64 time_end::Union{Float64,Nothing} failfast::Bool + file::Union{String,Nothing} end -function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true, failfast::Union{Nothing,Bool} = nothing) +function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true, failfast::Union{Nothing,Bool} = nothing, source = nothing) if isnothing(failfast) # pass failfast state into child testsets parent_ts = get_testset() @@ -977,8 +1082,11 @@ function DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming: failfast = false end end - return DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing, failfast) + return DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing, failfast, extract_file(source)) end +extract_file(source::LineNumberNode) = extract_file(source.file) +extract_file(file::Symbol) = string(file) +extract_file(::Nothing) = nothing struct FailFastError <: Exception end @@ -989,14 +1097,14 @@ record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t) # For the other result types, immediately print the error message # but do not terminate. Print a backtrace. -function record(ts::DefaultTestSet, t::Union{Fail, Error}) - if TESTSET_PRINT_ENABLE[] +function record(ts::DefaultTestSet, t::Union{Fail, Error}; print_result::Bool=TESTSET_PRINT_ENABLE[]) + if print_result print(ts.description, ": ") # don't print for interrupted tests if !(t isa Error) || t.test_type !== :test_interrupted print(t) if !isa(t, Error) # if not gets printed in the show method - Base.show_backtrace(stdout, scrub_backtrace(backtrace())) + Base.show_backtrace(stdout, scrub_backtrace(backtrace(), ts.file, extract_file(t.source))) end println() end @@ -1081,7 +1189,7 @@ const TESTSET_PRINT_ENABLE = Ref(true) # Called at the end of a @testset, behaviour depends on whether # this is a child of another testset, or the "root" testset -function finish(ts::DefaultTestSet) +function finish(ts::DefaultTestSet; print_results::Bool=TESTSET_PRINT_ENABLE[]) ts.time_end = time() # If we are a nested test set, do not print a full summary # now - let the parent test set do the printing @@ -1098,7 +1206,7 @@ function finish(ts::DefaultTestSet) total_broken = broken + c_broken total = total_pass + total_fail + total_error + total_broken - if TESTSET_PRINT_ENABLE[] + if print_results print_test_results(ts) end @@ -1163,10 +1271,11 @@ function get_test_counts(ts::DefaultTestSet) end end ts.anynonpass = (fails + errors + c_fails + c_errors > 0) - duration = if isnothing(ts.time_end) + (; time_start, time_end) = ts + duration = if isnothing(time_end) "" else - dur_s = ts.time_end - ts.time_start + dur_s = time_end - time_start if dur_s < 60 string(round(dur_s, digits = 1), "s") else @@ -1264,12 +1373,16 @@ function _check_testset(testsettype, testsetname) end """ - @testset [CustomTestSet] [option=val ...] ["description"] begin ... end - @testset [CustomTestSet] [option=val ...] ["description \$v"] for v in (...) ... end - @testset [CustomTestSet] [option=val ...] ["description \$v, \$w"] for v in (...), w in (...) ... end - @testset [CustomTestSet] [option=val ...] 
["description \$v, \$w"] foo() + @testset [CustomTestSet] [options...] ["description"] begin test_ex end + @testset [CustomTestSet] [options...] ["description \$v"] for v in itr test_ex end + @testset [CustomTestSet] [options...] ["description \$v, \$w"] for v in itrv, w in itrw test_ex end + @testset [CustomTestSet] [options...] ["description"] test_func() + @testset let v = v, w = w; test_ex; end + +# With begin/end or function call -Starts a new test set, or multiple test sets if a `for` loop is provided. +When @testset is used, with begin/end or a single function call, the macro +starts a new test set in which to evaluate the given expression. If no custom testset type is given it defaults to creating a `DefaultTestSet`. `DefaultTestSet` records all the results and, if there are any `Fail`s or @@ -1281,19 +1394,23 @@ also be used for any nested `@testset` invocations. The given options are only applied to the test set where they are given. The default test set type accepts three boolean options: - `verbose`: if `true`, the result summary of the nested testsets is shown even -when they all pass (the default is `false`). + when they all pass (the default is `false`). - `showtiming`: if `true`, the duration of each displayed testset is shown -(the default is `true`). + (the default is `true`). - `failfast`: if `true`, any test failure or error will cause the testset and any -child testsets to return immediately (the default is `false`). This can also be set -globally via the env var `JULIA_TEST_FAILFAST`. + child testsets to return immediately (the default is `false`). + This can also be set globally via the env var `JULIA_TEST_FAILFAST`. + +!!! compat "Julia 1.8" + `@testset test_func()` requires at least Julia 1.8. !!! compat "Julia 1.9" `failfast` requires at least Julia 1.9. The description string accepts interpolation from the loop indices. If no description is provided, one is constructed based on the variables. -If a function call is provided, its name will be used. Explicit description strings override this behavior. +If a function call is provided, its name will be used. +Explicit description strings override this behavior. By default the `@testset` macro will return the testset object itself, though this behavior can be customized in other testset types. If a `for` loop is used @@ -1309,7 +1426,7 @@ reproducibility in case of failure, and to allow seamless re-arrangements of `@testset`s regardless of their side-effect on the global RNG state. -# Examples +## Examples ```jldoctest; filter = r"trigonometric identities | 4 4 [0-9\\.]+s" julia> @testset "trigonometric identities" begin θ = 2/3*π @@ -1321,6 +1438,52 @@ julia> @testset "trigonometric identities" begin Test Summary: | Pass Total Time trigonometric identities | 4 4 0.2s ``` + +# `@testset for` + +When `@testset for` is used, the macro starts a new test for each iteration of +the provided loop. The semantics of each test set are otherwise identical to that +of that `begin/end` case (as if used for each loop iteration). + +# `@testset let` + +When `@testset let` is used, the macro starts a *transparent* test set with +the given object added as a context object to any failing test contained +therein. This is useful when performing a set of related tests on one larger +object and it is desirable to print this larger object when any of the +individual tests fail. 
Transparent test sets do not introduce additional levels +of nesting in the test set hierarchy and are passed through directly to the +parent test set (with the context object appended to any failing tests.) + +!!! compat "Julia 1.9" + `@testset let` requires at least Julia 1.9. + +!!! compat "Julia 1.10" + Multiple `let` assignments are supported since Julia 1.10. + +## Examples +```jldoctest +julia> @testset let logi = log(im) + @test imag(logi) == π/2 + @test !iszero(real(logi)) + end +Test Failed at none:3 + Expression: !(iszero(real(logi))) + Context: logi = 0.0 + 1.5707963267948966im + +ERROR: There was an error during testing + +julia> @testset let logi = log(im), op = !iszero + @test imag(logi) == π/2 + @test op(real(logi)) + end +Test Failed at none:3 + Expression: op(real(logi)) + Context: logi = 0.0 + 1.5707963267948966im + op = !iszero + +ERROR: There was an error during testing +``` """ macro testset(args...) isempty(args) && error("No arguments to @testset") @@ -1328,15 +1491,17 @@ macro testset(args...) tests = args[end] # Determine if a single block or for-loop style - if !isa(tests,Expr) || (tests.head !== :for && tests.head !== :block && tests.head != :call) + if !isa(tests,Expr) || (tests.head !== :for && tests.head !== :block && tests.head !== :call && tests.head !== :let) error("Expected function call, begin/end block or for loop as argument to @testset") end - FAIL_FAST[] = something(tryparse(Bool, get(ENV, "JULIA_TEST_FAILFAST", "false")), false) + FAIL_FAST[] = Base.get_bool_env("JULIA_TEST_FAILFAST", false) if tests.head === :for return testset_forloop(args, tests, __source__) + elseif tests.head === :let + return testset_context(args, tests, __source__) else return testset_beginend_call(args, tests, __source__) end @@ -1345,6 +1510,51 @@ end trigger_test_failure_break(@nospecialize(err)) = ccall(:jl_test_failure_breakpoint, Cvoid, (Any,), err) +""" +Generate the code for an `@testset` with a `let` argument. +""" +function testset_context(args, ex, source) + desc, testsettype, options = parse_testset_args(args[1:end-1]) + if desc !== nothing || testsettype !== nothing + # Reserve this syntax if we ever want to allow this, but for now, + # just do the transparent context test set. + error("@testset with a `let` argument cannot be customized") + end + + let_ex = ex.args[1] + + if Meta.isexpr(let_ex, :(=)) + contexts = Any[let_ex.args[1]] + elseif Meta.isexpr(let_ex, :block) + contexts = Any[] + for assign_ex in let_ex.args + if Meta.isexpr(assign_ex, :(=)) + push!(contexts, assign_ex.args[1]) + else + error("Malformed `let` expression is given") + end + end + else + error("Malformed `let` expression is given") + end + reverse!(contexts) + + test_ex = ex.args[2] + + ex.args[2] = quote + $(map(contexts) do context + :($push_testset($(ContextTestSet)($(QuoteNode(context)), $context; $options...))) + end...) + try + $(test_ex) + finally + $(map(_->:($pop_testset()), contexts)...) + end + end + + return esc(ex) +end + """ Generate the code for a `@testset` with a function call or `begin`/`end` argument """ @@ -1370,17 +1580,20 @@ function testset_beginend_call(args, tests, source) ex = quote _check_testset($testsettype, $(QuoteNode(testsettype.args[1]))) local ret - local ts = $(testsettype)($desc; $options...) + local ts = if ($testsettype === $DefaultTestSet) && $(isa(source, LineNumberNode)) + $(testsettype)($desc; source=$(QuoteNode(source.file)), $options...) + else + $(testsettype)($desc; $options...) 
+ end push_testset(ts) # we reproduce the logic of guardseed, but this function # cannot be used as it changes slightly the semantic of @testset, # by wrapping the body in a function - local RNG = default_rng() - local oldrng = copy(RNG) - local oldseed = Random.GLOBAL_SEED + local default_rng_orig = copy(default_rng()) + local tls_seed_orig = copy(Random.get_tls_seed()) try - # RNG is re-seeded with its own seed to ease reproduce a failed test - Random.seed!(Random.GLOBAL_SEED) + # default RNG is reset to its state from last `seed!()` to ease reproduce a failed test + copy!(Random.default_rng(), tls_seed_orig) let $(esc(tests)) end @@ -1395,8 +1608,8 @@ function testset_beginend_call(args, tests, source) record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source)))) end finally - copy!(RNG, oldrng) - Random.set_global_seed!(oldseed) + copy!(default_rng(), default_rng_orig) + copy!(Random.get_tls_seed(), tls_seed_orig) pop_testset() ret = finish(ts) end @@ -1411,7 +1624,7 @@ end function failfast_print() printstyled("\nFail-fast enabled:"; color = Base.error_color(), bold=true) - printstyled(" Fail or Error occured\n\n"; color = Base.error_color()) + printstyled(" Fail or Error occurred\n\n"; color = Base.error_color()) end """ @@ -1461,12 +1674,13 @@ function testset_forloop(args, testloop, source) finish_errored = true push!(arr, finish(ts)) finish_errored = false - - # it's 1000 times faster to copy from tmprng rather than calling Random.seed! - copy!(RNG, tmprng) - + copy!(default_rng(), tls_seed_orig) + end + ts = if ($testsettype === $DefaultTestSet) && $(isa(source, LineNumberNode)) + $(testsettype)($desc; source=$(QuoteNode(source.file)), $options...) + else + $(testsettype)($desc; $options...) end - ts = $(testsettype)($desc; $options...) push_testset(ts) first_iteration = false try @@ -1486,11 +1700,9 @@ function testset_forloop(args, testloop, source) local first_iteration = true local ts local finish_errored = false - local RNG = default_rng() - local oldrng = copy(RNG) - local oldseed = Random.GLOBAL_SEED - Random.seed!(Random.GLOBAL_SEED) - local tmprng = copy(RNG) + local default_rng_orig = copy(default_rng()) + local tls_seed_orig = copy(Random.get_tls_seed()) + copy!(Random.default_rng(), tls_seed_orig) try let $(Expr(:for, Expr(:block, [esc(v) for v in loopvars]...), blk)) @@ -1501,8 +1713,8 @@ function testset_forloop(args, testloop, source) pop_testset() push!(arr, finish(ts)) end - copy!(RNG, oldrng) - Random.set_global_seed!(oldseed) + copy!(default_rng(), default_rng_orig) + copy!(Random.get_tls_seed(), tls_seed_orig) end arr end @@ -1578,7 +1790,7 @@ end """ get_testset_depth() -Returns the number of active test sets, not including the default test set +Return the number of active test sets, not including the default test set """ function get_testset_depth() testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[]) @@ -1602,7 +1814,7 @@ matches the inferred type modulo `AllowedType`, or when the return type is a sub `AllowedType`. This is useful when testing type stability of functions returning a small union such as `Union{Nothing, T}` or `Union{Missing, T}`. -```jldoctest; setup = :(using InteractiveUtils), filter = r"begin\\n(.|\\n)*end" +```jldoctest; setup = :(using InteractiveUtils; using Base: >), filter = r"begin\\n(.|\\n)*end" julia> f(a) = a > 1 ? 
1 : 1.0 f (generic function with 1 method) @@ -1611,8 +1823,7 @@ Int64 julia> @code_warntype f(2) MethodInstance for f(::Int64) - from f(a) - @ Main none:1 + from f(a) @ Main none:1 Arguments #self#::Core.Const(f) a::Int64 @@ -1664,10 +1875,9 @@ function _inferred(ex, mod, allow = :(Union{})) ex = Expr(:call, GlobalRef(Test, :_materialize_broadcasted), farg, ex.args[2:end]...) end - Base.remove_linenums!(let ex = ex; + result = let ex = ex quote - let - allow = $(esc(allow)) + let allow = $(esc(allow)) allow isa Type || throw(ArgumentError("@inferred requires a type as second argument")) $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args) # Has keywords @@ -1691,7 +1901,8 @@ function _inferred(ex, mod, allow = :(Union{})) result end end - end) + end + return remove_linenums!(result) end function is_in_mods(m::Module, recursive::Bool, mods) @@ -1709,7 +1920,7 @@ end ambiguous_bottom=false, allowed_undefineds=nothing) -Returns a vector of `(Method,Method)` pairs of ambiguous methods +Return a vector of `(Method,Method)` pairs of ambiguous methods defined in the specified modules. Use `recursive=true` to test in all submodules. @@ -1731,16 +1942,16 @@ function detect_ambiguities(mods::Module...; ambs = Set{Tuple{Method,Method}}() mods = collect(mods)::Vector{Module} function sortdefs(m1::Method, m2::Method) - ord12 = m1.file < m2.file - if !ord12 && (m1.file == m2.file) - ord12 = m1.line < m2.line + ord12 = cmp(m1.file, m2.file) + if ord12 == 0 + ord12 = cmp(m1.line, m2.line) end - return ord12 ? (m1, m2) : (m2, m1) + return ord12 <= 0 ? (m1, m2) : (m2, m1) end function examine(mt::Core.MethodTable) for m in Base.MethodList(mt) m.sig == Tuple && continue # ignore Builtins - is_in_mods(m.module, recursive, mods) || continue + is_in_mods(parentmodule(m), recursive, mods) || continue world = Base.get_world_counter() ambig = Ref{Int32}(0) ms = Base._methods_by_ftype(m.sig, nothing, -1, world, true, Ref(typemin(UInt)), Ref(typemax(UInt)), ambig)::Vector @@ -1773,7 +1984,7 @@ function detect_ambiguities(mods::Module...; f = Base.unwrap_unionall(getfield(mod, n)) if isa(f, Module) && f !== mod && parentmodule(f) === mod && nameof(f) === n push!(work, f) - elseif isa(f, DataType) && isdefined(f.name, :mt) && f.name.module === mod && f.name.name === n && f.name.mt !== Symbol.name.mt && f.name.mt !== DataType.name.mt + elseif isa(f, DataType) && isdefined(f.name, :mt) && parentmodule(f) === mod && nameof(f) === n && f.name.mt !== Symbol.name.mt && f.name.mt !== DataType.name.mt examine(f.name.mt) end end @@ -1786,7 +1997,7 @@ end """ detect_unbound_args(mod1, mod2...; recursive=false, allowed_undefineds=nothing) -Returns a vector of `Method`s which may have unbound type parameters. +Return a vector of `Method`s which may have unbound type parameters. Use `recursive=true` to test in all submodules. By default, any undefined symbols trigger a warning. This warning can @@ -1794,8 +2005,8 @@ be suppressed by supplying a collection of `GlobalRef`s for which the warning can be skipped. 
For example, setting ``` -allow_undefineds = Set([GlobalRef(Base, :active_repl), - GlobalRef(Base, :active_repl_backend)]) +allowed_undefineds = Set([GlobalRef(Base, :active_repl), + GlobalRef(Base, :active_repl_backend)]) ``` would suppress warnings about `Base.active_repl` and @@ -1812,7 +2023,7 @@ function detect_unbound_args(mods...; mods = collect(mods)::Vector{Module} function examine(mt::Core.MethodTable) for m in Base.MethodList(mt) - is_in_mods(m.module, recursive, mods) || continue + is_in_mods(parentmodule(m), recursive, mods) || continue has_unbound_vars(m.sig) || continue tuple_sig = Base.unwrap_unionall(m.sig)::DataType if Base.isvatuple(tuple_sig) @@ -1844,7 +2055,7 @@ function detect_unbound_args(mods...; f = Base.unwrap_unionall(getfield(mod, n)) if isa(f, Module) && f !== mod && parentmodule(f) === mod && nameof(f) === n push!(work, f) - elseif isa(f, DataType) && isdefined(f.name, :mt) && f.name.module === mod && f.name.name === n && f.name.mt !== Symbol.name.mt && f.name.mt !== DataType.name.mt + elseif isa(f, DataType) && isdefined(f.name, :mt) && parentmodule(f) === mod && nameof(f) === n && f.name.mt !== Symbol.name.mt && f.name.mt !== DataType.name.mt examine(f.name.mt) end end @@ -1854,54 +2065,11 @@ function detect_unbound_args(mods...; return collect(ambs) end -# find if var will be constrained to have a definite value -# in any concrete leaftype subtype of typ -function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool) - typ === var && return true - while typ isa UnionAll - covariant && constrains_param(var, typ.var.ub, covariant) && return true - # typ.var.lb doesn't constrain var - typ = typ.body - end - if typ isa Union - # for unions, verify that both options would constrain var - ba = constrains_param(var, typ.a, covariant) - bb = constrains_param(var, typ.b, covariant) - (ba && bb) && return true - elseif typ isa DataType - # return true if any param constrains var - fc = length(typ.parameters) - if fc > 0 - if typ.name === Tuple.name - # vararg tuple needs special handling - for i in 1:(fc - 1) - p = typ.parameters[i] - constrains_param(var, p, covariant) && return true - end - lastp = typ.parameters[fc] - vararg = Base.unwrap_unionall(lastp) - if vararg isa Core.TypeofVararg && isdefined(vararg, :N) - constrains_param(var, vararg.N, covariant) && return true - # T = vararg.parameters[1] doesn't constrain var - else - constrains_param(var, lastp, covariant) && return true - end - else - for i in 1:fc - p = typ.parameters[i] - constrains_param(var, p, false) && return true - end - end - end - end - return false -end - function has_unbound_vars(@nospecialize sig) while sig isa UnionAll var = sig.var sig = sig.body - if !constrains_param(var, sig, true) + if !Core.Compiler.constrains_param(var, sig, #=covariant=#true, #=type_constrains=#true) return true end end @@ -1956,6 +2124,8 @@ for G in (GenericSet, GenericDict) end Base.get(s::GenericDict, x, y) = get(s.s, x, y) +Base.pop!(s::GenericDict, k) = pop!(s.s, k) +Base.setindex!(s::GenericDict, v, k) = setindex!(s.s, v, k) """ The `GenericArray` can be used to test generic array APIs that program to @@ -2020,49 +2190,7 @@ function _check_bitarray_consistency(B::BitArray{N}) where N return true end -# 0.7 deprecations - -begin - approx_full(x::AbstractArray) = x - approx_full(x::Number) = x - approx_full(x) = full(x) - - function test_approx_eq(va, vb, Eps, astr, bstr) - va = approx_full(va) - vb = approx_full(vb) - la, lb = length(LinearIndices(va)), length(LinearIndices(vb)) - if la != lb 
- error("lengths of ", astr, " and ", bstr, " do not match: ", - "\n ", astr, " (length $la) = ", va, - "\n ", bstr, " (length $lb) = ", vb) - end - diff = real(zero(eltype(va))) - for (xa, xb) = zip(va, vb) - if isfinite(xa) && isfinite(xb) - diff = max(diff, abs(xa-xb)) - elseif !isequal(xa,xb) - error("mismatch of non-finite elements: ", - "\n ", astr, " = ", va, - "\n ", bstr, " = ", vb) - end - end - - if !isnan(Eps) && !(diff <= Eps) - sdiff = string("|", astr, " - ", bstr, "| <= ", Eps) - error("assertion failed: ", sdiff, - "\n ", astr, " = ", va, - "\n ", bstr, " = ", vb, - "\n difference = ", diff, " > ", Eps) - end - end - - array_eps(a::AbstractArray{Complex{T}}) where {T} = eps(float(maximum(x->(isfinite(x) ? abs(x) : T(NaN)), a))) - array_eps(a) = eps(float(maximum(x->(isfinite(x) ? abs(x) : oftype(x,NaN)), a))) - - test_approx_eq(va, vb, astr, bstr) = - test_approx_eq(va, vb, 1E4*length(LinearIndices(va))*max(array_eps(va), array_eps(vb)), astr, bstr) -end - include("logging.jl") +include("precompile.jl") end # module diff --git a/stdlib/Test/src/logging.jl b/stdlib/Test/src/logging.jl index d7146b121d47d..7b3838903ce10 100644 --- a/stdlib/Test/src/logging.jl +++ b/stdlib/Test/src/logging.jl @@ -120,9 +120,9 @@ end # Log testing tools # Failure result type for log testing -mutable struct LogTestFailure <: Result +struct LogTestFailure <: Result orig_expr - source::Union{Nothing,LineNumberNode} + source::LineNumberNode patterns logs end @@ -149,12 +149,12 @@ function record(ts::DefaultTestSet, t::LogTestFailure) if TESTSET_PRINT_ENABLE[] printstyled(ts.description, ": ", color=:white) print(t) - Base.show_backtrace(stdout, scrub_backtrace(backtrace())) + Base.show_backtrace(stdout, scrub_backtrace(backtrace(), ts.file, extract_file(t.source))) println() end # Hack: convert to `Fail` so that test summarization works correctly - push!(ts.results, Fail(:test, t.orig_expr, t.logs, nothing, t.source)) - t + push!(ts.results, Fail(:test, t.orig_expr, t.logs, nothing, nothing, t.source, false)) + return t end """ diff --git a/stdlib/Test/src/precompile.jl b/stdlib/Test/src/precompile.jl new file mode 100644 index 0000000000000..2cb2fb7f3f0c6 --- /dev/null +++ b/stdlib/Test/src/precompile.jl @@ -0,0 +1,9 @@ +redirect_stdout(devnull) do + @testset "example" begin + @test 1 == 1 + @test_throws ErrorException error() + @test_logs (:info, "Doing foo with n=2") @info "Doing foo with n=2" + @test_broken 1 == 2 + @test 1 ≈ 1.0000000000000001 + end +end diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl index 38a4fb0031dd7..1f93db2b5ed72 100644 --- a/stdlib/Test/test/runtests.jl +++ b/stdlib/Test/test/runtests.jl @@ -77,7 +77,7 @@ end @test 1234 === @test_nowarn(1234) @test 5678 === @test_warn("WARNING: foo", begin println(stderr, "WARNING: foo"); 5678; end) let a - @test_throws UndefVarError(:a) a + @test_throws UndefVarError(:a, :local) a @test_nowarn a = 1 @test a === 1 end @@ -162,7 +162,7 @@ let fails = @testset NoThrowTestSet begin @test_throws "A test" error("a test") @test_throws r"sqrt\([Cc]omplx" sqrt(-1) @test_throws str->occursin("a T", str) error("a test") - @test_throws ["BoundsError", "acess", "1-element", "at index [2]"] [1][2] + @test_throws ["BoundsError", "aquire", "1-element", "at index [2]"] [1][2] end for fail in fails @test fail isa Test.Fail @@ -294,7 +294,7 @@ let fails = @testset NoThrowTestSet begin end let str = sprint(show, fails[26]) - @test occursin("Expected: [\"BoundsError\", \"acess\", \"1-element\", \"at index [2]\"]", str) + @test 
occursin("Expected: [\"BoundsError\", \"aquire\", \"1-element\", \"at index [2]\"]", str) @test occursin(r"Message: \"BoundsError.* 1-element.*at index \[2\]", str) end @@ -346,7 +346,7 @@ let retval_tests = @testset NoThrowTestSet begin @test Test.record(ts, pass_mock) isa Test.Pass error_mock = Test.Error(:test, 1, 2, 3, LineNumberNode(0, "An Error Mock")) @test Test.record(ts, error_mock) isa Test.Error - fail_mock = Test.Fail(:test, 1, 2, 3, LineNumberNode(0, "A Fail Mock")) + fail_mock = Test.Fail(:test, 1, 2, 3, nothing, LineNumberNode(0, "A Fail Mock"), false) @test Test.record(ts, fail_mock) isa Test.Fail broken_mock = Test.Broken(:test, LineNumberNode(0, "A Broken Mock")) @test Test.record(ts, broken_mock) isa Test.Broken @@ -409,19 +409,19 @@ end @test true @test false @test 1 == 1 - @test 2 == :foo + @test 2 === :foo @test 3 == 3 @testset "d" begin @test 4 == 4 end @testset begin - @test :blank != :notblank + @test :blank !== :notblank end end @testset "inner1" begin @test 1 == 1 @test 2 == 2 - @test 3 == :bar + @test 3 === :bar @test 4 == 4 @test_throws ErrorException 1+1 @test_throws ErrorException error() @@ -659,15 +659,15 @@ end @test tss.foo == 3 # test @inferred -uninferrable_function(i) = (1, "1")[i] -uninferrable_small_union(i) = (1, nothing)[i] -@test_throws ErrorException @inferred(uninferrable_function(1)) +uninferable_function(i) = (1, "1")[i] +uninferable_small_union(i) = (1, nothing)[i] +@test_throws ErrorException @inferred(uninferable_function(1)) @test @inferred(identity(1)) == 1 -@test @inferred(Nothing, uninferrable_small_union(1)) === 1 -@test @inferred(Nothing, uninferrable_small_union(2)) === nothing -@test_throws ErrorException @inferred(Missing, uninferrable_small_union(1)) -@test_throws ErrorException @inferred(Missing, uninferrable_small_union(2)) -@test_throws ArgumentError @inferred(nothing, uninferrable_small_union(1)) +@test @inferred(Nothing, uninferable_small_union(1)) === 1 +@test @inferred(Nothing, uninferable_small_union(2)) === nothing +@test_throws ErrorException @inferred(Missing, uninferable_small_union(1)) +@test_throws ErrorException @inferred(Missing, uninferable_small_union(2)) +@test_throws ArgumentError @inferred(nothing, uninferable_small_union(1)) # Ensure @inferred only evaluates the arguments once inferred_test_global = 0 @@ -692,12 +692,12 @@ end # Issue #17105 # @inferred with kwargs -inferrable_kwtest(x; y=1) = 2x -uninferrable_kwtest(x; y=1) = 2x+y -@test (@inferred inferrable_kwtest(1)) == 2 -@test (@inferred inferrable_kwtest(1; y=1)) == 2 -@test (@inferred uninferrable_kwtest(1)) == 3 -@test (@inferred uninferrable_kwtest(1; y=2)) == 4 +inferable_kwtest(x; y=1) = 2x +uninferable_kwtest(x; y=1) = 2x+y +@test (@inferred inferable_kwtest(1)) == 2 +@test (@inferred inferable_kwtest(1; y=1)) == 2 +@test (@inferred uninferable_kwtest(1)) == 3 +@test (@inferred uninferable_kwtest(1; y=2)) == 4 @test_throws ErrorException @testset "$(error())" for i in 1:10 end @@ -722,6 +722,115 @@ end rm(f; force=true) end +@testset "provide informative location in backtrace for test failures" begin + win2unix(filename) = replace(filename, "\\" => '/') + utils = win2unix(tempname()) + write(utils, + """ + function test_properties2(value) + @test isodd(value) + end + """) + + included = win2unix(tempname()) + write(included, + """ + @testset "Other tests" begin + @test 1 + 1 == 3 + test_properties2(2) + end + test_properties2(8) + + # Test calls to `@test` and `@testset` with no file/lineno information (__source__ == nothing). 
+ eval(Expr(:macrocall, Symbol("@test"), nothing, :false)) + eval(Expr(:macrocall, Symbol("@testset"), nothing, "Testset without source", quote + @test false + @test error("failed") + end)) + """) + + runtests = win2unix(tempname()) + write(runtests, + """ + using Test + + include("$utils") + + function test_properties(value) + @test isodd(value) + end + + @testset "Tests" begin + test_properties(8) + @noinline test_properties(8) + test_properties2(8) + + include("$included") + end + """) + msg = read(pipeline(ignorestatus(`$(Base.julia_cmd()) --startup-file=no --color=no $runtests`), stderr=devnull), String) + msg = win2unix(msg) + regex = r"((?:Tests|Other tests|Testset without source): Test Failed (?:.|\n)*?)\n\nStacktrace:(?:.|\n)*?(?=\n(?:Tests|Other tests))" + failures = map(eachmatch(regex, msg)) do m + m = match(r"(Tests|Other tests|Testset without source): .*? at (.*?)\n Expression: (.*)(?:.|\n)*\n+Stacktrace:\n((?:.|\n)*)", m.match) + (; testset = m[1], source = m[2], ex = m[3], stacktrace = m[4]) + end + @test length(failures) == 8 # 8 failed tests + @test count(contains("Error During Test"), split(msg, '\n')) == 1 # 1 error + test_properties_macro_source = runtests * ":6" + test_properties2_macro_source = utils * ":2" + + fail = failures[1]; lines = split(fail.stacktrace, '\n') + @test length(lines)/2 ≤ 6 + @test fail.testset == "Tests" && fail.source == test_properties_macro_source && fail.ex == "isodd(value)" + @test count(contains(runtests * ":10"), lines) == 2 # @testset + test + + fail = failures[2]; lines = split(fail.stacktrace, '\n') + @test length(lines)/2 ≤ 6 + @test fail.testset == "Tests" && fail.source == test_properties_macro_source && fail.ex == "isodd(value)" + @test count(contains(runtests * ":10"), lines) == 1 # @testset + @test count(contains(runtests * ":11"), lines) == 1 # test + + fail = failures[3]; lines = split(fail.stacktrace, '\n') + @test length(lines)/2 ≤ 6 + @test fail.testset == "Tests" && fail.source == test_properties2_macro_source && fail.ex == "isodd(value)" + @test count(contains(runtests * ":10"), lines) == 1 # @testset + @test count(contains(runtests * ":12"), lines) == 1 # test + + fail = failures[4]; lines = split(fail.stacktrace, '\n') + @test length(lines)/2 ≤ 5 + @test fail.testset == "Other tests" && fail.source == included * ":2" && fail.ex == "1 + 1 == 3" + @test count(contains(included * ":2"), lines) == 2 # @testset + test + @test count(contains(runtests * ":10"), lines) == 0 # @testset (stop at the innermost testset) + + fail = failures[5]; lines = split(fail.stacktrace, '\n') + @test length(lines)/2 ≤ 6 + @test fail.testset == "Other tests" && fail.source == test_properties2_macro_source && fail.ex == "isodd(value)" + @test count(contains(included * ":2"), lines) == 1 # @testset + @test count(contains(included * ":3"), lines) == 1 # test + @test count(contains(runtests * ":10"), lines) == 0 # @testset (stop at the innermost testset) + + fail = failures[6]; lines = split(fail.stacktrace, '\n') + @test length(lines)/2 ≤ 8 + @test fail.testset == "Tests" && fail.source == test_properties2_macro_source && fail.ex == "isodd(value)" + @test count(contains(runtests * ":10"), lines) == 1 # @testset + @test count(contains(runtests * ":14"), lines) == 1 # include + @test count(contains(included * ":5"), lines) == 1 # test + + fail = failures[7]; lines = split(fail.stacktrace, '\n') + @test length(lines)/2 ≤ 9 + @test fail.testset == "Tests" && fail.source == "none:0" && fail.ex == "false" + @test count(contains(runtests * ":10"), lines) 
== 1 # @testset + @test count(contains(runtests * ":14"), lines) == 1 # include + @test count(contains(included * ":8"), lines) == 1 # test + + fail = failures[8]; lines = split(fail.stacktrace, '\n') + @test length(lines)/2 ≤ 5 + @test fail.testset == "Testset without source" && fail.source == included * ":10" && fail.ex == "false" + @test count(contains(included * ":10"), lines) == 2 # @testset + test + @test count(contains(runtests * ":10"), lines) == 0 # @testset (stop at the innermost testset) +end + let io = IOBuffer() exc = Test.TestSetException(1,2,3,4,Vector{Union{Test.Error, Test.Fail}}()) Base.showerror(io, exc, backtrace()) @@ -923,6 +1032,7 @@ end # i.e. it behaves as if it was wrapped in a `guardseed(GLOBAL_SEED)` block seed = rand(UInt128) Random.seed!(seed) + seeded_state = copy(Random.default_rng()) a = rand() @testset begin # global RNG must re-seeded at the beginning of @testset @@ -934,31 +1044,82 @@ end # the @testset's above must have no consequence for rand() below b = rand() Random.seed!(seed) + @test Random.default_rng() == seeded_state @test a == rand() @test b == rand() # Even when seed!() is called within a testset A, subsequent testsets # should start with the same "global RNG state" as what A started with, # such that the test `refvalue == rand(Int)` below succeeds. - # Currently, this means that Random.GLOBAL_SEED has to be restored, + # Currently, this means that `Random.get_tls_seed()` has to be restored, # in addition to the state of Random.default_rng(). - GLOBAL_SEED_orig = Random.GLOBAL_SEED + tls_seed_orig = copy(Random.get_tls_seed()) local refvalue - @testset "GLOBAL_SEED is also preserved (setup)" begin - @test GLOBAL_SEED_orig == Random.GLOBAL_SEED + @testset "TLS seed is also preserved (setup)" begin + @test tls_seed_orig == Random.get_tls_seed() refvalue = rand(Int) Random.seed!() - @test GLOBAL_SEED_orig != Random.GLOBAL_SEED + @test tls_seed_orig != Random.get_tls_seed() end - @test GLOBAL_SEED_orig == Random.GLOBAL_SEED - @testset "GLOBAL_SEED is also preserved (forloop)" for _=1:3 + @test tls_seed_orig == Random.get_tls_seed() + @testset "TLS seed is also preserved (forloop)" for _=1:3 @test refvalue == rand(Int) Random.seed!() end - @test GLOBAL_SEED_orig == Random.GLOBAL_SEED - @testset "GLOBAL_SEED is also preserved (beginend)" begin + @test tls_seed_orig == Random.get_tls_seed() + @testset "TLS seed is also preserved (beginend)" begin @test refvalue == rand(Int) end + + # @testset below is not compatible with e.g. v1.9, but it still fails there (at "main task") + # when deleting lines using get_tls_seed() or GLOBAL_SEED + @testset "TLS seed and concurrency" begin + # Even with multi-tasking, the TLS seed must stay consistent: the default_rng() state + # is reset to the "global seed" at the beginning, and the "global seed" is reset to what + # it was at the end of the testset; make sure that distinct tasks don't see the mutation + # of this "global seed" (iow, it's task-local) + seed = rand(UInt128) + Random.seed!(seed) + seeded_state = copy(Random.default_rng()) + a = rand() + + ch = Channel{Nothing}() + @sync begin + @async begin + @testset "task 1" begin + # tick 1 + # this task didn't call seed! 
explicitly (yet), so its TaskLocalRNG() should have been + # reset to `Random.GLOBAL_SEED` at the beginning of `@testset` + @test Random.GLOBAL_SEED == Random.default_rng() + Random.seed!() + put!(ch, nothing) # tick 1 -> tick 2 + take!(ch) # tick 3 + end + put!(ch, nothing) # tick 3 -> tick 4 + end + @async begin + take!(ch) # tick 2 + # @testset below will record the current TLS "seed" and reset default_rng() to + # this value; + # it must not be affected by the fact that "task 1" called `seed!()` first + @test Random.get_tls_seed() == Random.GLOBAL_SEED + + @testset "task 2" begin + @test Random.GLOBAL_SEED == Random.default_rng() + Random.seed!() + put!(ch, nothing) # tick 2 -> tick 3 + take!(ch) # tick 4 + end + # when `@testset` of task 2 finishes, which is after `@testset` from task 1, + # it resets `get_tls_seed()` to what it was before starting: + @test Random.get_tls_seed() == Random.GLOBAL_SEED + end + end + @testset "main task" begin + @test Random.default_rng() == seeded_state + @test a == rand() + end + end end @testset "InterruptExceptions #21043" begin @@ -1032,7 +1193,7 @@ h25835(;x=1,y=1) = x isa Int ? x*y : (rand(Bool) ? 1.0 : 1) @test @inferred(f25835(x=nothing)) == () @test @inferred(f25835(x=1)) == (1,) - # A global argument should make this uninferrable + # A global argument should make this uninferable global y25835 = 1 @test f25835(x=y25835) == (1,) @test_throws ErrorException @inferred((()->f25835(x=y25835))()) == (1,) @@ -1387,12 +1548,12 @@ Test.finish(ts::PassInformationTestSet) = ts end test_line_number = (@__LINE__) - 3 test_throws_line_number = (@__LINE__) - 3 - @test ts.results[1].test_type == :test + @test ts.results[1].test_type === :test @test ts.results[1].orig_expr == :(1 == 1) @test ts.results[1].data == Expr(:comparison, 1, :(==), 1) @test ts.results[1].value == true @test ts.results[1].source == LineNumberNode(test_line_number, @__FILE__) - @test ts.results[2].test_type == :test_throws + @test ts.results[2].test_type === :test_throws @test ts.results[2].orig_expr == :(throw(ErrorException("Msg"))) @test ts.results[2].data == ErrorException @test ts.results[2].value == ErrorException("Msg") diff --git a/stdlib/UUIDs/Project.toml b/stdlib/UUIDs/Project.toml index 11dbcda5c4944..4eb31dc9572c0 100644 --- a/stdlib/UUIDs/Project.toml +++ b/stdlib/UUIDs/Project.toml @@ -1,5 +1,6 @@ name = "UUIDs" uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" [deps] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/stdlib/UUIDs/src/UUIDs.jl b/stdlib/UUIDs/src/UUIDs.jl index 41d5319fec24d..2b137bbe88ffc 100644 --- a/stdlib/UUIDs/src/UUIDs.jl +++ b/stdlib/UUIDs/src/UUIDs.jl @@ -42,17 +42,19 @@ Generates a version 1 (time-based) universally unique identifier (UUID), as spec by RFC 4122. Note that the Node ID is randomly generated (does not identify the host) according to section 4.5 of the RFC. -The default rng used by `uuid1` is not `GLOBAL_RNG` and every invocation of `uuid1()` without +The default rng used by `uuid1` is not `Random.default_rng()` and every invocation of `uuid1()` without an argument should be expected to return a unique identifier. Importantly, the outputs of `uuid1` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.6), `uuid1` uses `Random.RandomDevice` as the default rng. However, this is an implementation detail that may change in the future. !!! compat "Julia 1.6" - The output of `uuid1` does not depend on `GLOBAL_RNG` as of Julia 1.6. 
+ The output of `uuid1` does not depend on `Random.default_rng()` as of Julia 1.6. # Examples ```jldoctest; filter = r"[a-z0-9]{8}-([a-z0-9]{4}-){3}[a-z0-9]{12}" +julia> using Random + julia> rng = MersenneTwister(1234); julia> uuid1(rng) @@ -88,21 +90,23 @@ end Generates a version 4 (random or pseudo-random) universally unique identifier (UUID), as specified by RFC 4122. -The default rng used by `uuid4` is not `GLOBAL_RNG` and every invocation of `uuid4()` without +The default rng used by `uuid4` is not `Random.default_rng()` and every invocation of `uuid4()` without an argument should be expected to return a unique identifier. Importantly, the outputs of `uuid4` do not repeat even when `Random.seed!(seed)` is called. Currently (as of Julia 1.6), `uuid4` uses `Random.RandomDevice` as the default rng. However, this is an implementation detail that may change in the future. !!! compat "Julia 1.6" - The output of `uuid4` does not depend on `GLOBAL_RNG` as of Julia 1.6. + The output of `uuid4` does not depend on `Random.default_rng()` as of Julia 1.6. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); +julia> using Random + +julia> rng = Xoshiro(123); julia> uuid4(rng) -UUID("7a052949-c101-4ca3-9a7e-43a2532b2fa8") +UUID("856e446e-0c6a-472a-9638-f7b8557cd282") ``` """ function uuid4(rng::AbstractRNG=Random.RandomDevice()) @@ -123,13 +127,15 @@ as specified by RFC 4122. # Examples ```jldoctest -julia> rng = MersenneTwister(1234); +julia> using Random + +julia> rng = Xoshiro(123); julia> u4 = uuid4(rng) -UUID("7a052949-c101-4ca3-9a7e-43a2532b2fa8") +UUID("856e446e-0c6a-472a-9638-f7b8557cd282") julia> u5 = uuid5(u4, "julia") -UUID("086cc5bb-2461-57d8-8068-0aed7f5b5cd1") +UUID("2df91e3f-da06-5362-a6fe-03772f2e14c9") ``` """ function uuid5(ns::UUID, name::String) diff --git a/stdlib/UUIDs/test/runtests.jl b/stdlib/UUIDs/test/runtests.jl index 5085fa33e8573..1770b24b3abae 100644 --- a/stdlib/UUIDs/test/runtests.jl +++ b/stdlib/UUIDs/test/runtests.jl @@ -56,10 +56,10 @@ for (init_uuid, next_uuid) in standard_namespace_uuids end # Issue 35860 -Random.seed!(Random.GLOBAL_RNG, 10) +Random.seed!(Random.default_rng(), 10) u1 = uuid1() u4 = uuid4() -Random.seed!(Random.GLOBAL_RNG, 10) +Random.seed!(Random.default_rng(), 10) @test u1 != uuid1() @test u4 != uuid4() diff --git a/stdlib/Unicode/Project.toml b/stdlib/Unicode/Project.toml index 5e3040ce9e3db..a01833870644e 100644 --- a/stdlib/Unicode/Project.toml +++ b/stdlib/Unicode/Project.toml @@ -1,6 +1,6 @@ name = "Unicode" uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - +version = "1.11.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl index 0467a8d50aa6b..9d86f6f9cbe7b 100644 --- a/stdlib/Unicode/src/Unicode.jl +++ b/stdlib/Unicode/src/Unicode.jl @@ -87,7 +87,7 @@ options (which all default to `false` except for `compose`) are specified: * `stable=true`: enforce Unicode versioning stability (never introduce characters missing from earlier Unicode versions) You can also use the `chartransform` keyword (which defaults to `identity`) to pass an arbitrary -*function* mapping `Integer` codepoints to codepoints, which is is called on each +*function* mapping `Integer` codepoints to codepoints, which is called on each character in `s` as it is processed, in order to perform arbitrary additional normalizations. 
For example, by passing `chartransform=Unicode.julia_chartransform`, you can apply a few Julia-specific character normalizations that are performed by Julia when parsing identifiers (in addition to @@ -120,7 +120,7 @@ normalize(s::AbstractString; kwargs...) = Base.Unicode.normalize(s; kwargs...) """ Unicode.isassigned(c) -> Bool -Returns `true` if the given char or integer is an assigned Unicode code point. +Return `true` if the given char or integer is an assigned Unicode code point. # Examples ```jldoctest @@ -136,7 +136,7 @@ isassigned(c) = Base.Unicode.isassigned(c) """ graphemes(s::AbstractString) -> GraphemeIterator -Returns an iterator over substrings of `s` that correspond to the extended graphemes in the +Return an iterator over substrings of `s` that correspond to the extended graphemes in the string, as defined by Unicode UAX #29. (Roughly, these are what users would perceive as single characters, even though they may contain more than one codepoint; for example a letter combined with an accent mark is a single grapheme.) @@ -208,12 +208,19 @@ end using Base.Unicode: utf8proc_error, UTF8PROC_DECOMPOSE, UTF8PROC_CASEFOLD, UTF8PROC_STRIPMARK -function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32}, options::Integer) - ret = @ccall utf8proc_decompose_char(codepoint::UInt32, dest::Ptr{UInt32}, length(dest)::Int, options::Cint, C_NULL::Ptr{Cint})::Int +function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32}, offset::Integer, options::Integer) + ret = GC.@preserve dest @ccall utf8proc_decompose_char(codepoint::UInt32, pointer(dest, 1+offset)::Ptr{UInt32}, (length(dest)-offset)::Int, options::Cint, C_NULL::Ptr{Cint})::Int ret < 0 && utf8proc_error(ret) return ret end +# would be good to have higher-level accessor functions in utf8proc. alternatively, +# we could mirror the whole utf8proc_property_t struct in Julia, but that is annoying +# because of the bitfields. +combining_class(uc::Integer) = + 0x000301 ≤ uc ≤ 0x10ffff ? unsafe_load(ccall(:utf8proc_get_property, Ptr{UInt16}, (UInt32,), uc), 2) : 0x0000 +combining_class(c::AbstractChar) = ismalformed(c) ? 0x0000 : combining_class(UInt32(c)) + """ isequal_normalized(s1::AbstractString, s2::AbstractString; casefold=false, stripmark=false, chartransform=identity) @@ -225,11 +232,14 @@ As with [`Unicode.normalize`](@ref), you can also pass an arbitrary function via the `chartransform` keyword (mapping `Integer` codepoints to codepoints) to perform custom normalizations, such as [`Unicode.julia_chartransform`](@ref). +!!! compat "Julia 1.8" + The `isequal_normalized` function was added in Julia 1.8. + # Examples For example, the string `"noël"` can be constructed in two canonically equivalent ways in Unicode, depending on whether `"ë"` is formed from a single codepoint U+00EB or -from the ASCII character `'o'` followed by the U+0308 combining-diaeresis character. +from the ASCII character `'e'` followed by the U+0308 combining-diaeresis character. ```jldoctest julia> s1 = "no\u00EBl" @@ -251,29 +261,78 @@ julia> isequal_normalized(s1, "NOËL", casefold=true) true ``` """ -function isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bool=false, stripmark::Bool=false, chartransform=identity) - function decompose_next_char!(c, state, d, options, s) - n = _decompose_char!(c, d, options) - if n > length(d) # may be possible in future Unicode versions? 
- n = _decompose_char!(c, resize!(d, n), options) +isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bool=false, stripmark::Bool=false, chartransform=identity) = + _isequal_normalized!(s1, s2, Vector{UInt32}(undef, 4), Vector{UInt32}(undef, 4), chartransform; casefold, stripmark) + +# like isequal_normalized, but takes pre-allocated codepoint buffers as arguments, and chartransform is a positional argument +function _isequal_normalized!(s1::AbstractString, s2::AbstractString, + d1::Vector{UInt32}, d2::Vector{UInt32}, chartransform::F=identity; + casefold::Bool=false, stripmark::Bool=false) where {F} + function decompose_next_chars!(state, d, options, s) + local n + offset = 0 + @inbounds while true + # read a char and decompose it to d + c = chartransform(UInt32(state[1])) + state = iterate(s, state[2]) + if c < 0x80 # fast path for common ASCII case + n = 1 + offset + n > length(d) && resize!(d, 2n) + d[n] = casefold ? (0x41 ≤ c ≤ 0x5A ? c+0x20 : c) : c + break # ASCII characters are all zero combining class + else + while true + n = _decompose_char!(c, d, offset, options) + offset + if n > length(d) + resize!(d, 2n) + continue + end + break + end + end + + # decomposed chars must be sorted in ascending order of combining class, + # which means we need to keep fetching chars until we get to non-combining + (iszero(combining_class(d[n])) || isnothing(state)) && break # non-combining + offset = n end - return 1, n, iterate(s, state) + + # sort by combining class + if n < 32 # almost always true + for j1 = 2:n # insertion sort + cc = combining_class(d[j1]) + iszero(cc) && continue # don't re-order non-combiners + for j2 = j1:-1:2 + combining_class(d[j2-1]) ≤ cc && break + d[j2-1], d[j2] = d[j2], d[j2-1] + end + end + else # avoid n^2 complexity in crazy large-n case + j = 1 + @views while j < n + j₀ = j + something(findnext(iszero ∘ combining_class, d[j+1:n], 1), n+1-j) + sort!(d[j:j₀-1], by=combining_class) + j = j₀ + end + end + + # split return statement to help type inference: + return state === nothing ? 
(1, n, nothing) : (1, n, state) end options = UTF8PROC_DECOMPOSE casefold && (options |= UTF8PROC_CASEFOLD) stripmark && (options |= UTF8PROC_STRIPMARK) i1,i2 = iterate(s1),iterate(s2) - d1,d2 = Vector{UInt32}(undef, 4), Vector{UInt32}(undef, 4) # codepoint buffers n1 = n2 = 0 # lengths of codepoint buffers j1 = j2 = 1 # indices in d1, d2 while true if j1 > n1 i1 === nothing && return i2 === nothing && j2 > n2 - j1, n1, i1 = decompose_next_char!(chartransform(UInt32(i1[1])), i1[2], d1, options, s1) + j1, n1, i1 = decompose_next_chars!(i1, d1, options, s1) end if j2 > n2 i2 === nothing && return false - j2, n2, i2 = decompose_next_char!(chartransform(UInt32(i2[1])), i2[2], d2, options, s2) + j2, n2, i2 = decompose_next_chars!(i2, d2, options, s2) end d1[j1] == d2[j2] || return false j1 += 1; j2 += 1 diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl index 5c5a75b33e363..5248bd1e1fd27 100644 --- a/stdlib/Unicode/test/runtests.jl +++ b/stdlib/Unicode/test/runtests.jl @@ -3,6 +3,9 @@ using Test using Unicode using Unicode: normalize, isassigned, julia_chartransform +import Random + +Random.seed!(12345) @testset "string normalization" begin # normalize (Unicode normalization etc.): @@ -27,8 +30,8 @@ using Unicode: normalize, isassigned, julia_chartransform @test normalize("\u0072\u0307\u0323", :NFC) == "\u1E5B\u0307" #26917 # julia_chartransform identifier normalization - @test normalize("julia\u025B\u00B5\u00B7\u0387\u2212", chartransform=julia_chartransform) == - "julia\u03B5\u03BC\u22C5\u22C5\u002D" + @test normalize("julia\u025B\u00B5\u00B7\u0387\u2212\u210F", chartransform=julia_chartransform) == + "julia\u03B5\u03BC\u22C5\u22C5\u002D\u0127" @test julia_chartransform('\u00B5') === '\u03BC' end @@ -455,6 +458,9 @@ end @test !Base.Unicode.isvalid(Char, overlong_char) end +# the obvious, but suboptimal, algorithm: +isequal_normalized_naive(s1, s2; kws...) = normalize(s1; kws...) == normalize(s2; kws...) 
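# Illustrative sketch (not part of this patch, assumes the Unicode stdlib is loaded):
# canonically equivalent strings can differ codepoint-by-codepoint, e.g. a precomposed
# "ë" versus 'e' followed by the U+0308 combining diaeresis, so the streaming
# `isequal_normalized` has to agree with the naive normalize-then-compare helper above:
#
#     using Unicode
#     s1 = "no\u00EBl"                 # precomposed U+00EB
#     s2 = "noe\u0308l"                # 'e' plus a combining diaeresis
#     isequal_normalized(s1, s2)       # true
#     normalize(s1) == normalize(s2)   # true, the naive check gives the same answer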
+ @testset "Unicode equivalence" begin @test isequal_normalized("no\u00EBl", "noe\u0308l") @test !isequal_normalized("no\u00EBl", "noe\u0308l ") @@ -466,4 +472,65 @@ end @test isequal_normalized("no\u00EBl", "noel", stripmark=true) @test isequal_normalized("no\u00EBl", "NOEL", stripmark=true, casefold=true) @test isequal_normalized("\u00B5\u0302m", "\u03BC\u0302m", chartransform=julia_chartransform) + + # issue #52408 + @testset "Sorting combining characters" begin + for str in ("\u5bc\u5b0", "j\u5ae\u5bf\u5b2\u5b4") # julia#52408 examples + @test isequal_normalized(str, normalize(str)) + end + + # first codepoint in every possible Unicode combining class + let cc_chars = UInt32[0x00000334, 0x00016ff0, 0x0000093c, 0x00003099, 0x0000094d, 0x000005b0, 0x000005b1, 0x000005b2, 0x000005b3, 0x000005b4, 0x000005b5, 0x000005b6, 0x000005b7, 0x000005b8, 0x000005b9, 0x000005bb, 0x000005bc, 0x000005bd, 0x000005bf, 0x000005c1, 0x000005c2, 0x0000fb1e, 0x0000064b, 0x0000064c, 0x0000064d, 0x00000618, 0x00000619, 0x0000061a, 0x00000651, 0x00000652, 0x00000670, 0x00000711, 0x00000c55, 0x00000c56, 0x00000e38, 0x00000e48, 0x00000eb8, 0x00000ec8, 0x00000f71, 0x00000f72, 0x00000f74, 0x00000321, 0x00001dce, 0x0000031b, 0x00001dfa, 0x00000316, 0x0000059a, 0x0000302e, 0x0001d16d, 0x000005ae, 0x00000301, 0x00000315, 0x0000035c, 0x0000035d, 0x00000345], + vowels = ['a', 'e', 'i', 'o', 'u', 'å', 'é', 'î', 'ö', 'ü'], Vowels = [vowels; uppercase.(vowels)] + function randcc(n, n_cc) # random string with lots of combining chars + buf = IOBuffer() + for _ = 1:n + print.(buf, rand(Vowels, rand(1:5))) + print.(buf, Char.(rand(cc_chars, rand(0:n_cc)))) + end + return String(take!(buf)) + end + for _ = 1:100 + s = randcc(10,10) + ns = normalize(s) + cs = normalize(s, casefold=true) + @test isequal_normalized(s, s) + if !isequal_normalized(s, ns) + @show s + end + @test isequal_normalized(s, ns) + @test isequal_normalized(cs, ns) == isequal_normalized_naive(cs, ns) + @test isequal_normalized(cs, ns, casefold=true) == + isequal_normalized_naive(cs, ns, casefold=true) + end + for _ = 1:3 + s = randcc(5,1000) # exercise sort!-based fallback + @test isequal_normalized(s, normalize(s)) + end + function randcc2(n, n_cc) # 2 strings with equivalent reordered combiners + buf1 = IOBuffer() + buf2 = IOBuffer() + p = n_cc / length(cc_chars) + for _ = 1:n + a = join(rand(Vowels, rand(1:5))) + print(buf1, a) + print(buf2, a) + + # chars from distinct combining classes + # are canonically equivalent when re-ordered + c = Random.randsubseq(cc_chars, p) + print.(buf1, Char.(Random.shuffle!(c))) + print.(buf2, Char.(Random.shuffle!(c))) + end + return String(take!(buf1)), String(take!(buf2)) + end + for _ = 1:100 + s1, s2 = randcc2(10,10) + @test isequal_normalized(s1, s2) + end + end + + # combining characters in the same class are inequivalent if re-ordered: + @test !isequal_normalized("x\u0334\u0335", "x\u0335\u0334") + end end diff --git a/stdlib/Zlib_jll/Project.toml b/stdlib/Zlib_jll/Project.toml index 77e1da5f9c22e..b1fa9576af3ec 100644 --- a/stdlib/Zlib_jll/Project.toml +++ b/stdlib/Zlib_jll/Project.toml @@ -1,6 +1,6 @@ name = "Zlib_jll" uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.12+3" +version = "1.2.13+1" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/Zlib_jll/src/Zlib_jll.jl b/stdlib/Zlib_jll/src/Zlib_jll.jl index c05e26c4c6993..ea381b8b0683c 100644 --- a/stdlib/Zlib_jll/src/Zlib_jll.jl +++ b/stdlib/Zlib_jll/src/Zlib_jll.jl @@ -13,9 +13,9 @@ export libz # These get calculated in 
__init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libz_handle = C_NULL -libz_path = "" +artifact_dir::String = "" +libz_handle::Ptr{Cvoid} = C_NULL +libz_path::String = "" if Sys.iswindows() const libz = "libz.dll" diff --git a/stdlib/Zlib_jll/test/runtests.jl b/stdlib/Zlib_jll/test/runtests.jl index cc9e64188a0aa..f04f9c70a7054 100644 --- a/stdlib/Zlib_jll/test/runtests.jl +++ b/stdlib/Zlib_jll/test/runtests.jl @@ -3,5 +3,5 @@ using Test, Zlib_jll @testset "Zlib_jll" begin - @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.2.12" + @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.2.13" end diff --git a/stdlib/dSFMT_jll/Project.toml b/stdlib/dSFMT_jll/Project.toml index 4e3e80f918f0b..a83775f625987 100644 --- a/stdlib/dSFMT_jll/Project.toml +++ b/stdlib/dSFMT_jll/Project.toml @@ -1,6 +1,6 @@ name = "dSFMT_jll" uuid = "05ff407c-b0c1-5878-9df8-858cc2e60c36" -version = "2.2.4+1" +version = "2.2.4+4" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/dSFMT_jll/src/dSFMT_jll.jl b/stdlib/dSFMT_jll/src/dSFMT_jll.jl index f1d6d019faf59..35ada23778a94 100644 --- a/stdlib/dSFMT_jll/src/dSFMT_jll.jl +++ b/stdlib/dSFMT_jll/src/dSFMT_jll.jl @@ -14,9 +14,9 @@ export libdSFMT # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libdSFMT_handle = C_NULL -libdSFMT_path = "" +artifact_dir::String = "" +libdSFMT_handle::Ptr{Cvoid} = C_NULL +libdSFMT_path::String = "" if Sys.iswindows() const libdSFMT = "libdSFMT.dll" diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml index d59cf8a4583ba..92867293c3116 100644 --- a/stdlib/libLLVM_jll/Project.toml +++ b/stdlib/libLLVM_jll/Project.toml @@ -1,6 +1,6 @@ name = "libLLVM_jll" uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a" -version = "14.0.5+0" +version = "15.0.7+10" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/libLLVM_jll/src/libLLVM_jll.jl b/stdlib/libLLVM_jll/src/libLLVM_jll.jl index 331600eab6523..3140dc3989a72 100644 --- a/stdlib/libLLVM_jll/src/libLLVM_jll.jl +++ b/stdlib/libLLVM_jll/src/libLLVM_jll.jl @@ -14,16 +14,16 @@ export libLLVM # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libLLVM_handle = C_NULL -libLLVM_path = "" +artifact_dir::String = "" +libLLVM_handle::Ptr{Cvoid} = C_NULL +libLLVM_path::String = "" if Sys.iswindows() - const libLLVM = "libLLVM-14jl.dll" + const libLLVM = "$(Base.libllvm_name).dll" elseif Sys.isapple() const libLLVM = "@rpath/libLLVM.dylib" else - const libLLVM = "libLLVM-14jl.so" + const libLLVM = "$(Base.libllvm_name).so" end function __init__() diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml index 9f96421b2089a..6ee704bc7e1c4 100644 --- a/stdlib/libblastrampoline_jll/Project.toml +++ b/stdlib/libblastrampoline_jll/Project.toml @@ -1,13 +1,13 @@ name = "libblastrampoline_jll" uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" -version = "5.1.1+0" +version = "5.8.0+1" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [compat] -julia = "1.8" +julia = "1.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl index 77882067ed633..49e7932a6b701 100644 --- a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl +++ 
b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl @@ -14,16 +14,17 @@ export libblastrampoline # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libblastrampoline_handle = C_NULL -libblastrampoline_path = "" +artifact_dir::String = "" +libblastrampoline_handle::Ptr{Cvoid} = C_NULL +libblastrampoline_path::String = "" +# NOTE: keep in sync with `Base.libblas_name` and `Base.liblapack_name`. const libblastrampoline = if Sys.iswindows() - "libblastrampoline.dll" + "libblastrampoline-5.dll" elseif Sys.isapple() - "@rpath/libblastrampoline.dylib" + "@rpath/libblastrampoline.5.dylib" else - "libblastrampoline.so" + "libblastrampoline.so.5" end function __init__() diff --git a/stdlib/libblastrampoline_jll/test/runtests.jl b/stdlib/libblastrampoline_jll/test/runtests.jl index 80095e70f0c76..e64fc328771be 100644 --- a/stdlib/libblastrampoline_jll/test/runtests.jl +++ b/stdlib/libblastrampoline_jll/test/runtests.jl @@ -3,5 +3,5 @@ using Test, Libdl, libblastrampoline_jll @testset "libblastrampoline_jll" begin - @test isa(Libdl.dlsym(Libdl.dlopen(:libblastrampoline), :dgemm_64_), Ptr{Nothing}) + @test isa(Libdl.dlsym(libblastrampoline_jll.libblastrampoline_handle, :dgemm_64_), Ptr{Nothing}) end diff --git a/stdlib/nghttp2_jll/Project.toml b/stdlib/nghttp2_jll/Project.toml index e768d6fc84b96..560a9537a3b21 100644 --- a/stdlib/nghttp2_jll/Project.toml +++ b/stdlib/nghttp2_jll/Project.toml @@ -1,6 +1,6 @@ name = "nghttp2_jll" uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.47.0+0" +version = "1.58.0+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/nghttp2_jll/src/nghttp2_jll.jl b/stdlib/nghttp2_jll/src/nghttp2_jll.jl index 09af350636943..76e8d3582c402 100644 --- a/stdlib/nghttp2_jll/src/nghttp2_jll.jl +++ b/stdlib/nghttp2_jll/src/nghttp2_jll.jl @@ -13,9 +13,9 @@ export libnghttp2 # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -libnghttp2_handle = C_NULL -libnghttp2_path = "" +artifact_dir::String = "" +libnghttp2_handle::Ptr{Cvoid} = C_NULL +libnghttp2_path::String = "" if Sys.iswindows() const libnghttp2 = "libnghttp2-14.dll" diff --git a/stdlib/nghttp2_jll/test/runtests.jl b/stdlib/nghttp2_jll/test/runtests.jl index 9a1dcd1c91cc2..8155e4e787fcc 100644 --- a/stdlib/nghttp2_jll/test/runtests.jl +++ b/stdlib/nghttp2_jll/test/runtests.jl @@ -11,5 +11,5 @@ end @testset "nghttp2_jll" begin info = unsafe_load(ccall((:nghttp2_version,libnghttp2), Ptr{nghttp2_info}, (Cint,), 0)) - @test VersionNumber(unsafe_string(info.version_str)) == v"1.47.0" + @test VersionNumber(unsafe_string(info.version_str)) == v"1.58.0" end diff --git a/stdlib/p7zip_jll/Project.toml b/stdlib/p7zip_jll/Project.toml index 4c9bf62ad7ec1..b1bd4bc9e0a1a 100644 --- a/stdlib/p7zip_jll/Project.toml +++ b/stdlib/p7zip_jll/Project.toml @@ -1,6 +1,6 @@ name = "p7zip_jll" uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" +version = "17.4.0+2" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/stdlib/p7zip_jll/src/p7zip_jll.jl b/stdlib/p7zip_jll/src/p7zip_jll.jl index 4320003b282f7..01f26de936e78 100644 --- a/stdlib/p7zip_jll/src/p7zip_jll.jl +++ b/stdlib/p7zip_jll/src/p7zip_jll.jl @@ -13,8 +13,8 @@ export p7zip # These get calculated in __init__() const PATH = Ref("") const LIBPATH = Ref("") -artifact_dir = "" -p7zip_path = "" +artifact_dir::String = "" +p7zip_path::String = "" if Sys.iswindows() const p7zip_exe = "7z.exe" else @@ -69,8 +69,8 @@ end function 
init_p7zip_path() # Prefer our own bundled p7zip, but if we don't have one, pick it up off of the PATH - # If this is an in-tree build, `7z` will live in `bin`. Otherwise, it'll be in `libexec` - for bundled_p7zip_path in (joinpath(Sys.BINDIR, Base.LIBEXECDIR, p7zip_exe), + # If this is an in-tree build, `7z` will live in `bindir`. Otherwise, it'll be in `private_libexecdir` + for bundled_p7zip_path in (joinpath(Sys.BINDIR, Base.PRIVATE_LIBEXECDIR, p7zip_exe), joinpath(Sys.BINDIR, p7zip_exe)) if isfile(bundled_p7zip_path) global p7zip_path = abspath(bundled_p7zip_path) diff --git a/stdlib/stdlib.mk b/stdlib/stdlib.mk new file mode 100644 index 0000000000000..696b24a8f8bf1 --- /dev/null +++ b/stdlib/stdlib.mk @@ -0,0 +1,27 @@ +STDLIBS_WITHIN_SYSIMG := \ + Artifacts FileWatching Libdl SHA libblastrampoline_jll OpenBLAS_jll Random \ + LinearAlgebra Sockets + +INDEPENDENT_STDLIBS := \ + ArgTools Base64 CRC32c Dates DelimitedFiles Distributed Downloads Future \ + InteractiveUtils LazyArtifacts LibGit2 LibCURL Logging Markdown Mmap \ + NetworkOptions Profile Printf Pkg REPL Serialization SharedArrays SparseArrays \ + Statistics StyledStrings Tar Test TOML Unicode UUIDs \ + dSFMT_jll GMP_jll libLLVM_jll LLD_jll LLVMLibUnwind_jll LibUnwind_jll LibUV_jll \ + LibCURL_jll LibSSH2_jll LibGit2_jll nghttp2_jll MozillaCACerts_jll MbedTLS_jll \ + MPFR_jll OpenLibm_jll PCRE2_jll p7zip_jll Zlib_jll + + +STDLIBS := $(STDLIBS_WITHIN_SYSIMG) $(INDEPENDENT_STDLIBS) +VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION) + +SYSIMG_STDLIB_SRCS = +define STDLIB_srcs +$1_SRCS := $$(shell find $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/src -name \*.jl) \ + $$(wildcard $$(build_prefix)/manifest/$$(VERSDIR)/$1) $$(build_datarootdir)/julia/stdlib/$$(VERSDIR)/$1/Project.toml +ifneq ($(filter $(1),$(STDLIBS_WITHIN_SYSIMG)),) + SYSIMG_STDLIB_SRCS += $$($1_SRCS) +endif +endef + +$(foreach stdlib,$(STDLIBS),$(eval $(call STDLIB_srcs,$(stdlib)))) diff --git a/sysimage.mk b/sysimage.mk index 8b7d19926f9da..e6fa54be5f186 100644 --- a/sysimage.mk +++ b/sysimage.mk @@ -2,6 +2,7 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) BUILDDIR := . JULIAHOME := $(SRCDIR) include $(JULIAHOME)/Make.inc +include $(JULIAHOME)/stdlib/stdlib.mk default: sysimg-$(JULIA_BUILD_MODE) # contains either "debug" or "release" all: sysimg-release sysimg-debug @@ -10,7 +11,7 @@ sysimg-bc: $(build_private_libdir)/sys-bc.a sysimg-release: $(build_private_libdir)/sys.$(SHLIB_EXT) sysimg-debug: $(build_private_libdir)/sys-debug.$(SHLIB_EXT) -VERSDIR := v`cut -d. -f1-2 < $(JULIAHOME)/VERSION` +VERSDIR := v$(shell cut -d. 
-f1-2 < $(JULIAHOME)/VERSION) $(build_private_libdir)/%.$(SHLIB_EXT): $(build_private_libdir)/%-o.a @$(call PRINT_LINK, $(CXX) $(LDFLAGS) -shared $(fPIC) -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -o $@ \ @@ -20,62 +21,71 @@ $(build_private_libdir)/%.$(SHLIB_EXT): $(build_private_libdir)/%-o.a @$(INSTALL_NAME_CMD)$(notdir $@) $@ @$(DSYMUTIL) $@ -BASE_DIR := $(build_datarootdir)/julia/src -COMPILER_SRCS := $(addprefix $(BASE_DIR)/base/, \ - boot.jl \ - docs/core.jl \ - abstractarray.jl \ - abstractdict.jl \ - array.jl \ - bitarray.jl \ - bitset.jl \ - bool.jl \ - ctypes.jl \ - error.jl \ - essentials.jl \ - expr.jl \ - generator.jl \ - int.jl \ - indices.jl \ - iterators.jl \ - namedtuple.jl \ - number.jl \ - operators.jl \ - options.jl \ - pair.jl \ - pointer.jl \ - promotion.jl \ - range.jl \ - reflection.jl \ - traits.jl \ - refvalue.jl \ - tuple.jl) -COMPILER_SRCS += $(shell find $(BASE_DIR)/base/compiler -name \*.jl) -BASE_SRCS := $(shell find $(BASE_DIR) -name \*.jl -and -not -name sysimg.jl) -STDLIB_SRCS := $(BASE_DIR)/base/sysimg.jl $(shell find $(build_datarootdir)/julia/stdlib/$(VERSDIR)/*/src -name \*.jl) +COMPILER_SRCS := $(addprefix $(JULIAHOME)/, \ + base/boot.jl \ + base/docs/core.jl \ + base/abstractarray.jl \ + base/abstractdict.jl \ + base/abstractset.jl \ + base/iddict.jl \ + base/idset.jl \ + base/array.jl \ + base/bitarray.jl \ + base/bitset.jl \ + base/bool.jl \ + base/ctypes.jl \ + base/error.jl \ + base/essentials.jl \ + base/expr.jl \ + base/generator.jl \ + base/int.jl \ + base/indices.jl \ + base/iterators.jl \ + base/namedtuple.jl \ + base/number.jl \ + base/operators.jl \ + base/options.jl \ + base/pair.jl \ + base/pointer.jl \ + base/promotion.jl \ + base/range.jl \ + base/reflection.jl \ + base/traits.jl \ + base/refvalue.jl \ + base/tuple.jl) +COMPILER_SRCS += $(shell find $(JULIAHOME)/base/compiler -name \*.jl) +# sort these to remove duplicates +BASE_SRCS := $(sort $(shell find $(JULIAHOME)/base -name \*.jl -and -not -name sysimg.jl) \ + $(shell find $(BUILDROOT)/base -name \*.jl -and -not -name sysimg.jl)) +STDLIB_SRCS := $(JULIAHOME)/base/sysimg.jl $(SYSIMG_STDLIB_SRCS) +RELBUILDROOT := $(call rel_path,$(JULIAHOME)/base,$(BUILDROOT)/base)/ # <-- make sure this always has a trailing slash $(build_private_libdir)/corecompiler.ji: $(COMPILER_SRCS) - @$(call PRINT_JULIA, cd $(BASE_DIR)/base && \ - $(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp \ + @$(call PRINT_JULIA, cd $(JULIAHOME)/base && \ + $(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp \ --startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O0 compiler/compiler.jl) @mv $@.tmp $@ $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS) - @$(call PRINT_JULIA, cd $(BASE_DIR)/base && \ + @$(call PRINT_JULIA, cd $(JULIAHOME)/base && \ if ! 
JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \ - $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \ - --startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl; then \ + $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \ + --startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl $(RELBUILDROOT); then \ echo '*** This error might be fixed by running `make clean`. If the error persists$(COMMA) try `make cleanall`. ***'; \ false; \ fi ) @mv $@.tmp $@ define sysimg_builder -$$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji - @$$(call PRINT_JULIA, cd $$(BASE_DIR)/base && \ - if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \ - JULIA_NUM_THREADS=1 \ - $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \ +$$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji $$(JULIAHOME)/contrib/generate_precompile.jl + @$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \ + if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) \ + WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \ + JULIA_LOAD_PATH='@stdlib' \ + JULIA_PROJECT= \ + JULIA_DEPOT_PATH=':' \ + JULIA_NUM_THREADS=1 \ + $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" $$(HEAPLIM) --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \ --startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \ echo '*** This error is usually fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. ***'; \ false; \ diff --git a/test/.gitignore b/test/.gitignore index a1af9ae3d44bf..fc55a0df3a173 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -2,3 +2,5 @@ /ccalltest /ccalltest.s /libccalltest.* +/relocatedepot +/RelocationTestPkg2/src/foo.txt diff --git a/test/Makefile b/test/Makefile index 24e137a5b1492..c3b877793f4b5 100644 --- a/test/Makefile +++ b/test/Makefile @@ -6,10 +6,15 @@ VERSDIR := v$(shell cut -d. 
-f1-2 < $(JULIAHOME)/VERSION) STDLIBDIR := $(build_datarootdir)/julia/stdlib/$(VERSDIR) # TODO: this Makefile ignores BUILDDIR, except for computing JULIA_EXECUTABLE +export JULIA_DEPOT_PATH := $(build_prefix)/share/julia +export JULIA_LOAD_PATH := @$(PATHSEP)@stdlib +unexport JULIA_PROJECT := +unexport JULIA_BINDIR := + TESTGROUPS = unicode strings compiler TESTS = all default stdlib $(TESTGROUPS) \ $(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \ - $(filter-out runtests testdefs, \ + $(filter-out runtests testdefs relocatedepot, \ $(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/*.jl))) \ $(foreach group,$(TESTGROUPS), \ $(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/$(group)/*.jl))) @@ -29,6 +34,28 @@ $(addprefix revise-, $(TESTS)): revise-% : @cd $(SRCDIR) && \ $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) +relocatedepot: + @rm -rf $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) + @mkdir $(SRCDIR)/relocatedepot + @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@) + +revise-relocatedepot: revise-% : + @rm -rf $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) + @mkdir $(SRCDIR)/relocatedepot + @cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot + @cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot + @cd $(SRCDIR) && \ + $(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*) + embedding: @$(MAKE) -C $(SRCDIR)/$@ check $(EMBEDDING_ARGS) @@ -42,4 +69,4 @@ clean: @$(MAKE) -C embedding $@ $(EMBEDDING_ARGS) @$(MAKE) -C gcext $@ $(GCEXT_ARGS) -.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) embedding gcext clangsa clean +.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext clangsa clean diff --git a/test/RelocationTestPkg1/Project.toml b/test/RelocationTestPkg1/Project.toml new file mode 100644 index 0000000000000..826980207d508 --- /dev/null +++ b/test/RelocationTestPkg1/Project.toml @@ -0,0 +1,4 @@ +name = "RelocationTestPkg1" +uuid = "854e1adb-5a97-46bf-a391-1cfe05ac726d" +authors = ["flo "] +version = "0.1.0" diff --git a/test/RelocationTestPkg1/src/RelocationTestPkg1.jl b/test/RelocationTestPkg1/src/RelocationTestPkg1.jl new file mode 100644 index 0000000000000..a86543a61b3f8 --- /dev/null +++ b/test/RelocationTestPkg1/src/RelocationTestPkg1.jl @@ -0,0 +1,5 @@ +module RelocationTestPkg1 + +greet() = print("Hello World!") + +end # module RelocationTestPkg1 diff --git a/stdlib/TOML/test/testfiles/valid/empty.toml b/test/RelocationTestPkg1/src/foo.txt similarity index 100% rename from stdlib/TOML/test/testfiles/valid/empty.toml rename to 
test/RelocationTestPkg1/src/foo.txt diff --git a/test/RelocationTestPkg2/Project.toml b/test/RelocationTestPkg2/Project.toml new file mode 100644 index 0000000000000..68da889785215 --- /dev/null +++ b/test/RelocationTestPkg2/Project.toml @@ -0,0 +1,4 @@ +name = "RelocationTestPkg2" +uuid = "8d933983-b090-4b0b-a37e-c34793f459d1" +authors = ["flo "] +version = "0.1.0" diff --git a/test/RelocationTestPkg2/src/RelocationTestPkg2.jl b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl new file mode 100644 index 0000000000000..0d8b5e15edf06 --- /dev/null +++ b/test/RelocationTestPkg2/src/RelocationTestPkg2.jl @@ -0,0 +1,6 @@ +module RelocationTestPkg2 + +include_dependency("foo.txt") +greet() = print("Hello World!") + +end # module RelocationTestPkg2 diff --git a/test/RelocationTestPkg2/src/foo.txt b/test/RelocationTestPkg2/src/foo.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 111e2cabbe7c2..8762440a2f25a 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -2,6 +2,9 @@ using Random, LinearAlgebra +isdefined(Main, :InfiniteArrays) || @eval Main include("testhelpers/InfiniteArrays.jl") +using .Main.InfiniteArrays + A = rand(5,4,3) @testset "Bounds checking" begin @test checkbounds(Bool, A, 1, 1, 1) == true @@ -56,6 +59,20 @@ end @test checkbounds(Bool, A, CartesianIndex((5,)), CartesianIndex((4,)), CartesianIndex((4,))) == false end +@testset "Infinite axes" begin + r = OneToInf() + @testset "CartesianIndices" begin + C = CartesianIndices(size(r)) + ax = to_indices(r, (C,))[1] + @test ax === r + end + @testset "LinearIndices" begin + L = LinearIndices(size(r)) + ax = to_indices(r, (L,))[1] + @test ax === L + end +end + @testset "vector indices" begin @test checkbounds(Bool, A, 1:5, 1:4, 1:3) == true @test checkbounds(Bool, A, 0:5, 1:4, 1:3) == false @@ -236,6 +253,19 @@ end end end +@testset "AbstractArray fallbacks for CartesianIndices" begin + @test ndims(CartesianIndices{3}) == 3 + @test eltype(CartesianIndices{3}) == CartesianIndex{3} + for t in ((1:2, 1:2), (3:4,), ()) + C2 = CartesianIndices(t) + @test ndims(C2) == length(t) + @test ndims(typeof(C2)) == length(t) + @test IndexStyle(C2) == IndexCartesian() + @test eltype(C2) == CartesianIndex{length(t)} + @test Base.IteratorSize(C2) isa Base.HasShape{length(t)} + end +end + @testset "LinearIndices" begin @testset "constructors" begin for oinds in [ @@ -520,9 +550,6 @@ function test_primitives(::Type{T}, shape, ::Type{TestAbstractArray}) where T @test convert(Matrix, Y) == Y @test convert(Matrix, view(Y, 1:2, 1:2)) == Y @test_throws MethodError convert(Matrix, X) - - # convert(::Type{Union{}}, A::AbstractMatrix) - @test_throws MethodError convert(Union{}, X) end mutable struct TestThrowNoGetindex{T} <: AbstractVector{T} end @@ -673,8 +700,8 @@ function test_cat(::Type{TestAbstractArray}) @test hcat() == Any[] @test vcat(1, 1.0, 3, 3.0) == [1.0, 1.0, 3.0, 3.0] @test hcat(1, 1.0, 3, 3.0) == [1.0 1.0 3.0 3.0] - @test_throws ArgumentError hcat(B1, B2) - @test_throws ArgumentError vcat(C1, C2) + @test_throws DimensionMismatch hcat(B1, B2) + @test_throws DimensionMismatch vcat(C1, C2) @test vcat(B) == B @test hcat(B) == B @@ -685,6 +712,14 @@ function test_cat(::Type{TestAbstractArray}) @test Base.typed_hcat(Float64, B, B) == TSlow(b2hcat) @test Base.typed_hcat(Float64, B, B, B) == TSlow(b3hcat) + @testset "issue #49676, bad error message on v[1 +1]" begin + # This is here because all these expressions are handled by Base.typed_hcat + v = [1 2 3] + 
@test_throws ArgumentError v[1 +1] + @test_throws ArgumentError v[1 1] + @test_throws ArgumentError v[[1 2] [2 3]] + end + @test vcat(B1, B2) == TSlow(vcat([1:24...], [1:25...])) @test hcat(C1, C2) == TSlow([1 2 1 2 3; 3 4 4 5 6]) @test hcat(C1, C2, C1) == TSlow([1 2 1 2 3 1 2; 3 4 4 5 6 3 4]) @@ -695,9 +730,9 @@ function test_cat(::Type{TestAbstractArray}) end @test_throws ArgumentError hvcat(7, 1:20...) - @test_throws ArgumentError hvcat((2), C1, C3) - @test_throws ArgumentError hvcat((1), C1, C2) - @test_throws ArgumentError hvcat((1), C2, C3) + @test_throws DimensionMismatch hvcat((2), C1, C3) + @test_throws DimensionMismatch hvcat((1), C1, C2) + @test_throws DimensionMismatch hvcat((1), C2, C3) tup = tuple(rand(1:10, i)...) @test hvcat(tup) == [] @@ -706,8 +741,8 @@ function test_cat(::Type{TestAbstractArray}) @test_throws ArgumentError hvcat((2, 2), 1, 2, 3, 4, 5) @test_throws ArgumentError Base.typed_hvcat(Int, (2, 2), 1, 2, 3, 4, 5) # check for # of columns mismatch b/w rows - @test_throws ArgumentError hvcat((3, 2), 1, 2, 3, 4, 5, 6) - @test_throws ArgumentError Base.typed_hvcat(Int, (3, 2), 1, 2, 3, 4, 5, 6) + @test_throws DimensionMismatch hvcat((3, 2), 1, 2, 3, 4, 5, 6) + @test_throws DimensionMismatch Base.typed_hvcat(Int, (3, 2), 1, 2, 3, 4, 5, 6) # 18395 @test isa(Any["a" 5; 2//3 1.0][2,1], Rational{Int}) @@ -993,9 +1028,9 @@ end end i = CartesianIndex(17,-2) - @test CR .+ i === i .+ CR === CartesianIndices((19:21, -1:3)) - @test CR .- i === CartesianIndices((-15:-13, 3:7)) - @test collect(i .- CR) == Ref(i) .- collect(CR) + @test CR .+ i === i .+ CR === CartesianIndices((19:21, -1:3)) == collect(CR) .+ i + @test CR .- i === CartesianIndices((-15:-13, 3:7)) == collect(CR) .- i + @test collect(i .- CR) == Ref(i) .- collect(CR) == i .- collect(CR) end @testset "issue #25770" begin @@ -1044,6 +1079,7 @@ end @testset "IndexStyle for various types" begin @test Base.IndexStyle(UpperTriangular) == IndexCartesian() # subtype of AbstractArray, not of Array @test Base.IndexStyle(Vector) == IndexLinear() + @test Base.IndexStyle(Memory) == IndexLinear() @test Base.IndexStyle(UnitRange) == IndexLinear() @test Base.IndexStyle(UpperTriangular(rand(3, 3)), [1; 2; 3]) == IndexCartesian() @test Base.IndexStyle(UpperTriangular(rand(3, 3)), rand(3, 3), [1; 2; 3]) == IndexCartesian() @@ -1073,23 +1109,23 @@ end @testset "sizeof" begin let arrUInt8 = zeros(UInt8, 10) @test sizeof(arrUInt8) == 10 - @test Core.sizeof(arrUInt8) == 10 + @test Core.sizeof(arrUInt8) == 3 * sizeof(Int) end let arrUInt32 = zeros(UInt32, 10) @test sizeof(arrUInt32) == 40 - @test Core.sizeof(arrUInt32) == 40 + @test Core.sizeof(arrUInt32) == 3 * sizeof(Int) end let arrFloat64 = zeros(Float64, 10, 10) @test sizeof(arrFloat64) == 800 - @test Core.sizeof(arrFloat64) == 800 + @test Core.sizeof(arrFloat64) == 4 * sizeof(Int) end # Test union arrays (Issue #23321) let arrUnion = Union{Int64, Cvoid}[rand(Bool) ? 
k : nothing for k = 1:10] @test sizeof(arrUnion) == 80 - @test Core.sizeof(arrUnion) == 80 + @test Core.sizeof(arrUnion) == 3 * sizeof(Int) end # Test non-power of 2 types (Issue #35884) @@ -1103,7 +1139,7 @@ end let arrayOfUInt48 = [a, b, c] f35884(x) = sizeof(x) @test f35884(arrayOfUInt48) == 24 - @test Core.sizeof(arrayOfUInt48) == 24 + @test Core.sizeof(arrayOfUInt48) == 3 * sizeof(Int) end end @@ -1129,7 +1165,7 @@ function Base.getindex(S::Strider{<:Any,N}, I::Vararg{Int,N}) where {N} end Base.strides(S::Strider) = S.strides Base.elsize(::Type{<:Strider{T}}) where {T} = Base.elsize(Vector{T}) -Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S.offset) +Base.cconvert(::Type{Ptr{T}}, S::Strider{T}) where {T} = MemoryRef(S.data.ref, S.offset) @testset "Simple 3d strided views and permutes" for sz in ((5, 3, 2), (7, 11, 13)) A = collect(reshape(1:prod(sz), sz)) @@ -1160,8 +1196,9 @@ Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S Ps = Strider{Int, 3}(vec(A), 1, strides(A)[collect(perm)], sz[collect(perm)]) @test pointer(Ap) == pointer(Sp) == pointer(Ps) for i in 1:length(Ap) - # This is intentionally disabled due to ambiguity - @test_broken pointer(Ap, i) == pointer(Sp, i) == pointer(Ps, i) + # This is intentionally disabled due to ambiguity. See `Base.pointer(A::PermutedDimsArray, i::Integer)`. + # But only evaluate one iteration as broken to reduce test report noise + i == 1 && @test_broken pointer(Ap, i) == pointer(Sp, i) == pointer(Ps, i) @test P[i] == Ap[i] == Sp[i] == Ps[i] end Pv = view(P, idxs[collect(perm)]...) @@ -1180,8 +1217,9 @@ Base.unsafe_convert(::Type{Ptr{T}}, S::Strider{T}) where {T} = pointer(S.data, S Svp = Base.PermutedDimsArray(Sv, perm) @test pointer(Avp) == pointer(Svp) for i in 1:length(Avp) - # This is intentionally disabled due to ambiguity - @test_broken pointer(Avp, i) == pointer(Svp, i) + # This is intentionally disabled due to ambiguity. See `Base.pointer(A::PermutedDimsArray, i::Integer)` + # But only evaluate one iteration as broken to reduce test report noise + i == 1 && @test_broken pointer(Avp, i) == pointer(Svp, i) @test Ip[i] == Vp[i] == Avp[i] == Svp[i] end end @@ -1220,8 +1258,9 @@ end Ps = Strider{Int, 2}(vec(A), 1, strides(A)[collect(perm)], sz[collect(perm)]) @test pointer(Ap) == pointer(Sp) == pointer(Ps) == pointer(At) == pointer(Aa) for i in 1:length(Ap) - # This is intentionally disabled due to ambiguity - @test_broken pointer(Ap, i) == pointer(Sp, i) == pointer(Ps, i) == pointer(At, i) == pointer(Aa, i) == pointer(St, i) == pointer(Sa, i) + # This is intentionally disabled due to ambiguity. See `Base.pointer(A::PermutedDimsArray, i::Integer)` + # But only evaluate one iteration as broken to reduce test report noise + i == 1 && @test_broken pointer(Ap, i) == pointer(Sp, i) == pointer(Ps, i) == pointer(At, i) == pointer(Aa, i) == pointer(St, i) == pointer(Sa, i) @test pointer(Ps, i) == pointer(At, i) == pointer(Aa, i) == pointer(St, i) == pointer(Sa, i) @test P[i] == Ap[i] == Sp[i] == Ps[i] == At[i] == Aa[i] == St[i] == Sa[i] end @@ -1247,8 +1286,9 @@ end Svp = Base.PermutedDimsArray(Sv, perm) @test pointer(Avp) == pointer(Svp) == pointer(Avt) == pointer(Ava) for i in 1:length(Avp) - # This is intentionally disabled due to ambiguity - @test_broken pointer(Avp, i) == pointer(Svp, i) == pointer(Avt, i) == pointer(Ava, i) == pointer(Svt, i) == pointer(Sva, i) + # This is intentionally disabled due to ambiguity. 
See `Base.pointer(A::PermutedDimsArray, i::Integer)` + # But only evaluate one iteration as broken to reduce test report noise + i == 1 && @test_broken pointer(Avp, i) == pointer(Svp, i) == pointer(Avt, i) == pointer(Ava, i) == pointer(Svt, i) == pointer(Sva, i) @test pointer(Avt, i) == pointer(Ava, i) == pointer(Svt, i) == pointer(Sva, i) @test Vp[i] == Avp[i] == Svp[i] == Avt[i] == Ava[i] == Svt[i] == Sva[i] end @@ -1268,6 +1308,13 @@ end @test last(itr, 25) !== itr @test last(itr, 1) == [itr[end]] @test_throws ArgumentError last(itr, -6) + + @testset "overflow (issue #45842)" begin + @test_throws OverflowError first(typemin(Int):typemax(Int), 10) + @test first(2:typemax(Int)-1, typemax(Int)÷2) === 2:((typemax(Int)÷2) + 1) + @test last(2:typemax(Int), typemax(Int)÷2) === + range(stop=typemax(Int), length=typemax(Int)÷2) + end end @testset "Base.rest" begin @@ -1307,6 +1354,28 @@ Base.pushfirst!(tpa::TestPushArray{T}, a::T) where T = pushfirst!(tpa.data, a) @test tpa.data == reverse(collect(1:6)) end +mutable struct SimpleArray{T} <: AbstractVector{T} + els::Vector{T} +end +Base.size(sa::SimpleArray) = size(sa.els) +Base.getindex(sa::SimpleArray, idx...) = getindex(sa.els, idx...) +Base.setindex!(sa::SimpleArray, v, idx...) = setindex!(sa.els, v, idx...) +Base.resize!(sa::SimpleArray, n) = resize!(sa.els, n) +Base.copy(sa::SimpleArray) = SimpleArray(copy(sa.els)) + +isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") +using .Main.OffsetArrays + +@testset "Failing `$f` should not grow the array $a" for + f in (push!, append!, pushfirst!, prepend!), + a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)) + for args in ((1,), (1,2), ([1], [2]), [1]) + orig = copy(a) + @test_throws Exception f(a, args...) + @test a == orig + end +end + @testset "splatting into hvcat" begin t = (1, 2) @test [t...; 3 4] == [1 2; 3 4] @@ -1315,7 +1384,7 @@ end @test Int[t...; 3 4] == [1 2; 3 4] @test Int[0 t...; t... 0] == [0 1 2; 1 2 0] - @test_throws ArgumentError Int[t...; 3 4 5] + @test_throws DimensionMismatch Int[t...; 3 4 5] end @testset "issue #39896, modified getindex " begin @@ -1369,15 +1438,15 @@ using Base: typed_hvncat @test [1;;] == fill(1, (1,1)) for v in (1, fill(1), fill(1,1,1), fill(1, 1, 1, 1)) - @test_throws ArgumentError [v; v;; v] - @test_throws ArgumentError [v; v;; v; v; v] - @test_throws ArgumentError [v; v; v;; v; v] - @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v;; v; v] - @test_throws ArgumentError [v; v;; v; v;;; v; v] - @test_throws ArgumentError [v; v;; v; v;;; v; v; v;; v; v] - @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v; v] + @test_throws DimensionMismatch [v; v;; v] + @test_throws DimensionMismatch [v; v;; v; v; v] + @test_throws DimensionMismatch [v; v; v;; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v;; v; v;; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v; v;; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v;; v; v; v] # ensure a wrong shape with the right number of elements doesn't pass through - @test_throws ArgumentError [v; v;; v; v;;; v; v; v; v] + @test_throws DimensionMismatch [v; v;; v; v;;; v; v; v; v] @test [v; v;; v; v] == fill(1, ndims(v) == 3 ? (2, 2, 1) : (2,2)) @test [v; v;; v; v;;;] == fill(1, 2, 2, 1) @@ -1445,7 +1514,7 @@ using Base: typed_hvncat end # reject shapes that don't nest evenly between levels (e.g. 
1 + 2 does not fit into 2) - @test_throws ArgumentError hvncat(((1, 2, 1), (2, 2), (4,)), true, [1 2], [3], [4], [1 2; 3 4]) + @test_throws DimensionMismatch hvncat(((1, 2, 1), (2, 2), (4,)), true, [1 2], [3], [4], [1 2; 3 4]) # zero-length arrays are handled appropriately @test [zeros(Int, 1, 2, 0) ;;; 1 3] == [1 3;;;] @@ -1460,18 +1529,18 @@ using Base: typed_hvncat for v1 ∈ (zeros(Int, 0, 0), zeros(Int, 0, 0, 0, 0), zeros(Int, 0, 0, 0, 0, 0, 0, 0)) for v2 ∈ (1, [1]) for v3 ∈ (2, [2]) - @test_throws ArgumentError [v1 ;;; v2] - @test_throws ArgumentError [v1 ;;; v2 v3] - @test_throws ArgumentError [v1 v1 ;;; v2 v3] + @test_throws DimensionMismatch [v1 ;;; v2] + @test_throws DimensionMismatch [v1 ;;; v2 v3] + @test_throws DimensionMismatch [v1 v1 ;;; v2 v3] end end end v1 = zeros(Int, 0, 0, 0) for v2 ∈ (1, [1]) for v3 ∈ (2, [2]) - @test_throws ArgumentError [v1 ;;; v2 v3] - @test_throws ArgumentError [v1 ;;; v2] - @test_throws ArgumentError [v1 v1 ;;; v2 v3] + @test_throws DimensionMismatch [v1 ;;; v2 v3] + @test_throws DimensionMismatch [v1 ;;; v2] + @test_throws DimensionMismatch [v1 v1 ;;; v2 v3] end end @@ -1539,10 +1608,144 @@ using Base: typed_hvncat @test Array{Int, 3}(undef, 0, 0, 0) == typed_hvncat(Int, 3) isa Array{Int, 3} # Issue 43933 - semicolon precedence mistake should produce an error - @test_throws ArgumentError [[1 1]; 2 ;; 3 ; [3 4]] - @test_throws ArgumentError [[1 ;;; 1]; 2 ;;; 3 ; [3 ;;; 4]] + @test_throws DimensionMismatch [[1 1]; 2 ;; 3 ; [3 4]] + @test_throws DimensionMismatch [[1 ;;; 1]; 2 ;;; 3 ; [3 ;;; 4]] @test [[1 2; 3 4] [5; 6]; [7 8] 9;;;] == [1 2 5; 3 4 6; 7 8 9;;;] + + #45461, #46133 - ensure non-numeric types do not error + @test [1;;; 2;;; nothing;;; 4] == reshape([1; 2; nothing; 4], (1, 1, 4)) + @test [1 2;;; nothing 4] == reshape([1; 2; nothing; 4], (1, 2, 2)) + @test [[1 2];;; nothing 4] == reshape([1; 2; nothing; 4], (1, 2, 2)) + @test ["A";;"B";;"C";;"D"] == ["A" "B" "C" "D"] + @test ["A";"B";;"C";"D"] == ["A" "C"; "B" "D"] + @test [["A";"B"];;"C";"D"] == ["A" "C"; "B" "D"] +end + +@testset "stack" begin + # Basics + for args in ([[1, 2]], [1:2, 3:4], [[1 2; 3 4], [5 6; 7 8]], + AbstractVector[1:2, [3.5, 4.5]], Vector[[1,2], [3im, 4im]], + [[1:2, 3:4], [5:6, 7:8]], [fill(1), fill(2)]) + X = stack(args) + Y = cat(args...; dims=ndims(args[1])+1) + @test X == Y + @test typeof(X) === typeof(Y) + + X2 = stack(x for x in args) + @test X2 == Y + @test typeof(X2) === typeof(Y) + + X3 = stack(x for x in args if true) + @test X3 == Y + @test typeof(X3) === typeof(Y) + + if isconcretetype(eltype(args)) + @inferred stack(args) + @inferred stack(x for x in args) + end + end + + # Higher dims + @test size(stack([rand(2,3) for _ in 1:4, _ in 1:5])) == (2,3,4,5) + @test size(stack(rand(2,3) for _ in 1:4, _ in 1:5)) == (2,3,4,5) + @test size(stack(rand(2,3) for _ in 1:4, _ in 1:5 if true)) == (2, 3, 20) + @test size(stack([rand(2,3) for _ in 1:4, _ in 1:5]; dims=1)) == (20, 2, 3) + @test size(stack(rand(2,3) for _ in 1:4, _ in 1:5; dims=2)) == (2, 20, 3) + + # Tuples + @test stack([(1,2), (3,4)]) == [1 3; 2 4] + @test stack(((1,2), (3,4))) == [1 3; 2 4] + @test stack(Any[(1,2), (3,4)]) == [1 3; 2 4] + @test stack([(1,2), (3,4)]; dims=1) == [1 2; 3 4] + @test stack(((1,2), (3,4)); dims=1) == [1 2; 3 4] + @test stack(Any[(1,2), (3,4)]; dims=1) == [1 2; 3 4] + @test size(@inferred stack(Iterators.product(1:3, 1:4))) == (2,3,4) + @test @inferred(stack([('a', 'b'), ('c', 'd')])) == ['a' 'c'; 'b' 'd'] + @test @inferred(stack([(1,2+3im), (4, 5+6im)])) isa Matrix{Number} + 
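# Aside (illustrative, not part of this patch): the `stack(f, iter)` form exercised next
# behaves like `stack(f(x) for x in iter)`: `f` is applied to each element and the results
# are stacked along a new trailing dimension; with several collections the elements are
# zipped before `f` is applied. For example (matching the tests below):
#
#     stack(x -> [x, 2x], 3:5)     # == [3 4 5; 6 8 10], three 2-vectors as columns
#     stack(*, [1:2, 3:4], 5:6)    # == [5 18; 10 24]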
+ # stack(f, iter) + @test @inferred(stack(x -> [x, 2x], 3:5)) == [3 4 5; 6 8 10] + @test @inferred(stack(x -> x*x'/2, [1:2, 3:4])) == [0.5 1.0; 1.0 2.0;;; 4.5 6.0; 6.0 8.0] + @test @inferred(stack(*, [1:2, 3:4], 5:6)) == [5 18; 10 24] + + # Iterators + @test stack([(a=1,b=2), (a=3,b=4)]) == [1 3; 2 4] + @test stack([(a=1,b=2), (c=3,d=4)]) == [1 3; 2 4] + @test stack([(a=1,b=2), (c=3,d=4)]; dims=1) == [1 2; 3 4] + @test stack([(a=1,b=2), (c=3,d=4)]; dims=2) == [1 3; 2 4] + @test stack((x/y for x in 1:3) for y in 4:5) == (1:3) ./ (4:5)' + @test stack((x/y for x in 1:3) for y in 4:5; dims=1) == (1:3)' ./ (4:5) + + # Exotic + ips = ((Iterators.product([i,i^2], [2i,3i,4i], 1:4)) for i in 1:5) + @test size(stack(ips)) == (2, 3, 4, 5) + @test stack(ips) == cat(collect.(ips)...; dims=4) + ips_cat2 = cat(reshape.(collect.(ips), Ref((2,1,3,4)))...; dims=2) + @test stack(ips; dims=2) == ips_cat2 + @test stack(collect.(ips); dims=2) == ips_cat2 + ips_cat3 = cat(reshape.(collect.(ips), Ref((2,3,1,4)))...; dims=3) + @test stack(ips; dims=3) == ips_cat3 # path for non-array accumulation on non-final dims + @test stack(collect, ips; dims=3) == ips_cat3 # ... and for array accumulation + @test stack(collect.(ips); dims=3) == ips_cat3 + + # Trivial, because numbers are iterable: + @test stack(abs2, 1:3) == [1, 4, 9] == collect(Iterators.flatten(abs2(x) for x in 1:3)) + + # Allocation tests + xv = [rand(10) for _ in 1:100] + xt = Tuple.(xv) + for dims in (1, 2, :) + @test stack(xv; dims) == stack(xt; dims) + @test_skip 9000 > @allocated stack(xv; dims) + @test_skip 9000 > @allocated stack(xt; dims) + end + xr = (reshape(1:1000,10,10,10) for _ = 1:1000) + for dims in (1, 2, 3, :) + stack(xr; dims) + @test_skip 8.1e6 > @allocated stack(xr; dims) + end + + # Mismatched sizes + @test_throws DimensionMismatch stack([1:2, 1:3]) + @test_throws DimensionMismatch stack([1:2, 1:3]; dims=1) + @test_throws DimensionMismatch stack([1:2, 1:3]; dims=2) + @test_throws DimensionMismatch stack([(1,2), (3,4,5)]) + @test_throws DimensionMismatch stack([(1,2), (3,4,5)]; dims=1) + @test_throws DimensionMismatch stack(x for x in [1:2, 1:3]) + @test_throws DimensionMismatch stack([[5 6; 7 8], [1, 2, 3, 4]]) + @test_throws DimensionMismatch stack([[5 6; 7 8], [1, 2, 3, 4]]; dims=1) + @test_throws DimensionMismatch stack(x for x in [[5 6; 7 8], [1, 2, 3, 4]]) + # Inner iterator of unknown length + @test_throws MethodError stack((x for x in 1:3 if true) for _ in 1:4) + @test_throws MethodError stack((x for x in 1:3 if true) for _ in 1:4; dims=1) + + @test_throws ArgumentError stack([1:3, 4:6]; dims=0) + @test_throws ArgumentError stack([1:3, 4:6]; dims=3) + @test_throws ArgumentError stack(abs2, 1:3; dims=2) + + # Empty + @test_throws ArgumentError stack(()) + @test_throws ArgumentError stack([]) + @test_throws ArgumentError stack(x for x in 1:3 if false) +end + +@testset "tests from PR 31644" begin + v_v_same = [rand(128) for ii in 1:100] + v_v_diff = Any[rand(128), rand(Float32,128), rand(Int, 128)] + v_v_diff_typed = Union{Vector{Float64},Vector{Float32},Vector{Int}}[rand(128), rand(Float32,128), rand(Int, 128)] + for v_v in (v_v_same, v_v_diff, v_v_diff_typed) + # Cover all combinations of iterator traits. + g_v = (x for x in v_v) + f_g_v = Iterators.filter(x->true, g_v) + f_v_v = Iterators.filter(x->true, v_v); + hcat_expected = hcat(v_v...) + vcat_expected = vcat(v_v...) 
+ @testset "$(typeof(data))" for data in (v_v, g_v, f_g_v, f_v_v) + @test stack(data) == hcat_expected + @test vec(stack(data)) == vcat_expected + end + end end @testset "keepat!" begin @@ -1575,22 +1778,74 @@ end @test length(rr) == length(r) end +module IRUtils + include("compiler/irutils.jl") +end + +function check_pointer_strides(A::AbstractArray) + # Make sure stride(A, i) is equivalent with strides(A)[i] (if 1 <= i <= ndims(A)) + dims = ntuple(identity, ndims(A)) + map(i -> stride(A, i), dims) == @inferred(strides(A)) || return false + # Test pointer via value check. + first(A) === Base.unsafe_load(pointer(A)) || return false + # Test strides via value check. + for i in eachindex(IndexLinear(), A) + A[i] === Base.unsafe_load(pointer(A, i)) || return false + end + return true +end + @testset "strides for ReshapedArray" begin - # Type-based contiguous check is tested in test/compiler/inline.jl + # Type-based contiguous Check + a = vec(reinterpret(reshape, Int16, reshape(view(reinterpret(Int32, randn(10)), 2:11), 5, :))) + f(a) = only(strides(a)); + @test IRUtils.fully_eliminated(f, Base.typesof(a)) && f(a) == 1 # General contiguous check a = view(rand(10,10), 1:10, 1:10) - @test strides(vec(a)) == (1,) + @test check_pointer_strides(vec(a)) b = view(parent(a), 1:9, 1:10) - @test_throws "Parent must be contiguous." strides(vec(b)) + @test_throws "Input is not strided." strides(vec(b)) # StridedVector parent for n in 1:3 a = view(collect(1:60n), 1:n:60n) - @test strides(reshape(a, 3, 4, 5)) == (n, 3n, 12n) - @test strides(reshape(a, 5, 6, 2)) == (n, 5n, 30n) + @test check_pointer_strides(reshape(a, 3, 4, 5)) + @test check_pointer_strides(reshape(a, 5, 6, 2)) b = view(parent(a), 60n:-n:1) - @test strides(reshape(b, 3, 4, 5)) == (-n, -3n, -12n) - @test strides(reshape(b, 5, 6, 2)) == (-n, -5n, -30n) + @test check_pointer_strides(reshape(b, 3, 4, 5)) + @test check_pointer_strides(reshape(b, 5, 6, 2)) end + # StridedVector like parent + a = randn(10, 10, 10) + b = view(a, 1:10, 1:1, 5:5) + @test check_pointer_strides(reshape(b, 2, 5)) + # Other StridedArray parent + a = view(randn(10,10), 1:9, 1:10) + @test check_pointer_strides(reshape(a,3,3,2,5)) + @test check_pointer_strides(reshape(a,3,3,5,2)) + @test check_pointer_strides(reshape(a,9,5,2)) + @test check_pointer_strides(reshape(a,3,3,10)) + @test check_pointer_strides(reshape(a,1,3,1,3,1,5,1,2)) + @test check_pointer_strides(reshape(a,3,3,5,1,1,2,1,1)) + @test_throws "Input is not strided." strides(reshape(a,3,6,5)) + @test_throws "Input is not strided." strides(reshape(a,3,2,3,5)) + @test_throws "Input is not strided." strides(reshape(a,3,5,3,2)) + @test_throws "Input is not strided." strides(reshape(a,5,3,3,2)) + # Zero dimensional parent + struct FakeZeroDimArray <: AbstractArray{Int, 0} end + Base.strides(::FakeZeroDimArray) = () + Base.size(::FakeZeroDimArray) = () + a = reshape(FakeZeroDimArray(),1,1,1) + @test @inferred(strides(a)) == (1, 1, 1) + # Dense parent (but not StridedArray) + A = reinterpret(Int8, reinterpret(reshape, Int16, rand(Int8, 2, 3, 3))) + @test check_pointer_strides(reshape(A, 3, 2, 3)) +end + +@testset "pointer for SubArray with none-dense parent." 
begin + a = view(Matrix(reshape(0x01:0xc8, 20, :)), 1:2:20, :) + b = reshape(a, 20, :) + @test check_pointer_strides(view(b, 2:11, 1:5)) + @test check_pointer_strides(view(b, reshape(2:11, 2, :), 1:5)) end @testset "stride for 0 dims array #44087" begin @@ -1617,3 +1872,31 @@ end @test (@inferred A[i,i,i]) === A[1] @test (@inferred to_indices([], (1, CIdx(1, 1), 1, CIdx(1, 1), 1, CIdx(1, 1), 1))) == ntuple(Returns(1), 10) end + +@testset "type-based offset axes check" begin + a = randn(ComplexF64, 10) + ta = reinterpret(Float64, a) + tb = reinterpret(Float64, view(a, 1:2:10)) + tc = reinterpret(Float64, reshape(view(a, 1:3:10), 2, 2, 1)) + # Issue #44040 + @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(ta, tc)) + @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(tc, tc)) + @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(ta, tc, tb)) + # Ranges && CartesianIndices + @test IRUtils.fully_eliminated(Base.require_one_based_indexing, Base.typesof(1:10, Base.OneTo(10), 1.0:2.0, LinRange(1.0, 2.0, 2), 1:2:10, CartesianIndices((1:2:10, 1:2:10)))) + # Remind us to call `any` in `Base.has_offset_axes` once our compiler is ready. + @inline _has_offset_axes(A) = @inline any(x -> Int(first(x))::Int != 1, axes(A)) + @inline _has_offset_axes(As...) = @inline any(_has_offset_axes, As) + a, b = zeros(2, 2, 2), zeros(2, 2) + @test_broken IRUtils.fully_eliminated(_has_offset_axes, Base.typesof(a, a, b, b)) +end + +# type stable [x;;] (https://github.com/JuliaLang/julia/issues/45952) +f45952(x) = [x;;] +@inferred f45952(1.0) + +@testset "isassigned with a Bool index" begin + A = zeros(2,2) + @test_throws "invalid index: true of type Bool" isassigned(A, 1, true) + @test_throws "invalid index: true of type Bool" isassigned(A, true) +end diff --git a/test/ambiguous.jl b/test/ambiguous.jl index a06b92c5481b3..bbbe509439083 100644 --- a/test/ambiguous.jl +++ b/test/ambiguous.jl @@ -46,8 +46,8 @@ let err = try @test occursin("Possible fix, define\n ambig(::Integer, ::Integer)", errstr) end -ambig_with_bounds(x, ::Int, ::T) where {T<:Integer,S} = 0 -ambig_with_bounds(::Int, x, ::T) where {T<:Integer,S} = 1 +@test_warn "declares type variable S but does not use it" @eval ambig_with_bounds(x, ::Int, ::T) where {T<:Integer,S} = 0 +@test_warn "declares type variable S but does not use it" @eval ambig_with_bounds(::Int, x, ::T) where {T<:Integer,S} = 1 let err = try ambig_with_bounds(1, 2, 3) catch _e_ @@ -100,10 +100,6 @@ ambig(x::Union{Char, Int16}) = 's' const allowed_undefineds = Set([ GlobalRef(Base, :active_repl), GlobalRef(Base, :active_repl_backend), - GlobalRef(Base.Filesystem, :JL_O_TEMPORARY), - GlobalRef(Base.Filesystem, :JL_O_SHORT_LIVED), - GlobalRef(Base.Filesystem, :JL_O_SEQUENTIAL), - GlobalRef(Base.Filesystem, :JL_O_RANDOM), ]) let Distributed = get(Base.loaded_modules, @@ -157,17 +153,22 @@ ambig(x::Int8, y) = 1 ambig(x::Integer, y) = 2 ambig(x, y::Int) = 3 end - ambs = detect_ambiguities(Ambig5) @test length(ambs) == 2 - -using LinearAlgebra, SparseArrays, SuiteSparse +module Ambig48312 +ambig(::Integer, ::Int) = 1 +ambig(::Int, ::Integer) = 2 +ambig(::Signed, ::Int) = 3 +ambig(::Int, ::Signed) = 4 +end +ambs = detect_ambiguities(Ambig48312) +@test length(ambs) == 4 # Test that Core and Base are free of ambiguities # not using isempty so this prints more information when it fails @testset "detect_ambiguities" begin - let ambig = Set{Any}(((m1.sig, m2.sig) for (m1, m2) in detect_ambiguities(Core, Base; recursive=true, 
ambiguous_bottom=false, allowed_undefineds))) + let ambig = Set(detect_ambiguities(Core, Base; recursive=true, ambiguous_bottom=false, allowed_undefineds)) good = true for (sig1, sig2) in ambig @test sig1 === sig2 # print this ambiguity @@ -176,9 +177,10 @@ using LinearAlgebra, SparseArrays, SuiteSparse @test good end - # some ambiguities involving Union{} type parameters are expected, but not required + # some ambiguities involving Union{} type parameters may be expected, but not required let ambig = Set(detect_ambiguities(Core; recursive=true, ambiguous_bottom=true)) @test !isempty(ambig) + @test length(ambig) < 30 end STDLIB_DIR = Sys.STDLIB @@ -286,6 +288,30 @@ for f in (Ambig8.f, Ambig8.g) @test f(Int8(0)) == 4 @test_throws MethodError f(0) @test_throws MethodError f(pi) + let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, 10, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 2 + @test ambig[] == 1 + end + let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, -1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 2 + @test ambig[] == 1 + end + let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, 10, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 3 + @test ambig[] == 1 + end + let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(f), Union{Int,AbstractIrrational}}, nothing, -1, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 3 + @test ambig[] == 1 + end end module Ambig9 @@ -353,33 +379,46 @@ f35983(::Type, ::Type) = 2 @test length(Base.methods(f35983, (Any, Any))) == 2 @test first(Base.methods(f35983, (Any, Any))).sig == Tuple{typeof(f35983), Type, Type} let ambig = Ref{Int32}(0) - ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, typemax(UInt), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector @test length(ms) == 1 @test ambig[] == 0 end f35983(::Type{Int16}, ::Any) = 3 @test length(Base.methods_including_ambiguous(f35983, (Type, Type))) == 2 -@test length(Base.methods(f35983, (Type, Type))) == 2 +@test length(Base.methods(f35983, (Type, Type))) == 1 let ambig = Ref{Int32}(0) - ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, typemax(UInt), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, Base.get_world_counter(), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector @test length(ms) == 2 @test ambig[] == 1 end struct B38280 <: Real; val; end let ambig = Ref{Int32}(0) - ms = Base._methods_by_ftype(Tuple{Type{B38280}, Any}, nothing, 1, typemax(UInt), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + ms = Base._methods_by_ftype(Tuple{Type{B38280}, Any}, nothing, 1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) 
@test ms isa Vector @test length(ms) == 1 @test ambig[] == 1 end +fnoambig(::Int,::Int) = 1 +fnoambig(::Int,::Any) = 2 +fnoambig(::Any,::Int) = 3 +fnoambig(::Any,::Any) = 4 +let has_ambig = Ref(Int32(0)) + ms = Base._methods_by_ftype(Tuple{typeof(fnoambig), Any, Any}, nothing, 4, Base.get_world_counter(), false, Ref(typemin(UInt)), Ref(typemax(UInt)), has_ambig) + @test ms isa Vector + @test length(ms) == 4 + @test has_ambig[] == 0 +end + # issue #11407 f11407(::Dict{K,V}, ::Dict{Any,V}) where {K,V} = 1 f11407(::Dict{K,V}, ::Dict{K,Any}) where {K,V} = 2 @test_throws MethodError f11407(Dict{Any,Any}(), Dict{Any,Any}()) # ambiguous @test f11407(Dict{Any,Int}(), Dict{Any,Int}()) == 1 -f11407(::Dict{Any,Any}, ::Dict{Any,Any}) where {K,V} = 3 +@test_warn "declares type variable V but does not use it" @eval f11407(::Dict{Any,Any}, ::Dict{Any,Any}) where {K,V} = 3 @test f11407(Dict{Any,Any}(), Dict{Any,Any}()) == 3 # issue #12814 @@ -393,10 +432,23 @@ end # issue #43040 module M43040 + using Test struct C end - stripType(::Type{C}) where {T} = C # where {T} is intentionally incorrect + @test_warn "declares type variable T but does not use it" @eval M43040 stripType(::Type{C}) where {T} = C # where {T} is intentionally incorrect end @test isempty(detect_ambiguities(M43040; recursive=true)) +cc46601(T::Type{<:Core.IntrinsicFunction}, x) = 1 +cc46601(::Type{T}, x::Number) where {T<:AbstractChar} = 2 +cc46601(T::Type{<:Nothing}, x) = 3 +cc46601(::Type{T}, x::T) where {T<:Number} = 4 +cc46601(::Type{T}, arg) where {T<:VecElement} = 5 +cc46601(::Type{T}, x::Number) where {T<:Number} = 6 +@test length(methods(cc46601, Tuple{Type{<:Integer}, Integer})) == 2 +@test length(Base.methods_including_ambiguous(cc46601, Tuple{Type{<:Integer}, Integer})) == 6 +cc46601(::Type{T}, x::Int) where {T<:AbstractString} = 7 +@test length(methods(cc46601, Tuple{Type{<:Integer}, Integer})) == 2 +@test length(Base.methods_including_ambiguous(cc46601, Tuple{Type{<:Integer}, Integer})) == 7 + nothing diff --git a/test/arrayops.jl b/test/arrayops.jl index b11731d394b65..8e33e209ee88b 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -115,7 +115,10 @@ end @test convert(Array{Int,1}, r) == [2,3,4] @test_throws MethodError convert(Array{Int,2}, r) @test convert(Array{Int}, r) == [2,3,4] - @test Base.unsafe_convert(Ptr{Int}, r) == Base.unsafe_convert(Ptr{Int}, s) + let rc = Base.cconvert(Ptr{Int}, r), rs = Base.cconvert(Ptr{Int}, s) + @test rc == rs + @test Base.unsafe_convert(Ptr{Int}, rc) == Base.unsafe_convert(Ptr{Int}, rs) + end @test isa(r, StridedArray) # issue #22411 end @testset "linearslow" begin @@ -131,6 +134,7 @@ end @test convert(Array{Int,1}, r) == [2,3,5] @test_throws MethodError convert(Array{Int,2}, r) @test convert(Array{Int}, r) == [2,3,5] + # @test_throws ErrorException Base.cconvert(Ptr{Int}, r) broken=true @test_throws ErrorException Base.unsafe_convert(Ptr{Int}, r) r[2] = -1 @test a[3] == -1 @@ -604,6 +608,15 @@ end @testset "issue 43078" begin @test_throws TypeError findall([1]) end + + @testset "issue #46425" begin + counter = 0 + function pred46425(x) + counter += 1 + counter < 4 && x + end + @test findall(pred46425, [false, false, true, true]) == [3] + end end @testset "find with Matrix" begin A = [1 2 0; 3 4 0] @@ -708,7 +721,7 @@ end ap = PermutedDimsArray(Array(a), (2,1,3)) @test strides(ap) == (3,1,12) - for A in [rand(1,2,3,4),rand(2,2,2,2),rand(5,6,5,6),rand(1,1,1,1)] + for A in [rand(1,2,3,4),rand(2,2,2,2),rand(5,6,5,6),rand(1,1,1,1), [rand(ComplexF64, 2,2) for _ in 1:2, _ in 1:3, _ in 1:2, 
_ in 1:4]] perm = randperm(4) @test isequal(A,permutedims(permutedims(A,perm),invperm(perm))) @test isequal(A,permutedims(permutedims(A,invperm(perm)),perm)) @@ -716,6 +729,10 @@ end @test sum(permutedims(A,perm)) ≈ sum(PermutedDimsArray(A,perm)) @test sum(permutedims(A,perm), dims=2) ≈ sum(PermutedDimsArray(A,perm), dims=2) @test sum(permutedims(A,perm), dims=(2,4)) ≈ sum(PermutedDimsArray(A,perm), dims=(2,4)) + + @test prod(permutedims(A,perm)) ≈ prod(PermutedDimsArray(A,perm)) + @test prod(permutedims(A,perm), dims=2) ≈ prod(PermutedDimsArray(A,perm), dims=2) + @test prod(permutedims(A,perm), dims=(2,4)) ≈ prod(PermutedDimsArray(A,perm), dims=(2,4)) end m = [1 2; 3 4] @@ -765,6 +782,18 @@ end @test circshift(src, 1) == src src = zeros(Bool, (4,0)) @test circshift(src, 1) == src + + # 1d circshift! (https://github.com/JuliaLang/julia/issues/46533) + a = [1:5;] + @test circshift!(a, 1) === a + @test a == circshift([1:5;], 1) == [5, 1, 2, 3, 4] + a = [1:5;] + @test circshift!(a, -2) === a + @test a == circshift([1:5;], -2) == [3, 4, 5, 1, 2] + a = [1:5;] + oa = OffsetVector(copy(a), -1) + @test circshift!(oa, 1) === oa + @test oa == circshift(OffsetVector(a, -1), 1) end @testset "circcopy" begin @@ -1136,7 +1165,7 @@ end @test isequal(setdiff([1,2,3,4], [7,8,9]), [1,2,3,4]) @test isequal(setdiff([1,2,3,4], Int64[]), Int64[1,2,3,4]) @test isequal(setdiff([1,2,3,4], [1,2,3,4,5]), Int64[]) - @test isequal(symdiff([1,2,3], [4,3,4]), [1,2]) + @test isequal(symdiff([1,2,3], [4,3,4]), [1,2,4]) @test isequal(symdiff(['e','c','a'], ['b','a','d']), ['e','c','b','d']) @test isequal(symdiff([1,2,3], [4,3], [5]), [1,2,4,5]) @test isequal(symdiff([1,2,3,4,5], [1,2,3], [3,4]), [3,5]) @@ -1203,10 +1232,9 @@ end @test o == fill(1, 3, 4) # issue #18524 - # m = mapslices(x->tuple(x), [1 2; 3 4], dims=1) # see variations of this below - # ERROR: fatal error in type inference (type bound), https://github.com/JuliaLang/julia/issues/43064 - # @test m[1,1] == ([1,3],) - # @test m[1,2] == ([2,4],) + m = mapslices(x->tuple(x), [1 2; 3 4], dims=1) # see variations of this below + @test m[1,1] == ([1,3],) + @test m[1,2] == ([2,4],) r = rand(Int8, 4,5,2) @test vec(mapslices(repr, r, dims=(2,1))) == map(repr, eachslice(r, dims=3)) @@ -1216,8 +1244,6 @@ end # failures @test_broken @inferred(mapslices(tuple, [1 2; 3 4], dims=1)) == [([1, 3],) ([2, 4],)] @test_broken @inferred(mapslices(transpose, r, dims=(1,3))) == permutedims(r, (3,2,1)) - # ERROR: fatal error in type inference (type bound), https://github.com/JuliaLang/julia/issues/43064 - @test_broken @inferred(mapslices(x -> tuple(x), [1 2; 3 4], dims=1)) == [([1, 3],) ([2, 4],)] # re-write, #40996 @test_throws ArgumentError mapslices(identity, rand(2,3), dims=0) # previously BoundsError @@ -1420,6 +1446,15 @@ end @test sortslices(B, dims=(1,3)) == B end +@testset "sortslices inference (#52019)" begin + x = rand(3, 2) + @inferred sortslices(x, dims=1) + @inferred sortslices(x, dims=(2,)) + x = rand(1, 2, 3) + @inferred sortslices(x, dims=(1,2)) + @inferred sortslices(x, dims=3, by=sum) +end + @testset "fill" begin @test fill!(Float64[1.0], -0.0)[1] === -0.0 A = fill(1.,3,3) @@ -1488,6 +1523,9 @@ end @test isempty(eoa) end +@testset "filter curried #41173" begin + @test -5:5 |> filter(iseven) == -4:2:4 +end @testset "logical keepat!" 
begin # Vector a = Vector(1:10) @@ -1689,6 +1727,39 @@ end @test istriu([1 2 0; 0 4 1]) end +#issue 49021 +@testset "reverse cartesian indices" begin + @test reverse(CartesianIndices((2, 3))) === CartesianIndices((2:-1:1, 3:-1:1)) + @test reverse(CartesianIndices((2:5, 3:7))) === CartesianIndices((5:-1:2, 7:-1:3)) + @test reverse(CartesianIndices((5:-1:2, 7:-1:3))) === CartesianIndices((2:1:5, 3:1:7)) +end + +@testset "reverse cartesian indices dim" begin + A = CartesianIndices((2, 3, 5:-1:1)) + @test reverse(A, dims=1) === CartesianIndices((2:-1:1, 3, 5:-1:1)) + @test reverse(A, dims=3) === CartesianIndices((2, 3, 1:1:5)) + @test_throws ArgumentError reverse(A, dims=0) + @test_throws ArgumentError reverse(A, dims=4) +end + +@testset "reverse cartesian indices multiple dims" begin + A = CartesianIndices((2, 3, 5:-1:1)) + @test reverse(A, dims=(1, 3)) === CartesianIndices((2:-1:1, 3, 1:1:5)) + @test reverse(A, dims=(3, 1)) === CartesianIndices((2:-1:1, 3, 1:1:5)) + @test_throws ArgumentError reverse(A, dims=(1, 2, 4)) + @test_throws ArgumentError reverse(A, dims=(0, 1, 2)) + @test_throws ArgumentError reverse(A, dims=(1, 1)) +end + +@testset "stability of const propagation" begin + A = CartesianIndices((2, 3, 5:-1:1)) + f1(x) = reverse(x; dims=1) + f2(x) = reverse(x; dims=(1, 3)) + @test @inferred(f1(A)) === CartesianIndices((2:-1:1, 3, 5:-1:1)) + @test @inferred(f2(A)) === CartesianIndices((2:-1:1, 3, 1:1:5)) + @test @inferred(reverse(A; dims=())) === A +end + # issue 4228 let A = [[i i; i i] for i=1:2] @test cumsum(A) == Any[[1 1; 1 1], [3 3; 3 3]] @@ -1724,6 +1795,32 @@ end # offset array @test append!([1,2], OffsetArray([9,8], (-3,))) == [1,2,9,8] @test prepend!([1,2], OffsetArray([9,8], (-3,))) == [9,8,1,2] + + # Error recovery + A = [1, 2] + @test_throws MethodError append!(A, [1, 2, "hi"]) + @test A == [1, 2, 1, 2] + + oA = OffsetVector(A, 0:3) + @test_throws InexactError append!(oA, [1, 2, 3.01]) + @test oA == OffsetVector([1, 2, 1, 2, 1, 2], 0:5) + + @test_throws InexactError append!(A, (x for x in [1, 2, 3.1])) + @test A == [1, 2, 1, 2, 1, 2, 1, 2] + + @test_throws InexactError append!(A, (x for x in [1, 2, 3.1] if isfinite(x))) + @test A == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2] + + @test_throws MethodError prepend!(A, [1, 2, "hi"]) + @test A == [2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2] + + A = [1, 2] + @test_throws InexactError prepend!(A, (x for x in [1, 2, 3.1])) + @test A == [2, 1, 1, 2] + + A = [1, 2] + @test_throws InexactError prepend!(A, (x for x in [1, 2, 3.1] if isfinite(x))) + @test A == [2, 1, 1, 2] end let A = [1,2] @@ -2066,6 +2163,8 @@ R = CartesianIndices((3,0)) @test @inferred(eachindex(Base.IndexLinear(), a, b)) == 1:4 @test @inferred(eachindex(a, b)) == CartesianIndices((2,2)) @test @inferred(eachindex(a, a)) == 1:4 + @test @inferred(eachindex(a, a, a)) == 1:4 + @test @inferred(eachindex(a, a, b)) == CartesianIndices((2,2)) @test_throws DimensionMismatch eachindex(a, rand(3,3)) @test_throws DimensionMismatch eachindex(b, rand(3,3)) end @@ -2269,6 +2368,23 @@ end @test S32K isa AbstractSlices{<:AbstractArray{Int, 2}, 4} @test size(S32K) == (1,2,2,1) @test S32K[1,2,1,1] == M[:,2,1,:] + + @testset "eachslice inference (#45923)" begin + a = [1 2; 3 4] + f1(a) = eachslice(a, dims=1) + @test (@inferred f1(a)) == eachrow(a) + f2(a) = eachslice(a, dims=2) + @test (@inferred f2(a)) == eachcol(a) + end + + @testset "eachslice bounds checking" begin + # https://github.com/JuliaLang/julia/pull/32310#issuecomment-1146911510 + A = eachslice(rand(2,3), dims = 2, drop = false) + 
@test_throws BoundsError A[2, 1] + @test_throws BoundsError A[4] + @test_throws BoundsError A[2,3] = [4,5] + @test_throws BoundsError A[2,3] .= [4,5] + end end ### @@ -2649,7 +2765,7 @@ end end @testset "accumulate, accumulate!" begin - @test accumulate(+, [1,2,3]) == [1, 3, 6] + @test accumulate(+, [1, 2, 3]) == [1, 3, 6] @test accumulate(min, [1 2; 3 4], dims=1) == [1 2; 1 2] @test accumulate(max, [1 2; 3 0], dims=2) == [1 2; 3 3] @test accumulate(+, Bool[]) == Int[] @@ -2666,12 +2782,15 @@ end @test accumulate(min, [1 0; 0 1], dims=1) == [1 0; 0 0] @test accumulate(min, [1 0; 0 1], dims=2) == [1 0; 0 0] + @test accumulate(+, [1, 2, 3], dims=1, init=1) == [2, 4, 7] + @test accumulate(*, [1, 4, 2], dims=1, init=2) == [2, 8, 16] + @test accumulate(min, [3 2 1; 3 2 1], dims=2) == [3 2 1; 3 2 1] @test accumulate(min, [3 2 1; 3 2 1], dims=2, init=2) == [2 2 1; 2 2 1] @test isa(accumulate(+, Int[]), Vector{Int}) @test isa(accumulate(+, Int[]; init=1.), Vector{Float64}) - @test accumulate(+, [1,2]; init=1) == [2, 4] + @test accumulate(+, [1, 2]; init=1) == [2, 4] arr = randn(4) @test accumulate(*, arr; init=1) ≈ accumulate(*, arr) @@ -2715,7 +2834,7 @@ end # asymmetric operation op(x,y) = 2x+y - @test accumulate(op, [10,20, 30]) == [10, op(10, 20), op(op(10, 20), 30)] == [10, 40, 110] + @test accumulate(op, [10, 20, 30]) == [10, op(10, 20), op(op(10, 20), 30)] == [10, 40, 110] @test accumulate(op, [10 20 30], dims=2) == [10 op(10, 20) op(op(10, 20), 30)] == [10 40 110] #25506 @@ -3051,3 +3170,22 @@ end @test c + zero(c) == c end end + +@testset "Wrapping Memory into Arrays" begin + mem = Memory{Int}(undef, 10) .= 1 + memref = MemoryRef(mem) + @test_throws DimensionMismatch wrap(Array, mem, (10, 10)) + @test wrap(Array, mem, (5,)) == ones(Int, 5) + @test wrap(Array, mem, 2) == ones(Int, 2) + @test wrap(Array, memref, 10) == ones(Int, 10) + @test wrap(Array, memref, (2,2,2)) == ones(Int,2,2,2) + @test wrap(Array, mem, (5, 2)) == ones(Int, 5, 2) + + memref2 = MemoryRef(mem, 3) + @test wrap(Array, memref2, (5,)) == ones(Int, 5) + @test wrap(Array, memref2, 2) == ones(Int, 2) + @test wrap(Array, memref2, (2,2,2)) == ones(Int,2,2,2) + @test wrap(Array, memref2, (3, 2)) == ones(Int, 3, 2) + @test_throws DimensionMismatch wrap(Array, memref2, 9) + @test_throws DimensionMismatch wrap(Array, memref2, 10) +end diff --git a/test/asyncmap.jl b/test/asyncmap.jl index ec49230dbce14..5dc79e612acda 100644 --- a/test/asyncmap.jl +++ b/test/asyncmap.jl @@ -64,7 +64,7 @@ let end @test e isa CapturedException @test e.ex == ErrorException("captured") - @test e.processed_bt[2][1].func == :f42105 + @test e.processed_bt[2][1].func === :f42105 end include("generic_map_tests.jl") diff --git a/test/atexit.jl b/test/atexit.jl index 103cb1e52bca6..4a37d465f250b 100644 --- a/test/atexit.jl +++ b/test/atexit.jl @@ -4,8 +4,9 @@ using Test @testset "atexit.jl" begin function _atexit_tests_gen_cmd_eval(expr::String) + # We run the atexit tests with 2 threads, for the parallelism tests at the end. cmd_eval = ``` - $(Base.julia_cmd()) -e $(expr) + $(Base.julia_cmd()) -t2 -e $(expr) ``` return cmd_eval end @@ -13,8 +14,9 @@ using Test script, io = mktemp(temp_dir) println(io, expr) close(io) + # We run the atexit tests with 2 threads, for the parallelism tests at the end. 
cmd_script = ``` - $(Base.julia_cmd()) $(script) + $(Base.julia_cmd()) -t2 $(script) ``` return cmd_script end @@ -28,6 +30,11 @@ using Test exit(22) """ => 0, # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + """ + atexit(exitcode -> exitcode > 10 && exit(0)) + exit(22) + """ => 0, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ) for julia_expr in keys(julia_expr_list) cmd_eval = _atexit_tests_gen_cmd_eval(julia_expr) @@ -87,6 +94,11 @@ using Test """ => 13, # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ """ + atexit(exitcode -> exit(exitcode+3)) + exit(22) + """ => 25, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + """ atexit(() -> ("No error")) atexit(() -> exit(5)) exit(22) @@ -135,6 +147,105 @@ using Test exit(22) """ => 4, # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + """ + atexit(() -> exit(21)) + atexit(exitcode -> exit(exitcode+3)) + exit(22) + """ => 21, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + """ + atexit(exitcode -> exit(exitcode+3)) + atexit(() -> exit(21)) + exit(22) + """ => 24, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + ) + for julia_expr in keys(julia_expr_list) + cmd_eval = _atexit_tests_gen_cmd_eval(julia_expr) + cmd_script = _atexit_tests_gen_cmd_script(atexit_temp_dir, julia_expr) + expected_exit_code = julia_expr_list[julia_expr] + @test_throws(ProcessFailedException, run(cmd_eval)) + @test_throws(ProcessFailedException, run(cmd_script)) + p_eval = run(cmd_eval; wait = false) + p_script = run(cmd_script; wait = false) + wait(p_eval) + wait(p_script) + @test p_eval.exitcode == expected_exit_code + @test p_script.exitcode == expected_exit_code + end + end + @testset "test calling atexit() in parallel with running atexit hooks." begin + # These tests cover 3 parallelism cases, as described by the following comments. + julia_expr_list = Dict( + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 1. registering a hook from inside a hook + """ + atexit() do + atexit() do + exit(11) + end + end + # This will attempt to exit 0, but the execution of the atexit hook will + # register another hook, which will exit 11. + exit(0) + """ => 11, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 2. registering a hook from another thread while hooks are running + """ + c = Channel() + # This hook must execute _last_. (Execution is LIFO.) + atexit() do + put!(c, nothing) + put!(c, nothing) + end + atexit() do + # This will run in a concurrent task, testing that we can register atexit + # hooks from another task while running atexit hooks. + Threads.@spawn begin + take!(c) # block on c + atexit() do + exit(11) + end + take!(c) # keep the _atexit() loop alive until we've added another item. + end + end + exit(0) + """ => 11, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 3. attempting to register a hook after all hooks have finished (disallowed) + """ + const atexit_has_finished = Threads.Atomic{Int}(0) + atexit() do + Threads.@spawn begin + # Block until the atexit hooks have all finished. We use a manual "spin + # lock" because task switch is disallowed inside the finalizer, below. + atexit_has_finished[] = 1 + while atexit_has_finished[] == 1 end + try + # By the time this runs, all the atexit hooks will be done. + # So this will throw. + atexit() do + exit(11) + end + catch + # Meaning we _actually_ exit 22. 
+ exit(22) + end + end + while atexit_has_finished[] == 0 end + end + # Finalizers run after the atexit hooks, so this blocks exit until the spawned + # task above gets a chance to run. + x = [] + finalizer(x) do x + # Allow the spawned task to finish + atexit_has_finished[] = 2 + # Then spin forever to prevent exit. + while atexit_has_finished[] == 2 end + end + exit(0) + """ => 22, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ) for julia_expr in keys(julia_expr_list) cmd_eval = _atexit_tests_gen_cmd_eval(julia_expr) diff --git a/test/atomics.jl b/test/atomics.jl index 15ffd84a2c0a2..dd50fb96be49f 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -22,19 +22,20 @@ mutable struct Refxy{T} Refxy{T}() where {T} = new() # unused, but sets ninitialized to 0 end -@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T} +modname = String(nameof(@__MODULE__)) +@test_throws ErrorException("invalid redefinition of constant $modname.ARefxy") @eval mutable struct ARefxy{T} @atomic x::T @atomic y::T end -@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T} +@test_throws ErrorException("invalid redefinition of constant $modname.ARefxy") @eval mutable struct ARefxy{T} x::T y::T end -@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T} +@test_throws ErrorException("invalid redefinition of constant $modname.ARefxy") @eval mutable struct ARefxy{T} x::T @atomic y::T end -@test_throws ErrorException("invalid redefinition of constant Refxy") @eval mutable struct Refxy{T} +@test_throws ErrorException("invalid redefinition of constant $modname.Refxy") @eval mutable struct Refxy{T} x::T @atomic y::T end @@ -266,8 +267,10 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20)) nothing end @noinline function test_field_orderings(r, x, y) - _test_field_orderings(Ref(copy(r)), x, y) - _test_field_orderings(Ref{Any}(copy(r)), x, y) + @testset "$r" begin + _test_field_orderings(Ref(copy(r)), x, y) + _test_field_orderings(Ref{Any}(copy(r)), x, y) + end nothing end @noinline test_field_orderings(x, y) = (@nospecialize; test_field_orderings(ARefxy(x, y), x, y)) diff --git a/test/backtrace.jl b/test/backtrace.jl index c0abad5146b39..50a50100488c4 100644 --- a/test/backtrace.jl +++ b/test/backtrace.jl @@ -35,7 +35,7 @@ catch err @test endswith(string(lkup[2].file), "backtrace.jl") @test lkup[2].line == 42 # TODO: we don't support surface AST locations with inlined function names - @test_broken lkup[1].func == :inlfunc + @test_broken lkup[1].func === :inlfunc @test endswith(string(lkup[1].file), "backtrace.jl") @test lkup[1].line == 37 end @@ -106,10 +106,10 @@ lkup = map(lookup, bt()) hasbt = hasbt2 = false for sfs in lkup for sf in sfs - if sf.func == :bt + if sf.func === :bt global hasbt = true end - if sf.func == :bt2 + if sf.func === :bt2 global hasbt2 = true end end @@ -125,10 +125,10 @@ lkup = map(lookup, btmacro()) hasme = hasbtmacro = false for sfs in lkup for sf in sfs - if sf.func == Symbol("macro expansion") + if sf.func === Symbol("macro expansion") global hasme = true end - if sf.func == :btmacro + if sf.func === :btmacro global hasbtmacro = true end end @@ -175,7 +175,7 @@ let bt, found = false bt = backtrace() end for frame in map(lookup, bt) - if frame[1].line == @__LINE__() - 3 && frame[1].file == Symbol(@__FILE__) + if frame[1].line == @__LINE__() - 3 && frame[1].file === Symbol(@__FILE__) found = true; break end end @@ -187,7 +187,7 @@ let bt, 
found = false @debug "" bt = backtrace() for frame in map(lookup, bt) - if frame[1].line == @__LINE__() - 2 && frame[1].file == Symbol(@__FILE__) + if frame[1].line == @__LINE__() - 2 && frame[1].file === Symbol(@__FILE__) found = true; break end end @@ -195,6 +195,13 @@ let bt, found = false end # Syntax error locations appear in backtraces +let trace = try + eval(Expr(:error, 1)) + catch + stacktrace(catch_backtrace()) + end + @test trace[1].func === Symbol("top-level scope") +end let trace = try include_string(@__MODULE__, """ @@ -205,8 +212,8 @@ let trace = try catch stacktrace(catch_backtrace()) end - @test trace[1].func == Symbol("top-level scope") - @test trace[1].file == :a_filename + @test trace[1].func === Symbol("top-level scope") + @test trace[1].file === :a_filename @test trace[1].line == 2 end let trace = try @@ -219,9 +226,9 @@ let trace = try catch stacktrace(catch_backtrace()) end - @test trace[1].func == Symbol("top-level scope") - @test trace[1].file == :a_filename - @test trace[1].line == 2 + @test trace[1].func === Symbol("top-level scope") + @test trace[1].file === :a_filename + @test trace[1].line == 3 end # issue #45171 diff --git a/test/binaryplatforms.jl b/test/binaryplatforms.jl index 793a9b1f06a41..8de522e9c6c8b 100644 --- a/test/binaryplatforms.jl +++ b/test/binaryplatforms.jl @@ -315,8 +315,9 @@ end P("x86_64", "linux"; libgfortran_version=v"5") => "linux8", # Ambiguity test - P("aarch64", "linux"; libgfortran_version=v"3") => "linux4", + P("aarch64", "linux"; libgfortran_version=v"3") => "linux3", P("aarch64", "linux"; libgfortran_version=v"3", libstdcxx_version=v"3.4.18") => "linux5", + P("aarch64", "linux"; libgfortran_version=v"3", libstdcxx_version=v"3.4.18", foo="bar") => "linux9", # OS test P("x86_64", "macos"; libgfortran_version=v"3") => "mac4", @@ -327,8 +328,9 @@ end @test select_platform(platforms, P("x86_64", "linux"; libgfortran_version=v"4")) == "linux7" # Ambiguity test - @test select_platform(platforms, P("aarch64", "linux")) == "linux5" - @test select_platform(platforms, P("aarch64", "linux"; libgfortran_version=v"3")) == "linux5" + @test select_platform(platforms, P("aarch64", "linux")) == "linux3" + @test select_platform(platforms, P("aarch64", "linux"; libgfortran_version=v"3")) == "linux3" + @test select_platform(platforms, P("aarch64", "linux"; libgfortran_version=v"3", libstdcxx_version=v"3.4.18")) === "linux5" @test select_platform(platforms, P("aarch64", "linux"; libgfortran_version=v"4")) === nothing @test select_platform(platforms, P("x86_64", "macos")) == "mac4" @@ -339,6 +341,22 @@ end # Sorry, Alex. 
;) @test select_platform(platforms, P("x86_64", "freebsd")) === nothing + + # The new "most complete match" algorithm deals with ambiguities as follows: + platforms = Dict( + P("x86_64", "linux") => "normal", + P("x86_64", "linux"; sanitize="memory") => "sanitized", + ) + @test select_platform(platforms, P("x86_64", "linux")) == "normal" + @test select_platform(platforms, P("x86_64", "linux"; sanitize="memory")) == "sanitized" + + # Ties are broken by reverse-sorting by triplet: + platforms = Dict( + P("x86_64", "linux"; libgfortran_version=v"3") => "libgfortran3", + P("x86_64", "linux"; libgfortran_version=v"4") => "libgfortran4", + ) + @test select_platform(platforms, P("x86_64", "linux")) == "libgfortran4" + @test select_platform(platforms, P("x86_64", "linux"; libgfortran_version=v"3")) == "libgfortran3" end @testset "Custom comparators" begin diff --git a/test/bitarray.jl b/test/bitarray.jl index d17a9856596a4..056a201bd4f6f 100644 --- a/test/bitarray.jl +++ b/test/bitarray.jl @@ -15,12 +15,11 @@ bitcheck(x) = true bcast_setindex!(b, x, I...) = (b[I...] .= x; b) function check_bitop_call(ret_type, func, args...; kwargs...) - r1 = func(args...; kwargs...) r2 = func(map(x->(isa(x, BitArray) ? Array(x) : x), args)...; kwargs...) - ret_type ≢ nothing && !isa(r1, ret_type) && @show ret_type, typeof(r1) - ret_type ≢ nothing && @test isa(r1, ret_type) + r1 = func(args...; kwargs...) + ret_type ≢ nothing && (@test isa(r1, ret_type) || @show ret_type, typeof(r1)) @test tc(r1, r2) - @test isequal(r1, ret_type ≡ nothing ? r2 : r2) + @test isequal(r1, r2) @test bitcheck(r1) end macro check_bit_operation(ex, ret_type) @@ -99,9 +98,9 @@ end timesofar("conversions") @testset "Promotions for size $sz" for (sz, T) in allsizes - @test isequal(promote(falses(sz...), zeros(sz...)), + @test_broken isequal(promote(falses(sz...), zeros(sz...)), (zeros(sz...), zeros(sz...))) - @test isequal(promote(trues(sz...), ones(sz...)), + @test_broken isequal(promote(trues(sz...), ones(sz...)), (ones(sz...), ones(sz...))) ae = falses(1, sz...) ex = (@test_throws ErrorException promote(ae, ones(sz...))).value @@ -518,12 +517,14 @@ timesofar("constructors") end end + self_copyto!(a, n1, n2, l) = copyto!(a, n1, a, n2, l) for p1 = [rand(1:v1) 1 63 64 65 191 192 193] for p2 = [rand(1:v1) 1 63 64 65 191 192 193] for n = 0 : min(v1 - p1 + 1, v1 - p2 + 1) b1 = bitrand(v1) b2 = bitrand(v1) @check_bit_operation copyto!(b1, p1, b2, p2, n) BitVector + @check_bit_operation self_copyto!(b1, p1, p2, n) BitVector end end end @@ -1493,6 +1494,66 @@ timesofar("reductions") C17970 = map(x -> x ? false : true, A17970) @test C17970::BitArray{1} == map(~, A17970) end + + #= + |<----------------dest----------(original_tail)->| + |<------------------b2(l)------>| extra_l | + |<------------------b3(l)------>| + |<------------------b4(l+extra_l)--------------->| + |<--------------desk_inbetween-------->| extra÷2 | + =# + @testset "Issue #47011, map! 
over unequal length bitarray" begin + for l = [0, 1, 63, 64, 65, 127, 128, 129, 255, 256, 257, 6399, 6400, 6401] + for extra_l = [10, 63, 64, 65, 127, 128, 129, 255, 256, 257, 6399, 6400, 6401] + + dest = bitrand(l+extra_l) + b2 = bitrand(l) + original_tail = last(dest, extra_l) + for op in (!, ~) + map!(op, dest, b2) + @test first(dest, l) == map(op, b2) + # check we didn't change bits we're not supposed to + @test last(dest, extra_l) == original_tail + end + + b3 = bitrand(l) + b4 = bitrand(l+extra_l) + # when dest is longer than one source but shorter than the other + dest_inbetween = bitrand(l + extra_l÷2) + original_tail_inbetween = last(dest_inbetween, extra_l÷2) + for op in (|, ⊻) + map!(op, dest, b2, b3) + @test first(dest, l) == map(op, b2, b3) + # check we didn't change bits we're not supposed to + @test last(dest, extra_l) == original_tail + + map!(op, dest, b2, b4) + @test first(dest, l) == map(op, b2, b4) + # check we didn't change bits we're not supposed to + @test last(dest, extra_l) == original_tail + + map!(op, dest_inbetween, b2, b4) + @test first(dest_inbetween, l) == map(op, b2, b4) + @test last(dest_inbetween, extra_l÷2) == original_tail_inbetween + end + end + end + end + @testset "Issue #50780, map! bitarray map! where dest aliases source" begin + a = BitVector([1,0]) + b = map(!, a) + map!(!, a, a) # a .= !.a + @test a == b == BitVector([0,1]) + + a = BitVector([1,0]) + c = map(|, a, b) + map!(|, a, a, b) + @test c == a == BitVector([1, 1]) + + a = BitVector([1,0]) + map!(|, b, a, b) + @test c == b == BitVector([1, 1]) + end end ## Filter ## @@ -1606,7 -1667,7 @@ timesofar("cat") @test ((svdb1, svdb1A) = (svd(b1), svd(Array(b1))); svdb1.U == svdb1A.U && svdb1.S == svdb1A.S && svdb1.V == svdb1A.V) @test ((qrb1, qrb1A) = (qr(b1), qr(Array(b1))); - qrb1.Q == qrb1A.Q && qrb1.R == qrb1A.R) + Matrix(qrb1.Q) == Matrix(qrb1A.Q) && qrb1.R == qrb1A.R) b1 = bitrand(v1) @check_bit_operation diagm(0 => b1) BitMatrix @@ -1787,3 +1848,38 @@ end @test all(bitarray[rangein, rangeout] .== true) end end + +# issue #45825 + +isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") +using .Main.OffsetArrays + +let all_false = OffsetArray(falses(2001), -1000:1000) + @test !any(==(true), all_false) + # should be run with --check-bounds=yes + @test_throws DimensionMismatch BitArray(all_false) + all_false = OffsetArray(falses(2001), 1:2001) + @test !any(==(true), BitArray(all_false)) + all_false = OffsetArray(falses(100, 100), 0:99, -1:98) + @test !any(==(true), all_false) + @test_throws DimensionMismatch BitArray(all_false) + all_false = OffsetArray(falses(100, 100), 1:100, 1:100) + @test !any(==(true), all_false) +end +let a = falses(1000), + msk = BitArray(rand(Bool, 1000)), + n = count(msk), + b = OffsetArray(rand(Bool, n), (-n÷2):(n÷2)-iseven(n)) + a[msk] = b + @test a[msk] == collect(b) + a = falses(100, 100) + msk = BitArray(rand(Bool, 100, 100)) + n = count(msk) + b = OffsetArray(rand(Bool, 1, n), 1:1, (-n÷2):(n÷2)-iseven(n)) + a[msk] = b + @test a[msk] == vec(collect(b)) +end +let b = trues(10) + copyto!(b, view([0,0,0], :)) + @test b == [0,0,0,1,1,1,1,1,1,1] +end diff --git a/test/bitset.jl b/test/bitset.jl index 1919da4f3702a..f8c5d3fffd7d2 100644 --- a/test/bitset.jl +++ b/test/bitset.jl @@ -38,9 +38,12 @@ end @test !in(1,s) @test in(2,s) @test !in(10002,s) - @test in(10000,s) + @test in(UInt128(10000),s) + @test in(Int32(10000),s) @test in(10000.0,s) @test !in(10002.0,s) + @test !in(typemax(UInt), s) + @test !in(typemin(Int)-Int128(14), s) @test_throws
ArgumentError first(BitSet()) @test_throws ArgumentError last(BitSet()) t = copy(s) @@ -65,11 +68,13 @@ end @test !(-1 in BitSet(1:10)) end -# # issue #8570 -# This requires 2^29 bytes of storage, which is too much for a simple test -# s = BitSet(typemax(Int32)) -# @test length(s) === 1 -# for b in s; b; end +@testset "issue #8570" begin + let s + @test 400 > @allocated s = BitSet(typemax(Int32)) + @test length(s) === 1 + @test only(s) == typemax(Int32) + end +end @testset "union!, symdiff!" begin i = BitSet([1, 2, 3]) @@ -155,13 +160,16 @@ end for n in -20:0 @test length(delete!(s, n)) == len end + @test length(delete!(s, typemax(UInt))) == len @test pop!(s, 1) === 1 @test !(1 in s) @test_throws KeyError pop!(s, 1) @test_throws KeyError pop!(s, -1) @test pop!(s, -1, 1) === 1 @test pop!(s, 1, 0) === 0 - @test s === delete!(s, 1) + @test 5 in s + @test s === delete!(s, 1) === delete!(s, Int8(5)) + @test !(5 in s) for i in s; pop!(s, i); end @test isempty(s) push!(s, 100) @@ -346,8 +354,17 @@ end x = BitSet(rand(-1000:1000, 500)) y = copy(x) @test union!(x, BitSet(a:b)) == union!(y, BitSet(a:1:b)) - @test_throws ArgumentError BitSet(Int128(typemin(Int))-1:typemin(Int)) - @test_throws ArgumentError BitSet(typemax(Int):Int128(typemax(Int))+1) + @test_throws InexactError BitSet(Int128(typemin(Int))-1:typemin(Int)) + @test_throws InexactError BitSet(typemax(Int):Int128(typemax(Int))+1) # union! with an empty range doesn't modify the BitSet @test union!(x, b:a) == y end + +@testset "union!(::BitSet, ::AbstractUnitRange) when two ranges do not overlap" begin + # see #45574 + a, b = rand(-10000:-5000), rand(5000:10000) + c, d = minmax(rand(20000:30000, 2)...) + @test length(union!(BitSet(a:b), c:d)) == length(a:b) + length(c:d) + c, d = minmax(rand(-30000:-20000, 2)...) 
+ @test length(union!(BitSet(a:b), c:d)) == length(a:b) + length(c:d) +end diff --git a/test/boundscheck.jl b/test/boundscheck.jl index 09cc8d2cd13e8..ad7f50a84e086 100644 --- a/test/boundscheck.jl +++ b/test/boundscheck.jl @@ -2,17 +2,14 @@ # run boundscheck tests on separate workers launched with --check-bounds={default,yes,no} -cmd = `$(Base.julia_cmd()) --depwarn=error --startup-file=no boundscheck_exec.jl` -if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) - error("boundscheck test failed, cmd : $cmd") +let cmd = `$(Base.julia_cmd()) --check-bounds=auto --depwarn=error --startup-file=no boundscheck_exec.jl` + success(pipeline(cmd; stdout=stdout, stderr=stderr)) || error("boundscheck test failed, cmd : $cmd") end -cmd = `$(Base.julia_cmd()) --check-bounds=yes --startup-file=no --depwarn=error boundscheck_exec.jl` -if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) - error("boundscheck test failed, cmd : $cmd") +let cmd = `$(Base.julia_cmd()) --check-bounds=yes --startup-file=no --depwarn=error boundscheck_exec.jl` + success(pipeline(cmd; stdout=stdout, stderr=stderr)) || error("boundscheck test failed, cmd : $cmd") end -cmd = `$(Base.julia_cmd()) --check-bounds=no --startup-file=no --depwarn=error boundscheck_exec.jl` -if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) - error("boundscheck test failed, cmd : $cmd") +let cmd = `$(Base.julia_cmd()) --check-bounds=no --startup-file=no --depwarn=error boundscheck_exec.jl` + success(pipeline(cmd; stdout=stdout, stderr=stderr)) || error("boundscheck test failed, cmd : $cmd") end diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl index 715700e00378f..10f46eb4a8031 100644 --- a/test/boundscheck_exec.jl +++ b/test/boundscheck_exec.jl @@ -239,24 +239,22 @@ if bc_opt != bc_off @test_throws BoundsError BadVector20469([1,2,3])[:] end -# Ensure iteration over arrays is vectorizable with boundschecks off +# Ensure iteration over arrays is vectorizable function g27079(X) r = 0 - @inbounds for x in X + for x in X r += x end r end -if bc_opt == bc_default || bc_opt == bc_off - @test occursin("vector.body", sprint(code_llvm, g27079, Tuple{Vector{Int}})) -end + +@test occursin("vector.reduce.add", sprint(code_llvm, g27079, Tuple{Vector{Int}})) # Boundschecking removal of indices with different type, see #40281 getindex_40281(v, a, b, c) = @inbounds getindex(v, a, b, c) -typed_40281 = sprint((io, args...) -> code_warntype(io, args...; optimize=true), getindex_40281, Tuple{Array{Float64, 3}, Int, UInt8, Int}) +llvm_40281 = sprint((io, args...) -> code_llvm(io, args...; optimize=true), getindex_40281, Tuple{Array{Float64, 3}, Int, UInt8, Int}) if bc_opt == bc_default || bc_opt == bc_off - @test occursin("arrayref(false", typed_40281) - @test !occursin("arrayref(true", typed_40281) + @test !occursin("call void @ijl_bounds_error_ints", llvm_40281) end # Given this is a sub-processed test file, not using @testsets avoids @@ -283,7 +281,6 @@ begin # Pass inbounds meta to getindex on CartesianIndices (#42115) end end - # Test that --check-bounds=off doesn't permit const prop of indices into # function that are not dynamically reachable (the same test for @inbounds # is in the compiler tests). 
@@ -295,4 +292,9 @@ function f_boundscheck_elim(n) end @test Tuple{} <: code_typed(f_boundscheck_elim, Tuple{Int})[1][2] +# https://github.com/JuliaArrays/StaticArrays.jl/issues/1155 +@test Base.return_types() do + typeintersect(Int, Integer) +end |> only === Type{Int} + end diff --git a/test/broadcast.jl b/test/broadcast.jl index 1fd1b02776b68..0a111f9523d60 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -699,11 +699,11 @@ end @test_throws Base.CanonicalIndexError A[2] .= 0 @test_throws MethodError A[3] .= 0 A = [[1, 2, 3], 4:5] - @test A isa Vector{Vector{Int}} A[1] .= 0 - A[2] .= 0 - @test A[1] == [0, 0, 0] - @test A[2] == [0, 0] + @test A[1] isa Vector{Int} + @test A[2] isa UnitRange + @test A[1] == [0,0,0] + @test_throws Base.CanonicalIndexError A[2] .= 0 end # Issue #22180 @@ -774,14 +774,27 @@ let X = zeros(2, 3) end # issue #27988: inference of Broadcast.flatten -using .Broadcast: Broadcasted +using .Broadcast: Broadcasted, cat_nested let bc = Broadcasted(+, (Broadcasted(*, (1, 2)), Broadcasted(*, (Broadcasted(*, (3, 4)), 5)))) - @test @inferred(Broadcast.cat_nested(bc)) == (1,2,3,4,5) + @test @inferred(cat_nested(bc)) == (1,2,3,4,5) @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 62 bc = Broadcasted(+, (Broadcasted(*, (1, Broadcasted(/, (2.0, 2.5)))), Broadcasted(*, (Broadcasted(*, (3, 4)), 5)))) - @test @inferred(Broadcast.cat_nested(bc)) == (1,2.0,2.5,3,4,5) + @test @inferred(cat_nested(bc)) == (1,2.0,2.5,3,4,5) @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 60.8 + # 1 .* 1 .- 1 .* 1 .^2 .+ 1 .* 1 .+ 1 .^ 3 + bc = Broadcasted(+, (Broadcasted(+, (Broadcasted(-, (Broadcasted(*, (1, 1)), Broadcasted(*, (1, Broadcasted(Base.literal_pow, (Ref(^), 1, Ref(Val(2)))))))), Broadcasted(*, (1, 1)))), Broadcasted(Base.literal_pow, (Base.RefValue{typeof(^)}(^), 1, Base.RefValue{Val{3}}(Val{3}()))))) + @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == @inferred(Broadcast.materialize(bc)) == 2 + # @. 1 + 1 * (1 + 1 + 1 + 1) + bc = Broadcasted(+, (1, Broadcasted(*, (1, Broadcasted(+, (1, 1, 1, 1)))))) + @test @inferred(cat_nested(bc)) == (1, 1, 1, 1, 1, 1) # `cat_nested` failed to infer this + @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc) + # @. 
1 + (1 + 1) + 1 + (1 + 1) + 1 + (1 + 1) + 1 + bc = Broadcasted(+, (1, Broadcasted(+, (1, 1)), 1, Broadcasted(+, (1, 1)), 1, Broadcasted(+, (1, 1)), 1)) + @test @inferred(cat_nested(bc)) == (1, 1, 1, 1, 1, 1, 1, 1, 1, 1) + @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc) + bc = Broadcasted(Float32, (Broadcasted(+, (1, 1)),)) + @test @inferred(Broadcast.materialize(Broadcast.flatten(bc))) == Broadcast.materialize(bc) end let @@ -880,7 +893,7 @@ let @test Broadcast.broadcasted(+, AD1(rand(3)), AD2(rand(3))) isa Broadcast.Broadcasted{Broadcast.ArrayConflict} @test Broadcast.broadcasted(+, AD1(rand(3)), AD2(rand(3))) isa Broadcast.Broadcasted{<:Broadcast.AbstractArrayStyle{Any}} - @test @inferred(Base.IteratorSize(Broadcast.broadcasted((1,2,3),a1,zeros(3,3,3)))) === Base.HasShape{3}() + @test @inferred(Base.IteratorSize(Broadcast.broadcasted(+, (1,2,3), a1, zeros(3,3,3)))) === Base.HasShape{3}() # inference on nested bc = Base.broadcasted(+, AD1(randn(3)), AD1(randn(3))) @@ -903,13 +916,13 @@ end ys = 1:2:20 bc = Broadcast.instantiate(Broadcast.broadcasted(*, xs, ys)) @test IndexStyle(bc) == IndexLinear() - @test sum(bc) == mapreduce(Base.Splat(*), +, zip(xs, ys)) + @test sum(bc) == mapreduce(Base.splat(*), +, zip(xs, ys)) xs2 = reshape(xs, 1, :) ys2 = reshape(ys, 1, :) bc = Broadcast.instantiate(Broadcast.broadcasted(*, xs2, ys2)) @test IndexStyle(bc) == IndexCartesian() - @test sum(bc) == mapreduce(Base.Splat(*), +, zip(xs, ys)) + @test sum(bc) == mapreduce(Base.splat(*), +, zip(xs, ys)) xs = 1:5:3*5 ys = 1:4:3*4 @@ -1104,7 +1117,7 @@ end end arr = rand(1000) @allocated test(arr) - @test (@allocated test(arr)) == 0 + @test (@allocated test(arr)) <= 16 end @testset "Fix type unstable .&& #43470" begin @@ -1116,7 +1129,39 @@ end @inferred(test(x, y)) == [0, 0] end +@testset "issue #45903, in place broadcast into a bit-masked bitmatrix" begin + A = BitArray(ones(3,3)) + pos = randn(3,3) + A[pos .< 0] .= false + @test all(>=(0), pos[A]) + @test count(A) == count(>=(0), pos) +end + +@testset "issue #38432: make CartesianIndex a broadcast scalar" begin + @test CartesianIndex(1,2) .+ (CartesianIndex(3,4), CartesianIndex(5,6)) == (CartesianIndex(4, 6), CartesianIndex(6, 8)) + @test CartesianIndex(1,2) .+ [CartesianIndex(3,4), CartesianIndex(5,6)] == [CartesianIndex(4, 6), CartesianIndex(6, 8)] +end + +struct MyBroadcastStyleWithField <: Broadcast.BroadcastStyle + i::Int +end +# asymmetry intended +Base.BroadcastStyle(a::MyBroadcastStyleWithField, b::MyBroadcastStyleWithField) = a + +@testset "issue #50937: styles that have fields" begin + @test Broadcast.result_style(MyBroadcastStyleWithField(1), MyBroadcastStyleWithField(1)) == + MyBroadcastStyleWithField(1) + @test_throws ErrorException Broadcast.result_style(MyBroadcastStyleWithField(1), + MyBroadcastStyleWithField(2)) + dest = [0, 0] + dest .= Broadcast.Broadcasted(MyBroadcastStyleWithField(1), +, (1:2, 2:3)) + @test dest == [3, 5] +end + # test that `Broadcast` definition is defined as total and eligible for concrete evaluation import Base.Broadcast: BroadcastStyle, DefaultArrayStyle @test Base.infer_effects(BroadcastStyle, (DefaultArrayStyle{1},DefaultArrayStyle{2},)) |> Core.Compiler.is_foldable + +f51129(v, x) = (1 .- (v ./ x) .^ 2) +@test @inferred(f51129([13.0], 6.5)) == [-3.0] diff --git a/test/cartesian.jl b/test/cartesian.jl index 772ce259c7d24..9643da72642ec 100644 --- a/test/cartesian.jl +++ b/test/cartesian.jl @@ -296,8 +296,7 @@ end R = CartesianIndex(1, 1):CartesianIndex(2, 
3):CartesianIndex(4, 5) @test R.indices == (1:2:3, 1:3:4) i = CartesianIndex(4, 1) - i_next = CartesianIndex(1, 4) - @test !(i in R) && iterate(R, i) == (i_next, i_next) + @test !(i in R) for R in [ CartesianIndices((1:-1:-1, 1:2:5)), @@ -393,19 +392,20 @@ end @testset "CartesianIndices overflow" begin @testset "incremental steps" begin + # n.b. typemax is an odd number I = CartesianIndices((1:typemax(Int),)) i = last(I) @test iterate(I, i) === nothing I = CartesianIndices((1:2:typemax(Int), )) - i = CartesianIndex(typemax(Int)-1) + i = CartesianIndex(typemax(Int)) @test iterate(I, i) === nothing I = CartesianIndices((1:(typemax(Int)-1),)) - i = CartesianIndex(typemax(Int)) + i = CartesianIndex(typemax(Int)-1) @test iterate(I, i) === nothing - I = CartesianIndices((1:2:typemax(Int)-1, )) + I = CartesianIndices((2:2:typemax(Int)-1, )) i = CartesianIndex(typemax(Int)-1) @test iterate(I, i) === nothing @@ -413,7 +413,7 @@ end i = last(I) @test iterate(I, i) === nothing - I = CartesianIndices((1:2:typemax(Int), 1:2:typemax(Int))) + I = CartesianIndices((2:2:typemax(Int), 2:2:typemax(Int))) i = CartesianIndex(typemax(Int)-1, typemax(Int)-1) @test iterate(I, i) === nothing @@ -421,9 +421,9 @@ end i = CartesianIndex(typemax(Int), 1) @test iterate(I, i) === (CartesianIndex(1, 2), CartesianIndex(1,2)) - I = CartesianIndices((1:2:typemax(Int), 1:2:typemax(Int))) + I = CartesianIndices((2:2:typemax(Int), 2:2:typemax(Int))) i = CartesianIndex(typemax(Int)-1, 1) - @test iterate(I, i) === (CartesianIndex(1, 3), CartesianIndex(1, 3)) + @test iterate(I, i) === (CartesianIndex(2, 3), CartesianIndex(2, 3)) I = CartesianIndices((typemin(Int):(typemin(Int)+3),)) i = last(I) @@ -493,15 +493,6 @@ end end @test length(I) == length(indices) @test vec(collect(I)) == indices - - # test invalid state - I = CartesianIndices((2:4, 3:5)) - @test iterate(I, CartesianIndex(typemax(Int), 3))[1] == CartesianIndex(2,4) - @test iterate(I, CartesianIndex(typemax(Int), 4))[1] == CartesianIndex(2,5) - @test iterate(I, CartesianIndex(typemax(Int), 5)) === nothing - - @test iterate(I, CartesianIndex(3, typemax(Int)))[1] == CartesianIndex(4,typemax(Int)) - @test iterate(I, CartesianIndex(4, typemax(Int))) === nothing end @testset "CartesianIndices operations" begin @@ -515,6 +506,12 @@ end f39705() = Base.Cartesian.@nany 0 _ -> true @test f39705() === false +@testset "Cartesian @nall macro test" begin + i_1, i_2, i_3 = 1, 2, 3; + @test Base.Cartesian.@nall 2 d->(i_d <= 2) + @test !Base.Cartesian.@nall 3 d->(i_d <= 2) +end + @testset "CartesianIndices with Bool" begin @test @inferred(CartesianIndices((true,))) == CartesianIndices((1,)) @test @inferred(CartesianIndices((false,))) == CartesianIndices((0,)) @@ -536,3 +533,35 @@ end inds2 = (1, CI(1, 2), 1, CI(1, 2), 1, CI(1, 2), 1) @test (@inferred CI(inds2)) == CI(1, 1, 2, 1, 1, 2, 1, 1, 2, 1) end + +@testset "@ncallkw" begin + f(x...; a, b = 1, c = 2, d = 3) = +(x..., a, b, c, d) + x_1, x_2 = (-1, -2) + kw = (a = 0, c = 0, d = 0) + @test x_1 + x_2 + 1 + 4 == Base.Cartesian.@ncallkw 2 f kw 4 x + b = 0 + kw = (c = 0, d = 0) + @test x_1 + x_2 + 4 == Base.Cartesian.@ncallkw 2 f (; a = 0, b, kw...) 4 x +end + +@testset "if with and without else branch" begin + t1 = Base.Cartesian.@ntuple 3 i -> i == 1 ? 
1 : 0 + t2 = Base.Cartesian.@ntuple 3 i -> begin + m = 0 + if i == 1 + m = 1 + end + m + end + @test t1 == t2 + t3 = Base.Cartesian.@ntuple 3 i -> begin + m = 0 + if i == 1 + m = 1 + elseif i == 2 + m = 2 + end + m + end + @test t3 == (1, 2, 0) +end diff --git a/test/ccall.jl b/test/ccall.jl index 3a1b6ff3db733..3647173eb9290 100644 --- a/test/ccall.jl +++ b/test/ccall.jl @@ -802,7 +802,7 @@ if cfunction_closure verbose && println("Testing cfunction closures: ") # helper Type for testing that constructors work -# with cfucntion and that object identity is preserved +# with cfunction and that object identity is preserved mutable struct IdentityTestKV{K, V} (T::Type{<:IdentityTestKV})(S) = (@test T === S; T) end @@ -1020,7 +1020,7 @@ end else -@test_broken "cfunction: no support for closures on this platform" +@test_broken "cfunction: no support for closures on this platform" === nothing end @@ -1125,12 +1125,12 @@ struct Struct_AA64_2 v2::Float64 end -# This is a homogenious short vector aggregate +# This is a homogeneous short vector aggregate struct Struct_AA64_3 v1::VecReg{8,Int8} v2::VecReg{2,Float32} end -# This is NOT a homogenious short vector aggregate +# This is NOT a homogeneous short vector aggregate struct Struct_AA64_4 v2::VecReg{2,Float32} v1::VecReg{8,Int16} @@ -1477,7 +1477,7 @@ end # issue #20835 @test_throws(ErrorException("could not evaluate ccall argument type (it might depend on a local variable)"), eval(:(f20835(x) = ccall(:fn, Cvoid, (Ptr{typeof(x)},), x)))) -@test_throws(UndefVarError(:Something_not_defined_20835), +@test_throws(UndefVarError(:Something_not_defined_20835, @__MODULE__), eval(:(f20835(x) = ccall(:fn, Something_not_defined_20835, (Ptr{typeof(x)},), x)))) @test isempty(methods(f20835)) @@ -1516,6 +1516,12 @@ end @test_throws(ErrorException("ccall return type struct fields cannot contain a reference"), @eval ccall(:fn, typeof(Ref("")), ())) +fn45187() = nothing + +@test_throws(TypeError, @eval ccall(nothing, Cvoid, ())) +@test_throws(TypeError, @eval ccall(49142, Cvoid, ())) +@test_throws(TypeError, @eval ccall((:fn, fn45187), Cvoid, ())) + # test for malformed syntax errors @test Expr(:error, "more arguments than types for ccall") == Meta.lower(@__MODULE__, :(ccall(:fn, A, (), x))) @test Expr(:error, "more arguments than types for ccall") == Meta.lower(@__MODULE__, :(ccall(:fn, A, (B,), x, y))) @@ -1590,6 +1596,32 @@ function caller22734(ptr) end @test caller22734(ptr22734) === 32.0 +# issue #46786 -- non-isbitstypes passed "by-value" +struct NonBits46786 + x::Union{Int16,NTuple{3,UInt8}} +end +let ptr = @cfunction(identity, NonBits46786, (NonBits46786,)) + obj1 = NonBits46786((0x01,0x02,0x03)) + obj2 = ccall(ptr, NonBits46786, (NonBits46786,), obj1) + @test obj1 === obj2 +end +let ptr = @cfunction(identity, Base.RefValue{NonBits46786}, (Base.RefValue{NonBits46786},)) + obj1 = Base.RefValue(NonBits46786((0x01,0x02,0x03))) + obj2 = ccall(ptr, Base.RefValue{NonBits46786}, (Base.RefValue{NonBits46786},), obj1) + @test obj1 !== obj2 + @test obj1.x === obj2.x +end + +mutable struct MutNonBits46786 + x::Union{Int16,NTuple{3,UInt8}} +end +let ptr = @cfunction(identity, MutNonBits46786, (MutNonBits46786,)) + obj1 = MutNonBits46786((0x01,0x02,0x03)) + obj2 = ccall(ptr, MutNonBits46786, (MutNonBits46786,), obj1) + @test obj1 !== obj2 + @test obj1.x === obj2.x +end + # 26297#issuecomment-371165725 # test that the first argument to cglobal is recognized as a tuple literal even through # macro expansion @@ -1725,37 +1757,11 @@ end )::Cstring))...) 
@test call == Base.remove_linenums!( quote - local arg1root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cstring)), $(Expr(:escape, :str))) - local arg1 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cstring)), arg1root) - local arg2root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cint)), $(Expr(:escape, :num1))) - local arg2 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cint)), arg2root) - local arg3root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cint)), $(Expr(:escape, :num2))) - local arg3 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cint)), arg3root) - $(Expr(:foreigncall, - :($(Expr(:escape, :((:func, libstring))))), - :($(Expr(:escape, :Cstring))), - :($(Expr(:escape, :(($(Expr(:core, :svec)))(Cstring, Cint, Cint))))), - 0, - :(:ccall), - :arg1, :arg2, :arg3, :arg1root, :arg2root, :arg3root)) + ccall($(Expr(:escape, :((:func, libstring)))), $(Expr(:cconv, :ccall, 0)), $(Expr(:escape, :Cstring)), ($(Expr(:escape, :Cstring)), $(Expr(:escape, :Cint)), $(Expr(:escape, :Cint))), $(Expr(:escape, :str)), $(Expr(:escape, :num1)), $(Expr(:escape, :num2))) end) - # pointer interpolation - call = ccall_macro_lower(:ccall, ccall_macro_parse(:( $(Expr(:$, :fptr))("bar"::Cstring)::Cvoid ))...) - @test Base.remove_linenums!(call) == Base.remove_linenums!( - quote - func = $(Expr(:escape, :fptr)) - begin - if !(func isa Ptr{Cvoid}) - name = :fptr - throw(ArgumentError("interpolated function `$(name)` was not a Ptr{Cvoid}, but $(typeof(func))")) - end - end - local arg1root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cstring)), $(Expr(:escape, "bar"))) - local arg1 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cstring)), arg1root) - $(Expr(:foreigncall, :func, :($(Expr(:escape, :Cvoid))), :($(Expr(:escape, :(($(Expr(:core, :svec)))(Cstring))))), 0, :(:ccall), :arg1, :arg1root)) - end) - + local fptr = :x + @test_throws ArgumentError("interpolated function `fptr` was not a Ptr{Cvoid}, but Symbol") @ccall $fptr()::Cvoid end @testset "check error paths" begin @@ -1767,7 +1773,7 @@ end @test_throws ArgumentError("args in @ccall need type annotations. 'x' doesn't have one.") ccall_macro_parse(:( foo(x)::Cint )) # missing type annotations on varargs arguments @test_throws ArgumentError("args in @ccall need type annotations. 'y' doesn't have one.") ccall_macro_parse(:( foo(x::Cint ; y)::Cint )) - # no reqired args on varargs call + # no required args on varargs call @test_throws ArgumentError("C ABI prohibits vararg without one required argument") ccall_macro_parse(:( foo(; x::Cint)::Cint )) # not a function pointer @test_throws ArgumentError("interpolated function `PROGRAM_FILE` was not a Ptr{Cvoid}, but String") @ccall $PROGRAM_FILE("foo"::Cstring)::Cvoid @@ -1791,7 +1797,7 @@ end str_identity = @cfunction(identity, Cstring, (Cstring,)) foo = @ccall $str_identity("foo"::Cstring)::Cstring @test unsafe_string(foo) == "foo" - # test interpolation of an expresison that returns a pointer. + # test interpolation of an expression that returns a pointer. 
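# ---- Editorial sketch (illustrative, not part of this patch): pointer interpolation in `@ccall`.
# A `@cfunction` over a top-level function yields a `Ptr{Cvoid}`, and that pointer can be
# interpolated into `@ccall`; interpolating any non-pointer value triggers the ArgumentError
# exercised in the "check error paths" testset above. The name `fp_example` is hypothetical.
fp_example = @cfunction(identity, Cstring, (Cstring,))   # fp_example isa Ptr{Cvoid}
unsafe_string(@ccall $fp_example("foo"::Cstring)::Cstring) == "foo"
# ---- end editorial sketch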
foo = @ccall $(@cfunction(identity, Cstring, (Cstring,)))("foo"::Cstring)::Cstring @test unsafe_string(foo) == "foo" @@ -1832,7 +1838,7 @@ ccall_lazy_lib_name(x) = ccall((:testUcharX, compute_lib_name()), Int32, (UInt8, @test ccall_lazy_lib_name(0) == 0 @test ccall_lazy_lib_name(3) == 1 ccall_with_undefined_lib() = ccall((:time, xx_nOt_DeFiNeD_xx), Cint, (Ptr{Cvoid},), C_NULL) -@test_throws UndefVarError(:xx_nOt_DeFiNeD_xx) ccall_with_undefined_lib() +@test_throws UndefVarError(:xx_nOt_DeFiNeD_xx, @__MODULE__) ccall_with_undefined_lib() @testset "transcode for UInt8 and UInt16" begin a = [UInt8(1), UInt8(2), UInt8(3)] @@ -1884,6 +1890,12 @@ end function cglobal33413_literal_notype() return cglobal(:sin) end + function cglobal49142_nothing() + return cglobal(nothing) + end + function cglobal45187fn() + return cglobal((:fn, fn45187)) + end @test unsafe_load(cglobal33413_ptrvar()) == 1 @test unsafe_load(cglobal33413_ptrinline()) == 1 @test unsafe_load(cglobal33413_tupleliteral()) == 1 @@ -1892,10 +1904,33 @@ end @test unsafe_load(convert(Ptr{Cint}, cglobal33413_tupleliteral_notype())) == 1 @test cglobal33413_literal() != C_NULL @test cglobal33413_literal_notype() != C_NULL + @test_throws(TypeError, cglobal49142_nothing()) + @test_throws(TypeError, cglobal45187fn()) + @test_throws(TypeError, @eval cglobal(nothing)) + @test_throws(TypeError, @eval cglobal((:fn, fn45187))) end @testset "ccall_effects" begin - ctest_total(x) = @Base.assume_effects :total @ccall libccalltest.ctest(x::Complex{Int})::Complex{Int} + ctest_total(x) = Base.@assume_effects :total @ccall libccalltest.ctest(x::Complex{Int})::Complex{Int} ctest_total_const() = Val{ctest_total(1 + 2im)}() Core.Compiler.return_type(ctest_total_const, Tuple{}) == Val{2 + 0im} end + +const libfrobozz = "" + +function somefunction_not_found() + ccall((:somefunction, libfrobozz), Cvoid, ()) +end + +function somefunction_not_found_libc() + ccall(:test,Int,()) +end + +@testset "library not found" begin + if Sys.islinux() + @test_throws "could not load symbol \"somefunction\"" somefunction_not_found() + else + @test_throws "could not load library \"\"" somefunction_not_found() + end + @test_throws "could not load symbol \"test\"" somefunction_not_found_libc() +end diff --git a/test/channel_threadpool.jl b/test/channel_threadpool.jl new file mode 100644 index 0000000000000..4509604087fa8 --- /dev/null +++ b/test/channel_threadpool.jl @@ -0,0 +1,14 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +using Test +using Base.Threads + +@testset "Task threadpools" begin + c = Channel{Symbol}() do c; put!(c, threadpool(current_task())); end + @test take!(c) === threadpool(current_task()) + c = Channel{Symbol}(spawn = true) do c; put!(c, threadpool(current_task())); end + @test take!(c) === :default + c = Channel{Symbol}(threadpool = :interactive) do c; put!(c, threadpool(current_task())); end + @test take!(c) === :interactive + @test_throws ArgumentError Channel{Symbol}(threadpool = :foo) do c; put!(c, :foo); end +end diff --git a/test/channels.jl b/test/channels.jl index 1b7f96ad528bf..5633d9480d0b8 100644 --- a/test/channels.jl +++ b/test/channels.jl @@ -14,6 +14,28 @@ using Base: n_avail @test fetch(t) == "finished" end +@testset "wait first behavior of wait on Condition" begin + a = Condition() + waiter1 = @async begin + wait(a) + end + waiter2 = @async begin + wait(a) + end + waiter3 = @async begin + wait(a; first=true) + end + waiter4 = @async begin + wait(a) + end + t = @async begin + Base.notify(a, "success"; all=false) + "finished" + end + @test fetch(waiter3) == "success" + @test fetch(t) == "finished" +end + @testset "various constructors" begin c = Channel() @test eltype(c) == Any @@ -85,6 +107,11 @@ end @test taskref[].sticky == false @test collect(c) == [0] end +let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no channel_threadpool.jl` + new_env = copy(ENV) + new_env["JULIA_NUM_THREADS"] = "1,1" + run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr)) +end @testset "multiple concurrent put!/take! on a channel for different sizes" begin function testcpt(sz) @@ -288,7 +315,8 @@ end end @testset "timedwait on multiple channels" begin - @Experimental.sync begin + Experimental.@sync begin + sync = Channel(1) rr1 = Channel(1) rr2 = Channel(1) rr3 = Channel(1) @@ -298,20 +326,17 @@ end @test !callback() @test timedwait(callback, 0) === :timed_out - @async begin sleep(0.5); put!(rr1, :ok) end + @async begin put!(sync, :ready); sleep(0.5); put!(rr1, :ok) end @async begin sleep(1.0); put!(rr2, :ok) end - @async begin sleep(2.0); put!(rr3, :ok) end + @async begin @test take!(rr3) == :done end + @test take!(sync) == :ready et = @elapsed timedwait(callback, 1) - # assuming that 0.5 seconds is a good enough buffer on a typical modern CPU - try - @assert (et >= 1.0) && (et <= 1.5) - @assert !isready(rr3) - catch - @warn "`timedwait` tests delayed. et=$et, isready(rr3)=$(isready(rr3))" - end + @test et >= 1.0 + @test isready(rr1) + put!(rr3, :done) end end @@ -359,7 +384,7 @@ end redirect_stderr(oldstderr) close(newstderr[2]) end - @test fetch(errstream) == "\nWARNING: Workqueue inconsistency detected: popfirst!(Workqueue).state != :runnable\n" + @test fetch(errstream) == "\nWARNING: Workqueue inconsistency detected: popfirst!(Workqueue).state !== :runnable\n" end @testset "throwto" begin @@ -432,8 +457,8 @@ end Sys.iswindows() && Base.process_events() # schedule event (windows?) 
close(async) # and close @test !isopen(async) - @test tc[] == 2 - @test tc[] == 2 + @test tc[] == 3 + @test tc[] == 3 yield() # consume event & then close @test tc[] == 3 sleep(0.1) # no further events @@ -454,7 +479,7 @@ end close(async) @test !isopen(async) Base.process_events() # and close - @test tc[] == 0 + @test tc[] == 1 yield() # consume event & then close @test tc[] == 1 sleep(0.1) # no further events @@ -534,7 +559,7 @@ end e = @elapsed for i = 1:5 wait(t) end - @test 1.5 > e >= 0.4 + @test e >= 0.4 @test a[] == 0 nothing end diff --git a/test/char.jl b/test/char.jl index 1639c62ec819d..1d3579013ad18 100644 --- a/test/char.jl +++ b/test/char.jl @@ -1,7 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license @testset "basic properties" begin - @test typemax(Char) == reinterpret(Char, typemax(UInt32)) @test typemin(Char) == Char(0) @test typemax(Char) == reinterpret(Char, 0xffffffff) @@ -214,6 +213,35 @@ end end end +# issue #50532 +@testset "invalid read(io, Char)" begin + # byte values with different numbers of leading bits + B = UInt8[ + 0x3f, 0x4d, 0x52, 0x63, 0x81, 0x83, 0x89, 0xb6, + 0xc0, 0xc8, 0xd3, 0xe3, 0xea, 0xeb, 0xf0, 0xf2, + 0xf4, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, + ] + f = tempname() + for b1 in B, b2 in B, t = 0:3 + bytes = [b1, b2] + append!(bytes, rand(B, t)) + s = String(bytes) + write(f, s) + @test s == read(f, String) + chars = collect(s) + ios = [IOBuffer(s), open(f), Base.Filesystem.open(f, 0)] + for io in ios + chars′ = Char[] + while !eof(io) + push!(chars′, read(io, Char)) + end + @test chars == chars′ + close(io) + end + end + rm(f) +end + @testset "overlong codes" begin function test_overlong(c::Char, n::Integer, rep::String) if isvalid(c) diff --git a/test/choosetests.jl b/test/choosetests.jl index 099dfa18a71c5..beed4e15a58df 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -19,18 +19,32 @@ const TESTNAMES = [ "mpfr", "broadcast", "complex", "floatapprox", "stdlib", "reflection", "regex", "float16", "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi", - "euler", "show", "client", + "euler", "show", "client", "terminfo", "errorshow", "sets", "goto", "llvmcall", "llvmcall2", "ryu", - "some", "meta", "stacktraces", "docs", + "some", "meta", "stacktraces", "docs", "gc", "misc", "threads", "stress", "binaryplatforms", "atexit", "enums", "cmdlineargs", "int", "interpreter", - "checked", "bitset", "floatfuncs", "precompile", + "checked", "bitset", "floatfuncs", "precompile", "relocatedepot", "boundscheck", "error", "ambiguous", "cartesian", "osutils", "channels", "iostream", "secretbuffer", "specificity", "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", "smallarrayshrink", "opaque_closure", "filesystem", "download", + "scopedvalues", ] +const INTERNET_REQUIRED_LIST = [ + "Artifacts", + "Downloads", + "LazyArtifacts", + "LibCURL", + "LibGit2", + "Pkg", + "TOML", + "download", +] + +const NETWORK_REQUIRED_LIST = vcat(INTERNET_REQUIRED_LIST, ["Sockets"]) + """ `(; tests, net_on, exit_on_error, seed) = choosetests(choices)` selects a set of tests to be run. 
`choices` should be a vector of test names; if empty or set to @@ -137,16 +151,19 @@ function choosetests(choices = []) filtertests!(tests, "unicode", ["unicode/utf8"]) filtertests!(tests, "strings", ["strings/basic", "strings/search", "strings/util", - "strings/io", "strings/types"]) + "strings/io", "strings/types", "strings/annotated"]) # do subarray before sparse but after linalg filtertests!(tests, "subarray") - filtertests!(tests, "compiler", ["compiler/inference", "compiler/validation", - "compiler/ssair", "compiler/irpasses", "compiler/codegen", - "compiler/inline", "compiler/contextual", "compiler/AbstractInterpreter", - "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"]) + filtertests!(tests, "compiler", [ + "compiler/datastructures", "compiler/inference", "compiler/effects", + "compiler/validation", "compiler/ssair", "compiler/irpasses", + "compiler/codegen", "compiler/inline", "compiler/contextual", + "compiler/invalidation", "compiler/AbstractInterpreter", + "compiler/EscapeAnalysis/EscapeAnalysis"]) filtertests!(tests, "compiler/EscapeAnalysis", [ - "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"]) + "compiler/EscapeAnalysis/EscapeAnalysis"]) filtertests!(tests, "stdlib", STDLIBS) + filtertests!(tests, "internet_required", INTERNET_REQUIRED_LIST) # do ambiguous first to avoid failing if ambiguities are introduced by other tests filtertests!(tests, "ambiguous") @@ -157,43 +174,35 @@ function choosetests(choices = []) filter!(x -> (x != "Profile"), tests) end - net_required_for = [ - "Artifacts", - "Downloads", - "LazyArtifacts", - "LibCURL", - "LibGit2", - "Sockets", - "download", - ] + if ccall(:jl_running_on_valgrind,Cint,()) != 0 && "rounding" in tests + @warn "Running under valgrind: Skipping rounding tests" + filter!(x -> x != "rounding", tests) + end + + net_required_for = filter!(in(tests), NETWORK_REQUIRED_LIST) net_on = true - JULIA_TEST_NETWORKING_AVAILABLE = get(ENV, "JULIA_TEST_NETWORKING_AVAILABLE", "") |> - strip |> - lowercase |> - s -> tryparse(Bool, s) |> - x -> x === true + JULIA_TEST_NETWORKING_AVAILABLE = Base.get_bool_env("JULIA_TEST_NETWORKING_AVAILABLE", false) === true # If the `JULIA_TEST_NETWORKING_AVAILABLE` environment variable is set to `true`, we # always set `net_on` to `true`. # Otherwise, we set `net_on` to true if and only if networking is actually available. 
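# ---- Editorial note (illustrative, not part of this patch).
# On a machine where `getipaddr()` fails, the override variable keeps the
# network-dependent tests selected (hypothetical invocation):
#     ENV["JULIA_TEST_NETWORKING_AVAILABLE"] = "true"
#     Base.runtests(["download"])
# Without it, any selected name from NETWORK_REQUIRED_LIST is filtered out of
# `tests` with a warning, unless `--ci` was passed, in which case the failure
# is rethrown as an error.
# ---- end editorial note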
if !JULIA_TEST_NETWORKING_AVAILABLE try - ipa = getipaddr() + getipaddr() catch if ci_option_passed @error("Networking unavailable, but `--ci` was passed") rethrow() end net_on = false - @warn "Networking unavailable: Skipping tests [" * join(net_required_for, ", ") * "]" - filter!(!in(net_required_for), tests) + if isempty(net_required_for) + @warn "Networking unavailable" + else + @warn "Networking unavailable: Skipping tests [" * join(net_required_for, ", ") * "]" + filter!(!in(net_required_for), tests) + end end end - if ccall(:jl_running_on_valgrind,Cint,()) != 0 && "rounding" in tests - @warn "Running under valgrind: Skipping rounding tests" - filter!(x -> x != "rounding", tests) - end - filter!(!in(tests), unhandled) filter!(!in(skip_tests), tests) diff --git a/test/clangsa/GCPushPop.cpp b/test/clangsa/GCPushPop.cpp index f8dcfdafa5aa9..72e0494a7d936 100644 --- a/test/clangsa/GCPushPop.cpp +++ b/test/clangsa/GCPushPop.cpp @@ -3,6 +3,7 @@ // RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s #include "julia.h" +#include <string> void missingPop() { jl_value_t *x = NULL; @@ -17,7 +18,7 @@ void missingPop2() { } // expected-warning{{Non-popped GC frame present at end of function}} // expected-note@-1{{Non-popped GC frame present at end of function}} -void superflousPop() { +void superfluousPop() { JL_GC_POP(); // expected-warning{{JL_GC_POP without corresponding push}} } // expected-note@-1{{JL_GC_POP without corresponding push}} @@ -34,3 +35,21 @@ void jl_gc_run_finalizers_in_list(jl_ptls_t ptls, arraylist_t *list) // run_finalizer(ptls, items[i], items[i + 1]); JL_GC_POP(); } + +bool testfunc1() JL_NOTSAFEPOINT +{ + struct implied_struct1 { // expected-note{{Tried to call method defined here}} + std::string s; + struct implied_constructor { } x; + } x; // expected-warning{{Calling potential safepoint as CXXConstructorCall from function annotated JL_NOTSAFEPOINT}} + // expected-note@-1{{Calling potential safepoint as CXXConstructorCall from function annotated JL_NOTSAFEPOINT}} + return 1; +} +bool testfunc2() JL_NOTSAFEPOINT +{ + struct implied_struct2 { // expected-note{{Tried to call method defined here}} + std::string s; + } x{""}; + return 1; // expected-warning{{Calling potential safepoint as CXXDestructorCall from function annotated JL_NOTSAFEPOINT}} + // expected-note@-1{{Calling potential safepoint as CXXDestructorCall from function annotated JL_NOTSAFEPOINT}} +} diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c index f0b32c54bc7b8..0ff5e633622ce 100644 --- a/test/clangsa/MissingRoots.c +++ b/test/clangsa/MissingRoots.c @@ -352,6 +352,9 @@ void assoc_exact_broken(jl_value_t **args, size_t n, int8_t offs, size_t world) } */ +// declare +jl_typemap_level_t *jl_new_typemap_level(void); + void assoc_exact_ok(jl_value_t *args1, jl_value_t **args, size_t n, int8_t offs, size_t world) { jl_typemap_level_t *cache = jl_new_typemap_level(); JL_GC_PUSH1(&cache); diff --git a/test/client.jl b/test/client.jl index 195743b1d6208..61fe7d5093474 100644 --- a/test/client.jl +++ b/test/client.jl @@ -12,14 +12,14 @@ nested_error_pattern = r""" ERROR: DivideError: integer division error Stacktrace:.* - caused by: UndefVarError: __not_a_binding__ not defined + caused by: UndefVarError: 
`__not_a_binding__` not defined in `Main` Stacktrace:.* """s @testset "display_error" begin # Display of errors which cause more than one entry on the exception stack excs = try - eval(nested_error_expr) + Core.eval(Main, nested_error_expr) catch Base.current_exceptions() end @@ -31,7 +31,7 @@ nested_error_pattern = r""" DivideError: integer division error Stacktrace:.* - caused by: UndefVarError: __not_a_binding__ not defined + caused by: UndefVarError: `__not_a_binding__` not defined in `Main` Stacktrace:.* """s, sprint(show, excs)) end @@ -52,3 +52,8 @@ end ERROR: ErrorException """s, err_str) end + +@testset "defining `ans` and `err`" begin + @test eval(:(ans = 1)) == 1 + @test eval(:(err = 1)) == 1 +end diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 82e5dd5c04619..92bada23cb258 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -4,8 +4,8 @@ import Libdl # helper function for passing input to stdin # and returning the stdout result -function writereadpipeline(input, exename) - p = open(exename, "w+") +function writereadpipeline(input, exename; stderr=nothing) + p = open(pipeline(exename; stderr), "w+") @async begin write(p.in, input) close(p.in) @@ -31,6 +31,16 @@ function format_filename(s) return r end +# Returns true if the given command errors, but doesn't signal +function errors_not_signals(cmd::Cmd) + p = run(pipeline(ignorestatus(cmd); stdout=devnull, stderr=devnull)) + return errors_not_signals(p) +end +function errors_not_signals(p::Base.Process) + wait(p) + return process_exited(p) && !Base.process_signaled(p) && !success(p) +end + let fn = format_filename("a%d %p %i %L %l %u z") hd = withenv("HOME" => nothing) do @@ -50,26 +60,125 @@ let @test format_filename("%a%%b") == "a%b" end +@testset "julia_cmd" begin + julia_basic = Base.julia_cmd() + function get_julia_cmd(arg) + io = Base.BufferStream() + cmd = `$julia_basic $arg -e 'print(repr(Base.julia_cmd()))'` + try + run(pipeline(cmd, stdout=io, stderr=io)) + catch + @error "cmd failed" cmd read(io, String) + rethrow() + end + closewrite(io) + return read(io, String) + end + + opts = Base.JLOptions() + + for (arg, default) in ( + # Use a Cmd to handle space nicely when + # interpolating inside another Cmd. 
+ (`-C $(unsafe_string(opts.cpu_target))`, false), + + ("-J$(unsafe_string(opts.image_file))", false), + + ("--depwarn=yes", false), + ("--depwarn=error", false), + ("--depwarn=no", true), + + ("--check-bounds=yes", false), + ("--check-bounds=no", false), + ("--check-bounds=auto", true), + + ("--inline=no", false), + ("--inline=yes", true), + + ("-O0", false), + ("-O1", false), + ("-O2", true), + ("-O3", false), + + ("--min-optlevel=0", true), + ("--min-optlevel=1", false), + ("--min-optlevel=2", false), + ("--min-optlevel=3", false), + + ("-g0", false), + ("-g1", false), + ("-g2", false), + + ("--compile=no", false), + ("--compile=all", false), + ("--compile=min", false), + ("--compile=yes", true), + + ("--code-coverage=@", false), + ("--code-coverage=user", false), + ("--code-coverage=all", false), + ("--code-coverage=none", true), + + ("--track-allocation=@", false), + ("--track-allocation=user", false), + ("--track-allocation=all", false), + ("--track-allocation=none", true), + + ("--color=yes", false), + ("--color=no", false), + + ("--startup-file=no", false), + ("--startup-file=yes", true), + + # ("--sysimage-native-code=no", false), # takes a lot longer (30s) + ("--sysimage-native-code=yes", true), + + ("--pkgimages=yes", true), + ("--pkgimages=no", false), + ) + @testset "$arg" begin + str = arg isa Cmd ? join(arg.exec, ' ') : arg + if default + @test !occursin(str, get_julia_cmd(arg)) + else + @test occursin(str, get_julia_cmd(arg)) + end + end + end + + # Test empty `cpu_target` gives a helpful error message, issue #52209. + io = IOBuffer() + p = run(pipeline(`$(Base.julia_cmd(; cpu_target="")) --startup-file=no -e ''`; stderr=io); wait=false) + wait(p) + @test p.exitcode == 1 + @test occursin("empty CPU name", String(take!(io))) +end + let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # tests for handling of ENV errors - let v = writereadpipeline("println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))", - setenv(`$exename -i -E 'empty!(LOAD_PATH); @isdefined InteractiveUtils'`, + let + io = IOBuffer() + v = writereadpipeline( + "println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))", + setenv(`$exename -i -E '@assert isempty(LOAD_PATH); push!(LOAD_PATH, "@stdlib"); @isdefined InteractiveUtils'`, "JULIA_LOAD_PATH" => "", - "JULIA_DEPOT_PATH" => "", - "HOME" => homedir())) - @test v[1] == "false\nREPL: InteractiveUtilstrue\n" - @test v[2] + "JULIA_DEPOT_PATH" => ";:", + "HOME" => homedir()); + stderr=io) + # @which is undefined + @test_broken v == ("false\nREPL: InteractiveUtilstrue\n", true) + stderr = String(take!(io)) + @test_broken isempty(stderr) end let v = writereadpipeline("println(\"REPL: \", InteractiveUtils)", setenv(`$exename -i -e 'const InteractiveUtils = 3'`, "JULIA_LOAD_PATH" => ";;;:::", "JULIA_DEPOT_PATH" => ";;;:::", "HOME" => homedir())) - # TODO: ideally, `@which`, etc. would still work, but Julia can't handle `using $InterativeUtils` - @test v[1] == "REPL: 3\n" - @test v[2] + # TODO: ideally, `@which`, etc. 
would still work, but Julia can't handle `using $InteractiveUtils` + @test v == ("REPL: 3\n", true) end - let v = readchomperrors(`$exename -i -e ' + @testset let v = readchomperrors(`$exename -i -e ' empty!(LOAD_PATH) @eval Sys STDLIB=mktempdir() Base.unreference_module(Base.PkgId(Base.UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils")) @@ -78,40 +187,46 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # make sure this is a non-fatal error and the REPL still loads @test v[1] @test isempty(v[2]) - @test startswith(v[3], "┌ Warning: Failed to import InteractiveUtils into module Main\n") + # Can't load REPL if it's outside the sysimg if we break the load path. + # Need to rewrite this test nicer + # ┌ Warning: REPL provider not available: using basic fallback + # └ @ Base client.jl:459 + @test_broken startswith(v[3], "┌ Warning: Failed to import InteractiveUtils into module Main\n") end real_threads = string(ccall(:jl_cpu_threads, Int32, ())) for nc in ("0", "-2", "x", "2x", " ", "") v = readchomperrors(setenv(`$exename -i -E 'Sys.CPU_THREADS'`, "JULIA_CPU_THREADS" => nc, "HOME" => homedir())) - @test v[1] - @test v[2] == real_threads - @test v[3] == "WARNING: couldn't parse `JULIA_CPU_THREADS` environment variable. Defaulting Sys.CPU_THREADS to $real_threads." + @test v == (true, real_threads, + "WARNING: couldn't parse `JULIA_CPU_THREADS` environment variable. Defaulting Sys.CPU_THREADS to $real_threads.") end for nc in ("1", " 1 ", " +1 ", " 0x1 ") - v = readchomperrors(setenv(`$exename -i -E 'Sys.CPU_THREADS'`, "JULIA_CPU_THREADS" => nc, "HOME" => homedir())) - @test v[1] - @test v[2] == "1" - @test isempty(v[3]) + @testset let v = readchomperrors(setenv(`$exename -i -E 'Sys.CPU_THREADS'`, "JULIA_CPU_THREADS" => nc, "HOME" => homedir())) + @test v[1] + @test v[2] == "1" + @test isempty(v[3]) + end end - let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options", "HOME" => homedir())) + @testset let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options", "HOME" => homedir())) @test v[1] @test contains(v[2], r"print-options + = 1") @test contains(v[2], r"combiner-store-merge-dependence-limit + = 4") @test contains(v[2], r"enable-tail-merge + = 2") @test isempty(v[3]) end - let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -combiner-store-merge-dependence-limit=6", "HOME" => homedir())) + @testset let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -combiner-store-merge-dependence-limit=6", "HOME" => homedir())) @test v[1] @test contains(v[2], r"print-options + = 1") @test contains(v[2], r"combiner-store-merge-dependence-limit + = 6") @test contains(v[2], r"enable-tail-merge + = 1") @test isempty(v[3]) end - let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -enable-tail-merge=1", "HOME" => homedir())) - @test !v[1] - @test isempty(v[2]) - @test v[3] == "julia: for the --enable-tail-merge option: may only occur zero or one times!" + if Base.libllvm_version < v"15" #LLVM over 15 doesn't care for multiple options + @testset let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -enable-tail-merge=1", "HOME" => homedir())) + @test !v[1] + @test isempty(v[2]) + @test v[3] == "julia: for the --enable-tail-merge option: may only occur zero or one times!" 
+ end end end @@ -143,16 +258,19 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` end # --quiet, --banner - let t(q,b) = "Base.JLOptions().quiet == $q && Base.JLOptions().banner == $b" - @test success(`$exename -e $(t(0, -1))`) - @test success(`$exename -q -e $(t(1, 0))`) - @test success(`$exename --quiet -e $(t(1, 0))`) - @test success(`$exename --banner=no -e $(t(0, 0))`) - @test success(`$exename --banner=yes -e $(t(0, 1))`) - @test success(`$exename -q --banner=no -e $(t(1, 0))`) - @test success(`$exename -q --banner=yes -e $(t(1, 1))`) - @test success(`$exename --banner=no -q -e $(t(1, 0))`) - @test success(`$exename --banner=yes -q -e $(t(1, 1))`) + let p = "print((Base.JLOptions().quiet, Base.JLOptions().banner))" + @test read(`$exename -e $p`, String) == "(0, -1)" + @test read(`$exename -q -e $p`, String) == "(1, 0)" + @test read(`$exename --quiet -e $p`, String) == "(1, 0)" + @test read(`$exename --banner=no -e $p`, String) == "(0, 0)" + @test read(`$exename --banner=yes -e $p`, String) == "(0, 1)" + @test read(`$exename --banner=short -e $p`, String) == "(0, 2)" + @test read(`$exename -q --banner=no -e $p`, String) == "(1, 0)" + @test read(`$exename -q --banner=yes -e $p`, String) == "(1, 1)" + @test read(`$exename -q --banner=short -e $p`, String) == "(1, 2)" + @test read(`$exename --banner=no -q -e $p`, String) == "(1, 0)" + @test read(`$exename --banner=yes -q -e $p`, String) == "(1, 1)" + @test read(`$exename --banner=short -q -e $p`, String) == "(1, 2)" end # --home @@ -161,22 +279,22 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # --eval @test success(`$exename -e "exit(0)"`) - @test !success(`$exename -e "exit(1)"`) + @test errors_not_signals(`$exename -e "exit(1)"`) @test success(`$exename --eval="exit(0)"`) - @test !success(`$exename --eval="exit(1)"`) - @test !success(`$exename -e`) - @test !success(`$exename --eval`) + @test errors_not_signals(`$exename --eval="exit(1)"`) + @test errors_not_signals(`$exename -e`) + @test errors_not_signals(`$exename --eval`) # --eval --interactive (replaced --post-boot) @test success(`$exename -i -e "exit(0)"`) - @test !success(`$exename -i -e "exit(1)"`) + @test errors_not_signals(`$exename -i -e "exit(1)"`) # issue #34924 @test success(`$exename -e 'const LOAD_PATH=1'`) # --print @test read(`$exename -E "1+1"`, String) == "2\n" @test read(`$exename --print="1+1"`, String) == "2\n" - @test !success(`$exename -E`) - @test !success(`$exename --print`) + @test errors_not_signals(`$exename -E`) + @test errors_not_signals(`$exename --print`) # --load let testfile = tempname() @@ -209,52 +327,87 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` end end # -L, --load requires an argument - @test !success(`$exename -L`) - @test !success(`$exename --load`) + @test errors_not_signals(`$exename -L`) + @test errors_not_signals(`$exename --load`) # --cpu-target (requires LLVM enabled) - @test !success(`$exename -C invalidtarget`) - @test !success(`$exename --cpu-target=invalidtarget`) + # Strictly test for failed error, not a segfault, since we had a false positive with just `success()` before. 
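# ---- Editorial note (illustrative, not part of this patch).
# `success(p)` is false both for a nonzero exit status and for a child killed by
# a signal, so on its own it cannot distinguish "julia rejected the option" from
# "julia crashed". `errors_not_signals` (added near the top of this file)
# additionally requires
#     process_exited(p) && !Base.process_signaled(p) && !success(p)
# so a segfault on `-C invalidtarget` now fails the test instead of passing it.
# ---- end editorial note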
+ @test errors_not_signals(`$exename -C invalidtarget`) + @test errors_not_signals(`$exename --cpu-target=invalidtarget`) + + if Sys.iswindows() + # -t, --threads + code = "print(Threads.threadpoolsize())" + cpu_threads = ccall(:jl_effective_threads, Int32, ()) + @test string(cpu_threads) == + read(`$exename --threads auto -e $code`, String) == + read(`$exename --threads=auto -e $code`, String) == + read(`$exename -tauto -e $code`, String) == + read(`$exename -t auto -e $code`, String) + for nt in (nothing, "1") + withenv("JULIA_NUM_THREADS" => nt) do + @test read(`$exename --threads=2 -e $code`, String) == + read(`$exename -t 2 -e $code`, String) == "2" + end + end + # We want to test oversubscription, but on manycore machines, this can + # actually exhaust limited PID spaces + cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads)) + if Sys.WORD_SIZE == 32 + cpu_threads = min(cpu_threads, 50) + end + @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads) + withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do + @test read(`$exename -e $code`, String) == string(cpu_threads) + end + @test errors_not_signals(`$exename -t 0`) + @test errors_not_signals(`$exename -t -1`) + + # Combining --threads and --procs: --threads does propagate + withenv("JULIA_NUM_THREADS" => nothing) do + code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))" + @test read(`$exename -p2 -t2 -e $code`, String) == "6" + end + else + @test_skip "Command line tests with -t are flakey on non-Windows OS" + # Known issue: https://github.com/JuliaLang/julia/issues/49154 + # These tests should be fixed and reenabled on all operating systems. + end + + # Combining --threads and invalid -C should yield a decent error + @test errors_not_signals(`$exename -t 2 -C invalidtarget`) + + # --procs + @test readchomp(`$exename -q -p 2 -e "println(nworkers())"`) == "2" + @test errors_not_signals(`$exename -p 0`) + let p = run(`$exename --procs=1.0`, wait=false) + wait(p) + @test p.exitcode == 1 && p.termsignal == 0 + end - # -t, --threads - code = "print(Threads.nthreads())" + # --gcthreads + code = "print(Threads.ngcthreads())" cpu_threads = ccall(:jl_effective_threads, Int32, ()) - @test string(cpu_threads) == + @test (cpu_threads == 1 ? 
"1" : string(div(cpu_threads, 2))) == read(`$exename --threads auto -e $code`, String) == read(`$exename --threads=auto -e $code`, String) == read(`$exename -tauto -e $code`, String) == read(`$exename -t auto -e $code`, String) for nt in (nothing, "1") - withenv("JULIA_NUM_THREADS" => nt) do - @test read(`$exename --threads=2 -e $code`, String) == - read(`$exename -t 2 -e $code`, String) == "2" + withenv("JULIA_NUM_GC_THREADS" => nt) do + @test read(`$exename --gcthreads=2 -e $code`, String) == "2" + end + withenv("JULIA_NUM_GC_THREADS" => nt) do + @test read(`$exename --gcthreads=2,1 -e $code`, String) == "3" end end - # We want to test oversubscription, but on manycore machines, this can - # actually exhaust limited PID spaces - cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads)) - if Sys.WORD_SIZE == 32 - cpu_threads = min(cpu_threads, 50) - end - @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads) - withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do - @test read(`$exename -e $code`, String) == string(cpu_threads) - end - @test !success(`$exename -t 0`) - @test !success(`$exename -t -1`) - # Combining --threads and --procs: --threads does propagate - withenv("JULIA_NUM_THREADS" => nothing) do - code = "print(sum(remotecall_fetch(Threads.nthreads, x) for x in procs()))" - @test read(`$exename -p2 -t2 -e $code`, String) == "6" + withenv("JULIA_NUM_GC_THREADS" => 2) do + @test read(`$exename -e $code`, String) == "2" end - # --procs - @test readchomp(`$exename -q -p 2 -e "println(nworkers())"`) == "2" - @test !success(`$exename -p 0`) - let p = run(`$exename --procs=1.0`, wait=false) - wait(p) - @test p.exitcode == 1 && p.termsignal == 0 + withenv("JULIA_NUM_GC_THREADS" => "2,1") do + @test read(`$exename -e $code`, String) == "3" end # --machine-file @@ -278,14 +431,14 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # --color @test readchomp(`$exename --color=yes -E "Base.have_color"`) == "true" @test readchomp(`$exename --color=no -E "Base.have_color"`) == "false" - @test !success(`$exename --color=false`) + @test errors_not_signals(`$exename --color=false`) # --history-file @test readchomp(`$exename -E "Bool(Base.JLOptions().historyfile)" --history-file=yes`) == "true" @test readchomp(`$exename -E "Bool(Base.JLOptions().historyfile)" --history-file=no`) == "false" - @test !success(`$exename --history-file=false`) + @test errors_not_signals(`$exename --history-file=false`) # --code-coverage mktempdir() do dir @@ -347,10 +500,47 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test occursin(expected, got) || (expected, got) @test_broken occursin(expected_good, got) + # Ask for coverage in current directory + tdir = dirname(realpath(inputfile)) + cd(tdir) do + # there may be atrailing separator here so use rstrip + @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, rstrip(unsafe_string(Base.JLOptions().tracked_path), '/'))" -L $inputfile + --code-coverage=$covfile --code-coverage=@`) == "(3, $(repr(tdir)))" + end + @test isfile(covfile) + got = read(covfile, String) + rm(covfile) + @test occursin(expected, got) || (expected, got) + @test_broken occursin(expected_good, got) + + # Ask for coverage in relative directory + tdir = dirname(realpath(inputfile)) + cd(dirname(tdir)) do + @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile + --code-coverage=$covfile --code-coverage=@testhelpers`) == "(3, $(repr(tdir)))" + end + @test 
isfile(covfile) + got = read(covfile, String) + rm(covfile) + @test occursin(expected, got) || (expected, got) + @test_broken occursin(expected_good, got) + + # Ask for coverage in relative directory with dot-dot notation + tdir = dirname(realpath(inputfile)) + cd(tdir) do + @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile + --code-coverage=$covfile --code-coverage=@../testhelpers`) == "(3, $(repr(tdir)))" + end + @test isfile(covfile) + got = read(covfile, String) + rm(covfile) + @test occursin(expected, got) || (expected, got) + @test_broken occursin(expected_good, got) + # Ask for coverage in a different directory tdir = mktempdir() # a dir that contains no code @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile - --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(tdir)))" + --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(realpath(tdir))))" @test isfile(covfile) got = read(covfile, String) @test isempty(got) @@ -387,9 +577,9 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test popfirst!(got) == " 32 Base.invokelatest(g, x)" end if Sys.WORD_SIZE == 64 - @test popfirst!(got) == " 48 []" - else @test popfirst!(got) == " 32 []" + else + @test popfirst!(got) == " 16 []" end @test popfirst!(got) == " - end" @test popfirst!(got) == " - f(1.23)" @@ -408,29 +598,34 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # -g @test readchomp(`$exename -E "Base.JLOptions().debug_level" -g`) == "2" - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g0`) - @test code[2] - code = code[1] - @test occursin("llvm.module.flags", code) - @test !occursin("llvm.dbg.cu", code) - @test !occursin("int.jl", code) - @test !occursin("Int64", code) - end - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g1`) - @test code[2] - code = code[1] - @test occursin("llvm.module.flags", code) - @test occursin("llvm.dbg.cu", code) - @test occursin("int.jl", code) - @test !occursin("Int64", code) - end - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g2`) - @test code[2] - code = code[1] - @test occursin("llvm.module.flags", code) - @test occursin("llvm.dbg.cu", code) - @test occursin("int.jl", code) - @test occursin("\"Int64\"", code) + # --print-before/--print-after with pass names is broken on Windows due to no-gnu-unique issues + if !Sys.iswindows() + withenv("JULIA_LLVM_ARGS" => "--print-before=BeforeOptimization") do + let code = readchomperrors(`$exename -g0 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test !occursin("llvm.dbg.cu", code) + @test !occursin("int.jl", code) + @test !occursin("name: \"Int64\"", code) + end + let code = readchomperrors(`$exename -g1 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test occursin("llvm.dbg.cu", code) + @test occursin("int.jl", code) + @test !occursin("name: \"Int64\"", code) + end + let code = readchomperrors(`$exename -g2 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test occursin("llvm.dbg.cu", code) + @test occursin("int.jl", code) + @test occursin("name: \"Int64\"", code) + end + end end # 
--check-bounds @@ -449,16 +644,16 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` --check-bounds=no`)) == JL_OPTIONS_CHECK_BOUNDS_OFF end # check-bounds takes yes/no as argument - @test !success(`$exename -E "exit(0)" --check-bounds=false`) + @test errors_not_signals(`$exename -E "exit(0)" --check-bounds=false`) # --depwarn @test readchomp(`$exename --depwarn=no -E "Base.JLOptions().depwarn"`) == "0" @test readchomp(`$exename --depwarn=yes -E "Base.JLOptions().depwarn"`) == "1" - @test !success(`$exename --depwarn=false`) + @test errors_not_signals(`$exename --depwarn=false`) # test deprecated syntax - @test !success(`$exename -e "foo (x::Int) = x * x" --depwarn=error`) + @test errors_not_signals(`$exename -e "foo (x::Int) = x * x" --depwarn=error`) # test deprecated method - @test !success(`$exename -e " + @test errors_not_signals(`$exename -e " foo() = :foo; bar() = :bar @deprecate foo() bar() foo() @@ -476,7 +671,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` Foo.Deprecated """ - @test !success(`$exename -E "$code" --depwarn=error`) + @test errors_not_signals(`$exename -E "$code" --depwarn=error`) @test readchomperrors(`$exename -E "$code" --depwarn=yes`) == (true, "true", "WARNING: Foo.Deprecated is deprecated, use NotDeprecated instead.\n likely near none:8") @@ -490,14 +685,14 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test readchomp(`$exename --inline=yes -E "Bool(Base.JLOptions().can_inline)"`) == "true" @test readchomp(`$exename --inline=no -E "Bool(Base.JLOptions().can_inline)"`) == "false" # --inline takes yes/no as argument - @test !success(`$exename --inline=false`) + @test errors_not_signals(`$exename --inline=false`) # --polly @test readchomp(`$exename -E "Bool(Base.JLOptions().polly)"`) == "true" @test readchomp(`$exename --polly=yes -E "Bool(Base.JLOptions().polly)"`) == "true" @test readchomp(`$exename --polly=no -E "Bool(Base.JLOptions().polly)"`) == "false" # --polly takes yes/no as argument - @test !success(`$exename --polly=false`) + @test errors_not_signals(`$exename --polly=false`) # --fast-math let JL_OPTIONS_FAST_MATH_DEFAULT = 0, @@ -515,7 +710,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # --worker takes default / custom as argument (default/custom arguments # tested in test/parallel.jl) - @test !success(`$exename --worker=true`) + @test errors_not_signals(`$exename --worker=true`) # test passing arguments mktempdir() do dir @@ -551,7 +746,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test readchomp(`$exename -L $testfile $testfile`) == output @test readchomp(`$exename --startup-file=yes $testfile`) == output - @test !success(`$exename --foo $testfile`) + @test errors_not_signals(`$exename --foo $testfile`) end end @@ -613,6 +808,8 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` (false, "", "ERROR: option `--inline` is missing an argument") @test readchomperrors(`$exename --startup-file=no -e "@show ARGS" -now -- julia RUN.jl`) == (false, "", "ERROR: unknown option `-n`") + @test readchomperrors(`$exename --interactive=yes`) == + (false, "", "ERROR: option `-i/--interactive` does not accept an argument") # --compiled-modules={yes|no} @test readchomp(`$exename -E "Bool(Base.JLOptions().use_compiled_modules)"`) == "true" @@ -620,7 +817,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` "Bool(Base.JLOptions().use_compiled_modules)"`) == "true" @test readchomp(`$exename --compiled-modules=no -E 
"Bool(Base.JLOptions().use_compiled_modules)"`) == "false" - @test !success(`$exename --compiled-modules=foo -e "exit(0)"`) + @test errors_not_signals(`$exename --compiled-modules=foo -e "exit(0)"`) # issue #12671, starting from a non-directory # rm(dir) fails on windows with Permission denied @@ -640,14 +837,45 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` end end +# Object file with multiple cpu targets +@testset "Object file for multiple microarchitectures" begin + julia_path = joinpath(Sys.BINDIR, Base.julia_exename()) + outputo_file = tempname() + write(outputo_file, "1") + object_file = tempname() * ".o" + + # This is to test that even with `pkgimages=no`, we can create object file + # with multiple cpu-targets + # The cmd is checked for `--object-o` as soon as it is run. So, to avoid long + # testing times, intentionally don't pass `--sysimage`; when we reach the + # corresponding error, we know that `check_cmdline` has already passed + let v = readchomperrors(`$julia_path + --cpu-target='native;native' + --output-o=$object_file $outputo_file + --pkgimages=no`) + + @test v[1] == false + @test v[2] == "" + @test !contains(v[3], "More than one command line CPU targets specified") + @test v[3] == "ERROR: File \"boot.jl\" not found" + end + + # This is to test that with `pkgimages=yes`, multiple CPU targets are parsed. + # We intentionally fail fast due to a lack of an `--output-o` flag. + let v = readchomperrors(`$julia_path --cpu-target='native;native' --pkgimages=yes`) + @test v[1] == false + @test v[2] == "" + @test contains(v[3], "More than one command line CPU targets specified") + end +end # Find the path of libjulia (or libjulia-debug, as the case may be) # to use as a dummy shlib to open libjulia = if Base.DARWIN_FRAMEWORK abspath(Libdl.dlpath(Base.DARWIN_FRAMEWORK_NAME * - (ccall(:jl_is_debugbuild, Cint, ()) != 0 ? "_debug" : ""))) + (Base.isdebugbuild() ? "_debug" : ""))) else - abspath(Libdl.dlpath((ccall(:jl_is_debugbuild, Cint, ()) != 0) ? "libjulia-debug" : "libjulia")) + abspath(Libdl.dlpath(Base.isdebugbuild() ? "libjulia-debug" : "libjulia")) end @@ -666,8 +894,7 @@ let exename = `$(Base.julia_cmd().exec[1]) -t 1` @test !occursin("Segmentation fault", s) @test !occursin("EXCEPTION_ACCESS_VIOLATION", s) end - @test !success(p) - @test !Base.process_signaled(p) + @test errors_not_signals(p) @test p.exitcode == 1 end end @@ -677,8 +904,7 @@ let exename = `$(Base.julia_cmd().exec[1]) -t 1` let s = read(err, String) @test s == "ERROR: System image file failed consistency check: maybe opened the wrong version?\n" end - @test !success(p) - @test !Base.process_signaled(p) + @test errors_not_signals(p) @test p.exitcode == 1 end end @@ -696,7 +922,7 @@ let exename = Base.julia_cmd() @test parse(Int,readchomp(`$exename -E "Base.JLOptions().startupfile" --startup-file=no`)) == JL_OPTIONS_STARTUPFILE_OFF end - @test !success(`$exename --startup-file=false`) + @test errors_not_signals(`$exename --startup-file=false`) end # Make sure `julia --lisp` doesn't break @@ -782,7 +1008,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` close(in) close(err.in) txt = readline(err) - @test startswith(txt, "ERROR: syntax: incomplete") + @test startswith(txt, r"ERROR: (syntax: incomplete|ParseError:)") end # Issue #29855 @@ -817,4 +1043,27 @@ end @test lines[3] == "foo" @test lines[4] == "bar" end +#heap-size-hint, we reserve 250 MB for non GC memory (llvm, etc.) 
+@test readchomp(`$(Base.julia_cmd()) --startup-file=no --heap-size-hint=500M -e "println(@ccall jl_gc_get_max_memory()::UInt64)"`) == "$((500-250)*1024*1024)" +end + +## `Main.main` entrypoint + +# Basic usage +@test readchomp(`$(Base.julia_cmd()) -e '(@main)(ARGS) = println("hello")'`) == "hello" + +# Test ARGS with -e +@test readchomp(`$(Base.julia_cmd()) -e '(@main)(ARGS) = println(ARGS)' a b`) == repr(["a", "b"]) + +# Test import from module +@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(ARGS) = println("hello"); end; using .Hello'`) == "hello" +@test readchomp(`$(Base.julia_cmd()) -e 'module Hello; export main; (@main)(ARGS) = println("hello"); end; import .Hello'`) == "" + +# test --bug-report=rr +if Sys.islinux() && Sys.ARCH in (:i686, :x86_64) # rr is only available on these platforms + mktempdir() do temp_trace_dir + @test success(pipeline(setenv(`$(Base.julia_cmd()) --bug-report=rr-local -e 'exit()'`, + "JULIA_RR_RECORD_ARGS" => "-n --nested=ignore", + "_RR_TRACE_DIR" => temp_trace_dir); #=stderr, stdout=#)) + end end diff --git a/test/combinatorics.jl b/test/combinatorics.jl index 8a27ad6da9971..25a444b70ec36 100644 --- a/test/combinatorics.jl +++ b/test/combinatorics.jl @@ -2,6 +2,9 @@ using Random: randcycle +isdefined(Main, :ImmutableArrays) || @eval Main include("testhelpers/ImmutableArrays.jl") +using .Main.ImmutableArrays + @testset "binomial" begin @test binomial(5,-1) == 0 @test binomial(5,10) == 0 @@ -16,6 +19,16 @@ using Random: randcycle @test binomial(Int64(67), Int64(29)) == binomial(BigInt(67), BigInt(29)) == 7886597962249166160 @test binomial(Int128(131), Int128(62)) == binomial(BigInt(131), BigInt(62)) == 157311720980559117816198361912717812000 @test_throws OverflowError binomial(Int64(67), Int64(30)) + + #Issue 48072 + ∐ = parse(BigInt, "1" * "0"^13 * "666" * "0"^13 * "1") + @test binomial(∐, ∐ - 1) == ∐ + @test binomial(∐, ∐ - 2) == 500000000000066600000000002218280000000000033300000000000000 + @test binomial(∐, ∐ - 3) == binomial(∐, 3) + @test binomial(-big(2), ∐ - 3) == 1000000000000066599999999999999 + @test_throws OverflowError binomial(big(2)^65, big(2)^64) + @test_throws OverflowError binomial(-big(2)^65, big(2)^64) + @test binomial(∐, 2 * ∐) == BigInt(0) end @testset "permutations" begin @@ -34,6 +47,7 @@ end @test invperm((1,2)) == (1,2) @test invperm((2,1)) == (2,1) @test_throws ArgumentError invperm((1,3)) + @test_throws ArgumentError invperm((1,1)) push!(p, 1) @test !isperm(p) @@ -56,6 +70,10 @@ end @test isperm(T) == true @test isperm(K) == false end + + # issue #47847 + p = ImmutableArrays.ImmutableArray([2,3,1]) + @test invperm(p) == invperm([2,3,1]) end @testset "factorial" begin diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl index 74775b8e77213..121e7fad55c90 100644 --- a/test/compiler/AbstractInterpreter.jl +++ b/test/compiler/AbstractInterpreter.jl @@ -1,46 +1,36 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +using Test const CC = Core.Compiler -import Core: MethodInstance, CodeInstance -import .CC: WorldRange, WorldView - -# define new `AbstractInterpreter` that satisfies the minimum interface requirements -# while managing its cache independently -macro newinterp(name) - cachename = gensym(string(name, "Cache")) - name = esc(name) - quote - struct $cachename - dict::IdDict{MethodInstance,CodeInstance} - end - struct $name <: CC.AbstractInterpreter - interp::CC.NativeInterpreter - cache::$cachename - $name(world = Base.get_world_counter(); - interp = CC.NativeInterpreter(world), - cache = $cachename(IdDict{MethodInstance,CodeInstance}()) - ) = new(interp, cache) - end - CC.InferenceParams(interp::$name) = CC.InferenceParams(interp.interp) - CC.OptimizationParams(interp::$name) = CC.OptimizationParams(interp.interp) - CC.get_world_counter(interp::$name) = CC.get_world_counter(interp.interp) - CC.get_inference_cache(interp::$name) = CC.get_inference_cache(interp.interp) - CC.code_cache(interp::$name) = WorldView(interp.cache, WorldRange(CC.get_world_counter(interp))) - CC.get(wvc::WorldView{<:$cachename}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default) - CC.getindex(wvc::WorldView{<:$cachename}, mi::MethodInstance) = getindex(wvc.cache.dict, mi) - CC.haskey(wvc::WorldView{<:$cachename}, mi::MethodInstance) = haskey(wvc.cache.dict, mi) - CC.setindex!(wvc::WorldView{<:$cachename}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi) - end -end -# `OverlayMethodTable` -# -------------------- -import Base.Experimental: @MethodTable, @overlay +include("irutils.jl") +include("newinterp.jl") + + +# OverlayMethodTable +# ================== + +using Base.Experimental: @MethodTable, @overlay + +# @overlay method with return type annotation +@MethodTable RT_METHOD_DEF +@overlay RT_METHOD_DEF Base.sin(x::Float64)::Float64 = cos(x) +@overlay RT_METHOD_DEF function Base.sin(x::T)::T where T<:AbstractFloat + cos(x) +end @newinterp MTOverlayInterp -@MethodTable(OverlayedMT) +@MethodTable OverlayedMT CC.method_table(interp::MTOverlayInterp) = CC.OverlayMethodTable(CC.get_world_counter(interp), OverlayedMT) +function CC.add_remark!(interp::MTOverlayInterp, ::CC.InferenceState, remark) + if interp.meta !== nothing + # Core.println(remark) + push!(interp.meta, remark) + end + return nothing +end + strangesin(x) = sin(x) @overlay OverlayedMT strangesin(x::Float64) = iszero(x) ? nothing : cos(x) @@ -60,6 +50,22 @@ end |> !Core.Compiler.is_nonoverlayed @invoke strangesin(x::Float64) end |> !Core.Compiler.is_nonoverlayed +# account for overlay possibility in unanalyzed matching method +callstrange(::Float64) = strangesin(x) +callstrange(::Nothing) = Core.compilerbarrier(:type, nothing) # trigger inference bail out +callstrange_entry(x) = callstrange(x) # needs to be defined here because of world age +let interp = MTOverlayInterp(Set{Any}()) + matches = Core.Compiler.findall(Tuple{typeof(callstrange),Any}, Core.Compiler.method_table(interp)) + @test matches !== nothing + @test Core.Compiler.length(matches) == 2 + if Core.Compiler.getindex(matches, 1).method == which(callstrange, (Nothing,)) + @test Base.infer_effects(callstrange_entry, (Any,); interp) |> !Core.Compiler.is_nonoverlayed + @test "Call inference reached maximally imprecise information. Bailing on." in interp.meta + else + @warn "`nonoverlayed` test for inference bailing out is skipped since the method match sort order is changed." 
+ end +end + # but it should never apply for the native compilation @test Base.infer_effects((Float64,)) do x strangesin(x) @@ -83,13 +89,13 @@ overlay_match(::Any) = nothing overlay_match(x) end |> only === Union{Nothing,Missing} -# partial pure/concrete evaluation +# partial concrete evaluation @test Base.return_types(; interp=MTOverlayInterp()) do isbitstype(Int) ? nothing : missing end |> only === Nothing -Base.@assume_effects :terminates_globally function issue41694(x) +Base.@assume_effects :terminates_locally function issue41694(x) res = 1 - 1 < x < 20 || throw("bad") + 0 ≤ x < 20 || error("bad fact") while x > 1 res *= x x -= 1 @@ -100,7 +106,7 @@ end issue41694(3) == 6 ? nothing : missing end |> only === Nothing -# disable partial pure/concrete evaluation when tainted by any overlayed call +# disable partial concrete evaluation when tainted by any overlayed call Base.@assume_effects :total totalcall(f, args...) = f(args...) @test Base.return_types(; interp=MTOverlayInterp()) do if totalcall(strangesin, 1.0) == cos(1.0) @@ -109,3 +115,356 @@ Base.@assume_effects :total totalcall(f, args...) = f(args...) return missing end end |> only === Nothing + +# GPUCompiler needs accurate inference through kwfunc with the overlay of `Core.throw_inexacterror` +# https://github.com/JuliaLang/julia/issues/48097 +@newinterp Issue48097Interp +@MethodTable Issue48097MT +CC.method_table(interp::Issue48097Interp) = CC.OverlayMethodTable(CC.get_world_counter(interp), Issue48097MT) +CC.InferenceParams(::Issue48097Interp) = CC.InferenceParams(; unoptimize_throw_blocks=false) +function CC.concrete_eval_eligible(interp::Issue48097Interp, + @nospecialize(f), result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.AbsIntState) + ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, + f::Any, result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.AbsIntState) + if ret === :semi_concrete_eval + # disable semi-concrete interpretation + return :none + end + return ret +end +@overlay Issue48097MT @noinline Core.throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = return +issue48097(; kwargs...) = return 42 +@test_broken fully_eliminated(; interp=Issue48097Interp(), retval=42) do + issue48097(; a=1f0, b=1.0) +end + +# Should not concrete-eval overlayed methods in semi-concrete interpretation +@newinterp OverlaySinInterp +@MethodTable OverlaySinMT +CC.method_table(interp::OverlaySinInterp) = CC.OverlayMethodTable(CC.get_world_counter(interp), OverlaySinMT) +overlay_sin1(x) = error("Not supposed to be called.") +@overlay OverlaySinMT overlay_sin1(x) = cos(x) +@overlay OverlaySinMT Base.sin(x::Union{Float32,Float64}) = overlay_sin1(x) +let oc = Base.code_ircode(; interp=OverlaySinInterp()) do + sin(0.) + end |> only |> first |> Core.OpaqueClosure + @test oc() == cos(0.) +end +@overlay OverlaySinMT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin1(x) +let oc = Base.code_ircode(; interp=OverlaySinInterp()) do + sin(0.) + end |> only |> first |> Core.OpaqueClosure + @test oc() == cos(0.) +end +_overlay_sin2(x) = error("Not supposed to be called.") +@overlay OverlaySinMT _overlay_sin2(x) = cos(x) +overlay_sin2(x) = _overlay_sin2(x) +@overlay OverlaySinMT Base.sin(x::Union{Float32,Float64}) = @noinline overlay_sin2(x) +let oc = Base.code_ircode(; interp=OverlaySinInterp()) do + sin(0.) + end |> only |> first |> Core.OpaqueClosure + @test oc() == cos(0.) 
+end + +# AbstractLattice +# =============== + +using Core: SlotNumber, Argument +using Core.Compiler: slot_id, tmerge_fast_path +import .CC: + AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice, + widenlattice, is_valid_lattice_norec, typeinf_lattice, ipo_lattice, optimizer_lattice, + widenconst, tmeet, tmerge, ⊑, abstract_eval_special_value, widenreturn + +@newinterp TaintInterpreter +struct TaintLattice{PL<:AbstractLattice} <: CC.AbstractLattice + parent::PL +end +CC.widenlattice(𝕃::TaintLattice) = 𝕃.parent +CC.is_valid_lattice_norec(::TaintLattice, @nospecialize(elm)) = isa(elm, Taint) + +struct InterTaintLattice{PL<:AbstractLattice} <: CC.AbstractLattice + parent::PL +end +CC.widenlattice(𝕃::InterTaintLattice) = 𝕃.parent +CC.is_valid_lattice_norec(::InterTaintLattice, @nospecialize(elm)) = isa(elm, InterTaint) + +const AnyTaintLattice{L} = Union{TaintLattice{L},InterTaintLattice{L}} + +CC.typeinf_lattice(::TaintInterpreter) = InferenceLattice(TaintLattice(BaseInferenceLattice.instance)) +CC.ipo_lattice(::TaintInterpreter) = InferenceLattice(InterTaintLattice(IPOResultLattice.instance)) +CC.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(SimpleInferenceLattice.instance) + +struct Taint + typ + slots::BitSet + function Taint(@nospecialize(typ), slots::BitSet) + if typ isa Taint + slots = typ.slots ∪ slots + typ = typ.typ + end + return new(typ, slots) + end +end +Taint(@nospecialize(typ), id::Int) = Taint(typ, push!(BitSet(), id)) +function Base.:(==)(a::Taint, b::Taint) + return a.typ == b.typ && a.slots == b.slots +end + +struct InterTaint + typ + slots::BitSet + function InterTaint(@nospecialize(typ), slots::BitSet) + if typ isa InterTaint + slots = typ.slots ∪ slots + typ = typ.typ + end + return new(typ, slots) + end +end +InterTaint(@nospecialize(typ), id::Int) = InterTaint(typ, push!(BitSet(), id)) +function Base.:(==)(a::InterTaint, b::InterTaint) + return a.typ == b.typ && a.slots == b.slots +end + +const AnyTaint = Union{Taint, InterTaint} + +function CC.tmeet(𝕃::AnyTaintLattice, @nospecialize(v), @nospecialize(t::Type)) + T = isa(𝕃, TaintLattice) ? Taint : InterTaint + if isa(v, T) + v = v.typ + end + return tmeet(widenlattice(𝕃), v, t) +end +function CC.tmerge(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb)) + r = tmerge_fast_path(𝕃, typea, typeb) + r !== nothing && return r + # type-lattice for Taint + T = isa(𝕃, TaintLattice) ? Taint : InterTaint + if isa(typea, T) + if isa(typeb, T) + return T( + tmerge(widenlattice(𝕃), typea.typ, typeb.typ), + typea.slots ∪ typeb.slots) + else + typea = typea.typ + end + elseif isa(typeb, T) + typeb = typeb.typ + end + return tmerge(widenlattice(𝕃), typea, typeb) +end +function CC.:⊑(𝕃::AnyTaintLattice, @nospecialize(typea), @nospecialize(typeb)) + T = isa(𝕃, TaintLattice) ? 
Taint : InterTaint + if isa(typea, T) + if isa(typeb, T) + typea.slots ⊆ typeb.slots || return false + return ⊑(widenlattice(𝕃), typea.typ, typeb.typ) + end + typea = typea.typ + elseif isa(typeb, T) + return false + end + return ⊑(widenlattice(𝕃), typea, typeb) +end +CC.widenconst(taint::AnyTaint) = widenconst(taint.typ) + +function CC.abstract_eval_special_value(interp::TaintInterpreter, + @nospecialize(e), vtypes::CC.VarTable, sv::CC.InferenceState) + ret = @invoke CC.abstract_eval_special_value(interp::CC.AbstractInterpreter, + e::Any, vtypes::CC.VarTable, sv::CC.InferenceState) + if isa(e, SlotNumber) || isa(e, Argument) + return Taint(ret, slot_id(e)) + end + return ret +end + +function CC.widenreturn(𝕃::InferenceLattice{<:InterTaintLattice}, @nospecialize(rt), @nospecialize(bestguess), nargs::Int, slottypes::Vector{Any}, changes::CC.VarTable) + if isa(rt, Taint) + return InterTaint(rt.typ, BitSet((id for id in rt.slots if id ≤ nargs))) + end + return CC.widenreturn(widenlattice(𝕃), rt, bestguess, nargs, slottypes, changes) +end + +@test CC.tmerge(typeinf_lattice(TaintInterpreter()), Taint(Int, 1), Taint(Int, 2)) == Taint(Int, BitSet(1:2)) + +# code_typed(ifelse, (Bool, Int, Int); interp=TaintInterpreter()) + +# External lattice without `Conditional` + +import .CC: + AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice, + typeinf_lattice, ipo_lattice, optimizer_lattice + +@newinterp NonconditionalInterpreter +CC.typeinf_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) +CC.ipo_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) +CC.optimizer_lattice(::NonconditionalInterpreter) = PartialsLattice(ConstsLattice()) + +@test Base.return_types((Any,); interp=NonconditionalInterpreter()) do x + c = isa(x, Int) || isa(x, Float64) + if c + return x + else + return nothing + end +end |> only === Any + +# CallInfo × inlining +# =================== + +@newinterp NoinlineInterpreter +noinline_modules(interp::NoinlineInterpreter) = interp.meta::Set{Module} + +import .CC: CallInfo + +struct NoinlineCallInfo <: CallInfo + info::CallInfo # wrapped call +end +CC.nsplit_impl(info::NoinlineCallInfo) = CC.nsplit(info.info) +CC.getsplit_impl(info::NoinlineCallInfo, idx::Int) = CC.getsplit(info.info, idx) +CC.getresult_impl(info::NoinlineCallInfo, idx::Int) = CC.getresult(info.info, idx) + +function CC.abstract_call(interp::NoinlineInterpreter, + arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) + ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter, + arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) + if sv.mod in noinline_modules(interp) + return CC.CallMeta(ret.rt, ret.exct, ret.effects, NoinlineCallInfo(ret.info)) + end + return ret +end +function CC.inlining_policy(interp::NoinlineInterpreter, + @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32) + if isa(info, NoinlineCallInfo) + return nothing + end + return @invoke CC.inlining_policy(interp::CC.AbstractInterpreter, + src::Any, info::CallInfo, stmt_flag::UInt32) +end + +@inline function inlined_usually(x, y, z) + return x * y + z +end + +# check if the inlining algorithm works as expected +let src = code_typed1((Float64,Float64,Float64)) do x, y, z + inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 +end +let NoinlineModule = Module() + interp = 
NoinlineInterpreter(Set((NoinlineModule,))) + + # this anonymous function's context is Main -- it should be inlined as usual + let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z + inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # it should work for cached results + method = only(methods(inlined_usually, (Float64,Float64,Float64,))) + mi = CC.specialize_method(method, Tuple{typeof(inlined_usually),Float64,Float64,Float64}, Core.svec()) + @test haskey(interp.code_cache.dict, mi) + let src = code_typed1((Float64,Float64,Float64); interp) do x, y, z + inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # now the context module is `NoinlineModule` -- it should not be inlined + let src = @eval NoinlineModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z + $inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 1 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end + + # the context module is totally irrelevant -- it should be inlined as usual + OtherModule = Module() + let src = @eval OtherModule $code_typed1((Float64,Float64,Float64); interp=$interp) do x, y, z + $inlined_usually(x, y, z) + end + @test count(isinvoke(:inlined_usually), src.code) == 0 + @test count(iscall((src, inlined_usually)), src.code) == 0 + end +end + +# Make sure that Core.Compiler has enough NamedTuple infrastructure +# to properly give error messages for basic kwargs... +Core.eval(Core.Compiler, quote f(;a=1) = a end) +@test_throws MethodError Core.Compiler.f(;b=2) + + +# Custom lookup function +# ====================== + +# In the following test with `ConstInvokeInterp`, we use a custom lookup function that +# uses const-prop'ed source if available, and check if LLVM emits code using it. 
+ +using Core: MethodInstance, CodeInstance +using Base: CodegenParams +using InteractiveUtils + +@newinterp ConstInvokeInterp +function CC.concrete_eval_eligible(interp::ConstInvokeInterp, + @nospecialize(f), result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.AbsIntState) + ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, + f::Any, result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.AbsIntState) + if ret === :semi_concrete_eval + return :none # disable semi-concrete interpretation + end + return ret +end +Base.@constprop :aggressive @noinline function custom_lookup_target(c::Bool, x::Int) + if c + y = sin(x) + z = nothing + else + y = cos(x) + z = missing + end + return y, z +end +custom_lookup_context(x::Int) = custom_lookup_target(true, x) + +const CONST_INVOKE_INTERP_WORLD = Base.get_world_counter() +const CONST_INVOKE_INTERP = ConstInvokeInterp(; world=CONST_INVOKE_INTERP_WORLD) +function custom_lookup(mi::MethodInstance, min_world::UInt, max_world::UInt) + for inf_result in CONST_INVOKE_INTERP.inf_cache + if inf_result.linfo === mi + if CC.any(inf_result.overridden_by_const) + return CodeInstance(CONST_INVOKE_INTERP, inf_result, inf_result.valid_worlds) + end + end + end + return CONST_INVOKE_INTERP.code_cache.dict[mi] +end + +let # generate cache + code_typed(custom_lookup_context; world=CONST_INVOKE_INTERP_WORLD, interp=CONST_INVOKE_INTERP) + + # check if the lookup function works as expected + target_mi = CC.specialize_method(only(methods(custom_lookup_target)), Tuple{typeof(custom_lookup_target),Bool,Int}, Core.svec()) + target_ci = custom_lookup(target_mi, CONST_INVOKE_INTERP_WORLD, CONST_INVOKE_INTERP_WORLD) + @test target_ci.rettype == Tuple{Float64,Nothing} # constprop'ed source + # display(@ccall jl_uncompress_ir(target_ci.def.def::Any, C_NULL::Ptr{Cvoid}, target_ci.inferred::Any)::Any) + + raw = false + lookup = @cfunction(custom_lookup, Any, (Any,Csize_t,Csize_t)) + params = CodegenParams(; + debug_info_kind=Cint(0), + debug_info_level=Cint(2), + safepoint_on_entry=raw, + gcstack_arg=raw, + lookup) + io = IOBuffer() + code_llvm(io, custom_lookup_target, (Bool,Int,); params) + s = String(take!(io)) + @test occursin("j_sin_", s) + @test !occursin("j_cos_", s) +end diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl index f71cc20387733..27a971173ac70 100644 --- a/test/compiler/EscapeAnalysis/EAUtils.jl +++ b/test/compiler/EscapeAnalysis/EAUtils.jl @@ -3,13 +3,14 @@ module EAUtils export code_escapes, @code_escapes, __clear_cache! const CC = Core.Compiler -const EA = CC.EscapeAnalysis +using ..EscapeAnalysis +const EA = EscapeAnalysis # entries # ------- -import Base: unwrap_unionall, rewrap_unionall -import InteractiveUtils: gen_call_with_extracted_types_and_kwargs +using Base: IdSet, unwrap_unionall, rewrap_unionall +using InteractiveUtils: gen_call_with_extracted_types_and_kwargs """ @code_escapes [options...] f(args...) 
@@ -38,24 +39,22 @@ Runs the escape analysis on optimized IR of a generic function call with the giv """ function code_escapes(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); world::UInt = get_world_counter(), - interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world), - debuginfo::Symbol = :none, - optimize::Bool = true) - ft = Core.Typeof(f) - if isa(types, Type) - u = unwrap_unionall(types) - tt = rewrap_unionall(Tuple{ft, u.parameters...}, types) - else - tt = Tuple{ft, types...} + debuginfo::Symbol = :none) + tt = Base.signature_type(f, types) + match = Base._which(tt; world, raise=true) + mi = Core.Compiler.specialize_method(match)::MethodInstance + interp = EscapeAnalyzer(world, mi) + frame = Core.Compiler.typeinf_frame(interp, mi, #=run_optimizer=#true) + isdefined(interp, :result) || error("optimization didn't happen: maybe everything has been constant folded?") + slotnames = let src = frame.src + src isa CodeInfo ? src.slotnames : nothing end - interp = EscapeAnalyzer(interp, tt, optimize) - results = Base.code_typed_by_type(tt; optimize=true, world, interp) - isone(length(results)) || throw(ArgumentError("`code_escapes` only supports single analysis result")) - return EscapeResult(interp.ir, interp.state, interp.linfo, debuginfo===:source) + return EscapeResult(interp.result.ir, interp.result.estate, interp.result.mi, + slotnames, debuginfo === :source, interp) end # in order to run a whole analysis from ground zero (e.g. for benchmarking, etc.) -__clear_cache!() = empty!(GLOBAL_CODE_CACHE) +__clear_cache!() = empty!(GLOBAL_EA_CODE_CACHE) # AbstractInterpreter # ------------------- @@ -63,188 +62,146 @@ __clear_cache!() = empty!(GLOBAL_CODE_CACHE) # imports import .CC: AbstractInterpreter, NativeInterpreter, WorldView, WorldRange, - InferenceParams, OptimizationParams, get_world_counter, get_inference_cache, code_cache + InferenceParams, OptimizationParams, get_world_counter, get_inference_cache, code_cache, + ipo_dataflow_analysis!, cache_result! 
# usings -import Core: +using Core: CodeInstance, MethodInstance, CodeInfo -import .CC: - InferenceResult, OptimizationState, IRCode, copy as cccopy, - @timeit, convert_to_ircode, slot2reg, compact!, ssa_inlining_pass!, sroa_pass!, - adce_pass!, type_lift_pass!, JLOptions, verify_ir, verify_linetable -import .EA: analyze_escapes, ArgEscapeCache, EscapeInfo, EscapeState, is_ipo_profitable +using .CC: + InferenceResult, OptimizationState, IRCode +using .EA: analyze_escapes, ArgEscapeCache, EscapeInfo, EscapeState + +struct CodeCache + cache::IdDict{MethodInstance,CodeInstance} +end +CodeCache() = CodeCache(IdDict{MethodInstance,CodeInstance}()) +const GLOBAL_CODE_CACHE = CodeCache() # when working outside of Core.Compiler, # cache entire escape state for later inspection and debugging -struct EscapeCache - cache::ArgEscapeCache +struct EscapeCacheInfo + argescapes::ArgEscapeCache state::EscapeState # preserved just for debugging purpose ir::IRCode # preserved just for debugging purpose end -mutable struct EscapeAnalyzer{State} <: AbstractInterpreter - native::NativeInterpreter - cache::IdDict{InferenceResult,EscapeCache} - entry_tt - optimize::Bool +struct EscapeCache + cache::IdDict{MethodInstance,EscapeCacheInfo} +end +EscapeCache() = EscapeCache(IdDict{MethodInstance,EscapeCacheInfo}()) +const GLOBAL_ESCAPE_CACHE = EscapeCache() + +struct EscapeResultForEntry ir::IRCode - state::State - linfo::MethodInstance - EscapeAnalyzer(native::NativeInterpreter, @nospecialize(tt), optimize::Bool) = - new{EscapeState}(native, IdDict{InferenceResult,EscapeCache}(), tt, optimize) + estate::EscapeState + mi::MethodInstance end -CC.InferenceParams(interp::EscapeAnalyzer) = InferenceParams(interp.native) -CC.OptimizationParams(interp::EscapeAnalyzer) = OptimizationParams(interp.native) -CC.get_world_counter(interp::EscapeAnalyzer) = get_world_counter(interp.native) +mutable struct EscapeAnalyzer <: AbstractInterpreter + const world::UInt + const inf_params::InferenceParams + const opt_params::OptimizationParams + const inf_cache::Vector{InferenceResult} + const code_cache::CodeCache + const escape_cache::EscapeCache + const entry_mi::MethodInstance + result::EscapeResultForEntry + function EscapeAnalyzer(world::UInt, entry_mi::MethodInstance, + code_cache::CodeCache=GLOBAL_CODE_CACHE, + escape_cache::EscapeCache=GLOBAL_ESCAPE_CACHE) + inf_params = InferenceParams() + opt_params = OptimizationParams() + inf_cache = InferenceResult[] + return new(world, inf_params, opt_params, inf_cache, code_cache, escape_cache, entry_mi) + end +end -CC.get_inference_cache(interp::EscapeAnalyzer) = get_inference_cache(interp.native) +CC.InferenceParams(interp::EscapeAnalyzer) = interp.inf_params +CC.OptimizationParams(interp::EscapeAnalyzer) = interp.opt_params +CC.get_world_counter(interp::EscapeAnalyzer) = interp.world +CC.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache -const GLOBAL_CODE_CACHE = IdDict{MethodInstance,CodeInstance}() +struct EscapeAnalyzerCacheView + code_cache::CodeCache + escape_cache::EscapeCache +end function CC.code_cache(interp::EscapeAnalyzer) worlds = WorldRange(get_world_counter(interp)) - return WorldView(GlobalCache(), worlds) -end - -struct GlobalCache end - -CC.haskey(wvc::WorldView{GlobalCache}, mi::MethodInstance) = haskey(GLOBAL_CODE_CACHE, mi) - -CC.get(wvc::WorldView{GlobalCache}, mi::MethodInstance, default) = get(GLOBAL_CODE_CACHE, mi, default) - -CC.getindex(wvc::WorldView{GlobalCache}, mi::MethodInstance) = getindex(GLOBAL_CODE_CACHE, mi) - -function 
CC.setindex!(wvc::WorldView{GlobalCache}, ci::CodeInstance, mi::MethodInstance) - GLOBAL_CODE_CACHE[mi] = ci - add_callback!(mi) # register the callback on invalidation - return nothing + return WorldView(EscapeAnalyzerCacheView(interp.code_cache, interp.escape_cache), worlds) end - -function add_callback!(linfo) - if !isdefined(linfo, :callbacks) - linfo.callbacks = Any[invalidate_cache!] - else - if !any(@nospecialize(cb)->cb===invalidate_cache!, linfo.callbacks) - push!(linfo.callbacks, invalidate_cache!) - end +CC.haskey(wvc::WorldView{EscapeAnalyzerCacheView}, mi::MethodInstance) = haskey(wvc.cache.code_cache.cache, mi) +CC.get(wvc::WorldView{EscapeAnalyzerCacheView}, mi::MethodInstance, default) = get(wvc.cache.code_cache.cache, mi, default) +CC.getindex(wvc::WorldView{EscapeAnalyzerCacheView}, mi::MethodInstance) = getindex(wvc.cache.code_cache.cache, mi) +function CC.setindex!(wvc::WorldView{EscapeAnalyzerCacheView}, ci::CodeInstance, mi::MethodInstance) + wvc.cache.code_cache.cache[mi] = ci + # register the callback on invalidation + CC.add_invalidation_callback!(mi) do replaced::MethodInstance, max_world::UInt32 + delete!(wvc.cache.code_cache.cache, replaced) + delete!(wvc.cache.escape_cache.cache, replaced) end - return nothing + return wvc end -function invalidate_cache!(replaced, max_world, depth = 0) - delete!(GLOBAL_CODE_CACHE, replaced) - - if isdefined(replaced, :backedges) - for mi in replaced.backedges - mi = mi::MethodInstance - if !haskey(GLOBAL_CODE_CACHE, mi) - continue # otherwise fall into infinite loop - end - invalidate_cache!(mi, max_world, depth+1) - end +function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::InferenceResult) + # run EA on all frames that have been optimized + nargs = let def = caller.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end + get_escape_cache = GetEscapeCache(interp) + estate = try + analyze_escapes(ir, nargs, CC.optimizer_lattice(interp), get_escape_cache) + catch err + @error "error happened within EA, inspect `Main.failed_escapeanalysis`" + Main.failed_escapeanalysis = FailedAnalysis(ir, nargs, get_escape_cache) + rethrow(err) end - return nothing -end + if caller.linfo === interp.entry_mi + # return back the result + interp.result = EscapeResultForEntry(CC.copy(ir), estate, caller.linfo) + end + record_escapes!(interp, caller, estate, ir) -function CC.optimize(interp::EscapeAnalyzer, - opt::OptimizationState, params::OptimizationParams, caller::InferenceResult) - ir = run_passes_with_ea(interp, opt.src, opt, caller) - return CC.finish(interp, opt, params, ir, caller) + @invoke CC.ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, caller::InferenceResult) end -function CC.cache_result!(interp::EscapeAnalyzer, caller::InferenceResult) - if haskey(interp.cache, caller) - GLOBAL_ESCAPE_CACHE[caller.linfo] = interp.cache[caller] - end - return @invoke CC.cache_result!(interp::AbstractInterpreter, caller::InferenceResult) +function record_escapes!(interp::EscapeAnalyzer, + caller::InferenceResult, estate::EscapeState, ir::IRCode) + argescapes = ArgEscapeCache(estate) + ecacheinfo = EscapeCacheInfo(argescapes, estate, ir) + return CC.stack_analysis_result!(caller, ecacheinfo) end -const GLOBAL_ESCAPE_CACHE = IdDict{MethodInstance,EscapeCache}() - -""" - cache_escapes!(caller::InferenceResult, estate::EscapeState, cacheir::IRCode) - -Transforms escape information of call arguments of `caller`, -and then caches it into a global cache for later interprocedural propagation. 
-""" -function cache_escapes!(interp::EscapeAnalyzer, - caller::InferenceResult, estate::EscapeState, cacheir::IRCode) - cache = ArgEscapeCache(estate) - ecache = EscapeCache(cache, estate, cacheir) - interp.cache[caller] = ecache - return cache +struct GetEscapeCache + escape_cache::EscapeCache + GetEscapeCache(interp::EscapeAnalyzer) = new(interp.escape_cache) +end +function ((; escape_cache)::GetEscapeCache)(mi::MethodInstance) + ecacheinfo = get(escape_cache.cache, mi, nothing) + return ecacheinfo === nothing ? false : ecacheinfo.argescapes end -function get_escape_cache(interp::EscapeAnalyzer) - return function (linfo::Union{InferenceResult,MethodInstance}) - if isa(linfo, InferenceResult) - ecache = get(interp.cache, linfo, nothing) - else - ecache = get(GLOBAL_ESCAPE_CACHE, linfo, nothing) - end - return ecache !== nothing ? ecache.cache : nothing - end +struct FailedAnalysis + ir::IRCode + nargs::Int + get_escape_cache::GetEscapeCache end -function run_passes_with_ea(interp::EscapeAnalyzer, ci::CodeInfo, sv::OptimizationState, - caller::InferenceResult) - @timeit "convert" ir = convert_to_ircode(ci, sv) - @timeit "slot2reg" ir = slot2reg(ir, ci, sv) - # TODO: Domsorting can produce an updated domtree - no need to recompute here - @timeit "compact 1" ir = compact!(ir) - nargs = let def = sv.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end - local state - if is_ipo_profitable(ir, nargs) || caller.linfo.specTypes === interp.entry_tt - try - @timeit "[IPO EA]" begin - state = analyze_escapes(ir, nargs, false, get_escape_cache(interp)) - cache_escapes!(interp, caller, state, cccopy(ir)) - end - catch err - @error "error happened within [IPO EA], insepct `Main.ir` and `Main.nargs`" - @eval Main (ir = $ir; nargs = $nargs) - rethrow(err) - end +function CC.cache_result!(interp::EscapeAnalyzer, inf_result::InferenceResult) + ecacheinfo = CC.traverse_analysis_results(inf_result) do @nospecialize result + return result isa EscapeCacheInfo ? 
result : nothing end - if caller.linfo.specTypes === interp.entry_tt && !interp.optimize - # return back the result - interp.ir = cccopy(ir) - interp.state = state - interp.linfo = sv.linfo - end - @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds) - # @timeit "verify 2" verify_ir(ir) - @timeit "compact 2" ir = compact!(ir) - if caller.linfo.specTypes === interp.entry_tt && interp.optimize - try - @timeit "[Local EA]" state = analyze_escapes(ir, nargs, true, get_escape_cache(interp)) - catch err - @error "error happened within [Local EA], insepct `Main.ir` and `Main.nargs`" - @eval Main (ir = $ir; nargs = $nargs) - rethrow(err) - end - # return back the result - interp.ir = cccopy(ir) - interp.state = state - interp.linfo = sv.linfo - end - @timeit "SROA" ir = sroa_pass!(ir) - @timeit "ADCE" ir = adce_pass!(ir) - @timeit "type lift" ir = type_lift_pass!(ir) - @timeit "compact 3" ir = compact!(ir) - if JLOptions().debug_level == 2 - @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable)) - end - return ir + ecacheinfo isa EscapeCacheInfo && (interp.escape_cache.cache[inf_result.linfo] = ecacheinfo) + return @invoke CC.cache_result!(interp::AbstractInterpreter, inf_result::InferenceResult) end # printing # -------- -import Core: Argument, SSAValue -import .CC: widenconst, singleton_type +using Core: Argument, SSAValue +using .CC: widenconst, singleton_type -Base.getindex(estate::EscapeState, @nospecialize(x)) = CC.getindex(estate, x) +if EA._TOP_MOD === CC + Base.getindex(estate::EscapeState, @nospecialize(x)) = CC.getindex(estate, x) +end function get_name_color(x::EscapeInfo, symbol::Bool = false) getname(x) = string(nameof(x)) @@ -281,34 +238,31 @@ function Base.show(io::IO, x::EscapeInfo) printstyled(io, name; color) end end -function Base.show(io::IO, ::MIME"application/prs.juno.inline", x::EscapeInfo) - name, color = get_name_color(x) - if isnothing(name) - return x # use fancy tree-view - else - printstyled(io, name; color) - end -end struct EscapeResult ir::IRCode state::EscapeState - linfo::Union{Nothing,MethodInstance} + mi::Union{Nothing,MethodInstance} + slotnames::Union{Nothing,Vector{Symbol}} source::Bool + interp::Union{Nothing,EscapeAnalyzer} function EscapeResult(ir::IRCode, state::EscapeState, - linfo::Union{Nothing,MethodInstance} = nothing, - source::Bool=false) - return new(ir, state, linfo, source) + mi::Union{Nothing,MethodInstance}=nothing, + slotnames::Union{Nothing,Vector{Symbol}}=nothing, + source::Bool=false, + interp::Union{Nothing,EscapeAnalyzer}=nothing) + return new(ir, state, mi, slotnames, source, interp) end end Base.show(io::IO, result::EscapeResult) = print_with_info(io, result) @eval Base.iterate(res::EscapeResult, state=1) = return state > $(fieldcount(EscapeResult)) ? 
nothing : (getfield(res, state), state+1)
-Base.show(io::IO, cached::EscapeCache) = show(io, EscapeResult(cached.ir, cached.state, nothing))
+Base.show(io::IO, ecacheinfo::EscapeCacheInfo) = show(io, EscapeResult(ecacheinfo.ir, ecacheinfo.state))
 
 # adapted from https://github.com/JuliaDebug/LoweredCodeUtils.jl/blob/4612349432447e868cf9285f647108f43bd0a11c/src/codeedges.jl#L881-L897
-function print_with_info(io::IO, (; ir, state, linfo, source)::EscapeResult)
+function print_with_info(io::IO, result::EscapeResult)
+    (; ir, state, mi, slotnames, source) = result
     # print escape information on SSA values
     function preprint(io::IO)
         ft = ir.argtypes[1]
@@ -321,12 +275,13 @@ function print_with_info(io::IO, (; ir, state, linfo, source)::EscapeResult)
             arg = state[Argument(i)]
             i == 1 && continue
             c, color = get_name_color(arg, true)
-            printstyled(io, c, ' ', '_', i, "::", ir.argtypes[i]; color)
+            slot = isnothing(slotnames) ? "_$i" : slotnames[i]
+            printstyled(io, c, ' ', slot, "::", ir.argtypes[i]; color)
             i ≠ state.nargs && print(io, ", ")
         end
         print(io, ')')
-        if !isnothing(linfo)
-            def = linfo.def
+        if !isnothing(mi)
+            def = mi.def
             printstyled(io, " in ", (isa(def, Module) ? (def,) : (def.module, " at ", def.file, ':', def.line))...; color=:bold)
         end
         println(io)
diff --git a/test/compiler/EscapeAnalysis/local.jl b/test/compiler/EscapeAnalysis/EscapeAnalysis.jl
similarity index 68%
rename from test/compiler/EscapeAnalysis/local.jl
rename to test/compiler/EscapeAnalysis/EscapeAnalysis.jl
index e5d8f1bf2c940..d8ea8be21fe07 100644
--- a/test/compiler/EscapeAnalysis/local.jl
+++ b/test/compiler/EscapeAnalysis/EscapeAnalysis.jl
@@ -1,20 +1,93 @@
-# Local EA Test
-# =============
-# EA works on post-inlining IR
+module test_EA
 
-include(normpath(@__DIR__, "setup.jl"))
+const use_core_compiler = true
+
+if use_core_compiler
+    const EscapeAnalysis = Core.Compiler.EscapeAnalysis
+else
+    include(normpath(Sys.BINDIR, "..", "..", "base", "compiler", "ssair", "EscapeAnalysis", "EscapeAnalysis.jl"))
+end
+
+include("EAUtils.jl")
+include("../irutils.jl")
+
+using Test, .EscapeAnalysis, .EAUtils
+using .EscapeAnalysis: ignore_argescape
+
+let utils_ex = quote
+        mutable struct SafeRef{T}
+            x::T
+        end
+        Base.getindex(s::SafeRef) = getfield(s, 1)
+        Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x)
+
+        mutable struct SafeRefs{S,T}
+            x1::S
+            x2::T
+        end
+        Base.getindex(s::SafeRefs, idx::Int) = getfield(s, idx)
+        Base.setindex!(s::SafeRefs, x, idx::Int) = setfield!(s, idx, x)
+
+        global GV::Any
+        const global GR = Ref{Any}()
+    end
+    global function EATModule(utils_ex = utils_ex)
+        M = Module()
+        Core.eval(M, utils_ex)
+        return M
+    end
+    Core.eval(@__MODULE__, utils_ex)
+end
+
+using .EscapeAnalysis:
+    EscapeInfo, IndexableElements, IndexableFields, normalize
+
+isϕ(@nospecialize x) = isa(x, Core.PhiNode)
+function with_normalized_name(@nospecialize(f), @nospecialize(x))
+    if Meta.isexpr(x, :foreigncall)
+        name = x.args[1]
+        nn = normalize(name)
+        return isa(nn, Symbol) && f(nn)
+    end
+    return false
+end
+isarrayalloc(@nospecialize x) =
+    with_normalized_name(nn::Symbol->false, x)
+isarrayresize(@nospecialize x) =
+    with_normalized_name(nn::Symbol->false, x)
+isarraycopy(@nospecialize x) =
+    with_normalized_name(nn::Symbol->false, x)
+"""
+    is_load_forwardable(x::EscapeInfo) -> Bool
+
+Queries if `x` is eligible for store-to-load forwarding optimization.
+""" +function is_load_forwardable(x::EscapeInfo) + AliasInfo = x.AliasInfo + # NOTE technically we also need to check `!has_thrown_escape(x)` here as well, + # but we can also do equivalent check during forwarding + return isa(AliasInfo, IndexableFields) || isa(AliasInfo, IndexableElements) +end + +@testset "EAUtils" begin + @test_throws "everything has been constant folded" code_escapes() do; sin(42); end + @test code_escapes(sin, (Int,)) isa EAUtils.EscapeResult + @test code_escapes(sin, (Int,)) isa EAUtils.EscapeResult +end @testset "basics" begin let # arg return result = code_escapes((Any,)) do a # return to caller + println("prevent ConstABI") return nothing end @test has_arg_escape(result.state[Argument(2)]) # return result = code_escapes((Any,)) do a + println("prevent ConstABI") return a end - i = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isreturn, result.ir.stmts.stmt)) @test has_arg_escape(result.state[Argument(1)]) # self @test !has_return_escape(result.state[Argument(1)], i) # self @test has_arg_escape(result.state[Argument(2)]) # a @@ -41,38 +114,38 @@ include(normpath(@__DIR__, "setup.jl")) GV = s return GV end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) end let # :gc_preserve_begin / :gc_preserve_end result = code_escapes((String,)) do s m = SafeRef(s) GC.@preserve m begin + println(s) return nothing end end - i = findfirst(isT(SafeRef{String}), result.ir.stmts.type) # find allocation statement + i = findfirst(==(SafeRef{String}), result.ir.stmts.type) # find allocation statement @test !isnothing(i) @test has_no_escape(result.state[SSAValue(i)]) end let # :isdefined - result = code_escapes((String, Bool, )) do a, b + result = code_escapes((String, Bool,)) do a, b if b s = Ref(a) end return @isdefined(s) end - i = findfirst(isT(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement - @test !isnothing(i) - @test has_no_escape(result.state[SSAValue(i)]) + i = findfirst(==(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement + @test isnothing(i) || has_no_escape(result.state[SSAValue(i)]) end let # ϕ-node result = code_escapes((Bool,Any,Any)) do cond, a, b c = cond ? 
a : b # ϕ(a, b) return c end - @assert any(@nospecialize(x)->isa(x, Core.PhiNode), result.ir.stmts.inst) - i = only(findall(isreturn, result.ir.stmts.inst)) + @assert any(@nospecialize(x)->isa(x, Core.PhiNode), result.ir.stmts.stmt) + i = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(3)], i) # a @test has_return_escape(result.state[Argument(4)], i) # b end @@ -83,8 +156,8 @@ include(normpath(@__DIR__, "setup.jl")) end return nothing end - @assert any(@nospecialize(x)->isa(x, Core.PiNode), result.ir.stmts.inst) - @test any(findall(isreturn, result.ir.stmts.inst)) do i + @assert any(@nospecialize(x)->isa(x, Core.PiNode), result.ir.stmts.stmt) + @test any(findall(isreturn, result.ir.stmts.stmt)) do i has_return_escape(result.state[Argument(2)], i) end end @@ -98,9 +171,9 @@ include(normpath(@__DIR__, "setup.jl")) end return x end - @assert any(@nospecialize(x)->isa(x, Core.PhiCNode), result.ir.stmts.inst) - @assert any(@nospecialize(x)->isa(x, Core.UpsilonNode), result.ir.stmts.inst) - i = only(findall(isreturn, result.ir.stmts.inst)) + @assert any(@nospecialize(x)->isa(x, Core.PhiCNode), result.ir.stmts.stmt) + @assert any(@nospecialize(x)->isa(x, Core.UpsilonNode), result.ir.stmts.stmt) + i = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], i) @test has_return_escape(result.state[Argument(3)], i) end @@ -122,12 +195,13 @@ include(normpath(@__DIR__, "setup.jl")) end nothing end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)]) end let # try/catch result = code_escapes((Any,)) do a try + println("prevent ConstABI") nothing catch err return a # return escape @@ -137,6 +211,7 @@ include(normpath(@__DIR__, "setup.jl")) end let result = code_escapes((Any,)) do a try + println("prevent ConstABI") nothing finally return a # return escape @@ -152,15 +227,6 @@ include(normpath(@__DIR__, "setup.jl")) end end -let # simple allocation - result = code_escapes((Bool,)) do c - mm = SafeRef{Bool}(c) # just allocated, never escapes - return mm[] ? nothing : 1 - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_no_escape(result.state[SSAValue(i)]) -end - @testset "builtins" begin let # throw r = code_escapes((Any,)) do a @@ -202,7 +268,7 @@ end r = ifelse(c, Ref("yes"), Ref("no")) return r end - inds = findall(isnew, result.ir.stmts.inst) + inds = findall(isnew, result.ir.stmts.stmt) @assert !isempty(inds) for i in inds @test has_return_escape(result.state[SSAValue(i)]) @@ -214,9 +280,9 @@ end return r end for i in 1:length(result.ir.stmts) - if isnew(result.ir.stmts.inst[i]) && isT(Base.RefValue{String})(result.ir.stmts.type[i]) + if isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Base.RefValue{String} @test has_return_escape(result.state[SSAValue(i)]) - elseif isnew(result.ir.stmts.inst[i]) && isT(Base.RefValue{Nothing})(result.ir.stmts.type[i]) + elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Base.RefValue{Nothing} @test has_no_escape(result.state[SSAValue(i)]) end end @@ -227,7 +293,7 @@ end y = x::String return y end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) @test !has_all_escape(result.state[Argument(2)]) end @@ -236,7 +302,7 @@ end result = code_escapes((Any,)) do x isdefined(x, :foo) ? 
x : throw("undefined") end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) @test !has_all_escape(result.state[Argument(2)]) @@ -256,8 +322,8 @@ end end return r end - i = only(findall(isnew, result.ir.stmts.inst)) - rts = findall(isreturn, result.ir.stmts.inst) + i = only(findall(isnew, result.ir.stmts.stmt)) + rts = findall(isreturn, result.ir.stmts.stmt) @assert length(rts) == 2 @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 1 end @@ -271,8 +337,8 @@ end rand(Bool) && return r return cnt end - i = only(findall(isnew, result.ir.stmts.inst)) - rts = findall(isreturn, result.ir.stmts.inst) # return statement + i = only(findall(isnew, result.ir.stmts.stmt)) + rts = findall(isreturn, result.ir.stmts.stmt) # return statement @assert length(rts) == 3 @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 2 end @@ -308,7 +374,7 @@ end end return ret end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)]) end @@ -324,7 +390,7 @@ end end nothing end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) end @@ -341,7 +407,7 @@ end GR[] = err2 end end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) end let # account for possible escapes via `rethrow` @@ -357,7 +423,7 @@ end GR[] = err2 end end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) end let # account for possible escapes via `rethrow` @@ -369,7 +435,7 @@ end escape_rethrow!() end end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) end let # account for possible escapes via `rethrow` @@ -384,7 +450,7 @@ end end return t end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) end let # account for possible escapes via `Base.current_exceptions` @@ -396,7 +462,7 @@ end GR[] = Base.current_exceptions() end end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) end let # account for possible escapes via `Base.current_exceptions` @@ -408,7 +474,7 @@ end escape_current_exceptions!() end end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) end @@ -426,10 +492,10 @@ end s2 = unsafeget(r2) return s2, r2 end - is = findall(isnew, result.ir.stmts.inst) + is = findall(isnew, result.ir.stmts.stmt) @test length(is) == 2 i1, i2 = is - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i1)]) @test !has_all_escape(result.state[SSAValue(i2)]) @test has_return_escape(result.state[SSAValue(i2)], r) @@ -449,9 +515,9 @@ end end return ret end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)], r) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = 
only(findall(isreturn, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[SSAValue(i)], r) # TODO? see `escape_exception!` end let # sequential: escape information imposed on `err1` and `err2 should propagate separately result = @eval M $code_escapes() do @@ -472,13 +538,13 @@ end end return ret end - is = findall(isnew, result.ir.stmts.inst) + is = findall(isnew, result.ir.stmts.stmt) @test length(is) == 2 i1, i2 = is - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i1)]) @test has_return_escape(result.state[SSAValue(i2)], r) - @test_broken !has_all_escape(result.state[SSAValue(i2)]) + @test_broken !has_all_escape(result.state[SSAValue(i2)]) # TODO? see `escape_exception!` end let # nested: escape information imposed on `inner` shouldn't propagate to `s` result = @eval M $code_escapes() do @@ -496,7 +562,7 @@ end end return ret end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test_broken !has_return_escape(result.state[SSAValue(i)]) end let # merge: escape information imposed on `err1` and `err2 should be merged @@ -517,8 +583,8 @@ end end nothing end - i = only(findall(isnew, result.ir.stmts.inst)) - rs = findall(isreturn, result.ir.stmts.inst) + i = only(findall(isnew, result.ir.stmts.stmt)) + rs = findall(isreturn, result.ir.stmts.stmt) @test_broken !has_all_escape(result.state[SSAValue(i)]) for r in rs @test has_return_escape(result.state[SSAValue(i)], r) @@ -538,8 +604,8 @@ end end return ret end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test_broken !has_return_escape(result.state[SSAValue(i)], r) end end @@ -553,7 +619,7 @@ end global GV = SafeRef{Any}(a) nothing end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) @test has_all_escape(result.state[Argument(2)]) end @@ -561,7 +627,7 @@ end global GV = (a,) nothing end - i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst)) + i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) @test has_all_escape(result.state[Argument(2)]) end @@ -570,7 +636,7 @@ end global GV = SafeRef(o0) nothing end - is = findall(isnew, result.ir.stmts.inst) + is = findall(isnew, result.ir.stmts.stmt) @test length(is) == 2 i0, i1 = is @test has_all_escape(result.state[SSAValue(i0)]) @@ -582,7 +648,7 @@ end global GV = (t0,) nothing end - inds = findall(iscall((result.ir, tuple)), result.ir.stmts.inst) + inds = findall(iscall((result.ir, tuple)), result.ir.stmts.stmt) @assert length(inds) == 2 for i in inds; @test has_all_escape(result.state[SSAValue(i)]); end @test has_all_escape(result.state[Argument(2)]) @@ -594,7 +660,7 @@ end r[] = a nothing end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) @test has_all_escape(result.state[Argument(2)]) end @@ -604,7 +670,7 @@ end r[] = b nothing end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[SSAValue(i)]) @test has_all_escape(result.state[Argument(2)]) # a @test has_all_escape(result.state[Argument(3)]) # b @@ -648,11 +714,11 @@ end # field 
escape should propagate to :new arguments let result = code_escapes((String,)) do a o = SafeRef(a) - f = o[] - return f + Core.donotdelete(o) + return o[] end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) @test is_load_forwardable(result.state[SSAValue(i)]) end @@ -661,20 +727,21 @@ end f = t[][1] return f end - i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) @test is_load_forwardable(result.state[SSAValue(i)]) result.state[SSAValue(i)].AliasInfo end let result = code_escapes((String, String)) do a, b obj = SafeRefs(a, b) + Core.donotdelete(obj) fld1 = obj[1] fld2 = obj[2] return (fld1, fld2) end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # a @test has_return_escape(result.state[Argument(3)], r) # b @test is_load_forwardable(result.state[SSAValue(i)]) @@ -683,22 +750,23 @@ end # field escape should propagate to `setfield!` argument let result = code_escapes((String,)) do a o = SafeRef("foo") + Core.donotdelete(o) o[] = a - f = o[] - return f + return o[] end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) @test is_load_forwardable(result.state[SSAValue(i)]) end # propagate escape information imposed on return value of `setfield!` call let result = code_escapes((String,)) do a obj = SafeRef("foo") + Core.donotdelete(obj) return (obj[] = a) end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) @test is_load_forwardable(result.state[SSAValue(i)]) end @@ -709,12 +777,12 @@ end o2 = SafeRef(o1) return o2[] end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) for i in 1:length(result.ir.stmts) - if isnew(result.ir.stmts.inst[i]) && isT(SafeRef{String})(result.ir.stmts.type[i]) + if isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == SafeRef{String} @test has_return_escape(result.state[SSAValue(i)], r) - elseif isnew(result.ir.stmts.inst[i]) && isT(SafeRef{SafeRef{String}})(result.ir.stmts.type[i]) + elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == SafeRef{SafeRef{String}} @test is_load_forwardable(result.state[SSAValue(i)]) end end @@ -724,12 +792,12 @@ end o2 = (o1,) return o2[1] end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) for i in 1:length(result.ir.stmts) - if isnew(result.ir.stmts.inst[i]) && isT(Tuple{String})(result.ir.stmts.type[i]) + if isnew(result.ir.stmts.stmt[i]) && 
result.ir.stmts.type[i] == Tuple{String} @test has_return_escape(result.state[SSAValue(i)], r) - elseif isnew(result.ir.stmts.inst[i]) && isT(Tuple{Tuple{String}})(result.ir.stmts.type[i]) + elseif isnew(result.ir.stmts.stmt[i]) && result.ir.stmts.type[i] == Tuple{Tuple{String}} @test is_load_forwardable(result.state[SSAValue(i)]) end end @@ -741,9 +809,9 @@ end a′ = o1′[] return a′ end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(i)]) end end @@ -752,8 +820,8 @@ end o2 = SafeRef(o1) return o2 end - r = only(findall(isreturn, result.ir.stmts.inst)) - for i in findall(isnew, result.ir.stmts.inst) + r = only(findall(isreturn, result.ir.stmts.stmt)) + for i in findall(isnew, result.ir.stmts.stmt) @test has_return_escape(result.state[SSAValue(i)], r) end end @@ -764,9 +832,9 @@ end o2[] = o1 return o2 end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) findall(1:length(result.ir.stmts)) do i - if isnew(result.ir.stmts[i][:inst]) + if isnew(result.ir.stmts[i][:stmt]) t = result.ir.stmts[i][:type] return t === SafeRef{String} || # o1 t === SafeRef{SafeRef} # o2 @@ -777,10 +845,12 @@ end end end let result = code_escapes((String,)) do x - broadcast(identity, Ref(x)) + o = Ref(x) + Core.donotdelete(o) + broadcast(identity, o) end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) @test is_load_forwardable(result.state[SSAValue(i)]) end @@ -794,12 +864,12 @@ end end return ϕ[] end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(3)], r) # x @test has_return_escape(result.state[Argument(4)], r) # y - i = only(findall(isϕ, result.ir.stmts.inst)) + i = only(findall(isϕ, result.ir.stmts.stmt)) @test is_load_forwardable(result.state[SSAValue(i)]) - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(i)]) end end @@ -811,13 +881,13 @@ end end return ϕ1[], ϕ2[] end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(3)], r) # x @test has_return_escape(result.state[Argument(4)], r) # y - for i in findall(isϕ, result.ir.stmts.inst) + for i in findall(isϕ, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(i)]) end - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(i)]) end end @@ -832,9 +902,9 @@ end end return @isdefined(out) ? 
out : throw(ϕ) end - r = only(findall(isreturn, result.ir.stmts.inst)) - t = only(findall(iscall((result.ir, throw)), result.ir.stmts.inst)) - ϕ = only(findall(isT(Union{SafeRef{String},SafeRefs{String,String}}), result.ir.stmts.type)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + t = only(findall(iscall((result.ir, throw)), result.ir.stmts.stmt)) + ϕ = only(findall(==(Union{SafeRef{String},SafeRefs{String,String}}), result.ir.stmts.type)) @test has_return_escape(result.state[Argument(3)], r) # x @test !has_return_escape(result.state[Argument(4)], r) # y @test has_return_escape(result.state[Argument(5)], r) # z @@ -847,22 +917,24 @@ end # alias via getfield & Expr(:new) let result = code_escapes((String,)) do s r = SafeRef(s) + Core.donotdelete(r) return r[] end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test isaliased(Argument(2), val, result.state) @test !isaliased(Argument(2), SSAValue(i), result.state) end let result = code_escapes((String,)) do s r1 = SafeRef(s) r2 = SafeRef(r1) + Core.donotdelete(r1, r2) return r2[] end - i1, i2 = findall(isnew, result.ir.stmts.inst) - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + i1, i2 = findall(isnew, result.ir.stmts.stmt) + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test !isaliased(SSAValue(i1), SSAValue(i2), result.state) @test isaliased(SSAValue(i1), val, result.state) @test !isaliased(SSAValue(i2), val, result.state) @@ -870,12 +942,13 @@ end let result = code_escapes((String,)) do s r1 = SafeRef(s) r2 = SafeRef(r1) + Core.donotdelete(r1, r2) return r2[][] end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test isaliased(Argument(2), val, result.state) - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test !isaliased(SSAValue(i), val, result.state) end end @@ -883,36 +956,39 @@ end const Rx = SafeRef("Rx") $code_escapes((String,)) do s r = SafeRef(Rx) + Core.donotdelete(r) rx = r[] # rx aliased to Rx rx[] = s nothing end end - i = findfirst(isnew, result.ir.stmts.inst) + i = only(findall(isnew, result.ir.stmts.stmt)) @test has_all_escape(result.state[Argument(2)]) @test is_load_forwardable(result.state[SSAValue(i)]) end # alias via getfield & setfield! 
let result = code_escapes((String,)) do s r = Ref{String}() + Core.donotdelete(r) r[] = s return r[] end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test isaliased(Argument(2), val, result.state) @test !isaliased(Argument(2), SSAValue(i), result.state) end let result = code_escapes((String,)) do s r1 = Ref(s) r2 = Ref{Base.RefValue{String}}() + Core.donotdelete(r1, r2) r2[] = r1 return r2[] end - i1, i2 = findall(isnew, result.ir.stmts.inst) - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + i1, i2 = findall(isnew, result.ir.stmts.stmt) + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test !isaliased(SSAValue(i1), SSAValue(i2), result.state) @test isaliased(SSAValue(i1), val, result.state) @test !isaliased(SSAValue(i2), val, result.state) @@ -920,14 +996,15 @@ end let result = code_escapes((String,)) do s r1 = Ref{String}() r2 = Ref{Base.RefValue{String}}() + Core.donotdelete(r1, r2) r2[] = r1 r1[] = s return r2[][] end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test isaliased(Argument(2), val, result.state) - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test !isaliased(SSAValue(i), val, result.state) end result = code_escapes((String,)) do s @@ -937,10 +1014,10 @@ end r2[] = r1 return r2[][] end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test isaliased(Argument(2), val, result.state) - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test !isaliased(SSAValue(i), val, result.state) end end @@ -948,13 +1025,14 @@ end const Rx = SafeRef("Rx") $code_escapes((SafeRef{String}, String,)) do _rx, s r = SafeRef(_rx) + Core.donotdelete(r) r[] = Rx rx = r[] # rx aliased to Rx rx[] = s nothing end end - i = findfirst(isnew, result.ir.stmts.inst) + i = findfirst(isnew, result.ir.stmts.stmt) @test has_all_escape(result.state[Argument(3)]) @test is_load_forwardable(result.state[SSAValue(i)]) end @@ -963,8 +1041,8 @@ end r = a::String return r end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test has_return_escape(result.state[Argument(2)], r) # a @test isaliased(Argument(2), val, result.state) # a <-> r end @@ -980,8 +1058,8 @@ end r = ifelse(c, a, b) return r end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test has_return_escape(result.state[Argument(3)], r) # a @test has_return_escape(result.state[Argument(4)], r) # b @test !isaliased(Argument(2), val, result.state) # c <!-> r @@ -1008,14 +1086,14 
@@ end ϕ2[] = x return ϕ1[] end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test has_return_escape(result.state[Argument(3)], r) # x @test isaliased(Argument(3), val, result.state) # x - for i in findall(isϕ, result.ir.stmts.inst) + for i in findall(isϕ, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(i)]) end - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(i)]) end end @@ -1028,14 +1106,14 @@ end cond2 && (ϕ2[] = x) return ϕ1[] end - r = only(findall(isreturn, result.ir.stmts.inst)) - val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + r = only(findall(isreturn, result.ir.stmts.stmt)) + val = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test has_return_escape(result.state[Argument(4)], r) # x @test isaliased(Argument(4), val, result.state) # x - for i in findall(isϕ, result.ir.stmts.inst) + for i in findall(isϕ, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(i)]) end - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(i)]) end end @@ -1046,8 +1124,8 @@ end end throw("error!") end - r = only(findall(isreturn, result.ir.stmts.inst)) - rval = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue + r = only(findall(isreturn, result.ir.stmts.stmt)) + rval = (result.ir.stmts.stmt[r]::ReturnNode).val::SSAValue @test has_return_escape(result.state[Argument(2)], r) # x @test isaliased(Argument(2), rval, result.state) end @@ -1067,8 +1145,8 @@ end x[] = x return x[] end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) end let result = @eval Module() begin @@ -1079,8 +1157,8 @@ end return r[] end end - r = only(findall(isreturn, result.ir.stmts.inst)) - for i in findall(iscall((result.ir, getfield)), result.ir.stmts.inst) + r = only(findall(isreturn, result.ir.stmts.stmt)) + for i in findall(iscall((result.ir, getfield)), result.ir.stmts.stmt) @test has_return_escape(result.state[SSAValue(i)], r) end end @@ -1095,8 +1173,8 @@ end return x[] end end - i = only(findall(isinvoke(:genr), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isinvoke(:genr), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) end @@ -1107,7 +1185,7 @@ end let result = @eval code_escapes((Any,Any,)) do T, x obj = $(Expr(:new, :T, :x)) end - t = only(findall(isnew, result.ir.stmts.inst)) + t = only(findall(isnew, result.ir.stmts.stmt)) @test #=T=# has_thrown_escape(result.state[Argument(2)], t) # T @test #=x=# has_thrown_escape(result.state[Argument(3)], t) # x end @@ -1115,8 +1193,8 @@ end obj = $(Expr(:new, :T, :x, :y)) return getfield(obj, :x) end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test #=x=# has_return_escape(result.state[Argument(3)], r) @test #=y=# has_return_escape(result.state[Argument(4)], r) @test #=z=# 
!has_return_escape(result.state[Argument(5)], r) @@ -1126,8 +1204,8 @@ end setfield!(obj, :x, y) return getfield(obj, :x) end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test #=x=# has_return_escape(result.state[Argument(3)], r) @test #=y=# has_return_escape(result.state[Argument(4)], r) @test #=z=# !has_return_escape(result.state[Argument(5)], r) @@ -1139,8 +1217,8 @@ end obj = SafeRef(a) return getfield(obj, fld) end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # a @test !is_load_forwardable(result.state[SSAValue(i)]) # obj end @@ -1148,8 +1226,8 @@ end obj = SafeRefs(a, b) return getfield(obj, fld) # should escape both `a` and `b` end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # a @test has_return_escape(result.state[Argument(3)], r) # b @test !is_load_forwardable(result.state[SSAValue(i)]) # obj @@ -1158,8 +1236,8 @@ end obj = SafeRefs(a, b) return obj[idx] # should escape both `a` and `b` end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # a @test has_return_escape(result.state[Argument(3)], r) # b @test !is_load_forwardable(result.state[SSAValue(i)]) # obj @@ -1169,8 +1247,8 @@ end setfield!(obj, fld, a) return obj[2] # should escape `a` end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # a @test !has_return_escape(result.state[Argument(3)], r) # b @test !is_load_forwardable(result.state[SSAValue(i)]) # obj @@ -1180,8 +1258,8 @@ end setfield!(obj, fld, a) return obj[1] # this should escape `a` end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # a @test !is_load_forwardable(result.state[SSAValue(i)]) # obj end @@ -1190,8 +1268,8 @@ end obj[idx] = a return obj[2] # should escape `a` end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # a @test !has_return_escape(result.state[Argument(3)], r) # b @test !is_load_forwardable(result.state[SSAValue(i)]) # obj @@ -1208,8 +1286,8 @@ end return fld end end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # NOTE we can't scalar replace `obj`, but 
still we may want to stack allocate it @test_broken is_load_forwardable(result.state[SSAValue(i)]) @@ -1233,8 +1311,8 @@ end return x end end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) @test has_return_escape(result.state[Argument(2)], r) # y end @@ -1243,12 +1321,13 @@ end $code_escapes((String,)) do y x1 = SafeRef("init") x2 = SafeRef(y) + Core.donotdelete(x1, x2) setxy!(x1, x2[]) return x1 end end - i1, i2 = findall(isnew, result.ir.stmts.inst) - r = only(findall(isreturn, result.ir.stmts.inst)) + i1, i2 = findall(isnew, result.ir.stmts.stmt) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i1)], r) @test !has_return_escape(result.state[SSAValue(i2)], r) @test has_return_escape(result.state[Argument(2)], r) # y @@ -1268,29 +1347,32 @@ end let result = code_escapes((Any,Any)) do a, b r = SafeRef{Any}(a) + Core.donotdelete(r) r[] = b return r[] end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test_broken !has_return_escape(result.state[Argument(2)], r) # a @test has_return_escape(result.state[Argument(3)], r) # b @test is_load_forwardable(result.state[SSAValue(i)]) end let result = code_escapes((Any,Any)) do a, b r = SafeRef{Any}(:init) + Core.donotdelete(r) r[] = a r[] = b return r[] end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test_broken !has_return_escape(result.state[Argument(2)], r) # a @test has_return_escape(result.state[Argument(3)], r) # b @test is_load_forwardable(result.state[SSAValue(i)]) end let result = code_escapes((Any,Any,Bool)) do a, b, cond r = SafeRef{Any}(:init) + Core.donotdelete(r) if cond r[] = a return r[] @@ -1299,9 +1381,9 @@ end return nothing end end - i = only(findall(isnew, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) @test is_load_forwardable(result.state[SSAValue(i)]) - r = only(findall(result.ir.stmts.inst) do @nospecialize x + r = only(findall(result.ir.stmts.stmt) do @nospecialize x isreturn(x) && isa(x.val, Core.SSAValue) end) @test has_return_escape(result.state[Argument(2)], r) # a @@ -1323,10 +1405,10 @@ end end r end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(3)], r) # baz @test has_return_escape(result.state[Argument(4)], r) # qux - for new in findall(isnew, result.ir.stmts.inst) + for new in findall(isnew, result.ir.stmts.stmt) @test is_load_forwardable(result.state[SSAValue(new)]) end end @@ -1343,7 +1425,7 @@ end end r end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(3)], r) # baz @test has_return_escape(result.state[Argument(4)], r) # qux end @@ -1359,7 +1441,7 @@ end t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool} return mt, has_ambig[] end - for i in findall(isnew, result.ir.stmts.inst) + for i in findall(isnew, result.ir.stmts.stmt) @test !is_load_forwardable(result.state[SSAValue(i)]) end end @@ -1383,8 
+1465,8 @@ function compute(T, ax, ay, bx, by) end let result = @code_escapes compute(MPoint, 1+.5im, 2+.5im, 2+.25im, 4+.75im) for i in findall(1:length(result.ir.stmts)) do idx - inst = EscapeAnalysis.getinst(result.ir, idx) - stmt = inst[:inst] + inst = result.ir[SSAValue(idx)] + stmt = inst[:stmt] return (isnew(stmt) || isϕ(stmt)) && inst[:type] <: MPoint end @test is_load_forwardable(result.state[SSAValue(i)]) @@ -1397,15 +1479,15 @@ function compute(a, b) end a.x, a.y end -let result = @code_escapes compute(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im)) - idxs = findall(1:length(result.ir.stmts)) do idx - inst = EscapeAnalysis.getinst(result.ir, idx) - stmt = inst[:inst] - return isnew(stmt) && inst[:type] <: MPoint - end - @assert length(idxs) == 2 - @test count(i->is_load_forwardable(result.state[SSAValue(i)]), idxs) == 1 -end +# let result = @code_escapes compute(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im)) +# idxs = findall(1:length(result.ir.stmts)) do idx +# inst = result.ir[SSAValue(idx)] +# stmt = inst[:stmt] +# return isnew(stmt) && inst[:type] <: MPoint +# end +# @assert length(idxs) == 2 +# @test count(i->is_load_forwardable(result.state[SSAValue(i)]), idxs) == 1 +# end function compute!(a, b) for i in 0:(100000000-1) c = add(a, b) # replaceable @@ -1416,8 +1498,8 @@ function compute!(a, b) end let result = @code_escapes compute!(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im)) for i in findall(1:length(result.ir.stmts)) do idx - inst = EscapeAnalysis.getinst(result.ir, idx) - stmt = inst[:inst] + inst = result.ir[SSAValue(idx)] + stmt = inst[:stmt] return isnew(stmt) && inst[:type] <: MPoint end @test is_load_forwardable(result.state[SSAValue(i)]) @@ -1425,339 +1507,310 @@ let result = @code_escapes compute!(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.7 end @testset "array primitives" begin - inbounds = Base.JLOptions().check_bounds == 0 - # arrayref - let result = code_escapes((Vector{String},Int)) do xs, i + @test_skip let result = code_escapes((Vector{String},Int)) do xs, i s = Base.arrayref(true, xs, i) return s end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # xs @test has_thrown_escape(result.state[Argument(2)]) # xs @test !has_return_escape(result.state[Argument(3)], r) # i end - let result = code_escapes((Vector{String},Int)) do xs, i + @test_skip let result = code_escapes((Vector{String},Int)) do xs, i s = Base.arrayref(false, xs, i) return s end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # xs - @test !has_thrown_escape(result.state[Argument(2)]) # xs - @test !has_return_escape(result.state[Argument(3)], r) # i - end - inbounds && let result = code_escapes((Vector{String},Int)) do xs, i - s = @inbounds xs[i] - return s - end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # xs @test !has_thrown_escape(result.state[Argument(2)]) # xs @test !has_return_escape(result.state[Argument(3)], r) # i end - let result = code_escapes((Vector{String},Bool)) do xs, i + @test_skip let result = code_escapes((Vector{String},Bool)) do xs, i c = Base.arrayref(true, xs, i) # TypeError will happen here return c end - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.stmt)) @test 
has_thrown_escape(result.state[Argument(2)], t) # xs end - let result = code_escapes((String,Int)) do xs, i + @test_skip let result = code_escapes((String,Int)) do xs, i c = Base.arrayref(true, xs, i) # TypeError will happen here return c end - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs end - let result = code_escapes((AbstractVector{String},Int)) do xs, i + @test_skip let result = code_escapes((AbstractVector{String},Int)) do xs, i c = Base.arrayref(true, xs, i) # TypeError may happen here return c end - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs end - let result = code_escapes((Vector{String},Any)) do xs, i + @test_skip let result = code_escapes((Vector{String},Any)) do xs, i c = Base.arrayref(true, xs, i) # TypeError may happen here return c end - t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs end # arrayset - let result = code_escapes((Vector{String},String,Int,)) do xs, x, i + @test_skip let result = code_escapes((Vector{String},String,Int,)) do xs, x, i Base.arrayset(true, xs, x, i) return xs end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # xs @test has_thrown_escape(result.state[Argument(2)]) # xs @test has_return_escape(result.state[Argument(3)], r) # x end - let result = code_escapes((Vector{String},String,Int,)) do xs, x, i + @test_skip let result = code_escapes((Vector{String},String,Int,)) do xs, x, i Base.arrayset(false, xs, x, i) return xs end - r = only(findall(isreturn, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[Argument(2)], r) # xs @test !has_thrown_escape(result.state[Argument(2)]) # xs @test has_return_escape(result.state[Argument(3)], r) # x end - inbounds && let result = code_escapes((Vector{String},String,Int,)) do xs, x, i - @inbounds xs[i] = x - return xs - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) # xs - @test !has_thrown_escape(result.state[Argument(2)]) # xs - @test has_return_escape(result.state[Argument(3)], r) # x - end - let result = code_escapes((String,String,String,)) do s, t, u + @test_skip let result = code_escapes((String,String,String,)) do s, t, u xs = Vector{String}(undef, 3) Base.arrayset(true, xs, s, 1) Base.arrayset(true, xs, t, 2) Base.arrayset(true, xs, u, 3) return xs end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) for i in 2:result.state.nargs @test has_return_escape(result.state[Argument(i)], r) end end - let result = code_escapes((Vector{String},String,Bool,)) do xs, x, i + @test_skip let result = code_escapes((Vector{String},String,Bool,)) do xs, x, i Base.arrayset(true, xs, x, i) # TypeError will happen here return xs end - t = only(findall(iscall((result.ir, 
Base.arrayset)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs @test has_thrown_escape(result.state[Argument(3)], t) # x end - let result = code_escapes((String,String,Int,)) do xs, x, i + @test_skip let result = code_escapes((String,String,Int,)) do xs, x, i Base.arrayset(true, xs, x, i) # TypeError will happen here return xs end - t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs::String @test has_thrown_escape(result.state[Argument(3)], t) # x::String end - let result = code_escapes((AbstractVector{String},String,Int,)) do xs, x, i + @test_skip let result = code_escapes((AbstractVector{String},String,Int,)) do xs, x, i Base.arrayset(true, xs, x, i) # TypeError may happen here return xs end - t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs @test has_thrown_escape(result.state[Argument(3)], t) # x end - let result = code_escapes((Vector{String},AbstractString,Int,)) do xs, x, i + @test_skip let result = code_escapes((Vector{String},AbstractString,Int,)) do xs, x, i Base.arrayset(true, xs, x, i) # TypeError may happen here return xs end - t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs @test has_thrown_escape(result.state[Argument(3)], t) # x end # arrayref and arrayset - let result = code_escapes() do + @test_skip let result = code_escapes() do a = Vector{Vector{Any}}(undef, 1) b = Any[] a[1] = b return a[1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - ai = only(findall(result.ir.stmts.inst) do @nospecialize x + r = only(findall(isreturn, result.ir.stmts.stmt)) + ai = only(findall(result.ir.stmts.stmt) do @nospecialize x isarrayalloc(x) && x.args[2] === Vector{Vector{Any}} end) - bi = only(findall(result.ir.stmts.inst) do @nospecialize x + bi = only(findall(result.ir.stmts.stmt) do @nospecialize x isarrayalloc(x) && x.args[2] === Vector{Any} end) @test !has_return_escape(result.state[SSAValue(ai)], r) @test has_return_escape(result.state[SSAValue(bi)], r) end - let result = code_escapes() do + @test_skip let result = code_escapes() do a = Vector{Vector{Any}}(undef, 1) b = Any[] a[1] = b return a end - r = only(findall(isreturn, result.ir.stmts.inst)) - ai = only(findall(result.ir.stmts.inst) do @nospecialize x + r = only(findall(isreturn, result.ir.stmts.stmt)) + ai = only(findall(result.ir.stmts.stmt) do @nospecialize x isarrayalloc(x) && x.args[2] === Vector{Vector{Any}} end) - bi = only(findall(result.ir.stmts.inst) do @nospecialize x + bi = only(findall(result.ir.stmts.stmt) do @nospecialize x isarrayalloc(x) && x.args[2] === Vector{Any} end) @test has_return_escape(result.state[SSAValue(ai)], r) @test has_return_escape(result.state[SSAValue(bi)], r) end - let result = code_escapes((Vector{Any},String,Int,Int)) do xs, s, i, j + @test_skip let result = code_escapes((Vector{Any},String,Int,Int)) do xs, s, i, j x = SafeRef(s) xs[i] = x xs[j] # potential error end - i = only(findall(isnew, result.ir.stmts.inst)) - t = only(findall(iscall((result.ir, Base.arrayref)), 
result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(3)], t) # s @test has_thrown_escape(result.state[SSAValue(i)], t) # x end # arraysize - let result = code_escapes((Vector{Any},)) do xs + @test_skip let result = code_escapes((Vector{Any},)) do xs Core.arraysize(xs, 1) end - t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.stmt)) @test !has_thrown_escape(result.state[Argument(2)], t) end - let result = code_escapes((Vector{Any},Int,)) do xs, dim + @test_skip let result = code_escapes((Vector{Any},Int,)) do xs, dim Core.arraysize(xs, dim) end - t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.stmt)) @test !has_thrown_escape(result.state[Argument(2)], t) end - let result = code_escapes((Any,)) do xs + @test_skip let result = code_escapes((Any,)) do xs Core.arraysize(xs, 1) end - t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) end # arraylen - let result = code_escapes((Vector{Any},)) do xs + @test_skip let result = code_escapes((Vector{Any},)) do xs Base.arraylen(xs) end - t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.stmt)) @test !has_thrown_escape(result.state[Argument(2)], t) # xs end - let result = code_escapes((String,)) do xs + @test_skip let result = code_escapes((String,)) do xs Base.arraylen(xs) end - t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs end - let result = code_escapes((Vector{Any},)) do xs + @test_skip let result = code_escapes((Vector{Any},)) do xs Base.arraylen(xs, 1) end - t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst)) + t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.stmt)) @test has_thrown_escape(result.state[Argument(2)], t) # xs end # array resizing # without BoundsErrors - let result = code_escapes((Vector{Any},String)) do xs, x + @test_skip let result = code_escapes((Vector{Any},String)) do xs, x @ccall jl_array_grow_beg(xs::Any, 2::UInt)::Cvoid xs[1] = x xs end - t = only(findall(isarrayresize, result.ir.stmts.inst)) + t = only(findall(isarrayresize, result.ir.stmts.stmt)) @test !has_thrown_escape(result.state[Argument(2)], t) # xs @test !has_thrown_escape(result.state[Argument(3)], t) # x end - let result = code_escapes((Vector{Any},String)) do xs, x + @test_skip let result = code_escapes((Vector{Any},String)) do xs, x @ccall jl_array_grow_end(xs::Any, 2::UInt)::Cvoid xs[1] = x xs end - t = only(findall(isarrayresize, result.ir.stmts.inst)) + t = only(findall(isarrayresize, result.ir.stmts.stmt)) @test !has_thrown_escape(result.state[Argument(2)], t) # xs @test !has_thrown_escape(result.state[Argument(3)], t) # x end # with possible BoundsErrors - let result = code_escapes((String,)) do x + @test_skip let result = code_escapes((String,)) do x xs = Any[1,2,3] xs[3] = x @ccall jl_array_del_beg(xs::Any, 2::UInt)::Cvoid # can potentially throw xs end - i = 
only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + t = only(findall(isarrayresize, result.ir.stmts.stmt)) @test has_thrown_escape(result.state[SSAValue(i)], t) # xs @test has_thrown_escape(result.state[Argument(2)], t) # x end - let result = code_escapes((String,)) do x + @test_skip let result = code_escapes((String,)) do x xs = Any[1,2,3] xs[1] = x @ccall jl_array_del_end(xs::Any, 2::UInt)::Cvoid # can potentially throw xs end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + t = only(findall(isarrayresize, result.ir.stmts.stmt)) @test has_thrown_escape(result.state[SSAValue(i)], t) # xs @test has_thrown_escape(result.state[Argument(2)], t) # x end - let result = code_escapes((String,)) do x + @test_skip let result = code_escapes((String,)) do x xs = Any[x] @ccall jl_array_grow_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + t = only(findall(isarrayresize, result.ir.stmts.stmt)) @test has_thrown_escape(result.state[SSAValue(i)], t) # xs @test has_thrown_escape(result.state[Argument(2)], t) # x end - let result = code_escapes((String,)) do x + @test_skip let result = code_escapes((String,)) do x xs = Any[x] @ccall jl_array_del_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[SSAValue(i)], t) # xs - @test has_thrown_escape(result.state[Argument(2)], t) # x - end - inbounds && let result = code_escapes((String,)) do x - xs = @inbounds Any[x] - @ccall jl_array_del_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - t = only(findall(isarrayresize, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + t = only(findall(isarrayresize, result.ir.stmts.stmt)) @test has_thrown_escape(result.state[SSAValue(i)], t) # xs @test has_thrown_escape(result.state[Argument(2)], t) # x end # array copy - let result = code_escapes((Vector{Any},)) do xs + @test_skip let result = code_escapes((Vector{Any},)) do xs return copy(xs) end - i = only(findall(isarraycopy, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isarraycopy, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) - @test_broken !has_return_escape(result.state[Argument(2)], r) + @test !has_return_escape(result.state[Argument(2)], r) end - let result = code_escapes((String,)) do s + @test_skip let result = code_escapes((String,)) do s xs = String[s] xs′ = copy(xs) return xs′[1] end - i1 = only(findall(isarrayalloc, result.ir.stmts.inst)) - i2 = only(findall(isarraycopy, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i1 = only(findall(isarrayalloc, result.ir.stmts.stmt)) + i2 = only(findall(isarraycopy, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i1)]) @test !has_return_escape(result.state[SSAValue(i2)]) @test 
has_return_escape(result.state[Argument(2)], r) # s end - let result = code_escapes((Vector{Any},)) do xs + @test_skip let result = code_escapes((Vector{Any},)) do xs xs′ = copy(xs) return xs′[1] # may potentially throw BoundsError, should escape `xs` conservatively (i.e. escape its elements) end - i = only(findall(isarraycopy, result.ir.stmts.inst)) - ref = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst)) - ret = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isarraycopy, result.ir.stmts.stmt)) + ref = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.stmt)) + ret = only(findall(isreturn, result.ir.stmts.stmt)) @test_broken !has_thrown_escape(result.state[SSAValue(i)], ref) @test_broken !has_return_escape(result.state[SSAValue(i)], ret) @test has_thrown_escape(result.state[Argument(2)], ref) @test has_return_escape(result.state[Argument(2)], ret) end - let result = code_escapes((String,)) do s + @test_skip let result = code_escapes((String,)) do s xs = Vector{String}(undef, 1) xs[1] = s xs′ = copy(xs) length(xs′) > 2 && throw(xs′) return xs′ end - i1 = only(findall(isarrayalloc, result.ir.stmts.inst)) - i2 = only(findall(isarraycopy, result.ir.stmts.inst)) - t = only(findall(iscall((result.ir, throw)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i1 = only(findall(isarrayalloc, result.ir.stmts.stmt)) + i2 = only(findall(isarraycopy, result.ir.stmts.stmt)) + t = only(findall(iscall((result.ir, throw)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test_broken !has_thrown_escape(result.state[SSAValue(i1)], t) @test_broken !has_return_escape(result.state[SSAValue(i1)], r) @test has_thrown_escape(result.state[SSAValue(i2)], t) @@ -1770,42 +1823,42 @@ end let result = code_escapes((Vector{Any},Int)) do xs, i return isassigned(xs, i) end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[Argument(2)], r) - @test !has_thrown_escape(result.state[Argument(2)]) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[Argument(2)], r) + @test_broken !has_thrown_escape(result.state[Argument(2)]) end # indexing analysis # ----------------- # safe case - let result = code_escapes((String,String)) do s, t + @test_skip let result = code_escapes((String,String)) do s, t a = Vector{Any}(undef, 2) a[1] = s a[2] = t return a[1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i)], r) @test is_load_forwardable(result.state[SSAValue(i)]) @test has_return_escape(result.state[Argument(2)], r) # s @test !has_return_escape(result.state[Argument(3)], r) # t end - let result = code_escapes((String,String)) do s, t + @test_skip let result = code_escapes((String,String)) do s, t a = Matrix{Any}(undef, 1, 2) a[1, 1] = s a[1, 2] = t return a[1, 1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i)], r) @test is_load_forwardable(result.state[SSAValue(i)]) @test has_return_escape(result.state[Argument(2)], r) # s @test !has_return_escape(result.state[Argument(3)], r) # t end - let result = 
code_escapes((Bool,String,String,String)) do c, s, t, u + @test_skip let result = code_escapes((Bool,String,String,String)) do c, s, t, u a = Vector{Any}(undef, 2) if c a[1] = s @@ -1816,15 +1869,15 @@ end end return a[1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test is_load_forwardable(result.state[SSAValue(i)]) @test !has_return_escape(result.state[SSAValue(i)], r) @test has_return_escape(result.state[Argument(3)], r) # s @test has_return_escape(result.state[Argument(4)], r) # t @test !has_return_escape(result.state[Argument(5)], r) # u end - let result = code_escapes((Bool,String,String,String)) do c, s, t, u + @test_skip let result = code_escapes((Bool,String,String,String)) do c, s, t, u a = Any[nothing, nothing] # TODO how to deal with loop indexing? if c a[1] = s @@ -1835,22 +1888,22 @@ end end return a[1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i)], r) @test_broken is_load_forwardable(result.state[SSAValue(i)]) @test has_return_escape(result.state[Argument(3)], r) # s @test has_return_escape(result.state[Argument(4)], r) # t @test_broken !has_return_escape(result.state[Argument(5)], r) # u end - let result = code_escapes((String,)) do s + @test_skip let result = code_escapes((String,)) do s a = Vector{Vector{Any}}(undef, 1) b = Any[s] a[1] = b return a[1][1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - is = findall(isarrayalloc, result.ir.stmts.inst) + r = only(findall(isreturn, result.ir.stmts.stmt)) + is = findall(isarrayalloc, result.ir.stmts.stmt) @assert length(is) == 2 ia, ib = is @test !has_return_escape(result.state[SSAValue(ia)], r) @@ -1859,7 +1912,7 @@ end @test_broken is_load_forwardable(result.state[SSAValue(ib)]) @test has_return_escape(result.state[Argument(2)], r) # s end - let result = code_escapes((Bool,String,String,Regex,Regex,)) do c, s1, s2, t1, t2 + @test_skip let result = code_escapes((Bool,String,String,Regex,Regex,)) do c, s1, s2, t1, t2 if c a = Vector{String}(undef, 2) a[1] = s1 @@ -1871,8 +1924,8 @@ end end return a[1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - for i in findall(isarrayalloc, result.ir.stmts.inst) + r = only(findall(isreturn, result.ir.stmts.stmt)) + for i in findall(isarrayalloc, result.ir.stmts.stmt) @test !has_return_escape(result.state[SSAValue(i)], r) @test is_load_forwardable(result.state[SSAValue(i)]) end @@ -1881,57 +1934,57 @@ end @test has_return_escape(result.state[Argument(5)], r) # t1 @test !has_return_escape(result.state[Argument(6)], r) # t2 end - let result = code_escapes((String,String,Int)) do s, t, i + @test_skip let result = code_escapes((String,String,Int)) do s, t, i a = Any[s] push!(a, t) return a[2] end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i)], r) @test_broken is_load_forwardable(result.state[SSAValue(i)]) @test_broken !has_return_escape(result.state[Argument(2)], r) # s @test has_return_escape(result.state[Argument(3)], r) # t end # unsafe cases - let result = 
code_escapes((String,String,Int)) do s, t, i + @test_skip let result = code_escapes((String,String,Int)) do s, t, i a = Vector{Any}(undef, 2) a[1] = s a[2] = t return a[i] end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i)], r) @test !is_load_forwardable(result.state[SSAValue(i)]) @test has_return_escape(result.state[Argument(2)], r) # s @test has_return_escape(result.state[Argument(3)], r) # t end - let result = code_escapes((String,String,Int)) do s, t, i + @test_skip let result = code_escapes((String,String,Int)) do s, t, i a = Vector{Any}(undef, 2) a[1] = s a[i] = t return a[1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i)], r) @test !is_load_forwardable(result.state[SSAValue(i)]) @test has_return_escape(result.state[Argument(2)], r) # s @test has_return_escape(result.state[Argument(3)], r) # t end - let result = code_escapes((String,String,Int,Int,Int)) do s, t, i, j, k + @test_skip let result = code_escapes((String,String,Int,Int,Int)) do s, t, i, j, k a = Vector{Any}(undef, 2) a[3] = s # BoundsError a[1] = t return a[1] end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i)], r) @test !is_load_forwardable(result.state[SSAValue(i)]) end - let result = @eval Module() begin + @test_skip let result = @eval Module() begin @noinline some_resize!(a) = pushfirst!(a, nothing) $code_escapes((String,String,Int)) do s, t, i a = Vector{Any}(undef, 2) @@ -1940,23 +1993,23 @@ end return a[2] end end - r = only(findall(isreturn, result.ir.stmts.inst)) - i = only(findall(isarrayalloc, result.ir.stmts.inst)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) @test_broken !has_return_escape(result.state[SSAValue(i)], r) @test !is_load_forwardable(result.state[SSAValue(i)]) end # circular reference - let result = code_escapes() do + @test_skip let result = code_escapes() do xs = Vector{Any}(undef, 1) xs[1] = xs return xs[1] end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) end - let result = @eval Module() begin + @test_skip let result = @eval Module() begin const Ax = Vector{Any}(undef, 1) Ax[1] = Ax $code_escapes() do @@ -1964,8 +2017,8 @@ end return xs[1] end end - r = only(findall(isreturn, result.ir.stmts.inst)) - for i in findall(iscall((result.ir, Core.arrayref)), result.ir.stmts.inst) + r = only(findall(isreturn, result.ir.stmts.stmt)) + for i in findall(iscall((result.ir, Core.arrayref)), result.ir.stmts.stmt) @test has_return_escape(result.state[SSAValue(i)], r) end end @@ -1980,42 +2033,42 @@ end return xs[1] end end - i = only(findall(isinvoke(:genxs), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isinvoke(:genxs), 
result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) end end # demonstrate array primitive support with a realistic end to end example -let result = code_escapes((Int,String,)) do n,s +@test_skip let result = code_escapes((Int,String,)) do n,s xs = String[] for i in 1:n push!(xs, s) end xs end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) - Base.JLOptions().check_bounds ≠ 0 && @test has_thrown_escape(result.state[SSAValue(i)]) + @test !has_thrown_escape(result.state[SSAValue(i)]) @test has_return_escape(result.state[Argument(3)], r) # s - Base.JLOptions().check_bounds ≠ 0 && @test has_thrown_escape(result.state[Argument(3)]) # s + @test !has_thrown_escape(result.state[Argument(3)]) # s end -let result = code_escapes((Int,String,)) do n,s +@test_skip let result = code_escapes((Int,String,)) do n,s xs = String[] for i in 1:n pushfirst!(xs, s) end xs end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) # xs - @test has_thrown_escape(result.state[SSAValue(i)]) # xs + @test !has_thrown_escape(result.state[SSAValue(i)]) # xs @test has_return_escape(result.state[Argument(3)], r) # s - @test has_thrown_escape(result.state[Argument(3)]) # s + @test !has_thrown_escape(result.state[Argument(3)]) # s end -let result = code_escapes((String,String,String)) do s, t, u +@test_skip let result = code_escapes((String,String,String)) do s, t, u xs = String[] resize!(xs, 3) xs[1] = s @@ -2023,8 +2076,8 @@ let result = code_escapes((String,String,String)) do s, t, u xs[1] = u xs end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isarrayalloc, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test has_return_escape(result.state[SSAValue(i)], r) @test has_thrown_escape(result.state[SSAValue(i)]) # xs @test has_return_escape(result.state[Argument(2)], r) # s @@ -2032,129 +2085,6 @@ let result = code_escapes((String,String,String)) do s, t, u @test has_return_escape(result.state[Argument(4)], r) # u end -@static if isdefined(Core, :ImmutableArray) - -import Core: ImmutableArray, arrayfreeze, mutating_arrayfreeze, arraythaw - -@testset "ImmutableArray" begin - # arrayfreeze - let result = code_escapes((Vector{Any},)) do xs - arrayfreeze(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Vector,)) do xs - arrayfreeze(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do xs - arrayfreeze(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((ImmutableArray{Any,1},)) do xs - arrayfreeze(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes() do - xs = Any[] - arrayfreeze(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test has_no_escape(result.state[SSAValue(1)]) - end - - # mutating_arrayfreeze - let result = code_escapes((Vector{Any},)) do xs - mutating_arrayfreeze(xs) - end - @test 
!has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Vector,)) do xs - mutating_arrayfreeze(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do xs - mutating_arrayfreeze(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((ImmutableArray{Any,1},)) do xs - mutating_arrayfreeze(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes() do - xs = Any[] - mutating_arrayfreeze(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test has_no_escape(result.state[SSAValue(1)]) - end - - # arraythaw - let result = code_escapes((ImmutableArray{Any,1},)) do xs - arraythaw(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((ImmutableArray,)) do xs - arraythaw(xs) - end - @test !has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Any,)) do xs - arraythaw(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes((Vector{Any},)) do xs - arraythaw(xs) - end - @test has_thrown_escape(result.state[Argument(2)]) - end - let result = code_escapes() do - xs = ImmutableArray(Any[]) - arraythaw(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test has_no_escape(result.state[SSAValue(1)]) - end -end - -# demonstrate some arrayfreeze optimizations -# !has_return_escape(ary) means ary is eligible for arrayfreeze to mutating_arrayfreeze optimization -let result = code_escapes((Int,)) do n - xs = collect(1:n) - ImmutableArray(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)]) -end -let result = code_escapes((Vector{Float64},)) do xs - ys = sin.(xs) - ImmutableArray(ys) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)]) -end -let result = code_escapes((Vector{Pair{Int,String}},)) do xs - n = maximum(first, xs) - ys = Vector{String}(undef, n) - for (i, s) in xs - ys[i] = s - end - ImmutableArray(xs) - end - i = only(findall(isarrayalloc, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)]) -end - -end # @static if isdefined(Core, :ImmutableArray) - # demonstrate a simple type level analysis can sometimes improve the analysis accuracy # by compensating the lack of yet unimplemented analyses @testset "special-casing bitstype" begin @@ -2166,11 +2096,11 @@ end # @static if isdefined(Core, :ImmutableArray) let result = code_escapes((Int,)) do a o = SafeRef(a) - f = o[] - return f + Core.donotdelete(o) + return o[] end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test !has_return_escape(result.state[SSAValue(i)], r) end @@ -2179,8 +2109,8 @@ end # @static if isdefined(Core, :ImmutableArray) t = tuple(a, b) return t end - i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) + i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) @test !has_return_escape(result.state[Argument(2)], r) @test has_return_escape(result.state[Argument(3)], r) end @@ -2204,3 +2134,177 @@ end # end # return m # end + +# interprocedural analysis +# ======================== + +# propagate escapes 
imposed on call arguments +@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing) +let result = code_escapes() do + broadcast_noescape1(Ref("Hi")) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test !has_return_escape(result.state[SSAValue(i)]) + @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # TODO `getfield(RefValue{String}, :x)` isn't safe +end +@noinline broadcast_noescape2(b) = broadcast(identity, b) +let result = code_escapes() do + broadcast_noescape2(Ref("Hi")) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test_broken !has_return_escape(result.state[SSAValue(i)]) # TODO interprocedural alias analysis + @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # TODO `getfield(RefValue{String}, :x)` isn't safe +end +@noinline allescape_argument(a) = (global GV = a) # obvious escape +let result = code_escapes() do + allescape_argument(Ref("Hi")) + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_all_escape(result.state[SSAValue(i)]) +end +# if we can't determine the matching method statically, we should be conservative +let result = code_escapes((Ref{Any},)) do a + may_exist(a) + end + @test has_all_escape(result.state[Argument(2)]) +end +let result = code_escapes((Ref{Any},)) do a + Base.@invokelatest broadcast_noescape1(a) + end + @test has_all_escape(result.state[Argument(2)]) +end + +# handling of simple union-split (just exploit the inliner's effort) +@noinline unionsplit_noescape(a) = string(nothing) +@noinline unionsplit_noescape(a::Int) = a + 10 +let result = code_escapes((Union{Int,Nothing},)) do x + s = SafeRef{Union{Int,Nothing}}(x) + unionsplit_noescape(s[]) + return nothing + end + inds = findall(isnew, result.ir.stmts.stmt) # find allocation statement + @assert !isempty(inds) + for i in inds + @test has_no_escape(result.state[SSAValue(i)]) + end +end + +@noinline unused_argument(a) = (println("prevent inlining"); nothing) +let result = code_escapes() do + a = Ref("foo") # shouldn't be "return escape" + b = unused_argument(a) + nothing + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_no_escape(result.state[SSAValue(i)]) + + result = code_escapes() do + a = Ref("foo") # still should be "return escape" + b = unused_argument(a) + return a + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)], r) +end + +# should propagate escape information imposed on return value to the aliased call argument +@noinline returnescape_argument(a) = (println("prevent inlining"); a) +let result = code_escapes() do + obj = Ref("foo") # should be "return escape" + ret = returnescape_argument(obj) + return ret # alias of `obj` + end + i = only(findall(isnew, result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_return_escape(result.state[SSAValue(i)], r) +end +@noinline noreturnescape_argument(a) = (println("prevent inlining"); identity("hi")) +let result = code_escapes() do + obj = Ref("foo") # better to not be "return escape" + ret = noreturnescape_argument(obj) + return ret # must not alias to `obj` + end + i = only(findall(isnew, result.ir.stmts.stmt)) + @test has_no_escape(result.state[SSAValue(i)]) +end + +function with_self_aliased(from_bb::Int, succs::Vector{Int}) + worklist = Int[from_bb] + visited = BitSet(from_bb) + function visit!(bb::Int) + if bb ∉ visited + push!(visited, bb) + push!(worklist, bb) + end + end + while !isempty(worklist) + foreach(visit!, succs) + end 
+ return visited +end +@test code_escapes(with_self_aliased) isa EAUtils.EscapeResult + +# accounts for ThrownEscape via potential MethodError + +# no method error +@noinline identity_if_string(x::SafeRef) = (println("preventing inlining"); nothing) +let result = code_escapes((SafeRef{String},)) do x + identity_if_string(x) + end + @test has_no_escape(ignore_argescape(result.state[Argument(2)])) +end +let result = code_escapes((Union{SafeRef{String},Nothing},)) do x + identity_if_string(x) + end + i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_thrown_escape(result.state[Argument(2)], i) + @test_broken !has_return_escape(result.state[Argument(2)], r) +end +let result = code_escapes((SafeRef{String},)) do x + try + identity_if_string(x) + catch err + global GV = err + end + return nothing + end + @test !has_all_escape(result.state[Argument(2)]) +end +let result = code_escapes((Union{SafeRef{String},Vector{String}},)) do x + try + identity_if_string(x) + catch err + global GV = err + end + return nothing + end + @test has_all_escape(result.state[Argument(2)]) +end +# method ambiguity error +@noinline ambig_error_test(a::SafeRef, b) = (println("preventing inlining"); nothing) +@noinline ambig_error_test(a, b::SafeRef) = (println("preventing inlining"); nothing) +@noinline ambig_error_test(a, b) = (println("preventing inlining"); nothing) +let result = code_escapes((SafeRef{String},Any)) do x, y + ambig_error_test(x, y) + end + i = only(findall(iscall((result.ir, ambig_error_test)), result.ir.stmts.stmt)) + r = only(findall(isreturn, result.ir.stmts.stmt)) + @test has_thrown_escape(result.state[Argument(2)], i) # x + @test has_thrown_escape(result.state[Argument(3)], i) # y + @test_broken !has_return_escape(result.state[Argument(2)], r) # x + @test_broken !has_return_escape(result.state[Argument(3)], r) # y +end +let result = code_escapes((SafeRef{String},Any)) do x, y + try + ambig_error_test(x, y) + catch err + global GV = err + end + end + @test has_all_escape(result.state[Argument(2)]) # x + @test has_all_escape(result.state[Argument(3)]) # y +end + +end # module test_EA diff --git a/test/compiler/EscapeAnalysis/interprocedural.jl b/test/compiler/EscapeAnalysis/interprocedural.jl deleted file mode 100644 index 756e5489ed637..0000000000000 --- a/test/compiler/EscapeAnalysis/interprocedural.jl +++ /dev/null @@ -1,262 +0,0 @@ -# IPO EA Test -# =========== -# EA works on pre-inlining IR - -include(normpath(@__DIR__, "setup.jl")) - -# callsites -# --------- - -noescape(a) = nothing -noescape(a, b) = nothing -function global_escape!(x) - GR[] = x - return nothing -end -union_escape!(x) = global_escape!(x) -union_escape!(x::SafeRef) = nothing -union_escape!(x::SafeRefs) = nothing -Base.@constprop :aggressive function conditional_escape!(cnd, x) - cnd && global_escape!(x) - return nothing -end - -# MethodMatchInfo -- global cache -let result = code_escapes((SafeRef{String},); optimize=false) do x - return noescape(x) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - identity(x) - return nothing - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - return identity(x) - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) -end -let result = 
code_escapes((SafeRef{String},); optimize=false) do x - return Ref(x) - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - r = Ref{SafeRef{String}}() - r[] = x - return r - end - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - global_escape!(x) - end - @test has_all_escape(result.state[Argument(2)]) -end -# UnionSplitInfo -let result = code_escapes((Bool,Vector{Any}); optimize=false) do c, s - x = c ? s : SafeRef(s) - union_escape!(x) - end - @test has_all_escape(result.state[Argument(3)]) # s -end -let result = code_escapes((Bool,Vector{Any}); optimize=false) do c, s - x = c ? SafeRef(s) : SafeRefs(s, s) - union_escape!(x) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -# ConstCallInfo -- local cache -let result = code_escapes((SafeRef{String},); optimize=false) do x - return conditional_escape!(false, x) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -# InvokeCallInfo -let result = code_escapes((SafeRef{String},); optimize=false) do x - return @invoke noescape(x::Any) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - return @invoke conditional_escape!(false::Any, x::Any) - end - @test has_no_escape(ignore_argescape(result.state[Argument(2)])) -end - -# MethodError -# ----------- -# accounts for ThrownEscape via potential MethodError - -# no method error -identity_if_string(x::SafeRef) = nothing -let result = code_escapes((SafeRef{String},); optimize=false) do x - identity_if_string(x) - end - i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_thrown_escape(result.state[Argument(2)], i) - @test !has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((Union{SafeRef{String},Vector{String}},); optimize=false) do x - identity_if_string(x) - end - i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], i) - @test !has_return_escape(result.state[Argument(2)], r) -end -let result = code_escapes((SafeRef{String},); optimize=false) do x - try - identity_if_string(x) - catch err - global GV = err - end - return nothing - end - @test !has_all_escape(result.state[Argument(2)]) -end -let result = code_escapes((Union{SafeRef{String},Vector{String}},); optimize=false) do x - try - identity_if_string(x) - catch err - global GV = err - end - return nothing - end - @test has_all_escape(result.state[Argument(2)]) -end -# method ambiguity error -ambig_error_test(a::SafeRef, b) = nothing -ambig_error_test(a, b::SafeRef) = nothing -ambig_error_test(a, b) = nothing -let result = code_escapes((SafeRef{String},Any); optimize=false) do x, y - ambig_error_test(x, y) - end - i = only(findall(iscall((result.ir, ambig_error_test)), result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_thrown_escape(result.state[Argument(2)], i) # x - @test has_thrown_escape(result.state[Argument(3)], i) # y - @test !has_return_escape(result.state[Argument(2)], r) # x - @test !has_return_escape(result.state[Argument(3)], r) # y -end -let 
result = code_escapes((SafeRef{String},Any); optimize=false) do x, y - try - ambig_error_test(x, y) - catch err - global GV = err - end - end - @test has_all_escape(result.state[Argument(2)]) # x - @test has_all_escape(result.state[Argument(3)]) # y -end - -# Local EA integration -# -------------------- - -# propagate escapes imposed on call arguments - -# FIXME handle _apply_iterate -# FIXME currently we can't prove the effect-freeness of `getfield(RefValue{String}, :x)` -# because of this check https://github.com/JuliaLang/julia/blob/94b9d66b10e8e3ebdb268e4be5f7e1f43079ad4e/base/compiler/tfuncs.jl#L745 -# and thus it leads to the following two broken tests - -@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing) -let result = code_escapes() do - broadcast_noescape1(Ref("Hi")) - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)]) - @test_broken !has_thrown_escape(result.state[SSAValue(i)]) -end -@noinline broadcast_noescape2(b) = broadcast(identity, b) -let result = code_escapes() do - broadcast_noescape2(Ref("Hi")) - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test_broken !has_return_escape(result.state[SSAValue(i)]) - @test_broken !has_thrown_escape(result.state[SSAValue(i)]) -end -@noinline allescape_argument(a) = (global GV = a) # obvious escape -let result = code_escapes() do - allescape_argument(Ref("Hi")) - end - i = only(findall(isnew, result.ir.stmts.inst)) - @test has_all_escape(result.state[SSAValue(i)]) -end -# if we can't determine the matching method statically, we should be conservative -let result = code_escapes((Ref{Any},)) do a - may_exist(a) - end - @test has_all_escape(result.state[Argument(2)]) -end -let result = code_escapes((Ref{Any},)) do a - Base.@invokelatest broadcast_noescape1(a) - end - @test has_all_escape(result.state[Argument(2)]) -end - -# handling of simple union-split (just exploit the inliner's effort) -@noinline unionsplit_noescape(a) = string(nothing) -@noinline unionsplit_noescape(a::Int) = a + 10 -let result = code_escapes((Union{Int,Nothing},)) do x - s = SafeRef{Union{Int,Nothing}}(x) - unionsplit_noescape(s[]) - return nothing - end - inds = findall(isnew, result.ir.stmts.inst) # find allocation statement - @assert !isempty(inds) - for i in inds - @test has_no_escape(result.state[SSAValue(i)]) - end -end - -@noinline function unused_argument(a) - println("prevent inlining") - return Base.inferencebarrier(nothing) -end -let result = code_escapes() do - a = Ref("foo") # shouldn't be "return escape" - b = unused_argument(a) - nothing - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) - - result = code_escapes() do - a = Ref("foo") # still should be "return escape" - b = unused_argument(a) - return a - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) -end - -# should propagate escape information imposed on return value to the aliased call argument -@noinline returnescape_argument(a) = (println("prevent inlining"); a) -let result = code_escapes() do - obj = Ref("foo") # should be "return escape" - ret = returnescape_argument(obj) - return ret # alias of `obj` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test has_return_escape(result.state[SSAValue(i)], r) -end -@noinline 
noreturnescape_argument(a) = (println("prevent inlining"); identity("hi")) -let result = code_escapes() do - obj = Ref("foo") # better to not be "return escape" - ret = noreturnescape_argument(obj) - return ret # must not alias to `obj` - end - i = only(findall(isnew, result.ir.stmts.inst)) - r = only(findall(isreturn, result.ir.stmts.inst)) - @test !has_return_escape(result.state[SSAValue(i)], r) -end diff --git a/test/compiler/EscapeAnalysis/setup.jl b/test/compiler/EscapeAnalysis/setup.jl deleted file mode 100644 index 4e7d6fb5159aa..0000000000000 --- a/test/compiler/EscapeAnalysis/setup.jl +++ /dev/null @@ -1,72 +0,0 @@ -include(normpath(@__DIR__, "EAUtils.jl")) -using Test, Core.Compiler.EscapeAnalysis, .EAUtils -import Core: Argument, SSAValue, ReturnNode -const EA = Core.Compiler.EscapeAnalysis -import .EA: ignore_argescape - -isT(T) = (@nospecialize x) -> x === T -isreturn(@nospecialize x) = isa(x, Core.ReturnNode) && isdefined(x, :val) -isthrow(@nospecialize x) = Meta.isexpr(x, :call) && Core.Compiler.is_throw_call(x) -isnew(@nospecialize x) = Meta.isexpr(x, :new) -isϕ(@nospecialize x) = isa(x, Core.PhiNode) -function with_normalized_name(@nospecialize(f), @nospecialize(x)) - if Meta.isexpr(x, :foreigncall) - name = x.args[1] - nn = EA.normalize(name) - return isa(nn, Symbol) && f(nn) - end - return false -end -isarrayalloc(@nospecialize x) = with_normalized_name(nn->!isnothing(Core.Compiler.alloc_array_ndims(nn)), x) -isarrayresize(@nospecialize x) = with_normalized_name(nn->!isnothing(EA.array_resize_info(nn)), x) -isarraycopy(@nospecialize x) = with_normalized_name(nn->EA.is_array_copy(nn), x) -import Core.Compiler: argextype, singleton_type -iscall(y) = @nospecialize(x) -> iscall(y, x) -function iscall((ir, f), @nospecialize(x)) - return iscall(x) do @nospecialize x - singleton_type(Core.Compiler.argextype(x, ir, Any[])) === f - end -end -iscall(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :call) && pred(x.args[1]) - -# check if `x` is a statically-resolved call of a function whose name is `sym` -isinvoke(y) = @nospecialize(x) -> isinvoke(y, x) -isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x) -isinvoke(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :invoke) && pred(x.args[1]::Core.MethodInstance) - -""" - is_load_forwardable(x::EscapeInfo) -> Bool - -Queries if `x` is elibigle for store-to-load forwarding optimization. 
-""" -function is_load_forwardable(x::EA.EscapeInfo) - AliasInfo = x.AliasInfo - # NOTE technically we also need to check `!has_thrown_escape(x)` here as well, - # but we can also do equivalent check during forwarding - return isa(AliasInfo, EA.IndexableFields) || isa(AliasInfo, EA.IndexableElements) -end - -let setup_ex = quote - mutable struct SafeRef{T} - x::T - end - Base.getindex(s::SafeRef) = getfield(s, 1) - Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x) - - mutable struct SafeRefs{S,T} - x1::S - x2::T - end - Base.getindex(s::SafeRefs, idx::Int) = getfield(s, idx) - Base.setindex!(s::SafeRefs, x, idx::Int) = setfield!(s, idx, x) - - global GV::Any - const global GR = Ref{Any}() - end - global function EATModule(setup_ex = setup_ex) - M = Module() - Core.eval(M, setup_ex) - return M - end - Core.eval(@__MODULE__, setup_ex) -end diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 97ecda14efde0..94396bf7a7a49 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -10,13 +10,26 @@ const opt_level = Base.JLOptions().opt_level const coverage = (Base.JLOptions().code_coverage > 0) || (Base.JLOptions().malloc_log > 0) const Iptr = sizeof(Int) == 8 ? "i64" : "i32" -# `_dump_function` might be more efficient but it doesn't really matter here... -get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) = - sprint(code_llvm, f, t, raw, dump_module, optimize) +const is_debug_build = Base.isdebugbuild() +function libjulia_codegen_name() + is_debug_build ? "libjulia-codegen-debug" : "libjulia-codegen" +end -if opt_level > 0 +# The tests below assume a certain format and safepoint_on_entry=true breaks that. +function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) + params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false, debug_info_level=Cint(2)) + d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params) + sprint(print, d) +end + +# Some tests assume calls should be stripped out, +# so strip out the calls to debug intrinsics that +# are not actually materialized as call instructions. 
+strip_debug_calls(ir) = replace(ir, r"call void @llvm\.dbg\.declare.*\n" => "", r"call void @llvm\.dbg\.value.*\n" => "") + +if !is_debug_build && opt_level > 0 # Make sure getptls call is removed at IR level with optimization on - @test !occursin(" call ", get_llvm(identity, Tuple{String})) + @test !occursin(" call ", strip_debug_calls(get_llvm(identity, Tuple{String}))) end jl_string_ptr(s::String) = ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s) @@ -104,24 +117,29 @@ function test_jl_dump_llvm_opt() end end -if opt_level > 0 +if !is_debug_build && opt_level > 0 # Make sure `jl_string_ptr` is inlined - @test !occursin(" call ", get_llvm(jl_string_ptr, Tuple{String})) + @test !occursin(" call ", strip_debug_calls(get_llvm(jl_string_ptr, Tuple{String}))) # Make sure `Core.sizeof` call is inlined s = "aaa" @test jl_string_ptr(s) == pointer_from_objref(s) + sizeof(Int) # String - test_loads_no_call(get_llvm(core_sizeof, Tuple{String}), [Iptr]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{String})), [Iptr]) # String - test_loads_no_call(get_llvm(core_sizeof, Tuple{Core.SimpleVector}), [Iptr]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Core.SimpleVector})), [Iptr]) # Array - test_loads_no_call(get_llvm(core_sizeof, Tuple{Vector{Int}}), [Iptr]) + test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Vector{Int}})), [Iptr]) + # As long as the eltype is known we don't need to load the elsize, but do need to check isvector + @test_skip test_loads_no_call(strip_debug_calls(get_llvm(sizeof, Tuple{Array{Any}})), ["atomic $Iptr", "{} addrspace(10)* addrspace(10)*", "$Iptr addrspace(10)*", Iptr, Iptr, "{ i64, {} addrspace(10)** } addrspace(10)*", Iptr]) + # Memory + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Int}})), [Iptr]) # As long as the eltype is known we don't need to load the elsize - test_loads_no_call(get_llvm(core_sizeof, Tuple{Array{Any}}), [Iptr]) - # Check that we load the elsize - test_loads_no_call(get_llvm(core_sizeof, Tuple{Vector}), [Iptr, "i16"]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory{Any}})), [Iptr]) + # Check that we load the elsize and isunion from the typeof layout + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic $Iptr", "i32*", "i32", "i16"]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Memory})), [Iptr, "atomic $Iptr", "i32*", "i32", "i16"]) # Primitive Type size should be folded to a constant - test_loads_no_call(get_llvm(core_sizeof, Tuple{Ptr}), String[]) + test_loads_no_call(strip_debug_calls(get_llvm(core_sizeof, Tuple{Ptr})), String[]) test_jl_dump_compiles() test_jl_dump_compiles_toplevel_thunks() @@ -245,7 +263,7 @@ if opt_level > 0 load_dummy_ref_ir = get_llvm(load_dummy_ref, Tuple{Int}) @test !occursin("jl_gc_pool_alloc", load_dummy_ref_ir) # Hopefully this is reliable enough. LLVM should be able to optimize this to a direct return. - @test occursin("ret $Iptr %0", load_dummy_ref_ir) + @test occursin("ret $Iptr %\"x::$(Int)\"", load_dummy_ref_ir) end # Issue 22770 @@ -296,8 +314,8 @@ end # PR #23595 @generated f23595(g, args...) 
= Expr(:call, :g, Expr(:(...), :args)) -x23595 = rand(1) -@test f23595(Core.arrayref, true, x23595, 1) == x23595[] +x23595 = rand(1).ref +@test f23595(Core.memoryrefget, x23595, :not_atomic, true) == x23595[] # Issue #22421 @noinline f22421_1(x) = x[] + 1 @@ -354,26 +372,10 @@ mktemp() do f_22330, _ end # Alias scope -macro aliasscope(body) - sym = gensym() - esc(quote - $(Expr(:aliasscope)) - $sym = $body - $(Expr(:popaliasscope)) - $sym - end) -end - -struct ConstAliasScope{T<:Array} - a::T -end - -@eval Base.getindex(A::ConstAliasScope, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1) -@eval Base.getindex(A::ConstAliasScope, i1::Int, i2::Int, I::Int...) = (@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...)) - +using Base.Experimental: @aliasscope, Const function foo31018!(a, b) @aliasscope for i in eachindex(a, b) - a[i] = ConstAliasScope(b)[i] + a[i] = Const(b)[i] end end io = IOBuffer() @@ -485,12 +487,16 @@ function f37262(x) catch GC.safepoint() end + local a try GC.gc() - return g37262(x) + a = g37262(x) + Base.inferencebarrier(false) && error() + return a catch ex GC.gc() finally + @isdefined(a) && Base.donotdelete(a) GC.gc() end end @@ -556,6 +562,7 @@ end function f1(cond) val = [1] GC.@preserve val begin end + return cond end @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f1, Tuple{Bool}, true, false, false)) @@ -563,19 +570,22 @@ end function f3(cond) val = ([1],) GC.@preserve val begin end + return cond end @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f3, Tuple{Bool}, true, false, false)) - # unions of immutables (JuliaLang/julia#39501) + # PhiNode of unions of immutables (JuliaLang/julia#39501) function f2(cond) - val = cond ? 1 : 1f0 + val = cond ? 1 : "" GC.@preserve val begin end + return cond end - @test !occursin("llvm.julia.gc_preserve_begin", get_llvm(f2, Tuple{Bool}, true, false, false)) + @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f2, Tuple{Bool}, true, false, false)) # make sure the fix for the above doesn't regress #34241 function f4(cond) val = cond ? ([1],) : ([1f0],) GC.@preserve val begin end + return cond end @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f4, Tuple{Bool}, true, false, false)) end @@ -641,7 +651,7 @@ end # issue #41157 f41157(a, b) = a[1] = b[1] -@test_throws BoundsError f41157(Tuple{Int}[], Tuple{Union{}}[]) +@test_throws BoundsError f41157(Tuple{Int}[], (NTuple{N,Union{}} where N)[]) # issue #41096 struct Modulate41096{M<:Union{Function, Val{true}, Val{false}}, id} @@ -675,17 +685,31 @@ U41096 = Term41096{:U}(Modulate41096(:U, false)) @test !newexpand41096((t=t41096, μ=μ41096, U=U41096), :U) + # test that we can start julia with libjulia-codegen removed; PR #41936 mktempdir() do pfx cp(dirname(Sys.BINDIR), pfx; force=true) - libpath = relpath(dirname(dlpath("libjulia-codegen")), dirname(Sys.BINDIR)) + libpath = relpath(dirname(dlpath(libjulia_codegen_name())), dirname(Sys.BINDIR)) libs_deleted = 0 - for f in filter(f -> startswith(f, "libjulia-codegen"), readdir(joinpath(pfx, libpath))) + libfiles = filter(f -> startswith(f, "libjulia-codegen"), readdir(joinpath(pfx, libpath))) + for f in libfiles rm(joinpath(pfx, libpath, f); force=true, recursive=true) libs_deleted += 1 end @test libs_deleted > 0 @test readchomp(`$pfx/bin/$(Base.julia_exename()) -e 'print("no codegen!\n")'`) == "no codegen!" 
+ + # PR #47343 + libs_emptied = 0 + for f in libfiles + touch(joinpath(pfx, libpath, f)) + libs_emptied += 1 + end + + errfile = joinpath(pfx, "stderr.txt") + @test libs_emptied > 0 + @test_throws ProcessFailedException run(pipeline(`$pfx/bin/$(Base.julia_exename()) -e 'print("This should fail!\n")'`; stderr=errfile)) + @test contains(readline(errfile), "ERROR: Unable to load dependent library") end # issue #42645 @@ -713,9 +737,9 @@ struct A44921{T} x::T end function f44921(a) - if a == :x + if a === :x A44921(_f) # _f purposefully undefined - elseif a == :p + elseif a === :p g44921(a) end end @@ -741,3 +765,111 @@ f_donotdelete_input(x) = Base.donotdelete(x+1) f_donotdelete_const() = Base.donotdelete(1+1) @test occursin("call void (...) @jl_f_donotdelete(i64", get_llvm(f_donotdelete_input, Tuple{Int64}, true, false, false)) @test occursin("call void (...) @jl_f_donotdelete()", get_llvm(f_donotdelete_const, Tuple{}, true, false, false)) + +# Test 45476 fixes +struct MaybeTuple45476 + val::Union{Nothing, Tuple{Float32}} +end + +@test MaybeTuple45476((0,)).val[1] == 0f0 + +# Test int paths for getfield/isdefined +f_getfield_nospecialize(@nospecialize(x)) = getfield(x, 1) +f_isdefined_nospecialize(@nospecialize(x)) = isdefined(x, 1) + +@test !occursin("jl_box_int", get_llvm(f_getfield_nospecialize, Tuple{Any}, true, false, false)) +@test !occursin("jl_box_int", get_llvm(f_isdefined_nospecialize, Tuple{Any}, true, false, false)) + +# Test codegen for isa(::Any, Type) +f_isa_type(@nospecialize(x)) = isa(x, Type) +@test !occursin("jl_isa", get_llvm(f_isa_type, Tuple{Any}, true, false, false)) + +# Issue #47247 +f47247(a::Ref{Int}, b::Nothing) = setfield!(a, :x, b) +@test_throws TypeError f47247(Ref(5), nothing) + +f48085(@nospecialize x...) = length(x) +@test Core.Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Vararg{Int}}, Core.svec()) === nothing +@test Core.Compiler.get_compileable_sig(which(f48085, (Vararg{Any},)), Tuple{typeof(f48085), Int, Vararg{Int}}, Core.svec()) === Tuple{typeof(f48085), Any, Vararg{Any}} + +# Make sure that the bounds check is elided in tuple iteration +@test !occursin("call void @", strip_debug_calls(get_llvm(iterate, Tuple{NTuple{4, Float64}, Int}))) + +# issue #34459 +function f34459(args...) + Base.pointerset(args[1], 1, 1, 1) + return +end +@test !occursin("jl_f_tuple", get_llvm(f34459, Tuple{Ptr{Int}, Type{Int}}, true, false, false)) + +# issue #48394: incorrectly-inferred getproperty shouldn't introduce invalid cgval_t +# when dealing with unions of ghost values +struct X48394 + x::Nothing + y::Bool +end +struct Y48394 + x::Nothing + z::Missing +end +function F48394(a, b, i) + c = i ? 
a : b + c.y +end +@test F48394(X48394(nothing,true), Y48394(nothing, missing), true) +@test occursin("llvm.trap", get_llvm(F48394, Tuple{X48394, Y48394, Bool})) + +# issue 48917, hoisting load to above the parent +f48917(x, w) = (y = (a=1, b=x); z = (; a=(a=(1, w), b=(3, y)))) +@test f48917(1,2) == (a = (a = (1, 2), b = (3, (a = 1, b = 1))),) + +# https://github.com/JuliaLang/julia/issues/50317 getproperty allocation on struct with 1 field +struct Wrapper50317 + lock::ReentrantLock +end +const MONITOR50317 = Wrapper50317(ReentrantLock()) +issue50317() = @noinline MONITOR50317.lock +issue50317() +let res = @timed issue50317() + @test res.bytes == 0 + return res # must return otherwise the compiler may eliminate the result entirely +end +struct Wrapper50317_2 + lock::ReentrantLock + fun::Vector{Int} +end +const MONITOR50317_2 = Wrapper50317_2(ReentrantLock(),[1]) +issue50317_2() = @noinline MONITOR50317.lock +issue50317_2() +let res = @timed issue50317_2() + @test res.bytes == 0 + return res +end +const a50317 = (b=3,) +let res = @timed a50317[:b] + @test res.bytes == 0 + return res +end + +# https://github.com/JuliaLang/julia/issues/50964 +@noinline bar50964(x::Core.Const) = Base.inferencebarrier(1) +@noinline bar50964(x::DataType) = Base.inferencebarrier(2) +foo50964(x) = bar50964(Base.inferencebarrier(Core.Const(x))) +foo50964(1) # Shouldn't assert! + +# https://github.com/JuliaLang/julia/issues/51233 +obj51233 = (1,) +@test_throws ErrorException obj51233.x + +# Very specific test for multiversioning +if Sys.ARCH === :x86_64 + foo52079() = Core.Intrinsics.have_fma(Float64) + if foo52079() == true + let io = IOBuffer() + code_native(io,^,(Float64,Float64), dump_module=false) + str = String(take!(io)) + @test !occursin("fma_emulated", str) + @test occursin("vfmadd", str) + end + end +end diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl index e89b56e4bf6de..bbcf7b0dfb959 100644 --- a/test/compiler/contextual.jl +++ b/test/compiler/contextual.jl @@ -1,12 +1,15 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# Cassette +# ======== + module MiniCassette # A minimal demonstration of the cassette mechanism. Doesn't support all the # fancy features, but sufficient to exercise this code path in the compiler. - using Core.Compiler: method_instances, retrieve_code_info, CodeInfo, - MethodInstance, SSAValue, GotoNode, GotoIfNot, ReturnNode, Slot, SlotNumber, quoted, - signature_type + using Core.Compiler: retrieve_code_info, CodeInfo, + MethodInstance, SSAValue, GotoNode, GotoIfNot, ReturnNode, SlotNumber, quoted, + signature_type, anymap using Base: _methods_by_ftype using Base.Meta: isexpr using Test @@ -16,10 +19,11 @@ module MiniCassette struct Ctx; end # A no-op cassette-like transform - function transform_expr(expr, map_slot_number, map_ssa_value, sparams) - transform(expr) = transform_expr(expr, map_slot_number, map_ssa_value, sparams) + function transform_expr(expr, map_slot_number, map_ssa_value, sparams::Core.SimpleVector) + @nospecialize expr + transform(@nospecialize expr) = transform_expr(expr, map_slot_number, map_ssa_value, sparams) if isexpr(expr, :call) - return Expr(:call, overdub, SlotNumber(2), map(transform, expr.args)...) + return Expr(:call, overdub, SlotNumber(2), anymap(transform, expr.args)...) 
elseif isa(expr, GotoIfNot) return GotoIfNot(transform(expr.cond), map_ssa_value(SSAValue(expr.dest)).id) elseif isexpr(expr, :static_parameter) @@ -27,10 +31,10 @@ module MiniCassette elseif isa(expr, ReturnNode) return ReturnNode(transform(expr.val)) elseif isa(expr, Expr) - return Expr(expr.head, map(transform, expr.args)...) + return Expr(expr.head, anymap(transform, expr.args)...) elseif isa(expr, GotoNode) return GotoNode(map_ssa_value(SSAValue(expr.label)).id) - elseif isa(expr, Slot) + elseif isa(expr, SlotNumber) return map_slot_number(expr.id) elseif isa(expr, SSAValue) return map_ssa_value(expr) @@ -39,16 +43,16 @@ module MiniCassette end end - function transform!(ci, nargs, sparams) + function transform!(ci::CodeInfo, nargs::Int, sparams::Core.SimpleVector) code = ci.code ci.slotnames = Symbol[Symbol("#self#"), :ctx, :f, :args, ci.slotnames[nargs+1:end]...] ci.slotflags = UInt8[(0x00 for i = 1:4)..., ci.slotflags[nargs+1:end]...] # Insert one SSAValue for every argument statement - prepend!(code, [Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs]) - prepend!(ci.codelocs, [0 for i = 1:nargs]) - prepend!(ci.ssaflags, [0x00 for i = 1:nargs]) + prepend!(code, Any[Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs]) + prepend!(ci.codelocs, fill(0, nargs)) + prepend!(ci.ssaflags, fill(0x00, nargs)) ci.ssavaluetypes += nargs - function map_slot_number(slot) + function map_slot_number(slot::Int) if slot == 1 # self in the original function is now `f` return SlotNumber(3) @@ -66,24 +70,28 @@ module MiniCassette end end - function overdub_generator(self, c, f, args) - if !isdefined(f, :instance) - return :(return f(args...)) + function overdub_generator(world::UInt, source, self, c, f, args) + @nospecialize + if !Base.issingletontype(f) + # (c, f, args..) -> f(args...) + code_info = :(return f(args...)) + return Core.GeneratedFunctionStub(identity, Core.svec(:overdub, :c, :f, :args), Core.svec())(world, source, code_info) end tt = Tuple{f, args...} - match = Base._which(tt, typemax(UInt)) + match = Base._which(tt; world) mi = Core.Compiler.specialize_method(match) # Unsupported in this mini-cassette @assert !mi.def.isva - code_info = retrieve_code_info(mi) + code_info = retrieve_code_info(mi, world) @assert isa(code_info, CodeInfo) code_info = copy(code_info) - if isdefined(code_info, :edges) - code_info.edges = MethodInstance[mi] - end + @assert code_info.edges === nothing + code_info.edges = MethodInstance[mi] transform!(code_info, length(args), match.sparams) - code_info + # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[]) + # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[]) + return code_info end @inline function overdub(c::Ctx, f::Union{Core.Builtin, Core.IntrinsicFunction}, args...) @@ -92,16 +100,7 @@ module MiniCassette @eval function overdub(c::Ctx, f, args...) 
$(Expr(:meta, :generated_only)) - $(Expr(:meta, - :generated, - Expr(:new, - Core.GeneratedFunctionStub, - :overdub_generator, - Any[:overdub, :ctx, :f, :args], - Any[], - @__LINE__, - QuoteNode(Symbol(@__FILE__)), - true))) + $(Expr(:meta, :generated, overdub_generator)) end end @@ -116,30 +115,13 @@ f() = 2 # Test that MiniCassette is at least somewhat capable by overdubbing gcd @test overdub(Ctx(), gcd, 10, 20) === gcd(10, 20) -# Test that pure propagates for Cassette -Base.@pure isbitstype(T) = Base.isbitstype(T) -f31012(T) = Val(isbitstype(T)) -@test @inferred(overdub(Ctx(), f31012, Int64)) == Val(true) - @generated bar(::Val{align}) where {align} = :(42) foo(i) = i+bar(Val(1)) @test @inferred(overdub(Ctx(), foo, 1)) == 43 -# Check that misbehaving pure functions propagate their error -Base.@pure func1() = 42 -Base.@pure func2() = (this_is_an_exception; func1()) - -let method = which(func2, ()) - mi = Core.Compiler.specialize_method(method, Tuple{typeof(func2)}, Core.svec()) - mi.inInference = true -end -func3() = func2() -@test_throws UndefVarError func3() - - - -## overlay method tables +# overlay method tables +# ===================== module OverlayModule @@ -157,7 +139,7 @@ end # parametric function def @overlay mt tan(x::T) where {T} = 3 -end +end # module OverlayModule methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, Base.get_world_counter()) @test only(methods).method.module === Base.Math @@ -210,8 +192,31 @@ try Baz = Base.require(Main, :Baz) @test length(Bar.mt) == 1 finally - rm(load_path, recursive=true, force=true) - rm(depot_path, recursive=true, force=true) filter!((≠)(load_path), LOAD_PATH) filter!((≠)(depot_path), DEPOT_PATH) + rm(load_path, recursive=true, force=true) + try + rm(depot_path, force=true, recursive=true) + catch err + @show err + end end + +# Test that writing a bad cassette-style pass gives the expected error (#49715) +function generator49715(world, source, self, f, tt) + tt = tt.parameters[1] + sig = Tuple{f, tt.parameters...} + mi = Base._which(sig; world) + + error("oh no") + + stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec()) + stub(world, source, :(nothing)) +end + +@eval function doit49715(f, tt) + $(Expr(:meta, :generated, generator49715)) + $(Expr(:meta, :generated_only)) +end + +@test_throws "oh no" doit49715(sin, Tuple{Int}) diff --git a/test/compiler/datastructures.jl b/test/compiler/datastructures.jl new file mode 100644 index 0000000000000..f3f862c49ea77 --- /dev/null +++ b/test/compiler/datastructures.jl @@ -0,0 +1,113 @@ +using Test + +@testset "CachedMethodTable" begin + # cache result should be separated per `limit` and `sig` + # https://github.com/JuliaLang/julia/pull/46799 + interp = Core.Compiler.NativeInterpreter() + table = Core.Compiler.method_table(interp) + sig = Tuple{typeof(*), Any, Any} + result1 = Core.Compiler.findall(sig, table; limit=-1) + result2 = Core.Compiler.findall(sig, table; limit=Core.Compiler.InferenceParams().max_methods) + @test result1 !== nothing && !Core.Compiler.isempty(result1) + @test result2 === nothing +end + +@testset "BitSetBoundedMinPrioritySet" begin + bsbmp = Core.Compiler.BitSetBoundedMinPrioritySet(5) + Core.Compiler.push!(bsbmp, 2) + Core.Compiler.push!(bsbmp, 2) + iterateok = true + cnt = 0 + @eval Core.Compiler for v in $bsbmp + if cnt == 0 + iterateok &= v == 2 + elseif cnt == 1 + iterateok &= v == 5 + else + iterateok = false + end + cnt += 1 + end + @test iterateok + @test Core.Compiler.popfirst!(bsbmp) == 2 + 
Core.Compiler.push!(bsbmp, 1) + @test Core.Compiler.popfirst!(bsbmp) == 1 + @test Core.Compiler.isempty(bsbmp) +end + +@testset "basic heap functionality" begin + v = [2,3,1] + @test Core.Compiler.heapify!(v, Core.Compiler.Forward) === v + @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 1 + @test Core.Compiler.heappush!(v, 4, Core.Compiler.Forward) === v + @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 2 + @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 3 + @test Core.Compiler.heappop!(v, Core.Compiler.Forward) === 4 +end + +@testset "randomized heap correctness tests" begin + order = Core.Compiler.By(x -> -x[2]) + for i in 1:6 + heap = Tuple{Int, Int}[(rand(1:i), rand(1:i)) for _ in 1:2i] + mock = copy(heap) + @test Core.Compiler.heapify!(heap, order) === heap + sort!(mock, by=last) + + for _ in 1:6i + if rand() < .5 && !isempty(heap) + # The first entries may differ because heaps are not stable + @test last(Core.Compiler.heappop!(heap, order)) === last(pop!(mock)) + else + new = (rand(1:i), rand(1:i)) + Core.Compiler.heappush!(heap, new, order) + push!(mock, new) + sort!(mock, by=last) + end + end + end +end + +@testset "searchsorted" begin + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 0) === Core.Compiler.UnitRange(1, 0) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 1) === Core.Compiler.UnitRange(1, 2) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2) === Core.Compiler.UnitRange(3, 4) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 4) === Core.Compiler.UnitRange(7, 6) + @test Core.Compiler.searchsorted([1, 1, 2, 2, 3, 3], 2.5; lt=<) === Core.Compiler.UnitRange(5, 4) + + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 0) === Core.Compiler.UnitRange(1, 0) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 1) === Core.Compiler.UnitRange(1, 1) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 2) === Core.Compiler.UnitRange(2, 2) + @test Core.Compiler.searchsorted(Core.Compiler.UnitRange(1, 3), 4) === Core.Compiler.UnitRange(4, 3) + + @test Core.Compiler.searchsorted([1:10;], 1, by=(x -> x >= 5)) === Core.Compiler.UnitRange(1, 4) + @test Core.Compiler.searchsorted([1:10;], 10, by=(x -> x >= 5)) === Core.Compiler.UnitRange(5, 10) + @test Core.Compiler.searchsorted([1:5; 1:5; 1:5], 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 6) + @test Core.Compiler.searchsorted(fill(1, 15), 1, 6, 10, Core.Compiler.Forward) === Core.Compiler.UnitRange(6, 10) + + for (rg,I) in Any[(Core.Compiler.UnitRange(49, 57), 47:59), + (Core.Compiler.StepRange(1, 2, 17), -1:19)] + rg_r = Core.Compiler.reverse(rg) + rgv, rgv_r = Core.Compiler.collect(rg), Core.Compiler.collect(rg_r) + for i = I + @test Core.Compiler.searchsorted(rg,i) === Core.Compiler.searchsorted(rgv,i) + @test Core.Compiler.searchsorted(rg_r,i,rev=true) === Core.Compiler.searchsorted(rgv_r,i,rev=true) + end + end +end + +@testset "basic sort" begin + v = [3,1,2] + @test v == [3,1,2] + @test Core.Compiler.sort!(v) === v == [1,2,3] + @test Core.Compiler.sort!(v, by = x -> -x) === v == [3,2,1] + @test Core.Compiler.sort!(v, by = x -> -x, < = >) === v == [1,2,3] +end + +@testset "randomized sorting tests" begin + for n in [0, 1, 3, 10, 30, 100, 300], k in [0, 30, 2n] + v = rand(-1:k, n) + for by in [identity, x -> -x, x -> x^2 + .1x], lt in [<, >] + @test sort(v; by, lt) == Core.Compiler.sort!(copy(v); by, < = lt) + end + end +end diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl new file mode 100644 index 
0000000000000..2160a716af689 --- /dev/null +++ b/test/compiler/effects.jl @@ -0,0 +1,1358 @@ +using Test +include("irutils.jl") + +# Test that the Core._apply_iterate bail path taints effects +function f_apply_bail(f) + f(()...) + return nothing +end +@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(f_apply_bail)) +@test !fully_eliminated((Function,)) do f + f_apply_bail(f) + nothing +end + +# Test that effect modeling for return_type doesn't incorrectly pick +# up the effects of the function being analyzed +f_throws() = error() +@noinline function return_type_unused(x) + Core.Compiler.return_type(f_throws, Tuple{}) + return x+1 +end +@test Core.Compiler.is_removable_if_unused(Base.infer_effects(return_type_unused, (Int,))) +@test fully_eliminated((Int,)) do x + return_type_unused(x) + return nothing +end + +# Test that ambiguous calls don't accidentally get nothrow effect +ambig_effects_test(a::Int, b) = 1 +ambig_effects_test(a, b::Int) = 1 +ambig_effects_test(a, b) = 1 +@test !Core.Compiler.is_nothrow(Base.infer_effects(ambig_effects_test, (Int, Any))) +global ambig_unknown_type_global::Any = 1 +@noinline function conditionally_call_ambig(b::Bool, a) + if b + ambig_effects_test(a, ambig_unknown_type_global) + end + return 0 +end +@test !fully_eliminated((Bool,)) do b + conditionally_call_ambig(b, 1) + return nothing +end + +# Test that a missing methtable identification gets tainted +# appropriately +struct FCallback; f::Union{Nothing, Function}; end +f_invoke_callback(fc) = let f=fc.f; (f !== nothing && f(); nothing); end +@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(f_invoke_callback, (FCallback,))) +@test !fully_eliminated((FCallback,)) do fc + f_invoke_callback(fc) + return nothing +end + +# @assume_effects override +const ___CONST_DICT___ = Dict{Any,Any}(Symbol(c) => i for (i, c) in enumerate('a':'z')) +Base.@assume_effects :foldable concrete_eval( + f, args...; kwargs...) = f(args...; kwargs...) +@test fully_eliminated() do + concrete_eval(getindex, ___CONST_DICT___, :a) +end + +# :removable override +Base.@assume_effects :removable removable_call( + f, args...; kwargs...) = f(args...; kwargs...) 
+@test fully_eliminated() do + @noinline removable_call(getindex, ___CONST_DICT___, :a) + nothing +end + +# terminates_globally override +# https://github.com/JuliaLang/julia/issues/41694 +Base.@assume_effects :terminates_globally function issue41694(x) + res = 1 + 0 ≤ x < 20 || error("bad fact") + while x > 1 + res *= x + x -= 1 + end + return res +end +@test Core.Compiler.is_foldable(Base.infer_effects(issue41694, (Int,))) +@test fully_eliminated() do + issue41694(2) +end + +Base.@assume_effects :terminates_globally function recur_termination1(x) + x == 0 && return 1 + 0 ≤ x < 20 || error("bad fact") + return x * recur_termination1(x-1) +end +@test_broken Core.Compiler.is_foldable(Base.infer_effects(recur_termination1, (Int,))) +@test Core.Compiler.is_terminates(Base.infer_effects(recur_termination1, (Int,))) +function recur_termination2() + Base.@assume_effects :total !:terminates_globally + recur_termination1(12) +end +@test_broken fully_eliminated(recur_termination2) +@test fully_eliminated() do; recur_termination2(); end + +Base.@assume_effects :terminates_globally function recur_termination21(x) + x == 0 && return 1 + 0 ≤ x < 20 || error("bad fact") + return recur_termination22(x) +end +recur_termination22(x) = x * recur_termination21(x-1) +@test_broken Core.Compiler.is_foldable(Base.infer_effects(recur_termination21, (Int,))) +@test_broken Core.Compiler.is_foldable(Base.infer_effects(recur_termination22, (Int,))) +@test Core.Compiler.is_terminates(Base.infer_effects(recur_termination21, (Int,))) +@test Core.Compiler.is_terminates(Base.infer_effects(recur_termination22, (Int,))) +function recur_termination2x() + Base.@assume_effects :total !:terminates_globally + recur_termination21(12) + recur_termination22(12) +end +@test_broken fully_eliminated(recur_termination2x) +@test fully_eliminated() do; recur_termination2x(); end + +# anonymous function support for `@assume_effects` +@test fully_eliminated() do + map((2,3,4)) do x + # this :terminates_locally allows this anonymous function to be constant-folded + Base.@assume_effects :terminates_locally + res = 1 + 0 ≤ x < 20 || error("bad fact") + while x > 1 + res *= x + x -= 1 + end + return res + end +end + +# control flow backedge should taint `terminates` +@test Base.infer_effects((Int,)) do n + for i = 1:n; end +end |> !Core.Compiler.is_terminates + +# interprocedural-recursion should taint `terminates` **appropriately** +function sumrecur(a, x) + isempty(a) && return x + return sumrecur(Base.tail(a), x + first(a)) +end +@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int},Int)) |> Core.Compiler.is_terminates +@test Base.infer_effects(sumrecur, (Tuple{Int,Int,Int,Vararg{Int}},Int)) |> !Core.Compiler.is_terminates + +# https://github.com/JuliaLang/julia/issues/45781 +@test Base.infer_effects((Float32,)) do a + out1 = promote_type(Irrational{:π}, Bool) + out2 = sin(a) + out1, out2 +end |> Core.Compiler.is_terminates + +# refine :consistent-cy effect inference using the return type information +@test Base.infer_effects((Any,)) do x + taint = Ref{Any}(x) # taints :consistent-cy, but will be adjusted + throw(taint) +end |> Core.Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + if x < 0 + taint = Ref(x) # taints :consistent-cy, but will be adjusted + throw(DomainError(x, taint)) + end + return nothing +end |> Core.Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + if x < 0 + taint = Ref(x) # taints :consistent-cy, but will be adjusted + throw(DomainError(x, taint)) + end + return x == 0 ? 
nothing : x # should `Union` of isbitstype objects nicely +end |> Core.Compiler.is_consistent +@test Base.infer_effects((Symbol,Any)) do s, x + if s === :throw + taint = Ref{Any}(":throw option given") # taints :consistent-cy, but will be adjusted + throw(taint) + end + return s # should handle `Symbol` nicely +end |> Core.Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + return Ref(x) +end |> !Core.Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + return x < 0 ? Ref(x) : nothing +end |> !Core.Compiler.is_consistent +@test Base.infer_effects((Int,)) do x + if x < 0 + throw(DomainError(x, lazy"$x is negative")) + end + return nothing +end |> Core.Compiler.is_foldable + +# :the_exception expression should taint :consistent-cy +global inconsistent_var::Int = 42 +function throw_inconsistent() # this is still :consistent + throw(inconsistent_var) +end +function catch_inconsistent() + try + throw_inconsistent() + catch err + err + end +end +@test !Core.Compiler.is_consistent(Base.infer_effects(catch_inconsistent)) +cache_inconsistent() = catch_inconsistent() +function compare_inconsistent() + a = cache_inconsistent() + global inconsistent_var = 0 + b = cache_inconsistent() + global inconsistent_var = 42 + return a === b +end +@test !compare_inconsistent() +# return type information shouldn't be able to refine it also +function catch_inconsistent(x::T) where T + v = x + try + throw_inconsistent() + catch err + v = err::T + end + return v +end +@test !Core.Compiler.is_consistent(Base.infer_effects(catch_inconsistent, (Int,))) +cache_inconsistent(x) = catch_inconsistent(x) +function compare_inconsistent(x::T) where T + x = one(T) + a = cache_inconsistent(x) + global inconsistent_var = 0 + b = cache_inconsistent(x) + global inconsistent_var = 42 + return a === b +end +@test !compare_inconsistent(3) + +# Effect modeling for Core.compilerbarrier +@test Base.infer_effects(Base.inferencebarrier, Tuple{Any}) |> Core.Compiler.is_removable_if_unused + +# allocation/access of uninitialized fields should taint the :consistent-cy +struct Maybe{T} + x::T + Maybe{T}() where T = new{T}() + Maybe{T}(x) where T = new{T}(x) + Maybe(x::T) where T = new{T}(x) +end +Base.getindex(x::Maybe) = x.x + +struct SyntacticallyDefined{T} + x::T +end + +import Core.Compiler: Const, getfield_notundefined +for T = (Base.RefValue, Maybe) # both mutable and immutable + for name = (Const(1), Const(:x)) + @test getfield_notundefined(T{String}, name) + @test getfield_notundefined(T{Integer}, name) + @test getfield_notundefined(T{Union{String,Integer}}, name) + @test getfield_notundefined(Union{T{String},T{Integer}}, name) + @test !getfield_notundefined(T{Int}, name) + @test !getfield_notundefined(T{<:Integer}, name) + @test !getfield_notundefined(T{Union{Int32,Int64}}, name) + @test !getfield_notundefined(T, name) + end + # throw doesn't account for undefined behavior + for name = (Const(0), Const(2), Const(1.0), Const(:y), Const("x"), + Float64, String, Nothing) + @test getfield_notundefined(T{String}, name) + @test getfield_notundefined(T{Int}, name) + @test getfield_notundefined(T{Integer}, name) + @test getfield_notundefined(T{<:Integer}, name) + @test getfield_notundefined(T{Union{Int32,Int64}}, name) + @test getfield_notundefined(T, name) + end + # should not be too conservative when field isn't known very well but object information is accurate + @test getfield_notundefined(T{String}, Int) + @test getfield_notundefined(T{String}, Symbol) + @test getfield_notundefined(T{Integer}, Int) + @test 
getfield_notundefined(T{Integer}, Symbol) + @test !getfield_notundefined(T{Int}, Int) + @test !getfield_notundefined(T{Int}, Symbol) + @test !getfield_notundefined(T{<:Integer}, Int) + @test !getfield_notundefined(T{<:Integer}, Symbol) +end +# should be conservative when object information isn't accurate +@test !getfield_notundefined(Any, Const(1)) +@test !getfield_notundefined(Any, Const(:x)) +# tuples and namedtuples should be okay if not given accurate information +for TupleType = Any[Tuple{Int,Int,Int}, Tuple{Int,Vararg{Int}}, Tuple{Any}, Tuple, + NamedTuple{(:a, :b), Tuple{Int,Int}}, NamedTuple{(:x,),Tuple{Any}}, NamedTuple], + FieldType = Any[Int, Symbol, Any] + @test getfield_notundefined(TupleType, FieldType) +end +# skip analysis on fields that are known to be defined syntactically +@test Core.Compiler.getfield_notundefined(SyntacticallyDefined{Float64}, Symbol) +@test Core.Compiler.getfield_notundefined(Const(Main), Const(:var)) +@test Core.Compiler.getfield_notundefined(Const(Main), Const(42)) +# high-level tests for `getfield_notundefined` +@test Base.infer_effects() do + Maybe{Int}() +end |> !Core.Compiler.is_consistent +@test Base.infer_effects() do + Maybe{Int}()[] +end |> !Core.Compiler.is_consistent +@test !fully_eliminated() do + Maybe{Int}()[] +end +@test Base.infer_effects() do + Maybe{String}() +end |> Core.Compiler.is_consistent +@test Base.infer_effects() do + Maybe{String}()[] +end |> Core.Compiler.is_consistent +let f() = Maybe{String}()[] + @test Base.return_types() do + f() # this call should be concrete evaluated + end |> only === Union{} +end +@test Base.infer_effects() do + Ref{Int}() +end |> !Core.Compiler.is_consistent +@test Base.infer_effects() do + Ref{Int}()[] +end |> !Core.Compiler.is_consistent +@test !fully_eliminated() do + Ref{Int}()[] +end +@test Base.infer_effects() do + Ref{String}()[] +end |> Core.Compiler.is_consistent +let f() = Ref{String}()[] + @test Base.return_types() do + f() # this call should be concrete evaluated + end |> only === Union{} +end +@test Base.infer_effects((SyntacticallyDefined{Float64}, Symbol)) do w, s + getfield(w, s) +end |> Core.Compiler.is_foldable + +# effects propagation for `Core.invoke` calls +# https://github.com/JuliaLang/julia/issues/44763 +global x44763::Int = 0 +increase_x44763!(n) = (global x44763; x44763 += n) +invoke44763(x) = @invoke increase_x44763!(x) +@test Base.return_types() do + invoke44763(42) +end |> only === Int +@test x44763 == 0 + +# `@inbounds`/`@boundscheck` expression should taint :consistent correctly +# https://github.com/JuliaLang/julia/issues/48099 +function A1_inbounds() + r = 0 + @inbounds begin + @boundscheck r += 1 + end + return r +end +@test !Core.Compiler.is_consistent(Base.infer_effects(A1_inbounds)) + +# Test that purity doesn't try to accidentally run unreachable code due to +# boundscheck elimination +function f_boundscheck_elim(n) + # Inbounds here assumes that this is only ever called with `n==0`, but of + # course the compiler has no way of knowing that, so it must not attempt + # to run the `@inbounds getfield(sin, 1)` that `ntuple` generates. 
+ ntuple(x->(@inbounds ()[x]), n) +end +@test !Core.Compiler.is_noub(Base.infer_effects(f_boundscheck_elim, (Int,))) +@test Tuple{} <: only(Base.return_types(f_boundscheck_elim, (Int,))) + +# Test that purity modeling doesn't accidentally introduce new world age issues +f_redefine_me(x) = x+1 +f_call_redefine() = f_redefine_me(0) +f_mk_opaque() = Base.Experimental.@opaque ()->Base.inferencebarrier(f_call_redefine)() +const op_capture_world = f_mk_opaque() +f_redefine_me(x) = x+2 +@test op_capture_world() == 1 +@test f_mk_opaque()() == 2 + +# backedge insertion for Any-typed, effect-free frame +const CONST_DICT = let d = Dict() + for c in 'A':'z' + push!(d, c => Int(c)) + end + d +end +Base.@assume_effects :foldable getcharid(c) = CONST_DICT[c] +@noinline callf(f, args...) = f(args...) +function entry_to_be_invalidated(c) + return callf(getcharid, c) +end +@test Base.infer_effects((Char,)) do x + entry_to_be_invalidated(x) +end |> Core.Compiler.is_foldable +@test fully_eliminated(; retval=97) do + entry_to_be_invalidated('a') +end +getcharid(c) = CONST_DICT[c] # now this is not eligible for concrete evaluation +@test Base.infer_effects((Char,)) do x + entry_to_be_invalidated(x) +end |> !Core.Compiler.is_foldable +@test !fully_eliminated() do + entry_to_be_invalidated('a') +end + +@test !Core.Compiler.builtin_nothrow(Core.Compiler.fallback_lattice, Core.get_binding_type, Any[Rational{Int}, Core.Const(:foo)], Any) + +# Nothrow for assignment to globals +global glob_assign_int::Int = 0 +f_glob_assign_int() = global glob_assign_int += 1 +let effects = Base.infer_effects(f_glob_assign_int, ()) + @test !Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_nothrow(effects) +end +# Nothrow for setglobal! +global SETGLOBAL!_NOTHROW::Int = 0 +let effects = Base.infer_effects() do + setglobal!(@__MODULE__, :SETGLOBAL!_NOTHROW, 42) + end + @test Core.Compiler.is_nothrow(effects) +end + +# we should taint `nothrow` if the binding doesn't exist and isn't fixed yet, +# as the cached effects can be easily wrong otherwise +# since the inference currently doesn't track "world-age" of global variables +@eval global_assignment_undefinedyet() = $(GlobalRef(@__MODULE__, :UNDEFINEDYET)) = 42 +setglobal!_nothrow_undefinedyet() = setglobal!(@__MODULE__, :UNDEFINEDYET, 42) +let effects = Base.infer_effects() do + global_assignment_undefinedyet() + end + @test !Core.Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + setglobal!_nothrow_undefinedyet() + end + @test !Core.Compiler.is_nothrow(effects) +end +global UNDEFINEDYET::String = "0" +let effects = Base.infer_effects() do + global_assignment_undefinedyet() + end + @test !Core.Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + setglobal!_nothrow_undefinedyet() + end + @test !Core.Compiler.is_nothrow(effects) +end +@test_throws ErrorException setglobal!_nothrow_undefinedyet() + +# Nothrow for setfield! 
+mutable struct SetfieldNothrow + x::Int +end +f_setfield_nothrow() = SetfieldNothrow(0).x = 1 +let effects = Base.infer_effects(f_setfield_nothrow, ()) + @test Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_effect_free(effects) # see EFFECT_FREE_IF_INACCESSIBLEMEMONLY +end + +# even if 2-arg `getfield` may throw, it should be still `:consistent` +@test Core.Compiler.is_consistent(Base.infer_effects(getfield, (NTuple{5, Float64}, Int))) + +# SimpleVector allocation is consistent +@test Core.Compiler.is_consistent(Base.infer_effects(Core.svec)) +@test Base.infer_effects() do + Core.svec(nothing, 1, "foo") +end |> Core.Compiler.is_consistent + +# fastmath operations are in-`:consistent` +@test !Core.Compiler.is_consistent(Base.infer_effects((a,b)->@fastmath(a+b), (Float64,Float64))) + +# issue 46122: @assume_effects for @ccall +@test Base.infer_effects((Vector{Int},)) do a + Base.@assume_effects :effect_free @ccall this_call_does_not_really_exist(a::Any)::Ptr{Int} +end |> Core.Compiler.is_effect_free + +# `getfield_effects` handles access to union object nicely +let 𝕃 = Core.Compiler.fallback_lattice + getfield_effects = Core.Compiler.getfield_effects + @test Core.Compiler.is_consistent(getfield_effects(𝕃, Any[Some{String}, Core.Const(:value)], String)) + @test Core.Compiler.is_consistent(getfield_effects(𝕃, Any[Some{Symbol}, Core.Const(:value)], Symbol)) + @test Core.Compiler.is_consistent(getfield_effects(𝕃, Any[Union{Some{Symbol},Some{String}}, Core.Const(:value)], Union{Symbol,String})) +end +@test Base.infer_effects((Bool,)) do c + obj = c ? Some{String}("foo") : Some{Symbol}(:bar) + return getfield(obj, :value) +end |> Core.Compiler.is_consistent + +# getfield is nothrow when bounds checking is turned off +@test Base.infer_effects((Tuple{Int,Int},Int)) do t, i + getfield(t, i, false) +end |> Core.Compiler.is_nothrow +@test Base.infer_effects((Tuple{Int,Int},Symbol)) do t, i + getfield(t, i, false) +end |> Core.Compiler.is_nothrow +@test Base.infer_effects((Tuple{Int,Int},String)) do t, i + getfield(t, i, false) # invalid name type +end |> !Core.Compiler.is_nothrow + +@test Base.infer_effects((Some{Any},)) do some + getfield(some, 1, :not_atomic) +end |> Core.Compiler.is_nothrow +@test Base.infer_effects((Some{Any},)) do some + getfield(some, 1, :invalid_atomic_spec) +end |> !Core.Compiler.is_nothrow +@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck + getfield(some, 1, boundscheck) +end |> Core.Compiler.is_nothrow +@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck + getfield(some, 1, :not_atomic, boundscheck) +end |> Core.Compiler.is_nothrow +@test Base.infer_effects((Some{Any},Bool)) do some, boundscheck + getfield(some, 1, :invalid_atomic_spec, boundscheck) +end |> !Core.Compiler.is_nothrow +@test Base.infer_effects((Some{Any},Any)) do some, boundscheck + getfield(some, 1, :not_atomic, boundscheck) +end |> !Core.Compiler.is_nothrow + +@test Core.Compiler.is_consistent(Base.infer_effects(setindex!, (Base.RefValue{Int}, Int))) + +# :inaccessiblememonly effect +const global constant_global::Int = 42 +const global ConstantType = Ref +global nonconstant_global::Int = 42 +const global constant_mutable_global = Ref(0) +const global constant_global_nonisbits = Some(:foo) +@test Base.infer_effects() do + constant_global +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + ConstantType +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + ConstantType{Any}() +end |> Core.Compiler.is_inaccessiblememonly 
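# --- Illustrative sketch; not part of the test file in this patch. ---
# The pattern used throughout these checks: `Base.infer_effects` returns an `Effects`
# value whose individual bits are queried with the `Core.Compiler.is_*` predicates.
# Reading a `const` binding is `:inaccessiblememonly` (it can be folded to its value),
# whereas reading a non-constant global touches mutable global memory, as the tests
# around this point also assert. `EffectsDemo`, `demo_const`, and `demo_global` are
# hypothetical names used only for this sketch.
module EffectsDemo
const demo_const = 42           # constant binding
global demo_global::Int = 42    # non-constant typed global
end
eff_const = Base.infer_effects() do
    EffectsDemo.demo_const
end
eff_global = Base.infer_effects() do
    EffectsDemo.demo_global
end
@assert Core.Compiler.is_inaccessiblememonly(eff_const)
@assert !Core.Compiler.is_inaccessiblememonly(eff_global)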
+@test Base.infer_effects() do + constant_global_nonisbits +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :constant_global) +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + nonconstant_global +end |> !Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :nonconstant_global) +end |> !Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects((Symbol,)) do name + getglobal(@__MODULE__, name) +end |> !Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects((Int,)) do v + global nonconstant_global = v +end |> !Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects((Int,)) do v + setglobal!(@__MODULE__, :nonconstant_global, v) +end |> !Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects((Int,)) do v + constant_mutable_global[] = v +end |> !Core.Compiler.is_inaccessiblememonly +module ConsistentModule +const global constant_global::Int = 42 +const global ConstantType = Ref +end # module +@test Base.infer_effects() do + ConsistentModule.constant_global +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + ConsistentModule.ConstantType +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + ConsistentModule.ConstantType{Any}() +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :ConsistentModule).constant_global +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :ConsistentModule).ConstantType +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do + getglobal(@__MODULE__, :ConsistentModule).ConstantType{Any}() +end |> Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects((Module,)) do M + M.constant_global +end |> !Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects((Module,)) do M + M.ConstantType +end |> !Core.Compiler.is_inaccessiblememonly +@test Base.infer_effects() do M + M.ConstantType{Any}() +end |> !Core.Compiler.is_inaccessiblememonly + +# the `:inaccessiblememonly` helper effect allows us to prove `:consistent`-cy of frames +# including `getfield` / `isdefined` accessing to local mutable object + +mutable struct SafeRef{T} + x::T +end +Base.getindex(x::SafeRef) = x.x; +Base.setindex!(x::SafeRef, v) = x.x = v; +Base.isassigned(x::SafeRef) = true; + +function mutable_consistent(s) + SafeRef(s)[] +end +@test Core.Compiler.is_inaccessiblememonly(Base.infer_effects(mutable_consistent, (Symbol,))) +@test fully_eliminated(; retval=:foo) do + mutable_consistent(:foo) +end + +function nested_mutable_consistent(s) + SafeRef(SafeRef(SafeRef(SafeRef(SafeRef(s)))))[][][][][] +end +@test Core.Compiler.is_inaccessiblememonly(Base.infer_effects(nested_mutable_consistent, (Symbol,))) +@test fully_eliminated(; retval=:foo) do + nested_mutable_consistent(:foo) +end + +const consistent_global = Some(:foo) +@test Base.infer_effects() do + consistent_global.value +end |> Core.Compiler.is_consistent +const inconsistent_global = SafeRef(:foo) +@test Base.infer_effects() do + inconsistent_global[] +end |> !Core.Compiler.is_consistent +const inconsistent_condition_ref = Ref{Bool}(false) +@test Base.infer_effects() do + if inconsistent_condition_ref[] + return 0 + else + return 1 + end +end |> !Core.Compiler.is_consistent + +# should handle va-method properly +callgetfield1(xs...) 
= getfield(getfield(xs, 1), 1) +@test !Core.Compiler.is_inaccessiblememonly(Base.infer_effects(callgetfield1, (Base.RefValue{Symbol},))) +const GLOBAL_XS = Ref(:julia) +global_getfield() = callgetfield1(GLOBAL_XS) +@test let + Base.Experimental.@force_compile + global_getfield() +end === :julia +GLOBAL_XS[] = :julia2 +@test let + Base.Experimental.@force_compile + global_getfield() +end === :julia2 + +# the `:inaccessiblememonly` helper effect allows us to prove `:effect_free`-ness of frames +# including `setfield!` modifying local mutable object + +const global_ref = Ref{Any}() +global const global_bit::Int = 42 +makeref() = Ref{Any}() +setref!(ref, @nospecialize v) = ref[] = v + +@noinline function removable_if_unused1() + x = makeref() + setref!(x, 42) + x +end +@noinline function removable_if_unused2() + x = makeref() + setref!(x, global_bit) + x +end +for f = Any[removable_if_unused1, removable_if_unused2] + effects = Base.infer_effects(f) + @test Core.Compiler.is_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_removable_if_unused(effects) + @test @eval fully_eliminated() do + $f() + nothing + end +end +@noinline function removable_if_unused3(v) + x = makeref() + setref!(x, v) + x +end +let effects = Base.infer_effects(removable_if_unused3, (Int,)) + @test Core.Compiler.is_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_removable_if_unused(effects) +end +@test fully_eliminated((Int,)) do v + removable_if_unused3(v) + nothing +end + +@noinline function unremovable_if_unused1!(x) + setref!(x, 42) +end +@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (typeof(global_ref),))) +@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused1!, (Any,))) + +@noinline function unremovable_if_unused2!() + setref!(global_ref, 42) +end +@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused2!)) + +@noinline function unremovable_if_unused3!() + getfield(@__MODULE__, :global_ref)[] = nothing +end +@test !Core.Compiler.is_removable_if_unused(Base.infer_effects(unremovable_if_unused3!)) + +# array ops +# ========= + +# allocation +# ---------- + +# low-level constructor +@noinline construct_array(@nospecialize(T), args...) = Array{T}(undef, args...) 
+# should eliminate safe but dead allocations +let good_dims = [1, 2, 3, 4, 10] + Ns = [1, 2, 3, 4, 10] + for dim = good_dims, N = Ns + Int64(dim)^N > typemax(Int) && continue + dims = ntuple(i->dim, N) + @test @eval Base.infer_effects() do + construct_array(Int, $(dims...)) + end |> Core.Compiler.is_removable_if_unused + @test @eval fully_eliminated() do + construct_array(Int, $(dims...)) + nothing + end + end +end +# should analyze throwness correctly +let bad_dims = [-1, typemax(Int)] + for dim in bad_dims, N in [1, 2, 3, 4, 10] + for T in Any[Int, Union{Missing,Nothing}, Missing, Any] + dims = ntuple(i->dim, N) + @test @eval Base.infer_effects() do + construct_array($T, $(dims...)) + end |> !Core.Compiler.is_removable_if_unused + @test @eval !fully_eliminated() do + construct_array($T, $(dims...)) + nothing + end + @test_throws "invalid " @eval construct_array($T, $(dims...)) + end + end +end + +# high-level interfaces +# getindex +for safesig = Any[ + (Type{Int},) + (Type{Int}, Int) + (Type{Int}, Int, Int) + (Type{Number},) + (Type{Number}, Number) + (Type{Number}, Int) + (Type{Any},) + (Type{Any}, Any,) + (Type{Any}, Any, Any) + ] + let effects = Base.infer_effects(getindex, safesig) + @test Core.Compiler.is_consistent_if_notreturned(effects) + @test Core.Compiler.is_removable_if_unused(effects) + end +end +for unsafesig = Any[ + (Type{Int}, String) + (Type{Int}, Any) + (Type{Number}, AbstractString) + (Type{Number}, Any) + ] + let effects = Base.infer_effects(getindex, unsafesig) + @test !Core.Compiler.is_nothrow(effects) + end +end +# vect +for safesig = Any[ + () + (Int,) + (Int, Int) + ] + let effects = Base.infer_effects(Base.vect, safesig) + @test Core.Compiler.is_consistent_if_notreturned(effects) + @test Core.Compiler.is_removable_if_unused(effects) + end +end + +# array getindex +let tt = (MemoryRef{Any},Symbol,Bool) + @testset let effects = Base.infer_effects(Core.memoryrefget, tt) + @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free(effects) + @test !Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) + end +end + +# array setindex! 
+let tt = (MemoryRef{Any},Any,Symbol,Bool) + @testset let effects = Base.infer_effects(Core.memoryrefset!, tt) + @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects) + @test !Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) + end +end +# nothrow for arrayset +@test Base.infer_effects((MemoryRef{Int},Int)) do a, v + Core.memoryrefset!(a, v, :not_atomic, true) +end |> !Core.Compiler.is_nothrow +@test Base.infer_effects((MemoryRef{Int},Int)) do a, v + a[] = v # may throw +end |> !Core.Compiler.is_nothrow +# when bounds checking is turned off, it should be safe +@test Base.infer_effects((MemoryRef{Int},Int)) do a, v + Core.memoryrefset!(a, v, :not_atomic, false) +end |> Core.Compiler.is_nothrow +@test Base.infer_effects((MemoryRef{Number},Number)) do a, v + Core.memoryrefset!(a, v, :not_atomic, false) +end |> Core.Compiler.is_nothrow + +# arraysize +# --------- + +let effects = Base.infer_effects(size, (Array,Int)) + @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free(effects) + @test !Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) +end +# Test that arraysize has proper effect modeling +@test fully_eliminated(M->(size(M, 2); nothing), (Matrix{Float64},)) + +# arraylen +# -------- + +let effects = Base.infer_effects(length, (Vector{Any},)) + @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) +end + +# resize +# ------ + +#for op = Any[ +# Base._growbeg!, +# Base._growend!, +# Base._deletebeg!, +# Base._deleteend!, +# ] +# let effects = Base.infer_effects(op, (Vector, Int)) +# @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects) +# @test Core.Compiler.is_terminates(effects) +# @test !Core.Compiler.is_nothrow(effects) +# end +#end +# +# tuple indexing +# -------------- + +@test Core.Compiler.is_foldable(Base.infer_effects(iterate, Tuple{Tuple{Int, Int}, Int})) + +# end to end +# ---------- + +#function simple_vec_ops(T, op!, op, xs...) +# a = T[] +# op!(a, xs...) +# return op(a) +#end +#for T = Any[Int,Any], op! 
= Any[push!,pushfirst!], op = Any[length,size], +# xs = Any[(Int,), (Int,Int,)] +# let effects = Base.infer_effects(simple_vec_ops, (Type{T},typeof(op!),typeof(op),xs...)) +# @test Core.Compiler.is_foldable(effects) +# end +#end + +# Test that builtin_effects handles vararg correctly +@test !Core.Compiler.is_nothrow(Core.Compiler.builtin_effects(Core.Compiler.fallback_lattice, Core.isdefined, + Any[String, Vararg{Any}], Bool)) + +# Test that :new can be eliminated even if an sparam is unknown +struct SparamUnused{T} + x + SparamUnused(x::T) where {T} = new{T}(x) +end +mksparamunused(x) = (SparamUnused(x); nothing) +let src = code_typed1(mksparamunused, (Any,)) + @test count(isnew, src.code) == 0 +end + +struct WrapperOneField{T} + x::T +end + +# Effects for getfield of type instance +@test Base.infer_effects(Tuple{Nothing}) do x + WrapperOneField{typeof(x)}.instance +end |> Core.Compiler.is_foldable_nothrow +@test Base.infer_effects(Tuple{WrapperOneField{Float64}, Symbol}) do w, s + getfield(w, s) +end |> Core.Compiler.is_foldable +@test Core.Compiler.getfield_notundefined(WrapperOneField{Float64}, Symbol) +@test Base.infer_effects(Tuple{WrapperOneField{Symbol}, Symbol}) do w, s + getfield(w, s) +end |> Core.Compiler.is_foldable + +# Flow-sensitive consistent for _typevar +@test Base.infer_effects() do + return WrapperOneField == (WrapperOneField{T} where T) +end |> Core.Compiler.is_foldable_nothrow + +# Test that dead `@inbounds` does not taint consistency +# https://github.com/JuliaLang/julia/issues/48243 +@test Base.infer_effects(Tuple{Int64}) do i + false && @inbounds (1,2,3)[i] + return 1 +end |> Core.Compiler.is_foldable_nothrow + +@test Base.infer_effects(Tuple{Int64}) do i + @inbounds (1,2,3)[i] +end |> !Core.Compiler.is_noub + +@test Base.infer_effects(Tuple{Tuple{Int64}}) do x + @inbounds x[1] +end |> Core.Compiler.is_foldable_nothrow + +# Test that :new of non-concrete, but otherwise known type +# does not taint consistency. +@eval struct ImmutRef{T} + x::T + ImmutRef(x) = $(Expr(:new, :(ImmutRef{typeof(x)}), :x)) +end +@test Core.Compiler.is_foldable(Base.infer_effects(ImmutRef, Tuple{Any})) + +@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(typejoin, ())) + +# nothrow-ness of subtyping operations +# https://github.com/JuliaLang/julia/pull/48566 +@test !Core.Compiler.is_nothrow(Base.infer_effects((A,B)->A<:B, (Any,Any))) +@test !Core.Compiler.is_nothrow(Base.infer_effects((A,B)->A>:B, (Any,Any))) + +# GotoIfNot should properly mark itself as throwing when given a non-Bool +# https://github.com/JuliaLang/julia/pull/48583 +gotoifnot_throw_check_48583(x) = x ? 
x : 0 +@test !Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Missing,))) +@test !Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Any,))) +@test Core.Compiler.is_nothrow(Base.infer_effects(gotoifnot_throw_check_48583, (Bool,))) + +# unknown :static_parameter should taint :nothrow +# https://github.com/JuliaLang/julia/issues/46771 +unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = (T; nothing) +unknown_sparam_nothrow1(x::Ref{T}) where T = (T; nothing) +unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = (T; nothing) +@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{Int},))) +@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type{<:Integer},))) +@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Type,))) +@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Nothing,))) +@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Union{Type{Int},Nothing},))) +@test !Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_throw, (Any,))) +@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow1, (Ref,))) +@test Core.Compiler.is_nothrow(Base.infer_effects(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,))) + +# purely abstract recursion should not taint :terminates +# https://github.com/JuliaLang/julia/issues/48983 +abstractly_recursive1() = abstractly_recursive2() +abstractly_recursive2() = (Core.Compiler._return_type(abstractly_recursive1, Tuple{}); 1) +abstractly_recursive3() = abstractly_recursive2() +@test Core.Compiler.is_terminates(Base.infer_effects(abstractly_recursive3, ())) +actually_recursive1(x) = actually_recursive2(x) +actually_recursive2(x) = (x <= 0) ? 1 : actually_recursive1(x - 1) +actually_recursive3(x) = actually_recursive2(x) +@test !Core.Compiler.is_terminates(Base.infer_effects(actually_recursive3, (Int,))) + +# `isdefined` effects +struct MaybeSome{T} + value::T + MaybeSome(x::T) where T = new{T}(x) + MaybeSome{T}(x::T) where T = new{T}(x) + MaybeSome{T}() where T = new{T}() +end +const undefined_ref = Ref{String}() +const defined_ref = Ref{String}("julia") +const undefined_some = MaybeSome{String}() +const defined_some = MaybeSome{String}("julia") +let effects = Base.infer_effects() do + isdefined(undefined_ref, :x) + end + @test !Core.Compiler.is_consistent(effects) + @test Core.Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + isdefined(defined_ref, :x) + end + @test Core.Compiler.is_consistent(effects) + @test Core.Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + isdefined(undefined_some, :value) + end + @test Core.Compiler.is_consistent(effects) + @test Core.Compiler.is_nothrow(effects) +end +let effects = Base.infer_effects() do + isdefined(defined_some, :value) + end + @test Core.Compiler.is_consistent(effects) + @test Core.Compiler.is_nothrow(effects) +end +# high-level interface test +isassigned_effects(s) = isassigned(Ref(s)) +@test Core.Compiler.is_consistent(Base.infer_effects(isassigned_effects, (Symbol,))) +@test fully_eliminated(; retval=true) do + isassigned_effects(:foo) +end + +# inference on throw block should be disabled only when the effects are already known to be +# concrete-eval ineligible: +function optimize_throw_block_for_effects(x) + a = [x] + if x < 0 + throw(ArgumentError(lazy"negative number given: $x")) + end + return a +end +let effects = Base.infer_effects(optimize_throw_block_for_effects, (Int,)) + 
@test Core.Compiler.is_consistent_if_notreturned(effects) + @test Core.Compiler.is_effect_free(effects) + @test !Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) +end + +# :isdefined effects +@test @eval Base.infer_effects() do + @isdefined($(gensym("some_undef_symbol"))) +end |> !Core.Compiler.is_consistent + +# Effects of Base.hasfield (#50198) +hf50198(s) = hasfield(typeof((;x=1, y=2)), s) +f50198() = (hf50198(Ref(:x)[]); nothing) +@test fully_eliminated(f50198) + +# Effects properly applied to flags by irinterp (#50311) +f50311(x, s) = Symbol(s) +g50311(x) = Val{f50311((1.0, x), "foo")}() +@test fully_eliminated(g50311, Tuple{Float64}) + +# getglobal effects +const my_defined_var = 42 +@test Base.infer_effects() do + getglobal(@__MODULE__, :my_defined_var, :monotonic) +end |> Core.Compiler.is_foldable_nothrow +@test Base.infer_effects() do + getglobal(@__MODULE__, :my_defined_var, :foo) +end |> !Core.Compiler.is_nothrow +@test Base.infer_effects() do + getglobal(@__MODULE__, :my_defined_var, :foo, nothing) +end |> !Core.Compiler.is_nothrow + +# irinterp should refine `:nothrow` information only if profitable +Base.@assume_effects :nothrow function irinterp_nothrow_override(x, y) + z = sin(y) + if x + return "julia" + end + return z +end +@test Base.infer_effects((Float64,)) do y + isinf(y) && return zero(y) + irinterp_nothrow_override(true, y) +end |> Core.Compiler.is_nothrow + +# Effects for :compilerbarrier +f1_compilerbarrier(b) = Base.compilerbarrier(:type, b) +f2_compilerbarrier(b) = Base.compilerbarrier(:conditional, b) + +@test !Core.Compiler.is_consistent(Base.infer_effects(f1_compilerbarrier, (Bool,))) +@test Core.Compiler.is_consistent(Base.infer_effects(f2_compilerbarrier, (Bool,))) + +# Optimizer-refined effects +function f1_optrefine(b) + if Base.inferencebarrier(b) + error() + end + return b +end +@test !Core.Compiler.is_consistent(Base.infer_effects(f1_optrefine, (Bool,))) + +function f2_optrefine() + if Ref(false)[] + error() + end + return true +end +@test Core.Compiler.is_nothrow(Base.infer_effects(f2_optrefine)) + +function f3_optrefine(x) + @fastmath sqrt(x) + return x +end +@test Core.Compiler.is_consistent(Base.infer_effects(f3_optrefine)) + +# Check that :consistent is properly modeled for throwing statements +const GLOBAL_MUTABLE_SWITCH = Ref{Bool}(false) + +check_switch(switch::Base.RefValue{Bool}) = (switch[] && error(); return nothing) +check_switch2() = check_switch(GLOBAL_MUTABLE_SWITCH) + +@test (Base.return_types(check_switch2) |> only) === Nothing +GLOBAL_MUTABLE_SWITCH[] = true +# Check that flipping the switch doesn't accidentally change the return type +@test (Base.return_types(check_switch2) |> only) === Nothing + +@test !Core.Compiler.is_consistent(Base.infer_effects(check_switch, (Base.RefValue{Bool},))) + +# post-opt IPO analysis refinement of `:effect_free`-ness +function post_opt_refine_effect_free(y, c=true) + x = Ref(c) + if x[] + return true + else + r = y[] isa Number + y[] = nothing + end + return r +end +@test Core.Compiler.is_effect_free(Base.infer_effects(post_opt_refine_effect_free, (Base.RefValue{Any},))) +@test Base.infer_effects((Base.RefValue{Any},)) do y + post_opt_refine_effect_free(y, true) +end |> Core.Compiler.is_effect_free + +# Check EA-based refinement of :effect_free +Base.@assume_effects :nothrow @noinline _noinline_set!(x) = (x[] = 1; nothing) + +function set_ref_with_unused_arg_1(_) + x = Ref(0) + _noinline_set!(x) + return nothing +end +function set_ref_with_unused_arg_2(_) + x = @noinline 
Ref(0) + _noinline_set!(x) + return nothing +end +function set_arg_ref!(x) + _noinline_set!(x) + y = Ref(false) + y[] && (Main.x = x) + return nothing +end + +function set_arr_with_unused_arg_1(_) + x = Int[0] + _noinline_set!(x) + return nothing +end +function set_arr_with_unused_arg_2(_) + x = @noinline Int[0] + _noinline_set!(x) + return nothing +end +function set_arg_arr!(x) + _noinline_set!(x) + y = Bool[false] + y[] && (Main.x = x) + return nothing +end + +# This is inferable by type analysis only since the arguments have no mutable memory +@test Core.Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(_noinline_set!, (Base.RefValue{Int},))) +@test Core.Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(_noinline_set!, (Vector{Int},))) +for func in (set_ref_with_unused_arg_1, set_ref_with_unused_arg_2, + set_arr_with_unused_arg_1, set_arr_with_unused_arg_2) + effects = Base.infer_effects(func, (Nothing,)) + @test Core.Compiler.is_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free(effects) +end + +# These need EA +@test Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_1, (Base.RefValue{Int},))) +@test Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},))) +@test Core.Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_ref!, (Base.RefValue{Int},))) +@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_1, (Vector{Int},))) +@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_2, (Vector{Int},))) +@test_broken Core.Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_arr!, (Vector{Int},))) + +function issue51837(; openquotechar::Char, newlinechar::Char) + ncodeunits(openquotechar) == 1 || throw(ArgumentError("`openquotechar` must be a single-byte character")) + if !isnothing(newlinechar) + ncodeunits(newlinechar) > 1 && throw(ArgumentError("`newlinechar` must be a single-byte character.")) + end + return nothing +end +@test Base.infer_effects() do openquotechar::Char, newlinechar::Char + issue51837(; openquotechar, newlinechar) +end |> !Core.Compiler.is_nothrow +@test_throws ArgumentError issue51837(; openquotechar='α', newlinechar='\n') + +# idempotency of effects derived by post-opt analysis +callgetfield(x, f) = getfield(x, f, Base.@_boundscheck) +@test Base.infer_effects(callgetfield, (Some{Any},Symbol)).noub === Core.Compiler.NOUB_IF_NOINBOUNDS +callgetfield1(x, f) = getfield(x, f, Base.@_boundscheck) +callgetfield_simple(x, f) = callgetfield1(x, f) +@test Base.infer_effects(callgetfield_simple, (Some{Any},Symbol)).noub === + Base.infer_effects(callgetfield_simple, (Some{Any},Symbol)).noub === + Core.Compiler.ALWAYS_TRUE +callgetfield2(x, f) = getfield(x, f, Base.@_boundscheck) +callgetfield_inbounds(x, f) = @inbounds callgetfield2(x, f) +@test Base.infer_effects(callgetfield_inbounds, (Some{Any},Symbol)).noub === + Base.infer_effects(callgetfield_inbounds, (Some{Any},Symbol)).noub === + Core.Compiler.ALWAYS_FALSE + +# noub modeling for memory ops +let (memoryref, memoryrefget, memoryref_isassigned, memoryrefset!) = + (Core.memoryref, Core.memoryrefget, Core.memoryref_isassigned, Core.memoryrefset!) + function builtin_effects(@nospecialize xs...) 
+ interp = Core.Compiler.NativeInterpreter() + 𝕃 = Core.Compiler.typeinf_lattice(interp) + rt = Core.Compiler.builtin_tfunction(interp, xs..., nothing) + return Core.Compiler.builtin_effects(𝕃, xs..., rt) + end + @test Core.Compiler.is_noub(builtin_effects(memoryref, Any[Memory,])) + @test Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int])) + @test Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Core.Const(true)])) + @test !Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Core.Const(false)])) + @test !Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Bool])) + @test Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Int])) + @test !Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Int,Vararg{Bool}])) + @test !Core.Compiler.is_noub(builtin_effects(memoryref, Any[MemoryRef,Vararg{Any}])) + @test Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Core.Const(true)])) + @test !Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Core.Const(false)])) + @test !Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Bool])) + @test Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Int])) + @test !Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Symbol,Vararg{Bool}])) + @test !Core.Compiler.is_noub(builtin_effects(memoryrefget, Any[MemoryRef,Vararg{Any}])) + @test Core.Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Core.Const(true)])) + @test !Core.Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Core.Const(false)])) + @test !Core.Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Bool])) + @test Core.Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Int])) + @test !Core.Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Symbol,Vararg{Bool}])) + @test !Core.Compiler.is_noub(builtin_effects(memoryref_isassigned, Any[MemoryRef,Vararg{Any}])) + @test Core.Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Core.Const(true)])) + @test !Core.Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Core.Const(false)])) + @test !Core.Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Bool])) + @test Core.Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Int])) + @test !Core.Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Any,Symbol,Vararg{Bool}])) + @test !Core.Compiler.is_noub(builtin_effects(memoryrefset!, Any[MemoryRef,Vararg{Any}])) + # `:boundscheck` taint should be refined by post-opt analysis + @test Base.infer_effects() do xs::Vector{Any}, i::Int + memoryrefget(memoryref(getfield(xs, :ref), i, Base.@_boundscheck), :not_atomic, Base.@_boundscheck) + end |> Core.Compiler.is_noub_if_noinbounds +end + +# high level tests +@test Core.Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex, (Vector{Int},Int))) +@test Core.Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex, (Vector{Any},Int))) +@test Core.Compiler.is_noub_if_noinbounds(Base.infer_effects(setindex!, (Vector{Int},Int,Int))) +@test Core.Compiler.is_noub_if_noinbounds(Base.infer_effects(setindex!, (Vector{Any},Any,Int))) +@test Core.Compiler.is_noub_if_noinbounds(Base.infer_effects(isassigned, (Vector{Int},Int))) +@test Core.Compiler.is_noub_if_noinbounds(Base.infer_effects(isassigned, (Vector{Any},Int))) +@test 
Base.infer_effects((Vector{Int},Int)) do xs, i + xs[i] +end |> Core.Compiler.is_noub +@test Base.infer_effects((Vector{Any},Int)) do xs, i + xs[i] +end |> Core.Compiler.is_noub +@test Base.infer_effects((Vector{Int},Int,Int)) do xs, x, i + xs[i] = x +end |> Core.Compiler.is_noub +@test Base.infer_effects((Vector{Any},Any,Int)) do xs, x, i + xs[i] = x +end |> Core.Compiler.is_noub +@test Base.infer_effects((Vector{Int},Int)) do xs, i + @inbounds xs[i] +end |> !Core.Compiler.is_noub +@test Base.infer_effects((Vector{Any},Int)) do xs, i + @inbounds xs[i] +end |> !Core.Compiler.is_noub +Base.@propagate_inbounds getindex_propagate(xs, i) = xs[i] +getindex_dont_propagate(xs, i) = xs[i] +@test Core.Compiler.is_noub_if_noinbounds(Base.infer_effects(getindex_propagate, (Vector{Any},Int))) +@test Core.Compiler.is_noub(Base.infer_effects(getindex_dont_propagate, (Vector{Any},Int))) +@test Base.infer_effects((Vector{Any},Int)) do xs, i + @inbounds getindex_propagate(xs, i) +end |> !Core.Compiler.is_noub +@test Base.infer_effects((Vector{Any},Int)) do xs, i + @inbounds getindex_dont_propagate(xs, i) +end |> Core.Compiler.is_noub + +# refine `:nothrow` when `exct` is known to be `Bottom` +@test Base.infer_exception_type(getindex, (Vector{Int},Int)) == BoundsError +function getindex_nothrow(xs::Vector{Int}, i::Int) + try + return xs[i] + catch err + err isa BoundsError && return nothing + rethrow(err) + end +end +@test Core.Compiler.is_nothrow(Base.infer_effects(getindex_nothrow, (Vector{Int}, Int))) + +# callsite `@assume_effects` annotation +let ast = code_lowered((Int,)) do x + Base.@assume_effects :total identity(x) + end |> only + ssaflag = ast.ssaflags[findfirst(!iszero, ast.ssaflags)::Int] + override = Core.Compiler.decode_statement_effects_override(ssaflag) + # if this gets broken, check if this is synced with expr.jl + @test override.consistent && override.effect_free && override.nothrow && + override.terminates_globally && !override.terminates_locally && + override.notaskstate && override.inaccessiblememonly && + override.noub && !override.noub_if_noinbounds +end +@test Base.infer_effects((Float64,)) do x + isinf(x) && return 0.0 + return Base.@assume_effects :nothrow sin(x) +end |> Core.Compiler.is_nothrow +let effects = Base.infer_effects((Vector{Float64},)) do xs + isempty(xs) && return 0.0 + Base.@assume_effects :nothrow begin + x = Base.@assume_effects :noub @inbounds xs[1] + isinf(x) && return 0.0 + return sin(x) + end + end + # all nested overrides should be applied + @test Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_noub(effects) +end +@test Base.infer_effects((Int,)) do x + res = 1 + 0 ≤ x < 20 || error("bad fact") + Base.@assume_effects :terminates_locally while x > 1 + res *= x + x -= 1 + end + return res +end |> Core.Compiler.is_terminates + +# https://github.com/JuliaLang/julia/issues/52531 +const a52531 = Core.Ref(1) +@eval getref52531() = $(QuoteNode(a52531)).x +@test !Core.Compiler.is_consistent(Base.infer_effects(getref52531)) +let + global set_a52531!, get_a52531 + _a::Int = -1 + set_a52531!(a::Int) = (_a = a; return get_a52531()) + get_a52531() = _a +end +@test !Core.Compiler.is_consistent(Base.infer_effects(set_a52531!, (Int,))) +@test !Core.Compiler.is_consistent(Base.infer_effects(get_a52531, ())) +@test get_a52531() == -1 +@test set_a52531!(1) == 1 +@test get_a52531() == 1 diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 8fc63f42ada87..c71fd3ca9b265 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -5,7 
+5,7 @@ import Core.Compiler: Const, Conditional, ⊑, ReturnNode, GotoIfNot isdispatchelem(@nospecialize x) = !isa(x, Type) || Core.Compiler.isdispatchelem(x) using Random, Core.IR -using InteractiveUtils: code_llvm +using InteractiveUtils include("irutils.jl") @@ -27,7 +27,6 @@ let comparison = Tuple{X, X} where X<:Tuple @test Core.Compiler.limit_type_size(sig, comparison, comparison, 100, 100) == Tuple{Tuple, Tuple} @test Core.Compiler.limit_type_size(sig, ref, comparison, 100, 100) == Tuple{Any, Any} @test Core.Compiler.limit_type_size(Tuple{sig}, Tuple{ref}, comparison, 100, 100) == Tuple{Tuple{Any, Any}} - @test Core.Compiler.limit_type_size(sig, ref, Tuple{comparison}, 100, 100) == Tuple{Tuple{X, X} where X<:Tuple, Tuple{X, X} where X<:Tuple} @test Core.Compiler.limit_type_size(ref, sig, Union{}, 100, 100) == ref end @@ -51,10 +50,19 @@ let va = ccall(:jl_type_intersection_with_env, Any, (Any, Any), Tuple{Tuple}, Tu @test Core.Compiler.__limit_type_size(Tuple, va, Core.svec(va, Union{}), 2, 2) === Tuple end +mutable struct TS14009{T}; end +let A = TS14009{TS14009{TS14009{TS14009{TS14009{T}}}}} where {T}, + B = Base.rewrap_unionall(TS14009{Base.unwrap_unionall(A)}, A) + + @test Core.Compiler.Compiler.limit_type_size(B, A, A, 2, 2) == TS14009 +end + # issue #42835 @test !Core.Compiler.type_more_complex(Int, Any, Core.svec(), 1, 1, 1) @test !Core.Compiler.type_more_complex(Int, Type{Int}, Core.svec(), 1, 1, 1) @test !Core.Compiler.type_more_complex(Type{Int}, Any, Core.svec(), 1, 1, 1) +@test Core.Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1) +@test Core.Compiler.limit_type_size(Type{Int}, Any, Union{}, 0, 0) == Type{Int} @test Core.Compiler.type_more_complex(Type{Type{Int}}, Type{Int}, Core.svec(Type{Int}), 1, 1, 1) @test Core.Compiler.type_more_complex(Type{Type{Int}}, Int, Core.svec(Type{Int}), 1, 1, 1) @test Core.Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1) @@ -65,7 +73,8 @@ end @test Core.Compiler.type_more_complex(ComplexF32, Type{ComplexF32}, Core.svec(), 1, 1, 1) @test !Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(Type{Type{ComplexF32}}), 1, 1, 1) @test Core.Compiler.type_more_complex(Type{ComplexF32}, Type{Type{ComplexF32}}, Core.svec(), 1, 1, 1) -@test !Core.Compiler.type_more_complex(Type{ComplexF32}, ComplexF32, Core.svec(), 1, 1, 1) +@test Core.Compiler.type_more_complex(Type{ComplexF32}, ComplexF32, Core.svec(), 1, 1, 1) +@test Core.Compiler.limit_type_size(Type{ComplexF32}, ComplexF32, Union{}, 1, 1) == Type{<:Complex} @test Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(), 1, 1, 1) @test Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, Type{ComplexF32}, Core.svec(Type{ComplexF32}), 1, 1, 1) @test Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, ComplexF32, Core.svec(ComplexF32), 1, 1, 1) @@ -80,18 +89,76 @@ end @test !Core.Compiler.type_more_complex(Type{1}, Type{2}, Core.svec(), 1, 1, 1) @test Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1) -@test !Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 0, 1, 1) -@test_broken Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1) +@test Core.Compiler.type_more_complex(Type{Type{Union{Float32,Float64}}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1) +@test 
Core.Compiler.type_more_complex(Type{Type{Union{Float32,Float64}}}, Type{Union{Float32,Float64}}, Core.svec(Type{Union{Float32,Float64}}), 1, 1, 1) +@test Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1) @test Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Any, Core.svec(Union{Float32,Float64}), 1, 1, 1) +# issue #49287 +@test !Core.Compiler.type_more_complex(Tuple{Vararg{Tuple{}}}, Tuple{Vararg{Tuple}}, Core.svec(), 0, 0, 0) +@test Core.Compiler.type_more_complex(Tuple{Vararg{Tuple}}, Tuple{Vararg{Tuple{}}}, Core.svec(), 0, 0, 0) + +# issue #51694 +@test Core.Compiler.type_more_complex( + Base.Generator{Base.Iterators.Flatten{Array{Bool, 1}}, typeof(identity)}, + Base.Generator{Array{Bool, 1}, typeof(identity)}, + Core.svec(), 0, 0, 0) +@test Core.Compiler.type_more_complex( + Base.Generator{Base.Iterators.Flatten{Base.Generator{Array{Bool, 1}, typeof(identity)}}, typeof(identity)}, + Base.Generator{Array{Bool, 1}, typeof(identity)}, + Core.svec(), 0, 0, 0) let # 40336 - t = Type{Type{Int}} - c = Type{Int} + t = Type{Type{Type{Int}}} + c = Type{Type{Int}} r = Core.Compiler.limit_type_size(t, c, c, 100, 100) @test t !== r && t <: r end +@test Core.Compiler.limit_type_size(Type{Type{Type{Int}}}, Type, Union{}, 0, 0) == Type{<:Type} +@test Core.Compiler.limit_type_size(Type{Type{Int}}, Type, Union{}, 0, 0) == Type{<:Type} +@test Core.Compiler.limit_type_size(Type{Int}, Type, Union{}, 0, 0) == Type{Int} +@test Core.Compiler.limit_type_size(Type{<:Int}, Type, Union{}, 0, 0) == Type{<:Int} +@test Core.Compiler.limit_type_size(Type{ComplexF32}, ComplexF32, Union{}, 0, 0) == Type{<:Complex} # added nesting +@test Core.Compiler.limit_type_size(Type{ComplexF32}, Type{ComplexF64}, Union{}, 0, 0) == Type{ComplexF32} # base matches +@test Core.Compiler.limit_type_size(Type{ComplexF32}, Type, Union{}, 0, 0) == Type{<:Complex} +@test_broken Core.Compiler.limit_type_size(Type{<:ComplexF64}, Type, Union{}, 0, 0) == Type{<:Complex} +@test Core.Compiler.limit_type_size(Type{<:ComplexF64}, Type, Union{}, 0, 0) == Type #50692 +@test Core.Compiler.limit_type_size(Type{Union{ComplexF32,ComplexF64}}, Type, Union{}, 0, 0) == Type +@test_broken Core.Compiler.limit_type_size(Type{Union{ComplexF32,ComplexF64}}, Type, Union{}, 0, 0) == Type{<:Complex} #50692 +@test Core.Compiler.limit_type_size(Type{Union{Float32,Float64}}, Type, Union{}, 0, 0) == Type +@test Core.Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Type{Int}}, Union{}, 0, 0) == Type +@test Core.Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Union{Type{Int},Type{Type{Int}}}, Union{}, 0, 0) == Type +@test Core.Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Union{Type{Int},Type{Type{Int}}}}, Union{}, 0, 0) == Type{Union{Int, Type{Int}}} +@test Core.Compiler.limit_type_size(Type{Union{Int,Type{Int}}}, Type{Type{Int}}, Union{}, 0, 0) == Type + + +@test Core.Compiler.limit_type_size(Type{Any}, Union{}, Union{}, 0, 0) == + Core.Compiler.limit_type_size(Type{Any}, Any, Union{}, 0, 0) == + Core.Compiler.limit_type_size(Type{Any}, Type, Union{}, 0, 0) == + Type{Any} + +# issue #43296 +struct C43296{t,I} end +r43296(b) = r43296(typeof(b)) +r43296(::Type) = nothing +r43296(::Nothing) = nonexistent +r43296(::Type{C43296{c,d}}) where {c,d} = f43296(r43296(c), e) +f43296(::Nothing, :) = nothing +f43296(g, :) = h +k43296(b, j, :) = l +k43296(b, j, ::Nothing) = b +i43296(b, j) = k43296(b, j, r43296(j)) +@test only(Base.return_types(i43296, 
(Int, C43296{C43296{C43296{Val, Tuple}}}))) <: Int +@test only(Base.return_types(i43296, (Int, C43296{C43296{C43296{Val, <:Tuple}}}))) <: Int + +abstract type e43296{a, j} <: AbstractArray{a, j} end +abstract type b43296{a, j, c, d} <: e43296{a, j} end +struct h43296{a, j, f, d, i} <: b43296{a, j, f, d} end +Base.ndims(::Type{f}) where {f<:e43296} = ndims(supertype(f)) +Base.ndims(g::e43296) = ndims(typeof(g)) +@test only(Base.return_types(ndims, (h43296{Any, 0, Any, Int, Any},))) == Int + @test Core.Compiler.unionlen(Union{}) == 1 @test Core.Compiler.unionlen(Int8) == 1 @test Core.Compiler.unionlen(Union{Int8, Int16}) == 2 @@ -107,15 +174,15 @@ end @test Core.Compiler.unioncomplexity(Tuple{Union{Int8, Int16, Int32, Int64}}) == 3 @test Core.Compiler.unioncomplexity(Union{Int8, Int16, Int32, T} where T) == 3 @test Core.Compiler.unioncomplexity(Tuple{Val{T}, Union{Int8, Int16}, Int8} where T<:Union{Int8, Int16, Int32, Int64}) == 3 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Tuple{Union{Int8, Int16}}}}) == 1 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Symbol}}) == 0 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}) == 1 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}) == 2 -@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}}}}) == 3 +@test Core.Compiler.unioncomplexity(Tuple{Vararg{Tuple{Union{Int8, Int16}}}}) == 2 +@test Core.Compiler.unioncomplexity(Tuple{Vararg{Symbol}}) == 1 +@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}) == 3 +@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}) == 5 +@test Core.Compiler.unioncomplexity(Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Union{Symbol, Tuple{Vararg{Symbol}}}}}}}}}}}) == 7 # PR 22120 -function tmerge_test(a, b, r, commutative=true) +function tuplemerge_test(a, b, r, commutative=true) @test r == Core.Compiler.tuplemerge(a, b) if commutative @test r == Core.Compiler.tuplemerge(b, a) @@ -123,32 +190,32 @@ function tmerge_test(a, b, r, commutative=true) @test_broken r == Core.Compiler.tuplemerge(b, a) end end -tmerge_test(Tuple{Int}, Tuple{String}, Tuple{Union{Int, String}}) -tmerge_test(Tuple{Int}, Tuple{String, String}, Tuple) -tmerge_test(Tuple{Vararg{Int}}, Tuple{String}, Tuple) -tmerge_test(Tuple{Int}, Tuple{Int, Int}, +tuplemerge_test(Tuple{Int}, Tuple{String}, Tuple{Union{Int, String}}) +tuplemerge_test(Tuple{Int}, Tuple{String, String}, Tuple) +tuplemerge_test(Tuple{Vararg{Int}}, Tuple{String}, Tuple) +tuplemerge_test(Tuple{Int}, Tuple{Int, Int}, Tuple{Vararg{Int}}) -tmerge_test(Tuple{Integer}, Tuple{Int, Int}, +tuplemerge_test(Tuple{Integer}, Tuple{Int, Int}, Tuple{Vararg{Integer}}) -tmerge_test(Tuple{}, Tuple{Int, Int}, +tuplemerge_test(Tuple{}, Tuple{Int, Int}, Tuple{Vararg{Int}}) -tmerge_test(Tuple{}, Tuple{Complex}, +tuplemerge_test(Tuple{}, Tuple{Complex}, Tuple{Vararg{Complex}}) -tmerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF64}, +tuplemerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF64}, Tuple{Vararg{Complex}}) -tmerge_test(Tuple{Vararg{ComplexF32}}, Tuple{Vararg{ComplexF64}}, +tuplemerge_test(Tuple{Vararg{ComplexF32}}, Tuple{Vararg{ComplexF64}}, Tuple{Vararg{Complex}}) -tmerge_test(Tuple{}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{}, 
Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}) -tmerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}) -tmerge_test(Tuple{ComplexF32, ComplexF32, ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{ComplexF32, ComplexF32, ComplexF32}, Tuple{ComplexF32, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}) -tmerge_test(Tuple{}, Tuple{Union{ComplexF64, ComplexF32}, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{}, Tuple{Union{ComplexF64, ComplexF32}, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}) -tmerge_test(Tuple{ComplexF64, ComplexF64, ComplexF32}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{ComplexF64, ComplexF64, ComplexF32}, Tuple{Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Complex}}, false) -tmerge_test(Tuple{}, Tuple{Complex, Vararg{Union{ComplexF32, ComplexF64}}}, +tuplemerge_test(Tuple{}, Tuple{Complex, Vararg{Union{ComplexF32, ComplexF64}}}, Tuple{Vararg{Complex}}) @test Core.Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) == Union{Nothing, Tuple{}, Tuple{ComplexF32, ComplexF32}} @@ -163,7 +230,32 @@ tmerge_test(Tuple{}, Tuple{Complex, Vararg{Union{ComplexF32, ComplexF64}}}, @test Core.Compiler.tmerge(Vector{Int}, Core.Compiler.tmerge(Vector{String}, Union{Vector{Bool}, Vector{Symbol}})) == Vector @test Core.Compiler.tmerge(Base.BitIntegerType, Union{}) === Base.BitIntegerType @test Core.Compiler.tmerge(Union{}, Base.BitIntegerType) === Base.BitIntegerType -@test Core.Compiler.tmerge(Core.Compiler.InterConditional(1, Int, Union{}), Core.Compiler.InterConditional(2, String, Union{})) === Core.Compiler.Const(true) +@test Core.Compiler.tmerge(Core.Compiler.fallback_ipo_lattice, Core.Compiler.InterConditional(1, Int, Union{}), Core.Compiler.InterConditional(2, String, Union{})) === Core.Compiler.Const(true) +# test issue behind https://github.com/JuliaLang/julia/issues/50458 +@test Core.Compiler.tmerge(Nothing, Tuple{Base.BitInteger, Int}) == Union{Nothing, Tuple{Base.BitInteger, Int}} +@test Core.Compiler.tmerge(Union{Nothing, Tuple{Int, Int}}, Tuple{Base.BitInteger, Int}) == Union{Nothing, Tuple{Any, Int}} +@test Core.Compiler.tmerge(Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}) == Union{Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}} +@test Core.Compiler.tmerge(Union{Nothing, Tuple{Char, Int}}, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}) == Union{Nothing, Tuple{Union{Char, String, SubString{String}, Symbol}, Int}} +@test Core.Compiler.tmerge(Nothing, Tuple{Integer, Int}) == Union{Nothing, Tuple{Integer, Int}} +@test Core.Compiler.tmerge(Union{Nothing, Tuple{Int, Int}}, Tuple{Integer, Int}) == Union{Nothing, Tuple{Integer, Int}} +@test Core.Compiler.tmerge(Union{Nothing, AbstractVector{Int}}, Vector) == Union{Nothing, AbstractVector} +@test Core.Compiler.tmerge(Union{Nothing, AbstractVector{Int}}, Matrix) == Union{Nothing, AbstractArray} +@test Core.Compiler.tmerge(Union{Nothing, AbstractVector{Int}}, Matrix{Int}) == Union{Nothing, AbstractArray{Int}} +@test Core.Compiler.tmerge(Union{Nothing, AbstractVector{Int}}, Array) == Union{Nothing, AbstractArray} +@test Core.Compiler.tmerge(Union{Nothing, 
AbstractArray{Int}}, Vector) == Union{Nothing, AbstractArray} +@test Core.Compiler.tmerge(Union{Nothing, AbstractVector}, Matrix{Int}) == Union{Nothing, AbstractArray} +@test Core.Compiler.tmerge(Union{Nothing, AbstractFloat}, Integer) == Union{Nothing, AbstractFloat, Integer} + +# test that recursively more complicated types don't widen all the way to Any when there is a useful valid type upper bound +# Specifically test with base types of a trivial type, a simple union, a complicated union, and a tuple. +for T in (Nothing, Base.BitInteger, Union{Int, Int32, Int16, Int8}, Tuple{Int, Int}) + Ta, Tb = T, T + for i in 1:10 + Ta = Union{Tuple{Ta}, Nothing} + Tb = Core.Compiler.tmerge(Tuple{Tb}, Nothing) + @test Ta <: Tb <: Union{Nothing, Tuple} + end +end struct SomethingBits x::Base.BitIntegerType @@ -273,9 +365,9 @@ barTuple2() = fooTuple{tuple(:y)}() @test Base.return_types(barTuple1,Tuple{})[1] == Base.return_types(barTuple2,Tuple{})[1] == fooTuple{(:y,)} # issue #6050 -@test Core.Compiler.getfield_tfunc( +@test Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, Dict{Int64,Tuple{UnitRange{Int64},UnitRange{Int64}}}, - Core.Compiler.Const(:vals)) == Array{Tuple{UnitRange{Int64},UnitRange{Int64}},1} + Core.Compiler.Const(:vals)) == Memory{Tuple{UnitRange{Int64},UnitRange{Int64}}} # assert robustness of `getfield_tfunc` struct GetfieldRobustness @@ -335,8 +427,7 @@ code_llvm(devnull, f14009, (Int,)) mutable struct B14009{T}; end g14009(a) = g14009(B14009{a}) code_typed(g14009, (Type{Int},)) -code_llvm(devnull, f14009, (Int,)) - +code_llvm(devnull, g14009, (Type{Int},)) # issue #9232 arithtype9232(::Type{T},::Type{T}) where {T<:Real} = arithtype9232(T) @@ -353,7 +444,7 @@ code_llvm(devnull, invoke_g10878, ()) # issue #10930 -@test isa(code_typed(promote,(Any,Any,Vararg{Any})), Array) +@test isa(Base.return_types(promote, (Any,Any,Vararg{Any})), Vector) find_tvar10930(sig::Type{T}) where {T<:Tuple} = 1 function find_tvar10930(arg) if isa(arg, Type) && arg<:Tuple @@ -379,7 +470,7 @@ struct A15259 x y end -# check that allocation was ellided +# check that allocation was elided @eval f15259(x,y) = (a = $(Expr(:new, :A15259, :x, :y)); (a.x, a.y, getfield(a,1), getfield(a, 2))) @test isempty(filter(x -> isa(x,Expr) && x.head === :(=) && isa(x.args[2], Expr) && x.args[2].head === :new, @@ -406,7 +497,7 @@ f11366(x::Type{Ref{T}}) where {T} = Ref{x} let f(T) = Type{T} - @test Base.return_types(f, Tuple{Type{Int}}) == [Type{Type{Int}}] + @test Base.return_types(f, Tuple{Type{Int}}) == Any[Type{Type{Int}}] end # issue #9222 @@ -429,7 +520,7 @@ function foo9222() end @test 0.0 == foo9222() -# branching based on inferrable conditions +# branching based on inferable conditions let f(x) = isa(x,Int) ? 
1 : "" @test Base.return_types(f, Tuple{Int}) == [Int] end @@ -557,27 +648,6 @@ f18450() = ifelse(true, Tuple{Vararg{Int}}, Tuple{Vararg}) # issue #18569 @test !Core.Compiler.isconstType(Type{Tuple}) -# ensure pure attribute applies correctly to all signatures of fpure -Base.@pure function fpure(a=rand(); b=rand()) - # use the `rand` function since it is known to be `@inline` - # but would be too big to inline - return a + b + rand() -end -gpure() = fpure() -gpure(x::Irrational) = fpure(x) -@test which(fpure, ()).pure -@test which(fpure, (typeof(pi),)).pure -@test !which(gpure, ()).pure -@test !which(gpure, (typeof(pi),)).pure -@test code_typed(gpure, ())[1][1].pure -@test code_typed(gpure, (typeof(π),))[1][1].pure -@test gpure() == gpure() == gpure() -@test gpure(π) == gpure(π) == gpure(π) - -# Make sure @pure works for functions using the new syntax -Base.@pure (fpure2(x::T) where T) = T -@test which(fpure2, (Int64,)).pure - # issue #10880 function cat10880(a, b) Tuple{a.parameters..., b.parameters...} @@ -597,7 +667,6 @@ function is_typed_expr(e::Expr) end is_typed_expr(@nospecialize other) = false test_inferred_static(@nospecialize(other)) = true -test_inferred_static(slot::TypedSlot) = @test isdispatchelem(slot.typ) function test_inferred_static(expr::Expr) for a in expr.args test_inferred_static(a) @@ -654,25 +723,15 @@ for (codetype, all_ssa) in Any[ (code_typed(h18679, ())[1], true), (code_typed(g19348, (typeof((1, 2.0)),))[1], true)] code = codetype[1] - local notconst(@nospecialize(other)) = true - notconst(slot::TypedSlot) = @test isa(slot.typ, Type) - function notconst(expr::Expr) - for a in expr.args - notconst(a) - end - end local i - for i = 1:length(code.code) - e = code.code[i] - notconst(e) + for i = 1:length(code.ssavaluetypes) typ = code.ssavaluetypes[i] - typ isa Core.Compiler.MaybeUndef && (typ = typ.typ) @test isa(typ, Type) || isa(typ, Const) || isa(typ, Conditional) || typ end test_inferred_static(codetype, all_ssa) end @test f18679() === () -@test_throws UndefVarError(:any_undef_global) g18679() +@test_throws UndefVarError(:any_undef_global, @__MODULE__) g18679() @test h18679() === nothing @@ -707,6 +766,7 @@ end # inference of `fieldtype` mutable struct UndefField__ x::Union{} + UndefField__() = new() end f_infer_undef_field() = fieldtype(UndefField__, :x) @test Base.return_types(f_infer_undef_field, ()) == Any[Type{Union{}}] @@ -717,8 +777,10 @@ mutable struct HasAbstractlyTypedField end f_infer_abstract_fieldtype() = fieldtype(HasAbstractlyTypedField, :x) @test Base.return_types(f_infer_abstract_fieldtype, ()) == Any[Type{Union{Int,String}}] -let fieldtype_tfunc = Core.Compiler.fieldtype_tfunc, - fieldtype_nothrow = Core.Compiler.fieldtype_nothrow +let fieldtype_tfunc(@nospecialize args...) 
= + Core.Compiler.fieldtype_tfunc(Core.Compiler.fallback_lattice, args...), + fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) = Core.Compiler.fieldtype_nothrow( + Core.Compiler.SimpleInferenceLattice.instance, s0, name) @test fieldtype_tfunc(Union{}, :x) == Union{} @test fieldtype_tfunc(Union{Type{Int32}, Int32}, Const(:x)) == Union{} @test fieldtype_tfunc(Union{Type{Base.RefValue{T}}, Type{Int32}} where {T<:Array}, Const(:x)) == Type{<:Array} @@ -898,35 +960,6 @@ end f20267(x::T20267{T}, y::T) where (T) = f20267(Any[1][1], x.inds) @test Base.return_types(f20267, (Any, Any)) == Any[Union{}] -# issue #20704 -f20704(::Int) = 1 -Base.@pure b20704(@nospecialize(x)) = f20704(x) -@test b20704(42) === 1 -@test_throws MethodError b20704(42.0) - -bb20704() = b20704(Any[1.0][1]) -@test_throws MethodError bb20704() - -v20704() = Val{b20704(Any[1.0][1])} -@test_throws MethodError v20704() -@test Base.return_types(v20704, ()) == Any[Type{Val{1}}] - -Base.@pure g20704(::Int) = 1 -h20704(@nospecialize(x)) = g20704(x) -@test g20704(1) === 1 -@test_throws MethodError h20704(1.2) - -Base.@pure c20704() = (f20704(1.0); 1) -d20704() = c20704() -@test_throws MethodError d20704() - -Base.@pure function a20704(x) - rand() - 42 -end -aa20704(x) = x(nothing) -@test code_typed(aa20704, (typeof(a20704),))[1][1].pure - #issue #21065, elision of _apply_iterate when splatted expression is not effect_free function f21065(x,y) println("x=$x, y=$y") @@ -956,7 +989,7 @@ err20033(x::Float64...) = prod(x) # Inference of constant svecs @eval fsvecinf() = $(QuoteNode(Core.svec(Tuple{Int,Int}, Int)))[1] -@test Core.Compiler.return_type(fsvecinf, Tuple{}) == Type{Tuple{Int,Int}} +@test only(Base.return_types(fsvecinf, Tuple{})) == Type{Tuple{Int,Int}} # nfields tfunc on `DataType` let f = ()->Val{nfields(DataType[Int][1])} @@ -984,7 +1017,7 @@ end # issue #21410 f21410(::V, ::Pair{V,E}) where {V, E} = E -@test code_typed(f21410, Tuple{Ref, Pair{Ref{T},Ref{T}} where T<:Number})[1].second == +@test only(Base.return_types(f21410, Tuple{Ref, Pair{Ref{T},Ref{T}} where T<:Number})) == Type{E} where E <: (Ref{T} where T<:Number) # issue #21369 @@ -1059,7 +1092,7 @@ end g21771(T) = T f21771(::Val{U}) where {U} = Tuple{g21771(U)} @test @inferred(f21771(Val{Int}())) === Tuple{Int} -@test @inferred(f21771(Val{Union{}}())) === Tuple{Union{}} +@test_throws ErrorException @inferred(f21771(Val{Union{}}())) @test @inferred(f21771(Val{Integer}())) === Tuple{Integer} # PR #28284, check that constants propagate through calls to new @@ -1114,12 +1147,6 @@ let f(x) = isdefined(x, :NonExistentField) ? 
1 : "" @test Base.return_types(f, (ComplexF32,)) == Any[String] @test Union{Int,String} <: Base.return_types(f, (AbstractArray,))[1] end -import Core.Compiler: isdefined_tfunc -@test isdefined_tfunc(ComplexF32, Const(())) === Union{} -@test isdefined_tfunc(ComplexF32, Const(1)) === Const(true) -@test isdefined_tfunc(ComplexF32, Const(2)) === Const(true) -@test isdefined_tfunc(ComplexF32, Const(3)) === Const(false) -@test isdefined_tfunc(ComplexF32, Const(0)) === Const(false) mutable struct SometimesDefined x function SometimesDefined() @@ -1130,36 +1157,62 @@ mutable struct SometimesDefined return v end end -@test isdefined_tfunc(SometimesDefined, Const(:x)) == Bool -@test isdefined_tfunc(SometimesDefined, Const(:y)) === Const(false) -@test isdefined_tfunc(Const(Base), Const(:length)) === Const(true) -@test isdefined_tfunc(Const(Base), Symbol) == Bool -@test isdefined_tfunc(Const(Base), Const(:NotCurrentlyDefinedButWhoKnows)) == Bool -@test isdefined_tfunc(Core.SimpleVector, Const(1)) === Const(false) -@test Const(false) ⊑ isdefined_tfunc(Const(:x), Symbol) -@test Const(false) ⊑ isdefined_tfunc(Const(:x), Const(:y)) -@test isdefined_tfunc(Vector{Int}, Const(1)) == Const(false) -@test isdefined_tfunc(Vector{Any}, Const(1)) == Const(false) -@test isdefined_tfunc(Module, Int) === Union{} -@test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(0)) === Const(false) -@test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(1)) === Const(true) -@test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(2)) === Bool -@test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(3)) === Bool -@testset "isdefined check for `NamedTuple`s" begin - # concrete `NamedTuple`s - @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:x)) === Const(true) - @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:y)) === Const(true) - @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:z)) === Const(false) - # non-concrete `NamedTuple`s - @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:x)) === Const(true) - @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:y)) === Const(true) - @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:z)) === Const(false) -end struct UnionIsdefinedA; x; end struct UnionIsdefinedB; x; end -@test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:x)) === Const(true) -@test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:y)) === Const(false) -@test isdefined_tfunc(Union{UnionIsdefinedA,Nothing}, Const(:x)) === Bool +let isdefined_tfunc(@nospecialize xs...) = + Core.Compiler.isdefined_tfunc(Core.Compiler.fallback_lattice, xs...) 
+ @test isdefined_tfunc(typeof(NamedTuple()), Const(0)) === Const(false) + @test isdefined_tfunc(typeof(NamedTuple()), Const(1)) === Const(false) + @test isdefined_tfunc(typeof((a=1,b=2)), Const(:a)) === Const(true) + @test isdefined_tfunc(typeof((a=1,b=2)), Const(:b)) === Const(true) + @test isdefined_tfunc(typeof((a=1,b=2)), Const(:c)) === Const(false) + @test isdefined_tfunc(typeof((a=1,b=2)), Const(0)) === Const(false) + @test isdefined_tfunc(typeof((a=1,b=2)), Const(1)) === Const(true) + @test isdefined_tfunc(typeof((a=1,b=2)), Const(2)) === Const(true) + @test isdefined_tfunc(typeof((a=1,b=2)), Const(3)) === Const(false) + @test isdefined_tfunc(NamedTuple, Const(1)) == Bool + @test isdefined_tfunc(NamedTuple, Symbol) == Bool + @test Const(false) ⊑ isdefined_tfunc(NamedTuple{(:x,:y)}, Const(:z)) + @test Const(true) ⊑ isdefined_tfunc(NamedTuple{(:x,:y)}, Const(1)) + @test Const(false) ⊑ isdefined_tfunc(NamedTuple{(:x,:y)}, Const(3)) + @test Const(true) ⊑ isdefined_tfunc(NamedTuple{(:x,:y)}, Const(:y)) + + @test isdefined_tfunc(ComplexF32, Const(())) === Union{} + @test isdefined_tfunc(ComplexF32, Const(1)) === Const(true) + @test isdefined_tfunc(ComplexF32, Const(2)) === Const(true) + @test isdefined_tfunc(ComplexF32, Const(3)) === Const(false) + @test isdefined_tfunc(ComplexF32, Const(0)) === Const(false) + @test isdefined_tfunc(SometimesDefined, Const(:x)) == Bool + @test isdefined_tfunc(SometimesDefined, Const(:y)) === Const(false) + @test isdefined_tfunc(Const(Base), Const(:length)) === Const(true) + @test isdefined_tfunc(Const(Base), Symbol) == Bool + @test isdefined_tfunc(Const(Base), Const(:NotCurrentlyDefinedButWhoKnows)) == Bool + @test isdefined_tfunc(Core.SimpleVector, Const(1)) === Const(false) + @test Const(false) ⊑ isdefined_tfunc(Const(:x), Symbol) + @test Const(false) ⊑ isdefined_tfunc(Const(:x), Const(:y)) + @test isdefined_tfunc(Module, Int) === Union{} + @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(0)) === Const(false) + @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(1)) === Const(true) + @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(2)) === Bool + @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(3)) === Bool + @testset "isdefined check for `NamedTuple`s" begin + # concrete `NamedTuple`s + @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:x)) === Const(true) + @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:y)) === Const(true) + @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:z)) === Const(false) + # non-concrete `NamedTuple`s + @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:x)) === Const(true) + @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:y)) === Const(true) + @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:z)) === Const(false) + end + @test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:x)) === Const(true) + @test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:y)) === Const(false) + @test isdefined_tfunc(Union{UnionIsdefinedA,Nothing}, Const(:x)) === Bool +end + +# https://github.com/aviatesk/JET.jl/issues/379 +fJET379(x::Union{Complex{T}, T}) where T = isdefined(x, :im) +@test only(Base.return_types(fJET379)) === Bool @noinline map3_22347(f, t::Tuple{}) = () @noinline map3_22347(f, t::Tuple) = (f(t[1]), map3_22347(f, Base.tail(t))...) @@ -1173,24 +1226,11 @@ let niter = 0 end # issue #22875 - -typeargs = Tuple{Type{Int},} -@test Base.Core.Compiler.return_type((args...) 
-> one(args...), typeargs) === Int - -typeargs = Tuple{Type{Int},Type{Int},Type{Int},Type{Int},Type{Int},Type{Int}} -@test Base.Core.Compiler.return_type(promote_type, typeargs) === Type{Int} - -# demonstrate that inference must converge -# while doing constant propagation -Base.@pure plus1(x) = x + 1 -f21933(x::Val{T}) where {T} = f(Val(plus1(T))) -code_typed(f21933, (Val{1},)) -Base.return_types(f21933, (Val{1},)) - -function count_specializations(method::Method) - specs = method.specializations - n = count(i -> isassigned(specs, i), 1:length(specs)) - return n +let typeargs = Tuple{Type{Int},} + @test only(Base.return_types((args...) -> one(args...), typeargs)) === Int +end +let typeargs = Tuple{Type{Int},Type{Int},Type{Int},Type{Int},Type{Int},Type{Int}} + @test only(Base.return_types(promote_type, typeargs)) === Type{Int} end # demonstrate that inference can complete without waiting for MAX_TYPE_DEPTH @@ -1198,29 +1238,13 @@ copy_dims_out(out) = () copy_dims_out(out, dim::Int, tail...) = copy_dims_out((out..., dim), tail...) copy_dims_out(out, dim::Colon, tail...) = copy_dims_out((out..., dim), tail...) @test Base.return_types(copy_dims_out, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}] -@test all(m -> 4 < count_specializations(m) < 15, methods(copy_dims_out)) # currently about 5 +@test all(m -> 4 < length(Base.specializations(m)) < 15, methods(copy_dims_out)) # currently about 5 copy_dims_pair(out) = () copy_dims_pair(out, dim::Int, tail...) = copy_dims_pair(out => dim, tail...) copy_dims_pair(out, dim::Colon, tail...) = copy_dims_pair(out => dim, tail...) @test Base.return_types(copy_dims_pair, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}] -@test all(m -> 5 < count_specializations(m) < 15, methods(copy_dims_pair)) # currently about 7 - -@test isdefined_tfunc(typeof(NamedTuple()), Const(0)) === Const(false) -@test isdefined_tfunc(typeof(NamedTuple()), Const(1)) === Const(false) -@test isdefined_tfunc(typeof((a=1,b=2)), Const(:a)) === Const(true) -@test isdefined_tfunc(typeof((a=1,b=2)), Const(:b)) === Const(true) -@test isdefined_tfunc(typeof((a=1,b=2)), Const(:c)) === Const(false) -@test isdefined_tfunc(typeof((a=1,b=2)), Const(0)) === Const(false) -@test isdefined_tfunc(typeof((a=1,b=2)), Const(1)) === Const(true) -@test isdefined_tfunc(typeof((a=1,b=2)), Const(2)) === Const(true) -@test isdefined_tfunc(typeof((a=1,b=2)), Const(3)) === Const(false) -@test isdefined_tfunc(NamedTuple, Const(1)) == Bool -@test isdefined_tfunc(NamedTuple, Symbol) == Bool -@test Const(false) ⊑ isdefined_tfunc(NamedTuple{(:x,:y)}, Const(:z)) -@test Const(true) ⊑ isdefined_tfunc(NamedTuple{(:x,:y)}, Const(1)) -@test Const(false) ⊑ isdefined_tfunc(NamedTuple{(:x,:y)}, Const(3)) -@test Const(true) ⊑ isdefined_tfunc(NamedTuple{(:x,:y)}, Const(:y)) +@test all(m -> 3 < length(Base.specializations(m)) < 15, methods(copy_dims_pair)) # currently about 5 # splatting an ::Any should still allow inference to use types of parameters preceding it f22364(::Int, ::Any...) 
= 0 @@ -1234,7 +1258,7 @@ function get_linfo(@nospecialize(f), @nospecialize(t)) throw(ArgumentError("argument is not a generic function")) end # get the MethodInstance for the method match - match = Base._which(Base.signature_type(f, t), Base.get_world_counter()) + match = Base._which(Base.signature_type(f, t)) precompile(match.spec_types) return Core.Compiler.specialize_method(match) end @@ -1305,32 +1329,36 @@ test_const_return(()->sizeof(1), Tuple{}, sizeof(Int)) test_const_return(()->sizeof(DataType), Tuple{}, sizeof(DataType)) test_const_return(()->sizeof(1 < 2), Tuple{}, 1) test_const_return(()->fieldtype(Dict{Int64,Nothing}, :age), Tuple{}, UInt) -test_const_return(@eval(()->Core.sizeof($(Array{Int,0}(undef)))), Tuple{}, sizeof(Int)) -test_const_return(@eval(()->Core.sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * 2 * 2) +test_const_return(@eval(()->Core.sizeof($(Array{Int,0}(undef)))), Tuple{}, 2 * sizeof(Int)) +test_const_return(@eval(()->Core.sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * sizeof(Int)) +# TODO: do we want to implement these? +# test_const_return(@eval(()->sizeof($(Array{Int,0}(undef)))), Tuple{}, sizeof(Int)) +# test_const_return(@eval(()->sizeof($(Matrix{Float32}(undef, 2, 2)))), Tuple{}, 4 * 2 * 2) +# test_const_return(@eval(()->Core.sizeof($(Memory{Int}(undef, 0)))), Tuple{}, 0) # Make sure Core.sizeof with a ::DataType as inferred input type is inferred but not constant. function sizeof_typeref(typeref) return Core.sizeof(typeref[]) end @test @inferred(sizeof_typeref(Ref{DataType}(Int))) == sizeof(Int) -@test find_call(first(code_typed(sizeof_typeref, (Ref{DataType},))[1]), Core.sizeof, 2) +@test find_call(only(code_typed(sizeof_typeref, (Ref{DataType},)))[1], Core.sizeof, 2) # Constant `Vector` can be resized and shouldn't be optimized to a constant. const constvec = [1, 2, 3] @eval function sizeof_constvec() - return Core.sizeof($constvec) + return sizeof($constvec) end @test @inferred(sizeof_constvec()) == sizeof(Int) * 3 -@test find_call(first(code_typed(sizeof_constvec, ())[1]), Core.sizeof, 2) push!(constvec, 10) -@test @inferred(sizeof_constvec()) == sizeof(Int) * 4 +@test sizeof_constvec() == sizeof(Int) * 4 test_const_return(x->isdefined(x, :re), Tuple{ComplexF64}, true) isdefined_f3(x) = isdefined(x, 3) @test @inferred(isdefined_f3(())) == false -@test find_call(first(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}})[1]), isdefined, 3) +@test find_call(only(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}}))[1], isdefined, 3) -let isa_tfunc = Core.Compiler.isa_tfunc +let isa_tfunc(@nospecialize xs...) = + Core.Compiler.isa_tfunc(Core.Compiler.fallback_lattice, xs...) @test isa_tfunc(Array, Const(AbstractArray)) === Const(true) @test isa_tfunc(Array, Type{AbstractArray}) === Const(true) @test isa_tfunc(Array, Type{AbstractArray{Int}}) == Bool @@ -1369,7 +1397,8 @@ let isa_tfunc = Core.Compiler.isa_tfunc @test isa_tfunc(Union{Int64, Float64}, Type{AbstractArray}) === Const(false) end -let subtype_tfunc = Core.Compiler.subtype_tfunc +let subtype_tfunc(@nospecialize xs...) = + Core.Compiler.subtype_tfunc(Core.Compiler.fallback_lattice, xs...) 
@test subtype_tfunc(Type{<:Array}, Const(AbstractArray)) === Const(true) @test subtype_tfunc(Type{<:Array}, Type{AbstractArray}) === Const(true) @test subtype_tfunc(Type{<:Array}, Type{AbstractArray{Int}}) == Bool @@ -1421,8 +1450,9 @@ end let egal_tfunc function egal_tfunc(a, b) - r = Core.Compiler.egal_tfunc(a, b) - @test r === Core.Compiler.egal_tfunc(b, a) + 𝕃 = Core.Compiler.fallback_lattice + r = Core.Compiler.egal_tfunc(𝕃, a, b) + @test r === Core.Compiler.egal_tfunc(𝕃, b, a) return r end @test egal_tfunc(Const(12345.12345), Const(12344.12345 + 1)) == Const(true) @@ -1490,84 +1520,135 @@ egal_conditional_lattice3(x, y) = x === y + y ? "" : 1 @test Base.return_types(egal_conditional_lattice3, (Int64, Int64)) == Any[Union{Int, String}] @test Base.return_types(egal_conditional_lattice3, (Int32, Int64)) == Any[Int] -using Core.Compiler: PartialStruct, nfields_tfunc, sizeof_tfunc, sizeof_nothrow -@test sizeof_tfunc(Const(Ptr)) === sizeof_tfunc(Union{Ptr, Int, Type{Ptr{Int8}}, Type{Int}}) === Const(Sys.WORD_SIZE ÷ 8) -@test sizeof_tfunc(Type{Ptr}) === Const(sizeof(Ptr)) -@test sizeof_nothrow(Union{Ptr, Int, Type{Ptr{Int8}}, Type{Int}}) -@test sizeof_nothrow(Const(Ptr)) -@test sizeof_nothrow(Type{Ptr}) -@test sizeof_nothrow(Type{Union{Ptr{Int}, Int}}) -@test !sizeof_nothrow(Const(Tuple)) -@test !sizeof_nothrow(Type{Vector{Int}}) -@test !sizeof_nothrow(Type{Union{Int, String}}) -@test sizeof_nothrow(String) -@test !sizeof_nothrow(Type{String}) -@test sizeof_tfunc(Type{Union{Int64, Int32}}) == Const(Core.sizeof(Union{Int64, Int32})) -let PT = PartialStruct(Tuple{Int64,UInt64}, Any[Const(10), UInt64]) - @test sizeof_tfunc(PT) === Const(16) - @test nfields_tfunc(PT) === Const(2) - @test sizeof_nothrow(PT) -end -@test nfields_tfunc(Type) === Int -@test nfields_tfunc(Number) === Int -@test nfields_tfunc(Int) === Const(0) -@test nfields_tfunc(Complex) === Const(2) -@test nfields_tfunc(Type{Type{Int}}) === Const(nfields(DataType)) -@test nfields_tfunc(UnionAll) === Const(2) -@test nfields_tfunc(DataType) === Const(nfields(DataType)) -@test nfields_tfunc(Type{Int}) === Const(nfields(DataType)) -@test nfields_tfunc(Type{Integer}) === Const(nfields(DataType)) -@test nfields_tfunc(Type{Complex}) === Int -@test nfields_tfunc(typeof(Union{})) === Const(0) -@test nfields_tfunc(Type{Union{}}) === Const(0) -@test nfields_tfunc(Tuple{Int, Vararg{Int}}) === Int -@test nfields_tfunc(Tuple{Int, Integer}) === Const(2) -@test nfields_tfunc(Union{Tuple{Int, Float64}, Tuple{Int, Int}}) === Const(2) - -using Core.Compiler: typeof_tfunc -@test typeof_tfunc(Tuple{Vararg{Int}}) == Type{Tuple{Vararg{Int,N}}} where N -@test typeof_tfunc(Tuple{Any}) == Type{<:Tuple{Any}} -@test typeof_tfunc(Type{Array}) === DataType -@test typeof_tfunc(Type{<:Array}) === DataType -@test typeof_tfunc(Array{Int}) == Type{Array{Int,N}} where N -@test typeof_tfunc(AbstractArray{Int}) == Type{<:AbstractArray{Int,N}} where N -@test typeof_tfunc(Union{<:T, <:Real} where T<:Complex) == Union{Type{Complex{T}} where T<:Real, Type{<:Real}} +let nfields_tfunc(@nospecialize xs...) = + Core.Compiler.nfields_tfunc(Core.Compiler.fallback_lattice, xs...) + sizeof_tfunc(@nospecialize xs...) = + Core.Compiler.sizeof_tfunc(Core.Compiler.fallback_lattice, xs...) + sizeof_nothrow(@nospecialize xs...) = + Core.Compiler.sizeof_nothrow(xs...) 
+ @test sizeof_tfunc(Const(Ptr)) === sizeof_tfunc(Union{Ptr, Int, Type{Ptr{Int8}}, Type{Int}}) === Const(Sys.WORD_SIZE ÷ 8) + @test sizeof_tfunc(Type{Ptr}) === Const(sizeof(Ptr)) + @test sizeof_nothrow(Union{Ptr, Int, Type{Ptr{Int8}}, Type{Int}}) + @test sizeof_nothrow(Const(Ptr)) + @test sizeof_nothrow(Type{Ptr}) + @test sizeof_nothrow(Type{Union{Ptr{Int}, Int}}) + @test !sizeof_nothrow(Const(Tuple)) + @test sizeof_nothrow(Type{Vector{Int}}) + @test !sizeof_nothrow(Type{Union{Int, String}}) + @test sizeof_nothrow(String) + @test !sizeof_nothrow(Type{String}) + @test sizeof_tfunc(Type{Union{Int64, Int32}}) == Const(Core.sizeof(Union{Int64, Int32})) + let PT = Core.Compiler.PartialStruct(Tuple{Int64,UInt64}, Any[Const(10), UInt64]) + @test sizeof_tfunc(PT) === Const(16) + @test nfields_tfunc(PT) === Const(2) + @test sizeof_nothrow(PT) + end + @test nfields_tfunc(Type) === Int + @test nfields_tfunc(Number) === Int + @test nfields_tfunc(Int) === Const(0) + @test nfields_tfunc(Complex) === Const(2) + @test nfields_tfunc(Type{Type{Int}}) === Const(nfields(DataType)) + @test nfields_tfunc(UnionAll) === Const(2) + @test nfields_tfunc(DataType) === Const(nfields(DataType)) + @test nfields_tfunc(Type{Int}) === Const(nfields(DataType)) + @test nfields_tfunc(Type{Integer}) === Const(nfields(DataType)) + @test nfields_tfunc(Type{Complex}) === Int + @test nfields_tfunc(typeof(Union{})) === Const(0) + @test nfields_tfunc(Type{Union{}}) === Const(0) + @test nfields_tfunc(Tuple{Int, Vararg{Int}}) === Int + @test nfields_tfunc(Tuple{Int, Integer}) === Const(2) + @test nfields_tfunc(Union{Tuple{Int, Float64}, Tuple{Int, Int}}) === Const(2) + @test nfields_tfunc(@NamedTuple{a::Int,b::Integer}) === Const(2) + @test nfields_tfunc(NamedTuple{(:a,:b),T} where T<:Tuple{Int,Integer}) === Const(2) + @test nfields_tfunc(NamedTuple{(:a,:b)}) === Const(2) + @test nfields_tfunc(NamedTuple{names,Tuple{Any,Any}} where names) === Const(2) + @test nfields_tfunc(Union{NamedTuple{(:a,:b)},NamedTuple{(:c,:d)}}) === Const(2) +end + +let typeof_tfunc(@nospecialize xs...) = + Core.Compiler.typeof_tfunc(Core.Compiler.fallback_lattice, xs...) 
+ @test typeof_tfunc(Tuple{Vararg{Int}}) == Type{Tuple{Vararg{Int,N}}} where N + @test typeof_tfunc(Tuple{Any}) == Type{<:Tuple{Any}} + @test typeof_tfunc(Type{Array}) === DataType + @test typeof_tfunc(Type{<:Array}) === DataType + @test typeof_tfunc(Array{Int}) == Type{Array{Int,N}} where N + @test typeof_tfunc(AbstractArray{Int}) == Type{<:AbstractArray{Int,N}} where N + @test typeof_tfunc(Union{<:T, <:Real} where T<:Complex) == Union{Type{Complex{T}} where T<:Real, Type{<:Real}} +end f_typeof_tfunc(x) = typeof(x) @test Base.return_types(f_typeof_tfunc, (Union{<:T, Int} where T<:Complex,)) == Any[Union{Type{Int}, Type{Complex{T}} where T<:Real}] -# arrayref / arrayset / arraysize -import Core.Compiler: Const, arrayref_tfunc, arrayset_tfunc, arraysize_tfunc -@test arrayref_tfunc(Const(true), Vector{Int}, Int) === Int -@test arrayref_tfunc(Const(true), Vector{<:Integer}, Int) === Integer -@test arrayref_tfunc(Const(true), Vector, Int) === Any -@test arrayref_tfunc(Const(true), Vector{Int}, Int, Vararg{Int}) === Int -@test arrayref_tfunc(Const(true), Vector{Int}, Vararg{Int}) === Int -@test arrayref_tfunc(Const(true), Vector{Int}) === Union{} -@test arrayref_tfunc(Const(true), String, Int) === Union{} -@test arrayref_tfunc(Const(true), Vector{Int}, Float64) === Union{} -@test arrayref_tfunc(Int, Vector{Int}, Int) === Union{} -@test arrayset_tfunc(Const(true), Vector{Int}, Int, Int) === Vector{Int} -let ua = Vector{<:Integer} - @test arrayset_tfunc(Const(true), ua, Int, Int) === ua -end -@test arrayset_tfunc(Const(true), Vector, Int, Int) === Vector -@test arrayset_tfunc(Const(true), Any, Int, Int) === Any -@test arrayset_tfunc(Const(true), Vector{String}, String, Int, Vararg{Int}) === Vector{String} -@test arrayset_tfunc(Const(true), Vector{String}, String, Vararg{Int}) === Vector{String} -@test arrayset_tfunc(Const(true), Vector{String}, String) === Union{} -@test arrayset_tfunc(Const(true), String, Char, Int) === Union{} -@test arrayset_tfunc(Const(true), Vector{Int}, Int, Float64) === Union{} -@test arrayset_tfunc(Int, Vector{Int}, Int, Int) === Union{} -@test arrayset_tfunc(Const(true), Vector{Int}, Float64, Int) === Union{} -@test arraysize_tfunc(Vector, Int) === Int -@test arraysize_tfunc(Vector, Float64) === Union{} -@test arraysize_tfunc(String, Int) === Union{} - -let tuple_tfunc - function tuple_tfunc(@nospecialize xs...) - return Core.Compiler.tuple_tfunc(Any[xs...]) +# memoryref_tfunc, memoryrefget_tfunc, memoryrefset!_tfunc, memoryref_isassigned, memoryrefoffset_tfunc +let memoryref_tfunc(@nospecialize xs...) = Core.Compiler.memoryref_tfunc(Core.Compiler.fallback_lattice, xs...) + memoryrefget_tfunc(@nospecialize xs...) = Core.Compiler.memoryrefget_tfunc(Core.Compiler.fallback_lattice, xs...) + memoryref_isassigned_tfunc(@nospecialize xs...) = Core.Compiler.memoryref_isassigned_tfunc(Core.Compiler.fallback_lattice, xs...) + memoryrefset!_tfunc(@nospecialize xs...) = Core.Compiler.memoryrefset!_tfunc(Core.Compiler.fallback_lattice, xs...) + memoryrefoffset_tfunc(@nospecialize xs...) = Core.Compiler.memoryrefoffset_tfunc(Core.Compiler.fallback_lattice, xs...) + interp = Core.Compiler.NativeInterpreter() + builtin_tfunction(@nospecialize xs...) 
= Core.Compiler.builtin_tfunction(interp, xs..., nothing) + @test memoryref_tfunc(Memory{Int}) == MemoryRef{Int} + @test memoryref_tfunc(Memory{Integer}) == MemoryRef{Integer} + @test memoryref_tfunc(MemoryRef{Int}, Int) == MemoryRef{Int} + @test memoryref_tfunc(MemoryRef{Int}, Vararg{Int}) == MemoryRef{Int} + @test memoryref_tfunc(MemoryRef{Int}, Int, Symbol) == Union{} + @test memoryref_tfunc(MemoryRef{Int}, Int, Bool) == MemoryRef{Int} + @test memoryref_tfunc(MemoryRef{Int}, Int, Vararg{Bool}) == MemoryRef{Int} + @test memoryref_tfunc(Memory{Int}, Int) == Union{} + @test memoryref_tfunc(Any, Any, Any) == Any # also probably could be GenericMemoryRef + @test memoryref_tfunc(Any, Any) == Any # also probably could be GenericMemoryRef + @test memoryref_tfunc(Any) == GenericMemoryRef + @test memoryrefget_tfunc(MemoryRef{Int}, Symbol, Bool) === Int + @test memoryrefget_tfunc(MemoryRef{Int}, Any, Any) === Int + @test memoryrefget_tfunc(MemoryRef{<:Integer}, Symbol, Bool) === Integer + @test memoryrefget_tfunc(GenericMemoryRef, Symbol, Bool) === Any + @test memoryrefget_tfunc(GenericMemoryRef{:not_atomic}, Symbol, Bool) === Any + @test memoryrefget_tfunc(Vector{Int}, Symbol, Bool) === Union{} + @test memoryrefget_tfunc(String, Symbol, Bool) === Union{} + @test memoryrefget_tfunc(MemoryRef{Int}, String, Bool) === Union{} + @test memoryrefget_tfunc(MemoryRef{Int}, Symbol, String) === Union{} + @test memoryrefget_tfunc(Any, Any, Any) === Any + @test builtin_tfunction(Core.memoryrefget, Any[MemoryRef{Int}, Vararg{Any}]) == Int + @test builtin_tfunction(Core.memoryrefget, Any[MemoryRef{Int}, Symbol, Bool, Vararg{Bool}]) == Int + @test memoryref_isassigned_tfunc(MemoryRef{Any}, Symbol, Bool) === Bool + @test memoryref_isassigned_tfunc(MemoryRef{Any}, Any, Any) === Bool + @test memoryref_isassigned_tfunc(MemoryRef{<:Integer}, Symbol, Bool) === Bool + @test memoryref_isassigned_tfunc(GenericMemoryRef, Symbol, Bool) === Bool + @test memoryref_isassigned_tfunc(GenericMemoryRef{:not_atomic}, Symbol, Bool) === Bool + @test memoryref_isassigned_tfunc(Vector{Int}, Symbol, Bool) === Union{} + @test memoryref_isassigned_tfunc(String, Symbol, Bool) === Union{} + @test memoryref_isassigned_tfunc(MemoryRef{Int}, String, Bool) === Union{} + @test memoryref_isassigned_tfunc(MemoryRef{Int}, Symbol, String) === Union{} + @test memoryref_isassigned_tfunc(Any, Any, Any) === Bool + @test builtin_tfunction(Core.memoryref_isassigned, Any[MemoryRef{Int}, Vararg{Any}]) == Bool + @test builtin_tfunction(Core.memoryref_isassigned, Any[MemoryRef{Int}, Symbol, Bool, Vararg{Bool}]) == Bool + @test memoryrefset!_tfunc(MemoryRef{Int}, Int, Symbol, Bool) === MemoryRef{Int} + let ua = MemoryRef{<:Integer} + @test memoryrefset!_tfunc(ua, Int, Symbol, Bool) === ua end + @test memoryrefset!_tfunc(GenericMemoryRef, Int, Symbol, Bool) === GenericMemoryRef + @test memoryrefset!_tfunc(GenericMemoryRef{:not_atomic}, Int, Symbol, Bool) === GenericMemoryRef{:not_atomic} + @test memoryrefset!_tfunc(Any, Int, Symbol, Bool) === Any + @test memoryrefset!_tfunc(MemoryRef{String}, Int, Symbol, Bool) === Union{} + @test memoryrefset!_tfunc(String, Char, Symbol, Bool) === Union{} + @test memoryrefset!_tfunc(MemoryRef{Int}, Any, Symbol, Bool) === MemoryRef{Int} + @test memoryrefset!_tfunc(MemoryRef{Int}, Any, Any, Any) === MemoryRef{Int} + @test memoryrefset!_tfunc(GenericMemoryRef{:not_atomic}, Any, Any, Any) === GenericMemoryRef{:not_atomic} + @test memoryrefset!_tfunc(GenericMemoryRef, Any, Any, Any) === GenericMemoryRef + @test 
memoryrefset!_tfunc(Any, Any, Any, Any) === Any # also probably could be GenericMemoryRef + @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Vararg{Any}]) == MemoryRef{Int} + @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Vararg{Symbol}]) == Union{} + @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Any, Symbol, Vararg{Bool}]) == MemoryRef{Int} + @test builtin_tfunction(Core.memoryrefset!, Any[MemoryRef{Int}, Any, Symbol, Bool, Vararg{Any}]) == MemoryRef{Int} + @test memoryrefoffset_tfunc(MemoryRef) == memoryrefoffset_tfunc(GenericMemoryRef) == Int + @test memoryrefoffset_tfunc(Memory) == memoryrefoffset_tfunc(GenericMemory) == Union{} + @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{MemoryRef}]) == Int + @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{Any}]) == Int + @test builtin_tfunction(Core.memoryrefoffset, Any[Vararg{Memory}]) == Union{} +end + +let tuple_tfunc(@nospecialize xs...) = + Core.Compiler.tuple_tfunc(Core.Compiler.fallback_lattice, Any[xs...]) @test Core.Compiler.widenconst(tuple_tfunc(Type{Int})) === Tuple{DataType} # https://github.com/JuliaLang/julia/issues/44705 @test tuple_tfunc(Union{Type{Int32},Type{Int64}}) === Tuple{Type} @@ -1585,6 +1666,7 @@ g23024(TT::Tuple{DataType}) = f23024(TT[1], v23024) @test g23024((UInt8,)) === 2 @test !Core.Compiler.isconstType(Type{typeof(Union{})}) # could be Core.TypeofBottom or Type{Union{}} at runtime +@test !isa(Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, Type{Core.TypeofBottom}, Core.Compiler.Const(:name)), Core.Compiler.Const) @test Base.return_types(supertype, (Type{typeof(Union{})},)) == Any[Any] # issue #23685 @@ -1611,7 +1693,7 @@ gg13183(x::X...) where {X} = (_false13183 ? gg13183(x, x) : 0) let linfo = get_linfo(Base.convert, Tuple{Type{Int64}, Int32}), world = UInt(23) # some small-numbered world that should be valid interp = Core.Compiler.NativeInterpreter() - opt = Core.Compiler.OptimizationState(linfo, Core.Compiler.OptimizationParams(interp), interp) + opt = Core.Compiler.OptimizationState(linfo, interp) # make sure the state of the properties look reasonable @test opt.src !== linfo.def.source @test length(opt.src.slotflags) == linfo.def.nargs <= length(opt.src.slotnames) @@ -1622,7 +1704,7 @@ end # approximate static parameters due to unions let T1 = Array{Float64}, T2 = Array{_1,2} where _1 - inference_test_copy(a::T) where {T<:Array} = ccall(:jl_array_copy, Ref{T}, (Any,), a) + inference_test_copy(a::T) where {T<:Array} = ccall(:array_copy_like, Ref{T}, (Any,), a) rt = Base.return_types(inference_test_copy, (Union{T1,T2},))[1] @test rt >: T1 && rt >: T2 @@ -1643,44 +1725,48 @@ g_test_constant() = (f_constant(3) == 3 && f_constant(4) == 4 ? true : "BAD") f_pure_add() = (1 + 1 == 2) ? true : "FAIL" @test @inferred f_pure_add() -# inference of `T.mutable` -@test Core.Compiler.getfield_tfunc(Const(Int.name), Const(:flags)) == Const(0x4) -@test Core.Compiler.getfield_tfunc(Const(Vector{Int}.name), Const(:flags)) == Const(0x2) -@test Core.Compiler.getfield_tfunc(Core.TypeName, Const(:flags)) == UInt8 - -# getfield on abstract named tuples. 
issue #32698 -import Core.Compiler: getfield_tfunc, Const -@test getfield_tfunc(NamedTuple{(:id, :y), T} where {T <: Tuple{Int, Union{Float64, Missing}}}, - Const(:y)) == Union{Missing, Float64} -@test getfield_tfunc(NamedTuple{(:id, :y), T} where {T <: Tuple{Int, Union{Float64, Missing}}}, - Const(2)) == Union{Missing, Float64} -@test getfield_tfunc(NamedTuple{(:id, :y), T} where {T <: Tuple{Int, Union{Float64, Missing}}}, - Symbol) == Union{Missing, Float64, Int} -@test getfield_tfunc(NamedTuple{<:Any, T} where {T <: Tuple{Int, Union{Float64, Missing}}}, - Symbol) == Union{Missing, Float64, Int} -@test getfield_tfunc(NamedTuple{<:Any, T} where {T <: Tuple{Int, Union{Float64, Missing}}}, - Int) == Union{Missing, Float64, Int} -@test getfield_tfunc(NamedTuple{<:Any, T} where {T <: Tuple{Int, Union{Float64, Missing}}}, - Const(:x)) == Union{Missing, Float64, Int} - +import Core: Const mutable struct ARef{T} @atomic x::T end -@test getfield_tfunc(ARef{Int},Const(:x),Symbol) === Int -@test getfield_tfunc(ARef{Int},Const(:x),Bool) === Int -@test getfield_tfunc(ARef{Int},Const(:x),Symbol,Bool) === Int -@test getfield_tfunc(ARef{Int},Const(:x),Symbol,Vararg{Symbol}) === Int # `Vararg{Symbol}` might be empty -@test getfield_tfunc(ARef{Int},Const(:x),Vararg{Symbol}) === Int -@test getfield_tfunc(ARef{Int},Const(:x),Any,) === Int -@test getfield_tfunc(ARef{Int},Const(:x),Any,Any) === Int -@test getfield_tfunc(ARef{Int},Const(:x),Any,Vararg{Any}) === Int -@test getfield_tfunc(ARef{Int},Const(:x),Vararg{Any}) === Int -@test getfield_tfunc(ARef{Int},Const(:x),Int) === Union{} -@test getfield_tfunc(ARef{Int},Const(:x),Bool,Symbol) === Union{} -@test getfield_tfunc(ARef{Int},Const(:x),Symbol,Symbol) === Union{} -@test getfield_tfunc(ARef{Int},Const(:x),Bool,Bool) === Union{} - -import Core.Compiler: setfield!_tfunc, setfield!_nothrow, Const +let getfield_tfunc(@nospecialize xs...) = + Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, xs...) + + # inference of `T.mutable` + @test getfield_tfunc(Const(Int.name), Const(:flags)) == Const(0x4) + @test getfield_tfunc(Const(Vector{Int}.name), Const(:flags)) == Const(0x2) + @test getfield_tfunc(Core.TypeName, Const(:flags)) == UInt8 + + # getfield on abstract named tuples. 
issue #32698 + @test getfield_tfunc(NamedTuple{(:id, :y), T} where {T <: Tuple{Int, Union{Float64, Missing}}}, + Const(:y)) == Union{Missing, Float64} + @test getfield_tfunc(NamedTuple{(:id, :y), T} where {T <: Tuple{Int, Union{Float64, Missing}}}, + Const(2)) == Union{Missing, Float64} + @test getfield_tfunc(NamedTuple{(:id, :y), T} where {T <: Tuple{Int, Union{Float64, Missing}}}, + Symbol) == Union{Missing, Float64, Int} + @test getfield_tfunc(NamedTuple{<:Any, T} where {T <: Tuple{Int, Union{Float64, Missing}}}, + Symbol) == Union{Missing, Float64, Int} + @test getfield_tfunc(NamedTuple{<:Any, T} where {T <: Tuple{Int, Union{Float64, Missing}}}, + Int) == Union{Missing, Float64, Int} + @test getfield_tfunc(NamedTuple{<:Any, T} where {T <: Tuple{Int, Union{Float64, Missing}}}, + Const(:x)) == Union{Missing, Float64, Int} + + @test getfield_tfunc(ARef{Int},Const(:x),Symbol) === Int + @test getfield_tfunc(ARef{Int},Const(:x),Bool) === Int + @test getfield_tfunc(ARef{Int},Const(:x),Symbol,Bool) === Int + @test getfield_tfunc(ARef{Int},Const(:x),Symbol,Vararg{Symbol}) === Int # `Vararg{Symbol}` might be empty + @test getfield_tfunc(ARef{Int},Const(:x),Vararg{Symbol}) === Int + @test getfield_tfunc(ARef{Int},Const(:x),Any,) === Int + @test getfield_tfunc(ARef{Int},Const(:x),Any,Any) === Int + @test getfield_tfunc(ARef{Int},Const(:x),Any,Vararg{Any}) === Int + @test getfield_tfunc(ARef{Int},Const(:x),Vararg{Any}) === Int + @test getfield_tfunc(ARef{Int},Const(:x),Int) === Union{} + @test getfield_tfunc(ARef{Int},Const(:x),Bool,Symbol) === Union{} + @test getfield_tfunc(ARef{Int},Const(:x),Symbol,Symbol) === Union{} + @test getfield_tfunc(ARef{Int},Const(:x),Bool,Bool) === Union{} +end + +import Core.Compiler: Const mutable struct XY{X,Y} x::X y::Y @@ -1691,101 +1777,107 @@ mutable struct ABCDconst c const d::Union{Int,Nothing} end -@test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int) === Int -@test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int, Symbol) === Int -@test setfield!_tfunc(Base.RefValue{Int}, Const(1), Int) === Int -@test setfield!_tfunc(Base.RefValue{Int}, Const(1), Int, Symbol) === Int -@test setfield!_tfunc(Base.RefValue{Int}, Int, Int) === Int -@test setfield!_tfunc(Base.RefValue{Any}, Const(:x), Int) === Int -@test setfield!_tfunc(Base.RefValue{Any}, Const(:x), Int, Symbol) === Int -@test setfield!_tfunc(Base.RefValue{Any}, Const(1), Int) === Int -@test setfield!_tfunc(Base.RefValue{Any}, Const(1), Int, Symbol) === Int -@test setfield!_tfunc(Base.RefValue{Any}, Int, Int) === Int -@test setfield!_tfunc(XY{Any,Any}, Const(1), Int) === Int -@test setfield!_tfunc(XY{Any,Any}, Const(2), Float64) === Float64 -@test setfield!_tfunc(XY{Int,Float64}, Const(1), Int) === Int -@test setfield!_tfunc(XY{Int,Float64}, Const(2), Float64) === Float64 -@test setfield!_tfunc(ABCDconst, Const(:c), Any) === Any -@test setfield!_tfunc(ABCDconst, Const(3), Any) === Any -@test setfield!_tfunc(ABCDconst, Symbol, Any) === Any -@test setfield!_tfunc(ABCDconst, Int, Any) === Any -@test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Const(:x), Int) === Int -@test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Const(:x), Int) === Int -@test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Const(1), Int) === Int -@test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Const(1), Int) === Int -@test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Symbol, Int) === Int -@test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Symbol, Int) === Int -@test 
setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Int, Int) === Int -@test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Int, Int) === Int -@test setfield!_tfunc(Any, Symbol, Int) === Int -@test setfield!_tfunc(Any, Int, Int) === Int -@test setfield!_tfunc(Any, Any, Int) === Int -@test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Float64) === Union{} -@test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Float64, Symbol) === Union{} -@test setfield!_tfunc(Base.RefValue{Int}, Const(1), Float64) === Union{} -@test setfield!_tfunc(Base.RefValue{Int}, Const(1), Float64, Symbol) === Union{} -@test setfield!_tfunc(Base.RefValue{Int}, Int, Float64) === Union{} -@test setfield!_tfunc(Base.RefValue{Any}, Const(:y), Int) === Union{} -@test setfield!_tfunc(Base.RefValue{Any}, Const(:y), Int, Bool) === Union{} -@test setfield!_tfunc(Base.RefValue{Any}, Const(2), Int) === Union{} -@test setfield!_tfunc(Base.RefValue{Any}, Const(2), Int, Bool) === Union{} -@test setfield!_tfunc(Base.RefValue{Any}, String, Int) === Union{} -@test setfield!_tfunc(Some{Any}, Const(:value), Int) === Union{} -@test setfield!_tfunc(Some, Const(:value), Int) === Union{} -@test setfield!_tfunc(Some{Any}, Const(1), Int) === Union{} -@test setfield!_tfunc(Some, Const(1), Int) === Union{} -@test setfield!_tfunc(Some{Any}, Symbol, Int) === Union{} -@test setfield!_tfunc(Some, Symbol, Int) === Union{} -@test setfield!_tfunc(Some{Any}, Int, Int) === Union{} -@test setfield!_tfunc(Some, Int, Int) === Union{} -@test setfield!_tfunc(Const(@__MODULE__), Const(:v), Int) === Union{} -@test setfield!_tfunc(Const(@__MODULE__), Int, Int) === Union{} -@test setfield!_tfunc(Module, Const(:v), Int) === Union{} -@test setfield!_tfunc(Union{Module,Base.RefValue{Any}}, Const(:v), Int) === Union{} -@test setfield!_tfunc(ABCDconst, Const(:a), Any) === Union{} -@test setfield!_tfunc(ABCDconst, Const(:b), Any) === Union{} -@test setfield!_tfunc(ABCDconst, Const(:d), Any) === Union{} -@test setfield!_tfunc(ABCDconst, Const(1), Any) === Union{} -@test setfield!_tfunc(ABCDconst, Const(2), Any) === Union{} -@test setfield!_tfunc(ABCDconst, Const(4), Any) === Union{} -@test setfield!_nothrow(Base.RefValue{Int}, Const(:x), Int) -@test setfield!_nothrow(Base.RefValue{Int}, Const(1), Int) -@test setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int) -@test setfield!_nothrow(Base.RefValue{Any}, Const(1), Int) -@test setfield!_nothrow(XY{Any,Any}, Const(:x), Int) -@test setfield!_nothrow(XY{Any,Any}, Const(:x), Any) -@test setfield!_nothrow(XY{Int,Float64}, Const(:x), Int) -@test setfield!_nothrow(ABCDconst, Const(:c), Any) -@test setfield!_nothrow(ABCDconst, Const(3), Any) -@test !setfield!_nothrow(XY{Int,Float64}, Symbol, Any) -@test !setfield!_nothrow(XY{Int,Float64}, Int, Any) -@test !setfield!_nothrow(Base.RefValue{Int}, Const(:x), Any) -@test !setfield!_nothrow(Base.RefValue{Int}, Const(1), Any) -@test !setfield!_nothrow(Any[Base.RefValue{Any}, Const(:x), Int, Symbol]) -@test !setfield!_nothrow(Base.RefValue{Any}, Symbol, Int) -@test !setfield!_nothrow(Base.RefValue{Any}, Int, Int) -@test !setfield!_nothrow(XY{Int,Float64}, Const(:y), Int) -@test !setfield!_nothrow(XY{Int,Float64}, Symbol, Int) -@test !setfield!_nothrow(XY{Int,Float64}, Int, Int) -@test !setfield!_nothrow(ABCDconst, Const(:a), Any) -@test !setfield!_nothrow(ABCDconst, Const(:b), Any) -@test !setfield!_nothrow(ABCDconst, Const(:d), Any) -@test !setfield!_nothrow(ABCDconst, Symbol, Any) -@test !setfield!_nothrow(ABCDconst, Const(1), Any) -@test !setfield!_nothrow(ABCDconst, Const(2), 
Any) -@test !setfield!_nothrow(ABCDconst, Const(4), Any) -@test !setfield!_nothrow(ABCDconst, Int, Any) -@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Const(:x), Int) -@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Const(:x), Int) -@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Const(1), Int) -@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Const(1), Int) -@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Symbol, Int) -@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Symbol, Int) -@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Int, Int) -@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Int, Int) -@test !setfield!_nothrow(Any, Symbol, Int) -@test !setfield!_nothrow(Any, Int, Int) -@test !setfield!_nothrow(Any, Any, Int) +let setfield!_tfunc(@nospecialize xs...) = + Core.Compiler.setfield!_tfunc(Core.Compiler.fallback_lattice, xs...) + @test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int) === Int + @test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int, Symbol) === Int + @test setfield!_tfunc(Base.RefValue{Int}, Const(1), Int) === Int + @test setfield!_tfunc(Base.RefValue{Int}, Const(1), Int, Symbol) === Int + @test setfield!_tfunc(Base.RefValue{Int}, Int, Int) === Int + @test setfield!_tfunc(Base.RefValue{Any}, Const(:x), Int) === Int + @test setfield!_tfunc(Base.RefValue{Any}, Const(:x), Int, Symbol) === Int + @test setfield!_tfunc(Base.RefValue{Any}, Const(1), Int) === Int + @test setfield!_tfunc(Base.RefValue{Any}, Const(1), Int, Symbol) === Int + @test setfield!_tfunc(Base.RefValue{Any}, Int, Int) === Int + @test setfield!_tfunc(XY{Any,Any}, Const(1), Int) === Int + @test setfield!_tfunc(XY{Any,Any}, Const(2), Float64) === Float64 + @test setfield!_tfunc(XY{Int,Float64}, Const(1), Int) === Int + @test setfield!_tfunc(XY{Int,Float64}, Const(2), Float64) === Float64 + @test setfield!_tfunc(ABCDconst, Const(:c), Any) === Any + @test setfield!_tfunc(ABCDconst, Const(3), Any) === Any + @test setfield!_tfunc(ABCDconst, Symbol, Any) === Any + @test setfield!_tfunc(ABCDconst, Int, Any) === Any + @test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Const(:x), Int) === Int + @test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Const(:x), Int) === Int + @test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Const(1), Int) === Int + @test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Const(1), Int) === Int + @test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Symbol, Int) === Int + @test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Symbol, Int) === Int + @test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Int, Int) === Int + @test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Int, Int) === Int + @test setfield!_tfunc(Any, Symbol, Int) === Int + @test setfield!_tfunc(Any, Int, Int) === Int + @test setfield!_tfunc(Any, Any, Int) === Int + @test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Float64) === Union{} + @test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Float64, Symbol) === Union{} + @test setfield!_tfunc(Base.RefValue{Int}, Const(1), Float64) === Union{} + @test setfield!_tfunc(Base.RefValue{Int}, Const(1), Float64, Symbol) === Union{} + @test setfield!_tfunc(Base.RefValue{Int}, Int, Float64) === Union{} + @test setfield!_tfunc(Base.RefValue{Any}, Const(:y), Int) === Union{} + @test setfield!_tfunc(Base.RefValue{Any}, Const(:y), Int, Bool) === Union{} + @test setfield!_tfunc(Base.RefValue{Any}, Const(2), Int) === Union{} + @test 
setfield!_tfunc(Base.RefValue{Any}, Const(2), Int, Bool) === Union{} + @test setfield!_tfunc(Base.RefValue{Any}, String, Int) === Union{} + @test setfield!_tfunc(Some{Any}, Const(:value), Int) === Union{} + @test setfield!_tfunc(Some, Const(:value), Int) === Union{} + @test setfield!_tfunc(Some{Any}, Const(1), Int) === Union{} + @test setfield!_tfunc(Some, Const(1), Int) === Union{} + @test setfield!_tfunc(Some{Any}, Symbol, Int) === Union{} + @test setfield!_tfunc(Some, Symbol, Int) === Union{} + @test setfield!_tfunc(Some{Any}, Int, Int) === Union{} + @test setfield!_tfunc(Some, Int, Int) === Union{} + @test setfield!_tfunc(Const(@__MODULE__), Const(:v), Int) === Union{} + @test setfield!_tfunc(Const(@__MODULE__), Int, Int) === Union{} + @test setfield!_tfunc(Module, Const(:v), Int) === Union{} + @test setfield!_tfunc(Union{Module,Base.RefValue{Any}}, Const(:v), Int) === Union{} + @test setfield!_tfunc(ABCDconst, Const(:a), Any) === Union{} + @test setfield!_tfunc(ABCDconst, Const(:b), Any) === Union{} + @test setfield!_tfunc(ABCDconst, Const(:d), Any) === Union{} + @test setfield!_tfunc(ABCDconst, Const(1), Any) === Union{} + @test setfield!_tfunc(ABCDconst, Const(2), Any) === Union{} + @test setfield!_tfunc(ABCDconst, Const(4), Any) === Union{} +end +let setfield!_nothrow(@nospecialize xs...) = + Core.Compiler.setfield!_nothrow(Core.Compiler.SimpleInferenceLattice.instance, xs...) + @test setfield!_nothrow(Base.RefValue{Int}, Const(:x), Int) + @test setfield!_nothrow(Base.RefValue{Int}, Const(1), Int) + @test setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int) + @test setfield!_nothrow(Base.RefValue{Any}, Const(1), Int) + @test setfield!_nothrow(XY{Any,Any}, Const(:x), Int) + @test setfield!_nothrow(XY{Any,Any}, Const(:x), Any) + @test setfield!_nothrow(XY{Int,Float64}, Const(:x), Int) + @test setfield!_nothrow(ABCDconst, Const(:c), Any) + @test setfield!_nothrow(ABCDconst, Const(3), Any) + @test !setfield!_nothrow(XY{Int,Float64}, Symbol, Any) + @test !setfield!_nothrow(XY{Int,Float64}, Int, Any) + @test !setfield!_nothrow(Base.RefValue{Int}, Const(:x), Any) + @test !setfield!_nothrow(Base.RefValue{Int}, Const(1), Any) + @test !setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int, Symbol) + @test !setfield!_nothrow(Base.RefValue{Any}, Symbol, Int) + @test !setfield!_nothrow(Base.RefValue{Any}, Int, Int) + @test !setfield!_nothrow(XY{Int,Float64}, Const(:y), Int) + @test !setfield!_nothrow(XY{Int,Float64}, Symbol, Int) + @test !setfield!_nothrow(XY{Int,Float64}, Int, Int) + @test !setfield!_nothrow(ABCDconst, Const(:a), Any) + @test !setfield!_nothrow(ABCDconst, Const(:b), Any) + @test !setfield!_nothrow(ABCDconst, Const(:d), Any) + @test !setfield!_nothrow(ABCDconst, Symbol, Any) + @test !setfield!_nothrow(ABCDconst, Const(1), Any) + @test !setfield!_nothrow(ABCDconst, Const(2), Any) + @test !setfield!_nothrow(ABCDconst, Const(4), Any) + @test !setfield!_nothrow(ABCDconst, Int, Any) + @test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Const(:x), Int) + @test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Const(:x), Int) + @test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Const(1), Int) + @test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Const(1), Int) + @test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Symbol, Int) + @test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Symbol, Int) + @test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Int, Int) + @test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Int, Int) + @test 
!setfield!_nothrow(Any, Symbol, Int) + @test !setfield!_nothrow(Any, Int, Int) + @test !setfield!_nothrow(Any, Any, Int) +end struct Foo_22708 x::Ptr{Foo_22708} @@ -1798,9 +1890,17 @@ bar_22708(x) = f_22708(x) @test bar_22708(1) == "x" +struct EarlyGeneratedFunctionStub + stub::Core.GeneratedFunctionStub +end +(stub::EarlyGeneratedFunctionStub)(args...) = (@nospecialize; stub.stub(args...)) + # mechanism for spoofing work-limiting heuristics and early generator expansion (#24852) -function _generated_stub(gen::Symbol, args::Vector{Any}, params::Vector{Any}, line, file, expand_early) - stub = Expr(:new, Core.GeneratedFunctionStub, gen, args, params, line, file, expand_early) +function _generated_stub(gen::Symbol, args::Core.SimpleVector, params::Core.SimpleVector, expand_early::Bool) + stub = Expr(:new, Core.GeneratedFunctionStub, gen, args, params) + if expand_early + stub = Expr(:new, EarlyGeneratedFunctionStub, stub) + end return Expr(:meta, :generated, stub) end @@ -1809,39 +1909,52 @@ f24852_kernel2(x, y::Tuple) = f24852_kernel1(x, (y,)) f24852_kernel3(x, y::Tuple) = f24852_kernel2(x, (y,)) f24852_kernel(x, y::Number) = f24852_kernel3(x, (y,)) -function f24852_kernel_cinfo(fsig::Type) - world = typemax(UInt) # FIXME - match = Base._methods_by_ftype(fsig, -1, world)[1] - isdefined(match.method, :source) || return (nothing, :(f(x, y))) +function f24852_kernel_cinfo(world::UInt, source, fsig::Type) + matches = Base._methods_by_ftype(fsig, -1, world) + if matches === nothing || length(matches) != 1 + match = nothing + else + match = matches[1] + if !isdefined(match.method, :source) + match = nothing + end + end + if match === nothing + code_info = :(f(x, y)) + code_info = Core.GeneratedFunctionStub(identity, Core.svec(:self, :f, :x, :y), Core.svec(:X, :Y))(world, source, code_info) + return (nothing, code_info) + end code_info = Base.uncompressed_ir(match.method) Meta.partially_inline!(code_info.code, Any[], match.spec_types, Any[match.sparams...], 1, 0, :propagate) if startswith(String(match.method.name), "f24852") for a in code_info.code - if a isa Expr && a.head == :(=) + if Meta.isexpr(a, :(=)) a = a.args[2] end - if a isa Expr && length(a.args) === 3 && a.head === :call + if Meta.isexpr(a, :call) && length(a.args) === 3 pushfirst!(a.args, Core.SlotNumber(1)) end end end pushfirst!(code_info.slotnames, Symbol("#self#")) pushfirst!(code_info.slotflags, 0x00) + # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[]) + # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[]) return match.method, code_info end -function f24852_gen_cinfo_uninflated(X, Y, _, f, x, y) - _, code_info = f24852_kernel_cinfo(Tuple{f, x, y}) +function f24852_gen_cinfo_uninflated(world::UInt, source, X, Y, _, f, x, y) + _, code_info = f24852_kernel_cinfo(world, source, Tuple{f, x, y}) return code_info end -function f24852_gen_cinfo_inflated(X, Y, _, f, x, y) - method, code_info = f24852_kernel_cinfo(Tuple{f, x, y}) +function f24852_gen_cinfo_inflated(world::UInt, source, X, Y, _, f, x, y) + method, code_info = f24852_kernel_cinfo(world, source, Tuple{f, x, y}) code_info.method_for_inference_limit_heuristics = method return code_info end -function f24852_gen_expr(X, Y, _, f, x, y) # deparse f(x::X, y::Y) where {X, Y} +function f24852_gen_expr(X, Y, _, f, x, y) # deparse of f(x::X, y::Y) where {X, Y} if f === typeof(f24852_kernel) f2 = :f24852_kernel3 elseif f === typeof(f24852_kernel3) @@ -1858,20 +1971,8 @@ end @eval begin function f24852_late_expr(f, 
x::X, y::Y) where {X, Y} - $(_generated_stub(:f24852_gen_expr, Any[:self, :f, :x, :y], - Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), false)) - $(Expr(:meta, :generated_only)) - #= no body =# - end - function f24852_late_inflated(f, x::X, y::Y) where {X, Y} - $(_generated_stub(:f24852_gen_cinfo_inflated, Any[:self, :f, :x, :y], - Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), false)) - $(Expr(:meta, :generated_only)) - #= no body =# - end - function f24852_late_uninflated(f, x::X, y::Y) where {X, Y} - $(_generated_stub(:f24852_gen_cinfo_uninflated, Any[:self, :f, :x, :y], - Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), false)) + $(_generated_stub(:f24852_gen_expr, Core.svec(:self, :f, :x, :y), + Core.svec(:X, :Y), false)) $(Expr(:meta, :generated_only)) #= no body =# end @@ -1879,20 +1980,18 @@ end @eval begin function f24852_early_expr(f, x::X, y::Y) where {X, Y} - $(_generated_stub(:f24852_gen_expr, Any[:self, :f, :x, :y], - Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), true)) + $(_generated_stub(:f24852_gen_expr, Core.svec(:self, :f, :x, :y), + Core.svec(:X, :Y), true)) $(Expr(:meta, :generated_only)) #= no body =# end function f24852_early_inflated(f, x::X, y::Y) where {X, Y} - $(_generated_stub(:f24852_gen_cinfo_inflated, Any[:self, :f, :x, :y], - Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), true)) + $(Expr(:meta, :generated, f24852_gen_cinfo_inflated)) $(Expr(:meta, :generated_only)) #= no body =# end function f24852_early_uninflated(f, x::X, y::Y) where {X, Y} - $(_generated_stub(:f24852_gen_cinfo_uninflated, Any[:self, :f, :x, :y], - Any[:X, :Y], @__LINE__, QuoteNode(Symbol(@__FILE__)), true)) + $(Expr(:meta, :generated, f24852_gen_cinfo_uninflated)) $(Expr(:meta, :generated_only)) #= no body =# end @@ -1903,10 +2002,6 @@ result = f24852_kernel(x, y) @test result === f24852_late_expr(f24852_kernel, x, y) @test Base.return_types(f24852_late_expr, typeof((f24852_kernel, x, y))) == Any[Any] -@test result === f24852_late_uninflated(f24852_kernel, x, y) -@test Base.return_types(f24852_late_uninflated, typeof((f24852_kernel, x, y))) == Any[Any] -@test result === f24852_late_uninflated(f24852_kernel, x, y) -@test Base.return_types(f24852_late_uninflated, typeof((f24852_kernel, x, y))) == Any[Any] @test result === f24852_early_expr(f24852_kernel, x, y) @test Base.return_types(f24852_early_expr, typeof((f24852_kernel, x, y))) == Any[Any] @@ -1914,7 +2009,6 @@ result = f24852_kernel(x, y) @test Base.return_types(f24852_early_uninflated, typeof((f24852_kernel, x, y))) == Any[Any] @test result === @inferred f24852_early_inflated(f24852_kernel, x, y) @test Base.return_types(f24852_early_inflated, typeof((f24852_kernel, x, y))) == Any[Float64] - # TODO: test that `expand_early = true` + inflated `method_for_inference_limit_heuristics` # can be used to tighten up some inference result. 
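# --- editorial sketch, not part of the patch above ---------------------------
# The f24852 hunks migrate hand-written generators to the newer convention in
# which a generator receives the caller's world age and the method `source`
# ahead of the static-parameter/argument names (cf. `f24852_gen_cinfo_*` above).
# A minimal, hypothetical illustration of that calling convention:
function sketch_gen24852(world::UInt, source, self, x, y)
    # ignore `world`/`source` here and just return an expression for the body;
    # a generator may also return a `CodeInfo` directly, as the tests above do
    return :(x + y)
end
# Such a generator is wired up through a `Core.GeneratedFunctionStub`, e.g.
#   Core.GeneratedFunctionStub(sketch_gen24852, Core.svec(:self, :x, :y), Core.svec())
# matching the `Core.svec`-based stub construction used by `_generated_stub`.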
@@ -1940,7 +2034,7 @@ function foo25261() next = f25261(Core.getfield(next, 2)) end end -let opt25261 = code_typed(foo25261, Tuple{}, optimize=false)[1].first.code +let opt25261 = code_typed(foo25261, Tuple{}, optimize=true)[1].first.code i = 1 # Skip to after the branch while !isa(opt25261[i], GotoIfNot) @@ -1948,7 +2042,7 @@ let opt25261 = code_typed(foo25261, Tuple{}, optimize=false)[1].first.code end foundslot = false for expr25261 in opt25261[i:end] - if expr25261 isa TypedSlot && expr25261.typ === Tuple{Int, Int} + if expr25261 isa Core.PiNode && expr25261.typ === Tuple{Int, Int} # This should be the assignment to the SSAValue into the getfield # call - make sure it's a TypedSlot foundslot = true @@ -2080,7 +2174,7 @@ end end # https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851 -# `PartialStruct` shoudln't wrap `Conditional` +# `PartialStruct` shouldn't wrap `Conditional` let M = Module() @eval M begin struct BePartialStruct @@ -2102,6 +2196,16 @@ let M = Module() @test rt == Tuple{Union{Nothing,Int},Any} end +# make sure we never form nested `Conditional` (https://github.com/JuliaLang/julia/issues/46207) +@test Base.return_types((Any,)) do a + c = isa(a, Integer) + 42 === c ? :a : "b" +end |> only === String +@test Base.return_types((Any,)) do a + c = isa(a, Integer) + c === 42 ? :a : "b" +end |> only === String + @testset "conditional constraint propagation from non-`Conditional` object" begin @test Base.return_types((Bool,)) do b if b @@ -2149,6 +2253,317 @@ end end |> only === Int end +# type-based alias analysis +# ========================= +# `MustAlias` propagates constraints imposed on aliased fields + +struct AliasableField{T} + f::T +end +struct AliasableFields{S,T} + f1::S + f2::T +end +mutable struct AliasableConstField{S,T} + const f1::S + f2::T +end + +import Core.Compiler: + InferenceLattice, MustAliasesLattice, InterMustAliasesLattice, + BaseInferenceLattice, SimpleInferenceLattice, IPOResultLattice, typeinf_lattice, ipo_lattice, optimizer_lattice + +include("newinterp.jl") +@newinterp MustAliasInterpreter +let CC = Core.Compiler + CC.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance)) + CC.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance)) + CC.optimizer_lattice(::MustAliasInterpreter) = SimpleInferenceLattice.instance +end + +# lattice +# ------- + +import Core.Compiler: MustAlias, Const, PartialStruct, ⊑, tmerge +let 𝕃ᵢ = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance)) + ⊑(@nospecialize(a), @nospecialize(b)) = Core.Compiler.:⊑(𝕃ᵢ, a, b) + tmerge(@nospecialize(a), @nospecialize(b)) = Core.Compiler.tmerge(𝕃ᵢ, a, b) + isa_tfunc(@nospecialize xs...) = Core.Compiler.isa_tfunc(𝕃ᵢ, xs...) + ifelse_tfunc(@nospecialize xs...) = Core.Compiler.ifelse_tfunc(𝕃ᵢ, xs...) 
+ + @test (MustAlias(2, AliasableField{Any}, 1, Int) ⊑ Int) + @test !(Int ⊑ MustAlias(2, AliasableField{Any}, 1, Int)) + @test (Int ⊑ MustAlias(2, AliasableField{Any}, 1, Any)) + @test (Const(42) ⊑ MustAlias(2, AliasableField{Any}, 1, Int)) + @test !(MustAlias(2, AliasableField{Any}, 1, Any) ⊑ Int) + @test tmerge(MustAlias(2, AliasableField{Any}, 1, Any), Const(nothing)) === Any + @test tmerge(MustAlias(2, AliasableField{Any}, 1, Int), Const(nothing)) === Union{Int,Nothing} + @test tmerge(Const(nothing), MustAlias(2, AliasableField{Any}, 1, Any)) === Any + @test tmerge(Const(nothing), MustAlias(2, AliasableField{Any}, 1, Int)) === Union{Int,Nothing} + @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Bool), Const(Bool)) === Const(true) + @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Bool), Type{Bool}) === Const(true) + @test isa_tfunc(MustAlias(2, AliasableField{Any}, 1, Int), Type{Bool}) === Const(false) + @test ifelse_tfunc(MustAlias(2, AliasableField{Any}, 1, Bool), Int, Int) === Int + @test ifelse_tfunc(MustAlias(2, AliasableField{Any}, 1, Int), Int, Int) === Union{} +end + +maybeget_mustalias_tmerge(x::AliasableField) = x.f +maybeget_mustalias_tmerge(x) = x +@test Base.return_types((Union{Nothing,AliasableField{Any}},); interp=MustAliasInterpreter()) do x + isa(maybeget_mustalias_tmerge(x)#=::Any, not MustAlias=#, Int) && throw() + x +end |> only === Union{Nothing,AliasableField{Any}} + +# isa constraint +# -------------- + +# simple intra-procedural case +@test Base.return_types((AliasableField,); interp=MustAliasInterpreter()) do a + if isa(getfield(a, :f), Int) + return getfield(a, :f) + end + return 0 +end |> only === Int +@test Base.return_types((AliasableField,); interp=MustAliasInterpreter()) do a + if isa(getfield(a, 1), Int) + return getfield(a, 1) + end + return 0 +end |> only === Int +@test Base.return_types((AliasableField{Union{Some{Int},Nothing}},); interp=MustAliasInterpreter()) do a + if isa(getfield(a, 1), Some) + return getfield(a, 1) + end + throw() +end |> only === Some{Int} +@test Base.return_types((Tuple{Any},); interp=MustAliasInterpreter()) do t + if isa(getfield(t, 1), Int) + return getfield(t, 1) + end + return 0 +end |> only === Int +@test Base.return_types((Any,); interp=MustAliasInterpreter()) do a + x = AliasableFields(a, 0) # x::PartialStruct(AliasableFields, Any[Any, Const(0)]) + if isa(getfield(x, :f1), Int) # x::PartialStruct(AliasableFields, Any[Int, Const(0)]) + return getfield(x, :f1) + end + return 0 +end |> only === Int +@test Base.return_types((Any,Any); interp=MustAliasInterpreter()) do a, b + x = AliasableFields(a, b) # x::AliasableFields + if isa(getfield(x, :f1), Int) # x::PartialStruct(AliasableFields, Any[Int, Any]) + if isa(getfield(x, :f2), Int) # x::PartialStruct(AliasableFields, Any[Int, Int]) + return getfield(x, :f1), getfield(x, :f2) + end + end + return 0, 0 +end |> only === Tuple{Int,Int} +@test Base.return_types((Any,); interp=MustAliasInterpreter()) do a + x = AliasableConstField(a, 0) + if isa(getfield(x, :f1), Int) + return getfield(x, :f1) + end + return 0 +end |> only === Int + +# shouldn't use refinement information when not worthwhile +@test Base.return_types((AliasableField{Int},); interp=MustAliasInterpreter()) do a + if isa(getfield(a, :f), Any) + return getfield(a, :f) # shouldn't be ::Any + end + return 0 +end |> only === Int +# shouldn't assume anything about mutable field +@test Base.return_types((Any,Any); interp=MustAliasInterpreter()) do a, b + x = AliasableConstField{Any,Any}(a, b) + if isa(getfield(x, 
:f2), Int) + setfield!(x, :f2, z::Any) + return getfield(x, :f2) # shouldn't be ::Int + end + return 0 +end |> only === Any +# when abstract type, we shouldn't assume anything +@test Base.return_types((Any,); interp=MustAliasInterpreter()) do a + if isa(getfield(a, :mayexist), Int) + return getfield(a, :mayexist) + end + return 0 +end |> only === Any + +# works inter-procedurally +@test Base.return_types((AliasableField,); interp=MustAliasInterpreter()) do a + if isa(a.f, Int) + return a.f + end + return 0 +end |> only === Int +@test Base.return_types((Tuple{Any},); interp=MustAliasInterpreter()) do t + if isa(t[1], Int) + return t[1] + end + return 0 +end |> only === Int +@test Base.return_types((Any,); interp=MustAliasInterpreter()) do a + x = AliasableFields(a, 0) # x::PartialStruct(AliasableFields, Any[Any, Const(0)]) + if isa(x.f1, Int) # x::PartialStruct(AliasableFields, Any[Int, Const(0)]) + return x.f1 + end + return 0 +end |> only === Int +@test Base.return_types((Any,Any); interp=MustAliasInterpreter()) do a, b + x = AliasableFields(a, b) # x::AliasableFields + if isa(x.f1, Int) # x::PartialStruct(AliasableFields, Any[Int, Any]) + if isa(x.f2, Int) # x::PartialStruct(AliasableFields, Any[Int, Int]) + return x.f1, x.f2 + end + end + return 0, 0 +end |> only === Tuple{Int,Int} +@test Base.return_types((Any,); interp=MustAliasInterpreter()) do a + x = AliasableConstField(a, 0) + if isa(x.f1, Int) + return x.f1 + end + return 0 +end |> only === Int +getf(a) = a.f +@test Base.return_types((AliasableField,); interp=MustAliasInterpreter()) do a + if isa(getf(a), Int) + return getf(a) + end + return 0 +end |> only === Int + +# merge of same `MustAlias`s +merge_same_aliases(b, a) = b ? _merge_same_aliases1(a) : _merge_same_aliases2(a) # MustAlias(a, Const(:f1), Union{Int,Nothing}) +_merge_same_aliases1(a) = (@assert isa(a.f, Int); a.f) # ::MustAlias(a, Const(:f1), Int) +_merge_same_aliases2(a) = (@assert isa(a.f, Nothing); a.f) # ::MustAlias(a, Const(:f1), Nothing) +@test Base.return_types((Bool,AliasableField,); interp=MustAliasInterpreter()) do b, a + return merge_same_aliases(b, a) # ::Union{Int,Nothing} +end |> only === Union{Nothing,Int} + +# call-site refinement +isaint(a) = isa(a, Int) +@test Base.return_types((AliasableField,); interp=MustAliasInterpreter()) do a + if isaint(a.f) + return a.f + end + return 0 +end |> only === Int +# handle multiple call-site refinement targets +isasome(_) = true +isasome(::Nothing) = false +@test_broken Base.return_types((AliasableField{Union{Int,Nothing}},); interp=MustAliasInterpreter()) do a + if isasome(a.f) + return a.f + end + return 0 +end |> only === Int + +# appropriate lattice order +@test Base.return_types((AliasableField{Any},); interp=MustAliasInterpreter()) do x + v = x.f # ::MustAlias(2, AliasableField{Any}, 1, Any) + if isa(v, Int) # ::Conditional(3, Int, Any) + v = v # ::Int (∵ Int ⊑ MustAlias(2, AliasableField{Any}, 1, Any)) + else + v = 42 + end + return v +end |> only === Int + +# complicated callsite refinement cases +from_interconditional_check11(y::Int, ::AliasableField) = y > 0 +@test Base.return_types((AliasableField{Any},); interp=MustAliasInterpreter()) do x + if from_interconditional_check11(x.f, x) + return x.f + end + return 0 +end |> only === Int +from_interconditional_check12(::AliasableField, y::Int) = y > 0 +@test Base.return_types((AliasableField{Any},); interp=MustAliasInterpreter()) do x + if from_interconditional_check12(x, x.f) + return x.f + end + return 0 +end |> only === Int 
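# --- editorial sketch, not part of the patch above ---------------------------
# In user terms, the MustAlias tests around here check that a type test on an
# (effectively immutable) field refines later reads of that same field, also
# when the test happens inside a helper call. `SketchField`/`sketch_*` are
# hypothetical stand-ins for `AliasableField` and the helpers used above.
struct SketchField
    f
end
sketch_isaint(x) = isa(x, Int)      # call-site refinement target
function sketch_refine(a::SketchField)
    if sketch_isaint(a.f)           # constraint imposed on the aliased field `a.f`
        return a.f                  # inferred as `Int` under the MustAlias-enabled lattice
    end
    return 0
end
# e.g. only(Base.return_types(sketch_refine, (SketchField,); interp=MustAliasInterpreter())) === Int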
+from_interconditional_check21(y, ::Union{Int,String}) = isa(y, Int) +@test Base.return_types((AliasableField{Any},); interp=MustAliasInterpreter()) do x + if from_interconditional_check21(x.f, x.f) + return x.f + end + return 0 +end |> only === Int +from_interconditional_check22(::Union{Int,String}, y) = isa(y, Int) +@test Base.return_types((AliasableField{Any},); interp=MustAliasInterpreter()) do x + if from_interconditional_check22(x.f, x.f) + return x.f + end + return 0 +end |> only === Int + +# prioritize constraints on slot objects +# https://github.com/aviatesk/JET.jl/issues/509 +struct JET509 + list::Union{Tuple{},Vector{Int}} +end +jet509_hasitems(list) = length(list) >= 1 +@test Base.return_types((JET509,); interp=MustAliasInterpreter()) do ilist::JET509 + list = ilist.list + if jet509_hasitems(list) + return list + end + error("list is empty") +end |> only == Vector{Int} + +# === constraint +# -------------- + +# simple symmetric tests +@test Base.return_types((AliasableField,); interp=MustAliasInterpreter()) do x + if x.f === 0 + return x.f + end + return 0 +end |> only === Int +@test Base.return_types((AliasableField,); interp=MustAliasInterpreter()) do x + if 0 === x.f + return x.f + end + return 0 +end |> only === Int +# NOTE we prioritize constraints on aliased field over those on slots themselves +@test Base.return_types((AliasableField,Int,); interp=MustAliasInterpreter()) do x, a + if x.f === a + return x.f + end + return 0 +end |> only === Int +@test Base.return_types((AliasableField,Int,); interp=MustAliasInterpreter()) do x, a + if a === x.f + return x.f + end + return 0 +end |> only === Int +@test_broken Base.return_types((AliasableField{Union{Nothing,Int}},); interp=MustAliasInterpreter()) do x + if !isnothing(x.f) + return x.f + end + return 0 +end |> only === Int +@test_broken Base.return_types((AliasableField{Union{Some{Int},Nothing}},); interp=MustAliasInterpreter()) do x + if !isnothing(x.f) + return x.f + end + throw() +end |> only === Some{Int} + +# handle the edge case +@eval intermustalias_edgecase(_) = $(Core.Compiler.InterMustAlias(2, Some{Any}, 1, Int)) +Base.return_types(intermustalias_edgecase, (Any,); interp=MustAliasInterpreter()) # create cache +@test Base.return_types((Any,); interp=MustAliasInterpreter()) do x + intermustalias_edgecase(x) +end |> only === Core.Compiler.InterMustAlias + function f25579(g) h = g[] t = (h === nothing) @@ -2175,7 +2590,7 @@ function h25579(g) return t ? typeof(h) : typeof(h) end @test Base.return_types(h25579, (Base.RefValue{Union{Nothing, Int}},)) == - Any[Union{Type{Float64}, Type{Int}, Type{Nothing}}] + Any[Type{Float64}] f26172(v) = Val{length(Base.tail(ntuple(identity, v)))}() # Val(M-1) g26172(::Val{0}) = () @@ -2258,78 +2673,125 @@ function _g_ifelse_isa_() end @test Base.return_types(_g_ifelse_isa_, ()) == [Int] -@testset "Conditional forwarding" begin - # forward `Conditional` if it conveys a constraint on any other argument - ifelselike(cnd, x, y) = cnd ? x : y - - @test Base.return_types((Any,Int,)) do x, y - ifelselike(isa(x, Int), x, y) - end |> only == Int +# Conditional forwarding +# ====================== - # should work nicely with union-split - @test Base.return_types((Union{Int,Nothing},)) do x - ifelselike(isa(x, Int), x, 0) - end |> only == Int +# forward `Conditional` if it conveys a constraint on any other argument +ifelselike(cnd, x, y) = cnd ? 
x : y - @test Base.return_types((Any,Int)) do x, y - ifelselike(!isa(x, Int), y, x) - end |> only == Int +@test Base.return_types((Any,Int,)) do x, y + ifelselike(isa(x, Int), x, y) +end |> only == Int - @test Base.return_types((Any,Int)) do x, y - a = ifelselike(x === 0, x, 0) # ::Const(0) - if a == 0 - return y - else - return nothing # dead branch - end - end |> only == Int +# should work nicely with union-split +@test Base.return_types((Union{Int,Nothing},)) do x + ifelselike(isa(x, Int), x, 0) +end |> only == Int - # pick up the first if there are multiple constrained arguments - @test Base.return_types((Any,)) do x - ifelselike(isa(x, Int), x, x) - end |> only == Any +@test Base.return_types((Any,Int)) do x, y + ifelselike(!isa(x, Int), y, x) +end |> only == Int - # just propagate multiple constraints - ifelselike2(cnd1, cnd2, x, y, z) = cnd1 ? x : cnd2 ? y : z - @test Base.return_types((Any,Any)) do x, y - ifelselike2(isa(x, Int), isa(y, Int), x, y, 0) - end |> only == Int +@test Base.return_types((Any,Int)) do x, y + a = ifelselike(x === 0, x, 0) # ::Const(0) + if a == 0 + return y + else + return nothing # dead branch + end +end |> only == Int - # work with `invoke` - @test Base.return_types((Any,Any)) do x, y - @invoke ifelselike(isa(x, Int), x::Any, y::Int) - end |> only == Int +# pick up the first if there are multiple constrained arguments +@test Base.return_types((Any,)) do x + ifelselike(isa(x, Int), x, x) +end |> only == Any - # don't be confused with vararg method - vacond(cnd, va...) = cnd ? va : 0 - @test Base.return_types((Any,)) do x - # at runtime we will see `va::Tuple{Tuple{Int,Int}, Tuple{Int,Int}}` - vacond(isa(x, Tuple{Int,Int}), x, x) - end |> only == Union{Int,Tuple{Any,Any}} +# just propagate multiple constraints +ifelselike2(cnd1, cnd2, x, y, z) = cnd1 ? x : cnd2 ? y : z +@test Base.return_types((Any,Any)) do x, y + ifelselike2(isa(x, Int), isa(y, Int), x, y, 0) +end |> only == Int - # demonstrate extra constraint propagation for Base.ifelse - @test Base.return_types((Any,Int,)) do x, y - ifelse(isa(x, Int), x, y) - end |> only == Int +# work with `invoke` +@test Base.return_types((Any,Any)) do x, y + @invoke ifelselike(isa(x, Int), x::Any, y::Int) +end |> only == Int - # slot as SSA - @test Base.return_types((Any,Vector{Any})) do x, y - z = x - ifelselike(isa(z, Int), z, length(y)) - end |> only === Int +# don't be confused with vararg method +vacond(cnd, va...) = cnd ? 
va : 0 +@test Base.return_types((Any,)) do x + # at runtime we will see `va::Tuple{Tuple{Int,Int}, Tuple{Int,Int}}` + vacond(isa(x, Tuple{Int,Int}), x, x) +end |> only == Union{Int,Tuple{Any,Any}} + +# https://github.com/JuliaLang/julia/issues/47435 +is_closed_ex(e::InvalidStateException) = true +is_closed_ex(e) = false +function issue47435() + try + catch e + println("caught $e: $(is_closed_ex(e))") + end end +@test only(Base.return_types(issue47435)) === Nothing + +# demonstrate extra constraint propagation for Base.ifelse +@test Base.return_types((Any,Int,)) do x, y + ifelse(isa(x, Int), x, y) +end |> only == Int + +# forward conditional information imposed on SSA that is alised to a slot +@test Base.return_types((Any,Vector{Any})) do x, y + z = x + ifelselike(isa(z, Int), z, length(y)) +end |> only === Int # Equivalence of Const(T.instance) and T for singleton types @test Const(nothing) ⊑ Nothing && Nothing ⊑ Const(nothing) -# Don't pessimize apply_type to anything worse than Type and yield Bottom for invalid Unions -@test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union}}) == Type{Union{}} -@test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Any}) == Type -@test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Any,Any}) == Type -@test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Int}) == Union{} -@test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Any,Int}) == Union{} -@test Core.Compiler.return_type(Core.apply_type, Tuple{Any}) == Any -@test Core.Compiler.return_type(Core.apply_type, Tuple{Any,Any}) == Any +# https://github.com/JuliaLang/julia/pull/47947 +# correct `apply_type` inference of `NamedTuple{(), <:Any}` +@test (() -> NamedTuple{(), <:Any})() isa UnionAll + +# Don't pessimize apply_type to anything worse than Type (or TypeVar) and yield Bottom for invalid Unions +@test only(Base.return_types(Core.apply_type, Tuple{Type{Union}})) == Type{Union{}} +@test only(Base.return_types(Core.apply_type, Tuple{Type{Union},Any})) == Union{Type,TypeVar} +@test only(Base.return_types(Core.apply_type, Tuple{Type{Union},Any,Any})) == Type +@test only(Base.return_types(Core.apply_type, Tuple{Type{Union},Int})) == Union{} +@test only(Base.return_types(Core.apply_type, Tuple{Type{Union},Any,Int})) == Union{} +@test only(Base.return_types(Core.apply_type, Tuple{Any})) == Any +@test only(Base.return_types(Core.apply_type, Tuple{Any,Any})) == Any + +# `apply_type_tfunc` accuracy for constrained type construction +# https://github.com/JuliaLang/julia/issues/47089 +import Core: Const +import Core.Compiler: apply_type_tfunc +struct Issue47089{A<:Number,B<:Number} end +let 𝕃 = Core.Compiler.fallback_lattice + A = Type{<:Integer} + @test apply_type_tfunc(𝕃, Const(Issue47089), A, A) <: (Type{Issue47089{A,B}} where {A<:Integer, B<:Integer}) + @test apply_type_tfunc(𝕃, Const(Issue47089), Const(Int), Const(Int), Const(Int)) === Union{} + @test apply_type_tfunc(𝕃, Const(Issue47089), Const(String)) === Union{} + @test apply_type_tfunc(𝕃, Const(Issue47089), Const(AbstractString)) === Union{} + @test apply_type_tfunc(𝕃, Const(Issue47089), Type{Ptr}, Type{Ptr{T}} where T) === Base.rewrap_unionall(Type{Issue47089.body.body}, Issue47089) + # check complexity size limiting + @test apply_type_tfunc(𝕃, Const(Val), Type{Pair{Pair{Pair{Pair{A,B},C},D},E}} where {A,B,C,D,E}) == Type{Val{Pair{A, B}}} where {A, B} + @test apply_type_tfunc(𝕃, Const(Pair), Base.rewrap_unionall(Type{Pair.body.body},Pair), Type{Pair{Pair{Pair{Pair{A,B},C},D},E}} where 
{A,B,C,D,E}) == Type{Pair{Pair{A, B}, Pair{C, D}}} where {A, B, C, D} + @test apply_type_tfunc(𝕃, Const(Val), Type{Union{Int,Pair{Pair{Pair{Pair{A,B},C},D},E}}} where {A,B,C,D,E}) == Type{Val{_A}} where _A +end +@test only(Base.return_types(keys, (Dict{String},))) == Base.KeySet{String, T} where T<:(Dict{String}) +@test only(Base.return_types((r)->similar(Array{typeof(r[])}, 1), (Base.RefValue{Array{Int}},))) == Vector{<:Array{Int}} +@test only(Base.return_types((r)->similar(Array{typeof(r[])}, 1), (Base.RefValue{Array{<:Real}},))) == Vector{<:Array{<:Real}} +# test complexity limit on apply_type on a function capturing functions returning functions +@test only(Base.return_types(Base.afoldl, (typeof((m, n) -> () -> Returns(nothing)(m, n)), Function, Function, Vararg{Function}))) === Function + +let A = Tuple{A,B,C,D,E,F,G,H} where {A,B,C,D,E,F,G,H} + B = Core.Compiler.rename_unionall(A) + for i in 1:8 + @test A.var != B.var && (i == 1 ? A == B : A != B) + A, B = A.body, B.body + end +end # PR 27351, make sure optimized type intersection for method invalidation handles typevars @@ -2488,7 +2950,7 @@ end # issue #27316 - inference shouldn't hang on these f27316(::Vector) = nothing f27316(::Any) = f27316(Any[][1]), f27316(Any[][1]) -let expected = NTuple{2, Union{Nothing, NTuple{2, Union{Nothing, Tuple{Any, Any}}}}} +let expected = NTuple{2, Union{Nothing, Tuple{Any, Any}}} @test Tuple{Nothing, Nothing} <: only(Base.return_types(f27316, Tuple{Int})) == expected # we may be able to improve this bound in the future end function g27316() @@ -2524,7 +2986,7 @@ Base.iterate(i::Iterator27434, ::Val{2}) = i.z, Val(3) Base.iterate(::Iterator27434, ::Any) = nothing @test @inferred(splat27434(Iterator27434(1, 2, 3))) == (1, 2, 3) @test @inferred((1, 2, 3) == (1, 2, 3)) -@test Core.Compiler.return_type(splat27434, Tuple{typeof(Iterators.repeated(1))}) == Union{} +@test only(Base.return_types(splat27434, Tuple{typeof(Iterators.repeated(1))})) == Union{} # issue #32465 let rt = Base.return_types(splat27434, (NamedTuple{(:x,), Tuple{T}} where T,)) @@ -2565,13 +3027,16 @@ end @test ig27907(Int, Int, 1, 0) == 0 # issue #28279 +# ensure that lowering doesn't move these into statement position, which would require renumbering +using Base: +, - function f28279(b::Bool) - i = 1 - while i > b - i -= 1 + let i = 1 + while i > b + i -= 1 + end + if b end + return i + 1 end - if b end - return i + 1 end code28279 = code_lowered(f28279, (Bool,))[1].code oldcode28279 = deepcopy(code28279) @@ -2611,11 +3076,11 @@ end # issue #28356 # unit test to make sure countunionsplit overflows gracefully # we don't care what number is returned as long as it's large -@test Core.Compiler.unionsplitcost(Any[Union{Int32, Int64} for i=1:80]) > 100000 -@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}]) == 2 -@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32, Int64}, Int8]) == 8 -@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6 -@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6 +@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int32, Int64} for i=1:80]) > 100000 +@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}]) == 2 +@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32, Int64}, Int8]) == 8 
+@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6 +@test Core.Compiler.unionsplitcost(Core.Compiler.JLTypeLattice(), Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6 # make sure compiler doesn't hang in union splitting @@ -2801,7 +3266,7 @@ foo_inlining_apply(args...) = ccall(:jl_, Nothing, (Any,), args[1]) bar_inlining_apply() = Core._apply_iterate(iterate, Core._apply_iterate, (iterate,), (foo_inlining_apply,), ((1,),)) let ci = code_typed(bar_inlining_apply, Tuple{})[1].first @test length(ci.code) == 2 - @test ci.code[1].head == :foreigncall + @test ci.code[1].head === :foreigncall end # Test that inference can infer .instance of types @@ -2850,7 +3315,7 @@ j30385(T, y) = k30385(f30385(T, y)) @test @inferred(j30385(:dummy, 1)) == "dummy" @test Base.return_types(Tuple, (NamedTuple{<:Any,Tuple{Any,Int}},)) == Any[Tuple{Any,Int}] -@test Base.return_types(Base.Splat(tuple), (typeof((a=1,)),)) == Any[Tuple{Int}] +@test Base.return_types(Base.splat(tuple), (typeof((a=1,)),)) == Any[Tuple{Int}] # test that return_type_tfunc isn't affected by max_methods differently than return_type _rttf_test(::Int8) = 0 @@ -2872,8 +3337,8 @@ f_with_Type_arg(::Type{T}) where {T} = T (N >= 0) || throw(ArgumentError(string("tuple length should be ≥0, got ", N))) if @generated quote - @Base.nexprs $N i -> t_i = f(i) - @Base.ncall $N tuple t + Base.@nexprs $N i -> t_i = f(i) + Base.@ncall $N tuple t end else Tuple(f(i) for i = 1:N) @@ -2882,7 +3347,10 @@ end call_ntuple(a, b) = my_ntuple(i->(a+b; i), Val(4)) @test Base.return_types(call_ntuple, Tuple{Any,Any}) == [NTuple{4, Int}] @test length(code_typed(my_ntuple, Tuple{Any, Val{4}})) == 1 -@test_throws ErrorException code_typed(my_ntuple, Tuple{Any, Val}) +let (src, rt) = only(code_typed(my_ntuple, Tuple{Any, Val})) + @test src isa CodeInfo + @test rt == Tuple +end @generated unionall_sig_generated(::Vector{T}, b::Vector{S}) where {T, S} = :($b) @test length(code_typed(unionall_sig_generated, Tuple{Any, Vector{Int}})) == 1 @@ -3040,9 +3508,12 @@ const DenseIdx = Union{IntRange,Integer} # Non uniformity in expressions with PartialTypeVar @test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar) let N = TypeVar(:N) - @test Core.Compiler.apply_type_nothrow([Core.Compiler.Const(NTuple), + 𝕃 = Core.Compiler.SimpleInferenceLattice.instance + argtypes = Any[Core.Compiler.Const(NTuple), Core.Compiler.PartialTypeVar(N, true, true), - Core.Compiler.Const(Any)], Type{Tuple{Vararg{Any,N}}}) + Core.Compiler.Const(Any)] + rt = Type{Tuple{Vararg{Any,N}}} + @test Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt) end # issue #33768 @@ -3064,8 +3535,12 @@ end @test Base.return_types(h33768, ()) == Any[Union{}] # constant prop of `Symbol("")` -f_getf_computed_symbol(p) = getfield(p, Symbol("first")) -@test Base.return_types(f_getf_computed_symbol, Tuple{Pair{Int8,String}}) == [Int8] +@test Base.return_types() do + Val(Symbol("julia")) +end |> only == Val{:julia} +@test Base.return_types() do p::Pair{Int8,String} + getfield(p, Symbol("first")) +end |> only == Int8 # issue #33954 struct X33954 @@ -3096,8 +3571,20 @@ function pickvarnames(x::Vector{Any}) end @test pickvarnames(:a) === :a @test pickvarnames(Any[:a, :b]) === (:a, :b) -@test only(Base.return_types(pickvarnames, (Vector{Any},))) == Tuple{Vararg{Union{Symbol, Tuple}}} -@test only(Base.code_typed(pickvarnames, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, 
Tuple{Vararg{Union{Symbol, Tuple}}}}}} +@test only(Base.return_types(pickvarnames, (Vector{Any},))) == Tuple +@test only(Base.code_typed(pickvarnames, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, Tuple}}} + +# make sure this converges in a reasonable amount of time +function pickvarnames2(x::Vector{Any}) + varnames = () + for a in x + varnames = (varnames..., pickvarnames(a) ) + end + return varnames +end +@test only(Base.return_types(pickvarnames2, (Vector{Any},))) == Tuple{Vararg{Union{Symbol, Tuple}}} +@test only(Base.code_typed(pickvarnames2, (Vector{Any},), optimize=false))[2] == Tuple{Vararg{Union{Symbol, Tuple}}} + @test map(>:, [Int], [Int]) == [true] @@ -3153,9 +3640,16 @@ end Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}} # Test that these don't throw @test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Char}}, 0) == Tuple{Vararg{Int}} +@test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Int}}, 0) == Union{} +@test Core.Compiler.typesubtract(Tuple{String,Int}, Tuple{String,Vararg{Int}}, 0) == Union{} +@test Core.Compiler.typesubtract(Tuple{String,Vararg{Int}}, Tuple{String,Int}, 0) == Tuple{String,Vararg{Int}} @test Core.Compiler.typesubtract(NTuple{3, Real}, NTuple{3, Char}, 0) == NTuple{3, Real} @test Core.Compiler.typesubtract(NTuple{3, Union{Real, Char}}, NTuple{2, Char}, 0) == NTuple{3, Union{Real, Char}} +@test Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Vararg{Int}}) +@test !Core.Compiler.compatible_vatuple(Tuple{String,Int}, Tuple{String,Vararg{Int}}) +@test !Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Int}) + @test Base.return_types(Issue35566.f) == [Val{:expected}] # constant prop through keyword arguments @@ -3277,10 +3771,10 @@ Base.iterate(::Itr41839_3 , i) = i < 16 ? (i, i + 1) : nothing # issue #32699 f32699(a) = (id = a[1],).id -@test Base.return_types(f32699, (Vector{Union{Int,Missing}},)) == Any[Union{Int,Missing}] +@test only(Base.return_types(f32699, (Vector{Union{Int,Missing}},))) == Union{Int,Missing} g32699(a) = Tuple{a} -@test Base.return_types(g32699, (Type{<:Integer},))[1] == Type{<:Tuple{Any}} -@test Base.return_types(g32699, (Type,))[1] == Type{<:Tuple} +@test only(Base.return_types(g32699, (Type{<:Integer},))) <: Type{<:Tuple{Any}} +@test only(Base.return_types(g32699, (Type,))) <: Type{<:Tuple} # Inference precision of union-split calls function f_apply_union_split(fs, x) @@ -3311,10 +3805,11 @@ f_generator_splat(t::Tuple) = tuple((identity(l) for l in t)...) 
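# --- editorial sketch, not part of the patch above ---------------------------
# A recurring change in this file: `Core.Compiler` queries that used to pick a
# lattice implicitly now take it as an explicit first argument (e.g.
# `unionsplitcost`, `apply_type_nothrow`, `sizeof_tfunc`). The tests keep the
# old call shape by wrapping the new API in a local closure, for example:
let CC = Core.Compiler
    sizeof_tfunc(@nospecialize xs...) = CC.sizeof_tfunc(CC.fallback_lattice, xs...)
    sizeof_tfunc(UnionAll)  # === Int, matching the explicit-lattice test below
end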
# Issue #36710 - sizeof(::UnionAll) tfunc correctness @test (sizeof(Ptr),) == sizeof.((Ptr,)) == sizeof.((Ptr{Cvoid},)) -@test Core.Compiler.sizeof_tfunc(UnionAll) === Int +@test Core.Compiler.sizeof_tfunc(Core.Compiler.fallback_lattice, UnionAll) === Int @test !Core.Compiler.sizeof_nothrow(UnionAll) -@test Base.return_types(Expr) == Any[Expr] +@test only(Base.return_types(Core._expr)) === Expr +@test only(Base.return_types(Core.svec, (Any,))) === Core.SimpleVector # Use a global constant to rely less on unrelated constant propagation const const_int32_typename = Int32.name @@ -3338,44 +3833,46 @@ for badf in [getfield_const_typename_bad1, getfield_const_typename_bad2] @test_throws TypeError badf() end -@test Core.Compiler.return_type(apply26826, Tuple{typeof(sizeof), Vararg{DataType}}) == Int -@test Core.Compiler.return_type(apply26826, Tuple{typeof(sizeof), DataType, Vararg}) == Int -@test Core.Compiler.return_type(apply26826, Tuple{typeof(sizeof), DataType, Any, Vararg}) == Union{} -@test Core.Compiler.return_type(apply26826, Tuple{typeof(===), Vararg}) == Bool -@test Core.Compiler.return_type(apply26826, Tuple{typeof(===), Any, Vararg}) == Bool -@test Core.Compiler.return_type(apply26826, Tuple{typeof(===), Any, Any, Vararg}) == Bool -@test Core.Compiler.return_type(apply26826, Tuple{typeof(===), Any, Any, Any, Vararg}) == Union{} -@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Vararg{Symbol}}) == Union{} -@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Vararg{Symbol}}) == Symbol -@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Vararg{Integer}}) == Integer -@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Vararg}) == Integer -@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Vararg}) == Integer -@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Any, Vararg}) == Union{} -@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Vararg}) == Expr -@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Any, Vararg}) == Expr -@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Any, Any, Vararg}) == Expr -@test Core.Compiler.return_type(apply26826, Tuple{typeof(applicable), Vararg}) == Bool -@test Core.Compiler.return_type(apply26826, Tuple{typeof(applicable), Any, Vararg}) == Bool -@test Core.Compiler.return_type(apply26826, Tuple{typeof(applicable), Any, Any, Vararg}) == Bool -@test Core.Compiler.return_type(apply26826, Tuple{typeof(applicable), Any, Any, Any, Vararg}) == Bool -@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Vararg}) == Int -@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Vararg}) == Int -@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Any, Vararg}) == Int -@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Any, Any, Vararg}) == Int -@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Any, Any, Any, Any, Any, Vararg}) == Union{} -@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Vararg}) == Any -@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Vararg}) == Any -@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Vararg}) == Any -@test Core.Compiler.return_type(apply26826, 
Tuple{typeof(fieldtype), Any, Any, Any, Vararg}) == Any -@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Any, Any, Vararg}) == Union{} -@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Vararg}) == Any -@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Any, Vararg}) == Any -@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Any, Any, Vararg}) == Any +# tfuncs precision with vararg argument +apply_fargs(f, args...) = f(args...) +@test only(Base.return_types(apply_fargs, Tuple{typeof(sizeof), Vararg{DataType}})) == Int +@test only(Base.return_types(apply_fargs, Tuple{typeof(sizeof), DataType, Vararg})) == Int +@test only(Base.return_types(apply_fargs, Tuple{typeof(sizeof), DataType, Any, Vararg})) == Union{} +@test only(Base.return_types(apply_fargs, Tuple{typeof(===), Vararg})) == Bool +@test only(Base.return_types(apply_fargs, Tuple{typeof(===), Any, Vararg})) == Bool +@test only(Base.return_types(apply_fargs, Tuple{typeof(===), Any, Any, Vararg})) == Bool +@test only(Base.return_types(apply_fargs, Tuple{typeof(===), Any, Any, Any, Vararg})) == Union{} +@test only(Base.return_types(apply_fargs, Tuple{typeof(setfield!), Vararg{Symbol}})) == Union{} +@test only(Base.return_types(apply_fargs, Tuple{typeof(setfield!), Any, Vararg{Symbol}})) == Symbol +@test only(Base.return_types(apply_fargs, Tuple{typeof(setfield!), Any, Symbol, Vararg{Integer}})) == Integer +@test only(Base.return_types(apply_fargs, Tuple{typeof(setfield!), Any, Symbol, Integer, Vararg})) == Integer +@test only(Base.return_types(apply_fargs, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Vararg})) == Integer +@test only(Base.return_types(apply_fargs, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Any, Vararg})) == Union{} +@test only(Base.return_types(apply_fargs, Tuple{typeof(Core._expr), Vararg})) == Expr +@test only(Base.return_types(apply_fargs, Tuple{typeof(Core._expr), Any, Vararg})) == Expr +@test only(Base.return_types(apply_fargs, Tuple{typeof(Core._expr), Any, Any, Vararg})) == Expr +@test only(Base.return_types(apply_fargs, Tuple{typeof(applicable), Vararg})) == Bool +@test only(Base.return_types(apply_fargs, Tuple{typeof(applicable), Any, Vararg})) == Bool +@test only(Base.return_types(apply_fargs, Tuple{typeof(applicable), Any, Any, Vararg})) == Bool +@test only(Base.return_types(apply_fargs, Tuple{typeof(applicable), Any, Any, Any, Vararg})) == Bool +@test only(Base.return_types(apply_fargs, Tuple{typeof(getfield), Tuple{Int}, Vararg})) == Int +@test only(Base.return_types(apply_fargs, Tuple{typeof(getfield), Tuple{Int}, Any, Vararg})) == Int +@test only(Base.return_types(apply_fargs, Tuple{typeof(getfield), Tuple{Int}, Any, Any, Vararg})) == Int +@test only(Base.return_types(apply_fargs, Tuple{typeof(getfield), Tuple{Int}, Any, Any, Any, Vararg})) == Int +@test only(Base.return_types(apply_fargs, Tuple{typeof(getfield), Any, Any, Any, Any, Any, Vararg})) == Union{} +@test only(Base.return_types(apply_fargs, Tuple{typeof(fieldtype), Vararg})) == Any +@test only(Base.return_types(apply_fargs, Tuple{typeof(fieldtype), Any, Vararg})) == Any +@test only(Base.return_types(apply_fargs, Tuple{typeof(fieldtype), Any, Any, Vararg})) == Any +@test only(Base.return_types(apply_fargs, Tuple{typeof(fieldtype), Any, Any, Any, Vararg})) == Any +@test only(Base.return_types(apply_fargs, Tuple{typeof(fieldtype), Any, Any, Any, Any, Vararg})) == Union{} +@test only(Base.return_types(apply_fargs, 
Tuple{typeof(Core.apply_type), Vararg})) == Any +@test only(Base.return_types(apply_fargs, Tuple{typeof(Core.apply_type), Any, Vararg})) == Any +@test only(Base.return_types(apply_fargs, Tuple{typeof(Core.apply_type), Any, Any, Vararg})) == Any f_apply_cglobal(args...) = cglobal(args...) -@test Core.Compiler.return_type(f_apply_cglobal, Tuple{Vararg{Type{Int}}}) == Ptr -@test Core.Compiler.return_type(f_apply_cglobal, Tuple{Any, Vararg{Type{Int}}}) == Ptr -@test Core.Compiler.return_type(f_apply_cglobal, Tuple{Any, Type{Int}, Vararg{Type{Int}}}) == Ptr{Int} -@test Core.Compiler.return_type(f_apply_cglobal, Tuple{Any, Type{Int}, Type{Int}, Vararg{Type{Int}}}) == Union{} +@test only(Base.return_types(f_apply_cglobal, Tuple{Vararg{Type{Int}}})) == Ptr +@test only(Base.return_types(f_apply_cglobal, Tuple{Any, Vararg{Type{Int}}})) == Ptr +@test only(Base.return_types(f_apply_cglobal, Tuple{Any, Type{Int}, Vararg{Type{Int}}})) == Ptr{Int} +@test only(Base.return_types(f_apply_cglobal, Tuple{Any, Type{Int}, Type{Int}, Vararg{Type{Int}}})) == Union{} # issue #37532 @test Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{Int}}, Int]) @@ -3441,7 +3938,7 @@ end end end end - @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 5)", string(timingmod.children)) + @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children)) # END LINE NUMBER SENSITIVITY # Recursive function @@ -3492,7 +3989,7 @@ end end # issue #37638 -@test isa(Core.Compiler.return_type(() -> (nothing, Any[]...)[2], Tuple{}), Type) +@test only(Base.return_types(() -> (nothing, Any[]...)[2])) isa Type # Issue #37943 f37943(x::Any, i::Int) = getfield((x::Pair{false, Int}), i) @@ -3522,25 +4019,51 @@ g38888() = S38888(Base.inferencebarrier(3), nothing) @test g38888() isa S38888 f_inf_error_bottom(x::Vector) = isempty(x) ? error(x[1]) : x -@test Core.Compiler.return_type(f_inf_error_bottom, Tuple{Vector{Any}}) == Vector{Any} - -# @constprop :aggressive -@noinline g_nonaggressive(y, x) = Val{x}() -@noinline Base.@constprop :aggressive g_aggressive(y, x) = Val{x}() - -f_nonaggressive(x) = g_nonaggressive(x, 1) -f_aggressive(x) = g_aggressive(x, 1) - -# The first test just makes sure that improvements to the compiler don't -# render the annotation effectless. -@test Base.return_types(f_nonaggressive, Tuple{Int})[1] == Val -@test Base.return_types(f_aggressive, Tuple{Int})[1] == Val{1} - -# @constprop :none -@noinline Base.@constprop :none g_noaggressive(flag::Bool) = flag ? 
1 : 1.0 -ftrue_noaggressive() = g_noaggressive(true) -@test only(Base.return_types(ftrue_noaggressive, Tuple{})) == Union{Int,Float64} - +@test only(Base.return_types(f_inf_error_bottom, Tuple{Vector{Any}})) == Vector{Any} + +# @constprop annotation +@noinline f_constprop_simple(f, x) = (f(x); Val{x}()) +Base.@constprop :aggressive f_constprop_aggressive(f, x) = (f(x); Val{x}()) +Base.@constprop :aggressive @noinline f_constprop_aggressive_noinline(f, x) = (f(x); Val{x}()) +Base.@constprop :none f_constprop_none(f, x) = (f(x); Val{x}()) +Base.@constprop :none @inline f_constprop_none_inline(f, x) = (f(x); Val{x}()) + +@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_simple))) +@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_simple))) +@test Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive))) +@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_aggressive))) +@test Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_aggressive_noinline))) +@test !Core.Compiler.is_no_constprop(only(methods(f_constprop_aggressive_noinline))) +@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_none))) +@test Core.Compiler.is_no_constprop(only(methods(f_constprop_none))) +@test !Core.Compiler.is_aggressive_constprop(only(methods(f_constprop_none_inline))) +@test Core.Compiler.is_no_constprop(only(methods(f_constprop_none_inline))) + +# make sure that improvements to the compiler don't render the annotation effectless. +@test Base.return_types((Function,)) do f + f_constprop_simple(f, 1) +end |> only == Val +@test Base.return_types((Function,)) do f + f_constprop_aggressive(f, 1) +end |> only == Val{1} +@test Base.return_types((Function,)) do f + f_constprop_aggressive_noinline(f, 1) +end |> only == Val{1} +@test Base.return_types((Function,)) do f + f_constprop_none(f, 1) +end |> only == Val +@test Base.return_types((Function,)) do f + f_constprop_none_inline(f, 1) +end |> only == Val + +# anonymous function support for `@constprop` +@test Base.return_types((Function,)) do f + map((1,2,3)) do x + Base.@constprop :aggressive + f(x) + return Val{x}() + end +end |> only == Tuple{Val{1},Val{2},Val{3}} function splat_lotta_unions() a = Union{Tuple{Int},Tuple{String,Vararg{Int}},Tuple{Int,Vararg{Int}}}[(2,)][1] @@ -3548,7 +4071,7 @@ function splat_lotta_unions() c = Union{Int8,Int16,Int32,Int64,Int128}[1][1] (a...,b...,c...) 
end -@test Core.Compiler.return_type(splat_lotta_unions, Tuple{}) >: Tuple{Int,Int,Int} +@test only(Base.return_types(splat_lotta_unions, Tuple{})) >: Tuple{Int,Int,Int} # Bare Core.Argument in IR @eval f_bare_argument(x) = $(Core.Argument(2)) @@ -3584,13 +4107,13 @@ end # argtypes let - tunion = Core.Compiler.switchtupleunion(Any[Union{Int32,Int64}, Core.Const(nothing)]) + tunion = Core.Compiler.switchtupleunion(Core.Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Core.Const(nothing)]) @test length(tunion) == 2 @test Any[Int32, Core.Const(nothing)] in tunion @test Any[Int64, Core.Const(nothing)] in tunion end let - tunion = Core.Compiler.switchtupleunion(Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)]) + tunion = Core.Compiler.switchtupleunion(Core.Compiler.ConstsLattice(), Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)]) @test length(tunion) == 4 @test Any[Int32, Float32, Core.Const(nothing)] in tunion @test Any[Int32, Float64, Core.Const(nothing)] in tunion @@ -3671,27 +4194,18 @@ end end end == [Union{Some{Float64}, Some{Int}, Some{UInt8}}] +# make sure inference on a recursive call graph with nested `Type`s terminates # https://github.com/JuliaLang/julia/issues/40336 -@testset "make sure a call with signatures with recursively nested Types terminates" begin - @test @eval Module() begin - f(@nospecialize(t)) = f(Type{t}) - - code_typed() do - f(Int) - end - true - end +f40336(@nospecialize(t)) = f40336(Type{t}) +@test Base.return_types() do + f40336(Int) +end |> only === Union{} - @test @eval Module() begin - f(@nospecialize(t)) = tdepth(t) == 10 ? t : f(Type{t}) - tdepth(@nospecialize(t)) = isempty(t.parameters) ? 1 : 1+tdepth(t.parameters[1]) - - code_typed() do - f(Int) - end - true - end -end +g40336(@nospecialize(t)) = tdepth(t) == 10 ? t : g40336(Type{t}) +tdepth(@nospecialize(t)) = (!isa(t, DataType) || isempty(t.parameters)) ? 1 : 1+tdepth(t.parameters[1]) +@test (Base.return_types() do + g40336(Int) +end |> only; true) # Make sure that const prop doesn't fall into cycles that aren't problematic # in the type domain @@ -3732,69 +4246,155 @@ function f_convert_me_to_ir(b, x) return a end -let - # Test the presence of PhiNodes in lowered IR by taking the above function, +let # Test the presence of PhiNodes in lowered IR by taking the above function, # running it through SSA conversion and then putting it into an opaque # closure. 
mi = Core.Compiler.specialize_method(first(methods(f_convert_me_to_ir)), Tuple{Bool, Float64}, Core.svec()) ci = Base.uncompressed_ast(mi.def) + ci.slottypes = Any[ Any for i = 1:length(ci.slotflags) ] ci.ssavaluetypes = Any[Any for i = 1:ci.ssavaluetypes] - sv = Core.Compiler.OptimizationState(mi, Core.Compiler.OptimizationParams(), - Core.Compiler.NativeInterpreter()) + sv = Core.Compiler.OptimizationState(mi, Core.Compiler.NativeInterpreter()) ir = Core.Compiler.convert_to_ircode(ci, sv) ir = Core.Compiler.slot2reg(ir, ci, sv) ir = Core.Compiler.compact!(ir) - Core.Compiler.replace_code_newstyle!(ci, ir, 4) - ci.ssavaluetypes = length(ci.code) + Core.Compiler.replace_code_newstyle!(ci, ir) + ci.ssavaluetypes = length(ci.ssavaluetypes) @test any(x->isa(x, Core.PhiNode), ci.code) oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(b, 1.0) @test Base.return_types(oc, Tuple{Bool}) == Any[Float64] - oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any, Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(true, 1.0) @test Base.return_types(oc, Tuple{}) == Any[Float64] end -@testset "constant prop' on `invoke` calls" begin - m = Module() +# constant prop' on `invoke` calls +invoke_constprop(a::Any, typ::Bool) = typ ? Any : :any +invoke_constprop(a::Number, typ::Bool) = typ ? Number : :number +@test Base.return_types((Any,)) do a + @invoke invoke_constprop(a::Any, true::Bool) +end |> only === Type{Any} +@test Base.return_types((Any,)) do a + @invoke invoke_constprop(a::Number, true::Bool) +end |> only === Type{Number} +@test Base.return_types((Any,)) do a + @invoke invoke_constprop(a::Any, false::Bool) +end |> only === Symbol +@test Base.return_types((Any,)) do a + @invoke invoke_constprop(a::Number, false::Bool) +end |> only === Symbol + +# https://github.com/JuliaLang/julia/issues/41024 +abstract type Interface41024 end +Base.getproperty(x::Interface41024, sym::Symbol) = + sym === :x ? getfield(x, sym)::Int : + return getfield(x, sym) # fallback + +# extended mixin, which expects additional field `y::Rational{Int}` +abstract type Interface41024Extended <: Interface41024 end +Base.getproperty(x::Interface41024Extended, sym::Symbol) = + sym === :y ? getfield(x, sym)::Rational{Int} : + return @invoke getproperty(x::Interface41024, sym::Symbol) + +@test Base.return_types((Interface41024Extended,)) do x + x.x +end |> only === Int - # simple cases - @eval m begin - f(a::Any, sym::Bool) = sym ? Any : :any - f(a::Number, sym::Bool) = sym ? Number : :number - end - @test (@eval m Base.return_types((Any,)) do a - @invoke f(a::Any, true::Bool) - end) == Any[Type{Any}] - @test (@eval m Base.return_types((Any,)) do a - @invoke f(a::Number, true::Bool) - end) == Any[Type{Number}] - @test (@eval m Base.return_types((Any,)) do a - @invoke f(a::Any, false::Bool) - end) == Any[Symbol] - @test (@eval m Base.return_types((Any,)) do a - @invoke f(a::Number, false::Bool) - end) == Any[Symbol] - - # https://github.com/JuliaLang/julia/issues/41024 - @eval m begin - # mixin, which expects common field `x::Int` - abstract type AbstractInterface end - Base.getproperty(x::AbstractInterface, sym::Symbol) = - sym === :x ? 
getfield(x, sym)::Int : - return getfield(x, sym) # fallback - - # extended mixin, which expects additional field `y::Rational{Int}` - abstract type AbstractInterfaceExtended <: AbstractInterface end - Base.getproperty(x::AbstractInterfaceExtended, sym::Symbol) = - sym === :y ? getfield(x, sym)::Rational{Int} : - return @invoke getproperty(x::AbstractInterface, sym::Symbol) - end - @test (@eval m Base.return_types((AbstractInterfaceExtended,)) do x - x.x - end) == Any[Int] +function call_func_itr(func, itr) + local r = 0 + r += func(itr[1]) + r += func(itr[2]) + r += func(itr[3]) + r += func(itr[4]) + r += func(itr[5]) + r +end + +global inline_checker = c -> c # untyped global, a call of this func will prevent inlining +# if `f` is inlined, `GlobalRef(m, :inline_checker)` should appear within the body of `invokef` +function is_inline_checker(@nospecialize stmt) + isa(stmt, GlobalRef) && stmt.name === :inline_checker +end + +function func_nospecialized(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining +end + +@inline function func_nospecialized_inline(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining (but forced by the annotation) +end + +Base.@nospecializeinfer function func_nospecializeinfer(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining +end + +Base.@nospecializeinfer @inline function func_nospecializeinfer_inline(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining (but forced by the annotation) +end + +Base.@nospecializeinfer Base.@constprop :aggressive function func_nospecializeinfer_constprop(c::Bool, @nospecialize a) + if c + return inline_checker(a) # dynamic dispatch, preventing inlining/constprop (but forced by the annotation) + end + return false +end +Base.@nospecializeinfer func_nospecializeinfer_constprop(@nospecialize a) = func_nospecializeinfer_constprop(false, a) + +itr_dispatchonly = Any[sin, muladd, "foo", nothing, missing] # untyped container can cause excessive runtime dispatch +itr_withinfernce = tuple(sin, muladd, "foo", nothing, missing) # typed container can cause excessive inference + +@testset "compilation annotations" begin + @testset "@nospecialize" begin + # `@nospecialize` should suppress runtime dispatches of `nospecialize` + @test call_func_itr(func_nospecialized, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecialized))))) == 1 + # `@nospecialize` should allow inference to happen + @test call_func_itr(func_nospecialized, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecialized))))) == 6 + @test count(is_inline_checker, @get_code call_func_itr(func_nospecialized, itr_dispatchonly)) == 0 + + # `@nospecialize` should allow inlinining + @test call_func_itr(func_nospecialized_inline, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecialized_inline))))) == 1 + @test call_func_itr(func_nospecialized_inline, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecialized_inline))))) == 6 + @test count(is_inline_checker, @get_code call_func_itr(func_nospecialized_inline, itr_dispatchonly)) == 5 + end + + @testset "@nospecializeinfer" begin + # `@nospecialize` should suppress runtime dispatches of `nospecialize` + @test call_func_itr(func_nospecializeinfer, itr_dispatchonly) == 2 + @test 
length(Base.specializations(only(methods((func_nospecializeinfer))))) == 1 + # `@nospecializeinfer` suppresses inference also + @test call_func_itr(func_nospecializeinfer, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer))))) == 1 + @test !any(is_inline_checker, @get_code call_func_itr(func_nospecializeinfer, itr_dispatchonly)) + + # `@nospecializeinfer` should allow inlinining + @test call_func_itr(func_nospecializeinfer_inline, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer_inline))))) == 1 + @test call_func_itr(func_nospecializeinfer_inline, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer_inline))))) == 1 + @test any(is_inline_checker, @get_code call_func_itr(func_nospecializeinfer_inline, itr_dispatchonly)) + + # `@nospecializeinfer` should allow constprop + @test Base.return_types((Any,)) do x + Val(func_nospecializeinfer_constprop(x)) + end |> only == Val{false} + @test call_func_itr(func_nospecializeinfer_constprop, itr_dispatchonly) == 0 + for m = methods(func_nospecializeinfer_constprop) + @test length(Base.specializations(m)) == 1 + end + @test call_func_itr(func_nospecializeinfer_constprop, itr_withinfernce) == 0 + for m = methods(func_nospecializeinfer_constprop) + @test length(Base.specializations(m)) == 1 + end + end end @testset "fieldtype for unions" begin # e.g. issue #40177 @@ -3816,30 +4416,28 @@ g41908() = f41908(Any[1][1]) # issue #42022 let x = Tuple{Int,Any}[ #= 1=# (0, Expr(:(=), Core.SlotNumber(3), 1)) - #= 2=# (0, Expr(:enter, 18)) + #= 2=# (0, EnterNode(17)) #= 3=# (2, Expr(:(=), Core.SlotNumber(3), 2.0)) - #= 4=# (2, Expr(:enter, 12)) + #= 4=# (2, EnterNode(12)) #= 5=# (4, Expr(:(=), Core.SlotNumber(3), '3')) #= 6=# (4, Core.GotoIfNot(Core.SlotNumber(2), 9)) - #= 7=# (4, Expr(:leave, 2)) + #= 7=# (4, Expr(:leave, Core.SSAValue(4), Core.SSAValue(2))) #= 8=# (0, Core.ReturnNode(1)) #= 9=# (4, Expr(:call, GlobalRef(Main, :throw))) - #=10=# (4, Expr(:leave, 1)) - #=11=# (2, Core.GotoNode(16)) - #=12=# (4, Expr(:leave, 1)) - #=13=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception))) - #=14=# (2, Expr(:call, GlobalRef(Main, :rethrow))) - #=15=# (2, Expr(:pop_exception, Core.SSAValue(4))) - #=16=# (2, Expr(:leave, 1)) - #=17=# (0, Core.GotoNode(22)) - #=18=# (2, Expr(:leave, 1)) - #=19=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception))) - #=20=# (0, nothing) - #=21=# (0, Expr(:pop_exception, Core.SSAValue(2))) - #=22=# (0, Core.ReturnNode(Core.SlotNumber(3))) + #=10=# (4, Expr(:leave, Core.SSAValue(4))) + #=11=# (2, Core.GotoNode(15)) + #=12=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception))) + #=13=# (2, Expr(:call, GlobalRef(Main, :rethrow))) + #=14=# (2, Expr(:pop_exception, Core.SSAValue(4))) + #=15=# (2, Expr(:leave, Core.SSAValue(2))) + #=16=# (0, Core.GotoNode(20)) + #=17=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception))) + #=18=# (0, nothing) + #=19=# (0, Expr(:pop_exception, Core.SSAValue(2))) + #=20=# (0, Core.ReturnNode(Core.SlotNumber(3))) ] - handler_at = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet()) - @test handler_at == first.(x) + handler_at, handlers = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet()) + @test map(x->x[1] == 0 ? 
0 : handlers[x[1]].enter_idx, handler_at) == first.(x) end @test only(Base.return_types((Bool,)) do y @@ -3856,7 +4454,7 @@ end nothing end return x - end) === Union{Int, Float64, Char} + end) === Union{Int, Char} # issue #42097 struct Foo42097{F} end @@ -3923,10 +4521,6 @@ end +(UnhandledVarargCond(false), xs...) end |> only === Int - @test (Base.return_types((Vector{Any},)) do xs - Core.kwfunc(xs...) - end; true) - @test Base.return_types((Vector{Vector{Int}},)) do xs Tuple(xs...) end |> only === Tuple{Vararg{Int}} @@ -3958,6 +4552,22 @@ end |> only == Tuple{Int,Int} s2.value.value end |> only == Int +# form PartialStruct for mutables with `const` field +import Core.Compiler: Const, ⊑ +mutable struct PartialMutable{S,T} + const s::S + t::T +end +@test Base.return_types((Int,)) do s + o = PartialMutable{Any,Any}(s, s) # form `PartialStruct(PartialMutable{Any,Any}, Any[Int,Any])` here + o.s +end |> only === Int +@test Const(nothing) ⊑ Base.return_types((Int,)) do s + o = PartialMutable{Any,Any}(s, s) # don't form `PartialStruct(PartialMutable{Any,Any}, Any[Int,Int])` here + o.t = nothing + o.t +end |> only + # issue #42986 @testset "narrow down `Union` using `isdefined` checks" begin # basic functionality @@ -4029,18 +4639,18 @@ end init = Base.ImmutableDict{Number,Number}() a = Const(init) - b = Core.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64]) + b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64]) c = Core.Compiler.tmerge(a, b) @test ⊑(a, c) && ⊑(b, c) @test c === typeof(init) - a = Core.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64]) + a = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64]) c = Core.Compiler.tmerge(a, b) @test ⊑(a, c) && ⊑(b, c) @test c.fields[2] === Any # or Number @test c.fields[3] === ComplexF64 - b = Core.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}]) + b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}]) c = Core.Compiler.tmerge(a, b) @test ⊑(a, c) @test ⊑(b, c) @@ -4054,45 +4664,48 @@ end g = Base.ImmutableDict(g, 1=>2) end end |> only === Union{} -end - -# Test that purity modeling doesn't accidentally introduce new world age issues -f_redefine_me(x) = x+1 -f_call_redefine() = f_redefine_me(0) -f_mk_opaque() = @Base.Experimental.opaque ()->Base.inferencebarrier(f_call_redefine)() -const op_capture_world = f_mk_opaque() -f_redefine_me(x) = x+2 -@test op_capture_world() == 1 -@test f_mk_opaque()() == 2 - -# Test that purity doesn't try to accidentally run unreachable code due to -# boundscheck elimination -function f_boundscheck_elim(n) - # Inbounds here assumes that this is only ever called with n==0, but of - # course the compiler has no way of knowing that, so it must not attempt - # to run the @inbounds `getfield(sin, 1)`` that ntuple generates. 
- ntuple(x->(@inbounds getfield(sin, x)), n) -end -@test Tuple{} <: code_typed(f_boundscheck_elim, Tuple{Int})[1][2] - -@test !Core.Compiler.builtin_nothrow(Core.get_binding_type, Any[Rational{Int}, Core.Const(:foo)], Any) -# Test that max_methods works as expected -@Base.Experimental.max_methods 1 function f_max_methods end + a = Val{Union{}} + a = Core.Compiler.tmerge(Union{a, Val{a}}, a) + @test a == Union{Val{Union{}}, Val{Val{Union{}}}} + a = Core.Compiler.tmerge(Union{a, Val{a}}, a) + @test a == Union{Val{Union{}}, Val{Val{Union{}}}, Val{Union{Val{Union{}}, Val{Val{Union{}}}}}} + a = Core.Compiler.tmerge(Union{a, Val{a}}, a) + @test a == Val + + a = Val{Union{}} + a = Core.Compiler.tmerge(Core.Compiler.JLTypeLattice(), Val{<:a}, a) + @test_broken a != Val{<:Val{Union{}}} + @test_broken a == Val{<:Val} || a == Val + + a = Tuple{Vararg{Tuple{}}} + a = Core.Compiler.tmerge(Core.Compiler.JLTypeLattice(), Tuple{a}, a) + @test a == Tuple{Vararg{Tuple{Vararg{Tuple{}}}}} + a = Core.Compiler.tmerge(Core.Compiler.JLTypeLattice(), Tuple{a}, a) + @test a == Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Tuple{}}}}}}} + a = Core.Compiler.tmerge(Core.Compiler.JLTypeLattice(), Tuple{a}, a) + @test a == Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Tuple{Vararg{Tuple{}}}}}}}}} + a = Core.Compiler.tmerge(Core.Compiler.JLTypeLattice(), Tuple{a}, a) + @test a == Tuple +end + +# Test that a function-wise `@max_methods` works as expected +Base.Experimental.@max_methods 1 function f_max_methods end f_max_methods(x::Int) = 1 f_max_methods(x::Float64) = 2 g_max_methods(x) = f_max_methods(x) -@test Core.Compiler.return_type(g_max_methods, Tuple{Int}) === Int -@test Core.Compiler.return_type(g_max_methods, Tuple{Any}) === Any +@test only(Base.return_types(g_max_methods, Tuple{Int})) === Int +@test only(Base.return_types(g_max_methods, Tuple{Any})) === Any -# Unit tests for BitSetBoundedMinPrioritySet -let bsbmp = Core.Compiler.BitSetBoundedMinPrioritySet(5) - Core.Compiler.push!(bsbmp, 2) - Core.Compiler.push!(bsbmp, 2) - @test Core.Compiler.popfirst!(bsbmp) == 2 - Core.Compiler.push!(bsbmp, 1) - @test Core.Compiler.popfirst!(bsbmp) == 1 - @test Core.Compiler.isempty(bsbmp) +# Test that a module-wise `@max_methods` works as expected +module Test43370 +using Test +Base.Experimental.@max_methods 1 +f_max_methods(x::Int) = 1 +f_max_methods(x::Float64) = 2 +g_max_methods(x) = f_max_methods(x) +@test only(Base.return_types(g_max_methods, Tuple{Int})) === Int +@test only(Base.return_types(g_max_methods, Tuple{Any})) === Any end # Make sure return_type_tfunc doesn't accidentally cause bad inference if used @@ -4102,157 +4715,897 @@ end Core.Compiler.return_type(+, NTuple{2, Rational}) end == Rational +# vararg-tuple comparison within `Compiler.PartialStruct` # https://github.com/JuliaLang/julia/issues/44965 -let t = Core.Compiler.tuple_tfunc(Any[Core.Const(42), Vararg{Any}]) - @test Core.Compiler.issimplertype(t, t) +let 𝕃ᵢ = Core.Compiler.fallback_lattice + t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Any}]) + @test Core.Compiler.issimplertype(𝕃ᵢ, t, t) + + t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Vararg{Union{}}]) + @test t === Const((42,)) + t = Core.Compiler.tuple_tfunc(𝕃ᵢ, Any[Const(42), Int, Vararg{Union{}}]) + @test t.typ === Tuple{Int, Int} + @test t.fields == Any[Const(42), Int] end -# https://github.com/JuliaLang/julia/issues/44763 -global x44763::Int = 0 -increase_x44763!(n) = (global x44763; x44763 += n) -invoke44763(x) = @invoke increase_x44763!(x) +foo_empty_vararg(i...) 
= i[2] +bar_empty_vararg(i) = foo_empty_vararg(10, 20, 30, i...) +@test bar_empty_vararg(Union{}[]) === 20 + + +# check the inference convergence with an empty vartable: +# the inference state for the toplevel chunk below will have an empty vartable, +# and so we may fail to terminate (or optimize) it if we don't update vartables correctly +let # NOTE make sure this toplevel chunk doesn't contain any local binding + Base.Experimental.@force_compile + global xcond::Bool = false + while xcond end +end +@test !xcond + +struct Issue45780 + oc::Core.OpaqueClosure{Tuple{}} +end +f45780() = Val{Issue45780(Base.Experimental.@opaque ()->1).oc()}() +@test (@inferred f45780()) == Val{1}() + +# issue #45600 +@test only(code_typed() do + while true + x = try finally end + end +end)[2] == Union{} +@test only(code_typed() do + while true + @time 1 + end +end)[2] == Union{} + +# compilerbarrier builtin +import Core: compilerbarrier +# runtime semantics +for setting = (:type, :const, :conditional) + @test compilerbarrier(setting, 42) == 42 + @test compilerbarrier(setting, :sym) == :sym +end +@test_throws ErrorException compilerbarrier(:nonexisting, 42) +@test_throws TypeError compilerbarrier("badtype", 42) +@test_throws ArgumentError compilerbarrier(:nonexisting, 42, nothing) +# barrier on abstract interpretation +@test Base.return_types((Int,)) do a + x = compilerbarrier(:type, a) # `x` won't be inferred as `x::Int` + return x +end |> only === Any @test Base.return_types() do - invoke44763(42) -end |> only === Int -@test x44763 == 0 + x = compilerbarrier(:const, 42) + if x == 42 # no constant information here, so inference also accounts for the else branch (leading to less accurate return type inference) + return x # but `x` is still inferred as `x::Int` at least here + else + return nothing + end +end |> only === Union{Int,Nothing} +@test Base.return_types((Union{Int,Nothing},)) do a + if compilerbarrier(:conditional, isa(a, Int)) + # the conditional information `a::Int` isn't available here (leading to less accurate return type inference) + return a + else + return nothing + end +end |> only === Union{Int,Nothing} +@test Base.return_types((Symbol,Int)) do setting, val + compilerbarrier(setting, val) +end |> only === Any # XXX we may want to have "compile-time" error for this instead +for setting = (#=:type, :const,=# :conditional,) + # a successful barrier on abstract interpretation should be eliminated at the optimization + @test @eval fully_eliminated((Int,)) do a + compilerbarrier($(QuoteNode(setting)), 42) + end +end + +# https://github.com/JuliaLang/julia/issues/46426 +@noinline typebarrier() = Base.inferencebarrier(0.0) +@noinline constbarrier() = Base.compilerbarrier(:const, 0.0) +let src = code_typed1() do + typebarrier() + end + @test any(isinvoke(:typebarrier), src.code) + @test Base.return_types() do + typebarrier() + end |> only === Any +end +let src = code_typed1() do + constbarrier() + end + @test any(isinvoke(:constbarrier), src.code) + @test Base.return_types() do + constbarrier() + end |> only === Float64 +end + +# Test that Const ⊑ PartialStruct respects vararg +@test Const((1,2)) ⊑ PartialStruct(Tuple{Vararg{Int}}, [Const(1), Vararg{Int}]) -# backedge insertion for Any-typed, effect-free frame -const CONST_DICT = let d = Dict() - for c in 'A':'z' - push!(d, c => Int(c)) +# Test that semi-concrete interpretation doesn't break on functions with while loops in them. 
+Base.@assume_effects :consistent :effect_free :terminates_globally function pure_annotated_loop(x::Int, y::Int) + for i = 1:2 + x += y end - d + return y end -Base.@assume_effects :foldable getcharid(c) = CONST_DICT[c] -@noinline callf(f, args...) = f(args...) -function entry_to_be_invalidated(c) - return callf(getcharid, c) +call_pure_annotated_loop(x) = Val{pure_annotated_loop(x, 1)}() +@test only(Base.return_types(call_pure_annotated_loop, Tuple{Int})) === Val{1} + +function isa_kindtype(T::Type{<:AbstractVector}) + if isa(T, DataType) + # `T` here should be inferred as `DataType` rather than `Type{<:AbstractVector}` + return T.name.name # should be inferred as ::Symbol + end + return nothing end -@test Base.infer_effects((Char,)) do x - entry_to_be_invalidated(x) +@test only(Base.return_types(isa_kindtype)) === Union{Nothing,Symbol} + +invoke_concretized1(a::Int) = a > 0 ? :int : nothing +invoke_concretized1(a::Integer) = a > 0 ? "integer" : nothing +# check if `invoke(invoke_concretized1, Tuple{Integer}, ::Int)` is foldable +@test Base.infer_effects((Int,)) do a + @invoke invoke_concretized1(a::Integer) +end |> Core.Compiler.is_foldable +@test Base.return_types() do + @invoke invoke_concretized1(42::Integer) +end |> only === String + +invoke_concretized2(a::Int) = a > 0 ? :int : nothing +invoke_concretized2(a::Integer) = a > 0 ? :integer : nothing +# check if `invoke(invoke_concretized2, Tuple{Integer}, ::Int)` is foldable +@test Base.infer_effects((Int,)) do a + @invoke invoke_concretized2(a::Integer) end |> Core.Compiler.is_foldable -@test fully_eliminated(; retval=97) do - entry_to_be_invalidated('a') +@test let + Base.Experimental.@force_compile + @invoke invoke_concretized2(42::Integer) +end === :integer + +# Test that abstract_apply doesn't fail to fully infer if the result is unused +struct FiniteIteration + n::Int end -getcharid(c) = CONST_DICT[c] # now this is not eligible for concrete evaluation -@test Base.infer_effects((Char,)) do x - entry_to_be_invalidated(x) -end |> !Core.Compiler.is_foldable -@test !fully_eliminated() do - entry_to_be_invalidated('a') +Base.iterate(f::FiniteIteration, i::Int = 0) = i < f.n ? (i, i+1) : nothing +function unused_apply_iterate() + tuple(FiniteIteration(4)...) 
+ return nothing end +@test fully_eliminated(unused_apply_iterate, ()) -# control flow backedge should taint `terminates` -@test Base.infer_effects((Int,)) do n - for i = 1:n; end -end |> !Core.Compiler.is_terminates +@testset "#45956: non-linearized cglobal needs special treatment for stmt effects" begin + function foo() + cglobal((a, )) + ccall(0, Cvoid, (Nothing,), b) + end + @test only(code_typed() do + cglobal((a, )) + ccall(0, Cvoid, (Nothing,), b) + end)[2] === Nothing +end -# Nothrow for assignment to globals -global glob_assign_int::Int = 0 -f_glob_assign_int() = global glob_assign_int += 1 -let effects = Base.infer_effects(f_glob_assign_int, ()) - @test !Core.Compiler.is_effect_free(effects) - @test Core.Compiler.is_nothrow(effects) +# singleton_type on slot wrappers +@test Base.return_types((Int,)) do x + c = isa(x, Int) # ::Conditional + c(false) # ::Union{} +end |> only === Union{} +@test Base.return_types((Tuple{typeof(typeof),Float64},)) do args + f = args[1] # ::MustAlias + v = args[2] # ::MustAlias + f(v) # ::Type{Float64} +end |> only === Type{Float64} + +# Issue #46839: `abstract_invoke` should handle incorrect call type +@test only(Base.return_types(()->invoke(BitSet, Any, x), ())) === Union{} +@test only(Base.return_types(()->invoke(BitSet, Union{Tuple{Int32},Tuple{Int64}}, 1), ())) === Union{} + +# Issue #47688: Abstract iteration should take into account `iterate` effects +global it_count47688 = 0 +struct CountsIterate47688{N}; end +function Base.iterate(::CountsIterate47688{N}, n=0) where N + global it_count47688 += 1 + n <= N ? (n, n+1) : nothing +end +foo47688() = tuple(CountsIterate47688{5}()...) +bar47688() = foo47688() +@test only(Base.return_types(bar47688)) == NTuple{6, Int} +@test it_count47688 == 0 +@test isa(bar47688(), NTuple{6, Int}) +@test it_count47688 == 7 +@test isa(foo47688(), NTuple{6, Int}) +@test it_count47688 == 14 + +# refine instantiation of partially-known NamedTuple that is known to be empty +function empty_nt_values(Tpl) + T = NamedTuple{(),Tpl} + nt = T(()) + values(nt) +end +function empty_nt_keys(Tpl) + T = NamedTuple{(),Tpl} + nt = T(()) + keys(nt) +end +@test Base.return_types(empty_nt_values, (Any,)) |> only === Tuple{} +@test Base.return_types(empty_nt_keys, (Any,)) |> only === Tuple{} +g() = empty_nt_values(Base.inferencebarrier(Tuple{})) +@test g() == () # Make sure to actually run this to test this in the inference world age + +# This is somewhat sensitive to the exact recursion level that inference is willing to do, but the intention +# is to test the case where inference limited a recursion, but then a forced constprop nevertheless managed +# to terminate the call. +Base.@constprop :aggressive type_level_recurse1(x...) = x[1] == 2 ? 1 : (length(x) > 100 ? x : type_level_recurse2(x[1] + 1, x..., x...)) +Base.@constprop :aggressive type_level_recurse2(x...) = type_level_recurse1(x...) +type_level_recurse_entry() = Val{type_level_recurse1(1)}() +@test Base.return_types(type_level_recurse_entry, ()) |> only == Val{1} + +# Test that inference doesn't give up if it can potentially refine effects, +# even if the return type is Any. 
+f_no_bail_effects_any(x::Any) = x +f_no_bail_effects_any(x::NamedTuple{(:x,), Tuple{Any}}) = getfield(x, 1) +g_no_bail_effects_any(x::Any) = f_no_bail_effects_any(x) +@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(g_no_bail_effects_any, Tuple{Any})) + +# issue #48374 +@test (() -> Union{<:Nothing})() == Nothing + +# :static_parameter accuracy +unknown_sparam_throw(::Union{Nothing, Type{T}}) where T = @isdefined(T) ? T::Type : nothing +unknown_sparam_nothrow1(x::Ref{T}) where T = @isdefined(T) ? T::Type : nothing +unknown_sparam_nothrow2(x::Ref{Ref{T}}) where T = @isdefined(T) ? T::Type : nothing +@test only(Base.return_types(unknown_sparam_throw, (Type{Int},))) == Type{Int} +@test only(Base.return_types(unknown_sparam_throw, (Type{<:Integer},))) == Type{<:Integer} +@test only(Base.return_types(unknown_sparam_throw, (Type,))) == Union{Nothing, Type} +@test_broken only(Base.return_types(unknown_sparam_throw, (Nothing,))) === Nothing +@test_broken only(Base.return_types(unknown_sparam_throw, (Union{Type{Int},Nothing},))) === Union{Nothing,Type{Int}} +@test only(Base.return_types(unknown_sparam_throw, (Any,))) === Union{Nothing,Type} +@test only(Base.return_types(unknown_sparam_nothrow1, (Ref,))) === Type +@test only(Base.return_types(unknown_sparam_nothrow2, (Ref{Ref{T}} where T,))) === Type + +struct Issue49027{Ty<:Number} + x::Ty +end +function issue49027(::Type{<:Issue49027{Ty}}) where Ty + if @isdefined Ty # should be false when `Ty` is given as a free type var. + return Ty::DataType + end + return nothing end -# Nothrow for setglobal! -global SETGLOBAL!_NOTHROW::Int = 0 -let effects = Base.infer_effects() do - setglobal!(@__MODULE__, :SETGLOBAL!_NOTHROW, 42) +@test only(Base.return_types(issue49027, (Type{Issue49027{TypeVar(:Ty)}},))) >: Nothing +@test isnothing(issue49027(Issue49027{TypeVar(:Ty)})) +function issue49027_integer(::Type{<:Issue49027{Ty}}) where Ty<:Integer + if @isdefined Ty # should be false when `Ty` is given as a free type var. 
+ return Ty::DataType end - @test Core.Compiler.is_nothrow(effects) + nothing +end +@test only(Base.return_types(issue49027_integer, (Type{Issue49027{TypeVar(:Ty,Int)}},))) >: Nothing +@test isnothing(issue49027_integer(Issue49027{TypeVar(:Ty,Int)})) + +function fapplicable end +gapplicable() = Val(applicable(fapplicable)) +gapplicable(x) = Val(applicable(fapplicable; x)) +@test only(Base.return_types(gapplicable, ())) === Val{false} +@test only(Base.return_types(gapplicable, (Int,))) === Val{false} +fapplicable() = 1 +@test only(Base.return_types(gapplicable, ())) === Val{true} +@test only(Base.return_types(gapplicable, (Int,))) === Val{false} +Base.delete_method(which(fapplicable, ())) +@test only(Base.return_types(gapplicable, ())) === Val{false} +@test only(Base.return_types(gapplicable, (Int,))) === Val{false} +fapplicable(; x) = x +@test only(Base.return_types(gapplicable, ())) === Val{true} +@test only(Base.return_types(gapplicable, (Int,))) === Val{true} +@test only(Base.return_types(()) do; applicable(); end) === Union{} +@test only(Base.return_types((Any,)) do x; Val(applicable(x...)); end) == Val +@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(+, 1, 2, x...)); end) == Val # could be improved to Val{true} +@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(+, 1, 2, 3, x...)); end) === Val{true} +@test only(Base.return_types((Int,)) do x; Val(applicable(+, 1, x)); end) === Val{true} +@test only(Base.return_types((Union{Int32,Int64},)) do x; Val(applicable(+, 1, x)); end) === Val{true} +@test only(Base.return_types((String,)) do x; Val(applicable(+, 1, x)); end) === Val{false} +fapplicable(::Int, ::Integer) = 2 +fapplicable(::Integer, ::Int32) = 3 +@test only(Base.return_types((Int32,)) do x; Val(applicable(fapplicable, 1, x)); end) === Val{false} +@test only(Base.return_types((Int64,)) do x; Val(applicable(fapplicable, 1, x)); end) === Val{true} +@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(tuple, x...)); end) === Val{true} +@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(sin, 1, x...)); end) == Val +@test only(Base.return_types((Tuple{Vararg{Int}},)) do x; Val(applicable(sin, 1, 2, x...)); end) === Val{false} + +function fhasmethod end +ghasmethod() = Val(hasmethod(fhasmethod, Tuple{})) +@test only(Base.return_types(ghasmethod, ())) === Val{false} +fhasmethod() = 1 +@test only(Base.return_types(ghasmethod, ())) === Val{true} +Base.delete_method(which(fhasmethod, ())) +@test only(Base.return_types(ghasmethod, ())) === Val{false} +@test only(Base.return_types(()) do; Core._hasmethod(); end) === Any +@test only(Base.return_types(()) do; Core._hasmethod(+, Tuple, 1); end) === Any +@test only(Base.return_types(()) do; Core._hasmethod(+, 1); end) === Bool +@test only(Base.return_types(()) do; Core._hasmethod(+, Tuple{1}); end) === Bool +@test only(Base.return_types((Any,)) do x; Val(hasmethod(x...)); end) == Val +@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Int})); end) === Val{true} +@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Int, Vararg{Int}})); end) === Val{false} +@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Int, Int, Vararg{Int}})); end) === Val{true} +@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Int})); end) === Val{true} +@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Union{Int32,Int64}})); end) === Val{true} +@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, 
Union{Int,String}})); end) === Val{false} +@test only(Base.return_types(()) do; Val(hasmethod(+, Tuple{Int, Any})); end) === Val{false} +@test only(Base.return_types() do; Val(hasmethod(+, Tuple{Int, String})); end) === Val{false} +fhasmethod(::Int, ::Integer) = 2 +fhasmethod(::Integer, ::Int32) = 3 +@test only(Base.return_types(()) do; Val(hasmethod(fhasmethod, Tuple{Int, Int32})); end) === Val{false} +@test only(Base.return_types(()) do; Val(hasmethod(fhasmethod, Tuple{Int, Int64})); end) === Val{true} +@test only(Base.return_types(()) do; Val(hasmethod(tuple, Tuple{Vararg{Int}})); end) === Val{true} +@test only(Base.return_types(()) do; Val(hasmethod(sin, Tuple{Int, Vararg{Int}})); end) == Val{false} +@test only(Base.return_types(()) do; Val(hasmethod(sin, Tuple{Int, Int, Vararg{Int}})); end) === Val{false} + +# interprocedural call inference from irinterp +@noinline Base.@assume_effects :total issue48679_unknown_any(x) = Base.inferencebarrier(x) + +@noinline _issue48679(y::Union{Nothing,T}) where {T} = T::Type +Base.@constprop :aggressive function issue48679(x, b) + if b + x = issue48679_unknown_any(x) + end + return _issue48679(x) end +@test Base.return_types((Float64,)) do x + issue48679(x, false) +end |> only == Type{Float64} -# we should taint `nothrow` if the binding doesn't exist and isn't fixed yet, -# as the cached effects can be easily wrong otherwise -# since the inference curently doesn't track "world-age" of global variables -@eval global_assignment_undefinedyet() = $(GlobalRef(@__MODULE__, :UNDEFINEDYET)) = 42 -setglobal!_nothrow_undefinedyet() = setglobal!(@__MODULE__, :UNDEFINEDYET, 42) -let effects = Base.infer_effects() do - global_assignment_undefinedyet() +Base.@constprop :aggressive @noinline _issue48679_const(b, y::Union{Nothing,T}) where {T} = b ? nothing : T::Type +Base.@constprop :aggressive function issue48679_const(x, b) + if b + x = issue48679_unknown_any(x) end - @test !Core.Compiler.is_nothrow(effects) + return _issue48679_const(b, x) +end +@test Base.return_types((Float64,)) do x + issue48679_const(x, false) +end |> only == Type{Float64} + +# `invoke` call in irinterp +@noinline _irinterp_invoke(x::Any) = :any +@noinline _irinterp_invoke(x::T) where T = T +Base.@constprop :aggressive Base.@assume_effects :foldable function irinterp_invoke(x::T, b) where T + return @invoke _irinterp_invoke(x::(b ? T : Any)) end -let effects = Base.infer_effects() do - setglobal!_nothrow_undefinedyet() +@test Base.return_types((Int,)) do x + irinterp_invoke(x, true) +end |> only == Type{Int} + +# recursion detection for semi-concrete interpretation +# avoid direct infinite loop via `concrete_eval_invoke` +Base.@assume_effects :foldable function recur_irinterp1(x, y) + if rand(Bool) + return x, y + end + return recur_irinterp1(x+1, y) +end +@test Base.return_types((Symbol,)) do y + recur_irinterp1(0, y) +end |> only === Tuple{Int,Symbol} +@test last(recur_irinterp1(0, :y)) === :y +# avoid indirect infinite loop via `concrete_eval_invoke` +Base.@assume_effects :foldable function recur_irinterp2(x, y) + if rand(Bool) + return x, y + end + return _recur_irinterp2(x+1, y) +end +Base.@assume_effects :foldable _recur_irinterp2(x, y) = @noinline recur_irinterp2(x, y) +@test Base.return_types((Symbol,)) do y + recur_irinterp2(0, y) +end |> only === Tuple{Int,Symbol} +@test last(recur_irinterp2(0, :y)) === :y + +# test Conditional Union splitting of info derived from fieldtype (e.g. in abstract setproperty! 
handling) +@test only(Base.return_types((Int, Pair{Int,Nothing}, Symbol)) do a, x, s + T = fieldtype(typeof(x), s) + if a isa T + throw(a) + else + return T end - @test !Core.Compiler.is_nothrow(effects) +end) == Type{Nothing} + +# Test that Core.Compiler.return_type inference works for the 1-arg version +@test Base.return_types() do + Core.Compiler.return_type(Tuple{typeof(+), Int, Int}) +end |> only == Type{Int} + +# Test that NamedTuple abstract iteration works for PartialStruct/Const +function nt_splat_const() + nt = (; x=1, y=2) + Val{tuple(nt...)[2]}() end -global UNDEFINEDYET::String = "0" -let effects = Base.infer_effects() do - global_assignment_undefinedyet() +@test @inferred(nt_splat_const()) == Val{2}() + +function nt_splat_partial(x::Int) + nt = (; x, y=2) + Val{tuple(nt...)[2]}() +end +@test @inferred(nt_splat_partial(42)) == Val{2}() + +# Test that irinterp refines based on discovered errors +Base.@assume_effects :foldable Base.@constprop :aggressive function kill_error_edge(b1, b2, xs, x) + y = b1 ? "julia" : xs[] + if b2 + a = length(y) + else + a = sin(y) end - @test !Core.Compiler.is_nothrow(effects) + a + x end -let effects = Base.infer_effects() do - setglobal!_nothrow_undefinedyet() + +Base.@assume_effects :foldable Base.@constprop :aggressive function kill_error_edge(b1, b2, xs, ys, x) + y = b1 ? xs[] : ys[] + if b2 + a = length(y) + else + a = sin(y) end - @test !Core.Compiler.is_nothrow(effects) + a + x end -@test_throws ErrorException setglobal!_nothrow_undefinedyet() -# Nothrow for setfield! -mutable struct SetfieldNothrow - x::Int +let src = code_typed1((Bool,Base.RefValue{Any},Int,)) do b2, xs, x + kill_error_edge(true, b2, xs, x) + end + @test count(@nospecialize(x)->isa(x, Core.PhiNode), src.code) == 0 end -f_setfield_nothrow() = SetfieldNothrow(0).x = 1 -let effects = Base.infer_effects(f_setfield_nothrow, ()) - # Technically effect free even though we use the heap, since the - # object doesn't escape, but the compiler doesn't know that. - #@test Core.Compiler.is_effect_free(effects) - @test Core.Compiler.is_nothrow(effects) + +let src = code_typed1((Bool,Base.RefValue{String}, Base.RefValue{Any},Int,)) do b2, xs, ys, x + kill_error_edge(true, b2, xs, ys, x) + end + @test count(@nospecialize(x)->isa(x, Core.PhiNode), src.code) == 0 +end + +struct Issue49785{S, T<:S} end +let 𝕃 = Core.Compiler.SimpleInferenceLattice.instance + argtypes = Any[Core.Compiler.Const(Issue49785), + Union{Type{String},Type{Int}}, + Union{Type{String},Type{Int}}] + rt = Type{Issue49785{<:Any, Int}} + # the following should not throw + @test !Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt) + @test code_typed() do + S = Union{Type{String},Type{Int}}[Int][1] + map(T -> Issue49785{S,T}, (a = S,)) + end isa Vector +end + +# `getindex(::SimpleVector, ::Int)` should be concrete-evaluated +@eval Base.return_types() do + $(Core.svec(1,Int,nothing))[2] +end |> only == Type{Int} +# https://github.com/JuliaLang/julia/issues/50544 +struct Issue50544{T<:Tuple} + t::T +end +Base.@propagate_inbounds f_issue50544(x, i, ii...) = f_issue50544(f_issue50544(x, i), ii...) +Base.@propagate_inbounds f_issue50544(::Type{Issue50544{T}}, i) where T = T.parameters[i] +g_issue50544(T...) 
= Issue50544{Tuple{T...}} +h_issue50544(x::T) where T = g_issue50544(f_issue50544(T, 1), f_issue50544(T, 2, 1)) +let x = Issue50544((1, Issue50544((2.0, 'x')))) + @test only(Base.return_types(h_issue50544, (typeof(x),))) == Type{Issue50544{Tuple{Int,Float64}}} end -# refine :consistent-cy effect inference using the return type information -@test Base.infer_effects((Any,)) do x - taint = Ref{Any}(x) # taints :consistent-cy, but will be adjusted - throw(taint) -end |> Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - if x < 0 - taint = Ref(x) # taints :consistent-cy, but will be adjusted - throw(DomainError(x, taint)) +# refine const-prop'ed `PartialStruct` with declared method signature type +Base.@constprop :aggressive function refine_partial_struct1((a, b)::Tuple{String,Int}) + if iszero(b) + println("b=0") # to prevent semi-concrete eval + return nothing + else + return a end - return nothing -end |> Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - if x < 0 - taint = Ref(x) # taints :consistent-cy, but will be adjusted - throw(DomainError(x, taint)) - end - return x == 0 ? nothing : x # should `Union` of isbitstype objects nicely -end |> Core.Compiler.is_consistent -@test Base.infer_effects((Symbol,Any)) do s, x - if s === :throw - taint = Ref{Any}(":throw option given") # taints :consistent-cy, but will be adjusted - throw(taint) - end - return s # should handle `Symbol` nicely -end |> Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - return Ref(x) -end |> !Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - return x < 0 ? Ref(x) : nothing -end |> !Core.Compiler.is_consistent -@test Base.infer_effects((Int,)) do x - if x < 0 - throw(DomainError(x, lazy"$x is negative")) +end +@test Base.return_types() do s::AbstractString + refine_partial_struct1((s, 42)) +end |> only === String + +function refine_partial_struct2(xs::Union{Int,String,Symbol}...) + first(xs) isa Int && iszero(first(xs)) && return nothing + for x in xs[2:end] + if x isa String + continue + else + return nothing + end end - return nothing -end |> Core.Compiler.is_foldable + return string(length(xs)) +end +@test Base.return_types() do s::AbstractString + refine_partial_struct2(42, s) +end |> only === String +# JET.test_call(s::AbstractString->Base._string(s, 'c')) -# check the inference convergence with an empty vartable: -# the inference state for the toplevel chunk below will have an empty vartable, -# and so we may fail to terminate (or optimize) it if we don't update vartables correctly -let # NOTE make sure this toplevel chunk doesn't contain any local binding - Base.Experimental.@force_compile - global xcond::Bool = false - while xcond end +# issue #45759 #46557 +g45759(x::Tuple{Any,Vararg}) = x[1] + _g45759(x[2:end]) +g45759(x::Tuple{}) = 0 +_g45759(x) = g45759(x) +@test only(Base.return_types(g45759, Tuple{Tuple{Int,Int,Int,Int,Int,Int,Int}})) == Int + +h45759(x::Tuple{Any,Vararg}; kwargs...) = x[1] + h45759(x[2:end]; kwargs...) +h45759(x::Tuple{}; kwargs...) 
= 0 +@test only(Base.return_types(h45759, Tuple{Tuple{Int,Int,Int,Int,Int,Int,Int}})) == Int + +# issue #50709 +@test Base.code_typed_by_type(Tuple{Type{Vector{S}} where {T, S<:AbstractVector{T}}, UndefInitializer, Int})[1][2] == Vector{<:AbstractVector{T}} where T + +@test only(Base.return_types((typeof([[[1]]]),)) do x + sum(x) do v + sum(length, v) + end +end) == Int + +struct FunctionSum{Tf} + functions::Tf end -@test !xcond +(F::FunctionSum)(x) = sum(f -> f(x), F.functions) +F = FunctionSum((x -> sqrt(x), FunctionSum((x -> x^2, x -> x^3)))) +@test @inferred(F(1.)) === 3.0 -struct Issue45780 - oc::Core.OpaqueClosure{Tuple{}} +f31485(arr::AbstractArray{T, 0}) where {T} = arr +indirect31485(arr) = f31485(arr) +f31485(arr::AbstractArray{T, N}) where {T, N} = indirect31485(view(arr, 1, ntuple(i -> :, Val(N-1))...)) +@test @inferred(f31485(zeros(3,3,3,3,3),)) == fill(0.0) + +# override const-prop' return type with the concrete-eval result +# if concrete-eval returns non-inlineable constant +Base.@assume_effects :foldable function continue_const_prop(i, j) + chars = map(Char, i:j) + String(chars) +end +@test Base.return_types() do + Val(length(continue_const_prop(1, 5))) +end |> only === Val{5} +@test fully_eliminated() do + length(continue_const_prop(1, 5)) +end + +# issue #51090 +@noinline function bar51090(b) + b == 0 && return + r = foo51090(b - 1) + Base.donotdelete(b) + return r +end +foo51090(b) = return bar51090(b) +@test !fully_eliminated(foo51090, (Int,)) + +# exploit throwness from concrete eval for intrinsics +@test Base.return_types() do + Base.or_int(true, 1) +end |> only === Union{} + +# [add|or]_int tfuncs +@test Base.return_types((Bool,)) do b + Val(Core.Intrinsics.and_int(b, false)) +end |> only == Val{false} +@test Base.return_types((Bool,)) do b + Val(Core.Intrinsics.or_int(true, b)) +end |> only == Val{true} + +# https://github.com/JuliaLang/julia/issues/51310 +@test code_typed() do + b{c} = d... +end |> only |> first isa Core.CodeInfo + +abstract_call_unionall_vararg(some::Some{Any}) = UnionAll(some.value...) 
+@test only(Base.return_types(abstract_call_unionall_vararg)) !== Union{} +let TV = TypeVar(:T) + t = Vector{TV} + some = Some{Any}((TV, t)) + @test abstract_call_unionall_vararg(some) isa UnionAll +end + +# use `Vararg` type constraints +use_vararg_constraint1(args::Vararg{T,N}) where {T,N} = Val(T), Val(N) +@test only(Base.return_types(use_vararg_constraint1, Tuple{Int,Int})) == Tuple{Val{Int},Val{2}} +use_vararg_constraint2(args::Vararg{T,N}) where {T,N} = Val(T), N +@test only(Base.return_types(use_vararg_constraint2, Tuple{Vararg{Int}})) == Tuple{Val{Int},Int} +use_vararg_constraint3(args::NTuple{N,T}) where {T,N} = Val(T), Val(N) +@test only(Base.return_types(use_vararg_constraint3, Tuple{Tuple{Int,Int}})) == Tuple{Val{Int},Val{2}} +use_vararg_constraint4(args::NTuple{N,T}) where {T,N} = Val(T), N +@test only(Base.return_types(use_vararg_constraint4, Tuple{NTuple{N,Int}} where N)) == Tuple{Val{Int},Int} + +# issue 51228 +global whatever_unknown_value51228 +f51228() = f51228(whatever_unknown_value51228) +f51228(x) = 1 +f51228(::Vararg{T,T}) where {T} = "2" +@test only(Base.return_types(f51228, ())) == Int + +struct A51317 + b::Tuple{1} + A1() = new() +end +struct An51317 + a::Int + b::Tuple{1} + An51317() = new() +end +@test only(Base.return_types((x,f) -> getfield(x, f), (A51317, Symbol))) === Union{} +@test only(Base.return_types((x,f) -> getfield(x, f), (An51317, Symbol))) === Int +@test only(Base.return_types(x -> getfield(x, :b), (A51317,))) === Union{} +@test only(Base.return_types(x -> getfield(x, :b), (An51317,))) === Union{} + +# Don't visit the catch block for empty try/catch +function completely_dead_try_catch() + try + catch + return 2.0 + end + return 1 +end +@test Base.return_types(completely_dead_try_catch) |> only === Int +@test fully_eliminated(completely_dead_try_catch) + +function nothrow_try_catch() + try + 1+1 + catch + return 2.0 + end + return 1 +end +@test Base.return_types(nothrow_try_catch) |> only === Int +@test fully_eliminated(nothrow_try_catch) + +may_error(b) = Base.inferencebarrier(b) && error() +function phic_type1() + a = 1 + try + may_error(false) + a = 1.0 + catch + return a + end + return 2 +end +@test Base.return_types(phic_type1) |> only === Int +@test phic_type1() === 2 + +function phic_type2() + a = 1 + try + may_error(false) + a = 1.0 + may_error(false) + catch + return a + end + return 2 +end +@test Base.return_types(phic_type2) |> only === Union{Int, Float64} +@test phic_type2() === 2 + +function phic_type3() + a = 1 + try + may_error(false) + a = 1.0 + may_error(false) + if Base.inferencebarrier(false) + a = Ref(1) + elseif Base.inferencebarrier(false) + a = nothing + end + catch + return a + end + return 2 +end +@test Base.return_types(phic_type3) |> only === Union{Int, Float64} +@test phic_type3() === 2 + +# Issue #51852 +function phic_type4() + a = (;progress = "a") + try + may_error(false) + let b = Base.inferencebarrier(true) ? (;progress = 1.0) : a + a = b + end + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type4) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::String}} +@test phic_type4() === (;progress = 1.0) + +function phic_type5() + a = (;progress = "a") + try + vals = (a, (progress=1.0,)) + may_error(false) + a = vals[Base.inferencebarrier(false) ? 
1 : 2] + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type5) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::String}} +@test phic_type5() === (;progress = 1.0) + +function phic_type6() + a = Base.inferencebarrier(true) ? (;progress = "a") : (;progress = Ref{Any}(0)) + try + may_error(false) + let b = Base.inferencebarrier(true) ? (;progress = 1.0) : a + a = b + end + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type6) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::Base.RefValue{Any}}, @NamedTuple{progress::String}} +@test phic_type6() === (;progress = 1.0) + +function phic_type7() + a = Base.inferencebarrier(true) ? (;progress = "a") : (;progress = Ref{Any}(0)) + try + vals = (a, (progress=1.0,)) + may_error(false) + a = vals[Base.inferencebarrier(false) ? 1 : 2] + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type7) |> only === Union{@NamedTuple{progress::Float64}, @NamedTuple{progress::Base.RefValue{Any}}, @NamedTuple{progress::String}} +@test phic_type7() === (;progress = 1.0) + +function phic_type8() + local a + try + may_error(true) + a = Base.inferencebarrier(1) + catch + end + + try + a = 2 + may_error(true) + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type8) |> only === Int +@test phic_type8() === 2 + +function phic_type9() + local a + try + may_error(false) + a = Base.inferencebarrier(false) ? 1 : nothing + catch + end + + try + a = 2 + may_error(true) + catch + end + GC.gc() + return a +end +@test Base.return_types(phic_type9) |> only === Int +@test phic_type9() === 2 + +function phic_type10() + local a + try + may_error(false) + a = Base.inferencebarrier(true) ? missing : nothing + catch + end + + try + Base.inferencebarrier(true) && (a = 2) + may_error(true) + catch + end + GC.gc() + return a::Int +end +@test Base.return_types(phic_type10) |> only === Int +@test phic_type10() === 2 + +undef_trycatch() = try (a_undef_trycatch = a_undef_trycatch, b = 2); return 1 catch end +# `global a_undef_trycatch` could be defined dynamically, so both paths must be allowed +@test Base.return_types(undef_trycatch) |> only === Union{Nothing, Int} +@test undef_trycatch() === nothing + +# Test that `exit` returns `Union{}` (issue #51856) +function test_exit_bottom(s) + n = tryparse(Int, s) + isnothing(n) && exit() + n +end +@test only(Base.return_types(test_exit_bottom, Tuple{String})) == Int + +function foo_typed_throw_error() + try + error() + catch e + if isa(e, ErrorException) + return 1.0 + end + end + return 1 +end +@test Base.return_types(foo_typed_throw_error) |> only === Float64 + +will_throw_no_method(x::Int) = 1 +function foo_typed_throw_metherr() + try + will_throw_no_method(1.0) + catch e + if isa(e, MethodError) + return 1.0 + end + end + return 1 +end +@test Base.return_types(foo_typed_throw_metherr) |> only === Float64 + +# refine `exct` when `:nothrow` is proven +Base.@assume_effects :nothrow function sin_nothrow(x::Float64) + x == Inf && return zero(x) + return sin(x) +end +@test Base.infer_exception_type(sin_nothrow, (Float64,)) == Union{} +@test Base.return_types((Float64,)) do x + try + return sin_nothrow(x) + catch err + return err + end +end |> only === Float64 +# for semi-concrete interpretation result too +Base.@constprop :aggressive function sin_maythrow(x::Float64, maythrow::Bool) + if maythrow + return sin(x) + else + return @noinline sin_nothrow(x) + end +end +@test Base.return_types((Float64,)) do x + try + return 
sin_maythrow(x, false) + catch err + return err + end +end |> only === Float64 + +# exception type from GotoIfNot +@test Base.infer_exception_type(c::Bool -> c ? 1 : 2) == Union{} +@test Base.infer_exception_type(c::Missing -> c ? 1 : 2) == TypeError +@test Base.infer_exception_type(c::Any -> c ? 1 : 2) == TypeError + +# semi-concrete interpretation accuracy +# https://github.com/JuliaLang/julia/issues/50037 +@inline countvars50037(bitflags::Int, var::Int) = bitflags >> 0 +@test Base.infer_return_type() do var::Int + Val(countvars50037(1, var)) +end == Val{1} + +# Issue #52168 +f52168(x, t::Type) = x::NTuple{2, Base.inferencebarrier(t)::Type} +@test f52168((1, 2.), Any) === (1, 2.) + +# Issue #27031 +let x = 1, _Any = Any + @noinline bar27031(tt::Tuple{T,T}, ::Type{Val{T}}) where {T} = notsame27031(tt) + @noinline notsame27031(tt::Tuple{T, T}) where {T} = error() + @noinline notsame27031(tt::Tuple{T, S}) where {T, S} = "OK" + foo27031() = bar27031((x, 1.0), Val{_Any}) + @test foo27031() == "OK" end -f45780() = Val{Issue45780(@Base.Experimental.opaque ()->1).oc()}() -@test (@inferred f45780()) == Val{1}() diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 044ba03bacf32..9e58f23fd755c 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -4,7 +4,8 @@ using Test using Base.Meta using Core: ReturnNode -include(normpath(@__DIR__, "irutils.jl")) +include("irutils.jl") +include("newinterp.jl") """ Helper to walk the AST and call a function on every node. @@ -28,7 +29,7 @@ function test_inlined_symbols(func, argtypes) ast = Expr(:block) ast.args = src.code walk(ast) do e - if isa(e, Core.Slot) + if isa(e, Core.SlotNumber) @test 1 <= e.id <= nl end if isa(e, Core.NewvarNode) @@ -70,7 +71,7 @@ function bar12620() foo_inl(i==1) end end -@test_throws UndefVarError(:y) bar12620() +@test_throws UndefVarError(:y, :local) bar12620() # issue #16165 @inline f16165(x) = (x = UInt(x) + 1) @@ -120,16 +121,14 @@ f29083(;μ,σ) = μ + σ*randn() g29083() = f29083(μ=2.0,σ=0.1) let c = code_typed(g29083, ())[1][1].code # make sure no call to kwfunc remains - @test !any(e->(isa(e,Expr) && ((e.head === :invoke && e.args[1].def.name === :kwfunc) || - (e.head === :foreigncall && e.args[1] === QuoteNode(:jl_get_keyword_sorter)))), - c) + @test !any(e->(isa(e,Expr) && (e.head === :invoke && e.args[1].def.name === :kwfunc)), c) end @testset "issue #19122: [no]inline of short func. def. 
with return type annotation" begin exf19122 = @macroexpand(@inline f19122()::Bool = true) exg19122 = @macroexpand(@noinline g19122()::Bool = true) - @test exf19122.args[2].args[1].args[1] == :inline - @test exg19122.args[2].args[1].args[1] == :noinline + @test exf19122.args[2].args[1].args[1] === :inline + @test exg19122.args[2].args[1].args[1] === :noinline @inline f19122()::Bool = true @noinline g19122()::Bool = true @@ -148,8 +147,10 @@ end s end - (src, _) = code_typed(sum27403, Tuple{Vector{Int}})[1] - @test !any(x -> x isa Expr && x.head === :invoke, src.code) + (src, _) = only(code_typed(sum27403, Tuple{Vector{Int}})) + @test !any(src.code) do x + x isa Expr && x.head === :invoke && x.args[2] !== Core.GlobalRef(Base, :throw_boundserror) + end end # check that ismutabletype(type) can be fully eliminated @@ -275,34 +276,55 @@ f34900(x, y::Int) = y f34900(x::Int, y::Int) = invoke(f34900, Tuple{Int, Any}, x, y) @test fully_eliminated(f34900, Tuple{Int, Int}; retval=Core.Argument(2)) -@testset "check jl_ir_flag_inlineable for inline macro" begin - @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(@inline x -> x)).source) - @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(x -> (@inline; x))).source) - @test !ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(x -> x)).source) - @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(@inline function f(x) x end)).source) - @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(function f(x) @inline; x end)).source) - @test !ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(function f(x) x end)).source) - @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods() do x @inline; x end).source) - @test !ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods() do x x end).source) +using Core.Compiler: is_declared_inline, is_declared_noinline + +@testset "is_declared_[no]inline" begin + @test is_declared_inline(only(methods(@inline x -> x))) + @test is_declared_inline(only(methods(x -> (@inline; x)))) + @test is_declared_inline(only(methods(@inline function f(x) x end))) + @test is_declared_inline(only(methods(function f(x) @inline; x end))) + @test is_declared_inline(only(methods() do x @inline; x end)) + @test is_declared_noinline(only(methods(@noinline x -> x))) + @test is_declared_noinline(only(methods(x -> (@noinline; x)))) + @test is_declared_noinline(only(methods(@noinline function f(x) x end))) + @test is_declared_noinline(only(methods(function f(x) @noinline; x end))) + @test is_declared_noinline(only(methods() do x @noinline; x end)) + @test !is_declared_inline(only(methods(x -> x))) + @test !is_declared_noinline(only(methods(x -> x))) + @test !is_declared_inline(only(methods(function f(x) x end))) + @test !is_declared_noinline(only(methods(function f(x) x end))) + @test !is_declared_inline(only(methods() do x x end)) + @test !is_declared_noinline(only(methods() do x x end)) +end + +using Core.Compiler: is_inlineable, set_inlineable! + +@testset "basic set_inlineable! 
functionality" begin + ci = code_typed1() do + x -> x + end + set_inlineable!(ci, true) + @test is_inlineable(ci) + set_inlineable!(ci, false) + @test !is_inlineable(ci) + @test_throws MethodError set_inlineable!(ci, 5) end const _a_global_array = [1] f_inline_global_getindex() = _a_global_array[1] let ci = code_typed(f_inline_global_getindex, Tuple{})[1].first - @test any(x->(isexpr(x, :call) && x.args[1] === GlobalRef(Base, :arrayref)), ci.code) + @test any(x->(isexpr(x, :call) && x.args[1] === GlobalRef(Base, :memoryrefget)), ci.code) end # Issue #29114 & #36087 - Inlining of non-tuple splats f_29115(x) = (x...,) @test @allocated(f_29115(1)) == 0 @test @allocated(f_29115(1=>2)) == 0 -let ci = code_typed(f_29115, Tuple{Int64})[1].first - @test length(ci.code) == 2 && isexpr(ci.code[1], :call) && - ci.code[1].args[1] === GlobalRef(Core, :tuple) +let src = code_typed(f_29115, Tuple{Int64}) |> only |> first + @test iscall((src, tuple), src.code[end-1]) end -let ci = code_typed(f_29115, Tuple{Pair{Int64, Int64}})[1].first - @test length(ci.code) == 4 && isexpr(ci.code[1], :call) && - ci.code[end-1].args[1] === GlobalRef(Core, :tuple) +let src = code_typed(f_29115, Tuple{Pair{Int64, Int64}}) |> only |> first + @test iscall((src, tuple), src.code[end-1]) end # Issue #37182 & #37555 - Inlining of pending nodes @@ -317,7 +339,7 @@ struct NonIsBitsDims dims::NTuple{N, Int} where N end NonIsBitsDims() = NonIsBitsDims(()) -@test fully_eliminated(NonIsBitsDims, (); retval=QuoteNode(NonIsBitsDims())) +@test fully_eliminated(NonIsBitsDims, (); retval=NonIsBitsDims()) struct NonIsBitsDimsUndef dims::NTuple{N, Int} where N @@ -341,18 +363,6 @@ struct RealConstrained{T <: Real}; end @test !fully_eliminated(x->(RealConstrained{x}; nothing), Tuple{Int}) @test !fully_eliminated(x->(RealConstrained{x}; nothing), Tuple{Type{Vector{T}} where T}) -# Check that pure functions with non-inlineable results still get deleted -struct Big - x::NTuple{1024, Int} -end -@Base.pure Big() = Big(ntuple(identity, 1024)) -function pure_elim_full() - Big() - nothing -end - -@test fully_eliminated(pure_elim_full, Tuple{}) - # Union splitting of convert f_convert_missing(x) = convert(Int64, x) let ci = code_typed(f_convert_missing, Tuple{Union{Int64, Missing}})[1][1], @@ -371,26 +381,6 @@ using Base.Experimental: @opaque f_oc_getfield(x) = (@opaque ()->x)() @test fully_eliminated(f_oc_getfield, Tuple{Int}) -import Core.Compiler: argextype, singleton_type -const EMPTY_SPTYPES = Any[] - -code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::Core.CodeInfo -get_code(args...; kwargs...) 
= code_typed1(args...; kwargs...).code - -# check if `x` is a dynamic call of a given function -iscall(y) = @nospecialize(x) -> iscall(y, x) -function iscall((src, f)::Tuple{Core.CodeInfo,Base.Callable}, @nospecialize(x)) - return iscall(x) do @nospecialize x - singleton_type(argextype(x, src, EMPTY_SPTYPES)) === f - end -end -iscall(pred::Base.Callable, @nospecialize(x)) = Meta.isexpr(x, :call) && pred(x.args[1]) - -# check if `x` is a statically-resolved call of a function whose name is `sym` -isinvoke(y) = @nospecialize(x) -> isinvoke(y, x) -isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x) -isinvoke(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :invoke) && pred(x.args[1]::Core.MethodInstance) - @testset "@inline/@noinline annotation before definition" begin M = Module() @eval M begin @@ -517,6 +507,17 @@ end Base.@constprop :aggressive noinlined_constprop_implicit(a) = a+g force_inline_constprop_implicit() = @inline noinlined_constprop_implicit(0) + function force_inline_constprop_cached1() + r1 = noinlined_constprop_implicit(0) + r2 = @inline noinlined_constprop_implicit(0) + return (r1, r2) + end + function force_inline_constprop_cached2() + r1 = @inline noinlined_constprop_implicit(0) + r2 = noinlined_constprop_implicit(0) + return (r1, r2) + end + @inline Base.@constprop :aggressive inlined_constprop_explicit(a) = a+g force_noinline_constprop_explicit() = @noinline inlined_constprop_explicit(0) @inline Base.@constprop :aggressive inlined_constprop_implicit(a) = a+g @@ -567,6 +568,12 @@ end let code = get_code(M.force_inline_constprop_implicit) @test all(!isinvoke(:noinlined_constprop_implicit), code) end + let code = get_code(M.force_inline_constprop_cached1) + @test count(isinvoke(:noinlined_constprop_implicit), code) == 1 + end + let code = get_code(M.force_inline_constprop_cached2) + @test count(isinvoke(:noinlined_constprop_implicit), code) == 1 + end let code = get_code(M.force_noinline_constprop_explicit) @test any(isinvoke(:inlined_constprop_explicit), code) @@ -580,6 +587,18 @@ end end end +@noinline fresh_edge_noinlined(a::Integer) = unresolvable(a) +let src = code_typed1((Integer,)) do x + @inline fresh_edge_noinlined(x) + end + @test count(iscall((src, fresh_edge_noinlined)), src.code) == 0 +end +let src = code_typed1((Integer,)) do x + @inline fresh_edge_noinlined(x) + end + @test count(iscall((src, fresh_edge_noinlined)), src.code) == 0 # should be idempotent +end + # force constant-prop' for `setproperty!` # https://github.com/JuliaLang/julia/pull/41882 let code = @eval Module() begin @@ -608,7 +627,7 @@ g41299(f::Tf, args::Vararg{Any,N}) where {Tf,N} = f(args...) 
@test_throws TypeError g41299(>:, 1, 2) # https://github.com/JuliaLang/julia/issues/42078 -# idempotency of callsite inling +# idempotency of callsite inlining function getcache(mi::Core.MethodInstance) cache = Core.Compiler.code_cache(Core.Compiler.NativeInterpreter()) codeinf = Core.Compiler.get(cache, mi, nothing) @@ -629,10 +648,9 @@ let f42078(a) end let # make sure to discard the inferred source - specs = collect(only(methods(f42078)).specializations) - mi = specs[findfirst(!isnothing, specs)]::Core.MethodInstance + mi = only(methods(f42078)).specializations::Core.MethodInstance codeinf = getcache(mi)::Core.CodeInstance - codeinf.inferred = nothing + @atomic codeinf.inferred = nothing end let # inference should re-infer `f42078(::Int)` and we should get the same code @@ -645,7 +663,7 @@ let end begin - # more idempotency of callsite inling + # more idempotency of callsite inlining # ----------------------------------- # this test case requires forced constant propagation for callsite inlined function call, # particularly, in the following example, the inlinear will look up `+ₚ(::Point, ::Const(Point(2.25, 4.75)))` @@ -697,9 +715,9 @@ begin end # https://github.com/JuliaLang/julia/issues/42246 -@test mktempdir() do dir +mktempdir() do dir cd(dir) do - code = quote + code = """ issue42246() = @noinline IOBuffer("a") let ci, rt = only(code_typed(issue42246)) @@ -712,12 +730,33 @@ end exit(1) end end - end |> string + """ cmd = `$(Base.julia_cmd()) --code-coverage=tmp.info -e $code` - success(pipeline(Cmd(cmd); stdout=stdout, stderr=stderr)) + @test success(pipeline(cmd; stdout, stderr)) end end +# callsite inlining with cached frames +issue49823_events = @NamedTuple{evid::Int8, base_time::Float64}[ + (evid = 1, base_time = 0.0), (evid = -1, base_time = 0.0)] +issue49823_fl1(t, events) = @inline findlast(x -> x.evid ∈ (1, 4) && x.base_time <= t, events) +issue49823_fl3(t, events) = @inline findlast(x -> any(==(x.evid), (1,4)) && x.base_time <= t, events) +issue49823_fl5(t, events) = begin + f = let t=t + x -> x.evid ∈ (1, 4) && x.base_time <= t + end + @inline findlast(f, events) +end +let src = @code_typed1 issue49823_fl1(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end +let src = @code_typed1 issue49823_fl3(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end +let src = @code_typed1 issue49823_fl5(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end + # Issue #42264 - crash on certain union splits let f(x) = (x...,) # Test splatting with a Union of non-{Tuple, SimpleVector} types that require creating new `iterate` calls @@ -769,8 +808,8 @@ end let src = code_typed((Union{Tuple{Int,Int,Int}, Vector{Int}},)) do xs g42840(xs, 2) end |> only |> first - # `(xs::Vector{Int})[a::Const(2)]` => `Base.arrayref(true, xs, 2)` - @test count(iscall((src, Base.arrayref)), src.code) == 1 + # `(xs::Vector{Int})[a::Const(2)]` + @test count(iscall((src, Base.memoryrefget)), src.code) == 1 @test count(isinvoke(:g42840), src.code) == 1 end @@ -934,7 +973,7 @@ let # aggressive inlining of single, abstract method match (with constant-prop'e # both callsite should be inlined with constant-prop'ed result @test count(isinvoke(:isType), src.code) == 2 @test count(isinvoke(:has_free_typevars), src.code) == 0 - # `isGoodType(y::Any)` isn't fully convered, thus a runtime type check and fallback dynamic dispatch should be inserted + # `isGoodType(y::Any)` isn't fully 
covered, thus a runtime type check and fallback dynamic dispatch should be inserted @test count(iscall((src,isGoodType2)), src.code) == 1 end @@ -995,13 +1034,6 @@ end @invoke conditional_escape!(false::Any, x::Any) end -@testset "strides for ReshapedArray (PR#44027)" begin - # Type-based contiguous check - a = vec(reinterpret(reshape,Int16,reshape(view(reinterpret(Int32,randn(10)),2:11),5,:))) - f(a) = only(strides(a)); - @test fully_eliminated(f, Tuple{typeof(a)}) && f(a) == 1 -end - @testset "elimination of `get_binding_type`" begin m = Module() @eval m begin @@ -1055,26 +1087,6 @@ struct FooTheRef x::Ref FooTheRef(v) = new(v === nothing ? THE_REF_NULL : THE_REF) end -let src = code_typed1() do - FooTheRef(nothing) - end - @test count(isnew, src.code) == 1 -end -let src = code_typed1() do - FooTheRef(0) - end - @test count(isnew, src.code) == 1 -end -let src = code_typed1() do - @invoke FooTheRef(nothing::Any) - end - @test count(isnew, src.code) == 1 -end -let src = code_typed1() do - @invoke FooTheRef(0::Any) - end - @test count(isnew, src.code) == 1 -end @test fully_eliminated() do FooTheRef(nothing) nothing @@ -1092,17 +1104,6 @@ end nothing end -# Test that the Core._apply_iterate bail path taints effects -function f_apply_bail(f) - f(()...) - return nothing -end -f_call_apply_bail(f) = f_apply_bail(f) -@test !fully_eliminated(f_call_apply_bail, Tuple{Function}) - -# Test that arraysize has proper effect modeling -@test fully_eliminated(M->(size(M, 2); nothing), Tuple{Matrix{Float64}}) - # DCE of non-inlined callees @noinline noninlined_dce_simple(a) = identity(a) @test fully_eliminated((String,)) do s @@ -1124,78 +1125,11 @@ Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x) noninlined_dce_new(s) nothing end -# should be resolved once we merge https://github.com/JuliaLang/julia/pull/43923 -@test_broken fully_eliminated((Union{Symbol,String},)) do s +@test fully_eliminated((Union{Symbol,String},)) do s noninlined_dce_new(s) nothing end -# Test that ambigous calls don't accidentally get nothrow effect -ambig_effect_test(a::Int, b) = 1 -ambig_effect_test(a, b::Int) = 1 -ambig_effect_test(a, b) = 1 -global ambig_unknown_type_global=1 -@noinline function conditionally_call_ambig(b::Bool, a) - if b - ambig_effect_test(a, ambig_unknown_type_global) - end - return 0 -end -function call_call_ambig(b::Bool) - conditionally_call_ambig(b, 1) - return 1 -end -@test !fully_eliminated(call_call_ambig, Tuple{Bool}) - -# Test that a missing methtable identification gets tainted -# appropriately -struct FCallback; f::Union{Nothing, Function}; end -f_invoke_callback(fc) = let f=fc.f; (f !== nothing && f(); nothing); end -function f_call_invoke_callback(f::FCallback) - f_invoke_callback(f) - return nothing -end -@test !fully_eliminated(f_call_invoke_callback, Tuple{FCallback}) - -# https://github.com/JuliaLang/julia/issues/41694 -Base.@assume_effects :terminates_globally function issue41694(x) - res = 1 - 1 < x < 20 || throw("bad") - while x > 1 - res *= x - x -= 1 - end - return res -end -@test fully_eliminated() do - issue41694(2) -end - -Base.@assume_effects :terminates_globally function recur_termination1(x) - x == 1 && return 1 - 1 < x < 20 || throw("bad") - return x * recur_termination1(x-1) -end -@test fully_eliminated() do - recur_termination1(12) -end -Base.@assume_effects :terminates_globally function recur_termination21(x) - x == 1 && return 1 - 1 < x < 20 || throw("bad") - return recur_termination22(x) -end -recur_termination22(x) = x * recur_termination21(x-1) -@test fully_eliminated() 
do - recur_termination21(12) + recur_termination22(12) -end - -const ___CONST_DICT___ = Dict{Any,Any}(Symbol(c) => i for (i, c) in enumerate('a':'z')) -Base.@assume_effects :foldable concrete_eval( - f, args...; kwargs...) = f(args...; kwargs...) -@test fully_eliminated() do - concrete_eval(getindex, ___CONST_DICT___, :a) -end - # https://github.com/JuliaLang/julia/issues/44732 struct Component44732 v @@ -1240,7 +1174,7 @@ let src = code_typed1(g_call_peel, Tuple{Any}) end const my_defined_var = 42 -@test fully_eliminated((); retval=42) do +@test fully_eliminated(; retval=42) do getglobal(@__MODULE__, :my_defined_var, :monotonic) end @test !fully_eliminated() do @@ -1260,22 +1194,6 @@ end return maybe_error_int(1) end -# Test that effect modeling for return_type doesn't incorrectly pick -# up the effects of the function being analyzed -function f_throws() - error() -end - -@noinline function return_type_unused(x) - Core.Compiler.return_type(f_throws, Tuple{}) - return x+1 -end - -@test fully_eliminated(Tuple{Int}) do x - return_type_unused(x) - return nothing -end - # Test that inlining doesn't accidentally delete a bad return_type call f_bad_return_type() = Core.Compiler.return_type(+, 1, 2) @test_throws MethodError f_bad_return_type() @@ -1291,7 +1209,8 @@ end # Test that we can inline a finalizer for a struct that does not otherwise escape @noinline nothrow_side_effect(x) = - @Base.assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid + Base.@assume_effects :total !:effect_free @ccall jl_(x::Any)::Cvoid +@test Core.Compiler.is_finalizer_inlineable(Base.infer_effects(nothrow_side_effect, (Nothing,))) mutable struct DoAllocNoEscape function DoAllocNoEscape() @@ -1300,7 +1219,6 @@ mutable struct DoAllocNoEscape end end end - let src = code_typed1() do for i = 1:1000 DoAllocNoEscape() @@ -1309,6 +1227,65 @@ let src = code_typed1() do @test count(isnew, src.code) == 0 end +# Test that a case when `Core.finalizer` is registered interprocedurally, +# but still eligible for SROA after inlining +mutable struct DoAllocNoEscapeInter end + +let src = code_typed1() do + for i = 1:1000 + obj = DoAllocNoEscapeInter() + finalizer(obj) do this + nothrow_side_effect(nothing) + end + end + end + @test count(isnew, src.code) == 0 +end + +function register_finalizer!(obj) + finalizer(obj) do this + nothrow_side_effect(nothing) + end +end +let src = code_typed1() do + for i = 1:1000 + obj = DoAllocNoEscapeInter() + register_finalizer!(obj) + end + end + @test count(isnew, src.code) == 0 +end + +function genfinalizer(val) + return function (this) + nothrow_side_effect(val) + end +end +let src = code_typed1() do + for i = 1:1000 + obj = DoAllocNoEscapeInter() + finalizer(genfinalizer(nothing), obj) + end + end + @test count(isnew, src.code) == 0 +end + +# Test that we can inline a finalizer that just returns a constant value +mutable struct DoAllocConst + function DoAllocConst() + finalizer(new()) do this + return nothing + end + end +end +let src = code_typed1() do + for i = 1:1000 + DoAllocConst() + end + end + @test count(isnew, src.code) == 0 +end + # Test that finalizer elision doesn't cause a throw to be inlined into a function # that shouldn't have it const finalizer_should_throw = Ref{Bool}(true) @@ -1334,27 +1311,43 @@ end @test f_finalizer_throws() # Test finalizers with static parameters -global last_finalizer_type::Type = Any mutable struct DoAllocNoEscapeSparam{T} - x::T - function finalizer_sparam(d::DoAllocNoEscapeSparam{T}) where {T} + x + @inline function 
finalizer_sparam(d::DoAllocNoEscapeSparam{T}) where {T} nothrow_side_effect(nothing) nothrow_side_effect(T) end - function DoAllocNoEscapeSparam{T}(x::T) where {T} + @inline function DoAllocNoEscapeSparam(x::T) where {T} finalizer(finalizer_sparam, new{T}(x)) end end -DoAllocNoEscapeSparam(x::T) where {T} = DoAllocNoEscapeSparam{T}(x) - let src = code_typed1(Tuple{Any}) do x for i = 1:1000 DoAllocNoEscapeSparam(x) end end - # This requires more inlining enhancments. For now just make sure this - # doesn't error. - @test count(isnew, src.code) in (0, 1) # == 0 + @test count(x->isexpr(x, :static_parameter), src.code) == 0 # A bad inline might leave left-over :static_parameter + nnothrow_invokes = count(isinvoke(:nothrow_side_effect), src.code) + @test count(iscall(f->!isa(singleton_type(argextype(f, src)), Core.Builtin)), src.code) == + count(iscall((src, nothrow_side_effect)), src.code) == 2 - nnothrow_invokes + # TODO: Our effect modeling is not yet strong enough to fully eliminate this + @test_broken count(isnew, src.code) == 0 +end + +# Test finalizer varargs +function varargs_finalizer(args...) + nothrow_side_effect(args[1]) +end +mutable struct DoAllocNoEscapeNoVarargs + function DoAllocNoEscapeNoInline() + finalizer(noinline_finalizer, new()) + end +end +let src = code_typed1() do + for i = 1:1000 + DoAllocNoEscapeNoInline() + end + end end # Test noinline finalizer @@ -1366,7 +1359,6 @@ mutable struct DoAllocNoEscapeNoInline finalizer(noinline_finalizer, new()) end end - let src = code_typed1() do for i = 1:1000 DoAllocNoEscapeNoInline() @@ -1376,23 +1368,247 @@ let src = code_typed1() do @test count(isinvoke(:noinline_finalizer), src.code) == 1 end +# Test that we resolve a `finalizer` call that we don't handle currently +mutable struct DoAllocNoEscapeBranch + val::Int + function DoAllocNoEscapeBranch(val::Int) + finalizer(new(val)) do this + if this.val > 500 + nothrow_side_effect(this.val) + else + nothrow_side_effect(nothing) + end + end + end +end +let src = code_typed1() do + for i = 1:1000 + DoAllocNoEscapeBranch(i) + end + end + @test !any(iscall((src, Core.finalizer)), src.code) + @test !any(isinvoke(:finalizer), src.code) +end + +const FINALIZATION_COUNT = Ref(0) +init_finalization_count!() = FINALIZATION_COUNT[] = 0 +get_finalization_count() = FINALIZATION_COUNT[] +@noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x +@noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) 
= (@nospecialize; print(io, x...)) +@test Core.Compiler.is_finalizer_inlineable(Base.infer_effects(add_finalization_count!, (Int,))) + +mutable struct DoAllocWithField + x::Int + function DoAllocWithField(x::Int) + finalizer(new(x)) do this + add_finalization_count!(this.x) + end + end +end +mutable struct DoAllocWithFieldInter + x::Int +end +function register_finalizer!(obj::DoAllocWithFieldInter) + finalizer(obj) do this + add_finalization_count!(this.x) + end +end + +function const_finalization(io) + for i = 1:1000 + o = DoAllocWithField(1) + safeprint(io, o.x) + end +end +let src = code_typed1(const_finalization, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + const_finalization(IOBuffer()) + @test get_finalization_count() == 1000 +end + +# Test that finalizers that don't do anything are just erased from the IR +function useless_finalizer() + x = Ref(1) + finalizer(x) do x + nothing + end + return x +end +let src = code_typed1(useless_finalizer, ()) + @test count(iscall((src, Core.finalizer)), src.code) == 0 + @test length(src.code) == 2 +end + +# tests finalizer inlining when def/uses involve control flow +function cfg_finalization1(io) + for i = -999:1000 + o = DoAllocWithField(i) + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + end +end +let src = code_typed1(cfg_finalization1, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization1(IOBuffer()) + @test get_finalization_count() == 1000 +end + +function cfg_finalization2(io) + for i = -999:1000 + o = DoAllocWithField(1) + o.x = i # with `setfield!` + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + end +end +let src = code_typed1(cfg_finalization2, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization2(IOBuffer()) + @test get_finalization_count() == 1000 +end + +function cfg_finalization3(io) + for i = -999:1000 + o = DoAllocWithFieldInter(i) + register_finalizer!(o) + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + end +end +let src = code_typed1(cfg_finalization3, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization3(IOBuffer()) + @test get_finalization_count() == 1000 +end + +function cfg_finalization4(io) + for i = -999:1000 + o = DoAllocWithFieldInter(1) + o.x = i # with `setfield!` + register_finalizer!(o) + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + end +end +let src = code_typed1(cfg_finalization4, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization4(IOBuffer()) + @test get_finalization_count() == 1000 +end + +function cfg_finalization5(io) + for i = -999:1000 + o = DoAllocWithFieldInter(i) + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + register_finalizer!(o) + end +end +let src = code_typed1(cfg_finalization5, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization5(IOBuffer()) + @test get_finalization_count() == 1000 +end + +function cfg_finalization6(io) + for i = -999:1000 + o = DoAllocWithField(0) + if i == 1000 + o.x = i # with `setfield!` + elseif i > 0 + safeprint(io, o.x, '\n') + end + end +end 
+let src = code_typed1(cfg_finalization6, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization6(IOBuffer()) + @test get_finalization_count() == 1000 +end + + +function cfg_finalization7(io) + for i = -999:1000 + o = DoAllocWithField(0) + o.x = 0 + if i == 1000 + o.x = i # with `setfield!` + end + o.x = i + if i == 999 + o.x = i + end + o.x = 0 + if i == 1000 + o.x = i + end + end +end +let src = code_typed1(cfg_finalization7, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization7(IOBuffer()) + @test get_finalization_count() == 1000 +end + + # optimize `[push!|pushfirst!](::Vector{Any}, x...)` @testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!] @eval begin - let src = code_typed1((Vector{Any}, Any)) do xs, x - $f(xs, x) + for T in [Int, Any] + let src = code_typed1((Vector{T}, T)) do xs, x + $f(xs, x) + end + @test count(iscall((src, $f)), src.code) == 0 end - @test count(iscall((src, $f)), src.code) == 0 - @test count(src.code) do @nospecialize x - isa(x, Core.GotoNode) || - isa(x, Core.GotoIfNot) || - iscall((src, getfield))(x) - end == 0 # no loop should be involved for the common single arg case - end - let src = code_typed1((Vector{Any}, Any, Any)) do xs, x, y - $f(xs, x, y) + let effects = Base.infer_effects((Vector{T}, T)) do xs, x + $f(xs, x) + end + @test Core.Compiler.Core.Compiler.is_terminates(effects) + end + let src = code_typed1((Vector{T}, T, T)) do xs, x, y + $f(xs, x, y) + end + @test count(iscall((src, $f)), src.code) == 0 end - @test count(iscall((src, $f)), src.code) == 0 end let xs = Any[] $f(xs, :x, "y", 'z') @@ -1402,3 +1618,501 @@ end end end end + +using Core.Compiler: is_declared_inline, is_declared_noinline + +# https://github.com/JuliaLang/julia/issues/45050 +@testset "propagate :meta annotations to keyword sorter methods" begin + # @inline, @noinline, @constprop + let @inline f(::Any; x::Int=1) = 2x + @test is_declared_inline(only(methods(f))) + @test is_declared_inline(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + end + let @noinline f(::Any; x::Int=1) = 2x + @test is_declared_noinline(only(methods(f))) + @test is_declared_noinline(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + end + let Base.@constprop :aggressive f(::Any; x::Int=1) = 2x + @test Core.Compiler.is_aggressive_constprop(only(methods(f))) + @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + end + let Base.@constprop :none f(::Any; x::Int=1) = 2x + @test Core.Compiler.is_no_constprop(only(methods(f))) + @test Core.Compiler.is_no_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + end + # @nospecialize + let f(@nospecialize(A::Any); x::Int=1) = 2x + @test only(methods(f)).nospecialize == 1 + @test only(methods(Core.kwcall, (Any, typeof(f), Vararg))).nospecialize == 4 + end + let f(::Any; x::Int=1) = (@nospecialize; 2x) + @test only(methods(f)).nospecialize == -1 + @test only(methods(Core.kwcall, (Any, typeof(f), Vararg))).nospecialize == -1 + end + # Base.@assume_effects + let Base.@assume_effects :notaskstate f(::Any; x::Int=1) = 2x + @test Core.Compiler.decode_effects_override(only(methods(f)).purity).notaskstate + @test Core.Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate + end + # propagate multiple metadata also + let @inline Base.@assume_effects :notaskstate Base.@constprop 
:aggressive f(::Any; x::Int=1) = (@nospecialize; 2x) + @test is_declared_inline(only(methods(f))) + @test Core.Compiler.is_aggressive_constprop(only(methods(f))) + @test is_declared_inline(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + @test Core.Compiler.is_aggressive_constprop(only(methods(Core.kwcall, (Any, typeof(f), Vararg)))) + @test only(methods(f)).nospecialize == -1 + @test only(methods(Core.kwcall, (Any, typeof(f), Vararg))).nospecialize == -1 + @test Core.Compiler.decode_effects_override(only(methods(f)).purity).notaskstate + @test Core.Compiler.decode_effects_override(only(methods(Core.kwcall, (Any, typeof(f), Vararg))).purity).notaskstate + end +end + +# Test that one opaque closure capturing another gets inlined properly. +function oc_capture_oc(z) + oc1 = @opaque x->x + oc2 = @opaque y->oc1(y) + return oc2(z) +end +@test fully_eliminated(oc_capture_oc, (Int,)) + +# inlining with unmatched type parameters +@eval struct OldVal{T} + (OV::Type{OldVal{T}})() where T = $(Expr(:new, :OV)) +end +@test OldVal{0}() === OldVal{0}.instance +function with_unmatched_typeparam() + f(x::OldVal{i}) where {i} = i + r = 0 + for i = 1:10000 + r += f(OldVal{i}()) + end + return r +end +let src = code_typed1(with_unmatched_typeparam) + found = nothing + for x in src.code + if isexpr(x, :call) && length(x.args) == 1 + found = x + break + end + end + @test isnothing(found) || (source=src, statement=found) +end + +function twice_sitofp(x::Int, y::Int) + x = Base.sitofp(Float64, x) + y = Base.sitofp(Float64, y) + return (x, y) +end + +# Test that semi-concrete eval can inline constant results +let src = code_typed1((Int,)) do x + twice_sitofp(x, 2) + end + @test count(iscall((src, Base.sitofp)), src.code) == 1 +end + +# `@noinline` annotations with semi-concrete eval +let src = code_typed1((Int,)) do x + @noinline twice_sitofp(x, 2) + end + @test count(isinvoke(:twice_sitofp), src.code) == 1 +end + +# `Base.@constprop :aggressive` forces semi-concrete eval, but it should still not be inlined +@noinline Base.@constprop :aggressive function twice_sitofp_noinline(x::Int, y::Int) + x = Base.sitofp(Float64, x) + y = Base.sitofp(Float64, y) + return (x, y) +end + +let src = code_typed1((Int,)) do x + twice_sitofp_noinline(x, 2) + end + @test count(isinvoke(:twice_sitofp_noinline), src.code) == 1 +end + +# Test getfield modeling of Type{Ref{_A}} where _A +let getfield_tfunc(@nospecialize xs...) = + Core.Compiler.getfield_tfunc(Core.Compiler.fallback_lattice, xs...) + @test getfield_tfunc(Type, Core.Const(:parameters)) !== Union{} + @test !isa(getfield_tfunc(Type{Tuple{Union{Int, Float64}, Int}}, Core.Const(:name)), Core.Const) +end +@test fully_eliminated(Base.ismutable, Tuple{Base.RefValue}) + +# TODO: Remove compute sparams for vararg_retrieval +fvarargN_inline(x::Tuple{Vararg{Int, N}}) where {N} = N +fvarargN_inline(args...) = fvarargN_inline(args) +let src = code_typed1(fvarargN_inline, (Tuple{Vararg{Int}},)) + @test_broken count(iscall((src, Core._compute_sparams)), src.code) == 0 && + count(iscall((src, Core._svec_ref)), src.code) == 0 && + count(iscall((src, Core.nfields)), src.code) == 1 +end + +# Test effect annotation of declined inline unionsplit +f_union_unmatched(x::Union{Nothing, Type{T}}) where {T} = nothing +let src = code_typed1((Any,)) do x + if isa(x, Union{Nothing, Type}) + f_union_unmatched(x) + end + nothing + end + @test count(iscall((src, f_union_unmatched)), src.code) == 0 +end + +# modifyfield! 
handling +# ===================== + +isinvokemodify(y) = @nospecialize(x) -> isinvokemodify(y, x) +isinvokemodify(sym::Symbol, @nospecialize(x)) = isinvokemodify(mi->mi.def.name===sym, x) +isinvokemodify(pred::Function, @nospecialize(x)) = isexpr(x, :invoke_modify) && pred(x.args[1]::MethodInstance) + +mutable struct Atomic{T} + @atomic x::T +end +let src = code_typed1((Atomic{Int},)) do a + @atomic a.x + 1 + end + @test count(isinvokemodify(:+), src.code) == 1 +end +let src = code_typed1((Atomic{Int},)) do a + @atomic a.x += 1 + end + @test count(isinvokemodify(:+), src.code) == 1 +end +let src = code_typed1((Atomic{Int},)) do a + @atomic a.x max 10 + end + @test count(isinvokemodify(:max), src.code) == 1 +end +# simple union split handling +mymax(x::T, y::T) where T<:Real = max(x, y) +mymax(x::T, y::Real) where T<:Real = convert(T, max(x, y))::T +let src = code_typed1((Atomic{Int},Union{Int,Float64})) do a, b + @atomic a.x mymax b + end + @test count(isinvokemodify(:mymax), src.code) == 2 +end + +# apply `ssa_inlining_pass` multiple times +let interp = Core.Compiler.NativeInterpreter() + # check if callsite `@noinline` annotation works + ir, = Base.code_ircode((Int,Int); optimize_until="inlining", interp) do a, b + @noinline a*b + end |> only + i = findfirst(isinvoke(:*), ir.stmts.stmt) + @test i !== nothing + + # ok, now delete the callsite flag, and see the second inlining pass can inline the call + @eval Core.Compiler $ir.stmts[$i][:flag] &= ~IR_FLAG_NOINLINE + inlining = Core.Compiler.InliningState(interp) + ir = Core.Compiler.ssa_inlining_pass!(ir, inlining, false) + @test count(isinvoke(:*), ir.stmts.stmt) == 0 + @test count(iscall((ir, Core.Intrinsics.mul_int)), ir.stmts.stmt) == 1 +end + +# Test special purpose inliner for Core.ifelse +f_ifelse_1(a, b) = Core.ifelse(true, a, b) +f_ifelse_2(a, b) = Core.ifelse(false, a, b) +f_ifelse_3(a, b) = Core.ifelse(a, true, b) + +@test fully_eliminated(f_ifelse_1, Tuple{Any, Any}; retval=Core.Argument(2)) +@test fully_eliminated(f_ifelse_2, Tuple{Any, Any}; retval=Core.Argument(3)) +@test !fully_eliminated(f_ifelse_3, Tuple{Any, Any}) + +# inline_splatnew for abstract `NamedTuple` +@eval construct_splatnew(T, fields) = $(Expr(:splatnew, :T, :fields)) +for tt = Any[(Int,Int), (Integer,Integer), (Any,Any)] + let src = code_typed1(tt) do a, b + construct_splatnew(NamedTuple{(:a,:b),typeof((a,b))}, (a,b)) + end + @test count(issplatnew, src.code) == 0 + @test count(isnew, src.code) == 1 + end +end + +# optimize away `NamedTuple`s used for handling `@nospecialize`d keyword-argument +# https://github.com/JuliaLang/julia/pull/47059 +abstract type CallInfo end +struct NewInstruction + stmt::Any + type::Any + info::CallInfo + line::Int32 + flag::UInt8 + function NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info::CallInfo), + line::Int32, flag::UInt8) + return new(stmt, type, info, line, flag) + end +end +@nospecialize +function NewInstruction(newinst::NewInstruction; + stmt=newinst.stmt, + type=newinst.type, + info::CallInfo=newinst.info, + line::Int32=newinst.line, + flag::UInt8=newinst.flag) + return NewInstruction(stmt, type, info, line, flag) +end +@specialize +let src = code_typed1((NewInstruction,Any,Any,CallInfo)) do newinst, stmt, type, info + NewInstruction(newinst; stmt, type, info) + end + @test count(issplatnew, src.code) == 0 + @test count(iscall((src,NamedTuple)), src.code) == 0 + @test count(isnew, src.code) == 1 +end + +# Test that inlining can still use nothrow information from concrete-eval +# even if the 
result itself is too big to be inlined, and nothrow is not +# known without concrete-eval +const THE_BIG_TUPLE = ntuple(identity, 1024); +function return_the_big_tuple(err::Bool) + err && error("BAD") + return THE_BIG_TUPLE +end +@test fully_eliminated() do + return_the_big_tuple(false)[1] +end +@test fully_eliminated() do + @inline return_the_big_tuple(false)[1] +end + +# inlineable but removable call should be eligible for DCE +Base.@assume_effects :removable @inline function inlineable_effect_free(a::Float64) + a == Inf && return zero(a) + return sin(a) + cos(a) +end +@test fully_eliminated((Float64,)) do a + b = inlineable_effect_free(a) + c = inlineable_effect_free(b) + nothing +end + +# https://github.com/JuliaLang/julia/issues/47374 +function f47374(x) + [f47374(i, x) for i in 1:1] +end +function f47374(i::Int, x) + return 1.0 +end +@test f47374(rand(1)) == Float64[1.0] + +# compiler should recognize effectful :static_parameter +# https://github.com/JuliaLang/julia/issues/45490 +issue45490_1(x::Union{T, Nothing}, y::Union{T, Nothing}) where {T} = T +issue45490_2(x::Union{T, Nothing}, y::Union{T, Nothing}) where {T} = (typeof(T); nothing) +for f = (issue45490_1, issue45490_2) + src = code_typed1(f, (Any,Any)) + @test any(src.code) do @nospecialize x + isexpr(x, :static_parameter) + end + @test_throws UndefVarError f(nothing, nothing) +end + +# inline effect-free :static_parameter, required for semi-concrete interpretation accuracy +# https://github.com/JuliaLang/julia/issues/47349 +function make_issue47349(::Val{N}) where {N} + pickargs(::Val{N}) where {N} = (@nospecialize(x::Tuple)) -> x[N] + return pickargs(Val{N-1}()) +end +let src = code_typed1(make_issue47349(Val{4}()), (Any,)) + @test !any(src.code) do @nospecialize x + isexpr(x, :static_parameter) + end + @test Base.return_types((Int,)) do x + make_issue47349(Val(4))((x,nothing,Int)) + end |> only === Type{Int} +end + +# Test that irinterp can make use of constant results even if they're big +# Check that pure functions with non-inlineable results still get deleted +struct BigSemi + x::NTuple{1024, Int} +end +@Base.assume_effects :total @noinline make_big_tuple(x::Int) = ntuple(x->x+1, 1024)::NTuple{1024, Int} +BigSemi(y::Int, x::Int) = BigSemi(make_big_tuple(x)) +function elim_full_ir(y) + bs = BigSemi(y, 10) + return Val{bs.x[1]}() +end + +@test fully_eliminated(elim_full_ir, Tuple{Int}) + +# union splitting should account for uncovered call signature +# https://github.com/JuliaLang/julia/issues/48397 +f48397(::Bool) = :ok +f48397(::Tuple{String,String}) = :ok +let src = code_typed1((Union{Bool,Tuple{String,Any}},)) do x + f48397(x) + end + @test any(iscall((src, f48397)), src.code) +end +g48397::Union{Bool,Tuple{String,Any}} = ("48397", 48397) +let res = @test_throws MethodError let + Base.Experimental.@force_compile + f48397(g48397) + end + err = res.value + @test err.f === f48397 && err.args === (g48397,) +end +let res = @test_throws MethodError let + Base.Experimental.@force_compile + convert(Union{Bool,Tuple{String,String}}, g48397) + end + err = res.value + @test err.f === convert && err.args === (Union{Bool,Tuple{String,String}}, g48397) +end + +# https://github.com/JuliaLang/julia/issues/49050 +abstract type Issue49050AbsTop{T,N} end +abstract type Issue49050Abs1{T, N} <: Issue49050AbsTop{T,N} end +abstract type Issue49050Abs2{T} <: Issue49050Abs1{T,3} end +struct Issue49050Concrete{T} <: Issue49050Abs2{T} + x::T +end +issue49074(::Type{Issue49050AbsTop{T,N}}) where {T,N} = Issue49050AbsTop{T,N} +Base.@assume_effects 
:foldable issue49074(::Type{C}) where {C<:Issue49050AbsTop} = issue49074(supertype(C)) +let src = code_typed1() do + issue49074(Issue49050Concrete) + end + @test any(isinvoke(:issue49074), src.code) +end +let result = @test_throws MethodError issue49074(Issue49050Concrete) + @test result.value.f === issue49074 + @test result.value.args === (Any,) +end + +# inlining of `TypeName` +@test fully_eliminated() do + Ref.body.name +end + +# Regression for finalizer inlining with more complex control flow +global finalizer_escape::Int = 0 +mutable struct FinalizerEscapeTest + x::Int + function FinalizerEscapeTest() + this = new(0) + finalizer(this) do this + global finalizer_escape + finalizer_escape = this.x + end + return this + end +end + +function run_finalizer_escape_test1(b1, b2) + x = FinalizerEscapeTest() + x.x = 1 + if b1 + x.x = 2 + end + if b2 + Base.donotdelete(b2) + end + x.x = 3 + return nothing +end + +function run_finalizer_escape_test2(b1, b2) + x = FinalizerEscapeTest() + x.x = 1 + if b1 + x.x = 2 + end + x.x = 3 + return nothing +end + +for run_finalizer_escape_test in (run_finalizer_escape_test1, run_finalizer_escape_test2) + global finalizer_escape::Int = 0 + + let src = code_typed1(run_finalizer_escape_test, Tuple{Bool, Bool}) + @test any(x->isexpr(x, :(=)), src.code) + end + + let + run_finalizer_escape_test(true, true) + @test finalizer_escape == 3 + end +end + +# `compilesig_invokes` inlining option +@newinterp NoCompileSigInvokes +Core.Compiler.OptimizationParams(::NoCompileSigInvokes) = + Core.Compiler.OptimizationParams(; compilesig_invokes=false) +@noinline no_compile_sig_invokes(@nospecialize x) = (x !== Any && !Base.has_free_typevars(x)) +# test the single dispatch candidate case +let src = code_typed1((Type,)) do x + no_compile_sig_invokes(x) + end + @test count(src.code) do @nospecialize x + isinvoke(:no_compile_sig_invokes, x) && + (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Any} + end == 1 +end +let src = code_typed1((Type,); interp=NoCompileSigInvokes()) do x + no_compile_sig_invokes(x) + end + @test count(src.code) do @nospecialize x + isinvoke(:no_compile_sig_invokes, x) && + (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Type} + end == 1 +end +# test the union split case +let src = code_typed1((Union{DataType,UnionAll},)) do x + no_compile_sig_invokes(x) + end + @test count(src.code) do @nospecialize x + isinvoke(:no_compile_sig_invokes, x) && + (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),Any} + end == 2 +end +let src = code_typed1((Union{DataType,UnionAll},); interp=NoCompileSigInvokes()) do x + no_compile_sig_invokes(x) + end + @test count(src.code) do @nospecialize x + isinvoke(:no_compile_sig_invokes, x) && + (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),DataType} + end == 1 + @test count(src.code) do @nospecialize x + isinvoke(:no_compile_sig_invokes, x) && + (x.args[1]::MethodInstance).specTypes == Tuple{typeof(no_compile_sig_invokes),UnionAll} + end == 1 +end + +# https://github.com/JuliaLang/julia/issues/50612 +f50612(x) = UInt32(x) +@test all(!isinvoke(:UInt32),get_code(f50612,Tuple{Char})) + +# move inlineable constant values into statement position during `compact!`-ion +# so that we don't inline DCE-eligibile calls +Base.@assume_effects :nothrow function erase_before_inlining(x, y) + z = sin(y) + if x + return "julia" + end + return z +end +@test fully_eliminated((Float64,); retval=5) do y + 
length(erase_before_inlining(true, y)) +end +@test fully_eliminated((Float64,); retval=(5,5)) do y + z = erase_before_inlining(true, y) + return length(z), length(z) +end + +# continue const-prop' when concrete-eval result is too big +const THE_BIG_TUPLE_2 = ntuple(identity, 1024) +return_the_big_tuple2(a) = (a, THE_BIG_TUPLE_2) +let src = code_typed1() do + return return_the_big_tuple2(42)[2] + end + @test count(isinvoke(:return_the_big_tuple2), src.code) == 0 +end +let src = code_typed1() do + return iterate(("1", '2'), 1) + end + @test count(isinvoke(:iterate), src.code) == 0 +end diff --git a/test/compiler/interpreter_exec.jl b/test/compiler/interpreter_exec.jl index 27143c17052cc..ce0704be15178 100644 --- a/test/compiler/interpreter_exec.jl +++ b/test/compiler/interpreter_exec.jl @@ -2,25 +2,26 @@ # tests that interpreter matches codegen using Test -using Core: GotoIfNot, ReturnNode +using Core.IR # test that interpreter correctly handles PhiNodes (#29262) let m = Meta.@lower 1 + 1 @assert Meta.isexpr(m, :thunk) - src = m.args[1]::Core.CodeInfo + src = m.args[1]::CodeInfo src.code = Any[ # block 1 QuoteNode(:a), QuoteNode(:b), GlobalRef(@__MODULE__, :test29262), - GotoIfNot(Core.SSAValue(3), 6), + GotoIfNot(SSAValue(3), 6), # block 2 - Core.PhiNode(Int32[4], Any[Core.SSAValue(1)]), - Core.PhiNode(Int32[4, 5], Any[Core.SSAValue(2), Core.SSAValue(5)]), - ReturnNode(Core.SSAValue(6)), + PhiNode(Int32[4], Any[SSAValue(1)]), + PhiNode(Int32[4, 5], Any[SSAValue(2), SSAValue(5)]), + ReturnNode(SSAValue(6)), ] nstmts = length(src.code) src.ssavaluetypes = Any[ Any for _ = 1:nstmts ] + src.ssaflags = fill(UInt8(0x00), nstmts) src.codelocs = fill(Int32(1), nstmts) src.inferred = true Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src)) @@ -32,7 +33,7 @@ end let m = Meta.@lower 1 + 1 @assert Meta.isexpr(m, :thunk) - src = m.args[1]::Core.CodeInfo + src = m.args[1]::CodeInfo src.code = Any[ # block 1 QuoteNode(:a), @@ -40,27 +41,28 @@ let m = Meta.@lower 1 + 1 QuoteNode(:c), GlobalRef(@__MODULE__, :test29262), # block 2 - Core.PhiNode(Int32[4, 16], Any[false, true]), # false, true - Core.PhiNode(Int32[4, 16], Any[Core.SSAValue(1), Core.SSAValue(2)]), # :a, :b - Core.PhiNode(Int32[4, 16], Any[Core.SSAValue(3), Core.SSAValue(6)]), # :c, :a - Core.PhiNode(Int32[16], Any[Core.SSAValue(7)]), # NULL, :c + PhiNode(Int32[4, 16], Any[false, true]), # false, true + PhiNode(Int32[4, 16], Any[SSAValue(1), SSAValue(2)]), # :a, :b + PhiNode(Int32[4, 16], Any[SSAValue(3), SSAValue(6)]), # :c, :a + PhiNode(Int32[16], Any[SSAValue(7)]), # NULL, :c # block 3 - Core.PhiNode(Int32[], Any[]), # NULL, NULL - Core.PhiNode(Int32[17, 8], Any[true, Core.SSAValue(4)]), # test29262, test29262, [true] - Core.PhiNode(Int32[17], Vector{Any}(undef, 1)), # NULL, NULL - Core.PhiNode(Int32[8], Vector{Any}(undef, 1)), # NULL, NULL - Core.PhiNode(Int32[], Any[]), # NULL, NULL - Core.PhiNode(Int32[17, 8], Any[Core.SSAValue(2), Core.SSAValue(8)]), # NULL, :c, [:b] - Core.PhiNode(Int32[], Any[]), # NULL, NULL - GotoIfNot(Core.SSAValue(5), 5), + PhiNode(Int32[], Any[]), # NULL, NULL + PhiNode(Int32[17, 8], Any[true, SSAValue(4)]), # test29262, test29262, [true] + PhiNode(Int32[17], Vector{Any}(undef, 1)), # NULL, NULL + PhiNode(Int32[8], Vector{Any}(undef, 1)), # NULL, NULL + PhiNode(Int32[], Any[]), # NULL, NULL + PhiNode(Int32[17, 8], Any[SSAValue(2), SSAValue(8)]), # NULL, :c, [:b] + PhiNode(Int32[], Any[]), # NULL, NULL + GotoIfNot(SSAValue(5), 5), # block 4 - GotoIfNot(Core.SSAValue(10), 9), + GotoIfNot(SSAValue(10), 9), # block 
5 - Expr(:call, GlobalRef(Core, :tuple), Core.SSAValue(6), Core.SSAValue(7), Core.SSAValue(8), Core.SSAValue(14)), - ReturnNode(Core.SSAValue(18)), + Expr(:call, GlobalRef(Core, :tuple), SSAValue(6), SSAValue(7), SSAValue(8), SSAValue(14)), + ReturnNode(SSAValue(18)), ] nstmts = length(src.code) src.ssavaluetypes = Any[ Any for _ = 1:nstmts ] + src.ssaflags = fill(UInt8(0x00), nstmts) src.codelocs = fill(Int32(1), nstmts) src.inferred = true Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src)) @@ -72,32 +74,32 @@ end let m = Meta.@lower 1 + 1 @assert Meta.isexpr(m, :thunk) - src = m.args[1]::Core.CodeInfo + src = m.args[1]::CodeInfo src.code = Any[ # block 1 QuoteNode(:a), QuoteNode(:b), GlobalRef(@__MODULE__, :test29262), # block 2 - Expr(:enter, 11), + EnterNode(11), # block 3 - Core.UpsilonNode(), - Core.UpsilonNode(), - Core.UpsilonNode(Core.SSAValue(2)), - GotoIfNot(Core.SSAValue(3), 10), + UpsilonNode(), + UpsilonNode(), + UpsilonNode(SSAValue(2)), + GotoIfNot(SSAValue(3), 10), # block 4 - Core.UpsilonNode(Core.SSAValue(1)), + UpsilonNode(SSAValue(1)), # block 5 Expr(:throw_undef_if_not, :expected, false), # block 6 - Core.PhiCNode(Any[Core.SSAValue(5), Core.SSAValue(7), Core.SSAValue(9)]), # NULL, :a, :b - Core.PhiCNode(Any[Core.SSAValue(6)]), # NULL - Expr(:leave, 1), + PhiCNode(Any[SSAValue(5), SSAValue(7), SSAValue(9)]), # NULL, :a, :b + PhiCNode(Any[SSAValue(6)]), # NULL # block 7 - ReturnNode(Core.SSAValue(11)), + ReturnNode(SSAValue(11)), ] nstmts = length(src.code) src.ssavaluetypes = Any[ Any for _ = 1:nstmts ] + src.ssaflags = fill(UInt8(0x00), nstmts) src.codelocs = fill(Int32(1), nstmts) src.inferred = true Core.Compiler.verify_ir(Core.Compiler.inflate_ir(src)) @@ -106,3 +108,17 @@ let m = Meta.@lower 1 + 1 global test29262 = false @test :b === @eval $m end + +# https://github.com/JuliaLang/julia/issues/47065 +# `Core.Compiler.sort!` should be able to handle a big list +let n = 1000 + ex = :(return 1) + for _ in 1:n + ex = :(rand() < .1 && $(ex)) + end + @eval global function f_1000_blocks() + $ex + return 0 + end +end +@test f_1000_blocks() == 0 diff --git a/test/compiler/invalidation.jl b/test/compiler/invalidation.jl new file mode 100644 index 0000000000000..0d1cfeee56ea8 --- /dev/null +++ b/test/compiler/invalidation.jl @@ -0,0 +1,224 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +# setup +# ----- + +include("irutils.jl") + +using Test +const CC = Core.Compiler +import Core: MethodInstance, CodeInstance +import .CC: WorldRange, WorldView + +struct InvalidationTesterCache + dict::IdDict{MethodInstance,CodeInstance} +end +InvalidationTesterCache() = InvalidationTesterCache(IdDict{MethodInstance,CodeInstance}()) + +const INVALIDATION_TESTER_CACHE = InvalidationTesterCache() + +struct InvalidationTester <: CC.AbstractInterpreter + world::UInt + inf_params::CC.InferenceParams + opt_params::CC.OptimizationParams + inf_cache::Vector{CC.InferenceResult} + code_cache::InvalidationTesterCache + function InvalidationTester(; + world::UInt = Base.get_world_counter(), + inf_params::CC.InferenceParams = CC.InferenceParams(), + opt_params::CC.OptimizationParams = CC.OptimizationParams(), + inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[], + code_cache::InvalidationTesterCache = INVALIDATION_TESTER_CACHE) + return new(world, inf_params, opt_params, inf_cache, code_cache) + end +end + +struct InvalidationTesterCacheView + dict::IdDict{MethodInstance,CodeInstance} +end + +CC.InferenceParams(interp::InvalidationTester) = interp.inf_params +CC.OptimizationParams(interp::InvalidationTester) = interp.opt_params +CC.get_world_counter(interp::InvalidationTester) = interp.world +CC.get_inference_cache(interp::InvalidationTester) = interp.inf_cache +CC.code_cache(interp::InvalidationTester) = WorldView(InvalidationTesterCacheView(interp.code_cache.dict), WorldRange(interp.world)) +CC.get(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default) +CC.getindex(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance) = getindex(wvc.cache.dict, mi) +CC.haskey(wvc::WorldView{InvalidationTesterCacheView}, mi::MethodInstance) = haskey(wvc.cache.dict, mi) +function CC.setindex!(wvc::WorldView{InvalidationTesterCacheView}, ci::CodeInstance, mi::MethodInstance) + CC.add_invalidation_callback!(mi) do replaced::MethodInstance, max_world::UInt32 + delete!(wvc.cache.dict, replaced) + # Core.println("[InvalidationTester] ", replaced) # debug + end + setindex!(wvc.cache.dict, ci, mi) +end + +# basic functionality test +# ------------------------ + +basic_callee(x) = x +basic_caller(x) = basic_callee(x) + +# run inference and check that cache exist +@test Base.return_types((Float64,); interp=InvalidationTester()) do x + basic_caller(x) +end |> only === Float64 +@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :basic_callee +end +@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :basic_caller +end + +# this redefinition below should invalidate the cache +basic_callee(x) = x, x +@test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :basic_callee +end +@test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :basic_caller +end + +# re-run inference and check the result is updated (and new cache exists) +@test Base.return_types((Float64,); interp=InvalidationTester()) do x + basic_caller(x) +end |> only === Tuple{Float64,Float64} +@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :basic_callee +end +@test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :basic_caller +end + +# backedge optimization +# --------------------- + +const GLOBAL_BUFFER = IOBuffer() + +# test backedge optimization when the callee's type and effects information are maximized +begin take!(GLOBAL_BUFFER) + 
+ pr48932_callee(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x)) + pr48932_caller(x) = pr48932_callee(Base.inferencebarrier(x)) + + # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top + let rt = only(Base.return_types(pr48932_callee, (Any,))) + @test rt === Any + effects = Base.infer_effects(pr48932_callee, (Any,)) + @test Core.Compiler.Effects(effects) == Core.Compiler.Effects() + end + + # run inference on both `pr48932_caller` and `pr48932_callee` + let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x + @inline pr48932_caller(x) + end |> only + @test rt === Any + @test any(iscall((src, pr48932_callee)), src.code) + end + @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_callee + end + @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_caller + end + @test 42 == pr48932_caller(42) + @test "42" == String(take!(GLOBAL_BUFFER)) + + # test that we didn't add the backedge from `pr48932_callee` to `pr48932_caller`: + # this redefinition below should invalidate the cache of `pr48932_callee` but not that of `pr48932_caller` + pr48932_callee(x) = (print(GLOBAL_BUFFER, x); nothing) + @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_callee + end + @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_caller + end + @test isnothing(pr48932_caller(42)) + @test "42" == String(take!(GLOBAL_BUFFER)) +end + +# we can avoid adding backedge even if the callee's return type is not the top +# when the return value is not used within the caller +begin take!(GLOBAL_BUFFER) + pr48932_callee_inferable(x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(1)::Int) + pr48932_caller_unuse(x) = (pr48932_callee_inferable(Base.inferencebarrier(x)); nothing) + + # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top + let rt = only(Base.return_types(pr48932_callee_inferable, (Any,))) + @test rt === Int + effects = Base.infer_effects(pr48932_callee_inferable, (Any,)) + @test Core.Compiler.Effects(effects) == Core.Compiler.Effects() + end + + # run inference on both `pr48932_caller` and `pr48932_callee`: + # we don't need to add backedge to `pr48932_callee` from `pr48932_caller` + # since the inference result of `pr48932_callee` is maximized and it's not inlined + let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x + @inline pr48932_caller_unuse(x) + end |> only + @test rt === Nothing + @test any(iscall((src, pr48932_callee_inferable)), src.code) + end + @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_callee_inferable + end + @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_caller_unuse + end + @test isnothing(pr48932_caller_unuse(42)) + @test "42" == String(take!(GLOBAL_BUFFER)) + + # test that we didn't add the backedge from `pr48932_callee_inferable` to `pr48932_caller_unuse`: + # this redefinition below should invalidate the cache of `pr48932_callee_inferable` but not that of `pr48932_caller_unuse` + pr48932_callee_inferable(x) = (print(GLOBAL_BUFFER, "foo"); x) + @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_callee_inferable + end + @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_caller_unuse + end + @test isnothing(pr48932_caller_unuse(42)) + @test "foo" == String(take!(GLOBAL_BUFFER)) +end + +# we need to add backedge when the callee is inlined 
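# (In this case the call is statically resolved, as the `isinvoke` check below verifies,
# so the caller's cached code depends on the specific callee method; a backedge is required,
# and redefining the callee therefore drops both cache entries.)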
+begin take!(GLOBAL_BUFFER) + + @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); Base.inferencebarrier(x)) + pr48932_caller_inlined(x) = pr48932_callee_inlined(Base.inferencebarrier(x)) + + # assert that type and effects information inferred from `pr48932_callee(::Any)` are the top + let rt = only(Base.return_types(pr48932_callee_inlined, (Any,))) + @test rt === Any + effects = Base.infer_effects(pr48932_callee_inlined, (Any,)) + @test Core.Compiler.Effects(effects) == Core.Compiler.Effects() + end + + # run inference on `pr48932_caller_inlined` and `pr48932_callee_inlined` + let (src, rt) = code_typed((Int,); interp=InvalidationTester()) do x + @inline pr48932_caller_inlined(x) + end |> only + @test rt === Any + @test any(isinvoke(:pr48932_callee_inlined), src.code) + end + @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_callee_inlined + end + @test any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_caller_inlined + end + @test 42 == pr48932_caller_inlined(42) + @test "42" == String(take!(GLOBAL_BUFFER)) + + # test that we added the backedge from `pr48932_callee_inlined` to `pr48932_caller_inlined`: + # this redefinition below should invalidate the cache of `pr48932_callee_inlined` but not that of `pr48932_caller_inlined` + @noinline pr48932_callee_inlined(@nospecialize x) = (print(GLOBAL_BUFFER, x); nothing) + @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_callee_inlined + end + @test !any(INVALIDATION_TESTER_CACHE.dict) do (mi, ci) + mi.def.name === :pr48932_caller_inlined + end + @test isnothing(pr48932_caller_inlined(42)) + @test "42" == String(take!(GLOBAL_BUFFER)) +end diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 48682b9af3b95..97e80fca6cba1 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -2,20 +2,15 @@ using Test using Base.Meta -import Core: - CodeInfo, Argument, SSAValue, GotoNode, GotoIfNot, PiNode, PhiNode, - QuoteNode, ReturnNode +using Core.IR -include(normpath(@__DIR__, "irutils.jl")) +include("irutils.jl") # domsort # ======= ## Test that domsort doesn't mangle single-argument phis (#29262) -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ +let code = Any[ # block 1 Expr(:call, :opaque), GotoIfNot(Core.SSAValue(1), 10), @@ -33,24 +28,16 @@ let m = Meta.@lower 1 + 1 Core.PhiNode(Int32[2, 8], Any[0, Core.SSAValue(7)]), ReturnNode(Core.SSAValue(10)), ] - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) + ir = make_ircode(code) domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) ir = Core.Compiler.domsort_ssa!(ir, domtree) Core.Compiler.verify_ir(ir) - phi = ir.stmts.inst[3] + phi = ir.stmts.stmt[3] @test isa(phi, Core.PhiNode) && length(phi.edges) == 1 end # test that we don't stack-overflow in SNCA with large functions. 
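# (SNCA refers to the Semi-NCA dominator-tree construction used by `construct_domtree`;
# the block below builds a CFG on the order of 2^15 statements, large enough that a
# recursion-based implementation would overflow the stack.)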
-let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - code = Any[] +let code = Any[] N = 2^15 for i in 1:2:N push!(code, Expr(:call, :opaque)) @@ -59,14 +46,7 @@ let m = Meta.@lower 1 + 1 # all goto here push!(code, Expr(:call, :opaque)) push!(code, ReturnNode(nothing)) - src.code = code - - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) + ir = make_ircode(code) domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) ir = Core.Compiler.domsort_ssa!(ir, domtree) Core.Compiler.verify_ir(ir) @@ -75,7 +55,7 @@ end # SROA # ==== -import Core.Compiler: widenconst +using Core.Compiler: widenconst is_load_forwarded(src::CodeInfo) = !any(iscall((src, getfield)), src.code) is_scalar_replaced(src::CodeInfo) = @@ -387,6 +367,22 @@ let # should work with constant globals @test count(isnew, src.code) == 0 end +# don't SROA statement that may throw +# https://github.com/JuliaLang/julia/issues/48067 +function issue48067(a::Int, b) + r = Ref(a) + try + setfield!(r, :x, b) + nothing + catch err + getfield(r, :x) + end +end +let src = code_typed1(issue48067, (Int,String)) + @test any(iscall((src, setfield!)), src.code) +end +@test issue48067(42, "julia") == 42 + # should work nicely with inlining to optimize away a complicated case # adapted from http://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B struct Point @@ -440,7 +436,7 @@ let src = code_typed1() do @test count(isnew, src.code) == 1 end -# should eliminate allocation whose address isn't taked even if it has unintialized field(s) +# should eliminate allocation whose address isn't taked even if it has uninitialized field(s) mutable struct BadRef x::String y::String @@ -489,7 +485,7 @@ function isdefined_elim() return arr end let src = code_typed1(isdefined_elim) - @test is_scalar_replaced(src) + @test count(isisdefined, src.code) == 0 end @test isdefined_elim() == Any[] @@ -521,7 +517,7 @@ end # comparison lifting # ================== -let # lifting `===` +let # lifting `===` through PhiNode src = code_typed1((Bool,Int,)) do c, x y = c ? x : nothing y === nothing # => ϕ(false, true) @@ -541,7 +537,15 @@ let # lifting `===` end end -let # lifting `isa` +let # lifting `===` through Core.ifelse + src = code_typed1((Bool,Int,)) do c, x + y = Core.ifelse(c, x, nothing) + y === nothing # => Core.ifelse(c, false, true) + end + @test count(iscall((src, ===)), src.code) == 0 +end + +let # lifting `isa` through PhiNode src = code_typed1((Bool,Int,)) do c, x y = c ? x : nothing isa(y, Int) # => ϕ(true, false) @@ -564,7 +568,16 @@ let # lifting `isa` end end -let # lifting `isdefined` +let # lifting `isa` through Core.ifelse + src = code_typed1((Bool,Int,)) do c, x + y = Core.ifelse(c, x, nothing) + isa(y, Int) # => Core.ifelse(c, true, false) + end + @test count(iscall((src, isa)), src.code) == 0 +end + + +let # lifting `isdefined` through PhiNode src = code_typed1((Bool,Some{Int},)) do c, x y = c ? 
x : nothing isdefined(y, 1) # => ϕ(true, false) @@ -587,6 +600,14 @@ let # lifting `isdefined` end end +let # lifting `isdefined` through Core.ifelse + src = code_typed1((Bool,Some{Int},)) do c, x + y = Core.ifelse(c, x, nothing) + isdefined(y, 1) # => Core.ifelse(c, true, false) + end + @test count(iscall((src, isdefined)), src.code) == 0 +end + mutable struct Foo30594; x::Float64; end Base.copy(x::Foo30594) = Foo30594(x.x) function add!(p::Foo30594, off::Foo30594) @@ -647,7 +668,7 @@ let nt = (a=1, b=2) end # Expr(:new) annotated as PartialStruct -struct FooPartial +struct FooPartialNew x y global f_partial @@ -656,10 +677,7 @@ end @test fully_eliminated(f_partial, Tuple{Float64}) # A SSAValue after the compaction line -let m = Meta.@lower 1 + 1 - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ +let code = Any[ # block 1 nothing, # block 2 @@ -678,7 +696,7 @@ let m = Meta.@lower 1 + 1 # block 5 ReturnNode(Core.SSAValue(2)), ] - src.ssavaluetypes = Any[ + ssavaluetypes = Any[ Nothing, Any, Bool, @@ -691,11 +709,52 @@ let m = Meta.@lower 1 + 1 Any, Any ] - nstmts = length(src.code) - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any]) + slottypes = Any[Any, Any, Any] + ir = make_ircode(code; ssavaluetypes, slottypes) + ir = @test_nowarn Core.Compiler.sroa_pass!(ir) @test Core.Compiler.verify_ir(ir) === nothing +end + +# A lifted Core.ifelse with an eliminated branch (#50276) +let code = Any[ + # block 1 + #= %1: =# Core.Argument(2), + # block 2 + #= %2: =# Expr(:call, Core.ifelse, SSAValue(1), true, missing), + #= %3: =# GotoIfNot(SSAValue(2), 11), + # block 3 + #= %4: =# PiNode(SSAValue(2), Bool), # <-- This PiNode is the trigger of the bug, since it + # means that only one branch of the Core.ifelse + # is lifted. + #= %5: =# GotoIfNot(false, 8), + # block 2 + #= %6: =# nothing, + #= %7: =# GotoNode(8), + # block 4 + #= %8: =# PhiNode(Int32[5, 7], Any[SSAValue(4), SSAValue(6)]), + # ^-- N.B. This PhiNode also needs to have a Union{ ... } type in order + # for lifting to be performed (it is skipped for e.g. 
`Bool`) + # + #= %9: =# Expr(:call, isa, SSAValue(8), Missing), + #= %10: =# ReturnNode(SSAValue(9)), + # block 5 + #= %11: =# ReturnNode(false), + ] + ssavaluetypes = Any[ + Any, + Union{Missing, Bool}, + Any, + Bool, + Any, + Missing, + Any, + Union{Nothing, Bool}, + Bool, + Any, + Any + ] + slottypes = Any[Any, Any, Any] + ir = make_ircode(code; ssavaluetypes, slottypes) ir = @test_nowarn Core.Compiler.sroa_pass!(ir) @test Core.Compiler.verify_ir(ir) === nothing end @@ -718,11 +777,8 @@ let src = code_typed(gcd, Tuple{Int, Int})[1].first Core.Compiler.verify_ir(ir) end -let m = Meta.@lower 1 + 1 - # Test that CFG simplify combines redundant basic blocks - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ +let # Test that CFG simplify combines redundant basic blocks + code = Any[ Core.Compiler.GotoNode(2), Core.Compiler.GotoNode(3), Core.Compiler.GotoNode(4), @@ -731,23 +787,104 @@ let m = Meta.@lower 1 + 1 Core.Compiler.GotoNode(7), ReturnNode(2) ] - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) + ir = make_ircode(code) ir = Core.Compiler.cfg_simplify!(ir) Core.Compiler.verify_ir(ir) ir = Core.Compiler.compact!(ir) @test length(ir.cfg.blocks) == 1 && Core.Compiler.length(ir.stmts) == 1 end -let m = Meta.@lower 1 + 1 - # Test that CFG simplify doesn't mess up when chaining past return blocks - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ +# Test cfg_simplify in complicated sequences of dropped and merged bbs +using Core.Compiler: Argument, IRCode, GotoNode, GotoIfNot, ReturnNode, NoCallInfo, BasicBlock, StmtRange, SSAValue +bb_term(ir, bb) = Core.Compiler.getindex(ir, SSAValue(Core.Compiler.last(ir.cfg.blocks[bb].stmts)))[:stmt] + +function each_stmt_a_bb(stmts, preds, succs) + ir = IRCode() + empty!(ir.stmts.stmt) + append!(ir.stmts.stmt, stmts) + empty!(ir.stmts.type); append!(ir.stmts.type, [Nothing for _ = 1:length(stmts)]) + empty!(ir.stmts.flag); append!(ir.stmts.flag, [0x0 for _ = 1:length(stmts)]) + empty!(ir.stmts.line); append!(ir.stmts.line, [Int32(0) for _ = 1:length(stmts)]) + empty!(ir.stmts.info); append!(ir.stmts.info, [NoCallInfo() for _ = 1:length(stmts)]) + empty!(ir.cfg.blocks); append!(ir.cfg.blocks, [BasicBlock(StmtRange(i, i), preds[i], succs[i]) for i = 1:length(stmts)]) + empty!(ir.cfg.index); append!(ir.cfg.index, [i for i = 2:length(stmts)]) + Core.Compiler.verify_ir(ir) + return ir +end + +for gotoifnot in (false, true) + stmts = [ + # BB 1 + GotoIfNot(Argument(1), 8), + # BB 2 + GotoIfNot(Argument(2), 4), + # BB 3 + GotoNode(9), + # BB 4 + GotoIfNot(Argument(3), 10), + # BB 5 + GotoIfNot(Argument(4), 11), + # BB 6 + GotoIfNot(Argument(5), 12), + # BB 7 + GotoNode(13), + # BB 8 + ReturnNode(1), + # BB 9 + nothing, + # BB 10 + nothing, + # BB 11 + gotoifnot ? GotoIfNot(Argument(6), 13) : GotoNode(13), + # BB 12 + ReturnNode(2), + # BB 13 + ReturnNode(3), + ] + preds = Vector{Int}[Int[], [1], [2], [2], [4], [5], [6], [1], [3], [4, 9], [5, 10], gotoifnot ? [6,11] : [6], [7, 11]] + succs = Vector{Int}[[2, 8], [3, 4], [9], [5, 10], [6, 11], [7, 12], [13], Int[], [10], [11], gotoifnot ? 
[12, 13] : [13], Int[], Int[]] + ir = each_stmt_a_bb(stmts, preds, succs) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + + if gotoifnot + let term4 = bb_term(ir, 4), term5 = bb_term(ir, 5) + @test isa(term4, GotoIfNot) && bb_term(ir, term4.dest).val == 3 + @test isa(term5, ReturnNode) && term5.val == 2 + end + else + @test length(ir.cfg.blocks) == 10 + let term = bb_term(ir, 3) + @test isa(term, GotoNode) && bb_term(ir, term.label).val == 3 + end + end +end + +let stmts = [ + # BB 1 + GotoIfNot(Argument(1), 4), + # BB 2 + GotoIfNot(Argument(2), 5), + # BB 3 + GotoNode(5), + # BB 4 + ReturnNode(1), + # BB 5 + ReturnNode(2) + ] + preds = Vector{Int}[Int[], [1], [2], [1], [2, 3]] + succs = Vector{Int}[[2, 4], [3, 5], [5], Int[], Int[]] + ir = each_stmt_a_bb(stmts, preds, succs) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + + @test length(ir.cfg.blocks) == 4 + terms = map(i->bb_term(ir, i), 1:length(ir.cfg.blocks)) + @test Set(term.val for term in terms if isa(term, ReturnNode)) == Set([1,2]) +end + +let # Test that CFG simplify doesn't mess up when chaining past return blocks + code = Any[ Core.Compiler.GotoIfNot(Core.Compiler.Argument(2), 3), Core.Compiler.GotoNode(4), ReturnNode(1), @@ -759,25 +896,17 @@ let m = Meta.@lower 1 + 1 ReturnNode(2), ReturnNode(3) ] - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) + ir = make_ircode(code) ir = Core.Compiler.cfg_simplify!(ir) Core.Compiler.verify_ir(ir) @test length(ir.cfg.blocks) == 5 - ret_2 = ir.stmts.inst[ir.cfg.blocks[3].stmts[end]] + ret_2 = ir.stmts.stmt[ir.cfg.blocks[3].stmts[end]] @test isa(ret_2, Core.Compiler.ReturnNode) && ret_2.val == 2 end -let m = Meta.@lower 1 + 1 - # Test that CFG simplify doesn't try to merge every block in a loop into +let # Test that CFG simplify doesn't try to merge every block in a loop into # its predecessor - @assert Meta.isexpr(m, :thunk) - src = m.args[1]::CodeInfo - src.code = Any[ + code = Any[ # Block 1 Core.Compiler.GotoNode(2), # Block 2 @@ -785,17 +914,27 @@ let m = Meta.@lower 1 + 1 # Block 3 Core.Compiler.GotoNode(1) ] - nstmts = length(src.code) - src.ssavaluetypes = nstmts - src.codelocs = fill(Int32(1), nstmts) - src.ssaflags = fill(Int32(0), nstmts) - ir = Core.Compiler.inflate_ir(src) - Core.Compiler.verify_ir(ir) + ir = make_ircode(code) ir = Core.Compiler.cfg_simplify!(ir) Core.Compiler.verify_ir(ir) @test length(ir.cfg.blocks) == 1 end +# `cfg_simplify!` shouldn't error in a presence of `try/catch` block +let ir = Base.code_ircode(; optimize_until="slot2ssa") do + v = try + catch + end + v + end |> only |> first + Core.Compiler.verify_ir(ir) + nb = length(ir.cfg.blocks) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + na = length(ir.cfg.blocks) + @test na < nb +end + # Issue #29213 function f_29213() while true @@ -933,8 +1072,8 @@ let end |> only |> first end - refs = map(Core.SSAValue, findall(x->x isa Expr && x.head == :new, src.code)) - some_ccall = findfirst(x -> x isa Expr && x.head == :foreigncall && x.args[1] == :(:some_ccall), src.code) + refs = map(Core.SSAValue, findall(@nospecialize(x)->Meta.isexpr(x, :new), src.code)) + some_ccall = findfirst(@nospecialize(x) -> Meta.isexpr(x, :foreigncall) && x.args[1] == :(:some_ccall), src.code) @assert some_ccall !== nothing stmt = src.code[some_ccall] nccallargs = length(stmt.args[3]::Core.SimpleVector) @@ -944,42 
+1083,47 @@ let @test all(alloc -> alloc in preserves, refs) end -# test `stmt_effect_free` and DCE -# =============================== +# test `flags_for_effects` and DCE +# ================================ let # effect-freeness computation for array allocation # should eliminate dead allocations - good_dims = (0, 2) - for dim in good_dims, N in 0:10 + good_dims = [1, 2, 3, 4, 10] + Ns = [1, 2, 3, 4, 10] + for dim = good_dims, N = Ns + Int64(dim)^N > typemax(Int) && continue dims = ntuple(i->dim, N) - @eval @test fully_eliminated(()) do + @test @eval fully_eliminated() do Array{Int,$N}(undef, $(dims...)) nothing end end - # shouldn't eliminate errorneous dead allocations - bad_dims = [-1, # should keep "invalid Array dimensions" - typemax(Int)] # should keep "invalid Array size" - for dim in bad_dims, N in 1:10 + # shouldn't eliminate erroneous dead allocations + bad_dims = [-1, typemax(Int)] + for dim in bad_dims, N in [1, 2, 3, 4, 10], T in Any[Int, Union{Missing,Nothing}, Nothing, Any] dims = ntuple(i->dim, N) - @eval @test !fully_eliminated(()) do - Array{Int,$N}(undef, $(dims...)) + @test @eval !fully_eliminated() do + Array{$T,$N}(undef, $(dims...)) + nothing + end + @test_throws "invalid " @eval let + Array{$T,$N}(undef, $(dims...)) nothing end end # some high-level examples - @test fully_eliminated(()) do + @test fully_eliminated() do Int[] nothing end - @test fully_eliminated(()) do + @test fully_eliminated() do Matrix{Tuple{String,String}}(undef, 4, 4) nothing end - @test fully_eliminated(()) do + @test fully_eliminated() do IdDict{Any,Any}() nothing end @@ -995,9 +1139,9 @@ let ci = code_typed1(optimize=false) do end end ir = Core.Compiler.inflate_ir(ci) - @test count(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.inst) == 1 + @test any(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.stmt) ir = Core.Compiler.compact!(ir, true) - @test count(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.inst) == 0 + @test !any(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.stmt) end # Test that adce_pass! 
can drop phi node uses that can be concluded unused @@ -1023,7 +1167,7 @@ function foo_cfg_empty(b) @goto x end @label x - return 1 + return b end let ci = code_typed(foo_cfg_empty, Tuple{Bool}, optimize=true)[1][1] ir = Core.Compiler.inflate_ir(ci) @@ -1033,7 +1177,7 @@ let ci = code_typed(foo_cfg_empty, Tuple{Bool}, optimize=true)[1][1] ir = Core.Compiler.cfg_simplify!(ir) Core.Compiler.verify_ir(ir) @test length(ir.cfg.blocks) <= 2 - @test isa(ir.stmts[length(ir.stmts)][:inst], ReturnNode) + @test isa(ir.stmts[length(ir.stmts)][:stmt], ReturnNode) end @test Core.Compiler.is_effect_free(Base.infer_effects(getfield, (Complex{Int}, Symbol))) @@ -1061,3 +1205,495 @@ let sroa_no_forward() = begin end @test sroa_no_forward() == (1, 2.0) end + +@noinline function foo_defined_last_iter(n::Int) + local x + for i = 1:n + if i == 5 + x = 1 + end + end + if n > 2 + return x + n + end + return 0 +end +const_call_defined_last_iter() = foo_defined_last_iter(3) +@test foo_defined_last_iter(2) == 0 +@test_throws UndefVarError foo_defined_last_iter(3) +@test_throws UndefVarError const_call_defined_last_iter() +@test foo_defined_last_iter(6) == 7 + +let src = code_typed1(foo_defined_last_iter, Tuple{Int}) + for i = 1:length(src.code) + e = src.code[i] + if isexpr(e, :throw_undef_if_not) + @assert !isa(e.args[2], Bool) + end + end +end + +# Issue #47180, incorrect phi counts in CmdRedirect +function a47180(b; stdout ) + c = setenv(b, b.env) + if true + c = pipeline(c, stdout) + end + c +end +@test isa(a47180(``; stdout), Base.AbstractCmd) + +# Test that _compute_sparams can be eliminated for NamedTuple +named_tuple_elim(name::Symbol, result) = NamedTuple{(name,)}(result) +let src = code_typed1(named_tuple_elim, Tuple{Symbol, Tuple}) + @test count(iscall((src, Core._compute_sparams)), src.code) == 0 && + count(iscall((src, Core._svec_ref)), src.code) == 0 && + count(iscall(x->!isa(argextype(x, src).val, Core.Builtin)), src.code) == 0 +end + +# Test that sroa works if the struct type is a PartialStruct +mutable struct OneConstField + const a::Int + b::Int +end + +@eval function one_const_field_partial() + # Use explicit :new here to avoid inlining messing with the type + strct = $(Expr(:new, OneConstField, 1, 2)) + strct.b = 4 + strct.b = 5 + return strct.b +end +@test fully_eliminated(one_const_field_partial; retval=5) + +# Test that SROA updates the type of intermediate phi nodes (#50285) +struct Immut50285 + x::Any +end + +function immut50285(b, x, y) + if b + z = Immut50285(x) + else + z = Immut50285(y) + end + z.x::Union{Float64, Int} +end + +let src = code_typed1(immut50285, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, typeassert)), src.code) == 0 +end + +function mut50285(b, x, y) + z = Ref{Any}() + if b + z[] = x + else + z[] = y + end + z[]::Union{Float64, Int} +end + +let src = code_typed1(mut50285, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, typeassert)), src.code) == 0 +end + +# Test that we can eliminate new{typeof(x)}(x) +struct TParamTypeofTest1{T} + x::T + @eval TParamTypeofTest1(x) = $(Expr(:new, :(TParamTypeofTest1{typeof(x)}), :x)) +end +tparam_typeof_test_elim1(x) = TParamTypeofTest1(x).x +@test fully_eliminated(tparam_typeof_test_elim1, Tuple{Any}) + +struct TParamTypeofTest2{S,T} + x::S + y::T + @eval TParamTypeofTest2(x, y) = $(Expr(:new, :(TParamTypeofTest2{typeof(x),typeof(y)}), :x, :y)) +end +tparam_typeof_test_elim2(x, y) = TParamTypeofTest2(x, y).x +@test 
fully_eliminated(tparam_typeof_test_elim2, Tuple{Any,Any}) + +# Test that sroa doesn't get confused by free type parameters in struct types +struct Wrap1{T} + x::T + @eval @inline (T::Type{Wrap1{X}} where X)(x) = $(Expr(:new, :T, :x)) +end +Wrap1(x) = Wrap1{typeof(x)}(x) + +function wrap1_wrap1_ifelse(b, x, w1) + w2 = Wrap1(Wrap1(x)) + w3 = Wrap1(typeof(w1)(w1.x)) + Core.ifelse(b, w3, w2).x.x +end +function wrap1_wrap1_wrapper(b, x, y) + w1 = Base.inferencebarrier(Wrap1(y))::Wrap1{<:Union{Int, Float64}} + wrap1_wrap1_ifelse(b, x, w1) +end +@test wrap1_wrap1_wrapper(true, 1, 1.0) === 1.0 +@test wrap1_wrap1_wrapper(false, 1, 1.0) === 1 + +# Test unswitching-union optimization within SRO Apass +function sroaunswitchuniontuple(c, x1, x2) + t = c ? (x1,) : (x2,) + return getfield(t, 1) +end +struct SROAUnswitchUnion1{T} + x::T +end +struct SROAUnswitchUnion2{S,T} + x::T + @inline SROAUnswitchUnion2{S}(x::T) where {S,T} = new{S,T}(x) +end +function sroaunswitchunionstruct1(c, x1, x2) + x = c ? SROAUnswitchUnion1(x1) : SROAUnswitchUnion1(x2) + return getfield(x, :x) +end +function sroaunswitchunionstruct2(c, x1, x2) + x = c ? SROAUnswitchUnion2{:a}(x1) : SROAUnswitchUnion2{:a}(x2) + return getfield(x, :x) +end +let src = code_typed1(sroaunswitchuniontuple, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, getfield)), src.code) == 0 +end +let src = code_typed1(sroaunswitchunionstruct1, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, getfield)), src.code) == 0 +end +@test sroaunswitchunionstruct2(true, 1, 1.0) === 1 +@test sroaunswitchunionstruct2(false, 1, 1.0) === 1.0 + +# Test SROA of union into getfield +struct SingleFieldStruct1 + x::Int +end +struct SingleFieldStruct2 + x::Int +end +function foo(b, x) + if b + f = SingleFieldStruct1(x) + else + f = SingleFieldStruct2(x) + end + getfield(f, :x) + 1 +end +@test foo(true, 1) == 2 + +# ifelse folding +@test Core.Compiler.is_removable_if_unused(Base.infer_effects(exp, (Float64,))) +@test !Core.Compiler.is_inlineable(code_typed1(exp, (Float64,))) +fully_eliminated(; retval=Core.Argument(2)) do x::Float64 + return Core.ifelse(true, x, exp(x)) +end +fully_eliminated(; retval=Core.Argument(2)) do x::Float64 + return ifelse(true, x, exp(x)) # the optimization should be applied to post-inlining IR too +end +fully_eliminated(; retval=Core.Argument(2)) do x::Float64 + return ifelse(isa(x, Float64), x, exp(x)) +end + +# PhiC fixup of compact! with cfg modification +@inline function big_dead_throw_catch() + x = 1 + try + x = 2 + if Ref{Bool}(false)[] + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + Base.donotdelete(x) + x = 3 + end + catch + return x + end +end + +function call_big_dead_throw_catch() + if Ref{Bool}(false)[] + return big_dead_throw_catch() + end + return 4 +end + +# Issue #51159 - Unreachable reached in try-catch block +function f_with_early_try_catch_exit() + result = false + for i in 3 + x = try + catch + # This introduces an early Expr(:leave) that we must respect when building + # φᶜ-nodes in slot2ssa. 
In particular, we have to ignore the `result = x` + # assignment that occurs outside of this try-catch block + continue + end + result = x + end + result +end + +let ir = first(only(Base.code_ircode(f_with_early_try_catch_exit, (); optimize_until="compact"))) + for i = 1:length(ir.stmts) + expr = ir.stmts[i][:stmt] + if isa(expr, PhiCNode) + # The φᶜ should only observe the value of `result` at the try-catch :enter + # (from the `result = false` assignment), since `result = x` assignment is + # dominated by an Expr(:leave). + @test length(expr.values) == 1 + end + end +end + +@test isnothing(f_with_early_try_catch_exit()) + +# Issue #51144 - UndefRefError during compaction +let code = Any[ + # block 1 → 2, 3 + #= %1: =# Expr(:(=), Core.SlotNumber(4), Core.Argument(2)), + #= %2: =# Expr(:call, :(===), Core.SlotNumber(4), nothing), + #= %3: =# GotoIfNot(Core.SSAValue(1), 5), + # block 2 + #= %4: =# ReturnNode(nothing), + # block 3 → 4, 5 + #= %5: =# Expr(:(=), Core.SlotNumber(4), false), + #= %6: =# GotoIfNot(Core.Argument(2), 8), + # block 4 → 5 + #= %7: =# Expr(:(=), Core.SlotNumber(4), true), + # block 5 + #= %8: =# ReturnNode(nothing), # Must not insert a π-node here + ] + slottypes = Any[Any, Union{Bool, Nothing}, Bool, Union{Bool, Nothing}] + src = make_codeinfo(code; slottypes) + + mi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()); + mi.specTypes = Tuple{} + mi.def = Module() + + # Simulate the important results from inference + interp = Core.Compiler.NativeInterpreter() + sv = Core.Compiler.OptimizationState(mi, src, interp) + slot_id = 4 + for block_id = 3:5 + # (_4 !== nothing) conditional narrows the type, triggering PiNodes + sv.bb_vartables[block_id][slot_id] = VarState(Bool, #= maybe_undef =# false) + end + + ir = Core.Compiler.convert_to_ircode(src, sv) + ir = Core.Compiler.slot2reg(ir, src, sv) + ir = Core.Compiler.compact!(ir) + + Core.Compiler.verify_ir(ir) +end + +function f_with_merge_to_entry_block() + while true + i = @noinline rand(Int) + if @noinline isodd(i) + return i + end + end +end + +let (ir, _) = only(Base.code_ircode(f_with_merge_to_entry_block)) + Core.Compiler.verify_ir(ir) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) +end + +# Test that CFG simplify doesn't leave an un-renamed SSA Value +let # Test that CFG simplify doesn't try to merge every block in a loop into + # its predecessor + code = Any[ + # Block 1 + GotoIfNot(Argument(1), 3), + # Block 2 + GotoNode(5), + # Block 3 + Expr(:call, Base.inferencebarrier, 1), + GotoNode(6), + # Block 4 + Expr(:call, Base.inferencebarrier, 2), # fallthrough + # Block 5 + PhiNode(Int32[4, 5], Any[SSAValue(3), SSAValue(5)]), + ReturnNode(1) + ] + ir = make_ircode(code) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) == 4 +end + +# JET.test_opt(Core.Compiler.cfg_simplify!, (Core.Compiler.IRCode,)) + +# Test support for Core.OptimizedGenerics.KeyValue protocol +function persistent_dict_elim() + a = Base.PersistentDict(:a => 1) + return a[:a] +end + +# Ideally we would be able to fully eliminate this, +# but currently this would require an extra round of constprop +@test_broken fully_eliminated(persistent_dict_elim) +@test code_typed(persistent_dict_elim)[1][1].code[end] == Core.ReturnNode(1) + +function persistent_dict_elim_multiple() + a = Base.PersistentDict(:a => 1) + b = Base.PersistentDict(a, :b => 2) + return b[:a] +end +@test_broken fully_eliminated(persistent_dict_elim_multiple) +@test 
code_typed(persistent_dict_elim_multiple)[1][1].code[end] == Core.ReturnNode(1) + +function persistent_dict_elim_multiple_phi(c::Bool) + if c + a = Base.PersistentDict(:a => 1) + else + a = Base.PersistentDict(:a => 1) + end + b = Base.PersistentDict(a, :b => 2) + return b[:a] +end +@test_broken fully_eliminated(persistent_dict_elim_multiple_phi) +@test code_typed(persistent_dict_elim_multiple_phi)[1][1].code[end] == Core.ReturnNode(1) + +# Test CFG simplify with try/catch blocks +let code = Any[ + # Block 1 + GotoIfNot(Argument(1), 5), + # Block 2 + EnterNode(4), + # Block 3 + Expr(:leave), + # Block 4 + GotoNode(5), + # Block 5 + ReturnNode(1) + ] + ir = make_ircode(code) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) == 4 +end + +# Test CFG simplify with single predecessor phi node +let code = Any[ + # Block 1 + GotoNode(3), + # Block 2 + nothing, + # Block 3 + Expr(:call, Base.inferencebarrier, 1), + GotoNode(5), + # Block 4 + PhiNode(Int32[4], Any[SSAValue(3)]), + ReturnNode(SSAValue(5)) + ] + ir = make_ircode(code) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) <= 2 + ir = Core.Compiler.compact!(ir) + @test length(ir.stmts) <= 3 + @test (ir[SSAValue(length(ir.stmts))][:stmt]::ReturnNode).val !== nothing +end + +let code = Any[ + Expr(:call, Base.inferencebarrier, Argument(1)), # ::Bool + Expr(:call, Core.tuple, 1), # ::Tuple{Int} + Expr(:call, Core.tuple, 1.0), # ::Tuple{Float64} + Expr(:call, Core.ifelse, SSAValue(1), SSAValue(2), SSAValue(3)), # ::Tuple{Int} (e.g. from inlining) + Expr(:call, Core.getfield, SSAValue(4), 1), # ::Int + ReturnNode(SSAValue(5)) +] + try + argtypes = Any[Bool] + ssavaluetypes = Any[Bool, Tuple{Int}, Tuple{Float64}, Tuple{Int}, Int, Any] + ir = make_ircode(code; slottypes=argtypes, ssavaluetypes) + Core.Compiler.verify_ir(ir) + Core.Compiler.__set_check_ssa_counts(true) + ir = Core.Compiler.sroa_pass!(ir) + Core.Compiler.verify_ir(ir) + finally + Core.Compiler.__set_check_ssa_counts(false) + end +end + +# Test SROA all_same on NewNode +let code = Any[ + # Block 1 + Expr(:call, tuple, Argument(1)), + GotoIfNot(Argument(4), 5), + # Block 2 + Expr(:call, tuple, Argument(2)), + GotoIfNot(Argument(4), 9), + # Block 3 + PhiNode(Int32[2, 4], Any[SSAValue(1), SSAValue(3)]), + Expr(:call, getfield, SSAValue(5), 1), + Expr(:call, tuple, SSAValue(6), Argument(2)), # ::Tuple{Int, Int} + Expr(:call, tuple, SSAValue(7), Argument(3)), # ::Tuple{Tuple{Int, Int}, Int} + # Block 4 + PhiNode(Int32[4, 8], Any[nothing, SSAValue(8)]), + Expr(:call, Core.Intrinsics.not_int, Argument(4)), + GotoIfNot(SSAValue(10), 13), + # Block 5 + ReturnNode(1), + # Block 6 + PiNode(SSAValue(9), Tuple{Tuple{Int, Int}, Int}), + Expr(:call, getfield, SSAValue(13), 1), + Expr(:call, getfield, SSAValue(14), 1), + ReturnNode(SSAValue(15)) +] + + argtypes = Any[Int, Int, Int, Bool] + ssavaluetypes = Any[Tuple{Int}, Any, Tuple{Int}, Any, Tuple{Int}, Int, Tuple{Int, Int}, Tuple{Tuple{Int, Int}, Int}, + Union{Nothing, Tuple{Tuple{Int, Int}, Int}}, Bool, Any, Any, + Tuple{Tuple{Int, Int}, Int}, + Tuple{Int, Int}, Int, Any] + ir = make_ircode(code; slottypes=argtypes, ssavaluetypes) + Core.Compiler.verify_ir(ir) + ir = Core.Compiler.sroa_pass!(ir) + Core.Compiler.verify_ir(ir) + ir = Core.Compiler.compact!(ir) + Core.Compiler.verify_ir(ir) +end + +# Test correctness of current_scope folding +@eval function scope_folding() + $(Expr(:tryfinally, + Expr(:block, + Expr(:tryfinally, :(), :(), 2), + 
:(return Core.current_scope())), + :(), 1)) +end + +@eval function scope_folding_opt() + $(Expr(:tryfinally, + Expr(:block, + Expr(:tryfinally, :(), :(), :(Base.inferencebarrier(2))), + :(return Core.current_scope())), + :(), :(Base.inferencebarrier(1)))) +end + +@test scope_folding() == 1 +@test scope_folding_opt() == 1 +@test_broken fully_eliminated(scope_folding) +@test_broken fully_eliminated(scope_folding_opt) diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl index 06d261720bdf8..788d7bbc721ee 100644 --- a/test/compiler/irutils.jl +++ b/test/compiler/irutils.jl @@ -1,34 +1,91 @@ -import Core: CodeInfo, ReturnNode, MethodInstance -import Core.Compiler: argextype, singleton_type -import Base.Meta: isexpr +using Core.IR +using Core.Compiler: IRCode, IncrementalCompact, singleton_type, VarState +using Base.Meta: isexpr +using InteractiveUtils: gen_call_with_extracted_types_and_kwargs -argextype(@nospecialize args...) = argextype(args..., Any[]) +argextype(@nospecialize args...) = Core.Compiler.argextype(args..., VarState[]) code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo +macro code_typed1(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :code_typed1, ex0) +end get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code +macro get_code(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :get_code, ex0) +end # check if `x` is a statement with a given `head` isnew(@nospecialize x) = isexpr(x, :new) -isreturn(@nospecialize x) = isa(x, ReturnNode) +issplatnew(@nospecialize x) = isexpr(x, :splatnew) +isreturn(@nospecialize x) = isa(x, ReturnNode) && isdefined(x, :val) +isisdefined(@nospecialize x) = isexpr(x, :isdefined) # check if `x` is a dynamic call of a given function iscall(y) = @nospecialize(x) -> iscall(y, x) -function iscall((src, f)::Tuple{CodeInfo,Base.Callable}, @nospecialize(x)) +function iscall((src, f)::Tuple{IR,Base.Callable}, @nospecialize(x)) where IR<:Union{CodeInfo,IRCode,IncrementalCompact} return iscall(x) do @nospecialize x singleton_type(argextype(x, src)) === f end end -iscall(pred::Base.Callable, @nospecialize(x)) = isexpr(x, :call) && pred(x.args[1]) +function iscall(pred::Base.Callable, @nospecialize(x)) + if isexpr(x, :(=)) + x = x.args[2] + end + return isexpr(x, :call) && pred(x.args[1]) +end # check if `x` is a statically-resolved call of a function whose name is `sym` isinvoke(y) = @nospecialize(x) -> isinvoke(y, x) isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x) isinvoke(pred::Function, @nospecialize(x)) = isexpr(x, :invoke) && pred(x.args[1]::MethodInstance) -function fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...) - code = code_typed1(args...; kwargs...).code - if retval !== (@__FILE__) - return length(code) == 1 && isreturn(code[1]) && code[1].val == retval - else - return length(code) == 1 && isreturn(code[1]) +fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...) = + fully_eliminated(code_typed1(args...; kwargs...); retval) +fully_eliminated(src::CodeInfo; retval=(@__FILE__)) = fully_eliminated(src.code; retval) +fully_eliminated(ir::IRCode; retval=(@__FILE__)) = fully_eliminated(ir.stmts.stmt; retval) +function fully_eliminated(code::Vector{Any}; retval=(@__FILE__), kwargs...) 
+ length(code) == 1 || return false + retstmt = only(code) + isreturn(retstmt) || return false + retval === (@__FILE__) && return true + retval′ = retstmt.val + if retval′ isa QuoteNode + retval′ = retval′.value + end + return retval′ == retval +end +macro fully_eliminated(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :fully_eliminated, ex0) +end + +let m = Meta.@lower 1 + 1 + @assert Meta.isexpr(m, :thunk) + orig_src = m.args[1]::CodeInfo + global function make_codeinfo(code::Vector{Any}; + ssavaluetypes::Union{Nothing,Vector{Any}}=nothing, + slottypes::Union{Nothing,Vector{Any}}=nothing) + src = copy(orig_src) + src.code = code + nstmts = length(src.code) + if ssavaluetypes === nothing + src.ssavaluetypes = nstmts + else + src.ssavaluetypes = ssavaluetypes + end + src.codelocs = fill(one(Int32), nstmts) + src.ssaflags = fill(zero(UInt32), nstmts) + if slottypes !== nothing + src.slottypes = slottypes + src.slotflags = fill(zero(UInt8), length(slottypes)) + end + return src + end + global function make_ircode(code::Vector{Any}; + ssavaluetypes::Union{Nothing,Vector{Any}}=nothing, + slottypes::Union{Nothing,Vector{Any}}=nothing, + verify::Bool=true) + src = make_codeinfo(code; ssavaluetypes, slottypes) + ir = Core.Compiler.inflate_ir(src) + verify && Core.Compiler.verify_ir(ir) + return ir end end diff --git a/test/compiler/newinterp.jl b/test/compiler/newinterp.jl new file mode 100644 index 0000000000000..b454cdd3a40d8 --- /dev/null +++ b/test/compiler/newinterp.jl @@ -0,0 +1,47 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# TODO set up a version who defines new interpreter with persistent cache? + +""" + @newinterp NewInterpreter + +Defines new `NewInterpreter <: AbstractInterpreter` whose cache is separated +from the native code cache, satisfying the minimum interface requirements. 
+""" +macro newinterp(InterpName) + InterpCacheName = esc(Symbol(string(InterpName, "Cache"))) + InterpName = esc(InterpName) + C = Core + CC = Core.Compiler + quote + struct $InterpCacheName + dict::IdDict{$C.MethodInstance,$C.CodeInstance} + end + $InterpCacheName() = $InterpCacheName(IdDict{$C.MethodInstance,$C.CodeInstance}()) + struct $InterpName <: $CC.AbstractInterpreter + meta # additional information + world::UInt + inf_params::$CC.InferenceParams + opt_params::$CC.OptimizationParams + inf_cache::Vector{$CC.InferenceResult} + code_cache::$InterpCacheName + function $InterpName(meta = nothing; + world::UInt = Base.get_world_counter(), + inf_params::$CC.InferenceParams = $CC.InferenceParams(), + opt_params::$CC.OptimizationParams = $CC.OptimizationParams(), + inf_cache::Vector{$CC.InferenceResult} = $CC.InferenceResult[], + code_cache::$InterpCacheName = $InterpCacheName()) + return new(meta, world, inf_params, opt_params, inf_cache, code_cache) + end + end + $CC.InferenceParams(interp::$InterpName) = interp.inf_params + $CC.OptimizationParams(interp::$InterpName) = interp.opt_params + $CC.get_world_counter(interp::$InterpName) = interp.world + $CC.get_inference_cache(interp::$InterpName) = interp.inf_cache + $CC.code_cache(interp::$InterpName) = $CC.WorldView(interp.code_cache, $CC.WorldRange(interp.world)) + $CC.get(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance, default) = get(wvc.cache.dict, mi, default) + $CC.getindex(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = getindex(wvc.cache.dict, mi) + $CC.haskey(wvc::$CC.WorldView{$InterpCacheName}, mi::$C.MethodInstance) = haskey(wvc.cache.dict, mi) + $CC.setindex!(wvc::$CC.WorldView{$InterpCacheName}, ci::$C.CodeInstance, mi::$C.MethodInstance) = setindex!(wvc.cache.dict, ci, mi) + end +end diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 86545c8d0088c..491638f7596d9 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -5,6 +5,8 @@ using Core.IR const Compiler = Core.Compiler using .Compiler: CFG, BasicBlock, NewSSAValue +include("irutils.jl") + make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) function make_ci(code) @@ -36,7 +38,7 @@ end # false, false, false, false # )) # -# NullLineInfo = Core.LineInfoNode(Main, Symbol(""), Symbol(""), Int32(0), Int32(0)) +# NullLineInfo = Core.LineInfoNode(Main, Symbol(""), Symbol(""), Int32(0), UInt32(0)) # Compiler.run_passes(ci, 1, [NullLineInfo]) # # XXX: missing @test #end @@ -69,8 +71,10 @@ let cfg = CFG(BasicBlock[ ], Int[]) dfs = Compiler.DFS(cfg.blocks) @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 - let correct_idoms = Compiler.naive_idoms(cfg.blocks) + let correct_idoms = Compiler.naive_idoms(cfg.blocks), + correct_pidoms = Compiler.naive_idoms(cfg.blocks, true) @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms + @test Compiler.construct_postdomtree(cfg.blocks).idoms_bb == correct_pidoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). 
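# Usage sketch for the `@newinterp` macro defined in test/compiler/newinterp.jl above.
# Illustrative only: `SinInterp` is a hypothetical interpreter name, and the `interp=`
# keyword mirrors the `InvalidationTester` calls in test/compiler/invalidation.jl.
using Test
include("newinterp.jl")

@newinterp SinInterp  # defines `SinInterp <: Core.Compiler.AbstractInterpreter`
                      # backed by its own `SinInterpCache` code cache

# Inference now runs through the custom interpreter, so its results are cached in
# `SinInterpCache` rather than in the native code cache.
@test Base.return_types((Float64,); interp=SinInterp()) do x
    sin(x) + 1
end |> only === Float64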
@@ -82,6 +86,7 @@ let cfg = CFG(BasicBlock[ d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs)) cfg′ = CFG(blocks, cfg.index) @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms + @test Compiler.construct_postdomtree(cfg′.blocks).idoms_bb == correct_pidoms end end end @@ -101,15 +106,6 @@ for compile in ("min", "yes") end end -# Issue #27104 -# Test whether meta nodes are still present after code optimization. -let - @noinline f(x, y) = x + y - @test any(code_typed(f)[1][1].code) do ex - Meta.isexpr(ex, :meta) - end -end - # PR #32145 # Make sure IncrementalCompact can handle blocks with predecessors of index 0 # while removing blocks with no predecessors. @@ -121,9 +117,9 @@ let cfg = CFG(BasicBlock[ make_bb([2, 3] , [] ), ], Int[]) insts = Compiler.InstructionStream([], [], Any[], Int32[], UInt8[]) - code = Compiler.IRCode(insts, cfg, LineInfoNode[], [], Expr[], []) - compact = Compiler.IncrementalCompact(code, true) - @test length(compact.result_bbs) == 4 && 0 in compact.result_bbs[3].preds + ir = Compiler.IRCode(insts, cfg, Core.LineInfoNode[], Any[], Expr[], Compiler.VarState[]) + compact = Compiler.IncrementalCompact(ir, true) + @test length(compact.cfg_transform.result_bbs) == 4 && 0 in compact.cfg_transform.result_bbs[3].preds end # Issue #32579 - Optimizer bug involving type constraints @@ -147,9 +143,10 @@ end @test f32579(0, false) === false # Test for bug caused by renaming blocks improperly, related to PR #32145 -let ci = make_ci([ +let code = Any[ # block 1 - Core.Compiler.GotoIfNot(Expr(:boundscheck), 6), + Expr(:boundscheck), + Core.Compiler.GotoIfNot(SSAValue(1), 6), # block 2 Expr(:call, GlobalRef(Base, :size), Core.Compiler.Argument(3)), Core.Compiler.ReturnNode(), @@ -159,30 +156,40 @@ let ci = make_ci([ # block 4 GlobalRef(Main, :something), GlobalRef(Main, :somethingelse), - Expr(:call, Core.SSAValue(6), Core.SSAValue(7)), - Core.Compiler.GotoIfNot(Core.SSAValue(8), 11), + Expr(:call, Core.SSAValue(7), Core.SSAValue(8)), + Core.Compiler.GotoIfNot(Core.SSAValue(9), 12), # block 5 - Core.Compiler.ReturnNode(Core.SSAValue(8)), + Core.Compiler.ReturnNode(Core.SSAValue(9)), # block 6 - Core.Compiler.ReturnNode(Core.SSAValue(8)) - ]) - ir = Core.Compiler.inflate_ir(ci) + Core.Compiler.ReturnNode(Core.SSAValue(9)) + ] + ir = make_ircode(code) ir = Core.Compiler.compact!(ir, true) - @test Core.Compiler.verify_ir(ir) == nothing + @test Core.Compiler.verify_ir(ir) === nothing +end + +# Test that the verifier doesn't choke on cglobals (which aren't linearized) +let code = Any[ + Expr(:call, GlobalRef(Main, :cglobal), + Expr(:call, Core.tuple, :(:c)), Nothing), + Core.Compiler.ReturnNode() + ] + ir = make_ircode(code) + @test Core.Compiler.verify_ir(ir) === nothing end # Test that GlobalRef in value position is non-canonical -let ci = make_ci([ +let code = Any[ Expr(:call, GlobalRef(Main, :something_not_defined_please)) ReturnNode(SSAValue(1)) - ]) - ir = Core.Compiler.inflate_ir(ci) + ] + ir = make_ircode(code; verify=false) ir = Core.Compiler.compact!(ir, true) @test_throws ErrorException Core.Compiler.verify_ir(ir, false) end # Issue #29107 -let ci = make_ci([ +let code = Any[ # Block 1 Core.Compiler.GotoNode(6), # Block 2 @@ -197,15 +204,15 @@ let ci = make_ci([ Core.Compiler.GotoNode(2), # Block 3 Core.Compiler.ReturnNode(1000) - ]) - ir = Core.Compiler.inflate_ir(ci) + ] + ir = make_ircode(code) ir = Core.Compiler.compact!(ir, true) # Make sure that if there is a call to `something` (block 2 should be # removed entirely with working 
DCE), it doesn't use any SSA values that # come after it. for i in 1:length(ir.stmts) s = ir.stmts[i] - if isa(s, Expr) && s.head == :call && s.args[1] == :something + if Meta.isexpr(s, :call) && s.args[1] === :something if isa(s.args[2], SSAValue) @test s.args[2].id <= i end @@ -213,9 +220,8 @@ let ci = make_ci([ end end -# Make sure dead blocks that are removed are not still referenced in live phi -# nodes -let ci = make_ci([ +# Make sure dead blocks that are removed are not still referenced in live phi nodes +let code = Any[ # Block 1 Core.Compiler.GotoNode(3), # Block 2 (no predecessors) @@ -223,8 +229,8 @@ let ci = make_ci([ # Block 3 Core.PhiNode(Int32[1, 2], Any[100, 200]), Core.Compiler.ReturnNode(Core.SSAValue(3)) - ]) - ir = Core.Compiler.inflate_ir(ci) + ] + ir = make_ircode(code; verify=false) ir = Core.Compiler.compact!(ir, true) @test Core.Compiler.verify_ir(ir) == nothing end @@ -315,8 +321,8 @@ end f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0))) @test_throws TypeError f_if_typecheck() -@test let # https://github.com/JuliaLang/julia/issues/42258 - code = quote +let # https://github.com/JuliaLang/julia/issues/42258 + code = """ function foo() a = @noinline rand(rand(0:10)) if isempty(a) @@ -329,10 +335,11 @@ f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0))) code_typed(foo; optimize=true) code_typed(Core.Compiler.setindex!, (Core.Compiler.UseRef,Core.Compiler.NewSSAValue); optimize=true) - end |> string + """ cmd = `$(Base.julia_cmd()) -g 2 -e $code` stderr = IOBuffer() - success(pipeline(Cmd(cmd); stdout=stdout, stderr=stderr)) && isempty(String(take!(stderr))) + @test success(pipeline(Cmd(cmd); stdout, stderr)) + @test readchomp(stderr) == "" end @testset "code_ircode" begin @@ -352,6 +359,25 @@ end @test first(only(Base.code_ircode(demo; optimize_until = "SROA"))) isa Compiler.IRCode end +# slots after SSA conversion +function f_with_slots(a, b) + # `c` and `d` are local variables + c = a + b + d = c > 0 + return (c, d) +end +let # #self#, a, b, c, d + unopt = code_typed1(f_with_slots, (Int,Int); optimize=false) + @test length(unopt.slotnames) == length(unopt.slotflags) == length(unopt.slottypes) == 5 + ir_withslots = first(only(Base.code_ircode(f_with_slots, (Int,Int); optimize_until="convert"))) + @test length(ir_withslots.argtypes) == 5 + # #self#, a, b + opt = code_typed1(f_with_slots, (Int,Int); optimize=true) + @test length(opt.slotnames) == length(opt.slotflags) == length(opt.slottypes) == 3 + ir_ssa = first(only(Base.code_ircode(f_with_slots, (Int,Int); optimize_until="slot2reg"))) + @test length(ir_ssa.argtypes) == 3 +end + let function test_useref(stmt, v, op) if isa(stmt, Expr) @@ -397,7 +423,7 @@ let Expr(:enter, 11), Expr(:call, :+, SSAValue(3), 1), Expr(:throw_undef_if_not, :expected, false), - Expr(:leave, 1), + Expr(:leave, Core.SSAValue(1)), Expr(:(=), SSAValue(1), Expr(:call, :+, SSAValue(3), 1)), UpsilonNode(), UpsilonNode(SSAValue(2)), @@ -414,3 +440,248 @@ let test_userefs(body) end + +let ir = Base.code_ircode((Bool,Any)) do c, x + println(x, 1) #1 + if c + println(x, 2) #2 + else + println(x, 3) #3 + end + println(x, 4) #4 + end |> only |> first + # IR legality check + @test length(ir.cfg.blocks) == 4 + for i = 1:4 + @test any(ir.cfg.blocks[i].stmts) do j + inst = ir.stmts[j][:stmt] + iscall((ir, println), inst) && + inst.args[3] == i + end + end + # domination analysis + domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) + @test Core.Compiler.dominates(domtree, 1, 2) + @test Core.Compiler.dominates(domtree, 1, 3) + 
@test Core.Compiler.dominates(domtree, 1, 4) + for i = 2:4 + for j = 1:4 + i == j && continue + @test !Core.Compiler.dominates(domtree, i, j) + end + end + # post domination analysis + post_domtree = Core.Compiler.construct_postdomtree(ir.cfg.blocks) + @test Core.Compiler.postdominates(post_domtree, 4, 1) + @test Core.Compiler.postdominates(post_domtree, 4, 2) + @test Core.Compiler.postdominates(post_domtree, 4, 3) + for i = 1:3 + for j = 1:4 + i == j && continue + @test !Core.Compiler.postdominates(post_domtree, i, j) + end + end +end + +@testset "issue #46967: undef stmts introduced by compaction" begin + # generate some IR + function foo(i) + j = i+42 + j == 1 ? 1 : 2 + end + ir = only(Base.code_ircode(foo, (Int,)))[1] + instructions = length(ir.stmts) + + # get the addition instruction + add_stmt = ir.stmts[1] + @test Meta.isexpr(add_stmt[:stmt], :call) && add_stmt[:stmt].args[3] == 42 + + # replace the addition with a slightly different one + inst = Core.Compiler.NewInstruction(Expr(:call, add_stmt[:stmt].args[1], add_stmt[:stmt].args[2], 999), Int) + node = Core.Compiler.insert_node!(ir, 1, inst) + Core.Compiler.setindex!(add_stmt, node, :stmt) + + # perform compaction (not by calling compact! because with DCE the bug doesn't trigger) + compact = Core.Compiler.IncrementalCompact(ir) + state = Core.Compiler.iterate(compact) + while state !== nothing + state = Core.Compiler.iterate(compact, state[2]) + end + ir = Core.Compiler.complete(compact) + + # test that the inserted node was compacted + @test Core.Compiler.length(ir.new_nodes) == 0 + + # test that we performed copy propagation, but that the undef node was trimmed + @test length(ir.stmts) == instructions + + @test show(devnull, ir) === nothing +end + +@testset "IncrementalCompact statefulness" begin + foo(i) = i == 1 ? 1 : 2 + ir = only(Base.code_ircode(foo, (Int,)))[1] + compact = Core.Compiler.IncrementalCompact(ir) + + # set up first iterator + x = Core.Compiler.iterate(compact) + x = Core.Compiler.iterate(compact, x[2]) + + # set up second iterator + x = Core.Compiler.iterate(compact) + + # consume remainder + while x !== nothing + x = Core.Compiler.iterate(compact, x[2]) + end + + ir = Core.Compiler.complete(compact) + @test Core.Compiler.verify_ir(ir) === nothing +end + +# insert_node! operations +# ======================= + +import Core: SSAValue +import Core.Compiler: NewInstruction, insert_node! + +# insert_node! for pending node +let ir = Base.code_ircode((Int,Int); optimize_until="inlining") do a, b + a^b + end |> only |> first + @test length(ir.stmts) == 2 + @test Meta.isexpr(ir.stmts[1][:stmt], :invoke) + + newssa = insert_node!(ir, SSAValue(1), NewInstruction(Expr(:call, println, SSAValue(1)), Nothing), #=attach_after=#true) + newssa = insert_node!(ir, newssa, NewInstruction(Expr(:call, println, newssa), Nothing), #=attach_after=#true) + + ir = Core.Compiler.compact!(ir) + @test length(ir.stmts) == 4 + @test Meta.isexpr(ir.stmts[1][:stmt], :invoke) + call1 = ir.stmts[2][:stmt] + @test iscall((ir,println), call1) + @test call1.args[2] === SSAValue(1) + call2 = ir.stmts[3][:stmt] + @test iscall((ir,println), call2) + @test call2.args[2] === SSAValue(2) +end + +# Issue #50379 - insert_node!(::IncrementalCompact, ...) 
at end of basic block +let code = Any[ + # block 1 + #= %1: =# Expr(:boundscheck), + #= %2: =# Core.Compiler.GotoIfNot(SSAValue(1), 4), + # block 2 + #= %3: =# Expr(:call, println, Argument(1)), + # block 3 + #= %4: =# Core.PhiNode(), + #= %5: =# Core.Compiler.ReturnNode(), + ] + ir = make_ircode(code) + + # Insert another call at end of "block 2" + compact = Core.Compiler.IncrementalCompact(ir) + new_inst = NewInstruction(Expr(:call, println, Argument(1)), Nothing) + insert_node!(compact, SSAValue(3), new_inst, #= attach_after =# true) + + # Complete iteration + x = Core.Compiler.iterate(compact) + while x !== nothing + x = Core.Compiler.iterate(compact, x[2]) + end + ir = Core.Compiler.complete(compact) + + @test Core.Compiler.verify_ir(ir) === nothing +end + +# compact constant PiNode +let code = Any[ + PiNode(0.0, Const(0.0)) + ReturnNode(SSAValue(1)) + ] + ir = make_ircode(code) + ir = Core.Compiler.compact!(ir) + @test fully_eliminated(ir) +end + +# insert_node! with new instruction with flag computed +let ir = Base.code_ircode((Int,Int); optimize_until="inlining") do a, b + a^b + end |> only |> first + invoke_idx = findfirst(ir.stmts.stmt) do @nospecialize(x) + Meta.isexpr(x, :invoke) + end + @test invoke_idx !== nothing + invoke_expr = ir.stmts.stmt[invoke_idx] + + # effect-ful node + let compact = Core.Compiler.IncrementalCompact(Core.Compiler.copy(ir)) + insert_node!(compact, SSAValue(1), NewInstruction(Expr(:call, println, SSAValue(1)), Nothing), #=attach_after=#true) + state = Core.Compiler.iterate(compact) + while state !== nothing + state = Core.Compiler.iterate(compact, state[2]) + end + ir = Core.Compiler.finish(compact) + new_invoke_idx = findfirst(ir.stmts.stmt) do @nospecialize(x) + x == invoke_expr + end + @test new_invoke_idx !== nothing + new_call_idx = findfirst(ir.stmts.stmt) do @nospecialize(x) + iscall((ir,println), x) && x.args[2] === SSAValue(invoke_idx) + end + @test new_call_idx !== nothing + @test new_call_idx == new_invoke_idx+1 + end + + # effect-free node + let compact = Core.Compiler.IncrementalCompact(Core.Compiler.copy(ir)) + insert_node!(compact, SSAValue(1), NewInstruction(Expr(:call, GlobalRef(Base, :add_int), SSAValue(1), SSAValue(1)), Int), #=attach_after=#true) + state = Core.Compiler.iterate(compact) + while state !== nothing + state = Core.Compiler.iterate(compact, state[2]) + end + ir = Core.Compiler.finish(compact) + + ir = Core.Compiler.finish(compact) + new_invoke_idx = findfirst(ir.stmts.stmt) do @nospecialize(x) + x == invoke_expr + end + @test new_invoke_idx !== nothing + new_call_idx = findfirst(ir.stmts.stmt) do @nospecialize(x) + iscall((ir,Base.add_int), x) && x.args[2] === SSAValue(invoke_idx) + end + @test new_call_idx === nothing # should be deleted during the compaction + end +end + +@testset "GotoIfNot folding" begin + # After IRCode conversion, following the targets of a GotoIfNot should never lead to + # statically unreachable code. + function f_with_maybe_nonbool_cond(a::Int, r::Bool) + a = r ? true : a + if a + # The following conditional can be resolved statically, since `a === true` + # This test checks that it becomes a static `goto` despite its wide slottype. + x = a ? 1 : 2. + else + x = a ? 1 : 2. 
+ end + return x + end + let + # At least some statements should have been found to be statically unreachable and wrapped in Const(...)::Union{} + unopt = code_typed1(f_with_maybe_nonbool_cond, (Int, Bool); optimize=false) + @test any(j -> isa(unopt.code[j], Core.Const) && unopt.ssavaluetypes[j] == Union{}, 1:length(unopt.code)) + + # Any GotoIfNot destinations after IRCode conversion should not be statically unreachable + ircode = first(only(Base.code_ircode(f_with_maybe_nonbool_cond, (Int, Bool); optimize_until="convert"))) + for i = 1:length(ircode.stmts) + expr = ircode.stmts[i][:stmt] + if isa(expr, GotoIfNot) + # If this statement is Core.Const(...)::Union{}, that means this code was not reached + @test !(isa(ircode.stmts[i+1][:stmt], Core.Const) && (unopt.ssavaluetypes[i+1] === Union{})) + @test !(isa(ircode.stmts[expr.dest][:stmt], Core.Const) && (unopt.ssavaluetypes[expr.dest] === Union{})) + end + end + end +end diff --git a/test/compiler/validation.jl b/test/compiler/validation.jl index ffa79ed1c823d..5fd074fee73ae 100644 --- a/test/compiler/validation.jl +++ b/test/compiler/validation.jl @@ -20,12 +20,11 @@ end msig = Tuple{typeof(f22938),Int,Int,Int,Int} world = Base.get_world_counter() -match = Base._methods_by_ftype(msig, -1, world)[] +match = only(Base._methods_by_ftype(msig, -1, world)) mi = Core.Compiler.specialize_method(match) -c0 = Core.Compiler.retrieve_code_info(mi) +c0 = Core.Compiler.retrieve_code_info(mi, world) -@test isempty(Core.Compiler.validate_code(mi)) -@test isempty(Core.Compiler.validate_code(c0)) +@test isempty(Core.Compiler.validate_code(mi, c0)) @testset "INVALID_EXPR_HEAD" begin c = copy(c0) @@ -116,7 +115,7 @@ end @testset "SIGNATURE_NARGS_MISMATCH" begin old_sig = mi.def.sig mi.def.sig = Tuple{1,2} - errors = Core.Compiler.validate_code(mi) + errors = Core.Compiler.validate_code(mi, nothing) mi.def.sig = old_sig @test length(errors) == 1 @test errors[1].kind === Core.Compiler.SIGNATURE_NARGS_MISMATCH @@ -132,7 +131,7 @@ end @testset "SLOTNAMES_NARGS_MISMATCH" begin mi.def.nargs += 20 - errors = Core.Compiler.validate_code(mi) + errors = Core.Compiler.validate_code(mi, c0) mi.def.nargs -= 20 @test length(errors) == 2 @test count(e.kind === Core.Compiler.SLOTNAMES_NARGS_MISMATCH for e in errors) == 1 diff --git a/test/complex.jl b/test/complex.jl index 20470dd5617e7..d798cfe16489c 100644 --- a/test/complex.jl +++ b/test/complex.jl @@ -44,7 +44,12 @@ end @testset for T in (Float16, Float32, Float64, BigFloat) t = true f = false - + @testset "equality" begin + @test isequal(T(0.0)*im, T(0.0)) + @test !isequal(T(0.0)*im, T(-0.0)) + @test isequal(Complex(T(-0.0), T(0.0)), T(-0.0)) + @test !isequal(T(-0.0)*im, T(-0.0)) + end @testset "add and subtract" begin @test isequal(T(+0.0) + im, Complex(T(+0.0), T(+1.0))) @test isequal(T(-0.0) + im, Complex(T(-0.0), T(+1.0))) @@ -378,6 +383,7 @@ import Base.Math.@horner @test isequal(log1p(complex(-2, 1e-10)), log(1 + complex(-2, 1e-10))) @test isequal(log1p(complex(1, Inf)), complex(Inf, pi/2)) @test isequal(log1p(complex(1, -Inf)), complex(Inf, -pi/2)) + @test isequal(log1p(complex(1e-200, 5e-175)), complex(1e-200, 5e-175)) for z in (1e-10+1e-9im, 1e-10-1e-9im, -1e-10+1e-9im, -1e-10-1e-9im) @test log1p(z) ≈ @horner(z, 0, 1, -0.5, 1/3, -0.25, 0.2) @@ -935,6 +941,7 @@ end @test cispi(0.0+0.0im) == cispi(0) @test cispi(1.0+0.0im) == cispi(1) @test cispi(2.0+0.0im) == cispi(2) + @test cispi(5im) ≈ exp(-5pi) rtol=1e-10 # https://github.com/JuliaLang/julia/pull/45945 end @testset "exp2" begin diff --git a/test/copy.jl 
b/test/copy.jl index 04fda36728e62..633beee5f2af3 100644 --- a/test/copy.jl +++ b/test/copy.jl @@ -245,3 +245,26 @@ end @testset "deepcopy_internal arrays" begin @test (@inferred Base.deepcopy_internal(zeros(), IdDict())) == zeros() end + +@testset "`copyto!`'s unaliasing" begin + a = view([1:3;], :) + @test copyto!(a, 2, a, 1, 2) == [1;1:2;] + a = [1:3;] + @test copyto!(a, 2:3, 1:1, a, 1:2, 1:1) == [1;1:2;] +end + +@testset "`deepcopy` a `GenericCondition`" begin + a = Base.GenericCondition(ReentrantLock()) + @test !islocked(a.lock) + lock(a.lock) + @test islocked(a.lock) + b = deepcopy(a) + @test typeof(a) === typeof(b) + @test a != b + @test a !== b + @test typeof(a.lock) === typeof(b.lock) + @test a.lock != b.lock + @test a.lock !== b.lock + @test islocked(a.lock) + @test !islocked(b.lock) +end diff --git a/test/core.jl b/test/core.jl index e5b1c231d39a8..25264c689fcfb 100644 --- a/test/core.jl +++ b/test/core.jl @@ -14,17 +14,37 @@ include("testenv.jl") # sanity tests that our built-in types are marked correctly for const fields for (T, c) in ( (Core.CodeInfo, []), - (Core.CodeInstance, [:def]), - (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :pure, :is_for_opaque_closure, :constprop=#]), - (Core.MethodInstance, [#=:def, :specTypes, :sparam_vals]=#]), + (Core.CodeInstance, [:def, :rettype, :exctype, :rettype_const, :ipo_purity_bits, :analysis_results]), + (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :is_for_opaque_closure, :constprop=#]), + (Core.MethodInstance, [#=:def, :specTypes, :sparam_vals=#]), (Core.MethodTable, [:module]), (Core.TypeMapEntry, [:sig, :simplesig, :guardsigs, :min_world, :max_world, :func, :isleafsig, :issimplesig, :va]), (Core.TypeMapLevel, []), - (Core.TypeName, [:name, :module, :names, :atomicfields, :constfields, :wrapper, :mt, :hash, :n_uninitialized, :flags]), + (Core.TypeName, [:name, :module, :names, :wrapper, :mt, :hash, :n_uninitialized, :flags]), (DataType, [:name, :super, :parameters, :instance, :hash]), + (TypeVar, [:name, :ub, :lb]), + (Core.Memory, [:length, :ptr]), + (Core.GenericMemoryRef, [:mem, :ptr_or_offset]), ) @test Set((fieldname(T, i) for i in 1:fieldcount(T) if isconst(T, i))) == Set(c) end +# +# sanity tests that our built-in types are marked correctly for atomic fields +for (T, c) in ( + (Core.CodeInfo, []), + (Core.CodeInstance, [:next, :inferred, :purity_bits, :invoke, :specptr, :precompile]), + (Core.Method, []), + (Core.MethodInstance, [:uninferred, :cache, :precompiled]), + (Core.MethodTable, [:defs, :leafcache, :cache, :max_args]), + (Core.TypeMapEntry, [:next]), + (Core.TypeMapLevel, [:arg1, :targ, :name1, :tname, :list, :any]), + (Core.TypeName, [:cache, :linearcache]), + (DataType, [:types, :layout]), + (Core.Memory, []), + (Core.GenericMemoryRef, []), + ) + @test Set((fieldname(T, i) for i in 1:fieldcount(T) if Base.isfieldatomic(T, i))) == Set(c) +end @test_throws(ErrorException("setfield!: const field .name of type DataType cannot be changed"), setfield!(Int, :name, Int.name)) @@ -41,14 +61,14 @@ mutable struct ABCDconst c const d::Union{Int,Nothing} end -@test_throws(ErrorException("invalid redefinition of constant ABCDconst"), +@test_throws(ErrorException("invalid redefinition of constant $(nameof(curmod)).ABCDconst"), mutable struct ABCDconst const a const b::Int c d::Union{Int,Nothing} end) -@test_throws(ErrorException("invalid 
redefinition of constant ABCDconst"), +@test_throws(ErrorException("invalid redefinition of constant $(nameof(curmod)).ABCDconst"), mutable struct ABCDconst a b::Int @@ -259,6 +279,30 @@ let mi = T26321{3,NTuple{3,Int}}((1,2,3)), mf = T26321{3,NTuple{3,Float64}}((1.0 @test a isa Vector{<:T26321{3}} end +@test Base.return_types() do + typejoin(Int, UInt) +end |> only == Type{typejoin(Int, UInt)} +@test Base.return_types() do + typejoin(Int, UInt, Float64) +end |> only == Type{typejoin(Int, UInt, Float64)} + +let res = @test_throws TypeError let + Base.Experimental.@force_compile + typejoin(1, 2) + nothing + end + err = res.value + @test err.func === :<: +end +let res = @test_throws TypeError let + Base.Experimental.@force_compile + typejoin(1, 2, 3) + nothing + end + err = res.value + @test err.func === :<: +end + # promote_typejoin returns a Union only with Nothing/Missing combined with concrete types for T in (Nothing, Missing) @test Base.promote_typejoin(Int, Float64) === Real @@ -334,8 +378,8 @@ let ft = Base.datatype_fieldtypes @test ft(elT2.body)[1].parameters[1] === elT2 @test Base.isconcretetype(ft(elT2.body)[1]) end -#struct S22624{A,B,C} <: Ref{S22624{Int64,A}}; end -@test_broken @isdefined S22624 +struct S22624{A,B,C} <: Ref{S22624{Int,A}}; end +@test sizeof(S22624) == sizeof(S22624{Int,Int,Int}) == 0 # issue #42297 mutable struct Node42297{T, V} @@ -374,6 +418,18 @@ mutable struct FooFoo{A,B} y::FooFoo{A} end @test FooFoo{Int} <: FooFoo{Int,AbstractString}.types[1] +# make sure this self-referential struct doesn't crash type layout +struct SelfTyA{V} + a::Base.RefValue{V} +end +struct SelfTyB{T} + a::T + b::SelfTyA{SelfTyB{T}} +end +let T = Base.RefValue{SelfTyB{Int}} + @test sizeof(T) === sizeof(Int) + @test sizeof(T.types[1]) === 2 * sizeof(Int) +end let x = (2,3) @test +(x...) 
== 5 @@ -496,7 +552,7 @@ function i18408() return (x -> i) end let f = i18408() - @test_throws UndefVarError(:i) f(0) + @test_throws UndefVarError(:i, :local) f(0) end # issue #23558 @@ -556,7 +612,7 @@ begin global f7234_cnt += -10000 end end -@test_throws UndefVarError(:glob_x2) f7234_a() +@test_throws UndefVarError(:glob_x2, :local) f7234_a() @test f7234_cnt == 1 begin global glob_x2 = 24 @@ -566,7 +622,7 @@ begin global f7234_cnt += -10000 end end -@test_throws UndefVarError(:glob_x2) f7234_b() +@test_throws UndefVarError(:glob_x2, :local) f7234_b() @test f7234_cnt == 2 # globals can accessed if declared for i = 1:2 @@ -674,18 +730,18 @@ end f21900_cnt = 0 function f21900() for i = 1:1 - x = 0 + x_global_undefined_error = 0 end global f21900_cnt += 1 - x # should be global + x_global_undefined_error # should be global global f21900_cnt += -1000 nothing end -@test_throws UndefVarError(:x) f21900() +@test_throws UndefVarError(:x_global_undefined_error, @__MODULE__) f21900() @test f21900_cnt == 1 # use @eval so this runs as a toplevel scope block -@test_throws UndefVarError(:foo21900) @eval begin +@test_throws UndefVarError(:foo21900, @__MODULE__) @eval begin for i21900 = 1:10 local bar21900 for j21900 = 1:10 @@ -698,7 +754,7 @@ end @test !@isdefined(foo21900) @test !@isdefined(bar21900) bar21900 = 0 -@test_throws UndefVarError(:foo21900) @eval begin +@test_throws UndefVarError(:foo21900, @__MODULE__) @eval begin for i21900 = 1:10 global bar21900 for j21900 = 1:10 @@ -764,11 +820,15 @@ let @test isassigned(a,1) && !isassigned(a,2) a = Vector{Float64}(undef,1) @test isassigned(a,1) + @test isassigned(a,1,1) @test isassigned(a) @test !isassigned(a,2) a = Array{Float64}(undef, 2, 2, 2) @test isassigned(a,1) - @test isassigned(a) + @test isassigned(a,8) + @test isassigned(a,2,2,2) + @test isassigned(a,2,2,2,1) + @test !isassigned(a) @test !isassigned(a,9) a = Array{Float64}(undef, 1) @test isassigned(a,1) @@ -776,15 +836,22 @@ let @test !isassigned(a,2) a = Array{Float64}(undef, 2, 2, 2, 2) @test isassigned(a,1) - @test isassigned(a) + @test isassigned(a,2,2,2,2) + @test isassigned(a,2,2,2,2,1) + @test isassigned(a,16) + @test !isassigned(a) @test !isassigned(a,17) + @test !isassigned(a,3,1,1,1) + @test !isassigned(a,1,3,1,1) + @test !isassigned(a,1,1,3,1) + @test !isassigned(a,1,1,1,3) end # isassigned, issue #11167 mutable struct Type11167{T,N} end function count11167() let cache = Type11167.body.body.name.cache - return sum(i -> isassigned(cache, i), 0:length(cache)) + return count(!isnothing, cache) end end @test count11167() == 0 @@ -1378,6 +1445,9 @@ let @test unsafe_load(p2) == 101 unsafe_store!(p2, 909, 3) @test a2 == [101,102,909] + # test for issue 51954 + @test pointer(a.ref.mem)===pointer(a) + @test pointer(a.ref.mem,2)===pointer(a,2) end @test unsafe_pointer_to_objref(ccall(:jl_call1, Ptr{Cvoid}, (Any,Any), @@ -1407,6 +1477,7 @@ let @test occursin("is not properly aligned to $(sizeof(Int)) bytes", res.value.msg) res = @test_throws ArgumentError unsafe_wrap(Array, pointer(a) + 1, (1, 1)) @test occursin("is not properly aligned to $(sizeof(Int)) bytes", res.value.msg) + res = @test_throws MethodError unsafe_wrap(Vector{UInt8}, pointer(Int32[1]), (sizeof(Int32),)) end struct FooBar2515 @@ -1642,7 +1713,9 @@ end # issue #3221 let x = fill(nothing, 1) - @test_throws MethodError x[1] = 1 + @test_throws ErrorException("cannot convert a value to nothing for assignment") x[1] = 1 + x = Vector{Union{}}(undef, 1) + @test_throws ArgumentError("cannot convert a value to Union{} for 
assignment") x[1] = 1 end # issue #3220 @@ -1987,9 +2060,8 @@ mutable struct TupleParam{P} x::Bool end -function tupledispatch(a::TupleParam{(1,:a)}) - a.x -end +tupledispatch(a::TupleParam{(1,:a)}) = a.x +tupledispatch(a::TupleParam{(1,(:a,))}) = 42 # tuples can be used as type params let t1 = TupleParam{(1,:a)}(true), @@ -2001,6 +2073,10 @@ let t1 = TupleParam{(1,:a)}(true), # dispatch works properly @test tupledispatch(t1) == true @test_throws MethodError tupledispatch(t2) + + @test tupledispatch(TupleParam{(1,(:a,))}(true)) === 42 + @test_throws TypeError TupleParam{NamedTuple{(:a,), Tuple{Any}}((1,))} + @test_throws TypeError Val{NamedTuple{(:a,), Tuple{NamedTuple{<:Any,Tuple{Int}}}}(((x=2,),))} end # issue #5254 @@ -2525,7 +2601,7 @@ struct D14919 <: Function; end for f in (:Any, :Function, :(Core.Builtin), :(Union{Nothing, Type}), :(Union{typeof(+), Type}), :(Union{typeof(+), typeof(-)}), :(Base.Callable)) @test_throws ErrorException("Method dispatch is unimplemented currently for this method signature") @eval (::$f)() = 1 end -for f in (:(Core.arrayref), :((::typeof(Core.arrayref))), :((::Core.IntrinsicFunction))) +for f in (:(Core.getfield), :((::typeof(Core.getfield))), :((::Core.IntrinsicFunction))) @test_throws ErrorException("cannot add methods to a builtin function") @eval $f() = 1 end @@ -2838,10 +2914,10 @@ let f end end for m in methods(f10373) - @test m.name == :f10373 + @test m.name === :f10373 end for m in methods(g10373) - @test m.name == :g10373 + @test m.name === :g10373 end # issue #7221 @@ -3607,7 +3683,7 @@ let @test false catch err @test isa(err, TypeError) - @test err.func == :Vararg + @test err.func === :Vararg @test err.expected == Int @test err.got == Int end @@ -3617,7 +3693,7 @@ let @test false catch err @test isa(err, TypeError) - @test err.func == :Vararg + @test err.func === :Vararg @test err.expected == Int @test err.got == 0x1 end @@ -3828,7 +3904,8 @@ PossiblyInvalidUnion{T} = Union{T,Int} # issue #13007 call13007(::Type{Array{T,N}}) where {T,N} = 0 call13007(::Type{Array}) = 1 -@test length(Base._methods(call13007, Tuple{Type{x} where x<:Array}, 4, typemax(UInt))) == 2 +@test Base._methods(call13007, Tuple{Type{x} where x<:Array}, 4, typemax(UInt)) === nothing +@test length(Base._methods(call13007, Tuple{Type{x} where x<:Array}, 4, Base.get_world_counter())) == 2 # detecting cycles during type intersection, e.g. 
#1631 cycle_in_solve_tvar_constraints(::Type{Some{S}}, x::S) where {S} = 0 @@ -4052,7 +4129,29 @@ end let z1 = Z14477() @test isa(z1, Z14477) @test isa(z1.fld, Z14477) + @test isdefined(z1, :fld) + @test !isdefined(z1.fld, :fld) end +struct Z14477B + fld::Union{Nothing,Z14477B} + Z14477B() = new(new(nothing)) +end +let z1 = Z14477B() + @test isa(z1, Z14477B) + @test isa(z1.fld, Z14477B) + @test isa(z1.fld.fld, Nothing) +end +struct Z14477C{T} + fld::Z14477C{Int8} + Z14477C() = new{Int16}(new{Int8}()) +end +let z1 = Z14477C() + @test isa(z1, Z14477C) + @test isa(z1.fld, Z14477C) + @test isdefined(z1, :fld) + @test !isdefined(z1.fld, :fld) +end + # issue #8846, generic macros macro m8846(a, b=0) @@ -4077,7 +4176,7 @@ let foo(x::Union{T, Nothing}, y::Union{T, Nothing}) where {T} = 1 end let foo(x::Union{T, Nothing}, y::Union{T, Nothing}) where {T} = T @test foo(1, nothing) === Int - @test_throws UndefVarError(:T) foo(nothing, nothing) + @test_throws UndefVarError(:T, :static_parameter) foo(nothing, nothing) end module TestMacroGlobalFunction @@ -4131,14 +4230,14 @@ foo9677(x::Array) = invoke(foo9677, Tuple{AbstractArray}, x) # issue #6846 f6846() = (please6846; 2) -@test_throws UndefVarError(:please6846) f6846() +@test_throws UndefVarError(:please6846, @__MODULE__) f6846() module M6846 macro f() return esc(:(please6846; 2)) end end -@test_throws UndefVarError(:please6846) @M6846.f() +@test_throws UndefVarError(:please6846, @__MODULE__) @M6846.f() # issue #14758 @test isa(@eval(f14758(; $([]...)) = ()), Function) @@ -4229,7 +4328,7 @@ end let ex = quote $(if true; :(test); end) end - @test ex.args[2] == :test + @test ex.args[2] === :test end # issue #15180 @@ -4242,6 +4341,7 @@ function f15180(x::T) where T end @test map(f15180(1), [1,2]) == [(Int,1),(Int,1)] +using Base: _growbeg!, _deletebeg!, _growend!, _deleteend! struct ValueWrapper vpadding::NTuple{2,VecElement{UInt}} value @@ -4250,43 +4350,44 @@ end Base.convert(::Type{ValueWrapper}, x) = ValueWrapper(x) for T in (Any, ValueWrapper) let ary = Vector{T}(undef, 10) - check_undef_and_fill(ary, rng) = for i in rng - @test !isassigned(ary, i) + check_undef_and_fill(ary, rng) = all(i -> begin + isassigned(ary, i) && return false ary[i] = (Float64(i), i) # some non-cached content - @test isassigned(ary, i) - end + isassigned(ary, i) || return false + return true + end, rng) # Check if the memory is initially zerod and fill it with value # to check if these values are not reused later. 
- check_undef_and_fill(ary, 1:10) + @test check_undef_and_fill(ary, 1:10) # Check if the memory grown at the end are zerod - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 11:20) + _growend!(ary, 10) + @test check_undef_and_fill(ary, 11:20) # Make sure the content of the memory deleted at the end are not reused - ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 5) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 5) - check_undef_and_fill(ary, 16:20) + _deleteend!(ary, 5) + _growend!(ary, 5) + @test check_undef_and_fill(ary, 16:20) # Now check grow/del_end ary = Vector{T}(undef, 1010) - check_undef_and_fill(ary, 1:1010) + @test check_undef_and_fill(ary, 1:1010) # This del_beg should move the buffer - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 1000) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 1000) - check_undef_and_fill(ary, 1:1000) + _deletebeg!(ary, 1000) + _growbeg!(ary, 1000) + @test check_undef_and_fill(ary, 1:1000) ary = Vector{T}(undef, 1010) - check_undef_and_fill(ary, 1:1010) + @test check_undef_and_fill(ary, 1:1010) # This del_beg should not move the buffer - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1:10) + _deletebeg!(ary, 10) + _growbeg!(ary, 10) + @test check_undef_and_fill(ary, 1:10) ary = Vector{T}(undef, 1010) - check_undef_and_fill(ary, 1:1010) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1011:1020) - ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), ary, 10) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 10) - check_undef_and_fill(ary, 1:10) + @test check_undef_and_fill(ary, 1:1010) + _growend!(ary, 10) + @test check_undef_and_fill(ary, 1011:1020) + _deleteend!(ary, 10) + _growbeg!(ary, 10) + @test check_undef_and_fill(ary, 1:10) # Make sure newly malloc'd buffers are filled with 0 # test this for a few different sizes since we need to make sure @@ -4299,33 +4400,51 @@ for T in (Any, ValueWrapper) GC.gc() GC.gc() GC.gc() - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4) - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 4) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, n) - ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), ary, 4) - check_undef_and_fill(ary, 1:(2n + 4)) + _growbeg!(ary, 4) + _deletebeg!(ary, 4) + _growend!(ary, n) + _growbeg!(ary, 4) + @test check_undef_and_fill(ary, 1:(2n + 4)) end ary = Vector{T}(undef, 100) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10000) + _growend!(ary, 10000) ary[:] = 1:length(ary) - ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), ary, 10000) + _deletebeg!(ary, 10000) # grow on the back until a buffer reallocation happens cur_ptr = pointer(ary) while cur_ptr == pointer(ary) len = length(ary) - ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), ary, 10) - for i in (len + 1):(len + 10) - @test !isassigned(ary, i) - end + _growend!(ary, 10) + result = @test all(i -> !isassigned(ary, i), (len + 1):(len + 10)) + result isa Test.Pass || break end - ary = Vector{T}(undef, 100) - ary[:] = 1:length(ary) - ccall(:jl_array_grow_at, Cvoid, (Any, Csize_t, Csize_t), ary, 50, 10) - for i in 51:60 - @test !isassigned(ary, i) - end + # growat when copy into start of same buffer + ary = Vector{T}(undef, 10) + ary[:] = 1:10 + pushfirst!(ary, 0) + Base._growat!(ary, 3, 5) + @test all(i -> !isassigned(ary, i), 3:7) + @test all(i -> isassigned(ary, i), 8:length(ary)) + @test all(i -> isassigned(ary, 
i), 1:2) + + # growat when copy into end of same buffer + ary = Vector{T}(undef, 10) + ary[:] = 1:10 + push!(ary, 11) + Base._growat!(ary, 6, 10) + @test all(i -> !isassigned(ary, i), 6:15) + @test all(i -> isassigned(ary, i), 16:length(ary)) + @test all(i -> isassigned(ary, i), 1:5) + + # growat when copy to new buffer + ary = Vector{T}(undef, 10) + ary[:] = 1:10 + Base._growat!(ary, 6, 10) + @test all(i -> !isassigned(ary, i), 6:15) + @test all(i -> isassigned(ary, i), 16:length(ary)) + @test all(i -> isassigned(ary, i), 1:5) end end @@ -4411,8 +4530,13 @@ end # Make sure arrayset can handle `Array{T}` (where `T` is a type and not a # `TypeVar`) without crashing let - function arrayset_unknown_dim(::Type{T}, n) where T - Base.arrayset(true, reshape(Vector{T}(undef, 1), fill(1, n)...), 2, 1) + @noinline function arrayset_unknown_dim(::Type{T}, n) where T + a = Vector{T}(undef, 1) + fill!(a, 0) + a = reshape(a, fill(1, n)...)::Array{T} + @test a[1] === 0 + Core.memoryrefset!(a.ref, 2, :not_atomic, true) + @test a[1] === 2 end arrayset_unknown_dim(Any, 1) arrayset_unknown_dim(Any, 2) @@ -4422,88 +4546,6 @@ let arrayset_unknown_dim(Int, 3) end -module TestSharedArrayResize -using Test -# Attempting to change the shape of a shared array should unshare it and -# not modify the original data -function test_shared_array_resize(::Type{T}) where T - len = 100 - a = Vector{T}(undef, len) - function test_unshare(f) - a′ = reshape(reshape(a, (len ÷ 2, 2)), len) - a[:] = 1:length(a) - # The operation should fail on the owner shared array - # and has no side effect. - @test_throws ErrorException f(a) - @test a == [1:len;] - @test a′ == [1:len;] - @test pointer(a) == pointer(a′) - # The operation should pass on the non-owner shared array - # and should unshare the arrays with no effect on the original one. 
- f(a′) - @test a == [1:len;] - @test pointer(a) != pointer(a′) - end - - test_unshare(a->ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), a, 0)) - test_unshare(a->ccall(:jl_array_del_end, Cvoid, (Any, Csize_t), a, 1)) - test_unshare(a->ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), a, 0)) - test_unshare(a->ccall(:jl_array_del_beg, Cvoid, (Any, Csize_t), a, 1)) - test_unshare(a->deleteat!(a, 10)) - test_unshare(a->deleteat!(a, 90)) - test_unshare(a->ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), a, 0)) - test_unshare(a->ccall(:jl_array_grow_end, Cvoid, (Any, Csize_t), a, 1)) - test_unshare(a->ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), a, 0)) - test_unshare(a->ccall(:jl_array_grow_beg, Cvoid, (Any, Csize_t), a, 1)) - test_unshare(a->insert!(a, 10, 10)) - test_unshare(a->insert!(a, 90, 90)) -end -test_shared_array_resize(Int) -test_shared_array_resize(Any) -end - -module TestArrayNUL -using Test -function check_nul(a::Vector{UInt8}) - b = ccall(:jl_array_cconvert_cstring, - Ref{Vector{UInt8}}, (Vector{UInt8},), a) - @test unsafe_load(pointer(b), length(b) + 1) == 0x0 - return b === a -end - -a = UInt8[] -b = "aaa" -c = [0x2, 0x1, 0x3] - -@test check_nul(a) -@test check_nul(unsafe_wrap(Vector{UInt8},b)) -@test check_nul(c) -d = [0x2, 0x1, 0x3] -@test check_nul(d) -push!(d, 0x3) -@test check_nul(d) -push!(d, 0x3) -@test check_nul(d) -ccall(:jl_array_del_end, Cvoid, (Any, UInt), d, 2) -@test check_nul(d) -ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 1) -@test check_nul(d) -ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 1) -@test check_nul(d) -ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 10) -@test check_nul(d) -ccall(:jl_array_del_beg, Cvoid, (Any, UInt), d, 8) -@test check_nul(d) -ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), d, 8) -@test check_nul(d) -ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), d, 8) -@test check_nul(d) -f = unsafe_wrap(Array, pointer(d), length(d)) -@test !check_nul(f) -f = unsafe_wrap(Array, ccall(:malloc, Ptr{UInt8}, (Csize_t,), 10), 10, own = true) -@test !check_nul(f) -end - # Copy of `#undef` copyto!(Vector{Any}(undef, 10), Vector{Any}(undef, 10)) function test_copy_alias(::Type{T}) where T @@ -4855,12 +4897,19 @@ let a = Any[] @test a == [10, 2] end +# issue 47209 +struct f47209 + x::Int + f47209()::Nothing = new(1) +end +@test_throws ErrorException("cannot convert a value to nothing for assignment") f47209() + # issue #12096 let a = Val{Val{TypeVar(:_, Int)}}, b = Val{Val{x} where x<:Int} - @test !isdefined(a, :instance) - @test isdefined(b, :instance) + @test !Base.issingletontype(a) + @test Base.issingletontype(b) @test Base.isconcretetype(b) end @@ -4891,7 +4940,7 @@ function trigger14878() w.ext[:14878] = B14878(junk) # global junk not defined! 
return w end -@test_throws UndefVarError(:junk) trigger14878() +@test_throws UndefVarError(:junk, @__MODULE__) trigger14878() # issue #1090 function f1090(x)::Int @@ -5131,9 +5180,9 @@ let x = 1 @noinline g18444(a) = (x += 1; a[]) f18444_1(a) = invoke(sin, Tuple{Int}, g18444(a)) f18444_2(a) = invoke(sin, Tuple{Integer}, g18444(a)) - @test_throws ErrorException("invoke: argument type error") f18444_1(Ref{Any}(1.0)) + @test_throws "TypeError: in invoke: argument type error, expected" f18444_1(Ref{Any}(1.0)) @test x == 2 - @test_throws ErrorException("invoke: argument type error") f18444_2(Ref{Any}(1.0)) + @test_throws "TypeError: in invoke: argument type error, expected" f18444_2(Ref{Any}(1.0)) @test x == 3 @test f18444_1(Ref{Any}(1)) === sin(1) @test x == 4 @@ -5205,13 +5254,13 @@ end GC.enable(true) # issue #18710 -bad_tvars() where {T} = 1 +@test_warn "declares type variable T but does not use it" @eval bad_tvars() where {T} = 1 @test isa(which(bad_tvars, ()), Method) @test bad_tvars() === 1 -bad_tvars2() where {T} = T -@test_throws UndefVarError(:T) bad_tvars2() +@test_warn "declares type variable T but does not use it" @eval bad_tvars2() where {T} = T +@test_throws UndefVarError(:T, :static_parameter) bad_tvars2() missing_tvar(::T...) where {T} = T -@test_throws UndefVarError(:T) missing_tvar() +@test_throws UndefVarError(:T, :static_parameter) missing_tvar() @test missing_tvar(1) === Int @test missing_tvar(1, 2, 3) === Int @test_throws MethodError missing_tvar(1, 2, "3") @@ -5338,6 +5387,21 @@ function g37690() end @test g37690().x === 0 +# issue #48889 +function f48889() + let j=0, f, i + while j < 3 + i = j + 1 + if j == 0 + f = ()->i + end + j += 1 + end + f + end +end +@test f48889()() == 3 + function _assigns_and_captures_arg(a) a = a return ()->a @@ -5812,7 +5876,7 @@ function f_unused_undefined_sp(::T...) where T T return 0 end -@test_throws UndefVarError(:T) f_unused_undefined_sp() +@test_throws UndefVarError(:T, :static_parameter) f_unused_undefined_sp() # note: the constant `5` here should be > DataType.ninitialized. # This tests that there's no crash due to accessing Type.body.layout. @@ -5925,7 +5989,7 @@ module GlobalDef18933 global sincos nothing end - @test which(Main, :sincos) === Base.Math + @test which(@__MODULE__, :sincos) === Base.Math @test @isdefined sincos @test sincos === Base.sincos end @@ -5954,10 +6018,10 @@ const unboxedunions = [Union{Int8, Nothing}, @test Base.isbitsunion(unboxedunions[2]) @test Base.isbitsunion(unboxedunions[3]) -@test Base.bitsunionsize(unboxedunions[1]) == 1 -@test Base.bitsunionsize(unboxedunions[2]) == 2 -@test Base.bitsunionsize(unboxedunions[3]) == 16 -@test Base.bitsunionsize(unboxedunions[4]) == 8 +@test Base.aligned_sizeof(unboxedunions[1]) == 1 +@test Base.aligned_sizeof(unboxedunions[2]) == 2 +@test Base.aligned_sizeof(unboxedunions[3]) == 16 +@test Base.aligned_sizeof(unboxedunions[4]) == 8 @test sizeof(unboxedunions[1]) == 1 @test sizeof(unboxedunions[2]) == 2 @@ -6265,7 +6329,7 @@ for U in unboxedunions resize!(A, len) @test length(A) === len @test A[1] === initvalue2(F2) - @test typeof(A[end]) === F + @test typeof(A[end]) === F2 # deleteat! F = Base.uniontypes(U)[2] @@ -6353,304 +6417,291 @@ for U in unboxedunions end end -@testset "jl_array_grow_at_end" begin +@testset "array _growatend!" 
begin # start w/ array, set & check elements, grow it, check that elements stayed correct, set & check elements A = Vector{Union{Missing, UInt8}}(undef, 2) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing -# grow_at_end 2 resize!(A, 5) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === missing -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +# The rest of the values are unspecified +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) +@test isequal(A, [0x01, missing, 0x03, missing, 0x05]) # grow_at_end 1 Base._growat!(A, 4, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x05 - -Base.arrayset(true, A, missing, 1) -Base.arrayset(true, A, 0x02, 2) -Base.arrayset(true, A, missing, 3) -Base.arrayset(true, A, 0x04, 4) -Base.arrayset(true, A, missing, 5) -Base.arrayset(true, A, 0x06, 6) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x02 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x04 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x06 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x03 +#A[4] is unspecified +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x05 + +setindex!(A, missing, 1) +setindex!(A, 0x02, 2) +setindex!(A, missing, 3) +setindex!(A, 0x04, 4) +setindex!(A, missing, 5) +setindex!(A, 0x06, 6) +@test isequal(A, [missing, 0x2, missing, 0x4, missing, 0x6]) # grow_at_end 5 Base._growat!(A, 4, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x02 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x04 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x06 +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x02 +@test getindex(A, 3) === missing +#A[4] is unspecified +@test getindex(A, 5) === 0x04 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x06 # grow_at_end 6 resize!(A, 8) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x02 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x04 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x06 -@test Base.arrayref(true, A, 8) === missing +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x02 +@test getindex(A, 3) === missing +# A[4] still unspecified +@test getindex(A, 5) === 0x04 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 
0x06 +# A[8] is unspecified but test that it exists +@test getindex(A, 8) isa Any # grow_at_end 4 resize!(A, 1048576) resize!(A, 1048577) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x02 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x04 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x06 -@test Base.arrayref(true, A, 8) === missing -foreach(9:1048577) do i - @test Base.arrayref(true, A, i) === missing -end +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x02 +@test getindex(A, 3) === missing +# A[4] is still unspecified +@test getindex(A, 5) === 0x04 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x06 +@test getindex(A, 8) === missing +# 9:1048577 are unspecified foreach(9:1048577) do i - Base.arrayset(true, A, i % UInt8, i) - @test Base.arrayref(true, A, i) === i % UInt8 + setindex!(A, i % UInt8, i) + @test getindex(A, i) === i % UInt8 end # grow_at_end 3 A = Vector{Union{Missing, UInt8}}(undef, 1048577) foreach(1:1048577) do i - @test Base.arrayref(true, A, i) === missing - Base.arrayset(true, A, i % UInt8, i) - @test Base.arrayref(true, A, i) === i % UInt8 + @test getindex(A, i) === missing + setindex!(A, i % UInt8, i) + @test getindex(A, i) === i % UInt8 end Base._growat!(A, 1048576, 1) @test length(A) == 1048578 foreach(1:1048575) do i - @test Base.arrayref(true, A, i) === i % UInt8 @test A[i] === i % UInt8 end -@test Base.arrayref(true, A, 1048576) === missing -@test Base.arrayref(true, A, 1048577) === 1048576 % UInt8 -@test Base.arrayref(true, A, 1048578) === 1048577 % UInt8 +@test getindex(A, 1048576) === missing +@test getindex(A, 1048577) === 1048576 % UInt8 +@test getindex(A, 1048578) === 1048577 % UInt8 end # @testset -@testset "jl_array_grow_at_beg" begin +@testset "array _growatbeg!" 
begin # grow_at_beg 4 A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x01 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x03 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x01 +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x03 +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x05 # grow_at_beg 2 Base._growat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x01 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x03 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x01 +@test getindex(A, 4) === missing +@test getindex(A, 5) === 0x03 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x05 # grow_at_beg 1 Base._growat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x01 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x03 -@test Base.arrayref(true, A, 7) === missing -@test Base.arrayref(true, A, 8) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x01 +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x03 +@test getindex(A, 7) === missing +@test getindex(A, 8) === 0x05 # grow_at_beg 9 Base._growat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x01 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x03 -@test Base.arrayref(true, A, 8) === missing -@test Base.arrayref(true, A, 9) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === missing +@test getindex(A, 5) === 0x01 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x03 +@test getindex(A, 8) === missing +@test getindex(A, 9) === 0x05 # grow_at_beg 8 A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 2, 1) Base._growat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x03 -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) 
=== missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === missing +@test getindex(A, 5) === 0x03 +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x05 # grow_at_beg 5 A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 4, 1) Base._growat!(A, 4, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === missing -@test Base.arrayref(true, A, 7) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x03 +@test getindex(A, 4) === missing +@test getindex(A, 5) === missing +@test getindex(A, 6) === missing +@test getindex(A, 7) === 0x05 # grow_at_beg 6 Base._growat!(A, 2, 3) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x03 -@test Base.arrayref(true, A, 7) === missing -@test Base.arrayref(true, A, 8) === missing -@test Base.arrayref(true, A, 9) === missing -@test Base.arrayref(true, A, 10) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === missing +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x03 +@test getindex(A, 7) === missing +@test getindex(A, 8) === missing +@test getindex(A, 9) === missing +@test getindex(A, 10) === 0x05 # grow_at_beg 3 A = Vector{Union{Missing, UInt8}}(undef, 1048577) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x03 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x03 +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x05 foreach(7:length(A)) do i - @test Base.arrayref(true, A, i) === missing - Base.arrayset(true, A, i % UInt8, i) - @test Base.arrayref(true, A, i) === i % UInt8 + @test getindex(A, i) === missing + setindex!(A, i % UInt8, i) + @test getindex(A, i) === i % UInt8 end end # @testset -@testset "jl_array_del_at_beg" begin +@testset "array _deleteatbeg!" 
begin A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._deleteat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === 0x03 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x05 +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === 0x03 +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x05 Base._deleteat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === 0x03 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x05 +@test getindex(A, 1) === 0x03 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x05 A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._growat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x01 -@test Base.arrayref(true, A, 3) === missing -@test Base.arrayref(true, A, 4) === 0x03 -@test Base.arrayref(true, A, 5) === missing -@test Base.arrayref(true, A, 6) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x01 +@test getindex(A, 3) === missing +@test getindex(A, 4) === 0x03 +@test getindex(A, 5) === missing +@test getindex(A, 6) === 0x05 Base._deleteat!(A, 2, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing -@test Base.arrayref(true, A, 5) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x03 +@test getindex(A, 4) === missing +@test getindex(A, 5) === 0x05 Base._deleteat!(A, 1, 2) -@test Base.arrayref(true, A, 1) === 0x03 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x05 +@test getindex(A, 1) === 0x03 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x05 Base._deleteat!(A, 1, 1) -@test Base.arrayref(true, A, 1) === missing -@test Base.arrayref(true, A, 2) === 0x05 +@test getindex(A, 1) === missing +@test getindex(A, 2) === 0x05 end # @testset -@testset "jl_array_del_at_end" begin +@testset "array _deleteatend!" 
begin A = Vector{Union{Missing, UInt8}}(undef, 5) -Base.arrayset(true, A, 0x01, 1) -Base.arrayset(true, A, missing, 2) -Base.arrayset(true, A, 0x03, 3) -Base.arrayset(true, A, missing, 4) -Base.arrayset(true, A, 0x05, 5) +setindex!(A, 0x01, 1) +setindex!(A, missing, 2) +setindex!(A, 0x03, 3) +setindex!(A, missing, 4) +setindex!(A, 0x05, 5) Base._deleteat!(A, 5, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === 0x03 -@test Base.arrayref(true, A, 4) === missing +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === 0x03 +@test getindex(A, 4) === missing Base._deleteat!(A, 3, 1) -@test Base.arrayref(true, A, 1) === 0x01 -@test Base.arrayref(true, A, 2) === missing -@test Base.arrayref(true, A, 3) === missing +@test getindex(A, 1) === 0x01 +@test getindex(A, 2) === missing +@test getindex(A, 3) === missing end # @testset @@ -6672,23 +6723,23 @@ end # jl_array_shrink let A=Vector{Union{UInt8, Missing}}(undef, 1048577) - Base.arrayset(true, A, 0x01, 1) - Base.arrayset(true, A, missing, 2) - Base.arrayset(true, A, 0x03, 3) - Base.arrayset(true, A, missing, 4) - Base.arrayset(true, A, 0x05, 5) + setindex!(A, 0x01, 1) + setindex!(A, missing, 2) + setindex!(A, 0x03, 3) + setindex!(A, missing, 4) + setindex!(A, 0x05, 5) deleteat!(A, 6:1048577) - @test Base.arrayref(true, A, 1) === 0x01 - @test Base.arrayref(true, A, 2) === missing - @test Base.arrayref(true, A, 3) === 0x03 - @test Base.arrayref(true, A, 4) === missing - @test Base.arrayref(true, A, 5) === 0x05 + @test getindex(A, 1) === 0x01 + @test getindex(A, 2) === missing + @test getindex(A, 3) === 0x03 + @test getindex(A, 4) === missing + @test getindex(A, 5) === 0x05 sizehint!(A, 5) - @test Base.arrayref(true, A, 1) === 0x01 - @test Base.arrayref(true, A, 2) === missing - @test Base.arrayref(true, A, 3) === 0x03 - @test Base.arrayref(true, A, 4) === missing - @test Base.arrayref(true, A, 5) === 0x05 + @test getindex(A, 1) === 0x01 + @test getindex(A, 2) === missing + @test getindex(A, 3) === 0x03 + @test getindex(A, 4) === missing + @test getindex(A, 5) === 0x05 end # copyto!/vcat w/ internal padding @@ -6706,14 +6757,14 @@ primitive type TypeWith24Bits 24 end TypeWith24Bits(x::UInt32) = Core.Intrinsics.trunc_int(TypeWith24Bits, x) let x = TypeWith24Bits(0x112233), y = TypeWith24Bits(0x445566), z = TypeWith24Bits(0x778899) a = [x, x] - Core.arrayset(true, a, y, 2) + Core.memoryrefset!(Core.memoryref(a.ref, 2, true), y, :not_atomic, true) @test a == [x, y] a[2] = z @test a == [x, z] @test pointer(a, 2) - pointer(a, 1) == 4 b = [(x, x), (x, x)] - Core.arrayset(true, b, (x, y), 2) + Core.memoryrefset!(Core.memoryref(b.ref, 2, true), (x, y), :not_atomic, true) @test b == [(x, x), (x, y)] b[2] = (y, z) @test b == [(x, x), (y, z)] @@ -6815,7 +6866,7 @@ end # issue #21004 const PTuple_21004{N,T} = NTuple{N,VecElement{T}} @test_throws ArgumentError("too few elements for tuple type $PTuple_21004") PTuple_21004(1) -@test_throws UndefVarError(:T) PTuple_21004_2{N,T} = NTuple{N, VecElement{T}}(1) +@test_throws UndefVarError(:T, :static_parameter) PTuple_21004_2{N,T} = NTuple{N, VecElement{T}}(1) #issue #22792 foo_22792(::Type{<:Union{Int8,Int,UInt}}) = 1; @@ -6910,9 +6961,9 @@ g27209(x) = f27209(x ? 
nothing : 1.0) # Issue 27240 @inline function foo27240() if rand(Bool) - return foo_nonexistant_27240 + return foo_nonexistent_27240 else - return bar_nonexistant_27240 + return bar_nonexistent_27240 end end bar27240() = foo27240() @@ -7113,7 +7164,7 @@ end c28399 = 42 @test g28399(0)() == 42 @test g28399(1)() == 42 -@test_throws UndefVarError(:__undef_28399__) f28399() +@test_throws UndefVarError(:__undef_28399__, @__MODULE__) f28399() # issue #28445 mutable struct foo28445 @@ -7264,11 +7315,11 @@ struct sparse_t31649 end Base.convert(::Any, v::sparse_t31649) = copy(v.val) let spvec = sparse_t31649(zeros(Float64,5), Vector{Int64}()) - @test_throws MethodError repr(spvec) + @test_throws MethodError convert(Any, spvec) # Try manually putting the problematic method into the cache (in # the original issue compiling the showerror method caused this to happen) @test convert(Any, nothing) === nothing - @test_throws MethodError repr(spvec) + @test_throws MethodError convert(Any, spvec) end # Issue #31062 - Accidental recursion in jl_has_concrete_subtype @@ -7306,18 +7357,28 @@ let fc = FieldConvert(1.0, [2.0], 0x3, 0x4, 0x5) end @test ftype_eval[] == 1 let code = code_lowered(FieldConvert)[1].code - @test code[1] == Expr(:call, GlobalRef(Core, :apply_type), GlobalRef(@__MODULE__, :FieldConvert), GlobalRef(@__MODULE__, :FieldTypeA), Expr(:static_parameter, 1)) - @test code[2] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 1) - @test code[3] == Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(2), Core.SlotNumber(2)) - @test code[4] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 2) - @test code[5] == Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(4), Core.SlotNumber(3)) - @test code[6] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 4) - @test code[7] == Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(6), Core.SlotNumber(5)) - @test code[8] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(1), 5) - @test code[9] == Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(8), Core.SlotNumber(6)) - @test code[10] == Expr(:new, Core.SSAValue(1), Core.SSAValue(3), Core.SSAValue(5), Core.SlotNumber(4), Core.SSAValue(7), Core.SSAValue(9)) - @test code[11] == Core.ReturnNode(Core.SSAValue(10)) - end + local fc_global_ssa, sp1_ssa, apply_type_ssa, field_type_ssa, + field_type2_ssa, field_type4_ssa, field_type5_ssa, + slot_read_1, slot_read_2, slot_read_3, slot_read_4, + new_ssa + @test code[(fc_global_ssa = 1;)] == GlobalRef(@__MODULE__, :FieldConvert) + @test code[(sp1_ssa = 2;)] == Expr(:static_parameter, 1) + @test code[(apply_type_ssa = 3;)] == Expr(:call, GlobalRef(Core, :apply_type), Core.SSAValue(fc_global_ssa), GlobalRef(@__MODULE__, :FieldTypeA), Core.SSAValue(sp1_ssa)) + @test code[(field_type_ssa = 4;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 1) + @test code[10] == Expr(:(=), Core.SlotNumber(10), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type_ssa), Core.SlotNumber(10))) + @test code[(slot_read_1 = 11;)] == Core.SlotNumber(10) + @test code[(field_type2_ssa = 12;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 2) + @test code[18] == Expr(:(=), Core.SlotNumber(9), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type2_ssa), Core.SlotNumber(9))) + @test code[(slot_read_2 = 19;)] == Core.SlotNumber(9) + @test code[(field_type4_ssa = 20;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 4) + @test code[26] == Expr(:(=), 
Core.SlotNumber(8), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type4_ssa), Core.SlotNumber(8))) + @test code[(slot_read_3 = 27;)] == Core.SlotNumber(8) + @test code[(field_type5_ssa = 28;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 5) + @test code[34] == Expr(:(=), Core.SlotNumber(7), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type5_ssa), Core.SlotNumber(7))) + @test code[(slot_read_4 = 35;)] == Core.SlotNumber(7) + @test code[(new_ssa = 36;)] == Expr(:new, Core.SSAValue(apply_type_ssa), Core.SSAValue(slot_read_1), Core.SSAValue(slot_read_2), Core.SlotNumber(4), Core.SSAValue(slot_read_3), Core.SSAValue(slot_read_4)) + @test code[37] == Core.ReturnNode(Core.SSAValue(new_ssa)) +end # Issue #32820 function f32820(refs) @@ -7428,16 +7489,11 @@ function f34482() Base.not_int("ABC") 1 end -function g34482() - Core.Intrinsics.arraylen(1) - 1 -end function h34482() Core.Intrinsics.bitcast(1, 1) 1 end @test_throws ErrorException f34482() -@test_throws TypeError g34482() @test_throws TypeError h34482() struct NFANode34126 @@ -7475,6 +7531,19 @@ end struct T36104 # check that redefining it works, issue #21816 v::Vector{T36104} end +struct S36104{K,V} + v::S36104{K,V} + S36104{K,V}() where {K,V} = new() + S36104{K,V}(x::S36104) where {K,V} = new(x) +end +@test !isdefined(Base.unwrap_unionall(Base.ImmutableDict).name, :partial) +@test !isdefined(S36104.body.body.name, :partial) +@test hasfield(typeof(S36104.body.body.name), :partial) +struct S36104{K,V} # check that redefining it works + v::S36104{K,V} + S36104{K,V}() where {K,V} = new() + S36104{K,V}(x::S36104) where {K,V} = new(x) +end # with a gensymmed unionall struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} data::S @@ -7497,7 +7566,7 @@ end struct X36104; x::Int; end @test fieldtypes(X36104) == (Int,) primitive type P36104 8 end -@test_throws ErrorException("invalid redefinition of constant P36104") @eval(primitive type P36104 16 end) +@test_throws ErrorException("invalid redefinition of constant $(nameof(curmod)).P36104") @eval(primitive type P36104 16 end) # Malformed invoke f_bad_invoke(x::Int) = invoke(x, (Any,), x) @@ -7808,3 +7877,209 @@ end import .Foo45350: x45350 f45350() = (global x45350 = 2) @test_throws ErrorException f45350() + +@testset "Error behavior of unsafe_convert for RefValue" begin + b = Base.RefValue{Int}() + @test Base.unsafe_convert(Ptr{Int}, b) !== C_NULL + b = Base.RefValue{Base.RefValue{Int}}() + # throws because we hit `b.x` + @test_throws Core.UndefRefError Base.unsafe_convert(Ptr{Base.RefValue{Int}}, b) + # throws because we hit `b.x` + b = Base.RefValue{Integer}() + @test_throws Core.UndefRefError Base.unsafe_convert(Ptr{Integer}, b) +end + +# #46503 - redefine `invoke`d methods +foo46503(@nospecialize(a), b::Union{Vector{Any}, Float64, Nothing}) = rand() +foo46503(a::Int, b::Nothing) = @invoke foo46503(a::Any, b) +@test 0 <= foo46503(1, nothing) <= 1 +foo46503(@nospecialize(a), b::Union{Nothing, Float64}) = rand() + 10 +@test 10 <= foo46503(1, nothing) <= 11 + +@testset "effect override on Symbol(::String)" begin + @test Core.Compiler.is_foldable(Base.infer_effects(Symbol, (String,))) +end + +@testset "error message for getfield with bad integer type" begin + @test_throws "expected Union{$Int, Symbol}" getfield((1,2), Int8(1)) +end + +# Correct isdefined error for isdefined of Module of Int fld +f_isdefined_one(@nospecialize(x)) = isdefined(x, 1) +@test (try; f_isdefined_one(@__MODULE__); catch err; err; end).got === 1 + +# 
Unspecialized retrieval of vararg length +fvarargN(x::Tuple{Vararg{Int, N}}) where {N} = N +fvarargN(args...) = fvarargN(args) +finvokevarargN() = Base.inferencebarrier(fvarargN)(1, 2, 3) +@test finvokevarargN() == 3 + +# Make sure that @specialize actually overrides a module annotation +module SpecializeModuleTest + @nospecialize + f(@specialize(x), y) = 2 + @specialize +end +@test methods(SpecializeModuleTest.f)[1].nospecialize & 0b11 == 0b10 + +let # https://github.com/JuliaLang/julia/issues/46918 + # jl_get_binding_type shouldn't be unstable + code = quote + res1 = ccall(:jl_get_binding_type, Any, (Any, Any), Main, :stderr) + + stderr + + res2 = ccall(:jl_get_binding_type, Any, (Any, Any), Main, :stderr) + + res3 = ccall(:jl_get_binding_type, Any, (Any, Any), Main, :stderr) + + print(stdout, res1, " ", res2, " ", res3) + end |> x->join(x.args, ';') + cmd = `$(Base.julia_cmd()) -e $code` # N.B make sure not to pass this code as `:block` + stdout = IOBuffer() + stderr = IOBuffer() + @test success(pipeline(Cmd(cmd); stdout, stderr)) + @test isempty(String(take!(stderr))) # make sure no error has happened + @test String(take!(stdout)) == "nothing IO IO" +end + +# Modules allowed as type parameters and usable in generated functions +module ModTparamTest + foo_test_mod_tparam() = 1 +end +foo_test_mod_tparam() = 2 + +struct ModTparamTestStruct{M}; end +@generated function ModTparamTestStruct{M}() where {M} + return :($(GlobalRef(M, :foo_test_mod_tparam))()) +end +@test ModTparamTestStruct{@__MODULE__}() == 2 +@test ModTparamTestStruct{ModTparamTest}() == 1 + +# issue #47476 +f47476(::Union{Int, NTuple{N,Int}}...) where {N} = N +# force it to populate the MethodInstance specializations cache +# with the correct sparams +code_typed(f47476, (Vararg{Union{Int, NTuple{2,Int}}},)); +code_typed(f47476, (Int, Vararg{Union{Int, NTuple{2,Int}}},)); +code_typed(f47476, (Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) +code_typed(f47476, (Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) +code_typed(f47476, (Int, Int, Int, Int, Vararg{Union{Int, NTuple{2,Int}}},)) +@test f47476(1, 2, 3, 4, 5, 6, (7, 8)) === 2 +@test_throws UndefVarError(:N, :static_parameter) f47476(1, 2, 3, 4, 5, 6, 7) + +vect47476(::Type{T}) where {T} = T +@test vect47476(Type{Type{Type{Int32}}}) === Type{Type{Type{Int32}}} +@test vect47476(Type{Type{Type{Int64}}}) === Type{Type{Type{Int64}}} + +g47476(::Union{Nothing,Int,Val{T}}...) where {T} = T +@test_throws UndefVarError(:T, :static_parameter) g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5) +@test g47476(nothing, 1, nothing, 2, nothing, 3, nothing, 4, nothing, 5, Val(6)) === 6 +let spec = only(methods(g47476)).specializations::Core.SimpleVector + @test !isempty(spec) + @test any(mi -> mi !== nothing && Base.isvatuple(mi.specTypes), spec) + @test all(mi -> mi === nothing || !Base.has_free_typevars(mi.specTypes), spec) +end + +f48950(::Union{Int,d}, ::Union{c,Nothing}...) 
where {c,d} = 1 +@test f48950(1, 1, 1) == 1 + +# Module as tparam in unionall +struct ModTParamUnionAll{A, B}; end +@test isa(objectid(ModTParamUnionAll{Base}), UInt) + +# effects for objectid +for T in (Int, String, Symbol, Module) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (T,))) + @test Core.Compiler.is_foldable(Base.infer_effects(hash, (T,))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Some{T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Some{T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Some{Some{T}},))) + @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Some{Some{T}},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{T,T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T,T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Ref{T},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{Ref{T}},))) + @test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{Vector{T}},))) +end +@test Core.Compiler.is_foldable(Base.infer_effects(objectid, (DataType,))) + +# donotdelete should not taint consistency of the containing function +f_donotdete(x) = (Core.Compiler.donotdelete(x); 1) +@test Core.Compiler.is_consistent(Base.infer_effects(f_donotdete, (Tuple{Float64},))) + +# Test conditional UndefRefError (#50250) +struct Foo50250 + a::Int + x + Foo50250(a) = new() + Foo50250(a, x) = new(x) +end + +struct Bar50250 + a::Int + x + Bar50250(a) = new(a) + Bar50250(a, x) = new(a, x) +end + +foo50250(b, y) = (b ? Foo50250(y, y) : Foo50250(y)).x +bar50250(b, y) = (b ? 
Bar50250(y, y) : Bar50250(y)).x + +@test_throws UndefRefError foo50250(true, 1) +@test_throws UndefRefError foo50250(false, 1) +@test bar50250(true, 1) === 1 +@test_throws UndefRefError bar50250(false, 1) + +# Test that Type{typeof(Union{})} doesn't get codegen'ed as a constant (#50293) +baz50293(x::Union{Type, Core.Const}) = Base.issingletontype(x) +bar50293(@nospecialize(u)) = (Base.issingletontype(u.a), baz50293(u.a)) +let u = Union{Type{Union{}}, Type{Any}}, ab = bar50293(u) + @test ab[1] == ab[2] == false +end + +# `SimpleVector`-operations should be concrete-eval eligible +@test Core.Compiler.is_foldable(Base.infer_effects(length, (Core.SimpleVector,))) +@test Core.Compiler.is_foldable(Base.infer_effects(getindex, (Core.SimpleVector,Int))) + +let lin = Core.LineInfoNode(Base, first(methods(convert)), :foo, Int32(5), Int32(0)) + @test convert(LineNumberNode, lin) == LineNumberNode(5, :foo) +end + +# Test that a nothrow-globalref doesn't get outlined during lowering +module WellKnownGlobal + global well_known = 1 +end +macro insert_global() + Expr(:call, GlobalRef(Base, :println), GlobalRef(WellKnownGlobal, :well_known)) +end +check_globalref_lowering() = @insert_global +let src = code_lowered(check_globalref_lowering)[1] + @test length(src.code) == 2 +end + +# Test correctness of widen_diagonal +let widen_diagonal(x::UnionAll) = Base.rewrap_unionall(Base.widen_diagonal(Base.unwrap_unionall(x), x), x), + check_widen_diagonal(x, y) = !<:(x, y) && x <: widen_diagonal(y) + @test Tuple{Int,Float64} <: widen_diagonal(NTuple) + @test Tuple{Int,Float64} <: widen_diagonal(Tuple{T,T} where {T}) + @test Tuple{Real,Int,Float64} <: widen_diagonal(Tuple{S,Vararg{T}} where {S, T<:S}) + @test Tuple{Int,Int,Float64,Float64} <: widen_diagonal(Tuple{S,S,Vararg{T}} where {S, T<:S}) + @test Union{Tuple{T}, Tuple{T,Int}} where {T} === widen_diagonal(Union{Tuple{T}, Tuple{T,Int}} where {T}) + @test Tuple === widen_diagonal(Union{Tuple{Vararg{S}}, Tuple{Vararg{T}}} where {S, T}) +end + +# Test try/catch/else ordering +function test_try_catch_else() + local x + try + x = 1 + catch + rethrow() + else + return x + end +end +@test test_try_catch_else() == 1 diff --git a/test/corelogging.jl b/test/corelogging.jl index 1b1254e78b3d6..778e70aecd406 100644 --- a/test/corelogging.jl +++ b/test/corelogging.jl @@ -1,5 +1,8 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# Make a copy of the original environment +original_env = copy(ENV) + using Test, Base.CoreLogging import Base.CoreLogging: BelowMinLevel, Debug, Info, Warn, Error, handle_message, shouldlog, min_enabled_level, catch_exceptions @@ -100,12 +103,12 @@ end logmsg = (function() @info msg x=y end, function() @info msg x=y z=1+1 end)[i] @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true logmsg() - @test_throws UndefVarError(:msg) collect_test_logs(logmsg) - @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:msg) + @test_throws UndefVarError(:msg, :local) collect_test_logs(logmsg) + @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:msg, :local) msg = "the msg" @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true logmsg() - @test_throws UndefVarError(:y) collect_test_logs(logmsg) - @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:y) + @test_throws UndefVarError(:y, :local) collect_test_logs(logmsg) + @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:y, :local) y = "the y" @test_logs (Info,"the msg") logmsg() @test only(collect_test_logs(logmsg)[1]).kwargs[:x] === "the y" @@ -120,8 +123,8 @@ end @test length(logger.logs) == 1 record = logger.logs[1] @test record._module == Base.Core - @test record.group == :somegroup - @test record.id == :asdf + @test record.group === :somegroup + @test record.id === :asdf @test record.file == "/a/file" @test record.line == -10 # Test consistency with shouldlog() function arguments @@ -435,7 +438,7 @@ end (record,), _ = collect_test_logs() do @info "test" end - @test record.group == :corelogging # name of this file + @test record.group === :corelogging # name of this file end @testset "complicated kwargs logging macro" begin @@ -454,3 +457,13 @@ end end end + +# Restore the original environment +for k in keys(ENV) + if !haskey(original_env, k) + delete!(ENV, k) + end +end +for (k, v) in pairs(original_env) + ENV[k] = v +end diff --git a/test/deprecation_exec.jl b/test/deprecation_exec.jl index 5a120f8e2ee76..61ffcc2a59ac6 100644 --- a/test/deprecation_exec.jl +++ b/test/deprecation_exec.jl @@ -8,8 +8,6 @@ using Test using Logging -using Base: remove_linenums! - module DeprecationTests # to test @deprecate f() = true @@ -36,6 +34,11 @@ module DeprecationTests # to test @deprecate # test that @deprecate_moved can be overridden by an import Base.@deprecate_moved foo1234 "Foo" Base.@deprecate_moved bar "Bar" false + + # test that positional and keyword arguments are forwarded when + # there is no explicit type annotation + new_return_args(args...; kwargs...) 
= args, NamedTuple(kwargs) + @deprecate old_return_args new_return_args end # module module Foo1234 export foo1234 @@ -108,6 +111,11 @@ begin # @deprecate T21972() end @test_deprecated "something" f21972() + + # test that positional and keyword arguments are forwarded when + # there is no explicit type annotation + @test_logs (:warn,) @test DeprecationTests.old_return_args(1, 2, 3) == ((1, 2, 3),(;)) + @test_logs (:warn,) @test DeprecationTests.old_return_args(1, 2, 3; a = 4, b = 5) == ((1, 2, 3), (a = 4, b = 5)) end f24658() = depwarn24658() @@ -132,7 +140,7 @@ f25130() testlogs = testlogger.logs @test length(testlogs) == 2 @test testlogs[1].id != testlogs[2].id -@test testlogs[1].kwargs[:caller].func == Symbol("top-level scope") +@test testlogs[1].kwargs[:caller].func === Symbol("top-level scope") @test all(l.message == "f25130 message" for l in testlogs) global_logger(prev_logger) @@ -157,7 +165,7 @@ begin # tuple indexed by float deprecation @test_throws Exception @test_warn r"`getindex(t::Tuple, i::Real)` is deprecated" getindex((1,2), -1.0) end -@testset "@deprecated error message" begin +begin #@deprecated error message @test_throws( "if the third `export_old` argument is not specified or `true`,", @eval @deprecate M.f() g() @@ -171,7 +179,7 @@ end # `Old{T}(args...) where {...} = new(args...)` or # `(Old{T} where {...})(args...) = new(args...)`. # Since nobody has requested this feature yet, make sure that it throws, until we - # conciously define + # consciously define @test_throws( "invalid usage of @deprecate", @eval @deprecate Foo{T} where {T <: Int} g true diff --git a/test/dict.jl b/test/dict.jl index 9695877f44028..f00c7c4747672 100644 --- a/test/dict.jl +++ b/test/dict.jl @@ -196,7 +196,7 @@ end bestkey(d, key) = key bestkey(d::AbstractDict{K,V}, key) where {K<:AbstractString,V} = string(key) bar(x) = bestkey(x, :y) - @test bar(Dict(:x => [1,2,5])) == :y + @test bar(Dict(:x => [1,2,5])) === :y @test bar(Dict("x" => [1,2,5])) == "y" end @@ -369,23 +369,107 @@ end end -struct RainBowString +struct RainbowString s::String -end - -function Base.show(io::IO, rbs::RainBowString) - for s in rbs.s - _, color = rand(Base.text_colors) - print(io, color, s, "\e[0m") + bold::Bool + other::Bool + valid::Bool + offset::Int +end +RainbowString(s, bold=false, other=false, valid=true) = RainbowString(s, bold, other, valid, 0) + +function Base.show(io::IO, rbs::RainbowString) + for (i, s) in enumerate(rbs.s) + if i ≤ rbs.offset + print(io, s) + continue + end + color = rbs.other ? string("\033[4", rand(1:7), 'm') : Base.text_colors[rand(0:255)] + if rbs.bold + printstyled(io, color, s; bold=true) + else + print(io, color, s) + end + if rbs.valid + print(io, '\033', '[', rbs.other ? "0" : "39", 'm') # end of color marker + end end end @testset "Display with colors" begin - d = Dict([randstring(8) => [RainBowString(randstring(8)) for i in 1:10] for j in 1:5]...) + d = Dict([randstring(8) => [RainbowString(randstring(8)) for i in 1:10] for j in 1:5]...) 
str = sprint(io -> show(io, MIME("text/plain"), d); context = (:displaysize=>(30,80), :color=>true, :limit=>true)) lines = split(str, '\n') - @test all(endswith('…'), lines[2:end]) + @test all(endswith("\033[0m…"), lines[2:end]) @test all(x -> length(x) > 100, lines[2:end]) + + d2 = Dict(:foo => RainbowString("bar")) + str2 = sprint(io -> show(io, MIME("text/plain"), d2); context = (:displaysize=>(30,80), :color=>true, :limit=>true)) + @test !occursin('…', str2) + @test endswith(str2, "\033[0m") + + d3 = Dict(:foo => RainbowString("bar", true)) + str3 = sprint(io -> show(io, MIME("text/plain"), d3); context = (:displaysize=>(30,80), :color=>true, :limit=>true)) + @test !occursin('…', str3) + @test endswith(str3, "\033[0m") + + d4 = Dict(RainbowString(randstring(8), true) => nothing) + str4 = sprint(io -> show(io, MIME("text/plain"), d4); context = (:displaysize=>(30,20), :color=>true, :limit=>true)) + @test endswith(str4, "\033[0m… => nothing") + + d5 = Dict(RainbowString(randstring(30), false, true, false) => nothing) + str5 = sprint(io -> show(io, MIME("text/plain"), d5); context = (:displaysize=>(30,30), :color=>true, :limit=>true)) + @test endswith(str5, "\033[0m… => nothing") + + d6 = Dict(randstring(8) => RainbowString(randstring(30), true, true, false) for _ in 1:3) + str6 = sprint(io -> show(io, MIME("text/plain"), d6); context = (:displaysize=>(30,30), :color=>true, :limit=>true)) + lines6 = split(str6, '\n') + @test all(endswith("\033[0m…"), lines6[2:end]) + @test all(x -> length(x) > 100, lines6[2:end]) + str6_long = sprint(io -> show(io, MIME("text/plain"), d6); context = (:displaysize=>(30,80), :color=>true, :limit=>true)) + lines6_long = split(str6_long, '\n') + @test all(endswith("\033[0m"), lines6_long[2:end]) + + d7 = Dict(randstring(8) => RainbowString(randstring(30))) + str7 = sprint(io -> show(io, MIME("text/plain"), d7); context = (:displaysize=>(30,20), :color=>true, :limit=>true)) + line7 = split(str7, '\n')[2] + @test endswith(line7, "\033[0m…") + @test length(line7) > 100 + + d8 = Dict(:x => RainbowString(randstring(10), false, false, false, 6)) + str8 = sprint(io -> show(io, MIME("text/plain"), d8); context = (:displaysize=>(30,14), :color=>true, :limit=>true)) + line8 = split(str8, '\n')[2] + @test !occursin("\033[", line8) + @test length(line8) == 14 + str8_long = sprint(io -> show(io, MIME("text/plain"), d8); context = (:displaysize=>(30,16), :color=>true, :limit=>true)) + line8_long = split(str8_long, '\n')[2] + @test endswith(line8_long, "\033[0m…") + @test length(line8_long) > 20 + + d9 = Dict(:x => RainbowString(repeat('苹', 5), false, true, false)) + str9 = sprint(io -> show(io, MIME("text/plain"), d9); context = (:displaysize=>(30,15), :color=>true, :limit=>true)) + @test endswith(str9, "\033[0m…") + @test count('苹', str9) == 3 + + d10 = Dict(:xy => RainbowString(repeat('苹', 5), false, true, false)) + str10 = sprint(io -> show(io, MIME("text/plain"), d10); context = (:displaysize=>(30,15), :color=>true, :limit=>true)) + @test endswith(str10, "\033[0m…") + @test count('苹', str10) == 2 + + d11 = Dict(RainbowString("abcdefgh", false, true, false) => 0, "123456" => 1) + str11 = sprint(io -> show(io, MIME("text/plain"), d11); context = (:displaysize=>(30,80), :color=>true, :limit=>true)) + _, line11_a, line11_b = split(str11, '\n') + @test endswith(line11_a, "h\033[0m => 0") || endswith(line11_b, "h\033[0m => 0") + @test endswith(line11_a, "6\" => 1") || endswith(line11_b, "6\" => 1") + + d12 = Dict(RainbowString(repeat(Char(48+i), 4), (i&1)==1, (i&2)==2, 
(i&4)==4) => i for i in 1:8) + str12 = sprint(io -> show(io, MIME("text/plain"), d12); context = (:displaysize=>(30,80), :color=>true, :limit=>true)) + @test !occursin('…', str12) + + d13 = Dict(RainbowString("foo\nbar") => 74) + str13 = sprint(io -> show(io, MIME("text/plain"), d13); context = (:displaysize=>(30,80), :color=>true, :limit=>true)) + @test count('\n', str13) == 1 + @test occursin('…', str13) end @testset "Issue #15739" begin # Compact REPL printouts of an `AbstractDict` use brackets when appropriate @@ -555,13 +639,13 @@ end @test d == IdDict(1=>1, 2=>2, 3=>3) @test eltype(d) == Pair{Int,Int} @test_throws KeyError d[:a] - @test_throws ArgumentError d[:a] = 1 + @test_throws TypeError d[:a] = 1 @test_throws MethodError d[1] = :a # copy constructor d = IdDict(Pair(1,1), Pair(2,2), Pair(3,3)) @test collect(values(IdDict{Int,Float64}(d))) == collect(values(d)) - @test_throws ArgumentError IdDict{Float64,Int}(d) + @test_throws TypeError IdDict{Float64,Int}(d) # misc constructors @test typeof(IdDict(1=>1, :a=>2)) == IdDict{Any,Int} @@ -588,7 +672,7 @@ end @test_throws MethodError get!(d, "b", "b") @test delete!(d, "a") === d @test !haskey(d, "a") - @test_throws ArgumentError get!(IdDict{Symbol,Any}(), 2, "b") + @test_throws TypeError get!(IdDict{Symbol,Any}(), 2, "b") @test get!(IdDict{Int,Int}(), 1, 2.0) === 2 @test get!(()->2.0, IdDict{Int,Int}(), 1) === 2 @@ -1000,6 +1084,119 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep GC.@preserve A B C D nothing end +import Base.PersistentDict +@testset "PersistentDict" begin + @testset "HAMT HashState" begin + key = :key + h = Base.HAMT.HashState(key) + h1 = Base.HAMT.HashState(key, objectid(key), 0, 0) + h2 = Base.HAMT.HashState(h, key) # reconstruct + @test h.hash == h1.hash + @test h.hash == h2.hash + + hs = Base.HAMT.next(h1) + @test hs.depth == 1 + recompute_depth = (Base.HAMT.MAX_SHIFT ÷ Base.HAMT.BITS_PER_LEVEL) + 1 + for i in 2:recompute_depth + hs = Base.HAMT.next(hs) + @test hs.depth == i + end + @test hs.depth == recompute_depth + @test hs.shift == 0 + hsr = Base.HAMT.HashState(hs, key) + @test hs.hash == hsr.hash + @test hs.depth == hsr.depth + @test hs.shift == hsr.shift + + @test Core.Compiler.is_removable_if_unused(Base.infer_effects(Base.HAMT.init_hamt, (Type{Vector{Any}},Type{Int},Vector{Any},Int))) + @test Core.Compiler.is_removable_if_unused(Base.infer_effects(Base.HAMT.HAMT{Vector{Any},Int}, (Pair{Vector{Any},Int},))) + end + @testset "basics" begin + dict = PersistentDict{Int, Int}() + @test_throws KeyError dict[1] + @test length(dict) == 0 + @test isempty(dict) + + dict = PersistentDict{Int, Int}(1=>2.0) + @test dict[1] == 2 + + dict = PersistentDict(1=>2) + @test dict[1] == 2 + + dict = PersistentDict(dict, 1=>3.0) + @test dict[1] == 3 + + dict = PersistentDict(dict, 1, 1) + @test dict[1] == 1 + @test get(dict, 2, 1) == 1 + @test get(()->1, dict, 2) == 1 + + @test (1 => 1) ∈ dict + @test (1 => 2) ∉ dict + @test (2 => 1) ∉ dict + + @test haskey(dict, 1) + @test !haskey(dict, 2) + + dict2 = PersistentDict{Int, Int}(dict, 1=>2) + @test dict[1] == 1 + @test dict2[1] == 2 + + dict3 = Base.delete(dict2, 1) + @test_throws KeyError dict3[1] + @test dict3 == Base.delete(dict3, 1) + @test dict3.trie != Base.delete(dict3, 1).trie + + dict = PersistentDict(dict, 1, 3) + @test dict[1] == 3 + @test dict2[1] == 2 + + @test length(dict) == 1 + @test length(dict2) == 1 + + dict = PersistentDict(1=>2, 2=>3, 4=>1) + @test eltype(dict) == Pair{Int, Int} + @test dict[1] == 2 + @test dict[2] == 3 + 
@test dict[4] == 1 + end + + @testset "objectid" begin + c = [0] + dict = PersistentDict{Any, Int}(c => 1, [1] => 2) + @test dict[c] == 1 + c[1] = 1 + @test dict[c] == 1 + + c[1] = 0 + dict = PersistentDict{Any, Int}((c,) => 1, ([1],) => 2) + @test dict[(c,)] == 1 + + c[1] = 1 + @test dict[(c,)] == 1 + end + + @testset "stress" begin + N = 2^14 + dict = PersistentDict{Int, Int}() + for i in 1:N + dict = PersistentDict(dict, i, i) + end + @test length(dict) == N + length(collect(dict)) == N + values = sort!(collect(dict)) + @test values[1] == (1=>1) + @test values[end] == (N=>N) + + dict = Base.delete(dict, 16384) + @test !haskey(dict, 16384) + for i in 1:N + dict = Base.delete(dict, i) + end + @test isempty(dict) + end +end + @testset "issue #19995, hash of dicts" begin @test hash(Dict(Dict(1=>2) => 3, Dict(4=>5) => 6)) != hash(Dict(Dict(4=>5) => 3, Dict(1=>2) => 6)) a = Dict(Dict(3 => 4, 2 => 3) => 2, Dict(1 => 2, 5 => 6) => 1) @@ -1150,7 +1347,7 @@ end @test isempty(findall(isequal(1), Dict())) @test isempty(findall(isequal(1), Dict(:a=>2, :b=>3))) - @test findfirst(isequal(1), Dict(:a=>1, :b=>2)) == :a + @test findfirst(isequal(1), Dict(:a=>1, :b=>2)) === :a @test findfirst(isequal(1), Dict(:a=>1, :b=>1, :c=>3)) in (:a, :b) @test findfirst(isequal(1), Dict()) === nothing @test findfirst(isequal(1), Dict(:a=>2, :b=>3)) === nothing @@ -1278,4 +1475,47 @@ end filter!(x -> x.first < 10, d) sizehint!(d, 10) @test length(d.slots) < 100 + sizehint!(d, 1000) + sizehint!(d, 1; shrink = false) + @test length(d.slots) >= 1000 + sizehint!(d, 1; shrink = true) + @test length(d.slots) < 1000 +end + +# getindex is :effect_free and :terminates but not :consistent +for T in (Int, Float64, String, Symbol) + @testset let T=T + @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Dict{T,Any}, T))) + @test_broken Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T))) + @test !Core.Compiler.is_nothrow(Base.infer_effects(getindex, (Dict{T,Any}, T))) + @test_broken Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T))) + end +end + +struct BadHash + i::Int +end +Base.hash(::BadHash, ::UInt)=UInt(1) +@testset "maxprobe reset #51595" begin + d = Dict(BadHash(i)=>nothing for i in 1:20) + empty!(d) + sizehint!(d, 0) + @test d.maxprobe < length(d.keys) + d[BadHash(1)]=nothing + @test !(BadHash(2) in keys(d)) + d = Dict(BadHash(i)=>nothing for i in 1:20) + for _ in 1:20 + pop!(d) + end + sizehint!(d, 0) + @test d.maxprobe < length(d.keys) + d[BadHash(1)]=nothing + @test !(BadHash(2) in keys(d)) +end + +# Issue #52066 +let d = Dict() + d[1] = 'a' + d[1.0] = 'b' + @test only(d) === Pair{Any,Any}(1.0, 'b') end diff --git a/test/docs.jl b/test/docs.jl index 762a481ee4801..1b26c9670e180 100644 --- a/test/docs.jl +++ b/test/docs.jl @@ -12,26 +12,19 @@ using InteractiveUtils: apropos include("testenv.jl") # Test helpers. 
-function docstrings_equal(d1, d2) +function docstrings_equal(d1, d2; debug=true) io1 = IOBuffer() io2 = IOBuffer() show(io1, MIME"text/markdown"(), d1) show(io2, MIME"text/markdown"(), d2) s1 = String(take!(io1)) s2 = String(take!(io2)) - #if s1 != s2 # for debugging - # e1 = eachline(IOBuffer(s1)) - # e2 = eachline(IOBuffer(s2)) - # for (l1, l2) in zip(e1, e2) - # l1 == l2 || println(l1, "\n", l2, "\n") - # end - # for l1 in e1 - # println(l1, "\n[missing]\n") - # end - # for l2 in e2 - # println("[missing]\n", l2, "\n") - # end - #end + if debug && s1 != s2 + print(s1) + println("--------------------------------------------------------------------------------") + print(s2) + println("================================================================================") + end return s1 == s2 end docstrings_equal(d1::DocStr, d2) = docstrings_equal(parsedoc(d1), d2) @@ -76,6 +69,13 @@ $$latex literal$$ """ function break_me_docs end + +# `hasdoc` returns `true` on a name with a docstring. +@test Docs.hasdoc(Base, :map) +# `hasdoc` returns `false` on a name without a docstring. +@test !isdefined(Base, :_this_name_doesnt_exist_) && !Docs.hasdoc(Base, :_this_name_doesnt_exist_) +@test isdefined(Base, :_typed_vcat) && !Docs.hasdoc(Base, :_typed_vcat) + # issue #11548 module ModuleMacroDoc @@ -177,7 +177,7 @@ t(::AbstractString) "t-2" t(::Int, ::Any) "t-3" -t{S <: Integer}(::S) +t(::S) where {S <: Integer} # Docstrings to parametric methods after definition using where syntax (#32960): tw(x::T) where T = nothing @@ -357,7 +357,7 @@ let d1 = @doc(DocsTest.t(::Int, ::Any)), @test docstrings_equal(d1,d2) end -let d1 = @doc(DocsTest.t{S <: Integer}(::S)), +let d1 = @doc(DocsTest.t(::S) where {S <: Integer}), d2 = doc"t-3" @test docstrings_equal(d1,d2) end @@ -630,6 +630,7 @@ end let d = @doc(I15424.LazyHelp) @test repr("text/plain", d) == "LazyHelp\nLazyHelp(text)\n" + # (no internal warning is inserted for non-markdown content) end # Issue #13385. @@ -649,15 +650,15 @@ macro m1_11993() end macro m2_11993() - Symbol("@m1_11993") + esc(Symbol("@m1_11993")) end @doc "This should document @m1... since its the result of expansion" @m2_11993 @test (@doc @m1_11993) !== nothing let d = (@doc :@m2_11993), - macro_doc = Markdown.parse("`$(curmod_prefix)@m2_11993` is a macro.") + macro_doc = Markdown.parse("`$(curmod_prefix == "Main." ? "" : curmod_prefix)@m2_11993` is a macro.") @test docstring_startswith(d, doc""" - No documentation found. + No documentation found for private symbol. $macro_doc""") end @@ -723,7 +724,7 @@ f12593_2() = 1 # crude test to make sure we sort docstring output by method specificity @test !docstrings_equal(Docs.doc(getindex, Tuple{Dict{Int,Int},Int}), - Docs.doc(getindex, Tuple{Type{Int64},Int})) + Docs.doc(getindex, Tuple{Type{Int64},Int}); debug=false) # test that macro documentation works @test (@repl :@assert) !== nothing @@ -794,7 +795,7 @@ end # Issue #13905. let err = try; @macroexpand(@doc "" f() = @x); false; catch ex; ex; end err::UndefVarError - @test err.var == Symbol("@x") + @test err.var === Symbol("@x") end @@ -856,9 +857,9 @@ undocumented(x,y) = 3 end # module doc_str = Markdown.parse(""" -No docstring or readme file found for module `$(curmod_prefix)Undocumented`. +No docstring or readme file found for internal module `$(curmod_prefix)Undocumented`. -# Exported names +# Public names `A`, `B`, `C`, `at0`, `pt2` """) @@ -872,7 +873,7 @@ Binding `$(curmod_prefix)Undocumented.bindingdoesnotexist` does not exist. 
@test docstrings_equal(@doc(Undocumented.bindingdoesnotexist), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary ``` @@ -888,7 +889,7 @@ $(curmod_prefix)Undocumented.C @test docstrings_equal(@doc(Undocumented.A), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary ``` @@ -908,7 +909,7 @@ $(curmod_prefix)Undocumented.B <: $(curmod_prefix)Undocumented.A <: Any @test docstrings_equal(@doc(Undocumented.B), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary ``` @@ -923,7 +924,7 @@ $(curmod_prefix)Undocumented.C <: $(curmod_prefix)Undocumented.A <: Any @test docstrings_equal(@doc(Undocumented.C), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary ``` @@ -945,7 +946,7 @@ $(curmod_prefix)Undocumented.D <: $(curmod_prefix)Undocumented.B <: $(curmod_pre @test docstrings_equal(@doc(Undocumented.D), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary @@ -965,7 +966,7 @@ $(curmod_prefix)Undocumented.st4{T<:Number, N} @test docstrings_equal(@doc(Undocumented.at0), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -977,6 +978,7 @@ abstract type $(curmod_prefix)Undocumented.at1{T>:Integer, N} ``` $(curmod_prefix)Undocumented.mt6{Integer, N} +$(curmod_prefix)Undocumented.st5{T>:Integer, N} ``` # Supertype Hierarchy @@ -987,7 +989,7 @@ $(curmod_prefix)Undocumented.at1{T>:Integer, N} <: $(curmod_prefix)Undocumented. @test docstrings_equal(@doc(Undocumented.at1), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1006,7 +1008,7 @@ $(curmod_prefix)Undocumented.st4{Int64, N} @test docstrings_equal(@doc(Undocumented.at_), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for public symbol. # Summary @@ -1023,7 +1025,7 @@ $(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer} <: $(curmod_prefix)Un @test docstrings_equal(@doc(Undocumented.pt2), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1033,7 +1035,7 @@ struct $(curmod_prefix)Undocumented.st3{T<:Integer, N} # Fields ``` -a :: Tuple{Vararg{T<:Integer, N}} +a :: NTuple{N, T<:Integer} b :: Array{Int64, N} c :: Int64 ``` @@ -1046,7 +1048,7 @@ $(curmod_prefix)Undocumented.st3{T<:Integer, N} <: $(curmod_prefix)Undocumented. @test docstrings_equal(@doc(Undocumented.st3), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1057,7 +1059,7 @@ struct $(curmod_prefix)Undocumented.st4{T, N} # Fields ``` a :: T -b :: Tuple{Vararg{T, N}} +b :: NTuple{N, T} ``` # Supertype Hierarchy @@ -1068,7 +1070,7 @@ $(curmod_prefix)Undocumented.st4{T, N} <: $(curmod_prefix)Undocumented.at0{T, N} @test docstrings_equal(@doc(Undocumented.st4), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1089,7 +1091,7 @@ $(curmod_prefix)Undocumented.st5{T>:Int64, N} <: $(curmod_prefix)Undocumented.at @test docstrings_equal(@doc(Undocumented.st5), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. 
+No documentation found for private symbol. # Summary @@ -1110,7 +1112,7 @@ $(curmod_prefix)Undocumented.mt6{T<:Integer, N} <: $(curmod_prefix)Undocumented. @test docstrings_equal(@doc(Undocumented.mt6), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1124,7 +1126,7 @@ No documentation found. @test docstrings_equal(@doc(Undocumented.ut7), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1140,7 +1142,7 @@ No documentation found. @test docstrings_equal(@doc(Undocumented.ut8), doc"$doc_str") doc_str = Markdown.parse(""" -No documentation found. +No documentation found for private symbol. # Summary @@ -1159,7 +1161,7 @@ let d = @doc(Undocumented.f) io = IOBuffer() show(io, MIME"text/markdown"(), d) @test startswith(String(take!(io)),""" - No documentation found. + No documentation found for private symbol. `$(curmod_prefix)Undocumented.f` is a `Function`. """) @@ -1169,7 +1171,7 @@ let d = @doc(Undocumented.undocumented) io = IOBuffer() show(io, MIME"text/markdown"(), d) @test startswith(String(take!(io)), """ - No documentation found. + No documentation found for private symbol. `$(curmod_prefix)Undocumented.undocumented` is a `Function`. """) @@ -1209,11 +1211,11 @@ end import Base.Docs: @var, Binding, defined -let x = Binding(Base, Symbol("@time")) +let x = Binding(Base, Symbol("@inline")) @test defined(x) == true - @test @var(@time) == x - @test @var(Base.@time) == x - @test @var(Base.Iterators.@time) == x + @test @var(@inline) == x + @test @var(Base.@inline) == x + @test @var(Base.Iterators.@inline) == x end let x = Binding(Iterators, :enumerate) @@ -1302,9 +1304,9 @@ dynamic_test.x = "test 2" function striptrimdocs(expr) if Meta.isexpr(expr, :call) fex = expr.args[1] - if Meta.isexpr(fex, :.) && fex.args[1] == :REPL + if Meta.isexpr(fex, :.) 
&& fex.args[1] === :REPL fmex = fex.args[2] - if isa(fmex, QuoteNode) && fmex.value == :trimdocs + if isa(fmex, QuoteNode) && fmex.value === :trimdocs expr = expr.args[2] end end @@ -1315,30 +1317,30 @@ end let dt1 = striptrimdocs(_repl(:(dynamic_test(1.0)))) @test dt1 isa Expr @test dt1.args[1] isa Expr - @test dt1.args[1].head === :macrocall - @test dt1.args[1].args[1] == Symbol("@doc") - @test dt1.args[1].args[3] == :(dynamic_test(::typeof(1.0))) + @test dt1.args[1].head === :call + @test dt1.args[1].args[1] === Base.Docs.doc + @test dt1.args[1].args[3] == :(Union{Tuple{typeof(1.0)}}) end let dt2 = striptrimdocs(_repl(:(dynamic_test(::String)))) @test dt2 isa Expr @test dt2.args[1] isa Expr - @test dt2.args[1].head === :macrocall - @test dt2.args[1].args[1] == Symbol("@doc") - @test dt2.args[1].args[3] == :(dynamic_test(::String)) + @test dt2.args[1].head === :call + @test dt2.args[1].args[1] === Base.Docs.doc + @test dt2.args[1].args[3] == :(Union{Tuple{String}}) end let dt3 = striptrimdocs(_repl(:(dynamic_test(a)))) @test dt3 isa Expr @test dt3.args[1] isa Expr - @test dt3.args[1].head === :macrocall - @test dt3.args[1].args[1] == Symbol("@doc") - @test dt3.args[1].args[3].args[2].head == :(::) # can't test equality due to line numbers + @test dt3.args[1].head === :call + @test dt3.args[1].args[1] === Base.Docs.doc + @test dt3.args[1].args[3].args[2].head === :curly # can't test equality due to line numbers end let dt4 = striptrimdocs(_repl(:(dynamic_test(1.0,u=2.0)))) @test dt4 isa Expr @test dt4.args[1] isa Expr - @test dt4.args[1].head === :macrocall - @test dt4.args[1].args[1] == Symbol("@doc") - @test dt4.args[1].args[3] == :(dynamic_test(::typeof(1.0); u::typeof(2.0)=2.0)) + @test dt4.args[1].head === :call + @test dt4.args[1].args[1] === Base.Docs.doc + @test dt4.args[1].args[3] == :(Union{Tuple{typeof(1.0)}}) end # Equality testing @@ -1441,27 +1443,36 @@ end struct t_docs_abc end @test "t_docs_abc" in accessible(@__MODULE__) -# Call overloading issue #20087 +# Call overloading issues #20087 and #44889 """ Docs for `MyFunc` struct. """ -mutable struct MyFunc - x -end +mutable struct MyFunc x end +""" +Docs for `MyParametricFunc{T}` struct. +""" +struct MyParametricFunc{T} end """ Docs for calling `f::MyFunc`. """ -function (f::MyFunc)(x) - f.x = x - return f -end +(f::MyFunc)(x) = f -@test docstrings_equal(@doc(MyFunc(2)), +""" +Docs for calling `f::MyParametricFunc{T}`. +""" +(f::MyParametricFunc{T})(x) where T = f + +@test docstrings_equal(@doc((::MyFunc)(2)), doc""" Docs for calling `f::MyFunc`. """) +@test docstrings_equal(@doc((::MyParametricFunc{Int})(44889)), +doc""" +Docs for calling `f::MyParametricFunc{T}`. 
+""") + struct A_20087 end """a""" @@ -1513,3 +1524,11 @@ struct S41727 end @test S41727(1) isa S41727 @test string(@repl S41727.x) == "x is 4\n" + +"ensure we can document ccallable functions" +Base.@ccallable c51586_short()::Int = 2 +"ensure we can document ccallable functions" +Base.@ccallable c51586_long()::Int = 3 + +@test docstrings_equal(@doc(c51586_short()), doc"ensure we can document ccallable functions") +@test docstrings_equal(@doc(c51586_long()), doc"ensure we can document ccallable functions") diff --git a/test/embedding/embedding.c b/test/embedding/embedding.c index 1294d4cdafb45..746c59fc8ce1f 100644 --- a/test/embedding/embedding.c +++ b/test/embedding/embedding.c @@ -86,17 +86,17 @@ int main() // (aka, is gc-rooted until) the program reaches the corresponding JL_GC_POP() JL_GC_PUSH1(&x); - double* xData = jl_array_data(x); + double* xData = jl_array_data(x, double); size_t i; - for (i = 0; i < jl_array_len(x); i++) + for (i = 0; i < jl_array_nrows(x); i++) xData[i] = i; jl_function_t *func = jl_get_function(jl_base_module, "reverse!"); jl_call1(func, (jl_value_t*) x); printf("x = ["); - for (i = 0; i < jl_array_len(x); i++) + for (i = 0; i < jl_array_nrows(x); i++) printf("%e ", xData[i]); printf("]\n"); fflush(stdout); @@ -192,6 +192,12 @@ int main() checked_eval_string("f28825()"); } + { + // jl_typeof works (#50714) + jl_value_t *v = checked_eval_string("sqrt(2.0)"); + jl_value_t *t = jl_typeof(v); + } + JL_TRY { jl_error("exception thrown"); } diff --git a/test/enums.jl b/test/enums.jl index e0c1fcc6bccc0..6eb9360e08a23 100644 --- a/test/enums.jl +++ b/test/enums.jl @@ -94,18 +94,18 @@ end # other Integer types of enum members @enum Test3::UInt8 _one_Test3=0x01 _two_Test3=0x02 _three_Test3=0x03 -@test Test3.size == 1 +@test Core.sizeof(Test3) == 1 @test UInt8(_one_Test3) === 0x01 @test length(instances(Test3)) == 3 @enum Test4::UInt16 _one_Test4=0x01 _two_Test4=0x0002 _three_Test4=0x03 -@test Test4.size == 2 +@test Core.sizeof(Test4) == 2 @enum Test5::UInt32 _one_Test5=0x01 _two_Test5=0x00000002 _three_Test5=0x00000003 -@test Test5.size == 4 +@test Core.sizeof(Test5) == 4 @enum Test6::UInt128 _one_Test6=0x00000000000000000000000000000001 _two_Test6=0x00000000000000000000000000000002 -@test Test6.size == 16 +@test Core.sizeof(Test6) == 16 @test typeof(Integer(_one_Test6)) == UInt128 # enum values must be integers @@ -179,6 +179,15 @@ end @enum HashEnum2 Enum2_a=1 @test hash(Enum1_a) != hash(Enum2_a) +# PR #49777: Check that `Base.hash` can be specialized by the user without +# overwriting a method definition. +@enum HashEnum3 Enum3_a=1 +@test which(hash, (HashEnum3, UInt)).sig != Tuple{typeof(hash), HashEnum3, UInt64} + +# Check that generic `hash` on custom enum subtypes works. +struct HashEnum4 <: Enum{Int} end +@test hash(HashEnum4(), zero(UInt)) == invoke(hash, Tuple{Any, UInt}, HashEnum4(), zero(UInt)) + @test (Vector{Fruit}(undef, 3) .= apple) == [apple, apple, apple] # long, discongruous diff --git a/test/env.jl b/test/env.jl index 644d956af8fd4..4a3529f6d4081 100644 --- a/test/env.jl +++ b/test/env.jl @@ -1,5 +1,8 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# Make a copy of the original environment +original_env = copy(ENV) + using Random @test !("f=a=k=e=n=a=m=e" ∈ keys(ENV)) @@ -49,6 +52,11 @@ end @test get!(ENV, key, "default") == "default" @test haskey(ENV, key) @test ENV[key] == "default" + + key = randstring(25) + @test !haskey(ENV, key) + @test get!(ENV, key, 0) == 0 + @test ENV[key] == "0" end @testset "#17956" begin @test length(ENV) > 1 @@ -118,3 +126,58 @@ if Sys.iswindows() end end end + +@testset "get_bool_env" begin + @testset "truthy" begin + for v in ("t", "true", "y", "yes", "1") + for _v in (v, uppercasefirst(v), uppercase(v)) + ENV["testing_gbe"] = _v + @test Base.get_bool_env("testing_gbe", false) == true + @test Base.get_bool_env("testing_gbe", true) == true + end + end + end + @testset "falsy" begin + for v in ("f", "false", "n", "no", "0") + for _v in (v, uppercasefirst(v), uppercase(v)) + ENV["testing_gbe"] = _v + @test Base.get_bool_env("testing_gbe", true) == false + @test Base.get_bool_env("testing_gbe", false) == false + end + end + end + @testset "empty" begin + ENV["testing_gbe"] = "" + @test Base.get_bool_env("testing_gbe", true) == true + @test Base.get_bool_env("testing_gbe", false) == false + end + @testset "undefined" begin + delete!(ENV, "testing_gbe") + @test !haskey(ENV, "testing_gbe") + @test Base.get_bool_env("testing_gbe", true) == true + @test Base.get_bool_env("testing_gbe", false) == false + end + @testset "unrecognized" begin + for v in ("truw", "falls") + ENV["testing_gbe"] = v + @test Base.get_bool_env("testing_gbe", true) === nothing + @test Base.get_bool_env("testing_gbe", false) === nothing + end + end + + # the "default" arg shouldn't have a default val, for clarity. + @test_throws MethodError Base.get_bool_env("testing_gbe") + + delete!(ENV, "testing_gbe") + @test !haskey(ENV, "testing_gbe") +end + +# Restore the original environment +for k in collect(keys(ENV)) + if !haskey(original_env, k) + delete!(ENV, k) + end +end +for (k, v) in pairs(original_env) + ENV[k] = v +end diff --git a/test/error.jl b/test/error.jl index eaf77c5d53912..e9cdfa100bc81 100644 --- a/test/error.jl +++ b/test/error.jl @@ -99,3 +99,27 @@ end @test s == "MethodError: no method matching f44319(::Int$(Sys.WORD_SIZE))\n\nClosest candidates are:\n f44319()\n @ $curmod_str none:0\n" end end + +@testset "All types ending with Exception or Error subtype Exception" begin + function test_exceptions(mod, visited=Set{Module}()) + if mod ∉ visited + push!(visited, mod) + for name in names(mod, all=true) + isdefined(mod, name) || continue + value = getfield(mod, name) + + if value isa Module + test_exceptions(value, visited) + elseif value isa Type + str = string(value) + if endswith(str, "Exception") || endswith(str, "Error") + @test value <: Exception + end + end + end + end + visited + end + visited = test_exceptions(Base) + test_exceptions(Core, visited) +end diff --git a/test/errorshow.jl b/test/errorshow.jl index b578c5025e98e..28ca2c1bc3c5c 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -5,6 +5,9 @@ using Random, LinearAlgebra # For curmod_* include("testenv.jl") +# re-register only the error hints that are being tested here ( +Base.Experimental.register_error_hint(Base.noncallable_number_hint_handler, MethodError) +Base.Experimental.register_error_hint(Base.string_concatenation_hint_handler, MethodError) @testset "SystemError" begin err = try; systemerror("reason", Cint(0)); false; catch ex; ex; end::SystemError @@ -92,8 +95,15 @@ method_c2(x::Int32, y::Float64) = 
true method_c2(x::Int32, y::Int32, z::Int32) = true method_c2(x::T, y::T, z::T) where {T<:Real} = true -Base.show_method_candidates(buf, Base.MethodError(method_c2,(1., 1., 2))) -@test occursin( "\n\nClosest candidates are:\n method_c2(!Matched::Int32, ::Float64, ::Any...)$cmod$cfile$(c2line+2)\n method_c2(::T, ::T, !Matched::T) where T<:Real$cmod$cfile$(c2line+5)\n method_c2(!Matched::Int32, ::Any...)$cmod$cfile$(c2line+1)\n ...\n", String(take!(buf))) +let s + Base.show_method_candidates(buf, Base.MethodError(method_c2, (1., 1., 2))) + s = String(take!(buf)) + @test occursin("\n\nClosest candidates are:\n ", s) + @test occursin("\n method_c2(!Matched::Int32, ::Float64, ::Any...)$cmod$cfile$(c2line+2)\n ", s) + @test occursin("\n method_c2(::T, ::T, !Matched::T) where T<:Real$cmod$cfile$(c2line+5)\n ", s) + @test occursin("\n method_c2(!Matched::Int32, ::Any...)$cmod$cfile$(c2line+1)\n ", s) + @test occursin("\n ...\n", s) +end c3line = @__LINE__() + 1 method_c3(x::Float64, y::Float64) = true @@ -343,7 +353,7 @@ let undefvar err_str = @except_str Vector{Any}(undef, 1)[1] UndefRefError @test err_str == "UndefRefError: access to undefined reference" err_str = @except_str undefvar UndefVarError - @test err_str == "UndefVarError: undefvar not defined" + @test err_str == "UndefVarError: `undefvar` not defined in local scope" err_str = @except_str read(IOBuffer(), UInt8) EOFError @test err_str == "EOFError: read end of file" err_str = @except_str Dict()[:doesnotexist] KeyError @@ -404,8 +414,8 @@ let err_str @test occursin("MethodError: no method matching +(::$Int, ::Vector{Float64})", err_str) @test occursin("For element-wise addition, use broadcasting with dot syntax: scalar .+ array", err_str) err_str = @except_str rand(5) - 1//3 MethodError - @test occursin("MethodError: no method matching +(::Vector{Float64}, ::Rational{$Int})", err_str) - @test occursin("For element-wise addition, use broadcasting with dot syntax: array .+ scalar", err_str) + @test occursin("MethodError: no method matching -(::Vector{Float64}, ::Rational{$Int})", err_str) + @test occursin("For element-wise subtraction, use broadcasting with dot syntax: array .- scalar", err_str) end @@ -431,29 +441,29 @@ let err_str, Base.stacktrace_contract_userdir() && (sp = Base.contractuser(sp)) @test sprint(show, which(String, Tuple{})) == - "String()\n @ $curmod_str $sp:$(method_defs_lineno + 0)" + "String() @ $curmod_str $sp:$(method_defs_lineno + 0)" @test sprint(show, which("a", Tuple{})) == - "(::String)()\n @ $curmod_str $sp:$(method_defs_lineno + 1)" + "(::String)() @ $curmod_str $sp:$(method_defs_lineno + 1)" @test sprint(show, which(EightBitType, Tuple{})) == - "$(curmod_prefix)EightBitType()\n @ $curmod_str $sp:$(method_defs_lineno + 2)" + "$(curmod_prefix)EightBitType() @ $curmod_str $sp:$(method_defs_lineno + 2)" @test sprint(show, which(reinterpret(EightBitType, 0x54), Tuple{})) == - "(::$(curmod_prefix)EightBitType)()\n @ $curmod_str $sp:$(method_defs_lineno + 3)" + "(::$(curmod_prefix)EightBitType)() @ $curmod_str $sp:$(method_defs_lineno + 3)" @test sprint(show, which(EightBitTypeT, Tuple{})) == - "$(curmod_prefix)EightBitTypeT()\n @ $curmod_str $sp:$(method_defs_lineno + 4)" + "$(curmod_prefix)EightBitTypeT() @ $curmod_str $sp:$(method_defs_lineno + 4)" @test sprint(show, which(EightBitTypeT{Int32}, Tuple{})) == - "$(curmod_prefix)EightBitTypeT{T}() where T\n @ $curmod_str $sp:$(method_defs_lineno + 5)" + "$(curmod_prefix)EightBitTypeT{T}() where T @ $curmod_str $sp:$(method_defs_lineno + 5)" @test sprint(show, 
which(reinterpret(EightBitTypeT{Int32}, 0x54), Tuple{})) == - "(::$(curmod_prefix)EightBitTypeT)()\n @ $curmod_str $sp:$(method_defs_lineno + 6)" + "(::$(curmod_prefix)EightBitTypeT)() @ $curmod_str $sp:$(method_defs_lineno + 6)" @test startswith(sprint(show, which(Complex{Int}, Tuple{Int})), "Complex{T}(") @test startswith(sprint(show, which(getfield(Base, Symbol("@doc")), Tuple{LineNumberNode, Module, Vararg{Any}})), - "var\"@doc\"(__source__::LineNumberNode, __module__::Module, x...)\n @ Core boot.jl:") + "var\"@doc\"(__source__::LineNumberNode, __module__::Module, x...) @ Core boot.jl:") @test startswith(sprint(show, which(FunctionLike(), Tuple{})), - "(::$(curmod_prefix)FunctionLike)()\n @ $curmod_str $sp:$(method_defs_lineno + 7)") + "(::$(curmod_prefix)FunctionLike)() @ $curmod_str $sp:$(method_defs_lineno + 7)") @test startswith(sprint(show, which(StructWithUnionAllMethodDefs{<:Integer}, (Any,))), "($(curmod_prefix)StructWithUnionAllMethodDefs{T} where T<:Integer)(x)") @test repr("text/plain", FunctionLike()) == "(::$(curmod_prefix)FunctionLike) (generic function with 1 method)" - @test repr("text/plain", Core.arraysize) == "arraysize (built-in function)" + @test repr("text/plain", Core.getfield) == "getfield (built-in function)" err_str = @except_stackframe String() ErrorException @test err_str == "String() at $sn:$(method_defs_lineno + 0)" @@ -494,7 +504,7 @@ let @test (@macroexpand @fastmath + ) == :(Base.FastMath.add_fast) @test (@macroexpand @fastmath min(1) ) == :(Base.FastMath.min_fast(1)) let err = try; @macroexpand @doc "" f() = @x; catch ex; ex; end - @test err == UndefVarError(Symbol("@x")) + @test err == UndefVarError(Symbol("@x"), @__MODULE__) end @test (@macroexpand @seven_dollar $bar) == 7 x = 2 @@ -524,8 +534,16 @@ end ex = :(@nest2b 42) @test _macroexpand1(ex) != macroexpand(M,ex) @test _macroexpand1(_macroexpand1(ex)) == macroexpand(M, ex) - @test (@macroexpand1 @nest2b 42) == _macroexpand1(ex) + @test (@macroexpand1 @nest2b 42) == _macroexpand1(:(@nest2b 42)) +end + +module TwoargMacroExpand +macro modulecontext(); return __module__; end end +@test (@__MODULE__) == @macroexpand TwoargMacroExpand.@modulecontext +@test TwoargMacroExpand == @macroexpand TwoargMacroExpand @modulecontext +@test (@__MODULE__) == @macroexpand1 TwoargMacroExpand.@modulecontext +@test TwoargMacroExpand == @macroexpand1 TwoargMacroExpand @modulecontext foo_9965(x::Float64; w=false) = x foo_9965(x::Int) = 2x @@ -542,6 +560,25 @@ foo_9965(x::Int) = 2x @test occursin("got unsupported keyword argument \"w\"", String(take!(io))) end +@testset "MethodError with long types (#50803)" begin + a = view(reinterpret(reshape, UInt8, PermutedDimsArray(rand(5, 7), (2, 1))), 2:3, 2:4, 1:4) # a mildly-complex type + function f50803 end + ex50803 = try + f50803(a, a, a, a, a, a) + catch e + e + end::MethodError + tlf = Ref(false) + str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 120), :stacktrace_types_limited=>tlf)) + @test tlf[] + @test occursin("::SubArray{…}", str) + tlf[] = false + str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 10000), :stacktrace_types_limited=>tlf)) + @test !tlf[] + str = sprint(Base.showerror, ex50803; context=(:displaysize=>(1000, 120))) + @test !occursin("::SubArray{…}", str) +end + # Issue #20556 import REPL module EnclosingModule @@ -571,7 +608,7 @@ let end end -@testset "show for manually thrown MethodError" begin +@testset "show for MethodError with world age issue" begin global f21006 f21006() = nothing @@ -613,6 +650,32 @@ end end end 
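The world-age testsets just above and just below both check MethodError's "may be too new" hint, which fires when a method is created after the world in which its caller was compiled. A minimal sketch of the behavior these tests assert on, using illustrative names (tryeval_demo, newfun_demo) that are not part of this patch:

# Hypothetical sketch, not part of the patch: provoke the world-age MethodError
# that the surrounding tests format-check, on the Julia versions this suite targets.
function tryeval_demo()
    @eval newfun_demo() = 1          # defining a method at runtime raises the world counter
    newfun_demo()                    # but the caller still runs in its older, fixed world
end
err = try
    tryeval_demo()
catch e
    e
end
msg = sprint(Base.showerror, err)
# msg contains "The applicable method may be too new", the phrase asserted below;
# wrapping the call as Base.invokelatest(newfun_demo) would avoid the error.
println(msg)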
+# Issue #50200 +using Base.Experimental: @opaque +@testset "show for MethodError with world age issue (kwarg)" begin + test_no_error(f) = @test f() === nothing + function test_worldage_error(f) + ex = try; f(); error("Should not have been reached") catch ex; ex; end + @test occursin("The applicable method may be too new", sprint(Base.showerror, ex)) + @test !occursin("!Matched::", sprint(Base.showerror, ex)) + end + + global callback50200 + + # First the no-kwargs version + callback50200 = (args...)->nothing + f = @opaque ()->callback50200() + test_no_error(f) + callback50200 = (args...)->nothing + test_worldage_error(f) + + callback50200 = (args...; kwargs...)->nothing + f = @opaque ()->callback50200(;a=1) + test_no_error(f) + callback50200 = (args...; kwargs...)->nothing + test_worldage_error(f) +end + # Custom hints struct HasNoOne end function recommend_oneunit(io, ex, arg_types, kwargs) @@ -668,11 +731,12 @@ backtrace() io = IOBuffer() Base.show_backtrace(io, bt) output = split(String(take!(io)), '\n') + length(output) >= 8 || println(output) # for better errors when this fails @test lstrip(output[3])[1:3] == "[1]" @test occursin("g28442", output[3]) @test lstrip(output[5])[1:3] == "[2]" @test occursin("f28442", output[5]) - @test occursin("the last 2 lines are repeated 5000 more times", output[7]) + @test occursin("the above 2 lines are repeated 5000 more times", output[7]) @test lstrip(output[8])[1:7] == "[10003]" end @@ -680,7 +744,7 @@ end getbt() = backtrace() bt = getbt() Base.update_stackframes_callback[] = function(list) - modify((sf, n)) = sf.func == :getbt ? (StackTraces.StackFrame(sf.func, sf.file, sf.line+2, sf.linfo, sf.from_c, sf.inlined, sf.pointer), n) : (sf, n) + modify((sf, n)) = sf.func === :getbt ? (StackTraces.StackFrame(sf.func, sf.file, sf.line+2, sf.linfo, sf.from_c, sf.inlined, sf.pointer), n) : (sf, n) map!(modify, list, list) end io = IOBuffer() @@ -807,7 +871,7 @@ if (Sys.isapple() || Sys.islinux()) && Sys.ARCH === :x86_64 catch_backtrace() end bt_str = sprint(Base.show_backtrace, bt) - @test occursin(r"the last 2 lines are repeated \d+ more times", bt_str) + @test occursin(r"the above 2 lines are repeated \d+ more times", bt_str) end end end @@ -928,3 +992,101 @@ for (expr, errmsg) in end @test contains(sprint(showerror, err), errmsg) end + +let err_str + err_str = @except_str "a" + "b" MethodError + @test occursin("String concatenation is performed with *", err_str) +end + +@testset "unused argument names" begin + g(::Int) = backtrace() + bt = g(1) + @test !contains(sprint(Base.show_backtrace, bt), "#unused#") +end + +# issue #49002 +let buf = IOBuffer() + Base.show_method_candidates(buf, Base.MethodError(typeof, (17,)), pairs((foo = :bar,))) + @test isempty(take!(buf)) + Base.show_method_candidates(buf, Base.MethodError(isa, ()), pairs((a = 5,))) + @test isempty(take!(buf)) +end + +f_internal_wrap(g, a; kw...) = error(); +@inline f_internal_wrap(a; kw...) 
= f_internal_wrap(identity, a; kw...); +let bt + @test try + f_internal_wrap(1) + false + catch + bt = catch_backtrace() + true + end + @test !occursin("#f_internal_wrap#", sprint(Base.show_backtrace, bt)) +end + +g_collapse_pos(x, y=1.0, z=2.0) = error() +let bt + @test try + g_collapse_pos(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_pos(x::Float64, y::Float64, z::Float64)", bt_str) + @test !occursin("g_collapse_pos(x::Float64)", bt_str) +end + +g_collapse_kw(x; y=2.0) = error() +let bt + @test try + g_collapse_kw(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_kw(x::Float64; y::Float64)", bt_str) + @test !occursin("g_collapse_kw(x::Float64)", bt_str) +end + +g_collapse_pos_kw(x, y=1.0; z=2.0) = error() +let bt + @test try + g_collapse_pos_kw(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_pos_kw(x::Float64, y::Float64; z::Float64)", bt_str) + @test !occursin("g_collapse_pos_kw(x::Float64, y::Float64)", bt_str) + @test !occursin("g_collapse_pos_kw(x::Float64)", bt_str) +end + +simplify_kwargs_type(pos; kws...) = (pos, sum(kws)) +let bt + res = try + simplify_kwargs_type(0; kw1=1.0, kw2="2.0") + false + catch + bt = catch_backtrace() + true + end + @test res + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("simplify_kwargs_type(pos::$Int; kws::@Kwargs{kw1::Float64, kw2::String})", bt_str) +end + +# Test Base.print_with_compare in convert MethodErrors +struct TypeCompareError{A,B} <: Exception end +let e = @test_throws MethodError convert(TypeCompareError{Float64,1}, TypeCompareError{Float64,2}()) + str = sprint(Base.showerror, e.value) + @test occursin("TypeCompareError{Float64,2}", str) + @test occursin("TypeCompareError{Float64,1}", str) + @test !occursin("TypeCompareError{Float64{},2}", str) # No {...} for types without params +end diff --git a/test/exceptions.jl b/test/exceptions.jl index d8d1e7b45b8b5..eb0bbaec35090 100644 --- a/test/exceptions.jl +++ b/test/exceptions.jl @@ -276,7 +276,7 @@ end exc end yield(t) - @test t.state == :done + @test t.state === :done @test t.result == ErrorException("B") # Task exception state is preserved around task switches @test length(current_exceptions()) == 1 @@ -296,7 +296,7 @@ end exc end yield(t) - @test t.state == :done + @test t.state === :done @test t.result == ErrorException("B") @test bt == catch_backtrace() rethrow() @@ -318,7 +318,7 @@ end exc end yield(t) - @test t.state == :done + @test t.state === :done @test t.result == ErrorException("B") bt = catch_backtrace() rethrow(ErrorException("C")) @@ -335,7 +335,7 @@ end error("B") end yield(t) - @test t.state == :failed + @test t.state === :failed @test t.result == ErrorException("B") @test current_exceptions(t, backtrace=false) == [ (exception=ErrorException("A"),backtrace=nothing), diff --git a/test/fastmath.jl b/test/fastmath.jl index e93fb93330b4f..34744f325ad7f 100644 --- a/test/fastmath.jl +++ b/test/fastmath.jl @@ -207,6 +207,31 @@ end @test @fastmath(cis(third)) ≈ cis(third) end end + +@testset "reductions" begin + @test @fastmath(maximum([1,2,3])) == 3 + @test @fastmath(minimum([1,2,3])) == 1 + @test @fastmath(maximum(abs2, [1,2,3+0im])) == 9 + @test @fastmath(minimum(sqrt, [1,2,3])) == 1 + @test @fastmath(maximum(Float32[4 5 6; 7 8 9])) == 9.0f0 + @test @fastmath(minimum(Float32[4 5 6; 7 8 9])) == 4.0f0 + + @test 
@fastmath(maximum(Float32[4 5 6; 7 8 9]; dims=1)) == Float32[7.0 8.0 9.0] + @test @fastmath(minimum(Float32[4 5 6; 7 8 9]; dims=2)) == Float32[4.0; 7.0;;] + @test @fastmath(maximum(abs, [4+im -5 6-im; -7 8 -9]; dims=1)) == [7.0 8.0 9.0] + @test @fastmath(minimum(cbrt, [4 -5 6; -7 8 -9]; dims=2)) == cbrt.([-5; -9;;]) + + x = randn(3,4,5) + x1 = sum(x; dims=1) + x23 = sum(x; dims=(2,3)) + @test @fastmath(maximum!(x1, x)) ≈ maximum(x; dims=1) + @test x1 ≈ maximum(x; dims=1) + @test @fastmath(minimum!(x23, x)) ≈ minimum(x; dims=(2,3)) + @test x23 ≈ minimum(x; dims=(2,3)) + @test @fastmath(maximum!(abs, x23, x .+ im)) ≈ maximum(abs, x .+ im; dims=(2,3)) + @test @fastmath(minimum!(abs2, x1, x .+ im)) ≈ minimum(abs2, x .+ im; dims=1) +end + @testset "issue #10544" begin a = fill(1.,2,2) b = fill(1.,2,2) @@ -259,3 +284,17 @@ end end end end + +@testset "+= with indexing (#47241)" begin + i = 0 + x = zeros(2) + @fastmath x[i += 1] += 1 + @fastmath x[end] += 1 + @test x == [1, 1] + @test i == 1 +end + +@testset "@fastmath-related crash (#49907)" begin + x = @fastmath maximum(Float16[1,2,3]; init = Float16(0)) + @test x == Float16(3) +end diff --git a/test/file.jl b/test/file.jl index e29b3099d3ed9..808fba52f9048 100644 --- a/test/file.jl +++ b/test/file.jl @@ -42,7 +42,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER # creation of symlink to directory that does not yet exist new_dir = joinpath(subdir, "new_dir") foo_file = joinpath(subdir, "new_dir", "foo") - nedlink = joinpath(subdir, "non_existant_dirlink") + nedlink = joinpath(subdir, "nonexistent_dirlink") symlink("new_dir", nedlink; dir_target=true) try readdir(nedlink) @@ -598,6 +598,17 @@ close(s) # This section tests temporary file and directory creation. # ####################################################################### +@testset "invalid read/write flags" begin + @test try + open("this file is not expected to exist", read=false, write=false) + false + catch e + isa(e, SystemError) || rethrow() + @test endswith(sprint(showerror, e), "Invalid argument") + true + end +end + @testset "quoting filenames" begin @test try open("this file is not expected to exist") @@ -771,13 +782,13 @@ end mktempdir() do tmpdir # rename file file = joinpath(tmpdir, "afile.txt") - files_stat = stat(file) close(open(file, "w")) # like touch, but lets the operating system update + files_stat = stat(file) # the timestamp for greater precision on some platforms (windows) newfile = joinpath(tmpdir, "bfile.txt") mv(file, newfile) - newfile_stat = stat(file) + newfile_stat = stat(newfile) @test !ispath(file) @test isfile(newfile) @@ -1253,7 +1264,7 @@ let f = open(file, "w") if Sys.iswindows() f = RawFD(ccall(:_open, Cint, (Cstring, Cint), file, Base.Filesystem.JL_O_RDONLY)) else - f = RawFD(ccall(:open, Cint, (Cstring, Cint), file, Base.Filesystem.JL_O_RDONLY)) + f = RawFD(ccall(:open, Cint, (Cstring, Cint, UInt32...), file, Base.Filesystem.JL_O_RDONLY)) end test_LibcFILE(Libc.FILE(f, Libc.modestr(true, false))) end @@ -1453,7 +1464,7 @@ rm(dir) #################### mktempdir() do dir name1 = joinpath(dir, "apples") - name2 = joinpath(dir, "bannanas") + name2 = joinpath(dir, "bananas") @test !ispath(name1) @test touch(name1) == name1 @test isfile(name1) @@ -1520,11 +1531,11 @@ if !Sys.iswindows() chmod(joinpath(d, "empty_outer", "empty_inner"), 0o333) # Test that an empty directory, even when we can't read its contents, is deletable - rm(joinpath(d, "empty_outer"); recursive=true, force=true) + rm(joinpath(d, "empty_outer"); 
recursive=true)
         @test !isdir(joinpath(d, "empty_outer"))

         # But a non-empty directory is not
-        @test_throws Base.IOError rm(joinpath(d, "nonempty"); recursive=true, force=true)
+        @test_throws Base.IOError rm(joinpath(d, "nonempty"); recursive=true)
         chmod(joinpath(d, "nonempty"), 0o777)
         rm(joinpath(d, "nonempty"); recursive=true, force=true)
         @test !isdir(joinpath(d, "nonempty"))
@@ -1625,6 +1636,28 @@ end
     end
 end

+if Sys.isunix()
+    @testset "mkfifo" begin
+        mktempdir() do dir
+            path = Libc.mkfifo(joinpath(dir, "fifo"))
+            @sync begin
+                @async write(path, "hello")
+                cat_exec = `$(Base.julia_cmd()) --startup-file=no -e "write(stdout, read(ARGS[1]))"`
+                @test read(`$cat_exec $path`, String) == "hello"
+            end
+
+            existing_file = joinpath(dir, "existing")
+            write(existing_file, "")
+            @test_throws SystemError Libc.mkfifo(existing_file)
+        end
+    end
+else
+    @test_throws(
+        "mkfifo: Operation not supported",
+        Libc.mkfifo(joinpath(pwd(), "dummy_path")),
+    )
+end
+
 @testset "chmod/isexecutable" begin
     mktempdir() do dir
         mkdir(joinpath(dir, "subdir"))
@@ -1651,7 +1684,7 @@ end

 if Sys.iswindows()
     @testset "mkdir/rm permissions" begin
-        # test delete permission in system folders (i.e. impliclty test chmod permissions)
+        # test delete permission in system folders (i.e. implicitly test chmod permissions)
         # issue #38433
         @test withenv("TMP" => "C:\\") do
             mktempdir() do dir end
@@ -1662,6 +1695,28 @@ if Sys.iswindows()
     end
 end

+# Unusually for structs, we test this explicitly because the fields of StatStruct
+# are part of its documentation, and therefore cannot change.
+@testset "StatStruct has promised fields" begin
+    f, io = mktemp()
+    s = stat(f)
+    @test s isa Base.StatStruct
+
+    @test s.desc isa Union{String, Base.OS_HANDLE}
+    @test s.size isa Int64
+    @test s.device isa UInt
+    @test s.inode isa UInt
+    @test s.mode isa UInt
+    @test s.nlink isa Int
+    @test s.uid isa UInt
+    @test s.gid isa UInt
+    @test s.rdev isa UInt
+    @test s.blksize isa Int64
+    @test s.blocks isa Int64
+    @test s.mtime isa Float64
+    @test s.ctime isa Float64
+end
+
 @testset "StatStruct show's extended details" begin
     f, io = mktemp()
     s = stat(f)
diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl
index 7e9d8021ac5df..f33ec75b58322 100644
--- a/test/floatfuncs.jl
+++ b/test/floatfuncs.jl
@@ -139,9 +139,10 @@ end
 end

 @testset "literal pow matches runtime pow matches optimized pow" begin
-    two = 2
-    @test 1.0000000105367122^2 == 1.0000000105367122^two
-    @test 1.0041504f0^2 == 1.0041504f0^two
+    let two = 2
+        @test 1.0000000105367122^2 == 1.0000000105367122^two
+        @test 1.0041504f0^2 == 1.0041504f0^two
+    end

     function g2(start, two, N)
         x = start
@@ -192,11 +193,13 @@ end
     finv(x) = f(x, -1)
     f2(x) = f(x, 2)
     f3(x) = f(x, 3)
-    x = 1.0000000105367122
-    @test x^2 == f(x, 2) == f2(x) == x*x == Float64(big(x)*big(x))
-    @test x^3 == f(x, 3) == f3(x) == x*x*x == Float64(big(x)*big(x)*big(x))
-    x = 1.000000007393669
-    @test x^-1 == f(x, -1) == finv(x) == 1/x == inv(x) == Float64(1/big(x)) == Float64(inv(big(x)))
+    let x = 1.0000000105367122
+        @test x^2 == f(x, 2) == f2(x) == x*x == Float64(big(x)*big(x))
+        @test x^3 == f(x, 3) == f3(x) == x*x*x == Float64(big(x)*big(x)*big(x))
+    end
+    let x = 1.000000007393669
+        @test x^-1 == f(x, -1) == finv(x) == 1/x == inv(x) == Float64(1/big(x)) == Float64(inv(big(x)))
+    end
 end

 @testset "curried approximation" begin
@@ -209,3 +212,78 @@ end
     struct CustomNumber <: Number end
     @test !isnan(CustomNumber())
 end
+
+@testset "isapprox and integer overflow" begin
+    for T in (Int8, Int16, Int32)
+        T === Int && continue
+        @test 
!isapprox(typemin(T), T(0)) + @test !isapprox(typemin(T), unsigned(T)(0)) + @test !isapprox(typemin(T), 0) + @test !isapprox(typemin(T), T(0), atol=0.99) + @test !isapprox(typemin(T), unsigned(T)(0), atol=0.99) + @test !isapprox(typemin(T), 0, atol=0.99) + @test_broken !isapprox(typemin(T), T(0), atol=1) + @test_broken !isapprox(typemin(T), unsigned(T)(0), atol=1) + @test !isapprox(typemin(T), 0, atol=1) + + @test !isapprox(typemin(T)+T(10), T(10)) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10)) + @test !isapprox(typemin(T)+T(10), 10) + @test !isapprox(typemin(T)+T(10), T(10), atol=0.99) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=0.99) + @test !isapprox(typemin(T)+T(10), 10, atol=0.99) + @test_broken !isapprox(typemin(T)+T(10), T(10), atol=1) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=1) + @test !isapprox(typemin(T)+T(10), 10, atol=1) + + @test isapprox(typemin(T), 0.0, rtol=1) + end + for T in (Int, Int64, Int128) + @test !isapprox(typemin(T), T(0)) + @test !isapprox(typemin(T), unsigned(T)(0)) + @test !isapprox(typemin(T), T(0), atol=0.99) + @test !isapprox(typemin(T), unsigned(T)(0), atol=0.99) + @test_broken !isapprox(typemin(T), T(0), atol=1) + @test_broken !isapprox(typemin(T), unsigned(T)(0), atol=1) + + @test !isapprox(typemin(T)+T(10), T(10)) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10)) + @test !isapprox(typemin(T)+T(10), T(10), atol=0.99) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=0.99) + @test_broken !isapprox(typemin(T)+T(10), T(10), atol=1) + @test !isapprox(typemin(T)+T(10), unsigned(T)(10), atol=1) + + @test isapprox(typemin(T), 0.0, rtol=1) + end +end + +@testset "Conversion from floating point to unsigned integer near extremes (#51063)" begin + @test_throws InexactError UInt32(4.2949673f9) + @test_throws InexactError UInt64(1.8446744f19) + @test_throws InexactError UInt64(1.8446744073709552e19) + @test_throws InexactError UInt128(3.402823669209385e38) +end + +@testset "Conversion from floating point to integer near extremes (exhaustive)" begin + for Ti in Base.BitInteger_types, Tf in (Float16, Float32, Float64), x in (typemin(Ti), typemax(Ti)) + y = Tf(x) + for i in -3:3 + z = nextfloat(y, i) + + result = isfinite(z) ? round(BigInt, z) : error + result = result !== error && typemin(Ti) <= result <= typemax(Ti) ? 
result : error + + if result === error + @test_throws InexactError round(Ti, z) + @test_throws InexactError Ti(z) + else + @test result == round(Ti, z) + if isinteger(z) + @test result == Ti(z) + else + @test_throws InexactError Ti(z) + end + end + end + end +end diff --git a/test/functional.jl b/test/functional.jl index c9b0b270baeb7..fce64c0e5720a 100644 --- a/test/functional.jl +++ b/test/functional.jl @@ -139,6 +139,20 @@ end @test findall(!iszero, x^2 for x in -1:0.5:1) == [1, 2, 4, 5] @test argmin(x^2 for x in -1:0.5:1) == 3 +# findall return type, see #45495 +let gen = (i for i in 1:3); + @test @inferred(findall(x -> true, gen))::Vector{Int} == [1, 2, 3] + @test @inferred(findall(x -> false, gen))::Vector{Int} == Int[] + @test @inferred(findall(x -> x < 0, gen))::Vector{Int} == Int[] +end +let d = Dict() + d[7]=2 + d[3]=6 + @test @inferred(sort(findall(x -> true, d)))::Vector{Int} == [3, 7] + @test @inferred(sort(findall(x -> false, d)))::Vector{Any} == [] + @test @inferred(sort(findall(x -> x < 0, d)))::Vector{Any} == [] +end + # inference on vararg generator of a type (see #22907 comments) let f(x) = collect(Base.Generator(=>, x, x)) @test @inferred(f((1,2))) == [1=>1, 2=>2] diff --git a/test/gc.jl b/test/gc.jl new file mode 100644 index 0000000000000..99b96a5ae1fd2 --- /dev/null +++ b/test/gc.jl @@ -0,0 +1,35 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Test + +function run_gctest(file) + let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $file` + @testset for test_nthreads in (1, 2, 4) + @testset for concurrent_sweep in (0, 1) + new_env = copy(ENV) + new_env["JULIA_NUM_THREADS"] = string(test_nthreads) + new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)" + @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))) + end + end + end +end + +function run_nonzero_page_utilization_test() + GC.gc() + page_utilization = Base.gc_page_utilization_data() + # at least one of the pools should have nonzero page_utilization + @test any(page_utilization .> 0) +end + +# !!! note: +# Since we run our tests on 32bit OS as well we confine ourselves +# to parameters that allocate about 512MB of objects. Max RSS is lower +# than that. +@testset "GC threads" begin + run_gctest("gc/binarytree.jl") + run_gctest("gc/linkedlist.jl") + run_gctest("gc/objarray.jl") + run_gctest("gc/chunks.jl") + run_nonzero_page_utilization_test() +end diff --git a/test/gc/binarytree.jl b/test/gc/binarytree.jl new file mode 100644 index 0000000000000..896f47fa4c9c7 --- /dev/null +++ b/test/gc/binarytree.jl @@ -0,0 +1,54 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module BinaryTreeMutable + +# Adopted from +# https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/binarytrees.html#binarytrees + +using Base.Threads +using Printf + +mutable struct Node + l::Union{Nothing, Node} + r::Union{Nothing, Node} +end + +function make(n::Int) + return n === 0 ? Node(nothing, nothing) : Node(make(n-1), make(n-1)) +end + +function check(node::Node) + return 1 + (node.l === nothing ? 
0 : check(node.l) + check(node.r))
+end
+
+function binary_trees(io, n::Int)
+    @printf io "stretch tree of depth %jd\t check: %jd\n" n+1 check(make(n+1))
+
+    long_tree = make(n)
+    minDepth = 4
+    resultSize = div((n - minDepth), 2) + 1
+    results = Vector{String}(undef, resultSize)
+    Threads.@threads for depth in minDepth:2:n
+        c = 0
+        niter = 1 << (n - depth + minDepth)
+        for _ in 1:niter
+            c += check(make(depth))
+        end
+        index = div((depth - minDepth),2) + 1
+        results[index] = @sprintf "%jd\t trees of depth %jd\t check: %jd\n" niter depth c
+    end
+
+    for i in results
+        write(io, i)
+    end
+
+    @printf io "long lived tree of depth %jd\t check: %jd\n" n check(long_tree)
+end
+
+end #module
+
+using .BinaryTreeMutable
+
+# Memory usage is 466MB
+BinaryTreeMutable.binary_trees(devnull, 16)
+GC.gc()
diff --git a/test/gc/chunks.jl b/test/gc/chunks.jl
new file mode 100644
index 0000000000000..08af59ecbf973
--- /dev/null
+++ b/test/gc/chunks.jl
@@ -0,0 +1,17 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# MWE from https://github.com/JuliaLang/julia/issues/49501
+N = 1_000_000 # or larger
+T = BigFloat
+
+struct Q{T}
+    a::T
+    b::T
+end
+
+# Memory use is ~512MB
+let
+    A = [Q(rand(T), rand(T)) for _ in 1:N]
+end
+
+GC.gc()
diff --git a/test/gc/linkedlist.jl b/test/gc/linkedlist.jl
new file mode 100644
index 0000000000000..669e5f8ec21d9
--- /dev/null
+++ b/test/gc/linkedlist.jl
@@ -0,0 +1,23 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+mutable struct ListNode
+    key::Int64
+    next::ListNode
+    ListNode() = new()
+    ListNode(x)= new(x)
+    ListNode(x,y) = new(x,y);
+end
+
+function list(N=16*1024^2)
+    start::ListNode = ListNode(1)
+    current::ListNode = start
+    for i = 2:N
+        current = ListNode(i,current)
+    end
+    return current.key
+end
+
+# Memory use is 512 MB
+_ = list()
+
+GC.gc()
diff --git a/test/gc/objarray.jl b/test/gc/objarray.jl
new file mode 100644
index 0000000000000..d36fcedef71a4
--- /dev/null
+++ b/test/gc/objarray.jl
@@ -0,0 +1,36 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Random: seed!
+seed!(1) + +abstract type Cell end + +struct CellA<:Cell + a::Ref{Int} +end + +struct CellB<:Cell + b::String +end + +function fillcells!(mc::Array{Cell}) + for ind in eachindex(mc) + mc[ind] = ifelse(rand() > 0.5, CellA(ind), CellB(string(ind))) + end + return mc +end + +function work(size) + mcells = Array{Cell}(undef, size, size) + fillcells!(mcells) +end + +function run(maxsize) + Threads.@threads for i in 1:maxsize + work(i*375) + end +end + +# Memory usage 581 MB +run(4) +GC.gc() diff --git a/test/gcext/.gitignore b/test/gcext/.gitignore index 0f8c848e5cea6..829c3297dfa2c 100644 --- a/test/gcext/.gitignore +++ b/test/gcext/.gitignore @@ -1,2 +1,3 @@ /gcext /gcext-debug +/Foreign/deps diff --git a/test/gcext/DependsOnForeign/Manifest.toml b/test/gcext/DependsOnForeign/Manifest.toml new file mode 100644 index 0000000000000..d830116bb54ca --- /dev/null +++ b/test/gcext/DependsOnForeign/Manifest.toml @@ -0,0 +1,14 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.8.3" +manifest_format = "2.0" +project_hash = "e7199d961a5f4ebad68a3deaf5beaa7406a0afcb" + +[[deps.Foreign]] +deps = ["Libdl"] +path = "../Foreign" +uuid = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" +version = "0.1.0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/test/gcext/DependsOnForeign/Project.toml b/test/gcext/DependsOnForeign/Project.toml new file mode 100644 index 0000000000000..b2bee1338c2b7 --- /dev/null +++ b/test/gcext/DependsOnForeign/Project.toml @@ -0,0 +1,6 @@ +name = "DependsOnForeign" +uuid = "4b0716e0-dfb5-4e00-8b44-e2685a41517f" +version = "0.1.0" + +[deps] +Foreign = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" diff --git a/test/gcext/DependsOnForeign/src/DependsOnForeign.jl b/test/gcext/DependsOnForeign/src/DependsOnForeign.jl new file mode 100644 index 0000000000000..cdf31774956e1 --- /dev/null +++ b/test/gcext/DependsOnForeign/src/DependsOnForeign.jl @@ -0,0 +1,14 @@ +module DependsOnForeign + +using Foreign + +f(obj::FObj) = Base.pointer_from_objref(obj) +precompile(f, (FObj,)) + +const FObjRef = Ref{FObj}() + +function __init__() + FObjRef[] = FObj() +end + +end # module DependsOnForeign diff --git a/test/gcext/Foreign/Manifest.toml b/test/gcext/Foreign/Manifest.toml new file mode 100644 index 0000000000000..25cf111aa50ba --- /dev/null +++ b/test/gcext/Foreign/Manifest.toml @@ -0,0 +1,8 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.0-DEV" +manifest_format = "2.0" +project_hash = "7b70172a2edbdc772ed789e79d4411d7528eae86" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/test/gcext/Foreign/Project.toml b/test/gcext/Foreign/Project.toml new file mode 100644 index 0000000000000..819f64beee442 --- /dev/null +++ b/test/gcext/Foreign/Project.toml @@ -0,0 +1,6 @@ +name = "Foreign" +uuid = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" +version = "0.1.0" + +[deps] +Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/test/gcext/Foreign/deps/foreignlib.c b/test/gcext/Foreign/deps/foreignlib.c new file mode 100644 index 0000000000000..72e02e9bef0cf --- /dev/null +++ b/test/gcext/Foreign/deps/foreignlib.c @@ -0,0 +1,56 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include "julia.h" +#include "julia_gcext.h" + +// TODO make these atomics +int nmarks = 0; +int nsweeps = 0; + +uintptr_t mark(jl_ptls_t ptls, jl_value_t *p) +{ + nmarks += 1; + return 0; +} + +void sweep(jl_value_t *p) +{ + nsweeps++; +} + +JL_DLLEXPORT jl_datatype_t *declare_foreign(jl_sym_t* name, jl_module_t *module, jl_datatype_t *parent) +{ + return jl_new_foreign_type(name, module, parent, mark, sweep, 1, 0); +} + +// #define GC_MAX_SZCLASS (2032 - sizeof(void *)) + +JL_DLLEXPORT int reinit_foreign(jl_datatype_t *dt) +{ + int ret = jl_reinit_foreign_type(dt, mark, sweep); + nmarks = nsweeps = 0; + if (ret == 0) + return 0; + if (dt->layout->npointers != 1) + return -1; + if (dt->layout->size != 0) + return -2; + return ret; +} + +JL_DLLEXPORT jl_value_t *allocate_foreign(jl_ptls_t ptls, size_t sz, jl_datatype_t *dt) +{ + jl_value_t* obj = jl_gc_alloc_typed(ptls, sz, dt); + jl_gc_schedule_foreign_sweepfunc(ptls, obj); + return obj; +} + +JL_DLLEXPORT int nmark_counter() +{ + return nmarks; +} + +JL_DLLEXPORT int nsweep_counter() +{ + return nsweeps; +} diff --git a/test/gcext/Foreign/src/Foreign.jl b/test/gcext/Foreign/src/Foreign.jl new file mode 100644 index 0000000000000..a1ab79fab586a --- /dev/null +++ b/test/gcext/Foreign/src/Foreign.jl @@ -0,0 +1,29 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module Foreign + +using Libdl + +const foreignlib = joinpath(ENV["BINDIR"], "foreignlib.$(dlext)") + +const FObj = ccall((:declare_foreign, foreignlib), Any, (Any, Any, Any), :FObj, @__MODULE__, Any) +FObj() = ccall((:allocate_foreign, foreignlib), Any, (Ptr{Cvoid}, Csize_t, Any,), Core.getptls(), sizeof(Ptr{Cvoid}), FObj)::FObj + +export FObj + +get_nmark() = ccall((:nmark_counter, foreignlib), Cint, ()) +get_nsweep() = ccall((:nsweep_counter, foreignlib), Cint, ()) + +function __init__() + @assert ccall((:reinit_foreign, foreignlib), Cint, (Any,), FObj) == 1 +end + +allocs(N) = [Foreign.FObj() for _ in 1:N] + +function test(N) + x = allocs(N) + Core.donotdelete(x) + x = nothing +end + +end # module Foreign diff --git a/test/gcext/ForeignObjSerialization/Manifest.toml b/test/gcext/ForeignObjSerialization/Manifest.toml new file mode 100644 index 0000000000000..d830116bb54ca --- /dev/null +++ b/test/gcext/ForeignObjSerialization/Manifest.toml @@ -0,0 +1,14 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.8.3" +manifest_format = "2.0" +project_hash = "e7199d961a5f4ebad68a3deaf5beaa7406a0afcb" + +[[deps.Foreign]] +deps = ["Libdl"] +path = "../Foreign" +uuid = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" +version = "0.1.0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" diff --git a/test/gcext/ForeignObjSerialization/Project.toml b/test/gcext/ForeignObjSerialization/Project.toml new file mode 100644 index 0000000000000..1a26ff7884481 --- /dev/null +++ b/test/gcext/ForeignObjSerialization/Project.toml @@ -0,0 +1,6 @@ +name = "ForeignObjSerialization" +uuid = "2c015d96-a6ca-42f0-bc68-f9090de6bc2c" +version = "0.1.0" + +[deps] +Foreign = "de1f6f7a-d7b3-400f-91c2-33f248ee89c4" diff --git a/test/gcext/ForeignObjSerialization/src/ForeignObjSerialization.jl b/test/gcext/ForeignObjSerialization/src/ForeignObjSerialization.jl new file mode 100644 index 0000000000000..e32753aecb3b4 --- /dev/null +++ b/test/gcext/ForeignObjSerialization/src/ForeignObjSerialization.jl @@ -0,0 +1,6 @@ +module ForeignObjSerialization + +using Foreign +const FObjRef = 
Ref{FObj}(FObj()) + +end # module ForeignObjSerialization diff --git a/test/gcext/LocalTest.jl b/test/gcext/LocalTest.jl index f73b4b47e8023..e2ee94e765321 100644 --- a/test/gcext/LocalTest.jl +++ b/test/gcext/LocalTest.jl @@ -54,13 +54,13 @@ function set_aux_root(n :: Int, x :: String) return ccall(:set_aux_root, Nothing, (UInt, String), n, x) end -function internal_obj_scan(p :: Any) - if ccall(:internal_obj_scan, Cint, (Any,), p) == 0 - global internal_obj_scan_failures += 1 - end -end +# function internal_obj_scan(p :: Any) +# if ccall(:internal_obj_scan, Cint, (Any,), p) == 0 +# global internal_obj_scan_failures += 1 +# end +# end -global internal_obj_scan_failures = 0 +# global internal_obj_scan_failures = 0 for i in 0:1000 set_aux_root(i, string(i)) @@ -70,12 +70,12 @@ function test() local stack = make() for i in 1:100000 push(stack, string(i, base=2)) - internal_obj_scan(top(stack)) + # internal_obj_scan(top(stack)) end for i in 1:1000 local stack2 = make() - internal_obj_scan(stack2) - internal_obj_scan(blob(stack2)) + # internal_obj_scan(stack2) + # internal_obj_scan(blob(stack2)) while !empty(stack) push(stack2, pop(stack)) end @@ -98,5 +98,5 @@ end print(gc_counter_full(), " full collections.\n") print(gc_counter_inc(), " partial collections.\n") print(num_obj_sweeps(), " object sweeps.\n") -print(internal_obj_scan_failures, " internal object scan failures.\n") +# print(internal_obj_scan_failures, " internal object scan failures.\n") print(corrupted_roots, " corrupted auxiliary roots.\n") diff --git a/test/gcext/Makefile b/test/gcext/Makefile index 7cb602572e3c5..2a77b76ede50d 100644 --- a/test/gcext/Makefile +++ b/test/gcext/Makefile @@ -19,18 +19,26 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) # get the executable suffix, if any EXE := $(suffix $(abspath $(JULIA))) +OS := $(shell uname) +ifeq ($(OS), Darwin) + DYLIB := .dylib +else + DYLIB := .so +endif + # get compiler and linker flags. 
(see: `contrib/julia-config.jl`) JULIA_CONFIG := $(JULIA) -e 'include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "julia-config.jl"))' -- CPPFLAGS_ADD := CFLAGS_ADD = $(shell $(JULIA_CONFIG) --cflags) LDFLAGS_ADD = -lm $(shell $(JULIA_CONFIG) --ldflags --ldlibs) +DYLIBFLAGS := --shared -fPIC DEBUGFLAGS += -g #============================================================================= -release: $(BIN)/gcext$(EXE) -debug: $(BIN)/gcext-debug$(EXE) +release: $(BIN)/gcext$(EXE) $(BIN)/Foreign/deps/foreignlib$(DYLIB) +debug: $(BIN)/gcext-debug$(EXE) $(BIN)/Foreign/deps/foreignlib-debug$(DYLIB) $(BIN)/gcext$(EXE): $(SRCDIR)/gcext.c $(CC) $^ -o $@ $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) @@ -38,19 +46,27 @@ $(BIN)/gcext$(EXE): $(SRCDIR)/gcext.c $(BIN)/gcext-debug$(EXE): $(SRCDIR)/gcext.c $(CC) $^ -o $@ $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) $(DEBUGFLAGS) +$(BIN)/foreignlib$(DYLIB): $(SRCDIR)/Foreign/deps/foreignlib.c + $(CC) $^ -o $@ $(DYLIBFLAGS) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) + +$(BIN)/foreignlib-debug$(DYLIB): $(SRCDIR)/Foreign/deps/foreignlib.c + $(CC) $^ -o $@ $(DYLIBFLAGS) $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS) $(DEBUGFLAGS) + ifneq ($(abspath $(BIN)),$(abspath $(SRCDIR))) # for demonstration purposes, our demo code is also installed # in $BIN, although this would likely not be typical -$(BIN)/LocalModule.jl: $(SRCDIR)/LocalModule.jl +$(BIN)/LocalTest.jl: $(SRCDIR)/LocalTest.jl cp $< $@ endif -check: $(BIN)/gcext$(EXE) $(BIN)/LocalTest.jl - $(JULIA) --depwarn=error $(SRCDIR)/gcext-test.jl $< +check: $(BIN)/gcext$(EXE) $(BIN)/LocalTest.jl $(BIN)/foreignlib$(DYLIB) + BINDIR=$(BIN) $(JULIA) --depwarn=error $(SRCDIR)/gcext-test.jl $< @echo SUCCESS clean: -rm -f $(BIN)/gcext-debug$(EXE) $(BIN)/gcext$(EXE) + -rm -f $(BIN)/foreignlib$(DYLIB) + -rm -f $(BIN)/foreignlib-debug$(DYLIB) .PHONY: release debug clean check diff --git a/test/gcext/gcext-test.jl b/test/gcext/gcext-test.jl index e6f3e3663ff0e..81637392e3c5d 100644 --- a/test/gcext/gcext-test.jl +++ b/test/gcext/gcext-test.jl @@ -2,6 +2,7 @@ # tests the output of the embedding example is correct using Test +using Pkg if Sys.iswindows() # libjulia needs to be in the same directory as the embedding executable or in path @@ -31,12 +32,47 @@ end errlines = fetch(err_task) lines = fetch(out_task) @test length(errlines) == 0 - @test length(lines) == 6 + # @test length(lines) == 6 + @test length(lines) == 5 @test checknum(lines[2], r"([0-9]+) full collections", n -> n >= 10) @test checknum(lines[3], r"([0-9]+) partial collections", n -> n > 0) @test checknum(lines[4], r"([0-9]+) object sweeps", n -> n > 0) - @test checknum(lines[5], r"([0-9]+) internal object scan failures", - n -> n == 0) - @test checknum(lines[6], r"([0-9]+) corrupted auxiliary roots", + # @test checknum(lines[5], r"([0-9]+) internal object scan failures", + # n -> n == 0) + # @test checknum(lines[6], r"([0-9]+) corrupted auxiliary roots", + # n -> n == 0) + @test checknum(lines[5], r"([0-9]+) corrupted auxiliary roots", n -> n == 0) end + +@testset "Package with foreign type" begin + load_path = copy(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "Foreign")) + push!(LOAD_PATH, joinpath(@__DIR__, "DependsOnForeign")) + try + # Force recaching + Base.compilecache(Base.identify_package("Foreign")) + Base.compilecache(Base.identify_package("DependsOnForeign")) + + push!(LOAD_PATH, joinpath(@__DIR__, 
"ForeignObjSerialization")) + @test_throws ErrorException Base.compilecache(Base.identify_package("ForeignObjSerialization"), Base.DevNull()) + pop!(LOAD_PATH) + + (@eval (using Foreign)) + @test Base.invokelatest(Foreign.get_nmark) == 0 + @test Base.invokelatest(Foreign.get_nsweep) == 0 + + obj = Base.invokelatest(Foreign.FObj) + GC.@preserve obj begin + GC.gc(true) + end + @test Base.invokelatest(Foreign.get_nmark) > 0 + @time Base.invokelatest(Foreign.test, 10) + GC.gc(true) + @test Base.invokelatest(Foreign.get_nsweep) > 0 + (@eval (using DependsOnForeign)) + Base.invokelatest(DependsOnForeign.f, obj) + finally + copy!(LOAD_PATH, load_path) + end +end diff --git a/test/gcext/gcext.c b/test/gcext/gcext.c index 7f2986d8f1f57..d5bf91ec8c9ab 100644 --- a/test/gcext/gcext.c +++ b/test/gcext/gcext.c @@ -32,7 +32,7 @@ static inline int lt_ptr(void *a, void *b) return (uintptr_t)a < (uintptr_t)b; } -/* align pointer to full word if mis-aligned */ +/* align pointer to full word if misaligned */ static inline void *align_ptr(void *p) { uintptr_t u = (uintptr_t)p; @@ -307,6 +307,7 @@ static size_t gc_alloc_size(jl_value_t *val) int internal_obj_scan(jl_value_t *val) { + // FIXME: `jl_gc_internal_obj_base_ptr` is not allowed to be called from outside GC if (jl_gc_internal_obj_base_ptr(val) == val) { size_t size = gc_alloc_size(val); char *addr = (char *)val; @@ -611,8 +612,7 @@ int main() jl_gc_set_cb_root_scanner(abort_with_error, 1); jl_gc_set_cb_root_scanner(abort_with_error, 0); // Create module to store types in. - module = jl_new_module(jl_symbol("TestGCExt")); - module->parent = jl_main_module; + module = jl_new_module(jl_symbol("TestGCExt"), jl_main_module); jl_set_const(jl_main_module, jl_symbol("TestGCExt"), (jl_value_t *)module); // Define Julia types for our stack implementation. 
datatype_stack = jl_new_foreign_type( diff --git a/test/gmp.jl b/test/gmp.jl index 1125f57b195b3..13413abe55f9d 100644 --- a/test/gmp.jl +++ b/test/gmp.jl @@ -11,6 +11,11 @@ ee = typemax(Int64) @test BigInt <: Signed @test big(1) isa Signed + if sizeof(Culong) >= 8 + @test_throws OutOfMemoryError big(96608869069402268615522366320733234710)^16374500563449903721 + @test_throws OutOfMemoryError 555555555555555555555555555555555555555555555555555^55555555555555555 + end + let x = big(1) @test signed(x) === x @test convert(Signed, x) === x @@ -215,6 +220,8 @@ end end @testset "combinatorics" begin @test factorial(BigInt(40)) == parse(BigInt,"815915283247897734345611269596115894272000000000") + @test_throws DomainError factorial(BigInt(-1)) + @test_throws DomainError factorial(BigInt(rand(-999:-2))) @test binomial(BigInt(1), -1) == BigInt(0) @test binomial(BigInt(1), 2) == BigInt(0) @test binomial(BigInt(-53), 42) == parse(BigInt,"959509335087854414441273718") @@ -227,6 +234,7 @@ let a, b @test 0 == sum(BigInt[]) isa BigInt @test prod(b) == foldl(*, b) @test 1 == prod(BigInt[]) isa BigInt + @test prod(BigInt[0, 0, 0]) == 0 # issue #46665 end @testset "Iterated arithmetic" begin @@ -335,11 +343,13 @@ end @testset "digits" begin n = Int64(2080310129088201558) N = big(n) - for base in (2,7,10,11,16,30,50,62,64,100), pad in (0,1,10,100) - @test digits(n; base, pad) == digits(N; base, pad) + for base in (2,7,10,11,16,30,50,62,64,100,128), pad in (0,1,10,100) + @test digits(n; base, pad) == digits(N; base, pad) == digits(UInt8, N; base, pad) @test digits(-n; base, pad) == digits(-N; base, pad) @test digits!(Vector{Int}(undef, pad), n; base) == digits!(Vector{Int}(undef, pad), N; base) end + @test digits(UInt8, n; base=1<<8) == digits(UInt8, N; base=1<<8) + @test digits(UInt16, n; base=1<<16) == digits(UInt16, N; base=1<<16) end # serialization (#5133) @@ -438,8 +448,25 @@ end @test isqrt(big(4)) == 2 @test isqrt(big(5)) == 2 -@test big(5)^true == big(5) -@test big(5)^false == one(BigInt) + +@testset "Exponentiation operator" begin + @test big(5)^true == big(5) + @test big(5)^false == one(BigInt) + testvals = Int8[-128:-126; -3:3; 125:127] + @testset "BigInt and Int8 are consistent: $i^$j" for i in testvals, j in testvals + int8_res = try + i^j + catch e + e + end + if int8_res isa Int8 + @test (big(i)^big(j)) % Int8 === int8_res + else + # Test both have exception of the same type + @test_throws typeof(int8_res) big(i)^big(j) + end + end +end @testset "math ops returning BigFloat" begin # operations that when applied to Int64 give Float64, should give BigFloat diff --git a/test/goto.jl b/test/goto.jl index 011ec32a851bd..e069058f38d52 100644 --- a/test/goto.jl +++ b/test/goto.jl @@ -87,7 +87,7 @@ end @test goto_test5_3() -@test Expr(:error, "goto from a try/finally block is not permitted") == +@test Expr(:error, "goto from a try/finally block is not permitted around $(@__FILE__):$(3 + @__LINE__)") == Meta.lower(@__MODULE__, quote function goto_test6() try diff --git a/test/hashing.jl b/test/hashing.jl index 9bd076554962f..173a31d10a6a9 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -using Random, LinearAlgebra, SparseArrays +using Random, LinearAlgebra isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") using .Main.OffsetArrays @@ -8,7 +8,8 @@ types = Any[ Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float32, Float64, Rational{Int8}, Rational{UInt8}, Rational{Int16}, Rational{UInt16}, - Rational{Int32}, Rational{UInt32}, Rational{Int64}, Rational{UInt64} + Rational{Int32}, Rational{UInt32}, Rational{Int64}, Rational{UInt64}, + BigFloat, BigInt, Rational{BigInt} ] vals = vcat( typemin(Int64), @@ -51,8 +52,7 @@ let collides = 0 collides += eq end end - # each pair of types has one collision for these values - @test collides <= (length(types) - 1)^2 + @test collides <= 516 end @test hash(0.0) != hash(-0.0) @@ -60,6 +60,9 @@ end @test hash(nextfloat(2.0^63)) == hash(UInt64(nextfloat(2.0^63))) @test hash(prevfloat(2.0^64)) == hash(UInt64(prevfloat(2.0^64))) +# issue #48744 +@test hash(typemin(Int)//1) === hash(big(typemin(Int)//1)) + # issue #9264 @test hash(1//6,zero(UInt)) == invoke(hash, Tuple{Real, UInt}, 1//6, zero(UInt)) @test hash(1//6) == hash(big(1)//big(6)) @@ -89,9 +92,8 @@ vals = Any[ Dict(x => x for x in 1:10), Dict(7=>7,9=>9,4=>4,10=>10,2=>2,3=>3,8=>8,5=>5,6=>6,1=>1), [], [1], [2], [1, 1], [1, 2], [1, 3], [2, 2], [1, 2, 2], [1, 3, 3], - zeros(2, 2), spzeros(2, 2), Matrix(1.0I, 2, 2), sparse(1.0I, 2, 2), - sparse(fill(1., 2, 2)), fill(1., 2, 2), sparse([0 0; 1 0]), [0 0; 1 0], - [-0. 0; -0. 0.], SparseMatrixCSC(2, 2, [1, 3, 3], [1, 2], [-0., -0.]), + zeros(2, 2), Matrix(1.0I, 2, 2), fill(1., 2, 2), + [-0. 0; -0. 0.], # issue #16364 1:4, 1:1:4, 1:-1:0, 1.0:4.0, 1.0:1.0:4.0, range(1, stop=4, length=4), # issue #35597, when `LinearIndices` does not begin at 1 @@ -138,13 +140,6 @@ vals = Any[ [5 1; 0 0], [1 1; 0 1], [0 2; 3 0], [0 2; 4 6], [4 0; 0 1], [0 0 0; 0 0 0], [1 0 0; 0 0 1], [0 0 2; 3 0 0], [0 0 7; 6 1 2], [4 0 0; 3 0 1], [0 2 4; 6 0 0], - # various stored zeros patterns - sparse([1], [1], [0]), sparse([1], [1], [-0.0]), - sparse([1, 2], [1, 1], [-0.0, 0.0]), sparse([1, 2], [1, 1], [0.0, -0.0]), - sparse([1, 2], [1, 1], [-0.0, 0.0], 3, 1), sparse([1, 2], [1, 1], [0.0, -0.0], 3, 1), - sparse([1, 3], [1, 1], [-0.0, 0.0], 3, 1), sparse([1, 3], [1, 1], [0.0, -0.0], 3, 1), - sparse([1, 2, 3], [1, 1, 1], [-1, 0, 1], 3, 1), sparse([1, 2, 3], [1, 1, 1], [-1.0, -0.0, 1.0], 3, 1), - sparse([1, 3], [1, 1], [-1, 0], 3, 1), sparse([1, 2], [1, 1], [-1, 0], 3, 1) ] for a in vals @@ -152,7 +147,6 @@ for a in vals @test hash(convert(Array{Any}, a)) == hash(b) @test hash(convert(Array{supertype(eltype(a))}, a)) == hash(b) @test hash(convert(Array{Float64}, a)) == hash(b) - @test hash(sparse(a)) == hash(b) if !any(x -> isequal(x, -0.0), a) @test hash(convert(Array{Int}, a)) == hash(b) if all(x -> typemin(Int8) <= x <= typemax(Int8), a) @@ -166,20 +160,6 @@ end @test hash(Any[Int8(127), Int8(-128), 129, 130]) == hash([127, -128, 129, 130]) != hash([127, 128, 129, 130]) -# Test hashing sparse matrix with type which does not support - -struct CustomHashReal - x::Float64 -end -Base.hash(x::CustomHashReal, h::UInt) = hash(x.x, h) -Base.:(==)(x::CustomHashReal, y::Number) = x.x == y -Base.:(==)(x::Number, y::CustomHashReal) = x == y.x -Base.zero(::Type{CustomHashReal}) = CustomHashReal(0.0) -Base.zero(x::CustomHashReal) = zero(CustomHashReal) - -let a = sparse([CustomHashReal(0), CustomHashReal(3), CustomHashReal(3)]) - @test hash(a) == hash(Array(a)) -end - vals = Any[ 0.0:0.1:0.3, 
0.3:-0.1:0.0, 0:-1:1, 0.0:-1.0:1.0, 0.0:1.1:10.0, -4:10, @@ -201,9 +181,9 @@ let a = QuoteNode(1), b = QuoteNode(1.0) @test (hash(a)==hash(b)) == (a==b) end -let a = Expr(:block, Core.TypedSlot(1, Any)), - b = Expr(:block, Core.TypedSlot(1, Any)), - c = Expr(:block, Core.TypedSlot(3, Any)) +let a = Expr(:block, Core.SlotNumber(1)), + b = Expr(:block, Core.SlotNumber(1)), + c = Expr(:block, Core.SlotNumber(3)) @test a == b && hash(a) == hash(b) @test a != c && hash(a) != hash(c) @test b != c && hash(b) != hash(c) @@ -284,3 +264,43 @@ end end end end + +if Sys.WORD_SIZE >= 64 + @testset "very large string" begin + N = 2^31+1 + s = String('\0'^N); + objectid(s) + end +end + +# Issue #49620 +let t1 = Tuple{AbstractVector,AbstractVector{<:Integer},UnitRange{<:Integer}}, + t2 = Tuple{AbstractVector,AbstractVector{<:Integer},UnitRange{<:Integer}} + @test hash(t1) == hash(t2) + @test length(Set{Type}([t1, t2])) == 1 +end + +struct AUnionParam{T<:Union{Nothing,Float32,Float64}} end +@test AUnionParam.body.hash == 0 +@test Type{AUnionParam}.hash != 0 +@test Type{AUnionParam{<:Union{Float32,Float64}}}.hash == 0 +@test Type{AUnionParam{<:Union{Nothing,Float32,Float64}}} === Type{AUnionParam} +@test Type{AUnionParam.body}.hash == 0 +@test Type{Base.Broadcast.Broadcasted}.hash != 0 + + +@testset "issue 50628" begin + # test hashing of rationals that equal floats are equal to the float hash + @test hash(5//2) == hash(big(5)//2) == hash(2.5) + # test hashing of rational that are integers hash to the integer + @test hash(Int64(5)^25) == hash(big(5)^25) == hash(Int64(5)^25//1) == hash(big(5)^25//1) + # test integer/rational that don't fit in Float64 don't hash as Float64 + @test hash(Int64(5)^25) != hash(5.0^25) + @test hash((Int64(5)//2)^25) == hash(big(5//2)^25) + # test integer/rational that don't fit in Float64 don't hash as Float64 + @test hash((Int64(5)//2)^25) != hash(2.5^25) + # test hashing of rational with odd denominator + @test hash(5//3) == hash(big(5)//3) +end + +@test Core.Compiler.is_foldable_nothrow(Base.infer_effects(hash, Tuple{Type{Int}, UInt})) diff --git a/test/int.jl b/test/int.jl index caabc7c343073..f79bc5a9781d0 100644 --- a/test/int.jl +++ b/test/int.jl @@ -124,7 +124,7 @@ end @test mod(123, UInt8) === 0x7b primitive type MyBitsType <: Signed 8 end -@test_throws ErrorException ~reinterpret(MyBitsType, 0x7b) +@test_throws MethodError ~reinterpret(MyBitsType, 0x7b) @test signed(MyBitsType) === MyBitsType UItypes = Base.BitUnsigned_types @@ -199,6 +199,17 @@ end @test val >> -scount === val << ucount end end + for T2 in Base.BitInteger_types + for op in (>>, <<, >>>) + if sizeof(T2)==sizeof(Int) || T <: Signed || (op==>>>) || T2 <: Unsigned + @test Core.Compiler.is_foldable_nothrow(Base.infer_effects(op, (T, T2))) + else + @test Core.Compiler.is_foldable(Base.infer_effects(op, (T, T2))) + # #47835, TODO implement interval arithmetic analysis + @test_broken Core.Compiler.is_nothrow(Base.infer_effects(op, (T, T2))) + end + end + end end end @@ -289,6 +300,29 @@ end end end +@testset "typemin typemax" begin + @test typemin(Int8 ) === Int8(-128) + @test typemax(Int8 ) === Int8(127) + @test typemin(UInt8 ) === UInt8(0) + @test typemax(UInt8 ) === UInt8(255) + @test typemin(Int16 ) === Int16(-32768) + @test typemax(Int16 ) === Int16(32767) + @test typemin(UInt16 ) === UInt16(0) + @test typemax(UInt16 ) === UInt16(65535) + @test typemin(Int32 ) === Int32(-2147483648) + @test typemax(Int32 ) === Int32(2147483647) + @test typemin(UInt32 ) === UInt32(0) + @test typemax(UInt32 ) === 
UInt32(4294967295) + @test typemin(Int64 ) === Int64(-9223372036854775808) + @test typemax(Int64 ) === Int64(9223372036854775807) + @test typemin(UInt64 ) === UInt64(0) + @test typemax(UInt64 ) === UInt64(0xffff_ffff_ffff_ffff) + @test typemin(UInt128) === UInt128(0) + @test typemax(UInt128) === UInt128(0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff) + @test typemin(Int128 ) === Int128(-170141183460469231731687303715884105728) + @test typemax(Int128 ) === Int128(170141183460469231731687303715884105727) +end + @testset "issue #15489" begin @test 0x00007ffea27edaa0 + (-40) === (-40) + 0x00007ffea27edaa0 === 0x00007ffea27eda78 @test UInt64(1) * Int64(-1) === typemax(UInt64) @@ -411,30 +445,6 @@ end @test bitreverse(Int32(456618293)) === Int32(-1399919400) end -@testset "min/max of datatype" begin - @test typemin(Int8) === Int8(-128) - @test typemin(UInt8) === UInt8(0) - @test typemin(Int16) === Int16(-32768) - @test typemin(UInt16) === UInt16(0) - @test typemin(Int32) === Int32(-2147483648) - @test typemin(UInt32) === UInt32(0) - @test typemin(Int64) === Int64(-9223372036854775808) - @test typemin(UInt64) === UInt64(0) - @test typemin(Int128) === Int128(-170141183460469231731687303715884105728) - @test typemin(UInt128) === UInt128(0) - - @test typemax(Int8) === Int8(127) - @test typemax(UInt8) === UInt8(255) - @test typemax(Int16) === Int16(32767) - @test typemax(UInt16) === UInt16(65535) - @test typemax(Int32) === Int32(2147483647) - @test typemax(UInt32) === UInt32(4294967295) - @test typemax(Int64) === Int64(9223372036854775807) - @test typemax(UInt64) === UInt64(0xffffffffffffffff) - @test typemax(Int128) === Int128(170141183460469231731687303715884105727) - @test typemax(UInt128) === UInt128(0xffffffffffffffffffffffffffffffff) -end - @testset "BitIntegerType" begin @test Int isa Base.BitIntegerType @test Base.BitIntegerType === Union{ diff --git a/test/intfuncs.jl b/test/intfuncs.jl index c74e5be305a31..ed661b2806fb5 100644 --- a/test/intfuncs.jl +++ b/test/intfuncs.jl @@ -221,7 +221,7 @@ end @test_throws MethodError gcdx(MyOtherRational(2//3), MyOtherRational(3//4)) end -@testset "invmod" begin +@testset "invmod(n, m)" begin @test invmod(6, 31) === 26 @test invmod(-1, 3) === 2 @test invmod(1, -3) === -2 @@ -256,6 +256,37 @@ end end end +@testset "invmod(n)" begin + for T in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128) + if sizeof(T) ≤ 2 + # test full domain for small types + for a = typemin(T)+true:T(2):typemax(T) + b = invmod(a) + @test a * b == 1 + end + else + # test random sample for large types + for _ = 1:2^12 + a = rand(T) | true + b = invmod(a) + @test a * b == 1 + end + end + end +end + +@testset "invmod(n, T)" begin + for S in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128), + T in (Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Int128,UInt128) + for _ = 1:2^8 + a = rand(S) | true + b = invmod(a, T) + @test (a * b) % T == 1 + @test (a % T) * b == 1 + end + end +end + @testset "powermod" begin @test powermod(2, 3, 5) == 3 @test powermod(2, 3, -5) == -2 @@ -267,6 +298,14 @@ end @test powermod(2, -2, 5) == 4 @test powermod(2, -1, -5) == -2 @test powermod(2, -2, -5) == -1 + + @test powermod(2, typemin(Int128), 5) == 1 + @test powermod(2, typemin(Int128), -5) == -4 + + @test powermod(2, big(3), 5) == 3 + @test powermod(2, big(3), -5) == -2 + @inferred powermod(2, -2, -5) + @inferred powermod(big(2), -2, UInt(5)) end @testset "nextpow/prevpow" begin @@ -441,12 +480,42 @@ end end end -@testset "leading_ones and count_zeros" begin +@testset 
"leading_ones, count_zeros, etc." begin @test leading_ones(UInt32(Int64(2) ^ 32 - 2)) == 31 @test leading_ones(1) == 0 @test leading_zeros(Int32(1)) == 31 @test leading_zeros(UInt32(Int64(2) ^ 32 - 2)) == 0 + @test Base.top_set_bit(3) == 2 + @test Base.top_set_bit(-Int64(17)) == 64 + @test Base.top_set_bit(big(15)) != Base.top_set_bit(big(16)) == Base.top_set_bit(big(17)) == 5 + @test_throws DomainError Base.top_set_bit(big(-17)) + + struct MyInt <: Integer + x::Int + end + MyInt(x::MyInt) = x + Base.:+(a::MyInt, b::MyInt) = a.x + b.x + + for n in 0:100 + x = ceil(Int, log2(n + 1)) + @test x == Base.top_set_bit(Int128(n)) == Base.top_set_bit(unsigned(Int128(n))) + @test x == Base.top_set_bit(Int32(n)) == Base.top_set_bit(unsigned(Int64(n))) + @test x == Base.top_set_bit(Int8(n)) == Base.top_set_bit(unsigned(Int8(n))) + @test x == Base.top_set_bit(big(n)) # BigInt fallback + @test x == Base.top_set_bit(MyInt(n)) # generic fallback + end + + for n in -10:-1 + @test 128 == Base.top_set_bit(Int128(n)) == Base.top_set_bit(unsigned(Int128(n))) + @test 32 == Base.top_set_bit(Int32(n)) == Base.top_set_bit(unsigned(Int32(n))) + @test 8 == Base.top_set_bit(Int8(n)) == Base.top_set_bit(unsigned(Int8(n))) + @test_throws DomainError Base.top_set_bit(big(n)) + # This error message should never be exposed to the end user anyway. + err = n == -1 ? InexactError : DomainError + @test_throws err Base.top_set_bit(MyInt(n)) + end + @test count_zeros(Int64(1)) == 63 end @@ -518,6 +587,14 @@ end for x in ((false,false), (false,true), (true,false), (true,true)) @test binomial(x...) == (x != (false,true)) end + + # binomial(x,k) for non-integer x + @test @inferred(binomial(10.0,3)) === 120.0 + @test @inferred(binomial(10//1,3)) === 120//1 + @test binomial(2.5,3) ≈ 5//16 === binomial(5//2,3) + @test binomial(2.5,0) == 1.0 + @test binomial(35.0, 30) ≈ binomial(35, 30) # naive method overflows + @test binomial(2.5,-1) == 0.0 end # concrete-foldability diff --git a/test/intrinsics.jl b/test/intrinsics.jl index 48c5bed6abb36..8e4ab932f5eb6 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -107,16 +107,28 @@ end const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T +@testset "elsize(::Type{<:Ptr})" begin + @test Base.elsize(Ptr{Any}) == sizeof(Int) + @test Base.elsize(Ptr{NTuple{3,Int8}}) == 3 + @test Base.elsize(Ptr{Cvoid}) == 0 + @test Base.elsize(Ptr{Base.RefValue{Any}}) == sizeof(Int) + @test Base.elsize(Ptr{Int}) == sizeof(Int) + @test_throws MethodError Base.elsize(Ptr) + @test_throws ErrorException Base.elsize(Ptr{Ref{Int}}) + @test_throws ErrorException Base.elsize(Ptr{Ref}) + @test_throws ErrorException Base.elsize(Ptr{Complex}) +end + # issue #29929 let p = Ptr{Nothing}(0) @test unsafe_store!(p, nothing) === C_NULL @test unsafe_load(p) === nothing - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing - @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing - @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing) - @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) - @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test unsafe_load(p, 
:sequentially_consistent) === nothing + @test unsafe_store!(p, nothing, :sequentially_consistent) === p + @test unsafe_swap!(p, nothing, :sequentially_consistent) === nothing + @test unsafe_modify!(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing) + @test unsafe_replace!(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) + @test unsafe_replace!(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) end struct GhostStruct end @@ -148,7 +160,6 @@ end @test_intrinsic Core.Intrinsics.sub_float Float16(3.3) Float16(2) Float16(1.301) @test_intrinsic Core.Intrinsics.mul_float Float16(3.3) Float16(2) Float16(6.6) @test_intrinsic Core.Intrinsics.div_float Float16(3.3) Float16(2) Float16(1.65) - @test_intrinsic Core.Intrinsics.rem_float Float16(3.3) Float16(2) Float16(1.301) # ternary @test_intrinsic Core.Intrinsics.fma_float Float16(3.3) Float16(4.4) Float16(5.5) Float16(20.02) @@ -169,6 +180,14 @@ end @test_intrinsic Core.Intrinsics.fptoui UInt Float16(3.3) UInt(3) end +@testset "Float16 intrinsics (crt)" begin + gnu_h2f_ieee(x::Float16) = ccall("julia__gnu_h2f_ieee", Float32, (Float16,), x) + gnu_f2h_ieee(x::Float32) = ccall("julia__gnu_f2h_ieee", Float16, (Float32,), x) + + @test gnu_h2f_ieee(Float16(3.3)) == 3.3007812f0 + @test gnu_f2h_ieee(3.3f0) == Float16(3.3) +end + using Base.Experimental: @force_compile @test_throws ConcurrencyViolationError("invalid atomic ordering") (@force_compile; Core.Intrinsics.atomic_fence(:u)) === nothing @test_throws ConcurrencyViolationError("invalid atomic ordering") (@force_compile; Core.Intrinsics.atomic_fence(Symbol("u", "x"))) === nothing @@ -191,54 +210,90 @@ swap(i, j) = j for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Complex{Int512}, Any) r = Ref{TT}(10) GC.@preserve r begin - (function (::Type{TT}) where TT + (@noinline function (::Type{TT}) where TT p = Base.unsafe_convert(Ptr{TT}, r) T(x) = convert(TT, x) S = UInt32 if TT !== Any @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) - @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) - @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(2), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(10), S(3), :sequentially_consistent, :sequentially_consistent) end @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] if sizeof(r) > 8 - @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) - @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) - @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerswap(p, T(100), :sequentially_consistent) - @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) - @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) - @test_throws 
ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent) - @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") unsafe_load(p, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") unsafe_store!(p, T(1), :sequentially_consistent) + @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") unsafe_swap!(p, T(100), :sequentially_consistent) + @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") unsafe_modify!(p, add, T(1), :sequentially_consistent) + @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") unsafe_modify!(p, swap, S(1), :sequentially_consistent) + @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") unsafe_replace!(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] else - TT !== Any && @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(10) - @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(1) - @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100) - @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100) - @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101)) - @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(102) - @test Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) === T(102) - @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(103) + if TT !== Any + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, Returns(S(5)), T(10), :sequentially_consistent) + end + @test unsafe_load(p, :sequentially_consistent) === T(10) + @test unsafe_store!(p, T(1), :sequentially_consistent) === p + @test unsafe_load(p, :sequentially_consistent) === T(1) + @test unsafe_replace!(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true)) + @test unsafe_load(p, 
:sequentially_consistent) === T(100) + @test unsafe_replace!(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false)) + @test unsafe_load(p, :sequentially_consistent) === T(100) + @test unsafe_modify!(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101)) + @test unsafe_modify!(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102)) + @test unsafe_load(p, :sequentially_consistent) === T(102) + @test unsafe_swap!(p, T(103), :sequentially_consistent) === T(102) + @test unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false)) + @test unsafe_load(p, :sequentially_consistent) === T(103) + @test unsafe_modify!(p, Returns(T(105)), nothing, :sequentially_consistent) === Pair{TT,TT}(T(103), T(105)) + @test unsafe_load(p, :sequentially_consistent) === T(105) end if TT === Any - @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === Pair{TT,TT}(T(103), S(103)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(103) - @test Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) === S(1) - @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false)) - @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(2) + @test unsafe_modify!(p, swap, S(105), :sequentially_consistent) === Pair{TT,TT}(T(105), S(105)) + @test unsafe_load(p, :sequentially_consistent) === S(105) + @test unsafe_store!(p, S(1), :sequentially_consistent) === p + @test unsafe_swap!(p, S(100), :sequentially_consistent) === S(1) + @test unsafe_replace!(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false)) + @test unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true)) + @test unsafe_load(p, :sequentially_consistent) === T(2) end end)(TT,) end end +for TT in (Ptr{Nothing}, Ptr) + r = Ref(nothing) + GC.@preserve r begin + p = Ref{TT}(Base.unsafe_convert(Ptr{Nothing}, r)) + (@noinline function (p::Ref) + p = p[] + S = UInt32 + @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, nothing, S(2), :sequentially_consistent, :sequentially_consistent) + @test Core.Intrinsics.pointerref(p, 1, 1) === nothing === r[] + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, Returns(S(1)), nothing, :sequentially_consistent) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) + @test Core.Intrinsics.atomic_pointerref(p, 
:sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, S(1), nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointermodify(p, Returns(nothing), nothing, :sequentially_consistent) === Pair{Nothing,Nothing}(nothing, nothing) + @test Core.Intrinsics.atomic_pointermodify(p, Returns(nothing), S(1), :sequentially_consistent) === Pair{Nothing,Nothing}(nothing, nothing) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, S(100), nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + end)(p,) + end +end + + mutable struct IntWrap <: Signed x::Int end @@ -255,38 +310,38 @@ Base.show(io::IO, a::IntWrap) = print(io, "IntWrap(", a.x, ")") @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) - r2 = Core.Intrinsics.pointerref(p, 1, 1) + r2 = unsafe_load(p, 1) @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[] @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[] - @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + @test unsafe_store!(p, T(1), :sequentially_consistent) === p + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 1 === r[].x && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 1 && r[].x === 100 && r2 !== r[] @test succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] @test !succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] - r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) + r2, r3 = unsafe_modify!(p, add, T(1), :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 !== r[].x && r2 !== r[] @test r3 isa IntWrap && r3.x === 101 === r[].x && r3 !== r[] - r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) + r2, r3 = unsafe_modify!(p, add, T(1), 
:sequentially_consistent) @test r2 isa IntWrap && r2.x === 101 !== r[].x && r2 !== r[] @test r3 isa IntWrap && r3.x === 102 === r[].x && r3 !== r[] - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 102 === r[].x && r2 !== r[] - r2 = Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) + r2 = unsafe_swap!(p, T(103), :sequentially_consistent) @test r2 isa IntWrap && r2.x === 102 !== r[].x && r[].x == 103 && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[] @test !succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[] end end)() diff --git a/test/iobuffer.jl b/test/iobuffer.jl index d8211aa7086b3..ec77903b4a5b8 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -348,3 +348,12 @@ end @testset "bytesavailable devnull" begin @test bytesavailable(devnull) == 0 end + +@testset "#48188 read_sub for non Array AbstractArray" begin + a = [0,0,0] + v = @view a[1:2] + io = IOBuffer() + write(io,1) + seek(io,0) + @test Base.read_sub(io,v,1,1) == [1,0] +end diff --git a/test/iostream.jl b/test/iostream.jl index bc4751fb1fca7..4ba2423f0f558 100644 --- a/test/iostream.jl +++ b/test/iostream.jl @@ -119,6 +119,24 @@ end end end +@testset "read!/write(::IO, A::StridedArray)" begin + s1 = reshape(view(rand(UInt8, 16), 1:16), 2, 2, 2, 2) + s2 = view(s1, 1:2, 1:2, 1:2, 1:2) + s3 = view(s1, 1:2, 1:2, 1, 1:2) + mktemp() do path, io + b = Vector{UInt8}(undef, 17) + for s::StridedArray in (s3, s1, s2) + @test write(io, s) == length(s) + seek(io, 0) + @test readbytes!(io, b) == length(s) + seek(io, 0) + @test view(b, 1:length(s)) == vec(s) + @test read!(io, fill!(deepcopy(s), 0)) == s + seek(io, 0) + end + end +end + @test Base.open_flags(read=false, write=true, append=false) == (read=false, write=true, create=true, truncate=true, append=false) @testset "issue #30978" begin diff --git a/test/iterators.jl b/test/iterators.jl index 453f27ca8885c..46e7c8b454335 100644 --- a/test/iterators.jl +++ b/test/iterators.jl @@ -11,6 +11,10 @@ using Dates: Date, Day # issue #4718 @test collect(Iterators.filter(x->x[1], zip([true, false, true, false],"abcd"))) == [(true,'a'),(true,'c')] +# issue #45085 +@test_throws ArgumentError Iterators.reverse(zip("abc", "abcd")) +@test_throws ArgumentError Iterators.reverse(zip("abc", Iterators.cycle("ab"))) + let z = zip(1:2) @test size(z) == (2,) @test collect(z) == [(1,), (2,)] @@ -326,6 +330,8 @@ let itr @test collect(itr) == Int[] # Stateful do not preserve shape itr = (i-1 for i in Base.Stateful(zeros(Int, 0, 0))) @test collect(itr) == Int[] # Stateful do not preserve shape + itr = Iterators.Stateful(Iterators.Stateful(1:1)) + @test collect(itr) == [1] end # with 1D inputs @@ -334,21 +340,25 @@ let a = 1:2, c = Int32(1):Int32(0) # length + @test length(product()) == 1 @test length(product(a)) == 2 @test length(product(a, b)) == 20 @test length(product(a, b, c)) == 0 # size + @test size(product()) == tuple() @test size(product(a)) == (2,) @test size(product(a, b)) == (2, 10) @test size(product(a, b, c)) == (2, 10, 0) # eltype + @test eltype(product()) == Tuple{} @test eltype(product(a)) == Tuple{Int} 
@test eltype(product(a, b)) == Tuple{Int, Float64} @test eltype(product(a, b, c)) == Tuple{Int, Float64, Int32} # ndims + @test ndims(product()) == 0 @test ndims(product(a)) == 1 @test ndims(product(a, b)) == 2 @test ndims(product(a, b, c)) == 3 @@ -423,6 +433,8 @@ let a = 1:2, @test_throws ArgumentError size(product(itr)) @test_throws ArgumentError ndims(product(itr)) end + + @test_throws OverflowError length(product(1:typemax(Int), 1:typemax(Int))) end # IteratorSize trait business @@ -610,7 +622,7 @@ end @test length(I) == iterate_length(I) == simd_iterate_length(I) == simd_trip_count(I) @test collect(I) == iterate_elements(I) == simd_iterate_elements(I) == index_elements(I) end - @test all(Base.Splat(==), zip(Iterators.flatten(map(collect, P)), iter)) + @test all(Base.splat(==), zip(Iterators.flatten(map(collect, P)), iter)) end end @testset "empty/invalid partitions" begin @@ -841,6 +853,8 @@ end v, s = iterate(z) @test Base.isdone(z, s) end + # Stateful wrapping mutable iterators of known length (#43245) + @test length(Iterators.Stateful(Iterators.Stateful(1:5))) == 5 end @testset "pair for Svec" begin @@ -959,7 +973,7 @@ end @test Base.IteratorSize(zip(1:5, (1,2,3)) ) == Base.HasLength() # for zip of ::HasShape and ::HasLength end -@testset "proper patition for non-1-indexed vector" begin +@testset "proper partition for non-1-indexed vector" begin @test partition(IdentityUnitRange(11:19), 5) |> collect == [11:15,16:19] # IdentityUnitRange end @@ -983,3 +997,15 @@ end @test !Base.isdone(gen) @test collect(gen) == ["foo"] end + +@testset "empty product iterators" begin + v = nothing + for (z,) in zip(Iterators.product()) + v = z + end + @test v == () +end + +@testset "collect partition substring" begin + @test collect(Iterators.partition(lstrip("01111", '0'), 2)) == ["11", "11"] +end diff --git a/test/keywordargs.jl b/test/keywordargs.jl index 9cbae2b1a0b19..43013ab1d721e 100644 --- a/test/keywordargs.jl +++ b/test/keywordargs.jl @@ -181,7 +181,7 @@ end @test test4538_2(x=2) == 2 # that, but in a module - @Foo4538.TEST() + Foo4538.@TEST() @test test4538_foo_2() == 1 @test test4538_foo_2(x=2) == 2 @@ -288,7 +288,7 @@ end end @testset "issue #21510" begin f21510(; @nospecialize a = 2) = a - @test f21510(a=:b) == :b + @test f21510(a=:b) === :b @test f21510() == 2 end @testset "issue #34516" begin @@ -297,7 +297,7 @@ end @test_throws UndefKeywordError f34516() @test_throws UndefKeywordError f34516(1) g34516(@nospecialize(x); k=0) = 0 - @test first(methods(Core.kwfunc(g34516))).nospecialize != 0 + @test only(methods(Core.kwcall, (Any, typeof(g34516), Vararg))).nospecialize != 0 end @testset "issue #21518" begin a = 0 @@ -387,3 +387,16 @@ f41416(a...="a"; b=true) = (b, a) @test f41416(;b=false) === (false, ("a",)) @test f41416(33) === (true, (33,)) @test f41416(3; b=false) === (false, (3,)) + +Core.kwcall(i::Int) = "hi $i" +let m = first(methods(Core.kwcall, (NamedTuple,typeof(kwf1),Vararg))) + @test m.name === :kwf1 + @test Core.kwcall(1) == "hi 1" + @test which(Core.kwcall, (Int,)).name === :kwcall +end + +# issue #50518 +function f50518(xs...=["a", "b", "c"]...; debug=false) + return xs[1] +end +@test f50518() == f50518(;debug=false) == "a" diff --git a/test/llvmcall.jl b/test/llvmcall.jl index b7f78205ec856..98968bfcdf8bc 100644 --- a/test/llvmcall.jl +++ b/test/llvmcall.jl @@ -147,6 +147,10 @@ module ObjLoadTest using Base: llvmcall, @ccallable using Test didcall = false + """ jl_the_callback() + + Sets the global `didcall` flag when the callback is invoked. + """ + @ccallable Cvoid function 
jl_the_callback() global didcall didcall = true @@ -205,6 +209,23 @@ module CcallableRetTypeTest @test do_the_call() === 42.0 end +# Issue #48093 - test that non-external globals are not deduplicated +function kernel() + Base.llvmcall((""" + @shmem = internal global i8 0, align 8 + define void @entry() { + store i8 1, i8* @shmem + ret void + }""", "entry"), Cvoid, Tuple{}) + Base.llvmcall((""" + @shmem = internal global i8 0, align 8 + define i8 @entry() { + %1 = load i8, i8* @shmem + ret i8 %1 + }""", "entry"), UInt8, Tuple{}) +end +@test kernel() == 0x00 + # If this test breaks, you've probably broken Cxx.jl - please check module LLVMCallFunctionTest using Base: llvmcall @@ -247,5 +268,7 @@ MyStruct(kern) = MyStruct(kern, reinterpret(Core.LLVMPtr{UInt8,1}, 0)) MyStruct() = MyStruct(0) s = MyStruct() +# ensure LLVMPtr properly subtypes +@test eltype(supertype(Core.LLVMPtr{UInt8,1})) <: UInt8 @test s.kern == 0 @test reinterpret(Int, s.ptr) == 0 diff --git a/test/llvmcall2.jl b/test/llvmcall2.jl index 8926b962a35c6..e3e89bb916f2d 100644 --- a/test/llvmcall2.jl +++ b/test/llvmcall2.jl @@ -60,3 +60,25 @@ let err = ErrorException("llvmcall only supports intrinsic calls") @test_throws err (@eval ccall("llvm.floor.f64", llvmcall, Float64, (Float64, Float64...,), 0.0)) === 0.0 @test_throws err (@eval ccall("llvm.floor", llvmcall, Float64, (Float64, Float64...,), 0.0)) === 0.0 end + +@testset "JLJIT API" begin + function JLJITGetJuliaOJIT() + ccall(:JLJITGetJuliaOJIT, Ptr{Cvoid}, ()) + end + function JLJITGetTripleString(JIT) + ccall(:JLJITGetTripleString, Cstring, (Ptr{Cvoid},), JIT) + end + jit = JLJITGetJuliaOJIT() + str = JLJITGetTripleString(jit) + jl_str = unsafe_string(str) + @test length(jl_str) > 4 +end + + +# boolean structs +const NT4I = NTuple{4, VecElement{Int}} +const NT4B = NTuple{4, VecElement{Bool}} +f_nt4b(x, y) = ccall("llvm.sadd.with.overflow", llvmcall, Pair{NT4B, NT4B}, (NT4B, NT4B), x, y) +f_nt4i(x, y) = ccall("llvm.sadd.with.overflow", llvmcall, Pair{NT4I, NT4B}, (NT4I, NT4I), x, y) +@test f_nt4b((false, true, false, true), (false, false, true, true)) === (NT4B((false, true, true, false)) => NT4B((false, false, false, true))) +@test f_nt4i((typemin(Int), 0, typemax(Int), typemax(Int)), (-1, typemax(Int),-1, 1)) === (NT4I((typemax(Int), typemax(Int), typemax(Int)-1, typemin(Int))) => NT4B((true, false, false, true))) diff --git a/test/llvmpasses/.gitignore b/test/llvmpasses/.gitignore index aa144c71f85f8..4b99de76c491b 100644 --- a/test/llvmpasses/.gitignore +++ b/test/llvmpasses/.gitignore @@ -1 +1,2 @@ /Output/ +.lit_test_times.txt \ No newline at end of file diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile index a0b9cf977ede8..7318d1b67da02 100644 --- a/test/llvmpasses/Makefile +++ b/test/llvmpasses/Makefile @@ -4,11 +4,30 @@ include $(JULIAHOME)/Make.inc check: . -TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl $(SRCDIR)/*.ll)) +TESTS_ll := $(filter-out update-%,$(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.ll))) +TESTS_jl := $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl)) +TESTS := $(TESTS_ll) $(TESTS_jl) . $(TESTS): + $(MAKE) -C $(JULIAHOME)/deps install-llvm-tools PATH=$(build_bindir):$(build_depsbindir):$$PATH \ LD_LIBRARY_PATH=${build_libdir}:$$LD_LIBRARY_PATH \ - $(build_depsbindir)/lit/lit.py -v $(addprefix $(SRCDIR)/,$@) + $(build_depsbindir)/lit/lit.py -v "$(addprefix $(SRCDIR)/,$@)" -.PHONY: $(TESTS) check all . 
+$(addprefix update-,$(TESTS_ll)): + @echo 'NOTE: This requires LLVM source files locally, such as via `make -C deps USE_BINARYBUILDER_LLVM=0 DEPS_GIT=llvm checkout-llvm`' + @read -p "$$(printf $(WARNCOLOR)'This will directly modify %s, are you sure you want to proceed? '$(ENDCOLOR) '$@')" REPLY && [ yy = "y$$REPLY" ] + sed -e 's/%shlibext/.$(SHLIB_EXT)/g' < "$(@:update-%=$(SRCDIR)/%)" > "$@" + PATH=$(build_bindir):$(build_depsbindir):$$PATH \ + LD_LIBRARY_PATH=${build_libdir}:$$LD_LIBRARY_PATH \ + $(JULIAHOME)/deps/srccache/llvm/llvm/utils/update_test_checks.py "$@" \ + --preserve-names + mv "$@" "$(@:update-%=$(SRCDIR)/%)" + +update-help: + PATH=$(build_bindir):$(build_depsbindir):$$PATH \ + LD_LIBRARY_PATH=${build_libdir}:$$LD_LIBRARY_PATH \ + $(JULIAHOME)/deps/srccache/llvm/llvm/utils/update_test_checks.py \ + --help + +.PHONY: $(TESTS) $(addprefix update-,$(TESTS_ll)) check all . diff --git a/test/llvmpasses/aliasscopes.jl b/test/llvmpasses/aliasscopes.jl index 5c0fe48091ade..751e351dfad1e 100644 --- a/test/llvmpasses/aliasscopes.jl +++ b/test/llvmpasses/aliasscopes.jl @@ -18,8 +18,8 @@ import Base.Experimental: Const, @aliasscope function simple(A, B) @aliasscope @inbounds for I in eachindex(A, B) A[I] = Const(B)[I] -# CHECK: load double, {{.*}} !alias.scope [[SCOPE:![0-9]+]] -# CHECK: store double {{.*}} !noalias [[SCOPE]] +# CHECK: load double, {{.*}} !alias.scope [[SCOPE_LD:![0-9]+]] +# CHECK: store double {{.*}} !noalias [[SCOPE_ST:![0-9]+]] end return 0 # return nothing causes japi1 end @@ -28,8 +28,8 @@ end function constargs(A, B::Const) @aliasscope @inbounds for I in eachindex(A, B) A[I] = B[I] -# CHECK: load double, {{.*}} !alias.scope [[SCOPE2:![0-9]+]] -# CHECK: store double {{.*}} !noalias [[SCOPE2]] +# CHECK: load double, {{.*}} !alias.scope [[SCOPE2_LD:![0-9]+]] +# CHECK: store double {{.*}} !noalias [[SCOPE2_ST:![0-9]+]] end return 0 end @@ -40,10 +40,10 @@ function micro_ker!(AB, Ac, Bc, kc, offSetA, offSetB) @inbounds @aliasscope for k in 1:kc for j in 1:NR, i in 1:MR AB[i+(j-1)*MR] = muladd(Const(Ac)[offSetA+i], Const(Bc)[offSetB+j], Const(AB)[i+(j-1)*MR]) -# CHECK: load double, {{.*}} !alias.scope [[SCOPE3:![0-9]+]] -# CHECK: load double, {{.*}} !alias.scope [[SCOPE3]] -# CHECK: load double, {{.*}} !alias.scope [[SCOPE3]] -# CHECK: store double {{.*}} !noalias [[SCOPE3]] +# CHECK: load double, {{.*}} !alias.scope [[SCOPE3_LD:![0-9]+]] +# CHECK: load double, {{.*}} !alias.scope [[SCOPE3_LD]] +# CHECK: load double, {{.*}} !alias.scope [[SCOPE3_LD]] +# CHECK: store double {{.*}} !noalias [[SCOPE3_ST:![0-9]+]] end offSetA += MR offSetB += NR @@ -51,9 +51,14 @@ function micro_ker!(AB, Ac, Bc, kc, offSetA, offSetB) return end -# CHECK: [[SCOPE]] = !{[[ALIASSCOPE:![0-9]+]]} -# CHECK: [[ALIASSCOPE]] = !{!"aliasscope", [[MDNODE:![0-9]+]]} -# CHECK: [[MDNODE]] = !{!"simple"} +# CHECK-DAG: [[SCOPE_LD]] = !{[[ALIASSCOPE:![0-9]+]] +# CHECK-DAG: [[SCOPE_ST]] = !{[[ALIASSCOPE]] +# CHECK-DAG: [[SCOPE2_LD]] = !{[[ALIASSCOPE2:![0-9]+]] +# CHECK-DAG: [[SCOPE2_ST]] = !{[[ALIASSCOPE2]] +# CHECK-DAG: [[SCOPE3_LD]] = !{[[ALIASSCOPE3:![0-9]+]] +# CHECK-DAG: [[SCOPE3_ST]] = !{[[ALIASSCOPE3]] +# CHECK-DAG: [[ALIASSCOPE]] = !{!"aliasscope", [[MDNODE:![0-9]+]]} +# CHECK-DAG: [[MDNODE]] = !{!"simple"} emit(simple, Vector{Float64}, Vector{Float64}) emit(constargs, Vector{Float64}, Const{Float64, 1}) diff --git a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll new file mode 100644 index 0000000000000..9f9dc7056152a --- /dev/null +++ 
b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll @@ -0,0 +1,41 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + +target triple = "amdgcn-amd-amdhsa" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" + +@tag = external addrspace(10) global {} + +declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() +declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) +declare {}* @julia.pointer_from_objref({} addrspace(11)*) + +; Test that non-0 addrspace allocas are properly emitted and handled + +; CHECK-LABEL: @non_zero_addrspace +; TYPED: %1 = alloca i32, align 8, addrspace(5) + +; TYPED: %2 = bitcast i32 addrspace(5)* %1 to i8 addrspace(5)* +; TYPED: %var1 = bitcast i8 addrspace(5)* %2 to {} addrspace(5)* +; TYPED: %3 = addrspacecast {} addrspace(5)* %var1 to {}* +; TYPED: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %2) + +; OPAQUE: %var1 = alloca i32, align 8, addrspace(5) +; OPAQUE: %1 = addrspacecast ptr addrspace(5) %var1 to ptr +; OPAQUE: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %var1) + +; CHECK: ret void +define void @non_zero_addrspace() { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 4, {} addrspace(10)* @tag) + %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* + %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2) + ret void +} +; CHECK-LABEL: }{{$}} diff --git a/test/llvmpasses/alloc-opt-gcframe.jl b/test/llvmpasses/alloc-opt-gcframe.ll similarity index 50% rename from test/llvmpasses/alloc-opt-gcframe.jl rename to test/llvmpasses/alloc-opt-gcframe.ll index 3b5fc3a51a606..e01bd900c71e7 100644 --- a/test/llvmpasses/alloc-opt-gcframe.jl +++ b/test/llvmpasses/alloc-opt-gcframe.ll @@ -1,49 +1,52 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license +; This file is a part of Julia. License is MIT: https://julialang.org/license -# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s -# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S - | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -isz = sizeof(UInt) == 8 ? 
"i64" : "i32" +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -println(""" target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @tag = external addrspace(10) global {} -""") - -# CHECK-LABEL: @return_obj -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 -# CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15 -# CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -# CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -# CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -# CHECK-NEXT: %v = call noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16) -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" + +; CHECK-LABEL: @return_obj +; CHECK-NOT: @julia.gc_alloc_obj + +; TYPED: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 +; TYPED: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}}) +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %gcstack, i64 -12 +; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc_instrumented(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}}) +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 + define {} addrspace(10)* @return_obj() { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) ret {} addrspace(10)* %v } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @return_load -# CHECK: alloca i64 -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8* -# CHECK-NOT: @tag -# CHECK-NOT: @llvm.lifetime.end -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @return_load +; CHECK: alloca i64 +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8* +; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr +; CHECK-NOT: @tag +; CHECK-NOT: @llvm.lifetime.end define i64 @return_load(i64 %i) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr 
inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* %v64a11 = addrspacecast i64 addrspace(10)* %v64 to i64 addrspace(11)* store i64 %i, i64 addrspace(11)* %v64a11, align 16, !tbaa !4 @@ -51,43 +54,46 @@ define i64 @return_load(i64 %i) { %l = load i64, i64 addrspace(11)* %v64a11, align 16, !tbaa !4 ret i64 %l } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_obj -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: @ijl_gc_pool_alloc -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_obj +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK: @ijl_gc_pool_alloc +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define void @ccall_obj(i8* %fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %f = bitcast i8* %fptr to void ({} addrspace(10)*)* call void %f({} addrspace(10)* %v) ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_ptr -# CHECK: alloca i64 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8* -# CHECK: %f = bitcast i8* %fptr to void (i8*)* -# Currently the GC frame lowering pass strips away all operand bundles -# CHECK-NEXT: call void %f(i8* -# CHECK-NEXT: ret void -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_ptr +; CHECK: alloca i64 +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8* +; TYPED: %f = bitcast i8* %fptr to void (i8*)* + +; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr +; OPAQUE: %f = bitcast ptr %fptr to ptr +; Currently the GC frame lowering pass strips away all operand bundles +; TYPED-NEXT: call void %f(i8* +; OPAQUE-NEXT: call void %f(ptr +; CHECK-NEXT: ret void define void @ccall_ptr(i8* %fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -95,20 +101,20 @@ define void @ccall_ptr(i8* %fptr) { call void %f(i8* %ptr) [ "jl_roots"({} addrspace(10)* %v), "unknown_bundle"(i8* %ptr) ] ret void } -""") 
-# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_unknown_bundle -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: @ijl_gc_pool_alloc -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_unknown_bundle +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK: @ijl_gc_pool_alloc +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define void @ccall_unknown_bundle(i8* %fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -116,26 +122,32 @@ define void @ccall_unknown_bundle(i8* %fptr) { call void %f(i8* %ptr) [ "jl_not_jl_roots"({} addrspace(10)* %v) ] ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @lifetime_branches -# CHECK: alloca i64 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK: L1: -# CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8, -# CHECK: %f = bitcast i8* %fptr to void (i8*)* -# CHECK-NEXT: call void %f(i8* -# CHECK-NEXT: br i1 %b2, label %L2, label %L3 - -# CHECK: L2: -# CHECK-NEXT: %f2 = bitcast i8* %fptr to void ({}*)* -# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, -# CHECK-NEXT: call void %f2({}* null) - -# CHECK: L3: -# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @lifetime_branches +; CHECK: alloca i64 +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK: L1: +; CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8, + +; TYPED: %f = bitcast i8* %fptr to void (i8*)* +; TYPED-NEXT: call void %f(i8* + +; OPAQUE: %f = bitcast ptr %fptr to ptr +; OPAQUE-NEXT: call void %f(ptr + +; CHECK-NEXT: br i1 %b2, label %L2, label %L3 + +; CHECK: L2: +; TYPED-NEXT: %f2 = bitcast i8* %fptr to void ({}*)* +; OPAQUE-NEXT: %f2 = bitcast ptr %fptr to ptr +; CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, +; TYPED-NEXT: call void %f2({}* null) +; OPAQUE-NEXT: call void %f2(ptr null) + +; CHECK: L3: +; CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** @@ -143,7 +155,7 @@ define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) { br i1 %b, label %L1, label %L3 L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -159,62 +171,60 @@ L2: L3: ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @object_field 
-# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @object_field +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4 define void @object_field({} addrspace(10)* %field) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %vab = bitcast {} addrspace(11)* %va to {} addrspace(10)* addrspace(11)* store {} addrspace(10)* %field, {} addrspace(10)* addrspace(11)* %vab, align 8 ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @memcpy_opt -# CHECK: alloca [16 x i8], align 16 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @memcpy_opt +; CHECK: alloca [16 x i8], align 16 +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.memcpy.p0i8.p0i8.i64 +; OPAQUE: call void @llvm.memcpy.p0.p0.i64 define void @memcpy_opt(i8* %v22) { top: %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag) + %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 16, {} addrspace(10)* @tag) %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)* %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)* call void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* %v21, i8* %v22, i64 16, i32 8, i1 false) ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @preserve_opt -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK-NOT: @llvm.lifetime.end -# CHECK: @external_function -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @preserve_opt +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; CHECK-NOT: @llvm.lifetime.end +; CHECK: @external_function define void @preserve_opt(i8* %v22) { top: %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag) + %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 16, {} addrspace(10)* @tag) %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)* %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)* %tok = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %v19) @@ -223,21 +233,20 @@ top: call void @external_function() ret void } -""") -# CHECK-LABEL: }{{$}} +; CHECK-LABEL: }{{$}} -# CHECK-LABEL: @preserve_branches -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK: L1: -# CHECK-NEXT: @external_function() -# CHECK-NEXT: br i1 %b2, label %L2, label %L3 +; CHECK-LABEL: @preserve_branches +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK: L1: +; CHECK-NEXT: @external_function() +; CHECK-NEXT: br i1 %b2, label %L2, label %L3 -# CHECK: L2: -# CHECK: @external_function() -# CHECK-NEXT: br label %L3 +; CHECK: L2: +; CHECK: @external_function() +; CHECK-NEXT: br label %L3 -# CHECK: L3: -println(""" +; CHECK: L3: define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** @@ -245,7 +254,7 @@ define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { br i1 %b, label %L1, label %L3 L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v) call void @external_function() br i1 %b2, label %L2, label %L3 @@ -257,15 +266,16 @@ L2: L3: ret void } -""") -# CHECK-LABEL: }{{$}} +; CHECK-LABEL: }{{$}} + +; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc_instrumented(i8*, +; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc_instrumented(i8*, -# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, -# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, -println(""" +; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_pool_alloc_instrumented(ptr, +; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_big_alloc_instrumented(ptr, declare void @external_function() declare {}*** @julia.get_pgcstack() -declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, $isz, {} addrspace(10)*) +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) declare {}* @julia.pointer_from_objref({} addrspace(11)*) declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) declare token @llvm.julia.gc_preserve_begin(...) @@ -278,4 +288,3 @@ declare void @llvm.julia.gc_preserve_end(token) !4 = !{!5, !5, i64 0} !5 = !{!"jtbaa_mutab", !6, i64 0} !6 = !{!"jtbaa_value", !2, i64 0} -""") diff --git a/test/llvmpasses/alloc-opt-pass.jl b/test/llvmpasses/alloc-opt-pass.jl deleted file mode 100644 index 4912a1dc26194..0000000000000 --- a/test/llvmpasses/alloc-opt-pass.jl +++ /dev/null @@ -1,146 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S - | FileCheck %s -# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S - | FileCheck %s - -isz = sizeof(UInt) == 8 ? "i64" : "i32" - -println(""" -@tag = external addrspace(10) global {} -""") - -# Test that the gc_preserve intrinsics are deleted directly. 
- -# CHECK-LABEL: @preserve_branches -# CHECK: call {}*** @julia.ptls_states() -# CHECK: L1: -# CHECK-NOT: @llvm.julia.gc_preserve_begin -# CHECK-NEXT: @external_function() -# CHECK-NEXT: br i1 %b2, label %L2, label %L3 - -# CHECK: L2: -# CHECK: @external_function() -# CHECK-NEXT: br label %L3 - -# CHECK: L3: -println(""" -define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - br i1 %b, label %L1, label %L3 - -L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) - %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v) - call void @external_function() - br i1 %b2, label %L2, label %L3 - -L2: - call void @external_function() - br label %L3 - -L3: - ret void -} -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @preserve_branches2 -# CHECK: call {}*** @julia.ptls_states() -# CHECK: L1: -# CHECK-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2 -# CHECK-NEXT: @external_function() -# CHECK-NEXT: br i1 %b2, label %L2, label %L3 - -# CHECK: L2: -# CHECK: @external_function() -# CHECK-NEXT: br label %L3 - -# CHECK: L3: -println(""" -define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %v2 = call {} addrspace(10)* @external_function2() - br i1 %b, label %L1, label %L3 - -L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) - %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v, {} addrspace(10)* %v2) - call void @external_function() - br i1 %b2, label %L2, label %L3 - -L2: - call void @external_function() - br label %L3 - -L3: - ret void -} -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @legal_int_types -# CHECK: alloca [12 x i8] -# CHECK-NOT: alloca i96 -# CHECK: ret void -println(""" -define void @legal_int_types() { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 12, {} addrspace(10)* @tag) - %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* - %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2) - ret void -} -""") -# CHECK-LABEL: }{{$}} - - - -println(""" -declare void @external_function() -declare {} addrspace(10)* @external_function2() -declare {}*** @julia.ptls_states() -declare {}*** @julia.get_pgcstack() -declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*) -declare {}* @julia.pointer_from_objref({} addrspace(11)*) -declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) -declare token @llvm.julia.gc_preserve_begin(...) 
-declare void @llvm.julia.gc_preserve_end(token) -""") - -# CHECK-LABEL: @memref_collision -# CHECK: call {}*** @julia.ptls_states() -# CHECK-NOT: store {} -# CHECK: store i -# CHECK-NOT: store {} -# CHECK: L1: -# CHECK: load {} -# CHECK: L2: -# CHECK: load i -println(""" -define void @memref_collision($isz %x) { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag) - %v_p = bitcast {} addrspace(10)* %v to $isz addrspace(10)* - store $isz %x, $isz addrspace(10)* %v_p - br i1 0, label %L1, label %L2 - -L1: - %v1 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* - %v1_x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %v1 - ret void - -L2: - %v2 = bitcast {} addrspace(10)* %v to $isz addrspace(10)* - %v2_x = load i64, i64 addrspace(10)* %v2 - ret void -} -""") -# CHECK-LABEL: }{{$}} diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll new file mode 100644 index 0000000000000..6bee0fd325105 --- /dev/null +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -0,0 +1,200 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + +@tag = external addrspace(10) global {} + +; Test that the gc_preserve intrinsics are deleted directly. + +; CHECK-LABEL: @preserve_branches +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() +; CHECK: L1: +; CHECK-NOT: @llvm.julia.gc_preserve_begin +; CHECK-NEXT: @external_function() +; CHECK-NEXT: br i1 %b2, label %L2, label %L3 + +; CHECK: L2: +; CHECK: @external_function() +; CHECK-NEXT: br label %L3 + +; CHECK: L3: +define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + br i1 %b, label %L1, label %L3 + +L1: + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) + %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* nonnull %v) + call void @external_function() + br i1 %b2, label %L2, label %L3 + +L2: + call void @external_function() + br label %L3 + +L3: + ret void +} +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @preserve_branches2 +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() +; CHECK: L1: +; TYPED-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2 +; OPAQUE-NEXT: @llvm.julia.gc_preserve_begin{{.*}}ptr addrspace(10) %v2 +; CHECK-NEXT: @external_function() +; CHECK-NEXT: br i1 %b2, label %L2, label %L3 + +; CHECK: L2: +; CHECK: @external_function() +; CHECK-NEXT: br label %L3 + +; CHECK: L3: +define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %v2 = call {} addrspace(10)* @external_function2() + br i1 %b, label %L1, label %L3 + +L1: + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) + %tok = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %v, {} addrspace(10)* nonnull %v2) + call void @external_function() + br i1 %b2, label %L2, label %L3 + +L2: + call void @external_function() + br label %L3 + +L3: + ret void +} +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @legal_int_types +; CHECK: alloca [12 x i8] +; CHECK-NOT: alloca i96 +; CHECK: store [12 x i8] zeroinitializer, +; CHECK: ret void +define void @legal_int_types() { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 12, {} addrspace(10)* @tag) + %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* + %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2) + ret void +} +; CHECK-LABEL: }{{$}} + + +declare void @external_function() +declare {} addrspace(10)* @external_function2() +declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() +declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) +declare {}* @julia.pointer_from_objref({} addrspace(11)*) +declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare token @llvm.julia.gc_preserve_begin(...) +declare void @llvm.julia.gc_preserve_end(token) + +; CHECK-LABEL: @memref_collision +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() +; TYPED-NOT: store {} +; OPAQUE-NOT: store ptr +; CHECK: store i +; TYPED-NOT: store {} +; OPAQUE-NOT: store ptr +; CHECK: L1: +; TYPED: load {} +; OPAQUE: load ptr +; CHECK: L2: +; CHECK: load i +define void @memref_collision(i64 %x) { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) + %v_p = bitcast {} addrspace(10)* %v to i64 addrspace(10)* + store i64 %x, i64 addrspace(10)* %v_p + br i1 0, label %L1, label %L2 + +L1: + %v1 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* + %v1_x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %v1 + ret void + +L2: + %v2 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* + %v2_x = load i64, i64 addrspace(10)* %v2 + ret void +} +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @lifetime_no_preserve_end +; CHECK: alloca +; CHECK-NOT: call token(...) @llvm.julia.gc_preserve_begin +; CHECK: call void @llvm.lifetime.start +; CHECK: store [8 x i8] zeroinitializer, +; CHECK-NOT: call void @llvm.lifetime.end +define void @lifetime_no_preserve_end({}* noalias nocapture noundef nonnull sret({}) %0) { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) + %token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %v) + %v_derived = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* + %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %v_derived) + %ptr_raw = bitcast {}* %ptr to i8* + call void @external_function() ; safepoint + %ret_raw = bitcast {}* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %ret_raw, i8 * align 8 %ptr_raw, i64 0, i1 false) + %ret_raw2 = bitcast {}* %0 to i8* + ret void +} +; CHECK-LABEL: }{{$}} + + +; CHECK-LABEL: @initializers +; CHECK: alloca [1 x i8] +; CHECK-DAG: alloca [2 x i8] +; CHECK-DAG: alloca [3 x i8] +; CHECK-DAG: freeze [1 x i8] undef +; CHECK-DAG: store [1 x i8] % +; CHECK-DAG: store [3 x i8] zeroinitializer, +; CHECK-NOT: store +; CHECK-NOT: zeroinitializer +; CHECK: ret void +define void @initializers() { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + + %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 1, {} addrspace(10)* @tag) #0 + %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* + %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2) + + %var4 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 2, {} addrspace(10)* @tag) #1 + %var5 = addrspacecast {} addrspace(10)* %var4 to {} addrspace(11)* + %var6 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var5) + + %var7 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 3, {} addrspace(10)* @tag) #2 + %var8 = addrspacecast {} addrspace(10)* %var7 to {} addrspace(11)* + %var9 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var8) + + ret void +} +; CHECK-LABEL: }{{$}} + +attributes #0 = { allockind("alloc") } +attributes #1 = { allockind("alloc,uninitialized") } +attributes #2 = { allockind("alloc,zeroed") } diff --git a/test/llvmpasses/alloc-opt-unsized.ll b/test/llvmpasses/alloc-opt-unsized.ll new file mode 100644 index 0000000000000..c3ea626c57f45 --- /dev/null +++ b/test/llvmpasses/alloc-opt-unsized.ll @@ -0,0 +1,44 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE + +source_filename = "text" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-linux-gnu" + +declare {}*** @julia.get_pgcstack() + +declare {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) + +declare void @julia.write_barrier({} addrspace(10)*, ...) 
+ +define void @diffejulia_objective__1864_inner_1wrap({} addrspace(10)* %arg, i64 %iv.i) { +entry: + %i5 = call {}*** @julia.get_pgcstack() + %i13 = bitcast {}*** %i5 to {}** + %i14 = getelementptr inbounds {}*, {}** %i13, i64 -12 + %i18 = call noalias nonnull dereferenceable(8000) dereferenceable_or_null(8000) {} addrspace(10)* @julia.gc_alloc_obj({}** %i14, i64 8000, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 139756155247504 to {}*) to {} addrspace(10)*)) + %_malloccache.i = bitcast {} addrspace(10)* %i18 to {} addrspace(10)* addrspace(10)* + %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %_malloccache.i, i64 %iv.i + store {} addrspace(10)* %arg, {} addrspace(10)* addrspace(10)* %i23, align 8 + %i24 = bitcast {} addrspace(10)* addrspace(10)* %_malloccache.i to {} addrspace(10)* + call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %i24, {} addrspace(10)* %arg) + %l = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %i23 + ret void +} + +; TYPED: %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16 +; TYPED: %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8* +; TYPED: %i18 = bitcast i8* %[[i1]] to {}* +; TYPED: %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)** +; TYPED: %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %_malloccache.i, i64 %iv.i +; TYPED: store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8 +; TYPED: %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}* +; TYPED: %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8 + +; OPAQUE: %[[i0:.+]] = alloca ptr addrspace(10), i64 1000, align 16 +; OPAQUE: %i23 = getelementptr inbounds ptr addrspace(10), ptr %i18, i64 %iv.i +; OPAQUE: store ptr addrspace(10) %arg, ptr %i23, align 8 +; OPAQUE: %l = load ptr addrspace(10), ptr %i23, align 8 diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll index ccb8cc69f0f66..323f5e24015e9 100644 --- a/test/llvmpasses/cpu-features.ll +++ b/test/llvmpasses/cpu-features.ll @@ -1,5 +1,10 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-linux-gnu" declare i1 @julia.cpu.have_fma.f64() declare double @with_fma(double %0, double %1, double %2) diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl index 76b048c19a2a0..dd0892be56a0b 100644 --- a/test/llvmpasses/fastmath.jl +++ b/test/llvmpasses/fastmath.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll +# RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s ## Notes: @@ -14,7 +14,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) import Base.FastMath -# CHECK: call fast float @llvm.sqrt.f32(float %0) +# CHECK: call fast float @llvm.sqrt.f32(float %"x::Float32") emit(FastMath.sqrt_fast, Float32) @@ -22,13 +22,23 @@ emit(FastMath.sqrt_fast, Float32) # TODO: this is not true for platforms that natively support Float16 foo(x::T,y::T) where T = x-y == zero(T) -# LOWER: fsub half %0, %1 -# FINAL: %2 = fpext half %0 to float -# FINAL: %3 = fpext half %1 to float -# FINAL: fsub half %2, %3 +# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{ +# CHECK-DAG: %[[XEXT:[0-9]+]] = fpext half %[[X]] to float +# CHECK-DAG: %[[YEXT:[0-9]+]] = fpext half %[[Y]] to float +# CHECK: %[[DIFF:[0-9]+]] = fsub float %[[XEXT]], %[[YEXT]] +# CHECK: %[[TRUNC:[0-9]+]] = fptrunc float %[[DIFF]] to half +# CHECK: %[[DIFFEXT:[0-9]+]] = fpext half %[[TRUNC]] to float +# CHECK: %[[CMP:[0-9]+]] = fcmp oeq float %[[DIFFEXT]], 0.000000e+00 +# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8 +# CHECK: ret i8 %[[ZEXT]] +# CHECK: } emit(foo, Float16, Float16) @fastmath foo(x::T,y::T) where T = x-y == zero(T) -# LOWER: fsub fast half %0, %1 -# FINAL: fsub fast half %0, %1 +# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{ +# CHECK: %[[DIFF:[0-9]+]] = fsub fast half %[[X]], %[[Y]] +# CHECK: %[[CMP:[0-9]+]] = fcmp fast oeq half %[[DIFF]], 0xH0000 +# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8 +# CHECK: ret i8 %[[ZEXT]] +# CHECK: } emit(foo, Float16, Float16) diff --git a/test/llvmpasses/final-lower-gc-addrspaces.ll b/test/llvmpasses/final-lower-gc-addrspaces.ll new file mode 100644 index 0000000000000..4632c312ae0e3 --- /dev/null +++ b/test/llvmpasses/final-lower-gc-addrspaces.ll @@ -0,0 +1,46 @@ +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + +target triple = "amdgcn-amd-amdhsa" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" + +@tag = external addrspace(10) global {} + +declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) +declare {} addrspace(10)* @ijl_box_int64(i64) +declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() + +declare noalias nonnull {} addrspace(10)** @julia.new_gc_frame(i32) +declare void @julia.push_gc_frame({} addrspace(10)**, i32) +declare {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)**, i32) +declare void @julia.pop_gc_frame({} addrspace(10)**) +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8*, i64) #0 + +attributes #0 = { allocsize(1) } + +define void @gc_frame_addrspace(i64 %a, i64 %b) { +top: +; CHECK-LABEL: @gc_frame_addrspace +; TYPED: %0 = alloca {} addrspace(10)*, i32 4, align 16, addrspace(5) +; OPAQUE: %0 = alloca ptr addrspace(10), i32 4, align 16, addrspace(5) +; TYPED: %gcframe = addrspacecast {} addrspace(10)* addrspace(5)* %0 to {} addrspace(10)** +; OPAQUE: %gcframe = addrspacecast ptr addrspace(5) %0 to ptr +; TYPED: %1 = bitcast {} addrspace(10)** %gcframe to i8* + %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) + %pgcstack = call {}*** @julia.get_pgcstack() + call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) + %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) + %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) + store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8 + %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b) + %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) + store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8 + call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) + call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; CHECK: ret void + ret void +} diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll index 176b695ba918b..eb3b68662c2b4 100644 --- a/test/llvmpasses/final-lower-gc.ll +++ b/test/llvmpasses/final-lower-gc.ll @@ -1,5 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {} @@ -13,41 +16,57 @@ declare noalias nonnull {} addrspace(10)** @julia.new_gc_frame(i32) declare void @julia.push_gc_frame({} addrspace(10)**, i32) declare {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)**, i32) declare void @julia.pop_gc_frame({} addrspace(10)**) -declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8*, i64) #0 +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8*, i64, i64) #0 attributes #0 = { allocsize(1) } define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack() +; TYPED: [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack() +; OPAQUE: [[GCFRAME_SLOT:%.*]] = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; CHECK-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 -; CHECK-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* -; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 -; CHECK-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; CHECK-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** -; CHECK-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 -; CHECK-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0 -; CHECK-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)*** -; CHECK-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8 +; TYPED-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 +; TYPED-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* +; TYPED-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 +; TYPED-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 +; TYPED-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** +; TYPED-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 +; TYPED-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0 +; TYPED-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)*** +; TYPED-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8 + +; OPAQUE-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 0 +; OPAQUE-DAG: store i64 8, ptr [[GCFRAME_SIZE_PTR]], align 8, !tbaa !0 +; OPAQUE-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1 +; OPAQUE-DAG: [[PREV_GCFRAME:%.*]] = load ptr, ptr [[GCFRAME_SLOT]], align 8 +; OPAQUE-DAG: 
store ptr [[PREV_GCFRAME]], ptr [[PREV_GCFRAME_PTR]], align 8, !tbaa !0 +; OPAQUE-NEXT: store ptr %gcframe, ptr [[GCFRAME_SLOT]], align 8 call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) -; CHECK: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3 +; TYPED: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3 +; OPAQUE: %frame_slot_1 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 3 %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8 %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b) -; CHECK: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; OPAQUE: %frame_slot_2 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8 -; CHECK: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) +; TYPED: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) +; OPAQUE: call void @boxed_simple(ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed) call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; CHECK-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 -; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)** -; CHECK-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0 +; TYPED-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 +; TYPED-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 +; TYPED-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)** +; TYPED-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0 + +; OPAQUE-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1 +; OPAQUE-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load ptr addrspace(10), ptr [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 +; OPAQUE-NEXT: store ptr addrspace(10) [[PREV_GCFRAME_PTR4]], ptr [[GCFRAME_SLOT]], align 8, !tbaa !0 call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) ; CHECK-NEXT: ret void ret void @@ -59,8 +78,25 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* -; CHECK: %v = call noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc - %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8) +; TYPED: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc_instrumented +; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc_instrumented + %v = call {} addrspace(10)* 
@julia.gc_alloc_bytes(i8* %ptls_i8, i64 8, i64 12341234) + %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* + %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 + store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0 + ret {} addrspace(10)* %v +} + +define {} addrspace(10)* @gc_alloc_lowering_var(i64 %size) { +top: +; CHECK-LABEL: @gc_alloc_lowering_var + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* +; CHECK: %0 = add i64 %size, 8 +; TYPED: %v = call noalias nonnull align {{[0-9]+}} dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i64 12341234) +; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable(8) ptr addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %0, i64 12341234) + %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size, i64 12341234) %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0 diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll new file mode 100644 index 0000000000000..0c37be449d959 --- /dev/null +++ b/test/llvmpasses/float16.ll @@ -0,0 +1,166 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s + +define half @demote_half_test(half %a, half %b) #0 { +top: +; CHECK-LABEL: @demote_half_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %0 = fpext half %a to float +; CHECK-NEXT: %1 = fpext half %b to float +; CHECK-NEXT: %2 = fadd float %0, %1 +; CHECK-NEXT: %3 = fptrunc float %2 to half +; CHECK-NEXT: %4 = fpext half %3 to float +; CHECK-NEXT: %5 = fpext half %b to float +; CHECK-NEXT: %6 = fadd float %4, %5 +; CHECK-NEXT: %7 = fptrunc float %6 to half +; CHECK-NEXT: %8 = fpext half %7 to float +; CHECK-NEXT: %9 = fpext half %b to float +; CHECK-NEXT: %10 = fadd float %8, %9 +; CHECK-NEXT: %11 = fptrunc float %10 to half +; CHECK-NEXT: %12 = fpext half %11 to float +; CHECK-NEXT: %13 = fpext half %b to float +; CHECK-NEXT: %14 = fmul float %12, %13 +; CHECK-NEXT: %15 = fptrunc float %14 to half +; CHECK-NEXT: %16 = fpext half %15 to float +; CHECK-NEXT: %17 = fpext half %b to float +; CHECK-NEXT: %18 = fdiv float %16, %17 +; CHECK-NEXT: %19 = fptrunc float %18 to half +; CHECK-NEXT: %20 = insertelement <2 x half> undef, half %a, i32 0 +; CHECK-NEXT: %21 = insertelement <2 x half> %20, half %b, i32 1 +; CHECK-NEXT: %22 = insertelement <2 x half> undef, half %b, i32 0 +; CHECK-NEXT: %23 = insertelement <2 x half> %22, half %b, i32 1 +; CHECK-NEXT: %24 = fpext <2 x half> %21 to <2 x float> +; CHECK-NEXT: %25 = fpext <2 x half> %23 to <2 x float> +; CHECK-NEXT: %26 = fadd <2 x float> %24, %25 +; CHECK-NEXT: %27 = fptrunc <2 x float> %26 to <2 x half> +; CHECK-NEXT: %28 = extractelement <2 x half> %27, i32 0 +; CHECK-NEXT: %29 = extractelement <2 x half> %27, i32 1 +; CHECK-NEXT: %30 = fpext half %28 to float +; CHECK-NEXT: %31 = fpext half %29 to float +; CHECK-NEXT: %32 = fadd float %30, %31 +; CHECK-NEXT: %33 = fptrunc float %32 to half +; 
CHECK-NEXT: %34 = fpext half %33 to float +; CHECK-NEXT: %35 = fpext half %19 to float +; CHECK-NEXT: %36 = fadd float %34, %35 +; CHECK-NEXT: %37 = fptrunc float %36 to half +; CHECK-NEXT: ret half %37 +; + %0 = fadd half %a, %b + %1 = fadd half %0, %b + %2 = fadd half %1, %b + %3 = fmul half %2, %b + %4 = fdiv half %3, %b + %5 = insertelement <2 x half> undef, half %a, i32 0 + %6 = insertelement <2 x half> %5, half %b, i32 1 + %7 = insertelement <2 x half> undef, half %b, i32 0 + %8 = insertelement <2 x half> %7, half %b, i32 1 + %9 = fadd <2 x half> %6, %8 + %10 = extractelement <2 x half> %9, i32 0 + %11 = extractelement <2 x half> %9, i32 1 + %12 = fadd half %10, %11 + %13 = fadd half %12, %4 + ret half %13 +} + +define half @native_half_test(half %a, half %b) #1 { +; CHECK-LABEL: @native_half_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %0 = fadd half %a, %b +; CHECK-NEXT: %1 = fadd half %0, %b +; CHECK-NEXT: %2 = fadd half %1, %b +; CHECK-NEXT: %3 = fmul half %2, %b +; CHECK-NEXT: %4 = fdiv half %3, %b +; CHECK-NEXT: %5 = insertelement <2 x half> undef, half %a, i32 0 +; CHECK-NEXT: %6 = insertelement <2 x half> %5, half %b, i32 1 +; CHECK-NEXT: %7 = insertelement <2 x half> undef, half %b, i32 0 +; CHECK-NEXT: %8 = insertelement <2 x half> %7, half %b, i32 1 +; CHECK-NEXT: %9 = fadd <2 x half> %6, %8 +; CHECK-NEXT: %10 = extractelement <2 x half> %9, i32 0 +; CHECK-NEXT: %11 = extractelement <2 x half> %9, i32 1 +; CHECK-NEXT: %12 = fadd half %10, %11 +; CHECK-NEXT: %13 = fadd half %12, %4 +; CHECK-NEXT: ret half %13 +; +top: + %0 = fadd half %a, %b + %1 = fadd half %0, %b + %2 = fadd half %1, %b + %3 = fmul half %2, %b + %4 = fdiv half %3, %b + %5 = insertelement <2 x half> undef, half %a, i32 0 + %6 = insertelement <2 x half> %5, half %b, i32 1 + %7 = insertelement <2 x half> undef, half %b, i32 0 + %8 = insertelement <2 x half> %7, half %b, i32 1 + %9 = fadd <2 x half> %6, %8 + %10 = extractelement <2 x half> %9, i32 0 + %11 = extractelement <2 x half> %9, i32 1 + %12 = fadd half %10, %11 + %13 = fadd half %12, %4 + ret half %13 +} + +define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) { +top: +; CHECK-LABEL: @demote_bfloat_test( +; CHECK-NEXT: top: +; CHECK-NEXT: %0 = fpext bfloat %a to float +; CHECK-NEXT: %1 = fpext bfloat %b to float +; CHECK-NEXT: %2 = fadd float %0, %1 +; CHECK-NEXT: %3 = fptrunc float %2 to bfloat +; CHECK-NEXT: %4 = fpext bfloat %3 to float +; CHECK-NEXT: %5 = fpext bfloat %b to float +; CHECK-NEXT: %6 = fadd float %4, %5 +; CHECK-NEXT: %7 = fptrunc float %6 to bfloat +; CHECK-NEXT: %8 = fpext bfloat %7 to float +; CHECK-NEXT: %9 = fpext bfloat %b to float +; CHECK-NEXT: %10 = fadd float %8, %9 +; CHECK-NEXT: %11 = fptrunc float %10 to bfloat +; CHECK-NEXT: %12 = fpext bfloat %11 to float +; CHECK-NEXT: %13 = fpext bfloat %b to float +; CHECK-NEXT: %14 = fmul float %12, %13 +; CHECK-NEXT: %15 = fptrunc float %14 to bfloat +; CHECK-NEXT: %16 = fpext bfloat %15 to float +; CHECK-NEXT: %17 = fpext bfloat %b to float +; CHECK-NEXT: %18 = fdiv float %16, %17 +; CHECK-NEXT: %19 = fptrunc float %18 to bfloat +; CHECK-NEXT: %20 = insertelement <2 x bfloat> undef, bfloat %a, i32 0 +; CHECK-NEXT: %21 = insertelement <2 x bfloat> %20, bfloat %b, i32 1 +; CHECK-NEXT: %22 = insertelement <2 x bfloat> undef, bfloat %b, i32 0 +; CHECK-NEXT: %23 = insertelement <2 x bfloat> %22, bfloat %b, i32 1 +; CHECK-NEXT: %24 = fpext <2 x bfloat> %21 to <2 x float> +; CHECK-NEXT: %25 = fpext <2 x bfloat> %23 to <2 x float> +; CHECK-NEXT: %26 = fadd <2 x float> %24, %25 +; CHECK-NEXT: %27 = fptrunc 
<2 x float> %26 to <2 x bfloat> +; CHECK-NEXT: %28 = extractelement <2 x bfloat> %27, i32 0 +; CHECK-NEXT: %29 = extractelement <2 x bfloat> %27, i32 1 +; CHECK-NEXT: %30 = fpext bfloat %28 to float +; CHECK-NEXT: %31 = fpext bfloat %29 to float +; CHECK-NEXT: %32 = fadd float %30, %31 +; CHECK-NEXT: %33 = fptrunc float %32 to bfloat +; CHECK-NEXT: %34 = fpext bfloat %33 to float +; CHECK-NEXT: %35 = fpext bfloat %19 to float +; CHECK-NEXT: %36 = fadd float %34, %35 +; CHECK-NEXT: %37 = fptrunc float %36 to bfloat +; CHECK-NEXT: ret bfloat %37 +; + %0 = fadd bfloat %a, %b + %1 = fadd bfloat %0, %b + %2 = fadd bfloat %1, %b + %3 = fmul bfloat %2, %b + %4 = fdiv bfloat %3, %b + %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0 + %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1 + %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0 + %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1 + %9 = fadd <2 x bfloat> %6, %8 + %10 = extractelement <2 x bfloat> %9, i32 0 + %11 = extractelement <2 x bfloat> %9, i32 1 + %12 = fadd bfloat %10, %11 + %13 = fadd bfloat %12, %4 + ret bfloat %13 +} + +attributes #0 = { "target-features"="-avx512fp16" } +attributes #1 = { "target-features"="+avx512fp16" } diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll index 84f120712734b..90d7980e862c6 100644 --- a/test/llvmpasses/gcroots.ll +++ b/test/llvmpasses/gcroots.ll @@ -1,5 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) @@ -14,17 +17,28 @@ top: ; CHECK-LABEL: @simple %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 -; CHECK: call {} addrspace(10)* @jl_box_int64 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 +; TYPED: call {} addrspace(10)* @jl_box_int64 +; OPAQUE: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; CHECK-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] -; CHECK: [[GEP1:%.*]] = getelementptr inbounds {} 
addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]] -; CHECK-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; TYPED-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] +; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]] +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]] +; OPAQUE: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]] +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) @@ -35,7 +49,8 @@ define void @leftover_alloca({} addrspace(10)* %a) { ; If this pass encounters an alloca, it'll just sink it into the gcframe, ; relying on mem2reg to catch simple cases such as this earlier ; CHECK-LABEL: @leftover_alloca -; CHECK: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe +; TYPED: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe +; OPAQUE: %var = getelementptr inbounds ptr addrspace(10), ptr %gcframe %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %var = alloca {} addrspace(10)* @@ -53,11 +68,16 @@ define void @simple_union() { ; CHECK-LABEL: @simple_union %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; CHECK: %a = call { {} addrspace(10)*, i8 } @union_ret() +; TYPED: %a = call { {} addrspace(10)*, i8 } @union_ret() +; OPAQUE: %a = call { ptr addrspace(10), i8 } @union_ret() %a = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0 -; CHECK-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0 +; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-NEXT: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %a, 0 +; OPAQUE-NEXT: store ptr addrspace(10) [[EXTRACT]], ptr [[GEP0]] call void @union_arg({{} addrspace(10)*, i8} %a) ret void } @@ -79,7 +99,8 @@ define void @select_simple(i64 %a, i64 %b) { define void @phi_simple(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_simple -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b @@ -92,8 +113,11 @@ blabel: br label %common common: %phi = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] -; CHECK: [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]] +; TYPED: [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]] 
+ +; OPAQUE: [[GEP:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP]] call void @one_arg_boxed({} addrspace(10)* %phi) ret void } @@ -102,7 +126,8 @@ declare void @one_arg_decayed(i64 addrspace(12)*) define void @select_lift(i64 %a, i64 %b) { ; CHECK-LABEL: @select_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) @@ -110,7 +135,8 @@ define void @select_lift(i64 %a, i64 %b) { %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)* %cmp = icmp eq i64 %a, %b -; CHECK: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed +; TYPED: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed +; OPAQUE: %gclift = select i1 %cmp, ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed %selectb = select i1 %cmp, i64 addrspace(12)* %adecayed, i64 addrspace(12)* %bdecayed call void @one_arg_decayed(i64 addrspace(12)* %selectb) ret void @@ -119,7 +145,8 @@ define void @select_lift(i64 %a, i64 %b) { define void @phi_lift(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_lift -; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] +; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] +; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b @@ -148,7 +175,8 @@ top: br i1 %cmp, label %alabel, label %blabel alabel: %u = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK: %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 +; TYPED: %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 +; OPAQUE: %aboxed = extractvalue { ptr addrspace(10), i8 } %u, 0 %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* ; CHECK: br label %common @@ -158,7 +186,8 @@ blabel: %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)* br label %common common: -; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] +; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] +; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ] %phi = phi i64 addrspace(12)* [ %adecayed, %alabel ], [ %bdecayed, %blabel ] call void @one_arg_decayed(i64 addrspace(12)* %phi) ret void @@ -167,7 +196,8 @@ common: define void @live_if_live_out(i64 %a, i64 %b) { ; CHECK-LABEL: @live_if_live_out top: -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() ; The failure case is failing to realize that `aboxed` is live across the first @@ -185,25 +215,34 @@ succ: ; safepoint define {} addrspace(10)* @ret_use(i64 %a, i64 %b) { ; CHECK-LABEL: @ret_use -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr 
addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ret {} addrspace(10)* %aboxed } define {{} addrspace(10)*, i8} @ret_use_struct() { ; CHECK-LABEL: @ret_use_struct -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; CHECK: %aunion = call { {} addrspace(10)*, i8 } @union_ret() +; TYPED: %aunion = call { {} addrspace(10)*, i8 } @union_ret() +; OPAQUE: %aunion = call { ptr addrspace(10), i8 } @union_ret() %aunion = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0 -; CHECK-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] +; TYPED-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0 +; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] + +; OPAQUE-DAG: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-DAG: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %aunion, 0 +; OPAQUE-NEXT: store ptr addrspace(10) [[EXTRACT]], ptr [[GEP0]] + ; CHECK-NEXT: call void @jl_safepoint() call void @jl_safepoint() ret {{} addrspace(10)*, i8} %aunion @@ -232,23 +271,27 @@ top: define void @global_ref() { ; CHECK-LABEL: @global_ref -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1) -; CHECK: store {} addrspace(10)* %loaded, {} addrspace(10)** +; TYPED: store {} addrspace(10)* %loaded, {} addrspace(10)** +; OPAQUE: store ptr addrspace(10) %loaded, ptr call void @one_arg_boxed({} addrspace(10)* %loaded) ret void } define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) { ; CHECK-LABEL: @no_redundant_rerooting -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed ; CHECK-NEXT: call void @jl_safepoint() call void @jl_safepoint() br i1 %cond, label %blocka, label %blockb @@ -268,12 +311,14 @@ declare void @llvm.memcpy.p064.p10i8.i64(i64*, i8 addrspace(10)*, i64, i32, i1) define void @memcpy_use(i64 %a, i64 *%aptr) { ; CHECK-LABEL: @memcpy_use -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} 
addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %acast = bitcast {} addrspace(10)* %aboxed to i8 addrspace(10)* call void @llvm.memcpy.p064.p10i8.i64(i64* %aptr, i8 addrspace(10)* %acast, i64 8, i32 1, i1 false) @@ -285,20 +330,24 @@ declare void @llvm.julia.gc_preserve_end(token) define void @gc_preserve(i64 %a) { ; CHECK-LABEL: @gc_preserve -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %aboxed) %aboxed2 = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed2 +; TYPED: store {} addrspace(10)* %aboxed2 +; OPAQUE: store ptr addrspace(10) %aboxed2 call void @jl_safepoint() call void @llvm.julia.gc_preserve_end(token %tok) %aboxed3 = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed3 +; TYPED: store {} addrspace(10)* %aboxed3 +; OPAQUE: store ptr addrspace(10) %aboxed3 call void @jl_safepoint() call void @one_arg_boxed({} addrspace(10)* %aboxed2) call void @one_arg_boxed({} addrspace(10)* %aboxed3) @@ -307,23 +356,37 @@ top: define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapture nonnull readonly dereferenceable(16)) { ; CHECK-LABEL: @gc_preserve_vec -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 6 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 6 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 6 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8 -; CHECK-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 -; CHECK-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 -; CHECK-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 -; CHECK-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 -; CHECK-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0 -; CHECK-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1 -; CHECK-DAG: [[V21:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0 -; CHECK-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1 -; CHECK-DAG: store {} addrspace(10)* [[V11]] -; CHECK-DAG: store {} addrspace(10)* [[V12]] -; CHECK-DAG: store {} addrspace(10)* [[V21]] -; CHECK-DAG: store {} addrspace(10)* [[V22]] +; TYPED-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 +; TYPED-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 +; TYPED-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 +; TYPED-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 
+; TYPED-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0 +; TYPED-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1 +; TYPED-DAG: [[V21:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0 +; TYPED-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1 +; TYPED-DAG: store {} addrspace(10)* [[V11]] +; TYPED-DAG: store {} addrspace(10)* [[V12]] +; TYPED-DAG: store {} addrspace(10)* [[V21]] +; TYPED-DAG: store {} addrspace(10)* [[V22]] + +; OPAQUE-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0 +; OPAQUE-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0 +; OPAQUE-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 1 +; OPAQUE-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 1 +; OPAQUE-DAG: [[V11:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT11]], i32 0 +; OPAQUE-DAG: [[V12:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT12]], i32 1 +; OPAQUE-DAG: [[V21:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT21]], i32 0 +; OPAQUE-DAG: [[V22:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT22]], i32 1 +; OPAQUE-DAG: store ptr addrspace(10) [[V11]] +; OPAQUE-DAG: store ptr addrspace(10) [[V12]] +; OPAQUE-DAG: store ptr addrspace(10) [[V21]] +; OPAQUE-DAG: store ptr addrspace(10) [[V22]] %tok = call token (...) @llvm.julia.gc_preserve_begin([2 x <2 x {} addrspace(10)*>] %v, i64 addrspace(10)* null, {}*** %ptls) call void @jl_safepoint() ret void @@ -363,7 +426,8 @@ declare {} addrspace(10) *@alloc() define {} addrspace(10)* @vec_loadobj() { ; CHECK-LABEL: @vec_loadobj -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() @@ -377,7 +441,8 @@ define {} addrspace(10)* @vec_loadobj() { define {} addrspace(10)* @vec_gep() { ; CHECK-LABEL: @vec_gep -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() @@ -392,7 +457,8 @@ define {} addrspace(10)* @vec_gep() { declare i1 @check_property({} addrspace(10)* %val) define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) { ; CHECK-LABEL: @loopyness -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -405,15 +471,21 @@ header: a: ; This needs a store ; CHECK-LABEL: a: -; CHECK: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]] +; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]] + +; OPAQUE: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP1]] call void @one_arg_boxed({} addrspace(10)* %phi) br label %latch latch: ; This as well in case we went the 
other path -; CHECK: [[GEP2:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]] +; TYPED: [[GEP2:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]] + +; OPAQUE: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]] +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP2]] %obj = call {} addrspace(10)* @alloc() %cond = call i1 @check_property({} addrspace(10)* %phi) br i1 %cond, label %exit, label %header @@ -424,7 +496,8 @@ exit: define {} addrspace(10)* @phi_union(i1 %cond) { ; CHECK-LABEL: @phi_union -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -449,7 +522,8 @@ join: define {} addrspace(10)* @select_union(i1 %cond) { ; CHECK-LABEL: @select_union -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -465,7 +539,8 @@ top: define i8 @simple_arrayptr() { ; CHECK-LABEL: @simple_arrayptr -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -482,7 +557,8 @@ top: define {} addrspace(10)* @vecstoreload(<2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecstoreload -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -496,7 +572,8 @@ top: define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecphi -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -522,7 +599,8 @@ common: define i8 @phi_arrayptr(i1 %cond) { ; CHECK-LABEL: @phi_arrayptr -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -561,7 +639,8 @@ common: define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -578,13 +657,15 @@ top: define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} 
addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> call void @jl_safepoint() -; CHECK: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}} %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -596,13 +677,15 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecvecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> call void @jl_safepoint() -; CHECK: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}} %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -614,14 +697,16 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { ; CHECK-LABEL: @vecscalarselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* %avec = getelementptr i64, i64 addrspace(12)* %adecayed, <2 x i32> zeroinitializer call void @jl_safepoint() -; CHECK: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %aboxed %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -633,14 +718,16 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { define void @scalarvecselect_lift(i1 %cond, i64 %a) { ; CHECK-LABEL: @scalarvecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* %avec = getelementptr i64, i64 addrspace(12)* %adecayed, <2 x i32> zeroinitializer call 
void @jl_safepoint() -; CHECK: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %aboxed %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -652,7 +739,8 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) { define i8 @select_arrayptr(i1 %cond) { ; CHECK-LABEL: @select_arrayptr -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -679,9 +767,14 @@ top: define i8 @vector_arrayptrs() { ; CHECK-LABEL: @vector_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -700,10 +793,16 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8 (<2 x i8 ad define i8 @masked_arrayptrs() { ; CHECK-LABEL: @masked_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> <i1 true, i1 false>, <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> <i1 true, i1 false>, <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.load.v2p13.p11(ptr addrspace(11) %arrayptrptr, i32 16, <2 x i1> <i1 true, i1 false>, <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -722,10 +821,16 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8 (<2 x i8 define i8 @gather_arrayptrs() { ; CHECK-LABEL: @gather_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> <i1 true, 
i1 false>, <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 false>, <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 false>, <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -743,10 +848,16 @@ top: define i8 @gather_arrayptrs_alltrue() { ; CHECK-LABEL: @gather_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 true>, <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 true>, <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> <i1 true, i1 true>, <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -764,9 +875,14 @@ top: define i8 @lost_select_decayed(i1 %arg1) { ; CHECK-LABEL: @lost_select_decayed -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) [[SOMETHING:%.*]], ptr [[GEP0]] top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() diff --git a/test/llvmpasses/image-codegen.jl b/test/llvmpasses/image-codegen.jl new file mode 
100644 index 0000000000000..2e52245b7d3b9 --- /dev/null +++ b/test/llvmpasses/image-codegen.jl @@ -0,0 +1,23 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--print-before=loop-vectorize --print-module-scope" +# RUN: rm -rf %t +# RUN: mkdir %t +# RUN: julia --image-codegen --startup-file=no %s 2> %t/output.txt +# RUN: FileCheck %s < %t/output.txt + +# COM: checks that global variables compiled in imaging codegen +# COM: are marked as external and not internal +# COM: Also makes sure that --imaging-codegen doesn't crash + +# CHECK: *** IR Dump Before +# CHECK-NOT: internal global +# CHECK-NOT: private global +# CHECK: jl_global +# COM: we emit both declarations and definitions, so we may see either style in the IR +# CHECK-SAME: = {{(external )?}}global +# CHECK: julia_f_ +# CHECK-NOT: internal global +# CHECK-NOT: private global + +f() = "abcd" +f() diff --git a/test/llvmpasses/julia-licm-fail.ll b/test/llvmpasses/julia-licm-fail.ll new file mode 100644 index 0000000000000..4f28239257ec0 --- /dev/null +++ b/test/llvmpasses/julia-licm-fail.ll @@ -0,0 +1,102 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + +; COM: This file contains functions that should not trigger allocations to be hoisted out of loops + +@tag = external addrspace(10) global {}, align 16 + +; COM: Tests that an escape in a loop prevents hoisting of the allocation +; CHECK-LABEL: @julia_escape_alloc +define void @julia_escape_alloc(i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NOT: julia.gc_alloc_obj +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) + %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) +; OPAQUE-NEXT: %ignore = call ptr addrspace(10) @escape(ptr addrspace(10) %alloc) + %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) + br i1 %ret, label %return, label %loop +return: + ret void +} + +; COM: Tests that addrescape in a loop prevents hoisting of the allocation +; CHECK-LABEL: @julia_addrescape_alloc +define void @julia_addrescape_alloc(i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NOT: julia.gc_alloc_obj +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr 
nonnull %current_task, i64 8, ptr addrspace(10) @tag) + %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; OPAQUE-NEXT: %cast = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11) + %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; TYPED-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) +; OPAQUE-NEXT: %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %cast) + %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) + br i1 %ret, label %return, label %loop +return: + ret void +} + +declare void @julia.write_barrier({}*, ...) + +declare {}*** @julia.get_pgcstack() + +; Function Attrs: allocsize(1) +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) #1 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: inaccessiblemem_or_argmemonly +declare void @ijl_gc_queue_root({} addrspace(10)*) #3 + +; Function Attrs: allocsize(1) +declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1 + +; Function Attrs: allocsize(1) +declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1 + +; COM: escape to make it easy to find +declare nonnull {} addrspace(10)* @escape({} addrspace(10)*) + +; COM: addrescape function +declare nonnull {}* @julia.pointer_from_objref({} addrspace(11)*) + +attributes #0 = { "probe-stack"="inline-asm" } +attributes #1 = { allocsize(1) } +attributes #2 = { argmemonly nofree nosync nounwind willreturn } +attributes #3 = { inaccessiblemem_or_argmemonly } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/test/llvmpasses/julia-licm-memoryssa.ll b/test/llvmpasses/julia-licm-memoryssa.ll new file mode 100644 index 0000000000000..e1684c7577578 --- /dev/null +++ b/test/llvmpasses/julia-licm-memoryssa.ll @@ -0,0 +1,171 @@ +; COM: NewPM-only test, tests that memoryssa is preserved correctly + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPAQUE + +@tag = external addrspace(10) global {}, align 16 + +declare void @julia.write_barrier({} addrspace(10)*, ...) + +declare {}*** @julia.get_pgcstack() + +declare token @llvm.julia.gc_preserve_begin(...) 
+ +declare void @llvm.julia.gc_preserve_end(token) + +declare void @mssa_use({} addrspace(10)*) + +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) + +; COM: check basic preserve hoist/sink functionality +; CHECK-LABEL: MemorySSA for function: hoist_sink_preserves +; CHECK-LABEL: @hoist_sink_preserves +define void @hoist_sink_preserves({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[PRESERVE_TOKEN:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[PRESERVE_TOKEN]]},{loop,[[MPHI]]}) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[PRESERVE_END:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} + +; COM: check sink functionality when there are multiple loop exit blocks +; CHECK-LABEL: MemorySSA for function: hoist_multisink_preserves +; CHECK-LABEL: @hoist_multisink_preserves +define void @hoist_multisink_preserves({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[PRESERVE_TOKEN:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[PRESERVE_TOKEN]]},{loop2,[[MPHI]]}) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop2 +; CHECK: loop2: +loop2: +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return2, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[PRESERVE_END_1:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END_1]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +; CHECK: return2: +return2: +; CHECK-NEXT: [[PRESERVE_END_2:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END_2]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} + +define void @hoist_allocation({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[ALLOC:[0-9]+]] = MemoryDef([[PGCSTACK]]) + +; TYPED-NEXT: %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag) +; TYPED-NEXT: %[[BCAST:.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* + +; OPAQUE-NEXT: %alloc = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %current_task, i64 0, ptr addrspace(10) @tag) + +; CHECK-NEXT: [[MSET:[0-9]+]] = MemoryDef([[ALLOC]]) +; CHECK-NEXT: call void @llvm.memset +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: %alloc +; CHECK-NOT: @julia.gc_alloc_obj + %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[MSET]]},{loop,[[MPHI]]}) + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} + +define void @hoist_write_barrier({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[WB:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: call void +; CHECK-SAME: @julia.write_barrier +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: @julia.write_barrier + call void ({} addrspace(10)*, ...) 
@julia.write_barrier({} addrspace(10)* %obj) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[WB]]},{loop,[[MPHI]]}) + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} diff --git a/test/llvmpasses/julia-licm-missed.ll b/test/llvmpasses/julia-licm-missed.ll new file mode 100644 index 0000000000000..99b493cb6316b --- /dev/null +++ b/test/llvmpasses/julia-licm-missed.ll @@ -0,0 +1,116 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + +; COM: This file contains functions that currently do not trigger allocations to be hoisted out of loops +; COM: i.e. they are missed optimizations +; COM: Better optimization could potentially enable allocations to be hoisted out of these loops + +@tag = external addrspace(10) global {}, align 16 + +; COM: Currently we don't hoist allocations that have references stored into them out of loops +; COM: This is because we need to insert write barriers for the stores when the storee does not +; COM: dominate the allocation after it has been moved out of the loop +; CHECK-LABEL: @julia_refstore +define void @julia_refstore({} addrspace(10)* %obj, i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NOT: julia.gc_alloc_obj +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) + %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; OPAQUE-NEXT: %derived = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11) + %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; TYPED-NEXT: %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* +; OPAQUE-NEXT: %ptr = bitcast ptr addrspace(11) %derived to ptr addrspace(11) + %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* +; TYPED-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 +; OPAQUE-NEXT: store ptr addrspace(10) %obj, ptr addrspace(11) %ptr, align 8 + store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 + br i1 %ret, label %return, label %loop +return: + ret void +} + +; COM: Currently our LLVM-level escape analysis doesn't handle phi nodes at all +; COM: so this allocation is counted as 'escaping' despite the fact that it's +; COM: clearly dead +; CHECK-LABEL: @julia_phi +define void @julia_phi({} addrspace(10)* %obj, i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; 
CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NOT: julia.gc_alloc_obj +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) + %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) + br label %other +; CHECK: other: +other: +; TYPED-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ] +; OPAQUE-NEXT: %phi = phi ptr addrspace(10) [ %alloc, %loop ] + %phi = phi {} addrspace(10)* [ %alloc, %loop ] + br i1 %ret, label %return, label %loop +return: + ret void +} + + + +declare void @julia.write_barrier({}*, ...) + +declare {}*** @julia.get_pgcstack() + +; Function Attrs: allocsize(1) +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) #1 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: inaccessiblemem_or_argmemonly +declare void @ijl_gc_queue_root({} addrspace(10)*) #3 + +; Function Attrs: allocsize(1) +declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #1 + +; Function Attrs: allocsize(1) +declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1 + +; COM: escape to make it easy to find +declare nonnull {} addrspace(10)* @escape({} addrspace(10)*) + +; COM: addrescape function +declare nonnull {}* @julia.pointer_from_objref({} addrspace(11)*) + +attributes #0 = { "probe-stack"="inline-asm" } +attributes #1 = { allocsize(1) } +attributes #2 = { argmemonly nofree nosync nounwind willreturn } +attributes #3 = { inaccessiblemem_or_argmemonly } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll index 0c7cf9a640ef7..1b0e7078e32ce 100644 --- a/test/llvmpasses/julia-licm.ll +++ b/test/llvmpasses/julia-licm.ll @@ -1,5 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 @@ -7,6 +10,77 @@ declare void @julia.write_barrier({}*, ...) declare {}*** @julia.get_pgcstack() +declare token @llvm.julia.gc_preserve_begin(...) 
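; COM: Illustrative note, not part of the upstream test: these preserve intrinsics are what
; COM: Julia's `GC.@preserve` lowers to. A hypothetical loop such as
; COM:     for i in 1:n
; COM:         GC.@preserve buf unsafe_work(pointer(buf))  # `buf::Vector`; `unsafe_work` is a stand-in for some ccall
; COM:     end
; COM: emits a gc_preserve_begin/gc_preserve_end pair on every iteration; the tests below expect
; COM: JuliaLICM to hoist the begin into the preheader and sink the end into the loop exit blocks.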
+ +declare void @llvm.julia.gc_preserve_end(token) + +; COM: check basic preserve hoist/sink functionality +; CHECK-LABEL: @hoist_sink_preserves +define void @hoist_sink_preserves({} addrspace(10)* %obj, i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +} + +; COM: check sink functionality when there are multiple loop exit blocks +; CHECK-LABEL: @hoist_multisink_preserves +define void @hoist_multisink_preserves({} addrspace(10)* %obj, i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop2 +; CHECK: loop2: +loop2: +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return2, label %loop +; CHECK: return: +return: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +; CHECK: return2: +return2: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +} + +; COM: check basic allocation hoisting functionality +; CHECK-LABEL: @julia_allocation_hoist define nonnull {} addrspace(10)* @julia_allocation_hoist(i64 signext %0) #0 { top: %1 = call {}*** @julia.get_pgcstack() @@ -24,14 +98,23 @@ L3: ; preds = %L3.loopexit, %top L4: ; preds = %top %current_task112 = getelementptr inbounds {}**, {}*** %1, i64 -12 %current_task1 = bitcast {}*** %current_task112 to {}** - ; CHECK: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) + ; TYPED: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) + ; TYPED-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)* + ; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false) + + ; OPAQUE: %3 = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task1, i64 8, ptr addrspace(10) @tag) + ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %3, i8 0, i64 8, i1 false) + ; CHECK-NEXT: br label %L22 br label %L22 L22: ; preds = %L4, %L22 %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] - ; CHECK: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] - 
; CHECK-NEXT %4 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* + ; TYPED: %value_phi5 = phi i64 [ 1, %L4 ], [ %6, %L22 ] + ; TYPED-NEXT %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* + + ; OPAQUE: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] + ; OPAQUE-NEXT %4 = bitcast ptr addrspace(10) %3 to ptr addrspace(10) %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) #1 %4 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* store i64 %value_phi5, i64 addrspace(10)* %4, align 8, !tbaa !2 @@ -40,6 +123,32 @@ L22: ; preds = %L4, %L22 br i1 %.not, label %L3.loopexit, label %L22 } +; COM: check that we hoist the allocation out of the loop despite returning the allocation +; CHECK-LABEL: @julia_hoist_returned +define nonnull {} addrspace(10)* @julia_hoist_returned(i64 signext %n, i1 zeroext %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* +; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false) + +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) +; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %alloc, i8 0, i64 8, i1 false) + +; CHECK-NEXT: br label %loop + br label %loop +loop: + %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) + br i1 %ret, label %return, label %loop +return: + ret {} addrspace(10)* %alloc +} + ; Function Attrs: allocsize(1) declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) #1 diff --git a/test/llvmpasses/julia-simdloop-memoryssa.ll b/test/llvmpasses/julia-simdloop-memoryssa.ll new file mode 100644 index 0000000000000..0c1c4ac021996 --- /dev/null +++ b/test/llvmpasses/julia-simdloop-memoryssa.ll @@ -0,0 +1,55 @@ +; COM: NewPM-only test, tests that memoryssa is preserved correctly + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print<memoryssa>)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK + +; CHECK-LABEL: MemorySSA for function: simd_test +; CHECK-LABEL: @simd_test( +define void @simd_test(double *%a, double *%b) { +; CHECK: top: +top: + br label %loop +; CHECK: loop: +loop: +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({top,liveOnEntry},{loop,[[MSSA_USE:[0-9]+]]}) + %i = phi i64 [0, %top], [%nexti, %loop] + %aptr = getelementptr double, double *%a, i64 %i + %bptr = getelementptr double, double *%b, i64 %i +; CHECK: MemoryUse([[MPHI]]) MayAlias +; CHECK: llvm.mem.parallel_loop_access + %aval = load double, double *%aptr +; CHECK: MemoryUse([[MPHI]]) MayAlias + %bval = load double, double *%aptr + %cval = fadd double %aval, %bval +; CHECK: [[MSSA_USE]] = MemoryDef([[MPHI]]) + store double %cval, double 
*%bptr + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 +loopdone: + ret void +} + +; CHECK-LABEL: MemorySSA for function: simd_test_sub2 +; CHECK-LABEL: @simd_test_sub2( +define double @simd_test_sub2(double *%a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, double *%a, i64 %i +; CHECK: MemoryUse(liveOnEntry) MayAlias + %aval = load double, double *%aptr + %nextv = fsub double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 +loopdone: + ret double %nextv +} + +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} \ No newline at end of file diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/julia-simdloop.ll similarity index 68% rename from test/llvmpasses/simdloop.ll rename to test/llvmpasses/julia-simdloop.ll index 894d3a1428a5c..df96e34979a3d 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/julia-simdloop.ll @@ -1,7 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s +; This file is a part of Julia. License is MIT: https://julialang.org/license -declare void @julia.loopinfo_marker() +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s ; CHECK-LABEL: @simd_test( define void @simd_test(double *%a, double *%b) { @@ -17,9 +18,8 @@ loop: %cval = fadd double %aval, %bval store double %cval, double *%bptr %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 loopdone: ret void } @@ -35,11 +35,10 @@ loop: ; CHECK: llvm.mem.parallel_loop_access %aval = load double, double *%aptr %nextv = fsub double %v, %aval -; CHECK: fsub fast double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 loopdone: ret double %nextv } @@ -54,11 +53,10 @@ loop: %aptr = getelementptr double, double *%a, i64 %i %aval = load double, double *%aptr %nextv = fsub double %v, %aval -; CHECK: fsub fast double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !2 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 loopdone: ret double %nextv } @@ -77,20 +75,16 @@ for.body: ; preds = %for.body, %entry %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv store i32 %add, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !4 %exitcond = icmp eq i64 %indvars.iv.next, 48 ; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] - br 
i1 %exitcond, label %for.end, label %for.body + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 for.end: ; preds = %for.body %1 = load i32, i32* %a, align 4 ret i32 %1 } -!1 = !{} -!2 = !{!"julia.simdloop"} -!3 = !{!"julia.simdloop", !"julia.ivdep"} -!4 = !{!"julia.simdloop", !"julia.ivdep", !5} -!5 = !{!"llvm.loop.vectorize.disable", i1 0} -; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[LOOP_DISABLE:![0-9]+]]} -; CHECK-NEXT: [[LOOP_DISABLE]] = !{!"llvm.loop.vectorize.disable", i1 false} +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} +!2 = distinct !{!2, !"julia.simdloop", !"julia.ivdep", !3} +!3 = !{!"llvm.loop.vectorize.disable", i1 0} diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll new file mode 100644 index 0000000000000..77f8e2ac685ce --- /dev/null +++ b/test/llvmpasses/late-lower-gc-addrspaces.ll @@ -0,0 +1,190 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + +target triple = "amdgcn-amd-amdhsa" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" + +@tag = external addrspace(10) global {}, align 16 + +declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) +declare {} addrspace(10)* @jl_box_int64(i64) +declare {}*** @julia.get_pgcstack() +declare void @jl_safepoint() +declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32) +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) +declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*) + +define void @gc_frame_lowering(i64 %a, i64 %b) { +top: +; CHECK-LABEL: @gc_frame_lowering +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: %pgcstack = call {}*** @julia.get_pgcstack() + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() + %pgcstack = call {}*** @julia.get_pgcstack() +; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) +; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64 + +; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 + %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) +; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] + %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) +; CHECK-NEXT: %bboxed = +; Make sure the same gc slot isn't re-used +; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) +; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 
[[GEPSLOT1:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + +; CHECK-NEXT: call void @boxed_simple + call void @boxed_simple({} addrspace(10)* %aboxed, + {} addrspace(10)* %bboxed) +; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) + ret void +} + +define {} addrspace(10)* @gc_alloc_lowering() { +top: +; CHECK-LABEL: @gc_alloc_lowering + %pgcstack = call {}*** @julia.get_pgcstack() + %0 = bitcast {}*** %pgcstack to {}** + %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: ret {} addrspace(10)* %v +; OPAQUE-NEXT: ret ptr addrspace(10) %v + ret {} addrspace(10)* %v +} + +; Confirm that loadedval instruction does not contain invariant.load metadata +; after the gc placement pass, but still contains the range metadata. +; Since loadedval is marked invariant, passes are allowed to move the use. 
+; But after the placement pass, must ensure it won't be relocated after our +; last gc-root use +define void @gc_drop_aliasing() { +top: +; CHECK-LABEL: @gc_drop_aliasing + %pgcstack = call {}*** @julia.get_pgcstack() + %0 = bitcast {}*** %pgcstack to {}** + %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) + %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 + %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 +; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 + store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 +; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 + %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 +; CHECK-NEXT: ret void + ret void +} + +define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* %v1) { +top: +; CHECK-LABEL: @callee_root +; CHECK-NOT: @julia.new_gc_frame + %v2 = call {}*** @julia.get_pgcstack() + %v3 = bitcast {} addrspace(10)* %v0 to {} addrspace(10)* addrspace(10)* + %v4 = addrspacecast {} addrspace(10)* addrspace(10)* %v3 to {} addrspace(10)* addrspace(11)* + %v5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v4 unordered, align 8 + %v6 = bitcast {} addrspace(10)* %v1 to {} addrspace(10)* addrspace(10)* + %v7 = addrspacecast {} addrspace(10)* addrspace(10)* %v6 to {} addrspace(10)* addrspace(11)* + %v8 = load atomic {} 
addrspace(10)*, {} addrspace(10)* addrspace(11)* %v7 unordered, align 8 + %v9 = addrspacecast {} addrspace(10)* %v5 to {} addrspace(12)* + %v10 = addrspacecast {} addrspace(10)* %v8 to {} addrspace(12)* + %v11 = call i32 @rooting_callee({} addrspace(12)* %v9, {} addrspace(12)* %v10) + ret i32 %v11 +; CHECK: ret i32 +} + +define i32 @freeze({} addrspace(10)* %v0, {} addrspace(10)* %v1) { +top: +; CHECK-LABEL: @freeze +; CHECK-NOT: @julia.new_gc_frame + %v2 = call {}*** @julia.get_pgcstack() + %v3 = bitcast {} addrspace(10)* %v0 to {} addrspace(10)* addrspace(10)* + %v4 = addrspacecast {} addrspace(10)* addrspace(10)* %v3 to {} addrspace(10)* addrspace(11)* + %v5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v4 unordered, align 8 + %v6 = bitcast {} addrspace(10)* %v1 to {} addrspace(10)* addrspace(10)* + %v7 = addrspacecast {} addrspace(10)* addrspace(10)* %v6 to {} addrspace(10)* addrspace(11)* + %v8 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v7 unordered, align 8 + %fv8 = freeze {} addrspace(10)* %v8 + %v9 = addrspacecast {} addrspace(10)* %v5 to {} addrspace(12)* + %v10 = addrspacecast {} addrspace(10)* %fv8 to {} addrspace(12)* + %v11 = call i32 @rooting_callee({} addrspace(12)* %v9, {} addrspace(12)* %v10) + ret i32 %v11 +; CHECK: ret i32 +} + +!0 = !{i64 0, i64 23} +!1 = !{!1} +!2 = !{!7} ; scope list +!3 = !{!4, !4, i64 0, i64 1} +!4 = !{!"jtbaa_const", !5} +!5 = !{!"jtbaa"} +!6 = distinct !{!6} ; alias domain +!7 = distinct !{!7, !6} ; alias scope + + +; CHECK: !0 = !{!1, !1, i64 0} +; CHECK-NEXT: !1 = !{!"jtbaa_gcframe", !2, i64 0} +; CHECK-NEXT: !2 = !{!"jtbaa", !3, i64 0} +; CHECK-NEXT: !3 = !{!"jtbaa"} +; CHECK-NEXT: !4 = !{!5, !5, i64 0} +; CHECK-NEXT: !5 = !{!"jtbaa_tag", !6, i64 0} +; CHECK-NEXT: !6 = !{!"jtbaa_data", !2, i64 0} +; CHECK-NEXT: !7 = !{i64 0, i64 23} +; CHECK-NEXT: !8 = !{!9} +; CHECK-NEXT: !9 = distinct !{!9, !10} +; CHECK-NEXT: !10 = distinct !{!10} +; CHECK-NEXT: !11 = !{!12, !12, i64 0} +; CHECK-NEXT: !12 = !{!"jtbaa_const", !3} diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index cb159f17ef5e9..6dee18da5975f 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -1,5 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s -check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 @@ -14,24 +17,39 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*) define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: %pgcstack = call {}*** @julia.get_pgcstack() +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: %pgcstack = call {}*** @julia.get_pgcstack() + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; CHECK-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) -; CHECK-NEXT: call {} addrspace(10)* @jl_box_int64 +; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) +; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64 + +; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; CHECK-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) -; CHECK: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] +; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) +; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; CHECK-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -41,17 +59,25 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr 
inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: ret {} addrspace(10)* %v +; TYPED-NEXT: ret {} addrspace(10)* %v +; OPAQUE-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -66,23 +92,34 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} 
addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; CHECK-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void @@ -125,6 +162,56 @@ top: ; CHECK: ret i32 } +; COM: the bugs here may be caught by death-by-verify-assertion +define {} addrspace(10)* @gclift_switch({} addrspace(13)* addrspace(10)* %input, i1 %unpredictable) { + top: + %0 = call {}*** @julia.get_pgcstack() + br i1 %unpredictable, label %mid1, label %mid2 + mid1: + br label %mid2 + mid2: + %root = phi {} addrspace(13)* addrspace(10)* [ %input, %top ], [ %input, %mid1 ] + %unrelated = phi i1 [ %unpredictable, %top ], [ %unpredictable, %mid1 ] + %1 = addrspacecast {} addrspace(13)* addrspace(10)* %root to {} addrspace(13)* 
addrspace(11)* + %2 = bitcast {} addrspace(13)* addrspace(11)* %1 to {} addrspace(11)* + switch i1 %unpredictable, label %end [ + i1 1, label %end + i1 0, label %end + ] + end: + %phi = phi {} addrspace(11)* [ %2, %mid2 ], [ %2, %mid2 ], [ %2, %mid2 ] + %ret = bitcast {} addrspace(13)* addrspace(10)* %input to {} addrspace(10)* + ; CHECK: %gclift + ret {} addrspace(10)* %ret +} + +define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) { + %v2 = call {}*** @julia.get_pgcstack() + %e0 = extractvalue [2 x {} addrspace(10)* addrspace(11)*] %ar, 0 + %l0 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %e0 + %e1 = extractvalue [2 x {} addrspace(10)* addrspace(11)*] %ar, 1 + %l1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %e1 + %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) + ret void +} + +; CHECK-LABEL: @decayar +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: [[gc_slot_addr_:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) +; TYPED: store {} addrspace(10)* %l0, {} addrspace(10)** [[gc_slot_addr_:%.*]], align 8 +; TYPED: [[gc_slot_addr_:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) +; TYPED: store {} addrspace(10)* %l1, {} addrspace(10)** [[gc_slot_addr_:%.*]], align 8 +; TYPED: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) +; TYPED: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1) +; OPAQUE: store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8 +; OPAQUE: [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) +; OPAQUE: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8 +; OPAQUE: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1) +; OPAQUE: call void @julia.pop_gc_frame(ptr %gcframe) + !0 = !{i64 0, i64 23} !1 = !{!1} !2 = !{!7} ; scope list diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl index 687abe0a8cd46..3e0df7a8885a7 100644 --- a/test/llvmpasses/llvmcall.jl +++ b/test/llvmpasses/llvmcall.jl @@ -1,7 +1,14 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + # RUN: julia --startup-file=no %s %t -# RUN: cat %t/* | FileCheck %s +# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,TYPED + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + +# RUN: julia --startup-file=no %s %t +# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,OPAQUE include(joinpath("..", "testhelpers", "llvmpasses.jl")) @@ -13,20 +20,119 @@ end @generated foo(x)=:(ccall("extern foo", llvmcall, $x, ($x,), x)) bar(x) = ntuple(i -> VecElement{Float16}(x[i]), 2) -# CHECK: call half @foo(half %{{[0-9]+}}) +# CHECK: define +# CHECK-SAME: half @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] = call half @foo(half [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret half +# CHECK-NOT: define +# CHECK: } emit(foo, Float16) -# CHECK: call [2 x half] @foo([2 x half] %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: [2 x half] @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] = call [2 x half] @foo([2 x half] [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret [2 x half] +# CHECK-NOT: define +# CHECK: } emit(foo, NTuple{2, Float16}) -# CHECK: call <2 x half> @foo(<2 x half> %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: <2 x half> @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] call <2 x half> @foo(<2 x half> [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret <2 x half> +# CHECK-NOT: define +# CHECK: } emit(foo, NTuple{2, VecElement{Float16}}) -# CHECK: call i8 addrspace(3)* @foo(i8 addrspace(3)* %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# TYPED-SAME: i8 addrspace(3)* @julia_foo +# OPAQUE-SAME: ptr addrspace(3) @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# TYPED: [[FOO_RET:%.*]] call i8 addrspace(3)* @foo(i8 addrspace(3)* [[FOO_ARG:%.*]]) +# OPAQUE: [[FOO_RET:%.*]] call ptr addrspace(3) @foo(ptr addrspace(3) [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# TYPED: ret i8 addrspace(3)* +# OPAQUE: ret ptr addrspace(3) +# CHECK-NOT: define +# CHECK: } emit(foo, Core.LLVMPtr{Float32, 3}) -# CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: [2 x i32] @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] call { i32, i32 } @foo({ i32, i32 } [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret [2 x i32] +# CHECK-NOT: define +# CHECK: } emit(foo, Foo) -# CHECK: define <2 x half> @julia_bar_{{[0-9]+}}([2 x half] +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# 
CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: <2 x half> @julia_bar +# TYPED-SAME: [2 x half] +# OPAQUE-SAME: ptr +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: ret <2 x half> +# CHECK-NOT: define +# CHECK: } emit(bar, NTuple{2, Float16}) + +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr +# CHECK-SAME: { diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index 412bee7015c3e..c812f9c05a967 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -2,8 +2,7 @@ # RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s -# RUN: cat %t/module.ll | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S - | FileCheck %s -check-prefix=LOWER -# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S - | FileCheck %s -check-prefix=LOWER +# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S - | FileCheck %s -check-prefix=LOWER # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s -check-prefix=FINAL @@ -27,12 +26,11 @@ function simdf(X) acc = zero(eltype(X)) @simd for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]] +# CHECK: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] # LOWER-NOT: llvm.mem.parallel_loop_access -# LOWER: fadd fast double -# LOWER-NOT: call void @julia.loopinfo_marker() +# LOWER: fadd reassoc contract double # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] -# FINAL: fadd fast <{{[0-9]+}} x double> +# FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double> end acc end @@ -43,10 +41,9 @@ function simdf2(X) acc = zero(eltype(X)) @simd ivdep for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]] +# CHECK: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] # LOWER: llvm.mem.parallel_loop_access -# LOWER-NOT: call void @julia.loopinfo_marker() -# LOWER: fadd fast double +# LOWER: fadd reassoc contract double # LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] end acc @@ -61,13 +58,12 @@ end for i in 1:N iteration(i) $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 3))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() +# CHECK: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration # FINAL: br end end @@ -87,20 +83,19 @@ end iteration(i) end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"),))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() +# CHECK: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] # LOWER: br 
{{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end end @@ -111,19 +106,19 @@ end 1 <= j <= I && continue @show (i,j) iteration(i) -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),))) end end ## Check all the MD nodes -# CHECK: [[LOOPINFO]] = !{!"julia.simdloop"} -# CHECK: [[LOOPINFO2]] = !{!"julia.simdloop", !"julia.ivdep"} -# CHECK: [[LOOPINFO3]] = !{[[LOOPUNROLL:![0-9]+]]} +# CHECK: [[LOOPID]] = distinct !{[[LOOPID]], !"julia.simdloop"} +# CHECK: [[LOOPID2]] = distinct !{[[LOOPID2]], !"julia.simdloop", !"julia.ivdep"} +# CHECK: [[LOOPID3]] = distinct !{[[LOOPID3]], [[LOOPUNROLL:![0-9]+]]} # CHECK: [[LOOPUNROLL]] = !{!"llvm.loop.unroll.count", i64 3} -# CHECK: [[LOOPINFO4]] = !{[[LOOPUNROLL2:![0-9]+]]} +# CHECK: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]} # CHECK: [[LOOPUNROLL2]] = !{!"llvm.loop.unroll.full"} # LOWER: [[LOOPID]] = distinct !{[[LOOPID]]} # LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]]} diff --git a/test/llvmpasses/lower-handlers-addrspaces.ll b/test/llvmpasses/lower-handlers-addrspaces.ll new file mode 100644 index 0000000000000..8b85a71705f60 --- /dev/null +++ b/test/llvmpasses/lower-handlers-addrspaces.ll @@ -0,0 +1,33 @@ +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s + +target triple = "amdgcn-amd-amdhsa" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" + +attributes #1 = { returns_twice } +declare i32 @julia.except_enter() #1 +declare void @ijl_pop_handler(i32) +declare i8**** @julia.ptls_states() +declare i8**** @julia.get_pgcstack() + +define void @simple() { +top: + %pgcstack = call i8**** @julia.get_pgcstack() +; CHECK: call void @llvm.lifetime.start +; CHECK: call void @ijl_enter_handler +; CHECK: setjmp + %r = call i32 @julia.except_enter() + %cmp = icmp eq i32 %r, 0 + br i1 %cmp, label %try, label %catch +try: + br label %after +catch: + br label %after +after: + call void @ijl_pop_handler(i32 1) +; CHECK: llvm.lifetime.end + ret void +} diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll index 01bc1ae728f15..a250edddcaa81 100644 --- a/test/llvmpasses/lower-handlers.ll +++ b/test/llvmpasses/lower-handlers.ll @@ -1,5 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s attributes #1 = { returns_twice } declare i32 @julia.except_enter() #1 diff --git a/test/llvmpasses/muladd.ll b/test/llvmpasses/muladd.ll index 2eddb62cef3ec..3c1c995ce7376 100644 --- a/test/llvmpasses/muladd.ll +++ b/test/llvmpasses/muladd.ll @@ -1,7 +1,11 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s +; This file is a part of Julia. 
License is MIT: https://julialang.org/license +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s + + +; CHECK-LABEL: @fast_muladd1 define double @fast_muladd1(double %a, double %b, double %c) { top: ; CHECK: {{contract|fmuladd}} @@ -11,6 +15,7 @@ top: ret double %v2 } +; CHECK-LABEL: @fast_mulsub1 define double @fast_mulsub1(double %a, double %b, double %c) { top: ; CHECK: {{contract|fmuladd}} @@ -20,6 +25,7 @@ top: ret double %v2 } +; CHECK-LABEL: @fast_mulsub_vec1 define <2 x double> @fast_mulsub_vec1(<2 x double> %a, <2 x double> %b, <2 x double> %c) { top: ; CHECK: {{contract|fmuladd}} @@ -28,3 +34,31 @@ top: ; CHECK: ret <2 x double> ret <2 x double> %v2 } + +; COM: Should not mark fmul as contract when multiple uses of fmul exist +; CHECK-LABEL: @slow_muladd1 +define double @slow_muladd1(double %a, double %b, double %c) { +top: +; CHECK: %v1 = fmul double %a, %b + %v1 = fmul double %a, %b +; CHECK: %v2 = fadd fast double %v1, %c + %v2 = fadd fast double %v1, %c +; CHECK: %v3 = fadd fast double %v1, %b + %v3 = fadd fast double %v1, %b +; CHECK: %v4 = fadd fast double %v3, %v2 + %v4 = fadd fast double %v3, %v2 +; CHECK: ret double %v4 + ret double %v4 +} + +; COM: Should not mark fadd->fadd fast as contract +; CHECK-LABEL: @slow_addadd1 +define double @slow_addadd1(double %a, double %b, double %c) { +top: +; CHECK: %v1 = fadd double %a, %b + %v1 = fadd double %a, %b +; CHECK: %v2 = fadd fast double %v1, %c + %v2 = fadd fast double %v1, %c +; CHECK: ret double %v2 + ret double %v2 +} diff --git a/test/llvmpasses/multiversioning-annotate-only.ll b/test/llvmpasses/multiversioning-annotate-only.ll new file mode 100644 index 0000000000000..4e90e3cb9bc6b --- /dev/null +++ b/test/llvmpasses/multiversioning-annotate-only.ll @@ -0,0 +1,220 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s + +; COM: This test checks that multiversioning correctly picks up on features that should trigger cloning +; COM: Note that for annotations alone, we don't need jl_fvars or jl_gvars + +; COM: Copied from src/processor.h +; COM: JL_TARGET_VEC_CALL = 1 << 0, +; COM: // Clone all functions +; COM: JL_TARGET_CLONE_ALL = 1 << 1, +; COM: // Clone when there's scalar math operations that can benefit from target-specific +; COM: // optimizations. This includes `muladd`, `fma`, `fast`/`contract` flags. 
+; COM: JL_TARGET_CLONE_MATH = 1 << 2, +; COM: // Clone when the function has a loop +; COM: JL_TARGET_CLONE_LOOP = 1 << 3, +; COM: // Clone when the function uses any vectors +; COM: // When this is specified, the cloning pass should also record if any of the cloned functions +; COM: // used this in any function call (including the signature of the function itself) +; COM: JL_TARGET_CLONE_SIMD = 1 << 4, +; COM: // The CPU name is unknown +; COM: JL_TARGET_UNKNOWN_NAME = 1 << 5, +; COM: // Optimize for size for this target +; COM: JL_TARGET_OPTSIZE = 1 << 6, +; COM: // Only optimize for size for this target +; COM: JL_TARGET_MINSIZE = 1 << 7, +; COM: // Clone when the function queries CPU features +; COM: JL_TARGET_CLONE_CPU = 1 << 8, +; COM: // Clone when the function uses fp16 +; COM: JL_TARGET_CLONE_FLOAT16 = 1 << 9, + +; COM: start with the basics, just one feature per function + +; COM: boring should only be cloned if clone_all is enabled on the target +; CHECK: @boring{{.*}}#[[BORING_ATTRS:[0-9]+]] +define noundef i32 @boring(i32 noundef %0) { + ret i32 %0 +} + +; CHECK: @fastmath_test{{.*}}#[[FASTMATH_TEST_ATTRS:[0-9]+]] +define noundef float @fastmath_test(float noundef %0, float noundef %1) { + %3 = fadd fast float %0, %1 + ret float %3 +} + +; CHECK: @loop_test{{.*}}#[[LOOP_TEST_ATTRS:[0-9]+]] +define noundef i32 @loop_test(i32 noundef %0) { + %2 = icmp sgt i32 %0, 0 + br i1 %2, label %5, label %3 + +3: ; preds = %5, %1 + %4 = phi i32 [ 0, %1 ], [ %9, %5 ] + ret i32 %4 + +5: ; preds = %1, %5 + %6 = phi i32 [ %10, %5 ], [ 0, %1 ] + %7 = phi i32 [ %9, %5 ], [ 0, %1 ] + %8 = lshr i32 %6, 1 + %9 = add nuw nsw i32 %8, %7 + %10 = add nuw nsw i32 %6, 1 + %11 = icmp eq i32 %10, %0 + br i1 %11, label %3, label %5, !llvm.loop !9 +} + +; CHECK: @simd_test{{.*}}#[[SIMD_TEST_ATTRS:[0-9]+]] +define noundef i32 @simd_test(<4 x i32> noundef %0) { + %2 = extractelement <4 x i32> %0, i64 0 + ret i32 %2 +} + +; COM: now check all the combinations + +; CHECK: @simd_fastmath_test{{.*}}#[[SIMD_FASTMATH_TEST_ATTRS:[0-9]+]] +define noundef float @simd_fastmath_test(<4 x float> noundef %0) { + %2 = extractelement <4 x float> %0, i64 0 + %3 = extractelement <4 x float> %0, i64 1 + %4 = fadd fast float %2, %3 + ret float %4 +} + +; CHECK: @loop_fastmath_test{{.*}}#[[LOOP_FASTMATH_TEST_ATTRS:[0-9]+]] +define noundef i32 @loop_fastmath_test(i32 noundef %0) { + %2 = icmp sgt i32 %0, 0 + br i1 %2, label %7, label %5 + +3: ; preds = %7 + %4 = fptosi float %12 to i32 + br label %5 + +5: ; preds = %3, %1 + %6 = phi i32 [ 0, %1 ], [ %4, %3 ] + ret i32 %6 + +7: ; preds = %1, %7 + %8 = phi i32 [ %13, %7 ], [ 0, %1 ] + %9 = phi float [ %12, %7 ], [ 0.000000e+00, %1 ] + %10 = lshr i32 %8, 1 + %11 = sitofp i32 %10 to float + %12 = fadd fast float %9, %11 + %13 = add nuw nsw i32 %8, 1 + %14 = icmp eq i32 %13, %0 + br i1 %14, label %3, label %7, !llvm.loop !9 +} + +; CHECK: @simd_loop_test{{.*}}#[[SIMD_LOOP_TEST_ATTRS:[0-9]+]] +define dso_local noundef i32 @simd_loop_test(<4 x i32> noundef %0) { + %2 = extractelement <4 x i32> %0, i64 0 + %3 = icmp sgt i32 %2, 0 + br i1 %3, label %6, label %4 + +4: ; preds = %6, %1 + %5 = phi i32 [ 0, %1 ], [ %10, %6 ] + ret i32 %5 + +6: ; preds = %1, %6 + %7 = phi i32 [ %11, %6 ], [ 0, %1 ] + %8 = phi i32 [ %10, %6 ], [ 0, %1 ] + %9 = lshr i32 %7, 1 + %10 = add nuw nsw i32 %9, %8 + %11 = add nuw nsw i32 %7, 1 + %12 = icmp eq i32 %11, %2 + br i1 %12, label %4, label %6, !llvm.loop !9 +} + +; CHECK: @simd_loop_fastmath_test{{.*}}#[[SIMD_LOOP_FASTMATH_TEST_ATTRS:[0-9]+]] +define noundef 
i32 @simd_loop_fastmath_test(<4 x i32> noundef %0) { + %2 = extractelement <4 x i32> %0, i64 0 + %3 = icmp sgt i32 %2, 0 + br i1 %3, label %8, label %6 + +4: ; preds = %8 + %5 = fptosi float %13 to i32 + br label %6 + +6: ; preds = %4, %1 + %7 = phi i32 [ 0, %1 ], [ %5, %4 ] + ret i32 %7 + +8: ; preds = %1, %8 + %9 = phi i32 [ %14, %8 ], [ 0, %1 ] + %10 = phi float [ %13, %8 ], [ 0.000000e+00, %1 ] + %11 = lshr i32 %9, 1 + %12 = sitofp i32 %11 to float + %13 = fadd fast float %10, %12 + %14 = add nuw nsw i32 %9, 1 + %15 = icmp eq i32 %14, %2 + br i1 %15, label %4, label %8, !llvm.loop !9 +} + +; COM: check for fvar and reloc annotations on functions used by other globals + +@func_gv = global i32 (i32)* @func_in_gv, align 8 + +; CHECK: @func_in_gv{{.*}}#[[FUNC_IN_GV_ATTRS:[0-9]+]] +define noundef i32 @func_in_gv(i32 noundef returned %0) { + ret i32 %0 +} + +@aliaser = alias i32 (i32)*, bitcast (i32 (i32)* @aliasee to i32 (i32)**) + +; CHECK: @aliasee{{.*}}#[[ALIASEE_ATTRS:[0-9]+]] +define i32 @aliasee(i32 noundef returned %0) { + ret i32 %0 +} + +; COM: check for reloc annotations on functions used by other functions +; CHECK: @cloned{{.*}}#[[CLONED_RELOC_ATTRS:[0-9]+]] +define noundef float @cloned(float noundef %0, float noundef %1) { + %3 = fadd fast float %0, %1 + ret float %3 +} + +define noundef i32 @uncloned(i32 noundef %0) { + %2 = sitofp i32 %0 to float + %3 = call noundef float @cloned(float noundef %2, float noundef %2) + %4 = fptosi float %3 to i32 + ret i32 %4 +} + +; COM: Note that these strings are hex-encoded bits of the target indices that will be cloned +; CHECK-DAG: attributes #[[BORING_ATTRS]] = { "julia.mv.clones"="2" } +; CHECK-DAG: attributes #[[FASTMATH_TEST_ATTRS]] = { "julia.mv.clones"="6" } +; CHECK-DAG: attributes #[[LOOP_TEST_ATTRS]] = { "julia.mv.clones"="A" } +; CHECK-DAG: attributes #[[SIMD_TEST_ATTRS]] = { "julia.mv.clones"="12" } +; CHECK-DAG: attributes #[[SIMD_FASTMATH_TEST_ATTRS]] = { "julia.mv.clones"="16" } +; CHECK-DAG: attributes #[[LOOP_FASTMATH_TEST_ATTRS]] = { "julia.mv.clones"="E" } +; CHECK-DAG: attributes #[[SIMD_LOOP_TEST_ATTRS]] = { "julia.mv.clones"="1A" } +; CHECK-DAG: attributes #[[SIMD_LOOP_FASTMATH_TEST_ATTRS]] = { "julia.mv.clones"="1E" } +; CHECK-DAG: attributes #[[FUNC_IN_GV_ATTRS]] +; CHECK-SAME: "julia.mv.clones"="2" +; CHECK-SAME: "julia.mv.fvar" +; CHECK-DAG: attributes #[[ALIASEE_ATTRS]] +; CHECK-SAME: "julia.mv.clones"="2" +; CHECK-SAME: "julia.mv.reloc" +; CHECK-DAG: attributes #[[CLONED_RELOC_ATTRS]] +; CHECK-SAME: "julia.mv.clones"="6" +; CHECK-SAME: "julia.mv.reloc" + +; CHECK-LABEL: !llvm.module.flags + +!llvm.module.flags = !{!0, !1, !2} + +; CHECK-DAG: julia.mv.enable +; CHECK-DAG: julia.mv.skipcloning +; CHECK-DAG: julia.mv.specs +; CHECK-DAG: julia.mv.annotated +; CHECK-DAG: julia.mv.veccall + +!0 = !{i32 1, !"julia.mv.enable", i32 1} +!1 = !{i32 1, !"julia.mv.skipcloning", i32 1} +!2 = !{i32 1, !"julia.mv.specs", !3} +!3 = !{!4, !5, !6, !7, !8} +!4 = !{!"cpubase", !"nofeatures", i32 0, i32 2} +!5 = !{!"cpucloneall", !"cloneall", i32 0, i32 2} +!6 = !{!"cpufastmath", !"fastmathclone", i32 0, i32 4} +!7 = !{!"cpuloop", !"loopclone", i32 0, i32 8} +!8 = !{!"cpusimd", !"simdclone", i32 0, i32 16} +!9 = !{!9} diff --git a/test/llvmpasses/multiversioning-clone-only.ll b/test/llvmpasses/multiversioning-clone-only.ll new file mode 100644 index 0000000000000..6cd407f2e461f --- /dev/null +++ b/test/llvmpasses/multiversioning-clone-only.ll @@ -0,0 +1,218 @@ +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE + +; CHECK: @jl_fvar_idxs = hidden constant [1 x i32] zeroinitializer +; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer +; TYPED: @subtarget_cloned_gv = hidden global i64* null +; OPAQUE: @subtarget_cloned_gv = hidden global ptr null +; TYPED: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null +; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr null +; CHECK: @jl_fvar_offsets = hidden constant [2 x i32] [i32 1, i32 0] +; CHECK: @jl_gvar_base = hidden constant i64 0 +; CHECK: @jl_gvar_offsets = hidden constant [1 x i32] zeroinitializer +; CHECK: @jl_clone_slots = hidden constant [5 x i32] +; CHECK-SAME: i32 2, i32 0, {{.*}} sub {{.*}}@subtarget_cloned.reloc_slot{{.*}}@jl_gvar_base +; CHECK: @jl_clone_idxs = hidden constant [13 x i32] +; COM: TODO actually check the clone idxs maybe? +; CHECK: @jl_clone_offsets = hidden constant [4 x i32] +; CHECK-SAME: sub +; CHECK-SAME: @subtarget_cloned.1 +; CHECK-SAME: @subtarget_cloned +; CHECK-SAME: sub +; CHECK-SAME: @subtarget_cloned.2 +; CHECK-SAME: @subtarget_cloned +; CHECK-SAME: sub + +@jl_fvars = global [1 x i64*] [i64* bitcast (i32 (i32)* @subtarget_cloned to i64*)], align 16 +@jl_gvars = global [0 x i64*] zeroinitializer, align 16 +@jl_fvar_idxs = hidden constant [1 x i32] [i32 0], align 16 +@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 16 +@subtarget_cloned_gv = hidden global i64* bitcast (i32 (i32)* @subtarget_cloned to i64*), align 16 + +@subtarget_cloned_aliased = alias i32 (i32), i32 (i32)* @subtarget_cloned + +; CHECK: define{{.*}}@boring({{.*}}#[[BORING_DEFAULT_ATTRS:[0-9]+]] +; CHECK-NEXT: ret i32 %0 +define noundef i32 @boring(i32 noundef %0) #0 { + ret i32 %0 +} + +; CHECK: declare{{.*}}@declaration({{.*}}#[[DECLARATION_DEFAULT_ATTRS:[0-9]+]] +declare i32 @declaration(i32 %0) #1 + +; CHECK: define{{.*}}@call_boring({{.*}}#[[BORING_DEFAULT_ATTRS]] +; CHECK-NEXT: %2 = call noundef i32 @boring(i32 noundef %0) +define noundef i32 @call_boring(i32 noundef %0) #0 { + %2 = call noundef i32 @boring(i32 noundef %0) + ret i32 %2 +} + +; CHECK: define{{.*}}@call_declaration({{.*}}#[[DECLARATION_DEFAULT_ATTRS]] +; CHECK-NEXT: %2 = call noundef i32 @declaration(i32 noundef %0) +define noundef i32 @call_declaration(i32 noundef %0) #1 { + %2 = call noundef i32 @declaration(i32 noundef %0) + ret i32 %2 +} + +; CHECK: define{{.*}}@subtarget_cloned({{.*}}#[[SUBTARGET_CLONED_DEFAULT_ATTRS:[0-9]+]] +; CHECK-NEXT: ret i32 0 +define noundef i32 @subtarget_cloned(i32 noundef %0) #2 { + ret i32 0 +} + +; COM: should fixup this callsite since 2 is cloned for a subtarget +; CHECK: define{{.*}}@call_subtarget_cloned({{.*}}#[[CALL_SUBTARGET_CLONED_DEFAULT_ATTRS:[0-9]+]] +; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA:[0-9]+]], !invariant.load +; CHECK-NEXT: call{{.*}}[[FUNC_PTR]] +; CHECK: ret i32 +define noundef i32 @call_subtarget_cloned(i32 noundef %0) #3 { + %2 = call noundef i32 @subtarget_cloned(i32 noundef %0) + ret i32 %2 +} + +; CHECK: 
define{{.*}}@call_subtarget_cloned_but_not_cloned({{.*}}#[[BORING_DEFAULT_ATTRS]] +; CHECK-NEXT: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load +; CHECK-NEXT: call{{.*}}[[FUNC_PTR]] +; CHECK: ret i32 +define noundef i32 @call_subtarget_cloned_but_not_cloned(i32 noundef %0) #0 { + %2 = call noundef i32 @subtarget_cloned(i32 noundef %0) + ret i32 %2 +} + +; CHECK: define{{.*}}@boring.1({{.*}}#[[BORING_CLONEALL_ATTRS:[0-9]+]] +; CHECK-NEXT: ret i32 %0 + +; CHECK: declare{{.*}}@declaration.1({{.*}}#[[DECLARATION_CLONEALL_ATTRS:[0-9]+]] + +; COM: should not fixup this callsite since boring is not cloned for a subtarget +; COM: also should call boring.1 instead of boring +; CHECK: define{{.*}}@call_boring.1({{.*}}#[[BORING_CLONEALL_ATTRS]] +; CHECK-NEXT: %2 = call noundef i32 @boring.1(i32 noundef %0) + +; CHECK: define{{.*}}@call_declaration.1({{.*}}#[[DECLARATION_CLONEALL_ATTRS]] +; CHECK-NEXT: %2 = call noundef i32 @declaration.1(i32 noundef %0) + +; CHECK: define{{.*}}@subtarget_cloned.1({{.*}}#[[SUBTARGET_CLONED_CLONEALL_ATTRS:[0-9]+]] +; CHECK-NEXT: ret i32 0 + +; CHECK: define{{.*}}@subtarget_cloned.2({{.*}}#[[SUBTARGET_CLONED_FASTMATH_ATTRS:[0-9]+]] +; CHECK-NEXT: ret i32 0 + +; COM: should *NOT* fixup this callsite since subtarget_cloned is not cloned for a subtarget of the cloneall +; CHECK: define{{.*}}@call_subtarget_cloned.1({{.*}}#[[CALL_SUBTARGET_CLONED_CLONEALL_ATTRS:[0-9]+]] +; CHECK-NEXT: %2 = call noundef i32 @subtarget_cloned.1(i32 noundef %0) + +; CHECK: define {{.*}}@call_subtarget_cloned.2({{.*}}#[[CALL_SUBTARGET_CLONED_FASTMATH_ATTRS:[0-9]+]] +; CHECK-NEXT: %2 = call noundef i32 @subtarget_cloned.2(i32 noundef %0) + +; CHECK: define{{.*}}@call_subtarget_cloned_but_not_cloned.1({{.*}}#[[BORING_CLONEALL_ATTRS]] +; CHECK-NEXT: %2 = call noundef i32 @subtarget_cloned.1(i32 noundef %0) + +; COM: should not have cloned for fastmath +; CHECK-NOT: @subtarget_cloned_but_not_cloned.2 + +; COM: check for alias being rewritten to a function trampoline +; CHECK: define{{.*}}@subtarget_cloned_aliased{{.*}}#[[SUBTARGET_ALIASED_ATTRS:[0-9]+]] +; CHECK-NOT: } +; CHECK: [[FUNC_PTR:%[0-9]+]] = load{{.*}}@subtarget_cloned.reloc_slot{{.*}}!tbaa ![[TBAA_CONST_METADATA]], !invariant.load +; CHECK-NEXT: call{{.*}}[[FUNC_PTR]] +; CHECK: ret i32 + +; CHECK: attributes #[[BORING_DEFAULT_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="2" +; CHECK-DAG: "julia.mv.clone"="0" +; CHECK-DAG: "target-cpu"="cpubase" +; CHECK-DAG: "target-features"="nofeatures" +; CHECK-SAME: } +; CHECK: attributes #[[DECLARATION_DEFAULT_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="2" +; CHECK-DAG: "julia.mv.clone"="0" +; CHECK-DAG: "target-cpu"="cpubase" +; CHECK-DAG: "target-features"="nofeatures" +; CHECK-SAME: } +; CHECK: attributes #[[SUBTARGET_CLONED_DEFAULT_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="6" +; CHECK-DAG: "julia.mv.clone"="0" +; CHECK-DAG: "target-cpu"="cpubase" +; CHECK-DAG: "target-features"="nofeatures" +; CHECK-DAG: "julia.mv.reloc" +; CHECK-SAME: } +; CHECK: attributes #[[CALL_SUBTARGET_CLONED_DEFAULT_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="6" +; CHECK-DAG: "julia.mv.clone"="0" +; CHECK-DAG: "target-cpu"="cpubase" +; CHECK-DAG: "target-features"="nofeatures" +; CHECK-SAME: } +; CHECK: attributes #[[BORING_CLONEALL_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="2" +; CHECK-DAG: "julia.mv.clone"="1" +; CHECK-DAG: "target-cpu"="cpucloneall" +; CHECK-DAG: 
"target-features"="cloneall" +; CHECK-SAME: } +; CHECK: attributes #[[DECLARATION_CLONEALL_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="2" +; CHECK-DAG: "julia.mv.clone"="1" +; CHECK-DAG: "target-cpu"="cpucloneall" +; CHECK-DAG: "target-features"="cloneall" +; CHECK-SAME: } +; CHECK: attributes #[[SUBTARGET_CLONED_CLONEALL_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="6" +; CHECK-DAG: "julia.mv.clone"="1" +; CHECK-DAG: "target-cpu"="cpucloneall" +; CHECK-DAG: "target-features"="cloneall" +; CHECK-DAG: "julia.mv.reloc" +; CHECK-SAME: } +; CHECK: attributes #[[SUBTARGET_CLONED_FASTMATH_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="6" +; CHECK-DAG: "julia.mv.clone"="2" +; CHECK-DAG: "target-cpu"="cpufastmath" +; CHECK-DAG: "target-features"="fastmathclone" +; CHECK-DAG: "julia.mv.reloc" +; CHECK-SAME: } +; CHECK: attributes #[[CALL_SUBTARGET_CLONED_CLONEALL_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="6" +; CHECK-DAG: "julia.mv.clone"="1" +; CHECK-DAG: "target-cpu"="cpucloneall" +; CHECK-DAG: "target-features"="cloneall" +; CHECK-SAME: } +; CHECK: attributes #[[CALL_SUBTARGET_CLONED_FASTMATH_ATTRS]] +; CHECK-SAME: { +; CHECK-DAG: "julia.mv.clones"="6" +; CHECK-DAG: "julia.mv.clone"="2" +; CHECK-DAG: "target-cpu"="cpufastmath" +; CHECK-DAG: "target-features"="fastmathclone" +; CHECK-SAME: } +; CHECK: attributes #[[SUBTARGET_ALIASED_ATTRS]] +; CHECK-SAME: { +; CHECK-SAME: "julia.mv.alias" +; CHECK-SAME: } +attributes #0 = {"julia.mv.clones"="2"} +attributes #1 = {"julia.mv.clones"="2" "test.unique"="1"} +attributes #2 = {"julia.mv.clones"="6" "julia.mv.reloc"} +attributes #3 = {"julia.mv.clones"="6"} + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"julia.mv.enable", i32 1} +!1 = !{i32 1, !"julia.mv.annotated", i32 1} +!2 = !{i32 1, !"julia.mv.specs", !3} +!3 = !{!4, !5, !6, !7, !8} +!4 = !{!"cpubase", !"nofeatures", i32 0, i32 2} +!5 = !{!"cpucloneall", !"cloneall", i32 0, i32 2} +!6 = !{!"cpufastmath", !"fastmathclone", i32 0, i32 4} +!7 = !{!"cpuloop", !"loopclone", i32 0, i32 8} +!8 = !{!"cpusimd", !"simdclone", i32 0, i32 16} +; CHECK-DAG: ![[TBAA_CONST_METADATA]] = !{![[JTBAA_CONST_METADATA:[0-9]+]], ![[JTBAA_CONST_METADATA]] +; CHECK-DAG: ![[JTBAA_CONST_METADATA]] = !{!"jtbaa_const" diff --git a/test/llvmpasses/multiversioning-x86.ll b/test/llvmpasses/multiversioning-x86.ll new file mode 100644 index 0000000000000..ca43462e1eda9 --- /dev/null +++ b/test/llvmpasses/multiversioning-x86.ll @@ -0,0 +1,132 @@ +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning,CPUFeatures' -S %s | FileCheck %s + +; COM: This test checks that multiversioning actually happens from start to finish +; COM: We need the fvars for a proper test + + + +; CHECK: @jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 16 +; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 16 +; TYPED: @simd_test.reloc_slot = hidden global i32 (<4 x i32>)* null +; OPAQUE: @simd_test.reloc_slot = hidden global ptr null +; TYPED: @jl_fvar_offsets = hidden constant [6 x i32] [i32 5, i32 0, i32 trunc (i64 sub (i64 ptrtoint (float (float, float)* @fastmath_test to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (i32)* @loop_test to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test_call to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32)] +; OPAQUE: @jl_fvar_offsets = hidden constant [6 x i32] [i32 5, i32 0, i32 trunc (i64 sub (i64 ptrtoint (ptr @fastmath_test to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @loop_test to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test_call to i64), i64 ptrtoint (ptr @boring to i64)) to i32)] +; CHECK: @jl_gvar_base = hidden constant i64 0 +; CHECK: @jl_gvar_offsets = hidden constant [1 x i32] zeroinitializer +; TYPED: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)** @simd_test.reloc_slot to i64), i64 ptrtoint (i64* @jl_gvar_base to i64)) to i32)] +; OPAQUE: @jl_clone_slots = hidden constant [3 x i32] [i32 1, i32 3, i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.reloc_slot to i64), i64 ptrtoint (ptr @jl_gvar_base to i64)) to i32)] +; CHECK: @jl_clone_idxs = hidden constant [10 x i32] [i32 -2147483647, i32 3, i32 -2147483647, i32 3, i32 4, i32 1, i32 1, i32 2, i32 -2147483645, i32 4] +; TYPED: @jl_clone_offsets = hidden constant [9 x i32] [i32 trunc (i64 sub (i64 ptrtoint (i32 (i32)* @boring.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (float (float, float)* @fastmath_test.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (i32)* @loop_test.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test_call.1 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (float (float, float)* @fastmath_test.2 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (i32)* @loop_test.2 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test.2 to i64), i64 ptrtoint (i32 (i32)* 
@boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i32 (<4 x i32>)* @simd_test_call.2 to i64), i64 ptrtoint (i32 (i32)* @boring to i64)) to i32)] +; OPAQUE: @jl_clone_offsets = hidden constant [9 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr @boring.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @fastmath_test.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @loop_test.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test_call.1 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @fastmath_test.2 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @loop_test.2 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test.2 to i64), i64 ptrtoint (ptr @boring to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @simd_test_call.2 to i64), i64 ptrtoint (ptr @boring to i64)) to i32)] +; TYPED: @jl_fvar_base = hidden alias i64, bitcast (i32 (i32)* @boring to i64*) +; OPAQUE: @jl_fvar_base = hidden alias i64, ptr @boring + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-linux-gnu" + +@jl_fvars = global [5 x i64*] [i64* bitcast (i32 (i32)* @boring to i64*), + i64* bitcast (float (float, float)* @fastmath_test to i64*), + i64* bitcast (i32 (i32)* @loop_test to i64*), + i64* bitcast (i32 (<4 x i32>)* @simd_test to i64*), + i64* bitcast (i32 (<4 x i32>)* @simd_test_call to i64*) + ], align 16 +@jl_gvars = global [0 x i64*] zeroinitializer, align 16 +@jl_fvar_idxs = hidden constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 16 +@jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer, align 16 + +declare i1 @julia.cpu.have_fma.f32() + +; CHECK: @boring{{.*}}#[[BORING_BASE:[0-9]+]] +define noundef i32 @boring(i32 noundef %0) { + ret i32 %0 +} + +; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_BASE:[0-9]+]] +; CHECK: %3 = sitofp i1 false to float +define noundef float @fastmath_test(float noundef %0, float noundef %1) { + %3 = call i1 @julia.cpu.have_fma.f32() + %4 = sitofp i1 %3 to float + %5 = fadd fast float %0, %4 + ret float %5 +} + +; CHECK: @loop_test{{.*}}#[[NOT_BORING_BASE:[0-9]+]] +define noundef i32 @loop_test(i32 noundef %0) { + %2 = icmp sgt i32 %0, 0 + br i1 %2, label %5, label %3 + +3: ; preds = %5, %1 + %4 = phi i32 [ 0, %1 ], [ %9, %5 ] + ret i32 %4 + +5: ; preds = %1, %5 + %6 = phi i32 [ %10, %5 ], [ 0, %1 ] + %7 = phi i32 [ %9, %5 ], [ 0, %1 ] + %8 = lshr i32 %6, 1 + %9 = add nuw nsw i32 %8, %7 + %10 = add nuw nsw i32 %6, 1 + %11 = icmp eq i32 %10, %0 + br i1 %11, label %3, label %5;, !llvm.loop - +} + +; CHECK: @simd_test{{.*}}#[[SIMD_BASE_RELOC:[0-9]+]] +define noundef i32 @simd_test(<4 x i32> noundef %0) { + %2 = extractelement <4 x i32> %0, i64 0 + ret i32 %2 +} + +; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_BASE:[0-9]+]] +define noundef i32 @simd_test_call(<4 x i32> noundef %0) { + %2 = call noundef i32 @simd_test(<4 x i32> noundef %0) + ret i32 %2 +} + +; CHECK: @boring{{.*}}#[[BORING_CLONE:[0-9]+]] + +; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]] +; CHECK: %3 = sitofp i1 false to float + +; CHECK: @fastmath_test{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]] +; CHECK: %3 = sitofp i1 true to float + +; CHECK: 
@loop_test{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]] + +; CHECK: @loop_test{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]] + +; CHECK: @simd_test{{.*}}#[[SIMD_CLONE1:[0-9]+]] + +; CHECK: @simd_test{{.*}}#[[SIMD_CLONE2:[0-9]+]] + +; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE1:[0-9]+]] +; TYPED: %2 = load i32 (<4 x i32>)*, i32 (<4 x i32>)** @simd_test.reloc_slot, align 8, !tbaa !8, !invariant.load !12 +; OPAQUE: %2 = load ptr, ptr @simd_test.reloc_slot, align 8, !tbaa !8, !invariant.load !12 +; CHECK: %3 = call noundef i32 %2(<4 x i32> noundef %0) + +; CHECK: @simd_test_call{{.*}}#[[NOT_BORING_CLONE2:[0-9]+]] +; CHECK: %2 = call noundef i32 @simd_test.2(<4 x i32> noundef %0) + +; CHECK-DAG: attributes #[[BORING_BASE]] = { "julia.mv.clone"="0" "julia.mv.clones"="2" "julia.mv.fvar" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[NOT_BORING_BASE]] = { "julia.mv.clone"="0" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[SIMD_BASE_RELOC]] = { "julia.mv.clone"="0" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="x86-64" "target-features"="+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[BORING_CLONE]] = { "julia.mv.clone"="1" "julia.mv.clones"="2" "julia.mv.fvar" "target-cpu"="sandybridge" 
"target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[NOT_BORING_CLONE1]] = { "julia.mv.clone"="1" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[NOT_BORING_CLONE2]] = { "julia.mv.clone"="2" "julia.mv.clones"="6" "julia.mv.fvar" "target-cpu"="haswell" "target-features"="+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[SIMD_CLONE1]] = { "julia.mv.clone"="1" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="sandybridge" "target-features"="+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +; CHECK-DAG: attributes #[[SIMD_CLONE2]] = { "julia.mv.clone"="2" "julia.mv.clones"="6" "julia.mv.reloc" "target-cpu"="haswell" 
"target-features"="+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } + + +!llvm.module.flags = !{!0, !2} + + +!0 = !{i32 1, !"julia.mv.enable", i32 1} +!1 = !{!1} +!2 = !{i32 1, !"julia.mv.specs", !3} +!3 = !{!4, !5, !6} +!4 = !{!"x86-64", !"+cx16,-sse3,-pclmul,-ssse3,-fma,-sse4.1,-sse4.2,-movbe,-popcnt,-aes,-xsave,-avx,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sahf,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 0, i32 0} +!5 = !{!"sandybridge", !"+sahf,+avx,+xsave,+popcnt,+sse4.2,+sse4.1,+cx16,+ssse3,+pclmul,+sse3,-fma,-movbe,-aes,-f16c,-rdrnd,-fsgsbase,-bmi,-avx2,-bmi2,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-lzcnt,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 0, i32 2} +!6 = !{!"haswell", !"+lzcnt,+sahf,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-aes,-rdrnd,-rtm,-avx512f,-avx512dq,-rdseed,-adx,-avx512ifma,-clflushopt,-clwb,-avx512pf,-avx512er,-avx512cd,-sha,-avx512bw,-avx512vl,-prefetchwt1,-avx512vbmi,-pku,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-uintr,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-avx512fp16,-amx-tile,-amx-int8,-sse4a,-prfchw,-xop,-fma4,-tbm,-mwaitx,-xsaveopt,-xsavec,-xsaves,-clzero,-wbnoinvd,-avxvnni,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8", i32 1, i32 284} diff --git a/test/llvmpasses/names.jl b/test/llvmpasses/names.jl new file mode 100644 index 0000000000000..344738abc3802 --- /dev/null +++ b/test/llvmpasses/names.jl @@ -0,0 +1,186 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll +# RUN: cat %t/module.ll | FileCheck %s + +## Notes: +# This script uses the `emit` function (defined llvmpasses.jl) to emit either +# optimized or unoptimized LLVM IR. 
Each function is emitted individually and +# `llvm-link` is used to create a single module that can be passed to opt. +# The order in which files are emitted and linked is important since `lit` will +# process the test cases in order. + +include(joinpath("..", "testhelpers", "llvmpasses.jl")) + +# COM: check basic parameter names +function f1(a, b, c, d) + return a + b + c + d +end + +# COM: check basic parameter names + varargs +function f2(a, b, c, d, e...) + return a + b + c + d + sum(e) +end + +mutable struct D + i::Int64 +end +struct C + d::D +end +struct B + c::C +end +struct A + b::B +end + +# COM: check getfield/setfield names +function f5(a) + a.b.c.d.i = 0 + return a.b.c.d +end + +struct H end +struct G + h::Ref{H} +end +struct F + g::Ref{G} +end +struct E + f::Ref{F} +end + +# COM: check gc lowering names +function f6(e) + return e.f[].g[].h[] +end + +# COM: check getfield for Tuples +function f7(a) + return a[2] +end + +# COM: check write barrier names and struct names +mutable struct Barrier + b +end + +# COM: check write barrier names +function f8(b,y) + b.b = y + return b +end + +struct Named + x::Int +end + +function fmemory(nel) + return Memory{Int64}(undef,nel) +end +# CHECK-LABEL: define {{(swiftcc )?}}double @julia_f1 +# CHECK-SAME: double %"a::Float64" +# CHECK-SAME: double %"b::Float64" +# CHECK-SAME: double %"c::Float64" +# CHECK-SAME: double %"d::Float64" + +# CHECK: fadd double +# CHECK-DAG: %"a::Float64" +# CHECK-DAG: %"b::Float64" +# CHECK-DAG: fadd double +# CHECK-DAG: %"c::Float64" +# CHECK-DAG: fadd double +# CHECK-DAG: %"d::Float64" +# CHECK: ret double +# CHECK: } + +# CHECK-LABEL: define nonnull {} addrspace(10)* @jfptr_f1 +# CHECK-SAME: %"function::Core.Function" +# CHECK-SAME: %"args::Any[]" +# CHECK-SAME: %"nargs::UInt32" +# CHECK: %"+Core.Float64 +# CHECK: ret {} addrspace(10)* +# CHECK: } +emit(f1, Float64, Float64, Float64, Float64) + +# CHECK: define {{(swiftcc )?}}double @julia_f2 +# CHECK-SAME: double %"a::Float64" +# CHECK-SAME: double %"b::Float64" +# CHECK-SAME: double %"c::Float64" +# CHECK-SAME: double %"d::Float64" +# CHECK-SAME: double %"e[1]::Float64" +emit(f2, Float64, Float64, Float64, Float64, Float64) + +# CHECK: define {{(swiftcc )?}}double @julia_f2 +# CHECK-SAME: double %"a::Float64" +# CHECK-SAME: double %"b::Float64" +# CHECK-SAME: double %"c::Float64" +# CHECK-SAME: double %"d::Float64" +# CHECK-SAME: double %"e[1]::Float64" +# CHECK-SAME: double %"e[2]::Float64" +emit(f2, Float64, Float64, Float64, Float64, Float64, Float64) + + +# CHECK: define {{(swiftcc )?}}double @julia_f2 +# CHECK-SAME: double %"a::Float64" +# CHECK-SAME: double %"b::Float64" +# CHECK-SAME: double %"c::Float64" +# CHECK-SAME: double %"d::Float64" +# CHECK-SAME: double %"e[1]::Float64" +# CHECK-SAME: double %"e[2]::Float64" +# CHECK-SAME: double %"e[3]::Float64" +emit(f2, Float64, Float64, Float64, Float64, Float64, Float64, Float64) + +# CHECK: define {{(swiftcc )?}}nonnull {} addrspace(10)* @julia_f5 +# CHECK-SAME: %"a::A" +# CHECK: %"a::A.b_ptr.c_ptr.d +emit(f5, A) + +# CHECK: define {{(swiftcc )?}}nonnull {} addrspace(10)* @julia_f6 +# CHECK-SAME: %"e::E" +# CHECK: %jlcallframe +# CHECK: %gcframe +# CHECK: %frame.nroots +# CHECK: %frame.prev +# CHECK: %task.gcstack +# CHECK: %ptls_field +# CHECK: %ptls_load +# CHECK: %safepoint +# CHECK: %"e::E.f_ptr" +# CHECK: %"e::E.f" +# CHECK: %"e::E.f.tag_addr" +# CHECK: %"e::E.f.tag" +# CHECK: @"+Main.Base.RefValue +# CHECK: %gc_slot_addr_0 +# CHECK: @"jl_sym#g +# CHECK: @"jl_sym#h +emit(f6, E) + + +# CHECK: define 
{{(swiftcc )?}}i64 @julia_f7 +# CHECK-SAME: %"a::Tuple" +# CHECK: %"a::Tuple[2]_ptr.unbox +emit(f7, Tuple{Int,Int}) + +# CHECK: define {{(swiftcc )?}}nonnull {} addrspace(10)* @julia_f8 +# CHECK-SAME: %"y::Int64" +# CHECK: %parent_bits +# CHECK: %parent_old_marked +# CHECK: %child_bit +# CHECK: %child_not_marked +emit(f8, Barrier, Int) + +# CHECK: define {{(swiftcc )?}}nonnull {} addrspace(10)* @julia_Barrier +# CHECK-SAME: %"b::Named" +# CHECK: %"new::Barrier" +# CHECK: %"box::Named" +# CHECK: %parent_bits +# CHECK: %parent_old_marked +emit(Barrier, Named) + +# CHECK: define {{(swiftcc )?}}nonnull {} addrspace(10)* @julia_fmemory +# CHECK-SAME: %"nel::Int64" +# CHECK: %"Memory{Int64}[]" +emit(fmemory, Int64) diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll new file mode 100644 index 0000000000000..6a5909ff5fd40 --- /dev/null +++ b/test/llvmpasses/parsing.ll @@ -0,0 +1,7 @@ +; COM: NewPM-only test, tests for ability to parse Julia passes + +; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier<strong>,GCInvariantVerifier<no-strong>),LowerPTLSPass<imaging>,LowerPTLSPass<no-imaging>,JuliaMultiVersioning<external>,JuliaMultiVersioning<no-external>)' -S %s -o /dev/null + +define void @test() { + ret void +} diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl new file mode 100644 index 0000000000000..e48a5f7df111f --- /dev/null +++ b/test/llvmpasses/pipeline-o0.jl @@ -0,0 +1,40 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + +# RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s + +include(joinpath("..", "testhelpers", "llvmpasses.jl")) + +# CHECK-LABEL: @julia_simple +# CHECK-NOT: julia.get_pgcstack +# CHECK-NOT: julia.gc_alloc_obj +# CHECK: ijl_gc_pool_alloc +# COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes +function simple() + Ref(0) +end + +# CHECK-LABEL: @julia_buildarray +# CHECK-NOT: julia.write_barrier +# CHECK: gc_queue_root +function buildarray() + out = [] + for i in 1:100 + push!(out, Ref(0)) + end + out +end + +emit(simple) +emit(buildarray) diff --git a/test/llvmpasses/pipeline-o2-allocs.jl b/test/llvmpasses/pipeline-o2-allocs.jl new file mode 100644 index 0000000000000..86ab9125f2f27 --- /dev/null +++ b/test/llvmpasses/pipeline-o2-allocs.jl @@ -0,0 +1,74 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s + +include(joinpath("..", "testhelpers", "llvmpasses.jl")) + +# COM: This tests that simplifycfg is still hoisting allocations in different basic blocks +# COM: into the parent basic block, and deduplicating them in the process +# CHECK-LABEL: @julia_split +# CHECK: alloc +# CHECK-NOT: alloc +# CHECK: ret +function split(maybe) + if maybe + Ref(1) + else + Ref(2) + end +end + +# COM: This tests that irrespective of the condition outside the loop +# COM: allocations inside the loop are hoisted and the loop is deleted +# CHECK-LABEL: @julia_loop_alloc +# CHECK: phi +# CHECK-NOT: phi +function loop_alloc(N) + ref = Ref(zero(typeof(N))) + N <= zero(typeof(N)) && return ref + for i in one(typeof(N)):N + ref = Ref(i) + end + ref +end + +# COM: This tests that even with the allocation LLVM will recognize +# COM: that the loop is meaningless and delete it +# CHECK-LABEL: @julia_loop_const +# CHECK-NOT: br +function loop_const() + ref = Ref(0) + for i in 1:1000 + ref = Ref(0) + end + ref +end + +# COM: This tests that the GC.@preserve macro is being ignored since ref +# COM: is not used anywhere else +# CHECK-LABEL: @julia_nopreserve +# CHECK-NOT: alloc +# CHECK-NOT: julia.gc_preserve_begin +# CHECK-NOT: julia.gc_preserve_end +function nopreserve() + ref = Ref(0) + GC.@preserve ref begin + end +end + +# COM: this cordons off the attributes/function declarations from the actual +# COM: IR that we really want to check +# CHECK: attributes + +emit(split, Bool) +emit(loop_alloc, Int64) +emit(loop_const) +emit(nopreserve) diff --git a/test/llvmpasses/pipeline-o2-broadcast.jl b/test/llvmpasses/pipeline-o2-broadcast.jl new file mode 100644 index 0000000000000..83a4450522c79 --- /dev/null +++ b/test/llvmpasses/pipeline-o2-broadcast.jl @@ -0,0 +1,130 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + +# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s + +include(joinpath("..", "testhelpers", "llvmpasses.jl")) + +# COM: Check broadcasted outer product is vectorized + +# COM: Float32 +# CHECK: @japi1_prod_v_vT +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float> +# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float> +# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float> +# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float> + +# COM: Float64 +# CHECK: @japi1_prod_v_vT +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double> +# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x double> +# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double> +# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double> + +# COM: Int32 +# CHECK: @japi1_prod_v_vT +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32> +# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32> +# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32> +# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32> + +# COM: Int64 +# CHECK: @japi1_prod_v_vT +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64> +# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64> +# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64> +# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64> + +function prod_v_vT(R, x, y) + R .= x .* y' +end + +# COM: Check broadcasted inner product is vectorized + +# COM: Float32 +# CHECK: @japi1_prod_vT_v +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float> +# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float> +# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float> +# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float> + +# COM: Float64 +# CHECK: @japi1_prod_vT_v +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double> +# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x double> +# CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double> +# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double> + +# COM: Int32 +# CHECK: @japi1_prod_vT_v +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32> +# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32> +# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32> +# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32> + +# COM: Int64 +# CHECK: @japi1_prod_vT_v +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64> +# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64> +# CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64> +# CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64> + +function prod_vT_v(R, x, y) + R .= x' .* y +end + +# COM: Check broadcasted multiplications are vectorized + +# COM: Float32 +# CHECK: @japi1_prod_v_M_vT +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float> +# COM: fmul <[[VSCALE]][[VEC_FACTOR]] x float> +# XFAIL-CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x float> +# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x float> + +# COM: Float64 +# CHECK: @japi1_prod_v_M_vT +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double> +# COM: fmul 
<[[VSCALE]][[VEC_FACTOR]] x double> +# XFAIL-CHECK: fmul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x double> +# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x double> + +# COM: Int32 +# CHECK: @japi1_prod_v_M_vT +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32> +# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i32> +# XFAIL-CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i32> +# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i32> + +# COM: Int64 +# CHECK: @japi1_prod_v_M_vT +# COM: load <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64> +# COM: mul <[[VSCALE]][[VEC_FACTOR]] x i64> +# XFAIL-CHECK: mul <[[VSCALE:(vscale x )?]][[VEC_FACTOR:[0-9]+]] x i64> +# XFAIL-CHECK: store <[[VSCALE]][[VEC_FACTOR]] x i64> + +function prod_v_M_vT(R, x, M, y) + R .= x .* M .* y' +end + +emit(prod_v_vT, Matrix{Float32}, Vector{Float32}, Vector{Float32}) +emit(prod_v_vT, Matrix{Float64}, Vector{Float64}, Vector{Float64}) +emit(prod_v_vT, Matrix{Int32}, Vector{Int32}, Vector{Int32}) +emit(prod_v_vT, Matrix{Int64}, Vector{Int64}, Vector{Int64}) + +emit(prod_vT_v, Matrix{Float32}, Vector{Float32}, Vector{Float32}) +emit(prod_vT_v, Matrix{Float64}, Vector{Float64}, Vector{Float64}) +emit(prod_vT_v, Matrix{Int32}, Vector{Int32}, Vector{Int32}) +emit(prod_vT_v, Matrix{Int64}, Vector{Int64}, Vector{Int64}) + +emit(prod_v_M_vT, Matrix{Float32}, Vector{Float32}, Matrix{Float32}, Vector{Float32}) +emit(prod_v_M_vT, Matrix{Float64}, Vector{Float64}, Matrix{Float64}, Vector{Float64}) +emit(prod_v_M_vT, Matrix{Int32}, Vector{Int32}, Matrix{Int32}, Vector{Int32}) +emit(prod_v_M_vT, Matrix{Int64}, Vector{Int64}, Matrix{Int64}, Vector{Int64}) diff --git a/test/llvmpasses/pipeline-o2.jl b/test/llvmpasses/pipeline-o2.jl new file mode 100644 index 0000000000000..3ce2f692fc32e --- /dev/null +++ b/test/llvmpasses/pipeline-o2.jl @@ -0,0 +1,165 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUNx: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL +# RUNx: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL + +# RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF +# RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF + +# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO +# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + +# RUNx: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL +# RUNx: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL + +# RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF +# RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF + +# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO +# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO + +include(joinpath("..", "testhelpers", "llvmpasses.jl")) + +# COM: Ensure safe iteration over one array is not boundschecked and is vectorized + +# ALL-LABEL: @julia_iterate_read +# ALL-NOT: bounds_error +# ALL: vector.body +function iterate_read(arr) + total = zero(eltype(arr)) + for i in eachindex(arr) + total += arr[i] + end + total +end + +# ALL-LABEL: @julia_iterate_write +# ALL-NOT: bounds_error +# ALL: vector.body +function iterate_write(arr, out) + for i in eachindex(arr, out) + out[i] = arr[i] + end +end + +# ALL-LABEL: @"julia_iterate_write! +# ALL-NOT: bounds_error +# ALL: vector.body +function iterate_write!(arr) + for i in eachindex(arr) + arr[i] *= 2 + end +end + +# COM: Ensure safe iteration over multiple arrays is not boundschecked and is vectorized + +# ALL-LABEL: @julia_multiiterate_read +# ALL-NOT: bounds_error +# ALL: vector.body +function multiiterate_read(arr1, arr2) + total = zero(eltype(arr1)) + for i in eachindex(arr1, arr2) + total += arr1[i] + total += arr2[i] + end + total +end + +# ALL-LABEL: @japi1_multiiterate_write +# ALL-NOT: bounds_error +# ALL: vector.body +function multiiterate_write(arr1, arr2, arr3) + for i in eachindex(arr1, arr2, arr3) + arr3[i] += arr1[i] + arr3[i] += arr2[i] + end +end + +# ALL-LABEL: @"julia_multiiterate_write! 
+# ALL-NOT: bounds_error +# ALL: vector.body +function multiiterate_write!(arr1, arr2) + for i in eachindex(arr1, arr2) + arr1[i] += arr2[i] + end +end + +# COM: memset checks + +# COM: INT64 +# ALL: define {{.*}} @julia_zeros +# ALL-NOT: bounds_error +# COM: memset is not used with bounds checks on (too late in the pipeline) +# BC_OFF: llvm.memset +# BC_AUTO: llvm.memset + +# COM: INT32 +# ALL: define {{.*}} @julia_zeros +# ALL-NOT: bounds_error +# COM: memset is not used with bounds checks on (too late in the pipeline) +# BC_OFF: llvm.memset +# BC_AUTO: llvm.memset + +# COM: INT16 +# ALL: define {{.*}} @julia_zeros +# ALL-NOT: bounds_error +# COM: memset is not used with bounds checks on (too late in the pipeline) +# BC_OFF: llvm.memset +# BC_AUTO: llvm.memset + +# COM: check reductive indvars/vectorization + +# ALL-LABEL: @julia_sumloop +# ALL: mul +function sumloop(N) + total = zero(typeof(N)) + for i in one(typeof(N)):N + total += i + end + total +end +# ALL-LABEL: @julia_simd_sumloop +# ALL: vector.body +function simd_sumloop(N) + total = zero(typeof(N)) + @simd for i in one(typeof(N)):N + total += i + end + total +end + +# COM: check hoisting and loop deletion functionality + +# ALL-LABEL: @julia_loopedlength +# ALL-NOT: br +# ALL: ret +function loopedlength(arr) + len = length(arr) + for i in 1:length(arr) + len = length(arr) + end + len +end + +emit(iterate_read, Vector{Int64}) +emit(iterate_write, Vector{Int64}, Vector{Int64}) +emit(iterate_write!, Vector{Int64}) + +emit(multiiterate_read, Vector{Int64}, Vector{Int64}) +emit(multiiterate_write, Vector{Int64}, Vector{Int64}, Vector{Int64}) +emit(multiiterate_write!, Vector{Int64}, Vector{Int64}) + +emit(zeros, Type{Int64}, Int64) +emit(zeros, Type{Int32}, Int64) +emit(zeros, Type{Int16}, Int64) +# COM: Int8 is hardcoded to memset anyways + +emit(sumloop, Int64) +# COM: Float64 doesn't vectorize for some reason +emit(simd_sumloop, Float32) + +emit(loopedlength, Vector{Int64}) diff --git a/test/llvmpasses/pipeline-prints.ll b/test/llvmpasses/pipeline-prints.ll new file mode 100644 index 0000000000000..0c0d81420d9fe --- /dev/null +++ b/test/llvmpasses/pipeline-prints.ll @@ -0,0 +1,335 @@ +; COM: This is a newpm-only test, no legacypm command +; COM: we run all the prefixes even though some don't have tests because we want to make sure they don't crash +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 
--load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' 
--print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' 
--print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION + +; ModuleID = 'f' +source_filename = "f" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @julia_f_199({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0) #0 !dbg !4 { +top: + %x = alloca {} addrspace(10)*, align 8 + %1 = call {}*** @julia.get_pgcstack() + store {} addrspace(10)* null, {} addrspace(10)** %x, align 8 + %2 = bitcast {}*** %1 to {}** + %current_task = getelementptr inbounds {}*, {}** %2, i64 -14 + %3 = bitcast {}** %current_task to i64* + %world_age = getelementptr inbounds i64, i64* %3, i64 15 + store {} addrspace(10)* %0, {} addrspace(10)** %x, align 8 + %4 = bitcast {}*** %1 to {}** + %current_task1 = getelementptr inbounds {}*, {}** %4, i64 -14 + %ptls_field = getelementptr inbounds {}*, {}** %current_task1, i64 16 + %ptls_load = load {}*, {}** %ptls_field, align 8, !tbaa !8 + %ptls = bitcast {}* %ptls_load to {}** + %5 = bitcast {}** %ptls to i64** + %6 = getelementptr inbounds i64*, i64** %5, i64 2 + %safepoint = load i64*, i64** %6, align 8, !tbaa !12, !invariant.load !7 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(i64* %safepoint), !dbg !14 + fence syncscope("singlethread") seq_cst + %7 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !15, !nonnull !7, !dereferenceable !23, !align !24 + %8 = addrspacecast {} addrspace(10)* %7 to {} addrspace(11)*, !dbg !15 + %9 = bitcast {} addrspace(11)* %8 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !15 + %10 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %9, i32 0, i32 1, !dbg !15 + %11 = load i64, i64 addrspace(11)* %10, align 8, !dbg !15, !tbaa !12, !range !25, !invariant.load !7, !alias.scope !26, !noalias !29 + %12 = icmp sle i64 0, %11, !dbg !34 + %13 = icmp ult i64 0, %11, !dbg !42 + %14 = and i1 %12, %13, !dbg !43 + %15 = zext i1 %14 to i8, !dbg !18 + %16 = trunc i8 %15 to i1, !dbg !18 + %17 = xor i1 %16, true, !dbg !18 + br i1 %17, label %L12, label %L9, !dbg !18 + +L9: ; preds = %top + %18 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !46, !nonnull !7, !dereferenceable !23, !align !24 + %19 = addrspacecast {} addrspace(10)* %18 to {} addrspace(11)*, !dbg !46 + %20 = bitcast {} addrspace(11)* %19 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !46 + %21 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %20, i32 0, i32 0, !dbg !46 + %22 = load i8 addrspace(13)*, i8 addrspace(13)* addrspace(11)* %21, align 8, !dbg !46, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, !nonnull !7 + %23 = bitcast i8 addrspace(13)* %22 to i64 addrspace(13)*, !dbg !46 + %24 = getelementptr inbounds i64, i64 addrspace(13)* %23, i64 0, !dbg !46 + %25 = load i64, i64 addrspace(13)* %24, align 8, !dbg !46, !tbaa !48, !alias.scope !51, !noalias !52 + br 
label %L13, !dbg !18 + +L12: ; preds = %top + br label %L13, !dbg !18 + +L13: ; preds = %L12, %L9 + %value_phi = phi i8 [ 0, %L9 ], [ 1, %L12 ] + %value_phi2 = phi i64 [ %25, %L9 ], [ undef, %L12 ] + %value_phi3 = phi i64 [ 2, %L9 ], [ undef, %L12 ] + br label %L17, !dbg !21 + +L17: ; preds = %L13 + %26 = trunc i8 %value_phi to i1, !dbg !22 + %27 = xor i1 %26, true, !dbg !22 + %28 = zext i1 %27 to i8, !dbg !22 + %29 = trunc i8 %28 to i1, !dbg !22 + %30 = xor i1 %29, true, !dbg !22 + br i1 %30, label %L17.L41_crit_edge, label %L17.L19_crit_edge, !dbg !22 + +L17.L41_crit_edge: ; preds = %L17 + br label %L41, !dbg !53 + +L17.L19_crit_edge: ; preds = %L17 + br label %L19, !dbg !18 + +L19: ; preds = %L17.L19_crit_edge, %L40 + %value_phi4 = phi i64 [ %value_phi2, %L17.L19_crit_edge ], [ %value_phi7, %L40 ] + %value_phi5 = phi i64 [ %value_phi3, %L17.L19_crit_edge ], [ %value_phi8, %L40 ] + %value_phi6 = phi i64 [ 0, %L17.L19_crit_edge ], [ %31, %L40 ] + %31 = add i64 %value_phi6, %value_phi4, !dbg !55 + %32 = sub i64 %value_phi5, 1, !dbg !58 + %33 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !61, !nonnull !7, !dereferenceable !23, !align !24 + %34 = addrspacecast {} addrspace(10)* %33 to {} addrspace(11)*, !dbg !61 + %35 = bitcast {} addrspace(11)* %34 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !61 + %36 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %35, i32 0, i32 1, !dbg !61 + %37 = load i64, i64 addrspace(11)* %36, align 8, !dbg !61, !tbaa !12, !range !25, !invariant.load !7, !alias.scope !26, !noalias !29 + %38 = icmp sle i64 0, %37, !dbg !62 + %39 = icmp ult i64 %32, %37, !dbg !65 + %40 = and i1 %38, %39, !dbg !66 + %41 = zext i1 %40 to i8, !dbg !53 + %42 = trunc i8 %41 to i1, !dbg !53 + %43 = xor i1 %42, true, !dbg !53 + br i1 %43, label %L34, label %L31, !dbg !53 + +L31: ; preds = %L19 + %44 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !67, !nonnull !7, !dereferenceable !23, !align !24 + %45 = sub i64 %value_phi5, 1, !dbg !67 + %46 = mul i64 %45, 1, !dbg !67 + %47 = add i64 0, %46, !dbg !67 + %48 = addrspacecast {} addrspace(10)* %44 to {} addrspace(11)*, !dbg !67 + %49 = bitcast {} addrspace(11)* %48 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !67 + %50 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %49, i32 0, i32 0, !dbg !67 + %51 = load i8 addrspace(13)*, i8 addrspace(13)* addrspace(11)* %50, align 8, !dbg !67, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, !nonnull !7 + %52 = bitcast i8 addrspace(13)* %51 to i64 addrspace(13)*, !dbg !67 + %53 = getelementptr inbounds i64, i64 addrspace(13)* %52, i64 %47, !dbg !67 + %54 = load i64, i64 addrspace(13)* %53, align 8, !dbg !67, !tbaa !48, !alias.scope !51, !noalias !52 + %55 = add i64 %value_phi5, 1, !dbg !68 + br label %L35, !dbg !53 + +L34: ; preds = %L19 + br label %L35, !dbg !53 + +L35: ; preds = %L34, %L31 + %value_phi7 = phi i64 [ %54, %L31 ], [ undef, %L34 ] + %value_phi8 = phi i64 [ %55, %L31 ], [ undef, %L34 ] + %value_phi9 = phi i8 [ 0, %L31 ], [ 1, %L34 ] + %56 = trunc i8 %value_phi9 to i1, !dbg !54 + %57 = xor i1 %56, true, !dbg !54 + %58 = zext i1 %57 to i8, !dbg !54 + %59 = trunc i8 %58 to i1, !dbg !54 + %60 = xor i1 %59, true, !dbg !54 + br i1 %60, label %L35.L41_crit_edge, label %L40, !dbg !54 + +L35.L41_crit_edge: ; preds = %L35 + br label %L41, !dbg !53 + +L40: ; preds = 
%L35 + br label %L19, !dbg !18 + +L41: ; preds = %L17.L41_crit_edge, %L35.L41_crit_edge + %value_phi10 = phi i64 [ %31, %L35.L41_crit_edge ], [ 0, %L17.L41_crit_edge ] + ret i64 %value_phi10, !dbg !69 +} + +; Function Attrs: noinline optnone +define nonnull {} addrspace(10)* @jfptr_f_200({} addrspace(10)* %0, {} addrspace(10)** noalias nocapture noundef readonly %1, i32 %2) #1 { +top: + %3 = call {}*** @julia.get_pgcstack() + %4 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %1, i32 0 + %5 = load {} addrspace(10)*, {} addrspace(10)** %4, align 8, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, !nonnull !7, !dereferenceable !23, !align !24 + %6 = call i64 @julia_f_199({} addrspace(10)* %5) + %7 = call nonnull {} addrspace(10)* @ijl_box_int64(i64 signext %6) + ret {} addrspace(10)* %7 +} + +declare {}*** @julia.get_pgcstack() + +declare nonnull {} addrspace(10)* @ijl_box_int64(i64 signext) + +; Function Attrs: inaccessiblemem_or_argmemonly +declare void @julia.safepoint(i64*) #2 + +attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" } +attributes #1 = { noinline optnone "frame-pointer"="all" "probe-stack"="inline-asm" } +attributes #2 = { inaccessiblemem_or_argmemonly } + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, nameTableKind: GNU) +!3 = !DIFile(filename: "julia", directory: ".") +!4 = distinct !DISubprogram(name: "f", linkageName: "julia_f_199", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!5 = !DIFile(filename: "REPL[2]", directory: ".") +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !{!9, !9, i64 0} +!9 = !{!"jtbaa_gcframe", !10, i64 0} +!10 = !{!"jtbaa", !11, i64 0} +!11 = !{!"jtbaa"} +!12 = !{!13, !13, i64 0, i64 1} +!13 = !{!"jtbaa_const", !10, i64 0} +!14 = !DILocation(line: 1, scope: !4) +!15 = !DILocation(line: 10, scope: !16, inlinedAt: !18) +!16 = distinct !DISubprogram(name: "length;", linkageName: "length", scope: !17, file: !17, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!17 = !DIFile(filename: "essentials.jl", directory: ".") +!18 = !DILocation(line: 943, scope: !19, inlinedAt: !21) +!19 = distinct !DISubprogram(name: "iterate;", linkageName: "iterate", scope: !20, file: !20, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!20 = !DIFile(filename: "array.jl", directory: ".") +!21 = !DILocation(line: 943, scope: !19, inlinedAt: !22) +!22 = !DILocation(line: 3, scope: !4) +!23 = !{i64 40} +!24 = !{i64 16} +!25 = !{i64 0, i64 9223372036854775807} +!26 = !{!27} +!27 = !{!"jnoalias_const", !28} +!28 = !{!"jnoalias"} +!29 = !{!30, !31, !32, !33} +!30 = !{!"jnoalias_gcframe", !28} +!31 = !{!"jnoalias_stack", !28} +!32 = !{!"jnoalias_data", !28} +!33 = !{!"jnoalias_typemd", !28} +!34 = !DILocation(line: 514, scope: !35, inlinedAt: !37) +!35 = distinct !DISubprogram(name: "<=;", linkageName: "<=", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!36 = !DIFile(filename: "int.jl", directory: ".") +!37 = !DILocation(line: 423, scope: !38, inlinedAt: !40) +!38 = distinct !DISubprogram(name: ">=;", linkageName: ">=", scope: !39, file: !39, type: !6, spFlags: 
DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!39 = !DIFile(filename: "operators.jl", directory: ".") +!40 = !DILocation(line: 520, scope: !41, inlinedAt: !18) +!41 = distinct !DISubprogram(name: "<;", linkageName: "<", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!42 = !DILocation(line: 513, scope: !41, inlinedAt: !40) +!43 = !DILocation(line: 38, scope: !44, inlinedAt: !40) +!44 = distinct !DISubprogram(name: "&;", linkageName: "&", scope: !45, file: !45, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!45 = !DIFile(filename: "bool.jl", directory: ".") +!46 = !DILocation(line: 13, scope: !47, inlinedAt: !18) +!47 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !17, file: !17, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!48 = !{!49, !49, i64 0} +!49 = !{!"jtbaa_arraybuf", !50, i64 0} +!50 = !{!"jtbaa_data", !10, i64 0} +!51 = !{!32} +!52 = !{!30, !31, !33, !27} +!53 = !DILocation(line: 943, scope: !19, inlinedAt: !54) +!54 = !DILocation(line: 5, scope: !4) +!55 = !DILocation(line: 87, scope: !56, inlinedAt: !57) +!56 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!57 = !DILocation(line: 4, scope: !4) +!58 = !DILocation(line: 86, scope: !59, inlinedAt: !60) +!59 = distinct !DISubprogram(name: "-;", linkageName: "-", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!60 = !DILocation(line: 1068, scope: !59, inlinedAt: !53) +!61 = !DILocation(line: 10, scope: !16, inlinedAt: !53) +!62 = !DILocation(line: 514, scope: !35, inlinedAt: !63) +!63 = !DILocation(line: 423, scope: !38, inlinedAt: !64) +!64 = !DILocation(line: 520, scope: !41, inlinedAt: !53) +!65 = !DILocation(line: 513, scope: !41, inlinedAt: !64) +!66 = !DILocation(line: 38, scope: !44, inlinedAt: !64) +!67 = !DILocation(line: 13, scope: !47, inlinedAt: !53) +!68 = !DILocation(line: 87, scope: !56, inlinedAt: !53) +!69 = !DILocation(line: 6, scope: !4) + +; BEFOREEARLYSIMPLIFICATION: IR Dump Before BeforeEarlySimplification +; AFTEREARLYSIMPLIFICATION: IR Dump Before AfterEarlySimplification +; BEFOREEARLYOPTIMIZATION: IR Dump Before BeforeEarlyOptimization +; AFTEREARLYOPTIMIZATION: IR Dump Before AfterEarlyOptimization +; BEFORELOOPOPTIMIZATION: IR Dump Before BeforeLoopOptimization +; BEFORELICM: IR Dump Before BeforeLICM +; AFTERLICM: IR Dump Before AfterLICM +; BEFORELOOPSIMPLIFICATION: IR Dump Before BeforeLoopSimplification +; AFTERLOOPSIMPLIFICATION: IR Dump Before AfterLoopSimplification +; AFTERLOOPOPTIMIZATION: IR Dump Before AfterLoopOptimization +; BEFORESCALAROPTIMIZATION: IR Dump Before BeforeScalarOptimization +; AFTERSCALAROPTIMIZATION: IR Dump Before AfterScalarOptimization +; BEFOREVECTORIZATION: IR Dump Before BeforeVectorization +; AFTERVECTORIZATION: IR Dump Before AfterVectorization +; BEFOREINTRINSICLOWERING: IR Dump Before BeforeIntrinsicLowering +; AFTERINTRINSICLOWERING: IR Dump Before AfterIntrinsicLowering +; BEFORECLEANUP: IR Dump Before BeforeCleanup +; AFTERCLEANUP: IR Dump Before AfterCleanup +; AFTEROPTIMIZATION: IR Dump Before AfterOptimization + +; COM: simplifycfg should have killed this block +; BEFOREOPTIMIZATION: L17.L41_crit_edge: ; preds = %L17 +; BEFOREOPTIMIZATION-NEXT: br label %L41, !dbg !53 + +; 
BEFOREEARLYSIMPLIFICATION: L17.L41_crit_edge: ; preds = %L17 +; BEFOREEARLYSIMPLIFICATION-NEXT: br label %L41, !dbg !53 + +; AFTEREARLYSIMPLIFICATION-NOT: L17.L41_crit_edge: ; preds = %L17 +; AFTEREARLYSIMPLIFICATION-NOT: br label %L41, !dbg !53 + +; BEFOREEARLYOPTIMIZATION-NOT: L17.L41_crit_edge: ; preds = %L17 +; BEFOREEARLYOPTIMIZATION-NOT: br label %L41, !dbg !53 + + +; COM: InstSimplify/InstCombine should kill this zext-trunc pair +; AFTEREARLYSIMPLIFICATION: [[ZEXT:%.*]] = zext i1 {{%.*}} to i8 +; AFTEREARLYSIMPLIFICATION-NEXT: trunc i8 [[ZEXT]] to i1 + +; BEFOREEARLYOPTIMIZATION: [[ZEXT:%.*]] = zext i1 {{%.*}} to i8 +; BEFOREEARLYOPTIMIZATION-NEXT: trunc i8 [[ZEXT]] to i1 + +; AFTEREARLYOPTIMIZATION-NOT: zext i1 {{%.*}} to i8 +; AFTEREARLYOPTIMIZATION-NOT: trunc i8 {{%.*}} to i1 + +; BEFORELOOPOPTIMIZATION-NOT: zext i1 {{%.*}} to i8 +; BEFORELOOPOPTIMIZATION-NOT: trunc i8 {{%.*}} to i1 + +; COM: Loop simplification makes the exit condition obvious +; AFTERLOOPSIMPLIFICATION: L35.lr.ph: +; AFTERLOOPSIMPLIFICATION-NEXT: add nuw nsw + +; COM: Scalar optimization removes the previous add from the preheader +; AFTERSCALAROPTIMIZATION: L35.preheader: +; AFTERSCALAROPTIMIZATION-NOT: add nuw nsw +; AFTERSCALAROPTIMIZATION-NEXT: br label %L35 + +; COM: Vectorization produces a vectorized loop body with a vector reduction +; AFTERVECTORIZATION: vector.body +; AFTERVECTORIZATION: llvm.vector.reduce.add + +; COM: Intrinsics are lowered and cleaned up by the time optimization is finished +; AFTEROPTIMIZATION-NOT: call void @julia.safepoint +; AFTEROPTIMIZATION: load volatile i64{{.*}}%safepoint \ No newline at end of file diff --git a/test/llvmpasses/propagate-addrspace-non-zero.ll b/test/llvmpasses/propagate-addrspace-non-zero.ll new file mode 100644 index 0000000000000..e90d6f97abe14 --- /dev/null +++ b/test/llvmpasses/propagate-addrspace-non-zero.ll @@ -0,0 +1,67 @@ +; This file is a part of Julia.
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s + +target triple = "amdgcn-amd-amdhsa" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" + +define i64 @simple() { +; CHECK-LABEL: @simple +; CHECK-NOT: addrspace(11) + %stack = alloca i64, addrspace(5) + %casted = addrspacecast i64 addrspace(5)*%stack to i64 addrspace(11)* + %loaded = load i64, i64 addrspace(11)* %casted + ret i64 %loaded +} + +define i64 @twogeps() { +; CHECK-LABEL: @twogeps +; CHECK-NOT: addrspace(11) + %stack = alloca i64, addrspace(5) + %casted = addrspacecast i64 addrspace(5)*%stack to i64 addrspace(11)* + %gep1 = getelementptr i64, i64 addrspace(11)* %casted, i64 1 + %gep2 = getelementptr i64, i64 addrspace(11)* %gep1, i64 1 + %loaded = load i64, i64 addrspace(11)* %gep2 + ret i64 %loaded +} + +define i64 @phi(i1 %cond) { +; CHECK-LABEL: @phi +; CHECK-NOT: addrspace(11) +top: + %stack1 = alloca i64, addrspace(5) + %stack2 = alloca i64, addrspace(5) + %stack1_casted = addrspacecast i64 addrspace(5)*%stack1 to i64 addrspace(11)* + %stack2_casted = addrspacecast i64 addrspace(5)*%stack2 to i64 addrspace(11)* + br i1 %cond, label %A, label %B +A: + br label %B +B: + %phi = phi i64 addrspace(11)* [ %stack1_casted, %top ], [ %stack2_casted, %A ] + %load = load i64, i64 addrspace(11)* %phi + ret i64 %load +} + + +define i64 @select(i1 %cond) { +; CHECK-LABEL: @select +; CHECK-NOT: addrspace(11) +top: + %stack1 = alloca i64, addrspace(5) + %stack2 = alloca i64, addrspace(5) + %stack1_casted = addrspacecast i64 addrspace(5)*%stack1 to i64 addrspace(11)* + %stack2_casted = addrspacecast i64 addrspace(5)*%stack2 to i64 addrspace(11)* + %select = select i1 %cond, i64 addrspace(11)* %stack1_casted, i64 addrspace(11)* %stack2_casted + %load = load i64, i64 addrspace(11)* %select + ret i64 %load +} + +define i64 @nullptr() { +; CHECK-LABEL: @nullptr +; CHECK-NOT: addrspace(11) + %casted = addrspacecast i64 addrspace(5)*null to i64 addrspace(11)* + %load = load i64, i64 addrspace(11)* %casted + ret i64 %load +} diff --git a/test/llvmpasses/propagate-addrspace.ll b/test/llvmpasses/propagate-addrspace.ll index 84ad33310ab3f..aa3bce3760540 100644 --- a/test/llvmpasses/propagate-addrspace.ll +++ b/test/llvmpasses/propagate-addrspace.ll @@ -1,5 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s +; This file is a part of Julia. 
License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s define i64 @simple() { ; CHECK-LABEL: @simple diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll index cb2dea816c56b..c92d45300fb81 100644 --- a/test/llvmpasses/refinements.ll +++ b/test/llvmpasses/refinements.ll @@ -1,5 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE declare {}*** @julia.ptls_states() @@ -7,6 +10,7 @@ declare {}*** @julia.get_pgcstack() declare void @jl_safepoint() declare void @one_arg_boxed({} addrspace(10)*) declare {} addrspace(10)* @ijl_box_int64(i64) +declare {} addrspace(10)* @allocate_some_value() define void @argument_refinement({} addrspace(10)* %a) { ; CHECK-LABEL: @argument_refinement @@ -24,13 +28,15 @@ define void @argument_refinement({} addrspace(10)* %a) { ; Check that we reuse the gc slot from the box define void @heap_refinement1(i64 %a) { ; CHECK-LABEL: @heap_refinement1 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)* %loaded2 = load i64, i64 addrspace(10)* %casted2 @@ -41,42 +47,51 @@ define void @heap_refinement1(i64 %a) { ; Check that we don't root the allocated value here, just the derived value define void @heap_refinement2(i64 %a) { ; CHECK-LABEL: @heap_refinement2 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %loaded1 +; TYPED: store {} addrspace(10)* %loaded1 +; OPAQUE: store ptr addrspace(10) %loaded1 call void @jl_safepoint() %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)* 
%loaded2 = load i64, i64 addrspace(10)* %casted2 ret void } -declare {} addrspace(10)* @allocate_some_value() - ; Check that the way we compute rooting is compatible with refinements define void @issue22770() { ; CHECK-LABEL: @issue22770 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %y = call {} addrspace(10)* @allocate_some_value() %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)* %x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %y, +; TYPED: store {} addrspace(10)* %y, +; OPAQUE: store ptr addrspace(10) %y, %a = call {} addrspace(10)* @allocate_some_value() -; CHECK: store {} addrspace(10)* %a -; CHECK: call void @one_arg_boxed({} addrspace(10)* %x) -; CHECK: call void @one_arg_boxed({} addrspace(10)* %a) -; CHECK: call void @one_arg_boxed({} addrspace(10)* %y) +; TYPED: store {} addrspace(10)* %a +; TYPED: call void @one_arg_boxed({} addrspace(10)* %x) +; TYPED: call void @one_arg_boxed({} addrspace(10)* %a) +; TYPED: call void @one_arg_boxed({} addrspace(10)* %y) + +; OPAQUE: store ptr addrspace(10) %a +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %x) +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %a) +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %y) call void @one_arg_boxed({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %a) call void @one_arg_boxed({} addrspace(10)* %y) -; CHECK: store {} addrspace(10)* %x +; TYPED: store {} addrspace(10)* %x +; OPAQUE: store ptr addrspace(10) %x %c = call {} addrspace(10)* @allocate_some_value() -; CHECK: store {} addrspace(10)* %c +; TYPED: store {} addrspace(10)* %c +; OPAQUE: store ptr addrspace(10) %c call void @one_arg_boxed({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %c) ret void @@ -106,7 +121,8 @@ L3: define void @dont_refine_loop({} addrspace(10)* %x) { ; CHECK-LABEL: @dont_refine_loop -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -149,7 +165,8 @@ L2: define void @refine_loop_indirect({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -174,7 +191,8 @@ L2: define void @refine_loop_indirect2({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect2 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll index 77a8a5e815057..90043a7d85cf4 100644 --- a/test/llvmpasses/remove-addrspaces.ll +++ b/test/llvmpasses/remove-addrspaces.ll @@ -1,6 +1,15 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S 
%s | FileCheck %s +; This file is a part of Julia. License is MIT: https://julialang.org/license +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + + +; COM: check that package image fptrs work +@pjlsys_BoundsError_32 = internal global {} addrspace(10)* ({}***, {} addrspace(10)*, [1 x i64] addrspace(11)*)* null +; CHECK: @pjlsys_BoundsError_32 = internal global +; TYPED-SAME: {}* ({}***, {}*, [1 x i64]*)* null +; OPAQUE-SAME: ptr null define i64 @getindex({} addrspace(10)* nonnull align 16 dereferenceable(40)) { ; CHECK-LABEL: @getindex @@ -32,7 +41,8 @@ top: define nonnull {} addrspace(10)* @constexpr(i64) { ; CHECK-LABEL: @constexpr top: -; CHECK: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1) +; TYPED: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1) +; OPAQUE: call ptr inttoptr (i64 139806640486784 to ptr)(ptr inttoptr (i64 139806425039920 to ptr), i64 1) %1 = call {} addrspace(10)* inttoptr (i64 139806640486784 to {} addrspace(10)* ({} addrspace(10)*, i64)*)({} addrspace(10)* addrspacecast ({}* inttoptr (i64 139806425039920 to {}*) to {} addrspace(10)*), i64 1) ; CHECK-NOT: addrspacecast ; CHECK-NOT: addrspace @@ -47,7 +57,7 @@ top: %list = type { i64, %list* } ; COM: There's nothing to remove in this function; but remove-addrspaces shouldn't crash. -define i64 @sum.linked.list() #0 { +define i64 @sum.linked.list() { ; CHECK-LABEL: @sum.linked.list top: %a = alloca %list @@ -61,23 +71,23 @@ top: %c.cdr = getelementptr %list, %list* %c, i32 0, i32 1 ; COM: Allow remove-addrspaces to rename the type but expect it to use the same prefix. 
; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %a +; TYPED-SAME: %list* %a +; OPAQUE-SAME: ptr %a ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %a +; TYPED-SAME: %list* %a +; OPAQUE-SAME: ptr %a ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %b +; TYPED-SAME: %list* %b +; OPAQUE-SAME: ptr %b ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %b +; TYPED-SAME: %list* %b +; OPAQUE-SAME: ptr %b ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %c +; TYPED-SAME: %list* %c +; OPAQUE-SAME: ptr %c ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %c +; TYPED-SAME: %list* %c +; OPAQUE-SAME: ptr %c store i64 111, i64* %a.car store i64 222, i64* %b.car store i64 333, i64* %c.car @@ -106,6 +116,13 @@ exit: ; COM: check that address spaces in byval types are processed correctly define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} addrspace(10)*]) %0) { -; CHECK: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0) +; TYPED: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0) +; OPAQUE: define void @byval_type(ptr byval([1 x ptr]) %0) ret void } + + +; COM: check that function attributes are preserved on declarations too +declare void @convergent_function() #0 +attributes #0 = { convergent } +; CHECK: attributes #0 = { convergent } diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll index 17791d630d61a..d99c0aa02f85b 100644 --- a/test/llvmpasses/returnstwicegc.ll +++ b/test/llvmpasses/returnstwicegc.ll @@ -1,5 +1,8 @@ -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=TYPED + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=OPAQUE declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) @@ -12,7 +15,8 @@ declare void @one_arg_boxed({} addrspace(10)*) define void @try_catch(i64 %a, i64 %b) { ; Because of the returns_twice function, we need to keep aboxed live everywhere -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %sigframe = alloca [208 x i8], align 16 %sigframe.sub = getelementptr inbounds [208 x i8], [208 x i8]* %sigframe, i64 0, i64 0 diff --git a/test/llvmpasses/safepoint_stress.jl b/test/llvmpasses/safepoint_stress.jl index dc6752e76d595..c1867decc7f25 100644 --- a/test/llvmpasses/safepoint_stress.jl +++ b/test/llvmpasses/safepoint_stress.jl @@ -1,7 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s -# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S - | FileCheck %s +# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S - | FileCheck %s println(""" diff --git a/test/loading.jl b/test/loading.jl index dd9aa66da196f..826107c8aab87 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -1,16 +1,18 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +original_depot_path = copy(Base.DEPOT_PATH) + using Test # Tests for @__LINE__ inside and outside of macros -@test (@__LINE__) == 6 +@test (@__LINE__) == 8 macro macro_caller_lineno() - @test 9 == (@__LINE__) != __source__.line > 12 + @test 11 == (@__LINE__) != __source__.line > 14 return __source__.line end -@test @macro_caller_lineno() == (@__LINE__) > 12 +@test @macro_caller_lineno() == (@__LINE__) > 14 # @__LINE__ in a macro expands to the location of the macrocall in the source # while __source__.line is the location of the macro caller @@ -40,6 +42,7 @@ thefname = "the fname!//\\&\1*" include_string_test_func = include_string(@__MODULE__, "include_string_test() = @__FILE__", thefname) @test include_string_test_func() == thefname @test include_string(@__MODULE__, "Base.source_path()", thefname) == Base.source_path() +@test isdir(Base.source_dir()) @test basename(@__FILE__) == "loading.jl" @test isabspath(@__FILE__) @@ -57,7 +60,7 @@ let exename = `$(Base.julia_cmd()) --compiled-modules=yes --startup-file=no --co @test !endswith(s_dir, Base.Filesystem.path_separator) end -@test Base.in_sysimage(Base.PkgId(Base.UUID("cf7118a7-6976-5b1a-9a39-7adc72f591a4"), "UUIDs")) +@test Base.in_sysimage(Base.PkgId(Base.UUID("8f399da3-3557-5675-b5ff-fb832c97cbdb"), "Libdl")) @test Base.in_sysimage(Base.PkgId(Base.UUID("3a7fdc7e-7467-41b4-9f64-ea033d046d5b"), "NotAPackage")) == false ## Unit tests for safe file operations ## @@ -660,7 +663,7 @@ finally Base.set_active_project(old_act_proj) popfirst!(LOAD_PATH) end -@test Base.pkgorigins[Base.PkgId(UUID("69145d58-7df6-11e8-0660-cf7622583916"), "TestPkg")].version == v"1.2.3" +@test pkgversion(TestPkg) == v"1.2.3" @testset "--project and JULIA_PROJECT paths should be absolutified" begin mktempdir() do dir; cd(dir) do @@ -671,10 +674,10 @@ end cd("foo") @test Base.active_project() == old """ - @test success(`$(Base.julia_cmd()) --startup-file=no --project=foo -e $(script)`) - withenv("JULIA_PROJECT" => "foo") do - @test success(`$(Base.julia_cmd()) --startup-file=no -e $(script)`) - end + cmd = `$(Base.julia_cmd()) --startup-file=no -e $(script)` + cmd = addenv(cmd, "JULIA_PROJECT" => "foo") + cmd = pipeline(cmd; stdout, stderr) + @test success(cmd) end; end end @@ -687,15 +690,18 @@ mktempdir() do dir vdir = vdir[2:end] # remove @ vpath = joinpath(dir, "environments", vdir) mkpath(vpath) - withenv("JULIA_DEPOT_PATH" => dir) do - script = "@assert startswith(Base.active_project(), $(repr(vpath)))" - @test success(`$(Base.julia_cmd()) --startup-file=no -e $(script)`) - end + script = "@assert startswith(Base.active_project(), $(repr(vpath)))" + cmd = `$(Base.julia_cmd()) --startup-file=no -e $(script)` + cmd = addenv(cmd, + "JULIA_DEPOT_PATH" => dir, + "JULIA_LOAD_PATH" => Sys.iswindows() ? 
";" : ":") + cmd = pipeline(cmd; stdout, stderr) + @test success(cmd) end @testset "expansion of JULIA_LOAD_PATH" begin s = Sys.iswindows() ? ';' : ':' - tmp = "/foo/bar" + tmp = "/this/does/not/exist" cases = Dict{Any,Vector{String}}( nothing => Base.DEFAULT_LOAD_PATH, "" => [], @@ -704,16 +710,17 @@ end "$s$tmp" => [Base.DEFAULT_LOAD_PATH; tmp], ) for (env, result) in pairs(cases) - withenv("JULIA_LOAD_PATH" => env) do - script = "LOAD_PATH == $(repr(result)) || error()" - @test success(`$(Base.julia_cmd()) --startup-file=no -e $script`) - end + script = "LOAD_PATH == $(repr(result)) || error()" + cmd = `$(Base.julia_cmd()) --startup-file=no -e $script` + cmd = addenv(cmd, "JULIA_LOAD_PATH" => env) + cmd = pipeline(cmd; stdout, stderr) + @test success(cmd) end end @testset "expansion of JULIA_DEPOT_PATH" begin s = Sys.iswindows() ? ';' : ':' - tmp = "/foo/bar" + tmp = "/this/does/not/exist" DEFAULT = Base.append_default_depot_path!(String[]) cases = Dict{Any,Vector{String}}( nothing => DEFAULT, @@ -723,20 +730,33 @@ end "$s$tmp" => [DEFAULT; tmp], ) for (env, result) in pairs(cases) - withenv("JULIA_DEPOT_PATH" => env) do - script = "DEPOT_PATH == $(repr(result)) || error()" - @test success(`$(Base.julia_cmd()) --startup-file=no -e $script`) - end + script = "DEPOT_PATH == $(repr(result)) || error()" + cmd = `$(Base.julia_cmd()) --startup-file=no -e $script` + cmd = addenv(cmd, "JULIA_DEPOT_PATH" => env) + cmd = pipeline(cmd; stdout, stderr) + @test success(cmd) end end +@testset "Issue #25719" begin + empty!(LOAD_PATH) + @test Base.root_module(Core, :Core) == Core + push!(LOAD_PATH, "@stdlib") + @test Base.root_module(Base, :Test) == Test + @test_throws KeyError(:SomeNonExistentPackage) Base.root_module(Base, :SomeNonExistentPackage) +end + ## cleanup after tests ## for env in keys(envs) rm(env, force=true, recursive=true) end for depot in depots - rm(depot, force=true, recursive=true) + try + rm(depot, force=true, recursive=true) + catch err + @show err + end end append!(empty!(LOAD_PATH), saved_load_path) @@ -775,8 +795,10 @@ end @testset "`Base.project_names` and friends" begin # Some functions in Pkg assumes that these tuples have the same length n = length(Base.project_names) - @test length(Base.manifest_names) == n @test length(Base.preferences_names) == n + + # there are two manifest names per project name + @test length(Base.manifest_names) == 2n end @testset "Manifest formats" begin @@ -805,14 +827,43 @@ end end end +@testset "Manifest name preferential loading" begin + mktempdir() do tmp + proj = joinpath(tmp, "Project.toml") + touch(proj) + for man_name in ( + "Manifest.toml", + "JuliaManifest.toml", + "Manifest-v$(VERSION.major).$(VERSION.minor).toml", + "JuliaManifest-v$(VERSION.major).$(VERSION.minor).toml" + ) + touch(joinpath(tmp, man_name)) + man = basename(Base.project_file_manifest_path(proj)) + @test man == man_name + end + end + mktempdir() do tmp + # check that another version isn't preferred + proj = joinpath(tmp, "Project.toml") + touch(proj) + touch(joinpath(tmp, "Manifest-v1.5.toml")) + @test Base.project_file_manifest_path(proj) == nothing + touch(joinpath(tmp, "Manifest.toml")) + man = basename(Base.project_file_manifest_path(proj)) + @test man == "Manifest.toml" + end +end + @testset "error message loading pkg bad module name" begin mktempdir() do tmp old_loadpath = copy(LOAD_PATH) try push!(LOAD_PATH, tmp) write(joinpath(tmp, "BadCase.jl"), "module badcase end") - @test_throws ErrorException("package `BadCase` did not define the expected module `BadCase`, \ 
- check for typos in package module name") (@eval using BadCase) + @test_logs (:warn, r"The call to compilecache failed.*") match_mode=:any begin + @test_throws ErrorException("package `BadCase` did not define the expected module `BadCase`, \ + check for typos in package module name") (@eval using BadCase) + end finally copy!(LOAD_PATH, old_loadpath) end @@ -920,3 +971,517 @@ end end end end + + +@testset "Loading with incomplete manifest/depot #45977" begin + mktempdir() do tmp + # Set up a stacked env. + cp(joinpath(@__DIR__, "depot"), joinpath(tmp, "depot")) + + mkdir(joinpath(tmp, "Env1")) + mkdir(joinpath(tmp, "Global")) + + for env in ["Env1", "Global"] + write(joinpath(tmp, env, "Project.toml"), """ + [deps] + Baz = "6801f525-dc68-44e8-a4e8-cabd286279e7" + """) + end + + write(joinpath(tmp, "Global", "Manifest.toml"), """ + [[Baz]] + uuid = "6801f525-dc68-44e8-a4e8-cabd286279e7" + git-tree-sha1 = "efc7e24c53d6a328011975294a2c75fed2f9800a" + """) + + # This SHA does not exist in the depot. + write(joinpath(tmp, "Env1", "Manifest.toml"), """ + [[Baz]] + uuid = "6801f525-dc68-44e8-a4e8-cabd286279e7" + git-tree-sha1 = "5f2f6e72d001b014b48b26ec462f3714c342e167" + """) + + + old_load_path = copy(LOAD_PATH) + old_depot_path = copy(DEPOT_PATH) + try + empty!(LOAD_PATH) + push!(empty!(DEPOT_PATH), joinpath(tmp, "depot")) + + push!(LOAD_PATH, joinpath(tmp, "Global")) + + pkg = Base.identify_package("Baz") + # Package in manifest in current env not present in depot + @test Base.locate_package(pkg) !== nothing + + @test Base.find_package("Baz") !== nothing # coverage + + pushfirst!(LOAD_PATH, joinpath(tmp, "Env1")) + + @test Base.locate_package(pkg) === nothing + + write(joinpath(tmp, "Env1", "Manifest.toml"), """ + """) + # Package in current env not present in manifest + pkg, env = Base.identify_package_env("Baz") + @test Base.locate_package(pkg, env) === nothing + finally + copy!(LOAD_PATH, old_load_path) + copy!(DEPOT_PATH, old_depot_path) + end + end +end + +@testset "Extensions" begin + depot_path = mktempdir() + try + proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl") + + function gen_extension_cmd(compile, distr=false) + load_distr = distr ? "using Distributed; addprocs(1)" : "" + ew = distr ? "@everywhere" : "" + cmd = """ + $load_distr + begin + $ew push!(empty!(DEPOT_PATH), $(repr(depot_path))) + using HasExtensions + $ew using HasExtensions + $ew Base.get_extension(HasExtensions, :Extension) === nothing || error("unexpectedly got an extension") + $ew HasExtensions.ext_loaded && error("ext_loaded set") + using HasDepWithExtensions + $ew using HasDepWithExtensions + $ew Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set") + $ew HasExtensions.ext_loaded || error("ext_loaded not set") + $ew HasExtensions.ext_folder_loaded && error("ext_folder_loaded set") + $ew HasDepWithExtensions.do_something() || error("do_something errored") + using ExtDep2 + $ew using ExtDep2 + $ew HasExtensions.ext_folder_loaded || error("ext_folder_loaded not set") + end + """ + return `$(Base.julia_cmd()) $compile --startup-file=no -e $cmd` + end + + for compile in (`--compiled-modules=no`, ``, ``) # Once when requiring precompilation, once where it is already precompiled + cmd = gen_extension_cmd(compile) + cmd = addenv(cmd, "JULIA_LOAD_PATH" => proj) + cmd = pipeline(cmd; stdout, stderr) + @test success(cmd) + end + + sep = Sys.iswindows() ? 
';' : ':' + + cmd = gen_extension_cmd(``, true) + cmd = addenv(cmd, "JULIA_LOAD_PATH" => join([proj, "@stdlib"], sep)) + str = read(cmd, String) + @test !occursin("Error during loading of extension", str) + @test !occursin("ConcurrencyViolationError", str) + + # 48351 + cmd = gen_extension_cmd(``) + cmd = addenv(cmd, "JULIA_LOAD_PATH" => join([mktempdir(), proj], sep)) + cmd = pipeline(cmd; stdout, stderr) + @test success(cmd) + + # Only load env from where package is loaded + envs = [joinpath(@__DIR__, "project", "Extensions", "EnvWithHasExtensionsv2"), joinpath(@__DIR__, "project", "Extensions", "EnvWithHasExtensions")] + cmd = addenv(```$(Base.julia_cmd()) --startup-file=no -e ' + begin + push!(empty!(DEPOT_PATH), '$(repr(depot_path))') + using HasExtensions + using ExtDep + Base.get_extension(HasExtensions, :Extension) === nothing || error("unexpectedly loaded ext from other env") + Base.get_extension(HasExtensions, :Extension2) === nothing && error("did not load ext from active env") + end + ' + ```, "JULIA_LOAD_PATH" => join(envs, sep)) + @test success(cmd) + + test_ext_proj = """ + begin + using HasExtensions + using ExtDep + Base.get_extension(HasExtensions, :Extension) isa Module || error("expected extension to load") + using ExtDep2 + Base.get_extension(HasExtensions, :ExtensionFolder) isa Module || error("expected extension to load") + end + """ + for compile in (`--compiled-modules=no`, ``) + cmd_proj_ext = `$(Base.julia_cmd()) $compile --startup-file=no -e $test_ext_proj` + proj = joinpath(@__DIR__, "project", "Extensions") + cmd_proj_ext = addenv(cmd_proj_ext, "JULIA_LOAD_PATH" => join([joinpath(proj, "HasExtensions.jl"), joinpath(proj, "EnvWithDeps")], sep)) + run(cmd_proj_ext) + end + finally + try + rm(depot_path, force=true, recursive=true) + catch err + @show err + end + end +end + +pkgimage(val) = val == 1 ? `--pkgimages=yes` : `--pkgimages=no` +opt_level(val) = `-O$val` +debug_level(val) = `-g$val` +inline(val) = val == 1 ? 
`--inline=yes` : `--inline=no` +check_bounds(val) = if val == 0 + `--check-bounds=auto` +elseif val == 1 + `--check-bounds=yes` +elseif val == 2 + `--check-bounds=no` +end + +@testset "CacheFlags" begin + cf = Base.CacheFlags() + opts = Base.JLOptions() + @test cf.use_pkgimages == opts.use_pkgimages + @test cf.debug_level == opts.debug_level + @test cf.check_bounds == opts.check_bounds + @test cf.inline == opts.can_inline + @test cf.opt_level == opts.opt_level + + # OOICCDDP + for (P, D, C, I, O) in Iterators.product(0:1, 0:2, 0:2, 0:1, 0:3) + julia = joinpath(Sys.BINDIR, Base.julia_exename()) + script = """ + let + cf = Base.CacheFlags() + opts = Base.JLOptions() + cf.use_pkgimages == opts.use_pkgimages == $P || error("use_pkgimages") + cf.debug_level == opts.debug_level == $D || error("debug_level") + cf.check_bounds == opts.check_bounds == $C || error("check_bounds") + cf.inline == opts.can_inline == $I || error("inline") + cf.opt_level == opts.opt_level == $O || error("opt_level") + end + """ + cmd = `$julia $(pkgimage(P)) $(opt_level(O)) $(debug_level(D)) $(check_bounds(C)) $(inline(I)) -e $script` + @test success(pipeline(cmd; stdout, stderr)) + end + + cf = Base.CacheFlags(255) + @test cf.use_pkgimages + @test cf.debug_level == 3 + @test cf.check_bounds == 3 + @test cf.inline + @test cf.opt_level == 3 + + io = PipeBuffer() + show(io, cf) + @test read(io, String) == "use_pkgimages = true, debug_level = 3, check_bounds = 3, inline = true, opt_level = 3" +end + +empty!(Base.DEPOT_PATH) +append!(Base.DEPOT_PATH, original_depot_path) + +@testset "loading deadlock detector" begin + pkid1 = Base.PkgId("pkgid1") + pkid2 = Base.PkgId("pkgid2") + pkid3 = Base.PkgId("pkgid3") + pkid4 = Base.PkgId("pkgid4") + e = Base.Event() + @test nothing === @lock Base.require_lock Base.start_loading(pkid4) # module pkgid4 + @test nothing === @lock Base.require_lock Base.start_loading(pkid1) # module pkgid1 + t1 = @async begin + @test nothing === @lock Base.require_lock Base.start_loading(pkid2) # @async module pkgid2; using pkgid1; end + notify(e) + @test "loaded_pkgid1" == @lock Base.require_lock Base.start_loading(pkid1) + @lock Base.require_lock Base.end_loading(pkid2, "loaded_pkgid2") + end + wait(e) + reset(e) + t2 = @async begin + @test nothing === @lock Base.require_lock Base.start_loading(pkid3) # @async module pkgid3; using pkgid2; end + notify(e) + @test "loaded_pkgid2" == @lock Base.require_lock Base.start_loading(pkid2) + @lock Base.require_lock Base.end_loading(pkid3, "loaded_pkgid3") + end + wait(e) + reset(e) + @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid3 -> pkgid2 -> pkgid1 -> pkgid3 && pkgid4"), + @lock Base.require_lock Base.start_loading(pkid3)).value # try using pkgid3 + @test_throws(ConcurrencyViolationError("deadlock detected in loading pkgid4 -> pkgid4 && pkgid1"), + @lock Base.require_lock Base.start_loading(pkid4)).value # try using pkgid4 + @lock Base.require_lock Base.end_loading(pkid1, "loaded_pkgid1") # end + @lock Base.require_lock Base.end_loading(pkid4, "loaded_pkgid4") # end + wait(t2) + wait(t1) +end + +@testset "Upgradable stdlibs" begin + @test success(`$(Base.julia_cmd()) --startup-file=no -e 'using DelimitedFiles'`) + @test success(`$(Base.julia_cmd()) --startup-file=no -e 'using Statistics'`) +end + +@testset "checking srcpath modules" begin + p = Base.PkgId("Dummy") + fpath, _ = mktemp() + @testset "valid" begin + write(fpath, """ + module Foo + using Bar + end + """) + @test Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + 
baremodule Foo + using Bar + end + """) + @test Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + \"\"\" + Foo + using Foo + \"\"\" + module Foo + using Bar + end + """) + @test Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + \"\"\" Foo \"\"\" + module Foo + using Bar + end + """) + @test Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + \"\"\" + Foo + \"\"\" module Foo + using Bar + end + """) + @test Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + @doc let x = 1 + x + end module Foo + using Bar + end + """) + @test Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + # using foo + module Foo + using Bar + end + """) + @test Base.check_src_module_wrap(p, fpath) + end + @testset "invalid" begin + write(fpath, """ + # module Foo + using Bar + # end + """) + @test_throws ErrorException Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + using Bar + module Foo + end + """) + @test_throws ErrorException Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + using Bar + """) + @test_throws ErrorException Base.check_src_module_wrap(p, fpath) + + write(fpath, """ + x = 1 + """) + @test_throws ErrorException Base.check_src_module_wrap(p, fpath) + end +end + +@testset "relocatable upgrades #51989" begin + mktempdir() do depot + project_path = joinpath(depot, "project") + mkpath(project_path) + + # Create fake `Foo.jl` package with two files: + foo_path = joinpath(depot, "dev", "Foo") + mkpath(joinpath(foo_path, "src")) + open(joinpath(foo_path, "src", "Foo.jl"); write=true) do io + println(io, """ + module Foo + include("internal.jl") + end + """) + end + open(joinpath(foo_path, "src", "internal.jl"); write=true) do io + println(io, "const a = \"asd\"") + end + open(joinpath(foo_path, "Project.toml"); write=true) do io + println(io, """ + name = "Foo" + uuid = "00000000-0000-0000-0000-000000000001" + version = "1.0.0" + """) + end + + # In our depot, `dev` and then `precompile` this `Foo` package. + @test success(addenv( + `$(Base.julia_cmd()) --project=$project_path --startup-file=no -e 'import Pkg; Pkg.develop("Foo"); Pkg.precompile(); exit(0)'`, + "JULIA_DEPOT_PATH" => depot)) + + # Get the size of the generated `.ji` file so that we can ensure that it gets altered + foo_compiled_path = joinpath(depot, "compiled", "v$(VERSION.major).$(VERSION.minor)", "Foo") + cache_path = joinpath(foo_compiled_path, only(filter(endswith(".ji"), readdir(foo_compiled_path)))) + cache_size = filesize(cache_path) + + # Next, remove the dependence on `internal.jl` and delete it: + rm(joinpath(foo_path, "src", "internal.jl")) + open(joinpath(foo_path, "src", "Foo.jl"); write=true) do io + truncate(io, 0) + println(io, """ + module Foo + end + """) + end + + # Try to load `Foo`; this should trigger recompilation, not an error! + @test success(addenv( + `$(Base.julia_cmd()) --project=$project_path --startup-file=no -e 'using Foo; exit(0)'`, + "JULIA_DEPOT_PATH" => depot, + )) + + # Ensure that there is still only one `.ji` file (it got replaced + # and the file size changed). 
+ @test length(filter(endswith(".ji"), readdir(foo_compiled_path))) == 1 + @test filesize(cache_path) != cache_size + end +end + +@testset "code coverage disabled during precompilation" begin + mktempdir() do depot + cov_test_dir = joinpath(@__DIR__, "project", "deps", "CovTest.jl") + cov_cache_dir = joinpath(depot, "compiled", "v$(VERSION.major).$(VERSION.minor)", "CovTest") + function rm_cov_files() + for cov_file in filter(endswith(".cov"), readdir(joinpath(cov_test_dir, "src"), join=true)) + rm(cov_file) + end + @test !cov_exists() + end + cov_exists() = !isempty(filter(endswith(".cov"), readdir(joinpath(cov_test_dir, "src")))) + + rm_cov_files() # clear out any coverage files first + @test !cov_exists() + + cd(cov_test_dir) do + # In our depot, precompile CovTest.jl with coverage on + @test success(addenv( + `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; exit(0)'`, + "JULIA_DEPOT_PATH" => depot, + )) + @test !isempty(filter(!endswith(".ji"), readdir(cov_cache_dir))) # check that object cache file(s) exists + @test !cov_exists() + rm_cov_files() + + # same again but call foo(), which is in the pkgimage, and should generate coverage + @test success(addenv( + `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; foo(); exit(0)'`, + "JULIA_DEPOT_PATH" => depot, + )) + @test cov_exists() + rm_cov_files() + + # same again but call bar(), which is NOT in the pkgimage, and should generate coverage + @test success(addenv( + `$(Base.julia_cmd()) --startup-file=no --pkgimage=yes --code-coverage=@ --project -e 'using CovTest; bar(); exit(0)'`, + "JULIA_DEPOT_PATH" => depot, + )) + @test cov_exists() + rm_cov_files() + end + end +end + +@testset "command-line flags" begin + mktempdir() do dir + # generate a Parent.jl and Child.jl package, with Parent depending on Child + open(joinpath(dir, "Child.jl"), "w") do io + println(io, """ + module Child + end""") + end + open(joinpath(dir, "Parent.jl"), "w") do io + println(io, """ + module Parent + using Child + end""") + end + + # helper function to load a package and return the output + function load_package(name, args=``) + code = "using $name" + cmd = addenv(`$(Base.julia_cmd()) -e $code $args`, + "JULIA_LOAD_PATH" => dir, + "JULIA_DEBUG" => "loading") + + out = Pipe() + proc = run(pipeline(cmd, stdout=out, stderr=out)) + close(out.in) + + log = @async String(read(out)) + @test success(proc) + fetch(log) + end + + log = load_package("Parent", `--compiled-modules=no --pkgimages=no`) + @test !occursin(r"Generating (cache|object cache) file", log) + @test !occursin(r"Loading (cache|object cache) file", log) + + + ## tests for `--compiled-modules`, which generates cache files + + log = load_package("Child", `--compiled-modules=yes --pkgimages=no`) + @test occursin(r"Generating cache file for Child", log) + @test occursin(r"Loading cache file .+ for Child", log) + + # with `--compiled-modules=existing` we should only precompile Child + log = load_package("Parent", `--compiled-modules=existing --pkgimages=no`) + @test !occursin(r"Generating cache file for Child", log) + @test occursin(r"Loading cache file .+ for Child", log) + @test !occursin(r"Generating cache file for Parent", log) + @test !occursin(r"Loading cache file .+ for Parent", log) + + # the default is `--compiled-modules=yes`, which should now precompile Parent + log = load_package("Parent", `--pkgimages=no`) + @test !occursin(r"Generating cache file for Child", log) + @test occursin(r"Loading 
cache file .+ for Child", log) + @test occursin(r"Generating cache file for Parent", log) + @test occursin(r"Loading cache file .+ for Parent", log) + + + ## tests for `--pkgimages`, which generates object cache files + + log = load_package("Child", `--compiled-modules=yes --pkgimages=yes`) + @test occursin(r"Generating object cache file for Child", log) + @test occursin(r"Loading object cache file .+ for Child", log) + + # with `--pkgimages=existing` we should only generate code for Child + log = load_package("Parent", `--compiled-modules=yes --pkgimages=existing`) + @test !occursin(r"Generating object cache file for Child", log) + @test occursin(r"Loading object cache file .+ for Child", log) + @test !occursin(r"Generating object cache file for Parent", log) + @test !occursin(r"Loading object cache file .+ for Parent", log) + + # the default is `--pkgimages=yes`, which should now generate code for Parent + log = load_package("Parent") + @test !occursin(r"Generating object cache file for Child", log) + @test occursin(r"Loading object cache file .+ for Child", log) + @test occursin(r"Generating object cache file for Parent", log) + @test occursin(r"Loading object cache file .+ for Parent", log) + end +end diff --git a/test/math.jl b/test/math.jl index 8938b6a8864ab..248f99cebd987 100644 --- a/test/math.jl +++ b/test/math.jl @@ -8,6 +8,20 @@ function isnan_type(::Type{T}, x) where T isa(x, T) && isnan(x) end +# has_fma has no runtime support. +# So we need function wrappers to make this work. +has_fma_Int() = Core.Compiler.have_fma(Int) +has_fma_Float32() = Core.Compiler.have_fma(Float32) +has_fma_Float64() = Core.Compiler.have_fma(Float64) + +has_fma = Dict( + Int => has_fma_Int(), + Rational{Int} => has_fma_Int(), + Float32 => has_fma_Float32(), + Float64 => has_fma_Float64(), + BigFloat => true, +) + @testset "clamp" begin @test clamp(0, 1, 3) == 1 @test clamp(1, 1, 3) == 1 @@ -55,8 +69,9 @@ end @test repr(Any[pi ℯ; ℯ pi]) == "Any[π ℯ; ℯ π]" @test string(pi) == "π" - @test sin(π) === sinpi(1) == tan(π) == sinpi(1 // 1) == 0 - @test cos(π) === cospi(1) == sec(π) == cospi(1 // 1) == -1 + @test sin(π) == sind(180) === sinpi(1) === sinpi(1//1) == tan(π) == 0 + @test tan(π) == tand(180) === tanpi(1) === tanpi(1//1) === -0.0 + @test cos(π) == cosd(180) === cospi(1) === cospi(1//1) == sec(π) == -1 @test csc(π) == 1/0 && cot(π) == -1/0 @test sincos(π) === sincospi(1) == (0, -1) end @@ -165,12 +180,15 @@ end @test atan(x,y) ≈ atan(big(x),big(y)) @test atanh(x) ≈ atanh(big(x)) @test cbrt(x) ≈ cbrt(big(x)) + @test fourthroot(x) ≈ fourthroot(big(x)) @test cos(x) ≈ cos(big(x)) @test cosh(x) ≈ cosh(big(x)) + @test cospi(x) ≈ cospi(big(x)) @test exp(x) ≈ exp(big(x)) @test exp10(x) ≈ exp10(big(x)) @test exp2(x) ≈ exp2(big(x)) @test expm1(x) ≈ expm1(big(x)) + @test expm1(T(-1.1)) ≈ expm1(big(T(-1.1))) @test hypot(x,y) ≈ hypot(big(x),big(y)) @test hypot(x,x,y) ≈ hypot(hypot(big(x),big(x)),big(y)) @test hypot(x,x,y,y) ≈ hypot(hypot(big(x),big(x)),hypot(big(y),big(y))) @@ -180,9 +198,11 @@ end @test log2(x) ≈ log2(big(x)) @test sin(x) ≈ sin(big(x)) @test sinh(x) ≈ sinh(big(x)) + @test sinpi(x) ≈ sinpi(big(x)) @test sqrt(x) ≈ sqrt(big(x)) @test tan(x) ≈ tan(big(x)) @test tanh(x) ≈ tanh(big(x)) + @test tanpi(x) ≈ tanpi(big(x)) @test sec(x) ≈ sec(big(x)) @test csc(x) ≈ csc(big(x)) @test secd(x) ≈ secd(big(x)) @@ -201,6 +221,9 @@ end @test isequal(cbrt(T(0)), T(0)) @test isequal(cbrt(T(1)), T(1)) @test isequal(cbrt(T(1000000000))^3, T(1000)^3) + @test isequal(fourthroot(T(0)), T(0)) + @test 
isequal(fourthroot(T(1)), T(1)) + @test isequal(fourthroot(T(100000000))^4, T(100)^4) @test isequal(cos(T(0)), T(1)) @test cos(T(pi)/2) ≈ T(0) atol=eps(T) @test isequal(cos(T(pi)), T(-1)) @@ -253,6 +276,8 @@ end @test asin(sin(x)) ≈ x @test cbrt(x)^3 ≈ x @test cbrt(x^3) ≈ x + @test fourthroot(x)^4 ≈ x + @test fourthroot(x^4) ≈ x @test asinh(sinh(x)) ≈ x @test atan(tan(x)) ≈ x @test atan(x,y) ≈ atan(x/y) @@ -411,47 +436,51 @@ end @test rad2deg(pi + (pi/3)*im) ≈ 180 + 60im end +# ensure zeros are signed the same +⩲(x,y) = typeof(x) == typeof(y) && x == y && signbit(x) == signbit(y) +⩲(x::Tuple, y::Tuple) = length(x) == length(y) && all(map(⩲,x,y)) + @testset "degree-based trig functions" begin - @testset "$T" for T = (Float32,Float64,Rational{Int}) + @testset "$T" for T = (Float32,Float64,Rational{Int},BigFloat) fT = typeof(float(one(T))) fTsc = typeof( (float(one(T)), float(one(T))) ) for x = -400:40:400 - @test sind(convert(T,x))::fT ≈ convert(fT,sin(pi/180*x)) atol=eps(deg2rad(convert(fT,x))) - @test cosd(convert(T,x))::fT ≈ convert(fT,cos(pi/180*x)) atol=eps(deg2rad(convert(fT,x))) + @test sind(convert(T,x))::fT ≈ sin(pi*convert(fT,x)/180) atol=eps(deg2rad(convert(fT,x))) + @test cosd(convert(T,x))::fT ≈ cos(pi*convert(fT,x)/180) atol=eps(deg2rad(convert(fT,x))) s,c = sincosd(convert(T,x)) - @test s::fT ≈ convert(fT,sin(pi/180*x)) atol=eps(deg2rad(convert(fT,x))) - @test c::fT ≈ convert(fT,cos(pi/180*x)) atol=eps(deg2rad(convert(fT,x))) + @test s::fT ≈ sin(pi*convert(fT,x)/180) atol=eps(deg2rad(convert(fT,x))) + @test c::fT ≈ cos(pi*convert(fT,x)/180) atol=eps(deg2rad(convert(fT,x))) end @testset "sind" begin - @test sind(convert(T,0.0))::fT === zero(fT) - @test sind(convert(T,180.0))::fT === zero(fT) - @test sind(convert(T,360.0))::fT === zero(fT) - T != Rational{Int} && @test sind(convert(T,-0.0))::fT === -zero(fT) - @test sind(convert(T,-180.0))::fT === -zero(fT) - @test sind(convert(T,-360.0))::fT === -zero(fT) + @test sind(convert(T,0.0))::fT ⩲ zero(fT) + @test sind(convert(T,180.0))::fT ⩲ zero(fT) + @test sind(convert(T,360.0))::fT ⩲ zero(fT) + T != Rational{Int} && @test sind(convert(T,-0.0))::fT ⩲ -zero(fT) + @test sind(convert(T,-180.0))::fT ⩲ -zero(fT) + @test sind(convert(T,-360.0))::fT ⩲ -zero(fT) if T <: AbstractFloat @test isnan(sind(T(NaN))) end end @testset "cosd" begin - @test cosd(convert(T,90))::fT === zero(fT) - @test cosd(convert(T,270))::fT === zero(fT) - @test cosd(convert(T,-90))::fT === zero(fT) - @test cosd(convert(T,-270))::fT === zero(fT) + @test cosd(convert(T,90))::fT ⩲ zero(fT) + @test cosd(convert(T,270))::fT ⩲ zero(fT) + @test cosd(convert(T,-90))::fT ⩲ zero(fT) + @test cosd(convert(T,-270))::fT ⩲ zero(fT) if T <: AbstractFloat @test isnan(cosd(T(NaN))) end end @testset "sincosd" begin - @test sincosd(convert(T,-360))::fTsc === ( -zero(fT), one(fT) ) - @test sincosd(convert(T,-270))::fTsc === ( one(fT), zero(fT) ) - @test sincosd(convert(T,-180))::fTsc === ( -zero(fT), -one(fT) ) - @test sincosd(convert(T, -90))::fTsc === ( -one(fT), zero(fT) ) - @test sincosd(convert(T, 0))::fTsc === ( zero(fT), one(fT) ) - @test sincosd(convert(T, 90))::fTsc === ( one(fT), zero(fT) ) - @test sincosd(convert(T, 180))::fTsc === ( zero(fT), -one(fT) ) - @test sincosd(convert(T, 270))::fTsc === ( -one(fT), zero(fT) ) + @test sincosd(convert(T,-360))::fTsc ⩲ ( -zero(fT), one(fT) ) + @test sincosd(convert(T,-270))::fTsc ⩲ ( one(fT), zero(fT) ) + @test sincosd(convert(T,-180))::fTsc ⩲ ( -zero(fT), -one(fT) ) + @test sincosd(convert(T, -90))::fTsc ⩲ ( -one(fT), zero(fT) ) + 
@test sincosd(convert(T, 0))::fTsc ⩲ ( zero(fT), one(fT) ) + @test sincosd(convert(T, 90))::fTsc ⩲ ( one(fT), zero(fT) ) + @test sincosd(convert(T, 180))::fTsc ⩲ ( zero(fT), -one(fT) ) + @test sincosd(convert(T, 270))::fTsc ⩲ ( -one(fT), zero(fT) ) if T <: AbstractFloat @test_throws DomainError sincosd(T(Inf)) @test all(isnan.(sincosd(T(NaN)))) @@ -463,36 +492,63 @@ end "sincospi" => (x->sincospi(x)[1], x->sincospi(x)[2]) ) @testset "pi * $x" for x = -3:0.3:3 - @test sinpi(convert(T,x))::fT ≈ convert(fT,sin(pi*x)) atol=eps(pi*convert(fT,x)) - @test cospi(convert(T,x))::fT ≈ convert(fT,cos(pi*x)) atol=eps(pi*convert(fT,x)) + @test sinpi(convert(T,x))::fT ≈ sin(pi*convert(fT,x)) atol=eps(pi*convert(fT,x)) + @test cospi(convert(T,x))::fT ≈ cos(pi*convert(fT,x)) atol=eps(pi*convert(fT,x)) end - @test sinpi(convert(T,0.0))::fT === zero(fT) - @test sinpi(convert(T,1.0))::fT === zero(fT) - @test sinpi(convert(T,2.0))::fT === zero(fT) - T != Rational{Int} && @test sinpi(convert(T,-0.0))::fT === -zero(fT) - @test sinpi(convert(T,-1.0))::fT === -zero(fT) - @test sinpi(convert(T,-2.0))::fT === -zero(fT) + @test sinpi(convert(T,0.0))::fT ⩲ zero(fT) + @test sinpi(convert(T,1.0))::fT ⩲ zero(fT) + @test sinpi(convert(T,2.0))::fT ⩲ zero(fT) + T != Rational{Int} && @test sinpi(convert(T,-0.0))::fT ⩲ -zero(fT) + @test sinpi(convert(T,-1.0))::fT ⩲ -zero(fT) + @test sinpi(convert(T,-2.0))::fT ⩲ -zero(fT) @test_throws DomainError sinpi(convert(T,Inf)) - @test cospi(convert(T,0.5))::fT === zero(fT) - @test cospi(convert(T,1.5))::fT === zero(fT) - @test cospi(convert(T,-0.5))::fT === zero(fT) - @test cospi(convert(T,-1.5))::fT === zero(fT) + @test cospi(convert(T,0.5))::fT ⩲ zero(fT) + @test cospi(convert(T,1.5))::fT ⩲ zero(fT) + @test cospi(convert(T,-0.5))::fT ⩲ zero(fT) + @test cospi(convert(T,-1.5))::fT ⩲ zero(fT) @test_throws DomainError cospi(convert(T,Inf)) end - @testset "Check exact values" begin - @test sind(convert(T,30)) == 0.5 - @test cosd(convert(T,60)) == 0.5 - @test sind(convert(T,150)) == 0.5 - @test sinpi(one(T)/convert(T,6)) == 0.5 - @test sincospi(one(T)/convert(T,6))[1] == 0.5 - @test_throws DomainError sind(convert(T,Inf)) - @test_throws DomainError cosd(convert(T,Inf)) - T != Float32 && @test cospi(one(T)/convert(T,3)) == 0.5 - T != Float32 && @test sincospi(one(T)/convert(T,3))[2] == 0.5 - T == Rational{Int} && @test sinpi(5//6) == 0.5 - T == Rational{Int} && @test sincospi(5//6)[1] == 0.5 + @testset "trig pi functions accuracy" for numerator in -20:1:20 + for func in (sinpi, cospi, tanpi, + x -> sincospi(x)[1], + x -> sincospi(x)[2]) + x = numerator // 20 + # Check that rational function works + @test func(x) ≈ func(BigFloat(x)) + # Use short value so that wider values will be exactly equal + shortx = Float16(x) + # Compare to BigFloat value + bigvalue = func(BigFloat(shortx)) + for T in (Float16,Float32,Float64) + @test func(T(shortx)) ≈ T(bigvalue) + end + end + end + @testset begin + # If the machine supports fma (fused multiply add), we require exact equality. + # Otherwise, we only require approximate equality. 
+ if has_fma[T] + my_eq = (==) + @debug "On this machine, FMA is supported for $(T), so we will test for exact equality" my_eq + else + my_eq = isapprox + @debug "On this machine, FMA is not supported for $(T), so we will test for approximate equality" my_eq + end + @testset let context=(T, has_fma[T], my_eq) + @test sind(convert(T,30)) == 0.5 + @test cosd(convert(T,60)) == 0.5 + @test sind(convert(T,150)) == 0.5 + @test my_eq(sinpi(one(T)/convert(T,6)), 0.5) + @test my_eq(sincospi(one(T)/convert(T,6))[1], 0.5) + @test_throws DomainError sind(convert(T,Inf)) + @test_throws DomainError cosd(convert(T,Inf)) + fT == Float64 && @test my_eq(cospi(one(T)/convert(T,3)), 0.5) + fT == Float64 && @test my_eq(sincospi(one(T)/convert(T,3))[2], 0.5) + T == Rational{Int} && @test my_eq(sinpi(5//6), 0.5) + T == Rational{Int} && @test my_eq(sincospi(5//6)[1], 0.5) + end end end scdm = sincosd(missing) @@ -500,14 +556,18 @@ end @test ismissing(scdm[2]) end -@testset "Integer and Inf args for sinpi/cospi/sinc/cosc" begin +@testset "Integer and Inf args for sinpi/cospi/tanpi/sinc/cosc" begin for (sinpi, cospi) in ((sinpi, cospi), (x->sincospi(x)[1], x->sincospi(x)[2])) - @test sinpi(1) == 0 - @test sinpi(-1) == -0 + @test sinpi(1) === 0.0 + @test sinpi(-1) === -0.0 @test cospi(1) == -1 @test cospi(2) == 1 end + @test tanpi(1) === -0.0 + @test tanpi(-1) === 0.0 + @test tanpi(2) === 0.0 + @test tanpi(-2) === -0.0 @test sinc(1) == 0 @test sinc(complex(1,0)) == 0 @test sinc(0) == 1 @@ -538,15 +598,19 @@ end end end end - @test @inferred(sinc(0//1)) === 1.0 - @test @inferred(cosc(0//1)) === -0.0 + @test @inferred(sinc(0//1)) ⩲ 1.0 + @test @inferred(cosc(0//1)) ⩲ -0.0 # test right before/after thresholds of Taylor series @test sinc(0.001) ≈ 0.999998355066745 rtol=1e-15 @test sinc(0.00099) ≈ 0.9999983878009009 rtol=1e-15 @test sinc(0.05f0) ≈ 0.9958927352435614 rtol=1e-7 @test sinc(0.0499f0) ≈ 0.9959091277049384 rtol=1e-7 - @test cosc(0.14) ≈ -0.4517331883801308 rtol=1e-15 + if has_fma[Float64] + @test cosc(0.14) ≈ -0.4517331883801308 rtol=1e-15 + else + @test cosc(0.14) ≈ -0.4517331883801308 rtol=1e-14 + end @test cosc(0.1399) ≈ -0.45142306168781854 rtol=1e-14 @test cosc(0.26f0) ≈ -0.7996401373462212 rtol=5e-7 @test cosc(0.2599f0) ≈ -0.7993744054401625 rtol=5e-7 @@ -556,7 +620,7 @@ end end end -@testset "Irrational args to sinpi/cospi/sinc/cosc" begin +@testset "Irrational args to sinpi/cospi/tanpi/sinc/cosc" begin for x in (pi, ℯ, Base.MathConstants.golden) for (sinpi, cospi) in ((sinpi, cospi), (x->sincospi(x)[1], x->sincospi(x)[2])) @test sinpi(x) ≈ Float64(sinpi(big(x))) @@ -564,6 +628,7 @@ end @test sinpi(complex(x, x)) ≈ ComplexF64(sinpi(complex(big(x), big(x)))) @test cospi(complex(x, x)) ≈ ComplexF64(cospi(complex(big(x), big(x)))) end + @test tanpi(x) ≈ Float64(tanpi(big(x))) @test sinc(x) ≈ Float64(sinc(big(x))) @test cosc(x) ≈ Float64(cosc(big(x))) @test sinc(complex(x, x)) ≈ ComplexF64(sinc(complex(big(x), big(x)))) @@ -593,7 +658,7 @@ end end @testset "trig function type stability" begin - @testset "$T $f" for T = (Float32,Float64,BigFloat,Rational{Int16},Complex{Int32},ComplexF16), f = (sind,cosd,sinpi,cospi) + @testset "$T $f" for T = (Float32,Float64,BigFloat,Rational{Int16},Complex{Int32},ComplexF16), f = (sind,cosd,sinpi,cospi,tanpi) @test Base.return_types(f,Tuple{T}) == [float(T)] end @testset "$T sincospi" for T = (Float32,Float64,BigFloat,Rational{Int16},Complex{Int32},ComplexF16) @@ -1197,6 +1262,22 @@ end end end +@testset "fourthroot" begin + for T in (Float32, Float64) + @test 
fourthroot(zero(T)) === zero(T) + @test fourthroot(one(T)) === one(T) + @test fourthroot(T(Inf)) === T(Inf) + @test isnan_type(T, fourthroot(T(NaN))) + for x in (pcnfloat(nextfloat(nextfloat(zero(T))))..., + 0.45, 0.6, 0.98, + map(x->x^3, 1.0:1.0:1024.0)..., + prevfloat(T(Inf))) + by = fourthroot(big(T(x))) + @test fourthroot(T(x)) ≈ by rtol=eps(T) + end + end +end + @testset "hypot" begin @test hypot(0, 0) == 0.0 @test hypot(3, 4) == 5.0 @@ -1280,7 +1361,7 @@ struct BadFloatWrapper <: AbstractFloat x::Float64 end -@testset "not impelemented errors" begin +@testset "not implemented errors" begin x = BadFloatWrapper(1.9) for f in (sin, cos, tan, sinh, cosh, tanh, atan, acos, asin, asinh, acosh, atanh, exp, log1p, expm1, log) #exp2, exp10 broken for now @test_throws MethodError f(x) @@ -1319,14 +1400,18 @@ end @testset "pow" begin # tolerance by type for regular powers - POW_TOLS = Dict(Float16=>[.51, .51, 2.0, 1.5], - Float32=>[.51, .51, 2.0, 1.5], - Float64=>[1.0, 1.5, 2.0, 1.5]) + POW_TOLS = Dict(Float16=>[.51, .51, .51, 2.0, 1.5], + Float32=>[.51, .51, .51, 2.0, 1.5], + Float64=>[.55, 0.8, 1.5, 2.0, 1.5]) for T in (Float16, Float32, Float64) for x in (0.0, -0.0, 1.0, 10.0, 2.0, Inf, NaN, -Inf, -NaN) for y in (0.0, -0.0, 1.0, -3.0,-10.0 , Inf, NaN, -Inf, -NaN) - got, expected = T(x)^T(y), T(big(x))^T(y) - @test isnan_type(T, got) && isnan_type(T, expected) || (got === expected) + got, expected = T(x)^T(y), T(big(x)^T(y)) + if isnan(expected) + @test isnan_type(T, got) || T.((x,y)) + else + @test got == expected || T.((x,y)) + end end end for _ in 1:2^16 @@ -1335,9 +1420,11 @@ end got, expected = x^y, widen(x)^y if isfinite(eps(T(expected))) if y == T(-2) # unfortunately x^-2 is less accurate for performance reasons. - @test abs(expected-got) <= POW_TOLS[T][3]*eps(T(expected)) || (x,y) - elseif y == T(3) # unfortunately x^3 is less accurate for performance reasons. @test abs(expected-got) <= POW_TOLS[T][4]*eps(T(expected)) || (x,y) + elseif y == T(3) # unfortunately x^3 is less accurate for performance reasons. + @test abs(expected-got) <= POW_TOLS[T][5]*eps(T(expected)) || (x,y) + elseif issubnormal(got) + @test abs(expected-got) <= POW_TOLS[T][2]*eps(T(expected)) || (x,y) else @test abs(expected-got) <= POW_TOLS[T][1]*eps(T(expected)) || (x,y) end @@ -1348,7 +1435,7 @@ end x=rand(T)*floatmin(T); y=rand(T)*3-T(1.2) got, expected = x^y, widen(x)^y if isfinite(eps(T(expected))) - @test abs(expected-got) <= POW_TOLS[T][2]*eps(T(expected)) || (x,y) + @test abs(expected-got) <= POW_TOLS[T][3]*eps(T(expected)) || (x,y) end end # test (-x)^y for y larger than typemax(Int) @@ -1358,6 +1445,10 @@ end end # test for large negative exponent where error compensation matters @test 0.9999999955206014^-1.0e8 == 1.565084574870928 + @test 3e18^20 == Inf + # two cases where we have observed > 1 ULP in the past + @test 0.0013653274095082324^-97.60372292227069 == 4.088393948750035e279 + @test 8.758520413376658e-5^70.55863059215994 == 5.052076767078296e-287 end # Test that sqrt behaves correctly and doesn't exhibit fp80 double rounding. @@ -1450,22 +1541,61 @@ end @test (@allocated f44336()) == 0 end -# test constant-foldability -for fn in (:sin, :cos, :tan, :log, :log2, :log10, :log1p, :exponent, :sqrt, :cbrt, - # TODO? :asin, :atan, :acos, :sinh, :cosh, :tanh, :asinh, :acosh, :atanh, - # TODO? 
:exp, :exp2, :exp10, :expm1 - ) - for T in (Float32, Float64) - f = getfield(@__MODULE__, fn) - eff = Base.infer_effects(f, (T,)) - if Core.Compiler.is_foldable(eff) - @test true - else - # XXX only print bad effects – especially `[sin|cos|tan](::Float32)` are analyzed - # as non-foldable sometimes but non-deterministically somehow, we need to dig - # into what's leading to the bad analysis with Cthulhu on each platform - @warn "bad effects found for $f(::$T)" eff +@testset "constant-foldability of core math functions" begin + for T = Any[Float16, Float32, Float64] + @testset let T = T + for f = Any[sin, cos, tan, log, log2, log10, log1p, exponent, sqrt, cbrt, fourthroot, + asin, atan, acos, sinh, cosh, tanh, asinh, acosh, atanh, exp, exp2, exp10, expm1] + @testset let f = f + @test Base.infer_return_type(f, (T,)) != Union{} + @test Core.Compiler.is_foldable(Base.infer_effects(f, (T,))) + end + end + @test Core.Compiler.is_foldable(Base.infer_effects(^, (T,Int))) + @test Core.Compiler.is_foldable(Base.infer_effects(^, (T,T))) end end +end; +@testset "removability of core math functions" begin + for T = Any[Float16, Float32, Float64] + @testset let T = T + for f = Any[exp, exp2, exp10, expm1] + @testset let f = f + @test Core.Compiler.is_removable_if_unused(Base.infer_effects(f, (T,))) + end + end + end + end +end; +@testset "exception type inference of core math functions" begin + MathErrorT = Union{DomainError, InexactError} + for T = (Float16, Float32, Float64) + @testset let T = T + for f = Any[sin, cos, tan, log, log2, log10, log1p, exponent, sqrt, cbrt, fourthroot, + asin, atan, acos, sinh, cosh, tanh, asinh, acosh, atanh, exp, exp2, exp10, expm1] + @testset let f = f + @test Base.infer_exception_type(f, (T,)) <: MathErrorT + end + end + @test Base.infer_exception_type(^, (T,Int)) <: MathErrorT + @test Base.infer_exception_type(^, (T,T)) <: MathErrorT + end + end +end; +@test Base.infer_return_type((Int,)) do x + local r = nothing + try + r = sin(x) + catch err + if err isa DomainError + r = 0.0 + end + end + return r +end === Float64 + +@testset "BigInt Rationals with special funcs" begin + @test sinpi(big(1//1)) == big(0.0) + @test tanpi(big(1//1)) == big(0.0) + @test cospi(big(1//1)) == big(-1.0) end -@test Core.Compiler.is_foldable(Base.infer_effects(^, (Float32,Int))) diff --git a/test/meta.jl b/test/meta.jl index 5bdb988f41b6d..be0ecf0cdb827 100644 --- a/test/meta.jl +++ b/test/meta.jl @@ -43,77 +43,70 @@ end @test foundfunc(h_inlined(), :g_inlined) @test foundfunc(h_noinlined(), :g_noinlined) -using Base: pushmeta!, popmeta! +using Base: popmeta! 
-macro attach(val, ex) - esc(_attach(val, ex)) +macro attach_meta(val, ex) + esc(_attach_meta(val, ex)) end +_attach_meta(val, ex) = Base.pushmeta!(ex, Expr(:test, val)) -_attach(val, ex) = pushmeta!(ex, :test, val) - -@attach 42 function dummy() +@attach_meta 42 function dummy() false end - -asts = code_lowered(dummy, Tuple{}) -@test length(asts) == 1 -ast = asts[1] - -body = Expr(:block) -body.args = ast.code - -@test popmeta!(body, :test) == (true, [42]) -@test popmeta!(body, :nonexistent) == (false, []) +let ast = only(code_lowered(dummy, Tuple{})) + body = Expr(:block) + body.args = ast.code + @test popmeta!(body, :test) == (true, [42]) + @test popmeta!(body, :nonexistent) == (false, []) +end # Simple popmeta!() tests -ex1 = quote - $(Expr(:meta, :foo)) - x*x+1 +let ex1 = quote + $(Expr(:meta, :foo)) + x*x+1 + end + @test popmeta!(ex1, :foo)[1] + @test !popmeta!(ex1, :foo)[1] + @test !popmeta!(ex1, :bar)[1] + @test !(popmeta!(:(x*x+1), :foo)[1]) end -@test popmeta!(ex1, :foo)[1] -@test !popmeta!(ex1, :foo)[1] -@test !popmeta!(ex1, :bar)[1] -@test !(popmeta!(:(x*x+1), :foo)[1]) # Find and pop meta information from general ast locations -multi_meta = quote - $(Expr(:meta, :foo1)) - y = x - $(Expr(:meta, :foo2, :foo3)) - begin - $(Expr(:meta, :foo4, Expr(:foo5, 1, 2))) +let multi_meta = quote + $(Expr(:meta, :foo1)) + y = x + $(Expr(:meta, :foo2, :foo3)) + begin + $(Expr(:meta, :foo4, Expr(:foo5, 1, 2))) + end + x*x+1 end - x*x+1 -end -@test popmeta!(deepcopy(multi_meta), :foo1) == (true, []) -@test popmeta!(deepcopy(multi_meta), :foo2) == (true, []) -@test popmeta!(deepcopy(multi_meta), :foo3) == (true, []) -@test popmeta!(deepcopy(multi_meta), :foo4) == (true, []) -@test popmeta!(deepcopy(multi_meta), :foo5) == (true, [1,2]) -@test popmeta!(deepcopy(multi_meta), :bar) == (false, []) - -# Test that popmeta!() removes meta blocks entirely when they become empty. -for m in [:foo1, :foo2, :foo3, :foo4, :foo5] - @test popmeta!(multi_meta, m)[1] + @test popmeta!(deepcopy(multi_meta), :foo1) == (true, []) + @test popmeta!(deepcopy(multi_meta), :foo2) == (true, []) + @test popmeta!(deepcopy(multi_meta), :foo3) == (true, []) + @test popmeta!(deepcopy(multi_meta), :foo4) == (true, []) + @test popmeta!(deepcopy(multi_meta), :foo5) == (true, [1,2]) + @test popmeta!(deepcopy(multi_meta), :bar) == (false, []) + + # Test that popmeta!() removes meta blocks entirely when they become empty. + ast = :(dummy() = $multi_meta) + for m in [:foo1, :foo2, :foo3, :foo4, :foo5] + @test popmeta!(multi_meta, m)[1] + end + @test Base.findmeta(ast)[1] == 0 end -@test Base.findmeta(multi_meta.args)[1] == 0 # Test that pushmeta! 
can push across other macros, # in the case multiple pushmeta!-based macros are combined - -@attach 40 @attach 41 @attach 42 dummy_multi() = return nothing - -asts = code_lowered(dummy_multi, Tuple{}) -@test length(asts) == 1 -ast = asts[1] - -body = Expr(:block) -body.args = ast.code - -@test popmeta!(body, :test) == (true, [40]) -@test popmeta!(body, :test) == (true, [41]) -@test popmeta!(body, :test) == (true, [42]) -@test popmeta!(body, :nonexistent) == (false, []) +@attach_meta 40 @attach_meta 41 @attach_meta 42 dummy_multi() = return nothing +let ast = only(code_lowered(dummy_multi, Tuple{})) + body = Expr(:block) + body.args = ast.code + @test popmeta!(body, :test) == (true, [40]) + @test popmeta!(body, :test) == (true, [41]) + @test popmeta!(body, :test) == (true, [42]) + @test popmeta!(body, :nonexistent) == (false, []) +end # tests to fully cover functions in base/meta.jl using Base.Meta @@ -144,8 +137,8 @@ baremodule B x = 1 module M; x = 2; end import Base - @Base.eval x = 3 - @Base.eval M x = 4 + Base.@eval x = 3 + Base.@eval M x = 4 end @test B.x == 3 @test B.M.x == 4 @@ -221,8 +214,25 @@ let a = 1 @test @macroexpand @is_dollar_expr $a end -@test Meta.parseatom("@foo", 1, filename=:bar)[1].args[2].file == :bar -@test Meta.parseall("@foo", filename=:bar).args[1].file == :bar +let ex = Meta.parse("@foo"; filename=:bar) + @test Meta.isexpr(ex, :macrocall) + arg2 = ex.args[2] + @test isa(arg2, LineNumberNode) && arg2.file === :bar +end +let ex = Meta.parseatom("@foo", 1, filename=:bar)[1] + @test Meta.isexpr(ex, :macrocall) + arg2 = ex.args[2] + @test isa(arg2, LineNumberNode) && arg2.file === :bar +end +let ex = Meta.parseall("@foo", filename=:bar) + @test Meta.isexpr(ex, :toplevel) + arg1 = ex.args[1] + @test isa(arg1, LineNumberNode) && arg1.file === :bar + arg2 = ex.args[2] + @test Meta.isexpr(arg2, :macrocall) + arg2arg2 = arg2.args[2] + @test isa(arg2arg2, LineNumberNode) && arg2arg2.file === :bar +end _lower(m::Module, ex, world::UInt) = ccall(:jl_expand_in_world, Any, (Any, Ref{Module}, Cstring, Cint, Csize_t), ex, m, "none", 0, world) @@ -237,14 +247,14 @@ end f(::T) where {T} = T ci = code_lowered(f, Tuple{Int})[1] @test Meta.partially_inline!(ci.code, [], Tuple{typeof(f),Int}, Any[Int], 0, 0, :propagate) == - Any[Core.ReturnNode(QuoteNode(Int))] + Any[QuoteNode(Int), Core.ReturnNode(Core.SSAValue(1))] g(::Val{x}) where {x} = x ? 
1 : 0 ci = code_lowered(g, Tuple{Val{true}})[1] -@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 0, :propagate)[1] == - Core.GotoIfNot(QuoteNode(true), 3) -@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 2, :propagate)[1] == - Core.GotoIfNot(QuoteNode(true), 5) +@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 0, :propagate)[2] == + Core.GotoIfNot(Core.SSAValue(1), 4) +@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 2, :propagate)[2] == + Core.GotoIfNot(Core.SSAValue(3), 6) @testset "inlining with isdefined" begin isdefined_slot(x) = @isdefined(x) diff --git a/test/misc.jl b/test/misc.jl index 6f0e6457be7ea..249175a0ed1d3 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -149,7 +149,7 @@ for l in (Threads.SpinLock(), ReentrantLock()) @test get_finalizers_inhibited() == 1 GC.enable_finalizers(true) @test get_finalizers_inhibited() == 0 - if ccall(:jl_is_debugbuild, Cint, ()) != 0 + if Base.isdebugbuild() # Note this warning only exists in debug builds @test_warn "WARNING: GC finalizers already enabled on this thread." GC.enable_finalizers(true) end @@ -191,7 +191,7 @@ end sleep(rand(0:0.01:0.1)) history[Threads.atomic_add!(clock, 1)] = Threads.atomic_sub!(occupied, 1) - 1 return :resultvalue - end == :resultvalue + end === :resultvalue end end @test all(<=(sem_size), history) @@ -237,14 +237,16 @@ end # test that @sync is lexical (PR #27164) const x27164 = Ref(0) -do_something_async_27164() = @async(begin sleep(1); x27164[] = 2; end) +const c27164 = Base.Event() +do_something_async_27164() = @async(begin wait(c27164); x27164[] = 2; end) let t = nothing @sync begin + @async (sleep(0.1); x27164[] = 1) t = do_something_async_27164() - @async (sleep(0.05); x27164[] = 1) end @test x27164[] == 1 + notify(c27164) fetch(t) @test x27164[] == 2 end @@ -351,9 +353,49 @@ end after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn timing off, @time will do that @test after_comp >= before_comp; +@test after_recomp >= before_recomp; +@test after_recomp - before_recomp <= after_comp - before_comp; + +# should be approximately 60,000,000 ns, we definitely shouldn't exceed 100x that value +# failing this probably means an uninitialized variable somewhere +@test after_comp - before_comp < 6_000_000_000; end # redirect_stdout +# issue #48024, avoid overcounting timers +begin + double(x::Real) = 2x; + calldouble(container) = double(container[1]); + calldouble2(container) = calldouble(container); + + Base.Experimental.@force_compile; + local elapsed = Base.time_ns(); + Base.cumulative_compile_timing(true); + local compiles = Base.cumulative_compile_time_ns(); + @eval calldouble([1.0]); + Base.cumulative_compile_timing(false); + compiles = Base.cumulative_compile_time_ns() .- compiles; + elapsed = Base.time_ns() - elapsed; + + # compile time should be at most total time + @test compiles[1] <= elapsed + # recompile time should be at most compile time + @test compiles[2] <= compiles[1] + + elapsed = Base.time_ns(); + Base.cumulative_compile_timing(true); + compiles = Base.cumulative_compile_time_ns(); + @eval calldouble(1.0); + Base.cumulative_compile_timing(false); + compiles = Base.cumulative_compile_time_ns() .- compiles; + elapsed = Base.time_ns() - elapsed; + + # compile time should be at most total time + @test compiles[1] <= elapsed + # recompile time should be at most compile time + @test compiles[2] <= compiles[1] +end + macro capture_stdout(ex) 
quote mktemp() do fname, f @@ -366,7 +408,28 @@ macro capture_stdout(ex) end end -# compilation reports in @time +# issue #48024, but with the time macro itself +begin + double(x::Real) = 2x; + calldouble(container) = double(container[1]); + calldouble2(container) = calldouble(container); + + local first = @capture_stdout @time @eval calldouble([1.0]) + local second = @capture_stdout @time @eval calldouble2(1.0) + + # these functions were not recompiled + local matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", first)) + @test length(matches) == 1 + @test parse(Float64, matches[1][1]) > 0.0 + @test parse(Float64, matches[1][1]) <= 100.0 + + matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", second)) + @test length(matches) == 1 + @test parse(Float64, matches[1][1]) > 0.0 + @test parse(Float64, matches[1][1]) <= 100.0 +end + +# compilation reports in @time, @timev let f = gensym("f"), callf = gensym("callf"), call2f = gensym("call2f") @eval begin $f(::Real) = 1 @@ -397,6 +460,36 @@ let f = gensym("f"), callf = gensym("callf"), call2f = gensym("call2f") @test occursin("% of which was recompilation", out) end end +let f = gensym("f"), callf = gensym("callf"), call2f = gensym("call2f") + @eval begin + $f(::Real) = 1 + $callf(container) = $f(container[1]) + $call2f(container) = $callf(container) + c64 = [1.0] + c32 = [1.0f0] + cabs = AbstractFloat[1.0] + + out = @capture_stdout @timev $call2f(c64) + @test occursin("% compilation time", out) + out = @capture_stdout @timev $call2f(c64) + @test occursin("% compilation time", out) == false + + out = @capture_stdout @timev $call2f(c32) + @test occursin("% compilation time", out) + out = @capture_stdout @timev $call2f(c32) + @test occursin("% compilation time", out) == false + + out = @capture_stdout @timev $call2f(cabs) + @test occursin("% compilation time", out) + out = @capture_stdout @timev $call2f(cabs) + @test occursin("% compilation time", out) == false + + $f(::Float64) = 2 + out = @capture_stdout @timev $call2f(c64) + @test occursin("% compilation time:", out) + @test occursin("% of which was recompilation", out) + end +end # interactive utilities @@ -766,6 +859,10 @@ let buf = IOBuffer() printstyled(buf_color, "foo"; bold=true, color=:red) @test String(take!(buf)) == "\e[31m\e[1mfoo\e[22m\e[39m" + # Check that italic is turned off + printstyled(buf_color, "foo"; italic=true, color=:red) + @test String(take!(buf)) == "\e[31m\e[3mfoo\e[23m\e[39m" + # Check that underline is turned off printstyled(buf_color, "foo"; color = :red, underline = true) @test String(take!(buf)) == "\e[31m\e[4mfoo\e[24m\e[39m" @@ -783,8 +880,8 @@ let buf = IOBuffer() @test String(take!(buf)) == "\e[31m\e[8mfoo\e[28m\e[39m" # Check that all options can be turned on simultaneously - printstyled(buf_color, "foo"; color = :red, bold = true, underline = true, blink = true, reverse = true, hidden = true) - @test String(take!(buf)) == "\e[31m\e[1m\e[4m\e[5m\e[7m\e[8mfoo\e[28m\e[27m\e[25m\e[24m\e[22m\e[39m" + printstyled(buf_color, "foo"; color = :red, bold = true, italic = true, underline = true, blink = true, reverse = true, hidden = true) + @test String(take!(buf)) == "\e[31m\e[1m\e[3m\e[4m\e[5m\e[7m\e[8mfoo\e[28m\e[27m\e[25m\e[24m\e[22m\e[23m\e[39m" end abstract type DA_19281{T, N} <: AbstractArray{T, N} end @@ -813,7 +910,7 @@ mutable struct Demo_20254 end # these cause stack overflows and are a little flaky on CI, ref #20256 -if Bool(parse(Int,(get(ENV, "JULIA_TESTFULL", "0")))) +if Base.get_bool_env("JULIA_TESTFULL", false) function Demo_20254(arr::AbstractArray=Any[]) 
Demo_20254(string.(arr)) end @@ -870,38 +967,87 @@ end module atinvokelatest f(x) = 1 g(x, y; z=0) = x * y + z +mutable struct X; x; end +Base.getproperty(::X, ::Any) = error("overload me") +Base.setproperty!(::X, ::Any, ::Any) = error("overload me") +struct Xs + xs::Vector{Any} end - -let foo() = begin - @eval atinvokelatest.f(x::Int) = 3 - return Base.@invokelatest atinvokelatest.f(0) - end - @test foo() == 3 +Base.getindex(::Xs, ::Any) = error("overload me") +Base.setindex!(::Xs, ::Any, ::Any) = error("overload me") end -let foo() = begin +let call_test() = begin @eval atinvokelatest.f(x::Int) = 3 - return Base.@invokelatest atinvokelatest.f(0) + return @invokelatest atinvokelatest.f(0) end - @test foo() == 3 + @test call_test() == 3 - bar() = begin + call_with_kws_test() = begin @eval atinvokelatest.g(x::Int, y::Int; z=3) = z - return Base.@invokelatest atinvokelatest.g(2, 3; z=1) + return @invokelatest atinvokelatest.g(2, 3; z=1) + end + @test call_with_kws_test() == 1 + + getproperty_test() = begin + @eval Base.getproperty(x::atinvokelatest.X, f::Symbol) = getfield(x, f) + x = atinvokelatest.X(nothing) + return @invokelatest x.x + end + @test isnothing(getproperty_test()) + + setproperty!_test() = begin + @eval Base.setproperty!(x::atinvokelatest.X, f::Symbol, @nospecialize(v)) = setfield!(x, f, v) + x = atinvokelatest.X(nothing) + @invokelatest x.x = 1 + return x + end + x = setproperty!_test() + @test getfield(x, :x) == 1 + + getindex_test() = begin + @eval Base.getindex(xs::atinvokelatest.Xs, idx::Int) = xs.xs[idx] + xs = atinvokelatest.Xs(Any[nothing]) + return @invokelatest xs[1] end - @test bar() == 1 + @test isnothing(getindex_test()) + + setindex!_test() = begin + @eval function Base.setindex!(xs::atinvokelatest.Xs, @nospecialize(v), idx::Int) + xs.xs[idx] = v + end + xs = atinvokelatest.Xs(Any[nothing]) + @invokelatest xs[1] = 1 + return xs + end + xs = setindex!_test() + @test xs.xs[1] == 1 end +abstract type InvokeX end +Base.getproperty(::InvokeX, ::Symbol) = error("overload InvokeX") +Base.setproperty!(::InvokeX, ::Symbol, @nospecialize(v::Any)) = error("overload InvokeX") +mutable struct InvokeX2 <: InvokeX; x; end +Base.getproperty(x::InvokeX2, f::Symbol) = getfield(x, f) +Base.setproperty!(x::InvokeX2, f::Symbol, @nospecialize(v::Any)) = setfield!(x, f, v) + +abstract type InvokeXs end +Base.getindex(::InvokeXs, ::Int) = error("overload InvokeXs") +Base.setindex!(::InvokeXs, @nospecialize(v::Any), ::Int) = error("overload InvokeXs") +struct InvokeXs2 <: InvokeXs + xs::Vector{Any} +end +Base.getindex(xs::InvokeXs2, idx::Int) = xs.xs[idx] +Base.setindex!(xs::InvokeXs2, @nospecialize(v::Any), idx::Int) = xs.xs[idx] = v + @testset "@invoke macro" begin # test against `invoke` doc example - let - f(x::Real) = x^2 + let f(x::Real) = x^2 f(x::Integer) = 1 + @invoke f(x::Real) @test f(2) == 5 end - let - f1(::Integer) = Integer + let f1(::Integer) = Integer f1(::Real) = Real; f2(x::Real) = _f2(x) _f2(::Integer) = Integer @@ -912,9 +1058,8 @@ end @test @invoke(f2(1::Real)) === Integer end - # when argment's type annotation is omitted, it should be specified as `Core.Typeof(x)` - let - f(_) = Any + # when argument's type annotation is omitted, it should be specified as `Core.Typeof(x)` + let f(_) = Any f(x::Integer) = Integer @test f(1) === Integer @test @invoke(f(1::Any)) === Any @@ -927,13 +1072,28 @@ end end # handle keyword arguments correctly - let - f(a; kw1 = nothing, kw2 = nothing) = a + max(kw1, kw2) + let f(a; kw1 = nothing, kw2 = nothing) = a + max(kw1, kw2) 
f(::Integer; kwargs...) = error("don't call me") @test_throws Exception f(1; kw1 = 1, kw2 = 2) @test 3 == @invoke f(1::Any; kw1 = 1, kw2 = 2) end + + # additional syntax test + let x = InvokeX2(nothing) + @test_throws "overload InvokeX" @invoke (x::InvokeX).x + @test isnothing(@invoke x.x) + @test_throws "overload InvokeX" @invoke (x::InvokeX).x = 42 + @invoke x.x = 42 + @test 42 == x.x + + xs = InvokeXs2(Any[nothing]) + @test_throws "overload InvokeXs" @invoke (xs::InvokeXs)[1] + @test isnothing(@invoke xs[1]) + @test_throws "overload InvokeXs" @invoke (xs::InvokeXs)[1] = 42 + @invoke xs[1] = 42 + @test 42 == xs.xs[1] + end end # Endian tests @@ -986,19 +1146,28 @@ end @test_nowarn Core.eval(Main, :(import ....Main)) # issue #27239 +using Base.BinaryPlatforms: HostPlatform, libc @testset "strftime tests issue #27239" begin - # change to non-Unicode Korean + # change to non-Unicode Korean to test that it is properly transcoded into valid UTF-8 korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"] - timestrs = String[] - withlocales(korloc) do - # system dependent formats - push!(timestrs, Libc.strftime(0.0)) - push!(timestrs, Libc.strftime("%a %A %b %B %p %Z", 0)) + at_least_one_locale_found = false + withlocales(korloc) do locale + at_least_one_locale_found = true + # Test both the default format and a custom formatting string + for s in (Libc.strftime(0.0), Libc.strftime("%a %A %b %B %p %Z", 0)) + # Ensure that we always get valid UTF-8 back + @test isvalid(s) + + # On `musl` it is impossible for `setlocale` to fail, it just falls back to + # the default system locale, which on our buildbots is en_US.UTF-8. We'll + # assert that what we get does _not_ start with `Thu`, as that's what all + # en_US.UTF-8 encodings would start with. 
+ # X-ref: https://musl.openwall.narkive.com/kO1vpTWJ/setlocale-behavior-with-missing-locales + @test !startswith(s, "Thu") broken=(libc(HostPlatform()) == "musl") + end end - # tests - isempty(timestrs) && @warn "skipping stftime tests: no locale found for testing" - for s in timestrs - @test isvalid(s) + if !at_least_one_locale_found + @warn "skipping stftime tests: no locale found for testing" end end @@ -1064,6 +1233,56 @@ const outsidevar = 7 end @test TestOutsideVar() == TestOutsideVar(7) +@kwdef mutable struct Test_kwdef_const_atomic + a + b::Int + c::Int = 1 + const d + const e::Int + const f = 1 + const g::Int = 1 + @atomic h::Int +end + +@testset "const and @atomic fields in @kwdef" begin + x = Test_kwdef_const_atomic(a = 1, b = 1, d = 1, e = 1, h = 1) + for f in fieldnames(Test_kwdef_const_atomic) + @test getfield(x, f) == 1 + end + @testset "const fields" begin + @test_throws ErrorException x.d = 2 + @test_throws ErrorException x.e = 2 + @test_throws MethodError x.e = "2" + @test_throws ErrorException x.f = 2 + @test_throws ErrorException x.g = 2 + end + @testset "atomic fields" begin + @test_throws ConcurrencyViolationError x.h = 1 + @atomic x.h = 1 + @test @atomic(x.h) == 1 + @atomic x.h = 2 + @test @atomic(x.h) == 2 + end +end + +@kwdef struct Test_kwdef_lineinfo + a::String +end +@testset "@kwdef constructor line info" begin + for method in methods(Test_kwdef_lineinfo) + @test method.file === Symbol(@__FILE__) + @test ((@__LINE__)-6) ≤ method.line ≤ ((@__LINE__)-5) + end +end +@kwdef struct Test_kwdef_lineinfo_sparam{S<:AbstractString} + a::S +end +@testset "@kwdef constructor line info with static parameter" begin + for method in methods(Test_kwdef_lineinfo_sparam) + @test method.file === Symbol(@__FILE__) + @test ((@__LINE__)-6) ≤ method.line ≤ ((@__LINE__)-5) + end +end @testset "exports of modules" begin for (_, mod) in Base.loaded_modules @@ -1134,7 +1353,7 @@ end end # Test that read fault on a prot-none region does not incorrectly give -# ReadOnlyMemoryEror, but rather crashes the program +# ReadOnlyMemoryError, but rather crashes the program const MAP_ANONYMOUS_PRIVATE = Sys.isbsd() ? 
0x1002 : 0x22 let script = :( let ptr = Ptr{Cint}(ccall(:jl_mmap, Ptr{Cvoid}, @@ -1162,4 +1381,24 @@ end @testset "Base/timing.jl" begin @test Base.jit_total_bytes() >= 0 + + # sanity check `@allocations` returns what we expect in some very simple cases + @test (@allocations "a") == 0 + @test (@allocations "a" * "b") == 0 # constant propagation + @test (@allocations "a" * Base.inferencebarrier("b")) == 1 +end + +@testset "in_finalizer" begin + @test !GC.in_finalizer() + + in_fin = Ref{Any}() + wait(@async begin + r = Ref(1) + finalizer(r) do _ + in_fin[] = GC.in_finalizer() + end + nothing + end) + GC.gc(true); yield() + @test in_fin[] end diff --git a/test/missing.jl b/test/missing.jl index 13ed684f1fc05..36155eb32fe49 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -21,8 +21,8 @@ end @test convert(Union{Nothing, Missing}, nothing) === nothing @test convert(Union{Missing, Nothing, Float64}, 1) === 1.0 - @test_throws MethodError convert(Missing, 1) - @test_throws MethodError convert(Union{Nothing, Missing}, 1) + @test_throws ErrorException("cannot convert a value to missing for assignment") convert(Missing, 1) + @test_throws ErrorException("cannot convert a value to missing for assignment") convert(Union{Nothing, Missing}, 1) @test_throws MethodError convert(Union{Int, Missing}, "a") end @@ -66,6 +66,7 @@ end @test isequal(missing, missing) @test !isequal(1, missing) @test !isequal(missing, 1) + @test !isequal('c', missing) @test (missing < missing) === missing @test (missing < 1) === missing @test (1 < missing) === missing @@ -79,7 +80,7 @@ end @test isapprox(missing, 1.0, atol=1e-6) === missing @test isapprox(1.0, missing, rtol=1e-6) === missing - @test !any(T -> T === Union{Missing,Bool}, Base.return_types(isequal, Tuple{Any,Any})) + @test all(==(Bool), Base.return_types(isequal, Tuple{Any,Any})) end @testset "arithmetic operators" begin @@ -529,7 +530,7 @@ end @test mapreduce(cos, *, collect(skipmissing(A))) ≈ mapreduce(cos, *, skipmissing(A)) end - # Patterns that exercize code paths for inputs with 1 or 2 non-missing values + # Patterns that exercise code paths for inputs with 1 or 2 non-missing values @test sum(skipmissing([1, missing, missing, missing])) === 1 @test sum(skipmissing([missing, missing, missing, 1])) === 1 @test sum(skipmissing([1, missing, missing, missing, 2])) === 3 @@ -642,4 +643,11 @@ end @test isequal(sort(X, alg=MergeSort, rev=true), XRP) end -sortperm(reverse([NaN, missing, NaN, missing])) +@test (sortperm(reverse([NaN, missing, NaN, missing])); true) + +# use LazyString for MissingException to get the better effects +for func in (round, ceil, floor, trunc) + @testset let func = func + @test Core.Compiler.is_foldable(Base.infer_effects(func, (Type{Int},Union{Int,Missing}))) + end +end diff --git a/test/mpfr.jl b/test/mpfr.jl index 1a0a0041bf94e..a0dd15d97f70c 100644 --- a/test/mpfr.jl +++ b/test/mpfr.jl @@ -1039,3 +1039,10 @@ end end end end + +@testset "issue #50642" begin + setprecision(BigFloat, 500) do + bf = big"1.4901162082026128889687591176485489397376143775948511e-07" + @test Float16(bf) == Float16(2.0e-7) + end +end diff --git a/test/namedtuple.jl b/test/namedtuple.jl index 3b571b3c7d612..20737d68db1bb 100644 --- a/test/namedtuple.jl +++ b/test/namedtuple.jl @@ -26,6 +26,7 @@ @test (x=4, y=5, z=6)[[:x, :y]] == (x=4, y=5) @test (x=4, y=5, z=6)[[:x]] == (x=4,) @test (x=4, y=5, z=6)[()] == NamedTuple() +@test (x=4, y=5, z=6)[:] == (x=4, y=5, z=6) @test NamedTuple()[()] == NamedTuple() @test_throws ErrorException (x=4, y=5, z=6).a @test_throws 
BoundsError (a=2,)[0] @@ -75,6 +76,26 @@ let NT = NamedTuple{(:a,:b),Tuple{Int8,Int16}}, nt = (x=3,y=4) @test_throws MethodError convert(NT, nt) end +@testset "convert NamedTuple" begin + conv1 = convert(NamedTuple{(:a,),Tuple{I}} where I, (;a=1)) + @test conv1 === (a = 1,) + + conv2 = convert(NamedTuple{(:a,),Tuple{Any}}, (;a=1)) + @test conv2 === NamedTuple{(:a,), Tuple{Any}}((1,)) + + conv3 = convert(NamedTuple{(:a,),}, (;a=1)) + @test conv3 === (a = 1,) + + conv4 = convert(NamedTuple{(:a,),Tuple{I}} where I<:Unsigned, (;a=1)) + @test conv4 === NamedTuple{(:a,), Tuple{Unsigned}}((1,)) + + conv5 = convert(NamedTuple, (;a=1)) + @test conv1 === (a = 1,) + + conv_res = @test_throws MethodError convert(NamedTuple{(:a,),Tuple{I}} where I<:AbstractString, (;a=1)) + @test conv_res.value.f === convert && conv_res.value.args === (AbstractString, 1) +end + @test NamedTuple{(:a,:c)}((b=1,z=2,c=3,aa=4,a=5)) === (a=5, c=3) @test NamedTuple{(:a,)}(NamedTuple{(:b, :a), Tuple{Int, Union{Int,Nothing}}}((1, 2))) === NamedTuple{(:a,), Tuple{Union{Int,Nothing}}}((2,)) @@ -83,6 +104,9 @@ end @test eltype(NamedTuple{(:x, :y),Tuple{Union{Missing, Int},Union{Missing, Float64}}}( (missing, missing))) === Union{Real, Missing} +@test valtype((a=[1,2], b=[3,4])) === Vector{Int} +@test keytype((a=[1,2], b=[3,4])) === Symbol + @test Tuple((a=[1,2], b=[3,4])) == ([1,2], [3,4]) @test Tuple(NamedTuple()) === () @test Tuple((x=4, y=5, z=6)) == (4,5,6) @@ -120,7 +144,7 @@ end let nt = merge(NamedTuple{(:a,:b),Tuple{Int32,Union{Int32,Nothing}}}((1,Int32(2))), NamedTuple{(:a,:c),Tuple{Union{Int8,Nothing},Float64}}((nothing,1.0))) @test typeof(nt) == NamedTuple{(:a,:b,:c),Tuple{Union{Int8,Nothing},Union{Int32,Nothing},Float64}} - @test repr(nt) == "NamedTuple{(:a, :b, :c), Tuple{Union{Nothing, Int8}, Union{Nothing, Int32}, Float64}}((nothing, 2, 1.0))" + @test repr(nt) == "@NamedTuple{a::Union{Nothing, Int8}, b::Union{Nothing, Int32}, c::Float64}((nothing, 2, 1.0))" end @test merge(NamedTuple(), [:a=>1, :b=>2, :c=>3, :a=>4, :c=>5]) == (a=4, b=2, c=5) @@ -147,6 +171,8 @@ end @test Base.front((a = 1, )) ≡ NamedTuple() @test_throws ArgumentError Base.tail(NamedTuple()) @test_throws ArgumentError Base.front(NamedTuple()) +@test @inferred(reverse((a=1,))) === (a=1,) +@test @inferred(reverse((a=1, b=:c))) === (b=:c, a=1) # syntax errors @@ -257,10 +283,10 @@ abstr_nt_22194_3() @test findall(isequal(1), (a=1, b=1)) == [:a, :b] @test isempty(findall(isequal(1), NamedTuple())) @test isempty(findall(isequal(1), (a=2, b=3))) -@test findfirst(isequal(1), (a=1, b=2)) == :a -@test findlast(isequal(1), (a=1, b=2)) == :a -@test findfirst(isequal(1), (a=1, b=1)) == :a -@test findlast(isequal(1), (a=1, b=1)) == :b +@test findfirst(isequal(1), (a=1, b=2)) === :a +@test findlast(isequal(1), (a=1, b=2)) === :a +@test findfirst(isequal(1), (a=1, b=1)) === :a +@test findlast(isequal(1), (a=1, b=1)) === :b @test findfirst(isequal(1), ()) === nothing @test findlast(isequal(1), ()) === nothing @test findfirst(isequal(1), (a=2, b=3)) === nothing @@ -316,6 +342,12 @@ end @test_throws LoadError include_string(Main, "@NamedTuple(a::Int, b)") end +# @Kwargs +@testset "@Kwargs" begin + @test @Kwargs{a::Int,b::String} == typeof(pairs((;a=1,b="2"))) + @test @Kwargs{} == typeof(pairs((;))) +end + # issue #29333, implicit names let x = 1, y = 2 @test (;y) === (y = 2,) @@ -336,3 +368,61 @@ end # issue #44086 @test NamedTuple{(:x, :y, :z), Tuple{Int8, Int16, Int32}}((z=1, x=2, y=3)) === (x = Int8(2), y = Int16(3), z = Int32(1)) + +@testset "mapfoldl" begin + A1 
= (;a=1, b=2, c=3, d=4) + A2 = (;a=-1, b=-2, c=-3, d=-4) + @test (((1=>2)=>3)=>4) == foldl(=>, A1) == + mapfoldl(identity, =>, A1) == mapfoldl(abs, =>, A2) + @test mapfoldl(abs, =>, A2, init=-10) == ((((-10=>1)=>2)=>3)=>4) + @test mapfoldl(abs, =>, (;), init=-10) == -10 + @test mapfoldl(abs, Pair{Any,Any}, NamedTuple(Symbol(:x,i) => i for i in 1:30)) == mapfoldl(abs, Pair{Any,Any}, [1:30;]) + @test_throws "reducing over an empty collection" mapfoldl(abs, =>, (;)) +end + +# Test effect/inference for merge/diff of unknown NamedTuples +for f in (Base.merge, Base.structdiff) + @testset let f = f + # test the effects of the fallback path + fallback_func(a::NamedTuple, b::NamedTuple) = @invoke f(a::NamedTuple, b::NamedTuple) + @testset let eff = Base.infer_effects(fallback_func) + @test Core.Compiler.is_foldable(eff) + @test eff.nonoverlayed + end + @test only(Base.return_types(fallback_func)) == NamedTuple + # test if `max_methods = 4` setting works as expected + general_func(a::NamedTuple, b::NamedTuple) = f(a, b) + @testset let eff = Base.infer_effects(general_func) + @test Core.Compiler.is_foldable(eff) + @test eff.nonoverlayed + end + @test only(Base.return_types(general_func)) == NamedTuple + end +end +@test Core.Compiler.is_foldable(Base.infer_effects(pairs, Tuple{NamedTuple})) + +# Test that merge/diff preserves nt field types +let a = Base.NamedTuple{(:a, :b), Tuple{Any, Any}}((1, 2)), b = Base.NamedTuple{(:b,), Tuple{Float64}}(3) + @test typeof(Base.merge(a, b)) == Base.NamedTuple{(:a, :b), Tuple{Any, Float64}} + @test typeof(Base.structdiff(a, b)) == Base.NamedTuple{(:a,), Tuple{Any}} +end + +function mergewith51009(combine, a::NamedTuple{an}, b::NamedTuple{bn}) where {an, bn} + names = Base.merge_names(an, bn) + NamedTuple{names}(ntuple(Val{nfields(names)}()) do i + n = getfield(names, i) + if Base.sym_in(n, an) + if Base.sym_in(n, bn) + combine(getfield(a, n), getfield(b, n)) + else + getfield(a, n) + end + else + getfield(b, n) + end + end) +end +let c = (a=1, b=2), + d = (b=3, c=(d=1,)) + @test @inferred(mergewith51009((x,y)->y, c, d)) === (a = 1, b = 3, c = (d = 1,)) +end diff --git a/test/numbers.jl b/test/numbers.jl index ad521d7382713..a9d126aa33d5a 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -95,34 +95,68 @@ end @test max(1) === 1 @test minmax(1) === (1, 1) @test minmax(5, 3) == (3, 5) - @test minmax(3., 5.) == (3., 5.) - @test minmax(5., 3.) == (3., 5.) 
- @test minmax(3., NaN) ≣ (NaN, NaN) - @test minmax(NaN, 3) ≣ (NaN, NaN) - @test minmax(Inf, NaN) ≣ (NaN, NaN) - @test minmax(NaN, Inf) ≣ (NaN, NaN) - @test minmax(-Inf, NaN) ≣ (NaN, NaN) - @test minmax(NaN, -Inf) ≣ (NaN, NaN) - @test minmax(NaN, NaN) ≣ (NaN, NaN) - @test min(-0.0,0.0) === min(0.0,-0.0) - @test max(-0.0,0.0) === max(0.0,-0.0) - @test minmax(-0.0,0.0) === minmax(0.0,-0.0) - @test max(-3.2, 5.1) == max(5.1, -3.2) == 5.1 - @test min(-3.2, 5.1) == min(5.1, -3.2) == -3.2 - @test max(-3.2, Inf) == max(Inf, -3.2) == Inf - @test max(-3.2, NaN) ≣ max(NaN, -3.2) ≣ NaN - @test min(5.1, Inf) == min(Inf, 5.1) == 5.1 - @test min(5.1, -Inf) == min(-Inf, 5.1) == -Inf - @test min(5.1, NaN) ≣ min(NaN, 5.1) ≣ NaN - @test min(5.1, -NaN) ≣ min(-NaN, 5.1) ≣ NaN - @test minmax(-3.2, 5.1) == (min(-3.2, 5.1), max(-3.2, 5.1)) - @test minmax(-3.2, Inf) == (min(-3.2, Inf), max(-3.2, Inf)) - @test minmax(-3.2, NaN) ≣ (min(-3.2, NaN), max(-3.2, NaN)) - @test (max(Inf,NaN), max(-Inf,NaN), max(Inf,-NaN), max(-Inf,-NaN)) ≣ (NaN,NaN,NaN,NaN) - @test (max(NaN,Inf), max(NaN,-Inf), max(-NaN,Inf), max(-NaN,-Inf)) ≣ (NaN,NaN,NaN,NaN) - @test (min(Inf,NaN), min(-Inf,NaN), min(Inf,-NaN), min(-Inf,-NaN)) ≣ (NaN,NaN,NaN,NaN) - @test (min(NaN,Inf), min(NaN,-Inf), min(-NaN,Inf), min(-NaN,-Inf)) ≣ (NaN,NaN,NaN,NaN) - @test minmax(-Inf,NaN) ≣ (min(-Inf,NaN), max(-Inf,NaN)) + Top(T, op, x, y) = op(T.(x), T.(y)) + Top(T, op) = (x, y) -> Top(T, op, x, y) + _compare(x, y) = x == y + for T in (Float16, Float32, Float64, BigFloat) + minmax = Top(T,Base.minmax) + min = Top(T,Base.min) + max = Top(T,Base.max) + (==) = Top(T,_compare) + (===) = Top(T,Base.isequal) # we only use === to compare -0.0/0.0, `isequal` should be equivalent + @test minmax(3., 5.) == (3., 5.) + @test minmax(5., 3.) == (3., 5.) 
+ @test minmax(3., NaN) ≣ (NaN, NaN) + @test minmax(NaN, 3) ≣ (NaN, NaN) + @test minmax(Inf, NaN) ≣ (NaN, NaN) + @test minmax(NaN, Inf) ≣ (NaN, NaN) + @test minmax(-Inf, NaN) ≣ (NaN, NaN) + @test minmax(NaN, -Inf) ≣ (NaN, NaN) + @test minmax(NaN, NaN) ≣ (NaN, NaN) + @test min(-0.0,0.0) === min(0.0,-0.0) + @test max(-0.0,0.0) === max(0.0,-0.0) + @test minmax(-0.0,0.0) === minmax(0.0,-0.0) + @test max(-3.2, 5.1) == max(5.1, -3.2) == 5.1 + @test min(-3.2, 5.1) == min(5.1, -3.2) == -3.2 + @test max(-3.2, Inf) == max(Inf, -3.2) == Inf + @test max(-3.2, NaN) ≣ max(NaN, -3.2) ≣ NaN + @test min(5.1, Inf) == min(Inf, 5.1) == 5.1 + @test min(5.1, -Inf) == min(-Inf, 5.1) == -Inf + @test min(5.1, NaN) ≣ min(NaN, 5.1) ≣ NaN + @test min(5.1, -NaN) ≣ min(-NaN, 5.1) ≣ NaN + @test minmax(-3.2, 5.1) == (min(-3.2, 5.1), max(-3.2, 5.1)) + @test minmax(-3.2, Inf) == (min(-3.2, Inf), max(-3.2, Inf)) + @test minmax(-3.2, NaN) ≣ (min(-3.2, NaN), max(-3.2, NaN)) + @test (max(Inf,NaN), max(-Inf,NaN), max(Inf,-NaN), max(-Inf,-NaN)) ≣ (NaN,NaN,NaN,NaN) + @test (max(NaN,Inf), max(NaN,-Inf), max(-NaN,Inf), max(-NaN,-Inf)) ≣ (NaN,NaN,NaN,NaN) + @test (min(Inf,NaN), min(-Inf,NaN), min(Inf,-NaN), min(-Inf,-NaN)) ≣ (NaN,NaN,NaN,NaN) + @test (min(NaN,Inf), min(NaN,-Inf), min(-NaN,Inf), min(-NaN,-Inf)) ≣ (NaN,NaN,NaN,NaN) + @test minmax(-Inf,NaN) ≣ (min(-Inf,NaN), max(-Inf,NaN)) + end +end +@testset "Base._extrema_rf for float" begin + for T in (Float16, Float32, Float64, BigFloat) + ordered = T[-Inf, -5, -0.0, 0.0, 3, Inf] + unorded = T[NaN, -NaN] + for i1 in 1:6, i2 in 1:6, j1 in 1:6, j2 in 1:6 + x = ordered[i1], ordered[i2] + y = ordered[j1], ordered[j2] + z = ordered[min(i1,j1)], ordered[max(i2,j2)] + @test Base._extrema_rf(x, y) === z + end + for i in 1:2, j1 in 1:6, j2 in 1:6 # unordered test (only 1 NaN) + x = unorded[i] , unorded[i] + y = ordered[j1], ordered[j2] + @test Base._extrema_rf(x, y) === x + @test Base._extrema_rf(y, x) === x + end + for i in 1:2, j in 1:2 # unordered test (2 NaNs) + x = unorded[i], unorded[i] + y = unorded[j], unorded[j] + z = Base._extrema_rf(x, y) + @test z === x || z === y + end + end end @testset "fma" begin let x = Int64(7)^7 @@ -1037,6 +1071,15 @@ end @test Float64(10633823966279328163822077199654060033) == 1.063382396627933e37 #nextfloat(0x1p123) @test Float64(-10633823966279328163822077199654060032) == -1.0633823966279327e37 @test Float64(-10633823966279328163822077199654060033) == -1.063382396627933e37 + + # Test lsb/msb gaps of 54 (won't fit in 64 bit mantissa) + @test Float64(Int128(9007199254740993)) == 9.007199254740992e15 + @test Float64(UInt128(9007199254740993)) == 9.007199254740992e15 + # Test 2^104-1 and 2^104 (2^104 is cutoff for which case is run in the conversion algorithm) + @test Float64(Int128(20282409603651670423947251286015)) == 2.028240960365167e31 + @test Float64(Int128(20282409603651670423947251286016)) == 2.028240960365167e31 + @test Float64(UInt128(20282409603651670423947251286015)) == 2.028240960365167e31 + @test Float64(UInt128(20282409603651670423947251286016)) == 2.028240960365167e31 end @testset "Float vs Int128 comparisons" begin @test Int128(1e30) == 1e30 @@ -1117,12 +1160,25 @@ end @test sqrt(2) == 1.4142135623730951 end +Base.@irrational i46051 4863.185427757 1548big(pi) @testset "Irrational printing" begin @test sprint(show, "text/plain", π) == "π = 3.1415926535897..." 
@test sprint(show, "text/plain", π, context=:compact => true) == "π" @test sprint(show, π) == "π" + # issue #46051 + @test sprint(show, "text/plain", i46051) == "i46051 = 4863.185427757..." +end +@testset "Irrational round, float, ceil" begin + using .MathConstants + @test round(π) === 3.0 + @test round(Int, ℯ) === 3 + @test floor(ℯ) === 2.0 + @test floor(Int, φ) === 1 + @test ceil(γ) === 1.0 + @test ceil(Int, catalan) === 1 end + @testset "issue #6365" begin for T in (Float32, Float64) for i = 9007199254740992:9007199254740996 @@ -1650,8 +1706,13 @@ end @test rem(prevfloat(1.0),1.0) == prevfloat(1.0) @test mod(prevfloat(1.0),1.0) == prevfloat(1.0) end - # issue #3046 - @test mod(Int64(2),typemax(Int64)) == 2 + @test mod(Int64(2), typemax(Int64)) == 2 # issue #3046 + @testset "issue #45875" begin + @test cld(+1.1, 0.1) == div(+1.1, 0.1, RoundUp) == ceil(big(+1.1)/big(0.1)) == +12.0 + @test fld(+1.1, 0.1) == div(+1.1, 0.1, RoundDown) == floor(big(+1.1)/big(0.1)) == +11.0 + @test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp) == ceil(big(-1.1)/big(0.1)) == -11.0 + @test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0 + end end @testset "return types" begin for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128) @@ -2169,9 +2230,7 @@ end @test_throws ErrorException reinterpret(Int, 0x01) @testset "issue #12832" begin - @test_throws ErrorException reinterpret(Float64, Complex{Int64}(1)) - @test_throws ErrorException reinterpret(Float64, ComplexF32(1)) - @test_throws ErrorException reinterpret(ComplexF32, Float64(1)) + @test_throws ArgumentError reinterpret(Float64, Complex{Int64}(1)) @test_throws ErrorException reinterpret(Int32, false) end # issue #41 @@ -2214,6 +2273,17 @@ end @test_throws InexactError convert(Int16, big(2)^100) @test_throws InexactError convert(Int, typemax(UInt)) +@testset "infinity to integer conversion" begin + for T in ( + UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128, BigInt + ) + for S in (Float16, Float32, Float64, BigFloat) + @test_throws InexactError convert(T, typemin(S)) + @test_throws InexactError convert(T, typemax(S)) + end + end +end + @testset "issue #9789" begin @test_throws InexactError convert(Int8, typemax(UInt64)) @test_throws InexactError convert(Int16, typemax(UInt64)) @@ -2304,12 +2374,6 @@ end end end @testset "getindex error throwing" begin - #getindex(x::Number,-1) throws BoundsError - #getindex(x::Number,0) throws BoundsError - #getindex(x::Number,2) throws BoundsError - #getindex(x::Array,-1) throws BoundsError - #getindex(x::Array,0 throws BoundsError - #getindex(x::Array,length(x::Array)+1) throws BoundsError for x in [1.23, 7, ℯ, 4//5] #[FP, Int, Irrational, Rat] @test_throws BoundsError getindex(x,-1) @test_throws BoundsError getindex(x,0) @@ -2439,17 +2503,32 @@ Base.abs(x::TestNumber) = TestNumber(abs(x.inner)) d == 0 && continue fastd = Base.multiplicativeinverse(d) for n in numrange + d == -1 && n == typemin(typeof(n)) && continue @test div(n,d) == div(n,fastd) end end end testmi(-1000:1000, -100:100) - testmi(typemax(Int)-1000:typemax(Int), -100:100) - testmi(typemin(Int)+1:typemin(Int)+1000, -100:100) @test_throws ArgumentError Base.multiplicativeinverse(0) - testmi(map(UInt32, 0:1000), map(UInt32, 1:100)) - testmi(typemax(UInt32)-UInt32(1000):typemax(UInt32), map(UInt32, 1:100)) + for T in [Int8, Int16, Int32, Int64, Int128] + testmi(map(T, typemin(T)+1:typemin(T)+100), map(T, -50:50)) + end + for T in [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, 
Int32, Int64, Int128] + testmi(map(T, typemax(T)-50:typemax(T)), map(T, 1:50)) + testmi(rand(T, 50), rand(T, 50)) + @test_throws ArgumentError Base.multiplicativeinverse(T(0)) + end + + # Division overflow is not handled + T = Int8 + fastd = Base.multiplicativeinverse(T(-1)) + @test_throws DivideError div(typemin(T), T(-1)) + # does not throw: + # @test_throws div(typemin(T), fastd) + # test broadcasting works. + @test div.(3, Base.multiplicativeinverse(3)) == 1 end + @testset "ndims/indices/size/length" begin @test ndims(1) == 0 @test ndims(Integer) == 0 @@ -2550,13 +2629,12 @@ end @test isnan(rem(T(1), T(0), mode)) @test isnan(rem(T(Inf), T(2), mode)) @test isnan(rem(T(1), T(NaN), mode)) - # FIXME: The broken case erroneously returns -Inf - @test rem(T(4), floatmin(T) * 2, mode) == 0 broken=(T == BigFloat && mode in (RoundUp,RoundFromZero)) + @test rem(T(4), floatmin(T) * 2, mode) == 0 end @test isequal(rem(nextfloat(typemin(T)), T(2), RoundToZero), -0.0) @test isequal(rem(nextfloat(typemin(T)), T(2), RoundNearest), -0.0) @test isequal(rem(nextfloat(typemin(T)), T(2), RoundDown), 0.0) - @test isequal(rem(nextfloat(typemin(T)), T(2), RoundUp), 0.0) + @test isequal(rem(nextfloat(typemin(T)), T(2), RoundUp), -0.0) @test isequal(rem(nextfloat(typemin(T)), T(2), RoundFromZero), 0.0) end @@ -2633,6 +2711,37 @@ end @test rem2pi(T(-8), RoundNearest) ≈ -8+2pi @test rem2pi(T(-8), RoundDown) ≈ -8+4pi @test rem2pi(T(-8), RoundUp) ≈ -8+2pi + # to hit n is even and n % 4 == 2 condition + @test rem2pi(T(3), RoundToZero) == 3 + @test rem2pi(T(3), RoundNearest) == 3 + @test rem2pi(T(3), RoundDown) == 3 + @test rem2pi(T(3), RoundUp) ≈ 3 - 2π + @test rem2pi(T(-3), RoundToZero) == -3 + @test rem2pi(T(-3), RoundNearest) == -3 + @test rem2pi(T(-3), RoundDown) ≈ -3 + 2π + @test rem2pi(T(-3), RoundUp) == -3 + # to hit even n condition and n % 4 != 2 condition + @test rem2pi(T(13), RoundToZero) ≈ 13-4π + @test rem2pi(T(13), RoundNearest) ≈ 13-4π + @test rem2pi(T(13), RoundDown) ≈ 13-4π + @test rem2pi(T(13), RoundUp) ≈ 13-6π + @test rem2pi(T(-13), RoundToZero) ≈ -13+4π + @test rem2pi(T(-13), RoundNearest) ≈ -13+4π + @test rem2pi(T(-13), RoundDown) ≈ -13+6π + @test rem2pi(T(-13), RoundUp) ≈ -13+4π +end + +@testset "PR #36420 $T" for T in (Float16, Float32, Float64, BigFloat) + nan = reinterpret(Float64, reinterpret(UInt64, NaN) | rand(UInt64)) + for r in (RoundToZero, RoundNearest, RoundDown, RoundUp) + for x in (Inf, -Inf, NaN, -NaN, nan) + @test isnan(rem2pi(T(x), r)) + @test rem2pi(T(x), r) isa T + if isnan(x) && T !== BigFloat + @test rem2pi(T(x), r) === T(x) + end + end + end end import Base.^ @@ -2792,6 +2901,7 @@ end let float_types = Set() allsubtypes!(Base, AbstractFloat, float_types) allsubtypes!(Core, AbstractFloat, float_types) + filter!(!isequal(Core.BFloat16), float_types) # defined externally @test !isempty(float_types) for T in float_types @@ -2816,7 +2926,7 @@ end @testset "constructor inferability for BigFloat" begin T = BigFloat @test_broken all(R -> R<:T, Base.return_types(T)) - @test all(m -> m.file == Symbol("deprecated.jl"), + @test all(m -> m.file === Symbol("deprecated.jl"), collect(methods(T))[findall(R -> !(R<:T), Base.return_types(T))]) end @@ -2877,3 +2987,157 @@ end @test false == ceil(Bool, -0.7) end end + +Base.@irrational irrational_1548_pi 4863.185427757 1548big(pi) +Base.@irrational irrational_inv_1548_pi 1/big(irrational_1548_pi) +@testset "@irrational" begin + @test irrational_1548_pi ≈ 1548big(pi) + @test Float64(irrational_1548_pi) == 1548π + @test irrational_1548_pi ≈ 1548pi 
+ @test irrational_1548_pi != 1548pi + + @test irrational_inv_1548_pi ≈ inv(1548big(pi)) + @test Float64(irrational_inv_1548_pi) == 1/(1548π) + @test irrational_inv_1548_pi ≈ inv(1548pi) + @test irrational_inv_1548_pi != inv(1548pi) +end + +@testset "modf" begin + @testset "remd" begin + denorm_min = nextfloat(0.0) + minfloat = floatmin(Float64) + maxfloat = floatmax(Float64) + values = [3.0,denorm_min,-denorm_min, minfloat, + -minfloat, maxfloat, -maxfloat] + # rem (0, y) == 0 for y != 0. + for val in values + @test isequal(rem(0.0, val), 0.0) + end + # rem (-0, y) == -0 for y != 0. + for val in values + @test isequal(rem(-0.0, val), -0.0) + end + # rem (+Inf, y) == NaN + values2 = [3.0,-1.1,0.0,-0.0,denorm_min,minfloat, + maxfloat,Inf,-Inf] + for val in values2 + @test isequal(rem(Inf, val), NaN) + end + # rem (-Inf, y) == NaN + for val in values2 + @test isequal(rem(-Inf, val), NaN) + end + # rem (x, +0) == NaN + values3 = values2[begin:end-2] + for val in values3 + @test isequal(rem(val, 0.0), NaN) + end + # rem (x, -0) == NaN + for val in values3 + @test isequal(rem(val, -0.0), NaN) + end + # rem (x, +Inf) == x for x not infinite. + @test isequal(rem(0.0, Inf), 0.0) + @test isequal(rem(-0.0, Inf), -0.0) + @test isequal(rem(denorm_min, Inf), denorm_min) + @test isequal(rem(minfloat, Inf), minfloat) + @test isequal(rem(maxfloat, Inf), maxfloat) + @test isequal(rem(3.0, Inf), 3.0) + # rem (x, -Inf) == x for x not infinite. + @test isequal(rem(0.0, -Inf), 0.0) + @test isequal(rem(-0.0, -Inf), -0.0) + @test isequal(rem(denorm_min, -Inf), denorm_min) + @test isequal(rem(minfloat, -Inf), minfloat) + @test isequal(rem(maxfloat, -Inf), maxfloat) + @test isequal(rem(3.0, -Inf), 3.0) + #NaN tests + @test isequal(rem(0.0, NaN), NaN) + @test isequal(rem(1.0, NaN), NaN) + @test isequal(rem(Inf, NaN), NaN) + @test isequal(rem(NaN, 0.0), NaN) + @test isequal(rem(NaN, 1.0), NaN) + @test isequal(rem(NaN, Inf), NaN) + @test isequal(rem(NaN, NaN), NaN) + #Sign tests + @test isequal(rem(6.5, 2.25), 2.0) + @test isequal(rem(-6.5, 2.25), -2.0) + @test isequal(rem(6.5, -2.25), 2.0) + @test isequal(rem(-6.5, -2.25), -2.0) + values4 = [maxfloat,-maxfloat,minfloat,-minfloat, + denorm_min, -denorm_min] + for val in values4 + @test isequal(rem(maxfloat,val), 0.0) + end + for val in values4 + @test isequal(rem(-maxfloat,val), -0.0) + end + @test isequal(rem(minfloat, maxfloat), minfloat) + @test isequal(rem(minfloat, -maxfloat), minfloat) + values5 = values4[begin+2:end] + for val in values5 + @test isequal(rem(minfloat,val), 0.0) + end + @test isequal(rem(-minfloat, maxfloat), -minfloat) + @test isequal(rem(-minfloat, -maxfloat), -minfloat) + for val in values5 + @test isequal(rem(-minfloat,val), -0.0) + end + values6 = values4[begin:end-2] + for val in values6 + @test isequal(rem(denorm_min,val), denorm_min) + end + @test isequal(rem(denorm_min, denorm_min), 0.0) + @test isequal(rem(denorm_min, -denorm_min), 0.0) + for val in values6 + @test isequal(rem(-denorm_min,val), -denorm_min) + end + @test isequal(rem(-denorm_min, denorm_min), -0.0) + @test isequal(rem(-denorm_min, -denorm_min), -0.0) + #Max value tests + values7 = [0x3p-1074,-0x3p-1074,0x3p-1073,-0x3p-1073] + for val in values7 + @test isequal(rem(0x1p1023,val), 0x1p-1073) + end + @test isequal(rem(0x1p1023, 0x3p-1022), 0x1p-1021) + @test isequal(rem(0x1p1023, -0x3p-1022), 0x1p-1021) + for val in values7 + @test isequal(rem(-0x1p1023,val), -0x1p-1073) + end + @test isequal(rem(-0x1p1023, 0x3p-1022), -0x1p-1021) + @test isequal(rem(-0x1p1023, 
-0x3p-1022), -0x1p-1021) + + end + + @testset "remf" begin + @test isequal(rem(Float32(0x1p127), Float32(0x3p-149)), Float32(0x1p-149)) + @test isequal(rem(Float32(0x1p127), -Float32(0x3p-149)), Float32(0x1p-149)) + @test isequal(rem(Float32(0x1p127), Float32(0x3p-148)), Float32(0x1p-147)) + @test isequal(rem(Float32(0x1p127), -Float32(0x3p-148)), Float32(0x1p-147)) + @test isequal(rem(Float32(0x1p127), Float32(0x3p-126)), Float32(0x1p-125)) + @test isequal(rem(Float32(0x1p127), -Float32(0x3p-126)), Float32(0x1p-125)) + @test isequal(rem(-Float32(0x1p127), Float32(0x3p-149)), -Float32(0x1p-149)) + @test isequal(rem(-Float32(0x1p127), -Float32(0x3p-149)), -Float32(0x1p-149)) + @test isequal(rem(-Float32(0x1p127), Float32(0x3p-148)), -Float32(0x1p-147)) + @test isequal(rem(-Float32(0x1p127), -Float32(0x3p-148)), -Float32(0x1p-147)) + @test isequal(rem(-Float32(0x1p127), Float32(0x3p-126)), -Float32(0x1p-125)) + @test isequal(rem(-Float32(0x1p127), -Float32(0x3p-126)), -Float32(0x1p-125)) + end + +end + +@testset "FP(inf) == inf" begin + # Iterate through all pairs of FP types + fp_types = (Float16, Float32, Float64, BigFloat) + for F ∈ fp_types, G ∈ fp_types, f ∈ (typemin, typemax) + i = f(F) + @test i == G(i) + end +end + +@testset "small int FP conversion" begin + fp_types = (Float16, Float32, Float64, BigFloat) + m = Int(maxintfloat(Float16)) + for F ∈ fp_types, G ∈ fp_types, n ∈ (-m):m + @test n == G(F(n)) == F(G(n)) + end +end diff --git a/test/offsetarray.jl b/test/offsetarray.jl index d656a0e8ae34b..e2924ac0a8ca4 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -403,7 +403,8 @@ v2 = copy(v) v = OffsetArray(v0, (-3,)) @test lastindex(v) == 1 @test v ≈ v -@test axes(v') === (Base.OneTo(1), OffsetArrays.IdOffsetRange(Base.OneTo(4), -3)) +@test (@inferred axes(v')[1]) === OffsetArrays.IdOffsetRange(Base.OneTo(1)) +@test (@inferred axes(v')[2]) === OffsetArrays.IdOffsetRange(Base.OneTo(4), -3) @test parent(v) == collect(v) rv = reverse(v) @test axes(rv) == axes(v) @@ -414,6 +415,23 @@ rv = reverse(v) cv = copy(v) @test reverse!(cv) == rv +@testset "reverse! 
(issue #45870)" begin + @testset for n in [4,5] + offset = typemax(Int)-n + vo = OffsetArray([1:n;], offset) + vo2 = OffsetArray([1:n;], offset) + @test reverse!(vo) == OffsetArray(n:-1:1, offset) + @test reverse!(vo) == vo2 + @test_throws BoundsError reverse!(vo, firstindex(vo)-1, firstindex(vo)) + @test reverse!(vo, firstindex(vo), firstindex(vo)-1) == vo2 + @test reverse!(vo, firstindex(vo), firstindex(vo)) == vo2 + @test reverse!(vo, lastindex(vo), lastindex(vo)) == vo2 + @test reverse!(vo, lastindex(vo), lastindex(vo)+1) == vo2 # overflow in stop + @test reverse!(vo, firstindex(vo)+1) == OffsetArray([1;n:-1:2], offset) + @test reverse!(vo2, firstindex(vo)+1, lastindex(vo)-1) == OffsetArray([1;n-1:-1:2;n], offset) + end +end + A = OffsetArray(rand(4,4), (-3,5)) @test lastindex(A) == 16 @test lastindex(A, 1) == 1 @@ -610,15 +628,15 @@ end B = OffsetArray(reshape(1:24, 4, 3, 2), -5, 6, -7) for R in (fill(0, -4:-1), fill(0, -4:-1, 7:7), fill(0, -4:-1, 7:7, -6:-6)) @test @inferred(maximum!(R, B)) == reshape(maximum(B, dims=(2,3)), axes(R)) == reshape(21:24, axes(R)) - @test @allocated(maximum!(R, B)) <= 800 + @test @allocated(maximum!(R, B)) <= 400 @test @inferred(minimum!(R, B)) == reshape(minimum(B, dims=(2,3)), axes(R)) == reshape(1:4, axes(R)) - @test @allocated(minimum!(R, B)) <= 800 + @test @allocated(minimum!(R, B)) <= 400 end for R in (fill(0, -4:-4, 7:9), fill(0, -4:-4, 7:9, -6:-6)) @test @inferred(maximum!(R, B)) == reshape(maximum(B, dims=(1,3)), axes(R)) == reshape(16:4:24, axes(R)) - @test @allocated(maximum!(R, B)) <= 800 + @test @allocated(maximum!(R, B)) <= 400 @test @inferred(minimum!(R, B)) == reshape(minimum(B, dims=(1,3)), axes(R)) == reshape(1:4:9, axes(R)) - @test @allocated(minimum!(R, B)) <= 800 + @test @allocated(minimum!(R, B)) <= 400 end @test_throws DimensionMismatch maximum!(fill(0, -4:-1, 7:7, -6:-6, 1:1), B) @test_throws DimensionMismatch minimum!(fill(0, -4:-1, 7:7, -6:-6, 1:1), B) @@ -641,6 +659,14 @@ end @test last(v, 100) == v0 @test last(v, 100) !== v @test last(v, 1) == [v[end]] + + @testset "overflow (issue #45842)" begin + a = [2,3,4] + b = OffsetArray(a, 2:4) + @test first(a, typemax(Int)) == first(b, typemax(Int)) + b = OffsetArray(a, typemin(Int)) + @test last(a, 100) == last(b, 100) + end end @testset "Resizing OffsetVectors" begin @@ -793,6 +819,22 @@ end @test reshape(a, (:,)) === a end +@testset "stack" begin + nought = OffsetArray([0, 0.1, 0.01], 0:2) + ten = OffsetArray([1,10,100,1000], 10:13) + + @test stack(ten) == ten + @test stack(ten .+ nought') == ten .+ nought' + @test stack(x^2 for x in ten) == ten.^2 + + @test axes(stack(nought for _ in ten)) == (0:2, 10:13) + @test axes(stack([nought for _ in ten])) == (0:2, 10:13) + @test axes(stack(nought for _ in ten; dims=1)) == (10:13, 0:2) + @test axes(stack((x, x^2) for x in nought)) == (1:2, 0:2) + @test axes(stack(x -> x[end-1:end], ten for _ in nought, _ in nought)) == (1:2, 0:2, 0:2) + @test axes(stack([ten[end-1:end] for _ in nought, _ in nought])) == (1:2, 0:2, 0:2) +end + @testset "issue #41630: replace_ref_begin_end!/@view on offset-like arrays" begin x = OffsetArray([1 2; 3 4], -10:-9, 9:10) # 2×2 OffsetArray{...} with indices -10:-9×9:10 diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl index 7fe53812c3a92..856253ecd5a8d 100644 --- a/test/opaque_closure.jl +++ b/test/opaque_closure.jl @@ -1,8 +1,10 @@ using Test using InteractiveUtils using Core: OpaqueClosure +using Base.Experimental: @opaque const_int() = 1 +const_int_barrier() = Base.inferencebarrier(1)::typeof(1) 
const lno = LineNumberNode(1, :none) @@ -12,7 +14,7 @@ let ci = @code_lowered const_int() Expr(:opaque_closure_method, nothing, 0, false, lno, ci))) end end -@test isa(oc_trivial(), Core.OpaqueClosure{Tuple{}, Any}) +@test isa(oc_trivial(), OpaqueClosure{Tuple{}, Any}) @test oc_trivial()() == 1 let ci = @code_lowered const_int() @@ -21,7 +23,7 @@ let ci = @code_lowered const_int() Expr(:opaque_closure_method, nothing, 0, false, lno, ci))) end end -@test isa(oc_simple_inf(), Core.OpaqueClosure{Tuple{}, Int}) +@test isa(oc_simple_inf(), OpaqueClosure{Tuple{}, Int}) @test oc_simple_inf()() == 1 struct OcClos2Int @@ -72,8 +74,8 @@ let ci = @code_lowered OcClos1Any(1)() :x)) end end -@test isa(oc_infer_pass_clos(1), Core.OpaqueClosure{Tuple{}, typeof(1)}) -@test isa(oc_infer_pass_clos("a"), Core.OpaqueClosure{Tuple{}, typeof("a")}) +@test isa(oc_infer_pass_clos(1), OpaqueClosure{Tuple{}, typeof(1)}) +@test isa(oc_infer_pass_clos("a"), OpaqueClosure{Tuple{}, typeof("a")}) @test oc_infer_pass_clos(1)() == 1 @test oc_infer_pass_clos("a")() == "a" @@ -115,8 +117,6 @@ let A = [1 2] end end -using Base.Experimental: @opaque - @test @opaque(x->2x)(8) == 16 let f = @opaque (x::Int, y::Float64)->(2x, 3y) @test_throws TypeError f(1, 1) @@ -128,18 +128,26 @@ end @test uses_frontend_opaque(10)(8) == 18 # World age mechanism +module test_world_age + +using Test +using Core: OpaqueClosure +using Base.Experimental: @opaque + function test_oc_world_age end mk_oc_world_age() = @opaque ()->test_oc_world_age() g_world_age = @opaque ()->test_oc_world_age() h_world_age = mk_oc_world_age() -@test isa(h_world_age, Core.OpaqueClosure{Tuple{}, Union{}}) +@test isa(h_world_age, OpaqueClosure{Tuple{}, Union{}}) test_oc_world_age() = 1 @test_throws MethodError g_world_age() @test_throws MethodError h_world_age() @test mk_oc_world_age()() == 1 g_world_age = @opaque ()->test_oc_world_age() @test g_world_age() == 1 -@test isa(mk_oc_world_age(), Core.OpaqueClosure{Tuple{}, Int}) +@test isa(mk_oc_world_age(), OpaqueClosure{Tuple{}, Int}) + +end # module test_world_age function maybe_vararg(isva::Bool) T = isva ? Vararg{Int} : Int @@ -170,33 +178,28 @@ mk_va_opaque() = @opaque (x...)->x @test mk_va_opaque()(1,2) == (1,2) # OpaqueClosure show method -@test repr(@opaque x->1) == "(::Any)::Any->◌" +@test repr(@opaque x->Base.inferencebarrier(1)) == "(::Any)::Any->◌" # Opaque closure in CodeInfo returned from generated functions -function mk_ocg(args...) - ci = @code_lowered const_int() - cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any, - Expr(:opaque_closure_method, nothing, 0, false, lno, ci))).args[1] - cig.slotnames = Symbol[Symbol("#self#")] - cig.slottypes = Any[Any] - cig.slotflags = UInt8[0x00] - cig +let ci = @code_lowered const_int() + global function mk_ocg(world::UInt, source, args...) 
+ @nospecialize + cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any, + Expr(:opaque_closure_method, nothing, 0, false, lno, ci))).args[1] + cig.slotnames = Symbol[Symbol("#self#")] + cig.slottypes = Any[Any] + cig.slotflags = UInt8[0x00] + @assert cig.min_world == UInt(1) + @assert cig.max_world == typemax(UInt) + return cig + end end @eval function oc_trivial_generated() $(Expr(:meta, :generated_only)) - $(Expr(:meta, - :generated, - Expr(:new, - Core.GeneratedFunctionStub, - :mk_ocg, - Any[:oc_trivial_generated], - Any[], - @__LINE__, - QuoteNode(Symbol(@__FILE__)), - true))) -end -@test isa(oc_trivial_generated(), Core.OpaqueClosure{Tuple{}, Any}) + $(Expr(:meta, :generated, mk_ocg)) +end +@test isa(oc_trivial_generated(), OpaqueClosure{Tuple{}, Any}) @test oc_trivial_generated()() == 1 # Constprop through varargs OpaqueClosure @@ -242,25 +245,109 @@ let oc = @opaque a->sin(a) end # constructing an opaque closure from IRCode -let ci = code_typed(+, (Int, Int))[1][1] - ir = Core.Compiler.inflate_ir(ci) - @test OpaqueClosure(ir; nargs=2, isva=false)(40, 2) == 42 - @test OpaqueClosure(ci)(40, 2) == 42 +let src = first(only(code_typed(+, (Int, Int)))) + ir = Core.Compiler.inflate_ir(src) + @test OpaqueClosure(src)(40, 2) == 42 + oc = OpaqueClosure(ir) + @test oc(40, 2) == 42 + @test isa(oc, OpaqueClosure{Tuple{Int,Int}, Int}) + @test_throws TypeError oc("40", 2) + @test OpaqueClosure(ir)(40, 2) == 42 # the `OpaqueClosure(::IRCode)` constructor should be non-destructive +end +let ir = first(only(Base.code_ircode(sin, (Int,)))) + @test OpaqueClosure(ir)(42) == sin(42) + @test OpaqueClosure(ir)(42) == sin(42) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive + ir = first(only(Base.code_ircode(sin, (Float64,)))) + @test OpaqueClosure(ir)(42.) == sin(42.) + @test OpaqueClosure(ir)(42.) == sin(42.) # the `OpaqueClosure(::IRCode)` constructor should be non-destructive +end + +# variadic arguments +let src = code_typed((Int,Int)) do x, y... + return (x, y) + end |> only |> first + let oc = OpaqueClosure(src) + @test oc(1,2) === (1,(2,)) + @test_throws MethodError oc(1,2,3) + end + ir = Core.Compiler.inflate_ir(src) + let oc = OpaqueClosure(ir; isva=true) + @test oc(1,2) === (1,(2,)) + @test_throws MethodError oc(1,2,3) + end +end - ir = Core.Compiler.inflate_ir(ci, Any[], Any[Tuple{}, Int, Int]) - @test OpaqueClosure(ir; nargs=2, isva=false)(40, 2) == 42 - @test isa(OpaqueClosure(ir; nargs=2, isva=false), Core.OpaqueClosure{Tuple{Int, Int}, Int}) - @test_throws TypeError OpaqueClosure(ir; nargs=2, isva=false)(40.0, 2) +# Check for correct handling in case of broken return type. 
+eval_oc_dyn(oc) = Base.inferencebarrier(oc)() +eval_oc_spec(oc) = oc() +for f in (const_int, const_int_barrier) + ci = code_lowered(f, Tuple{})[1] + for compiled in (true, false) + oc_expr = Expr(:new_opaque_closure, Tuple{}, Union{}, Float64, + Expr(:opaque_closure_method, nothing, 0, false, lno, ci)) + oc_mismatch = let ci = code_lowered(f, Tuple{})[1] + if compiled + eval(:((()->$oc_expr)())) + else + eval(oc_expr) + end + end + @test isa(oc_mismatch, OpaqueClosure{Tuple{}, Union{}}) + @test_throws TypeError eval_oc_dyn(oc_mismatch) + @test_throws TypeError eval_oc_spec(oc_mismatch) + end end -let ci = code_typed((x, y...)->(x, y), (Int, Int))[1][1] - ir = Core.Compiler.inflate_ir(ci) - @test OpaqueClosure(ir; nargs=2, isva=true)(40, 2) === (40, (2,)) - @test OpaqueClosure(ci)(40, 2) === (40, (2,)) + +# Attempting to construct an opaque closure backtrace after the oc is GC'ed +f_oc_throws() = error("oops") +@noinline function make_oc_and_collect_bt() + did_gc = Ref{Bool}(false) + bt = let ir = first(only(Base.code_ircode(f_oc_throws, ()))) + sentinel = Ref{Any}(nothing) + oc = OpaqueClosure(ir, sentinel) + finalizer(sentinel) do x + did_gc[] = true + end + try + oc() + @test false + catch e + bt = catch_backtrace() + @test isa(e, ErrorException) + bt + end + end + return bt, did_gc +end +let (bt, did_gc) = make_oc_and_collect_bt() + GC.gc(true); GC.gc(true); GC.gc(true); + @test did_gc[] + @test any(stacktrace(bt)) do frame + isa(frame.linfo, Core.MethodInstance) || return false + isa(frame.linfo.def, Method) || return false + return frame.linfo.def.is_for_opaque_closure + end end -let ci = code_typed((x, y...)->(x, y), (Int, Int))[1][1] - ir = Core.Compiler.inflate_ir(ci) - @test_throws MethodError OpaqueClosure(ir; nargs=2, isva=true)(1, 2, 3) - @test_throws MethodError OpaqueClosure(ci)(1, 2, 3) +# Opaque closure with mismatch struct argtype +const op_arg_restrict2 = @opaque (x::Tuple{Int64}, y::Base.RefValue{Int64})->x+y +ccall_op_arg_restrict2_bad_args() = op_arg_restrict2((1.,), 2) + +@test_throws TypeError ccall_op_arg_restrict2_bad_args() + +# code_llvm for opaque closures +let ir = Base.code_ircode((Int,Int)) do x, y + @noinline x * y + end |> only |> first + oc = Core.OpaqueClosure(ir) + io = IOBuffer() + code_llvm(io, oc, Tuple{Int,Int}) + @test occursin("j_*_", String(take!(io))) + code_llvm(io, oc, (Int,Int)) + @test occursin("j_*_", String(take!(io))) end + +foopaque() = Base.Experimental.@opaque(@noinline x::Int->println(x))(1) + +code_llvm(devnull,foopaque,()) #shouldn't crash diff --git a/test/operators.jl b/test/operators.jl index a1e27d0e1cd7b..95006235692a0 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -2,6 +2,8 @@ using Random: randstring +include("compiler/irutils.jl") + @testset "ifelse" begin @test ifelse(true, 1, 2) == 1 @test ifelse(false, 1, 2) == 2 @@ -44,11 +46,11 @@ end p = 1=>:foo @test first(p) == 1 - @test last(p) == :foo - @test first(reverse(p)) == :foo + @test last(p) === :foo + @test first(reverse(p)) === :foo @test last(reverse(p)) == 1 @test lastindex(p) == 2 - @test p[lastindex(p)] == p[end] == p[2] == :foo + @test p[lastindex(p)] == p[end] == p[2] === :foo end # Infix `isa` @@ -91,6 +93,23 @@ end @test isless('a','b') +@testset "isless on pairs of integers (because there is a fastpath)" begin + @test isless((1,2), (1,3)) + @test isless((0,-2), (0,2)) + @test isless((-1,2), (1,2)) + @test isless((-1,-2), (1,2)) + @test !isless((1,3), (1,2)) + @test !isless((0,2), (0,-2)) + @test !isless((1,2), (-1,2)) + @test !isless((1,2), (-1,-2)) + 
@test !isless((-1,-2), (-1,-2)) + + @test isless((typemin(Int), typemin(Int)), (0,0)) + @test isless((1, 1), (Int8(2), Int8(2))) + @test !isless((UInt8(200),Int8(-1)), (UInt8(200),Int8(-1))) + @test isless((1, 1), (1, unsigned(2))) +end + @testset "isgreater" begin # isgreater should be compatible with min. min1(a, b) = Base.isgreater(a, b) ? b : a @@ -135,6 +154,13 @@ Base.convert(::Type{T19714}, ::Int) = T19714() Base.promote_rule(::Type{T19714}, ::Type{Int}) = T19714 @test T19714()/1 === 1/T19714() === T19714() +@testset "operators with zero argument" begin + @test_throws(MethodError, +()) + @test_throws(MethodError, *()) + @test isempty(methods(+, ())) + @test isempty(methods(*, ())) +end + # pr #17155 and #33568 @testset "function composition" begin @test (uppercase∘(x->string(x,base=16)))(239487) == "3A77F" @@ -169,12 +195,26 @@ Base.promote_rule(::Type{T19714}, ::Type{Int}) = T19714 @test repr(uppercase ∘ first) == "uppercase ∘ first" @test sprint(show, "text/plain", uppercase ∘ first) == "uppercase ∘ first" - # test keyword ags in composition + # test keyword args in composition function kwf(a;b,c); a + b + c; end @test (abs2 ∘ kwf)(1,b=2,c=3) == 36 end +@testset "Nested ComposedFunction's stability" begin + f(x) = (1, 1, x...) + g = (f ∘ (f ∘ f)) ∘ (f ∘ f ∘ f) + @test (@inferred (g∘g)(1)) == ntuple(Returns(1), 25) + @test (@inferred g(1)) == ntuple(Returns(1), 13) + h = (-) ∘ (-) ∘ (-) ∘ (-) ∘ (-) ∘ (-) ∘ sum + @test (@inferred h((1, 2, 3); init = 0.0)) == 6.0 + issue_45877 = reduce(∘, fill(sin, 50)) + @test Core.Compiler.is_foldable(Base.infer_effects(Base.unwrap_composed, (typeof(issue_45877),))) + @test fully_eliminated() do + issue_45877(1.0) + end +end + @testset "function negation" begin str = randstring(20) @test filter(!isuppercase, str) == replace(str, r"[A-Z]" => "") @@ -258,6 +298,9 @@ end end @test fldmod1(4.0, 3) == fldmod1(4, 3) + + # issue 28973 + @test fld1(0.4, 0.9) == fld1(nextfloat(0.4), 0.9) == 1.0 end @testset "Fix12" begin @@ -308,4 +351,36 @@ end val = [1,2,3] @test Returns(val)(1) === val @test sprint(show, Returns(1.0)) == "Returns{Float64}(1.0)" + + illtype = Vector{Core.TypeVar(:T)} + @test Returns(illtype) == Returns{DataType}(illtype) +end + +@testset "<= (issue #46327)" begin + struct A46327 <: Real end + Base.:(==)(::A46327, ::A46327) = false + Base.:(<)(::A46327, ::A46327) = false + @test !(A46327() <= A46327()) + struct B46327 <: Real end + Base.:(==)(::B46327, ::B46327) = true + Base.:(<)(::B46327, ::B46327) = false + @test B46327() <= B46327() +end + +@testset "concrete eval `x in itr::Tuple`" begin + @test Core.Compiler.is_foldable(Base.infer_effects(in, (Int,Tuple{Int,Int,Int}))) + @test Core.Compiler.is_foldable(Base.infer_effects(in, (Char,Tuple{Char,Char,Char}))) + for i = (1,2,3) + @testset let i = i + @test @eval Base.return_types() do + Val($i in (1,2,3)) + end |> only == Val{true} + end + end + @test Base.return_types() do + Val(4 in (1,2,3)) + end |> only == Val{false} + @test Base.return_types() do + Val('1' in ('1','2','3')) + end |> only == Val{true} end diff --git a/test/ordering.jl b/test/ordering.jl index 547d8d8dd0e8b..972d48c17b1af 100644 --- a/test/ordering.jl +++ b/test/ordering.jl @@ -2,21 +2,24 @@ using Test -import Base.Order: Forward, Reverse +import Base.Order: Forward, Reverse, ord, Lt, By, ReverseOrdering # every argument can flip the integer order by passing the right value. 
Here, # we enumerate a few of these combinations and check that all these flips # compound so that in total we either have an increasing or decreasing sort. for (s1, rev) in enumerate([true, false]) - for (s2, lt) in enumerate([>, <, (a, b) -> a - b > 0, (a, b) -> a - b < 0]) + for (s2, lt) in enumerate([(a, b)->isless(b, a), isless, >, <, (a, b) -> a - b > 0, (a, b) -> a - b < 0]) for (s3, by) in enumerate([-, +]) for (s4, order) in enumerate([Reverse, Forward]) - if iseven(s1 + s2 + s3 + s4) - target = [1, 2, 3] - else - target = [3, 2, 1] - end + is_fwd = iseven(s1 + s2 + s3 + s4) + target = is_fwd ? (1:3) : (3:-1:1) + # arrays, integer and float ranges sometimes have different code paths @test target == sort([2, 3, 1], rev=rev, lt=lt, by=by, order=order) + + @test target == sort(1:3, rev=rev, lt=lt, by=by, order=order) + @test target == sort(3:-1:1, rev=rev, lt=lt, by=by, order=order) + @test float(target) == sort(1.0:3, rev=rev, lt=lt, by=by, order=order) + @test float(target) == sort(3.0:-1:1, rev=rev, lt=lt, by=by, order=order) end end end @@ -40,3 +43,11 @@ struct SomeOtherOrder <: Base.Order.Ordering end @test reverse(Forward) === Reverse @test reverse(Reverse) === Forward + +@test ord(isless, identity, false, Forward) === Forward +@test ord(isless, identity, true, Forward) === Reverse +@test ord(<, identity, false, Forward) === Lt(<) +@test ord(isless, abs, false, Forward) === By(abs) +@test ord(<, abs, false, Forward) === By(abs, Lt(<)) +@test ord(<, abs, true, Forward) === ReverseOrdering(By(abs, Lt(<))) +@test ord(<, abs, true, Reverse) === By(abs, Lt(<)) diff --git a/test/osutils.jl b/test/osutils.jl index 36f2878017129..5e72675279cbc 100644 --- a/test/osutils.jl +++ b/test/osutils.jl @@ -51,7 +51,7 @@ end if Sys.iswindows() @testset "path variables use correct path delimiters on windows" begin for path in (Base.SYSCONFDIR, Base.DATAROOTDIR, Base.DOCDIR, - Base.LIBDIR, Base.PRIVATE_LIBDIR, Base.INCLUDEDIR, Base.LIBEXECDIR) + Base.LIBDIR, Base.PRIVATE_LIBDIR, Base.INCLUDEDIR, Base.LIBEXECDIR, Base.PRIVATE_LIBEXECDIR) @test !occursin("/", path) @test !occursin("\\\\", path) end diff --git a/test/parse.jl b/test/parse.jl index ae07936b3a18e..e2b94a45cc446 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -41,6 +41,16 @@ Base.iterate(::Issue29451String, i::Integer=1) = i == 1 ? ('0', 2) : nothing @test Issue29451String() == "0" @test parse(Int, Issue29451String()) == 0 +# https://github.com/JuliaStrings/InlineStrings.jl/issues/57 +struct InlineStringIssue57 <: AbstractString end +Base.ncodeunits(::InlineStringIssue57) = 4 +Base.lastindex(::InlineStringIssue57) = 4 +Base.isvalid(::InlineStringIssue57, i::Integer) = 0 < i < 5 +Base.iterate(::InlineStringIssue57, i::Integer=1) = i == 1 ? ('t', 2) : i == 2 ? ('r', 3) : i == 3 ? ('u', 4) : i == 4 ? 
('e', 5) : nothing +Base.:(==)(::SubString{InlineStringIssue57}, x::String) = x == "true" + +@test parse(Bool, InlineStringIssue57()) + @testset "Issue 20587, T=$T" for T in Any[BigInt, Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8] T === BigInt && continue # TODO: make BigInt pass this test for s in ["", " ", " "] @@ -286,6 +296,8 @@ end @test_throws ArgumentError parse(Complex{T}, bad) end @test_throws ArgumentError parse(Complex{Int}, "3 + 4.2im") + @test_throws ArgumentError parse(ComplexF64, "3 β+ 4im") + @test_throws ArgumentError parse(ComplexF64, "3 + 4αm") end @testset "parse and tryparse type inference" begin @@ -300,7 +312,7 @@ end @test eltype([tryparse(Complex{Int}, s) for s in String[]]) == Union{Nothing, Complex{Int}} end -@testset "isssue #29980" begin +@testset "issue #29980" begin @test parse(Bool, "1") === true @test parse(Bool, "01") === true @test parse(Bool, "0") === false diff --git a/test/path.jl b/test/path.jl index 4a4caa6b0b115..2f4f2d0983a58 100644 --- a/test/path.jl +++ b/test/path.jl @@ -171,6 +171,9 @@ @test string(splitdrive(S(homedir()))...) == homedir() @test splitdrive("a\nb") == ("", "a\nb") + @test splitdir("a/\xfe/\n/b/c.ext") == ("a/\xfe/\n/b", "c.ext") + @test splitext("a/\xfe/\n/b/c.ext") == ("a/\xfe/\n/b/c", ".ext") + if Sys.iswindows() @test splitdrive(S("\\\\servername\\hello.world\\filename.ext")) == ("\\\\servername\\hello.world","\\filename.ext") diff --git a/test/precompile.jl b/test/precompile.jl index f7e3e93acd27e..1ac3999947736 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1,6 +1,10 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +original_depot_path = copy(Base.DEPOT_PATH) +original_load_path = copy(Base.LOAD_PATH) + using Test, Distributed, Random +using REPL # doc lookup function Foo_module = :Foo4b3a94a1a081a8cb Foo2_module = :F2oo4b3a94a1a081a8cb @@ -25,8 +29,18 @@ function precompile_test_harness(@nospecialize(f), separate::Bool) pushfirst!(DEPOT_PATH, load_cache_path) f(load_path) finally - rm(load_path, recursive=true, force=true) - separate && rm(load_cache_path, recursive=true, force=true) + try + rm(load_path, force=true, recursive=true) + catch err + @show err + end + if separate + try + rm(load_cache_path, force=true, recursive=true) + catch err + @show err + end + end filter!((≠)(load_path), LOAD_PATH) separate && filter!((≠)(load_cache_path), DEPOT_PATH) end @@ -35,7 +49,7 @@ end # method root provenance -rootid(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), Base.parentmodule(m)) +rootid(m::Module) = Base.module_build_id(Base.parentmodule(m)) % UInt64 rootid(m::Method) = rootid(m.module) function root_provenance(m::Method, i::Int) @@ -106,18 +120,21 @@ precompile_test_harness(false) do dir write(Foo2_file, """ module $Foo2_module - export override + export override, overridenc override(x::Integer) = 2 override(x::AbstractFloat) = Float64(override(1)) + overridenc(x::Integer) = rand()+1 + overridenc(x::AbstractFloat) = Float64(overridenc(1)) end """) write(Foo_file, """ module $Foo_module import $FooBase_module, $FooBase_module.typeA - import $Foo2_module: $Foo2_module, override + import $Foo2_module: $Foo2_module, override, overridenc import $FooBase_module.hash import Test + public foo, Bar module Inner import $FooBase_module.hash using ..$Foo_module @@ -134,6 +151,7 @@ precompile_test_harness(false) do dir include_dependency("foo.jl") include_dependency("foo.jl") module Bar + public bar include_dependency("bar.jl") end @doc "Bar module" 
Bar # this needs to define the META dictionary via eval @@ -155,10 +173,9 @@ precompile_test_harness(false) do dir # issue 16529 (adding a method to a type with no instances) (::Task)(::UInt8, ::UInt16, ::UInt32) = 2 - # issue 16471 (capturing references to a kwfunc) - Test.@test !isdefined(typeof(sin).name.mt, :kwsorter) + # issue 16471 Base.sin(::UInt8, ::UInt16, ::UInt32; x = 52) = x - const sinkw = Core.kwfunc(Base.sin) + const sinkw = Core.kwcall # issue 16908 (some complicated types and external method definitions) abstract type CategoricalPool{T, R <: Integer, V} end @@ -221,12 +238,17 @@ precompile_test_harness(false) do dir g() = override(1.0) Test.@test g() === 2.0 # compile this + gnc() = overridenc(1.0) + Test.@test 1 < gnc() < 5 # compile this const abigfloat_f() = big"12.34" const abigfloat_x = big"43.21" const abigint_f() = big"123" const abigint_x = big"124" + # issue #51111 + abigfloat_to_f32() = Float32(big"1.5") + # issue #31488 _v31488 = Base.StringVector(2) resize!(_v31488, 0) @@ -245,11 +267,32 @@ precompile_test_harness(false) do dir # check that @ccallable works from precompiled modules Base.@ccallable Cint f35014(x::Cint) = x+Cint(1) + + # check that Tasks work from serialized state + ch1 = Channel(x -> nothing) + ch2 = Channel(x -> (push!(x, 2); nothing), Inf) + + # check that Memory aliasing is respected + a_vec_int = Int[] + push!(a_vec_int, 1, 2) + a_mat_int = reshape(a_vec_int, (1, 2)) + + a_vec_any = Any[] + push!(a_vec_any, 1, 2) + a_mat_any = reshape(a_vec_any, (1, 2)) + + a_vec_union = Union{Int,Nothing}[] + push!(a_vec_union, 1, 2) + a_mat_union = reshape(a_vec_union, (1, 2)) + + a_vec_inline = Pair{Int,Any}[] + push!(a_vec_inline, 1=>2, 3=>4) + a_mat_inline = reshape(a_vec_inline, (1, 2)) + + oid_vec_int = objectid(a_vec_int) + oid_mat_int = objectid(a_mat_int) end """) - # make sure `sin` didn't have a kwfunc (which would invalidate the attempted test) - @test !isdefined(typeof(sin).name.mt, :kwsorter) - # Issue #12623 @test __precompile__(false) === nothing @@ -257,6 +300,8 @@ precompile_test_harness(false) do dir Foo2 = Base.require(Main, Foo2_module) @eval $Foo2.override(::Int) = 'a' @eval $Foo2.override(::Float32) = 'b' + @eval $Foo2.overridenc(::Int) = rand() + 97.0 + @eval $Foo2.overridenc(::Float32) = rand() + 100.0 Foo = Base.require(Main, Foo_module) Base.invokelatest() do # use invokelatest to see the results of loading the compile @@ -265,9 +310,13 @@ precompile_test_harness(false) do dir # Issue #21307 @test Foo.g() === 97.0 + @test 96 < Foo.gnc() < 99 @test Foo.override(1.0e0) == Float64('a') @test Foo.override(1.0f0) == 'b' @test Foo.override(UInt(1)) == 2 + @test 96 < Foo.overridenc(1.0e0) < 99 + @test 99 < Foo.overridenc(1.0f0) < 102 + @test 0 < Foo.overridenc(UInt(1)) < 3 # Issue #15722 @test Foo.abigfloat_f()::BigFloat == big"12.34" @@ -275,6 +324,9 @@ precompile_test_harness(false) do dir @test Foo.abigint_f()::BigInt == big"123" @test Foo.abigint_x::BigInt + 1 == big"125" + # Issue #51111 + @test Foo.abigfloat_to_f32() == 1.5f0 + @test Foo.x28297.result === missing @test Foo.d29936a === Dict @@ -291,6 +343,41 @@ precompile_test_harness(false) do dir @test Foo.layout2 == Any[Ptr{Int8}(0), Ptr{Int16}(0), Ptr{Int32}(-1)] @test typeof.(Foo.layout2) == [Ptr{Int8}, Ptr{Int16}, Ptr{Int32}] @test Foo.layout3 == ["ab", "cd", "ef", "gh", "ij"] + + @test !isopen(Foo.ch1) + @test !isopen(Foo.ch2) + @test !isready(Foo.ch1) + @test isready(Foo.ch2) + @test take!(Foo.ch2) === 2 + @test !isready(Foo.ch2) + end + + let + @test Foo.a_vec_int == Int[1, 
2] + @test Foo.a_mat_int == Int[1 2] + Foo.a_mat_int[1, 2] = 3 + @test Foo.a_vec_int[2] === 3 + + @test Foo.a_vec_any == Int[1, 2] + @test Foo.a_mat_any == Int[1 2] + Foo.a_mat_any[1, 2] = 3 + @test Foo.a_vec_any[2] === 3 + + @test Foo.a_vec_union == Union{Int,Nothing}[1, 2] + @test Foo.a_mat_union == Union{Int,Nothing}[1 2] + Foo.a_mat_union[1, 2] = 3 + @test Foo.a_vec_union[2] === 3 + Foo.a_mat_union[1, 2] = nothing + @test Foo.a_vec_union[2] === nothing + + @test Foo.a_vec_inline == Pair{Int,Any}[1=>2, 3=>4] + @test Foo.a_mat_inline == Pair{Int,Any}[1=>2 3=>4] + Foo.a_mat_inline[1, 2] = 5=>6 + @test Foo.a_vec_inline[2] === Pair{Int,Any}(5, 6) + + @test objectid(Foo.a_vec_int) === Foo.oid_vec_int + @test objectid(Foo.a_mat_int) === Foo.oid_mat_int + @test Foo.oid_vec_int !== Foo.oid_mat_int end @eval begin function ccallable_test() @@ -309,12 +396,20 @@ precompile_test_harness(false) do dir cachedir = joinpath(dir, "compiled", "v$(VERSION.major).$(VERSION.minor)") cachedir2 = joinpath(dir2, "compiled", "v$(VERSION.major).$(VERSION.minor)") cachefile = joinpath(cachedir, "$Foo_module.ji") - # use _require_from_serialized to ensure that the test fails if - # the module doesn't reload from the image: - @test_warn "@ccallable was already defined for this method name" begin - @test_logs (:warn, "Replacing module `$Foo_module`") begin - ms = Base._require_from_serialized(Base.PkgId(Foo), cachefile) - @test isa(ms, Array{Any,1}) + do_pkgimg = Base.JLOptions().use_pkgimages == 1 && Base.JLOptions().permalloc_pkgimg == 1 + if do_pkgimg || Base.JLOptions().use_pkgimages == 0 + if do_pkgimg + ocachefile = Base.ocachefile_from_cachefile(cachefile) + else + ocachefile = nothing + end + # use _require_from_serialized to ensure that the test fails if + # the module doesn't reload from the image: + @test_warn "@ccallable was already defined for this method name" begin + @test_logs (:warn, "Replacing module `$Foo_module`") begin + m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile) + @test isa(m, Module) + end end end @@ -334,9 +429,9 @@ precompile_test_harness(false) do dir @test string(Base.Docs.doc(Foo.Bar.bar)) == "bar function\n" @test string(Base.Docs.doc(Foo.Bar)) == "Bar module\n" - modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile) - discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime) - @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) ] + modules, (deps, _, requires), required_modules, _... 
= Base.parse_cache_header(cachefile) + discard_module = mod_fl_mt -> mod_fl_mt.filename + @test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ] @test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ] @test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)), Base.PkgId(Foo) => Base.PkgId(Foo2), @@ -349,33 +444,39 @@ precompile_test_harness(false) do dir @test_throws ErrorException Base.read_dependency_src(cachefile, joinpath(dir, "foo.jl")) modules, deps1 = Base.cache_dependencies(cachefile) - @test Dict(modules) == merge( + modules_ok = merge( Dict(let m = Base.PkgId(s) m => Base.module_build_id(Base.root_module(m)) end for s in [ "Base", "Core", "Main", - string(Foo2_module), string(FooBase_module) ]), + string(Foo2_module), string(FooBase_module),]), # plus modules included in the system image Dict(let m = Base.root_module(Base, s) Base.PkgId(m) => Base.module_build_id(m) - end for s in - [:ArgTools, :Artifacts, :Base64, :CompilerSupportLibraries_jll, :CRC32c, :Dates, - :Distributed, :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll, - :LazyArtifacts, :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra, - :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf, - :Profile, :p7zip_jll, :REPL, :Random, :SHA, :Serialization, :SharedArrays, :Sockets, - :TOML, :Tar, :Test, :UUIDs, :Unicode, - :nghttp2_jll] - ), + end for s in [Symbol(x.name) for x in Base._sysimage_modules if !(x.name in ["Base", "Core", "Main"])]), + # plus test module, + Dict(Base.PkgId(Base.root_module(Base, :Test)) => Base.module_build_id(Base.root_module(Base, :Test))), + # plus dependencies of test module + Dict(Base.PkgId(Base.root_module(Base, :InteractiveUtils)) => Base.module_build_id(Base.root_module(Base, :InteractiveUtils))), + Dict(Base.PkgId(Base.root_module(Base, :Logging)) => Base.module_build_id(Base.root_module(Base, :Logging))), + Dict(Base.PkgId(Base.root_module(Base, :Random)) => Base.module_build_id(Base.root_module(Base, :Random))), + Dict(Base.PkgId(Base.root_module(Base, :Serialization)) => Base.module_build_id(Base.root_module(Base, :Serialization))), + # and their dependencies + Dict(Base.PkgId(Base.root_module(Base, :SHA)) => Base.module_build_id(Base.root_module(Base, :SHA))), + Dict(Base.PkgId(Base.root_module(Base, :Markdown)) => Base.module_build_id(Base.root_module(Base, :Markdown))), + # and their dependencies + Dict(Base.PkgId(Base.root_module(Base, :Base64)) => Base.module_build_id(Base.root_module(Base, :Base64))), ) + @test Dict(modules) == modules_ok + @test discard_module.(deps) == deps1 - modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile; srcfiles_only=true) + modules, (_, deps, requires), required_modules, _... = Base.parse_cache_header(cachefile) @test map(x -> x.filename, deps) == [Foo_file] @test current_task()(0x01, 0x4000, 0x30031234) == 2 @test sin(0x01, 0x4000, 0x30031234) == 52 @test sin(0x01, 0x4000, 0x30031234; x = 9142) == 9142 - @test Foo.sinkw === Core.kwfunc(Base.sin) + @test Foo.sinkw === Core.kwcall @test Foo.NominalValue() == 1 @test Foo.OrdinalValue() == 1 @@ -432,7 +533,7 @@ precompile_test_harness(false) do dir """) Nest = Base.require(Main, Nest_module) cachefile = joinpath(cachedir, "$Nest_module.ji") - modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile) + modules, (deps, _, requires), required_modules, _... 
= Base.parse_cache_header(cachefile) @test last(deps).modpath == ["NestInner"] UsesB_module = :UsesB4b3a94a1a081a8cb @@ -454,7 +555,7 @@ precompile_test_harness(false) do dir """) UsesB = Base.require(Main, UsesB_module) cachefile = joinpath(cachedir, "$UsesB_module.ji") - modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile) + modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile) id1, id2 = only(requires) @test Base.pkgorigins[id1].cachepath == cachefile @test Base.pkgorigins[id2].cachepath == joinpath(cachedir, "$B_module.ji") @@ -462,15 +563,50 @@ precompile_test_harness(false) do dir Baz_file = joinpath(dir, "Baz.jl") write(Baz_file, """ - true && __precompile__(false) + haskey(Base.loaded_modules, Base.PkgId("UseBaz")) || __precompile__(false) module Baz baz() = 1 end """) @test Base.compilecache(Base.PkgId("Baz")) == Base.PrecompilableError() # due to __precompile__(false) + + OverwriteMethodError_file = joinpath(dir, "OverwriteMethodError.jl") + write(OverwriteMethodError_file, + """ + module OverwriteMethodError + Base.:(+)(x::Bool, y::Bool) = false + end + """) + + @test Base.compilecache(Base.PkgId("OverwriteMethodError")) == Base.PrecompilableError() # due to piracy + + UseBaz_file = joinpath(dir, "UseBaz.jl") + write(UseBaz_file, + """ + module UseBaz + biz() = 1 + @assert haskey(Base.loaded_modules, Base.PkgId("UseBaz")) + @assert !haskey(Base.loaded_modules, Base.PkgId("Baz")) + using Baz + @assert haskey(Base.loaded_modules, Base.PkgId("Baz")) + buz() = 2 + const generating = ccall(:jl_generating_output, Cint, ()) + const incremental = Base.JLOptions().incremental + end + """) + + @test Base.compilecache(Base.PkgId("UseBaz")) == Base.PrecompilableError() # due to __precompile__(false) + @eval using UseBaz + @test haskey(Base.loaded_modules, Base.PkgId("UseBaz")) + @test haskey(Base.loaded_modules, Base.PkgId("Baz")) + @test Base.invokelatest(UseBaz.biz) === 1 + @test Base.invokelatest(UseBaz.buz) === 2 + @test UseBaz.generating == 0 + @test UseBaz.incremental == 0 @eval using Baz - @test Base.invokelatest(Baz.baz) == 1 + @test Base.invokelatest(Baz.baz) === 1 + @test Baz === UseBaz.Baz # Issue #12720 FooBar1_file = joinpath(dir, "FooBar1.jl") @@ -488,30 +624,31 @@ precompile_test_harness(false) do dir end """) - cachefile = Base.compilecache(Base.PkgId("FooBar")) + cachefile, _ = Base.compilecache(Base.PkgId("FooBar")) empty_prefs_hash = Base.get_preferences_hash(nothing, String[]) @test cachefile == Base.compilecache_path(Base.PkgId("FooBar"), empty_prefs_hash) @test isfile(joinpath(cachedir, "FooBar.ji")) - @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Vector + Tsc = Bool(Base.JLOptions().use_pkgimages) ? Tuple{<:Vector, String} : Tuple{<:Vector, Nothing} + @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @test !isdefined(Main, :FooBar) @test !isdefined(Main, :FooBar1) relFooBar_file = joinpath(dir, "subfolder", "..", "FooBar.jl") - @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? Vector : Bool) # `..` is not a symlink on Windows + @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa (Sys.iswindows() ? 
Tuple{<:Vector, String} : Bool) # `..` is not a symlink on Windows mkdir(joinpath(dir, "subfolder")) - @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa Vector + @test Base.stale_cachefile(relFooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @eval using FooBar fb_uuid = Base.module_build_id(FooBar) sleep(2); touch(FooBar_file) insert!(DEPOT_PATH, 1, dir2) - @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true + @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc @eval using FooBar1 @test !isfile(joinpath(cachedir2, "FooBar.ji")) @test !isfile(joinpath(cachedir, "FooBar1.ji")) @test isfile(joinpath(cachedir2, "FooBar1.ji")) - @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true - @test Base.stale_cachefile(FooBar1_file, joinpath(cachedir2, "FooBar1.ji")) isa Vector + @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc + @test Base.stale_cachefile(FooBar1_file, joinpath(cachedir2, "FooBar1.ji")) isa Tsc @test fb_uuid == Base.module_build_id(FooBar) fb_uuid1 = Base.module_build_id(FooBar1) @test fb_uuid != fb_uuid1 @@ -543,7 +680,11 @@ precompile_test_harness(false) do dir FooBar3_inc = joinpath(dir, "FooBar3_inc.jl") write(FooBar3_inc, "x=1\n") for code in ["Core.eval(Base, :(x=1))", "Base.include(Base, \"FooBar3_inc.jl\")"] - write(FooBar3_file, code) + write(FooBar3_file, """ + module FooBar3 + $code + end + """) @test_warn "Evaluation into the closed module `Base` breaks incremental compilation" try Base.require(Main, :FooBar3) catch exc @@ -615,7 +756,10 @@ precompile_test_harness("code caching") do dir precompile(getelsize, (Vector{Int32},)) end """) - Base.compilecache(Base.PkgId(string(Cache_module))) + pkgid = Base.PkgId(string(Cache_module)) + @test !Base.isprecompiled(pkgid) + Base.compilecache(pkgid) + @test Base.isprecompiled(pkgid) @eval using $Cache_module M = getfield(@__MODULE__, Cache_module) # Test that this cache file "owns" all the roots @@ -626,40 +770,35 @@ precompile_test_harness("code caching") do dir @test all(i -> root_provenance(m, i) == Mid, 1:length(m.roots)) end # Check that we can cache external CodeInstances: - # size(::Vector) has an inferred specialization for Vector{X} - msize = which(size, (Vector{<:Any},)) + # length(::Vector) has an inferred specialization for `Vector{X}` + msize = which(length, (Vector{<:Any},)) hasspec = false - for i = 1:length(msize.specializations) - if isassigned(msize.specializations, i) - mi = msize.specializations[i] - if isa(mi, Core.MethodInstance) - tt = Base.unwrap_unionall(mi.specTypes) - if tt.parameters[2] == Vector{Cacheb8321416e8a3e2f1.X} - if isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) && mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing - hasspec = true - break - end - end + for mi in Base.specializations(msize) + if mi.specTypes == Tuple{typeof(length),Vector{Cacheb8321416e8a3e2f1.X}} + if (isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) && + mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing) + hasspec = true + break end end end @test hasspec # Test that compilation adds to method roots with appropriate provenance m = which(setindex!, (Dict{M.X,Any}, Any, M.X)) - @test M.X ∈ m.roots + @test Memory{M.X} ∈ m.roots # Check that roots added outside of incremental builds get attributed to a moduleid of 0 Base.invokelatest() do Dict{M.X2,Any}()[M.X2()] = nothing end - @test M.X2 ∈ m.roots + @test Memory{M.X2} ∈ 
m.roots groups = group_roots(m) - @test M.X ∈ groups[Mid] # attributed to M - @test M.X2 ∈ groups[0] # activate module is not known + @test Memory{M.X} ∈ groups[Mid] # attributed to M + @test Memory{M.X2} ∈ groups[0] # activate module is not known @test !isempty(groups[Bid]) # Check that internal methods and their roots are accounted appropriately minternal = which(M.getelsize, (Vector,)) - mi = minternal.specializations[1] - @test Base.unwrap_unionall(mi.specTypes).parameters[2] == Vector{Int32} + mi = minternal.specializations::Core.MethodInstance + @test mi.specTypes == Tuple{typeof(M.getelsize),Vector{Int32}} ci = mi.cache @test ci.relocatability == 1 @test ci.inferred !== nothing @@ -667,7 +806,9 @@ precompile_test_harness("code caching") do dir Base.invokelatest() do M.getelsize(M.X2[]) end - mi = minternal.specializations[2] + mispecs = minternal.specializations::Core.SimpleVector + @test mispecs[1] === mi + mi = mispecs[2]::Core.MethodInstance ci = mi.cache @test ci.relocatability == 0 # PkgA loads PkgB, and both add roots to the same `push!` method (both before and after loading B) @@ -702,10 +843,10 @@ precompile_test_harness("code caching") do dir end mT = which(push!, (Vector{T} where T, Any)) groups = group_roots(mT) - @test M2.Y ∈ groups[M2id] - @test M2.Z ∈ groups[M2id] - @test M.X ∈ groups[Mid] - @test M.X ∉ groups[M2id] + @test Memory{M2.Y} ∈ groups[M2id] + @test Memory{M2.Z} ∈ groups[M2id] + @test Memory{M.X} ∈ groups[Mid] + @test Memory{M.X} ∉ groups[M2id] # backedges of external MethodInstances # Root gets used by RootA and RootB, and both consumers end up inferring the same MethodInstance from Root # Do both callers get listed as backedges? @@ -752,8 +893,9 @@ precompile_test_harness("code caching") do dir MB = getfield(@__MODULE__, RootB) M = getfield(MA, RootModule) m = which(M.f, (Any,)) - for mi in m.specializations + for mi in Base.specializations(m) mi === nothing && continue + mi = mi::Core.MethodInstance if mi.specTypes.parameters[2] === Int8 # external callers mods = Module[] @@ -775,7 +917,7 @@ precompile_test_harness("code caching") do dir end end - # Invalidations (this test is adapted from from SnoopCompile) + # Invalidations (this test is adapted from SnoopCompile) function hasvalid(mi, world) isdefined(mi, :cache) || return false ci = mi.cache @@ -805,6 +947,10 @@ precompile_test_harness("code caching") do dir build_stale(37) stale('c') + ## Reporting tests (unrelated to the above) + nbits(::Int8) = 8 + nbits(::Int16) = 16 + end """ ) @@ -819,9 +965,18 @@ precompile_test_harness("code caching") do dir # This will be invalidated if StaleC is loaded useA() = $StaleA.stale("hello") + useA2() = useA() # force precompilation - useA() + begin + Base.Experimental.@force_compile + useA2() + end + + ## Reporting tests + call_nbits(x::Integer) = $StaleA.nbits(x) + map_nbits() = map(call_nbits, Integer[Int8(1), Int16(1)]) + map_nbits() end """ @@ -844,19 +999,23 @@ precompile_test_harness("code caching") do dir Base.compilecache(Base.PkgId(string(pkg))) end @eval using $StaleA + MA = getfield(@__MODULE__, StaleA) + Base.eval(MA, :(nbits(::UInt8) = 8)) @eval using $StaleC + invalidations = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1) @eval using $StaleB - MA = getfield(@__MODULE__, StaleA) + ccall(:jl_debug_method_invalidation, Any, (Cint,), 0) MB = getfield(@__MODULE__, StaleB) MC = getfield(@__MODULE__, StaleC) world = Base.get_world_counter() m = only(methods(MA.use_stale)) - mi = m.specializations[1] + mi = m.specializations::Core.MethodInstance @test 
hasvalid(mi, world) # it was re-inferred by StaleC m = only(methods(MA.build_stale)) - mis = filter(!isnothing, collect(m.specializations)) + mis = filter(!isnothing, collect(m.specializations::Core.SimpleVector)) @test length(mis) == 2 for mi in mis + mi = mi::Core.MethodInstance if mi.specTypes.parameters[2] == Int @test mi.cache.max_world < world else @@ -866,11 +1025,206 @@ precompile_test_harness("code caching") do dir end end m = only(methods(MB.useA)) - mi = m.specializations[1] + mi = m.specializations::Core.MethodInstance @test !hasvalid(mi, world) # invalidated by the stale(x::String) method in StaleC m = only(methods(MC.call_buildstale)) - mi = m.specializations[1] + mi = m.specializations::Core.MethodInstance @test hasvalid(mi, world) # was compiled with the new method + + # Reporting test (ensure SnoopCompile works) + @test all(i -> isassigned(invalidations, i), eachindex(invalidations)) + m = only(methods(MB.call_nbits)) + for mi in Base.specializations(m) + hv = hasvalid(mi, world) + @test mi.specTypes.parameters[end] === Integer ? !hv : hv + end + + setglobal!(Main, :inval, invalidations) + idxs = findall(==("verify_methods"), invalidations) + idxsbits = filter(idxs) do i + mi = invalidations[i-1] + mi.def == m + end + idx = only(idxsbits) + tagbad = invalidations[idx+1] + @test isa(tagbad, Int32) + j = findfirst(==(tagbad), invalidations) + @test invalidations[j-1] == "insert_backedges_callee" + @test isa(invalidations[j-2], Type) + @test isa(invalidations[j+1], Vector{Any}) # [nbits(::UInt8)] + m = only(methods(MB.useA2)) + mi = only(Base.specializations(m)) + @test !hasvalid(mi, world) + @test mi ∈ invalidations + + m = only(methods(MB.map_nbits)) + @test !hasvalid(m.specializations::Core.MethodInstance, world+1) # insert_backedges invalidations also trigger their backedges +end + +precompile_test_harness("invoke") do dir + InvokeModule = :Invoke0x030e7e97c2365aad + CallerModule = :Caller0x030e7e97c2365aad + write(joinpath(dir, "$InvokeModule.jl"), + """ + module $InvokeModule + export f, g, h, q, fnc, gnc, hnc, qnc # nc variants do not infer to a Const + export f44320, g44320 + export getlast + # f is for testing invoke that occurs within a dependency + f(x::Real) = 0 + f(x::Int) = x < 5 ? 1 : invoke(f, Tuple{Real}, x) + fnc(x::Real) = rand()-1 + fnc(x::Int) = x < 5 ? rand()+1 : invoke(fnc, Tuple{Real}, x) + # g is for testing invoke that occurs from a dependent + g(x::Real) = 0 + g(x::Int) = 1 + gnc(x::Real) = rand()-1 + gnc(x::Int) = rand()+1 + # h will be entirely superseded by a new method (full invalidation) + h(x::Real) = 0 + h(x::Int) = x < 5 ? 1 : invoke(h, Tuple{Integer}, x) + hnc(x::Real) = rand()-1 + hnc(x::Int) = x < 5 ? rand()+1 : invoke(hnc, Tuple{Integer}, x) + # q will have some callers invalidated + q(x::Integer) = 0 + qnc(x::Integer) = rand()-1 + # Issue #44320 + f44320(::Int) = 1 + f44320(::Any) = 2 + g44320() = invoke(f44320, Tuple{Any}, 0) + g44320() + + # Adding new specializations should not invalidate `invoke`s + function getlast(itr) + x = nothing + for y in itr + x = y + end + return x + end + getlast(a::AbstractArray) = invoke(getlast, Tuple{Any}, a) + end + """) + write(joinpath(dir, "$CallerModule.jl"), + """ + module $CallerModule + using $InvokeModule + # involving external modules + callf(x) = f(x) + callg(x) = x < 5 ? g(x) : invoke(g, Tuple{Real}, x) + callh(x) = h(x) + callq(x) = q(x) + callqi(x) = invoke(q, Tuple{Integer}, x) + callfnc(x) = fnc(x) + callgnc(x) = x < 5 ? 
gnc(x) : invoke(gnc, Tuple{Real}, x) + callhnc(x) = hnc(x) + callqnc(x) = qnc(x) + callqnci(x) = invoke(qnc, Tuple{Integer}, x) + + # Purely internal + internal(x::Real) = 0 + internal(x::Int) = x < 5 ? 1 : invoke(internal, Tuple{Real}, x) + internalnc(x::Real) = rand()-1 + internalnc(x::Int) = x < 5 ? rand()+1 : invoke(internalnc, Tuple{Real}, x) + + # Issue #44320 + f44320(::Real) = 3 + + call_getlast(x) = getlast(x) + + # force precompilation + begin + Base.Experimental.@force_compile + callf(3) + callg(3) + callh(3) + callq(3) + callqi(3) + callfnc(3) + callgnc(3) + callhnc(3) + callqnc(3) + callqnci(3) + internal(3) + internalnc(3) + call_getlast([1,2,3]) + end + + # Now that we've precompiled, invalidate with a new method that overrides the `invoke` dispatch + $InvokeModule.h(x::Integer) = -1 + $InvokeModule.hnc(x::Integer) = rand() - 20 + # ...and for q, override with a more specialized method that should leave only the invoked version still valid + $InvokeModule.q(x::Int) = -1 + $InvokeModule.qnc(x::Int) = rand()+1 + end + """) + Base.compilecache(Base.PkgId(string(CallerModule))) + @eval using $InvokeModule: $InvokeModule + MI = getfield(@__MODULE__, InvokeModule) + @eval $MI.getlast(a::UnitRange) = a.stop + @eval using $CallerModule + M = getfield(@__MODULE__, CallerModule) + + function get_method_for_type(func, @nospecialize(T)) # return the method func(::T) + for m in methods(func) + m.sig.parameters[end] === T && return m + end + error("no ::Real method found for $func") + end + function nvalid(mi::Core.MethodInstance) + isdefined(mi, :cache) || return 0 + ci = mi.cache + n = Int(ci.max_world == typemax(UInt)) + while isdefined(ci, :next) + ci = ci.next + n += ci.max_world == typemax(UInt) + end + return n + end + + for func in (M.f, M.g, M.internal, M.fnc, M.gnc, M.internalnc) + m = get_method_for_type(func, Real) + mi = m.specializations::Core.MethodInstance + @test length(mi.backedges) == 2 + @test mi.backedges[1] === Tuple{typeof(func), Real} + @test isa(mi.backedges[2], Core.MethodInstance) + @test mi.cache.max_world == typemax(mi.cache.max_world) + end + for func in (M.q, M.qnc) + m = get_method_for_type(func, Integer) + mi = m.specializations::Core.MethodInstance + @test length(mi.backedges) == 2 + @test mi.backedges[1] === Tuple{typeof(func), Integer} + @test isa(mi.backedges[2], Core.MethodInstance) + @test mi.cache.max_world == typemax(mi.cache.max_world) + end + + m = get_method_for_type(M.h, Real) + @test isempty(Base.specializations(m)) + m = get_method_for_type(M.hnc, Real) + @test isempty(Base.specializations(m)) + m = only(methods(M.callq)) + @test isempty(Base.specializations(m)) || nvalid(m.specializations::Core.MethodInstance) == 0 + m = only(methods(M.callqnc)) + @test isempty(Base.specializations(m)) || nvalid(m.specializations::Core.MethodInstance) == 0 + m = only(methods(M.callqi)) + @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqi), Int} + m = only(methods(M.callqnci)) + @test (m.specializations::Core.MethodInstance).specTypes == Tuple{typeof(M.callqnci), Int} + + m = only(methods(M.g44320)) + @test (m.specializations::Core.MethodInstance).cache.max_world == typemax(UInt) + + m = which(MI.getlast, (Any,)) + @test (m.specializations::Core.MethodInstance).cache.max_world == typemax(UInt) + + # Precompile specific methods for arbitrary arg types + invokeme(x) = 1 + invokeme(::Int) = 2 + m_any, m_int = sort(collect(methods(invokeme)); by=m->(m.file,m.line)) + @test precompile(invokeme, (Int,), m_any) + @test 
(m_any.specializations::Core.MethodInstance).specTypes === Tuple{typeof(invokeme), Int} + @test isempty(Base.specializations(m_int)) end # test --compiled-modules=no command line option @@ -1052,7 +1406,11 @@ end end finally cd(save_cwd) - rm(temp_path, recursive=true) + try + rm(temp_path, recursive=true) + catch err + @show err + end pop!(test_workers) # remove myid rmprocs(test_workers) end @@ -1069,14 +1427,22 @@ precompile_test_harness("delete_method") do dir """ module $A_module - export apc, anopc + export apc, anopc, apcnc, anopcnc + # Infer to a const apc(::Int, ::Int) = 1 apc(::Any, ::Any) = 2 anopc(::Int, ::Int) = 1 anopc(::Any, ::Any) = 2 + # Do not infer to a const + apcnc(::Int, ::Int) = rand() - 1 + apcnc(::Any, ::Any) = rand() + 1 + + anopcnc(::Int, ::Int) = rand() - 1 + anopcnc(::Any, ::Any) = rand() + 1 + end """) write(B_file, @@ -1087,19 +1453,26 @@ precompile_test_harness("delete_method") do dir bpc(x) = apc(x, x) bnopc(x) = anopc(x, x) + bpcnc(x) = apcnc(x, x) + bnopcnc(x) = anopcnc(x, x) precompile(bpc, (Int,)) precompile(bpc, (Float64,)) + precompile(bpcnc, (Int,)) + precompile(bpcnc, (Float64,)) end """) A = Base.require(Main, A_module) - for mths in (collect(methods(A.apc)), collect(methods(A.anopc))) - Base.delete_method(mths[1]) + for mths in (collect(methods(A.apc)), collect(methods(A.anopc)), collect(methods(A.apcnc)), collect(methods(A.anopcnc))) + idx = findfirst(m -> m.sig.parameters[end] === Int, mths) + Base.delete_method(mths[idx]) end B = Base.require(Main, B_module) - @test Base.invokelatest(B.bpc, 1) == Base.invokelatest(B.bpc, 1.0) == 2 - @test Base.invokelatest(B.bnopc, 1) == Base.invokelatest(B.bnopc, 1.0) == 2 + for f in (B.bpc, B.bnopc, B.bpcnc, B.bnopcnc) + @test Base.invokelatest(f, 1) > 1 + @test Base.invokelatest(f, 1.0) > 1 + end end precompile_test_harness("Issues #19030 and #25279") do load_path @@ -1177,13 +1550,13 @@ precompile_test_harness("Issue #25971") do load_path sourcefile = joinpath(load_path, "Foo25971.jl") write(sourcefile, "module Foo25971 end") chmod(sourcefile, 0o666) - cachefile = Base.compilecache(Base.PkgId("Foo25971")) + cachefile, _ = Base.compilecache(Base.PkgId("Foo25971")) @test filemode(sourcefile) == filemode(cachefile) chmod(sourcefile, 0o600) - cachefile = Base.compilecache(Base.PkgId("Foo25971")) + cachefile, _ = Base.compilecache(Base.PkgId("Foo25971")) @test filemode(sourcefile) == filemode(cachefile) chmod(sourcefile, 0o444) - cachefile = Base.compilecache(Base.PkgId("Foo25971")) + cachefile, _ = Base.compilecache(Base.PkgId("Foo25971")) # Check writable @test touch(cachefile) == cachefile end @@ -1256,10 +1629,10 @@ precompile_test_harness("No external edges") do load_path Base.compilecache(Base.PkgId("NoExternalEdges")) @eval begin using NoExternalEdges - @test only(methods(NoExternalEdges.foo1)).specializations[1].cache.max_world != 0 - @test only(methods(NoExternalEdges.foo2)).specializations[1].cache.max_world != 0 - @test only(methods(NoExternalEdges.foo3)).specializations[1].cache.max_world != 0 - @test only(methods(NoExternalEdges.foo4)).specializations[1].cache.max_world != 0 + @test (only(methods(NoExternalEdges.foo1)).specializations::Core.MethodInstance).cache.max_world != 0 + @test (only(methods(NoExternalEdges.foo2)).specializations::Core.MethodInstance).cache.max_world != 0 + @test (only(methods(NoExternalEdges.foo3)).specializations::Core.MethodInstance).cache.max_world != 0 + @test (only(methods(NoExternalEdges.foo4)).specializations::Core.MethodInstance).cache.max_world != 0 end end @@ 
-1270,10 +1643,10 @@ end f(x, y) = x + y f(x::Int, y) = 2x + y end - precompile(M.f, (Int, Any)) - precompile(M.f, (AbstractFloat, Any)) + @test precompile(M.f, (Int, Any)) + @test precompile(M.f, (AbstractFloat, Any)) mis = map(methods(M.f)) do m - m.specializations[1] + m.specializations::Core.MethodInstance end @test any(mi -> mi.specTypes.parameters[2] === Any, mis) @test all(mi -> isa(mi.cache, Core.CodeInstance), mis) @@ -1290,3 +1663,260 @@ precompile_test_harness("__init__ cachepath") do load_path """) @test isa((@eval (using InitCachePath; InitCachePath)), Module) end + +# Test that precompilation can handle invalidated methods created from `precompile`, +# not via backedges. +precompile_test_harness("Issue #46558") do load_path + write(joinpath(load_path, "Foo46558.jl"), + """ + module Foo46558 + foo(x::Real) = 1 + end + """) + write(joinpath(load_path, "Bar46558.jl"), + """ + module Bar46558 + using Foo46558 + precompile(Foo46558.foo, (Int,)) + end + """) + Base.compilecache(Base.PkgId("Foo46558")) + Base.compilecache(Base.PkgId("Bar46558")) + Foo = (@eval (using Foo46558; Foo46558)) + @eval ($Foo.foo)(x::Int) = 2 + Bar = (@eval (using Bar46558; Bar46558)) + @test (@eval $Foo.foo(1)) == 2 +end + +precompile_test_harness("issue #46296") do load_path + write(joinpath(load_path, "CodeInstancePrecompile.jl"), + """ + module CodeInstancePrecompile + + mi = first(Base.specializations(first(methods(identity)))) + ci = Core.CodeInstance(mi, Any, Any, nothing, nothing, zero(Int32), typemin(UInt), + typemax(UInt), zero(UInt32), zero(UInt32), nothing, 0x00) + + __init__() = @assert ci isa Core.CodeInstance + + end + """) + Base.compilecache(Base.PkgId("CodeInstancePrecompile")) + (@eval (using CodeInstancePrecompile)) +end + +precompile_test_harness("Recursive types") do load_path + write(joinpath(load_path, "RecursiveTypeDef.jl"), + """ + module RecursiveTypeDef + + struct C{T,O} end + struct A{T,N,O} <: AbstractArray{C{T,A{T,N,O}},N} + sz::NTuple{N,Int} + end + + end + """) + Base.compilecache(Base.PkgId("RecursiveTypeDef")) + (@eval (using RecursiveTypeDef)) + a = Base.invokelatest(RecursiveTypeDef.A{Float64,2,String}, (3, 3)) + @test isa(a, AbstractArray) +end + +@testset "issue 46778" begin + f46778(::Any, ::Type{Int}) = 1 + f46778(::Any, ::DataType) = 2 + @test precompile(Tuple{typeof(f46778), Int, DataType}) + @test (which(f46778, Tuple{Any,DataType}).specializations::Core.MethodInstance).cache.invoke != C_NULL +end + + +precompile_test_harness("Module tparams") do load_path + write(joinpath(load_path, "ModuleTparams.jl"), + """ + module ModuleTparams + module TheTParam + end + + struct ParamStruct{T}; end + const the_struct = ParamStruct{TheTParam}() + end + """) + Base.compilecache(Base.PkgId("ModuleTparams")) + (@eval (using ModuleTparams)) + @test ModuleTparams.the_struct === Base.invokelatest(ModuleTparams.ParamStruct{ModuleTparams.TheTParam}) +end + +precompile_test_harness("PkgCacheInspector") do load_path + # Test functionality needed by PkgCacheInspector.jl + write(joinpath(load_path, "PCI.jl"), + """ + module PCI + Base.repl_cmd() = 55 # external method + f() = Base.repl_cmd(7, "hello") # external specialization (should never exist otherwise) + try + f() + catch + end + end + """) + cachefile, ocachefile = Base.compilecache(Base.PkgId("PCI")) + + # Get the depmods + local depmods + @lock Base.require_lock begin + local depmodnames + io = open(cachefile, "r") + try + # isvalid_cache_header returns checksum id or zero + Base.isvalid_cache_header(io) == 0 && 
throw(ArgumentError("Invalid header in cache file $cachefile.")) + depmodnames = Base.parse_cache_header(io, cachefile)[3] + Base.isvalid_file_crc(io) || throw(ArgumentError("Invalid checksum in cache file $cachefile.")) + finally + close(io) + end + ndeps = length(depmodnames) + depmods = Vector{Any}(undef, ndeps) + for i in 1:ndeps + modkey, build_id = depmodnames[i] + dep = Base._tryrequire_from_serialized(modkey, build_id) + if !isa(dep, Module) + return dep + end + depmods[i] = dep + end + end + + if ocachefile !== nothing + sv = ccall(:jl_restore_package_image_from_file, Any, (Cstring, Any, Cint, Cstring, Cint), ocachefile, depmods, true, "PCI", false) + else + sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint, Cstring), cachefile, depmods, true, "PCI") + end + + modules, init_order, external_methods, new_specializations, new_method_roots, external_targets, edges = sv + m = only(external_methods) + @test m.name == :repl_cmd && m.nargs < 2 + @test any(new_specializations) do ci + mi = ci.def + mi.specTypes == Tuple{typeof(Base.repl_cmd), Int, String} + end +end + +precompile_test_harness("DynamicExpressions") do load_path + # https://github.com/JuliaLang/julia/pull/47184#issuecomment-1364716312 + write(joinpath(load_path, "Float16MWE.jl"), + """ + module Float16MWE + struct Node{T} + val::T + end + doconvert(::Type{<:Node}, val) = convert(Float16, val) + precompile(Tuple{typeof(doconvert), Type{Node{Float16}}, Float64}) + end # module Float16MWE + """) + Base.compilecache(Base.PkgId("Float16MWE")) + @eval using Float16MWE + @test @invokelatest(Float16MWE.doconvert(Float16MWE.Node{Float16}, -1.2)) === Float16(-1.2) +end + +precompile_test_harness("BadInvalidations") do load_path + write(joinpath(load_path, "BadInvalidations.jl"), + """ + module BadInvalidations + Base.Experimental.@compiler_options compile=min optimize=1 + getval() = Base.a_method_to_overwrite_in_test() + getval() + end # module BadInvalidations + """) + Base.compilecache(Base.PkgId("BadInvalidations")) + @eval Base a_method_to_overwrite_in_test() = inferencebarrier(2) + @eval using BadInvalidations + @test Base.invokelatest(BadInvalidations.getval) === 2 +end + +# https://github.com/JuliaLang/julia/issues/48074 +precompile_test_harness("WindowsCacheOverwrite") do load_path + # https://github.com/JuliaLang/julia/pull/47184#issuecomment-1364716312 + write(joinpath(load_path, "WindowsCacheOverwrite.jl"), + """ + module WindowsCacheOverwrite + end # module + """) + ji, ofile = Base.compilecache(Base.PkgId("WindowsCacheOverwrite")) + @eval using WindowsCacheOverwrite + + write(joinpath(load_path, "WindowsCacheOverwrite.jl"), + """ + module WindowsCacheOverwrite + f() = "something new" + end # module + """) + + ji_2, ofile_2 = Base.compilecache(Base.PkgId("WindowsCacheOverwrite")) + @test ofile_2 == Base.ocachefile_from_cachefile(ji_2) +end + +precompile_test_harness("Issue #48391") do load_path + write(joinpath(load_path, "I48391.jl"), + """ + module I48391 + struct SurrealFinite <: Real end + precompile(Tuple{typeof(Base.isless), SurrealFinite, SurrealFinite}) + Base.:(<)(x::SurrealFinite, y::SurrealFinite) = "good" + end + """) + ji, ofile = Base.compilecache(Base.PkgId("I48391")) + @eval using I48391 + x = Base.invokelatest(I48391.SurrealFinite) + @test Base.invokelatest(isless, x, x) === "good" + @test_throws ErrorException isless(x, x) +end + +precompile_test_harness("Generator nospecialize") do load_path + write(joinpath(load_path, "GenNoSpec.jl"), + """ + module GenNoSpec + @generated function f(x...) 
+ :((\$(Base.Meta.quot(x)),)) + end + @assert precompile(Tuple{typeof(which(f, (Any,Any)).generator.gen), Any, Any}) + end + """) + ji, ofile = Base.compilecache(Base.PkgId("GenNoSpec")) + @eval using GenNoSpec +end + +precompile_test_harness("Issue #50538") do load_path + write(joinpath(load_path, "I50538.jl"), + """ + module I50538 + const newglobal = try + Base.newglobal = false + catch ex + ex isa ErrorException || rethrow() + ex + end + const newtype = try + Core.set_binding_type!(Base, :newglobal) + catch ex + ex isa ErrorException || rethrow() + ex + end + global undefglobal + end + """) + ji, ofile = Base.compilecache(Base.PkgId("I50538")) + @eval using I50538 + @test I50538.newglobal.msg == "Creating a new global in closed module `Base` (`newglobal`) breaks incremental compilation because the side effects will not be permanent." + @test I50538.newtype.msg == "Creating a new global in closed module `Base` (`newglobal`) breaks incremental compilation because the side effects will not be permanent." + @test_throws(ErrorException("cannot set type for global I50538.undefglobal. It already has a value or is already set to a different type."), + Core.set_binding_type!(I50538, :undefglobal, Int)) + Core.set_binding_type!(I50538, :undefglobal, Any) + @test Core.get_binding_type(I50538, :undefglobal) === Any + @test !isdefined(I50538, :undefglobal) +end + +empty!(Base.DEPOT_PATH) +append!(Base.DEPOT_PATH, original_depot_path) +empty!(Base.LOAD_PATH) +append!(Base.LOAD_PATH, original_load_path) diff --git a/test/project/Extensions/EnvWithDeps/Manifest.toml b/test/project/Extensions/EnvWithDeps/Manifest.toml new file mode 100644 index 0000000000000..85ff259f0a4d5 --- /dev/null +++ b/test/project/Extensions/EnvWithDeps/Manifest.toml @@ -0,0 +1,21 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.0-rc3" +manifest_format = "2.0" +project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36" + +[[deps.ExtDep]] +deps = ["SomePackage"] +path = "../ExtDep.jl" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.ExtDep2]] +path = "../ExtDep2" +uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +version = "0.1.0" + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/EnvWithDeps/Project.toml b/test/project/Extensions/EnvWithDeps/Project.toml new file mode 100644 index 0000000000000..cf020b56fc2e8 --- /dev/null +++ b/test/project/Extensions/EnvWithDeps/Project.toml @@ -0,0 +1,4 @@ +[deps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" +ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" diff --git a/test/project/Extensions/EnvWithHasExtensions/Manifest.toml b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml new file mode 100644 index 0000000000000..8ac961fa1a9a9 --- /dev/null +++ b/test/project/Extensions/EnvWithHasExtensions/Manifest.toml @@ -0,0 +1,29 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.0-beta4" +manifest_format = "2.0" +project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef" + +[[deps.ExtDep]] +deps = ["SomePackage"] +path = "../ExtDep.jl" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.HasExtensions]] +path = "../HasExtensions.jl" +uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +version = "0.1.0" + + [deps.HasExtensions.extensions] + Extension = "ExtDep" + ExtensionFolder = ["ExtDep", 
"ExtDep2"] + + [deps.HasExtensions.weakdeps] + ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" + ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/EnvWithHasExtensions/Project.toml b/test/project/Extensions/EnvWithHasExtensions/Project.toml new file mode 100644 index 0000000000000..8639881ae95c0 --- /dev/null +++ b/test/project/Extensions/EnvWithHasExtensions/Project.toml @@ -0,0 +1,4 @@ +[deps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" +HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" diff --git a/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml b/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml new file mode 100644 index 0000000000000..66781a5701363 --- /dev/null +++ b/test/project/Extensions/EnvWithHasExtensionsv2/Manifest.toml @@ -0,0 +1,25 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.0-DEV" +manifest_format = "2.0" +project_hash = "caa716752e6dff3d77c3de929ebbb5d2024d04ef" + +[[deps.ExtDep]] +deps = ["SomePackage"] +path = "../ExtDep.jl" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.HasExtensions]] +path = "../HasExtensions_v2.jl" +uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +version = "0.2.0" +weakdeps = ["ExtDep"] + + [deps.HasExtensions.extensions] + Extension2 = "ExtDep" + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/EnvWithHasExtensionsv2/Project.toml b/test/project/Extensions/EnvWithHasExtensionsv2/Project.toml new file mode 100644 index 0000000000000..8639881ae95c0 --- /dev/null +++ b/test/project/Extensions/EnvWithHasExtensionsv2/Project.toml @@ -0,0 +1,4 @@ +[deps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" +HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" diff --git a/test/project/Extensions/ExtDep.jl/Project.toml b/test/project/Extensions/ExtDep.jl/Project.toml new file mode 100644 index 0000000000000..d246934b7f958 --- /dev/null +++ b/test/project/Extensions/ExtDep.jl/Project.toml @@ -0,0 +1,6 @@ +name = "ExtDep" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[deps] +SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8" diff --git a/test/project/Extensions/ExtDep.jl/src/ExtDep.jl b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl new file mode 100644 index 0000000000000..1c0022d879f51 --- /dev/null +++ b/test/project/Extensions/ExtDep.jl/src/ExtDep.jl @@ -0,0 +1,9 @@ +module ExtDep + +# loading this package makes the check for loading extensions trigger +# which tests #47921 +using SomePackage + +struct ExtDepStruct end + +end # module ExtDep diff --git a/test/project/Extensions/ExtDep2/Project.toml b/test/project/Extensions/ExtDep2/Project.toml new file mode 100644 index 0000000000000..b25b99615b185 --- /dev/null +++ b/test/project/Extensions/ExtDep2/Project.toml @@ -0,0 +1,3 @@ +name = "ExtDep2" +uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +version = "0.1.0" diff --git a/test/project/Extensions/ExtDep2/src/ExtDep2.jl b/test/project/Extensions/ExtDep2/src/ExtDep2.jl new file mode 100644 index 0000000000000..969905e25992f --- /dev/null +++ b/test/project/Extensions/ExtDep2/src/ExtDep2.jl @@ -0,0 +1,5 @@ +module ExtDep2 + +greet() = print("Hello World!") + +end # 
module ExtDep2 diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml new file mode 100644 index 0000000000000..52542fc822094 --- /dev/null +++ b/test/project/Extensions/HasDepWithExtensions.jl/Manifest.toml @@ -0,0 +1,31 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.0-DEV" +manifest_format = "2.0" +project_hash = "d523b3401f72a1ed34b7b43749fd2655c6b78542" + +[[deps.ExtDep]] +deps = ["SomePackage"] +path = "../ExtDep.jl" +uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c" +version = "0.1.0" + +[[deps.ExtDep2]] +path = "../ExtDep2" +uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +version = "0.1.0" + +[[deps.HasExtensions]] +path = "../HasExtensions.jl" +uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +version = "0.1.0" +weakdeps = ["ExtDep", "ExtDep2"] + + [deps.HasExtensions.extensions] + Extension = "ExtDep" + ExtensionFolder = ["ExtDep", "ExtDep2"] + +[[deps.SomePackage]] +path = "../SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +version = "0.1.0" diff --git a/test/project/Extensions/HasDepWithExtensions.jl/Project.toml b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml new file mode 100644 index 0000000000000..8f308a9fbee72 --- /dev/null +++ b/test/project/Extensions/HasDepWithExtensions.jl/Project.toml @@ -0,0 +1,8 @@ +name = "HasDepWithExtensions" +uuid = "d4ef3d4a-8e22-4710-85d8-c6cf2eb9efca" +version = "0.1.0" + +[deps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" +ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" +HasExtensions = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" diff --git a/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl b/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl new file mode 100644 index 0000000000000..5c1f2d1f301aa --- /dev/null +++ b/test/project/Extensions/HasDepWithExtensions.jl/src/HasDepWithExtensions.jl @@ -0,0 +1,21 @@ +module HasDepWithExtensions + +using HasExtensions: HasExtensions, HasExtensionsStruct +using ExtDep: ExtDepStruct +# Loading ExtDep makes the extension "Extension" load + +const m = Base.get_extension(HasExtensions, :Extension) +m isa Module || error("extension not loaded during precompilation") + +function do_something() + HasExtensions.foo(HasExtensionsStruct()) == 1 || error() + HasExtensions.foo(ExtDepStruct()) == 2 || error() + return true +end + +function __init__() + m = Base.get_extension(HasExtensions, :Extension) + m isa Module || error("extension not loaded during __init__") +end + +end # module diff --git a/test/project/Extensions/HasExtensions.jl/Manifest.toml b/test/project/Extensions/HasExtensions.jl/Manifest.toml new file mode 100644 index 0000000000000..55f7958701a75 --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/Manifest.toml @@ -0,0 +1,7 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.0-DEV" +manifest_format = "2.0" +project_hash = "c87947f1f1f070eea848950c304d668a112dec3d" + +[deps] diff --git a/test/project/Extensions/HasExtensions.jl/Project.toml b/test/project/Extensions/HasExtensions.jl/Project.toml new file mode 100644 index 0000000000000..72577de36d65d --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/Project.toml @@ -0,0 +1,11 @@ +name = "HasExtensions" +uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +version = "0.1.0" + +[weakdeps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" +ExtDep2 = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d" + 
+[extensions] +Extension = "ExtDep" +ExtensionFolder = ["ExtDep", "ExtDep2"] diff --git a/test/project/Extensions/HasExtensions.jl/ext/Extension.jl b/test/project/Extensions/HasExtensions.jl/ext/Extension.jl new file mode 100644 index 0000000000000..9216c403a485a --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/ext/Extension.jl @@ -0,0 +1,13 @@ +module Extension + +using HasExtensions, ExtDep + +HasExtensions.foo(::ExtDep.ExtDepStruct) = 2 + +function __init__() + HasExtensions.ext_loaded = true +end + +const extvar = 1 + +end diff --git a/test/project/Extensions/HasExtensions.jl/ext/ExtensionFolder/ExtensionFolder.jl b/test/project/Extensions/HasExtensions.jl/ext/ExtensionFolder/ExtensionFolder.jl new file mode 100644 index 0000000000000..1fb90d7989ca9 --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/ext/ExtensionFolder/ExtensionFolder.jl @@ -0,0 +1,9 @@ +module ExtensionFolder + +using ExtDep, ExtDep2, HasExtensions + +function __init__() + HasExtensions.ext_folder_loaded = true +end + +end diff --git a/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl new file mode 100644 index 0000000000000..dbfaeec4f8812 --- /dev/null +++ b/test/project/Extensions/HasExtensions.jl/src/HasExtensions.jl @@ -0,0 +1,10 @@ +module HasExtensions + +struct HasExtensionsStruct end + +foo(::HasExtensionsStruct) = 1 + +ext_loaded = false +ext_folder_loaded = false + +end # module diff --git a/test/project/Extensions/HasExtensions_v2.jl/Project.toml b/test/project/Extensions/HasExtensions_v2.jl/Project.toml new file mode 100644 index 0000000000000..5d92a4b138058 --- /dev/null +++ b/test/project/Extensions/HasExtensions_v2.jl/Project.toml @@ -0,0 +1,9 @@ +name = "HasExtensions" +uuid = "4d3288b3-3afc-4bb6-85f3-489fffe514c8" +version = "0.2.0" + +[weakdeps] +ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c" + +[extensions] +Extension2 = "ExtDep" diff --git a/test/project/Extensions/HasExtensions_v2.jl/ext/Extension2.jl b/test/project/Extensions/HasExtensions_v2.jl/ext/Extension2.jl new file mode 100644 index 0000000000000..d027adec9c223 --- /dev/null +++ b/test/project/Extensions/HasExtensions_v2.jl/ext/Extension2.jl @@ -0,0 +1,3 @@ +module Extension2 + +end diff --git a/test/project/Extensions/HasExtensions_v2.jl/src/HasExtensions.jl b/test/project/Extensions/HasExtensions_v2.jl/src/HasExtensions.jl new file mode 100644 index 0000000000000..dbfaeec4f8812 --- /dev/null +++ b/test/project/Extensions/HasExtensions_v2.jl/src/HasExtensions.jl @@ -0,0 +1,10 @@ +module HasExtensions + +struct HasExtensionsStruct end + +foo(::HasExtensionsStruct) = 1 + +ext_loaded = false +ext_folder_loaded = false + +end # module diff --git a/test/project/Extensions/SomePackage/Project.toml b/test/project/Extensions/SomePackage/Project.toml new file mode 100644 index 0000000000000..b2d43340b39a8 --- /dev/null +++ b/test/project/Extensions/SomePackage/Project.toml @@ -0,0 +1,4 @@ +name = "SomePackage" +uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8" +authors = ["Kristoffer <kcarlsson89@gmail.com>"] +version = "0.1.0" diff --git a/test/project/Extensions/SomePackage/src/SomePackage.jl b/test/project/Extensions/SomePackage/src/SomePackage.jl new file mode 100644 index 0000000000000..a41e0b7482bae --- /dev/null +++ b/test/project/Extensions/SomePackage/src/SomePackage.jl @@ -0,0 +1,5 @@ +module SomePackage + +greet() = print("Hello World!") + +end # module SomePackage diff --git a/test/project/deps/CovTest.jl/Project.toml 
b/test/project/deps/CovTest.jl/Project.toml new file mode 100644 index 0000000000000..97fb2c7d9cfce --- /dev/null +++ b/test/project/deps/CovTest.jl/Project.toml @@ -0,0 +1,3 @@ +name = "CovTest" +uuid = "f1f4390d-b815-473a-b5dd-5af6e1d717cb" +version = "0.1.0" diff --git a/test/project/deps/CovTest.jl/src/CovTest.jl b/test/project/deps/CovTest.jl/src/CovTest.jl new file mode 100644 index 0000000000000..bd172fc3a00f4 --- /dev/null +++ b/test/project/deps/CovTest.jl/src/CovTest.jl @@ -0,0 +1,26 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module CovTest + +function foo() + x = 1 + y = 2 + z = x * y + return z +end + +function bar() + x = 1 + y = 2 + z = x * y + return z +end + +if Base.generating_output() + # precompile foo but not bar + foo() +end + +export foo, bar + +end #module diff --git a/test/ranges.jl b/test/ranges.jl index 6d038747e706d..8933ab0047c86 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -1,6 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license using Base.Checked: checked_length +using InteractiveUtils: code_llvm @testset "range construction" begin @test_throws ArgumentError range(start=1, step=1, stop=2, length=10) @@ -58,6 +59,9 @@ using Base.Checked: checked_length @test last(10:0.2:3) === 9.8 @test step(10:0.2:3) === 0.2 @test isempty(10:0.2:3) + + unitrangeerrstr = "promotion of types Char and Char failed to change any arguments" + @test_throws unitrangeerrstr UnitRange('a', 'b') end using Dates, Random @@ -254,6 +258,45 @@ end @test x.hi/2 === PhysQuantity{1}(2.0) @test_throws ErrorException("Int is incommensurate with PhysQuantity") x/2 @test zero(typeof(x)) === Base.TwicePrecision(PhysQuantity{1}(0.0)) + + function twiceprecision_roundtrip_is_not_lossy( + ::Type{S}, + x::T, + ) where {S<:Number, T<:Union{Number,Base.TwicePrecision}} + tw = Base.TwicePrecision{S}(x) + @test x == T(tw) + end + + function twiceprecision_is_normalized(tw::Tw) where {Tw<:Base.TwicePrecision} + (hi, lo) = (tw.hi, tw.lo) + normalized = Tw(Base.canonicalize2(hi, lo)...) + @test (abs(lo) ≤ abs(hi)) & (tw == normalized) + end + + rand_twiceprecision(::Type{T}) where {T<:Number} = Base.TwicePrecision{T}(rand(widen(T))) + + rand_twiceprecision_is_ok(::Type{T}) where {T<:Number} = @test !iszero(rand_twiceprecision(T).lo) + + # For this test the `BigFloat` mantissa needs to be just a bit + # larger than the `Float64` mantissa + setprecision(BigFloat, 70) do + n = 10 + @testset "rand twiceprecision is ok" for T ∈ (Float32, Float64), i ∈ 1:n + rand_twiceprecision_is_ok(T) + end + @testset "twiceprecision roundtrip is not lossy 1" for i ∈ 1:n + twiceprecision_roundtrip_is_not_lossy(Float64, rand(BigFloat)) + end + @testset "twiceprecision roundtrip is not lossy 2" for i ∈ 1:n + twiceprecision_roundtrip_is_not_lossy(Float64, rand_twiceprecision(Float32)) + end + @testset "twiceprecision normalization 1: Float64 to Float32" for i ∈ 1:n + twiceprecision_is_normalized(Base.TwicePrecision{Float32}(rand_twiceprecision(Float64))) + end + @testset "twiceprecision normalization 2: Float32 to Float64" for i ∈ 1:n + twiceprecision_is_normalized(Base.TwicePrecision{Float64}(rand_twiceprecision(Float32))) + end + end end @testset "ranges" begin @test size(10:1:0) == (0,) @@ -518,8 +561,10 @@ end @test !(3.5 in 1:5) @test (3 in 1:5) @test (3 in 5:-1:1) - #@test (3 in 3+0*(1:5)) - #@test !(4 in 3+0*(1:5)) + @test (3 in 3 .+ 0*(1:5)) + @test !(4 in 3 .+ 0*(1:5)) + @test 0. in (0. .* (1:10)) + @test !(0.1 in (0. 
.* (1:10))) let r = 0.0:0.01:1.0 @test (r[30] in r) @@ -536,8 +581,17 @@ end x = (NaN16, Inf32, -Inf64, 1//0, -1//0) @test !(x in r) end + + @test 1e40 ∉ 0:1.0 # Issue #45747 + @test 1e20 ∉ 0:1e-20:1e-20 + @test 1e20 ∉ 0:1e-20 + @test 1.0 ∉ 0:1e-20:1e-20 + @test 0.5 ∉ 0:1e-20:1e-20 + @test 1 ∉ 0:1e-20:1e-20 + + @test_broken 17.0 ∈ 0:1e40 # Don't support really long ranges end - @testset "in() works across types, including non-numeric types (#21728)" begin + @testset "in() works across types, including non-numeric types (#21728 and #45646)" begin @test 1//1 in 1:3 @test 1//1 in 1.0:3.0 @test !(5//1 in 1:3) @@ -558,14 +612,49 @@ end @test !(Complex(1, 0) in Date(2017, 01, 01):Dates.Day(1):Date(2017, 01, 05)) @test !(π in Date(2017, 01, 01):Dates.Day(1):Date(2017, 01, 05)) @test !("a" in Date(2017, 01, 01):Dates.Day(1):Date(2017, 01, 05)) + + # We use Ducks because of their propensity to stand in a row and because we know + # that no additional methods (e.g. isfinite) are defined specifically for Ducks. + struct Duck + location::Int + end + Base.:+(x::Duck, y::Int) = Duck(x.location + y) + Base.:-(x::Duck, y::Int) = Duck(x.location - y) + Base.:-(x::Duck, y::Duck) = x.location - y.location + Base.isless(x::Duck, y::Duck) = isless(x.location, y.location) + + @test Duck(3) ∈ Duck(1):2:Duck(5) + @test Duck(3) ∈ Duck(5):-2:Duck(2) + @test Duck(4) ∉ Duck(5):-2:Duck(1) + @test Duck(4) ∈ Duck(1):Duck(5) + @test Duck(0) ∉ Duck(1):Duck(5) end end @testset "indexing range with empty range (#4309)" begin - @test (3:6)[5:4] === 7:6 + @test (@inferred (3:6)[5:4]) === 7:6 @test_throws BoundsError (3:6)[5:5] @test_throws BoundsError (3:6)[5] - @test (0:2:10)[7:6] === 12:2:10 + @test (@inferred (0:2:10)[7:6]) === 12:2:10 @test_throws BoundsError (0:2:10)[7:7] + + for start in [true], stop in [true, false] + @test (@inferred (start:stop)[1:0]) === true:false + end + @test (@inferred (true:false)[true:false]) == true:false + + @testset "issue #40760" begin + empty_range = 1:0 + r = range(false, length = 0) + @test r isa UnitRange && first(r) == 0 && last(r) == -1 + r = (true:true)[empty_range] + @test r isa UnitRange && first(r) == true && last(r) == false + @testset for r in Any[true:true, true:true:true, 1:2, 1:1:2] + @test (@inferred r[1:0]) isa AbstractRange + @test r[1:0] == empty_range + @test (@inferred r[1:1:0]) isa AbstractRange + @test r[1:1:0] == empty_range + end + end end # indexing with negative ranges (#8351) for a=AbstractRange[3:6, 0:2:10], b=AbstractRange[0:1, 2:-1:0] @@ -860,7 +949,15 @@ function range_fuzztests(::Type{T}, niter, nrange) where {T} @test m == length(r) @test strt == first(r) @test Δ == step(r) - @test_skip stop ≈ last(r) + # potential floating point error: + # stop = strt + (n-1)*Δ + # * error <= eps((n-1)*Δ)/2 <= abs((n-1)*Δ)/2 * eps(T) + # + error <= eps(stop)/2 <= abs(stop)/2 * eps(T) + # last(r) + # rat(strt) error <= eps(strt)/2 <= abs(strt)/2 * eps(T) + # rat(Δ) error <= (n-1)*eps(Δ)/2 <= abs((n-1)*Δ)/2 * eps(T) + # T(...) 
error <= eps(last(r))/2 <= abs(stop)/2 * eps(T) + @test stop ≈ last(r) atol = (abs(strt)/2 + (n-1)*abs(Δ) + abs(stop)) * eps(T) l = range(strt, stop=stop, length=n) @test n == length(l) @test strt == first(l) @@ -929,6 +1026,7 @@ end end a = prevfloat(a) end + @test (1:2:3)[StepRangeLen{Bool}(true,-1,2)] == [1] end # issue #20380 @@ -1208,6 +1306,8 @@ end @test sprint(show, UnitRange(1, 2)) == "1:2" @test sprint(show, StepRange(1, 2, 5)) == "1:2:5" + + @test sprint(show, LinRange{Float32}(1.5, 2.5, 10)) == "LinRange{Float32}(1.5, 2.5, 10)" end @testset "Issue 11049, and related" begin @@ -1724,6 +1824,7 @@ Base.div(x::Displacement, y::Displacement) = Displacement(div(x.val, y.val)) # required for collect (summing lengths); alternatively, should length return Int by default? Base.promote_rule(::Type{Displacement}, ::Type{Int}) = Int Base.convert(::Type{Int}, x::Displacement) = x.val +Base.Int(x::Displacement) = x.val # Unsigned complement, for testing checked_length struct UPosition <: Unsigned @@ -2031,8 +2132,17 @@ end end @testset "length(StepRange()) type stability" begin - typeof(length(StepRange(1,Int128(1),1))) == typeof(length(StepRange(1,Int128(1),0))) - typeof(checked_length(StepRange(1,Int128(1),1))) == typeof(checked_length(StepRange(1,Int128(1),0))) + for SR in (StepRange{Int,Int128}, StepRange{Int8,Int128}) + r1, r2 = SR(1, 1, 1), SR(1, 1, 0) + @test typeof(length(r1)) == typeof(checked_length(r1)) == + typeof(length(r2)) == typeof(checked_length(r2)) + end + SR = StepRange{Union{Int64,Int128},Int} + test_length(r, l) = length(r) === checked_length(r) === l + @test test_length(SR(Int64(1), 1, Int128(1)), Int128(1)) + @test test_length(SR(Int64(1), 1, Int128(0)), Int128(0)) + @test test_length(SR(Int64(1), 1, Int64(1)), Int64(1)) + @test test_length(SR(Int64(1), 1, Int64(0)), Int64(0)) end @testset "LinRange eltype for element types that wrap integers" begin @@ -2215,6 +2325,7 @@ end @test_throws BoundsError r[true:true:false] @test_throws BoundsError r[true:true:true] end + @testset "Non-Int64 endpoints that are identical (#39798)" begin for T in DataType[Float16,Float32,Float64,Bool,Int8,Int16,Int32,Int64,Int128,UInt8,UInt16,UInt32,UInt64,UInt128], r in [ LinRange(1, 1, 10), StepRangeLen(7, 0, 5) ] @@ -2346,3 +2457,125 @@ end @test isempty(range(typemax(Int), length=0, step=UInt(2))) @test length(range(1, length=typemax(Int128))) === typemax(Int128) + +@testset "firstindex(::StepRange{<:Base.BitInteger})" begin + test_firstindex(x) = firstindex(x) === first(Base.axes1(x)) + for T in Base.BitInteger_types, S in Base.BitInteger_types + @test test_firstindex(StepRange{T,S}(1, 1, 1)) + @test test_firstindex(StepRange{T,S}(1, 1, 0)) + end + @test test_firstindex(StepRange{Union{Int64,Int128},Int}(Int64(1), 1, Int128(1))) + @test test_firstindex(StepRange{Union{Int64,Int128},Int}(Int64(1), 1, Int128(0))) +end + +@testset "PR 49516" begin + struct PR49516 <: Signed + n::Int + end + PR49516(f::PR49516) = f + Base.:*(x::Integer, f::PR49516) = PR49516(*(x, f.n)) + Base.:+(f1::PR49516, f2::PR49516) = PR49516(+(f1.n, f2.n)) + Base.show(io::IO, f::PR49516) = print(io, "PR49516(", f.n, ")") + + srl = StepRangeLen(PR49516(1), PR49516(2), 10) + @test sprint(show, srl) == "PR49516(1):PR49516(2):PR49516(19)" +end + +@testset "Inline StepRange Construction #49270" begin + x = rand(Float32, 80) + a = rand(round(Int, length(x) / 2):length(x), 10^6) + + function test(x, a) + c = zero(Float32) + + @inbounds for j in a + for i in 1:8:j + c += x[i] + end + end + + return c + end + + llvm_ir(f, args) 
= sprint((io, args...) -> code_llvm(io, args...; debuginfo=:none), f, Base.typesof(args...)) + + ir = llvm_ir(test, (x, a)) + @test !occursin("steprange_last", ir) + @test !occursin("_colon", ir) + @test !occursin("StepRange", ir) +end + +# DimensionMismatch and LazyString +function check_ranges(rx, ry) + if length(rx) != length(ry) + throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))")) + end + rx, ry +end +@test Core.Compiler.is_foldable(Base.infer_effects(check_ranges, (UnitRange{Int},UnitRange{Int}))) +# TODO JET.@test_opt check_ranges(1:2, 3:4) + +@testset "checkbounds overflow (#26623)" begin + # the reported issue: + @test_throws BoundsError (1:3:4)[typemax(Int)÷3*2+3] + + # a case that using mul_with_overflow & add_with_overflow might get wrong: + @test (-10:2:typemax(Int))[typemax(Int)÷2+2] == typemax(Int)-9 +end + +@testset "collect with specialized vcat" begin + struct OneToThree <: AbstractUnitRange{Int} end + Base.size(r::OneToThree) = (3,) + Base.first(r::OneToThree) = 1 + Base.length(r::OneToThree) = 3 + Base.last(r::OneToThree) = 3 + function Base.getindex(r::OneToThree, i::Int) + checkbounds(r, i) + i + end + Base.vcat(r::OneToThree) = r + r = OneToThree() + a = Array(r) + @test a isa Vector{Int} + @test a == r + @test collect(r) isa Vector{Int} + @test collect(r) == r +end + +@testset "isassigned" begin + for (r, val) in ((1:3, 3), (1:big(2)^65, big(2)^65)) + @test isassigned(r, lastindex(r)) + # test that the indexing actually succeeds + @test r[end] == val + @test_throws ArgumentError isassigned(r, true) + end + +end + +@testset "unsigned index #44895" begin + x = range(-1,1,length=11) + @test x[UInt(1)] == -1.0 + a = StepRangeLen(1,2,3,2) + @test a[UInt(1)] == -1 +end + +@testset "StepRangeLen of CartesianIndex-es" begin + CIstart = CartesianIndex(2,3) + CIstep = CartesianIndex(1,1) + r = StepRangeLen(CIstart, CIstep, 4) + @test length(r) == 4 + @test first(r) == CIstart + @test step(r) == CIstep + @test last(r) == CartesianIndex(5,6) + @test r[2] == CartesianIndex(3,4) + + @test repr(r) == "StepRangeLen($CIstart, $CIstep, 4)" + + r = StepRangeLen(CartesianIndex(), CartesianIndex(), 3) + @test all(==(CartesianIndex()), r) + @test length(r) == 3 + @test repr(r) == "StepRangeLen(CartesianIndex(), CartesianIndex(), 3)" + + errmsg = ("deliberately unsupported for CartesianIndex", "StepRangeLen") + @test_throws errmsg range(CartesianIndex(1), step=CartesianIndex(1), length=3) +end diff --git a/test/rational.jl b/test/rational.jl index 9f47f2cb9dd16..118f2e6289228 100644 --- a/test/rational.jl +++ b/test/rational.jl @@ -28,20 +28,30 @@ using Test @test (1//typemax(Int)) / (1//typemax(Int)) == 1 @test_throws OverflowError (1//2)^63 @test inv((1+typemin(Int))//typemax(Int)) == -1 - @test_throws ArgumentError inv(typemin(Int)//typemax(Int)) - @test_throws ArgumentError Rational(0x1, typemin(Int32)) + @test_throws OverflowError inv(typemin(Int)//typemax(Int)) + @test_throws OverflowError Rational(0x1, typemin(Int32)) @test @inferred(rationalize(Int, 3.0, 0.0)) === 3//1 @test @inferred(rationalize(Int, 3.0, 0)) === 3//1 + @test @inferred(rationalize(Int, 33//100; tol=0.1)) === 1//3 # because tol + @test @inferred(rationalize(Int, 3; tol=0.0)) === 3//1 + @test @inferred(rationalize(Int8, 1000//333)) === Rational{Int8}(3//1) + @test @inferred(rationalize(Int8, 1000//3)) === Rational{Int8}(1//0) + @test @inferred(rationalize(Int8, 1000)) === Rational{Int8}(1//0) @test_throws OverflowError rationalize(UInt, -2.0) @test_throws 
ArgumentError rationalize(Int, big(3.0), -1.) # issue 26823 @test_throws InexactError rationalize(Int, NaN) # issue 32569 - @test_throws ArgumentError 1 // typemin(Int) + @test_throws OverflowError 1 // typemin(Int) @test_throws ArgumentError 0 // 0 @test -2 // typemin(Int) == -1 // (typemin(Int) >> 1) @test 2 // typemin(Int) == 1 // (typemin(Int) >> 1) + # issue 32443 + @test Int8(-128)//Int8(1) == -128 + @test_throws OverflowError Int8(-128)//Int8(-1) + @test_throws OverflowError Int8(-1)//Int8(-128) + @test Int8(-128)//Int8(-2) == 64 @test_throws InexactError Rational(UInt(1), typemin(Int32)) @test iszero(Rational{Int}(UInt(0), 1)) @@ -253,6 +263,10 @@ end rational2 = Rational(-4500, 9000) @test sprint(show, rational1) == "1465//8593" @test sprint(show, rational2) == "-1//2" + @test sprint(show, -2//2) == "-1//1" + @test sprint(show, [-2//2,]) == "Rational{$Int}[-1]" + @test sprint(show, MIME"text/plain"(), Union{Int, Rational{Int}}[7 3//6; 6//3 2]) == + "2×2 Matrix{Union{Rational{$Int}, $Int}}:\n 7 1//2\n 2//1 2" let io1 = IOBuffer() write(io1, rational1) @@ -265,6 +279,9 @@ end @test read(io2, typeof(rational2)) == rational2 end end +@testset "abs overflow for Rational" begin + @test_throws OverflowError abs(typemin(Int) // 1) +end @testset "parse" begin # Non-negative Int in which parsing is expected to work @test parse(Rational{Int}, string(10)) == 10 // 1 @@ -525,6 +542,7 @@ end 100798//32085 103993//33102 312689//99532 ] + @test rationalize(pi) === rationalize(BigFloat(pi)) end @testset "issue #12536" begin @@ -720,3 +738,10 @@ end @test rationalize(1.192 + 2.233im) == 149//125 + 2233//1000*im @test rationalize(Int8, 1.192 + 2.233im) == 118//99 + 67//30*im end +@testset "rationalize(Complex) with tol" begin + # test: rationalize(x::Complex; kvs...) 
+ precise_next = 7205759403792795//72057594037927936 + @assert Float64(precise_next) == nextfloat(0.1) + @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im + @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im) +end diff --git a/test/read.jl b/test/read.jl index b8060a023333f..283381668c28a 100644 --- a/test/read.jl +++ b/test/read.jl @@ -145,6 +145,7 @@ for (name, f) in l verbose && println("$name readuntil...") for (t, s, m, kept) in [ + ("a", "", "", ""), ("a", "ab", "a", "a"), ("b", "ab", "b", "b"), ("α", "αγ", "α", "α"), @@ -152,16 +153,19 @@ for (name, f) in l ("bc", "abc", "bc", "bc"), ("αβ", "αβγ", "αβ", "αβ"), ("aaabc", "ab", "aa", "aaab"), + ("aaabc", "b", "aaa", "aaab"), ("aaabc", "ac", "aaabc", "aaabc"), ("aaabc", "aab", "a", "aaab"), ("aaabc", "aac", "aaabc", "aaabc"), ("αααβγ", "αβ", "αα", "αααβ"), + ("αααβγ", "β", "ααα", "αααβ"), ("αααβγ", "ααβ", "α", "αααβ"), ("αααβγ", "αγ", "αααβγ", "αααβγ"), ("barbarbarians", "barbarian", "bar", "barbarbarian"), ("abcaabcaabcxl", "abcaabcx", "abca", "abcaabcaabcx"), ("abbaabbaabbabbaax", "abbaabbabbaax", "abba", "abbaabbaabbabbaax"), ("abbaabbabbaabbaabbabbaax", "abbaabbabbaax", "abbaabbabba", "abbaabbabbaabbaabbabbaax"), + ('a'^500 * 'x' * "bbbb", "x", 'a'^500, 'a'^500 * 'x'), ] local t, s, m, kept @test readuntil(io(t), s) == m @@ -174,6 +178,18 @@ for (name, f) in l @test readuntil(io(t), unsafe_wrap(Vector{UInt8},s), keep=true) == unsafe_wrap(Vector{UInt8},kept) @test readuntil(io(t), collect(s)::Vector{Char}) == Vector{Char}(m) @test readuntil(io(t), collect(s)::Vector{Char}, keep=true) == Vector{Char}(kept) + + buf = IOBuffer() + @test String(take!(copyuntil(buf, io(t), s))) == m + @test String(take!(copyuntil(buf, io(t), s, keep=true))) == kept + file = tempname() + for (k,m) in ((false, m), (true, kept)) + open(file, "w") do f + @test f == copyuntil(f, io(t), s, keep=k) + end + @test read(file, String) == m + end + rm(file) end cleanup() @@ -281,8 +297,45 @@ for (name, f) in l cleanup() verbose && println("$name readline...") - @test readline(io(), keep=true) == readline(IOBuffer(text), keep=true) - @test readline(io(), keep=true) == readline(filename, keep=true) + file = tempname() + for lineending in ("\n", "\r\n", "") + kept = "foo bar" * lineending + t = isempty(lineending) ? "foo bar" : kept * "baz\n" + write(file, t) + @test readline(io(t)) == readline(file) == "foo bar" + @test readline(io(t), keep=true) == readline(file, keep=true) == kept + + @test String(take!(copyline(IOBuffer(), file))) == "foo bar" + @test String(take!(copyline(IOBuffer(), file, keep=true))) == kept + + cleanup() + + buf = IOBuffer() + @test buf === copyline(buf, io(t)) + @test String(take!(buf)) == "foo bar" + @test String(take!(copyline(buf, file, keep=true))) == kept + for keep in (true, false) + open(file, "w") do f + @test f === copyline(f, io(t), keep=keep) + end + @test read(file, String) == (keep ? 
kept : "foo bar") + end + + cleanup() + + write(file, lineending) + @test readline(IOBuffer(lineending)) == "" + @test readline(IOBuffer(lineending), keep=true) == lineending + @test String(take!(copyline(IOBuffer(), IOBuffer(lineending)))) == "" + @test String(take!(copyline(IOBuffer(), IOBuffer(lineending), keep=true))) == lineending + @test readline(file) == "" + @test readline(file, keep=true) == lineending + @test String(take!(copyline(IOBuffer(), file))) == "" + @test String(take!(copyline(IOBuffer(), file, keep=true))) == lineending + + cleanup() + end + rm(file) verbose && println("$name readlines...") @test readlines(io(), keep=true) == readlines(IOBuffer(text), keep=true) @@ -624,6 +677,21 @@ end @test isempty(itr) # now it is empty end +@testset "readuntil/copyuntil fallbacks" begin + # test fallback for generic delim::T + buf = IOBuffer() + fib = [1,1,2,3,5,8,13,21] + write(buf, fib) + @test readuntil(seekstart(buf), 21) == fib[1:end-1] + @test readuntil(buf, 21) == Int[] + @test readuntil(seekstart(buf), 21; keep=true) == fib + out = IOBuffer() + @test copyuntil(out, seekstart(buf), 21) === out + @test reinterpret(Int, take!(out)) == fib[1:end-1] + @test copyuntil(out, seekstart(buf), 21; keep=true) === out + @test reinterpret(Int, take!(out)) == fib +end + # more tests for reverse(eachline) @testset "reverse(eachline)" begin lines = vcat(repr.(1:4), ' '^50000 .* repr.(5:10), repr.(11:10^5)) @@ -652,3 +720,21 @@ end @test isempty(r) && isempty(collect(r)) end end + +@testset "Ref API" begin + io = PipeBuffer() + @test write(io, Ref{Any}(0xabcd_1234)) === 4 + @test read(io, UInt32) === 0xabcd_1234 + @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Cvoid}}, io, C_NULL) + @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Int}}, io, Ptr{Int}(0)) + @test_throws ErrorException("write cannot copy from a Ptr") invoke(write, Tuple{typeof(io), Ref{Any}}, io, Ptr{Any}(0)) + @test_throws ErrorException("read! cannot copy into a Ptr") read!(io, C_NULL) + @test_throws ErrorException("read! cannot copy into a Ptr") read!(io, Ptr{Int}(0)) + @test_throws ErrorException("read! 
cannot copy into a Ptr") read!(io, Ptr{Any}(0)) + @test eof(io) + @test write(io, C_NULL) === sizeof(Int) + @test write(io, Ptr{Int}(4)) === sizeof(Int) + @test write(io, Ptr{Any}(5)) === sizeof(Int) + @test read!(io, Int[1, 2, 3]) == [0, 4, 5] + @test eof(io) +end diff --git a/test/reduce.jl b/test/reduce.jl index db8c97f2f80ca..f5140c8a34bd9 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -33,8 +33,12 @@ using .Main.OffsetArrays @test Base.mapfoldr(abs2, -, 2:5) == -14 @test Base.mapfoldr(abs2, -, 2:5; init=10) == -4 -@test @inferred(mapfoldr(x -> x + 1, (x, y) -> (x, y...), (1, 2.0, '3'); - init = ())) == (2, 3.0, '4') +for t in Any[(1, 2.0, '3'), (;a = 1, b = 2.0, c = '3')] + @test @inferred(mapfoldr(x -> x + 1, (x, y) -> (x, y...), t; + init = ())) == (2, 3.0, '4') + @test @inferred(mapfoldl(x -> x + 1, (x, y) -> (x..., y), t; + init = ())) == (2, 3.0, '4') +end @test foldr((x, y) -> ('⟨' * x * '|' * y * '⟩'), "λ 🐨.α") == "⟨λ|⟨ |⟨🐨|⟨.|α⟩⟩⟩⟩" # issue #31780 let x = rand(10) @@ -49,8 +53,8 @@ end @test reduce(max, [8 6 7 5 3 0 9]) == 9 @test reduce(+, 1:5; init=1000) == (1000 + 1 + 2 + 3 + 4 + 5) @test reduce(+, 1) == 1 -@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, ()) -@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, Union{}[]) +@test_throws "reducing over an empty collection is not allowed" reduce(*, ()) +@test_throws "reducing over an empty collection is not allowed" reduce(*, Union{}[]) # mapreduce @test mapreduce(-, +, [-10 -9 -3]) == ((10 + 9) + 3) @@ -87,8 +91,7 @@ end @test mapreduce(abs2, *, Float64[]) === 1.0 @test mapreduce(abs2, max, Float64[]) === 0.0 @test mapreduce(abs, max, Float64[]) === 0.0 -@test_throws ["reducing over an empty collection is not allowed", - "consider supplying `init`"] mapreduce(abs2, &, Float64[]) +@test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, &, Float64[]) @test_throws str -> !occursin("Closest candidates are", str) mapreduce(abs2, &, Float64[]) @test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, |, Float64[]) @@ -140,9 +143,8 @@ fz = float(z) @test sum(z) === 136 @test sum(fz) === 136.0 -@test_throws "reducing with add_sum over an empty collection of element type Union{} is not allowed" sum(Union{}[]) -@test_throws ["reducing over an empty collection is not allowed", - "consider supplying `init`"] sum(sin, Int[]) +@test_throws "reducing over an empty collection is not allowed" sum(Union{}[]) +@test_throws "reducing over an empty collection is not allowed" sum(sin, Int[]) @test sum(sin, 3) == sin(3.0) @test sum(sin, [3]) == sin(3.0) a = sum(sin, z) @@ -160,12 +162,14 @@ plus(x,y) = x + y sum3(A) = reduce(plus, A) sum4(itr) = invoke(reduce, Tuple{Function, Any}, plus, itr) sum5(A) = reduce(plus, A; init=0) -sum6(itr) = invoke(Core.kwfunc(reduce), Tuple{NamedTuple{(:init,), Tuple{Int}}, typeof(reduce), Function, Any}, (init=0,), reduce, plus, itr) +sum6(itr) = invoke(Core.kwcall, Tuple{NamedTuple{(:init,), Tuple{Int}}, typeof(reduce), Function, Any}, (init=0,), reduce, plus, itr) +sum61(itr) = invoke(reduce, Tuple{Function, Any}, init=0, plus, itr) sum7(A) = mapreduce(x->x, plus, A) sum8(itr) = invoke(mapreduce, Tuple{Function, Function, Any}, x->x, plus, itr) sum9(A) = mapreduce(x->x, plus, A; init=0) -sum10(itr) = invoke(Core.kwfunc(mapreduce), Tuple{NamedTuple{(:init,),Tuple{Int}}, typeof(mapreduce), Function, Function, Any}, (init=0,), mapreduce, x->x, plus, itr) -for 
f in (sum2, sum5, sum6, sum9, sum10) +sum10(itr) = invoke(Core.kwcall, Tuple{NamedTuple{(:init,),Tuple{Int}}, typeof(mapreduce), Function, Function, Any}, (init=0,), mapreduce, x->x, plus, itr) +sum11(itr) = invoke(mapreduce, Tuple{Function, Function, Any}, init=0, x->x, plus, itr) +for f in (sum2, sum5, sum6, sum61, sum9, sum10, sum11) @test sum(z) == f(z) @test sum(Int[]) == f(Int[]) == 0 @test sum(Int[7]) == f(Int[7]) == 7 @@ -263,7 +267,6 @@ let x = [4,3,5,2] @test maximum(x) == 5 @test minimum(x) == 2 @test extrema(x) == (2, 5) - @test Core.Compiler.extrema(x) == (2, 5) @test maximum(abs2, x) == 25 @test minimum(abs2, x) == 4 @@ -434,39 +437,39 @@ end # any & all -@test @inferred any([]) == false -@test @inferred any(Bool[]) == false -@test @inferred any([true]) == true -@test @inferred any([false, false]) == false -@test @inferred any([false, true]) == true -@test @inferred any([true, false]) == true -@test @inferred any([true, true]) == true -@test @inferred any([true, true, true]) == true -@test @inferred any([true, false, true]) == true -@test @inferred any([false, false, false]) == false - -@test @inferred all([]) == true -@test @inferred all(Bool[]) == true -@test @inferred all([true]) == true -@test @inferred all([false, false]) == false -@test @inferred all([false, true]) == false -@test @inferred all([true, false]) == false -@test @inferred all([true, true]) == true -@test @inferred all([true, true, true]) == true -@test @inferred all([true, false, true]) == false -@test @inferred all([false, false, false]) == false - -@test @inferred any(x->x>0, []) == false -@test @inferred any(x->x>0, Int[]) == false -@test @inferred any(x->x>0, [-3]) == false -@test @inferred any(x->x>0, [4]) == true -@test @inferred any(x->x>0, [-3, 4, 5]) == true - -@test @inferred all(x->x>0, []) == true -@test @inferred all(x->x>0, Int[]) == true -@test @inferred all(x->x>0, [-3]) == false -@test @inferred all(x->x>0, [4]) == true -@test @inferred all(x->x>0, [-3, 4, 5]) == false +@test @inferred(Union{Missing,Bool}, any([])) == false +@test @inferred(any(Bool[])) == false +@test @inferred(any([true])) == true +@test @inferred(any([false, false])) == false +@test @inferred(any([false, true])) == true +@test @inferred(any([true, false])) == true +@test @inferred(any([true, true])) == true +@test @inferred(any([true, true, true])) == true +@test @inferred(any([true, false, true])) == true +@test @inferred(any([false, false, false])) == false + +@test @inferred(Union{Missing,Bool}, all([])) == true +@test @inferred(all(Bool[])) == true +@test @inferred(all([true])) == true +@test @inferred(all([false, false])) == false +@test @inferred(all([false, true])) == false +@test @inferred(all([true, false])) == false +@test @inferred(all([true, true])) == true +@test @inferred(all([true, true, true])) == true +@test @inferred(all([true, false, true])) == false +@test @inferred(all([false, false, false])) == false + +@test @inferred(Union{Missing,Bool}, any(x->x>0, [])) == false +@test @inferred(any(x->x>0, Int[])) == false +@test @inferred(any(x->x>0, [-3])) == false +@test @inferred(any(x->x>0, [4])) == true +@test @inferred(any(x->x>0, [-3, 4, 5])) == true + +@test @inferred(Union{Missing,Bool}, all(x->x>0, [])) == true +@test @inferred(all(x->x>0, Int[])) == true +@test @inferred(all(x->x>0, [-3])) == false +@test @inferred(all(x->x>0, [4])) == true +@test @inferred(all(x->x>0, [-3, 4, 5])) == false @test reduce((a, b) -> a .| b, fill(trues(5), 24)) == trues(5) @test reduce((a, b) -> a .| b, fill(falses(5), 
24)) == falses(5) @@ -677,3 +680,55 @@ end @test mapreduce(+, +, oa, oa) == 2len end end + +# issue #45748 +@testset "foldl's stability for nested Iterators" begin + a = Iterators.flatten((1:3, 1:3)) + b = (2i for i in a if i > 0) + c = Base.Generator(Float64, b) + d = (sin(i) for i in c if i > 0) + @test @inferred(sum(d)) == sum(collect(d)) + @test @inferred(extrema(d)) == extrema(collect(d)) + @test @inferred(maximum(c)) == maximum(collect(c)) + @test @inferred(prod(b)) == prod(collect(b)) + @test @inferred(minimum(a)) == minimum(collect(a)) +end + +function fold_alloc(a) + sum(a) + foldr(+, a) + max(@allocated(sum(a)), @allocated(foldr(+, a))) +end +let a = NamedTuple(Symbol(:x,i) => i for i in 1:33), + b = (a...,) + @test fold_alloc(a) == fold_alloc(b) == 0 +end + +@testset "concrete eval `[any|all](f, itr::Tuple)`" begin + intf = in((1,2,3)); Intf = typeof(intf) + symf = in((:one,:two,:three)); Symf = typeof(symf) + @test Core.Compiler.is_foldable(Base.infer_effects(intf, (Int,))) + @test Core.Compiler.is_foldable(Base.infer_effects(symf, (Symbol,))) + @test Core.Compiler.is_foldable(Base.infer_effects(all, (Intf,Tuple{Int,Int,Int}))) + @test Core.Compiler.is_foldable(Base.infer_effects(all, (Symf,Tuple{Symbol,Symbol,Symbol}))) + @test Core.Compiler.is_foldable(Base.infer_effects(any, (Intf,Tuple{Int,Int,Int}))) + @test Core.Compiler.is_foldable(Base.infer_effects(any, (Symf,Tuple{Symbol,Symbol,Symbol}))) + @test Base.return_types() do + Val(all(in((1,2,3)), (1,2,3))) + end |> only == Val{true} + @test Base.return_types() do + Val(all(in((1,2,3)), (1,2,3,4))) + end |> only == Val{false} + @test Base.return_types() do + Val(any(in((1,2,3)), (4,5,3))) + end |> only == Val{true} + @test Base.return_types() do + Val(any(in((1,2,3)), (4,5,6))) + end |> only == Val{false} + @test Base.return_types() do + Val(all(in((:one,:two,:three)),(:three,:four))) + end |> only == Val{false} + @test Base.return_types() do + Val(any(in((:one,:two,:three)),(:four,:three))) + end |> only == Val{true} +end diff --git a/test/reducedim.jl b/test/reducedim.jl index 5402376744e82..f4767dd7a472c 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -6,7 +6,16 @@ using Random # issue #35800 # tested very early since it can be state-dependent -@test @inferred(mapreduce(x->count(!iszero,x), +, [rand(1)]; init = 0.)) == 1.0 + +function my_simple_count(pred, g::Vector{T}) where {T} + n::T = zero(T) + for x in g + n += pred(x) + end + return n +end + +@test @inferred(mapreduce(x->my_simple_count(!iszero,x), +, [rand(1)]; init = 0.)) == 1.0 function safe_mapslices(op, A, region) newregion = intersect(region, 1:ndims(A)) @@ -599,7 +608,7 @@ end end @testset "NaN/missing test for extrema with dims #43599" begin for sz = (3, 10, 100) - for T in (Int, Float64, BigFloat) + for T in (Int, Float64, BigFloat, BigInt) Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz)) Aₘ[rand(1:sz*sz, sz)] .= missing unordered_test_for_extrema(Aₘ) @@ -613,9 +622,16 @@ end end end end -@test_broken minimum([missing;BigInt(1)], dims = 1) -@test_broken maximum([missing;BigInt(1)], dims = 1) -@test_broken extrema([missing;BigInt(1)], dims = 1) + +@testset "minimum/maximum over dims with missing (#35308)" begin + for T in (Int, Float64, BigInt, BigFloat) + x = Union{T, Missing}[1 missing; 2 missing] + @test isequal(minimum(x, dims=1), reshape([1, missing], 1, :)) + @test isequal(maximum(x, dims=1), reshape([2, missing], 1, :)) + @test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1)) + @test isequal(maximum(x, dims=2), 
reshape([missing, missing], :, 1)) + end +end # issue #26709 @testset "dimensional reduce with custom non-bitstype types" begin diff --git a/test/reflection.jl b/test/reflection.jl index 5fd1be83ce01e..d866749e12d34 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -2,6 +2,8 @@ using Test +include("compiler/irutils.jl") + # code_native / code_llvm (issue #8239) # It's hard to really test these, but just running them should be # sufficient to catch segfault bugs. @@ -66,6 +68,7 @@ end # module ReflectionTest @test isbits((1,2)) @test !isbits([1]) @test isbits(nothing) +@test fully_eliminated(isbits, (Int,)) # issue #16670 @test isconcretetype(Int) @@ -81,7 +84,6 @@ end # module ReflectionTest @test isconcretetype(DataType) @test isconcretetype(Union) @test !isconcretetype(Union{}) -@test isconcretetype(Tuple{Union{}}) @test !isconcretetype(Complex) @test !isconcretetype(Complex.body) @test !isconcretetype(AbstractArray{Int,1}) @@ -147,7 +149,7 @@ module TestModSub9475 let @test Base.binding_module(@__MODULE__, :a9475) == @__MODULE__ @test Base.binding_module(@__MODULE__, :c7648) == TestMod7648 - @test Base.nameof(@__MODULE__) == :TestModSub9475 + @test Base.nameof(@__MODULE__) === :TestModSub9475 @test Base.fullname(@__MODULE__) == (curmod_name..., :TestMod7648, :TestModSub9475) @test Base.parentmodule(@__MODULE__) == TestMod7648 end @@ -158,7 +160,7 @@ using .TestModSub9475 let @test Base.binding_module(@__MODULE__, :d7648) == @__MODULE__ @test Base.binding_module(@__MODULE__, :a9475) == TestModSub9475 - @test Base.nameof(@__MODULE__) == :TestMod7648 + @test Base.nameof(@__MODULE__) === :TestMod7648 @test Base.parentmodule(@__MODULE__) == curmod end end # module TestMod7648 @@ -183,14 +185,16 @@ let using .TestMod7648 @test Base.binding_module(@__MODULE__, :a9475) == TestMod7648.TestModSub9475 @test Base.binding_module(@__MODULE__, :c7648) == TestMod7648 - @test nameof(foo7648) == :foo7648 + @test nameof(foo7648) === :foo7648 @test parentmodule(foo7648, (Any,)) == TestMod7648 @test parentmodule(foo7648) == TestMod7648 @test parentmodule(foo7648_nomethods) == TestMod7648 @test parentmodule(foo9475, (Any,)) == TestMod7648.TestModSub9475 @test parentmodule(foo9475) == TestMod7648.TestModSub9475 @test parentmodule(Foo7648) == TestMod7648 - @test nameof(Foo7648) == :Foo7648 + @test parentmodule(first(methods(foo9475))) == TestMod7648.TestModSub9475 + @test parentmodule(first(methods(foo7648))) == TestMod7648 + @test nameof(Foo7648) === :Foo7648 @test basename(functionloc(foo7648, (Any,))[1]) == "reflection.jl" @test first(methods(TestMod7648.TestModSub9475.foo7648)) == which(foo7648, (Int,)) @test TestMod7648 == which(@__MODULE__, :foo7648) @@ -199,7 +203,7 @@ end @test which(===, Tuple{Int, Int}) isa Method @test length(code_typed(===, Tuple{Int, Int})) === 1 -@test only(Base.return_types(===, Tuple{Int, Int})) === Any +@test only(Base.return_types(===, Tuple{Int, Int})) === Bool module TestingExported using Test @@ -207,15 +211,21 @@ include("testenv.jl") # for curmod_str import Base.isexported global this_is_not_defined export this_is_not_defined +public this_is_public @test_throws ErrorException("\"this_is_not_defined\" is not defined in module Main") which(Main, :this_is_not_defined) @test_throws ErrorException("\"this_is_not_exported\" is not defined in module Main") which(Main, :this_is_not_exported) @test isexported(@__MODULE__, :this_is_not_defined) @test !isexported(@__MODULE__, :this_is_not_exported) +@test !isexported(@__MODULE__, :this_is_public) const a_value = 1 @test 
which(@__MODULE__, :a_value) === @__MODULE__ @test_throws ErrorException("\"a_value\" is not defined in module Main") which(Main, :a_value) @test which(Main, :Core) === Main @test !isexported(@__MODULE__, :a_value) +@test !Base.ispublic(@__MODULE__, :a_value) +@test Base.ispublic(@__MODULE__, :this_is_not_defined) +@test Base.ispublic(@__MODULE__, :this_is_public) +@test !Base.ispublic(@__MODULE__, :this_is_not_exported) end # PR 13825 @@ -224,7 +234,7 @@ let ex = :(a + b) end foo13825(::Array{T, N}, ::Array, ::Vector) where {T, N} = nothing @test startswith(string(first(methods(foo13825))), - "foo13825(::Array{T, N}, ::Array, ::Vector) where {T, N}\n") + "foo13825(::Array{T, N}, ::Array, ::Vector) where {T, N}") mutable struct TLayout x::Int8 @@ -425,10 +435,10 @@ let li = typeof(fieldtype).name.mt.cache.func::Core.MethodInstance, mmime = repr("text/plain", li.def) @test lrepr == lmime == "MethodInstance for fieldtype(...)" - @test mrepr == mmime == "fieldtype(...)\n @ Core none:0" + @test mrepr == "fieldtype(...) @ Core none:0" # simple print + @test mmime == "fieldtype(...)\n @ Core none:0" # verbose print end - # Linfo Tracing test function tracefoo end # Method Tracing test @@ -527,7 +537,7 @@ let ft = typeof(f18888) code_typed(f18888, Tuple{}; optimize=false) - @test !isempty(m.specializations) # uncached, but creates the specializations entry + @test m.specializations !== Core.svec() # uncached, but creates the specializations entry mi = Core.Compiler.specialize_method(m, Tuple{ft}, Core.svec()) interp = Core.Compiler.NativeInterpreter(world) @test !Core.Compiler.haskey(Core.Compiler.code_cache(interp), mi) @@ -543,7 +553,7 @@ let end # code_typed_by_type -@test Base.code_typed_by_type(Tuple{Type{<:Val}})[1][2] == Val +@test Base.code_typed_by_type(Tuple{Type{<:Val}})[2][2] == Val @test Base.code_typed_by_type(Tuple{typeof(sin), Float64})[1][2] === Float64 # New reflection methods in 0.6 @@ -604,11 +614,16 @@ end sizeof(Real)) @test sizeof(Union{ComplexF32,ComplexF64}) == 16 @test sizeof(Union{Int8,UInt8}) == 1 -@test_throws ErrorException sizeof(AbstractArray) +@test sizeof(MemoryRef{Int}) == 2 * sizeof(Int) +@test sizeof(GenericMemoryRef{:atomic,Int,Core.CPU}) == 2 * sizeof(Int) +@test sizeof(Array{Int,0}) == 2 * sizeof(Int) +@test sizeof(Array{Int,1}) == 3 * sizeof(Int) +@test sizeof(Array{Int,2}) == 4 * sizeof(Int) +@test sizeof(Array{Int,20}) == 22 * sizeof(Int) @test_throws ErrorException sizeof(Tuple) @test_throws ErrorException sizeof(Tuple{Any,Any}) @test_throws ErrorException sizeof(String) -@test_throws ErrorException sizeof(Vector{Int}) +@test_throws ErrorException sizeof(Memory{false,Int}) @test_throws ErrorException sizeof(Symbol) @test_throws ErrorException sizeof(Core.SimpleVector) @test_throws ErrorException sizeof(Union{}) @@ -643,7 +658,7 @@ let world = Core.Compiler.get_world_counter() match = Base._methods_by_ftype(T22979, -1, world)[1] instance = Core.Compiler.specialize_method(match) - cinfo_generated = Core.Compiler.get_staged(instance) + cinfo_generated = Core.Compiler.get_staged(instance, world) @test_throws ErrorException Base.uncompressed_ir(match.method) test_similar_codeinfo(code_lowered(f22979, typeof(x22979))[1], cinfo_generated) @@ -721,10 +736,35 @@ Base.delete_method(m) @test faz4(1) == 1 @test faz4(1.0) == 1 +# Deletion & invoke (issue #48802) +function f48802!(log, x::Integer) + log[] = "default" + return x + 1 +end +function addmethod_48802() + @eval function f48802!(log, x::Int) + ret = invoke(f48802!, Tuple{Any, Integer}, log, x) + log[] = 
"specialized" + return ret + end +end +log = Ref{String}() +@test f48802!(log, 1) == 2 +@test log[] == "default" +addmethod_48802() +@test f48802!(log, 1) == 2 +@test log[] == "specialized" +Base.delete_method(which(f48802!, Tuple{Any, Int})) +@test f48802!(log, 1) == 2 +@test log[] == "default" +addmethod_48802() +@test f48802!(log, 1) == 2 +@test log[] == "specialized" + # Methods with keyword arguments fookw(x; direction=:up) = direction fookw(y::Int) = 2 -@test fookw("string") == :up +@test fookw("string") === :up @test fookw(1) == 2 m = collect(methods(fookw))[2] Base.delete_method(m) @@ -880,16 +920,15 @@ _test_at_locals2(1,1,0.5f0) f31687_parent() = f31687_child(0) params = Base.CodegenParams() _dump_function(f31687_parent, Tuple{}, - #=native=#false, #=wrapper=#false, #=strip=#false, + #=native=#false, #=wrapper=#false, #=raw=#true, #=dump_module=#true, #=syntax=#:att, #=optimize=#false, :none, - #=binary=#false, - params) + #=binary=#false) end @test nameof(Any) === :Any @test nameof(:) === :Colon @test nameof(Core.Intrinsics.mul_int) === :mul_int -@test nameof(Core.Intrinsics.arraylen) === :arraylen +@test nameof(Core.Intrinsics.cglobal) === :cglobal module TestMod33403 f(x) = 1 @@ -965,9 +1004,17 @@ end @test Base.default_tt(m.f4) == Tuple end +@testset "lookup mi" begin + @test 1+1 == 2 + mi1 = @ccall jl_method_lookup_by_tt(Tuple{typeof(+), Int, Int}::Any, Base.get_world_counter()::Csize_t, nothing::Any)::Ref{Core.MethodInstance} + @test mi1.def.name == :+ + mi2 = @ccall jl_method_lookup(Any[+, 1, 1]::Ptr{Any}, 3::Csize_t, Base.get_world_counter()::Csize_t)::Ref{Core.MethodInstance} + @test mi1 == mi2 +end + Base.@assume_effects :terminates_locally function issue41694(x::Int) res = 1 - 1 < x < 20 || throw("bad") + 0 ≤ x < 20 || error("bad fact") while x > 1 res *= x x -= 1 @@ -977,8 +1024,27 @@ end maybe_effectful(x::Int) = 42 maybe_effectful(x::Any) = unknown_operation() function f_no_methods end +ambig_effects_test(a::Int, b) = 1 +ambig_effects_test(a, b::Int) = 1 +ambig_effects_test(a, b) = 1 + +@testset "Base.infer_return_type[s]" begin + # generic function case + @test only(Base.return_types(issue41694, (Int,))) == Base.infer_return_type(issue41694, (Int,)) == Int + # case when it's not fully covered + @test only(Base.return_types(issue41694, (Integer,))) == Base.infer_return_type(issue41694, (Integer,)) == Int + # MethodError case + @test isempty(Base.return_types(issue41694, (Float64,))) + @test Base.infer_return_type(issue41694, (Float64,)) == Union{} + # builtin case + @test only(Base.return_types(typeof, (Any,))) == Base.infer_return_type(typeof, (Any,)) == DataType + @test only(Base.return_types(===, (Any,Any))) == Base.infer_return_type(===, (Any,Any)) == Bool + @test only(Base.return_types(setfield!, ())) == Base.infer_return_type(setfield!, ()) == Union{} + @test only(Base.return_types(Core.Intrinsics.mul_int, ())) == Base.infer_return_type(Core.Intrinsics.mul_int, ()) == Union{} +end -@testset "infer_effects" begin +@testset "Base.infer_effects" begin + # generic functions @test Base.infer_effects(issue41694, (Int,)) |> Core.Compiler.is_terminates @test Base.infer_effects((Int,)) do x issue41694(x) @@ -991,8 +1057,126 @@ function f_no_methods end @test !Core.Compiler.is_terminates(effects) @test !Core.Compiler.is_nonoverlayed(effects) end - @test Base.infer_effects(f_no_methods) |> !Core.Compiler.is_nothrow + # should account for MethodError + @test Base.infer_effects(issue41694, (Float64,)) |> !Core.Compiler.is_nothrow # definitive dispatch error + @test 
Base.infer_effects(issue41694, (Integer,)) |> !Core.Compiler.is_nothrow # possible dispatch error + @test Base.infer_effects(f_no_methods) |> !Core.Compiler.is_nothrow # no possible matching methods + @test Base.infer_effects(ambig_effects_test, (Int,Int)) |> !Core.Compiler.is_nothrow # ambiguity error + @test Base.infer_effects(ambig_effects_test, (Int,Any)) |> !Core.Compiler.is_nothrow # ambiguity error # builtins - @test Base.infer_effects(typeof, (Any,)) |> Core.Compiler.is_total - @test Base.infer_effects(===, (Any,Any)) |> Core.Compiler.is_total + @test Base.infer_effects(typeof, (Any,)) |> Core.Compiler.is_foldable_nothrow + @test Base.infer_effects(===, (Any,Any)) |> Core.Compiler.is_foldable_nothrow + @test (Base.infer_effects(setfield!, ()); true) # `builtin_effects` shouldn't throw on empty `argtypes` + @test (Base.infer_effects(Core.Intrinsics.mul_int, ()); true) # `intrinsic_effects` shouldn't throw on empty `argtypes` +end + +@testset "Base.infer_exception_type[s]" begin + # generic functions + @test Base.infer_exception_type(issue41694, (Int,)) == only(Base.infer_exception_types(issue41694, (Int,))) == ErrorException + @test Base.infer_exception_type((Int,)) do x + issue41694(x) + end == Base.infer_exception_types((Int,)) do x + issue41694(x) + end |> only == ErrorException + @test Base.infer_exception_type(issue41694) == only(Base.infer_exception_types(issue41694)) == ErrorException # use `default_tt` + let excts = Base.infer_exception_types(maybe_effectful, (Any,)) + @test any(==(Any), excts) + @test any(==(Union{}), excts) + end + @test Base.infer_exception_type(maybe_effectful, (Any,)) == Any + # `infer_exception_type` should account for MethodError + @test Base.infer_exception_type(issue41694, (Float64,)) == MethodError # definitive dispatch error + @test Base.infer_exception_type(issue41694, (Integer,)) == Union{MethodError,ErrorException} # possible dispatch error + @test Base.infer_exception_type(f_no_methods) == MethodError # no possible matching methods + @test Base.infer_exception_type(ambig_effects_test, (Int,Int)) == MethodError # ambiguity error + @test Base.infer_exception_type(ambig_effects_test, (Int,Any)) == MethodError # ambiguity error + # builtins + @test Base.infer_exception_type(typeof, (Any,)) === only(Base.infer_exception_types(typeof, (Any,))) === Union{} + @test Base.infer_exception_type(===, (Any,Any)) === only(Base.infer_exception_types(===, (Any,Any))) === Union{} + @test (Base.infer_exception_type(setfield!, ()); Base.infer_exception_types(setfield!, ()); true) # `infer_exception_type[s]` shouldn't throw on empty `argtypes` + @test (Base.infer_exception_type(Core.Intrinsics.mul_int, ()); Base.infer_exception_types(Core.Intrinsics.mul_int, ()); true) # `infer_exception_type[s]` shouldn't throw on empty `argtypes` +end + +@test Base._methods_by_ftype(Tuple{}, -1, Base.get_world_counter()) == Any[] +@test length(methods(Base.Broadcast.broadcasted, Tuple{Any, Any, Vararg})) > + length(methods(Base.Broadcast.broadcasted, Tuple{Base.Broadcast.BroadcastStyle, Any, Vararg})) >= + length(methods(Base.Broadcast.broadcasted, Tuple{Base.Broadcast.DefaultArrayStyle{1}, Any, Vararg})) >= + 10 + +@testset "specializations" begin + f(x) = 1 + f(1) + f("hello") + @test length(Base.specializations(only(methods(f)))) == 2 +end + +# https://github.com/JuliaLang/julia/issues/48856 +@test !Base.ismutationfree(Vector{Any}) +@test !Base.ismutationfree(Vector{Symbol}) +@test !Base.ismutationfree(Vector{UInt8}) +@test !Base.ismutationfree(Vector{Int32}) +@test 
!Base.ismutationfree(Vector{UInt64}) + +@test Base.ismutationfree(Type{Union{}}) + +module TestNames + +public publicized +export exported + +publicized() = 1 +exported() = 1 +private() = 1 + +end + +@test names(TestNames) == [:TestNames, :exported, :publicized] + +# reflections for generated function with abstract input types + +# :generated_only function should return failed results if given abstract input types +@generated function generated_only_simple(x) + if x <: Integer + return :(x ^ 2) + else + return :(x) + end +end +@test only(Base.return_types(generated_only_simple, (Real,))) == + Base.infer_return_type(generated_only_simple, (Real,)) == + Core.Compiler.return_type(generated_only_simple, Tuple{Real}) == Any +let (src, rt) = only(code_typed(generated_only_simple, (Real,))) + @test src isa Method + @test rt == Any +end + +# optionally generated function should return fallback results if given abstract input types +function sub2ind_gen_impl(dims::Type{NTuple{N,Int}}, I...) where N + ex = :(I[$N] - 1) + for i = (N - 1):-1:1 + ex = :(I[$i] - 1 + dims[$i] * $ex) + end + return :($ex + 1) +end; +function sub2ind_gen_fallback(dims::NTuple{N,Int}, I) where N + ind = I[N] - 1 + for i = (N - 1):-1:1 + ind = I[i] - 1 + dims[i]*ind + end + return ind + 1 +end; +function sub2ind_gen(dims::NTuple{N,Int}, I::Integer...) where N + length(I) == N || error("partial indexing is unsupported") + if @generated + return sub2ind_gen_impl(dims, I...) + else + return sub2ind_gen_fallback(dims, I) + end +end; +@test only(Base.return_types(sub2ind_gen, (NTuple,Int,Int,))) == Int +let (src, rt) = only(code_typed(sub2ind_gen, (NTuple,Int,Int,); optimize=false)) + @test src isa CodeInfo + @test rt == Int + @test any(iscall((src,sub2ind_gen_fallback)), src.code) + @test any(iscall((src,error)), src.code) end diff --git a/test/regex.jl b/test/regex.jl index 1cc377d9cfdbf..e5f1428527512 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -59,6 +59,11 @@ @test repr(r"\\\"") == raw"r\"\\\\\\\"\"" @test repr(s"\\\"\\") == raw"s\"\\\\\\\"\\\\\"" + @test repr(r""a) == "r\"\"a" + @test repr(r""imsxa) == "r\"\"imsxa" + @test repr(Regex("", Base.DEFAULT_COMPILER_OPTS, UInt32(0))) == """Regex("", $(repr(Base.DEFAULT_COMPILER_OPTS)), $(repr(UInt32(0))))""" + @test repr(Regex("", UInt32(0), Base.DEFAULT_MATCH_OPTS)) == """Regex("", $(repr(UInt32(0))), $(repr(Base.DEFAULT_MATCH_OPTS)))""" + # findall @test findall(r"\w+", "foo bar") == [1:3, 5:7] @test findall(r"\w+", "foo bar", overlap=true) == [1:3, 2:3, 3:3, 5:7, 6:7, 7:7] @@ -74,6 +79,10 @@ @test findall([0x01, 0x01], [0x01, 0x01, 0x01, 0x01]) == [1:2, 3:4] @test findall([0x01, 0x01], [0x01, 0x01, 0x01, 0x01]; overlap=true) == [1:2, 2:3, 3:4] + # findnext + @test findnext(r"z", "zabcz", 2) == 5:5 + @test_throws BoundsError findnext(r"z", "zabcz", 7) + # count @test count(r"\w+", "foo bar") == 2 @test count(r"\w+", "foo bar", overlap=true) == 6 @@ -118,14 +127,24 @@ # Backcapture reference in substitution string @test replace("abcde", r"(..)(?P<byname>d)" => s"\g<byname>xy\\\1") == "adxy\\bce" - @test_throws ErrorException replace("a", r"(?P<x>)" => s"\g<y>") + @test_throws(ErrorException("Bad replacement string: Group y not found in regex r\"(?P<x>)\""), + replace("a", r"(?P<x>)" => s"\g<y>")) + # test replace with invalid substitution group pattern + @test_throws(ErrorException("Bad replacement string: \\gg1>"), + replace("s", r"(?<g1>.)" => s"\gg1>")) + # test replace with 2-digit substitution group + @test replace(("0" ^ 9) * "1", Regex(("(0)" ^ 9) * "(1)") => s"10th 
group: \10") == "10th group: 1" # Proper unicode handling @test match(r"∀∀", "∀x∀∀∀").match == "∀∀" - # 'a' flag to disable UCP + # 'a' flag to disable UCP and UTF @test match(r"\w+", "Düsseldorf").match == "Düsseldorf" @test match(r"\w+"a, "Düsseldorf").match == "D" + @test match(r".+"a, "Düsseldorf").match == "Düsseldorf" + @test match(r".+"a, "Dü\xefsseldorf").match == "Dü\xefsseldorf" + @test_throws(ErrorException("PCRE.exec error: $(Base.PCRE.err_message(Base.PCRE.ERROR_UTF8_ERR6))"), + match(r"(*UTF).+"a, "Dü\xefsseldorf")) # Regex behaves like a scalar in broadcasting @test occursin.(r"Hello", ["Hello", "World"]) == [true, false] @@ -141,6 +160,8 @@ @test startswith("abc", r"A"i) @test !endswith("abc", r"C") @test endswith("abc", r"C"i) + # test with substring + @test endswith((@views "abc"[2:3]), r"C"i) @testset "multiplication & exponentiation" begin @test *(r"a") == r"a" @@ -201,9 +222,26 @@ end # Test that PCRE throws the correct kind of error - # TODO: Uncomment this once the corresponding change has propagated to CI - #@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32) + @test_throws ErrorException("PCRE error: NULL regex object") Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32) # test that we can get the error message of negative error codes @test Base.PCRE.err_message(Base.PCRE.ERROR_NOMEMORY) isa String + + # test failure cases for invalid integer flags + @test_throws ArgumentError Regex("test", typemax(Int32), 0) + @test_throws ArgumentError Regex("test", 0, typemax(Int32)) + + # hash + @test hash(r"123"i, zero(UInt)) == hash(Regex("123", "i"), zero(UInt)) +end + +@testset "#47936" begin + tests = (r"a+[bc]+c", + r"a+[bc]{1,2}c", + r"(a)+[bc]+c", + r"a{1,2}[bc]+c", + r"(a+)[bc]+c") + for re in tests + @test match(re, "ababc").match === SubString("ababc", 3:5) + end end diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl index 40d09652ffa12..05a40895b7934 100644 --- a/test/reinterpretarray.jl +++ b/test/reinterpretarray.jl @@ -6,20 +6,28 @@ using .Main.OffsetArrays isdefined(Main, :TSlow) || @eval Main include("testhelpers/arrayindexingtypes.jl") using .Main: TSlow, WrapperArray +tslow(a::AbstractArray) = TSlow(a) +wrapper(a::AbstractArray) = WrapperArray(a) +fcviews(a::AbstractArray) = view(a, ntuple(Returns(:),ndims(a)-1)..., axes(a)[end]) +fcviews(a::AbstractArray{<:Any, 0}) = view(a) +tslow(t::Tuple) = map(tslow, t) +wrapper(t::Tuple) = map(wrapper, t) +fcviews(t::Tuple) = map(fcviews, t) + +test_many_wrappers(testf, A, wrappers) = foreach(w -> testf(w(A)), wrappers) +test_many_wrappers(testf, A) = test_many_wrappers(testf, A, (identity, tslow, wrapper, fcviews)) + A = Int64[1, 2, 3, 4] -As = TSlow(A) Ars = Int64[1 3; 2 4] -Arss = TSlow(Ars) B = Complex{Int64}[5+6im, 7+8im, 9+10im] -Bs = TSlow(B) Av = [Int32[1,2], Int32[3,4]] -for Ar in (Ars, Arss) +test_many_wrappers(Ars, (identity, tslow)) do Ar @test @inferred(ndims(reinterpret(reshape, Complex{Int64}, Ar))) == 1 @test @inferred(axes(reinterpret(reshape, Complex{Int64}, Ar))) === (Base.OneTo(2),) @test @inferred(size(reinterpret(reshape, Complex{Int64}, Ar))) == (2,) end -for _B in (B, Bs) +test_many_wrappers(B, (identity, tslow)) do _B @test @inferred(ndims(reinterpret(reshape, Int64, _B))) == 2 @test @inferred(axes(reinterpret(reshape, Int64, _B))) === (Base.OneTo(2), Base.OneTo(3)) @test @inferred(size(reinterpret(reshape, Int64, _B))) == (2, 3) @@ -42,24 +50,25 @@ end @test_throws ArgumentError("cannot reinterpret a zero-dimensional `UInt8` array to `UInt16` 
which is of a larger size") reinterpret(reshape, UInt16, reshape([0x01])) # getindex -for _A in (A, As) +test_many_wrappers(A) do _A @test reinterpret(Complex{Int64}, _A) == [1 + 2im, 3 + 4im] @test reinterpret(Float64, _A) == reinterpret.(Float64, A) @test reinterpret(reshape, Float64, _A) == reinterpret.(Float64, A) end -for Ar in (Ars, Arss) +test_many_wrappers(Ars) do Ar @test reinterpret(reshape, Complex{Int64}, Ar) == [1 + 2im, 3 + 4im] @test reinterpret(reshape, Float64, Ar) == reinterpret.(Float64, Ars) end -for _B in (B, Bs) +test_many_wrappers(B) do _B @test reinterpret(NTuple{3, Int64}, _B) == [(5,6,7),(8,9,10)] @test reinterpret(reshape, Int64, _B) == [5 7 9; 6 8 10] end # setindex -for (_A, Ar, _B) in ((A, Ars, B), (As, Arss, Bs)) - let Ac = copy(_A), Arsc = copy(Ar), Bc = copy(_B) +test_many_wrappers((A, Ars, B)) do (A, Ars, B) + _A, Ar, _B = deepcopy(A), deepcopy(Ars), deepcopy(B) + let Ac = deepcopy(_A), Arsc = deepcopy(Ar), Bc = deepcopy(_B) reinterpret(Complex{Int64}, Ac)[2] = -1 - 2im @test Ac == [1, 2, -1, -2] reinterpret(Complex{Int64}, Arsc)[2] = -1 - 2im @@ -94,50 +103,67 @@ for (_A, Ar, _B) in ((A, Ars, B), (As, Arss, Bs)) end end A3 = collect(reshape(1:18, 2, 3, 3)) -A3r = reinterpret(reshape, Complex{Int}, A3) -@test A3r[4] === A3r[1,2] === A3r[CartesianIndex(1, 2)] === 7+8im -A3r[2,3] = -8-15im -@test A3[1,2,3] == -8 -@test A3[2,2,3] == -15 -A3r[4] = 100+200im -@test A3[1,1,2] == 100 -@test A3[2,1,2] == 200 -A3r[CartesianIndex(1,2)] = 300+400im -@test A3[1,1,2] == 300 -@test A3[2,1,2] == 400 +test_many_wrappers(A3) do A3_ + A3 = deepcopy(A3_) + A3r = reinterpret(reshape, Complex{Int}, A3) + @test A3r[4] === A3r[1,2] === A3r[CartesianIndex(1, 2)] === 7+8im + A3r[2,3] = -8-15im + @test A3[1,2,3] == -8 + @test A3[2,2,3] == -15 + A3r[4] = 100+200im + @test A3[1,1,2] == 100 + @test A3[2,1,2] == 200 + A3r[CartesianIndex(1,2)] = 300+400im + @test A3[1,1,2] == 300 + @test A3[2,1,2] == 400 +end # same-size reinterpret where one of the types is non-primitive -let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(Float32, a) - @test ra[1] == reinterpret(Float32, 0x04030201) - @test setindex!(ra, 2.0) === ra - @test reinterpret(Float32, a)[1] == 2.0 +let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)] + test_many_wrappers(a, (identity, wrapper, fcviews)) do a_ + a = deepcopy(a_) + ra = reinterpret(Float32, a) + @test ra[1] == reinterpret(Float32, 0x04030201) + @test setindex!(ra, 2.0) === ra + @test reinterpret(Float32, a)[1] == 2.0 + end end -let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(reshape, Float32, a) - @test ra[1] == reinterpret(Float32, 0x04030201) - @test setindex!(ra, 2.0) === ra - @test reinterpret(reshape, Float32, a)[1] == 2.0 +let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)] + test_many_wrappers(a, (identity, wrapper, fcviews)) do a_ + a = deepcopy(a_) + ra = reinterpret(reshape, Float32, a) + @test ra[1] == reinterpret(Float32, 0x04030201) + @test setindex!(ra, 2.0) === ra + @test reinterpret(reshape, Float32, a)[1] == 2.0 + end end # Pass-through indexing B = Complex{Int64}[5+6im, 7+8im, 9+10im] -Br = reinterpret(reshape, Int64, B) -W = WrapperArray(Br) -for (b, w) in zip(5:10, W) - @test b == w -end -for (i, j) in zip(eachindex(W), 11:16) - W[i] = j +test_many_wrappers(B) do B_ + B = deepcopy(B_) + Br = reinterpret(reshape, Int64, B) + W = WrapperArray(Br) + for (b, w) in zip(5:10, W) + @test b == w + end + for (i, j) in zip(eachindex(W), 11:16) + W[i] = j + end + @test B[1] === Complex{Int64}(11+12im) + @test B[2] === 
Complex{Int64}(13+14im) + @test B[3] === Complex{Int64}(15+16im) end -@test B[1] === Complex{Int64}(11+12im) -@test B[2] === Complex{Int64}(13+14im) -@test B[3] === Complex{Int64}(15+16im) z3 = (0x00, 0x00, 0x00) Az = [z3 z3; z3 z3] -Azr = reinterpret(reshape, UInt8, Az) -W = WrapperArray(Azr) -copyto!(W, fill(0x01, 3, 2, 2)) -@test all(isequal((0x01, 0x01, 0x01)), Az) -@test eachindex(W, W) == eachindex(W) +test_many_wrappers(Az, (identity, wrapper)) do Az_ + Az = deepcopy(Az_) + Azr = reinterpret(reshape, UInt8, Az) + W = WrapperArray(Azr) + copyto!(W, fill(0x01, 3, 2, 2)) + @test all(isequal((0x01, 0x01, 0x01)), Az) + @test eachindex(W, W) == eachindex(W) +end # ensure that reinterpret arrays aren't erroneously classified as strided let A = reshape(1:20, 5, 4) @@ -169,7 +195,7 @@ function check_strides(A::AbstractArray) end @testset "strides for NonReshapedReinterpretArray" begin - A = Array{Int32}(reshape(1:88, 11, 8)) + A = WrapperArray(Array{Int32}(reshape(1:88, 11, 8))) for viewax2 in (1:8, 1:2:6, 7:-1:1, 5:-2:1, 2:3:8, 7:-6:1, 3:5:11) # dim1 is contiguous for T in (Int16, Float32) @@ -180,7 +206,7 @@ end else @test_throws "Parent's strides" strides(reinterpret(Int64, view(A, 1:8, viewax2))) end - # non-integer-multipled classified + # non-integer-multiplied classified if mod(step(viewax2), 3) == 0 @test check_strides(reinterpret(NTuple{3,Int16}, view(A, 2:7, viewax2))) else @@ -197,10 +223,13 @@ end end @test check_strides(reinterpret(Float32, view(A, 8:-1:1, viewax2))) end + # issue 46113 + A = reinterpret(Int8, reinterpret(reshape, Int16, rand(Int8, 2, 3, 3))) + @test check_strides(A) end @testset "strides for ReshapedReinterpretArray" begin - A = Array{Int32}(reshape(1:192, 3, 8, 8)) + A = WrapperArray(Array{Int32}(reshape(1:192, 3, 8, 8))) for viewax1 in (1:8, 1:2:8, 8:-1:1, 8:-2:1), viewax2 in (1:2, 4:-1:1) for T in (Int16, Float32) @test check_strides(reinterpret(reshape, T, view(A, 1:2, viewax1, viewax2))) @@ -237,7 +266,8 @@ end end # IndexStyle -let a = fill(1.0, 5, 3) +test_many_wrappers(fill(1.0, 5, 3), (identity, wrapper)) do a_ + a = deepcopy(a_) r = reinterpret(Int64, a) @test @inferred(IndexStyle(r)) == IndexLinear() fill!(r, 2) @@ -290,14 +320,13 @@ let a = fill(1.0, 5, 3) @test setindex!(r, -5, goodinds...) === r @test r[goodinds...] 
== -5 end - - ar = [(1,2), (3,4)] +end +let ar = [(1,2), (3,4)] arr = reinterpret(reshape, Int, ar) @test @inferred(IndexStyle(arr)) == Base.IndexSCartesian2{2}() @test @inferred(eachindex(arr)) == Base.SCartesianIndices2{2}(Base.OneTo(2)) @test @inferred(eachindex(arr, arr)) == Base.SCartesianIndices2{2}(Base.OneTo(2)) end - # Error on reinterprets that would expose padding struct S1 a::Int8 @@ -311,11 +340,14 @@ end A1 = S1[S1(0, 0)] A2 = S2[S2(0, 0)] -@test reinterpret(S1, A2)[1] == S1(0, 0) -@test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2)) -@test_throws Base.PaddingError reinterpret(S2, A1)[1] -reinterpret(S2, A1)[1] = S2(1, 2) -@test A1[1] == S1(1, 2) +test_many_wrappers((A1, A2), (identity, wrapper)) do (A1_, A2_) + A1, A2 = deepcopy(A1_), deepcopy(A2_) + @test reinterpret(S1, A2)[1] == S1(0, 0) + @test_throws Base.PaddingError (reinterpret(S1, A2)[1] = S2(1, 2)) + @test_throws Base.PaddingError reinterpret(S2, A1)[1] + reinterpret(S2, A1)[1] = S2(1, 2) + @test A1[1] == S1(1, 2) +end # Unconventional axes let a = [0.1 0.2; 0.3 0.4], at = reshape([(i,i+1) for i = 1:2:8], 2, 2) @@ -368,50 +400,59 @@ end # Test 0-dimensional Arrays A = zeros(UInt32) -B = reinterpret(Int32, A) -Brs = reinterpret(reshape,Int32, A) -C = reinterpret(Tuple{UInt32}, A) # non-primitive type -Crs = reinterpret(reshape, Tuple{UInt32}, A) # non-primitive type -@test size(B) == size(Brs) == size(C) == size(Crs) == () -@test axes(B) == axes(Brs) == axes(C) == axes(Crs) == () -@test setindex!(B, Int32(5)) === B -@test B[] === Int32(5) -@test Brs[] === Int32(5) -@test C[] === (UInt32(5),) -@test Crs[] === (UInt32(5),) -@test A[] === UInt32(5) -@test setindex!(Brs, Int32(12)) === Brs -@test A[] === UInt32(12) -@test setindex!(C, (UInt32(7),)) === C -@test A[] === UInt32(7) -@test setindex!(Crs, (UInt32(3),)) === Crs -@test A[] === UInt32(3) - - -a = [(1.0,2.0)] -af = @inferred(reinterpret(reshape, Float64, a)) -anew = @inferred(reinterpret(reshape, Tuple{Float64,Float64}, vec(af))) -@test anew[1] == a[1] -@test ndims(anew) == 0 +test_many_wrappers(A, (identity, wrapper)) do A_ + A = deepcopy(A_) + B = reinterpret(Int32, A) + Brs = reinterpret(reshape,Int32, A) + C = reinterpret(Tuple{UInt32}, A) # non-primitive type + Crs = reinterpret(reshape, Tuple{UInt32}, A) # non-primitive type + @test size(B) == size(Brs) == size(C) == size(Crs) == () + @test axes(B) == axes(Brs) == axes(C) == axes(Crs) == () + @test setindex!(B, Int32(5)) === B + @test B[] === Int32(5) + @test Brs[] === Int32(5) + @test C[] === (UInt32(5),) + @test Crs[] === (UInt32(5),) + @test A[] === UInt32(5) + @test setindex!(Brs, Int32(12)) === Brs + @test A[] === UInt32(12) + @test setindex!(C, (UInt32(7),)) === C + @test A[] === UInt32(7) + @test setindex!(Crs, (UInt32(3),)) === Crs + @test A[] === UInt32(3) +end + +test_many_wrappers([(1.0,2.0)], (identity, wrapper)) do a + af = @inferred(reinterpret(reshape, Float64, a)) + anew = @inferred(reinterpret(reshape, Tuple{Float64,Float64}, vec(af))) + @test anew[1] == a[1] + @test ndims(anew) == 0 +end # re-reinterpret a0 = reshape([0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04], 4, 2) -a = reinterpret(reshape, NTuple{4,UInt8}, a0) -@test a == [(0x22, 0x44, 0x88, 0xf0), (0x01, 0x02, 0x03, 0x04)] -@test reinterpret(UInt8, a) == [0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04] -@test reinterpret(reshape, UInt8, a) === a0 +test_many_wrappers(a0, (identity, wrapper)) do a0 + a = reinterpret(reshape, NTuple{4,UInt8}, a0) + @test a == [(0x22, 0x44, 0x88, 0xf0), (0x01, 0x02, 0x03, 0x04)] + @test 
reinterpret(UInt8, a) == [0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04] + @test reinterpret(reshape, UInt8, a) === a0 +end # reductions a = [(1,2,3), (4,5,6)] -ars = reinterpret(reshape, Int, a) -@test sum(ars) == 21 -@test sum(ars; dims=1) == [6 15] -@test sum(ars; dims=2) == reshape([5,7,9], (3, 1)) -@test sum(ars; dims=(1,2)) == reshape([21], (1, 1)) +test_many_wrappers(a, (identity, wrapper)) do a + ars = reinterpret(reshape, Int, a) + @test sum(ars) == 21 + @test sum(ars; dims=1) == [6 15] + @test sum(ars; dims=2) == reshape([5,7,9], (3, 1)) + @test sum(ars; dims=(1,2)) == reshape([21], (1, 1)) +end # also test large sizes for the pairwise algorithm a = [(k,k+1,k+2) for k = 1:3:4000] -ars = reinterpret(reshape, Int, a) -@test sum(ars) == 8010003 +test_many_wrappers(a, (identity, wrapper)) do a + ars = reinterpret(reshape, Int, a) + @test sum(ars) == 8010003 +end @testset "similar(::ReinterpretArray)" begin a = reinterpret(NTuple{2,Float64}, TSlow(rand(Float64, 4, 4))) @@ -447,10 +488,10 @@ end SomeSingleton(x) = new() end - @test_throws ErrorException reinterpret(Int, nothing) - @test_throws ErrorException reinterpret(Missing, 3) - @test_throws ErrorException reinterpret(Missing, NotASingleton()) - @test_throws ErrorException reinterpret(NotASingleton, ()) + @test_throws ArgumentError reinterpret(Int, nothing) + @test_throws ArgumentError reinterpret(Missing, 3) + @test_throws ArgumentError reinterpret(Missing, NotASingleton()) + @test_throws ArgumentError reinterpret(NotASingleton, ()) @test_throws ArgumentError reinterpret(NotASingleton, fill(nothing, ())) @test_throws ArgumentError reinterpret(reshape, NotASingleton, fill(missing, 3)) @@ -465,7 +506,7 @@ end @test_throws ArgumentError reinterpret(Nothing, 1:6) @test_throws ArgumentError reinterpret(reshape, Missing, [0.0]) - # reintepret of empty array + # reinterpret of empty array @test reinterpret(reshape, Nothing, fill(missing, (1,0,3))) == fill(nothing, (1,0,3)) @test reinterpret(reshape, Missing, fill((), (0,))) == fill(missing, (0,)) @test_throws ArgumentError reinterpret(reshape, Nothing, fill(3.2, (0,0))) @@ -510,3 +551,40 @@ end @test setindex!(x, SomeSingleton(:), 3, 5) == x2 @test_throws MethodError x[2,4] = nothing end + +@testset "pointer for StridedArray" begin + a = rand(Float64, 251) + v = view(a, UInt(2):UInt(251)); + A = reshape(v, 25, 10); + @test A isa StridedArray && pointer(A) === pointer(a, 2) + Av = view(A, 1:20, 1:2) + @test Av isa StridedArray && pointer(Av) === pointer(a, 2) + @test Av * Av' isa Array +end + +@testset "effect of StridedReinterpretArray's getindex" begin + eff = Base.infer_effects(getindex, Base.typesof(reinterpret(Int8, Int[1]), 1)) + @test Core.Compiler.is_effect_free(eff) +end + +# reinterpret of arbitrary bitstypes +@testset "Reinterpret arbitrary bitstypes" begin + struct Bytes15 + a::Int8 + b::Int16 + c::Int32 + d::Int64 + end + + @test reinterpret(Float64, ComplexF32(1, 1)) === 0.007812501848093234 + @test reinterpret(ComplexF32, 0.007812501848093234) === ComplexF32(1, 1) + @test reinterpret(Tuple{Float64, Float64}, ComplexF64(1, 1)) === (1.0, 1.0) + @test reinterpret(ComplexF64, (1.0, 1.0)) === ComplexF64(1, 1) + @test reinterpret(Tuple{Int8, Int16, Int32, Int64}, (Int64(1), Int32(2), Int16(3), Int8(4))) === (Int8(1), Int16(0), Int32(0), 288233674686595584) + @test reinterpret(Tuple{Int8, Int16, Tuple{Int32, Int64}}, (Int64(1), Int32(2), Int16(3), Int8(4))) === (Int8(1), Int16(0), (Int32(0), 288233674686595584)) + @test reinterpret(Tuple{Int64, Int32, Int16, Int8}, (Int8(1), 
Int16(0), (Int32(0), 288233674686595584))) === (Int64(1), Int32(2), Int16(3), Int8(4)) + @test reinterpret(Tuple{Int8, Int16, Int32, Int64}, Bytes15(Int8(1), Int16(2), Int32(3), Int64(4))) === (Int8(1), Int16(2), Int32(3), Int64(4)) + @test reinterpret(Bytes15, (Int8(1), Int16(2), Int32(3), Int64(4))) == Bytes15(Int8(1), Int16(2), Int32(3), Int64(4)) + + @test_throws ArgumentError reinterpret(Tuple{Int32, Int64}, (Int16(1), Int64(4))) +end diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl new file mode 100644 index 0000000000000..7aaeda1174e29 --- /dev/null +++ b/test/relocatedepot.jl @@ -0,0 +1,147 @@ +using Test +using Logging + + +include("testenv.jl") + + +function test_harness(@nospecialize(fn)) + load_path = copy(LOAD_PATH) + depot_path = copy(DEPOT_PATH) + try + fn() + finally + copy!(LOAD_PATH, load_path) + copy!(DEPOT_PATH, depot_path) + end +end + + +if !test_relocated_depot + + @testset "insert @depot tag in path" begin + + test_harness() do + mktempdir() do dir + pushfirst!(DEPOT_PATH, dir) + path = dir*dir + @test Base.replace_depot_path(path) == "@depot"*dir + end + end + + test_harness() do + mktempdir() do dir + pushfirst!(DEPOT_PATH, dir) + path = joinpath(dir, "foo") + if isdirpath(DEPOT_PATH[1]) + DEPOT_PATH[1] = dirname(DEPOT_PATH[1]) # strip trailing pathsep + end + tag = joinpath("@depot", "") # append a pathsep + @test startswith(Base.replace_depot_path(path), tag) + DEPOT_PATH[1] = joinpath(DEPOT_PATH[1], "") # append a pathsep + @test startswith(Base.replace_depot_path(path), tag) + popfirst!(DEPOT_PATH) + @test !startswith(Base.replace_depot_path(path), tag) + end + end + + end + + @testset "restore path from @depot tag" begin + + tmp = tempdir() + + path = joinpath("@depot", "foo", "bar") + tmppath = joinpath(tmp, "foo", "bar") + @test Base.restore_depot_path(path, tmp) == tmppath + + path = joinpath("no@depot", "foo", "bar") + @test Base.restore_depot_path(path, tmp) == path + + path = joinpath("@depot", "foo", "bar\n", "@depot", "foo") + tmppath = joinpath(tmp, "foo", "bar\n", "@depot", "foo") + @test Base.restore_depot_path(path, tmp) == tmppath + + end + + @testset "precompile RelocationTestPkg1" begin + pkgname = "RelocationTestPkg1" + test_harness() do + push!(LOAD_PATH, @__DIR__) + push!(DEPOT_PATH, @__DIR__) + pkg = Base.identify_package(pkgname) + cachefiles = Base.find_all_in_cache_path(pkg) + rm.(cachefiles, force=true) + @test Base.isprecompiled(pkg) == false + Base.require(pkg) # precompile + @test Base.isprecompiled(pkg, ignore_loaded=true) == true + end + end + + @testset "precompile RelocationTestPkg2 (contains include_dependency)" begin + pkgname = "RelocationTestPkg2" + test_harness() do + push!(LOAD_PATH, @__DIR__) + push!(DEPOT_PATH, string(@__DIR__, "/")) + pkg = Base.identify_package(pkgname) + cachefiles = Base.find_all_in_cache_path(pkg) + rm.(cachefiles, force=true) + @test Base.isprecompiled(pkg) == false + touch(joinpath(@__DIR__, pkgname, "src", "foo.txt")) + Base.require(pkg) # precompile + @test Base.isprecompiled(pkg, ignore_loaded=true) == true + end + end + +else + + # must come before any of the load tests, because they will recompile and generate new cache files + @testset "attempt loading precompiled pkgs when depot is missing" begin + test_harness() do + empty!(LOAD_PATH) + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + for pkgname in ("RelocationTestPkg1", "RelocationTestPkg2") + pkg = Base.identify_package(pkgname) + cachefile = only(Base.find_all_in_cache_path(pkg)) + @test_throws ArgumentError(""" + Failed 
to determine depot from srctext files in cache file $cachefile. + - Make sure you have adjusted DEPOT_PATH in case you relocated depots.""") Base.isprecompiled(pkg) + end + end + end + + @testset "load stdlib from test/relocatedepot" begin + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) + # stdlib should be already precompiled + pkg = Base.identify_package("DelimitedFiles") + @test Base.isprecompiled(pkg) == true + end + end + + @testset "load RelocationTestPkg1 from test/relocatedepot" begin + pkgname = "RelocationTestPkg1" + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) + pkg = Base.identify_package(pkgname) + @test Base.isprecompiled(pkg) == true + Base.require(pkg) # re-precompile + @test Base.isprecompiled(pkg) == true + end + end + + @testset "load RelocationTestPkg2 (contains include_dependency) from test/relocatedepot" begin + pkgname = "RelocationTestPkg2" + test_harness() do + push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot")) + push!(DEPOT_PATH, joinpath(@__DIR__, "relocatedepot")) + pkg = Base.identify_package(pkgname) + @test Base.isprecompiled(pkg) == false # moving depot changes mtime of include_dependency + Base.require(pkg) # re-precompile + @test Base.isprecompiled(pkg) == true + end + end + +end diff --git a/test/rounding.jl b/test/rounding.jl index 508a68032e083..045c834e63013 100644 --- a/test/rounding.jl +++ b/test/rounding.jl @@ -57,7 +57,7 @@ end @test pu - pd == eps(pz) end - for T in [Float32,Float64] + for T in [Float16,Float32,Float64] for v in [sqrt(big(2.0)),-big(1.0)/big(3.0),nextfloat(big(1.0)), prevfloat(big(1.0)),nextfloat(big(0.0)),prevfloat(big(0.0)), pi,ℯ,eulergamma,catalan,golden, @@ -351,3 +351,110 @@ end Base.Rounding.setrounding_raw(T, Base.Rounding.to_fenv(old)) end end + +@testset "rounding floats with specified return type #50778" begin + @test round(Float64, 1.2) === 1.0 + @test round(Float32, 1e60) === Inf32 + x = floatmax(Float32)-1.0 + @test round(Float32, x) == x +end + +@testset "rounding complex numbers (#42060, #47128)" begin + # 42060 + @test ceil(Complex(4.6, 2.2)) === Complex(5.0, 3.0) + @test floor(Complex(4.6, 2.2)) === Complex(4.0, 2.0) + @test trunc(Complex(4.6, 2.2)) === Complex(4.0, 2.0) + @test round(Complex(4.6, 2.2)) === Complex(5.0, 2.0) + @test ceil(Complex(-4.6, -2.2)) === Complex(-4.0, -2.0) + @test floor(Complex(-4.6, -2.2)) === Complex(-5.0, -3.0) + @test trunc(Complex(-4.6, -2.2)) === Complex(-4.0, -2.0) + @test round(Complex(-4.6, -2.2)) === Complex(-5.0, -2.0) + + # 47128 + @test round(Complex{Int}, Complex(4.6, 2.2)) === Complex(5, 2) + @test ceil(Complex{Int}, Complex(4.6, 2.2)) === Complex(5, 3) +end + +@testset "rounding to custom integers" begin + struct Int50812 <: Integer + x::Int + end + @test round(Int50812, 1.2) === Int50812(1) + @test round(Int50812, π) === Int50812(3) + @test ceil(Int50812, π) === Int50812(4) +end + +const MPFRRM = Base.MPFR.MPFRRoundingMode + +function mpfr_to_ieee(::Type{Float32}, x::BigFloat, r::MPFRRM) + ccall((:mpfr_get_flt, Base.MPFR.libmpfr), Float32, (Ref{BigFloat}, MPFRRM), x, r) +end +function mpfr_to_ieee(::Type{Float64}, x::BigFloat, r::MPFRRM) + ccall((:mpfr_get_d, Base.MPFR.libmpfr), Float64, (Ref{BigFloat}, MPFRRM), x, r) +end + +function mpfr_to_ieee(::Type{G}, x::BigFloat, r::RoundingMode) where {G} + mpfr_to_ieee(G, x, convert(MPFRRM, r)) +end + +const mpfr_rounding_modes = map( + Base.Fix1(convert, 
MPFRRM), + (RoundNearest, RoundToZero, RoundFromZero, RoundDown, RoundUp) +) + +sample_float(::Type{T}, e::Integer) where {T<:AbstractFloat} = ldexp(rand(T) + true, e)::T + +function float_samples(::Type{T}, exponents, n::Int) where {T<:AbstractFloat} + ret = T[] + for e ∈ exponents, i ∈ 1:n + push!(ret, sample_float(T, e), -sample_float(T, e)) + end + ret +end + +# a reasonable range of values for testing behavior between 1:200 +const fib200 = [1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 200] + +@testset "IEEEFloat(::BigFloat) against MPFR" begin + for pr ∈ fib200 + setprecision(BigFloat, pr) do + exp = exponent(floatmax(Float64)) + 10 + bf_samples = float_samples(BigFloat, (-exp):exp, 20) # about 82680 random values + for mpfr_rm ∈ mpfr_rounding_modes, bf ∈ bf_samples, F ∈ (Float32, Float64) + @test ( + mpfr_to_ieee(F, bf, mpfr_rm) === + F(bf, mpfr_rm) === F(bf, convert(RoundingMode, mpfr_rm)) + ) + end + end + end +end + +const native_rounding_modes = ( + RoundNearest, RoundNearestTiesAway, RoundNearestTiesUp, + RoundToZero, RoundFromZero, RoundUp, RoundDown +) + +# Checks that each rounding mode is faithful. +@testset "IEEEFloat(::BigFloat) faithful rounding" begin + for pr ∈ fib200 + setprecision(BigFloat, pr) do + exp = 500 + bf_samples = float_samples(BigFloat, (-exp):exp, 20) # about 40040 random values + for rm ∈ (mpfr_rounding_modes..., Base.MPFR.MPFRRoundFaithful, + native_rounding_modes...), + bf ∈ bf_samples, + F ∈ (Float16, Float32, Float64) + f = F(bf, rm) + @test (f === F(bf, RoundDown)) | (f === F(bf, RoundUp)) + end + end + end +end + +@testset "round(Int, -Inf16) should throw (#51113)" begin + @test_throws InexactError round(Int32, -Inf16) + @test_throws InexactError round(Int64, -Inf16) + @test_throws InexactError round(Int128, -Inf16) + # More comprehensive testing is present in test/floatfuncs.jl +end diff --git a/test/runtests.jl b/test/runtests.jl index 4c9ac1cfd869c..1b839a41bf0e0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -71,10 +71,12 @@ function move_to_node1(t) end # Base.compilecache only works from node 1, so precompile test is handled specially +move_to_node1("ccall") move_to_node1("precompile") move_to_node1("SharedArrays") move_to_node1("threads") move_to_node1("Distributed") +move_to_node1("gc") # Ensure things like consuming all kernel pipe memory doesn't interfere with other tests move_to_node1("stress") @@ -82,12 +84,14 @@ move_to_node1("stress") # since it starts a lot of workers and can easily exceed the maximum memory limited_worker_rss && move_to_node1("Distributed") -# Shuffle LinearAlgebra tests to the front, because they take a while, so we might +# Move LinearAlgebra and Pkg tests to the front, because they take a while, so we might # as well get them all started early. 
-linalg_test_ids = findall(x->occursin("LinearAlgebra", x), tests) -linalg_tests = tests[linalg_test_ids] -deleteat!(tests, linalg_test_ids) -prepend!(tests, linalg_tests) +for prependme in ["LinearAlgebra", "Pkg"] + prependme_test_ids = findall(x->occursin(prependme, x), tests) + prependme_tests = tests[prependme_test_ids] + deleteat!(tests, prependme_test_ids) + prepend!(tests, prependme_tests) +end import LinearAlgebra cd(@__DIR__) do @@ -102,11 +106,7 @@ cd(@__DIR__) do # * https://github.com/JuliaLang/julia/pull/29384 # * https://github.com/JuliaLang/julia/pull/40348 n = 1 - JULIA_TEST_USE_MULTIPLE_WORKERS = get(ENV, "JULIA_TEST_USE_MULTIPLE_WORKERS", "") |> - strip |> - lowercase |> - s -> tryparse(Bool, s) |> - x -> x === true + JULIA_TEST_USE_MULTIPLE_WORKERS = Base.get_bool_env("JULIA_TEST_USE_MULTIPLE_WORKERS", false) # If the `JULIA_TEST_USE_MULTIPLE_WORKERS` environment variable is set to `true`, we use # multiple worker processes regardless of the value of `net_on`. # Otherwise, we use multiple worker processes if and only if `net_on` is true. @@ -126,8 +126,9 @@ cd(@__DIR__) do println(""" Running parallel tests with: + getpid() = $(getpid()) nworkers() = $(nworkers()) - nthreads() = $(Threads.nthreads()) + nthreads() = $(Threads.threadpoolsize()) Sys.CPU_THREADS = $(Sys.CPU_THREADS) Sys.total_memory() = $(Base.format_bytes(Sys.total_memory())) Sys.free_memory() = $(Base.format_bytes(Sys.free_memory())) @@ -247,7 +248,7 @@ cd(@__DIR__) do end end end - o_ts_duration = @elapsed @Experimental.sync begin + o_ts_duration = @elapsed Experimental.@sync begin for p in workers() @async begin push!(all_tasks, current_task()) diff --git a/test/ryu.jl b/test/ryu.jl index cf60e4867e236..4acd2fd08df50 100644 --- a/test/ryu.jl +++ b/test/ryu.jl @@ -52,6 +52,11 @@ end @test "2.305843009213694e40" == Ryu.writeshortest(Core.bitcast(Float64, 0x4850F0CF064DD592)) end +@testset "pow5 overflow (#47464)" begin + @test "4.6458339e+63" == Ryu.writeexp(4.645833859177319e63, 7) + @test "4.190673780e+40" == Ryu.writeexp(4.190673779576499e40, 9) +end + @testset "OutputLength" begin @test "1.0" == Ryu.writeshortest(1.0) # already tested in Basic @test "1.2" == Ryu.writeshortest(1.2) @@ -553,6 +558,11 @@ end # Float16 @test Ryu.writefixed(1.25e+5, 1, false, false, false, UInt8('.'), true) == "125000" @test Ryu.writefixed(1.25e+5, 2, false, false, false, UInt8('.'), true) == "125000" end + + @test Ryu.writefixed(100.0-eps(100.0), 0, false, false, true, UInt8('.'), false) == "100." + @test Ryu.writefixed(-100.0+eps(-100.0), 0, false, false, true, UInt8('.'), false) == "-100." + @test Ryu.writefixed(100.0-eps(100.0), 1, false, false, true, UInt8('.'), false) == "100.0" + @test Ryu.writefixed(-100.0+eps(-100.0), 1, false, false, true, UInt8('.'), false) == "-100.0" end # fixed @testset "Ryu.writeexp" begin diff --git a/test/scopedvalues.jl b/test/scopedvalues.jl new file mode 100644 index 0000000000000..b1f3241af8fc6 --- /dev/null +++ b/test/scopedvalues.jl @@ -0,0 +1,128 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license +import Base: ScopedValues + +@testset "errors" begin + @test ScopedValue{Float64}(1)[] == 1.0 + @test_throws InexactError ScopedValue{Int}(1.5) + let val = ScopedValue(1) + @test_throws MethodError val[] = 2 + with() do + @test_throws MethodError val[] = 2 + end + end + let val = ScopedValue{String}() + @test_throws KeyError val[] + end + let val = ScopedValue{Int}() + @test_throws KeyError val[] + end + @test_throws MethodError ScopedValue() +end + +const sval = ScopedValue(1) +@testset "inheritance" begin + @test sval[] == 1 + with() do + @test sval[] == 1 + with() do + @test sval[] == 1 + end + with(sval => 2) do + @test sval[] == 2 + end + @test sval[] == 1 + end + @test sval[] == 1 +end + +const sval_float = ScopedValue(1.0) + +@testset "multiple scoped values" begin + with(sval => 2, sval_float => 2.0) do + @test sval[] == 2 + @test sval_float[] == 2.0 + end + with(sval => 2, sval => 3) do + @test sval[] == 3 + end +end + +emptyf() = nothing + +@testset "conversion" begin + with(emptyf, sval_float=>2) + @test_throws MethodError with(emptyf, sval_float=>"hello") +end + +import Base.Threads: @spawn +@testset "tasks" begin + @test fetch(@spawn begin + sval[] + end) == 1 + with(sval => 2) do + @test fetch(@spawn begin + sval[] + end) == 2 + end +end + +@testset "show" begin + @test sprint(show, ScopedValue{Int}()) == "ScopedValue{$Int}(undefined)" + @test sprint(show, sval) == "ScopedValue{$Int}(1)" + @test sprint(show, Core.current_scope()) == "nothing" + with(sval => 2.0) do + @test sprint(show, sval) == "ScopedValue{$Int}(2)" + objid = sprint(show, Base.objectid(sval)) + @test sprint(show, Core.current_scope()) == "Base.ScopedValues.Scope(ScopedValue{$Int}@$objid => 2)" + end +end + +const depth = ScopedValue(0) +function nth_with(f, n) + if n <= 0 + f() + else + with(depth => n) do + nth_with(f, n-1) + end + end +end + + +@testset "nested with" begin + @testset for depth in 1:16 + nth_with(depth) do + @test sval_float[] == 1.0 + end + with(sval_float=>2.0) do + nth_with(depth) do + @test sval_float[] == 2.0 + end + end + nth_with(depth) do + with(sval_float=>2.0) do + @test sval_float[] == 2.0 + end + end + end + with(sval_float=>2.0) do + nth_with(15) do + @test sval_float[] == 2.0 + with(sval_float => 3.0) do + @test sval_float[] == 3.0 + end + end + end +end + +@testset "macro" begin + @with sval=>2 sval_float=>2.0 begin + @test sval[] == 2 + @test sval_float[] == 2.0 + end + # Doesn't do much... + @with begin + @test sval[] == 1 + @test sval_float[] == 1.0 + end +end diff --git a/test/secretbuffer.jl b/test/secretbuffer.jl index 976c757deea57..29e28ded8da72 100644 --- a/test/secretbuffer.jl +++ b/test/secretbuffer.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license using Base: SecretBuffer, SecretBuffer!, shred!, isshredded -using Test +using Test, Random @testset "SecretBuffer" begin @testset "original unmodified" begin @@ -129,4 +129,45 @@ using Test @test_throws ArgumentError Base.unsafe_SecretBuffer!(null_ptr) @test_throws ArgumentError Base.unsafe_SecretBuffer!(null_ptr, 0) end + + @testset "copiers" begin + s1 = SecretBuffer() + write(s1, "hello world") + seekstart(s1) + + s2 = copy(s1) + write(s2, 'c') + seekstart(s2) + + @test read(s1) == codeunits("hello world") + @test read(s2) == codeunits("cello world") + + shred!(s1) + @test isshredded(s1) + @test !isshredded(s2) + shred!(s2) + + # Copying into a bigger destination + s3 = SecretBuffer() + s4 = SecretBuffer() + write(s3, "original") + seekstart(s3) + write(s4, randstring(1234)) + s4data = s4.data + copy!(s4, s3) + @test s3.data == s4.data + @test read(s3) == read(s4) == codeunits("original") + @test all(iszero, s4data) + shred!(s3); shred!(s4) + + # Copying into a smaller destination + s5 = SecretBuffer() + s6 = SecretBuffer("sekrit") + str = randstring(321) + write(s5, str) + seekstart(s5) + copy!(s6, s5) + @test read(s5) == read(s6) == codeunits(str) + shred!(s5); shred!(s6) + end end diff --git a/test/sets.jl b/test/sets.jl index 9410739596486..2e7e19c5c1cdf 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -115,7 +115,7 @@ end @test in(2,s) @test length(s) == 2 @test_throws KeyError pop!(s,1) - @test pop!(s,1,:foo) == :foo + @test pop!(s,1,:foo) === :foo @test length(delete!(s,2)) == 1 @test !in(1,s) @test !in(2,s) @@ -124,7 +124,40 @@ end @test isempty(s) @test_throws ArgumentError pop!(s) @test length(Set(['x',120])) == 2 + + # Test that pop! returns the element in the set, not the query + s = Set{Any}(Any[0x01, UInt(2), 3, 4.0]) + @test pop!(s, 1) === 0x01 + @test pop!(s, 2) === UInt(2) + @test pop!(s, 3) === 3 + @test pop!(s, 4) === 4.0 + @test_throws KeyError pop!(s, 5) +end + +@testset "in!" begin + s = Set() + @test !(in!(0x01, s)) + @test !(in!(Int32(2), s)) + @test in!(1, s) + @test in!(2.0, s) + (a, b, c...) = sort!(collect(s)) + @test a === 0x01 + @test b === Int32(2) + @test isempty(c) + + # in! will convert to the right type automatically + s = Set{Int32}() + @test !(in!(1, s)) + @test only(s) === Int32(1) + @test_throws Exception in!("hello", s) + + # Other set types + s = BitSet() + @test !(in!(13, s)) + @test in!(UInt16(13), s) + @test only(s) === 13 end + @testset "copy" begin data_in = (1,2,9,8,4) s = Set(data_in) @@ -164,6 +197,19 @@ end sizehint!(s2, 10) @test s2 == GenericSet(s) end + +@testset "shrinking" begin # Similar test as for the underlying Dict + d = Set(i for i = 1:1000) + filter!(x -> x < 10, d) + sizehint!(d, 10) + @test length(d.dict.slots) < 100 + sizehint!(d, 1000) + sizehint!(d, 1; shrink = false) + @test length(d.dict.slots) >= 1000 + sizehint!(d, 1; shrink = true) + @test length(d.dict.slots) < 1000 +end + @testset "rehash!" 
begin # Use a pointer type to have defined behavior for uninitialized # array element @@ -364,7 +410,9 @@ end @test issubset(intersect(l,r), r) @test issubset(l, union(l,r)) @test issubset(r, union(l,r)) + @test issubset(union(l,r))(r) @test isdisjoint(l,l) == isempty(l) + @test isdisjoint(l)(l) == isempty(l) @test isdisjoint(l,r) == isempty(intersect(l,r)) if S === Vector @test sort(union(intersect(l,r),symdiff(l,r))) == sort(union(l,r)) @@ -381,6 +429,15 @@ end @test ⊋(S([1,2]), S([1])) @test !⊋(S([1]), S([1])) @test ⊉(S([1]), S([2])) + + @test ⊆(S([1,2]))(S([1])) + @test ⊊(S([1,2]))(S([1])) + @test !⊊(S([1]))(S([1])) + @test ⊈(S([2]))(S([1])) + @test ⊇(S([1]))(S([1,2])) + @test ⊋(S([1]))(S([1,2])) + @test !⊋(S([1]))(S([1])) + @test ⊉(S([2]))(S([1])) end let s1 = S([1,2,3,4]) @test s1 !== symdiff(s1) == s1 @@ -393,9 +450,10 @@ end @test symdiff(Set([1]), BitSet()) isa Set{Int} @test symdiff(BitSet([1]), Set{Int}()) isa BitSet @test symdiff([1], BitSet()) isa Vector{Int} - # symdiff must NOT uniquify - @test symdiff([1, 2, 1]) == symdiff!([1, 2, 1]) == [2] - @test symdiff([1, 2, 1], [2, 2]) == symdiff!([1, 2, 1], [2, 2]) == [2] + #symdiff does uniquify + @test symdiff([1, 2, 1]) == symdiff!([1, 2, 1]) == [1,2] + @test symdiff([1, 2, 1], [2, 2]) == symdiff!([1, 2, 1], [2, 2]) == [1] + @test symdiff([1, 2, 1], [2, 2]) == symdiff!([1, 2, 1], [2, 2]) == [1] # Base.hasfastin @test all(Base.hasfastin, Any[Dict(1=>2), Set(1), BitSet(1), 1:9, 1:2:9, @@ -419,6 +477,48 @@ end @test issubset(Set(Bool[]), rand(Bool, 100)) == true # neither has a fast in, right doesn't have a length @test isdisjoint([1, 3, 5, 7, 9], Iterators.filter(iseven, 1:10)) + + # range fast-path + for (truth, a, b) in ( + # Integers + (true, 1:10, 11:20), # not overlapping + (false, 1:10, 5:20), # partial overlap + (false, 5:9, 1:10), # complete overlap + # complete overlap, unequal steps + (false, 3:6:60, 9:9:60), + (true, 4:6:60, 9:9:60), + (true, 0:6:12, 9:9:60), + (false, 6:6:18, 9:9:60), + (false, 12:6:18, 9:9:60), + (false, 18:6:18, 9:9:60), + (true, 1:2:3, 2:3:5), + (true, 1:4:5, 2:1:4), + (false, 4:12:124, 1:1:8), + # potential overflow + (false, 0x1:0x3:0x4, 0x4:0x3:0x4), + (true, 0x3:0x3:0x6, 0x4:0x3:0x4), + (false, typemax(Int8):Int8(3):typemax(Int8), typemin(Int8):Int8(3):typemax(Int8)), + # Chars + (true, 'a':'l', 'o':'p'), # not overlapping + (false, 'a':'l', 'h':'p'), # partial overlap + (false, 'a':'l', 'c':'e'), # complete overlap + # Floats + (true, 1.:10., 11.:20.), # not overlapping + (false, 1.:10., 5.:20.), # partial overlap + (false, 5.:9., 1.:10.), # complete overlap + # Inputs that may hang + (false, -6011687643038262928:3545293653953105048, -6446834672754204848:3271267329311042532), + ) + @test isdisjoint(a, b) == truth + @test isdisjoint(b, a) == truth + @test isdisjoint(a, reverse(b)) == truth + @test isdisjoint(reverse(a), b) == truth + @test isdisjoint(b, reverse(a)) == truth + @test isdisjoint(reverse(b), a) == truth + end + @test isdisjoint(10:9, 1:10) # empty range + @test !isdisjoint(1e-100:.1:1, 0:.1:1) + @test !isdisjoint(eps()/4:.1:.71, 0:.1:1) end @testset "unique" begin @@ -505,6 +605,9 @@ end @test !allunique([1,1,2]) @test !allunique([:a,:b,:c,:a]) @test allunique(unique(randn(100))) # longer than 32 + @test allunique(collect(1:100)) # sorted/unique && longer than 32 + @test allunique(collect(100:-1:1)) # sorted/unique && longer than 32 + @test !allunique(fill(1,100)) # sorted/repeating && longer than 32 @test allunique(collect('A':'z')) # 58-element Vector{Char} @test 
!allunique(repeat(1:99, 1, 2)) @test !allunique(vcat(pi, randn(1998), pi)) # longer than 1000 @@ -733,8 +836,7 @@ end x = @inferred replace([1, 2], 2=>missing) @test isequal(x, [1, missing]) && x isa Vector{Union{Int, Missing}} - @test_broken @inferred replace([1, missing], missing=>2) - x = replace([1, missing], missing=>2) + x = @inferred replace([1, missing], missing=>2) @test x == [1, 2] && x isa Vector{Int} x = @inferred replace([1, missing], missing=>2, count=1) @test x == [1, 2] && x isa Vector{Union{Int, Missing}} @@ -768,6 +870,28 @@ end @test replace((NaN, 1.0), NaN=>0.0) === (0.0, 1.0) @test replace([1, missing], missing=>0) == [1, 0] @test replace((1, missing), missing=>0) === (1, 0) + + # test that MethodError is thrown for pairs + @test_throws MethodError replace(identity, 1=>2) + @test_throws MethodError replace(identity, 1=>2, 3=>4) + @test_throws MethodError replace!(identity, 1=>2) + @test_throws MethodError replace!(identity, 1=>2, 3=>4) + + # test replace and friends for AbstractDicts + d1 = GenericDict(Dict(1=>2, 3=>4)) + d2 = replace(d1, (1=>2) => (1=>"a")) + @test d2 == Dict(1=>"a", 3=>4) + @test d2 isa Dict{Int, Any} + @test d1 === replace!(d1, (1=>2) => (1=>-2)) + @test d1 == Dict(1=>-2, 3=>4) + + dd = Dict(1=>2, 3=>1, 5=>1, 7=>1) + for d1 in (dd, GenericDict(dd)) + @test replace(d1, (1=>2) => (1=>"a"), count=0) == d1 + d2 = replace(kv->(kv[2] == 1 ? kv[1]=>2 : kv), d1, count=2) + @test count(==(2), values(d2)) == 3 + @test count(==(1), values(d2)) == 1 + end end @testset "⊆, ⊊, ⊈, ⊇, ⊋, ⊉, <, <=, issetequal" begin @@ -795,6 +919,8 @@ end @test !(B ⊉ A) @test !issetequal(A, B) @test !issetequal(B, A) + @test !issetequal(B)(A) + @test !issetequal(A)(B) for T = (Tuple, identity, Set, BitSet, Base.IdSet{Int}) @test issetequal(A, T(A)) @test issetequal(B, T(B)) @@ -889,4 +1015,6 @@ end end set = TestSet{Any}() @test sizehint!(set, 1) === set + @test sizehint!(set, 1; shrink = true) === set + @test sizehint!(set, 1; shrink = false) === set end diff --git a/test/show.jl b/test/show.jl index 5aff7b1b3d6b0..a2ed2bba31f50 100644 --- a/test/show.jl +++ b/test/show.jl @@ -268,7 +268,6 @@ end @test repr(Expr(:import, :Foo)) == ":(\$(Expr(:import, :Foo)))" @test repr(Expr(:import, Expr(:(.), ))) == ":(\$(Expr(:import, :(\$(Expr(:.))))))" - @test repr(Expr(:using, Expr(:(.), :A))) == ":(using A)" @test repr(Expr(:using, Expr(:(.), :A), Expr(:(.), :B))) == ":(using A, B)" @@ -286,6 +285,10 @@ end @test repr(Expr(:import, Expr(:(.), :A, :B), Expr(:(.), :C, :D))) == ":(import A.B, C.D)" +# https://github.com/JuliaLang/julia/issues/49168 +@test repr(:(using A: (..))) == ":(using A: (..))" +@test repr(:(using A: (..) as twodots)) == ":(using A: (..) 
as twodots)" + # range syntax @test_repr "1:2" @test_repr "3:4:5" @@ -323,7 +326,7 @@ end # line meta if d < 0 # line meta - error(\"dimension size must be nonnegative (got \$d)\") + error(\"dimension size must be non-negative (got \$d)\") end # line meta n *= d @@ -520,6 +523,13 @@ end # Hidden macro names @test sprint(show, Expr(:macrocall, Symbol("@#"), nothing, :a)) == ":(@var\"#\" a)" +# Test that public expressions are rendered nicely +# though they are hard to create with quotes because public is not a context dependant keyword +@test sprint(show, Expr(:public, Symbol("@foo"))) == ":(public @foo)" +@test sprint(show, Expr(:public, :f,:o,:o)) == ":(public f, o, o)" +s = sprint(show, :(module A; public x; end)) +@test match(r"^:\(module A\n #= .* =#\n #= .* =#\n public x\n end\)$", s) !== nothing + # PR #38418 module M1 var"#foo#"() = 2 end @test occursin("M1.var\"#foo#\"", sprint(show, M1.var"#foo#", context = :module=>@__MODULE__)) @@ -603,7 +613,7 @@ let q1 = Meta.parse(repr(:("$(a)b"))), @test q1.args[1].args == [:a, "b"] @test isa(q2, Expr) - @test q2.args[1].head == :string + @test q2.args[1].head === :string @test q2.args[1].args == [:ab,] end @@ -630,7 +640,7 @@ end @test_repr "::@m(x, y) + z" @test_repr "[@m(x) y z]" @test_repr "[@m(x) y; z]" -@test_repr "let @m(x), y=z; end" +test_repr("let @m(x), y=z; end", true) @test repr(:(@m x y)) == ":(#= $(@__FILE__):$(@__LINE__) =# @m x y)" @test string(:(@m x y)) == "#= $(@__FILE__):$(@__LINE__) =# @m x y" @@ -769,12 +779,33 @@ let repr = sprint(show, "text/html", methods(f16580)) @test occursin("f16580(x, y...; <i>z, w, q...</i>)", repr) end +# Just check it doesn't error +f46594(::Vararg{T, 2}) where T = 1 +let repr = sprint(show, "text/html", first(methods(f46594))) + @test occursin("f46594(::Vararg{T, 2}) where T", replace(repr, r"</?[A-Za-z]>"=>"")) +end + function triangular_methodshow(x::T1, y::T2) where {T2<:Integer, T1<:T2} end let repr = sprint(show, "text/plain", methods(triangular_methodshow)) @test occursin("where {T2<:Integer, T1<:T2}", repr) end +struct S45879{P} end +let ms = methods(S45879) + @test ms isa Base.MethodList + @test length(ms) == 0 + @test sprint(show, Base.MethodList(Method[], typeof(S45879).name.mt)) isa String +end + +function f49475(a=12.0; b) end +let ms = methods(f49475) + @test length(ms) == 2 + repr1 = sprint(show, "text/plain", ms[1]) + repr2 = sprint(show, "text/plain", ms[2]) + @test occursin("f49475(; ...)", repr1) || occursin("f49475(; ...)", repr2) +end + if isempty(Base.GIT_VERSION_INFO.commit) @test occursin("https://github.com/JuliaLang/julia/tree/v$VERSION/base/special/trig.jl#L", Base.url(which(sin, (Float64,)))) else @@ -986,6 +1017,9 @@ test_mt(show_f5, "show_f5(A::AbstractArray{T, N}, indices::Vararg{$Int, N})") @test sprint(show, :(function f end)) == ":(function f end)" @test_repr "function g end" +# Printing of :(function (x...) end) +@test startswith(replstr(Meta.parse("function (x...) 
end")), ":(function (x...,)") + # Printing of macro definitions @test sprint(show, :(macro m end)) == ":(macro m end)" @test_repr "macro m end" @@ -1262,12 +1296,6 @@ end let repr = sprint(dump, Core.svec()) @test repr == "empty SimpleVector\n" end -let sv = Core.svec(:a, :b, :c) - # unsafe replacement of :c with #undef to test handling of incomplete SimpleVectors - unsafe_store!(convert(Ptr{Ptr{Cvoid}}, Base.pointer_from_objref(sv)) + 3 * sizeof(Ptr), C_NULL) - repr = sprint(dump, sv) - @test repr == "SimpleVector\n 1: Symbol a\n 2: Symbol b\n 3: #undef\n" -end let repr = sprint(dump, sin) @test repr == "sin (function of type typeof(sin))\n" end @@ -1286,6 +1314,9 @@ let a = Vector{Any}(undef, 10000) end @test occursin("NamedTuple", sprint(dump, NamedTuple)) +# issue 36495, dumping a partial NamedTupled shouldn't error +@test occursin("NamedTuple", sprint(dump, NamedTuple{(:foo,:bar)})) + # issue #17338 @test repr(Core.svec(1, 2)) == "svec(1, 2)" @@ -1340,19 +1371,35 @@ test_repr("(:).a") @test repr(Tuple{Float32, Float32, Float32}) == "Tuple{Float32, Float32, Float32}" @test repr(Tuple{String, Int64, Int64, Int64}) == "Tuple{String, Int64, Int64, Int64}" @test repr(Tuple{String, Int64, Int64, Int64, Int64}) == "Tuple{String, Vararg{Int64, 4}}" +@test repr(NTuple) == "NTuple{N, T} where {N, T}" +@test repr(Tuple{NTuple{N}, Vararg{NTuple{N}, 4}} where N) == "NTuple{5, NTuple{N, T} where T} where N" +@test repr(Tuple{Float64, NTuple{N}, Vararg{NTuple{N}, 4}} where N) == "Tuple{Float64, Vararg{NTuple{N, T} where T, 5}} where N" + +# Test printing of NamedTuples using the macro syntax +@test repr(@NamedTuple{kw::Int64}) == "@NamedTuple{kw::Int64}" +@test repr(@NamedTuple{kw::Union{Float64, Int64}, kw2::Int64}) == "@NamedTuple{kw::Union{Float64, Int64}, kw2::Int64}" +@test repr(@NamedTuple{kw::@NamedTuple{kw2::Int64}}) == "@NamedTuple{kw::@NamedTuple{kw2::Int64}}" +@test repr(@NamedTuple{kw::NTuple{7, Int64}}) == "@NamedTuple{kw::NTuple{7, Int64}}" +@test repr(@NamedTuple{a::Float64, b}) == "@NamedTuple{a::Float64, b}" + +# Test general printing of `Base.Pairs` (it should not use the `@Kwargs` macro syntax) +@test repr(@Kwargs{init::Int}) == "Base.Pairs{Symbol, $Int, Tuple{Symbol}, @NamedTuple{init::$Int}}" @testset "issue #42931" begin - @test repr(NTuple{4, :A}) == "NTuple{4, :A}" + @test repr(NTuple{4, :A}) == "Tuple{:A, :A, :A, :A}" @test repr(NTuple{3, :A}) == "Tuple{:A, :A, :A}" @test repr(NTuple{2, :A}) == "Tuple{:A, :A}" @test repr(NTuple{1, :A}) == "Tuple{:A}" @test repr(NTuple{0, :A}) == "Tuple{}" @test repr(Tuple{:A, :A, :A, :B}) == "Tuple{:A, :A, :A, :B}" - @test repr(Tuple{:A, :A, :A, :A}) == "NTuple{4, :A}" + @test repr(Tuple{:A, :A, :A, :A}) == "Tuple{:A, :A, :A, :A}" @test repr(Tuple{:A, :A, :A}) == "Tuple{:A, :A, :A}" @test repr(Tuple{:A}) == "Tuple{:A}" @test repr(Tuple{}) == "Tuple{}" + + @test repr(Tuple{Vararg{N, 10}} where N) == "NTuple{10, N} where N" + @test repr(Tuple{Vararg{10, N}} where N) == "Tuple{Vararg{10, N}} where N" end # Test that REPL/mime display of invalid UTF-8 data doesn't throw an exception: @@ -1431,7 +1478,7 @@ struct var"#X#" end var"#f#"() = 2 struct var"%X%" end # Invalid name without '#' -# (Just to make this test more sustainable,) we don't necesssarily need to test the exact +# (Just to make this test more sustainable,) we don't necessarily need to test the exact # output format, just ensure that it prints at least the parts we expect: @test occursin(".var\"#X#\"", static_shown(var"#X#")) # Leading `.` tests it printed a module name. 
@test occursin(r"Set{var\"[^\"]+\"} where var\"[^\"]+\"", static_shown(Set{<:Any})) @@ -1820,8 +1867,8 @@ end # issue #27747 let t = (x = Integer[1, 2],) v = [t, t] - @test showstr(v) == "NamedTuple{(:x,), Tuple{Vector{Integer}}}[(x = [1, 2],), (x = [1, 2],)]" - @test replstr(v) == "2-element Vector{NamedTuple{(:x,), Tuple{Vector{Integer}}}}:\n (x = [1, 2],)\n (x = [1, 2],)" + @test showstr(v) == "@NamedTuple{x::Vector{Integer}}[(x = [1, 2],), (x = [1, 2],)]" + @test replstr(v) == "2-element Vector{@NamedTuple{x::Vector{Integer}}}:\n (x = [1, 2],)\n (x = [1, 2],)" end # issue #25857 @@ -1860,6 +1907,10 @@ end @test replstr((; var"#var#"=1)) == """(var"#var#" = 1,)""" @test replstr((; var"a"=1, b=2)) == "(a = 1, b = 2)" @test replstr((; a=1, b=2)) == "(a = 1, b = 2)" + + # issue 48828, typeinfo missing for arrays with >2 dimensions + @test showstr(Float16[1.0 3.0; 2.0 4.0;;; 5.0 7.0; 6.0 8.0]) == + "Float16[1.0 3.0; 2.0 4.0;;; 5.0 7.0; 6.0 8.0]" end @testset "#14684: `display` should print associative types in full" begin @@ -1921,12 +1972,12 @@ end end @testset "Intrinsic printing" begin - @test sprint(show, Core.Intrinsics.arraylen) == "Core.Intrinsics.arraylen" - @test repr(Core.Intrinsics.arraylen) == "Core.Intrinsics.arraylen" + @test sprint(show, Core.Intrinsics.cglobal) == "Core.Intrinsics.cglobal" + @test repr(Core.Intrinsics.cglobal) == "Core.Intrinsics.cglobal" let io = IOBuffer() - show(io, MIME"text/plain"(), Core.Intrinsics.arraylen) + show(io, MIME"text/plain"(), Core.Intrinsics.cglobal) str = String(take!(io)) - @test occursin("arraylen", str) + @test occursin("cglobal", str) @test occursin("(intrinsic function", str) end @test string(Core.Intrinsics.add_int) == "add_int" @@ -2001,6 +2052,7 @@ eval(Meta._parse_string("""function my_fun28173(x) r = 1 s = try r = 2 + Base.inferencebarrier(false) && error() "BYE" catch r = 3 @@ -2024,33 +2076,36 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1] @test all(isspace, pop!(lines1)) Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(1), Val{1}), false) Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(2), Val{2}), true) - Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(3), Val{3}), false) - Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(4), Val{4}), true) + Core.Compiler.insert_node!(ir, length(ir.stmts.stmt), Core.Compiler.NewInstruction(QuoteNode(3), Val{3}), false) + Core.Compiler.insert_node!(ir, length(ir.stmts.stmt), Core.Compiler.NewInstruction(QuoteNode(4), Val{4}), true) lines2 = split(repr(ir), '\n') @test all(isspace, pop!(lines2)) @test popfirst!(lines2) == "2 1 ── $(QuoteNode(1))" - @test popfirst!(lines2) == " │ $(QuoteNode(2))" # TODO: this should print after the next statement let line1 = popfirst!(lines1) line2 = popfirst!(lines2) @test startswith(line1, "2 1 ── ") @test startswith(line2, " │ ") @test line2[12:end] == line2[12:end] end - let line1 = pop!(lines1) - line2 = pop!(lines2) - @test startswith(line1, "17 ") - @test startswith(line2, " ") - @test line1[3:end] == line2[3:end] - end - @test pop!(lines2) == " │ \$(QuoteNode(4))" - @test pop!(lines2) == "17 │ \$(QuoteNode(3))" # TODO: this should print after the next statement + @test popfirst!(lines2) == " │ $(QuoteNode(2))" + @test pop!(lines2) == " └─── \$(QuoteNode(4))" + @test pop!(lines1) == "18 └─── return %21" + @test pop!(lines2) == " │ return %21" + @test pop!(lines2) == "18 │ \$(QuoteNode(3))" @test 
lines1 == lines2 # verbose linetable io = IOBuffer() Base.IRShow.show_ir(io, ir, Base.IRShow.default_config(ir; verbose_linetable=true)) seekstart(io) - @test count(contains(r"@ a{80}:\d+ within `my_fun28173"), eachline(io)) == 10 + @test count(contains(r"@ a{80}:\d+ within `my_fun28173"), eachline(io)) == 11 + + # Test that a bad :invoke doesn't cause an error during printing + Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(Expr(:invoke, nothing, sin), Any), false) + io = IOBuffer() + Base.IRShow.show_ir(io, ir) + seekstart(io) + @test contains(String(take!(io)), "Expr(:invoke, nothing") end # Verify that extra instructions at the end of the IR @@ -2058,7 +2113,7 @@ end # with as unnamed "!" BB. let src = code_typed(gcd, (Int, Int), debuginfo=:source)[1][1] ir = Core.Compiler.inflate_ir(src) - push!(ir.stmts.inst, Core.Compiler.ReturnNode()) + push!(ir.stmts.stmt, Core.Compiler.ReturnNode()) lines = split(sprint(show, ir), '\n') @test all(isspace, pop!(lines)) @test pop!(lines) == " !!! ── unreachable::#UNDEF" @@ -2292,6 +2347,8 @@ end @eval f1(var"a.b") = 3 @test occursin("f1(var\"a.b\")", sprint(_show, methods(f1))) + @test sprint(_show, Method[]) == "0-element Vector{Method}" + italic(s) = mime == MIME("text/html") ? "<i>$s</i>" : s @eval f2(; var"123") = 5 @@ -2373,3 +2430,225 @@ Base.show(io::IO, ces::⛵) = Base.print(io, '⛵') @test Base.alignment(stdout, ⛵()) == (0, 2) @test Base.alignment(IOContext(IOBuffer(), :color=>true), ColoredLetter()) == (0, 1) @test Base.alignment(IOContext(IOBuffer(), :color=>false), ColoredLetter()) == (0, 1) + +# spacing around dots in Diagonal, etc: +redminusthree = sprint((io, x) -> printstyled(io, x, color=:red), "-3", context=stdout) +@test Base.replace_with_centered_mark(redminusthree) == Base.replace_with_centered_mark("-3") + +# `show` implementations for `Method` +let buf = IOBuffer() + + # single line printing by default + show(buf, only(methods(sin, (Float64,)))) + @test !occursin('\n', String(take!(buf))) + + # two-line printing for rich display + show(buf, MIME("text/plain"), only(methods(sin, (Float64,)))) + @test occursin('\n', String(take!(buf))) +end + +@testset "basic `show_ir` functionality tests" begin + mktemp() do f, io + redirect_stdout(io) do + let io = IOBuffer() + for i = 1:10 + # make sure we don't error on printing IRs at any optimization level + ir = only(Base.code_ircode(sin, (Float64,); optimize_until=i))[1] + @test try; show(io, ir); true; catch; false; end + compact = Core.Compiler.IncrementalCompact(ir) + @test try; show(io, compact); true; catch; false; end + end + end + end + close(io) + @test isempty(read(f, String)) # make sure we don't unnecessarily lean anything into `stdout` + end +end + +@testset "IRCode: fix coloring of invalid SSA values" begin + # get some ir + function foo(i) + j = i+42 + j == 1 ? 
1 : 2 + end + ir = only(Base.code_ircode(foo, (Int,)))[1] + + # replace an instruction + add_stmt = ir.stmts[1] + inst = Core.Compiler.NewInstruction(Expr(:call, add_stmt[:stmt].args[1], add_stmt[:stmt].args[2], 999), Int) + node = Core.Compiler.insert_node!(ir, 1, inst) + Core.Compiler.setindex!(add_stmt, node, :stmt) + + # the new node should be colored green (as it's uncompacted IR), + # and its uses shouldn't be colored at all (since they're just plain valid references) + str = sprint(; context=:color=>true) do io + show(io, ir) + end + @test contains(str, "\e[32m%6 =") + @test contains(str, "%1 = %6") + + # if we insert an invalid node, it should be colored appropriately + Core.Compiler.setindex!(add_stmt, Core.Compiler.SSAValue(node.id+1), :stmt) + str = sprint(; context=:color=>true) do io + show(io, ir) + end + @test contains(str, "%1 = \e[31m%7") +end + +@testset "issue #46947: IncrementalCompact double display of just-compacted nodes" begin + # get some IR + foo(i) = i == 1 ? 1 : 2 + ir = only(Base.code_ircode(foo, (Int,)))[1] + + instructions = length(ir.stmts) + lines_shown(obj) = length(findall('\n', sprint(io->show(io, obj)))) + @test lines_shown(ir) == instructions + + # insert a couple of instructions + let inst = Core.Compiler.NewInstruction(Expr(:identity, 1), Nothing) + Core.Compiler.insert_node!(ir, 2, inst) + end + let inst = Core.Compiler.NewInstruction(Expr(:identity, 2), Nothing) + Core.Compiler.insert_node!(ir, 2, inst) + end + let inst = Core.Compiler.NewInstruction(Expr(:identity, 3), Nothing) + Core.Compiler.insert_node!(ir, 4, inst) + end + instructions += 3 + @test lines_shown(ir) == instructions + + # compact the IR, ensuring we always show the same number of lines + # (the instructions + a separator line) + compact = Core.Compiler.IncrementalCompact(ir) + @test lines_shown(compact) == instructions + 1 + state = Core.Compiler.iterate(compact) + while state !== nothing + @test lines_shown(compact) == instructions + 1 + state = Core.Compiler.iterate(compact, state[2]) + end + @test lines_shown(compact) == instructions + 1 + + ir = Core.Compiler.complete(compact) + @test lines_shown(compact) == instructions + 1 +end + +@testset "#46424: IncrementalCompact displays wrong basic-block boundaries" begin + # get some cfg + function foo(i) + j = i+42 + j == 1 ? 1 : 2 + end + ir = only(Base.code_ircode(foo, (Int,)))[1] + + # at every point we should be able to observe these three basic blocks + function verify_display(ir) + str = sprint(io->show(io, ir)) + @test contains(str, "1 ─ %1 = ") + @test contains(str, r"2 ─ \s+ return 1") + @test contains(str, r"3 ─ \s+ return 2") + end + verify_display(ir) + + # insert some instructions + for i in 1:3 + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, i), Int) + Core.Compiler.insert_node!(ir, 2, inst) + end + + # compact + compact = Core.Compiler.IncrementalCompact(ir) + verify_display(compact) + + # Compact the first instruction + state = Core.Compiler.iterate(compact) + + # Insert some instructions here + for i in 1:2 + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, i), Int, Int32(1)) + Core.Compiler.insert_node_here!(compact, inst) + verify_display(compact) + end + + while state !== nothing + state = Core.Compiler.iterate(compact, state[2]) + verify_display(compact) + end + + # complete + ir = Core.Compiler.complete(compact) + verify_display(ir) +end + +@testset "IRCode: CFG display" begin + # get a cfg + function foo(i) + j = i+42 + j == 1 ? 
1 : 2 + end + ir = only(Base.code_ircode(foo, (Int,)))[1] + cfg = ir.cfg + + str = sprint(io->show(io, cfg)) + @test contains(str, r"CFG with \d+ blocks") + @test contains(str, r"bb 1 \(stmt.+\) → bb.*") +end + +@testset "IncrementalCompact: correctly display attach-after nodes" begin + # set some IR + function foo(i) + j = i+42 + return j + end + ir = only(Base.code_ircode(foo, (Int,)))[1] + + # insert a bunch of nodes, inserting both before and after instruction 1 + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, 1), Int) + Core.Compiler.insert_node!(ir, 1, inst) + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, 2), Int) + Core.Compiler.insert_node!(ir, 1, inst) + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, 3), Int) + Core.Compiler.insert_node!(ir, 1, inst, true) + inst = Core.Compiler.NewInstruction(Expr(:call, :identity, 4), Int) + Core.Compiler.insert_node!(ir, 1, inst, true) + + # at every point we should be able to observe these instructions (in order) + function verify_display(ir) + str = sprint(io->show(io, ir)) + lines = split(str, '\n') + patterns = ["identity(1)", + "identity(2)", + "add_int", + "identity(3)", + "identity(4)", + "return"] + line_idx = 1 + pattern_idx = 1 + while pattern_idx <= length(patterns) && line_idx <= length(lines) + # we test pattern-per-pattern, in order, + # so that we skip e.g. the compaction boundary + if contains(lines[line_idx], patterns[pattern_idx]) + pattern_idx += 1 + end + line_idx += 1 + end + @test pattern_idx > length(patterns) + end + verify_display(ir) + + compact = Core.Compiler.IncrementalCompact(ir) + verify_display(compact) + + state = Core.Compiler.iterate(compact) + while state !== nothing + verify_display(compact) + state = Core.Compiler.iterate(compact, state[2]) + end + + ir = Core.Compiler.complete(compact) + verify_display(ir) +end + +let buf = IOBuffer() + Base.show_tuple_as_call(buf, Symbol(""), Tuple{Function,Any}) + @test String(take!(buf)) == "(::Function)(::Any)" +end diff --git a/test/smallarrayshrink.jl b/test/smallarrayshrink.jl index a1a7df5aee5a5..680a882e432d4 100644 --- a/test/smallarrayshrink.jl +++ b/test/smallarrayshrink.jl @@ -1,45 +1,20 @@ @testset "shrink small array" begin - x = [1, 2, 3, 4] - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - sizehint!(x, 10000) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 10000 - sizehint!(x, 4) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - - x = [1, 2, 3, 4] - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - sizehint!(x, 1000000) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 1000000 - sizehint!(x, 4) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 + function check_array(x, size, capacity) + @test 
x[1] == 1 + @test x[2] == 2 + @test x[3] == 3 + @test x[4] == 4 + @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == size + @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == capacity + end + for hint_size = [10000, 1000000] + x = [1, 2, 3, 4] + check_array(x, 4, 4) + sizehint!(x, hint_size) + check_array(x, 4, hint_size) + sizehint!(x, 4; shrink = false) + check_array(x, 4, hint_size) + sizehint!(x, 4) + check_array(x, 4, 4) + end end diff --git a/test/some.jl b/test/some.jl index 27d50ca354a49..e49fc586a3a6e 100644 --- a/test/some.jl +++ b/test/some.jl @@ -33,7 +33,7 @@ @test convert(Union{Int, Nothing}, 1) === 1 @test convert(Union{Int, Nothing}, 1.0) === 1 @test convert(Nothing, nothing) === nothing -@test_throws MethodError convert(Nothing, 1) +@test_throws ErrorException("cannot convert a value to nothing for assignment") convert(Nothing, 1) ## show() diff --git a/test/sorting.jl b/test/sorting.jl index 560ce02376a89..878c4b36d4611 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -75,6 +75,17 @@ end @test sum(randperm(6)) == 21 @test length(reverse(0x1:0x2)) == 2 @test issorted(sort(rand(UInt64(1):UInt64(2), 7); rev=true); rev=true) # issue #43034 + @test sort(Union{}[]) == Union{}[] # issue #45280 +end + +@testset "stability" begin + for Alg in [InsertionSort, MergeSort, Base.Sort.ScratchQuickSort(), Base.DEFAULT_STABLE, + Base.Sort.ScratchQuickSort(missing, 1729), Base.Sort.ScratchQuickSort(1729, missing)] + @test issorted(sort(1:2000, alg=Alg, by=x->0)) + @test issorted(sort(1:2000, alg=Alg, by=x->x÷100)) + end + @test sort(1:2000, by=x->x÷100, rev=true) == sort(1:2000, by=x->-x÷100) == + vcat(2000, (x:x+99 for x in 1900:-100:100)..., 1:99) end @testset "partialsort" begin @@ -119,9 +130,11 @@ Base.step(r::ConstantRange) = 0 @test searchsortedlast(r, 1.0, Forward) == 5 @test searchsortedlast(r, 1, Forward) == 5 @test searchsortedlast(r, UInt(1), Forward) == 5 +end +@testset "Each sorting algorithm individually" begin a = rand(1:10000, 1000) - for alg in [InsertionSort, MergeSort, Base.DEFAULT_STABLE] + for alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] b = sort(a, alg=alg) @test issorted(b) @@ -186,18 +199,16 @@ Base.step(r::ConstantRange) = 0 @test b == c end - @testset "unstable algorithms" begin - for alg in [QuickSort, Base.DEFAULT_UNSTABLE] - b = sort(a, alg=alg) - @test issorted(b) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)))) - b = sort(a, alg=alg, rev=true) - @test issorted(b, rev=true) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), rev=true)) - b = sort(a, alg=alg, by=x->1/x) - @test issorted(b, by=x->1/x) - @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), by=x->1/x)) - end + @testset "PartialQuickSort" begin + b = sort(a) + @test issorted(b) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)))) + b = sort(a, rev=true) + @test issorted(b, rev=true) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), rev=true)) + b = sort(a, by=x->1/x) + @test issorted(b, by=x->1/x) + @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), by=x->1/x)) end end @testset "insorted" begin @@ -258,8 +269,8 @@ end @testset "PartialQuickSort" begin a = rand(1:10000, 1000) # test PartialQuickSort only does a partial sort - let alg = PartialQuickSort(1:div(length(a), 10)) - k = alg.k + let k = 1:div(length(a), 10) + alg = PartialQuickSort(k) b = sort(a, alg=alg) c = sort(a, alg=alg, by=x->1/x) d = sort(a, alg=alg, rev=true) @@ -270,8 +281,8 @@ end @test 
!issorted(c, by=x->1/x) @test !issorted(d, rev=true) end - let alg = PartialQuickSort(div(length(a), 10)) - k = alg.k + let k = div(length(a), 10) + alg = PartialQuickSort(k) b = sort(a, alg=alg) c = sort(a, alg=alg, by=x->1/x) d = sort(a, alg=alg, rev=true) @@ -288,6 +299,7 @@ end @test partialsortperm([3,6,30,1,9], 2, rev=true) == 5 @test partialsortperm([3,6,30,1,9], 2, by=x->1/x) == 5 end + ## more advanced sorting tests ## randnans(n) = reinterpret(Float64,[rand(UInt64)|0x7ff8000000000000 for i=1:n]) @@ -323,7 +335,7 @@ end @test c == v # stable algorithms - for alg in [MergeSort, Base.DEFAULT_STABLE] + for alg in [MergeSort, Base.Sort.ScratchQuickSort(), Base.Sort.ScratchQuickSort(1:n), Base.DEFAULT_STABLE] p = sortperm(v, alg=alg, rev=rev) p2 = sortperm(float(v), alg=alg, rev=rev) @test p == p2 @@ -333,6 +345,10 @@ end @test s == si invpermute!(s, p) @test s == v + + # Ensure stability, even with reverse short circuit + @test all(sort!(Real[fill(2.0, 15); fill(2, 15); fill(1.0, 15); fill(1, 15)]) + .=== Real[fill(1.0, 15); fill(1, 15); fill(2.0, 15); fill(2, 15)]) end # unstable algorithms @@ -367,8 +383,7 @@ end end v = randn_with_nans(n,0.1) - # TODO: alg = PartialQuickSort(n) fails here - for alg in [InsertionSort, QuickSort, MergeSort, Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], + for alg in [InsertionSort, MergeSort, Base.Sort.ScratchQuickSort(), Base.Sort.ScratchQuickSort(1, n), Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE], rev in [false,true] alg === InsertionSort && n >= 3000 && continue # test float sorting with NaNs @@ -430,7 +445,7 @@ end @test all(issorted, [sp[inds.==x] for x in 1:200]) end - for alg in [InsertionSort, MergeSort, Base.DEFAULT_STABLE] + for alg in [InsertionSort, MergeSort, QuickSort, Base.DEFAULT_STABLE] sp = sortperm(inds, alg=alg) @test all(issorted, [sp[inds.==x] for x in 1:200]) end @@ -521,11 +536,11 @@ end @test issorted(a) a = view([9:-1:0;], :)::SubArray - Base.Sort.sort_int_range!(a, 10, 0, identity) # test it supports non-Vector + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9)) # test it supports non-Vector @test issorted(a) a = OffsetArray([9:-1:0;], -5) - Base.Sort.sort_int_range!(a, 10, 0, identity) + Base.Sort._sort!(a, Base.Sort.CountingSort(), Base.Forward, (; mn=0, mx=9)) @test issorted(a) end @@ -547,6 +562,13 @@ end end end +@testset "Offset with missing (#48862)" begin + v = [-1.0, missing, 1.0, 0.0, missing, -0.5, 0.5, 1.0, -0.5, missing, 0.5, -0.8, 1.5, NaN] + vo = OffsetArray(v, (firstindex(v):lastindex(v)).+100) + @test issorted(sort!(vo)) + @test issorted(v) +end + @testset "searchsortedfirst/last with generalized indexing" begin o = OffsetVector(1:3, -2) @test searchsortedfirst(o, 4) == lastindex(o) + 1 @@ -575,7 +597,7 @@ end @testset "fallback" begin @test adaptive_sort_test(rand(1:typemax(Int32), len), by=x->x^2)# fallback - @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=QuickSort) + @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=Base.Sort.ScratchQuickSort()) end @test adaptive_sort_test(rand(Int, 20)) # InsertionSort @@ -619,9 +641,9 @@ end @testset "uint mappings" begin #Construct value lists - floats = [T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, - prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))] - for T in [Float16, Float32, Float64]] + floats = [reinterpret(U, vcat(T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN, + prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))], randnans(4))) + for (U, 
T) in [(UInt16, Float16), (UInt32, Float32), (UInt64, Float64)]] ints = [T[17, -T(17), 0, -one(T), 1, typemax(T), typemin(T), typemax(T)-1, typemin(T)+1] for T in Base.BitInteger_types] @@ -637,22 +659,18 @@ end UIntN(::Val{8}) = UInt64 UIntN(::Val{16}) = UInt128 map(vals) do x + x isa Base.ReinterpretArray && return T = eltype(x) U = UIntN(Val(sizeof(T))) append!(x, rand(T, 4)) append!(x, reinterpret.(T, rand(U, 4))) - if T <: AbstractFloat - mask = reinterpret(U, T(NaN)) - append!(x, reinterpret.(T, mask .| rand(U, 4))) - end end for x in vals T = eltype(x) U = UIntN(Val(sizeof(T))) - for order in [Forward, Reverse, Base.Sort.Float.Left(), Base.Sort.Float.Right(), By(Forward, identity)] - if order isa Base.Order.By || T === Float16 || - ((T <: AbstractFloat) == (order isa DirectOrdering)) + for order in [Forward, Reverse, By(Forward, identity)] + if order isa Base.Order.By @test Base.Sort.UIntMappable(T, order) === nothing continue end @@ -669,30 +687,386 @@ end for a in x for b in x - if order === Base.Sort.Float.Left() || order === Base.Sort.Float.Right() - # Left and Right orderings guarantee homogeneous sign and no NaNs - (isnan(a) || isnan(b) || signbit(a) != signbit(b)) && continue - end @test Base.Order.lt(order, a, b) === Base.Order.lt(Forward, Base.Sort.uint_map(a, order), Base.Sort.uint_map(b, order)) end end end end + + @test Base.Sort.UIntMappable(Union{Int, UInt}, Base.Forward) === nothing # issue #45280 end -@testset "sort(x; workspace=w) " begin +@testset "invalid lt (#11429)" begin + # lt must be a total linear order (e.g. < not <=) so this usage is + # not allowed. Consequently, none of the behavior tested in this + # testset is guaranteed to work in future minor versions of Julia. + + safe_algs = [InsertionSort, MergeSort, Base.Sort.ScratchQuickSort(), Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + + n = 1000 + v = rand(1:5, n); + s = sort(v); + + # Nevertheless, it still works... + for alg in safe_algs + @test sort(v, alg=alg, lt = <=) == s + end + @test partialsort(v, 172, lt = <=) == s[172] + @test partialsort(v, 315:415, lt = <=) == s[315:415] + + # ...and it is consistently reverse stable. All these algorithms swap v[i] and v[j] + # where i < j if and only if lt(o, v[j], v[i]). This invariant holds even for + # this invalid lt order. + perm = reverse(sortperm(v, rev=true)) + for alg in safe_algs + @test sort(1:n, alg=alg, lt = (i,j) -> v[i]<=v[j]) == perm + end + # Broken by the introduction of BracketedSort in #52006 which is unstable + @test_broken partialsort(1:n, 172, lt = (i,j) -> v[i]<=v[j]) == perm[172] + @test_broken partialsort(1:n, 315:415, lt = (i,j) -> v[i]<=v[j]) == perm[315:415] + + # lt can be very poorly behaved and sort will still permute its input in some way. 
+ for alg in safe_algs + @test sort!(sort(v, alg=alg, lt = (x,y) -> rand([false, true]))) == s + end + @test partialsort(v, 172, lt = (x,y) -> rand([false, true])) ∈ 1:5 + @test all(partialsort(v, 315:415, lt = (x,y) -> rand([false, true])) .∈ (1:5,)) + + # issue #32675 + k = [38, 18, 38, 38, 3, 37, 26, 26, 6, 29, 38, 36, 38, 1, 38, 36, 38, 38, 38, 36, 36, + 36, 28, 34, 35, 38, 25, 20, 38, 1, 1, 5, 38, 38, 3, 34, 16, 38, 4, 10, 35, 37, 38, + 38, 2, 38, 25, 35, 38, 1, 35, 36, 20, 33, 36, 18, 38, 1, 24, 4, 38, 18, 12, 38, 34, + 35, 36, 38, 26, 31, 36, 38, 38, 30, 36, 35, 35, 7, 22, 35, 38, 35, 30, 21, 37] + idx = sortperm(k; lt=!isless) + @test issorted(k[idx], rev=true) +end + +@testset "sort(x; scratch)" begin for n in [1,10,100,1000] v = rand(n) - w = [0.0] - @test sort(v) == sort(v; workspace=w) - @test sort!(copy(v)) == sort!(copy(v); workspace=w) - @test sortperm(v) == sortperm(v; workspace=[4]) - @test sortperm!(Vector{Int}(undef, n), v) == sortperm!(Vector{Int}(undef, n), v; workspace=[4]) + scratch = [0.0] + @test sort(v) == sort(v; scratch) + @test sort!(copy(v)) == sort!(copy(v); scratch) + @test sortperm(v) == sortperm(v; scratch=[4]) + @test sortperm!(Vector{Int}(undef, n), v) == sortperm!(Vector{Int}(undef, n), v; scratch=[4]) n > 100 && continue M = rand(n, n) - @test sort(M; dims=2) == sort(M; dims=2, workspace=w) - @test sort!(copy(M); dims=1) == sort!(copy(M); dims=1, workspace=w) + @test sort(M; dims=2) == sort(M; dims=2, scratch) + @test sort!(copy(M); dims=1) == sort!(copy(M); dims=1, scratch) + end +end + +@testset "sorting preserves identity" begin + a = BigInt.([2, 2, 2, 1, 1, 1]) # issue #39620 + sort!(a) + @test length(IdDict(a .=> a)) == 6 + + for v in [BigInt.(rand(1:5, 40)), BigInt.(rand(Int, 70)), BigFloat.(rand(52))] + hashes = Set(hash.(v)) + ids = Set(objectid.(v)) + sort!(v) + @test hashes == Set(hash.(v)) + @test ids == Set(objectid.(v)) + end +end + +@testset "Unions with missing" begin + @test issorted(sort(shuffle!(vcat(fill(missing, 10), rand(Int, 100))))) + @test issorted(sort(vcat(rand(Int8, 600), [missing]))) + + # Because we define defalg(::AbstractArray{Missing}) + @test all(fill(missing, 10) .=== sort(fill(missing, 10))) + + # Unit tests for WithoutMissingVector + a = [1,7,missing,4] + @test_throws ArgumentError Base.Sort.WithoutMissingVector(a) + @test eltype(a[[1,2,4]]) == eltype(a) + @test eltype(Base.Sort.WithoutMissingVector(a[[1,2,4]])) == Int + am = Base.Sort.WithoutMissingVector(a, unsafe=true) + @test am[2] == 7 + @test eltype(am) == Int +end + +@testset "Specific algorithms" begin + let + requires_uint_mappable = Union{Base.Sort.RadixSort, Base.Sort.ConsiderRadixSort, + Base.Sort.CountingSort, Base.Sort.ConsiderCountingSort, + typeof(Base.Sort.DEFAULT_STABLE.next.next.next.big.next.yes), + typeof(Base.Sort.DEFAULT_STABLE.next.next.next.big.next.yes.big), + typeof(Base.Sort.DEFAULT_STABLE.next.next.next.big.next.yes.big.next)} + + function test_alg(kw, alg, float=true) + for order in [Base.Forward, Base.Reverse, Base.By(x -> x^2)] + order isa Base.By && alg isa requires_uint_mappable && continue + for n in [1,7,179,1312] + + n == 1 && alg isa Base.Sort.RadixSort && continue + + x = rand(1:n+1, n) + y = sort(x; order) + @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x + @test all(y .=== x) + + alg isa requires_uint_mappable && continue + + x = randn(n) + y = sort(x; order) + @test Base.Sort._sort!(x, alg, order, (;kw(y)...)) !== x + @test all(y .=== x) + end + end + end + test_alg(alg) = test_alg(x -> (), alg) + + function 
test_alg_rec(alg, extrema=false) + if extrema + test_alg(alg) do y + (;mn=first(y),mx=last(y)) + end + else + test_alg(alg) + end + extrema |= alg isa Base.Sort.ComputeExtrema + for name in fieldnames(typeof(alg)) + a = getfield(alg, name) + a isa Base.Sort.Algorithm && test_alg_rec(a, extrema) + end + end + + test_alg_rec(Base.DEFAULT_STABLE) + end +end + +@testset "show(::Algorithm)" begin + @test eval(Meta.parse(string(Base.DEFAULT_STABLE))) === Base.DEFAULT_STABLE + lines = split(string(Base.DEFAULT_STABLE), '\n') + @test 10 < maximum(length, lines) < 100 + @test 1 < length(lines) < 30 +end + +@testset "Extensibility" begin + # Defining new algorithms & backwards compatibility with packages that use sorting internals + + struct MyFirstAlg <: Base.Sort.Algorithm end + + @test_throws ArgumentError sort([1,2,3], alg=MyFirstAlg()) # not a stack overflow error + + v = shuffle(vcat(fill(missing, 10), rand(Int, 100))) + + # The pre 1.9 dispatch method + function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) + v[lo:hi] .= 7 + end + @test sort([1,2,3], alg=MyFirstAlg()) == [7,7,7] + @test all(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg())) .=== vcat(fill(7, 100), fill(missing, 10))) + + # Using the old hook with old entry-point + @test sort!([3,1,2], MyFirstAlg(), Base.Forward) == [7,7,7] + @test sort!([3,1,2], 1, 3, MyFirstAlg(), Base.Forward) == [7,7,7] + + # Use the pre 1.9 entry-point into the internals + function Base.sort!(v::AbstractVector{Int}, lo::Integer, hi::Integer, ::MyFirstAlg, o::Base.Order.Ordering) + sort!(v, lo, hi, Base.DEFAULT_STABLE, o) + end + @test sort([3,1,2], alg=MyFirstAlg()) == [1,2,3] + @test issorted(sort(v, alg=Base.Sort.InitialOptimizations(MyFirstAlg()))) + + # Another pre 1.9 entry-point into the internals + @test issorted(sort!(rand(100), InsertionSort, Base.Order.Forward)) + + struct MySecondAlg <: Base.Sort.Algorithm end + # A new dispatch method + function Base.Sort._sort!(v::AbstractVector, ::MySecondAlg, o::Base.Order.Ordering, kw) + Base.Sort.@getkw lo hi + v[lo:hi] .= 9 + end + @test sort([1,2,3], alg=MySecondAlg()) == [9,9,9] + @test all(sort(v, alg=Base.Sort.InitialOptimizations(MySecondAlg())) .=== vcat(fill(9, 100), fill(missing, 10))) +end + +@testset "sort!(v, lo, hi, alg, order)" begin + v = Vector{Float64}(undef, 4000) + for alg in [MergeSort, QuickSort, InsertionSort, Base.DEFAULT_STABLE, Base.DEFAULT_UNSTABLE] + rand!(v) + sort!(v, 1, 2000, alg, Base.Forward) + @test issorted(v[1:2000]) + @test !issorted(v) + + sort!(v, 2001, 4000, alg, Base.Forward) + @test issorted(v[1:2000]) + @test issorted(v[2001:4000]) + @test !issorted(v) + + sort!(v, 1001, 3000, alg, Base.Forward) + @test issorted(v[1:1000]) + @test issorted(v[1001:3000]) + @test issorted(v[3001:4000]) + @test !issorted(v[1:2000]) + @test !issorted(v[2001:4000]) + @test !issorted(v) + end +end + +@testset "IEEEFloatOptimization with -0.0" begin + x = vcat(round.(100 .* randn(1000)) ./ 100) # Also test lots of duplicates + x[rand(1:1000, 5)] .= 0.0 + x[rand(1:1000, 5)] .= -0.0 # To be sure that -0.0 is present + @test issorted(sort!(x)) +end + +@testset "Count sort near the edge of its range" begin + @test issorted(sort(rand(typemin(Int):typemin(Int)+100, 1000))) + @test issorted(sort(rand(typemax(Int)-100:typemax(Int), 1000))) + @test issorted(sort(rand(Int8, 600))) +end + +@testset "ScratchQuickSort API" begin + bsqs = Base.Sort.ScratchQuickSort + @test bsqs(1, 2, MergeSort) === bsqs(1, 2, MergeSort) + @test bsqs(missing, 
2, MergeSort) === bsqs(missing, 2, MergeSort) + @test bsqs(1, missing, MergeSort) === bsqs(1, missing, MergeSort) + @test bsqs(missing, missing, MergeSort) === bsqs(missing, missing, MergeSort) + @test bsqs(1, MergeSort) === bsqs(1, 1, MergeSort) + @test bsqs(missing, MergeSort) === bsqs(missing, missing, MergeSort) + @test bsqs(MergeSort) === bsqs(missing, missing, MergeSort) + + @test bsqs(1, 2) === bsqs(1, 2, InsertionSort) + @test bsqs(missing, 2) === bsqs(missing, 2, InsertionSort) + @test bsqs(1, missing) === bsqs(1, missing, InsertionSort) + @test bsqs(missing, missing) === bsqs(missing, missing, InsertionSort) + @test bsqs(1) === bsqs(1, 1, InsertionSort) + @test bsqs(missing) === bsqs(missing, missing, InsertionSort) + @test bsqs() === bsqs(missing, missing, InsertionSort) +end + +@testset "ScratchQuickSort allocations on non-concrete eltype" begin + v = Vector{Union{Nothing, Bool}}(rand(Bool, 10000)) + @test 10 > @allocations sort(v) + @test 10 > @allocations sort(v; alg=Base.Sort.ScratchQuickSort()) + # it would be nice if these numbers were lower (1 or 2), but these + # test that we don't have O(n) allocations due to type instability +end + +function test_allocs() + v = rand(10) + i = randperm(length(v)) + @test 2 >= @allocations sort(v) + @test 0 == @allocations sortperm!(i, v) + @test 0 == @allocations sort!(i) + @test 0 == @allocations sortperm!(i, v, rev=true) + @test 2 >= @allocations sortperm(v, rev=true) + @test 2 >= @allocations sortperm(v, rev=false) + @test 0 == @allocations sortperm!(i, v, order=Base.Reverse) + @test 2 >= @allocations sortperm(v) + @test 2 >= @allocations sortperm(i, by=sqrt) + @test 0 == @allocations sort!(v, lt=(a, b) -> hash(a) < hash(b)) + sort!(Int[], rev=false) # compile + @test 0 == @allocations sort!(i, rev=false) + rand!(i) + @test 0 == @allocations sort!(i, order=Base.Reverse) +end +@testset "Small calls do not unnecessarily allocate" begin + test_allocs() +end + +@testset "Presorted and reverse-presorted" begin + for len in [7, 92, 412, 780] + x = sort(randn(len)) + for _ in 1:2 + @test issorted(sort(x)) + @test issorted(sort(x), by=x -> x+7) + reverse!(x) + end + end +end + +struct MyArray49392{T, N} <: AbstractArray{T, N} + data::Array{T, N} +end +Base.size(A::MyArray49392) = size(A.data) +Base.getindex(A::MyArray49392, i...) = getindex(A.data, i...) +Base.setindex!(A::MyArray49392, v, i...) = setindex!(A.data, v, i...) +Base.similar(A::MyArray49392, ::Type{T}, dims::Dims{N}) where {T, N} = MyArray49392(similar(A.data, T, dims)) + +@testset "Custom matrices (#49392)" begin + x = rand(10, 10) + y = MyArray49392(copy(x)) + @test all(sort!(y, dims=2) .== sort!(x,dims=2)) +end + +@testset "MissingOptimization fastpath for Perm ordering when lo:hi ≠ eachindex(v)" begin + v = [rand() < .5 ? 
missing : rand() for _ in 1:100] + ix = collect(1:100) + sort!(ix, 1, 10, Base.Sort.DEFAULT_STABLE, Base.Order.Perm(Base.Order.Forward, v)) + @test issorted(v[ix[1:10]]) +end + +struct NonScalarIndexingOfWithoutMissingVectorAlg <: Base.Sort.Algorithm end +function Base.Sort._sort!(v::AbstractVector, ::NonScalarIndexingOfWithoutMissingVectorAlg, o::Base.Order.Ordering, kw) + Base.Sort.@getkw lo hi + first_half = v[lo:lo+(hi-lo)÷2] + second_half = v[lo+(hi-lo)÷2+1:hi] + whole = v[lo:hi] + all(vcat(first_half, second_half) .=== whole) || error() + out = Base.Sort._sort!(whole, Base.Sort.DEFAULT_STABLE, o, (;kw..., lo=1, hi=length(whole))) + v[lo:hi] .= whole + out +end + +@testset "Non-scaler indexing of WithoutMissingVector" begin + @testset "Unit test" begin + wmv = Base.Sort.WithoutMissingVector(Union{Missing, Int}[1, 7, 2, 9]) + @test wmv[[1, 3]] == [1, 2] + @test wmv[1:3] == [1, 7, 2] + end + @testset "End to end" begin + alg = Base.Sort.InitialOptimizations(NonScalarIndexingOfWithoutMissingVectorAlg()) + @test issorted(sort(rand(100); alg)) + @test issorted(sort([rand() < .5 ? missing : randstring() for _ in 1:100]; alg)) + end +end + +struct DispatchLoopTestAlg <: Base.Sort.Algorithm end +function Base.sort!(v::AbstractVector, lo::Integer, hi::Integer, ::DispatchLoopTestAlg, order::Base.Order.Ordering) + sort!(view(v, lo:hi); order) +end +@testset "Support dispatch from the old style to the new style and back" begin + @test issorted(sort!(rand(100), Base.Sort.InitialOptimizations(DispatchLoopTestAlg()), Base.Order.Forward)) +end + +@testset "partialsort tests added for BracketedSort #52006" begin + x = rand(Int, 1000) + @test partialsort(x, 1) == minimum(x) + @test partialsort(x, 1000) == maximum(x) + sx = sort(x) + for i in [1, 2, 4, 10, 11, 425, 500, 845, 991, 997, 999, 1000] + @test partialsort(x, i) == sx[i] + end + for i in [1:1, 1:2, 1:5, 1:8, 1:9, 1:11, 1:108, 135:812, 220:586, 363:368, 450:574, 458:597, 469:638, 487:488, 500:501, 584:594, 1000:1000] + @test partialsort(x, i) == sx[i] + end + + # Semi-pathological input + seed = hash(1000, Int === Int64 ? 0x85eb830e0216012d : 0xae6c4e15) + seed = hash(1, seed) + for i in 1:100 + j = mod(hash(i, seed), i:1000) + x[j] = typemax(Int) + end + @test partialsort(x, 500) == sort(x)[500] + + # Fully pathological input + # it would be too much trouble to actually construct a valid pathological input, so we + # construct an invalid pathological input. 
+ # This test is kind of sketchy because it passes invalid inputs to the function + for i in [1:6, 1:483, 1:957, 77:86, 118:478, 223:227, 231:970, 317:958, 500:501, 500:501, 500:501, 614:620, 632:635, 658:665, 933:940, 937:942, 997:1000, 999:1000] + x = rand(1:5, 1000) + @test partialsort(x, i, lt=(<=)) == sort(x)[i] + end + for i in [1, 7, 8, 490, 495, 852, 993, 996, 1000] + x = rand(1:5, 1000) + @test partialsort(x, i, lt=(<=)) == sort(x)[i] end end @@ -839,7 +1213,7 @@ end @testset "issue #34408" begin r = 1f8-10:1f8 - # collect(r) = Float32[9.999999e7, 9.999999e7, 9.999999e7, 9.999999e7, 1.0e8, 1.0e8, 1.0e8, 1.0e8, 1.0e8] + @test collect(r) == Float32[9.999999e7, 9.999999e7, 9.999999e7, 9.999999e7, 1.0e8, 1.0e8, 1.0e8, 1.0e8, 1.0e8] for i in r @test_broken searchsorted(collect(r), i) == searchsorted(r, i) end @@ -856,6 +1230,16 @@ end @test searchsorted(v, 0.1, rev=true) === 4:3 end end + + @testset "ranges issue #44102, PR #50365" begin + # range sorting test for different Ordering parameter combinations + @test searchsorted(-1000.0:1:1000, -0.0) === 1001:1000 + @test searchsorted(-1000.0:1:1000, -0.0; lt=<) === 1001:1001 + @test searchsorted(-1000.0:1:1000, -0.0; lt=<, by=x->x) === 1001:1001 + @test searchsorted(reverse(-1000.0:1:1000), -0.0; lt=<, by=-) === 1001:1001 + @test searchsorted(reverse(-1000.0:1:1000), -0.0, rev=true) === 1002:1001 + @test searchsorted(reverse(-1000.0:1:1000), -0.0; lt=<, rev=true) === 1001:1001 + end end # The "searchsorted" testset is at the end of the file because it is slow. diff --git a/test/spawn.jl b/test/spawn.jl index a8a2af40643ff..4ae4b3368bef6 100644 --- a/test/spawn.jl +++ b/test/spawn.jl @@ -5,7 +5,7 @@ ################################### using Random, Sockets -using Downloads: download +using Downloads: Downloads, download valgrind_off = ccall(:jl_running_on_valgrind, Cint, ()) == 0 @@ -20,8 +20,33 @@ shcmd = `sh` sleepcmd = `sleep` lscmd = `ls` havebb = false + +function _tryonce_download_from_cache(desired_url::AbstractString) + cache_url = "https://cache.julialang.org/$(desired_url)" + cache_output_filename = joinpath(mktempdir(), "myfile") + cache_response = Downloads.request( + cache_url; + output = cache_output_filename, + throw = false, + timeout = 60, + ) + if cache_response isa Downloads.Response + if Downloads.status_ok(cache_response.proto, cache_response.status) + return cache_output_filename + end + end + return Downloads.download(desired_url; timeout = 60) +end + +function download_from_cache(desired_url::AbstractString) + f = () -> _tryonce_download_from_cache(desired_url) + delays = Float64[30, 30, 60, 60, 60] + g = retry(f; delays) + return g() +end + if Sys.iswindows() - busybox = download("https://cache.julialang.org/https://frippery.org/files/busybox/busybox.exe", joinpath(tempdir(), "busybox.exe")) + busybox = download_from_cache("https://frippery.org/files/busybox/busybox.exe") havebb = try # use busybox-w32 on windows, if available success(`$busybox`) true @@ -635,9 +660,21 @@ let p = run(`$sleepcmd 100`, wait=false) kill(p) end -# Second argument of shell_parse +# Second return of shell_parse let s = " \$abc " - @test s[Base.shell_parse(s)[2]] == "abc" + @test Base.shell_parse(s)[2] === findfirst('a', s) + s = "abc def" + @test Base.shell_parse(s)[2] === findfirst('d', s) + s = "abc 'de'f\"\"g" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc \$x'de'f\"\"g" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc def\$x'g'" + @test Base.shell_parse(s)[2] === findfirst('\'', s) + s = "abc def\$x 
" + @test Base.shell_parse(s)[2] === findfirst('x', s) + s = "abc \$(d)ef\$(x " + @test Base.shell_parse(s)[2] === findfirst('x', s) - 1 end # Logging macros should not output to finalized streams (#26687) @@ -770,8 +807,9 @@ let text = "input-test-text" out = Base.BufferStream() proc = run(catcmd, IOBuffer(text), out, wait=false) @test proc.out === out - @test read(out, String) == text @test success(proc) + closewrite(out) + @test read(out, String) == text out = PipeBuffer() proc = run(catcmd, IOBuffer(SubString(text)), out) @@ -978,5 +1016,19 @@ end args = ["ab ^` c", " \" ", "\"", ascii95, ascii95, "\"\\\"\\", "", "|", "&&", ";"]; @test Base.shell_escape_wincmd(Base.escape_microsoft_c_args(args...)) == "\"ab ^` c\" \" \\\" \" \"\\\"\" \" !\\\"#\$%^&'^(^)*+,-./0123456789:;^<=^>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^^_`abcdefghijklmnopqrstuvwxyz{^|}~\" \" ^!\\\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\" \"\\\"\\\\\\\"\\\\\" \"\" ^| ^&^& ;" +end +# effects for Cmd construction +for f in (() -> `a b c`, () -> `a a$("bb")a $("c")`) + effects = Base.infer_effects(f) + @test Core.Compiler.is_effect_free(effects) + @test Core.Compiler.is_terminates(effects) + @test Core.Compiler.is_noub(effects) + @test !Core.Compiler.is_consistent(effects) +end +let effects = Base.infer_effects(x -> `a $x`, (Any,)) + @test !Core.Compiler.is_effect_free(effects) + @test !Core.Compiler.is_terminates(effects) + @test !Core.Compiler.is_noub(effects) + @test !Core.Compiler.is_consistent(effects) end diff --git a/test/specificity.jl b/test/specificity.jl index 1a5c117ce5d9d..9b605444bad42 100644 --- a/test/specificity.jl +++ b/test/specificity.jl @@ -214,7 +214,7 @@ f27361(::M) where M <: Tuple{3} = nothing @test length(methods(f27361)) == 2 # specificity of TypeofBottom -@test args_morespecific(Tuple{Core.TypeofBottom}, Tuple{DataType}) +@test !args_morespecific(Tuple{DataType}, Tuple{Core.TypeofBottom}) @test args_morespecific(Tuple{Core.TypeofBottom}, Tuple{Type{<:Tuple}}) @test args_morespecific(Tuple{Type{Any}, Type}, Tuple{Type{T}, Type{T}} where T) @@ -311,3 +311,8 @@ let A = Tuple{Type{SubString{S}},AbstractString} where S<:AbstractString, @test args_morespecific(B, C) @test args_morespecific(A, C) end + +@test args_morespecific(Tuple{Type{Union{}}, Any}, Tuple{Any, Type{Union{}}}) +@test args_morespecific(Tuple{typeof(Union{}), Any}, Tuple{Any, Type{Union{}}}) +@test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any}, Tuple{Type{Union{}}, Any, Type{Union{}}}) +@test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any, Type{Union{}}}, Tuple{Type{Union{}}, Any, Type{Union{}}, Type{Union{}}}) diff --git a/test/stack_overflow.jl b/test/stack_overflow.jl index 9f4bae6f3f5b3..297186c8a4d3a 100644 --- a/test/stack_overflow.jl +++ b/test/stack_overflow.jl @@ -17,3 +17,20 @@ let exename = Base.julia_cmd() @show readchomperrors(`$exename -e "f() = f(); f()"`) @show readchomperrors(`$exename -e "f() = f(); fetch(@async f())"`) end + +# Issue #49507: stackoverflow in type inference caused by close(::Channel, ::Exception) +@testset "close(::Channel, ::StackOverflowError)" begin + ch = let result = Channel() + foo() = try + foo() + catch e; + close(result, e) + end + + foo() # This shouldn't fail with an internal stackoverflow error in inference. 
+ + result + end + + @test (try take!(ch) catch e; e; end) isa StackOverflowError +end diff --git a/test/stacktraces.jl b/test/stacktraces.jl index cbb07a60e456b..be035e31833d3 100644 --- a/test/stacktraces.jl +++ b/test/stacktraces.jl @@ -91,8 +91,16 @@ trace = (try; f(3); catch; stacktrace(catch_backtrace()); end)[1:3] can_inline = Bool(Base.JLOptions().can_inline) for (frame, func, inlined) in zip(trace, [g,h,f], (can_inline, can_inline, false)) @test frame.func === typeof(func).name.mt.name - #@test get(frame.linfo).def === which(func, (Any,)).func - #@test get(frame.linfo).specTypes === Tuple{typeof(func), Int} + # broken until #50082 can be addressed + if inlined + @test frame.linfo.def.module === which(func, (Any,)).module broken=true + @test frame.linfo.def === which(func, (Any,)) broken=true + @test frame.linfo.specTypes === Tuple{typeof(func), Int} broken=true + else + @test frame.linfo.def.module === which(func, (Any,)).module + @test frame.linfo.def === which(func, (Any,)) + @test frame.linfo.specTypes === Tuple{typeof(func), Int} + end # line @test frame.file === Symbol(@__FILE__) @test !frame.from_c @@ -104,7 +112,7 @@ let src = Meta.lower(Main, quote let x = 1 end end).args[1]::Core.CodeInfo, li = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()), sf - li.uninferred = src + setfield!(li, :uninferred, src, :monotonic) li.specTypes = Tuple{} li.def = @__MODULE__ sf = StackFrame(:a, :b, 3, li, false, false, 0) @@ -158,6 +166,22 @@ end @test bt[1].line == topline+4 end +# Accidental incorrect phi block computation in interpreter +global global_false_bool = false +let bt, topline = @__LINE__ + try + let + global read_write_global_bt_test, global_false_bool + if global_false_bool + end + (read_write_global_bt_test, (read_write_global_bt_test=2;)) + end + catch + bt = stacktrace(catch_backtrace()) + end + @test bt[1].line == topline+6 +end + # issue #28990 let bt try @@ -191,3 +215,56 @@ let bt end @test any(s->startswith(string(s), "f33065(x::Float32, y::Float32; b::Float64, a::String, c::"), bt) end + +struct F49231{a,b,c,d,e,f,g} end +(::F49231)(a,b,c) = error("oops") + +@testset "type_depth_limit" begin + tdl = Base.type_depth_limit + + str = repr(typeof(view([1, 2, 3], 1:2))) + @test tdl(str, 0, maxdepth = 1) == "SubArray{…}" + @test tdl(str, 0, maxdepth = 2) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 0, maxdepth = 3) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{…}}, true}" + @test tdl(str, 0, maxdepth = 4) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{$Int}}, true}" + @test tdl(str, 3) == "SubArray{…}" + @test tdl(str, 44) == "SubArray{…}" + @test tdl(str, 45) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 59) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 60) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{…}}, true}" + @test tdl(str, 100) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{$Int}}, true}" + + str = repr(Vector{V} where V<:AbstractVector{T} where T<:Real) + @test tdl(str, 0, maxdepth = 1) == "Vector{…} where {…}" + @test tdl(str, 0, maxdepth = 2) == "Vector{V} where {T<:Real, V<:AbstractVector{…}}" + @test tdl(str, 0, maxdepth = 3) == "Vector{V} where {T<:Real, V<:AbstractVector{T}}" + @test tdl(str, 20) == "Vector{…} where {…}" + @test tdl(str, 46) == "Vector{…} where {…}" + @test tdl(str, 47) == "Vector{V} where {T<:Real, V<:AbstractVector{T}}" + + str = 
"F49231{Vector,Val{('}','}')},Vector{Vector{Vector{Vector}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}" + @test tdl(str, 105) == "F49231{Vector,Val{('}','}')},Vector{Vector{Vector{…}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}" + @test tdl(str, 85) == "F49231{Vector,Val{…},Vector{…},Tuple{…},Int,Int,Int}" + + # Stacktrace + a = UInt8(81):UInt8(160) + b = view(a, 1:64) + c = reshape(b, (8, 8)) + d = reinterpret(reshape, Float64, c) + sqrteach(a) = [sqrt(x) for x in a] + st = try + sqrteach(d) + catch e + stacktrace(catch_backtrace()) + end + str = sprint(Base.show_backtrace, st, context = (:limit=>true, :stacktrace_types_limited => Ref(false), :color=>true, :displaysize=>(50,105))) + @test contains(str, "[5] \e[0m\e[1mcollect_to!\e[22m\e[0m\e[1m(\e[22m\e[90mdest\e[39m::\e[0mVector\e[90m{…}\e[39m, \e[90mitr\e[39m::\e[0mBase.Generator\e[90m{…}\e[39m, \e[90moffs\e[39m::\e[0m$Int, \e[90mst\e[39m::\e[0mTuple\e[90m{…}\e[39m\e[0m\e[1m)\e[22m\n\e[90m") + + st = try + F49231{Vector,Val{'}'},Vector{Vector{Vector{Vector}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}()(1,2,3) + catch e + stacktrace(catch_backtrace()) + end + str = sprint(Base.show_backtrace, st, context = (:limit=>true, :stacktrace_types_limited => Ref(false), :color=>true, :displaysize=>(50,132))) + @test contains(str, "[2] \e[0m\e[1m(::$F49231{Vector, Val{…}, Vector{…}, NTuple{…}, $Int, $Int, $Int})\e[22m\e[0m\e[1m(\e[22m\e[90ma\e[39m::\e[0m$Int, \e[90mb\e[39m::\e[0m$Int, \e[90mc\e[39m::\e[0m$Int\e[0m\e[1m)\e[22m\n\e[90m") +end diff --git a/test/staged.jl b/test/staged.jl index b99ef46a2bc1e..76d02c1938e4d 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -182,7 +182,7 @@ let gf_err, tsk = @async nothing # create a Task for yield to try to run Expected = ErrorException("task switch not allowed from inside staged nor pure functions") @test_throws Expected gf_err() @test_throws Expected gf_err() - @test gf_err_ref[] == 4 + @test gf_err_ref[] < 1000 end gf_err_ref[] = 0 @@ -196,12 +196,11 @@ let gf_err2 return nothing end Expected = ErrorException("code reflection cannot be used from generated functions") + @test_throws Expected gf_err2(code_lowered) @test_throws Expected gf_err2(code_typed) @test_throws Expected gf_err2(code_llvm) @test_throws Expected gf_err2(code_native) - @test gf_err_ref[] == 66 - @test gf_err2(code_lowered) === nothing - @test gf_err_ref[] == 1077 + @test gf_err_ref[] == 88 end # issue #15043 @@ -246,12 +245,18 @@ f22440kernel(x::AbstractFloat) = x * x f22440kernel(::Type{T}) where {T} = one(T) f22440kernel(::Type{T}) where {T<:AbstractFloat} = zero(T) -@generated function f22440(y) - match = Base._methods_by_ftype(Tuple{typeof(f22440kernel),y}, -1, typemax(UInt))[1] +function f22440_gen(world::UInt, source, _, y) + match = only(Base._methods_by_ftype(Tuple{typeof(f22440kernel),y}, -1, world)) code_info = Base.uncompressed_ir(match.method) Meta.partially_inline!(code_info.code, Any[], match.spec_types, Any[match.sparams...], 0, 0, :propagate) + # TODO: this is mandatory: code_info.min_world = max(code_info.min_world, min_world[]) + # TODO: this is mandatory: code_info.max_world = min(code_info.max_world, max_world[]) return code_info end +@eval function f22440(y) + $(Expr(:meta, :generated, f22440_gen)) + $(Expr(:meta, :generated_only)) +end @test f22440(Int) === f22440kernel(Int) @test f22440(Float64) === f22440kernel(Float64) @@ -303,5 +308,70 @@ end @generated function f33243() :(global x33243 = 2) end +@test_throws ErrorException f33243() +global x33243 @test f33243() === 2 @test x33243 === 2 
+ +# https://github.com/JuliaDebug/CassetteOverlay.jl/issues/12 +# generated function with varargs and unfortunately placed unused slot +@generated function f_vararg_generated(args...) + local unusedslot4 + local unusedslot5 + local unusedslot6 + :($args) +end +g_vararg_generated() = f_vararg_generated((;), (;), Base.inferencebarrier((;))) +let tup = g_vararg_generated() + @test all(==(typeof((;))), tup) + # This is just to make sure that the test is actually testing what we want: + # the test only works if there is an unused that matches the position of + # the inferencebarrier argument above (N.B. the generator function itself + # shifts everything over by 1) + @test_broken only(code_lowered(only(methods(f_vararg_generated)).generator.gen)).slotflags[5] == 0x00 +end + +# respect a given linetable in code generation +# https://github.com/JuliaLang/julia/pull/47750 +let world = Base.get_world_counter() + match = Base._which(Tuple{typeof(sin), Int}; world) + mi = Core.Compiler.specialize_method(match) + lwr = Core.Compiler.retrieve_code_info(mi, world) + @test all(lin->lin.method === :sin, lwr.linetable) + @eval function sin_generated(a) + $(Expr(:meta, :generated, Returns(lwr))) + $(Expr(:meta, :generated_only)) + end + src = only(code_lowered(sin_generated, (Int,))) + @test all(lin->lin.method === :sin, src.linetable) + @test sin_generated(42) == sin(42) +end + +# Allow passing unreachable insts in generated codeinfo +let + dummy() = return + dummy_m = which(dummy, Tuple{}) + + src = Base.uncompressed_ir(dummy_m) + src.code = Any[ + # block 1 + Core.ReturnNode(nothing), + # block 2 + Core.ReturnNode(), + ] + nstmts = length(src.code) + nslots = 1 + src.ssavaluetypes = nstmts + src.codelocs = fill(Int32(1), nstmts) + src.ssaflags = fill(Int32(0), nstmts) + src.slotflags = fill(0, nslots) + src.slottypes = Any[Any] + + @eval function f_unreachable() + $(Expr(:meta, :generated, Returns(src))) + $(Expr(:meta, :generated_only)) + end + + ir, _ = Base.code_ircode(f_unreachable, ()) |> only + @test length(ir.cfg.blocks) == 1 +end diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl new file mode 100644 index 0000000000000..be70578d149f5 --- /dev/null +++ b/test/strings/annotated.jl @@ -0,0 +1,108 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +@testset "AnnotatedString" begin + str = Base.AnnotatedString("some string") + @test str == Base.AnnotatedString(str.string, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]) + @test length(str) == 11 + @test ncodeunits(str) == 11 + @test eltype(str) == Base.AnnotatedChar{eltype(str.string)} + @test first(str) == Base.AnnotatedChar(first(str.string), Pair{Symbol, Any}[]) + @test str[1:4] isa SubString{typeof(str)} + @test str[1:4] == Base.AnnotatedString("some") + @test "a" * str == Base.AnnotatedString("asome string") + @test str * "a" == Base.AnnotatedString("some stringa") + @test str * str == Base.AnnotatedString("some stringsome string") + Base.annotate!(str, 1:4, :thing => 0x01) + Base.annotate!(str, 6:11, :other => 0x02) + Base.annotate!(str, 1:11, :all => 0x03) + # :thing :other + # ┌┸─┐ ┌──┸─┐ + # "some string" + # └───┰─────┘ + # :all + @test str[3:4] == SubString(str, 3, 4) + @test Base.AnnotatedString(str[3:4]) == + Base.AnnotatedString("me", [(1:2, :thing => 0x01), (1:2, :all => 0x03)]) + @test Base.AnnotatedString(str[3:6]) == + Base.AnnotatedString("me s", [(1:2, :thing => 0x01), (1:4, :all => 0x03), (4:4, :other => 0x02)]) + @test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)]) + @test str != Base.AnnotatedString("some string") + @test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (6:6, :other => 0x02), (11:11, :all => 0x03)]) + @test str != Base.AnnotatedString("some string", [(1:4, :thing => 0x11), (1:11, :all => 0x13), (6:11, :other => 0x12)]) + @test str != Base.AnnotatedString("some thingg", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)]) + @test Base.AnnotatedString([Base.AnnotatedChar('a', [:a => 1]), Base.AnnotatedChar('b', [:b => 2])]) == + Base.AnnotatedString("ab", [(1:1, :a => 1), (2:2, :b => 2)]) + let allstrings = + ['a', Base.AnnotatedChar('a'), Base.AnnotatedChar('a', [:aaa => 0x04]), + "a string", Base.AnnotatedString("a string"), + Base.AnnotatedString("a string", [(1:2, :hmm => '%')])] + for str1 in repeat(allstrings, 2) + for str2 in repeat(allstrings, 2) + @test String(str1 * str2) == + String(string(str1, str2)) == + String(string(str1)) * String(string(str2)) + @test Base.annotatedstring(str1 * str2) == + Base.annotatedstring(str1, str2) == + Base.annotatedstring(str1) * Base.annotatedstring(str2) + end + end + end + # @test collect(Base.eachstyle(str)) == + # [("some", [:thing => 0x01, :all => 0x03]), + # (" string", [:all => 0x03, :other => 0x02])] + @test ==(Base.annotatedstring_optimize!( + Base.AnnotatedString("abc", [(1:1, :val => 1), + (2:2, :val => 2), + (2:2, :val => 1), + (3:3, :val => 2)])), + Base.AnnotatedString("abc", [(1:2, :val => 1), + (2:3, :val => 2)])) +end + +@testset "AnnotatedChar" begin + chr = Base.AnnotatedChar('c') + @test chr == Base.AnnotatedChar(chr.char, Pair{Symbol, Any}[]) + str = Base.AnnotatedString("hmm", [(1:1, :attr => "h0h0"), + (1:2, :attr => "h0m1"), + (2:3, :attr => "m1m2")]) + @test str[1] == Base.AnnotatedChar('h', Pair{Symbol, Any}[:attr => "h0h0"]) + @test str[2] == Base.AnnotatedChar('m', Pair{Symbol, Any}[:attr => "h0m1", :attr => "m1m2"]) + @test str[3] == Base.AnnotatedChar('m', Pair{Symbol, Any}[:attr => "m1m2"]) +end + +@testset "Styling preservation" begin + str = Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)]) + @test match(r".e", str).match == str[3:4] + @test match(r"(.e)", str).captures 
== [str[3:4]] + let m0 = match(r"(.)e", str) + m1 = first(eachmatch(r"(.)e", str)) + for f in fieldnames(RegexMatch) + @test getfield(m0, f) == getfield(m1, f) + end + end + @test lpad(str, 12) == + Base.AnnotatedString(" some string", [(2:5, :thing => 0x01), + (2:12, :all => 0x03), + (7:12, :other => 0x02)]) + @test rpad(str, 12) == + Base.AnnotatedString("some string ", [(1:4, :thing => 0x01), + (1:11, :all => 0x03), + (6:11, :other => 0x02)]) + str1 = Base.AnnotatedString("test", [(1:4, :label => 5)]) + str2 = Base.AnnotatedString("case", [(2:3, :label => "oomph")]) + @test join([str1, str1], Base.AnnotatedString(" ")) == + Base.AnnotatedString("test test", + [(1:4, :label => 5), + (6:9, :label => 5)]) + @test join([str1, str1], Base.AnnotatedString(" ", [(1:1, :label => 2)])) == + Base.AnnotatedString("test test", + [(1:4, :label => 5), + (5:5, :label => 2), + (6:9, :label => 5)]) + @test repeat(str1, 2) == Base.AnnotatedString("testtest", [(1:8, :label => 5)]) + @test repeat(str2, 2) == Base.AnnotatedString("casecase", [(2:3, :label => "oomph"), + (6:7, :label => "oomph")]) + @test repeat(str1[1], 3) == Base.AnnotatedString("ttt", [(1:3, :label => 5)]) + @test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label => 5)]) + @test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label => "oomph")]) +end diff --git a/test/strings/basic.jl b/test/strings/basic.jl index b7021e3a2c2cb..225a41bf12be9 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -164,14 +164,23 @@ end @test endswith(y)(y) @test endswith(z, z) @test endswith(z)(z) + #40616 startswith for IO objects + let s = "JuliaLang", io = IOBuffer(s) + for prefix in ("Julia", "July", s^2, "Ju", 'J', 'x', ('j','J')) + @test startswith(io, prefix) == startswith(s, prefix) + end + end end @testset "SubStrings and Views" begin x = "abcdefg" @testset "basic unit range" begin @test SubString(x, 2:4) == "bcd" - @test view(x, 2:4) == "bcd" - @test view(x, 2:4) isa SubString + sx = view(x, 2:4) + @test sx == "bcd" + @test sx isa SubString + @test parent(sx) === x + @test parentindices(sx) == (2:4,) @test (@view x[4:end]) == "defg" @test (@view x[4:end]) isa SubString end @@ -241,8 +250,6 @@ end @test string(sym) == string(Char(0xdcdb)) @test String(sym) == string(Char(0xdcdb)) @test Meta.lower(Main, sym) === sym - @test Meta.parse(string(Char(0xe0080)," = 1"), 1, raise=false)[1] == - Expr(:error, "invalid character \"\Ue0080\" near column 1") end @testset "Symbol and gensym" begin @@ -418,7 +425,7 @@ end end @test nextind("fóobar", 0, 3) == 4 - @test Symbol(gstr) == Symbol("12") + @test Symbol(gstr) === Symbol("12") @test sizeof(gstr) == 2 @test ncodeunits(gstr) == 2 @@ -683,6 +690,7 @@ end Base.iterate(x::CharStr) = iterate(x.chars) Base.iterate(x::CharStr, i::Int) = iterate(x.chars, i) Base.lastindex(x::CharStr) = lastindex(x.chars) +Base.length(x::CharStr) = length(x.chars) @testset "cmp without UTF-8 indexing" begin # Simple case, with just ANSI Latin 1 characters @test "áB" != CharStr("áá") # returns false with bug @@ -726,6 +734,11 @@ end @test_throws ArgumentError "abc"[BitArray([true, false, true])] end +@testset "issue #46039 enhance StringIndexError display" begin + @test sprint(showerror, StringIndexError("αn", 2)) == "StringIndexError: invalid index [2], valid nearby indices [1]=>'α', [3]=>'n'" + @test sprint(showerror, StringIndexError("α\n", 2)) == "StringIndexError: invalid index [2], valid nearby indices [1]=>'α', [3]=>'\\n'" +end + @testset "concatenation" begin @test "ab" * "cd" == "abcd" @test 'a' 
* "bc" == "abc" @@ -746,11 +759,6 @@ function getData(dic) end @test getData(Dict()) == ",,,,,,,,,,,,,,,,,," -@testset "unrecognized escapes in string/char literals" begin - @test_throws Meta.ParseError Meta.parse("\"\\.\"") - @test_throws Meta.ParseError Meta.parse("\'\\.\'") -end - @testset "thisind" begin let strs = Any["∀α>β:α+1>β", s"∀α>β:α+1>β", SubString("123∀α>β:α+1>β123", 4, 18), @@ -932,6 +940,21 @@ end end end +@testset "Conversion to Type{Union{String, SubString{String}}}" begin + str = "abc" + substr = SubString(str) + for T in [String, SubString{String}] + conv_str = convert(T, str) + conv_substr = convert(T, substr) + + if T == String + @test conv_str === conv_substr === str + elseif T == SubString{String} + @test conv_str === conv_substr === substr + end + end +end + @test unsafe_wrap(Vector{UInt8},"\xcc\xdd\xee\xff\x80") == [0xcc,0xdd,0xee,0xff,0x80] @test iterate("a", 1)[2] == 2 @@ -1098,6 +1121,32 @@ end @test sprint(summary, "") == "empty String" end +@testset "isascii" begin + N = 1 + @test isascii("S"^N) == true + @test isascii("S"^(N - 1)) == true + @test isascii("S"^(N + 1)) == true + + @test isascii("λ" * ("S"^(N))) == false + @test isascii(("S"^(N)) * "λ") == false + + for p = 1:16 + N = 2^p + @test isascii("S"^N) == true + @test isascii("S"^(N - 1)) == true + @test isascii("S"^(N + 1)) == true + + @test isascii("λ" * ("S"^(N))) == false + @test isascii(("S"^(N)) * "λ") == false + @test isascii("λ"*("S"^(N - 1))) == false + @test isascii(("S"^(N - 1)) * "λ") == false + if N > 4 + @test isascii("λ" * ("S"^(N - 3))) == false + @test isascii(("S"^(N - 3)) * "λ") == false + end + end +end + @testset "Plug holes in test coverage" begin @test_throws MethodError checkbounds(Bool, "abc", [1.0, 2.0]) @@ -1115,7 +1164,7 @@ end code_units = Base.CodeUnits("abc") @test Base.IndexStyle(Base.CodeUnits) == IndexLinear() @test Base.elsize(code_units) == sizeof(UInt8) - @test Base.unsafe_convert(Ptr{Int8}, code_units) == Base.unsafe_convert(Ptr{Int8}, code_units.s) + @test Base.unsafe_convert(Ptr{Int8}, Base.cconvert(Ptr{UInt8}, code_units)) == Base.unsafe_convert(Ptr{Int8}, Base.cconvert(Ptr{Int8}, code_units.s)) end @testset "LazyString" begin @@ -1138,4 +1187,198 @@ end end return a end |> Core.Compiler.is_foldable + let i=49248 + @test String(lazy"PR n°$i") == "PR n°49248" + end +end + +@testset "String Effects" begin + for (f, Ts) in [(*, (String, String)), + (*, (Char, String)), + (*, (Char, Char)), + (string, (Symbol, String, Char)), + (==, (String, String)), + (cmp, (String, String)), + (==, (Symbol, Symbol)), + (cmp, (Symbol, Symbol)), + (String, (Symbol,)), + (length, (String,)), + (hash, (String,UInt)), + (hash, (Char,UInt)),] + e = Base.infer_effects(f, Ts) + @test Core.Compiler.is_foldable(e) || (f, Ts) + @test Core.Compiler.is_removable_if_unused(e) || (f, Ts) + end + for (f, Ts) in [(^, (String, Int)), + (^, (Char, Int)), + (codeunit, (String, Int)), + ] + e = Base.infer_effects(f, Ts) + @test Core.Compiler.is_foldable(e) || (f, Ts) + @test !Core.Compiler.is_removable_if_unused(e) || (f, Ts) + end + # Substrings don't have any nice effects because the compiler can + # invent fake indices leading to out of bounds + for (f, Ts) in [(^, (SubString{String}, Int)), + (string, (String, SubString{String})), + (string, (Symbol, SubString{String})), + (hash, (SubString{String},UInt)), + ] + e = Base.infer_effects(f, Ts) + @test !Core.Compiler.is_foldable(e) || (f, Ts) + @test !Core.Compiler.is_removable_if_unused(e) || (f, Ts) + end + @test_throws ArgumentError 
Symbol("a\0a") +end + +@testset "Ensure UTF-8 DFA can never leave invalid state" begin + for b = typemin(UInt8):typemax(UInt8) + @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_INVALID,[b],1,1) == Base._UTF8_DFA_INVALID + end +end +@testset "Ensure UTF-8 DFA stays in ASCII State for all ASCII" begin + for b = 0x00:0x7F + @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b],1,1) == Base._UTF8_DFA_ASCII + end +end + +@testset "Validate UTF-8 DFA" begin + # Unicode 15 + # Table 3-7. Well-Formed UTF-8 Byte Sequences + + table_rows = [ [0x00:0x7F], + [0xC2:0xDF,0x80:0xBF], + [0xE0:0xE0,0xA0:0xBF,0x80:0xBF], + [0xE1:0xEC,0x80:0xBF,0x80:0xBF], + [0xED:0xED,0x80:0x9F,0x80:0xBF], + [0xEE:0xEF,0x80:0xBF,0x80:0xBF], + [0xF0:0xF0,0x90:0xBF,0x80:0xBF,0x80:0xBF], + [0xF1:0xF3,0x80:0xBF,0x80:0xBF,0x80:0xBF], + [0xF4:0xF4,0x80:0x8F,0x80:0xBF,0x80:0xBF]] + invalid_first_bytes = union(0xC0:0xC1,0xF5:0xFF,0x80:0xBF) + + valid_first_bytes = union(collect(first(r) for r in table_rows)...) + + + + # Prove that the first byte sets in the table & invalid cover all bytes + @test length(union(valid_first_bytes,invalid_first_bytes)) == 256 + @test length(intersect(valid_first_bytes,invalid_first_bytes)) == 0 + + #Check the ASCII range + for b = 0x00:0x7F + #Test from both UTF-8 state and ascii state + @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b],1,1) == Base._UTF8_DFA_ACCEPT + @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b],1,1) == Base._UTF8_DFA_ASCII + end + + #Check the remaining first bytes + for b = 0x80:0xFF + if b ∈ invalid_first_bytes + @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b],1,1) == Base._UTF8_DFA_INVALID + @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b],1,1) == Base._UTF8_DFA_INVALID + else + @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b],1,1) != Base._UTF8_DFA_INVALID + @test Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b],1,1) != Base._UTF8_DFA_INVALID + end + end + + # Check two byte Sequences + for table_row in [table_rows[2]] + b1 = first(table_row[1]) + state1 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1) + state2 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1) + @test state1 == state2 + #Prove that all the first bytes in a row give same state + for b1 in table_row[1] + @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1) + @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1) + end + b1 = first(table_row[1]) + #Prove that all valid second bytes return correct state + for b2 = table_row[2] + @test Base._UTF8_DFA_ACCEPT == Base._isvalid_utf8_dfa(state1,[b2],1,1) + end + for b2 = setdiff(0x00:0xFF,table_row[2]) + @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state1,[b2],1,1) + end + end + + # Check three byte Sequences + for table_row in table_rows[3:6] + b1 = first(table_row[1]) + state1 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1) + state2 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1) + @test state1 == state2 + #Prove that all the first bytes in a row give same state + for b1 in table_row[1] + @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1) + @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1) + end + + b1 = first(table_row[1]) + b2 = first(table_row[2]) + #Prove that all valid second bytes return same state + state2 = Base._isvalid_utf8_dfa(state1,[b2],1,1) + for b2 = table_row[2] + @test state2 == Base._isvalid_utf8_dfa(state1,[b2],1,1) + end + for b2 = setdiff(0x00:0xFF,table_row[2]) + @test Base._UTF8_DFA_INVALID == 
Base._isvalid_utf8_dfa(state1,[b2],1,1) + end + + b2 = first(table_row[2]) + #Prove that all valid third bytes return correct state + for b3 = table_row[3] + @test Base._UTF8_DFA_ACCEPT == Base._isvalid_utf8_dfa(state2,[b3],1,1) + end + for b3 = setdiff(0x00:0xFF,table_row[3]) + @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state2,[b3],1,1) + end + end + + # Check Four byte Sequences + for table_row in table_rows[7:9] + b1 = first(table_row[1]) + state1 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1) + state2 = Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1) + @test state1 == state2 + #Prove that all the first bytes in a row give same state + for b1 in table_row[1] + @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ACCEPT,[b1],1,1) + @test state1 == Base._isvalid_utf8_dfa(Base._UTF8_DFA_ASCII,[b1],1,1) + end + + b1 = first(table_row[1]) + b2 = first(table_row[2]) + #Prove that all valid second bytes return same state + state2 = Base._isvalid_utf8_dfa(state1,[b2],1,1) + for b2 = table_row[2] + @test state2 == Base._isvalid_utf8_dfa(state1,[b2],1,1) + end + for b2 = setdiff(0x00:0xFF,table_row[2]) + @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state1,[b2],1,1) + end + + + b2 = first(table_row[2]) + b3 = first(table_row[3]) + state3 = Base._isvalid_utf8_dfa(state2,[b3],1,1) + #Prove that all valid third bytes return same state + for b3 = table_row[3] + @test state3 == Base._isvalid_utf8_dfa(state2,[b3],1,1) + end + for b3 = setdiff(0x00:0xFF,table_row[3]) + @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state2,[b3],1,1) + end + + b3 = first(table_row[3]) + #Prove that all valid forth bytes return correct state + for b4 = table_row[4] + @test Base._UTF8_DFA_ACCEPT == Base._isvalid_utf8_dfa(state3,[b4],1,1) + end + for b4 = setdiff(0x00:0xFF,table_row[4]) + @test Base._UTF8_DFA_INVALID == Base._isvalid_utf8_dfa(state3,[b4],1,1) + end + end end diff --git a/test/strings/io.jl b/test/strings/io.jl index 91ad83b24e328..5f657297af08f 100644 --- a/test/strings/io.jl +++ b/test/strings/io.jl @@ -156,6 +156,15 @@ @test "aaa \\g \\n" == unescape_string(str, ['g', 'n']) end @test Base.escape_raw_string(raw"\"\\\"\\-\\") == "\\\"\\\\\\\"\\\\-\\\\" + @test Base.escape_raw_string(raw"`\`\\-\\") == "\`\\\`\\\\-\\\\" + @test Base.escape_raw_string(raw"\"\\\"\\-\\", '`') == "\"\\\"\\\\-\\\\" + @test Base.escape_raw_string(raw"`\`\\-\\", '`') == "\\\`\\\\\\\`\\\\-\\\\" + @test Base.escape_raw_string(raw"some`string") == "some`string" + @test Base.escape_raw_string(raw"some\"string", '`') == "some\"string" + @test Base.escape_raw_string(raw"some`string\\") == "some`string\\\\" + @test Base.escape_raw_string(raw"some\"string\\", '`') == "some\"string\\\\" + @test Base.escape_raw_string(raw"some\"string") == "some\\\"string" + @test Base.escape_raw_string(raw"some`string", '`') == "some\\`string" end @testset "join()" begin @test join([]) == join([],",") == "" @@ -190,8 +199,8 @@ end @testset "sprint with context" begin function f(io::IO) - println(io, "compact => ", get(io, :compact, false)) - println(io, "limit => ", get(io, :limit, false)) + println(io, "compact => ", get(io, :compact, false)::Bool) + println(io, "limit => ", get(io, :limit, false)::Bool) end str = sprint(f) @@ -219,6 +228,10 @@ end """ end +@testset "sprint honoring IOContext" begin + @test startswith(sprint(show, Base.Dict[], context=(:compact=>false, :module=>nothing)), "Base.Dict") +end + @testset "#11659" begin # The indentation code was not correctly counting tab stops @test 
Base.indentation(" \t") == (8, true) diff --git a/test/strings/types.jl b/test/strings/types.jl index 1879d05eb8fab..771be253b1ec9 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -26,7 +26,7 @@ for i1 = 1:length(u8str2) end # tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes -# gives the same result as `getindex` (except that it is a veiw not a copy) +# gives the same result as `getindex` (except that it is a view not a copy) for idx in 0:1 @test SubString("∀", 1, idx) == "∀"[1:idx] end diff --git a/test/strings/util.jl b/test/strings/util.jl index 8957513e37f25..59638dc3b9ca6 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -89,6 +89,30 @@ end @test rstrip(isnumeric, "abc0123") == "abc" @test lstrip("ello", ['e','o']) == "llo" @test rstrip("ello", ['e','o']) == "ell" + + @test_throws ArgumentError strip("", "") + @test_throws ArgumentError lstrip("", "") + @test_throws ArgumentError rstrip("", "") +end + +@testset "partition" begin + # AbstractString to partition into SubString + let v=collect(Iterators.partition("foobars",1)) + @test v==SubString{String}["f","o","o","b","a","r","s"] + end + + let v=collect(Iterators.partition("foobars",2)) + @test v==SubString{String}["fo","ob","ar","s"] + end + + for n in [7,8] + @test collect(Iterators.partition("foobars",n))[1]=="foobars" + end + + # HOWEVER enumerate explicitly slices String "atoms" so `Chars` are returned + let v=collect(Iterators.partition(enumerate("foobars"),1)) + @test v==Vector{Tuple{Int64, Char}}[[(1, 'f')],[(2, 'o')],[(3, 'o')],[(4, 'b')],[(5, 'a')],[(6, 'r')], [(7, 's')]] + end end @testset "rsplit/split" begin @@ -188,6 +212,28 @@ end @test split("α β γ", "β") == rsplit("α β γ", "β") == ["α "," γ"] end +@testset "eachrsplit" begin + @test collect(eachrsplit("", 'a')) == [""] + @test collect(eachrsplit("", isspace; limit=3)) == [""] + @test collect(eachrsplit("b c d"; limit=2)) == ["d", "b c "] + @test collect(eachrsplit("a.b.c", '.'; limit=1)) == ["a.b.c"] + @test collect(eachrsplit("a..b..c", '.')) == ["c", "", "b", "", "a"] + @test collect(eachrsplit("ax b c")) == ["c", "b", "ax"] + @test collect(eachrsplit(" a 12 4 v ", isnumeric)) == [" v ", " ", "", " a "] + @test collect(eachrsplit("ba", 'a')) == ["", "b"] + @test collect(eachrsplit(" ")) == [] + @test collect(eachrsplit("aaaa", 'a'; keepempty=false)) == [] + @test collect(eachrsplit("aaaa", 'a'; limit=2)) == ["", "aaa"] + @test collect(eachrsplit("abcdef", ['b', 'e'])) == ["f", "cd", "a"] + @test collect(eachrsplit("abc", isletter)) == ["", "", "", ""] + + # This behaviour is quite surprising, but is consistent with split + # See issue 45916 + @test collect(eachrsplit("a b"; limit=2)) == ["b", "a "] # only one trailing space + @test collect(eachrsplit("a "; limit=1)) == ["a "] + @test collect(eachrsplit(" a b c d"; limit=3)) == ["d", "c", " a b "] +end + @testset "replace" begin @test replace("\u2202", '*' => '\0') == "\u2202" @@ -313,6 +359,28 @@ end # Issue 36953 @test replace("abc", "" => "_", count=1) == "_abc" + # tests for io::IO API (in addition to internals exercised above): + let buf = IOBuffer() + replace(buf, "aaa", 'a' => 'z', count=0) + replace(buf, "aaa", 'a' => 'z', count=1) + replace(buf, "bbb", 'a' => 'z') + replace(buf, "aaa", 'a' => 'z') + @test String(take!(buf)) == "aaazaabbbzzz" + end + let tempfile = tempname() + try + open(tempfile, "w") do f + replace(f, "aaa", 'a' => 'z', count=0) + replace(f, "aaa", 'a' => 'z', count=1) + replace(f, "bbb", 'a' => 'z') + replace(f, "aaa", 
'a' => 'z') + print(f, "\n") + end + @test read(tempfile, String) == "aaazaabbbzzz\n" + finally + rm(tempfile, force=true) + end + end end @testset "replace many" begin diff --git a/test/subarray.jl b/test/subarray.jl index 98335cb257110..ad68496e38245 100644 --- a/test/subarray.jl +++ b/test/subarray.jl @@ -256,7 +256,7 @@ runviews(SB::AbstractArray{T,0}, indexN, indexNN, indexNNN) where {T} = nothing ######### Tests ######### -testfull = Bool(parse(Int,(get(ENV, "JULIA_TESTFULL", "0")))) +testfull = Base.get_bool_env("JULIA_TESTFULL", false) ### Views from Arrays ### index5 = (1, :, 2:5, [4,1,5], reshape([2]), view(1:5,[2 3 4 1])) # all work with at least size 5 @@ -275,9 +275,6 @@ end # with the exception of Int-slicing oindex = (:, 6, 3:7, reshape([12]), [8,4,6,12,5,7], [3:7 1:5 2:6 4:8 5:9], reshape(2:11, 2, 5)) -_ndims(::AbstractArray{T,N}) where {T,N} = N -_ndims(x) = 1 - if testfull let B = copy(reshape(1:13^3, 13, 13, 13)) @testset "full tests: ($o1,$o2,$o3)" for o3 in oindex, o2 in oindex, o1 in oindex @@ -288,7 +285,8 @@ if testfull end let B = copy(reshape(1:13^3, 13, 13, 13)) - @testset "spot checks: $oind" for oind in ((:,:,:), + @testset "spot checks: $oind" for oind in ( + (:,:,:), (:,:,6), (:,6,:), (6,:,:), @@ -296,7 +294,6 @@ let B = copy(reshape(1:13^3, 13, 13, 13)) (3:7,:,:), (3:7,6,:), (3:7,6,0x6), - (6,UInt(3):UInt(7),3:7), (13:-2:1,:,:), ([8,4,6,12,5,7],:,3:7), (6,CartesianIndex.(6,[8,4,6,12,5,7])), @@ -307,7 +304,29 @@ let B = copy(reshape(1:13^3, 13, 13, 13)) (3,reshape(2:11,5,2),4), (3,reshape(2:2:13,3,2),4), (view(1:13,[9,12,4,13,1]),2:6,4), - ([1:5 2:6 3:7 4:8 5:9], :, 3)) + ([1:5 2:6 3:7 4:8 5:9], :, 3), + ) + runsubarraytests(B, oind...) + viewB = view(B, oind...) + runviews(viewB, index5, index25, index125) + end +end + +let B = copy(reshape(1:13^3, 13, 13, 13)) + @testset "spot checks (other BitIntegers): $oind" for oind in ( + (:,:,0x6), + (:,0x00000006,:), + (0x0006,:,:), + (:,0x00000003:0x00000007,:), + (0x0000000000000003:0x0000000000000007,:,:), + (0x0003:0x0007,0x6,:), + (6,UInt(3):UInt(7),3:7), + (Int16(3):Int16(7),Int16(6),:), + (CartesianIndex(0xD,0x6),UInt8[8,4,6,12,5,7]), + (Int8(1),:,view(1:13,[9,12,4,13,1])), + (view(1:13,Int16[9,12,4,13,1]),UInt8(2):UInt16(6),Int8(4)), + (Int8[1:5 2:6 3:7 4:8 5:9],:,UInt64(3)), + ) runsubarraytests(B, oind...) viewB = view(B, oind...) 
runviews(viewB, index5, index25, index125) @@ -446,6 +465,113 @@ end @test sA[[1 2 4 4; 6 1 1 4]] == [34 35 38 38; 50 34 34 38] end +@testset "fast linear indexing with AbstractUnitRange or Colon indices" begin + @testset "getindex" begin + @testset "1D" begin + for a1 in Any[1:5, [1:5;]] + b1 = @view a1[:]; # FastContiguousSubArray + c1 = @view a1[eachindex(a1)]; # FastContiguousSubArray + d1 = @view a1[begin:1:end]; # FastSubArray + + ax1 = eachindex(a1); + @test b1[ax1] == c1[ax1] == d1[ax1] == a1[ax1] + @test b1[:] == c1[:] == d1[:] == a1[:] + + # some arbitrary indices + inds1 = 2:4 + c1 = @view a1[inds1] + @test c1[axes(c1,1)] == c1[:] == a1[inds1] + + inds12 = Base.IdentityUnitRange(Base.OneTo(4)) + c1 = @view a1[inds12] + @test c1[axes(c1,1)] == c1[:] == a1[inds12] + + inds2 = 3:2:5 + d1 = @view a1[inds2] + @test d1[axes(d1,1)] == d1[:] == a1[inds2] + end + end + + @testset "2D" begin + a2_ = reshape(1:25, 5, 5) + for a2 in Any[a2_, collect(a2_)] + b2 = @view a2[:, :]; # 2D FastContiguousSubArray + b22 = @view a2[:]; # 1D FastContiguousSubArray + c2 = @view a2[eachindex(a2)]; # 1D FastContiguousSubArray + d2 = @view a2[begin:1:end]; # 1D FastSubArray + + ax2 = eachindex(a2); + @test b2[ax2] == b22[ax2] == c2[ax2] == d2[ax2] == a2[ax2] + @test b2[:] == b22[:] == c2[:] == d2[:] == a2[:] + + # some arbitrary indices + inds1 = 2:4 + c2 = @view a2[inds1] + @test c2[axes(c2,1)] == c2[:] == a2[inds1] + + inds12 = Base.IdentityUnitRange(Base.OneTo(4)) + c2 = @view a2[inds12] + @test c2[axes(c2,1)] == c2[:] == a2[inds12] + + inds2 = 2:2:4 + d2 = @view a2[inds2]; + @test d2[axes(d2,1)] == d2[:] == a2[inds2] + end + end + end + @testset "setindex!" begin + @testset "1D" begin + a1 = rand(10); + a12 = copy(a1); + b1 = @view a1[:]; # 1D FastContiguousSubArray + c1 = @view a1[eachindex(a1)]; # 1D FastContiguousSubArray + d1 = @view a1[begin:1:end]; # 1D FastSubArray + + ax1 = eachindex(a1); + @test (b1[ax1] = a12; b1) == (c1[ax1] = a12; c1) == (d1[ax1] = a12; d1) == (a1[ax1] = a12; a1) + @test (b1[:] = a12; b1) == (c1[:] = a12; c1) == (d1[:] = a12; d1) == (a1[:] = a12; a1) + + # some arbitrary indices + ind1 = 2:4 + c1 = a12[ind1] + @test (c1[axes(c1,1)] = a12[ind1]; c1) == (c1[:] = a12[ind1]; c1) == a12[ind1] + + inds1 = Base.IdentityUnitRange(Base.OneTo(4)) + c1 = @view a1[inds1] + @test (c1[eachindex(c1)] = @view(a12[inds1]); c1) == @view(a12[inds1]) + + ind2 = 2:2:8 + d1 = a12[ind2] + @test (d1[axes(d1,1)] = a12[ind2]; d1) == (d1[:] = a12[ind2]; d1) == a12[ind2] + end + + @testset "2D" begin + a2 = rand(10, 10); + a22 = copy(a2); + a2v = vec(a22); + b2 = @view a2[:, :]; # 2D FastContiguousSubArray + c2 = @view a2[eachindex(a2)]; # 1D FastContiguousSubArray + d2 = @view a2[begin:1:end]; # 1D FastSubArray + + @test (b2[eachindex(b2)] = a2v; vec(b2)) == (c2[eachindex(c2)] = a2v; c2) == a2v + @test (d2[eachindex(d2)] = a2v; d2) == a2v + + # some arbitrary indices + inds1 = 3:9 + c2 = @view a2[inds1] + @test (c2[eachindex(c2)] = @view(a22[inds1]); c2) == @view(a22[inds1]) + + inds1 = Base.IdentityUnitRange(Base.OneTo(4)) + c2 = @view a2[inds1] + @test (c2[eachindex(c2)] = @view(a22[inds1]); c2) == @view(a22[inds1]) + + inds2 = 3:3:9 + d2 = @view a2[inds2] + @test (d2[eachindex(d2)] = @view(a22[inds2]); d2) == @view(a22[inds2]) + end + end +end + @testset "issue #11871" begin a = fill(1., (2,2)) b = view(a, 1:2, 1:2) @@ -641,8 +767,21 @@ end @testset "unaliascopy trimming; Issue #26263" begin A = rand(5,5,5,5) V = view(A, 2:5, :, 2:5, 1:2:5) - @test @inferred(Base.unaliascopy(V)) == V == A[2:5, :, 
2:5, 1:2:5] - @test @inferred(sum(Base.unaliascopy(V))) ≈ sum(V) ≈ sum(A[2:5, :, 2:5, 1:2:5]) + V′ = @inferred(Base.unaliascopy(V)) + @test size(V′.parent) == size(V) + @test V′::typeof(V) == V == A[2:5, :, 2:5, 1:2:5] + @test @inferred(sum(V′)) ≈ sum(V) ≈ sum(A[2:5, :, 2:5, 1:2:5]) + V = view(A, Base.IdentityUnitRange(2:4), :, Base.StepRangeLen(1,1,3), 1:2:5) + V′ = @inferred(Base.unaliascopy(V)) + @test size(V.parent) != size(V′.parent) + @test V′ == V && V′ isa typeof(V) + i1 = collect(CartesianIndices((2:5))) + i2 = [CartesianIndex(), CartesianIndex()] + i3 = collect(CartesianIndices((2:5, 1:2:5))) + V = view(A, i1, 1:5, i2, i3) + @test @inferred(Base.unaliascopy(V))::typeof(V) == V == A[i1, 1:5, i2, i3] + V = view(A, i1, 1:5, i3, i2) + @test @inferred(Base.unaliascopy(V))::typeof(V) == V == A[i1, 1:5, i3, i2] end @testset "issue #27632" begin @@ -750,3 +889,51 @@ end @test view(m, 1:2, 3, 1, 1) == m[1:2, 3] @test parent(view(m, 1:2, 3, 1, 1)) === m end + +@testset "replace_in_print_matrix" begin + struct MyIdentity <: AbstractMatrix{Bool} + n :: Int + end + Base.size(M::MyIdentity) = (M.n, M.n) + function Base.getindex(M::MyIdentity, i::Int, j::Int) + checkbounds(M, i, j) + i == j + end + function Base.replace_in_print_matrix(M::MyIdentity, i::Integer, j::Integer, s::AbstractString) + i == j ? s : Base.replace_with_centered_mark(s) + end + V = view(MyIdentity(3), 1:2, 1:3) + @test sprint(show, "text/plain", V) == "$(summary(V)):\n 1 ⋅ ⋅\n ⋅ 1 ⋅" + + struct OneElVec <: AbstractVector{Bool} + n :: Int + ind :: Int + end + Base.size(M::OneElVec) = (M.n,) + function Base.getindex(M::OneElVec, i::Int) + checkbounds(M, i) + i == M.ind + end + function Base.replace_in_print_matrix(M::OneElVec, i::Integer, j::Integer, s::AbstractString) + i == M.ind ? 
s : Base.replace_with_centered_mark(s) + end + V = view(OneElVec(6, 2), 1:5) + @test sprint(show, "text/plain", V) == "$(summary(V)):\n ⋅\n 1\n ⋅\n ⋅\n ⋅" + + V = view(1:2, [CartesianIndex(2)]) + @test sprint(show, "text/plain", V) == "$(summary(V)):\n 2" +end + +@testset "Base.first_index for offset indices" begin + a = Vector(1:10) + b = view(a, Base.IdentityUnitRange(4:7)) + @test first(b) == a[Base.first_index(b)] +end + +@testset "StepRangeLen of CartesianIndex-es" begin + v = view(1:2, StepRangeLen(CartesianIndex(1,1), CartesianIndex(1,1), 0)) + @test isempty(v) + r = StepRangeLen(CartesianIndex(1), CartesianIndex(1), 1) + v = view(1:2, r) + @test v == view(1:2, collect(r)) +end diff --git a/test/subtype.jl b/test/subtype.jl index e8493a807141c..edc38c8556f3c 100644 --- a/test/subtype.jl +++ b/test/subtype.jl @@ -588,7 +588,7 @@ function test_old() end const easy_menagerie = - Any[Bottom, Any, Int, Int8, Integer, Real, + Any[Any, Int, Int8, Integer, Real, Array{Int,1}, AbstractArray{Int,1}, Tuple{Int,Vararg{Integer}}, Tuple{Integer,Vararg{Int}}, Tuple{}, Union{Int,Int8}, @@ -627,6 +627,10 @@ end add_variants!(easy_menagerie) add_variants!(hard_menagerie) +push!(easy_menagerie, Bottom) +push!(easy_menagerie, Ref{Bottom}) +push!(easy_menagerie, @UnionAll N NTuple{N,Bottom}) +push!(easy_menagerie, @UnionAll S<:Bottom Ref{S}) const menagerie = [easy_menagerie; hard_menagerie] @@ -673,9 +677,11 @@ function test_properties() @test isequal_type(T, S) == isequal_type(Ref{T}, Ref{S}) # covariance - @test issubTS == issub(Tuple{T}, Tuple{S}) - @test issubTS == issub(Tuple{Vararg{T}}, Tuple{Vararg{S}}) - @test issubTS == issub(Tuple{T}, Tuple{Vararg{S}}) + if T !== Bottom && S !== Bottom + @test issubTS == issub(Tuple{T}, Tuple{S}) + @test issubTS == issub(Tuple{Vararg{T}}, Tuple{Vararg{S}}) + @test issubTS == issub(Tuple{T}, Tuple{Vararg{S}}) + end # pseudo-contravariance @test issubTS == issub(¬S, ¬T) @@ -753,8 +759,11 @@ function test_intersection() @testintersect((@UnionAll T Tuple{T, AbstractArray{T}}), Tuple{Int, Array{Number,1}}, Tuple{Int, Array{Number,1}}) + # TODO: improve this result + #@testintersect((@UnionAll S Tuple{S,Vector{S}}), (@UnionAll T<:Real Tuple{T,AbstractVector{T}}), + # (@UnionAll S<:Real Tuple{S,Vector{S}})) @testintersect((@UnionAll S Tuple{S,Vector{S}}), (@UnionAll T<:Real Tuple{T,AbstractVector{T}}), - (@UnionAll S<:Real Tuple{S,Vector{S}})) + (@UnionAll S<:Real Tuple{Real,Vector{S}})) # typevar corresponding to a type it will end up being neither greater than nor # less than @@ -813,9 +822,9 @@ function test_intersection() Tuple{Tuple{Vararg{Integer}}, Tuple{Integer,Integer}}, Tuple{Tuple{Integer,Integer}, Tuple{Integer,Integer}}) - #@test isequal_type(typeintersect((@UnionAll N Tuple{NTuple{N,Any},Array{Int,N}}), - # Tuple{Tuple{Int,Vararg{Int}},Array}), - # Tuple{Tuple{Int,Vararg{Int}},Array{Int,N}}) + @test isequal_type(typeintersect((@UnionAll N Tuple{NTuple{N,Any},Array{Int,N}}), + Tuple{Tuple{Int,Vararg{Int}},Array}), + @UnionAll N Tuple{Tuple{Int,Vararg{Int}},Array{Int,N}}) @testintersect((@UnionAll N Tuple{NTuple{N,Any},Array{Int,N}}), Tuple{Tuple{Int,Vararg{Int}},Array{Int,2}}, @@ -904,11 +913,11 @@ function test_intersection() # both of these answers seem acceptable #@testintersect(Tuple{T,T} where T<:Union{UpperTriangular, UnitUpperTriangular}, # Tuple{AbstractArray{T,N}, AbstractArray{T,N}} where N where T, - # Union{Tuple{T,T} where T<:UpperTriangular, - # Tuple{T,T} where T<:UnitUpperTriangular}) + # Union{Tuple{T,T} where T<:UpperTriangular{T1}, + 
# Tuple{T,T} where T<:UnitUpperTriangular{T1}} where T) @testintersect(Tuple{T,T} where T<:Union{UpperTriangular, UnitUpperTriangular}, Tuple{AbstractArray{T,N}, AbstractArray{T,N}} where N where T, - Tuple{T,T} where T<:Union{UpperTriangular, UnitUpperTriangular}) + Tuple{T,T} where {T1, T<:Union{UpperTriangular{T1}, UnitUpperTriangular{T1}}}) @testintersect(DataType, Type, DataType) @testintersect(DataType, Type{T} where T<:Integer, Type{T} where T<:Integer) @@ -924,9 +933,10 @@ function test_intersection() # since this T is inside the invariant ctor Type{}, we allow T == Any here @testintersect((Type{Tuple{Vararg{T}}} where T), Type{Tuple}, Type{Tuple}) + # TODO: improve this @testintersect(Tuple{Type{S}, Tuple{Any, Vararg{Any}}} where S<:Tuple{Any, Vararg{Any}}, Tuple{Type{T}, T} where T, - Tuple{Type{S},S} where S<:Tuple{Any,Vararg{Any}}) + Tuple{Type{S}, Tuple{Any, Vararg{Any}}} where S<:Tuple{Any, Vararg{Any}}) # part of issue #20450 @testintersect(Tuple{Array{Ref{T}, 1}, Array{Pair{M, V}, 1}} where V where T where M, @@ -1044,6 +1054,7 @@ function test_intersection() @testintersect(Type{<:Tuple{Any,Vararg{Any}}}, Type{Tuple{Vararg{Int,N}}} where N, Type{Tuple{Int,Vararg{Int,N}}} where N) + @testintersect(Type{<:Array}, Type{AbstractArray{T}} where T, Bottom) @@ -1072,8 +1083,7 @@ function test_intersection_properties() I2 = _type_intersect(S,T) @test isequal_type(I, I2) if i > length(easy_menagerie) || j > length(easy_menagerie) - # TODO: these cases give a conservative answer - @test issub(I, T) || issub(I, S) + # @test issub(I, T) || issub(I, S) else @test issub(I, T) && issub(I, S) end @@ -1176,11 +1186,25 @@ ftwoparams(::TwoParams{<:Real,<:Real}) = 3 # a bunch of cases found by fuzzing let a = Tuple{Float64,T7} where T7, b = Tuple{S5,Tuple{S5}} where S5 - @test typeintersect(a, b) <: b + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test I1 <: I2 + @test I2 <: I1 + @test I1 <: a + @test I2 <: a + @test I1 <: b + @test I2 <: b end let a = Tuple{T1,T1} where T1, b = Tuple{Val{S2},S6} where S2 where S6 - @test typeintersect(a, b) == typeintersect(b, a) + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test I1 <: I2 + @test I2 <: I1 + @test I1 <: a + @test I2 <: a + @test I1 <: b + @test I2 <: b end let a = Val{Tuple{T1,T1}} where T1, b = Val{Tuple{Val{S2},S6}} where S2 where S6 @@ -1188,15 +1212,36 @@ let a = Val{Tuple{T1,T1}} where T1, end let a = Tuple{Float64,T3,T4} where T4 where T3, b = Tuple{S2,Tuple{S3},S3} where S2 where S3 - @test typeintersect(a, b) == typeintersect(b, a) + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test_broken I1 <: I2 + @test I2 <: I1 + @test I1 <: a + @test I2 <: a + @test_broken I1 <: b + @test I2 <: b end let a = Tuple{T1,Tuple{T1}} where T1, b = Tuple{Float64,S3} where S3 - @test typeintersect(a, b) <: a + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test I1 <: I2 + @test I2 <: I1 + @test I1 <: a + @test I2 <: a + @test I1 <: b + @test I2 <: b end let a = Tuple{5,T4,T5} where T4 where T5, b = Tuple{S2,S3,Tuple{S3}} where S2 where S3 - @test typeintersect(a, b) == typeintersect(b, a) + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test_broken I1 <: I2 + @test I2 <: I1 + @test I1 <: a + @test I2 <: a + @test_broken I1 <: b + @test I2 <: b end let a = Tuple{T2,Tuple{T4,T2}} where T4 where T2, b = Tuple{Float64,Tuple{Tuple{S3},S3}} where S3 @@ -1204,23 +1249,58 @@ let a = Tuple{T2,Tuple{T4,T2}} where T4 where T2, end let a = Tuple{Tuple{T2,4},T6} where T2 where T6, b = Tuple{Tuple{S2,S3},Tuple{S2}} where 
S2 where S3 - @test typeintersect(a, b) == typeintersect(b, a) + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test_broken I1 <: I2 + @test I2 <: I1 + @test I1 <: a + @test I2 <: a + @test_broken I1 <: b + @test I2 <: b end let a = Tuple{T3,Int64,Tuple{T3}} where T3, b = Tuple{S3,S3,S4} where S4 where S3 - @test_broken typeintersect(a, b) <: a + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test I1 <: I2 + @test I2 <: I1 + @test_broken I1 <: a + @test I2 <: a + @test I1 <: b + @test I2 <: b end let a = Tuple{T1,Val{T2},T2} where T2 where T1, b = Tuple{Float64,S1,S2} where S2 where S1 - @test typeintersect(a, b) == typeintersect(b, a) + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test I1 <: I2 + @test I2 <: I1 + @test_broken I1 <: a + @test_broken I2 <: a + @test I1 <: b + @test I2 <: b end let a = Tuple{T1,Val{T2},T2} where T2 where T1, b = Tuple{Float64,S1,S2} where S2 where S1 - @test_broken typeintersect(a, b) <: a + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test I1 <: I2 + @test I2 <: I1 + @test_broken I1 <: a + @test_broken I2 <: a + @test I1 <: b + @test I2 <: b end let a = Tuple{Float64,T1} where T1, b = Tuple{S1,Tuple{S1}} where S1 - @test typeintersect(a, b) <: b + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test I1 <: I2 + @test I2 <: I1 + @test I1 <: a + @test I2 <: a + @test I1 <: b + @test I2 <: b end let a = Tuple{Val{T1},T2,T2} where T2 where T1, b = Tuple{Val{Tuple{S2}},S3,Float64} where S2 where S3 @@ -1229,12 +1309,20 @@ end let a = Tuple{T1,T2,T2} where T1 where T2, b = Tuple{Val{S2},S2,Float64} where S2, x = Tuple{Val{Float64},Float64,Float64} - @test x <: typeintersect(a, b) -end -let a = Val{Tuple{T1,Val{T2},Val{Int64},Tuple{Tuple{T3,5,Float64},T4,T2,T5}}} where T1 where T5 where T4 where T3 where T2, - b = Val{Tuple{Tuple{S1,5,Float64},Val{S2},S3,Tuple{Tuple{Val{Float64},5,Float64},2,Float64,S4}}} where S2 where S3 where S1 where S4 - @test_skip typeintersect(b, a) -end + I1 = typeintersect(a, b) + I2 = typeintersect(b, a) + @test x <: I1 + @test x <: I2 + @test I1 <: I2 + @test I2 <: I1 + @test I1 <: a + @test I2 <: a + @test_broken I1 <: b + @test_broken I2 <: b +end +@testintersect(Val{Tuple{T1,Val{T2},Val{Int64},Tuple{Tuple{T3,5,Float64},T4,T2,T5}}} where T1 where T5 where T4 where T3 where T2, + Val{Tuple{Tuple{S1,5,Float64},Val{S2},S3,Tuple{Tuple{Val{Float64},5,Float64},2,Float64,S4}}} where S2 where S3 where S1 where S4, + Val{Tuple{Tuple{S1, 5, Float64}, Val{Float64}, Val{Int64}, Tuple{Tuple{Val{Float64}, 5, Float64}, 2, Float64, T5}}} where {T5, S1}) # issue #20992 abstract type A20992{T,D,d} end @@ -1348,6 +1436,9 @@ struct A23764_2{T, N, S} <: AbstractArray{Union{Ref{T}, S}, N}; end @test Tuple{A23764_2{T, 1, Nothing} where T} <: Tuple{AbstractArray{T,N}} where {T,N} @test Tuple{A23764_2{T, 1, Nothing} where T} <: Tuple{AbstractArray{T,N} where {T,N}} +# issue #50716 +@test !<:(Ref{Vector{Tuple{K}} where K}, Ref{<:Vector{K}} where K) + # issue #26131 @test !(Vector{Vector{Number}} <: Vector{Union{Vector{Number}, Vector{S}}} where S<:Integer) @@ -1401,6 +1492,8 @@ f24521(::Type{T}, ::Type{T}) where {T} = T @test !(Ref{Union{Int64, Val{Number}}} <: Ref{Union{Val{T}, T}} where T) @test !(Ref{Union{Ref{Number}, Int64}} <: Ref{Union{Ref{T}, T}} where T) @test !(Ref{Union{Val{Number}, Int64}} <: Ref{Union{Val{T}, T}} where T) +@test !(Val{Ref{Union{Int64, Ref{Number}}}} <: Val{S} where {S<:Ref{Union{Ref{T}, T}} where T}) +@test !(Tuple{Ref{Union{Int64, Ref{Number}}}} <: Tuple{S} where {S<:Ref{Union{Ref{T}, T}} 
where T}) # issue #26180 @test !(Ref{Union{Ref{Int64}, Ref{Number}}} <: Ref{Ref{T}} where T) @@ -1427,7 +1520,7 @@ f26453(x::T,y::T) where {S,T>:S} = 0 @test f26453(1,2) == 0 @test f26453(1,"") == 0 g26453(x::T,y::T) where {S,T>:S} = T -@test_throws UndefVarError(:T) g26453(1,1) +@test_throws UndefVarError(:T, :static_parameter) g26453(1,1) @test issub_strict((Tuple{T,T} where T), (Tuple{T,T} where {S,T>:S})) # issue #27632 @@ -1514,7 +1607,7 @@ end Tuple{Type{A29955{T,TV,TM}}, TM} where {T,TV<:AbstractVector{T},TM<:M29955{T,TV}}, Tuple{Type{A29955{Float64,Array{Float64,1},TM}}, - TM} where TM<:M29955{Float64,Array{Float64,1}}) + M29955{Float64,Vector{Float64}}} where TM<:M29955{Float64,Array{Float64,1}}) let M = M29955{T,Vector{Float64}} where T @test M == (M29955{T,Vector{Float64}} where T) @test M{Float64} == M29955{Float64,Vector{Float64}} @@ -1532,9 +1625,9 @@ end Tuple{LT,R,I} where LT<:Union{I, R} where R<:Rational{I} where I<:Integer, Tuple{LT,Rational{Int},Int} where LT<:Union{Rational{Int},Int}) -#@testintersect(Tuple{Any,Tuple{Int},Int}, -# Tuple{LT,R,I} where LT<:Union{I, R} where R<:Tuple{I} where I<:Integer, -# Tuple{LT,Tuple{Int},Int} where LT<:Union{Tuple{Int},Int}) +@testintersect(Tuple{Any,Tuple{Int},Int}, + Tuple{LT,R,I} where LT<:Union{I, R} where R<:Tuple{I} where I<:Integer, + Tuple{LT,Tuple{Int},Int} where LT<:Union{Tuple{Int},Int}) # fails due to this: let U = Tuple{Union{LT, LT1},Union{R, R1},Int} where LT1<:R1 where R1<:Tuple{Int} where LT<:Int where R<:Tuple{Int}, U2 = Union{Tuple{LT,R,Int} where LT<:Int where R<:Tuple{Int}, Tuple{LT,R,Int} where LT<:R where R<:Tuple{Int}}, @@ -1551,9 +1644,10 @@ end # issue #31082 and #30741 @test typeintersect(Tuple{T, Ref{T}, T} where T, Tuple{Ref{S}, S, S} where S) != Union{} +# TODO: improve this bound @testintersect(Tuple{Pair{B,C},Union{C,Pair{B,C}},Union{B,Real}} where {B,C}, Tuple{Pair{B,C},C,C} where {B,C}, - Tuple{Pair{B,C},C,C} where C<:Union{Real, B} where B) + Tuple{Pair{B,C}, Union{Pair{B,C},C},Union{Real,B}} where {B,C}) f31082(::Pair{B, C}, ::Union{C, Pair{B, C}}, ::Union{B, Real}) where {B, C} = 0 f31082(::Pair{B, C}, ::C, ::C) where {B, C} = 1 @test f31082(""=>1, 2, 3) == 1 @@ -1712,15 +1806,25 @@ let (_, E) = intersection_env(Tuple{Tuple{Vararg{Int}}}, Tuple{Tuple{Vararg{Int, @test !isa(E[1], Type) end -# this is is a timing test, so it would fail on debug builds +# this is a timing test, so it would fail on debug builds #let T = Type{Tuple{(Union{Int, Nothing} for i = 1:23)..., Union{String, Nothing}}}, # S = Type{T} where T<:Tuple{E, Vararg{E}} where E # @test @elapsed (@test T != S) < 5 #end # issue #32386 -@test typeintersect(Type{S} where S<:(Vector{Pair{_A,N} where N} where _A), - Type{Vector{T}} where T) == Type{Vector{Pair{_A,N} where N}} where _A +@testintersect(Type{S} where S<:(Vector{Pair{_A,N} where N} where _A), + Type{Vector{T}} where T, + Type{Vector{Pair{_A,N} where N}} where _A) + +# pr #49049 +@testintersect(Tuple{Type{Pair{T, A} where {T, A<:Array{T}}}, Int, Any}, + Tuple{Type{F}, Any, Int} where {F<:(Pair{T, A} where {T, A<:Array{T}})}, + Tuple{Type{Pair{T, A} where {T, A<:(Array{T})}}, Int, Int}) + +@testintersect(Type{Ref{Union{Int, Tuple{S,S} where S<:T}}} where T, + Type{F} where F<:(Base.RefValue{Union{Int, Tuple{S,S} where S<:T}} where T), + Union{}) # issue #32488 struct S32488{S <: Tuple, T, N, L} @@ -1779,8 +1883,11 @@ s26065 = Ref{Tuple{T,Ref{Union{Ref{Tuple{Ref{Union{Ref{Ref{Tuple{Ref{Tuple{Union Tuple{Type{Tuple{Vararg{V}}}, Tuple{Vararg{V}}} where V) # issue 36100 -@test 
NamedTuple{(:a, :b), Tuple{Missing, Union{}}} == NamedTuple{(:a, :b), Tuple{Missing, Union{}}} -@test Val{Tuple{Missing, Union{}}} === Val{Tuple{Missing, Union{}}} +@test Pair{(:a, :b), Tuple{Missing, Vararg{Union{},N}} where N} === + Pair{(:a, :b), Tuple{Missing, Vararg{Union{},N}} where N} != + Pair{(:a, :b), Tuple{Missing, Vararg{Union{}}}} === Pair{(:a, :b), Tuple{Missing}} +@test Val{Tuple{Missing, Vararg{Union{},N}} where N} === Val{Tuple{Missing, Vararg{Union{},N}} where N} != + Val{Tuple{Missing, Vararg{Union{}}}} === Val{Tuple{Missing}} # issue #36869 struct F36869{T, V} <: AbstractArray{Union{T, V}, 1} @@ -1798,40 +1905,31 @@ end # issue #38081 struct AlmostLU{T, S<:AbstractMatrix{T}} end -let X1 = Tuple{AlmostLU, Vector{T}} where T, - X2 = Tuple{AlmostLU{S, X} where X<:Matrix, Vector{S}} where S<:Union{Float32, Float64}, - I = typeintersect(X1, X2) - # TODO: the quality of this intersection is not great; for now just test that it - # doesn't stack overflow - @test I<:X1 || I<:X2 - actual = Tuple{Union{AlmostLU{S, X} where X<:Matrix{S}, AlmostLU{S, <:Matrix}}, Vector{S}} where S<:Union{Float32, Float64} - @test I == actual -end +@testintersect(Tuple{AlmostLU, Vector{T}} where T, + Tuple{AlmostLU{S, X} where X<:Matrix, Vector{S}} where S<:Union{Float32, Float64}, + Tuple{AlmostLU{T, X} where X<:Matrix{T}, Vector{T}} where T<:Union{Float32, Float64}) -let - # issue #22787 - # for now check that these don't stack overflow - t = typeintersect(Tuple{Type{Q}, Q, Ref{Q}} where Q<:Ref, - Tuple{Type{S}, Union{Ref{S}, Ref{R}}, R} where R where S) - @test_broken t != Union{} - t = typeintersect(Tuple{Type{T}, T, Ref{T}} where T, - Tuple{Type{S}, Ref{S}, S} where S) - @test_broken t != Union{} +# issue #22787 +@testintersect(Tuple{Type{Q}, Q, Ref{Q}} where Q<:Ref, + Tuple{Type{S}, Union{Ref{S}, Ref{R}}, R} where R where S, + Tuple{Type{Q}, Union{Ref{Q}, Ref{R}}, Ref{Q}} where {Q<:Ref, R}) # likely suboptimal - # issue #38279 - t = typeintersect(Tuple{<:Array{T, N}, Val{T}} where {T<:Real, N}, - Tuple{<:Array{T, N}, Val{<:AbstractString}} where {T<:Real, N}) - @test t == Tuple{<:Array{Union{}, N}, Val{Union{}}} where N +let t = typeintersect(Tuple{Type{T}, T, Ref{T}} where T, + Tuple{Type{S}, Ref{S}, S} where S) + @test_broken t == Tuple{Type{T}, Ref{T}, Ref{T}} where T>:Ref + @test t == Tuple{Type{T}, Ref{T}, Ref{T}} where T end +# issue #38279 +t = typeintersect(Tuple{<:Array{T, N}, Val{T}} where {T<:Real, N}, + Tuple{<:Array{T, N}, Val{<:AbstractString}} where {T<:Real, N}) +@test t == Tuple{<:Array{Union{}, N}, Val{Union{}}} where N + # issue #36951 @testintersect(Type{T} where T>:Missing, Type{Some{T}} where T, Union{}) -# issue #24333 -@test_broken (Type{Union{Ref,Cvoid}} <: Type{Union{T,Cvoid}} where T) - # issue #38423 let Either{L, R} = Union{Ref{L}, Val{R}} @@ -1863,10 +1961,25 @@ end # issue #34170 let A = Tuple{Type{T} where T<:Ref, Ref, Union{T, Union{Ref{T}, T}} where T<:Ref}, B = Tuple{Type{T}, Ref{T}, Union{Int, Ref{T}, T}} where T - I = typeintersect(A,B) # this was a case where <: disagreed with === (due to a badly-normalized type) - @test I == typeintersect(A,B) - @test I == Tuple{Type{T}, Ref{T}, Ref} where T<:Ref + I = _type_intersect(B, A) + @test_broken I == Union{Tuple{Type{T}, Ref{T}, Ref{T}} where T<:Ref, Tuple{Type{T}, Ref{T}, T} where T<:Ref} + @test I == _type_intersect(B, A) == Tuple{Type{T}, Ref{T}, Ref} where T<:Ref + I = typeintersect(B, A) + @test_broken I == Tuple{Type{T}, Ref{T}, Union{Ref{T}, T}} where T<:Ref + @test I == typeintersect(B, A) <: 
Tuple{Type{T}, Ref{T}, Ref} where T<:Ref + + I = _type_intersect(A, B) + @test !Base.has_free_typevars(I) + J = Tuple{Type{T1}, Ref{T1}, Ref} where {T, T1<:Union{Ref, Ref{T}}} + @test I == _type_intersect(A, B) == J + @test_broken I == Tuple{Type{T}, Ref{T}, T1} where {T<:Ref, T1<:Union{T, Ref{T}}} # a better result, == to the result with arguments switched + + I = typeintersect(A, B) + @test !Base.has_free_typevars(I) + J = Tuple{Type{T1}, Ref{T1}, Ref} where {T, T1<:Union{Ref, Ref{T}}} + @test I == typeintersect(A, B) == J + end # issue #39218 @@ -1895,20 +2008,14 @@ let A = Tuple{Type{<:Union{Number, T}}, Ref{T}} where T, end # issue #39698 -let T = Type{T} where T<:(AbstractArray{I}) where I<:(Base.IteratorsMD.CartesianIndex), - S = Type{S} where S<:(Base.IteratorsMD.CartesianIndices{A, B} where B<:Tuple{Vararg{Any, A}} where A) - I = typeintersect(T, S) - @test_broken I <: T - @test I <: S - @test_broken I == typeintersect(S, T) -end +@testintersect(Type{T} where T<:(AbstractArray{I}) where I<:(Base.IteratorsMD.CartesianIndex), + Type{S} where S<:(Base.IteratorsMD.CartesianIndices{A, B} where B<:Tuple{Vararg{Any, A}} where A), + Type{S} where {N, S<:(Base.IteratorsMD.CartesianIndices{N, B} where B<:Tuple{Vararg{Any, N}})}) # issue #39948 -let A = Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector}, - I = typeintersect(A, Tuple{Vararg{Vector{T}}} where T) - @test I <: A - @test !Base.has_free_typevars(I) -end +@testintersect(Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector}, + Tuple{Vararg{Vector{T}}} where T, + Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1}, Array{Pair{T, JT} where JT<:Ref{T}, 1}} where T) # issue #8915 struct D8915{T<:Union{Float32,Float64}} @@ -1928,18 +2035,34 @@ let A = Tuple{Ref{T}, Vararg{T}} where T, B = Tuple{Ref{U}, Union{Ref{S}, Ref{U}, Int}, Union{Ref{S}, S}} where S where U, C = Tuple{Ref{U}, Union{Ref{S}, Ref{U}, Ref{W}}, Union{Ref{S}, W, V}} where V<:AbstractArray where W where S where U I = typeintersect(A, B) + Ts = (Tuple{Ref{Int}, Int, Int}, Tuple{Ref{Ref{Int}}, Ref{Int}, Ref{Int}}) @test I != Union{} - @test I <: A + @test_broken I <: A @test I <: B - # avoid stack overflow + for T in Ts + if T <: A && T <: B + @test T <: I + end + end J = typeintersect(A, C) - @test_broken J != Union{} + @test J != Union{} + @test_broken J <: A + @test J <: C + for T in Ts + if T <: A && T <: C + @test T <: J + end + end end let A = Tuple{Dict{I,T}, I, T} where T where I, - B = Tuple{AbstractDict{I,T}, T, I} where T where I - # TODO: we should probably have I == T here - @test typeintersect(A, B) == Tuple{Dict{I,T}, I, T} where {I, T} + B = Tuple{AbstractDict{I,T}, T, I} where T where I, + I = typeintersect(A, B) + # TODO: we should probably have something approaching I == T here, + # though note something more complex is needed since the intersection must also include types such as; + # Tuple{Dict{Integer,Any}, Integer, Int} + @test_broken I <: A && I <: B + @test I == typeintersect(B, A) == Tuple{Dict{I, T}, Any, Any} where {I, T} end let A = Tuple{UnionAll, Vector{Any}}, @@ -1964,8 +2087,9 @@ let A = Tuple{Any, Type{Ref{_A}} where _A}, B = Tuple{Type{T}, Type{<:Union{Ref{T}, T}}} where T, I = typeintersect(A, B) @test I != Union{} - # TODO: this intersection result is still too narrow - @test_broken Tuple{Type{Ref{Integer}}, Type{Ref{Integer}}} <: I + @test Tuple{Type{Ref{Integer}}, Type{Ref{Integer}}} <: I + # TODO: this intersection result seems too wide (I == B) ? 
+ @test_broken !<:(Tuple{Type{Int}, Type{Int}}, I) end @testintersect(Tuple{Type{T}, T} where T<:(Tuple{Vararg{_A, _B}} where _B where _A), @@ -1996,3 +2120,453 @@ let T = TypeVar(:T, Real), @test !(UnionAll(T, UnionAll(V, UnionAll(T, Type{Pair{T, V}}))) <: UnionAll(T, UnionAll(V, Type{Pair{T, V}}))) @test !(UnionAll(T, UnionAll(V, UnionAll(T, S))) <: UnionAll(T, UnionAll(V, S))) end + +# issue #41096 +let C = Val{Val{B}} where {B} + @testintersect(Val{<:Union{Missing, Val{false}, Val{true}}}, C, Val{<:Union{Val{true}, Val{false}}}) + @testintersect(Val{<:Union{Nothing, Val{true}, Val{false}}}, C, Val{<:Union{Val{true}, Val{false}}}) + @testintersect(Val{<:Union{Nothing, Val{false}}}, C, Val{Val{false}}) +end + +#issue #43082 +struct X43082{A, I, B<:Union{Ref{I},I}}; end +@testintersect(Tuple{X43082{T}, Int} where T, Tuple{X43082{Int}, Any}, Tuple{X43082{Int}, Int}) + +#issue #36443 +let C = Tuple{Val{3},Int,Int,Int}, + As = (Tuple{Val{N},Vararg{T,N}} where {T,N}, + Tuple{Val{N},Vararg{T,N}} where {N,T}), + Bs = (Tuple{Val{3},Int,Vararg{T,N}} where {T,N}, + Tuple{Val{3},Int,Vararg{T,N}} where {N,T}, + Tuple{Val{3},Int,Vararg{T}} where {T}, + Tuple{Val{3},Int,Vararg{T,2}} where {T}) + for A in As, B in Bs + @testintersect(A, B, C) + end +end + +let A = Tuple{Type{Val{N}},Tuple{Vararg{T,N}} where T} where N, + C = Tuple{Type{Val{2}},Tuple{T,T} where T} + @testintersect(A, Tuple{Type{Val{2}},Tuple{Vararg{T,N}} where T} where N, C) + @testintersect(A, Tuple{Type{Val{2}},Tuple{T,Vararg{T,N}} where T} where N, C) + @testintersect(A, Tuple{Type{Val{2}},Tuple{T,T,Vararg{T,N}} where T} where N, C) +end + +let f36443(::NTuple{N}=[(f36443,),(1,2)][2],::Val{N}=Val(2)) where{N} = 0 + @test f36443() == 0; +end + +let C = Tuple{Val{3},Int,Int,Int,Int}, + As = (Tuple{Val{N},Int,Vararg{T,N}} where {T,N}, + Tuple{Val{N},Int,Vararg{T,N}} where {N,T}), + Bs = (Tuple{Val{3},Vararg{T,N}} where {T,N}, + Tuple{Val{3},Vararg{T,N}} where {N,T}, + Tuple{Val{3},Vararg{T}} where {T}) + for A in As, B in Bs + @testintersect(A, B, C) + end +end + +#issue #37257 +let T = Tuple{Val{N}, Any, Any, Vararg{Any,N}} where N, + C = Tuple{Val{1}, Any, Any, Any} + @testintersect(T, Tuple{Val{1}, Vararg{Any}}, C) + @testintersect(T, Tuple{Val{1}, Any, Vararg{Any}}, C) + @testintersect(T, Tuple{Val{1}, Any, Any, Vararg{Any}}, C) + @testintersect(T, Tuple{Val{1}, Any, Any, Any, Vararg{Any}}, C) + @testintersect(T, Tuple{Val{1}, Any, Any, Any, Any, Vararg{Any}}, Union{}) +end + +let A = Tuple{NTuple{N,Any},Val{N}} where {N}, + C = Tuple{NTuple{4,Any},Val{4}} + @testintersect(A, Tuple{Tuple{Vararg{Any,N}},Val{4}} where {N}, C) + @testintersect(A, Tuple{Tuple{Vararg{Any}},Val{4}}, C) + @testintersect(A, Tuple{Tuple{Vararg{Any,N}} where {N},Val{4}}, C) + + @testintersect(A, Tuple{Tuple{Any,Vararg{Any,N}},Val{4}} where {N}, C) + @testintersect(A, Tuple{Tuple{Any,Vararg{Any}},Val{4}}, C) + @testintersect(A, Tuple{Tuple{Any,Vararg{Any,N}} where {N},Val{4}}, C) + + @testintersect(A, Tuple{Tuple{Any,Any,Any,Any,Any,Vararg{Any,N}},Val{4}} where {N}, Union{}) + @testintersect(A, Tuple{Tuple{Any,Any,Any,Any,Any,Vararg{Any}},Val{4}}, Union{}) + @testintersect(A, Tuple{Tuple{Any,Any,Any,Any,Any,Vararg{Any,N}} where {N},Val{4}}, Union{}) +end + +#issue #39088 +let + a() = c((1,), (1,1,1,1)) + c(d::NTuple{T}, ::NTuple{T}) where T = d + c(d::NTuple{f}, b) where f = c((d..., f), b) + j(h::NTuple{T}, ::NTuple{T} = a()) where T = nothing + @test j((1,1,1,1)) === nothing +end + +let A = Tuple{NTuple{N, Int}, NTuple{N, Int}} where N, + C = Tuple{NTuple{4, 
Int}, NTuple{4, Int}} + @testintersect(A, Tuple{Tuple{Int, Vararg{Any}}, NTuple{4, Int}}, C) + @testintersect(A, Tuple{Tuple{Int, Vararg{Any, N}} where {N}, NTuple{4, Int}}, C) + @testintersect(A, Tuple{Tuple{Int, Vararg{Any, N}}, NTuple{4, Int}} where {N}, C) + + Bs = (Tuple{Tuple{Int, Vararg{Any}}, Tuple{Int, Int, Vararg{Any}}}, + Tuple{Tuple{Int, Vararg{Any,N1}}, Tuple{Int, Int, Vararg{Any,N2}}} where {N1,N2}, + Tuple{Tuple{Int, Vararg{Any,N}} where {N}, Tuple{Int, Int, Vararg{Any,N}} where {N}}) + C = Tuple{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + for B in Bs + @testintersect(A, B, C) + end + A = Tuple{NTuple{N, Int}, Tuple{Int, Vararg{Int, N}}} where N + C = Tuple{Tuple{Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + for B in Bs + @testintersect(A, B, C) + end + A = Tuple{Tuple{Int, Vararg{Int, N}}, NTuple{N, Int}} where N + C = Tuple{Tuple{Int, Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + for B in Bs + @testintersect(A, B, C) + end +end + +let A = Pair{NTuple{N, Int}, NTuple{N, Int}} where N, + C = Pair{NTuple{4, Int}, NTuple{4, Int}} + @testintersect(A, Pair{<:Tuple{Int, Vararg{Any}}, NTuple{4, Int}}, C) + @testintersect(A, Pair{<:Tuple{Int, Vararg{Any, N}} where {N}, NTuple{4, Int}}, C) + @testintersect(A, Pair{<:Tuple{Int, Vararg{Any, N}}, NTuple{4, Int}} where {N}, C) + + Bs = (Pair{<:Tuple{Int, Vararg{Int}}, <:Tuple{Int, Int, Vararg{Int}}}, + Pair{Tuple{Int, Vararg{Int,N1}}, Tuple{Int, Int, Vararg{Int,N2}}} where {N1,N2}, + Pair{<:Tuple{Int, Vararg{Int,N}} where {N}, <:Tuple{Int, Int, Vararg{Int,N}} where {N}}) + C = Pair{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Int, Vararg{Int, N}}} where {N} + for B in Bs + @testintersect(A, B, C) + end +end + +# Example from pr#39098 +@testintersect(NTuple, Tuple{Any,Vararg}, Tuple{T, Vararg{T}} where {T}) + +@testintersect(Val{T} where T<:Tuple{Tuple{Any, Vararg{Any}}}, + Val{Tuple{Tuple{Vararg{Any, N}}}} where {N}, + Val{Tuple{Tuple{Any, Vararg{Any, N}}}} where {N}) + +let A = Pair{NTuple{N, Int}, Val{N}} where N, + C = Pair{Tuple{Int, Vararg{Int,N1}}, Val{N2}} where {N1,N2}, + B = Pair{<:Tuple{Int, Vararg{Int}}, <:Val} + @testintersect A B C + @testintersect A C C +end + +# issue #49484 +let S = Tuple{Integer, U} where {II<:Array, U<:Tuple{Vararg{II, 1}}} + T = Tuple{Int, U} where {II<:Array, U<:Tuple{Vararg{II, 1}}} + @testintersect(S, Tuple{Int, U} where {U<:Tuple{Vararg{Any}}}, T) + @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Vararg{Any,N}}}, T) + @testintersect(S, Tuple{Int, U} where {U<:Tuple{Any,Vararg{Any}}}, T) + @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Any,Vararg{Any,N}}}, T) + @testintersect(S, Tuple{Int, U} where {U<:Tuple{Any,Any,Vararg{Any}}}, Union{}) + @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Any,Any,Vararg{Any,N}}}, Union{}) +end + +# issue #43064 +let + env_tuple(@nospecialize(x), @nospecialize(y)) = (intersection_env(x, y)[2]...,) + all_var(x::UnionAll) = (x.var, all_var(x.body)...) 
+ all_var(x::DataType) = () + TT0 = Tuple{Type{T},Union{Real,Missing,Nothing}} where {T} + TT1 = Union{Type{Int8},Type{Int16}} + @test env_tuple(Tuple{TT1,Missing}, TT0) === + env_tuple(Tuple{TT1,Nothing}, TT0) === + env_tuple(Tuple{TT1,Int}, TT0) === all_var(TT0) + + TT0 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T1,T2} + TT1 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T2,T1} + TT2 = Tuple{Union{Int,Int8},Union{Int,Int8},Int} + TT3 = Tuple{Int,Union{Int,Int8},Int} + @test env_tuple(TT2, TT0) === all_var(TT0) + @test env_tuple(TT2, TT1) === all_var(TT1) + @test env_tuple(TT3, TT0) === Base.setindex(all_var(TT0), Int, 1) + @test env_tuple(TT3, TT1) === Base.setindex(all_var(TT1), Int, 2) + + TT0 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T1,T2} + TT1 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T2,T1} + TT2 = Tuple{Int,Union{Int,Int8},Int,Int} + @test env_tuple(TT2, TT0) === Base.setindex(all_var(TT0), Int, 1) + @test env_tuple(TT2, TT1) === Base.setindex(all_var(TT1), Int, 2) +end + +#issue #46735 +T46735{B<:Real} = Pair{<:Union{B, Val{<:B}}, <:Union{AbstractMatrix{B}, AbstractMatrix{Vector{B}}}} +@testintersect(T46735{B} where {B}, T46735, !Union{}) +@testintersect(T46735{B} where {B<:Integer}, T46735, !Union{}) +S46735{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}},M,<:(Union{AbstractMatrix{B}, AbstractMatrix{<:Vector{<:B}}})} +@testintersect(S46735{B} where {B}, S46735, !Union{}) +@testintersect(S46735{B, M} where {B, M}, S46735, !Union{}) +A46735{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}},M,Union{AbstractMatrix{B}, AbstractMatrix{<:Vector{<:B}}}} +@testintersect(A46735{B} where {B}, A46735, !Union{}) +@testintersect(A46735{B, M} where {B, M}, A46735, !Union{}) + +#issue #46871 #38497 +struct A46871{T, N, M} <: AbstractArray{T, N} end +struct B46871{T, N} <: Ref{A46871{T, N, N}} end +for T in (B46871{Int, N} where {N}, B46871{Int}) # intentional duplication + @testintersect(T, Ref{<:AbstractArray{<:Real, 3}}, B46871{Int, 3}) +end +abstract type C38497{e,g<:Tuple,i} end +struct Q38497{o,e<:NTuple{o},g} <: C38497{e,g,Array{o}} end +@testintersect(Q38497{<:Any, Tuple{Int}}, C38497, Q38497{<:Any, Tuple{Int}, <:Tuple}) +# n.b. 
the only concrete instance of this type is Q38497{1, Tuple{Int}, <:Tuple} (since NTuple{o} also adds an ::Int constraint) +# but this abstract type is also part of the intersection abstractly + +abstract type X38497{T<:Number} end +abstract type Y38497{T>:Integer} <: X38497{T} end +struct Z38497{T>:Int} <: Y38497{T} end +@testintersect(Z38497, X38497, Z38497{T} where Int<:T<:Number) +@testintersect(Z38497, Y38497, Z38497{T} where T>:Integer) +@testintersect(X38497, Y38497, Y38497{T} where Integer<:T<:Number) + +#issue #33138 +@test Vector{Vector{Tuple{T,T}} where Int<:T<:Int} <: Vector{Vector{Tuple{S1,S1} where S<:S1<:S}} where S + +#issue #46970 +@test only(intersection_env(Union{S, Matrix{Int}} where S<:Matrix, Matrix)[2]) isa TypeVar +T46784{B<:Val, M<:AbstractMatrix} = Tuple{<:Union{B, <:Val{<:B}}, M, Union{AbstractMatrix{B}, AbstractMatrix{<:Vector{<:B}}}} +@testintersect(T46784{T,S} where {T,S}, T46784, !Union{}) +@test T46784 <: T46784{T,S} where {T,S} + +#issue 36185 +let S = Tuple{Type{T},Array{Union{T,Missing},N}} where {T,N}, + T = Tuple{Type{T},Array{Union{T,Nothing},N}} where {T,N} + @testintersect(S, T, !Union{}) + @test_broken typeintersect(S, T) != S + @test_broken typeintersect(T, S) != T +end + +#issue 46736 +let S = Tuple{Val{T}, T} where {S1,T<:Val{Union{Nothing,S1}}}, + T = Tuple{Val{Val{Union{Nothing, S2}}}, Any} where S2 + @testintersect(S, T, !Union{}) + # not ideal (`S1` should be unbounded) + @test_broken testintersect(S, T) == Tuple{Val{Val{Union{Nothing, S1}}}, Val{Union{Nothing, S1}}} where S1<:(Union{Nothing, S2} where S2) +end + +#issue #47874:case1 +let S1 = Tuple{Int, Any, Union{Val{C1}, C1}} where {R1<:Real, C1<:Union{Complex{R1}, R1}}, + S2 = Tuple{Int, Any, Union{Val{C1}, C1} where {R1<:Real, C1<:Union{Complex{R1}, R1}}}, + T1 = Tuple{Any, Int, Union{Val{C2}, C2}} where {R2<:Real, C2<:Union{Complex{R2}, R2}}, + T2 = Tuple{Any, Int, V} where {R2<:Real, C2<:Union{Complex{R2}, R2}, V<:Union{Val{C2}, C2}} + for S in (S1, S2), T in (T1, T2) + @testintersect(S, T, !Union{}) + end +end + +#issue #47874:case2 +let S = Tuple{Int, Vararg{Val{C} where C<:Union{Complex{R}, R}}} where R + T = Tuple{Any, Vararg{Val{C} where C<:Union{Complex{R}, R}}} where R<:Real + I = Tuple{Any, Vararg{Val{C} where C<:Union{Complex{R}, R}}} where R<:Real + @testintersect(S, T, !Union{}) + @test_broken typeintersect(S, T) == I + @test_broken typeintersect(T, S) == I +end + +#issue #47874:case3 +let S = Tuple{Int, Tuple{Vararg{Val{C1} where C1<:Union{Complex{R1}, R1}}} where R1<:(Union{Real, V1} where V1), Tuple{Vararg{Val{C2} where C2<:Union{Complex{R2}, Complex{R3}, R3}}} where {R2<:(Union{Real, V2} where V2), R3<:Union{Complex{R2}, Real, R2}}}, + T = Tuple{Any, Tuple{Vararg{Val{CC1} where CC1<:Union{Complex{R}, R}}}, Tuple{Vararg{Val{CC2} where CC2<:Union{Complex{R}, R}}}} where R<:Real + @testintersect(S, T, !Union{}) +end + +let S = Tuple{T2, V2} where {T2, N2, V2<:(Array{S2, N2} where {S2 <: T2})}, + T = Tuple{V1, T1} where {T1, N1, V1<:(Array{S1, N1} where {S1 <: T1})} + @testintersect(S, T, !Union{}) +end + +# A simple case which has a small local union. +# make sure the env is not widened too much when we intersect(Int8, Int8). 
+struct T48006{A1,A2,A3} end +@testintersect(Tuple{T48006{Float64, Int, S1}, Int} where {F1<:Real, S1<:Union{Int8, Val{F1}}}, + Tuple{T48006{F2, I, S2}, I} where {F2<:Real, I<:Int, S2<:Union{Int8, Val{F2}}}, + Tuple{T48006{Float64, Int, S1}, Int} where S1<:Union{Val{Float64}, Int8}) + +f48167(::Type{Val{L2}}, ::Type{Union{Val{L1}, Set{R}}}) where {L1, R, L2<:L1} = 1 +f48167(::Type{Val{L1}}, ::Type{Union{Val{L2}, Set{R}}}) where {L1, R, L2<:L1} = 2 +f48167(::Type{Val{L}}, ::Type{Union{Val{L}, Set{R}}}) where {L, R} = 3 +@test f48167(Val{Nothing}, Union{Val{Nothing}, Set{Int}}) == 3 + +# https://github.com/JuliaLang/julia/pull/31167#issuecomment-1358381818 +let S = Tuple{Type{T1}, T1, Val{T1}} where T1<:(Val{S1} where S1<:Val), + T = Tuple{Union{Type{T2}, Type{S2}}, Union{Val{T2}, Val{S2}}, Union{Val{T2}, S2}} where T2<:Val{A2} where A2 where S2<:Val + I1 = typeintersect(S, T) + I2 = typeintersect(T, S) + @test I1 !== Union{} && I2 !== Union{} + @test_broken I1 <: S + @test_broken I2 <: T + @test_broken I2 <: S + @test_broken I2 <: T +end + +#issue 44395 +@testintersect(Tuple{Type{T}, T} where {T <: Vector{Union{T, R}} where {R<:Real, T<:Real}}, + Tuple{Type{Vector{Union{T, R}}}, Matrix{Union{T, R}}} where {R<:Real, T<:Real}, + Union{}) + +#issue 26487 +@testintersect(Tuple{Type{Tuple{T,Val{T}}}, Val{T}} where T, + Tuple{Type{Tuple{Val{T},T}}, Val{T}} where T, + Union{}) + +@test only(intersection_env(Val{Union{Val{Val{T}} where {T},Int}}, Val{Union{T,Int}} where T)[2]) === Val{Val{T}} where {T} + +# issue 47654 +Vec47654{T} = Union{AbstractVector{T}, AbstractVector{Union{T,Nothing}}} +struct Wrapper47654{T, V<:Vec47654{T}} + v::V +end +abstract type P47654{A} end +@test Wrapper47654{P47654, Vector{Union{P47654,Nothing}}} <: Wrapper47654 + +@testset "known subtype/intersect issue" begin + #issue 45874 + let S = Pair{Val{P}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where P, + T = Pair{Val{R}, AbstractVector{<:Union{P,<:AbstractMatrix{P}}}} where {P,R} + @test S <: T + end + + #issue 41561 + @test_broken typeintersect(Tuple{Vector{VT}, Vector{VT}} where {N1, VT<:AbstractVector{N1}}, + Tuple{Vector{VN} where {N, VN<:AbstractVector{N}}, Vector{Vector{Float64}}}) !== Union{} + #issue 40865 + @test Tuple{Set{Ref{Int}}, Set{Ref{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Ref{K}}} + @test Tuple{Set{Val{Int}}, Set{Val{Int}}} <: Tuple{Set{KV}, Set{K}} where {K,KV<:Union{K,Val{K}}} + + #issue 39099 + A = Tuple{Tuple{Int, Int, Vararg{Int, N}}, Tuple{Int, Vararg{Int, N}}, Tuple{Vararg{Int, N}}} where N + B = Tuple{NTuple{N, Int}, NTuple{N, Int}, NTuple{N, Int}} where N + @test_broken !(A <: B) + + #issue 35698 + @test_broken typeintersect(Type{Tuple{Array{T,1} where T}}, UnionAll) != Union{} + + #issue 33137 + @test_broken (Tuple{Q,Int} where Q<:Int) <: Tuple{T,T} where T + + # issue 24333 + @test (Type{Union{Ref,Cvoid}} <: Type{Union{T,Cvoid}} where T) + + # issue 22123 + t1 = Ref{Ref{Ref{Union{Int64, T}}} where T} + t2 = Ref{Ref{Ref{Union{T, S}}} where T} where S + @test t1 <: t2 + + # issue 21153 + @test_broken (Tuple{T1,T1} where T1<:(Val{T2} where T2)) <: (Tuple{Val{S},Val{S}} where S) +end + +# issue #47658 +let T = Ref{NTuple{8, Ref{Union{Int, P}}}} where P, + S = Ref{NTuple{8, Ref{Union{Int, P}}}} where P + # note T and S are identical but we need 2 copies to avoid being fooled by pointer equality + @test T <: Union{Int, S} +end + +# try to fool a greedy algorithm that picks X=Int, Y=String here +@test Tuple{Ref{Union{Int,String}}, Ref{Union{Int,String}}} <: 
Tuple{Ref{Union{X,Y}}, Ref{X}} where {X,Y} +@test Tuple{Ref{Union{Int,String,Missing}}, Ref{Union{Int,String}}} <: Tuple{Ref{Union{X,Y}}, Ref{X}} where {X,Y} + +@test !(Tuple{Any, Any, Any} <: Tuple{Any, Vararg{T}} where T) + +# issue #39967 +@test (NTuple{27, T} where {S, T<:Union{Array, Array{S}}}) <: Tuple{Array, Array, Vararg{AbstractArray, 25}} + +abstract type MyAbstract47877{C}; end +struct MyType47877{A,B} <: MyAbstract47877{A} end +let A = Tuple{Type{T}, T} where T, + B = Tuple{Type{MyType47877{W, V} where V<:Union{Base.BitInteger, MyAbstract47877{W}}}, MyAbstract47877{<:Base.BitInteger}} where W + C = Tuple{Type{MyType47877{W, V} where V<:Union{MyAbstract47877{W}, Base.BitInteger}}, MyType47877{W, V} where V<:Union{MyAbstract47877{W}, Base.BitInteger}} where W<:Base.BitInteger + # ensure that merge_env for innervars does not blow up (the large Unions ensure this will take excessive memory if it does) + @testintersect(A, B, C) +end + +let + a = (isodd(i) ? Pair{Char, String} : Pair{String, String} for i in 1:2000) + @test Tuple{Type{Pair{Union{Char, String}, String}}, a...} <: Tuple{Type{Pair{K, V}}, Vararg{Pair{A, B} where B where A}} where V where K + a = (isodd(i) ? Matrix{Int} : Vector{Int} for i in 1:4000) + @test Tuple{Type{Pair{Union{Char, String}, String}}, a...,} <: Tuple{Type{Pair{K, V}}, Vararg{Array}} where V where K +end + +#issue 48582 +@test !<:(Tuple{Pair{<:T,<:T}, Val{S} where {S}} where {T<:Base.BitInteger}, + Tuple{Pair{<:T,<:T}, Val{Int}} where {T<:Base.BitInteger}) + +struct T48695{T, N, H<:AbstractArray} <: AbstractArray{Union{Missing, T}, N} end +struct S48695{T, N, H<:AbstractArray{T, N}} <: AbstractArray{T, N} end +let S = Tuple{Type{S48695{T, 2, T48695{B, 2, C}}} where {T<:(Union{Missing, A} where A), B, C}, T48695{T, 2} where T}, + T = Tuple{Type{S48695{T, N, H}}, H} where {T, N, H<:AbstractArray{T, N}} + V = typeintersect(S, T) + vars_in_unionall(s) = s isa UnionAll ? (s.var, vars_in_unionall(s.body)...) 
: () + @test V != Union{} + @test allunique(vars_in_unionall(V)) + @test typeintersect(V, T) != Union{} +end + +#issue 48961 +@test !<:(Type{Union{Missing, Int}}, Type{Union{Missing, Nothing, Int}}) + +#issue 49127 +struct F49127{m,n} <: Function end +let a = [TypeVar(:V, Union{}, Function) for i in 1:32] + b = a[1:end-1] + S = foldr((v, d) -> UnionAll(v, d), a; init = foldl((i, j) -> F49127{i, j}, a)) + T = foldr((v, d) -> UnionAll(v, d), b; init = foldl((i, j) -> F49127{i, j}, b)) + @test S <: T +end + +# requires assertions enabled (to test union-split in `obviously_disjoint`) +@test !<:(Tuple{Type{Int}, Int}, Tuple{Type{Union{Int, T}}, T} where T<:Union{Int8,Int16}) +@test <:(Tuple{Type{Int}, Int}, Tuple{Type{Union{Int, T}}, T} where T<:Union{Int8,Int}) + +#issue #49354 (requires assertions enabled) +@test !<:(Tuple{Type{Union{Int, Val{1}}}, Int}, Tuple{Type{Union{Int, T1}}, T1} where T1<:Val) +@test !<:(Tuple{Type{Union{Int, Val{1}}}, Int}, Tuple{Type{Union{Int, T1}}, T1} where T1<:Union{Val,Pair}) +@test <:(Tuple{Type{Union{Int, Val{1}}}, Int}, Tuple{Type{Union{Int, T1}}, T1} where T1<:Union{Integer,Val}) +@test <:(Tuple{Type{Union{Int, Int8}}, Int}, Tuple{Type{Union{Int, T1}}, T1} where T1<:Integer) +@test !<:(Tuple{Type{Union{Pair{Int, Any}, Pair{Int, Int}}}, Pair{Int, Any}}, + Tuple{Type{Union{Pair{Int, Any}, T1}}, T1} where T1<:(Pair{T,T} where {T})) + +let A = Tuple{Type{T}, T, Val{T}} where T, + B = Tuple{Type{S}, Val{S}, Val{S}} where S + @test_broken typeintersect(A, B) == Tuple{Type{T}, Val{T}, Val{T}} where T>:Val + @test typeintersect(A, B) <: Tuple{Type{T}, Val{T}, Val{T}} where T +end +let A = Tuple{Type{T}, T, Val{T}} where T<:Val, + B = Tuple{Type{S}, Val{S}, Val{S}} where S<:Val + @test_broken typeintersect(A, B) == Tuple{Type{Val}, Val{Val}, Val{Val}} + @test typeintersect(A, B) <: Tuple{Type{T}, Val{T}, Val{T}} where T<:Val +end +let A = Tuple{Type{T}, T, Val{T}} where T<:Val, + B = Tuple{Type{S}, Val{S}, Val{S}} where S<:Val{A} where A + @test typeintersect(A, B) == Union{} +end +let A = Tuple{Type{T}, T, Val{T}} where T<:Val{<:Val}, + B = Tuple{Type{S}, Val{S}, Val{S}} where S<:Val + @test_broken typeintersect(A, B) == Tuple{Type{Val{<:Val}}, Val{Val{<:Val}}, Val{Val{<:Val}}} + @test typeintersect(A, B) <: Tuple{Type{T}, Val{T}, Val{T}} where T<:(Val{<:Val}) +end +let T = Tuple{Union{Type{T}, Type{S}}, Union{Val{T}, Val{S}}, Union{Val{T}, S}} where T<:Val{A} where A where S<:Val, + S = Tuple{Type{T}, T, Val{T}} where T<:(Val{S} where S<:Val) + # optimal = Union{}? 
+ @test typeintersect(T, S) == Tuple{Type{A}, Union{Val{A}, Val{S} where S<:Union{Val, A}, Val{x} where x<:Val, Val{x} where x<:Union{Val, A}}, Val{A}} where A<:(Val{S} where S<:Val) + @test typeintersect(S, T) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {T<:Val, S<:(Union{Val{A}, Val} where A)} +end + +#issue #49857 +@test !<:(Type{Vector{Union{Base.BitInteger, Base.IEEEFloat, StridedArray, Missing, Nothing, Val{T}}}} where {T}, Type{Array{T}} where {T}) + +#issue 50195 +let a = Tuple{Type{X} where X<:Union{Nothing, Val{X1} where {X4, X1<:(Pair{X2, Val{X2}} where X2<:Val{X4})}}}, + b = Tuple{Type{Y} where Y<:(Val{Y1} where {Y4<:Src, Y1<:(Pair{Y2, Val{Y2}} where Y2<:Union{Val{Y4}, Y4})})} where Src + @test typeintersect(a, b) <: Any +end + +#issue 50195 +let a = Tuple{Union{Nothing, Type{Pair{T1}} where T1}} + b = Tuple{Type{X2} where X2<:(Pair{T2, Y2} where {Src, Z2<:Src, Y2<:Union{Val{Z2}, Z2}})} where T2 + @test !Base.has_free_typevars(typeintersect(a, b)) +end diff --git a/test/syntax.jl b/test/syntax.jl index 3d306e8c2d780..4d204f3e29364 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3,23 +3,31 @@ # tests for parser and syntax lowering using Random +using Base: remove_linenums! -import Base.Meta.ParseError +using_JuliaSyntax = parentmodule(Core._parse) != Core.Compiler -function parseall(str) - pos = firstindex(str) - exs = [] - while pos <= lastindex(str) - ex, pos = Meta.parse(str, pos) - push!(exs, ex) - end - if length(exs) == 0 - throw(ParseError("end of input")) - elseif length(exs) == 1 - return exs[1] +macro test_parseerror(str, msg) + if using_JuliaSyntax + # Diagnostics are tested separately in JuliaSyntax + ex = :(@test_throws Meta.ParseError Meta.parse($(esc(str)))) else - return Expr(:block, exs...) + ex = :(@test_throws Meta.ParseError($(esc(msg))) Meta.parse($(esc(str)))) end + ex.args[2] = __source__ + return ex +end + +macro test_parseerror(str) + ex = :(@test_throws Meta.ParseError Meta.parse($(esc(str)))) + ex.args[2] = __source__ + return ex +end + +function parseall_nolines(str) + ex = Meta.parseall(str) + filter!(e->!(e isa LineNumberNode), ex.args) + return ex end # issue #9684 @@ -38,13 +46,8 @@ end # issue #9704 let a = :a - @test :(try - catch $a - end) == :(try - catch a - end) - @test :(module $a end) == :(module a - end) + @test :(try catch $a end) == :(try catch a end) + @test :(module $a end) == :(module a end) end # string literals @@ -64,19 +67,19 @@ macro test999_str(args...); args; end @test test999"foo"123 == ("foo", 123) # issue #5997 -@test_throws ParseError Meta.parse(": x") -@test_throws ParseError Meta.parse("""begin +@test_parseerror ": x" +@test_parseerror """begin : - x""") -@test_throws ParseError Meta.parse("d[: 2]") + x""" +@test_parseerror "d[: 2]" # issue #6770 -@test_throws ParseError Meta.parse("x.3") +@test_parseerror "x.3" # issue #8763 -@test_throws ParseError Meta.parse("sqrt(16)2") -@test_throws ParseError Meta.parse("x' y") -@test_throws ParseError Meta.parse("x 'y") +@test_parseerror "sqrt(16)2" +@test_parseerror "x' y" +@test_parseerror "x 'y" @test Meta.parse("x'y") == Expr(:call, :*, Expr(Symbol("'"), :x), :y) # issue #18851 @@ -88,22 +91,22 @@ macro test999_str(args...); args; end @test Meta.parse("-2(m)") == Expr(:call, :*, -2, :m) # issue #8301 -@test_throws ParseError Meta.parse("&*s") +@test_parseerror "&*s" # issue #10677 -@test_throws ParseError Meta.parse("/1") -@test_throws ParseError Meta.parse("/pi") +@test_parseerror "/1" +@test_parseerror "/pi" @test Meta.parse("- = 2") == Expr(:(=), :(-), 2) 
@test Meta.parse("/ = 2") == Expr(:(=), :(/), 2) -@test_throws ParseError Meta.parse("< : 2") -@test_throws ParseError Meta.parse("+ : 2") -@test_throws ParseError Meta.parse("< :2") +@test_parseerror "< : 2" +@test_parseerror "+ : 2" +@test_parseerror "< :2" @test Meta.parse("+ :2") == Expr(:call, :(+), QuoteNode(2)) # issue #10900 -@test_throws ParseError Meta.parse("+=") -@test_throws ParseError Meta.parse(".") -@test_throws ParseError Meta.parse("...") +@test_parseerror "+=" +@test_parseerror "." +@test_parseerror "..." # issue #10901 @test Meta.parse("/([1], 1)[1]") == :(([1] / 1)[1]) @@ -156,35 +159,35 @@ macro test999_str(args...); args; end Expr(:., Expr(:$, :c), Expr(:$, :d)))) # fix pr #11338 and test for #11497 -@test parseall("using \$\na") == Expr(:block, Expr(:using, Expr(:., :$)), :a) -@test parseall("using \$,\na") == Expr(:using, Expr(:., :$), Expr(:., :a)) -@test parseall("using &\na") == Expr(:block, Expr(:using, Expr(:., :&)), :a) +@test parseall_nolines("using \$\na") == Expr(:toplevel, Expr(:using, Expr(:., :$)), :a) +@test parseall_nolines("using \$,\na") == Expr(:toplevel, Expr(:using, Expr(:., :$), Expr(:., :a))) +@test parseall_nolines("using &\na") == Expr(:toplevel, Expr(:using, Expr(:., :&)), :a) -@test parseall("a = &\nb") == Expr(:block, Expr(:(=), :a, :&), :b) -@test parseall("a = \$\nb") == Expr(:block, Expr(:(=), :a, :$), :b) -@test parseall(":(a = &\nb)") == Expr(:quote, Expr(:(=), :a, Expr(:&, :b))) -@test parseall(":(a = \$\nb)") == Expr(:quote, Expr(:(=), :a, Expr(:$, :b))) +@test parseall_nolines("a = &\nb") == Expr(:toplevel, Expr(:(=), :a, :&), :b) +@test parseall_nolines("a = \$\nb") == Expr(:toplevel, Expr(:(=), :a, :$), :b) +@test parseall_nolines(":(a = &\nb)") == Expr(:toplevel, Expr(:quote, Expr(:(=), :a, Expr(:&, :b)))) +@test parseall_nolines(":(a = \$\nb)") == Expr(:toplevel, Expr(:quote, Expr(:(=), :a, Expr(:$, :b)))) # issue 12027 - short macro name parsing vs _str suffix -@test parseall(""" - macro f(args...) end; @f "macro argument" +@test parseall_nolines(""" + macro f(args...) end\n@f "macro argument" """) == Expr(:toplevel, Expr(:macro, Expr(:call, :f, Expr(:..., :args)), Expr(:block, LineNumberNode(1, :none), LineNumberNode(1, :none))), - Expr(:macrocall, Symbol("@f"), LineNumberNode(1, :none), "macro argument")) + Expr(:macrocall, Symbol("@f"), LineNumberNode(2, :none), "macro argument")) # blocks vs. tuples @test Meta.parse("()") == Expr(:tuple) @test Meta.parse("(;)") == Expr(:tuple, Expr(:parameters)) @test Meta.parse("(;;)") == Expr(:block) @test Meta.parse("(;;;;)") == Expr(:block) -@test_throws ParseError Meta.parse("(,)") -@test_throws ParseError Meta.parse("(;,)") -@test_throws ParseError Meta.parse("(,;)") +@test_parseerror "(,)" +@test_parseerror "(;,)" +@test_parseerror "(,;)" # TODO: would be nice to make these errors, but needed to parse e.g. 
`(x;y,)->x` -#@test_throws ParseError Meta.parse("(1;2,)") -#@test_throws ParseError Meta.parse("(1;2,;)") -#@test_throws ParseError Meta.parse("(1;2,;3)") +#@test_parseerror "(1;2,)" +#@test_parseerror "(1;2,;)" +#@test_parseerror "(1;2,;3)" @test Meta.parse("(x;)") == Expr(:block, :x) @test Meta.parse("(;x)") == Expr(:tuple, Expr(:parameters, :x)) @test Meta.parse("(;x,)") == Expr(:tuple, Expr(:parameters, :x)) @@ -201,7 +204,7 @@ macro test999_str(args...); args; end @test Meta.parse("(x,a;y=1)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :y, 1)), :x, :a) @test Meta.parse("(x,a;y=1,z=2)") == Expr(:tuple, Expr(:parameters, Expr(:kw,:y,1), Expr(:kw,:z,2)), :x, :a) @test Meta.parse("(a=1, b=2)") == Expr(:tuple, Expr(:(=), :a, 1), Expr(:(=), :b, 2)) -@test_throws ParseError Meta.parse("(1 2)") # issue #15248 +@test_parseerror "(1 2)" # issue #15248 @test Meta.parse("f(x;)") == Expr(:call, :f, Expr(:parameters), :x) @@ -272,13 +275,16 @@ end @test_throws BoundsError Meta.parse("x = 1", 7) # issue #14683 -@test_throws ParseError Meta.parse("'\\A\"'") +@test_parseerror "'\\A\"'" @test Meta.parse("'\"'") == Meta.parse("'\\\"'") == '"' == "\""[1] == '\42' # issue #24558 @test '\u2200' == "\u2200"[1] -@test_throws ParseError Meta.parse("f(2x for x=1:10, y") +if !using_JuliaSyntax + # This should be Expr(:incomplete) + @test_parseerror "f(2x for x=1:10, y" +end # issue #15223 call0(f) = f() @@ -314,11 +320,6 @@ let p = 15 @test 2p+1 == 31 # not a hex float literal end -macro test_parseerror(str, msg) - ex = :(@test_throws ParseError($(esc(msg))) Meta.parse($(esc(str)))) - ex.args[2] = __source__ - return ex -end @test_parseerror("0x", "invalid numeric constant \"0x\"") @test_parseerror("0b", "invalid numeric constant \"0b\"") @test_parseerror("0o", "invalid numeric constant \"0o\"") @@ -326,9 +327,8 @@ end @test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"") # issue #15798 -@test Meta.lower(Main, Base.parse_input_line(""" - try = "No" - """)) == Expr(:error, "unexpected \"=\"") +# lowering preserves Expr(:error) +@test Meta.lower(Main, Expr(:error, "no")) == Expr(:error, "no") # issue #19861 make sure macro-expansion happens in the newest world for top-level expression @test eval(Base.parse_input_line(""" @@ -372,9 +372,9 @@ add_method_to_glob_fn!() @test f15844(Int64(1)) == 3 # issue #15661 -@test_throws ParseError Meta.parse("function catch() end") -@test_throws ParseError Meta.parse("function end() end") -@test_throws ParseError Meta.parse("function finally() end") +@test_parseerror "function catch() end" +@test_parseerror "function end() end" +@test_parseerror "function finally() end" # PR #16170 @test Meta.lower(Main, Meta.parse("true(x) = x")) == Expr(:error, "invalid function name \"true\"") @@ -425,18 +425,18 @@ end :y)) # test that pre 0.5 deprecated syntax is a parse error -@test_throws ParseError Meta.parse("Int [1,2,3]") -@test_throws ParseError Meta.parse("Int [x for x in 1:10]") -@test_throws ParseError Meta.parse("foo (x) = x") -@test_throws ParseError Meta.parse("foo {T<:Int}(x::T) = x") +@test_parseerror "Int [1,2,3]" +@test_parseerror "Int [x for x in 1:10]" +@test_parseerror "foo (x) = x" +@test_parseerror "foo {T<:Int}(x::T) = x" -@test_throws ParseError Meta.parse("Foo .bar") +@test_parseerror "Foo .bar" -@test_throws ParseError Meta.parse("import x .y") -@test_throws ParseError Meta.parse("using x .y") +@test_parseerror "import x .y" +@test_parseerror "using x .y" -@test_throws ParseError Meta.parse("--x") -@test_throws ParseError Meta.parse("stagedfunction 
foo(x); end") +@test_parseerror "--x" +@test_parseerror "stagedfunction foo(x); end" @test Meta.parse("A=>B") == Expr(:call, :(=>), :A, :B) @@ -452,7 +452,7 @@ end @test Meta.parse("[a,;c]") == Expr(:vect, Expr(:parameters, :c), :a) @test Meta.parse("a[b,c;d]") == Expr(:ref, :a, Expr(:parameters, :d), :b, :c) @test Meta.parse("a[b,;d]") == Expr(:ref, :a, Expr(:parameters, :d), :b) -@test_throws ParseError Meta.parse("[a,;,b]") +@test_parseerror "[a,;,b]" @test Meta.parse("{a,b;c}") == Expr(:braces, Expr(:parameters, :c), :a, :b) @test Meta.parse("{a,;c}") == Expr(:braces, Expr(:parameters, :c), :a) @test Meta.parse("a{b,c;d}") == Expr(:curly, :a, Expr(:parameters, :d), :b, :c) @@ -501,6 +501,10 @@ let m_error, error_out, filename = Base.source_path() m_error = try @eval foo(types::NTuple{N}, values::Vararg{Any,N}, c) where {N} = nothing; catch e; e; end error_out = sprint(showerror, m_error) @test startswith(error_out, "ArgumentError: Vararg on non-final argument") + + m_error = try @eval method_c6(a::Vararg{:A}) = 1; catch e; e; end + error_out = sprint(showerror, m_error) + @test startswith(error_out, "ArgumentError: invalid type for argument a in method definition for method_c6 at $filename:") end # issue #7272 @@ -538,15 +542,20 @@ for (str, tag) in Dict("" => :none, "\"" => :string, "#=" => :comment, "'" => :c "let;" => :block, "for i=1;" => :block, "function f();" => :block, "f() do x;" => :block, "module X;" => :block, "mutable struct X;" => :block, "struct X;" => :block, "(" => :other, "[" => :other, - "begin" => :other, "quote" => :other, - "let" => :other, "for" => :other, "function" => :other, + "for" => :other, "function" => :other, "f() do" => :other, "module" => :other, "mutable struct" => :other, - "struct" => :other) + "struct" => :other, + "quote" => using_JuliaSyntax ? :block : :other, + "let" => using_JuliaSyntax ? :block : :other, + "begin" => using_JuliaSyntax ? 
:block : :other, + ) @test Base.incomplete_tag(Meta.parse(str, raise=false)) == tag end # meta nodes for optional positional arguments -@test Meta.lower(Main, :(@inline f(p::Int=2) = 3)).args[1].code[end-1].args[3].inlineable +let src = Meta.lower(Main, :(@inline f(p::Int=2) = 3)).args[1].code[end-2].args[3] + @test Core.Compiler.is_declared_inline(src) +end # issue #16096 module M16096 @@ -624,7 +633,7 @@ end # issue 10046 for op in ["+", "-", "\$", "|", ".+", ".-", "*", ".*"] - @test_throws ParseError Meta.parse("$op in [+, -]") + @test_parseerror "$op in [+, -]" end # issue #17701 @@ -636,7 +645,7 @@ end # PR #15592 let str = "[1] [2]" - @test_throws ParseError Meta.parse(str) + @test_parseerror str end # issue 15896 and PR 15913 @@ -649,7 +658,7 @@ function get_expr_list(ex::Core.CodeInfo) return ex.code::Array{Any,1} end function get_expr_list(ex::Expr) - if ex.head == :thunk + if ex.head === :thunk return get_expr_list(ex.args[1]) else return ex.args @@ -704,7 +713,7 @@ m1_exprs = get_expr_list(Meta.lower(@__MODULE__, quote @m1 end)) let low3 = Meta.lower(@__MODULE__, quote @m3 end) m3_exprs = get_expr_list(low3) ci = low3.args[1]::Core.CodeInfo - @test ci.codelocs == [3, 1] + @test ci.codelocs in ([4, 4, 2], [4, 2]) @test is_return_ssavalue(m3_exprs[end]) end @@ -755,7 +764,7 @@ end if test + test == test println(test) end -```.head == :if +```.head === :if end @@ -837,6 +846,8 @@ let ε=1, μ=2, x=3, î=4, ⋅=5, (-)=6 @test Meta.parse("100.0f\u22122") === Meta.parse("100.0f-2") @test Meta.parse("0x100p\u22128") === Meta.parse("0x100P\u22128") === Meta.parse("0x100p-8") @test (−) == (-) == 6 + # hbar ℏ to ħ - (#48870) + @test :ℏ === :ħ end # issue #8925 @@ -846,6 +857,14 @@ end @test c8925 == 3 && isconst(@__MODULE__, :c8925) @test d8925 == 4 && isconst(@__MODULE__, :d8925) +# issue #47168 +let t47168 = (;a47168 = 1, b47168 = 2); + global const (;a47168, b47168) = t47168 + @test a47168 == 1 && isconst(@__MODULE__, :a47168) + @test b47168 == 2 && isconst(@__MODULE__, :b47168) +end +@test (let x = (;x=1); let (;x) = x; x; end, x; end) == (1, (x = 1,)) + # issue #18754: parse ccall as a regular function @test Meta.parse("ccall([1], 2)[3]") == Expr(:ref, Expr(:call, :ccall, Expr(:vect, 1), 2), 3) @test Meta.parse("ccall(a).member") == Expr(:., Expr(:call, :ccall, :a), QuoteNode(:member)) @@ -853,7 +872,7 @@ end # Check that the body of a `where`-qualified short form function definition gets # a :block for its body short_where_call = :(f(x::T) where T = T) -@test short_where_call.args[2].head == :block +@test short_where_call.args[2].head === :block # `where` with multi-line anonymous functions let f = function (x::T) where T @@ -868,6 +887,12 @@ let f = function (x::T, y::S) where T<:S where S @test f(0,1) === (Int,Int) end +# issue #45506 +@test :( function (a) where {B, C} end).args[1] == Expr(:where, Expr(:tuple, :a), :B, :C) +@test (function(::Type{Tuple{A45506, B45506}}) where {A45506 <: Any, B45506 <: Any} + B45506 +end)(Tuple{Int8, Int16}) == Int16 + # issue #20541 @test Meta.parse("[a .!b]") == Expr(:hcat, :a, Expr(:call, :.!, :b)) @@ -983,14 +1008,14 @@ end @test Test21604.X(1.0) === Test21604.X(1.0) # issue #20575 -@test_throws ParseError Meta.parse("\"a\"x") -@test_throws ParseError Meta.parse("\"a\"begin end") -@test_throws ParseError Meta.parse("\"a\"begin end\"b\"") +@test_parseerror "\"a\"x" +@test_parseerror "\"a\"begin end" +@test_parseerror "\"a\"begin end\"b\"" # issue #16427 -@test_throws ParseError Meta.parse("for i=1:1 end(3)") -@test_throws ParseError 
Meta.parse("begin end(3)") -@test_throws ParseError Meta.parse("while false end(3)") +@test_parseerror "for i=1:1 end(3)" +@test_parseerror "begin end(3)" +@test_parseerror "while false end(3)" # comment 298107224 on pull #21607 module Test21607 @@ -1051,7 +1076,7 @@ end === (3, String) @test Meta.parse("3 +⁽¹⁾ 4") == Expr(:call, :+⁽¹⁾, 3, 4) @test Meta.parse("3 +₍₀₎ 4") == Expr(:call, :+₍₀₎, 3, 4) for bad in ('=', '$', ':', "||", "&&", "->", "<:") - @test_throws ParseError Meta.parse("3 $(bad)⁽¹⁾ 4") + @test_parseerror "3 $(bad)⁽¹⁾ 4" end @test Base.operator_precedence(:+̂) == Base.operator_precedence(:+) @@ -1066,20 +1091,20 @@ end Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) -@test_throws ParseError Meta.parse("[2for i=1:10]") -@test_throws ParseError Meta.parse("[1 for i in 1:2for j in 2]") -@test_throws ParseError Meta.parse("(1 for i in 1:2for j in 2)") +@test_parseerror "[2for i=1:10]" +@test_parseerror "[1 for i in 1:2for j in 2]" +@test_parseerror "(1 for i in 1:2for j in 2)" # issue #20441 -@test_throws ParseError Meta.parse("[x.2]") -@test_throws ParseError Meta.parse("x.2") +@test_parseerror "[x.2]" +@test_parseerror "x.2" @test Meta.parse("[x;.2]") == Expr(:vcat, :x, 0.2) # issue #22840 @test Meta.parse("[:a :b]") == Expr(:hcat, QuoteNode(:a), QuoteNode(:b)) # issue #22868 -@test_throws ParseError Meta.parse("x@time 2") -@test_throws ParseError Meta.parse("@ time") +@test_parseerror "x@time 2" +@test_parseerror "@ time" # issue #7479 @test Meta.lower(Main, Meta.parse("(true &&& false)")) == Expr(:error, "invalid syntax &false") @@ -1088,9 +1113,9 @@ end @test Meta.lower(Main, :(&(1, 2))) == Expr(:error, "invalid syntax &(1, 2)") # if an indexing expression becomes a cat expression, `end` is not special -@test_throws ParseError Meta.parse("a[end end]") -@test_throws ParseError Meta.parse("a[end;end]") -#@test_throws ParseError Meta.parse("a[end;]") # this is difficult to fix +@test_parseerror "a[end end]" +@test_parseerror "a[end;end]" +#@test_parseerror "a[end;]" # this is difficult to fix let a = rand(8), i = 3 @test a[[1:i-1; i+1:end]] == a[[1,2,4,5,6,7,8]] end @@ -1101,12 +1126,12 @@ end end for i = 1:5] == fill(nothing, 5) # issue #18912 -@test_throws ParseError Meta.parse("(::)") +@test_parseerror "(::)" @test Meta.parse(":(::)") == QuoteNode(Symbol("::")) -@test_throws ParseError Meta.parse("f(::) = ::") +@test_parseerror "f(::) = ::" @test Meta.parse("(::A)") == Expr(Symbol("::"), :A) -@test_throws ParseError Meta.parse("(::, 1)") -@test_throws ParseError Meta.parse("(1, ::)") +@test_parseerror "(::, 1)" +@test_parseerror "(1, ::)" # issue #18650 let ex = Meta.parse("maximum(@elapsed sleep(1) for k = 1:10)") @@ -1168,17 +1193,20 @@ end @test Meta.parse("@Mdl.foo [1] + [2]") == Meta.parse("@Mdl.foo([1] + [2])") # issue #24289 +module M24289 macro m24289() :(global $(esc(:x24289)) = 1) end -@test (@macroexpand @m24289) == :(global x24289 = 1) +end +M24289.@m24289 +@test x24289 === 1 # parsing numbers with _ and . @test Meta.parse("1_2.3_4") == 12.34 -@test_throws ParseError Meta.parse("1._") -@test_throws ParseError Meta.parse("1._5") -@test_throws ParseError Meta.parse("1e.3") -@test_throws ParseError Meta.parse("1e3.") +@test_parseerror "1._" +@test_parseerror "1._5" +@test_parseerror "1e.3" +@test_parseerror "1e3." 
@test Meta.parse("2e_1") == Expr(:call, :*, 2, :e_1) # issue #17705 @test Meta.parse("2e3_") == Expr(:call, :*, 2e3, :_) @@ -1244,8 +1272,10 @@ end @test raw"x \\\ y" == "x \\\\\\ y" end -@test_throws ParseError("expected \"}\" or separator in arguments to \"{ }\"; got \"V)\"") Meta.parse("f(x::V) where {V) = x") -@test_throws ParseError("expected \"]\" or separator in arguments to \"[ ]\"; got \"1)\"") Meta.parse("[1)") +@test_parseerror("f(x::V) where {V) = x", + "expected \"}\" or separator in arguments to \"{ }\"; got \"V)\"") +@test_parseerror("[1)", + "expected \"]\" or separator in arguments to \"[ ]\"; got \"1)\"") # issue #9972 @test Meta.lower(@__MODULE__, :(f(;3))) == Expr(:error, "invalid keyword argument syntax \"3\"") @@ -1293,7 +1323,7 @@ let getindex = 0, setindex! = 1, colon = 2, vcat = 3, hcat = 4, hvcat = 5 end # issue #25020 -@test_throws ParseError Meta.parse("using Colors()") +@test_parseerror "using Colors()" let ex = Meta.parse("md\"x\" f(x) = x", 1)[1] # custom string literal is not a docstring @@ -1347,18 +1377,18 @@ end @test Meta.parse("-(x;;;)^2") == Expr(:call, :-, Expr(:call, :^, Expr(:block, :x), 2)) @test Meta.parse("+((1,2))") == Expr(:call, :+, Expr(:tuple, 1, 2)) -@test_throws ParseError("space before \"(\" not allowed in \"+ (\" at none:1") Meta.parse("1 -+ (a=1, b=2)") +@test_parseerror "1 -+ (a=1, b=2)" "space before \"(\" not allowed in \"+ (\" at none:1" # issue #29781 -@test_throws ParseError("space before \"(\" not allowed in \"sin. (\" at none:1") Meta.parse("sin. (1)") +@test_parseerror "sin. (1)" "space before \"(\" not allowed in \"sin. (\" at none:1" # Parser errors for disallowed space contain line numbers -@test_throws ParseError("space before \"[\" not allowed in \"f() [\" at none:2") Meta.parse("\nf() [i]") -@test_throws ParseError("space before \"(\" not allowed in \"f() (\" at none:2") Meta.parse("\nf() (i)") -@test_throws ParseError("space before \".\" not allowed in \"f() .\" at none:2") Meta.parse("\nf() .i") -@test_throws ParseError("space before \"{\" not allowed in \"f() {\" at none:2") Meta.parse("\nf() {i}") -@test_throws ParseError("space before \"m\" not allowed in \"@ m\" at none:2") Meta.parse("\n@ m") -@test_throws ParseError("space before \".\" not allowed in \"a .\" at none:2") Meta.parse("\nusing a .b") -@test_throws ParseError("space before \".\" not allowed in \"a .\" at none:2") Meta.parse("\nusing a .b") -@test_throws ParseError("space before \"(\" not allowed in \"+ (\" at none:2") Meta.parse("\n+ (x, y)") +@test_parseerror "\nf() [i]" "space before \"[\" not allowed in \"f() [\" at none:2" +@test_parseerror "\nf() (i)" "space before \"(\" not allowed in \"f() (\" at none:2" +@test_parseerror "\nf() .i" "space before \".\" not allowed in \"f() .\" at none:2" +@test_parseerror "\nf() {i}" "space before \"{\" not allowed in \"f() {\" at none:2" +@test_parseerror "\n@ m" "space before \"m\" not allowed in \"@ m\" at none:2" +@test_parseerror "\nusing a .b" "space before \".\" not allowed in \"a .\" at none:2" +@test_parseerror "\nusing a .b" "space before \".\" not allowed in \"a .\" at none:2" +@test_parseerror "\n+ (x, y)" "space before \"(\" not allowed in \"+ (\" at none:2" @test Meta.parse("1 -+(a=1, b=2)") == Expr(:call, :-, 1, Expr(:call, :+, Expr(:kw, :a, 1), Expr(:kw, :b, 2))) @@ -1380,7 +1410,7 @@ end @test Meta.parse("-√2") == Expr(:call, :-, Expr(:call, :√, 2)) @test Meta.parse("√3x^2") == Expr(:call, :*, Expr(:call, :√, 3), Expr(:call, :^, :x, 2)) @test Meta.parse("-3x^2") == Expr(:call, :*, -3, 
Expr(:call, :^, :x, 2)) -@test_throws ParseError Meta.parse("2!3") +@test_parseerror "2!3" # issue #27914 @test Meta.parse("2f(x)") == Expr(:call, :*, 2, Expr(:call, :f, :x)) @@ -1390,7 +1420,7 @@ end @test Meta.parse("2(x)") == Expr(:call, :*, 2, :x) @test Meta.parse("2(x)y") == Expr(:call, :*, 2, :x, :y) -@test_throws ParseError Meta.parse("a.: b") +@test_parseerror "a.: b" @test Meta.parse("a.:end") == Expr(:., :a, QuoteNode(:end)) @test Meta.parse("a.:catch") == Expr(:., :a, QuoteNode(:catch)) @test Meta.parse("a.end") == Expr(:., :a, QuoteNode(:end)) @@ -1406,7 +1436,7 @@ let len = 10 end # Module name cannot be a reserved word. -@test_throws ParseError Meta.parse("module module end") +@test_parseerror "module module end" @test Meta.lower(@__MODULE__, :(global true)) == Expr(:error, "invalid syntax in \"global\" declaration") @test Meta.lower(@__MODULE__, :(let ccall end)) == Expr(:error, "invalid identifier name \"ccall\"") @@ -1423,7 +1453,7 @@ end # issue #27690 # previously, this was allowed since it thought `end` was being used for indexing. # however the quote should disable that context. -@test_throws ParseError Meta.parse("Any[:(end)]") +@test_parseerror "Any[:(end)]" # issue #17781 let ex = Meta.lower(@__MODULE__, Meta.parse(" @@ -1542,8 +1572,8 @@ end # issue #27129 f27129(x = 1) = (@inline; x) -for meth in methods(f27129) - @test ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), meth, C_NULL, meth.source).inlineable +for method in methods(f27129) + @test Core.Compiler.is_declared_inline(method) end # issue #27710 @@ -1646,26 +1676,28 @@ end macro foo28244(sym) x = :(bar()) push!(x.args, Expr(sym)) - x + esc(x) +end +@test @macroexpand(@foo28244(kw)) == Expr(:call, :bar, Expr(:kw)) +let x = @macroexpand @foo28244(var"let") + @test Meta.lower(@__MODULE__, x) == Expr(:error, "malformed expression") end -@test (@macroexpand @foo28244(kw)) == Expr(:call, GlobalRef(@__MODULE__,:bar), Expr(:kw)) -@test eval(:(@macroexpand @foo28244($(Symbol("let"))))) == Expr(:error, "malformed expression") # #16356 -@test_throws ParseError Meta.parse("0xapi") +@test_parseerror "0xapi" # #22523 #22712 -@test_throws ParseError Meta.parse("a?b:c") -@test_throws ParseError Meta.parse("a ?b:c") -@test_throws ParseError Meta.parse("a ? b:c") -@test_throws ParseError Meta.parse("a ? b :c") -@test_throws ParseError Meta.parse("?") +@test_parseerror "a?b:c" +@test_parseerror "a ?b:c" +@test_parseerror "a ? b:c" +@test_parseerror "a ? b :c" +@test_parseerror "?" 
# #13079 @test Meta.parse("1<<2*3") == :((1<<2)*3) # #19987 -@test_throws ParseError Meta.parse("try ; catch f() ; end") +@test_parseerror "try ; catch f() ; end" # #23076 @test :([1,2;]) == Expr(:vect, Expr(:parameters), 1, 2) @@ -1674,7 +1706,7 @@ end @test Meta.parse("(a...)") == Expr(Symbol("..."), :a) # #19324 -@test_throws UndefVarError(:x) eval(:(module M19324 +@test_throws UndefVarError(:x, :local) eval(:(module M19324 x=1 for i=1:10 x += i @@ -1702,8 +1734,8 @@ end @test Meta.lower(@__MODULE__, :(f(x) = (y = x + 1; ccall((:a, y), Cvoid, ())))) == Expr(:error, "ccall function name and library expression cannot reference local variables") -@test_throws ParseError Meta.parse("x.'") -@test_throws ParseError Meta.parse("0.+1") +@test_parseerror "x.'" +@test_parseerror "0.+1" # #24221 @test Meta.isexpr(Meta.lower(@__MODULE__, :(a=_)), :error) @@ -1751,6 +1783,43 @@ end @test B28593.var.name === :S @test C28593.var.name === :S +# issue #51899 +macro struct_macro_51899() + quote + mutable struct Struct51899 + const const_field + const const_field_with_type::Int + $(esc(Expr(:const, :(escaped_const_field::MyType)))) + @atomic atomic_field + @atomic atomic_field_with_type::Int + end + end +end + +let ex = @macroexpand @struct_macro_51899() + const_field, const_field_with_type, escaped_const_field, + atomic_field, atomic_field_with_type = filter(x -> isa(x, Expr), ex.args[end].args[end].args) + @test Meta.isexpr(const_field, :const) + @test const_field.args[1] === :const_field + + @test Meta.isexpr(const_field_with_type, :const) + @test Meta.isexpr(const_field_with_type.args[1], :(::)) + @test const_field_with_type.args[1].args[1] === :const_field_with_type + @test const_field_with_type.args[1].args[2] == GlobalRef(@__MODULE__, :Int) + + @test Meta.isexpr(escaped_const_field, :const) + @test Meta.isexpr(const_field_with_type.args[1], :(::)) + @test escaped_const_field.args[1].args[1] === :escaped_const_field + @test escaped_const_field.args[1].args[2] === :MyType + + @test Meta.isexpr(atomic_field, :atomic) + @test atomic_field.args[1] === :atomic_field + + @test Meta.isexpr(atomic_field_with_type, :atomic) + @test atomic_field_with_type.args[1].args[1] === :atomic_field_with_type + @test atomic_field_with_type.args[1].args[2] == GlobalRef(@__MODULE__, :Int) +end + # issue #25955 macro noeffect25955(e) return e @@ -1797,7 +1866,7 @@ end @test Meta.parse("1⁝2") == Expr(:call, :⁝, 1, 2) @test Meta.parse("1..2") == Expr(:call, :.., 1, 2) # we don't parse chains of these since the associativity and meaning aren't clear -@test_throws ParseError Meta.parse("1..2..3") +@test_parseerror "1..2..3" # issue #30048 @test Meta.isexpr(Meta.lower(@__MODULE__, :(for a in b @@ -1854,7 +1923,7 @@ function capture_with_conditional_label() return y->x end let f = capture_with_conditional_label() # should not throw - @test_throws UndefVarError(:x) f(0) + @test_throws UndefVarError(:x, :local) f(0) end # `_` should not create a global (or local) @@ -1914,8 +1983,8 @@ macro id28992(x) x end @test Meta.@lower(.+(a,b) = 0) == Expr(:error, "invalid function name \".+\"") @test Meta.@lower((.+)(a,b) = 0) == Expr(:error, "invalid function name \"(.+)\"") let m = @__MODULE__ - @test Meta.lower(m, :($m.@id28992(.+(a,b) = 0))) == Expr(:error, "invalid function name \"$(nameof(m)).:.+\"") - @test Meta.lower(m, :($m.@id28992((.+)(a,b) = 0))) == Expr(:error, "invalid function name \"(.$(nameof(m)).+)\"") + @test Meta.lower(m, :($m.@id28992(.+(a,b) = 0))) == Expr(:error, "invalid function name \"$(nameof(m)).:.+\" around 
$(@__FILE__):$(@__LINE__)") + @test Meta.lower(m, :($m.@id28992((.+)(a,b) = 0))) == Expr(:error, "invalid function name \"(.$(nameof(m)).+)\" around $(@__FILE__):$(@__LINE__)") end @test @id28992([1] .< [2] .< [3]) == [true] @test @id28992(2 ^ -2) == 0.25 @@ -1971,9 +2040,9 @@ end @test Meta.parse("var\"#\"") === Symbol("#") @test Meta.parse("var\"true\"") === Symbol("true") @test Meta.parse("var\"false\"") === Symbol("false") -@test_throws ParseError Meta.parse("var\"#\"x") # Reject string macro-like suffix -@test_throws ParseError Meta.parse("var \"#\"") -@test_throws ParseError Meta.parse("var\"for\" i = 1:10; end") +@test_parseerror "var\"#\"x" # Reject string macro-like suffix +@test_parseerror "var \"#\"" +@test_parseerror "var\"for\" i = 1:10; end" # A few cases which would be ugly to deal with if var"#" were a string macro: @test Meta.parse("var\"#\".var\"a-b\"") == Expr(:., Symbol("#"), QuoteNode(Symbol("a-b"))) @test Meta.parse("export var\"#\"") == Expr(:export, Symbol("#")) @@ -1991,7 +2060,7 @@ end @test Meta.parse("import Base.Foo.:(==).bar") == :(import Base.Foo.==.bar) # issue #33135 -function f33135(x::T) where {C1, T} +@test_warn "declares type variable C1 but does not use it" @eval function f33135(x::T) where {C1, T} let C1 = 1, C2 = 2 C1 end @@ -2198,7 +2267,7 @@ end end # line break in : expression disallowed -@test_throws Meta.ParseError Meta.parse("[1 :\n2] == [1:2]") +@test_parseerror "[1 :\n2] == [1:2]" # added ⟂ to operator precedence (#24404) @test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, :⟂, :b, :⟂, :c) @@ -2210,9 +2279,18 @@ end @test Meta.parse("a ⫫ b") == Expr(:call, :⫫, :a, :b) end +# issue 45962 +@testset "binary ⭄, ⥺, ⭃, and ⥷" begin + @test Meta.parse("a ⭄ b") == Expr(:call, :⭄, :a, :b) + @test Meta.parse("a ⥺ b") == Expr(:call, :⥺, :a, :b) + @test Meta.parse("a ⭃ b") == Expr(:call, :⭃, :a, :b) + @test Meta.parse("a ⥷ b") == Expr(:call, :⥷, :a, :b) +end + # only allow certain characters after interpolated vars (#25231) -@test Meta.parse("\"\$x෴ \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.") -@test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) == :string +@test_parseerror("\"\$x෴ \"", + "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.") +@test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) === :string @testset "issue #30341" begin @test Meta.parse("x .~ y") == Expr(:call, :.~, :x, :y) @@ -2250,14 +2328,11 @@ end err = Expr( :error, - "\":\" in \"$imprt\" syntax can only be used when importing a single module. " * - "Split imports into multiple lines." ) - ex = Meta.parse("$imprt A, B: x, y", raise=false) - @test ex == err - - ex = Meta.parse("$imprt A: x, B: y", raise=false) - @test ex == err + @test_parseerror("$imprt A, B: x, y", + "\":\" in \"$imprt\" syntax can only be used when importing a single module. Split imports into multiple lines.") + @test_parseerror("$imprt A: x, B: y", + "\":\" in \"$imprt\" syntax can only be used when importing a single module. 
Split imports into multiple lines.") end end @@ -2265,30 +2340,43 @@ end @test Meta.lower(@__MODULE__, Expr(:block, LineNumberNode(101, :some_file), :(f(x,x)=1))) == Expr(:error, "function argument name not unique: \"x\" around some_file:101") +@test Meta.lower(@__MODULE__, Expr(:block, LineNumberNode(102, :some_file), :(function f(x) where T where T; x::T; end))) == + Expr(:error, "function static parameter name not unique: \"T\" around some_file:102") + +@test Meta.lower(@__MODULE__, Expr(:block, LineNumberNode(103, :some_file), :(function f(t) where t; x; end))) == + Expr(:error, "function argument and static parameter name not distinct: \"t\" around some_file:103") + # Ensure file names don't leak between `eval`s eval(LineNumberNode(11, :incorrect_file)) let exc = try eval(:(f(x,x)=1)) catch e ; e ; end @test !occursin("incorrect_file", exc.msg) end -# issue #34967 -@test_throws LoadError("string", 2, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - "x34967 = 1\n# Halloa\xf5b\nx34967 = 2") -@test x34967 == 1 -@test_throws LoadError("string", 1, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - "x\xf5 = 3\n# Halloa\xf5b\nx34967 = 4") -@test_throws LoadError("string", 3, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - """ - # line 1 - # line 2 - # Hello\xf5b - x34967 = 6 - """) - -@test Meta.parse("aa\u200b_", raise=false) == - Expr(:error, "invisible character \\u200b near column 3") -@test Meta.parse("aa\UE0080", raise=false) == - Expr(:error, "invalid character \"\Ue0080\" near column 3") +@testset "issue #34967" begin + @test_parseerror "#\xf5b\nx" "invalid UTF-8 sequence" + + # Test line UTF-8 errors with line numbers + let ex = Meta.parseall("x\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 4) && Meta.isexpr(last(ex.args), :error) + @test ex.args[3] == LineNumberNode(2,:none) + end + let ex = Meta.parseall("x\xf5\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 2) && Meta.isexpr(last(ex.args), :error) + @test ex.args[1] == LineNumberNode(1,:none) + end + let ex = Meta.parseall("#line1\n#line2\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 2) && Meta.isexpr(last(ex.args), :error) + @test ex.args[1] == LineNumberNode(3,:none) + end +end + +@test_parseerror "aa\u200b_" "invisible character \\u200b near column 3" +@test_parseerror "aa\UE0080" "invalid character \"\Ue0080\" near column 3" + +@testset "unrecognized escapes in string/char literals" begin + @test_parseerror "\"\\.\"" + @test_parseerror "\'\\.\'" +end # issue #31238 a31238, b31238 = let x @@ -2308,14 +2396,19 @@ f35201(c) = h35201((;c...), k=true) f44343(;kw...) 
= NamedTuple(kw) @test f44343(u = (; :a => 1)) === (u = (; :a => 1),) -@testset "issue #34544/35367" begin - # Test these evals shouldnt segfault +@testset "issue #34544/35367/35429" begin + # Test these evals shouldn't segfault eval(Expr(:call, :eval, Expr(:quote, Expr(:module, true, :bar1, Expr(:block))))) eval(Expr(:module, true, :bar2, Expr(:block))) eval(Expr(:quote, Expr(:module, true, :bar3, Expr(:quote)))) @test_throws ErrorException eval(Expr(:call, :eval, Expr(:quote, Expr(:module, true, :bar4, Expr(:quote))))) @test_throws ErrorException eval(Expr(:module, true, :bar5, Expr(:foo))) @test_throws ErrorException eval(Expr(:module, true, :bar6, Expr(:quote))) + + #35429 + @test_throws ErrorException eval(Expr(:thunk, x->x+9)) + @test_throws ErrorException eval(Expr(:thunk, Meta.parse("x=17"))) + @test_throws ErrorException eval(Expr(:thunk, Meta.parse("17"))) end # issue #35391 @@ -2352,8 +2445,8 @@ end @test x == 6 # issue #36196 -@test_throws ParseError("\"for\" at none:1 expected \"end\", got \")\"") Meta.parse("(for i=1; println())") -@test_throws ParseError("\"try\" at none:1 expected \"end\", got \")\"") Meta.parse("(try i=1; println())") +@test_parseerror "(for i=1; println())" "\"for\" at none:1 expected \"end\", got \")\"" +@test_parseerror "(try i=1; println())" "\"try\" at none:1 expected \"end\", got \")\"" # issue #36272 macro m36272() @@ -2400,10 +2493,10 @@ end let (-->) = (+) @test (40 --> 2) == 42 end -@test_throws ParseError("invalid operator \"<---\"") Meta.parse("1<---2") -@test_throws ParseError("invalid operator \".<---\"") Meta.parse("1 .<--- 2") -@test_throws ParseError("invalid operator \"--\"") Meta.parse("a---b") -@test_throws ParseError("invalid operator \".--\"") Meta.parse("a.---b") +@test_parseerror("1<---2", "invalid operator \"<---\"") +@test_parseerror("1 .<--- 2", "invalid operator \".<---\"") +@test_parseerror("a---b", "invalid operator \"--\"") +@test_parseerror("a.---b", "invalid operator \".--\"") # issue #37228 # NOTE: the `if` needs to be at the top level @@ -2418,7 +2511,14 @@ end function ncalls_in_lowered(ex, fname) lowered_exprs = Meta.lower(Main, ex).args[1].code return count(lowered_exprs) do ex - Meta.isexpr(ex, :call) && ex.args[1] == fname + if Meta.isexpr(ex, :call) + arg = ex.args[1] + if isa(arg, Core.SSAValue) + arg = lowered_exprs[arg.id] + end + return arg == fname + end + return false end end @@ -2438,15 +2538,14 @@ end @test :(if true 'a' else 1 end) == Expr(:if, true, quote 'a' end, quote 1 end) # issue #37664 -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a#==#b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a #==#b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a#==# b") - -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1 2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1#==#2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1 #==#2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1#==# 2") +@test_parseerror("a b", "extra token \"b\" after end of expression") +@test_parseerror("a#==#b", "extra token \"b\" after end of expression") +@test_parseerror("a #==#b", "extra token \"b\" after end of expression") +@test_parseerror("a#==# b", "extra token \"b\" after end of expression") 
+@test_parseerror("1 2", "extra token \"2\" after end of expression") +@test_parseerror("1#==#2", "extra token \"2\" after end of expression") +@test_parseerror("1 #==#2", "extra token \"2\" after end of expression") +@test_parseerror("1#==# 2", "extra token \"2\" after end of expression") @test size([1#==#2#==#3]) == size([1 2 3]) @test size([1#==#2#==#3]) == size([1 2 3]) # tabs @@ -2469,9 +2568,7 @@ end Meta.parse("if#==#x<y#==#x+1#==#elseif#==#y>0#==#y+1#==#else#==#z#==#end") @test Meta.parse("function(x) x end") == Meta.parse("function(x)#==#x#==#end") @test Meta.parse("a ? b : c") == Meta.parse("a#==#?#==#b#==#:#==#c") -@test_throws ParseError("space before \"(\" not allowed in \"f (\" at none:1") begin - Meta.parse("f#==#(x)=x") -end +@test_parseerror("f#==#(x)=x", "space before \"(\" not allowed in \"f (\" at none:1") @test Meta.parse("try f() catch e g() finally h() end") == Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end") @test Meta.parse("@m a b") == Meta.parse("@m#==#a#==#b") @@ -2503,18 +2600,19 @@ end @test B37890(1.0, 2.0f0) isa B37890{Int, Int8} # import ... as -@test_throws ParseError("invalid syntax \"using A as ...\"") Meta.parse("using A as B") -@test_throws ParseError("invalid syntax \"using A.b as ...\"") Meta.parse("using A.b as B") -@test_throws ParseError("invalid syntax \"using A.b as ...\"") Meta.parse("using X, A.b as B") -@test_throws ParseError("invalid syntax \"import A as B:\"") Meta.parse("import A as B: c") -@test_throws ParseError("invalid syntax \"import A.b as B:\"") Meta.parse("import A.b as B: c") +@test_parseerror("using A as B", "invalid syntax \"using A as ...\"") +@test_parseerror("using A.b as B", "invalid syntax \"using A.b as ...\"") +@test_parseerror("using X, A.b as B", "invalid syntax \"using A.b as ...\"") +@test_parseerror("import A as B: c", "invalid syntax \"import A as B:\"") +@test_parseerror("import A.b as B: c", "invalid syntax \"import A.b as B:\"") module TestImportAs using Test module Mod const x = 1 -global maybe_undef +global maybe_undef, always_undef +export always_undef def() = (global maybe_undef = 0) func(x) = 2x + 1 @@ -2545,17 +2643,27 @@ import .Mod2.y as y2 @test y2 == 2 @test !@isdefined(y) -@test_throws ErrorException eval(:(import .Mod.x as (a.b))) +# Test that eval rejects the invalid syntax `import .Mod.x as (a.b)` +@test_throws ErrorException eval( + Expr(:import, Expr(:as, Expr(:., :., :Mod, :x), Expr(:., :a, QuoteNode(:b))))) import .Mod.maybe_undef as mu @test_throws UndefVarError mu Mod.def() @test mu === 0 -using .Mod: func as f -@test f(10) == 21 -@test !@isdefined(func) -@test_throws ErrorException("error in method definition: function Mod.func must be explicitly imported to be extended") eval(:(f(x::Int) = x)) +module Mod3 +using ..Mod: func as f +using ..Mod +end +@test Mod3.f(10) == 21 +@test !isdefined(Mod3, :func) +@test_throws ErrorException("invalid method definition in Mod3: function Mod3.f must be explicitly imported to be extended") Core.eval(Mod3, :(f(x::Int) = x)) +@test !isdefined(Mod3, :always_undef) # resolve this binding now in Mod3 +@test_throws ErrorException("invalid method definition in Mod3: exported function Mod.always_undef does not exist") Core.eval(Mod3, :(always_undef(x::Int) = x)) +@test_throws ErrorException("cannot assign a value to imported variable Mod.always_undef from module Mod3") Core.eval(Mod3, :(const always_undef = 3)) +@test_throws ErrorException("cannot assign a value to imported variable Mod3.f") Core.eval(Mod3, :(const f = 3)) +@test_throws 
ErrorException("cannot declare Mod.maybe_undef constant; it already has a value") Core.eval(Mod, :(const maybe_undef = 3)) z = 42 import .z as also_z @@ -2593,10 +2701,10 @@ import .TestImportAs.Mod2 as M2 end @testset "issue #37393" begin - @test :(for outer i = 1:3; end) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) + @test remove_linenums!(:(for outer i = 1:3; end)) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) i = :i - @test :(for outer $i = 1:3; end) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) - @test :(for outer = 1:3; end) == Expr(:for, Expr(:(=), :outer, :(1:3)), :(;;)) + @test remove_linenums!(:(for outer $i = 1:3; end)) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) + @test remove_linenums!(:(for outer = 1:3; end)) == Expr(:for, Expr(:(=), :outer, :(1:3)), :(;;)) # TIL that this is possible for outer $ i = 1:3 @test 1 $ 2 in 1:3 @@ -2615,10 +2723,10 @@ end @test Meta.isexpr(Meta.parse(""" f(i for i in 1:3)""").args[2], :generator) - @test_throws Meta.ParseError Meta.parse(""" + @test_parseerror """ for i in 1:3 - end""") + end""" end # PR #37973 @@ -2773,7 +2881,7 @@ end Expr(:nrow, 1, Expr(:row, 0, 9, 3), Expr(:row, 4, 5, 4))) @test :([1 ; 2 ;; 3 ; 4]) == Expr(:ncat, 2, Expr(:nrow, 1, 1, 2), Expr(:nrow, 1, 3, 4)) - @test_throws ParseError Meta.parse("[1 2 ;; 3 4]") # cannot mix spaces and ;; except as line break + @test_parseerror "[1 2 ;; 3 4]" # cannot mix spaces and ;; except as line break @test :([1 2 ;; 3 4]) == :([1 2 3 4]) @test :([1 2 ;; @@ -2783,8 +2891,8 @@ end @test Meta.parse("[1;\n\n]") == :([1;]) @test Meta.parse("[1\n;]") == :([1;]) # semicolons following a linebreak are fine @test Meta.parse("[1\n;;; 2]") == :([1;;; 2]) - @test_throws ParseError Meta.parse("[1;\n;2]") # semicolons cannot straddle a line break - @test_throws ParseError Meta.parse("[1; ;2]") # semicolons cannot be separated by a space + @test_parseerror "[1;\n;2]" # semicolons cannot straddle a line break + @test_parseerror "[1; ;2]" # semicolons cannot be separated by a space end # issue #25652 @@ -2843,7 +2951,7 @@ end @test eval(:(x = $(QuoteNode(Core.SSAValue(1))))) == Core.SSAValue(1) @test eval(:(x = $(QuoteNode(Core.SlotNumber(1))))) == Core.SlotNumber(1) @test_throws ErrorException("syntax: SSAValue objects should not occur in an AST") eval(:(x = $(Core.SSAValue(1)))) -@test_throws ErrorException("syntax: Slot objects should not occur in an AST") eval(:(x = $(Core.SlotNumber(1)))) +@test_throws ErrorException("syntax: SlotNumber objects should not occur in an AST") eval(:(x = $(Core.SlotNumber(1)))) # juxtaposition of radical symbols (#40094) @test Meta.parse("2√3") == Expr(:call, :*, 2, Expr(:call, :√, 3)) @@ -2854,13 +2962,13 @@ macro m_underscore_hygiene() return :(_ = 1) end -@test @macroexpand(@m_underscore_hygiene()) == :(_ = 1) +@test Meta.@lower(@m_underscore_hygiene()) === 1 macro m_begin_hygiene(a) return :($(esc(a))[begin]) end -@test @m_begin_hygiene([1, 2, 3]) == 1 +@test @m_begin_hygiene([1, 2, 3]) === 1 # issue 40258 @test "a $("b $("c")")" == "a b c" @@ -3017,9 +3125,6 @@ end end # issue 25678 -@generated f25678(x::T) where {T} = code_lowered(sin, Tuple{x})[] -@test f25678(pi/6) === sin(pi/6) - @generated g25678(x) = return :x @test g25678(7) === 7 @@ -3039,10 +3144,10 @@ end end # issue #19012 -@test Meta.parse("\U2200", raise=false) == Symbol("∀") -@test Meta.parse("\U2203", raise=false) == Symbol("∃") -@test Meta.parse("a\U2203", raise=false) == Symbol("a∃") -@test Meta.parse("\U2204", raise=false) == Symbol("∄") +@test 
Meta.parse("\U2200", raise=false) === Symbol("∀") +@test Meta.parse("\U2203", raise=false) === Symbol("∃") +@test Meta.parse("a\U2203", raise=false) === Symbol("a∃") +@test Meta.parse("\U2204", raise=false) === Symbol("∄") # issue 42220 macro m42220() @@ -3060,10 +3165,10 @@ end @test fails(error) @test !fails(() -> 1 + 2) - @test_throws ParseError Meta.parse("try foo() else bar() end") - @test_throws ParseError Meta.parse("try foo() else bar() catch; baz() end") - @test_throws ParseError Meta.parse("try foo() catch; baz() finally foobar() else bar() end") - @test_throws ParseError Meta.parse("try foo() finally foobar() else bar() catch; baz() end") + @test_parseerror "try foo() else bar() end" + @test_parseerror "try foo() else bar() catch; baz() end" + @test_parseerror "try foo() catch; baz() finally foobar() else bar() end" + @test_parseerror "try foo() finally foobar() else bar() catch; baz() end" err = try try @@ -3126,25 +3231,41 @@ end end @test err == 5 + 6 @test x == 1 + + x = 0 + try + catch + else + x = 1 + end + @test x == 1 + + try + catch + else + tryelse_in_local_scope = true + end + + @test !@isdefined(tryelse_in_local_scope) end -@test_throws ParseError Meta.parse(""" +@test_parseerror """ function checkUserAccess(u::User) if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066" return true end return false end -""") +""" -@test_throws ParseError Meta.parse(""" +@test_parseerror """ function checkUserAccess(u::User) #=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =# return true #= end admin only \u202e \u2066end\u2069 \u2066=# return false end -""") +""" @testset "empty nd arrays" begin @test :([]) == Expr(:vect) @@ -3175,16 +3296,22 @@ end ;; ]) == Expr(:ncat, 2) - @test_throws ParseError Meta.parse("[; ;]") - @test_throws ParseError Meta.parse("[;; ;]") - @test_throws ParseError Meta.parse("[;\n;]") + @test_parseerror "[; ;]" + @test_parseerror "[;; ;]" + @test_parseerror "[;\n;]" end @test Meta.parseatom("@foo", 1; filename="foo", lineno=7) == (Expr(:macrocall, :var"@foo", LineNumberNode(7, :foo)), 5) @test Meta.parseall("@foo"; filename="foo", lineno=3) == Expr(:toplevel, LineNumberNode(3, :foo), Expr(:macrocall, :var"@foo", LineNumberNode(3, :foo))) -let ex = :(const $(esc(:x)) = 1; (::typeof(2))() = $(esc(:x))) - @test macroexpand(Main, Expr(:var"hygienic-scope", ex, Main)).args[3].args[1] == :((::$(GlobalRef(Main, :typeof))(2))()) +module M43993 +function foo43993 end +const typeof = error +end +let ex = :(const $(esc(:x)) = 1; (::typeof($(esc(:foo43993))))() = $(esc(:x))) + Core.eval(M43993, Expr(:var"hygienic-scope", ex, Core)) + @test M43993.x === 1 + @test invokelatest(M43993.foo43993) === 1 end struct Foo44013 @@ -3365,14 +3492,12 @@ f45162(f) = f(x=1) @test first(methods(f45162)).called != 0 # issue #45024 -@test_throws ParseError("expected assignment after \"const\"") Meta.parse("const x") -@test_throws ParseError("expected assignment after \"const\"") Meta.parse("const x::Int") +@test_parseerror "const x" "expected assignment after \"const\"" +@test_parseerror "const x::Int" "expected assignment after \"const\"" # these cases have always been caught during lowering, since (const (global x)) is not # ambiguous with the lowered form (const x), but that could probably be changed. 
-@test Meta.lower(@__MODULE__, :(global const x)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(global const x::Int)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(const global x)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(const global x::Int)) == Expr(:error, "expected assignment after \"const\"") +@test Meta.lower(@__MODULE__, Expr(:const, Expr(:global, :x))) == Expr(:error, "expected assignment after \"const\"") +@test Meta.lower(@__MODULE__, Expr(:const, Expr(:global, Expr(:(::), :x, :Int)))) == Expr(:error, "expected assignment after \"const\"") @testset "issue 25072" begin @test '\xc0\x80' == reinterpret(Char, 0xc0800000) @@ -3395,3 +3520,102 @@ f45162(f) = f(x=1) @test Meta.isexpr(Meta.parse("'a"), :incomplete) @test ''' == "'"[1] end + +# issue #46251 +@test begin; global value = 1; (value, value += 1) end == (1, 2) +@test begin; global value = 1; "($(value), $(value += 1))" end == "(1, 2)" + +# issue #47410 +# note `eval` is needed since this needs to be at the top level +@test eval(:(if false + elseif false || (()->true)() + 42 + end)) == 42 + +macro _macroexpand(x, m=__module__) + :($__source__; macroexpand($m, Expr(:var"hygienic-scope", $(esc(Expr(:quote, x))), $m))) +end + +@testset "unescaping in :global expressions" begin + m = @__MODULE__ + @test @_macroexpand(global x::T) == :(global x::$(GlobalRef(m, :T))) + @test @_macroexpand(global (x, $(esc(:y)))) == :(global (x, y)) + @test @_macroexpand(global (x::S, $(esc(:y))::$(esc(:T)))) == + :(global (x::$(GlobalRef(m, :S)), y::T)) + @test @_macroexpand(global (; x, $(esc(:y)))) == :(global (; x, y)) + @test @_macroexpand(global (; x::S, $(esc(:y))::$(esc(:T)))) == + :(global (; x::$(GlobalRef(m, :S)), y::T)) + + @test @_macroexpand(global x::T = a) == :(global x::$(GlobalRef(m, :T)) = $(GlobalRef(m, :a))) + @test @_macroexpand(global (x, $(esc(:y))) = a) == :(global (x, y) = $(GlobalRef(m, :a))) + @test @_macroexpand(global (x::S, $(esc(:y))::$(esc(:T))) = a) == + :(global (x::$(GlobalRef(m, :S)), y::T) = $(GlobalRef(m, :a))) + @test @_macroexpand(global (; x, $(esc(:y))) = a) == :(global (; x, y) = $(GlobalRef(m, :a))) + @test @_macroexpand(global (; x::S, $(esc(:y))::$(esc(:T))) = a) == + :(global (; x::$(GlobalRef(m, :S)), y::T) = $(GlobalRef(m, :a))) +end + +# issue #49920 +let line1 = (quote end).args[1], + line2 = (quote end).args[1], + line3 = (quote end).args[1] + @test 1 === eval(Meta.lower(Main, Expr(:block, line1, 1, line2, line3))) +end + +# issue #49984 +macro z49984(s); :(let a; $(esc(s)); end); end +@test let a = 1; @z49984(a) === 1; end + +# issues #37783, #39929, #42552, #43379, and #48332 +let x = 1 => 2 + @test_throws ErrorException @eval a => b = 2 + @test_throws "function Base.=> must be explicitly imported to be extended" @eval a => b = 2 +end + +# Splatting in non-final default value (Ref #50518) +for expr in (quote + function g1(a=(1,2)..., b...=3) + b + end +end,quote + function g2(a=(1,2)..., b=3, c=4) + (b, c) + end +end,quote + function g3(a=(1,2)..., b=3, c...=4) + (b, c) + end +end) + let exc = try eval(expr); catch exc; exc end + @test isa(exc, ErrorException) + @test startswith(exc.msg, "syntax: invalid \"...\" in non-final positional argument default value") + end +end + +# Test that bad lowering does not segfault (ref #50518) +@test_throws ErrorException("syntax: Attempted to use slot marked unused") @eval function funused50518(::Float64) + $(Symbol("#unused#")) 
+end + +@testset "public keyword" begin + p(str) = Base.remove_linenums!(Meta.parse(str)) + # tests ported from JuliaSyntax.jl + @test p("function f(public)\n public + 3\nend") == Expr(:function, Expr(:call, :f, :public), Expr(:block, Expr(:call, :+, :public, 3))) + @test p("public A, B") == Expr(:public, :A, :B) + @test p("if true \n public *= 4 \n end") == Expr(:if, true, Expr(:block, Expr(:*=, :public, 4))) + @test p("module Mod\n public A, B \n end") == Expr(:module, true, :Mod, Expr(:block, Expr(:public, :A, :B))) + @test p("module Mod2\n a = 3; b = 6; public a, b\n end") == Expr(:module, true, :Mod2, Expr(:block, Expr(:(=), :a, 3), Expr(:(=), :b, 6), Expr(:public, :a, :b))) + @test p("a = 3; b = 6; public a, b") == Expr(:toplevel, Expr(:(=), :a, 3), Expr(:(=), :b, 6), Expr(:public, :a, :b)) + @test_throws Meta.ParseError p("begin \n public A, B \n end") + @test_throws Meta.ParseError p("if true \n public A, B \n end") + @test_throws Meta.ParseError p("public export=true foo, bar") + @test_throws Meta.ParseError p("public experimental=true foo, bar") + @test p("public(x::String) = false") == Expr(:(=), Expr(:call, :public, Expr(:(::), :x, :String)), Expr(:block, false)) + @test p("module M; export @a; end") == Expr(:module, true, :M, Expr(:block, Expr(:export, :var"@a"))) + @test p("module M; public @a; end") == Expr(:module, true, :M, Expr(:block, Expr(:public, :var"@a"))) + @test p("module M; export ⤈; end") == Expr(:module, true, :M, Expr(:block, Expr(:export, :⤈))) + @test p("module M; public ⤈; end") == Expr(:module, true, :M, Expr(:block, Expr(:public, :⤈))) + @test p("public = 4") == Expr(:(=), :public, 4) + @test p("public[7] = 5") == Expr(:(=), Expr(:ref, :public, 7), 5) + @test p("public() = 6") == Expr(:(=), Expr(:call, :public), Expr(:block, 6)) +end diff --git a/test/sysinfo.jl b/test/sysinfo.jl index e423f6071c9e0..8864e3a48efc7 100644 --- a/test/sysinfo.jl +++ b/test/sysinfo.jl @@ -9,3 +9,48 @@ Base.Sys.loadavg() @test Base.libllvm_path() isa Symbol @test contains(String(Base.libllvm_path()), "LLVM") + +@test length(ccall(:jl_get_cpu_name, String, ())) != 0 +@test length(ccall(:jl_get_cpu_features, String, ())) >= 0 +foo_fma() = Core.Intrinsics.have_fma(Int64) +@test ccall(:jl_cpu_has_fma, Bool, (Cint,), 64) == foo_fma() + +if Sys.isunix() + mktempdir() do tempdir + firstdir = joinpath(tempdir, "first") + seconddir = joinpath(tempdir, "second") + + mkpath(firstdir) + mkpath(seconddir) + + touch(joinpath(firstdir, "foo")) + touch(joinpath(seconddir, "foo")) + + chmod(joinpath(firstdir, "foo"), 0o777) + chmod(joinpath(seconddir, "foo"), 0o777) + + # zero permissions on first directory + chmod(firstdir, 0o000) + + original_path = ENV["PATH"] + ENV["PATH"] = string(firstdir, ":", seconddir, ":", original_path) + try + @test abspath(Base.Sys.which("foo")) == abspath(joinpath(seconddir, "foo")) + finally + # clean up + chmod(firstdir, 0o777) + ENV["PATH"] = original_path + end + end +end + +@testset "username()" begin + if Sys.isunix() + passwd = Libc.getpwuid(Libc.getuid()) + @test Sys.username() == passwd.username + elseif Sys.iswindows() + @test Sys.username() == ENV["USERNAME"] + else + @test !isempty(Sys.username()) + end +end diff --git a/test/terminfo.jl b/test/terminfo.jl new file mode 100644 index 0000000000000..cbaab346a617b --- /dev/null +++ b/test/terminfo.jl @@ -0,0 +1,911 @@ +let + dumb_terminfo = UInt8[ + 0x1a, 0x01, 0x18, 0x00, 0x02, 0x00, 0x01, 0x00, 0x82, 0x00, 0x08, 0x00, + 0x64, 0x75, 0x6d, 0x62, 0x7c, 0x38, 0x30, 0x2d, 0x63, 0x6f, 0x6c, 0x75, + 0x6d, 
0x6e, 0x20, 0x64, 0x75, 0x6d, 0x62, 0x20, 0x74, 0x74, 0x79, 0x00, + 0x00, 0x01, 0x50, 0x00, 0xff, 0xff, 0x00, 0x00, 0x02, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x04, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x06, 0x00, + 0x07, 0x00, 0x0d, 0x00, 0x0a, 0x00, 0x0a, 0x00] + + dumb_capabilities = Dict{Symbol, Union{Bool, Int, String}}( + :am => true, + :auto_right_margin => true, + :bw => false, + :auto_left_margin => false, + :bel => "\a", + :bell => "\a", + :cr => "\r", + :carriage_return => "\r", + :cols => 80, + :columns => 80, + :cud1 => "\n", + :cursor_down => "\n", + :ind => "\n", + :scroll_forward => "\n") + + xterm_terminfo = UInt8[ + 0x1a, 0x01, 0x30, 0x00, 0x26, 0x00, 0x0f, 0x00, 0x9d, 0x01, 0xe6, 0x05, + 0x78, 0x74, 0x65, 0x72, 0x6d, 0x7c, 0x78, 0x74, 0x65, 0x72, 0x6d, 0x20, + 0x74, 0x65, 0x72, 0x6d, 0x69, 0x6e, 0x61, 0x6c, 0x20, 0x65, 0x6d, 0x75, + 0x6c, 0x61, 0x74, 0x6f, 0x72, 0x20, 0x28, 0x58, 0x20, 0x57, 0x69, 0x6e, + 0x64, 0x6f, 0x77, 0x20, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x29, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x50, 0x00, 0x08, 0x00, 0x18, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x08, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x08, 0x00, 0x19, 0x00, 0x1e, 0x00, 0x26, 0x00, 0x2a, 0x00, + 0x2e, 0x00, 0xff, 0xff, 0x39, 0x00, 0x4a, 0x00, 0x4c, 0x00, 0x50, 0x00, + 0x57, 0x00, 0xff, 0xff, 0x59, 0x00, 0x66, 0x00, 0xff, 0xff, 0x6a, 0x00, + 0x6e, 0x00, 0x78, 0x00, 0x7c, 0x00, 0xff, 0xff, 0xff, 0xff, 0x80, 0x00, + 0x84, 0x00, 0x89, 0x00, 0x8e, 0x00, 0xff, 0xff, 0xa0, 0x00, 0xa5, 0x00, + 0xaa, 0x00, 0xff, 0xff, 0xaf, 0x00, 0xb4, 0x00, 0xb9, 0x00, 0xbe, 0x00, + 0xc7, 0x00, 0xcb, 0x00, 0xd2, 0x00, 0xff, 0xff, 0xe4, 0x00, 0xe9, 0x00, + 0xef, 0x00, 0xf5, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x01, + 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0x19, 0x01, 0xff, 0xff, 0x1d, 0x01, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x01, 0xff, 0xff, 0x24, 0x01, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x28, 0x01, 0x2c, 0x01, + 0x32, 0x01, 0x36, 0x01, 0x3a, 0x01, 0x3e, 0x01, 0x44, 0x01, 0x4a, 0x01, + 0x50, 0x01, 0x56, 0x01, 0x5c, 0x01, 0x60, 0x01, 0xff, 0xff, 0x65, 0x01, + 0xff, 0xff, 0x69, 0x01, 0x6e, 0x01, 0x73, 0x01, 0x77, 0x01, 0x7e, 0x01, + 0xff, 0xff, 0x85, 0x01, 0x89, 0x01, 0x91, 0x01, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x99, 0x01, 0xa2, 0x01, 0xff, 0xff, + 0xff, 0xff, 0xab, 0x01, 0xb4, 0x01, 0xbd, 0x01, 0xc6, 0x01, 0xcf, 0x01, + 0xd8, 0x01, 0xe1, 0x01, 0xea, 0x01, 0xf3, 0x01, 0xfc, 0x01, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x05, 0x02, 0x09, 0x02, 0x0e, 0x02, 0x13, 0x02, + 0x27, 0x02, 0x2a, 0x02, 0xff, 0xff, 0xff, 0xff, 0x3c, 0x02, 0x3f, 0x02, + 0x4a, 0x02, 0x4d, 0x02, 0x4f, 0x02, 0x52, 0x02, 0xaf, 0x02, 0xff, 0xff, + 0xb2, 0x02, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb4, 0x02, + 0xb8, 0x02, 0xbc, 0x02, 0xc0, 0x02, 0xc4, 0x02, 0xff, 0xff, 0xff, 0xff, + 0xc8, 0x02, 0xff, 0xff, 0xfd, 0x02, 0xff, 0xff, 0xff, 0xff, 0x01, 0x03, + 0x07, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0d, 0x03, 0x11, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x15, 0x03, 0xff, 0xff, 0xff, 0xff, + 0x1c, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x23, 0x03, + 0x2a, 0x03, 0x31, 0x03, 0xff, 0xff, 0xff, 0xff, 0x38, 0x03, 0xff, 0xff, + 0x3f, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x46, 0x03, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x4d, 0x03, 0x53, 0x03, + 0x59, 0x03, 0x60, 0x03, 0x67, 0x03, 0x6e, 0x03, 0x75, 0x03, 0x7d, 0x03, + 0x85, 0x03, 0x8d, 0x03, 0x95, 0x03, 0x9d, 0x03, 0xa5, 0x03, 0xad, 0x03, + 0xb5, 0x03, 0xbc, 0x03, 0xc3, 0x03, 0xca, 0x03, 0xd1, 0x03, 0xd9, 0x03, + 0xe1, 0x03, 0xe9, 0x03, 0xf1, 0x03, 0xf9, 0x03, 0x01, 0x04, 0x09, 0x04, + 0x11, 0x04, 0x18, 0x04, 0x1f, 0x04, 0x26, 0x04, 0x2d, 0x04, 0x35, 0x04, + 0x3d, 0x04, 0x45, 0x04, 0x4d, 0x04, 0x55, 0x04, 0x5d, 0x04, 0x65, 0x04, + 0x6d, 0x04, 0x74, 0x04, 0x7b, 0x04, 0x82, 0x04, 0x89, 0x04, 0x91, 0x04, + 0x99, 0x04, 0xa1, 0x04, 0xa9, 0x04, 0xb1, 0x04, 0xb9, 0x04, 0xc1, 0x04, + 0xc9, 0x04, 0xd0, 0x04, 0xd7, 0x04, 0xde, 0x04, 0xe3, 0x04, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xea, 0x04, 0xf5, 0x04, 0xfa, 0x04, + 0x0d, 0x05, 0x11, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x1a, 0x05, 0x60, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa6, 0x05, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xab, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb1, 0x05, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb5, 0x05, 0xbf, 0x05, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xc9, 0x05, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xe0, 0x05, 0xe3, 0x05, 0x1b, 0x5b, 0x5a, 0x00, 0x07, 0x00, + 0x0d, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25, 0x64, 0x3b, + 0x25, 0x70, 0x32, 0x25, 0x64, 0x72, 0x00, 0x1b, 0x5b, 0x33, 0x67, 0x00, + 0x1b, 0x5b, 0x48, 0x1b, 0x5b, 0x32, 0x4a, 0x00, 0x1b, 0x5b, 0x4b, 0x00, + 0x1b, 0x5b, 0x4a, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25, + 0x64, 0x47, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, 0x25, 0x64, + 0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x48, 0x00, 0x0a, 0x00, 0x1b, 0x5b, + 0x48, 0x00, 0x1b, 0x5b, 0x3f, 0x32, 0x35, 0x6c, 0x00, 0x08, 0x00, 0x1b, + 0x5b, 0x3f, 0x31, 0x32, 0x6c, 0x1b, 0x5b, 0x3f, 0x32, 0x35, 0x68, 0x00, + 0x1b, 0x5b, 0x43, 0x00, 0x1b, 0x5b, 0x41, 0x00, 0x1b, 0x5b, 0x3f, 0x31, + 0x32, 0x3b, 0x32, 0x35, 0x68, 0x00, 0x1b, 0x5b, 0x50, 0x00, 0x1b, 0x5b, + 0x4d, 0x00, 0x1b, 0x28, 0x30, 0x00, 0x1b, 0x5b, 0x35, 0x6d, 0x00, 0x1b, + 0x5b, 0x31, 0x6d, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x34, 0x39, 0x68, + 0x1b, 0x5b, 0x32, 0x32, 0x3b, 0x30, 0x3b, 0x30, 0x74, 0x00, 0x1b, 0x5b, + 0x32, 0x6d, 0x00, 0x1b, 0x5b, 0x34, 0x68, 0x00, 0x1b, 0x5b, 0x38, 0x6d, + 0x00, 0x1b, 0x5b, 0x37, 0x6d, 0x00, 0x1b, 0x5b, 0x37, 0x6d, 0x00, 0x1b, + 0x5b, 0x34, 0x6d, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x58, + 0x00, 0x1b, 0x28, 0x42, 0x00, 0x1b, 0x28, 0x42, 0x1b, 0x5b, 0x6d, 0x00, + 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x34, 0x39, 0x6c, 0x1b, 0x5b, 0x32, 0x33, + 0x3b, 0x30, 0x3b, 0x30, 0x74, 0x00, 0x1b, 0x5b, 0x34, 0x6c, 0x00, 0x1b, + 0x5b, 0x32, 0x37, 0x6d, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x6d, 0x00, 0x1b, + 0x5b, 0x3f, 0x35, 0x68, 0x24, 0x3c, 0x31, 0x30, 0x30, 0x2f, 0x3e, 0x1b, + 0x5b, 0x3f, 0x35, 0x6c, 0x00, 0x1b, 0x5b, 0x21, 0x70, 0x1b, 0x5b, 0x3f, + 0x33, 0x3b, 0x34, 0x6c, 0x1b, 0x5b, 0x34, 0x6c, 0x1b, 0x3e, 0x00, 0x1b, + 0x5b, 0x4c, 0x00, 0x08, 0x00, 0x1b, 0x5b, 0x33, 0x7e, 0x00, 0x1b, 0x4f, + 0x42, 0x00, 0x1b, 0x4f, 0x50, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x7e, 0x00, + 0x1b, 0x4f, 0x51, 0x00, 0x1b, 0x4f, 0x52, 0x00, 0x1b, 0x4f, 0x53, 0x00, + 0x1b, 0x5b, 0x31, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x7e, 0x00, + 0x1b, 0x5b, 0x31, 0x38, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x7e, 0x00, + 0x1b, 0x5b, 0x32, 0x30, 0x7e, 0x00, 0x1b, 0x4f, 0x48, 0x00, 0x1b, 0x5b, + 0x32, 0x7e, 0x00, 0x1b, 0x4f, 0x44, 0x00, 0x1b, 0x5b, 0x36, 0x7e, 0x00, + 0x1b, 0x5b, 0x35, 0x7e, 0x00, 0x1b, 0x4f, 0x43, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x32, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x41, 0x00, 0x1b, + 0x4f, 0x41, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x6c, 0x1b, 0x3e, 0x00, 0x1b, + 0x5b, 0x3f, 0x31, 0x68, 0x1b, 0x3d, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, + 0x33, 0x34, 0x6c, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x33, 0x34, 0x68, + 0x00, 0x1b, 0x5b, 0x25, 
0x70, 0x31, 0x25, 0x64, 0x50, 0x00, 0x1b, 0x5b, + 0x25, 0x70, 0x31, 0x25, 0x64, 0x4d, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, + 0x25, 0x64, 0x42, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x40, + 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x53, 0x00, 0x1b, 0x5b, + 0x25, 0x70, 0x31, 0x25, 0x64, 0x4c, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, + 0x25, 0x64, 0x44, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x43, + 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x54, 0x00, 0x1b, 0x5b, + 0x25, 0x70, 0x31, 0x25, 0x64, 0x41, 0x00, 0x1b, 0x5b, 0x69, 0x00, 0x1b, + 0x5b, 0x34, 0x69, 0x00, 0x1b, 0x5b, 0x35, 0x69, 0x00, 0x25, 0x70, 0x31, + 0x25, 0x63, 0x1b, 0x5b, 0x25, 0x70, 0x32, 0x25, 0x7b, 0x31, 0x7d, 0x25, + 0x2d, 0x25, 0x64, 0x62, 0x00, 0x1b, 0x63, 0x00, 0x1b, 0x5b, 0x21, 0x70, + 0x1b, 0x5b, 0x3f, 0x33, 0x3b, 0x34, 0x6c, 0x1b, 0x5b, 0x34, 0x6c, 0x1b, + 0x3e, 0x00, 0x1b, 0x38, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, + 0x25, 0x64, 0x64, 0x00, 0x1b, 0x37, 0x00, 0x0a, 0x00, 0x1b, 0x4d, 0x00, + 0x25, 0x3f, 0x25, 0x70, 0x39, 0x25, 0x74, 0x1b, 0x28, 0x30, 0x25, 0x65, + 0x1b, 0x28, 0x42, 0x25, 0x3b, 0x1b, 0x5b, 0x30, 0x25, 0x3f, 0x25, 0x70, + 0x36, 0x25, 0x74, 0x3b, 0x31, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x35, + 0x25, 0x74, 0x3b, 0x32, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x32, 0x25, + 0x74, 0x3b, 0x34, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, 0x31, 0x25, 0x70, + 0x33, 0x25, 0x7c, 0x25, 0x74, 0x3b, 0x37, 0x25, 0x3b, 0x25, 0x3f, 0x25, + 0x70, 0x34, 0x25, 0x74, 0x3b, 0x35, 0x25, 0x3b, 0x25, 0x3f, 0x25, 0x70, + 0x37, 0x25, 0x74, 0x3b, 0x38, 0x25, 0x3b, 0x6d, 0x00, 0x1b, 0x48, 0x00, + 0x09, 0x00, 0x1b, 0x4f, 0x77, 0x00, 0x1b, 0x4f, 0x79, 0x00, 0x1b, 0x4f, + 0x75, 0x00, 0x1b, 0x4f, 0x71, 0x00, 0x1b, 0x4f, 0x73, 0x00, 0x60, 0x60, + 0x61, 0x61, 0x66, 0x66, 0x67, 0x67, 0x69, 0x69, 0x6a, 0x6a, 0x6b, 0x6b, + 0x6c, 0x6c, 0x6d, 0x6d, 0x6e, 0x6e, 0x6f, 0x6f, 0x70, 0x70, 0x71, 0x71, + 0x72, 0x72, 0x73, 0x73, 0x74, 0x74, 0x75, 0x75, 0x76, 0x76, 0x77, 0x77, + 0x78, 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7b, 0x7b, 0x7c, 0x7c, 0x7d, 0x7d, + 0x7e, 0x7e, 0x00, 0x1b, 0x5b, 0x5a, 0x00, 0x1b, 0x5b, 0x3f, 0x37, 0x68, + 0x00, 0x1b, 0x5b, 0x3f, 0x37, 0x6c, 0x00, 0x1b, 0x4f, 0x46, 0x00, 0x1b, + 0x4f, 0x4d, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, + 0x31, 0x3b, 0x32, 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x48, 0x00, + 0x1b, 0x5b, 0x32, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, + 0x44, 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x35, + 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x43, 0x00, 0x1b, + 0x5b, 0x32, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x32, 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x51, + 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x32, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b, + 0x32, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b, 0x32, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x37, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b, + 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b, 0x32, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x30, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b, + 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b, 0x32, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x34, 0x3b, 0x32, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, + 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x51, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x35, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x53, 0x00, 0x1b, + 0x5b, 0x31, 0x35, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x3b, + 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b, 0x35, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x39, 0x3b, 
0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x30, 0x3b, + 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b, 0x35, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x33, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x3b, + 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x50, 0x00, 0x1b, 0x5b, + 0x31, 0x3b, 0x36, 0x51, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x52, 0x00, + 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b, + 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x37, 0x3b, 0x36, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x38, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b, + 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x30, 0x3b, 0x36, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x31, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b, + 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x34, 0x3b, 0x36, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x33, 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x51, + 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x3b, + 0x33, 0x53, 0x00, 0x1b, 0x5b, 0x31, 0x35, 0x3b, 0x33, 0x7e, 0x00, 0x1b, + 0x5b, 0x31, 0x37, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x38, 0x3b, + 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x39, 0x3b, 0x33, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x30, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x31, 0x3b, + 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x3b, 0x33, 0x7e, 0x00, 0x1b, + 0x5b, 0x32, 0x34, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, + 0x50, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, 0x51, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x34, 0x52, 0x00, 0x1b, 0x5b, 0x31, 0x4b, 0x00, 0x1b, 0x5b, 0x3f, + 0x36, 0x39, 0x6c, 0x00, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x64, 0x3b, 0x25, + 0x64, 0x52, 0x00, 0x1b, 0x5b, 0x36, 0x6e, 0x00, 0x1b, 0x5b, 0x3f, 0x25, + 0x5b, 0x3b, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x5d, 0x63, 0x00, 0x1b, 0x5b, 0x63, 0x00, 0x1b, 0x5b, 0x33, 0x39, 0x3b, + 0x34, 0x39, 0x6d, 0x00, 0x1b, 0x5b, 0x33, 0x25, 0x3f, 0x25, 0x70, 0x31, + 0x25, 0x7b, 0x31, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x34, 0x25, 0x65, 0x25, + 0x70, 0x31, 0x25, 0x7b, 0x33, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x36, 0x25, + 0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x34, 0x7d, 0x25, 0x3d, 0x25, 0x74, + 0x31, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x36, 0x7d, 0x25, 0x3d, + 0x25, 0x74, 0x33, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x64, 0x25, 0x3b, + 0x6d, 0x00, 0x1b, 0x5b, 0x34, 0x25, 0x3f, 0x25, 0x70, 0x31, 0x25, 0x7b, + 0x31, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x34, 0x25, 0x65, 0x25, 0x70, 0x31, + 0x25, 0x7b, 0x33, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x36, 0x25, 0x65, 0x25, + 0x70, 0x31, 0x25, 0x7b, 0x34, 0x7d, 0x25, 0x3d, 0x25, 0x74, 0x31, 0x25, + 0x65, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x36, 0x7d, 0x25, 0x3d, 0x25, 0x74, + 0x33, 0x25, 0x65, 0x25, 0x70, 0x31, 0x25, 0x64, 0x25, 0x3b, 0x6d, 0x00, + 0x1b, 0x5b, 0x33, 0x6d, 0x00, 0x1b, 0x5b, 0x32, 0x33, 0x6d, 0x00, 0x1b, + 0x5b, 0x3c, 0x00, 0x1b, 0x5b, 0x33, 0x25, 0x70, 0x31, 0x25, 0x64, 0x6d, + 0x00, 0x1b, 0x5b, 0x34, 0x25, 0x70, 0x31, 0x25, 0x64, 0x6d, 0x00, 0x1b, + 0x5b, 0x3f, 0x36, 0x39, 0x68, 0x1b, 0x5b, 0x25, 0x69, 0x25, 0x70, 0x31, + 0x25, 0x64, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x73, 0x00, 0x1b, 0x6c, + 0x00, 0x1b, 0x6d, 0x00, 0x02, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x96, 0x00, + 0xac, 0x03, 0x01, 0x01, 0x00, 0x00, 0x07, 0x00, 0x13, 0x00, 0x18, 0x00, + 0x2a, 0x00, 0x30, 0x00, 0x3a, 0x00, 0x5a, 0x00, 0x61, 0x00, 0x68, 0x00, + 0x6f, 0x00, 0x76, 0x00, 0x7d, 0x00, 0x84, 0x00, 0x8b, 0x00, 0x92, 0x00, + 0x99, 0x00, 0xa0, 0x00, 0xa7, 0x00, 0xae, 0x00, 0xb5, 0x00, 0xbc, 0x00, + 0xc3, 0x00, 0xca, 0x00, 0xd1, 0x00, 0xd8, 0x00, 0xdf, 0x00, 0xe6, 0x00, + 0xed, 0x00, 0xf4, 0x00, 
0xfb, 0x00, 0x02, 0x01, 0x09, 0x01, 0x10, 0x01, + 0x17, 0x01, 0x1e, 0x01, 0x25, 0x01, 0x2c, 0x01, 0x33, 0x01, 0x3a, 0x01, + 0x41, 0x01, 0x48, 0x01, 0x4f, 0x01, 0x56, 0x01, 0x5d, 0x01, 0x64, 0x01, + 0x6b, 0x01, 0x72, 0x01, 0x79, 0x01, 0x80, 0x01, 0x87, 0x01, 0x8e, 0x01, + 0x95, 0x01, 0x9c, 0x01, 0xa3, 0x01, 0xaa, 0x01, 0xb1, 0x01, 0xb8, 0x01, + 0xbf, 0x01, 0xc6, 0x01, 0xca, 0x01, 0xce, 0x01, 0xd2, 0x01, 0xd6, 0x01, + 0xda, 0x01, 0xde, 0x01, 0xe2, 0x01, 0xe6, 0x01, 0xea, 0x01, 0xee, 0x01, + 0xf2, 0x01, 0xf6, 0x01, 0xfc, 0x01, 0x01, 0x02, 0x00, 0x00, 0x03, 0x00, + 0x06, 0x00, 0x09, 0x00, 0x0c, 0x00, 0x0f, 0x00, 0x12, 0x00, 0x15, 0x00, + 0x18, 0x00, 0x1b, 0x00, 0x20, 0x00, 0x25, 0x00, 0x2a, 0x00, 0x2f, 0x00, + 0x34, 0x00, 0x38, 0x00, 0x3d, 0x00, 0x42, 0x00, 0x47, 0x00, 0x4c, 0x00, + 0x51, 0x00, 0x57, 0x00, 0x5d, 0x00, 0x63, 0x00, 0x69, 0x00, 0x6f, 0x00, + 0x75, 0x00, 0x7b, 0x00, 0x81, 0x00, 0x87, 0x00, 0x8d, 0x00, 0x92, 0x00, + 0x97, 0x00, 0x9c, 0x00, 0xa1, 0x00, 0xa6, 0x00, 0xac, 0x00, 0xb2, 0x00, + 0xb8, 0x00, 0xbe, 0x00, 0xc4, 0x00, 0xca, 0x00, 0xd0, 0x00, 0xd6, 0x00, + 0xdc, 0x00, 0xe2, 0x00, 0xe8, 0x00, 0xee, 0x00, 0xf4, 0x00, 0xfa, 0x00, + 0x00, 0x01, 0x06, 0x01, 0x0c, 0x01, 0x12, 0x01, 0x18, 0x01, 0x1e, 0x01, + 0x22, 0x01, 0x27, 0x01, 0x2c, 0x01, 0x31, 0x01, 0x36, 0x01, 0x3b, 0x01, + 0x3f, 0x01, 0x43, 0x01, 0x47, 0x01, 0x4b, 0x01, 0x4f, 0x01, 0x55, 0x01, + 0x5b, 0x01, 0x61, 0x01, 0x67, 0x01, 0x6d, 0x01, 0x73, 0x01, 0x79, 0x01, + 0x7e, 0x01, 0x83, 0x01, 0x1b, 0x5d, 0x31, 0x31, 0x32, 0x07, 0x00, 0x1b, + 0x5d, 0x31, 0x32, 0x3b, 0x25, 0x70, 0x31, 0x25, 0x73, 0x07, 0x00, 0x1b, + 0x5b, 0x33, 0x4a, 0x00, 0x1b, 0x5d, 0x35, 0x32, 0x3b, 0x25, 0x70, 0x31, + 0x25, 0x73, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x73, 0x07, 0x00, 0x1b, 0x5b, + 0x32, 0x20, 0x71, 0x00, 0x1b, 0x5b, 0x25, 0x70, 0x31, 0x25, 0x64, 0x20, + 0x71, 0x00, 0x1b, 0x5b, 0x3f, 0x31, 0x30, 0x30, 0x36, 0x3b, 0x31, 0x30, + 0x30, 0x30, 0x25, 0x3f, 0x25, 0x70, 0x31, 0x25, 0x7b, 0x31, 0x7d, 0x25, + 0x3d, 0x25, 0x74, 0x68, 0x25, 0x65, 0x6c, 0x25, 0x3b, 0x00, 0x1b, 0x5b, + 0x33, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x34, 0x7e, 0x00, + 0x1b, 0x5b, 0x33, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x36, + 0x7e, 0x00, 0x1b, 0x5b, 0x33, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x32, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x42, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x34, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x42, + 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, + 0x37, 0x42, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x46, 0x00, 0x1b, 0x5b, + 0x31, 0x3b, 0x34, 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x46, 0x00, + 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, + 0x46, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x48, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x34, 0x48, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x48, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x36, 0x48, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x48, + 0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b, + 0x34, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, + 0x32, 0x3b, 0x36, 0x7e, 0x00, 0x1b, 0x5b, 0x32, 0x3b, 0x37, 0x7e, 0x00, + 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, + 0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x44, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x36, 0x44, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x44, 0x00, 0x1b, + 0x5b, 0x36, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x34, 0x7e, + 0x00, 0x1b, 0x5b, 0x36, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x36, 0x3b, + 0x36, 0x7e, 0x00, 0x1b, 
0x5b, 0x36, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b, + 0x35, 0x3b, 0x33, 0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x34, 0x7e, 0x00, + 0x1b, 0x5b, 0x35, 0x3b, 0x35, 0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x36, + 0x7e, 0x00, 0x1b, 0x5b, 0x35, 0x3b, 0x37, 0x7e, 0x00, 0x1b, 0x5b, 0x31, + 0x3b, 0x33, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x34, 0x43, 0x00, 0x1b, + 0x5b, 0x31, 0x3b, 0x35, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x43, + 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, 0x43, 0x00, 0x1b, 0x5b, 0x31, 0x3b, + 0x32, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x41, 0x00, 0x1b, 0x5b, + 0x31, 0x3b, 0x34, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x35, 0x41, 0x00, + 0x1b, 0x5b, 0x31, 0x3b, 0x36, 0x41, 0x00, 0x1b, 0x5b, 0x31, 0x3b, 0x37, + 0x41, 0x00, 0x1b, 0x4f, 0x78, 0x00, 0x1b, 0x4f, 0x74, 0x00, 0x1b, 0x4f, + 0x76, 0x00, 0x1b, 0x4f, 0x72, 0x00, 0x1b, 0x4f, 0x45, 0x00, 0x1b, 0x4f, + 0x6b, 0x00, 0x1b, 0x4f, 0x6c, 0x00, 0x1b, 0x4f, 0x6f, 0x00, 0x1b, 0x4f, + 0x6e, 0x00, 0x1b, 0x4f, 0x6a, 0x00, 0x1b, 0x4f, 0x6d, 0x00, 0x1b, 0x4f, + 0x70, 0x00, 0x1b, 0x5b, 0x32, 0x39, 0x6d, 0x00, 0x1b, 0x5b, 0x39, 0x6d, + 0x00, 0x1b, 0x5b, 0x3c, 0x25, 0x69, 0x25, 0x70, 0x33, 0x25, 0x64, 0x3b, + 0x25, 0x70, 0x31, 0x25, 0x64, 0x3b, 0x25, 0x70, 0x32, 0x25, 0x64, 0x3b, + 0x25, 0x3f, 0x25, 0x70, 0x34, 0x25, 0x74, 0x4d, 0x25, 0x65, 0x6d, 0x25, + 0x3b, 0x00, 0x41, 0x58, 0x00, 0x58, 0x54, 0x00, 0x43, 0x72, 0x00, 0x43, + 0x73, 0x00, 0x45, 0x33, 0x00, 0x4d, 0x73, 0x00, 0x53, 0x65, 0x00, 0x53, + 0x73, 0x00, 0x58, 0x4d, 0x00, 0x6b, 0x44, 0x43, 0x33, 0x00, 0x6b, 0x44, + 0x43, 0x34, 0x00, 0x6b, 0x44, 0x43, 0x35, 0x00, 0x6b, 0x44, 0x43, 0x36, + 0x00, 0x6b, 0x44, 0x43, 0x37, 0x00, 0x6b, 0x44, 0x4e, 0x00, 0x6b, 0x44, + 0x4e, 0x33, 0x00, 0x6b, 0x44, 0x4e, 0x34, 0x00, 0x6b, 0x44, 0x4e, 0x35, + 0x00, 0x6b, 0x44, 0x4e, 0x36, 0x00, 0x6b, 0x44, 0x4e, 0x37, 0x00, 0x6b, + 0x45, 0x4e, 0x44, 0x33, 0x00, 0x6b, 0x45, 0x4e, 0x44, 0x34, 0x00, 0x6b, + 0x45, 0x4e, 0x44, 0x35, 0x00, 0x6b, 0x45, 0x4e, 0x44, 0x36, 0x00, 0x6b, + 0x45, 0x4e, 0x44, 0x37, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x33, 0x00, 0x6b, + 0x48, 0x4f, 0x4d, 0x34, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x35, 0x00, 0x6b, + 0x48, 0x4f, 0x4d, 0x36, 0x00, 0x6b, 0x48, 0x4f, 0x4d, 0x37, 0x00, 0x6b, + 0x49, 0x43, 0x33, 0x00, 0x6b, 0x49, 0x43, 0x34, 0x00, 0x6b, 0x49, 0x43, + 0x35, 0x00, 0x6b, 0x49, 0x43, 0x36, 0x00, 0x6b, 0x49, 0x43, 0x37, 0x00, + 0x6b, 0x4c, 0x46, 0x54, 0x33, 0x00, 0x6b, 0x4c, 0x46, 0x54, 0x34, 0x00, + 0x6b, 0x4c, 0x46, 0x54, 0x35, 0x00, 0x6b, 0x4c, 0x46, 0x54, 0x36, 0x00, + 0x6b, 0x4c, 0x46, 0x54, 0x37, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x33, 0x00, + 0x6b, 0x4e, 0x58, 0x54, 0x34, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x35, 0x00, + 0x6b, 0x4e, 0x58, 0x54, 0x36, 0x00, 0x6b, 0x4e, 0x58, 0x54, 0x37, 0x00, + 0x6b, 0x50, 0x52, 0x56, 0x33, 0x00, 0x6b, 0x50, 0x52, 0x56, 0x34, 0x00, + 0x6b, 0x50, 0x52, 0x56, 0x35, 0x00, 0x6b, 0x50, 0x52, 0x56, 0x36, 0x00, + 0x6b, 0x50, 0x52, 0x56, 0x37, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x33, 0x00, + 0x6b, 0x52, 0x49, 0x54, 0x34, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x35, 0x00, + 0x6b, 0x52, 0x49, 0x54, 0x36, 0x00, 0x6b, 0x52, 0x49, 0x54, 0x37, 0x00, + 0x6b, 0x55, 0x50, 0x00, 0x6b, 0x55, 0x50, 0x33, 0x00, 0x6b, 0x55, 0x50, + 0x34, 0x00, 0x6b, 0x55, 0x50, 0x35, 0x00, 0x6b, 0x55, 0x50, 0x36, 0x00, + 0x6b, 0x55, 0x50, 0x37, 0x00, 0x6b, 0x61, 0x32, 0x00, 0x6b, 0x62, 0x31, + 0x00, 0x6b, 0x62, 0x33, 0x00, 0x6b, 0x63, 0x32, 0x00, 0x6b, 0x70, 0x35, + 0x00, 0x6b, 0x70, 0x41, 0x44, 0x44, 0x00, 0x6b, 0x70, 0x43, 0x4d, 0x41, + 0x00, 0x6b, 0x70, 0x44, 0x49, 0x56, 0x00, 0x6b, 0x70, 0x44, 0x4f, 0x54, + 0x00, 0x6b, 0x70, 0x4d, 
0x55, 0x4c, 0x00, 0x6b, 0x70, 0x53, 0x55, 0x42, + 0x00, 0x6b, 0x70, 0x5a, 0x52, 0x4f, 0x00, 0x72, 0x6d, 0x78, 0x78, 0x00, + 0x73, 0x6d, 0x78, 0x78, 0x00, 0x78, 0x6d, 0x00] + + xterm_extensions = + [:kEND5, :Cs, :kDN5, :Cr, :kDC6, :kPRV6, :kDN7, :kb1, :kpZRO, :kNXT6, + :kLFT5, :kPRV3, :kRIT4, :kDC4, :kc2, :kp5, :kLFT6, :kIC6, :kEND6, :kIC4, + :kRIT7, :rmxx, :kpADD, :xm, :kNXT3, :XT, :kIC7, :kHOM4, :kDC7, :kPRV7, + :ka2, :kUP7, :kDN6, :kIC5, :kNXT4, :kUP5, :AX, :kpSUB, :kb3, :kDN4, + :kHOM5, :kHOM6, :kDN3, :kLFT4, :kRIT5, :kIC3, :kPRV4, :kUP, :kRIT6, :E3, + :kEND3, :kHOM7, :kDC3, :kLFT7, :kNXT5, :Se, :Ss, :kHOM3, :kRIT3, :kNXT7, + :smxx, :kEND4, :kDN, :kUP6, :XM, :kPRV5, :kUP4, :kpDOT, :kpMUL, :kEND7, + :Ms, :kpCMA, :kDC5, :kLFT3, :kpDIV, :kUP3] + + xterm_capabilities = Dict{Symbol, Union{Bool, Int, String}}( + :AX => true, + :Cr => "\e]112\a", + :Cs => "\e]12;%p1%s\a", + :E3 => "\e[3J", + :Ms => "\e]52;%p1%s;%p2%s\a", + :OTbs => true, + :Se => "\e[2 q", + :Ss => "\e[%p1%d q", + :XM => "\e[?1006;1000%?%p1%{1}%=%th%el%;", + :XT => true, + :acs_chars => "``aaffggiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz{{||}}~~", + :acsc => "``aaffggiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz{{||}}~~", + :am => true, + :auto_left_margin => false, + :auto_right_margin => true, + :back_color_erase => true, + :back_tab => "\e[Z", + :backspaces_with_bs => true, + :bce => true, + :bel => "\a", + :bell => "\a", + :blink => "\e[5m", + :bold => "\e[1m", + :bw => false, + :can_change => false, + :carriage_return => "\r", + :cbt => "\e[Z", + :ccc => false, + :ceol_standout_glitch => false, + :change_scroll_region => "\e[%i%p1%d;%p2%dr", + :chts => false, + :civis => "\e[?25l", + :clear => "\e[H\e[2J", + :clear_all_tabs => "\e[3g", + :clear_margins => "\e[?69l", + :clear_screen => "\e[H\e[2J", + :clr_bol => "\e[1K", + :clr_eol => "\e[K", + :clr_eos => "\e[J", + :cnorm => "\e[?12l\e[?25h", + :col_addr_glitch => false, + :colors => 8, + :cols => 80, + :column_address => "\e[%i%p1%dG", + :columns => 80, + :cpi_changes_res => false, + :cpix => false, + :cr => "\r", + :cr_cancels_micro_mode => false, + :crxm => false, + :csr => "\e[%i%p1%d;%p2%dr", + :cub => "\e[%p1%dD", + :cub1 => "\b", + :cud => "\e[%p1%dB", + :cud1 => "\n", + :cuf => "\e[%p1%dC", + :cuf1 => "\e[C", + :cup => "\e[%i%p1%d;%p2%dH", + :cursor_address => "\e[%i%p1%d;%p2%dH", + :cursor_down => "\n", + :cursor_home => "\e[H", + :cursor_invisible => "\e[?25l", + :cursor_left => "\b", + :cursor_normal => "\e[?12l\e[?25h", + :cursor_right => "\e[C", + :cursor_up => "\e[A", + :cursor_visible => "\e[?12;25h", + :cuu => "\e[%p1%dA", + :cuu1 => "\e[A", + :cvvis => "\e[?12;25h", + :da => false, + :daisy => false, + :db => false, + :dch => "\e[%p1%dP", + :dch1 => "\e[P", + :delete_character => "\e[P", + :delete_line => "\e[M", + :dest_tabs_magic_smso => false, + :dim => "\e[2m", + :dl => "\e[%p1%dM", + :dl1 => "\e[M", + :eat_newline_glitch => true, + :ech => "\e[%p1%dX", + :ed => "\e[J", + :el => "\e[K", + :el1 => "\e[1K", + :enter_alt_charset_mode => "\e(0", + :enter_am_mode => "\e[?7h", + :enter_blink_mode => "\e[5m", + :enter_bold_mode => "\e[1m", + :enter_ca_mode => "\e[?1049h\e[22;0;0t", + :enter_dim_mode => "\e[2m", + :enter_insert_mode => "\e[4h", + :enter_italics_mode => "\e[3m", + :enter_reverse_mode => "\e[7m", + :enter_secure_mode => "\e[8m", + :enter_standout_mode => "\e[7m", + :enter_underline_mode => "\e[4m", + :eo => false, + :erase_chars => "\e[%p1%dX", + :erase_overstrike => false, + :eslok => false, + :exit_alt_charset_mode => "\e(B", + :exit_am_mode => "\e[?7l", + 
:exit_attribute_mode => "\e(B\e[m", + :exit_ca_mode => "\e[?1049l\e[23;0;0t", + :exit_insert_mode => "\e[4l", + :exit_italics_mode => "\e[23m", + :exit_standout_mode => "\e[27m", + :exit_underline_mode => "\e[24m", + :flash => "\e[?5h\$<100/>\e[?5l", + :flash_screen => "\e[?5h\$<100/>\e[?5l", + :generic_type => false, + :gn => false, + :hard_copy => false, + :hard_cursor => false, + :has_meta_key => true, + :has_print_wheel => false, + :has_status_line => false, + :hc => false, + :hls => false, + :home => "\e[H", + :hpa => "\e[%i%p1%dG", + :hs => false, + :ht => "\t", + :hts => "\eH", + :hue_lightness_saturation => false, + :hz => false, + :ich => "\e[%p1%d@", + :il => "\e[%p1%dL", + :il1 => "\e[L", + :in => false, + :ind => "\n", + :indn => "\e[%p1%dS", + :init_2string => "\e[!p\e[?3;4l\e[4l\e>", + :init_tabs => 8, + :insert_line => "\e[L", + :insert_null_glitch => false, + :invis => "\e[8m", + :is2 => "\e[!p\e[?3;4l\e[4l\e>", + :it => 8, + :kDC => "\e[3;2~", + :kDC3 => "\e[3;3~", + :kDC4 => "\e[3;4~", + :kDC5 => "\e[3;5~", + :kDC6 => "\e[3;6~", + :kDC7 => "\e[3;7~", + :kDN => "\e[1;2B", + :kDN3 => "\e[1;3B", + :kDN4 => "\e[1;4B", + :kDN5 => "\e[1;5B", + :kDN6 => "\e[1;6B", + :kDN7 => "\e[1;7B", + :kEND => "\e[1;2F", + :kEND3 => "\e[1;3F", + :kEND4 => "\e[1;4F", + :kEND5 => "\e[1;5F", + :kEND6 => "\e[1;6F", + :kEND7 => "\e[1;7F", + :kHOM => "\e[1;2H", + :kHOM3 => "\e[1;3H", + :kHOM4 => "\e[1;4H", + :kHOM5 => "\e[1;5H", + :kHOM6 => "\e[1;6H", + :kHOM7 => "\e[1;7H", + :kIC => "\e[2;2~", + :kIC3 => "\e[2;3~", + :kIC4 => "\e[2;4~", + :kIC5 => "\e[2;5~", + :kIC6 => "\e[2;6~", + :kIC7 => "\e[2;7~", + :kLFT => "\e[1;2D", + :kLFT3 => "\e[1;3D", + :kLFT4 => "\e[1;4D", + :kLFT5 => "\e[1;5D", + :kLFT6 => "\e[1;6D", + :kLFT7 => "\e[1;7D", + :kNXT => "\e[6;2~", + :kNXT3 => "\e[6;3~", + :kNXT4 => "\e[6;4~", + :kNXT5 => "\e[6;5~", + :kNXT6 => "\e[6;6~", + :kNXT7 => "\e[6;7~", + :kPRV => "\e[5;2~", + :kPRV3 => "\e[5;3~", + :kPRV4 => "\e[5;4~", + :kPRV5 => "\e[5;5~", + :kPRV6 => "\e[5;6~", + :kPRV7 => "\e[5;7~", + :kRIT => "\e[1;2C", + :kRIT3 => "\e[1;3C", + :kRIT4 => "\e[1;4C", + :kRIT5 => "\e[1;5C", + :kRIT6 => "\e[1;6C", + :kRIT7 => "\e[1;7C", + :kUP => "\e[1;2A", + :kUP3 => "\e[1;3A", + :kUP4 => "\e[1;4A", + :kUP5 => "\e[1;5A", + :kUP6 => "\e[1;6A", + :kUP7 => "\e[1;7A", + :ka1 => "\eOw", + :ka2 => "\eOx", + :ka3 => "\eOy", + :kb1 => "\eOt", + :kb2 => "\eOu", + :kb3 => "\eOv", + :kbs => "\b", + :kc1 => "\eOq", + :kc2 => "\eOr", + :kc3 => "\eOs", + :kcbt => "\e[Z", + :kcub1 => "\eOD", + :kcud1 => "\eOB", + :kcuf1 => "\eOC", + :kcuu1 => "\eOA", + :kdch1 => "\e[3~", + :kend => "\eOF", + :kent => "\eOM", + :key_a1 => "\eOw", + :key_a3 => "\eOy", + :key_b2 => "\eOu", + :key_backspace => "\b", + :key_btab => "\e[Z", + :key_c1 => "\eOq", + :key_c3 => "\eOs", + :key_dc => "\e[3~", + :key_down => "\eOB", + :key_end => "\eOF", + :key_enter => "\eOM", + :key_f1 => "\eOP", + :key_f10 => "\e[21~", + :key_f11 => "\e[23~", + :key_f12 => "\e[24~", + :key_f13 => "\e[1;2P", + :key_f14 => "\e[1;2Q", + :key_f15 => "\e[1;2R", + :key_f16 => "\e[1;2S", + :key_f17 => "\e[15;2~", + :key_f18 => "\e[17;2~", + :key_f19 => "\e[18;2~", + :key_f2 => "\eOQ", + :key_f20 => "\e[19;2~", + :key_f21 => "\e[20;2~", + :key_f22 => "\e[21;2~", + :key_f23 => "\e[23;2~", + :key_f24 => "\e[24;2~", + :key_f25 => "\e[1;5P", + :key_f26 => "\e[1;5Q", + :key_f27 => "\e[1;5R", + :key_f28 => "\e[1;5S", + :key_f29 => "\e[15;5~", + :key_f3 => "\eOR", + :key_f30 => "\e[17;5~", + :key_f31 => "\e[18;5~", + :key_f32 => "\e[19;5~", + :key_f33 => "\e[20;5~", 
+ :key_f34 => "\e[21;5~", + :key_f35 => "\e[23;5~", + :key_f36 => "\e[24;5~", + :key_f37 => "\e[1;6P", + :key_f38 => "\e[1;6Q", + :key_f39 => "\e[1;6R", + :key_f4 => "\eOS", + :key_f40 => "\e[1;6S", + :key_f41 => "\e[15;6~", + :key_f42 => "\e[17;6~", + :key_f43 => "\e[18;6~", + :key_f44 => "\e[19;6~", + :key_f45 => "\e[20;6~", + :key_f46 => "\e[21;6~", + :key_f47 => "\e[23;6~", + :key_f48 => "\e[24;6~", + :key_f49 => "\e[1;3P", + :key_f5 => "\e[15~", + :key_f50 => "\e[1;3Q", + :key_f51 => "\e[1;3R", + :key_f52 => "\e[1;3S", + :key_f53 => "\e[15;3~", + :key_f54 => "\e[17;3~", + :key_f55 => "\e[18;3~", + :key_f56 => "\e[19;3~", + :key_f57 => "\e[20;3~", + :key_f58 => "\e[21;3~", + :key_f59 => "\e[23;3~", + :key_f6 => "\e[17~", + :key_f60 => "\e[24;3~", + :key_f61 => "\e[1;4P", + :key_f62 => "\e[1;4Q", + :key_f63 => "\e[1;4R", + :key_f7 => "\e[18~", + :key_f8 => "\e[19~", + :key_f9 => "\e[20~", + :key_home => "\eOH", + :key_ic => "\e[2~", + :key_left => "\eOD", + :key_mouse => "\e[<", + :key_npage => "\e[6~", + :key_ppage => "\e[5~", + :key_right => "\eOC", + :key_sdc => "\e[3;2~", + :key_send => "\e[1;2F", + :key_sf => "\e[1;2B", + :key_shome => "\e[1;2H", + :key_sic => "\e[2;2~", + :key_sleft => "\e[1;2D", + :key_snext => "\e[6;2~", + :key_sprevious => "\e[5;2~", + :key_sr => "\e[1;2A", + :key_sright => "\e[1;2C", + :key_up => "\eOA", + :keypad_local => "\e[?1l\e>", + :keypad_xmit => "\e[?1h\e=", + :kf1 => "\eOP", + :kf10 => "\e[21~", + :kf11 => "\e[23~", + :kf12 => "\e[24~", + :kf13 => "\e[1;2P", + :kf14 => "\e[1;2Q", + :kf15 => "\e[1;2R", + :kf16 => "\e[1;2S", + :kf17 => "\e[15;2~", + :kf18 => "\e[17;2~", + :kf19 => "\e[18;2~", + :kf2 => "\eOQ", + :kf20 => "\e[19;2~", + :kf21 => "\e[20;2~", + :kf22 => "\e[21;2~", + :kf23 => "\e[23;2~", + :kf24 => "\e[24;2~", + :kf25 => "\e[1;5P", + :kf26 => "\e[1;5Q", + :kf27 => "\e[1;5R", + :kf28 => "\e[1;5S", + :kf29 => "\e[15;5~", + :kf3 => "\eOR", + :kf30 => "\e[17;5~", + :kf31 => "\e[18;5~", + :kf32 => "\e[19;5~", + :kf33 => "\e[20;5~", + :kf34 => "\e[21;5~", + :kf35 => "\e[23;5~", + :kf36 => "\e[24;5~", + :kf37 => "\e[1;6P", + :kf38 => "\e[1;6Q", + :kf39 => "\e[1;6R", + :kf4 => "\eOS", + :kf40 => "\e[1;6S", + :kf41 => "\e[15;6~", + :kf42 => "\e[17;6~", + :kf43 => "\e[18;6~", + :kf44 => "\e[19;6~", + :kf45 => "\e[20;6~", + :kf46 => "\e[21;6~", + :kf47 => "\e[23;6~", + :kf48 => "\e[24;6~", + :kf49 => "\e[1;3P", + :kf5 => "\e[15~", + :kf50 => "\e[1;3Q", + :kf51 => "\e[1;3R", + :kf52 => "\e[1;3S", + :kf53 => "\e[15;3~", + :kf54 => "\e[17;3~", + :kf55 => "\e[18;3~", + :kf56 => "\e[19;3~", + :kf57 => "\e[20;3~", + :kf58 => "\e[21;3~", + :kf59 => "\e[23;3~", + :kf6 => "\e[17~", + :kf60 => "\e[24;3~", + :kf61 => "\e[1;4P", + :kf62 => "\e[1;4Q", + :kf63 => "\e[1;4R", + :kf7 => "\e[18~", + :kf8 => "\e[19~", + :kf9 => "\e[20~", + :khome => "\eOH", + :kich1 => "\e[2~", + :kind => "\e[1;2B", + :km => true, + :kmous => "\e[<", + :knp => "\e[6~", + :kp5 => "\eOE", + :kpADD => "\eOk", + :kpCMA => "\eOl", + :kpDIV => "\eOo", + :kpDOT => "\eOn", + :kpMUL => "\eOj", + :kpSUB => "\eOm", + :kpZRO => "\eOp", + :kpp => "\e[5~", + :kri => "\e[1;2A", + :lines => 24, + :lpi_changes_res => false, + :lpix => false, + :max_colors => 8, + :max_pairs => 64, + :mc0 => "\e[i", + :mc4 => "\e[4i", + :mc5 => "\e[5i", + :mc5i => true, + :meml => "\el", + :memory_above => false, + :memory_below => false, + :memory_lock => "\el", + :memory_unlock => "\em", + :memu => "\em", + :meta_off => "\e[?1034l", + :meta_on => "\e[?1034h", + :mgc => "\e[?69l", + :mir => true, + :move_insert_mode => 
true, + :move_standout_mode => true, + :msgr => true, + :ndscr => false, + :needs_xon_xoff => false, + :no_esc_ctlc => false, + :no_pad_char => true, + :non_dest_scroll_region => false, + :non_rev_rmcup => false, + :npc => true, + :nrrmc => false, + :nxon => false, + :op => "\e[39;49m", + :orig_pair => "\e[39;49m", + :os => false, + :over_strike => false, + :pairs => 64, + :parm_dch => "\e[%p1%dP", + :parm_delete_line => "\e[%p1%dM", + :parm_down_cursor => "\e[%p1%dB", + :parm_ich => "\e[%p1%d@", + :parm_index => "\e[%p1%dS", + :parm_insert_line => "\e[%p1%dL", + :parm_left_cursor => "\e[%p1%dD", + :parm_right_cursor => "\e[%p1%dC", + :parm_rindex => "\e[%p1%dT", + :parm_up_cursor => "\e[%p1%dA", + :print_screen => "\e[i", + :prtr_off => "\e[4i", + :prtr_on => "\e[5i", + :prtr_silent => true, + :rc => "\e8", + :rep => "%p1%c\e[%p2%{1}%-%db", + :repeat_char => "%p1%c\e[%p2%{1}%-%db", + :reset_1string => "\ec", + :reset_2string => "\e[!p\e[?3;4l\e[4l\e>", + :restore_cursor => "\e8", + :rev => "\e[7m", + :ri => "\eM", + :rin => "\e[%p1%dT", + :ritm => "\e[23m", + :rmacs => "\e(B", + :rmam => "\e[?7l", + :rmcup => "\e[?1049l\e[23;0;0t", + :rmir => "\e[4l", + :rmkx => "\e[?1l\e>", + :rmm => "\e[?1034l", + :rmso => "\e[27m", + :rmul => "\e[24m", + :rmxx => "\e[29m", + :row_addr_glitch => false, + :row_address => "\e[%i%p1%dd", + :rs1 => "\ec", + :rs2 => "\e[!p\e[?3;4l\e[4l\e>", + :sam => false, + :save_cursor => "\e7", + :sc => "\e7", + :scroll_forward => "\n", + :scroll_reverse => "\eM", + :semi_auto_right_margin => false, + :set_a_background => "\e[4%p1%dm", + :set_a_foreground => "\e[3%p1%dm", + :set_attributes => "%?%p9%t\e(0%e\e(B%;\e[0%?%p6%t;1%;%?%p5%t;2%;%?%p2%t;4%;%?%p1%p3%|%t;7%;%?%p4%t;5%;%?%p7%t;8%;m", + :set_background => "\e[4%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m", + :set_foreground => "\e[3%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m", + :set_lr_margin => "\e[?69h\e[%i%p1%d;%p2%ds", + :set_tab => "\eH", + :setab => "\e[4%p1%dm", + :setaf => "\e[3%p1%dm", + :setb => "\e[4%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m", + :setf => "\e[3%?%p1%{1}%=%t4%e%p1%{3}%=%t6%e%p1%{4}%=%t1%e%p1%{6}%=%t3%e%p1%d%;m", + :sgr => "%?%p9%t\e(0%e\e(B%;\e[0%?%p6%t;1%;%?%p5%t;2%;%?%p2%t;4%;%?%p1%p3%|%t;7%;%?%p4%t;5%;%?%p7%t;8%;m", + :sgr0 => "\e(B\e[m", + :sitm => "\e[3m", + :smacs => "\e(0", + :smam => "\e[?7h", + :smcup => "\e[?1049h\e[22;0;0t", + :smglr => "\e[?69h\e[%i%p1%d;%p2%ds", + :smir => "\e[4h", + :smkx => "\e[?1h\e=", + :smm => "\e[?1034h", + :smso => "\e[7m", + :smul => "\e[4m", + :smxx => "\e[9m", + :status_line_esc_ok => false, + :tab => "\t", + :tbc => "\e[3g", + :tilde_glitch => false, + :transparent_underline => false, + :u6 => "\e[%i%d;%dR", + :u7 => "\e[6n", + :u8 => "\e[?%[;0123456789]c", + :u9 => "\e[c", + :ul => false, + :user6 => "\e[%i%d;%dR", + :user7 => "\e[6n", + :user8 => "\e[?%[;0123456789]c", + :user9 => "\e[c", + :vpa => "\e[%i%p1%dd", + :xenl => true, + :xhp => false, + :xhpa => false, + :xm => "\e[<%i%p3%d;%p1%d;%p2%d;%?%p4%tM%em%;", + :xon => false, + :xon_xoff => false, + :xsb => false, + :xt => false, + :xvpa => false) + +@testset "terminfo" begin + dumb = Base.TermInfo(read(IOBuffer(dumb_terminfo), Base.TermInfoRaw)) + @test dumb.names == ["dumb", "80-column dumb tty"] + @test dumb.flags == 2 + @test dumb.numbers == [true] + @test dumb.extensions == Symbol[] + @test length(dumb.capabilities) == 14 + for (key, value) in dumb_capabilities + @test dumb[key] == value + end + + xterm = 
Base.TermInfo(read(IOBuffer(xterm_terminfo), Base.TermInfoRaw)) + @test xterm.names == ["xterm", "xterm terminal emulator (X Window System)"] + @test xterm.flags == 38 + @test xterm.numbers == Bool[1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1] + @test sort(xterm.extensions) == sort(xterm_extensions) + @test length(xterm.capabilities) == 519 + for (key, value) in xterm_capabilities + @test xterm[key] == value + end +end + +end diff --git a/test/testdefs.jl b/test/testdefs.jl index 1d36d8893e199..e8f62858d1cbb 100644 --- a/test/testdefs.jl +++ b/test/testdefs.jl @@ -5,6 +5,9 @@ using Test, Random function runtests(name, path, isolate=true; seed=nothing) old_print_setting = Test.TESTSET_PRINT_ENABLE[] Test.TESTSET_PRINT_ENABLE[] = false + # remove all hint_handlers, so that errorshow tests are not changed by which packages have been loaded on this worker already + # packages that call register_error_hint should also call this again, and then re-add any hooks they want to test + empty!(Base.Experimental._hint_handlers) try if isolate # Simple enough to type and random enough so that no one will hard @@ -21,7 +24,60 @@ function runtests(name, path, isolate=true; seed=nothing) res_and_time_data = @timed @testset "$name" begin # Random.seed!(nothing) will fail seed != nothing && Random.seed!(seed) + + original_depot_path = copy(Base.DEPOT_PATH) + original_load_path = copy(Base.LOAD_PATH) + original_env = copy(ENV) + original_project = Base.active_project() + Base.include(m, "$path.jl") + + if Base.DEPOT_PATH != original_depot_path + msg = "The `$(name)` test set mutated Base.DEPOT_PATH and did not restore the original values" + @error( + msg, + original_depot_path, + Base.DEPOT_PATH, + testset_name = name, + testset_path = path, + ) + error(msg) + end + if Base.LOAD_PATH != original_load_path + msg = "The `$(name)` test set mutated Base.LOAD_PATH and did not restore the original values" + @error( + msg, + original_load_path, + Base.LOAD_PATH, + testset_name = name, + testset_path = path, + ) + error(msg) + end + if copy(ENV) != original_env + throw_error_str = get(ENV, "JULIA_TEST_CHECK_MUTATED_ENV", "true") + throw_error_b = parse(Bool, throw_error_str) + if throw_error_b + msg = "The `$(name)` test set mutated ENV and did not restore the original values" + @error( + msg, + testset_name = name, + testset_path = path, + ) + error(msg) + end + end + if Base.active_project() != original_project + msg = "The `$(name)` test set changed the active project and did not restore the original value" + @error( + msg, + original_project, + Base.active_project(), + testset_name = name, + testset_path = path, + ) + error(msg) + end end rss = Sys.maxrss() #res_and_time_data[1] is the testset diff --git a/test/testenv.jl b/test/testenv.jl index 41706dd24e75e..3ef1126e0e927 100644 --- a/test/testenv.jl +++ b/test/testenv.jl @@ -35,8 +35,14 @@ if !@isdefined(testenv_defined) const rr_exename = `` end + const test_relocated_depot = haskey(ENV, "RELOCATEDEPOT") + function addprocs_with_testenv(X; rr_allowed=true, kwargs...) exename = rr_allowed ? `$rr_exename $test_exename` : test_exename + if X isa Integer + heap_size=round(Int,(Sys.total_memory()/(1024^2)/(X+1))) + push!(test_exeflags.exec, "--heap-size-hint=$(heap_size)M") + end addprocs(X; exename=exename, exeflags=test_exeflags, kwargs...) 
end diff --git a/test/testhelpers/DualNumbers.jl b/test/testhelpers/DualNumbers.jl new file mode 100644 index 0000000000000..5c481aef47f76 --- /dev/null +++ b/test/testhelpers/DualNumbers.jl @@ -0,0 +1,46 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module DualNumbers + +export Dual + +# Dual numbers type with minimal interface +# example of a (real) number type that subtypes Number, but not Real. +# Can be used to test generic linear algebra functions. + +struct Dual{T<:Real} <: Number + val::T + eps::T +end +Base.:+(x::Dual, y::Dual) = Dual(x.val + y.val, x.eps + y.eps) +Base.:-(x::Dual, y::Dual) = Dual(x.val - y.val, x.eps - y.eps) +Base.:*(x::Dual, y::Dual) = Dual(x.val * y.val, x.eps * y.val + y.eps * x.val) +Base.:*(x::Number, y::Dual) = Dual(x*y.val, x*y.eps) +Base.:*(x::Dual, y::Number) = Dual(x.val*y, x.eps*y) +Base.:/(x::Dual, y::Dual) = Dual(x.val / y.val, (x.eps*y.val - x.val*y.eps)/(y.val*y.val)) + +Base.:(==)(x::Dual, y::Dual) = x.val == y.val && x.eps == y.eps + +Base.promote_rule(::Type{Dual{T}}, ::Type{T}) where {T} = Dual{T} +Base.promote_rule(::Type{Dual{T}}, ::Type{S}) where {T,S<:Real} = Dual{promote_type(T, S)} +Base.promote_rule(::Type{Dual{T}}, ::Type{Dual{S}}) where {T,S} = Dual{promote_type(T, S)} + +Base.convert(::Type{Dual{T}}, x::Dual{T}) where {T} = x +Base.convert(::Type{Dual{T}}, x::Dual) where {T} = Dual(convert(T, x.val), convert(T, x.eps)) +Base.convert(::Type{Dual{T}}, x::Real) where {T} = Dual(convert(T, x), zero(T)) + +Base.float(x::Dual) = Dual(float(x.val), float(x.eps)) +# the following two methods are needed for normalize (to check for potential overflow) +Base.typemax(x::Dual) = Dual(typemax(x.val), zero(x.eps)) +Base.prevfloat(x::Dual{<:AbstractFloat}) = prevfloat(x.val) + +Base.abs2(x::Dual) = x*x +Base.abs(x::Dual) = sqrt(abs2(x)) +Base.sqrt(x::Dual) = Dual(sqrt(x.val), x.eps/(2sqrt(x.val))) + +Base.isless(x::Dual, y::Dual) = x.val < y.val +Base.isless(x::Real, y::Dual) = x < y.val +Base.isinf(x::Dual) = isinf(x.val) & isfinite(x.eps) +Base.real(x::Dual) = x # since we currently only consider Dual{<:Real} + +end # module diff --git a/test/testhelpers/FakePTYs.jl b/test/testhelpers/FakePTYs.jl index 17dd270cd2424..c592699440ee0 100644 --- a/test/testhelpers/FakePTYs.jl +++ b/test/testhelpers/FakePTYs.jl @@ -39,7 +39,7 @@ function open_fake_pty() rc = ccall(:unlockpt, Cint, (Cint,), fdm) rc != 0 && error("unlockpt") - fds = ccall(:open, Cint, (Ptr{UInt8}, Cint), + fds = ccall(:open, Cint, (Ptr{UInt8}, Cint, UInt32...), ccall(:ptsname, Ptr{UInt8}, (Cint,), fdm), O_RDWR | O_NOCTTY) pts = RawFD(fds) diff --git a/test/testhelpers/FillArrays.jl b/test/testhelpers/FillArrays.jl new file mode 100644 index 0000000000000..7ba18f22307ca --- /dev/null +++ b/test/testhelpers/FillArrays.jl @@ -0,0 +1,35 @@ +module FillArrays + +struct Fill{T, N, S<:NTuple{N,Integer}} <: AbstractArray{T,N} + value::T + size::S +end + +Fill(v, size::Vararg{Integer}) = Fill(v, size) + +Base.size(F::Fill) = F.size + +Base.copy(F::Fill) = F + +@inline getindex_value(F::Fill) = F.value + +@inline function Base.getindex(F::Fill{<:Any,N}, i::Vararg{Int,N}) where {N} + @boundscheck checkbounds(F, i...) + getindex_value(F) +end + +@inline function Base.setindex!(F::Fill, v, k::Integer) + @boundscheck checkbounds(F, k) + v == getindex_value(F) || throw(ArgumentError("Cannot setindex! to $v for a Fill with value $(getindex_value(F)).")) + F +end + +@inline function Base.fill!(F::Fill, v) + v == getindex_value(F) || throw(ArgumentError("Cannot fill! 
with $v a Fill with value $(getindex_value(F)).")) + F +end + +Base.show(io::IO, F::Fill) = print(io, "Fill($(F.value), $(F.size))") +Base.show(io::IO, ::MIME"text/plain", F::Fill) = show(io, F) + +end diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl index 8ac22c6244cd3..6d52260bb20fd 100644 --- a/test/testhelpers/Furlongs.jl +++ b/test/testhelpers/Furlongs.jl @@ -21,25 +21,26 @@ Furlong{p}(v::Number) where {p} = Furlong{p,typeof(v)}(v) Furlong{p}(x::Furlong{q}) where {p,q} = (typeassert(x, Furlong{p}); Furlong{p,typeof(x.val)}(x.val)) Furlong{p,T}(x::Furlong{q}) where {T,p,q} = (typeassert(x, Furlong{p}); Furlong{p,T}(T(x.val))) -Base.promote_type(::Type{Furlong{p,T}}, ::Type{Furlong{p,S}}) where {p,T,S} = +Base.promote_rule(::Type{Furlong{p,T}}, ::Type{Furlong{p,S}}) where {p,T,S} = Furlong{p,promote_type(T,S)} - +Base.promote_rule(::Type{Furlong{0,T}}, ::Type{S}) where {T,S<:Union{Real,Complex}} = + Furlong{0,promote_type(T,S)} # only Furlong{0} forms a ring and isa Number -Base.convert(::Type{T}, y::Number) where {T<:Furlong{0}} = T(y) +Base.convert(::Type{T}, y::Number) where {T<:Furlong{0}} = T(y)::T Base.convert(::Type{Furlong}, y::Number) = Furlong{0}(y) Base.convert(::Type{Furlong{<:Any,T}}, y::Number) where {T<:Number} = Furlong{0,T}(y) Base.convert(::Type{T}, y::Number) where {T<:Furlong} = typeassert(y, T) # throws, since cannot convert a Furlong{0} to a Furlong{p} # other Furlong{p} form a group -Base.convert(::Type{T}, y::Furlong) where {T<:Furlong{0}} = T(y) +Base.convert(::Type{T}, y::Furlong) where {T<:Furlong{0}} = T(y)::T Base.convert(::Type{Furlong}, y::Furlong) = y Base.convert(::Type{Furlong{<:Any,T}}, y::Furlong{p}) where {p,T<:Number} = Furlong{p,T}(y) -Base.convert(::Type{T}, y::Furlong) where {T<:Furlong} = T(y) +Base.convert(::Type{T}, y::Furlong) where {T<:Furlong} = T(y)::T -Base.one(x::Furlong{p,T}) where {p,T} = one(T) +Base.one(::Furlong{p,T}) where {p,T} = one(T) Base.one(::Type{Furlong{p,T}}) where {p,T} = one(T) -Base.oneunit(x::Furlong{p,T}) where {p,T} = Furlong{p,T}(one(T)) -Base.oneunit(x::Type{Furlong{p,T}}) where {p,T} = Furlong{p,T}(one(T)) -Base.zero(x::Furlong{p,T}) where {p,T} = Furlong{p,T}(zero(T)) +Base.oneunit(::Furlong{p,T}) where {p,T} = Furlong{p,T}(one(T)) +Base.oneunit(::Type{Furlong{p,T}}) where {p,T} = Furlong{p,T}(one(T)) +Base.zero(::Furlong{p,T}) where {p,T} = Furlong{p,T}(zero(T)) Base.zero(::Type{Furlong{p,T}}) where {p,T} = Furlong{p,T}(zero(T)) Base.iszero(x::Furlong) = iszero(x.val) Base.float(x::Furlong{p}) where {p} = Furlong{p}(float(x.val)) @@ -73,7 +74,7 @@ for op in (:+, :-) end end for op in (:(==), :(!=), :<, :<=, :isless, :isequal) - @eval $op(x::Furlong{p}, y::Furlong{p}) where {p} = $op(x.val, y.val) + @eval $op(x::Furlong{p}, y::Furlong{p}) where {p} = $op(x.val, y.val)::Bool end for (f,op) in ((:_plus,:+),(:_minus,:-),(:_times,:*),(:_div,://)) @eval function $f(v::T, ::Furlong{p}, ::Union{Furlong{q},Val{q}}) where {T,p,q} @@ -98,5 +99,6 @@ for op in (:rem, :mod) end end Base.sqrt(x::Furlong) = _div(sqrt(x.val), x, Val(2)) +Base.muladd(x::Furlong, y::Furlong, z::Furlong) = x*y + z end diff --git a/test/testhelpers/ImmutableArrays.jl b/test/testhelpers/ImmutableArrays.jl index df2a78387e07b..8f2d23be3a7a7 100644 --- a/test/testhelpers/ImmutableArrays.jl +++ b/test/testhelpers/ImmutableArrays.jl @@ -25,4 +25,7 @@ Base.getindex(A::ImmutableArray, i...) = getindex(A.data, i...) 
AbstractArray{T}(A::ImmutableArray) where {T} = ImmutableArray(AbstractArray{T}(A.data)) AbstractArray{T,N}(A::ImmutableArray{S,N}) where {S,T,N} = ImmutableArray(AbstractArray{T,N}(A.data)) +Base.copy(A::ImmutableArray) = ImmutableArray(copy(A.data)) +Base.zero(A::ImmutableArray) = ImmutableArray(zero(A.data)) + end diff --git a/test/testhelpers/InfiniteArrays.jl b/test/testhelpers/InfiniteArrays.jl index d69130f4d726a..cec3c94aaa296 100644 --- a/test/testhelpers/InfiniteArrays.jl +++ b/test/testhelpers/InfiniteArrays.jl @@ -21,11 +21,14 @@ Base.:(==)(::Infinity, ::Int) = false Base.:(==)(::Int, ::Infinity) = false Base.:(<)(::Int, ::Infinity) = true Base.:(≤)(::Int, ::Infinity) = true +Base.:(<)(::Infinity, ::Int) = false Base.:(≤)(::Infinity, ::Int) = false Base.:(≤)(::Infinity, ::Infinity) = true Base.:(-)(::Infinity, ::Int) = Infinity() Base.:(+)(::Infinity, ::Int) = Infinity() Base.:(:)(::Infinity, ::Infinity) = 1:0 +Base.max(::Infinity, ::Int) = Infinity() +Base.max(::Int, ::Infinity) = Infinity() """ OneToInf(n) @@ -45,5 +48,6 @@ Base.length(r::OneToInf) = Infinity() Base.last(r::OneToInf) = Infinity() Base.unitrange(r::OneToInf) = r Base.oneto(::Infinity) = OneToInf() +Base.unchecked_oneto(::Infinity) = OneToInf() end diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl index 01b34df8e18a9..f8da243da6b63 100644 --- a/test/testhelpers/OffsetArrays.jl +++ b/test/testhelpers/OffsetArrays.jl @@ -100,7 +100,7 @@ end # function offset_coerce(::Type{Base.OneTo{T}}, r::IdOffsetRange) where T<:Integer # rc, o = offset_coerce(Base.OneTo{T}, r.parent) -# Fallback, specialze this method if `convert(I, r)` doesn't do what you need +# Fallback, specialize this method if `convert(I, r)` doesn't do what you need offset_coerce(::Type{I}, r::AbstractUnitRange) where I<:AbstractUnitRange = convert(I, r)::I, 0 @@ -142,7 +142,7 @@ end @inline function Base.getindex(r::IdOffsetRange, i::Integer) i isa Bool && throw(ArgumentError("invalid index: $i of type Bool")) @boundscheck checkbounds(r, i) - @inbounds eltype(r)(r.parent[i - r.offset] + r.offset) + @inbounds eltype(r)(r.parent[oftype(r.offset, i) - r.offset] + r.offset) end # Logical indexing following https://github.com/JuliaLang/julia/pull/31829 @@ -197,6 +197,7 @@ Base.show(io::IO, r::IdOffsetRange) = print(io, IdOffsetRange, "(values=",first( # Optimizations @inline Base.checkindex(::Type{Bool}, inds::IdOffsetRange, i::Real) = Base.checkindex(Bool, inds.parent, i - inds.offset) +Base._firstslice(r::IdOffsetRange) = IdOffsetRange(Base._firstslice(r.parent), r.offset) ######################################################################################################## # origin.jl @@ -592,7 +593,7 @@ Base.fill!(A::OffsetArray, x) = parent_call(Ap -> fill!(Ap, x), A) # Δi = i - first(r) # i′ = first(r.parent) + Δi # and one obtains the result below. 
-parentindex(r::IdOffsetRange, i) = i - r.offset
+parentindex(r::IdOffsetRange, i) = oftype(r.offset, i) - r.offset
 @propagate_inbounds Base.getindex(A::OffsetArray{<:Any,0}) = A.parent[]
@@ -641,7 +642,7 @@ Base.copy(A::OffsetArray) = parent_call(copy, A)
 Base.strides(A::OffsetArray) = strides(parent(A))
 Base.elsize(::Type{OffsetArray{T,N,A}}) where {T,N,A} = Base.elsize(A)
-@inline Base.unsafe_convert(::Type{Ptr{T}}, A::OffsetArray{T}) where {T} = Base.unsafe_convert(Ptr{T}, parent(A))
+Base.cconvert(::Type{Ptr{T}}, A::OffsetArray{T}) where {T} = Base.cconvert(Ptr{T}, parent(A))
 # For fast broadcasting: ref https://discourse.julialang.org/t/why-is-there-a-performance-hit-on-broadcasting-with-offsetarrays/32194
 Base.dataids(A::OffsetArray) = Base.dataids(parent(A))
diff --git a/test/testhelpers/Quaternions.jl b/test/testhelpers/Quaternions.jl
index 1eddad322ec40..81b7a0c2d0121 100644
--- a/test/testhelpers/Quaternions.jl
+++ b/test/testhelpers/Quaternions.jl
@@ -20,6 +20,7 @@ Base.abs2(q::Quaternion) = q.s*q.s + q.v1*q.v1 + q.v2*q.v2 + q.v3*q.v3
 Base.float(z::Quaternion{T}) where T = Quaternion(float(z.s), float(z.v1), float(z.v2), float(z.v3))
 Base.abs(q::Quaternion) = sqrt(abs2(q))
 Base.real(::Type{Quaternion{T}}) where {T} = T
+Base.real(q::Quaternion) = q.s
 Base.conj(q::Quaternion) = Quaternion(q.s, -q.v1, -q.v2, -q.v3)
 Base.isfinite(q::Quaternion) = isfinite(q.s) & isfinite(q.v1) & isfinite(q.v2) & isfinite(q.v3)
 Base.zero(::Type{Quaternion{T}}) where T = Quaternion{T}(zero(T), zero(T), zero(T), zero(T))
@@ -33,7 +34,9 @@ Base.:(*)(q::Quaternion, w::Quaternion) = Quaternion(q.s*w.s - q.v1*w.v1 - q.v2*
                                                      q.s*w.v2 - q.v1*w.v3 + q.v2*w.s + q.v3*w.v1,
                                                      q.s*w.v3 + q.v1*w.v2 - q.v2*w.v1 + q.v3*w.s)
 Base.:(*)(q::Quaternion, r::Real) = Quaternion(q.s*r, q.v1*r, q.v2*r, q.v3*r)
-Base.:(*)(q::Quaternion, b::Bool) = b * q # remove method ambiguity
+Base.:(*)(q::Quaternion, r::Bool) = Quaternion(q.s*r, q.v1*r, q.v2*r, q.v3*r) # remove method ambiguity
+Base.:(*)(r::Real, q::Quaternion) = q * r
+Base.:(*)(r::Bool, q::Quaternion) = q * r # remove method ambiguity
 Base.:(/)(q::Quaternion, w::Quaternion) = q * conj(w) * (1.0 / abs2(w))
 Base.:(\)(q::Quaternion, w::Quaternion) = conj(q) * w * (1.0 / abs2(q))
diff --git a/test/testhelpers/SizedArrays.jl b/test/testhelpers/SizedArrays.jl
index dfcc5b79f1387..fc2862d844b3f 100644
--- a/test/testhelpers/SizedArrays.jl
+++ b/test/testhelpers/SizedArrays.jl
@@ -9,6 +9,8 @@ module SizedArrays
 import Base: +, *, ==
+using LinearAlgebra
+
 export SizedArray
 struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N}
@@ -31,9 +33,16 @@ Base.getindex(A::SizedArray, i...) = getindex(A.data, i...)
 Base.zero(::Type{T}) where T <: SizedArray = SizedArray{size(T)}(zeros(eltype(T), size(T)))
 +(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = SizedArray{SZ}(S1.data + S2.data)
 ==(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = S1.data == S2.data
-function *(S1::SizedArray, S2::SizedArray)
+
+const SizedArrayLike = Union{SizedArray, Transpose{<:Any, <:SizedArray}, Adjoint{<:Any, <:SizedArray}}
+
+_data(S::SizedArray) = S.data
+_data(T::Transpose{<:Any, <:SizedArray}) = transpose(_data(parent(T)))
+_data(T::Adjoint{<:Any, <:SizedArray}) = adjoint(_data(parent(T)))
+
+function *(S1::SizedArrayLike, S2::SizedArrayLike)
     0 < ndims(S1) < 3 && 0 < ndims(S2) < 3 && size(S1, 2) == size(S2, 1) || throw(ArgumentError("size mismatch!"))
-    data = S1.data * S2.data
+    data = _data(S1) * _data(S2)
     SZ = ndims(data) == 1 ? (size(S1, 1), ) : (size(S1, 1), size(S2, 2))
     SizedArray{SZ}(data)
 end
diff --git a/test/testhelpers/arrayindexingtypes.jl b/test/testhelpers/arrayindexingtypes.jl
index 0e956b5216c94..95c1f18e00903 100644
--- a/test/testhelpers/arrayindexingtypes.jl
+++ b/test/testhelpers/arrayindexingtypes.jl
@@ -66,3 +66,6 @@ Base.axes(A::WrapperArray) = axes(A.parent)
 Base.getindex(A::WrapperArray, i::Int...) = A.parent[i...]
 Base.setindex!(A::WrapperArray, v, i::Int...) = A.parent[i...] = v
 Base.similar(A::WrapperArray, ::Type{T}, dims::Dims) where T = similar(A.parent, T, dims)
+Base.cconvert(::Type{Ptr{T}}, A::WrapperArray{T}) where {T} = Base.cconvert(Ptr{T}, A.parent)
+Base.strides(A::WrapperArray) = strides(A.parent)
+Base.elsize(::Type{WrapperArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}} = Base.elsize(A)
diff --git a/test/testhelpers/withlocales.jl b/test/testhelpers/withlocales.jl
index a3be17cce4464..50c8058cc6466 100644
--- a/test/testhelpers/withlocales.jl
+++ b/test/testhelpers/withlocales.jl
@@ -9,7 +9,6 @@ function withlocales(f, newlocales)
             locales[cat] = unsafe_string(cstr)
         end
     end
-    timestrs = String[]
    try
        # change to each of given locales
        for lc in newlocales
@@ -17,7 +16,7 @@ function withlocales(f, newlocales)
            for (cat, _) in locales
                set &= ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc) != C_NULL
            end
-            set && f()
+            set && f(lc)
        end
    finally
        # recover locales
diff --git a/test/threadpool_use.jl b/test/threadpool_use.jl
index 92a4458ee8076..7523991fdf6a7 100644
--- a/test/threadpool_use.jl
+++ b/test/threadpool_use.jl
@@ -4,13 +4,16 @@ using Test
 using Base.Threads
 @test nthreadpools() == 2
-@test threadpool() == :default
-@test threadpool(2) == :interactive
-dtask() = @test threadpool(current_task()) == :default
-itask() = @test threadpool(current_task()) == :interactive
-dt1 = @spawn dtask()
-dt2 = @spawn :default dtask()
-it = @spawn :interactive itask()
-wait(dt1)
-wait(dt2)
-wait(it)
+@test threadpool() === :interactive
+@test threadpool(2) === :default
+@test fetch(Threads.@spawn Threads.threadpool()) === :default
+@test fetch(Threads.@spawn :default Threads.threadpool()) === :default
+@test fetch(Threads.@spawn :interactive Threads.threadpool()) === :interactive
+tp = :default
+@test fetch(Threads.@spawn tp Threads.threadpool()) === :default
+tp = :interactive
+@test fetch(Threads.@spawn tp Threads.threadpool()) === :interactive
+tp = :foo
+@test_throws ArgumentError Threads.@spawn tp Threads.threadpool()
+@test Threads.threadpooltids(:interactive) == [1]
+@test Threads.threadpooltids(:default) == [2]
diff --git a/test/threads.jl b/test/threads.jl
index 09e802757062b..ad09304bbd80d 100644
--- a/test/threads.jl
+++ b/test/threads.jl
@@ -124,7 +124,7 @@ end
 function get_nthreads(options = ``; cpus = nothing)
     cmd = `$(Base.julia_cmd()) --startup-file=no $(options)`
-    cmd = `$cmd -e "print(Threads.nthreads())"`
+    cmd = `$cmd -e "print(Threads.threadpoolsize())"`
     cmd = addenv(cmd, "JULIA_EXCLUSIVE" => "0", "JULIA_NUM_THREADS" => "auto")
     if cpus !== nothing
         cmd = setcpuaffinity(cmd, cpus)
@@ -154,7 +154,7 @@ end
 # issue #34769
 function idle_callback(handle)
-    idle = @Base.handle_as handle UvTestIdle
+    idle = Base.@handle_as handle UvTestIdle
     if idle.active
         idle.count += 1
         if idle.count == 1
@@ -312,7 +312,7 @@ close(proc.in)
 if ( !success(proc) ) || ( timeout )
     @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout
 end
-if Sys.iswindows()
+if Sys.iswindows() || Sys.isapple()
     # Known failure: https://github.com/JuliaLang/julia/issues/43124
     @test_skip success(proc)
 else
@@ -327,3 +327,13 @@ end
     @test_throws ArgumentError @macroexpand(@threads 1) # arg isn't an Expr
     @test_throws ArgumentError @macroexpand(@threads if true 1 end) # arg doesn't start with for
 end
+
+@testset "rand_ptls underflow" begin
+    @test Base.Partr.cong(UInt32(0)) == 0
+end
+
+@testset "num_stack_mappings metric" begin
+    @test @ccall(jl_get_num_stack_mappings()::Cint) >= 1
+    # There must be at least two: one for the root test task and one for the async task:
+    @test fetch(@async(@ccall(jl_get_num_stack_mappings()::Cint))) >= 2
+end
diff --git a/test/threads_exec.jl b/test/threads_exec.jl
index a5e7ba6d7e21b..7953468f9c6f3 100644
--- a/test/threads_exec.jl
+++ b/test/threads_exec.jl
@@ -2,7 +2,7 @@
 using Test
 using Base.Threads
-using Base.Threads: SpinLock
+using Base.Threads: SpinLock, threadpoolsize
 # for cfunction_closure
 include("testenv.jl")
@@ -27,9 +27,14 @@ end
 # (expected test duration is about 18-180 seconds)
 Timer(t -> killjob("KILLING BY THREAD TEST WATCHDOG\n"), 1200)
+@testset """threads_exec.jl with JULIA_NUM_THREADS == $(ENV["JULIA_NUM_THREADS"])""" begin
+
+@test Threads.threadid() == 1
+@test 1 <= threadpoolsize() <= Threads.maxthreadid()
+
 # basic lock check
-if nthreads() > 1
-    let lk = Base.Threads.SpinLock()
+if threadpoolsize() > 1
+    let lk = SpinLock()
         c1 = Base.Event()
         c2 = Base.Event()
         @test trylock(lk)
@@ -50,7 +55,7 @@ end
 # threading constructs
-let a = zeros(Int, 2 * nthreads())
+let a = zeros(Int, 2 * threadpoolsize())
     @threads for i = 1:length(a)
         @sync begin
             @async begin
@@ -70,7 +75,7 @@ end
 # parallel loop with parallel atomic addition
 function threaded_loop(a, r, x)
-    counter = Threads.Atomic{Int}(min(Threads.nthreads(), length(r)))
+    counter = Threads.Atomic{Int}(min(threadpoolsize(), length(r)))
     @threads for i in r
         # synchronize the start given that each partition is started sequentially,
         # meaning that without the wait, if the loop is too fast the iteration can happen in order
@@ -208,7 +213,7 @@ function threaded_gc_locked(::Type{LockT}) where LockT
 end
 threaded_gc_locked(SpinLock)
-threaded_gc_locked(Threads.ReentrantLock)
+threaded_gc_locked(ReentrantLock)
 # Issue 33159
 # Make sure that a Threads.Condition can't be used without being locked, on any thread.
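The threads.jl and threads_exec.jl hunks in this patch consistently replace `nthreads()` with `threadpoolsize()`, mirroring the new threadpool assertions in threadpool_use.jl above. As a rough illustration of how the pool-aware queries relate, a minimal sketch only, assuming the Julia 1.9+ threadpool API these tests exercise (the concrete values depend on the --threads setting used to start the session):

    Threads.threadpool()            # pool of the current task, e.g. :interactive or :default
    Threads.threadpoolsize()        # number of threads in the :default pool
    Threads.nthreads(:interactive)  # number of threads in the :interactive pool
    Threads.maxthreadid()           # upper bound on thread ids across all pools
    @assert 1 <= Threads.threadpoolsize() <= Threads.maxthreadid()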
@@ -229,7 +234,7 @@ end
 # Make sure that eval'ing in a different module doesn't mess up other threads
 orig_curmodule14726 = @__MODULE__
 main_var14726 = 1
-module M14726
+@eval Main module M14726
 module_var14726 = 1
 end
@@ -249,7 +254,7 @@ end
     @test @__MODULE__() == orig_curmodule14726
 end
-module M14726_2
+@eval Main module M14726_2
 using Test
 using Base.Threads
 @threads for i in 1:100
@@ -265,7 +270,7 @@ end
 @test_throws TypeError Atomic{BigInt}
 @test_throws TypeError Atomic{ComplexF64}
-if Sys.ARCH == :i686 || startswith(string(Sys.ARCH), "arm") ||
+if Sys.ARCH === :i686 || startswith(string(Sys.ARCH), "arm") ||
     Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le
     @test_throws TypeError Atomic{Int128}()
@@ -423,7 +428,7 @@ end
 for T in intersect((Int32, Int64, Float32, Float64), Base.Threads.atomictypes)
     var = Atomic{T}()
     nloops = 1000
-    di = nthreads()
+    di = threadpoolsize()
     @threads for i in 1:di
         test_atomic_cas!(var, i:di:nloops)
     end
@@ -513,7 +518,7 @@ function test_thread_cfunction()
     @test cfs[1] == cf1
     @test cfs[2] == cf(fs[2])
     @test length(unique(cfs)) == 1000
-    ok = zeros(Int, nthreads())
+    ok = zeros(Int, threadpoolsize())
     @threads :static for i in 1:10000
         i = mod1(i, 1000)
         fi = fs[i]
@@ -529,14 +534,14 @@ if cfunction_closure
 end
 function test_thread_range()
-    a = zeros(Int, nthreads())
+    a = zeros(Int, threadpoolsize())
     @threads for i in 1:threadid()
         a[i] = 1
     end
     for i in 1:threadid()
         @test a[i] == 1
     end
-    for i in (threadid() + 1):nthreads()
+    for i in (threadid() + 1):threadpoolsize()
         @test a[i] == 0
     end
 end
@@ -576,17 +581,17 @@ test_nested_loops()
 function test_thread_too_few_iters()
     x = Atomic()
-    a = zeros(Int, nthreads()+2)
-    threaded_loop(a, 1:nthreads()-1, x)
-    found = zeros(Bool, nthreads()+2)
-    for i=1:nthreads()-1
+    a = zeros(Int, threadpoolsize()+2)
+    threaded_loop(a, 1:threadpoolsize()-1, x)
+    found = zeros(Bool, threadpoolsize()+2)
+    for i=1:threadpoolsize()-1
         found[a[i]] = true
     end
-    @test x[] == nthreads()-1
+    @test x[] == threadpoolsize()-1
     # Next test checks that all loop iterations ran,
     # and were unique (via pigeon-hole principle).
-    @test !(false in found[1:nthreads()-1])
-    @test !(true in found[nthreads():end])
+    @test !(false in found[1:threadpoolsize()-1])
+    @test !(true in found[threadpoolsize():end])
 end
 test_thread_too_few_iters()
@@ -728,10 +733,10 @@ function _atthreads_with_error(a, err)
     end
     a
 end
-@test_throws CompositeException _atthreads_with_error(zeros(nthreads()), true)
-let a = zeros(nthreads())
+@test_throws CompositeException _atthreads_with_error(zeros(threadpoolsize()), true)
+let a = zeros(threadpoolsize())
     _atthreads_with_error(a, false)
-    @test a == [1:nthreads();]
+    @test a == [1:threadpoolsize();]
 end
 # static schedule
@@ -742,11 +747,11 @@ function _atthreads_static_schedule(n)
     end
     return ids
 end
-@test _atthreads_static_schedule(nthreads()) == 1:nthreads()
+@test _atthreads_static_schedule(threadpoolsize()) == 1:threadpoolsize()
 @test _atthreads_static_schedule(1) == [1;]
 @test_throws(
     "`@threads :static` cannot be used concurrently or nested",
-    @threads(for i = 1:1; _atthreads_static_schedule(nthreads()); end),
+    @threads(for i = 1:1; _atthreads_static_schedule(threadpoolsize()); end),
 )
 # dynamic schedule
@@ -759,35 +764,35 @@ function _atthreads_dynamic_schedule(n)
     end
     return inc[], flags
 end
-@test _atthreads_dynamic_schedule(nthreads()) == (nthreads(), ones(nthreads()))
+@test _atthreads_dynamic_schedule(threadpoolsize()) == (threadpoolsize(), ones(threadpoolsize()))
 @test _atthreads_dynamic_schedule(1) == (1, ones(1))
 @test _atthreads_dynamic_schedule(10) == (10, ones(10))
-@test _atthreads_dynamic_schedule(nthreads() * 2) == (nthreads() * 2, ones(nthreads() * 2))
+@test _atthreads_dynamic_schedule(threadpoolsize() * 2) == (threadpoolsize() * 2, ones(threadpoolsize() * 2))
 # nested dynamic schedule
 function _atthreads_dynamic_dynamic_schedule()
     inc = Threads.Atomic{Int}(0)
-    Threads.@threads :dynamic for _ = 1:nthreads()
-        Threads.@threads :dynamic for _ = 1:nthreads()
+    Threads.@threads :dynamic for _ = 1:threadpoolsize()
+        Threads.@threads :dynamic for _ = 1:threadpoolsize()
             Threads.atomic_add!(inc, 1)
         end
     end
     return inc[]
 end
-@test _atthreads_dynamic_dynamic_schedule() == nthreads() * nthreads()
+@test _atthreads_dynamic_dynamic_schedule() == threadpoolsize() * threadpoolsize()
 function _atthreads_static_dynamic_schedule()
-    ids = zeros(Int, nthreads())
+    ids = zeros(Int, threadpoolsize())
     inc = Threads.Atomic{Int}(0)
-    Threads.@threads :static for i = 1:nthreads()
+    Threads.@threads :static for i = 1:threadpoolsize()
         ids[i] = Threads.threadid()
-        Threads.@threads :dynamic for _ = 1:nthreads()
+        Threads.@threads :dynamic for _ = 1:threadpoolsize()
             Threads.atomic_add!(inc, 1)
         end
     end
     return ids, inc[]
 end
-@test _atthreads_static_dynamic_schedule() == (1:nthreads(), nthreads() * nthreads())
+@test _atthreads_static_dynamic_schedule() == (1:threadpoolsize(), threadpoolsize() * threadpoolsize())
 # errors inside @threads :dynamic
 function _atthreads_dynamic_with_error(a)
@@ -796,7 +801,7 @@ function _atthreads_dynamic_with_error(a)
     end
     a
 end
-@test_throws "user error in the loop body" _atthreads_dynamic_with_error(zeros(nthreads()))
+@test_throws "user error in the loop body" _atthreads_dynamic_with_error(zeros(threadpoolsize()))
 try
     @macroexpand @threads(for i = 1:10, j = 1:10; end)
@@ -1025,7 +1030,7 @@ function check_sync_end_race()
             nnotscheduled += y === :notscheduled
         end
         # Useful for tuning the test:
-        @debug "`check_sync_end_race` done" nthreads() ncompleted nnotscheduled nerror
+        @debug "`check_sync_end_race` done" threadpoolsize() ncompleted nnotscheduled nerror
     finally
        done[] = true
    end
@@ -1039,21 +1044,21 @@ end
 # issue #41546, thread-safe package loading
 @testset "package loading" begin
-    ch = Channel{Bool}(nthreads())
+    ch = Channel{Bool}(threadpoolsize())
     barrier = Base.Event()
     old_act_proj = Base.ACTIVE_PROJECT[]
     try
         pushfirst!(LOAD_PATH, "@")
         Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg")
         @sync begin
-            for _ in 1:nthreads()
+            for _ in 1:threadpoolsize()
                 Threads.@spawn begin
                     put!(ch, true)
                     wait(barrier)
                     @eval using TestPkg
                 end
             end
-            for _ in 1:nthreads()
+            for _ in 1:threadpoolsize()
                 take!(ch)
             end
             notify(barrier)
@@ -1064,3 +1069,43 @@ end
         popfirst!(LOAD_PATH)
     end
 end
+
+# issue #49746, thread safety in `atexit(f)`
+@testset "atexit thread safety" begin
+    f = () -> nothing
+    before_len = length(Base.atexit_hooks)
+    @sync begin
+        for _ in 1:1_000_000
+            Threads.@spawn begin
+                atexit(f)
+            end
+        end
+    end
+    @test length(Base.atexit_hooks) == before_len + 1_000_000
+    @test all(hook -> hook === f, Base.atexit_hooks[1 : 1_000_000])
+
+    # cleanup
+    Base.@lock Base._atexit_hooks_lock begin
+        deleteat!(Base.atexit_hooks, 1:1_000_000)
+    end
+end
+
+# Thread safety of threadcall
+function threadcall_threads()
+    Threads.@threads for i = 1:8
+        ptr = @threadcall(:jl_malloc, Ptr{Cint}, (Csize_t,), sizeof(Cint))
+        @test ptr != C_NULL
+        unsafe_store!(ptr, 3)
+        @test unsafe_load(ptr) == 3
+        ptr = @threadcall(:jl_realloc, Ptr{Cint}, (Ptr{Cint}, Csize_t,), ptr, 2 * sizeof(Cint))
+        @test ptr != C_NULL
+        unsafe_store!(ptr, 4, 2)
+        @test unsafe_load(ptr, 1) == 3
+        @test unsafe_load(ptr, 2) == 4
+        @threadcall(:jl_free, Cvoid, (Ptr{Cint},), ptr)
+    end
+end
+@testset "threadcall + threads" begin
+    threadcall_threads() # shouldn't crash!
+end
+end # main testset
diff --git a/test/tuple.jl b/test/tuple.jl
index 055fd47a55cff..b806667fd9d0a 100644
--- a/test/tuple.jl
+++ b/test/tuple.jl
@@ -265,8 +265,10 @@ end
     @test map(foo, (1,2,3,4), (1,2,3,4)) === (2,4,6,8)
     @test map(foo, longtuple, longtuple) === ntuple(i->2i,20)
     @test map(foo, vlongtuple, vlongtuple) === ntuple(i->2i,33)
-    @test_throws BoundsError map(foo, (), (1,))
-    @test_throws BoundsError map(foo, (1,), ())
+    @test map(foo, longtuple, vlongtuple) === ntuple(i->2i,20)
+    @test map(foo, vlongtuple, longtuple) === ntuple(i->2i,20)
+    @test map(foo, (), (1,)) === ()
+    @test map(foo, (1,), ()) === ()
 end
 @testset "n arguments" begin
@@ -276,8 +278,11 @@ end
     @test map(foo, (1,2,3,4), (1,2,3,4), (1,2,3,4)) === (3,6,9,12)
     @test map(foo, longtuple, longtuple, longtuple) === ntuple(i->3i,20)
     @test map(foo, vlongtuple, vlongtuple, vlongtuple) === ntuple(i->3i,33)
-    @test_throws BoundsError map(foo, (), (1,), (1,))
-    @test_throws BoundsError map(foo, (1,), (1,), ())
+    @test map(foo, vlongtuple, longtuple, longtuple) === ntuple(i->3i,20)
+    @test map(foo, longtuple, vlongtuple, longtuple) === ntuple(i->3i,20)
+    @test map(foo, longtuple, vlongtuple, vlongtuple) === ntuple(i->3i,20)
+    @test map(foo, (), (1,), (1,)) === ()
+    @test map(foo, (1,), (1,), ()) === ()
 end
 end
@@ -617,7 +622,7 @@ end
 @testset "properties" begin
     ttest = (:a, :b, :c)
     @test propertynames(ttest) == (1, 2, 3)
-    @test getproperty(ttest, 2) == :b
+    @test getproperty(ttest, 2) === :b
     @test map(p->getproperty(ttest, p), propertynames(ttest)) == ttest
     @test_throws ErrorException setproperty!(ttest, 1, :d)
 end
@@ -644,7 +649,7 @@ end
     @test @inferred(f()) == (9, 2:2, 3:3)
 end
-@testset "inferrable range indexing with constant values" begin
+@testset "inferable range indexing with constant values" begin
     whole(t) = t[1:end]
     tail(t) = t[2:end]
     ttail(t) = t[3:end]
@@ -754,3 +759,51 @@ g42457(a, b) = Base.isequal(a, b) ? 1 : 2.0
 @test only(Base.return_types(g42457, (NTuple{3, Int}, Tuple))) === Union{Float64, Int}
 @test only(Base.return_types(g42457, (NTuple{3, Int}, NTuple))) === Union{Float64, Int}
 @test only(Base.return_types(g42457, (NTuple{3, Int}, NTuple{4}))) === Float64
+
+# issue #46049: setindex(::Tuple) regression
+@inferred Base.setindex((1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16), 42, 1)
+
+# issue #50562
+f50562(r) = in(:i_backward, r[])
+r50562 = Ref((:b_back, :foofakgka, :i_backw))
+f50562(r50562)
+@test @allocated(f50562(r50562)) == 0
+
+# issue #47326
+function fun1_47326(args...)
+    head..., tail = args
+    head
+end
+function fun2_47326(args...)
+    head, tail... = args
+    tail
+end
+@test @inferred(fun1_47326(1,2,3)) === (1, 2)
+@test @inferred(fun2_47326(1,2,3)) === (2, 3)
+
+f47326(x::Union{Tuple, NamedTuple}) = Base.split_rest(x, 1)
+tup = (1, 2, 3)
+namedtup = (;a=1, b=2, c=3)
+@test only(Base.return_types(f47326, (typeof(tup),))) == Tuple{Tuple{Int, Int}, Tuple{Int}}
+@test only(Base.return_types(f47326, (typeof(namedtup),))) ==
+    Tuple{
+        NamedTuple{(:a, :b), Tuple{Int, Int}},
+        NamedTuple{(:c,), Tuple{Int}},
+    }
+
+# Make sure that tuple iteration is foldable
+@test Core.Compiler.is_foldable(Base.infer_effects(iterate, Tuple{NTuple{4, Float64}, Int}))
+@test Core.Compiler.is_foldable(Base.infer_effects(eltype, Tuple{Tuple}))
+
+# some basic equivalence handling tests for Union{} appearing in Tuple Vararg parameters
+@test Tuple{} <: Tuple{Vararg{Union{}}}
+@test Tuple{Int} <: Tuple{Int, Vararg{Union{}}}
+@test_throws ErrorException("Tuple field type cannot be Union{}") Tuple{Int, Vararg{Union{},1}}
+@test_throws ErrorException("Tuple field type cannot be Union{}") Tuple{Vararg{Union{},1}}
+@test Tuple{} <: Tuple{Vararg{Union{},N}} where N
+@test !(Tuple{} >: Tuple{Vararg{Union{},N}} where N)
+
+@test Val{Tuple{T,T,T} where T} === Val{Tuple{Vararg{T,3}} where T}
+@test Val{Tuple{Vararg{T,4}} where T} === Val{Tuple{T,T,T,T} where T}
+@test Val{Tuple{Int64, Vararg{Int32,N}} where N} === Val{Tuple{Int64, Vararg{Int32}}}
+@test Val{Tuple{Int32, Vararg{Int64}}} === Val{Tuple{Int32, Vararg{Int64,N}} where N}
diff --git a/test/util/segfault.jl b/test/util/segfault.jl
deleted file mode 100644
index fef390870776f..0000000000000
--- a/test/util/segfault.jl
+++ /dev/null
@@ -1,3 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-unsafe_load(convert(Ptr{UInt8},C_NULL))
diff --git a/test/util/throw_error_exception.jl b/test/util/throw_error_exception.jl
deleted file mode 100644
index d0acea8f647f0..0000000000000
--- a/test/util/throw_error_exception.jl
+++ /dev/null
@@ -1,3 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-error("This purposefully dies")
diff --git a/test/version.jl b/test/version.jl
index 3723bb0f788e2..242b32c47cbdc 100644
--- a/test/version.jl
+++ b/test/version.jl
@@ -219,11 +219,14 @@ for major=0:3, minor=0:3, patch=0:3
     end
 end
-# banner
-import Base.banner
-io = IOBuffer()
-@test banner(io) === nothing
-@test length(String(take!(io))) > 50
+# VersionNumber has the promised fields
+let v = v"4.2.1-1.x+a.9"
+    @test v.major isa Integer
+    @test v.minor isa Integer
+    @test v.patch isa Integer
+    @test v.prerelease isa Tuple{Vararg{Union{Integer, AbstractString}}}
+    @test v.build isa Tuple{Vararg{Union{Integer, AbstractString}}}
+end
 # julia_version.h version test
 @test VERSION.major == ccall(:jl_ver_major, Cint, ())
diff --git a/test/worlds.jl b/test/worlds.jl
index 93445e07699c0..8e820bdab88df 100644
--- a/test/worlds.jl
+++ b/test/worlds.jl
@@ -226,30 +226,17 @@ g38435(x) = f38435(x, x)
 f38435(::Int, ::Int) = 3.0
 @test g38435(1) === 3.0
+# Invalidation
+# ============
-## Invalidation tests
-
-function instance(f, types)
+function method_instance(f, types=Base.default_tt(f))
     m = which(f, types)
     inst = nothing
-    tt = Tuple{typeof(f), types...}
-    specs = m.specializations
-    if isa(specs, Nothing)
-    elseif isa(specs, Core.SimpleVector)
-        for i = 1:length(specs)
-            mi = specs[i]
-            if mi isa Core.MethodInstance
-                if mi.specTypes <: tt && tt <: mi.specTypes
-                    inst = mi
-                    break
-                end
-            end
-        end
-    else
-        Base.visit(specs) do mi
-            if mi.specTypes === tt
-                inst = mi
-            end
+    tt = Base.signature_type(f, types)
+    for mi in Base.specializations(m)
+        if mi.specTypes <: tt && tt <: mi.specTypes
+            inst = mi
+            break
+        end
         end
     end
     return inst
@@ -290,30 +277,30 @@ f35855(::Float64) = 2
 applyf35855([1])
 applyf35855([1.0])
 applyf35855(Any[1])
-wint = worlds(instance(applyf35855, (Vector{Int},)))
-wfloat = worlds(instance(applyf35855, (Vector{Float64},)))
-wany2 = worlds(instance(applyf35855, (Vector{Any},)))
+wint = worlds(method_instance(applyf35855, (Vector{Int},)))
+wfloat = worlds(method_instance(applyf35855, (Vector{Float64},)))
+wany2 = worlds(method_instance(applyf35855, (Vector{Any},)))
 src2 = code_typed(applyf35855, (Vector{Any},))[1]
 f35855(::String) = 3
 applyf35855(Any[1])
-@test worlds(instance(applyf35855, (Vector{Int},))) == wint
-@test worlds(instance(applyf35855, (Vector{Float64},))) == wfloat
-wany3 = worlds(instance(applyf35855, (Vector{Any},)))
+@test worlds(method_instance(applyf35855, (Vector{Int},))) == wint
+@test worlds(method_instance(applyf35855, (Vector{Float64},))) == wfloat
+wany3 = worlds(method_instance(applyf35855, (Vector{Any},)))
 src3 = code_typed(applyf35855, (Vector{Any},))[1]
 @test !(wany3 == wany2) || equal(src3, src2) # code doesn't change unless you invalidate
 f35855(::AbstractVector) = 4
 applyf35855(Any[1])
-wany4 = worlds(instance(applyf35855, (Vector{Any},)))
+wany4 = worlds(method_instance(applyf35855, (Vector{Any},)))
 src4 = code_typed(applyf35855, (Vector{Any},))[1]
 @test !(wany4 == wany3) || equal(src4, src3) # code doesn't change unless you invalidate
 f35855(::Dict) = 5
 applyf35855(Any[1])
-wany5 = worlds(instance(applyf35855, (Vector{Any},)))
+wany5 = worlds(method_instance(applyf35855, (Vector{Any},)))
 src5 = code_typed(applyf35855, (Vector{Any},))[1]
 @test (wany5 == wany4) == equal(src5, src4)
 f35855(::Set) = 6 # with current settings, this shouldn't invalidate
 applyf35855(Any[1])
-wany6 = worlds(instance(applyf35855, (Vector{Any},)))
+wany6 = worlds(method_instance(applyf35855, (Vector{Any},)))
 src6 = code_typed(applyf35855, (Vector{Any},))[1]
 @test wany6 == wany5
 @test equal(src6, src5)
@@ -322,11 +309,11 @@ applyf35855_2(c) = f35855_2(c[1])
 f35855_2(::Int) = 1
 f35855_2(::Float64) = 2
 applyf35855_2(Any[1])
-wany3 = worlds(instance(applyf35855_2, (Vector{Any},)))
+wany3 = worlds(method_instance(applyf35855_2, (Vector{Any},)))
 src3 = code_typed(applyf35855_2, (Vector{Any},))[1]
 f35855_2(::AbstractVector) = 4
 applyf35855_2(Any[1])
-wany4 = worlds(instance(applyf35855_2, (Vector{Any},)))
+wany4 = worlds(method_instance(applyf35855_2, (Vector{Any},)))
 src4 = code_typed(applyf35855_2, (Vector{Any},))[1]
 @test !(wany4 == wany3) || equal(src4, src3) # code doesn't change unless you invalidate
@@ -343,25 +330,60 @@ end
 (::Type{X})(x::Real) where {T, X<:FixedPoint35855{T}} = X(round(T, typemax(T)*x), 0)
 @test worlds(mi) == w
-mi = instance(convert, (Type{Nothing}, String))
+mi = method_instance(convert, (Type{Nothing}, String))
 w = worlds(mi)
 abstract type Colorant35855 end
 Base.convert(::Type{C}, c) where {C<:Colorant35855} = false
 @test worlds(mi) == w
-# NamedTuple and extensions of eltype
+## NamedTuple and extensions of eltype
 outer(anyc) = inner(anyc[])
 inner(s::Union{Vector,Dict}; kw=false) = inneri(s, kwi=maximum(s), kwb=kw)
 inneri(s, args...; kwargs...) = inneri(IOBuffer(), s, args...; kwargs...)
 inneri(io::IO, s::Union{Vector,Dict}; kwi=0, kwb=false) = (print(io, first(s), " "^kwi, kwb); String(take!(io)))
 @test outer(Ref{Any}([1,2,3])) == "1 false"
-mi = instance(Core.kwfunc(inneri), (NamedTuple{(:kwi,:kwb),TT} where TT<:Tuple{Any,Bool}, typeof(inneri), Vector{T} where T))
+mi = method_instance(Core.kwcall, (NamedTuple{(:kwi,:kwb),TT} where TT<:Tuple{Any,Bool}, typeof(inneri), Vector{T} where T))
 w = worlds(mi)
 abstract type Container{T} end
 Base.eltype(::Type{C}) where {T,C<:Container{T}} = T
 @test worlds(mi) == w
+## invoke call
+
+_invoke46741(a::Int) = a > 0 ? :int : println(a)
+_invoke46741(a::Integer) = a > 0 ? :integer : println(a)
+invoke46741(a) = @invoke _invoke46741(a::Integer)
+@test invoke46741(42) === :integer
+invoke46741_world = worlds(method_instance(invoke46741, (Int,)))
+_invoke46741(a::Int) = a > 0 ? :int2 : println(a)
+@test invoke46741(42) === :integer
+@test worlds(method_instance(invoke46741, (Int,))) == invoke46741_world
+_invoke46741(a::UInt) = a > 0 ? :uint2 : println(a)
+@test invoke46741(42) === :integer
+@test worlds(method_instance(invoke46741, (Int,))) == invoke46741_world
+_invoke46741(a::Integer) = a > 0 ? :integer2 : println(a)
+@test invoke46741(42) === :integer2
+@test worlds(method_instance(invoke46741, (Int,))) ≠ invoke46741_world
+
+# const-prop'ed call
+_invoke46741(a::Int) = a > 0 ? :int : println(a)
+_invoke46741(a::Integer) = a > 0 ? :integer : println(a)
+invoke46741() = @invoke _invoke46741(42::Integer)
+@test invoke46741() === :integer
+invoke46741_world = worlds(method_instance(invoke46741, ()))
+_invoke46741(a::Int) = a > 0 ? :int2 : println(a)
+@test invoke46741() === :integer
+@test worlds(method_instance(invoke46741, ())) == invoke46741_world
+_invoke46741(a::UInt) = a > 0 ? :uint2 : println(a)
+@test invoke46741() === :integer
+@test worlds(method_instance(invoke46741, ())) == invoke46741_world
+_invoke46741(a::Integer) = a > 0 ? :integer2 : println(a)
+@test invoke46741() === :integer2
+@test worlds(method_instance(invoke46741, ())) ≠ invoke46741_world
+
 # invoke_in_world
+# ===============
+
 f_inworld(x) = "world one; x=$x"
 g_inworld(x; y) = "world one; x=$x, y=$y"
 wc_aiw1 = get_world_counter()
@@ -373,3 +395,64 @@ wc_aiw2 = get_world_counter()
 @test Base.invoke_in_world(wc_aiw2, f_inworld, 2) == "world two; x=2"
 @test Base.invoke_in_world(wc_aiw1, g_inworld, 2, y=3) == "world one; x=2, y=3"
 @test Base.invoke_in_world(wc_aiw2, g_inworld, 2, y=3) == "world two; x=2, y=3"
+
+# logging
+mc48954(x, y) = false
+mc48954(x::Int, y::Int) = x == y
+mc48954(x::Symbol, y::Symbol) = x == y
+function mcc48954(container, y)
+    x = container[1]
+    return mc48954(x, y)
+end
+
+mcc48954(Any[1], 1)
+mc48954i = method_instance(mc48954, (Any, Int))
+mcc48954i = method_instance(mcc48954, (Vector{Any}, Int))
+list48954 = ccall(:jl_debug_method_invalidation, Any, (Cint,), 1)
+mc48954(x::AbstractFloat, y::Int) = x == y
+ccall(:jl_debug_method_invalidation, Any, (Cint,), 0)
+@test list48954 == [
+    mcc48954i,
+    1,
+    mc48954i,
+    "jl_method_table_insert",
+    which(mc48954, (AbstractFloat, Int)),
+    "jl_method_table_insert"
+]
+
+# issue #50091 -- missing invoke edge affecting nospecialized dispatch
+module ExceptionUnwrapping
+@nospecialize
+unwrap_exception(@nospecialize(e)) = e
+unwrap_exception(e::Base.TaskFailedException) = e.task.exception
+@noinline function _summarize_task_exceptions(io::IO, exc, prefix = nothing)
+    _summarize_exception((;prefix,), io, exc)
+    nothing
+end
+@noinline function _summarize_exception(kws, io::IO, e::TaskFailedException)
+    _summarize_task_exceptions(io, e.task, kws.prefix)
+end
+# This is the overload that prints the actual exception that occurred.
+result = Bool[]
+@noinline function _summarize_exception(kws, io::IO, @nospecialize(exc))
+    global result
+    push!(result, unwrap_exception(exc) === exc)
+    if unwrap_exception(exc) !== exc # something uninferrable
+        return _summarize_exception(kws, io, unwrap_exception(exc))
+    end
+end
+struct X; x; end
+end
+let e = ExceptionUnwrapping.X(nothing)
+    @test ExceptionUnwrapping.unwrap_exception(e) === e
+    ExceptionUnwrapping._summarize_task_exceptions(devnull, e)
+    @test ExceptionUnwrapping.result == [true]
+    empty!(ExceptionUnwrapping.result)
+end
+ExceptionUnwrapping.unwrap_exception(e::ExceptionUnwrapping.X) = e.x
+let e = ExceptionUnwrapping.X(nothing)
+    @test !(ExceptionUnwrapping.unwrap_exception(e) === e)
+    ExceptionUnwrapping._summarize_task_exceptions(devnull, e)
+    @test ExceptionUnwrapping.result == [false, true]
+    empty!(ExceptionUnwrapping.result)
+end
diff --git a/typos.toml b/typos.toml
new file mode 100644
index 0000000000000..b9a9311946bc4
--- /dev/null
+++ b/typos.toml
@@ -0,0 +1,2 @@
+[default]
+extend-ignore-words-re = ["^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?$"]
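The `extend-ignore-words-re` entry above makes the typos check skip any word of four or fewer ASCII letters, so short identifiers do not produce false positives. A quick sanity check of that pattern, written in plain Julia outside the patch (the sample words are made up for illustration):

    ignore_re = r"^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?$"
    occursin(ignore_re, "ba")      # true:  two letters, ignored by the typos check
    occursin(ignore_re, "fro")     # true:  three letters, ignored
    occursin(ignore_re, "arrray")  # false: six letters, still checked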